From d954c19b21bcf53da3f15fae40a0751c6dcc73df Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Fri, 28 Mar 2025 10:46:34 +0100 Subject: [PATCH 001/340] CASSANDRA-20296 follow-up: by default, explicitly set -XX:MaxDirectMemorySize to half of -Xmx patch by Stefan Miklosovic; reviewed by Michael Semb Wever for CASSANDRA-20296 --- NEWS.txt | 1 + conf/cassandra-env.sh | 14 ++++++++++++++ conf/jvm-server.options | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/NEWS.txt b/NEWS.txt index 5b5ec9b92a1a..efbc8d7ac86a 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -74,6 +74,7 @@ Upgrading storage-attached indexes (SAI) to make migration between the two safer. This behavior can be switched off via the flag `sai_options.prioritize_over_legacy_index` (which defaults to `false`) in `cassandra.yaml` or via `setPrioritizeSAIOverLegacyIndex(boolean)` in the JMX MBean `org.apache.cassandra.db:type=StorageService`. + - Java's -XX:MaxDirectMemorySize is by default explicitly half of max heap size (-Xmx) instead of implicitly equal to it. 5.0.1 ===== diff --git a/conf/cassandra-env.sh b/conf/cassandra-env.sh index 209816197955..2d83763b8994 100644 --- a/conf/cassandra-env.sh +++ b/conf/cassandra-env.sh @@ -56,11 +56,14 @@ calculate_heap_sizes() heap_limit="31744" fi half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $system_memory_in_mb / 4` if [ "$half_system_memory_in_mb" -gt "$heap_limit" ] ; then CALCULATED_MAX_HEAP_SIZE="${heap_limit}M" + CALCULATED_MAX_DIRECT_MEMORY_SIZE="`expr $heap_limit / 2`M" CALCULATED_CMS_HEAP_NEWSIZE="8G" else CALCULATED_MAX_HEAP_SIZE="${half_system_memory_in_mb}M" + CALCULATED_MAX_DIRECT_MEMORY_SIZE="${quarter_system_memory_in_mb}M" CALCULATED_CMS_HEAP_NEWSIZE="`expr $half_system_memory_in_mb / 4`M" fi } @@ -87,6 +90,8 @@ echo $JVM_OPTS | grep -q Xmx DEFINED_XMX=$? echo $JVM_OPTS | grep -q Xms DEFINED_XMS=$? +echo $JVM_OPTS | grep -q MaxDirectMemorySize +DEFINED_MAX_DIRECT_MEMORY_SIZE=$? echo $JVM_OPTS | grep -q ParallelGCThreads DEFINED_PARALLEL_GC_THREADS=$? echo $JVM_OPTS | grep -q ConcGCThreads @@ -112,6 +117,7 @@ calculate_heap_sizes #MAX_HEAP_SIZE="20G" #HEAP_NEWSIZE="10G" +#MAX_DIRECT_MEMORY_SIZE="10G" # Set this to control the amount of arenas per-thread in glibc #export MALLOC_ARENA_MAX=4 @@ -130,6 +136,10 @@ elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 exit 1 fi +if [ "x$MAX_DIRECT_MEMORY_SIZE" = "x" ]; then + MAX_DIRECT_MEMORY_SIZE="$CALCULATED_MAX_DIRECT_MEMORY_SIZE" +fi + if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then export MALLOC_ARENA_MAX=4 fi @@ -144,6 +154,10 @@ elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then exit 1 fi +if [ $DEFINED_MAX_DIRECT_MEMORY_SIZE -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:MaxDirectMemorySize=${MAX_DIRECT_MEMORY_SIZE}" +fi + # We only set -Xmn flag if it was not defined in jvm-server.options file # and CMS is being used. If defined, both Xmn and Xmx must be defined together. if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then diff --git a/conf/jvm-server.options b/conf/jvm-server.options index 547a06dce3f7..f68d875a51e2 100644 --- a/conf/jvm-server.options +++ b/conf/jvm-server.options @@ -166,6 +166,12 @@ # For production use you may wish to adjust this for your environment. # If that's the case, see MAX_HEAP_SIZE (and HEAP_NEWSIZE for CMS) in cassandra-env.sh +##################### +# OFF-HEAP SETTINGS # +##################### + +# By default, this setting is half of max heap size +#-XX:MaxDirectMemorySize= ################################### # EXPIRATION DATE OVERFLOW POLICY # From bb66561142788270ab450c02de836b3952ed37b4 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Sun, 23 Mar 2025 23:37:11 +0100 Subject: [PATCH 002/340] Various fixes in constraint framework - fix some edge cases for NOT_NULL - ability to specify constraints when altering a column - ensure constraint is specified on a column it is bound to - fix nullity check on map type - fix satistfiability check on function constraints patch by Stefan Miklosovic; reviewed by Bernardo Botella for CASSANDRA-20481 --- CHANGES.txt | 1 + src/antlr/Parser.g | 10 +- ...AbstractFunctionSatisfiabilityChecker.java | 27 ++- .../cql3/constraints/ColumnConstraint.java | 5 +- .../cql3/constraints/ColumnConstraints.java | 18 +- .../cql3/constraints/ConstraintFunction.java | 10 +- .../cql3/constraints/JsonConstraint.java | 9 +- .../cql3/constraints/NotNullConstraint.java | 9 +- .../constraints/UnaryConstraintFunction.java | 37 ++++ .../cql3/statements/UpdateStatement.java | 4 +- .../schema/AlterTableStatement.java | 19 +- .../schema/CreateTableStatement.java | 2 +- .../cassandra/db/marshal/AbstractType.java | 14 +- .../cassandra/db/marshal/CollectionType.java | 6 + .../apache/cassandra/db/marshal/MapType.java | 6 - .../cassandra/schema/TableMetadata.java | 4 +- ...ableWithTableConstraintValidationTest.java | 42 +++- .../ConstraintsSatisfiabilityTest.java | 17 +- ...WithColumnCqlConstraintValidationTest.java | 18 ++ .../constraints/JsonConstraintTest.java | 2 +- .../constraints/NotNullConstraintTest.java | 165 ++++++++++++++ .../cql3/ColumnSpecificationTest.java | 206 ++++++++++++++++++ .../db/marshal/AbstractTypeTest.java | 9 +- 23 files changed, 568 insertions(+), 72 deletions(-) create mode 100644 src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java create mode 100644 test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java create mode 100644 test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 0e155821c0ab..c1ee79f162d1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Various fixes in constraint framework (CASSANDRA-20481) * Add support in CAS for -= on numeric types, and fixed improper handling of empty bytes which lead to NPE (CASSANDRA-20477) * Do not fail to start a node with materialized views after they are turned off in config (CASSANDRA-20452) * Fix nodetool gcstats output, support human-readable units and more output formats (CASSANDRA-19022) diff --git a/src/antlr/Parser.g b/src/antlr/Parser.g index dd9eb181dfe7..c91dfe60d925 100644 --- a/src/antlr/Parser.g +++ b/src/antlr/Parser.g @@ -988,14 +988,14 @@ alterTableStatement returns [AlterTableStatement.Raw stmt] | K_ALTER ( K_IF K_EXISTS { $stmt.ifColumnExists(true); } )? id=cident ( mask=columnMask { $stmt.mask(id, mask); } + | constraints=columnConstraints { $stmt.constraint(id, constraints); } | K_DROP K_MASKED { $stmt.mask(id, null); } - | K_DROP K_CHECK { $stmt.constraint(id, null); } - | (constraints=columnConstraints) { $stmt.constraint(id, constraints); }) + | K_DROP K_CHECK { $stmt.constraint(id, null); }) | K_ADD ( K_IF K_NOT K_EXISTS { $stmt.ifColumnNotExists(true); } )? - ( id=ident v=comparatorType b=isStaticColumn (m=columnMask)? { $stmt.add(id, v, b, m); } - | ('(' id1=ident v1=comparatorType b1=isStaticColumn (m1=columnMask)? { $stmt.add(id1, v1, b1, m1); } - ( ',' idn=ident vn=comparatorType bn=isStaticColumn (mn=columnMask)? { $stmt.add(idn, vn, bn, mn); mn=null; } )* ')') ) + ( id=ident v=comparatorType b=isStaticColumn (m=columnMask)? (c=columnConstraints)? { $stmt.add(id, v, b, m, c); } + | ('(' id1=ident v1=comparatorType b1=isStaticColumn (m1=columnMask)? (c=columnConstraints)? { $stmt.add(id1, v1, b1, m1, c); } + ( ',' idn=ident vn=comparatorType bn=isStaticColumn (mn=columnMask)? (c=columnConstraints)? { $stmt.add(idn, vn, bn, mn, c); mn=null; c=null;} )* ')') ) | K_DROP ( K_IF K_EXISTS { $stmt.ifColumnExists(true); } )? ( id=ident { $stmt.drop(id); } diff --git a/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionSatisfiabilityChecker.java b/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionSatisfiabilityChecker.java index 91d448da012a..cebc5c36d3dd 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionSatisfiabilityChecker.java +++ b/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionSatisfiabilityChecker.java @@ -26,6 +26,8 @@ import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.cql3.functions.types.ParseUtils; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.utils.Pair; @@ -70,6 +72,8 @@ public void check(String functionName, List> constraints, Co */ abstract Pair, List> categorizeConstraints(List> constraints, String functionName); + abstract AbstractType returnType(ColumnMetadata columnMetadata); + private void checkSupportedOperators(List allConstraints, String functionName) { for (CONSTRAINT_TYPE constraint : allConstraints) @@ -147,11 +151,12 @@ else if (firstRelation == NEQ && secondRelation == NEQ) } else { - ByteBuffer firstTermBuffer = columnMetadata.type.fromString(ParseUtils.unquote(firstTerm)); - ByteBuffer secondTermBuffer = columnMetadata.type.fromString(ParseUtils.unquote(secondTerm)); + AbstractType returnType = returnType(columnMetadata); + ByteBuffer firstTermBuffer = returnType.fromString(ParseUtils.unquote(firstTerm)); + ByteBuffer secondTermBuffer = returnType.fromString(ParseUtils.unquote(secondTerm)); - boolean firstSatisfaction = firstRelation.isSatisfiedBy(columnMetadata.type, secondTermBuffer, firstTermBuffer); - boolean secondSatisfaction = secondRelation.isSatisfiedBy(columnMetadata.type, firstTermBuffer, secondTermBuffer); + boolean firstSatisfaction = firstRelation.isSatisfiedBy(returnType, secondTermBuffer, firstTermBuffer); + boolean secondSatisfaction = secondRelation.isSatisfiedBy(returnType, firstTermBuffer, secondTermBuffer); if (!firstSatisfaction || !secondSatisfaction) throw new InvalidConstraintDefinitionException(format("Constraints of %s are not satisfiable: %s %s %s, %s %s %s", @@ -186,6 +191,14 @@ public Pair, List> categori return Pair.create(scalars, notEqualScalars); } + + @Override + AbstractType returnType(ColumnMetadata metadata) + { + // function constraints will always have terms of int32 type + // unlike scalar constraints where it will be a type of column + return metadata.type; + } }; public static final AbstractFunctionSatisfiabilityChecker FUNCTION_SATISFIABILITY_CHECKER = new AbstractFunctionSatisfiabilityChecker<>() @@ -215,5 +228,11 @@ public Pair, List> cate return Pair.create(funnctionColumnConstraints, notEqualConstraints); } + + @Override + AbstractType returnType(ColumnMetadata columnMetadata) + { + return Int32Type.instance; + } }; } diff --git a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java index ddcca653ea30..eecc0b8ecce1 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java @@ -30,6 +30,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.utils.ByteBufferUtil; import static java.lang.String.format; @@ -116,8 +117,10 @@ public String fullName() */ public void evaluate(AbstractType valueType, ByteBuffer columnValue) throws ConstraintViolationException { - if (columnValue.capacity() == 0) + if (columnValue == ByteBufferUtil.EMPTY_BYTE_BUFFER) throw new ConstraintViolationException("Column value does not satisfy value constraint for column '" + columnName + "' as it is null."); + else if (valueType.isEmptyValueMeaningless() && columnValue.capacity() == 0) + throw new ConstraintViolationException("Column value does not satisfy value constraint for column '" + columnName + "' as it is empty."); internalEvaluate(valueType, columnValue); } diff --git a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java index 0acace098f46..21b119522866 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java @@ -27,6 +27,7 @@ import java.util.Set; import java.util.TreeSet; +import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.CqlBuilder; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.AbstractType; @@ -108,7 +109,7 @@ public int getSize() // Checks if there is at least one constraint that will perform checks public boolean hasRelevantConstraints() { - for (ColumnConstraint c : constraints) + for (ColumnConstraint c : constraints) { if (c != ColumnConstraints.NO_OP) return true; @@ -120,9 +121,12 @@ public boolean hasRelevantConstraints() public void validate(ColumnMetadata columnMetadata) throws InvalidConstraintDefinitionException { if (!columnMetadata.type.isConstrainable()) + { throw new InvalidConstraintDefinitionException("Constraint cannot be defined on the column " + columnMetadata.name + " of type " + columnMetadata.type.asCQL3Type() - + " for the table " + columnMetadata.ksName + "." + columnMetadata.cfName); + + " for the table " + columnMetadata.ksName + '.' + columnMetadata.cfName + '.' + + (columnMetadata.type.isCollection() ? " When using collections, constraints can be used only of frozen collections." : "")); + } // this will look at constraints as a whole, // checking if combinations of a particular constraint make sense (duplicities, satisfiability etc.). @@ -207,10 +211,18 @@ public Raw() this.constraints = Collections.emptyList(); } - public ColumnConstraints prepare() + public ColumnConstraints prepare(ColumnIdentifier column) { if (constraints.isEmpty()) return NO_OP; + + for (ColumnConstraint constraint : constraints) + { + if (constraint.columnName != null && !column.equals(constraint.columnName)) + throw new InvalidConstraintDefinitionException(format("Constraint %s was not specified on a column it operates on: %s but on: %s", + constraint, column.toCQLString(), constraint.columnName)); + } + return new ColumnConstraints(constraints); } } diff --git a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java index 9952ab32d94f..ad8424c3f563 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java @@ -25,6 +25,7 @@ import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.utils.ByteBufferUtil; import static org.apache.cassandra.cql3.Operator.EQ; import static org.apache.cassandra.cql3.Operator.GT; @@ -55,8 +56,10 @@ public ConstraintFunction(ColumnIdentifier columnName, String name) */ public void evaluate(AbstractType valueType, Operator relationType, String term, ByteBuffer columnValue) throws ConstraintViolationException { - if (columnValue.capacity() == 0) + if (columnValue == ByteBufferUtil.EMPTY_BYTE_BUFFER) throw new ConstraintViolationException("Column value does not satisfy value constraint for column '" + columnName + "' as it is null."); + else if (valueType.isEmptyValueMeaningless() && columnValue.capacity() == 0) + throw new ConstraintViolationException("Column value does not satisfy value constraint for column '" + columnName + "' as it is empty."); internalEvaluate(valueType, relationType, term, columnValue); } @@ -88,10 +91,7 @@ public void validate(ColumnMetadata columnMetadata, String term) throws InvalidC * * @return list of operators this function is allowed to have. */ - public List getSupportedOperators() - { - return List.of(); - } + public abstract List getSupportedOperators(); /** * Tells what types of columns are supported by this constraint. diff --git a/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java index 99aeb6734e63..95fbac5b3c0a 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java @@ -31,7 +31,7 @@ import static java.lang.String.format; -public class JsonConstraint extends ConstraintFunction +public class JsonConstraint extends UnaryConstraintFunction { private static final List> SUPPORTED_TYPES = List.of(UTF8Type.instance, AsciiType.instance); @@ -39,12 +39,7 @@ public class JsonConstraint extends ConstraintFunction public JsonConstraint(ColumnIdentifier columnName) { - this(columnName, FUNCTION_NAME); - } - - public JsonConstraint(ColumnIdentifier columnName, String name) - { - super(columnName, name); + super(columnName, FUNCTION_NAME); } @Override diff --git a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java index fb9f7de95b2d..af79086701f0 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java @@ -28,18 +28,13 @@ import static java.lang.String.format; -public class NotNullConstraint extends ConstraintFunction +public class NotNullConstraint extends UnaryConstraintFunction { public static final String FUNCTION_NAME = "NOT_NULL"; public NotNullConstraint(ColumnIdentifier columnName) { - this(columnName, FUNCTION_NAME); - } - - public NotNullConstraint(ColumnIdentifier columnName, String name) - { - super(columnName, name); + super(columnName, FUNCTION_NAME); } @Override diff --git a/src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java b/src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java new file mode 100644 index 000000000000..8696e81a65a7 --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.constraints; + +import java.util.List; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.Operator; + +public abstract class UnaryConstraintFunction extends ConstraintFunction +{ + public UnaryConstraintFunction(ColumnIdentifier columnName, String name) + { + super(columnName, name); + } + + public List getSupportedOperators() + { + return List.of(); + } +} diff --git a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java index d8310cdd2f8d..f6ecda2b87e9 100644 --- a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java @@ -371,9 +371,7 @@ public static void evaluateConstraintsForRow(Row row, TableMetadata metadata) public static void evaluateConstraint(ColumnMetadata columnMetadata, ByteBuffer cellData) { - for (ColumnConstraint constraint : columnMetadata.getColumnConstraints().getConstraints()) - { + for (ColumnConstraint constraint : columnMetadata.getColumnConstraints().getConstraints()) constraint.evaluate(columnMetadata.type, cellData); - } } } diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java index fc2ab582f754..94120ac63ce1 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java @@ -257,13 +257,16 @@ private static class Column private final boolean isStatic; @Nullable private final ColumnMask.Raw mask; + @Nullable + private final ColumnConstraints.Raw constraints; - Column(ColumnIdentifier name, CQL3Type.Raw type, boolean isStatic, @Nullable ColumnMask.Raw mask) + Column(ColumnIdentifier name, CQL3Type.Raw type, boolean isStatic, @Nullable ColumnMask.Raw mask, @Nullable ColumnConstraints.Raw constraints) { this.name = name; this.type = type; this.isStatic = isStatic; this.mask = mask; + this.constraints = constraints; } } @@ -311,6 +314,7 @@ private void addColumn(KeyspaceMetadata keyspace, AbstractType type = column.type.prepare(keyspaceName, keyspace.types).getType(); boolean isStatic = column.isStatic; ColumnMask mask = column.mask == null ? null : column.mask.prepare(keyspaceName, tableName, name, type, keyspace.userFunctions); + ColumnConstraints columnConstraints = column.constraints == null ? ColumnConstraints.NO_OP : column.constraints.prepare(name); if (null != tableBuilder.getColumn(name)) { if (!ifColumnNotExists) @@ -361,9 +365,9 @@ private void addColumn(KeyspaceMetadata keyspace, } if (isStatic) - tableBuilder.addStaticColumn(name, type, mask); + tableBuilder.addStaticColumn(name, type, mask, columnConstraints); else - tableBuilder.addRegularColumn(name, type, mask); + tableBuilder.addRegularColumn(name, type, mask, columnConstraints); if (!isStatic) { @@ -372,7 +376,8 @@ private void addColumn(KeyspaceMetadata keyspace, if (view.includeAllColumns) { ColumnMetadata viewColumn = ColumnMetadata.regularColumn(view.metadata, name.bytes, type) - .withNewMask(mask); + .withNewMask(mask) + .withNewColumnConstraints(columnConstraints); viewsBuilder.put(viewsBuilder.get(view.name()).withAddedRegularColumn(viewColumn)); } } @@ -732,7 +737,7 @@ public KeyspaceMetadata apply(Epoch epoch, KeyspaceMetadata keyspace, TableMetad if (column != null) { ColumnConstraints oldConstraints = column.getColumnConstraints(); - ColumnConstraints newConstraints = constraints == null ? ColumnConstraints.NO_OP : constraints.prepare(); + ColumnConstraints newConstraints = constraints == null ? ColumnConstraints.NO_OP : constraints.prepare(columnName); if (Objects.equals(oldConstraints, newConstraints)) return keyspace; newConstraints.validate(column); @@ -837,10 +842,10 @@ public void mask(ColumnIdentifier name, ColumnMask.Raw mask) rawMask = mask; } - public void add(ColumnIdentifier name, CQL3Type.Raw type, boolean isStatic, @Nullable ColumnMask.Raw mask) + public void add(ColumnIdentifier name, CQL3Type.Raw type, boolean isStatic, @Nullable ColumnMask.Raw mask, @Nullable ColumnConstraints.Raw constraints) { kind = Kind.ADD_COLUMNS; - addedColumns.add(new AddColumns.Column(name, type, isStatic, mask)); + addedColumns.add(new AddColumns.Column(name, type, isStatic, mask, constraints)); } public void drop(ColumnIdentifier name) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java index 997a40200077..b8e51d1286e5 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java @@ -578,7 +578,7 @@ public void addColumn(ColumnIdentifier column, CQL3Type.Raw type, boolean isStat if (null == constraints) columnConstraints.put(column, ColumnConstraints.NO_OP); else - columnConstraints.put(column, constraints.prepare()); + columnConstraints.put(column, constraints.prepare(column)); } public void setCompactStorage() diff --git a/src/java/org/apache/cassandra/db/marshal/AbstractType.java b/src/java/org/apache/cassandra/db/marshal/AbstractType.java index 5378a4cd3fba..b5156c4fdb8f 100644 --- a/src/java/org/apache/cassandra/db/marshal/AbstractType.java +++ b/src/java/org/apache/cassandra/db/marshal/AbstractType.java @@ -211,20 +211,12 @@ public void validate(V value, ValueAccessor accessor) throws MarshalExcep public void checkConstraints(ByteBuffer bytes, ColumnConstraints constraints) throws ConstraintViolationException { - if (constraints.isEmpty()) - return; - - T value = getSerializer().deserialize(bytes); - constraints.evaluate(this, bytes); + checkConstraints(bytes, constraints.getConstraints()); } - public void checkConstraints(ByteBuffer bytes, List constraints) throws ConstraintViolationException + public void checkConstraints(ByteBuffer bytes, List> constraints) throws ConstraintViolationException { - if (constraints.isEmpty()) - return; - - T value = getSerializer().deserialize(bytes); - for (ColumnConstraint constraint : constraints) + for (ColumnConstraint constraint : constraints) constraint.evaluate(this, bytes); } diff --git a/src/java/org/apache/cassandra/db/marshal/CollectionType.java b/src/java/org/apache/cassandra/db/marshal/CollectionType.java index 8c39dbab4f5b..3952ee137e0a 100644 --- a/src/java/org/apache/cassandra/db/marshal/CollectionType.java +++ b/src/java/org/apache/cassandra/db/marshal/CollectionType.java @@ -171,6 +171,12 @@ public boolean isFreezable() return true; } + @Override + public boolean isConstrainable() + { + return isFrozenCollection(); + } + public ByteBuffer serializeForNativeProtocol(Iterator> cells) { assert isMultiCell(); diff --git a/src/java/org/apache/cassandra/db/marshal/MapType.java b/src/java/org/apache/cassandra/db/marshal/MapType.java index f8ac6c00680e..69ea6d17e1d3 100644 --- a/src/java/org/apache/cassandra/db/marshal/MapType.java +++ b/src/java/org/apache/cassandra/db/marshal/MapType.java @@ -456,10 +456,4 @@ public ByteBuffer getElement(@Nullable ColumnData columnData, ByteBuffer keyOrIn return getSerializer().getSerializedValue(((Cell) columnData).buffer(), keyOrIndex, getValuesType()); } - - @Override - public boolean isConstrainable() - { - return false; - } } diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index 9fdf5e821542..268111abd68f 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -204,7 +204,7 @@ public enum Kind // We cache the columns with constraints to avoid iterations over columns // Partition keys columns are evaluated separately, so we keep the two of them in // two different variables. - public final List partitionKeyConstraints; + public final List> partitionKeyConstraints; public final List columnsWithConstraints; public final List notNullColumns; @@ -248,7 +248,7 @@ else if (isIndex()) else ref = TableMetadataRef.withInitialReference(new TableMetadataRef(Schema.instance, keyspace, name, id), this); - List pkConstraints = new ArrayList<>(this.partitionKeyColumns.size()); + List> pkConstraints = new ArrayList<>(this.partitionKeyColumns.size()); for (ColumnMetadata column : this.partitionKeyColumns) { if (column.hasConstraint()) diff --git a/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java index d5f3f4a32c50..91f21f8b0922 100644 --- a/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java @@ -22,10 +22,8 @@ import org.apache.cassandra.exceptions.InvalidRequestException; - public class AlterTableWithTableConstraintValidationTest extends CqlConstraintValidationTester { - @Test public void testCreateTableWithColumnNamedConstraintDescribeTableNonFunction() throws Throwable { @@ -239,4 +237,44 @@ public void testCreateTableAddConstraintWithNonExistingColumn() throws Throwable String expectedErrorMessage = "Column 'foo' doesn't exist"; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "ALTER TABLE %s ALTER foo CHECK foo < 100"); } + + @Test + public void testAlterTableAlterExistingColumnWithCheckOnNonExistingColumn() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck1 text, ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));"); + assertInvalidThrowMessage("Constraint ck3 < 100 was not specified on a column it operates on: ck1 but on: ck3", + InvalidRequestException.class, + "ALTER TABLE %s ALTER ck1 CHECK ck3 < 100"); + assertInvalidThrowMessage("Constraint NOT_NULL(ck3) was not specified on a column it operates on: ck1 but on: ck3", + InvalidRequestException.class, + "ALTER TABLE %s ALTER ck1 CHECK NOT_NULL(ck3)"); + assertInvalidThrowMessage("Constraint LENGTH(ck3) > 10 was not specified on a column it operates on: ck1 but on: ck3", + InvalidRequestException.class, + "ALTER TABLE %s ALTER ck1 CHECK LENGTH(ck3) > 10"); + } + + @Test + public void testAlterTableAddNewColumnWithCheckOnNonExistingColumn() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck1 text, ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));"); + + assertInvalidThrowMessage("Constraint v3 < 100 was not specified on a column it operates on: v2 but on: v3", + InvalidRequestException.class, + "ALTER TABLE %s ADD v2 int CHECK v3 < 100"); + + assertInvalidThrowMessage("Constraint NOT_NULL(v3) was not specified on a column it operates on: v2 but on: v3", + InvalidRequestException.class, + "ALTER TABLE %s ADD v2 int CHECK NOT_NULL(v3)"); + + assertInvalidThrowMessage("Constraint LENGTH(v3) > 10 was not specified on a column it operates on: v2 but on: v3", + InvalidRequestException.class, + "ALTER TABLE %s ADD v2 int CHECK LENGTH(v3) > 10"); + } + + @Test + public void testAlterTableAddColumnWithCheck() + { + createTable("CREATE TABLE %s (pk text, col1 int, primary key (pk));"); + execute("ALTER TABLE %s ADD col2 int CHECK col2 > 0"); + } } diff --git a/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java b/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java index 6f087c5850b5..148aaa48a67f 100644 --- a/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java +++ b/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java @@ -71,13 +71,13 @@ private void run(QuadFunction quadFunction, ColumnMetadata columnMetadata if (op1 == NEQ) { // a_column != 0 and a_column != 10 -> valid - check(op1, 0, op2, 100, quadFunction, null, columnMetadata); + check(op1, 50, op2, 100, quadFunction, null, columnMetadata); // does not make sense to check twice // check a_column != 0 and a_column != 0 check(op1, 0, op2, 0, quadFunction, "There are duplicate constraint definitions on column", columnMetadata); } else - check(op1, 0, op2, 100, quadFunction, "There are duplicate constraint definitions on column", columnMetadata); + check(op1, 50, op2, 100, quadFunction, "There are duplicate constraint definitions on column", columnMetadata); } else if ((op1 == GT && op2 == GTE) || (op1 == GTE && op2 == GT) || @@ -85,18 +85,25 @@ else if ((op1 == GT && op2 == GTE) || (op1 == LTE && op2 == LT) || (op1 == EQ || op2 == EQ)) { - check(op1, 0, op2, 100, quadFunction, "not supported", columnMetadata); + check(op1, 50, op2, 100, quadFunction, "not supported", columnMetadata); } else if ((op1 == LTE && op2 == GT) || (op1 == LT && op2 == GT) || (op1 == LTE && op2 == GTE) || (op1 == LT && op2 == GTE)) { - check(op1, 0, op2, 100, quadFunction, "are not satisfiable", columnMetadata); + check(op1, 50, op2, 100, quadFunction, "are not satisfiable", columnMetadata); + } + else if ((op1 == GT && op2 == LTE) || + (op1 == GT && op2 == LT) || + (op1 == GTE && op2 == LTE) || + (op1 == GTE && op2 == LT)) + { + check(op1, 50, op2, 100, quadFunction, null, columnMetadata); } else if (!(op1 == NEQ || op2 == NEQ)) { - check(op1, 0, op2, 100, quadFunction, null, columnMetadata); + check(op1, 50, op2, 100, quadFunction, null, columnMetadata); } else { diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java index 6dc160468642..857ec85f408b 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java @@ -26,9 +26,11 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.cql3.constraints.InvalidConstraintDefinitionException; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.utils.Generators; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static accord.utils.Property.qt; @@ -1438,4 +1440,20 @@ public void testCreateTableWithColumnWithClusteringColumnLessThanScalarConstrain } }); } + + @Test + public void testCreateTableAddConstraintWithCheckOnNonExistingColumn() throws Throwable + { + assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, ck1 text CHECK NOT_NULL(ck3), ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));")) + .hasRootCauseMessage("Constraint NOT_NULL(ck3) was not specified on a column it operates on: ck1 but on: ck3") + .rootCause().isInstanceOf(InvalidConstraintDefinitionException.class); + + assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, ck1 int CHECK ck3 > 5, ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));")) + .hasRootCauseMessage("Constraint ck3 > 5 was not specified on a column it operates on: ck1 but on: ck3") + .rootCause().isInstanceOf(InvalidConstraintDefinitionException.class); + + assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck3) > 10, ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));")) + .hasRootCauseMessage("Constraint LENGTH(ck3) > 10 was not specified on a column it operates on: ck1 but on: ck3") + .rootCause().isInstanceOf(InvalidConstraintDefinitionException.class); + } } diff --git a/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java b/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java index 95db5b7604de..adf86093bc42 100644 --- a/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java +++ b/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java @@ -50,7 +50,7 @@ public void testJsonConstraint() throws Throwable run("{}"); run("{\"a\": 5, \"b\": \"1\", \"c\": [1,2,3]}"); run("nonsense", "Value for column 'a_column' violated JSON constraint as it is not a valid JSON."); - run("", "Column value does not satisfy value constraint for column 'a_column' as it is null."); + run("", "Value for column 'a_column' violated JSON constraint as it is not a valid JSON."); } @Test diff --git a/test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java b/test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java new file mode 100644 index 000000000000..61d24d850683 --- /dev/null +++ b/test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.constraints; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.junit.Test; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.constraints.ColumnConstraints; +import org.apache.cassandra.cql3.constraints.ConstraintViolationException; +import org.apache.cassandra.cql3.constraints.FunctionColumnConstraint; +import org.apache.cassandra.cql3.constraints.InvalidConstraintDefinitionException; +import org.apache.cassandra.cql3.constraints.NotNullConstraint; +import org.apache.cassandra.cql3.constraints.ScalarColumnConstraint; +import org.apache.cassandra.cql3.constraints.UnaryFunctionColumnConstraint; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UUIDType; +import org.apache.cassandra.schema.ColumnMetadata; + +import static java.util.List.of; +import static org.apache.cassandra.cql3.Operator.GT; +import static org.apache.cassandra.schema.ColumnMetadata.Kind.REGULAR; +import static org.apache.cassandra.utils.ByteBufferUtil.EMPTY_BYTE_BUFFER; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * TODO - UDTs are not supported yet in constraints as such + */ +public class NotNullConstraintTest +{ + private static final ColumnIdentifier columnIdentifier = new ColumnIdentifier("a_column", false); + private static final ColumnConstraints unaryConstraint = new ColumnConstraints(of(new UnaryFunctionColumnConstraint.Raw(new ColumnIdentifier(NotNullConstraint.FUNCTION_NAME, false), columnIdentifier).prepare())); + private static final ColumnConstraints scalarConstraint = new ColumnConstraints(of(new ScalarColumnConstraint.Raw(columnIdentifier, GT, "5").prepare())); + private static final ColumnConstraints functionConstraint = new ColumnConstraints(of(new FunctionColumnConstraint.Raw(new ColumnIdentifier("LENGTH", false), columnIdentifier, GT, "5").prepare())); + + @Test + public void testNotNullConstraintValidation() + { + // unary + unaryConstraint.validate(getColumnOfType(UTF8Type.instance)); + assertThatThrownBy(() -> unaryConstraint.evaluate(UTF8Type.instance, EMPTY_BYTE_BUFFER)) + .hasMessage("Column value does not satisfy value constraint for column 'a_column' as it is null.") + .isInstanceOf(ConstraintViolationException.class); + + // not null / empty + unaryConstraint.evaluate(UTF8Type.instance, UTF8Type.instance.fromString("a value")); + + // scalar + scalarConstraint.validate(getColumnOfType(Int32Type.instance)); + assertThatThrownBy(() -> scalarConstraint.evaluate(Int32Type.instance, EMPTY_BYTE_BUFFER)) + .hasMessage("Column value does not satisfy value constraint for column 'a_column' as it is null.") + .isInstanceOf(ConstraintViolationException.class); + + // function, e.g. length + functionConstraint.validate(getColumnOfType(UTF8Type.instance)); + assertThatThrownBy(() -> functionConstraint.evaluate(UTF8Type.instance, EMPTY_BYTE_BUFFER)) + .hasMessage("Column value does not satisfy value constraint for column 'a_column' as it is null.") + .isInstanceOf(ConstraintViolationException.class); + + // empty string is not _null_ string so this passes + unaryConstraint.evaluate(UTF8Type.instance, UTF8Type.instance.fromString("")); + + // test a type for which empty value is meaningless + + assertThatThrownBy(() -> unaryConstraint.evaluate(UUIDType.instance, ByteBuffer.allocate(0))) + .hasMessage("Column value does not satisfy value constraint for column 'a_column' as it is empty.") + .isInstanceOf(ConstraintViolationException.class); + } + + @Test + public void testCollections() + { + checkList(false); + checkSet(false); + checkMap(false); + + checkList(true); + checkSet(true); + checkMap(true); + } + + private static ColumnMetadata getColumnOfType(AbstractType type) + { + return new ColumnMetadata("a", "b", columnIdentifier, type, -1, REGULAR, null); + } + + private void checkList(boolean frozen) + { + if (frozen) + { + ListType listType = ListType.getInstance(Int32Type.instance, false); + ByteBuffer payload = listType.getSerializer().serialize(List.of(1, 2, 3)); + checkFrozenCollection(listType, payload); + } + else + checkUnfrozenCollection(ListType.getInstance(Int32Type.instance, true)); + } + + private void checkMap(boolean frozen) + { + if (frozen) + { + MapType mapType = MapType.getInstance(Int32Type.instance, Int32Type.instance, false); + ByteBuffer payload = mapType.getSerializer().serialize(Map.of(1, 1, 2, 2, 3, 3)); + checkFrozenCollection(mapType, payload); + } + else + checkUnfrozenCollection(MapType.getInstance(Int32Type.instance, Int32Type.instance, true)); + } + + private void checkSet(boolean frozen) + { + if (frozen) + { + SetType setType = SetType.getInstance(Int32Type.instance, false); + ByteBuffer payload = setType.getSerializer().serialize(Set.of(1, 2, 3)); + checkFrozenCollection(setType, payload); + } + else + checkUnfrozenCollection(SetType.getInstance(Int32Type.instance, true)); + } + + private void checkFrozenCollection(AbstractType type, ByteBuffer payload) + { + unaryConstraint.validate(getColumnOfType(type)); + unaryConstraint.evaluate(type, payload); + + assertThatThrownBy(() -> unaryConstraint.evaluate(type, EMPTY_BYTE_BUFFER)) + .hasMessage("Column value does not satisfy value constraint for column 'a_column' as it is null.") + .isInstanceOf(ConstraintViolationException.class); + } + + private void checkUnfrozenCollection(AbstractType type) + { + assertThatThrownBy(() -> unaryConstraint.validate(getColumnOfType(type))) + .hasMessageContaining("Constraint cannot be defined on the column") + .hasMessageContaining("When using collections, constraints can be used only of frozen collections") + .isInstanceOf(InvalidConstraintDefinitionException.class); + } +} diff --git a/test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java b/test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java new file mode 100644 index 000000000000..8513a5440023 --- /dev/null +++ b/test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3; + +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.schema.CompactionParams; + +/** + * Test various "extensions" to a column spec when altering / creating a table + */ +public class ColumnSpecificationTest extends CQLTester +{ + @Before + public void before() + { + DatabaseDescriptor.setDynamicDataMaskingEnabled(true); + } + + @Test + public void testCreateTableWithColumnHavingMaskBeforeCheck() + { + createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + @Test + public void testAlterTableAlterColumnWithMaskAndCheckStandalone() + { + createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); + execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default()"); + execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1;"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + @Test + public void testAlterTableAlterColumnWithMask() + { + createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); + execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default()"); + verifyColumnSpec("name text MASKED WITH system.mask_default()"); + } + + @Test + public void testAlterTableAlterColumnWithCheck() + { + createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); + execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1;"); + verifyColumnSpec("name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + @Test + public void testAddingCheckToColumnWithMask() + { + createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default());"); + execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + @Test + public void testAddingMaskToColumnWithCheck() + { + createTable("CREATE TABLE %s (pk text primary key, name text CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); + execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default()"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + @Test + public void testDroppingCheckKeepsMask() + { + createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); + execute("ALTER TABLE %s ALTER name DROP CHECK"); + verifyColumnSpec("name text MASKED WITH system.mask_default()"); + } + + @Test + public void droppingMaskKeepsCheck() + { + createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); + execute("ALTER TABLE %s ALTER name DROP MASKED"); + verifyColumnSpec("name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + @Test + public void testAlterTableAddColumnWithCheck() + { + createTable("CREATE TABLE %s (pk text primary key);"); + execute("ALTER TABLE %s ADD name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + verifyColumnSpec("name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + @Test + public void testAlterTableAddColumnWithMask() + { + createTable("CREATE TABLE %s (pk text primary key);"); + execute("ALTER TABLE %s ADD name text MASKED WITH system.mask_default()"); + verifyColumnSpec("name text MASKED WITH system.mask_default()"); + } + + @Test + public void testAlterTableAddColumnWithMaskAndCheck() + { + createTable("CREATE TABLE %s (pk text primary key);"); + execute("ALTER TABLE %s ADD name text MASKED WITH system.mask_default() CHECK NOT_NULL(name)"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name)"); + } + + @Test + public void testAlterTableAddColumnWithMaskAndMultipleChecks() + { + createTable("CREATE TABLE %s (pk text primary key);"); + execute("ALTER TABLE %s ADD name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + /** + * TODO - investigate if it is possible to specify checks before mask when creating a table + */ + @Test(expected = RuntimeException.class) + public void testFailingCreateTableWithColumnHavingMaskAfterCheck() + { + createTable("CREATE TABLE %s (pk text primary key, name text CHECK NOT_NULL(name) AND LENGTH(name) > 1 MASKED WITH system.mask_default());"); + } + + /** + * TODO - investigate if it is possible to specify both check and mask, check being first + */ + @Test(expected = RuntimeException.class) + public void testFailingAlterTableAlterColumnWithCheckAndMask() + { + createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); + execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1 MASKED WITH system.mask_default();"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + /** + * TODO - investigate if it is possible to specify both check and mask, mask being first + */ + @Test(expected = RuntimeException.class) + public void testFailingAlterTableAlterColumnWithMaskAndCheck() + { + createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); + execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + } + + private void verifyColumnSpec(String modifiedColumn) + { + assertRowsContains(executeNetWithoutPaging("DESCRIBE TABLE " + KEYSPACE + '.' + currentTable()), + row(KEYSPACE, + "table", + currentTable(), + "CREATE TABLE " + KEYSPACE + '.' + currentTable() + " (\n" + + " pk text PRIMARY KEY,\n" + + " " + modifiedColumn + '\n' + + ") WITH " + tableParametersCql())); + } + + static String tableParametersCql() + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND default_time_to_live = 0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND speculative_retry = '99p';"; + } + + private static String cqlQuoted(Map map) + { + return new CqlBuilder().append(map).toString(); + } +} diff --git a/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java b/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java index 5cb051b35e77..630c322fce3a 100644 --- a/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java @@ -263,10 +263,15 @@ private boolean isTestType(Class klass) public void isConstrainedTest() { qt().forAll(genBuilder().build()).checkAssert(type -> { - if (type instanceof MapType || type instanceof TupleType || type instanceof AbstractCompositeType) + if (type instanceof TupleType || type instanceof AbstractCompositeType) assertThat(type.isConstrainable()).isEqualTo(false); else - assertThat(type.isConstrainable()).isEqualTo(true); + { + if (type.isCollection() && !type.isFrozenCollection()) + assertThat(type.isConstrainable()).isEqualTo(false); + else + assertThat(type.isConstrainable()).isEqualTo(true); + } }); } From 73f0e2e4017397bfdcf16b7e333c1279ada57c74 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Tue, 1 Apr 2025 13:03:24 +0200 Subject: [PATCH 003/340] Suppress CVE-2025-25193 patch by Stefan Miklosovic; reviewed by Michael Semb Wever, Brandon Williams for CASSANDRA-20504 --- .build/dependency-check-suppressions.xml | 1 + .snyk | 2 ++ CHANGES.txt | 1 + 3 files changed, 4 insertions(+) diff --git a/.build/dependency-check-suppressions.xml b/.build/dependency-check-suppressions.xml index 4cdcc9149c7c..70e1f8716493 100644 --- a/.build/dependency-check-suppressions.xml +++ b/.build/dependency-check-suppressions.xml @@ -50,6 +50,7 @@ CVE-2022-41881 CVE-2023-34462 CVE-2023-44487 + CVE-2025-25193 diff --git a/.snyk b/.snyk index 66d72fb74c58..50af01c673d7 100644 --- a/.snyk +++ b/.snyk @@ -52,3 +52,5 @@ ignore: - reason: Suppressed due to internal review, see project's .build/dependency-check-suppressions.xml CVE-2024-12801: - reason: Suppressed due to internal review, see project's .build/dependency-check-suppressions.xml + CVE-2025-25193: + - reason: netty's http stuff is not applicable here -- ^pkg:maven/io\.netty/netty\-all@.*$ diff --git a/CHANGES.txt b/CHANGES.txt index a1d17c3e5f5b..da5ad11d3aec 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Suppress CVE-2025-25193 (CASSANDRA-20504) * Include in source tree and build packages a Snyk policy file that lists known false positives (CASSANDRA-20319) * Update zstd-jni to 1.5.7-2 (CASSANDRA-20453) * Suppress CVE-2024-12801 (CASSANDRA-20412) From b01274d6f674e4f56811d800b6d6aebc0f5ee66c Mon Sep 17 00:00:00 2001 From: Doug Rohrer Date: Wed, 2 Apr 2025 15:34:19 -0400 Subject: [PATCH 004/340] CASSANDRA-20496 - Add CQLSSTableWriter tests for Vectors and Constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New features in the storage engine need to be appropriately tested in the CQLSSTableWriter code so they can be used in the Analytics library and others utilizing the writer library. Add a test for support of Vector data types and constraints to ensure they work with the CQLSSTableWriter and make any changes necessary if they don’t. patch by Doug Rohrer; reviewed by Bernardo Botella, Stefan Miklosovic for CASSANDRA-20496 --- .../io/sstable/CQLSSTableWriterTest.java | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java b/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java index 45de365a6ced..b7b67ba9bde7 100644 --- a/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java @@ -39,6 +39,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.cql3.constraints.ConstraintViolationException; + import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; @@ -54,6 +57,7 @@ import org.apache.cassandra.cql3.functions.types.TypeCodec; import org.apache.cassandra.cql3.functions.types.UDTValue; import org.apache.cassandra.cql3.functions.types.UserType; +import org.apache.cassandra.db.marshal.FloatType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.dht.ByteOrderedPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; @@ -77,6 +81,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JavaDriverUtils; import org.apache.cassandra.utils.OutputHandler; +import org.assertj.core.api.Assertions; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.junit.Assert.assertEquals; @@ -1578,6 +1583,83 @@ public void testSkipBuildingIndexesWithSAI() throws Exception assertFalse(indexDescriptor.isPerColumnIndexBuildComplete(new IndexIdentifier(keyspace, table, "idx2"))); } + @Test + public void testWritingVectorData() throws Exception + { + final String schema = "CREATE TABLE " + qualifiedTable + " (" + + " k int," + + " v1 VECTOR," + + " PRIMARY KEY (k)" + + ")"; + + CQLSSTableWriter writer = CQLSSTableWriter.builder() + .inDirectory(dataDir) + .forTable(schema) + .using("INSERT INTO " + keyspace + "." + table + " (k, v1) " + + "VALUES (?, ?)").build(); + + for (int i = 0; i < 100; i++) + { + writer.addRow(i, List.of( (float)i, (float)i, (float)i, (float)i, (float)i)); + } + + writer.close(); + loadSSTables(dataDir, keyspace, table); + + if (verifyDataAfterLoading) + { + UntypedResultSet resultSet = QueryProcessor.executeInternal("SELECT * FROM " + keyspace + "." + table); + + assertEquals(resultSet.size(), 100); + int cnt = 0; + for (UntypedResultSet.Row row : resultSet) + { + assertEquals(cnt, row.getInt("k")); + List vector = row.getVector("v1", FloatType.instance, 5); + Assertions.assertThat(vector).hasSize(5); + final float floatCount = (float)cnt; + Assertions.assertThat(vector).allMatch(val -> val == floatCount); + cnt++; + } + } + } + + @Test + public void testConstraintViolation() throws Exception + { + final String schema = "CREATE TABLE " + qualifiedTable + " (" + + " k int," + + " v1 int CHECK v1 < 5 ," + + " PRIMARY KEY (k)" + + ")"; + + CQLSSTableWriter writer = CQLSSTableWriter.builder() + .inDirectory(dataDir) + .forTable(schema) + .using("INSERT INTO " + keyspace + "." + table + " (k, v1) " + + "VALUES (?, ?)").build(); + + writer.addRow(1, 4); + + Assertions.assertThatThrownBy(() -> writer.addRow(2, 11)) + .describedAs("Should throw when adding a row that violates constraints") + .isInstanceOf(ConstraintViolationException.class) + .hasMessageContaining("Column value does not satisfy value constraint for column 'v1'. It should be v1 < 5"); + + writer.close(); + loadSSTables(dataDir, keyspace, table); + + if (verifyDataAfterLoading) + { + UntypedResultSet resultSet = QueryProcessor.executeInternal("SELECT * FROM " + keyspace + "." + table); + + assertEquals(resultSet.size(), 1); + UntypedResultSet.Row row = resultSet.one(); + assertEquals(1, row.getInt("k")); + assertEquals(4, row.getInt("v1")); + } + } + protected static void loadSSTables(File dataDir, final String ks, final String tb) throws ExecutionException, InterruptedException { SSTableLoader loader = new SSTableLoader(dataDir, new SSTableLoader.Client() From 27a6ef3ea8ba7362c8cbe2b6593341058e1b3153 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Tue, 1 Apr 2025 13:33:49 +0200 Subject: [PATCH 005/340] Update netty to 4.1.119.Final and netty-tcnative to 2.0.70.Final patch by Stefan Miklosovic; reviewed by Dmitry Konstantinov for CASSANDRA-20314 --- .build/build-resolver.xml | 4 ++-- .build/parent-pom-template.xml | 10 +++++----- CHANGES.txt | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.build/build-resolver.xml b/.build/build-resolver.xml index 55718377e244..09263d42aa6e 100644 --- a/.build/build-resolver.xml +++ b/.build/build-resolver.xml @@ -275,8 +275,8 @@ - - + + diff --git a/.build/parent-pom-template.xml b/.build/parent-pom-template.xml index 5e873eae2bb8..cb15badbb954 100644 --- a/.build/parent-pom-template.xml +++ b/.build/parent-pom-template.xml @@ -729,7 +729,7 @@ io.netty netty-all - 4.1.96.Final + 4.1.119.Final io.netty @@ -800,7 +800,7 @@ io.netty netty-tcnative-boringssl-static - 2.0.61.Final + 2.0.70.Final org.bouncycastle @@ -823,18 +823,18 @@ io.netty netty-transport-native-epoll - 4.1.96.Final + 4.1.119.Final io.netty netty-transport-native-epoll - 4.1.96.Final + 4.1.119.Final linux-x86_64 io.netty netty-transport-native-epoll - 4.1.96.Final + 4.1.119.Final linux-aarch_64 diff --git a/CHANGES.txt b/CHANGES.txt index c9dbd21a4f98..31ad210e4e3e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.4 + * Update netty to 4.1.119.Final and netty-tcnative to 2.0.70.Final (CASSANDRA-20314) * Serialization can lose complex deletions in a mutation with multiple collections in a row (CASSANDRA-20449) * Improve error messages when initializing auth classes (CASSANDRA-20368) * Prioritize legacy 2i over SAI for columns with multiple indexes (CASSANDRA-20334) From 51cf55747bbc6a5a6bd9e7e66db6a1a4f1588094 Mon Sep 17 00:00:00 2001 From: Doug Rohrer Date: Mon, 31 Mar 2025 19:08:53 +0200 Subject: [PATCH 006/340] Update OWASP dependency checker to version 12.1.0 patch by Doug Rohrer; reviewed by Stefan Miklosovic for CASSANDRA-20501 --- .build/build-owasp.xml | 4 ++-- CHANGES.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.build/build-owasp.xml b/.build/build-owasp.xml index 3b4a5a62988b..5d5999f43713 100644 --- a/.build/build-owasp.xml +++ b/.build/build-owasp.xml @@ -19,7 +19,7 @@ - + @@ -34,7 +34,7 @@ unless="dependency-check-ant.archive.present"> - diff --git a/CHANGES.txt b/CHANGES.txt index da5ad11d3aec..0f9e790dc74e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Update OWASP dependency checker to version 12.1.0 (CASSANDRA-20501) * Suppress CVE-2025-25193 (CASSANDRA-20504) * Include in source tree and build packages a Snyk policy file that lists known false positives (CASSANDRA-20319) * Update zstd-jni to 1.5.7-2 (CASSANDRA-20453) From a449a4f76baf41b4707f177df208131589f981bb Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Fri, 21 Mar 2025 15:45:17 -0400 Subject: [PATCH 007/340] PaxosCleanupLocalCoordinator wait for transaction timeout before repairing Patch by Ariel Weisberg; Reviewed by Benedict Elliott Smith for CASSANDRA-20469 --- CHANGES.txt | 1 + .../org/apache/cassandra/config/Config.java | 2 ++ .../cassandra/config/DatabaseDescriptor.java | 20 ++++++++--- .../cassandra/service/StorageService.java | 14 +++++++- .../service/StorageServiceMBean.java | 4 +++ .../cleanup/PaxosCleanupLocalCoordinator.java | 34 +++++++++++++++++-- 6 files changed, 67 insertions(+), 8 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f4e934ab168e..04a146aa0237 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Fix Paxos repair interrupts running transactions (CASSANDRA-20469) * Various fixes in constraint framework (CASSANDRA-20481) * Add support in CAS for -= on numeric types, and fixed improper handling of empty bytes which lead to NPE (CASSANDRA-20477) * Do not fail to start a node with materialized views after they are turned off in config (CASSANDRA-20452) diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 2ec1d78e301f..2943c973ed7e 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -1467,4 +1467,6 @@ public enum CQLStartTime // 3.x Cassandra Driver has its "read" timeout set to 12 seconds, default matches this. public DurationSpec.LongMillisecondsBound native_transport_timeout = new DurationSpec.LongMillisecondsBound("12s"); public boolean enforce_native_deadline_for_hints = false; + + public boolean paxos_repair_race_wait = true; } diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index f6fd1b52ff44..6afcabf5fb7e 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -61,7 +61,6 @@ import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import com.google.common.util.concurrent.RateLimiter; - import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -105,13 +104,13 @@ import org.apache.cassandra.locator.EndpointSnitchInfo; import org.apache.cassandra.locator.IEndpointSnitch; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.Locator; -import org.apache.cassandra.locator.LocationInfo; import org.apache.cassandra.locator.InitialLocationProvider; +import org.apache.cassandra.locator.LocationInfo; +import org.apache.cassandra.locator.Locator; import org.apache.cassandra.locator.NodeAddressConfig; +import org.apache.cassandra.locator.NodeProximity; import org.apache.cassandra.locator.ReconnectableSnitchHelper; import org.apache.cassandra.locator.SeedProvider; -import org.apache.cassandra.locator.NodeProximity; import org.apache.cassandra.locator.SnitchAdapter; import org.apache.cassandra.security.AbstractCryptoProvider; import org.apache.cassandra.security.EncryptionContext; @@ -129,8 +128,8 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.ALLOCATE_TOKENS_FOR_KEYSPACE; import static org.apache.cassandra.config.CassandraRelevantProperties.ALLOW_UNLIMITED_CONCURRENT_VALIDATIONS; import static org.apache.cassandra.config.CassandraRelevantProperties.AUTO_BOOTSTRAP; -import static org.apache.cassandra.config.CassandraRelevantProperties.CONFIG_LOADER; import static org.apache.cassandra.config.CassandraRelevantProperties.CHRONICLE_ANALYTICS_DISABLE; +import static org.apache.cassandra.config.CassandraRelevantProperties.CONFIG_LOADER; import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_STCS_IN_L0; import static org.apache.cassandra.config.CassandraRelevantProperties.INITIAL_TOKEN; import static org.apache.cassandra.config.CassandraRelevantProperties.IO_NETTY_TRANSPORT_ESTIMATE_SIZE_ON_SUBMIT; @@ -5574,4 +5573,15 @@ public static void setPurgeableTobmstonesMetricGranularity(Config.TombstonesMetr { conf.tombstone_read_purgeable_metric_granularity = granularity; } + + public static boolean getPaxosRepairRaceWait() + { + return conf.paxos_repair_race_wait; + } + + @VisibleForTesting + public static void setPaxosRepairRaceWait(boolean paxosRepairRaceWait) + { + conf.paxos_repair_race_wait = paxosRepairRaceWait; + } } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 812dc31cd492..e8a3b40a86d3 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -142,11 +142,11 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.LocalStrategy; import org.apache.cassandra.locator.MetaStrategy; +import org.apache.cassandra.locator.NodeProximity; import org.apache.cassandra.locator.RangesAtEndpoint; import org.apache.cassandra.locator.RangesByEndpoint; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.Replicas; -import org.apache.cassandra.locator.NodeProximity; import org.apache.cassandra.locator.SnitchAdapter; import org.apache.cassandra.locator.SystemReplicas; import org.apache.cassandra.metrics.Sampler; @@ -5541,4 +5541,16 @@ public void setPrioritizeSAIOverLegacyIndex(boolean value) { DatabaseDescriptor.setPrioritizeSAIOverLegacyIndex(value); } + + @Override + public void setPaxosRepairRaceWait(boolean paxosRepairRaceWait) + { + DatabaseDescriptor.setPaxosRepairRaceWait(paxosRepairRaceWait); + } + + @Override + public boolean getPaxosRepairRaceWait() + { + return DatabaseDescriptor.getPaxosRepairRaceWait(); + } } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index 7760ea01883e..b738ecd48678 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -1356,4 +1356,8 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e boolean getPrioritizeSAIOverLegacyIndex(); void setPrioritizeSAIOverLegacyIndex(boolean value); + + void setPaxosRepairRaceWait(boolean paxosRepairCoordinatorWait); + + boolean getPaxosRepairRaceWait(); } diff --git a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java index a53fec3e6081..7e5935f03d4a 100644 --- a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java +++ b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java @@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHashMap; import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,9 +42,15 @@ import org.apache.cassandra.service.paxos.PaxosState; import org.apache.cassandra.service.paxos.uncommitted.UncommittedPaxosKey; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.concurrent.AsyncFuture; +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.config.DatabaseDescriptor.getCasContentionTimeout; +import static org.apache.cassandra.config.DatabaseDescriptor.getWriteRpcTimeout; import static org.apache.cassandra.service.paxos.cleanup.PaxosCleanupSession.TIMEOUT_NANOS; public class PaxosCleanupLocalCoordinator extends AsyncFuture @@ -134,8 +141,10 @@ private void scheduleKeyRepairsOrFinish() return; } + long txnTimeoutMicros = Math.max(getCasContentionTimeout(MICROSECONDS), getWriteRpcTimeout(MICROSECONDS)); + boolean waitForCoordinator = DatabaseDescriptor.getPaxosRepairRaceWait(); while (inflight.size() < parallelism && uncommittedIter.hasNext()) - repairKey(uncommittedIter.next()); + repairKey(uncommittedIter.next(), txnTimeoutMicros, waitForCoordinator); } @@ -143,7 +152,7 @@ private void scheduleKeyRepairsOrFinish() finish(); } - private boolean repairKey(UncommittedPaxosKey uncommitted) + private boolean repairKey(UncommittedPaxosKey uncommitted, long txnTimeoutMicros, boolean waitForCoordinator) { logger.trace("repairing {}", uncommitted); Preconditions.checkState(!inflight.containsKey(uncommitted.getKey())); @@ -154,6 +163,9 @@ private boolean repairKey(UncommittedPaxosKey uncommitted) if (consistency == null) return false; + if (waitForCoordinator) + maybeWaitForOriginalCoordinator(uncommitted, txnTimeoutMicros); + inflight.put(uncommitted.getKey(), tableRepairs.startOrGetOrQueue(uncommitted.getKey(), uncommitted.ballot(), uncommitted.getConsistencyLevel(), table, result -> { if (result.wasSuccessful()) onKeyFinish(uncommitted.getKey()); @@ -163,6 +175,24 @@ private boolean repairKey(UncommittedPaxosKey uncommitted) return true; } + /** + * Wait to repair things that are still potentially executing at the original coordinator to avoid + * causing timeouts. This should only have to happen at most a few times when the repair starts + */ + private static void maybeWaitForOriginalCoordinator(UncommittedPaxosKey uncommitted, long txnTimeoutMicros) + { + long nowMicros = MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + long ballotElapsedMicros = nowMicros - uncommitted.ballot().unixMicros(); + if (ballotElapsedMicros < 0 && Math.abs(ballotElapsedMicros) > SECONDS.toMicros(1)) + logger.warn("Encountered ballot that is more than 1 second in the future, is there a clock sync issue? {}", uncommitted.ballot()); + if (ballotElapsedMicros < txnTimeoutMicros) + { + long sleepMicros = txnTimeoutMicros - ballotElapsedMicros; + logger.info("Paxos auto repair encountered a potentially in progress ballot, sleeping {}us to allow the in flight operation to finish", sleepMicros); + Uninterruptibles.sleepUninterruptibly(sleepMicros, MICROSECONDS); + } + } + private synchronized void onKeyFinish(DecoratedKey key) { if (!inflight.containsKey(key)) From 5aadbc62f021b7ac1880c7e84ab176cca01d1889 Mon Sep 17 00:00:00 2001 From: "Schoening, Brad" Date: Tue, 25 Mar 2025 16:21:19 -0400 Subject: [PATCH 008/340] CASSANDRA-20117: fixed typos in NTR spec and SASI documents --- doc/SASI.md | 14 +++--- doc/native_protocol_v3.spec | 44 ++++++++--------- doc/native_protocol_v4.spec | 58 +++++++++++----------- doc/native_protocol_v5.spec | 96 ++++++++++++++++++------------------- 4 files changed, 106 insertions(+), 106 deletions(-) diff --git a/doc/SASI.md b/doc/SASI.md index fc38845ce2cd..c7bf17391846 100644 --- a/doc/SASI.md +++ b/doc/SASI.md @@ -199,7 +199,7 @@ cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi SASI supports queries with multiple predicates, however, due to the nature of the default indexing implementation, CQL requires the user -to specify `ALLOW FILTERING` to opt-in to the potential performance +to specify `ALLOW FILTERING` to opt in to the potential performance pitfalls of such a query. With SASI, while the requirement to include `ALLOW FILTERING` remains, to reduce modifications to the grammar, the performance pitfalls do not exist because filtering is not @@ -383,7 +383,7 @@ of the memtable to disk -- this is the origin of the name "SSTable Attached Secondary Index". The SASI index data structures are built in memory as the SSTable is -being written and they are flushed to disk before the writing of the +being written, and they are flushed to disk before the writing of the SSTable completes. The writing of each index file only requires sequential writes to disk. In some cases, partial flushes are performed, and later stitched back together, to reduce memory @@ -467,7 +467,7 @@ collision. To optimize for its write-once environment the [`TokenTreeBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java) -completely loads its interior nodes as the tree is built and it uses +completely loads its interior nodes as the tree is built, and it uses the well-known algorithm optimized for bulk-loading the data structure. @@ -562,7 +562,7 @@ been found, or there is no more matching data, the result set is returned to the coordinator through the existing internal components. The number of queries (total/failed/timed-out), and their latencies, -are maintined per-table/column family. +are maintained per-table/column family. SASI also supports concurrently iterating terms for the same index across SSTables. The concurrency factor is controlled by the @@ -713,7 +713,7 @@ the documentation The abstract `RangeIterator` class provides a unified interface over the two main operations performed by SASI at various layers in the execution path: set intersection and union. These operations are -performed in a iterated, or "streaming", fashion to prevent unneeded +performed in an iterated, or "streaming", fashion to prevent unneeded reads of elements from either set. In both the intersection and union cases the algorithms take advantage of the data being pre-sorted using the same sort order, e.g. term or token order. @@ -725,7 +725,7 @@ performs the "Merge-Join" portion of the algorithm, with the properties of an outer-join, or union. It is implemented with several optimizations to improve its performance over a large number of iterators -- sets to union. Specifically, the -iterator exploits the likely case of the data having many sub-groups +iterator exploits the likely case of the data having many subgroups of overlapping ranges and the unlikely case that all ranges will overlap each other. For more details see the [javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java#L9-L21). @@ -742,7 +742,7 @@ between them based on some properties of the data. the [`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java) in that it performs a "Merge-Join", however, its nature is similar to -a inner-join, where like values are merged by a data-specific merge +an inner-join, where like values are merged by a data-specific merge function (e.g. merging two tokens in a list to lookup in a SSTable later). See the [javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L88-L101) diff --git a/doc/native_protocol_v3.spec b/doc/native_protocol_v3.spec index 30881c949790..a104993367f4 100644 --- a/doc/native_protocol_v3.spec +++ b/doc/native_protocol_v3.spec @@ -228,7 +228,7 @@ Table of Contents representing the port. [consistency] A consistency level specification. This is a [short] representing a consistency level with the following - correspondance: + correspondence: 0x0000 ANY 0x0001 ONE 0x0002 TWO @@ -267,7 +267,7 @@ Table of Contents The body is a [string map] of options. Possible options are: - "CQL_VERSION": the version of CQL to use. This option is mandatory and - currenty, the only version supported is "3.0.0". Note that this is + currently, the only version supported is "3.0.0". Note that this is different from the protocol version. - "COMPRESSION": the compression algorithm to use for frames (See section 5). This is optional, if not specified no compression will be used. @@ -316,8 +316,8 @@ Table of Contents values are provided. Those value are used for bound variables in the query. Optionally, if the 0x40 flag is present, each value will be preceded by a [string] name, representing the name of - the marker the value must be binded to. This is optional, and - if not present, values will be binded by position. + the marker the value must be bound to. This is optional, and + if not present, values will be bound by position. 0x02: Skip_metadata. If present, the Result Set returned as a response to that query (if any) will have the NO_METADATA flag (see Section 4.2.5.2). @@ -332,8 +332,8 @@ Table of Contents started (See Section 8 for more details). 0x10: With serial consistency. If present, should be present. is the [consistency] level for the - serial phase of conditional updates. That consitency can only be - either SERIAL or LOCAL_SERIAL and if not present, it defaults to + serial phase of conditional updates. Consistency can be + either SERIAL or LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else that a conditional update/insert. 0x20: With default timestamp. If present, should be present. @@ -400,8 +400,8 @@ Table of Contents flags are, given there mask: 0x10: With serial consistency. If present, should be present. is the [consistency] level for the - serial phase of conditional updates. That consitency can only be - either SERIAL or LOCAL_SERIAL and if not present, it defaults to + serial phase of conditional updates. Consistency can be + either SERIAL or LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else that a conditional update/insert. 0x20: With default timestamp. If present, should be present. @@ -435,8 +435,8 @@ Table of Contents - is the [consistency] level for the operation. - is only present if the 0x10 flag is set. In that case, is the [consistency] level for the serial phase of - conditional updates. That consitency can only be either SERIAL or - LOCAL_SERIAL and if not present will defaults to SERIAL. This option will + conditional updates. Consistency can be either SERIAL or + LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else that a conditional update/insert. The server will respond with a RESULT message. @@ -461,7 +461,7 @@ Table of Contents This section describes the content of the frame body for the different responses. Please note that to make room for future evolution, clients should - support extra informations (that they should simply discard) to the one + support extra information (that they should simply discard) to the one described in this document at the end of the frame body. 4.2.1. ERROR @@ -488,7 +488,7 @@ Table of Contents The authentication is SASL based and thus consists on a number of server challenges (AUTH_CHALLENGE, Section 4.2.7) followed by client responses - (AUTH_RESPONSE, Section 4.1.2). The Initial exchange is however boostrapped + (AUTH_RESPONSE, Section 4.1.2). The Initial exchange is however bootstrapped by an initial client response. The details of that exchange (including how much challenge-response pair are required) are specific to the authenticator in use. The exchange ends when the server sends an AUTH_SUCCESS message or @@ -541,7 +541,7 @@ Table of Contents [][?...] where: - is an [int]. The bits of provides information on the - formatting of the remaining informations. A flag is set if the bit + formatting of the remaining information. A flag is set if the bit corresponding to its `mask` is set. Supported flags are, given there mask: 0x0001 Global_tables_spec: if set, only one table spec (keyspace @@ -555,7 +555,7 @@ Table of Contents this query (See Section 8 for more details). 0x0004 No_metadata: if set, the is only composed of these , the and optionally the - (depending on the Has_more_pages flage) but + (depending on the Has_more_pages flag) but no other information (so no nor ). This will only ever be the case if this was requested during the query (see QUERY and RESULT messages). @@ -567,8 +567,8 @@ Table of Contents (unique) keyspace name and table name the columns return are of. - specifies the columns returned in the query. There is such column specifications that are composed of: - ()? - The initial and are two [string] are only present + ()? + The initial and are two [string] are only present if the Global_tables_spec flag is not set. The is a [string] and is an [option] that correspond to the description (what this description is depends a bit on the context: in results to @@ -608,7 +608,7 @@ Table of Contents - is a [string] representing the keyspace name this UDT is part of. - is a [string] representing the UDT name. - - is a [short] reprensenting the number of fields of + - is a [short] representing the number of fields of the UDT, and thus the number of pair following - is a [string] representing the name of the @@ -657,7 +657,7 @@ Table of Contents Note that prepared query ID return is global to the node on which the query has been prepared. It can be used on any connection to that node and this - until the node is restarted (after which the query must be reprepared). + until the node is restarted (after which the query must be re-prepared). 4.2.5.5. Schema_change @@ -759,7 +759,7 @@ Table of Contents bytes). - snappy (https://code.google.com/p/snappy/). This compression might not be available as it depends on a native lib (server-side) that might not be - avaivable on some installation. + available on some installation. 6. Data Type Serialization Formats @@ -981,7 +981,7 @@ Table of Contents is an [int] representing the number of replica whose acknowledgement is required to achieve . is a [string] that describe the type of the write - that timeouted. The value of that string can be one + that timed out. The value of that string can be one of: - "SIMPLE": the write was a non-batched non-counter write. @@ -993,10 +993,10 @@ Table of Contents batch. Not batch log write has been attempted. - "COUNTER": the write was a counter write (batched or not). - - "BATCH_LOG": the timeout occured during the + - "BATCH_LOG": the timeout occurred during the write to the batch log when a (logged) batch write was requested. - - "CAS": the timeout occured during the Compare And Set write/update. + - "CAS": the timeout occurred during the Compare And Set write/update. 0x1200 Read_timeout: Timeout exception during a read request. The rest of the ERROR message body will be diff --git a/doc/native_protocol_v4.spec b/doc/native_protocol_v4.spec index 6def73721d98..cd55137a8f57 100644 --- a/doc/native_protocol_v4.spec +++ b/doc/native_protocol_v4.spec @@ -245,7 +245,7 @@ Table of Contents representing the port. [consistency] A consistency level specification. This is a [short] representing a consistency level with the following - correspondance: + correspondence: 0x0000 ANY 0x0001 ONE 0x0002 TWO @@ -366,8 +366,8 @@ Table of Contents started (See Section 8 for more details). 0x10: With serial consistency. If set, should be present. is the [consistency] level for the - serial phase of conditional updates. That consitency can only be - either SERIAL or LOCAL_SERIAL and if not present, it defaults to + serial phase of conditional updates. Consistency can be + SERIAL or LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else other than a conditional update/insert. 0x20: With default timestamp. If set, should be present. @@ -432,8 +432,8 @@ Table of Contents flags are, given their mask: 0x10: With serial consistency. If set, should be present. is the [consistency] level for the - serial phase of conditional updates. That consistency can only be - either SERIAL or LOCAL_SERIAL and if not present, it defaults to + serial phase of conditional updates. Consistency can be + either SERIAL or LOCAL_SERIAL, and if not present, it defaults to SERIAL. This option will be ignored for anything else other than a conditional update/insert. 0x20: With default timestamp. If set, should be present. @@ -467,8 +467,8 @@ Table of Contents - is the [consistency] level for the operation. - is only present if the 0x10 flag is set. In that case, is the [consistency] level for the serial phase of - conditional updates. That consitency can only be either SERIAL or - LOCAL_SERIAL and if not present will defaults to SERIAL. This option will + conditional updates. Consistency can be SERIAL or + LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else other than a conditional update/insert. The server will respond with a RESULT message. @@ -493,7 +493,7 @@ Table of Contents This section describes the content of the frame body for the different responses. Please note that to make room for future evolution, clients should - support extra informations (that they should simply discard) to the one + support extra information (that they should simply discard) to the one described in this document at the end of the frame body. 4.2.1. ERROR @@ -521,7 +521,7 @@ Table of Contents The authentication is SASL based and thus consists of a number of server challenges (AUTH_CHALLENGE, Section 4.2.7) followed by client responses - (AUTH_RESPONSE, Section 4.1.2). The initial exchange is however boostrapped + (AUTH_RESPONSE, Section 4.1.2). The initial exchange is however bootstrapped by an initial client response. The details of that exchange (including how many challenge-response pairs are required) are specific to the authenticator in use. The exchange ends when the server sends an AUTH_SUCCESS message or @@ -600,8 +600,8 @@ Table of Contents (unique) keyspace name and table name the columns belong to. - specifies the columns returned in the query. There are such column specifications that are composed of: - ()? - The initial and are two [string] and are only present + ()? + The initial and are two [string] and are only present if the Global_tables_spec flag is not set. The is a [string] and is an [option] that corresponds to the description (what this description is depends a bit on the context: in results to @@ -713,8 +713,8 @@ Table of Contents - specifies the bind markers in the prepared statement. There are such column specifications, each with the following format: - ()? - The initial and are two [string] that are only + ()? + The initial and are two [string] that are only present if the Global_tables_spec flag is not set. The field is a [string] that holds the name of the bind marker (if named), or the name of the column, field, or expression that the bind marker @@ -737,7 +737,7 @@ Table of Contents Note that the prepared query ID returned is global to the node on which the query has been prepared. It can be used on any connection to that node - until the node is restarted (after which the query must be reprepared). + until the node is restarted (after which the query must be re-prepared). 4.2.5.5. Schema_change @@ -754,7 +754,7 @@ Table of Contents 4.2.6. EVENT An event pushed by the server. A client will only receive events for the - types it has REGISTERed to. The body of an EVENT message will start with a + types it has REGISTER-ed to. The body of an EVENT message will start with a [string] representing the event type. The rest of the message depends on the event type. The valid event types are: - "TOPOLOGY_CHANGE": events related to change in the cluster topology. @@ -842,7 +842,7 @@ Table of Contents bytes). - snappy (https://code.google.com/p/snappy/). This compression might not be available as it depends on a native lib (server-side) that might not be - avaivable on some installations. + available on some installations. 6. Data Type Serialization Formats @@ -1099,11 +1099,11 @@ Table of Contents - "BATCH_LOG": the timeout occurred during the write to the batch log when a (logged) batch write was requested. - - "CAS": the timeout occured during the Compare And Set write/update. - - "VIEW": the timeout occured when a write involves - VIEW update and failure to acqiure local view(MV) + - "CAS": the timeout occurred during the Compare And Set write/update. + - "VIEW": the timeout occurred when a write involves + VIEW update and failure to acquire local view(MV) lock for key within timeout - - "CDC": the timeout occured when cdc_total_space is + - "CDC": the timeout occurred when cdc_total_space is exceeded when doing a write to data tracked by cdc. 0x1200 Read_timeout: Timeout exception during a read request. The rest of the ERROR message body will be @@ -1124,7 +1124,7 @@ Table of Contents responded. Otherwise, the value is != 0. 0x1300 Read_failure: A non-timeout exception during a read request. The rest of the ERROR message body will be - + where: is the [consistency] level of the query having triggered the exception. @@ -1132,7 +1132,7 @@ Table of Contents answered the request. is an [int] representing the number of replicas whose acknowledgement is required to achieve . - is an [int] representing the number of nodes that + is an [int] representing the number of nodes that experience a failure while executing the request. is a single byte. If its value is 0, it means the replica that was asked for data had not @@ -1146,7 +1146,7 @@ Table of Contents [string list] one string for each argument type (as CQL type) of the failed function 0x1500 Write_failure: A non-timeout exception during a write request. The rest of the ERROR message body will be - + where: is the [consistency] level of the query having triggered the exception. @@ -1154,7 +1154,7 @@ Table of Contents answered the request. is an [int] representing the number of replicas whose acknowledgement is required to achieve . - is an [int] representing the number of nodes that + is an [int] representing the number of nodes that experience a failure while executing the request. is a [string] that describes the type of the write that failed. The value of that string can be one @@ -1169,14 +1169,14 @@ Table of Contents batch. No batch log write has been attempted. - "COUNTER": the write was a counter write (batched or not). - - "BATCH_LOG": the failure occured during the + - "BATCH_LOG": the failure occurred during the write to the batch log when a (logged) batch write was requested. - - "CAS": the failure occured during the Compare And Set write/update. - - "VIEW": the failure occured when a write involves - VIEW update and failure to acqiure local view(MV) + - "CAS": the failure occurred during the Compare And Set write/update. + - "VIEW": the failure occurred when a write involves + VIEW update and failure to acquire local view(MV) lock for key within timeout - - "CDC": the failure occured when cdc_total_space is + - "CDC": the failure occurred when cdc_total_space is exceeded when doing a write to data tracked by cdc. 0x2000 Syntax_error: The submitted query has a syntax error. diff --git a/doc/native_protocol_v5.spec b/doc/native_protocol_v5.spec index e080801978c5..88d6a948a709 100644 --- a/doc/native_protocol_v5.spec +++ b/doc/native_protocol_v5.spec @@ -404,7 +404,7 @@ Table of Contents The purpose is to send small negative values as small unsigned values, so that we save bytes on the wire. To encode a value n use "(n >> 31) ^ (n << 1)" for 32 bit values, and "(n >> 63) ^ (n << 1)" for 64 bit values where "^" is the xor operation, "<<" is the left shift operation and ">>" is - the arithemtic right shift operation (highest-order bit is replicated). + the arithmetic right shift operation (highest-order bit is replicated). Decode with "(n >> 1) ^ -(n & 1)". [option] A pair of where is a [short] representing @@ -422,7 +422,7 @@ Table of Contents [byte] representing the IP address. [consistency] A consistency level specification. This is a [short] representing a consistency level with the following - correspondance: + correspondence: 0x0000 ANY 0x0001 ONE 0x0002 TWO @@ -478,7 +478,7 @@ Table of Contents This is optional; if not specified no compression will be used. - "DRIVER_NAME": allows clients to supply a free-form label representing the driver implementation. This is displayed in the output of `nodetool clientstats` - - "DRIVER_VERSION": allows clients to supply a free-form label represting the driver + - "DRIVER_VERSION": allows clients to supply a free-form label representing the driver version. This is displayed in the output of `nodetool clientstats` - "THROW_ON_OVERLOAD": flag to specify server behaviour where the incoming message rate is too high. An [string] value of "1" instructs the server to respond with @@ -548,8 +548,8 @@ Table of Contents started (See Section 7 for more details). 0x0010: With serial consistency. If set, should be present. is the [consistency] level for the - serial phase of conditional updates. That consitency can only be - either SERIAL or LOCAL_SERIAL and if not present, it defaults to + serial phase of conditional updates. Consistency can be + either SERIAL or LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else other than a conditional update/insert. 0x0020: With default timestamp. If set, must be present. @@ -567,7 +567,7 @@ Table of Contents and using this flag, while supported, is almost surely inefficient. 0x0080: With keyspace. If set, must be present. is a [string] indicating the keyspace that the query should be executed in. - It supercedes the keyspace that the connection is bound to, if any. + It supersedes the keyspace that the connection is bound to, if any. 0x0100: With now in seconds. If set, must be present. is an [int] representing the current time (now) for the query. Affects TTL cell liveness in read queries and local deletion @@ -593,7 +593,7 @@ Table of Contents flags are, given their mask: 0x01: With keyspace. If set, must be present. is a [string] indicating the keyspace that the query should be executed in. - It supercedes the keyspace that the connection is bound to, if any. + It supersedes the keyspace that the connection is bound to, if any. The server will respond with a RESULT message with a `prepared` kind (0x0004, see Section 4.2.5). @@ -606,10 +606,10 @@ Table of Contents where - is the prepared query ID. It's the [short bytes] returned as a response to a PREPARE message. - - is the ID of the resultset metadata that was sent + - is the ID of the result set metadata that was sent along with response to PREPARE message. If a RESULT/Rows message reports - changed resultset metadata with the Metadata_changed flag, the reported new - resultset metadata must be used in subsequent executions. + changed result set metadata with the Metadata_changed flag, the reported new + result set metadata must be used in subsequent executions. - has the exact same definition as in QUERY (see Section 4.1.4). @@ -634,8 +634,8 @@ Table of Contents flags are, given their mask: 0x0010: With serial consistency. If set, should be present. is the [consistency] level for the - serial phase of conditional updates. That consistency can only be - either SERIAL or LOCAL_SERIAL and if not present, it defaults to + serial phase of conditional updates. Consistency can be + either SERIAL or LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else other than a conditional update/insert. 0x0020: With default timestamp. If set, should be present. @@ -652,7 +652,7 @@ Table of Contents more details]. 0x0080: With keyspace. If set, must be present. is a [string] indicating the keyspace that the query should be executed in. - It supercedes the keyspace that the connection is bound to, if any. + It supersedes the keyspace that the connection is bound to, if any. 0x0100: With now in seconds. If set, must be present. is an [int] representing the current time (now) for the query. Affects TTL cell liveness in read queries and local deletion @@ -677,8 +677,8 @@ Table of Contents - is the [consistency] level for the operation. - is only present if the 0x10 flag is set. In that case, is the [consistency] level for the serial phase of - conditional updates. That consitency can only be either SERIAL or - LOCAL_SERIAL and if not present will defaults to SERIAL. This option will + conditional updates. Consistency can be either SERIAL or + LOCAL_SERIAL, if not present, it defaults to SERIAL. This option will be ignored for anything else other than a conditional update/insert. The server will respond with a RESULT message. @@ -703,7 +703,7 @@ Table of Contents This section describes the content of the frame body for the different responses. Please note that to make room for future evolution, clients should - support extra informations (that they should simply discard) to the one + support extra information (that they should simply discard) to the one described in this document at the end of the frame body. 4.2.1. ERROR @@ -731,7 +731,7 @@ Table of Contents The authentication is SASL based and thus consists of a number of server challenges (AUTH_CHALLENGE, Section 4.2.7) followed by client responses - (AUTH_RESPONSE, Section 4.1.2). The initial exchange is however boostrapped + (AUTH_RESPONSE, Section 4.1.2). The initial exchange is however bootstrapped by an initial client response. The details of that exchange (including how many challenge-response pairs are required) are specific to the authenticator in use. The exchange ends when the server sends an AUTH_SUCCESS message or @@ -809,12 +809,12 @@ Table of Contents during the query (see QUERY and RESULT messages). 0x0008 Metadata_changed: if set, the No_metadata flag has to be unset and has to be supplied. This flag is to be - used to avoid a roundtrip in case of metadata changes for queries + used to avoid a round trip in case of metadata changes for queries that requested metadata to be skipped. - is an [int] representing the number of columns selected by the query that produced this result. It defines the number of elements in and the number of elements for each row in . - - is [short bytes] representing the new, changed resultset + - is [short bytes] representing the new, changed result set metadata. The new metadata ID must also be used in subsequent executions of the corresponding prepared statement, if any. - is present if the Global_tables_spec is set in @@ -822,8 +822,8 @@ Table of Contents (unique) keyspace name and table name the columns belong to. - specifies the columns returned in the query. There are such column specifications that are composed of: - ()? - The initial and are two [string] and are only present + ()? + The initial and are two [string] and are only present if the Global_tables_spec flag is not set. The is a [string] and is an [option] that corresponds to the description (what this description is depends a bit on the context: in results to @@ -901,7 +901,7 @@ Table of Contents where: - is [short bytes] representing the prepared query ID. - - is [short bytes] representing the resultset metadata ID. + - is [short bytes] representing the result set metadata ID. - is composed of: [...][?...] where: @@ -937,8 +937,8 @@ Table of Contents - specifies the bind markers in the prepared statement. There are such column specifications, each with the following format: - ()? - The initial and are two [string] that are only + ()? + The initial and are two [string] that are only present if the Global_tables_spec flag is not set. The field is a [string] that holds the name of the bind marker (if named), or the name of the column, field, or expression that the bind marker @@ -961,7 +961,7 @@ Table of Contents Note that the prepared query ID returned is global to the node on which the query has been prepared. It can be used on any connection to that node - until the node is restarted (after which the query must be reprepared). + until the node is restarted (after which the query must be re-prepared). 4.2.5.5. Schema_change @@ -978,7 +978,7 @@ Table of Contents 4.2.6. EVENT An event pushed by the server. A client will only receive events for the - types it has REGISTERed to. The body of an EVENT message will start with a + types it has REGISTER-ed to. The body of an EVENT message will start with a [string] representing the event type. The rest of the message depends on the event type. The valid event types are: - "TOPOLOGY_CHANGE": events related to change in the cluster topology. @@ -1209,7 +1209,7 @@ Table of Contents 5.25 vector For a vector of n dimensions of a fixed-length type, a sequence of those n elements. - For a vector with variable-length elements, the size of the elements will preced + For a vector with variable-length elements, the size of the elements will precede each element. Each element is the [bytes] representing the serialized value. The number of dimensions is not encoded, since it's part of the type definition. @@ -1318,13 +1318,13 @@ Table of Contents - "BATCH_LOG": the timeout occurred during the write to the batch log when a (logged) batch write was requested. - - "CAS": the timeout occured during the Compare And Set write/update. - - "VIEW": the timeout occured when a write involves - VIEW update and failure to acqiure local view(MV) + - "CAS": the timeout occurred during the Compare And Set write/update. + - "VIEW": the timeout occurred when a write involves + VIEW update and failure to acquire local view(MV) lock for key within timeout - - "CDC": the timeout occured when cdc_total_space is + - "CDC": the timeout occurred when cdc_total_space is exceeded when doing a write to data tracked by cdc. - is a [short] that describes the number of contentions occured during the CAS operation. + is a [short] that describes the number of contentions occurred during the CAS operation. The field only presents when the is "CAS". 0x1200 Read_timeout: Timeout exception during a read request. The rest of the ERROR message body will be @@ -1345,7 +1345,7 @@ Table of Contents responded. Otherwise, the value is != 0. 0x1300 Read_failure: A non-timeout exception during a read request. The rest of the ERROR message body will be - + where: is the [consistency] level of the query having triggered the exception. @@ -1353,12 +1353,12 @@ Table of Contents answered the request. is an [int] representing the number of replicas whose acknowledgement is required to achieve . - is a map of endpoint to failure reason codes. This maps + is a map of endpoint to failure reason codes. This maps the endpoints of the replica nodes that failed when executing the request to a code representing the reason for the failure. The map is encoded starting with an [int] n - followed by n pairs of where - is an [inetaddr] and is a [short]. + followed by n pairs of where + is an [inetaddr] and is a [short]. is a single byte. If its value is 0, it means the replica that was asked for data had not responded. Otherwise, the value is != 0. @@ -1371,7 +1371,7 @@ Table of Contents [string list] one string for each argument type (as CQL type) of the failed function 0x1500 Write_failure: A non-timeout exception during a write request. The rest of the ERROR message body will be - + where: is the [consistency] level of the query having triggered the exception. @@ -1379,12 +1379,12 @@ Table of Contents answered the request. is an [int] representing the number of replicas whose acknowledgement is required to achieve . - is a map of endpoint to failure reason codes. This maps + is a map of endpoint to failure reason codes. This maps the endpoints of the replica nodes that failed when executing the request to a code representing the reason for the failure. The map is encoded starting with an [int] n - followed by n pairs of where - is an [inetaddr] and is a [short]. + followed by n pairs of where + is an [inetaddr] and is a [short]. is a [string] that describes the type of the write that failed. The value of that string can be one of: @@ -1398,17 +1398,17 @@ Table of Contents batch. No batch log write has been attempted. - "COUNTER": the write was a counter write (batched or not). - - "BATCH_LOG": the failure occured during the + - "BATCH_LOG": the failure occurred during the write to the batch log when a (logged) batch write was requested. - - "CAS": the failure occured during the Compare And Set write/update. - - "VIEW": the failure occured when a write involves - VIEW update and failure to acqiure local view(MV) + - "CAS": the failure occurred during the Compare And Set write/update. + - "VIEW": the failure occurred when a write involves + VIEW update and failure to acquire local view(MV) lock for key within timeout - - "CDC": the failure occured when cdc_total_space is + - "CDC": the failure occurred when cdc_total_space is exceeded when doing a write to data tracked by cdc. 0x1600 CDC_WRITE_FAILURE: // todo - 0x1700 CAS_WRITE_UNKNOWN: An exception occured due to contended Compare And Set write/update. + 0x1700 CAS_WRITE_UNKNOWN: An exception occurred due to contended Compare And Set write/update. The CAS operation was only partially completed and the operation may or may not get completed by the contending CAS write or SERIAL/LOCAL_SERIAL read. The rest of the ERROR message body will be @@ -1444,8 +1444,8 @@ Table of Contents * Added result set metadata id to Prepared responses (Section 4.2.5.4) * Beta protocol flag for v5 native protocol is added (Section 2.2) - * in Read_failure and Write_failure error message bodies (Section 9) - has been replaced with . The maps node IP addresses to + * in Read_failure and Write_failure error message bodies (Section 9) + has been replaced with . The maps node IP addresses to a failure reason code which indicates why the request failed on that node. * Enlarged flag's bitmaps for QUERY, EXECUTE and BATCH messages from [byte] to [int] (Sections 4.1.4, 4.1.6 and 4.1.7). From cf60eb8672bf162a47b55ed31ff785440fa53840 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Thu, 3 Apr 2025 11:31:28 -0700 Subject: [PATCH 009/340] Fix mixed mode paxos hang - reinstates the mrc check from CASSANDRA-12043 for legacy paxos purging Patch by Blake Eggleston; Reviewed by Ariel Weisberg for CASSANDRA-20514 --- CHANGES.txt | 1 + .../cassandra/service/StorageProxy.java | 2 +- .../service/paxos/v1/PrepareCallback.java | 29 +++++- .../upgrade/MixedModePaxosTTLTest.java | 94 +++++++++++++++++++ .../upgrade/MixedModePaxosTestBase.java | 55 +++++++++-- 5 files changed, 170 insertions(+), 11 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTTLTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 0dfb6d1c0b0a..d1aa662fe986 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.1.9 + * Fix mixed mode paxos ttl commit hang (CASSANDRA-20514) * Fix paxos mixed mode infinite loop (CASSANDRA-20493) * Optionally skip exception logging on invalid legacy protocol magic exception (CASSANDRA-19483) * Fix SimpleClient ability to release acquired capacity (CASSANDRA-20202) diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index d4a61b996dfa..41703c439c67 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -652,7 +652,7 @@ private static PaxosBallotAndContention beginAndRepairPaxos(Dispatcher.RequestTi // https://issues.apache.org/jira/browse/CASSANDRA-5062?focusedCommentId=13619810&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13619810) // Since we waited for quorum nodes, if some of them haven't seen the last commit (which may just be a timing issue, but may also // mean we lost messages), we pro-actively "repair" those nodes, and retry. - Iterable missingMRC = summary.replicasMissingMostRecentCommit(); + Iterable missingMRC = summary.replicasMissingMostRecentCommit(metadata); if (Iterables.size(missingMRC) > 0) { Tracing.trace("Repairing replicas that missed the most recent commit"); diff --git a/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java b/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java index 4aedb6d63dee..315c7abb9fd6 100644 --- a/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java +++ b/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java @@ -19,11 +19,16 @@ package org.apache.cassandra.service.paxos.v1; +import java.util.Collections; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; import com.google.common.collect.Iterables; +import org.apache.cassandra.config.Config; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.db.ConsistencyLevel; @@ -35,6 +40,8 @@ import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.PrepareResponse; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.UUIDGen; public class PrepareCallback extends AbstractPaxosCallback { @@ -79,8 +86,28 @@ public synchronized void onResponse(Message message) latch.decrement(); } - public Iterable replicasMissingMostRecentCommit() + public Iterable replicasMissingMostRecentCommit(TableMetadata metadata) { + /** + * this check is only needed for mixed mode operation with 4.0 and can be removed once upgrade support dropped + * see the comment in {@link org.apache.cassandra.distributed.upgrade.MixedModePaxosTTLTest} for a full explanation. + */ + if (DatabaseDescriptor.paxosStatePurging() == Config.PaxosStatePurging.legacy) + { + // In general, we need every replicas that have answered to the prepare (a quorum) to agree on the MRC (see + // coment in StorageProxy.beginAndRepairPaxos(), but basically we need to make sure at least a quorum of nodes + // have learn a commit before commit a new one otherwise that previous commit is not guaranteed to have reach a + // quorum and further commit may proceed on incomplete information). + // However, if that commit is too hold, it may have been expired from some of the replicas paxos table (we don't + // keep the paxos state forever or that could grow unchecked), and we could end up in some infinite loop as + // explained on CASSANDRA-12043. To avoid that, we ignore an MRC that is too old, i.e. older than the TTL we set + // on paxos tables. For such an old commit, we rely on hints and repair to ensure the commit has indeed been + // propagated to all nodes. + long paxosTtlSec = SystemKeyspace.legacyPaxosTtlSec(metadata); + if (TimeUnit.MICROSECONDS.toSeconds(mostRecentCommit.ballot.unixMicros()) + paxosTtlSec < FBUtilities.nowInSeconds()) + return Collections.emptySet(); + } + return Iterables.filter(commitsByReplica.keySet(), inetAddress -> (!commitsByReplica.get(inetAddress).ballot.equals(mostRecentCommit.ballot))); } } diff --git a/test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTTLTest.java b/test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTTLTest.java new file mode 100644 index 000000000000..f7e2d9272c2e --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTTLTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.upgrade; + +import java.util.concurrent.TimeUnit; + +import org.junit.Test; + +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.upgrade.MixedModePaxosTestBase.FakePaxosHelper; + +import static java.lang.String.format; + +public class MixedModePaxosTTLTest extends UpgradeTestBase +{ + /** + * Tests the mixed mode paxos loop bug in CASSANDRA-20514 + * + * CEP-14 changed the ttl behavior of legacy paxos state to expire based off the ballot time of the operation being + * persisted, not the time a commit is persisted. This eliminated the race addressed by CASSANDRA-12043, and so the + * check it added to the most recent commit prepare logic was removed. + * + * When operating in mixed mode though, this can still be a problem. If a 4.1 or higher node is coordinating a paxos + * operation with 2 or more replicas on 4.0 or lower, this race becomes a problem again. You need 3 things to make + * this an infinite loop + * 1. a 4.1 node coordinating a paxos operation with 2x 4.0 replicas + * 2. replica A) a 4.0 node returns a most recent commit for a ballot that's could have been ttld + * 3. replica B) a 4.0 node has ttl'd that mrc AND converted the ttld cells into tombstones + * + * The 4.1 coordinator receives the mrc from replica A, but since it no longer disregards missing most recent commits + * past the ttl window, it sends the "missing" commit to replica B. Since replica B now has a tombstone for that mrc, + * and tombstones win when reconciled with live cells, even ones with ttls, the commit is a noop and it continues + * to report nothing for its mrc value when the coordinator restarts the prepare phase. This loops until the query + * times out. + */ + @Test + public void legacyExpiredStateTest() throws Throwable + { + String keyspace = "ks"; + String table = "tbl"; + int gcGrace = 60*60*24; // 1 day + int key = 100; // hashes to nodes 2 & 3 w/ murmur @ RF=2 + new TestCase() + .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK).set("cas_contention_timeout", "500ms")) + .nodes(3) + .nodesToUpgrade(1) + .singleUpgrade(v40) + .setup(cluster -> { + cluster.schemaChange(format("CREATE KEYSPACE %s WITH REPLICATION={'class': 'SimpleStrategy', 'replication_factor': '2'}", keyspace)); + cluster.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) " + + "WITH gc_grace_seconds=%s", keyspace, table, gcGrace)); + }) + .runAfterClusterUpgrade(cluster -> { + // disable compaction to prevent paxos state from being purged + cluster.forEach(instance -> instance.nodetool("disableautocompaction")); + + long ballotMicros = TimeUnit.MILLISECONDS.toMicros(System.currentTimeMillis()); + ballotMicros -= TimeUnit.SECONDS.toMicros(gcGrace + 10); + FakePaxosHelper helper = FakePaxosHelper.create(cluster.coordinator(1), keyspace, table, key, gcGrace, ballotMicros); + + // confirm none of the nodes have paxos state + for (int i = 1; i <= cluster.size(); i++) + helper.assertNoPaxosData(cluster.coordinator(i)); + + // save a tombstoned commit to one node to simulate expired cells being converted to tombstones + helper.tombstoneCommit(cluster.coordinator(2)); + + // insert paxos state and confirm it hasn't ttl'd yet + helper.saveCommit(cluster.coordinator(3)); + helper.assertPaxosData(cluster.coordinator(3)); + + // paxos operation should not timeout + cluster.coordinator(1).execute(format("SELECT * FROM %s.%s WHERE k=%s", keyspace, table, key), ConsistencyLevel.SERIAL); + }) + .run(); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTestBase.java b/test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTestBase.java index 12e7b9656112..e9e16f738a03 100644 --- a/test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/upgrade/MixedModePaxosTestBase.java @@ -76,6 +76,7 @@ private void ttldPaxosStateTest(boolean legacyAware, boolean upgradeAware) throw String keyspace = KEYSPACE; String table = "tbl"; int gcGrace = 10; + int key = 1; new TestCase() .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK)) .nodes(2) @@ -91,11 +92,11 @@ private void ttldPaxosStateTest(boolean legacyAware, boolean upgradeAware) throw // insert a ttl'd committed paxos state long ballotMicros = TimeUnit.NANOSECONDS.toMicros(System.currentTimeMillis()); - FakePaxosHelper helper = FakePaxosHelper.create(cluster.coordinator(1), keyspace, table, gcGrace, ballotMicros); + FakePaxosHelper helper = FakePaxosHelper.create(cluster.coordinator(1), keyspace, table, key, gcGrace, ballotMicros); // confirm none of the nodes have paxos state for (int i = 1; i <= cluster.size(); i++) - Assert.assertEquals(0, cluster.coordinator(i).execute("SELECT * FROM system.paxos", ConsistencyLevel.ONE).length); + helper.assertNoPaxosData(cluster.coordinator(i)); // save commit to both nodes @@ -109,11 +110,11 @@ private void ttldPaxosStateTest(boolean legacyAware, boolean upgradeAware) throw Thread.sleep(TimeUnit.SECONDS.toMillis(gcGrace * 2)); // confirm paxos state has ttld - Assert.assertEquals(0, cluster.coordinator(1).execute("SELECT * FROM system.paxos", ConsistencyLevel.ONE).length); - Assert.assertEquals(0, cluster.coordinator(2).execute("SELECT * FROM system.paxos", ConsistencyLevel.ONE).length); + helper.assertNoPaxosData(cluster.coordinator(1)); + helper.assertNoPaxosData(cluster.coordinator(2)); // paxos operation should not timeout - cluster.coordinator(upgradedCoordinator() ? 1 : 2).execute(format("SELECT * FROM %s.%s WHERE k=1", keyspace, table), ConsistencyLevel.SERIAL); + cluster.coordinator(upgradedCoordinator() ? 1 : 2).execute(format("SELECT * FROM %s.%s WHERE k=%s", keyspace, table, key), ConsistencyLevel.SERIAL); }) .run(); } @@ -133,14 +134,14 @@ public void legacyAwareTTldPaxosStateTest() throws Throwable @Test public void bothAwareTTldPaxosStateTest() throws Throwable { - ttldPaxosStateTest(true, false); + ttldPaxosStateTest(true, true); } /** * This is an upgrade test, and paxos internally limits ttls to 3 hours, so we have to manually save commits in * the paxos table to get entries ttl'd in a reasonable amount of time */ - private static class FakePaxosHelper + static class FakePaxosHelper { static final int current_version = MessagingService.current_version; static final int version_40a = MessagingService.VERSION_40; @@ -181,6 +182,21 @@ ByteBuffer updateBytes(int version) return PartitionUpdate.toBytes(update, version); } + private Object[][] paxosData(ICoordinator coordinator) + { + return coordinator.execute("SELECT * FROM system.paxos WHERE row_key = ? AND cf_id = ?", ConsistencyLevel.ONE, key, cfId); + } + + void assertNoPaxosData(ICoordinator coordinator) + { + Assert.assertEquals(0, paxosData(coordinator).length); + } + + void assertPaxosData(ICoordinator coordinator) + { + Assert.assertEquals(1, paxosData(coordinator).length); + } + void saveCommit(ICoordinator coordinator) { String cql = "UPDATE system.paxos USING TIMESTAMP ? AND TTL ? SET proposal_ballot = null, proposal = null, most_recent_commit_at = ?, most_recent_commit = ?, most_recent_commit_version = ? WHERE row_key = ? AND cf_id = ?"; @@ -194,10 +210,31 @@ void saveCommit(ICoordinator coordinator) cfId); } - public static FakePaxosHelper create(ICoordinator coordinator, String keyspace, String table, int ttl, long ballotMicros) + void tombstoneCommit(ICoordinator coordinator) + { + String cql = "DELETE proposal_ballot, proposal, most_recent_commit_at, most_recent_commit, most_recent_commit_version FROM system.paxos USING TIMESTAMP ? WHERE row_key = ? AND cf_id = ?"; + coordinator.execute(cql, ConsistencyLevel.ONE, + ballotMicros, + key, + cfId); + } + + void saveCommitNoTTL(ICoordinator coordinator) + { + String cql = "UPDATE system.paxos USING TIMESTAMP ? SET proposal_ballot = null, proposal = null, most_recent_commit_at = ?, most_recent_commit = ?, most_recent_commit_version = ? WHERE row_key = ? AND cf_id = ?"; + coordinator.execute(cql, ConsistencyLevel.ONE, + ballotMicros, + ballot, + updateBytes(version_40a), + version_40a, + key, + cfId); + } + + public static FakePaxosHelper create(ICoordinator coordinator, String keyspace, String table, int key, int ttl, long ballotMicros) { UUID cfId = (UUID) coordinator.execute("SELECT id FROM system_schema.tables WHERE keyspace_name=? AND table_name=?", ConsistencyLevel.ONE, keyspace, table)[0][0]; - return new FakePaxosHelper(keyspace, table, cfId, 1, ttl, ballotMicros); + return new FakePaxosHelper(keyspace, table, cfId, key, ttl, ballotMicros); } } } From 50978a0d0738327290d06288c78967a61643506b Mon Sep 17 00:00:00 2001 From: Ekaterina Dimitrova Date: Thu, 5 Dec 2024 16:28:08 -0500 Subject: [PATCH 010/340] CASSANDRA-20402: Add new reason RequestFailureReason.INDEX_BUILD_IN_PROGRESS and IndexBuildInProgress exception when queries fail during index build patch by Ekaterina Dimitrova; reviewed by Caleb Rackliffe for CASSANDRA-20402 --- CHANGES.txt | 1 + .../exceptions/RequestFailureReason.java | 58 +++++++------- .../index/IndexBuildInProgressException.java | 36 +++++++++ .../cassandra/index/IndexStatusManager.java | 12 ++- .../index/SecondaryIndexManager.java | 15 +++- .../org/apache/cassandra/net/InboundSink.java | 14 +++- .../test/sai/IndexAvailabilityTest.java | 80 ++++++++++++++++++- .../entities/SecondaryIndexTest.java | 13 +-- .../exceptions/RequestFailureReasonTest.java | 43 +++++++++- .../index/IndexStatusManagerTest.java | 2 +- .../index/sai/cql/AllowFilteringTest.java | 32 ++++++++ 11 files changed, 267 insertions(+), 39 deletions(-) create mode 100644 src/java/org/apache/cassandra/index/IndexBuildInProgressException.java diff --git a/CHANGES.txt b/CHANGES.txt index 029a87ae2965..c03ffcafa4a5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Throw new IndexBuildInProgressException when queries fail during index build, instead of IndexNotAvailableException (CASSANDRA-20402) * Fix Paxos repair interrupts running transactions (CASSANDRA-20469) * Various fixes in constraint framework (CASSANDRA-20481) * Add support in CAS for -= on numeric types, and fixed improper handling of empty bytes which lead to NPE (CASSANDRA-20477) diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java index 1bc86ff061ab..9faff584f140 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java @@ -18,15 +18,18 @@ package org.apache.cassandra.exceptions; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; import org.apache.cassandra.db.filter.TombstoneOverwhelmingException; +import org.apache.cassandra.index.IndexBuildInProgressException; +import org.apache.cassandra.index.IndexNotAvailableException; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.tcm.NotCMSException; import org.apache.cassandra.utils.vint.VIntCoding; -import static java.lang.Math.max; import static org.apache.cassandra.net.MessagingService.VERSION_40; public enum RequestFailureReason @@ -36,14 +39,16 @@ public enum RequestFailureReason TIMEOUT (2), INCOMPATIBLE_SCHEMA (3), READ_SIZE (4), + // below reason is only logged, but it does not have associated exception NODE_DOWN (5), INDEX_NOT_AVAILABLE (6), + // below reason does not have an associated exception READ_TOO_MANY_INDEXES (7), NOT_CMS (8), INVALID_ROUTING (9), COORDINATOR_BEHIND (10), - ; - + // The following codes have been ported from an external fork, where they were offset explicitly to avoid conflicts. + INDEX_BUILD_IN_PROGRESS (503); public static final Serializer serializer = new Serializer(); public final int code; @@ -53,26 +58,32 @@ public enum RequestFailureReason this.code = code; } - private static final RequestFailureReason[] codeToReasonMap; + private static final Map codeToReasonMap = new HashMap<>(); + private static final Map, RequestFailureReason> exceptionToReasonMap = new HashMap<>(); + private static final int REASONS_WITHOUT_EXCEPTIONS = 3; // UNKNOWN, NODE_DOWN, and READ_TOO_MANY_INDEXES static { RequestFailureReason[] reasons = values(); - int max = -1; - for (RequestFailureReason r : reasons) - max = max(r.code, max); - - RequestFailureReason[] codeMap = new RequestFailureReason[max + 1]; - for (RequestFailureReason reason : reasons) { - if (codeMap[reason.code] != null) + if (codeToReasonMap.put(reason.code, reason) != null) throw new RuntimeException("Two RequestFailureReason-s that map to the same code: " + reason.code); - codeMap[reason.code] = reason; } - codeToReasonMap = codeMap; + exceptionToReasonMap.put(TombstoneOverwhelmingException.class, READ_TOO_MANY_TOMBSTONES); + exceptionToReasonMap.put(WriteTimeoutException.class, TIMEOUT); + exceptionToReasonMap.put(IncompatibleSchemaException.class, INCOMPATIBLE_SCHEMA); + exceptionToReasonMap.put(ReadSizeAbortException.class, READ_SIZE); + exceptionToReasonMap.put(IndexNotAvailableException.class, INDEX_NOT_AVAILABLE); + exceptionToReasonMap.put(NotCMSException.class, NOT_CMS); + exceptionToReasonMap.put(InvalidRoutingException.class, INVALID_ROUTING); + exceptionToReasonMap.put(CoordinatorBehindException.class, COORDINATOR_BEHIND); + exceptionToReasonMap.put(IndexBuildInProgressException.class, INDEX_BUILD_IN_PROGRESS); + + if (exceptionToReasonMap.size() != reasons.length - REASONS_WITHOUT_EXCEPTIONS) + throw new RuntimeException("A new RequestFailureReasons was probably added and you may need to update the exceptionToReasonMap"); } public static RequestFailureReason fromCode(int code) @@ -81,25 +92,18 @@ public static RequestFailureReason fromCode(int code) throw new IllegalArgumentException("RequestFailureReason code must be non-negative (got " + code + ')'); // be forgiving and return UNKNOWN if we aren't aware of the code - for forward compatibility - return code < codeToReasonMap.length ? codeToReasonMap[code] : UNKNOWN; + return codeToReasonMap.getOrDefault(code, UNKNOWN); } public static RequestFailureReason forException(Throwable t) { - if (t instanceof TombstoneOverwhelmingException) - return READ_TOO_MANY_TOMBSTONES; - - if (t instanceof IncompatibleSchemaException) - return INCOMPATIBLE_SCHEMA; - - if (t instanceof NotCMSException) - return NOT_CMS; - - if (t instanceof InvalidRoutingException) - return INVALID_ROUTING; + RequestFailureReason r = exceptionToReasonMap.get(t.getClass()); + if (r != null) + return r; - if (t instanceof CoordinatorBehindException) - return COORDINATOR_BEHIND; + for (Map.Entry, RequestFailureReason> entry : exceptionToReasonMap.entrySet()) + if (entry.getKey().isInstance(t)) + return entry.getValue(); return UNKNOWN; } diff --git a/src/java/org/apache/cassandra/index/IndexBuildInProgressException.java b/src/java/org/apache/cassandra/index/IndexBuildInProgressException.java new file mode 100644 index 000000000000..1807ff36e0b4 --- /dev/null +++ b/src/java/org/apache/cassandra/index/IndexBuildInProgressException.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index; + +/** + * Thrown if a secondary index is not currently available because it is building. + */ +public final class IndexBuildInProgressException extends RuntimeException +{ + public static final String INDEX_BUILD_IN_PROGRESS_ERROR = "The secondary index '%s' is not yet available as it is building"; + + /** + * Creates a new IndexIsBuildingException for the specified index. + * @param index the index + */ + public IndexBuildInProgressException(Index index) + { + super(String.format(INDEX_BUILD_IN_PROGRESS_ERROR, index.getIndexMetadata().name)); + } +} diff --git a/src/java/org/apache/cassandra/index/IndexStatusManager.java b/src/java/org/apache/cassandra/index/IndexStatusManager.java index cc98def63e9a..b11ecd1094bb 100644 --- a/src/java/org/apache/cassandra/index/IndexStatusManager.java +++ b/src/java/org/apache/cassandra/index/IndexStatusManager.java @@ -89,6 +89,7 @@ public > E filterForQuery(E liveEndpoints, Keyspace keysp { // UNKNOWN states are transient/rare; only a few replicas should have this state at any time. See CASSANDRA-19400 Set queryableNonSucceeded = new HashSet<>(4); + Map indexStatusMap = new HashMap<>(); E queryableEndpoints = liveEndpoints.filter(replica -> { @@ -97,7 +98,10 @@ public > E filterForQuery(E liveEndpoints, Keyspace keysp { Index.Status status = getIndexStatus(replica.endpoint(), keyspace.getName(), index.getIndexMetadata().name); if (!index.isQueryable(status)) + { + indexStatusMap.put(replica.endpoint(), status); return false; + } if (status != Index.Status.BUILD_SUCCEEDED) allBuilt = false; @@ -125,7 +129,13 @@ public > E filterForQuery(E liveEndpoints, Keyspace keysp { Map failureReasons = new HashMap<>(); liveEndpoints.without(queryableEndpoints.endpoints()) - .forEach(replica -> failureReasons.put(replica.endpoint(), RequestFailureReason.INDEX_NOT_AVAILABLE)); + .forEach(replica -> { + Index.Status status = indexStatusMap.get(replica.endpoint()); + if (status == Index.Status.FULL_REBUILD_STARTED) + failureReasons.put(replica.endpoint(), RequestFailureReason.INDEX_BUILD_IN_PROGRESS); + else + failureReasons.put(replica.endpoint(), RequestFailureReason.INDEX_NOT_AVAILABLE); + }); throw new ReadFailureException(level, filtered, required, false, failureReasons); } diff --git a/src/java/org/apache/cassandra/index/SecondaryIndexManager.java b/src/java/org/apache/cassandra/index/SecondaryIndexManager.java index 791293fbb951..5f1c6e3d52fb 100644 --- a/src/java/org/apache/cassandra/index/SecondaryIndexManager.java +++ b/src/java/org/apache/cassandra/index/SecondaryIndexManager.java @@ -41,6 +41,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.util.concurrent.FutureCallback; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -306,17 +307,29 @@ public boolean isIndexQueryable(Index index) /** * Throws an {@link IndexNotAvailableException} if any of the indexes in the specified {@link Index.QueryPlan} is - * not queryable, as it's defined by {@link #isIndexQueryable(Index)}. + * not queryable, as it's defined by {@link #isIndexQueryable(Index)}. If the reason for the index to be not available + * is that it's building, it will throw an {@link IndexBuildInProgressException}. * * @param queryPlan a query plan * @throws IndexNotAvailableException if the query plan has any index that is not queryable */ public void checkQueryability(Index.QueryPlan queryPlan) { + InetAddressAndPort endpoint = FBUtilities.getBroadcastAddressAndPort(); + for (Index index : queryPlan.getIndexes()) { + String indexName = index.getIndexMetadata().name; + Index.Status indexStatus = IndexStatusManager.instance.getIndexStatus(endpoint, keyspace.getName(), indexName); + if (!isIndexQueryable(index)) + { + // isQueryable is always true for non-SAI index implementations, thus we need to check both not queryable and building + if (indexStatus == Index.Status.FULL_REBUILD_STARTED) + throw new IndexBuildInProgressException(index); + throw new IndexNotAvailableException(index); + } } } diff --git a/src/java/org/apache/cassandra/net/InboundSink.java b/src/java/org/apache/cassandra/net/InboundSink.java index d07703963547..2e8c8413dcb7 100644 --- a/src/java/org/apache/cassandra/net/InboundSink.java +++ b/src/java/org/apache/cassandra/net/InboundSink.java @@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; import java.util.function.Predicate; +import org.apache.cassandra.index.IndexBuildInProgressException; import org.slf4j.LoggerFactory; import net.openhft.chronicle.core.util.ThrowingConsumer; @@ -126,13 +127,24 @@ public void accept(Message message) fail(message.header, t); if (t instanceof NotCMSException || t instanceof CoordinatorBehindException) + { noSpamLogger.warn(t.getMessage()); - else if (t instanceof TombstoneOverwhelmingException || t instanceof IndexNotAvailableException || t instanceof InvalidRoutingException) + } + else if (t instanceof TombstoneOverwhelmingException || + t instanceof IndexNotAvailableException || + t instanceof IndexBuildInProgressException || + t instanceof InvalidRoutingException) + { noSpamLogger.error(t.getMessage()); + } else if (t instanceof RuntimeException) + { throw (RuntimeException) t; + } else + { throw new RuntimeException(t); + } } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/sai/IndexAvailabilityTest.java b/test/distributed/org/apache/cassandra/distributed/test/sai/IndexAvailabilityTest.java index 53cca2614dc3..66a63fa11b91 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/sai/IndexAvailabilityTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/sai/IndexAvailabilityTest.java @@ -33,6 +33,7 @@ import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.index.Index; @@ -48,6 +49,7 @@ import static org.apache.cassandra.distributed.api.Feature.GOSSIP; import static org.apache.cassandra.distributed.api.Feature.NETWORK; import static org.apache.cassandra.distributed.test.sai.SAIUtil.waitForIndexQueryable; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.awaitility.Awaitility.await; import static org.junit.Assert.assertEquals; @@ -57,7 +59,7 @@ public class IndexAvailabilityTest extends TestBaseImpl private static final String CREATE_TABLE = "CREATE TABLE %s.%s (pk text primary key, v1 int, v2 text) " + "WITH compaction = {'class' : 'SizeTieredCompactionStrategy', 'enabled' : false }"; private static final String CREATE_INDEX = "CREATE CUSTOM INDEX %s ON %s.%s(%s) USING 'StorageAttachedIndex'"; - + private static final Map expectedNodeIndexQueryability = new ConcurrentHashMap<>(); private List keyspaces; private List indexesPerKs; @@ -188,6 +190,82 @@ private void markIndexNonQueryable(IInvokableInstance node, String keyspace, Str }); } + @Test + public void testIndexExceptionsTwoIndexesOn3NodeCluster() throws Exception + { + try (Cluster cluster = init(Cluster.build(3) + .withConfig(config -> config.with(GOSSIP) + .with(NETWORK)) + .start())) + { + String ks2 = "ks2"; + String cf1 = "cf1"; + String index1 = "cf1_idx1"; + String index2 = "cf1_idx2"; + + // Create keyspace, table with correct column types + cluster.schemaChange(String.format(CREATE_KEYSPACE, ks2, 2)); + cluster.schemaChange("CREATE TABLE " + ks2 + '.' + cf1 + " (pk int PRIMARY KEY, v1 int, v2 int)"); + executeOnAllCoordinators(cluster, + "SELECT pk FROM " + ks2 + '.' + cf1 + " WHERE v1=0 AND v2=0 ALLOW FILTERING"); + executeOnAllCoordinators(cluster, + "SELECT pk FROM " + ks2 + '.' + cf1 + " WHERE v2=0 ALLOW FILTERING"); + executeOnAllCoordinators(cluster, + "SELECT pk FROM " + ks2 + '.' + cf1 + " WHERE v1=0 ALLOW FILTERING"); + + cluster.schemaChange(String.format(CREATE_INDEX, index1, ks2, cf1, "v1")); + cluster.schemaChange(String.format(CREATE_INDEX, index2, ks2, cf1, "v2")); + cluster.forEach(node -> expectedNodeIndexQueryability.put(NodeIndex.create(ks2, index1, node), Index.Status.BUILD_SUCCEEDED)); + for (IInvokableInstance node : cluster.get(2, 1, 3)) + for (IInvokableInstance replica : cluster.get(1, 2, 3)) + waitForIndexingStatus(node, ks2, index1, replica, Index.Status.BUILD_SUCCEEDED); + + // Mark only index2 as building on node3, leave index1 in BUILD_SUCCEEDED state + markIndexBuilding(cluster.get(3), ks2, cf1, index2); + cluster.forEach(node -> expectedNodeIndexQueryability.put(NodeIndex.create(ks2, index2, node), Index.Status.FULL_REBUILD_STARTED)); + for (IInvokableInstance node : cluster.get(1, 2, 3)) + waitForIndexingStatus(node, ks2, index2, cluster.get(3), Index.Status.FULL_REBUILD_STARTED); + + assertThatThrownBy(() -> + executeOnAllCoordinators(cluster, + "SELECT pk FROM " + ks2 + '.' + cf1 + " WHERE v1=0 AND v2=0")) + .hasMessageContaining("Operation failed - received 1 responses and 1 failures: INDEX_BUILD_IN_PROGRESS"); + + // Mark only index2 as failing on node2, leave index1 in BUILD_SUCCEEDED state + markIndexBuilding(cluster.get(2), ks2, cf1, index2); + cluster.forEach(node -> expectedNodeIndexQueryability.put(NodeIndex.create(ks2, index2, node), Index.Status.FULL_REBUILD_STARTED)); + for (IInvokableInstance node : cluster.get(1, 2, 3)) + waitForIndexingStatus(node, ks2, index2, cluster.get(2), Index.Status.FULL_REBUILD_STARTED); + + + assertThatThrownBy(() -> + executeOnAllCoordinators(cluster, + "SELECT pk FROM " + ks2 + '.' + cf1 + " WHERE v1=0 AND v2=0")) + .hasMessageContaining("Operation failed - received 1 responses and 1 failures: INDEX_BUILD_IN_PROGRESS"); + + // Mark only index2 as failing on node1, leave index1 in BUILD_SUCCEEDED state + markIndexNonQueryable(cluster.get(1), ks2, cf1, index2); + cluster.forEach(node -> expectedNodeIndexQueryability.put(NodeIndex.create(ks2, index2, node), Index.Status.BUILD_FAILED)); + for (IInvokableInstance node : cluster.get(1, 2, 3)) { + waitForIndexingStatus(node, ks2, index2, cluster.get(1), Index.Status.BUILD_FAILED); + } + + assertThatThrownBy(() -> + executeOnAllCoordinators(cluster, + "SELECT pk FROM " + ks2 + '.' + cf1 + " WHERE v1=0 AND v2=0")) + .hasMessageMatching("^Operation failed - received 0 responses and 2 failures: INDEX_NOT_AVAILABLE from .+, INDEX_BUILD_IN_PROGRESS from .+$"); + } + } + + private void executeOnAllCoordinators(Cluster cluster, String query) + { + // test different coordinator + for (int nodeId = 1; nodeId <= cluster.size(); nodeId++) + { + assertEquals(0, cluster.coordinator(nodeId).execute(query, ConsistencyLevel.LOCAL_QUORUM).length); + } + } + @SuppressWarnings("DataFlowIssue") private void markIndexQueryable(IInvokableInstance node, String keyspace, String table, String indexName) { diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java index c1365e4cc36d..6888ff3a9314 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java @@ -25,12 +25,11 @@ import java.util.concurrent.CountDownLatch; import com.google.common.collect.ImmutableSet; + import org.apache.commons.lang3.StringUtils; import org.junit.BeforeClass; import org.junit.Test; -import org.apache.cassandra.index.internal.CassandraIndex; -import org.apache.cassandra.index.sai.StorageAttachedIndex; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.config.DatabaseDescriptor; @@ -46,10 +45,12 @@ import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; -import org.apache.cassandra.index.IndexNotAvailableException; +import org.apache.cassandra.index.IndexBuildInProgressException; import org.apache.cassandra.index.SecondaryIndexManager; import org.apache.cassandra.index.StubIndex; +import org.apache.cassandra.index.internal.CassandraIndex; import org.apache.cassandra.index.internal.CustomCassandraIndex; +import org.apache.cassandra.index.sai.StorageAttachedIndex; import org.apache.cassandra.index.sasi.SASIIndex; import org.apache.cassandra.schema.IndexMetadata; import org.apache.cassandra.service.ClientState; @@ -1090,7 +1091,7 @@ public void testIndexQueriesWithIndexNotReady() throws Throwable execute("SELECT value FROM %s WHERE value = 2"); fail(); } - catch (IndexNotAvailableException e) + catch (IndexBuildInProgressException e) { assertTrue(true); } @@ -1124,7 +1125,7 @@ public void testReadOnlyIndex() throws Throwable indexName = createIndexAsync("CREATE CUSTOM INDEX ON %s (value) USING '" + ReadOnlyOnFailureIndex.class.getName() + "'"); index = (ReadOnlyOnFailureIndex) getCurrentColumnFamilyStore().indexManager.getIndexByName(indexName); waitForIndexBuilds(indexName); - assertInvalidThrow(IndexNotAvailableException.class, "SELECT value FROM %s WHERE value = 1"); + assertInvalidThrow(IndexBuildInProgressException.class, "SELECT value FROM %s WHERE value = 1"); execute("INSERT INTO %s (pk, ck, value) VALUES (?, ?, ?)", 1, 1, 1); assertEquals(0, index.rowsInserted.size()); @@ -1164,7 +1165,7 @@ public void testWriteOnlyIndex() throws Throwable waitForIndexBuilds(indexName); execute("INSERT INTO %s (pk, ck, value) VALUES (?, ?, ?)", 1, 1, 1); assertEquals(1, index.rowsInserted.size()); - assertInvalidThrow(IndexNotAvailableException.class, "SELECT value FROM %s WHERE value = 1"); + assertInvalidThrow(IndexBuildInProgressException.class, "SELECT value FROM %s WHERE value = 1"); // Upon recovery, we can query data again index.reset(); diff --git a/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java b/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java index b2fdcd365d73..3b89fe9c64b1 100644 --- a/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java +++ b/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java @@ -20,8 +20,10 @@ import org.junit.Test; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertEquals; + public class RequestFailureReasonTest { private static final RequestFailureReason[] REASONS = RequestFailureReason.values(); @@ -37,7 +39,8 @@ public class RequestFailureReasonTest { 7, "READ_TOO_MANY_INDEXES" }, { 8, "NOT_CMS" }, { 9, "INVALID_ROUTING" }, - { 10, "COORDINATOR_BEHIND" } + { 10, "COORDINATOR_BEHIND" }, + { 503, "INDEX_BUILD_IN_PROGRESS" } }; @Test @@ -54,4 +57,42 @@ public void testEnumCodesAndNames() assertEquals("Number of RequestFailureReason enum constants has changed. Update the test.", EXPECTED_VALUES.length, REASONS.length); } + + @Test + public void testFromCode() + { + // Test valid codes + for (Object[] expected : EXPECTED_VALUES) + { + int code = (Integer) expected[0]; + String name = (String) expected[1]; + assertEquals(RequestFailureReason.valueOf(name), RequestFailureReason.fromCode(code)); + } + + // Test invalid codes + assertEquals(RequestFailureReason.UNKNOWN, RequestFailureReason.fromCode(200)); + assertEquals(RequestFailureReason.UNKNOWN, RequestFailureReason.fromCode(999)); + assertThatThrownBy(() -> RequestFailureReason.fromCode(-1)).isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void testExceptionSubclassMapping() + { + // Create a subclass of UnknownTableException + class CustomUnknownTableException extends IncompatibleSchemaException + { + public CustomUnknownTableException(String ks) + { + super(ks); + } + } + + // Verify the parent class still maps correctly + assertEquals(RequestFailureReason.INCOMPATIBLE_SCHEMA, + RequestFailureReason.forException(new CustomUnknownTableException("ks"))); + + // Test unmapped exception returns UNKNOWN + assertEquals(RequestFailureReason.UNKNOWN, + RequestFailureReason.forException(new RuntimeException("test"))); + } } diff --git a/test/unit/org/apache/cassandra/index/IndexStatusManagerTest.java b/test/unit/org/apache/cassandra/index/IndexStatusManagerTest.java index 947b7a57bc47..39401ac1bc47 100644 --- a/test/unit/org/apache/cassandra/index/IndexStatusManagerTest.java +++ b/test/unit/org/apache/cassandra/index/IndexStatusManagerTest.java @@ -359,7 +359,7 @@ public void shouldThrowWhenNoQueryableEndpoints() .hasMessageStartingWith("Operation failed") .hasMessageContaining("INDEX_NOT_AVAILABLE from /127.0.0.253:7000") .hasMessageContaining("INDEX_NOT_AVAILABLE from /127.0.0.254:7000") - .hasMessageContaining("INDEX_NOT_AVAILABLE from /127.0.0.255:7000"); + .hasMessageContaining("INDEX_BUILD_IN_PROGRESS from /127.0.0.255:7000"); } void runTest(Testcase testcase) diff --git a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java index 7a9198a7009a..3ae090518585 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java @@ -21,10 +21,14 @@ import org.junit.Test; import org.apache.cassandra.cql3.restrictions.StatementRestrictions; +import org.apache.cassandra.index.IndexBuildInProgressException; import org.apache.cassandra.index.sai.SAITester; import org.apache.cassandra.index.sai.StorageAttachedIndex; +import org.apache.cassandra.inject.Injections; +import org.apache.cassandra.inject.InvokePointBuilder; import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertNotNull; /** @@ -391,4 +395,32 @@ private void test(String query, boolean requiresAllowFiltering) throws Throwable assertNotNull(execute(query + " ALLOW FILTERING")); } + private static final Injections.Barrier blockIndexBuild = Injections.newBarrier("block_index_build", 2, false) + .add(InvokePointBuilder.newInvokePoint() + .onClass(StorageAttachedIndex.class) + .onMethod("startInitialBuild")) + .build(); + + @Test + public void testAllowFilteringDuringIndexBuild() throws Throwable + { + createTable("CREATE TABLE %s (k int PRIMARY KEY, v int)"); + Injections.inject(blockIndexBuild); + String idx = createIndexAsync(String.format("CREATE CUSTOM INDEX ON %%s(v) USING '%s'", StorageAttachedIndex.class.getName())); + + String expectedErrorMessage = String.format(IndexBuildInProgressException.INDEX_BUILD_IN_PROGRESS_ERROR, idx); + assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE v=0")) + .hasMessage(expectedErrorMessage) + .isInstanceOf(IndexBuildInProgressException.class); + + assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE v=0 ALLOW FILTERING")) + .hasMessage(expectedErrorMessage) + .isInstanceOf(IndexBuildInProgressException.class); + + blockIndexBuild.countDown(); + blockIndexBuild.disable(); + waitForIndexQueryable(idx); + execute("SELECT * FROM %s WHERE v=0"); + execute("SELECT * FROM %s WHERE v=0 ALLOW FILTERING"); + } } From 5bc66043d78d15c990ee2d5c94559af682efa225 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Fri, 4 Apr 2025 09:25:57 -0700 Subject: [PATCH 011/340] ninja: remove unused import --- .../org/apache/cassandra/service/paxos/v1/PrepareCallback.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java b/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java index 315c7abb9fd6..717acf4ab58f 100644 --- a/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java +++ b/src/java/org/apache/cassandra/service/paxos/v1/PrepareCallback.java @@ -41,7 +41,6 @@ import org.apache.cassandra.service.paxos.PrepareResponse; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.UUIDGen; public class PrepareCallback extends AbstractPaxosCallback { From 7f1503d9c9b78512a34c38c0df98b4bd89538d09 Mon Sep 17 00:00:00 2001 From: Matt Byrd Date: Tue, 18 Mar 2025 11:04:00 -0700 Subject: [PATCH 012/340] Split out truncation record locking to prevent it being blocked by slow interval tree build on removeEndpoint Patch by Matt Byrd; reviewed by Ariel Weisberg, Marcus Eriksson for CASSANDRA-20480 --- CHANGES.txt | 1 + .../apache/cassandra/db/SystemKeyspace.java | 47 ++++++++++++------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index c03ffcafa4a5..45a4f16b56e3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Split out truncation record lock (CASSANDRA-20480) * Throw new IndexBuildInProgressException when queries fail during index build, instead of IndexNotAvailableException (CASSANDRA-20402) * Fix Paxos repair interrupts running transactions (CASSANDRA-20469) * Various fixes in constraint framework (CASSANDRA-20481) diff --git a/src/java/org/apache/cassandra/db/SystemKeyspace.java b/src/java/org/apache/cassandra/db/SystemKeyspace.java index 64436a940339..81186c512a7f 100644 --- a/src/java/org/apache/cassandra/db/SystemKeyspace.java +++ b/src/java/org/apache/cassandra/db/SystemKeyspace.java @@ -606,6 +606,8 @@ private static Tables tables() private static volatile Map> truncationRecords; + private static final Object truncationRecordLock = new Object(); + public enum BootstrapState { NEEDS_BOOTSTRAP, @@ -801,27 +803,33 @@ public static Map, Pair> getViewBuildStatus(String ksn return status; } - public static synchronized void saveTruncationRecord(ColumnFamilyStore cfs, long truncatedAt, CommitLogPosition position) + public static void saveTruncationRecord(ColumnFamilyStore cfs, long truncatedAt, CommitLogPosition position) { - String req = "UPDATE system.%s SET truncated_at = truncated_at + ? WHERE key = '%s'"; - executeInternal(format(req, LOCAL, LOCAL), truncationAsMapEntry(cfs, truncatedAt, position)); - truncationRecords = null; - forceBlockingFlush(LOCAL); + synchronized (truncationRecordLock) + { + String req = "UPDATE system.%s SET truncated_at = truncated_at + ? WHERE key = '%s'"; + executeInternal(format(req, LOCAL, LOCAL), truncationAsMapEntry(cfs, truncatedAt, position)); + truncationRecords = null; + forceBlockingFlush(LOCAL); + } } /** * This method is used to remove information about truncation time for specified column family */ - public static synchronized void removeTruncationRecord(TableId id) + public static void removeTruncationRecord(TableId id) { - Pair truncationRecord = getTruncationRecord(id); - if (truncationRecord == null) - return; - - String req = "DELETE truncated_at[?] from system.%s WHERE key = '%s'"; - executeInternal(format(req, LOCAL, LOCAL), id.asUUID()); - truncationRecords = null; - forceBlockingFlush(LOCAL); + synchronized (truncationRecordLock) + { + Pair truncationRecord = getTruncationRecord(id); + if (truncationRecord == null) + return; + + String req = "DELETE truncated_at[?] from system.%s WHERE key = '%s'"; + executeInternal(format(req, LOCAL, LOCAL), id.asUUID()); + truncationRecords = null; + forceBlockingFlush(LOCAL); + } } private static Map truncationAsMapEntry(ColumnFamilyStore cfs, long truncatedAt, CommitLogPosition position) @@ -850,11 +858,14 @@ public static long getTruncatedAt(TableId id) return record == null ? Long.MIN_VALUE : record.right; } - private static synchronized Pair getTruncationRecord(TableId id) + private static Pair getTruncationRecord(TableId id) { - if (truncationRecords == null) - truncationRecords = readTruncationRecords(); - return truncationRecords.get(id); + synchronized (truncationRecordLock) + { + if (truncationRecords == null) + truncationRecords = readTruncationRecords(); + return truncationRecords.get(id); + } } private static Map> readTruncationRecords() From 38f3afcd42f11e8317d620fecafa7f61bc3129f3 Mon Sep 17 00:00:00 2001 From: Sarma Pydipally Date: Fri, 6 Dec 2024 11:28:46 -0500 Subject: [PATCH 013/340] Several doc fixes, particularly developing/cql/ , developing/data-modeling/ and managing/ patch by Sarma Pydipally; reviewed by Bernardo Botella, Brad Schoening, Mick Semb Wever for CASSANDRA-20170 --- .../examples/CQL/create_ks_trans_repl.cql | 2 +- .../cassandra/examples/CQL/no_revoke.cql | 10 +++--- .../vector-search/vector-search-cycling.cql | 4 +-- .../examples/RESULTS/2i/2i-check.result | 8 ++++- .../cassandra/pages/developing/cql/SASI.adoc | 34 +++++++++---------- .../pages/developing/cql/changes.adoc | 10 +++--- .../cassandra/pages/developing/cql/ddl.adoc | 2 +- .../pages/developing/cql/functions.adoc | 2 +- .../cassandra/pages/developing/cql/mvs.adoc | 4 +-- .../pages/developing/cql/security.adoc | 6 ++-- .../data-modeling_conceptual.adoc | 2 +- .../data-modeling/data-modeling_logical.adoc | 6 ++-- .../data-modeling/data-modeling_physical.adoc | 6 ++-- .../data-modeling/data-modeling_queries.adoc | 2 +- .../data-modeling/data-modeling_rdbms.adoc | 2 +- .../data-modeling/data-modeling_refining.adoc | 2 +- .../configuration/cass_jvm_options_file.adoc | 2 +- .../pages/managing/operating/cdc.adoc | 4 +-- .../operating/compaction/tombstones.adoc | 2 +- .../managing/operating/topo_changes.adoc | 2 +- .../managing/tools/sstable/sstableverify.adoc | 4 +-- .../reference/cql-commands/create-index.adoc | 4 +-- .../cql-commands/create-table-examples.adoc | 2 +- .../cassandra/pages/reference/static.adoc | 2 +- .../pages/troubleshooting/use_tools.adoc | 4 +-- .../pages/vector-search/data-modeling.adoc | 4 +-- .../cassandra/partials/cql-syntax-legend.adoc | 4 +-- .../partials/table-column-definitions.adoc | 2 +- .../cassandra/partials/table-properties.adoc | 4 +-- 29 files changed, 74 insertions(+), 68 deletions(-) diff --git a/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql b/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql index afff433eec8f..4fe1c3a98e77 100644 --- a/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql +++ b/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql @@ -1,2 +1,2 @@ CREATE KEYSPACE some_keyspace - WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : '3/1'', 'DC2' : '5/2'}; + WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : '3/1', 'DC2' : '5/2'}; diff --git a/doc/modules/cassandra/examples/CQL/no_revoke.cql b/doc/modules/cassandra/examples/CQL/no_revoke.cql index b6a044cf2038..c12b210b76e5 100644 --- a/doc/modules/cassandra/examples/CQL/no_revoke.cql +++ b/doc/modules/cassandra/examples/CQL/no_revoke.cql @@ -1,5 +1,5 @@ -* `system_schema.keyspaces` -* `system_schema.columns` -* `system_schema.tables` -* `system.local` -* `system.peers` +* system_schema.keyspaces +* system_schema.columns +* system_schema.tables +* system.local +* system.peers diff --git a/doc/modules/cassandra/examples/CQL/vector-search/vector-search-cycling.cql b/doc/modules/cassandra/examples/CQL/vector-search/vector-search-cycling.cql index cc8dad67741f..9b3984b19503 100644 --- a/doc/modules/cassandra/examples/CQL/vector-search/vector-search-cycling.cql +++ b/doc/modules/cassandra/examples/CQL/vector-search/vector-search-cycling.cql @@ -22,7 +22,7 @@ WITH CLUSTERING ORDER BY (created_at DESC); // tag::alter-vs-table[] ALTER TABLE cycling.comments_vs - ADD comment_vector VECTOR ; <1> + ADD comment_vector VECTOR ; // end::alter-vs-table[] // tag::create-vs-index[] @@ -116,4 +116,4 @@ SELECT comment, similarity_cosine(comment_vector, [0.2, 0.15, 0.3, 0.2, 0.05]) FROM cycling.comments_vs ORDER BY comment_vector ANN OF [0.1, 0.15, 0.3, 0.12, 0.05] LIMIT 1; -// end::select-vector-data-similarity-cycling[] \ No newline at end of file +// end::select-vector-data-similarity-cycling[] diff --git a/doc/modules/cassandra/examples/RESULTS/2i/2i-check.result b/doc/modules/cassandra/examples/RESULTS/2i/2i-check.result index 2fd9f9570028..2d7ce864b95e 100644 --- a/doc/modules/cassandra/examples/RESULTS/2i/2i-check.result +++ b/doc/modules/cassandra/examples/RESULTS/2i/2i-check.result @@ -1 +1,7 @@ -TBD \ No newline at end of file +CREATE TABLE cycling.birthday_list ( + cyclist_name text PRIMARY KEY, +. +. +. + +CREATE INDEX blist_values_idx ON cycling.birthday_list (values(blist)); diff --git a/doc/modules/cassandra/pages/developing/cql/SASI.adoc b/doc/modules/cassandra/pages/developing/cql/SASI.adoc index 705cf1d3372c..93d87f8ff385 100644 --- a/doc/modules/cassandra/pages/developing/cql/SASI.adoc +++ b/doc/modules/cassandra/pages/developing/cql/SASI.adoc @@ -1,4 +1,4 @@ -== SASI Index += SASI Index https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java[`SASIIndex`], or ``SASI`` for short, is an implementation of Cassandra's `Index` @@ -9,7 +9,7 @@ has superior performance in cases where queries would previously require filtering. In achieving this performance, SASI aims to be significantly less resource intensive than existing implementations, in memory, disk, and CPU usage. In addition, SASI supports prefix and contains queries on -strings (similar to SQL's `LIKE = "foo*"` or `LIKE = "*foo*"'`). +strings (similar to SQL's ``LIKE = "foo\*"`` or ``LIKE = "*foo*"`` ). The following goes on describe how to get up and running with SASI, demonstrates usage with examples, and provides some details on its @@ -357,7 +357,7 @@ parts: Indexing and Querying. Further, Cassandra makes it possible to divide those responsibilities into the memory and disk components. SASI takes advantage of Cassandra's write-once, immutable, ordered data model to build indexes along with the flushing of the memtable to disk – this -is the origin of the name ``SSTable Attached Secondary Index''. +is the origin of the name `SSTable Attached Secondary Index`. The SASI index data structures are built in memory as the SSTable is being written and they are flushed to disk before the writing of the @@ -405,15 +405,15 @@ or more page-sized blocks. The https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`] is structured as a tree of arrays, where each level describes the terms in the level below, the final level being the terms themselves. The -`PointerLevel`s and their `PointerBlock`s contain terms and pointers to +``PointerLevel``s and their ``PointerBlock``s contain terms and pointers to other blocks that _end_ with those terms. The `DataLevel`, the final -level, and its `DataBlock`s contain terms and point to the data itself, +level, and its ``DataBlock``s contain terms and point to the data itself, contained in https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s. The terms written to the https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`] -vary depending on its ``mode'': either `PREFIX`, `CONTAINS`, or +vary depending on its `mode` : either `PREFIX`, `CONTAINS`, or `SPARSE`. In the `PREFIX` and `SPARSE` cases, terms' exact values are written exactly once per `OnDiskIndex`. For example, when using a `PREFIX` index with terms `Jason`, `Jordan`, `Pavel`, all three will be @@ -428,14 +428,14 @@ is built merging all the https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s for each term into a single one. This copy of the data is used for efficient iteration of large ranges of e.g. timestamps. The index -``mode'' is configurable per column at index creation time. +`mode` is configurable per column at index creation time. ===== TokenTree(Builder) The https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`] is an implementation of the well-known -https://en.wikipedia.org/wiki/B%2B_tree[B+-tree] that has been modified +https://en.wikipedia.org/wiki/B%2B_tree[B+ tree] that has been modified to optimize for its use-case. In particular, it has been optimized to associate tokens, longs, with a set of positions in an SSTable, also longs. Allowing the set of long values accommodates the possibility of a @@ -519,7 +519,7 @@ execution. During the analysis phase, https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`] -converts from Cassandra's internal representation of `IndexExpression`s, +converts from Cassandra's internal representation of ``IndexExpression``s, which has also been modified to support encoding queries that contain ORs and groupings of expressions using parentheses (see the link:#cassandra-internal-changes[Cassandra Internal Changes] section @@ -653,8 +653,8 @@ like this: The last type of optimization applied, for this query, is to merge range expressions across branches of the tree – without modifying the meaning of the query, of course. In this case, because the query contains all -`AND`s the `age` expressions can be collapsed. Along with this -optimization, the initial collapsing of unneeded `AND`s can also be +``AND``s the `age` expressions can be collapsed. Along with this +optimization, the initial collapsing of unneeded ``AND``s can also be applied once more to result in this final tree using to execute the query: @@ -683,7 +683,7 @@ class, more specifically, can have zero, one, or two https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s as children and an unlimited number of expressions. The iterators used to perform the queries, discussed below in the -``Range(Union|Intersection)Iterator'' section, implement the necessary +`Range(Union|Intersection)Iterator` section, implement the necessary logic to merge results transparently regardless of the https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s children. @@ -706,14 +706,14 @@ the code]. The abstract `RangeIterator` class provides a unified interface over the two main operations performed by SASI at various layers in the execution path: set intersection and union. These operations are performed in a -iterated, or ``streaming'', fashion to prevent unneeded reads of +iterated, or `streaming`, fashion to prevent unneeded reads of elements from either set. In both the intersection and union cases the algorithms take advantage of the data being pre-sorted using the same sort order, e.g. term or token order. The https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`] -performs the ``Merge-Join'' portion of the +performs the `Merge-Join` portion of the https://en.wikipedia.org/wiki/Sort-merge_join[Sort-Merge-Join] algorithm, with the properties of an outer-join, or union. It is implemented with several optimizations to improve its performance over a @@ -733,7 +733,7 @@ between them based on some properties of the data. `BounceIntersectionIterator`, and the `BOUNCE` strategy, works like the https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`] -in that it performs a ``Merge-Join'', however, its nature is similar to +in that it performs a `Merge-Join`, however, its nature is similar to a inner-join, where like values are merged by a data-specific merge function (e.g. merging two tokens in a list to lookup in a SSTable later). See the @@ -742,7 +742,7 @@ for more details on its implementation. `LookupIntersectionIterator`, and the `LOOKUP` strategy, performs a different operation, more similar to a lookup in an associative data -structure, or ``hash lookup'' in database terminology. Once again, +structure, or `hash lookup` in database terminology. Once again, details on the implementation can be found in the https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L199-L208[javadoc]. @@ -794,7 +794,7 @@ The following are items that can be addressed in future updates but are not available in this repository or are not currently implemented. * The cluster must be configured to use a partitioner that produces -`LongToken`s, e.g. `Murmur3Partitioner`. Other existing partitioners +``LongToken``s, e.g. `Murmur3Partitioner`. Other existing partitioners which don't produce LongToken e.g. `ByteOrderedPartitioner` and `RandomPartitioner` will not work with SASI. * Not Equals and OR support have been removed in this release while diff --git a/doc/modules/cassandra/pages/developing/cql/changes.adoc b/doc/modules/cassandra/pages/developing/cql/changes.adoc index 7bf6bd6d03b9..7bee9a235c43 100644 --- a/doc/modules/cassandra/pages/developing/cql/changes.adoc +++ b/doc/modules/cassandra/pages/developing/cql/changes.adoc @@ -1,4 +1,4 @@ -= Changes += CQL Changes The following describes the changes in each version of CQL. @@ -57,15 +57,15 @@ explicitly set. * `ALTER TABLE` `ADD` and `DROP` now allow multiple columns to be added/removed. * New `PER PARTITION LIMIT` option for `SELECT` statements (see -https://issues.apache.org/jira/browse/CASSANDRA-7017)[CASSANDRA-7017]. +https://issues.apache.org/jira/browse/CASSANDRA-7017[CASSANDRA-7017]). * `User-defined functions ` can now instantiate `UDTValue` and `TupleValue` instances via the new `UDFContext` interface (see -https://issues.apache.org/jira/browse/CASSANDRA-10818)[CASSANDRA-10818]. +https://issues.apache.org/jira/browse/CASSANDRA-10818[CASSANDRA-10818]). * `User-defined types ` may now be stored in a non-frozen form, allowing individual fields to be updated and deleted in `UPDATE` statements and `DELETE` statements, respectively. -(https://issues.apache.org/jira/browse/CASSANDRA-7423)[CASSANDRA-7423]). +(https://issues.apache.org/jira/browse/CASSANDRA-7423[CASSANDRA-7423]). == 3.4.1 @@ -169,7 +169,7 @@ and `UPDATE` supports `IF` conditions. * `SELECT`, `UPDATE`, and `DELETE` statements now allow empty `IN` relations (see -https://issues.apache.org/jira/browse/CASSANDRA-5626)[CASSANDRA-5626]. +https://issues.apache.org/jira/browse/CASSANDRA-5626[CASSANDRA-5626]). == 3.0.4 diff --git a/doc/modules/cassandra/pages/developing/cql/ddl.adoc b/doc/modules/cassandra/pages/developing/cql/ddl.adoc index a546e12b92b9..c18b26b83f33 100644 --- a/doc/modules/cassandra/pages/developing/cql/ddl.adoc +++ b/doc/modules/cassandra/pages/developing/cql/ddl.adoc @@ -283,7 +283,7 @@ following modifiers: Some columns can be declared as `STATIC` in a table definition. A column that is static will be “shared” by all the rows belonging to the same -partition (having the same xref:cassandra:developing/cql/ddl.adoc#partition-key[partition key]. +partition (having the same xref:cassandra:developing/cql/ddl.adoc#partition-key[partition key]). For example: diff --git a/doc/modules/cassandra/pages/developing/cql/functions.adoc b/doc/modules/cassandra/pages/developing/cql/functions.adoc index 75786de271a3..9599b98a2434 100644 --- a/doc/modules/cassandra/pages/developing/cql/functions.adoc +++ b/doc/modules/cassandra/pages/developing/cql/functions.adoc @@ -301,7 +301,7 @@ UDFs can be _overloaded_, so that multiple UDFs with different argument types ca [NOTE] ==== _JavaScript_ user-defined functions have been deprecated in Cassandra 4.1. In preparation for Cassandra 5.0, their removal is -already in progress. For more information - CASSANDRA-17281, CASSANDRA-18252. +already in progress. For more information - https://issues.apache.org/jira/browse/CASSANDRA-17281[CASSANDRA-17281], https://issues.apache.org/jira/browse/CASSANDRA-18252[CASSANDRA-18252]. ==== For example: diff --git a/doc/modules/cassandra/pages/developing/cql/mvs.adoc b/doc/modules/cassandra/pages/developing/cql/mvs.adoc index e2949fd73685..00f023206f3d 100644 --- a/doc/modules/cassandra/pages/developing/cql/mvs.adoc +++ b/doc/modules/cassandra/pages/developing/cql/mvs.adoc @@ -73,7 +73,7 @@ The `WHERE` clause has the following restrictions: ** no other restriction is allowed ** cannot have columns that are part of the _view_ primary key be null, they must always be at least restricted by a `IS NOT NULL` restriction (or any other restriction, but they must have one). -* cannot have an xref:cassandra:developing/cql/dml.adoc#ordering-clause[ordering clause], a xref:cassandra:developing/cql/dml.adoc#limit-clause[limit], or xref:cassandra:developing/cql/dml.adoc#allow-filtering[ALLOW FILTERING +* cannot have an xref:cassandra:developing/cql/dml.adoc#ordering-clause[ordering clause], a xref:cassandra:developing/cql/dml.adoc#limit-clause[limit], or xref:cassandra:developing/cql/dml.adoc#allow-filtering[ALLOW FILTERING] === MV primary key @@ -152,5 +152,5 @@ Removal of columns not selected in the Materialized View (via `DELETE unselected_column FROM base`) may shadow missed updates to other columns received by hints or repair. For this reason, we advise against doing deletions on base columns not selected in views until this is -fixed on CASSANDRA-13826. +fixed on https://issues.apache.org/jira/browse/CASSANDRA-13826[CASSANDRA-13826]. ==== diff --git a/doc/modules/cassandra/pages/developing/cql/security.adoc b/doc/modules/cassandra/pages/developing/cql/security.adoc index f751a1658df2..0af30a9d1541 100644 --- a/doc/modules/cassandra/pages/developing/cql/security.adoc +++ b/doc/modules/cassandra/pages/developing/cql/security.adoc @@ -171,7 +171,7 @@ xref:cassandra:developing/cql/security.adoc#authorization[authorization]. However, if authorization is enabled, xref:cassandra:developing/cql/security.adoc#cql-permissions[permissions] of the dropped role are also revoked, subject to the xref:cassandra:developing/cql/security.adoc#auth-caching[caching options] configured in xref:cassandra:developing/cql/configuring.adoc#cassandra.yaml[cassandra-yaml] file. Should a dropped role be subsequently recreated and have new xref:security.adoc#grant-permission-statement[permissions] or -xref:security.adoc#grant-role-statement[roles]` granted to it, any client sessions still +xref:security.adoc#grant-role-statement[roles] granted to it, any client sessions still connected will acquire the newly granted permissions and roles. ==== @@ -332,7 +332,7 @@ Existing users can be listed using the `LIST USERS` statement: include::cassandra:example$BNF/list_users_statement.bnf[] ---- -Note that this statement is equivalent to xref:security.adoc#list-roles-statement[`LIST ROLES], but only roles with the `LOGIN` privilege are included in the output. +Note that this statement is equivalent to xref:security.adoc#list-roles-statement[LIST ROLES], but only roles with the `LOGIN` privilege are included in the output. == Data Control @@ -648,5 +648,5 @@ which were directly granted to `bob` or one of `bob`'s roles: include::cassandra:example$CQL/list_select_perm.cql[] ---- -Show any permissions granted to `carlos` or any of `carlos`'s roles, +Show any permissions granted to `carlos` or any roles assigned to `carlos`, limited to `SELECT` permissions on any resource. diff --git a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_conceptual.adoc b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_conceptual.adoc index ca59a38800d1..3e28c34b9819 100644 --- a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_conceptual.adoc +++ b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_conceptual.adoc @@ -30,7 +30,7 @@ underlined. Relationships between entities are represented as diamonds, and the connectors between the relationship and each entity show the multiplicity of the connection. -image::data-modeling_hotel_erd.png[image] +image::data_modeling_hotel_erd.png[image] Obviously, in the real world, there would be many more considerations and much more complexity. For example, hotel rates are notoriously diff --git a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_logical.adoc b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_logical.adoc index 82aeb5d11446..80ddf3b6f0d6 100644 --- a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_logical.adoc +++ b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_logical.adoc @@ -34,7 +34,7 @@ informative way to visualize the relationships between queries and tables in your designs. This figure shows the Chebotko notation for a logical data model. -image::cassandra:developing/data-modeling/data-modeling_chebotko_logical.png[image] +image::cassandra:developing/data-modeling/data_modeling_chebotko_logical.png[image] Each table is shown with its title and a list of columns. Primary key columns are identified via symbols such as *K* for partition key columns @@ -51,7 +51,7 @@ dedicated tables for rooms or amenities, as you had in the relational design. This is because the workflow didn't identify any queries requiring this direct access. -image::cassandra:developing/data-modeling/data-modeling_hotel_logical.png[image] +image::cassandra:developing/data-modeling/data_modeling_hotel_logical.png[image] Let's explore the details of each of these tables. @@ -127,7 +127,7 @@ shows a logical data model for reservations. You'll notice that these tables represent a denormalized design; the same data appears in multiple tables, with differing keys. -image::cassandra:developing/data-modeling/data-modeling_reservation_logical.png[image] +image::cassandra:developing/data-modeling/data_modeling_reservation_logical.png[image] In order to satisfy Q6, the `reservations_by_guest` table can be used to look up the reservation by guest name. You could envision query Q7 being diff --git a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_physical.adoc b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_physical.adoc index 1328e459be17..ca9839b75057 100644 --- a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_physical.adoc +++ b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_physical.adoc @@ -19,7 +19,7 @@ notation for physical data models. To draw physical models, you need to be able to add the typing information for each column. This figure shows the addition of a type for each column in a sample table. -image::cassandra:developing/data-modeling/data-modeling_chebotko_physical.png[image] +image::cassandra:developing/data-modeling/data_modeling_chebotko_physical.png[image] The figure includes a designation of the keyspace containing each table and visual cues for columns represented using collections and @@ -61,7 +61,7 @@ As you work to create physical representations of various tables in the logical hotel data model, you use the same approach. The resulting design is shown in this figure: -image::cassandra:developing/data-modeling/data-modeling_hotel_physical.png[image] +image::cassandra:developing/data-modeling/data_modeling_hotel_physical.png[image] Note that the `address` type is also included in the design. It is designated with an asterisk to denote that it is a user-defined type, @@ -86,7 +86,7 @@ first iteration of your physical data model design, assume you're going to manage this denormalization manually. Note that this design could be revised to use Cassandra's (experimental) materialized view feature. -image::cassandra:developing/data-modeling/data-modeling_reservation_physical.png[image] +image::cassandra:developing/data-modeling/data_modeling_reservation_physical.png[image] Note that the `address` type is reproduced in this keyspace and `guest_id` is modeled as a `uuid` type in all of the tables. diff --git a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc index 3a4fb8d54a2c..b33e91e05e4f 100644 --- a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc +++ b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc @@ -53,7 +53,7 @@ to obtain detailed description of the hotel. The act of booking a room creates a reservation record that may be accessed by the guest and hotel staff at a later time through various additional queries. -image::cassandra:developing/data-modeling/data-modeling_hotel_queries.png[image] +image::cassandra:developing/data-modeling/data_modeling_hotel_queries.png[image] _Material adapted from Cassandra, The Definitive Guide. Published by O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All diff --git a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_rdbms.adoc b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_rdbms.adoc index 3de1210a5543..c045d7321463 100644 --- a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_rdbms.adoc +++ b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_rdbms.adoc @@ -12,7 +12,7 @@ relationships from the conceptual model of hotels-to-points of interest, rooms-to-amenities, rooms-to-availability, and guests-to-rooms (via a reservation). -image::data-modeling_hotel_relational.png[image] +image::data_modeling_hotel_relational.png[image] == Design Differences Between RDBMS and Cassandra diff --git a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_refining.adoc b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_refining.adoc index d613c2cea816..d7ea619c88f1 100644 --- a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_refining.adoc +++ b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_refining.adoc @@ -188,7 +188,7 @@ the original design is shown in the figure below. While the `month` column is partially duplicative of the `date`, it provides a nice way of grouping related data in a partition that will not get too large. -image::data-modeling_hotel_bucketing.png[image] +image::data_modeling_hotel_bucketing.png[image] If you really felt strongly about preserving a wide partition design, you could instead add the `room_id` to the partition key, so that each diff --git a/doc/modules/cassandra/pages/managing/configuration/cass_jvm_options_file.adoc b/doc/modules/cassandra/pages/managing/configuration/cass_jvm_options_file.adoc index 79f67b6abb06..b9057a490424 100644 --- a/doc/modules/cassandra/pages/managing/configuration/cass_jvm_options_file.adoc +++ b/doc/modules/cassandra/pages/managing/configuration/cass_jvm_options_file.adoc @@ -14,7 +14,7 @@ See each file for examples of settings. [NOTE] ==== -The `jvm-*` files replace the `cassandra-envsh` file used in Cassandra +The `jvm-\*` files replace the `cassandra-env.sh` file used in Cassandra versions prior to Cassandra 3.0. The `cassandra-env.sh` bash script file is still useful if JVM settings must be dynamically calculated based on system settings. The `jvm-*` files only store static JVM settings. diff --git a/doc/modules/cassandra/pages/managing/operating/cdc.adoc b/doc/modules/cassandra/pages/managing/operating/cdc.adoc index b368633c250b..98956caebecc 100644 --- a/doc/modules/cassandra/pages/managing/operating/cdc.adoc +++ b/doc/modules/cassandra/pages/managing/operating/cdc.adoc @@ -87,5 +87,5 @@ tables will be rejected unless some consumption process is in place. == Further Reading -* https://issues.apache.org/jira/browse/CASSANDRA-8844[JIRA ticket] -* https://issues.apache.org/jira/browse/CASSANDRA-12148[JIRA ticket] +* Change Data Capture ( https://issues.apache.org/jira/browse/CASSANDRA-8844[CASSANDRA-8844 JIRA ticket] ) +* Improve determinism of CDC data availability ( https://issues.apache.org/jira/browse/CASSANDRA-12148[CASSANDRA-12148 JIRA ticket] ) diff --git a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc index 39592865247b..9e0dcb6f7879 100644 --- a/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc +++ b/doc/modules/cassandra/pages/managing/operating/compaction/tombstones.adoc @@ -35,7 +35,7 @@ This kind of deleted but persistent object is called a https://cassandra.apache. == Grace period To prevent the reappearance of zombies, {cassandra} gives each tombstone a grace period. -The grace period for a tombstone is set with the table property ` WITH gc_grace_seconds`. +The grace period for a tombstone is set with the table property `WITH gc_grace_seconds`. Its default value is 864000 seconds (ten days), after which a tombstone expires and can be deleted during compaction. Prior to the grace period expiring, {cassandra} will retain a tombstone through compaction events. Each table can have its own value for this property. diff --git a/doc/modules/cassandra/pages/managing/operating/topo_changes.adoc b/doc/modules/cassandra/pages/managing/operating/topo_changes.adoc index 9c1e9519c63d..2bd85519b54c 100644 --- a/doc/modules/cassandra/pages/managing/operating/topo_changes.adoc +++ b/doc/modules/cassandra/pages/managing/operating/topo_changes.adoc @@ -97,7 +97,7 @@ in `nodetool netstats`. The replacing node will now start to bootstrap the data from the rest of the nodes in the cluster. A replacing node will only receive writes during the bootstrapping phase if it has a different ip address to the -node that is being replaced. (See CASSANDRA-8523 and CASSANDRA-12344) +node that is being replaced. ( See https://issues.apache.org/jira/browse/CASSANDRA-8523[CASSANDRA-8523] and https://issues.apache.org/jira/browse/CASSANDRA-12344[CASSANDRA-12344] ) Once the bootstrapping is complete the node will be marked "UP". diff --git a/doc/modules/cassandra/pages/managing/tools/sstable/sstableverify.adoc b/doc/modules/cassandra/pages/managing/tools/sstable/sstableverify.adoc index 061edf4978de..a807078a2c40 100644 --- a/doc/modules/cassandra/pages/managing/tools/sstable/sstableverify.adoc +++ b/doc/modules/cassandra/pages/managing/tools/sstable/sstableverify.adoc @@ -9,7 +9,7 @@ results will occur. Note: the script does not verify that Cassandra is stopped. == WARNING -See CASSANDRA-9947 and CASSANDRA-17017 for discussion around risks with this tool. Specifically: "We mark sstables that fail verification as unrepaired, but that's not going to do what you think. What it means is that the local node will use that sstable in the next repair, but other nodes will not. So all we'll end up doing is streaming whatever data we can read from it, to the other replicas. If we could magically mark whatever sstables correspond on the remote nodes, to the data in the local sstable, that would work, but we can't." +See https://issues.apache.org/jira/browse/CASSANDRA-9947[CASSANDRA-9947] and https://issues.apache.org/jira/browse/CASSANDRA-17017[CASSANDRA-17017] for discussion around risks with this tool. Specifically: "We mark sstables that fail verification as unrepaired, but that's not going to do what you think. What it means is that the local node will use that sstable in the next repair, but other nodes will not. So all we'll end up doing is streaming whatever data we can read from it, to the other replicas. If we could magically mark whatever sstables correspond on the remote nodes, to the data in the local sstable, that would work, but we can't." This tool requires the use of a -f or --force flag to indicate that the user understands the risks and would like to attempt its usage anyway. @@ -23,7 +23,7 @@ sstableverify |-e, --extended |extended verification |-h, --help |display this help message |-v, --verbose |verbose output -|-f, --force |allow use of tool (see CASSANDRA-17017 for risks) +|-f, --force |allow use of tool (see https://issues.apache.org/jira/browse/CASSANDRA-17017[CASSANDRA-17017] for risks) |=== == Basic Verification diff --git a/doc/modules/cassandra/pages/reference/cql-commands/create-index.adoc b/doc/modules/cassandra/pages/reference/cql-commands/create-index.adoc index 71994d86e194..ae95c33a1d50 100644 --- a/doc/modules/cassandra/pages/reference/cql-commands/create-index.adoc +++ b/doc/modules/cassandra/pages/reference/cql-commands/create-index.adoc @@ -240,7 +240,7 @@ include::cassandra:example$RESULTS/sai/select_all_from_cyclist_career_teams-team You can create an index on xref:cassandra:developing/cql/indexing/2i/_2i-create-on-collection.adoc[map collection keys]. If an index of the map values of the collection exists, drop that index before creating an index on the map collection keys. -Assume a cyclist table contains this map data where `nation is the map key and `Canada` is the map value`: +Assume a cyclist table contains this map data where `nation` is the map key and `Canada` is the map value: [source,no-highlight] ---- @@ -471,4 +471,4 @@ SELECT result:: include::cassandra:example$RESULTS/sai/race_starts-queries.result[] ---- -- -==== \ No newline at end of file +==== diff --git a/doc/modules/cassandra/pages/reference/cql-commands/create-table-examples.adoc b/doc/modules/cassandra/pages/reference/cql-commands/create-table-examples.adoc index 47ab8497344b..d3487202b56e 100644 --- a/doc/modules/cassandra/pages/reference/cql-commands/create-table-examples.adoc +++ b/doc/modules/cassandra/pages/reference/cql-commands/create-table-examples.adoc @@ -65,7 +65,7 @@ CDC logging must be enabled in cassandra.yaml. ==== Before enabling CDC logging, have a plan for moving and consuming the log information. After the disk space limit is reached, writes to CDC-enabled tables are rejected until more space is freed. -See https://docs.datastax.com/en/dse/6.8/dse-admin/datastax_enterprise/config/configCassandra_yaml.html#configCassandra_yaml__cdcSpaceSection[Change-data-capture (CDC) space settings] for information about available CDC settings. +See https://docs.datastax.com/en/dse/6.8/dse-admin/datastax_enterprise/config/configCassandra_yaml.html#cdcSpaceSection[Change-data-capture (CDC) space settings] for information about available CDC settings. ==== == Storing data in descending order diff --git a/doc/modules/cassandra/pages/reference/static.adoc b/doc/modules/cassandra/pages/reference/static.adoc index afa193cdbb33..d27adc76ea5f 100644 --- a/doc/modules/cassandra/pages/reference/static.adoc +++ b/doc/modules/cassandra/pages/reference/static.adoc @@ -2,7 +2,7 @@ :description: In a table that uses clustering columns, non-clustering columns can be declared static in the table definition. Static column values are shared among the rows in the partition. -In a table that uses https://cassandra.apache.org/_/glossary.html#clustering-column[clustering columns], non-clustering columns can be declared static in the table definition. +In a table that uses https://cassandra.apache.org/\_/glossary.html#clustering-column[clustering columns], non-clustering columns can be declared static in the table definition. https://cassandra.apache.org/_/glossary.html#static-column[Static columns] are only static within a given partition. In the following example, the `flag` column is static: diff --git a/doc/modules/cassandra/pages/troubleshooting/use_tools.adoc b/doc/modules/cassandra/pages/troubleshooting/use_tools.adoc index e458b55919aa..ed72f5433d72 100644 --- a/doc/modules/cassandra/pages/troubleshooting/use_tools.adoc +++ b/doc/modules/cassandra/pages/troubleshooting/use_tools.adoc @@ -18,10 +18,10 @@ stacks. [arabic] . By default Cassandra ships with `-XX:+PerfDisableSharedMem` set to -prevent long pauses (see `CASSANDRA-9242` and `CASSANDRA-9483` for +prevent long pauses (see https://issues.apache.org/jira/browse/CASSANDRA-9242[CASSANDRA-9242] and https://issues.apache.org/jira/browse/CASSANDRA-9483[CASSANDRA-9483] for details). If you want to use JVM tooling you can instead have `/tmp` mounted on an in memory `tmpfs` which also effectively works around -`CASSANDRA-9242`. +https://issues.apache.org/jira/browse/CASSANDRA-9242[CASSANDRA-9242] . . Make sure you run the tools as the same user as Cassandra is running as, e.g. if the database is running as `cassandra` the tool also has to be run as `cassandra`, e.g. via `sudo -u cassandra `. diff --git a/doc/modules/cassandra/pages/vector-search/data-modeling.adoc b/doc/modules/cassandra/pages/vector-search/data-modeling.adoc index 232873ea196c..d9a862377de9 100644 --- a/doc/modules/cassandra/pages/vector-search/data-modeling.adoc +++ b/doc/modules/cassandra/pages/vector-search/data-modeling.adoc @@ -1,4 +1,4 @@ -= Data Modeling += Vector Search : Data Modeling As you develop AI and Machine Learning (ML) applications using Vector Search, here are some data modeling considerations. These factors help effectively leverage vector search to produce accurate and efficient search responses within your application. @@ -162,4 +162,4 @@ While the vector embeddings can replace or augment some functions of a tradition * Vector embeddings are not human-readable. Embeddings are not recommended when seeking to directly retrieve data from a table. -* The model might not be able to capture all relevant information from the data, leading to incorrect or incomplete results. \ No newline at end of file +* The model might not be able to capture all relevant information from the data, leading to incorrect or incomplete results. diff --git a/doc/modules/cassandra/partials/cql-syntax-legend.adoc b/doc/modules/cassandra/partials/cql-syntax-legend.adoc index 0064826f66ce..500b5b71688f 100644 --- a/doc/modules/cassandra/partials/cql-syntax-legend.adoc +++ b/doc/modules/cassandra/partials/cql-syntax-legend.adoc @@ -41,7 +41,7 @@ Use single quotation marks to preserve upper case. Braces (`{ }`) enclose map collections or key value pairs. A colon separates the key and the value. -| `<,>` +| `< , >` | Set, list, map, or tuple. Angle brackets ( `< >` ) enclose data types in a set, list, map, or tuple. Separate the data types with a comma. @@ -60,4 +60,4 @@ This syntax is useful when arguments might be mistaken for command line options. | `@=''` | Search CQL only: Identify the entity and literal value to overwrite the XML element in the schema and solrConfig files. -|=== \ No newline at end of file +|=== diff --git a/doc/modules/cassandra/partials/table-column-definitions.adoc b/doc/modules/cassandra/partials/table-column-definitions.adoc index 9abe065f6418..2a5e3e40df5a 100644 --- a/doc/modules/cassandra/partials/table-column-definitions.adoc +++ b/doc/modules/cassandra/partials/table-column-definitions.adoc @@ -8,7 +8,7 @@ Each column is defined using the following syntax: `+column_name cql_type_defini *Restriction:* * A table must have at least one `PRIMARY KEY`. -* When `PRIMARY KEY` is at the end of a column definition, that column is the only primary key for the table, and is defined as the https://cassandra.apache.org/_/glossary.html#[partition-key][partition key]. +* When `PRIMARY KEY` is at the end of a column definition, that column is the only primary key for the table, and is defined as the https://cassandra.apache.org/_/glossary.html#partition-key[partition key]. * A static column cannot be a primary key. * Primary keys can include frozen collections. diff --git a/doc/modules/cassandra/partials/table-properties.adoc b/doc/modules/cassandra/partials/table-properties.adoc index d15ad30eb9aa..9aa6f16d3da5 100644 --- a/doc/modules/cassandra/partials/table-properties.adoc +++ b/doc/modules/cassandra/partials/table-properties.adoc @@ -90,7 +90,7 @@ Tombstoned records within the grace period are excluded from xref:managing/opera ==== + In a single-node cluster, this property can safely be set to zero. -You can also reduce this value for tables whose data is not explicitly deleted -- for example, tables containing only data with https://cassandra.apache.org/_/glossary.html#gloss_ttl[TTL] set, or tables with `default_time_to_live` set. +You can also reduce this value for tables whose data is not explicitly deleted -- for example, tables containing only data with https://cassandra.apache.org/_/glossary.html#ttl[TTL] set, or tables with `default_time_to_live` set. However, if you lower the `gc_grace_seconds` value, consider its interaction with these operations: + @@ -127,7 +127,7 @@ The max_index_interval is the sparsest possible sampling in relation to memory p *speculative_retry* :: Configures https://www.datastax.com/dev/blog/rapid-read-protection-in-cassandra-2-0-2[rapid read protection]. -Normal read requests are sent to just enough replica nodes to satisfy the https://cassandra.apache.org/_/glossary.html#gloss_consistency_level[consistency level]. +Normal read requests are sent to just enough replica nodes to satisfy the https://cassandra.apache.org/_/glossary.html#consistency-level[consistency level]. In rapid read protection, extra read requests are sent to other replicas, even after the consistency level has been met. The speculative retry property specifies the trigger for these extra read requests. + From b31d15b9b58926676436807ddc1efdd5616e13b3 Mon Sep 17 00:00:00 2001 From: Marcus Eriksson Date: Wed, 26 Mar 2025 15:48:13 +0100 Subject: [PATCH 014/340] Avoid failing queries when epoch changes and replica goes up/down Patch by marcuse; reviewed by Sam Tunnicliffe for CASSANDRA-20489 --- CHANGES.txt | 1 + .../cassandra/locator/ReplicaLayout.java | 16 ++- .../apache/cassandra/locator/ReplicaPlan.java | 38 +++--- .../cassandra/locator/ReplicaPlans.java | 33 +++-- .../apache/cassandra/service/paxos/Paxos.java | 6 + .../test/RepairDigestTrackingTest.java | 6 +- .../tcm/FailureDetectorRecomputeTest.java | 113 ++++++++++++++++++ .../service/reads/DataResolverTest.java | 2 +- .../service/reads/DigestResolverTest.java | 2 +- .../service/reads/ReadExecutorTest.java | 2 +- .../reads/repair/AbstractReadRepairTest.java | 1 + 11 files changed, 172 insertions(+), 48 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/tcm/FailureDetectorRecomputeTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 45a4f16b56e3..eee55c65e67f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Avoid failing queries when epoch changes and replica goes up/down (CASSANDRA-20489) * Split out truncation record lock (CASSANDRA-20480) * Throw new IndexBuildInProgressException when queries fail during index build, instead of IndexNotAvailableException (CASSANDRA-20402) * Fix Paxos repair interrupts running transactions (CASSANDRA-20469) diff --git a/src/java/org/apache/cassandra/locator/ReplicaLayout.java b/src/java/org/apache/cassandra/locator/ReplicaLayout.java index f0069f2555cc..30a52be73ade 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaLayout.java +++ b/src/java/org/apache/cassandra/locator/ReplicaLayout.java @@ -25,7 +25,6 @@ import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.gms.FailureDetector; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.FBUtilities; @@ -354,32 +353,31 @@ static EndpointsForToken resolveWriteConflictsInPending(EndpointsForToken natura } /** - * @return the read layout for a token - this includes only live natural replicas, i.e. those that are not pending - * and not marked down by the failure detector. these are reverse sorted by the badness score of the configured snitch + * @return the read layout for a token - this includes natural replicas, i.e. those that are not pending. + * They are reverse sorted by the badness score of the configured snitch */ - static ReplicaLayout.ForTokenRead forTokenReadLiveSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token) + static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token) { EndpointsForToken replicas = keyspace.getMetadata().params.replication.isLocal() ? forLocalStrategyToken(metadata, replicationStrategy, token) : forNonLocalStrategyTokenRead(metadata, keyspace.getMetadata(), token); + replicas = DatabaseDescriptor.getNodeProximity().sortedByProximity(FBUtilities.getBroadcastAddressAndPort(), replicas); - replicas = replicas.filter(FailureDetector.isReplicaAlive); + return new ReplicaLayout.ForTokenRead(replicationStrategy, replicas); } /** * TODO: we should really double check that the provided range does not overlap multiple token ring regions - * @return the read layout for a range - this includes only live natural replicas, i.e. those that are not pending - * and not marked down by the failure detector. these are reverse sorted by the badness score of the configured snitch + * @return the read layout for a range - these are reverse sorted by the badness score of the configured snitch */ - static ReplicaLayout.ForRangeRead forRangeReadLiveSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, AbstractBounds range) + static ReplicaLayout.ForRangeRead forRangeReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, AbstractBounds range) { EndpointsForRange replicas = keyspace.getMetadata().params.replication.isLocal() ? forLocalStrategyRange(metadata, replicationStrategy, range) : forNonLocalStategyRangeRead(metadata, keyspace.getMetadata(), range); replicas = DatabaseDescriptor.getNodeProximity().sortedByProximity(FBUtilities.getBroadcastAddressAndPort(), replicas); - replicas = replicas.filter(FailureDetector.isReplicaAlive); return new ReplicaLayout.ForRangeRead(replicationStrategy, range, replicas); } diff --git a/src/java/org/apache/cassandra/locator/ReplicaPlan.java b/src/java/org/apache/cassandra/locator/ReplicaPlan.java index 62db6f85f343..7d08b341b8eb 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaPlan.java +++ b/src/java/org/apache/cassandra/locator/ReplicaPlan.java @@ -27,7 +27,6 @@ import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.utils.FBUtilities; import java.util.List; import java.util.concurrent.CopyOnWriteArrayList; @@ -44,6 +43,7 @@ public interface ReplicaPlan, P extends ReplicaPlan ConsistencyLevel consistencyLevel(); E contacts(); + E liveAndDown(); Replica lookup(InetAddressAndPort endpoint); P withContacts(E contacts); @@ -82,29 +82,28 @@ abstract class AbstractReplicaPlan, P extends ReplicaPlan // - paxos, includes all live replicas (natural+pending), for this DC if SERIAL_LOCAL // ==> live.all() (if consistencyLevel.isDCLocal(), then .filter(consistencyLevel.isLocal)) protected final E contacts; + protected final E liveAndDown; protected final Function recompute; protected List contacted = new CopyOnWriteArrayList<>(); - AbstractReplicaPlan(Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, ConsistencyLevel consistencyLevel, E contacts, Function recompute, Epoch epoch) + AbstractReplicaPlan(Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, ConsistencyLevel consistencyLevel, E contacts, E liveAndDown, Function recompute, Epoch epoch) { assert contacts != null; this.keyspace = keyspace; this.replicationStrategy = replicationStrategy; this.consistencyLevel = consistencyLevel; this.contacts = contacts; + this.liveAndDown = liveAndDown; this.recompute = recompute; this.epoch = epoch; } public E contacts() { return contacts; } + public E liveAndDown() { return liveAndDown; } public Keyspace keyspace() { return keyspace; } public AbstractReplicationStrategy replicationStrategy() { return replicationStrategy; } public ConsistencyLevel consistencyLevel() { return consistencyLevel; } - public boolean canDoLocalRequest() - { - return contacts.contains(FBUtilities.getBroadcastAddressAndPort()); - } public Epoch epoch() { @@ -132,10 +131,11 @@ public static abstract class AbstractForRead, P extends F ConsistencyLevel consistencyLevel, E candidates, E contacts, + E liveAndDown, Function recompute, Epoch epoch) { - super(keyspace, replicationStrategy, consistencyLevel, contacts, recompute, epoch); + super(keyspace, replicationStrategy, consistencyLevel, contacts, liveAndDown, recompute, epoch); this.candidates = candidates; this.readQuorum = consistencyLevel.blockFor(replicationStrategy); } @@ -171,13 +171,13 @@ public boolean stillAppliesTo(ClusterMetadata newMetadata) ForRead newPlan = recompute.apply(newMetadata); - if (readCandidates().equals(newPlan.readCandidates())) + if (liveAndDown().equals(newPlan.liveAndDown())) return true; int readQuorum = newPlan.readQuorum(); for (InetAddressAndPort addr : contacted) { - if (newPlan.readCandidates().contains(addr)) + if (newPlan.liveAndDown().contains(addr)) readQuorum--; } @@ -204,17 +204,18 @@ public ForTokenRead(Keyspace keyspace, ConsistencyLevel consistencyLevel, EndpointsForToken candidates, EndpointsForToken contacts, + EndpointsForToken liveAndDown, Function recompute, Function, ReplicaPlan.ForWrite> repairPlan, Epoch epoch) { - super(keyspace, replicationStrategy, consistencyLevel, candidates, contacts, recompute, epoch); + super(keyspace, replicationStrategy, consistencyLevel, candidates, contacts, liveAndDown, recompute, epoch); this.repairPlan = repairPlan; } public ForTokenRead withContacts(EndpointsForToken newContacts) { - ForTokenRead res = new ForTokenRead(keyspace, replicationStrategy, consistencyLevel, candidates, newContacts, recompute, repairPlan, epoch); + ForTokenRead res = new ForTokenRead(keyspace, replicationStrategy, consistencyLevel, candidates, newContacts, liveAndDown, recompute, repairPlan, epoch); res.contacted.addAll(contacted); return res; } @@ -240,12 +241,13 @@ public ForRangeRead(Keyspace keyspace, AbstractBounds range, EndpointsForRange candidates, EndpointsForRange contact, + EndpointsForRange liveAndDown, int vnodeCount, Function recompute, BiFunction, Token, ReplicaPlan.ForWrite> repairPlan, Epoch epoch) { - super(keyspace, replicationStrategy, consistencyLevel, candidates, contact, recompute, epoch); + super(keyspace, replicationStrategy, consistencyLevel, candidates, contact, liveAndDown, recompute, epoch); this.range = range; this.vnodeCount = vnodeCount; this.repairPlan = repairPlan; @@ -260,7 +262,7 @@ public ForRangeRead(Keyspace keyspace, public ForRangeRead withContacts(EndpointsForRange newContact) { - ForRangeRead res = new ForRangeRead(keyspace, replicationStrategy, consistencyLevel, range, readCandidates(), newContact, vnodeCount, recompute, repairPlan, epoch); + ForRangeRead res = new ForRangeRead(keyspace, replicationStrategy, consistencyLevel, range, readCandidates(), newContact, liveAndDown, vnodeCount, recompute, repairPlan, epoch); res.contacted.addAll(contacted); return res; } @@ -284,6 +286,7 @@ public ForFullRangeRead(Keyspace keyspace, AbstractBounds range, EndpointsForRange candidates, EndpointsForRange contact, + EndpointsForRange liveAndDown, int vnodeCount, Epoch epoch) { @@ -291,7 +294,7 @@ public ForFullRangeRead(Keyspace keyspace, // the epoch change during the course of query execution so no recomputation function is supplied. Likewise, // no read repair is expected to be performed during this type of query so a null is also used in place of a // function for calculating the repair plan. - super(keyspace, replicationStrategy, consistencyLevel, range, candidates, contact, vnodeCount, null, null, epoch); + super(keyspace, replicationStrategy, consistencyLevel, range, candidates, contact, liveAndDown, vnodeCount, null, null, epoch); } @Override @@ -305,7 +308,6 @@ public static class ForWrite extends AbstractReplicaPlan recompute, Epoch epoch) { - super(keyspace, replicationStrategy, consistencyLevel, contact, recompute, epoch); + super(keyspace, replicationStrategy, consistencyLevel, contact, liveAndDown, recompute, epoch); this.pending = pending; - this.liveAndDown = liveAndDown; this.live = live; this.writeQuorum = consistencyLevel.blockForWrite(replicationStrategy, pending); } @@ -331,9 +332,6 @@ public ForWrite(Keyspace keyspace, /** Replicas that a region of the ring is moving to; not yet ready to serve reads, but should receive writes */ public EndpointsForToken pending() { return pending; } - /** Replicas that can participate in the write - this always includes all nodes (pending and natural) in all DCs, except for paxos LOCAL_QUORUM (which is local DC only) */ - public EndpointsForToken liveAndDown() { return liveAndDown; } - /** The live replicas present in liveAndDown, usually derived from FailureDetector.isReplicaAlive */ public EndpointsForToken live() { return live; } diff --git a/src/java/org/apache/cassandra/locator/ReplicaPlans.java b/src/java/org/apache/cassandra/locator/ReplicaPlans.java index b6a03b683bd1..53b32797c6fb 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaPlans.java +++ b/src/java/org/apache/cassandra/locator/ReplicaPlans.java @@ -839,13 +839,9 @@ public static ReplicaPlan.ForTokenRead forSingleReplicaRead(Keyspace keyspace, T private static ReplicaPlan.ForTokenRead forSingleReplicaRead(ClusterMetadata metadata, Keyspace keyspace, Token token, Replica replica) { - // todo; replica does not always contain token, figure out why -// if (!metadata.placements.get(keyspace.getMetadata().params.replication).reads.forToken(token).contains(replica)) -// throw UnavailableException.create(ConsistencyLevel.ONE, 1, 1, 0, 0); - EndpointsForToken one = EndpointsForToken.of(token, replica); - return new ReplicaPlan.ForTokenRead(keyspace, keyspace.getReplicationStrategy(), ConsistencyLevel.ONE, one, one, + return new ReplicaPlan.ForTokenRead(keyspace, keyspace.getReplicationStrategy(), ConsistencyLevel.ONE, one, one, one, (newClusterMetadata) -> forSingleReplicaRead(newClusterMetadata, keyspace, token, replica), (self) -> { throw new IllegalStateException("Read repair is not supported for short read/replica filtering protection."); @@ -866,7 +862,7 @@ private static ReplicaPlan.ForRangeRead forSingleReplicaRead(ClusterMetadata met // TODO: this is unsafe, as one.range() may be inconsistent with our supplied range; should refactor Range/AbstractBounds to single class EndpointsForRange one = EndpointsForRange.of(replica); - return new ReplicaPlan.ForRangeRead(keyspace, keyspace.getReplicationStrategy(), ConsistencyLevel.ONE, range, one, one, vnodeCount, + return new ReplicaPlan.ForRangeRead(keyspace, keyspace.getReplicationStrategy(), ConsistencyLevel.ONE, range, one, one, one, vnodeCount, (newClusterMetadata) -> forSingleReplicaRead(metadata, keyspace, range, replica, vnodeCount), (self, token) -> { throw new IllegalStateException("Read repair is not supported for short read/replica filtering protection."); @@ -901,17 +897,24 @@ public static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, return forRead(metadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, true); } - private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, Keyspace keyspace, Token token, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, SpeculativeRetryPolicy retry, boolean throwOnInsufficientLiveReplicas) + private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, + Keyspace keyspace, + Token token, + @Nullable Index.QueryPlan indexQueryPlan, + ConsistencyLevel consistencyLevel, + SpeculativeRetryPolicy retry, + boolean throwOnInsufficientLiveReplicas) { AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy(); - ReplicaLayout.ForTokenRead forTokenRead = ReplicaLayout.forTokenReadLiveSorted(metadata, keyspace, replicationStrategy, token); - EndpointsForToken candidates = candidatesForRead(keyspace, indexQueryPlan, consistencyLevel, forTokenRead.natural()); + ReplicaLayout.ForTokenRead forTokenReadLiveAndDown = ReplicaLayout.forTokenReadSorted(metadata, keyspace, replicationStrategy, token); + ReplicaLayout.ForTokenRead forTokenReadLive = forTokenReadLiveAndDown.filter(FailureDetector.isReplicaAlive); + EndpointsForToken candidates = candidatesForRead(keyspace, indexQueryPlan, consistencyLevel, forTokenReadLive.all()); EndpointsForToken contacts = contactForRead(metadata.locator, replicationStrategy, consistencyLevel, retry.equals(AlwaysSpeculativeRetryPolicy.INSTANCE), candidates); if (throwOnInsufficientLiveReplicas) assureSufficientLiveReplicasForRead(metadata.locator, replicationStrategy, consistencyLevel, contacts); - return new ReplicaPlan.ForTokenRead(keyspace, replicationStrategy, consistencyLevel, candidates, contacts, + return new ReplicaPlan.ForTokenRead(keyspace, replicationStrategy, consistencyLevel, candidates, contacts, forTokenReadLiveAndDown.all(), (newClusterMetadata) -> forRead(newClusterMetadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, false), (self) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive), metadata.epoch); @@ -942,8 +945,9 @@ public static ReplicaPlan.ForRangeRead forRangeRead(ClusterMetadata metadata, boolean throwOnInsufficientLiveReplicas) { AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy(); - ReplicaLayout.ForRangeRead forRangeRead = ReplicaLayout.forRangeReadLiveSorted(metadata, keyspace, replicationStrategy, range); - EndpointsForRange candidates = candidatesForRead(keyspace, indexQueryPlan, consistencyLevel, forRangeRead.natural()); + ReplicaLayout.ForRangeRead forRangeReadLiveAndDown = ReplicaLayout.forRangeReadSorted(metadata, keyspace, replicationStrategy, range); + ReplicaLayout.ForRangeRead forRangeReadLive = forRangeReadLiveAndDown.filter(FailureDetector.isReplicaAlive); + EndpointsForRange candidates = candidatesForRead(keyspace, indexQueryPlan, consistencyLevel, forRangeReadLive.natural()); EndpointsForRange contacts = contactForRead(metadata.locator, replicationStrategy, consistencyLevel, false, candidates); if (throwOnInsufficientLiveReplicas) @@ -955,6 +959,7 @@ public static ReplicaPlan.ForRangeRead forRangeRead(ClusterMetadata metadata, range, candidates, contacts, + forRangeReadLiveAndDown.all(), vnodeCount, (newClusterMetadata) -> forRangeRead(newClusterMetadata, keyspace, indexQueryPlan, consistencyLevel, range, vnodeCount, false), (self, token) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive), @@ -983,7 +988,7 @@ public static ReplicaPlan.ForRangeRead forFullRangeRead(Keyspace keyspace, EndpointsForRange contacts = builder.build(); ClusterMetadata metadata = ClusterMetadata.current(); - return new ReplicaPlan.ForFullRangeRead(keyspace, replicationStrategy, consistencyLevel, range, contacts, contacts, vnodeCount, metadata.epoch); + return new ReplicaPlan.ForFullRangeRead(keyspace, replicationStrategy, consistencyLevel, range, contacts, contacts, contacts, vnodeCount, metadata.epoch); } /** @@ -1000,6 +1005,7 @@ public static ReplicaPlan.ForRangeRead maybeMerge(ClusterMetadata metadata, if (!left.epoch.equals(right.epoch)) return null; + EndpointsForRange mergedLiveAndDown = left.liveAndDown().keep(right.liveAndDown().endpoints()); EndpointsForRange mergedCandidates = left.readCandidates().keep(right.readCandidates().endpoints()); AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy(); EndpointsForRange contacts = contactForRead(metadata.locator, replicationStrategy, consistencyLevel, false, mergedCandidates); @@ -1023,6 +1029,7 @@ public static ReplicaPlan.ForRangeRead maybeMerge(ClusterMetadata metadata, newRange, mergedCandidates, contacts, + mergedLiveAndDown, newVnodeCount, (newClusterMetadata) -> forRangeRead(newClusterMetadata, keyspace, diff --git a/src/java/org/apache/cassandra/service/paxos/Paxos.java b/src/java/org/apache/cassandra/service/paxos/Paxos.java index 06f90907d502..15d2b320fb75 100644 --- a/src/java/org/apache/cassandra/service/paxos/Paxos.java +++ b/src/java/org/apache/cassandra/service/paxos/Paxos.java @@ -415,6 +415,12 @@ public EndpointsForToken readCandidates() return electorateNatural; } + @Override + public EndpointsForToken liveAndDown() + { + return all; + } + @Override public boolean stillAppliesTo(ClusterMetadata newMetadata) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java b/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java index 1e5773c67a05..ce3df571d8f7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java @@ -380,7 +380,7 @@ else if (ccAfterPartitionRead != ccBefore) * local reads triggered by read repair (after speculative reads) execute at roughly the same time. * * This test depends on whether node1 gets a data or a digest request first, we force it to be a digest request - * in the forTokenReadLiveSorted ByteBuddy rule below. + * in the forTokenReadSorted ByteBuddy rule below. */ @Test public void testLocalDataAndRemoteRequestConcurrency() throws Exception @@ -440,7 +440,7 @@ public static void install(ClassLoader classLoader, Integer num) .load(classLoader, ClassLoadingStrategy.Default.INJECTION); new ByteBuddy().rebase(ReplicaLayout.class) - .method(named("forTokenReadLiveSorted").and(takesArguments(ClusterMetadata.class, Keyspace.class, AbstractReplicationStrategy.class, Token.class))) + .method(named("forTokenReadSorted").and(takesArguments(ClusterMetadata.class, Keyspace.class, AbstractReplicationStrategy.class, Token.class))) .intercept(MethodDelegation.to(BBHelper.class)) .make() .load(classLoader, ClassLoadingStrategy.Default.INJECTION); @@ -475,7 +475,7 @@ public static UnfilteredPartitionIterator executeLocally(ReadExecutionController } @SuppressWarnings({ "unused" }) - public static ReplicaLayout.ForTokenRead forTokenReadLiveSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token) + public static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token) { try { diff --git a/test/distributed/org/apache/cassandra/distributed/test/tcm/FailureDetectorRecomputeTest.java b/test/distributed/org/apache/cassandra/distributed/test/tcm/FailureDetectorRecomputeTest.java new file mode 100644 index 000000000000..7abfd7386cf2 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/tcm/FailureDetectorRecomputeTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.tcm; + +import java.io.IOException; + +import org.junit.Test; + +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicBoolean; + +import net.bytebuddy.ByteBuddy; +import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; +import net.bytebuddy.implementation.MethodDelegation; +import net.bytebuddy.implementation.bind.annotation.SuperCall; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.gms.FailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.transformations.CustomTransformation; + +import static net.bytebuddy.matcher.ElementMatchers.named; +import static net.bytebuddy.matcher.ElementMatchers.takesArguments; + +public class FailureDetectorRecomputeTest extends TestBaseImpl +{ + @Test + public void readTest() throws IOException + { + try (Cluster cluster = init(Cluster.build(3) + .withInstanceInitializer(BB::install) + .start())) + { + cluster.schemaChange(withKeyspace("create table %s.tbl (id int primary key)")); + cluster.get(1).runOnInstance(() -> BB.enabled.set(true)); + for (int i = 0; i < 10; i++) + cluster.coordinator(1).execute(withKeyspace("select * from %s.tbl where id=?"), ConsistencyLevel.QUORUM, i); + } + } + + @Test + public void writeTest() throws IOException + { + try (Cluster cluster = init(Cluster.build(3) + .withInstanceInitializer(BB::install) + .start())) + { + cluster.schemaChange(withKeyspace("create table %s.tbl (id int primary key)")); + cluster.get(1).runOnInstance(() -> BB.enabled.set(true)); + for (int i = 0; i < 10; i++) + cluster.coordinator(1).execute(withKeyspace("insert into %s.tbl (id) values (?)"), ConsistencyLevel.QUORUM, i); + } + } + + public static class BB + { + public static AtomicBoolean enabled = new AtomicBoolean(); + + public static void install(ClassLoader cl, int i) + { + new ByteBuddy().rebase(FailureDetector.class) + .method(named("isAlive").and(takesArguments(1))) + .intercept(MethodDelegation.to(FailureDetectorRecomputeTest.BB.class)) + .make() + .load(cl, ClassLoadingStrategy.Default.INJECTION); + + new ByteBuddy().rebase(ReplicaPlan.AbstractForRead.class) + .method(named("stillAppliesTo").and(takesArguments(1))) + .intercept(MethodDelegation.to(FailureDetectorRecomputeTest.BB.class)) + .make() + .load(cl, ClassLoadingStrategy.Default.INJECTION); + } + + static int downNode = 1; + public static boolean isAlive(InetAddressAndPort ep) + { + if (!enabled.get()) + return true; + enabled.set(false); + ClusterMetadataService.instance().commit(CustomTransformation.make("hello")); + enabled.set(true); + return !ep.equals(InetAddressAndPort.getByNameUnchecked("127.0.0." + ((downNode % 3) + 1))); + } + + public static boolean stillAppliesTo(ClusterMetadata metadata, @SuperCall Callable zuper) throws Exception + { + if (!enabled.get()) + return true; + downNode++; + return zuper.call(); + } + } +} diff --git a/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java b/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java index 9c56f00a8098..d281025666af 100644 --- a/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java +++ b/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java @@ -1331,7 +1331,7 @@ private ReplicaPlan.SharedForRangeRead plan(EndpointsForRange replicas, Consiste ks.getReplicationStrategy(), consistencyLevel, ReplicaUtils.FULL_BOUNDS, - replicas, replicas, + replicas, replicas, replicas, 1, null, repairPlan, Epoch.EMPTY)); diff --git a/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java b/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java index 84a116729567..17baa4fa55f0 100644 --- a/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java +++ b/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java @@ -215,7 +215,7 @@ public void transientResponseData() private ReplicaPlan.SharedForTokenRead plan(ConsistencyLevel consistencyLevel, EndpointsForToken replicas) { - return ReplicaPlan.shared(new ReplicaPlan.ForTokenRead(ks, ks.getReplicationStrategy(), consistencyLevel, replicas, replicas, null, (self) -> null, Epoch.EMPTY)); + return ReplicaPlan.shared(new ReplicaPlan.ForTokenRead(ks, ks.getReplicationStrategy(), consistencyLevel, replicas, replicas, replicas, null, (self) -> null, Epoch.EMPTY)); } private void waitForLatch(CountDownLatch startlatch) diff --git a/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java b/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java index 046da259e98e..e23c7078b40d 100644 --- a/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java +++ b/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java @@ -278,6 +278,6 @@ private ReplicaPlan.ForTokenRead plan(EndpointsForToken targets, ConsistencyLeve private ReplicaPlan.ForTokenRead plan(ConsistencyLevel consistencyLevel, EndpointsForToken natural, EndpointsForToken selected) { - return new ReplicaPlan.ForTokenRead(ks, ks.getReplicationStrategy(), consistencyLevel, natural, selected, (cm) -> null, (self) -> null, Epoch.EMPTY); + return new ReplicaPlan.ForTokenRead(ks, ks.getReplicationStrategy(), consistencyLevel, natural, selected, natural, (cm) -> null, (self) -> null, Epoch.EMPTY); } } diff --git a/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java b/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java index f0026d35b9c0..1689069cf97a 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java @@ -357,6 +357,7 @@ static ReplicaPlan.ForRangeRead replicaPlan(Keyspace keyspace, ConsistencyLevel ReplicaUtils.FULL_BOUNDS, replicas, targets, + replicas, 1, null, (self, token) -> forReadRepair(self, ClusterMetadata.current(), keyspace, consistencyLevel, token, (r) -> true), From 95aca49915fc0dab09129bcc662449cef75dceab Mon Sep 17 00:00:00 2001 From: Marcus Eriksson Date: Thu, 3 Apr 2025 15:01:55 +0200 Subject: [PATCH 015/340] Avoid NPE during cms initialization abort Patch by marcuse; reviewed by David Capwell and Caleb Rackliffe for CASSANDRA-20527 --- CHANGES.txt | 1 + src/java/org/apache/cassandra/tcm/Startup.java | 2 +- .../tcm/migration/CMSInitializationRequest.java | 14 +++++++------- .../apache/cassandra/tcm/migration/Election.java | 12 +++++++----- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index eee55c65e67f..cd2619e4cb44 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Avoid NPE during cms initialization abort (CASSANDRA-20527) * Avoid failing queries when epoch changes and replica goes up/down (CASSANDRA-20489) * Split out truncation record lock (CASSANDRA-20480) * Throw new IndexBuildInProgressException when queries fail during index build, instead of IndexNotAvailableException (CASSANDRA-20402) diff --git a/src/java/org/apache/cassandra/tcm/Startup.java b/src/java/org/apache/cassandra/tcm/Startup.java index d17b68769886..6d3fe6b2afd5 100644 --- a/src/java/org/apache/cassandra/tcm/Startup.java +++ b/src/java/org/apache/cassandra/tcm/Startup.java @@ -246,7 +246,7 @@ public static void initializeForDiscovery(Runnable initMessaging) else { CMSInitializationRequest.Initiator initiator = Election.instance.initiator(); - candidates = Discovery.instance.discoverOnce(initiator == null ? null : initiator.initiator); + candidates = Discovery.instance.discoverOnce(initiator == null ? null : initiator.endpoint); } Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS); } diff --git a/src/java/org/apache/cassandra/tcm/migration/CMSInitializationRequest.java b/src/java/org/apache/cassandra/tcm/migration/CMSInitializationRequest.java index dac50e5edbdc..599bfca0da7d 100644 --- a/src/java/org/apache/cassandra/tcm/migration/CMSInitializationRequest.java +++ b/src/java/org/apache/cassandra/tcm/migration/CMSInitializationRequest.java @@ -109,12 +109,12 @@ public long serializedSize(CMSInitializationRequest t, int version) public static class Initiator { public static final Serializer serializer = new Serializer(); - public final InetAddressAndPort initiator; + public final InetAddressAndPort endpoint; public final UUID initToken; public Initiator(InetAddressAndPort initiator, UUID initToken) { - this.initiator = initiator; + this.endpoint = initiator; this.initToken = initToken; } @@ -124,20 +124,20 @@ public boolean equals(Object o) if (this == o) return true; if (!(o instanceof Initiator)) return false; Initiator other = (Initiator) o; - return Objects.equals(initiator, other.initiator) && Objects.equals(initToken, other.initToken); + return Objects.equals(endpoint, other.endpoint) && Objects.equals(initToken, other.initToken); } @Override public int hashCode() { - return Objects.hash(initiator, initToken); + return Objects.hash(endpoint, initToken); } @Override public String toString() { return "Initiator{" + - "initiator=" + initiator + + "initiator=" + endpoint + ", initToken=" + initToken + '}'; } @@ -147,7 +147,7 @@ public static class Serializer implements IVersionedSerializer @Override public void serialize(Initiator t, DataOutputPlus out, int version) throws IOException { - InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serialize(t.initiator, out, version); + InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serialize(t.endpoint, out, version); UUIDSerializer.serializer.serialize(t.initToken, out, version); } @@ -161,7 +161,7 @@ public Initiator deserialize(DataInputPlus in, int version) throws IOException @Override public long serializedSize(Initiator t, int version) { - return InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serializedSize(t.initiator, version) + + return InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serializedSize(t.endpoint, version) + UUIDSerializer.serializer.serializedSize(t.initToken, version); } } diff --git a/src/java/org/apache/cassandra/tcm/migration/Election.java b/src/java/org/apache/cassandra/tcm/migration/Election.java index 94f5dc4a06d3..507a55d31c82 100644 --- a/src/java/org/apache/cassandra/tcm/migration/Election.java +++ b/src/java/org/apache/cassandra/tcm/migration/Election.java @@ -134,7 +134,7 @@ private void finish(Set sendTo) { CMSInitializationRequest.Initiator currentInitiator = initiator.get(); if (currentInitiator != null && - Objects.equals(currentInitiator.initiator, FBUtilities.getBroadcastAddressAndPort()) && + Objects.equals(currentInitiator.endpoint, FBUtilities.getBroadcastAddressAndPort()) && initiator.compareAndSet(currentInitiator, MIGRATING)) { Startup.initializeAsFirstCMSNode(); @@ -183,7 +183,7 @@ public void abortInitialization(String initiatorEp) { InetAddressAndPort expectedInitiator = InetAddressAndPort.getByNameUnchecked(initiatorEp); CMSInitializationRequest.Initiator currentInitiator = initiator.get(); - if (currentInitiator != null && Objects.equals(currentInitiator.initiator, expectedInitiator) && initiator.compareAndSet(currentInitiator, null)) + if (currentInitiator != null && Objects.equals(currentInitiator.endpoint, expectedInitiator) && initiator.compareAndSet(currentInitiator, null)) { ClusterMetadata metadata = ClusterMetadata.current(); for (Map.Entry entry : metadata.directory.states.entrySet()) @@ -243,9 +243,11 @@ public class AbortHandler implements IVerbHandler message) throws IOException { logger.info("Received election abort message {} from {}", message.payload, message.from()); - CMSInitializationRequest.Initiator initiator = message.payload; - if (!initiator.initiator.equals(initiator().initiator) || !updateInitiator(message.payload, null)) - logger.error("Could not clear initiator - initiator is set to {}, abort message received from {}", initiator(), message.payload); + CMSInitializationRequest.Initiator remoteInitiator = message.payload; + if (initiator() == null) + logger.info("Initiator already cleared, ignoring abort message from {}: {}", message.from(), remoteInitiator); + else if (!remoteInitiator.endpoint.equals(initiator().endpoint) || !updateInitiator(remoteInitiator, null)) + logger.error("Could not clear initiator - initiator is set to {}, abort message received from {}: {}", initiator(), message.from(), remoteInitiator); } } } From c3089b564aad4d8df9a23560371d31060e45d312 Mon Sep 17 00:00:00 2001 From: Pedro Gordo Date: Sun, 19 Jan 2025 17:20:42 +0000 Subject: [PATCH 016/340] Ignore repetitions of semicolon in CQLSH When grouping the tokens, skip statements composed by a single endtoken. patch by Pedro Gordo; reviewed by Brad Schoening, Stefan Miklosovic for CASSANDRA-19956 --- CHANGES.txt | 1 + pylib/cqlshlib/cqlhandling.py | 29 +++++++++++++++++++++++-- pylib/cqlshlib/test/test_cql_parsing.py | 11 ++++++++++ pylib/cqlshlib/util.py | 21 ------------------ 4 files changed, 39 insertions(+), 23 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index cd2619e4cb44..199716afe6b5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Ignore repetitions of semicolon in CQLSH (CASSANDRA-19956) * Avoid NPE during cms initialization abort (CASSANDRA-20527) * Avoid failing queries when epoch changes and replica goes up/down (CASSANDRA-20489) * Split out truncation record lock (CASSANDRA-20480) diff --git a/pylib/cqlshlib/cqlhandling.py b/pylib/cqlshlib/cqlhandling.py index 504371c16dcb..2cc49fbee4b6 100644 --- a/pylib/cqlshlib/cqlhandling.py +++ b/pylib/cqlshlib/cqlhandling.py @@ -113,7 +113,9 @@ def cql_massage_tokens(self, toklist): curstmt.append(t) if t[0] == 'endtoken': term_on_nl = False - output.extend(curstmt) + # skip empty statements + if len(curstmt) > 1: + output.extend(curstmt) curstmt = [] else: if len(curstmt) == 1: @@ -135,7 +137,7 @@ def cql_whole_parse_tokens(self, toklist, srcstr=None, startsymbol='Start'): def cql_split_statements(self, text): tokens = self.lex(text) tokens = self.cql_massage_tokens(tokens) - stmts = util.split_list(tokens, lambda t: t[0] == 'endtoken') + stmts = self.group_tokens(tokens) output = [] in_batch = False in_pg_string = len([st for st in tokens if len(st) > 0 and st[0] == 'unclosedPgString']) == 1 @@ -151,6 +153,29 @@ def cql_split_statements(self, text): in_batch = True return output, in_batch or in_pg_string + def group_tokens(self, items): + """ + Split an iterable into sublists, using 'endtoken' to mark the end of each sublist. + Each sublist accumulates elements until an 'endtoken' is encountered. If the sublist + consists only of a single 'endtoken', it is excluded. An empty list is added to the + result after the last 'endtoken' for cases like autocompletion. + + Parameters: + - items (iterable): An iterable of tokens, including 'endtoken' elements. + + Returns: + - list: A list of sublists, with each sublist containing tokens split by 'endtoken'. + """ + + thisresult = [] + results = [thisresult] + for i in items: + thisresult.append(i) + if i[0] == 'endtoken': + thisresult = [] + results.append(thisresult) + return results + def cql_complete_single(self, text, partial, init_bindings=None, ignore_case=True, startsymbol='Start'): tokens = (self.cql_split_statements(text)[0] or [[]])[-1] diff --git a/pylib/cqlshlib/test/test_cql_parsing.py b/pylib/cqlshlib/test/test_cql_parsing.py index b9eb716a7843..7a98c6d88f2a 100644 --- a/pylib/cqlshlib/test/test_cql_parsing.py +++ b/pylib/cqlshlib/test/test_cql_parsing.py @@ -804,6 +804,17 @@ def test_strip_comment_blocks_from_input(self): ''') self.assertRaises(SyntaxError) + def test_skip_duplicate_endtokens(self): + parsed = parse_cqlsh_statements('SELECT * FROM my_table;;;;') + expected_output = [ + ('SELECT', 'reserved_identifier'), + ('*', 'star'), + ('FROM', 'reserved_identifier'), + ('my_table', 'identifier'), + (';', 'endtoken') + ] + self.assertSequenceEqual(tokens_with_types(parsed), expected_output) + def parse_cqlsh_statements(text): """ diff --git a/pylib/cqlshlib/util.py b/pylib/cqlshlib/util.py index 8874be011e65..96d9bd272ea7 100644 --- a/pylib/cqlshlib/util.py +++ b/pylib/cqlshlib/util.py @@ -31,27 +31,6 @@ HAS_LINE_PROFILER = False -def split_list(items, pred): - """ - Split up a list (or other iterable) on the elements which satisfy the - given predicate 'pred'. Elements for which 'pred' returns true start a new - sublist for subsequent elements, which will accumulate in the new sublist - until the next satisfying element. - - >>> split_list([0, 1, 2, 5, 99, 8], lambda n: (n % 2) == 0) - [[0], [1, 2], [5, 99, 8], []] - """ - - thisresult = [] - results = [thisresult] - for i in items: - thisresult.append(i) - if pred(i): - thisresult = [] - results.append(thisresult) - return results - - def find_common_prefix(strs): """ Given a list (iterable) of strings, return the longest common prefix. From 07bbf2cd978bf71d70863f7680a388e19821c481 Mon Sep 17 00:00:00 2001 From: pranavchaurasia18 Date: Mon, 7 Apr 2025 10:09:32 +0200 Subject: [PATCH 017/340] Docs should mention that cassandra.yaml should be secure patch by Pranav Chaurasia; reviewed by Stefan Miklosovic for CASSANDRA-18297 --- doc/modules/cassandra/pages/getting_started/configuring.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/cassandra/pages/getting_started/configuring.adoc b/doc/modules/cassandra/pages/getting_started/configuring.adoc index ba72f97917b9..b099836caa0c 100644 --- a/doc/modules/cassandra/pages/getting_started/configuring.adoc +++ b/doc/modules/cassandra/pages/getting_started/configuring.adoc @@ -14,7 +14,7 @@ to various Cassandra configuration files. Some examples that require non-default configuration are deploying a multi-node cluster or using clients that are not running on a cluster node. -* `cassandra.yaml`: the main configuration file for Cassandra +* `cassandra.yaml`: the main configuration file for Cassandra, it contains sensitive settings and therefore should not be accessed or modified by untrusted users * `cassandra-env.sh`: environment variables can be set * `cassandra-rackdc.properties` OR `cassandra-topology.properties`: set rack and datacenter information for a cluster From b81163b04b1d99036730ff233595d7bfb88611d1 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 7 Apr 2025 07:05:05 -0500 Subject: [PATCH 018/340] Prepare debian changelog for 5.0.4 --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 0f65b81c051b..359a8c81265a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ -cassandra (5.0.4) UNRELEASED; urgency=medium +cassandra (5.0.4) unstable; urgency=medium * New release - -- Stefan Miklosovic Tue, 04 Feb 2025 09:43:30 +0100 + -- Brandon Williams Mon, 07 Apr 2025 07:04:52 -0500 cassandra (5.0.3) unstable; urgency=medium From 694a88bdbe9aa6a4773c99814da33e0a65030ab3 Mon Sep 17 00:00:00 2001 From: mck Date: Sun, 6 Apr 2025 19:10:55 +0200 Subject: [PATCH 019/340] Upgrade java-driver-core to 3.12.1 and org.apache coords patch by Mick Semb Wever; reviewed by Brandon Williams for CASSANDRA-17231 --- .build/cassandra-deps-template.xml | 2 +- .build/parent-pom-template.xml | 4 ++-- CHANGES.txt | 1 + .../apache/cassandra/tools/nodetool/ClientStatsTest.java | 8 ++++---- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.build/cassandra-deps-template.xml b/.build/cassandra-deps-template.xml index ab98e36ab85f..e7d8b78ef41f 100644 --- a/.build/cassandra-deps-template.xml +++ b/.build/cassandra-deps-template.xml @@ -145,7 +145,7 @@ logback-classic - com.datastax.cassandra + org.apache.cassandra cassandra-driver-core shaded diff --git a/.build/parent-pom-template.xml b/.build/parent-pom-template.xml index cb15badbb954..2cb5f8ca0f92 100644 --- a/.build/parent-pom-template.xml +++ b/.build/parent-pom-template.xml @@ -940,9 +940,9 @@ - com.datastax.cassandra + org.apache.cassandra cassandra-driver-core - 3.11.5 + 3.12.1 shaded diff --git a/CHANGES.txt b/CHANGES.txt index 6e53c8ac93c7..09cd12badbb4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.4 + * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) * Update netty to 4.1.119.Final and netty-tcnative to 2.0.70.Final (CASSANDRA-20314) * Serialization can lose complex deletions in a mutation with multiple collections in a row (CASSANDRA-20449) * Improve error messages when initializing auth classes (CASSANDRA-20368) diff --git a/test/unit/org/apache/cassandra/tools/nodetool/ClientStatsTest.java b/test/unit/org/apache/cassandra/tools/nodetool/ClientStatsTest.java index d5b01730ef97..d4d9d6ec157b 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/ClientStatsTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/ClientStatsTest.java @@ -166,7 +166,7 @@ public void testClientStatsAll() tool.assertOnCleanExit(); String stdout = tool.getStdout(); assertThat(stdout).containsPattern("Address +SSL +Cipher +Protocol +Version +User +Keyspace +Requests +Driver-Name +Driver-Version"); - assertThat(stdout).containsPattern("/127.0.0.1:[0-9]+ false undefined undefined [0-9]+ +anonymous +[0-9]+ +DataStax Java Driver 3.11.5"); + assertThat(stdout).containsPattern("/127.0.0.1:[0-9]+ false undefined undefined [0-9]+ +anonymous +[0-9]+ +Apache Cassandra Java Driver 3.12.1"); assertThat(stdout).contains("Total connected clients: 2"); assertThat(stdout).contains("User Connections"); assertThat(stdout).contains("anonymous 2"); @@ -179,9 +179,9 @@ public void testClientStatsClientOptions() tool.assertOnCleanExit(); String stdout = tool.getStdout(); assertThat(stdout).containsPattern("Address +SSL +Cipher +Protocol +Version +User +Keyspace +Requests +Driver-Name +Driver-Version +Client-Options"); - assertThat(stdout).containsPattern("/127.0.0.1:[0-9]+ false undefined undefined [0-9]+ +anonymous +[0-9]+ +DataStax Java Driver 3.11.5"); - assertThat(stdout).containsPattern("DRIVER_NAME=DataStax Java Driver"); - assertThat(stdout).containsPattern("DRIVER_VERSION=3.11.5"); + assertThat(stdout).containsPattern("/127.0.0.1:[0-9]+ false undefined undefined [0-9]+ +anonymous +[0-9]+ +Apache Cassandra Java Driver 3.12.1"); + assertThat(stdout).containsPattern("DRIVER_NAME=Apache Cassandra Java Driver"); + assertThat(stdout).containsPattern("DRIVER_VERSION=3.12.1"); assertThat(stdout).containsPattern("CQL_VERSION=3.0.0"); assertThat(stdout).contains("Total connected clients: 2"); assertThat(stdout).contains("User Connections"); From f6de28f7279a757f889bfdb9c7ff0b4eb5485ca7 Mon Sep 17 00:00:00 2001 From: mck Date: Tue, 8 Apr 2025 14:53:43 +0200 Subject: [PATCH 020/340] Move generate-snyk-file target dependency from build-project to dependency-check patch by Mick Semb Wever; reviewed by Brandon Williams, Alex Petrov for CASSANDRA-20319 --- .build/build-owasp.xml | 2 +- build.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.build/build-owasp.xml b/.build/build-owasp.xml index 5d5999f43713..03ba76676d00 100644 --- a/.build/build-owasp.xml +++ b/.build/build-owasp.xml @@ -105,7 +105,7 @@ - + diff --git a/build.xml b/build.xml index 313a8caabbd7..34e95ba43103 100644 --- a/build.xml +++ b/build.xml @@ -943,7 +943,7 @@ - From 270839ddef485608551ae7841d548d00b3a2fa86 Mon Sep 17 00:00:00 2001 From: Madhavan Sridharan Date: Tue, 8 Apr 2025 08:37:57 -0400 Subject: [PATCH 021/340] =?UTF-8?q?ninja-fix=20=E2=80=93=20cqlversion=20in?= =?UTF-8?q?=20docker=20quickstart=20doc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patch by Madhavan Sridharan; reviewed by Mick Semb Wever --- .../cassandra/examples/BASH/docker-run-cqlsh-quickstart.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/cassandra/examples/BASH/docker-run-cqlsh-quickstart.sh b/doc/modules/cassandra/examples/BASH/docker-run-cqlsh-quickstart.sh index c75954924aaa..cbfc0bddbde1 100644 --- a/doc/modules/cassandra/examples/BASH/docker-run-cqlsh-quickstart.sh +++ b/doc/modules/cassandra/examples/BASH/docker-run-cqlsh-quickstart.sh @@ -1,3 +1,3 @@ docker run --rm -it --network \ cassandra nuvo/docker-cqlsh cqlsh cassandra \ -9042 --cqlversion='3.4.5' \ No newline at end of file +9042 --cqlversion='3.4.7' \ No newline at end of file From f33c3450d4367a35317409e698c8999b5d5e63e9 Mon Sep 17 00:00:00 2001 From: mck Date: Tue, 8 Apr 2025 11:39:30 +0200 Subject: [PATCH 022/340] Only prefetch docker image needed in jenkinsfile build stages patch by Mick Semb Wever; reviewed by Brandon Williams for CASSANDRA-20537 --- .jenkins/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index 4d5cb6dfa189..7cee6634a390 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -337,7 +337,7 @@ def build(command, cell) { test -f .jenkins/Jenkinsfile || { echo "Invalid git fork/branch"; exit 1; } grep -q "Jenkins CI declaration" .jenkins/Jenkinsfile || { echo "Only Cassandra 5.0+ supported"; exit 1; } """ - fetchDockerImages(['almalinux-build', 'bullseye-build']) + fetchDockerImages("redhat" == cell.step ? ['almalinux-build'] : ['bullseye-build']) def cell_suffix = "_jdk${cell.jdk}_${cell.arch}" def logfile = "stage-logs/${JOB_NAME}_${BUILD_NUMBER}_${cell.step}${cell_suffix}_attempt${attempt}.log.xz" def script_vars = "#!/bin/bash \n set -o pipefail ; " // pipe to tee needs pipefail From 96bfbe62500d0769306728a18256a407a08d1a4f Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Tue, 8 Apr 2025 15:08:22 -0400 Subject: [PATCH 023/340] Dropwizard Meter causes timeouts when infrequently used patch by Ariel Weisberg; reviewed by Maxim Muzafarov for CASSANDRA-19332 --- CHANGES.txt | 1 + .../metrics/CassandraMetricsRegistry.java | 79 ++++++++++++++++++- .../metrics/CassandraMetricsRegistryTest.java | 38 ++++++++- 3 files changed, 111 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 0f9e790dc74e..25aea7b20ab7 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) * Update OWASP dependency checker to version 12.1.0 (CASSANDRA-20501) * Suppress CVE-2025-25193 (CASSANDRA-20504) * Include in source tree and build packages a Snyk policy file that lists known false positives (CASSANDRA-20319) diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index 1ae24556e4dc..2a2794c27af0 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -18,20 +18,38 @@ package org.apache.cassandra.metrics; import java.lang.reflect.Method; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import javax.management.MalformedObjectNameException; import javax.management.ObjectName; import com.google.common.annotations.VisibleForTesting; - -import com.codahale.metrics.*; +import org.github.jamm.Unmetered; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.Metered; +import com.codahale.metrics.Metric; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.utils.MBeanWrapper; +import org.apache.cassandra.utils.Throwables; + +import static com.google.common.base.Preconditions.checkArgument; /** * Makes integrating 3.0 metrics API with 2.0. @@ -41,14 +59,30 @@ */ public class CassandraMetricsRegistry extends MetricRegistry { - public static final CassandraMetricsRegistry Metrics = new CassandraMetricsRegistry(); + private static final Logger logger = LoggerFactory.getLogger(CassandraMetricsRegistry.class); + + public static final CassandraMetricsRegistry Metrics = new CassandraMetricsRegistry(TimeUnit.DAYS.toMicros(1)); private final Map threadPoolMetrics = new ConcurrentHashMap<>(); + /** + * {@link org.apache.cassandra.repair.RepairJobTest#testNoTreesRetainedAfterDifference() RepairJobTest#testNoTreesRetainedAfterDifference()} + * calls {@link org.apache.cassandra.utils.ObjectSizes#measureDeep(Object) ObjectSizes.measureDeep(Object)} on + * {@link org.apache.cassandra.repair.RepairSession RepairSession} which reachs the {@link #mBeanServer} reference + * to {@link org.apache.cassandra.utils.MBeanWrapper#instance} via the lambda in {@link #periodicMeterTicker} which + * then attempts to private final fields accessible that can't be changed. We didn't want to measure that stuff + * anyways, but the executor tasks actualy really do need to be measured for that test to work so make this @Unmetered. + */ + @Unmetered private final MBeanWrapper mBeanServer = MBeanWrapper.instance; - private CassandraMetricsRegistry() + final ScheduledFuture periodicMeterTicker; + + CassandraMetricsRegistry(long tickMetersPeriodMicros) { super(); + checkArgument(tickMetersPeriodMicros >= 0); + long initialDelay = ThreadLocalRandom.current().nextLong(tickMetersPeriodMicros); + periodicMeterTicker = ScheduledExecutors.scheduledTasks.scheduleAtFixedRate(this::tickMeters, initialDelay, tickMetersPeriodMicros, TimeUnit.MICROSECONDS); } public Counter counter(MetricName name) @@ -219,6 +253,43 @@ private void removeAlias(MetricName name) if (mBeanServer.isRegistered(name.getMBeanName())) MBeanWrapper.instance.unregisterMBean(name.getMBeanName(), MBeanWrapper.OnException.IGNORE); } + + /** + * Very infrequently used meters generate a linear amount of tick work based on how long it has been + * since the meter was last marked or read. On scales of a year this can be enough to cause the first request + * that needs to mark the meter to time out. Once a day read every meter to force them to run Meter.tickIfNecessary + * so we only ever run at most one day worth of tick work per meter in the request path. + * + * This can be removed if we ever upgrade and switch the default MovingAverage from EWMA to SlidingWindowTimeAverages + */ + private void tickMeters() + { + List failures = new ArrayList<>(); + int droppedFailures = 0; + for (Meter meter : getMeters().values()) + { + try + { + meter.getOneMinuteRate(); + } + catch (Throwable t) + { + if (failures.size() < 10) + failures.add(t); + else + droppedFailures++; + } + } + if (!failures.isEmpty()) + { + Throwable failure = null; + for (Throwable t : failures) + failure = Throwables.merge(failure, t); + // To avoid the scheduled task being cancelled don't leak exceptions + // Runs only once a day so noise is not an issue + logger.error(String.format("Had error(s) attempting to tick meter. Dropped %d exceptions.", droppedFailures), failure); + } + } /** * Strips a single final '$' from input diff --git a/test/unit/org/apache/cassandra/metrics/CassandraMetricsRegistryTest.java b/test/unit/org/apache/cassandra/metrics/CassandraMetricsRegistryTest.java index cd9866c3194b..0877812b7342 100644 --- a/test/unit/org/apache/cassandra/metrics/CassandraMetricsRegistryTest.java +++ b/test/unit/org/apache/cassandra/metrics/CassandraMetricsRegistryTest.java @@ -20,18 +20,24 @@ */ package org.apache.cassandra.metrics; -import static org.junit.Assert.*; - import java.lang.management.ManagementFactory; import java.util.Collection; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; -import org.apache.cassandra.metrics.CassandraMetricsRegistry.MetricName; import org.junit.Test; +import com.codahale.metrics.Meter; import com.codahale.metrics.jvm.BufferPoolMetricSet; import com.codahale.metrics.jvm.GarbageCollectorMetricSet; import com.codahale.metrics.jvm.MemoryUsageGaugeSet; +import org.apache.cassandra.metrics.CassandraMetricsRegistry.MetricName; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; public class CassandraMetricsRegistryTest { @@ -107,4 +113,30 @@ public void testDeltaHistogramSizeChange() assertArrayEquals(count, CassandraMetricsRegistry.delta(count, new long[3])); assertArrayEquals(new long[6], CassandraMetricsRegistry.delta(count, new long[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})); } + + @Test + public void testTickMeters() throws InterruptedException + { + CassandraMetricsRegistry cmr = new CassandraMetricsRegistry(TimeUnit.SECONDS.toMicros(1)); + int numMeters = 1000; + CountDownLatch ticked = new CountDownLatch(numMeters); + AtomicInteger counted = new AtomicInteger(); + Meter m = new Meter() + { + @Override + public double getOneMinuteRate() { + if (counted.incrementAndGet() % 2 == 0) + throw new RuntimeException("test failure handling"); + ticked.countDown(); + return super.getOneMinuteRate(); + } + }; + for (int ii = 0; ii < numMeters; ii++) + { + cmr.register("ignored" + ii, m); + } + assertNotNull(cmr.periodicMeterTicker); + assertTrue(cmr.periodicMeterTicker.getDelay(TimeUnit.SECONDS) <= 1); + assertTrue(ticked.await(1, TimeUnit.MINUTES)); + } } From 67df6a5bffed0de78ba8680802b9e8f9c8ccdb77 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Mon, 31 Mar 2025 14:31:58 -0400 Subject: [PATCH 024/340] Add SSTableIntervalTree latency metric Patch by Ariel Weisberg; Reviewed by David Capwell for CASSANDRA-20502 --- CHANGES.txt | 1 + .../cassandra/db/ColumnFamilyStore.java | 2 +- .../db/lifecycle/LifecycleTransaction.java | 4 ++-- .../cassandra/db/lifecycle/Tracker.java | 22 +++++++++++++------ .../apache/cassandra/db/lifecycle/View.java | 21 +++++++++++++----- .../cassandra/metrics/KeyspaceMetrics.java | 4 ++++ .../cassandra/metrics/TableMetrics.java | 5 +++++ .../cassandra/db/lifecycle/ViewTest.java | 8 +++---- 8 files changed, 47 insertions(+), 20 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 489cc0662b15..4b9488ff1e5c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Add SSTableIntervalTree latency metric (CASSANDRA-20502) * Ignore repetitions of semicolon in CQLSH (CASSANDRA-19956) * Avoid NPE during cms initialization abort (CASSANDRA-20527) * Avoid failing queries when epoch changes and replica goes up/down (CASSANDRA-20489) diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 91a3a40152af..365bc98643dd 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -512,7 +512,7 @@ public ColumnFamilyStore(Keyspace keyspace, { Directories.SSTableLister sstableFiles = directories.sstableLister(Directories.OnTxnErr.IGNORE).skipTemporary(true); sstables = SSTableReader.openAll(this, sstableFiles.list().entrySet(), metadata); - data.addInitialSSTablesWithoutUpdatingSize(sstables, this); + data.addInitialSSTablesWithoutUpdatingSize(sstables); } // compaction strategy should be created after the CFS has been prepared diff --git a/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java b/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java index 79fe0b2923c3..6cbf0d483e00 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java +++ b/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java @@ -294,7 +294,7 @@ public Throwable doAbort(Throwable accumulate) // replace all updated readers with a version restored to its original state List restored = restoreUpdatedOriginals(); List invalid = Lists.newArrayList(Iterables.concat(logged.update, logged.obsolete)); - accumulate = tracker.apply(updateLiveSet(logged.update, restored), accumulate); + accumulate = tracker.apply(updateLiveSet(logged.update, restored, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()), accumulate); accumulate = tracker.notifySSTablesChanged(invalid, restored, OperationType.COMPACTION, accumulate); // setReplaced immediately preceding versions that have not been obsoleted accumulate = setReplaced(logged.update, accumulate); @@ -374,7 +374,7 @@ private Throwable checkpoint(Throwable accumulate) // and don't want anyone else messing with them // apply atomically along with updating the live set of readers tracker.apply(compose(updateCompacting(emptySet(), fresh), - updateLiveSet(toUpdate, staged.update))); + updateLiveSet(toUpdate, staged.update, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()))); // log the staged changes and our newly marked readers marked.addAll(fresh); diff --git a/src/java/org/apache/cassandra/db/lifecycle/Tracker.java b/src/java/org/apache/cassandra/db/lifecycle/Tracker.java index a8b22d1df6d9..73c18328a3d6 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/Tracker.java +++ b/src/java/org/apache/cassandra/db/lifecycle/Tracker.java @@ -45,6 +45,7 @@ import org.apache.cassandra.io.sstable.metadata.StatsMetadata; import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.metrics.LatencyMetrics; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.notifications.INotification; import org.apache.cassandra.notifications.INotificationConsumer; @@ -251,14 +252,14 @@ public void addInitialSSTables(Collection sstables) addSSTablesInternal(sstables, true, false, true); } - public void addInitialSSTablesWithoutUpdatingSize(Collection sstables, ColumnFamilyStore cfs) + public void addInitialSSTablesWithoutUpdatingSize(Collection sstables) { if (!isDummy()) { for (SSTableReader reader : sstables) reader.setupOnline(); } - apply(updateLiveSet(emptySet(), sstables)); + apply(updateLiveSet(emptySet(), sstables, maybeGetSSTableIntervalTreeLatencyMetrics())); notifyAdded(sstables, true); } @@ -279,7 +280,7 @@ private void addSSTablesInternal(Collection sstables, { if (!isDummy()) setupOnline(sstables); - apply(updateLiveSet(emptySet(), sstables)); + apply(updateLiveSet(emptySet(), sstables, maybeGetSSTableIntervalTreeLatencyMetrics())); if(updateSize) maybeFail(updateSizeTracking(emptySet(), sstables, null)); if (maybeIncrementallyBackup) @@ -332,7 +333,7 @@ public Throwable dropSSTables(final Predicate remove, OperationTy { Pair result = apply(view -> { Set toremove = copyOf(filter(view.sstables, and(remove, notIn(view.compacting)))); - return updateLiveSet(toremove, emptySet()).apply(view); + return updateLiveSet(toremove, emptySet(), maybeGetSSTableIntervalTreeLatencyMetrics()).apply(view); }); Set removed = Sets.difference(result.left.sstables, result.right.sstables); @@ -434,7 +435,7 @@ public void replaceFlushed(Memtable memtable, Collection sstables { // sstable may be null if we flushed batchlog and nothing needed to be retained // if it's null, we don't care what state the cfstore is in, we just replace it and continue - apply(View.replaceFlushed(memtable, null)); + apply(View.replaceFlushed(memtable, null, maybeGetSSTableIntervalTreeLatencyMetrics())); return; } @@ -442,7 +443,7 @@ public void replaceFlushed(Memtable memtable, Collection sstables // back up before creating a new Snapshot (which makes the new one eligible for compaction) maybeIncrementallyBackup(sstables); - apply(View.replaceFlushed(memtable, sstables)); + apply(View.replaceFlushed(memtable, sstables, maybeGetSSTableIntervalTreeLatencyMetrics())); Throwable fail; fail = updateSizeTracking(emptySet(), sstables, null); @@ -625,6 +626,13 @@ public View getView() @VisibleForTesting public void removeUnsafe(Set toRemove) { - Pair result = apply(view -> updateLiveSet(toRemove, emptySet()).apply(view)); + Pair result = apply(view -> updateLiveSet(toRemove, emptySet(), maybeGetSSTableIntervalTreeLatencyMetrics()).apply(view)); + } + + public LatencyMetrics maybeGetSSTableIntervalTreeLatencyMetrics() + { + if (cfstore == null) + return null; + return cfstore.metric != null ? cfstore.metric.viewSSTableIntervalTree : null; } } diff --git a/src/java/org/apache/cassandra/db/lifecycle/View.java b/src/java/org/apache/cassandra/db/lifecycle/View.java index ba200d5d0bc1..15c02eeb8c50 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/View.java +++ b/src/java/org/apache/cassandra/db/lifecycle/View.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; @@ -36,6 +37,8 @@ import org.apache.cassandra.db.memtable.Memtable; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.metrics.LatencyMetrics; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.Interval; import static com.google.common.base.Predicates.equalTo; @@ -297,7 +300,7 @@ public boolean apply(View view) } // construct a function to change the liveset in a Snapshot - static Function updateLiveSet(final Set remove, final Collection add) + static Function updateLiveSet(final Set remove, final Collection add, @Nullable LatencyMetrics sstableIntervalTreeLatency) { if (remove.isEmpty() && Iterables.isEmpty(add)) return Functions.identity(); @@ -306,8 +309,11 @@ static Function updateLiveSet(final Set remove, final public View apply(View view) { Map sstableMap = replace(view.sstablesMap, remove, add); - return new View(view.liveMemtables, view.flushingMemtables, sstableMap, view.compactingMap, - SSTableIntervalTree.update(view.intervalTree, remove, add)); + long treeBuildStart = Clock.Global.nanoTime(); + SSTableIntervalTree sstableIntervalTree = SSTableIntervalTree.update(view.intervalTree, remove, add); + if (sstableIntervalTreeLatency != null) + sstableIntervalTreeLatency.addNano(Clock.Global.nanoTime() - treeBuildStart); + return new View(view.liveMemtables, view.flushingMemtables, sstableMap, view.compactingMap, sstableIntervalTree); } }; } @@ -346,7 +352,7 @@ public View apply(View view) } // called after flush: removes memtable from flushingMemtables, and inserts flushed into the live sstable set - static Function replaceFlushed(final Memtable memtable, final Collection flushed) + static Function replaceFlushed(final Memtable memtable, final Collection flushed, @Nullable LatencyMetrics sstableIntervalTreeLatency) { return new Function() { @@ -360,8 +366,11 @@ public View apply(View view) view.compactingMap, view.intervalTree); Map sstableMap = replace(view.sstablesMap, emptySet(), flushed); - return new View(view.liveMemtables, flushingMemtables, sstableMap, view.compactingMap, - SSTableIntervalTree.update(view.intervalTree, null, flushed)); + long treeBuildStart = Clock.Global.nanoTime(); + SSTableIntervalTree sstableIntervalTree = SSTableIntervalTree.update(view.intervalTree, null, flushed); + if (sstableIntervalTreeLatency != null) + sstableIntervalTreeLatency.addNano(Clock.Global.nanoTime() - treeBuildStart); + return new View(view.liveMemtables, flushingMemtables, sstableMap, view.compactingMap, sstableIntervalTree); } }; } diff --git a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java index a1916bebd071..e603381affa6 100644 --- a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java @@ -183,6 +183,8 @@ public class KeyspaceMetrics public final Meter tooManySSTableIndexesReadWarnings; public final Meter tooManySSTableIndexesReadAborts; + public final LatencyMetrics viewSSTableIntervalTree; + public final ImmutableMap, ImmutableMap>> formatSpecificGauges; private final KeyspaceMetricNameFactory factory; @@ -291,6 +293,8 @@ public KeyspaceMetrics(final Keyspace ks) outOfRangeTokenReads = createKeyspaceCounter("ReadOutOfRangeToken"); outOfRangeTokenWrites = createKeyspaceCounter("WriteOutOfRangeToken"); outOfRangeTokenPaxosRequests = createKeyspaceCounter("PaxosOutOfRangeToken"); + + viewSSTableIntervalTree = createLatencyMetrics("ViewSSTableIntervalTree"); } /** diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index fabb0814e49a..fd3a6bac2e75 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -289,6 +289,9 @@ public class TableMetrics public final ImmutableMap, ImmutableMap>> formatSpecificGauges; + // Time spent building SSTableIntervalTree when constructing a new View under the Tracker lock + public final LatencyMetrics viewSSTableIntervalTree; + private static Pair totalNonSystemTablesSize(Predicate predicate) { long total = 0; @@ -861,6 +864,8 @@ public Long getValue() tooManySSTableIndexesReadWarnings = createTableMeter("TooManySSTableIndexesReadWarnings", cfs.keyspace.metric.tooManySSTableIndexesReadWarnings); tooManySSTableIndexesReadAborts = createTableMeter("TooManySSTableIndexesReadAborts", cfs.keyspace.metric.tooManySSTableIndexesReadAborts); + viewSSTableIntervalTree = createLatencyMetrics("ViewSSTableIntervalTree", cfs.keyspace.metric.viewSSTableIntervalTree); + formatSpecificGauges = createFormatSpecificGauges(cfs); } diff --git a/test/unit/org/apache/cassandra/db/lifecycle/ViewTest.java b/test/unit/org/apache/cassandra/db/lifecycle/ViewTest.java index 3364b88133f7..6e5b04941733 100644 --- a/test/unit/org/apache/cassandra/db/lifecycle/ViewTest.java +++ b/test/unit/org/apache/cassandra/db/lifecycle/ViewTest.java @@ -119,7 +119,7 @@ public void testCompaction() testFailure(View.updateCompacting(emptySet(), of(r2)), cur); // update one compacting, one non-compacting, of the liveset to another instance of the same readers; // confirm liveset changes but compacting does not - cur = View.updateLiveSet(copyOf(readers.subList(1, 3)), of(r1, r2)).apply(cur); + cur = View.updateLiveSet(copyOf(readers.subList(1, 3)), of(r1, r2), cfs.metric.viewSSTableIntervalTree).apply(cur); Assert.assertSame(readers.get(0), cur.sstablesMap.get(r0)); Assert.assertSame(r1, cur.sstablesMap.get(r1)); Assert.assertSame(r2, cur.sstablesMap.get(r2)); @@ -179,7 +179,7 @@ public void testFlushing() Assert.assertEquals(memtable2, cur.liveMemtables.get(1)); Assert.assertEquals(memtable3, cur.getCurrentMemtable()); - testFailure(View.replaceFlushed(memtable2, null), cur); + testFailure(View.replaceFlushed(memtable2, null, cfs.metric.viewSSTableIntervalTree), cur); cur = View.markFlushing(memtable2).apply(cur); Assert.assertTrue(cur.flushingMemtables.contains(memtable2)); @@ -196,14 +196,14 @@ public void testFlushing() Assert.assertEquals(memtable2, cur.flushingMemtables.get(1)); Assert.assertEquals(memtable3, cur.getCurrentMemtable()); - cur = View.replaceFlushed(memtable2, null).apply(cur); + cur = View.replaceFlushed(memtable2, null, cfs.metric.viewSSTableIntervalTree).apply(cur); Assert.assertEquals(1, cur.liveMemtables.size()); Assert.assertEquals(1, cur.flushingMemtables.size()); Assert.assertEquals(memtable1, cur.flushingMemtables.get(0)); Assert.assertEquals(memtable3, cur.getCurrentMemtable()); SSTableReader sstable = MockSchema.sstable(1, cfs); - cur = View.replaceFlushed(memtable1, singleton(sstable)).apply(cur); + cur = View.replaceFlushed(memtable1, singleton(sstable), cfs.metric.viewSSTableIntervalTree).apply(cur); Assert.assertEquals(0, cur.flushingMemtables.size()); Assert.assertEquals(1, cur.liveMemtables.size()); Assert.assertEquals(memtable3, cur.getCurrentMemtable()); From d06e49677330b19db99691b8f2bd3f5faedeba5d Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 8 Apr 2025 18:26:15 -0700 Subject: [PATCH 025/340] Update AST Harry CAS fuzz tests to validate the CAS response patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-20517 --- .../cql3/statements/CQL3CasRequest.java | 23 +- .../test/cql3/CasMultiNodeTableWalkBase.java | 3 +- .../test/cql3/SingleNodeTableWalkTest.java | 3 +- .../cql3/SingleNodeTokenConflictTest.java | 7 +- .../test/cql3/StatefulASTBase.java | 8 +- .../cassandra/harry/gen/BijectionCache.java | 6 +- .../harry/model/ASTSingleTableModel.java | 615 ++++++++++++++---- .../harry/model/ASTSingleTableModelTest.java | 87 ++- .../harry/model/BytesPartitionState.java | 153 +++-- .../cassandra/harry/model/PartitionState.java | 6 +- .../cassandra/harry/util/StringUtils.java | 2 +- .../org/apache/cassandra/cql3/KnownIssue.java | 4 + .../cql3/ast/AssignmentOperator.java | 6 + .../cassandra/cql3/ast/CasCondition.java | 11 + .../cassandra/cql3/ast/Conditional.java | 5 + .../apache/cassandra/cql3/ast/Elements.java | 9 + .../apache/cassandra/cql3/ast/Expression.java | 5 + .../apache/cassandra/cql3/ast/Literal.java | 6 + .../apache/cassandra/cql3/ast/Mutation.java | 147 ++++- .../apache/cassandra/cql3/ast/Operator.java | 6 + .../org/apache/cassandra/cql3/ast/Select.java | 15 + .../cassandra/cql3/ast/StandardVisitors.java | 10 + .../apache/cassandra/cql3/ast/Statement.java | 7 +- .../org/apache/cassandra/cql3/ast/Value.java | 5 + .../apache/cassandra/utils/ASTGenerators.java | 97 ++- .../cassandra/utils/ImmutableUniqueList.java | 54 +- 26 files changed, 1048 insertions(+), 252 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index 4db98459ec1f..332f8b9388fa 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -149,7 +149,7 @@ public void addConditions(Clustering clustering, Collection } else if (!(condition instanceof ColumnsConditions)) { - throw new InvalidRequestException("Cannot mix IF conditions and IF NOT EXISTS for the same row"); + throw new InvalidRequestException("Cannot mix IF conditions and " + ((ToCQL) condition).toCQL() + " for the same row"); } ((ColumnsConditions)condition).addConditions(conds, options); } @@ -352,7 +352,12 @@ protected RowCondition(Clustering clustering) public abstract boolean appliesTo(FilteredPartition current) throws InvalidRequestException; } - private static class NotExistCondition extends RowCondition + private interface ToCQL + { + String toCQL(); + } + + private static class NotExistCondition extends RowCondition implements ToCQL { private NotExistCondition(Clustering clustering) { @@ -363,9 +368,15 @@ public boolean appliesTo(FilteredPartition current) { return current.getRow(clustering) == null; } + + @Override + public String toCQL() + { + return "IF NOT EXISTS"; + } } - private static class ExistCondition extends RowCondition + private static class ExistCondition extends RowCondition implements ToCQL { private ExistCondition(Clustering clustering) { @@ -376,6 +387,12 @@ public boolean appliesTo(FilteredPartition current) { return current.getRow(clustering) != null; } + + @Override + public String toCQL() + { + return "IF EXISTS"; + } } private static class ColumnsConditions extends RowCondition diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java index bf8a44dcb946..31d1aab31f15 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java @@ -78,7 +78,8 @@ private State(RandomSource rs, Cluster cluster) @Override protected Gen toMutationGen(ASTGenerators.MutationGenBuilder mutationGenBuilder) { - mutationGenBuilder.withCasGen(i -> true); + mutationGenBuilder.withCasGen(i -> true) + .withAllowUpdateMultipleClusteringKeys(false); // paxos supports but the model doesn't yet // generator might not always generate a cas statement... should fix generator! Gen gen = toGen(mutationGenBuilder.build()).filter(Mutation::isCas); if (metadata.regularAndStaticColumns().stream().anyMatch(c -> c.type.isUDT()) diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java index 1feb9c5f8693..785db954c801 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java @@ -458,7 +458,8 @@ public State(RandomSource rs, Cluster cluster) .withoutTtl() .withoutTimestamp() .withPartitions(Generators.fromGen(Gens.mixedDistribution(uniquePartitions).next(rs))) - .withColumnExpressions(e -> e.withOperators(Generators.fromGen(BOOLEAN_DISTRIBUTION.next(rs)))); + .withColumnExpressions(e -> e.withOperators(Generators.fromGen(BOOLEAN_DISTRIBUTION.next(rs)))) + .withIgnoreIssues(IGNORED_ISSUES); if (IGNORED_ISSUES.contains(KnownIssue.SAI_EMPTY_TYPE)) { model.factory.regularAndStaticColumns.stream() diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java index 179b107eae77..dd15b99d5520 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java @@ -335,9 +335,9 @@ class State extends CommonState this.neighbors = rs.nextBoolean() ? Collections.emptyList() : extractNeighbors(pkValues); // in case neighbors conflicts with pkValues or tokenValues, use ImmutableUniqueList which will ignore rather than fail this.pkValues = ImmutableUniqueList.builder() - .mayAddAll(pkValues) - .mayAddAll(tokenValues) - .mayAddAll(neighbors) + .addAll(pkValues) + .addAll(tokenValues) + .addAll(neighbors) .build(); this.pkGen = Gens.pick(pkValues); this.order = new TreeSet<>(PK_TYPE); @@ -367,6 +367,7 @@ class State extends CommonState .withoutTtl() .withoutTimestamp() .withPartitions(SourceDSL.arbitrary().pick(uniquePartitions)) + .withIgnoreIssues(IGNORED_ISSUES) .build()); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java index a90b4679797c..4a4a1e6a6147 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java @@ -269,7 +269,7 @@ protected BaseState(RandomSource rs, Cluster cluster, TableMetadata metadata) this.metadata = metadata; this.tableRef = TableReference.from(metadata); - this.model = new ASTSingleTableModel(metadata); + this.model = new ASTSingleTableModel(metadata, IGNORED_ISSUES); createTable(metadata); } @@ -388,8 +388,8 @@ protected ConsistencyLevel mutationCl() else annotate += ", " + postfix; Mutation finalMutation = mutation; return new Property.SimpleCommand<>(humanReadable(mutation, annotate), s -> { - s.executeQuery(inst, Integer.MAX_VALUE, s.mutationCl(), finalMutation); - s.model.update(finalMutation); + var result = s.executeQuery(inst, Integer.MAX_VALUE, s.mutationCl(), finalMutation); + s.model.updateAndValidate(result, finalMutation); s.mutation(); }); } @@ -451,7 +451,7 @@ protected ByteBuffer[][] executeQuery(IInstance instance, int fetchSize, Consist SimpleStatement ss = new SimpleStatement(stmt.toCQL(), (Object[]) stmt.bindsEncoded()); if (fetchSize != Integer.MAX_VALUE) ss.setFetchSize(fetchSize); - if (stmt instanceof Mutation) + if (stmt.kind() == Statement.Kind.MUTATION) { switch (cl) { diff --git a/test/harry/main/org/apache/cassandra/harry/gen/BijectionCache.java b/test/harry/main/org/apache/cassandra/harry/gen/BijectionCache.java index a388f195e956..d1f9e0db7b52 100644 --- a/test/harry/main/org/apache/cassandra/harry/gen/BijectionCache.java +++ b/test/harry/main/org/apache/cassandra/harry/gen/BijectionCache.java @@ -41,6 +41,8 @@ public BijectionCache(Comparator comparator) @Override public T inflate(long descriptor) { + if (MagicConstants.NIL_DESCR == descriptor) + throw new IllegalArgumentException("Asked for NIL_DESCR"); T value = valueToDescriptor.inverse().get(descriptor); if (value == null) throw new IllegalArgumentException(String.format("Attempted to inflate %d, but it is undefined", descriptor)); @@ -124,6 +126,8 @@ public int byteSize() @Override public int compare(long l, long r) { - throw new UnsupportedOperationException(); + T lhs = inflate(l); + T rhs = inflate(r); + return comparator.compare(lhs, rhs); } } diff --git a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java index f6a03bdd1784..ee08634e1ada 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java +++ b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java @@ -24,8 +24,10 @@ import java.util.BitSet; import java.util.Collection; import java.util.Collections; +import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.NavigableSet; @@ -33,17 +35,18 @@ import java.util.Optional; import java.util.Set; import java.util.TreeMap; +import java.util.function.Function; import java.util.function.IntFunction; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import accord.utils.Invariants; +import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.AssignmentOperator; import org.apache.cassandra.cql3.ast.CasCondition; import org.apache.cassandra.cql3.ast.Conditional; @@ -65,6 +68,7 @@ import org.apache.cassandra.db.BufferClustering; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.BooleanType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.dht.Token; import org.apache.cassandra.harry.model.BytesPartitionState.PrimaryKey; @@ -75,18 +79,32 @@ import org.apache.cassandra.utils.ImmutableUniqueList; import org.apache.cassandra.utils.Pair; +import static org.apache.cassandra.cql3.ast.Elements.symbols; import static org.apache.cassandra.harry.model.BytesPartitionState.asCQL; public class ASTSingleTableModel { private static final ByteBuffer[][] NO_ROWS = new ByteBuffer[0][]; + private static final Symbol CAS_APPLIED = new Symbol.UnquotedSymbol("[applied]", BooleanType.instance); + private static final ImmutableUniqueList CAS_APPLIED_COLUMNS = ImmutableUniqueList.builder().add(CAS_APPLIED).build(); + private static final ByteBuffer[][] CAS_SUCCESS_RESULT = new ByteBuffer[][] { new ByteBuffer[] {BooleanType.instance.decompose(true)} }; + private static final ByteBuffer FALSE = BooleanType.instance.decompose(false); + private static final ByteBuffer[][] CAS_REJECTION_RESULT = new ByteBuffer[][] { new ByteBuffer[] {FALSE} }; public final BytesPartitionState.Factory factory; + private final EnumSet ignoredIssues; private final TreeMap partitions = new TreeMap<>(); + private long numMutations = 0; public ASTSingleTableModel(TableMetadata metadata) + { + this(metadata, EnumSet.noneOf(KnownIssue.class)); + } + + public ASTSingleTableModel(TableMetadata metadata, EnumSet ignoredIssues) { this.factory = new BytesPartitionState.Factory(metadata); + this.ignoredIssues = Objects.requireNonNull(ignoredIssues); } public NavigableSet partitionKeys() @@ -191,6 +209,212 @@ private void indexRowColumn(TreeMap> index, boolean public void update(Mutation mutation) { if (!shouldApply(mutation)) return; + updateInternal(mutation); + } + + public void updateAndValidate(ByteBuffer[][] actual, Mutation mutation) + { + if (!shouldApply(mutation)) + { + if (mutation.isCas()) + validateCasNotApplied(actual, mutation); + return; + } + if (mutation.isCas()) + validate(CAS_APPLIED_COLUMNS, actual, CAS_SUCCESS_RESULT); + updateInternal(mutation); + } + + private void validateCasNotApplied(ByteBuffer[][] actual, Mutation mutation) + { + // see org.apache.cassandra.cql3.statements.ModificationStatement.buildCasFailureResultSet + var condition = mutation.casCondition().get(); + var partition = partitions.get(referencePartition(mutation)); + var cd = cdOrNull(mutation); + BytesPartitionState.Row row = partition == null ? null : partition.get(cd); + boolean touchesStaticColumns = !factory.staticColumns.isEmpty() + && symbols(mutation).anyMatch(factory.staticColumns::contains); + ImmutableUniqueList columns; + ByteBuffer[][] expected; + if (partition == null) + { + columns = CAS_APPLIED_COLUMNS; + expected = CAS_REJECTION_RESULT; + } + else if (condition instanceof CasCondition.IfCondition) + { + if (touchesStaticColumns + && cd == null + && ignoredIssues.contains(KnownIssue.CAS_ON_STATIC_ROW)) + { + if (casOnStaticRowCouldReturnData(partition)) + { + // if the static row exists, we can match the col condition + // if the static row doesn't exist, and there are rows, then we can return null + List conditionReferencedColumns = conditionReferencedColumns(mutation); + columns = ImmutableUniqueList.builder(conditionReferencedColumns.size() + 1) + .add(CAS_APPLIED) + .addAll(conditionReferencedColumns) + .build(); + ByteBuffer[] result = getRowAsByteBuffer(columns, partition, row); + result[0] = FALSE; + + expected = new ByteBuffer[][]{ result }; + } + else + { + // static/row don't exist, so can't return a current state + columns = CAS_APPLIED_COLUMNS; + expected = CAS_REJECTION_RESULT; + } + } + else if (partition.staticRow().isEmpty() + && (cd == null || row == null)) + { + // static/row don't exist, so can't return a current state + columns = CAS_APPLIED_COLUMNS; + expected = CAS_REJECTION_RESULT; + } + else + { + List conditionReferencedColumns = conditionReferencedColumns(mutation); + columns = ImmutableUniqueList.builder(conditionReferencedColumns.size() + 1) + .add(CAS_APPLIED) + .addAll(conditionReferencedColumns) + .build(); + ByteBuffer[] result = getRowAsByteBuffer(columns, partition, row); + result[0] = FALSE; + + expected = new ByteBuffer[][]{ result }; + } + } + else if (condition == CasCondition.Simple.Exists) + { + if (touchesStaticColumns + && cd == null + && ignoredIssues.contains(KnownIssue.CAS_ON_STATIC_ROW)) + { + if (casOnStaticRowCouldReturnData(partition)) + { + if (!partition.rows().isEmpty()) + row = partition.rows().get(0); + // Partition level IF EXISTS checks if the static row exists (which is defined as notEmpty), so its known that the static row is empty! + // One would expect that the DELETE just returns [[applied]] but it actually returns a row... but we are not working with rows, we are working with partitions... + // This is a leaky implementation detail! Checking for the partition to exist is the following ReadCommand: + // SELECT s0, s1 WHERE pk = ? LIMIT 1 + // this doesn't include the row columns, only the static columns... but the LIMIT returned a row and not + // the static row (because the static row is empty)! + columns = ImmutableUniqueList.builder(factory.selectionOrder.size() + 1) + .add(CAS_APPLIED) + .addAll(factory.selectionOrder) + .build(); + ByteBuffer[] result = getRowAsByteBuffer(columns, partition, row); + result[0] = FALSE; + if (row != null) + { + for (var c : factory.regularColumns) + // null out the row columns.... + result[columns.indexOf(c)] = null; + } + + expected = new ByteBuffer[][]{ result }; + } + else + { + // static/row don't exist, so can't return a current state + columns = CAS_APPLIED_COLUMNS; + expected = CAS_REJECTION_RESULT; + } + } + else if (!touchesStaticColumns || partition.staticRow().isEmpty()) + { + columns = CAS_APPLIED_COLUMNS; + expected = CAS_REJECTION_RESULT; + } + else + { + columns = ImmutableUniqueList.builder(factory.selectionOrder.size() + 1) + .add(CAS_APPLIED) + .addAll(factory.selectionOrder) + .build(); + ByteBuffer[] result = getRowAsByteBuffer(columns, partition, row); + result[0] = FALSE; + + expected = new ByteBuffer[][]{ result }; + } + } + else if (condition == CasCondition.Simple.NotExists) + { + if (touchesStaticColumns + && cd == null + && ignoredIssues.contains(KnownIssue.CAS_ON_STATIC_ROW) + && !partition.rows().isEmpty()) + row = partition.rows().get(0); + columns = ImmutableUniqueList.builder(factory.selectionOrder.size() + 1) + .add(CAS_APPLIED) + .addAll(factory.selectionOrder) + .build(); + ByteBuffer[] result = getRowAsByteBuffer(columns, partition, row); + result[0] = FALSE; + if (!touchesStaticColumns) + { + for (var s : factory.staticColumns) + result[columns.indexOf(s)] = null; + } + + if (cd == null + && ignoredIssues.contains(KnownIssue.CAS_ON_STATIC_ROW) + && row != null) + { + for (var c : factory.regularColumns) + // null out the row columns.... + result[columns.indexOf(c)] = null; + } + + expected = new ByteBuffer[][]{ result }; + } + else + { + throw new AssertionError(); + } + validate(columns, actual, expected); + } + + private static boolean casOnStaticRowCouldReturnData(BytesPartitionState partition) + { + return !partition.staticRow().isEmpty() + || !partition.rows().isEmpty(); + } + private List conditionReferencedColumns(Mutation mutation) + { + //TODO (correctness): does ast.AND support the correct "order" as seen from CAS? + LinkedHashSet regularCols = null, staticCols = null; + for (var c : (Iterable) () -> symbols(mutation.casCondition().get()).distinct().iterator()) + { + if (factory.staticColumns.contains(c)) + { + if (staticCols == null) + staticCols = new LinkedHashSet<>(); + staticCols.add(c); + } + else + { + if (regularCols == null) + regularCols = new LinkedHashSet<>(); + regularCols.add(c); + } + } + List ordered = new ArrayList<>(); + if (regularCols != null) + ordered.addAll(regularCols); + if (staticCols != null) + ordered.addAll(staticCols); + return ordered; + } + + private void updateInternal(Mutation mutation) + { + numMutations++; switch (mutation.kind) { case INSERT: @@ -209,6 +433,7 @@ public void update(Mutation mutation) private void update(Mutation.Insert insert) { + long nowTs = insert.timestampOrDefault(numMutations); Clustering pd = pd(insert); BytesPartitionState partition = partitions.get(factory.createRef(pd)); if (partition == null) @@ -219,25 +444,25 @@ private void update(Mutation.Insert insert) Map values = insert.values; if (!factory.staticColumns.isEmpty() && !Sets.intersection(factory.staticColumns.asSet(), values.keySet()).isEmpty()) { - // static columns to add in. If we are doing something like += to a row that doesn't exist, we still update statics... - Map write = new HashMap<>(); - for (Symbol col : Sets.intersection(factory.staticColumns.asSet(), values.keySet())) - write.put(col, eval(values.get(col))); - partition.setStaticColumns(write); + maybeUpdateColumns(Sets.intersection(factory.staticColumns.asSet(), values.keySet()), + partition.staticRow(), + nowTs, values, + partition::setStaticColumns); } // table has clustering but non are in the write, so only pk/static can be updated if (!factory.clusteringColumns.isEmpty() && Sets.intersection(factory.clusteringColumns.asSet(), values.keySet()).isEmpty()) return; - Map write = new HashMap<>(); - for (Symbol col : Sets.intersection(factory.regularColumns.asSet(), values.keySet())) - write.put(col, eval(values.get(col))); - partition.setColumns(key(insert.values, factory.clusteringColumns), - write, - true); + BytesPartitionState finalPartition = partition; + var cd = key(insert.values, factory.clusteringColumns); + maybeUpdateColumns(Sets.intersection(factory.regularColumns.asSet(), values.keySet()), + partition.get(cd), + nowTs, values, + (ts, write) -> finalPartition.setColumns(cd, ts, write, true)); } private void update(Mutation.Update update) { + long nowTs = update.timestampOrDefault(numMutations); var split = splitOnPartition(update.where.simplify()); List> pks = split.left; List remaining = split.right; @@ -252,43 +477,30 @@ private void update(Mutation.Update update) Map set = update.set; if (!factory.staticColumns.isEmpty() && !Sets.intersection(factory.staticColumns.asSet(), set.keySet()).isEmpty()) { - // static columns to add in. If we are doing something like += to a row that doesn't exist, we still update statics... - Map write = new HashMap<>(); - for (Symbol col : Sets.intersection(factory.staticColumns.asSet(), set.keySet())) - { - ByteBuffer current = partition.staticRow().get(col); - EvalResult result = eval(col, current, set.get(col)); - if (result.kind == EvalResult.Kind.SKIP) continue; - write.put(col, result.value); - } - if (!write.isEmpty()) - partition.setStaticColumns(write); + maybeUpdateColumns(Sets.intersection(factory.staticColumns.asSet(), set.keySet()), + partition.staticRow(), + nowTs, set, + partition::setStaticColumns); } // table has clustering but non are in the write, so only pk/static can be updated if (!factory.clusteringColumns.isEmpty() && remaining.isEmpty()) return; + BytesPartitionState finalPartition = partition; for (Clustering cd : clustering(remaining)) { - Map write = new HashMap<>(); - for (Symbol col : Sets.intersection(factory.regularColumns.asSet(), set.keySet())) - { - ByteBuffer current = partition.get(cd, col); - EvalResult result = eval(col, current, set.get(col)); - if (result.kind == EvalResult.Kind.SKIP) continue; - write.put(col, result.value); - } - - if (!write.isEmpty()) - partition.setColumns(cd, write, false); + maybeUpdateColumns(Sets.intersection(factory.regularColumns.asSet(), set.keySet()), + partition.get(cd), + nowTs, set, + (ts, write) -> finalPartition.setColumns(cd, ts, write, false)); } } } private enum DeleteKind {PARTITION, ROW, COLUMN} - private void update(Mutation.Delete delete) { + long nowTs = delete.timestampOrDefault(numMutations); //TODO (coverage): range deletes var split = splitOnPartition(delete.where.simplify()); List> pks = split.left; @@ -313,7 +525,7 @@ else if (!clusterings.isEmpty()) case ROW: for (Clustering cd : clusterings) { - partition.deleteRow(cd); + partition.deleteRow(cd, nowTs); if (partition.shouldDelete()) partitions.remove(partition.ref()); } @@ -321,7 +533,7 @@ else if (!clusterings.isEmpty()) case COLUMN: if (clusterings.isEmpty()) { - partition.deleteStaticColumns(columns); + partition.deleteStaticColumns(nowTs, columns); if (partition.shouldDelete()) partitions.remove(partition.ref()); } @@ -329,7 +541,7 @@ else if (!clusterings.isEmpty()) { for (Clustering cd : clusterings) { - partition.deleteColumns(cd, columns); + partition.deleteColumns(cd, nowTs, columns); if (partition.shouldDelete()) partitions.remove(partition.ref()); } @@ -341,54 +553,68 @@ else if (!clusterings.isEmpty()) } } + private static void maybeUpdateColumns(Set columns, + @Nullable BytesPartitionState.Row row, + long nowTs, Map set, + ColumnUpdate update) + { + if (columns.isEmpty()) + { + update.update(nowTs, Collections.emptyMap()); + return; + } + // static columns to add in. If we are doing something like += to a row that doesn't exist, we still update statics... + Map write = new HashMap<>(); + for (Symbol col : columns) + { + ByteBuffer current = row == null ? null : row.get(col); + EvalResult result = eval(col, current, set.get(col)); + if (result.kind == EvalResult.Kind.SKIP) continue; + write.put(col, result.value); + } + if (!write.isEmpty()) + update.update(nowTs, write); + } + public boolean shouldApply(Mutation mutation) { if (!mutation.isCas()) return true; return shouldApply(mutation, selectPartitionForCAS(mutation)); } - private SelectResult selectPartitionForCAS(Mutation mutation) + private CasContext selectPartitionForCAS(Mutation mutation) { - var partition = partitions.get(factory.createRef(pd(mutation))); - if (partition == null) return SelectResult.ordered(factory.selectionOrder, NO_ROWS); - - var cd = cdOrNull(mutation); - var row = cd == null ? null : partition.get(cd); - ImmutableUniqueList columns = cd != null ? factory.selectionOrder : factory.partitionAndStaticColumns; - return SelectResult.ordered(columns, new ByteBuffer[][] { getRowAsByteBuffer(columns, partition, row)}); + BytesPartitionState.Ref ref = referencePartition(mutation); + Clustering cd = cdOrNull(mutation); + BytesPartitionState partition = partitions.get(ref); + return new CasContext(ref, cd, partition); } - private boolean shouldApply(Mutation mutation, SelectResult current) + private boolean shouldApply(Mutation mutation, CasContext ctx) { Preconditions.checkArgument(mutation.isCas()); // process condition - CasCondition condition; - switch (mutation.kind) - { - case INSERT: - condition = CasCondition.Simple.NotExists; - break; - case UPDATE: - condition = ((Mutation.Update) mutation).casCondition.get(); - break; - case DELETE: - condition = ((Mutation.Delete) mutation).casCondition.get(); - break; - default: - throw new UnsupportedOperationException(mutation.kind.name()); - } + CasCondition condition = mutation.casCondition().get(); + boolean partitionOrRow = ctx.clustering == null; + boolean partitionKnown = ctx.partition != null; + BytesPartitionState.Row row = partitionKnown && !partitionOrRow + ? ctx.partition.get(ctx.clustering) + : null; if (condition instanceof CasCondition.Simple) { - boolean hasPartition = current.rows.length > 0; - boolean partitionOrRow = current.columns.equals(factory.partitionAndStaticColumns); - boolean hasRow = partitionOrRow ? hasPartition : current.isAllDefined(factory.clusteringColumns); + if (partitionOrRow && factory.staticColumns.isEmpty()) + throw new AssertionError("Attempted to create a EXISTS condition on partition without static columns; " + mutation.toCQL()); + // CAS's definition of partition EXISTS isn't based off the partition existing, its based off the static row + // existing (aka at least 1 static column exists and is not null). + boolean hasPartition = partitionKnown && !ctx.partition.staticRow().isEmpty(); + boolean hasRow = row != null; // don't do !isEmpty here as liveness dictates the existence of a row. If you INSERT a row then delete all its columns, it still exists! var simple = (CasCondition.Simple) condition; switch (simple) { case Exists: - return hasRow; + return partitionOrRow ? hasPartition : hasRow; case NotExists: - return !hasRow; + return partitionOrRow ? !hasPartition : !hasRow; default: throw new UnsupportedOperationException(simple.name()); } @@ -396,6 +622,11 @@ private boolean shouldApply(Mutation mutation, SelectResult current) var ifCondition = (CasCondition.IfCondition) condition; String letRow = "row"; Symbol rowSymbol = Symbol.unknownType(letRow); + ImmutableUniqueList columns = partitionOrRow ? factory.partitionAndStaticColumns : factory.selectionOrder; + SelectResult current = SelectResult.ordered(columns, + partitionKnown + ? new ByteBuffer[][] { getRowAsByteBuffer(columns, ctx.partition, row)} + : NO_ROWS); Map lets = Map.of(letRow, current); // point the columns to be row.column that way it matches LET clause in BEGIN TRANSACTION, allowing better reuse var updatedCondition = ifCondition.conditional.visit(new Visitor() @@ -410,6 +641,11 @@ public ReferenceExpression visit(ReferenceExpression r) return process(updatedCondition, lets); } + public BytesPartitionState.Ref referencePartition(Mutation mutation) + { + return factory.createRef(pd(mutation)); + } + private boolean process(Conditional condition, Map lets) { if (condition.getClass() == Conditional.Is.class) @@ -531,7 +767,7 @@ private Pair>, List> splitOnClustering( private Pair>, List> splitOn(ImmutableUniqueList.AsSet columns, List conditionals) { // pk requires equality - Map> pks = new HashMap<>(); + Map> pks = new HashMap<>(); List other = new ArrayList<>(); for (Conditional c : conditionals) { @@ -544,7 +780,7 @@ private Pair>, List> splitOn(ImmutableU ByteBuffer bb = eval(w.rhs); if (pks.containsKey(col)) throw new IllegalArgumentException("Partition column " + col + " was defined multiple times in the WHERE clause"); - pks.put(col, Collections.singleton(bb)); + pks.put(col, Collections.singletonList(bb)); } else { @@ -559,8 +795,8 @@ else if (c instanceof Conditional.In) Symbol col = (Symbol) i.ref; if (pks.containsKey(col)) throw new IllegalArgumentException("Partition column " + col + " was defined multiple times in the WHERE clause"); - var set = i.expressions.stream().map(ASTSingleTableModel::eval).collect(Collectors.toSet()); - pks.put(col, set); + var list = i.expressions.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList()); + pks.put(col, list); } else { @@ -582,19 +818,51 @@ else if (c instanceof Conditional.In) return Pair.create(partitionKeys, other); } - private List> keys(Collection columns, Map> pks) + private static ImmutableUniqueList> keys(Collection columns, Map> columnValues) { - //TODO (coverage): handle IN - ByteBuffer[] bbs = new ByteBuffer[columns.size()]; + return keys(columns, columnValues, Function.identity()); + } + + private static ImmutableUniqueList> keys(Map> values, Collection columns) + { + return keys(columns, values, ASTSingleTableModel::eval); + } + + private static ImmutableUniqueList> keys(Collection columns, + Map> columnValues, + Function eval) + { + if (columns.isEmpty()) return ImmutableUniqueList.empty(); + List current = new ArrayList<>(); + current.add(new ByteBuffer[columns.size()]); int idx = 0; - for (Symbol s : columns) + for (Symbol symbol : columns) { - Set values = pks.get(s); - if (values.size() > 1) - throw new UnsupportedOperationException("IN clause is currently unsupported... its on the backlog!"); - bbs[idx++] = Iterables.getFirst(values, null); + int position = idx++; + List expressions = columnValues.get(symbol); + ByteBuffer firstBB = eval.apply(expressions.get(0)); + current.forEach(bbs -> bbs[position] = firstBB); + if (expressions.size() > 1) + { + // this has a multiplying effect... if there is 1 row and there are 2 expressions, then we have 2 rows + // if there are 2 rows and 2 expressions, we have 4 rows... and so on... + List copy = new ArrayList<>(current); + for (int i = 1; i < expressions.size(); i++) + { + ByteBuffer bb = eval.apply(expressions.get(i)); + for (ByteBuffer[] bbs : copy) + { + bbs = bbs.clone(); + bbs[position] = bb; + current.add(bbs); + } + } + } } - return Collections.singletonList(BufferClustering.make(bbs)); + var builder = ImmutableUniqueList.>builder(); + for (var row : current) + builder.add(new BufferClustering(row)); + return builder.build(); } private Clustering pd(Mutation mutation) @@ -683,6 +951,18 @@ public List getByToken(Token token) public void validate(ByteBuffer[][] actual, Select select) { + if (select.source.isEmpty()) + throw new AssertionError("SELECT without a FROM only allowed in a BEGIN TRANSACTION"); + { + var ref = select.source.get(); + if (ref.keyspace.isPresent()) + { + if (!factory.metadata.keyspace.equals(ref.keyspace.get())) + throw new AssertionError("Incorrect keyspace: expected " + factory.metadata.keyspace + " but given " + ref.keyspace.get()); + } + if (!factory.metadata.name.equals(ref.name)) + throw new AssertionError("Incorrect table: expected " + factory.metadata.name + " but given " + ref.name); + } SelectResult results = getRowsAsByteBuffer(select); try { @@ -692,7 +972,7 @@ public void validate(ByteBuffer[][] actual, Select select) } else { - validate(actual, results.rows); + validate(results.columns, actual, results.rows); } } catch (AssertionError e) @@ -704,13 +984,19 @@ public void validate(ByteBuffer[][] actual, Select select) } } - public void validate(ByteBuffer[][] actual, ByteBuffer[][] expected) - { - validate(factory.selectionOrder, actual, expected); - } - private static void validate(ImmutableUniqueList columns, ByteBuffer[][] actual, ByteBuffer[][] expected) { + int expectedLength = columns.size(); + for (var a : actual) + { + if (a.length != expectedLength) + throw new AssertionError("actual rows do not match the schema " + columns + "; found " + Arrays.toString(a)); + } + for (var e : expected) + { + if (e.length != expectedLength) + throw new AssertionError("expected rows do not match the schema " + columns + "; found " + Arrays.toString(e)); + } // check any order validateAnyOrder(columns, toRow(columns, actual), toRow(columns, expected)); // all rows match, but are they in the right order? @@ -722,27 +1008,9 @@ private static void validateAnyOrder(ImmutableUniqueList columns, Set toRow(ImmutableUniqueList columns, ByteBuffer[][ return set; } + private static class CasContext + { + private final BytesPartitionState.Ref ref; + @Nullable + private final Clustering clustering; + @Nullable + private final BytesPartitionState partition; + + private CasContext(BytesPartitionState.Ref ref, @Nullable Clustering clustering, @Nullable BytesPartitionState partition) + { + this.ref = ref; + this.clustering = clustering; + this.partition = partition; + } + } + private static class SelectResult { private final ImmutableUniqueList columns; @@ -885,17 +1198,47 @@ public boolean isAllDefined(ImmutableUniqueList selectColumns) } } - public ImmutableUniqueList columns(Select select) + private ImmutableUniqueList columns(Select select) { if (select.selections.isEmpty()) return factory.selectionOrder; - throw new UnsupportedOperationException("Getting columns from select other than SELECT * is currently not supported"); + var builder = ImmutableUniqueList.builder(); + for (var e : select.selections) + { + if (!(e instanceof Symbol)) + throw new UnsupportedOperationException("Only column selection is currently supported"); + builder.add((Symbol) e); + } + return builder.build(); + } + + private static ByteBuffer[][] filter(ByteBuffer[][] rows, ImmutableUniqueList actualOrder, ImmutableUniqueList targetOrder) + { + if (actualOrder.equals(targetOrder)) return rows; + if (rows.length == 0) return rows; + if (!actualOrder.containsAll(targetOrder)) + throw new UnsupportedOperationException("Only column selection is currently supported"); + ByteBuffer[][] result = new ByteBuffer[rows.length][]; + for (int i = 0; i < rows.length; i++) + { + ByteBuffer[] actual = rows[i]; + ByteBuffer[] target = new ByteBuffer[targetOrder.size()]; + for (int j = 0; j < targetOrder.size(); j++) + { + Symbol col = targetOrder.get(j); + int actualIndex = actualOrder.indexOf(col); + target[j] = actual[actualIndex]; + } + result[i] = target; + } + return result; } private SelectResult getRowsAsByteBuffer(Select select) { - ImmutableUniqueList columns = columns(select); + ImmutableUniqueList selectOrder = factory.selectionOrder; + ImmutableUniqueList targetOrder = columns(select); if (select.where.isEmpty()) - return SelectResult.ordered(columns, getRowsAsByteBuffer(applyLimits(all(), select.perPartitionLimit, select.limit))); + return SelectResult.ordered(targetOrder, filter(getRowsAsByteBuffer(applyLimits(all(), select.perPartitionLimit, select.limit)), selectOrder, targetOrder)); LookupContext ctx = context(select); List primaryKeys; if (ctx.unmatchable) @@ -923,7 +1266,7 @@ else if (ctx.tokenLowerBound != null || ctx.tokenUpperBound != null) } primaryKeys = applyLimits(primaryKeys, select.perPartitionLimit, select.limit); //TODO (correctness): now that we have the rows we need to handle the selections/aggregation/limit/group-by/etc. - return new SelectResult(columns, getRowsAsByteBuffer(primaryKeys), ctx.unordered); + return new SelectResult(targetOrder, filter(getRowsAsByteBuffer(primaryKeys), selectOrder, targetOrder), ctx.unordered); } private List applyLimits(List primaryKeys, Optional perPartitionLimitOpt, Optional limitOpt) @@ -1199,37 +1542,6 @@ private Clustering key(Map values, ImmutableUniq return keys.get(0); } - private List> keys(Map> values, ImmutableUniqueList columns) - { - if (columns.isEmpty()) return Collections.singletonList(Clustering.EMPTY); - List current = new ArrayList<>(); - current.add(new ByteBuffer[columns.size()]); - for (Symbol symbol : columns) - { - int position = columns.indexOf(symbol); - List expressions = values.get(symbol); - ByteBuffer firstBB = eval(expressions.get(0)); - current.forEach(bbs -> bbs[position] = firstBB); - if (expressions.size() > 1) - { - // this has a multiplying effect... if there is 1 row and there are 2 expressions, then we have 2 rows - // if there are 2 rows and 2 expressions, we have 4 rows... and so on... - List copy = new ArrayList<>(current); - for (int i = 1; i < expressions.size(); i++) - { - ByteBuffer bb = eval(expressions.get(i)); - for (ByteBuffer[] bbs : copy) - { - bbs = bbs.clone(); - bbs[position] = bb; - current.add(bbs); - } - } - } - } - return current.stream().map(BufferClustering::new).collect(Collectors.toList()); - } - private static class EvalResult { private static final EvalResult SKIP = new EvalResult(Kind.SKIP, null); @@ -1393,6 +1705,22 @@ private LookupContext(Select select) maybeNormalizeTokenBounds(); } + private LookupContext(Mutation mutation) + { + if (mutation.kind == Mutation.Kind.INSERT) + { + var insert = mutation.asInsert(); + for (var e : insert.values.entrySet()) + eq.put(e.getKey(), Collections.singletonList(e.getValue())); + } + else + { + addConditional(mutation.kind == Mutation.Kind.UPDATE + ? mutation.asUpdate().where + : mutation.asDelete().where); + } + } + private void maybeNormalizeTokenBounds() { if (tokenLowerBound != null && tokenUpperBound != null) @@ -1685,4 +2013,9 @@ private TokenCondition(Inequality inequality, Token token) this.token = token; } } + + private interface ColumnUpdate + { + void update(long nowTs, Map write); + } } diff --git a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModelTest.java b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModelTest.java index a04425f82722..3bdf742ce84d 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModelTest.java +++ b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModelTest.java @@ -651,17 +651,102 @@ public void assignmentOperatorMultiCellCollections() model.validate(rows(row(metadata, 0, List.of(42, 42), Set.of(0, 42), Map.of(42, 0), List.of(42, 42), Set.of(0, 42), Map.of(42, 0))), Select.builder(metadata).build()); } + @Test + public void insertEmptyRow() + { + TableMetadata metadata = defaultTable() + .addPartitionKeyColumn("pk", Int32Type.instance) + .addStaticColumn("s", Int32Type.instance) + .addClusteringColumn("ck", Int32Type.instance) + .addRegularColumn("r", Int32Type.instance) + .build(); + ASTSingleTableModel model = new ASTSingleTableModel(metadata); + + model.update(Mutation.insert(metadata) + .value("pk", 0) + .value("s", 0) + .value("ck", 0) + .build()); + model.validate(rows(row(metadata, 0, 0, 0, null)), Select.builder(metadata).build()); + } + + @Test + public void updateEmptyRow() + { + TableMetadata metadata = defaultTable() + .addPartitionKeyColumn("pk", Int32Type.instance) + .addStaticColumn("s", Int32Type.instance) + .addClusteringColumn("ck", Int32Type.instance) + .addRegularColumn("r", Int32Type.instance) + .build(); + ASTSingleTableModel model = new ASTSingleTableModel(metadata); + + model.update(Mutation.update(metadata) + .set("s", 0) + .value("pk", 0) + .value("ck", 0) + .build()); + model.validate(rows(row(metadata, 0, null, 0, null)), Select.builder(metadata).build()); + } + + @Test + public void deleteColumnUpdateDoesntHavePartitionState() + { + TableMetadata metadata = defaultTable() + .addPartitionKeyColumn("pk", Int32Type.instance) + .addStaticColumn("s", Int32Type.instance) + .addClusteringColumn("ck", Int32Type.instance) + .addRegularColumn("r", ListType.getInstance(Int32Type.instance, true)) + .build(); + ASTSingleTableModel model = new ASTSingleTableModel(metadata); + + model.update(Mutation.update(metadata) + .set("r", List.of(0)) + .set("s", 0) + .value("pk", 0) + .value("ck", 0) + .build()); + model.update(Mutation.update(metadata) + .set("r", List.of(1)) + .value("pk", 0) + .value("ck", 1) + .build()); + model.validate(rows(row(metadata, 0, 0, 0, List.of(0)), + row(metadata, 0, 1, 0, List.of(1))), Select.builder(metadata).build()); + + model.update(Mutation.delete(metadata) + .columns("r", "s") + .value("pk", 0) + .value("ck", 0) + .build()); + model.validate(rows(row(metadata, 0, 1, null, List.of(1))), Select.builder(metadata).build()); + } + + private interface SimpleWrite + { + void write(String name, T value, long ts); + } + private static ByteBuffer[][] rows(ByteBuffer[]... rows) { return rows; } + private static ByteBuffer[] row(ByteBuffer... values) + { + return values; + } + private static ByteBuffer[] row(TableMetadata metadata, Object... values) { ByteBuffer[] row = new ByteBuffer[values.length]; var it = metadata.allColumnsInSelectOrder(); for (int i = 0; i < values.length && it.hasNext(); i++) - row[i] = it.next().type.decomposeUntyped(values[i]); + { + ColumnMetadata column = it.next(); + Object value = values[i]; + row[i] = value == null ? null : column.type.decomposeUntyped(value); + } return row; } diff --git a/test/harry/main/org/apache/cassandra/harry/model/BytesPartitionState.java b/test/harry/main/org/apache/cassandra/harry/model/BytesPartitionState.java index c2d18e573d81..70988e7801d4 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/BytesPartitionState.java +++ b/test/harry/main/org/apache/cassandra/harry/model/BytesPartitionState.java @@ -20,8 +20,8 @@ import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Collection; import java.util.Comparator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.NavigableSet; @@ -32,6 +32,8 @@ import java.util.stream.Stream; import javax.annotation.Nullable; +import com.google.common.collect.Sets; + import org.apache.cassandra.cql3.ast.Symbol; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ClusteringComparator; @@ -44,6 +46,7 @@ import org.apache.cassandra.harry.util.BitSet; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FastByteOperations; import org.apache.cassandra.utils.ImmutableUniqueList; @@ -62,31 +65,36 @@ private BytesPartitionState(Factory factory, Clustering key) this.state = factory.partitionState(key); } - public void deleteRow(Clustering clustering) + public void deleteRow(Clustering clustering, long ts) { long cd = factory.clusteringCache.deflateOrUndefined(clustering); if (MagicConstants.UNSET_DESCR == cd) return; - state.delete(cd, MagicConstants.NO_TIMESTAMP); + deleteRow(cd, ts); } - public void deleteColumns(Clustering clustering, Set columns) + private void deleteRow(long cd, long ts) + { + state.delete(cd, ts); + } + + public void deleteColumns(Clustering clustering, long ts, Set columns) { long cd = factory.clusteringCache.deflateOrUndefined(clustering); if (cd != MagicConstants.UNSET_DESCR) { BitSet regularColumns = bitset(columns, true); if (!regularColumns.allUnset()) - state.deleteRegularColumns(MagicConstants.NO_TIMESTAMP, cd, regularColumns); + state.deleteRegularColumns(ts, cd, regularColumns); } - deleteStaticColumns(columns); + deleteStaticColumns(ts, columns); } - public void deleteStaticColumns(Set columns) + public void deleteStaticColumns(long ts, Set columns) { BitSet staticColumns = bitset(columns, false); if (!staticColumns.allUnset()) - state.deleteStaticColumns(MagicConstants.NO_TIMESTAMP, staticColumns); + state.deleteStaticColumns(ts, staticColumns); } private BitSet bitset(Set columns, boolean regular) @@ -109,28 +117,27 @@ public Ref ref() public PrimaryKey partitionRowRef() { - return new PrimaryKey(ref(), null); + return new PrimaryKey(factory, ref(), null); } - public void setStaticColumns(Map values) + public void setStaticColumns(long ts, Map values) { if (factory.staticColumns.isEmpty() || values.isEmpty()) throw new IllegalStateException("Attempt to write to static columns; but they do not exist"); - long[] sds = toDescriptor(factory.staticColumns, values); - state.writeStatic(sds, MagicConstants.NO_TIMESTAMP); + + state.writeStatic(toDescriptor(factory.staticColumns, values), ts); } - public void setColumns(Clustering clustering, Map values, boolean writePrimaryKeyLiveness) + public void setColumns(Clustering clustering, long ts, Map values, boolean writePrimaryKeyLiveness) { long cd = factory.clusteringCache.deflate(clustering); - long[] vds = toDescriptor(factory.regularColumns, values); - state.writeRegular(cd, vds, MagicConstants.NO_TIMESTAMP, writePrimaryKeyLiveness); + state.writeRegular(cd, toDescriptor(factory.regularColumns, values), ts, writePrimaryKeyLiveness); // UDT's have the ability to "update" that triggers a delete; this allows creating an "empty" row. // When an empty row exists without liveness info, then purge the row var row = state.rows.get(cd); if (row.isEmpty() && !row.hasPrimaryKeyLivenessInfo) - state.delete(cd, MagicConstants.NO_TIMESTAMP); + deleteRow(cd, ts); } private long[] toDescriptor(ImmutableUniqueList positions, Map values) @@ -200,6 +207,8 @@ public boolean staticOnly() @Nullable public Row get(Clustering clustering) { + if (clustering == Clustering.STATIC_CLUSTERING) + return staticRow(); long cd = factory.clusteringCache.deflateOrUndefined(clustering); if (cd == MagicConstants.UNSET_DESCR) return null; @@ -216,6 +225,12 @@ public ByteBuffer get(Clustering clustering, Symbol column) return row == null ? null : row.get(column); } + public long timestamp(Clustering clustering, Symbol column) + { + Row row = get(clustering); + return row == null ? MagicConstants.NO_TIMESTAMP : row.timestamp(column); + } + private Row toRow(PartitionState.RowState rowState) { Clustering clustering; @@ -230,10 +245,10 @@ private Row toRow(PartitionState.RowState rowState) clustering = factory.clusteringCache.inflate(rowState.cd); values = fromDescriptor(factory.regularColumns, rowState.vds); } - return new Row(clustering, values); + return new Row(clustering, values, rowState.lts); } - public Collection rows() + public List rows() { return state.rows().values().stream().map(this::toRow).collect(Collectors.toList()); } @@ -281,18 +296,26 @@ private static void appendValues(StringBuilder sb, List columns, Cluster sb.append(')'); } - public class PrimaryKey implements Comparable + public static class PrimaryKey implements Comparable { + private final Factory factory; public final BytesPartitionState.Ref partition; @Nullable public final Clustering clustering; - public PrimaryKey(BytesPartitionState.Ref partition, @Nullable Clustering clustering) + private PrimaryKey(Factory factory, BytesPartitionState.Ref partition, @Nullable Clustering clustering) { + this.factory = factory; this.partition = partition; this.clustering = clustering; } + public boolean isPartitionLevel() + { + return clustering == null // has clustering, but only referencing partition + || Clustering.EMPTY.equals(clustering); // doesn't have clustering + } + @Override public int compareTo(PrimaryKey o) { @@ -324,7 +347,8 @@ public String toString() StringBuilder sb = new StringBuilder("(partition="); sb.append(partition); sb.append(", clustering="); - appendValues(sb, factory.clusteringColumns, clustering); + if (clustering == null) sb.append("null"); + else appendValues(sb, factory.clusteringColumns, clustering); sb.append(')'); return sb.toString(); } @@ -415,12 +439,22 @@ public class Row public final Clustering clustering; private final ImmutableUniqueList columnNames; private final ByteBuffer[] columns; + private final long[] lts; - private Row(Clustering clustering, ByteBuffer[] columns) + private Row(Clustering clustering, ByteBuffer[] columns, long[] lts) { this.clustering = clustering; this.columnNames = clustering == Clustering.STATIC_CLUSTERING ? factory.staticColumns : factory.regularColumns; this.columns = columns; + this.lts = lts; + } + + private Row(Clustering clustering, ImmutableUniqueList columnNames, ByteBuffer[] columns, long[] lts) + { + this.clustering = clustering; + this.columnNames = columnNames; + this.columns = columns; + this.lts = lts; } public ByteBuffer get(Symbol col) @@ -433,15 +467,50 @@ public ByteBuffer get(int offset) return columns[offset]; } + public long timestamp(Symbol col) + { + return lts[columnNames.indexOf(col)]; + } + + public long timestamp(int offset) + { + return lts[offset]; + } + public PrimaryKey ref() { - return new PrimaryKey(BytesPartitionState.this.ref(), clustering); + return new PrimaryKey(factory, BytesPartitionState.this.ref(), clustering); } public boolean isEmpty() { return Stream.of(columns).allMatch(b -> b == null ); } + + public Row select(List selection) + { + if (columnNames.equals(selection)) return this; + selection = validateSelect(selection); + ByteBuffer[] selected = new ByteBuffer[selection.size()]; + ImmutableUniqueList.Builder names = ImmutableUniqueList.builder(selected.length); + for (int i = 0; i < selection.size(); i++) + { + Symbol col = selection.get(i); + selected[i] = columns[columnNames.indexOf(col)]; + names.add(col); + } + + return new Row(clustering, names.build(), selected, lts); + } + + private List validateSelect(List selection) + { + LinkedHashSet uniqueSelection = new LinkedHashSet<>(selection); + var unknown = Sets.difference(uniqueSelection, columnNames.asSet()); + if (!unknown.isEmpty()) + throw new AssertionError("Unable to select columns " + selection + "; has unknown columns " + unknown); + return uniqueSelection.size() == selection.size() ? selection : new ArrayList<>(uniqueSelection); + } } public static class Factory @@ -452,14 +521,19 @@ public static class Factory public final ImmutableUniqueList primaryColumns; public final ImmutableUniqueList staticColumns; public final ImmutableUniqueList regularColumns; - public final ImmutableUniqueList selectionOrder, partitionAndStaticColumns, regularAndStaticColumns; + public final ImmutableUniqueList selectionOrder, partitionAndStaticColumns, clusteringAndRegularColumns, regularAndStaticColumns; public final ClusteringComparator clusteringComparator; // translation layer for harry interop private final BijectionCache> partitionCache = new BijectionCache<>(Reject.instance.as()); private final BijectionCache> clusteringCache; - private final BijectionCache valueCache = new BijectionCache<>(Reject.instance.as()); + private final BijectionCache valueCache = new BijectionCache<>((l, r) -> { + if (!l.type.equals(r.type)) + throw new IllegalArgumentException("Unable to compare different types: " + l.type.asCQL3Type() + " != " + r.type.asCQL3Type()); + // Cells resolve based off unsigned byte order and not type order + return ByteBufferUtil.compareUnsigned(l.value, r.value); + }); private final ValueGenerators, Clustering> valueGenerators; public Factory(TableMetadata metadata) @@ -475,27 +549,27 @@ public Factory(TableMetadata metadata) if (clusteringColumns.isEmpty()) primaryColumns = partitionColumns; else { - symbolListBuilder.addAll(partitionColumns); - symbolListBuilder.addAll(clusteringColumns); - primaryColumns = symbolListBuilder.buildAndClear(); + primaryColumns = symbolListBuilder.addAll(partitionColumns) + .addAll(clusteringColumns) + .buildAndClear(); } - for (ColumnMetadata pk : metadata.staticColumns()) - symbolListBuilder.add(Symbol.from(pk)); + metadata.staticColumns().selectOrderIterator().forEachRemaining(cm -> symbolListBuilder.add(Symbol.from(cm))); staticColumns = symbolListBuilder.buildAndClear(); if (staticColumns.isEmpty()) partitionAndStaticColumns = partitionColumns; else { - symbolListBuilder.addAll(partitionColumns); - symbolListBuilder.addAll(staticColumns); - partitionAndStaticColumns = symbolListBuilder.buildAndClear(); + partitionAndStaticColumns = symbolListBuilder.addAll(partitionColumns) + .addAll(staticColumns) + .buildAndClear(); } - for (ColumnMetadata pk : metadata.regularColumns()) - symbolListBuilder.add(Symbol.from(pk)); + metadata.regularColumns().selectOrderIterator().forEachRemaining(cm -> symbolListBuilder.add(Symbol.from(cm))); regularColumns = symbolListBuilder.buildAndClear(); + clusteringAndRegularColumns = symbolListBuilder.addAll(clusteringColumns) + .addAll(regularColumns) + .buildAndClear(); metadata.allColumnsInSelectOrder().forEachRemaining(cm -> symbolListBuilder.add(Symbol.from(cm))); selectionOrder = symbolListBuilder.buildAndClear(); - metadata.regularAndStaticColumns().forEach(cm -> symbolListBuilder.add(Symbol.from(cm))); - regularAndStaticColumns = symbolListBuilder.buildAndClear(); + regularAndStaticColumns = symbolListBuilder.addAll(staticColumns).addAll(regularColumns).buildAndClear(); clusteringComparator = new ClusteringComparator(clusteringColumns.stream().map(Symbol::rawType).collect(Collectors.toList())); @@ -569,6 +643,11 @@ public BytesPartitionState.Ref createRef(Token token, boolean nullKeyGtMatchingT return new BytesPartitionState.Ref(this, token, nullKeyGtMatchingToken); } + public PrimaryKey createPrimaryKey(Ref pk, @Nullable Clustering cd) + { + return new BytesPartitionState.PrimaryKey(this, pk, cd); + } + private PartitionState partitionState(Clustering key) { return new PartitionState(partitionCache.deflate(key), valueGenerators); diff --git a/test/harry/main/org/apache/cassandra/harry/model/PartitionState.java b/test/harry/main/org/apache/cassandra/harry/model/PartitionState.java index cf4d70bd958f..57ec2ad4af61 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/PartitionState.java +++ b/test/harry/main/org/apache/cassandra/harry/model/PartitionState.java @@ -236,8 +236,12 @@ private RowState updateRowState(RowState currentState, IntFunction column = columns.apply(i); - if (column.compare(vds[i], currentState.vds[i]) > 0) + if (vds[i] == MagicConstants.NIL_DESCR // writing a null is the same as a tombstone, which has higher priority + || (currentState.vds[i] != MagicConstants.NIL_DESCR + && column.compare(vds[i], currentState.vds[i]) > 0)) + { currentState.vds[i] = vds[i]; + } } else { diff --git a/test/harry/main/org/apache/cassandra/harry/util/StringUtils.java b/test/harry/main/org/apache/cassandra/harry/util/StringUtils.java index 0b3b94b73fea..b33a407e5288 100644 --- a/test/harry/main/org/apache/cassandra/harry/util/StringUtils.java +++ b/test/harry/main/org/apache/cassandra/harry/util/StringUtils.java @@ -35,7 +35,7 @@ public static String escapeControlChars(String input) for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); - if (Character.isISOControl(c)) + if (Character.isISOControl(c) && c != '\n') result.append(String.format("\\u%04X", (int) c)); else result.append(c); diff --git a/test/unit/org/apache/cassandra/cql3/KnownIssue.java b/test/unit/org/apache/cassandra/cql3/KnownIssue.java index be2dfe75248c..4b2bec6d3cef 100644 --- a/test/unit/org/apache/cassandra/cql3/KnownIssue.java +++ b/test/unit/org/apache/cassandra/cql3/KnownIssue.java @@ -43,6 +43,10 @@ public enum KnownIssue "When doing an SAI query, if the where clause also contains a vector column bad results can be produced"), CAS_CONDITION_ON_UDT_W_EMPTY_BYTES("https://issues.apache.org/jira/browse/CASSANDRA-20479", "WHERE clause blocks operations on UDTs but CAS allows in IF clause. During this path empty can be confused with null which allows non-existing rows to match empty bytes"), + CAS_ON_STATIC_ROW("", + "When you do a CAS to the partition level the read is SELECT statics LIMIT 1, if the CAS doesn't apply the response includes the first row in the partition with its values redacted... this statement is partition level and not row level, would expect just the applied column like the other cases where the static row isn't present"), + STATIC_LIST_APPEND_WITH_CLUSTERING_IN("", + "When an 'UPDATE SET s += [0] WHERE pk = ? AND ck IN (?, ?)' happens the static operation happens twice, so the list append adds 2 elements!"), ; KnownIssue(String url, String description) diff --git a/test/unit/org/apache/cassandra/cql3/ast/AssignmentOperator.java b/test/unit/org/apache/cassandra/cql3/ast/AssignmentOperator.java index e3918f70da7c..499b36551f48 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/AssignmentOperator.java +++ b/test/unit/org/apache/cassandra/cql3/ast/AssignmentOperator.java @@ -123,4 +123,10 @@ public Expression visit(Visitor v) if (r == right) return this; return new AssignmentOperator(kind, r); } + + @Override + public String toString() + { + return debugCQL(); + } } diff --git a/test/unit/org/apache/cassandra/cql3/ast/CasCondition.java b/test/unit/org/apache/cassandra/cql3/ast/CasCondition.java index d0d4d0e35b09..c60e27a85df2 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/CasCondition.java +++ b/test/unit/org/apache/cassandra/cql3/ast/CasCondition.java @@ -24,6 +24,11 @@ public interface CasCondition extends Element { CasCondition visit(Visitor v); + default String debugCQL() + { + return visit(StandardVisitors.DEBUG).toCQL(); + } + enum Simple implements CasCondition { NotExists("IF NOT EXISTS"), @@ -80,5 +85,11 @@ public CasCondition visit(Visitor v) if (c == conditional) return this; return new IfCondition(c); } + + @Override + public String toString() + { + return toCQL(); + } } } diff --git a/test/unit/org/apache/cassandra/cql3/ast/Conditional.java b/test/unit/org/apache/cassandra/cql3/ast/Conditional.java index 66012c060cba..4fc8a2085085 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Conditional.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Conditional.java @@ -50,6 +50,11 @@ default Conditional visit(Visitor v) return v.visit(this); } + default String debugCQL() + { + return visit(StandardVisitors.DEBUG).toCQL(); + } + default List simplify() { return Collections.singletonList(this); diff --git a/test/unit/org/apache/cassandra/cql3/ast/Elements.java b/test/unit/org/apache/cassandra/cql3/ast/Elements.java index f750c4efc10d..4713c30c15be 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Elements.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Elements.java @@ -18,6 +18,8 @@ package org.apache.cassandra.cql3.ast; +import java.util.stream.Stream; + public final class Elements { private Elements() @@ -29,4 +31,11 @@ public static void newLine(StringBuilder sb, int indent) for (int i = 0; i < indent; i++) sb.append(' '); } + + public static Stream symbols(Element element) + { + return element.streamRecursive(true) + .filter(e -> e instanceof Symbol) + .map(e -> (Symbol) e); + } } diff --git a/test/unit/org/apache/cassandra/cql3/ast/Expression.java b/test/unit/org/apache/cassandra/cql3/ast/Expression.java index 96bc41a78c9e..9a0e554968df 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Expression.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Expression.java @@ -32,4 +32,9 @@ default Expression visit(Visitor v) { return v.visit(this); } + + default String debugCQL() + { + return visit(StandardVisitors.DEBUG).toCQL(); + } } diff --git a/test/unit/org/apache/cassandra/cql3/ast/Literal.java b/test/unit/org/apache/cassandra/cql3/ast/Literal.java index 4bd2f9b6319c..eb6d83df41e0 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Literal.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Literal.java @@ -23,6 +23,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.db.marshal.StringType; public class Literal implements Value @@ -41,6 +42,11 @@ public static Literal of(int value) return new Literal(value, Int32Type.instance); } + public static Literal of(long value) + { + return new Literal(value, LongType.instance); + } + @Override public AbstractType type() { diff --git a/test/unit/org/apache/cassandra/cql3/ast/Mutation.java b/test/unit/org/apache/cassandra/cql3/ast/Mutation.java index 95987dc657ac..7c764853e5b5 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Mutation.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Mutation.java @@ -27,10 +27,10 @@ import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.stream.Stream; import javax.annotation.Nullable; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.schema.ColumnMetadata; @@ -50,6 +50,23 @@ protected Mutation(Kind kind, TableReference table) this.table = table; } + public Insert asInsert() + { + return (Insert) this; + } + + public Update asUpdate() + { + return (Update) this; + } + + public Delete asDelete() + { + return (Delete) this; + } + + public abstract long timestampOrDefault(long defaultValue); + public abstract boolean isCas(); public abstract Mutation withoutTimestamp(); @@ -61,6 +78,9 @@ public Mutation withTimestamp(long timestamp) public abstract Mutation withTimestamp(Timestamp timestamp); + + public abstract Optional casCondition(); + public final Kind mutationKind() { return kind; @@ -161,6 +181,13 @@ public Stream stream() { return Stream.of(value); } + + public long get() + { + if (value.value() instanceof Long) + return (long) value.value(); + return LongType.instance.compose(value.valueEncoded()); + } } public static class Using implements Element @@ -168,15 +195,24 @@ public static class Using implements Element public final Optional ttl; public final Optional timestamp; - public Using(Optional ttl, Optional timestamp) + private Using(Optional ttl, Optional timestamp) { this.ttl = ttl; this.timestamp = timestamp; + if (ttl.isEmpty() && timestamp.isEmpty()) + throw new IllegalStateException("Empty USING isnt allowed"); + } + + public static Optional create(Optional ttl, Optional timestamp) + { + if (ttl.isEmpty() && timestamp.isEmpty()) return Optional.empty(); + return Optional.of(new Using(ttl, timestamp)); } - public Using withoutTimestamp() + public Optional withoutTimestamp() { - return new Using(ttl, Optional.empty()); + if (ttl.isEmpty()) return Optional.empty(); + return Optional.of(new Using(ttl, Optional.empty())); } public Using withTimestamp(Timestamp timestamp) @@ -187,8 +223,6 @@ public Using withTimestamp(Timestamp timestamp) @Override public void toCQL(StringBuilder sb, CQLFormatter formatter) { - if (ttl.isEmpty() && timestamp.isEmpty()) - return; sb.append("USING "); if (ttl.isPresent()) ttl.get().toCQL(sb, formatter); @@ -227,6 +261,16 @@ public Insert(TableReference table, LinkedHashMap values, bo this.using = using; } + @Override + public long timestampOrDefault(long defaultValue) + { + if (using.isEmpty()) return defaultValue; + var opt = using.get().timestamp; + if (opt.isEmpty()) return defaultValue; + var timestamp = opt.get(); + return timestamp.get(); + } + @Override public void toCQL(StringBuilder sb, CQLFormatter formatter) { @@ -311,7 +355,7 @@ public Mutation withoutTimestamp() { return new Insert(table, values, ifNotExists, using.isEmpty() ? using - : using.map(u -> u.withoutTimestamp())); + : using.flatMap(u -> u.withoutTimestamp())); } @Override @@ -321,6 +365,12 @@ public Insert withTimestamp(Timestamp timestamp) ? Optional.of(new Using(Optional.empty(), Optional.of(timestamp))) : using.map(u -> u.withTimestamp(timestamp))); } + + @Override + public Optional casCondition() + { + return ifNotExists ? Optional.of(CasCondition.Simple.NotExists) : Optional.empty(); + } } public static class Update extends Mutation @@ -339,6 +389,16 @@ public Update(TableReference table, Optional using, LinkedHashMap u.withoutTimestamp()), set, where, casCondition); + return new Update(table, using.isEmpty() ? using : using.flatMap(u -> u.withoutTimestamp()), set, where, casCondition); } @Override @@ -455,6 +515,12 @@ public Update withTimestamp(Timestamp timestamp) : using.map(u -> u.withTimestamp(timestamp)); return new Update(table, updated, set, where, casCondition); } + + @Override + public Optional casCondition() + { + return casCondition; + } } public static class Delete extends Mutation @@ -477,6 +543,15 @@ public Delete(List columns, this.casCondition = casCondition; } + @Override + public long timestampOrDefault(long defaultValue) + { + var opt = timestamp; + if (opt.isEmpty()) return defaultValue; + var timestamp = opt.get(); + return timestamp.get(); + } + /* DELETE [column_name (term)][, ...] FROM [keyspace_name.] table_name @@ -585,6 +660,12 @@ public Delete withTimestamp(Timestamp timestamp) { return new Delete(columns, table, Optional.of(timestamp), where, casCondition); } + + @Override + public Optional casCondition() + { + return casCondition; + } } public static abstract class BaseBuilder> implements Conditional.EqBuilderPlus @@ -612,6 +693,11 @@ protected BaseBuilder(Kind kind, TableMetadata table) neededPks.addAll(partitionColumns); } + protected Symbol find(String name) + { + return allColumns.stream().filter(s -> s.symbol.equals(name)).findAny().get(); + } + public abstract T build(); @Override @@ -678,6 +764,11 @@ public InsertBuilder ifNotExists() return this; } + public InsertBuilder timestamp(long value) + { + return timestamp(Literal.of(value)); + } + public InsertBuilder timestamp(Value value) { this.timestamp = new Timestamp(value); @@ -727,6 +818,11 @@ protected UpdateBuilder(TableMetadata table) super(Kind.UPDATE, table); } + public UpdateBuilder timestamp(long value) + { + return timestamp(Literal.of(value)); + } + public UpdateBuilder timestamp(Value value) { this.timestamp = new Timestamp(value); @@ -766,18 +862,32 @@ public UpdateBuilder set(Symbol column, Expression value) public UpdateBuilder set(String column, int value) { - return set(new Symbol(column, Int32Type.instance), Bind.of(value)); + Symbol symbol = find(column); + if (!symbol.type().equals(Int32Type.instance)) + throw new AssertionError("Expected int type but given " + symbol.type().asCQL3Type()); + return set(symbol, Bind.of(value)); + } + + public UpdateBuilder set(String column, Object value) + { + Symbol symbol = find(column); + return set(symbol, new Bind(value, symbol.type())); } public UpdateBuilder set(String column, Expression expression) { - Symbol symbol = new Symbol(metadata.getColumn(new ColumnIdentifier(column, true))); - return set(symbol, expression); + return set(find(column), expression); + } + + public UpdateBuilder set(String column, Function fn) + { + Symbol symbol = find(column); + return set(symbol, fn.apply(symbol)); } public UpdateBuilder set(String column, String value) { - Symbol symbol = new Symbol(metadata.getColumn(new ColumnIdentifier(column, true))); + Symbol symbol = find(column); return set(symbol, new Bind(symbol.type().asCQL3Type().fromCQLLiteral(value), symbol.type())); } @@ -857,9 +967,15 @@ public List columns() return Collections.unmodifiableList(columns); } + public DeleteBuilder columns(String... names) + { + Stream.of(names).map(this::find).forEach(this::column); + return this; + } + public DeleteBuilder column(String columnName) { - return column(Symbol.from(metadata.getColumn(new ColumnIdentifier(columnName, true)))); + return column(find(columnName)); } public DeleteBuilder column(Symbol symbol) @@ -881,6 +997,11 @@ public DeleteBuilder column(List symbols) return this; } + public DeleteBuilder timestamp(long value) + { + return timestamp(Literal.of(value)); + } + public DeleteBuilder timestamp(Value value) { this.timestamp = new Timestamp(value); diff --git a/test/unit/org/apache/cassandra/cql3/ast/Operator.java b/test/unit/org/apache/cassandra/cql3/ast/Operator.java index d0baa10ec01c..35d745142d63 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Operator.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Operator.java @@ -103,4 +103,10 @@ public Expression visit(Visitor v) if (left == this.left && right == this.right) return this; return new Operator(kind, left, right); } + + @Override + public String toString() + { + return visit(StandardVisitors.DEBUG).toCQL(); + } } diff --git a/test/unit/org/apache/cassandra/cql3/ast/Select.java b/test/unit/org/apache/cassandra/cql3/ast/Select.java index 28134dde8e95..10d98dee6305 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Select.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Select.java @@ -29,6 +29,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.ImmutableUniqueList; public class Select implements Statement { @@ -479,11 +480,15 @@ public Builder table(TableMetadata table) public static class TableBasedBuilder extends BaseBuilder implements Conditional.ConditionalBuilderPlus { private final TableMetadata metadata; + private final ImmutableUniqueList columns; public TableBasedBuilder(TableMetadata metadata) { this.metadata = metadata; source = Optional.of(TableReference.from(metadata)); + var builder = ImmutableUniqueList.builder(); + metadata.allColumnsInSelectOrder().forEachRemaining(c -> builder.add(Symbol.from(c))); + columns = builder.buildAndClear(); } @Override @@ -491,5 +496,15 @@ public TableMetadata metadata() { return metadata; } + + private Symbol find(String name) + { + return columns.stream().filter(s -> s.symbol.equals(name)).findAny().get(); + } + + public TableBasedBuilder columnSelection(String name) + { + return selection(find(name)); + } } } diff --git a/test/unit/org/apache/cassandra/cql3/ast/StandardVisitors.java b/test/unit/org/apache/cassandra/cql3/ast/StandardVisitors.java index 4cbf3d989f03..85c8a8bc7040 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/StandardVisitors.java +++ b/test/unit/org/apache/cassandra/cql3/ast/StandardVisitors.java @@ -30,6 +30,16 @@ public Value visit(Value v) return new Literal(b.value(), b.type()); } }; + public static final Visitor LITERAL_TO_BIND = new Visitor() + { + @Override + public Value visit(Value v) + { + if (!(v instanceof Literal)) return v; + Literal b = (Literal) v; + return new Bind(b.value(), b.type()); + } + }; public static final Visitor UNWRAP_TYPE_HINT = new Visitor() { diff --git a/test/unit/org/apache/cassandra/cql3/ast/Statement.java b/test/unit/org/apache/cassandra/cql3/ast/Statement.java index ffcba03465b8..fd9a9ab2a964 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Statement.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Statement.java @@ -51,12 +51,17 @@ default String detailedToString() { Object[] binds = binds(); return "CQL:\n" + toCQL() + "\nBinds:\n" + IntStream.range(0, binds.length) - .mapToObj(i -> i + " -> " + binds[i].getClass().getCanonicalName() + "(" + normalize(binds[i]) + ")") + .mapToObj(i -> i + " -> " + binds[i] == null ? "null" : binds[i].getClass().getCanonicalName() + "(" + normalize(binds[i]) + ")") .collect(Collectors.joining("\n")); } Statement visit(Visitor v); + default String debugCQL() + { + return visit(StandardVisitors.DEBUG).toCQL(); + } + static boolean hasByteBuffer(Object value) { if (value == null) diff --git a/test/unit/org/apache/cassandra/cql3/ast/Value.java b/test/unit/org/apache/cassandra/cql3/ast/Value.java index 92ef0101831d..ab467c3b8378 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Value.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Value.java @@ -37,4 +37,9 @@ default Value visit(Visitor v) { return v.visit(this); } + + default String debugCQL() + { + return visit(StandardVisitors.DEBUG).toCQL(); + } } diff --git a/test/unit/org/apache/cassandra/utils/ASTGenerators.java b/test/unit/org/apache/cassandra/utils/ASTGenerators.java index cdf533cb420e..1279f60cb7fe 100644 --- a/test/unit/org/apache/cassandra/utils/ASTGenerators.java +++ b/test/unit/org/apache/cassandra/utils/ASTGenerators.java @@ -61,6 +61,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.IntegerType; +import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; @@ -373,6 +374,10 @@ public enum DeleteKind { Partition, Row, Column } private BiFunction, List> ifConditionFilter = (rnd, symbols) -> symbols; private Gen deleteKindGen = SourceDSL.arbitrary().enumValues(DeleteKind.class); private Map columnExpressions = new LinkedHashMap<>(); + private boolean allowPartitionOnlyUpdate = true; + private boolean allowPartitionOnlyInsert = true; + private boolean allowUpdateMultipleClusteringKeys = true; + private EnumSet ignoreIssues = IGNORE_ISSUES; public MutationGenBuilder(TableMetadata metadata) { @@ -391,6 +396,30 @@ public MutationGenBuilder(TableMetadata metadata) columnExpressions.put(symbol, new ExpressionBuilder(symbol.type())); } + public MutationGenBuilder withIgnoreIssues(EnumSet ignoreIssues) + { + this.ignoreIssues = Objects.requireNonNull(ignoreIssues); + return this; + } + + public MutationGenBuilder withAllowPartitionOnlyUpdate(boolean value) + { + this.allowPartitionOnlyUpdate = value; + return this; + } + + public MutationGenBuilder withAllowPartitionOnlyInsert(boolean value) + { + this.allowPartitionOnlyInsert = value; + return this; + } + + public MutationGenBuilder withAllowUpdateMultipleClusteringKeys(boolean allowUpdateMultipleClusteringKeys) + { + this.allowUpdateMultipleClusteringKeys = allowUpdateMultipleClusteringKeys; + return this; + } + public MutationGenBuilder withColumnExpressions(Consumer fn) { for (Symbol symbol : allColumns) @@ -534,16 +563,45 @@ private static void values(RandomnessSource rnd, } else { - //TODO (coverage): support IN rather than just EQ for (Symbol s : columns) builder.value(s, columnExpressions.get(s).build().generate(rnd)); } } + private static void where(RandomnessSource rnd, + Map columnExpressions, + Conditional.ConditionalBuilder builder, + LinkedHashSet columns, + @Nullable Gen> gen) + { + if (gen != null) + { + Map map = gen.generate(rnd); + for (Map.Entry e : assertDeterministic(map).entrySet()) + builder.value(e.getKey(), valueGen(e.getValue(), e.getKey().type()).generate(rnd)); + return; + } + + for (Symbol s : columns) + { + if (SourceDSL.booleans().all().generate(rnd)) + { + builder.value(s, columnExpressions.get(s).build().generate(rnd)); + continue; + } + var valueGen = columnExpressions.get(s).build(); + builder.in(s, SourceDSL.lists().of(valueGen).ofSizeBetween(1, 3).generate(rnd)); + } + } + public Gen build() { Gen bool = SourceDSL.booleans().all(); Map, List> typeToReference = references.stream().collect(Collectors.groupingBy(Reference::type)); + if (allowUpdateMultipleClusteringKeys + && ignoreIssues.contains(KnownIssue.STATIC_LIST_APPEND_WITH_CLUSTERING_IN) + && staticColumns.stream().anyMatch(s -> s.type().isMultiCell() && s.type().getClass() == ListType.class)) + allowUpdateMultipleClusteringKeys = false; return rnd -> { Mutation.Kind kind = kindGen.generate(rnd); // when there are not non-primary-columns then can't support UPDATE @@ -572,6 +630,12 @@ public Gen build() if (timestamp.isPresent()) builder.timestamp(valueGen(timestamp.getAsLong(), LongType.instance).generate(rnd)); values(rnd, columnExpressions, builder, partitionColumns, partitionValueGen); + if (!staticColumns.isEmpty() && allowPartitionOnlyInsert && bool.generate(rnd)) + { + var columnsToGenerate = new LinkedHashSet<>(subset(rnd, staticColumns)); + generateRemaining(rnd, bool, Mutation.Kind.INSERT, isTransaction, typeToReference, builder, columnsToGenerate); + return builder.build(); + } values(rnd, columnExpressions, builder, clusteringColumns, clusteringValueGen); LinkedHashSet columnsToGenerate; if (regularAndStaticColumns.isEmpty()) @@ -601,6 +665,35 @@ else if (regularAndStaticColumns.size() == 1 || bool.generate(rnd)) var timestamp = timestampGen.generate(rnd); if (timestamp.isPresent()) builder.timestamp(valueGen(timestamp.getAsLong(), LongType.instance).generate(rnd)); + if (allowUpdateMultipleClusteringKeys) + where(rnd, columnExpressions, builder, partitionColumns, partitionValueGen); + else + values(rnd, columnExpressions, builder, partitionColumns, partitionValueGen); + + if (!staticColumns.isEmpty() && allowPartitionOnlyUpdate && bool.generate(rnd)) + { + var columnsToGenerate = new LinkedHashSet<>(subset(rnd, staticColumns)); + Conditional.EqBuilder setBuilder = builder::set; + generateRemaining(rnd, bool, Mutation.Kind.UPDATE, isTransaction, typeToReference, setBuilder, columnsToGenerate); + + if (isCas) + { + if (useCasIf.generate(rnd)) + { + ifGen(new ArrayList<>(staticColumns)).generate(rnd).ifPresent(c -> builder.ifCondition(c)); + } + else + { + builder.ifExists(); + } + } + return builder.build(); + } + if (allowUpdateMultipleClusteringKeys) + where(rnd, columnExpressions, builder, clusteringColumns, clusteringValueGen); + else + values(rnd, columnExpressions, builder, clusteringColumns, clusteringValueGen); + if (isCas) { if (useCasIf.generate(rnd)) @@ -612,8 +705,6 @@ else if (regularAndStaticColumns.size() == 1 || bool.generate(rnd)) builder.ifExists(); } } - values(rnd, columnExpressions, builder, partitionColumns, partitionValueGen); - values(rnd, columnExpressions, builder, clusteringColumns, clusteringValueGen); LinkedHashSet columnsToGenerate; if (regularAndStaticColumns.size() == 1 || bool.generate(rnd)) diff --git a/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java b/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java index 00fabea136b5..d4b7393dcd83 100644 --- a/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java +++ b/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java @@ -26,8 +26,6 @@ import java.util.List; import java.util.RandomAccess; -import com.google.common.collect.Iterators; - import org.agrona.collections.Object2IntHashMap; public class ImmutableUniqueList extends AbstractList implements RandomAccess @@ -43,6 +41,12 @@ private ImmutableUniqueList(Builder builder) indexLookup = new Object2IntHashMap<>(builder.indexLookup); } + public static ImmutableUniqueList copyOf(Collection collection) + { + if (collection instanceof ImmutableUniqueList) return (ImmutableUniqueList) collection; + return ImmutableUniqueList.builder().addAll(collection).build(); + } + public static Builder builder() { return new Builder<>(); @@ -95,7 +99,7 @@ public int size() return values.length; } - public static final class Builder extends AbstractSet + public static final class Builder { private final List values; private final Object2IntHashMap indexLookup = new Object2IntHashMap<>(-1); @@ -111,29 +115,21 @@ public Builder(int expectedSize) this.values = new ArrayList<>(expectedSize); } - public Builder mayAddAll(Collection values) - { - addAll(values); - return this; - } - - @Override - public boolean add(T t) + public Builder add(T t) { - if (indexLookup.containsKey(t)) return false; + if (indexLookup.containsKey(t)) return this; int idx = this.idx++; indexLookup.put(t, idx); values.add(t); - return true; + return this; } - @Override - public boolean remove(Object o) + public Builder addAll(Collection c) { - throw new UnsupportedOperationException(); + c.forEach(this::add); + return this; } - @Override public void clear() { values.clear(); @@ -141,30 +137,6 @@ public void clear() idx = 0; } - @Override - public boolean isEmpty() - { - return values.isEmpty(); - } - - @Override - public boolean contains(Object o) - { - return indexLookup.containsKey(o); - } - - @Override - public Iterator iterator() - { - return Iterators.unmodifiableIterator(values.iterator()); - } - - @Override - public int size() - { - return values.size(); - } - public ImmutableUniqueList build() { return new ImmutableUniqueList<>(this); From 4e2e5f3c57cbd375e1ec112e25578a184618c4b5 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Thu, 20 Feb 2025 12:25:07 -0600 Subject: [PATCH 026/340] Fixed multiple single-node SAI query bugs relating to static columns - Ensure MemtableIndexWriter calculates min/max properly with indexes on partition key elements - Ensure only rows with live data are indexed - Ensure min cannot be greater than max in intersection statistics with static keys - Correct tracking of last key in the searcher in the presence of static keys patch by Caleb Rackliffe; reviewed by David Capwell for CASSANDRA-20338 --- CHANGES.txt | 1 + .../index/sai/StorageAttachedIndexGroup.java | 8 +- .../sai/disk/StorageAttachedIndexWriter.java | 10 ++- .../sai/disk/v1/MemtableIndexWriter.java | 6 +- .../KeyRangeIntersectionIterator.java | 4 + .../plan/StorageAttachedIndexSearcher.java | 4 +- .../cassandra/index/sai/utils/PrimaryKey.java | 18 +++- .../cql/CompositePartitionKeyIndexTest.java | 88 ++++++++++++++++++- .../index/sai/cql/StaticColumnIndexTest.java | 44 ++++++++++ 9 files changed, 170 insertions(+), 13 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4f8ae9036657..f4b1be2002f8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.4 + * Fixed multiple single-node SAI query bugs relating to static columns (CASSANDRA-20338) * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) * Update netty to 4.1.119.Final and netty-tcnative to 2.0.70.Final (CASSANDRA-20314) * Serialization can lose complex deletions in a mutation with multiple collections in a row (CASSANDRA-20449) diff --git a/src/java/org/apache/cassandra/index/sai/StorageAttachedIndexGroup.java b/src/java/org/apache/cassandra/index/sai/StorageAttachedIndexGroup.java index bddeacecc7cd..02e7971814bf 100644 --- a/src/java/org/apache/cassandra/index/sai/StorageAttachedIndexGroup.java +++ b/src/java/org/apache/cassandra/index/sai/StorageAttachedIndexGroup.java @@ -177,7 +177,7 @@ public Index.Indexer indexerFor(Predicate indexSelector, public void insertRow(Row row) { // SAI does not index deletions, as these are resolved during post-filtering. - if (row.deletion().isLive()) + if (row.hasLiveData(nowInSec, false)) for (Index.Indexer indexer : indexers) indexer.insertRow(row); } @@ -185,8 +185,10 @@ public void insertRow(Row row) @Override public void updateRow(Row oldRow, Row newRow) { - for (Index.Indexer indexer : indexers) - indexer.updateRow(oldRow, newRow); + // SAI does not index deletions, as these are resolved during post-filtering. + if (newRow.hasLiveData(nowInSec, false)) + for (Index.Indexer indexer : indexers) + indexer.updateRow(oldRow, newRow); } }; } diff --git a/src/java/org/apache/cassandra/index/sai/disk/StorageAttachedIndexWriter.java b/src/java/org/apache/cassandra/index/sai/disk/StorageAttachedIndexWriter.java index f35ae67f93c2..341bcaac5e37 100644 --- a/src/java/org/apache/cassandra/index/sai/disk/StorageAttachedIndexWriter.java +++ b/src/java/org/apache/cassandra/index/sai/disk/StorageAttachedIndexWriter.java @@ -37,6 +37,7 @@ import org.apache.cassandra.index.sai.disk.format.IndexDescriptor; import org.apache.cassandra.index.sai.utils.PrimaryKey; import org.apache.cassandra.io.sstable.SSTableFlushObserver; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Throwables; /** @@ -52,6 +53,8 @@ public class StorageAttachedIndexWriter implements SSTableFlushObserver private final PerSSTableIndexWriter perSSTableWriter; private final Stopwatch stopwatch = Stopwatch.createUnstarted(); private final RowMapping rowMapping; + private final long nowInSeconds = FBUtilities.nowInSeconds(); + private DecoratedKey currentKey; private boolean tokenOffsetWriterCompleted = false; private boolean aborted = false; @@ -126,9 +129,14 @@ public void nextUnfilteredCluster(Unfiltered unfiltered) if (!unfiltered.isRow()) return; + // Ignore rows with no live data... + Row row = (Row) unfiltered; + if (!row.hasLiveData(nowInSeconds, false)) + return; + try { - addRow((Row)unfiltered); + addRow(row); } catch (Throwable t) { diff --git a/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java b/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java index 0650e9b9d6dc..04d3185bfc5b 100644 --- a/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java @@ -91,14 +91,16 @@ public void addRow(PrimaryKey key, Row row, long sstableRowId) // keys and row IDs in the flushing SSTable. This writer, therefore, does nothing in // response to the flushing of individual rows except for keeping index-specific statistics. boolean isStatic = indexTermType.columnMetadata().isStatic(); + boolean isPartitionKey = indexTermType.columnMetadata().isPartitionKey(); // Indexes on static columns should only track static rows, and indexes on non-static columns // should only track non-static rows. (Within a partition, the row ID for a static row will always - // come before any non-static row.) - if (key.kind() == PrimaryKey.Kind.STATIC && isStatic || key.kind() != PrimaryKey.Kind.STATIC && !isStatic) + // come before any non-static row.) The only exception to this is indexes on partition key elements. + if ((key.kind() == PrimaryKey.Kind.STATIC && (isStatic || isPartitionKey)) || key.kind() != PrimaryKey.Kind.STATIC && !isStatic) { if (minKey == null) minKey = key; + maxKey = key; rowCount++; maxSSTableRowId = Math.max(maxSSTableRowId, sstableRowId); diff --git a/src/java/org/apache/cassandra/index/sai/iterators/KeyRangeIntersectionIterator.java b/src/java/org/apache/cassandra/index/sai/iterators/KeyRangeIntersectionIterator.java index 6237aa013180..4909033bf86e 100644 --- a/src/java/org/apache/cassandra/index/sai/iterators/KeyRangeIntersectionIterator.java +++ b/src/java/org/apache/cassandra/index/sai/iterators/KeyRangeIntersectionIterator.java @@ -375,6 +375,10 @@ public void update(KeyRangeIterator range) min = nullSafeMax(min, range.getMinimum()); // maximum of the intersection is the smallest maximum of individual iterators max = nullSafeMin(max, range.getMaximum()); + + // With STATIC keys, it is possible for the min to overtake the max, which must be corrected. + min = nullSafeMin(min, max); + if (empty) { empty = false; diff --git a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java index 858242006aa2..9116db0d3107 100644 --- a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java +++ b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java @@ -229,7 +229,7 @@ private List nextSelectedKeysInRange() if (firstKey == null) return Collections.emptyList(); } - while (queryController.doesNotSelect(firstKey) || firstKey.equals(lastKey)); + while (queryController.doesNotSelect(firstKey) || firstKey.equals(lastKey, false)); lastKey = firstKey; threadLocalNextKeys.add(firstKey); @@ -291,7 +291,7 @@ private void fillNextSelectedKeysInPartition(DecoratedKey partitionKey, List static, v0 smallint, PRIMARY KEY ((pk0, pk1), ck0)) WITH CLUSTERING ORDER BY (ck0 DESC)"); + disableCompaction(KEYSPACE); + createIndex("CREATE INDEX tbl_pk0 ON %s(pk0) USING 'sai'"); + createIndex("CREATE INDEX tbl_pk1 ON %s(pk1) USING 'sai'"); + createIndex("CREATE INDEX tbl_s1 ON %s(s1) USING 'sai'"); + createIndex("CREATE INDEX tbl_v0 ON %s(v0) USING 'sai'"); + + execute("INSERT INTO %s (pk0, pk1, ck0, s1, s0, v0) VALUES (-62, -5815950741950477880, 0x326f, '켅\uF6EB憓ᤃ\uEF32ꝃ窰ŷ', {00000000-0000-4700-aa00-000000000000}, 19310) USING TIMESTAMP 1"); + execute("DELETE FROM %s USING TIMESTAMP 2 WHERE pk0 = 45 AND pk1 = 6014418364385708772 AND ck0 = 0x7c10"); + execute("DELETE FROM %s USING TIMESTAMP 3 WHERE pk0 = -41 AND pk1 = -3934225888295599640"); + execute("INSERT INTO %s (pk0, pk1, ck0, s1, s0, v0) " + + "VALUES (-64, 7973592261481566341, 0x0d, '\uE11B摻', {00000000-0000-4800-8900-000000000000, 00000000-0000-4900-8600-000000000000}, -23873) USING TIMESTAMP 4"); + flush(KEYSPACE); + + execute("UPDATE %s USING TIMESTAMP 5 SET v0=-359, s1='ل≻Ⱆ喡䮠?' WHERE pk0 = -64 AND pk1 = 7973592261481566341 AND ck0 = 0x99d570024de738f37877"); + execute("INSERT INTO %s (pk0, pk1, ck0, v0, s1, s0) " + + "VALUES (-104, -4990846884898776392, 0xf7ac771298eaf1d4, -6977, '凘纖볭菮⏏↶?蜑', null) USING TIMESTAMP 6"); + execute("INSERT INTO %s (pk0, pk1, ck0, s1, s0, v0) " + + "VALUES (-62, -5815950741950477880, 0x9277e744212e1c4b50, '\uF6AD瀛⛕徳倬糽ᢷ' + '雴', {00000000-0000-4700-b100-000000000000, 00000000-0000-4800-9300-000000000000}, 5423) USING TIMESTAMP 7"); + execute("DELETE FROM %s USING TIMESTAMP 8 WHERE pk0 = -64 AND pk1 = 7973592261481566341"); + flush(KEYSPACE); + + execute("DELETE s0, s1, s0 FROM %s USING TIMESTAMP 9 WHERE pk0 = -62 AND pk1 = -5815950741950477880"); + execute("DELETE FROM %s USING TIMESTAMP 10 WHERE pk0 = -41 AND pk1 = -3934225888295599640 AND ck0 = 0xd753dc3a473acaf665"); + execute("INSERT INTO %s (pk0, pk1, ck0, s1, s0, v0) " + + "VALUES (-62, -5815950741950477880, 0xd1e07b568a7188, 'ᑿ鼾戆' + '篐뵡?䰫', {00000000-0000-4500-b000-000000000000}, 17933) USING TIMESTAMP 11"); + execute("UPDATE %s USING TIMESTAMP 12 SET v0=null, s0={00000000-0000-4600-a000-000000000000, 00000000-0000-4d00-8200-000000000000, 00000000-0000-4f00-9200-000000000000} " + + "WHERE pk0 = -41 AND pk1 = -3934225888295599640 AND ck0 = 0x0dab3b038131efa2"); + + assertRowCount(execute("SELECT * FROM %s WHERE pk0 >= ? LIMIT 81", (byte) -104), 5); + execute("DELETE FROM %s USING TIMESTAMP 13 WHERE pk0 = -64 AND pk1 = 7973592261481566341"); + flush(KEYSPACE); + + beforeAndAfterFlush(() -> + assertRows(execute("SELECT pk0, pk1, ck0 FROM %s WHERE pk0 >= ?", (byte) -104), + row((byte) -62, -5815950741950477880L, ByteBufferUtil.hexToBytes("d1e07b568a7188")), + row((byte) -62, -5815950741950477880L, ByteBufferUtil.hexToBytes("9277e744212e1c4b50")), + row((byte) -62, -5815950741950477880L, ByteBufferUtil.hexToBytes("326f")), + row((byte) -104, -4990846884898776392L, ByteBufferUtil.hexToBytes("f7ac771298eaf1d4")), + row((byte) -41, -3934225888295599640L, null))); + } + + @Test + public void testIgnoreCellDeletions() throws Throwable + { + createTable("CREATE TABLE %s (pk0 boolean, pk1 varint, ck0 tinyint, ck1 varint, s0 list>> static, " + + " s1 map>, frozen>> static, v0 frozen>, uuid>>, " + + " PRIMARY KEY ((pk0, pk1), ck0, ck1)) WITH CLUSTERING ORDER BY (ck0 DESC, ck1 DESC)"); + disableCompaction(KEYSPACE); + createIndex("CREATE INDEX tbl_pk0 ON %s(pk0) USING 'sai'"); + + execute("INSERT INTO %s (pk0, pk1, ck0, ck1, s0, s1, v0) " + + "VALUES (true, 0, 109, 0, [{2.2352903520430565E260: -29214, 2.605618737869944E274: -13041}], " + + " {{00000000-0000-4400-9f00-000000000000, 00000000-0000-4500-9b00-000000000000, 00000000-0000-4b00-bf00-000000000000}: {'18.112.79.221': '-2306623-03-19', '227.58.183.116': '-3929454-04-25'}}, " + + " {{'⭎憢?', '黣偛紑'}: 00000000-0000-4900-8600-000000000000, {'㛽ꓗ', '剢ꮱ死䰀륬ਐ喑ퟚ', '竖䝏爐뷤曀'}: 00000000-0000-4900-bc00-000000000000}) USING TIMESTAMP 1"); + execute("INSERT INTO %s (pk0, pk1, ck0, ck1, s1, v0) " + + "VALUES (true, 0, 114, 742, {{00000000-0000-4000-9a00-000000000000, 00000000-0000-4700-ba00-000000000000}: {'96.31.70.25': '-912836-06-15', '185.90.18.173': '-5257542-01-31', '223.18.191.245': '-4633145-10-30'}}, " + + " {{'뫥㩎뎠ྭẒ'}: 00000000-0000-4800-8600-000000000000}) USING TIMESTAMP 2"); + + // This will result in the creation of erroneous postings if cell deletions are not accounted for: + execute("DELETE v0, s1, s0 FROM %s USING TIMESTAMP 6 WHERE pk0 = true AND pk1 = 0 AND ck0 = 121 AND ck1 = 1"); + + execute("UPDATE %s USING TIMESTAMP 8 SET s0 += [{4.3056056376102396E-169: 22551, 1.439623561042819E208: 20450}, {-2.7900719406964408E-242: 30147, 8.586565205109037E-211: 28721, 4.603864140847754E20: -12814}], " + + " s1 += {{00000000-0000-4200-b900-000000000000, 00000000-0000-4500-ab00-000000000000}: {'2.67.240.121': '-471656-04-17', '134.186.187.51': '-2056459-04-13'}}, " + + " v0={{'?', '蠥╩徰昰弳펠재', '됢簔Ὕ텇⢌យ稭澣'}: 00000000-0000-4d00-8d00-000000000000} " + + "WHERE pk0 = true AND pk1 = 0 AND ck0 = 37 AND ck1 = 0"); + + beforeAndAfterFlush(() -> + assertRows(execute("SELECT pk0, pk1, ck0, ck1 FROM %s WHERE pk0 = ? LIMIT 4", true), + row(true, IntegerType.instance.fromString("0"), ByteType.instance.fromString("114"), IntegerType.instance.fromString("742")), + row(true, IntegerType.instance.fromString("0"), ByteType.instance.fromString("109"), IntegerType.instance.fromString("0")), + row(true, IntegerType.instance.fromString("0"), ByteType.instance.fromString("37"), IntegerType.instance.fromString("0")))); + } + @Test public void testIntersectionOnMixedPostingsOnDelete() throws Throwable { createTable("CREATE TABLE %s (pk0 boolean, pk1 uuid, ck0 date, ck1 smallint, s0 timeuuid static, v0 bigint, v1 float, PRIMARY KEY ((pk0, pk1), ck0, ck1)) WITH CLUSTERING ORDER BY (ck0 DESC, ck1 ASC)"); - + disableCompaction(KEYSPACE); createIndex("CREATE INDEX tbl_pk0 ON %s(pk0) USING 'sai'"); createIndex("CREATE INDEX tbl_ck0 ON %s(ck0) USING 'sai'"); @@ -54,7 +134,7 @@ public void testIntersectionOnMixedPostingsOnDelete() throws Throwable public void testIntersectionOnMixedPostingsOnUpdate() throws Throwable { createTable("CREATE TABLE %s (pk0 boolean, pk1 uuid, ck0 date, ck1 smallint, s0 timeuuid static, v0 bigint, v1 float, PRIMARY KEY ((pk0, pk1), ck0, ck1)) WITH CLUSTERING ORDER BY (ck0 DESC, ck1 ASC)"); - + disableCompaction(KEYSPACE); createIndex("CREATE INDEX tbl_pk0 ON %s(pk0) USING 'sai'"); createIndex("CREATE INDEX tbl_ck0 ON %s(ck0) USING 'sai'"); @@ -74,6 +154,7 @@ public void testIntersectionOnMixedPostingsOnUpdate() throws Throwable public void testIntersectionWithStaticOverlap() throws Throwable { createTable("CREATE TABLE %s (pk0 int, pk1 int, ck0 int, s1 int static, v0 int, PRIMARY KEY((pk0, pk1), ck0))"); + disableCompaction(KEYSPACE); createIndex("CREATE INDEX ON %s(pk0) USING 'sai'"); execute("UPDATE %s USING TIMESTAMP 1 SET s1 = 0, v0 = 0 WHERE pk0 = 0 AND pk1 = 1 AND ck0 = 0"); @@ -91,6 +172,7 @@ public void testIntersectionWithStaticOverlap() throws Throwable public void testIntersectionWithStaticUpdate() throws Throwable { createTable("CREATE TABLE %s (pk0 time, pk1 varint, ck0 date, s0 boolean static, s1 text static, v0 boolean, PRIMARY KEY ((pk0, pk1), ck0))"); + disableCompaction(KEYSPACE); createIndex("CREATE INDEX tbl_pk0 ON %s(pk0) USING 'sai'"); createIndex("CREATE INDEX tbl_s0 ON %s(s0) USING 'sai'"); @@ -116,6 +198,7 @@ public void testIntersectionWithStaticUpdate() throws Throwable public void testCompositePartitionIndex() throws Throwable { createTable("CREATE TABLE %s (pk1 int, pk2 text, val int, PRIMARY KEY((pk1, pk2)))"); + disableCompaction(KEYSPACE); createIndex("CREATE INDEX ON %s(pk1) USING 'sai'"); createIndex("CREATE INDEX ON %s(pk2) USING 'sai'"); @@ -168,6 +251,7 @@ public void testCompositePartitionIndex() throws Throwable public void testFilterWithIndexForContains() throws Throwable { createTable("CREATE TABLE %s (k1 int, k2 int, v set, PRIMARY KEY ((k1, k2)))"); + disableCompaction(KEYSPACE); createIndex("CREATE INDEX ON %s(k2) USING 'sai'"); execute("INSERT INTO %s (k1, k2, v) VALUES (?, ?, ?)", 0, 0, set(1, 2, 3)); diff --git a/test/unit/org/apache/cassandra/index/sai/cql/StaticColumnIndexTest.java b/test/unit/org/apache/cassandra/index/sai/cql/StaticColumnIndexTest.java index faaf74f5c603..d9dec8e3a761 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/StaticColumnIndexTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/StaticColumnIndexTest.java @@ -20,6 +20,9 @@ import org.junit.Test; +import org.apache.cassandra.db.marshal.BytesType; +import org.apache.cassandra.db.marshal.TimeType; +import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.index.sai.SAITester; public class StaticColumnIndexTest extends SAITester @@ -28,6 +31,7 @@ public class StaticColumnIndexTest extends SAITester public void staticIndexReturnsAllRowsInPartition() throws Throwable { createTable("CREATE TABLE %s (pk int, ck int, val1 int static, val2 int, PRIMARY KEY(pk, ck))"); + disableCompaction(KEYSPACE); createIndex("CREATE INDEX ON %s(val1) USING 'sai'"); execute("INSERT INTO %s(pk, ck, val1, val2) VALUES(?, ?, ?, ?)", 1, 1, 2, 1); @@ -42,6 +46,7 @@ public void staticIndexReturnsAllRowsInPartition() throws Throwable public void staticIndexAndNonStaticIndex() throws Throwable { createTable("CREATE TABLE %s (pk int, ck int, val1 int static, val2 int, PRIMARY KEY(pk, ck))"); + disableCompaction(KEYSPACE); createIndex("CREATE INDEX ON %s(val1) USING 'sai'"); createIndex("CREATE INDEX ON %s(val2) USING 'sai'"); @@ -57,6 +62,7 @@ public void staticIndexAndNonStaticIndex() throws Throwable public void staticAndNonStaticRangeIntersection() throws Throwable { createTable("CREATE TABLE %s (pk int, ck int, v1 int, s1 int static, PRIMARY KEY(pk, ck))"); + disableCompaction(KEYSPACE); createIndex("CREATE INDEX ON %s(v1) USING 'sai'"); createIndex("CREATE INDEX ON %s(s1) USING 'sai'"); @@ -71,4 +77,42 @@ public void staticAndNonStaticRangeIntersection() throws Throwable beforeAndAfterFlush(() -> assertRowCount(execute("SELECT * FROM %s WHERE pk = ? AND v1 > ? AND s1 = ?", 0, 2, 100), 3)); } + + @Test + public void testTupleAndBlobFiltering() throws Throwable + { + String blobTupleType = createType("CREATE TYPE IF NOT EXISTS %s (f0 blob)"); + String boolTinyTextType = createType("CREATE TYPE IF NOT EXISTS %s (f0 boolean, f1 tinyint, f2 text)"); + createTable("CREATE TABLE %s (pk0 time, pk1 uuid, ck0 uuid, ck1 blob, s0 frozen>>> static, " + + " v0 vector, 3>, v1 frozen, vector>>, " + + " v2 vector, 2>, v3 bigint, PRIMARY KEY ((pk0, pk1), ck0, ck1)) WITH CLUSTERING ORDER BY (ck0 DESC, ck1 DESC)"); + disableCompaction(KEYSPACE); + createIndex("CREATE INDEX tbl_pk1 ON %s(pk1) USING 'sai'"); + createIndex("CREATE INDEX tbl_s0 ON %s(s0) USING 'sai'"); + + execute("INSERT INTO %s (pk0, pk1, ck0, ck1, s0, v0, v1, v2, v3) " + + "VALUES ('02:43:47.716011275', 00000000-0000-4200-b200-000000000000, 00000000-0000-4e00-8400-000000000000, 0xf2791941aea8e469, " + + " (12129, {-2.58545975E14}), [[-1781797567, 330686172], [103364202, 2031130152], [-550709009, 492544493]], " + + " {{f0: 0x34839b8bae653b2bdee8}: [-8431172225521461427, 8894719445990427242]}, [{f0: false, f1: 53, f2: '嵆왛孷쏆䊖恣'}, {f0: true, f1: 21, f2: 'ᨚ?榥쯢?ɚ챛ퟡ'}], 9167463065336786821) USING TIMESTAMP 3"); + execute("UPDATE %s USING TIMESTAMP 4 " + + "SET s0=(23307, {-8.214548E-18}), v0=[[672139924, -1253475201], [353181149, -1829076723], [179355765, 379303855]], " + + " v1={{f0: 0x64850696464d}: [-7485547085069825418, 7795885370802556756], {f0: 0x67633db6f091}: [-8484578637223040646, 8216210044102487771]}, " + + " v2=[{f0: true, f1: 68, f2: '䝿ᝧ䶨푥펟겭매郂쀌'}, {f0: true, f1: 98, f2: '髃爫삿챥卛☓읂ີ?'}], v3=-4626482462417652499 * -7377486305688263453 " + + "WHERE pk0 = '03:36:30.876439626' AND pk1 = 00000000-0000-4000-ad00-000000000000 AND ck0 = 00000000-0000-4000-9f00-000000000000 AND ck1 = 0xa06bb301"); + execute("INSERT INTO %s (pk0, pk1, ck0, ck1, s0, v0, v1, v2, v3) " + + "VALUES ('07:08:47.775161332', 00000000-0000-4800-ad00-000000000000, 00000000-0000-4a00-a500-000000000000, 0xfef0d63ff7, (-15283, {-1.132058E24, 2.9319742E-31}), " + + " [[-335960956, 678086816], [-2139882146, 1011627708], [-55338955, -2094185756]], {{f0: 0xd9c3ab}: [-9002034104664383537, -8074261670215737032]}, " + + " [{f0: true, f1: -79, f2: '霠♘칳⦵ঋ幗䶐'}, {f0: true, f1: 7, f2: '䉻ݹ鞞텔㙠'}], 1885613374025825905) USING TIMESTAMP 5"); + execute("DELETE FROM %s USING TIMESTAMP 6 WHERE pk0 = '14:02:14.975449434' AND pk1 = 00000000-0000-4900-9900-000000000000"); + execute("DELETE FROM %s USING TIMESTAMP 7 WHERE pk0 = '12:15:35.151327231' AND pk1 = 00000000-0000-4500-ac00-000000000000"); + execute("DELETE FROM %s USING TIMESTAMP 8 WHERE pk0 = '07:08:47.775161332' AND pk1 = 00000000-0000-4800-ad00-000000000000 AND ck0 = 00000000-0000-4b00-b000-000000000000 AND ck1 = 0xa4121adb08"); + execute("INSERT INTO %s (pk0, pk1, ck0, ck1, s0, v0, v1, v2, v3) " + + "VALUES ('03:36:30.876439626', 00000000-0000-4000-ad00-000000000000, 00000000-0000-4600-b400-000000000000, 0x63f5, (28387, {-1.18764904E-20}), " + + " [[-441895935, 313114446], [-740629531, -678512740], [1429899934, -1259907921]], {{f0: 0x5df1}: [414225888834712632, -5730196176171247108], " + + " {f0: 0x92c1497d7072b81c91}: [-7587541014989351350, -2813091340484612608]}, [{f0: true, f1: 41, f2: '쎺╇⒀왶'}, {f0: true, f1: -84, f2: '턺䋏篷'}], -1473884563651667176 + 128345915915881356) USING TIMESTAMP 9"); + + beforeAndAfterFlush(() -> assertRows(execute("SELECT pk0, pk1, ck0, ck1 FROM %s WHERE s0 = (28387, {-1.18764904E-20}) AND pk1 = 00000000-0000-4000-ad00-000000000000 AND ck1 = 0xa06bb301 LIMIT 307 ALLOW FILTERING"), + row(TimeType.instance.fromString("03:36:30.876439626"), UUIDType.instance.fromString("00000000-0000-4000-ad00-000000000000"), + UUIDType.instance.fromString("00000000-0000-4000-9f00-000000000000"), BytesType.instance.fromString("a06bb301")))); + } } From 6472340d93c57d358eff72b3ab02e7594872de10 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 9 Apr 2025 13:06:28 -0700 Subject: [PATCH 027/340] AST Harrys multi node tests are flakey when multi cell list happens cross instances patch by David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-20544 --- .../test/cql3/MultiNodeTableWalkBase.java | 53 ++++++++++++++++++- .../test/cql3/MultiNodeTokenConflictTest.java | 19 ++++--- .../test/cql3/SingleNodeTableWalkTest.java | 8 +-- .../cql3/SingleNodeTokenConflictTest.java | 2 +- .../test/cql3/StatefulASTBase.java | 19 ++++++- 5 files changed, 84 insertions(+), 17 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java index 3e9c1195f6a1..da4ac26cbc11 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java @@ -21,12 +21,19 @@ import java.io.IOException; import accord.utils.RandomSource; +import net.bytebuddy.ByteBuddy; +import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; +import net.bytebuddy.implementation.MethodDelegation; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; +import org.apache.cassandra.utils.Shared; +import org.apache.cassandra.utils.TimeUUID; + +import static net.bytebuddy.matcher.ElementMatchers.named; public abstract class MultiNodeTableWalkBase extends SingleNodeTableWalkTest { @@ -53,7 +60,7 @@ protected TableMetadata defineTable(RandomSource rs, String ks) @Override protected Cluster createCluster() throws IOException { - return createCluster(mockMultiNode ? 1 : 3, this::clusterConfig); + return createCluster(mockMultiNode ? 1 : 3); } @Override @@ -66,6 +73,12 @@ protected void clusterConfig(IInstanceConfig c) .set("slow_query_log_timeout", "180s"); } + @Override + protected void clusterInitializer(ClassLoader cl, int node) + { + BBHelper.install(cl, node); + } + @Override protected State createState(RandomSource rs, Cluster cluster) { @@ -131,4 +144,42 @@ protected ConsistencyLevel mutationCl() return ConsistencyLevel.NODE_LOCAL; } } + + /** + * This is not a deterministic clock for TimeUUID, but it's a monotonic clock, which means that any instance that gets + * a TimeUUID from this clock has the propery that its happens-after all other ones cross all instances. + * + * This class came around because TimeUUID.Generator.nextUnixMicros works with milliseconds, and when time doesn't + * move forward (goes back or test is "too fast") then it becomes an instance local bump-counter; this counter allows + * a logically later timeuuid to happens-before a logically earlier one! + */ + @Shared + public static class GlobalClock + { + private static long lastMicros = 0; + public synchronized static long nextUnixMicros() + { + return ++lastMicros; + } + + public synchronized static void reset() + { + // this method isn't actually needed for the property of this class, but it does help isolate any non-deterministic issues + lastMicros = 0; + } + } + + public static class BBHelper + { + static void install(ClassLoader cl, int nodeNumber) + { + new ByteBuddy().rebase(TimeUUID.Generator.class) + .method(named("nextUnixMicros")) + .intercept(MethodDelegation.to(GlobalClock.class)) + .make() + .load(cl, ClassLoadingStrategy.Default.INJECTION); + + GlobalClock.reset(); + } + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java index 2d296277cfd4..969b0756432b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java @@ -24,6 +24,7 @@ import accord.utils.RandomSource; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -46,16 +47,20 @@ protected TableMetadata defineTable(RandomSource rs, String ks) return tbl.unbuild().params(tbl.params.unbuild().readRepair(ReadRepairStrategy.NONE).build()).build(); } + @Override + protected void clusterConfig(IInstanceConfig c) + { + c.set("range_request_timeout", "180s") + .set("read_request_timeout", "180s") + .set("write_request_timeout", "180s") + .set("native_transport_timeout", "180s") + .set("slow_query_log_timeout", "180s"); + } + @Override protected Cluster createCluster() throws IOException { - return createCluster(3, c -> { - c.set("range_request_timeout", "180s") - .set("read_request_timeout", "180s") - .set("write_request_timeout", "180s") - .set("native_transport_timeout", "180s") - .set("slow_query_log_timeout", "180s"); - }); + return createCluster(3); } @Override diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java index 785db954c801..924bd3eeeb47 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java @@ -59,7 +59,6 @@ import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.Cluster; -import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.test.sai.SAIUtil; import org.apache.cassandra.harry.model.BytesPartitionState; import org.apache.cassandra.schema.ColumnMetadata; @@ -350,12 +349,7 @@ protected State createState(RandomSource rs, Cluster cluster) protected Cluster createCluster() throws IOException { - return createCluster(1, this::clusterConfig); - } - - protected void clusterConfig(IInstanceConfig config) - { - + return createCluster(1); } @Test diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java index dd15b99d5520..0f0bef9bfff9 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java @@ -240,7 +240,7 @@ protected void preCheck(Property.StatefulBuilder builder) protected Cluster createCluster() throws IOException { - return createCluster(1, i -> {}); + return createCluster(1); } @Test diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java index 4a4a1e6a6147..83548ba83475 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java @@ -27,6 +27,7 @@ import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -137,9 +138,25 @@ protected static String nextKeyspace() return "ks" + COUNTER.incrementAndGet(); } - protected static Cluster createCluster(int nodeCount, Consumer config) throws IOException + protected void clusterConfig(IInstanceConfig config) + { + + } + + protected void clusterInitializer(ClassLoader cl, int node) + { + + } + + protected Cluster createCluster(int nodeCount) throws IOException + { + return createCluster(nodeCount, this::clusterConfig, this::clusterInitializer); + } + + protected static Cluster createCluster(int nodeCount, Consumer config, BiConsumer instanceInitializer) throws IOException { Cluster cluster = Cluster.build(nodeCount) + .withInstanceInitializer(instanceInitializer) .withConfig(c -> { c.with(Feature.NATIVE_PROTOCOL, Feature.NETWORK, Feature.GOSSIP) // When drop tables or truncate are performed, we attempt to take snapshots. This can be costly and isn't needed by these tests From c18b1e937f06515009170ff72ba0f4f1166d1c11 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Thu, 10 Apr 2025 11:54:03 -0500 Subject: [PATCH 028/340] Bump version, prepare CHANGES --- CHANGES.txt | 3 +++ build.xml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 6e53c8ac93c7..25e7988bc930 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,6 @@ +5.0.5 + + 5.0.4 * Update netty to 4.1.119.Final and netty-tcnative to 2.0.70.Final (CASSANDRA-20314) * Serialization can lose complex deletions in a mutation with multiple collections in a row (CASSANDRA-20449) diff --git a/build.xml b/build.xml index b2c80ed7d375..c40fc783a357 100644 --- a/build.xml +++ b/build.xml @@ -33,7 +33,7 @@ - + From 040d5e0111cf0b4505b1870ae2f4772378f2caa3 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Thu, 10 Apr 2025 11:54:03 -0500 Subject: [PATCH 029/340] Bump version, prepare CHANGES --- CHANGES.txt | 3 +++ build.xml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index f4b1be2002f8..be125526515d 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,6 @@ +5.0.5 + + 5.0.4 * Fixed multiple single-node SAI query bugs relating to static columns (CASSANDRA-20338) * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) diff --git a/build.xml b/build.xml index 611e121f0b12..d8e0ad2e390f 100644 --- a/build.xml +++ b/build.xml @@ -33,7 +33,7 @@ - + From 1f9648a7a8f335e2fcc909f6586012fb0fca09e3 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Thu, 10 Apr 2025 13:18:13 -0500 Subject: [PATCH 030/340] correct CHANGES --- CHANGES.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index be125526515d..fae559164930 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,9 +1,11 @@ 5.0.5 + * Fixed multiple single-node SAI query bugs relating to static columns (CASSANDRA-20338) + * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) +Merged from 4.0: + * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) 5.0.4 - * Fixed multiple single-node SAI query bugs relating to static columns (CASSANDRA-20338) - * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) * Update netty to 4.1.119.Final and netty-tcnative to 2.0.70.Final (CASSANDRA-20314) * Serialization can lose complex deletions in a mutation with multiple collections in a row (CASSANDRA-20449) * Improve error messages when initializing auth classes (CASSANDRA-20368) @@ -29,7 +31,6 @@ Merged from 4.1: * Fix SimpleClient ability to release acquired capacity (CASSANDRA-20202) * Fix WaitQueue.Signal.awaitUninterruptibly may block forever if invoking thread is interrupted (CASSANDRA-20084) Merged from 4.0: - * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) * Update OWASP dependency checker to version 12.1.0 (CASSANDRA-20501) * Suppress CVE-2025-25193 (CASSANDRA-20504) * Include in source tree and build packages a Snyk policy file that lists known false positives (CASSANDRA-20319) From bcd5c483079f3c969c775200a218d90dfa418825 Mon Sep 17 00:00:00 2001 From: Jordan West Date: Wed, 2 Apr 2025 15:35:58 -0700 Subject: [PATCH 031/340] Fix test failure with negative position in ThreadLocalReadAheadBufferTest patch by Jordan West; reviewed by Jon Haddad, Dmitry Konstantinov for CASSANDRA-20507 --- .../io/util/ThreadLocalReadAheadBufferTest.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test/unit/org/apache/cassandra/io/util/ThreadLocalReadAheadBufferTest.java b/test/unit/org/apache/cassandra/io/util/ThreadLocalReadAheadBufferTest.java index 4d43017b2a5a..9bb957e3a5e1 100644 --- a/test/unit/org/apache/cassandra/io/util/ThreadLocalReadAheadBufferTest.java +++ b/test/unit/org/apache/cassandra/io/util/ThreadLocalReadAheadBufferTest.java @@ -41,6 +41,7 @@ import org.quicktheories.WithQuickTheories; import org.quicktheories.core.Gen; +import static java.lang.Math.max; import static org.apache.cassandra.config.CassandraRelevantProperties.JAVA_IO_TMPDIR; public class ThreadLocalReadAheadBufferTest implements WithQuickTheories @@ -48,16 +49,17 @@ public class ThreadLocalReadAheadBufferTest implements WithQuickTheories private static final int numFiles = 5; private static final File[] files = new File[numFiles]; private static final Logger logger = LoggerFactory.getLogger(ThreadLocalReadAheadBufferTest.class); + private static Integer seed; @BeforeClass public static void setup() { - int seed = new Random().nextInt(); + seed = new Random().nextInt(); logger.info("Seed: {}", seed); for (int i = 0; i < numFiles; i++) { - int size = new Random().nextInt((Integer.MAX_VALUE - 1) / 8); + int size = new Random(seed).nextInt((Integer.MAX_VALUE - 1) / 8); files[i] = writeFile(seed, size); } } @@ -81,7 +83,7 @@ public static void cleanup() @Test public void testLastBlockReads() { - qt().forAll(lastBlockReads()) + qt().withFixedSeed(seed).forAll(lastBlockReads()) .checkAssert(this::testReads); } @@ -89,7 +91,7 @@ public void testLastBlockReads() public void testReadsLikeChannelProxy() { - qt().forAll(randomReads()) + qt().withFixedSeed(seed).forAll(reads()) .checkAssert(this::testReads); } @@ -127,7 +129,7 @@ private void testReads(InputData propertyInputs) } } - private Gen lastBlockReads() + private Gen reads() { return arbitrary().pick(List.of(files)) .flatMap((file) -> @@ -137,12 +139,12 @@ private Gen lastBlockReads() } - private Gen randomReads() + private Gen lastBlockReads() { int blockSize = new DataStorageSpec.IntKibibytesBound("256KiB").toBytes(); return arbitrary().pick(List.of(files)) .flatMap((file) -> - lists().of(longs().between(fileSize(file) - blockSize, fileSize(file)).zip(integers().between(1, 100), Pair::create)) + lists().of(longs().between(max(0, fileSize(file) - blockSize), fileSize(file)).zip(integers().between(1, 100), Pair::create)) .ofSizeBetween(5, 10) .map(positionsAndLengths -> new InputData(file, positionsAndLengths))); From 725e4ba3eb1dfd9bdc1facd6a75072e89ba7ea86 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Wed, 9 Apr 2025 14:22:42 -0500 Subject: [PATCH 032/340] Avoid purging deletions in RowFilter when reconciliation is required patch by Caleb Rackliffe; reviewed by David Capwell for CASSANDRA-20541 --- CHANGES.txt | 1 + .../apache/cassandra/db/filter/RowFilter.java | 5 ++- .../TableLevelIncrementalBackupsTest.java | 6 ---- .../distributed/test/TestBaseImpl.java | 6 ++++ .../sai/ReplicaFilteringWithStaticsTest.java | 36 +++++++++++++++++++ 5 files changed, 47 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index fae559164930..d7a7a61e1901 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Avoid purging deletions in RowFilter when reconciliation is required (CASSANDRA-20541) * Fixed multiple single-node SAI query bugs relating to static columns (CASSANDRA-20338) * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) Merged from 4.0: diff --git a/src/java/org/apache/cassandra/db/filter/RowFilter.java b/src/java/org/apache/cassandra/db/filter/RowFilter.java index 9b83a56f23f5..f1b095920f4a 100644 --- a/src/java/org/apache/cassandra/db/filter/RowFilter.java +++ b/src/java/org/apache/cassandra/db/filter/RowFilter.java @@ -257,7 +257,10 @@ protected BaseRowIterator applyToPartition(BaseRowIterator partition) @Override public Row applyToRow(Row row) { - Row purged = row.purge(DeletionPurger.PURGE_ALL, nowInSec, metadata.enforceStrictLiveness()); + // If we purge deletions when reconciliation is required, we hide information replica filtering + // protection would require to filter rows that are no longer matches are the coordinator. + Row purged = needsReconciliation() ? row : row.purge(DeletionPurger.PURGE_ALL, nowInSec, metadata.enforceStrictLiveness()); + if (purged == null) return null; diff --git a/test/distributed/org/apache/cassandra/distributed/test/TableLevelIncrementalBackupsTest.java b/test/distributed/org/apache/cassandra/distributed/test/TableLevelIncrementalBackupsTest.java index 2bf6c5f35068..833b9484d051 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/TableLevelIncrementalBackupsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/TableLevelIncrementalBackupsTest.java @@ -141,12 +141,6 @@ private void flush(Cluster cluster, String keyspace) cluster.get(i).flush(keyspace); } - private void disableCompaction(Cluster cluster, String keyspace, String table) - { - for (int i = 1; i < cluster.size() + 1; i++) - cluster.get(i).nodetool("disableautocompaction", keyspace, table); - } - private static void assertBackupSSTablesCount(Cluster cluster, int expectedTablesCount, boolean enable, String ks, String... tableNames) { for (int i = 1; i < cluster.size() + 1; i++) diff --git a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java index 35c59046eae2..6f36408e9eb4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java @@ -212,4 +212,10 @@ public static void fixDistributedSchemas(Cluster cluster) // in real live repair is needed in this case, but in the test case it doesn't matter if the tables loose // anything, so ignoring repair to speed up the tests. } + + protected static void disableCompaction(Cluster cluster, String keyspace, String table) + { + for (int i = 1; i < cluster.size() + 1; i++) + cluster.get(i).nodetool("disableautocompaction", keyspace, table); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/sai/ReplicaFilteringWithStaticsTest.java b/test/distributed/org/apache/cassandra/distributed/test/sai/ReplicaFilteringWithStaticsTest.java index 59cc1cc6a51f..189215771201 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/sai/ReplicaFilteringWithStaticsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/sai/ReplicaFilteringWithStaticsTest.java @@ -44,6 +44,42 @@ public static void setUpCluster() throws IOException CLUSTER = init(Cluster.build(3).withConfig(config -> config.set("hinted_handoff_enabled", false).with(GOSSIP).with(NETWORK)).start()); } + @Test + public void testRowFilterDeletePurging() + { + testRowFilterDeletePurging(false); + } + + @Test + public void testRowFilterDeletePurgingSAI() + { + testRowFilterDeletePurging(true); + } + + public void testRowFilterDeletePurging(boolean sai) + { + String table = "row_filtering_delete_purging" + (sai ? "_sai" : ""); + + CLUSTER.schemaChange(withKeyspace("CREATE TABLE %s." + table + " (pk0 double, ck0 boolean, s0 ascii static, v0 ascii, " + + "PRIMARY KEY (pk0, ck0)) WITH CLUSTERING ORDER BY (ck0 DESC) AND read_repair = 'NONE'")); + disableCompaction(CLUSTER, KEYSPACE, table); + + if (sai) + { + CLUSTER.schemaChange(withKeyspace("CREATE INDEX ON %s." + table + "(s0) USING 'sai'")); + SAIUtil.waitForIndexQueryable(CLUSTER, KEYSPACE); + } + + CLUSTER.get(3).executeInternal(withKeyspace("UPDATE %s." + table + " USING TIMESTAMP 1 SET s0='foo', v0='c' WHERE pk0 = 2.9 AND ck0 IN (false, true)")); + + // This delete must be resolved by RFP to eliminate the row with ck0 = true from node 3: + CLUSTER.get(1).executeInternal(withKeyspace("DELETE FROM %s." + table + " USING TIMESTAMP 2 WHERE pk0 = 2.9 AND ck0 = true")); + CLUSTER.get(1).executeInternal(withKeyspace("INSERT INTO %s." + table + " (pk0, ck0, s0, v0) VALUES (2.9, false, 'bar', 'xyz') USING TIMESTAMP 3")); + + String select = withKeyspace("SELECT ck0 FROM %s." + table + " WHERE s0 = 'bar' ALLOW FILTERING"); + assertRows(CLUSTER.coordinator(1).executeWithPaging(select, ALL, 100), row(false)); + } + @Test public void testStaticMatchWithPartitionDelete() { From 7a888149dff4afaea8753571097bd8bca6a4fbfd Mon Sep 17 00:00:00 2001 From: Sam Tunnicliffe Date: Fri, 12 Jan 2024 13:00:00 +0000 Subject: [PATCH 033/340] Support topology-safe changes to Datacenter & Rack for live nodes Patch by Sam Tunnicliffe; reviewed by Marcus Eriksson for CASSANDRA-20528 --- CHANGES.txt | 1 + .../apache/cassandra/db/SystemKeyspace.java | 7 + .../cassandra/service/StorageService.java | 18 ++ .../service/StorageServiceMBean.java | 2 + .../apache/cassandra/tcm/Transformation.java | 3 +- .../tcm/listeners/LegacyStateListener.java | 7 +- .../listeners/PlacementsChangeListener.java | 2 +- .../cassandra/tcm/membership/Directory.java | 25 +- .../cassandra/tcm/membership/Location.java | 12 + .../tcm/ownership/DataPlacement.java | 5 + .../tcm/ownership/DataPlacements.java | 18 ++ .../tcm/ownership/ReplicaGroups.java | 15 + .../cassandra/tcm/ownership/TokenMap.java | 4 +- .../tcm/ownership/VersionedEndpoints.java | 7 +- .../sequences/CancelCMSReconfiguration.java | 2 +- .../tcm/transformations/AlterSchema.java | 2 +- .../tcm/transformations/AlterTopology.java | 193 +++++++++++++ .../org/apache/cassandra/tools/NodeTool.java | 1 + .../tools/nodetool/AlterTopology.java | 49 ++++ .../test/log/AlterTopologyTest.java | 267 ++++++++++++++++++ .../log/MetadataChangeSimulationTest.java | 4 +- .../distributed/test/log/ModelState.java | 14 + .../test/log/ReconfigureCMSTest.java | 4 +- .../test/log/SimulatedOperation.java | 21 ++ .../harry/model/TokenPlacementModel.java | 17 ++ .../SchemaChangeDuringRangeMovementTest.java | 48 +--- .../service/AlterTopologyArgParsingTest.java | 149 ++++++++++ .../tcm/membership/DirectoryTest.java | 126 +++++++++ .../InProgressSequenceCancellationTest.java | 36 +-- .../tcm/sequences/SequencesUtils.java | 72 +++++ 30 files changed, 1036 insertions(+), 95 deletions(-) create mode 100644 src/java/org/apache/cassandra/tcm/transformations/AlterTopology.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/AlterTopology.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/log/AlterTopologyTest.java create mode 100644 test/unit/org/apache/cassandra/service/AlterTopologyArgParsingTest.java create mode 100644 test/unit/org/apache/cassandra/tcm/membership/DirectoryTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 2fb55b032e11..26eb4a062cec 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Support topology-safe changes to Datacenter & Rack for live nodes (CASSANDRA-20528) * Add SSTableIntervalTree latency metric (CASSANDRA-20502) * Ignore repetitions of semicolon in CQLSH (CASSANDRA-19956) * Avoid NPE during cms initialization abort (CASSANDRA-20527) diff --git a/src/java/org/apache/cassandra/db/SystemKeyspace.java b/src/java/org/apache/cassandra/db/SystemKeyspace.java index 81186c512a7f..36fc953360c1 100644 --- a/src/java/org/apache/cassandra/db/SystemKeyspace.java +++ b/src/java/org/apache/cassandra/db/SystemKeyspace.java @@ -975,6 +975,13 @@ public static synchronized void updateRack(String rack) executeInternal(format(req, LOCAL, LOCAL), rack); } + public static synchronized void updateLocation(Location location) + { + String req = "INSERT INTO system.%s (key, data_center, rack) VALUES ('%s', ?, ?)"; + executeInternal(format(req, LOCAL, LOCAL), location.datacenter, location.rack); + forceBlockingFlush(LOCAL); + } + public static Set tokensAsSet(Collection tokens) { if (tokens.isEmpty()) diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index e8a3b40a86d3..bc914a9e55be 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -186,6 +186,7 @@ import org.apache.cassandra.tcm.compatibility.GossipHelper; import org.apache.cassandra.tcm.compatibility.TokenRingUtils; import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; @@ -199,6 +200,7 @@ import org.apache.cassandra.tcm.sequences.SingleNodeSequences; import org.apache.cassandra.tcm.transformations.Assassinate; import org.apache.cassandra.tcm.transformations.CancelInProgressSequence; +import org.apache.cassandra.tcm.transformations.AlterTopology; import org.apache.cassandra.tcm.transformations.Register; import org.apache.cassandra.tcm.transformations.Startup; import org.apache.cassandra.tcm.transformations.Unregister; @@ -5553,4 +5555,20 @@ public boolean getPaxosRepairRaceWait() { return DatabaseDescriptor.getPaxosRepairRaceWait(); } + + public void alterTopology(String changes) + { + Map updates = AlterTopology.parseArgs(changes, ClusterMetadata.current().directory); + logger.info("Received request to modify rack assignments. Proposed changes: {}", updates); + if (updates.isEmpty()) + return; + + AlterTopology transform = new AlterTopology(updates, ClusterMetadataService.instance().placementProvider()); + ClusterMetadataService.instance() + .commit(transform, + m -> { logger.info("Rack changes committed successfully"); return m; }, + (c, r) -> { + throw new IllegalArgumentException("Unable to commit rack changes: " + r); + }); + } } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index b738ecd48678..74017b257995 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -1360,4 +1360,6 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e void setPaxosRepairRaceWait(boolean paxosRepairCoordinatorWait); boolean getPaxosRepairRaceWait(); + // Comma delimited list of "nodeId=dc:rack" or "endpoint=dc:rack" + void alterTopology(String updates); } diff --git a/src/java/org/apache/cassandra/tcm/Transformation.java b/src/java/org/apache/cassandra/tcm/Transformation.java index 8cfda01e26c0..864d9a5d94fa 100644 --- a/src/java/org/apache/cassandra/tcm/Transformation.java +++ b/src/java/org/apache/cassandra/tcm/Transformation.java @@ -219,7 +219,8 @@ enum Kind PREPARE_SIMPLE_CMS_RECONFIGURATION(31, () -> PrepareCMSReconfiguration.Simple.serializer), PREPARE_COMPLEX_CMS_RECONFIGURATION(32, () -> PrepareCMSReconfiguration.Complex.serializer), ADVANCE_CMS_RECONFIGURATION(33, () -> AdvanceCMSReconfiguration.serializer), - CANCEL_CMS_RECONFIGURATION(34, () -> CancelCMSReconfiguration.serializer) + CANCEL_CMS_RECONFIGURATION(34, () -> CancelCMSReconfiguration.serializer), + ALTER_TOPOLOGY(35, () -> AlterTopology.serializer), ; private final Supplier> serializer; diff --git a/src/java/org/apache/cassandra/tcm/listeners/LegacyStateListener.java b/src/java/org/apache/cassandra/tcm/listeners/LegacyStateListener.java index f0fced7ae6d5..798583a533f8 100644 --- a/src/java/org/apache/cassandra/tcm/listeners/LegacyStateListener.java +++ b/src/java/org/apache/cassandra/tcm/listeners/LegacyStateListener.java @@ -115,6 +115,9 @@ public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean // state for the local node. Gossiper.instance.maybeInitializeLocalState(SystemKeyspace.incrementAndGetGeneration()); Gossiper.instance.addLocalApplicationState(SCHEMA, StorageService.instance.valueFactory.schema(next.schema.getVersion())); + // if the local node's location has changed, update system.local. + if (!next.directory.location(change).equals(prev.directory.location(change))) + SystemKeyspace.updateLocation(next.directory.location(change)); } if (next.directory.peerState(change) == REGISTERED) @@ -181,6 +184,8 @@ private boolean directoryEntryChangedFor(NodeId nodeId, Directory prev, Director { return prev.peerState(nodeId) != next.peerState(nodeId) || !Objects.equals(prev.getNodeAddresses(nodeId), next.getNodeAddresses(nodeId)) || - !Objects.equals(prev.version(nodeId), next.version(nodeId)); + !Objects.equals(prev.version(nodeId), next.version(nodeId)) || + !Objects.equals(prev.location(nodeId), next.location(nodeId)); + } } diff --git a/src/java/org/apache/cassandra/tcm/listeners/PlacementsChangeListener.java b/src/java/org/apache/cassandra/tcm/listeners/PlacementsChangeListener.java index 80da4ec844d1..605b52637813 100644 --- a/src/java/org/apache/cassandra/tcm/listeners/PlacementsChangeListener.java +++ b/src/java/org/apache/cassandra/tcm/listeners/PlacementsChangeListener.java @@ -34,7 +34,7 @@ public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean private boolean shouldInvalidate(ClusterMetadata prev, ClusterMetadata next) { if (!prev.placements.lastModified().equals(next.placements.lastModified()) && - !prev.placements.equals(next.placements)) // <- todo should we update lastModified if the result is the same? + !prev.placements.equivalentTo(next.placements)) // <- todo should we update lastModified if the result is the same? return true; if (prev.schema.getKeyspaces().size() != next.schema.getKeyspaces().size()) diff --git a/src/java/org/apache/cassandra/tcm/membership/Directory.java b/src/java/org/apache/cassandra/tcm/membership/Directory.java index 51ab84c52051..436ded0ab319 100644 --- a/src/java/org/apache/cassandra/tcm/membership/Directory.java +++ b/src/java/org/apache/cassandra/tcm/membership/Directory.java @@ -237,7 +237,6 @@ public Directory withRackAndDC(NodeId id) { InetAddressAndPort endpoint = peers.get(id); Location location = locations.get(id); - BTreeMultimap rackEP = (BTreeMultimap) racksByDC.get(location.datacenter); if (rackEP == null) rackEP = BTreeMultimap.empty(); @@ -268,6 +267,26 @@ public Directory withoutRackAndDC(NodeId id) newRacksByDC); } + public Directory withUpdatedRackAndDc(NodeId id, Location location) + { + if (!peers.containsKey(id)) + throw new IllegalArgumentException(String.format("Node %s has no registered location to update", id)); + + return withoutRackAndDC(id).withLocation(id, location).withRackAndDC(id); + } + + private Directory withLocation(NodeId id, Location location) + { + if (!locations.containsKey(id)) + throw new IllegalArgumentException(String.format("Node %s has no registered location to update", id)); + + if (locations.get(id).equals(location)) + return this; + + return new Directory(nextId, lastModified, peers, locations.withForce(id, location), states, versions, hostIds, + addresses, endpointsByDC, racksByDC); + } + public Directory without(NodeId id) { InetAddressAndPort endpoint = peers.get(id); @@ -665,7 +684,7 @@ public boolean equals(Object o) Directory directory = (Directory) o; return Objects.equals(lastModified, directory.lastModified) && - isEquivalent(directory); + equivalentTo(directory); } private static Pair minMaxVersions(BTreeMap states, BTreeMap versions) @@ -700,7 +719,7 @@ public int hashCode() * does not check equality of lastModified */ @VisibleForTesting - public boolean isEquivalent(Directory directory) + public boolean equivalentTo(Directory directory) { return nextId == directory.nextId && Objects.equals(peers, directory.peers) && diff --git a/src/java/org/apache/cassandra/tcm/membership/Location.java b/src/java/org/apache/cassandra/tcm/membership/Location.java index faf8230d94fe..08ad29dde983 100644 --- a/src/java/org/apache/cassandra/tcm/membership/Location.java +++ b/src/java/org/apache/cassandra/tcm/membership/Location.java @@ -67,6 +67,18 @@ public String toString() return datacenter + '/' + rack; } + public static Location fromString(String value) + { + if (value == null || value.isEmpty()) + return null; + + String[] parts = value.split(":"); + if (parts.length < 2) + throw new IllegalArgumentException("Invalid datacenter:rack - " + value); + else + return new Location(parts[0].trim(), parts[1].trim()); + } + public static class Serializer implements MetadataSerializer { public void serialize(Location t, DataOutputPlus out, Version version) throws IOException diff --git a/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java b/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java index f42d8b54792e..12920d68624d 100644 --- a/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java +++ b/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java @@ -184,6 +184,11 @@ public int hashCode() return Objects.hash(reads, writes); } + public boolean equivalentTo(DataPlacement other) + { + return reads.equivalentTo(other.reads) && writes.equivalentTo(other.writes); + } + public static class Serializer implements MetadataSerializer { private final IPartitioner partitioner; diff --git a/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java b/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java index 988d2b1bcb81..b89ecde7d329 100644 --- a/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java +++ b/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java @@ -141,6 +141,24 @@ public String toString() '}'; } + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (!(o instanceof DataPlacements)) return false; + DataPlacements that = (DataPlacements) o; + return this.map.equals(that.map); + } + + public boolean equivalentTo(DataPlacements other) + { + if (!map.keySet().equals(other.map.keySet())) + return false; + return map.entrySet() + .stream() + .allMatch(e -> e.getValue().equivalentTo(other.get(e.getKey()))); + } + public static DataPlacements sortReplicaGroups(DataPlacements placements, Comparator comparator) { Builder builder = DataPlacements.builder(placements.size()); diff --git a/src/java/org/apache/cassandra/tcm/ownership/ReplicaGroups.java b/src/java/org/apache/cassandra/tcm/ownership/ReplicaGroups.java index adc26ff820c9..33d160fa10a8 100644 --- a/src/java/org/apache/cassandra/tcm/ownership/ReplicaGroups.java +++ b/src/java/org/apache/cassandra/tcm/ownership/ReplicaGroups.java @@ -534,4 +534,19 @@ public int hashCode() { return Objects.hash(ranges, endpoints); } + + public boolean equivalentTo(ReplicaGroups other) + { + if (!ranges.equals(other.ranges)) + return false; + + for (int i = 0; i < ranges.size(); i++) + { + EndpointsForRange e1 = endpoints.get(i).get(); + EndpointsForRange e2 = other.forRange(ranges.get(i)).get(); + if (e1.size() != e2.size() || !e1.stream().allMatch(e2::contains)) + return false; + } + return true; + } } diff --git a/src/java/org/apache/cassandra/tcm/ownership/TokenMap.java b/src/java/org/apache/cassandra/tcm/ownership/TokenMap.java index c32f6c351c1a..c17d91ae4819 100644 --- a/src/java/org/apache/cassandra/tcm/ownership/TokenMap.java +++ b/src/java/org/apache/cassandra/tcm/ownership/TokenMap.java @@ -255,7 +255,7 @@ public boolean equals(Object o) if (!(o instanceof TokenMap)) return false; TokenMap tokenMap = (TokenMap) o; return Objects.equals(lastModified, tokenMap.lastModified) && - isEquivalent(tokenMap); + equivalentTo(tokenMap); } @Override @@ -269,7 +269,7 @@ public int hashCode() * * does not check equality of lastModified */ - public boolean isEquivalent(TokenMap tokenMap) + public boolean equivalentTo(TokenMap tokenMap) { return Objects.equals(map, tokenMap.map) && Objects.equals(partitioner, tokenMap.partitioner); diff --git a/src/java/org/apache/cassandra/tcm/ownership/VersionedEndpoints.java b/src/java/org/apache/cassandra/tcm/ownership/VersionedEndpoints.java index 90148f2c836e..2f429138d2d4 100644 --- a/src/java/org/apache/cassandra/tcm/ownership/VersionedEndpoints.java +++ b/src/java/org/apache/cassandra/tcm/ownership/VersionedEndpoints.java @@ -116,7 +116,9 @@ public boolean equals(Object o) if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ForRange forRange = (ForRange) o; - return Objects.equals(endpointsForRange.sorted(Replica::compareTo), forRange.endpointsForRange.sorted(Replica::compareTo)); + return lastModified.equals(forRange.lastModified) && + Objects.equals(endpointsForRange.sorted(Replica::compareTo), + forRange.endpointsForRange.sorted(Replica::compareTo)); } public boolean isEmpty() @@ -184,7 +186,8 @@ public boolean equals(Object o) if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ForToken forToken = (ForToken) o; - return Objects.equals(endpointsForToken, forToken.endpointsForToken); + return lastModified.equals(forToken.lastModified) && + Objects.equals(endpointsForToken, forToken.endpointsForToken); } public boolean isEmpty() diff --git a/src/java/org/apache/cassandra/tcm/sequences/CancelCMSReconfiguration.java b/src/java/org/apache/cassandra/tcm/sequences/CancelCMSReconfiguration.java index 3d6499f61b06..665c6ec797ca 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/CancelCMSReconfiguration.java +++ b/src/java/org/apache/cassandra/tcm/sequences/CancelCMSReconfiguration.java @@ -77,7 +77,7 @@ public Result execute(ClusterMetadata prev) .withoutWriteReplica(prev.nextEpoch(), pendingReplica) .build(); } - if (!placement.reads.equals(placement.writes)) + if (!placement.reads.equivalentTo(placement.writes)) return new Rejected(ExceptionCode.INVALID, String.format("Placements will be inconsistent if this transformation is applied:\nReads %s\nWrites: %s", placement.reads, placement.writes)); diff --git a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java index cec1d42ca86d..faa86e357b6d 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java +++ b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java @@ -224,7 +224,7 @@ public final Result execute(ClusterMetadata prev) calculatedPlacements.forEach((params, newPlacement) -> { DataPlacement previousPlacement = prev.placements.get(params); // Preserve placement versioning that has resulted from natural application where possible - if (previousPlacement.equals(newPlacement)) + if (previousPlacement.equivalentTo(newPlacement)) newPlacementsBuilder.with(params, previousPlacement); else newPlacementsBuilder.with(params, newPlacement); diff --git a/src/java/org/apache/cassandra/tcm/transformations/AlterTopology.java b/src/java/org/apache/cassandra/tcm/transformations/AlterTopology.java new file mode 100644 index 000000000000..e247f66a8805 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/AlterTopology.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.net.UnknownHostException; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.DataPlacements; +import org.apache.cassandra.tcm.ownership.PlacementProvider; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; + +public class AlterTopology implements Transformation +{ + private static final Logger logger = LoggerFactory.getLogger(AlterTopology.class); + public static final Serializer serializer = new Serializer(); + + private final Map updates; + private final PlacementProvider placementProvider; + + public AlterTopology(Map updates, PlacementProvider placementProvider) + { + this.updates = updates; + this.placementProvider = placementProvider; + } + + public static Map parseArgs(String args, Directory directory) + { + Map asMap = new HashMap<>(); + for (String change : args.split(",")) + { + String[] parts = change.trim().split("="); + if (parts.length != 2) + throw new IllegalArgumentException("Invalid specification: " + change); + + if (parts[0].isEmpty() || parts[1].isEmpty()) + throw new IllegalArgumentException("Invalid specification: " + change); + + NodeId id = getNodeIdFromString(parts[0].trim(), directory); + if (asMap.containsKey(id)) + throw new IllegalArgumentException("Multiple updates for node " + id + " (" + parts[0].trim() + " )"); + asMap.put(getNodeIdFromString(parts[0].trim(), directory), Location.fromString(parts[1].trim())); + } + return asMap; + } + + private static NodeId getNodeIdFromString(String s, Directory directory) + { + // first try to parse the id as a node id, either in UUID or int form + try + { + return NodeId.fromString(s); + } + catch (Exception e) + { + // fall back to trying the supplied id as an endpoint + try + { + InetAddressAndPort endpoint = InetAddressAndPort.getByName(s); + return directory.peerId(endpoint); + } + catch (UnknownHostException u) + { + throw new IllegalArgumentException("Invalid node identifier supplied: " + s); + } + + } + } + + @Override + public Kind kind() + { + return Kind.ALTER_TOPOLOGY; + } + + @Override + public Result execute(ClusterMetadata prev) + { + // Check no inflight range movements + if (!prev.lockedRanges.locked.isEmpty()) + return new Rejected(INVALID, "The requested topology changes cannot be executed while there are ongoing range movements."); + + Directory dir = prev.directory; + // Check all node ids are present + Set missing = updates.keySet() + .stream() + .filter(location -> (null == dir.location(location))) + .collect(Collectors.toSet()); + if (!missing.isEmpty()) + return new Rejected(INVALID, String.format("Some updates specify an unregistered node: %s", missing)); + + // Validate there will be no change to placements + Directory updated = prev.directory; + for (Map.Entry update : updates.entrySet()) + updated = updated.withUpdatedRackAndDc(update.getKey(), update.getValue()); + ClusterMetadata proposed = prev.transformer().with(updated).build().metadata; + DataPlacements proposedPlacements = placementProvider.calculatePlacements(prev.placements.lastModified(), + proposed.tokenMap.toRanges(), + proposed, + proposed.schema.getKeyspaces()); + if (!proposedPlacements.equivalentTo(prev.placements)) + { + logger.info("Rejecting topology modifications which would materially change data placements: {}", updates); + return new Rejected(INVALID, "Proposed updates modify data placements, violating consistency guarantees"); + } + + ClusterMetadata.Transformer next = prev.transformer().with(updated); + return Transformation.success(next, LockedRanges.AffectedRanges.EMPTY); + } + + + @Override + public String toString() + { + return "AlterTopology{" + + "updates=" + updates + + '}'; + } + + static class Serializer implements AsymmetricMetadataSerializer + { + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + assert t instanceof AlterTopology; + AlterTopology alterTopology = (AlterTopology)t; + int size = alterTopology.updates.size(); + out.writeInt(size); + for (Map.Entry entry : alterTopology.updates.entrySet()) + { + NodeId.serializer.serialize(entry.getKey(), out, version); + Location.serializer.serialize(entry.getValue(), out, version); + } + } + + public AlterTopology deserialize(DataInputPlus in, Version version) throws IOException + { + int size = in.readInt(); + Map updates = new HashMap<>(size); + for (int i = 0; i < size; i++) + updates.put(NodeId.serializer.deserialize(in, version), Location.serializer.deserialize(in, version)); + return new AlterTopology(updates, ClusterMetadataService.instance().placementProvider()); + } + + public long serializedSize(Transformation t, Version version) + { + assert t instanceof AlterTopology; + AlterTopology alterTopology = (AlterTopology) t; + long size = TypeSizes.sizeof(alterTopology.updates.size()); + for (Map.Entry entry : alterTopology.updates.entrySet()) + { + size += NodeId.serializer.serializedSize(entry.getKey(), version); + size += Location.serializer.serializedSize(entry.getValue(), version); + } + return size; + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index d7bcc25b7110..1cc12f38827a 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -95,6 +95,7 @@ public int execute(String... args) { List> commands = newArrayList( AbortBootstrap.class, + AlterTopology.class, Assassinate.class, CassHelp.class, CIDRFilteringStats.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/AlterTopology.java b/src/java/org/apache/cassandra/tools/nodetool/AlterTopology.java new file mode 100644 index 000000000000..e8078d3d5784 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/AlterTopology.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.tools.nodetool; + +import java.util.ArrayList; +import java.util.List; + +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool.NodeToolCmd; + +import static com.google.common.base.Preconditions.checkArgument; + +@Command(name = "altertopology", description = "Modify the datacenter and/or rack of one or more nodes") +public class AlterTopology extends NodeToolCmd +{ + @Arguments(usage = " [...]", description = "One or more node identifiers, which may be either a node id, host id or broadcast address, each with a target dc:rack") + private List args = new ArrayList<>(); + + @Override + public void execute(NodeProbe probe) + { + checkArgument(!args.isEmpty(), "Invalid arguments; no changes specified"); + try + { + probe.getStorageService().alterTopology(String.join(",", args)); + } + catch (Exception e) + { + throw new IllegalArgumentException(e.getMessage()); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/AlterTopologyTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/AlterTopologyTest.java new file mode 100644 index 000000000000..85c7040a58a7 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/log/AlterTopologyTest.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.log; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.Test; + +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.exceptions.ExceptionCode; +import org.apache.cassandra.harry.SchemaSpec; +import org.apache.cassandra.harry.dsl.HistoryBuilder; +import org.apache.cassandra.harry.dsl.ReplayingHistoryBuilder; +import org.apache.cassandra.harry.execution.InJvmDTestVisitExecutor; +import org.apache.cassandra.harry.gen.Generator; +import org.apache.cassandra.harry.gen.Generators; +import org.apache.cassandra.harry.gen.SchemaGenerators; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.PlacementProvider; +import org.apache.cassandra.tcm.sequences.SequencesUtils.ClearLockedRanges; +import org.apache.cassandra.tcm.sequences.SequencesUtils.LockRanges; +import org.apache.cassandra.tcm.transformations.AlterTopology; +import org.apache.cassandra.tcm.transformations.CustomTransformation; +import org.awaitility.Awaitility; +import org.awaitility.core.ConditionFactory; + +import static java.time.Duration.ofSeconds; +import static org.apache.cassandra.distributed.api.Feature.GOSSIP; +import static org.apache.cassandra.distributed.shared.ClusterUtils.waitForCMSToQuiesce; +import static org.apache.cassandra.harry.checker.TestHelper.withRandom; +import static org.junit.Assert.assertEquals; + +public class AlterTopologyTest extends FuzzTestBase +{ + @Test + public void testTopologyChanges() throws Exception + { + Generator schemaGen = SchemaGenerators.schemaSpecGen(KEYSPACE, "change_topology_test", 1000); + try (Cluster cluster = builder().withTokenSupplier(TokenSupplier.evenlyDistributedTokens(4)) + .withRack("dc1", "rack1", 1) + .withRack("dc1", "rack2", 1) + .withRack("dc1", "rack3", 1) + .withRack("dc1", "rack4", 1) + .withConfig(config -> config.with(GOSSIP)) + .withNodes(4) + .start()) + { + IInvokableInstance cmsInstance = cluster.get(1); + + withRandom(rng -> { + SchemaSpec schema = schemaGen.generate(rng); + Generators.TrackingGenerator pkGen = Generators.tracking(Generators.int32(0, Math.min(schema.valueGenerators.pkPopulation(), 1000))); + Generator ckGen = Generators.int32(0, Math.min(schema.valueGenerators.ckPopulation(), 1000)); + + HistoryBuilder history = new ReplayingHistoryBuilder(schema.valueGenerators, + (hb) -> InJvmDTestVisitExecutor.builder() + .nodeSelector(i -> 1) + .build(schema, hb, cluster)); + history.custom(() -> { + cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + + " WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1' : 3 };"); + cluster.schemaChange(schema.compile()); + waitForCMSToQuiesce(cluster, cmsInstance); + }, "Setup"); + + + Runnable writeAndValidate = () -> { + for (int i = 0; i < 2000; i++) + history.insert(pkGen.generate(rng), ckGen.generate(rng)); + + for (int pk : pkGen.generated()) + history.selectPartition(pk); + }; + writeAndValidate.run(); + + cluster.forEach(i -> i.runOnInstance(() -> { + CustomTransformation.registerExtension(LockRanges.NAME, LockRanges.serializer); + CustomTransformation.registerExtension(ClearLockedRanges.NAME, ClearLockedRanges.serializer); + })); + + // a dc change which affects placements is not allowed, so expect a rejection + history.custom(() -> { + cmsInstance.runOnInstance(() -> { + PlacementProvider pp = ClusterMetadataService.instance().placementProvider(); + NodeId id = ClusterMetadata.current().myNodeId(); + Map updates = new HashMap<>(); + updates.put(id, new Location("dcX", "rack1")); + assertAlterTopologyRejection(pp, updates, "Proposed updates modify data placements"); + }); + }, "DC change affecting placements"); + + // a rack change which affects placements is also not allowed + history.custom(() -> { + cmsInstance.runOnInstance(() -> { + PlacementProvider pp = ClusterMetadataService.instance().placementProvider(); + NodeId id = ClusterMetadata.current().myNodeId(); + Map updates = new HashMap<>(); + updates.put(id, new Location("dc1", "rack2")); + assertAlterTopologyRejection(pp, updates, "Proposed updates modify data placements"); + }); + },"Rack change affecting placements "); + + // submit an update which would not modify placements so would normally be accepted + history.custom(() -> { + cmsInstance.runOnInstance(() -> { + PlacementProvider pp = ClusterMetadataService.instance().placementProvider(); + NodeId id = ClusterMetadata.current().myNodeId(); + Map updates = new HashMap<>(); + updates.put(id, new Location("dc1", "rack99")); + // if there are locked ranges, implying in-progress range movements, any update is rejected + ClusterMetadataService.instance().commit(new CustomTransformation(LockRanges.NAME, new LockRanges())); + assertAlterTopologyRejection(pp, updates, "The requested topology changes cannot be executed while there are ongoing range movements"); + + // but if no movements are in flight, the update is allowed + ClusterMetadataService.instance().commit(new CustomTransformation(ClearLockedRanges.NAME, new ClearLockedRanges())); + ClusterMetadataService.instance().commit(new AlterTopology(updates, pp)); + if (!ClusterMetadata.current().directory.location(id).rack.equals("rack99")) + throw new AssertionError("Expected rack to have changed"); + }); + }, "Rack change not affecting placements"); + + // changing multiple/all racks atomically + history.custom(() -> { + cmsInstance.runOnInstance(() -> { + PlacementProvider pp = ClusterMetadataService.instance().placementProvider(); + Map updates = new HashMap<>(); + Directory dir = ClusterMetadata.current().directory; + for (NodeId nodeId : dir.peerIds()) + updates.put(nodeId, new Location("dc1", "rack" + (nodeId.id() + 100))); + + ClusterMetadataService.instance().commit(new AlterTopology(updates, pp)); + dir = ClusterMetadata.current().directory; + for (NodeId nodeId : dir.peerIds()) + if (!ClusterMetadata.current().directory.location(nodeId).rack.equals("rack" + (nodeId.id() + 100))) + throw new AssertionError("Expected rack to have changed"); + }); + }, "Modify all racks not affecting placements"); + + // renaming a datacenter is supported, as long as it is not referenced in any replication params as that + // would impact placements + history.custom(() -> { + cmsInstance.runOnInstance(() -> { + PlacementProvider pp = ClusterMetadataService.instance().placementProvider(); + Map updates = new HashMap<>(); + Directory dir = ClusterMetadata.current().directory; + for (NodeId nodeId : dir.peerIds()) + updates.put(nodeId, new Location("renamed_dc", dir.location(nodeId).rack)); + assertAlterTopologyRejection(pp, updates, "Proposed updates modify data placements"); + }); + }, "Renaming DC referenced in replication params"); + + // after modifying replication for the test keyspace, this should be allowed + history.custom(() -> { + cmsInstance.runOnInstance(() -> { + PlacementProvider pp = ClusterMetadataService.instance().placementProvider(); + Map updates = new HashMap<>(); + QueryProcessor.executeInternal("ALTER KEYSPACE " + KEYSPACE + + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3 };"); + Directory dir = ClusterMetadata.current().directory; + for (NodeId nodeId : dir.peerIds()) + updates.put(nodeId, new Location("renamed_dc", dir.location(nodeId).rack)); + + ClusterMetadataService.instance().commit(new AlterTopology(updates, pp)); + + for (NodeId nodeId : dir.peerIds()) + if (!ClusterMetadata.current().directory.location(nodeId).datacenter.equals("renamed_dc")) + throw new AssertionError("Expected dc to have changed"); + + // modify both datacenter and racks + dir = ClusterMetadata.current().directory; + for (NodeId nodeId : dir.peerIds()) + updates.put(nodeId, new Location("renamed_dc_again", "rack" + (nodeId.id() + 200))); + + ClusterMetadataService.instance().commit(new AlterTopology(updates, pp)); + dir = ClusterMetadata.current().directory; + for (NodeId nodeId : dir.peerIds()) + if (!ClusterMetadata.current().directory.location(nodeId).equals(new Location("renamed_dc_again", "rack" + (nodeId.id() + 200)))) + throw new AssertionError("Expected dc to have changed"); + }); + waitForCMSToQuiesce(cluster, cmsInstance); + },"Renaming DC not referenced in replication params"); + + // updates to system tables run asynchronously so spin until they're done + history.custom(() -> { + cluster.forEach(i -> await(60).until(() -> i.callOnInstance(() -> { + ClusterMetadata metadata = ClusterMetadata.current(); + NodeId myId = metadata.myNodeId(); + Directory dir = metadata.directory; + for (NodeId nodeId : dir.peerIds()) + { + String query = nodeId.equals(myId) + ? "select data_center, rack from system.local" + : String.format("select data_center, rack from system.peers_v2 where peer = '%s'", + dir.endpoint(nodeId).getHostAddress(false)); + UntypedResultSet res = QueryProcessor.executeInternal(query); + if (!res.one().getString("data_center").equals("renamed_dc_again")) + return false; + if (!res.one().getString("rack").equals("rack" + (nodeId.id() + 200))) + return false; + } + return true; + }))); + }, "Verify local system table updates"); + + // check gossip is also updated + history.custom(() -> { + Map> gossipInfo = ClusterUtils.gossipInfo(cmsInstance); + gossipInfo.forEach((ep, states) -> { + String nodeId = states.get("HOST_ID").split(":")[1]; + String dc = states.get("DC").split(":")[1]; + assertEquals("renamed_dc_again", dc); + String rack = states.get("RACK").split(":")[1]; + String expected = "rack" + (NodeId.fromString(nodeId).id() + 200); + assertEquals(expected, rack); + }); + }, "Verify gossip state"); + + writeAndValidate.run(); + }); + } + } + + private static void assertAlterTopologyRejection(PlacementProvider pp, Map updates, String error) + { + ClusterMetadataService.instance() + .commit(new AlterTopology(updates, pp), + m -> { throw new AssertionError("Expected rejection");}, + (c, r) -> { + if (!(c == ExceptionCode.INVALID && r.startsWith(error))) + throw new AssertionError("Unexpected failure response: " + r); + return ClusterMetadata.current(); + }); + + } + + private static ConditionFactory await(int seconds) + { + return Awaitility.await().atMost(ofSeconds(seconds)).pollDelay(ofSeconds(1)); + } + +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/MetadataChangeSimulationTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/MetadataChangeSimulationTest.java index 414f74bbef47..44fdc7d70153 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/MetadataChangeSimulationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/MetadataChangeSimulationTest.java @@ -935,10 +935,10 @@ public void testPlacementsAllSettled() throws Throwable while (!state.inFlightOperations.isEmpty()) { state = state.inFlightOperations.get(random.nextInt(state.inFlightOperations.size())).advance(state); - Assert.assertEquals(allSettled, sut.service.metadata().writePlacementAllSettled(ksm)); + Assert.assertTrue(allSettled.equivalentTo(sut.service.metadata().writePlacementAllSettled(ksm))); validatePlacements(sut, state); } - Assert.assertEquals(allSettled, sut.service.metadata().placements.get(ksm.params.replication)); + Assert.assertTrue(allSettled.equivalentTo(sut.service.metadata().placements.get(ksm.params.replication))); } } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ModelState.java b/test/distributed/org/apache/cassandra/distributed/test/log/ModelState.java index 0eb73b86940e..9dea76040de5 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ModelState.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ModelState.java @@ -343,6 +343,20 @@ public Transformer withReplaced(Node oldNode, Node newNode) return this; } + public Transformer withUpdatedRacks(Map updates) + { + assert currentNodes.containsAll(updates.keySet()); + List newNodes = new ArrayList<>(); + currentNodes.forEach(node -> { + if (updates.containsKey(node)) + newNodes.add(node.withNewRack(updates.get(node))); + else + newNodes.add(node); + }); + currentNodes = newNodes; + return this; + } + public Transformer updateSimulation(PlacementSimulator.SimulatedPlacements simulatedPlacements) { this.simulatedPlacements = simulatedPlacements; diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ReconfigureCMSTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/ReconfigureCMSTest.java index 2869fe913ac8..0f95735e5a4e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ReconfigureCMSTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ReconfigureCMSTest.java @@ -134,7 +134,7 @@ public void cancelCMSReconfigurationTest() throws Throwable assertEquals(2, metadata.fullCMSMembers().size()); ReplicationParams params = ReplicationParams.meta(metadata); DataPlacement placements = metadata.placements.get(params); - assertEquals(placements.reads, placements.writes); + assertTrue(placements.reads.equivalentTo(placements.writes)); assertEquals(metadata.fullCMSMembers().size(), Integer.parseInt(params.asMap().get("dc0"))); }); @@ -159,7 +159,7 @@ public void cancelCMSReconfigurationTest() throws Throwable Assert.assertTrue(metadata.fullCMSMembers().contains(FBUtilities.getBroadcastAddressAndPort())); assertEquals(3, metadata.fullCMSMembers().size()); DataPlacement placements = metadata.placements.get(ReplicationParams.meta(metadata)); - assertEquals(placements.reads, placements.writes); + Assert.assertTrue(placements.reads.equivalentTo(placements.writes)); }); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/SimulatedOperation.java b/test/distributed/org/apache/cassandra/distributed/test/log/SimulatedOperation.java index 77638319331d..c6fa2da54b47 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/SimulatedOperation.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/SimulatedOperation.java @@ -20,6 +20,7 @@ import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -40,10 +41,13 @@ import org.apache.cassandra.tcm.MultiStepOperation; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.ownership.VersionedEndpoints; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.sequences.BootstrapAndJoin; import org.apache.cassandra.tcm.sequences.BootstrapAndReplace; import org.apache.cassandra.tcm.sequences.LeaveStreams; import org.apache.cassandra.tcm.sequences.UnbootstrapAndLeave; +import org.apache.cassandra.tcm.transformations.AlterTopology; import org.apache.cassandra.tcm.transformations.CancelInProgressSequence; import org.apache.cassandra.tcm.transformations.PrepareJoin; import org.apache.cassandra.tcm.transformations.PrepareLeave; @@ -101,6 +105,23 @@ public static ModelState joinWithoutBootstrap(ModelState state, } + public static ModelState changeRacks(CMSSut sut, ModelState state, Map updates) + { + ModelState.Transformer transformer = state.transformer() + .withUpdatedRacks(updates) + .updateSimulation(state.simulatedPlacements); + + Map serviceUpdates = new HashMap<>(); + for (Map.Entry entry : updates.entrySet()) + { + Node n = entry.getKey(); + String rack = entry.getValue(); + serviceUpdates.put(n.nodeId(), new Location(n.dc(), rack)); + } + sut.service.commit(new AlterTopology(serviceUpdates, sut.service.placementProvider())); + return transformer.transform(); + } + public static ModelState leave(CMSSut sut, ModelState state, Node node) { ModelState.Transformer transformer = state.transformer(); diff --git a/test/harry/main/org/apache/cassandra/harry/model/TokenPlacementModel.java b/test/harry/main/org/apache/cassandra/harry/model/TokenPlacementModel.java index 2c24ec257c79..c9dcbcb137c1 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/TokenPlacementModel.java +++ b/test/harry/main/org/apache/cassandra/harry/model/TokenPlacementModel.java @@ -725,6 +725,7 @@ public interface Lookup long token(int tokenIdx); Lookup forceToken(int tokenIdx, long token); void reset(); + int rackIdx(String rack); default NodeId nodeId(int nodeIdx) { @@ -785,6 +786,11 @@ public InetAddressAndPort addr(int idx) return null; } + public int rackIdx(String rack) + { + throw new UnsupportedOperationException(); + } + public void reset() { throw new UnsupportedOperationException(); @@ -843,6 +849,11 @@ public String rack(int rackIdx) { return String.format("rack%d", rackIdx); } + + public int rackIdx(String rack) + { + throw new UnsupportedOperationException(); + } } public static class HumanReadableTokensLookup extends DefaultLookup { @@ -1022,6 +1033,12 @@ public Node overrideToken(long override) { return new Node(tokenIdx, nodeIdx, dcIdx, rackIdx, lookup.forceToken(tokenIdx, override)); } + + public Node withNewRack(String newRack) + { + return new Node(tokenIdx, nodeIdx, dcIdx, lookup.rackIdx(newRack), lookup); + } + public Murmur3Partitioner.LongToken longToken() { return new Murmur3Partitioner.LongToken(token()); diff --git a/test/unit/org/apache/cassandra/schema/SchemaChangeDuringRangeMovementTest.java b/test/unit/org/apache/cassandra/schema/SchemaChangeDuringRangeMovementTest.java index e6cdb1be8248..d54b2e5e31d5 100644 --- a/test/unit/org/apache/cassandra/schema/SchemaChangeDuringRangeMovementTest.java +++ b/test/unit/org/apache/cassandra/schema/SchemaChangeDuringRangeMovementTest.java @@ -20,30 +20,21 @@ import org.junit.Test; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.dht.Range; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tcm.Transformation; -import org.apache.cassandra.tcm.sequences.LockedRanges; import org.apache.cassandra.tcm.transformations.AlterSchema; import org.apache.cassandra.triggers.TriggersTest; +import static org.apache.cassandra.tcm.sequences.SequencesUtils.ClearLockedRanges; +import static org.apache.cassandra.tcm.sequences.SequencesUtils.LockRanges; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class SchemaChangeDuringRangeMovementTest extends CQLTester { - // at the moment, the detail of the specific LockedRanges doesn't matter, transformations - // which are rejected in the presence of locking are rejected whatever is actually locked - private static final LockedRanges.AffectedRanges toLock = - LockedRanges.AffectedRanges.singleton(ReplicationParams.simple(3), - new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), - DatabaseDescriptor.getPartitioner().getRandomToken())); - @Test public void testAlwaysPermittedChanges() throws Throwable { @@ -216,39 +207,4 @@ private void withAndWithoutLockedRanges(TestActions actions) throws Throwable metadata = ClusterMetadataService.instance().commit(new ClearLockedRanges()); assertTrue(metadata.lockedRanges.locked.isEmpty()); } - - - // Custom transforms to lock/unlock an arbitrary set of ranges to - // avoid having to actually initiate some range movement - private static class LockRanges implements Transformation - { - @Override - public Kind kind() - { - return Kind.CUSTOM; - } - - @Override - public Result execute(ClusterMetadata metadata) - { - LockedRanges newLocked = metadata.lockedRanges.lock(LockedRanges.keyFor(metadata.epoch), toLock); - return Transformation.success(metadata.transformer().with(newLocked), toLock); - } - } - - private static class ClearLockedRanges implements Transformation - { - @Override - public Kind kind() - { - return Kind.CUSTOM; - } - - @Override - public Result execute(ClusterMetadata metadata) - { - LockedRanges newLocked = LockedRanges.EMPTY; - return Transformation.success(metadata.transformer().with(newLocked), LockedRanges.AffectedRanges.EMPTY); - } - } } diff --git a/test/unit/org/apache/cassandra/service/AlterTopologyArgParsingTest.java b/test/unit/org/apache/cassandra/service/AlterTopologyArgParsingTest.java new file mode 100644 index 000000000000..59a7bb58d904 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AlterTopologyArgParsingTest.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.MembershipUtils; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.transformations.AlterTopology; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +public class AlterTopologyArgParsingTest +{ + Location loc = new Location("test_dc", "test_rack"); + NodeId id = new NodeId(1); + Directory dir; + + @Before + public void setup() + { + dir = new Directory(); + } + + @Test + public void testSingleChangeByInt() + { + String arg = "1=test_dc:test_rack"; + Map parsed = AlterTopology.parseArgs(arg, dir); + assertEquals(1, parsed.size()); + assertEquals(parsed.get(id), loc); + } + + @Test + public void testSingleChangeByUUID() + { + String arg = String.format("%s=test_dc:test_rack", id.toUUID().toString()); + Map parsed = AlterTopology.parseArgs(arg, dir); + assertEquals(1, parsed.size()); + assertEquals(parsed.get(id), loc); + } + + @Test + public void testSingleChangeByEndpoint() + { + InetAddressAndPort ep = MembershipUtils.endpoint(1); + dir = dir.with(new NodeAddresses(ep), loc); // this will associate NodeId(1) with ep + String arg = String.format("%s=test_dc:test_rack", ep.getHostAddressAndPort()); + Map parsed = AlterTopology.parseArgs(arg, dir); + assertEquals(1, parsed.size()); + assertEquals(parsed.get(id), loc); + } + + @Test + public void testSingleChangeByEndpointAddress() + { + InetAddressAndPort ep = MembershipUtils.endpoint(1); + dir = dir.with(new NodeAddresses(ep), loc); // this will associate NodeId(1) with ep + String arg = String.format("%s=test_dc:test_rack", ep.getHostAddress(false)); + Map parsed = AlterTopology.parseArgs(arg, dir); + assertEquals(1, parsed.size()); + assertEquals(parsed.get(id), loc); + } + + @Test + public void testInvalidArg() + { + String[] args = new String[]{ "invalid", "1=", "=dc:rack", "1=dc", "1=dc:" }; + for (String invalid : args) + { + try + { + AlterTopology.parseArgs(invalid, dir); + fail("Expected exception"); + } + catch (IllegalArgumentException e) + { + } + } + } + + @Test + public void testMultipleChanges() + { + NodeId otherId = new NodeId(2); + InetAddressAndPort ep = MembershipUtils.endpoint(1); + dir = dir.with(new NodeAddresses(ep), loc); // this will associate NodeId(1) with ep + String arg = String.format("%s=dc1:rack1,%s=dc2:rack2,3=dc3:rack3,", + ep.getHostAddress(true), + otherId.toUUID().toString()); + Map parsed = AlterTopology.parseArgs(arg, dir); + assertEquals(3, parsed.size()); + assertEquals(parsed.get(id).datacenter, "dc1"); + assertEquals(parsed.get(id).rack, "rack1"); + assertEquals(parsed.get(otherId).datacenter, "dc2"); + assertEquals(parsed.get(otherId).rack, "rack2"); + assertEquals(parsed.get(new NodeId(3)).datacenter, "dc3"); + assertEquals(parsed.get(new NodeId(3)).rack, "rack3"); + } + + @Test + public void testMultipleChangesForSameNode() + { + InetAddressAndPort ep = MembershipUtils.endpoint(1); + dir = dir.with(new NodeAddresses(ep), loc); // this will associate NodeId(1) with ep + String epString = ep.getHostAddress(true); + String idString = id.toUUID().toString(); + assertIllegalArgument(String.format("%1$s=dc1:rack1,%1$s=dc2:rack2", id.id())); + assertIllegalArgument(String.format("%s=dc1:rack1,%s=dc2:rack2", id.id(), idString)); + assertIllegalArgument(String.format("%s=dc1:rack1,%s=dc2:rack2", id.id(), epString)); + assertIllegalArgument(String.format("%1$s=dc1:rack1,%1$s=dc2:rack2", epString)); + assertIllegalArgument(String.format("%1$s=dc1:rack1,%1$s=dc2:rack2", idString)); + assertIllegalArgument(String.format("%s=dc1:rack1,%s=dc2:rack2,%s=dc3:rack3", id.id(), idString, epString)); + } + + private void assertIllegalArgument(String arg) + { + try + { + AlterTopology.parseArgs(arg, dir); + fail("Expected exception"); + } + catch (IllegalArgumentException e) {} + } +} diff --git a/test/unit/org/apache/cassandra/tcm/membership/DirectoryTest.java b/test/unit/org/apache/cassandra/tcm/membership/DirectoryTest.java new file mode 100644 index 000000000000..ea66961ee5cf --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/membership/DirectoryTest.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.membership; + +import org.junit.Test; + +import static org.apache.cassandra.tcm.membership.MembershipUtils.endpoint; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class DirectoryTest +{ + + @Test + public void updateLocationTest() + { + Location DC1_R1 = new Location("datacenter1", "rack1"); + Directory dir = new Directory(); + assertTrue(dir.isEmpty()); + assertTrue(dir.knownDatacenters().isEmpty()); + + NodeId missing = new NodeId(1000); + assertInvalidLocationUpdate(dir, missing, DC1_R1, "Node " + missing + " has no registered location to update"); + + // add a new node and retrieve its Location + NodeAddresses addresses = new NodeAddresses(endpoint(1)); + dir = dir.with(addresses, DC1_R1); + NodeId node = dir.peerId(addresses.broadcastAddress); + assertEquals(DC1_R1, dir.location(node)); + assertTrue(dir.knownDatacenters().contains("datacenter1")); + + // endpoints by DC & rack are not updated immediately, this is an explicit step when a node joins + assertTrue(dir.allDatacenterEndpoints().isEmpty()); + assertTrue(dir.allDatacenterRacks().isEmpty()); + + // when a node joins, its DC and rack become active + dir = dir.withRackAndDC(node); + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter1").contains(addresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter1").get("rack1").contains(addresses.broadcastAddress)); + + // update rack + Location DC1_R2 = new Location("datacenter1", "rack2"); + dir = dir.withUpdatedRackAndDc(node, DC1_R2); + assertEquals(DC1_R2, dir.location(node)); + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter1").contains(addresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter1").get("rack2").contains(addresses.broadcastAddress)); + // previous rack is no longer present as it was made empty + assertFalse(dir.allDatacenterRacks().get("datacenter1").containsKey("rack1")); + + // update DC + Location DC2_R2 = new Location("datacenter2", "rack2"); + dir = dir.withUpdatedRackAndDc(node, DC2_R2); + assertEquals(DC2_R2, dir.location(node)); + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter2").contains(addresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter2").get("rack2").contains(addresses.broadcastAddress)); + // datacenter1 is no longer present as it was made empty + assertFalse(dir.allDatacenterRacks().containsKey("datacenter1")); + assertFalse(dir.knownDatacenters().contains("datacenter1")); + assertTrue(dir.knownDatacenters().contains("datacenter2")); + + // Add a second node in the same dc & rack + NodeAddresses otherAddresses = new NodeAddresses(endpoint(2)); + dir = dir.with(otherAddresses, DC2_R2); + NodeId otherNode = dir.peerId(otherAddresses.broadcastAddress); + dir = dir.withRackAndDC(otherNode); + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter2").contains(addresses.broadcastAddress)); + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter2").contains(otherAddresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter2").get("rack2").contains(addresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter2").get("rack2").contains(otherAddresses.broadcastAddress)); + + // now updating the rack of the first node should not remove rack2 altogether as it not empty + Location DC2_R3 = new Location("datacenter2", "rack3"); + dir = dir.withUpdatedRackAndDc(node, DC2_R3); + assertEquals(DC2_R3, dir.location(node)); + // updated node is removed from rack2 and added to rack3 + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter2").contains(addresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter2").get("rack3").contains(addresses.broadcastAddress)); + assertFalse(dir.allDatacenterRacks().get("datacenter2").get("rack2").contains(addresses.broadcastAddress)); + // other node is still present in rack2 + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter2").contains(otherAddresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter2").get("rack2").contains(otherAddresses.broadcastAddress)); + assertFalse(dir.allDatacenterRacks().get("datacenter2").get("rack3").contains(otherAddresses.broadcastAddress)); + + // simulate what happens when the nodes leave the cluster + dir = dir.withoutRackAndDC(otherNode); + assertFalse(dir.allDatacenterEndpoints().asMap().get("datacenter2").contains(otherAddresses.broadcastAddress)); + assertFalse(dir.allDatacenterRacks().get("datacenter2").containsKey("rack2")); + assertTrue(dir.allDatacenterEndpoints().asMap().get("datacenter2").contains(addresses.broadcastAddress)); + assertTrue(dir.allDatacenterRacks().get("datacenter2").get("rack3").contains(addresses.broadcastAddress)); + + dir = dir.withoutRackAndDC(node); + assertTrue(dir.allDatacenterEndpoints().isEmpty()); + assertTrue(dir.allDatacenterRacks().isEmpty()); + } + + private void assertInvalidLocationUpdate(Directory dir, NodeId nodeId, Location loc, String message) + { + try + { + dir.withUpdatedRackAndDc(nodeId, loc); + fail("Expected an exception"); + } + catch (IllegalArgumentException e) + { + assertTrue(e.getMessage().equals(message)); + } + } +} diff --git a/test/unit/org/apache/cassandra/tcm/sequences/InProgressSequenceCancellationTest.java b/test/unit/org/apache/cassandra/tcm/sequences/InProgressSequenceCancellationTest.java index 1c1a44296b71..100979104f7e 100644 --- a/test/unit/org/apache/cassandra/tcm/sequences/InProgressSequenceCancellationTest.java +++ b/test/unit/org/apache/cassandra/tcm/sequences/InProgressSequenceCancellationTest.java @@ -34,7 +34,6 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.ClusterMetadata; @@ -44,10 +43,8 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; -import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PlacementDeltas; -import org.apache.cassandra.tcm.ownership.ReplicaGroups; import org.apache.cassandra.tcm.transformations.PrepareJoin; import org.apache.cassandra.tcm.transformations.PrepareLeave; import org.apache.cassandra.tcm.transformations.PrepareReplace; @@ -304,9 +301,9 @@ private void testRevertingReplace(long seed) private void assertRelevantMetadata(ClusterMetadata first, ClusterMetadata second) { - assertPlacementsEquivalent(first.placements, second.placements); - assertTrue(first.directory.isEquivalent(second.directory)); - assertTrue(first.tokenMap.isEquivalent(second.tokenMap)); + assertTrue(first.placements.equivalentTo(second.placements)); + assertTrue(first.directory.equivalentTo(second.directory)); + assertTrue(first.tokenMap.equivalentTo(second.tokenMap)); assertEquals(first.lockedRanges.locked.keySet(), second.lockedRanges.locked.keySet()); } @@ -314,31 +311,4 @@ private static ClusterMetadata metadata(Directory directory) { return new ClusterMetadata(Murmur3Partitioner.instance, directory); } - - private void assertPlacementsEquivalent(DataPlacements first, DataPlacements second) - { - assertEquals(first.keys(), second.keys()); - - first.asMap().forEach((params, placement) -> { - DataPlacement otherPlacement = second.get(params); - ReplicaGroups r1 = placement.reads; - ReplicaGroups r2 = otherPlacement.reads; - assertEquals(r1.ranges, r2.ranges); - r1.forEach((range, e1) -> { - EndpointsForRange e2 = r2.forRange(range).get(); - assertEquals(e1.size(),e2.size()); - assertTrue(e1.get().stream().allMatch(e2::contains)); - }); - - ReplicaGroups w1 = placement.reads; - ReplicaGroups w2 = otherPlacement.reads; - assertEquals(w1.ranges, w2.ranges); - w1.forEach((range, e1) -> { - EndpointsForRange e2 = w2.forRange(range).get(); - assertEquals(e1.size(),e2.size()); - assertTrue(e1.get().stream().allMatch(e2::contains)); - }); - - }); - } } diff --git a/test/unit/org/apache/cassandra/tcm/sequences/SequencesUtils.java b/test/unit/org/apache/cassandra/tcm/sequences/SequencesUtils.java index eb310cc78b08..44b3b59ce3dd 100644 --- a/test/unit/org/apache/cassandra/tcm/sequences/SequencesUtils.java +++ b/test/unit/org/apache/cassandra/tcm/sequences/SequencesUtils.java @@ -18,19 +18,27 @@ package org.apache.cassandra.tcm.sequences; +import java.io.Serializable; import java.util.List; import java.util.Random; import java.util.Set; import java.util.function.Predicate; import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PlacementDeltas; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.tcm.transformations.PrepareJoin; import org.apache.cassandra.tcm.transformations.PrepareLeave; import org.apache.cassandra.tcm.transformations.PrepareMove; @@ -174,4 +182,68 @@ public static Epoch epoch(int epoch) { return Epoch.create(epoch); } + + // Custom transforms to lock/unlock an arbitrary set of ranges to + // avoid having to actually initiate some range movement + public static class LockRanges implements Transformation, Serializable + { + public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer() + { + @Override + public void serialize(Transformation t, DataOutputPlus out, Version version){} + @Override + public LockRanges deserialize(DataInputPlus in, Version version) {return new LockRanges();} + @Override + public long serializedSize(Transformation t, Version version) {return 0;} + }; + + public static final String NAME = "TestLockRanges"; + + // at the moment, the detail of the specific LockedRanges doesn't matter, transformations + // which are rejected in the presence of locking are rejected whatever is actually locked + private static final LockedRanges.AffectedRanges toLock = + LockedRanges.AffectedRanges.singleton(ReplicationParams.simple(3), + new Range<>(Murmur3Partitioner.instance.getMinimumToken(), + Murmur3Partitioner.instance.getRandomToken())); + + @Override + public Kind kind() + { + return Kind.CUSTOM; + } + + @Override + public Result execute(ClusterMetadata metadata) + { + LockedRanges newLocked = metadata.lockedRanges.lock(LockedRanges.keyFor(metadata.epoch), toLock); + return Transformation.success(metadata.transformer().with(newLocked), toLock); + } + } + + public static class ClearLockedRanges implements Transformation, Serializable + { + public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer() + { + @Override + public void serialize(Transformation t, DataOutputPlus out, Version version) {} + @Override + public ClearLockedRanges deserialize(DataInputPlus in, Version version) {return new ClearLockedRanges();} + @Override + public long serializedSize(Transformation t, Version version) {return 0;} + }; + public static final String NAME = "TestClearLockedRanges"; + + @Override + public Kind kind() + { + return Kind.CUSTOM; + } + + @Override + public Result execute(ClusterMetadata metadata) + { + LockedRanges newLocked = LockedRanges.EMPTY; + return Transformation.success(metadata.transformer().with(newLocked), LockedRanges.AffectedRanges.EMPTY); + } + } } From 6f79207c34e2d828e798a8a96df67aeaea2a45a1 Mon Sep 17 00:00:00 2001 From: Marcus Eriksson Date: Fri, 4 Apr 2025 08:13:11 +0200 Subject: [PATCH 034/340] Improve metadata log catch up with inter-DC mutation forwarding Patch by marcuse; reviewed by Sam Tunnicliffe for CASSANDRA-20523 --- CHANGES.txt | 1 + .../db/AbstractMutationVerbHandler.java | 28 ++++----- .../test/log/FetchLogFromPeersDCTest.java | 60 +++++++++++++++++++ 3 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeersDCTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 26eb4a062cec..f74d385e8367 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Improve metadata log catch up with inter-DC mutation forwarding (CASSANDRA-20523) * Support topology-safe changes to Datacenter & Rack for live nodes (CASSANDRA-20528) * Add SSTableIntervalTree latency metric (CASSANDRA-20502) * Ignore repetitions of semicolon in CQLSH (CASSANDRA-19956) diff --git a/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java b/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java index cfea7eb45c5f..fe3acdba06c3 100644 --- a/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java +++ b/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java @@ -54,15 +54,15 @@ protected void processMessage(Message message, InetAddressAndPort respondTo) if (message.epoch().isAfter(Epoch.EMPTY)) { ClusterMetadata metadata = ClusterMetadata.current(); - metadata = checkTokenOwnership(metadata, message); - metadata = checkSchemaVersion(metadata, message); + metadata = checkTokenOwnership(metadata, message, respondTo); + metadata = checkSchemaVersion(metadata, message, respondTo); } applyMutation(message, respondTo); } abstract void applyMutation(Message message, InetAddressAndPort respondToAddress); - private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message message) + private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message message, InetAddressAndPort respondTo) { String keyspace = message.payload.getKeyspaceName(); DecoratedKey key = message.payload.key(); @@ -75,13 +75,13 @@ private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message // since coordinator's routing may be more recent. if (!forToken.get().containsSelf()) { - metadata = ClusterMetadataService.instance().fetchLogFromPeerOrCMS(metadata, message.from(), message.epoch()); + metadata = ClusterMetadataService.instance().fetchLogFromPeerOrCMS(metadata, respondTo, message.epoch()); forToken = writePlacements(metadata, keyspace, key); } // Otherwise, coordinator and the replica agree about the placement of the givent token, so catch-up can be async else { - ClusterMetadataService.instance().fetchLogFromPeerOrCMSAsync(metadata, message.from(), message.epoch()); + ClusterMetadataService.instance().fetchLogFromPeerOrCMSAsync(metadata, respondTo, message.epoch()); } } @@ -89,8 +89,8 @@ private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message { StorageService.instance.incOutOfRangeOperationCount(); Keyspace.open(message.payload.getKeyspaceName()).metric.outOfRangeTokenWrites.inc(); - NoSpamLogger.log(logger, NoSpamLogger.Level.WARN, 1, TimeUnit.SECONDS, logMessageTemplate, message.from(), key.getToken(), message.payload.getKeyspaceName()); - throw InvalidRoutingException.forWrite(message.from(), key.getToken(), metadata.epoch, message.payload); + NoSpamLogger.log(logger, NoSpamLogger.Level.WARN, 1, TimeUnit.SECONDS, logMessageTemplate, respondTo, key.getToken(), message.payload.getKeyspaceName()); + throw InvalidRoutingException.forWrite(respondTo, key.getToken(), metadata.epoch, message.payload); } if (forToken.lastModified().isAfter(message.epoch())) @@ -103,7 +103,7 @@ private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message return metadata; } - private ClusterMetadata checkSchemaVersion(ClusterMetadata metadata, Message message) + private ClusterMetadata checkSchemaVersion(ClusterMetadata metadata, Message message, InetAddressAndPort respondTo) { if (SchemaConstants.isSystemKeyspace(message.payload.getKeyspaceName()) || message.epoch().is(metadata.epoch)) return metadata; @@ -121,10 +121,10 @@ private ClusterMetadata checkSchemaVersion(ClusterMetadata metadata, Message { // the partition update was serialized after the epoch we currently know, catch up and // make sure we've seen the epoch it has seen, otherwise fail request. - metadata = ClusterMetadataService.instance().fetchLogFromPeerOrCMS(metadata, message.from(), message.epoch()); + metadata = ClusterMetadataService.instance().fetchLogFromPeerOrCMS(metadata, respondTo, message.epoch()); if (pu.serializedAtEpoch.isAfter(metadata.epoch)) throw new IllegalStateException(String.format("Coordinator %s is still ahead after fetching log, our epoch = %s, their epoch = %s", - message.from(), + respondTo, metadata.epoch, message.epoch())); } } @@ -143,7 +143,7 @@ else if (message.epoch().isBefore(metadata.schema.lastModified())) { TCMMetrics.instance.coordinatorBehindSchema.mark(); throw new CoordinatorBehindException(String.format("Coordinator %s is behind, our epoch = %s, their epoch = %s", - message.from(), + respondTo, metadata.epoch, message.epoch())); } } @@ -151,7 +151,7 @@ else if (message.epoch().isBefore(metadata.schema.lastModified())) { TCMMetrics.instance.coordinatorBehindSchema.mark(); throw new CoordinatorBehindException(String.format("Schema mismatch, coordinator %s is behind, we're missing table %s.%s, our epoch = %s, their epoch = %s", - message.from(), + respondTo, pu.metadata().keyspace, pu.metadata().name, metadata.epoch, message.epoch())); @@ -165,13 +165,13 @@ else if (message.epoch().isBefore(metadata.schema.lastModified())) { TCMMetrics.instance.coordinatorBehindSchema.mark(); throw new CoordinatorBehindException(String.format("Schema mismatch, coordinator %s is behind, we're missing keyspace %s, our epoch = %s, their epoch = %s", - message.from(), + respondTo, keyspace, metadata.epoch, message.epoch())); } else { - metadata = ClusterMetadataService.instance().fetchLogFromPeerOrCMS(metadata, message.from(), message.epoch()); + metadata = ClusterMetadataService.instance().fetchLogFromPeerOrCMS(metadata, respondTo, message.epoch()); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeersDCTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeersDCTest.java new file mode 100644 index 000000000000..b47108d1d0e7 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeersDCTest.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.log; + +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.distributed.shared.NetworkTopology; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.tcm.ClusterMetadata; + +import static org.apache.cassandra.net.Verb.TCM_FETCH_PEER_LOG_REQ; +import static org.apache.cassandra.net.Verb.TCM_REPLICATION; +import static org.junit.Assert.assertEquals; + +public class FetchLogFromPeersDCTest extends TestBaseImpl +{ + + @Test + public void catchupCoordinatorBehindTestPlacements() throws Exception + { + try (Cluster cluster = init(builder().withNodes(4).withConfig(c -> c.with(Feature.NETWORK, Feature.GOSSIP)) + .withoutVNodes() + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(4)) + .withNodeIdTopology(NetworkTopology.networkTopology(4, (i) -> NetworkTopology.dcAndRack("dc" + (i <= 2 ? 0 : 1), "rack" + i))) + .start())) + { + cluster.schemaChange(withKeyspace("alter keyspace %s with replication = {'class':'NetworkTopologyStrategy', 'dc0':2, 'dc1':2}")); + cluster.schemaChange(withKeyspace("create table %s.tbl (id int primary key)")); + cluster.filters().inbound().verbs(TCM_REPLICATION.id).from(1).to(3, 4).drop(); + // don't allow the dc1 nodes to catch up from eachother - we should catch up from the actual originator of the message: + cluster.filters().inbound().verbs(TCM_FETCH_PEER_LOG_REQ.id).from(3, 4).to(3,4).drop(); + cluster.get(1).schemaChangeInternal(withKeyspace("alter table %s.tbl with comment='abc'")); + cluster.coordinator(1).execute(withKeyspace("insert into %s.tbl (id) values (1)"), ConsistencyLevel.ALL); + long epoch = cluster.get(1).callOnInstance(() -> ClusterMetadata.current().epoch.getEpoch()); + cluster.forEach(i -> i.runOnInstance(() -> { + assertEquals(epoch, ClusterMetadata.current().epoch.getEpoch()); + })); + } + } +} From eb0c77ae1ecd1cdc96ce64d91a36e5c4993af691 Mon Sep 17 00:00:00 2001 From: Marcus Eriksson Date: Fri, 4 Apr 2025 08:16:24 +0200 Subject: [PATCH 035/340] Fix TreeMap race in CollectionVirtualTableAdapter causing us to lose rows in the virtual table Patch by marcuse; reviewed by Sam Tunnicliffe for CASSANDRA-20524 --- CHANGES.txt | 1 + .../CollectionVirtualTableAdapter.java | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f74d385e8367..ba71e542b4cc 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Fix TreeMap race in CollectionVirtualTableAdapter causing us to lose rows in the virtual table (CASSANDRA-20524) * Improve metadata log catch up with inter-DC mutation forwarding (CASSANDRA-20523) * Support topology-safe changes to Datacenter & Rack for live nodes (CASSANDRA-20528) * Add SSTableIntervalTree latency metric (CASSANDRA-20502) diff --git a/src/java/org/apache/cassandra/db/virtual/CollectionVirtualTableAdapter.java b/src/java/org/apache/cassandra/db/virtual/CollectionVirtualTableAdapter.java index 47aa3bd5c43c..cc311e1a2653 100644 --- a/src/java/org/apache/cassandra/db/virtual/CollectionVirtualTableAdapter.java +++ b/src/java/org/apache/cassandra/db/virtual/CollectionVirtualTableAdapter.java @@ -31,7 +31,6 @@ import java.util.TreeMap; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiFunction; import java.util.function.Function; @@ -366,14 +365,16 @@ protected UnfilteredRowIterator computeNext() private Iterator buildDataRangeIterator(DataRange dataRange, ColumnFilter columnFilter) { - NavigableMap, Row>> partitionMap = new ConcurrentSkipListMap<>(DecoratedKey.comparator); - StreamSupport.stream(data.spliterator(), true) - .map(row -> makeRow(row, columnFilter)) - .filter(cr -> dataRange.keyRange().contains(cr.key.get())) - .forEach(cr -> partitionMap.computeIfAbsent(cr.key.get(), - key -> new TreeMap<>(metadata.comparator)) - .put(cr.clustering, cr.rowSup.get())); - + NavigableMap, Row>> partitionMap = new TreeMap<>(DecoratedKey.comparator); + for (R row : data) + { + CollectionRow cr = makeRow(row, columnFilter); + if (dataRange.keyRange().contains(cr.key.get())) + { + partitionMap.computeIfAbsent(cr.key.get(), + key -> new TreeMap<>(metadata.comparator)).put(cr.clustering, cr.rowSup.get()); + } + } return partitionMap.entrySet().stream().map( e -> new DataRowUnfilteredIterator(e.getKey(), dataRange.clusteringIndexFilter(e.getKey()), columnFilter, e.getValue())).iterator(); From 2c05f82755625c805ce5587ae71a502dab7b6d35 Mon Sep 17 00:00:00 2001 From: Marcus Eriksson Date: Fri, 4 Apr 2025 08:19:17 +0200 Subject: [PATCH 036/340] Add nodetool command to dump the contents of the system_views.{cluster_metadata_log, cluster_metadata_directory} tables Patch by marcuse; reviewed by Sam Tunnicliffe for CASSANDRA-20525 --- CHANGES.txt | 1 + .../ClusterMetadataDirectoryTable.java | 71 ++++++++--- .../db/virtual/ClusterMetadataLogTable.java | 34 +++-- .../apache/cassandra/tcm/CMSOperations.java | 27 ++++ .../cassandra/tcm/CMSOperationsMBean.java | 2 + .../org/apache/cassandra/tools/NodeTool.java | 4 +- .../cassandra/tools/nodetool/CMSAdmin.java | 50 ++++++++ .../test/log/ClusterMetadataDumpTest.java | 116 ++++++++++++++++++ 8 files changed, 280 insertions(+), 25 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataDumpTest.java diff --git a/CHANGES.txt b/CHANGES.txt index ba71e542b4cc..c4676baf0702 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Add nodetool command to dump the contents of the system_views.{cluster_metadata_log, cluster_metadata_directory} tables (CASSANDRA-20525) * Fix TreeMap race in CollectionVirtualTableAdapter causing us to lose rows in the virtual table (CASSANDRA-20524) * Improve metadata log catch up with inter-DC mutation forwarding (CASSANDRA-20523) * Support topology-safe changes to Datacenter & Rack for live nodes (CASSANDRA-20528) diff --git a/src/java/org/apache/cassandra/db/virtual/ClusterMetadataDirectoryTable.java b/src/java/org/apache/cassandra/db/virtual/ClusterMetadataDirectoryTable.java index 0d026ce65d42..e7fba1519b40 100644 --- a/src/java/org/apache/cassandra/db/virtual/ClusterMetadataDirectoryTable.java +++ b/src/java/org/apache/cassandra/db/virtual/ClusterMetadataDirectoryTable.java @@ -17,16 +17,26 @@ */ package org.apache.cassandra.db.virtual; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; + +import com.google.common.collect.ImmutableMap; import org.apache.cassandra.db.marshal.InetAddressType; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.dht.LocalPartitioner; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.MultiStepOperation; import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; @@ -35,7 +45,7 @@ import org.apache.cassandra.tcm.membership.NodeVersion; -final class ClusterMetadataDirectoryTable extends AbstractVirtualTable +public final class ClusterMetadataDirectoryTable extends AbstractVirtualTable { private static final String NODE_ID = "node_id"; private static final String HOST_ID = "host_id"; @@ -50,6 +60,8 @@ final class ClusterMetadataDirectoryTable extends AbstractVirtualTable private static final String LOCAL_PORT = "local_port"; private static final String NATIVE_ADDRESS = "native_address"; private static final String NATIVE_PORT = "native_port"; + private static final String TOKENS = "tokens"; + private static final String MULTI_STEP_OPERATION = "multi_step_operation"; ClusterMetadataDirectoryTable(String keyspace) @@ -71,15 +83,31 @@ final class ClusterMetadataDirectoryTable extends AbstractVirtualTable .addRegularColumn(LOCAL_PORT, Int32Type.instance) .addRegularColumn(NATIVE_ADDRESS, InetAddressType.instance) .addRegularColumn(NATIVE_PORT, Int32Type.instance) + .addRegularColumn(TOKENS, ListType.getInstance(UTF8Type.instance, false)) + .addRegularColumn(MULTI_STEP_OPERATION, MapType.getInstance(UTF8Type.instance, UTF8Type.instance, false)) .build()); } @Override public DataSet data() + { + SimpleDataSet result = new SimpleDataSet(metadata()); + + for (Map.Entry> entry : directory(true).entrySet()) + { + result = result.row(entry.getKey().intValue()); + for (Map.Entry row : entry.getValue().entrySet()) + result = result.column(row.getKey(), row.getValue()); + } + return result; + } + + public static Map> directory(boolean tokens) { ClusterMetadata metadata = ClusterMetadata.current(); Directory directory = metadata.directory; - SimpleDataSet result = new SimpleDataSet(metadata()); + Map> result = new LinkedHashMap<>(); + for (Map.Entry entry : directory.states.entrySet()) { NodeId nodeId = entry.getKey(); @@ -87,20 +115,33 @@ public DataSet data() NodeAddresses address = directory.getNodeAddresses(nodeId); Location location = directory.location(nodeId); NodeVersion version = directory.version(nodeId); - result.row(nodeId.id()) - .column(HOST_ID, nodeId.toUUID()) - .column(STATE, nodeState.toString()) - .column(CASSANDRA_VERSION, version != null ? version.cassandraVersion.toString() : null) - .column(SERIALIZATION_VERSION, version != null ? version.serializationVersion : null) - .column(RACK, location != null ? location.rack : null) - .column(DC, location != null ? location.datacenter : null) - .column(BROADCAST_ADDRESS, address != null ? address.broadcastAddress.getAddress() : null) - .column(BROADCAST_PORT, address != null ? address.broadcastAddress.getPort() : null) - .column(LOCAL_ADDRESS, address != null ? address.localAddress.getAddress() : null) - .column(LOCAL_PORT, address != null ? address.localAddress.getPort() : null) - .column(NATIVE_ADDRESS, address != null ? address.nativeAddress.getAddress() : null) - .column(NATIVE_PORT, address != null ? address.nativeAddress.getPort() : null); + Map row = new HashMap<>(); + row.put(HOST_ID, nodeId.toUUID()); + row.put(STATE, nodeState.toString()); + row.put(CASSANDRA_VERSION, version != null ? version.cassandraVersion.toString() : null); + row.put(SERIALIZATION_VERSION, version != null ? version.serializationVersion : null); + row.put(RACK, location != null ? location.rack : null); + row.put(DC, location != null ? location.datacenter : null); + row.put(BROADCAST_ADDRESS, address != null ? address.broadcastAddress.getAddress() : null); + row.put(BROADCAST_PORT, address != null ? address.broadcastAddress.getPort() : null); + row.put(LOCAL_ADDRESS, address != null ? address.localAddress.getAddress() : null); + row.put(LOCAL_PORT, address != null ? address.localAddress.getPort() : null); + row.put(NATIVE_ADDRESS, address != null ? address.nativeAddress.getAddress() : null); + row.put(NATIVE_PORT, address != null ? address.nativeAddress.getPort() : null); + if (tokens) + row.put(TOKENS, tokensToString(metadata.tokenMap.tokens(nodeId))); + MultiStepOperation mso = metadata.inProgressSequences.get(nodeId); + if (mso != null) + row.put(MULTI_STEP_OPERATION, ImmutableMap.of("kind", mso.kind().name(), + "status", mso.status(), + "nextStep", mso.nextStep().name())); + result.put((long)nodeId.id(), row); } return result; } + + private static List tokensToString(List tokens) + { + return tokens.stream().map(Object::toString).collect(Collectors.toList()); + } } diff --git a/src/java/org/apache/cassandra/db/virtual/ClusterMetadataLogTable.java b/src/java/org/apache/cassandra/db/virtual/ClusterMetadataLogTable.java index 152b4769a728..cd755115ec76 100644 --- a/src/java/org/apache/cassandra/db/virtual/ClusterMetadataLogTable.java +++ b/src/java/org/apache/cassandra/db/virtual/ClusterMetadataLogTable.java @@ -19,6 +19,9 @@ import java.io.IOException; import java.util.Date; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.ConsistencyLevel; @@ -27,6 +30,7 @@ import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.locator.MetaStrategy; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.Transformation; import static java.lang.String.format; @@ -34,7 +38,7 @@ import static org.apache.cassandra.schema.DistributedMetadataLogKeyspace.TABLE_NAME; import static org.apache.cassandra.schema.SchemaConstants.METADATA_KEYSPACE_NAME; -final class ClusterMetadataLogTable extends AbstractVirtualTable +public final class ClusterMetadataLogTable extends AbstractVirtualTable { private static final String EPOCH = "epoch"; private static final String KIND = "kind"; @@ -58,22 +62,34 @@ final class ClusterMetadataLogTable extends AbstractVirtualTable @Override public DataSet data() + { + SimpleDataSet result = new SimpleDataSet(metadata()); + for (Map.Entry> entry : log(Epoch.FIRST.getEpoch(), Long.MAX_VALUE).entrySet()) + { + SimpleDataSet data = result.row(entry.getKey()); + for (Map.Entry rowEntry : entry.getValue().entrySet()) + data = data.column(rowEntry.getKey(), rowEntry.getValue()); + } + return result; + } + + public static Map> log(long startEpoch, long endEpoch) { try { - SimpleDataSet result = new SimpleDataSet(metadata()); + Map> result = new LinkedHashMap<>(); UntypedResultSet res = execute(format("SELECT epoch, kind, transformation, entry_id, writetime(kind) as wt " + - "FROM %s.%s", METADATA_KEYSPACE_NAME, TABLE_NAME), ConsistencyLevel.QUORUM); + "FROM %s.%s WHERE token(epoch) >= token(?) AND token(epoch) <= token(?)", METADATA_KEYSPACE_NAME, TABLE_NAME), ConsistencyLevel.QUORUM, endEpoch, startEpoch); for (UntypedResultSet.Row r : res) { Transformation.Kind kind = Transformation.Kind.fromId(r.getInt("kind")); Transformation transformation = kind.fromVersionedBytes(r.getBlob("transformation")); - - result.row(r.getLong("epoch")) - .column(KIND, kind.toString()) - .column(TRANSFORMATION, transformation.toString()) - .column(ENTRY_ID, r.getLong("entry_id")) - .column(ENTRY_TIME, new Date(r.getLong("wt") / 1000)); + Map row = new HashMap<>(); + row.put(KIND, kind.toString()); + row.put(TRANSFORMATION, transformation.toString()); + row.put(ENTRY_ID, r.getLong("entry_id")); + row.put(ENTRY_TIME, new Date(r.getLong("wt") / 1000)); + result.put(r.getLong("epoch"), row); } return result; } diff --git a/src/java/org/apache/cassandra/tcm/CMSOperations.java b/src/java/org/apache/cassandra/tcm/CMSOperations.java index b37da9dd94b9..5b21acd1428e 100644 --- a/src/java/org/apache/cassandra/tcm/CMSOperations.java +++ b/src/java/org/apache/cassandra/tcm/CMSOperations.java @@ -31,6 +31,8 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.virtual.ClusterMetadataDirectoryTable; +import org.apache.cassandra.db.virtual.ClusterMetadataLogTable; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; @@ -261,4 +263,29 @@ public void unregisterLeftNodes(List nodeIdStrings) cms.commit(new Unregister(nodeId, EnumSet.of(NodeState.LEFT), ClusterMetadataService.instance().placementProvider())); } } + + public Map> dumpDirectory(boolean tokens) + { + Map> directory = ClusterMetadataDirectoryTable.directory(tokens); + return convertToStringValues(directory); + } + + public Map> dumpLog(long startEpoch, long endEpoch) + { + Map> log = ClusterMetadataLogTable.log(startEpoch, endEpoch); + return convertToStringValues(log); + } + + private Map> convertToStringValues(Map> log) + { + Map> res = new LinkedHashMap<>(); + for (Map.Entry> outerEntry : log.entrySet()) + { + Map rowRes = new HashMap<>(); + for (Map.Entry row : outerEntry.getValue().entrySet()) + rowRes.put(row.getKey(), row.getValue().toString()); + res.put(outerEntry.getKey(), rowRes); + } + return res; + } } diff --git a/src/java/org/apache/cassandra/tcm/CMSOperationsMBean.java b/src/java/org/apache/cassandra/tcm/CMSOperationsMBean.java index 1e2d9e147313..7ff0c0191b2a 100644 --- a/src/java/org/apache/cassandra/tcm/CMSOperationsMBean.java +++ b/src/java/org/apache/cassandra/tcm/CMSOperationsMBean.java @@ -46,4 +46,6 @@ public interface CMSOperationsMBean public boolean cancelInProgressSequences(String sequenceOwner, String expectedSequenceKind); public void unregisterLeftNodes(List nodeIds); + public Map> dumpDirectory(boolean includeTokens); + public Map> dumpLog(long startEpoch, long endEpoch); } diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 1cc12f38827a..5b149acd9a0b 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -270,7 +270,9 @@ public int execute(String... args) .withCommand(CMSAdmin.ReconfigureCMS.class) .withCommand(CMSAdmin.Snapshot.class) .withCommand(CMSAdmin.Unregister.class) - .withCommand(CMSAdmin.AbortInitialization.class); + .withCommand(CMSAdmin.AbortInitialization.class) + .withCommand(CMSAdmin.DumpDirectory.class) + .withCommand(CMSAdmin.DumpLog.class); Cli parser = builder.build(); diff --git a/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java b/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java index 02cc045545b7..7f54fdd9be0c 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java +++ b/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java @@ -18,14 +18,19 @@ package org.apache.cassandra.tools.nodetool; +import java.io.PrintStream; import java.util.ArrayList; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; +import com.google.common.collect.ImmutableList; + import io.airlift.airline.Arguments; import io.airlift.airline.Command; import io.airlift.airline.Option; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool; @@ -207,4 +212,49 @@ protected void execute(NodeProbe probe) probe.getCMSOperationsProxy().abortInitialization(initiator); } } + + @Command(name = "dumpdirectory", description = "Dump the directory from the current ClusterMetadata") + public static class DumpDirectory extends NodeTool.NodeToolCmd + { + @Option(name = "--tokens", title = "Include tokens", description = "Include tokens in output") + public boolean tokens = false; + @Override + protected void execute(NodeProbe probe) + { + output(probe.output().out, "NodeId", probe.getCMSOperationsProxy().dumpDirectory(tokens)); + } + } + + @Command(name = "dumplog", description = "Dump the metadata log") + public static class DumpLog extends NodeTool.NodeToolCmd + { + @Option(name = "--start", title = "Start epoch") + long startEpoch = Epoch.FIRST.getEpoch(); + @Option(name = "--end", title = "End epoch") + long endEpoch = Long.MAX_VALUE; + @Override + protected void execute(NodeProbe probe) + { + output(probe.output().out, "Epoch", probe.getCMSOperationsProxy().dumpLog(startEpoch, endEpoch)); + } + } + + private static void output(PrintStream out, String title, Map> map) + { + if (map.isEmpty()) + return; + int keywidth = keywidth(map); + for (Long key : ImmutableList.sortedCopyOf(map.keySet())) + { + out.println(title + ": " + key); + for (Map.Entry nodeEntry : map.get(key).entrySet()) + out.printf(" %-" + keywidth + "s%s%n", nodeEntry.getKey(), nodeEntry.getValue()); + } + } + + private static int keywidth(Map> map) + { + assert !map.isEmpty(); + return map.entrySet().iterator().next().getValue().keySet().stream().max(Comparator.comparingInt(String::length)).get().length() + 1; + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataDumpTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataDumpTest.java new file mode 100644 index 000000000000..b0f66ff885a8 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataDumpTest.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.log; + +import java.io.IOException; + +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.NodeToolResult; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.transformations.CustomTransformation; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class ClusterMetadataDumpTest extends TestBaseImpl +{ + @Test + public void dumpLogTest() throws IOException + { + try (Cluster cluster = init(builder().withNodes(3) + .start())) + { + cluster.get(1).runOnInstance(() -> { + for (int i = 0; i < 10; i++) + ClusterMetadataService.instance().commit(new CustomTransformation(CustomTransformation.PokeInt.NAME, new CustomTransformation.PokeInt(i))); + }); + + NodeToolResult res = cluster.get(1).nodetoolResult("cms", "dumplog"); + res.asserts().success(); + int unsafeJoinSeen = 0; + int registerSeen = 0; + int epochsSeen = 0; + for (String l : res.getStdout().split("\n")) + { + if (l.contains("kind")) + { + if (l.contains("REGISTER")) + registerSeen++; + else if (l.contains("UNSAFE_JOIN")) + unsafeJoinSeen++; + } + if (l.startsWith("Epoch:")) + epochsSeen++; + } + assertEquals(3, unsafeJoinSeen); + assertEquals(3, registerSeen); + assertTrue(epochsSeen > 15); + + res = cluster.get(1).nodetoolResult("cms", "dumplog", "--start", "10", "--end", "15"); + epochsSeen = 0; + for (String l : res.getStdout().split("\n")) + { + if (l.startsWith("Epoch: ")) + { + epochsSeen++; + long epoch = Long.parseLong(l.split(": ")[1]); + assertTrue(epoch >= 10 && epoch <= 15); + } + } + assertEquals(6, epochsSeen); + } + } + + @Test + public void dumpDirectoryTest() throws IOException + { + try (Cluster cluster = init(builder().withNodes(3) + .start())) + { + NodeToolResult res = cluster.get(1).nodetoolResult("cms", "dumpdirectory"); + res.asserts().success(); + int nodesFound = 0; + for (String l : res.getStdout().split("\n")) + { + if (l.startsWith("NodeId")) + nodesFound++; + assertFalse(l.contains("tokens")); + } + assertEquals(3, nodesFound); + res = cluster.get(1).nodetoolResult("cms", "dumpdirectory", "--tokens"); + res.asserts().success(); + nodesFound = 0; + int tokensFound = 0; + for (String l : res.getStdout().split("\n")) + { + if (l.startsWith("NodeId")) + nodesFound++; + + if (l.contains("tokens")) + tokensFound++; + } + assertEquals(3, nodesFound); + assertEquals(3, tokensFound); + } + } +} From 8404d2fd5cbda4ba5210522ee612ac2fd169278e Mon Sep 17 00:00:00 2001 From: Marcus Eriksson Date: Fri, 4 Apr 2025 08:23:32 +0200 Subject: [PATCH 037/340] Improve performance when getting writePlacementAllSettled from ClusterMetadata in large cluster with many range movements Patch by marcuse; reviewed by Sam Tunnicliffe for CASSANDRA-20526 --- CHANGES.txt | 1 + .../apache/cassandra/tcm/ClusterMetadata.java | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index c4676baf0702..b6c8599ab6f1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Improve performance when getting writePlacementsAllSettled from ClusterMetadata (CASSANDRA-20526) * Add nodetool command to dump the contents of the system_views.{cluster_metadata_log, cluster_metadata_directory} tables (CASSANDRA-20525) * Fix TreeMap race in CollectionVirtualTableAdapter causing us to lose rows in the virtual table (CASSANDRA-20524) * Improve metadata log catch up with inter-DC mutation forwarding (CASSANDRA-20523) diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 64eadc76dddb..607978806e76 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -104,6 +104,7 @@ public class ClusterMetadata private EndpointsForRange fullCMSReplicas; private Set fullCMSEndpoints; private Set fullCMSIds; + private DataPlacements writePlacementAllSettled; public ClusterMetadata(IPartitioner partitioner) { @@ -282,15 +283,19 @@ public Epoch nextEpoch() public DataPlacement writePlacementAllSettled(KeyspaceMetadata ksm) { - ClusterMetadata metadata = this; - Iterator> iter = metadata.inProgressSequences.iterator(); - while (iter.hasNext()) + if (writePlacementAllSettled == null) { - Transformation.Result result = iter.next().applyTo(metadata); - assert result.isSuccess(); - metadata = result.success().metadata; + ClusterMetadata metadata = this; + Iterator> iter = metadata.inProgressSequences.iterator(); + while (iter.hasNext()) + { + Transformation.Result result = iter.next().applyTo(metadata); + assert result.isSuccess(); + metadata = result.success().metadata; + } + writePlacementAllSettled = metadata.placements; } - return metadata.placements.get(ksm.params.replication); + return writePlacementAllSettled.get(ksm.params.replication); } // TODO Remove this as it isn't really an equivalent to the previous concept of pending ranges From d3658f0491a7c01490be49270d468f5602c3baf7 Mon Sep 17 00:00:00 2001 From: Francisco Guerrero Date: Tue, 8 Apr 2025 14:19:56 -0700 Subject: [PATCH 038/340] Fix JMX initialization problem in injvm-dtest framework Patch by Francisco Guerrerro, Doug Rohrer; reviewed by TBD for CASSANDRA-20539 Co-authored-by: Doug Rohrer --- .../cassandra/distributed/impl/Instance.java | 9 +++++++- .../distributed/impl/IsolatedJmx.java | 14 +++++++++++-- .../distributed/test/jmx/JMXTestsUtil.java | 21 ++++++++++++++++++- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index a7cb238119db..06258b6145bf 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -645,9 +645,14 @@ public void startup(ICluster cluster) initialized = true; } - private synchronized void startJmx() + private synchronized void setupMbeanWrapper() { this.isolatedJmx = new IsolatedJmx(this, inInstancelogger); + this.isolatedJmx.setupMBeanWrapper(); + } + + private synchronized void startJmx() + { isolatedJmx.startJmx(); } @@ -707,6 +712,8 @@ protected void partialStartup(ICluster cluster) throws IOException, NoSuchFie assert config.networkTopology().contains(config.broadcastAddress()) : String.format("Network topology %s doesn't contain the address %s", config.networkTopology(), config.broadcastAddress()); DistributedTestInitialLocationProvider.assign(config.networkTopology()); + if (config.has(JMX)) + setupMbeanWrapper(); DatabaseDescriptor.daemonInitialization(); if (config.has(JMX)) startJmx(); diff --git a/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java b/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java index 18eaeb85c532..59e9e8ca04de 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java @@ -24,6 +24,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.TimeUnit; import javax.management.remote.JMXConnector; import javax.management.remote.JMXConnectorServer; @@ -76,6 +77,8 @@ public void startJmx() { try { + Objects.requireNonNull(wrapper, "Must call setupMBeanWrapper before use"); + // Several RMI threads hold references to in-jvm dtest objects, and are, by default, kept // alive for long enough (minutes) to keep classloaders from being collected. // Set these two system properties to a low value to allow cleanup to occur fast enough @@ -88,8 +91,6 @@ public void startJmx() int jmxPort = config.jmxPort(); String hostname = addr.getHostAddress(); - wrapper = new MBeanWrapper.InstanceMBeanWrapper(hostname + ":" + jmxPort); - ((MBeanWrapper.DelegatingMbeanWrapper) MBeanWrapper.instance).setDelegate(wrapper); // CASSANDRA-18508: Sensitive JMX SSL configuration options can be easily exposed Map jmxServerOptionsMap = (Map) config.getParams().get("jmx_server_options"); @@ -158,6 +159,15 @@ public void startJmx() } } + public void setupMBeanWrapper() + { + InetAddress addr = config.broadcastAddress().getAddress(); + int jmxPort = config.jmxPort(); + String hostname = addr.getHostAddress(); + wrapper = new MBeanWrapper.InstanceMBeanWrapper(hostname + ':' + jmxPort); + ((MBeanWrapper.DelegatingMbeanWrapper) MBeanWrapper.instance).setDelegate(wrapper); + } + /** * Builds {@code EncryptionOptions} from the map based SSL configuration properties. * diff --git a/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java b/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java index 5726c242d9e7..ad9fd21a4860 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java +++ b/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -66,6 +67,15 @@ public class JMXTestsUtil "org.apache.cassandra.db:type=CIDRGroupsMappingManager:loadCidrGroupsCache", // AllowAllCIDRAuthorizer doesn't support this operation, as feature is disabled by default "org.apache.cassandra.db:type=StorageService:forceRemoveCompletion" // deprecated (TCM) ); + // This set of mbeans are registered early enough during the startup of a + // Cassandra instance for in-jvm dtests to avoid missing registration of mbeans. + // We ignore both "org.apache.cassandra.diag:type=DiagnosticEventService" and + // "org.apache.cassandra.diag:type=LastEventIdBroadcaster" because they are being intialized + // outside the scope of the in-jvm Instance initialization. + private static final Set EXPECTED_MBEANS_TO_BE_REGISTERED = Set.of( + "org.apache.cassandra.db:type=EndpointSnitchInfo", + "org.apache.cassandra.db:type=LocationInfo" + ); /** * Tests JMX getters and operations and allows passing JMX Env used for the client JMX connection. @@ -75,6 +85,7 @@ public class JMXTestsUtil */ public static void testAllValidGetters(Cluster cluster, Map jmxEnv) throws Exception { + Set missingExpectedMbeans = new HashSet<>(EXPECTED_MBEANS_TO_BE_REGISTERED); for (IInvokableInstance instance : cluster) { if (instance.isShutdown()) @@ -91,6 +102,7 @@ public static void testAllValidGetters(Cluster cluster, Map jmxEnv) t { if (!name.getDomain().startsWith("org.apache.cassandra")) continue; + missingExpectedMbeans.remove(name.getCanonicalName()); MBeanInfo info = mbsc.getMBeanInfo(name); for (MBeanAttributeInfo a : info.getAttributes()) { @@ -123,7 +135,7 @@ public static void testAllValidGetters(Cluster cluster, Map jmxEnv) t } } } - if (!errors.isEmpty()) + if (!errors.isEmpty() || !missingExpectedMbeans.isEmpty()) { AssertionError root = new AssertionError(); for (Named error : errors) @@ -132,6 +144,13 @@ public static void testAllValidGetters(Cluster cluster, Map jmxEnv) t logger.error("Error {}", error.getMessage()); root.addSuppressed(error); } + for (String missingMbean : missingExpectedMbeans) + { + // The Named object's message has the cause also so this only logs the message + String errorMessage = String.format("Expected mbean %s was not found", missingMbean); + logger.error(errorMessage); + root.addSuppressed(new RuntimeException(errorMessage)); + } throw root; } } From fcea0b6fd8aa6685e7a7e4be5bcde0ee87efc75f Mon Sep 17 00:00:00 2001 From: Bereng Date: Thu, 10 Apr 2025 13:33:33 +0200 Subject: [PATCH 039/340] CASSANDRA-19633 Replaced node is stuck in a loop calculating ranges --- .../config/CassandraRelevantProperties.java | 9 +- .../apache/cassandra/dht/RangeStreamer.java | 9 +- .../cassandra/dht/BootStrapperTest.java | 86 +++++++++++++++---- 3 files changed, 84 insertions(+), 20 deletions(-) diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 17577688973e..c5286f17fafc 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -259,8 +259,13 @@ public enum CassandraRelevantProperties /** * Number of replicas required to store batchlog for atomicity, only accepts values of 1 or 2. */ - REQUIRED_BATCHLOG_REPLICA_COUNT("cassandra.batchlog.required_replica_count", "2") - ; + REQUIRED_BATCHLOG_REPLICA_COUNT("cassandra.batchlog.required_replica_count", "2"), + + /** + * Do not try to calculate optimal streaming candidates. This can take a lot of time in some configs specially + * with vnodes. + */ + SKIP_OPTIMAL_STREAMING_CANDIDATES_CALCULATION("cassandra.skip_optimal_streaming_candidates_calculation", "false"); CassandraRelevantProperties(String key, String defaultVal) { diff --git a/src/java/org/apache/cassandra/dht/RangeStreamer.java b/src/java/org/apache/cassandra/dht/RangeStreamer.java index dda6863153a3..42bcea199868 100644 --- a/src/java/org/apache/cassandra/dht/RangeStreamer.java +++ b/src/java/org/apache/cassandra/dht/RangeStreamer.java @@ -41,6 +41,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.gms.FailureDetector; @@ -331,8 +332,12 @@ public void addRanges(String keyspaceName, ReplicaCollection replicas) Multimap workMap; //Only use the optimized strategy if we don't care about strict sources, have a replication factor > 1, and no - //transient replicas. - if (useStrictSource || strat == null || strat.getReplicationFactor().allReplicas == 1 || strat.getReplicationFactor().hasTransientReplicas()) + //transient replicas or it is intentionally skipped. + if (CassandraRelevantProperties.SKIP_OPTIMAL_STREAMING_CANDIDATES_CALCULATION.getBoolean() || + useStrictSource || + strat == null || + strat.getReplicationFactor().allReplicas == 1 || + strat.getReplicationFactor().hasTransientReplicas()) { workMap = convertPreferredEndpointsToWorkMap(fetchMap); } diff --git a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java index 05d42cf32c7a..4b2a56a2d2bf 100644 --- a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java +++ b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java @@ -20,6 +20,7 @@ import java.net.UnknownHostException; import java.util.List; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.base.Predicate; import com.google.common.base.Predicates; @@ -28,8 +29,10 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import org.junit.runner.RunWith; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.RangeStreamer.FetchReplica; @@ -42,16 +45,35 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.StreamOperation; +import org.jboss.byteman.contrib.bmunit.BMRule; +import org.jboss.byteman.contrib.bmunit.BMRules; +import org.jboss.byteman.contrib.bmunit.BMUnitRunner; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; - +@RunWith(BMUnitRunner.class) public class BootStrapperTest { static IPartitioner oldPartitioner; - static Predicate originalAlivePredicate = RangeStreamer.ALIVE_PREDICATE; + public static AtomicBoolean nonOptimizationHit = new AtomicBoolean(false); + public static AtomicBoolean optimizationHit = new AtomicBoolean(false); + private static final IFailureDetector mockFailureDetector = new IFailureDetector() + { + public boolean isAlive(InetAddressAndPort ep) + { + return true; + } + + public void interpret(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } + public void report(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } + public void registerFailureDetectionEventListener(IFailureDetectionEventListener listener) { throw new UnsupportedOperationException(); } + public void unregisterFailureDetectionEventListener(IFailureDetectionEventListener listener) { throw new UnsupportedOperationException(); } + public void remove(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } + public void forceConviction(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } + }; + @BeforeClass public static void setup() throws ConfigurationException { @@ -83,6 +105,52 @@ public void testSourceTargetComputation() throws UnknownHostException } } + @Test + @BMRules(rules = { @BMRule(name = "Make sure the non-optimized path is picked up for some operations", + targetClass = "org.apache.cassandra.dht.RangeStreamer", + targetMethod = "convertPreferredEndpointsToWorkMap(EndpointsByReplica)", + action = "org.apache.cassandra.dht.BootStrapperTest.nonOptimizationHit.set(true)"), + @BMRule(name = "Make sure the optimized path is picked up for some operations", + targetClass = "org.apache.cassandra.dht.RangeStreamer", + targetMethod = "getOptimizedWorkMap(EndpointsByReplica,Collection,String)", + action = "org.apache.cassandra.dht.BootStrapperTest.optimizationHit.set(true)") }) + public void testStreamingCandidatesOptmizationSkip() throws UnknownHostException + { + testSkipStreamingCandidatesOptmizationFeatureFlag(true, true, false); + testSkipStreamingCandidatesOptmizationFeatureFlag(false, true, true); + } + + private void testSkipStreamingCandidatesOptmizationFeatureFlag(boolean disableOptimization, boolean nonOptimizedPathHit, boolean optimizedPathHit) throws UnknownHostException + { + try + { + nonOptimizationHit.set(false); + optimizationHit.set(false); + CassandraRelevantProperties.SKIP_OPTIMAL_STREAMING_CANDIDATES_CALCULATION.setBoolean(disableOptimization); + + for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces()) + { + StorageService ss = StorageService.instance; + TokenMetadata tmd = ss.getTokenMetadata(); + + generateFakeEndpoints(10); + Token myToken = tmd.partitioner.getRandomToken(); + InetAddressAndPort myEndpoint = InetAddressAndPort.getByName("127.0.0.1"); + + assertEquals(10, tmd.sortedTokens().size()); + RangeStreamer s = new RangeStreamer(tmd, null, myEndpoint, StreamOperation.BOOTSTRAP, true, DatabaseDescriptor.getEndpointSnitch(), new StreamStateStore(), mockFailureDetector, false, 1); + s.addRanges(keyspaceName, Keyspace.open(keyspaceName).getReplicationStrategy().getPendingAddressRanges(tmd, myToken, myEndpoint)); + } + + assertEquals(nonOptimizedPathHit, nonOptimizationHit.get()); + assertEquals(optimizedPathHit, optimizationHit.get()); + } + finally + { + CassandraRelevantProperties.SKIP_OPTIMAL_STREAMING_CANDIDATES_CALCULATION.reset(); + } + } + private RangeStreamer testSourceTargetComputation(String keyspaceName, int numOldNodes, int replicationFactor) throws UnknownHostException { StorageService ss = StorageService.instance; @@ -93,20 +161,6 @@ private RangeStreamer testSourceTargetComputation(String keyspaceName, int numOl InetAddressAndPort myEndpoint = InetAddressAndPort.getByName("127.0.0.1"); assertEquals(numOldNodes, tmd.sortedTokens().size()); - IFailureDetector mockFailureDetector = new IFailureDetector() - { - public boolean isAlive(InetAddressAndPort ep) - { - return true; - } - - public void interpret(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } - public void report(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } - public void registerFailureDetectionEventListener(IFailureDetectionEventListener listener) { throw new UnsupportedOperationException(); } - public void unregisterFailureDetectionEventListener(IFailureDetectionEventListener listener) { throw new UnsupportedOperationException(); } - public void remove(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } - public void forceConviction(InetAddressAndPort ep) { throw new UnsupportedOperationException(); } - }; RangeStreamer s = new RangeStreamer(tmd, null, myEndpoint, StreamOperation.BOOTSTRAP, true, DatabaseDescriptor.getEndpointSnitch(), new StreamStateStore(), mockFailureDetector, false, 1); assertNotNull(Keyspace.open(keyspaceName)); s.addRanges(keyspaceName, Keyspace.open(keyspaceName).getReplicationStrategy().getPendingAddressRanges(tmd, myToken, myEndpoint)); From 6b2cdba56b85b948a8716a02b2cd3015e8d1ff9a Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 15 Apr 2025 10:24:08 -0700 Subject: [PATCH 040/340] AST fuzz tests can be flakey in multi node clusters due to ephemeral read errors caused by a race condition issue with SAIUtils test class patch by David Capwell; reviewed by Bernardo Botella Corbi, Caleb Rackliffe for CASSANDRA-20550 --- .../test/cql3/MultiNodeTableWalkBase.java | 21 ------------- .../test/cql3/StatefulASTBase.java | 30 ++++++++++++++++++- .../distributed/test/sai/SAIUtil.java | 22 +++++++------- 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java index da4ac26cbc11..d6c01834737f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java @@ -92,27 +92,6 @@ public MultiNodeState(RandomSource rs, Cluster cluster) super(rs, cluster); } - @Override - public boolean allowNonPartitionQuery() - { - // This is disabled to make CI stable. There are known issues that are being fixed so have to exclude for now - return false; - } - - @Override - public boolean allowNonPartitionMultiColumnQuery() - { - // This is disabled to make CI stable. There are known issues that are being fixed so have to exclude for now - return false; - } - - @Override - public boolean allowPartitionQuery() - { - // This is disabled to make CI stable. There are known issues that are being fixed so have to exclude for now - return false; - } - @Override protected boolean isMultiNode() { diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java index 83548ba83475..3a23e1bfcc0a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java @@ -34,6 +34,7 @@ import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; import org.slf4j.Logger; import accord.utils.Gen; @@ -46,6 +47,8 @@ import com.datastax.driver.core.Session; import com.datastax.driver.core.SimpleStatement; import com.datastax.driver.core.SocketOptions; +import com.datastax.driver.core.exceptions.ReadFailureException; +import com.datastax.driver.core.exceptions.WriteFailureException; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.KnownIssue; @@ -73,6 +76,7 @@ import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.test.JavaDriverUtils; import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.harry.model.ASTSingleTableModel; import org.apache.cassandra.harry.util.StringUtils; import org.apache.cassandra.schema.TableMetadata; @@ -496,11 +500,35 @@ protected ByteBuffer[][] executeQuery(IInstance instance, int fetchSize, Consist .findAny() .get(); ss.setHost(host); - ResultSet result = session.execute(ss); + ResultSet result; + try + { + result = session.execute(ss); + } + catch (ReadFailureException t) + { + throw new AssertionError("failed from=" + Maps.transformValues(t.getFailuresMap(), BaseState::safeErrorCode), t); + } + catch (WriteFailureException t) + { + throw new AssertionError("failed from=" + Maps.transformValues(t.getFailuresMap(), BaseState::safeErrorCode), t); + } return getRowsAsByteBuffer(result); } } + private static String safeErrorCode(Integer code) + { + try + { + return RequestFailureReason.fromCode(code).name(); + } + catch (IllegalArgumentException e) + { + return "Unexpected code " + code + ": " + e.getMessage(); + } + } + @VisibleForTesting static ByteBuffer[][] getRowsAsByteBuffer(ResultSet result) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/sai/SAIUtil.java b/test/distributed/org/apache/cassandra/distributed/test/sai/SAIUtil.java index 36c6e8445d7a..c00e5699ba8c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/sai/SAIUtil.java +++ b/test/distributed/org/apache/cassandra/distributed/test/sai/SAIUtil.java @@ -29,7 +29,6 @@ import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.IInstance; -import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.index.Index; import org.apache.cassandra.index.IndexStatusManager; @@ -86,23 +85,26 @@ public static void assertIndexQueryable(Cluster cluster, String keyspace, String */ private static void assertIndexesQueryable(Cluster cluster, String keyspace, final Iterable indexes) { - IInvokableInstance localNode = cluster.get(1); final List nodes = cluster.stream() .map(node -> nodeAddress(node.broadcastAddress())) .collect(Collectors.toList()); - localNode.runOnInstance(() -> { - for (String index : indexes) - { - for (InetAddressAndPort node : nodes) + for (var localNode : cluster) + { + if (localNode.isShutdown()) continue; + localNode.runOnInstance(() -> { + for (String index : indexes) { - Index.Status status = IndexStatusManager.instance.getIndexStatus(node, keyspace, index); - assert status == Index.Status.BUILD_SUCCEEDED + for (InetAddressAndPort node : nodes) + { + Index.Status status = IndexStatusManager.instance.getIndexStatus(node, keyspace, index); + assert status == Index.Status.BUILD_SUCCEEDED : "Index " + index + " not queryable on node " + node + " (status = " + status + ')'; + } } - } - }); + }); + } } private static InetAddressAndPort nodeAddress(InetSocketAddress address) From 78290bed4518d403407398331673bf35a37c5011 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Tue, 15 Apr 2025 10:11:45 -0500 Subject: [PATCH 041/340] Avoid computing prepared statement size for unprepared batches patch by Caleb Rackliffe; reviewed by Berenguer Blasi and Marcus Eriksson for CASSANDRA-20556 --- CHANGES.txt | 1 + src/java/org/apache/cassandra/cql3/QueryProcessor.java | 10 +++++++++- .../cassandra/transport/messages/BatchMessage.java | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 25aea7b20ab7..67672cd0c51f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Avoid computing prepared statement size for unprepared batches (CASSANDRA-20556) * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) * Update OWASP dependency checker to version 12.1.0 (CASSANDRA-20501) * Suppress CVE-2025-25193 (CASSANDRA-20504) diff --git a/src/java/org/apache/cassandra/cql3/QueryProcessor.java b/src/java/org/apache/cassandra/cql3/QueryProcessor.java index 910f8c79f8e5..c1045548b8f5 100644 --- a/src/java/org/apache/cassandra/cql3/QueryProcessor.java +++ b/src/java/org/apache/cassandra/cql3/QueryProcessor.java @@ -392,6 +392,11 @@ public static Prepared prepareInternal(String query) throws RequestValidationExc } public static Prepared parseAndPrepare(String query, ClientState clientState, boolean isInternal) throws RequestValidationException + { + return parseAndPrepare(query, clientState, isInternal, true); + } + + public static Prepared parseAndPrepare(String query, ClientState clientState, boolean isInternal, boolean measure) throws RequestValidationException { CQLStatement.Raw raw = parseStatement(query); @@ -416,7 +421,10 @@ public static Prepared parseAndPrepare(String query, ClientState clientState, bo res = new Prepared(statement, "", fullyQualified, keyspace); else res = new Prepared(statement, query, fullyQualified, keyspace); - res.pstmntSize = measurePstmnt(res); + + // Some prepared statements will not be cached and therefore do not require a pre-computed size. + if (measure) + res.pstmntSize = measurePstmnt(res); return res; } diff --git a/src/java/org/apache/cassandra/transport/messages/BatchMessage.java b/src/java/org/apache/cassandra/transport/messages/BatchMessage.java index afc308aee411..071741509e5e 100644 --- a/src/java/org/apache/cassandra/transport/messages/BatchMessage.java +++ b/src/java/org/apache/cassandra/transport/messages/BatchMessage.java @@ -184,7 +184,7 @@ protected Message.Response execute(QueryState state, long queryStartNanoTime, bo { p = QueryProcessor.parseAndPrepare((String) query, state.getClientState().cloneWithKeyspaceIfSet(options.getKeyspace()), - false); + false, false); } else { From 4436b84a5ae090c79e9dee687d7c5c9074f7eae4 Mon Sep 17 00:00:00 2001 From: Sunil Ramchandra Pawar Date: Wed, 16 Apr 2025 13:35:41 -0700 Subject: [PATCH 042/340] SAI fails queries when multiple columns exist and a non-indexed column is a composite with a map patch by Sunil Ramchandra Pawar; reviewed by Caleb Rackliffe, David Capwell for CASSANDRA-19891 --- CHANGES.txt | 1 + .../index/sai/utils/IndexTermType.java | 3 +- .../index/sai/cql/ComplexQueryTest.java | 44 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index ef6b848ff2e6..1332682353a7 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * SAI fails queries when multiple columns exist and a non-indexed column is a composite with a map (CASSANDRA-19891) * Avoid purging deletions in RowFilter when reconciliation is required (CASSANDRA-20541) * Fixed multiple single-node SAI query bugs relating to static columns (CASSANDRA-20338) * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) diff --git a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java index 7fa226e9582d..f3c7e2c05f96 100644 --- a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java +++ b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java @@ -144,7 +144,8 @@ private IndexTermType(ColumnMetadata columnMetadata, List partit AbstractType baseType = indexType.unwrap(); - if (baseType.subTypes().isEmpty()) + // We only need to inspect subtypes when it is possible for them to be queried individually. + if (baseType.subTypes().isEmpty() || indexTargetType == IndexTarget.Type.SIMPLE || indexTargetType == IndexTarget.Type.FULL) { this.subTypes = Collections.emptyList(); } diff --git a/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java b/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java index 0b4a053d1232..2aa334c29d6f 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java @@ -65,4 +65,48 @@ public void splitRowsWithBooleanLogic() var result = execute("SELECT pk FROM %s WHERE str_val = 'A' AND val = 'A'"); assertRows(result, row(3)); } + + @Test + public void compositeTypeWithMapInsideQuery() + { + createTable(KEYSPACE, "CREATE TABLE %s (" + + "pk1 frozenLongType,I=>ByteType,6=>LexicalUUIDType)'>>," + + "pk2 frozen>>>," + + "ck1 frozen>>>," + + "ck2 tinyint," + + "r1 frozenDecimalType,y=>TimestampType,f=>BooleanType)'>> static," + + "r2 'DynamicCompositeType(P=>ShortType)'," + + "r3 'CompositeType(FrozenType(ListType(DoubleType)),FrozenType(MapType(LongType,DoubleType)),DoubleType)'," + + "r4 frozen>>>," + + "r5 'CompositeType(CompositeType(ShortType,SimpleDateType,BooleanType),CompositeType(FloatType),MapType(ByteType,TimeType))'," + + "r6 set," + + "PRIMARY KEY ((pk1, pk2), ck1, ck2))"); + + + + createIndex("CREATE INDEX ON %s (FULL(ck1)) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (FULL(pk1)) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (FULL(r4)) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (r2) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (r3) USING 'SAI'"); + + + UntypedResultSet withMultipleColumns = execute("SELECT pk1 FROM " + + "%s " + + "WHERE r5 = 0x0010000230bd00000457f0bd31000001000000000700049f647252000000260000000200000001f300000008000001c4e14bba4b00000001260000000800003f2b300d385d00" + + " AND r3 = 0x001c00000002000000083380d171eace676900000008e153bb97fdd5c22e00006d000000030000000897c5493857999fc000000013f08cc4fad0f04d0de51cff28d4ae743d2da1c40000000857108e8c372c868400000013f0cc6bca55f0ee240b27ff12c77a7b7dc3c665000000086c07d25fcdd3403500000013f0745922bdf0ac44c9b5ffd80f025ded9a211d000008200547f5da7a43aa00" + + " AND r2 = 0x8050000255e200 " + + " AND pk2 = ((-1.2651989E-23))" + + " ALLOW FILTERING;"); + + assertRowCount(withMultipleColumns, 0); + + UntypedResultSet withoutSAI = execute("SELECT pk1 FROM " + + "%s " + + " WHERE r5 = 0x001c00000002000000083380d171eace676900000008e153bb97fdd5c22e00006d000000030000000897c5493857999fc000000013f08cc4fad0f04d0de51cff28d4ae743d2da1c40000000857108e8c372c868400000013f0cc6bca55f0ee240b27ff12c77a7b7dc3c665000000086c07d25fcdd3403500000013f0745922bdf0ac44c9b5ffd80f025ded9a211d000008200547f5da7a43aa00" + + " ALLOW FILTERING;"); + + + assertRowCount(withoutSAI, 0); + } } From 09c8fa10301e4377353b24feec00057d091f9939 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Fri, 21 Oct 2022 13:10:08 -0700 Subject: [PATCH 043/340] CEP-15 (C*): Messaging and storage engine integration patch by Blake Eggleston; reviewed by Benedict Elliott Smith, David Capwell for CASSANDRA-17103 --- .build/build-accord.xml | 37 + .build/build-checkstyle.xml | 4 +- .build/build-rat.xml | 1 + .build/build-resolver.xml | 4 +- .build/cassandra-build-deps-template.xml | 5 + .build/cassandra-deps-template.xml | 4 + .../post-checkout/100-update-submodules.sh | 41 + .build/git/git-hooks/post-switch | 1 + .../100-verify-submodules-pushed.sh | 98 + .../git-hooks/pre-push/100-push-submodules.sh | 51 + .build/git/install-git-defaults.sh | 117 + .build/parent-pom-template.xml | 36 + .build/sh/bump-accord.sh | 38 + .build/sh/change-submodule-accord.sh | 25 + .build/sh/change-submodule.sh | 52 + .build/sh/development-switch.sh | 117 + .gitmodules | 4 + CHANGES.txt | 1 + CONTRIBUTING.md | 31 + accord_demo.txt | 19 + build.xml | 28 +- conf/cassandra.yaml | 3 + ide/idea-iml-file.xml | 20 + ide/idea/vcs.xml | 3 +- ide/idea/workspace.xml | 20 +- modules/accord | 1 + pylib/cqlshlib/cqlhandling.py | 8 +- simulator.sh | 88 + src/antlr/Cql.g | 1 + src/antlr/Lexer.g | 5 + src/antlr/Parser.g | 249 +- .../audit/AuditLogEntryCategory.java | 2 +- .../cassandra/audit/AuditLogEntryType.java | 1 + .../cassandra/audit/AuditLogFilter.java | 2 +- .../concurrent/SingleThreadExecutorPlus.java | 1 + .../apache/cassandra/concurrent/Stage.java | 2 +- .../config/CassandraRelevantProperties.java | 3 + .../org/apache/cassandra/config/Config.java | 36 +- .../cassandra/config/DatabaseDescriptor.java | 54 + .../apache/cassandra/cql3/CQLStatement.java | 10 + .../org/apache/cassandra/cql3/Operation.java | 32 +- .../org/apache/cassandra/cql3/Operations.java | 62 +- .../org/apache/cassandra/cql3/Operator.java | 66 +- .../apache/cassandra/cql3/QueryProcessor.java | 8 +- .../org/apache/cassandra/cql3/ResultSet.java | 5 +- .../cassandra/cql3/UpdateParameters.java | 30 +- .../cql3/conditions/ColumnCondition.java | 204 +- .../restrictions/StatementRestrictions.java | 20 +- .../cassandra/cql3/selection/Selectable.java | 8 +- .../cassandra/cql3/selection/Selection.java | 108 +- .../cassandra/cql3/selection/Selector.java | 2 +- .../cql3/statements/BatchStatement.java | 15 +- .../cql3/statements/CQL3CasRequest.java | 129 +- .../cql3/statements/DeleteStatement.java | 2 +- .../statements/ModificationStatement.java | 130 +- .../cql3/statements/QualifiedStatement.java | 46 + .../cql3/statements/SelectStatement.java | 139 +- .../cql3/statements/TransactionStatement.java | 563 ++++ .../cql3/statements/UpdateStatement.java | 90 +- .../cassandra/cql3/terms/Constants.java | 26 +- .../apache/cassandra/cql3/terms/Lists.java | 2 +- .../org/apache/cassandra/cql3/terms/Maps.java | 2 +- .../cassandra/cql3/terms/UserTypes.java | 2 +- .../cql3/transactions/ConditionStatement.java | 148 + .../cql3/transactions/ReferenceOperation.java | 178 ++ .../cql3/transactions/ReferenceValue.java | 155 + .../cql3/transactions/RowDataReference.java | 405 +++ .../transactions/SelectReferenceSource.java | 52 + .../org/apache/cassandra/db/DeletionTime.java | 2 +- .../cassandra/db/MutableDeletionInfo.java | 10 + .../cassandra/db/PartitionPosition.java | 2 +- .../cassandra/db/RangeTombstoneList.java | 9 + .../db/SinglePartitionReadCommand.java | 22 +- .../apache/cassandra/db/SystemKeyspace.java | 11 +- .../org/apache/cassandra/db/WriteType.java | 7 +- .../db/filter/ClusteringIndexNamesFilter.java | 14 - .../cassandra/db/filter/ColumnFilter.java | 2 +- .../cassandra/db/marshal/AbstractType.java | 2 +- .../db/marshal/ByteArrayAccessor.java | 2 + .../apache/cassandra/db/marshal/ListType.java | 6 +- .../apache/cassandra/db/marshal/MapType.java | 4 +- .../apache/cassandra/db/marshal/SetType.java | 10 +- .../apache/cassandra/db/marshal/UserType.java | 19 +- .../db/partitions/AbstractBTreePartition.java | 4 +- .../db/partitions/FilteredPartition.java | 6 + .../cassandra/db/partitions/Partition.java | 1 + .../db/partitions/PartitionUpdate.java | 29 +- .../cassandra/db/rows/AbstractCell.java | 7 + .../apache/cassandra/db/rows/BTreeRow.java | 14 +- .../apache/cassandra/db/rows/ColumnData.java | 1 + .../cassandra/db/rows/ComplexColumnData.java | 7 + .../org/apache/cassandra/db/rows/Row.java | 2 + .../db/virtual/AccordVirtualTables.java | 84 + .../db/virtual/SystemViewsKeyspace.java | 1 + .../cassandra/dht/AccordBytesSplitter.java | 89 + .../apache/cassandra/dht/AccordSplitter.java | 103 + .../cassandra/dht/ByteOrderedPartitioner.java | 61 +- .../cassandra/dht/ComparableObjectToken.java | 7 + .../apache/cassandra/dht/IPartitioner.java | 8 + .../cassandra/dht/LocalPartitioner.java | 14 + .../cassandra/dht/Murmur3Partitioner.java | 45 + .../dht/OrderPreservingPartitioner.java | 71 +- .../cassandra/dht/RandomPartitioner.java | 36 + .../dht/ReversedLongLocalPartitioner.java | 21 + .../org/apache/cassandra/dht/Splitter.java | 8 +- src/java/org/apache/cassandra/dht/Token.java | 48 + .../io/LocalVersionedSerializer.java | 94 + .../cassandra/io/MessageVersionProvider.java | 24 + .../io/sstable/CQLSSTableWriter.java | 10 +- .../cassandra/io/util/DataOutputPlus.java | 1 - .../metrics/AccordClientRequestMetrics.java | 46 + .../org/apache/cassandra/metrics/Sampler.java | 1 + .../apache/cassandra/net/ForwardingInfo.java | 1 + .../org/apache/cassandra/net/Message.java | 3 +- .../cassandra/net/MessagingService.java | 13 + src/java/org/apache/cassandra/net/Verb.java | 142 +- .../cassandra/schema/ColumnMetadata.java | 39 +- .../org/apache/cassandra/schema/Schema.java | 14 +- .../cassandra/schema/SchemaConstants.java | 3 +- .../cassandra/schema/SchemaKeyspace.java | 2 +- .../cassandra/schema/SchemaProvider.java | 18 + .../org/apache/cassandra/schema/TableId.java | 17 +- .../cassandra/schema/TableMetadata.java | 6 +- .../cassandra/serializers/ListSerializer.java | 30 +- .../cassandra/serializers/SetSerializer.java | 2 +- .../apache/cassandra/service/CASRequest.java | 13 +- .../cassandra/service/StorageProxy.java | 53 +- .../cassandra/service/StorageProxyMBean.java | 3 +- .../cassandra/service/StorageService.java | 11 + .../service/StorageServiceMBean.java | 3 + .../service/accord/AccordCallback.java | 69 + .../service/accord/AccordCommand.java | 824 ++++++ .../service/accord/AccordCommandStore.java | 490 ++++ .../service/accord/AccordCommandStores.java | 73 + .../service/accord/AccordCommandsForKey.java | 433 +++ .../accord/AccordConfigurationService.java | 94 + .../service/accord/AccordKeyspace.java | 807 ++++++ .../service/accord/AccordMessageSink.java | 127 + .../service/accord/AccordObjectSizes.java | 228 ++ .../service/accord/AccordPartialCommand.java | 209 ++ .../accord/AccordSerializerVersion.java | 114 + .../service/accord/AccordSerializers.java | 249 ++ .../service/accord/AccordService.java | 322 +++ .../cassandra/service/accord/AccordState.java | 105 + .../service/accord/AccordStateCache.java | 647 +++++ .../service/accord/AccordTopologyUtils.java | 100 + .../service/accord/AccordVerbHandler.java | 48 + .../service/accord/EndpointMapping.java | 136 + .../service/accord/IAccordService.java | 46 + .../service/accord/ListenerProxy.java | 275 ++ .../cassandra/service/accord/ReadFuture.java | 304 ++ .../cassandra/service/accord/TokenRange.java | 82 + .../service/accord/api/AccordAgent.java | 63 + .../service/accord/api/AccordRoutableKey.java | 82 + .../service/accord/api/AccordRoutingKey.java | 334 +++ .../service/accord/api/AccordScheduler.java | 97 + .../service/accord/api/PartitionKey.java | 181 ++ .../service/accord/async/AsyncContext.java | 116 + .../service/accord/async/AsyncLoader.java | 240 ++ .../service/accord/async/AsyncOperation.java | 270 ++ .../service/accord/async/AsyncWriter.java | 334 +++ .../accord/serializers/AcceptSerializers.java | 162 ++ .../accord/serializers/ApplySerializers.java | 89 + .../BeginInvalidationSerializers.java | 103 + .../serializers/CheckStatusSerializers.java | 185 ++ .../serializers/CommandSerializers.java | 200 ++ .../accord/serializers/CommitSerializers.java | 103 + .../accord/serializers/DepsSerializer.java | 200 ++ .../accord/serializers/EnumSerializer.java | 58 + .../serializers/GetDepsSerializers.java | 84 + .../serializers/InformDurableSerializers.java | 58 + .../InformHomeDurableSerializers.java | 68 + .../serializers/InformOfTxnIdSerializers.java | 53 + .../accord/serializers/KeySerializers.java | 401 +++ .../serializers/PreacceptSerializers.java | 115 + .../serializers/ReadDataSerializers.java | 107 + .../serializers/RecoverySerializers.java | 168 ++ .../serializers/TopologySerializers.java | 72 + .../serializers/TxnRequestSerializer.java | 105 + .../serializers/WaitOnCommitSerializer.java | 77 + .../accord/store/AbstractStoredField.java | 152 + .../service/accord/store/StoredBoolean.java | 85 + .../service/accord/store/StoredLong.java | 86 + .../accord/store/StoredNavigableMap.java | 224 ++ .../service/accord/store/StoredSet.java | 249 ++ .../service/accord/store/StoredValue.java | 128 + .../service/accord/txn/AbstractKeySorted.java | 155 + .../accord/txn/AbstractSerialized.java | 83 + .../accord/txn/AccordUpdateParameters.java | 83 + .../service/accord/txn/TxnCondition.java | 584 ++++ .../cassandra/service/accord/txn/TxnData.java | 199 ++ .../service/accord/txn/TxnDataName.java | 257 ++ .../service/accord/txn/TxnNamedRead.java | 173 ++ .../service/accord/txn/TxnQuery.java | 137 + .../cassandra/service/accord/txn/TxnRead.java | 217 ++ .../service/accord/txn/TxnReference.java | 326 +++ .../accord/txn/TxnReferenceOperation.java | 302 ++ .../accord/txn/TxnReferenceOperations.java | 132 + .../service/accord/txn/TxnReferenceValue.java | 215 ++ .../service/accord/txn/TxnUpdate.java | 304 ++ .../service/accord/txn/TxnWrite.java | 395 +++ .../apache/cassandra/service/paxos/Paxos.java | 17 +- .../cassandra/service/paxos/PaxosPrepare.java | 13 +- .../service/paxos/PaxosPrepareRefresh.java | 8 +- .../cassandra/service/paxos/PaxosRepair.java | 33 +- .../cassandra/service/paxos/PaxosState.java | 27 +- .../org/apache/cassandra/tools/NodeTool.java | 1 + .../tools/nodetool/CreateEpochUnsafe.java | 33 + .../cassandra/utils/ArraySerializers.java | 55 + .../cassandra/utils/ByteBufferUtil.java | 61 +- ...alizer.java => CollectionSerializers.java} | 73 +- src/java/org/apache/cassandra/utils/Hex.java | 13 +- .../cassandra/utils/NullableSerializer.java | 14 +- .../cassandra/utils/btree/BTreeSet.java | 11 + .../utils/concurrent/FutureCombiner.java | 1 + .../cassandra/utils/concurrent/Semaphore.java | 1 + .../utils/logging/ClassNameFilter.java | 47 + .../cassandra/cql3/reserved_keywords.txt | 5 + test/conf/cassandra.yaml | 1 + test/conf/logback-dtest.xml | 5 + test/conf/logback-simulator.xml | 24 +- .../apache/cassandra/distributed/api/Row.java | 160 ++ .../distributed/api/SimpleQueryResult.java | 13 +- .../distributed/impl/Coordinator.java | 5 +- .../cassandra/distributed/impl/Instance.java | 3 + .../distributed/impl/InstanceConfig.java | 4 +- .../distributed/impl/MessageImpl.java | 1 + .../cassandra/distributed/impl/Query.java | 14 +- .../distributed/impl/UnsafeGossipHelper.java | 1 + .../shared/VersionedApplicationState.java | 1 + .../test/accord/AccordCQLTest.java | 2521 +++++++++++++++++ .../test/accord/AccordFeatureFlagTest.java | 103 + .../test/accord/AccordIntegrationTest.java | 117 + .../test/accord/AccordTestBase.java | 308 ++ .../test/accord/AccordTopologyTest.java | 42 + .../test/cql3/CasMultiNodeTableWalkBase.java | 4 +- .../test/cql3/MultiNodeTableWalkBase.java | 2 +- .../MultiNodeTableWalkWithReadRepairTest.java | 2 +- ...ltiNodeTableWalkWithoutReadRepairTest.java | 2 +- .../test/cql3/MultiNodeTokenConflictTest.java | 4 +- .../cql3/PaxosV1MultiNodeTableWalkTest.java | 2 +- .../cql3/PaxosV2MultiNodeTableWalkTest.java | 2 +- .../test/cql3/SingleNodeTableWalkTest.java | 12 +- .../cql3/SingleNodeTokenConflictTest.java | 12 +- .../test/cql3/StatefulASTBase.java | 10 +- .../distributed/test/jmx/JMXTestsUtil.java | 3 +- .../distributed/util/QueryResultUtil.java | 72 +- .../fuzz/snapshots/SnapshotsTest.java | 8 +- .../fuzz/topology/HarryTopologyMixupTest.java | 12 +- .../fuzz/topology/TopologyMixupTestBase.java | 31 +- .../harry/execution/DataTracker.java | 7 +- .../cassandra/harry/gen/Generators.java | 4 +- .../harry/gen/InvertibleGenerator.java | 4 +- .../harry/model/ASTSingleTableModel.java | 2 +- .../harry/test/SimpleBijectionTest.java | 2 +- .../simulator/asm/InterceptClasses.java | 4 +- .../cassandra/simulator/ActionList.java | 4 + .../cassandra/simulator/ActionSchedule.java | 12 +- .../simulator/ClusterSimulation.java | 3 + .../org/apache/cassandra/simulator/Debug.java | 32 +- .../cassandra/simulator/SimulationRunner.java | 21 +- .../cassandra/simulator/SimulatorUtils.java | 7 +- .../simulator/cluster/KeyspaceActions.java | 12 +- .../simulator/logging/RunStartDefiner.java | 37 + .../simulator/logging/SeedDefiner.java | 42 + ...bstractPairOfSequencesPaxosSimulation.java | 299 ++ .../paxos/AccordClusterSimulation.java | 87 + .../paxos/AccordSimulationRunner.java | 78 + .../simulator/paxos/HistoryChecker.java | 32 +- .../simulator/paxos/HistoryValidator.java | 52 + .../paxos/LinearizabilityValidator.java | 83 + .../paxos/LoggingHistoryValidator.java | 73 + .../simulator/paxos/Observation.java | 16 +- .../PairOfSequencesAccordSimulation.java | 304 ++ .../paxos/PairOfSequencesPaxosSimulation.java | 226 +- .../simulator/paxos/PaxosSimulation.java | 42 +- .../paxos/StrictSerializabilityValidator.java | 111 + .../systems/InterceptingGlobalMethods.java | 6 +- .../systems/InterceptingMonitors.java | 16 +- .../systems/InterceptorOfGlobalMethods.java | 16 +- .../simulator/systems/SimulatedAction.java | 3 +- .../simulator/systems/SimulatedQuery.java | 5 +- .../simulator/paxos/HistoryValidatorTest.java | 593 ++++ .../simulator/test/HarrySimulatorTest.java | 7 +- .../test/ShortAccordSimulationTest.java | 34 + test/unit/accord/utils/random/Picker.java | 2 +- .../{utils => utilsfork}/DefaultRandom.java | 2 +- .../unit/accord/{utils => utilsfork}/Gen.java | 32 +- .../accord/{utils => utilsfork}/Gens.java | 26 +- .../{utils => utilsfork}/Invariants.java | 2 +- .../accord/{utils => utilsfork}/Property.java | 93 +- .../{utils => utilsfork}/RandomSource.java | 8 +- .../{utils => utilsfork}/SeedProvider.java | 2 +- .../WrappedRandomSource.java | 4 +- .../async/TimeoutUtils.java | 2 +- test/unit/org/apache/cassandra/Util.java | 8 + .../cassandra/audit/AuditLoggerTest.java | 24 +- .../auth/AllowAllCIDRAuthorizerTest.java | 12 +- .../apache/cassandra/auth/AuthTestUtils.java | 24 + .../auth/CIDRGroupsMappingManagerTest.java | 14 +- ...assandraCIDRAuthorizerEnforceModeTest.java | 12 +- ...assandraCIDRAuthorizerMonitorModeTest.java | 14 +- .../auth/CassandraNetworkAuthorizerTest.java | 12 +- .../apache/cassandra/auth/TxnAuthTest.java | 174 ++ .../concurrent/SimulatedExecutorFactory.java | 4 +- .../config/DatabaseDescriptorRefTest.java | 1 + .../config/DatabaseDescriptorTest.java | 2 + ...WithColumnCqlConstraintValidationTest.java | 2 +- ...mnOctetLengthConstraintValidationTest.java | 2 +- .../org/apache/cassandra/cql3/CQLTester.java | 36 +- .../cql3/NodeLocalConsistencyTest.java | 23 +- .../cql3/PreparedStatementsTest.java | 257 +- .../cassandra/cql3/ast/ExpressionTest.java | 8 +- .../cql3/conditions/ColumnConditionTest.java | 225 ++ .../statements/DescribeStatementTest.java | 2 + .../statements/TransactionStatementTest.java | 372 +++ .../cql3/statements/TxnDataNameTest.java | 69 + .../cassandra/cql3/terms/ListsTest.java | 1 - .../validation/operations/InsertTest.java | 2 +- .../CIDRFilteringMetricsTableTest.java | 15 +- .../dht/ByteOrderedPartitionerTest.java | 16 + .../cassandra/dht/KeyCollisionTest.java | 6 + .../cassandra/dht/LengthPartitioner.java | 44 +- .../dht/OrderPreservingPartitionerTest.java | 18 +- .../cassandra/dht/PartitionerTestCase.java | 114 + .../cassandra/gms/VersionedValueTest.java | 6 +- .../org/apache/cassandra/index/StubIndex.java | 8 +- .../sai/cql/AbstractSimpleEqTestBase.java | 6 +- .../index/sai/cql/AllTypesSimpleEqTest.java | 4 +- .../io/util/CompressedChunkReaderTest.java | 6 +- .../cassandra/net/MessageDeliveryTest.java | 4 +- .../net/SimulatedMessageDelivery.java | 4 +- .../ConcurrentIrWithPreviewFuzzTest.java | 6 +- .../cassandra/repair/FailedAckTest.java | 6 +- .../repair/FailingRepairFuzzTest.java | 6 +- .../apache/cassandra/repair/FuzzTestBase.java | 8 +- .../cassandra/repair/HappyPathFuzzTest.java | 6 +- .../cassandra/repair/SlowMessageFuzzTest.java | 6 +- .../cassandra/schema/MemtableParamsTest.java | 4 +- .../cassandra/schema/ValidationTest.java | 50 +- .../serializers/SerializationUtils.java | 5 - .../accord/AccordCommandStoreTest.java | 186 ++ .../service/accord/AccordCommandTest.java | 193 ++ .../service/accord/AccordStateCacheTest.java | 494 ++++ .../service/accord/AccordTestUtils.java | 273 ++ .../service/accord/AccordTopologyTest.java | 71 + .../service/accord/EndpointMappingTest.java | 42 + .../service/accord/api/AccordKeyTest.java | 133 + .../service/accord/async/AsyncLoaderTest.java | 326 +++ .../accord/async/AsyncOperationTest.java | 225 ++ .../service/accord/async/AsyncWriterTest.java | 241 ++ .../serializers/CommandSerializersTest.java | 61 + .../serializers/TopologySerializersTest.java | 40 + .../service/accord/store/StoredMapTest.java | 203 ++ .../service/accord/store/StoredSetTest.java | 202 ++ .../service/accord/store/StoredValueTest.java | 85 + .../accord/txn/AbstractKeySortedTest.java | 158 ++ .../service/accord/txn/TxnUpdateTest.java | 50 + .../cassandra/transport/CBUtilTest.java | 4 +- .../cassandra/utils/AssertionUtils.java | 46 +- .../cassandra/utils/ConfigGenBuilder.java | 6 +- .../cassandra/utils/ConfigGenBuilderTest.java | 4 +- .../apache/cassandra/utils/Generators.java | 8 +- .../cassandra/utils/SerializerTestUtils.java | 74 + .../io/sstable/StressCQLSSTableWriter.java | 1 - 365 files changed, 29678 insertions(+), 1042 deletions(-) create mode 100644 .build/build-accord.xml create mode 100755 .build/git/git-hooks/post-checkout/100-update-submodules.sh create mode 120000 .build/git/git-hooks/post-switch create mode 100755 .build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh create mode 100755 .build/git/git-hooks/pre-push/100-push-submodules.sh create mode 100755 .build/git/install-git-defaults.sh create mode 100755 .build/sh/bump-accord.sh create mode 100755 .build/sh/change-submodule-accord.sh create mode 100755 .build/sh/change-submodule.sh create mode 100755 .build/sh/development-switch.sh create mode 100644 .gitmodules create mode 100644 accord_demo.txt create mode 160000 modules/accord create mode 100755 simulator.sh create mode 100644 src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java create mode 100644 src/java/org/apache/cassandra/cql3/transactions/ConditionStatement.java create mode 100644 src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java create mode 100644 src/java/org/apache/cassandra/cql3/transactions/ReferenceValue.java create mode 100644 src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java create mode 100644 src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java create mode 100644 src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java create mode 100644 src/java/org/apache/cassandra/dht/AccordBytesSplitter.java create mode 100644 src/java/org/apache/cassandra/dht/AccordSplitter.java create mode 100644 src/java/org/apache/cassandra/io/LocalVersionedSerializer.java create mode 100644 src/java/org/apache/cassandra/io/MessageVersionProvider.java create mode 100644 src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCallback.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommand.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommandStore.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommandStores.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordKeyspace.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordMessageSink.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSerializerVersion.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordService.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordState.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordStateCache.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java create mode 100644 src/java/org/apache/cassandra/service/accord/EndpointMapping.java create mode 100644 src/java/org/apache/cassandra/service/accord/IAccordService.java create mode 100644 src/java/org/apache/cassandra/service/accord/ListenerProxy.java create mode 100644 src/java/org/apache/cassandra/service/accord/ReadFuture.java create mode 100644 src/java/org/apache/cassandra/service/accord/TokenRange.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/AccordAgent.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/PartitionKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncContext.java create mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java create mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java create mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java create mode 100644 src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java create mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java create mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredLong.java create mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java create mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredSet.java create mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredValue.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnData.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnRead.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnReference.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/CreateEpochUnsafe.java create mode 100644 src/java/org/apache/cassandra/utils/ArraySerializers.java rename src/java/org/apache/cassandra/utils/{CollectionSerializer.java => CollectionSerializers.java} (51%) create mode 100644 src/java/org/apache/cassandra/utils/logging/ClassNameFilter.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordTopologyTest.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryValidator.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/LinearizabilityValidator.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/LoggingHistoryValidator.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java create mode 100644 test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java create mode 100644 test/simulator/test/org/apache/cassandra/simulator/test/ShortAccordSimulationTest.java rename test/unit/accord/{utils => utilsfork}/DefaultRandom.java (98%) rename test/unit/accord/{utils => utilsfork}/Gen.java (83%) rename test/unit/accord/{utils => utilsfork}/Gens.java (97%) rename test/unit/accord/{utils => utilsfork}/Invariants.java (99%) rename test/unit/accord/{utils => utilsfork}/Property.java (89%) rename test/unit/accord/{utils => utilsfork}/RandomSource.java (98%) rename test/unit/accord/{utils => utilsfork}/SeedProvider.java (98%) rename test/unit/accord/{utils => utilsfork}/WrappedRandomSource.java (95%) rename test/unit/accord/{utils => utilsfork}/async/TimeoutUtils.java (98%) create mode 100644 test/unit/org/apache/cassandra/auth/TxnAuthTest.java create mode 100644 test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java create mode 100644 test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/TopologySerializersTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/txn/TxnUpdateTest.java create mode 100644 test/unit/org/apache/cassandra/utils/SerializerTestUtils.java diff --git a/.build/build-accord.xml b/.build/build-accord.xml new file mode 100644 index 000000000000..eeadf4dd1883 --- /dev/null +++ b/.build/build-accord.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/.build/build-checkstyle.xml b/.build/build-checkstyle.xml index af5867e4aa9a..0484e4098c66 100644 --- a/.build/build-checkstyle.xml +++ b/.build/build-checkstyle.xml @@ -19,7 +19,7 @@ - + @@ -45,7 +45,7 @@ - + diff --git a/.build/build-rat.xml b/.build/build-rat.xml index 2f6f5c715666..9333d2b8da92 100644 --- a/.build/build-rat.xml +++ b/.build/build-rat.xml @@ -78,6 +78,7 @@ + diff --git a/.build/build-resolver.xml b/.build/build-resolver.xml index 5a1a65308604..29031b33a115 100644 --- a/.build/build-resolver.xml +++ b/.build/build-resolver.xml @@ -178,7 +178,7 @@ - + @@ -206,7 +206,7 @@ - + diff --git a/.build/cassandra-build-deps-template.xml b/.build/cassandra-build-deps-template.xml index 4ec59cdf2d4b..c6b56955e013 100644 --- a/.build/cassandra-build-deps-template.xml +++ b/.build/cassandra-build-deps-template.xml @@ -155,5 +155,10 @@ org.bouncycastle bcutil-jdk18on + + org.apache.cassandra + cassandra-accord + tests + diff --git a/.build/cassandra-deps-template.xml b/.build/cassandra-deps-template.xml index 20223c3169b4..be58faa2f478 100644 --- a/.build/cassandra-deps-template.xml +++ b/.build/cassandra-deps-template.xml @@ -116,6 +116,10 @@ org.mindrot jbcrypt + + org.apache.cassandra + cassandra-accord + io.airlift airline diff --git a/.build/git/git-hooks/post-checkout/100-update-submodules.sh b/.build/git/git-hooks/post-checkout/100-update-submodules.sh new file mode 100755 index 000000000000..b495ed086054 --- /dev/null +++ b/.build/git/git-hooks/post-checkout/100-update-submodules.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Redirect output to stderr. +exec 1>&2 + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +bin="$(cd "$(dirname "$0")" > /dev/null; pwd)" + +_main() { + # In case the usage happens at a different layer, make sure to cd to the toplevel + local root_dir + root_dir="$(git rev-parse --show-toplevel)" + cd "$root_dir" + + if [[ ! -e .gitmodules ]]; then + # nothing to see here, look away! + return 0 + fi + git submodule update --init --recursive +} + +_main "$@" diff --git a/.build/git/git-hooks/post-switch b/.build/git/git-hooks/post-switch new file mode 120000 index 000000000000..5513d1deed30 --- /dev/null +++ b/.build/git/git-hooks/post-switch @@ -0,0 +1 @@ +post-checkout \ No newline at end of file diff --git a/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh b/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh new file mode 100755 index 000000000000..c54099ac0f9a --- /dev/null +++ b/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## +## When working with submodules the top level project (Apache Cassandra) needs to commit all submodule +## changes so the top level knows what SHA to use. When working in a development environment it is +## common that multiple commits will exist in both projects, if the submodule has its history +## rewritten, then historic top level commits are no longer valid unless the SHAs are pushed to a +## remote repo; this is what the script attempts to do, make sure all SHAs added to the +## Apache Cassandra are backed up to a remote repo to make the Cassandra SHA buildable. +## + +# Redirect output to stderr. +exec 1>&2 + + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +bin="$(cd "$(dirname "$0")" > /dev/null; pwd)" + +_log() { + echo -e "[pre-commit]\t$*" +} + +error() { + _log "$@" 1>&2 + exit 1 +} + +# Status Table +# A Added +# C Copied +# D Deleted +# M Modified +# R Renamed +# T Type Changed (i.e. regular file, symlink, submodule, …<200b>) +# U Unmerged +# X Unknown +# B Broken +_main() { + # In case the usage happens at a different layer, make sure to cd to the toplevel + local root_dir + root_dir="$(git rev-parse --show-toplevel)" + cd "$root_dir" + + [[ ! -e .gitmodules ]] && return 0 + local enabled=$(git config --bool cassandra.pre-commit.verify-submodules.enabled || echo true) + [ "$enabled" == "false" ] && return 0 + local submodules=( $(git config --file .gitmodules --get-regexp path | awk '{ print $2 }') ) + + local is_submodule=false + local git_sub_dir + local git_sha + while read status file; do + is_submodule=false + for to_check in "${submodules[*]}"; do + if [[ "$to_check" == "$file" ]]; then + is_submodule=true + break + fi + done + if $is_submodule; then + local enabled=$(git config --bool cassandra.pre-commit.verify-submodule-${file}.enabled || echo true) + [ "$enabled" == "false" ] && continue + _log "Submodule detected: ${file} with status ${status}; attempting a push" + _log "\tTo disable pushes, run" + _log "\t\tgit config --local cassandra.pre-commit.verify-submodules.enabled false" + _log "\tOr" + _log "\t\tgit config --local cassandra.pre-commit.verify-submodule-${file}.enabled false" + set -x + git_sub_dir="${file}/.git" + branch="$(git config -f .gitmodules "submodule.${file}.branch")" + [[ -z "${branch:-}" ]] && error "Submodule ${file} does not define a branch" + git_sha="$(git --git-dir "${git_sub_dir}" rev-parse HEAD)" + git --git-dir "${git_sub_dir}" fetch origin + git --git-dir "${git_sub_dir}" branch "origin/${branch}" --contains "${git_sha}" || error "Git commit ${git_sha} not found in $(git remote get-url origin) on branch ${branch}" + fi + done < <(git diff --cached --name-status) +} + +_main "$@" diff --git a/.build/git/git-hooks/pre-push/100-push-submodules.sh b/.build/git/git-hooks/pre-push/100-push-submodules.sh new file mode 100755 index 000000000000..c3daa9559748 --- /dev/null +++ b/.build/git/git-hooks/pre-push/100-push-submodules.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Redirect output to stderr. +exec 1>&2 + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +bin="$(cd "$(dirname "$0")" > /dev/null; pwd)" + +_main() { + # In case the usage happens at a different layer, make sure to cd to the toplevel + local root_dir + root_dir="$(git rev-parse --show-toplevel)" + cd "$root_dir" + + if [[ ! -e .gitmodules ]]; then + # nothing to see here, look away! + return 0 + fi + + local -r cmd=' +branch="$(git rev-parse --abbrev-ref HEAD)" +[[ "$branch" == "HEAD" ]] && exit 0 + +default_remote="$(git config --local --get branch."${branch}".remote || true)" +remote="${default_remote:-origin}" + +git push --atomic "$remote" "$branch" +' + git submodule foreach --recursive "$cmd" +} + +_main "$@" diff --git a/.build/git/install-git-defaults.sh b/.build/git/install-git-defaults.sh new file mode 100755 index 000000000000..00f1dc435dbe --- /dev/null +++ b/.build/git/install-git-defaults.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +bin="$(cd "$(dirname "$0")" > /dev/null; pwd)" + +install_template_script() { + local -r name="$1" + local -r d_dir="$2" + + cat < "$name" +#!/usr/bin/env bash + +# This script is autogenerated by the Apache Cassandra build; DO NOT CHANGE! +# When this script is not found it will be installed automatically by the build +# If an existing script is found, that script will be reloated under ${d_dir} as 000-original.sh + +# Redirect output to stderr. +exec 1>&2 + +# Find all scripts to run +for path in \$(find "$d_dir" -name '*.sh' | perl -e "print sort{(split '/', \\\$a)[-1] <=> (split '/', \\\$b)[-1]}<>"); do + "\$path" "\$@" +done +EOF + chmod a+x "$name" +} + +install_hook() { + local -r git_dir="$1" + local -r hooks_dir="${git_dir}/hooks" + local -r name="$2" + local -r d_dir="${hooks_dir}/${name}.d" + local -r trigger_on_install=$3 + + mkdir "${d_dir}" &> /dev/null || true + local -r script_name="${hooks_dir}/${name}" + local installed=true + if [[ -e "$script_name" ]]; then + # was the script already installed? + if ! grep "This script is autogenerated by the Apache Cassandra build" "$script_name" &> /dev/null ; then + echo "$script_name found, but was not generated by the Apache Cassandra build; please remove or move to ${d_dir}/000-original.sh; creating and moving to ${d_dir} will cause it to run as expected, but won't conflict with hooks this build adds" 1>&2 + exit 1 + else + installed=false + fi + fi + # install all hooks + cp "$bin"/git-hooks/"${name}"/* "$d_dir"/ + + # install coordinator hook + install_template_script "$script_name" "$d_dir" + if $installed && $trigger_on_install ; then + echo "Running script $script_name" + "$script_name" + fi +} + +_install_hooks() { + local git_dir + # make sure to use --git-common-dir and not --git-dir to support worktrees + git_dir="$(git rev-parse --git-common-dir 2> /dev/null || true)" + if [[ -z "${git_dir:-}" ]]; then + # not in a git repo, noop + return 0 + fi + + # make sure hooks directory exists; does not exist by default for worktrees + mkdir -p "${git_dir}/hooks" &> /dev/null || true + + install_hook "$git_dir" "post-checkout" true + install_hook "$git_dir" "post-switch" false + install_hook "$git_dir" "pre-commit" false + install_hook "$git_dir" "pre-push" false +} + +_git_config_set() { + local -r name="$1" + # only care about rc + git config --local --get "$name" &> /dev/null +} + +_install_configs() { + # when doing pull, this makes sure submodules are updated + _git_config_set submodule.recurse || git config --local submodule.recurse true +} + +_main() { + local git_dir + # make sure to use --git-common-dir and not --git-dir to support worktrees + git_dir="$(git rev-parse --git-common-dir 2> /dev/null || true)" + # not in a git repo, noop + [[ -z "${git_dir:-}" ]] && return 0 + + _install_configs + _install_hooks +} + +_main "$@" diff --git a/.build/parent-pom-template.xml b/.build/parent-pom-template.xml index aafd527bade7..2bb3c2692723 100644 --- a/.build/parent-pom-template.xml +++ b/.build/parent-pom-template.xml @@ -715,6 +715,42 @@ jbcrypt 0.4 + + org.apache.cassandra + cassandra-accord + @version@ + + + org.apache.cassandra + cassandra-all + + + + + org.apache.cassandra + cassandra-accord + @version@ + tests + test + + + org.junit.jupiter + junit-jupiter-api + + + org.junit.jupiter + junit-jupiter-engine + + + ch.qos.logback + logback-classic + + + org.apache.cassandra + cassandra-all + + + io.airlift airline diff --git a/.build/sh/bump-accord.sh b/.build/sh/bump-accord.sh new file mode 100755 index 000000000000..43a476f3edfb --- /dev/null +++ b/.build/sh/bump-accord.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +_main() { + local home + home="$(git rev-parse --show-toplevel)" + cd "$home" + + git submodule status modules/accord + echo "Is this the correct SHA? [y/n; default=y]" + read correct + if [[ "${correct:-y}" != "y" ]]; then + echo "Please update Accord's SHA and try again" + exit 1 + fi + git commit -m "Change Accord to $(cd modules/accord; git log -1 --format='%h: %B')" modules/accord +} + +_main "$@" diff --git a/.build/sh/change-submodule-accord.sh b/.build/sh/change-submodule-accord.sh new file mode 100755 index 000000000000..997db3dc2c29 --- /dev/null +++ b/.build/sh/change-submodule-accord.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +bin="$(cd "$(dirname "$0")" > /dev/null; pwd)" + +"$bin"/change-submodule.sh modules/accord 'https://github.com/apache/cassandra-accord.git' trunk diff --git a/.build/sh/change-submodule.sh b/.build/sh/change-submodule.sh new file mode 100755 index 000000000000..6ab2d3795afd --- /dev/null +++ b/.build/sh/change-submodule.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +_usage() { + cat <&2 + exit 1 +} + +_usage() { + cat < +``` + +When changes are made to a submodule (such as to accord), you need to commit and update the reference in Apache Cassandra + +``` +$ (cd modules/accord ; git commit -am 'Saving progress') +$ .build/sh/bump-accord.sh +``` + +## Commit and Merge Process + +Due to the nature of submodules, the changes to the submodules must be committed and pushed before the changes to Apache Cassandra; these are different repositories so git's `--atomic` does not prevent conflicts from concurrent merges; the basic process is as follows: + +* Follow the normal merge process for the submodule +* Update Apache Cassandra's submodule entry to point to the newly committed change; follow the Accord example below for an example + +``` +$ .build/sh/change-submodule-accord.sh +$ .build/sh/bump-accord.sh +``` + # Useful Links - How you can contribute to Apache Cassandra [presentation](http://www.slideshare.net/yukim/cassandrasummit2013) by Yuki Morishita diff --git a/accord_demo.txt b/accord_demo.txt new file mode 100644 index 000000000000..b8834515221c --- /dev/null +++ b/accord_demo.txt @@ -0,0 +1,19 @@ + +ccm create accord-cql-poc -n 3 +ccm start + +bin/cqlsh -e "create keyspace ks with replication={'class':'SimpleStrategy', 'replication_factor':3};" +bin/cqlsh -e "create table ks.tbl1 (k int primary key, v int);" +bin/cqlsh -e "create table ks.tbl2 (k int primary key, v int);" + +bin/nodetool -h 0000:0000:0000:0000:0000:ffff:7f00:0001 -p 7100 createepochunsafe +bin/nodetool -h 0000:0000:0000:0000:0000:ffff:7f00:0001 -p 7200 createepochunsafe +bin/nodetool -h 0000:0000:0000:0000:0000:ffff:7f00:0001 -p 7300 createepochunsafe + +BEGIN TRANSACTION + LET row1 = (SELECT * FROM ks.tbl1 WHERE k = 1); + SELECT row1.v; + IF row1 IS NULL THEN + INSERT INTO ks.tbl1 (k, v) VALUES (1, 2); + END IF +COMMIT TRANSACTION; \ No newline at end of file diff --git a/build.xml b/build.xml index c1b9d74b964d..563ac70ea238 100644 --- a/build.xml +++ b/build.xml @@ -100,6 +100,8 @@ the user specifies the tmp.dir property --> + + @@ -109,8 +111,12 @@ + + + + @@ -396,6 +402,7 @@ + @@ -527,7 +534,8 @@ - + + @@ -973,6 +981,9 @@ + + + @@ -992,6 +1003,7 @@ + @@ -2061,6 +2073,7 @@ + + + + + + + diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index c813bf530ad2..89582a23de1a 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2199,6 +2199,9 @@ drop_compact_storage_enabled: false # Whether or not USE is allowed. This is enabled by default to avoid failure on upgrade. #use_statements_enabled: true +# Enables the execution of Accord (multi-key) transactions on this node. +accord_transactions_enabled: false + # When the client triggers a protocol exception or unknown issue (Cassandra bug) we increment # a client metric showing this; this logic will exclude specific subnets from updating these # metrics diff --git a/ide/idea-iml-file.xml b/ide/idea-iml-file.xml index 13e66fa61308..1d189db8d6bc 100644 --- a/ide/idea-iml-file.xml +++ b/ide/idea-iml-file.xml @@ -49,6 +49,16 @@ + + + + + + + + + + @@ -56,6 +66,8 @@ + + @@ -63,12 +75,17 @@ + + + + + @@ -76,6 +93,9 @@ + + + diff --git a/ide/idea/vcs.xml b/ide/idea/vcs.xml index 81872fd3f150..a5367a526e4d 100644 --- a/ide/idea/vcs.xml +++ b/ide/idea/vcs.xml @@ -2,6 +2,7 @@ + - \ No newline at end of file + diff --git a/ide/idea/workspace.xml b/ide/idea/workspace.xml index c5c0e28b963b..7f688b3d9626 100644 --- a/ide/idea/workspace.xml +++ b/ide/idea/workspace.xml @@ -183,24 +183,38 @@
WHERE k=1 AND c=2) + * ex. LET y = (SELECT * FROM
WHERE k=1 LIMIT 1) + */ +letStatement returns [SelectStatement.RawStatement expr] + @init { + Term.Raw limit = null; + } + : K_LET txnVar=IDENT '=' + '(' K_SELECT assignments=letSelectors K_FROM cf=columnFamilyName K_WHERE wclause=whereClause ( K_LIMIT rows=intValue { limit = rows; } )? ')' + { + SelectStatement.Parameters params = new SelectStatement.Parameters(Collections.emptyList(), Collections.emptyList(), false, false, false, $txnVar.text); + WhereClause where = wclause == null ? WhereClause.empty() : wclause.build(); + $expr = new SelectStatement.RawStatement(cf, params, assignments, where, limit, null); + } + ; + +letSelectors returns [List expr] + : t1=letSelector { $expr = new ArrayList(); $expr.add(t1); } (',' tN=letSelector { $expr.add(tN); })* + | '\*' { $expr = Collections.emptyList();} + ; + +letSelector returns [RawSelector s] + @init{ ColumnIdentifier alias = null; } + : us=unaliasedSelector { $s = new RawSelector(us, alias); } + ; selectClause returns [boolean isDistinct, List selectors] @init{ $isDistinct = false; } @@ -489,7 +549,7 @@ normalInsertStatement [QualifiedName qn] returns [UpdateStatement.ParsedInsert e } : '(' c1=cident { columnNames.add(c1); } ( ',' cn=cident { columnNames.add(cn); } )* ')' K_VALUES - '(' v1=term { values.add(v1); } ( ',' vn=term { values.add(vn); } )* ')' + '(' insertValue[values] ( ',' insertValue[values] )* ')' ( K_IF K_NOT K_EXISTS { ifNotExists = true; } )? ( usingClause[attrs] )? { @@ -497,6 +557,11 @@ normalInsertStatement [QualifiedName qn] returns [UpdateStatement.ParsedInsert e } ; +insertValue[List values] + : t=term { values.add(t); } + | {isParsingTxn}? dr=rowDataReference { values.add(new ReferenceValue.Substitution.Raw(dr)); } + ; + jsonInsertStatement [QualifiedName qn] returns [UpdateStatement.ParsedInsertJson expr] @init { Attributes.Raw attrs = new Attributes.Raw(); @@ -537,7 +602,7 @@ usingClauseObjective[Attributes.Raw attrs] updateStatement returns [UpdateStatement.ParsedUpdate expr] @init { Attributes.Raw attrs = new Attributes.Raw(); - List> operations = new ArrayList<>(); + UpdateStatement.OperationCollector operations = new UpdateStatement.OperationCollector(); boolean ifExists = false; } : K_UPDATE cf=columnFamilyName @@ -551,7 +616,8 @@ updateStatement returns [UpdateStatement.ParsedUpdate expr] operations, wclause.build(), conditions == null ? Collections.emptyList() : conditions, - ifExists); + ifExists, + isParsingTxn); } ; @@ -650,6 +716,102 @@ batchStatementObjective returns [ModificationStatement.Parsed statement] | d=deleteStatement { $statement = d; } ; +/** + * ex. conditional update returning pre-update values + * + * BEGIN TRANSACTION + * LET row1 = (SELECT * FROM
WHERE k=1 AND c=2); + * LET row2 = (SELECT * FROM
WHERE k=2 AND c=2); + * SELECT row1.v, row2.v; + * IF row1.v = 3 AND row2.v = 4 THEN + * UPDATE
SET v = row1.v + 1 WHERE k = 1 AND c = 2; + * END IF + * COMMIT TRANSACTION + * + * ex. read-only transaction + * + * BEGIN TRANSACTION + * SELECT * FROM
WHERE k=1 AND c=2; + * COMMIT TRANSACTION + * + * ex. write-only transaction + * + * BEGIN TRANSACTION + * INSERT INTO
(k, c, v) VALUES (0, 0, 1); + * COMMIT TRANSACTION + */ +batchTxnStatement returns [TransactionStatement.Parsed expr] + @init { + isParsingTxn = true; + List assignments = new ArrayList<>(); + SelectStatement.RawStatement select = null; + List returning = null; + List updates = new ArrayList<>(); + } + : K_BEGIN K_TRANSACTION + ( let=letStatement ';' { assignments.add(let); })* + ( ( (selectStatement) => s=selectStatement ';' { select = s; }) | ( K_SELECT drs=rowDataReferences ';' { returning = drs; }) )? + ( K_IF conditions=txnConditions K_THEN { isTxnConditional = true; } )? + ( upd=batchStatementObjective ';' { updates.add(upd); } )* + ( {!isTxnConditional}? (K_COMMIT K_TRANSACTION) | {isTxnConditional}? (K_END K_IF K_COMMIT K_TRANSACTION)) + { + $expr = new TransactionStatement.Parsed(assignments, select, returning, updates, conditions, references); + } + ; + finally { isParsingTxn = false; } + +rowDataReferences returns [List refs] + : r1=rowDataReference { refs = new ArrayList(); refs.add(r1); } (',' rN=rowDataReference { refs.add(rN); })* + ; + +rowDataReference returns [RowDataReference.Raw rawRef] + @init { Selectable.RawIdentifier tuple = null; Selectable.Raw selectable = null; } + @after { $rawRef = newRowDataReference(tuple, selectable); } + : t=sident ('.' s=referenceSelection)? { tuple = t; selectable = s; } + ; + +referenceSelection returns [Selectable.Raw s] + : g=referenceSelectionWithoutField m=selectorModifier[g] {$s = m;} + ; + +referenceSelectionWithoutField returns [Selectable.Raw s] + @init { Selectable.Raw tmp = null; } + @after { $s = tmp; } + : sn=sident { tmp=sn; } + | (selectionTypeHint)=> h=selectionTypeHint { tmp=h; } + | t=selectionTupleOrNestedSelector { tmp=t; } + | l=selectionList { tmp=l; } + | m=selectionMapOrSet { tmp=m; } + // UDTs are equivalent to maps from the syntax point of view, so the final decision will be done in Selectable.WithMapOrUdt + ; + +txnConditions returns [List conditions] + @init { conditions = new ArrayList(); } + : txnColumnCondition[conditions] ( K_AND txnColumnCondition[conditions] )* + ; + +txnConditionKind returns [ConditionStatement.Kind op] + : '=' { $op = ConditionStatement.Kind.EQ; } + | '<' { $op = ConditionStatement.Kind.LT; } + | '<=' { $op = ConditionStatement.Kind.LTE; } + | '>' { $op = ConditionStatement.Kind.GT; } + | '>=' { $op = ConditionStatement.Kind.GTE; } + | '!=' { $op = ConditionStatement.Kind.NEQ; } + ; + +txnColumnCondition[List conditions] + : lhs=rowDataReference + ( + K_IS + ( + K_NOT K_NULL { conditions.add(new ConditionStatement.Raw(lhs, ConditionStatement.Kind.IS_NOT_NULL, null)); } + | K_NULL { conditions.add(new ConditionStatement.Raw(lhs, ConditionStatement.Kind.IS_NULL, null)); } + ) + | (txnConditionKind term)=> op=txnConditionKind t=term { conditions.add(new ConditionStatement.Raw(lhs, op, t)); } + ) + | lhs=term op=txnConditionKind rhs=rowDataReference { conditions.add(new ConditionStatement.Raw(lhs, op, rhs)); } + ; + createAggregateStatement returns [CreateAggregateStatement.Raw stmt] @init { boolean orReplace = false; @@ -1727,18 +1889,18 @@ simpleTerm returns [Term.Raw term] | K_CAST '(' t=simpleTerm K_AS n=native_type ')' { $term = FunctionCall.Raw.newCast(t, n); } ; -columnOperation[List> operations] +columnOperation[UpdateStatement.OperationCollector operations] : key=cident columnOperationDifferentiator[operations, key] ; -columnOperationDifferentiator[List> operations, ColumnIdentifier key] +columnOperationDifferentiator[UpdateStatement.OperationCollector operations, ColumnIdentifier key] : '=' normalColumnOperation[operations, key] | shorthandColumnOperation[operations, key] | '[' k=term ']' collectionColumnOperation[operations, key, k] | '.' field=fident udtColumnOperation[operations, key, field] ; -normalColumnOperation[List> operations, ColumnIdentifier key] +normalColumnOperation[UpdateStatement.OperationCollector operations, ColumnIdentifier key] : t=term ('+' c=cident )? { if (c == null) @@ -1766,27 +1928,56 @@ normalColumnOperation[List> operatio addRecognitionError("Only expressions of the form X = X " + ($i.text.charAt(0) == '-' ? '-' : '+') + " are supported."); addRawUpdate(operations, key, new Operation.Addition(Constants.Literal.integer($i.text))); } + | {isParsingTxn}? r=rowDataReference + { + addRawReferenceOperation(operations, key, new ReferenceOperation.Raw(new Operation.SetValue(r), key, new ReferenceValue.Substitution.Raw(r))); + } ; -shorthandColumnOperation[List> operations, ColumnIdentifier key] - : sig=('+=' | '-=') t=term - { - addRawUpdate(operations, key, $sig.text.equals("+=") ? new Operation.Addition(t) : new Operation.Substraction(t)); - } +shorthandColumnOperation[UpdateStatement.OperationCollector operations, ColumnIdentifier key] + : sig=('+=' | '-=') + ( + t=term + { + addRawUpdate(operations, key, $sig.text.equals("+=") ? new Operation.Addition(t) : new Operation.Substraction(t)); + } + | {isParsingTxn}? dr=rowDataReference + { + ReferenceValue.Raw right = new ReferenceValue.Substitution.Raw(dr); + Operation.RawUpdate operation = $sig.text.equals("+=") ? new Operation.Addition(dr) : new Operation.Substraction(dr); + addRawReferenceOperation(operations, key, new ReferenceOperation.Raw(operation, key, right)); + } + ) ; -collectionColumnOperation[List> operations, ColumnIdentifier key, Term.Raw k] - : '=' t=term - { - addRawUpdate(operations, key, new Operation.SetElement(k, t)); - } +collectionColumnOperation[UpdateStatement.OperationCollector operations, ColumnIdentifier key, Term.Raw k] + : '=' + ( + t=term + { + addRawUpdate(operations, key, new Operation.SetElement(k, t)); + } + | {isParsingTxn}? dr=rowDataReference + { + ReferenceValue.Raw right = new ReferenceValue.Substitution.Raw(dr); + addRawReferenceOperation(operations, key, new ReferenceOperation.Raw(new Operation.SetElement(k, dr), key, right)); + } + ) ; -udtColumnOperation[List> operations, ColumnIdentifier key, FieldIdentifier field] - : '=' t=term - { - addRawUpdate(operations, key, new Operation.SetField(field, t)); - } +udtColumnOperation[UpdateStatement.OperationCollector operations, ColumnIdentifier key, FieldIdentifier field] + : '=' + ( + t=term + { + addRawUpdate(operations, key, new Operation.SetField(field, t)); + } + | {isParsingTxn}? dr=rowDataReference + { + ReferenceValue.Raw right = new ReferenceValue.Substitution.Raw(dr); + addRawReferenceOperation(operations, key, new ReferenceOperation.Raw(new Operation.SetField(field, dr), key, right)); + } + ) ; columnCondition returns [ColumnCondition.Raw condition] diff --git a/src/java/org/apache/cassandra/audit/AuditLogEntryCategory.java b/src/java/org/apache/cassandra/audit/AuditLogEntryCategory.java index b848440607a5..9f9af16e8abb 100644 --- a/src/java/org/apache/cassandra/audit/AuditLogEntryCategory.java +++ b/src/java/org/apache/cassandra/audit/AuditLogEntryCategory.java @@ -23,5 +23,5 @@ */ public enum AuditLogEntryCategory { - QUERY, DML, DDL, DCL, OTHER, AUTH, ERROR, PREPARE, JMX + QUERY, DML, DDL, DCL, OTHER, AUTH, ERROR, PREPARE, JMX, TRANSACTION } diff --git a/src/java/org/apache/cassandra/audit/AuditLogEntryType.java b/src/java/org/apache/cassandra/audit/AuditLogEntryType.java index 2bbff08429e1..4ee3348055fb 100644 --- a/src/java/org/apache/cassandra/audit/AuditLogEntryType.java +++ b/src/java/org/apache/cassandra/audit/AuditLogEntryType.java @@ -63,6 +63,7 @@ public enum AuditLogEntryType DROP_IDENTITY(AuditLogEntryCategory.DCL), USE_KEYSPACE(AuditLogEntryCategory.OTHER), DESCRIBE(AuditLogEntryCategory.OTHER), + TRANSACTION(AuditLogEntryCategory.TRANSACTION), /* * Common Audit Log Entry Types diff --git a/src/java/org/apache/cassandra/audit/AuditLogFilter.java b/src/java/org/apache/cassandra/audit/AuditLogFilter.java index b775ac7785cf..ec53212bce97 100644 --- a/src/java/org/apache/cassandra/audit/AuditLogFilter.java +++ b/src/java/org/apache/cassandra/audit/AuditLogFilter.java @@ -28,7 +28,7 @@ final class AuditLogFilter { private static final Logger logger = LoggerFactory.getLogger(AuditLogFilter.class); - private static ImmutableSet EMPTY_FILTERS = ImmutableSet.of(); + private static final ImmutableSet EMPTY_FILTERS = ImmutableSet.of(); final ImmutableSet excludedKeyspaces; final ImmutableSet includedKeyspaces; diff --git a/src/java/org/apache/cassandra/concurrent/SingleThreadExecutorPlus.java b/src/java/org/apache/cassandra/concurrent/SingleThreadExecutorPlus.java index eb2827774a59..553855ad7bc5 100644 --- a/src/java/org/apache/cassandra/concurrent/SingleThreadExecutorPlus.java +++ b/src/java/org/apache/cassandra/concurrent/SingleThreadExecutorPlus.java @@ -25,6 +25,7 @@ public class SingleThreadExecutorPlus extends ThreadPoolExecutorPlus implements { public static class AtLeastOnce extends AtomicBoolean implements AtLeastOnceTrigger, Runnable { + private static final long serialVersionUID = 0; // for simulator support protected final SequentialExecutorPlus executor; protected final Runnable run; diff --git a/src/java/org/apache/cassandra/concurrent/Stage.java b/src/java/org/apache/cassandra/concurrent/Stage.java index 23f80b5a575f..d0ea2c10f597 100644 --- a/src/java/org/apache/cassandra/concurrent/Stage.java +++ b/src/java/org/apache/cassandra/concurrent/Stage.java @@ -47,6 +47,7 @@ public enum Stage MUTATION (true, "MutationStage", "request", DatabaseDescriptor::getConcurrentWriters, DatabaseDescriptor::setConcurrentWriters, Stage::multiThreadedLowSignalStage), COUNTER_MUTATION (true, "CounterMutationStage", "request", DatabaseDescriptor::getConcurrentCounterWriters, DatabaseDescriptor::setConcurrentCounterWriters, Stage::multiThreadedLowSignalStage), VIEW_MUTATION (true, "ViewMutationStage", "request", DatabaseDescriptor::getConcurrentViewWriters, DatabaseDescriptor::setConcurrentViewWriters, Stage::multiThreadedLowSignalStage), + ACCORD (true, "AccordStage", "request", DatabaseDescriptor::getConcurrentAccordOps, DatabaseDescriptor::setConcurrentAccordOps, Stage::multiThreadedLowSignalStage), GOSSIP (true, "GossipStage", "internal", () -> 1, null, Stage::singleThreadedStage), REQUEST_RESPONSE (false, "RequestResponseStage", "request", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedLowSignalStage), ANTI_ENTROPY (false, "AntiEntropyStage", "internal", () -> 1, null, Stage::singleThreadedStage), @@ -59,7 +60,6 @@ public enum Stage INTERNAL_METADATA (false, "InternalMetadataStage", "internal", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedStage), FETCH_LOG (false, "MetadataFetchLogStage", "internal", () -> 1, null, Stage::singleThreadedStage) ; - public final String jmxName; private final Supplier executorSupplier; private volatile ExecutorPlus executor; diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index e14afdcdcac3..c8f57f4e4c70 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -491,6 +491,8 @@ public enum CassandraRelevantProperties SERIALIZATION_EMPTY_TYPE_NONEMPTY_BEHAVIOR("cassandra.serialization.emptytype.nonempty_behavior"), SET_SEP_THREAD_NAME("cassandra.set_sep_thread_name", "true"), SHUTDOWN_ANNOUNCE_DELAY_IN_MS("cassandra.shutdown_announce_in_ms", "2000"), + SIMULATOR_SEED("cassandra.simulator.seed"), + SIMULATOR_STARTED("cassandra.simulator.started"), SIZE_RECORDER_INTERVAL("cassandra.size_recorder_interval", "300"), SKIP_AUTH_SETUP("cassandra.skip_auth_setup", "false"), SKIP_GC_INSPECTOR("cassandra.skip_gc_inspector", "false"), @@ -595,6 +597,7 @@ public enum CassandraRelevantProperties TEST_READ_ITERATION_DELAY_MS("cassandra.test.read_iteration_delay_ms", "0"), TEST_REUSE_PREPARED("cassandra.test.reuse_prepared", "true"), TEST_ROW_CACHE_SIZE("cassandra.test.row_cache_size"), + TEST_SEED("cassandra.test.seed"), TEST_SERIALIZATION_WRITES("cassandra.test-serialization-writes"), TEST_SIMULATOR_DEBUG("cassandra.test.simulator.debug"), TEST_SIMULATOR_DETERMINISM_CHECK("cassandra.test.simulator.determinismcheck", "none"), diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 2943c973ed7e..b8cae7b18263 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -174,6 +174,8 @@ public static Set splitCommaDelimited(String src) public volatile DurationSpec.LongMillisecondsBound stream_transfer_task_timeout = new DurationSpec.LongMillisecondsBound("12h"); + public volatile DurationSpec.LongMillisecondsBound transaction_timeout = new DurationSpec.LongMillisecondsBound("30s"); + public volatile DurationSpec.LongMillisecondsBound cms_await_timeout = new DurationSpec.LongMillisecondsBound("120000ms"); public volatile int cms_default_max_retries = 10; public volatile DurationSpec.IntMillisecondsBound cms_default_retry_backoff = new DurationSpec.IntMillisecondsBound("50ms"); @@ -188,6 +190,7 @@ public static Set splitCommaDelimited(String src) public int concurrent_reads = 32; public int concurrent_writes = 32; + public int concurrent_accord_operations = 32; public int concurrent_counter_writes = 32; public int concurrent_materialized_view_writes = 32; public int available_processors = -1; @@ -620,6 +623,8 @@ public static class SSTableConfig public volatile boolean use_statements_enabled = true; + public boolean accord_transactions_enabled = false; + /** * Optionally disable asynchronous UDF execution. * Disabling asynchronous UDF execution also implicitly disables the security-manager! @@ -1159,6 +1164,8 @@ public enum PaxosOnLinearizabilityViolation public volatile boolean client_request_size_metrics_enabled = true; + public LegacyPaxosStrategy legacy_paxos_strategy = LegacyPaxosStrategy.migration; + public volatile int max_top_size_partition_count = 10; public volatile int max_top_tombstone_partition_count = 10; public volatile DataStorageSpec.LongBytesBound min_tracked_partition_size = new DataStorageSpec.LongBytesBound("1MiB"); @@ -1374,6 +1381,29 @@ public enum TombstonesMetricGranularity cell } + /** + * How to pick a consensus protocol for CAS + * and serial read operations. Transaction statements + * will always run on Accord. Legacy in this context includes PaxosV2. + */ + public enum LegacyPaxosStrategy + { + /** + * Allow both Accord and PaxosV1/V2 to run on the same cluster + * Some keys and ranges might be running on Accord if they + * have been migrated and the rest will run on Paxos until + * they are migrated. + */ + migration, + + /** + * Everything will be run on Accord. Useful for new deployments + * that don't want to accidentally start using legacy Paxos + * requiring migration to Accord. + */ + accord + } + private static final Set SENSITIVE_KEYS = new HashSet() {{ add("client_encryption_options"); add("server_encryption_options"); @@ -1400,10 +1430,10 @@ public static void log(Config config) String value; try { - // Field.get() can throw NPE if the value of the field is null - value = field.get(config).toString(); + Object obj = field.get(config); + value = obj != null ? obj.toString() : "null"; } - catch (NullPointerException | IllegalAccessException npe) + catch (IllegalAccessException npe) { value = "null"; } diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 6afcabf5fb7e..5e0b01426d62 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -166,6 +166,9 @@ public class DatabaseDescriptor { + public static final String NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE = + "Cannot use legacy_paxos_strategy \"accord\" while Accord transactions are disabled."; + static { CHRONICLE_ANALYTICS_DISABLE.setBoolean(true); @@ -640,6 +643,9 @@ else if (conf.disk_access_mode == DiskAccessMode.direct) if (conf.concurrent_counter_writes < 2) throw new ConfigurationException("concurrent_counter_writes must be at least 2, but was " + conf.concurrent_counter_writes, false); + if (conf.concurrent_accord_operations < 1) + throw new ConfigurationException("concurrent_accord_operations must be at least 1, but was " + conf.concurrent_accord_operations, false); + if (conf.networking_cache_size == null) conf.networking_cache_size = new DataStorageSpec.IntMebibytesBound(Math.min(128, (int) (Runtime.getRuntime().maxMemory() / (16 * 1048576)))); @@ -1116,6 +1122,9 @@ else if (conf.max_value_size.toMebibytes() >= 2048) // run audit logging options through sanitation and validation if (conf.audit_logging_options != null) setAuditLoggingOptions(conf.audit_logging_options); + + if (conf.legacy_paxos_strategy == Config.LegacyPaxosStrategy.accord && !conf.accord_transactions_enabled) + throw new ConfigurationException(NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE); } @VisibleForTesting @@ -1473,6 +1482,12 @@ static void checkForLowestAcceptedTimeouts(Config conf) logInfo("truncate_request_timeout", conf.truncate_request_timeout, LOWEST_ACCEPTED_TIMEOUT); conf.truncate_request_timeout = LOWEST_ACCEPTED_TIMEOUT; } + + if (conf.transaction_timeout.toMilliseconds() < LOWEST_ACCEPTED_TIMEOUT.toMilliseconds()) + { + logInfo("transaction_timeout", conf.transaction_timeout, LOWEST_ACCEPTED_TIMEOUT); + conf.transaction_timeout = LOWEST_ACCEPTED_TIMEOUT; + } } private static void logInfo(String property, DurationSpec.LongMillisecondsBound actualValue, DurationSpec.LongMillisecondsBound lowestAcceptedValue) @@ -2427,6 +2442,16 @@ public static long getCasContentionTimeout(TimeUnit unit) return conf.cas_contention_timeout.to(unit); } + public static long getTransactionTimeout(TimeUnit unit) + { + return conf.transaction_timeout.to(unit); + } + + public static void setTransactionTimeout(long timeOutInMillis) + { + conf.transaction_timeout = new DurationSpec.LongMillisecondsBound(timeOutInMillis); + } + public static void setCasContentionTimeout(long timeOutInMillis) { conf.cas_contention_timeout = new DurationSpec.LongMillisecondsBound(timeOutInMillis); @@ -2645,6 +2670,20 @@ public static void setConcurrentViewWriters(int concurrent_materialized_view_wri conf.concurrent_materialized_view_writes = concurrent_materialized_view_writes; } + public static int getConcurrentAccordOps() + { + return conf.concurrent_accord_operations; + } + + public static void setConcurrentAccordOps(int concurrent_operations) + { + if (concurrent_operations < 0) + { + throw new IllegalArgumentException("Concurrent accord operations must be non-negative"); + } + conf.concurrent_accord_operations = concurrent_operations; + } + public static int getFlushWriters() { return conf.memtable_flush_writers; @@ -3545,6 +3584,11 @@ public static boolean paxoTopologyRepairStrictEachQuorum() return conf.paxos_topology_repair_strict_each_quorum; } + public static Config.LegacyPaxosStrategy getLegacyPaxosStrategy() + { + return conf.legacy_paxos_strategy; + } + public static void setNativeTransportMaxRequestDataInFlightPerIpInBytes(long maxRequestDataInFlightInBytes) { if (maxRequestDataInFlightInBytes == -1) @@ -5144,6 +5188,16 @@ public static void setUseStatementsEnabled(boolean enabled) } } + public static boolean getAccordTransactionsEnabled() + { + return conf.accord_transactions_enabled; + } + + public static void setAccordTransactionsEnabled(boolean b) + { + conf.accord_transactions_enabled = b; + } + public static boolean getForceNewPreparedStatementBehaviour() { return conf.force_new_prepared_statement_behaviour; diff --git a/src/java/org/apache/cassandra/cql3/CQLStatement.java b/src/java/org/apache/cassandra/cql3/CQLStatement.java index 349e79b30ff4..db9896361ecf 100644 --- a/src/java/org/apache/cassandra/cql3/CQLStatement.java +++ b/src/java/org/apache/cassandra/cql3/CQLStatement.java @@ -133,4 +133,14 @@ interface SingleKeyspaceCqlStatement extends CQLStatement { String keyspace(); } + + interface CompositeCQLStatement extends CQLStatement + { + Iterable getStatements(); + } + + interface ReturningCQLStatement extends CQLStatement + { + ResultSet.ResultMetadata getResultMetadata(); + } } diff --git a/src/java/org/apache/cassandra/cql3/Operation.java b/src/java/org/apache/cassandra/cql3/Operation.java index 7c5e02eb63e3..728e04b8a011 100644 --- a/src/java/org/apache/cassandra/cql3/Operation.java +++ b/src/java/org/apache/cassandra/cql3/Operation.java @@ -27,7 +27,15 @@ import org.apache.cassandra.cql3.terms.Term; import org.apache.cassandra.cql3.terms.UserTypes; import org.apache.cassandra.db.DecoratedKey; -import org.apache.cassandra.db.marshal.*; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.CounterColumnType; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.NumberType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.StringType; +import org.apache.cassandra.db.marshal.TupleType; +import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; @@ -62,6 +70,11 @@ protected Operation(ColumnMetadata column, Term t) this.t = t; } + public Term term() + { + return t; + } + public void addFunctionsTo(List functions) { if (t != null) @@ -69,8 +82,7 @@ public void addFunctionsTo(List functions) } /** - * @return whether the operation requires a read of the previous value to be executed - * (only lists setterByIdx, discard and discardByIdx requires that). + * @return whether the operation requires a read of the existing value to be executed */ public boolean requiresRead() { @@ -178,7 +190,7 @@ public Operation prepare(TableMetadata metadata, ColumnMetadata receiver, boolea if (receiver.type.isCollection()) { - switch (((CollectionType) receiver.type).kind) + switch (((CollectionType) receiver.type).kind) { case LIST: return new Lists.Setter(receiver, v); @@ -228,7 +240,7 @@ public Operation prepare(TableMetadata metadata, ColumnMetadata receiver, boolea else if (!(receiver.type.isMultiCell())) throw new InvalidRequestException(String.format("Invalid operation (%s) for frozen collection column %s", toString(receiver), receiver.name)); - switch (((CollectionType)receiver.type).kind) + switch (((CollectionType)receiver.type).kind) { case LIST: Term idx = selector.prepare(metadata.keyspace, Lists.indexSpecOf(receiver)); @@ -328,7 +340,7 @@ public Operation prepare(TableMetadata metadata, ColumnMetadata receiver, boolea else if (!(receiver.type.isMultiCell())) throw new InvalidRequestException(String.format("Invalid operation (%s) for frozen collection column %s", toString(receiver), receiver.name)); - switch (((CollectionType)receiver.type).kind) + switch (((CollectionType)receiver.type).kind) { case LIST: return new Lists.Appender(receiver, value.prepare(metadata.keyspace, receiver)); @@ -371,7 +383,7 @@ public Substraction(Term.Raw value) } public Operation prepare(TableMetadata metadata, ColumnMetadata receiver, boolean canReadExistingState) throws InvalidRequestException - { + { if (!(receiver.type instanceof CollectionType)) { if (canReadExistingState) @@ -389,7 +401,7 @@ public Operation prepare(TableMetadata metadata, ColumnMetadata receiver, boolea else if (!(receiver.type.isMultiCell())) throw new InvalidRequestException(String.format("Invalid operation (%s) for frozen collection column %s", toString(receiver), receiver.name)); - switch (((CollectionType)receiver.type).kind) + switch (((CollectionType)receiver.type).kind) { case LIST: return new Lists.Discarder(receiver, value.prepare(metadata.keyspace, receiver)); @@ -400,7 +412,7 @@ else if (!(receiver.type.isMultiCell())) ColumnSpecification vr = new ColumnSpecification(receiver.ksName, receiver.cfName, receiver.name, - SetType.getInstance(((MapType)receiver.type).getKeysType(), false)); + SetType.getInstance(((MapType) receiver.type).getKeysType(), true)); Term term; try { @@ -502,7 +514,7 @@ public Operation prepare(String keyspace, ColumnMetadata receiver, TableMetadata else if (!(receiver.type.isMultiCell())) throw new InvalidRequestException(String.format("Invalid deletion operation for frozen collection column %s", receiver.name)); - switch (((CollectionType)receiver.type).kind) + switch (((CollectionType)receiver.type).kind) { case LIST: Term idx = element.prepare(keyspace, Lists.indexSpecOf(receiver)); diff --git a/src/java/org/apache/cassandra/cql3/Operations.java b/src/java/org/apache/cassandra/cql3/Operations.java index a9451d7fc544..952305ec0d46 100644 --- a/src/java/org/apache/cassandra/cql3/Operations.java +++ b/src/java/org/apache/cassandra/cql3/Operations.java @@ -21,10 +21,12 @@ import java.util.Iterator; import java.util.List; +import com.google.common.collect.Iterators; + import org.apache.cassandra.cql3.functions.Function; import org.apache.cassandra.cql3.statements.StatementType; - -import com.google.common.collect.Iterators; +import org.apache.cassandra.cql3.transactions.ReferenceOperation; +import org.apache.cassandra.schema.ColumnMetadata; /** * A set of Operations. @@ -47,11 +49,34 @@ public final class Operations implements Iterable */ private final List staticOperations = new ArrayList<>(); + private final List regularSubstitutions = new ArrayList<>(); + private final List staticSubstitutions = new ArrayList<>(); + public Operations(StatementType type) { this.type = type; } + public void migrateReadRequiredOperations() + { + migrateReadRequiredOperations(staticOperations, staticSubstitutions); + migrateReadRequiredOperations(regularOperations, regularSubstitutions); + } + + private static void migrateReadRequiredOperations(List src, List dest) + { + Iterator it = src.iterator(); + while (it.hasNext()) + { + Operation next = it.next(); + if (next.requiresRead()) + { + it.remove(); + dest.add(ReferenceOperation.create(next)); + } + } + } + /** * Checks if some of the operations apply to static columns. * @@ -105,6 +130,14 @@ public void add(Operation operation) regularOperations.add(operation); } + public void add(ColumnMetadata column, ReferenceOperation operation) + { + if (column.isStatic()) + staticSubstitutions.add(operation); + else + regularSubstitutions.add(operation); + } + /** * Checks if one of the operations requires a read. * @@ -143,4 +176,29 @@ public void addFunctionsTo(List functions) regularOperations.forEach(p -> p.addFunctionsTo(functions)); staticOperations.forEach(p -> p.addFunctionsTo(functions)); } + + public List allSubstitutions() + { + if (staticSubstitutions.isEmpty()) + return regularSubstitutions; + + if (regularSubstitutions.isEmpty()) + return staticSubstitutions; + + // Only create a new list if we actually have something to combine + List list = new ArrayList<>(staticSubstitutions.size() + regularSubstitutions.size()); + list.addAll(staticSubstitutions); + list.addAll(regularSubstitutions); + return list; + } + + public List regularSubstitutions() + { + return regularSubstitutions; + } + + public List staticSubstitutions() + { + return staticSubstitutions; + } } diff --git a/src/java/org/apache/cassandra/cql3/Operator.java b/src/java/org/apache/cassandra/cql3/Operator.java index 64658b43226f..201a046b2f57 100644 --- a/src/java/org/apache/cassandra/cql3/Operator.java +++ b/src/java/org/apache/cassandra/cql3/Operator.java @@ -26,6 +26,7 @@ import java.util.Objects; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; import com.google.common.collect.RangeSet; @@ -39,13 +40,18 @@ import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.serializers.ListSerializer; import org.apache.cassandra.utils.ByteBufferUtil; +import static com.google.common.base.Preconditions.checkArgument; + import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; import static org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest; +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; public enum Operator { @@ -828,6 +834,25 @@ public enum Kind BINARY, TERNARY, MULTI_VALUE; }; + private static final Operator[] idToOperatorMapping; + + static + { + Operator[] operators = values(); + int maxId = Stream.of(operators) + .map(Operator::getValue) + .max(Integer::compareTo) + .get(); + + idToOperatorMapping = new Operator[maxId + 1]; + for (Operator operator : operators) + { + if (null != idToOperatorMapping[operator.b]) + throw new IllegalStateException("Duplicate Operator id " + operator.b); + idToOperatorMapping[operator.b] = operator; + } + } + /** * The binary representation of this Enum value. */ @@ -853,6 +878,17 @@ public void writeTo(DataOutput output) throws IOException output.writeInt(getValue()); } + /** + * Write the serialized version of this Operator to the specified output. + * + * @param output the output to write to + * @throws IOException if an I/O problem occurs while writing to the specified output + */ + public void writeToUnsignedVInt(DataOutputPlus output) throws IOException + { + output.writeUnsignedVInt32(b); + } + public int getValue() { return b; @@ -885,12 +921,27 @@ public boolean isTernary() */ public static Operator readFrom(DataInput input) throws IOException { - int b = input.readInt(); - for (Operator operator : values()) - if (operator.b == b) - return operator; + return fromBinary(input.readInt()); + } + + /** + * Deserializes a Operator instance from the specified input. + * + * @param input the input to read from + * @return the Operator instance deserialized + * @throws IOException if a problem occurs while deserializing the Type instance. + */ + public static Operator readFromUnsignedVInt(DataInputPlus input) throws IOException + { + return fromBinary(input.readUnsignedVInt32()); + } - throw new IOException(String.format("Cannot resolve Relation.Type from binary representation: %s", b)); + private static Operator fromBinary(int b) throws IOException + { + checkArgument(b > -1, "b must be > -1 to be a valid Operator id"); + if (b > idToOperatorMapping.length) + throw new IOException(String.format("Cannot resolve Operator from binary representation: %s", b)); + return idToOperatorMapping[b]; } @@ -1149,4 +1200,9 @@ private String buildCQLString(String leftOperand, T rightOperand, Function prefetchedRows; private Row.Builder staticBuilder; @@ -57,17 +56,14 @@ public class UpdateParameters private Row.Builder builder; public UpdateParameters(TableMetadata metadata, - RegularAndStaticColumns updatedColumns, ClientState clientState, QueryOptions options, long timestamp, long nowInSec, int ttl, - Map prefetchedRows) - throws InvalidRequestException + Map prefetchedRows) throws InvalidRequestException { this.metadata = metadata; - this.updatedColumns = updatedColumns; this.clientState = clientState; this.options = options; @@ -123,10 +119,20 @@ public Clustering currentClustering() public void addPrimaryKeyLivenessInfo() { - builder.addPrimaryKeyLivenessInfo(LivenessInfo.create(timestamp, ttl, nowInSec)); + addPrimaryKeyLivenessInfo(LivenessInfo.create(timestamp, ttl, nowInSec)); + } + + private void addPrimaryKeyLivenessInfo(LivenessInfo info) + { + builder.addPrimaryKeyLivenessInfo(info); } public void addRowDeletion() + { + addRowDeletion(Row.Deletion.regular(deletionTime)); + } + + private void addRowDeletion(Row.Deletion deletion) { // For compact tables, at the exclusion of the static row (of static compact tables), each row ever has a single column, // the "compact" one. As such, deleting the row or deleting that single cell is equivalent. We favor the later @@ -134,7 +140,7 @@ public void addRowDeletion() if (metadata.isCompactTable() && builder.clustering() != Clustering.STATIC_CLUSTERING) addTombstone(((TableMetadata.CompactTableMetadata) metadata).compactValueColumn); else - builder.addRowDeletion(Row.Deletion.regular(deletionTime)); + builder.addRowDeletion(deletion); } public void addTombstone(ColumnMetadata column) throws InvalidRequestException @@ -179,6 +185,14 @@ public Cell addCell(ColumnMetadata column, CellPath path, ByteBuffer value) t return cell; } + public void addRow(Row row) + { + newRow(row.clustering()); + addRowDeletion(row.deletion()); + addPrimaryKeyLivenessInfo(row.primaryKeyLivenessInfo()); + row.cells().forEach(builder::addCell); + } + private void validateColumnSize(ColumnMetadata column, ByteBuffer value) { CQL3Type cql3Type = column.type.asCQL3Type(); diff --git a/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java b/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java index 1d0afcddc2e0..650e6ab9821e 100644 --- a/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java +++ b/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.cql3.conditions; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.*; @@ -42,11 +43,17 @@ import org.apache.cassandra.db.rows.ColumnData; import org.apache.cassandra.db.rows.ComplexColumnData; import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.ByteBufferUtil; import static org.apache.cassandra.cql3.statements.RequestValidations.*; +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; +import static org.apache.cassandra.service.accord.AccordSerializers.columnMetadataSerializer; +import static org.apache.cassandra.utils.ByteBufferUtil.nullableByteBufferSerializer; /** * A CQL3 condition on the value of a column or collection element. For example, "UPDATE .. IF a = 0". @@ -171,6 +178,41 @@ public String toCQLString() return operator.buildCQLString(columnsExpression, values); } + private interface BoundSerializer + { + default void serialize(T bound, DataOutputPlus out, int version) throws IOException {} + Bound deserialize(DataInputPlus in, int version, ColumnMetadata column, Operator operator, ByteBuffer value) throws IOException; + default long serializedSize(T condition, int version) { return 0; } + } + + enum BoundKind + { + Simple(0, SimpleBound.serializer), + ElementOrFieldAccess(1, ElementOrFieldAccessBound.serializer), + MultiCell(2, MultiCellBound.serializer); + + private final int id; + @SuppressWarnings("rawtypes") + private final BoundSerializer serializer; + + BoundKind(int id, BoundSerializer serializer) + { + this.id = id; + this.serializer = serializer; + } + + static BoundKind valueOf(int id) + { + switch (id) + { + case 0: return BoundKind.Simple; + case 1: return BoundKind.ElementOrFieldAccess; + case 2: return BoundKind.MultiCell; + default: throw new IllegalArgumentException("Unknown id: " + id); + } + } + } + public static abstract class Bound { protected final ColumnMetadata column; @@ -188,14 +230,55 @@ protected Bound(ColumnMetadata column, Operator operator, ByteBuffer value) * Validates whether this condition applies to {@code current}. */ public abstract boolean appliesTo(Row row); + + protected abstract BoundKind kind(); + + public static final IVersionedSerializer serializer = new IVersionedSerializer<>() + { + @Override + @SuppressWarnings("unchecked") + public void serialize(Bound bound, DataOutputPlus out, int version) throws IOException + { + columnMetadataSerializer.serialize(bound.column, out, version); + bound.operator.writeToUnsignedVInt(out); + nullableByteBufferSerializer.serialize(bound.value, out, version); + BoundKind kind = bound.kind(); + out.writeUnsignedVInt32(kind.ordinal()); + kind.serializer.serialize(bound, out, version); + } + + @Override + public Bound deserialize(DataInputPlus in, int version) throws IOException + { + ColumnMetadata column = columnMetadataSerializer.deserialize(in, version); + Operator operator = Operator.readFromUnsignedVInt(in); + ByteBuffer value = nullableByteBufferSerializer.deserialize(in, version); + BoundKind boundKind = BoundKind.valueOf(in.readUnsignedVInt32()); + return boundKind.serializer.deserialize(in, version, column, operator, value); + } + + @Override + @SuppressWarnings("unchecked") + public long serializedSize(Bound bound, int version) + { + BoundKind kind = bound.kind(); + return columnMetadataSerializer.serializedSize(bound.column, version) + + bound.operator.sizeAsUnsignedVInt() + + nullableByteBufferSerializer.serializedSize(bound.value, version) + + sizeofUnsignedVInt(kind.ordinal()) + + kind.serializer.serializedSize(bound, version); + } + }; } /** * A condition on a single non-collection column. */ - private static final class SimpleBound extends Bound + public static class SimpleBound extends Bound { - private SimpleBound(ColumnMetadata column, Operator operator, ByteBuffer value) + private static final BoundSerializer serializer = (in, version, column, operator, value) -> new SimpleBound(column, operator, value); + + public SimpleBound(ColumnMetadata column, Operator operator, ByteBuffer value) { super(column, operator, value); } @@ -206,7 +289,7 @@ public boolean appliesTo(Row row) return operator.isSatisfiedBy(column.type, rowValue(row), value); } - private ByteBuffer rowValue(Row row) + protected ByteBuffer rowValue(Row row) { // If we're asking for a given cell, and we didn't get any row from our read, it's // the same as not having said cell. @@ -216,13 +299,70 @@ private ByteBuffer rowValue(Row row) Cell c = row.getCell(column); return c == null ? null : c.buffer(); } + + @Override + protected BoundKind kind() + { + return BoundKind.Simple; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SimpleBound bound = (SimpleBound) o; + return column.equals(bound.column) && operator == bound.operator && Objects.equals(value, bound.value); + } + + @Override + public int hashCode() + { + return Objects.hash(column, operator, value); + } + } + + public static class SimpleClusteringBound extends SimpleBound + { + public SimpleClusteringBound(ColumnMetadata column, Operator operator, ByteBuffer value) + { + super(column, operator, value); + assert column.isClusteringColumn() : String.format("Column must be a clustering column, but given %s", column); + } + + @Override + protected ByteBuffer rowValue(Row row) + { + return row == null ? null : row.clustering().bufferAt(column.position()); + } } /** * A condition on a collection element or a UDT field. */ - private static final class ElementOrFieldAccessBound extends Bound + public static final class ElementOrFieldAccessBound extends Bound { + private static final BoundSerializer serializer = new BoundSerializer<>() + { + @Override + public void serialize(ElementOrFieldAccessBound bound, DataOutputPlus out, int version) throws IOException + { + nullableByteBufferSerializer.serialize(bound.keyOrIndex, out, version); + } + + @Override + public Bound deserialize(DataInputPlus in, int version, ColumnMetadata column, Operator operator, ByteBuffer value) throws IOException + { + ByteBuffer keyOrIndex = nullableByteBufferSerializer.deserialize(in, version); + return new ElementOrFieldAccessBound(column, keyOrIndex, operator, value); + } + + @Override + public long serializedSize(ElementOrFieldAccessBound condition, int version) + { + return nullableByteBufferSerializer.serializedSize(condition.keyOrIndex, version); + } + }; /** * The collection element or UDT field type. */ @@ -234,16 +374,22 @@ private static final class ElementOrFieldAccessBound extends Bound private final ByteBuffer keyOrIndex; - private ElementOrFieldAccessBound(ColumnMetadata column, - ByteBuffer keyOrIndex, - Operator operator, - ByteBuffer value) + public ElementOrFieldAccessBound(ColumnMetadata column, + ByteBuffer keyOrIndex, + Operator operator, + ByteBuffer value) { super(column, operator, value); this.elementType = ((MultiElementType) column.type).elementType(keyOrIndex); this.keyOrIndex = keyOrIndex; } + @Override + protected BoundKind kind() + { + return BoundKind.ElementOrFieldAccess; + } + @Override public boolean appliesTo(Row row) { @@ -260,17 +406,40 @@ private ColumnData columnData(Row row) { return row == null ? null : row.getColumnData(column); } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ElementOrFieldAccessBound bound = (ElementOrFieldAccessBound) o; + return column.equals(bound.column) && operator == bound.operator && Objects.equals(value, bound.value) && Objects.equals(keyOrIndex, bound.keyOrIndex); + } + + @Override + public int hashCode() + { + return Objects.hash(column, operator, value); + } } /** * A condition on a multicell column. */ - private static final class MultiCellBound extends Bound + public static final class MultiCellBound extends Bound { + private static final BoundSerializer serializer = (in, version, column, operator, value) -> new MultiCellBound(column, operator, value); + public MultiCellBound(ColumnMetadata column, Operator operator, ByteBuffer value) { super(column, operator, value); - assert column.type.isMultiCell(); + assert column.type.isMultiCell() : String.format("Unexpected type: %s", column.type); + } + + @Override + protected BoundKind kind() + { + return BoundKind.MultiCell; } public boolean appliesTo(Row row) @@ -278,6 +447,21 @@ public boolean appliesTo(Row row) ComplexColumnData columnData = row == null ? null : row.getComplexColumnData(column); return operator.isSatisfiedBy((MultiElementType) column.type, columnData, value); } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MultiCellBound bound = (MultiCellBound) o; + return column.equals(bound.column) && operator == bound.operator && Objects.equals(value, bound.value); + } + + @Override + public int hashCode() + { + return Objects.hash(column, operator, value); + } } public static class Raw diff --git a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java index e619993c6af6..f827942b5ce7 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java @@ -868,13 +868,23 @@ private void validateSecondaryIndexSelections() * * @return true if all the primary key columns are restricted by an equality relation. */ - public boolean hasAllPKColumnsRestrictedByEqualities() + public boolean hasAllPrimaryKeyColumnsRestrictedByEqualities() + { + return hasAllPartitionKeyColumnsRestrictedByEqualities() + && !hasUnrestrictedClusteringColumns() + && (clusteringColumnsRestrictions.hasOnlyEqualityRestrictions()); + } + + /** + * Checks that all the partition key columns are restricted by an equality relation ('=' or 'IN'). + * + * @return true if all the partition key columns are restricted by an equality relation. + */ + public boolean hasAllPartitionKeyColumnsRestrictedByEqualities() { return !isPartitionKeyRestrictionsOnToken() - && !partitionKeyRestrictions.hasUnrestrictedPartitionKeyComponents() - && (partitionKeyRestrictions.hasOnlyEqualityRestrictions()) - && !hasUnrestrictedClusteringColumns() - && (clusteringColumnsRestrictions.hasOnlyEqualityRestrictions()); + && !partitionKeyRestrictions.hasUnrestrictedPartitionKeyComponents() + && (partitionKeyRestrictions.hasOnlyEqualityRestrictions()); } /** diff --git a/src/java/org/apache/cassandra/cql3/selection/Selectable.java b/src/java/org/apache/cassandra/cql3/selection/Selectable.java index 56ee558ebb32..c4e7dd6d73f9 100644 --- a/src/java/org/apache/cassandra/cql3/selection/Selectable.java +++ b/src/java/org/apache/cassandra/cql3/selection/Selectable.java @@ -577,8 +577,8 @@ public boolean selectColumns(Predicate predicate) public static class Raw implements Selectable.Raw { - private final Selectable.Raw selected; - private final FieldIdentifier field; + public final Selectable.Raw selected; + public final FieldIdentifier field; public Raw(Selectable.Raw selected, FieldIdentifier field) { @@ -1471,8 +1471,8 @@ public boolean selectColumns(Predicate predicate) public static class Raw implements Selectable.Raw { - private final Selectable.Raw selected; - private final Term.Raw element; + public final Selectable.Raw selected; + public final Term.Raw element; public Raw(Selectable.Raw selected, Term.Raw element) { diff --git a/src/java/org/apache/cassandra/cql3/selection/Selection.java b/src/java/org/apache/cassandra/cql3/selection/Selection.java index 743da6934e05..34d0bf9207ad 100644 --- a/src/java/org/apache/cassandra/cql3/selection/Selection.java +++ b/src/java/org/apache/cassandra/cql3/selection/Selection.java @@ -18,7 +18,12 @@ package org.apache.cassandra.cql3.selection; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; import com.google.common.base.MoreObjects; import com.google.common.base.Predicate; @@ -134,6 +139,11 @@ public ResultSet.ResultMetadata getResultMetadata() return resultMetadata; } + public static Selection.Selectors noopSelector() + { + return new SimpleSelectors(); + } + public static Selection wildcard(TableMetadata table, boolean isJson, boolean returnStaticContentOnPartitionWithNoRows) { List all = new ArrayList<>(table.columns().size()); @@ -346,55 +356,72 @@ private static List rowToJson(List row, return Arrays.asList(jsonRow); } - public static interface Selectors + public interface Selectors { /** * Returns the {@code ColumnFilter} corresponding to those selectors * * @return the {@code ColumnFilter} corresponding to those selectors */ - public ColumnFilter getColumnFilter(); + default ColumnFilter getColumnFilter() { return ColumnFilter.NONE; } /** * Checks if this Selectors perform some processing * @return {@code true} if this Selectors perform some processing, {@code false} otherwise. */ - public boolean hasProcessing(); + default boolean hasProcessing() { return false; } /** * Checks if one of the selectors perform some aggregations. * @return {@code true} if one of the selectors perform some aggregations, {@code false} otherwise. */ - public boolean isAggregate(); - - /** - * Returns the number of fetched columns - * @return the number of fetched columns - */ - public int numberOfFetchedColumns(); + default boolean isAggregate() { return false; } /** * Checks if one of the selectors collect TTLs. * @return {@code true} if one of the selectors collect TTLs, {@code false} otherwise. */ - public boolean collectTTLs(); + default boolean collectTTLs() { return false; } /** * Checks if one of the selectors collects write timestamps. * @return {@code true} if one of the selectors collects write timestamps, {@code false} otherwise. */ - public boolean collectWritetimes(); + default boolean collectWritetimes() { return false; } /** * Adds the current row of the specified ResultSetBuilder. * * @param input the input row */ - public void addInputRow(InputRow input); + void addInputRow(InputRow input); - public List getOutputRow(); + List getOutputRow(); - public void reset(); + void reset(); + } + + public static class SimpleSelectors implements Selectors + { + protected List current; + + @Override + public void addInputRow(InputRow input) + { + current = input.getValues(); + } + + @Override + public List getOutputRow() + { + return current; + } + + @Override + public void reset() + { + current = null; + } } // Special cased selection for when only columns are selected. @@ -466,15 +493,9 @@ public boolean isAggregate() public Selectors newSelectors(QueryOptions options) { - return new Selectors() + return new SimpleSelectors() { - private List current; - - public void reset() - { - current = null; - } - + @Override public List getOutputRow() { if (isJson) @@ -482,39 +503,6 @@ public List getOutputRow() return current; } - public void addInputRow(InputRow input) - { - current = input.getValues(); - } - - public boolean isAggregate() - { - return false; - } - - public boolean hasProcessing() - { - return false; - } - - @Override - public int numberOfFetchedColumns() - { - return getColumns().size(); - } - - @Override - public boolean collectTTLs() - { - return false; - } - - @Override - public boolean collectWritetimes() - { - return false; - } - @Override public ColumnFilter getColumnFilter() { @@ -615,12 +603,6 @@ public void addInputRow(InputRow input) selector.addInput(input); } - @Override - public int numberOfFetchedColumns() - { - return getColumns().size(); - } - @Override public boolean collectTTLs() { diff --git a/src/java/org/apache/cassandra/cql3/selection/Selector.java b/src/java/org/apache/cassandra/cql3/selection/Selector.java index fce2ef063407..fa22ea0bb5e2 100644 --- a/src/java/org/apache/cassandra/cql3/selection/Selector.java +++ b/src/java/org/apache/cassandra/cql3/selection/Selector.java @@ -414,7 +414,7 @@ private void add(ComplexColumnData ccd, long nowInSec) UserType udt = (UserType) type; int size = udt.size(); - values[index] = udt.serializeForNativeProtocol(ccd.iterator(), protocolVersion); + values[index] = udt.serializeForNativeProtocol(ccd.iterator()); short fieldPosition = 0; for (Cell cell : ccd) diff --git a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java index 6c5b12199252..c99dac31219c 100644 --- a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java @@ -59,7 +59,7 @@ /** * A BATCH statement parsed from a CQL query. */ -public class BatchStatement implements CQLStatement +public class BatchStatement implements CQLStatement.CompositeCQLStatement { public enum Type { @@ -268,6 +268,7 @@ public void validate(ClientState state) throws InvalidRequestException statement.validate(state); } + @Override public List getStatements() { return statements; @@ -617,7 +618,7 @@ public String toString() return String.format("BatchStatement(type=%s, statements=%s)", type, statements); } - public static class Parsed extends QualifiedStatement + public static class Parsed extends QualifiedStatement.Composite { private final Type type; private final Attributes.Raw attrs; @@ -625,21 +626,15 @@ public static class Parsed extends QualifiedStatement public Parsed(Type type, Attributes.Raw attrs, List parsedStatements) { - super(null); this.type = type; this.attrs = attrs; this.parsedStatements = parsedStatements; } - // Not doing this in the constructor since we only need this for prepared statements @Override - public boolean isFullyQualified() + protected Iterable getStatements() { - for (ModificationStatement.Parsed statement : parsedStatements) - if (!statement.isFullyQualified()) - return false; - - return true; + return parsedStatements; } @Override diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index 332f8b9388fa..96d75d2651e0 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -17,28 +17,60 @@ */ package org.apache.cassandra.cql3.statements; -import java.util.*; - import org.apache.cassandra.db.marshal.TimeUUIDType; import org.apache.cassandra.index.IndexRegistry; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.cql3.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +import accord.api.Update; +import accord.primitives.Txn; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.UpdateParameters; import org.apache.cassandra.cql3.conditions.ColumnCondition; -import org.apache.cassandra.db.*; -import org.apache.cassandra.db.filter.*; -import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.Columns; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.Slice; +import org.apache.cassandra.db.Slices; +import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter; +import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.filter.DataLimits; +import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.db.partitions.Partition; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.CASRequest; import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.accord.txn.TxnCondition; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnDataName; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.TxnReference; +import org.apache.cassandra.service.accord.txn.TxnUpdate; +import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.TimeUUID; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; +import static com.google.common.base.Preconditions.checkState; +import static org.apache.cassandra.service.accord.txn.TxnDataName.Kind.USER; /** * Processed CAS conditions and update on potentially multiple rows of the same partition. @@ -259,9 +291,9 @@ private static class CASUpdateParameters extends UpdateParameters final long timeUuidMsb; long timeUuidNanos; - public CASUpdateParameters(TableMetadata metadata, RegularAndStaticColumns updatedColumns, ClientState state, QueryOptions options, long timestamp, long nowInSec, int ttl, Map prefetchedRows, long timeUuidMsb, long timeUuidNanos) throws InvalidRequestException + public CASUpdateParameters(TableMetadata metadata, ClientState state, QueryOptions options, long timestamp, long nowInSec, int ttl, Map prefetchedRows, long timeUuidMsb, long timeUuidNanos) throws InvalidRequestException { - super(metadata, updatedColumns, state, options, timestamp, nowInSec, ttl, prefetchedRows); + super(metadata, state, options, timestamp, nowInSec, ttl, prefetchedRows); this.timeUuidMsb = timeUuidMsb; this.timeUuidNanos = timeUuidNanos; } @@ -299,7 +331,7 @@ long applyUpdates(FilteredPartition current, PartitionUpdate.Builder updateBuild { Map map = stmt.requiresRead() ? Collections.singletonMap(key, current) : null; CASUpdateParameters params = - new CASUpdateParameters(metadata, updateBuilder.columns(), state, options, timestamp, nowInSeconds, + new CASUpdateParameters(metadata, state, options, timestamp, nowInSeconds, stmt.getTimeToLive(options), map, timeUuidMsb, timeUuidNanos); stmt.addUpdateForKey(updateBuilder, clustering, params); return params.timeUuidNanos; @@ -329,7 +361,6 @@ void applyUpdates(FilteredPartition current, PartitionUpdate.Builder updateBuild Map map = stmt.requiresRead() ? Collections.singletonMap(key, current) : null; UpdateParameters params = new UpdateParameters(metadata, - updateBuilder.columns(), state, options, timestamp, @@ -350,6 +381,8 @@ protected RowCondition(Clustering clustering) } public abstract boolean appliesTo(FilteredPartition current) throws InvalidRequestException; + + public abstract TxnCondition asTxnCondition(); } private interface ToCQL @@ -374,6 +407,13 @@ public String toCQL() { return "IF NOT EXISTS"; } + + public TxnCondition asTxnCondition() + { + TxnDataName txnDataName = new TxnDataName(USER, clustering, TxnRead.SERIAL_READ_NAME); + TxnReference txnReference = new TxnReference(txnDataName, null); + return new TxnCondition.Exists(txnReference, TxnCondition.Kind.IS_NULL); + } } private static class ExistCondition extends RowCondition implements ToCQL @@ -393,6 +433,13 @@ public String toCQL() { return "IF EXISTS"; } + + public TxnCondition asTxnCondition() + { + TxnDataName txnDataName = new TxnDataName(USER, clustering, TxnRead.SERIAL_READ_NAME); + TxnReference txnReference = new TxnReference(txnDataName, null); + return new TxnCondition.Exists(txnReference, TxnCondition.Kind.IS_NOT_NULL); + } } private static class ColumnsConditions extends RowCondition @@ -422,6 +469,12 @@ public boolean appliesTo(FilteredPartition current) throws InvalidRequestExcepti } return true; } + + @Override + public TxnCondition asTxnCondition() + { + return new TxnCondition.ColumnConditionsAdapter(clustering, conditions); + } } @Override @@ -429,4 +482,56 @@ public String toString() { return ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE); } + + @Override + public Txn toAccordTxn(ClientState clientState, long nowInSecs) + { + SinglePartitionReadCommand readCommand = readCommand(nowInSecs); + Update update = createUpdate(clientState); + // In a CAS request only one key is supported and writes + // can't be dependent on any data that is read (only conditions) + // so the only relevant keys are the read key + TxnRead read = TxnRead.createSerialRead(readCommand); + return new Txn.InMemory(read.keys(), read, TxnQuery.CONDITION, update); + } + + private Update createUpdate(ClientState clientState) + { + return new TxnUpdate(createWriteFragments(clientState), createCondition()); + } + + private TxnCondition createCondition() + { + List txnConditions = new ArrayList<>(conditions.size() + (staticConditions == null ? 0 : 1)); + if (staticConditions != null) + { + txnConditions.add(staticConditions.asTxnCondition()); + } + for (RowCondition condition : conditions.values()) + txnConditions.add(condition.asTxnCondition()); + // CAS forbids empty conditions + checkState(!txnConditions.isEmpty()); + return conditions.size() == 1 ? txnConditions.get(0) : new TxnCondition.BooleanGroup(TxnCondition.Kind.AND, txnConditions); + } + + private List createWriteFragments(ClientState state) + { + List fragments = new ArrayList<>(); + int idx = 0; + for (RowUpdate update : updates) + { + ModificationStatement modification = update.stmt; + QueryOptions options = update.options; + TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options); + fragments.add(fragment); + } + return fragments; + } + + @Override + public RowIterator toCasResult(TxnData txnData) + { + FilteredPartition partition = txnData.get(TxnRead.SERIAL_READ); + return partition != null ? partition.rowIterator() : null; + } } diff --git a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java index 0bc22842556d..3ba34319a70e 100644 --- a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java @@ -177,7 +177,7 @@ protected ModificationStatement prepareInternal(ClientState state, conditions, attrs); - if (stmt.hasConditions() && !restrictions.hasAllPKColumnsRestrictedByEqualities()) + if (stmt.hasConditions() && !restrictions.hasAllPrimaryKeyColumnsRestrictedByEqualities()) { checkFalse(stmt.isVirtual(), "DELETE statements must restrict all PRIMARY KEY columns with equality relations"); diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index 21da99c14af4..a4a3c50de022 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -18,15 +18,45 @@ package org.apache.cassandra.cql3.statements; import java.nio.ByteBuffer; -import java.util.*; - +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.Set; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.collect.HashMultiset; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; + +import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.auth.Permission; +import org.apache.cassandra.cql3.Attributes; +import org.apache.cassandra.cql3.CQLStatement; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.Ordering; +import org.apache.cassandra.cql3.terms.Constants; +import org.apache.cassandra.cql3.Operation; +import org.apache.cassandra.cql3.Operations; +import org.apache.cassandra.cql3.QualifiedName; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.ResultSet; +import org.apache.cassandra.cql3.UpdateParameters; +import org.apache.cassandra.cql3.Validation; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.cql3.WhereClause; import org.apache.cassandra.cql3.constraints.ConstraintViolationException; import org.apache.cassandra.db.guardrails.Guardrails; import org.apache.cassandra.dht.Token; @@ -37,7 +67,6 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.ViewMetadata; -import org.apache.cassandra.cql3.*; import org.apache.cassandra.cql3.conditions.ColumnCondition; import org.apache.cassandra.cql3.conditions.ColumnConditions; import org.apache.cassandra.cql3.conditions.Conditions; @@ -46,6 +75,7 @@ import org.apache.cassandra.cql3.selection.ResultSetBuilder; import org.apache.cassandra.cql3.selection.Selection; import org.apache.cassandra.cql3.selection.Selection.Selectors; +import org.apache.cassandra.cql3.transactions.ReferenceOperation; import org.apache.cassandra.db.*; import org.apache.cassandra.db.filter.*; import org.apache.cassandra.db.marshal.BooleanType; @@ -62,8 +92,12 @@ import org.apache.cassandra.service.paxos.BallotGenerator; import org.apache.cassandra.service.paxos.Commit.Proposal; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperation; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperations; +import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.triggers.TriggerExecutor; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MD5Digest; @@ -146,6 +180,15 @@ public ModificationStatement(StatementType type, requiresReadBuilder.add(operation.column); } } + for (ReferenceOperation operation : operations.allSubstitutions()) + { + ColumnMetadata receiver = operation.getReceiver(); + updatedColumnsBuilder.add(receiver); + // If the operation requires a read-before-write, make sure its receiver is selected by the auto-read the + // transaction creates during update creation. (see createSelectForTxn()) + if (operation.requiresRead()) + requiresReadBuilder.add(receiver); + } RegularAndStaticColumns modifiedColumns = updatedColumnsBuilder.build(); @@ -389,6 +432,11 @@ public List getStaticOperations() return operations.staticOperations(); } + public Collection allReferenceOperations() + { + return operations.allSubstitutions(); + } + public Iterable getColumnsWithConditions() { return conditions.getColumns(); @@ -451,7 +499,7 @@ public boolean requiresRead() // * Deleting list element by value // * Performing addition on a StringType (i.e. concatenation, only supported for CAS operations) // * Performing addition on a NumberType, again only supported for CAS operations. - return !requiresRead.isEmpty(); + return operations.requiresRead(); } private Map readRequiredLists(Collection partitionKeys, @@ -775,7 +823,7 @@ static RowIterator casInternal(ClientState state, CQL3CasRequest request, long t * * @return list of the mutations */ - private List getMutations(ClientState state, + public List getMutations(ClientState state, QueryOptions options, boolean local, long timestamp, @@ -797,6 +845,56 @@ private List getMutations(ClientState state, } } + @VisibleForTesting + public PartitionUpdate getTxnUpdate(ClientState state, QueryOptions options) + { + List mutations = getMutations(state, options, false, 0, 0, new Dispatcher.RequestTime(0, 0)); + if (mutations.size() != 1) + throw new IllegalArgumentException("When running withing a transaction, modification statements may only mutate a single partition"); + return Iterables.getOnlyElement(mutations.get(0).getPartitionUpdates()); + } + + private static List getTxnReferenceOps(List operations, QueryOptions options) + { + if (operations.isEmpty()) + return Collections.emptyList(); + + List result = new ArrayList<>(operations.size()); + for (ReferenceOperation operation : operations) + result.add(operation.bindAndGet(options)); + return result; + } + + public TxnReferenceOperations getTxnReferenceOps(QueryOptions options, ClientState state) + { + List regularOps = getTxnReferenceOps(operations.regularSubstitutions(), options); + List staticOps = getTxnReferenceOps(operations.staticSubstitutions(), options); + Clustering clustering = !regularOps.isEmpty() ? Iterables.getOnlyElement(createClustering(options, state)) : null; + return new TxnReferenceOperations(metadata, clustering, regularOps, staticOps); + } + + @VisibleForTesting + public void migrateReadRequiredOperations() + { + operations.migrateReadRequiredOperations(); + } + + @VisibleForTesting + public List getSubstitutions() + { + return operations.allSubstitutions(); + } + + public TxnWrite.Fragment getTxnWriteFragment(int index, ClientState state, QueryOptions options) + { + // When an Operation requires a read, this cannot be done right away and must be done by the transaction itself, + // so migrate those Operations to a ReferenceOperation (which works properly in this case). + operations.migrateReadRequiredOperations(); + PartitionUpdate baseUpdate = getTxnUpdate(state, options); + TxnReferenceOperations referenceOps = getTxnReferenceOps(options, state); + return new TxnWrite.Fragment(index, baseUpdate, referenceOps); + } + final void addUpdates(UpdatesCollector collector, List keys, ClientState state, @@ -949,7 +1047,6 @@ private UpdateParameters makeUpdateParameters(Collection keys, requestTime); return new UpdateParameters(metadata(), - updatedColumns(), state, options, getTimestamp(timestamp, options), @@ -995,6 +1092,7 @@ public ModificationStatement prepare(ClientState state, VariableSpecifications b Conditions preparedConditions = prepareConditions(metadata, bindVariables); + // TODO: if this is a txn and has a read name, and updates non-static columns, confirm it selects an entire row return prepareInternal(state, metadata, bindVariables, preparedConditions, preparedAttributes); } @@ -1019,7 +1117,6 @@ private Conditions prepareConditions(TableMetadata metadata, VariableSpecificati if (ifNotExists) { assert conditions.isEmpty(); - assert !ifExists; return Conditions.IF_NOT_EXISTS_CONDITION; } @@ -1088,4 +1185,23 @@ public List getConditions() return conditions; } } + + private static final Constants.Value ONE = new Constants.Value(ByteBufferUtil.bytes(1)); + + public SelectStatement createSelectForTxn() + { + // TODO: get working with static-only updates that don't specify any/all primary key columns + Preconditions.checkState(getRestrictions().hasAllPrimaryKeyColumnsRestrictedByEqualities()); + Selection selection = Selection.forColumns(metadata, Lists.newArrayList(requiresRead), false); + return new SelectStatement(metadata, + bindVariables, + SelectStatement.defaultParameters, + selection, + getRestrictions(), + false, + null, + null, + ONE, + null); + } } diff --git a/src/java/org/apache/cassandra/cql3/statements/QualifiedStatement.java b/src/java/org/apache/cassandra/cql3/statements/QualifiedStatement.java index 4ed41d168888..c7183a9d1b54 100644 --- a/src/java/org/apache/cassandra/cql3/statements/QualifiedStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/QualifiedStatement.java @@ -78,4 +78,50 @@ public String toString() { return ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE); } + + public static abstract class Composite extends QualifiedStatement + { + Composite() + { + super(null); + } + + protected abstract Iterable getStatements(); + + @Override + public boolean isFullyQualified() + { + for (QualifiedStatement statement : getStatements()) + if (!statement.isFullyQualified()) + return false; + + return true; + } + + @Override + public void setKeyspace(ClientState state) + { + for (QualifiedStatement statement : getStatements()) + statement.setKeyspace(state); + } + + @Override + public void setKeyspace(String keyspace) + { + for (QualifiedStatement statement : getStatements()) + statement.setKeyspace(keyspace); + } + + @Override + public String keyspace() + { + return null; + } + + @Override + public String name() + { + return null; + } + } } diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java index 209ba88f5260..26d8d3f467d3 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java @@ -18,10 +18,20 @@ package org.apache.cassandra.cql3.statements; import java.nio.ByteBuffer; -import java.util.*; -import java.util.stream.Collectors; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.Set; +import java.util.TreeMap; import java.util.concurrent.TimeUnit; - +import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; import com.google.common.annotations.VisibleForTesting; @@ -30,24 +40,26 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; - +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; import org.apache.cassandra.auth.Permission; -import org.apache.cassandra.cql3.restrictions.SingleRestriction; -import org.apache.cassandra.cql3.terms.Term; -import org.apache.cassandra.db.guardrails.Guardrails; -import org.apache.cassandra.index.Index; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.TableMetadataRef; -import org.apache.cassandra.cql3.*; +import org.apache.cassandra.cql3.CQLStatement; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.Ordering; +import org.apache.cassandra.cql3.QualifiedName; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.ResultSet; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.cql3.WhereClause; import org.apache.cassandra.cql3.functions.Function; +import org.apache.cassandra.cql3.restrictions.SingleRestriction; import org.apache.cassandra.cql3.restrictions.StatementRestrictions; import org.apache.cassandra.cql3.selection.RawSelector; import org.apache.cassandra.cql3.selection.ResultSetBuilder; @@ -56,10 +68,31 @@ import org.apache.cassandra.cql3.selection.Selection; import org.apache.cassandra.cql3.selection.Selection.Selectors; import org.apache.cassandra.cql3.selection.Selector; -import org.apache.cassandra.db.*; +import org.apache.cassandra.cql3.terms.Marker; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DataRange; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.db.PartitionRangeReadQuery; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.ReadQuery; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.SinglePartitionReadQuery; +import org.apache.cassandra.db.Slice; +import org.apache.cassandra.db.Slices; import org.apache.cassandra.db.aggregation.AggregationSpecification; import org.apache.cassandra.db.aggregation.GroupMaker; -import org.apache.cassandra.db.filter.*; +import org.apache.cassandra.db.filter.ClusteringIndexFilter; +import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter; +import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.filter.DataLimits; +import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.guardrails.Guardrails; import org.apache.cassandra.db.marshal.CompositeType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.partitions.PartitionIterator; @@ -67,9 +100,20 @@ import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.view.View; import org.apache.cassandra.dht.AbstractBounds; -import org.apache.cassandra.exceptions.*; -import org.apache.cassandra.metrics.ClientRequestSizeMetrics; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.exceptions.ReadSizeAbortException; +import org.apache.cassandra.exceptions.RequestExecutionException; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestValidationException; +import org.apache.cassandra.exceptions.UnauthorizedException; +import org.apache.cassandra.index.Index; import org.apache.cassandra.index.IndexRegistry; +import org.apache.cassandra.metrics.ClientRequestSizeMetrics; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.TableMetadataRef; import org.apache.cassandra.serializers.MarshalException; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.ClientWarn; @@ -85,9 +129,6 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.NoSpamLogger; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; - import static java.lang.String.format; import static org.apache.cassandra.cql3.restrictions.StatementRestrictions.requiresAllowFilteringIfNotSpecified; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; @@ -109,7 +150,7 @@ * Note that select statements can be accessed by multiple threads, so we cannot rely on mutable attributes. */ @ThreadSafe -public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement +public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement, CQLStatement.ReturningCQLStatement { private static final Logger logger = LoggerFactory.getLogger(SelectStatement.class); private static final NoSpamLogger noSpamLogger = NoSpamLogger.getLogger(SelectStatement.logger, 1, TimeUnit.MINUTES); @@ -147,7 +188,7 @@ public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement private final ColumnComparator> orderingComparator; // Used by forSelection below - private static final Parameters defaultParameters = new Parameters(Collections.emptyList(), + public static final Parameters defaultParameters = new Parameters(Collections.emptyList(), Collections.emptyList(), false, false, @@ -243,6 +284,7 @@ static SelectStatement forSelection(TableMetadata table, Selection selection) null); } + @Override public ResultSet.ResultMetadata getResultMetadata() { return selection.getResultMetadata(); @@ -911,6 +953,11 @@ public int getLimit(QueryOptions options) return getLimit(limit, options); } + public boolean isLimitMarker() + { + return limit instanceof Marker; + } + /** * Returns the per partition limit specified by the user. * May be used by custom QueryHandler implementations @@ -1186,10 +1233,28 @@ public SelectStatement prepare(ClientState state) { // Cache locally for use by Guardrails this.state = state; - return prepare(state, false); + return prepare(state, false, bindVariables); + } + + public SelectStatement prepare(ClientState state, boolean forView) + { + return prepare(state, forView, bindVariables); + } + + public SelectStatement prepare(VariableSpecifications variableSpecifications) + { + return prepare(state, false, variableSpecifications); } - public SelectStatement prepare(ClientState state, boolean forView) throws InvalidRequestException + public SelectStatement prepare(boolean forView) + { + return prepare(state, forView, bindVariables); + } + + /** + * @throws InvalidRequestException if the statement being prepared is invalid + */ + public SelectStatement prepare(ClientState state, boolean forView, VariableSpecifications variableSpecifications) throws InvalidRequestException { TableMetadata table = Schema.instance.validateTable(keyspace(), name()); @@ -1197,7 +1262,7 @@ public SelectStatement prepare(ClientState state, boolean forView) throws Invali boolean containsOnlyStaticColumns = selectOnlyStaticColumns(table, selectables); List orderings = getOrderings(table); - StatementRestrictions restrictions = prepareRestrictions(state, table, bindVariables, orderings, containsOnlyStaticColumns, forView); + StatementRestrictions restrictions = prepareRestrictions(state, table, variableSpecifications, orderings, containsOnlyStaticColumns, forView); // If we order post-query, the sorted column needs to be in the ResultSet for sorting, // even if we don't ultimately ship them to the client (CASSANDRA-4911). @@ -1206,7 +1271,7 @@ public SelectStatement prepare(ClientState state, boolean forView) throws Invali Selection selection = prepareSelection(table, selectables, - bindVariables, + variableSpecifications, resultSetOrderingColumns, restrictions); @@ -1242,15 +1307,15 @@ public SelectStatement prepare(ClientState state, boolean forView) throws Invali checkNeedsFiltering(table, restrictions); return new SelectStatement(table, - bindVariables, + variableSpecifications, parameters, selection, restrictions, isReversed, aggregationSpecFactory, orderingComparator, - prepareLimit(bindVariables, limit, keyspace(), limitReceiver()), - prepareLimit(bindVariables, perPartitionLimit, keyspace(), perPartitionLimitReceiver())); + prepareLimit(variableSpecifications, limit, keyspace(), limitReceiver()), + prepareLimit(variableSpecifications, perPartitionLimit, keyspace(), perPartitionLimitReceiver())); } private Set getResultSetOrdering(StatementRestrictions restrictions, Map orderingColumns) @@ -1620,18 +1685,30 @@ public static class Parameters public final boolean isDistinct; public final boolean allowFiltering; public final boolean isJson; + public final String refName; public Parameters(List orderings, List groups, boolean isDistinct, boolean allowFiltering, boolean isJson) + { + this(orderings, groups, isDistinct, allowFiltering, isJson, null); + } + + public Parameters(List orderings, + List groups, + boolean isDistinct, + boolean allowFiltering, + boolean isJson, + String refName) { this.orderings = orderings; this.groups = groups; this.isDistinct = isDistinct; this.allowFiltering = allowFiltering; this.isJson = isJson; + this.refName = refName; } } @@ -1810,7 +1887,7 @@ private String loggableTokens(QueryOptions options, ClientState state) } } - private String asCQL(QueryOptions options, ClientState state) + public String asCQL(QueryOptions options, ClientState state) { ColumnFilter columnFilter = selection.newSelectors(options).getColumnFilter(); StringBuilder sb = new StringBuilder(); diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java new file mode 100644 index 000000000000..d8adb6787a92 --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -0,0 +1,563 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.statements; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.Key; +import accord.primitives.Keys; +import accord.primitives.Txn; +import org.apache.cassandra.audit.AuditLogContext; +import org.apache.cassandra.audit.AuditLogEntryType; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLStatement; +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.ResultSet; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.cql3.selection.ResultSetBuilder; +import org.apache.cassandra.cql3.selection.Selection; +import org.apache.cassandra.cql3.transactions.ConditionStatement; +import org.apache.cassandra.cql3.transactions.ReferenceOperation; +import org.apache.cassandra.cql3.transactions.RowDataReference; +import org.apache.cassandra.cql3.transactions.SelectReferenceSource; +import org.apache.cassandra.db.ReadQuery; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.SinglePartitionReadQuery; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.txn.TxnCondition; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnDataName; +import org.apache.cassandra.service.accord.txn.TxnNamedRead; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.TxnReference; +import org.apache.cassandra.service.accord.txn.TxnUpdate; +import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.transport.messages.ResultMessage; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.LazyToString; + +import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; +import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull; +import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; + +public class TransactionStatement implements CQLStatement.CompositeCQLStatement, CQLStatement.ReturningCQLStatement +{ + private static final Logger logger = LoggerFactory.getLogger(TransactionStatement.class); + + public static final String DUPLICATE_TUPLE_NAME_MESSAGE = "The name '%s' has already been used by a LET assignment."; + public static final String INCOMPLETE_PRIMARY_KEY_LET_MESSAGE = "SELECT in LET assignment must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; CQL %s"; + public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "Normal SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; CQL %s"; + public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions."; + public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps."; + public static final String EMPTY_TRANSACTION_MESSAGE = "Transaction contains no reads or writes"; + public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; + public static final String TRANSACTIONS_DISABLED_MESSAGE = "Accord transactions are disabled. (See accord_transactions_enabled in cassandra.yaml)"; + public static final String ILLEGAL_RANGE_QUERY_MESSAGE = "Range queries are not allowed for reads within a transaction"; + + static class NamedSelect + { + final TxnDataName name; + final SelectStatement select; + + public NamedSelect(TxnDataName name, SelectStatement select) + { + this.name = name; + this.select = select; + } + } + + private final List assignments; + private final NamedSelect returningSelect; + private final List returningReferences; + private final List updates; + private final List conditions; + + private final VariableSpecifications bindVariables; + private final ResultSet.ResultMetadata resultMetadata; + + public TransactionStatement(List assignments, + NamedSelect returningSelect, + List returningReferences, + List updates, + List conditions, + VariableSpecifications bindVariables) + { + this.assignments = assignments; + this.returningSelect = returningSelect; + this.returningReferences = returningReferences; + this.updates = updates; + this.conditions = conditions; + this.bindVariables = bindVariables; + + if (returningSelect != null) + { + resultMetadata = returningSelect.select.getResultMetadata(); + } + else if (returningReferences != null && !returningReferences.isEmpty()) + { + List names = new ArrayList<>(returningReferences.size()); + for (RowDataReference reference : returningReferences) + names.add(reference.toResultMetadata()); + resultMetadata = new ResultSet.ResultMetadata(names); + } + else + { + resultMetadata = ResultSet.ResultMetadata.EMPTY; + } + } + + public List getUpdates() + { + return updates; + } + + @Override + public ImmutableList getBindVariables() + { + return bindVariables.getImmutableBindVariables(); + } + + @Override + public void authorize(ClientState state) + { + // Assess read permissions for all data from both explicit LET statements and generated reads. + for (NamedSelect let : assignments) + let.select.authorize(state); + + if (returningSelect != null) + returningSelect.select.authorize(state); + + for (ModificationStatement update : updates) + update.authorize(state); + } + + @Override + public void validate(ClientState state) + { + for (NamedSelect statement : assignments) + statement.select.validate(state); + if (returningSelect != null) + returningSelect.select.validate(state); + for (ModificationStatement statement : updates) + statement.validate(state); + } + + @Override + public Iterable getStatements() + { + return () -> { + Stream stream = assignments.stream().map(n -> n.select); + if (returningSelect != null) + stream = Stream.concat(stream, Stream.of(returningSelect.select)); + stream = Stream.concat(stream, updates.stream()); + return stream.iterator(); + }; + } + + @Override + public ResultSet.ResultMetadata getResultMetadata() + { + return resultMetadata; + } + + TxnNamedRead createNamedRead(NamedSelect namedSelect, QueryOptions options, ClientState state) + { + SelectStatement select = namedSelect.select; + ReadQuery readQuery = select.getQuery(options, 0); + checkTrue(readQuery instanceof SinglePartitionReadQuery.Group, ILLEGAL_RANGE_QUERY_MESSAGE, select.asCQL(options, state)); + + // We reject reads from both LET and SELECT that do not specify a single row. + @SuppressWarnings("unchecked") + SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) readQuery; + + if (selectQuery.queries.size() != 1) + throw new IllegalArgumentException("Within a transaction, SELECT statements must select a single partition; found " + selectQuery.queries.size() + " partitions"); + + return new TxnNamedRead(namedSelect.name, Iterables.getOnlyElement(selectQuery.queries)); + } + + List createNamedReads(NamedSelect namedSelect, QueryOptions options, ClientState state) + { + SelectStatement select = namedSelect.select; + ReadQuery readQuery = select.getQuery(options, 0); + checkTrue(readQuery instanceof SinglePartitionReadQuery.Group, ILLEGAL_RANGE_QUERY_MESSAGE, select.asCQL(options, state)); + + // We reject reads from both LET and SELECT that do not specify a single row. + @SuppressWarnings("unchecked") + SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) readQuery; + + if (selectQuery.queries.size() == 1) + return Collections.singletonList(new TxnNamedRead(namedSelect.name, Iterables.getOnlyElement(selectQuery.queries))); + + List list = new ArrayList<>(selectQuery.queries.size()); + for (int i = 0; i < selectQuery.queries.size(); i++) + list.add(new TxnNamedRead(TxnDataName.returning(i), selectQuery.queries.get(i))); + return list; + } + + private List createNamedReads(QueryOptions options, ClientState state, Map autoReads, Consumer keyConsumer) + { + List reads = new ArrayList<>(assignments.size() + 1); + + for (NamedSelect select : assignments) + { + TxnNamedRead read = createNamedRead(select, options, state); + keyConsumer.accept(read.key()); + reads.add(read); + } + + if (returningSelect != null) + { + for (TxnNamedRead read : createNamedReads(returningSelect, options, state)) + { + keyConsumer.accept(read.key()); + reads.add(read); + } + } + + for (NamedSelect select : autoReads.values()) + // don't need keyConsumer as the keys are known to exist due to Modification + reads.add(createNamedRead(select, options, state)); + + return reads; + } + + TxnCondition createCondition(QueryOptions options) + { + if (conditions.isEmpty()) + return TxnCondition.none(); + if (conditions.size() == 1) + return conditions.get(0).createCondition(options); + + List result = new ArrayList<>(conditions.size()); + for (ConditionStatement condition : conditions) + result.add(condition.createCondition(options)); + + // TODO: OR support + return new TxnCondition.BooleanGroup(TxnCondition.Kind.AND, result); + } + + List createWriteFragments(ClientState state, QueryOptions options, Map autoReads, Consumer keyConsumer) + { + List fragments = new ArrayList<>(updates.size()); + int idx = 0; + for (ModificationStatement modification : updates) + { + TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx, state, options); + keyConsumer.accept(fragment.key); + fragments.add(fragment); + + if (modification.allReferenceOperations().stream().anyMatch(ReferenceOperation::requiresRead)) + { + // Reads are not merged by partition here due to potentially differing columns retrieved, etc. + TxnDataName partitionName = TxnDataName.partitionRead(modification.metadata(), fragment.key.partitionKey(), idx); + if (!autoReads.containsKey(partitionName)) + autoReads.put(partitionName, new NamedSelect(partitionName, modification.createSelectForTxn())); + } + + idx++; + } + return fragments; + } + + TxnUpdate createUpdate(ClientState state, QueryOptions options, Map autoReads, Consumer keyConsumer) + { + return new TxnUpdate(createWriteFragments(state, options, autoReads, keyConsumer), createCondition(options)); + } + + Keys toKeys(SortedSet keySet) + { + return new Keys(keySet); + } + + @VisibleForTesting + public Txn createTxn(ClientState state, QueryOptions options) + { + SortedSet keySet = new TreeSet<>(); + + if (updates.isEmpty()) + { + // TODO: Test case around this... + Preconditions.checkState(conditions.isEmpty(), "No condition should exist without updates present"); + List reads = createNamedReads(options, state, ImmutableMap.of(), keySet::add); + Keys txnKeys = toKeys(keySet); + TxnRead read = new TxnRead(reads, txnKeys); + return new Txn.InMemory(txnKeys, read, TxnQuery.ALL); + } + else + { + Map autoReads = new HashMap<>(); + TxnUpdate update = createUpdate(state, options, autoReads, keySet::add); + List reads = createNamedReads(options, state, autoReads, keySet::add); + Keys txnKeys = toKeys(keySet); + TxnRead read = new TxnRead(reads, txnKeys); + return new Txn.InMemory(txnKeys, read, TxnQuery.ALL, update); + } + } + + private static void checkAtMostOneRowSpecified(ClientState clientState, @Nullable QueryOptions options, SelectStatement select, String failureMessage) + { + if (select.getRestrictions().hasAllPrimaryKeyColumnsRestrictedByEqualities()) + return; + + if (options == null) + { + // If the limit is a non-terminal marker (because we're preparing), defer validation until execution. + if (select.isLimitMarker()) + return; + + // The limit is already defined, so proceed with validation... + options = QueryOptions.DEFAULT; + } + + int limit = select.getLimit(options); + QueryOptions finalOptions = options; // javac thinks this is mutable so requires a copy + checkTrue(limit == 1 && select.getRestrictions().hasAllPartitionKeyColumnsRestrictedByEqualities(), failureMessage, LazyToString.lazy(() -> select.asCQL(finalOptions, clientState))); + } + + @Override + public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher.RequestTime requestTime) + { + checkTrue(DatabaseDescriptor.getAccordTransactionsEnabled(), TRANSACTIONS_DISABLED_MESSAGE); + + try + { + for (NamedSelect assignment : assignments) + checkAtMostOneRowSpecified(state.getClientState(), options, assignment.select, INCOMPLETE_PRIMARY_KEY_LET_MESSAGE); + + if (returningSelect != null) + checkAtMostOneRowSpecified(state.getClientState(), options, returningSelect.select, INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE); + + TxnData data = AccordService.instance().coordinate(createTxn(state.getClientState(), options), options.getConsistency()); + + if (returningSelect != null) + { + ReadQuery readQuery = returningSelect.select.getQuery(options, 0); + checkTrue(readQuery instanceof SinglePartitionReadQuery.Group, ILLEGAL_RANGE_QUERY_MESSAGE, returningSelect.select.asCQL(options, state.getClientState())); + + @SuppressWarnings("unchecked") + SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) readQuery; + Selection.Selectors selectors = returningSelect.select.getSelection().newSelectors(options); + ResultSetBuilder result = new ResultSetBuilder(resultMetadata, selectors, false); + if (selectQuery.queries.size() == 1) + { + FilteredPartition partition = data.get(TxnDataName.returning()); + if (partition != null) + returningSelect.select.processPartition(partition.rowIterator(), options, result, FBUtilities.nowInSeconds()); + } + else + { + long nowInSec = FBUtilities.nowInSeconds(); + for (int i = 0; i < selectQuery.queries.size(); i++) + { + FilteredPartition partition = data.get(TxnDataName.returning(i)); + if (partition != null) + returningSelect.select.processPartition(partition.rowIterator(), options, result, nowInSec); + } + } + return new ResultMessage.Rows(result.build()); + } + + if (returningReferences != null) + { + List> resultType = new ArrayList<>(returningReferences.size()); + List columns = new ArrayList<>(returningReferences.size()); + + for (RowDataReference reference : returningReferences) + { + ColumnMetadata forMetadata = reference.toResultMetadata(); + resultType.add(forMetadata.type); + columns.add(reference.column()); + } + + ResultSetBuilder result = new ResultSetBuilder(resultMetadata, Selection.noopSelector(), false); + result.newRow(options.getProtocolVersion(), null, null, columns); + + for (int i = 0; i < returningReferences.size(); i++) + { + RowDataReference reference = returningReferences.get(i); + TxnReference txnReference = reference.toTxnReference(options); + ByteBuffer buffer = txnReference.toByteBuffer(data, resultType.get(i)); + result.add(buffer); + } + + return new ResultMessage.Rows(result.build()); + } + + // In the case of a write-only transaction, just return and empty result. + // TODO: This could be modified to return an indication of whether a condition (if present) succeeds. + return new ResultMessage.Void(); + } + catch (Throwable t) + { + logger.error("Unexpected error with transaction", t); + throw t; + } + } + + @Override + public ResultMessage executeLocally(QueryState state, QueryOptions options) + { + return execute(state, options, Dispatcher.RequestTime.forImmediateExecution()); + } + + @Override + public AuditLogContext getAuditLogContext() + { + return new AuditLogContext(AuditLogEntryType.TRANSACTION); + } + + @Override + public boolean eligibleAsPreparedStatement() + { + // false is the default, but still best to be explicit. + return false; + } + + public static class Parsed extends QualifiedStatement.Composite + { + private final List assignments; + private final SelectStatement.RawStatement select; + private final List returning; + private final List updates; + private final List conditions; + private final List dataReferences; + + public Parsed(List assignments, + SelectStatement.RawStatement select, + List returning, + List updates, + List conditions, + List dataReferences) + { + this.assignments = assignments; + this.select = select; + this.returning = returning; + this.updates = updates; + this.conditions = conditions != null ? conditions : Collections.emptyList(); + this.dataReferences = dataReferences; + } + + @Override + protected Iterable getStatements() + { + Iterable group = Iterables.concat(assignments, updates); + if (select != null) + group = Iterables.concat(group, Collections.singleton(select)); + return group; + } + + @Override + public CQLStatement prepare(ClientState state) + { + checkFalse(updates.isEmpty() && returning == null && select == null, EMPTY_TRANSACTION_MESSAGE); + + if (select != null || returning != null) + checkTrue(select != null ^ returning != null, "Cannot specify both a full SELECT and a SELECT w/ LET references."); + + List preparedAssignments = new ArrayList<>(assignments.size()); + Map refSources = new HashMap<>(); + Set selectNames = new HashSet<>(); + + for (SelectStatement.RawStatement select : assignments) + { + checkNotNull(select.parameters.refName, "Assignments must be named"); + TxnDataName name = TxnDataName.user(select.parameters.refName); + checkTrue(selectNames.add(name), DUPLICATE_TUPLE_NAME_MESSAGE, name.name()); + + SelectStatement prepared = select.prepare(bindVariables); + NamedSelect namedSelect = new NamedSelect(name, prepared); + checkAtMostOneRowSpecified(state, null, namedSelect.select, INCOMPLETE_PRIMARY_KEY_LET_MESSAGE); + preparedAssignments.add(namedSelect); + refSources.put(name, new SelectReferenceSource(prepared)); + } + + if (dataReferences != null) + for (RowDataReference.Raw reference : dataReferences) + reference.resolveReference(refSources); + + NamedSelect returningSelect = null; + if (select != null) + { + returningSelect = new NamedSelect(TxnDataName.returning(), select.prepare(bindVariables)); + checkAtMostOneRowSpecified(state, null, returningSelect.select, INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE); + } + + List returningReferences = null; + + if (returning != null) + { + // TODO: Eliminate/modify this check if we allow full tuple selections. + returningReferences = returning.stream().peek(raw -> checkTrue(raw.column() != null, SELECT_REFS_NEED_COLUMN_MESSAGE)) + .map(RowDataReference.Raw::prepareAsReceiver) + .collect(Collectors.toList()); + } + + List preparedUpdates = new ArrayList<>(updates.size()); + + // check for any read-before-write updates + for (int i = 0; i < updates.size(); i++) + { + ModificationStatement.Parsed parsed = updates.get(i); + + ModificationStatement prepared = parsed.prepare(state, bindVariables); + checkFalse(prepared.hasConditions(), NO_CONDITIONS_IN_UPDATES_MESSAGE); + checkFalse(prepared.isTimestampSet(), NO_TIMESTAMPS_IN_UPDATES_MESSAGE); + + preparedUpdates.add(prepared); + } + + List preparedConditions = new ArrayList<>(conditions.size()); + for (ConditionStatement.Raw condition : conditions) + // TODO: If we eventually support IF ks.function(ref) THEN, the keyspace will have to be provided here + preparedConditions.add(condition.prepare("[txn]", bindVariables)); + + return new TransactionStatement(preparedAssignments, returningSelect, returningReferences, preparedUpdates, preparedConditions, bindVariables); + } + } +} diff --git a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java index f6ecda2b87e9..2344850ceeb9 100644 --- a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java @@ -18,10 +18,13 @@ package org.apache.cassandra.cql3.statements; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; +import com.google.common.base.Preconditions; + import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; import org.apache.cassandra.cql3.*; @@ -31,6 +34,8 @@ import org.apache.cassandra.cql3.restrictions.StatementRestrictions; import org.apache.cassandra.cql3.terms.Constants; import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.cql3.transactions.ReferenceOperation; +import org.apache.cassandra.cql3.transactions.ReferenceValue; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.Slice; import org.apache.cassandra.db.partitions.PartitionUpdate; @@ -39,6 +44,7 @@ import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperation; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.Pair; import org.apache.commons.lang3.builder.ToStringBuilder; @@ -53,6 +59,9 @@ */ public class UpdateStatement extends ModificationStatement { + public static final String UPDATING_PRIMARY_KEY_MESSAGE = "PRIMARY KEY part %s found in SET part"; + public static final String CANNOT_SET_KEY_WITH_REFERENCE_MESSAGE = "Value reference %s cannot be used to insert PRIMARY KEY column %s"; + private static final Constants.Value EMPTY = new Constants.Value(ByteBufferUtil.EMPTY_BYTE_BUFFER); private UpdateStatement(StatementType type, @@ -174,8 +183,16 @@ protected ModificationStatement prepareInternal(ClientState state, if (def.isPrimaryKeyColumn()) { + checkFalse(value instanceof ReferenceValue.Raw, String.format(CANNOT_SET_KEY_WITH_REFERENCE_MESSAGE, value, def)); whereClause.add(Relation.singleColumn(columnNames.get(i), Operator.EQ, value)); } + else if (value instanceof ReferenceValue.Raw) + { + ReferenceValue.Raw raw = (ReferenceValue.Raw) value; + ReferenceValue referenceValue = raw.prepare(def, bindVariables); + ReferenceOperation operation = new ReferenceOperation(def, TxnReferenceOperation.Kind.setterFor(def), null, null, referenceValue); + operations.add(def, operation); + } else { Operation operation = new Operation.SetValue(value).prepare(metadata, def, !conditions.isEmpty()); @@ -277,11 +294,59 @@ protected ModificationStatement prepareInternal(ClientState state, } } + public static class OperationCollector + { + public final List> operations = new ArrayList<>(); + public final List> referenceOps = new ArrayList<>(); + + public boolean conflictsWithExistingUpdate(ColumnIdentifier column, Operation.RawUpdate update) + { + for (Pair p : operations) + { + if (p.left.equals(column) && !p.right.isCompatibleWith(update)) + return true; + } + return false; + } + + public boolean conflictsWithExistingSubstitution(ColumnIdentifier column) + { + for (Pair p : referenceOps) + { + if (p.left.equals(column)) + return true; + } + return false; + } + + public void addRawUpdate(ColumnIdentifier column, Operation.RawUpdate update) + { + operations.add(Pair.create(column, update)); + } + + public boolean conflictsWithExistingUpdate(ColumnIdentifier column) + { + for (Pair p : operations) + { + if (p.left.equals(column)) + return true; + } + return false; + } + + public void addRawReferenceOperation(ColumnIdentifier column, ReferenceOperation.Raw substitution) + { + // TODO: Make sure there's more than a tuple name here...i.e. an actual reference column? + referenceOps.add(Pair.create(column, substitution)); + } + } + public static class ParsedUpdate extends ModificationStatement.Parsed { // Provided for an UPDATE - private final List> updates; + private final OperationCollector updates; private final WhereClause whereClause; + private final boolean isForTxn; /** * Creates a new UpdateStatement from a column family name, columns map, consistency @@ -295,14 +360,16 @@ public static class ParsedUpdate extends ModificationStatement.Parsed * */ public ParsedUpdate(QualifiedName name, Attributes.Raw attrs, - List> updates, + OperationCollector updates, WhereClause whereClause, List conditions, - boolean ifExists) + boolean ifExists, + boolean isForTxn) { super(name, StatementType.UPDATE, attrs, conditions, false, ifExists); this.updates = updates; this.whereClause = whereClause; + this.isForTxn = isForTxn; } @Override @@ -314,17 +381,24 @@ protected ModificationStatement prepareInternal(ClientState state, { Operations operations = new Operations(type); - for (Pair entry : updates) + for (Pair entry : updates.operations) { ColumnMetadata def = metadata.getExistingColumn(entry.left); - - checkFalse(def.isPrimaryKeyColumn(), "PRIMARY KEY part %s found in SET part", def.name); - - Operation operation = entry.right.prepare(metadata, def, !conditions.isEmpty()); + checkFalse(def.isPrimaryKeyColumn(), UPDATING_PRIMARY_KEY_MESSAGE, def.name); + Operation operation = entry.right.prepare(metadata, def, !conditions.isEmpty() || isForTxn); operation.collectMarkerSpecification(bindVariables); operations.add(operation); } + Preconditions.checkState(updates.referenceOps.isEmpty() || isForTxn); + for (Pair entry : updates.referenceOps) + { + ColumnMetadata def = metadata.getExistingColumn(entry.left); + checkFalse(def.isPrimaryKeyColumn(), UPDATING_PRIMARY_KEY_MESSAGE, def.name); + ReferenceOperation operation = entry.right.prepare(metadata, bindVariables); + operations.add(def, operation); + } + StatementRestrictions restrictions = newRestrictions(state, metadata, bindVariables, diff --git a/src/java/org/apache/cassandra/cql3/terms/Constants.java b/src/java/org/apache/cassandra/cql3/terms/Constants.java index 3b9ae6b4c90e..a912f5556d7f 100644 --- a/src/java/org/apache/cassandra/cql3/terms/Constants.java +++ b/src/java/org/apache/cassandra/cql3/terms/Constants.java @@ -20,7 +20,6 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import org.apache.cassandra.cql3.AssignmentTestable; import org.apache.cassandra.cql3.CQL3Type; @@ -30,7 +29,24 @@ import org.apache.cassandra.cql3.UpdateParameters; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.DecoratedKey; -import org.apache.cassandra.db.marshal.*; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.AsciiType; +import org.apache.cassandra.db.marshal.BooleanType; +import org.apache.cassandra.db.marshal.ByteType; +import org.apache.cassandra.db.marshal.BytesType; +import org.apache.cassandra.db.marshal.CounterColumnType; +import org.apache.cassandra.db.marshal.DecimalType; +import org.apache.cassandra.db.marshal.DoubleType; +import org.apache.cassandra.db.marshal.DurationType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.IntegerType; +import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.marshal.NumberType; +import org.apache.cassandra.db.marshal.ReversedType; +import org.apache.cassandra.db.marshal.StringType; +import org.apache.cassandra.db.marshal.TimeUUIDType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.db.rows.Cell; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.exceptions.InvalidRequestException; @@ -39,12 +55,13 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FastByteOperations; +import static java.nio.charset.StandardCharsets.US_ASCII; + /** * Static helper methods and classes for constants. */ public abstract class Constants { - private static ByteBuffer getCurrentCellBuffer(ColumnMetadata column, DecoratedKey key, UpdateParameters params) { Row currentRow = params.getPrefetchedRow(key, column.isStatic() ? Clustering.STATIC_CLUSTERING : params.currentClustering()); @@ -59,7 +76,7 @@ public enum Type @Override public AbstractType getPreferedTypeFor(String text) { - if (StandardCharsets.US_ASCII.newEncoder().canEncode(text)) + if (US_ASCII.newEncoder().canEncode(text)) { return AsciiType.instance; } @@ -272,6 +289,7 @@ public static Literal duration(String text) return new Literal(Type.DURATION, text); } + @Override public Value prepare(String keyspace, ColumnSpecification receiver) throws InvalidRequestException { if (!testAssignment(keyspace, receiver).isAssignable()) diff --git a/src/java/org/apache/cassandra/cql3/terms/Lists.java b/src/java/org/apache/cassandra/cql3/terms/Lists.java index 1e95fcfd83c4..153316226757 100644 --- a/src/java/org/apache/cassandra/cql3/terms/Lists.java +++ b/src/java/org/apache/cassandra/cql3/terms/Lists.java @@ -312,7 +312,7 @@ private static int existingSize(Row row, ColumnMetadata column) public static class SetterByIndex extends Operation { - private final Term idx; + public final Term idx; public SetterByIndex(ColumnMetadata column, Term idx, Term t) { diff --git a/src/java/org/apache/cassandra/cql3/terms/Maps.java b/src/java/org/apache/cassandra/cql3/terms/Maps.java index 4355e8a3bf5c..b21d84bf81ed 100644 --- a/src/java/org/apache/cassandra/cql3/terms/Maps.java +++ b/src/java/org/apache/cassandra/cql3/terms/Maps.java @@ -267,7 +267,7 @@ public void execute(DecoratedKey partitionKey, UpdateParameters params) throws I public static class SetterByKey extends Operation { - private final Term k; + public final Term k; public SetterByKey(ColumnMetadata column, Term k, Term t) { diff --git a/src/java/org/apache/cassandra/cql3/terms/UserTypes.java b/src/java/org/apache/cassandra/cql3/terms/UserTypes.java index 08c6abb722cb..85b33efba2c3 100644 --- a/src/java/org/apache/cassandra/cql3/terms/UserTypes.java +++ b/src/java/org/apache/cassandra/cql3/terms/UserTypes.java @@ -255,7 +255,7 @@ public void execute(DecoratedKey partitionKey, UpdateParameters params) throws I public static class SetterByField extends Operation { - private final FieldIdentifier field; + public final FieldIdentifier field; public SetterByField(ColumnMetadata column, FieldIdentifier field, Term t) { diff --git a/src/java/org/apache/cassandra/cql3/transactions/ConditionStatement.java b/src/java/org/apache/cassandra/cql3/transactions/ConditionStatement.java new file mode 100644 index 000000000000..2ce6f3350236 --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/transactions/ConditionStatement.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.transactions; + +import com.google.common.base.Preconditions; + +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.service.accord.txn.TxnCondition; + +public class ConditionStatement +{ + public enum Kind + { + IS_NOT_NULL(TxnCondition.Kind.IS_NOT_NULL, null), + IS_NULL(TxnCondition.Kind.IS_NULL, null), + EQ(TxnCondition.Kind.EQUAL, TxnCondition.Kind.EQUAL), + NEQ(TxnCondition.Kind.NOT_EQUAL, TxnCondition.Kind.NOT_EQUAL), + GT(TxnCondition.Kind.GREATER_THAN, TxnCondition.Kind.LESS_THAN), + GTE(TxnCondition.Kind.GREATER_THAN_OR_EQUAL, TxnCondition.Kind.LESS_THAN_OR_EQUAL), + LT(TxnCondition.Kind.LESS_THAN, TxnCondition.Kind.GREATER_THAN), + LTE(TxnCondition.Kind.LESS_THAN_OR_EQUAL, TxnCondition.Kind.GREATER_THAN_OR_EQUAL); + + // TODO: Support for IN, CONTAINS, CONTAINS KEY + + private final TxnCondition.Kind kind; + private final TxnCondition.Kind reversedKind; + + Kind(TxnCondition.Kind kind, TxnCondition.Kind reversedKind) + { + this.kind = kind; + this.reversedKind = reversedKind; + } + + TxnCondition.Kind toTxnKind(boolean reversed) + { + return reversed ? reversedKind : kind; + } + } + + private final RowDataReference reference; + private final Kind kind; + private final Term value; + private final boolean reversed; + + public ConditionStatement(RowDataReference reference, Kind kind, Term value, boolean reversed) + { + this.reference = reference; + this.kind = kind; + this.value = value; + this.reversed = reversed; + } + + public static class Raw + { + private final Term.Raw lhs; + private final Kind kind; + private final Term.Raw rhs; + + public Raw(Term.Raw lhs, Kind kind, Term.Raw rhs) + { + Preconditions.checkArgument(lhs != null); + Preconditions.checkArgument((rhs == null) == (kind == Kind.IS_NOT_NULL || kind == Kind.IS_NULL)); + this.lhs = lhs; + this.kind = kind; + this.rhs = rhs; + } + + public ConditionStatement prepare(String keyspace, VariableSpecifications bindVariables) + { + if (rhs == null) + { + // In the IS NULL/IS NOT NULL case, the reference will always be on the LHS + RowDataReference reference = ((RowDataReference.Raw) lhs).prepareAsReceiver(); + reference.collectMarkerSpecification(bindVariables); + return new ConditionStatement(reference, kind, null, false); + } + + RowDataReference reference; + Term value; + boolean reversed = false; + + if (lhs instanceof RowDataReference.Raw) + { + reference = ((RowDataReference.Raw) lhs).prepareAsReceiver(); + ColumnSpecification receiver = reference.getValueReceiver(); + value = rhs.prepare(keyspace, receiver); + } + else if (rhs instanceof RowDataReference.Raw) + { + reference = ((RowDataReference.Raw) rhs).prepareAsReceiver(); + ColumnSpecification receiver = reference.getValueReceiver(); + value = lhs.prepare(keyspace, receiver); + // TxnCondition expects the reference to be on the LHS, so reverse the operator. + reversed = true; + } + else + { + throw new IllegalStateException("Either the left-hand or right-hand side must be a reference!"); + } + + reference.collectMarkerSpecification(bindVariables); + value.collectMarkerSpecification(bindVariables); + return new ConditionStatement(reference, kind, value, reversed); + } + } + + public TxnCondition createCondition(QueryOptions options) + { + switch (kind) + { + case IS_NOT_NULL: + case IS_NULL: + return new TxnCondition.Exists(reference.toTxnReference(options), kind.toTxnKind(reversed)); + case EQ: + case NEQ: + case GT: + case GTE: + case LT: + case LTE: + // TODO: Support for references on LHS and RHS + return new TxnCondition.Value(reference.toTxnReference(options), + kind.toTxnKind(reversed), + value.bindAndGet(options), + options.getProtocolVersion()); + default: + throw new IllegalStateException(); + } + } +} diff --git a/src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java b/src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java new file mode 100644 index 000000000000..ecf7d8cae795 --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.transactions; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.FieldIdentifier; +import org.apache.cassandra.cql3.Operation; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.cql3.terms.Lists; +import org.apache.cassandra.cql3.terms.Maps; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.cql3.terms.UserTypes; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.UserType; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperation; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; +import static org.apache.cassandra.db.marshal.CollectionType.Kind.MAP; +import static org.apache.cassandra.schema.TableMetadata.UNDEFINED_COLUMN_NAME_MESSAGE; + +public class ReferenceOperation +{ + private final ColumnMetadata receiver; + private final TxnReferenceOperation.Kind kind; + private final FieldIdentifier field; + private final Term key; + private final ReferenceValue value; + + public ReferenceOperation(ColumnMetadata receiver, TxnReferenceOperation.Kind kind, Term key, FieldIdentifier field, ReferenceValue value) + { + this.receiver = receiver; + this.kind = kind; + this.key = key; + this.field = field; + this.value = value; + } + + /** + * Creates a {@link ReferenceOperation} from the given {@link Operation} for the purpose of defering execution + * within a transaction. When the language sees an Operation using a reference one is created already, but for cases + * that needs to defer execution (such as when {@link Operation#requiresRead()} is true), this method can be used. + */ + public static ReferenceOperation create(Operation operation) + { + TxnReferenceOperation.Kind kind = TxnReferenceOperation.Kind.from(operation); + ColumnMetadata receiver = operation.column; + + // We already have a prepared reference value, so there is no need to inspect the value type. + ReferenceValue value = new ReferenceValue.Constant(operation.term()); + Term key = extractKeyOrIndex(operation); + FieldIdentifier field = extractField(operation); + return new ReferenceOperation(receiver, kind, key, field, value); + } + + public TxnReferenceOperation.Kind getKind() + { + return kind; + } + + public ReferenceValue getValue() + { + return value; + } + + public ColumnMetadata getReceiver() + { + return receiver; + } + + public boolean requiresRead() + { + // TODO: Find a better way than delegating to the operation? + return kind.toOperation(receiver, null, null, null).requiresRead(); + } + + public TxnReferenceOperation bindAndGet(QueryOptions options) + { + return new TxnReferenceOperation(kind, + receiver, + key != null ? key.bindAndGet(options) : null, + field != null ? field.bytes : null, + value.bindAndGet(options)); + } + + public static class Raw + { + private final Operation.RawUpdate rawUpdate; + public final ColumnIdentifier column; + private final ReferenceValue.Raw value; + + public Raw(Operation.RawUpdate rawUpdate, ColumnIdentifier column, ReferenceValue.Raw value) + { + this.rawUpdate = rawUpdate; + this.column = column; + this.value = value; + } + + public ReferenceOperation prepare(TableMetadata metadata, VariableSpecifications bindVariables) + { + ColumnMetadata receiver = metadata.getColumn(column); + Operation operation = rawUpdate.prepare(metadata, receiver, true); + TxnReferenceOperation.Kind kind = TxnReferenceOperation.Kind.from(operation); + Term key = extractKeyOrIndex(operation); + + checkTrue(receiver != null, UNDEFINED_COLUMN_NAME_MESSAGE, column.toCQLString(), metadata); + AbstractType type = receiver.type; + ColumnMetadata valueReceiver = receiver; + + if (type.isCollection()) + { + CollectionType collectionType = (CollectionType) type; + + // The value for a map subtraction is actually a set (see Operation.Substraction) + if (kind == TxnReferenceOperation.Kind.SetDiscarder && collectionType.kind == MAP) + valueReceiver = valueReceiver.withNewType(SetType.getInstance(((MapType) type).getKeysType(), true)); + + if (kind == TxnReferenceOperation.Kind.ListSetterByIndex || kind == TxnReferenceOperation.Kind.MapSetterByKey) + valueReceiver = valueReceiver.withNewType(collectionType.valueComparator()); + } + + FieldIdentifier field = extractField(operation); + + if (type.isUDT()) + { + if (kind == TxnReferenceOperation.Kind.UserTypeSetterByField) + { + @SuppressWarnings("ConstantConditions") UserType userType = (UserType) type; + CellPath fieldPath = userType.cellPathForField(field); + int i = ByteBufferUtil.getUnsignedShort(fieldPath.get(0), 0); + valueReceiver = valueReceiver.withNewType(userType.fieldType(i)); + } + } + + return new ReferenceOperation(receiver, kind, key, field, value.prepare(valueReceiver, bindVariables)); + } + } + + private static FieldIdentifier extractField(Operation operation) + { + if (operation instanceof UserTypes.SetterByField) + return ((UserTypes.SetterByField) operation).field; + return null; + } + + private static Term extractKeyOrIndex(Operation operation) + { + // TODO: Is there a way to do this without exposing k and idx? + if (operation instanceof Maps.SetterByKey) + return ((Maps.SetterByKey) operation).k; + else if (operation instanceof Lists.SetterByIndex) + return ((Lists.SetterByIndex) operation).idx; + return null; + } +} diff --git a/src/java/org/apache/cassandra/cql3/transactions/ReferenceValue.java b/src/java/org/apache/cassandra/cql3/transactions/ReferenceValue.java new file mode 100644 index 000000000000..d6a4ab8acf4a --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/transactions/ReferenceValue.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.transactions; + +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.service.accord.txn.TxnReferenceValue; + +import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; + +public abstract class ReferenceValue +{ + public abstract TxnReferenceValue bindAndGet(QueryOptions options); + + public static abstract class Raw extends Term.Raw + { + public abstract ReferenceValue prepare(ColumnMetadata receiver, VariableSpecifications bindVariables); + } + + public static class Constant extends ReferenceValue + { + private final Term term; + + public Constant(Term term) + { + this.term = term; + } + + @Override + public TxnReferenceValue bindAndGet(QueryOptions options) + { + return new TxnReferenceValue.Constant(term.bindAndGet(options)); + } + + public static class Raw extends ReferenceValue.Raw + { + private final Term.Raw term; + + public Raw(Term.Raw term) + { + this.term = term; + } + + @Override + public ReferenceValue prepare(ColumnMetadata receiver, VariableSpecifications bindVariables) + { + return new Constant(term.prepare(receiver.ksName, receiver)); + } + + @Override + public TestResult testAssignment(String keyspace, ColumnSpecification receiver) + { + return term.testAssignment(keyspace, receiver); + } + + @Override + public Term prepare(String keyspace, ColumnSpecification receiver) throws InvalidRequestException + { + return term.prepare(keyspace, receiver); + } + + @Override + public String getText() + { + return term.getText(); + } + + @Override + public AbstractType getExactTypeIfKnown(String keyspace) + { + return term.getExactTypeIfKnown(keyspace); + } + } + } + + public static class Substitution extends ReferenceValue + { + private final RowDataReference reference; + + public Substitution(RowDataReference reference) + { + this.reference = reference; + } + + @Override + public TxnReferenceValue bindAndGet(QueryOptions options) + { + return new TxnReferenceValue.Substitution(reference.toTxnReference(options)); + } + + public static class Raw extends ReferenceValue.Raw + { + private final RowDataReference.Raw reference; + + public Raw(RowDataReference.Raw reference) + { + this.reference = reference; + } + + + @Override + public ReferenceValue prepare(ColumnMetadata receiver, VariableSpecifications bindVariables) + { + reference.checkResolved(); + checkTrue(reference.column() != null, "substitution references must reference a column (%s)", reference); + return new Substitution((RowDataReference) reference.prepare(null, receiver)); + } + + @Override + public TestResult testAssignment(String keyspace, ColumnSpecification receiver) + { + return reference.testAssignment(keyspace, receiver); + } + + @Override + public Term prepare(String keyspace, ColumnSpecification receiver) throws InvalidRequestException + { + return reference.prepare(keyspace, receiver); + } + + @Override + public String getText() + { + return reference.getText(); + } + + @Override + public AbstractType getExactTypeIfKnown(String keyspace) + { + return reference.getExactTypeIfKnown(keyspace); + } + } + } +} diff --git a/src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java b/src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java new file mode 100644 index 000000000000..bb5b410ea17f --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.transactions; + +import java.util.List; +import java.util.Map; + +import com.google.common.base.Preconditions; + +import org.apache.cassandra.cql3.AssignmentTestable; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.FieldIdentifier; +import org.apache.cassandra.cql3.terms.Lists; +import org.apache.cassandra.cql3.terms.Maps; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.terms.Sets; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.cql3.terms.UserTypes; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.cql3.functions.Function; +import org.apache.cassandra.cql3.functions.types.utils.Bytes; +import org.apache.cassandra.cql3.selection.Selectable; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.service.accord.txn.TxnDataName; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.UserType; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.service.accord.txn.TxnReference; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull; + +public class RowDataReference extends Term.NonTerminal +{ + public static final String CANNOT_FIND_TUPLE_MESSAGE = "Cannot resolve reference to tuple '%s'."; + public static final String COLUMN_NOT_IN_TUPLE_MESSAGE = "Column '%s' does not exist in tuple '%s'."; + + private final TxnDataName selectName; + private final ColumnMetadata column; + private final Term elementPath; + private final CellPath fieldPath; + + public RowDataReference(TxnDataName selectName, ColumnMetadata column, Term elementPath, CellPath fieldPath) + { + Preconditions.checkArgument(elementPath == null || fieldPath == null, "Cannot specify both element and field paths"); + + this.selectName = selectName; + this.column = column; + this.elementPath = elementPath; + this.fieldPath = fieldPath; + } + + @Override + public void collectMarkerSpecification(VariableSpecifications boundNames) + { + if (elementPath != null) + elementPath.collectMarkerSpecification(boundNames); + } + + @Override + public Terminal bind(QueryOptions options) throws InvalidRequestException + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean containsBindMarker() + { + return elementPath != null && elementPath.containsBindMarker(); + } + + @Override + public void addFunctionsTo(List functions) + { + throw new UnsupportedOperationException("Functions are not currently supported w/ reference terms."); + } + + public ColumnMetadata toResultMetadata() + { + ColumnIdentifier fullName = getFullyQualifiedName(); + ColumnMetadata forMetadata = column.withNewName(fullName); + + if (isElementSelection()) + { + if (forMetadata.type instanceof ListType) + forMetadata = forMetadata.withNewType(((ListType) forMetadata.type).valueComparator()); + else if (forMetadata.type instanceof SetType) + forMetadata = forMetadata.withNewType(((SetType) forMetadata.type).nameComparator()); + else if (forMetadata.type instanceof MapType) + forMetadata = forMetadata.withNewType(((MapType) forMetadata.type).valueComparator()); + } + else if (isFieldSelection()) + { + forMetadata = forMetadata.withNewType(getFieldSelectionType()); + } + return forMetadata; + } + + public ColumnSpecification getValueReceiver() + { + if (isElementSelection()) + { + CollectionType.Kind collectionKind = ((CollectionType) column.type).kind; + switch (collectionKind) + { + case LIST: + return Lists.valueSpecOf(column); + case MAP: + return Maps.valueSpecOf(column); + default: + throw new InvalidRequestException(String.format("Element selection not supported for column %s of type %s" , + column.name, collectionKind)); + } + } + else if (isFieldSelection()) + { + return getFieldSelectionSpec(); + } + + return column; + } + + public boolean isElementSelection() + { + return elementPath != null && column.type.isCollection(); + } + + public boolean isFieldSelection() + { + return fieldPath != null && column.type.isUDT(); + } + + private AbstractType getFieldSelectionType() + { + assert isFieldSelection() : "No field selection type exists"; + return getFieldSelectionType(column, fieldPath); + } + + private static AbstractType getFieldSelectionType(ColumnMetadata column, CellPath fieldPath) + { + return ((UserType) column.type).fieldType(fieldPath); + } + + public ColumnSpecification getFieldSelectionSpec() + { + assert isFieldSelection() : "No field selection type exists"; + int field = ByteBufferUtil.getUnsignedShort(fieldPath.get(0), 0); + return UserTypes.fieldSpecOf(column, field); + } + + private CellPath bindCellPath(QueryOptions options) + { + if (fieldPath != null) + return fieldPath; + + return elementPath != null ? CellPath.create(elementPath.bindAndGet(options)) : null; + } + + public TxnReference toTxnReference(QueryOptions options) + { + Preconditions.checkState(elementPath == null || column.isComplex() || column.type.isFrozenCollection()); + Preconditions.checkState(fieldPath == null || column.isComplex() || column.type.isUDT()); + return new TxnReference(selectName, column, bindCellPath(options)); + } + + public ColumnIdentifier getFullyQualifiedName() + { + // TODO: Make this more user-friendly... + String path = fieldPath != null ? '.' + Bytes.toHexString(fieldPath.get(0)) : (elementPath == null ? "" : "[0x" + elementPath + ']'); + String fullName = selectName.name() + '.' + column.name.toString() + path; + return new ColumnIdentifier(fullName, true); + } + + public ColumnMetadata column() + { + return column; + } + + public static class Raw extends Term.Raw + { + private final Selectable.RawIdentifier tuple; + private final Selectable.RawIdentifier selected; + private final Object fieldOrElement; + + private boolean isResolved = false; + + private TxnDataName tupleName; + private ColumnMetadata column; + private Term elementPath = null; + private CellPath fieldPath = null; + + public Raw(Selectable.RawIdentifier tuple, Selectable.Raw selected, Object fieldOrElement) + { + Preconditions.checkArgument(tuple != null, "tuple is null"); + Preconditions.checkArgument(selected == null || selected instanceof Selectable.RawIdentifier, "selected is not a Selectable.RawIdentifier: " + selected); + this.tuple = tuple; + this.selected = (Selectable.RawIdentifier) selected; + this.fieldOrElement = fieldOrElement; + } + + public static Raw fromSelectable(Selectable.RawIdentifier tuple, Selectable.Raw selectable) + { + if (selectable == null) + return new RowDataReference.Raw(tuple, null, null); + + // TODO: Ideally it would be nice not to have to make items in the Selectables public + if (selectable instanceof Selectable.WithFieldSelection.Raw) + { + Selectable.WithFieldSelection.Raw selection = (Selectable.WithFieldSelection.Raw) selectable; + return new RowDataReference.Raw(tuple, selection.selected, selection.field); + } + else if (selectable instanceof Selectable.WithElementSelection.Raw) + { + Selectable.WithElementSelection.Raw elementSelection = (Selectable.WithElementSelection.Raw) selectable; + return new RowDataReference.Raw(tuple, elementSelection.selected, elementSelection.element); + } + else if (selectable instanceof Selectable.RawIdentifier) + { + Selectable.RawIdentifier selection = (Selectable.RawIdentifier) selectable; + return new RowDataReference.Raw(tuple, selection, null); + } + + throw new UnsupportedOperationException("Cannot create column reference from selectable: " + selectable); + } + + private void resolveFinished() + { + isResolved = true; + } + + public void resolveReference(Map sources) + { + if (isResolved) + return; + + // root level name + tupleName = TxnDataName.user(tuple.toString()); + ReferenceSource source = sources.get(tupleName); + checkNotNull(source, CANNOT_FIND_TUPLE_MESSAGE, tupleName.name()); + + if (selected == null) + { + resolveFinished(); + return; + } + + column = source.getColumn(selected.toString()); + checkNotNull(column, COLUMN_NOT_IN_TUPLE_MESSAGE, selected.toString(), tupleName.name()); + + // TODO: confirm update partition key terms don't contain column references. This can't be done in prepare + // because there can be intermediate functions (ie: pk=row.v+1 or pk=_add(row.v, 5)). Need a recursive Term visitor + + if (fieldOrElement == null) + { + resolveFinished(); + return; + } + + if (column.type.isCollection()) + { + Term.Raw element = (Term.Raw) fieldOrElement; + elementPath = element.prepare(column.ksName, specForElementOrSlice(column)); + } + else if (column.type.isUDT()) + { + FieldIdentifier field = (FieldIdentifier) fieldOrElement; + UserType userType = (UserType) column.type; + fieldPath = userType.cellPathForField(field); + } + + resolveFinished(); + } + + private ColumnSpecification specForElementOrSlice(ColumnSpecification receiver) + { + switch (((CollectionType) receiver.type).kind) + { + case LIST: return Lists.indexSpecOf(receiver); + case SET: return Sets.valueSpecOf(receiver); + case MAP: return Maps.keySpecOf(receiver); + default: throw new AssertionError("Unknown collection type: " + receiver.type); + } + } + + public void checkResolved() + { + if (!isResolved) + throw new IllegalStateException(); + } + + @Override + public AssignmentTestable.TestResult testAssignment(String keyspace, ColumnSpecification receiver) + { + checkResolved(); + + AbstractType type = column.type; + + if (elementPath != null) + { + CollectionType collectionType = (CollectionType) type; + type = collectionType.kind == CollectionType.Kind.SET ? collectionType.nameComparator() : collectionType.valueComparator(); + } + else if (fieldPath != null) + { + type = RowDataReference.getFieldSelectionType(column, fieldPath); + } + + return type.testAssignment(receiver.type); + } + + @Override + public Term prepare(String keyspace, ColumnSpecification receiver) throws InvalidRequestException + { + return prepare(keyspace, receiver, tupleName, column, elementPath, fieldPath); + } + + public RowDataReference prepareAsReceiver() + { + checkResolved(); + return new RowDataReference(tupleName, column, elementPath, fieldPath); + } + + private RowDataReference prepare(String keyspace, + ColumnSpecification receiver, + TxnDataName selectName, + ColumnMetadata column, + Term elementPath, + CellPath fieldPath) + { + if (!testAssignment(keyspace, receiver).isAssignable()) + throw new InvalidRequestException(String.format("Invalid reference type %s (%s) for \"%s\" of type %s", + column.type, column.name, receiver.name, receiver.type.asCQL3Type())); + + return new RowDataReference(selectName, column, elementPath, fieldPath); + } + + @Override + public String getText() + { + StringBuilder text = new StringBuilder(tuple.toString()); + + if (selected != null) + text.append('.').append(selected); + + if (fieldOrElement != null) + { + if (fieldOrElement instanceof Term.Raw) + { + Term.Raw element = (Term.Raw) fieldOrElement; + text.append('.').append(element.getText()); + } + else if (fieldOrElement instanceof FieldIdentifier) + { + FieldIdentifier field = (FieldIdentifier) fieldOrElement; + text.append('.').append(field); + } + else + { + throw new IllegalStateException("Field or element is neither a raw term nor a field identifier"); + } + } + + return text.toString(); + } + + @Override + public AbstractType getExactTypeIfKnown(String keyspace) + { + checkResolved(); + return column.type; + } + + public ColumnMetadata column() + { + return column; + } + } + + public interface ReferenceSource + { + ColumnMetadata getColumn(String name); + } +} diff --git a/src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java b/src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java new file mode 100644 index 000000000000..ae5099aa3b11 --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.transactions; + +import java.util.HashSet; +import java.util.Set; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.statements.SelectStatement; +import org.apache.cassandra.schema.ColumnMetadata; + +import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; + +public class SelectReferenceSource implements RowDataReference.ReferenceSource +{ + public static final String COLUMN_NOT_IN_SELECT_MESSAGE = "%s refererences a column not included in the select"; + + private final SelectStatement statement; + + public SelectReferenceSource(SelectStatement statement) + { + this.statement = statement; + } + + @Override + public ColumnMetadata getColumn(String name) + { + ColumnMetadata column = statement.table.getColumn(new ColumnIdentifier(name, true)); + if (column != null) + { + Set selectedColumns = new HashSet<>(statement.getSelection().getColumns()); + checkTrue(selectedColumns.contains(column), COLUMN_NOT_IN_SELECT_MESSAGE, statement); + } + return column; + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/DeletionTime.java b/src/java/org/apache/cassandra/db/DeletionTime.java index 5970fbb042a4..190f54d3437b 100644 --- a/src/java/org/apache/cassandra/db/DeletionTime.java +++ b/src/java/org/apache/cassandra/db/DeletionTime.java @@ -61,7 +61,7 @@ public static DeletionTime build(long markedForDeleteAt, long localDeletionTime) // Do not use. This is a perf optimization where some data structures known to hold valid uints are allowed to use it. // You should use 'build' instead to not workaround validations, corruption detections, etc - static DeletionTime buildUnsafeWithUnsignedInteger(long markedForDeleteAt, int localDeletionTimeUnsignedInteger) + public static DeletionTime buildUnsafeWithUnsignedInteger(long markedForDeleteAt, int localDeletionTimeUnsignedInteger) { return CassandraUInt.compare(Cell.MAX_DELETION_TIME_UNSIGNED_INTEGER, localDeletionTimeUnsignedInteger) < 0 ? new InvalidDeletionTime(markedForDeleteAt) diff --git a/src/java/org/apache/cassandra/db/MutableDeletionInfo.java b/src/java/org/apache/cassandra/db/MutableDeletionInfo.java index c8d9fd18116d..3076c5494669 100644 --- a/src/java/org/apache/cassandra/db/MutableDeletionInfo.java +++ b/src/java/org/apache/cassandra/db/MutableDeletionInfo.java @@ -230,6 +230,16 @@ public DeletionInfo updateAllTimestamp(long timestamp) return this; } + public DeletionInfo updateAllTimestampAndLocalDeletionTime(long timestamp, int localDeletionTime) + { + if (partitionDeletion.markedForDeleteAt() != Long.MIN_VALUE) + partitionDeletion = DeletionTime.buildUnsafeWithUnsignedInteger(timestamp, localDeletionTime); + + if (ranges != null) + ranges.updateAllTimestampAndLocalDeletionTime(timestamp, localDeletionTime); + return this; + } + @Override public boolean equals(Object o) { diff --git a/src/java/org/apache/cassandra/db/PartitionPosition.java b/src/java/org/apache/cassandra/db/PartitionPosition.java index e8d29cbd9464..d936e566a7cf 100644 --- a/src/java/org/apache/cassandra/db/PartitionPosition.java +++ b/src/java/org/apache/cassandra/db/PartitionPosition.java @@ -29,7 +29,7 @@ public interface PartitionPosition extends RingPosition, ByteComparable { - public static enum Kind + public enum Kind { // Only add new values to the end of the enum, the ordinal is used // during serialization diff --git a/src/java/org/apache/cassandra/db/RangeTombstoneList.java b/src/java/org/apache/cassandra/db/RangeTombstoneList.java index 8b8cee2d39bd..8eee422be309 100644 --- a/src/java/org/apache/cassandra/db/RangeTombstoneList.java +++ b/src/java/org/apache/cassandra/db/RangeTombstoneList.java @@ -328,6 +328,15 @@ public void updateAllTimestamp(long timestamp) markedAts[i] = timestamp; } + public void updateAllTimestampAndLocalDeletionTime(long timestamp, int localDeletionTime) + { + for (int i = 0; i < size; i++) + { + markedAts[i] = timestamp; + delTimesUnsignedIntegers[i] = localDeletionTime; + } + } + private RangeTombstone rangeTombstone(int idx) { return new RangeTombstone(Slice.make(starts[idx], ends[idx]), DeletionTime.buildUnsafeWithUnsignedInteger(markedAts[idx], delTimesUnsignedIntegers[idx])); diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index ad6da1e88a5f..6a129aeb5d0f 100644 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@ -434,6 +434,24 @@ public SinglePartitionReadCommand withUpdatedLimit(DataLimits newLimits) isTrackingWarnings()); } + public SinglePartitionReadCommand withNowInSec(int nowInSec) + { + return new SinglePartitionReadCommand(serializedAtEpoch(), + isDigestQuery(), + digestVersion(), + acceptsTransient(), + metadata(), + nowInSec, + columnFilter(), + rowFilter(), + limits(), + partitionKey(), + clusteringIndexFilter(), + indexQueryPlan(), + isTrackingWarnings(), + dataRange()); + } + @Override public DecoratedKey partitionKey() { @@ -492,7 +510,9 @@ protected void recordLatency(TableMetrics metric, long latencyNanos) metric.readLatency.addNano(latencyNanos); } - protected UnfilteredPartitionIterator queryStorage(final ColumnFamilyStore cfs, ReadExecutionController executionController) + @VisibleForTesting + @SuppressWarnings("resource") // we close the created iterator through closing the result of this method (and SingletonUnfilteredPartitionIterator ctor cannot fail) + public UnfilteredPartitionIterator queryStorage(final ColumnFamilyStore cfs, ReadExecutionController executionController) { // skip the row cache and go directly to sstables/memtable if repaired status of // data is being tracked. This is only requested after an initial digest mismatch diff --git a/src/java/org/apache/cassandra/db/SystemKeyspace.java b/src/java/org/apache/cassandra/db/SystemKeyspace.java index 36fc953360c1..18bb9963936f 100644 --- a/src/java/org/apache/cassandra/db/SystemKeyspace.java +++ b/src/java/org/apache/cassandra/db/SystemKeyspace.java @@ -79,6 +79,7 @@ import org.apache.cassandra.io.sstable.SSTableId; import org.apache.cassandra.io.sstable.SequenceBasedSSTableId; import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.RebufferingInputStream; @@ -1855,9 +1856,9 @@ public static void snapshotOnVersionChange() if (!previous.equals(NULL_VERSION.toString()) && !previous.equals(next)) { List entities = new ArrayList<>(); - for (String keyspace : SchemaConstants.LOCAL_SYSTEM_KEYSPACE_NAMES) + for (Keyspace keyspace : Keyspace.system()) { - for (ColumnFamilyStore cfs : Keyspace.open(keyspace).getColumnFamilyStores()) + for (ColumnFamilyStore cfs : keyspace.getColumnFamilyStores()) entities.add(cfs.getKeyspaceTableName()); } @@ -1934,12 +1935,10 @@ public static ByteBuffer rangeToBytes(Range range) @SuppressWarnings("unchecked") private static Range byteBufferToRange(ByteBuffer rawRange, IPartitioner partitioner) { - try + try (DataInputPlus.DataInputStreamPlus in = new DataInputBuffer(ByteBufferUtil.getArray(rawRange))) { // See rangeToBytes above for why version is 0. - return (Range) Range.tokenSerializer.deserialize(new DataInputBuffer(ByteBufferUtil.getArray(rawRange)), - partitioner, - 0); + return (Range) Range.tokenSerializer.deserialize(in, partitioner, 0); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/db/WriteType.java b/src/java/org/apache/cassandra/db/WriteType.java index 11909e747614..3d0077046515 100644 --- a/src/java/org/apache/cassandra/db/WriteType.java +++ b/src/java/org/apache/cassandra/db/WriteType.java @@ -17,6 +17,10 @@ */ package org.apache.cassandra.db; +/** + * Identifier for what type of operation timed out. This type is driver facing as a String, but some drivers convert + * this to an enum, meaning any changes to this type require protocol changes and driver support. + */ public enum WriteType { SIMPLE, @@ -26,5 +30,6 @@ public enum WriteType BATCH_LOG, CAS, VIEW, - CDC; + CDC + //TODO update client protocol to support "TRANSACTION" } diff --git a/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java b/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java index a98e3bde99ba..d646511e164d 100644 --- a/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java +++ b/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java @@ -197,20 +197,6 @@ public String toCQLString(TableMetadata metadata, RowFilter rowFilter) return sb.toString(); } - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ClusteringIndexNamesFilter that = (ClusteringIndexNamesFilter) o; - return Objects.equals(clusterings, that.clusterings) && - Objects.equals(reversed, that.reversed); - } - - public int hashCode() - { - return Objects.hash(clusterings, reversed); - } - public Kind kind() { return Kind.NAMES; diff --git a/src/java/org/apache/cassandra/db/filter/ColumnFilter.java b/src/java/org/apache/cassandra/db/filter/ColumnFilter.java index ae043039e25e..49f21bd8fd3b 100644 --- a/src/java/org/apache/cassandra/db/filter/ColumnFilter.java +++ b/src/java/org/apache/cassandra/db/filter/ColumnFilter.java @@ -682,7 +682,7 @@ public SelectionColumnFilter(FetchingStrategy fetchingStrategy, SortedSetMultimap subSelections) { assert queried != null; - assert fetched.includes(queried); + assert fetched.includes(queried) : String.format("Queries columns %s are not included in the fetch strategy %s", queried, fetched); this.fetchingStrategy = fetchingStrategy; this.queried = queried; diff --git a/src/java/org/apache/cassandra/db/marshal/AbstractType.java b/src/java/org/apache/cassandra/db/marshal/AbstractType.java index b5156c4fdb8f..42190e0c2e84 100644 --- a/src/java/org/apache/cassandra/db/marshal/AbstractType.java +++ b/src/java/org/apache/cassandra/db/marshal/AbstractType.java @@ -440,7 +440,7 @@ public boolean isFreezable() return false; } - public AbstractType freeze() + public AbstractType freeze() { return this; } diff --git a/src/java/org/apache/cassandra/db/marshal/ByteArrayAccessor.java b/src/java/org/apache/cassandra/db/marshal/ByteArrayAccessor.java index 55eae805124b..01b1de63e1f6 100644 --- a/src/java/org/apache/cassandra/db/marshal/ByteArrayAccessor.java +++ b/src/java/org/apache/cassandra/db/marshal/ByteArrayAccessor.java @@ -25,6 +25,7 @@ import java.util.Arrays; import java.util.UUID; +import accord.utils.Invariants; import org.apache.cassandra.db.Digest; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.util.DataInputPlus; @@ -107,6 +108,7 @@ public byte[] read(DataInputPlus in, int length) throws IOException @Override public byte[] slice(byte[] input, int offset, int length) { + Invariants.checkArgument(offset + length <= input.length); return Arrays.copyOfRange(input, offset, offset + length); } diff --git a/src/java/org/apache/cassandra/db/marshal/ListType.java b/src/java/org/apache/cassandra/db/marshal/ListType.java index 94d302d05981..6c391b050e00 100644 --- a/src/java/org/apache/cassandra/db/marshal/ListType.java +++ b/src/java/org/apache/cassandra/db/marshal/ListType.java @@ -37,9 +37,9 @@ import org.apache.cassandra.serializers.ListSerializer; import org.apache.cassandra.serializers.MarshalException; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.JsonUtils; import org.apache.cassandra.utils.TimeUUID; -import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.bytecomparable.ByteComparable.Version; import org.apache.cassandra.utils.bytecomparable.ByteSource; @@ -52,7 +52,7 @@ public class ListType extends CollectionType> private static final ConcurrentHashMap, ListType> frozenInstances = new ConcurrentHashMap<>(); private final AbstractType elements; - public final ListSerializer serializer; + private final ListSerializer serializer; private final boolean isMultiCell; public static ListType getInstance(TypeParser parser) throws ConfigurationException, SyntaxException @@ -131,7 +131,7 @@ public ListSerializer getSerializer() } @Override - public AbstractType freeze() + public ListType freeze() { // freeze elements to match org.apache.cassandra.cql3.CQL3Type.Raw.RawCollection.freeze return isMultiCell ? getInstance(this.elements.freeze(), false) : this; diff --git a/src/java/org/apache/cassandra/db/marshal/MapType.java b/src/java/org/apache/cassandra/db/marshal/MapType.java index 69ea6d17e1d3..8ed43c3c8e1c 100644 --- a/src/java/org/apache/cassandra/db/marshal/MapType.java +++ b/src/java/org/apache/cassandra/db/marshal/MapType.java @@ -44,10 +44,10 @@ import org.apache.cassandra.serializers.MarshalException; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.JsonUtils; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.bytecomparable.ByteComparable.Version; import org.apache.cassandra.utils.bytecomparable.ByteSource; import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; -import org.apache.cassandra.utils.Pair; public class MapType extends CollectionType> { @@ -153,7 +153,7 @@ public List> subTypes() } @Override - public AbstractType freeze() + public MapType freeze() { // freeze key/value to match org.apache.cassandra.cql3.CQL3Type.Raw.RawCollection.freeze return isMultiCell ? getInstance(this.keys.freeze(), this.values.freeze(), false) : this; diff --git a/src/java/org/apache/cassandra/db/marshal/SetType.java b/src/java/org/apache/cassandra/db/marshal/SetType.java index c2fdf0042c7d..f2568e3cbc5d 100644 --- a/src/java/org/apache/cassandra/db/marshal/SetType.java +++ b/src/java/org/apache/cassandra/db/marshal/SetType.java @@ -18,7 +18,13 @@ package org.apache.cassandra.db.marshal; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Consumer; @@ -117,7 +123,7 @@ public boolean isMultiCell() } @Override - public AbstractType freeze() + public SetType freeze() { // freeze elements to match org.apache.cassandra.cql3.CQL3Type.Raw.RawCollection.freeze return isMultiCell ? getInstance(this.elements.freeze(), false) : this; diff --git a/src/java/org/apache/cassandra/db/marshal/UserType.java b/src/java/org/apache/cassandra/db/marshal/UserType.java index 804891448345..15ab78e82a2a 100644 --- a/src/java/org/apache/cassandra/db/marshal/UserType.java +++ b/src/java/org/apache/cassandra/db/marshal/UserType.java @@ -24,6 +24,7 @@ import javax.annotation.Nullable; import com.google.common.base.Objects; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.slf4j.Logger; @@ -136,6 +137,12 @@ public AbstractType fieldType(int i) return type(i); } + public AbstractType fieldType(CellPath path) + { + int field = ByteBufferUtil.getUnsignedShort(path.get(0), 0); + return fieldType(field); + } + public List> fieldTypes() { return types; @@ -146,6 +153,11 @@ public FieldIdentifier fieldName(int i) return fieldNames.get(i); } + public FieldIdentifier fieldName(CellPath path) + { + return fieldNames.get(fieldPosition(path)); + } + public String fieldNameAsString(int i) { return stringFieldNames.get(i); @@ -166,6 +178,11 @@ public int fieldPosition(FieldIdentifier fieldName) return fieldNames.indexOf(fieldName); } + public int fieldPosition(CellPath path) + { + return Preconditions.checkElementIndex(ByteBufferUtil.getUnsignedShort(path.get(0), 0), fieldNames.size()); + } + public CellPath cellPathForField(FieldIdentifier fieldName) { // we use the field position instead of the field name to allow for field renaming in ALTER TYPE statements @@ -177,7 +194,7 @@ public ShortType nameComparator() return ShortType.instance; } - public ByteBuffer serializeForNativeProtocol(Iterator> cells, ProtocolVersion protocolVersion) + public ByteBuffer serializeForNativeProtocol(Iterator> cells) { assert isMultiCell; diff --git a/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java b/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java index 33272375733f..857e0dfde909 100644 --- a/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java +++ b/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java @@ -386,10 +386,10 @@ public String toString(boolean includeFullDetails) @Override public boolean equals(Object obj) { - if (!(obj instanceof PartitionUpdate)) + if (!(obj instanceof AbstractBTreePartition)) return false; - PartitionUpdate that = (PartitionUpdate) obj; + AbstractBTreePartition that = (AbstractBTreePartition) obj; BTreePartitionData a = this.holder(), b = that.holder(); return partitionKey.equals(that.partitionKey) && metadata().id.equals(that.metadata().id) diff --git a/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java b/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java index d7a0171d9a20..138c853224f6 100644 --- a/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java +++ b/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java @@ -24,6 +24,7 @@ import org.apache.cassandra.db.DeletionInfo; import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.utils.btree.BTree; public class FilteredPartition extends ImmutableBTreePartition { @@ -43,6 +44,11 @@ public static FilteredPartition create(RowIterator iterator) return new FilteredPartition(iterator); } + public Row getAtIdx(int idx) + { + return BTree.findByIndex(holder.tree, idx); + } + public RowIterator rowIterator() { final Iterator iter = iterator(); diff --git a/src/java/org/apache/cassandra/db/partitions/Partition.java b/src/java/org/apache/cassandra/db/partitions/Partition.java index 8888104d95fe..601934a8e714 100644 --- a/src/java/org/apache/cassandra/db/partitions/Partition.java +++ b/src/java/org/apache/cassandra/db/partitions/Partition.java @@ -37,6 +37,7 @@ public interface Partition { public TableMetadata metadata(); + public DecoratedKey partitionKey(); public DeletionTime partitionLevelDeletion(); diff --git a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java index 00f26451c1a3..cb0fdfb9ffe9 100644 --- a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java +++ b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java @@ -249,7 +249,8 @@ public PartitionUpdate withOnlyPresentColumns() } - protected boolean canHaveShadowedData() + @Override + public boolean canHaveShadowedData() { return canHaveShadowedData; } @@ -586,6 +587,15 @@ public static PartitionUpdate unsafeConstruct(TableMetadata metadata, return new PartitionUpdate(metadata, metadata.epoch, key, holder, deletionInfo, canHaveShadowedData); } + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof PartitionUpdate)) + return false; + + return super.equals(obj); + } + /** * Interface for building partition updates geared towards human. *

@@ -914,6 +924,15 @@ public Builder(TableMetadata metadata, this(metadata, key, columns, initialRowCapacity, canHaveShadowedData, Rows.EMPTY_STATIC_ROW, MutableDeletionInfo.live(), BTree.empty()); } + public Builder(TableMetadata metadata, + DecoratedKey key, + RegularAndStaticColumns columns, + Row staticRow, + int initialRowCapacity) + { + this(metadata, key, columns, initialRowCapacity, true, staticRow, MutableDeletionInfo.live(), BTree.empty()); + } + private Builder(TableMetadata metadata, DecoratedKey key, RegularAndStaticColumns columns, @@ -1090,6 +1109,14 @@ public Builder updateAllTimestamp(long newTimestamp) return this; } + public Builder updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + { + deletionInfo.updateAllTimestampAndLocalDeletionTime(newTimestamp - 1, newLocalDeletionTime); + tree = BTree.transformAndFilter(tree, (x) -> x.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); + staticRow = this.staticRow.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime); + return this; + } + @Override public String toString() { diff --git a/src/java/org/apache/cassandra/db/rows/AbstractCell.java b/src/java/org/apache/cassandra/db/rows/AbstractCell.java index 69ca0b1c315d..c3df806b8c25 100644 --- a/src/java/org/apache/cassandra/db/rows/AbstractCell.java +++ b/src/java/org/apache/cassandra/db/rows/AbstractCell.java @@ -117,6 +117,13 @@ public Cell updateAllTimestamp(long newTimestamp) return new BufferCell(column, isTombstone() ? newTimestamp - 1 : newTimestamp, ttl(), localDeletionTime(), buffer(), path()); } + @Override + public ColumnData updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + { + long localDeletionTime = localDeletionTime() != NO_DELETION_TIME ? newLocalDeletionTime : NO_DELETION_TIME; + return new BufferCell(column, isTombstone() ? newTimestamp - 1 : newTimestamp, ttl(), localDeletionTime, buffer(), path()); + } + public int dataSize() { CellPath path = path(); diff --git a/src/java/org/apache/cassandra/db/rows/BTreeRow.java b/src/java/org/apache/cassandra/db/rows/BTreeRow.java index 4acb20878e75..08888b14ea58 100644 --- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java +++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java @@ -267,7 +267,7 @@ public Deletion deletion() public Cell getCell(ColumnMetadata c) { - assert !c.isComplex(); + assert !c.isComplex(): String.format("Column %s.%s#%s", c.ksName, c.cfName, c.name); return (Cell) BTree.find(btree, ColumnMetadata.asymmetricColumnDataComparator, c); } @@ -445,6 +445,18 @@ public Row updateAllTimestamp(long newTimestamp) return transformAndFilter(newInfo, newDeletion, (cd) -> cd.updateAllTimestamp(newTimestamp)); } + @Override + public Row updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + { + LivenessInfo newInfo = primaryKeyLivenessInfo.isEmpty() ? primaryKeyLivenessInfo : primaryKeyLivenessInfo.withUpdatedTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime); + // If the deletion is shadowable and the row has a timestamp, we'll forced the deletion timestamp to be less than the row one, so we + // should get rid of said deletion. + Deletion newDeletion = deletion.isLive() || (deletion.isShadowable() && !primaryKeyLivenessInfo.isEmpty()) + ? Deletion.LIVE + : new Deletion(DeletionTime.buildUnsafeWithUnsignedInteger(newTimestamp - 1, newLocalDeletionTime), deletion.isShadowable()); + return transformAndFilter(newInfo, newDeletion, (cd) -> cd.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); + } + public Row withRowDeletion(DeletionTime newDeletion) { // Note that: diff --git a/src/java/org/apache/cassandra/db/rows/ColumnData.java b/src/java/org/apache/cassandra/db/rows/ColumnData.java index b9f19dc07fce..18530b2d3929 100644 --- a/src/java/org/apache/cassandra/db/rows/ColumnData.java +++ b/src/java/org/apache/cassandra/db/rows/ColumnData.java @@ -284,6 +284,7 @@ public static void digest(Digest digest, ColumnData cd) * This exists for the Paxos path, see {@link PartitionUpdate#updateAllTimestamp} for additional details. */ public abstract ColumnData updateAllTimestamp(long newTimestamp); + public abstract ColumnData updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime); public abstract ColumnData markCounterLocalToBeCleared(); diff --git a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java index dea77413c09d..f8cc58a84cf3 100644 --- a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java +++ b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java @@ -264,6 +264,13 @@ public ComplexColumnData updateAllTimestamp(long newTimestamp) return transformAndFilter(newDeletion, (cell) -> (Cell) cell.updateAllTimestamp(newTimestamp)); } + @Override + public ColumnData updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + { + DeletionTime newDeletion = complexDeletion.isLive() ? complexDeletion : DeletionTime.buildUnsafeWithUnsignedInteger(newTimestamp - 1, newLocalDeletionTime); + return transformAndFilter(newDeletion, (cell) -> (Cell) cell.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); + } + public long maxTimestamp() { long timestamp = complexDeletion.markedForDeleteAt(); diff --git a/src/java/org/apache/cassandra/db/rows/Row.java b/src/java/org/apache/cassandra/db/rows/Row.java index 5e0bbaf6edf7..ee836446d491 100644 --- a/src/java/org/apache/cassandra/db/rows/Row.java +++ b/src/java/org/apache/cassandra/db/rows/Row.java @@ -299,6 +299,8 @@ public interface Row extends Unfiltered, Iterable, IMeasurableMemory */ public Row updateAllTimestamp(long newTimestamp); + public Row updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime); + /** * Returns a copy of this row with the new deletion as row deletion if it is more recent * than the current row deletion. diff --git a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java new file mode 100644 index 000000000000..42a518b961a7 --- /dev/null +++ b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.virtual; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.IAccordService; + +public class AccordVirtualTables +{ + private AccordVirtualTables() + { + + } + + public static Collection getAll(String keyspace) + { + if (!DatabaseDescriptor.getAccordTransactionsEnabled()) + return Collections.emptyList(); + + return Arrays.asList( + new Epoch(keyspace) + ); + } + + @VisibleForTesting + public static final class Epoch extends AbstractVirtualTable + { + + protected Epoch(String keyspace) + { + super(parse(keyspace, "Accord Epochs", + "CREATE TABLE accord_epochs(\n" + + " epoch bigint,\n" + + " PRIMARY KEY ( (epoch) )" + + ")")); + } + + @Override + public DataSet data() + { + IAccordService accord = AccordService.instance(); + accord.createEpochFromConfigUnsafe(); + + long epoch = accord.currentEpoch(); + + SimpleDataSet result = new SimpleDataSet(metadata()); + result.row(epoch); + return result; + } + } + + private static TableMetadata parse(String keyspace, String comment, String query) + { + return CreateTableStatement.parse(query, keyspace) + .comment(comment) + .kind(TableMetadata.Kind.VIRTUAL) + .build(); + } +} diff --git a/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java b/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java index dacf9f643a8c..3ca8f728a8b3 100644 --- a/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java +++ b/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java @@ -69,6 +69,7 @@ private SystemViewsKeyspace() .addAll(LocalRepairTables.getAll(VIRTUAL_VIEWS)) .addAll(CIDRFilteringMetricsTable.getAll(VIRTUAL_VIEWS)) .addAll(StorageAttachedIndexTables.getAll(VIRTUAL_VIEWS)) + .addAll(AccordVirtualTables.getAll(VIRTUAL_VIEWS)) .build()); } } diff --git a/src/java/org/apache/cassandra/dht/AccordBytesSplitter.java b/src/java/org/apache/cassandra/dht/AccordBytesSplitter.java new file mode 100644 index 000000000000..c27bc4359927 --- /dev/null +++ b/src/java/org/apache/cassandra/dht/AccordBytesSplitter.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.dht; + +import java.math.BigInteger; + +import accord.api.RoutingKey; +import accord.primitives.Ranges; +import accord.utils.Invariants; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; + +import static accord.utils.Invariants.checkArgument; +import static java.math.BigInteger.ONE; +import static java.math.BigInteger.ZERO; + +public class AccordBytesSplitter extends AccordSplitter +{ + final int byteLength; + + protected AccordBytesSplitter(Ranges ranges) + { + int bytesLength = 0; + for (accord.primitives.Range range : ranges) + { + bytesLength = Integer.max(bytesLength, byteLength(range.start())); + bytesLength = Integer.max(bytesLength, byteLength(range.end())); + } + this.byteLength = bytesLength; + } + + @Override + BigInteger minimumValue() + { + return ZERO; + } + + @Override + BigInteger maximumValue() + { + return ONE.shiftLeft(8 * byteLength).subtract(ONE); + } + + @Override + BigInteger valueForToken(Token token) + { + byte[] bytes = ((ByteOrderedPartitioner.BytesToken) token).token; + checkArgument(bytes.length <= byteLength); + BigInteger value = ZERO; + for (int i = 0 ; i < bytes.length ; ++i) + value = value.add(BigInteger.valueOf(bytes[i] & 0xffL).shiftLeft((byteLength - 1 - i) * 8)); + return value; + } + + @Override + Token tokenForValue(BigInteger value) + { + Invariants.checkArgument(value.compareTo(ZERO) >= 0); + byte[] bytes = new byte[byteLength]; + for (int i = 0 ; i < bytes.length ; ++i) + bytes[i] = value.shiftRight((byteLength - 1 - i) * 8).byteValue(); + return new ByteOrderedPartitioner.BytesToken(bytes); + } + + private static int byteLength(RoutingKey routingKey) + { + return byteLength(((AccordRoutingKey) routingKey).token()); + } + + private static int byteLength(Token token) + { + return ((ByteOrderedPartitioner.BytesToken) token).token.length; + } +} diff --git a/src/java/org/apache/cassandra/dht/AccordSplitter.java b/src/java/org/apache/cassandra/dht/AccordSplitter.java new file mode 100644 index 000000000000..232a47d4542f --- /dev/null +++ b/src/java/org/apache/cassandra/dht/AccordSplitter.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.dht; + +import java.math.BigInteger; + +import accord.local.ShardDistributor; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; + +import static java.math.BigInteger.ZERO; + +public abstract class AccordSplitter implements ShardDistributor.EvenSplit.Splitter +{ + abstract BigInteger valueForToken(Token token); + abstract Token tokenForValue(BigInteger value); + abstract BigInteger minimumValue(); + abstract BigInteger maximumValue(); + + @Override + public BigInteger sizeOf(accord.primitives.Range range) + { + // note: minimum value + BigInteger start = range.start() instanceof SentinelKey ? minimumValue() : valueForToken(((AccordRoutingKey)range.start()).token()); + BigInteger end = range.end() instanceof SentinelKey ? maximumValue() : valueForToken(((AccordRoutingKey)range.end()).token()); + return end.subtract(start); + } + + @Override + public accord.primitives.Range subRange(accord.primitives.Range range, BigInteger startOffset, BigInteger endOffset) + { + AccordRoutingKey startBound = (AccordRoutingKey)range.start(); + AccordRoutingKey endBound = (AccordRoutingKey)range.end(); + + BigInteger start = startBound instanceof SentinelKey ? minimumValue() : valueForToken(startBound.token()); + BigInteger end = endBound instanceof SentinelKey ? maximumValue() : valueForToken(endBound.token()); + BigInteger sizeOfRange = end.subtract(start); + + String keyspace = startBound.keyspace(); + return new TokenRange(startOffset.equals(ZERO) ? startBound : new TokenKey(keyspace, tokenForValue(start.add(startOffset))), + endOffset.equals(sizeOfRange) ? endBound : new TokenKey(keyspace, tokenForValue(start.add(endOffset)))); + } + + @Override + public BigInteger zero() + { + return ZERO; + } + + @Override + public BigInteger add(BigInteger a, BigInteger b) + { + return a.add(b); + } + + @Override + public BigInteger subtract(BigInteger a, BigInteger b) + { + return a.subtract(b); + } + + @Override + public BigInteger divide(BigInteger a, int i) + { + return a.divide(BigInteger.valueOf(i)); + } + + @Override + public BigInteger multiply(BigInteger a, int i) + { + return a.multiply(BigInteger.valueOf(i)); + } + + @Override + public int min(BigInteger v, int i) + { + return v.min(BigInteger.valueOf(i)).intValue(); + } + + @Override + public int compare(BigInteger a, BigInteger b) + { + return a.compareTo(b); + } +} diff --git a/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java b/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java index 88f2a3b6f004..9b3f63b82097 100644 --- a/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java +++ b/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.dht; +import accord.primitives.Ranges; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.db.BufferDecoratedKey; @@ -44,6 +45,7 @@ import java.util.Map; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Function; import com.google.common.collect.Maps; @@ -128,6 +130,12 @@ public Object getTokenValue() return token; } + @Override + public int tokenHash() + { + return hashCode(); + } + @Override public double size(Token next) { @@ -138,8 +146,51 @@ public double size(Token next) @Override public Token nextValidToken() { - throw new UnsupportedOperationException(String.format("Token type %s does not support token allocation.", - getClass().getSimpleName())); + // find first byte we can increment + int i = token.length - 1; + while (i >= 0) + { + if (token[i] != -1) + break; + --i; + } + if (i == -1) + return new BytesToken(Arrays.copyOf(token, token.length + 1)); + + // increment and fill remainder with zeros + byte[] newToken = token.clone(); + ++newToken[i]; + Arrays.fill(newToken, i + 1, newToken.length, (byte)0); + return new BytesToken(newToken); + } + + @Override + public Token decreaseSlightly() + { + if (token.length == 0) + throw new IndexOutOfBoundsException("Cannot create a smaller token the MINIMUM"); + + // find first byte we can decrement + int i = token.length - 1; + while (i >= 0) + { + if (token[i] != 0) + break; + --i; + } + if (i == -1) + { + byte[] newToken = Arrays.copyOf(token, token.length - 1); + if (newToken.length > 0) + newToken[newToken.length - 1] = (byte)-1; + return new BytesToken(newToken); + } + + // decrement and fill remainder with -1 + byte[] newToken = token.clone(); + --newToken[i]; + Arrays.fill(newToken, i + 1, newToken.length, (byte)-1); + return new BytesToken(newToken); } } @@ -339,4 +390,10 @@ public AbstractType partitionOrdering() { return BytesType.instance; } + + @Override + public Function accordSplitter() + { + return AccordBytesSplitter::new; + } } diff --git a/src/java/org/apache/cassandra/dht/ComparableObjectToken.java b/src/java/org/apache/cassandra/dht/ComparableObjectToken.java index 4a6aa8d5a879..8aada75663ff 100644 --- a/src/java/org/apache/cassandra/dht/ComparableObjectToken.java +++ b/src/java/org/apache/cassandra/dht/ComparableObjectToken.java @@ -80,4 +80,11 @@ public Token nextValidToken() throw new UnsupportedOperationException(String.format("Token type %s does not support token allocation.", getClass().getSimpleName())); } + + @Override + public Token decreaseSlightly() + { + throw new UnsupportedOperationException(String.format("Token type %s does not support token allocation.", + getClass().getSimpleName())); + } } diff --git a/src/java/org/apache/cassandra/dht/IPartitioner.java b/src/java/org/apache/cassandra/dht/IPartitioner.java index 341ebc47f1d4..b543fab33ff1 100644 --- a/src/java/org/apache/cassandra/dht/IPartitioner.java +++ b/src/java/org/apache/cassandra/dht/IPartitioner.java @@ -22,6 +22,7 @@ import java.util.Map; import java.util.Optional; import java.util.Random; +import java.util.function.Function; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.DecoratedKey; @@ -144,6 +145,13 @@ default Optional splitter() return Optional.empty(); } + Function accordSplitter(); + + default boolean isFixedLength() + { + return false; + } + default public int getMaxTokenSize() { return Integer.MIN_VALUE; diff --git a/src/java/org/apache/cassandra/dht/LocalPartitioner.java b/src/java/org/apache/cassandra/dht/LocalPartitioner.java index 185871d9a27a..e485ccba597a 100644 --- a/src/java/org/apache/cassandra/dht/LocalPartitioner.java +++ b/src/java/org/apache/cassandra/dht/LocalPartitioner.java @@ -22,7 +22,9 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.function.Function; +import accord.primitives.Ranges; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.CachedHashDecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; @@ -174,6 +176,12 @@ public int hashCode() return prime + token.hashCode(); } + @Override + public int tokenHash() + { + return hashCode(); + } + @Override public boolean equals(Object obj) { @@ -203,4 +211,10 @@ public long getHeapSize() return EMPTY_SIZE + ObjectSizes.sizeOnHeapOf(token); } } + + @Override + public Function accordSplitter() + { + return AccordBytesSplitter::new; + } } diff --git a/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java b/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java index dfe0971f7a46..f80d6d4843a2 100644 --- a/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java +++ b/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java @@ -23,7 +23,9 @@ import java.nio.ByteBuffer; import java.util.*; import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Function; +import accord.primitives.Ranges; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PreHashedDecoratedKey; import org.apache.cassandra.db.TypeSizes; @@ -58,6 +60,8 @@ public class Murmur3Partitioner implements IPartitioner private final Splitter splitter = new Splitter(this) { + final BigInteger MAX = BigInteger.valueOf(Long.MAX_VALUE), MIN = BigInteger.valueOf(Long.MIN_VALUE); + public Token tokenForValue(BigInteger value) { return new LongToken(value.longValue()); @@ -67,6 +71,18 @@ public BigInteger valueForToken(Token token) { return BigInteger.valueOf(((LongToken) token).token); } + + @Override + BigInteger minimumValue() + { + return MIN; + } + + @Override + BigInteger maximumValue() + { + return MAX; + } }; public DecoratedKey decorateKey(ByteBuffer key) @@ -214,6 +230,12 @@ public long getLongValue() return token; } + @Override + public int tokenHash() + { + return Long.hashCode(token); + } + @Override public double size(Token next) { @@ -226,11 +248,22 @@ public double size(Token next) @Override public LongToken nextValidToken() { + // CASSANDRA-17109 Added the below checks, but paxos tests were not updated, rather than fix + // the paxos tests, disabling the checks for now. The current paxos tests bias twards MIN but + // not for MAX, which makes the test very flaky as when MAX is generated the test fails... +// if (token == MAXIMUM) +// throw new IllegalArgumentException("Cannot increase above MAXIMUM"); + return new LongToken(token + 1); } public LongToken decreaseSlightly() { + // CASSANDRA-17109 Added the below checks, but paxos tests were not updated, rather than fix + // the paxos tests, disabling the checks for now +// if (equals(MINIMUM)) +// throw new IllegalArgumentException("Cannot decrease below MINIMUM"); + return new LongToken(token - 1); } @@ -271,6 +304,12 @@ private LongToken getToken(ByteBuffer key, long[] hash) return new LongToken(normalize(hash[0])); } + @Override + public boolean isFixedLength() + { + return true; + } + public int getMaxTokenSize() { return MAXIMUM_TOKEN_SIZE; @@ -441,4 +480,10 @@ public Optional splitter() { return Optional.of(splitter); } + + @Override + public Function accordSplitter() + { + return ignore -> splitter; + } } diff --git a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java index eb2e01e3bcb3..227d043382bc 100644 --- a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java +++ b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java @@ -22,7 +22,10 @@ import java.nio.charset.CharacterCodingException; import java.util.*; import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Function; +import accord.api.RoutingKey; +import accord.primitives.Ranges; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.CachedHashDecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; @@ -32,6 +35,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import org.apache.cassandra.utils.bytecomparable.ByteSource; @@ -40,6 +44,11 @@ import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.Pair; +import static accord.utils.Invariants.checkArgument; +import static java.lang.Integer.max; +import static java.math.BigInteger.ONE; +import static java.math.BigInteger.ZERO; + public class OrderPreservingPartitioner implements IPartitioner { private static final String rndchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -90,7 +99,7 @@ private static BigInteger bigForString(String str, int sigchars) { assert str.length() <= sigchars; - BigInteger big = BigInteger.ZERO; + BigInteger big = ZERO; for (int i = 0; i < str.length(); i++) { int charpos = 16 * (sigchars - (i + 1)); @@ -232,6 +241,12 @@ public int compareTo(Token o) return super.compareTo(o); } + + @Override + public int tokenHash() + { + return token.hashCode(); + } } public StringToken getToken(ByteBuffer key) @@ -296,4 +311,58 @@ public AbstractType partitionOrdering() { return UTF8Type.instance; } + + @Override + public Function accordSplitter() + { + return ranges -> new AccordSplitter() + { + final int charLength = ranges.stream().mapToInt(range -> max(charLength(range.start()), charLength(range.end()))) + .max().orElse(0); + + @Override + BigInteger valueForToken(Token token) + { + String chars = ((StringToken) token).token; + checkArgument(chars.length() <= charLength); + BigInteger value = ZERO; + for (int i = 0 ; i < chars.length() ; ++i) + value = value.add(BigInteger.valueOf(chars.charAt(i) & 0xffffL).shiftLeft((charLength - 1 - i) * 16)); + return value; + } + + @Override + Token tokenForValue(BigInteger value) + { + // TODO (required): test + checkArgument(value.compareTo(ZERO) >= 0); + char[] chars = new char[charLength]; + for (int i = 0 ; i < chars.length ; ++i) + chars[i] = (char) value.shiftRight((charLength - 1 - i) * 16).shortValue(); + return new StringToken(new String(chars)); + } + + @Override + BigInteger minimumValue() + { + return ZERO; + } + + @Override + BigInteger maximumValue() + { + return ONE.shiftLeft(charLength * 16).subtract(ONE); + } + }; + } + + private static int charLength(RoutingKey routingKey) + { + return charLength(((AccordRoutingKey) routingKey).token()); + } + + private static int charLength(Token token) + { + return ((StringToken) token).token.length(); + } } diff --git a/src/java/org/apache/cassandra/dht/RandomPartitioner.java b/src/java/org/apache/cassandra/dht/RandomPartitioner.java index 9b833e3868d8..44f1893f0bb6 100644 --- a/src/java/org/apache/cassandra/dht/RandomPartitioner.java +++ b/src/java/org/apache/cassandra/dht/RandomPartitioner.java @@ -23,9 +23,11 @@ import java.nio.ByteBuffer; import java.security.MessageDigest; import java.util.*; +import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; +import accord.primitives.Ranges; import org.apache.cassandra.db.CachedHashDecoratedKey; import org.apache.cassandra.db.marshal.ByteArrayAccessor; import org.apache.cassandra.db.marshal.ByteBufferAccessor; @@ -92,6 +94,18 @@ public BigInteger valueForToken(Token token) { return ((BigIntegerToken)token).getTokenValue(); } + + @Override + BigInteger minimumValue() + { + return MINIMUM.getTokenValue(); + } + + @Override + BigInteger maximumValue() + { + return MAXIMUM; + } }; public DecoratedKey decorateKey(ByteBuffer key) @@ -278,9 +292,25 @@ public long getHeapSize() public Token nextValidToken() { + if (token.equals(MAXIMUM)) + throw new IllegalArgumentException("Cannot increase above MAXIMUM"); return new BigIntegerToken(token.add(BigInteger.ONE)); } + @Override + public Token decreaseSlightly() + { + if (token.equals(MINIMUM.token)) + throw new IllegalArgumentException("Cannot decrease below MINIMUM"); + return new BigIntegerToken(token.subtract(BigInteger.ONE)); + } + + @Override + public int tokenHash() + { + return token.hashCode(); + } + public double size(Token next) { BigIntegerToken n = (BigIntegerToken) next; @@ -362,6 +392,12 @@ public Optional splitter() return Optional.of(splitter); } + @Override + public Function accordSplitter() + { + return ignore -> splitter; + } + private static BigInteger hashToBigInteger(ByteBuffer data) { MessageDigest messageDigest = localMD5Digest.get(); diff --git a/src/java/org/apache/cassandra/dht/ReversedLongLocalPartitioner.java b/src/java/org/apache/cassandra/dht/ReversedLongLocalPartitioner.java index 43f9ab832d18..f95f1e776400 100644 --- a/src/java/org/apache/cassandra/dht/ReversedLongLocalPartitioner.java +++ b/src/java/org/apache/cassandra/dht/ReversedLongLocalPartitioner.java @@ -23,10 +23,12 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Longs; +import accord.primitives.Ranges; import org.apache.cassandra.db.CachedHashDecoratedKey; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; @@ -156,6 +158,12 @@ public AbstractType partitionOrdering() return LongType.instance; } + @Override + public Function accordSplitter() + { + throw new UnsupportedOperationException("Accord is not supported by " + getClass().getName()); + } + @VisibleForTesting public static class ReversedLongLocalToken extends Token { @@ -184,6 +192,12 @@ public Object getTokenValue() return token; } + @Override + public int tokenHash() + { + return Long.hashCode(token); + } + @Override public long getLongValue() { @@ -210,6 +224,13 @@ public Token nextValidToken() getClass().getSimpleName())); } + @Override + public Token decreaseSlightly() + { + throw new UnsupportedOperationException(String.format("Token type %s does not support token allocation.", + getClass().getSimpleName())); + } + @Override public int compareTo(Token o) { diff --git a/src/java/org/apache/cassandra/dht/Splitter.java b/src/java/org/apache/cassandra/dht/Splitter.java index 53b4462221cd..3f9d663b7e5b 100644 --- a/src/java/org/apache/cassandra/dht/Splitter.java +++ b/src/java/org/apache/cassandra/dht/Splitter.java @@ -36,7 +36,7 @@ /** * Partition splitter. */ -public abstract class Splitter +public abstract class Splitter extends AccordSplitter { private final IPartitioner partitioner; @@ -45,12 +45,6 @@ protected Splitter(IPartitioner partitioner) this.partitioner = partitioner; } - @VisibleForTesting - protected abstract Token tokenForValue(BigInteger value); - - @VisibleForTesting - protected abstract BigInteger valueForToken(Token token); - @VisibleForTesting protected BigInteger tokensInRange(Range range) { diff --git a/src/java/org/apache/cassandra/dht/Token.java b/src/java/org/apache/cassandra/dht/Token.java index 3b78d7b84dc0..f5ff32e00edc 100644 --- a/src/java/org/apache/cassandra/dht/Token.java +++ b/src/java/org/apache/cassandra/dht/Token.java @@ -38,6 +38,7 @@ public abstract class Token implements RingPosition, Serializable public static final TokenSerializer serializer = new TokenSerializer(); public static final MetadataSerializer metadataSerializer = new MetadataSerializer(); + public static final CompactTokenSerializer compactSerializer = new CompactTokenSerializer(); public static abstract class TokenFactory { @@ -80,6 +81,14 @@ public void serialize(Token token, ByteBuffer out) throws IOException out.put(toByteArray(token)); } + public Token deserialize(DataInputPlus in, IPartitioner p) throws IOException + { + int size = p.isFixedLength() ? p.getMaxTokenSize() : in.readUnsignedVInt32(); + byte[] bytes = new byte[size]; + in.readFully(bytes); + return p.getTokenFactory().fromByteArray(ByteBuffer.wrap(bytes)); + } + public Token fromByteBuffer(ByteBuffer bytes, int position, int length) { bytes = bytes.duplicate(); @@ -158,9 +167,38 @@ public long serializedSize(Token object, int version) } } + public static class CompactTokenSerializer implements IPartitionerDependentSerializer + { + public void serialize(Token token, DataOutputPlus out, int version) throws IOException + { + IPartitioner p = token.getPartitioner(); + if (!p.isFixedLength()) + out.writeUnsignedVInt32(p.getTokenFactory().byteSize(token)); + p.getTokenFactory().serialize(token, out); + } + + public Token deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException + { + int size = p.isFixedLength() ? p.getMaxTokenSize() : in.readUnsignedVInt32(); + byte[] bytes = new byte[size]; + in.readFully(bytes); + return p.getTokenFactory().fromByteArray(ByteBuffer.wrap(bytes)); + } + + public long serializedSize(Token object, int version) + { + IPartitioner p = object.getPartitioner(); + int byteSize = p.getTokenFactory().byteSize(object); + if (p.isFixedLength()) + return byteSize; + return TypeSizes.sizeofUnsignedVInt(byteSize) + byteSize; + } + } + abstract public IPartitioner getPartitioner(); abstract public long getHeapSize(); abstract public Object getTokenValue(); + abstract public int tokenHash(); /** * This method exists so that callers can access the primitive {@code long} value for this {@link Token}, if @@ -196,6 +234,7 @@ public long getLongValue() * Used by the token allocation algorithm (see CASSANDRA-7032). */ abstract public double size(Token next); + /** * Returns the next possible token in the token space, one that compares * greater than this and such that there is no other token that sits @@ -209,6 +248,15 @@ public long getLongValue() * constructing token ranges for sstables. */ abstract public Token nextValidToken(); + /** + * Returns a token that is slightly more than this. This is NOT guaranteed to be the directly following token. + */ + public Token increaseSlightly() { return nextValidToken(); } + + /** + * Returns a token that is slightly less than this. + */ + abstract public Token decreaseSlightly(); public Token getToken() { diff --git a/src/java/org/apache/cassandra/io/LocalVersionedSerializer.java b/src/java/org/apache/cassandra/io/LocalVersionedSerializer.java new file mode 100644 index 000000000000..739f00784675 --- /dev/null +++ b/src/java/org/apache/cassandra/io/LocalVersionedSerializer.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io; + +import java.io.IOException; +import java.util.Objects; + +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +/** + * Serializer that stores the version within the buffer. Normal usage of {@link IVersionedSerializer} is to rely on + * {@link org.apache.cassandra.net.MessagingService#current_version} and messaging version numbers, but that implies + * that a messaging version bump is required if a change is made to this field; for some cases the serializer isn't + * dealing with messages and instead are blobs stored in a table, for these cases it may be better to rely on a field + * specific versioning that gets stored along the data. + */ +public class LocalVersionedSerializer +{ + private final MessageVersionProvider currentVersion; + private final IVersionedSerializer versionSerializer; + private final IVersionedSerializer serializer; + + public LocalVersionedSerializer(V currentVersion, + IVersionedSerializer versionSerializer, + IVersionedSerializer serializer) + { + // V is local to the constructor to validate at construction time things are fine, but don't want in the type + // sig of the class as it just gets verbose... + this.currentVersion = Objects.requireNonNull(currentVersion); + this.versionSerializer = (IVersionedSerializer) Objects.requireNonNull(versionSerializer); + this.serializer = Objects.requireNonNull(serializer); + } + + public IVersionedSerializer serializer() + { + return serializer; + } + + /** + * Serialize the specified type into the specified DataOutputStream instance. + * + * @param t type that needs to be serialized + * @param out DataOutput into which serialization needs to happen. + * @throws IOException if serialization fails + */ + public void serialize(I t, DataOutputPlus out) throws IOException + { + versionSerializer.serialize(currentVersion, out, currentVersion.messageVersion()); + serializer.serialize(t, out, currentVersion.messageVersion()); + } + + /** + * Deserialize into the specified DataInputStream instance. + * + * @param in DataInput from which deserialization needs to happen. + * @return the type that was deserialized + * @throws IOException if deserialization fails + */ + public I deserialize(DataInputPlus in) throws IOException + { + MessageVersionProvider version = versionSerializer.deserialize(in, currentVersion.messageVersion()); + return serializer.deserialize(in, version.messageVersion()); + } + + /** + * Calculate serialized size of object without actually serializing. + * + * @param t object to calculate serialized size + * @return serialized size of object t + */ + public long serializedSize(I t) + { + long size = versionSerializer.serializedSize(currentVersion, currentVersion.messageVersion()); + size += serializer.serializedSize(t, currentVersion.messageVersion()); + return size; + } +} diff --git a/src/java/org/apache/cassandra/io/MessageVersionProvider.java b/src/java/org/apache/cassandra/io/MessageVersionProvider.java new file mode 100644 index 000000000000..a6ad468281c0 --- /dev/null +++ b/src/java/org/apache/cassandra/io/MessageVersionProvider.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io; + +public interface MessageVersionProvider +{ + int messageVersion(); +} diff --git a/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java b/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java index dbe91cee106c..c37886364abb 100644 --- a/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java +++ b/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java @@ -26,9 +26,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.NavigableSet; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -280,7 +282,6 @@ public CQLSSTableWriter rawAddRow(List values) // Note that we asks indexes to not validate values (the last 'false' arg below) because that triggers a 'Keyspace.open' // and that forces a lot of initialization that we don't want. UpdateParameters params = new UpdateParameters(modificationStatement.metadata, - modificationStatement.updatedColumns(), ClientState.forInternalCalls(), options, modificationStatement.getTimestamp(TimeUnit.MILLISECONDS.toMicros(now), options), @@ -661,7 +662,12 @@ public CQLSSTableWriter build() if (modificationStatement == null) throw new IllegalStateException("No modification (INSERT/UPDATE/DELETE) statement specified, you should provide a modification statement through using()"); - Preconditions.checkState(Sets.difference(SchemaConstants.LOCAL_SYSTEM_KEYSPACE_NAMES, Schema.instance.getKeyspaces()).isEmpty(), + Set activeKeyspaces = new HashSet<>(SchemaConstants.LOCAL_SYSTEM_KEYSPACE_NAMES); + + if (!DatabaseDescriptor.getAccordTransactionsEnabled()) + activeKeyspaces.remove(SchemaConstants.ACCORD_KEYSPACE_NAME); + + Preconditions.checkState(Sets.difference(activeKeyspaces, Schema.instance.getKeyspaces()).isEmpty(), "Local keyspaces were not loaded. If this is running as a client, please make sure to add %s=true system property.", CassandraRelevantProperties.FORCE_LOAD_LOCAL_KEYSPACES.getKey()); diff --git a/src/java/org/apache/cassandra/io/util/DataOutputPlus.java b/src/java/org/apache/cassandra/io/util/DataOutputPlus.java index f8bc95953164..483ee5e1dce5 100644 --- a/src/java/org/apache/cassandra/io/util/DataOutputPlus.java +++ b/src/java/org/apache/cassandra/io/util/DataOutputPlus.java @@ -130,7 +130,6 @@ default void writeMostSignificantBytes(long register, int bytes) throws IOExcept default: throw new IllegalArgumentException(); } - } /** diff --git a/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java new file mode 100644 index 000000000000..c95c3bd11fc3 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.metrics; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; + +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +public class AccordClientRequestMetrics extends ClientRequestMetrics +{ + public final Meter preempts; + public final Histogram keySize; + + public AccordClientRequestMetrics(String scope) + { + super(scope); + + preempts = Metrics.meter(factory.createMetricName("Preempts")); + keySize = Metrics.histogram(factory.createMetricName("KeySizeHistogram"), false); + } + + @Override + public void release() + { + super.release(); + Metrics.remove(factory.createMetricName("Preempts")); + Metrics.remove(factory.createMetricName("KeySizeHistogram")); + } +} diff --git a/src/java/org/apache/cassandra/metrics/Sampler.java b/src/java/org/apache/cassandra/metrics/Sampler.java index 4c4739b32984..b32d8111e2e7 100644 --- a/src/java/org/apache/cassandra/metrics/Sampler.java +++ b/src/java/org/apache/cassandra/metrics/Sampler.java @@ -170,6 +170,7 @@ public void updateEndTime(long endTimeMillis) */ public static class Sample implements Serializable { + private static final long serialVersionUID = 0; // for simulator support public final S value; public final long count; public final long error; diff --git a/src/java/org/apache/cassandra/net/ForwardingInfo.java b/src/java/org/apache/cassandra/net/ForwardingInfo.java index 7a117bd99915..bb8880c5b874 100644 --- a/src/java/org/apache/cassandra/net/ForwardingInfo.java +++ b/src/java/org/apache/cassandra/net/ForwardingInfo.java @@ -40,6 +40,7 @@ */ public final class ForwardingInfo implements Serializable { + private static final long serialVersionUID = 0; // for simulator support final List targets; final long[] messageIds; diff --git a/src/java/org/apache/cassandra/net/Message.java b/src/java/org/apache/cassandra/net/Message.java index ead4317576ab..f6ca39cd853d 100644 --- a/src/java/org/apache/cassandra/net/Message.java +++ b/src/java/org/apache/cassandra/net/Message.java @@ -34,6 +34,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.messages.ReplyContext; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.io.IVersionedAsymmetricSerializer; @@ -71,7 +72,7 @@ * * @param The type of the message payload. */ -public class Message +public class Message implements ReplyContext { private static final Logger logger = LoggerFactory.getLogger(Message.class); private static final NoSpamLogger noSpam1m = NoSpamLogger.getLogger(logger, 1, TimeUnit.MINUTES); diff --git a/src/java/org/apache/cassandra/net/MessagingService.java b/src/java/org/apache/cassandra/net/MessagingService.java index f26d35ad95a4..bb672e5b0f20 100644 --- a/src/java/org/apache/cassandra/net/MessagingService.java +++ b/src/java/org/apache/cassandra/net/MessagingService.java @@ -240,6 +240,8 @@ public enum Version } } + public static final Version MIN_ACCORD_VERSION = Version.VERSION_51; + public final int value; Version(int value) @@ -256,6 +258,17 @@ public static List supportedVersions() return Collections.unmodifiableList(versions); } + + public List greaterThanOrEqual() + { + Version[] all = Version.values(); + if (ordinal() == all.length - 1) + return Collections.singletonList(this); + List values = new ArrayList<>(all.length - ordinal()); + for (int i = ordinal(); i < all.length; i++) + values.add(all[i]); + return values; + } } // Maintance Note: // Try to keep Version enum in-sync for testing. By having the versions in the enum tests can get access without forcing this class diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index c2cce663efd6..377bb1dc5edc 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -21,6 +21,7 @@ import java.lang.reflect.Modifier; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import java.util.function.Supplier; import java.util.function.ToLongFunction; @@ -76,6 +77,21 @@ import org.apache.cassandra.schema.SchemaPullVerbHandler; import org.apache.cassandra.schema.SchemaPushVerbHandler; import org.apache.cassandra.schema.SchemaVersionVerbHandler; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.serializers.AcceptSerializers; +import org.apache.cassandra.service.accord.serializers.ApplySerializers; +import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; +import org.apache.cassandra.service.accord.serializers.CheckStatusSerializers; +import org.apache.cassandra.service.accord.serializers.CommitSerializers; +import org.apache.cassandra.service.accord.serializers.EnumSerializer; +import org.apache.cassandra.service.accord.serializers.GetDepsSerializers; +import org.apache.cassandra.service.accord.serializers.InformDurableSerializers; +import org.apache.cassandra.service.accord.serializers.InformHomeDurableSerializers; +import org.apache.cassandra.service.accord.serializers.InformOfTxnIdSerializers; +import org.apache.cassandra.service.accord.serializers.PreacceptSerializers; +import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; +import org.apache.cassandra.service.accord.serializers.RecoverySerializers; +import org.apache.cassandra.service.accord.serializers.WaitOnCommitSerializer; import org.apache.cassandra.service.paxos.PaxosCommit; import org.apache.cassandra.service.paxos.PaxosCommitAndPrepare; import org.apache.cassandra.service.paxos.PaxosPrepare; @@ -114,6 +130,7 @@ import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.concurrent.Stage.*; +import static org.apache.cassandra.net.ResponseHandlerSupplier.RESPONSE_HANDLER; import static org.apache.cassandra.net.VerbTimeouts.*; import static org.apache.cassandra.net.Verb.Kind.*; import static org.apache.cassandra.net.Verb.Priority.*; @@ -126,35 +143,36 @@ /** * Note that priorities except P0 are presently unused. P0 corresponds to urgent, i.e. what used to be the "Gossip" connection. */ +@SuppressWarnings("Convert2MethodRef") // we must defer all initialisation, which includes e.g. taking a method reference to a static object/singleton, which this inspection does not disambiguate public enum Verb { - MUTATION_RSP (60, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + MUTATION_RSP (60, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), MUTATION_REQ (0, P3, writeTimeout, MUTATION, () -> Mutation.serializer, () -> MutationVerbHandler.instance, MUTATION_RSP ), - HINT_RSP (61, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + HINT_RSP (61, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), HINT_REQ (1, P4, writeTimeout, MUTATION, () -> HintMessage.serializer, () -> HintVerbHandler.instance, HINT_RSP ), - READ_REPAIR_RSP (62, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + READ_REPAIR_RSP (62, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), READ_REPAIR_REQ (2, P1, writeTimeout, MUTATION, () -> Mutation.serializer, () -> ReadRepairVerbHandler.instance, READ_REPAIR_RSP ), - BATCH_STORE_RSP (65, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + BATCH_STORE_RSP (65, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), BATCH_STORE_REQ (5, P3, writeTimeout, MUTATION, () -> Batch.serializer, () -> BatchStoreVerbHandler.instance, BATCH_STORE_RSP ), - BATCH_REMOVE_RSP (66, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + BATCH_REMOVE_RSP (66, P1, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), BATCH_REMOVE_REQ (6, P3, writeTimeout, MUTATION, () -> TimeUUID.Serializer.instance, () -> BatchRemoveVerbHandler.instance, BATCH_REMOVE_RSP ), - PAXOS_PREPARE_RSP (93, P2, writeTimeout, REQUEST_RESPONSE, () -> PrepareResponse.serializer, () -> ResponseVerbHandler.instance ), + PAXOS_PREPARE_RSP (93, P2, writeTimeout, REQUEST_RESPONSE, () -> PrepareResponse.serializer, RESPONSE_HANDLER ), PAXOS_PREPARE_REQ (33, P2, writeTimeout, MUTATION, () -> Commit.serializer, () -> PrepareVerbHandler.instance, PAXOS_PREPARE_RSP ), - PAXOS_PROPOSE_RSP (94, P2, writeTimeout, REQUEST_RESPONSE, () -> BooleanSerializer.serializer, () -> ResponseVerbHandler.instance ), + PAXOS_PROPOSE_RSP (94, P2, writeTimeout, REQUEST_RESPONSE, () -> BooleanSerializer.serializer, RESPONSE_HANDLER ), PAXOS_PROPOSE_REQ (34, P2, writeTimeout, MUTATION, () -> Commit.serializer, () -> ProposeVerbHandler.instance, PAXOS_PROPOSE_RSP ), - PAXOS_COMMIT_RSP (95, P2, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + PAXOS_COMMIT_RSP (95, P2, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), PAXOS_COMMIT_REQ (35, P2, writeTimeout, MUTATION, () -> Agreed.serializer, () -> PaxosCommit.requestHandler, PAXOS_COMMIT_RSP ), - TRUNCATE_RSP (79, P0, truncateTimeout, REQUEST_RESPONSE, () -> TruncateResponse.serializer, () -> ResponseVerbHandler.instance ), + TRUNCATE_RSP (79, P0, truncateTimeout, REQUEST_RESPONSE, () -> TruncateResponse.serializer, RESPONSE_HANDLER ), TRUNCATE_REQ (19, P0, truncateTimeout, MUTATION, () -> TruncateRequest.serializer, () -> TruncateVerbHandler.instance, TRUNCATE_RSP ), - COUNTER_MUTATION_RSP (84, P1, counterTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + COUNTER_MUTATION_RSP (84, P1, counterTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), COUNTER_MUTATION_REQ (24, P2, counterTimeout, COUNTER_MUTATION, () -> CounterMutation.serializer, () -> CounterMutationVerbHandler.instance, COUNTER_MUTATION_RSP), - READ_RSP (63, P2, readTimeout, REQUEST_RESPONSE, () -> ReadResponse.serializer, () -> ResponseVerbHandler.instance ), + READ_RSP (63, P2, readTimeout, REQUEST_RESPONSE, () -> ReadResponse.serializer, RESPONSE_HANDLER ), READ_REQ (3, P3, readTimeout, READ, () -> ReadCommand.serializer, () -> ReadCommandVerbHandler.instance, READ_RSP ), - RANGE_RSP (69, P2, rangeTimeout, REQUEST_RESPONSE, () -> ReadResponse.serializer, () -> ResponseVerbHandler.instance ), + RANGE_RSP (69, P2, rangeTimeout, REQUEST_RESPONSE, () -> ReadResponse.serializer, RESPONSE_HANDLER ), RANGE_REQ (9, P3, rangeTimeout, READ, () -> ReadCommand.serializer, () -> ReadCommandVerbHandler.instance, RANGE_RSP ), GOSSIP_DIGEST_SYN (14, P0, longTimeout, GOSSIP, () -> GossipDigestSyn.serializer, () -> GossipDigestSynVerbHandler.instance ), @@ -162,21 +180,21 @@ public enum Verb GOSSIP_DIGEST_ACK2 (16, P0, longTimeout, GOSSIP, () -> GossipDigestAck2.serializer, () -> GossipDigestAck2VerbHandler.instance ), GOSSIP_SHUTDOWN (29, P0, rpcTimeout, GOSSIP, () -> GossipShutdown.serializer, () -> GossipShutdownVerbHandler.instance ), - ECHO_RSP (91, P0, rpcTimeout, GOSSIP, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + ECHO_RSP (91, P0, rpcTimeout, GOSSIP, () -> NoPayload.serializer, RESPONSE_HANDLER ), ECHO_REQ (31, P0, rpcTimeout, GOSSIP, () -> NoPayload.serializer, () -> EchoVerbHandler.instance, ECHO_RSP ), - PING_RSP (97, P1, pingTimeout, GOSSIP, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + PING_RSP (97, P1, pingTimeout, GOSSIP, () -> NoPayload.serializer, RESPONSE_HANDLER ), PING_REQ (37, P1, pingTimeout, GOSSIP, () -> PingRequest.serializer, () -> PingVerbHandler.instance, PING_RSP ), // P1 because messages can be arbitrarily large or aren't crucial @Deprecated (since = "CEP-21") - SCHEMA_PUSH_RSP (98, P1, rpcTimeout, MIGRATION, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + SCHEMA_PUSH_RSP (98, P1, rpcTimeout, MIGRATION, () -> NoPayload.serializer, RESPONSE_HANDLER ), @Deprecated (since = "CEP-21") SCHEMA_PUSH_REQ (18, P1, rpcTimeout, MIGRATION, () -> SchemaMutationsSerializer.instance, () -> SchemaPushVerbHandler.instance, SCHEMA_PUSH_RSP ), @Deprecated (since = "CEP-21") - SCHEMA_PULL_RSP (88, P1, rpcTimeout, MIGRATION, () -> SchemaMutationsSerializer.instance, () -> ResponseVerbHandler.instance ), + SCHEMA_PULL_RSP (88, P1, rpcTimeout, MIGRATION, () -> SchemaMutationsSerializer.instance, RESPONSE_HANDLER ), @Deprecated (since = "CEP-21") SCHEMA_PULL_REQ (28, P1, rpcTimeout, MIGRATION, () -> NoPayload.serializer, () -> SchemaPullVerbHandler.instance, SCHEMA_PULL_RSP ), - SCHEMA_VERSION_RSP (80, P1, rpcTimeout, MIGRATION, () -> UUIDSerializer.serializer, () -> ResponseVerbHandler.instance ), + SCHEMA_VERSION_RSP (80, P1, rpcTimeout, MIGRATION, () -> UUIDSerializer.serializer, RESPONSE_HANDLER ), SCHEMA_VERSION_REQ (20, P1, rpcTimeout, MIGRATION, () -> NoPayload.serializer, () -> SchemaVersionVerbHandler.instance, SCHEMA_VERSION_RSP ), // repair; mostly doesn't use callbacks and sends responses as their own request messages, with matching sessions by uuid; should eventually harmonize and make idiomatic @@ -198,31 +216,31 @@ public enum Verb STATUS_RSP (115, P1, repairTimeout, ANTI_ENTROPY, () -> StatusResponse.serializer, () -> RepairMessageVerbHandler.instance(), REPAIR_RSP ), STATUS_REQ (114, P1, repairTimeout, ANTI_ENTROPY, () -> StatusRequest.serializer, () -> RepairMessageVerbHandler.instance(), REPAIR_RSP ), - REPLICATION_DONE_RSP (82, P0, rpcTimeout, MISC, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + REPLICATION_DONE_RSP (82, P0, rpcTimeout, MISC, () -> NoPayload.serializer, RESPONSE_HANDLER ), REPLICATION_DONE_REQ (22, P0, rpcTimeout, MISC, () -> NoPayload.serializer, () -> ReplicationDoneVerbHandler.instance, REPLICATION_DONE_RSP), - SNAPSHOT_RSP (87, P0, rpcTimeout, MISC, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + SNAPSHOT_RSP (87, P0, rpcTimeout, MISC, () -> NoPayload.serializer, RESPONSE_HANDLER ), SNAPSHOT_REQ (27, P0, rpcTimeout, MISC, () -> SnapshotCommand.serializer, () -> SnapshotVerbHandler.instance, SNAPSHOT_RSP ), PAXOS2_COMMIT_REMOTE_REQ (38, P2, writeTimeout, MUTATION, () -> Mutation.serializer, () -> MutationVerbHandler.instance, MUTATION_RSP ), - PAXOS2_COMMIT_REMOTE_RSP (39, P2, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), - PAXOS2_PREPARE_RSP (50, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepare.responseSerializer, () -> ResponseVerbHandler.instance ), + PAXOS2_COMMIT_REMOTE_RSP (39, P2, writeTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), + PAXOS2_PREPARE_RSP (50, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepare.responseSerializer, RESPONSE_HANDLER ), PAXOS2_PREPARE_REQ (40, P2, writeTimeout, MUTATION, () -> PaxosPrepare.requestSerializer, () -> PaxosPrepare.requestHandler, PAXOS2_PREPARE_RSP ), - PAXOS2_PREPARE_REFRESH_RSP (51, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepareRefresh.responseSerializer, () -> ResponseVerbHandler.instance ), + PAXOS2_PREPARE_REFRESH_RSP (51, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepareRefresh.responseSerializer, RESPONSE_HANDLER ), PAXOS2_PREPARE_REFRESH_REQ (41, P2, writeTimeout, MUTATION, () -> PaxosPrepareRefresh.requestSerializer, () -> PaxosPrepareRefresh.requestHandler, PAXOS2_PREPARE_REFRESH_RSP ), - PAXOS2_PROPOSE_RSP (52, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPropose.responseSerializer, () -> ResponseVerbHandler.instance ), + PAXOS2_PROPOSE_RSP (52, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPropose.responseSerializer, RESPONSE_HANDLER ), PAXOS2_PROPOSE_REQ (42, P2, writeTimeout, MUTATION, () -> PaxosPropose.requestSerializer, () -> PaxosPropose.requestHandler, PAXOS2_PROPOSE_RSP ), - PAXOS2_COMMIT_AND_PREPARE_RSP (53, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepare.responseSerializer, () -> ResponseVerbHandler.instance ), + PAXOS2_COMMIT_AND_PREPARE_RSP (53, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepare.responseSerializer, RESPONSE_HANDLER ), PAXOS2_COMMIT_AND_PREPARE_REQ (43, P2, writeTimeout, MUTATION, () -> PaxosCommitAndPrepare.requestSerializer, () -> PaxosCommitAndPrepare.requestHandler, PAXOS2_COMMIT_AND_PREPARE_RSP ), - PAXOS2_REPAIR_RSP (54, P2, writeTimeout, PAXOS_REPAIR, () -> PaxosRepair.responseSerializer, () -> ResponseVerbHandler.instance ), + PAXOS2_REPAIR_RSP (54, P2, writeTimeout, PAXOS_REPAIR, () -> PaxosRepair.responseSerializer, RESPONSE_HANDLER ), PAXOS2_REPAIR_REQ (44, P2, writeTimeout, PAXOS_REPAIR, () -> PaxosRepair.requestSerializer, () -> PaxosRepair.requestHandler, PAXOS2_REPAIR_RSP ), - PAXOS2_CLEANUP_START_PREPARE_RSP (55, P2, repairTimeout, PAXOS_REPAIR, () -> PaxosCleanupHistory.serializer, () -> ResponseVerbHandler.instance ), + PAXOS2_CLEANUP_START_PREPARE_RSP (55, P2, repairTimeout, PAXOS_REPAIR, () -> PaxosCleanupHistory.serializer, RESPONSE_HANDLER ), PAXOS2_CLEANUP_START_PREPARE_REQ (45, P2, repairTimeout, PAXOS_REPAIR, () -> PaxosStartPrepareCleanup.serializer, () -> PaxosStartPrepareCleanup.verbHandler, PAXOS2_CLEANUP_START_PREPARE_RSP ), - PAXOS2_CLEANUP_RSP (56, P2, repairTimeout, PAXOS_REPAIR, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + PAXOS2_CLEANUP_RSP (56, P2, repairTimeout, PAXOS_REPAIR, () -> NoPayload.serializer, RESPONSE_HANDLER ), PAXOS2_CLEANUP_REQ (46, P2, repairTimeout, PAXOS_REPAIR, () -> PaxosCleanupRequest.serializer, () -> PaxosCleanupRequest.verbHandler, PAXOS2_CLEANUP_RSP ), PAXOS2_CLEANUP_RSP2 (57, P2, repairTimeout, PAXOS_REPAIR, () -> PaxosCleanupResponse.serializer, () -> PaxosCleanupResponse.verbHandler ), - PAXOS2_CLEANUP_FINISH_PREPARE_RSP(58, P2, repairTimeout, PAXOS_REPAIR, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + PAXOS2_CLEANUP_FINISH_PREPARE_RSP(58, P2, repairTimeout, PAXOS_REPAIR, () -> NoPayload.serializer, RESPONSE_HANDLER ), PAXOS2_CLEANUP_FINISH_PREPARE_REQ(47, P2, repairTimeout, IMMEDIATE, () -> PaxosCleanupHistory.serializer, () -> PaxosFinishPrepareCleanup.verbHandler, PAXOS2_CLEANUP_FINISH_PREPARE_RSP), - PAXOS2_CLEANUP_COMPLETE_RSP (59, P2, repairTimeout, PAXOS_REPAIR, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + PAXOS2_CLEANUP_COMPLETE_RSP (59, P2, repairTimeout, PAXOS_REPAIR, () -> NoPayload.serializer, RESPONSE_HANDLER ), PAXOS2_CLEANUP_COMPLETE_REQ (48, P2, repairTimeout, PAXOS_REPAIR, () -> PaxosCleanupComplete.serializer, () -> PaxosCleanupComplete.verbHandler, PAXOS2_CLEANUP_COMPLETE_RSP ), // transactional cluster metadata @@ -247,8 +265,47 @@ public enum Verb DATA_MOVEMENT_EXECUTED_RSP (816, P1, rpcTimeout, MISC, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), DATA_MOVEMENT_EXECUTED_REQ (817, P1, rpcTimeout, MISC, () -> DataMovement.Status.serializer, () -> DataMovements.instance, DATA_MOVEMENT_EXECUTED_RSP ), + // accord + ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER), + + ACCORD_PREACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER), + ACCORD_PREACCEPT_REQ (120, P2, writeTimeout, ACCORD, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PREACCEPT_RSP), + + ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER), + ACCORD_ACCEPT_REQ (122, P2, writeTimeout, ACCORD, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, ACCORD, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), + + ACCORD_READ_RSP (128, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER), + ACCORD_READ_REQ (127, P2, writeTimeout, ACCORD, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_REQ (125, P2, writeTimeout, ACCORD, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_INVALIDATE_REQ (126, P2, writeTimeout, ACCORD, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler()), + + ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER), + ACCORD_APPLY_REQ (129, P2, writeTimeout, ACCORD, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP), + + ACCORD_RECOVER_RSP (134, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER), + ACCORD_RECOVER_REQ (133, P2, writeTimeout, ACCORD, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_RECOVER_RSP ), + ACCORD_BEGIN_INVALIDATE_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER), + ACCORD_BEGIN_INVALIDATE_REQ (135, P2, writeTimeout, ACCORD, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP), + ACCORD_WAIT_COMMIT_RSP (138, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER), + ACCORD_WAIT_COMMIT_REQ (137, P2, writeTimeout, ACCORD, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_COMMIT_RSP), + + ACCORD_INFORM_OF_TXNID_RSP(140, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER), + ACCORD_INFORM_OF_TXNID_REQ(139, P2, writeTimeout, ACCORD, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_INFORM_OF_TXNID_RSP), + + ACCORD_INFORM_HOME_DURABLE_REQ(141, P2, writeTimeout, ACCORD, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP), + + ACCORD_INFORM_DURABLE_REQ(143, P2, writeTimeout, ACCORD, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP), + + ACCORD_CHECK_STATUS_RSP (146, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER), + ACCORD_CHECK_STATUS_REQ (145, P2, writeTimeout, ACCORD, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP), + + ACCORD_GET_DEPS_RSP (148, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER), + ACCORD_GET_DEPS_REQ (147, P2, writeTimeout, ACCORD, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP), + + // generic failure response - FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailureReason.serializer, () -> ResponseVerbHandler.instance ), + FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailureReason.serializer, RESPONSE_HANDLER ), // dummy verbs _TRACE (30, P1, rpcTimeout, TRACING, () -> NoPayload.serializer, () -> null ), @@ -318,20 +375,30 @@ public enum Kind */ Verb(int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler) { - this(id, priority, expiration, stage, serializer, handler, null); + this(NORMAL, id, priority, expiration, stage, serializer, handler, null, null); + } + + Verb(int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Predicate isFinalReply) + { + this(NORMAL, id, priority, expiration, stage, serializer, handler, null, isFinalReply); } Verb(int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Verb responseVerb) { - this(NORMAL, id, priority, expiration, stage, serializer, handler, responseVerb); + this(NORMAL, id, priority, expiration, stage, serializer, handler, responseVerb, null); } Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler) { - this(kind, id, priority, expiration, stage, serializer, handler, null); + this(kind, id, priority, expiration, stage, serializer, handler, null, null); } - Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Verb responseVerb) + Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Predicate isFinalReply) + { + this(kind, id, priority, expiration, stage, serializer, handler, null, isFinalReply); + } + + Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Verb responseVerb, Predicate isFinalReply) { this.stage = stage; if (id < 0) @@ -380,7 +447,7 @@ public long expiresAfterNanos() // this is a little hacky, but reduces the number of parameters up top public boolean isResponse() { - return handler.get() == ResponseVerbHandler.instance; + return handler == RESPONSE_HANDLER; } @VisibleForTesting @@ -534,3 +601,8 @@ class VerbTimeouts return rpcTimeout.applyAsLong(units); }; } + +class ResponseHandlerSupplier +{ + static final Supplier> RESPONSE_HANDLER = () -> ResponseVerbHandler.instance; +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/schema/ColumnMetadata.java b/src/java/org/apache/cassandra/schema/ColumnMetadata.java index e28312532be0..b00f34ed4cf2 100644 --- a/src/java/org/apache/cassandra/schema/ColumnMetadata.java +++ b/src/java/org/apache/cassandra/schema/ColumnMetadata.java @@ -19,7 +19,13 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Optional; import java.util.function.Predicate; import javax.annotation.Nonnull; @@ -29,24 +35,33 @@ import com.google.common.base.MoreObjects; import com.google.common.collect.Collections2; import com.google.common.collect.Lists; +import org.github.jamm.Unmetered; -import org.apache.cassandra.cql3.*; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.CqlBuilder; +import org.apache.cassandra.cql3.VariableSpecifications; import org.apache.cassandra.cql3.constraints.ColumnConstraint; import org.apache.cassandra.cql3.constraints.ColumnConstraints; import org.apache.cassandra.cql3.functions.masking.ColumnMask; import org.apache.cassandra.cql3.selection.Selectable; import org.apache.cassandra.cql3.selection.Selector; import org.apache.cassandra.cql3.selection.SimpleSelector; -import org.apache.cassandra.db.rows.*; -import org.apache.cassandra.db.marshal.*; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.ReversedType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UserType; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.db.rows.ColumnData; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.serializers.MarshalException; import org.apache.cassandra.tcm.serialization.UDTAndFunctionsAwareMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.serializers.MarshalException; import org.apache.cassandra.utils.ByteBufferUtil; -import org.github.jamm.Unmetered; import static org.apache.cassandra.db.TypeSizes.BOOL_SIZE; import static org.apache.cassandra.db.TypeSizes.sizeof; @@ -396,12 +411,12 @@ public boolean equals(Object o) private boolean equalsWithoutType(ColumnMetadata other) { return name.equals(other.name) - && kind == other.kind - && position == other.position - && ksName.equals(other.ksName) - && cfName.equals(other.cfName) - && Objects.equals(mask, other.mask) - && Objects.equals(columnConstraints, other.columnConstraints); + && kind == other.kind + && position == other.position + && ksName.equals(other.ksName) + && cfName.equals(other.cfName) + && Objects.equals(mask, other.mask) + && Objects.equals(columnConstraints, other.columnConstraints); } Optional compare(ColumnMetadata other) diff --git a/src/java/org/apache/cassandra/schema/Schema.java b/src/java/org/apache/cassandra/schema/Schema.java index afb987a7a0c1..24d76239d7b8 100644 --- a/src/java/org/apache/cassandra/schema/Schema.java +++ b/src/java/org/apache/cassandra/schema/Schema.java @@ -31,6 +31,7 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.virtual.VirtualKeyspaceRegistry; @@ -41,6 +42,7 @@ import org.apache.cassandra.exceptions.UnauthorizedException; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.locator.LocalStrategy; +import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.transformations.AlterSchema; @@ -73,10 +75,14 @@ public final class Schema implements SchemaProvider private static Schema initialize() { - Keyspaces initialLocal = ((FORCE_LOAD_LOCAL_KEYSPACES || isDaemonInitialized() || isToolInitialized())) - ? Keyspaces.of(SchemaKeyspace.metadata(), - SystemKeyspace.metadata()) - : Keyspaces.NONE; + Keyspaces initialLocal = Keyspaces.NONE; + + if (FORCE_LOAD_LOCAL_KEYSPACES || isDaemonInitialized() || isToolInitialized()) + { + initialLocal = Keyspaces.of(SchemaKeyspace.metadata(), SystemKeyspace.metadata()); + initialLocal = DatabaseDescriptor.getAccordTransactionsEnabled() ? initialLocal.with(AccordKeyspace.metadata()) : initialLocal; + } + Schema schema = new Schema(initialLocal); for (KeyspaceMetadata ks : schema.localKeyspaces) schema.localKeyspaceInstances.put(ks.name, new LazyVariable<>(() -> Keyspace.forSchema(ks.name, schema))); diff --git a/src/java/org/apache/cassandra/schema/SchemaConstants.java b/src/java/org/apache/cassandra/schema/SchemaConstants.java index 2323893c4b3b..f264c7442779 100644 --- a/src/java/org/apache/cassandra/schema/SchemaConstants.java +++ b/src/java/org/apache/cassandra/schema/SchemaConstants.java @@ -46,6 +46,7 @@ public final class SchemaConstants public static final String METADATA_KEYSPACE_NAME = "system_cluster_metadata"; public static final String TRACE_KEYSPACE_NAME = "system_traces"; + public static final String ACCORD_KEYSPACE_NAME = "system_accord"; public static final String AUTH_KEYSPACE_NAME = "system_auth"; public static final String DISTRIBUTED_KEYSPACE_NAME = "system_distributed"; @@ -58,7 +59,7 @@ public final class SchemaConstants /* system keyspace names (the ones with LocalStrategy replication strategy) */ public static final Set LOCAL_SYSTEM_KEYSPACE_NAMES = - ImmutableSet.of(SYSTEM_KEYSPACE_NAME, SCHEMA_KEYSPACE_NAME); + ImmutableSet.of(SYSTEM_KEYSPACE_NAME, SCHEMA_KEYSPACE_NAME, ACCORD_KEYSPACE_NAME); /* virtual table system keyspace names */ public static final Set VIRTUAL_SYSTEM_KEYSPACE_NAMES = diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index b3ed1c2702e1..ce8fa750b38c 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -343,7 +343,7 @@ static void saveSystemKeyspacesSchema() for (String schemaTable : ALL) { String query = String.format("DELETE FROM %s.%s USING TIMESTAMP ? WHERE keyspace_name = ?", SchemaConstants.SCHEMA_KEYSPACE_NAME, schemaTable); - for (String systemKeyspace : SchemaConstants.LOCAL_SYSTEM_KEYSPACE_NAMES) + for (String systemKeyspace : Schema.instance.localKeyspaces().names()) executeOnceInternal(query, timestamp, systemKeyspace); } diff --git a/src/java/org/apache/cassandra/schema/SchemaProvider.java b/src/java/org/apache/cassandra/schema/SchemaProvider.java index 0e34ee55095d..daef867951c2 100644 --- a/src/java/org/apache/cassandra/schema/SchemaProvider.java +++ b/src/java/org/apache/cassandra/schema/SchemaProvider.java @@ -18,6 +18,7 @@ package org.apache.cassandra.schema; +import java.nio.ByteBuffer; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -26,6 +27,7 @@ import java.util.UUID; import javax.annotation.Nullable; +import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.functions.Function; import org.apache.cassandra.cql3.functions.FunctionName; import org.apache.cassandra.cql3.functions.UserFunction; @@ -147,6 +149,22 @@ default TableMetadataRef getTableMetadataRef(String keyspace, String table) return getTableMetadata(keyspace, table).ref; } + @Nullable + default ColumnMetadata getColumnMetadata(String keyspace, String table, ColumnIdentifier name) + { + TableMetadata metadata = getTableMetadata(keyspace, table); + if (metadata == null) return null; + return metadata.getColumn(name); + } + + @Nullable + default ColumnMetadata getColumnMetadata(String keyspace, String table, ByteBuffer name) + { + TableMetadata metadata = getTableMetadata(keyspace, table); + if (metadata == null) return null; + return metadata.getColumn(name); + } + default TableMetadata getExistingTableMetadata(TableId id) throws UnknownTableException { TableMetadata metadata = getTableMetadata(id); diff --git a/src/java/org/apache/cassandra/schema/TableId.java b/src/java/org/apache/cassandra/schema/TableId.java index d6b7b141a94a..eaf2166bb053 100644 --- a/src/java/org/apache/cassandra/schema/TableId.java +++ b/src/java/org/apache/cassandra/schema/TableId.java @@ -28,6 +28,8 @@ import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.commons.lang3.ArrayUtils; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.Pair; @@ -163,7 +165,15 @@ public void serialize(DataOutput out) throws IOException out.writeLong(id.getLeastSignificantBits()); } - public int serializedSize() + public int serialize(V dst, ValueAccessor accessor, int offset) + { + int position = offset; + position += accessor.putLong(dst, position, id.getMostSignificantBits()); + position += accessor.putLong(dst, position, id.getLeastSignificantBits()); + return position - offset; + } + + public final int serializedSize() { return 16; } @@ -173,6 +183,11 @@ public static TableId deserialize(DataInput in) throws IOException return new TableId(new UUID(in.readLong(), in.readLong())); } + public static TableId deserialize(V src, ValueAccessor accessor, int offset) throws IOException + { + return new TableId(new UUID(accessor.getLong(src, offset), accessor.getLong(src, offset + TypeSizes.LONG_SIZE))); + } + @Override public int compareTo(TableId o) { diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index 268111abd68f..1c22d955b7d8 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -42,8 +42,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.cassandra.auth.DataResource; import org.apache.cassandra.config.DatabaseDescriptor; @@ -94,7 +92,7 @@ public class TableMetadata implements SchemaElement { public static final Serializer serializer = new Serializer(); - private static final Logger logger = LoggerFactory.getLogger(TableMetadata.class); + public static final String UNDEFINED_COLUMN_NAME_MESSAGE = "Undefined column name %s in table %s"; // Please note that currently the only one truly useful flag is COUNTER, as the rest of the flags were about // differencing between CQL tables and the various types of COMPACT STORAGE tables (pre-4.0). As those "compact" @@ -472,7 +470,7 @@ public ColumnMetadata getExistingColumn(ColumnIdentifier name) { ColumnMetadata def = getColumn(name); if (def == null) - throw new InvalidRequestException(format("Undefined column name %s in table %s", name.toCQLString(), this)); + throw new InvalidRequestException(format(UNDEFINED_COLUMN_NAME_MESSAGE, name.toCQLString(), this)); return def; } /* diff --git a/src/java/org/apache/cassandra/serializers/ListSerializer.java b/src/java/org/apache/cassandra/serializers/ListSerializer.java index 020abd2f4647..6bd688858a14 100644 --- a/src/java/org/apache/cassandra/serializers/ListSerializer.java +++ b/src/java/org/apache/cassandra/serializers/ListSerializer.java @@ -20,17 +20,20 @@ import java.nio.BufferUnderflowException; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.function.Predicate; +import com.google.common.base.Preconditions; import com.google.common.collect.Range; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.utils.ByteBufferUtil; public class ListSerializer extends CollectionSerializer> { @@ -223,10 +226,29 @@ public Class> getType() } @Override - public ByteBuffer getSerializedValue(ByteBuffer collection, ByteBuffer key, AbstractType comparator) + public ByteBuffer getSerializedValue(ByteBuffer collection, ByteBuffer index, AbstractType comparator) { - // We don't allow selecting an element of a list, so we don't need this. - throw new UnsupportedOperationException(); + try + { + int n = readCollectionSize(collection, ByteBufferAccessor.instance); + // Start the offset after the (size of) the collection size we just read + int offset = sizeOfCollectionSize(); + int idx = ByteBufferUtil.toInt(index); + + Preconditions.checkElementIndex(idx, n); + + for (int i = 0; i <= idx; i++) + { + if (i == idx) + return readValue(collection, ByteBufferAccessor.instance, offset); + offset += skipValue(collection, ByteBufferAccessor.instance, offset); + } + throw new AssertionError("Asked to read index " + idx + " but never read the index"); + } + catch (BufferUnderflowException | IndexOutOfBoundsException e) + { + throw new MarshalException("Not enough bytes to read a list"); + } } @Override diff --git a/src/java/org/apache/cassandra/serializers/SetSerializer.java b/src/java/org/apache/cassandra/serializers/SetSerializer.java index 72a652d4a8e0..eb49e5c3d13c 100644 --- a/src/java/org/apache/cassandra/serializers/SetSerializer.java +++ b/src/java/org/apache/cassandra/serializers/SetSerializer.java @@ -116,7 +116,7 @@ public Set deserialize(V input, ValueAccessor accessor) l.add(elements.deserialize(value, accessor)); } if (!accessor.isEmptyFromOffset(input, offset)) - throw new MarshalException("Unexpected extraneous bytes after set value"); + throw new MarshalException("Unexpected extraneous bytes after set value" + l + "," + accessor.toHex(input)); return l; } catch (BufferUnderflowException | IndexOutOfBoundsException e) diff --git a/src/java/org/apache/cassandra/service/CASRequest.java b/src/java/org/apache/cassandra/service/CASRequest.java index 50ea5852a63a..f118dcf84722 100644 --- a/src/java/org/apache/cassandra/service/CASRequest.java +++ b/src/java/org/apache/cassandra/service/CASRequest.java @@ -17,10 +17,13 @@ */ package org.apache.cassandra.service; +import accord.primitives.Txn; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.transport.Dispatcher; @@ -34,17 +37,21 @@ public interface CASRequest /** * The command to use to fetch the value to compare for the CAS. */ - public SinglePartitionReadCommand readCommand(long nowInSec); + SinglePartitionReadCommand readCommand(long nowInSec); /** * Returns whether the provided CF, that represents the values fetched using the * readFilter(), match the CAS conditions this object stands for. */ - public boolean appliesTo(FilteredPartition current) throws InvalidRequestException; + boolean appliesTo(FilteredPartition current) throws InvalidRequestException; /** * The updates to perform of a CAS success. The values fetched using the readFilter() * are passed as argument. */ - public PartitionUpdate makeUpdates(FilteredPartition current, ClientState clientState, Ballot ballot) throws InvalidRequestException; + PartitionUpdate makeUpdates(FilteredPartition current, ClientState clientState, Ballot ballot) throws InvalidRequestException; + + Txn toAccordTxn(ClientState clientState, long nowInSecs); + + RowIterator toCasResult(TxnData data); } diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index 5f9266728172..94eb38c96bc4 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -47,6 +47,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.primitives.Txn; import org.apache.cassandra.batchlog.Batch; import org.apache.cassandra.batchlog.BatchlogManager; import org.apache.cassandra.concurrent.DebuggableTask.RunnableDebuggableTask; @@ -58,6 +59,7 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.CounterMutation; import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.EmptyIterators; import org.apache.cassandra.db.IMutation; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.MessageParams; @@ -122,6 +124,10 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.ContentionStrategy; @@ -152,6 +158,7 @@ import static com.google.common.collect.Iterables.concat; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static org.apache.cassandra.config.Config.LegacyPaxosStrategy.accord; import static org.apache.cassandra.db.ConsistencyLevel.SERIAL; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casWriteMetrics; @@ -322,9 +329,17 @@ public static RowIterator cas(String keyspaceName, key, keyspaceName, cfName)); } - return (Paxos.useV2() || keyspaceName.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) - ? Paxos.cas(key, request, consistencyForPaxos, consistencyForCommit, clientState) - : legacyCas(keyspaceName, cfName, key, request, consistencyForPaxos, consistencyForCommit, clientState, nowInSeconds, requestTime); + if (DatabaseDescriptor.getLegacyPaxosStrategy() == accord) + { + TxnData data = AccordService.instance().coordinate(request.toAccordTxn(clientState, nowInSeconds), consistencyForPaxos); + return request.toCasResult(data); + } + else + { + return (Paxos.useV2() || keyspaceName.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) + ? Paxos.cas(key, request, consistencyForPaxos, consistencyForCommit, clientState) + : legacyCas(keyspaceName, cfName, key, request, consistencyForPaxos, consistencyForCommit, clientState, nowInSeconds, requestTime); + } } public static RowIterator legacyCas(String keyspaceName, @@ -1850,7 +1865,7 @@ public static PartitionIterator read(SinglePartitionReadCommand.Group group, Con } return consistencyLevel.isSerialConsistency() - ? readWithPaxos(group, consistencyLevel, requestTime) + ? readWithConsensus(group, consistencyLevel, requestTime) : readRegular(group, consistencyLevel, requestTime); } @@ -1866,6 +1881,19 @@ public static boolean hasJoined() return metadata.myNodeState() == NodeState.JOINED; } + private static PartitionIterator readWithConsensus(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException + { + if (DatabaseDescriptor.getLegacyPaxosStrategy() == accord) + { + return readWithAccord(group, consistencyLevel); + } + else + { + return readWithPaxos(group, consistencyLevel, requestTime); + } + } + private static PartitionIterator readWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException { @@ -1874,6 +1902,20 @@ private static PartitionIterator readWithPaxos(SinglePartitionReadCommand.Group : legacyReadWithPaxos(group, consistencyLevel, requestTime); } + private static PartitionIterator readWithAccord(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel) + { + if (group.queries.size() > 1) + throw new InvalidRequestException("SERIAL/LOCAL_SERIAL consistency may only be requested for one partition at a time"); + TxnRead read = TxnRead.createSerialRead(group.queries.get(0)); + Txn txn = new Txn.InMemory(read.keys(), read, TxnQuery.ALL); + TxnData data = AccordService.instance().coordinate(txn, consistencyLevel); + FilteredPartition partition = data.get(TxnRead.SERIAL_READ); + if (partition != null) + return PartitionIterators.singletonIterator(partition.rowIterator()); + else + return EmptyIterators.partition(); + } + private static PartitionIterator legacyReadWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException { @@ -2818,6 +2860,9 @@ private static Future submitHint(HintRunnable runnable) public Long getTruncateRpcTimeout() { return DatabaseDescriptor.getTruncateRpcTimeout(MILLISECONDS); } public void setTruncateRpcTimeout(Long timeoutInMillis) { DatabaseDescriptor.setTruncateRpcTimeout(timeoutInMillis); } + public Long getTransactionTimeout() { return DatabaseDescriptor.getTransactionTimeout(MILLISECONDS); } + public void setTransactionTimeout(Long value) { DatabaseDescriptor.setTransactionTimeout(value); } + public Long getNativeTransportMaxConcurrentConnections() { return DatabaseDescriptor.getNativeTransportMaxConcurrentConnections(); } public void setNativeTransportMaxConcurrentConnections(Long nativeTransportMaxConcurrentConnections) { DatabaseDescriptor.setNativeTransportMaxConcurrentConnections(nativeTransportMaxConcurrentConnections); } diff --git a/src/java/org/apache/cassandra/service/StorageProxyMBean.java b/src/java/org/apache/cassandra/service/StorageProxyMBean.java index 395b49a84834..f89cdad95e70 100644 --- a/src/java/org/apache/cassandra/service/StorageProxyMBean.java +++ b/src/java/org/apache/cassandra/service/StorageProxyMBean.java @@ -21,7 +21,6 @@ import java.util.Map; import java.util.Set; - public interface StorageProxyMBean { public long getTotalHints(); @@ -52,6 +51,8 @@ public interface StorageProxyMBean public void setRangeRpcTimeout(Long timeoutInMillis); public Long getTruncateRpcTimeout(); public void setTruncateRpcTimeout(Long timeoutInMillis); + public Long getTransactionTimeout(); + public void setTransactionTimeout(Long timeoutInMillis); public void setNativeTransportMaxConcurrentConnections(Long nativeTransportMaxConcurrentConnections); public Long getNativeTransportMaxConcurrentConnections(); diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index bc914a9e55be..edc1cd0c96f6 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1258,6 +1258,17 @@ public long getTruncateRpcTimeout() return DatabaseDescriptor.getTruncateRpcTimeout(MILLISECONDS); } + public void setTransactionTimeout(long value) + { + DatabaseDescriptor.setTransactionTimeout(value); + logger.info("set transaction timeout to {} ms", value); + } + + public long getTransactionTimeout() + { + return DatabaseDescriptor.getTransactionTimeout(MILLISECONDS); + } + /** @deprecated See CASSANDRA-15234 */ @Deprecated(since = "4.1") public void setStreamThroughputMbPerSec(int value) diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index 74017b257995..6e8a2ad449f7 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -790,6 +790,9 @@ default int upgradeSSTables(String keyspaceName, boolean excludeCurrentVersion, public void setTruncateRpcTimeout(long value); public long getTruncateRpcTimeout(); + public void setTransactionTimeout(long value); + public long getTransactionTimeout(); + public void setStreamThroughputMbitPerSec(int value); /** * @return stream_throughput_outbound in megabits diff --git a/src/java/org/apache/cassandra/service/accord/AccordCallback.java b/src/java/org/apache/cassandra/service/accord/AccordCallback.java new file mode 100644 index 000000000000..60b5d6988a90 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordCallback.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.coordinate.Timeout; +import accord.messages.Callback; +import accord.messages.Reply; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.RequestCallback; + +class AccordCallback implements RequestCallback +{ + private static final Logger logger = LoggerFactory.getLogger(AccordCallback.class); + private final Callback callback; + + public AccordCallback(Callback callback) + { + this.callback = callback; + } + + @Override + public void onResponse(Message msg) + { + logger.debug("Received response {} from {}", msg.payload, msg.from()); + callback.onSuccess(EndpointMapping.endpointToId(msg.from()), msg.payload); + } + + private static Throwable convertReason(RequestFailureReason reason) + { + return reason == RequestFailureReason.TIMEOUT ? + new Timeout(null, null) : + new RuntimeException(reason.toString()); + } + + @Override + public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + { + logger.debug("Received failure {} from {} for {}", failureReason, from, callback); + // TODO (now): we should distinguish timeout failures with some placeholder Exception + callback.onFailure(EndpointMapping.endpointToId(from), convertReason(failureReason)); + } + + @Override + public boolean invokeOnFailure() + { + return true; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommand.java b/src/java/org/apache/cassandra/service/accord/AccordCommand.java new file mode 100644 index 000000000000..8020b29ac7a0 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordCommand.java @@ -0,0 +1,824 @@ +/* +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicInteger; + +import javax.annotation.Nullable; + +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.Data; +import accord.api.Result; +import accord.api.RoutingKey; +import accord.local.Command; +import accord.local.CommandStore; +import accord.local.CommandListener; +import accord.local.Listeners; +import accord.local.PreLoadContext; +import accord.local.SafeCommandStore; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.local.Status.Durability; +import accord.local.Status.Known; +import accord.primitives.Ballot; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.Route; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.utils.DeterministicIdentitySet; +import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.async.AsyncContext; +import org.apache.cassandra.service.accord.store.StoredNavigableMap; +import org.apache.cassandra.service.accord.store.StoredSet; +import org.apache.cassandra.service.accord.store.StoredValue; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; + +import static accord.local.Status.Durability.Local; +import static accord.local.Status.Durability.NotDurable; +import static accord.local.Status.PreApplied; +import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applyMapChanges; +import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applySetChanges; + +public class AccordCommand extends Command implements AccordState +{ + private static final Logger logger = LoggerFactory.getLogger(AccordCommand.class); + + private static final AtomicInteger INSTANCE_COUNTER = new AtomicInteger(0); + + private static final long EMPTY_SIZE = ObjectSizes.measure(new AccordCommand(null)); + + public static class WriteOnly extends AccordCommand implements AccordState.WriteOnly + { + private Future future = null; + + public WriteOnly(TxnId txnId) + { + super(txnId); + } + + @Override + public void future(Future future) + { + Preconditions.checkArgument(this.future == null); + this.future = future; + } + + @Override + public Future future() + { + return future; + } + + @Override + public void applyChanges(AccordCommand instance) + { + applySetChanges(this, instance, cmd -> cmd.waitingOnCommit); + applyMapChanges(this, instance, cmd -> cmd.waitingOnApply); + applySetChanges(this, instance, cmd -> cmd.blockingCommitOn); + applySetChanges(this, instance, cmd -> cmd.blockingApplyOn); + } + } + + private final TxnId txnId; + private final int instanceCount = INSTANCE_COUNTER.getAndIncrement(); + public final StoredValue> route; + public final StoredValue homeKey; + public final StoredValue progressKey; + public final StoredValue partialTxn; + public final StoredValue kind; // TODO: store this in TxnId + public final StoredValue promised; + public final StoredValue accepted; + public final StoredValue executeAt; + public final StoredValue partialDeps; + public final StoredValue writes; + public final StoredValue result; + + public final StoredValue.HistoryPreserving status; + public final StoredValue durability; + + public final StoredSet.Navigable waitingOnCommit; + public final StoredNavigableMap waitingOnApply; + public final StoredSet.Navigable blockingCommitOn; + public final StoredSet.Navigable blockingApplyOn; + + public final StoredSet.DeterministicIdentity storedListeners; + private final Listeners transientListeners; + + public AccordCommand(TxnId txnId) + { + logger.trace("Instantiating new command {} @ {}", txnId, instanceHash()); + this.txnId = txnId; + homeKey = new StoredValue<>(rw()); + progressKey = new StoredValue<>(rw()); + route = new StoredValue<>(rw()); + partialTxn = new StoredValue<>(rw()); + kind = new StoredValue<>(rw()); + promised = new StoredValue<>(rw()); + accepted = new StoredValue<>(rw()); + executeAt = new StoredValue<>(rw()); + partialDeps = new StoredValue<>(rw()); + writes = new StoredValue<>(rw()); + result = new StoredValue<>(rw()); + status = new StoredValue.HistoryPreserving<>(rw()); + durability = new StoredValue<>(rw()); + waitingOnCommit = new StoredSet.Navigable<>(rw()); + waitingOnApply = new StoredNavigableMap<>(rw()); + storedListeners = new StoredSet.DeterministicIdentity<>(rw()); + transientListeners = new Listeners(); + blockingCommitOn = new StoredSet.Navigable<>(rw()); + blockingApplyOn = new StoredSet.Navigable<>(rw()); + } + + @Override + public String toString() + { + return "AccordCommand{" + + "txnId=" + txnId + + ", instanceHash=" + instanceHash() + + ", status=" + status + + ", executeAt=" + executeAt + + ", promised=" + promised + + ", accepted=" + accepted + +// ", deps=" + deps + +// ", homeKey=" + homeKey + +// ", progressKey=" + progressKey + +// ", txn=" + txn + +// ", writes=" + writes + +// ", result=" + result + + // TODO: Should we have to check for isLoaded() here? + ", txn is null?=" + (!partialTxn.isLoaded() || partialTxn.get() == null) + + ", durability=" + durability + + ", waitingOnCommit=" + waitingOnCommit + + ", waitingOnApply=" + waitingOnApply + + ", storedListeners=" + storedListeners + + ", transientListeners=" + transientListeners + + ", blockingCommitOn=" + blockingCommitOn + + ", blockingApplyOn=" + blockingApplyOn + + '}'; + } + + @Override + public boolean isEmpty() + { + return homeKey.isEmpty() + || progressKey.isEmpty() + || route.isEmpty() + || partialTxn.isEmpty() + || promised.isEmpty() + || accepted.isEmpty() + || executeAt.isEmpty() + || partialDeps.isEmpty() + || writes.isEmpty() + || result.isEmpty() + || status.isEmpty() + || durability.isEmpty() + || waitingOnCommit.isEmpty() + || blockingCommitOn.isEmpty() + || waitingOnApply.isEmpty() + || blockingApplyOn.isEmpty() + || storedListeners.isEmpty(); + } + + public void setEmpty() + { + homeKey.setEmpty(); + progressKey.setEmpty(); + route.setEmpty(); + partialTxn.setEmpty(); + promised.setEmpty(); + accepted.setEmpty(); + executeAt.setEmpty(); + partialDeps.setEmpty(); + writes.setEmpty(); + result.setEmpty(); + status.setEmpty(); + durability.setEmpty(); + waitingOnCommit.setEmpty(); + blockingCommitOn.setEmpty(); + waitingOnApply.setEmpty(); + blockingApplyOn.setEmpty(); + storedListeners.setEmpty();; + } + + public AccordCommand initialize() + { + logger.trace("Initializing command {} @ {}", txnId, instanceHash()); + status.set(SaveStatus.NotWitnessed); + homeKey.set(null); + progressKey.set(null); + route.set(null); + partialTxn.set(null); + kind.set(null); + executeAt.load(null); + promised.set(Ballot.ZERO); + accepted.set(Ballot.ZERO); + partialDeps.set(PartialDeps.NONE); + writes.load(null); + result.load(null); + durability.set(Durability.NotDurable); + waitingOnCommit.load(new TreeSet<>()); + waitingOnApply.load(new TreeMap<>()); + blockingCommitOn.load(new TreeSet<>()); + blockingApplyOn.load(new TreeSet<>()); + storedListeners.load(new DeterministicIdentitySet<>()); + return this; + } + + @Override + public boolean isLoaded() + { + return homeKey.isLoaded() + && progressKey.isLoaded() + && route.isLoaded() + && partialTxn.isLoaded() + && promised.isLoaded() + && accepted.isLoaded() + && executeAt.isLoaded() + && partialDeps.isLoaded() + && writes.isLoaded() + && result.isLoaded() + && status.isLoaded() + && durability.isLoaded() + && waitingOnCommit.isLoaded() + && blockingCommitOn.isLoaded() + && waitingOnApply.isLoaded() + && blockingApplyOn.isLoaded() + && storedListeners.isLoaded(); + } + + public boolean isPartiallyLoaded() + { + return homeKey.isLoaded() + || progressKey.isLoaded() + || route.isLoaded() + || partialTxn.isLoaded() + || promised.isLoaded() + || accepted.isLoaded() + || executeAt.isLoaded() + || partialDeps.isLoaded() + || writes.isLoaded() + || result.isLoaded() + || status.isLoaded() + || durability.isLoaded() + || waitingOnCommit.isLoaded() + || blockingCommitOn.isLoaded() + || waitingOnApply.isLoaded() + || blockingApplyOn.isLoaded() + || storedListeners.isLoaded(); + } + + @Override + public boolean hasModifications() + { + return homeKey.hasModifications() + || progressKey.hasModifications() + || route.hasModifications() + || partialTxn.hasModifications() + || promised.hasModifications() + || accepted.hasModifications() + || executeAt.hasModifications() + || partialDeps.hasModifications() + || writes.hasModifications() + || result.hasModifications() + || status.hasModifications() + || durability.hasModifications() + || waitingOnCommit.hasModifications() + || blockingCommitOn.hasModifications() + || waitingOnApply.hasModifications() + || blockingApplyOn.hasModifications() + || storedListeners.hasModifications(); + } + + @Override + public void clearModifiedFlag() + { + logger.trace("Clearing modified flag on command {} @ {}", txnId, instanceHash()); + homeKey.clearModifiedFlag(); + progressKey.clearModifiedFlag(); + route.clearModifiedFlag(); + partialTxn.clearModifiedFlag(); + promised.clearModifiedFlag(); + accepted.clearModifiedFlag(); + executeAt.clearModifiedFlag(); + partialDeps.clearModifiedFlag(); + writes.clearModifiedFlag(); + result.clearModifiedFlag(); + status.clearModifiedFlag(); + durability.clearModifiedFlag(); + waitingOnCommit.clearModifiedFlag(); + blockingCommitOn.clearModifiedFlag(); + waitingOnApply.clearModifiedFlag(); + blockingApplyOn.clearModifiedFlag(); + storedListeners.clearModifiedFlag(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordCommand command = (AccordCommand) o; + return homeKey.equals(command.homeKey) + && progressKey.equals(command.progressKey) + && route.equals(command.route) + && txnId.equals(command.txnId) + && partialTxn.equals(command.partialTxn) + && promised.equals(command.promised) + && accepted.equals(command.accepted) + && executeAt.equals(command.executeAt) + && partialDeps.equals(command.partialDeps) + && writes.equals(command.writes) + && result.equals(command.result) + && status.equals(command.status) + && durability.equals(command.durability) + && waitingOnCommit.equals(command.waitingOnCommit) + && blockingCommitOn.equals(command.blockingCommitOn) + && waitingOnApply.equals(command.waitingOnApply) + && blockingApplyOn.equals(command.blockingApplyOn) + && storedListeners.equals(command.storedListeners) + && transientListeners.equals(command.transientListeners); + } + + boolean isReadOnly() + { + return false; + } + + private int instanceHash() + { +// return System.identityHashCode(this); + return instanceCount; + } + + @Override + public int hashCode() + { + return Objects.hash(txnId, + homeKey, + progressKey, + route, + partialTxn, + promised, + accepted, + executeAt, + partialDeps, + writes, + result, + status, + durability, + waitingOnCommit, + blockingCommitOn, + waitingOnApply, + blockingApplyOn, + storedListeners, + transientListeners); + } + + @Override + public TxnId key() + { + return txnId; + } + + @Override + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE; + size += AccordObjectSizes.timestamp(txnId); + size += homeKey.estimatedSizeOnHeap(AccordObjectSizes::key); + size += progressKey.estimatedSizeOnHeap(AccordObjectSizes::key); + size += route.estimatedSizeOnHeap(AccordObjectSizes::route); + size += partialTxn.estimatedSizeOnHeap(AccordObjectSizes::txn); + size += promised.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += accepted.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += executeAt.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += partialDeps.estimatedSizeOnHeap(AccordObjectSizes::dependencies); + size += writes.estimatedSizeOnHeap(AccordObjectSizes::writes); + size += result.estimatedSizeOnHeap(r -> ((TxnData) r).estimatedSizeOnHeap()); + size += status.estimatedSizeOnHeap(s -> 0); + size += durability.estimatedSizeOnHeap(s -> 0); + size += waitingOnCommit.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += blockingCommitOn.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += waitingOnApply.estimatedSizeOnHeap(AccordObjectSizes::timestamp, AccordObjectSizes::timestamp); + size += blockingApplyOn.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += storedListeners.estimatedSizeOnHeap(ListenerProxy::estimatedSizeOnHeap); + return size; + } + + public boolean shouldUpdateDenormalizedWaitingOn() + { + if (blockingCommitOn.getView().isEmpty() && blockingApplyOn.getView().isEmpty()) + return false; + return AccordPartialCommand.serializer.needsUpdate(this); + } + + @Override + public TxnId txnId() + { + return txnId; + } + + @Override + public RoutingKey homeKey() + { + return homeKey.get(); + } + + @Override + protected void setHomeKey(RoutingKey key) + { + homeKey.set(key); + } + + @Override + public RoutingKey progressKey() + { + return progressKey.get(); + } + + @Override + protected void setProgressKey(RoutingKey key) + { + progressKey.set(key); + } + + @Override + public Route route() + { + return route.get(); + } + + @Override + protected void setRoute(Route newRoute) + { + route.set(newRoute); + } + + @Override + public PartialTxn partialTxn() + { + return partialTxn.get(); + } + + @Override + public void setPartialTxn(PartialTxn txn) + { + this.partialTxn.set(txn); + //TODO remove. This was added to fix tests after Partial Replication was added, this was added for tests + this.kind.set(txn.kind()); + } + + @Override + public Ballot promised() + { + return promised.get(); + } + + @Override + public void setPromised(Ballot ballot) + { + this.promised.set(ballot); + } + + @Override + public Ballot accepted() + { + return accepted.get(); + } + + @Override + public void setAccepted(Ballot ballot) + { + this.accepted.set(ballot); + } + + @Override + public Timestamp executeAt() + { + return executeAt.get(); + } + + @Override + public void setExecuteAt(Timestamp timestamp) + { + Preconditions.checkState(!status().hasBeen(Status.Committed) || executeAt().equals(timestamp)); + this.executeAt.set(timestamp); + } + + @Override + public PartialDeps partialDeps() + { + return partialDeps.get(); + } + + @Override + public void setPartialDeps(PartialDeps deps) + { + this.partialDeps.set(deps); + } + + @Override + public Writes writes() + { + return writes.get(); + } + + @Override + public void setWrites(Writes writes) + { + this.writes.set(writes); + } + + @Override + public Result result() + { + return result.get(); + } + + @Override + public void setResult(Result result) + { + this.result.set(result); + } + + @Override + public SaveStatus saveStatus() + { + return status.get(); + } + + @Override + public void setSaveStatus(SaveStatus status) + { + this.status.set(status); + } + + @Override + public void setStatus(Status status) + { + super.setStatus(status); + } + + @Override + public Known known() + { + return this.status.get().known; + } + + @Override + public Durability durability() + { + Durability durability = this.durability.get(); + if (status().hasBeen(PreApplied) && durability == NotDurable) + return Local; // not necessary anywhere, but helps for logical consistency + return durability; + } + + @Override + public void setDurability(Durability v) + { + durability.set(v); + } + + @Override + protected void postApply(SafeCommandStore safeStore) + { + AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); + cache.cleanupWriteFuture(txnId); + super.postApply(safeStore); + } + + private boolean canApplyWithCurrentScope(SafeCommandStore safeStore) + { + Ranges ranges = safeStore.ranges().at(executeAt().epoch()); + Seekables keys = partialTxn().keys(); + for (int i=0,mi=keys.size(); i applyWithCorrectScope(CommandStore unsafeStore) + { + TxnId txnId = txnId(); + AsyncPromise promise = new AsyncPromise<>(); + unsafeStore.execute(this, safeStore -> { + AccordCommand command = (AccordCommand) safeStore.command(txnId); + command.apply(safeStore, false).addCallback((v, throwable) -> { + if (throwable != null) + promise.tryFailure(throwable); + else + promise.trySuccess(null); + }); + }); + return promise; + } + + private Future apply(SafeCommandStore safeStore, boolean canReschedule) + { + AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); + Future future = cache.getWriteFuture(txnId); + if (future != null) + return future; + + // this can be called via a listener callback, in which case we won't + // have the appropriate commandsForKey in scope, so start a new operation + // with the correct scope and notify the caller when that completes + if (!canApplyWithCurrentScope(safeStore)) + { + Preconditions.checkArgument(canReschedule); + return applyWithCorrectScope(safeStore.commandStore()); + } + + future = super.apply(safeStore); + cache.setWriteFuture(txnId, future); + return future; + } + + @Override + public Future apply(SafeCommandStore safeStore) + { + return apply(safeStore, true); + } + + @Override + public Future read(SafeCommandStore safeStore) + { + AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); + Future future = cache.getReadFuture(txnId); + if (future != null) + return future; + future = super.read(safeStore); + cache.setReadFuture(txnId, future); + return future; + } + + private CommandListener maybeWrapListener(CommandListener listener) + { + if (listener.isTransient()) + return listener; + + if (listener instanceof AccordCommand) + return new ListenerProxy.CommandListenerProxy(((AccordCommand) listener).txnId()); + + if (listener instanceof AccordCommandsForKey) + return new ListenerProxy.CommandsForKeyListenerProxy(((AccordCommandsForKey) listener).key()); + + //TODO - Support accord.messages.Defer + + throw new RuntimeException("Unhandled non-transient listener: " + listener); + } + + @Override + public Command addListener(CommandListener listener) + { + listener = maybeWrapListener(listener); + if (listener instanceof ListenerProxy) + storedListeners.blindAdd((ListenerProxy) listener); + else + transientListeners.add(listener); + return this; + } + + @Override + public void removeListener(CommandListener listener) + { + listener = maybeWrapListener(listener); + if (listener instanceof ListenerProxy) + storedListeners.blindRemove((ListenerProxy) listener); + else + transientListeners.remove(listener); + } + + public boolean hasListenerFor(TxnId txnId) + { + return storedListeners.getView().contains(new ListenerProxy.CommandListenerProxy(txnId)); + } + + @Override + public void notifyListeners(SafeCommandStore safeStore) + { + // TODO: efficiency (introduce BiConsumer method) + storedListeners.getView().forEach(l -> l.onChange(safeStore, this)); + transientListeners.forEach(listener -> { + PreLoadContext ctx = listener.listenerPreLoadContext(txnId()); + AsyncContext context = ((SafeAccordCommandStore)safeStore).context(); + if (context.containsScopedItems(ctx)) + { + logger.trace("{}: synchronously updating listener {}", txnId(), listener); + listener.onChange(safeStore, this); + } + else + { + logger.trace("{}: asynchronously updating listener {}", txnId(), listener); + safeStore.execute(ctx, reSafeStore -> { + listener.onChange(reSafeStore, reSafeStore.command(txnId())); + }); + } + }); + } + + @Override + public void addWaitingOnCommit(TxnId txnId) + { + waitingOnCommit.blindAdd(txnId); + } + + public boolean isWaitingOnCommit() + { + return !waitingOnCommit.getView().isEmpty(); + } + + @Override + public void removeWaitingOnCommit(TxnId txnId) + { + waitingOnCommit.blindRemove(txnId); + } + + @Override + public TxnId firstWaitingOnCommit() + { + if (!isWaitingOnCommit()) + return null; + return waitingOnCommit.getView().first(); + } + + @Override + public void addWaitingOnApplyIfAbsent(TxnId txnId, Timestamp executeAt) + { + waitingOnApply.blindPut(executeAt, txnId); + } + + public boolean isWaitingOnApply() + { + return !waitingOnApply.getView().isEmpty(); + } + + @Override + public void removeWaitingOn(TxnId txnId, Timestamp executeAt) + { + waitingOnCommit.blindRemove(txnId); + waitingOnApply.blindRemove(executeAt, txnId); + } + + @Override + public boolean isWaitingOnDependency() + { + return isWaitingOnCommit() || isWaitingOnApply(); + } + + @Override + public TxnId firstWaitingOnApply(@Nullable TxnId ifExecutesBefore) + { + if (!isWaitingOnApply()) + return null; + + Map.Entry first = waitingOnApply.getView().firstEntry(); + if (ifExecutesBefore == null || first.getKey().compareTo(ifExecutesBefore) < 0) + return first.getValue(); + + return null; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java new file mode 100644 index 000000000000..87c9c752d9b9 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -0,0 +1,490 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Comparator; +import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.BiFunction; +import java.util.function.BinaryOperator; +import java.util.function.Consumer; +import java.util.function.Function; + +import javax.annotation.Nullable; + +import accord.api.Agent; +import accord.api.DataStore; +import accord.api.Key; +import accord.api.ProgressLog; +import accord.impl.CommandsForKey; +import accord.local.Command; +import accord.local.CommandListener; +import accord.local.CommandStore; +import accord.local.CommandStores.RangesForEpoch; +import accord.local.CommandStores.RangesForEpochHolder; +import accord.local.NodeTimeService; +import accord.local.PreLoadContext; +import accord.local.SafeCommandStore; +import accord.local.Status; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.Routables; +import accord.primitives.Seekable; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.AbstractKeys; +import accord.primitives.TxnId; +import accord.utils.Invariants; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.async.AsyncContext; +import org.apache.cassandra.service.accord.async.AsyncOperation; +import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; + +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; + +public class AccordCommandStore extends CommandStore +{ + public class SafeAccordCommandStore implements SafeCommandStore + { + final RangesForEpoch rangesForEpoch; + final AsyncContext context; + + SafeAccordCommandStore(RangesForEpoch rangesForEpoch, AsyncContext context) + { + this.rangesForEpoch = rangesForEpoch; + this.context = context; + } + + public AsyncContext context() + { + return context; + } + + @Override + public Command command(TxnId txnId) + { + AccordCommand command = getCommandInternal(txnId); + if (command.isEmpty()) + command.initialize(); + return command; + } + + @Override + public Command ifPresent(TxnId txnId) + { + AccordCommand command = getCommandInternal(txnId); + return !command.isEmpty() ? command : null; + } + + @Override + public Command ifLoaded(TxnId txnId) + { + AccordCommand command = commandCache.getOrNull(txnId); + if (command != null && command.isLoaded()) + { + getContext().commands.add(command); + return command; + } + return null; + } + + public T mapReduce(Routables keysOrRanges, Function map, BinaryOperator reduce, T initialValue) + { + switch (keysOrRanges.domain()) { + default: + throw new AssertionError(); + case Key: + AbstractKeys keys = (AbstractKeys) keysOrRanges; + return keys.stream() + .map(this::commandsForKey) + .map(map) + .reduce(initialValue, reduce); + case Range: + // TODO: implement + throw new UnsupportedOperationException(); + } + } + + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + { + switch (keysOrRanges.domain()) { + default: + throw new AssertionError(); + case Key: + // TODO: efficiency + AbstractKeys keys = (AbstractKeys) keysOrRanges; + for (Key key : keys) + { + if (!slice.contains(key)) continue; + CommandsForKey forKey = commandsForKey(key); + accumulate = map.apply(forKey, accumulate); + if (accumulate.equals(terminalValue)) + return accumulate; + } + break; + case Range: + // TODO (required): implement + throw new UnsupportedOperationException(); + } + return accumulate; + } + + @Override + public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, T terminalValue) + { + accumulate = mapReduceForKey(keysOrRanges, slice, (forKey, prev) -> { + CommandsForKey.CommandTimeseries timeseries; + switch (testTimestamp) + { + default: throw new AssertionError(); + case STARTED_AFTER: + case STARTED_BEFORE: + timeseries = forKey.byId(); + break; + case EXECUTES_AFTER: + case MAY_EXECUTE_BEFORE: + timeseries = forKey.byExecuteAt(); + } + CommandsForKey.CommandTimeseries.TestTimestamp remapTestTimestamp; + switch (testTimestamp) + { + default: throw new AssertionError(); + case STARTED_AFTER: + case EXECUTES_AFTER: + remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.AFTER; + break; + case STARTED_BEFORE: + case MAY_EXECUTE_BEFORE: + remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.BEFORE; + } + return timeseries.mapReduce(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, prev, terminalValue); + }, accumulate, terminalValue); + + return accumulate; + } + + @Override + public void register(Seekables keysOrRanges, Ranges slice, Command command) + { + // TODO (required): support ranges + Routables.foldl((Keys)keysOrRanges, slice, (k, v, i) -> { commandsForKey(k).register(command); return v; }, null); + } + + @Override + public void register(Seekable keyOrRange, Ranges slice, Command command) + { + // TODO (required): support ranges + Key key = (Key) keyOrRange; + if (slice.contains(key)) + commandsForKey(key).register(command); + } + + public AccordCommandsForKey commandsForKey(Key key) + { + AccordCommandsForKey commandsForKey = getCommandsForKeyInternal(key); + if (commandsForKey.isEmpty()) + commandsForKey.initialize(); + return commandsForKey; + } + + public AccordCommandsForKey maybeCommandsForKey(Key key) + { + AccordCommandsForKey commandsForKey = getCommandsForKeyInternal(key); + return !commandsForKey.isEmpty() ? commandsForKey : null; + } + + @Override + public void addAndInvokeListener(TxnId txnId, CommandListener listener) + { + AccordCommand.WriteOnly command = (AccordCommand.WriteOnly) getContext().commands.getOrCreateWriteOnly(txnId, (ignore, id) -> new AccordCommand.WriteOnly(id), commandStore()); + command.addListener(listener); + execute(listener.listenerPreLoadContext(txnId), store -> { + listener.onChange(store, store.command(txnId)); + }); + } + + @Override + public AccordCommandStore commandStore() + { + return AccordCommandStore.this; + } + + @Override + public DataStore dataStore() + { + return dataStore; + } + + @Override + public Agent agent() + { + return agent; + } + + @Override + public ProgressLog progressLog() + { + return progressLog; + } + + @Override + public RangesForEpoch ranges() + { + return rangesForEpoch; + } + + @Override + public long latestEpoch() + { + return time.epoch(); + } + + @Override + public Timestamp preaccept(TxnId txnId, Seekables keys) + { + Timestamp max = maxConflict(keys); + long epoch = latestEpoch(); + if (txnId.compareTo(max) > 0 && txnId.epoch() >= epoch && !agent.isExpired(txnId, time.now())) + return txnId; + + return time.uniqueNow(max); + } + + @Override + public Future execute(PreLoadContext context, Consumer consumer) + { + return AccordCommandStore.this.execute(context, consumer); + } + + @Override + public Future submit(PreLoadContext context, Function function) + { + return AccordCommandStore.this.submit(context, function); + } + + @Override + public NodeTimeService time() + { + return time; + } + + public Timestamp maxConflict(Seekables keys) + { + // TODO: Seekables + // TODO: efficiency + return ((Keys)keys).stream() + .map(this::maybeCommandsForKey) + .filter(Objects::nonNull) + .map(CommandsForKey::max) + .max(Comparator.naturalOrder()) + .orElse(Timestamp.NONE); + } + } + + private static long getThreadId(ExecutorService executor) + { + try + { + return executor.submit(() -> Thread.currentThread().getId()).get(); + } + catch (InterruptedException e) + { + throw new AssertionError(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); + } + } + + private final long threadId; + public final String loggingId; + private final ExecutorService executor; + private final AccordStateCache stateCache; + private final AccordStateCache.Instance commandCache; + private final AccordStateCache.Instance commandsForKeyCache; + private AsyncContext currentCtx = null; + private long lastSystemTimestampMicros = Long.MIN_VALUE; + + private final NodeTimeService time; + private final Agent agent; + private final DataStore dataStore; + private final ProgressLog progressLog; + private final RangesForEpochHolder rangesForEpochHolder; + + public AccordCommandStore(int id, + NodeTimeService time, + Agent agent, + DataStore dataStore, + ProgressLog.Factory progressLogFactory, + RangesForEpochHolder rangesForEpoch) + { + super(id); + this.time = time; + this.agent = agent; + this.dataStore = dataStore; + this.progressLog = progressLogFactory.create(this); + this.rangesForEpochHolder = rangesForEpoch; + this.loggingId = String.format("[%s]", id); + this.executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); + this.threadId = getThreadId(this.executor); + this.stateCache = new AccordStateCache(0); + this.commandCache = stateCache.instance(TxnId.class, + AccordCommand.class, + AccordCommand::new); + this.commandsForKeyCache = stateCache.instance(PartitionKey.class, + AccordCommandsForKey.class, + key -> new AccordCommandsForKey(this, key)); + } + + void setCacheSize(long bytes) + { + checkInStoreThread(); + stateCache.setMaxSize(bytes); + } + + public SafeAccordCommandStore safeStore(AsyncContext context) + { + return new SafeAccordCommandStore(rangesForEpochHolder.get(), context); + } + + public void checkInStoreThread() + { + Invariants.checkState(Thread.currentThread().getId() == threadId); + } + + public void checkNotInStoreThread() + { + Invariants.checkState(Thread.currentThread().getId() != threadId); + } + + public ExecutorService executor() + { + return executor; + } + + public AccordStateCache.Instance commandCache() + { + return commandCache; + } + + public AccordStateCache.Instance commandsForKeyCache() + { + return commandsForKeyCache; + } + + public void setContext(AsyncContext context) + { + Invariants.checkState(currentCtx == null); + currentCtx = context; + } + + public AsyncContext getContext() + { + Invariants.checkState(currentCtx != null); + return currentCtx; + } + + public void unsetContext(AsyncContext context) + { + Invariants.checkState(currentCtx == context); + currentCtx = null; + } + + public long nextSystemTimestampMicros() + { + lastSystemTimestampMicros = Math.max(TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()), lastSystemTimestampMicros + 1); + return lastSystemTimestampMicros; + } + + private AccordCommand getCommandInternal(TxnId txnId) + { + Invariants.checkState(currentCtx != null); + AccordCommand command = currentCtx.commands.get(txnId); + if (command == null) + throw new IllegalArgumentException("No command in context for txnId " + txnId); + Invariants.checkState(command.isLoaded() || (command.isReadOnly() && command.isPartiallyLoaded())); + return command; + } + + public boolean isCommandsForKeyInContext(PartitionKey key) + { + return currentCtx.commandsForKey.get(key) != null; + } + + private AccordCommandsForKey getCommandsForKeyInternal(Key key) + { + Objects.requireNonNull(currentCtx, "current context"); + if (!(key instanceof PartitionKey)) + throw new IllegalArgumentException("Attempted to use non-PartitionKey; given " + key.getClass()); + AccordCommandsForKey commandsForKey = currentCtx.commandsForKey.get((PartitionKey) key); + if (commandsForKey == null) + throw new IllegalArgumentException("No commandsForKey in context for key " + key); + Invariants.checkState(commandsForKey.isLoaded()); + return commandsForKey; + } + + @Override + public Future submit(PreLoadContext loadCtx, Function function) + { + AsyncOperation operation = AsyncOperation.create(this, loadCtx, function); + executor.execute(operation); + return operation; + } + + @Override + public Agent agent() + { + return agent; + } + + @Override + public Future execute(PreLoadContext preLoadContext, Consumer consumer) + { + AsyncOperation operation = AsyncOperation.create(this, preLoadContext, consumer); + executor.execute(operation); + return operation; + } + + public void executeBlocking(Runnable runnable) + { + try + { + executor.submit(runnable).get(); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); + } + } + + @Override + public void shutdown() + { + executor.shutdown(); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java new file mode 100644 index 000000000000..14dd0851c294 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.api.Agent; +import accord.api.DataStore; +import accord.api.ProgressLog; +import accord.local.AsyncCommandStores; +import accord.local.NodeTimeService; +import accord.local.ShardDistributor; +import accord.topology.Topology; +import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; + +public class AccordCommandStores extends AsyncCommandStores +{ + private long cacheSize; + AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, + ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory) + { + super(time, agent, store, shardDistributor, progressLogFactory, AccordCommandStore::new); + setCacheSize(maxCacheSize()); + } + + synchronized void setCacheSize(long bytes) + { + cacheSize = bytes; + refreshCacheSizes(); + } + + synchronized void refreshCacheSizes() + { + if (count() == 0) + return; + long perStore = cacheSize / count(); + // TODO (low priority, safety): we might transiently breach our limit if we increase one store before decreasing another + forEach(commandStore -> ((SafeAccordCommandStore) commandStore).commandStore().setCacheSize(perStore)); + } + + private static long maxCacheSize() + { + return 5 << 20; // TODO (required): make configurable + } + + @Override + public synchronized void updateTopology(Topology newTopology) + { + super.updateTopology(newTopology); + refreshCacheSizes(); + } + + @Override + public synchronized void shutdown() + { + super.shutdown(); + //TODO shutdown isn't useful by itself, we need a way to "wait" as well. Should be AutoCloseable or offer awaitTermination as well (think Shutdownable interface) + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java new file mode 100644 index 000000000000..c58199139191 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java @@ -0,0 +1,433 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.Objects; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.Command; +import accord.local.CommandStore; +import accord.impl.CommandsForKey; +import accord.local.SafeCommandStore; +import accord.local.SafeCommandStore.TestDep; +import accord.local.SafeCommandStore.TestKind; +import accord.local.Status; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.store.StoredLong; +import org.apache.cassandra.service.accord.store.StoredNavigableMap; +import org.apache.cassandra.service.accord.store.StoredSet; +import org.apache.cassandra.service.accord.store.StoredValue; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.concurrent.Future; + +import static accord.local.SafeCommandStore.TestDep.ANY_DEPS; +import static accord.local.SafeCommandStore.TestDep.WITH; +import static accord.local.SafeCommandStore.TestKind.Ws; +import static accord.local.Status.KnownDeps.DepsUnknown; +import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applyMapChanges; +import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applySetChanges; + +public class AccordCommandsForKey extends CommandsForKey implements AccordState +{ + private static final Logger logger = LoggerFactory.getLogger(AccordCommandsForKey.class); + + private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new AccordCommandsForKey(null, null)); + + public static class Defaults + { + public static final Timestamp maxTimestamp = Timestamp.NONE; + public static final Timestamp lastExecutedTimestamp = Timestamp.NONE; + public static final Timestamp lastWriteTimestamp = Timestamp.NONE; + public static final long lastExecutedMicros = 0; + } + + public static class WriteOnly extends AccordCommandsForKey implements AccordState.WriteOnly + { + private Future future = null; + + public WriteOnly(AccordCommandStore commandStore, PartitionKey key) + { + super(commandStore, key); + } + + @Override + public void future(Future future) + { + Preconditions.checkArgument(this.future == null); + this.future = future; + + } + + @Override + public Future future() + { + return future; + } + + @Override + public void applyChanges(AccordCommandsForKey instance) + { + applySetChanges(this, instance, cfk -> cfk.blindWitnessed); + applyMapChanges(this, instance, cfk -> cfk.byId.map); + applyMapChanges(this, instance, cfk -> cfk.byExecuteAt.map); + } + } + + public enum SeriesKind + { + BY_ID, BY_EXECUTE_AT; + } + + public class Series implements CommandTimeseries + { + public final SeriesKind kind; + public final StoredNavigableMap map; + + public Series(ReadWrite readWrite, SeriesKind kind) + { + this.kind = kind; + map = new StoredNavigableMap<>(readWrite); + } + + @Override + public void add(Timestamp timestamp, Command command) + { + map.blindPut(timestamp, AccordPartialCommand.serializer.serialize(new AccordPartialCommand(key, command))); + } + + @Override + public void remove(Timestamp timestamp) + { + map.blindRemove(timestamp); + } + + private Stream idsToCommands(Collection blobs) + { + return blobs.stream().map(blob -> AccordPartialCommand.serializer.deserialize(AccordCommandsForKey.this, commandStore, blob)); + } + + @Override + public boolean isEmpty() + { + return map.getView().isEmpty(); + } + + public T mapReduce(TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, + TestDep testDep, @Nullable TxnId depId, + @Nullable Status minStatus, @Nullable Status maxStatus, + SafeCommandStore.CommandFunction map, T initialValue, T terminalValue) + { + + for (ByteBuffer buffer : (testTimestamp == TestTimestamp.BEFORE ? this.map.getView().headMap(timestamp, false) : this.map.getView().tailMap(timestamp, false)).values()) + { + AccordPartialCommand cmd = AccordPartialCommand.serializer.deserialize(AccordCommandsForKey.this, commandStore, buffer); + if (testKind == Ws && cmd.txnId().isRead()) continue; + // If we don't have any dependencies, we treat a dependency filter as a mismatch + if (testDep != ANY_DEPS && (cmd.known().deps == DepsUnknown || (cmd.deps().contains(depId) != (testDep == WITH)))) + continue; + if (minStatus != null && minStatus.compareTo(cmd.status()) > 0) + continue; + if (maxStatus != null && maxStatus.compareTo(cmd.status()) < 0) + continue; + initialValue = map.apply(key, cmd.txnId(), cmd.executeAt(), initialValue); + if (initialValue.equals(terminalValue)) + break; + } + return initialValue; + } + + @VisibleForTesting + public Stream all() + { + return idsToCommands(map.getView().values()); + } + + public AccordPartialCommand get(Timestamp timestamp) + { + ByteBuffer blob = map.getView().get(timestamp); + if (blob == null) + return null; + return AccordPartialCommand.serializer.deserialize(AccordCommandsForKey.this, commandStore, blob); + } + } + + private final AccordCommandStore commandStore; + private final PartitionKey key; + public final StoredValue maxTimestamp; + public final StoredValue lastExecutedTimestamp; + public final StoredLong lastExecutedMicros; + public final StoredValue lastWriteTimestamp; + public final StoredSet.Navigable blindWitnessed; + public final Series byId; + public final Series byExecuteAt; + + public AccordCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + { + this.commandStore = commandStore; + this.key = key; + maxTimestamp = new StoredValue<>(rw()); + lastExecutedTimestamp = new StoredValue<>(rw()); + lastExecutedMicros = new StoredLong(rw()); + lastWriteTimestamp = new StoredValue<>(rw()); + blindWitnessed = new StoredSet.Navigable<>(rw()); + byId = new Series(rw(), SeriesKind.BY_ID); + byExecuteAt = new Series(rw(), SeriesKind.BY_EXECUTE_AT); + } + + @Override + public boolean isEmpty() + { + return maxTimestamp.isEmpty() + && lastExecutedTimestamp.isEmpty() + && lastExecutedMicros.isEmpty() + && lastWriteTimestamp.isEmpty() + && blindWitnessed.isEmpty() + && byId.map.isEmpty() + && byExecuteAt.map.isEmpty(); + } + + public void setEmpty() + { + maxTimestamp.setEmpty(); + lastExecutedTimestamp.setEmpty(); + lastExecutedMicros.setEmpty(); + lastWriteTimestamp.setEmpty(); + blindWitnessed.setEmpty(); + byId.map.setEmpty(); + byExecuteAt.map.setEmpty(); + } + + public AccordCommandsForKey initialize() + { + maxTimestamp.set(Defaults.maxTimestamp); + lastExecutedTimestamp.load(Defaults.lastExecutedTimestamp); + lastExecutedMicros.load(Defaults.lastExecutedMicros); + lastWriteTimestamp.load(Defaults.lastWriteTimestamp); + blindWitnessed.load(new TreeSet<>()); + byId.map.load(new TreeMap<>()); + byExecuteAt.map.load(new TreeMap<>()); + return this; + } + + @Override + public boolean hasModifications() + { + return maxTimestamp.hasModifications() + || lastExecutedTimestamp.hasModifications() + || lastExecutedMicros.hasModifications() + || lastWriteTimestamp.hasModifications() + || blindWitnessed.hasModifications() + || byId.map.hasModifications() + || byExecuteAt.map.hasModifications(); + } + + @Override + public void clearModifiedFlag() + { + maxTimestamp.clearModifiedFlag(); + lastExecutedTimestamp.clearModifiedFlag(); + lastExecutedMicros.clearModifiedFlag(); + lastWriteTimestamp.clearModifiedFlag(); + blindWitnessed.clearModifiedFlag(); + byId.map.clearModifiedFlag(); + byExecuteAt.map.clearModifiedFlag(); + } + + @Override + public boolean isLoaded() + { + return maxTimestamp.isLoaded() + && lastExecutedTimestamp.isLoaded() + && lastExecutedMicros.isLoaded() + && lastWriteTimestamp.isLoaded() + && blindWitnessed.isLoaded() + && byId.map.isLoaded() + && byExecuteAt.map.isLoaded(); + } + + public CommandStore commandStore() + { + return commandStore; + } + + @Override + public PartitionKey key() + { + return key; + } + + @Override + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE; + size += maxTimestamp.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += lastExecutedTimestamp.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += lastExecutedMicros.estimatedSizeOnHeap(); + size += lastWriteTimestamp.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += blindWitnessed.estimatedSizeOnHeap(AccordObjectSizes::timestamp); + size += byId.map.estimatedSizeOnHeap(AccordObjectSizes::timestamp, ByteBufferUtil::estimatedSizeOnHeap); + size += byExecuteAt.map.estimatedSizeOnHeap(AccordObjectSizes::timestamp, ByteBufferUtil::estimatedSizeOnHeap); + return size; + } + + @Override + public Series byId() + { + return byId; + } + + @Override + public Series byExecuteAt() + { + return byExecuteAt; + } + + @Override + public Timestamp max() + { + return maxTimestamp.get(); + } + + @Override + public void updateMax(Timestamp timestamp) + { + if (isFullInstance()) + { + if (maxTimestamp.get().compareTo(timestamp) >= 0) + return; + maxTimestamp.set(timestamp); + } + else + { + Preconditions.checkState(isWriteOnlyInstance()); + blindWitnessed.blindAdd(timestamp); + } + } + + public void applyBlindWitnessedTimestamps() + { + if (isEmpty() || blindWitnessed.getView().isEmpty()) + return; + + logger.trace("Applying blind witnessed timestamps for {}: {}", key(), blindWitnessed.getView()); + blindWitnessed.getView().forEach(this::updateMax); + blindWitnessed.clear(); + } + + public void updateSummaries(AccordCommand command) + { + ByteBuffer partialCommand = AccordPartialCommand.serializer.serialize(new AccordPartialCommand(key, command)); + byId.map.blindPut(command.txnId(), partialCommand); + byExecuteAt.map.blindPut(command.executeAt(), partialCommand); + } + + private static long getTimestampMicros(Timestamp timestamp) + { + return timestamp.hlc(); + } + + private void maybeUpdatelastTimestamp(Timestamp executeAt, boolean isForWriteTxn) + { + Timestamp lastWrite = lastWriteTimestamp.get(); + + if (executeAt.compareTo(lastWrite) < 0) + throw new IllegalArgumentException(String.format("%s is less than the most recent write timestamp %s", executeAt, lastWrite)); + + Timestamp lastExecuted = lastExecutedTimestamp.get(); + int cmp = executeAt.compareTo(lastExecuted); + // execute can be in the past if it's for a read and after the most recent write + if (cmp == 0 || (!isForWriteTxn && cmp < 0)) + return; + if (cmp < 0) + throw new IllegalArgumentException(String.format("%s is less than the most recent executed timestamp %s", executeAt, lastExecuted)); + + long micros = getTimestampMicros(executeAt); + long lastMicros = lastExecutedMicros.get(); + lastExecutedTimestamp.set(executeAt); + lastExecutedMicros.set(Math.max(micros, lastMicros + 1)); + if (isForWriteTxn) + lastWriteTimestamp.set(executeAt); + } + + public int nowInSecondsFor(Timestamp executeAt, boolean isForWriteTxn) + { + maybeUpdatelastTimestamp(executeAt, isForWriteTxn); + // we use the executeAt time instead of the monotonic database timestamp to prevent uneven + // ttl expiration in extreme cases, ie 1M+ writes/second to a key causing timestamps to overflow + // into the next second on some keys and not others. + return Math.toIntExact(TimeUnit.MICROSECONDS.toSeconds(getTimestampMicros(lastExecutedTimestamp.get()))); + } + + public long timestampMicrosFor(Timestamp executeAt, boolean isForWriteTxn) + { + maybeUpdatelastTimestamp(executeAt, isForWriteTxn); + return lastExecutedMicros.get(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordCommandsForKey that = (AccordCommandsForKey) o; + return commandStore == that.commandStore + && key.equals(that.key) + && maxTimestamp.equals(that.maxTimestamp) + && lastExecutedTimestamp.equals(that.lastExecutedTimestamp) + && lastExecutedMicros.equals(that.lastExecutedMicros) + && lastWriteTimestamp.equals(that.lastWriteTimestamp) + && blindWitnessed.equals(that.blindWitnessed) + && byId.map.equals(that.byId.map) + && byExecuteAt.map.equals(that.byExecuteAt.map); + } + + @Override + public int hashCode() + { + return Objects.hash(commandStore, key, blindWitnessed, maxTimestamp, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp, byId, byExecuteAt); + } + + @Override + public String toString() + { + return "AccordCommandsForKey{" + + "key=" + key + + ", maxTs=" + maxTimestamp + + ", lastExecutedTimestamp=" + lastExecutedTimestamp + + ", lastExecutedMicros=" + lastExecutedMicros + + ", lastWriteTimestamp=" + lastWriteTimestamp + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java new file mode 100644 index 000000000000..2c212b8d311e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.List; + +import accord.api.ConfigurationService; +import accord.local.Node; +import accord.topology.Topology; + +/** + * Currently a stubbed out config service meant to be triggered from a dtest + */ +public class AccordConfigurationService implements ConfigurationService +{ + private final Node.Id localId; + private final List listeners = new ArrayList<>(); + private final List epochs = new ArrayList<>(); + + public AccordConfigurationService(Node.Id localId) + { + this.localId = localId; + epochs.add(Topology.EMPTY); + } + + @Override + public synchronized void registerListener(Listener listener) + { + listeners.add(listener); + } + + @Override + public synchronized Topology currentTopology() + { + return epochs.get(epochs.size() - 1); + } + + @Override + public Topology getTopologyForEpoch(long epoch) + { + return epochs.get((int) epoch); + } + + @Override + public void fetchTopologyForEpoch(long epoch) + { + throw new UnsupportedOperationException(); + } + + @Override + public void acknowledgeEpoch(long epoch) + { + Topology acknowledged = getTopologyForEpoch(epoch); + for (Node.Id node : acknowledged.nodes()) + { + if (node.equals(localId)) + continue; + for (Listener listener : listeners) + listener.onEpochSyncComplete(node, epoch); + } + } + + public void createEpochFromConfig() + { + Topology topology = AccordTopologyUtils.createTopology(epochs.size()); + epochs.add(topology); + for (Listener listener : listeners) + listener.onTopologyUpdate(topology); + + // TODO: This is a hack to enable simplistic cluster reuse for TxnAuthTest, AccordCQLTest, etc. + // Since we don't have a dist sys that sets this up, we have to just lie... + EndpointMapping.knownIds().forEach(id -> { + for (Listener listener : listeners) + listener.onEpochSyncComplete(id, topology.epoch()); + }); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java new file mode 100644 index 000000000000..316a78a5a971 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -0,0 +1,807 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Supplier; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.CommandStore; +import accord.local.Node; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.Ballot; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Route; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.utils.DeterministicIdentitySet; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ClusteringComparator; +import org.apache.cassandra.db.Columns; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.Slices; +import org.apache.cassandra.db.filter.ClusteringIndexFilter; +import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.filter.DataLimits; +import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.BytesType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.marshal.TupleType; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.BTreeRow; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.RowIterator; +import org.apache.cassandra.db.transform.FilteredPartitions; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.LocalVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.schema.Types; +import org.apache.cassandra.schema.UserFunctions; +import org.apache.cassandra.schema.Views; +import org.apache.cassandra.serializers.UUIDSerializer; +import org.apache.cassandra.service.accord.AccordCommandsForKey.SeriesKind; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.accord.serializers.DepsSerializer; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.store.StoredNavigableMap; +import org.apache.cassandra.service.accord.store.StoredSet; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.Clock; + +import static java.lang.String.format; +import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; +import static org.apache.cassandra.db.rows.BufferCell.*; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.apache.cassandra.utils.ByteBufferUtil.EMPTY_BYTE_BUFFER; +import static org.apache.cassandra.utils.ByteBufferUtil.bytes; + +public class AccordKeyspace +{ + private static final Logger logger = LoggerFactory.getLogger(AccordKeyspace.class); + + public static final String COMMANDS = "commands"; + public static final String COMMANDS_FOR_KEY = "commands_for_key"; + + private static final String TIMESTAMP_TUPLE = "tuple"; + private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); + private static final String KEY_TUPLE = "tuple"; + + private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexSliceFilter(Slices.ALL, false); + + // TODO: store timestamps as blobs (confirm there are no negative numbers, or offset) + private static final TableMetadata Commands = + parse(COMMANDS, + "accord commands", + "CREATE TABLE %s (" + + "store_id int," + + format("txn_id %s,", TIMESTAMP_TUPLE) + + "status int," + + "home_key blob," + + "progress_key blob," + + "route blob," + + "durability int," + + "txn blob," + + "kind int," + + format("execute_at %s,", TIMESTAMP_TUPLE) + + format("promised_ballot %s,", TIMESTAMP_TUPLE) + + format("accepted_ballot %s,", TIMESTAMP_TUPLE) + + "dependencies blob," + + "writes blob," + + "result blob," + + format("waiting_on_commit set<%s>,", TIMESTAMP_TUPLE) + + format("waiting_on_apply map<%s, blob>,", TIMESTAMP_TUPLE) + + "listeners set, " + + format("blocking_commit_on set<%s>, ", TIMESTAMP_TUPLE) + + format("blocking_apply_on set<%s>, ", TIMESTAMP_TUPLE) + + "PRIMARY KEY((store_id, txn_id))" + + ')'); + + // TODO: naming is not very clearly distinct from the base serializers + private static class CommandsSerializers + { + static final LocalVersionedSerializer> route = localSerializer(KeySerializers.route); + static final LocalVersionedSerializer routingKey = localSerializer(AccordRoutingKey.serializer); + static final LocalVersionedSerializer partialTxn = localSerializer(CommandSerializers.partialTxn); + static final LocalVersionedSerializer partialDeps = localSerializer(DepsSerializer.partialDeps); + static final LocalVersionedSerializer writes = localSerializer(CommandSerializers.writes); + static final LocalVersionedSerializer result = localSerializer(TxnData.serializer); + + private static LocalVersionedSerializer localSerializer(IVersionedSerializer serializer) + { + return new LocalVersionedSerializer<>(AccordSerializerVersion.CURRENT, AccordSerializerVersion.serializer, serializer); + } + } + + private static ColumnMetadata getColumn(TableMetadata metadata, String name) + { + ColumnMetadata column = metadata.getColumn(new ColumnIdentifier(name, true)); + if (column == null) + throw new IllegalArgumentException(String.format("Unknown column %s for %s.%s", name, metadata.keyspace, metadata.name)); + return column; + } + + private static class CommandsColumns + { + static final ClusteringComparator keyComparator = Commands.partitionKeyAsClusteringComparator(); + static final ColumnMetadata status = getColumn(Commands, "status"); + static final ColumnMetadata home_key = getColumn(Commands, "home_key"); + static final ColumnMetadata progress_key = getColumn(Commands, "progress_key"); + static final ColumnMetadata route = getColumn(Commands, "route"); + static final ColumnMetadata durability = getColumn(Commands, "durability"); + static final ColumnMetadata txn = getColumn(Commands, "txn"); + static final ColumnMetadata kind = getColumn(Commands, "kind"); + static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); + static final ColumnMetadata promised_ballot = getColumn(Commands, "promised_ballot"); + static final ColumnMetadata accepted_ballot = getColumn(Commands, "accepted_ballot"); + static final ColumnMetadata dependencies = getColumn(Commands, "dependencies"); + static final ColumnMetadata writes = getColumn(Commands, "writes"); + static final ColumnMetadata result = getColumn(Commands, "result"); + static final ColumnMetadata waiting_on_commit = getColumn(Commands, "waiting_on_commit"); + static final ColumnMetadata waiting_on_apply = getColumn(Commands, "waiting_on_apply"); + static final ColumnMetadata listeners = getColumn(Commands, "listeners"); + static final ColumnMetadata blocking_commit_on = getColumn(Commands, "blocking_commit_on"); + static final ColumnMetadata blocking_apply_on = getColumn(Commands, "blocking_apply_on"); + } + + private static final TableMetadata CommandsForKey = + parse(COMMANDS_FOR_KEY, + "accord commands per key", + "CREATE TABLE %s (" + + "store_id int, " + + format("key %s, ", KEY_TUPLE) + + format("max_timestamp %s static, ", TIMESTAMP_TUPLE) + + format("last_executed_timestamp %s static, ", TIMESTAMP_TUPLE) + + "last_executed_micros bigint static, " + + format("last_write_timestamp %s static, ", TIMESTAMP_TUPLE) + + format("blind_witnessed set<%s> static, ", TIMESTAMP_TUPLE) + + "series int, " + + format("timestamp %s, ", TIMESTAMP_TUPLE) + + "data blob, " + + "PRIMARY KEY((store_id, key), series, timestamp)" + + ')'); + + private static class CommandsForKeyColumns + { + static final ClusteringComparator keyComparator = CommandsForKey.partitionKeyAsClusteringComparator(); + static final ColumnFilter allColumns = ColumnFilter.all(CommandsForKey); + static final ColumnMetadata max_timestamp = getColumn(CommandsForKey, "max_timestamp"); + static final ColumnMetadata last_executed_timestamp = getColumn(CommandsForKey, "last_executed_timestamp"); + static final ColumnMetadata last_executed_micros = getColumn(CommandsForKey, "last_executed_micros"); + static final ColumnMetadata last_write_timestamp = getColumn(CommandsForKey, "last_write_timestamp"); + static final ColumnMetadata blind_witnessed = getColumn(CommandsForKey, "blind_witnessed"); + + static final ColumnMetadata series = getColumn(CommandsForKey, "series"); + static final ColumnMetadata timestamp = getColumn(CommandsForKey, "timestamp"); + static final ColumnMetadata data = getColumn(CommandsForKey, "data"); + + static final Columns statics = Columns.from(Lists.newArrayList(max_timestamp, last_executed_timestamp, last_executed_micros, last_write_timestamp, blind_witnessed)); + static final Columns regulars = Columns.from(Lists.newArrayList(data)); + private static final RegularAndStaticColumns all = new RegularAndStaticColumns(statics, regulars); + private static final RegularAndStaticColumns justStatic = new RegularAndStaticColumns(statics, Columns.NONE); + private static final RegularAndStaticColumns justRegular = new RegularAndStaticColumns(Columns.NONE, regulars); + + static boolean hasStaticChanges(AccordCommandsForKey commandsForKey) + { + return commandsForKey.maxTimestamp.hasModifications() + || commandsForKey.lastExecutedTimestamp.hasModifications() + || commandsForKey.lastExecutedMicros.hasModifications() + || commandsForKey.lastWriteTimestamp.hasModifications() + || commandsForKey.blindWitnessed.hasModifications(); + } + + private static boolean hasRegularChanges(AccordCommandsForKey commandsForKey) + { + return commandsForKey.byId.map.hasModifications() + || commandsForKey.byExecuteAt.map.hasModifications(); + } + + static RegularAndStaticColumns columnsFor(AccordCommandsForKey commandsForKey) + { + boolean hasStaticChanges = hasStaticChanges(commandsForKey); + boolean hasRegularChanges = hasRegularChanges(commandsForKey); + + if (hasStaticChanges && hasRegularChanges) + return all; + else if (hasStaticChanges) + return justStatic; + else if (hasRegularChanges) + return justRegular; + else + throw new IllegalArgumentException("CommandsForKey has_modifications=" + commandsForKey.hasModifications() + ", but no Static or Regular columns changed!"); + } + } + + private static TableMetadata parse(String name, String description, String cql) + { + return CreateTableStatement.parse(format(cql, name), ACCORD_KEYSPACE_NAME) + .id(TableId.forSystemTable(ACCORD_KEYSPACE_NAME, name)) + .comment(description) + .gcGraceSeconds((int) TimeUnit.DAYS.toSeconds(90)) + .build(); + } + + public static KeyspaceMetadata metadata() + { + return KeyspaceMetadata.create(ACCORD_KEYSPACE_NAME, KeyspaceParams.local(), tables(), Views.none(), Types.none(), UserFunctions.none()); + } + + private static Tables tables() + { + return Tables.of(Commands, CommandsForKey); + } + + private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException + { + int size = (int) serializer.serializedSize(obj); + try (DataOutputBuffer out = new DataOutputBuffer(size)) + { + serializer.serialize(obj, out); + ByteBuffer bb = out.buffer(); + assert size == bb.limit() : String.format("Expected to write %d but wrote %d", size, bb.limit()); + return bb; + } + } + + private static ByteBuffer serializeOrNull(T obj, LocalVersionedSerializer serializer) throws IOException + { + return obj != null ? serialize(obj, serializer) : EMPTY_BYTE_BUFFER; + } + + private static T deserialize(ByteBuffer bytes, LocalVersionedSerializer serializer) throws IOException + { + try (DataInputBuffer in = new DataInputBuffer(bytes, true)) + { + return serializer.deserialize(in); + } + } + + private static T deserializeOrNull(ByteBuffer bytes, LocalVersionedSerializer serializer) throws IOException + { + return bytes != null && ! ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, serializer) : null; + } + + private static NavigableMap deserializeWaitingOnApply(Map serialized) + { + if (serialized == null || serialized.isEmpty()) + return new TreeMap<>(); + + NavigableMap result = new TreeMap<>(); + for (Map.Entry entry : serialized.entrySet()) + result.put(deserializeTimestampOrNull(entry.getKey(), Timestamp::fromBits), deserializeTimestampOrNull(entry.getValue(), TxnId::fromBits)); + return result; + } + + private static > S deserializeTimestampSet(Set serialized, Supplier setFactory, TimestampFactory timestampFactory) + { + S result = setFactory.get(); + if (serialized == null || serialized.isEmpty()) + return result; + + for (ByteBuffer bytes : serialized) + result.add(deserializeTimestampOrNull(bytes, timestampFactory)); + + return result; + } + + private static NavigableSet deserializeTxnIdNavigableSet(UntypedResultSet.Row row, String name) + { + return deserializeTimestampSet(row.getSet(name, BytesType.instance), TreeSet::new, TxnId::fromBits); + } + + private static DeterministicIdentitySet deserializeListeners(Set serialized) throws IOException + { + if (serialized == null || serialized.isEmpty()) + return new DeterministicIdentitySet<>(); + DeterministicIdentitySet result = new DeterministicIdentitySet<>(); + for (ByteBuffer bytes : serialized) + { + result.add(ListenerProxy.deserialize(bytes, ByteBufferAccessor.instance, 0)); + } + return result; + } + + private static DeterministicIdentitySet deserializeListeners(UntypedResultSet.Row row, String name) throws IOException + { + return deserializeListeners(row.getSet(name, BytesType.instance)); + } + + private static , V> void addStoredMapChanges(Row.Builder builder, + ColumnMetadata column, + long timestamp, + int nowInSec, + StoredNavigableMap map, + Function serializeKey, + Function serializeVal) + { + if (map.wasCleared()) + { + if (!map.hasAdditions()) + { + builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp, nowInSec)); + return; + } + else + builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp - 1, nowInSec)); + } + + map.forEachAddition((k, v) -> builder.addCell(live(column, timestamp, serializeVal.apply(v), CellPath.create(serializeKey.apply(k))))); + + if (!map.wasCleared()) + map.forEachDeletion(k -> builder.addCell(tombstone(column, timestamp, nowInSec, CellPath.create(serializeKey.apply(k))))); + } + + private static > void addStoredSetChanges(Row.Builder builder, + ColumnMetadata column, + long timestamp, + int nowInSec, + StoredSet map, + Function serialize) + { + if (map.wasCleared()) + { + if (!map.hasAdditions()) + { + builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp, nowInSec)); + return; + } + else + builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp - 1, nowInSec)); + } + + map.forEachAddition(i -> builder.addCell(live(column, timestamp, EMPTY_BYTE_BUFFER, CellPath.create(serialize.apply(i))))); + + if (!map.wasCleared()) + map.forEachDeletion(k -> builder.addCell(tombstone(column, timestamp, nowInSec, CellPath.create(serialize.apply(k))))); + } + + public static Mutation getCommandMutation(AccordCommandStore commandStore, AccordCommand command, long timestampMicros) + { + try + { + Preconditions.checkArgument(command.hasModifications()); + + // TODO: convert to byte arrays + ValueAccessor accessor = ByteBufferAccessor.instance; + + Row.Builder builder = BTreeRow.unsortedBuilder(); + builder.newRow(Clustering.EMPTY); + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + + + if (command.status.hasModifications()) + builder.addCell(live(CommandsColumns.status, timestampMicros, accessor.valueOf(command.status.get().ordinal()))); + + if (command.homeKey.hasModifications()) + builder.addCell(live(CommandsColumns.home_key, timestampMicros, serializeOrNull((AccordRoutingKey) command.homeKey.get(), CommandsSerializers.routingKey))); + + if (command.progressKey.hasModifications()) + builder.addCell(live(CommandsColumns.progress_key, timestampMicros, serializeOrNull((AccordRoutingKey) command.progressKey.get(), CommandsSerializers.routingKey))); + + if (command.route.hasModifications()) + builder.addCell(live(CommandsColumns.route, timestampMicros, serializeOrNull(command.route.get(), CommandsSerializers.route))); + + if (command.durability.hasModifications()) + builder.addCell(live(CommandsColumns.durability, timestampMicros, accessor.valueOf(command.durability.get().ordinal()))); + + if (command.partialTxn.hasModifications()) + builder.addCell(live(CommandsColumns.txn, timestampMicros, serializeOrNull(command.partialTxn.get(), CommandsSerializers.partialTxn))); + + if (command.kind.hasModifications() && command.kind.get() != null) // initialize sets hasModification(), and don't want to persist null + builder.addCell(live(CommandsColumns.kind, timestampMicros, accessor.valueOf(command.kind.get().ordinal()))); + + if (command.executeAt.hasModifications()) + builder.addCell(live(CommandsColumns.execute_at, timestampMicros, serializeTimestamp(command.executeAt.get()))); + + if (command.promised.hasModifications()) + builder.addCell(live(CommandsColumns.promised_ballot, timestampMicros, serializeTimestamp(command.promised.get()))); + + if (command.accepted.hasModifications()) + builder.addCell(live(CommandsColumns.accepted_ballot, timestampMicros, serializeTimestamp(command.accepted.get()))); + + if (command.partialDeps.hasModifications()) + builder.addCell(live(CommandsColumns.dependencies, timestampMicros, serializeOrNull(command.partialDeps.get(), CommandsSerializers.partialDeps))); + + if (command.writes.hasModifications()) + builder.addCell(live(CommandsColumns.writes, timestampMicros, serialize(command.writes.get(), CommandsSerializers.writes))); + + if (command.result.hasModifications()) + builder.addCell(live(CommandsColumns.result, timestampMicros, serialize((TxnData) command.result.get(), CommandsSerializers.result))); + + if (command.waitingOnCommit.hasModifications()) + { + addStoredSetChanges(builder, CommandsColumns.waiting_on_commit, + timestampMicros, nowInSeconds, command.waitingOnCommit, + AccordKeyspace::serializeTimestamp); + } + + if (command.blockingCommitOn.hasModifications()) + { + addStoredSetChanges(builder, CommandsColumns.blocking_commit_on, + timestampMicros, nowInSeconds, command.blockingApplyOn, + AccordKeyspace::serializeTimestamp); + } + + if (command.waitingOnApply.hasModifications()) + { + addStoredMapChanges(builder, CommandsColumns.waiting_on_apply, + timestampMicros, nowInSeconds, command.waitingOnApply, + AccordKeyspace::serializeTimestamp, AccordKeyspace::serializeTimestamp); + } + + if (command.blockingApplyOn.hasModifications()) + { + addStoredSetChanges(builder, CommandsColumns.blocking_apply_on, + timestampMicros, nowInSeconds, command.blockingApplyOn, + AccordKeyspace::serializeTimestamp); + } + + if (command.storedListeners.hasModifications()) + { + addStoredSetChanges(builder, CommandsColumns.listeners, + timestampMicros, nowInSeconds, command.storedListeners, + ListenerProxy::identifier); + } + ByteBuffer key = CommandsColumns.keyComparator.make(commandStore.id(), + serializeTimestamp(command.txnId())).serializeAsPartitionKey(); + PartitionUpdate update = PartitionUpdate.singleRowUpdate(Commands, key, builder.build()); + return new Mutation(update); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + private static ByteBuffer serializeKey(PartitionKey key) + { + return TupleType.pack(ByteBufferAccessor.instance, Arrays.asList(UUIDSerializer.instance.serialize(key.tableId().asUUID()), key.partitionKey().getKey())); + } + + private static ByteBuffer serializeTimestamp(Timestamp timestamp) + { + return TupleType.pack(ByteBufferAccessor.instance, Arrays.asList(bytes(timestamp.msb), bytes(timestamp.lsb), bytes(timestamp.node.id))); + } + + public interface TimestampFactory + { + T create(long msb, long lsb, Node.Id node); + } + + public static T deserializeTimestampOrNull(ByteBuffer bytes, TimestampFactory factory) + { + if (bytes == null || ByteBufferAccessor.instance.isEmpty(bytes)) + return null; + List split = TIMESTAMP_TYPE.unpack(bytes, ByteBufferAccessor.instance); + return factory.create(split.get(0).getLong(), split.get(1).getLong(), new Node.Id(split.get(2).getInt())); + } + + private static T deserializeTimestampOrNull(UntypedResultSet.Row row, String name, TimestampFactory factory) + { + return deserializeTimestampOrNull(row.getBlob(name), factory); + } + + public static AccordCommand loadCommand(AccordCommandStore commandStore, TxnId txnId) + { + AccordCommand command = new AccordCommand(txnId); + loadCommand(commandStore, command); + return command; + } + + private static T deserializeWithVersionOr(UntypedResultSet.Row row, String dataColumn, LocalVersionedSerializer serializer, Supplier defaultSupplier) throws IOException + { + if (!row.has(dataColumn)) + return defaultSupplier.get(); + + return deserialize(row.getBlob(dataColumn), serializer); + } + + public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId txnId) + { + String cql = "SELECT * FROM %s.%s " + + "WHERE store_id = ? " + + "AND txn_id=(?, ?, ?)"; + + return executeOnceInternal(String.format(cql, ACCORD_KEYSPACE_NAME, COMMANDS), + commandStore.id(), + txnId.msb, txnId.lsb, txnId.node.id); + } + + public static void loadCommand(AccordCommandStore commandStore, AccordCommand command) + { + Preconditions.checkArgument(!command.isLoaded()); + TxnId txnId = command.txnId(); + commandStore.checkNotInStoreThread(); + + UntypedResultSet result = loadCommandRow(commandStore, command.txnId()); + + if (result.isEmpty()) + { + command.setEmpty(); + return; + } + + try + { + UntypedResultSet.Row row = result.one(); + Preconditions.checkState(deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits).equals(txnId)); + command.status.load(SaveStatus.values()[row.getInt("status")]); + command.homeKey.load(deserializeOrNull(row.getBlob("home_key"), CommandsSerializers.routingKey)); + command.progressKey.load(deserializeOrNull(row.getBlob("progress_key"), CommandsSerializers.routingKey)); + command.route.load(deserializeOrNull(row.getBlob("route"), CommandsSerializers.route)); + // TODO: something less brittle than ordinal, more efficient than values() + command.durability.load(Status.Durability.values()[row.getInt("durability", 0)]); + command.partialTxn.load(deserializeOrNull(row.getBlob("txn"), CommandsSerializers.partialTxn)); + command.kind.load(row.has("kind") ? Txn.Kind.values()[row.getInt("kind")] : null); + command.executeAt.load(deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits)); + command.promised.load(deserializeTimestampOrNull(row, "promised_ballot", Ballot::fromBits)); + command.accepted.load(deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits)); + command.partialDeps.load(deserializeOrNull(row.getBlob("dependencies"), CommandsSerializers.partialDeps)); + command.writes.load(deserializeWithVersionOr(row, "writes", CommandsSerializers.writes, () -> null)); + command.result.load(deserializeWithVersionOr(row, "result", CommandsSerializers.result, () -> null)); + command.waitingOnCommit.load(deserializeTxnIdNavigableSet(row, "waiting_on_commit")); + command.blockingCommitOn.load(deserializeTxnIdNavigableSet(row, "blocking_commit_on")); + command.waitingOnApply.load(deserializeWaitingOnApply(row.getMap("waiting_on_apply", BytesType.instance, BytesType.instance))); + command.blockingApplyOn.load(deserializeTxnIdNavigableSet(row, "blocking_apply_on")); + command.storedListeners.load(deserializeListeners(row, "listeners")); + } + catch (IOException e) + { + logger.error("Exception loading AccordCommand " + command.txnId(), e); + throw new RuntimeException(e); + } + catch (Throwable t) + { + logger.error("Exception loading AccordCommand " + command.txnId(), t); + throw t; + } + } + + private static void addSeriesMutations(AccordCommandsForKey.Series series, + PartitionUpdate.Builder partitionBuilder, + Row.Builder rowBuilder, + long timestampMicros, + int nowInSeconds) + { + if (!series.map.hasModifications()) + return; + + Row.Deletion deletion = series.map.hasDeletions() ? + Row.Deletion.regular(DeletionTime.buildUnsafeWithUnsignedInteger(timestampMicros, nowInSeconds)) : + null; + ByteBuffer ordinalBytes = bytes(series.kind.ordinal()); + series.map.forEachAddition((timestamp, bytes) -> { + rowBuilder.newRow(Clustering.make(ordinalBytes, serializeTimestamp(timestamp))); + rowBuilder.addCell(live(CommandsForKeyColumns.data, timestampMicros, bytes)); + partitionBuilder.add(rowBuilder.build()); + }); + series.map.forEachDeletion(timestamp -> { + rowBuilder.newRow(Clustering.make(ordinalBytes, serializeTimestamp(timestamp))); + rowBuilder.addRowDeletion(deletion); + partitionBuilder.add(rowBuilder.build()); + }); + } + + private static DecoratedKey makeKey(CommandStore commandStore, PartitionKey key) + { + ByteBuffer pk = CommandsForKeyColumns.keyComparator.make(commandStore.id(), + serializeKey(key)).serializeAsPartitionKey(); + return CommandsForKey.partitioner.decorateKey(pk); + } + + private static DecoratedKey makeKey(AccordCommandsForKey cfk) + { + return makeKey(cfk.commandStore(), cfk.key()); + } + + public static Mutation getCommandsForKeyMutation(AccordCommandStore commandStore, AccordCommandsForKey cfk, long timestampMicros) + { + Preconditions.checkArgument(cfk.hasModifications()); + + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + + int expectedRows = (CommandsForKeyColumns.hasStaticChanges(cfk) ? 1 : 0) + + cfk.byId.map.totalModifications() + + cfk.byExecuteAt.map.totalModifications(); + + PartitionUpdate.Builder partitionBuilder = new PartitionUpdate.Builder(CommandsForKey, + makeKey(cfk), + CommandsForKeyColumns.columnsFor(cfk), + expectedRows); + + Row.Builder rowBuilder = BTreeRow.unsortedBuilder(); + boolean updateStaticRow = cfk.maxTimestamp.hasModifications() + || cfk.lastExecutedTimestamp.hasModifications() + || cfk.lastExecutedMicros.hasModifications() + || cfk.lastWriteTimestamp.hasModifications() + || cfk.blindWitnessed.hasModifications(); + if (updateStaticRow) + { + rowBuilder.newRow(Clustering.STATIC_CLUSTERING); + + if (cfk.maxTimestamp.hasModifications()) + rowBuilder.addCell(live(CommandsForKeyColumns.max_timestamp, timestampMicros, serializeTimestamp(cfk.maxTimestamp.get()))); + + if (cfk.lastExecutedTimestamp.hasModifications()) + rowBuilder.addCell(live(CommandsForKeyColumns.last_executed_timestamp, timestampMicros, serializeTimestamp(cfk.lastExecutedTimestamp.get()))); + + if (cfk.lastExecutedMicros.hasModifications()) + rowBuilder.addCell(live(CommandsForKeyColumns.last_executed_micros, timestampMicros, ByteBufferUtil.bytes(cfk.lastExecutedMicros.get()))); + + if (cfk.lastWriteTimestamp.hasModifications()) + rowBuilder.addCell(live(CommandsForKeyColumns.last_write_timestamp, timestampMicros, serializeTimestamp(cfk.lastWriteTimestamp.get()))); + + if (cfk.blindWitnessed.hasModifications()) + addStoredSetChanges(rowBuilder, CommandsForKeyColumns.blind_witnessed, + timestampMicros, nowInSeconds, cfk.blindWitnessed, + AccordKeyspace::serializeTimestamp); + + partitionBuilder.add(rowBuilder.build()); + } + + addSeriesMutations(cfk.byId, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); + addSeriesMutations(cfk.byExecuteAt, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); + + return new Mutation(partitionBuilder.build()); + } + + public static AccordCommandsForKey loadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + { + AccordCommandsForKey commandsForKey = new AccordCommandsForKey(commandStore, key); + loadCommandsForKey(commandsForKey); + return commandsForKey; + } + + private static ByteBuffer cellValue(Cell cell) + { + return cell.accessor().toBuffer(cell.value()); + } + + // TODO: convert to byte array + private static ByteBuffer cellValue(Row row, ColumnMetadata column) + { + Cell cell = row.getCell(column); + return (cell != null && !cell.isTombstone()) ? cellValue(cell) : null; + } + + private static ByteBuffer clusteringValue(Clustering clustering, int idx) + { + return clustering.accessor().toBuffer(clustering.get(idx)); + } + + public static SinglePartitionReadCommand getCommandsForKeyRead(CommandStore commandStore, PartitionKey key, long nowInSeconds) + { + return SinglePartitionReadCommand.create(CommandsForKey, nowInSeconds, + CommandsForKeyColumns.allColumns, + RowFilter.none(), + DataLimits.NONE, + makeKey(commandStore, key), + FULL_PARTITION); + } + + public static void loadCommandsForKey(AccordCommandsForKey cfk) + { + Preconditions.checkArgument(!cfk.isLoaded()); + ((AccordCommandStore) cfk.commandStore()).checkNotInStoreThread(); + long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + + SinglePartitionReadCommand command = getCommandsForKeyRead(cfk.commandStore(), cfk.key(), nowInSeconds); + + EnumMap> seriesMaps = new EnumMap<>(SeriesKind.class); + for (SeriesKind kind : SeriesKind.values()) + seriesMaps.put(kind, new TreeMap<>()); + + try(ReadExecutionController controller = command.executionController(); + FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) + { + if (!partitions.hasNext()) + { + cfk.setEmpty(); + return; + } + + try (RowIterator partition = partitions.next()) + { + // empty static row will be interpreted as all null cells which will cause everything to be initialized + Row staticRow = partition.staticRow(); + Cell cell = staticRow.getCell(CommandsForKeyColumns.max_timestamp); + cfk.maxTimestamp.load(cell != null && !cell.isTombstone() ? deserializeTimestampOrNull(cellValue(cell), Timestamp::fromBits) + : AccordCommandsForKey.Defaults.maxTimestamp); + + cell = staticRow.getCell(CommandsForKeyColumns.last_executed_timestamp); + cfk.lastExecutedTimestamp.load(cell != null && !cell.isTombstone() ? deserializeTimestampOrNull(cellValue(cell), Timestamp::fromBits) + : AccordCommandsForKey.Defaults.lastExecutedTimestamp); + + cell = staticRow.getCell(CommandsForKeyColumns.last_executed_micros); + ByteBuffer microsBytes = cell != null && !cell.isTombstone() ? cellValue(cell) : null; + cfk.lastExecutedMicros.load(microsBytes != null ? microsBytes.getLong(microsBytes.position()) + : AccordCommandsForKey.Defaults.lastExecutedMicros); + + cell = staticRow.getCell(CommandsForKeyColumns.last_write_timestamp); + cfk.lastWriteTimestamp.load(cell != null && !cell.isTombstone() ? deserializeTimestampOrNull(cellValue(cell), Timestamp::fromBits) + : AccordCommandsForKey.Defaults.lastWriteTimestamp); + + TreeSet blindWitnessed = new TreeSet<>(); + ComplexColumnData cmplx = staticRow.getComplexColumnData(CommandsForKeyColumns.blind_witnessed); + if (cmplx != null) + cmplx.forEach(c -> blindWitnessed.add(deserializeTimestampOrNull(c.path().get(0), Timestamp::fromBits))); + cfk.blindWitnessed.load(blindWitnessed); + + while (partition.hasNext()) + { + Row row = partition.next(); + Clustering clustering = row.clustering(); + int ordinal = Int32Type.instance.compose(clusteringValue(clustering, 0)); + Timestamp timestamp = deserializeTimestampOrNull(clusteringValue(clustering, 1), Timestamp::fromBits); + ByteBuffer data = cellValue(row, CommandsForKeyColumns.data); + if (data == null) + continue; + seriesMaps.get(SeriesKind.values()[ordinal]).put(timestamp, data); + } + } + Preconditions.checkState(!partitions.hasNext()); + + cfk.byId.map.load(seriesMaps.get(SeriesKind.BY_ID)); + cfk.byExecuteAt.map.load(seriesMaps.get(SeriesKind.BY_EXECUTE_AT)); + } + catch (Throwable t) + { + logger.error("Exception loading AccordCommandsForKey " + cfk.key(), t); + throw t; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java new file mode 100644 index 000000000000..dc329e8807dd --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.EnumMap; +import java.util.Map; +import java.util.Objects; + +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.MessageSink; +import accord.local.Node; +import accord.messages.Callback; +import accord.messages.MessageType; +import accord.messages.Reply; +import accord.messages.ReplyContext; +import accord.messages.Request; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.Verb; + +import static org.apache.cassandra.service.accord.EndpointMapping.getEndpoint; + +public class AccordMessageSink implements MessageSink +{ + private static final Logger logger = LoggerFactory.getLogger(AccordMessageSink.class); + + private static class VerbMapping + { + private static final VerbMapping instance = new VerbMapping(); + + private final Map mapping = new EnumMap<>(MessageType.class); + + private VerbMapping() + { + mapping.put(MessageType.PREACCEPT_REQ, Verb.ACCORD_PREACCEPT_REQ); + mapping.put(MessageType.PREACCEPT_RSP, Verb.ACCORD_PREACCEPT_RSP); + mapping.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); + mapping.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); + mapping.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); + mapping.put(MessageType.COMMIT_REQ, Verb.ACCORD_COMMIT_REQ); + mapping.put(MessageType.COMMIT_INVALIDATE, Verb.ACCORD_COMMIT_INVALIDATE_REQ); + mapping.put(MessageType.APPLY_REQ, Verb.ACCORD_APPLY_REQ); + mapping.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); + mapping.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); + mapping.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); + mapping.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_RECOVER_REQ); + mapping.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_RECOVER_RSP); + mapping.put(MessageType.BEGIN_INVALIDATE_REQ, Verb.ACCORD_BEGIN_INVALIDATE_REQ); + mapping.put(MessageType.BEGIN_INVALIDATE_RSP, Verb.ACCORD_BEGIN_INVALIDATE_RSP); + mapping.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_COMMIT_REQ); + mapping.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_COMMIT_RSP); + mapping.put(MessageType.INFORM_TXNID_REQ, Verb.ACCORD_INFORM_OF_TXNID_REQ); + mapping.put(MessageType.INFORM_HOME_DURABLE_REQ,Verb.ACCORD_INFORM_HOME_DURABLE_REQ); + mapping.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); + mapping.put(MessageType.CHECK_STATUS_REQ, Verb.ACCORD_CHECK_STATUS_REQ); + mapping.put(MessageType.CHECK_STATUS_RSP, Verb.ACCORD_CHECK_STATUS_RSP); + mapping.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); + mapping.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); + mapping.put(MessageType.SIMPLE_RSP, Verb.ACCORD_SIMPLE_RSP); + + for (MessageType type : MessageType.values()) + { + if (!mapping.containsKey(type)) + throw new AssertionError("Missing mapping for Accord MessageType " + type); + } + } + } + + private static Verb getVerb(MessageType type) + { + return VerbMapping.instance.mapping.get(type); + } + + @Override + public void send(Node.Id to, Request request) + { + Verb verb = getVerb(request.type()); + Objects.requireNonNull(verb, "verb"); + Message message = Message.out(verb, request); + InetAddressAndPort endpoint = getEndpoint(to); + logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); + MessagingService.instance().send(message, endpoint); + } + + @Override + public void send(Node.Id to, Request request, Callback callback) + { + Verb verb = getVerb(request.type()); + Preconditions.checkArgument(verb != null); + Message message = Message.out(verb, request); + InetAddressAndPort endpoint = getEndpoint(to); + logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); + MessagingService.instance().sendWithCallback(message, endpoint, new AccordCallback<>((Callback) callback)); + } + + @Override + public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply) + { + Message replyTo = (Message) replyContext; + Message replyMsg = replyTo.responseWith(reply); + Preconditions.checkArgument(replyMsg.verb() == getVerb(reply.type())); + InetAddressAndPort endpoint = getEndpoint(replyingToNode); + logger.debug("Replying {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); + MessagingService.instance().send(replyMsg, endpoint); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java new file mode 100644 index 000000000000..bef7b43ad2bd --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.local.Node; +import accord.primitives.AbstractKeys; +import accord.primitives.AbstractRanges; +import accord.primitives.Deps; +import accord.primitives.FullKeyRoute; +import accord.primitives.FullRangeRoute; +import accord.primitives.KeyDeps; +import accord.primitives.Keys; +import accord.primitives.PartialKeyRoute; +import accord.primitives.PartialRangeRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Range; +import accord.primitives.RangeDeps; +import accord.primitives.Ranges; +import accord.primitives.RoutingKeys; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.Unseekables; +import accord.primitives.Writes; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.TxnUpdate; +import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.utils.ObjectSizes; + +public class AccordObjectSizes +{ + public static long key(Key key) + { + return ((PartitionKey) key).estimatedSizeOnHeap(); + } + + public static long key(RoutingKey key) + { + return ((AccordRoutingKey) key).estimatedSizeOnHeap(); + } + + private static final long EMPTY_RANGE_SIZE = ObjectSizes.measure(TokenRange.fullRange("")); + public static long range(Range range) + { + return EMPTY_RANGE_SIZE + key(range.start()) + key(range.end()); + } + + private static final long EMPTY_RANGES_SIZE = ObjectSizes.measure(Ranges.of()); + public static long ranges(Ranges ranges) + { + long size = EMPTY_RANGES_SIZE; + size += ObjectSizes.sizeOfReferenceArray(ranges.size()); + // TODO: many ranges are fixed size, can compute by multiplication + for (int i = 0, mi = ranges.size() ; i < mi ; i++) + size += range(ranges.get(i)); + return size; + } + + private static final long EMPTY_KEYS_SIZE = ObjectSizes.measure(Keys.of()); + public static long keys(Keys keys) + { + long size = EMPTY_KEYS_SIZE; + size += ObjectSizes.sizeOfReferenceArray(keys.size()); + for (int i=0, mi=keys.size(); i seekables) + { + switch (seekables.domain()) + { + default: throw new AssertionError(); + case Key: return keys((Keys) seekables); + case Range: return ranges((Ranges) seekables); + } + } + + private static long routingKeysOnly(AbstractKeys keys) + { + // TODO: many routing keys are fixed size, can compute by multiplication + long size = ObjectSizes.sizeOfReferenceArray(keys.size()); + for (int i=0, mi=keys.size(); i ranges) + { + long size = ObjectSizes.sizeOfReferenceArray(ranges.size()); + for (int i=0, mi=ranges.size(); i unseekables) + { + switch (unseekables.kind()) + { + default: throw new AssertionError(); + case RoutingKeys: return routingKeys((RoutingKeys) unseekables); + case PartialKeyRoute: return partialKeyRoute((PartialKeyRoute) unseekables); + case FullKeyRoute: return fullKeyRoute((FullKeyRoute) unseekables); + case RoutingRanges: return ranges((Ranges) unseekables); + case PartialRangeRoute: return partialRangeRoute((PartialRangeRoute) unseekables); + case FullRangeRoute: return fullRangeRoute((FullRangeRoute) unseekables); + } + } + + private static final long EMPTY_TXN = ObjectSizes.measure(new PartialTxn.InMemory(null, null, null, null, null, null)); + public static long txn(PartialTxn txn) + { + long size = EMPTY_TXN; + size += seekables(txn.keys()); + size += ((TxnRead) txn.read()).estimatedSizeOnHeap(); + if (txn.update() != null) + size += ((TxnUpdate) txn.update()).estimatedSizeOnHeap(); + if (txn.query() != null) + size += ((TxnQuery) txn.query()).estimatedSizeOnHeap(); + return size; + } + + private static final long TIMESTAMP_SIZE = ObjectSizes.measureDeep(Timestamp.fromBits(0, 0, new Node.Id(0))); + + public static long timestamp() + { + return TIMESTAMP_SIZE; + } + public static long timestamp(Timestamp timestamp) + { + return TIMESTAMP_SIZE; + } + + private static final long EMPTY_DEPS_SIZE = ObjectSizes.measureDeep(Deps.NONE); + public static long dependencies(Deps dependencies) + { + // TODO (expected): this doesn't measure the backing arrays, is inefficient; + // doesn't account for txnIdToKeys, txnIdToRanges, and searchable fields; + // fix to accunt for, in case caching isn't redone + long size = EMPTY_DEPS_SIZE - EMPTY_KEYS_SIZE - ObjectSizes.sizeOfReferenceArray(0); + size += keys(dependencies.keyDeps.keys()); + for (int i = 0 ; i < dependencies.rangeDeps.rangeCount() ; ++i) + size += range(dependencies.rangeDeps.range(i)); + size += ObjectSizes.sizeOfReferenceArray(dependencies.rangeDeps.rangeCount()); + + for (int i = 0 ; i < dependencies.keyDeps.txnIdCount() ; ++i) + size += timestamp(dependencies.keyDeps.txnId(i)); + for (int i = 0 ; i < dependencies.rangeDeps.txnIdCount() ; ++i) + size += timestamp(dependencies.rangeDeps.txnId(i)); + + size += KeyDeps.SerializerSupport.keysToTxnIdsCount(dependencies.keyDeps) * 4L; + size += RangeDeps.SerializerSupport.rangesToTxnIdsCount(dependencies.rangeDeps) * 4L; + return size; + } + + private static final long EMPTY_WRITES_SIZE = ObjectSizes.measure(new Writes(null, null, null)); + public static long writes(Writes writes) + { + long size = EMPTY_WRITES_SIZE; + size += timestamp(writes.executeAt); + size += seekables(writes.keys); + if (writes.write != null) + size += ((TxnWrite) writes.write).estimatedSizeOnHeap(); + return size; + } + +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java b/src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java new file mode 100644 index 000000000000..19f71da75c61 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +import accord.api.Key; +import accord.local.Command; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.local.Status.Known; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.async.AsyncContext; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; + +import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + +public class AccordPartialCommand +{ + public static final PartialCommandSerializer serializer = new PartialCommandSerializer(); + + private final TxnId txnId; + private final Timestamp executeAt; + + // TODO (soon): this should only be a list of TxnId (the deps for the key we are persisted against); but should also be stored separately and not brought into memory + private final List deps; + // TODO (soon): we only require this for Accepted; perhaps more tightly couple query API for efficiency + private final SaveStatus status; + + AccordPartialCommand(TxnId txnId, Timestamp executeAt, List deps, SaveStatus status) + { + this.txnId = txnId; + this.executeAt = executeAt; + this.deps = deps; + this.status = status; + } + + public AccordPartialCommand(Key key, Command command) + { + this(command.txnId(), command.executeAt(), + command.partialDeps() == null ? Collections.emptyList() : command.partialDeps().txnIds(key), + command.saveStatus()); + } + + public TxnId txnId() + { + return txnId; + } + + public Timestamp executeAt() + { + return executeAt; + } + + public List deps() + { + return deps; + } + + public boolean hasDep(TxnId txnId) + { + return Collections.binarySearch(deps, txnId) >= 0; + } + + public Status status() + { + return status.status; + } + + public Known known() + { + return status.known; + } + + @Override + public boolean equals(Object obj) + { + if (obj.getClass() != AccordPartialCommand.class) + return false; + AccordPartialCommand that = (AccordPartialCommand) obj; + return txnId.equals(that.txnId) + && Objects.equals(executeAt, that.executeAt) + && Objects.equals(deps, that.deps) + && status == that.status; + } + + public static class PartialCommandSerializer + { + public void serialize(AccordPartialCommand command, DataOutputPlus out, AccordSerializerVersion version) throws IOException + { + out.write(version.version); + CommandSerializers.txnId.serialize(command.txnId(), out, version.msgVersion); + serializeNullable(command.executeAt(), out, version.msgVersion, CommandSerializers.timestamp); + CommandSerializers.saveStatus.serialize(command.status, out, version.msgVersion); + serializeCollection(command.deps, out, version.msgVersion, CommandSerializers.txnId); + } + + public ByteBuffer serialize(AccordPartialCommand command) + { + AccordSerializerVersion version = AccordSerializerVersion.CURRENT; + int size = serializedSize(command, version); + try (DataOutputBuffer out = new DataOutputBuffer(size)) + { + serialize(command, out, version); + return out.buffer(false); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + public AccordSerializerVersion deserializeVersion(DataInputPlus in) throws IOException + { + return AccordSerializerVersion.serializer.deserialize(in); + } + + // check for cached command first, otherwise deserialize + private AccordPartialCommand deserialize(AccordCommandsForKey commandsForKey, AccordCommandStore commandStore, DataInputPlus in) throws IOException + { + AccordSerializerVersion version = deserializeVersion(in); + TxnId txnId = CommandSerializers.txnId.deserialize(in, version.msgVersion); + AsyncContext context = commandStore.getContext(); + AccordPartialCommand command = getCachedFull(commandsForKey, txnId, context); + if (command != null) + return command; + + Timestamp executeAt = deserializeNullable(in, version.msgVersion, CommandSerializers.timestamp); + SaveStatus status = CommandSerializers.saveStatus.deserialize(in, version.msgVersion); + List deps = deserializeList(in, version.msgVersion, CommandSerializers.txnId); + AccordPartialCommand partial = new AccordPartialCommand(txnId, executeAt, deps, status); + addToContext(partial, context); + return partial; + } + + public AccordPartialCommand deserialize(AccordCommandsForKey commandsForKey, AccordCommandStore commandStore, ByteBuffer bytes) + { + try (DataInputBuffer in = new DataInputBuffer(bytes, true)) + { + return deserialize(commandsForKey, commandStore, in); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + public int serializedSize(AccordPartialCommand command, AccordSerializerVersion version) + { + int size = Math.toIntExact(AccordSerializerVersion.serializer.serializedSize(version)); + size += CommandSerializers.txnId.serializedSize(); + size += serializedNullableSize(command.executeAt(), version.msgVersion, CommandSerializers.timestamp); + size += CommandSerializers.saveStatus.serializedSize(command.status, version.msgVersion); + size += serializedCollectionSize(command.deps, version.msgVersion, CommandSerializers.txnId); + return size; + } + + private AccordPartialCommand getCachedFull(AccordCommandsForKey commandsForKey, TxnId txnId, AsyncContext context) + { + AccordCommand command = context.commands.get(txnId); + if (command == null) + return null; + return new AccordPartialCommand(commandsForKey.key(), command); + } + + private void addToContext(AccordPartialCommand command, AsyncContext context) + { + context.commands.addPartialCommand(command); + } + + /** + * Determines if current modifications require updating command data duplicated elsewhere + */ + public boolean needsUpdate(AccordCommand command) + { + return command.executeAt.hasModifications() || command.status.hasModifications() || command.partialDeps.hasModifications(); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordSerializerVersion.java b/src/java/org/apache/cassandra/service/accord/AccordSerializerVersion.java new file mode 100644 index 000000000000..c0c03c8d93e3 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSerializerVersion.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; + +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.MessageVersionProvider; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; + +public enum AccordSerializerVersion implements MessageVersionProvider +{ + // If MessagingService version bumps, this mapping does not need to be updated; only updates needed are those that + // include accord serializer changes. + V1(1, MessagingService.VERSION_40); + + public static final AccordSerializerVersion CURRENT = V1; + public static final Serializer serializer = new Serializer(); + + public final int version; + public final int msgVersion; + + AccordSerializerVersion(int version, int msgVersion) + { + this.version = version; + this.msgVersion = msgVersion; + } + + public static AccordSerializerVersion fromVersion(int version) + { + switch (version) + { + case 1: + return V1; + default: + throw new IllegalArgumentException(); + } + } + + public static AccordSerializerVersion fromMessageVersion(int version) + { + AccordSerializerVersion[] versions = values(); + for (int i = versions.length - 1; i >= 0; i--) + { + AccordSerializerVersion v = versions[i]; + // If network version bumped (12 to 13), the accord serializers may not have been changed; use the largest + // version smaller than or equal to this version + if (v.msgVersion <= version) + return v; + } + throw new IllegalArgumentException("Attempted to use message version " + version + " which is smaller than " + versions[0] + " can handle (" + versions[0].msgVersion + ")"); + } + + @Override + public int messageVersion() + { + return msgVersion; + } + + public static class Serializer implements IVersionedSerializer + { + @Override + public void serialize(AccordSerializerVersion t, DataOutputPlus out, int version) throws IOException + { + serialize(t, out); + } + + public void serialize(AccordSerializerVersion t, DataOutputPlus out) throws IOException + { + out.writeUnsignedVInt32(t.version); + } + + @Override + public AccordSerializerVersion deserialize(DataInputPlus in, int version) throws IOException + { + return deserialize(in); + } + + public AccordSerializerVersion deserialize(DataInputPlus in) throws IOException + { + return fromVersion(in.readUnsignedVInt32()); + } + + @Override + public long serializedSize(AccordSerializerVersion t, int version) + { + return serializedSize(t); + } + + public long serializedSize(AccordSerializerVersion t) + { + return TypeSizes.sizeofUnsignedVInt(t.version); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordSerializers.java new file mode 100644 index 000000000000..43dc7c84f59d --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSerializers.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; + +import org.apache.cassandra.cql3.terms.MultiElements; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.db.ArrayClustering; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ClusteringPrefix; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.DeserializationHelper; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.apache.cassandra.db.TypeSizes.sizeof; +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; +import static org.apache.cassandra.db.marshal.CollectionType.Kind.LIST; +import static org.apache.cassandra.db.marshal.CollectionType.Kind.MAP; +import static org.apache.cassandra.db.marshal.CollectionType.Kind.SET; + +public class AccordSerializers +{ + public static ByteBuffer serialize(T item, IVersionedSerializer serializer) + { + int version = MessagingService.current_version; + long size = serializer.serializedSize(item, version) + sizeofUnsignedVInt(version); + try (DataOutputBuffer out = new DataOutputBuffer((int) size)) + { + out.writeUnsignedVInt32(version); + serializer.serialize(item, out, version); + return out.buffer(false); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + public static ByteBuffer[] serialize(List items, IVersionedSerializer serializer) + { + ByteBuffer[] result = new ByteBuffer[items.size()]; + for (int i = 0, mi = items.size(); i < mi; i++) + result[i] = serialize(items.get(i), serializer); + return result; + } + + public static T deserialize(ByteBuffer bytes, IVersionedSerializer serializer) + { + try (DataInputBuffer in = new DataInputBuffer(bytes, true)) + { + int version = in.readUnsignedVInt32(); + return serializer.deserialize(in, version); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + public static Term.Terminal deserializeCqlCollectionAsTerm(ByteBuffer buffer, AbstractType type) + { + CollectionType collectionType = (CollectionType) type; + + if (collectionType.kind == SET) + return MultiElements.Value.fromSerialized(buffer, (SetType) type); + else if (collectionType.kind == LIST) + return MultiElements.Value.fromSerialized(buffer, (ListType) type); + else if (collectionType.kind == MAP) + return MultiElements.Value.fromSerialized(buffer, (MapType) type); + + throw new UnsupportedOperationException("Unsupported collection type: " + type); + } + + public static final IVersionedSerializer partitionUpdateSerializer = new IVersionedSerializer() + { + @Override + public void serialize(PartitionUpdate upd, DataOutputPlus out, int version) throws IOException + { + PartitionUpdate.serializer.serialize(upd, out, version); + } + + @Override + public PartitionUpdate deserialize(DataInputPlus in, int version) throws IOException + { + return PartitionUpdate.serializer.deserialize(in, version, DeserializationHelper.Flag.FROM_REMOTE); + } + + @Override + public long serializedSize(PartitionUpdate upd, int version) + { + return PartitionUpdate.serializer.serializedSize(upd, version); + } + }; + + public static final IVersionedSerializer columnMetadataSerializer = new IVersionedSerializer() + { + @Override + public void serialize(ColumnMetadata column, DataOutputPlus out, int version) throws IOException + { + out.writeUTF(column.ksName); + out.writeUTF(column.cfName); + ByteBufferUtil.writeWithShortLength(column.name.bytes, out); + } + + @Override + public ColumnMetadata deserialize(DataInputPlus in, int version) throws IOException + { + String keyspace = in.readUTF(); + String table = in.readUTF(); + ByteBuffer name = ByteBufferUtil.readWithShortLength(in); + return Schema.instance.getColumnMetadata(keyspace, table, name); + } + + @Override + public long serializedSize(ColumnMetadata column, int version) + { + long size = 0; + size += sizeof(column.ksName); + size += sizeof(column.cfName); + size += ByteBufferUtil.serializedSizeWithShortLength(column.name.bytes); + return size; + } + }; + + public static final IVersionedSerializer tableMetadataSerializer = new IVersionedSerializer() + { + @Override + public void serialize(TableMetadata metadata, DataOutputPlus out, int version) throws IOException + { + metadata.id.serialize(out); + } + + @Override + public TableMetadata deserialize(DataInputPlus in, int version) throws IOException + { + return Schema.instance.getTableMetadata(TableId.deserialize(in)); + } + + @Override + public long serializedSize(TableMetadata metadata, int version) + { + return metadata.id.serializedSize(); + } + }; + + public static final IVersionedSerializer> clusteringSerializer = new IVersionedSerializer>() + { + @Override + public void serialize(Clustering clustering, DataOutputPlus out, int version) throws IOException + { + doSerialize(clustering, out); + } + + public void doSerialize(Clustering clustering, DataOutputPlus out) throws IOException + { + if (clustering.kind() == ClusteringPrefix.Kind.STATIC_CLUSTERING) + { + out.writeBoolean(true); + } + else + { + out.writeBoolean(false); + out.writeUnsignedVInt32(clustering.size()); + ValueAccessor accessor = clustering.accessor(); + for (int i = 0; i < clustering.size(); i++) + { + accessor.writeWithVIntLength(clustering.get(i), out); + } + } + } + + @Override + public Clustering deserialize(DataInputPlus in, int version) throws IOException + { + Clustering clustering; + if (in.readBoolean()) + { + clustering = Clustering.STATIC_CLUSTERING; + } + else + { + int numComponents = in.readUnsignedVInt32(); + byte[][] components = new byte[numComponents][]; + for (int ci = 0; ci < numComponents; ci++) + { + int componentLength = in.readUnsignedVInt32(); + components[ci] = new byte[componentLength]; + in.readFully(components[ci]); + } + clustering = new ArrayClustering(components); + } + return clustering; + } + + @Override + public long serializedSize(Clustering clustering, int version) + { + return computeSerializedSize(clustering); + } + + private long computeSerializedSize(Clustering clustering) + { + int size = sizeof(true) + sizeofUnsignedVInt(clustering.size()); + ValueAccessor accessor = clustering.accessor(); + for (int i = 0; i < clustering.size(); i++) + { + int valueSize = accessor.size(clustering.get(i)); + size += valueSize; + size += sizeofUnsignedVInt(valueSize); + } + return size; + } + }; +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java new file mode 100644 index 000000000000..c308afa2dee7 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Arrays; +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import com.google.common.annotations.VisibleForTesting; + +import accord.api.Result; +import accord.coordinate.Preempted; +import accord.coordinate.Timeout; +import accord.impl.SimpleProgressLog; +import accord.impl.SizeOfIntersectionSorter; +import accord.local.Node; +import accord.local.ShardDistributor.EvenSplit; +import accord.messages.Request; +import accord.primitives.Txn; +import accord.topology.TopologyManager; +import org.apache.cassandra.concurrent.Shutdownable; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.metrics.AccordClientRequestMetrics; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.service.accord.api.AccordAgent; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; +import org.apache.cassandra.service.accord.api.AccordScheduler; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.ExecutorUtils; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; + +import static org.apache.cassandra.config.DatabaseDescriptor.getConcurrentAccordOps; +import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +public class AccordService implements IAccordService, Shutdownable +{ + public static final AccordClientRequestMetrics readMetrics = new AccordClientRequestMetrics("AccordRead"); + public static final AccordClientRequestMetrics writeMetrics = new AccordClientRequestMetrics("AccordWrite"); + + private final Node node; + private final Shutdownable nodeShutdown; + private final AccordMessageSink messageSink; + private final AccordConfigurationService configService; + private final AccordScheduler scheduler; + private final AccordVerbHandler verbHandler; + + private static final IAccordService NOOP_SERVICE = new IAccordService() + { + @Override + public IVerbHandler verbHandler() + { + return null; + } + + @Override + public void createEpochFromConfigUnsafe() { } + + @Override + public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) + { + throw new UnsupportedOperationException("No accord transaction should be executed when accord_transactions_enabled = false in cassandra.yaml"); + } + + @Override + public long currentEpoch() + { + throw new UnsupportedOperationException("Cannot return epoch when accord_transactions_enabled = false in cassandra.yaml"); + } + + @Override + public void setCacheSize(long kb) { } + + @Override + public TopologyManager topology() + { + throw new UnsupportedOperationException("Cannot return topology when accord_transactions_enabled = false in cassandra.yaml"); + } + + @Override + public void shutdownAndWait(long timeout, TimeUnit unit) { } + }; + + private static class Handle + { + public static final AccordService instance = new AccordService(); + } + + public static IAccordService instance() + { + return DatabaseDescriptor.getAccordTransactionsEnabled() ? Handle.instance : NOOP_SERVICE; + } + + public static long uniqueNow() + { + return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + } + + private AccordService() + { + Node.Id localId = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + this.messageSink = new AccordMessageSink(); + this.configService = new AccordConfigurationService(localId); + this.scheduler = new AccordScheduler(); + this.node = new Node(localId, + messageSink, + configService, + AccordService::uniqueNow, + () -> null, + new KeyspaceSplitter(new EvenSplit<>(getConcurrentAccordOps(), getPartitioner().accordSplitter())), + new AccordAgent(), + new Random(), + scheduler, + SizeOfIntersectionSorter.SUPPLIER, + SimpleProgressLog::new, + AccordCommandStores::new); + this.nodeShutdown = toShutdownable(node); + this.verbHandler = new AccordVerbHandler<>(this.node); + } + + @Override + public IVerbHandler verbHandler() + { + return verbHandler; + } + + @Override + @VisibleForTesting + public void createEpochFromConfigUnsafe() + { + configService.createEpochFromConfig(); + } + + public static long nowInMicros() + { + return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + } + + @Override + public long currentEpoch() + { + return configService.currentEpoch(); + } + + @Override + public TopologyManager topology() + { + return node.topology(); + } + + /** + * Consistency level is just echoed back in timeouts, in the future it may be used for interoperability + * with non-Accord operations. + */ + @Override + public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) + { + AccordClientRequestMetrics metrics = txn.isWrite() ? writeMetrics : readMetrics; + final long startNanos = nanoTime(); + try + { + metrics.keySize.update(txn.keys().size()); + Future future = node.coordinate(txn); + Result result = future.get(DatabaseDescriptor.getTransactionTimeout(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + return (TxnData) result; + } + catch (ExecutionException e) + { + Throwable cause = e.getCause(); + if (cause instanceof Timeout) + { + metrics.timeouts.mark(); + throw throwTimeout(txn, consistencyLevel); + } + if (cause instanceof Preempted) + { + metrics.preempts.mark(); + //TODO need to improve + // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. + // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match + throw throwTimeout(txn, consistencyLevel); + } + metrics.failures.mark(); + throw new RuntimeException(cause); + } + catch (InterruptedException e) + { + metrics.failures.mark(); + throw new UncheckedInterruptedException(e); + } + catch (TimeoutException e) + { + metrics.timeouts.mark(); + throw throwTimeout(txn, consistencyLevel); + } + finally + { + metrics.addNano(nanoTime() - startNanos); + } + } + + private static RuntimeException throwTimeout(Txn txn, ConsistencyLevel consistencyLevel) + { + throw txn.isWrite() ? new WriteTimeoutException(WriteType.CAS, consistencyLevel, 0, 0) + : new ReadTimeoutException(consistencyLevel, 0, 0, false); + } + + @VisibleForTesting + AccordMessageSink messageSink() + { + return messageSink; + } + + @Override + public void setCacheSize(long kb) + { + long bytes = kb << 10; + AccordCommandStores commandStores = (AccordCommandStores) node.commandStores(); + commandStores.setCacheSize(bytes); + } + + @Override + public boolean isTerminated() + { + return scheduler.isTerminated(); + } + + @Override + public void shutdown() + { + ExecutorUtils.shutdown(Arrays.asList(scheduler, nodeShutdown)); + } + + @Override + public Object shutdownNow() + { + ExecutorUtils.shutdownNow(Arrays.asList(scheduler, nodeShutdown)); + return null; + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException + { + try + { + ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(scheduler, nodeShutdown)); + return true; + } + catch (TimeoutException e) + { + return false; + } + } + + @VisibleForTesting + @Override + public void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException + { + ExecutorUtils.shutdownAndWait(timeout, unit, this); + } + + private static Shutdownable toShutdownable(Node node) + { + return new Shutdownable() { + private volatile boolean isShutdown = false; + + @Override + public boolean isTerminated() + { + // we don't know about terminiated... so settle for shutdown! + return isShutdown; + } + + @Override + public void shutdown() + { + isShutdown = true; + node.shutdown(); + } + + @Override + public Object shutdownNow() + { + // node doesn't offer shutdownNow + shutdown(); + return null; + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) + { + // node doesn't offer + return true; + } + }; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordState.java b/src/java/org/apache/cassandra/service/accord/AccordState.java new file mode 100644 index 000000000000..2f5a9dc68c93 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordState.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.function.BiConsumer; +import java.util.function.Function; + +import org.apache.cassandra.service.accord.store.StoredNavigableMap; +import org.apache.cassandra.service.accord.store.StoredSet; +import org.apache.cassandra.utils.concurrent.Future; + +public interface AccordState +{ + enum ReadWrite { FULL, WRITE_ONLY, READ_ONLY } + + K key(); + + boolean hasModifications(); + + void clearModifiedFlag(); + + boolean isEmpty(); + + boolean isLoaded(); + + long estimatedSizeOnHeap(); + + default ReadWrite rw() + { + return ReadWrite.FULL; + } + + default boolean isFullInstance() + { + return rw() == ReadWrite.FULL; + } + + default boolean isWriteOnlyInstance() + { + return rw() == ReadWrite.WRITE_ONLY; + } + + default boolean isReadOnlyInstance() + { + return rw() == ReadWrite.READ_ONLY; + } + + interface WriteOnly> extends AccordState + { + @Override + default ReadWrite rw() + { + return ReadWrite.WRITE_ONLY; + } + + void future(Future future); + + Future future(); + + /** + * Apply the write only changes to the full instance + */ + void applyChanges(V instance); + + static , V> void applyMapChanges(T from, T to, Function> getMap) + { + StoredNavigableMap fromMap = getMap.apply(from); + + if (!fromMap.hasModifications()) + return; + + StoredNavigableMap toMap = getMap.apply(to); + fromMap.forEachAddition(toMap::blindPut); + fromMap.forEachDeletion((BiConsumer) toMap::blindRemove); + } + + static > void applySetChanges(T from, T to, Function> getSet) + { + StoredSet fromSet = getSet.apply(from); + + if (!fromSet.hasModifications()) + return; + + StoredSet toSet = getSet.apply(to); + fromSet.forEachAddition(toSet::blindAdd); + fromSet.forEachDeletion(toSet::blindRemove); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java new file mode 100644 index 000000000000..37e7ba17cf72 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -0,0 +1,647 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.Data; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; + +/** + * Cache for AccordCommand and AccordCommandsForKey, available memory is shared between the two object types. + * + * Supports dynamic object sizes. After each acquire/free cycle, the cacheable objects size is recomputed to + * account for data added/removed during txn processing if it's modified flag is set + * + * TODO: explain how items move to and from the active pool and are evicted + */ +public class AccordStateCache +{ + private static final Logger logger = LoggerFactory.getLogger(AccordStateCache.class); + + private static class WriteOnlyGroup> + { + private boolean locked = false; + private List> items = new ArrayList<>(); + + @Override + public String toString() + { + return "WriteOnlyGroup{" + + "locked=" + locked + + ", items=" + items + + '}'; + } + + void lock() + { + locked = true; + } + + void add(AccordState.WriteOnly item) + { + items.add(item); + } + + void purge() + { + if (locked) + return; + + while (!items.isEmpty()) + { + AccordState.WriteOnly item = items.get(0); + + // we can't remove items out of order, so if we encounter a write is still pending, we stop + if (item.future() == null || !item.future().isDone()) + break; + + items.remove(0); + } + } + + boolean isEmpty() + { + return items.isEmpty(); + } + } + + static class Node> + { + static final long EMPTY_SIZE = ObjectSizes.measure(new AccordStateCache.Node<>(null)); + + final V value; + private Node prev; + private Node next; + private int references = 0; + private long lastQueriedEstimatedSizeOnHeap = 0; + + Node(V value) + { + this.value = value; + } + + long estimatedSizeOnHeap() + { + long result = EMPTY_SIZE + value.estimatedSizeOnHeap(); + lastQueriedEstimatedSizeOnHeap = result; + return result; + } + + long estimatedSizeOnHeapDelta() + { + long prevSize = lastQueriedEstimatedSizeOnHeap; + return estimatedSizeOnHeap() - prevSize; + } + + K key() + { + return value.key(); + } + } + + static class Stats + { + private long queries; + private long hits; + private long misses; + } + + private static class NamedMap extends HashMap + { + final String name; + + public NamedMap(String name) + { + this.name = name; + } + } + + public final Map> active = new HashMap<>(); + private final Map> cache = new HashMap<>(); + private final Map> pendingWriteOnly = new HashMap<>(); + private final Set> instances = new HashSet<>(); + + private final NamedMap> loadFutures = new NamedMap<>("loadFutures"); + private final NamedMap> saveFutures = new NamedMap<>("saveFutures"); + + private final NamedMap> readFutures = new NamedMap<>("readFutures"); + private final NamedMap> writeFutures = new NamedMap<>("writeFutures"); + + Node head; + Node tail; + private long maxSizeInBytes; + private long bytesCached = 0; + private final Stats stats = new Stats(); + + public AccordStateCache(long maxSizeInBytes) + { + this.maxSizeInBytes = maxSizeInBytes; + } + + public void setMaxSize(long size) + { + maxSizeInBytes = size; + maybeEvict(); + } + + private void unlink(Node node) + { + Node prev = node.prev; + Node next = node.next; + + if (prev == null) + { + Preconditions.checkState(head == node, "previous is null but the head isnt the provided node!"); + head = next; + } + else + { + prev.next = next; + } + + if (next == null) + { + Preconditions.checkState(tail == node, "next is null but the tail isnt the provided node!"); + tail = prev; + } + else + { + next.prev = prev; + } + + node.prev = null; + node.next = null; + } + + private void push(Node node) + { + if (head != null) + { + node.prev = null; + node.next = head; + head.prev = node; + head = node; + } + else + { + head = node; + tail = node; + } + } + + private void updateSize(Node node) + { + bytesCached += node.estimatedSizeOnHeapDelta(); + } + + // don't evict if there's an outstanding save future. If an item is evicted then reloaded + // before it's mutation is applied, out of date info will be loaded + private boolean canEvict(Object key) + { + // getFuture only returns a future if it is running, so don't need to check if its still running + Future future = getFuture(saveFutures, key); + return future == null; + } + + private void maybeEvict() + { + if (bytesCached <= maxSizeInBytes) + return; + + Node current = tail; + while (current != null && bytesCached > maxSizeInBytes) + { + Node evict = current; + current = current.prev; + + // if there are any dangling write only groups, apply them and + // move their futures into write futures so we don't evict + applyAndRemoveWriteOnlyGroup(evict.value); + if (!canEvict(evict.key())) + continue; + + logger.trace("Evicting {} {}", evict.value.getClass().getSimpleName(), evict.key()); + unlink(evict); + cache.remove(evict.key()); + bytesCached -= evict.estimatedSizeOnHeap(); + } + } + + private static > F getFuture(NamedMap futuresMap, K key) + { + F r = futuresMap.get(key); + if (r == null) + return null; + + if (!r.isDone()) + return r; + + if (logger.isTraceEnabled()) + logger.trace("Clearing future for {} from {}: {}", key, futuresMap.name, r); + futuresMap.remove(key); + return null; + } + + private static > void setFuture(Map futuresMap, K key, F future) + { + Preconditions.checkState(!futuresMap.containsKey(key)); + futuresMap.put(key, future); + } + + private static void mergeFuture(Map> futuresMap, K key, Future future) + { + Future existing = futuresMap.get(key); + if (existing != null && !existing.isDone()) + { + logger.trace("Merging future {} with existing {}", future, existing); + future = FutureCombiner.allOf(ImmutableList.of(existing, future)); + } + + futuresMap.put(key, future); + } + + private void maybeClearFuture(K key) + { + // will clear if it's done + getFuture(loadFutures, key); + getFuture(saveFutures, key); + getFuture(readFutures, key); + getFuture(writeFutures, key); + } + + public > void applyAndRemoveWriteOnlyGroup(V instance) + { + WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.remove(instance.key()); + if (group == null) + return; + + logger.trace("Applying and removing write only group for {} ({})", instance.key(), group); + for (AccordState.WriteOnly writeOnly : group.items) + { + writeOnly.applyChanges(instance); + if (!writeOnly.future().isDone()) + mergeFuture(saveFutures, instance.key(), writeOnly.future()); + } + } + + public class Instance> + { + private final Class keyClass; + private final Class valClass; + private final Function factory; + private final Stats stats = new Stats(); + + public Instance(Class keyClass, Class valClass, Function factory) + { + this.keyClass = keyClass; + this.valClass = valClass; + this.factory = factory; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Instance instance = (Instance) o; + return keyClass.equals(instance.keyClass) && valClass.equals(instance.valClass); + } + + @Override + public int hashCode() + { + return Objects.hash(keyClass, valClass); + } + + private V getOrCreate(K key, boolean createIfAbsent) + { + stats.queries++; + AccordStateCache.this.stats.queries++; + + Node node = (Node) active.get(key); + if (node != null) + { + stats.hits++; + AccordStateCache.this.stats.hits++; + node.references++; + return node.value; + } + + node = (Node) cache.remove(key); + + if (node == null) + { + stats.misses++; + AccordStateCache.this.stats.misses++; + if (!createIfAbsent) + return null; + V value = factory.apply(key); + node = new Node<>(value); + updateSize(node); + } + else + { + stats.hits++; + AccordStateCache.this.stats.hits++; + unlink(node); + } + + Preconditions.checkState(node.references == 0); + maybeEvict(); + + node.references++; + active.put(key, node); + + return node.value; + } + + public V getOrCreate(K key) + { + return getOrCreate(key, true); + } + + public V getOrNull(K key) + { + return getOrCreate(key, false); + } + + public void release(V value) + { + K key = value.key(); + logger.trace("Releasing resources for {}: {}", key, value); + maybeClearFuture(key); + Node node = (Node) active.get(key); + Preconditions.checkState(node != null && node.references > 0); + Preconditions.checkState(node.value == value); + if (--node.references == 0) + { + logger.trace("Moving {} from active pool to cache", key); + active.remove(key); + cache.put(key, node); + push(node); + } + + if (value.hasModifications()) + { + value.clearModifiedFlag(); + updateSize(node); + } + maybeEvict(); + } + + @VisibleForTesting + boolean canEvict(K key) + { + return AccordStateCache.this.canEvict(key); + } + + @VisibleForTesting + boolean writeOnlyGroupIsLocked(K key) + { + WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.get(key); + return group != null && group.locked; + } + + @VisibleForTesting + int pendingWriteOnlyOperations(K key) + { + WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.get(key); + return group != null ? group.items.size() : 0; + } + + public void lockWriteOnlyGroupIfExists(K key) + { + WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.get(key); + if (group == null) + return; + + logger.trace("Locking write only group for {} ({})", key, group); + group.purge(); + if (!group.isEmpty()) + group.lock(); + } + + public void applyAndRemoveWriteOnlyGroup(V instance) + { + AccordStateCache.this.applyAndRemoveWriteOnlyGroup(instance); + } + + public void addWriteOnly(AccordState.WriteOnly writeOnly) + { + K key = writeOnly.key(); + Preconditions.checkArgument(writeOnly.future() != null); + WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.computeIfAbsent(key, k -> new WriteOnlyGroup<>()); + + // if a load future exists for the key we're creating a write group for, we need to lock + // the group so the loading instance gets changes applied when it finishes loading + if (getLoadFuture(key) != null) + group.lock(); + + group.add(writeOnly); + } + + public void purgeWriteOnly(K key) + { + WriteOnlyGroup items = pendingWriteOnly.get(key); + if (items == null) + return; + + items.purge(); + if (items.isEmpty()) + pendingWriteOnly.remove(key); + } + + public boolean writeOnlyGroupExists(K key) + { + return pendingWriteOnly.get(key) != null; + } + + public int getWriteOnlyGroupSize(K key) + { + WriteOnlyGroup group = pendingWriteOnly.get(key); + return group != null ? group.items.size() : 0; + } + + public Future getLoadFuture(K key) + { + return getFuture(loadFutures, key); + } + + public void cleanupLoadFuture(K key) + { + getLoadFuture(key); + } + + @VisibleForTesting + public boolean hasLoadFuture(K key) + { + return loadFutures.get(key) != null; + } + + public void setLoadFuture(K key, Future future) + { + setFuture(loadFutures, key, future); + } + + public Future getSaveFuture(K key) + { + return getFuture(saveFutures, key); + } + + public void addSaveFuture(K key, Future future) + { + logger.trace("Adding save future for {}: {}", key, future); + mergeFuture(saveFutures, key, future); + } + + public void cleanupSaveFuture(K key) + { + getSaveFuture(key); + } + + @VisibleForTesting + public boolean hasSaveFuture(K key) + { + return saveFutures.get(key) != null; + } + + public Future getReadFuture(K key) + { + return getFuture(readFutures, key); + } + + public void setReadFuture(K key, Future future) + { + setFuture(readFutures, key, future); + } + + public void cleanupReadFuture(K key) + { + getReadFuture(key); + } + + public Future getWriteFuture(K key) + { + return (Future) getFuture(writeFutures, key); + } + + public void setWriteFuture(K key, Future future) + { + setFuture(writeFutures, key, future); + } + + public void cleanupWriteFuture(K key) + { + getWriteFuture(key); + } + + public long cacheQueries() + { + return stats.queries; + } + + public long cacheHits() + { + return stats.hits; + } + + public long cacheMisses() + { + return stats.misses; + } + } + + public > Instance instance(Class keyClass, Class valClass, Function factory) + { + Instance instance = new Instance<>(keyClass, valClass, factory); + if (!instances.add(instance)) + throw new IllegalArgumentException(String.format("Cache instances for types %s -> %s already exists", + keyClass.getName(), valClass.getName())); + return instance; + } + + @VisibleForTesting + int numActiveEntries() + { + return active.size(); + } + + @VisibleForTesting + int numCachedEntries() + { + return cache.size(); + } + + @VisibleForTesting + long bytesCached() + { + return bytesCached; + } + + @VisibleForTesting + boolean keyIsActive(Object key) + { + return active.containsKey(key); + } + + @VisibleForTesting + boolean keyIsCached(Object key) + { + return cache.containsKey(key); + } + + @VisibleForTesting + int references(Object key) + { + Node node = active.get(key); + return node != null ? node.references : 0; + } + + public long cacheQueries() + { + return stats.queries; + } + + public long cacheHits() + { + return stats.hits; + } + + public long cacheMisses() + { + return stats.misses; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java new file mode 100644 index 000000000000..a4bd5d028c3e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.stream.Collectors; + +import accord.topology.Shard; +import accord.topology.Topology; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; + +public class AccordTopologyUtils +{ + private static Shard createShard(TokenRange range, EndpointsForToken natural, EndpointsForToken pending) + { + return new Shard(range, + natural.stream().map(EndpointMapping::getId).collect(Collectors.toList()), + natural.stream().map(EndpointMapping::getId).collect(Collectors.toSet()), + pending.stream().map(EndpointMapping::getId).collect(Collectors.toSet())); + } + + private static TokenRange minRange(String keyspace, Token token) + { + return new TokenRange(SentinelKey.min(keyspace), new TokenKey(keyspace, token)); + } + + private static TokenRange maxRange(String keyspace, Token token) + { + return new TokenRange(new TokenKey(keyspace, token), SentinelKey.max(keyspace)); + } + + private static TokenRange range(String keyspace, Token left, Token right) + { + return new TokenRange(new TokenKey(keyspace, left), new TokenKey(keyspace, right)); + } + +// private static List createShards(String keyspace, TokenMetadata tokenMetadata) +// { +// AbstractReplicationStrategy replication = Keyspace.open(keyspace).getReplicationStrategy(); +// Set tokenSet = new HashSet<>(tokenMetadata.sortedTokens()); +// tokenSet.addAll(tokenMetadata.getBootstrapTokens().keySet()); +// tokenMetadata.getMovingEndpoints().forEach(p -> tokenSet.add(p.left)); +// List tokens = new ArrayList<>(tokenSet); +// tokens.sort(Comparator.naturalOrder()); +// +// List shards = new ArrayList<>(tokens.size() + 1); +// Shard finalShard = null; +// for (int i=0, mi=tokens.size(); i keyspaces = new ArrayList<>(Schema.instance.distributedKeyspaces().names()); +// keyspaces.sort(String::compareTo); +// +// List shards = new ArrayList<>(); +// for (String keyspace : keyspaces) +// shards.addAll(createShards(keyspace, tokenMetadata)); +// +// return new Topology(epoch, shards.toArray(new Shard[0])); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java new file mode 100644 index 000000000000..216d64df42e1 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.Node; +import accord.messages.Request; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; + +public class AccordVerbHandler implements IVerbHandler +{ + private static final Logger logger = LoggerFactory.getLogger(AccordVerbHandler.class); + + private final Node node; + + public AccordVerbHandler(Node node) + { + this.node = node; + } + + @Override + public void doVerb(Message message) throws IOException + { + logger.debug("Receiving {} from {}", message.payload, message.from()); + message.payload.process(node, EndpointMapping.getId(message.from()), message); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/EndpointMapping.java b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java new file mode 100644 index 000000000000..a863ca7410ac --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.net.Inet4Address; +import java.net.InetAddress; +import java.net.UnknownHostException; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableCollection; +import com.google.common.collect.ImmutableMap; +import com.google.common.primitives.Ints; + +import accord.local.Node; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Replica; + +public class EndpointMapping +{ + static Node.Id endpointToId(InetAddressAndPort endpoint) + { + Preconditions.checkArgument(endpoint.getAddress() instanceof Inet4Address); + Inet4Address address = (Inet4Address) endpoint.getAddress(); + int id = Ints.fromByteArray(address.getAddress()); + return new Node.Id(id); + } + + static InetAddressAndPort idToEndpoint(Node.Id node) + { + byte[] bytes = Ints.toByteArray(node.id); + try + { + return InetAddressAndPort.getByAddress(InetAddress.getByAddress(bytes)); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + + // TODO: Remove this if its one usage in AccordConfigurationService is removed. + public static ImmutableCollection knownIds() + { + return mapping.endpointToId.values(); + } + + private static class Mapping + { + private static final Mapping EMPTY = new Mapping(ImmutableMap.of(), ImmutableMap.of()); + final ImmutableMap idToEndpoint; + final ImmutableMap endpointToId; + + public Mapping(ImmutableMap idToEndpoint, + ImmutableMap endpointToId) + { + this.idToEndpoint = idToEndpoint; + this.endpointToId = endpointToId; + } + + private static ImmutableMap put(ImmutableMap current, K key, V val) + { + return ImmutableMap.builderWithExpectedSize(current.size() + 1).putAll(current).put(key, val).build(); + } + + public Mapping add(InetAddressAndPort endpoint) + { + if (endpointToId.containsKey(endpoint)) + return this; + Node.Id id = endpointToId(endpoint); + return new Mapping(put(idToEndpoint, id, endpoint), put(endpointToId, endpoint, id)); + } + + public Mapping add(Node.Id id) + { + if (idToEndpoint.containsKey(id)) + return this; + + InetAddressAndPort endpoint = idToEndpoint(id); + return new Mapping(put(idToEndpoint, id, endpoint), put(endpointToId, endpoint, id)); + } + } + + private static volatile Mapping mapping = Mapping.EMPTY; + + private EndpointMapping() {} + + public static Node.Id getId(InetAddressAndPort endpoint) + { + Node.Id id = mapping.endpointToId.get(endpoint); + if (id == null) + { + synchronized (EndpointMapping.class) + { + mapping = mapping.add(endpoint); + id = mapping.endpointToId.get(endpoint); + } + } + return id; + } + + // FIXME: put this stuff into the configuration service, where it will eventually live + public static Node.Id getId(Replica replica) + { + return getId(replica.endpoint()); + } + + public static InetAddressAndPort getEndpoint(Node.Id id) + { + InetAddressAndPort endpoint = mapping.idToEndpoint.get(id); + if (endpoint == null) + { + synchronized (EndpointMapping.class) + { + mapping = mapping.add(id); + endpoint = mapping.idToEndpoint.get(id); + } + } + return endpoint; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java new file mode 100644 index 000000000000..7a01fc52ab38 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.messages.Request; +import accord.primitives.Txn; +import accord.topology.TopologyManager; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.service.accord.txn.TxnData; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +public interface IAccordService +{ + IVerbHandler verbHandler(); + + void createEpochFromConfigUnsafe(); + + TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel); + + long currentEpoch(); + + void setCacheSize(long kb); + + TopologyManager topology(); + + void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException; +} diff --git a/src/java/org/apache/cassandra/service/accord/ListenerProxy.java b/src/java/org/apache/cassandra/service/accord/ListenerProxy.java new file mode 100644 index 000000000000..ea7a74c0c658 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/ListenerProxy.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Objects; + +import com.google.common.collect.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.Command; +import accord.local.CommandListener; +import accord.local.PreLoadContext; +import accord.local.SafeCommandStore; +import accord.primitives.Keys; +import accord.primitives.TxnId; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.async.AsyncContext; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.utils.ObjectSizes; + +public abstract class ListenerProxy implements CommandListener, Comparable +{ + private static final Logger logger = LoggerFactory.getLogger(ListenerProxy.class); + + public enum Kind { COMMAND, COMMANDS_FOR_KEY } + + public abstract Kind kind(); + public abstract ByteBuffer identifier(); + + private ListenerProxy() + { + } + + @Override + public int compareTo(ListenerProxy that) + { + return kind().compareTo(that.kind()); + } + + protected abstract long estimatedSizeOnHeap(); + + static class CommandListenerProxy extends ListenerProxy + { + private static final long EMPTY_SIZE = ObjectSizes.measure(new CommandListenerProxy(null)); + private final TxnId txnId; + + public CommandListenerProxy(TxnId txnId) + { + this.txnId = txnId; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CommandListenerProxy that = (CommandListenerProxy) o; + return txnId.equals(that.txnId); + } + + @Override + public int hashCode() + { + return Objects.hash(txnId); + } + + @Override + public int compareTo(ListenerProxy that) + { + int cmp = super.compareTo(that); + if (cmp != 0) + return cmp; + + return this.txnId.compareTo(((CommandListenerProxy) that).txnId); + } + + @Override + public String toString() + { + return "CommandListenerProxy{" + + "txnId=" + txnId + + '}'; + } + + @Override + public PreLoadContext listenerPreLoadContext(TxnId caller) + { + throw new UnsupportedOperationException(); + } + + @Override + public Kind kind() + { + return Kind.COMMAND; + } + + @Override + public ByteBuffer identifier() + { + ByteBuffer bytes = ByteBuffer.allocate(1 + CommandSerializers.txnId.serializedSize()); + ByteBufferAccessor.instance.putByte(bytes, 0, (byte) kind().ordinal()); + CommandSerializers.txnId.serialize(txnId, bytes, ByteBufferAccessor.instance, 1); + return bytes; + } + + @Override + public void onChange(SafeCommandStore safeStore, Command c) + { + AccordCommand command = (AccordCommand) c; + SafeAccordCommandStore commandStore = (SafeAccordCommandStore) safeStore; + AsyncContext context = commandStore.context(); + PreLoadContext loadCtx = PreLoadContext.contextFor(ImmutableList.of(command.txnId(), txnId), Keys.EMPTY); + if (context.containsScopedItems(loadCtx)) + { + // TODO (soon): determine if this can break anything by not waiting for the current operation to denormalize it's data + // the summary loader may default to commands in context, in case it wouldn't + logger.trace("{}: synchronously updating listening command {}", c.txnId(), txnId); + commandStore.command(txnId).onChange(safeStore, c); + } + else + { + TxnId callingTxnId = command.txnId(); + logger.trace("{}: asynchronously updating listening command {}", c.txnId(), txnId); + commandStore.execute(loadCtx, reSafeStore -> { + Command caller = reSafeStore.command(callingTxnId); + commandStore.command(txnId).onChange(reSafeStore, caller); + }); + } + } + + @Override + protected long estimatedSizeOnHeap() + { + return EMPTY_SIZE + AccordObjectSizes.timestamp(txnId); + } + } + + /** + * These always need to be run in either the same task as the notifying command, or immediately afterwards, otherwise we + * may use stale max timestamps for preaccept + */ + static class CommandsForKeyListenerProxy extends ListenerProxy + { + private static final long EMPTY_SIZE = ObjectSizes.measure(new CommandsForKeyListenerProxy(null)); + private final PartitionKey key; + + public CommandsForKeyListenerProxy(PartitionKey key) + { + this.key = key; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CommandsForKeyListenerProxy that = (CommandsForKeyListenerProxy) o; + return key.equals(that.key); + } + + @Override + public int hashCode() + { + return Objects.hash(key); + } + + @Override + public int compareTo(ListenerProxy that) + { + int cmp = super.compareTo(that); + if (cmp != 0) + return cmp; + + return this.key.compareTo(((CommandsForKeyListenerProxy) that).key); + } + + @Override + public String toString() + { + return "CommandsForKeyListenerProxy{" + + "key=" + key + + '}'; + } + + @Override + public PreLoadContext listenerPreLoadContext(TxnId caller) + { + throw new UnsupportedOperationException(); + } + + @Override + public Kind kind() + { + return Kind.COMMANDS_FOR_KEY; + } + + @Override + public ByteBuffer identifier() + { + ByteBuffer bytes = ByteBuffer.allocate((int) (1 + PartitionKey.serializer.serializedSize(key))); + ByteBufferAccessor.instance.putByte(bytes, 0, (byte) kind().ordinal()); + PartitionKey.serializer.serialize(key, bytes, ByteBufferAccessor.instance, 1); + return bytes; + } + + @Override + public void onChange(SafeCommandStore safeStore, Command c) + { + AccordCommand command = (AccordCommand) c; + SafeAccordCommandStore commandStore = (SafeAccordCommandStore) safeStore; + AsyncContext context = commandStore.context(); + PreLoadContext loadCtx = PreLoadContext.contextFor(ImmutableList.of(command.txnId()), Keys.of(key)); + if (context.containsScopedItems(loadCtx)) + { + logger.trace("{}: synchronously updating listening cfk {}", c.txnId(), key); + commandStore.commandsForKey(key).onChange(safeStore, c); + } + else + { + TxnId callingTxnId = command.txnId(); + logger.trace("{}: asynchronously updating listening cfk {}", c.txnId(), key); + commandStore.execute(loadCtx, reSafeStore -> { + Command caller = reSafeStore.command(callingTxnId); + commandStore.commandsForKey(key).onChange(reSafeStore, caller); + }); + } + } + + @Override + protected long estimatedSizeOnHeap() + { + return EMPTY_SIZE + key.estimatedSizeOnHeap(); + } + } + + public static ListenerProxy deserialize(V src, ValueAccessor accessor, int offset) throws IOException + { + int ordinal = accessor.getByte(src, offset); + Kind kind = Kind.values()[ordinal]; + offset += 1; + switch (kind) + { + case COMMAND: + TxnId txnId = CommandSerializers.txnId.deserialize(src, accessor, offset); + return new CommandListenerProxy(txnId); + case COMMANDS_FOR_KEY: + PartitionKey key = PartitionKey.serializer.deserialize(src, accessor, offset); + return new CommandsForKeyListenerProxy(key); + default: + throw new IOException("Unknown kind ordinal " + ordinal); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/ReadFuture.java b/src/java/org/apache/cassandra/service/accord/ReadFuture.java new file mode 100644 index 000000000000..f28f4856b997 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/ReadFuture.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Function; + +import com.google.common.util.concurrent.FutureCallback; + +import accord.api.Data; +import io.netty.util.concurrent.GenericFutureListener; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; + +public class ReadFuture implements Future +{ + private final Future wrappped; + + public ReadFuture(Future wrappped) + { + this.wrappped = wrappped; + } + + @Override + public Future await() throws InterruptedException + { + return wrappped.await(); + } + + @Override + public Future awaitUninterruptibly() + { + return wrappped.awaitUninterruptibly(); + } + + @Override + public Future awaitThrowUncheckedOnInterrupt() + { + return wrappped.awaitThrowUncheckedOnInterrupt(); + } + + @Override + public void rethrowIfFailed() + { + wrappped.rethrowIfFailed(); + } + + @Override + public Future sync() throws InterruptedException + { + return wrappped.sync(); + } + + @Override + public Future syncUninterruptibly() + { + return wrappped.syncUninterruptibly(); + } + + @Override + public Future syncThrowUncheckedOnInterrupt() + { + return wrappped.syncThrowUncheckedOnInterrupt(); + } + + @Override + @Deprecated(since = "5.1", forRemoval = true) + public boolean await(long l) throws InterruptedException + { + return wrappped.await(l); + } + + @Override + @Deprecated(since = "5.1", forRemoval = true) + public boolean awaitUninterruptibly(long l) + { + return wrappped.awaitUninterruptibly(l); + } + + @Override + public Future addCallback(BiConsumer callback) + { + return wrappped.addCallback(callback); + } + + @Override + public Future addCallback(BiConsumer callback, Executor executor) + { + return wrappped.addCallback(callback, executor); + } + + @Override + public Future addCallback(FutureCallback callback) + { + return wrappped.addCallback(callback); + } + + @Override + public Future addCallback(FutureCallback callback, Executor executor) + { + return wrappped.addCallback(callback, executor); + } + + @Override + public Future addCallback(Consumer onSuccess, Consumer onFailure) + { + return wrappped.addCallback(onSuccess, onFailure); + } + + @Override + public Future addCallback(Consumer onSuccess, Consumer onFailure, Executor executor) + { + return wrappped.addCallback(onSuccess, onFailure, executor); + } + + @Override + public Future map(Function mapper) + { + return wrappped.map(mapper); + } + + @Override + public Future map(Function mapper, Executor executor) + { + return wrappped.map(mapper, executor); + } + + @Override + public Future flatMap(Function> flatMapper) + { + return wrappped.flatMap(flatMapper); + } + + @Override + public Future flatMap(Function> flatMapper, Executor executor) + { + return wrappped.flatMap(flatMapper, executor); + } + + @Override + public Future andThenAsync(Function> andThen) + { + throw new UnsupportedOperationException("git rebase: this goes away once AsyncChain comes in"); + } + + @Override + public Future andThenAsync(Function> andThen, Executor executor) + { + throw new UnsupportedOperationException("git rebase: this goes away once AsyncChain comes in"); + } + + @Override + public void addListener(Runnable runnable, Executor executor) + { + wrappped.addListener(runnable, executor); + } + + @Override + public void addListener(Runnable runnable) + { + wrappped.addListener(runnable); + } + + @Override + public Executor notifyExecutor() + { + return wrappped.notifyExecutor(); + } + + @Override + public Future addListener(GenericFutureListener> genericFutureListener) + { + return wrappped.addListener(genericFutureListener); + } + + @Override + public Future addListeners(GenericFutureListener>... genericFutureListeners) + { + return wrappped.addListeners(genericFutureListeners); + } + + @Override + public Future removeListener(GenericFutureListener> genericFutureListener) + { + return wrappped.removeListener(genericFutureListener); + } + + @Override + public Future removeListeners(GenericFutureListener>... genericFutureListeners) + { + return wrappped.removeListeners(genericFutureListeners); + } + + @Override + public boolean isSuccess() + { + return wrappped.isSuccess(); + } + + @Override + public boolean isCancellable() + { + return wrappped.isCancellable(); + } + + @Override + public Throwable cause() + { + return wrappped.cause(); + } + + @Override + public boolean await(long timeout, TimeUnit unit) throws InterruptedException + { + return wrappped.await(timeout, unit); + } + + @Override + public boolean awaitUninterruptibly(long timeout, TimeUnit unit) + { + return wrappped.awaitUninterruptibly(timeout, unit); + } + + @Override + public Data getNow() + { + return wrappped.getNow(); + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) + { + return wrappped.cancel(mayInterruptIfRunning); + } + + @Override + public boolean isCancelled() + { + return wrappped.isCancelled(); + } + + @Override + public boolean isDone() + { + return wrappped.isDone(); + } + + @Override + public Data get() throws InterruptedException, ExecutionException + { + return wrappped.get(); + } + + @Override + public Data get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException + { + return wrappped.get(timeout, unit); + } + + @Override + public boolean awaitUntil(long nanoTimeDeadline) throws InterruptedException + { + return wrappped.awaitUntil(nanoTimeDeadline); + } + + @Override + public boolean awaitUntilThrowUncheckedOnInterrupt(long nanoTimeDeadline) throws UncheckedInterruptedException + { + return wrappped.awaitUntilThrowUncheckedOnInterrupt(nanoTimeDeadline); + } + + @Override + public boolean awaitUntilUninterruptibly(long nanoTimeDeadline) + { + return wrappped.awaitUntilUninterruptibly(nanoTimeDeadline); + } + + @Override + public boolean awaitThrowUncheckedOnInterrupt(long time, TimeUnit units) throws UncheckedInterruptedException + { + return wrappped.awaitThrowUncheckedOnInterrupt(time, units); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/TokenRange.java b/src/java/org/apache/cassandra/service/accord/TokenRange.java new file mode 100644 index 000000000000..a0b6f67cccb3 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/TokenRange.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; + +import accord.api.RoutingKey; +import accord.primitives.Range; +import accord.primitives.Ranges; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; + +public class TokenRange extends Range.EndInclusive +{ + public TokenRange(AccordRoutingKey start, AccordRoutingKey end) + { + super(start, end); + } + + public static TokenRange fullRange(String keyspace) + { + return new TokenRange(SentinelKey.min(keyspace), SentinelKey.max(keyspace)); + } + + @Override + public TokenRange newRange(RoutingKey start, RoutingKey end) + { + return new TokenRange((AccordRoutingKey) start, (AccordRoutingKey) end); + } + + @Override + public RoutingKey someIntersectingRoutingKey(Ranges ranges) + { + RoutingKey pick = super.someIntersectingRoutingKey(ranges); + if (pick instanceof SentinelKey) + pick = ((SentinelKey) pick).toTokenKey(); + return pick; + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TokenRange range, DataOutputPlus out, int version) throws IOException + { + AccordRoutingKey.serializer.serialize((AccordRoutingKey) range.start(), out, version); + AccordRoutingKey.serializer.serialize((AccordRoutingKey) range.end(), out, version); + } + + @Override + public TokenRange deserialize(DataInputPlus in, int version) throws IOException + { + return new TokenRange(AccordRoutingKey.serializer.deserialize(in, version), + AccordRoutingKey.serializer.deserialize(in, version)); + } + + @Override + public long serializedSize(TokenRange range, int version) + { + return AccordRoutingKey.serializer.serializedSize((AccordRoutingKey) range.start(), version) + + AccordRoutingKey.serializer.serializedSize((AccordRoutingKey) range.end(), version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java new file mode 100644 index 000000000000..72aee3c92a13 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import accord.api.Agent; +import accord.api.Result; +import accord.local.Command; +import accord.local.Node; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; + +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static org.apache.cassandra.config.DatabaseDescriptor.getReadRpcTimeout; + +public class AccordAgent implements Agent +{ + @Override + public void onRecover(Node node, Result success, Throwable fail) + { + // TODO: this + } + + @Override + public void onInconsistentTimestamp(Command command, Timestamp prev, Timestamp next) + { + // TODO: this + } + + @Override + public void onUncaughtException(Throwable t) + { + // TODO: this + } + + @Override + public void onHandledException(Throwable throwable) + { + // TODO: this + } + + @Override + public boolean isExpired(TxnId initiated, long now) + { + // TODO: should distinguish between reads and writes + return now - initiated.hlc() > getReadRpcTimeout(MICROSECONDS); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java new file mode 100644 index 000000000000..d19f832ace16 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import java.util.Objects; + +import accord.primitives.RoutableKey; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; + +public abstract class AccordRoutableKey implements RoutableKey +{ + final String keyspace; // TODO (desired): use an id (TrM) + + protected AccordRoutableKey(String keyspace) + { + this.keyspace = keyspace; + } + + public final String keyspace() { return keyspace; } + public abstract Token token(); + + @Override + public int hashCode() + { + return Objects.hash(keyspace, token().tokenHash()); + } + + @Override + public final int compareTo(RoutableKey that) + { + return compareTo((AccordRoutableKey) that); + } + + public final int compareTo(AccordRoutableKey that) + { + int cmp = this.keyspace().compareTo(that.keyspace()); + if (cmp != 0) + return cmp; + + if (this.getClass() == SentinelKey.class || that.getClass() == SentinelKey.class) + { + int leftInt = this.getClass() == SentinelKey.class ? ((SentinelKey) this).asInt() : 0; + int rightInt = that.getClass() == SentinelKey.class ? ((SentinelKey) that).asInt() : 0; + return Integer.compare(leftInt, rightInt); + } + + cmp = this.token().compareTo(that.token()); + if (cmp != 0) + return cmp; + + if (this.getClass() == TokenKey.class) + return that.getClass() == TokenKey.class ? 0 : 1; + return that.getClass() == TokenKey.class ? -1 : ((PartitionKey)this).tableId.compareTo(((PartitionKey)that).tableId); + } + + @Override + public final boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordRoutableKey that = (AccordRoutableKey) o; + return compareTo(that) == 0; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java new file mode 100644 index 000000000000..7054fb3ba949 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.local.ShardDistributor; +import accord.primitives.Range; +import accord.primitives.Ranges; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; + +import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; + +public abstract class AccordRoutingKey extends AccordRoutableKey implements RoutingKey +{ + enum RoutingKeyKind + { + TOKEN, SENTINEL + } + + protected AccordRoutingKey(String keyspace) + { + super(keyspace); + } + + public abstract RoutingKeyKind kindOfRoutingKey(); + public abstract long estimatedSizeOnHeap(); + + public static AccordRoutingKey of(Key key) + { + return (AccordRoutingKey) key; + } + + // final in part because we refer to its class directly in AccordRoutableKey.compareTo + public static final class SentinelKey extends AccordRoutingKey + { + private static final long EMPTY_SIZE = ObjectSizes.measure(new SentinelKey(null, true)); + + private final boolean isMin; + + private SentinelKey(String keyspace, boolean isMin) + { + super(keyspace); + this.isMin = isMin; + } + + @Override + public int hashCode() + { + return Objects.hash(keyspace, isMin); + } + + @Override + public RoutingKeyKind kindOfRoutingKey() + { + return RoutingKeyKind.SENTINEL; + } + + @Override + public long estimatedSizeOnHeap() + { + return EMPTY_SIZE; + } + + public static SentinelKey min(String keyspace) + { + return new SentinelKey(keyspace, true); + } + + public static SentinelKey max(String keyspace) + { + return new SentinelKey(keyspace, false); + } + + public TokenKey toTokenKey() + { + IPartitioner partitioner = getPartitioner(); + return new TokenKey(keyspace, isMin ? + partitioner.getMinimumToken().nextValidToken() : + partitioner.getMaximumToken().decreaseSlightly()); + } + + @Override + public Token token() + { + throw new UnsupportedOperationException(); + } + + int asInt() + { + return isMin ? -1 : 1; + } + + @Override + public String toString() + { + return "SentinelKey{" + + "keyspace=" + keyspace + + ", key=" + (isMin ? "min": "max") + + '}'; + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(SentinelKey key, DataOutputPlus out, int version) throws IOException + { + out.writeBoolean(key.isMin); + out.writeUTF(key.keyspace); + } + + @Override + public SentinelKey deserialize(DataInputPlus in, int version) throws IOException + { + boolean isMin = in.readBoolean(); + String keyspace = in.readUTF(); + return new SentinelKey(keyspace, isMin); + } + + @Override + public long serializedSize(SentinelKey key, int version) + { + return TypeSizes.BOOL_SIZE + TypeSizes.sizeof(key.keyspace); + } + }; + + @Override + public Range asRange() + { + throw new UnsupportedOperationException(); + } + } + + // final in part because we refer to its class directly in AccordRoutableKey.compareToe + public static final class TokenKey extends AccordRoutingKey + { + private static final long EMPTY_SIZE; + + @Override + public Range asRange() + { + AccordRoutingKey before = token.isMinimum() + ? new SentinelKey(keyspace, true) + : new TokenKey(keyspace, token.decreaseSlightly()); + + return new TokenRange(before, this); + } + + static + { + Token key = getPartitioner().decorateKey(ByteBufferUtil.EMPTY_BYTE_BUFFER).getToken(); + EMPTY_SIZE = ObjectSizes.measureDeep(new TokenKey(null, key)); + } + + final Token token; + public TokenKey(String keyspace, Token token) + { + super(keyspace); + this.token = token; + } + + @Override + public Token token() + { + return token; + } + + @Override + public RoutingKeyKind kindOfRoutingKey() + { + return RoutingKeyKind.TOKEN; + } + + @Override + public String toString() + { + return "TokenKey{" + + "keyspace=" + keyspace() + + ", key=" + token() + + '}'; + } + + public long estimatedSizeOnHeap() + { + return EMPTY_SIZE + token().getHeapSize(); + } + + public static final Serializer serializer = new Serializer(); + public static class Serializer implements IVersionedSerializer + { + private Serializer() {} + + @Override + public void serialize(TokenKey key, DataOutputPlus out, int version) throws IOException + { + out.writeUTF(key.keyspace); + Token.compactSerializer.serialize(key.token, out, version); + } + + @Override + public TokenKey deserialize(DataInputPlus in, int version) throws IOException + { + String keyspace = in.readUTF(); + Token token = Token.compactSerializer.deserialize(in, getPartitioner(), version); + return new TokenKey(keyspace, token); + } + + @Override + public long serializedSize(TokenKey key, int version) + { + return TypeSizes.sizeof(key.keyspace) + Token.compactSerializer.serializedSize(key.token(), version); + } + } + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + final RoutingKeyKind[] kinds = RoutingKeyKind.values(); + @Override + public void serialize(AccordRoutingKey key, DataOutputPlus out, int version) throws IOException + { + out.write(key.kindOfRoutingKey().ordinal()); + switch (key.kindOfRoutingKey()) + { + case TOKEN: + TokenKey.serializer.serialize((TokenKey) key, out, version); + break; + case SENTINEL: + SentinelKey.serializer.serialize((SentinelKey) key, out, version); + break; + default: + throw new IllegalArgumentException(); + } + } + + @Override + public AccordRoutingKey deserialize(DataInputPlus in, int version) throws IOException + { + RoutingKeyKind kind = kinds[in.readByte()]; + switch (kind) + { + case TOKEN: + return TokenKey.serializer.deserialize(in, version); + case SENTINEL: + return SentinelKey.serializer.deserialize(in, version); + default: + throw new IllegalArgumentException(); + } + } + + @Override + public long serializedSize(AccordRoutingKey key, int version) + { + long size = TypeSizes.BYTE_SIZE; // kind ordinal + switch (key.kindOfRoutingKey()) + { + case TOKEN: + size += TokenKey.serializer.serializedSize((TokenKey) key, version); + break; + case SENTINEL: + size += SentinelKey.serializer.serializedSize((SentinelKey) key, version); + break; + default: + throw new IllegalArgumentException(); + } + return size; + } + }; + + public static class KeyspaceSplitter implements ShardDistributor + { + final EvenSplit subSplitter; + public KeyspaceSplitter(EvenSplit subSplitter) + { + this.subSplitter = subSplitter; + } + + @Override + public List split(Ranges ranges) + { + Map> byKeyspace = new TreeMap<>(); + for (Range range : ranges) + { + byKeyspace.computeIfAbsent(((AccordRoutableKey)range.start()).keyspace, ignore -> new ArrayList<>()) + .add(range); + } + + List results = new ArrayList<>(); + for (List keyspaceRanges : byKeyspace.values()) + { + List splits = subSplitter.split(Ranges.ofSortedAndDeoverlapped(keyspaceRanges.toArray(new Range[0]))); + + for (int i = 0; i < splits.size(); i++) + { + if (i == results.size()) results.add(Ranges.EMPTY); + results.set(i, results.get(i).with(splits.get(i))); + } + } + return results; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java new file mode 100644 index 000000000000..df83a92932f0 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import java.util.List; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + +import accord.api.Scheduler; +import org.apache.cassandra.concurrent.ExecutorFactory; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.concurrent.Shutdownable; + +public class AccordScheduler implements Scheduler, Shutdownable +{ + private final ScheduledExecutorPlus scheduledExecutor = ExecutorFactory.Global.executorFactory().scheduled("AccordScheduled"); + + private static class ScheduledFutureWrapper implements Scheduled + { + private final ScheduledFuture future; + + public ScheduledFutureWrapper(ScheduledFuture future) + { + this.future = future; + } + + @Override + public void cancel() + { + future.cancel(false); + } + } + + @Override + public Scheduled recurring(Runnable run, long delay, TimeUnit units) + { + ScheduledFuture future = scheduledExecutor.scheduleAtFixedRate(run, delay, delay, units); + return new ScheduledFutureWrapper(future); + } + + @Override + public Scheduled once(Runnable run, long delay, TimeUnit units) + { + ScheduledFuture future = scheduledExecutor.schedule(run, delay, units); + return new ScheduledFutureWrapper(future); + } + + @Override + public void now(Runnable task) + { + // called from the mutation stage configured by the verb + if (scheduledExecutor.isShutdown()) + throw new RejectedExecutionException("Scheduler has shut down."); + scheduledExecutor.submit(task); + } + + @Override + public boolean isTerminated() + { + return scheduledExecutor.isTerminated(); + } + + @Override + public void shutdown() + { + scheduledExecutor.shutdown(); + } + + @Override + public List shutdownNow() + { + return scheduledExecutor.shutdownNow(); + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException + { + return scheduledExecutor.awaitTermination(timeout, units); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java new file mode 100644 index 000000000000..13e54398029e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import com.google.common.base.Preconditions; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.primitives.Routable; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.db.partitions.Partition; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; + +// final in part because we refer to its class directly in AccordRoutableKey.compareTo +public final class PartitionKey extends AccordRoutableKey implements Key +{ + private static final long EMPTY_SIZE; + + static + { + DecoratedKey key = DatabaseDescriptor.getPartitioner().decorateKey(ByteBufferUtil.EMPTY_BYTE_BUFFER); + EMPTY_SIZE = ObjectSizes.measureDeep(new PartitionKey(null, null, key)); + } + + final TableId tableId; // TODO (expected): move to PartitionKey + final DecoratedKey key; + + public PartitionKey(String keyspace, TableId tableId, DecoratedKey key) + { + super(keyspace); + this.tableId = tableId; + this.key = key; + } + + public static PartitionKey of(Key key) + { + return (PartitionKey) key; + } + + public static PartitionKey of(Partition partition) + { + return new PartitionKey(partition.metadata().keyspace, partition.metadata().id, partition.partitionKey()); + } + + public static PartitionKey of(SinglePartitionReadCommand command) + { + return new PartitionKey(command.metadata().keyspace, command.metadata().id, command.partitionKey()); + } + + public final TableId tableId() { return tableId; } + + @Override + public Token token() + { + return partitionKey().getToken(); + } + + public DecoratedKey partitionKey() + { + return key; + } + + @Override + public RoutingKey toUnseekable() + { + return new TokenKey(keyspace, token()); + } + + public long estimatedSizeOnHeap() + { + return EMPTY_SIZE + ByteBufferAccessor.instance.size(partitionKey().getKey()); + } + + @Override + public String toString() + { + return "PartitionKey{" + + "tableId=" + tableId() + + ", key=" + partitionKey() + + '}'; + } + + // TODO: callers to this method are not correctly handling ranges + public static PartitionKey toPartitionKey(Routable routable) + { + return (PartitionKey) routable; + } + + public static final Serializer serializer = new Serializer(); + public static class Serializer implements IVersionedSerializer + { + // TODO: add vint to value accessor and use vints + private Serializer() {} + + @Override + public void serialize(PartitionKey key, DataOutputPlus out, int version) throws IOException + { + key.tableId().serialize(out); + ByteBufferUtil.writeWithShortLength(key.partitionKey().getKey(), out); + } + + public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int offset) + { + int position = offset; + position += key.tableId().serialize(dst, accessor, position); + ByteBuffer bytes = key.partitionKey().getKey(); + int numBytes = ByteBufferAccessor.instance.size(bytes); + Preconditions.checkState(numBytes <= Short.MAX_VALUE); + position += accessor.putShort(dst, position, (short) numBytes); + position += accessor.copyByteBufferTo(bytes, 0, dst, position, numBytes); + return position - offset; + + } + + @Override + public PartitionKey deserialize(DataInputPlus in, int version) throws IOException + { + TableId tableId = TableId.deserialize(in); + TableMetadata metadata = Schema.instance.getExistingTableMetadata(tableId); + DecoratedKey key = metadata.partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in)); + return new PartitionKey(metadata.keyspace, tableId, key); + } + + public PartitionKey deserialize(V src, ValueAccessor accessor, int offset) throws IOException + { + TableId tableId = TableId.deserialize(src, accessor, offset); + offset += tableId.serializedSize(); + TableMetadata metadata = Schema.instance.getTableMetadata(tableId); + int numBytes = accessor.getShort(src, offset); + offset += TypeSizes.SHORT_SIZE; + ByteBuffer bytes = ByteBuffer.allocate(numBytes); + accessor.copyTo(src, offset, bytes, ByteBufferAccessor.instance, 0, numBytes); + DecoratedKey key = metadata.partitioner.decorateKey(bytes); + return new PartitionKey(metadata.keyspace, tableId, key); + } + + @Override + public long serializedSize(PartitionKey key, int version) + { + return serializedSize(key); + } + + public long serializedSize(PartitionKey key) + { + return key.tableId().serializedSize() + ByteBufferUtil.serializedSizeWithShortLength(key.partitionKey().getKey()); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncContext.java b/src/java/org/apache/cassandra/service/accord/async/AsyncContext.java new file mode 100644 index 000000000000..4a307333a433 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncContext.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; + +import accord.local.PreLoadContext; +import accord.primitives.TxnId; +import org.apache.cassandra.service.accord.AccordCommand; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.AccordPartialCommand; +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.AccordState.WriteOnly; +import org.apache.cassandra.service.accord.api.PartitionKey; + +public class AsyncContext +{ + public static class Group> + { + final Map items = new HashMap<>(); + final Map> writeOnly = new HashMap<>(); + + @VisibleForTesting + public void add(V item) + { + items.put(item.key(), item); + } + + public V get(K key) + { + return items.get(key); + } + + void releaseResources(AccordStateCache.Instance cache) + { + items.values().forEach(cache::release); + items.clear(); + writeOnly.clear(); + } + + public WriteOnly getOrCreateWriteOnly(K key, BiFunction> factory, AccordCommandStore commandStore) + { + Preconditions.checkState(!items.containsKey(key)); + WriteOnly command = writeOnly.get(key); + if (command == null) + { + command = factory.apply(commandStore, key); + writeOnly.put(key, command); + } + return command; + } + } + + public static class CommandGroup extends Group + { + List partials = new ArrayList<>(); + + public void addPartialCommand(AccordPartialCommand partial) + { + partials.add(partial); + } + + @Override + void releaseResources(AccordStateCache.Instance cache) + { + super.releaseResources(cache); + partials.clear(); + } + } + + public final CommandGroup commands = new CommandGroup(); + public final Group commandsForKey = new Group<>(); + + public boolean containsScopedItems(PreLoadContext loadContext) + { + return Iterables.all(loadContext.txnIds(), commands.items::containsKey) && Iterables.all(loadContext.keys(), commandsForKey.items::containsKey); + } + + void verifyLoaded() + { + commands.items.forEach((key, command) -> Preconditions.checkState(command.isLoaded())); + commandsForKey.items.forEach((key, cfk) -> Preconditions.checkState(cfk.isLoaded())); + } + + void releaseResources(AccordCommandStore commandStore) + { + commands.releaseResources(commandStore.commandCache()); + commandsForKey.releaseResources(commandStore.commandsForKeyCache()); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java new file mode 100644 index 000000000000..3bbc9c2828a2 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.BiConsumer; +import java.util.function.Function; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.primitives.TxnId; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.service.accord.AccordCommand; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; + + +public class AsyncLoader +{ + private static final Logger logger = LoggerFactory.getLogger(AsyncLoader.class); + enum State + { + INITIALIZED, + SETUP, + LOADING, + FINISHED + } + + private State state = State.INITIALIZED; + private final AccordCommandStore commandStore; + + private final Iterable txnIds; + private final Iterable keys; + + protected Future readFuture; + + public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) + { + this.commandStore = commandStore; + this.txnIds = txnIds; + this.keys = keys; + } + + private > Future referenceAndDispatch(K key, + AccordStateCache.Instance cache, + Map context, + Function> readFunction, + Object callback) + { + V item; + Future future = cache.getLoadFuture(key); + if (future != null) + { + // if a load future exists for this, it must be present in the cache + item = cache.getOrNull(key); + Preconditions.checkState(item != null); + context.put(key, item); + if (logger.isTraceEnabled()) + logger.trace("Existing load future found for {} while loading for {}. ({})", item.key(), callback, item); + return future; + } + + item = cache.getOrCreate(key); + context.put(key, item); + if (item.isLoaded()) + { + if (logger.isTraceEnabled()) + logger.trace("Cached item found for {} while loading for {}. ({})", item.key(), callback, item); + return null; + } + + future = readFunction.apply(item); + cache.setLoadFuture(item.key(), future); + if (logger.isTraceEnabled()) + logger.trace("Loading new item for {} while loading for {}. ({})", item.key(), callback, item); + return future; + } + + + private > List> referenceAndDispatchReads(Iterable keys, + AccordStateCache.Instance cache, + Map context, + Function> readFunction, + List> futures, + Object callback) + { + for (K key : keys) + { + Future future = referenceAndDispatch(key, cache, context, readFunction, callback); + if (future == null) + continue; + + if (futures == null) + futures = new ArrayList<>(); + + futures.add(future); + } + + return futures; + } + + @VisibleForTesting + Function> loadCommandFunction(Object callback) + { + return command -> Stage.READ.submit(() -> { + try + { + logger.trace("Starting load of {} for {}", command.txnId(), callback); + AccordKeyspace.loadCommand(commandStore, command); + logger.trace("Completed load of {} for {}", command.txnId(), callback); + } + catch (Throwable t) + { + logger.error("Exception loading {} for {}", command.txnId(), callback, t); + throw t; + } + }); + } + + @VisibleForTesting + Function> loadCommandsPerKeyFunction(Object callback) + { + return cfk -> Stage.READ.submit(() -> { + try + { + logger.trace("Starting load of {} for {}", cfk.key(), callback); + AccordKeyspace.loadCommandsForKey(cfk); + logger.trace("Completed load of {} for {}", cfk.key(), callback); + } + catch (Throwable t) + { + logger.error("Exception loading {} for {}", cfk.key(), callback, t); + throw t; + } + }); + } + + private Future referenceAndDispatchReads(AsyncContext context, Object callback) + { + List> futures = null; + + futures = referenceAndDispatchReads(txnIds, + commandStore.commandCache(), + context.commands.items, + loadCommandFunction(callback), + futures, + callback); + + futures = referenceAndDispatchReads(keys, + commandStore.commandsForKeyCache(), + context.commandsForKey.items, + loadCommandsPerKeyFunction(callback), + futures, + callback); + + return futures != null ? FutureCombiner.allOf(futures) : null; + } + + @VisibleForTesting + void state(State state) + { + this.state = state; + } + + public boolean load(AsyncContext context, BiConsumer callback) + { + logger.trace("Running load for {} with state {}: {} {}", callback, state, txnIds, keys); + commandStore.checkInStoreThread(); + switch (state) + { + case INITIALIZED: + state(State.SETUP); + case SETUP: + // notify any pending write only groups we're loading a full instance so the pending changes aren't removed + txnIds.forEach(commandStore.commandCache()::lockWriteOnlyGroupIfExists); + keys.forEach(commandStore.commandsForKeyCache()::lockWriteOnlyGroupIfExists); + readFuture = referenceAndDispatchReads(context, callback); + state(State.LOADING); + case LOADING: + if (readFuture != null) + { + if (readFuture.isSuccess()) + { + logger.trace("Read future succeeded for {}", callback); + context.verifyLoaded(); + readFuture = null; + } + else + { + logger.trace("Adding callback for read future: {}", callback); + readFuture.addCallback(callback, commandStore.executor()); + break; + } + } + // apply any pending write only changes that may not have made it to disk in time to be loaded + context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupLoadFuture); + context.commands.items.values().forEach(commandStore.commandCache()::applyAndRemoveWriteOnlyGroup); + context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupLoadFuture); + context.commandsForKey.items.values().forEach(commandStore.commandsForKeyCache()::applyAndRemoveWriteOnlyGroup); + // apply blindly reported timestamps + context.commandsForKey.items.values().forEach(AccordCommandsForKey::applyBlindWitnessedTimestamps); + state(State.FINISHED); + case FINISHED: + break; + default: + throw new IllegalStateException("Unexpected state: " + state); + } + + logger.trace("Exiting load for {} with state {}: {} {}", callback, state, txnIds, keys); + return state == State.FINISHED; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java new file mode 100644 index 000000000000..e302b4248976 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Function; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.MDC; + +import accord.local.CommandStore; +import accord.local.PreLoadContext; +import accord.local.SafeCommandStore; +import accord.primitives.Seekables; +import accord.primitives.TxnId; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.concurrent.AsyncPromise; + +public abstract class AsyncOperation extends AsyncPromise implements Runnable, Function, BiConsumer +{ + private static final Logger logger = LoggerFactory.getLogger(AsyncOperation.class); + + private static class LoggingProps + { + private static final String COMMAND_STORE = "command_store"; + private static final String ASYNC_OPERATION = "async_op"; + } + + enum State + { + INITIALIZED, + LOADING, + RUNNING, + SAVING, + AWAITING_SAVE, + COMPLETING, + FINISHED, + FAILED + } + + public interface Context + { + + } + + private State state = State.INITIALIZED; + private final AccordCommandStore commandStore; + private final AsyncLoader loader; + private final AsyncWriter writer; + private final AsyncContext context = new AsyncContext(); + private R result; + private final String loggingId; + + private void setLoggingIds() + { + MDC.put(LoggingProps.COMMAND_STORE, commandStore.loggingId); + MDC.put(LoggingProps.ASYNC_OPERATION, loggingId); + } + + private void clearLoggingIds() + { + MDC.remove(LoggingProps.COMMAND_STORE); + MDC.remove(LoggingProps.ASYNC_OPERATION); + } + + public AsyncOperation(AccordCommandStore commandStore, Iterable commandsToLoad, Iterable keyCommandsToLoad) + { + this.loggingId = "0x" + Integer.toHexString(System.identityHashCode(this)); + this.commandStore = commandStore; + this.loader = createAsyncLoader(commandStore, commandsToLoad, keyCommandsToLoad); + setLoggingIds(); + this.writer = createAsyncWriter(commandStore); + logger.trace("Created {} on {}", this, commandStore); + clearLoggingIds(); + } + + @Override + public String toString() + { + return "AsyncOperation{" + state + "}-" + loggingId; + } + + AsyncWriter createAsyncWriter(AccordCommandStore commandStore) + { + return new AsyncWriter(commandStore); + } + + AsyncLoader createAsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) + { + return new AsyncLoader(commandStore, txnIds, keys); + } + + @VisibleForTesting + State state() + { + return state; + } + + @VisibleForTesting + protected void setState(State state) + { + this.state = state; + } + + /** + * callback for loader and writer + */ + @Override + public void accept(Object o, Throwable throwable) + { + if (throwable != null) + { + logger.error(String.format("Operation %s failed", this), throwable); + state = State.FAILED; + tryFailure(throwable); + } + else + run(); + } + + protected void runInternal() + { + SafeAccordCommandStore safeStore = commandStore.safeStore(context); + switch (state) + { + case INITIALIZED: + state = State.LOADING; + case LOADING: + if (!loader.load(context, this)) + return; + + state = State.RUNNING; + result = apply(safeStore); + + state = State.SAVING; + case SAVING: + case AWAITING_SAVE: + boolean updatesPersisted = writer.save(context, this); + + if (state != State.AWAITING_SAVE) + { + // with any updates on the way to disk, release resources so operations waiting + // to use these objects don't have issues with fields marked as unsaved + context.releaseResources(commandStore); + state = State.AWAITING_SAVE; + } + + if (!updatesPersisted) + return; + + state = State.COMPLETING; + setSuccess(result); + state = State.FINISHED; + case FINISHED: + break; + default: + throw new IllegalStateException(); + } + } + + + @Override + public void run() + { + setLoggingIds(); + logger.trace("Running {} with state {}", this, state); + try + { + commandStore.checkInStoreThread(); + commandStore.setContext(context); + try + { + runInternal(); + } + catch (Throwable t) + { + logger.error(String.format("Operation %s failed", this), t); + tryFailure(t); + } + finally + { + commandStore.unsetContext(context); + } + } + finally + { + logger.trace("Exiting {}", this); + clearLoggingIds(); + } + } + + private static Iterable toPartitionKeys(Seekables keys) + { + switch (keys.domain()) + { + default: throw new AssertionError(); + case Key: + return (Iterable) keys; + case Range: + // TODO (required): implement + throw new UnsupportedOperationException(); + } + } + + static class ForFunction extends AsyncOperation + { + private final Function function; + + public ForFunction(AccordCommandStore commandStore, Iterable txnIds, Iterable keys, Function function) + { + super(commandStore, txnIds, keys); + this.function = function; + } + + @Override + public R apply(SafeCommandStore commandStore) + { + return function.apply(commandStore); + } + } + + public static AsyncOperation create(CommandStore commandStore, PreLoadContext loadCtx, Function function) + { + return new ForFunction<>((AccordCommandStore) commandStore, loadCtx.txnIds(), AsyncOperation.toPartitionKeys(loadCtx.keys()), function); + } + + static class ForConsumer extends AsyncOperation + { + private final Consumer consumer; + + public ForConsumer(AccordCommandStore commandStore, Iterable txnIds, Iterable keys, Consumer consumer) + { + super(commandStore, txnIds, keys); + this.consumer = consumer; + } + + @Override + public Void apply(SafeCommandStore commandStore) + { + consumer.accept(commandStore); + return null; + } + } + + public static AsyncOperation create(CommandStore commandStore, PreLoadContext loadCtx, Consumer consumer) + { + return new ForConsumer((AccordCommandStore) commandStore, loadCtx.txnIds(), AsyncOperation.toPartitionKeys(loadCtx.keys()), consumer); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java b/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java new file mode 100644 index 000000000000..c920a0f7bb29 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; +import java.util.function.Function; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.primitives.Seekable; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.service.accord.AccordCommand; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordPartialCommand; +import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.store.StoredSet; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; + +import static accord.primitives.Routable.Domain.Range; + +public class AsyncWriter +{ + private static final Logger logger = LoggerFactory.getLogger(AsyncWriter.class); + + enum State + { + INITIALIZED, + SETUP, + SAVING, + FINISHED + } + + private State state = State.INITIALIZED; + protected Future writeFuture; + private final AccordCommandStore commandStore; + final AccordStateCache.Instance commandCache; + final AccordStateCache.Instance cfkCache; + + public AsyncWriter(AccordCommandStore commandStore) + { + this.commandStore = commandStore; + this.commandCache = commandStore.commandCache(); + this.cfkCache = commandStore.commandsForKeyCache(); + } + + private interface StateMutationFunction> + { + Mutation apply(AccordCommandStore commandStore, V state, long timestamp); + } + + private static > List> dispatchWrites(AsyncContext.Group ctxGroup, + AccordStateCache.Instance cache, + StateMutationFunction mutationFunction, + long timestamp, + AccordCommandStore commandStore, + List> futures, + Object callback) + { + for (V item : ctxGroup.items.values()) + { + if (!item.hasModifications()) + { + if (logger.isTraceEnabled()) + logger.trace("No modifications for {} for {}, {}", item.key(), callback, item); + continue; + } + + if (futures == null) + futures = new ArrayList<>(); + K key = item.key(); + Mutation mutation = mutationFunction.apply(commandStore, item, timestamp); + if (logger.isTraceEnabled()) + logger.trace("Dispatching mutation for {} for {}, {} -> {}", key, callback, item, mutation); + Future future = Stage.MUTATION.submit(() -> { + try + { + if (logger.isTraceEnabled()) + logger.trace("Applying mutation for {} for {}: {}", key, callback, mutation); + mutation.apply(); + if (logger.isTraceEnabled()) + logger.trace("Completed applying mutation for {} for {}: {}", key, callback, mutation); + } + catch (Throwable t) + { + logger.error(String.format("Exception applying mutation for %s for %s: %s", key, callback, mutation), t); + throw t; + } + }); + cache.addSaveFuture(item.key(), future); + futures.add(future); + } + + for (AccordState.WriteOnly item : ctxGroup.writeOnly.values()) + { + Preconditions.checkState(item.hasModifications()); + if (futures == null) futures = new ArrayList<>(); + Mutation mutation = mutationFunction.apply(commandStore, (V) item, timestamp); + Future future = Stage.MUTATION.submit((Runnable) mutation::apply); + future.addListener(() -> cache.purgeWriteOnly(item.key()), commandStore.executor()); + item.future(future); + futures.add(future); + } + + return futures; + } + + private Future maybeDispatchWrites(AsyncContext context, Object callback) throws IOException + { + List> futures = null; + + long timestamp = commandStore.nextSystemTimestampMicros(); + futures = dispatchWrites(context.commands, + commandStore.commandCache(), + AccordKeyspace::getCommandMutation, + timestamp, + commandStore, + futures, + callback); + + futures = dispatchWrites(context.commandsForKey, + commandStore.commandsForKeyCache(), + AccordKeyspace::getCommandsForKeyMutation, + timestamp, + commandStore, + futures, + callback); + + return futures != null ? FutureCombiner.allOf(futures) : null; + } + + private void denormalizeBlockedOn(AccordCommand command, + AsyncContext context, + Function> waitingField, + Function> blockingField) + { + StoredSet.Changes waitingOn = waitingField.apply(command); + waitingOn.forEachDeletion(deletedId -> { + AccordCommand blockedOn = commandForDenormalization(deletedId, context); + blockingField.apply(blockedOn).blindRemove(command.txnId()); + }); + + waitingOn.forEachAddition(addedId -> { + AccordCommand blockedOn = commandForDenormalization(addedId, context); + blockingField.apply(blockedOn).blindAdd(command.txnId()); + }); + } + + private void denormalizeWaitingOnSummaries(AccordCommand command, + AsyncContext context, + Function> waitingField, + Function> blockingField) + { + blockingField.apply(command).getView().forEach(blockingId -> { + AccordCommand blocking = commandForDenormalization(blockingId, context); + waitingField.apply(blocking).accept(command.txnId(), command.executeAt()); + }); + } + + private static > + AccordState getForDenormalization(K key, + AccordCommandStore commandStore, + AsyncContext.Group ctxGroup, + AccordStateCache.Instance cache, + BiFunction> factory) + { + V item = ctxGroup.get(key); + if (item != null) + return item; + + item = cache.getOrNull(key); + if (item != null && !cache.hasLoadFuture(key)) + { + ctxGroup.items.put(key, item); + return item; + } + + return ctxGroup.getOrCreateWriteOnly(key, factory, commandStore); + } + + private AccordCommand commandForDenormalization(TxnId txnId, AsyncContext context) + { + return (AccordCommand) getForDenormalization(txnId, commandStore, context.commands, commandCache, (ignore, id) -> new AccordCommand.WriteOnly(id)); + } + + private AccordCommandsForKey cfkForDenormalization(PartitionKey key, AsyncContext context) + { + return (AccordCommandsForKey) getForDenormalization(key, commandStore, context.commandsForKey, cfkCache, AccordCommandsForKey.WriteOnly::new); + } + + private void denormalize(AccordCommand command, AsyncContext context, Object callback) + { + if (!command.hasModifications()) + return; + + // notify commands we're waiting on that they need to update the summaries in our maps + if (command.waitingOnCommit.hasModifications()) + { + denormalizeBlockedOn(command, context, cmd -> cmd.waitingOnCommit, cmd -> cmd.blockingCommitOn); + } + if (command.waitingOnApply.hasModifications()) + { + denormalizeBlockedOn(command, context, cmd -> new StoredSet.Changes() + { + @Override + public void forEachAddition(Consumer consumer) + { + cmd.waitingOnApply.forEachAddition((ignore, txnId) -> consumer.accept(txnId)); + } + + @Override + public void forEachDeletion(Consumer consumer) + { + cmd.waitingOnApply.forEachDeletion((ignore, txnId) -> consumer.accept(txnId)); + + } + }, cmd -> cmd.blockingApplyOn); + } + + if (command.shouldUpdateDenormalizedWaitingOn()) + { + denormalizeWaitingOnSummaries(command, context, cmd -> (txnId, ignore) -> cmd.waitingOnCommit.blindAdd(txnId), cmd -> cmd.blockingCommitOn); + denormalizeWaitingOnSummaries(command, context, cmd -> (txnId, executeAt) -> cmd.waitingOnApply.blindPut(executeAt, txnId), cmd -> cmd.blockingApplyOn); + } + + // There won't be a txn to denormalize against until the command has been preaccepted + // TODO (now): this maybe insufficient for correctness? on Accept we use the explicitly provided keys to register + // the transaction here. It's possible a sequence of two Accept, with second taking a higher timestamp + // might not reflect the update timestamp in the map? Probably best addressed following Blake's refactor. + if (command.known().isDefinitionKnown() && AccordPartialCommand.serializer.needsUpdate(command)) + { + for (Seekable key : command.partialTxn().keys()) + { + // TODO: implement + if (key.domain() == Range) + throw new UnsupportedOperationException(); + PartitionKey partitionKey = (PartitionKey) key; + AccordCommandsForKey cfk = cfkForDenormalization(partitionKey, context); + cfk.updateSummaries(command); + } + } + + if (logger.isTraceEnabled()) + { + context.commands.items.forEach((txnId, cmd) -> logger.trace("Denormalized command {} for {}: {}", txnId, callback, cmd)); + context.commandsForKey.items.forEach((key, cfk) -> logger.trace("Denormalized cfk {} for {}: {}", key, callback, cfk)); + } + } + + private void denormalize(AsyncContext context, Object callback) + { + // need to clone "values" as denormalize will mutate it + new ArrayList<>(context.commands.items.values()).forEach(command -> denormalize(command, context, callback)); + } + + @VisibleForTesting + void setState(State state) + { + this.state = state; + } + + public boolean save(AsyncContext context, BiConsumer callback) + { + logger.trace("Running save for {} with state {}", callback, state); + commandStore.checkInStoreThread(); + try + { + switch (state) + { + case INITIALIZED: + setState(State.SETUP); + case SETUP: + denormalize(context, callback); + writeFuture = maybeDispatchWrites(context, callback); + + setState(State.SAVING); + case SAVING: + if (writeFuture != null && !writeFuture.isSuccess()) + { + logger.trace("Adding callback for write future: {}", callback); + writeFuture.addCallback(callback, commandStore.executor()); + break; + } + context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupSaveFuture); + context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupSaveFuture); + setState(State.FINISHED); + case FINISHED: + break; + default: + throw new IllegalStateException("Unexpected state: " + state); + } + } + catch (IOException e) + { + throw new RuntimeException(e); + } + + logger.trace("Exiting save for {} with state {}", callback, state); + return state == State.FINISHED; + } + +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java new file mode 100644 index 000000000000..609e7ee080d0 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.Accept; +import accord.messages.Accept.AcceptReply; +import accord.primitives.PartialRoute; +import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +import static accord.local.Command.AcceptOutcome.RejectedBallot; +import static accord.messages.Accept.SerializerSupport.create; + +public class AcceptSerializers +{ + private AcceptSerializers() {} + + public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() + { + @Override + public void serializeBody(Accept accept, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.ballot.serialize(accept.ballot, out, version); + CommandSerializers.timestamp.serialize(accept.executeAt, out, version); + KeySerializers.seekables.serialize(accept.keys, out, version); + DepsSerializer.partialDeps.serialize(accept.partialDeps, out, version); + } + + @Override + public Accept deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + { + return create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, + CommandSerializers.ballot.deserialize(in, version), + CommandSerializers.timestamp.deserialize(in, version), + KeySerializers.seekables.deserialize(in, version), + DepsSerializer.partialDeps.deserialize(in, version)); + } + + @Override + public long serializedBodySize(Accept accept, int version) + { + return CommandSerializers.ballot.serializedSize(accept.ballot, version) + + CommandSerializers.timestamp.serializedSize(accept.executeAt, version) + + KeySerializers.seekables.serializedSize(accept.keys, version) + + DepsSerializer.partialDeps.serializedSize(accept.partialDeps, version); + } + }; + + public static final IVersionedSerializer invalidate = new IVersionedSerializer() + { + @Override + public void serialize(Accept.Invalidate invalidate, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.ballot.serialize(invalidate.ballot, out, version); + CommandSerializers.txnId.serialize(invalidate.txnId, out, version); + KeySerializers.routingKey.serialize(invalidate.someKey, out, version); + } + + @Override + public Accept.Invalidate deserialize(DataInputPlus in, int version) throws IOException + { + return new Accept.Invalidate(CommandSerializers.ballot.deserialize(in, version), + CommandSerializers.txnId.deserialize(in, version), + KeySerializers.routingKey.deserialize(in, version)); + } + + @Override + public long serializedSize(Accept.Invalidate invalidate, int version) + { + return CommandSerializers.ballot.serializedSize(invalidate.ballot, version) + + CommandSerializers.txnId.serializedSize(invalidate.txnId, version) + + KeySerializers.routingKey.serializedSize(invalidate.someKey, version); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(AcceptReply reply, DataOutputPlus out, int version) throws IOException + { + switch (reply.outcome()) + { + default: throw new AssertionError(); + case Success: + if (reply.deps != null) + { + out.writeByte(1); + DepsSerializer.partialDeps.serialize(reply.deps, out, version); + } + else + { + out.writeByte(2); + } + break; + case Redundant: + out.writeByte(3); + break; + case RejectedBallot: + out.writeByte(4); + CommandSerializers.ballot.serialize(reply.supersededBy, out, version); + } + } + + @Override + public AcceptReply deserialize(DataInputPlus in, int version) throws IOException + { + int type = in.readByte(); + switch (type) + { + default: throw new IllegalStateException("Unexpected AcceptNack type: " + type); + case 1: + return new AcceptReply(DepsSerializer.partialDeps.deserialize(in, version)); + case 2: + return AcceptReply.ACCEPT_INVALIDATE; + case 3: + return AcceptReply.REDUNDANT; + case 4: + return new AcceptReply(CommandSerializers.ballot.deserialize(in, version)); + } + } + + @Override + public long serializedSize(AcceptReply reply, int version) + { + long size = TypeSizes.BYTE_SIZE; + switch (reply.outcome()) + { + default: throw new AssertionError(); + case Success: + if (reply.deps != null) + size += DepsSerializer.partialDeps.serializedSize(reply.deps, version); + break; + case Redundant: + break; + case RejectedBallot: + size += CommandSerializers.ballot.serializedSize(reply.supersededBy, version); + } + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java new file mode 100644 index 000000000000..801c7d332b3d --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.Apply; +import accord.primitives.PartialRoute; +import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.txn.TxnData; + +public class ApplySerializers +{ + public static final IVersionedSerializer request = new TxnRequestSerializer() + { + @Override + public void serializeBody(Apply apply, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt(apply.untilEpoch); + CommandSerializers.timestamp.serialize(apply.executeAt, out, version); + DepsSerializer.partialDeps.serialize(apply.deps, out, version); + CommandSerializers.writes.serialize(apply.writes, out, version); + TxnData.serializer.serialize((TxnData) apply.result, out, version); + } + + @Override + public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + { + return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, in.readUnsignedVInt(), + CommandSerializers.timestamp.deserialize(in, version), + DepsSerializer.partialDeps.deserialize(in, version), + CommandSerializers.writes.deserialize(in, version), + TxnData.serializer.deserialize(in, version)); + } + + @Override + public long serializedBodySize(Apply apply, int version) + { + return TypeSizes.sizeofUnsignedVInt(apply.untilEpoch) + + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + + DepsSerializer.partialDeps.serializedSize(apply.deps, version) + + CommandSerializers.writes.serializedSize(apply.writes, version) + + TxnData.serializer.serializedSize((TxnData) apply.result, version); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + private final Apply.ApplyReply[] replies = Apply.ApplyReply.values(); + + @Override + public void serialize(Apply.ApplyReply t, DataOutputPlus out, int version) throws IOException + { + out.writeByte(t.ordinal()); + } + + @Override + public Apply.ApplyReply deserialize(DataInputPlus in, int version) throws IOException + { + return replies[in.readByte()]; + } + + @Override + public long serializedSize(Apply.ApplyReply t, int version) + { + return 1; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java new file mode 100644 index 000000000000..5568b9b01ed0 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.api.RoutingKey; +import accord.local.Status; +import accord.messages.BeginInvalidation; +import accord.messages.BeginInvalidation.InvalidateReply; +import accord.primitives.Ballot; +import accord.primitives.Route; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + +public class BeginInvalidationSerializers +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + @Override + public void serialize(BeginInvalidation begin, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(begin.txnId, out, version); + KeySerializers.unseekables.serialize(begin.someUnseekables, out, version); + CommandSerializers.ballot.serialize(begin.ballot, out, version); + } + + @Override + public BeginInvalidation deserialize(DataInputPlus in, int version) throws IOException + { + return new BeginInvalidation(CommandSerializers.txnId.deserialize(in, version), + KeySerializers.unseekables.deserialize(in, version), + CommandSerializers.ballot.deserialize(in, version)); + } + + @Override + public long serializedSize(BeginInvalidation begin, int version) + { + return CommandSerializers.txnId.serializedSize(begin.txnId, version) + + KeySerializers.unseekables.serializedSize(begin.someUnseekables, version) + + CommandSerializers.ballot.serializedSize(begin.ballot, version); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(InvalidateReply reply, DataOutputPlus out, int version) throws IOException + { + serializeNullable(reply.supersededBy, out, version, CommandSerializers.ballot); + CommandSerializers.ballot.serialize(reply.accepted, out, version); + CommandSerializers.status.serialize(reply.status, out, version); + out.writeBoolean(reply.acceptedFastPath); + serializeNullable(reply.route, out, version, KeySerializers.route); + serializeNullable(reply.homeKey, out, version, KeySerializers.routingKey); + } + + @Override + public InvalidateReply deserialize(DataInputPlus in, int version) throws IOException + { + Ballot supersededBy = deserializeNullable(in, version, CommandSerializers.ballot); + Ballot accepted = CommandSerializers.ballot.deserialize(in, version); + Status status = CommandSerializers.status.deserialize(in, version); + boolean acceptedFastPath = in.readBoolean(); + Route route = deserializeNullable(in, version, KeySerializers.route); + RoutingKey homeKey = deserializeNullable(in, version, KeySerializers.routingKey); + return new InvalidateReply(supersededBy, accepted, status, acceptedFastPath, route, homeKey); + } + + @Override + public long serializedSize(InvalidateReply reply, int version) + { + return serializedNullableSize(reply.supersededBy, version, CommandSerializers.ballot) + + CommandSerializers.ballot.serializedSize(reply.accepted, version) + + CommandSerializers.status.serializedSize(reply.status, version) + + TypeSizes.BOOL_SIZE + + serializedNullableSize(reply.route, version, KeySerializers.route) + + serializedNullableSize(reply.homeKey, version, KeySerializers.routingKey); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java new file mode 100644 index 000000000000..803a25b9c27d --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.api.Result; +import accord.api.RoutingKey; +import accord.local.SaveStatus; +import accord.local.Status.Durability; +import accord.messages.CheckStatus; +import accord.messages.CheckStatus.CheckStatusNack; +import accord.messages.CheckStatus.CheckStatusOk; +import accord.messages.CheckStatus.CheckStatusOkFull; +import accord.messages.CheckStatus.CheckStatusReply; +import accord.primitives.Ballot; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Route; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.primitives.Unseekables; +import accord.primitives.Writes; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.txn.TxnData; + +import static accord.messages.CheckStatus.SerializationSupport.createOk; +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + +public class CheckStatusSerializers +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + final CheckStatus.IncludeInfo[] infos = CheckStatus.IncludeInfo.values(); + + @Override + public void serialize(CheckStatus check, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(check.txnId, out, version); + KeySerializers.unseekables.serialize(check.query, out, version); + out.writeUnsignedVInt(check.startEpoch); + out.writeUnsignedVInt(check.endEpoch - check.startEpoch); + out.writeByte(check.includeInfo.ordinal()); + } + + @Override + public CheckStatus deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Unseekables query = KeySerializers.unseekables.deserialize(in, version); + long startEpoch = in.readUnsignedVInt(); + long endEpoch = in.readUnsignedVInt() + startEpoch; + CheckStatus.IncludeInfo info = infos[in.readByte()]; + return new CheckStatus(txnId, query, startEpoch, endEpoch, info); + } + + @Override + public long serializedSize(CheckStatus check, int version) + { + return CommandSerializers.txnId.serializedSize(check.txnId, version) + + KeySerializers.unseekables.serializedSize(check.query, version) + + TypeSizes.sizeofUnsignedVInt(check.startEpoch) + + TypeSizes.sizeofUnsignedVInt(check.endEpoch - check.startEpoch) + + TypeSizes.BYTE_SIZE; + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + private static final byte OK = 0x00; + private static final byte FULL = 0x01; + private static final byte NACK = 0x02; + + @Override + public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) throws IOException + { + if (!reply.isOk()) + { + out.write(NACK); + return; + } + + CheckStatusOk ok = (CheckStatusOk) reply; + out.write(reply instanceof CheckStatusOkFull ? FULL : OK); + CommandSerializers.saveStatus.serialize(ok.saveStatus, out, version); + CommandSerializers.ballot.serialize(ok.promised, out, version); + CommandSerializers.ballot.serialize(ok.accepted, out, version); + serializeNullable(ok.executeAt, out, version, CommandSerializers.timestamp); + out.writeBoolean(ok.isCoordinating); + CommandSerializers.durability.serialize(ok.durability, out, version); + serializeNullable(ok.route, out, version, KeySerializers.route); + serializeNullable(ok.homeKey, out, version, KeySerializers.routingKey); + + if (!(reply instanceof CheckStatusOkFull)) + return; + + CheckStatusOkFull okFull = (CheckStatusOkFull) ok; + serializeNullable(okFull.partialTxn, out, version, CommandSerializers.partialTxn); + serializeNullable(okFull.committedDeps, out, version, DepsSerializer.partialDeps); + serializeNullable(okFull.writes, out, version, CommandSerializers.writes); + serializeNullable((TxnData) okFull.result, out, version, TxnData.serializer); + } + + @Override + public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOException + { + byte kind = in.readByte(); + switch (kind) + { + default: throw new IOException("Unhandled CheckStatusReply kind: " + Integer.toHexString(Byte.toUnsignedInt(kind))); + case NACK: + return CheckStatusNack.nack(); + case OK: + case FULL: + SaveStatus status = CommandSerializers.saveStatus.deserialize(in, version); + Ballot promised = CommandSerializers.ballot.deserialize(in, version); + Ballot accepted = CommandSerializers.ballot.deserialize(in, version); + Timestamp executeAt = deserializeNullable(in, version, CommandSerializers.timestamp); + boolean isCoordinating = in.readBoolean(); + Durability durability = CommandSerializers.durability.deserialize(in, version); + Route route = deserializeNullable(in, version, KeySerializers.route); + RoutingKey homeKey = deserializeNullable(in, version, KeySerializers.routingKey); + + if (kind == OK) + return createOk(status, promised, accepted, executeAt, isCoordinating, durability, route, homeKey); + + PartialTxn partialTxn = deserializeNullable(in, version, CommandSerializers.partialTxn); + PartialDeps committedDeps = deserializeNullable(in, version, DepsSerializer.partialDeps); + Writes writes = deserializeNullable(in, version, CommandSerializers.writes); + Result result = deserializeNullable(in, version, TxnData.serializer); + return createOk(status, promised, accepted, executeAt, isCoordinating, durability, route, homeKey, + partialTxn, committedDeps, writes, result); + } + } + + @Override + public long serializedSize(CheckStatusReply reply, int version) + { + long size = TypeSizes.BYTE_SIZE; + if (!reply.isOk()) + return size; + + CheckStatusOk ok = (CheckStatusOk) reply; + size += CommandSerializers.saveStatus.serializedSize(ok.saveStatus, version); + size += CommandSerializers.ballot.serializedSize(ok.promised, version); + size += CommandSerializers.ballot.serializedSize(ok.accepted, version); + size += serializedNullableSize(ok.executeAt, version, CommandSerializers.timestamp); + size += TypeSizes.BOOL_SIZE; + size += CommandSerializers.durability.serializedSize(ok.durability, version); + size += serializedNullableSize(ok.homeKey, version, KeySerializers.routingKey); + size += serializedNullableSize(ok.route, version, KeySerializers.route); + + if (!(reply instanceof CheckStatusOkFull)) + return size; + + CheckStatusOkFull okFull = (CheckStatusOkFull) ok; + size += serializedNullableSize(okFull.partialTxn, version, CommandSerializers.partialTxn); + size += serializedNullableSize(okFull.committedDeps, version, DepsSerializer.partialDeps); + size += serializedNullableSize(okFull.writes, version, CommandSerializers.writes); + size += serializedNullableSize((TxnData) okFull.result, version, TxnData.serializer); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java new file mode 100644 index 000000000000..0441c640c818 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import com.google.common.base.Preconditions; + +import accord.local.Node; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.local.Status.Durability; +import accord.primitives.Ballot; +import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.TxnUpdate; +import org.apache.cassandra.service.accord.txn.TxnWrite; + +public class CommandSerializers +{ + private CommandSerializers() {} + + public static final TimestampSerializer txnId = new TimestampSerializer<>(TxnId::fromBits); + public static final TimestampSerializer timestamp = new TimestampSerializer<>(Timestamp::fromBits); + public static final TimestampSerializer ballot = new TimestampSerializer<>(Ballot::fromBits); + public static final EnumSerializer kind = new EnumSerializer<>(Txn.Kind.class); + + public static class TimestampSerializer implements IVersionedSerializer + { + interface Factory + { + T create(long msb, long lsb, Node.Id node); + } + + private final TimestampSerializer.Factory factory; + + private TimestampSerializer(TimestampSerializer.Factory factory) + { + this.factory = factory; + } + + @Override + public void serialize(T ts, DataOutputPlus out, int version) throws IOException + { + out.writeLong(ts.msb); + out.writeLong(ts.lsb); + TopologySerializers.nodeId.serialize(ts.node, out, version); + } + + public int serialize(T ts, V dst, ValueAccessor accessor, int offset) + { + int position = offset; + position += accessor.putLong(dst, position, ts.msb); + position += accessor.putLong(dst, position, ts.lsb); + position += TopologySerializers.nodeId.serialize(ts.node, dst, accessor, position); + int size = position - offset; + Preconditions.checkState(size == serializedSize()); + return size; + } + + @Override + public T deserialize(DataInputPlus in, int version) throws IOException + { + return factory.create(in.readLong(), + in.readLong(), + TopologySerializers.nodeId.deserialize(in, version)); + } + + public T deserialize(V src, ValueAccessor accessor, int offset) + { + long msb = accessor.getLong(src, offset); + offset += TypeSizes.LONG_SIZE; + long lsb = accessor.getLong(src, offset); + offset += TypeSizes.LONG_SIZE; + Node.Id node = TopologySerializers.nodeId.deserialize(src, accessor, offset); + return factory.create(msb, lsb, node); + } + + @Override + public long serializedSize(T ts, int version) + { + return serializedSize(); + } + + public int serializedSize() + { + return TypeSizes.LONG_SIZE + // ts.msb + TypeSizes.LONG_SIZE + // ts.lsb + TopologySerializers.nodeId.serializedSize(); // ts.node + } + } + + public static final IVersionedSerializer partialTxn = new IVersionedSerializer() + { + @Override + public void serialize(PartialTxn txn, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.kind.serialize(txn.kind(), out, version); + KeySerializers.ranges.serialize(txn.covering(), out, version); + KeySerializers.seekables.serialize(txn.keys(), out, version); + TxnRead.serializer.serialize((TxnRead) txn.read(), out, version); + TxnQuery.serializer.serialize((TxnQuery) txn.query(), out, version); + out.writeBoolean(txn.update() != null); + if (txn.update() != null) + TxnUpdate.serializer.serialize((TxnUpdate) txn.update(), out, version); + } + + @Override + public PartialTxn deserialize(DataInputPlus in, int version) throws IOException + { + Txn.Kind kind = CommandSerializers.kind.deserialize(in, version); + Ranges covering = KeySerializers.ranges.deserialize(in, version); + Seekables keys = KeySerializers.seekables.deserialize(in, version); + TxnRead read = TxnRead.serializer.deserialize(in, version); + TxnQuery query = TxnQuery.serializer.deserialize(in, version); + TxnUpdate update = in.readBoolean() ? TxnUpdate.serializer.deserialize(in, version) : null; + return new PartialTxn.InMemory(covering, kind, keys, read, query, update); + } + + @Override + public long serializedSize(PartialTxn txn, int version) + { + long size = CommandSerializers.kind.serializedSize(txn.kind(), version); + size += KeySerializers.ranges.serializedSize(txn.covering(), version); + size += KeySerializers.seekables.serializedSize(txn.keys(), version); + size += TxnRead.serializer.serializedSize((TxnRead) txn.read(), version); + size += TxnQuery.serializer.serializedSize((TxnQuery) txn.query(), version); + size += TypeSizes.sizeof(txn.update() != null); + if (txn.update() != null) + size += TxnUpdate.serializer.serializedSize((TxnUpdate) txn.update(), version); + return size; + } + }; + + public static final IVersionedSerializer saveStatus = new EnumSerializer<>(SaveStatus.class); + public static final IVersionedSerializer status = new EnumSerializer<>(Status.class); + public static final IVersionedSerializer durability = new EnumSerializer<>(Durability.class); + + public static final IVersionedSerializer writes = new IVersionedSerializer() + { + @Override + public void serialize(Writes writes, DataOutputPlus out, int version) throws IOException + { + timestamp.serialize(writes.executeAt, out, version); + KeySerializers.seekables.serialize(writes.keys, out, version); + boolean hasWrites = writes.write != null; + out.writeBoolean(hasWrites); + if (hasWrites) + TxnWrite.serializer.serialize((TxnWrite) writes.write, out, version); + } + + @Override + public Writes deserialize(DataInputPlus in, int version) throws IOException + { + return new Writes(timestamp.deserialize(in, version), + KeySerializers.seekables.deserialize(in, version), + in.readBoolean() ? TxnWrite.serializer.deserialize(in, version) : null); + } + + @Override + public long serializedSize(Writes writes, int version) + { + long size = timestamp.serializedSize(writes.executeAt, version); + size += KeySerializers.seekables.serializedSize(writes.keys, version); + boolean hasWrites = writes.write != null; + size += TypeSizes.sizeof(hasWrites); + if (hasWrites) + size += TxnWrite.serializer.serializedSize((TxnWrite) writes.write, version); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java new file mode 100644 index 000000000000..6b349e7ee91e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.Commit; +import accord.primitives.PartialRoute; +import accord.primitives.TxnId; +import accord.primitives.Unseekables; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + +public class CommitSerializers +{ + public static final IVersionedSerializer request = new TxnRequestSerializer() + { + @Override + public void serializeBody(Commit msg, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.timestamp.serialize(msg.executeAt, out, version); + serializeNullable(msg.partialTxn, out, version, CommandSerializers.partialTxn); + DepsSerializer.partialDeps.serialize(msg.partialDeps, out, version); + serializeNullable(msg.route, out, version, KeySerializers.fullRoute); + serializeNullable(msg.read, out, version, ReadDataSerializers.request); + } + + @Override + public Commit deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + { + return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, + CommandSerializers.timestamp.deserialize(in, version), + deserializeNullable(in, version, CommandSerializers.partialTxn), + DepsSerializer.partialDeps.deserialize(in, version), + deserializeNullable(in, version, KeySerializers.fullRoute), + deserializeNullable(in, version, ReadDataSerializers.request) + ); + } + + @Override + public long serializedBodySize(Commit msg, int version) + { + return CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + + serializedNullableSize(msg.partialTxn, version, CommandSerializers.partialTxn) + + DepsSerializer.partialDeps.serializedSize(msg.partialDeps, version) + + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) + + serializedNullableSize(msg.read, version, ReadDataSerializers.request); + } + }; + + public static final IVersionedSerializer invalidate = new IVersionedSerializer() + { + @Override + public void serialize(Commit.Invalidate invalidate, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(invalidate.txnId, out, version); + KeySerializers.unseekables.serialize(invalidate.scope, out, version); + out.writeUnsignedVInt(invalidate.waitForEpoch); + out.writeUnsignedVInt(invalidate.invalidateUntilEpoch - invalidate.waitForEpoch); + } + + @Override + public Commit.Invalidate deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Unseekables scope = KeySerializers.unseekables.deserialize(in, version); + long waitForEpoch = in.readUnsignedVInt(); + long invalidateUntilEpoch = in.readUnsignedVInt() + waitForEpoch; + return Commit.Invalidate.SerializerSupport.create(txnId, scope, waitForEpoch, invalidateUntilEpoch); + } + + @Override + public long serializedSize(Commit.Invalidate invalidate, int version) + { + return CommandSerializers.txnId.serializedSize(invalidate.txnId, version) + + KeySerializers.unseekables.serializedSize(invalidate.scope, version) + + TypeSizes.sizeofUnsignedVInt(invalidate.waitForEpoch) + + TypeSizes.sizeofUnsignedVInt(invalidate.invalidateUntilEpoch - invalidate.waitForEpoch); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java new file mode 100644 index 000000000000..5e4e806b251e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import com.google.common.primitives.Ints; + +import accord.primitives.Deps; +import accord.primitives.KeyDeps; +import accord.primitives.Keys; +import accord.primitives.PartialDeps; +import accord.primitives.Range; +import accord.primitives.RangeDeps; +import accord.primitives.Ranges; +import accord.primitives.TxnId; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.TokenRange; + +import static accord.primitives.KeyDeps.SerializerSupport.keysToTxnIds; +import static accord.primitives.KeyDeps.SerializerSupport.keysToTxnIdsCount; +import static accord.primitives.RangeDeps.SerializerSupport.rangesToTxnIds; +import static accord.primitives.RangeDeps.SerializerSupport.rangesToTxnIdsCount; +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; + +public abstract class DepsSerializer implements IVersionedSerializer +{ + public static final DepsSerializer deps = new DepsSerializer() + { + @Override + Deps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) + { + return new Deps(keyDeps, rangeDeps); + } + }; + + public static final DepsSerializer partialDeps = new DepsSerializer() + { + @Override + PartialDeps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) throws IOException + { + Ranges covering = KeySerializers.ranges.deserialize(in, version); + return new PartialDeps(covering, keyDeps, rangeDeps); + } + + @Override + public void serialize(PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException + { + super.serialize(partialDeps, out, version); + KeySerializers.ranges.serialize(partialDeps.covering, out, version); + } + + @Override + public long serializedSize(PartialDeps partialDeps, int version) + { + return super.serializedSize(partialDeps, version) + + KeySerializers.ranges.serializedSize(partialDeps.covering, version); + } + }; + + @Override + public void serialize(D deps, DataOutputPlus out, int version) throws IOException + { + KeyDeps keyDeps = deps.keyDeps; + { + KeySerializers.keys.serialize(keyDeps.keys(), out, version); + + int txnIdCount = keyDeps.txnIdCount(); + out.writeUnsignedVInt32(txnIdCount); + for (int i = 0; i < txnIdCount; i++) + CommandSerializers.txnId.serialize(keyDeps.txnId(i), out, version); + + int keysToTxnIdsCount = keysToTxnIdsCount(keyDeps); + out.writeUnsignedVInt32(keysToTxnIdsCount); + for (int i = 0; i < keysToTxnIdsCount; i++) + out.writeUnsignedVInt32(keysToTxnIds(keyDeps, i)); + } + + RangeDeps rangeDeps = deps.rangeDeps; + { + int rangeCount = rangeDeps.rangeCount(); + out.writeUnsignedVInt32(rangeCount); + for (int i = 0; i < rangeCount; i++) + TokenRange.serializer.serialize((TokenRange) rangeDeps.range(i), out, version); + + int txnIdCount = rangeDeps.txnIdCount(); + out.writeUnsignedVInt32(txnIdCount); + for (int i = 0; i < txnIdCount; i++) + CommandSerializers.txnId.serialize(rangeDeps.txnId(i), out, version); + + int rangesToTxnIdsCount = rangesToTxnIdsCount(rangeDeps); + out.writeUnsignedVInt32(rangesToTxnIdsCount); + for (int i = 0; i < rangesToTxnIdsCount; i++) + out.writeUnsignedVInt32(rangesToTxnIds(rangeDeps, i)); + } + } + + @Override + public D deserialize(DataInputPlus in, int version) throws IOException + { + KeyDeps keyDeps; + { + Keys keys = KeySerializers.keys.deserialize(in, version); + + int txnIdCount = in.readUnsignedVInt32(); + TxnId[] txnIds = new TxnId[txnIdCount]; + for (int i = 0; i < txnIdCount; i++) + txnIds[i] = CommandSerializers.txnId.deserialize(in, version); + + int keysToTxnIdsCount = in.readUnsignedVInt32(); + int[] keysToTxnIds = new int[keysToTxnIdsCount]; + for (int i = 0; i < keysToTxnIdsCount; i++) + keysToTxnIds[i] = in.readUnsignedVInt32(); + + keyDeps = KeyDeps.SerializerSupport.create(keys, txnIds, keysToTxnIds); + } + + RangeDeps rangeDeps; + { + int rangeCount = Ints.checkedCast(in.readUnsignedVInt32()); + Range[] ranges = new Range[rangeCount]; + for (int i = 0; i < rangeCount; i++) + ranges[i] = TokenRange.serializer.deserialize(in, version); + + int txnIdCount = in.readUnsignedVInt32(); + TxnId[] txnIds = new TxnId[txnIdCount]; + for (int i = 0; i < txnIdCount; i++) + txnIds[i] = CommandSerializers.txnId.deserialize(in, version); + + int rangesToTxnIdsCount = in.readUnsignedVInt32(); + int[] rangesToTxnIds = new int[rangesToTxnIdsCount]; + for (int i = 0; i < rangesToTxnIdsCount; i++) + rangesToTxnIds[i] = in.readUnsignedVInt32(); + + rangeDeps = RangeDeps.SerializerSupport.create(ranges, txnIds, rangesToTxnIds); + } + + return deserialize(keyDeps, rangeDeps, in, version); + } + + abstract D deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) throws IOException; + + @Override + public long serializedSize(D deps, int version) + { + long size = 0L; + + KeyDeps keyDeps = deps.keyDeps; + { + size += KeySerializers.keys.serializedSize(keyDeps.keys(), version); + + int txnIdCount = keyDeps.txnIdCount(); + size += sizeofUnsignedVInt(txnIdCount); + for (int i = 0; i < txnIdCount; i++) + size += CommandSerializers.txnId.serializedSize(keyDeps.txnId(i), version); + + int keysToTxnIdsCount = keysToTxnIdsCount(keyDeps); + size += sizeofUnsignedVInt(keysToTxnIdsCount); + for (int i = 0; i < keysToTxnIdsCount; i++) + size += sizeofUnsignedVInt(keysToTxnIds(keyDeps, i)); + } + + RangeDeps rangeDeps = deps.rangeDeps; + { + int rangeCount = rangeDeps.rangeCount(); + size += sizeofUnsignedVInt(rangeCount); + for (int i = 0 ; i < rangeCount ; ++i) + size += TokenRange.serializer.serializedSize((TokenRange) rangeDeps.range(i), version); + + int txnIdCount = rangeDeps.txnIdCount(); + size += sizeofUnsignedVInt(txnIdCount); + for (int i = 0; i < txnIdCount; i++) + size += CommandSerializers.txnId.serializedSize(rangeDeps.txnId(i), version); + + int rangesToTxnIdsCount = rangesToTxnIdsCount(rangeDeps); + size += sizeofUnsignedVInt(rangesToTxnIdsCount); + for (int i = 0; i < rangesToTxnIdsCount; i++) + size += sizeofUnsignedVInt(rangesToTxnIds(rangeDeps, i)); + } + + return size; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java new file mode 100644 index 000000000000..1bad94da824b --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.SimpleReply; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class EnumSerializer> implements IVersionedSerializer +{ + public static final EnumSerializer simpleReply = new EnumSerializer<>(SimpleReply.class); + + // TODO: should use something other than ordinal for ser/deser + final E[] values; + + public EnumSerializer(Class clazz) + { + this.values = clazz.getEnumConstants(); + } + + @Override + public void serialize(E t, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(t.ordinal()); + } + + @Override + public E deserialize(DataInputPlus in, int version) throws IOException + { + return values[in.readUnsignedVInt32()]; + } + + @Override + public long serializedSize(E t, int version) + { + return TypeSizes.sizeofUnsignedVInt(t.ordinal()); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java new file mode 100644 index 000000000000..1de5b96a5b21 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.GetDeps; +import accord.messages.GetDeps.GetDepsOk; +import accord.primitives.PartialRoute; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class GetDepsSerializers +{ + public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() + { + @Override + public void serializeBody(GetDeps msg, DataOutputPlus out, int version) throws IOException + { + KeySerializers.seekables.serialize(msg.keys, out, version); + CommandSerializers.timestamp.serialize(msg.executeAt, out, version); + CommandSerializers.kind.serialize(msg.kind, out, version); + } + + @Override + public GetDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + { + Seekables keys = KeySerializers.seekables.deserialize(in, version); + Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); + Txn.Kind kind = CommandSerializers.kind.deserialize(in, version); + return GetDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, keys, executeAt, kind); + } + + @Override + public long serializedBodySize(GetDeps msg, int version) + { + return KeySerializers.seekables.serializedSize(msg.keys, version) + + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + + CommandSerializers.kind.serializedSize(msg.kind, version); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(GetDepsOk reply, DataOutputPlus out, int version) throws IOException + { + DepsSerializer.partialDeps.serialize(reply.deps, out, version); + } + + @Override + public GetDepsOk deserialize(DataInputPlus in, int version) throws IOException + { + return new GetDepsOk(DepsSerializer.partialDeps.deserialize(in, version)); + } + + @Override + public long serializedSize(GetDepsOk reply, int version) + { + return DepsSerializer.partialDeps.serializedSize(reply.deps, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java new file mode 100644 index 000000000000..66c649eb9216 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.local.Status; +import accord.messages.InformDurable; +import accord.primitives.PartialRoute; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class InformDurableSerializers +{ + public static final IVersionedSerializer request = new TxnRequestSerializer() + { + @Override + public void serializeBody(InformDurable msg, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.timestamp.serialize(msg.executeAt, out, version); + CommandSerializers.durability.serialize(msg.durability, out, version); + } + + @Override + public InformDurable deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + { + Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); + Status.Durability durability = CommandSerializers.durability.deserialize(in, version); + return InformDurable.SerializationSupport.create(txnId, scope, waitForEpoch, executeAt, durability); + } + + @Override + public long serializedBodySize(InformDurable msg, int version) + { + return CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + + CommandSerializers.durability.serializedSize(msg.durability, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java new file mode 100644 index 000000000000..38e88ea15768 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.InformHomeDurable; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +import static org.apache.cassandra.utils.CollectionSerializers.deserializeSet; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; + +public class InformHomeDurableSerializers +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + @Override + public void serialize(InformHomeDurable inform, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(inform.txnId, out, version); + KeySerializers.routingKey.serialize(inform.homeKey, out, version); + CommandSerializers.timestamp.serialize(inform.executeAt, out, version); + CommandSerializers.durability.serialize(inform.durability, out, version); + serializeCollection(inform.persistedOn, out, version, TopologySerializers.nodeId); + + } + + @Override + public InformHomeDurable deserialize(DataInputPlus in, int version) throws IOException + { + return new InformHomeDurable(CommandSerializers.txnId.deserialize(in, version), + KeySerializers.routingKey.deserialize(in, version), + CommandSerializers.timestamp.deserialize(in, version), + CommandSerializers.durability.deserialize(in, version), + deserializeSet(in, version, TopologySerializers.nodeId)); + } + + @Override + public long serializedSize(InformHomeDurable inform, int version) + { + return CommandSerializers.txnId.serializedSize(inform.txnId, version) + + KeySerializers.routingKey.serializedSize(inform.homeKey, version) + + CommandSerializers.timestamp.serializedSize(inform.executeAt, version) + + CommandSerializers.durability.serializedSize(inform.durability, version) + + serializedCollectionSize(inform.persistedOn, version, TopologySerializers.nodeId); + } + + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java new file mode 100644 index 000000000000..e773a405e280 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.InformOfTxnId; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class InformOfTxnIdSerializers +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + @Override + public void serialize(InformOfTxnId inform, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(inform.txnId, out, version); + KeySerializers.routingKey.serialize(inform.homeKey, out, version); + } + + @Override + public InformOfTxnId deserialize(DataInputPlus in, int version) throws IOException + { + return new InformOfTxnId(CommandSerializers.txnId.deserialize(in, version), + KeySerializers.routingKey.deserialize(in, version)); + } + + @Override + public long serializedSize(InformOfTxnId inform, int version) + { + return CommandSerializers.txnId.serializedSize(inform.txnId, version) + + KeySerializers.routingKey.serializedSize(inform.homeKey, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java new file mode 100644 index 000000000000..078051bfe121 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -0,0 +1,401 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.function.IntFunction; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.primitives.AbstractKeys; +import accord.primitives.AbstractRanges; +import accord.primitives.FullKeyRoute; +import accord.primitives.FullRangeRoute; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.PartialKeyRoute; +import accord.primitives.PartialRangeRoute; +import accord.primitives.PartialRoute; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.RoutableKey; +import accord.primitives.Route; +import accord.primitives.RoutingKeys; +import accord.primitives.Seekables; +import accord.primitives.Unseekables; +import accord.primitives.Unseekables.UnseekablesKind; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; + +public class KeySerializers +{ + private KeySerializers() {} + + public static final IVersionedSerializer key = (IVersionedSerializer) (IVersionedSerializer) PartitionKey.serializer; + public static final IVersionedSerializer routingKey = (IVersionedSerializer) (IVersionedSerializer) AccordRoutingKey.serializer; + + public static final IVersionedSerializer routingKeys = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) + { + @Override RoutingKeys deserialize(DataInputPlus in, int version, RoutingKey[] keys) + { + return RoutingKeys.SerializationSupport.create(keys); + } + }; + + public static final IVersionedSerializer keys = new AbstractKeysSerializer(key, Key[]::new) + { + @Override Keys deserialize(DataInputPlus in, int version, Key[] keys) + { + return Keys.SerializationSupport.create(keys); + } + }; + + public static final IVersionedSerializer ranges = new AbstractRangesSerializer() + { + @Override + public Ranges deserialize(DataInputPlus in, int version, Range[] ranges) + { + return Ranges.ofSortedAndDeoverlapped(ranges); + } + }; + + public static final IVersionedSerializer partialKeyRoute = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) + { + @Override PartialKeyRoute deserialize(DataInputPlus in, int version, RoutingKey[] keys) throws IOException + { + Ranges covering = ranges.deserialize(in, version); + RoutingKey homeKey = routingKey.deserialize(in, version); + return PartialKeyRoute.SerializationSupport.create(covering, homeKey, keys); + } + + @Override + public void serialize(PartialKeyRoute keys, DataOutputPlus out, int version) throws IOException + { + super.serialize(keys, out, version); + ranges.serialize(keys.covering, out, version); + routingKey.serialize(keys.homeKey, out, version); + } + + @Override + public long serializedSize(PartialKeyRoute keys, int version) + { + return super.serializedSize(keys, version) + + ranges.serializedSize(keys.covering, version) + + routingKey.serializedSize(keys.homeKey, version); + } + }; + + public static final IVersionedSerializer fullKeyRoute = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) + { + @Override FullKeyRoute deserialize(DataInputPlus in, int version, RoutingKey[] keys) throws IOException + { + RoutingKey homeKey = routingKey.deserialize(in, version); + return FullKeyRoute.SerializationSupport.create(homeKey, keys); + } + + @Override + public void serialize(FullKeyRoute keys, DataOutputPlus out, int version) throws IOException + { + super.serialize(keys, out, version); + routingKey.serialize(keys.homeKey, out, version); + } + + @Override + public long serializedSize(FullKeyRoute keys, int version) + { + return super.serializedSize(keys, version) + + routingKey.serializedSize(keys.homeKey, version); + } + }; + + public static final IVersionedSerializer partialRangeRoute = new AbstractRangesSerializer() + { + @Override PartialRangeRoute deserialize(DataInputPlus in, int version, Range[] rs) throws IOException + { + Ranges covering = ranges.deserialize(in, version); + RoutingKey homeKey = routingKey.deserialize(in, version); + return PartialRangeRoute.SerializationSupport.create(covering, homeKey, rs); + } + + @Override + public void serialize(PartialRangeRoute rs, DataOutputPlus out, int version) throws IOException + { + super.serialize(rs, out, version); + ranges.serialize(rs.covering, out, version); + routingKey.serialize(rs.homeKey, out, version); + } + + @Override + public long serializedSize(PartialRangeRoute rs, int version) + { + return super.serializedSize(rs, version) + + ranges.serializedSize(rs.covering, version) + + routingKey.serializedSize(rs.homeKey, version); + } + }; + + public static final IVersionedSerializer fullRangeRoute = new AbstractRangesSerializer() + { + @Override FullRangeRoute deserialize(DataInputPlus in, int version, Range[] Ranges) throws IOException + { + RoutingKey homeRange = routingKey.deserialize(in, version); + return FullRangeRoute.SerializationSupport.create(homeRange, Ranges); + } + + @Override + public void serialize(FullRangeRoute Ranges, DataOutputPlus out, int version) throws IOException + { + super.serialize(Ranges, out, version); + routingKey.serialize(Ranges.homeKey, out, version); + } + + @Override + public long serializedSize(FullRangeRoute ranges, int version) + { + return super.serializedSize(ranges, version) + + routingKey.serializedSize(ranges.homeKey(), version); + } + }; + + public static final IVersionedSerializer> route = new AbstractRoutablesSerializer<>( + EnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.FullKeyRoute, UnseekablesKind.PartialRangeRoute, UnseekablesKind.FullRangeRoute) + ); + + public static final IVersionedSerializer> partialRoute = new AbstractRoutablesSerializer<>( + EnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.PartialRangeRoute) + ); + + public static final IVersionedSerializer> fullRoute = new AbstractRoutablesSerializer<>( + EnumSet.of(UnseekablesKind.FullKeyRoute, UnseekablesKind.FullRangeRoute) + ); + + public static final IVersionedSerializer> unseekables = new AbstractRoutablesSerializer<>( + EnumSet.allOf(UnseekablesKind.class) + ); + + static class AbstractRoutablesSerializer> implements IVersionedSerializer + { + final EnumSet permitted; + protected AbstractRoutablesSerializer(EnumSet permitted) + { + this.permitted = permitted; + } + + @Override + public void serialize(RS t, DataOutputPlus out, int version) throws IOException + { + UnseekablesKind kind = t.kind(); + if (!permitted.contains(kind)) + throw new IllegalArgumentException(); + + switch (kind) + { + default: throw new AssertionError(); + case RoutingKeys: + out.writeByte(1); + routingKeys.serialize((RoutingKeys)t, out, version); + break; + case PartialKeyRoute: + out.writeByte(2); + partialKeyRoute.serialize((PartialKeyRoute)t, out, version); + break; + case FullKeyRoute: + out.writeByte(3); + fullKeyRoute.serialize((FullKeyRoute)t, out, version); + break; + case RoutingRanges: + out.writeByte(4); + ranges.serialize((Ranges)t, out, version); + break; + case PartialRangeRoute: + out.writeByte(5); + partialRangeRoute.serialize((PartialRangeRoute)t, out, version); + break; + case FullRangeRoute: + out.writeByte(6); + fullRangeRoute.serialize((FullRangeRoute)t, out, version); + break; + } + } + + @Override + public RS deserialize(DataInputPlus in, int version) throws IOException + { + byte b = in.readByte(); + UnseekablesKind kind; + RS result; + switch (b) + { + default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4 or 5; received " + b); + case 1: kind = UnseekablesKind.RoutingKeys; result = (RS)routingKeys.deserialize(in, version); break; + case 2: kind = UnseekablesKind.PartialKeyRoute; result = (RS)partialKeyRoute.deserialize(in, version); break; + case 3: kind = UnseekablesKind.FullKeyRoute; result = (RS)fullKeyRoute.deserialize(in, version); break; + case 4: kind = UnseekablesKind.RoutingRanges; result = (RS)ranges.deserialize(in, version); break; + case 5: kind = UnseekablesKind.PartialRangeRoute; result = (RS)partialRangeRoute.deserialize(in, version); break; + case 6: kind = UnseekablesKind.FullRangeRoute; result = (RS)fullRangeRoute.deserialize(in, version); break; + } + if (!permitted.contains(kind)) + throw new IllegalStateException(); + return result; + } + + @Override + public long serializedSize(RS t, int version) + { + switch (t.kind()) + { + default: throw new AssertionError(); + case RoutingKeys: + return 1 + routingKeys.serializedSize((RoutingKeys)t, version); + case PartialKeyRoute: + return 1 + partialKeyRoute.serializedSize((PartialKeyRoute)t, version); + case FullKeyRoute: + return 1 + fullKeyRoute.serializedSize((FullKeyRoute)t, version); + case RoutingRanges: + return 1 + ranges.serializedSize((Ranges)t, version); + case PartialRangeRoute: + return 1 + partialRangeRoute.serializedSize((PartialRangeRoute)t, version); + case FullRangeRoute: + return 1 + fullRangeRoute.serializedSize((FullRangeRoute)t, version); + } + } + } + + public static final IVersionedSerializer> seekables = new IVersionedSerializer>() + { + @Override + public void serialize(Seekables t, DataOutputPlus out, int version) throws IOException + { + switch (t.domain()) + { + default: throw new AssertionError(); + case Key: + out.writeByte(1); + keys.serialize((Keys)t, out, version); + break; + case Range: + out.writeByte(2); + ranges.serialize((Ranges)t, out, version); + break; + } + } + + @Override + public Seekables deserialize(DataInputPlus in, int version) throws IOException + { + byte b = in.readByte(); + switch (b) + { + default: throw new IOException("Corrupted input: expected byte 1 or 2, received " + b); + case 1: return keys.deserialize(in, version); + case 2: return ranges.deserialize(in, version); + } + } + + @Override + public long serializedSize(Seekables t, int version) + { + switch (t.domain()) + { + default: throw new AssertionError(); + case Key: + return 1 + keys.serializedSize((Keys)t, version); + case Range: + return 1 + ranges.serializedSize((Ranges)t, version); + } + } + }; + + public static abstract class AbstractKeysSerializer> implements IVersionedSerializer + { + final IVersionedSerializer keySerializer; + final IntFunction allocate; + + public AbstractKeysSerializer(IVersionedSerializer keySerializer, IntFunction allocate) + { + this.keySerializer = keySerializer; + this.allocate = allocate; + } + + @Override + public void serialize(KS keys, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(keys.size()); + for (int i=0, mi=keys.size(); i> implements IVersionedSerializer + { + @Override + public void serialize(RS ranges, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(ranges.size()); + for (int i=0, mi=ranges.size(); i request = new WithUnsyncedSerializer() + { + @Override + public void serializeBody(PreAccept msg, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.partialTxn.serialize(msg.partialTxn, out, version); + serializeNullable(msg.route, out, version, KeySerializers.fullRoute); + out.writeUnsignedVInt(msg.maxEpoch - msg.minUnsyncedEpoch); + } + + @Override + public PreAccept deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + { + PartialTxn partialTxn = CommandSerializers.partialTxn.deserialize(in, version); + @Nullable FullRoute fullRoute = deserializeNullable(in, version, KeySerializers.fullRoute); + long maxEpoch = in.readUnsignedVInt() + minEpoch; + return PreAccept.SerializerSupport.create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, + maxEpoch, partialTxn, fullRoute); + } + + @Override + public long serializedBodySize(PreAccept msg, int version) + { + return CommandSerializers.partialTxn.serializedSize(msg.partialTxn, version) + + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) + + TypeSizes.sizeofUnsignedVInt(msg.maxEpoch - msg.minUnsyncedEpoch); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(PreAcceptReply reply, DataOutputPlus out, int version) throws IOException + { + out.writeBoolean(reply.isOk()); + if (!reply.isOk()) + return; + + PreAcceptOk preAcceptOk = (PreAcceptOk) reply; + CommandSerializers.txnId.serialize(preAcceptOk.txnId, out, version); + CommandSerializers.timestamp.serialize(preAcceptOk.witnessedAt, out, version); + DepsSerializer.partialDeps.serialize(preAcceptOk.deps, out, version); + } + + @Override + public PreAcceptReply deserialize(DataInputPlus in, int version) throws IOException + { + if (!in.readBoolean()) + return PreAccept.PreAcceptNack.INSTANCE; + + return new PreAcceptOk(CommandSerializers.txnId.deserialize(in, version), + CommandSerializers.timestamp.deserialize(in, version), + DepsSerializer.partialDeps.deserialize(in, version)); + } + + @Override + public long serializedSize(PreAcceptReply reply, int version) + { + long size = TypeSizes.sizeof(reply.isOk()); + if (!reply.isOk()) + return size; + + PreAcceptOk preAcceptOk = (PreAcceptOk) reply; + size += CommandSerializers.txnId.serializedSize(preAcceptOk.txnId, version); + size += CommandSerializers.timestamp.serializedSize(preAcceptOk.witnessedAt, version); + size += DepsSerializer.partialDeps.serializedSize(preAcceptOk.deps, version); + + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java new file mode 100644 index 000000000000..4899316cc570 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.ReadData; +import accord.messages.ReadData.ReadNack; +import accord.messages.ReadData.ReadOk; +import accord.messages.ReadData.ReadReply; +import accord.primitives.Seekables; +import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.txn.TxnData; + +public class ReadDataSerializers +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + @Override + public void serialize(ReadData read, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(read.txnId, out, version); + KeySerializers.seekables.serialize(read.readScope, out, version); + out.writeUnsignedVInt(read.waitForEpoch()); + out.writeUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); + } + + @Override + public ReadData deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Seekables readScope = KeySerializers.seekables.deserialize(in, version); + long waitForEpoch = in.readUnsignedVInt(); + long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; + return ReadData.SerializerSupport.create(txnId, readScope, executeAtEpoch, waitForEpoch); + } + + @Override + public long serializedSize(ReadData read, int version) + { + return CommandSerializers.txnId.serializedSize(read.txnId, version) + + KeySerializers.seekables.serializedSize(read.readScope, version) + + TypeSizes.sizeofUnsignedVInt(read.waitForEpoch()) + + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + // TODO (now): use something other than ordinal + final ReadNack[] nacks = ReadNack.values(); + + @Override + public void serialize(ReadReply reply, DataOutputPlus out, int version) throws IOException + { + if (!reply.isOk()) + { + out.writeByte(1 + ((ReadNack) reply).ordinal()); + return; + } + + out.writeByte(0); + ReadOk readOk = (ReadOk) reply; + TxnData.serializer.serialize((TxnData) readOk.data, out, version); + } + + @Override + public ReadReply deserialize(DataInputPlus in, int version) throws IOException + { + int id = in.readByte(); + if (id != 0) + return nacks[id - 1]; + + return new ReadOk(TxnData.serializer.deserialize(in, version)); + } + + @Override + public long serializedSize(ReadReply reply, int version) + { + if (!reply.isOk()) + return TypeSizes.BYTE_SIZE; + + ReadOk readOk = (ReadOk) reply; + return TypeSizes.BYTE_SIZE + TxnData.serializer.serializedSize((TxnData) readOk.data, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java new file mode 100644 index 000000000000..fbd00f637e30 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import javax.annotation.Nullable; + +import accord.api.Result; +import accord.local.Status; +import accord.messages.BeginRecovery; +import accord.messages.BeginRecovery.RecoverNack; +import accord.messages.BeginRecovery.RecoverOk; +import accord.messages.BeginRecovery.RecoverReply; +import accord.primitives.Ballot; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.PartialDeps; +import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.txn.TxnData; + +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + +public class RecoverySerializers +{ + public static final IVersionedSerializer request = new TxnRequestSerializer() + { + @Override + public void serializeBody(BeginRecovery recover, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.partialTxn.serialize(recover.partialTxn, out, version); + CommandSerializers.ballot.serialize(recover.ballot, out, version); + serializeNullable(recover.route, out, version, KeySerializers.fullRoute); + } + + @Override + public BeginRecovery deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + { + PartialTxn partialTxn = CommandSerializers.partialTxn.deserialize(in, version); + Ballot ballot = CommandSerializers.ballot.deserialize(in, version); + @Nullable FullRoute route = deserializeNullable(in, version, KeySerializers.fullRoute); + return BeginRecovery.SerializationSupport.create(txnId, scope, waitForEpoch, partialTxn, ballot, route); + } + + @Override + public long serializedBodySize(BeginRecovery recover, int version) + { + return CommandSerializers.partialTxn.serializedSize(recover.partialTxn, version) + + CommandSerializers.ballot.serializedSize(recover.ballot, version) + + serializedNullableSize(recover.route, version, KeySerializers.fullRoute); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + void serializeNack(RecoverNack recoverNack, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.ballot.serialize(recoverNack.supersededBy, out, version); + } + + void serializeOk(RecoverOk recoverOk, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(recoverOk.txnId, out, version); + CommandSerializers.status.serialize(recoverOk.status, out, version); + CommandSerializers.ballot.serialize(recoverOk.accepted, out, version); + serializeNullable(recoverOk.executeAt, out, version, CommandSerializers.timestamp); + DepsSerializer.partialDeps.serialize(recoverOk.deps, out, version); + DepsSerializer.deps.serialize(recoverOk.earlierCommittedWitness, out, version); + DepsSerializer.deps.serialize(recoverOk.earlierAcceptedNoWitness, out, version); + out.writeBoolean(recoverOk.rejectsFastPath); + serializeNullable(recoverOk.writes, out, version, CommandSerializers.writes); + serializeNullable((TxnData) recoverOk.result, out, version, TxnData.serializer); + } + + @Override + public final void serialize(RecoverReply reply, DataOutputPlus out, int version) throws IOException + { + out.writeBoolean(reply.isOk()); + if (!reply.isOk()) + serializeNack((RecoverNack) reply, out, version); + else + serializeOk((RecoverOk) reply, out, version); + } + + RecoverNack deserializeNack(Ballot supersededBy, DataInputPlus in, int version) + { + return new RecoverNack(supersededBy); + } + + RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp executeAt, PartialDeps deps, Deps earlierCommittedWitness, Deps earlierAcceptedNoWitness, boolean rejectsFastPath, Writes writes, Result result, DataInputPlus in, int version) + { + return new RecoverOk(txnId, status, accepted, executeAt, deps, earlierCommittedWitness, earlierAcceptedNoWitness, rejectsFastPath, writes, result); + } + + @Override + public final RecoverReply deserialize(DataInputPlus in, int version) throws IOException + { + boolean isOk = in.readBoolean(); + if (!isOk) + return deserializeNack(CommandSerializers.ballot.deserialize(in, version), in, version); + + return deserializeOk(CommandSerializers.txnId.deserialize(in, version), + CommandSerializers.status.deserialize(in, version), + CommandSerializers.ballot.deserialize(in, version), + deserializeNullable(in, version, CommandSerializers.timestamp), + DepsSerializer.partialDeps.deserialize(in, version), + DepsSerializer.deps.deserialize(in, version), + DepsSerializer.deps.deserialize(in, version), + in.readBoolean(), + deserializeNullable(in, version, CommandSerializers.writes), + deserializeNullable(in, version, TxnData.serializer), + in, + version); + } + + long serializedNackSize(RecoverNack recoverNack, int version) + { + return CommandSerializers.ballot.serializedSize(recoverNack.supersededBy, version); + } + + long serializedOkSize(RecoverOk recoverOk, int version) + { + long size = CommandSerializers.txnId.serializedSize(recoverOk.txnId, version); + size += CommandSerializers.status.serializedSize(recoverOk.status, version); + size += CommandSerializers.ballot.serializedSize(recoverOk.accepted, version); + size += serializedNullableSize(recoverOk.executeAt, version, CommandSerializers.timestamp); + size += DepsSerializer.partialDeps.serializedSize(recoverOk.deps, version); + size += DepsSerializer.deps.serializedSize(recoverOk.earlierCommittedWitness, version); + size += DepsSerializer.deps.serializedSize(recoverOk.earlierAcceptedNoWitness, version); + size += TypeSizes.sizeof(recoverOk.rejectsFastPath); + size += serializedNullableSize(recoverOk.writes, version, CommandSerializers.writes); + size += serializedNullableSize((TxnData) recoverOk.result, version, TxnData.serializer); + return size; + } + + @Override + public final long serializedSize(RecoverReply reply, int version) + { + return TypeSizes.sizeof(reply.isOk()) + + (reply.isOk() ? serializedOkSize((RecoverOk) reply, version) : serializedNackSize((RecoverNack) reply, version)); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java new file mode 100644 index 000000000000..04afa5b250ad --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.local.Node; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class TopologySerializers +{ + private TopologySerializers() {} + + public static final NodeIdSerializer nodeId = new NodeIdSerializer(); + public static class NodeIdSerializer implements IVersionedSerializer + { + private NodeIdSerializer() {} + + @Override + public void serialize(Node.Id id, DataOutputPlus out, int version) throws IOException + { + out.writeInt(id.id); + } + + public int serialize(Node.Id id, V dst, ValueAccessor accessor, int offset) + { + return accessor.putInt(dst, offset, id.id); + } + + @Override + public Node.Id deserialize(DataInputPlus in, int version) throws IOException + { + return new Node.Id(in.readInt()); + } + + public Node.Id deserialize(V src, ValueAccessor accessor, int offset) + { + return new Node.Id(accessor.getInt(src, offset)); + } + + @Override + public long serializedSize(Node.Id id, int version) + { + return serializedSize(); + } + + public int serializedSize() + { + return TypeSizes.INT_SIZE; // id.id + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java new file mode 100644 index 000000000000..c9da12801e41 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.TxnRequest; +import accord.primitives.PartialRoute; +import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public abstract class TxnRequestSerializer> implements IVersionedSerializer +{ + void serializeHeader(T msg, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(msg.txnId, out, version); + KeySerializers.partialRoute.serialize(msg.scope, out, version); + out.writeUnsignedVInt(msg.waitForEpoch); + } + + public abstract void serializeBody(T msg, DataOutputPlus out, int version) throws IOException; + + @Override + public final void serialize(T msg, DataOutputPlus out, int version) throws IOException + { + serializeHeader(msg, out, version); + serializeBody(msg, out, version); + } + + public abstract T deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException; + + @Override + public final T deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + PartialRoute scope = KeySerializers.partialRoute.deserialize(in, version); + // TODO: there should be a base epoch + long waitForEpoch = in.readUnsignedVInt(); + return deserializeBody(in, version, txnId, scope, waitForEpoch); + } + + long serializedHeaderSize(T msg, int version) + { + return CommandSerializers.txnId.serializedSize(msg.txnId, version) + + KeySerializers.partialRoute.serializedSize(msg.scope(), version) + + TypeSizes.sizeofUnsignedVInt(msg.waitForEpoch); + } + + public abstract long serializedBodySize(T msg, int version); + + @Override + public final long serializedSize(T msg, int version) + { + return serializedHeaderSize(msg, version) + serializedBodySize(msg, version); + } + + public static abstract class WithUnsyncedSerializer> extends TxnRequestSerializer + { + @Override + void serializeHeader(T msg, DataOutputPlus out, int version) throws IOException + { + super.serializeHeader(msg, out, version); + out.writeUnsignedVInt(msg.minUnsyncedEpoch); + out.writeBoolean(msg.doNotComputeProgressKey); + } + + public abstract T deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException; + + @Override + public final T deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + { + long minEpoch = in.readUnsignedVInt(); + boolean doNotComputeProgressKey = in.readBoolean(); + return deserializeBody(in, version, txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey); + } + + @Override + long serializedHeaderSize(T msg, int version) + { + long size = super.serializedHeaderSize(msg, version); + size += TypeSizes.sizeofUnsignedVInt(msg.minUnsyncedEpoch); + size += TypeSizes.BOOL_SIZE; + return size; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java new file mode 100644 index 000000000000..a56b1b29bf4d --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.WaitOnCommit; +import accord.messages.WaitOnCommit.WaitOnCommitOk; +import accord.primitives.TxnId; +import accord.primitives.Unseekables; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class WaitOnCommitSerializer +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + @Override + public void serialize(WaitOnCommit wait, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(wait.txnId, out, version); + KeySerializers.unseekables.serialize(wait.scope, out, version); + } + + @Override + public WaitOnCommit deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Unseekables scope = KeySerializers.unseekables.deserialize(in, version); + return WaitOnCommit.SerializerSupport.create(txnId, scope); + } + + @Override + public long serializedSize(WaitOnCommit wait, int version) + { + return CommandSerializers.txnId.serializedSize(wait.txnId, version) + + KeySerializers.unseekables.serializedSize(wait.scope, version); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(WaitOnCommitOk ok, DataOutputPlus out, int version) throws IOException + { + } + + @Override + public WaitOnCommitOk deserialize(DataInputPlus in, int version) throws IOException + { + return WaitOnCommitOk.INSTANCE; + } + + @Override + public long serializedSize(WaitOnCommitOk ok, int version) + { + return 0; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java b/src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java new file mode 100644 index 000000000000..b09bc31827f3 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import org.apache.cassandra.service.accord.AccordState; + +public abstract class AbstractStoredField +{ + private static final int LOADED_FLAG = 0x01; + private static final int EMPTY_FLAG = 0x01 << 1; + private static final int CHANGED_FLAG = 0x01 << 2; + private static final int CLEARED_FLAG = 0x01 << 3; + private static final int WRITE_ONLY_FLAG = 0x01 << 4; + private static final int READ_ONLY_FLAG = 0x01 << 5; + + private byte flag; + + public AbstractStoredField(AccordState.ReadWrite readWrite) + { + this.flag = 0; + if (readWrite == AccordState.ReadWrite.WRITE_ONLY) + set(WRITE_ONLY_FLAG); + if (readWrite == AccordState.ReadWrite.READ_ONLY) + set(READ_ONLY_FLAG); + } + + @Override + public String toString() + { + if (!hasValue()) + return ""; + if (check(WRITE_ONLY_FLAG)) + return ""; + preGet(); + if (hasModifications()) + return '*' + valueString(); + return valueString(); + } + + private void clear(int v) + { + flag &= ~v; + } + + private boolean check(int v) + { + return (flag & v) != 0; + } + + private void set(int v) + { + flag |= v; + } + + public boolean hasValue() + { + return isLoaded() && !isEmpty(); + } + + public boolean isLoaded() + { + return check(LOADED_FLAG); + } + + public void setEmpty() + { + if (check(0xFF)) + throw new IllegalStateException("Cannot set previously loaded/initialized commands to empty"); + set(LOADED_FLAG | EMPTY_FLAG); + } + + public boolean isEmpty() + { + return check(EMPTY_FLAG); + } + + void preUnload() + { + if (hasModifications()) + throw new IllegalStateException("Cannot unload a field with unsaved changes"); + flag = 0; + } + + void preLoad() + { + if (hasModifications()) + throw new IllegalStateException("Cannot load into a field with unsaved changes"); + clear(EMPTY_FLAG); + set(LOADED_FLAG); + } + + void preChange() + { + if (check(READ_ONLY_FLAG)) + throw new IllegalStateException("Cannot update a read only field"); + clear(EMPTY_FLAG); + set(LOADED_FLAG | CHANGED_FLAG); + } + + void preBlindChange() + { + set(CHANGED_FLAG); + } + + void preGet() + { + if (!check(LOADED_FLAG)) + throw new IllegalStateException("Cannot read unloaded fields"); + if (check(EMPTY_FLAG)) + throw new IllegalStateException("Cannot read empty fields"); + if (check(WRITE_ONLY_FLAG)) + throw new IllegalStateException("Cannot read write only fields"); + } + + void preClear() + { + set(CLEARED_FLAG | LOADED_FLAG | CHANGED_FLAG); + } + + public boolean hasModifications() + { + return check(CHANGED_FLAG); + } + + public void clearModifiedFlag() + { + clear(CHANGED_FLAG); + } + + public boolean wasCleared() + { + return check(CLEARED_FLAG); + } + + public abstract String valueString(); +} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java b/src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java new file mode 100644 index 000000000000..fb81da9df4bf --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import java.util.Objects; + +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.utils.ObjectSizes; + +public class StoredBoolean extends AbstractStoredField +{ + public static final long EMPTY_SIZE = ObjectSizes.measure(new StoredBoolean(AccordState.ReadWrite.FULL)); + protected boolean value; + + public StoredBoolean(AccordState.ReadWrite readWrite) + { + super(readWrite); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + StoredBoolean that = (StoredBoolean) o; + return value == that.value; + } + + @Override + public int hashCode() + { + return Objects.hash(value); + } + + @Override + public String valueString() + { + return Boolean.toString(value); + } + + public long estimatedSizeOnHeap() + { + return EMPTY_SIZE; + } + + public void unload() + { + preUnload(); + value = false; + } + + public void load(boolean value) + { + preLoad(); + this.value = value; + } + + public void set(boolean value) + { + preChange(); + this.value = value; + } + + public boolean get() + { + preGet(); + return value; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredLong.java b/src/java/org/apache/cassandra/service/accord/store/StoredLong.java new file mode 100644 index 000000000000..3534d68b9397 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/store/StoredLong.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import java.util.Objects; + +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.utils.ObjectSizes; + +public class StoredLong extends AbstractStoredField +{ + public static final long EMPTY_SIZE = ObjectSizes.measure(new StoredLong(AccordState.ReadWrite.FULL)); + + protected long value; + + public StoredLong(AccordState.ReadWrite readWrite) + { + super(readWrite); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + StoredLong that = (StoredLong) o; + return value == that.value; + } + + @Override + public int hashCode() + { + return Objects.hash(value); + } + + @Override + public String valueString() + { + return Long.toString(value); + } + + public long estimatedSizeOnHeap() + { + return EMPTY_SIZE; + } + + public void unload() + { + preUnload(); + value = 0; + } + + public void load(long value) + { + preLoad(); + this.value = value; + } + + public void set(long value) + { + preChange(); + this.value = value; + } + + public long get() + { + preGet(); + return value; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java b/src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java new file mode 100644 index 000000000000..81f7efdc0358 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import java.util.Collections; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Objects; +import java.util.TreeMap; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.ToLongFunction; +import java.util.stream.Collectors; + +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.utils.ObjectSizes; + +/** + * Navigable Map, capable of blind add/remove + */ +public class StoredNavigableMap, V> extends AbstractStoredField +{ + private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new StoredNavigableMap<>(AccordState.ReadWrite.FULL)); + private NavigableMap map = null; + private NavigableMap view = null; + private NavigableMap additions = null; + private NavigableMap deletions = null; + + public StoredNavigableMap(AccordState.ReadWrite readWrite) + { + super(readWrite); + } + + @Override + public boolean equals(Object o) + { + preGet(); + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + StoredNavigableMap that = (StoredNavigableMap) o; + return Objects.equals(map, that.map); + } + + @Override + public int hashCode() + { + preGet(); + return Objects.hash(map); + } + + @Override + public String valueString() + { + if (view == null) + return ""; + return view.entrySet().stream() + .map(e -> e.getKey() + "=" + e.getValue()) + .collect(Collectors.joining(", ", "{", "}")); + } + + public void unload() + { + preUnload(); + map = null; + view = null; + additions = null; + deletions = null; + } + + void setInternal(NavigableMap map) + { + this.map = map; + this.view = Collections.unmodifiableNavigableMap(map); + } + + public void load(NavigableMap map) + { + preLoad(); + setInternal(map); + } + + public NavigableMap getView() + { + preGet(); + return view; + } + + public void blindPut(K key, V val) + { + preBlindChange(); + if (hasValue()) + map.put(key, val); + + if (additions == null) + additions = new TreeMap<>(); + + additions.put(key, val); + if (deletions != null) + deletions.remove(key); + } + + public void blindRemove(K key) + { + preBlindChange(); + if (hasValue()) + map.remove(key); + + if (!wasCleared()) + { + if (deletions == null) + deletions = new TreeMap<>(); + deletions.put(key, null); + } + if (additions != null) + additions.remove(key); + } + + // TODO: this is a kludge, but will suffice until we can more fully rework efficiency of waitingOn collections + // this is semantically equivalent to blindRemove(key) but stores the value we believe was bound to key on removal + // so that it can be used by forEachDeletion + public void blindRemove(K key, V value) + { + preBlindChange(); + if (hasValue()) + map.remove(key); + + if (!wasCleared()) + { + if (deletions == null) + deletions = new TreeMap<>(); + deletions.put(key, value); + } + if (additions != null) + additions.remove(key); + } + + public void clear() + { + clearModifiedFlag(); + preClear(); + setInternal(new TreeMap<>()); + } + + @Override + public void clearModifiedFlag() + { + super.clearModifiedFlag(); + if (additions != null) additions.clear(); + if (deletions != null) deletions.clear(); + } + + public boolean hasAdditions() + { + return additions != null && !additions.isEmpty(); + } + + public int additionsSize() + { + return additions != null ? additions.size() : 0; + } + + public boolean hasDeletions() + { + return deletions != null && !deletions.isEmpty(); + } + + public int deletionsSize() + { + return deletions != null ? deletions.size() : 0; + } + + public int totalModifications() + { + return additionsSize() + deletionsSize(); + } + + public void forEachAddition(BiConsumer consumer) + { + if (additions != null) + additions.forEach(consumer); + } + + public void forEachDeletion(Consumer consumer) + { + if (deletions != null) + deletions.keySet().forEach(consumer); + } + + public void forEachDeletion(BiConsumer consumer) + { + if (deletions != null) + deletions.forEach(consumer); + } + + public long estimatedSizeOnHeap(ToLongFunction measureKey, ToLongFunction measureVal) + { + long size = EMPTY_SIZE; + if (hasValue() && ! map.isEmpty()) + { + for (Map.Entry entry : map.entrySet()) + { + size += measureKey.applyAsLong(entry.getKey()); + size += measureVal.applyAsLong(entry.getValue()); + } + } + return size; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredSet.java b/src/java/org/apache/cassandra/service/accord/store/StoredSet.java new file mode 100644 index 000000000000..699c8bf48f68 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/store/StoredSet.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import java.util.Collections; +import java.util.HashSet; +import java.util.NavigableSet; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.Consumer; +import java.util.function.ToLongFunction; +import java.util.stream.Collectors; + +import accord.utils.DeterministicIdentitySet; +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.utils.ObjectSizes; + +public abstract class StoredSet> extends AbstractStoredField +{ + private S set = null; + private S view = null; + private Set additions = null; + private Set deletions = null; + + abstract S createDataSet(); + abstract Set createMetaSet(); + abstract S createView(S data); + abstract long emptySize(); + + public StoredSet(AccordState.ReadWrite readWrite) + { + super(readWrite); + } + + @Override + public boolean equals(Object o) + { + preGet(); + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + StoredSet that = (StoredSet) o; + return Objects.equals(set, that.set); + } + + @Override + public int hashCode() + { + preGet(); + return Objects.hash(set); + } + + @Override + public String valueString() + { + return view.stream() + .map(Object::toString) + .collect(Collectors.joining(", ", "{", "}")); + } + + public void unload() + { + preUnload(); + set = null; + view = null; + additions = null; + deletions = null; + } + + void setInternal(S set) + { + this.set = set; + this.view = createView(set); + } + + public void load(S set) + { + preLoad(); + setInternal(set); + } + + public S getView() + { + preGet(); + return view; + } + + public void blindAdd(T item) + { + preBlindChange(); + if (hasValue()) + set.add(item); + + if (additions == null) + additions = createMetaSet(); + + additions.add(item); + if (deletions != null) + deletions.remove(item); + } + + public void blindRemove(T item) + { + preBlindChange(); + if (hasValue()) + set.remove(item); + + if (!wasCleared()) + { + if (deletions == null) + deletions = createMetaSet(); + deletions.add(item); + } + if (additions != null) + additions.remove(item); + } + + public void clear() + { + clearModifiedFlag(); + preClear(); + setInternal(createDataSet()); + } + + @Override + public void clearModifiedFlag() + { + super.clearModifiedFlag(); + if (additions != null) additions.clear(); + if (deletions != null) deletions.clear(); + } + + public boolean hasAdditions() + { + return additions != null && !additions.isEmpty(); + } + + public boolean hasDeletions() + { + return deletions != null && !deletions.isEmpty(); + } + + public void forEachAddition(Consumer consumer) + { + if (additions != null) + additions.forEach(consumer); + } + + public void forEachDeletion(Consumer consumer) + { + if (deletions != null) + deletions.forEach(consumer); + } + + public long estimatedSizeOnHeap(ToLongFunction measure) + { + long size = emptySize(); + if (hasValue() && !set.isEmpty()) + { + for (T val : set) + size += measure.applyAsLong(val); + } + return size; + } + + public interface Changes + { + void forEachAddition(Consumer consumer); + void forEachDeletion(Consumer consumer); + } + + public static class Navigable> extends StoredSet> implements Changes + { + private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new Navigable<>(AccordState.ReadWrite.FULL)); + + public Navigable(AccordState.ReadWrite readWrite) { super(readWrite); } + + @Override + NavigableSet createDataSet() + { + return new TreeSet<>(); + } + + @Override + NavigableSet createMetaSet() + { + return new TreeSet<>(); + } + + @Override + NavigableSet createView(NavigableSet data) + { + return Collections.unmodifiableNavigableSet(data); + } + + @Override + long emptySize() + { + return EMPTY_SIZE; + } + } + + public static class DeterministicIdentity extends StoredSet> implements Changes + { + private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new DeterministicIdentity<>(AccordState.ReadWrite.FULL)); + + public DeterministicIdentity(AccordState.ReadWrite readWrite) { super(readWrite); } + + @Override + DeterministicIdentitySet createDataSet() + { + return new DeterministicIdentitySet<>(); + } + + @Override + Set createMetaSet() + { + return new HashSet<>(); + } + + @Override + Set createView(Set data) + { + return Collections.unmodifiableSet(data); + } + + @Override + long emptySize() + { + return EMPTY_SIZE; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredValue.java b/src/java/org/apache/cassandra/service/accord/store/StoredValue.java new file mode 100644 index 000000000000..3e08c8b43a13 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/store/StoredValue.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import java.util.Objects; +import java.util.function.ToLongFunction; + +import org.apache.cassandra.service.accord.AccordState; +import org.apache.cassandra.utils.ObjectSizes; + +public class StoredValue extends AbstractStoredField +{ + public static final long EMPTY_SIZE = ObjectSizes.measure(new StoredValue<>(AccordState.ReadWrite.FULL)); + protected T value; + + public StoredValue(AccordState.ReadWrite readWrite) + { + super(readWrite); + } + + @Override + public boolean equals(Object o) + { + this.preGet(); + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + StoredValue that = (StoredValue) o; + that.preGet(); + return Objects.equals(value, that.value); + } + + @Override + public int hashCode() + { + preGet(); + return Objects.hash(value); + } + + @Override + public String valueString() + { + return Objects.toString(value); + } + + public long estimatedSizeOnHeap(ToLongFunction measure) + { + if (!hasValue() || value == null) + return EMPTY_SIZE; + + return EMPTY_SIZE + measure.applyAsLong(value); + } + + public void unload() + { + preUnload(); + value = null; + } + + public void load(T value) + { + preLoad(); + this.value = value; + } + + public void set(T value) + { + preChange(); + this.value = value; + } + + public T get() + { + preGet(); + return value; + } + + public static class HistoryPreserving extends StoredValue + { + T previous; + + public HistoryPreserving(AccordState.ReadWrite readWrite) + { + super(readWrite); + } + + public T previous() + { + return previous; + } + + @Override + public void unload() + { + super.unload(); + previous = null; + } + + @Override + public void load(T value) + { + super.load(value); + previous = value; + } + + @Override + public void clearModifiedFlag() + { + super.clearModifiedFlag(); + previous = value; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java b/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java new file mode 100644 index 000000000000..e5381aacdfef --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Iterators; + +import accord.api.Key; +import accord.primitives.Keys; +import org.apache.cassandra.service.accord.api.PartitionKey; + +/** + * Immutable collection of items, sorted first by their partition key + */ +public abstract class AbstractKeySorted implements Iterable +{ + public static final String ITEMS_OUT_OF_ORDER_MESSAGE = "Items are out of order ([%s] %s >= [%s] %s)"; + + protected final Keys itemKeys; + protected final T[] items; + + public AbstractKeySorted(T[] items) + { + this.items = items; + validateOrder(); + this.itemKeys = extractItemKeys(); + } + + public AbstractKeySorted(List items) + { + T[] arr = newArray(items.size()); + items.toArray(arr); + Arrays.sort(arr, this::compare); + this.items = arr; + validateOrder(); + this.itemKeys = extractItemKeys(); + } + + private Keys extractItemKeys() + { + SortedSet keysSet = new TreeSet<>(Key::compareTo); + forEach(i -> keysSet.add(getKey(i))); + return new Keys(keysSet); + } + + @Override + public Iterator iterator() + { + return Iterators.forArray(items); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + Arrays.stream(items) + .map(Objects::toString) + .collect(Collectors.joining(", ", "{", "}")); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AbstractKeySorted that = (AbstractKeySorted) o; + return Arrays.equals(items, that.items); + } + + @Override + public int hashCode() + { + return Arrays.hashCode(items); + } + + @VisibleForTesting + public Keys keys() + { + return itemKeys; + } + + /** + * Compare the non-key component of items (since this class handles sorting by key) + */ + abstract int compareNonKeyFields(T left, T right); + + abstract PartitionKey getKey(T item); + abstract T[] newArray(int size); + + private int compare(T left, T right) + { + int cmp = getKey(left).compareTo(getKey(right)); + return cmp != 0 ? cmp : compareNonKeyFields(left, right); + } + + @VisibleForTesting + void validateOrder() + { + for (int i = 1; i < items.length; i++) + { + T prev = items[i-1]; + T next = items[i]; + + if (compare(prev, next) >= 0) + throw new IllegalStateException(String.format(ITEMS_OUT_OF_ORDER_MESSAGE, i - 1, prev, i, next)); + } + } + + public int size() + { + return items.length; + } + + public void forEachWithKey(PartitionKey key, Consumer consumer) + { + for (int i = firstPossibleKeyIdx(key); i < items.length && getKey(items[i]).equals(key); i++) + consumer.accept(items[i]); + } + + private int firstPossibleKeyIdx(PartitionKey key) + { + int idx = Arrays.binarySearch(items, key, (l, r) -> { + PartitionKey lk = getKey((T) l); + PartitionKey rk = (PartitionKey) r; + int cmp = lk.compareTo(rk); + return cmp != 0 ? cmp : 1; + }); + + return -1 - idx; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java b/src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java new file mode 100644 index 000000000000..b8011c68e08e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.nio.ByteBuffer; +import java.util.Objects; + +import javax.annotation.concurrent.NotThreadSafe; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.service.accord.AccordSerializers; + +/** + * Item that is serialized by default + */ +@NotThreadSafe +public abstract class AbstractSerialized +{ + private final ByteBuffer bytes; + private T memoized = null; + + public AbstractSerialized(ByteBuffer bytes) + { + this.bytes = bytes; + } + + public AbstractSerialized(T value) + { + this.bytes = AccordSerializers.serialize(value, serializer()); + this.memoized = value; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AbstractSerialized that = (AbstractSerialized) o; + return bytes.equals(that.bytes); + } + + @Override + public int hashCode() + { + return Objects.hash(bytes); + } + + @Override + public String toString() + { + return get().toString(); + } + + protected abstract IVersionedSerializer serializer(); + + protected T get() + { + if (memoized == null) + memoized = AccordSerializers.deserialize(bytes, serializer()); + return memoized; + } + + protected ByteBuffer bytes() + { + return bytes; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java new file mode 100644 index 000000000000..afe7e87e0ae8 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.util.Collections; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.UpdateParameters; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.partitions.Partition; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.ClientState; + +public class AccordUpdateParameters +{ + private final TxnData data; + private final QueryOptions options; + + public AccordUpdateParameters(TxnData data, QueryOptions options) + { + this.data = data; + this.options = options; + } + + public TxnData getData() + { + return data; + } + + public UpdateParameters updateParameters(TableMetadata metadata, int rowIndex) + { + // This is currently only used by Guardrails, but this logically have issues with Accord as drifts in config + // values could cause unexpected issues in Accord. (ex. some nodes reject writes while others accept) + // For the time being, guardrails are disabled for Accord queries. + ClientState disabledGuardrails = null; + + // What we use here doesn't matter as they get replaced before actually performing the write. + // see org.apache.cassandra.service.accord.txn.TxnWrite.Update.write + int nowInSeconds = 42; + long timestamp = nowInSeconds; + + // TODO: How should Accord work with TTL? + int ttl = metadata.params.defaultTimeToLive; + return new UpdateParameters(metadata, + disabledGuardrails, + options, + timestamp, + nowInSeconds, + ttl, + prefetchRow(metadata, rowIndex)); + } + + private Map prefetchRow(TableMetadata metadata, int index) + { + for (Map.Entry e : data.entrySet()) + { + TxnDataName name = e.getKey(); + if (name.isAutoRead() && name.atIndex(index)) + return ImmutableMap.of(name.getDecoratedKey(metadata), e.getValue()); + } + return Collections.emptyMap(); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java b/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java new file mode 100644 index 000000000000..905667d35c69 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java @@ -0,0 +1,584 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; + +import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.cql3.conditions.ColumnCondition; +import org.apache.cassandra.cql3.conditions.ColumnCondition.Bound; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.UserType; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.ColumnData; +import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.transport.ProtocolVersion; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static com.google.common.base.Preconditions.checkNotNull; + +import static org.apache.cassandra.service.accord.AccordSerializers.clusteringSerializer; +import static org.apache.cassandra.service.accord.txn.TxnRead.SERIAL_READ; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; +import static org.apache.cassandra.utils.CollectionSerializers.serializedListSize; + +public abstract class TxnCondition +{ + private interface ConditionSerializer + { + void serialize(T condition, DataOutputPlus out, int version) throws IOException; + T deserialize(DataInputPlus in, int version, Kind kind) throws IOException; + long serializedSize(T condition, int version); + } + + public enum Kind + { + NONE("n/a", null), + AND("AND", null), + OR("OR", null), + IS_NOT_NULL("IS NOT NULL", null), + IS_NULL("IS NULL", null), + EQUAL("=", Operator.EQ), + NOT_EQUAL("!=", Operator.NEQ), + GREATER_THAN(">", Operator.GT), + GREATER_THAN_OR_EQUAL(">=", Operator.GTE), + LESS_THAN("<", Operator.LT), + LESS_THAN_OR_EQUAL("<=", Operator.LTE), + COLUMN_CONDITIONS("COLUMN_CONDITIONS", null); + + @Nonnull + private final String symbol; + @Nullable + private final Operator operator; + + Kind(String symbol, Operator operator) + { + this.symbol = symbol; + this.operator = operator; + } + + @SuppressWarnings("rawtypes") + private ConditionSerializer serializer() + { + switch (this) + { + case IS_NOT_NULL: + case IS_NULL: + return Exists.serializer; + case EQUAL: + case NOT_EQUAL: + case LESS_THAN: + case LESS_THAN_OR_EQUAL: + case GREATER_THAN: + case GREATER_THAN_OR_EQUAL: + return Value.serializer; + case AND: + case OR: + return BooleanGroup.serializer; + case NONE: + return None.serializer; + case COLUMN_CONDITIONS: + return ColumnConditionsAdapter.serializer; + default: + throw new IllegalArgumentException("No serializer exists for kind " + this); + } + } + } + + protected final Kind kind; + + public TxnCondition(Kind kind) + { + this.kind = kind; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TxnCondition condition = (TxnCondition) o; + return kind == condition.kind; + } + + @Override + public int hashCode() + { + return Objects.hash(kind); + } + + public Kind kind() + { + return kind; + } + + public abstract boolean applies(TxnData data); + + private static class None extends TxnCondition + { + private static final None instance = new None(); + + private None() + { + super(Kind.NONE); + } + + @Override + public String toString() + { + return kind.toString(); + } + + @Override + public boolean applies(TxnData data) + { + return true; + } + + private static final ConditionSerializer serializer = new ConditionSerializer() + { + @Override + public void serialize(None condition, DataOutputPlus out, int version) {} + @Override + public None deserialize(DataInputPlus in, int version, Kind kind) { return instance; } + @Override + public long serializedSize(None condition, int version) { return 0; } + }; + } + + public static TxnCondition none() + { + return None.instance; + } + + public static class Exists extends TxnCondition + { + private static final Set KINDS = ImmutableSet.of(Kind.IS_NOT_NULL, Kind.IS_NULL); + + public final TxnReference reference; + + public Exists(TxnReference reference, Kind kind) + { + super(kind); + Preconditions.checkArgument(KINDS.contains(kind), "Kind " + kind + " cannot be used with an existence condition"); + this.reference = reference; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + Exists exists = (Exists) o; + return reference.equals(exists.reference); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), reference); + } + + @Override + public String toString() + { + return reference.toString() + ' ' + kind.toString(); + } + + @Override + public boolean applies(TxnData data) + { + FilteredPartition partition = reference.getPartition(data); + boolean exists = partition != null && !partition.isEmpty(); + + Row row = null; + if (exists) + { + row = reference.getRow(partition); + exists = row != null && !row.isEmpty(); + } + + if (exists && reference.selectsColumn()) + { + ColumnData columnData = reference.getColumnData(row); + + if (columnData == null) + { + exists = false; + } + else if (columnData.column().isComplex()) + { + if (reference.isElementSelection() || reference.isFieldSelection()) + { + Cell cell = (Cell) columnData; + exists = !cell.isTombstone(); + } + else + { + // TODO: Is this even necessary, given the partition is already filtered? + if (!((ComplexColumnData) columnData).complexDeletion().isLive()) + exists = false; + } + } + else if (reference.isElementSelection()) + { + // This is frozen, so check if the Cell is a tombstone and that the element is present. + Cell cell = (Cell) columnData; + ByteBuffer element = reference.getFrozenCollectionElement(cell); + exists = element != null && !cell.isTombstone(); + } + else if (reference.isFieldSelection()) + { + // This is frozen, so check if the Cell is a tombstone and that the field is present. + Cell cell = (Cell) columnData; + ByteBuffer fieldValue = reference.getFrozenFieldValue(cell); + exists = fieldValue != null && !cell.isTombstone(); + } + else + { + Cell cell = (Cell) columnData; + exists = !cell.isTombstone(); + } + } + + switch (kind()) + { + case IS_NOT_NULL: + return exists; + case IS_NULL: + return !exists; + default: + throw new IllegalStateException(); + } + } + + private static final ConditionSerializer serializer = new ConditionSerializer() + { + @Override + public void serialize(Exists condition, DataOutputPlus out, int version) throws IOException + { + TxnReference.serializer.serialize(condition.reference, out, version); + } + + @Override + public Exists deserialize(DataInputPlus in, int version, Kind kind) throws IOException + { + return new Exists(TxnReference.serializer.deserialize(in, version), kind); + } + + @Override + public long serializedSize(Exists condition, int version) + { + return TxnReference.serializer.serializedSize(condition.reference, version); + } + }; + } + + public static class ColumnConditionsAdapter extends TxnCondition { + @Nonnull + public final Collection bounds; + + @Nonnull + public final Clustering clustering; + + public ColumnConditionsAdapter(Clustering clustering, Collection bounds) + { + super(Kind.COLUMN_CONDITIONS); + checkNotNull(bounds); + checkNotNull(clustering); + this.bounds = bounds; + this.clustering = clustering; + } + + @Override + public boolean applies(@Nonnull TxnData data) + { + checkNotNull(data); + FilteredPartition partition = data.get(SERIAL_READ); + Row row = partition != null ? partition.getRow(clustering) : null; + for (Bound bound : bounds) + { + if (!bound.appliesTo(row)) + return false; + } + return true; + } + + private static final ConditionSerializer serializer = new ConditionSerializer() + { + @Override + public void serialize(ColumnConditionsAdapter condition, DataOutputPlus out, int version) throws IOException + { + clusteringSerializer.serialize(condition.clustering, out, version); + serializeCollection(condition.bounds, out, version, Bound.serializer); + } + + @Override + public ColumnConditionsAdapter deserialize(DataInputPlus in, int version, Kind ignored) throws IOException + { + Clustering clustering = clusteringSerializer.deserialize(in, version); + List bounds = deserializeList(in, version, Bound.serializer); + return new ColumnConditionsAdapter(clustering, bounds); + } + + @Override + public long serializedSize(ColumnConditionsAdapter condition, int version) + { + return clusteringSerializer.serializedSize(condition.clustering, version) + + serializedCollectionSize(condition.bounds, version, Bound.serializer); + } + }; + } + + public static class Value extends TxnCondition + { + private static final Set KINDS = ImmutableSet.of(Kind.EQUAL, Kind.NOT_EQUAL, + Kind.GREATER_THAN, Kind.GREATER_THAN_OR_EQUAL, + Kind.LESS_THAN, Kind.LESS_THAN_OR_EQUAL); + + private final TxnReference reference; + private final ByteBuffer value; + private final ProtocolVersion version; + + public Value(TxnReference reference, Kind kind, ByteBuffer value, ProtocolVersion version) + { + super(kind); + Preconditions.checkArgument(KINDS.contains(kind), "Kind " + kind + " cannot be used with a value condition"); + Preconditions.checkArgument(reference.selectsColumn(), "Reference " + reference + " does not select a column"); + this.reference = reference; + this.value = value; + this.version = version; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + Value value1 = (Value) o; + return reference.equals(value1.reference) && value.equals(value1.value); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), reference, value); + } + + @Override + public String toString() + { + return reference.toString() + ' ' + kind.symbol + " 0x" + ByteBufferUtil.bytesToHex(value); + } + + private Bound getBounds(TxnData data) + { + ColumnMetadata column = reference.column(); + if (column.isPartitionKey()) + { + ByteBuffer bb = reference.getPartitionKey(data); + return new ColumnCondition.SimpleBound(column, kind.operator, value) + { + @Override + protected ByteBuffer rowValue(Row row) + { + return bb; + } + }; + } + else if (column.isClusteringColumn()) + return new ColumnCondition.SimpleClusteringBound(column, kind.operator, value); + AbstractType type = column.type; + if (type.isCollection()) + { + if (reference.selectsPath()) + return new ColumnCondition.ElementOrFieldAccessBound(column, reference.path().get(0), kind.operator, value); + if (type.isMultiCell()) + return new ColumnCondition.MultiCellBound(column, kind.operator, value); + } + else if (type.isUDT()) + { + if (reference.isFieldSelection()) + { + UserType ut = (UserType) type; + return new ColumnCondition.ElementOrFieldAccessBound(column, ut.fieldName(reference.path()).bytes, kind.operator, value); + } + if (type.isMultiCell()) + return new ColumnCondition.MultiCellBound(column, kind.operator, value); + } + return new ColumnCondition.SimpleBound(column, kind.operator, value); + } + + @Override + public boolean applies(TxnData data) + { + return getBounds(data).appliesTo(reference.getRow(data)); + } + + private static final ConditionSerializer serializer = new ConditionSerializer<>() + { + @Override + public void serialize(Value condition, DataOutputPlus out, int version) throws IOException + { + TxnReference.serializer.serialize(condition.reference, out, version); + ByteBufferUtil.writeWithVIntLength(condition.value, out); + out.writeUTF(condition.version.name()); + } + + @Override + public Value deserialize(DataInputPlus in, int version, Kind kind) throws IOException + { + TxnReference reference = TxnReference.serializer.deserialize(in, version); + ByteBuffer value = ByteBufferUtil.readWithVIntLength(in); + ProtocolVersion protocolVersion = ProtocolVersion.valueOf(in.readUTF()); + return new Value(reference, kind, value, protocolVersion); + } + + @Override + public long serializedSize(Value condition, int version) + { + long size = 0; + size += TxnReference.serializer.serializedSize(condition.reference, version); + size += ByteBufferUtil.serializedSizeWithVIntLength(condition.value); + size += TypeSizes.sizeof(condition.version.name()); + return size; + } + }; + } + + public static class BooleanGroup extends TxnCondition + { + private static final Set KINDS = ImmutableSet.of(Kind.AND, Kind.OR); + + public final List conditions; + + public BooleanGroup(Kind kind, List conditions) + { + super(kind); + Preconditions.checkArgument(KINDS.contains(kind), "Kind " + kind + " cannot be used at the root of a boolean condition"); + this.conditions = conditions; + } + + @Override + public String toString() + { + return '(' + conditions.stream().map(Objects::toString).reduce((a, b) -> a + ' ' + kind.symbol + ' ' + b).orElse("") + ')'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + BooleanGroup that = (BooleanGroup) o; + return Objects.equals(conditions, that.conditions); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), conditions); + } + + @Override + public boolean applies(TxnData data) + { + switch (kind()) + { + case AND: + return Iterables.all(conditions, c -> c.applies(data)); + case OR: + return Iterables.any(conditions, c -> c.applies(data)); + default: + throw new IllegalStateException(); + } + } + + private static final ConditionSerializer serializer = new ConditionSerializer() + { + @Override + public void serialize(BooleanGroup condition, DataOutputPlus out, int version) throws IOException + { + serializeList(condition.conditions, out, version, TxnCondition.serializer); + } + + @Override + public BooleanGroup deserialize(DataInputPlus in, int version, Kind kind) throws IOException + { + return new BooleanGroup(kind, deserializeList(in, version, TxnCondition.serializer)); + } + + @Override + public long serializedSize(BooleanGroup condition, int version) + { + return serializedListSize(condition.conditions, version, TxnCondition.serializer); + } + }; + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @SuppressWarnings("unchecked") + @Override + public void serialize(TxnCondition condition, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(condition.kind.ordinal()); + condition.kind.serializer().serialize(condition, out, version); + } + + @Override + public TxnCondition deserialize(DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.values()[in.readUnsignedVInt32()]; + return kind.serializer().deserialize(in, version, kind); + } + + @SuppressWarnings("unchecked") + @Override + public long serializedSize(TxnCondition condition, int version) + { + return TypeSizes.sizeofUnsignedVInt(condition.kind.ordinal()) + condition.kind.serializer().serializedSize(condition, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnData.java b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java new file mode 100644 index 000000000000..a095d8ba78de --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import accord.api.Data; +import accord.api.Result; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.rows.DeserializationHelper; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.db.rows.UnfilteredRowIteratorSerializer; +import org.apache.cassandra.db.rows.UnfilteredRowIterators; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.ObjectSizes; + +public class TxnData implements Data, Result, Iterable +{ + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnData()); + + private final Map data; + + public TxnData(Map data) + { + this.data = data; + } + + public TxnData() + { + this(new HashMap<>()); + } + + public void put(TxnDataName name, FilteredPartition partition) + { + data.put(name, partition); + } + + public FilteredPartition get(TxnDataName name) + { + return data.get(name); + } + + public Set> entrySet() + { + return data.entrySet(); + } + + @Override + public Data merge(Data data) + { + TxnData that = (TxnData) data; + TxnData merged = new TxnData(); + this.data.forEach(merged::put); + that.data.forEach(merged::put); + return merged; + } + + public static Data merge(Data left, Data right) + { + if (left == null) + return right; + if (right == null) + return null; + + return left.merge(right); + } + + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE; + for (Map.Entry entry : data.entrySet()) + { + size += entry.getKey().estimatedSizeOnHeap(); + + for (Row row : entry.getValue()) + size += row.unsharedHeapSize(); + + // TODO: Include the other parts of FilteredPartition after we rebase to pull in BTreePartitionData? + } + return size; + } + + @Override + public Iterator iterator() + { + return data.values().iterator(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TxnData that = (TxnData) o; + return data.equals(that.data); + } + + private static final IVersionedSerializer partitionSerializer = new IVersionedSerializer() + { + @Override + public void serialize(FilteredPartition partition, DataOutputPlus out, int version) throws IOException + { + partition.metadata().id.serialize(out); + try (UnfilteredRowIterator iterator = partition.unfilteredIterator()) + { + UnfilteredRowIteratorSerializer.serializer.serialize(iterator, ColumnFilter.all(partition.metadata()), out, version, partition.rowCount()); + } + } + + @Override + public FilteredPartition deserialize(DataInputPlus in, int version) throws IOException + { + TableMetadata metadata = Schema.instance.getExistingTableMetadata(TableId.deserialize(in)); + try (UnfilteredRowIterator partition = UnfilteredRowIteratorSerializer.serializer.deserialize(in, version, metadata, ColumnFilter.all(metadata), DeserializationHelper.Flag.FROM_REMOTE)) + { + return new FilteredPartition(UnfilteredRowIterators.filter(partition, 0)); + } + } + + @Override + public long serializedSize(FilteredPartition partition, int version) + { + TableId tableId = partition.metadata().id; + long size = tableId.serializedSize(); + try (UnfilteredRowIterator iterator = partition.unfilteredIterator()) + { + return size + UnfilteredRowIteratorSerializer.serializer.serializedSize(iterator, ColumnFilter.all(partition.metadata()), version, partition.rowCount()); + } + } + }; + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnData data, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(data.data.size()); + for (Map.Entry entry : data.data.entrySet()) + { + TxnDataName.serializer.serialize(entry.getKey(), out, version); + partitionSerializer.serialize(entry.getValue(), out, version); + } + } + + @Override + public TxnData deserialize(DataInputPlus in, int version) throws IOException + { + Map data = new HashMap<>(); + long size = in.readUnsignedVInt(); + for (int i=0; i entry : data.data.entrySet()) + { + size += TxnDataName.serializer.serializedSize(entry.getKey(), version); + size += partitionSerializer.serializedSize(entry.getValue(), version); + } + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java b/src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java new file mode 100644 index 000000000000..92a9be1ab3b0 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; + +import static com.google.common.base.Preconditions.checkNotNull; + +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; +import static org.apache.cassandra.service.accord.AccordSerializers.clusteringSerializer; +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + +public class TxnDataName implements Comparable +{ + private static final TxnDataName RETURNING = new TxnDataName(Kind.RETURNING); + private static final long EMPTY_SIZE = ObjectSizes.measure(RETURNING); + + public enum Kind + { + USER((byte) 1), + RETURNING((byte) 2), + AUTO_READ((byte) 3); + + private final byte value; + + Kind(byte value) + { + this.value = value; + } + + public static Kind from(byte b) + { + switch (b) + { + case 1: + return USER; + case 2: + return RETURNING; + case 3: + return AUTO_READ; + default: + throw new IllegalArgumentException("Unknown kind: " + b); + } + } + } + + @Nonnull + private final Kind kind; + + @Nonnull + private final String[] parts; + + @Nullable + private final Clustering clustering; + + public TxnDataName(@Nonnull Kind kind, @Nonnull String... parts) + { + this(kind, null, parts); + } + + public TxnDataName(@Nonnull Kind kind, @Nullable Clustering clustering, @Nonnull String... parts) + { + checkNotNull(kind); + checkNotNull(parts); + this.kind = kind; + this.parts = parts; + this.clustering = clustering; + } + + public static TxnDataName user(String name) + { + return new TxnDataName(Kind.USER, name); + } + + public static TxnDataName returning() + { + return RETURNING; + } + + public static TxnDataName returning(int index) + { + return new TxnDataName(Kind.RETURNING, Integer.toString(index)); + } + + public static TxnDataName partitionRead(TableMetadata metadata, DecoratedKey key, int index) + { + return new TxnDataName(Kind.AUTO_READ, metadata.keyspace, metadata.name, bytesToString(key.getKey()), String.valueOf(index)); + } + + private static String bytesToString(ByteBuffer bytes) + { + return ByteBufferUtil.bytesToHex(bytes); + } + + private static ByteBuffer stringToBytes(String string) + { + return ByteBufferUtil.hexToBytes(string); + } + + public Kind getKind() + { + return kind; + } + + public List getParts() + { + return Collections.unmodifiableList(Arrays.asList(parts)); + } + + public boolean isAutoRead() + { + return kind == Kind.AUTO_READ; + } + + public DecoratedKey getDecoratedKey(TableMetadata metadata) + { + checkKind(Kind.AUTO_READ); + ByteBuffer data = stringToBytes(parts[2]); + return metadata.partitioner.decorateKey(data); + } + + public boolean atIndex(int index) + { + checkKind(Kind.AUTO_READ); + return Integer.parseInt(parts[3]) == index; + } + + private void checkKind(Kind expected) + { + if (kind != expected) + throw new IllegalStateException("Expected kind " + expected + " but is " + kind); + } + + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE; + for (String part : parts) + size += ObjectSizes.sizeOf(part); + return size; + } + + @Override + public int compareTo(TxnDataName o) + { + int rc = kind.compareTo(o.kind); + if (rc != 0) + return rc; + // same kind has same length + int size = parts.length; + assert o.parts.length == size : String.format("Expected other.parts.length == %d but was %d", size, o.parts.length); + for (int i = 0; i < size; i++) + { + rc = parts[i].compareTo(o.parts[i]); + if (rc != 0) + return rc; + } + return 0; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TxnDataName that = (TxnDataName) o; + return kind == that.kind && Arrays.equals(parts, that.parts); + } + + @Override + public int hashCode() + { + int result = Objects.hash(kind); + result = 31 * result + Arrays.hashCode(parts); + return result; + } + + public String name() + { + return String.join(":", parts); + } + + @Override + public String toString() + { + return kind.name() + ':' + name(); + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnDataName t, DataOutputPlus out, int version) throws IOException + { + out.writeByte(t.kind.value); + out.writeUnsignedVInt32(t.parts.length); + for (String part : t.parts) + out.writeUTF(part); + serializeNullable(t.clustering, out, version, clusteringSerializer); + } + + @Override + public TxnDataName deserialize(DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.from(in.readByte()); + int length = in.readUnsignedVInt32(); + String[] parts = new String[length]; + for (int i = 0; i < length; i++) + parts[i] = in.readUTF(); + Clustering clustering = deserializeNullable(in, version, clusteringSerializer); + return new TxnDataName(kind, clustering, parts); + } + + @Override + public long serializedSize(TxnDataName t, int version) + { + int size = Byte.BYTES + sizeofUnsignedVInt(t.parts.length); + for (String part : t.parts) + size += TypeSizes.sizeof(part); + size += serializedNullableSize(t.clustering, version, clusteringSerializer); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java new file mode 100644 index 000000000000..534f4aa26295 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +import accord.api.Data; +import accord.local.SafeCommandStore; +import accord.primitives.Timestamp; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.ReadCommand; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.partitions.PartitionIterator; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.concurrent.Future; + +import static org.apache.cassandra.utils.ByteBufferUtil.readWithVIntLength; +import static org.apache.cassandra.utils.ByteBufferUtil.serializedSizeWithVIntLength; +import static org.apache.cassandra.utils.ByteBufferUtil.writeWithVIntLength; + +public class TxnNamedRead extends AbstractSerialized +{ + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnNamedRead(null, null, null)); + + private final TxnDataName name; + private final PartitionKey key; + + public TxnNamedRead(TxnDataName name, SinglePartitionReadCommand value) + { + super(value); + this.name = name; + this.key = new PartitionKey(value.metadata().keyspace, value.metadata().id, value.partitionKey()); + } + + private TxnNamedRead(TxnDataName name, PartitionKey key, ByteBuffer bytes) + { + super(bytes); + this.name = name; + this.key = key; + } + + public long estimatedSizeOnHeap() + { + return EMPTY_SIZE + name.estimatedSizeOnHeap() + key.estimatedSizeOnHeap() + ByteBufferUtil.estimatedSizeOnHeap(bytes()); + } + + @Override + protected IVersionedSerializer serializer() + { + return SinglePartitionReadCommand.serializer; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + TxnNamedRead namedRead = (TxnNamedRead) o; + return name.equals(namedRead.name) && key.equals(namedRead.key); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), name, key); + } + + @Override + public String toString() + { + return "TxnNamedRead{name='" + name + '\'' + ", key=" + key + ", update=" + get() + '}'; + } + + public TxnDataName txnDataName() + { + return name; + } + + public PartitionKey key() + { + return key; + } + + public Future read(boolean isForWriteTxn, SafeCommandStore safeStore, Timestamp executeAt) + { + SinglePartitionReadCommand command = (SinglePartitionReadCommand) get(); + // TODO (required, safety): before release, double check reasoning that this is safe +// AccordCommandsForKey cfk = ((SafeAccordCommandStore)safeStore).commandsForKey(key); +// int nowInSeconds = cfk.nowInSecondsFor(executeAt, isForWriteTxn); + // It's fine for our nowInSeconds to lag slightly our insertion timestamp, as to the user + // this simply looks like the transaction witnessed TTL'd data and the data then expired + // immediately after the transaction executed, and this simplifies things a great deal + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(executeAt.hlc()); + + return Stage.READ.submit(() -> + { + SinglePartitionReadCommand read = command.withNowInSec(nowInSeconds); + + try (ReadExecutionController controller = read.executionController(); + UnfilteredPartitionIterator partition = read.executeLocally(controller); + PartitionIterator iterator = UnfilteredPartitionIterators.filter(partition, read.nowInSec())) + { + TxnData result = new TxnData(); + if (iterator.hasNext()) + { + FilteredPartition filtered = FilteredPartition.create(iterator.next()); + if (filtered.hasRows() || read.selectsFullPartition()) + result.put(name, filtered); + } + return result; + } + }); + } + + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnNamedRead read, DataOutputPlus out, int version) throws IOException + { + TxnDataName.serializer.serialize(read.name, out, version); + PartitionKey.serializer.serialize(read.key, out, version); + writeWithVIntLength(read.bytes(), out); + } + + @Override + public TxnNamedRead deserialize(DataInputPlus in, int version) throws IOException + { + TxnDataName name = TxnDataName.serializer.deserialize(in, version); + PartitionKey key = PartitionKey.serializer.deserialize(in, version); + ByteBuffer bytes = readWithVIntLength(in); + return new TxnNamedRead(name, key, bytes); + } + + @Override + public long serializedSize(TxnNamedRead read, int version) + { + long size = 0; + size += TxnDataName.serializer.serializedSize(read.name, version); + size += PartitionKey.serializer.serializedSize(read.key, version); + size += serializedSizeWithVIntLength(read.bytes()); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java new file mode 100644 index 000000000000..29b28606ba78 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import javax.annotation.Nullable; + +import com.google.common.base.Preconditions; + +import accord.api.Data; +import accord.api.Query; +import accord.api.Read; +import accord.api.Result; +import accord.api.Update; +import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.ObjectSizes; + +import static com.google.common.base.Preconditions.checkNotNull; + +public abstract class TxnQuery implements Query +{ + public static final TxnQuery ALL = new TxnQuery() + { + @Override + protected byte type() + { + return 1; + } + + @Override + public Result compute(TxnId txnId, Data data, @Nullable Read read, @Nullable Update update) + { + return data != null ? (TxnData) data : new TxnData(); + } + }; + + public static final TxnQuery NONE = new TxnQuery() + { + @Override + protected byte type() + { + return 2; + } + + @Override + public Result compute(TxnId txnId, Data data, @Nullable Read read, @Nullable Update update) + { + return new TxnData(); + } + }; + + public static final TxnQuery CONDITION = new TxnQuery() + { + @Override + protected byte type() + { + return 3; + } + + @Override + public Result compute(TxnId txnId, Data data, @Nullable Read read, Update update) + { + checkNotNull(txnId, "txnId should not be null"); + checkNotNull(data, "data should not be null"); + checkNotNull(update, "update should not be null"); + TxnUpdate txnUpdate = (TxnUpdate)update; + boolean conditionCheck = txnUpdate.checkCondition(data); + // If the condition applied an empty result indicates success + if (conditionCheck) + return new TxnData(); + else + // If it failed to apply the partition contents (if present) are returned and it indicates failure + return (TxnData)data; + } + }; + + private static final long SIZE = ObjectSizes.measure(ALL); + + private TxnQuery() {} + + abstract protected byte type(); + + public long estimatedSizeOnHeap() + { + return SIZE; + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnQuery query, DataOutputPlus out, int version) throws IOException + { + Preconditions.checkArgument(query == null || query == ALL || query == NONE || query == CONDITION); + out.writeByte(query == null ? 0 : query.type()); + } + + @Override + public TxnQuery deserialize(DataInputPlus in, int version) throws IOException + { + switch (in.readByte()) + { + default: throw new AssertionError(); + case 0: return null; + case 1: return ALL; + case 2: return NONE; + case 3: return CONDITION; + } + } + + @Override + public long serializedSize(TxnQuery query, int version) + { + Preconditions.checkArgument(query == null || query == ALL || query == NONE || query == CONDITION); + return TypeSizes.sizeof((byte)2); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java new file mode 100644 index 000000000000..eeb57f13f5ae --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.function.BiConsumer; + +import com.google.common.collect.ImmutableList; + +import accord.api.Data; +import accord.api.DataStore; +import accord.api.Read; +import accord.local.SafeCommandStore; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.Seekable; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.Simulate; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.ImmediateFuture; + +import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; +import static org.apache.cassandra.utils.ArraySerializers.serializeArray; +import static org.apache.cassandra.utils.ArraySerializers.serializedArraySize; +import static org.apache.cassandra.utils.Simulate.With.MONITORS; + +public class TxnRead extends AbstractKeySorted implements Read +{ + // There is only potentially one partition in a CAS and SERIAL/LOCAL_SERIAL read + public static final String SERIAL_READ_NAME = "SERIAL_READ"; + public static final TxnDataName SERIAL_READ = TxnDataName.user(SERIAL_READ_NAME); + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnRead(new TxnNamedRead[0], null)); + + private final Keys txnKeys; + + public TxnRead(TxnNamedRead[] items, Keys txnKeys) + { + super(items); + this.txnKeys = txnKeys; + } + + public TxnRead(List items, Keys txnKeys) + { + super(items); + this.txnKeys = txnKeys; + } + + public static TxnRead createSerialRead(SinglePartitionReadCommand readCommand) + { + TxnNamedRead read = new TxnNamedRead(SERIAL_READ, readCommand); + return new TxnRead(ImmutableList.of(read), Keys.of(read.key())); + } + + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE; + for (TxnNamedRead read : items) + size += read.estimatedSizeOnHeap(); + return size; + } + + @Override + int compareNonKeyFields(TxnNamedRead left, TxnNamedRead right) + { + return left.txnDataName().compareTo(right.txnDataName()); + } + + @Override + PartitionKey getKey(TxnNamedRead read) + { + return read.key(); + } + + @Override + TxnNamedRead[] newArray(int size) + { + return new TxnNamedRead[size]; + } + + @Override + public Keys keys() + { + return txnKeys; + } + + public Keys readKeys() + { + return itemKeys; + } + + @Override + public Read slice(Ranges ranges) + { + Keys keys = itemKeys.slice(ranges); + List reads = new ArrayList<>(keys.size()); + + for (TxnNamedRead read : items) + if (keys.contains(read.key())) + reads.add(read); + + return new TxnRead(reads, txnKeys.slice(ranges)); + } + + @Override + public Read merge(Read read) + { + List reads = new ArrayList<>(items.length); + Collections.addAll(reads, items); + + for (TxnNamedRead namedRead : (TxnRead) read) + if (!reads.contains(namedRead)) + reads.add(namedRead); + + return new TxnRead(reads, txnKeys.with((Keys)read.keys())); + } + + @Override + public Future read(Seekable key, Txn.Kind kind, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) + { + List> futures = new ArrayList<>(); + forEachWithKey((PartitionKey) key, read -> futures.add(read.read(kind.isWrite(), safeStore, executeAt))); + + if (futures.isEmpty()) + return ImmediateFuture.success(new TxnData()); + + if (futures.size() == 1) + return futures.get(0); + + return new MultiReadFuture(futures); + } + + @Simulate(with = MONITORS) + private static class MultiReadFuture extends AsyncPromise implements BiConsumer + { + private Data result = null; + private int pending; + + public MultiReadFuture(List> futures) + { + pending = futures.size(); + listen(futures); + } + + private synchronized void listen(List> futures) + { + for (int i=0, mi=futures.size(); i serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnRead read, DataOutputPlus out, int version) throws IOException + { + KeySerializers.keys.serialize(read.txnKeys, out, version); + serializeArray(read.items, out, version, TxnNamedRead.serializer); + } + + @Override + public TxnRead deserialize(DataInputPlus in, int version) throws IOException + { + Keys keys = KeySerializers.keys.deserialize(in, version); + return new TxnRead(deserializeArray(in, version, TxnNamedRead.serializer, TxnNamedRead[]::new), keys); + } + + @Override + public long serializedSize(TxnRead read, int version) + { + long size = KeySerializers.keys.serializedSize(read.txnKeys, version); + size += serializedArraySize(read.items, version, TxnNamedRead.serializer); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java new file mode 100644 index 000000000000..0da007930339 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Objects; + +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.CompositeType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.UserType; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.db.rows.ColumnData; +import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.apache.cassandra.db.marshal.CollectionType.Kind.SET; +import static org.apache.cassandra.service.accord.AccordSerializers.columnMetadataSerializer; + +public class TxnReference +{ + private final TxnDataName tuple; + private final ColumnMetadata column; + private final CellPath path; + + public TxnReference(TxnDataName tuple, ColumnMetadata column, CellPath path) + { + this.tuple = tuple; + this.column = column; + this.path = path; + } + + public TxnReference(TxnDataName tuple, ColumnMetadata column) + { + this(tuple, column, null); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TxnReference reference = (TxnReference) o; + return tuple.equals(reference.tuple) && Objects.equals(column, reference.column) && Objects.equals(path, reference.path); + } + + @Override + public int hashCode() + { + return Objects.hash(tuple, column, path); + } + + @Override + public String toString() + { + StringBuilder sb = new StringBuilder().append(tuple); + if (column != null) + sb.append(':').append(column.ksName).append('.').append(column.cfName).append('.').append(column.name.toString()); + if (path != null) + sb.append(path); + return sb.toString(); + } + + public ColumnMetadata column() + { + return column; + } + + public CellPath path() + { + return path; + } + + public boolean selectsColumn() + { + return column != null; + } + + public boolean selectsPath() + { + return selectsColumn() && path != null; + } + + public boolean isElementSelection() + { + return selectsPath() && column.type.isCollection(); + } + + public boolean isFieldSelection() + { + return selectsPath() && column.type.isUDT(); + } + + public ByteBuffer getPartitionKey(TxnData data) + { + FilteredPartition partition = getPartition(data); + if (partition == null) return null; + return partition.metadata().partitionKeyColumns().size() == 1 + ? partition.partitionKey().getKey() + : ((CompositeType) partition.metadata().partitionKeyType).split(partition.partitionKey().getKey())[column.position()]; + } + + public ByteBuffer getClusteringKey(TxnData data) + { + Row row = getRow(data); + if (row == null) + return null; + return row.clustering().bufferAt(column.position()); + } + + public FilteredPartition getPartition(TxnData data) + { + return data.get(tuple); + } + + public Row getRow(TxnData data) + { + FilteredPartition partition = getPartition(data); + return partition != null ? getRow(partition) : null; + } + + public Row getRow(FilteredPartition partition) + { + if (column != null && column.isStatic()) + return partition.staticRow(); + assert partition.rowCount() <= 1 : "Multi-row references are not allowed"; + if (partition.rowCount() == 0) + return null; + return partition.getAtIdx(0); + } + + public ColumnData getColumnData(Row row) + { + if (column.isComplex() && path == null) + return row.getComplexColumnData(column); + + if (path != null && column.type.isMultiCell()) + { + if (column.type.isCollection()) + { + CollectionType collectionType = (CollectionType) column.type; + + if (collectionType.kind == CollectionType.Kind.LIST) + return row.getComplexColumnData(column).getCellByIndex(ByteBufferUtil.toInt(path.get(0))); + } + + return row.getCell(column, path); + } + + return row.getCell(column); + } + + public ColumnData getColumnData(TxnData data) + { + Row row = getRow(data); + return row != null ? getColumnData(row) : null; + } + + public ByteBuffer getFrozenCollectionElement(Cell collection) + { + CollectionType collectionType = (CollectionType) column.type; + return collectionType.getSerializer().getSerializedValue(collection.buffer(), path.get(0), collectionType.nameComparator()); + } + + public ByteBuffer getFrozenFieldValue(Cell udt) + { + UserType userType = (UserType) column.type; + int field = ByteBufferUtil.getUnsignedShort(path.get(0), 0); + return userType.unpack(udt.buffer(), ByteBufferAccessor.instance).get(field); + } + + public AbstractType getFieldSelectionType() + { + assert isFieldSelection() : "No field selection type exists"; + UserType userType = (UserType) column.type; + int field = ByteBufferUtil.getUnsignedShort(path.get(0), 0); + return userType.fieldType(field); + } + + public ByteBuffer toByteBuffer(TxnData data, AbstractType receiver) + { + // TODO: confirm all references can be satisfied as part of the txn condition + AbstractType type = column().type; + + // Modify the type we'll check if the reference is to a collection element. + if (selectsPath()) + { + if (type.isCollection()) + { + CollectionType collectionType = (CollectionType) type; + type = collectionType.kind == SET ? collectionType.nameComparator() : collectionType.valueComparator(); + } + else if (type.isUDT()) + type = getFieldSelectionType(); + } + + // Account for frozen collection and reversed clustering key references: + AbstractType receiveType = type.isFrozenCollection() ? receiver.freeze().unwrap() : receiver.unwrap(); + if (!(receiveType == type.unwrap())) + throw new IllegalArgumentException("Receiving type " + receiveType + " does not match " + type.unwrap()); + + if (column().isPartitionKey()) + return getPartitionKey(data); + else if (column().isClusteringColumn()) + return getClusteringKey(data); + + ColumnData columnData = getColumnData(data); + + if (columnData == null) + return null; + + if (selectsComplex()) + { + ComplexColumnData complex = (ComplexColumnData) columnData; + + if (type instanceof CollectionType) + { + CollectionType col = (CollectionType) type; + return col.serializeForNativeProtocol(complex.iterator()); + } + else if (type instanceof UserType) + { + UserType udt = (UserType) type; + return udt.serializeForNativeProtocol(complex.iterator()); + } + + throw new UnsupportedOperationException("Unsupported complex type: " + type); + } + else if (selectsFrozenCollectionElement()) + { + // If a path is selected for a non-frozen collection, the element will already be materialized. + return getFrozenCollectionElement((Cell) columnData); + } + else if (selectsFrozenUDTField()) + { + return getFrozenFieldValue((Cell) columnData); + } + + Cell cell = (Cell) columnData; + return selectsSetElement() ? cell.path().get(0) : cell.buffer(); + } + + private boolean selectsComplex() + { + return column.isComplex() && path == null; + } + + private boolean selectsSetElement() + { + return selectsPath() && column.type instanceof SetType; + } + + private boolean selectsFrozenCollectionElement() + { + return selectsPath() && column.type.isFrozenCollection(); + } + + private boolean selectsFrozenUDTField() + { + return selectsPath() && column.type.isUDT() && !column.type.isMultiCell(); + } + + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnReference reference, DataOutputPlus out, int version) throws IOException + { + TxnDataName.serializer.serialize(reference.tuple, out, version); + out.writeBoolean(reference.column != null); + if (reference.column != null) + columnMetadataSerializer.serialize(reference.column, out, version); + out.writeBoolean(reference.path != null); + if (reference.path != null) + CollectionType.cellPathSerializer.serialize(reference.path, out); + } + + @Override + public TxnReference deserialize(DataInputPlus in, int version) throws IOException + { + TxnDataName name = TxnDataName.serializer.deserialize(in, version); + ColumnMetadata column = in.readBoolean() ? columnMetadataSerializer.deserialize(in, version) : null; + CellPath path = in.readBoolean() ? CollectionType.cellPathSerializer.deserialize(in) : null; + return new TxnReference(name, column, path); + } + + @Override + public long serializedSize(TxnReference reference, int version) + { + long size = 0; + size += TxnDataName.serializer.serializedSize(reference.tuple, version); + size += TypeSizes.BOOL_SIZE; + if (reference.column != null) + size += columnMetadataSerializer.serializedSize(reference.column, version); + if (reference.path != null) + size += CollectionType.cellPathSerializer.serializedSize(reference.path); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java new file mode 100644 index 000000000000..e49c1803dc98 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import org.apache.cassandra.cql3.FieldIdentifier; +import org.apache.cassandra.cql3.Operation; +import org.apache.cassandra.cql3.UpdateParameters; +import org.apache.cassandra.cql3.terms.Constants; +import org.apache.cassandra.cql3.terms.Lists; +import org.apache.cassandra.cql3.terms.Maps; +import org.apache.cassandra.cql3.terms.MultiElements; +import org.apache.cassandra.cql3.terms.Sets; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.cql3.terms.UserTypes; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.TupleType; +import org.apache.cassandra.db.marshal.UserType; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.service.accord.AccordSerializers; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.apache.cassandra.db.marshal.CollectionType.Kind.MAP; +import static org.apache.cassandra.service.accord.AccordSerializers.columnMetadataSerializer; + +public class TxnReferenceOperation +{ + private static final Map, Kind> operationKindMap = initOperationKindMap(); + + private static Map, Kind> initOperationKindMap() + { + Map, Kind> temp = new HashMap<>(); + temp.put(Sets.Adder.class, Kind.SetAdder); + temp.put(Constants.Adder.class, Kind.ConstantAdder); + temp.put(Lists.Appender.class, Kind.ListAppender); + temp.put(Sets.Discarder.class, Kind.SetDiscarder); + temp.put(Lists.Discarder.class, Kind.ListDiscarder); + temp.put(Lists.Prepender.class, Kind.ListPrepender); + temp.put(Maps.Putter.class, Kind.MapPutter); + temp.put(Lists.Setter.class, Kind.ListSetter); + temp.put(Sets.Setter.class, Kind.SetSetter); + temp.put(Maps.Setter.class, Kind.MapSetter); + temp.put(UserTypes.Setter.class, Kind.UserTypeSetter); + temp.put(Constants.Setter.class, Kind.ConstantSetter); + temp.put(Constants.Substracter.class, Kind.ConstantSubtracter); + temp.put(Maps.SetterByKey.class, Kind.MapSetterByKey); + temp.put(Lists.SetterByIndex.class, Kind.ListSetterByIndex); + temp.put(UserTypes.SetterByField.class, Kind.UserTypeSetterByField); + return temp; + } + + private interface ToOperation + { + Operation apply(ColumnMetadata column, Term keyOrIndex, FieldIdentifier field, Term value); + } + + public enum Kind + { + SetAdder((byte) 1, (column, keyOrIndex, field, value) -> new Sets.Adder(column, value)), + ConstantAdder((byte) 2, (column, keyOrIndex, field, value) -> new Constants.Adder(column, value)), + ListAppender((byte) 3, (column, keyOrIndex, field, value) -> new Lists.Appender(column, value)), + SetDiscarder((byte) 4, (column, keyOrIndex, field, value) -> new Sets.Discarder(column, value)), + ListDiscarder((byte) 5, (column, keyOrIndex, field, value) -> new Lists.Discarder(column, value)), + ListPrepender((byte) 6, (column, keyOrIndex, field, value) -> new Lists.Prepender(column, value)), + MapPutter((byte) 7, (column, keyOrIndex, field, value) -> new Maps.Putter(column, value)), + ListSetter((byte) 8, (column, keyOrIndex, field, value) -> new Lists.Setter(column, value)), + SetSetter((byte) 9, (column, keyOrIndex, field, value) -> new Sets.Setter(column, value)), + MapSetter((byte) 10, (column, keyOrIndex, field, value) -> new Maps.Setter(column, value)), + UserTypeSetter((byte) 11, (column, keyOrIndex, field, value) -> new UserTypes.Setter(column, value)), + ConstantSetter((byte) 12, (column, keyOrIndex, field, value) -> new Constants.Setter(column, value)), + ConstantSubtracter((byte) 13, (column, keyOrIndex, field, value) -> new Constants.Substracter(column, value)), + MapSetterByKey((byte) 14, (column, keyOrIndex, field, value) -> new Maps.SetterByKey(column, keyOrIndex, value)), + ListSetterByIndex((byte) 15, (column, keyOrIndex, field, value) -> new Lists.SetterByIndex(column, keyOrIndex, value)), + UserTypeSetterByField((byte) 16, (column, keyOrIndex, field, value) -> new UserTypes.SetterByField(column, field, value)); + + private final byte id; + private final ToOperation toOperation; + + Kind(byte id, ToOperation toOperation) + { + this.id = id; + this.toOperation = toOperation; + } + + public static Kind from(byte b) + { + for (Kind k : values()) + if (k.id == b) + return k; + + throw new IllegalArgumentException("There is no kind with id: " + b); + } + + public static Kind from(Operation operation) + { + Class clazz = operation.getClass(); + Kind kind = operationKindMap.get(clazz); + if (kind == null) + throw new IllegalArgumentException("There is no Kind associated with operation: " + clazz); + return kind; + } + + public static Kind setterFor(ColumnMetadata column) + { + if (column.type instanceof ListType) + return ListSetter; + else if (column.type instanceof SetType) + return SetSetter; + else if (column.type instanceof MapType) + return MapSetter; + else if (column.type instanceof UserType) + return UserTypeSetter; + + return ConstantSetter; + } + + public Operation toOperation(ColumnMetadata column, Term keyOrIndex, FieldIdentifier field, Term value) + { + return toOperation.apply(column, keyOrIndex, field, value); + } + } + + private final Kind kind; + private final ColumnMetadata receiver; + private final ByteBuffer key; + private final ByteBuffer field; + private final TxnReferenceValue value; + private final AbstractType valueType; + + public TxnReferenceOperation(Kind kind, ColumnMetadata receiver, ByteBuffer key, ByteBuffer field, TxnReferenceValue value) + { + this.kind = kind; + this.receiver = receiver; + this.key = key; + this.field = field; + + // We don't expect operators on clustering keys, but unwrap just in case. + AbstractType receiverType = receiver.type.unwrap(); + + if (kind == TxnReferenceOperation.Kind.SetDiscarder && receiverType.isCollection() && (((CollectionType) receiverType).kind == MAP)) + { + // The value for a map subtraction is actually a set (see Operation.Substraction) + this.valueType = SetType.getInstance(((MapType) receiverType).getKeysType(), true); + } + else if (kind == Kind.MapSetterByKey || kind == Kind.ListSetterByIndex) + { + this.valueType = ((CollectionType) receiverType).valueComparator(); + } + else if (kind == Kind.UserTypeSetterByField) + { + UserType userType = (UserType) receiverType; + CellPath fieldPath = userType.cellPathForField(new FieldIdentifier(field)); + this.valueType = userType.fieldType(fieldPath); + } + else + { + this.valueType = receiverType; + } + + this.value = value; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TxnReferenceOperation that = (TxnReferenceOperation) o; + return Objects.equals(receiver, that.receiver) + && kind == that.kind + && Objects.equals(key, that.key) + && Objects.equals(field, that.field) + && Objects.equals(value, that.value); + } + + @Override + public int hashCode() + { + return Objects.hash(receiver, kind, key, field, value); + } + + @Override + public String toString() + { + return receiver + " = " + value; + } + + public ColumnMetadata receiver() + { + return receiver; + } + + public void apply(TxnData data, DecoratedKey key, UpdateParameters up) + { + Operation operation = toOperation(data); + operation.execute(key, up); + } + + private Operation toOperation(TxnData data) + { + FieldIdentifier fieldIdentifier = field == null ? null : new FieldIdentifier(field); + Term valueTerm = toTerm(data, valueType); + Term keyorIndexTerm = key == null ? null : toTerm(key, valueType); + return kind.toOperation(receiver, keyorIndexTerm, fieldIdentifier, valueTerm); + } + + private Term toTerm(TxnData data, AbstractType receivingType) + { + ByteBuffer bytes = value.compute(data, receivingType); + if (bytes == null) + return Constants.NULL_VALUE; + return toTerm(bytes, receivingType); + } + + private Term toTerm(ByteBuffer bytes, AbstractType receivingType) + { + if (receivingType.isCollection()) + return AccordSerializers.deserializeCqlCollectionAsTerm(bytes, receivingType); + else if (receivingType.isUDT()) + return MultiElements.Value.fromSerialized(bytes, (UserType) receivingType); + else if (receivingType.isTuple()) + return MultiElements.Value.fromSerialized(bytes, (TupleType) receivingType); + + return new Constants.Value(bytes); + } + + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnReferenceOperation operation, DataOutputPlus out, int version) throws IOException + { + out.writeByte(operation.kind.id); + columnMetadataSerializer.serialize(operation.receiver, out, version); + TxnReferenceValue.serializer.serialize(operation.value, out, version); + + out.writeBoolean(operation.key != null); + if (operation.key != null) + ByteBufferUtil.writeWithVIntLength(operation.key, out); + + out.writeBoolean(operation.field != null); + if (operation.field != null) + ByteBufferUtil.writeWithVIntLength(operation.field, out); + } + + @Override + public TxnReferenceOperation deserialize(DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.from(in.readByte()); + ColumnMetadata receiver = columnMetadataSerializer.deserialize(in, version); + TxnReferenceValue value = TxnReferenceValue.serializer.deserialize(in, version); + ByteBuffer key = in.readBoolean() ? ByteBufferUtil.readWithVIntLength(in) : null; + ByteBuffer field = in.readBoolean() ? ByteBufferUtil.readWithVIntLength(in) : null; + return new TxnReferenceOperation(kind, receiver, key, field, value); + } + + @Override + public long serializedSize(TxnReferenceOperation operation, int version) + { + long size = Byte.BYTES; + size += columnMetadataSerializer.serializedSize(operation.receiver, version); + size += TxnReferenceValue.serializer.serializedSize(operation.value, version); + + if (operation.key != null) + size += ByteBufferUtil.serializedSizeWithVIntLength(operation.key); + + if (operation.field != null) + size += ByteBufferUtil.serializedSizeWithVIntLength(operation.field); + + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java new file mode 100644 index 000000000000..f926ad3df181 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +import com.google.common.base.Preconditions; + +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.TableMetadata; + +import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializedListSize; +import static org.apache.cassandra.service.accord.AccordSerializers.tableMetadataSerializer; + +public class TxnReferenceOperations +{ + private static final TxnReferenceOperations EMPTY = new TxnReferenceOperations(null, null, Collections.emptyList(), Collections.emptyList()); + + private final TableMetadata metadata; + final Clustering clustering; + final List regulars; + final List statics; + + public TxnReferenceOperations(TableMetadata metadata, Clustering clustering, List regulars, List statics) + { + this.metadata = metadata; + Preconditions.checkArgument(clustering != null || regulars.isEmpty()); + this.clustering = clustering; + this.regulars = regulars; + this.statics = statics; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TxnReferenceOperations that = (TxnReferenceOperations) o; + return metadata.equals(that.metadata) && Objects.equals(clustering, that.clustering) && regulars.equals(that.regulars) && statics.equals(that.statics); + } + + @Override + public int hashCode() + { + return Objects.hash(metadata, clustering, regulars, statics); + } + + @Override + public String toString() + { + return "TxnReferenceOperations{metadata=" + metadata + ", clustering=" + clustering + ", regulars=" + regulars + ", statics=" + statics + '}'; + } + + public static TxnReferenceOperations empty() + { + return EMPTY; + } + + public boolean isEmpty() + { + return regulars.isEmpty() && statics.isEmpty(); + } + + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnReferenceOperations operations, DataOutputPlus out, int version) throws IOException + { + out.writeBoolean(!operations.isEmpty()); + if (operations.isEmpty()) + return; + tableMetadataSerializer.serialize(operations.metadata, out, version); + out.writeBoolean(operations.clustering != null); + if (operations.clustering != null) + Clustering.serializer.serialize(operations.clustering, out, version, operations.metadata.comparator.subtypes()); + serializeList(operations.regulars, out, version, TxnReferenceOperation.serializer); + serializeList(operations.statics, out, version, TxnReferenceOperation.serializer); + + } + + @Override + public TxnReferenceOperations deserialize(DataInputPlus in, int version) throws IOException + { + if (!in.readBoolean()) + return TxnReferenceOperations.empty(); + TableMetadata metadata = tableMetadataSerializer.deserialize(in, version); + Clustering clustering = in.readBoolean() ? Clustering.serializer.deserialize(in, version, metadata.comparator.subtypes()) : null; + return new TxnReferenceOperations(metadata, clustering, deserializeList(in, version, TxnReferenceOperation.serializer), + deserializeList(in, version, TxnReferenceOperation.serializer)); + } + + @Override + public long serializedSize(TxnReferenceOperations operations, int version) + { + long size = TypeSizes.BOOL_SIZE; + if (operations.isEmpty()) + return size; + size += tableMetadataSerializer.serializedSize(operations.metadata, version); + size += TypeSizes.BOOL_SIZE; + if (operations.clustering != null) + size += Clustering.serializer.serializedSize(operations.clustering, version, operations.metadata.comparator.subtypes()); + size += serializedListSize(operations.regulars, version, TxnReferenceOperation.serializer); + size += serializedListSize(operations.statics, version, TxnReferenceOperation.serializer); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java new file mode 100644 index 000000000000..73033fe61c91 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Objects; + +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.ByteBufferUtil; + +public abstract class TxnReferenceValue +{ + private interface Serializer + { + void serialize(T t, DataOutputPlus out, int version) throws IOException; + T deserialize(DataInputPlus in, int version, Kind kind) throws IOException; + long serializedSize(T t, int version); + } + + enum Kind + { + CONSTANT(Constant.serializer), + SUBSTITUTION(Substitution.serializer); + + @SuppressWarnings("rawtypes") + final Serializer serializer; + + Kind(Serializer serializer) + { + this.serializer = serializer; + } + } + + protected abstract Kind kind(); + abstract ByteBuffer compute(TxnData data, AbstractType receiver); + + public static class Constant extends TxnReferenceValue + { + private final ByteBuffer value; + + public Constant(ByteBuffer value) + { + this.value = value; + } + + public ByteBuffer getValue() + { + return value; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Constant constant = (Constant) o; + return value.equals(constant.value); + } + + @Override + public int hashCode() + { + return Objects.hash(value); + } + + @Override + public String toString() + { + return "Constant=" + ByteBufferUtil.bytesToHex(value); + } + + @Override + public Kind kind() + { + return Kind.CONSTANT; + } + + @Override + public ByteBuffer compute(TxnData data, AbstractType receiver) + { + return value; + } + + private static final Serializer serializer = new Serializer() + { + @Override + public void serialize(Constant constant, DataOutputPlus out, int version) throws IOException + { + ByteBufferUtil.writeWithVIntLength(constant.value, out); + } + + @Override + public Constant deserialize(DataInputPlus in, int version, Kind kind) throws IOException + { + return new Constant(ByteBufferUtil.readWithVIntLength(in)); + } + + @Override + public long serializedSize(Constant constant, int version) + { + return ByteBufferUtil.serializedSizeWithVIntLength(constant.value); + } + }; + } + + public static class Substitution extends TxnReferenceValue + { + private final TxnReference reference; + + public Substitution(TxnReference reference) + { + this.reference = reference; + } + + @Override + public String toString() + { + return reference.toString(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Substitution that = (Substitution) o; + return reference.equals(that.reference); + } + + @Override + public int hashCode() + { + return Objects.hash(reference); + } + + @Override + public Kind kind() + { + return Kind.SUBSTITUTION; + } + + @Override + public ByteBuffer compute(TxnData data, AbstractType receiver) + { + return reference.toByteBuffer(data, receiver); + } + + private static final Serializer serializer = new Serializer() + { + @Override + public void serialize(Substitution substitution, DataOutputPlus out, int version) throws IOException + { + TxnReference.serializer.serialize(substitution.reference, out, version); + } + + @Override + public Substitution deserialize(DataInputPlus in, int version, Kind kind) throws IOException + { + return new Substitution(TxnReference.serializer.deserialize(in, version)); + } + + @Override + public long serializedSize(Substitution substitution, int version) + { + return TxnReference.serializer.serializedSize(substitution.reference, version); + } + }; + } + + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @SuppressWarnings("unchecked") + @Override + public void serialize(TxnReferenceValue value, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(value.kind().ordinal()); + value.kind().serializer.serialize(value, out, version); + } + + @Override + public TxnReferenceValue deserialize(DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.values()[in.readUnsignedVInt32()]; + return kind.serializer.deserialize(in, version, kind); + } + + @SuppressWarnings("unchecked") + @Override + public long serializedSize(TxnReferenceValue value, int version) + { + return TypeSizes.sizeofUnsignedVInt(value.kind().ordinal()) + value.kind().serializer.serializedSize(value, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java new file mode 100644 index 000000000000..f7b6565dbe89 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +import accord.api.Data; +import accord.api.Key; +import accord.api.Update; +import accord.api.Write; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.service.accord.AccordSerializers; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.transport.ProtocolVersion; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; + +import static accord.utils.SortedArrays.Search.CEIL; +import static org.apache.cassandra.service.accord.AccordSerializers.serialize; +import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; +import static org.apache.cassandra.utils.ArraySerializers.serializeArray; +import static org.apache.cassandra.utils.ArraySerializers.serializedArraySize; +import static org.apache.cassandra.utils.ByteBufferUtil.readWithVIntLength; +import static org.apache.cassandra.utils.ByteBufferUtil.serializedSizeWithVIntLength; +import static org.apache.cassandra.utils.ByteBufferUtil.writeWithVIntLength; + +public class TxnUpdate implements Update +{ + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnUpdate(null, new ByteBuffer[0], null)); + + private final Keys keys; + private final ByteBuffer[] fragments; + private final ByteBuffer condition; + + // Memoize computation of condition + private Boolean conditionResult; + + public TxnUpdate(List fragments, TxnCondition condition) + { + // TODO: Figure out a way to shove keys into TxnCondition, and have it implement slice/merge. + this.keys = Keys.of(fragments, fragment -> fragment.key); + fragments.sort(TxnWrite.Fragment::compareKeys); + this.fragments = toSerializedValuesArray(keys, fragments, fragment -> fragment.key, TxnWrite.Fragment.serializer); + this.condition = serialize(condition, TxnCondition.serializer); + } + + private TxnUpdate(Keys keys, ByteBuffer[] fragments, ByteBuffer condition) + { + this.keys = keys; + this.fragments = fragments; + this.condition = condition; + } + + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE + ByteBufferUtil.estimatedSizeOnHeap(condition); + for (ByteBuffer update : fragments) + size += ByteBufferUtil.estimatedSizeOnHeap(update); + return size; + } + + @Override + public String toString() + { + return "TxnUpdate{updates=" + deserialize(fragments, TxnWrite.Fragment.serializer) + + ", condition=" + AccordSerializers.deserialize(condition, TxnCondition.serializer) + '}'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TxnUpdate txnUpdate = (TxnUpdate) o; + return Arrays.equals(fragments, txnUpdate.fragments) && Objects.equals(condition, txnUpdate.condition); + } + + @Override + public int hashCode() + { + int result = Objects.hash(condition); + result = 31 * result + Arrays.hashCode(fragments); + return result; + } + + @Override + public Keys keys() + { + // TODO: It doesn't seem to affect correctness, but should we return the union of the fragment + condition keys? + return keys; + } + + @Override + public Update slice(Ranges ranges) + { + Keys keys = this.keys.slice(ranges); + // TODO: Slice the condition. + return new TxnUpdate(keys, select(this.keys, keys, fragments), condition); + } + + private static ByteBuffer[] select(Keys in, Keys out, ByteBuffer[] from) + { + ByteBuffer[] result = new ByteBuffer[out.size()]; + int j = 0; + for (int i = 0 ; i < out.size() ; ++i) + { + j = in.findNext(j, out.get(i), CEIL); + result[i] = from[j]; + } + return result; + } + + @Override + public Update merge(Update update) + { + // TODO: special method for linear merging keyed and non-keyed lists simultaneously + TxnUpdate that = (TxnUpdate) update; + Keys mergedKeys = this.keys.with(that.keys); + ByteBuffer[] mergedFragments = merge(this.keys, that.keys, this.fragments, that.fragments, mergedKeys.size()); + return new TxnUpdate(mergedKeys, mergedFragments, condition); + } + + private static ByteBuffer[] merge(Keys leftKeys, Keys rightKeys, ByteBuffer[] left, ByteBuffer[] right, int outputSize) + { + ByteBuffer[] out = new ByteBuffer[outputSize]; + int l = 0, r = 0, o = 0; + while (l < leftKeys.size() && r < rightKeys.size()) + { + int c = leftKeys.get(l).compareTo(rightKeys.get(r)); + if (c < 0) { out[o++] = left[l++]; } + else if (c > 0) { out[o++] = right[r++]; } + else if (ByteBufferUtil.compareUnsigned(left[l], right[r]) != 0) { throw new IllegalStateException("The same keys have different values in each input"); } + else { out[o++] = left[l++]; r++; } + } + while (l < leftKeys.size()) { out[o++] = left[l++]; } + while (r < rightKeys.size()) { out[o++] = right[r++]; } + return out; + } + + @Override + public Write apply(Data data) + { + if (!checkCondition(data)) + return TxnWrite.EMPTY; + + List fragments = deserialize(this.fragments, TxnWrite.Fragment.serializer); + List updates = new ArrayList<>(fragments.size()); + QueryOptions options = QueryOptions.forProtocolVersion(ProtocolVersion.CURRENT); + AccordUpdateParameters parameters = new AccordUpdateParameters((TxnData) data, options); + + for (TxnWrite.Fragment fragment : fragments) + updates.add(fragment.complete(parameters)); + + return new TxnWrite(updates); + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnUpdate update, DataOutputPlus out, int version) throws IOException + { + KeySerializers.keys.serialize(update.keys, out, version); + writeWithVIntLength(update.condition, out); + serializeArray(update.fragments, out, version, ByteBufferUtil.byteBufferSerializer); + } + + @Override + public TxnUpdate deserialize(DataInputPlus in, int version) throws IOException + { + Keys keys = KeySerializers.keys.deserialize(in, version); + ByteBuffer condition = readWithVIntLength(in); + ByteBuffer[] fragments = deserializeArray(in, version, ByteBufferUtil.byteBufferSerializer, ByteBuffer[]::new); + return new TxnUpdate(keys, fragments, condition); + } + + @Override + public long serializedSize(TxnUpdate update, int version) + { + long size = KeySerializers.keys.serializedSize(update.keys, version); + size += serializedSizeWithVIntLength(update.condition); + size += serializedArraySize(update.fragments, version, ByteBufferUtil.byteBufferSerializer); + assert(ByteBufferUtil.serialized(this, update, version).remaining() == size); + return size; + } + }; + + private static ByteBuffer[] toSerializedValuesArray(Keys keys, List items, Function toKey, IVersionedSerializer serializer) + { + ByteBuffer[] result = new ByteBuffer[keys.size()]; + int i = 0, mi = items.size(), ki = 0; + while (i < mi) + { + Key key = toKey.apply(items.get(i)); + int j = i + 1; + while (j < mi && toKey.apply(items.get(j)).equals(key)) + ++j; + + int nextki = keys.findNext(ki, key, CEIL); + Arrays.fill(result, ki, nextki, ByteBufferUtil.EMPTY_BYTE_BUFFER); + ki = nextki; + result[ki++] = toSerializedValues(items, i, j, serializer, MessagingService.current_version); + i = j; + } + Arrays.fill(result, ki, result.length, ByteBufferUtil.EMPTY_BYTE_BUFFER); + return result; + } + + private static ByteBuffer toSerializedValues(List items, int start, int end, IVersionedSerializer serializer, int version) + { + long size = TypeSizes.sizeofUnsignedVInt(version) + TypeSizes.sizeofUnsignedVInt(end - start); + for (int i = start ; i < end ; ++i) + size += serializer.serializedSize(items.get(i), version); + + try (DataOutputBuffer out = new DataOutputBuffer((int) size)) + { + out.writeUnsignedVInt32(version); + out.writeUnsignedVInt32(end - start); + for (int i = start ; i < end ; ++i) + serializer.serialize(items.get(i), out, version); + return out.buffer(false); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + private static List deserialize(ByteBuffer bytes, IVersionedSerializer serializer) + { + if (!bytes.hasRemaining()) + return Collections.emptyList(); + + try (DataInputBuffer in = new DataInputBuffer(bytes, true)) + { + int version = in.readUnsignedVInt32(); + int count = in.readUnsignedVInt32(); + switch (count) + { + case 0: throw new IllegalStateException(); + case 1: return Collections.singletonList(serializer.deserialize(in, version)); + default: + List result = new ArrayList<>(); + for (int i = 0 ; i < count ; ++i) + result.add(serializer.deserialize(in, version)); + return result; + } + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + private static List deserialize(ByteBuffer[] buffers, IVersionedSerializer serializer) + { + List result = new ArrayList<>(buffers.length); + for (ByteBuffer bytes : buffers) + result.addAll(deserialize(bytes, serializer)); + return result; + } + + // maybeCheckCondition? checkConditionMemoized? + public boolean checkCondition(Data data) + { + // Assert data that was memoized is same as data that is provided? + if (conditionResult != null) + return conditionResult; + TxnCondition condition = AccordSerializers.deserialize(this.condition, TxnCondition.serializer); + conditionResult = condition.applies((TxnData) data); + return conditionResult; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java new file mode 100644 index 000000000000..94593ffc520a --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; + +import accord.api.DataStore; +import accord.api.Key; +import accord.api.Write; +import accord.local.SafeCommandStore; +import accord.primitives.Seekable; +import accord.primitives.Timestamp; +import accord.primitives.Writes; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.cql3.UpdateParameters; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.Columns; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.concurrent.*; + +import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; +import static org.apache.cassandra.service.accord.AccordSerializers.partitionUpdateSerializer; +import static org.apache.cassandra.utils.ArraySerializers.serializeArray; +import static org.apache.cassandra.utils.ArraySerializers.serializedArraySize; + +public class TxnWrite extends AbstractKeySorted implements Write +{ + public static final TxnWrite EMPTY = new TxnWrite(Collections.emptyList()); + + private static final long EMPTY_SIZE = ObjectSizes.measure(EMPTY); + + public static class Update extends AbstractSerialized + { + private static final long EMPTY_SIZE = ObjectSizes.measure(new Update(null, 0, (ByteBuffer) null)); + public final PartitionKey key; + public final int index; + + public Update(PartitionKey key, int index, PartitionUpdate update) + { + super(update); + this.key = key; + this.index = index; + } + + private Update(PartitionKey key, int index, ByteBuffer bytes) + { + super(bytes); + this.key = key; + this.index = index; + } + + long estimatedSizeOnHeap() + { + return EMPTY_SIZE + + key.estimatedSizeOnHeap() + + ByteBufferUtil.estimatedSizeOnHeap(bytes()); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + Update update = (Update) o; + return index == update.index && key.equals(update.key); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), key, index); + } + + @Override + public String toString() + { + return "Complete{" + + "key=" + key + + ", index=" + index + + ", update=" + get() + + '}'; + } + + public Future write(long timestamp, int nowInSeconds) + { + PartitionUpdate update = new PartitionUpdate.Builder(get(), 0).updateAllTimestampAndLocalDeletionTime(timestamp, nowInSeconds).build(); + Mutation mutation = new Mutation(update); + return Stage.MUTATION.submit((Runnable) mutation::apply); + } + + @Override + protected IVersionedSerializer serializer() + { + return partitionUpdateSerializer; + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(Update write, DataOutputPlus out, int version) throws IOException + { + PartitionKey.serializer.serialize(write.key, out, version); + out.writeInt(write.index); + ByteBufferUtil.writeWithVIntLength(write.bytes(), out); + + } + + @Override + public Update deserialize(DataInputPlus in, int version) throws IOException + { + PartitionKey key = PartitionKey.serializer.deserialize(in, version); + int index = in.readInt(); + ByteBuffer bytes = ByteBufferUtil.readWithVIntLength(in); + return new Update(key, index, bytes); + } + + @Override + public long serializedSize(Update write, int version) + { + long size = 0; + size += PartitionKey.serializer.serializedSize(write.key, version); + size += TypeSizes.INT_SIZE; + size += ByteBufferUtil.serializedSizeWithVIntLength(write.bytes()); + return size; + } + }; + } + + + /** + * Partition update that can later be supplemented with data from the read phase + */ + public static class Fragment + { + public final PartitionKey key; + public final int index; + public final PartitionUpdate baseUpdate; + public final TxnReferenceOperations referenceOps; + + public Fragment(PartitionKey key, int index, PartitionUpdate baseUpdate, TxnReferenceOperations referenceOps) + { + this.key = key; + this.index = index; + this.baseUpdate = baseUpdate; + this.referenceOps = referenceOps; + } + + public Fragment(int index, PartitionUpdate baseUpdate, TxnReferenceOperations referenceOps) + { + this(PartitionKey.of(baseUpdate), index, baseUpdate, referenceOps); + } + + public static int compareKeys(Fragment left, Fragment right) + { + return left.key.compareTo(right.key); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Fragment fragment = (Fragment) o; + return index == fragment.index && key.equals(fragment.key) && baseUpdate.equals(fragment.baseUpdate) && referenceOps.equals(fragment.referenceOps); + } + + @Override + public int hashCode() + { + return Objects.hash(key, index, baseUpdate, referenceOps); + } + + @Override + public String toString() + { + return "Fragment{key=" + key + ", index=" + index + ", baseUpdate=" + baseUpdate + ", referenceOps=" + referenceOps + '}'; + } + + public Update complete(AccordUpdateParameters parameters) + { + if (referenceOps.isEmpty()) + return new Update(key, index, baseUpdate); + + DecoratedKey key = baseUpdate.partitionKey(); + PartitionUpdate.Builder updateBuilder = new PartitionUpdate.Builder(baseUpdate.metadata(), + key, + columns(baseUpdate, referenceOps), + baseUpdate.rowCount(), + baseUpdate.canHaveShadowedData()); + + UpdateParameters up = parameters.updateParameters(baseUpdate.metadata(), index); + TxnData data = parameters.getData(); + Row staticRow = applyUpdates(baseUpdate.staticRow(), referenceOps.statics, key, Clustering.STATIC_CLUSTERING, up, data); + + if (!staticRow.isEmpty()) + updateBuilder.add(staticRow); + + Row existing = !baseUpdate.isEmpty() ? Iterables.getOnlyElement(baseUpdate) : null; + Row row = applyUpdates(existing, referenceOps.regulars, key, referenceOps.clustering, up, data); + if (row != null) + updateBuilder.add(row); + + return new Update(this.key, index, updateBuilder.build()); + } + + private static Columns columns(Columns current, List referenceOps) + { + if (referenceOps.isEmpty()) + return current; + + Set combined = new HashSet<>(current); + referenceOps.forEach(op -> combined.add(op.receiver())); + return Columns.from(combined); + } + + private static RegularAndStaticColumns columns(PartitionUpdate update, TxnReferenceOperations referenceOps) + { + Preconditions.checkState(!referenceOps.isEmpty()); + RegularAndStaticColumns current = update.columns(); + return new RegularAndStaticColumns(columns(current.statics, referenceOps.statics), + columns(current.regulars, referenceOps.regulars)); + } + + private static Row applyUpdates(Row existing, List operations, DecoratedKey key, Clustering clustering, UpdateParameters up, TxnData data) + { + if (operations.isEmpty()) + return existing; + + if (existing != null && !existing.isEmpty()) + { + Preconditions.checkState(existing.clustering().equals(clustering)); + up.addRow(existing); + } + else + up.newRow(clustering); + + operations.forEach(op -> op.apply(data, key, up)); + return up.buildRow(); + } + + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(Fragment fragment, DataOutputPlus out, int version) throws IOException + { + PartitionKey.serializer.serialize(fragment.key, out, version); + out.writeInt(fragment.index); + partitionUpdateSerializer.serialize(fragment.baseUpdate, out, version); + TxnReferenceOperations.serializer.serialize(fragment.referenceOps, out, version); + } + + @Override + public Fragment deserialize(DataInputPlus in, int version) throws IOException + { + PartitionKey key = PartitionKey.serializer.deserialize(in, version); + int idx = in.readInt(); + PartitionUpdate baseUpdate = partitionUpdateSerializer.deserialize(in, version); + TxnReferenceOperations referenceOps = TxnReferenceOperations.serializer.deserialize(in, version); + return new Fragment(key, idx, baseUpdate, referenceOps); + } + + @Override + public long serializedSize(Fragment fragment, int version) + { + long size = 0; + size += PartitionKey.serializer.serializedSize(fragment.key, version); + size += TypeSizes.INT_SIZE; + size += partitionUpdateSerializer.serializedSize(fragment.baseUpdate, version); + size += TxnReferenceOperations.serializer.serializedSize(fragment.referenceOps, version); + return size; + } + }; + } + + private TxnWrite(Update[] items) + { + super(items); + } + + public TxnWrite(List items) + { + super(items); + } + + @Override + int compareNonKeyFields(Update left, Update right) + { + return Integer.compare(left.index, right.index); + } + + @Override + PartitionKey getKey(Update item) + { + return item.key; + } + + @Override + Update[] newArray(int size) + { + return new Update[size]; + } + + @Override + public Future apply(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) + { + AccordCommandsForKey cfk = ((SafeAccordCommandStore) safeStore).commandsForKey((Key)key); + // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing + // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; + // any that aren't can just use executeAt.hlc + long timestamp = cfk.timestampMicrosFor(executeAt, true); + // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) + int nowInSeconds = cfk.nowInSecondsFor(executeAt, true); + + List> futures = new ArrayList<>(); + forEachWithKey((PartitionKey) key, write -> futures.add(write.write(timestamp, nowInSeconds))); + + if (futures.isEmpty()) + return Writes.SUCCESS; + + if (futures.size() == 1) + return futures.get(0).flatMap(o -> Writes.SUCCESS); + + return FutureCombiner.allOf(futures).flatMap(objects -> Writes.SUCCESS); + } + + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE; + for (Update update : this) + size += update.estimatedSizeOnHeap(); + return size; + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(TxnWrite write, DataOutputPlus out, int version) throws IOException + { + serializeArray(write.items, out, version, Update.serializer); + } + + @Override + public TxnWrite deserialize(DataInputPlus in, int version) throws IOException + { + return new TxnWrite(deserializeArray(in, version, Update.serializer, Update[]::new)); + } + + @Override + public long serializedSize(TxnWrite write, int version) + { + return serializedArraySize(write.items, version, Update.serializer); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/paxos/Paxos.java b/src/java/org/apache/cassandra/service/paxos/Paxos.java index 15d2b320fb75..a82396cd2449 100644 --- a/src/java/org/apache/cassandra/service/paxos/Paxos.java +++ b/src/java/org/apache/cassandra/service/paxos/Paxos.java @@ -100,7 +100,6 @@ import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.triggers.TriggerExecutor; import org.apache.cassandra.utils.CassandraVersion; -import org.apache.cassandra.utils.CollectionSerializer; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteAccepted; import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteCommitted; @@ -135,7 +134,9 @@ import static org.apache.cassandra.service.paxos.PaxosPrepare.prepare; import static org.apache.cassandra.service.paxos.PaxosPropose.propose; import static org.apache.cassandra.utils.Clock.Global.nanoTime; -import static org.apache.cassandra.utils.CollectionSerializer.newHashSet; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeSet; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; import static org.apache.cassandra.utils.FBUtilities.getBroadcastAddressAndPort; import static org.apache.cassandra.utils.NoSpamLogger.Level.WARN; @@ -307,21 +308,21 @@ static class Serializer implements IVersionedSerializer { public void serialize(Electorate electorate, DataOutputPlus out, int version) throws IOException { - CollectionSerializer.serializeCollection(inetAddressAndPortSerializer, electorate.natural, out, version); - CollectionSerializer.serializeCollection(inetAddressAndPortSerializer, electorate.pending, out, version); + serializeCollection(electorate.natural, out, version, inetAddressAndPortSerializer); + serializeCollection(electorate.pending, out, version, inetAddressAndPortSerializer); } public Electorate deserialize(DataInputPlus in, int version) throws IOException { - Set endpoints = CollectionSerializer.deserializeCollection(inetAddressAndPortSerializer, newHashSet(), in, version); - Set pending = CollectionSerializer.deserializeCollection(inetAddressAndPortSerializer, newHashSet(), in, version); + Set endpoints = deserializeSet(in, version, inetAddressAndPortSerializer); + Set pending = deserializeSet(in, version, inetAddressAndPortSerializer); return new Electorate(endpoints, pending); } public long serializedSize(Electorate electorate, int version) { - return CollectionSerializer.serializedSizeCollection(inetAddressAndPortSerializer, electorate.natural, version) + - CollectionSerializer.serializedSizeCollection(inetAddressAndPortSerializer, electorate.pending, version); + return serializedCollectionSize(electorate.natural, version, inetAddressAndPortSerializer) + + serializedCollectionSize(electorate.pending, version, inetAddressAndPortSerializer); } } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java index 3b04acd26ba6..f293f1dba9e1 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java @@ -71,10 +71,9 @@ import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.service.paxos.PaxosState.*; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.*; -import static org.apache.cassandra.utils.CollectionSerializer.deserializeMap; -import static org.apache.cassandra.utils.CollectionSerializer.newHashMap; -import static org.apache.cassandra.utils.CollectionSerializer.serializeMap; -import static org.apache.cassandra.utils.CollectionSerializer.serializedSizeMap; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializedMapSize; import static org.apache.cassandra.utils.concurrent.Awaitable.SyncAwaitable.waitUntil; /** @@ -1232,7 +1231,7 @@ public void serialize(Response response, DataOutputPlus out, int version) throws Committed.serializer.serialize(promised.latestCommitted, out, version); if (promised.readResponse != null) ReadResponse.serializer.serialize(promised.readResponse, out, version); - serializeMap(inetAddressAndPortSerializer, EndpointState.nullableSerializer, promised.gossipInfo, out, version); + serializeMap(promised.gossipInfo, out, version, inetAddressAndPortSerializer, EndpointState.nullableSerializer); if (version >= MessagingService.VERSION_51) Epoch.messageSerializer.serialize(promised.electorateEpoch, out, version); if (promised.outcome == PERMIT_READ) @@ -1254,7 +1253,7 @@ public Response deserialize(DataInputPlus in, int version) throws IOException Accepted acceptedNotCommitted = (flags & 2) != 0 ? Accepted.serializer.deserialize(in, version) : null; Committed committed = Committed.serializer.deserialize(in, version); ReadResponse readResponse = (flags & 4) != 0 ? ReadResponse.serializer.deserialize(in, version) : null; - Map gossipInfo = deserializeMap(inetAddressAndPortSerializer, EndpointState.nullableSerializer, newHashMap(), in, version); + Map gossipInfo = deserializeMap(in, version, inetAddressAndPortSerializer, EndpointState.nullableSerializer); Epoch electorateEpoch = version >= MessagingService.VERSION_51 ? Epoch.messageSerializer.deserialize(in, version) : Epoch.EMPTY; MaybePromise.Outcome outcome = (flags & 16) != 0 ? PERMIT_READ : PROMISE; boolean hasProposalStability = (flags & 8) != 0; @@ -1279,7 +1278,7 @@ public long serializedSize(Response response, int version) + (permitted.latestAcceptedButNotCommitted == null ? 0 : Accepted.serializer.serializedSize(permitted.latestAcceptedButNotCommitted, version)) + Committed.serializer.serializedSize(permitted.latestCommitted, version) + (permitted.readResponse == null ? 0 : ReadResponse.serializer.serializedSize(permitted.readResponse, version)) - + serializedSizeMap(inetAddressAndPortSerializer, EndpointState.nullableSerializer, permitted.gossipInfo, version) + + serializedMapSize(permitted.gossipInfo, version, inetAddressAndPortSerializer, EndpointState.nullableSerializer) + (version >= MessagingService.VERSION_51 ? Epoch.messageSerializer.serializedSize(permitted.electorateEpoch, version) : 0) + (permitted.outcome == PERMIT_READ ? Ballot.sizeInBytes() : 0); } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java b/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java index 925daaf9dd86..fbdab4c6fdfe 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java @@ -46,7 +46,7 @@ import static org.apache.cassandra.utils.FBUtilities.getBroadcastAddressAndPort; import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializedSizeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; /** * Nodes that have promised in response to our prepare, may be missing the latestCommit, meaning we cannot be sure the @@ -226,18 +226,18 @@ public static class ResponseSerializer implements IVersionedSerializer { public void serialize(Response response, DataOutputPlus out, int version) throws IOException { - serializeNullable(Ballot.Serializer.instance, response.isSupersededBy, out, version); + serializeNullable(response.isSupersededBy, out, version, Ballot.Serializer.instance); } public Response deserialize(DataInputPlus in, int version) throws IOException { - Ballot isSupersededBy = deserializeNullable(Ballot.Serializer.instance, in, version); + Ballot isSupersededBy = deserializeNullable(in, version, Ballot.Serializer.instance); return new Response(isSupersededBy); } public long serializedSize(Response response, int version) { - return serializedSizeNullable(Ballot.Serializer.instance, response.isSupersededBy, version); + return serializedNullableSize(response.isSupersededBy, version, Ballot.Serializer.instance); } } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java b/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java index e36e1d352250..0242a5110e66 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java @@ -20,7 +20,12 @@ import java.io.IOException; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Function; @@ -63,21 +68,31 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MonotonicClock; +import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.config.CassandraRelevantProperties.PAXOS_REPAIR_RETRY_TIMEOUT_IN_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.SKIP_PAXOS_REPAIR_VERSION_VALIDATION; import static org.apache.cassandra.exceptions.RequestFailureReason.UNKNOWN; import static org.apache.cassandra.net.Verb.PAXOS2_REPAIR_REQ; -import static java.util.concurrent.TimeUnit.NANOSECONDS; -import static org.apache.cassandra.service.paxos.Commit.*; +import static org.apache.cassandra.service.paxos.Commit.Accepted; +import static org.apache.cassandra.service.paxos.Commit.Committed; +import static org.apache.cassandra.service.paxos.Commit.Proposal; +import static org.apache.cassandra.service.paxos.Commit.isAfter; +import static org.apache.cassandra.service.paxos.Commit.latest; +import static org.apache.cassandra.service.paxos.Commit.timestampsClash; import static org.apache.cassandra.service.paxos.ContentionStrategy.Type.REPAIR; import static org.apache.cassandra.service.paxos.ContentionStrategy.waitUntilForContention; -import static org.apache.cassandra.service.paxos.Paxos.*; -import static org.apache.cassandra.service.paxos.PaxosPrepare.*; +import static org.apache.cassandra.service.paxos.Paxos.Participants; +import static org.apache.cassandra.service.paxos.Paxos.isInRangeAndShouldProcess; +import static org.apache.cassandra.service.paxos.Paxos.staleBallotNewerThan; +import static org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteAccepted; +import static org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteCommitted; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status; +import static org.apache.cassandra.service.paxos.PaxosPrepare.prepareWithBallot; import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializedSizeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; /** * Facility to finish any in-progress paxos transaction, and ensure that a quorum of nodes agree on the most recent operation. @@ -623,14 +638,14 @@ public static class ResponseSerializer implements IVersionedSerializer public void serialize(Response response, DataOutputPlus out, int version) throws IOException { response.latestWitnessedOrLowBound.serialize(out); - serializeNullable(Accepted.serializer, response.acceptedButNotCommitted, out, version); + serializeNullable(response.acceptedButNotCommitted, out, version, Accepted.serializer); Committed.serializer.serialize(response.committed, out, version); } public Response deserialize(DataInputPlus in, int version) throws IOException { Ballot latestWitnessed = Ballot.deserialize(in); - Accepted acceptedButNotCommitted = deserializeNullable(Accepted.serializer, in, version); + Accepted acceptedButNotCommitted = deserializeNullable(in, version, Accepted.serializer); Committed committed = Committed.serializer.deserialize(in, version); return new Response(latestWitnessed, acceptedButNotCommitted, committed); } @@ -638,7 +653,7 @@ public Response deserialize(DataInputPlus in, int version) throws IOException public long serializedSize(Response response, int version) { return Ballot.sizeInBytes() - + serializedSizeNullable(Accepted.serializer, response.acceptedButNotCommitted, version) + + serializedNullableSize(response.acceptedButNotCommitted, version, Accepted.serializer) + Committed.serializer.serializedSize(response.committed, version); } } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosState.java b/src/java/org/apache/cassandra/service/paxos/PaxosState.java index a3f019e4bf25..4fe03d6ff511 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosState.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosState.java @@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; import java.util.function.BiConsumer; import java.util.function.Function; - import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -37,12 +36,18 @@ import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.*; -import org.apache.cassandra.metrics.PaxosMetrics; -import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Directories; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.SystemKeyspace; +import org.apache.cassandra.db.WriteType; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.metrics.PaxosMetrics; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.paxos.uncommitted.PaxosBallotTracker; import org.apache.cassandra.service.paxos.uncommitted.PaxosStateTracker; import org.apache.cassandra.service.paxos.uncommitted.PaxosUncommittedTracker; @@ -51,15 +56,23 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.CassandraRelevantProperties.PAXOS_DISABLE_COORDINATOR_LOCKING; -import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.config.Config.PaxosStatePurging.gc_grace; import static org.apache.cassandra.config.Config.PaxosStatePurging.legacy; import static org.apache.cassandra.config.DatabaseDescriptor.paxosStatePurging; -import static org.apache.cassandra.service.paxos.Commit.*; -import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.*; +import static org.apache.cassandra.service.paxos.Commit.Accepted; import static org.apache.cassandra.service.paxos.Commit.Accepted.latestAccepted; +import static org.apache.cassandra.service.paxos.Commit.AcceptedWithTTL; +import static org.apache.cassandra.service.paxos.Commit.Agreed; +import static org.apache.cassandra.service.paxos.Commit.Committed; import static org.apache.cassandra.service.paxos.Commit.Committed.latestCommitted; +import static org.apache.cassandra.service.paxos.Commit.CommittedWithTTL; +import static org.apache.cassandra.service.paxos.Commit.Proposal; import static org.apache.cassandra.service.paxos.Commit.isAfter; +import static org.apache.cassandra.service.paxos.Commit.latest; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PERMIT_READ; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PROMISE; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.REJECT; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; /** * We save to memory the result of each operation before persisting to disk, however each operation that performs diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 5b149acd9a0b..f1535adcdc9a 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -105,6 +105,7 @@ public int execute(String... args) Compact.class, CompactionHistory.class, CompactionStats.class, + CreateEpochUnsafe.class, DataPaths.class, Decommission.class, Decommission.Abort.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/CreateEpochUnsafe.java b/src/java/org/apache/cassandra/tools/nodetool/CreateEpochUnsafe.java new file mode 100644 index 000000000000..c5ea1246d61e --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/CreateEpochUnsafe.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import io.airlift.airline.Command; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool; + +@Command(name="createepochunsafe", description = "manually create an Accord epoch from current topology") +public class CreateEpochUnsafe extends NodeTool.NodeToolCmd +{ + @Override + protected void execute(NodeProbe probe) + { + throw new UnsupportedOperationException("git rebase to pick up TCM removes this"); + } +} diff --git a/src/java/org/apache/cassandra/utils/ArraySerializers.java b/src/java/org/apache/cassandra/utils/ArraySerializers.java new file mode 100644 index 000000000000..63ee3029c75b --- /dev/null +++ b/src/java/org/apache/cassandra/utils/ArraySerializers.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.io.IOException; +import java.util.function.IntFunction; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; + +public class ArraySerializers +{ + public static void serializeArray(T[] items, DataOutputPlus out, int version, IVersionedSerializer serializer) throws IOException + { + out.writeUnsignedVInt32(items.length); + for (T item : items) + serializer.serialize(item, out, version); + } + + public static T[] deserializeArray(DataInputPlus in, int version, IVersionedSerializer serializer, IntFunction arrayFactory) throws IOException + { + int size = in.readUnsignedVInt32(); + T[] items = arrayFactory.apply(size); + for (int i = 0; i < size; i++) + items[i] = serializer.deserialize(in, version); + return items; + } + + public static long serializedArraySize(T[] array, int version, IVersionedSerializer serializer) + { + long size = sizeofUnsignedVInt(array.length); + for (T item : array) + size += serializer.serializedSize(item, version); + return size; + } +} diff --git a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java index 73725a6ce3df..a89a4f6381c6 100644 --- a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java +++ b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java @@ -46,6 +46,7 @@ import java.util.stream.Collectors; import net.nicoulaj.compilecommand.annotations.Inline; + import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.BooleanType; import org.apache.cassandra.db.marshal.BytesType; @@ -53,8 +54,10 @@ import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.marshal.TimestampType; +import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.compress.BufferType; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.FileUtils; @@ -102,6 +105,8 @@ public class ByteBufferUtil /** Represents an unset value in bound variables */ public static final ByteBuffer UNSET_BYTE_BUFFER = ByteBuffer.wrap(new byte[]{}); + public static final long EMPTY_SIZE_ON_HEAP = ObjectSizes.measureDeep(ByteBufferUtil.EMPTY_BYTE_BUFFER); + public static final ByteBuffer[] EMPTY_ARRAY = new ByteBuffer[0]; @Inline @@ -369,6 +374,17 @@ public static void writeWithVIntLength(ByteBuffer bytes, DataOutputPlus out) thr out.writeUnsignedVInt32(bytes.remaining()); out.write(bytes); } + public static void writeWithVIntLengthAndNull(ByteBuffer bytes, DataOutputPlus out) throws IOException + { + if (bytes == null) + { + out.writeVInt32(-1); + return; + } + + out.writeVInt32(bytes.remaining()); + out.write(bytes); + } public static void writeWithShortLength(ByteBuffer buffer, DataOutputPlus out) throws IOException { @@ -399,16 +415,15 @@ public static ByteBuffer readWithVIntLength(DataInputPlus in) throws IOException return ByteBufferUtil.read(in, length); } - public static int serializedSizeWithLength(ByteBuffer buffer) + public static int serializedSizeWithVIntLength(ByteBuffer buffer) { int size = buffer.remaining(); - return TypeSizes.sizeof(size) + size; + return TypeSizes.sizeofUnsignedVInt(size) + size; } - public static int serializedSizeWithVIntLength(ByteBuffer buffer) + public static long estimatedSizeOnHeap(ByteBuffer buffer) { - int size = buffer.remaining(); - return TypeSizes.sizeofUnsignedVInt(size) + size; + return EMPTY_SIZE_ON_HEAP + buffer.remaining(); } public static void skipWithVIntLength(DataInputPlus in) throws IOException @@ -985,4 +1000,40 @@ public static void readFully(FileChannel channel, ByteBuffer dst, long position) position += read; } } + + public static ByteBuffer serialized(IVersionedSerializer serializer, T value, int version) + { + try (DataOutputBuffer dob = new DataOutputBuffer()) + { + serializer.serialize(value, dob, version); + return dob.buffer(); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + public static final IVersionedSerializer byteBufferSerializer = new IVersionedSerializer() + { + @Override + public void serialize(ByteBuffer bytes, DataOutputPlus out, int version) throws IOException + { + writeWithVIntLength(bytes, out); + } + + @Override + public ByteBuffer deserialize(DataInputPlus in, int version) throws IOException + { + return readWithVIntLength(in); + } + + @Override + public long serializedSize(ByteBuffer bytes, int version) + { + return serializedSizeWithVIntLength(bytes); + } + }; + + public static final IVersionedSerializer nullableByteBufferSerializer = NullableSerializer.wrap(byteBufferSerializer); } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/utils/CollectionSerializer.java b/src/java/org/apache/cassandra/utils/CollectionSerializers.java similarity index 51% rename from src/java/org/apache/cassandra/utils/CollectionSerializer.java rename to src/java/org/apache/cassandra/utils/CollectionSerializers.java index 9de64509bb26..93fa02a04dee 100644 --- a/src/java/org/apache/cassandra/utils/CollectionSerializer.java +++ b/src/java/org/apache/cassandra/utils/CollectionSerializers.java @@ -19,33 +19,35 @@ package org.apache.cassandra.utils; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.RandomAccess; import java.util.Set; import java.util.function.IntFunction; import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -public class CollectionSerializer +import static com.google.common.primitives.Ints.checkedCast; +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; + +public class CollectionSerializers { - public static void serializeCollection(IVersionedSerializer valueSerializer, Collection values, DataOutputPlus out, int version) throws IOException + public static void serializeCollection(Collection values, DataOutputPlus out, int version, IVersionedSerializer valueSerializer) throws IOException { out.writeUnsignedVInt32(values.size()); for (V value : values) valueSerializer.serialize(value, out, version); } - public static & RandomAccess> void serializeList(IVersionedSerializer valueSerializer, L values, DataOutputPlus out, int version) throws IOException + public static > void serializeList(L values, DataOutputPlus out, int version, IVersionedSerializer valueSerializer) throws IOException { int size = values.size(); out.writeUnsignedVInt32(size); @@ -53,7 +55,7 @@ public static & RandomAccess> void serializeList(IVersione valueSerializer.serialize(values.get(i), out, version); } - public static void serializeMap(IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer, Map map, DataOutputPlus out, int version) throws IOException + public static void serializeMap(Map map, DataOutputPlus out, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer) throws IOException { out.writeUnsignedVInt32(map.size()); for (Map.Entry e : map.entrySet()) @@ -63,19 +65,20 @@ public static void serializeMap(IVersionedSerializer keySerializer, IV } } - public static > C deserializeCollection(IVersionedSerializer serializer, IntFunction factory, DataInputPlus in, int version) throws IOException + public static List deserializeList(DataInputPlus in, int version, IVersionedSerializer serializer) throws IOException { - int size = in.readUnsignedVInt32(); - C result = factory.apply(size); - while (size-- > 0) - result.add(serializer.deserialize(in, version)); - return result; + return deserializeCollection(in, version, serializer, newArrayList()); + } + + public static Set deserializeSet(DataInputPlus in, int version, IVersionedSerializer serializer) throws IOException + { + return deserializeCollection(in, version, serializer, newHashSet()); } - public static > M deserializeMap(IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer, IntFunction factory, DataInputPlus in, int version) throws IOException + public static Map deserializeMap(DataInputPlus in, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer, IntFunction> factory) throws IOException { - int size = in.readUnsignedVInt32(); - M result = factory.apply(size); + int size = checkedCast(in.readUnsignedVInt32()); + Map result = factory.apply(size); while (size-- > 0) { K key = keySerializer.deserialize(in, version); @@ -85,27 +88,31 @@ public static > M deserializeMap(IVersionedSerializer< return result; } - public static long serializedSizeCollection(IVersionedSerializer valueSerializer, Collection values, int version) + public static Map deserializeMap(DataInputPlus in, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer) throws IOException + { + return deserializeMap(in, version, keySerializer, valueSerializer, newHashMap()); + } + + public static long serializedCollectionSize(Collection values, int version, IVersionedSerializer valueSerializer) { - long size = TypeSizes.sizeofUnsignedVInt(values.size()); + long size = sizeofUnsignedVInt(values.size()); for (V value : values) size += valueSerializer.serializedSize(value, version); return size; } - public static & RandomAccess> long serializedSizeList(IVersionedSerializer valueSerializer, L values, int version) throws IOException + public static > long serializedListSize(L values, int version, IVersionedSerializer valueSerializer) { int items = values.size(); - long size = TypeSizes.sizeofUnsignedVInt(items); + long size = sizeofUnsignedVInt(items); for (int i = 0 ; i < items ; ++i) size += valueSerializer.serializedSize(values.get(i), version); return size; } - - public static long serializedSizeMap(IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer, Map map, int version) + public static long serializedMapSize(Map map, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer) { - long size = TypeSizes.sizeofUnsignedVInt(map.size()); + long size = sizeofUnsignedVInt(map.size()); for (Map.Entry e : map.entrySet()) size += keySerializer.serializedSize(e.getKey(), version) + valueSerializer.serializedSize(e.getValue(), version); @@ -122,4 +129,26 @@ public static IntFunction> newHashMap() return i -> i == 0 ? Collections.emptyMap() : Maps.newHashMapWithExpectedSize(i); } + public static IntFunction> newArrayList() + { + return i -> i == 0 ? Collections.emptyList() : new ArrayList<>(i); + } + + public static int readCollectionSize(DataInputPlus in, int version) throws IOException + { + return checkedCast(in.readUnsignedVInt()); + } + + /* + * Private to push auto-complete to the convenience methods + * Feel free to make public if there is a weird collection you want to use + */ + private static > C deserializeCollection(DataInputPlus in, int version, IVersionedSerializer serializer, IntFunction factory) throws IOException + { + int size = checkedCast(in.readUnsignedVInt32()); + C result = factory.apply(size); + while (size-- > 0) + result.add(serializer.deserialize(in, version)); + return result; + } } diff --git a/src/java/org/apache/cassandra/utils/Hex.java b/src/java/org/apache/cassandra/utils/Hex.java index b8044b86e1e7..19d470363f7f 100644 --- a/src/java/org/apache/cassandra/utils/Hex.java +++ b/src/java/org/apache/cassandra/utils/Hex.java @@ -27,7 +27,16 @@ public class Hex { private static final Constructor stringConstructor = getProtectedConstructor(String.class, int.class, int.class, char[].class); private final static byte[] charToByte = new byte[256]; - private static final Logger logger = LoggerFactory.getLogger(Hex.class); + + private static class LoggerHandle + { + private static final Logger logger = LoggerFactory.getLogger(Hex.class); + } + + private static Logger logger() + { + return LoggerHandle.logger; + } // package protected for use by ByteBufferUtil. Do not modify this array !! static final char[] byteToChar = new char[16]; @@ -123,7 +132,7 @@ public static String wrapCharArray(char[] c) { // The underlying constructor failed. Unwrapping the exception. Throwable cause = ite.getCause(); - logger.error("Underlying string constructor threw an error: {}", + logger().error("Underlying string constructor threw an error: {}", cause == null ? ite.getMessage() : cause.getMessage()); } catch (Exception e) diff --git a/src/java/org/apache/cassandra/utils/NullableSerializer.java b/src/java/org/apache/cassandra/utils/NullableSerializer.java index 67e2d6a0a925..7d834be99566 100644 --- a/src/java/org/apache/cassandra/utils/NullableSerializer.java +++ b/src/java/org/apache/cassandra/utils/NullableSerializer.java @@ -27,20 +27,19 @@ public class NullableSerializer { - - public static void serializeNullable(IVersionedSerializer serializer, T value, DataOutputPlus out, int version) throws IOException + public static void serializeNullable(T value, DataOutputPlus out, int version, IVersionedSerializer serializer) throws IOException { out.writeBoolean(value != null); if (value != null) serializer.serialize(value, out, version); } - public static T deserializeNullable(IVersionedSerializer serializer, DataInputPlus in, int version) throws IOException + public static T deserializeNullable(DataInputPlus in, int version, IVersionedSerializer serializer) throws IOException { return in.readBoolean() ? serializer.deserialize(in, version) : null; } - public static long serializedSizeNullable(IVersionedSerializer serializer, T value, int version) + public static long serializedNullableSize(T value, int version, IVersionedSerializer serializer) { return value != null ? TypeSizes.sizeof(true) + serializer.serializedSize(value, version) @@ -52,19 +51,18 @@ public static IVersionedSerializer wrap(IVersionedSerializer wrap) return new IVersionedSerializer() { public void serialize(T t, DataOutputPlus out, int version) throws IOException { - serializeNullable(wrap, t, out, version); + serializeNullable(t, out, version, wrap); } public T deserialize(DataInputPlus in, int version) throws IOException { - return deserializeNullable(wrap, in, version); + return deserializeNullable(in, version, wrap); } public long serializedSize(T t, int version) { - return serializedSizeNullable(wrap, t, version); + return serializedNullableSize(t, version, wrap); } }; } - } diff --git a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java index b9d9ed71729b..b8be5126baae 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java +++ b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java @@ -27,11 +27,13 @@ import java.util.NavigableSet; import java.util.NoSuchElementException; import java.util.Objects; +import java.util.Set; import java.util.SortedSet; import java.util.Spliterator; import java.util.Spliterators; import java.util.function.Function; +import com.google.common.collect.Iterables; import com.google.common.collect.Ordering; import org.apache.cassandra.utils.btree.BTree.Dir; @@ -239,6 +241,15 @@ public boolean containsAll(Collection c) return false; return true; } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || !(o instanceof Set)) return false; + return Iterables.elementsEqual(this, (Set) o); + } + public int hashCode() { // we can't just delegate to Arrays.deepHashCode(), diff --git a/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java b/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java index b01ce0b59469..325b4baca1e6 100644 --- a/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java +++ b/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java @@ -56,6 +56,7 @@ private interface ListenerFactory */ private static class Listener extends AtomicInteger implements GenericFutureListener> { + private static final long serialVersionUID = 0; // for simulator support Supplier onSuccess; // non-final so we can release resources immediately when failing fast final FutureCombiner complete; diff --git a/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java b/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java index 01c52c5d9343..c9c253f1d57a 100644 --- a/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java +++ b/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java @@ -99,6 +99,7 @@ public static Semaphore newFairSemaphore(int permits) public static class Standard extends java.util.concurrent.Semaphore implements Semaphore { + private static final long serialVersionUID = 0; // for simulator support public Standard(int permits) { this(permits, false); diff --git a/src/java/org/apache/cassandra/utils/logging/ClassNameFilter.java b/src/java/org/apache/cassandra/utils/logging/ClassNameFilter.java new file mode 100644 index 000000000000..ef0ca2c5ca4b --- /dev/null +++ b/src/java/org/apache/cassandra/utils/logging/ClassNameFilter.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.logging; + +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.filter.AbstractMatcherFilter; +import ch.qos.logback.core.spi.FilterReply; + +public class ClassNameFilter extends AbstractMatcherFilter +{ + String loggerName; + + public void setLoggerName(String loggerName) + { + this.loggerName = loggerName; + } + + @Override + public FilterReply decide(ILoggingEvent event) + { + if (!isStarted()) return FilterReply.NEUTRAL; + if (event.getLoggerName().equals(loggerName)) return onMatch; + return onMismatch; + } + + @Override + public void start() + { + if (loggerName != null) super.start(); + } +} diff --git a/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt b/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt index 8a1d2987f9d2..6403600acc89 100644 --- a/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt +++ b/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt @@ -9,11 +9,13 @@ BATCH BEGIN BY COLUMNFAMILY +COMMIT CREATE DELETE DESC DESCRIBE DROP +END ENTRIES EXECUTE FROM @@ -27,6 +29,7 @@ INSERT INTO IS KEYSPACE +LET LIMIT MATERIALIZED MODIFY @@ -45,8 +48,10 @@ SCHEMA SELECT SET TABLE +THEN TO TOKEN +TRANSACTION TRUNCATE UNLOGGED UPDATE diff --git a/test/conf/cassandra.yaml b/test/conf/cassandra.yaml index e9ba02c4415e..ff017b3afc81 100644 --- a/test/conf/cassandra.yaml +++ b/test/conf/cassandra.yaml @@ -56,6 +56,7 @@ file_cache_enabled: true full_query_logging_options: allow_nodetool_archive_command: true auto_hints_cleanup_enabled: true +accord_transactions_enabled: true heap_dump_path: build/test dump_heap_on_uncaught_exception: false diff --git a/test/conf/logback-dtest.xml b/test/conf/logback-dtest.xml index 48d9859b67e3..d854f8c77120 100644 --- a/test/conf/logback-dtest.xml +++ b/test/conf/logback-dtest.xml @@ -53,6 +53,11 @@ + + diff --git a/test/conf/logback-simulator.xml b/test/conf/logback-simulator.xml index d0082d43fa1d..ffa1ffa088c7 100644 --- a/test/conf/logback-simulator.xml +++ b/test/conf/logback-simulator.xml @@ -17,23 +17,37 @@ --> - + + + + ./build/test/logs/simulator/${run_start}-${run_seed}/history.log + + %msg%n + + true + + org.apache.cassandra.simulator.paxos.LoggingHistoryValidator + ACCEPT + DENY + + + - ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log + ./build/test/logs/simulator/${run_start}-${run_seed}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + %-5level [%thread] ${instance_id} %replace(CS:%X{command_store} ){'CS\:\s+', ''}%replace(OP:%X{async_op} ){'OP\:\s+', ''}%date{ISO8601} %msg%n true - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %replace(CS:%X{command_store} ){'CS\:\s+', ''}%replace(OP:%X{async_op} ){'OP\:\s+', ''}%date{ISO8601} %F:%L - %msg%n WARN @@ -49,10 +63,12 @@ + + diff --git a/test/distributed/org/apache/cassandra/distributed/api/Row.java b/test/distributed/org/apache/cassandra/distributed/api/Row.java index 9d08cb2859c5..711627881c60 100644 --- a/test/distributed/org/apache/cassandra/distributed/api/Row.java +++ b/test/distributed/org/apache/cassandra/distributed/api/Row.java @@ -21,6 +21,7 @@ import java.util.Arrays; import java.util.Date; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Objects; @@ -85,6 +86,17 @@ public T get(int index) return (T) results[index]; } + public T get(int index, T defaultValue) + { + checkAccess(); + if (index < 0 || index >= results.length) + throw new NoSuchElementException("by index: " + index); + T result = (T) results[index]; + if (result == null) + return defaultValue; + return result; + } + public T get(String name) { checkAccess(); @@ -94,66 +106,158 @@ public T get(String name) return (T) results[idx]; } + public T get(String name, T defaultValue) + { + checkAccess(); + int idx = findIndex(name); + if (idx == NOT_FOUND) + throw new NoSuchElementException("by name: " + name); + T result = (T) results[idx]; + if (result == null) + return defaultValue; + return result; + } + + public Boolean getBoolean(int index) + { + return get(index); + } + + public Boolean getBoolean(int index, Boolean defaultValue) + { + return get(index, defaultValue); + } + + public Boolean getBoolean(String name) + { + return get(name); + } + + public Boolean getBoolean(String name, Boolean defaultValue) + { + return get(name, defaultValue); + } + public Short getShort(int index) { return get(index); } + public Short getShort(int index, Short defaultValue) + { + return get(index, defaultValue); + } + public Short getShort(String name) { return get(name); } + public Short getShort(String name, Short defaultValue) + { + return get(name, defaultValue); + } + public Integer getInteger(int index) { return get(index); } + public Integer getInteger(int index, Integer defaultValue) + { + return get(index, defaultValue); + } + public Integer getInteger(String name) { return get(name); } + public Integer getInteger(String name, Integer defaultValue) + { + return get(name, defaultValue); + } + public Long getLong(int index) { return get(index); } + public Long getLong(int index, Long defaultValue) + { + return get(index, defaultValue); + } + public Long getLong(String name) { return get(name); } + public Long getLong(String name, Long defaultValue) + { + return get(name, defaultValue); + } + public Float getFloat(int index) { return get(index); } + public Float getFloat(int index, Float defaultValue) + { + return get(index, defaultValue); + } + public Float getFloat(String name) { return get(name); } + public Float getFloat(String name, Float defaultValue) + { + return get(name, defaultValue); + } + public Double getDouble(int index) { return get(index); } + public Double getDouble(int index, Double defaultValue) + { + return get(index, defaultValue); + } + public Double getDouble(String name) { return get(name); } + public Double getDouble(String name, Double defaultValue) + { + return get(name, defaultValue); + } + public String getString(int index) { return get(index); } + public String getString(int index, String defaultValue) + { + return get(index, defaultValue); + } + public String getString(String name) { return get(name); } + public String getString(String name, String defaultValue) + { + return get(name, defaultValue); + } + public UUID getUUID(int index) { Object uuid = get(index); @@ -162,6 +266,14 @@ public UUID getUUID(int index) return (UUID) uuid; } + public UUID getUUID(int index, UUID defaultValue) + { + Object uuid = get(index, defaultValue); + if (uuid instanceof TimeUUID) + return ((TimeUUID) uuid).asUUID(); + return (UUID) uuid; + } + public UUID getUUID(String name) { Object uuid = get(name); @@ -170,26 +282,74 @@ public UUID getUUID(String name) return (UUID) uuid; } + public UUID getUUID(String name, UUID defaultValue) + { + Object uuid = get(name, defaultValue); + if (uuid instanceof TimeUUID) + return ((TimeUUID) uuid).asUUID(); + return (UUID) uuid; + } + public Date getTimestamp(int index) { return get(index); } + public Date getTimestamp(int index, Date defaultValue) + { + return get(index, defaultValue); + } + public Date getTimestamp(String name) { return get(name); } + public Date getTimestamp(String name, Date defaultValue) + { + return get(name, defaultValue); + } + public Set getSet(int index) { return get(index); } + public Set getSet(int index, Set defaultValue) + { + return get(index, defaultValue); + } + public Set getSet(String name) { return get(name); } + public Set getSet(String name, Set defaultValue) + { + return get(name, defaultValue); + } + + public List getList(int index) + { + return get(index); + } + + public List getList(int index, List defaultValue) + { + return get(index, defaultValue); + } + + public List getList(String name) + { + return get(name); + } + + public List getList(String name, List defaultValue) + { + return get(name, defaultValue); + } + /** * Get the row as a array. */ diff --git a/test/distributed/org/apache/cassandra/distributed/api/SimpleQueryResult.java b/test/distributed/org/apache/cassandra/distributed/api/SimpleQueryResult.java index 2b71e8b8b17f..d49ed57b2f58 100644 --- a/test/distributed/org/apache/cassandra/distributed/api/SimpleQueryResult.java +++ b/test/distributed/org/apache/cassandra/distributed/api/SimpleQueryResult.java @@ -61,12 +61,15 @@ */ public class SimpleQueryResult implements QueryResult { + private static final int NO_MARK = -1; + private final String[] names; private final Object[][] results; private final List warnings; private final Predicate filter; private final Row row; - private int offset = -1; + private int offset = NO_MARK; + private int mark = NO_MARK; public SimpleQueryResult(String[] names, Object[][] results) { @@ -108,12 +111,18 @@ public SimpleQueryResult filter(Predicate fn) return new SimpleQueryResult(names, results, filter.and(fn), offset); } + public void mark() + { + mark = offset; + } + /** * Reset the cursor to the start of the query result; if the query result has not been iterated, this has no effect. */ public void reset() { - offset = -1; + offset = mark; + mark = NO_MARK; row.setResults(null); } diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java index 20293ad11996..9d0616f90f02 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java @@ -28,7 +28,7 @@ import com.google.common.collect.Iterators; -import accord.utils.Invariants; +import accord.utilsfork.Invariants; import org.apache.cassandra.cql3.CQLStatement; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; @@ -127,8 +127,7 @@ public QueryResult executeWithPagingWithResult(String query, ConsistencyLevel co boundBBValues.add(ByteBufferUtil.objectToBytes(boundValue)); prepared.validate(clientState); - Invariants.checkState(prepared instanceof SelectStatement, - "Only SELECT statements can be executed with paging %s", prepared); + Invariants.checkState(prepared instanceof SelectStatement, "Only SELECT statements can be executed with paging %s", prepared); Dispatcher.RequestTime requestTime = Dispatcher.RequestTime.forImmediateExecution(); SelectStatement selectStatement = (SelectStatement) prepared; diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index 06258b6145bf..a3918443e4a2 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -127,6 +127,7 @@ import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.StorageServiceMBean; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.paxos.PaxosRepair; import org.apache.cassandra.service.paxos.PaxosState; import org.apache.cassandra.service.paxos.uncommitted.UncommittedTableData; @@ -979,6 +980,8 @@ public Future shutdown(boolean runOnExitThreads, boolean shutdownMessaging () -> SharedExecutorPool.SHARED.shutdownAndWait(1L, MINUTES) ); + error = parallelRun(error, executor, () -> AccordService.instance().shutdownAndWait(1l, MINUTES)); + // CommitLog must shut down after Stage, or threads from the latter may attempt to use the former. // (ex. A Mutation stage thread may attempt to add a mutation to the CommitLog.) error = parallelRun(error, executor, CommitLog.instance::shutdownBlocking); diff --git a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java index 8afd90faf78b..19194188ee25 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java @@ -28,7 +28,6 @@ import java.util.TreeMap; import java.util.UUID; import java.util.function.Function; -import java.util.stream.Collectors; import com.vdurmont.semver4j.Semver; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -83,7 +82,7 @@ private InstanceConfig(int num, this.hostId = new UUID(0x4000L, (1L << 63) | num); // deterministic hostId for simulator //TODO move away from magic strings in favor of constants this .set("num_tokens", initial_token.size()) - .set("initial_token", initial_token.stream().collect(Collectors.joining(","))) + .set("initial_token", String.join(",", initial_token)) .set("broadcast_address", broadcast_address) .set("listen_address", listen_address) .set("broadcast_rpc_address", broadcast_rpc_address) @@ -114,6 +113,7 @@ private InstanceConfig(int num, // required settings for dtest functionality .set("diagnostic_events_enabled", true) .set("auto_bootstrap", false) + .set("accord_transactions_enabled", true) // capacities that are based on `totalMemory` that should be fixed size .set("index_summary_capacity", "50MiB") .set("counter_cache_size", "50MiB") diff --git a/test/distributed/org/apache/cassandra/distributed/impl/MessageImpl.java b/test/distributed/org/apache/cassandra/distributed/impl/MessageImpl.java index 758d41358342..937670b61258 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/MessageImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/MessageImpl.java @@ -26,6 +26,7 @@ // a container for simplifying the method signature for per-instance message handling/delivery public class MessageImpl implements IMessage { + private static final long serialVersionUID = 0; // for simulator support public final int verb; public final byte[] bytes; public final long id; diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Query.java b/test/distributed/org/apache/cassandra/distributed/impl/Query.java index 82933e735a62..26428034e682 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Query.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Query.java @@ -27,6 +27,7 @@ import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.ClientWarn; import org.apache.cassandra.service.QueryState; @@ -36,15 +37,15 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; -public class Query implements IIsolatedExecutor.SerializableCallable +public class Query implements IIsolatedExecutor.SerializableCallable { private static final long serialVersionUID = 1L; - final String query; + public final String query; final long timestamp; final org.apache.cassandra.distributed.api.ConsistencyLevel commitConsistencyOrigin; final org.apache.cassandra.distributed.api.ConsistencyLevel serialConsistencyOrigin; - final Object[] boundValues; + public final Object[] boundValues; public Query(String query, long timestamp, org.apache.cassandra.distributed.api.ConsistencyLevel commitConsistencyOrigin, org.apache.cassandra.distributed.api.ConsistencyLevel serialConsistencyOrigin, Object[] boundValues) { @@ -55,7 +56,8 @@ public Query(String query, long timestamp, org.apache.cassandra.distributed.api. this.boundValues = boundValues; } - public Object[][] call() + @Override + public SimpleQueryResult call() { ConsistencyLevel commitConsistency = toCassandraCL(commitConsistencyOrigin); ConsistencyLevel serialConsistency = serialConsistencyOrigin == null ? null : toCassandraCL(serialConsistencyOrigin); @@ -88,7 +90,7 @@ public Object[][] call() if (res != null) res.setWarnings(ClientWarn.instance.getWarnings()); - return RowUtil.toQueryResult(res).toObjectArrays(); + return RowUtil.toQueryResult(res); } public String toString() @@ -100,4 +102,4 @@ static org.apache.cassandra.db.ConsistencyLevel toCassandraCL(org.apache.cassand { return org.apache.cassandra.db.ConsistencyLevel.fromCode(cl.ordinal()); } -} \ No newline at end of file +} diff --git a/test/distributed/org/apache/cassandra/distributed/impl/UnsafeGossipHelper.java b/test/distributed/org/apache/cassandra/distributed/impl/UnsafeGossipHelper.java index 377ac198802a..ca647f5336d8 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/UnsafeGossipHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/UnsafeGossipHelper.java @@ -49,6 +49,7 @@ public class UnsafeGossipHelper { public static class HostInfo implements Serializable { + private static final long serialVersionUID = 0; // for simulator support final InetSocketAddress address; final UUID hostId; final String tokenString; diff --git a/test/distributed/org/apache/cassandra/distributed/shared/VersionedApplicationState.java b/test/distributed/org/apache/cassandra/distributed/shared/VersionedApplicationState.java index fd3e40a70d69..97d202b697f1 100644 --- a/test/distributed/org/apache/cassandra/distributed/shared/VersionedApplicationState.java +++ b/test/distributed/org/apache/cassandra/distributed/shared/VersionedApplicationState.java @@ -22,6 +22,7 @@ public class VersionedApplicationState implements Serializable { + private static final long serialVersionUID = 0; // for simulator support public final int applicationState; public final String value; public final int version; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java new file mode 100644 index 000000000000..17cfd4cada52 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -0,0 +1,2521 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import org.apache.cassandra.distributed.Cluster; +import org.assertj.core.api.Assertions; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.primitives.Unseekables; +import accord.topology.Topologies; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.functions.types.utils.Bytes; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.api.QueryResults; +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.apache.cassandra.cql3.CQLTester.row; +import static org.apache.cassandra.distributed.util.QueryResultUtil.assertThat; + +public class AccordCQLTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordCQLTest.class); + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws IOException + { + AccordTestBase.setupClass(); + SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); + } + + @Test + public void testMultiPartitionReturn() throws Exception + { + test(cluster -> { + for (int i = 0; i < 10; i++) + { + for (int j = 0; j < 10; j++) + cluster.coordinator(1).execute("INSERT INTO " + currentTable + "(k, c, v) VALUES (?, ?, ?);", ConsistencyLevel.ALL, i, j, i + j); + } + // multi row + String cql = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k=? AND c IN (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(cql, ConsistencyLevel.ANY, 0, 0, 1); + assertThat(result).isEqualTo(QueryResults.builder() + .columns("k", "c", "v") + .row(0, 0, 0) + .row(0, 1, 1) + .build()); + // Results should be in Partiton/Clustering order, so make sure + // multi partition + cql = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k IN (?, ?) AND c = ?;\n" + + "COMMIT TRANSACTION"; + for (boolean asc : Arrays.asList(true, false)) + { + Object[] binds = asc ? row(0, 1, 0) : row(1, 0, 0); + result = cluster.coordinator(1).executeWithResult(cql, ConsistencyLevel.ANY, binds); + assertThat(result).isEqualTo(QueryResults.builder() + .columns("k", "c", "v") + .row(0, 0, 0) + .row(1, 0, 1) + .build()); + } + + // multi-partition, multi-clustering + cql = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k IN (?, ?) AND c IN (?, ?);\n" + + "COMMIT TRANSACTION"; + for (boolean asc : Arrays.asList(true, false)) + { + Object[] binds = asc ? row(0, 1, 0, 1) : row(1, 0, 1, 0); + result = cluster.coordinator(1).executeWithResult(cql, ConsistencyLevel.ANY, binds); + assertThat(result).isEqualTo(QueryResults.builder() + .columns("k", "c", "v") + .row(0, 0, 0) + .row(0, 1, 1) + .row(1, 0, 1) + .row(1, 1, 2) + .build()); + } + }); + } + + @Test + public void testMultipleShards() throws Exception + { + String keyspace = "multipleShards"; + String currentTable = keyspace + ".tbl"; + List ddls = Arrays.asList("CREATE KEYSPACE " + keyspace + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}", + "CREATE TABLE " + currentTable + " (k blob, c int, v int, primary key (k, c))"); + List tokens = SHARED_CLUSTER.stream() + .flatMap(i -> StreamSupport.stream(Splitter.on(",").split(i.config().getString("initial_token")).spliterator(), false)) + .collect(Collectors.toList()); + + List keys = tokens.stream() + .map(t -> (Murmur3Partitioner.LongToken) Murmur3Partitioner.instance.getTokenFactory().fromString(t)) + .map(Murmur3Partitioner.LongToken::keyForToken) + .collect(Collectors.toList()); + List keyStrings = keys.stream().map(bb -> "0x" + ByteBufferUtil.bytesToHex(bb)).collect(Collectors.toList()); + StringBuilder query = new StringBuilder("BEGIN TRANSACTION\n"); + + for (int i = 0; i < keys.size(); i++) + query.append(" LET row" + i + " = (SELECT * FROM " + currentTable + " WHERE k=" + keyStrings.get(i) + " AND c=0);\n"); + + query.append(" SELECT row0.v;\n") + .append(" IF "); + + for (int i = 0; i < keys.size(); i++) + query.append((i > 0 ? " AND row" : "row") + i + " IS NULL"); + + query.append(" THEN\n"); + + for (int i = 0; i < keys.size(); i++) + query.append(" INSERT INTO " + currentTable + " (k, c, v) VALUES (" + keyStrings.get(i) + ", 0, " + i +");\n"); + + query.append(" END IF\n"); + query.append("COMMIT TRANSACTION"); + + test(ddls, cluster -> { + // row0.v shouldn't have existed when the txn's SELECT was executed + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ null }, query.toString()); + + cluster.get(1).runOnInstance(() -> { + StringBuilder sb = new StringBuilder("BEGIN TRANSACTION\n"); + for (int i = 0; i < keyStrings.size() - 1; i++) + sb.append(String.format("LET row%d = (SELECT * FROM %s WHERE k=%s AND c=0);\n", i, currentTable, keyStrings.get(i))); + sb.append(String.format("SELECT * FROM %s WHERE k=%s AND c=0;\n", currentTable, keyStrings.get(keyStrings.size() - 1))); + sb.append("COMMIT TRANSACTION"); + + Unseekables routables = AccordTestUtils.createTxn(sb.toString()).keys().toUnseekables(); + Topologies topology = AccordService.instance().topology().withUnsyncedEpochs(routables, AccordService.instance().topology().epoch()); + // we don't detect out-of-bounds read/write yet, so use this to validate we reach different shards + Assertions.assertThat(topology.totalShards()).isEqualTo(2); + }); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + "COMMIT TRANSACTION"; + + for (int i = 0; i < keys.size(); i++) + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { keys.get(i), 0, i}, check, keys.get(i), 0); + }); + } + + @Test + public void testScalarBindVariables() throws Throwable + { + test(cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT v FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " LET row2 = (SELECT v FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " SELECT v FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + " IF row1 IS NULL AND row2.v = ? THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Object[][] result = cluster.coordinator(1).execute(query, + ConsistencyLevel.ANY, + 0, 0, + 1, 0, + 1, 0, + 3, + 0, 0, 1); + assertEquals(3, result[0][0]); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check); + }); + } + + @Test + public void testRegularScalarIsNull() throws Throwable + { + testScalarIsNull("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))"); + } + + @Test + public void testStaticScalarIsNull() throws Throwable + { + testScalarIsNull("CREATE TABLE " + currentTable + " (k int, c int, v int static, primary key (k, c))"); + } + + private void testScalarIsNull(String tableDDL) throws Exception { + test(tableDDL, + cluster -> + { + String insertNull = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT 1);\n" + + " SELECT row0.k, row0.v;\n" + + " IF row0.v IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, null);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null, null }, insertNull, 0, 0); + + String insert = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT 1);\n" + + " SELECT row0.k, row0.v;\n" + + " IF row0.v IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, null }, insert, 0, 0, 1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT k, c, v FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check); + }); + } + + @Test + public void testQueryStaticColumn() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, s int static, v int, primary key (k, c))", + cluster -> + { + // select partition key, clustering key and static column, restrict on partition and clustering + testQueryStaticColumn(cluster, + "LET row0 = (SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? AND c = 0);\n" + + "SELECT row0.k, row0.c, row0.s, row0.v;\n", + + "SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? AND c = 0"); + + // select partition key, clustering key and static column, restrict on partition and limit to 1 row + testQueryStaticColumn(cluster, + "LET row0 = (SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + "SELECT row0.k, row0.c, row0.s, row0.v;\n", + + "SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? LIMIT 1"); + + // select static column and regular column, restrict on partition and clustering + testQueryStaticColumn(cluster, + "LET row0 = (SELECT s, v FROM " + currentTable + " WHERE k = ? AND c = 0);\n" + + "SELECT row0.s, row0.v;\n", + + "SELECT s, v FROM " + currentTable + " WHERE k = ? AND c = 0"); + + // select just static column, restrict on partition and limit to 1 row + testQueryStaticColumn(cluster, + "LET row0 = (SELECT s FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + "SELECT row0.s;\n", + + "SELECT s FROM " + currentTable + " WHERE k = ? LIMIT 1"); + }); + } + + private void testQueryStaticColumn(Cluster cluster, String accordReadQuery, String simpleReadQuery) + { + logger().info("Empty table"); + int key = 10; + assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); + + cluster.get(1).coordinator().execute("INSERT INTO " + currentTable + " (k, s) VALUES (?, null);", ConsistencyLevel.ALL, key); + logger().info("null -> static column"); + assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); + + cluster.get(1).coordinator().execute("INSERT INTO " + currentTable + " (k, s) VALUES (?, 1);", ConsistencyLevel.ALL, key); + logger().info("Inserted 1 -> static column"); + assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); + + cluster.get(1).coordinator().execute("INSERT INTO " + currentTable + " (k, c) VALUES (?, 0);", ConsistencyLevel.ALL, key); + logger().info("Inserted 0 -> clustering"); + assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key); + } + + @Test + public void testUpdateStaticColumn() throws Exception { + test("CREATE TABLE " + currentTable + " (k int, c int, s int static, v int, primary key (k, c))", + cluster -> + { + checkUpdateStatic(cluster, "SET s=1 WHERE k=?", 101, "[[101, null, 1, null]]", "[]"); + checkUpdateStatic(cluster, "SET s=1, v=11 WHERE k=? AND c=0", 101, "[[101, 0, 1, 11]]", "[[101, 0, 1, 11]]"); + + // commented out until org.apache.cassandra.cql3.statements.ModificationStatement.createSelectForTxn is fixed + // checkUpdateStatic(cluster, "SET s+=1 WHERE k=?", 101, "[]", "[]"); + + checkUpdateStatic(cluster, "SET s+=1, v+=11 WHERE k=? AND c=0", 101, "[]", "[]"); + }); + } + + private void checkUpdateStatic(Cluster cluster, String update, int key, String expPart, String expClust) + { + Object[][] r1, r2, r3, r4, r; + r = cluster.get(1).coordinator().execute("UPDATE " + currentTable + " " + update + " IF s = NULL;", ConsistencyLevel.QUORUM, key); + Assertions.assertThat(Arrays.deepToString(r)).isEqualTo("[[true]]"); + r1 = cluster.get(1).coordinator().execute("SELECT * FROM " + currentTable + " WHERE k = ? LIMIT 1;", ConsistencyLevel.SERIAL, key); + r2 = cluster.get(1).coordinator().execute("SELECT * FROM " + currentTable + " WHERE k = ? AND c = 0;", ConsistencyLevel.SERIAL, key); + cluster.get(1).coordinator().execute("TRUNCATE " + currentTable, ConsistencyLevel.ALL); + + executeAsTxn(cluster, "UPDATE " + currentTable + " " + update + ";", key); + r3 = executeAsTxn(cluster, "SELECT * FROM " + currentTable + " WHERE k = ? LIMIT 1;", key).toObjectArrays(); + r4 = executeAsTxn(cluster, "SELECT * FROM " + currentTable + " WHERE k = ? AND c = 0;", key).toObjectArrays(); + cluster.get(1).coordinator().execute("TRUNCATE " + currentTable, ConsistencyLevel.ALL); + + Assertions.assertThat(Arrays.deepToString(r1)).isEqualTo(expPart); + Assertions.assertThat(Arrays.deepToString(r2)).isEqualTo(expClust); + Assertions.assertThat(Arrays.deepToString(r3)).isEqualTo(expPart); + Assertions.assertThat(Arrays.deepToString(r4)).isEqualTo(expClust); + } + + private void assertResultsFromAccordMatches(Cluster cluster, String accordRead, String simpleRead, int key) + { + Object[][] simpleReadResult = cluster.get(1).executeInternal(simpleRead, key); + Object[][] accordReadResult = executeWithRetry(cluster, accordRead, key).toObjectArrays(); + + Assertions.assertThat(withRemovedNullOnlyRows(accordReadResult)).isEqualTo(withRemovedNullOnlyRows(simpleReadResult)); + } + + private static Object[][] withRemovedNullOnlyRows(Object[][] results) + { + return Arrays.stream(results) + .filter(row -> !Arrays.stream(row).allMatch(Objects::isNull)) + .toArray(Object[][]::new); + } + + @Test + public void testScalarEQ() throws Throwable + { + testScalarCondition(3, "=", 3, "="); + } + + @Test + public void testScalarNEQ() throws Throwable + { + testScalarCondition(3, "!=", 4, "!="); + } + + @Test + public void testScalarLt() throws Throwable + { + testScalarCondition(3, "<", 4, ">"); + } + + @Test + public void testScalarLte() throws Throwable + { + testScalarCondition(3, "<=", 3, ">="); + setup(); + testScalarCondition(3, "<=", 4, ">="); + } + + @Test + public void testScalarGt() throws Throwable + { + testScalarCondition(4, ">", 3, "<"); + } + + @Test + public void testScalarGte() throws Throwable + { + testScalarCondition(4, ">=", 3, "<="); + setup(); + testScalarCondition(4, ">=", 4, "<="); + } + + @Test + public void testStaticScalarEQ() throws Throwable + { + testScalarCondition("CREATE TABLE " + currentTable + " (k int, c int, v int static, primary key (k, c))", 3, "=", 3, "="); + } + + private void testScalarCondition(int lhs, String operator, int rhs, String reversedOperator) throws Exception + { + testScalarCondition("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))", lhs, operator, rhs, reversedOperator); + } + + private void testScalarCondition(String tableDDL, int lhs, String operator, int rhs, String reversedOperator) throws Exception + { + test(tableDDL, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, " + lhs + ");", ConsistencyLevel.ALL); + + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT v FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + " SELECT row1.v;\n" + + " IF row1.v " + operator + " ? THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { lhs }, query, 0, rhs, 1, 0, 1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, 0, 1 }, check, 1, 0); + + String queryWithReversed = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT v FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + " SELECT row1.v;\n" + + " IF ? " + reversedOperator + " row1.v THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { lhs }, queryWithReversed, 0, rhs, 2, 0, 1); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2, 0, 1 }, check, 2, 0); + }); + } + + @Test + public void testReadOnlyTx() throws Exception + { + test(cluster -> + { + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY); + assertFalse(result.hasNext()); + }); + } + + @Test + public void testWriteOnlyTx() throws Exception + { + test(cluster -> + { + String query = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1); + assertFalse(result.hasNext()); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k=? AND c=?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 1}, check, 0, 0); + }); + } + + @Test + public void testReturningLetReferences() throws Throwable + { + test(cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " LET row2 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " SELECT row1.v, row2.k, row2.c, row2.v;\n" + + " IF row1 IS NULL AND row2.v = ? THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1, 0, 3, 0, 0, 1); + assertEquals(ImmutableList.of("row1.v", "row2.k", "row2.c", "row2.v"), result.names()); + assertThat(result).hasSize(1).contains(null, 1, 0, 3); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 1}, check); + }); + } + + @Test + public void testReversedClusteringReference() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1);\n" + + " SELECT row1.k, row1.c, row1.v;\n" + + " IF row1.c = 1 THEN\n" + + " UPDATE " + currentTable + " SET v += row1.c WHERE k=1 AND c=1;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 2}, check); + }); + } + + @Test + public void testScalarShorthandAddition() throws Exception + { + testScalarShorthandOperation(1, "+=", 2); + } + + @Test + public void testScalarShorthandSubtraction() throws Exception + { + testScalarShorthandOperation(3, "-=", 2); + } + + private void testScalarShorthandOperation(int startingValue, String operation, int endingvalue) throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v) VALUES (1, ?)", ConsistencyLevel.ALL, startingValue); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.v;\n" + + " UPDATE " + currentTable + " SET v " + operation + " 1 WHERE k = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEquals(cluster, new Object[] { startingValue }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT v FROM " + currentTable + " WHERE k = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { endingvalue }, check); + }); + } + + @Test + public void testPartitionKeyReferenceCondition() throws Exception + { + test("CREATE TABLE " + currentTable + " (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1);\n" + + " SELECT row1.k, row1.c, row1.v;\n" + + " IF row1.k = 1 THEN\n" + + " UPDATE " + currentTable + " SET v += row1.k WHERE k=1 AND c=1;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 2}, check); + }); + } + + @Test + public void testMultiPartitionKeyReferenceCondition() throws Exception + { + test("CREATE TABLE " + currentTable + " (pk1 INT, pk2 INT, c INT, v INT, PRIMARY KEY ((pk1, pk2), c)) WITH CLUSTERING ORDER BY (c DESC)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (pk1, pk2, c, v) VALUES (1, 1, 1, 1)", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1);\n" + + " SELECT row1.pk1, row1.pk2, row1.c, row1.v;\n" + + " IF row1.pk1 = 1 THEN\n" + + " UPDATE " + currentTable + " SET v += row1.pk2 WHERE pk1 = 1 AND pk2 = 1 AND c=1;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1, 1}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1, 2}, check); + }); + } + + @Test + public void testMultiCellListEqCondition() throws Exception + { + testListEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + } + + @Test + public void testFrozenListEqCondition() throws Exception + { + testListEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + } + + private void testListEqCondition(String ddl) throws Exception + { + test(ddl, + cluster -> + { + ListType listType = ListType.getInstance(Int32Type.instance, true); + List initialList = Arrays.asList(1, 2); + ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, int_list) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialListBytes); + assertFalse(result.hasNext()); + + List updatedList = Arrays.asList(1, 2, 3); + ByteBuffer updatedListBytes = listType.getSerializer().serialize(updatedList); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list = ? THEN\n" + + " UPDATE " + currentTable + " SET int_list = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialList}, update, 0, initialListBytes, updatedListBytes, 0); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, updatedList}, check, 0); + } + ); + } + + @Test + public void testMultiCellSetEqCondition() throws Exception + { + testSetEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + } + + @Test + public void testFrozenSetEqCondition() throws Exception + { + testSetEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + } + + private void testSetEqCondition(String ddl) throws Exception + { + test(ddl, + cluster -> + { + SetType setType = SetType.getInstance(Int32Type.instance, true); + Set initialSet = ImmutableSet.of(1, 2); + ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialSetBytes); + assertFalse(result.hasNext()); + + Set updatedSet = ImmutableSet.of(1, 2, 3); + ByteBuffer updatedSetBytes = setType.getSerializer().serialize(updatedSet); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set = ? THEN\n" + + " UPDATE " + currentTable + " SET int_set = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialSet}, update, 0, initialSetBytes, updatedSetBytes, 0); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, updatedSet}, check, 0); + } + ); + } + + @Test + public void testMultiCellMapEqCondition() throws Exception + { + testMapEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + } + + @Test + public void testFrozenMapEqCondition() throws Exception + { + testMapEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + } + + private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exception + { + test(ddl, + cluster -> + { + MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); + Map initialMap = ImmutableMap.of("one", 1, "two", 2); + ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialMapBytes); + assertFalse(result.hasNext()); + + Map updatedMap = ImmutableMap.of("one", 1, "two", 2, "three", 3); + ByteBuffer updatedMapBytes = mapType.getSerializer().serialize(updatedMap); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_map;\n" + + " IF row1.int_map = ? THEN\n" + + " UPDATE " + currentTable + " SET int_map = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, initialMapBytes, updatedMapBytes, 0); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, check, 0); + } + ); + } + + @Test + public void testMultiCellUDTEqCondition() throws Exception + { + testUDTEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + } + + @Test + public void testFrozenUDTEqCondition() throws Exception + { + testUDTEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + } + + private void testUDTEqCondition(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialPersonBuffer); + assertFalse(result.hasNext()); + + Object updatedPersonValue = CQLTester.userType("height", 73, "age", 40); + ByteBuffer updatedPersonBuffer = CQLTester.makeByteBuffer(updatedPersonValue, null); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.customer;\n" + + " IF row1.customer = ? THEN\n" + + " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, initialPersonBuffer, updatedPersonBuffer, 0); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, check, 0); + } + ); + } + + @Test + public void testTupleEqCondition() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, pair tuple)", + cluster -> + { + Object initialTupleValue = CQLTester.tuple("age", 37); + ByteBuffer initialTupleBuffer = CQLTester.makeByteBuffer(initialTupleValue, null); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, pair) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialTupleBuffer); + assertFalse(result.hasNext()); + + Object updatedTupleValue = CQLTester.userType("age", 40); + ByteBuffer updatedTupleBuffer = CQLTester.makeByteBuffer(updatedTupleValue, null); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.pair;\n" + + " IF row1.pair = ? THEN\n" + + " UPDATE " + currentTable + " SET pair = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialTupleBuffer }, update, 0, initialTupleBuffer, updatedTupleBuffer, 0); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedTupleBuffer }, check, 0); + } + ); + } + + @Test + public void testIsNullWithComplexDeletion() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, int_list list, PRIMARY KEY (k, c))", + cluster -> + { + ListType listType = ListType.getInstance(Int32Type.instance, true); + List initialList = Arrays.asList(1, 2); + ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); + + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, int_list) VALUES (0, 0, ?);", ConsistencyLevel.ALL, initialListBytes); + cluster.forEach(i -> i.flush(KEYSPACE)); + cluster.coordinator(1).execute("DELETE int_list FROM " + currentTable + " WHERE k = 0 AND c = 0;", ConsistencyLevel.ALL); + + List updatedList = Arrays.asList(1, 2, 3); + ByteBuffer updatedListBytes = listType.getSerializer().serialize(updatedList); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, int_list) VALUES (?, ?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, 0, 0, updatedListBytes); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, updatedList }, check, 0, 0); + } + ); + } + + @Test + public void testNullMultiCellListConditions() throws Exception + { + testNullListConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + } + + @Test + public void testNullFrozenListConditions() throws Exception + { + testNullListConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + } + + private void testNullListConditions(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, null);", ConsistencyLevel.ALL); + + ListType listType = ListType.getInstance(Int32Type.instance, true); + List initialList = Arrays.asList(1, 2); + ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_list) VALUES (?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialListBytes); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialList}, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET int_list = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + List updatedList = Arrays.asList(1, 2, 3); + ByteBuffer updatedListBytes = listType.getSerializer().serialize(updatedList); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialList}, update, 0, updatedListBytes, 0); + } + ); + } + + @Test + public void testNullMultiCellSetConditions() throws Exception + { + testNullSetConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + } + + @Test + public void testNullFrozenSetConditions() throws Exception + { + testNullSetConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + } + + private void testNullSetConditions(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, null);", ConsistencyLevel.ALL); + + SetType setType = SetType.getInstance(Int32Type.instance, true); + Set initialSet = ImmutableSet.of(1, 2); + ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialSetBytes); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialSet}, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET int_set = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Set updatedSet = ImmutableSet.of(1, 2, 3); + ByteBuffer updatedSetBytes = setType.getSerializer().serialize(updatedSet); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialSet}, update, 0, updatedSetBytes, 0); + } + ); + } + + @Test + public void testNullMultiCellMapConditions() throws Exception + { + testNullMapConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + } + + @Test + public void testNullFrozenMapConditions() throws Exception + { + testNullMapConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + } + + private void testNullMapConditions(String ddl, boolean isMultiCell) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); + + MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); + Map initialMap = ImmutableMap.of("one", 1, "two", 2); + ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_map;\n" + + " IF row1.int_map IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialMapBytes); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialMap }, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_map;\n" + + " IF row1.int_map IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET int_map = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Map updatedMap = ImmutableMap.of("one", 1, "two", 2, "three", 3); + ByteBuffer updatedMapBytes = mapType.getSerializer().serialize(updatedMap); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, updatedMapBytes, 0); + + String checkUpdate = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, checkUpdate, 0); + } + ); + } + + @Test + public void testNullMultiCellUDTCondition() throws Exception + { + testNullUDTCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + } + + @Test + public void testNullFrozenUDTCondition() throws Exception + { + testNullUDTCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + } + + private void testNullUDTCondition(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.customer;\n" + + " IF row1.customer IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialPersonBuffer); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.customer;\n" + + " IF row1.customer IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Object updatedPersonValue = CQLTester.userType("height", 73, "age", 40); + ByteBuffer updatedPersonBuffer = CQLTester.makeByteBuffer(updatedPersonValue, null); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); + + String checkUpdate = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); + } + ); + } + + @Test + public void testNullMultiCellSetElementConditions() throws Exception + { + testNullSetElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + } + + @Test + public void testNullFrozenSetElementConditions() throws Exception + { + testNullSetElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + } + + private void testNullSetElementConditions(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1});", ConsistencyLevel.ALL); + + SetType setType = SetType.getInstance(Int32Type.instance, true); + Set initialSet = ImmutableSet.of(1, 2); + ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_set[2];\n" + + " IF row1.int_set[2] IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialSetBytes); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialSet}, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set[2] IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET int_set = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Set updatedSet = ImmutableSet.of(1, 2, 3); + ByteBuffer updatedSetBytes = setType.getSerializer().serialize(updatedSet); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialSet}, update, 0, updatedSetBytes, 0); + } + ); + } + + @Test + public void testNullMultiCellMapElementConditions() throws Exception + { + testNullMapElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + } + + @Test + public void testNullFrozenMapElementConditions() throws Exception + { + testNullMapElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + } + + private void testNullMapElementConditions(String ddl, boolean isMultiCell) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); + + MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); + Map initialMap = ImmutableMap.of("one", 1, "two", 2); + ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_map;\n" + + " IF row1.int_map[?] IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, "one", 0, initialMapBytes); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialMap }, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_map;\n" + + " IF row1.int_map[?] IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET int_map = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Map updatedMap = ImmutableMap.of("one", 1, "two", 2, "three", 3); + ByteBuffer updatedMapBytes = mapType.getSerializer().serialize(updatedMap); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, "two", updatedMapBytes, 0); + + String checkUpdate = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, checkUpdate, 0); + } + ); + } + + @Test + public void testNullMultiCellUDTFieldCondition() throws Exception + { + testNullUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + } + + @Test + public void testNullFrozenUDTFieldCondition() throws Exception + { + testNullUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + } + + private void testNullUDTFieldCondition(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.customer;\n" + + " IF row1.customer.age IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialPersonBuffer); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.customer;\n" + + " IF row1.customer.age IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Object updatedPersonValue = CQLTester.userType("height", 73, "age", 40); + ByteBuffer updatedPersonBuffer = CQLTester.makeByteBuffer(updatedPersonValue, null); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); + + String checkUpdate = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); + } + ); + } + + @Test + public void testMultiCellListSubstitution() throws Exception + { + testListSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", true); + } + + @Test + public void testFrozenListSubstitution() throws Exception + { + testListSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)", false); + } + + private void testListSubstitution(String ddl, boolean isMultiCell) throws Exception + { + test(ddl, + cluster -> + { + ListType listType = ListType.getInstance(Int32Type.instance, isMultiCell); + List initialList = Arrays.asList(1, 2); + ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); + + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, ?);", ConsistencyLevel.ALL, initialListBytes); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list IS NOT NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_list) VALUES (?, row1.int_list);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialList }, insert, 0, 1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialList }, check, 1); + } + ); + } + + @Test + public void testMultiCellSetSubstitution() throws Exception + { + testSetSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)", true); + } + + @Test + public void testFrozenSetSubstitution() throws Exception + { + testSetSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)", false); + } + + private void testSetSubstitution(String ddl, boolean isMultiCell) throws Exception + { + test(ddl, + cluster -> + { + SetType setType = SetType.getInstance(Int32Type.instance, isMultiCell); + Set initialSet = ImmutableSet.of(1, 2); + ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); + + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, ?);", ConsistencyLevel.ALL, initialSetBytes); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set IS NOT NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, row1.int_set);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialSet }, insert, 0, 1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialSet }, check, 1); + } + ); + } + + @Test + public void testMultiCellMapSubstitution() throws Exception + { + testMapSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + } + + @Test + public void testFrozenMapSubstitution() throws Exception + { + testMapSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + } + + private void testMapSubstitution(String ddl, boolean isMultiCell) throws Exception + { + test(ddl, + cluster -> + { + MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); + Map initialMap = ImmutableMap.of("one", 1, "two", 2); + ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); + + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, ?);", ConsistencyLevel.ALL, initialMapBytes); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.int_map;\n" + + " IF row1.int_map IS NOT NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, row1.int_map);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ initialMap }, insert, 0, 1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialMap }, check, 1); + } + ); + } + + @Test + public void testMultiCellUDTSubstitution() throws Exception + { + testUDTSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + } + + @Test + public void testFrozenUDTSubstitution() throws Exception + { + testUDTSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + } + + private void testUDTSubstitution(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.customer;\n" + + " IF row1.customer IS NOT NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, customer) VALUES (?, row1.customer);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ initialPersonBuffer }, insert, 0, 1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialPersonBuffer }, check, 1); + } + ); + } + + @Test + public void testTupleSubstitution() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, pair tuple)", + cluster -> + { + Object initialTupleValue = CQLTester.tuple("age", 37); + ByteBuffer initialTupleBuffer = CQLTester.makeByteBuffer(initialTupleValue, null); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, pair) VALUES (0, ?);", ConsistencyLevel.ALL, initialTupleBuffer); + + String insert = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.pair;\n" + + " IF row1.pair IS NOT NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, pair) VALUES (?, row1.pair);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialTupleBuffer }, insert, 0, 1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialTupleBuffer }, check, 1); + } + ); + } + + @Test + public void testMultiCellListReplacement() throws Exception + { + testListReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + } + + @Test + public void testFrozenListReplacement() throws Exception + { + testListReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + } + + private void testListReplacement(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list = [3, 4] THEN\n" + + " UPDATE " + currentTable + " SET int_list = row1.int_list WHERE k=0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(3, 4)}, check); + } + ); + } + + @Test + public void testMultiCellSetReplacement() throws Exception + { + testSetReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + } + + @Test + public void testFrozenSetReplacement() throws Exception + { + testSetReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + } + + private void testSetReplacement(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set = {3, 4} THEN\n" + + " UPDATE " + currentTable + " SET int_set = row1.int_set WHERE k=0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(3, 4) }, check); + } + ); + } + + @Test + public void testListAppendFromReference() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list = [3, 4] THEN\n" + + " UPDATE " + currentTable + " SET int_list += row1.int_list WHERE k=0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(1, 2, 3, 4)}, check); + } + ); + } + + @Test + public void testSetByIndexFromMultiCellListElement() throws Exception + { + testListSetByIndexFromListElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, src_int_list list, dest_int_list list)"); + } + + @Test + public void testSetByIndexFromFrozenListElement() throws Exception + { + testListSetByIndexFromListElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, src_int_list frozen>, dest_int_list list)"); + } + + private void testListSetByIndexFromListElement(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, dest_int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, src_int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.src_int_list;\n" + + " UPDATE " + currentTable + " SET dest_int_list[0] = row1.src_int_list[0] WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT dest_int_list FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 2)}, check); + } + ); + } + + @Test + public void testListSetByIndexFromScalar() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0);\n" + + " SELECT row0.int_list;\n" + + " UPDATE " + currentTable + " SET int_list[0] = 2 WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(1, 2)}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT int_list FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(2, 2)}, check); + } + ); + } + + @Test + public void testAutoReadSelectionConstruction() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, counter int, other_counter int, PRIMARY KEY (k, c))", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, counter, other_counter) VALUES (0, 0, 1, 1);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, counter, other_counter) VALUES (0, 1, 1, 1);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " SELECT row0.counter, row0.other_counter;\n" + + " UPDATE " + currentTable + " SET other_counter += 1, counter += row0.counter WHERE k = 0 AND c = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEquals(cluster, new Object[] { 1, 1 }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT counter, other_counter FROM " + currentTable + " WHERE k = 0 AND c = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2, 2 }, check); + } + ); + } + + @Test + public void testMultiMutationsSameKey() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, counter int, int_list list, PRIMARY KEY (k, c))", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, counter, int_list) VALUES (0, 0, 0, [1, 2]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " SELECT row0.counter, row0.int_list;\n" + + " UPDATE " + currentTable + " SET int_list[0] = 42 WHERE k = 0 AND c = 0;\n" + + " UPDATE " + currentTable + " SET counter += 1 WHERE k = 0 AND c = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEquals(cluster, new Object[] { 0, Arrays.asList(1, 2) }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT counter, int_list FROM " + currentTable + " WHERE k = 0 AND c = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {1, Arrays.asList(42, 2)}, check); + } + ); + } + + @Test + public void testLetLargerThanOneWithPK() throws Exception + { + test(cluster -> { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); + + String cql = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0 LIMIT 2);\n" + + " SELECT row1.v;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ 0 }, cql, 1); + }); + } + + @Test + public void testLetLimitUsingBind() throws Exception + { + test(cluster -> { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); + + String cql = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT ?);\n" + + " SELECT row1.v;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0 }, cql, 1); + }); + } + + @Test + public void testListSetByIndexMultiRow() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, int_list list, PRIMARY KEY (k, c))", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, int_list) VALUES (0, 0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, int_list) VALUES (0, 1, [3, 4]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 1);\n" + + " SELECT row0.int_list;\n" + + " UPDATE " + currentTable + " SET int_list[0] = row1.int_list[0] WHERE k = 0 AND c = 0;\n" + + " UPDATE " + currentTable + " SET int_list[0] = row0.int_list[0] WHERE k = 0 AND c = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { Arrays.asList(1, 2) }, update); + + String check = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 1);\n" + + " SELECT row0.int_list, row1.int_list;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 2), Arrays.asList(1, 4)}, check); + } + ); + } + + @Test + public void testSetAppend() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_set;\n" + + " UPDATE " + currentTable + " SET int_set += row1.int_set WHERE k=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(1, 2, 3, 4) }, check); + } + ); + } + + @Test + public void testAssignmentFromMultiCellSetElement() throws Exception + { + testAssignmentFromSetElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_set set)"); + } + + @Test + public void testAssignmentFromFrozenSetElement() throws Exception + { + testAssignmentFromSetElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_set frozen>)"); + } + + private void testAssignmentFromSetElement(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_set) VALUES (0, 0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_set) VALUES (1, 0, {3, 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_set;\n" + + " UPDATE " + currentTable + " SET v = row1.int_set[4] WHERE k=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT v FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); + } + ); + } + + @Test + public void testMapAppend() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_map;\n" + + " UPDATE " + currentTable + " SET int_map += row1.int_map WHERE k=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("one", 2, "three", 4) }, check); + } + ); + } + + @Test + public void testAssignmentFromMultiCellMapElement() throws Exception + { + testAssignmentFromMapElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_map map)"); + } + + @Test + public void testAssignmentFromFrozenMapElement() throws Exception + { + testAssignmentFromMapElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_map frozen>)"); + } + + private void testAssignmentFromMapElement(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_map) VALUES (0, 0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_map) VALUES (1, 0, {'three': 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_map;\n" + + " UPDATE " + currentTable + " SET v = row1.int_map[?] WHERE k=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "three"); + + String check = "BEGIN TRANSACTION\n" + + " SELECT v FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); + } + ); + } + + @Test + public void testAssignmentFromMultiCellUDTField() throws Exception + { + testAssignmentFromUDTField("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, customer person)"); + } + + @Test + public void testAssignmentFromFrozenUDTField() throws Exception + { + testAssignmentFromUDTField("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, customer frozen)"); + } + + private void testAssignmentFromUDTField(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, customer) VALUES (0, 0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, customer) VALUES (1, 0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.customer;\n" + + " UPDATE " + currentTable + " SET v = row1.customer.age WHERE k=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT v FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 37 }, check); + } + ); + } + + @Test + public void testSetMapElementFromMapElementReference() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_map;\n" + + " UPDATE " + currentTable + " SET int_map[?] = row1.int_map[?] WHERE k=0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "one", "three"); + + String check = "BEGIN TRANSACTION\n" + + " SELECT int_map[?] FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check, "one"); + } + ); + } + + @Test + public void testSetUDTFieldFromUDTFieldReference() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)", + cluster -> + { + Object youngPerson = CQLTester.userType("height", 58, "age", 9); + ByteBuffer youngPersonBuffer = CQLTester.makeByteBuffer(youngPerson, null); + Object adultPerson = CQLTester.userType("height", 74, "age", 37); + ByteBuffer adultPersonBuffer = CQLTester.makeByteBuffer(adultPerson, null); + + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, youngPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, customer) VALUES (1, ?);", ConsistencyLevel.ALL, adultPersonBuffer); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.customer;\n" + + " UPDATE " + currentTable + " SET customer.age = row1.customer.age WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { adultPersonBuffer }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT customer.height, customer.age FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 58, 37 }, check); + } + ); + } + + @Test + public void testMultiCellListElementCondition() throws Exception + { + testListElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + } + + @Test + public void testFrozenListElementCondition() throws Exception + { + testListElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + } + + private void testListElementCondition(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list[1] = 4 THEN\n" + + " UPDATE " + currentTable + " SET int_list = [3, 4] WHERE k = 0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableList.of(3, 4) }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableList.of(3, 4) }, check); + } + ); + } + + @Test + public void testMultiCellMapElementCondition() throws Exception + { + testMapElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)"); + } + + @Test + public void testFrozenMapElementCondition() throws Exception + { + testMapElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)"); + } + + private void testMapElementCondition(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_map;\n" + + " IF row1.int_map[?] = 4 THEN\n" + + " UPDATE " + currentTable + " SET int_map = {'three': 4} WHERE k = 0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "three"); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("three", 4) }, check); + } + ); + } + + @Test + public void testMultiCellUDTFieldCondition() throws Exception + { + testUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + } + + @Test + public void testFrozenUDTFieldCondition() throws Exception + { + testUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + } + + private void testUDTFieldCondition(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialPersonBuffer); + assertFalse(result.hasNext()); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row1.customer;\n" + + " IF row1.customer.age = 37 THEN\n" + + " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Object updatedPersonValue = CQLTester.userType("height", 73, "age", 40); + ByteBuffer updatedPersonBuffer = CQLTester.makeByteBuffer(updatedPersonValue, null); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); + + String checkUpdate = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); + } + ); + } + + @Test + public void testListSubtraction() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2, 3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_list;\n" + + " IF row1.int_list = [3, 4] THEN\n" + + " UPDATE " + currentTable + " SET int_list -= row1.int_list WHERE k=0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(1, 2)}, check); + } + ); + } + + @Test + public void testSetSubtraction() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1, 2, 3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set = {3, 4} THEN\n" + + " UPDATE " + currentTable + " SET int_set -= row1.int_set WHERE k=0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(1, 2) }, check); + } + ); + } + + @Test + public void testMultiCellMapSubtraction() throws Exception + { + testMapSubtraction("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map, int_set set)"); + } + + @Test + public void testFrozenMapSubtraction() throws Exception + { + testMapSubtraction("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map, int_set frozen>)"); + } + + private void testMapSubtraction(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, { 'one': 2, 'three': 4 });", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, { 'three' });", ConsistencyLevel.ALL); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_set;\n" + + " IF row1.int_set = { 'three' } THEN\n" + + " UPDATE " + currentTable + " SET int_map -= row1.int_set WHERE k=0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of("three") }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("one", 2), null}, check); + } + ); + } + + @Test + public void testMultiCellListSelection() throws Exception + { + testListSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + } + + @Test + public void testFrozenListSelection() throws Exception + { + testListSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + } + + private void testListSelection(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [10, 20, 30, 40]);", ConsistencyLevel.ALL); + + String selectEntireSet = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_list;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableList.of(10, 20, 30, 40) }, selectEntireSet); + + String selectSingleElement = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_list[0];\n" + + "COMMIT TRANSACTION"; + + SimpleQueryResult result = executeWithRetry(cluster, selectSingleElement); + // TODO: Improve user frieldliness of the hex key name here... + Assertions.assertThat(result.names()).contains("row1.int_list[0x00000000]"); + Assertions.assertThat(result.toObjectArrays()).isEqualTo(new Object[] { new Object[] { 10 } }); + } + ); + } + + @Test + public void testMultiCellSetSelection() throws Exception + { + testSetSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + } + + @Test + public void testFrozenSetSelection() throws Exception + { + testSetSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + } + + private void testSetSelection(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {10, 20, 30, 40});", ConsistencyLevel.ALL); + + String selectEntireSet = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_set;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(10, 20, 30, 40) }, selectEntireSet); + + String selectSingleElement = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_set[10];\n" + + "COMMIT TRANSACTION"; + + SimpleQueryResult result = executeWithRetry(cluster, selectSingleElement); + // TODO: Improve user frieldliness of the hex key name here... + Assertions.assertThat(result.names()).contains("row1.int_set[0x0000000a]"); + Assertions.assertThat(result.toObjectArrays()).isEqualTo(new Object[] { new Object[] { 10 } }); + } + ); + } + + @Test + public void testMultiCellMapSelection() throws Exception + { + testMapSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)"); + } + + @Test + public void testFrozenMapSelection() throws Exception + { + testMapSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)"); + } + + private void testMapSelection(String ddl) throws Exception + { + test(ddl, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, { 'ten': 20, 'thirty': 40 });", ConsistencyLevel.ALL); + + String selectEntireMap = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_map;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("ten", 20, "thirty", 40) }, selectEntireMap); + + String selectSingleElement = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " SELECT row1.int_map['ten'];\n" + + "COMMIT TRANSACTION"; + + SimpleQueryResult result = executeWithRetry(cluster, selectSingleElement); + Assertions.assertThat(result.names()).contains("row1.int_map[" + Bytes.toHexString("ten".getBytes()) + ']'); + Assertions.assertThat(result.toObjectArrays()).isEqualTo(new Object[] { new Object[] { 20 } }); + } + ); + } + + @Test + public void testScalarUpdateSubstitution() + { + String KEYSPACE = "ks" + System.currentTimeMillis(); + SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 2}"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + "1 (k int, c int, v int, primary key (k, c))"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + "2 (k int, c int, v int, primary key (k, c))"); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + "1 (k, c, v) VALUES (1, 2, 3);", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + "2 (k, c, v) VALUES (2, 2, 4);", ConsistencyLevel.ALL); + + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + "1 WHERE k=1 AND c=2);\n" + + " LET row2 = (SELECT * FROM " + currentTable + "2 WHERE k=2 AND c=2);\n" + + " SELECT v FROM " + currentTable + "1 WHERE k=1 AND c=2;\n" + + " IF row1.v = 3 AND row2.v = 4 THEN\n" + + " UPDATE " + currentTable + "1 SET v = row2.v WHERE k=1 AND c=2;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + Object[][] result = SHARED_CLUSTER.coordinator(1).execute(query, ConsistencyLevel.ANY); + assertEquals(3, result[0][0]); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + "1 WHERE k=1 AND c=2;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(SHARED_CLUSTER, new Object[]{1, 2, 4}, check); + } + + @Test + public void testRegularScalarInsertSubstitution() throws Exception + { + testScalarInsertSubstitution("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))"); + } + + @Test + public void testStaticScalarInsertSubstitution() throws Exception + { + testScalarInsertSubstitution("CREATE TABLE " + currentTable + " (k int, c int, v int static, PRIMARY KEY (k, c))"); + } + + private void testScalarInsertSubstitution(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 1);", ConsistencyLevel.ALL); + + String insert = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT 1);\n" + + " SELECT row0.v;\n" + + " IF row0.v IS NOT NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 1, row0.v);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1 }, insert); + + String check = "BEGIN TRANSACTION\n" + + " SELECT k, c, v FROM " + currentTable + " WHERE k = 0 AND c = 1;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 1, 1 }, check); + } + ); + } + + @Test + public void testSelectMultiCellUDTReference() throws Exception + { + testSelectUDTReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + } + + @Test + public void testSelectFrozenUDTReference() throws Exception + { + testSelectUDTReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + } + + private void testSelectUDTReference(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object personValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer personBuffer = CQLTester.makeByteBuffer(personValue, null); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, personBuffer); + assertFalse(result.hasNext()); + + String read = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row0.customer;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { personBuffer }, read, 0); + } + ); + } + + @Test + public void testSelectMultiCellUDTFieldReference() throws Exception + { + testSelectUDTFieldReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + } + + @Test + public void testSelectFrozenUDTFieldReference() throws Exception + { + testSelectUDTFieldReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + } + + private void testSelectUDTFieldReference(String tableDDL) throws Exception + { + test(tableDDL, + cluster -> + { + Object personValue = CQLTester.userType("height", 74, "age", 37); + ByteBuffer personBuffer = CQLTester.makeByteBuffer(personValue, null); + + String insert = "BEGIN TRANSACTION\n" + + " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, personBuffer); + assertFalse(result.hasNext()); + + String read = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " SELECT row0.customer.age;\n" + + "COMMIT TRANSACTION"; + result = assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 37 }, read, 0); + // TODO: Improve user frieldliness of the field name here... + assertEquals(ImmutableList.of("row0.customer.0x0001"), result.names()); + } + ); + } + + @Test + public void testMultiKeyQueryAndInsert() throws Throwable + { + test("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))", + cluster -> + { + String query1 = "BEGIN TRANSACTION\n" + + " LET select1 = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " LET select2 = (SELECT * FROM " + currentTable + " WHERE k=1 AND c=0);\n" + + " SELECT v FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + " IF select1 IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0);\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 0);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertEmptyWithPreemptedRetry(cluster, query1); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 0}, check, 0, 0); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {1, 0, 0}, check, 1, 0); + + String query2 = "BEGIN TRANSACTION\n" + + " LET select1 = (SELECT * FROM " + currentTable + " WHERE k=1 AND c=0);\n" + + " LET select2 = (SELECT * FROM " + currentTable + " WHERE k=2 AND c=0);\n" + + " SELECT v FROM " + currentTable + " WHERE k=1 AND c=0;\n" + + " IF select1.v = ? THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 1);\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (2, 0, 1);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0 }, query2, 0); + + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 0}, check, 0, 0); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {1, 0, 1}, check, 1, 0); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {2, 0, 1}, check, 2, 0); + }); + } + + @Test + public void demoTest() throws Throwable + { + SHARED_CLUSTER.schemaChange("CREATE KEYSPACE demo_ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2};"); + SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_docs ( org_name text, doc_id int, contents_version int static, title text, permissions int, PRIMARY KEY (org_name, doc_id) );"); + SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_users ( org_name text, user text, members_version int static, permissions int, PRIMARY KEY (org_name, user) );"); + SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.user_docs ( user text, doc_id int, title text, org_name text, permissions int, PRIMARY KEY (user, doc_id) );"); + + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); + + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.org_users (org_name, user, members_version, permissions) VALUES ('demo', 'blake', 5, 777);\n", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.org_users (org_name, user, members_version, permissions) VALUES ('demo', 'scott', 5, 777);\n", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.org_docs (org_name, doc_id, contents_version, title, permissions) VALUES ('demo', 100, 5, 'README', 644);\n", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.user_docs (user, doc_id, title, org_name, permissions) VALUES ('blake', 1, 'recipes', NULL, 777);\n", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.user_docs (user, doc_id, title, org_name, permissions) VALUES ('blake', 100, 'README', 'demo', 644);\n", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.user_docs (user, doc_id, title, org_name, permissions) VALUES ('scott', 2, 'to do list', NULL, 777);\n", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.user_docs (user, doc_id, title, org_name, permissions) VALUES ('scott', 100, 'README', 'demo', 644);\n", ConsistencyLevel.ALL); + + String addDoc = "BEGIN TRANSACTION\n" + + " LET demo_user = (SELECT * FROM demo_ks.org_users WHERE org_name='demo' LIMIT 1);\n" + + " LET existing = (SELECT * FROM demo_ks.org_docs WHERE org_name='demo' AND doc_id=101);\n" + + " SELECT members_version FROM demo_ks.org_users WHERE org_name='demo' LIMIT 1;\n" + + " IF demo_user.members_version = 5 AND existing IS NULL THEN\n" + + " UPDATE demo_ks.org_docs SET title='slides.key', permissions=777, contents_version = 6 WHERE org_name='demo' AND doc_id=101;\n" + + " UPDATE demo_ks.user_docs SET title='slides.key', permissions=777 WHERE user='blake' AND doc_id=101;\n" + + " UPDATE demo_ks.user_docs SET title='slides.key', permissions=777 WHERE user='scott' AND doc_id=101;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEquals(SHARED_CLUSTER, new Object[] { 5 }, addDoc); + + String addUser = "BEGIN TRANSACTION\n" + + " LET demo_doc = (SELECT * FROM demo_ks.org_docs WHERE org_name='demo' LIMIT 1);\n" + + " LET existing = (SELECT * FROM demo_ks.org_users WHERE org_name='demo' AND user='benedict');\n" + + " SELECT contents_version FROM demo_ks.org_docs WHERE org_name='demo' LIMIT 1;\n" + + " IF demo_doc.contents_version = 6 AND existing IS NULL THEN\n" + + " UPDATE demo_ks.org_users SET permissions=777, members_version += 1 WHERE org_name='demo' AND user='benedict';\n" + + " UPDATE demo_ks.user_docs SET title='README', permissions=644 WHERE user='benedict' AND doc_id=100;\n" + + " UPDATE demo_ks.user_docs SET title='slides.key', permissions=777 WHERE user='benedict' AND doc_id=101;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEquals(SHARED_CLUSTER, new Object[] { 6 }, addUser); + } + + // TODO: Implement support for basic arithmetic on references in INSERT + @Ignore + @Test + public void testReferenceArithmeticInInsert() throws Exception + { + test(cluster -> { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + + String cql = "BEGIN TRANSACTION\n" + + " LET a = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " IF a IS NOT NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 1, a.v + 1);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertEmptyWithPreemptedRetry(cluster, cql); + }); + } + + // TODO: Implement support for basic arithmetic on references in UPDATE + @Ignore + @Test + public void testReferenceArithmeticInUpdate() throws Exception + { + test(cluster -> { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + + String cql = "BEGIN TRANSACTION\n" + + " LET a = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " IF a IS NOT NULL THEN\n" + + " UPDATE " + currentTable + " SET v = a.v + 1 WHERE k = 0 and c = 1;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertEmptyWithPreemptedRetry(cluster, cql); + }); + } + + @Test + public void testCASAndSerialRead() throws Exception + { + test("CREATE TABLE " + currentTable + " (id int, c int, v int, s int static, PRIMARY KEY ((id), c));", + cluster -> { + ICoordinator coordinator = cluster.coordinator(1); + int startingAccordCoordinateCount = getAccordCoordinateCount(); + coordinator.execute("INSERT INTO " + currentTable + " (id, c, v, s) VALUES (1, 2, 3, 5);", ConsistencyLevel.ALL); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 3, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{ false, 4 }, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + + // Test working with a static column + assertRowEquals(cluster, new Object[]{ false, 5 }, "UPDATE " + currentTable + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 4"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 5"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 5, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET s = 6 WHERE id = 1 IF s = 5"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 5, 6); + // Make sure all the consensus using queries actually were run on Accord + assertEquals( 11, getAccordCoordinateCount() - startingAccordCoordinateCount); + }); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java new file mode 100644 index 000000000000..6118f765a56b --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.cassandra.db.virtual.AccordVirtualTables; +import org.apache.cassandra.db.virtual.SystemViewsKeyspace; +import org.apache.cassandra.db.virtual.VirtualTable; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.utils.AssertionUtils; + +import static org.junit.Assert.assertEquals; + +import static org.apache.cassandra.config.DatabaseDescriptor.NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.TRANSACTIONS_DISABLED_MESSAGE; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; + +public class AccordFeatureFlagTest extends TestBaseImpl +{ + @Test + public void shouldHideAccordTransactions() throws IOException + { + try (Cluster cluster = init(Cluster.build(1) + .withoutVNodes() + .withConfig(c -> c.with(Feature.NETWORK).set("accord_transactions_enabled", "false")) + .start())) + { + cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (k int, c int, v int, primary key (k, c))"); + + // Any transaction should fail to execute: + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + KEYSPACE + ".tbl WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + Assertions.assertThatThrownBy(() -> cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY)) + .has(AssertionUtils.isThrowableInstanceof(InvalidRequestException.class)) + .hasMessage(TRANSACTIONS_DISABLED_MESSAGE); + + // The Accord system keyspace should not be present: + assertEquals("The Accord system keyspace should not exist", + Optional.empty(), cluster.get(1).callOnInstance(() -> Schema.instance.localKeyspaces().get(ACCORD_KEYSPACE_NAME))); + + // Make sure virtual tables don't exist: + IIsolatedExecutor.SerializableCallable> hasAccordVirtualTables = + () -> SystemViewsKeyspace.instance.tables().stream().filter(t -> t.getClass().equals(AccordVirtualTables.Epoch.class)); + List tables = cluster.get(1).callOnInstance(hasAccordVirtualTables).collect(Collectors.toList()); + assertEquals("No Accord virtual tables should exist", Collections.emptyList(), tables); + + // Make sure we throw if someone tries to coordinate a transaction against the no-op service: + Assertions.assertThatThrownBy(() -> cluster.get(1).callOnInstance(() -> AccordService.instance().coordinate(null, null))) + .isInstanceOf(UnsupportedOperationException.class); + } + } + + @SuppressWarnings("Convert2MethodRef") + @Test + public void shouldFailOnAccordMigrationWithAccordDisabled() throws IOException + { + try (Cluster cluster = Cluster.build(1) + .withoutVNodes() + .withConfig(c -> c.with(Feature.NETWORK) + .set("accord_transactions_enabled", "false") + .set("legacy_paxos_strategy", "accord")).createWithoutStarting()) + { + + Assertions.assertThatThrownBy(() -> cluster.startup()) + .has(AssertionUtils.isThrowableInstanceof(ConfigurationException.class)) + .hasMessage(NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java new file mode 100644 index 000000000000..d128ead48976 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.messages.Commit; +import org.apache.cassandra.distributed.api.IMessageFilters; +import org.apache.cassandra.distributed.impl.Instance; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.Verb; + +@SuppressWarnings("Convert2MethodRef") +public class AccordIntegrationTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordIntegrationTest.class); + + @Override + protected Logger logger() + { + return logger; + } + + @Test + public void testRecovery() throws Exception + { + test(cluster -> { + IMessageFilters.Filter lostApply = cluster.filters().verbs(Verb.ACCORD_APPLY_REQ.id).drop(); + IMessageFilters.Filter lostCommit = cluster.filters().verbs(Verb.ACCORD_COMMIT_REQ.id).to(2).drop(); + + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT v FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " SELECT row1.v;\n" + + " IF row1 IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 1);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + // row1.v shouldn't have existed when the txn's SELECT was executed + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, query); + + lostApply.off(); + lostCommit.off(); + + // Querying again should trigger recovery... + query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT v FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " SELECT row1.v;\n" + + " IF row1.v = 1 THEN\n" + + " UPDATE " + currentTable + " SET v=2 WHERE k = 0 AND c = 0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1 }, query); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 2}, check, 0, 0); + + query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT v FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " SELECT row1.v;\n" + + " IF row1 IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 3);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2 }, query); + + assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 2}, check, 0, 0); + }); + } + + @Test + public void testLostCommitReadTriggersFallbackRead() throws Exception + { + test(cluster -> { + // It's expected that the required Read will happen regardless of whether this fails to return a read + cluster.filters().verbs(Verb.ACCORD_COMMIT_REQ.id).messagesMatching((from, to, iMessage) -> cluster.get(from).callOnInstance(() -> { + Message msg = Instance.deserializeMessage(iMessage); + if (msg.payload instanceof Commit) + return ((Commit) msg.payload).read != null; + return false; + })).drop(); + + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " SELECT row1.v;\n" + + " IF row1 IS NULL THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 1);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, query); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check, 0, 0); + }); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java new file mode 100644 index 000000000000..1a7d59f8fe47 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.coordinate.Preempted; +import accord.primitives.Txn; +import net.bytebuddy.ByteBuddy; +import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; +import net.bytebuddy.implementation.MethodDelegation; +import net.bytebuddy.implementation.bind.annotation.SuperCall; +import net.bytebuddy.implementation.bind.annotation.This; +import org.apache.cassandra.cql3.statements.ModificationStatement; +import org.apache.cassandra.cql3.statements.TransactionStatement; +import org.apache.cassandra.cql3.transactions.ReferenceValue; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.QueryResults; +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.distributed.util.QueryResultUtil; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.utils.AssertionUtils; +import org.apache.cassandra.utils.FailingConsumer; + +import static net.bytebuddy.matcher.ElementMatchers.named; +import static net.bytebuddy.matcher.ElementMatchers.takesArguments; +import static org.junit.Assert.assertArrayEquals; + +public abstract class AccordTestBase extends TestBaseImpl +{ + private static final Logger logger = LoggerFactory.getLogger(AccordTestBase.class); + private static final int MAX_RETRIES = 10; + + protected static final AtomicInteger COUNTER = new AtomicInteger(0); + + protected static Cluster SHARED_CLUSTER; + + protected String currentTable; + + @BeforeClass + public static void setupClass() throws IOException + { + SHARED_CLUSTER = createCluster(); + } + + @AfterClass + public static void teardown() + { + if (SHARED_CLUSTER != null) + SHARED_CLUSTER.close(); + } + + @Before + public void setup() + { + currentTable = KEYSPACE + ".tbl" + COUNTER.getAndIncrement(); + } + + protected static void assertRowSerial(Cluster cluster, String query, int k, int c, int v, int s) + { + Object[][] result = cluster.coordinator(1).execute(query, ConsistencyLevel.SERIAL); + assertArrayEquals(new Object[]{new Object[] {k, c, v, s}}, result); + } + + protected void test(String tableDDL, FailingConsumer fn) throws Exception + { + test(Collections.singletonList(tableDDL), fn); + } + + protected void test(List ddls, FailingConsumer fn) throws Exception + { + for (String ddl : ddls) + SHARED_CLUSTER.schemaChange(ddl); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); + + // Evict commands from the cache immediately to expose problems loading from disk. + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); + + try + { + fn.accept(SHARED_CLUSTER); + } + finally + { + SHARED_CLUSTER.filters().reset(); + } + } + + protected void test(FailingConsumer fn) throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))", fn); + } + + protected int getAccordCoordinateCount() + { + return SHARED_CLUSTER.get(1).callOnInstance(() -> BBAccordCoordinateCountHelper.count.get()); + } + + private static Cluster createCluster() throws IOException + { + // need to up the timeout else tests get flaky + // disable vnode for now, but should enable before trunk + return init(Cluster.build(2) + .withoutVNodes() + .withConfig(c -> c.with(Feature.NETWORK).set("write_request_timeout", "10s") + .set("transaction_timeout", "15s") + .set("legacy_paxos_strategy", "accord")) + .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install) + .withInstanceInitializer(BBAccordCoordinateCountHelper::install) + .start()); + } + + protected static SimpleQueryResult executeAsTxn(Cluster cluster, String check, Object... boundValues) + { + String normalized = wrapInTxn(check); + logger.info("Executing transaction statement:\n{}", normalized); + return cluster.coordinator(1).executeWithResult(normalized, ConsistencyLevel.ANY, boundValues); + } + + protected static SimpleQueryResult execute(Cluster cluster, String check, Object... boundValues) + { + logger.info("Executing statement:\n{}", check); + return cluster.coordinator(1).executeWithResult(check, ConsistencyLevel.ANY, boundValues); + } + + protected static SimpleQueryResult assertRowEquals(Cluster cluster, SimpleQueryResult expected, String check, Object... boundValues) + { + SimpleQueryResult result = execute(cluster, check, boundValues); + QueryResultUtil.assertThat(result).isEqualTo(expected); + return result; + } + + protected static SimpleQueryResult assertRowEquals(Cluster cluster, Object[] row, String check, Object... boundValues) + { + return assertRowEquals(cluster, QueryResults.builder().row(row).build(), check, boundValues); + } + + // TODO: Retry on preemption may become unnecessary after the Unified Log is integrated. + protected SimpleQueryResult assertRowEqualsWithPreemptedRetry(Cluster cluster, Object[] row, String check, Object... boundValues) + { + return assertRowWithPreemptedRetry(cluster, QueryResults.builder().row(row).build(), check, boundValues); + } + + protected SimpleQueryResult assertEmptyWithPreemptedRetry(Cluster cluster, String check, Object... boundValues) + { + return assertRowWithPreemptedRetry(cluster, QueryResults.builder().build(), check, boundValues); + } + + private SimpleQueryResult assertRowWithPreemptedRetry(Cluster cluster, SimpleQueryResult expected, String check, Object... boundValues) + { + SimpleQueryResult result = executeWithRetry(cluster, check, boundValues); + QueryResultUtil.assertThat(result).isEqualTo(expected); + return result; + } + + private SimpleQueryResult executeWithRetry0(int count, Cluster cluster, String check, Object... boundValues) + { + try + { + return execute(cluster, check, boundValues); + } + catch (RuntimeException ex) + { + if (count <= MAX_RETRIES && AssertionUtils.rootCauseIs(Preempted.class).matches(ex)) + { + logger.warn("[Retry attempt={}] Preempted failure for\n{}", count, check); + return executeWithRetry0(count + 1, cluster, check, boundValues); + } + + throw ex; + } + } + + protected SimpleQueryResult executeWithRetry(Cluster cluster, String check, Object... boundValues) + { + check = wrapInTxn(check); + + // is this method safe? + + if (!isIdempotent(cluster, check)) + throw new AssertionError("Unable to retry txn that is not idempotent: cql=\n" + check); + + return executeWithRetry0(0, cluster, check, boundValues); + } + + private boolean isIdempotent(Cluster cluster, String cql) + { + return cluster.get(1).callOnInstance(() -> { + TransactionStatement stmt = AccordTestUtils.parse(cql); + return isIdempotent(stmt); + }); + } + + private static String wrapInTxn(String statement) + { + if (!statement.trim().toUpperCase().startsWith("BEGIN TRANSACTION")) + { + statement = statement.trim(); + statement = Arrays.stream(statement.split("\\n")).collect(Collectors.joining("\n ", "BEGIN TRANSACTION\n ", "\nCOMMIT TRANSACTION")); + } + return statement; + } + + public static boolean isIdempotent(TransactionStatement statement) + { + for (ModificationStatement update : statement.getUpdates()) + { + if (!isIdempotent(update)) + return false; + } + return true; + } + + private static boolean isIdempotent(ModificationStatement update) + { + update.migrateReadRequiredOperations(); + // ReferenceValue.Constant is used during migration, which means a case like "a += 1" + // ReferenceValue.Substitution uses a LET reference, so rerunning would always just see the new state + long numConstants = update.getSubstitutions().stream() + .filter(f -> f.getValue() instanceof ReferenceValue.Constant) + .filter(f -> !f.getKind().name().contains("Setter")) + .count(); + return numConstants == 0; + } + + public static class EnforceUpdateDoesNotPerformRead + { + public static void install(ClassLoader classLoader, Integer num) + { + new ByteBuddy().rebase(ModificationStatement.class) + .method(named("readRequiredLists")) + .intercept(MethodDelegation.to(EnforceUpdateDoesNotPerformRead.class)) + .make() + .load(classLoader, ClassLoadingStrategy.Default.INJECTION); + } + + @SuppressWarnings("unused") + public static Map readRequiredLists(@This ModificationStatement stmt, @SuperCall Callable> fn) throws Exception + { + Map map = fn.call(); + if (map != null) + { + // if the call tree has a TransactionStatement, then fail as this violates the query + for (StackTraceElement e : Thread.currentThread().getStackTrace()) + if (TransactionStatement.class.getCanonicalName().equals(e.getClassName())) + throw new IllegalStateException("Attempted to load required partition!"); + } + return map; + } + } + + public static class BBAccordCoordinateCountHelper + { + static AtomicInteger count = new AtomicInteger(); + static void install(ClassLoader cl, int nodeNumber) + { + if (nodeNumber != 1) + return; + new ByteBuddy().rebase(AccordService.class) + .method(named("coordinate").and(takesArguments(2))) + .intercept(MethodDelegation.to(BBAccordCoordinateCountHelper.class)) + .make() + .load(cl, ClassLoadingStrategy.Default.INJECTION); + } + + public static TxnData coordinate(Txn txn, @SuperCall Callable actual) throws Exception + { + count.incrementAndGet(); + return actual.call(); + } + } + + protected abstract Logger logger(); +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTopologyTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTopologyTest.java new file mode 100644 index 000000000000..1a4bc2b7e46e --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTopologyTest.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.test.TestBaseImpl; + +import static org.apache.cassandra.distributed.api.Feature.GOSSIP; +import static org.apache.cassandra.distributed.api.Feature.NETWORK; + +public class AccordTopologyTest extends TestBaseImpl +{ + @Test + public void name() throws Throwable + { + try (Cluster cluster = builder().withNodes(3) + .withConfig(config -> config.with(GOSSIP).with(NETWORK)) + .createWithoutStarting()) + { + cluster.get(1).startup(); + + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java index 31d1aab31f15..ee2be216f0c8 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java @@ -18,8 +18,8 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utils.Gen; -import accord.utils.RandomSource; +import accord.utilsfork.Gen; +import accord.utilsfork.RandomSource; import org.apache.cassandra.config.Config; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.CasCondition; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java index d6c01834737f..5de56c923074 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java @@ -20,7 +20,7 @@ import java.io.IOException; -import accord.utils.RandomSource; +import accord.utilsfork.RandomSource; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java index 7727e3a76ab3..7d1b7ab71d83 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java @@ -20,7 +20,7 @@ import org.junit.Ignore; -import accord.utils.Property; +import accord.utilsfork.Property; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java index 5a0ce66ccca9..f0c3d57ec4c0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utils.Property; +import accord.utilsfork.Property; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java index 969b0756432b..64d6b91ea33c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java @@ -20,8 +20,8 @@ import java.io.IOException; -import accord.utils.Property; -import accord.utils.RandomSource; +import accord.utilsfork.Property; +import accord.utilsfork.RandomSource; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInstanceConfig; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java index 0cf333d2ab84..1d8a5919f170 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utils.Property; +import accord.utilsfork.Property; import org.apache.cassandra.config.Config; import org.apache.cassandra.distributed.Cluster; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java index fa098edaacbc..d8d8bcb1bd1b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utils.Property; +import accord.utilsfork.Property; import org.apache.cassandra.config.Config; import org.apache.cassandra.distributed.Cluster; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java index 924bd3eeeb47..5bffefb11186 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java @@ -38,10 +38,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.Gen; -import accord.utils.Gens; -import accord.utils.Property; -import accord.utils.RandomSource; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; +import accord.utilsfork.Property; +import accord.utilsfork.RandomSource; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.Bind; import org.apache.cassandra.cql3.ast.Conditional; @@ -72,8 +72,8 @@ import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.ImmutableUniqueList; -import static accord.utils.Property.commands; -import static accord.utils.Property.stateful; +import static accord.utilsfork.Property.commands; +import static accord.utilsfork.Property.stateful; import static org.apache.cassandra.utils.AbstractTypeGenerators.getTypeSupport; import static org.apache.cassandra.utils.Generators.toGen; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java index 0f0bef9bfff9..df276143972c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java @@ -37,10 +37,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.Gen; -import accord.utils.Gens; -import accord.utils.Property; -import accord.utils.RandomSource; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; +import accord.utilsfork.Property; +import accord.utilsfork.RandomSource; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.Conditional.Where.Inequality; @@ -66,8 +66,8 @@ import org.apache.cassandra.utils.ImmutableUniqueList; import org.quicktheories.generators.SourceDSL; -import static accord.utils.Property.commands; -import static accord.utils.Property.stateful; +import static accord.utilsfork.Property.commands; +import static accord.utilsfork.Property.stateful; import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; import static org.apache.cassandra.utils.Generators.toGen; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java index 3a23e1bfcc0a..90a538566772 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java @@ -37,10 +37,10 @@ import com.google.common.collect.Maps; import org.slf4j.Logger; -import accord.utils.Gen; -import accord.utils.Gens; -import accord.utils.Property; -import accord.utils.RandomSource; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; +import accord.utilsfork.Property; +import accord.utilsfork.RandomSource; import com.datastax.driver.core.ColumnDefinitions; import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; @@ -86,7 +86,7 @@ import org.apache.cassandra.utils.Generators; import org.quicktheories.generators.SourceDSL; -import static accord.utils.Property.multistep; +import static accord.utilsfork.Property.multistep; import static org.apache.cassandra.distributed.test.JavaDriverUtils.toDriverCL; import static org.apache.cassandra.utils.AbstractTypeGenerators.overridePrimitiveTypeSupport; import static org.apache.cassandra.utils.AbstractTypeGenerators.stringComparator; diff --git a/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java b/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java index ad9fd21a4860..ce6c301b3993 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java +++ b/test/distributed/org/apache/cassandra/distributed/test/jmx/JMXTestsUtil.java @@ -65,7 +65,8 @@ public class JMXTestsUtil "org.apache.cassandra.db:type=StorageService:resumeMove", // throws since there is no move in progress "org.apache.cassandra.db:type=StorageService:abortMove", // throws since there is no move in progress "org.apache.cassandra.db:type=CIDRGroupsMappingManager:loadCidrGroupsCache", // AllowAllCIDRAuthorizer doesn't support this operation, as feature is disabled by default - "org.apache.cassandra.db:type=StorageService:forceRemoveCompletion" // deprecated (TCM) + "org.apache.cassandra.db:type=StorageService:forceRemoveCompletion", // deprecated (TCM) + "org.apache.cassandra.db:type=StorageService:createEpochUnsafe" // for Accord testing, but will likely be removed ); // This set of mbeans are registered early enough during the startup of a // Cassandra instance for in-jvm dtests to avoid missing registration of mbeans. diff --git a/test/distributed/org/apache/cassandra/distributed/util/QueryResultUtil.java b/test/distributed/org/apache/cassandra/distributed/util/QueryResultUtil.java index 6ac7c60fb073..c0fe2515f99f 100644 --- a/test/distributed/org/apache/cassandra/distributed/util/QueryResultUtil.java +++ b/test/distributed/org/apache/cassandra/distributed/util/QueryResultUtil.java @@ -20,16 +20,20 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Objects; +import java.util.function.Function; import java.util.function.Predicate; import com.google.monitoring.runtime.instrumentation.common.collect.Iterators; +import org.assertj.core.api.Assertions; +import org.assertj.core.data.Index; + import org.apache.cassandra.distributed.api.QueryResults; import org.apache.cassandra.distributed.api.Row; import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.shared.AssertUtils; import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; -import org.assertj.core.api.Assertions; -import org.assertj.core.data.Index; public class QueryResultUtil { @@ -69,6 +73,33 @@ public static void orderBy(SimpleQueryResult qr, String... columns) }); } + @SuppressWarnings("unchecked") + public static SimpleQueryResult map(SimpleQueryResult input, Map> mapper) + { + if (input.toObjectArrays().length == 0 || mapper == null || mapper.isEmpty()) + return input; + for (String name : mapper.keySet()) + { + if (!input.names().contains(name)) + throw new IllegalArgumentException("Unable to find column " + name); + } + Object[][] rows = input.toObjectArrays().clone(); + List names = new ArrayList<>(mapper.keySet()); + int[] idxes = names.stream().mapToInt(input.names()::indexOf).toArray(); + for (int i = 0; i < rows.length; i++) + { + Object[] row = rows[i].clone(); + for (int j = 0; j < idxes.length; j++) + { + @SuppressWarnings("rawtypes") Function map = mapper.get(names.get(j)); + int idx = idxes[j]; + row[idx] = map.apply(row[idx]); + } + rows[i] = row; + } + return new SimpleQueryResult(input.names().toArray(new String[0]), rows, input.warnings()); + } + public static boolean contains(SimpleQueryResult qr, Object... values) { return contains(qr, a -> equals(a, values)); @@ -121,6 +152,7 @@ public static String expand(SimpleQueryResult qr) { StringBuilder sb = new StringBuilder(); int rowNum = 1; + qr.mark(); while (qr.hasNext()) { sb.append("@ Row ").append(rowNum).append('\n'); @@ -133,6 +165,7 @@ public static String expand(SimpleQueryResult qr) } sb.append(table); } + qr.reset(); return sb.toString(); } @@ -199,6 +232,41 @@ public SimpleQueryResultAssertHelper contains(Predicate fn) return this; } + public SimpleQueryResultAssertHelper isEqualTo(SimpleQueryResult expectedResult) + { + qr.mark(); + expectedResult.mark(); + try + { + // org.apache.cassandra.distributed.shared.AssertUtils.assertRows has some issues with the error msg + // so rewrite to make sure to have a nicer msg + List otherNames = qr.names().isEmpty() ? expectedResult.names() : qr.names(); + Assertions.assertThat(otherNames).describedAs("Column names do not match").isEqualTo(qr.names()); + int rowId = 0; + while (qr.hasNext()) + { + if (!expectedResult.hasNext()) + throw new AssertionError("Unexpected row at index " + rowId + "; found " + Arrays.toString(qr.next().toObjectArray())); + Row next = qr.next(); + Row expected = expectedResult.next(); + if (!Arrays.equals(next.toObjectArray(), expected.toObjectArray())) + throw new AssertionError("Expected row " + rowId + " to be " + Arrays.toString(expected.toObjectArray()) + " but was " + Arrays.toString(next.toObjectArray())); + + rowId++; + } + if (expectedResult.hasNext()) + throw new AssertionError("Expected row " + rowId + " to be " + Arrays.toString(expectedResult.next().toObjectArray()) + " but was missing"); + + AssertUtils.assertRows(qr, expectedResult); + } + finally + { + qr.reset(); + expectedResult.reset(); + } + return this; + } + public SimpleQueryResultAssertHelper isEqualTo(Object... values) { Assertions.assertThat(qr.toObjectArrays()) diff --git a/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java b/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java index 2145a0ac6af3..233f49976d48 100644 --- a/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java +++ b/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java @@ -41,8 +41,8 @@ import com.google.common.util.concurrent.Uninterruptibles; import org.junit.Test; -import accord.utils.Property.StateOnlyCommand; -import accord.utils.RandomSource; +import accord.utilsfork.Property.StateOnlyCommand; +import accord.utilsfork.RandomSource; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.IInvokableInstance; @@ -65,8 +65,8 @@ import org.quicktheories.generators.SourceDSL; import org.quicktheories.impl.JavaRandom; -import static accord.utils.Property.commands; -import static accord.utils.Property.stateful; +import static accord.utilsfork.Property.commands; +import static accord.utilsfork.Property.stateful; import static com.google.common.collect.Sets.difference; import static java.lang.String.format; import static java.util.UUID.randomUUID; diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java b/test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java index 6f83ae00136c..73f59c2d4309 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java @@ -30,12 +30,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.Gen; -import accord.utils.Property; -import accord.utils.Property.Command; -import accord.utils.Property.PreCheckResult; -import accord.utils.Property.SimpleCommand; -import accord.utils.RandomSource; +import accord.utilsfork.Gen; +import accord.utilsfork.Property; +import accord.utilsfork.Property.Command; +import accord.utilsfork.Property.PreCheckResult; +import accord.utilsfork.Property.SimpleCommand; +import accord.utilsfork.RandomSource; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.exceptions.RequestTimeoutException; diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java index 192713d361c3..74988fb61890 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java @@ -49,29 +49,28 @@ import org.agrona.collections.Int2ObjectHashMap; import org.agrona.collections.IntArrayList; import org.agrona.collections.IntHashSet; -import org.apache.cassandra.distributed.Constants; -import org.apache.cassandra.distributed.api.ICoordinator; -import org.apache.cassandra.distributed.api.Row; -import org.apache.cassandra.distributed.api.SimpleQueryResult; - import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.Gen; -import accord.utils.Gens; -import accord.utils.Invariants; -import accord.utils.Property; -import accord.utils.Property.Command; -import accord.utils.Property.SimpleCommand; -import accord.utils.RandomSource; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; +import accord.utilsfork.Invariants; +import accord.utilsfork.Property; +import accord.utilsfork.Property.Command; +import accord.utilsfork.Property.SimpleCommand; +import accord.utilsfork.RandomSource; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.YamlConfigurationLoader; import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.Constants; import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.NodeToolResult; +import org.apache.cassandra.distributed.api.Row; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.distributed.api.TokenSupplier; import org.apache.cassandra.distributed.impl.INodeProvisionStrategy; import org.apache.cassandra.distributed.impl.InstanceConfig; @@ -88,10 +87,10 @@ import org.apache.cassandra.utils.ConfigGenBuilder; import org.apache.cassandra.utils.Retry; -import static accord.utils.Property.commands; -import static accord.utils.Property.ignoreCommand; -import static accord.utils.Property.multistep; -import static accord.utils.Property.stateful; +import static accord.utilsfork.Property.commands; +import static accord.utilsfork.Property.ignoreCommand; +import static accord.utilsfork.Property.multistep; +import static accord.utilsfork.Property.stateful; import static org.apache.cassandra.harry.model.TokenPlacementModel.Range; import static org.apache.cassandra.harry.model.TokenPlacementModel.Replica; import static org.apache.cassandra.harry.model.TokenPlacementModel.ReplicatedRanges; diff --git a/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java b/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java index f2a8c9332fdc..df4d44cbc92b 100644 --- a/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java +++ b/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java @@ -27,7 +27,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicLong; -import accord.utils.Invariants; +import accord.utilsfork.Invariants; import org.apache.cassandra.harry.op.Visit; import org.apache.cassandra.harry.op.Operations; import org.apache.cassandra.harry.model.Model; @@ -88,8 +88,7 @@ public void begin(Visit visit) public void end(Visit visit) { long current = started.get(); - Invariants.checkState(current == visit.lts, - "Current stated %d, current visit: %d", current, visit.lts); + Invariants.checkState(current == visit.lts, "Current stated %d, current visit: %d", current, visit.lts); finished.set(visit.lts); } @@ -168,4 +167,4 @@ public boolean allFinished() return started.size() == finished.size(); } } -} \ No newline at end of file +} diff --git a/test/harry/main/org/apache/cassandra/harry/gen/Generators.java b/test/harry/main/org/apache/cassandra/harry/gen/Generators.java index ca2e5f274982..1f39b0cd2f25 100644 --- a/test/harry/main/org/apache/cassandra/harry/gen/Generators.java +++ b/test/harry/main/org/apache/cassandra/harry/gen/Generators.java @@ -31,7 +31,7 @@ import java.util.UUID; import java.util.function.Supplier; -import accord.utils.Invariants; +import accord.utilsfork.Invariants; import org.apache.cassandra.harry.util.BitSet; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.utils.TimeUUID; @@ -460,4 +460,4 @@ public static Generator constant(Supplier constant) { return (random) -> constant.get(); } -} \ No newline at end of file +} diff --git a/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java b/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java index 8c023e5c8cbd..50a42f67baab 100644 --- a/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java +++ b/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java @@ -27,7 +27,7 @@ import java.util.Map; import java.util.stream.Collectors; -import accord.utils.Invariants; +import accord.utilsfork.Invariants; import org.agrona.collections.IntHashSet; import org.apache.cassandra.harry.ColumnSpec; import org.apache.cassandra.harry.MagicConstants; @@ -258,4 +258,4 @@ public Comparator descriptorsComparator() descriptorToIdx.put(allocatedDescriptors.get(i), i); return Comparator.comparingInt(descriptorToIdx::get); } -} \ No newline at end of file +} diff --git a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java index ee08634e1ada..0382f7987225 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java +++ b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java @@ -45,7 +45,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import accord.utils.Invariants; +import accord.utilsfork.Invariants; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.AssignmentOperator; import org.apache.cassandra.cql3.ast.CasCondition; diff --git a/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java b/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java index fde20aa363f3..cc6531e132e6 100644 --- a/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java +++ b/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.harry.test; -import accord.utils.Invariants; +import accord.utilsfork.Invariants; import org.apache.cassandra.harry.ColumnSpec; import org.apache.cassandra.harry.SchemaSpec; import org.apache.cassandra.harry.dsl.HistoryBuilder; diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java index 473cc27032b8..90792dee5564 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java @@ -61,7 +61,9 @@ public class InterceptClasses implements BiFunction "|org[/.]apache[/.]cassandra[/.]db.streaming[/.].*" + "|org[/.]apache[/.]cassandra[/.]distributed[/.]impl[/.]DirectStreamingConnectionFactory.*" + "|org[/.]apache[/.]cassandra[/.]db[/.]commitlog[/.].*" + - "|org[/.]apache[/.]cassandra[/.]service[/.]paxos[/.].*"); + "|org[/.]apache[/.]cassandra[/.]service[/.]paxos[/.].*" + + "|accord[/.].*" + ); private static final Pattern GLOBAL_METHODS = Pattern.compile("org[/.]apache[/.]cassandra[/.](?!simulator[/.]).*" + "|org[/.]apache[/.]cassandra[/.]simulator[/.]test[/.].*" + diff --git a/test/simulator/main/org/apache/cassandra/simulator/ActionList.java b/test/simulator/main/org/apache/cassandra/simulator/ActionList.java index a6178c187078..dde9bbfcfd24 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ActionList.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ActionList.java @@ -41,6 +41,10 @@ public class ActionList extends AbstractCollection public static ActionList empty() { return EMPTY; } public static ActionList of(Action action) { return new ActionList(new Action[] { action }); } public static ActionList of(Stream action) { return new ActionList(action.toArray(Action[]::new)); } + public static ActionList of(Stream action, Stream... actions) + { + return new ActionList(Stream.concat(action, Stream.of(actions).flatMap(a -> a)).toArray(Action[]::new)); + } public static ActionList of(Collection actions) { return actions.isEmpty() ? EMPTY : new ActionList(actions.toArray(new Action[0])); } public static ActionList of(Action ... actions) { return new ActionList(actions); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java b/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java index 6119e4706f53..427a777abee8 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java @@ -281,12 +281,6 @@ public boolean hasNext() if (!runnable.isEmpty() || !scheduled.isEmpty()) return true; - while (moreWork()) - { - if (!runnable.isEmpty() || !scheduled.isEmpty()) - return true; - } - if (!sequences.isEmpty()) { // TODO (feature): detection of which action is blocking progress, and logging of its stack trace only @@ -319,6 +313,12 @@ public boolean hasNext() throw failWithOOM(); } + while (moreWork()) + { + if (!runnable.isEmpty() || !scheduled.isEmpty()) + return true; + } + return false; } diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index ef092709320d..4998e0e67ec7 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -687,6 +687,9 @@ public ClusterSimulation(RandomSource random, long seed, int uniqueNum, nodeToDc[n++] = i; } } + if (builder.topologyChangeLimit < 0) + initialRf = maxRf; + snitch = new SimulatedSnitch(nodeToDc, numInDcs); execution = new SimulatedExecution(); diff --git a/test/simulator/main/org/apache/cassandra/simulator/Debug.java b/test/simulator/main/org/apache/cassandra/simulator/Debug.java index bcf0947fe406..cf0be6709255 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/Debug.java +++ b/test/simulator/main/org/apache/cassandra/simulator/Debug.java @@ -67,7 +67,15 @@ // TODO (feature): log only deltas for schema/cluster data public class Debug { - private static final Logger logger = LoggerFactory.getLogger(Debug.class); + private static class LoggerHandle + { + private static final Logger logger = LoggerFactory.getLogger(Debug.class); + } + + private static Logger logger() + { + return LoggerHandle.logger; + } public enum EventType { PARTITION, CLUSTER } public enum Level @@ -219,15 +227,15 @@ private LogOne(SimulatedTime time, boolean logConsequences) @Override public void before(Action action, Before before) { - if (logger.isWarnEnabled()) // invoke toString() eagerly to ensure we have the task's descriptin - logger.warn(String.format("%6ds %s %s", TimeUnit.NANOSECONDS.toSeconds(time.nanoTime()), before, action)); + if (logger().isWarnEnabled()) // invoke toString() eagerly to ensure we have the task's descriptin + logger().warn(String.format("%6ds %s %s", TimeUnit.NANOSECONDS.toSeconds(time.nanoTime()), before, action)); } @Override public void consequences(ActionList consequences) { - if (logConsequences && !consequences.isEmpty() && logger.isWarnEnabled()) - logger.warn(String.format("%6ds Next: %s", TimeUnit.NANOSECONDS.toSeconds(time.nanoTime()), consequences)); + if (logConsequences && !consequences.isEmpty() && logger().isWarnEnabled()) + logger().warn(String.format("%6ds Next: %s", TimeUnit.NANOSECONDS.toSeconds(time.nanoTime()), consequences)); } } @@ -241,7 +249,7 @@ public LogTermination(ActionListener wrap) @Override public void transitivelyAfter(Action finished) { - logger.warn("Terminated {}", finished); + logger().warn("Terminated {}", finished); } } @@ -268,7 +276,7 @@ private Consumer debugGossip(Cluster cluster) for (InetAddressAndPort ep : Gossiper.instance.getLiveMembers()) { EndpointState epState = Gossiper.instance.getEndpointStateForEndpoint(ep); - logger.warn("Gossip {}: {} {}", ep, epState.isAlive(), epState.states().stream() + logger().warn("Gossip {}: {} {}", ep, epState.isAlive(), epState.states().stream() .map(e -> e.getKey().toString() + "=(" + e.getValue().value + ',' + e.getValue().version + ')') .collect(Collectors.joining(", ", "[", "]"))); } @@ -305,11 +313,11 @@ public static Consumer debugPaxos(Cluster cluster, String keyspace, int TableMetadata metadata = Keyspace.open(keyspace).getColumnFamilyStore("tbl").metadata.get(); ByteBuffer pkbb = Int32Type.instance.decompose(pkint); DecoratedKey key = new BufferDecoratedKey(DatabaseDescriptor.getPartitioner().getToken(pkbb), pkbb); - logger.warn("node{}({}): {}", num, primaryKey, paxosDebugInfo(key, metadata, FBUtilities.nowInSeconds())); + logger().warn("node{}({}): {}", num, primaryKey, paxosDebugInfo(key, metadata, FBUtilities.nowInSeconds())); } catch (Throwable t) { - logger.warn("node{}({})", num, primaryKey, t); + logger().warn("node{}({})", num, primaryKey, t); } }, node, primaryKey); } @@ -320,7 +328,7 @@ public static Consumer debugRf(Cluster cluster, String keyspace) { return ignore -> { cluster.forEach(i -> i.unsafeRunOnThisThread(() -> { - logger.warn("{} {}", + logger().warn("{} {}", Schema.instance.getKeyspaceMetadata(keyspace) == null ? "" : Schema.instance.getKeyspaceMetadata(keyspace).params.replication.toString(), Schema.instance.getKeyspaceMetadata(keyspace) == null ? "" : Keyspace.open(keyspace).getReplicationStrategy().configOptions.toString()); })); @@ -332,7 +340,7 @@ public static Consumer debugOwnership(Cluster cluster, String keyspace, return ignore -> { for (int node = 1 ; node <= cluster.size() ; ++node) { - logger.warn("node{}({}): {}", node, primaryKey, cluster.get(node).unsafeApplyOnThisThread(v -> { + logger().warn("node{}({}): {}", node, primaryKey, cluster.get(node).unsafeApplyOnThisThread(v -> { try { return ReplicaLayout.forTokenWriteLiveAndDown(Keyspace.open(keyspace), Murmur3Partitioner.instance.getToken(Int32Type.instance.decompose(v))).all().endpointList().toString(); @@ -350,7 +358,7 @@ public static Consumer debugRing(Cluster cluster, String keyspace) { return ignore -> cluster.forEach(i -> i.unsafeRunOnThisThread(() -> { if (Schema.instance.getKeyspaceMetadata(keyspace) != null) - logger.warn("{}", ClusterMetadata.current()); + logger().warn("{}", ClusterMetadata.current()); })); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java index 97c2db35a551..61b15dd40ed6 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java @@ -43,6 +43,7 @@ import org.apache.cassandra.simulator.Debug.Levels; import org.apache.cassandra.simulator.cluster.ClusterActions.TopologyChange; import org.apache.cassandra.simulator.debug.SelfReconcile; +import org.apache.cassandra.simulator.logging.SeedDefiner; import org.apache.cassandra.simulator.systems.InterceptedWait; import org.apache.cassandra.simulator.systems.InterceptedWait.CaptureSites.Capture; import org.apache.cassandra.simulator.systems.InterceptibleThread; @@ -85,7 +86,15 @@ @SuppressWarnings({ "ZeroLengthArrayAllocation", "CodeBlock2Expr", "SameParameterValue", "DynamicRegexReplaceableByCompiledPattern", "CallToSystemGC" }) public class SimulationRunner { - private static final Logger logger = LoggerFactory.getLogger(SimulationRunner.class); + private static class LoggerHandle + { + private static final Logger logger = LoggerFactory.getLogger(SimulationRunner.class); + } + + private static Logger logger() + { + return LoggerHandle.logger; + } public enum RecordOption { NONE, VALUE, WITH_CALLSITES } @@ -319,6 +328,9 @@ protected void propagate(B builder) public void run(B builder) throws IOException { + long seed = parseHex(Optional.ofNullable(this.seed)).orElse(new Random(System.nanoTime()).nextLong()); + SeedDefiner.setSeed(seed); + logger(); beforeAll(); Thread.setDefaultUncaughtExceptionHandler((th, e) -> { boolean isInterrupt = false; @@ -329,14 +341,12 @@ public void run(B builder) throws IOException t = t.getCause(); } if (!isInterrupt) - logger.error("Uncaught exception on {}", th, e); + logger().error("Uncaught exception on {}", th, e); if (e instanceof Error) throw (Error) e; }); propagate(builder); - - long seed = parseHex(Optional.ofNullable(this.seed)).orElse(new Random(System.nanoTime()).nextLong()); for (int i = 0 ; i < simulationCount ; ++i) { cleanup(); @@ -353,7 +363,7 @@ protected static class Run> extends Basic { protected void run(long seed, B builder) throws IOException { - logger.error("Seed 0x{}", Long.toHexString(seed)); + logger().error("Seed 0x{}", Long.toHexString(seed)); try (ClusterSimulation cluster = builder.create(seed)) { @@ -363,6 +373,7 @@ protected void run(long seed, B builder) throws IOException } catch (Throwable t) { + logger().error("Failed on seed 0x{}", Long.toHexString(seed), t); throw new SimulationException(seed, t); } } diff --git a/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java b/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java index 5be3384eebd1..23fe5eaed65e 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java +++ b/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java @@ -40,9 +40,10 @@ public static RuntimeException failWithOOM() public static void dumpStackTraces(Logger logger) { Map threadMap = Thread.getAllStackTraces(); - threadMap.forEach((thread, ste) -> { - logger.error("{}:\n {}", thread, Threads.prettyPrint(ste, false, " ", "\n", "")); - }); + String prefix = " "; + String delimiter = "\n" + prefix; + threadMap.forEach((thread, ste) -> + logger.error("{}:\n{}", thread, Threads.prettyPrint(ste, false, prefix, delimiter, ""))); FastThreadLocal.destroy(); } } diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java index 5d32ab6eaa91..1e8656dd2aca 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java @@ -119,12 +119,11 @@ public KeyspaceActions(SimulatedSystems simulated, currentRf = initialRf.clone(); membersOfQuorumDcs = serialConsistency == LOCAL_SERIAL ? all.dcs[0] : all.toArray(); ops.addAll(Arrays.asList(options.allChoices.options)); - } - public ActionPlan plan() + public ActionPlan plan(boolean joinAll) { - ActionList pre = ActionList.of(pre(createKeyspaceCql(keyspace), createTableCql)); + ActionList pre = ActionList.of(pre(createKeyspaceCql(keyspace), createTableCql, joinAll)); ActionList interleave = stream(); ActionList post = ActionList.empty(); return new ActionPlan(pre, singletonList(interleave), post); @@ -140,12 +139,13 @@ private String createKeyspaceCql(String keyspace) return createKeyspaceCql; } - private Action pre(String createKeyspaceCql, String createTableCql) + private Action pre(String createKeyspaceCql, String createTableCql, boolean joinAll) { + int[] joinPerDC = joinAll ? options.maxRf : options.initialRf; // randomise initial cluster, and return action to initialise it - for (int dc = 0 ; dc < options.initialRf.length ; ++dc) + for (int dc = 0 ; dc < joinPerDC.length ; ++dc) { - for (int i = 0 ; i < options.initialRf[dc] ; ++i) + for (int i = 0 ; i < joinPerDC[dc] ; ++i) { int join = registered.removeRandom(random, dc); joined.add(join); diff --git a/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java b/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java new file mode 100644 index 000000000000..92066c182470 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.logging; + +import accord.utils.Invariants; +import ch.qos.logback.core.PropertyDefinerBase; +import org.apache.cassandra.config.CassandraRelevantProperties; + +public class RunStartDefiner extends PropertyDefinerBase +{ + static + { + Invariants.checkState(CassandraRelevantProperties.SIMULATOR_STARTED.getString() != null); + } + + @Override + public String getPropertyValue() + { + return CassandraRelevantProperties.SIMULATOR_STARTED.getString(); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java b/test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java new file mode 100644 index 000000000000..a3e11abe3c32 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.logging; + +import ch.qos.logback.core.PropertyDefinerBase; +import org.apache.cassandra.config.CassandraRelevantProperties; + +public class SeedDefiner extends PropertyDefinerBase +{ + + public static void setSeed(long seed) + { + CassandraRelevantProperties.SIMULATOR_SEED.setString("0x" + Long.toHexString(seed)); + } + + @Override + public String getPropertyValue() + { + if (CassandraRelevantProperties.SIMULATOR_SEED.getString() == null) + { + System.err.println("SeedDefiner is being called before the seed has been set, check static init order"); + CassandraRelevantProperties.SIMULATOR_SEED.setString(""); + } + return CassandraRelevantProperties.SIMULATOR_SEED.getString(); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java new file mode 100644 index 000000000000..5a528468ea39 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiFunction; +import java.util.function.LongSupplier; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.distributed.api.LogResult; +import org.apache.cassandra.distributed.impl.FileLogAction; +import org.apache.cassandra.distributed.impl.Instance; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.simulator.Action; +import org.apache.cassandra.simulator.ActionList; +import org.apache.cassandra.simulator.ActionPlan; +import org.apache.cassandra.simulator.Actions; +import org.apache.cassandra.simulator.Debug; +import org.apache.cassandra.simulator.RandomSource; +import org.apache.cassandra.simulator.RunnableActionScheduler; +import org.apache.cassandra.simulator.cluster.ClusterActions; +import org.apache.cassandra.simulator.cluster.KeyspaceActions; +import org.apache.cassandra.simulator.logging.RunStartDefiner; +import org.apache.cassandra.simulator.logging.SeedDefiner; +import org.apache.cassandra.simulator.systems.SimulatedActionTask; +import org.apache.cassandra.simulator.systems.SimulatedSystems; +import org.apache.cassandra.simulator.utils.IntRange; +import org.apache.cassandra.utils.Pair; + +import static java.util.Collections.singletonList; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.simulator.Action.Modifiers.RELIABLE; +import static org.apache.cassandra.simulator.Action.Modifiers.RELIABLE_NO_TIMEOUTS; +import static org.apache.cassandra.simulator.ActionSchedule.Mode.STREAM_LIMITED; +import static org.apache.cassandra.simulator.ActionSchedule.Mode.TIME_AND_STREAM_LIMITED; +import static org.apache.cassandra.simulator.ActionSchedule.Mode.TIME_LIMITED; + +@SuppressWarnings("unused") +abstract class AbstractPairOfSequencesPaxosSimulation extends PaxosSimulation +{ + private static final Logger logger = LoggerFactory.getLogger(AbstractPairOfSequencesPaxosSimulation.class); + + static final String KEYSPACE = "simple_paxos_simulation"; + static final String TABLE = "tbl"; + static final ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); + + final ClusterActions.Options clusterOptions; + final float readRatio; + final IntRange withinKeyConcurrency; + final int concurrency; + final IntRange simulateKeyForSeconds; + final ConsistencyLevel serialConsistency; + final Debug debug; + final AtomicInteger successfulReads = new AtomicInteger(); + final AtomicInteger successfulWrites = new AtomicInteger(); + final AtomicInteger failedReads = new AtomicInteger(); + final AtomicInteger failedWrites = new AtomicInteger(); + final long seed; + final int[] primaryKeys; + + public AbstractPairOfSequencesPaxosSimulation(SimulatedSystems simulated, + Cluster cluster, + ClusterActions.Options clusterOptions, + float readRatio, + int concurrency, IntRange simulateKeyForSeconds, IntRange withinKeyConcurrency, + ConsistencyLevel serialConsistency, RunnableActionScheduler scheduler, Debug debug, + long seed, int[] primaryKeys, + long runForNanos, LongSupplier jitter) + { + super(runForNanos < 0 ? STREAM_LIMITED : clusterOptions.topologyChangeLimit < 0 ? TIME_LIMITED : TIME_AND_STREAM_LIMITED, + simulated, cluster, scheduler, runForNanos, jitter); + this.readRatio = readRatio; + this.concurrency = concurrency; + this.simulateKeyForSeconds = simulateKeyForSeconds; + this.withinKeyConcurrency = withinKeyConcurrency; + this.serialConsistency = serialConsistency; + this.clusterOptions = clusterOptions; + this.debug = debug; + this.seed = seed; + this.primaryKeys = primaryKeys.clone(); + Arrays.sort(this.primaryKeys); + } + + protected abstract String createTableStmt(); + + protected abstract String preInsertStmt(); + + abstract boolean joinAll(); + boolean allowMultiplePartitions() { return false; } + + abstract BiFunction> actionFactory(); + + protected Action checkErrorLogs(IInvokableInstance inst) + { + DatabaseDescriptor.clientInitialization(); + return new Action("Error logs for node" + inst.config().num(), Action.Modifiers.NONE) + { + @Override + protected ActionList performSimple() + { + // can't use inst.logs as that runs in the class loader, which uses in-memory file system + String suite = new RunStartDefiner().getPropertyValue() + "-" + new SeedDefiner().getPropertyValue(); + String instanceId = "node" + inst.config().num(); + File logFile = new File(String.format("build/test/logs/simulator/%s/%s/system.log", suite, instanceId)); + FileLogAction logs = new FileLogAction(logFile); + + LogResult> errors = logs.grepForErrors(); + if (!errors.getResult().isEmpty()) + { + List> errorsSeen = new ArrayList<>(); + for (String error : errors.getResult()) + { + for (String line : error.split("\\n")) + { + line = line.trim(); + if (line.startsWith("ERROR")) continue; + if (line.startsWith("at ")) continue; + errorsSeen.add(Pair.create(line.split(":")[0], error)); + break; + } + } + Class[] expected = expectedExceptions(); + StringBuilder sb = new StringBuilder(); + for (Pair pair : errorsSeen) + { + String name = pair.left; + String exception = pair.right; + Class klass; + try + { + klass = Class.forName(name); + } + catch (ClassNotFoundException e) + { + throw new RuntimeException(e); + } + + if (!Stream.of(expected).anyMatch(e -> e.isAssignableFrom(klass))) + sb.append("Unexpected exception:\n").append(exception).append('\n'); + } + if (sb.length() > 0) + { + AssertionError error = new AssertionError("Saw errors in node" + inst.config().num() + ": " + sb); + // this stacktrace isn't helpful, can be more confusing + error.setStackTrace(new StackTraceElement[0]); + throw error; + } + } + return ActionList.empty(); + } + }; + } + + public ActionPlan plan() + { + ActionPlan plan = new KeyspaceActions(simulated, KEYSPACE, TABLE, createTableStmt(), cluster, + clusterOptions, serialConsistency, this, primaryKeys, debug).plan(joinAll()); + + plan = plan.encapsulate(ActionPlan.setUpTearDown( + ActionList.of( + cluster.stream().map(i -> simulated.run("Insert Partitions", i, executeForPrimaryKeys(preInsertStmt(), primaryKeys))), + // TODO (now): this is temporary until we have correct epoch handling + cluster.stream().map(i -> simulated.run("Create Accord Epoch", i, () -> AccordService.instance().createEpochFromConfigUnsafe())) + ), + ActionList.of( + cluster.stream().map(i -> checkErrorLogs(i)), + cluster.stream().map(i -> SimulatedActionTask.unsafeTask("Shutdown " + i.broadcastAddress(), RELIABLE, RELIABLE_NO_TIMEOUTS, simulated, i, i::shutdown)) + ) + )); + + BiFunction> factory = actionFactory(); + + List available = IntStream.range(0, primaryKeys.length).boxed().collect(Collectors.toList()); + Action stream = Actions.infiniteStream(concurrency, new Supplier() { + @Override + public Action get() + { + int[] primaryKeyIndex = consume(simulated.random, available); + long untilNanos = simulated.time.nanoTime() + SECONDS.toNanos(simulateKeyForSeconds.select(simulated.random)); + int concurrency = withinKeyConcurrency.select(simulated.random); + Supplier supplier = factory.apply(simulated, primaryKeyIndex); + // while this stream is finite, it participates in an infinite stream via its parent, so we want to permit termination while it's running + return Actions.infiniteStream(concurrency, new Supplier() + { + @Override + public Action get() + { + if (simulated.time.nanoTime() >= untilNanos) + { + IntStream.of(primaryKeyIndex).boxed().forEach(available::add); + return null; + } + return supplier.get(); + } + + @Override + public String toString() + { + return supplier.toString(); + } + }); + } + + @Override + public String toString() + { + return "Primary Key Actions"; + } + }); + + return simulated.execution.plan() + .encapsulate(plan) + .encapsulate(ActionPlan.interleave(singletonList(ActionList.of(stream)))); + } + + private int[] consume(RandomSource random, List available) + { + if (available.isEmpty()) + throw new AssertionError("available partitions are empty!"); + int numPartitions = available.size() == 1 || !allowMultiplePartitions() ? 1 : random.uniform(1, available.size()); + int[] partitions = new int[numPartitions]; + for (int counter = 0; counter < numPartitions; counter++) + { + int idx = random.uniform(0, available.size()); + int next = available.get(idx); + int last = available.get(available.size() - 1); + if (available.set(idx, last) != next) + throw new IllegalStateException("Expected to set " + last + " index " + idx + " but did not return " + next); + int removed = available.remove(available.size() - 1); + if (last != removed) + throw new IllegalStateException("Expected to remove " + last + " but removed " + removed); + + partitions[counter] = next; + } + Arrays.sort(partitions); + return partitions; + } + + IIsolatedExecutor.SerializableRunnable executeForPrimaryKeys(String cql, int[] primaryKeys) + { + return () -> { + for (int primaryKey : primaryKeys) + Instance.unsafeExecuteInternalWithResult(cql, primaryKey); + }; + } + + @Override + public TopologyChangeValidator newTopologyChangeValidator(Object id) + { + return new PaxosTopologyChangeVerifier(cluster, KEYSPACE, TABLE, id); + } + + @Override + public RepairValidator newRepairValidator(Object id) + { + return new PaxosRepairValidator(cluster, KEYSPACE, TABLE, id); + } + + @Override + public void run() + { + super.run(); + logger.warn("Writes: {} successful, {} failed", successfulWrites, failedWrites); + logger.warn("Reads: {} successful {} failed", successfulReads, failedReads); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java new file mode 100644 index 000000000000..7f7cab1110bb --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import java.io.IOException; + +import org.apache.cassandra.simulator.ClusterSimulation; +import org.apache.cassandra.simulator.RandomSource; +import org.apache.cassandra.simulator.utils.IntRange; +import org.apache.cassandra.simulator.utils.KindOfSequence; + +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.SERIAL; + +class AccordClusterSimulation extends ClusterSimulation implements AutoCloseable +{ + @SuppressWarnings("UnusedReturnValue") + static class Builder extends ClusterSimulation.Builder + { + public AccordClusterSimulation create(long seed) throws IOException + { + RandomSource random = randomSupplier.get(); + random.reset(seed); + return new AccordClusterSimulation(random, seed, uniqueNum, this); + } + + public void applyHandicaps() + { + /** + * TODO: remove after partial replication patch + * The current homekey implementation isn't compatible with the C* commands per key implementation when + * a non-replica coordinates a query. + * + * This creates a few problems. + * + * First when a non-replica coordinator chooses a home key, it chooses the end of one of it's ranges and + * adds it to the txn. This doesn't work with the C* CFK implementation, because it expects a partition + * key. This will change with the partial replication patch, so we can re-evaluate then. + * + * Second, nodes that haven't joined the ring have no ranges to pull home keys from, so they npe + */ + dcCount = new IntRange(1, 1); + nodeCount = new IntRange(3, 3); + } + } + + AccordClusterSimulation(RandomSource random, long seed, int uniqueNum, Builder builder) throws IOException + { + super(random, seed, uniqueNum, builder, + config -> {}, + (simulated, schedulers, cluster, options) -> { + int[] primaryKeys = primaryKeys(seed, builder.primaryKeyCount()); + KindOfSequence.Period jitter = RandomSource.Choices.uniform(KindOfSequence.values()).choose(random) + .period(builder.schedulerJitterNanos(), random); + return new PairOfSequencesAccordSimulation(simulated, cluster, options, + builder.readChance().select(random), builder.concurrency(), builder.primaryKeySeconds(), builder.withinKeyConcurrency(), + SERIAL, schedulers, builder.debug(), seed, + primaryKeys, builder.secondsToSimulate() >= 0 ? SECONDS.toNanos(builder.secondsToSimulate()) : -1, + () -> jitter.get(random)); + }); + } + + private static int[] primaryKeys(long seed, int count) + { + int primaryKey = (int) (seed); + int[] primaryKeys = new int[count]; + for (int i = 0 ; i < primaryKeys.length ; ++i) + primaryKeys[i] = primaryKey += 1 << 20; + return primaryKeys; + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java new file mode 100644 index 000000000000..f4bd21aaf980 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; + +import io.airlift.airline.Cli; +import io.airlift.airline.Command; +import org.apache.cassandra.simulator.SimulationRunner; + +public class AccordSimulationRunner extends SimulationRunner +{ + @Command(name = "run") + public static class Run extends SimulationRunner.Run + { + public Run() {} + + @Override + protected void run(long seed, AccordClusterSimulation.Builder builder) throws IOException + { + builder.applyHandicaps(); + super.run(seed, builder); + } + } + + @Command(name = "record") + public static class Record extends SimulationRunner.Record + { + public Record() {} + } + + @Command(name = "reconcile") + public static class Reconcile extends SimulationRunner.Reconcile + { + public Reconcile() {} + } + + public static class Help extends HelpCommand {} + + // for simple unit tests so we can simply invoke main() + private static final AtomicInteger uniqueNum = new AtomicInteger(); + + /** + * See {@link org.apache.cassandra.simulator} package info for execution tips + */ + public static void main(String[] args) throws IOException + { + AccordClusterSimulation.Builder builder = new AccordClusterSimulation.Builder(); + builder.unique(uniqueNum.getAndIncrement()); + + Cli.>builder("accord") + .withCommand(Run.class) + .withCommand(Reconcile.class) + .withCommand(Record.class) + .withCommand(Help.class) + .withDefaultCommand(Help.class) + .build() + .parse(args) + .run(builder); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryChecker.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryChecker.java index d1e0771b1ecc..2465bf62cf99 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryChecker.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryChecker.java @@ -127,13 +127,23 @@ Event setById(int id, Event event) return byId[id] = event; } - void witness(Observation witness, int[] witnessSequence, int start, int end) + private static int eventId(int[] witnessSequence, int eventPosition) + { + return eventPosition == 0 ? -1 : witnessSequence[eventPosition - 1]; + } + + void witness(Observation witness, int[] witnessSequence) + { + witness(witness.id, witnessSequence, witness.start, witness.end); + } + + void witness(int id, int[] witnessSequence, int start, int end) { int eventPosition = witnessSequence.length; - int eventId = eventPosition == 0 ? -1 : witnessSequence[eventPosition - 1]; - setById(witness.id, new Event(witness.id)).log.add(new VerboseWitness(witness.id, start, end, witnessSequence)); + int eventId = eventId(witnessSequence, eventPosition); + setById(id, new Event(id)).log.add(new VerboseWitness(id, start, end, witnessSequence)); Event event = get(eventPosition, eventId); - recordWitness(event, witness, witnessSequence); + recordWitness(event, id, start, end, witnessSequence); recordVisibleBy(event, end); recordVisibleUntil(event, start); @@ -154,7 +164,7 @@ void witness(Observation witness, int[] witnessSequence, int start, int end) } else if (e.result) { - throw fail(primaryKey, "%d witnessed as absent by %d", e.eventId, witness.id); + throw fail(primaryKey, "%d witnessed as absent by %d", e.eventId, id); } } } @@ -181,16 +191,16 @@ void applied(int eventId, int start, int end, boolean success) } } - void recordWitness(Event event, Observation witness, int[] witnessSequence) + void recordWitness(Event event, int id, int start, int end, int[] witnessSequence) { - recordWitness(event, witness, witnessSequence.length, witnessSequence); + recordWitness(event, id, start, end, witnessSequence.length, witnessSequence); } - void recordWitness(Event event, Observation witness, int eventPosition, int[] witnessSequence) + void recordWitness(Event event, int id, int start, int end, int eventPosition, int[] witnessSequence) { while (true) { - event.log.add(new Witness(READ, witness.id, witness.start, witness.end)); + event.log.add(new Witness(READ, id, start, end)); if (event.witnessSequence != null) { if (!Arrays.equals(event.witnessSequence, witnessSequence)) @@ -238,7 +248,7 @@ void recordVisibleUntil(Event event, int visibleUntil) event.visibleUntil = visibleUntil; Event next = next(event); if (next != null && visibleUntil >= next.visibleBy) - throw fail(primaryKey, "%s %d not witnessed >= %d, but also witnessed <= %d", next.witnessSequence, next.eventId, event.visibleUntil, next.visibleBy); + throw fail(primaryKey, "%s+%d not witnessed >= %d, but also witnessed <= %d", next.witnessSequence, next.eventId, event.visibleUntil, next.visibleBy); } } @@ -295,7 +305,7 @@ Event prev(Event event) return null; // initialise the event, if necessary importing information from byId - return get(eventPosition, eventPosition == 0 ? -1 : event.witnessSequence[eventPosition - 1]); + return get(eventPosition, eventId(event.witnessSequence, eventPosition)); } Event next(Event event) diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryValidator.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryValidator.java new file mode 100644 index 000000000000..282b16d3b1af --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/HistoryValidator.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import javax.annotation.Nullable; + +public interface HistoryValidator +{ + Checker witness(int start, int end); + + void print(@Nullable Integer pk); + + interface Checker extends AutoCloseable + { + void read(int pk, int id, int count, int[] seq); + void write(int pk, int id, boolean success); + + default void writeSuccess(int pk, int id) + { + write(pk, id, true); + } + + default void writeUnknownFailure(int pk, int id) + { + write(pk, id, false); + } + + @Override + default void close() {} + } + + interface Factory + { + HistoryValidator create(int[] partitions); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/LinearizabilityValidator.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/LinearizabilityValidator.java new file mode 100644 index 000000000000..67c95a7378fe --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/LinearizabilityValidator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import java.util.function.Consumer; +import javax.annotation.Nullable; + +import com.carrotsearch.hppc.IntObjectHashMap; +import com.carrotsearch.hppc.IntObjectMap; +import com.carrotsearch.hppc.cursors.ObjectCursor; + +public class LinearizabilityValidator implements HistoryValidator +{ + private final IntObjectMap historyCheckers; + + public LinearizabilityValidator(int[] primaryKeys) + { + historyCheckers = new IntObjectHashMap<>(primaryKeys.length); + for (int primaryKey : primaryKeys) + historyCheckers.put(primaryKey, new HistoryChecker(primaryKey)); + } + + @Override + public Checker witness(int start, int end) + { + return new Checker() + { + @Override + public void read(int pk, int id, int count, int[] seq) + { + get(pk).witness(id, seq, start, end); + } + + @Override + public void write(int pk, int id, boolean success) + { + get(pk).applied(id, start, end, success); + } + }; + } + + @Override + public void print(@Nullable Integer pk) + { + if (pk == null) historyCheckers.values().forEach((Consumer>) c -> c.value.print()); + else historyCheckers.get(pk).print(); + } + + private HistoryChecker get(int pk) + { + HistoryChecker checker = historyCheckers.get(pk); + if (checker == null) + throw new NullPointerException("Unable to find checker for pk=" + pk); + return checker; + } + + public static class Factory implements HistoryValidator.Factory + { + public static final Factory instance = new Factory(); + + @Override + public HistoryValidator create(int[] partitions) + { + return new LinearizabilityValidator(partitions); + } + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/LoggingHistoryValidator.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/LoggingHistoryValidator.java new file mode 100644 index 000000000000..b39c3111ea66 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/LoggingHistoryValidator.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import java.util.Arrays; +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class LoggingHistoryValidator implements HistoryValidator +{ + private static final Logger logger = LoggerFactory.getLogger(LoggingHistoryValidator.class); + private final HistoryValidator delegate; + + public LoggingHistoryValidator(HistoryValidator delegate) + { + this.delegate = delegate; + } + + @Override + public Checker witness(int start, int end) + { + StringBuilder sb = new StringBuilder(); + sb.append("Witness(start=").append(start).append(", end=").append(end).append(")\n"); + Checker sub = delegate.witness(start, end); + return new Checker() + { + @Override + public void read(int pk, int id, int count, int[] seq) + { + sb.append("\tread(pk=").append(pk).append(", id=").append(id).append(", count=").append(count).append(", seq=").append(Arrays.toString(seq)).append(")\n"); + sub.read(pk, id, count, seq); + } + + @Override + public void write(int pk, int id, boolean success) + { + sb.append("\twrite(pk=").append(pk).append(", id=").append(id).append(", success=").append(success).append(")\n"); + sub.write(pk, id, success); + } + + @Override + public void close() + { + logger.info(sb.toString()); + sub.close(); + } + }; + } + + @Override + public void print(@Nullable Integer pk) + { + delegate.print(pk); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/Observation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/Observation.java index 546fd3179fc3..41eb2c348dcc 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/Observation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/Observation.java @@ -18,14 +18,16 @@ package org.apache.cassandra.simulator.paxos; +import org.apache.cassandra.distributed.api.SimpleQueryResult; + class Observation implements Comparable { final int id; - final Object[][] result; + final SimpleQueryResult result; final int start; final int end; - Observation(int id, Object[][] result, int start, int end) + Observation(int id, SimpleQueryResult result, int start, int end) { this.id = id; this.result = result; @@ -33,6 +35,16 @@ class Observation implements Comparable this.end = end; } + boolean isSuccess() + { + return result != null; + } + + boolean isUnknownFailure() + { + return result == null; + } + // computes a PARTIAL ORDER on when the outcome occurred, i.e. for many pair-wise comparisons the answer is 0 public int compareTo(Observation that) { diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java new file mode 100644 index 000000000000..fc929a9460a6 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiFunction; +import java.util.function.LongSupplier; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.cursors.IntCursor; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.QueryResults; +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.impl.Query; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.simulator.Action; +import org.apache.cassandra.simulator.Debug; +import org.apache.cassandra.simulator.RunnableActionScheduler; +import org.apache.cassandra.simulator.cluster.ClusterActions; +import org.apache.cassandra.simulator.systems.SimulatedSystems; +import org.apache.cassandra.simulator.utils.IntRange; + +import static org.apache.cassandra.simulator.paxos.HistoryChecker.fail; + +// TODO: the class hierarchy is a bit broken, but hard to untangle. Need to go Paxos->Consensus, probably. +@SuppressWarnings("unused") +public class PairOfSequencesAccordSimulation extends AbstractPairOfSequencesPaxosSimulation +{ + private static final Logger logger = LoggerFactory.getLogger(PairOfSequencesAccordSimulation.class); + private static final String SELECT = "SELECT pk, count, seq FROM " + KEYSPACE + ".tbl WHERE pk IN (%s);"; + private static final String UPDATE = "UPDATE " + KEYSPACE + ".tbl SET count += 1, seq = seq + ? WHERE pk = ?;"; + + private static void append(TableMetadata metadata, ByteBuffer[] keyComponents, Row row, QueryResults.Builder builder, String[] columnNames) + { + Object[] buffer = new Object[columnNames.length]; + Clustering clustering = row.clustering(); + int idx = 0; + for (String columnName : columnNames) + { + ColumnMetadata column = metadata.getColumn(new ColumnIdentifier(columnName, true)); + switch (column.kind) + { + case PARTITION_KEY: + buffer[idx++] = column.type.compose(keyComponents[column.position()]); + break; + case CLUSTERING: + buffer[idx++] = column.type.compose(clustering.bufferAt(column.position())); + break; + case REGULAR: + { + if (column.isComplex()) + { + ComplexColumnData data = row.getComplexColumnData(column); + if (data == null) + { + buffer[idx++] = new ArrayList<>(); + } + else + { + List result = new ArrayList<>(data.cellsCount()); + for (Cell cell : data) + result.add(column.cellValueType().compose(cell.buffer())); + buffer[idx++] = result; + } + } + else + { + //TODO deletes + buffer[idx++] = column.type.compose(row.getCell(column).buffer()); + } + } + break; +// case STATIC: + default: + throw new IllegalArgumentException("Unsupported kind: " + column.kind); + } + } + builder.row(buffer); + } + + @Override + void log(@Nullable Integer pk) + { + validator.print(pk); + } + + private final float writeRatio; + private final HistoryValidator validator; + + public PairOfSequencesAccordSimulation(SimulatedSystems simulated, + Cluster cluster, + ClusterActions.Options clusterOptions, + float readRatio, + int concurrency, IntRange simulateKeyForSeconds, IntRange withinKeyConcurrency, + ConsistencyLevel serialConsistency, RunnableActionScheduler scheduler, Debug debug, + long seed, int[] primaryKeys, + long runForNanos, LongSupplier jitter) + { + super(simulated, cluster, clusterOptions, + readRatio, concurrency, simulateKeyForSeconds, withinKeyConcurrency, + serialConsistency, + scheduler, debug, + seed, primaryKeys, + runForNanos, jitter); + this.writeRatio = 1F - readRatio; + validator = new LoggingHistoryValidator(new StrictSerializabilityValidator(primaryKeys)); + } + + @Override + protected String createTableStmt() + { + return "CREATE TABLE " + KEYSPACE + ".tbl (pk int, count int, seq text, PRIMARY KEY (pk))"; + } + + @Override + protected String preInsertStmt() + { + return "INSERT INTO " + KEYSPACE + ".tbl (pk, count, seq) VALUES (?, 0, '') USING TIMESTAMP 0"; + } + + @Override + boolean allowMultiplePartitions() { return true; } + + @Override + BiFunction> actionFactory() + { + AtomicInteger id = new AtomicInteger(0); + + return (simulated, primaryKeyIndex) -> { + int[] primaryKeys = IntStream.of(primaryKeyIndex).map(i -> this.primaryKeys[i]).toArray(); + return () -> accordAction(id.getAndIncrement(), simulated, primaryKeys); + }; + } + + public class ReadWriteOperation extends Operation + { + private final IntHashSet reads, writes; + + public ReadWriteOperation(int id, int[] primaryKeys, IntHashSet reads, IntHashSet writes, IInvokableInstance instance) + { + super(primaryKeys, id, instance, "Accord", createQuery(id, reads, writes)); + this.reads = reads; + this.writes = writes; + } + + @Override + void verify(Observation outcome) + { + SimpleQueryResult result = outcome.result; + (result != null ? successfulWrites : failedWrites).incrementAndGet(); + if (result != null) + { + IntHashSet seen = new IntHashSet(); + //TODO if there isn't a value then we get empty read, which then doesn't make it into the QueryResult + // given the fact that we always run with the partitions defined this should be fine + try (HistoryValidator.Checker checker = validator.witness(outcome.start, outcome.end)) + { + while (result.hasNext()) + { + org.apache.cassandra.distributed.api.Row row = result.next(); + + int pk = row.getInteger("pk"); + int count = row.getInteger("count", 0); + int[] seq = Arrays.stream(row.getString("seq", "").split(",")) + .filter(s -> !s.isEmpty()) + .mapToInt(Integer::parseInt) + .toArray(); + + if (!seen.add(pk)) + throw new IllegalStateException("Duplicate partition key " + pk); + // every partition was read, but not all were written to... need to verify each partition + if (seq.length != count) + throw fail(pk, "%d != #%s", count, seq); + + checker.read(pk, outcome.id, count, seq); + } + if (!seen.equals(reads)) + throw fail(0, "#result had %s partitions, but should have had %s", seen, reads); + // handle writes + for (IntCursor c : writes) + checker.write(c.value, outcome.id, outcome.isSuccess()); + } + } + } + } + + private Action accordAction(int id, SimulatedSystems simulated, int[] partitions) + { + IntArrayList reads = new IntArrayList(); + IntArrayList writes = new IntArrayList(); + for (int partition : partitions) + { + boolean added = false; + if (simulated.random.decide(readRatio)) + { + reads.add(partition); + added = true; + } + if (simulated.random.decide(writeRatio)) + { + writes.add(partition); + added = true; + } + if (!added) + { + // when read ratio fails that implies write + // when write ratio fails that implies read + // so make that case a read/write + // Its possible that both cases were true leading to a read/write; which is fine + // this just makes sure every partition is consumed. + reads.add(partition); + writes.add(partition); + } + } + + int node = simulated.random.uniform(1, cluster.size() + 1); + IInvokableInstance instance = cluster.get(node); + return new ReadWriteOperation(id, partitions, new IntHashSet(reads), new IntHashSet(writes), instance); + } + + private int[] genReadOnly(SimulatedSystems simulated, int[] partitions) + { + IntArrayList readOnly = new IntArrayList(); + for (int partition : partitions) + { + if (simulated.random.decide(readRatio)) + readOnly.add(partition); + } + return readOnly.toArray(); + } + + private static Query createQuery(int id, IntHashSet reads, IntHashSet writes) + { + if (reads.isEmpty() && writes.isEmpty()) + throw new IllegalArgumentException("Partitions are empty"); + List binds = new ArrayList<>(); + StringBuilder sb = new StringBuilder(); + sb.append("BEGIN TRANSACTION\n"); + if (!reads.isEmpty()) + { + + sb.append("\t") + .append(String.format(SELECT, String.join(", ", IntStream.of(reads.toArray()) + .mapToObj(i -> { + binds.add(i); + return "?"; + }) + .collect(Collectors.joining(", "))))) + .append('\n'); + } + + for (IntCursor c : writes) + { + sb.append('\t').append(UPDATE).append("\n"); + binds.add(id + ","); + binds.add(c.value); + } + + sb.append("COMMIT TRANSACTION"); + return new Query(sb.toString(), 0, ConsistencyLevel.ANY, ConsistencyLevel.ANY, binds.toArray(new Object[0])); + } + + @Override + boolean joinAll() + { + return true; + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java index 77eefb337bda..b07b4a86cd89 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java @@ -21,34 +21,26 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiFunction; import java.util.function.LongSupplier; import java.util.function.Supplier; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - import javax.annotation.Nullable; +import org.apache.commons.lang3.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.db.marshal.Int32Type; -import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; -import org.apache.cassandra.distributed.api.IIsolatedExecutor; -import org.apache.cassandra.distributed.impl.Instance; +import org.apache.cassandra.distributed.api.Row; +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.impl.Query; import org.apache.cassandra.simulator.Action; -import org.apache.cassandra.simulator.ActionList; import org.apache.cassandra.simulator.ActionListener; -import org.apache.cassandra.simulator.ActionPlan; +import org.apache.cassandra.simulator.Debug; import org.apache.cassandra.simulator.RunnableActionScheduler; -import org.apache.cassandra.simulator.Actions; import org.apache.cassandra.simulator.cluster.ClusterActions; -import org.apache.cassandra.simulator.Debug; -import org.apache.cassandra.simulator.cluster.KeyspaceActions; -import org.apache.cassandra.simulator.systems.SimulatedActionTask; import org.apache.cassandra.simulator.systems.SimulatedSystems; import org.apache.cassandra.simulator.utils.IntRange; import org.apache.cassandra.utils.ByteBufferUtil; @@ -56,31 +48,18 @@ import static java.lang.Boolean.TRUE; import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; -import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.distributed.api.ConsistencyLevel.ANY; -import static org.apache.cassandra.simulator.Action.Modifiers.RELIABLE; -import static org.apache.cassandra.simulator.Action.Modifiers.RELIABLE_NO_TIMEOUTS; -import static org.apache.cassandra.simulator.ActionSchedule.Mode.STREAM_LIMITED; -import static org.apache.cassandra.simulator.ActionSchedule.Mode.TIME_AND_STREAM_LIMITED; -import static org.apache.cassandra.simulator.ActionSchedule.Mode.TIME_LIMITED; import static org.apache.cassandra.simulator.Debug.EventType.PARTITION; import static org.apache.cassandra.simulator.paxos.HistoryChecker.fail; @SuppressWarnings("unused") -public class PairOfSequencesPaxosSimulation extends PaxosSimulation +public class PairOfSequencesPaxosSimulation extends AbstractPairOfSequencesPaxosSimulation { private static final Logger logger = LoggerFactory.getLogger(PairOfSequencesPaxosSimulation.class); - - private static final String KEYSPACE = "simple_paxos_simulation"; - private static final String TABLE = "tbl"; - private static final String CREATE_TABLE = "CREATE TABLE " + KEYSPACE + ".tbl (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))"; - private static final String INSERT = "INSERT INTO " + KEYSPACE + ".tbl (pk, count, seq1, seq2) VALUES (?, 0, '', []) IF NOT EXISTS"; - private static final String INSERT1 = "INSERT INTO " + KEYSPACE + ".tbl (pk, count, seq1, seq2) VALUES (?, 0, '', []) USING TIMESTAMP 0"; private static final String UPDATE = "UPDATE " + KEYSPACE + ".tbl SET count = count + 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk = ? IF EXISTS"; private static final String SELECT = "SELECT pk, count, seq1, seq2 FROM " + KEYSPACE + ".tbl WHERE pk = ?"; - private static final ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); - class VerifyingOperation extends Operation + class VerifyingOperation extends PaxosOperation { final HistoryChecker historyChecker; public VerifyingOperation(int id, IInvokableInstance instance, ConsistencyLevel consistencyLevel, int primaryKey, HistoryChecker historyChecker) @@ -91,23 +70,26 @@ public VerifyingOperation(int id, IInvokableInstance instance, ConsistencyLevel void verify(Observation outcome) { - (outcome.result != null ? successfulReads : failedReads).incrementAndGet(); + SimpleQueryResult result = outcome.result; + (result != null ? successfulReads : failedReads).incrementAndGet(); - if (outcome.result == null) + if (result == null) return; - if (outcome.result.length != 1) - throw fail(primaryKey, "#result (%s) != 1", Arrays.toString(outcome.result)); + if (!result.hasNext()) + throw fail(primaryKey, "#result: ([]) != 1"); + + // pk, count, seq1, seq2 + Row row = result.next(); - Object[] row = outcome.result[0]; // first verify internally consistent - int count = row[1] == null ? 0 : (Integer) row[1]; - int[] seq1 = Arrays.stream((row[2] == null ? "" : (String) row[2]).split(",")) + int count = row.getInteger("count", 0); + int[] seq1 = Arrays.stream(row.getString("seq1", "").split(",")) .filter(s -> !s.isEmpty()) .mapToInt(Integer::parseInt) .toArray(); - int[] seq2 = ((List) (row[3] == null ? emptyList() : row[3])) - .stream().mapToInt(x -> x).toArray(); + + int[] seq2 = row.getList("seq2", emptyList()).stream().mapToInt(x -> x).toArray(); if (!Arrays.equals(seq1, seq2)) throw fail(primaryKey, "%s != %s", seq1, seq2); @@ -115,11 +97,24 @@ void verify(Observation outcome) if (seq1.length != count) throw fail(primaryKey, "%d != #%s", count, seq1); - historyChecker.witness(outcome, seq1, outcome.start, outcome.end); + if (result.hasNext()) + throw fail(primaryKey, "#result (%s) != 1", ArrayUtils.toString(result.toObjectArrays())); + + historyChecker.witness(outcome, seq1); } } - class NonVerifyingOperation extends Operation + private abstract class PaxosOperation extends Operation + { + final int primaryKey; + PaxosOperation(int primaryKey, int id, IInvokableInstance instance, String idString, String query, ConsistencyLevel commitConsistency, ConsistencyLevel serialConsistency, Object... params) + { + super(new int[] {primaryKey}, id, instance, idString, new Query(query, -1, commitConsistency, serialConsistency, params)); + this.primaryKey = primaryKey; + } + } + + class NonVerifyingOperation extends PaxosOperation { public NonVerifyingOperation(int id, IInvokableInstance instance, ConsistencyLevel consistencyLevel, int primaryKey, HistoryChecker historyChecker) { @@ -131,7 +126,7 @@ void verify(Observation outcome) } } - public class ModifyingOperation extends Operation + public class ModifyingOperation extends PaxosOperation { final HistoryChecker historyChecker; public ModifyingOperation(int id, IInvokableInstance instance, ConsistencyLevel commitConsistency, ConsistencyLevel serialConsistency, int primaryKey, HistoryChecker historyChecker) @@ -142,32 +137,22 @@ public ModifyingOperation(int id, IInvokableInstance instance, ConsistencyLevel void verify(Observation outcome) { - (outcome.result != null ? successfulWrites : failedWrites).incrementAndGet(); - if (outcome.result != null) + SimpleQueryResult result = outcome.result; + (result != null ? successfulWrites : failedWrites).incrementAndGet(); + if (result != null) { - if (outcome.result.length != 1) - throw fail(primaryKey, "Result: 1 != #%s", Arrays.toString(outcome.result)); - if (outcome.result[0][0] != TRUE) + if (!result.hasNext()) + throw fail(primaryKey, "Paxos Result: 1 != #[]"); + if (result.next().getBoolean(0) != TRUE) throw fail(primaryKey, "Result != TRUE"); + if (result.hasNext()) + throw fail(primaryKey, "Paxos Result: 1 != #%s", ArrayUtils.toString(result.toObjectArrays())); } - historyChecker.applied(outcome.id, outcome.start, outcome.end, outcome.result != null); + historyChecker.applied(outcome.id, outcome.start, outcome.end, outcome.isSuccess()); } } - final ClusterActions.Options clusterOptions; - final float readRatio; - final IntRange withinKeyConcurrency; - final int concurrency; - final IntRange simulateKeyForSeconds; - final ConsistencyLevel serialConsistency; - final Debug debug; final List historyCheckers = new ArrayList<>(); - final AtomicInteger successfulReads = new AtomicInteger(); - final AtomicInteger successfulWrites = new AtomicInteger(); - final AtomicInteger failedReads = new AtomicInteger(); - final AtomicInteger failedWrites = new AtomicInteger(); - final long seed; - final int[] primaryKeys; public PairOfSequencesPaxosSimulation(SimulatedSystems simulated, Cluster cluster, @@ -178,34 +163,17 @@ public PairOfSequencesPaxosSimulation(SimulatedSystems simulated, long seed, int[] primaryKeys, long runForNanos, LongSupplier jitter) { - super(runForNanos < 0 ? STREAM_LIMITED : clusterOptions.topologyChangeLimit < 0 ? TIME_LIMITED : TIME_AND_STREAM_LIMITED, - simulated, cluster, scheduler, runForNanos, jitter); - this.readRatio = readRatio; - this.concurrency = concurrency; - this.simulateKeyForSeconds = simulateKeyForSeconds; - this.withinKeyConcurrency = withinKeyConcurrency; - this.serialConsistency = serialConsistency; - this.clusterOptions = clusterOptions; - this.debug = debug; - this.seed = seed; - this.primaryKeys = primaryKeys.clone(); - Arrays.sort(this.primaryKeys); + super(simulated, cluster, clusterOptions, + readRatio, concurrency, simulateKeyForSeconds, withinKeyConcurrency, + serialConsistency, + scheduler, debug, + seed, primaryKeys, + runForNanos, jitter); } - public ActionPlan plan() + @Override + BiFunction> actionFactory() { - ActionPlan plan = new KeyspaceActions(simulated, KEYSPACE, TABLE, CREATE_TABLE, cluster, - clusterOptions, serialConsistency, this, primaryKeys, debug).plan(); - - plan = plan.encapsulate(ActionPlan.setUpTearDown( - ActionList.of( - cluster.stream().map(i -> simulated.run("Insert Partitions", i, executeForPrimaryKeys(INSERT1, primaryKeys))) - ), - ActionList.of( - cluster.stream().map(i -> SimulatedActionTask.unsafeTask("Shutdown " + i.broadcastAddress(), RELIABLE, RELIABLE_NO_TIMEOUTS, simulated, i, i::shutdown)) - ) - )); - final int nodes = cluster.size(); for (int primaryKey : primaryKeys) historyCheckers.add(new HistoryChecker(primaryKey)); @@ -231,12 +199,12 @@ public Action get() if (simulated.snitch.dcOf(node) > 0) { // perform some queries against these nodes but don't expect them to be linearizable - return new NonVerifyingOperation(i++, instance, serialConsistency, primaryKey, historyChecker); + return nonVerifying(i++, instance, primaryKey, historyChecker); } case SERIAL: return simulated.random.decide(readRatio) - ? new VerifyingOperation(i++, instance, serialConsistency, primaryKey, historyChecker) - : new ModifyingOperation(i++, instance, ANY, serialConsistency, primaryKey, historyChecker); + ? verifying(i++, instance, primaryKey, historyChecker) + : modifying(i++, instance, primaryKey, historyChecker); } } @@ -271,71 +239,41 @@ public String toString() primaryKeyActions.add(supplier); } + return (ignore, primaryKeyIndex) -> primaryKeyActions.get(only(primaryKeyIndex)); + } - List available = IntStream.range(0, primaryKeys.length).boxed().collect(Collectors.toList()); - Action stream = Actions.infiniteStream(concurrency, new Supplier() { - @Override - public Action get() - { - int i = simulated.random.uniform(0, available.size()); - int next = available.get(i); - available.set(i, available.get(available.size() - 1)); - available.remove(available.size() - 1); - long untilNanos = simulated.time.nanoTime() + SECONDS.toNanos(simulateKeyForSeconds.select(simulated.random)); - int concurrency = withinKeyConcurrency.select(simulated.random); - Supplier supplier = primaryKeyActions.get(next); - // while this stream is finite, it participates in an infinite stream via its parent, so we want to permit termination while it's running - return Actions.infiniteStream(concurrency, new Supplier() - { - @Override - public Action get() - { - if (simulated.time.nanoTime() >= untilNanos) - { - available.add(next); - return null; - } - return supplier.get(); - } - - @Override - public String toString() - { - return supplier.toString(); - } - }); - } + private static int only(int[] array) + { + if (array.length != 1) + throw new AssertionError("Require only 1 element but found array " + Arrays.toString(array)); + return array[0]; + } - @Override - public String toString() - { - return "Primary Key Actions"; - } - }); + @Override + protected String createTableStmt() + { + return "CREATE TABLE " + KEYSPACE + ".tbl (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))"; + } - return simulated.execution.plan() - .encapsulate(plan) - .encapsulate(ActionPlan.interleave(singletonList(ActionList.of(stream)))); + @Override + protected String preInsertStmt() + { + return "INSERT INTO " + KEYSPACE + ".tbl (pk, count, seq1, seq2) VALUES (?, 0, '', []) USING TIMESTAMP 0"; } - private IIsolatedExecutor.SerializableRunnable executeForPrimaryKeys(String cql, int[] primaryKeys) + private Operation verifying(int operationId, IInvokableInstance instance, int primaryKey, HistoryChecker historyChecker) { - return () -> { - for (int primaryKey : primaryKeys) - Instance.unsafeExecuteInternalWithResult(cql, primaryKey); - }; + return new VerifyingOperation(operationId, instance, serialConsistency, primaryKey, historyChecker); } - @Override - public TopologyChangeValidator newTopologyChangeValidator(Object id) + private Operation nonVerifying(int operationId, IInvokableInstance instance, int primaryKey, HistoryChecker historyChecker) { - return new PaxosTopologyChangeVerifier(cluster, KEYSPACE, TABLE, id); + return new NonVerifyingOperation(operationId, instance, serialConsistency, primaryKey, historyChecker); } - @Override - public RepairValidator newRepairValidator(Object id) + private Operation modifying(int operationId, IInvokableInstance instance, int primaryKey, HistoryChecker historyChecker) { - return new PaxosRepairValidator(cluster, KEYSPACE, TABLE, id); + return new ModifyingOperation(operationId, instance, ANY, serialConsistency, primaryKey, historyChecker); } @Override @@ -346,10 +284,8 @@ void log(@Nullable Integer primaryKey) } @Override - public void run() + boolean joinAll() { - super.run(); - logger.warn("Writes: {} successful, {} failed", successfulWrites, failedWrites); - logger.warn("Reads: {} successful {} failed", successfulReads, failedReads); + return false; } } diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java index a6fbc444651b..981c96576ec2 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java @@ -18,6 +18,7 @@ package org.apache.cassandra.simulator.paxos; +import java.util.Arrays; import java.util.Map; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; @@ -26,6 +27,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.LongSupplier; +import java.util.stream.Stream; import javax.annotation.Nullable; import com.google.common.base.Throwables; @@ -35,8 +37,9 @@ import org.apache.cassandra.concurrent.ExecutorFactory; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.distributed.Cluster; -import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.service.paxos.BallotGenerator; import org.apache.cassandra.simulator.ActionList; @@ -46,8 +49,9 @@ import org.apache.cassandra.simulator.Simulation; import org.apache.cassandra.simulator.cluster.ClusterActionListener; import org.apache.cassandra.simulator.systems.InterceptorOfGlobalMethods; -import org.apache.cassandra.simulator.systems.SimulatedQuery; +import org.apache.cassandra.simulator.systems.SimulatedActionCallable; import org.apache.cassandra.simulator.systems.SimulatedSystems; +import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.concurrent.Threads; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -57,22 +61,34 @@ import static org.apache.cassandra.simulator.Action.Modifiers.NONE; import static org.apache.cassandra.simulator.SimulatorUtils.failWithOOM; import static org.apache.cassandra.simulator.paxos.HistoryChecker.causedBy; +import static org.apache.cassandra.utils.AssertionUtils.anyOf; +import static org.apache.cassandra.utils.AssertionUtils.hasCause; public abstract class PaxosSimulation implements Simulation, ClusterActionListener { private static final Logger logger = LoggerFactory.getLogger(PaxosSimulation.class); - abstract class Operation extends SimulatedQuery implements BiConsumer + private static String createDescription(int[] primaryKeys, int id, String idString) { - final int primaryKey; + return primaryKeys.length == 1 ? Integer.toString(primaryKeys[0]) : Arrays.toString(primaryKeys) + "/" + id + ": " + idString; + } + + protected Class[] expectedExceptions() + { + return (Class[]) new Class[] { RequestExecutionException.class }; + } + + abstract class Operation extends SimulatedActionCallable implements BiConsumer + { + final int[] primaryKeys; final int id; int start; - public Operation(int primaryKey, int id, IInvokableInstance instance, - String idString, String query, ConsistencyLevel commitConsistency, ConsistencyLevel serialConistency, Object... params) + public Operation(int[] primaryKeys, int id, IInvokableInstance instance, + String idString, IIsolatedExecutor.SerializableCallable query) { - super(primaryKey + "/" + id + ": " + idString, DISPLAY_ORIGIN, NONE, PaxosSimulation.this.simulated, instance, query, commitConsistency, serialConistency, params); - this.primaryKey = primaryKey; + super(createDescription(primaryKeys, id, idString), DISPLAY_ORIGIN, NONE, PaxosSimulation.this.simulated, instance, query); + this.primaryKeys = primaryKeys; this.id = id; } @@ -83,9 +99,9 @@ public ActionList performAndRegister() } @Override - public void accept(Object[][] success, Throwable failure) + public void accept(SimpleQueryResult success, Throwable failure) { - if (failure != null && !(failure instanceof RequestExecutionException)) + if (failure != null && !expectedException(failure)) { if (!simulated.failures.hasFailure() || !(failure instanceof UncheckedInterruptedException)) logger.error("Unexpected exception", failure); @@ -96,10 +112,14 @@ else if (failure != null) { logger.trace("{}", failure.getMessage()); } - verify(new Observation(id, success, start, logicalClock.incrementAndGet())); } + protected boolean expectedException(Throwable failure) + { + // due to class loaders can't use instanceOf directly + return hasCause(anyOf(Stream.of(expectedExceptions()).map(AssertionUtils::isThrowableInstanceof))).matches(failure); + } abstract void verify(Observation outcome); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java new file mode 100644 index 000000000000..c50a1b442852 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import javax.annotation.Nullable; + +import accord.verify.StrictSerializabilityVerifier; +import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.IntIntMap; + +public class StrictSerializabilityValidator implements HistoryValidator +{ + private final StrictSerializabilityVerifier verifier; + private final IntIntMap pkToIndex; + private final int[] indexToPk; + + public StrictSerializabilityValidator(int[] primaryKeys) + { + this.verifier = new StrictSerializabilityVerifier(primaryKeys.length); + pkToIndex = new IntIntHashMap(primaryKeys.length); + indexToPk = new int[primaryKeys.length]; + for (int i = 0; i < primaryKeys.length; i++) + { + pkToIndex.put(primaryKeys[i], i); + indexToPk[i] = primaryKeys[i]; + } + } + + @Override + public Checker witness(int start, int end) + { + verifier.begin(); + return new Checker() + { + @Override + public void read(int pk, int id, int count, int[] seq) + { + verifier.witnessRead(get(pk), seq); + } + + @Override + public void write(int pk, int id, boolean success) + { + verifier.witnessWrite(get(pk), id); + } + + @Override + public void close() + { + convertHistoryViolation(() -> verifier.apply(start, end)); + } + }; + } + + @Override + public void print(@Nullable Integer pk) + { + if (pk == null) verifier.print(); + else verifier.print(get(pk)); + } + + private int get(int pk) + { + if (pkToIndex.containsKey(pk)) + return pkToIndex.get(pk); + throw new IllegalArgumentException("Unknown pk=" + pk); + } + + private void convertHistoryViolation(Runnable fn) + { + try + { + fn.run(); + } + catch (accord.verify.HistoryViolation e) + { + if (!(e.primaryKey() >= 0 && e.primaryKey() < indexToPk.length)) throw new IllegalArgumentException("Unable to find primary key by index " + e.primaryKey()); + int pk = indexToPk[e.primaryKey()]; + HistoryViolation v = new HistoryViolation(pk, e.getMessage()); + v.setStackTrace(e.getStackTrace()); + throw v; + } + } + + public static class Factory implements HistoryValidator.Factory + { + public static final Factory instance = new Factory(); + + @Override + public HistoryValidator create(int[] partitions) + { + return new StrictSerializabilityValidator(partitions); + } + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java index 34c0f6bacc26..40582706d97e 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java @@ -24,9 +24,6 @@ import javax.annotation.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.cassandra.simulator.RandomSource; import org.apache.cassandra.simulator.systems.InterceptedWait.CaptureSites.Capture; import org.apache.cassandra.simulator.systems.InterceptedWait.InterceptedConditionWait; @@ -45,7 +42,6 @@ @PerClassLoader public class InterceptingGlobalMethods extends InterceptingMonitors implements InterceptorOfGlobalMethods { - private static final Logger logger = LoggerFactory.getLogger(InterceptingGlobalMethods.class); private static final boolean isDeterminismCheckStrict = TEST_SIMULATOR_DETERMINISM_CHECK.convert(name -> name.equals("strict")); private final @Nullable LongConsumer onThreadLocalRandomCheck; @@ -116,7 +112,7 @@ public InterceptibleThread ifIntercepted() if (!disabled) { - logger.error("Caught a non-intercepted thread! " + thread, new RuntimeException()); + logger().error("Caught a non-intercepted thread! " + thread, new RuntimeException()); throw failWithOOM(); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java index 3aabd18e5e29..489ff15b253e 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java @@ -55,9 +55,21 @@ @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") public abstract class InterceptingMonitors implements InterceptorOfGlobalMethods, Closeable { - private static final Logger logger = LoggerFactory.getLogger(InterceptingMonitors.class); private static final boolean DEBUG_MONITOR_STATE = TEST_SIMULATOR_DEBUG.getBoolean(); + // eagerly initializing the logger prevents the dtest instance variables + // from being setup correctly, which causes all nodes to log as
+ private static class LoggerHandle + { + private static final Logger logger = LoggerFactory.getLogger(InterceptingMonitors.class); + } + + protected static Logger logger() + { + return LoggerHandle.logger; + } + + static class MonitorState { InterceptedMonitorWait waitingOnNotify; @@ -825,7 +837,7 @@ private void checkForDeadlock(Thread waiting, Thread blockedBy) return; // not really waiting, just hasn't woken up yet if (next == waiting) { - logger.error("Deadlock between {}{} and {}{}", waiting, Threads.prettyPrintStackTrace(waiting, true, ";"), cur, Threads.prettyPrintStackTrace(cur, true, ";")); + logger().error("Deadlock between {}{} and {}{}", waiting, Threads.prettyPrintStackTrace(waiting, true, ";"), cur, Threads.prettyPrintStackTrace(cur, true, ";")); throw failWithOOM(); } cur = next; diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptorOfGlobalMethods.java b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptorOfGlobalMethods.java index adb8183bffc6..eca4ebb2b436 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptorOfGlobalMethods.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptorOfGlobalMethods.java @@ -25,6 +25,9 @@ import java.util.function.LongConsumer; import java.util.function.ToIntFunction; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import net.openhft.chronicle.core.util.WeakIdentityHashMap; import org.apache.cassandra.simulator.systems.InterceptedWait.CaptureSites; import org.apache.cassandra.utils.Clock; @@ -377,6 +380,16 @@ public void close() @SuppressWarnings("unused") public static class Global { + private static class LoggerHandle + { + private static final Logger logger = LoggerFactory.getLogger(Global.class); + } + + private static Logger logger() + { + return LoggerHandle.logger; + } + private static InterceptorOfGlobalMethods methods; public static WaitQueue newWaitQueue() @@ -426,8 +439,7 @@ public static InterceptibleThread ifIntercepted() public static void uncaughtException(Thread thread, Throwable throwable) { - System.err.println(thread); - throwable.printStackTrace(System.err); + logger().error("Exception in thread {}", thread, throwable); methods.uncaughtException(thread, throwable); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java index 0cf60dfb5e61..a1d19b63751d 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java @@ -242,7 +242,8 @@ private ActionList simulate(Runnable simulate) } catch (Throwable t) { - consequences.forEach(Action::invalidate); + if (consequences != null) + consequences.forEach(Action::invalidate); throw t; } diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedQuery.java b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedQuery.java index d190fd7a1530..106cd8c027cf 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedQuery.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedQuery.java @@ -20,9 +20,10 @@ import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.distributed.impl.Query; -public class SimulatedQuery extends SimulatedActionCallable +public class SimulatedQuery extends SimulatedActionCallable { public SimulatedQuery(Object description, SimulatedSystems simulated, IInvokableInstance instance, String query, ConsistencyLevel commitConsistency, ConsistencyLevel serialConsistency, Object... params) { @@ -45,7 +46,7 @@ public SimulatedQuery(Object description, Modifiers self, Modifiers transitive, } @Override - public void accept(Object[][] success, Throwable failure) + public void accept(SimpleQueryResult success, Throwable failure) { if (failure != null) simulated.failures.accept(failure); diff --git a/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java b/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java new file mode 100644 index 000000000000..018affc43108 --- /dev/null +++ b/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java @@ -0,0 +1,593 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.paxos; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.EnumSet; +import java.util.List; +import java.util.Random; +import java.util.function.Consumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import org.junit.Assume; +import org.junit.FixMethodOrder; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.MethodSorters; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.IntIntMap; +import com.carrotsearch.hppc.IntSet; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.distributed.api.QueryResults; +import org.apache.cassandra.utils.Clock; +import org.assertj.core.api.AbstractThrowableAssert; +import org.assertj.core.api.Assertions; + +import static org.apache.commons.lang3.ArrayUtils.add; +import static org.apache.commons.lang3.ArrayUtils.swap; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Notes: + * * anomalyDirtyRead was left out as Accord doesn't reject requests, so without a way to reject or abort + * requests then client doesn't have any way to abserve a REJECT, so all issues are UNKNOWN. + * + */ +@RunWith(Parameterized.class) +@FixMethodOrder(MethodSorters.NAME_ASCENDING) // since Random is used, make sure tests run in a determanistic order +public class HistoryValidatorTest +{ + private static final Logger logger = LoggerFactory.getLogger(HistoryValidatorTest.class); + private static final Random RANDOM = random(); + private static final int[] PARTITIONS = IntStream.range(0, 10).toArray(); + private static final int x = 1; + private static final int y = 2; + + @Parameterized.Parameters(name = "{0}") + public static Collection data() + { + List tests = new ArrayList<>(); + tests.add(test(LinearizabilityValidator.Factory.instance)); + tests.add(test(StrictSerializabilityValidator.Factory.instance)); + return tests; + } + + private static Object[] test(HistoryValidator.Factory factory) + { + return new Object[]{ factory }; + } + + private final HistoryValidator.Factory factory; + + public HistoryValidatorTest(HistoryValidator.Factory factory) + { + this.factory = factory; + } + + @Test + public void orderingWithWriteTimeout() + { + IntSet timeoutEvents = set(4, 17, 83); + for (boolean reject : Arrays.asList(true, false)) + { + HistoryValidator validator = create(); + + int logicalClock = 1; + int[] seq = seq(); + for (int eventId = 0; eventId < 100; eventId++) + { + if (timeoutEvents.contains(eventId)) + { + if (!reject) + seq = add(seq, eventId); // wastn't observed, but was applied + continue; + } + single(validator, ob(eventId, ++logicalClock, ++logicalClock), 1, seq, true); + seq = add(seq, eventId); //TODO forgot to add this and LinearizabilityValidator was success... should reject! + } + } + } + + /** + * This test differs from {@link #orderingWithWriteTimeout} as it defines event orders assuming + * requests were concurrent, so may happen in different orderings. + *

+ * This means that we may see the results out of order, but the sequence/count ordering will remain + */ + @Test + public void orderingWithWriteTimeoutWithConcurrency() + { + IntSet timeoutEvents = set(4, 17, 83); + for (boolean reject : Arrays.asList(true, false)) + { + HistoryValidator validator = create(); + // Since the requests are "concurrent" the window in which the operations happened are between start=1 and + // end=responseOrder. + int start = 1; + int logicalClock = start; + + // 'ordering' is the order in which the txns are applied + // 'indexOrder' is the order in which the events are seen; since requests are "concurrent" the order we + // validate may differ from the ordering they were applied. + int[] ordering = IntStream.range(0, 100).toArray(); + if (reject) + ordering = IntStream.of(ordering).filter(i -> !timeoutEvents.contains(i)).toArray(); + shuffle(ordering); + int[] indexOrder = IntStream.range(0, ordering.length - 1).toArray(); + shuffle(indexOrder); + for (int i = 0; i < indexOrder.length; i++) + { + int idx = indexOrder[i]; + int eventId = ordering[idx]; + if (timeoutEvents.contains(eventId)) + continue; + int[] seq = Arrays.copyOf(ordering, idx); + single(validator, ob(eventId, start, ++logicalClock), 1, seq, true); + } + } + } + + @Test + public void anomalyNonMonotonicRead() + { + // Session1: w[x=10] -> Session2: r[x=10] -> r[x=0] + test(dsl -> { + dsl.txn(writeOnly(x)); + dsl.txn(readOnly(x, seq(0))); + dsl.failingTxn(readOnly(x, seq())).isInstanceOf(HistoryViolation.class); + }); + } + + @Test + public void anomalyNonMonotonicWrite() + { + requiresMultiKeySupport(); + // Session1: w[x=10] -> w[y=10] -> Session2: r[y=10] -> r[x=0] + test(dsl -> { + dsl.txn(writeOnly(x)); + dsl.txn(writeOnly(y)); + dsl.txn(readOnly(y, seq(1))); + dsl.failingTxn(readOnly(x, seq())).isInstanceOf(HistoryViolation.class); + }); + } + + @Test + public void anomalyNonMonotonicTransaction() + { + // Session1: r[x=5] -> w[y=10] -> Session2: r[y=10] -> r[x=0] + requiresMultiKeySupport(); + test(dsl -> { + dsl.txn(writeOnly(x), writeOnly(y)); + + dsl.txn(readOnly(x, seq(0))); + dsl.txn(writeOnly(y)); + dsl.txn(readOnly(y, seq(0, 2))); + + dsl.failingTxn(readOnly(x, seq())).isInstanceOf(HistoryViolation.class); + }); + } + + @Test + public void anomalyReadYourOwnWrites() + { + // This test is kinda a duplicate; here just for completness + // w[x=12] -> r[x=8] + test(dsl -> { + dsl.txn(writeOnly(x)); + dsl.failingTxn(readOnly(x, seq())).isInstanceOf(HistoryViolation.class); + }); + } + + //TODO write skew + @Test + public void anomalyReadSkew() + { + requiresMultiKeySupport(); + // two different txn are involved to make this happen + // x=0, y=0 + // U1: starts + // U2: starts + // U1: r[x=0] + // U2: w[x=5], w[y=5] + // U2: commit + // U1: r[y=5] + // U1: commit + HistoryValidator validator = create(); + + // init + txn(validator, ob(0, 1, 2), writeOnly(x), writeOnly(y)); + int u1 = 1, u1_start = 3, u1_end = 6; + int u2 = 2, u2_start = 4, u2_end = 5; + txn(validator, ob(u2, u2_start, u2_end), readWrite(x, seq(0)), readWrite(y, seq(0))); + Assertions.assertThatThrownBy(() -> txn(validator, ob(u1, u1_start, u1_end), readWrite(x, seq(0)), readWrite(y, seq(0, u2)))) + .isInstanceOf(HistoryViolation.class); + } + + @Test + public void anomalyWriteSkew() + { + // two different txn are involved to make this happen + // x=0, y=0 + // U1: starts + // U2: starts + // U1: r[x=0] + } + + @Test + public void seenBehavior() + { + fromLog("Witness(start=4, end=7)\n" + + "\tread(pk=121901541, id=2, count=0, seq=[])\n" + + "\twrite(pk=121901541, id=2, success=true)\n" + + + "Witness(start=3, end=8)\n" + + "\tread(pk=122950117, id=0, count=0, seq=[])\n" + + "\twrite(pk=122950117, id=0, success=true)\n" + + "\twrite(pk=119804389, id=0, success=true)\n" + + + "Witness(start=5, end=9)\n" + + "\tread(pk=121901541, id=3, count=1, seq=[2])\n" + + "\twrite(pk=121901541, id=3, success=true)\n" + + + "Witness(start=2, end=10)\n" + + "\twrite(pk=122950117, id=1, success=true)\n" + + "\twrite(pk=119804389, id=1, success=true)\n" + + + "Witness(start=6, end=11)\n" + + "\tread(pk=121901541, id=4, count=2, seq=[2, 3])\n" + + "\twrite(pk=121901541, id=4, success=true)\n" + + + "Witness(start=12, end=14)\n" + + "\twrite(pk=121901541, id=5, success=true)\n" + + + "Witness(start=13, end=16)\n" + + "\tread(pk=119804389, id=6, count=2, seq=[0, 1])\n" + + "\twrite(pk=119804389, id=6, success=true)\n" + + "\twrite(pk=122950117, id=6, success=true)\n" + + + "Witness(start=15, end=18)\n" + + "\tread(pk=121901541, id=7, count=4, seq=[2, 3, 4, 5])\n" + + "\twrite(pk=121901541, id=7, success=true)\n" + + + "Witness(start=17, end=20)\n" + + "\tread(pk=119804389, id=8, count=3, seq=[0, 1, 6])\n" + + "\twrite(pk=119804389, id=8, success=true)\n" + + "\twrite(pk=122950117, id=8, success=true)\n" // this partition is what triggers + ); + } + + private void requiresMultiKeySupport() + { + Assume.assumeTrue("Validator " + factory.getClass() + " does not support multi-key", factory instanceof StrictSerializabilityValidator.Factory); + } + + private int[] shuffle(int[] ordering) + { + // shuffle array + for (int i = ordering.length; i > 1; i--) + swap(ordering, i - 1, RANDOM.nextInt(i)); + return ordering; + } + + private static void txn(HistoryValidator validator, Observation ob, Event... events) + { + String type = events.length == 1 ? "single" : "multiple"; + logger.info("[Validator={}, Observation=({}, {}, {})] Validating {} {}}", validator.getClass().getSimpleName(), ob.id, ob.start, ob.end, type, events); + try (HistoryValidator.Checker check = validator.witness(ob.start, ob.end)) + { + for (Event e : events) + e.process(ob, check); + } + } + + private static void single(HistoryValidator validator, Observation ob, int pk, int[] seq, boolean hasWrite) + { + txn(validator, ob, hasWrite ? readWrite(pk, seq) : readOnly(pk, seq)); + } + + private static Observation ob(int id, int start, int end) + { + // why empty result? The users don't actually check the result's data, just existence + return new Observation(id, QueryResults.empty(), start, end); + } + + private static int[] seq(int... seq) + { + return seq; + } + + private HistoryValidator create() + { + return factory.create(PARTITIONS); + } + + private static IntSet set(int... values) + { + IntSet set = new IntHashSet(values.length); + for (int v : values) + set.add(v); + return set; + } + + private static Random random() + { + long seed = Long.parseLong(CassandraRelevantProperties.TEST_SEED.getString(Long.toString(Clock.Global.nanoTime()))); + logger.info("Random seed={}; set -Dcassandra.test.seed={} while reruning the tests to get the same order", seed, seed); + return new Random(seed); + } + + private static Event readWrite(int pk, int[] seq) + { + return new Event(EnumSet.of(Event.Type.READ, Event.Type.WRITE), pk, seq); + } + + private static Event readOnly(int pk, int[] seq) + { + return new Event(EnumSet.of(Event.Type.READ), pk, seq); + } + + private static Event writeOnly(int pk) + { + return new Event(EnumSet.of(Event.Type.WRITE), pk, null); + } + + private void fromLog(String log) + { + IntSet pks = new IntHashSet(); + class Read + { + final int pk, id, count; + final int[] seq; + + Read(int pk, int id, int count, int[] seq) + { + this.pk = pk; + this.id = id; + this.count = count; + this.seq = seq; + } + } + class Write + { + final int pk, id; + final boolean success; + + Write(int pk, int id, boolean success) + { + this.pk = pk; + this.id = id; + this.success = success; + } + } + class Witness + { + final int start, end; + final List actions = new ArrayList<>(); + + Witness(int start, int end) + { + this.start = start; + this.end = end; + } + + void read(int pk, int id, int count, int[] seq) + { + actions.add(new Read(pk, id, count, seq)); + } + + void write(int pk, int id, boolean success) + { + actions.add(new Write(pk, id, success)); + } + + void process(HistoryValidator validator) + { + try (HistoryValidator.Checker check = validator.witness(start, end)) + { + for (Object a : actions) + { + if (a instanceof Read) + { + Read read = (Read) a; + check.read(read.pk, read.id, read.count, read.seq); + } + else + { + Write write = (Write) a; + check.write(write.pk, write.id, write.success); + } + } + } + } + } + List witnesses = new ArrayList<>(); + Witness current = null; + for (String line : log.split("\n")) + { + if (line.startsWith("Witness")) + { + if (current != null) + witnesses.add(current); + Matcher matcher = Pattern.compile("Witness\\(start=(.+), end=(.+)\\)").matcher(line); + if (!matcher.find()) throw new AssertionError("Unable to match start/end of " + line); + current = new Witness(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(2))); + } + else if (line.startsWith("\tread")) + { + Matcher matcher = Pattern.compile("\tread\\(pk=(.+), id=(.+), count=(.+), seq=\\[(.*)\\]\\)").matcher(line); + if (!matcher.find()) throw new AssertionError("Unable to match read of " + line); + int pk = Integer.parseInt(matcher.group(1)); + pks.add(pk); + int id = Integer.parseInt(matcher.group(2)); + int count = Integer.parseInt(matcher.group(3)); + String seqStr = matcher.group(4); + int[] seq = seqStr.isEmpty() ? new int[0] : Stream.of(seqStr.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray(); + current.read(pk, id, count, seq); + } + else if (line.startsWith("\twrite")) + { + Matcher matcher = Pattern.compile("\twrite\\(pk=(.+), id=(.+), success=(.+)\\)").matcher(line); + if (!matcher.find()) throw new AssertionError("Unable to match write of " + line); + int pk = Integer.parseInt(matcher.group(1)); + pks.add(pk); + int id = Integer.parseInt(matcher.group(2)); + boolean success = Boolean.parseBoolean(matcher.group(3)); + current.write(pk, id, success); + } + else + { + throw new IllegalArgumentException("Unknow line: " + line); + } + } + if (current != null) + witnesses.add(current); + int[] keys = pks.toArray(); + Arrays.sort(keys); + HistoryValidator validator = factory.create(keys); + for (Witness w : witnesses) + w.process(validator); + } + + private static class Event + { + enum Type + {READ, WRITE} + + ; + private final EnumSet types; + private final int pk; + private final int[] seq; + + private Event(EnumSet types, int pk, int[] seq) + { + this.types = types; + this.pk = pk; + this.seq = seq; + } + + private void process(Observation ob, HistoryValidator.Checker check) + { + if (types.contains(Type.READ)) + check.read(pk, ob.id, seq.length, seq); + if (types.contains(Type.WRITE)) + check.write(pk, ob.id, ob.isSuccess()); + } + } + + private interface TestDSL + { + void txn(Event... events); + + AbstractThrowableAssert failingTxn(Event... events); + } + + private static boolean supportMultiKey(HistoryValidator validator) + { + return validator instanceof StrictSerializabilityValidator; + } + + private void test(Consumer fn) + { + HistoryValidator validator = create(); + boolean global = supportMultiKey(validator); + EventIdGen eventIdGen = global ? new AllPks() : new PerPk(); + TestDSL dsl = new TestDSL() + { + int logicalClock = 0; + + @Override + public void txn(Event... events) + { + if (global) + { + int eventId = eventIdGen.next(); + HistoryValidatorTest.txn(validator, ob(eventId, ++logicalClock, ++logicalClock), events); + } + else + { + for (Event e : events) + { + int eventId = eventIdGen.next(e.pk); + HistoryValidatorTest.txn(validator, ob(eventId, ++logicalClock, ++logicalClock), e); + } + } + } + + @Override + public AbstractThrowableAssert failingTxn(Event... events) + { + return assertThatThrownBy(() -> txn(events)); + } + }; + fn.accept(dsl); + } + + private interface EventIdGen + { + int next(int pk); + + int next(); + } + + private static class PerPk implements EventIdGen + { + private final IntIntMap map = new IntIntHashMap(); + + @Override + public int next(int pk) + { + int next = !map.containsKey(pk) ? 0 : map.get(pk) + 1; + map.put(pk, next); + return next; + } + + @Override + public int next() + { + throw new UnsupportedOperationException("next without pk not supported"); + } + } + + private static class AllPks implements EventIdGen + { + private int value = 0; + + @Override + public int next(int pk) + { + return next(); + } + + @Override + public int next() + { + return value++; + } + } +} diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java index 9fc7e6983243..f7e13c1927af 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java @@ -41,7 +41,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.Invariants; +import accord.utilsfork.Invariants; import io.airlift.airline.Command; import io.airlift.airline.HelpOption; import io.airlift.airline.Option; @@ -52,6 +52,7 @@ import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.distributed.impl.Query; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.harry.SchemaSpec; @@ -798,7 +799,7 @@ public void run() } @Override - public void accept(Object[][] result, Throwable failure) + public void accept(SimpleQueryResult result, Throwable failure) { if (failure != null) simulated.failures.accept(failure); @@ -824,7 +825,7 @@ public RetryingQuery(String query, ConsistencyLevel cl, Object[] boundValues) } @Override - public Object[][] call() + public SimpleQueryResult call() { while (true) { diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/ShortAccordSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/ShortAccordSimulationTest.java new file mode 100644 index 000000000000..a51fa078add1 --- /dev/null +++ b/test/simulator/test/org/apache/cassandra/simulator/test/ShortAccordSimulationTest.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.test; + +import java.io.IOException; + +import org.junit.Test; + +import org.apache.cassandra.simulator.paxos.AccordSimulationRunner; + +public class ShortAccordSimulationTest +{ + @Test + public void simulationTest() throws IOException + { + AccordSimulationRunner.main(new String[] { "run", "-n", "3..6", "-t", "1000", "--cluster-action-limit", "-1", "-c", "2", "-s", "30"}); + } +} diff --git a/test/unit/accord/utils/random/Picker.java b/test/unit/accord/utils/random/Picker.java index f12369d35fca..f83d57763b00 100644 --- a/test/unit/accord/utils/random/Picker.java +++ b/test/unit/accord/utils/random/Picker.java @@ -22,7 +22,7 @@ import java.util.function.Supplier; import accord.utils.Invariants; -import accord.utils.RandomSource; +import accord.utilsfork.RandomSource; public class Picker { diff --git a/test/unit/accord/utils/DefaultRandom.java b/test/unit/accord/utilsfork/DefaultRandom.java similarity index 98% rename from test/unit/accord/utils/DefaultRandom.java rename to test/unit/accord/utilsfork/DefaultRandom.java index b16f1f8bbf34..49f9085569a6 100644 --- a/test/unit/accord/utils/DefaultRandom.java +++ b/test/unit/accord/utilsfork/DefaultRandom.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; import java.util.Random; diff --git a/test/unit/accord/utils/Gen.java b/test/unit/accord/utilsfork/Gen.java similarity index 83% rename from test/unit/accord/utils/Gen.java rename to test/unit/accord/utilsfork/Gen.java index 523812ccf433..e9468cb24e89 100644 --- a/test/unit/accord/utils/Gen.java +++ b/test/unit/accord/utilsfork/Gen.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; import java.util.function.BiFunction; import java.util.function.Function; @@ -44,14 +44,14 @@ static Gen of(Gen fn) return fn; } - A next(RandomSource random); + A next(accord.utilsfork.RandomSource random); default Gen map(Function fn) { return r -> fn.apply(this.next(r)); } - default Gen map(BiFunction fn) + default Gen map(BiFunction fn) { return r -> fn.apply(r, this.next(r)); } @@ -71,7 +71,7 @@ default Gen flatMap(Function> mapper) return rs -> mapper.apply(this.next(rs)).next(rs); } - default Gen flatMap(BiFunction> mapper) + default Gen flatMap(BiFunction> mapper) { return rs -> mapper.apply(rs, this.next(rs)).next(rs); } @@ -105,37 +105,37 @@ default Gen filter(int maxAttempts, A defaultValue, Predicate fn) }; } - default Supplier asSupplier(RandomSource rs) + default Supplier asSupplier(accord.utilsfork.RandomSource rs) { return () -> next(rs); } - default Stream asStream(RandomSource rs) + default Stream asStream(accord.utilsfork.RandomSource rs) { return Stream.generate(() -> next(rs)); } interface Int2IntMapFunction { - int applyAsInt(RandomSource rs, int value); + int applyAsInt(accord.utilsfork.RandomSource rs, int value); } interface Int2LongMapFunction { - long applyAsLong(RandomSource rs, int value); + long applyAsLong(accord.utilsfork.RandomSource rs, int value); } interface Long2LongMapFunction { - long applyAsLong(RandomSource rs, long value); + long applyAsLong(accord.utilsfork.RandomSource rs, long value); } interface IntGen extends Gen { - int nextInt(RandomSource random); + int nextInt(accord.utilsfork.RandomSource random); @Override - default Integer next(RandomSource random) + default Integer next(accord.utilsfork.RandomSource random) { return nextInt(random); } @@ -174,12 +174,12 @@ default Gen.IntGen filter(Predicate fn) return filterAsInt(i -> fn.test(i)); } - default IntSupplier asIntSupplier(RandomSource rs) + default IntSupplier asIntSupplier(accord.utilsfork.RandomSource rs) { return () -> nextInt(rs); } - default IntStream asIntStream(RandomSource rs) + default IntStream asIntStream(accord.utilsfork.RandomSource rs) { return IntStream.generate(() -> nextInt(rs)); } @@ -187,10 +187,10 @@ default IntStream asIntStream(RandomSource rs) interface LongGen extends Gen { - long nextLong(RandomSource random); + long nextLong(accord.utilsfork.RandomSource random); @Override - default Long next(RandomSource random) + default Long next(accord.utilsfork.RandomSource random) { return nextLong(random); } @@ -224,7 +224,7 @@ default Gen.LongGen filter(Predicate fn) return filterAsLong(i -> fn.test(i)); } - default LongSupplier asLongSupplier(RandomSource rs) + default LongSupplier asLongSupplier(accord.utilsfork.RandomSource rs) { return () -> nextLong(rs); } diff --git a/test/unit/accord/utils/Gens.java b/test/unit/accord/utilsfork/Gens.java similarity index 97% rename from test/unit/accord/utils/Gens.java rename to test/unit/accord/utilsfork/Gens.java index 218189206f39..4f696361b9ad 100644 --- a/test/unit/accord/utils/Gens.java +++ b/test/unit/accord/utilsfork/Gens.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; import java.lang.reflect.Array; import java.math.BigDecimal; @@ -601,7 +601,7 @@ public static Gen charArray(Gen.IntGen sizes, char[] domain, IntCharBiPr }; } - public static Gen random() { + public static Gen random() { return r -> r; } @@ -645,7 +645,7 @@ public static StringDSL strings() return new StringDSL(); } - public static BooleanSupplier supplier(Gen gen, RandomSource rs) + public static BooleanSupplier supplier(Gen gen, accord.utilsfork.RandomSource rs) { return () -> gen.next(rs); } @@ -654,12 +654,12 @@ public static class BooleanDSL { public Gen all() { - return RandomSource::nextBoolean; + return accord.utilsfork.RandomSource::nextBoolean; } public Gen biasedRepeatingRuns(double ratio, int maxRuns) { - Invariants.checkArgument(ratio > 0 && ratio <= 1, "Expected %d to be larger than 0 and <= 1", ratio); + accord.utilsfork.Invariants.checkArgument(ratio > 0 && ratio <= 1, "Expected %d to be larger than 0 and <= 1", ratio); double lower = ratio * .8; double upper = ratio * 1.2; return new Gen() { @@ -667,7 +667,7 @@ public Gen biasedRepeatingRuns(double ratio, int maxRuns) private int run = -1; private long falseCount = 0, trueCount = 0; @Override - public Boolean next(RandomSource rs) + public Boolean next(accord.utilsfork.RandomSource rs) { if (run != -1) { @@ -733,12 +733,12 @@ public Gen.IntGen of(int value) public Gen.IntGen all() { - return RandomSource::nextInt; + return accord.utilsfork.RandomSource::nextInt; } public Gen.IntGen between(int min, int max) { - Invariants.checkArgument(max >= min, "max (%d) < min (%d)", max, min); + accord.utilsfork.Invariants.checkArgument(max >= min, "max (%d) < min (%d)", max, min); if (min == max) return of(min); // since bounds is exclusive, if max == max_value unable to do +1 to include... so will return a gen @@ -766,7 +766,7 @@ public Gen.LongGen of(long value) } public Gen.LongGen all() { - return RandomSource::nextLong; + return accord.utilsfork.RandomSource::nextLong; } public Gen.LongGen between(long min, long max) { @@ -1060,7 +1060,7 @@ private GenReset(Gen fn, boolean bestEffort) } @Override - public T next(RandomSource random) + public T next(accord.utilsfork.RandomSource random) { if (!bestEffort) { @@ -1095,12 +1095,12 @@ private static class IntGenReset implements Gen.IntGen, Reset { private final GenReset base; - private IntGenReset(Gen.IntGen fn) + private IntGenReset(IntGen fn) { this.base = new GenReset<>(fn, false); } @Override - public int nextInt(RandomSource random) { + public int nextInt(accord.utilsfork.RandomSource random) { return base.next(random); } @@ -1114,7 +1114,7 @@ private static class LongGenReset implements Gen.LongGen, Reset { private final GenReset base; - private LongGenReset(Gen.LongGen fn) + private LongGenReset(LongGen fn) { this.base = new GenReset<>(fn, false); } diff --git a/test/unit/accord/utils/Invariants.java b/test/unit/accord/utilsfork/Invariants.java similarity index 99% rename from test/unit/accord/utils/Invariants.java rename to test/unit/accord/utilsfork/Invariants.java index 2977272d4aa7..6028b69078ac 100644 --- a/test/unit/accord/utils/Invariants.java +++ b/test/unit/accord/utilsfork/Invariants.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; import net.nicoulaj.compilecommand.annotations.Inline; diff --git a/test/unit/accord/utils/Property.java b/test/unit/accord/utilsfork/Property.java similarity index 89% rename from test/unit/accord/utils/Property.java rename to test/unit/accord/utilsfork/Property.java index 79c29c5a41de..fbf1f4c7c575 100644 --- a/test/unit/accord/utils/Property.java +++ b/test/unit/accord/utilsfork/Property.java @@ -16,10 +16,9 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; -import accord.utils.async.TimeoutUtils; -import org.agrona.collections.LongArrayList; +import accord.utilsfork.async.TimeoutUtils; import java.time.Duration; import java.util.ArrayList; @@ -41,6 +40,8 @@ import java.util.stream.Collectors; import javax.annotation.Nullable; +import org.agrona.collections.LongArrayList; + public class Property { public static abstract class Common> @@ -114,22 +115,22 @@ protected void checkWithTimeout(Runnable fn) public static class ForBuilder extends Common { - public void check(FailingConsumer fn) + public void check(FailingConsumer fn) { - forAll(Gens.random()).check(fn); + forAll(accord.utilsfork.Gens.random()).check(fn); } - public SingleBuilder forAll(Gen gen) + public SingleBuilder forAll(accord.utilsfork.Gen gen) { return new SingleBuilder<>(gen, this); } - public DoubleBuilder forAll(Gen a, Gen b) + public DoubleBuilder forAll(accord.utilsfork.Gen a, accord.utilsfork.Gen b) { return new DoubleBuilder<>(a, b, this); } - public TrippleBuilder forAll(Gen a, Gen b, Gen c) + public TrippleBuilder forAll(accord.utilsfork.Gen a, accord.utilsfork.Gen b, accord.utilsfork.Gen c) { return new TrippleBuilder<>(a, b, c, this); } @@ -239,9 +240,9 @@ public interface FailingConsumer public static class SingleBuilder extends Common> { - private final Gen gen; + private final accord.utilsfork.Gen gen; - private SingleBuilder(Gen gen, Common other) { + private SingleBuilder(accord.utilsfork.Gen gen, Common other) { super(other); this.gen = Objects.requireNonNull(gen); } @@ -258,7 +259,7 @@ public void check(FailingConsumer fn) private void checkInternal(FailingConsumer fn) { - RandomSource random = new DefaultRandom(seed); + accord.utilsfork.RandomSource random = new accord.utilsfork.DefaultRandom(seed); for (int i = 0; i < examples; i++) { T value = null; @@ -287,10 +288,10 @@ public interface FailingBiConsumer public static class DoubleBuilder extends Common> { - private final Gen aGen; - private final Gen bGen; + private final accord.utilsfork.Gen aGen; + private final accord.utilsfork.Gen bGen; - private DoubleBuilder(Gen aGen, Gen bGen, Common other) { + private DoubleBuilder(accord.utilsfork.Gen aGen, accord.utilsfork.Gen bGen, Common other) { super(other); this.aGen = Objects.requireNonNull(aGen); this.bGen = Objects.requireNonNull(bGen); @@ -308,7 +309,7 @@ public void check(FailingBiConsumer fn) private void checkInternal(FailingBiConsumer fn) { - RandomSource random = new DefaultRandom(seed); + accord.utilsfork.RandomSource random = new accord.utilsfork.DefaultRandom(seed); for (int i = 0; i < examples; i++) { A a = null; @@ -338,11 +339,11 @@ public interface FailingTriConsumer public static class TrippleBuilder extends Common> { - private final Gen as; - private final Gen bs; - private final Gen cs; + private final accord.utilsfork.Gen as; + private final accord.utilsfork.Gen bs; + private final accord.utilsfork.Gen cs; - public TrippleBuilder(Gen as, Gen bs, Gen cs, Common other) + public TrippleBuilder(accord.utilsfork.Gen as, accord.utilsfork.Gen bs, accord.utilsfork.Gen cs, Common other) { super(other); this.as = as; @@ -362,7 +363,7 @@ public void check(FailingTriConsumer fn) private void checkInternal(FailingTriConsumer fn) { - RandomSource random = new DefaultRandom(seed); + accord.utilsfork.RandomSource random = new accord.utilsfork.DefaultRandom(seed); for (int i = 0; i < examples; i++) { A a = null; @@ -441,7 +442,7 @@ public StatefulBuilder withStepTimeout(Duration duration) @SuppressWarnings("rawtypes") public void check(Commands commands) { - RandomSource rs = new DefaultRandom(seed); + accord.utilsfork.RandomSource rs = new DefaultRandom(seed); for (int i = 0; i < examples; i++) { State state = null; @@ -458,7 +459,7 @@ public void check(Commands comm { for (int j = 0; j < steps; j++) { - Gen> cmdGen = commands.commands(state); + accord.utilsfork.Gen> cmdGen = commands.commands(state); Command cmd = cmdGen.next(rs); for (int a = 0; cmd.checkPreconditions(state) != PreCheckResult.Ok && a < 42; a++) { @@ -770,20 +771,20 @@ public void applyUnit(State state) public interface Commands { - Gen genInitialState() throws Throwable; + accord.utilsfork.Gen genInitialState() throws Throwable; SystemUnderTest createSut(State state) throws Throwable; default void onSuccess(State state, SystemUnderTest sut, List history) throws Throwable {} default void destroyState(State state, @Nullable Throwable cause) throws Throwable {} default void destroySut(SystemUnderTest sut, @Nullable Throwable cause) throws Throwable {} - Gen> commands(State state) throws Throwable; + accord.utilsfork.Gen> commands(State state) throws Throwable; } - public static CommandsBuilder commands(Supplier> stateGen, Function sutFactory) + public static CommandsBuilder commands(Supplier> stateGen, Function sutFactory) { return new CommandsBuilder<>(stateGen, sutFactory); } - public static CommandsBuilder commands(Supplier> stateGen) + public static CommandsBuilder commands(Supplier> stateGen) { return new CommandsBuilder<>(stateGen, ignore -> null); } @@ -797,16 +798,16 @@ public static class CommandsBuilder { public interface Setup { - Command setup(RandomSource rs, State state); + Command setup(accord.utilsfork.RandomSource rs, State state); } - private final Supplier> stateGen; + private final Supplier> stateGen; private final Function sutFactory; private final Map, Integer> knownWeights = new LinkedHashMap<>(); @Nullable private Set> unknownWeights = null; @Nullable private Map, List>> conditionalCommands = null; - private Gen.IntGen unknownWeightGen = Gens.ints().between(1, 10); + private accord.utilsfork.Gen.IntGen unknownWeightGen = accord.utilsfork.Gens.ints().between(1, 10); @Nullable private FailingConsumer preCommands = null; @Nullable @@ -814,10 +815,10 @@ public interface Setup @Nullable private FailingBiConsumer destroySut = null; @Nullable - private BiFunction>, Gen>> commandsTransformer = null; + private BiFunction>, accord.utilsfork.Gen>> commandsTransformer = null; private final List> onSuccess = new ArrayList<>(); - public CommandsBuilder(Supplier> stateGen, Function sutFactory) + public CommandsBuilder(Supplier> stateGen, Function sutFactory) { this.stateGen = stateGen; this.sutFactory = sutFactory; @@ -862,7 +863,7 @@ public CommandsBuilder add(int weight, Command cmd); } - public CommandsBuilder add(int weight, Gen> cmd) + public CommandsBuilder add(int weight, accord.utilsfork.Gen> cmd) { return add(weight, (rs, state) -> cmd.next(rs)); } @@ -878,7 +879,7 @@ public CommandsBuilder add(Command cmd); } - public CommandsBuilder add(Gen> cmd) + public CommandsBuilder add(accord.utilsfork.Gen> cmd) { return add((rs, state) -> cmd.next(rs)); } @@ -891,7 +892,7 @@ public CommandsBuilder add(Setup return this; } - public CommandsBuilder addIf(Predicate predicate, Gen> cmd) + public CommandsBuilder addIf(Predicate predicate, accord.utilsfork.Gen> cmd) { return addIf(predicate, (rs, state) -> cmd.next(rs)); } @@ -935,13 +936,13 @@ public interface IfBuilder IfBuilder addIf(Predicate predicate, Setup cmd); } - public CommandsBuilder unknownWeight(Gen.IntGen unknownWeightGen) + public CommandsBuilder unknownWeight(accord.utilsfork.Gen.IntGen unknownWeightGen) { this.unknownWeightGen = Objects.requireNonNull(unknownWeightGen); return this; } - public CommandsBuilder commandsTransformer(BiFunction>, Gen>> commandsTransformer) + public CommandsBuilder commandsTransformer(BiFunction>, accord.utilsfork.Gen>> commandsTransformer) { this.commandsTransformer = commandsTransformer; return this; @@ -955,18 +956,18 @@ public CommandsBuilder onSuccess(StatefulSuccess build() { - Gen> commandsGen; + accord.utilsfork.Gen> commandsGen; if (unknownWeights == null && conditionalCommands == null) { - commandsGen = Gens.pick(new LinkedHashMap<>(knownWeights)); + commandsGen = accord.utilsfork.Gens.pick(new LinkedHashMap<>(knownWeights)); } else { - class DynamicWeightsGen implements Gen>, Gens.Reset + class DynamicWeightsGen implements accord.utilsfork.Gen>, accord.utilsfork.Gens.Reset { LinkedHashMap, Integer> weights; LinkedHashMap, Integer> conditionalWeights; - Gen> nonConditional; + accord.utilsfork.Gen> nonConditional; @Override public Setup next(RandomSource rs) { @@ -979,7 +980,7 @@ public Setup next(RandomSource rs) for (Setup s : unknownWeights) weights.put(s, unknownWeightGen.nextInt(rs)); } - nonConditional = Gens.pick(weights); + nonConditional = accord.utilsfork.Gens.pick(weights); if (conditionalCommands != null) { conditionalWeights = new LinkedHashMap<>(); @@ -999,7 +1000,7 @@ public Setup next(RandomSource rs) if (e.getKey().test(s)) e.getValue().forEach(c -> clone.put(c, conditionalWeights.get(c))); } - Setup select = Gens.pick(clone).next(r); + Setup select = accord.utilsfork.Gens.pick(clone).next(r); return select.setup(r, s); }; } @@ -1017,7 +1018,7 @@ public void reset() return new Commands<>() { @Override - public Gen genInitialState() throws Throwable + public accord.utilsfork.Gen genInitialState() throws Throwable { return stateGen.get(); } @@ -1029,18 +1030,18 @@ public SystemUnderTest createSut(State state) throws Throwable } @Override - public Gen> commands(State state) throws Throwable + public accord.utilsfork.Gen> commands(State state) throws Throwable { if (preCommands != null) preCommands.accept(state); - Gen> map = commandsGen.map((rs, setup) -> setup.setup(rs, state)); + accord.utilsfork.Gen> map = commandsGen.map((rs, setup) -> setup.setup(rs, state)); return commandsTransformer == null ? map : commandsTransformer.apply(state, map); } @Override public void destroyState(State state, @Nullable Throwable cause) throws Throwable { - Gens.Reset.tryReset(commandsGen); + accord.utilsfork.Gens.Reset.tryReset(commandsGen); if (destroyState != null) destroyState.accept(state, cause); } diff --git a/test/unit/accord/utils/RandomSource.java b/test/unit/accord/utilsfork/RandomSource.java similarity index 98% rename from test/unit/accord/utils/RandomSource.java rename to test/unit/accord/utilsfork/RandomSource.java index ddba6237adb1..830e52d9a0cb 100644 --- a/test/unit/accord/utils/RandomSource.java +++ b/test/unit/accord/utilsfork/RandomSource.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; import java.util.ArrayList; import java.util.Comparator; @@ -40,7 +40,7 @@ public interface RandomSource { static RandomSource wrap(Random random) { - return new WrappedRandomSource(random); + return new accord.utilsfork.WrappedRandomSource(random); } void nextBytes(byte[] bytes); @@ -260,7 +260,7 @@ default int pickInt(int[] array) default int pickInt(int[] array, int offset, int length) { - Invariants.checkIndexInBounds(array.length, offset, length); + accord.utilsfork.Invariants.checkIndexInBounds(array.length, offset, length); if (length == 1) return array[offset]; return array[nextInt(offset, offset + length)]; @@ -284,7 +284,7 @@ default long pickLong(long[] array) default long pickLong(long[] array, int offset, int length) { - Invariants.checkIndexInBounds(array.length, offset, length); + accord.utilsfork.Invariants.checkIndexInBounds(array.length, offset, length); if (length == 1) return array[offset]; return array[nextInt(offset, offset + length)]; diff --git a/test/unit/accord/utils/SeedProvider.java b/test/unit/accord/utilsfork/SeedProvider.java similarity index 98% rename from test/unit/accord/utils/SeedProvider.java rename to test/unit/accord/utilsfork/SeedProvider.java index 9c7858dafed8..ded732f42f2b 100644 --- a/test/unit/accord/utils/SeedProvider.java +++ b/test/unit/accord/utilsfork/SeedProvider.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; import java.util.concurrent.atomic.AtomicLong; diff --git a/test/unit/accord/utils/WrappedRandomSource.java b/test/unit/accord/utilsfork/WrappedRandomSource.java similarity index 95% rename from test/unit/accord/utils/WrappedRandomSource.java rename to test/unit/accord/utilsfork/WrappedRandomSource.java index 3d02c101fbb8..39e899cb1df5 100644 --- a/test/unit/accord/utils/WrappedRandomSource.java +++ b/test/unit/accord/utilsfork/WrappedRandomSource.java @@ -16,11 +16,11 @@ * limitations under the License. */ -package accord.utils; +package accord.utilsfork; import java.util.Random; -class WrappedRandomSource implements RandomSource +class WrappedRandomSource implements accord.utilsfork.RandomSource { private final Random random; diff --git a/test/unit/accord/utils/async/TimeoutUtils.java b/test/unit/accord/utilsfork/async/TimeoutUtils.java similarity index 98% rename from test/unit/accord/utils/async/TimeoutUtils.java rename to test/unit/accord/utilsfork/async/TimeoutUtils.java index f12c0b17e964..2008918ac1db 100644 --- a/test/unit/accord/utils/async/TimeoutUtils.java +++ b/test/unit/accord/utilsfork/async/TimeoutUtils.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils.async; +package accord.utilsfork.async; import java.time.Duration; import java.util.concurrent.ExecutionException; diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 590f7e356f12..60203f5e25eb 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -84,6 +84,7 @@ import org.apache.cassandra.db.PartitionRangeReadCommand; import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.compaction.AbstractCompactionTask; import org.apache.cassandra.db.compaction.ActiveCompactionsTracker; import org.apache.cassandra.db.compaction.CompactionManager; @@ -832,6 +833,13 @@ public static UnfilteredPartitionIterator executeLocally(PartitionRangeReadComma return command.queryStorage(cfs, controller); } + public static UnfilteredPartitionIterator executeLocally(SinglePartitionReadCommand command, + ColumnFamilyStore cfs, + ReadExecutionController controller) + { + return command.queryStorage(cfs, controller); + } + public static Closeable markDirectoriesUnwriteable(ColumnFamilyStore cfs) { try diff --git a/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java b/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java index f97e0bf5cef2..4721a7b4ebe2 100644 --- a/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java +++ b/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java @@ -34,6 +34,7 @@ import javax.management.remote.JMXConnectorFactory; import javax.management.remote.JMXServiceURL; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -63,8 +64,8 @@ import org.apache.cassandra.db.ColumnFamilyStoreMBean; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.JMXServerUtils; -import org.assertj.core.api.Assertions; import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_JMX_AUTHORIZER; import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_JMX_LOCAL_PORT; @@ -439,6 +440,27 @@ public void testCqlBatch_MultipleTablesAuditing() assertEquals(0, size); } + @Test + public void testTransactionAuditing() + { + createTable("CREATE TABLE %s (key int PRIMARY KEY, val int)"); + AccordService.instance().createEpochFromConfigUnsafe(); + + Session session = sessionNet(); + String fqTableName = KEYSPACE + "." + currentTable(); + String query = "BEGIN TRANSACTION\n" + + " LET a = (SELECT * FROM " + fqTableName + " WHERE key = 0);\n" + + " SELECT a.val;\n" + + " IF a IS NULL THEN\n" + + " INSERT INTO " + fqTableName + " (key, val) VALUES (0, 0);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + session.execute(query); + AuditLogEntry logEntry = ((InMemoryAuditLogger) AuditLogManager.instance.getLogger()).inMemQueue.poll(); + assertLogEntry(query, AuditLogEntryType.TRANSACTION, logEntry, true, null); + } + @Test public void testCqlKeyspaceAuditing() throws Throwable { diff --git a/test/unit/org/apache/cassandra/auth/AllowAllCIDRAuthorizerTest.java b/test/unit/org/apache/cassandra/auth/AllowAllCIDRAuthorizerTest.java index fb7a57a3521b..4f7637d9c371 100644 --- a/test/unit/org/apache/cassandra/auth/AllowAllCIDRAuthorizerTest.java +++ b/test/unit/org/apache/cassandra/auth/AllowAllCIDRAuthorizerTest.java @@ -47,16 +47,6 @@ */ public class AllowAllCIDRAuthorizerTest extends CQLTester { - private static void setupSuperUser() - { - QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (role, is_superuser, can_login, salted_hash) " - + "VALUES ('%s', true, true, '%s')", - AUTH_KEYSPACE_NAME, - AuthKeyspace.ROLES, - CassandraRoleManager.DEFAULT_SUPERUSER_NAME, - "xxx")); - } - @BeforeClass public static void defineSchema() throws ConfigurationException { @@ -65,7 +55,7 @@ public static void defineSchema() throws ConfigurationException new AuthTestUtils.LocalCassandraAuthorizer(), new AuthTestUtils.LocalCassandraNetworkAuthorizer(), new AuthTestUtils.LocalAllowAllCIDRAuthorizer()); - setupSuperUser(); + AuthTestUtils.setupSuperUser(); } @Before diff --git a/test/unit/org/apache/cassandra/auth/AuthTestUtils.java b/test/unit/org/apache/cassandra/auth/AuthTestUtils.java index ce5a8284f8f5..228c93b3969a 100644 --- a/test/unit/org/apache/cassandra/auth/AuthTestUtils.java +++ b/test/unit/org/apache/cassandra/auth/AuthTestUtils.java @@ -35,6 +35,7 @@ import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; +import org.apache.commons.lang3.RandomStringUtils; import org.apache.cassandra.auth.jmx.AuthorizationProxy; import org.apache.cassandra.config.DatabaseDescriptor; @@ -45,6 +46,7 @@ import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.cql3.statements.AlterRoleStatement; import org.apache.cassandra.cql3.statements.AuthenticationStatement; +import org.apache.cassandra.cql3.statements.AuthorizationStatement; import org.apache.cassandra.cql3.statements.BatchStatement; import org.apache.cassandra.cql3.statements.CreateRoleStatement; import org.apache.cassandra.cql3.statements.DropRoleStatement; @@ -444,4 +446,26 @@ public static void waitForExistingRoles() .atMost(10, SECONDS) .until(CassandraRoleManager::hasExistingRoles); } + + static void authorize(String query, Object... args) + { + CQLStatement statement = QueryProcessor.parseStatement(String.format(query, args)).prepare(ClientState.forInternalCalls()); + assert statement instanceof AuthorizationStatement; + AuthorizationStatement authStmt = (AuthorizationStatement) statement; + + // invalidate roles cache so that any changes to the underlying roles are picked up + AuthenticatedUser.permissionsCache.invalidate(); + authStmt.execute(getClientState()); + } + + static String createName() + { + return RandomStringUtils.randomAlphabetic(8).toLowerCase(); + } + + public static void setupSuperUser() + { + QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (role, is_superuser, can_login, salted_hash) VALUES ('%s', true, true, '%s')", + AUTH_KEYSPACE_NAME, AuthKeyspace.ROLES, CassandraRoleManager.DEFAULT_SUPERUSER_NAME, "xxx")); + } } diff --git a/test/unit/org/apache/cassandra/auth/CIDRGroupsMappingManagerTest.java b/test/unit/org/apache/cassandra/auth/CIDRGroupsMappingManagerTest.java index a3a899d65dcf..44d5ef435a15 100644 --- a/test/unit/org/apache/cassandra/auth/CIDRGroupsMappingManagerTest.java +++ b/test/unit/org/apache/cassandra/auth/CIDRGroupsMappingManagerTest.java @@ -32,10 +32,8 @@ import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.cql3.CIDR; -import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.exceptions.ConfigurationException; -import static org.apache.cassandra.schema.SchemaConstants.AUTH_KEYSPACE_NAME; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertEquals; @@ -43,16 +41,6 @@ public class CIDRGroupsMappingManagerTest { CIDRGroupsMappingManager cidrGroupsMappingManager; - private static void setupSuperUser() - { - QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (role, is_superuser, can_login, salted_hash) " - + "VALUES ('%s', true, true, '%s')", - AUTH_KEYSPACE_NAME, - AuthKeyspace.ROLES, - CassandraRoleManager.DEFAULT_SUPERUSER_NAME, - "xxx")); - } - @BeforeClass public static void defineSchema() throws ConfigurationException { @@ -64,7 +52,7 @@ public static void defineSchema() throws ConfigurationException new AuthTestUtils.LocalCassandraNetworkAuthorizer(), new AuthTestUtils.LocalCassandraCIDRAuthorizer()); AuthCacheService.initializeAndRegisterCaches(); - setupSuperUser(); + AuthTestUtils.setupSuperUser(); } @Before diff --git a/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerEnforceModeTest.java b/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerEnforceModeTest.java index 0ef3917b91e8..25079e4913ed 100644 --- a/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerEnforceModeTest.java +++ b/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerEnforceModeTest.java @@ -59,16 +59,6 @@ public class CassandraCIDRAuthorizerEnforceModeTest extends CQLTester { private static final AuthTestUtils.LocalCassandraCIDRAuthorizer cidrAuthorizer = new AuthTestUtils.LocalCassandraCIDRAuthorizer(); - private static void setupSuperUser() - { - QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (role, is_superuser, can_login, salted_hash) " - + "VALUES ('%s', true, true, '%s')", - AUTH_KEYSPACE_NAME, - AuthKeyspace.ROLES, - CassandraRoleManager.DEFAULT_SUPERUSER_NAME, - "xxx")); - } - @BeforeClass public static void defineSchema() throws ConfigurationException { @@ -78,7 +68,7 @@ public static void defineSchema() throws ConfigurationException new AuthTestUtils.LocalCassandraNetworkAuthorizer(), cidrAuthorizer); AuthCacheService.initializeAndRegisterCaches(); - setupSuperUser(); + AuthTestUtils.setupSuperUser(); } @Before diff --git a/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerMonitorModeTest.java b/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerMonitorModeTest.java index 54fe18bc2d89..b9225b519861 100644 --- a/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerMonitorModeTest.java +++ b/test/unit/org/apache/cassandra/auth/CassandraCIDRAuthorizerMonitorModeTest.java @@ -33,7 +33,6 @@ import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.cql3.CIDR; import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.service.ClientState; @@ -45,16 +44,7 @@ public class CassandraCIDRAuthorizerMonitorModeTest extends CQLTester { private static final AuthTestUtils.LocalCassandraCIDRAuthorizer cidrAuthorizer = - new AuthTestUtils.LocalCassandraCIDRAuthorizer(ICIDRAuthorizer.CIDRAuthorizerMode.MONITOR); - private static void setupSuperUser() - { - QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (role, is_superuser, can_login, salted_hash) " - + "VALUES ('%s', true, true, '%s')", - AUTH_KEYSPACE_NAME, - AuthKeyspace.ROLES, - CassandraRoleManager.DEFAULT_SUPERUSER_NAME, - "xxx")); - } + new AuthTestUtils.LocalCassandraCIDRAuthorizer(ICIDRAuthorizer.CIDRAuthorizerMode.MONITOR); @BeforeClass public static void defineSchema() throws ConfigurationException @@ -65,7 +55,7 @@ public static void defineSchema() throws ConfigurationException new AuthTestUtils.LocalCassandraNetworkAuthorizer(), cidrAuthorizer); AuthCacheService.initializeAndRegisterCaches(); - setupSuperUser(); + AuthTestUtils.setupSuperUser(); } @Before diff --git a/test/unit/org/apache/cassandra/auth/CassandraNetworkAuthorizerTest.java b/test/unit/org/apache/cassandra/auth/CassandraNetworkAuthorizerTest.java index 2e233ba8512a..aa22f5be4f5b 100644 --- a/test/unit/org/apache/cassandra/auth/CassandraNetworkAuthorizerTest.java +++ b/test/unit/org/apache/cassandra/auth/CassandraNetworkAuthorizerTest.java @@ -50,16 +50,6 @@ public class CassandraNetworkAuthorizerTest extends CQLTester { - private static void setupSuperUser() - { - QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (role, is_superuser, can_login, salted_hash) " - + "VALUES ('%s', true, true, '%s')", - AUTH_KEYSPACE_NAME, - AuthKeyspace.ROLES, - CassandraRoleManager.DEFAULT_SUPERUSER_NAME, - "xxx")); - } - @BeforeClass public static void defineSchema() throws ConfigurationException { @@ -69,7 +59,7 @@ public static void defineSchema() throws ConfigurationException new AuthTestUtils.LocalCassandraNetworkAuthorizer(), new AuthTestUtils.LocalCassandraCIDRAuthorizer()); AuthCacheService.initializeAndRegisterCaches(); - setupSuperUser(); + AuthTestUtils.setupSuperUser(); } @Before diff --git a/test/unit/org/apache/cassandra/auth/TxnAuthTest.java b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java new file mode 100644 index 000000000000..4743144eba85 --- /dev/null +++ b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.auth; + +import java.net.InetSocketAddress; +import java.util.Collections; + +import org.apache.cassandra.transport.Dispatcher; +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.statements.TransactionStatement; +import org.apache.cassandra.exceptions.UnauthorizedException; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.transport.messages.ResultMessage; + +import static org.junit.Assert.assertEquals; + +import static org.apache.cassandra.auth.AuthTestUtils.auth; +import static org.apache.cassandra.db.ConsistencyLevel.NODE_LOCAL; + +public class TxnAuthTest extends CQLTester +{ + @BeforeClass + public static void setUpAuthAndAccord() throws Exception + { + CassandraRelevantProperties.ENABLE_NODELOCAL_QUERIES.setBoolean(true); + + SchemaLoader.prepareServer(); + IRoleManager roleManager = new AuthTestUtils.LocalCassandraRoleManager(); + SchemaLoader.setupAuth(roleManager, + new AuthTestUtils.LocalPasswordAuthenticator(), + new AuthTestUtils.LocalCassandraAuthorizer(), + new AuthTestUtils.LocalCassandraNetworkAuthorizer(), + new AuthTestUtils.LocalCassandraCIDRAuthorizer()); + roleManager.setup(); + AuthCacheService.initializeAndRegisterCaches(); + AuthTestUtils.setupSuperUser(); + + requireNetwork(); + } + + @Before + public void setUpTest() + { + createTable("CREATE TABLE %s (k int, v int, PRIMARY KEY(k))"); + AccordService.instance().createEpochFromConfigUnsafe(); + } + + @Test + public void canSelectInTxnWithPermissions() + { + QueryProcessor.process(formatQuery("INSERT INTO %s (k, v) VALUES (0, 0)"), NODE_LOCAL); + + ClientState clientState = createUserAndLogin(); + String query = formatQuery("BEGIN TRANSACTION\n" + + " SELECT * FROM %s WHERE k = 0;\n" + + "COMMIT TRANSACTION"); + + assertUnauthorized(query, clientState); + + grantTo(clientState, Permission.SELECT); + ResultMessage.Rows message = (ResultMessage.Rows) execute(query, clientState); + assertEquals(1, message.result.size()); + } + + @Test + public void canSelectRefsInTxnWithPermissions() + { + QueryProcessor.process(formatQuery("INSERT INTO %s (k, v) VALUES (0, 0)"), NODE_LOCAL); + + ClientState clientState = createUserAndLogin(); + String query = formatQuery("BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM %s WHERE k = 0);\n" + + " SELECT row0.v;\n" + + "COMMIT TRANSACTION"); + + assertUnauthorized(query, clientState); + + grantTo(clientState, Permission.SELECT); + ResultMessage.Rows message = (ResultMessage.Rows) execute(query, clientState); + assertEquals(1, message.result.size()); + } + + @Test + public void canInsertOnlyInTxnWithPermissions() + { + ClientState clientState = createUserAndLogin(); + String insert = formatQuery("BEGIN TRANSACTION\n" + + " INSERT INTO %s (k, v) VALUES (0, 0);\n" + + "COMMIT TRANSACTION"); + + assertUnauthorized(insert, clientState); + + grantTo(clientState, Permission.MODIFY); + execute(insert, clientState); + } + + @Test + public void canExecuteTxnWithAutoGeneratedRead() + { + QueryProcessor.process(formatQuery("INSERT INTO %s (k, v) VALUES (0, 0)"), NODE_LOCAL); + + ClientState clientState = createUserAndLogin(); + String update = "BEGIN TRANSACTION\n" + + formatQuery("SELECT * FROM %s WHERE k = 0;\n") + + formatQuery("UPDATE %s SET v += 1 WHERE k = 0 ;\n") + + "COMMIT TRANSACTION"; + + assertUnauthorized(update, clientState); + + // We should still fail here, given we need permisions to SELECT for the generated reads. + grantTo(clientState, Permission.MODIFY); + assertUnauthorized(update, clientState); + + grantTo(clientState, Permission.SELECT); + execute(update, clientState); + } + + private void assertUnauthorized(String query, ClientState clientState) + { + Assertions.assertThatThrownBy(() -> execute(query, clientState)) + .isInstanceOf(UnauthorizedException.class) + .hasMessageContaining(clientState.getUser().getName()); + } + + private void grantTo(ClientState clientState, Permission permission) + { + AuthTestUtils.authorize(formatQuery("GRANT " + permission + " ON TABLE %s TO " + clientState.getUser().getName())); + } + + private ClientState createUserAndLogin() + { + String username = AuthTestUtils.createName(); + auth("CREATE ROLE %s WITH password = 'password' AND LOGIN = true", username); + ClientState clientState = ClientState.forExternalCalls(InetSocketAddress.createUnresolved("127.0.0.1", 123)); + clientState.login(new AuthenticatedUser(username)); + return clientState; + } + + private ResultMessage execute(String query, ClientState clientState) + { + TransactionStatement.Parsed parsed = (TransactionStatement.Parsed) QueryProcessor.parseStatement(query); + TransactionStatement statement = (TransactionStatement) parsed.prepare(clientState); + QueryOptions options = QueryOptions.forInternalCalls(NODE_LOCAL, Collections.emptyList()); + QueryState queryState = new QueryState(clientState); + return QueryProcessor.instance.process(statement, queryState, options, Dispatcher.RequestTime.forImmediateExecution()); + } +} diff --git a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java index 01540499f4ca..fe0ae0566c31 100644 --- a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java +++ b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java @@ -39,8 +39,8 @@ import javax.annotation.Nullable; -import accord.utils.Gens; -import accord.utils.RandomSource; +import accord.utilsfork.Gens; +import accord.utilsfork.RandomSource; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.concurrent.Future; diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 34656eac55ae..385ca62a337c 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -90,6 +90,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$DiskOptimizationStrategy", "org.apache.cassandra.config.Config$FlushCompression", "org.apache.cassandra.config.Config$InternodeCompression", + "org.apache.cassandra.config.Config$LegacyPaxosStrategy", "org.apache.cassandra.config.Config$MemtableAllocationType", "org.apache.cassandra.config.Config$PaxosOnLinearizabilityViolation", "org.apache.cassandra.config.Config$PaxosStatePurging", diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorTest.java index dcd617adcf9c..c52e82921f58 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorTest.java @@ -383,6 +383,7 @@ public void testLowestAcceptableTimeouts() throws ConfigurationException testConfig.cas_contention_timeout = lowerThanLowestTimeout; testConfig.counter_write_request_timeout = lowerThanLowestTimeout; testConfig.request_timeout = lowerThanLowestTimeout; + testConfig.transaction_timeout = lowerThanLowestTimeout; DatabaseDescriptor.checkForLowestAcceptedTimeouts(testConfig); @@ -393,6 +394,7 @@ public void testLowestAcceptableTimeouts() throws ConfigurationException assertEquals(testConfig.cas_contention_timeout, DatabaseDescriptor.LOWEST_ACCEPTED_TIMEOUT); assertEquals(testConfig.counter_write_request_timeout, DatabaseDescriptor.LOWEST_ACCEPTED_TIMEOUT); assertEquals(testConfig.request_timeout, DatabaseDescriptor.LOWEST_ACCEPTED_TIMEOUT); + assertEquals(testConfig.transaction_timeout, DatabaseDescriptor.LOWEST_ACCEPTED_TIMEOUT); } @Test diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java index 857ec85f408b..4c9ec9fdc1f5 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java @@ -33,7 +33,7 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; import static org.quicktheories.generators.SourceDSL.doubles; import static org.quicktheories.generators.SourceDSL.integers; diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java index 6f9260f022ec..9d0816e6440e 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java @@ -29,7 +29,7 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.utils.Generators; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.quicktheories.generators.SourceDSL.integers; diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index 22ab64c12fca..e988b979abb3 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -66,6 +66,8 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import org.assertj.core.api.Assertions; +import org.awaitility.Awaitility; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.junit.After; @@ -80,10 +82,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.DefaultRandom; -import accord.utils.Gen; -import accord.utils.Property; -import accord.utils.RandomSource; +import accord.utilsfork.DefaultRandom; +import accord.utilsfork.Gen; +import accord.utilsfork.Property; +import accord.utilsfork.RandomSource; import com.codahale.metrics.Gauge; import com.datastax.driver.core.CloseFuture; import com.datastax.driver.core.Cluster; @@ -188,10 +190,9 @@ import org.apache.cassandra.utils.ConfigGenBuilder; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JMXServerUtils; +import org.apache.cassandra.utils.LazyToString; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; -import org.assertj.core.api.Assertions; -import org.awaitility.Awaitility; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_CONNECTION_TIMEOUT_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_READ_TIMEOUT_MS; @@ -1776,9 +1777,10 @@ protected void assertRowsNet(ProtocolVersion protocolVersion, ResultSet result, Object[] expected = rows[i]; Row actual = iter.next(); - Assert.assertEquals(String.format("Invalid number of (expected) values provided for row %d (using protocol version %s)", - i, protocolVersion), - meta.size(), expected.length); + Assertions.assertThat(meta.size()) + .describedAs("Invalid number of (expected) values provided for row %d (using protocol version %s); expected=%s, actual=%s", + i, protocolVersion, LazyToString.lazy(() -> Arrays.toString(expected)), LazyToString.lazy(() -> Arrays.toString(toObjectArray(actual)))) + .isEqualTo(expected.length); for (int j = 0; j < meta.size(); j++) { @@ -2036,6 +2038,14 @@ private boolean equalsWithoutKsTb(ColumnMetadata left, ColumnMetadata right) && left.type.equals(right.type); } + private static Object[] toObjectArray(Row actual) + { + Object[] row = new Object[actual.getColumnDefinitions().size()]; + for (int i = 0; i < row.length; i++) + row[i] = actual.getObject(i); + return row; + } + protected void assertRowCountNet(ResultSet r1, int expectedCount) { Assert.assertFalse("Received a null resultset when expected count was > 0", expectedCount > 0 && r1 == null); @@ -2712,7 +2722,7 @@ else if (type instanceof BytesType) return s; } - protected static ByteBuffer makeByteBuffer(Object value, AbstractType type) + public static ByteBuffer makeByteBuffer(Object value, AbstractType type) { if (value == null) return null; @@ -2750,12 +2760,12 @@ private static String formatValue(ByteBuffer bb, AbstractType type) } } - protected TupleValue tuple(Object...values) + public static TupleValue tuple(Object...values) { return new TupleValue(values); } - protected Object userType(Object... values) + public static UserTypeValue userType(Object... values) { if (values.length % 2 != 0) throw new IllegalArgumentException("userType() requires an even number of arguments"); @@ -3024,7 +3034,7 @@ public int hashCode() } } - private static class UserTypeValue extends TupleValue + public static class UserTypeValue extends TupleValue { private final String[] fieldNames; diff --git a/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java b/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java index a9f3d267df63..4638970628dc 100644 --- a/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java +++ b/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java @@ -21,13 +21,15 @@ import org.junit.Test; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.service.accord.AccordService; + +import static org.junit.Assert.assertEquals; import static org.apache.cassandra.db.ConsistencyLevel.NODE_LOCAL; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.readMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.readMetricsForLevel; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.writeMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.writeMetricsForLevel; -import static org.junit.Assert.assertEquals; public class NodeLocalConsistencyTest extends CQLTester { @@ -35,6 +37,7 @@ public class NodeLocalConsistencyTest extends CQLTester public static void setUp() throws Exception { CassandraRelevantProperties.ENABLE_NODELOCAL_QUERIES.setBoolean(true); + requireNetwork(); } @Test @@ -87,4 +90,20 @@ public void testSelect() assertEquals(1, afterLevel - beforeLevel); assertEquals(1, afterGlobal - beforeGlobal); } -} \ No newline at end of file + + @Test + public void testTransaction() + { + createTable("CREATE TABLE %s (key text, val int, PRIMARY KEY(key))"); + QueryProcessor.process(formatQuery("INSERT INTO %s (key, val) VALUES ('foo', 0)"), NODE_LOCAL); + + AccordService.instance().createEpochFromConfigUnsafe(); + + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM %s WHERE key = 'foo';\n" + + "COMMIT TRANSACTION"; + + UntypedResultSet rows = QueryProcessor.process(formatQuery(query), NODE_LOCAL); + assertEquals(1, rows.size()); + } +} diff --git a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java index 39c641daa278..2d09317333cc 100644 --- a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java +++ b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java @@ -22,9 +22,13 @@ import java.util.Collections; import java.util.EnumSet; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import com.google.common.util.concurrent.Uninterruptibles; +import org.junit.Assume; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import com.datastax.driver.core.Cluster; @@ -32,6 +36,8 @@ import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Session; import com.datastax.driver.core.exceptions.SyntaxError; +import com.datastax.driver.core.exceptions.WriteTimeoutException; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.exceptions.PreparedQueryNotFoundException; import org.apache.cassandra.index.StubIndex; @@ -39,9 +45,11 @@ import org.apache.cassandra.serializers.Int32Serializer; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.ClientWarn; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.transport.SimpleClient; import org.apache.cassandra.transport.messages.ResultMessage; +import org.assertj.core.api.Assertions; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; @@ -55,6 +63,13 @@ public class PreparedStatementsTest extends CQLTester " WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };"; private static final String dropKsStatement = "DROP KEYSPACE IF EXISTS " + KEYSPACE; + @BeforeClass + public static void setUpClass() + { + DatabaseDescriptor.setAccordTransactionsEnabled(true); + CQLTester.setUpClass(); + } + @Before public void setup() { @@ -164,24 +179,30 @@ public void testInvalidatePreparedStatementsOnDrop() session.execute(createTableStatement); - PreparedStatement prepared = session.prepare("INSERT INTO " + KEYSPACE + ".qp_cleanup (id, cid, val) VALUES (?, ?, ?)"); - PreparedStatement preparedBatch = session.prepare("BEGIN BATCH " + - "INSERT INTO " + KEYSPACE + ".qp_cleanup (id, cid, val) VALUES (?, ?, ?);" + - "APPLY BATCH;"); + String insert = "INSERT INTO " + KEYSPACE + ".qp_cleanup (id, cid, val) VALUES (?, ?, ?)"; + PreparedStatement prepared = session.prepare(insert); + PreparedStatement preparedBatch = session.prepare(batch(insert)); + PreparedStatement preparedTxn = session.prepare(txn(insert)); + session.execute(dropTableStatement); session.execute(createTableStatement); + updateTxnState(); + session.execute(prepared.bind(1, 1, "value")); session.execute(preparedBatch.bind(2, 2, "value2")); + session.execute(preparedTxn.bind(3, 3, "value3")); session.execute(dropKsStatement); session.execute(createKsStatement); session.execute(createTableStatement); + updateTxnState(); // The driver will get a response about the prepared statement being invalid, causing it to transparently // re-prepare the statement. We'll rely on the fact that we get no errors while executing this to show that // the statements have been invalidated. session.execute(prepared.bind(1, 1, "value")); session.execute(preparedBatch.bind(2, 2, "value2")); + session.execute(preparedTxn.bind(3, 3, "value3")); session.execute(dropKsStatement); } @@ -206,8 +227,11 @@ private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boo session.execute(dropKsStatement); session.execute(createKsStatement); session.execute(createTableStatement); + updateTxnState(); - PreparedStatement preparedSelect = session.prepare("SELECT * FROM " + KEYSPACE + ".qp_cleanup"); + String select = "SELECT * FROM " + KEYSPACE + ".qp_cleanup"; + PreparedStatement preparedSelect = session.prepare(select); + PreparedStatement preparedSelectTxn = session.prepare(txn(select + " WHERE a = ?")); session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c) VALUES (?, ?, ?);", 1, 2, 3); session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c) VALUES (?, ?, ?);", @@ -216,8 +240,14 @@ private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boo assertRowsNet(session.execute(preparedSelect.bind()), row(1, 2, 3), row(2, 3, 4)); + assertRowsNet(session.execute(preparedSelectTxn.bind(1)), + row(1, 2, 3)); + assertRowsNet(session.execute(preparedSelectTxn.bind(2)), + row(2, 3, 4)); session.execute(alterTableStatement); + updateTxnState(); + session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c, d) VALUES (?, ?, ?, ?);", 3, 4, 5, 6); @@ -231,15 +261,32 @@ private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boo row(2, 3, 4, null), row(3, 4, 5, 6)); assertEquals(rs.getColumnDefinitions().size(), 4); + + for (int i = 1; i <= 3; i++) + { + rs = session.execute(preparedSelectTxn.bind(i)); + assertRowsNet(version, + rs, + row(i, i + 1, i + 2, i == 3 ? 6 : null)); + assertEquals(rs.getColumnDefinitions().size(), 4); + } } else { rs = session.execute(preparedSelect.bind()); - assertRowsNet(rs, + assertRowsNet(version, + rs, row(1, 2, 3), row(2, 3, 4), row(3, 4, 5)); - assertEquals(rs.getColumnDefinitions().size(), 3); + assertEquals(3, rs.getColumnDefinitions().size()); + for (int i = 1; i <= 3; i++) + { + rs = session.execute(preparedSelectTxn.bind(i)); + Assertions.assertThat(columnNames(rs)) + .containsExactlyInAnyOrder("a", "b", "c"); + assertRowsNet(version, rs, row(i, i + 1, i + 2)); + } } session.execute(dropKsStatement); @@ -266,31 +313,49 @@ private void testInvalidatePreparedStatementOnAlterUnchangedMetadata(ProtocolVer session.execute(dropKsStatement); session.execute(createKsStatement); session.execute(createTableStatement); + updateTxnState(); - PreparedStatement preparedSelect = session.prepare("SELECT a, b, c FROM " + KEYSPACE + ".qp_cleanup"); + String select = "SELECT a, b, c FROM " + KEYSPACE + ".qp_cleanup"; + PreparedStatement preparedSelect = session.prepare(select); + PreparedStatement preparedSelectTxn = session.prepare(txn(select + " WHERE a = ?")); session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c) VALUES (?, ?, ?);", 1, 2, 3); session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c) VALUES (?, ?, ?);", 2, 3, 4); ResultSet rs = session.execute(preparedSelect.bind()); - assertRowsNet(rs, row(1, 2, 3), row(2, 3, 4)); assertEquals(rs.getColumnDefinitions().size(), 3); + for (int i = 1; i <= 2; i++) + { + rs = session.execute(preparedSelectTxn.bind(i)); + assertRowsNet(rs, row(i, i + 1, i + 2)); + Assertions.assertThat(columnNames(rs)).containsExactlyInAnyOrder("a", "b", "c"); + } + session.execute(alterTableStatement); + updateTxnState(); + session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c, d) VALUES (?, ?, ?, ?);", 3, 4, 5, 6); rs = session.execute(preparedSelect.bind()); - assertRowsNet(rs, + assertRowsNet(version, rs, row(1, 2, 3), row(2, 3, 4), row(3, 4, 5)); assertEquals(rs.getColumnDefinitions().size(), 3); + for (int i = 1; i <= 3; i++) + { + rs = session.execute(preparedSelectTxn.bind(i)); + assertRowsNet(rs, row(i, i + 1, i + 2)); + Assertions.assertThat(columnNames(rs)).containsExactlyInAnyOrder("a", "b", "c"); + } + session.execute(dropKsStatement); } @@ -302,18 +367,21 @@ public void testStatementRePreparationOnReconnect() session.execute(dropKsStatement); session.execute(createKsStatement); - createTable("CREATE TABLE %s (id int PRIMARY KEY, cid int, val text);"); - + updateTxnState(); String insertCQL = "INSERT INTO " + currentTable() + " (id, cid, val) VALUES (?, ?, ?)"; String selectCQL = "Select * from " + currentTable() + " where id = ?"; PreparedStatement preparedInsert = session.prepare(insertCQL); PreparedStatement preparedSelect = session.prepare(selectCQL); + PreparedStatement preparedTxn = session.prepare(txn(selectCQL, insertCQL)); session.execute(preparedInsert.bind(1, 1, "value")); assertEquals(1, session.execute(preparedSelect.bind(1)).all().size()); + // txn will return state before mutations are applied, so null result + assertRowsNet(ProtocolVersion.V5, + session.execute(preparedTxn.bind(2, 2, 2, "value2"))); try (Cluster newCluster = Cluster.builder() .addContactPoints(nativeAddr) @@ -328,15 +396,19 @@ public void testStatementRePreparationOnReconnect() newSession.execute("USE " + keyspace()); preparedInsert = newSession.prepare(insertCQL); preparedSelect = newSession.prepare(selectCQL); - newSession.execute(preparedInsert.bind(1, 1, "value")); + newSession.execute(preparedInsert.bind(1, 1, "value")); assertEquals(1, newSession.execute(preparedSelect.bind(1)).all().size()); + + assertRowsNet(ProtocolVersion.V5, + session.execute(preparedTxn.bind(2, 2, 2, "value2")), + row(2, 2, "value2")); } } } @Test - public void prepareAndExecuteWithCustomExpressions() throws Throwable + public void prepareAndExecuteWithCustomExpressions() { Session session = sessionNet(ProtocolVersion.V5); @@ -349,25 +421,25 @@ public void prepareAndExecuteWithCustomExpressions() throws Throwable KEYSPACE, table)); session.execute(String.format("CREATE CUSTOM INDEX %s ON %s.%s(val) USING '%s'", index, KEYSPACE, table, StubIndex.class.getName())); - session.execute(String.format("INSERT INTO %s.%s(id, cid, val) VALUES (0, 0, 'test')", KEYSPACE, table)); - - PreparedStatement prepared1 = session.prepare(String.format("SELECT * FROM %s.%s WHERE expr(%s, 'foo')", - KEYSPACE, table, index)); - assertEquals(1, session.execute(prepared1.bind()).all().size()); + updateTxnState(); - PreparedStatement prepared2 = session.prepare(String.format("SELECT * FROM %s.%s WHERE expr(%s, ?)", - KEYSPACE, table, index)); - assertEquals(1, session.execute(prepared2.bind("foo bar baz")).all().size()); + session.execute(String.format("INSERT INTO %s.%s(id, cid, val) VALUES (0, 0, 'test')", KEYSPACE, table)); - try - { - session.prepare(String.format("SELECT * FROM %s.%s WHERE expr(?, 'foo bar baz')", KEYSPACE, table)); - fail("Expected syntax exception, but none was thrown"); - } - catch(SyntaxError e) - { - assertEquals("Bind variables cannot be used for index names", e.getMessage()); - } + String select = String.format("SELECT * FROM %s.%s WHERE expr(%s, 'foo')", KEYSPACE, table, index); + assertEquals(1, session.execute(session.prepare(select).bind()).all().size()); + assertEquals(1, session.execute(session.prepare(txn(select + " AND id = ?")).bind(0)).all().size()); + + String select2 = String.format("SELECT * FROM %s.%s WHERE expr(%s, ?)", KEYSPACE, table, index); + assertEquals(1, session.execute(session.prepare(select2).bind("foo bar baz")).all().size()); + assertEquals(1, session.execute(session.prepare(txn(select2 + " AND id = ?")).bind("foo bar baz", 0)).all().size()); + + String badSelect = String.format("SELECT * FROM %s.%s WHERE expr(?, 'foo bar baz')", KEYSPACE, table); + Assertions.assertThatThrownBy(() -> session.prepare(badSelect)) + .isInstanceOf(SyntaxError.class) + .hasMessage("Bind variables cannot be used for index names"); + Assertions.assertThatThrownBy(() -> session.prepare(txn(badSelect + " AND id = ?"))) + .isInstanceOf(SyntaxError.class) + .hasMessage("Bind variables cannot be used for index names"); } @Test @@ -678,4 +750,127 @@ private void testPrepareWithBatchLWT(ProtocolVersion version) throws Throwable row(false, 1, 10, 20, null)); assertEquals(rs.getColumnDefinitions().size(), 5); } + + @Test + public void testPrepareWithAccordV4() + { + testPrepareWithAccord(ProtocolVersion.V4); + } + + @Test + public void testPrepareWithAccordV5() + { + Assume.assumeTrue("Protocol v5 is CURRENT", ProtocolVersion.CURRENT != ProtocolVersion.V5); + testPrepareWithAccord(ProtocolVersion.V5); + } + + @Test + public void testPrepareWithAccordCurrent() + { + testPrepareWithAccord(ProtocolVersion.CURRENT); + } + + private void testPrepareWithAccord(ProtocolVersion version) + { + int maxAttempts = 3; + Session session = sessionNet(version); + session.execute("USE " + keyspace()); + createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))"); + updateTxnState(); + + PreparedStatement writeOnly = session.prepare(txn( + "INSERT INTO " + currentTable() + " (pk, v1, v2) VALUES (?, ?, ?)" + )); + PreparedStatement returnSelect = session.prepare(txn( + "SELECT * FROM " + currentTable() + " WHERE pk=?", + "UPDATE " + currentTable() + " SET v1 += 1, v2 += 2 WHERE pk = ?" + )); + PreparedStatement returnRef = session.prepare(txn( + "LET a = (SELECT * FROM " + currentTable() + " WHERE pk=?)", + "SELECT a.pk, a.v1, a.v2", + "UPDATE " + currentTable() + " SET v1 += 1, v2 += 2 WHERE pk = ?" + )); + // populate every row + int numPartitions = 5; + int[][] model = new int[numPartitions][]; + for (int writePk = 0; writePk < numPartitions; writePk++) + { + model[writePk] = new int[] {0, 0}; + assertRowsNet(version, session.execute(writeOnly.bind(writePk, 0, 0))); + } + + for (int writePk = 0; writePk < numPartitions; writePk++) + { + for (int readPk = 0; readPk < numPartitions; readPk++) + { + int[] expected = model[readPk]; + int[] mutated = model[writePk]; + for (boolean select : Arrays.asList(true, false)) + { + for (int retries = 0; retries < maxAttempts; retries++) + { + try + { + ResultSet rs = session.execute(select ? returnSelect.bind(readPk, writePk) + : returnRef.bind(readPk, writePk)); + assertRowsNet(version, rs, row(readPk, expected[0], expected[1])); + break; + } + catch (WriteTimeoutException e) + { + logger.warn("Write timeout seen", e); + if (retries >= maxAttempts - 1) throw e; + Uninterruptibles.sleepUninterruptibly(500, TimeUnit.MILLISECONDS); + } + finally + { + // update to account for counter bumps + mutated[0]++; + mutated[1] = mutated[1] + 2; + } + } + } + } + } + } + + private static String txn(String... stmts) + { + StringBuilder sb = new StringBuilder(); + sb.append("BEGIN TRANSACTION\n"); + for (String stmt : stmts) + { + sb.append(" ").append(stmt); + if (!stmt.endsWith(";")) sb.append(';'); + sb.append('\n'); + } + sb.append("COMMIT TRANSACTION"); + return sb.toString(); + } + + private static String batch(String... stmts) + { + StringBuilder sb = new StringBuilder(); + sb.append("BEGIN BATCH\n"); + for (String stmt : stmts) + { + sb.append(" ").append(stmt); + if (!stmt.endsWith(";")) sb.append(';'); + sb.append('\n'); + } + sb.append("APPLY BATCH"); + return sb.toString(); + } + + private static List columnNames(ResultSet rs) + { + return rs.getColumnDefinitions().asList().stream().map(d -> d.getName()).collect(Collectors.toList()); + } + + private static void updateTxnState() + { + //TODO Remove this method once CEP-21 and CEP-15 integrate + AccordService.instance().createEpochFromConfigUnsafe(); + AccordService.instance().setCacheSize(0); + } } diff --git a/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java b/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java index 333f3dec8e1d..fe4f73efe086 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java +++ b/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java @@ -20,14 +20,14 @@ import org.junit.Test; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.cql3.ast.Conditional.And; import org.apache.cassandra.cql3.ast.Conditional.Where; import org.apache.cassandra.db.marshal.Int32Type; import org.assertj.core.api.Assertions; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class ExpressionTest { @@ -122,4 +122,4 @@ private static Gen expressions() return rs.nextBoolean() ? Literal.of(value) : Bind.of(value); }; } -} \ No newline at end of file +} diff --git a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java index 771c8c357064..4d72bae35c3e 100644 --- a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java +++ b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java @@ -20,6 +20,9 @@ import java.nio.ByteBuffer; import java.util.*; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; +import accord.utilsfork.RandomSource; import org.apache.cassandra.cql3.terms.*; import org.junit.Assert; import org.junit.Test; @@ -27,19 +30,36 @@ import org.apache.cassandra.cql3.*; import org.apache.cassandra.cql3.terms.Constants; import org.apache.cassandra.cql3.terms.MultiElements; +import org.apache.cassandra.cql3.terms.Sets; import org.apache.cassandra.cql3.terms.Term; import org.apache.cassandra.cql3.terms.Terms; import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.db.rows.*; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.IVersionedSerializers; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaProvider; +import org.apache.cassandra.utils.AbstractTypeGenerators; +import org.apache.cassandra.utils.AbstractTypeGenerators.TypeKind; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.CassandraGenerators; +import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.TimeUUID; +import org.assertj.core.api.Assertions; +import org.mockito.Mockito; +import org.quicktheories.generators.SourceDSL; +import static accord.utilsfork.Property.qt; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; @@ -65,6 +85,17 @@ public class ColumnConditionTest public static final ByteBuffer ZERO = Int32Type.instance.fromString("0"); public static final ByteBuffer ONE = Int32Type.instance.fromString("1"); public static final ByteBuffer TWO = Int32Type.instance.fromString("2"); + public static final String KEYSPACE = "ks"; + public static final FieldIdentifier UDT_FIELD_A = FieldIdentifier.forUnquoted("a"); + public static final FieldIdentifier UDT_FIELD_B = FieldIdentifier.forUnquoted("b"); + public static final UserType UDT_FROZEN = new UserType(KEYSPACE, ByteBufferUtil.bytes("simple"), + Arrays.asList(UDT_FIELD_A, UDT_FIELD_B), + Arrays.asList(Int32Type.instance, Int32Type.instance), + false); + public static final UserType UDT_MULTI_CELL = new UserType(KEYSPACE, ByteBufferUtil.bytes("simple"), + Arrays.asList(UDT_FIELD_A, UDT_FIELD_B), + Arrays.asList(Int32Type.instance, Int32Type.instance), + true); private static Row newRow(ColumnMetadata definition, ByteBuffer value) { @@ -199,6 +230,47 @@ private static boolean conditionContainsApplies(SortedSet rowValue, return bound.appliesTo(newRow(definition, rowValue)); } + private boolean conditionUDTApplies(ByteBuffer rowValue, Operator op, ByteBuffer conditionValue) + { + boolean frozen = conditionUDTApplies(UDT_FROZEN, rowValue, op, conditionValue); + boolean multi = conditionUDTApplies(UDT_MULTI_CELL, rowValue, op, conditionValue); + Assertions.assertThat(frozen).isEqualTo(multi); + return frozen; + } + + private boolean conditionUDTApplies(UserType ut, ByteBuffer rowValue, Operator op, ByteBuffer conditionValue) + { + ColumnMetadata column = ColumnMetadata.regularColumn(KEYSPACE, "tbl", "c", ut); + ColumnCondition.ElementOrFieldAccessBound bounds = new ColumnCondition.ElementOrFieldAccessBound(column, UDT_FIELD_A.bytes, op, conditionValue); + Row row; + if (ut.isMultiCell()) + { + Row.Builder builder = BTreeRow.sortedBuilder(); + builder.newRow(Clustering.EMPTY); + if (rowValue != null) + { + builder.addCell(new BufferCell(column, + 0L, + Cell.NO_TTL, + Cell.NO_DELETION_TIME, + rowValue, + ut.cellPathForField(UDT_FIELD_A))); + builder.addCell(new BufferCell(column, + 0L, + Cell.NO_TTL, + Cell.NO_DELETION_TIME, + EMPTY_BYTE_BUFFER, + ut.cellPathForField(UDT_FIELD_B))); + } + row = builder.build(); + } + else + { + row = newRow(column, ut.pack(rowValue, EMPTY_BYTE_BUFFER)); + } + return bounds.appliesTo(row); + } + private static boolean appliesMapCondition(Map rowValue, Operator op, SortedMap conditionValue) { MapType type = MapType.getInstance(Int32Type.instance, Int32Type.instance, true); @@ -738,4 +810,157 @@ public void toCQLStringTest() assertEquals("col.f1 = ?", udtFieldCondition(col, f, EQ, Terms.Raw.of(marker)).toCQLString()); assertEquals("col.f1 = 1", udtFieldCondition(col, f, EQ, Terms.Raw.of(one)).toCQLString()); } + + @Test + public void testUDTBound() throws InvalidRequestException + { + // EQ + assertTrue(conditionUDTApplies(ONE, EQ, ONE)); + assertFalse(conditionUDTApplies(ONE, EQ, ZERO)); + assertFalse(conditionUDTApplies(ZERO, EQ, ONE)); + assertFalse(conditionUDTApplies(ONE, EQ, null)); + + assertFalse(conditionUDTApplies(ONE, EQ, null)); + assertFalse(conditionUDTApplies(null, EQ, ONE)); + assertTrue(conditionUDTApplies(null, EQ, null)); + + assertFalse(conditionUDTApplies(ONE, EQ, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + assertFalse(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, EQ, ONE)); + assertTrue(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, EQ, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + + // NEQ + assertFalse(conditionUDTApplies(ONE, NEQ, ONE)); + assertTrue(conditionUDTApplies(ONE, NEQ, ZERO)); + assertTrue(conditionUDTApplies(ZERO, NEQ, ONE)); + assertTrue(conditionUDTApplies(ONE, NEQ, null)); + assertTrue(conditionUDTApplies(null, NEQ, ONE)); + + assertTrue(conditionUDTApplies(ONE, NEQ, null)); + assertTrue(conditionUDTApplies(null, NEQ, ONE)); + assertFalse(conditionUDTApplies(null, NEQ, null)); + + assertTrue(conditionUDTApplies(ONE, NEQ, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + assertTrue(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, NEQ, ONE)); + assertFalse(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, NEQ, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + + // LT + assertFalse(conditionUDTApplies(ONE, LT, ONE)); + assertThatThrownBy(() -> conditionUDTApplies(null, LT, null)).isInstanceOf(InvalidRequestException.class); + assertFalse(conditionUDTApplies(ONE, LT, ZERO)); + assertTrue(conditionUDTApplies(ZERO, LT, ONE)); + assertThatThrownBy(() -> conditionUDTApplies(ONE, LT, null)).isInstanceOf(InvalidRequestException.class); + + assertFalse(conditionUDTApplies(ONE, LT, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + assertTrue(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, LT, ONE)); + assertFalse(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, LT, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + + // LTE + assertTrue(conditionUDTApplies(ONE, LTE, ONE)); + assertFalse(conditionUDTApplies(ONE, LTE, ZERO)); + assertTrue(conditionUDTApplies(ZERO, LTE, ONE)); + assertThatThrownBy(() -> conditionUDTApplies(ONE, LTE, null)).isInstanceOf(InvalidRequestException.class); + + assertFalse(conditionUDTApplies(ONE, LTE, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + assertTrue(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, LTE, ONE)); + assertTrue(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, LTE, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + + // GT + assertFalse(conditionUDTApplies(ONE, GT, ONE)); + assertTrue(conditionUDTApplies(ONE, GT, ZERO)); + assertFalse(conditionUDTApplies(ZERO, GT, ONE)); + assertThatThrownBy(() -> conditionUDTApplies(ONE, GT, null)).isInstanceOf(InvalidRequestException.class); + + assertTrue(conditionUDTApplies(ONE, GT, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + assertFalse(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, GT, ONE)); + assertFalse(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, GT, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + + // GTE + assertTrue(conditionUDTApplies(ONE, GTE, ONE)); + assertTrue(conditionUDTApplies(ONE, GTE, ZERO)); + assertFalse(conditionUDTApplies(ZERO, GTE, ONE)); + assertTrue(conditionUDTApplies(ONE, GTE, ONE)); + assertThatThrownBy(() -> conditionUDTApplies(ONE, GTE, null)).isInstanceOf(InvalidRequestException.class); + + assertTrue(conditionUDTApplies(ONE, GTE, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + assertFalse(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, GTE, ONE)); + assertTrue(conditionUDTApplies(ByteBufferUtil.EMPTY_BYTE_BUFFER, GTE, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + } + + @Test + public void serde() + { + DataOutputBuffer out = new DataOutputBuffer(); + qt().forAll(boundGen()).check(bounds -> { + Schema.instance = Mockito.mock(SchemaProvider.class); + Mockito.when(Schema.instance.getColumnMetadata(Mockito.eq(bounds.column.ksName), Mockito.eq(bounds.column.cfName), Mockito.eq(bounds.column.name.bytes))).thenReturn(bounds.column); + for (MessagingService.Version version : MessagingService.Version.MIN_ACCORD_VERSION.greaterThanOrEqual()) + IVersionedSerializers.testSerde(out, ColumnCondition.Bound.serializer, bounds, version.value); + }); + } + + private static Gen columnMetadataGen(ColumnCondition.BoundKind kind) + { + var typeGen = selectTypes(kind); + var columnKindGen = selectColumnKinds(kind); + return Generators.toGen(CassandraGenerators.columnMetadataGen(columnKindGen, typeGen)); + } + + private static org.quicktheories.core.Gen selectColumnKinds(ColumnCondition.BoundKind kind) + { + if (kind == ColumnCondition.BoundKind.MultiCell || kind == ColumnCondition.BoundKind.ElementOrFieldAccess) + return SourceDSL.arbitrary().pick(ColumnMetadata.Kind.STATIC, ColumnMetadata.Kind.REGULAR); + return SourceDSL.arbitrary().enumValues(ColumnMetadata.Kind.class); + } + + private static ColumnMetadata createColumnMetadata(RandomSource rs, ColumnCondition.BoundKind kind) + { + return columnMetadataGen(kind).next(rs); + } + + private static org.quicktheories.core.Gen> selectTypes(ColumnCondition.BoundKind kind) + { + switch (kind) + { + // A condition on a single non-collection column. + case Simple: + return new AbstractTypeGenerators.TypeGenBuilder().build(); + // A condition on a multicell column. + // assert column.type.isMultiCell(); + case MultiCell: + return new AbstractTypeGenerators.TypeGenBuilder().withTypeKinds(TypeKind.UDT, TypeKind.LIST, TypeKind.MAP, TypeKind.SET).withMultiCell(true).build(); + // The map key, list index or UDT fieldname. + case ElementOrFieldAccess: + return new AbstractTypeGenerators.TypeGenBuilder().withTypeKinds(TypeKind.UDT, TypeKind.LIST, TypeKind.MAP).withMultiCell(true).build(); + default: throw new UnsupportedOperationException(kind.name()); + } + } + + private static Gen boundGen() + { + Gen kindGen = Gens.enums().all(ColumnCondition.BoundKind.class); + Gen operatorGen = Gens.enums().all(Operator.class); + Gen nonNullValuesGen = Generators.toGen(Generators.bytes(1, 100)); + Gen valueGen = rs -> { + if (rs.decide(.2)) return null; + return nonNullValuesGen.next(rs); + }; + + return rs -> { + ColumnCondition.BoundKind kind = kindGen.next(rs); + ColumnMetadata metadata = createColumnMetadata(rs, kind); + Operator operator = operatorGen.next(rs); + ByteBuffer value = valueGen.next(rs); + switch (kind) + { + // A condition on a single non-collection column. + case Simple: return new ColumnCondition.SimpleBound(metadata, operator, value); + // A condition on a multicell column. + // assert column.type.isMultiCell(); + case MultiCell: return new ColumnCondition.MultiCellBound(metadata, operator, value); + // The map key, list index or UDT fieldname. + case ElementOrFieldAccess: return new ColumnCondition.ElementOrFieldAccessBound(metadata, Generators.toGen(AbstractTypeGenerators.elementAccess(metadata.type).bytesGen()).next(rs), operator, value); + default: throw new UnsupportedOperationException(kind.name()); + } + }; + } } diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index 506a69e352ff..dce719a5cdb1 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -45,6 +45,7 @@ import org.apache.cassandra.transport.ProtocolVersion; import static java.lang.String.format; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.schema.SchemaConstants.AUTH_KEYSPACE_NAME; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.apache.cassandra.schema.SchemaConstants.METADATA_KEYSPACE_NAME; @@ -313,6 +314,7 @@ public void testDescribe() throws Throwable Object[][] testKeyspacesOutput = rows(row(KEYSPACE, "keyspace", KEYSPACE), row(KEYSPACE_PER_TEST, "keyspace", KEYSPACE_PER_TEST), row(SYSTEM_KEYSPACE_NAME, "keyspace", SYSTEM_KEYSPACE_NAME), + row(ACCORD_KEYSPACE_NAME, "keyspace", ACCORD_KEYSPACE_NAME), row(AUTH_KEYSPACE_NAME, "keyspace", AUTH_KEYSPACE_NAME), row(METADATA_KEYSPACE_NAME, "keyspace", METADATA_KEYSPACE_NAME), row(DISTRIBUTED_KEYSPACE_NAME, "keyspace", DISTRIBUTED_KEYSPACE_NAME), diff --git a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java new file mode 100644 index 000000000000..2b2750476d4c --- /dev/null +++ b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java @@ -0,0 +1,372 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.statements; + +import org.apache.cassandra.transport.Dispatcher; +import org.assertj.core.api.Assertions; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.cql3.CQLStatement; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.exceptions.SyntaxException; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.transport.messages.ResultMessage; + +import static org.apache.cassandra.cql3.statements.TransactionStatement.DUPLICATE_TUPLE_NAME_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.EMPTY_TRANSACTION_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PRIMARY_KEY_LET_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_CONDITIONS_IN_UPDATES_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_TIMESTAMPS_IN_UPDATES_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.SELECT_REFS_NEED_COLUMN_MESSAGE; +import static org.apache.cassandra.cql3.statements.UpdateStatement.CANNOT_SET_KEY_WITH_REFERENCE_MESSAGE; +import static org.apache.cassandra.cql3.statements.UpdateStatement.UPDATING_PRIMARY_KEY_MESSAGE; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.cql3.transactions.RowDataReference.CANNOT_FIND_TUPLE_MESSAGE; +import static org.apache.cassandra.cql3.transactions.RowDataReference.COLUMN_NOT_IN_TUPLE_MESSAGE; +import static org.apache.cassandra.schema.TableMetadata.UNDEFINED_COLUMN_NAME_MESSAGE; + +public class TransactionStatementTest +{ + private static final TableId TABLE1_ID = TableId.fromString("00000000-0000-0000-0000-000000000001"); + private static final TableId TABLE2_ID = TableId.fromString("00000000-0000-0000-0000-000000000002"); + private static final TableId TABLE3_ID = TableId.fromString("00000000-0000-0000-0000-000000000003"); + private static final TableId TABLE4_ID = TableId.fromString("00000000-0000-0000-0000-000000000004"); + private static final TableId TABLE5_ID = TableId.fromString("00000000-0000-0000-0000-000000000005"); + + @BeforeClass + public static void beforeClass() throws Exception + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl1 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE1_ID), + parse("CREATE TABLE tbl2 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE2_ID), + parse("CREATE TABLE tbl3 (k int PRIMARY KEY, \"with spaces\" int, \"with\"\"quote\" int, \"MiXeD_CaSe\" int)", "ks").id(TABLE3_ID), + parse("CREATE TABLE tbl4 (k int PRIMARY KEY, int_list list)", "ks").id(TABLE4_ID), + parse("CREATE TABLE tbl5 (k int PRIMARY KEY, v int)", "ks").id(TABLE5_ID)); + } + + @Test + public void shouldRejectReferenceSelectOutsideTxn() + { + String query = "SELECT row1.v, row2.v;"; + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(SyntaxException.class) + .hasMessageContaining("expecting K_FROM"); + } + + @Test + public void shouldRejectReferenceUpdateOutsideTxn() + { + String query = "UPDATE ks.tbl1 SET v = row2.v WHERE k=1 AND c=2;"; + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(SyntaxException.class) + .hasMessageContaining("failed predicate"); + } + + @Test + public void shouldRejectConditionalWithNoEndIf() + { + String query = "BEGIN TRANSACTION\n" + + " IF row1 IS NOT NULL AND row1.v = 3 AND row2.v=4 THEN\n" + + " UPDATE ks.tbl1 SET v=1 WHERE k=1 AND c=2;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(SyntaxException.class) + .hasMessageContaining("failed predicate"); + } + + @Test + public void shouldRejectConditionalWithEndIfButNoIf() + { + String query = "BEGIN TRANSACTION\n" + + " UPDATE ks.tbl1 SET v=1 WHERE k=1 AND c=2;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(SyntaxException.class) + .hasMessageContaining("failed predicate"); + } + + @Test + public void shouldRejectLetOnlyStatement() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl1 WHERE k=1 AND c=2);\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessage(EMPTY_TRANSACTION_MESSAGE); + } + + @Test + public void shouldRejectEntireTupleSelect() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl1 WHERE k=1 AND c=2);\n" + + " SELECT row1;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessage(SELECT_REFS_NEED_COLUMN_MESSAGE); + } + + @Test + public void shouldRejectDuplicateTupleName() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl1 WHERE k=1 AND c=2);\n" + + " LET row1 = (SELECT * FROM ks.tbl2 WHERE k=2 AND c=2);\n" + + " SELECT row1.v;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(DUPLICATE_TUPLE_NAME_MESSAGE, "row1")); + } + + @Test + public void shouldRejectIllegalLimitInLet() + { + String letSelect = "SELECT * FROM ks.tbl1 WHERE k = 1 LIMIT 2"; + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (" + letSelect + ");\n" + + " SELECT row1.v;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, letSelect)); + } + + @Test + public void shouldRejectIllegalBindLimitInLet() + { + String letSelect = "SELECT * FROM ks.tbl1 WHERE k = 1 LIMIT ?"; + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (" + letSelect + ");\n" + + " SELECT row1.v;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> execute(query, 2)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, letSelect.replace("?", "2"))); + } + + @Test + public void shouldRejectIncompletePrimaryKeyInLet() + { + String letSelect = "SELECT * FROM ks.tbl1 WHERE k = 1"; + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (" + letSelect + ");\n" + + " SELECT row1.v;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, letSelect)); + } + + @Test + public void shouldRejectIllegalLimitInSelect() + { + String select = "SELECT * FROM ks.tbl1 WHERE k = 1 LIMIT 2"; + String query = "BEGIN TRANSACTION\n" + select + ";\nCOMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, select)); + } + + @Test + public void shouldRejectIncompletePrimaryKeyInSelect() + { + String select = "SELECT * FROM ks.tbl1 WHERE k = 1"; + String query = "BEGIN TRANSACTION\n" + select + ";\nCOMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, select)); + } + + @Test + public void shouldRejectUpdateWithCondition() + { + String query = "BEGIN TRANSACTION\n" + + " INSERT INTO ks.tbl1 (k, c, v) VALUES (0, 0, 1) IF NOT EXISTS;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(NO_CONDITIONS_IN_UPDATES_MESSAGE); + } + + @Test + public void shouldRejectUpdateWithCustomTimestamp() + { + String query = "BEGIN TRANSACTION\n" + + " INSERT INTO ks.tbl1 (k, c, v) VALUES (0, 0, 1) USING TIMESTAMP 1;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(NO_TIMESTAMPS_IN_UPDATES_MESSAGE); + } + + @Test + public void shouldRejectBothFullSelectAndSelectWithReferences() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl1 WHERE k=1 AND c=2);\n" + + " SELECT v FROM ks.tbl1 WHERE k=2 AND c=2;\n" + + " SELECT row1.v;\n" + + " IF row1 IS NOT NULL AND row1.v = 3 AND row2.v=4 THEN\n" + + " UPDATE ks.tbl1 SET v=1 WHERE k=1 AND c=2;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(SyntaxException.class) + .hasMessageContaining("no viable alternative"); + } + + @Test + public void shouldRejectPrimaryKeyValueReference() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl1 WHERE k=1 AND c=1);\n" + + " IF row1 IS NULL THEN\n" + + " UPDATE ks.tbl1 SET c = row1.c WHERE k=1 AND c=2;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(UPDATING_PRIMARY_KEY_MESSAGE, "c")); + } + + @Test + public void shouldRejectShorthandAssignmentToUnknownColumn() + { + String query = "BEGIN TRANSACTION\n" + + " UPDATE ks.tbl1 SET q += 1 WHERE k=1 AND c=2;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(UNDEFINED_COLUMN_NAME_MESSAGE, "q", "ks.tbl1")); + } + + @Test + public void shouldRejectAdditionToUnknownColumn() + { + String query = "BEGIN TRANSACTION\n" + + " UPDATE ks.tbl1 SET v = q + 1 WHERE k=1 AND c=2;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> QueryProcessor.parseStatement(query)) + .isInstanceOf(SyntaxException.class) + .hasMessageContaining("Only expressions of the form X = X + are supported."); + } + + @Test + public void shouldRejectUnknownSubstitutionTuple() + { + String query = "BEGIN TRANSACTION\n" + + " UPDATE ks.tbl1 SET v = row1.v WHERE k=1 AND c=2;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(CANNOT_FIND_TUPLE_MESSAGE, "row1")); + } + + @Test + public void shouldRejectUnknownSubstitutionColumn() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl1 WHERE k=1 AND c=2);\n" + + " UPDATE ks.tbl1 SET v = row1.q WHERE k=1 AND c=2;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(COLUMN_NOT_IN_TUPLE_MESSAGE, "q", "row1")); + } + + @Test + public void shouldRejectInsertPartiitonKeyReference() + { + String query = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM ks.tbl1 WHERE k = 0 AND c = 0);\n" + + " INSERT INTO ks.tbl1 (k, c, v) VALUES (row0.k, 1, 1);\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(CANNOT_SET_KEY_WITH_REFERENCE_MESSAGE, "row0.k", "k")); + } + + @Test + public void shouldRejectNormalSelectWithIncompletePartitionKey() + { + String query = "BEGIN TRANSACTION\n" + + " SELECT k, v FROM ks.tbl5 LIMIT 1;\n" + + "COMMIT TRANSACTION;\n"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "SELECT v FROM ks.tbl5 LIMIT 1")); + } + + @Test + public void shouldRejectLetSelectWithIncompletePartitionKey() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT k, v FROM ks.tbl5 WHERE token(k) > token(123) LIMIT 1); \n" + + " SELECT row1.k, row1.v;\n" + + "COMMIT TRANSACTION;\n"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, "SELECT v FROM ks.tbl5 WHERE token(k) > 0000007b LIMIT 1")); + } + + private static CQLStatement prepare(String query) + { + TransactionStatement.Parsed parsed = (TransactionStatement.Parsed) QueryProcessor.parseStatement(query); + return parsed.prepare(ClientState.forInternalCalls()); + } + + private static ResultMessage execute(String query, Object... binds) + { + CQLStatement stmt = prepare(query); + return stmt.execute(QueryState.forInternalCalls(), QueryProcessor.makeInternalOptions(stmt, binds), Dispatcher.RequestTime.forImmediateExecution()); + } +} diff --git a/test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java b/test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java new file mode 100644 index 000000000000..8c1214a26bde --- /dev/null +++ b/test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.statements; + +import org.apache.cassandra.service.accord.txn.TxnDataName; +import org.junit.Test; + +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.utils.Generators; +import org.assertj.core.api.Assertions; +import org.quicktheories.core.Gen; +import org.quicktheories.generators.SourceDSL; + +import static org.apache.cassandra.utils.FailingConsumer.orFail; +import static org.quicktheories.QuickTheory.qt; + +public class TxnDataNameTest +{ + @Test + public void serde() + { + try (DataOutputBuffer out = new DataOutputBuffer()) + { + qt().forAll(gen()).checkAssert(orFail(name -> { + out.clear(); + + long expectedSize = TxnDataName.serializer.serializedSize(name, 12); + TxnDataName.serializer.serialize(name, out, 12); + Assertions.assertThat(out.getLength()).isEqualTo(expectedSize); + + TxnDataName read = TxnDataName.serializer.deserialize(new DataInputBuffer(out.toByteArray()), 12); + Assertions.assertThat(read).isEqualTo(name); + })); + } + } + + public static Gen gen() + { + Gen kindGen = SourceDSL.arbitrary().enumValues(TxnDataName.Kind.class); + Gen symbolGen = Generators.SYMBOL_GEN; + return rnd -> { + TxnDataName.Kind kind = kindGen.generate(rnd); + switch (kind) + { + case USER: return TxnDataName.user(symbolGen.generate(rnd)); + case RETURNING: return TxnDataName.returning(); + case AUTO_READ: return new TxnDataName(kind, symbolGen.generate(rnd), symbolGen.generate(rnd), symbolGen.generate(rnd)); + default: throw new IllegalArgumentException("Unknown kind: " + kind); + } + }; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/cql3/terms/ListsTest.java b/test/unit/org/apache/cassandra/cql3/terms/ListsTest.java index 30921a8870b1..a45c2930cba9 100644 --- a/test/unit/org/apache/cassandra/cql3/terms/ListsTest.java +++ b/test/unit/org/apache/cassandra/cql3/terms/ListsTest.java @@ -147,7 +147,6 @@ private void testPrepender_execute(List terms) ByteBuffer keyBuf = ByteBufferUtil.bytes("key"); DecoratedKey key = Murmur3Partitioner.instance.decorateKey(keyBuf); UpdateParameters parameters = new UpdateParameters(metaData, - null, ClientState.forInternalCalls(), QueryOptions.DEFAULT, System.currentTimeMillis(), diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/InsertTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/InsertTest.java index 70b8d3e9da77..f382675955c9 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/InsertTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/InsertTest.java @@ -66,7 +66,7 @@ public void testInsertZeroDuration() throws Throwable row(12, expectedDuration), row(13, expectedDuration), row(14, expectedDuration)); - assertInvalidMessage("no viable alternative at input ')' (... b) VALUES (15, [P]))","INSERT INTO %s (a, b) VALUES (15, P)"); + assertInvalid("no viable alternative at input ')' (... b) VALUES (15, [P]))","INSERT INTO %s (a, b) VALUES (15, P)"); } @Test diff --git a/test/unit/org/apache/cassandra/db/virtual/CIDRFilteringMetricsTableTest.java b/test/unit/org/apache/cassandra/db/virtual/CIDRFilteringMetricsTableTest.java index b357c00e0fb7..b9d78ac464de 100644 --- a/test/unit/org/apache/cassandra/db/virtual/CIDRFilteringMetricsTableTest.java +++ b/test/unit/org/apache/cassandra/db/virtual/CIDRFilteringMetricsTableTest.java @@ -35,14 +35,11 @@ import com.codahale.metrics.Snapshot; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.auth.AuthCacheService; -import org.apache.cassandra.auth.AuthKeyspace; import org.apache.cassandra.auth.AuthTestUtils; import org.apache.cassandra.auth.AuthenticatedUser; -import org.apache.cassandra.auth.CassandraRoleManager; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CIDR; import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.exceptions.ConfigurationException; @@ -58,16 +55,6 @@ public class CIDRFilteringMetricsTableTest extends CQLTester { private static final String KS_NAME = "vts"; - private static void setupSuperUser() - { - QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (role, is_superuser, can_login, salted_hash) " + - "VALUES ('%s', true, true, '%s')", - AUTH_KEYSPACE_NAME, - AuthKeyspace.ROLES, - CassandraRoleManager.DEFAULT_SUPERUSER_NAME, - "xxx")); - } - @BeforeClass public static void defineSchema() throws ConfigurationException { @@ -77,7 +64,7 @@ public static void defineSchema() throws ConfigurationException new AuthTestUtils.LocalCassandraNetworkAuthorizer(), new AuthTestUtils.LocalCassandraCIDRAuthorizer()); AuthCacheService.initializeAndRegisterCaches(); - setupSuperUser(); + AuthTestUtils.setupSuperUser(); } @Before diff --git a/test/unit/org/apache/cassandra/dht/ByteOrderedPartitionerTest.java b/test/unit/org/apache/cassandra/dht/ByteOrderedPartitionerTest.java index f40c284b0e36..e4ff1bc66ef1 100644 --- a/test/unit/org/apache/cassandra/dht/ByteOrderedPartitionerTest.java +++ b/test/unit/org/apache/cassandra/dht/ByteOrderedPartitionerTest.java @@ -17,6 +17,12 @@ */ package org.apache.cassandra.dht; +import java.util.Arrays; + +import org.junit.Assert; + +import org.apache.cassandra.dht.ByteOrderedPartitioner.BytesToken; + public class ByteOrderedPartitionerTest extends PartitionerTestCase { public void initPartitioner() @@ -28,4 +34,14 @@ protected boolean shouldStopRecursion(Token left, Token right) { return false; } + + @Override + protected void checkRoundTrip(Token original, Token roundTrip) + { + BytesToken orig = (BytesToken) original; + BytesToken rt = (BytesToken) roundTrip; + Assert.assertArrayEquals(orig.token, Arrays.copyOf(rt.token, orig.token.length)); + for (int i = orig.token.length ; i < rt.token.length ; ++i) + Assert.assertEquals((byte)0, rt.token[i]); + } } diff --git a/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java b/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java index c0573d61f7dc..6cd4a1331dd9 100644 --- a/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java +++ b/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java @@ -122,6 +122,12 @@ public IPartitioner getPartitioner() return LengthPartitioner.instance; } + @Override + public int tokenHash() + { + return token.hashCode(); + } + @Override public long getHeapSize() { diff --git a/test/unit/org/apache/cassandra/dht/LengthPartitioner.java b/test/unit/org/apache/cassandra/dht/LengthPartitioner.java index 07e40c571c1f..01b41d4b3b4c 100644 --- a/test/unit/org/apache/cassandra/dht/LengthPartitioner.java +++ b/test/unit/org/apache/cassandra/dht/LengthPartitioner.java @@ -19,17 +19,23 @@ import java.math.BigInteger; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Function; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableMetadata; +import accord.primitives.Ranges; import org.apache.cassandra.db.BufferDecoratedKey; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.IntegerType; import org.apache.cassandra.db.marshal.PartitionerDefinedOrder; import org.apache.cassandra.dht.KeyCollisionTest.BigIntegerToken; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; @@ -37,7 +43,7 @@ import org.apache.cassandra.utils.bytecomparable.ByteComparable; import org.apache.cassandra.utils.bytecomparable.ByteSource; -public class LengthPartitioner implements IPartitioner +public class LengthPartitioner extends AccordSplitter implements IPartitioner { public static final BigInteger ZERO = new BigInteger("0"); public static final BigIntegerToken MINIMUM = new BigIntegerToken("-1"); @@ -184,4 +190,34 @@ public AbstractType partitionOrdering(AbstractType partitionKeyType) { return new PartitionerDefinedOrder(this, partitionKeyType); } + + @Override + public Function accordSplitter() + { + return ignore -> this; + } + + @Override + BigInteger valueForToken(Token token) + { + return ((BigIntegerToken)token).token; + } + + @Override + Token tokenForValue(BigInteger value) + { + return new BigIntegerToken(value); + } + + @Override + BigInteger minimumValue() + { + throw new UnsupportedOperationException(); + } + + @Override + BigInteger maximumValue() + { + throw new UnsupportedOperationException(); + } } diff --git a/test/unit/org/apache/cassandra/dht/OrderPreservingPartitionerTest.java b/test/unit/org/apache/cassandra/dht/OrderPreservingPartitionerTest.java index 6ab5b456b3d8..131ce13d58ea 100644 --- a/test/unit/org/apache/cassandra/dht/OrderPreservingPartitionerTest.java +++ b/test/unit/org/apache/cassandra/dht/OrderPreservingPartitionerTest.java @@ -19,10 +19,11 @@ import java.io.IOException; +import org.junit.Assert; import org.junit.BeforeClass; -import org.junit.Test; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken; public class OrderPreservingPartitionerTest extends PartitionerTestCase { @@ -44,15 +45,12 @@ protected boolean shouldStopRecursion(Token left, Token right) return false; } - @Test - public void testCompare() + @Override + protected void checkRoundTrip(Token original, Token roundTrip) { - assert tok("").compareTo(tok("asdf")) < 0; - assert tok("asdf").compareTo(tok("")) > 0; - assert tok("").compareTo(tok("")) == 0; - assert tok("z").compareTo(tok("a")) > 0; - assert tok("a").compareTo(tok("z")) < 0; - assert tok("asdf").compareTo(tok("asdf")) == 0; - assert tok("asdz").compareTo(tok("asdf")) > 0; + StringToken orig = (StringToken) original; + StringToken rt = (StringToken) roundTrip; + Assert.assertEquals(orig.token, rt.token.substring(0, orig.token.length())); + Assert.assertTrue(rt.token.substring(orig.token.length()).matches("\0*")); } } diff --git a/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java b/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java index eb9733c92615..ace18db5498c 100644 --- a/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java +++ b/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.dht; +import java.math.BigInteger; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; @@ -24,13 +25,17 @@ import java.util.Map; import java.util.Random; +import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import accord.primitives.Ranges; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -216,4 +221,113 @@ private void testDescribeOwnershipWith(int numTokens) totalOwnership += ownership; assertEquals(1.0, totalOwnership, 0.001); } + + @Test + public void testCompare() + { + if (!partitioner.preservesOrder()) + return; + + assert tok("").compareTo(tok("asdf")) < 0; + assert tok("asdf").compareTo(tok("")) > 0; + assert tok("").compareTo(tok("")) == 0; + assert tok("z").compareTo(tok("a")) > 0; + assert tok("a").compareTo(tok("z")) < 0; + assert tok("asdf").compareTo(tok("asdf")) == 0; + assert tok("asdz").compareTo(tok("asdf")) > 0; + } + + @Test + public void testCompareSplitter() + { + for (int i = 0 ; i < 16 ; ++i) + { + Token a = partitioner.getRandomToken(), b = partitioner.getRandomToken(); + while (a.equals(b)) + b = partitioner.getRandomToken(); + if (a.compareTo(b) > 0) { Token tmp = a; a = b; b = tmp; } + testCompareSplitter(a, b); + } + + if (!partitioner.preservesOrder()) + return; + + testCompareSplitter(tok(""), tok("asdf")); + testCompareSplitter(tok(""), tok("")); + testCompareSplitter(tok("a"), tok("z")); + testCompareSplitter(tok("asdf"), tok("asdf")); + testCompareSplitter(tok("asd"), tok("asdf")); + testCompareSplitter(tok("asdf"), tok("asf")); + testCompareSplitter(tok("asdf"), tok("asdz")); + } + + @Test + public void testSplitter() + { + for (int i = 0 ; i < 1024 ; ++i) + { + Token a = partitioner.getRandomToken(), b = partitioner.getRandomToken(); + while (a.equals(b)) + b = partitioner.getRandomToken(); + if (a.compareTo(b) > 0) { Token tmp = a; a = b; b = tmp; } + testSplitter(a, b); + } + + if (!partitioner.preservesOrder()) + return; + + testSplitter(tok(""), tok("asdf")); + testSplitter(tok("a"), tok("z")); + testSplitter(tok("asd"), tok("asdf")); + testSplitter(tok("asdf"), tok("asdz")); + } + + void testCompareSplitter(Token less, Token more) + { + Ranges ranges; + if (less.equals(more) && less.isMinimum()) + ranges = Ranges.EMPTY; + else if (less.equals(more)) + ranges = Ranges.of(new TokenRange(new TokenKey("", partitioner.getMinimumToken()), new TokenKey("", less))); + else + ranges = Ranges.of(new TokenRange(new TokenKey("", less), new TokenKey("", more))); + + AccordSplitter splitter = partitioner.accordSplitter().apply(ranges); + BigInteger lv = splitter.valueForToken(less); + BigInteger rv = splitter.valueForToken(more); + Assert.assertEquals(less.equals(more) ? 0 : -1, normaliseCompare(lv.compareTo(rv))); + Assert.assertEquals(less.equals(more) ? 0 : 1, normaliseCompare(rv.compareTo(lv))); + checkRoundTrip(less, splitter.tokenForValue(lv)); + checkRoundTrip(more, splitter.tokenForValue(rv)); + } + + void testSplitter(Token start, Token end) + { + accord.primitives.Range range = new TokenRange(new TokenKey("", start), new TokenKey("", end)); + AccordSplitter splitter = partitioner.accordSplitter().apply(Ranges.of(range)); + if (!start.isMinimum()) + testSplitter(new TokenRange(new TokenKey("", partitioner.getMinimumToken()), new TokenKey("", start))); + testSplitter(new TokenRange(new TokenKey("", start), new TokenKey("", splitter.tokenForValue(splitter.maximumValue())))); + checkRoundTrip(start, splitter.tokenForValue(splitter.valueForToken(start))); + checkRoundTrip(end, splitter.tokenForValue(splitter.valueForToken(end))); + } + + void testSplitter(accord.primitives.Range range) + { + AccordSplitter splitter = partitioner.accordSplitter().apply(Ranges.of(range)); + BigInteger size = splitter.sizeOf(range); + Assert.assertEquals(range, splitter.subRange(range, BigInteger.ZERO, size)); + } + + protected void checkRoundTrip(Token original, Token roundTrip) + { + Assert.assertEquals(original, roundTrip); + } + + static int normaliseCompare(int compareResult) + { + if (compareResult < 0) return -1; + if (compareResult > 0) return 1; + return 0; + } } diff --git a/test/unit/org/apache/cassandra/gms/VersionedValueTest.java b/test/unit/org/apache/cassandra/gms/VersionedValueTest.java index ac47ee5496ce..42728040ecf4 100644 --- a/test/unit/org/apache/cassandra/gms/VersionedValueTest.java +++ b/test/unit/org/apache/cassandra/gms/VersionedValueTest.java @@ -20,7 +20,7 @@ import org.junit.Test; -import accord.utils.Gen; +import accord.utilsfork.Gen; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializers; import org.apache.cassandra.io.util.DataOutputBuffer; @@ -28,7 +28,7 @@ import org.apache.cassandra.utils.CassandraGenerators; import org.apache.cassandra.utils.Generators; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class VersionedValueTest { @@ -50,4 +50,4 @@ private static Gen values() // sometimes the text is too big, must not be larger than Short.MAX_VALUE .filter(vv -> TypeSizes.encodedUTF8Length(vv.value) <= Short.MAX_VALUE); } -} \ No newline at end of file +} diff --git a/test/unit/org/apache/cassandra/index/StubIndex.java b/test/unit/org/apache/cassandra/index/StubIndex.java index cfaff698ec10..696f05062832 100644 --- a/test/unit/org/apache/cassandra/index/StubIndex.java +++ b/test/unit/org/apache/cassandra/index/StubIndex.java @@ -228,9 +228,13 @@ public ReadCommand command() } @Override - public UnfilteredPartitionIterator search(ReadExecutionController executionController) + public UnfilteredPartitionIterator search(ReadExecutionController controller) { - return Util.executeLocally((PartitionRangeReadCommand)command, baseCfs, executionController); + if (command instanceof PartitionRangeReadCommand) + return Util.executeLocally((PartitionRangeReadCommand)command, baseCfs, controller); + if (command instanceof SinglePartitionReadCommand) + return Util.executeLocally((SinglePartitionReadCommand) command, baseCfs, controller); + throw new IllegalArgumentException("Unexpected ReadCommand type: " + command.getClass()); } } } diff --git a/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java index fae644f3899f..1663fbe5c3d8 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java @@ -24,8 +24,8 @@ import java.util.TreeMap; import javax.annotation.Nullable; -import accord.utils.Gen; -import accord.utils.Property; +import accord.utilsfork.Gen; +import accord.utilsfork.Property; import org.agrona.collections.IntArrayList; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.UntypedResultSet; @@ -33,7 +33,7 @@ import org.apache.cassandra.index.sai.SAITester; import org.assertj.core.api.Assertions; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public abstract class AbstractSimpleEqTestBase extends SAITester { diff --git a/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java index 91bff0de97bd..9cca64d3d575 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java @@ -28,8 +28,8 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.cql3.CQL3Type; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.DecimalType; diff --git a/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java b/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java index af4b458fecc4..e1a42552d1fb 100644 --- a/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java +++ b/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java @@ -18,8 +18,8 @@ package org.apache.cassandra.io.util; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ClusteringComparator; import org.apache.cassandra.io.compress.CompressedSequentialWriter; @@ -36,7 +36,7 @@ import java.nio.file.Files; import java.util.concurrent.atomic.AtomicInteger; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class CompressedChunkReaderTest { diff --git a/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java b/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java index 59d7106506d8..c2d4656ddceb 100644 --- a/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java +++ b/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java @@ -30,7 +30,7 @@ import org.junit.Assert; import org.junit.Test; -import accord.utils.RandomSource; +import accord.utilsfork.RandomSource; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.concurrent.SimulatedExecutorFactory; import org.apache.cassandra.config.DatabaseDescriptor; @@ -46,7 +46,7 @@ import org.apache.cassandra.utils.Backoff; import org.mockito.Mockito; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; import static org.assertj.core.api.Assertions.assertThat; public class MessageDeliveryTest diff --git a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java index f0f34888a29c..6a335f4aab90 100644 --- a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java +++ b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java @@ -29,8 +29,8 @@ import java.util.function.LongSupplier; import javax.annotation.Nullable; -import accord.utils.Gens; -import accord.utils.RandomSource; +import accord.utilsfork.Gens; +import accord.utilsfork.RandomSource; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.utils.concurrent.AsyncPromise; diff --git a/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java b/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java index 4eba7f0e36d5..e4791f5a54f9 100644 --- a/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java @@ -24,8 +24,8 @@ import org.junit.Test; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.db.ColumnFamilyStore; @@ -35,7 +35,7 @@ import org.apache.cassandra.utils.FailingBiConsumer; import org.assertj.core.api.Assertions; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class ConcurrentIrWithPreviewFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/FailedAckTest.java b/test/unit/org/apache/cassandra/repair/FailedAckTest.java index c77a812f92ef..f96bf3732db9 100644 --- a/test/unit/org/apache/cassandra/repair/FailedAckTest.java +++ b/test/unit/org/apache/cassandra/repair/FailedAckTest.java @@ -23,8 +23,8 @@ import org.junit.Test; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.db.compaction.ICompactionManager; @@ -39,7 +39,7 @@ import org.assertj.core.api.Assertions; import org.mockito.Mockito; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class FailedAckTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java index 2d1438e0302a..cc0d781898f4 100644 --- a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java @@ -29,8 +29,8 @@ import com.google.common.collect.ImmutableList; import org.junit.Test; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.locator.InetAddressAndPort; @@ -41,7 +41,7 @@ import org.assertj.core.api.AbstractStringAssert; import org.assertj.core.api.Assertions; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class FailingRepairFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 044711a617b6..9169fb4e88e8 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -55,10 +55,10 @@ import org.apache.cassandra.config.UnitConfigOverride; import org.junit.BeforeClass; -import accord.utils.DefaultRandom; -import accord.utils.Gen; -import accord.utils.Gens; -import accord.utils.RandomSource; +import accord.utilsfork.DefaultRandom; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; +import accord.utilsfork.RandomSource; import org.agrona.collections.LongHashSet; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.concurrent.ExecutorBuilder; diff --git a/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java b/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java index b6e34fbcf09c..b145e28fb943 100644 --- a/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java @@ -23,13 +23,13 @@ import org.junit.Test; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.utils.Closeable; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class HappyPathFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java b/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java index 03c151ec683e..6160532e8fe9 100644 --- a/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java @@ -23,13 +23,13 @@ import org.junit.Test; -import accord.utils.Gen; -import accord.utils.Gens; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.utils.Closeable; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class SlowMessageFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java b/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java index e34d299a1505..acd0e0f8f822 100644 --- a/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java +++ b/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java @@ -21,7 +21,7 @@ import java.util.LinkedHashMap; import java.util.Map; -import accord.utils.Gen; +import accord.utilsfork.Gen; import com.google.common.collect.ImmutableMap; import org.apache.cassandra.config.Config; import org.apache.cassandra.utils.ConfigGenBuilder; @@ -33,7 +33,7 @@ import org.apache.cassandra.db.memtable.SkipListMemtableFactory; import org.apache.cassandra.exceptions.ConfigurationException; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; import static org.apache.cassandra.config.YamlConfigurationLoader.fromMap; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; diff --git a/test/unit/org/apache/cassandra/schema/ValidationTest.java b/test/unit/org/apache/cassandra/schema/ValidationTest.java index 8eb1247c5b0c..9d072755dd77 100644 --- a/test/unit/org/apache/cassandra/schema/ValidationTest.java +++ b/test/unit/org/apache/cassandra/schema/ValidationTest.java @@ -47,35 +47,35 @@ public void testIsNameValidNegative() assertFalse(SchemaConstants.isValidName("!")); } - private static Set primitiveTypes = - new HashSet<>(Arrays.asList(new String[] { "ascii", "bigint", "blob", "boolean", "date", - "duration", "decimal", "double", "float", - "inet", "int", "smallint", "text", "time", - "timestamp", "timeuuid", "tinyint", "uuid", - "varchar", "varint" })); + private static final Set primitiveTypes = + new HashSet<>(Arrays.asList("ascii", "bigint", "blob", "boolean", "date", + "duration", "decimal", "double", "float", + "inet", "int", "smallint", "text", "time", + "timestamp", "timeuuid", "tinyint", "uuid", + "varchar", "varint")); @Test public void typeCompatibilityTest() { Map> compatibilityMap = new HashMap<>(); - compatibilityMap.put("bigint", new HashSet<>(Arrays.asList(new String[] {"timestamp"}))); - compatibilityMap.put("blob", new HashSet<>(Arrays.asList(new String[] {"ascii", "bigint", "boolean", "date", "decimal", "double", "duration", - "float", "inet", "int", "smallint", "text", "time", "timestamp", - "timeuuid", "tinyint", "uuid", "varchar", "varint"}))); - compatibilityMap.put("date", new HashSet<>(Arrays.asList(new String[] {"int"}))); - compatibilityMap.put("time", new HashSet<>(Arrays.asList(new String[] {"bigint"}))); - compatibilityMap.put("text", new HashSet<>(Arrays.asList(new String[] {"ascii", "varchar"}))); - compatibilityMap.put("timestamp", new HashSet<>(Arrays.asList(new String[] {"bigint"}))); - compatibilityMap.put("varchar", new HashSet<>(Arrays.asList(new String[] {"ascii", "text"}))); - compatibilityMap.put("varint", new HashSet<>(Arrays.asList(new String[] {"bigint", "int", "timestamp"}))); - compatibilityMap.put("uuid", new HashSet<>(Arrays.asList(new String[] {"timeuuid"}))); + compatibilityMap.put("bigint", new HashSet<>(Arrays.asList("timestamp"))); + compatibilityMap.put("blob", new HashSet<>(Arrays.asList("ascii", "bigint", "boolean", "date", "decimal", "double", "duration", + "float", "inet", "int", "smallint", "text", "time", "timestamp", + "timeuuid", "tinyint", "uuid", "varchar", "varint"))); + compatibilityMap.put("date", new HashSet<>(Arrays.asList("int"))); + compatibilityMap.put("time", new HashSet<>(Arrays.asList("bigint"))); + compatibilityMap.put("text", new HashSet<>(Arrays.asList("ascii", "varchar"))); + compatibilityMap.put("timestamp", new HashSet<>(Arrays.asList("bigint"))); + compatibilityMap.put("varchar", new HashSet<>(Arrays.asList("ascii", "text"))); + compatibilityMap.put("varint", new HashSet<>(Arrays.asList("bigint", "int", "timestamp"))); + compatibilityMap.put("uuid", new HashSet<>(Arrays.asList("timeuuid"))); for (String sourceTypeString: primitiveTypes) { - AbstractType sourceType = CQLTypeParser.parse("KEYSPACE", sourceTypeString, Types.none()); + AbstractType sourceType = CQLTypeParser.parse("KEYSPACE", sourceTypeString, Types.none()); for (String destinationTypeString: primitiveTypes) { - AbstractType destinationType = CQLTypeParser.parse("KEYSPACE", destinationTypeString, Types.none()); + AbstractType destinationType = CQLTypeParser.parse("KEYSPACE", destinationTypeString, Types.none()); if (compatibilityMap.get(destinationTypeString) != null && compatibilityMap.get(destinationTypeString).contains(sourceTypeString) || @@ -94,19 +94,19 @@ public void typeCompatibilityTest() } @Test - public void clusteringColumnTypeCompatibilityTest() throws Throwable + public void clusteringColumnTypeCompatibilityTest() { Map> compatibilityMap = new HashMap<>(); - compatibilityMap.put("blob", new HashSet<>(Arrays.asList(new String[] {"ascii", "text", "varchar"}))); - compatibilityMap.put("text", new HashSet<>(Arrays.asList(new String[] {"ascii", "varchar"}))); - compatibilityMap.put("varchar", new HashSet<>(Arrays.asList(new String[] {"ascii", "text" }))); + compatibilityMap.put("blob", new HashSet<>(Arrays.asList("ascii", "text", "varchar"))); + compatibilityMap.put("text", new HashSet<>(Arrays.asList("ascii", "varchar"))); + compatibilityMap.put("varchar", new HashSet<>(Arrays.asList("ascii", "text"))); for (String sourceTypeString: primitiveTypes) { - AbstractType sourceType = CQLTypeParser.parse("KEYSPACE", sourceTypeString, Types.none()); + AbstractType sourceType = CQLTypeParser.parse("KEYSPACE", sourceTypeString, Types.none()); for (String destinationTypeString: primitiveTypes) { - AbstractType destinationType = CQLTypeParser.parse("KEYSPACE", destinationTypeString, Types.none()); + AbstractType destinationType = CQLTypeParser.parse("KEYSPACE", destinationTypeString, Types.none()); if (compatibilityMap.get(destinationTypeString) != null && compatibilityMap.get(destinationTypeString).contains(sourceTypeString) || diff --git a/test/unit/org/apache/cassandra/serializers/SerializationUtils.java b/test/unit/org/apache/cassandra/serializers/SerializationUtils.java index b88b56f003de..da37cb2a5a6d 100644 --- a/test/unit/org/apache/cassandra/serializers/SerializationUtils.java +++ b/test/unit/org/apache/cassandra/serializers/SerializationUtils.java @@ -49,11 +49,6 @@ public static T cycleSerialization(T src, IVersionedSerializer serializer } } - public static T cycleSerialization(T src, IVersionedSerializer serializer) - { - return cycleSerialization(src, serializer, MessagingService.current_version); - } - public static void assertSerializationCycle(T src, IVersionedSerializer serializer, int version) { T dst = cycleSerialization(src, serializer, version); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java new file mode 100644 index 000000000000..915bad76d7d1 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; + +import com.google.common.collect.Iterables; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.Key; +import accord.local.Command; +import accord.local.Status; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.api.PartitionKey; + +import static accord.local.Status.Durability.Durable; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTestUtils.ballot; +import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; +import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.processCommandResult; +import static org.apache.cassandra.service.accord.AccordTestUtils.timestamp; +import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; + +public class AccordCommandStoreTest +{ + private static final Logger logger = LoggerFactory.getLogger(AccordCommandStoreTest.class); + + @BeforeClass + public static void beforeClass() throws Throwable + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + } + + @Before + public void setUp() throws Exception + { + Keyspace.open(SchemaConstants.ACCORD_KEYSPACE_NAME).getColumnFamilyStores().forEach(ColumnFamilyStore::truncateBlocking); + } + + @Test + public void commandLoadSave() throws Throwable + { + AtomicLong clock = new AtomicLong(0); + PartialTxn depTxn = createPartialTxn(0); + Key key = (Key)depTxn.keys().get(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + PartialDeps dependencies; + try (PartialDeps.Builder builder = PartialDeps.builder(depTxn.covering())) + { + builder.add(key, txnId(1, clock.incrementAndGet(), 1)); + dependencies = builder.build(); + } + + QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, c, v) VALUES (0, 0, 1)"); + TxnId oldTxnId1 = txnId(1, clock.incrementAndGet(), 1); + TxnId oldTxnId2 = txnId(1, clock.incrementAndGet(), 1); + TxnId oldTimestamp = txnId(1, clock.incrementAndGet(), 1); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + AccordCommand command = new AccordCommand(txnId).initialize(); + command.setPartialTxn(createPartialTxn(0)); + command.homeKey(key.toUnseekable()); + command.progressKey(key.toUnseekable()); + command.setDurability(Durable); + command.setPromised(ballot(1, clock.incrementAndGet(), 1)); + command.setAccepted(ballot(1, clock.incrementAndGet(), 1)); + command.setExecuteAt(timestamp(1, clock.incrementAndGet(), 1)); + command.setPartialDeps(dependencies); + command.setStatus(Status.Accepted); + command.addWaitingOnCommit(oldTxnId1); + command.addWaitingOnApplyIfAbsent(oldTxnId2, oldTimestamp); + command.storedListeners.clear(); + command.addListener(new AccordCommand(oldTxnId1)); + processCommandResult(commandStore, command); + + AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); + logger.info("E: {}", command); + Command actual = AccordKeyspace.loadCommand(commandStore, txnId); + logger.info("A: {}", actual); + + Assert.assertEquals(command, actual); + } + + @Test + public void commandsForKeyLoadSave() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + Timestamp maxTimestamp = timestamp(1, clock.incrementAndGet(), 1); + + PartialTxn txn = createPartialTxn(1); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); + TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); + AccordCommand command1 = new AccordCommand(txnId1).initialize(); + AccordCommand command2 = new AccordCommand(txnId2).initialize(); + command1.setPartialTxn(txn); + command2.setPartialTxn(txn); + command1.setExecuteAt(timestamp(1, clock.incrementAndGet(), 1)); + command2.setExecuteAt(timestamp(1, clock.incrementAndGet(), 1)); + + AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); + cfk.updateMax(maxTimestamp); + + Assert.assertEquals(txnId1.hlc(), cfk.timestampMicrosFor(txnId1, true)); + Assert.assertEquals(txnId2.hlc(), cfk.timestampMicrosFor(txnId2, true)); + Assert.assertEquals(txnId2, cfk.lastExecutedTimestamp.get()); + Assert.assertEquals(txnId2.hlc(), cfk.lastExecutedMicros.get()); + + cfk.register(command1); + cfk.register(command2); + + AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); + logger.info("E: {}", cfk); + AccordCommandsForKey actual = AccordKeyspace.loadCommandsForKey(commandStore, key); + logger.info("A: {}", actual); + + Assert.assertEquals(cfk, actual); + } + + @Test + public void commandsForKeyBlindWitnessed() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + PartialTxn txn = createPartialTxn(1); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + + AccordCommandsForKey.WriteOnly writeOnlyCfk = new AccordCommandsForKey.WriteOnly(commandStore, key); + Timestamp maxTimestamp = null; + TreeSet expected = new TreeSet<>(); + + for (int i=0; i<4; i++) + { + maxTimestamp = timestamp(1, clock.incrementAndGet(), 1); + expected.add(maxTimestamp); + writeOnlyCfk.updateMax(maxTimestamp); + } + + AccordKeyspace.getCommandsForKeyMutation(commandStore, writeOnlyCfk, commandStore.nextSystemTimestampMicros()).apply(); + AccordCommandsForKey fullCfk = AccordKeyspace.loadCommandsForKey(commandStore, key); + + Assert.assertEquals(expected, fullCfk.blindWitnessed.getView()); + + fullCfk.applyBlindWitnessedTimestamps(); + Assert.assertEquals(maxTimestamp, fullCfk.max()); + Assert.assertTrue(fullCfk.blindWitnessed.getView().isEmpty()); + } + +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java new file mode 100644 index 000000000000..6d892465934a --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicLong; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.local.Command; +import accord.local.Node; +import accord.local.PreLoadContext; +import accord.local.Status; +import accord.messages.Accept; +import accord.messages.Commit; +import accord.messages.PreAccept; +import accord.primitives.Ballot; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.PartialDeps; +import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTestUtils.*; + +public class AccordCommandTest +{ + + static final AtomicLong clock = new AtomicLong(0); + private static final Node.Id ID1 = new Node.Id(1); + private static final Node.Id ID2 = new Node.Id(2); + private static final Node.Id ID3 = new Node.Id(3); + + @BeforeClass + public static void beforeClass() throws Throwable + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + } + + private static PartitionKey key(int k) + { + TableMetadata metadata = Schema.instance.getTableMetadata("ks", "tbl"); + return new PartitionKey(metadata.keyspace, metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(k))); + } + + /** + * disable cache and make sure correct values are coming in and out of the accord table + */ + @Test + public void basicCycleTest() throws ExecutionException, InterruptedException + { + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); }).get(); + + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + Txn txn = createTxn(1); + Key key = (Key)txn.keys().get(0); + RoutingKey homeKey = key.toUnseekable(); + FullRoute fullRoute = txn.keys().toRoute(homeKey); + PartialRoute route = fullRoute.slice(fullRange(txn)); + PartialTxn partialTxn = txn.slice(route.covering(), true); + PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, false, 1, partialTxn, fullRoute); + + // Check preaccept + commandStore.execute(preAccept, instance -> { + PreAccept.PreAcceptReply reply = preAccept.apply(instance); + Assert.assertTrue(reply.isOk()); + PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; + Assert.assertEquals(txnId, ok.witnessedAt); + Assert.assertTrue(ok.deps.isEmpty()); + }).get(); + + commandStore.execute(preAccept, instance -> { + Command command = instance.command(txnId); + Assert.assertEquals(txnId, command.executeAt()); + Assert.assertEquals(Status.PreAccepted, command.status()); + Assert.assertTrue(command.partialDeps().isEmpty()); + + AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).commandsForKey(key(1)); + Assert.assertEquals(txnId, cfk.max()); + Assert.assertNotNull((cfk.byId()).get(txnId)); + Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); + }).get(); + + // check accept + TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); + Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); + PartialDeps deps; + try (PartialDeps.Builder builder = PartialDeps.builder(route.covering())) + { + builder.add(key, txnId2); + deps = builder.build(); + } + Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); + + commandStore.execute(accept, instance -> { + Accept.AcceptReply reply = accept.apply(instance); + Assert.assertTrue(reply.isOk()); + Assert.assertTrue(reply.deps.isEmpty()); + }).get(); + + commandStore.execute(accept, instance -> { + Command command = instance.command(txnId); + Assert.assertEquals(executeAt, command.executeAt()); + Assert.assertEquals(Status.Accepted, command.status()); + Assert.assertEquals(deps, command.partialDeps()); + + AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).commandsForKey(key(1)); + Assert.assertEquals(executeAt, cfk.max()); + Assert.assertNotNull((cfk.byId()).get(txnId)); + Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); + }).get(); + + // check commit + Commit commit = Commit.SerializerSupport.create(txnId, route, 1, executeAt, partialTxn, deps, fullRoute, null); + commandStore.execute(commit, commit::apply).get(); + + commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { + Command command = instance.command(txnId); + Assert.assertEquals(commit.executeAt, command.executeAt()); + Assert.assertTrue(command.hasBeen(Status.Committed)); + Assert.assertEquals(commit.partialDeps, command.partialDeps()); + + AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).commandsForKey(key(1)); + Assert.assertNotNull((cfk.byId()).get(txnId)); + Assert.assertNotNull((cfk.byExecuteAt()).get(commit.executeAt)); + }).get(); + } + + @Test + public void computeDeps() throws Throwable + { + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); }).get(); + + TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); + Txn txn = createTxn(2); + Key key = (Key)txn.keys().get(0); + RoutingKey homeKey = key.toUnseekable(); + FullRoute fullRoute = txn.keys().toRoute(homeKey); + PartialRoute route = fullRoute.slice(fullRange(txn)); + PartialTxn partialTxn = txn.slice(route.covering(), true); + PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, false, 1, partialTxn, fullRoute); + + commandStore.execute(preAccept1, preAccept1::apply).get(); + + // second preaccept should identify txnId1 as a dependency + TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); + PreAccept preAccept2 = PreAccept.SerializerSupport.create(txnId2, route, 1, 1, false, 1, partialTxn, fullRoute); + commandStore.execute(preAccept2, instance -> { + PreAccept.PreAcceptReply reply = preAccept2.apply(instance); + Assert.assertTrue(reply.isOk()); + PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; + Assert.assertTrue(ok.deps.contains(txnId1)); + }).get(); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java new file mode 100644 index 000000000000..2cc2b3de18bf --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.HashSet; +import java.util.Set; + +import com.google.common.collect.ImmutableSet; +import org.junit.Assert; +import org.junit.Test; + +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; + +public class AccordStateCacheTest +{ + private static final long DEFAULT_ITEM_SIZE = 100; + private static final long KEY_SIZE = 4; + private static final long DEFAULT_NODE_SIZE = nodeSize(DEFAULT_ITEM_SIZE); + + private static class Item implements AccordState + { + long size = DEFAULT_ITEM_SIZE; + + final Integer key; + boolean modified = false; + boolean initialized = false; + + public Item(Integer key) + { + this.key = key; + } + + @Override + public boolean isEmpty() + { + return initialized; + } + + @Override + public Integer key() + { + return key; + } + + @Override + public boolean hasModifications() + { + return modified; + } + + @Override + public void clearModifiedFlag() + { + modified = false; + } + + @Override + public boolean isLoaded() + { + return true; + } + + @Override + public long estimatedSizeOnHeap() + { + return size + KEY_SIZE; + } + } + + private static long nodeSize(long itemSize) + { + return itemSize + KEY_SIZE + AccordStateCache.Node.EMPTY_SIZE; + } + + private static void assertCacheState(AccordStateCache cache, int active, int cached, long bytes) + { + Assert.assertEquals(active, cache.numActiveEntries()); + Assert.assertEquals(cached, cache.numCachedEntries()); + Assert.assertEquals(bytes, cache.bytesCached()); + } + + @Test + public void testAcquisitionAndRelease() + { + AccordStateCache cache = new AccordStateCache(500); + AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + assertCacheState(cache, 0, 0, 0); + + Item item1 = instance.getOrCreate(1); + assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); + Assert.assertNull(cache.head); + Assert.assertNull(cache.tail); + + item1.size = 110; + item1.modified = true; + instance.release(item1); + assertCacheState(cache, 0, 1, nodeSize(110)); + Assert.assertSame(item1, cache.tail.value); + Assert.assertSame(item1, cache.head.value); + + Item item2 = instance.getOrCreate(2); + assertCacheState(cache, 1, 1, DEFAULT_NODE_SIZE + nodeSize(110)); + instance.release(item2); + assertCacheState(cache, 0, 2, DEFAULT_NODE_SIZE + nodeSize(110)); + + Assert.assertSame(item1, cache.tail.value); + Assert.assertSame(item2, cache.head.value); + } + + @Test + public void testRotation() + { + AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 5); + AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + assertCacheState(cache, 0, 0, 0); + + Item[] items = new Item[3]; + for (int i=0; i<3; i++) + { + Item item = instance.getOrCreate(i); + items[i] = item; + instance.release(item); + } + + Assert.assertSame(items[0], cache.tail.value); + Assert.assertSame(items[2], cache.head.value); + assertCacheState(cache, 0, 3, DEFAULT_NODE_SIZE * 3); + + Item item = instance.getOrCreate(1); + assertCacheState(cache, 1, 2, DEFAULT_NODE_SIZE * 3); + + // releasing item should return it to the head + instance.release(item); + assertCacheState(cache, 0, 3, DEFAULT_NODE_SIZE * 3); + Assert.assertSame(items[0], cache.tail.value); + Assert.assertSame(items[1], cache.head.value); + } + + @Test + public void testEvictionOnAcquire() + { + AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 5); + AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + assertCacheState(cache, 0, 0, 0); + + Item[] items = new Item[5]; + for (int i=0; i<5; i++) + { + Item item = instance.getOrCreate(i); + items[i] = item; + instance.release(item); + } + + assertCacheState(cache, 0, 5, DEFAULT_NODE_SIZE * 5); + Assert.assertSame(items[0], cache.tail.value); + Assert.assertSame(items[4], cache.head.value); + + instance.getOrCreate(5); + assertCacheState(cache, 1, 4, DEFAULT_NODE_SIZE * 5); + Assert.assertSame(items[1], cache.tail.value); + Assert.assertSame(items[4], cache.head.value); + Assert.assertFalse(cache.keyIsCached(0)); + Assert.assertFalse(cache.keyIsActive(0)); + } + + @Test + public void testEvictionOnRelease() + { + AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 4); + AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + assertCacheState(cache, 0, 0, 0); + + Item[] items = new Item[5]; + for (int i=0; i<5; i++) + { + Item item = instance.getOrCreate(i); + items[i] = item; + } + + assertCacheState(cache, 5, 0, DEFAULT_NODE_SIZE * 5); + Assert.assertNull(cache.head); + Assert.assertNull(cache.tail); + + instance.release(items[2]); + assertCacheState(cache, 4, 0, DEFAULT_NODE_SIZE * 4); + Assert.assertNull(cache.head); + Assert.assertNull(cache.tail); + + instance.release(items[4]); + assertCacheState(cache, 3, 1, DEFAULT_NODE_SIZE * 4); + Assert.assertSame(items[4], cache.tail.value); + Assert.assertSame(items[4], cache.head.value); + } + + @Test + public void testMultiAcquireRelease() + { + AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 4); + AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + assertCacheState(cache, 0, 0, 0); + + Item item = instance.getOrCreate(0); + Assert.assertNotNull(item); + Assert.assertEquals(1, cache.references(0)); + assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); + + Assert.assertNotNull(instance.getOrCreate(0)); + Assert.assertEquals(2, cache.references(0)); + assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); + + instance.release(item); + assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); + instance.release(item); + assertCacheState(cache, 0, 1, DEFAULT_NODE_SIZE); + } + + @Test + public void evictionBlockedOnSaveFuture() + { + AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 4); + AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + assertCacheState(cache, 0, 0, 0); + + Item[] items = new Item[4]; + for (int i=0; i<4; i++) + { + Item item = instance.getOrCreate(i); + items[i] = item; + instance.release(item); + } + + assertCacheState(cache, 0, 4, DEFAULT_NODE_SIZE * 4); + + AsyncPromise saveFuture = new AsyncPromise<>(); + instance.addSaveFuture(0, saveFuture); + cache.setMaxSize(0); + + // all should have been evicted except 0 + assertCacheState(cache, 0, 1, DEFAULT_NODE_SIZE); + Assert.assertTrue(cache.keyIsCached(0)); + Assert.assertFalse(cache.keyIsCached(1)); + Assert.assertFalse(cache.keyIsCached(2)); + Assert.assertFalse(cache.keyIsCached(3)); + } + + static class SetItem implements AccordState + { + final Integer key; + final Set set = new HashSet<>(); + boolean modified = false; + boolean initialized = false; + + static class WriteOnly extends SetItem implements AccordState.WriteOnly + { + AsyncPromise promise = null; + final Set added = new HashSet<>(); + final Set remove = new HashSet<>(); + + public WriteOnly(Integer key) + { + super(key); + } + + @Override + public void future(Future future) + { + Assert.assertTrue(future instanceof AsyncPromise); + this.promise = (AsyncPromise) future; + } + + @Override + public Future future() + { + return promise; + } + + @Override + public void applyChanges(SetItem instance) + { + instance.set.addAll(added); + instance.set.removeAll(remove); + } + } + + + public SetItem(Integer key) + { + this.key = key; + } + + @Override + public boolean isEmpty() + { + return initialized; + } + + @Override + public Integer key() + { + return key; + } + + @Override + public boolean hasModifications() + { + return modified; + } + + @Override + public void clearModifiedFlag() + { + this.modified = false; + } + + @Override + public boolean isLoaded() + { + return true; + } + + @Override + public long estimatedSizeOnHeap() + { + return set.size() * 100L; + } + } + + @Test + public void writeOnlyCycle() + { + AccordStateCache cache = new AccordStateCache(500); + AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); + SetItem onDisk = new SetItem(5); + onDisk.set.addAll(ImmutableSet.of(1, 2, 3)); + Assert.assertEquals(0, instance.pendingWriteOnlyOperations(5)); + + SetItem.WriteOnly writeOnly1 = new SetItem.WriteOnly(5); + writeOnly1.added.addAll(ImmutableSet.of(4, 5)); + writeOnly1.future(new AsyncPromise<>()); + instance.addWriteOnly(writeOnly1); + Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); + + SetItem.WriteOnly writeOnly2 = new SetItem.WriteOnly(5); + writeOnly2.remove.addAll(ImmutableSet.of(2, 4)); + writeOnly2.future(new AsyncPromise<>()); + instance.addWriteOnly(writeOnly2); + Assert.assertEquals(2, instance.pendingWriteOnlyOperations(5)); + + Assert.assertNull(instance.getSaveFuture(5)); + Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); + + instance.lockWriteOnlyGroupIfExists(5); + Assert.assertTrue(instance.writeOnlyGroupIsLocked(5)); + Assert.assertEquals(ImmutableSet.of(1, 2, 3), onDisk.set); + Assert.assertTrue(instance.canEvict(5)); + + instance.applyAndRemoveWriteOnlyGroup(onDisk); + Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); + Assert.assertEquals(ImmutableSet.of(1, 3, 5), onDisk.set); + + // write only futures should have been merged and promoted to normal save futures, which would + // prevent the cached object from being purged until they were completed + Future saveFuture = instance.getSaveFuture(5); + Assert.assertNotNull(saveFuture); + Assert.assertFalse(saveFuture.isDone()); + Assert.assertFalse(instance.canEvict(5)); + + writeOnly1.promise.setSuccess(null); + Assert.assertFalse(saveFuture.isDone()); + Assert.assertFalse(instance.canEvict(5)); + + writeOnly2.promise.setSuccess(null); + Assert.assertTrue(saveFuture.isDone()); + Assert.assertTrue(instance.canEvict(5)); + } + + // write only operations should not be purged out of order + @Test + public void writeOnlyPurging() + { + AccordStateCache cache = new AccordStateCache(500); + AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); + SetItem.WriteOnly[] writeOnly = new SetItem.WriteOnly[4]; + for (int i=0; i()); + instance.addWriteOnly(item); + writeOnly[i] = item; + } + + Assert.assertEquals(4, instance.pendingWriteOnlyOperations(5)); + + // finishing the first item should cause it to be purged + writeOnly[0].promise.setSuccess(null); + instance.purgeWriteOnly(5); + Assert.assertEquals(3, instance.pendingWriteOnlyOperations(5)); + + // finishing the second item should not, since the (now) first item has not completed + writeOnly[2].promise.setSuccess(null); + instance.purgeWriteOnly(5); + Assert.assertEquals(3, instance.pendingWriteOnlyOperations(5)); + + // then finishing the first item should cause both items to be purged + writeOnly[1].promise.setSuccess(null); + instance.purgeWriteOnly(5); + Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); + } + + @Test + public void writeOnlyPurgedLock() + { + AccordStateCache cache = new AccordStateCache(500); + AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); + + SetItem.WriteOnly item = new SetItem.WriteOnly(5); + item.added.add(0); + item.future(new AsyncPromise<>()); + instance.addWriteOnly(item); + + instance.lockWriteOnlyGroupIfExists(5); + + // the write only item should not be purged, even though it's complete + item.promise.setSuccess(null); + instance.purgeWriteOnly(5); + Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); + } + + // if a load future exists for the key we're creating a write group for, we need to lock + // the group so the loading instance gets changes applied when it finishes loading + @Test + public void testLoadFutureAutoLocksWriteOnlyInstances() + { + AccordStateCache cache = new AccordStateCache(500); + AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); + + AsyncPromise loadfuture = new AsyncPromise<>(); + instance.setLoadFuture(5, loadfuture); + + Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); + Assert.assertEquals(0, instance.pendingWriteOnlyOperations(5)); + + // adding a write only object should immediately lock the group, since there's an existing load future + SetItem.WriteOnly item = new SetItem.WriteOnly(5); + item.added.add(0); + item.future(new AsyncPromise<>()); + instance.addWriteOnly(item); + + Assert.assertTrue(instance.writeOnlyGroupIsLocked(5)); + Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); + } + + // if a future is added and another one exists for the same key, they should be merged + @Test + public void testFutureMerging() + { + AccordStateCache cache = new AccordStateCache(500); + AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); + AsyncPromise promise1 = new AsyncPromise<>(); + AsyncPromise promise2 = new AsyncPromise<>(); + instance.addSaveFuture(5, promise1); + instance.addSaveFuture(5, promise2); + + Future future = instance.getSaveFuture(5); + Assert.assertNotSame(future, promise1); + Assert.assertNotSame(future, promise2); + + Assert.assertFalse(future.isDone()); + + promise1.setSuccess(null); + Assert.assertFalse(future.isDone()); + + promise2.setSuccess(null); + Assert.assertTrue(future.isDone()); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java new file mode 100644 index 000000000000..531b513fa6d2 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.function.LongSupplier; + +import javax.annotation.Nullable; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +import org.junit.Assert; + +import accord.api.Data; +import accord.api.ProgressLog; +import accord.api.RoutingKey; +import accord.api.Write; +import accord.impl.InMemoryCommandStore; +import accord.local.Command; +import accord.local.CommandStores; +import accord.local.Node; +import accord.local.Node.Id; +import accord.local.NodeTimeService; +import accord.local.PreLoadContext; +import accord.local.Status.Known; +import accord.primitives.Ballot; +import accord.primitives.Ranges; +import accord.primitives.Keys; +import accord.primitives.PartialTxn; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Unseekables; +import accord.primitives.Writes; +import accord.topology.Shard; +import accord.topology.Topology; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.statements.TransactionStatement; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.accord.api.AccordAgent; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; + +import static accord.primitives.Routable.Domain.Key; +import static java.lang.String.format; + +public class AccordTestUtils +{ + public static Id localNodeId() + { + return EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + } + + public static final ProgressLog NOOP_PROGRESS_LOG = new ProgressLog() + { + @Override public void unwitnessed(TxnId txnId, RoutingKey homeKey, ProgressShard shard) {} + @Override public void preaccepted(Command command, ProgressShard progressShard) {} + @Override public void accepted(Command command, ProgressShard progressShard) {} + @Override public void committed(Command command, ProgressShard progressShard) {} + @Override public void readyToExecute(Command command, ProgressShard progressShard) {} + @Override public void executed(Command command, ProgressShard progressShard) {} + @Override public void invalidated(Command command, ProgressShard progressShard) {} + @Override public void durable(Command command, Set persistedOn) {} + @Override public void durable(TxnId txnId, @Nullable Unseekables someKeys, ProgressShard shard) {} + @Override public void durableLocal(TxnId txnId) {} + @Override public void waiting(TxnId blockedBy, Known blockedUntil, Unseekables blockedOn) {} + }; + + public static TxnId txnId(long epoch, long hlc, int node) + { + return new TxnId(epoch, hlc, Txn.Kind.Write, Key, new Node.Id(node)); + } + + public static Timestamp timestamp(long epoch, long hlc, int node) + { + return Timestamp.fromValues(epoch, hlc, new Node.Id(node)); + } + + public static Ballot ballot(long epoch, long hlc, int node) + { + return Ballot.fromValues(epoch, hlc, new Node.Id(node)); + } + + /** + * does the reads, writes, and results for a command without the consensus + */ + public static void processCommandResult(AccordCommandStore commandStore, Command command) throws Throwable + { + + commandStore.execute(PreLoadContext.contextFor(Collections.emptyList(), command.partialTxn().keys()), + instance -> { + PartialTxn txn = command.partialTxn(); + TxnRead read = (TxnRead) txn.read(); + Data readData = read.keys().stream() + .map(key -> { + try + { + return read.read(key, command.txnId().rw(), instance, command.executeAt(), null).get(); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); + } + }) + .reduce(null, TxnData::merge); + Write write = txn.update().apply(readData); + ((AccordCommand)command).setWrites(new Writes(command.executeAt(), (Keys)txn.keys(), write)); + ((AccordCommand)command).setResult(txn.query().compute(command.txnId(), readData, txn.read(), txn.update())); + }).get(); + } + + public static Txn createTxn(String query) + { + return createTxn(query, QueryOptions.DEFAULT); + } + + public static Txn createTxn(String query, List binds) + { + TransactionStatement statement = parse(query); + QueryOptions options = QueryProcessor.makeInternalOptions(statement, binds.toArray(new Object[binds.size()])); + return statement.createTxn(ClientState.forInternalCalls(), options); + } + + public static Txn createTxn(String query, QueryOptions options) + { + TransactionStatement statement = parse(query); + return statement.createTxn(ClientState.forInternalCalls(), options); + } + + public static TransactionStatement parse(String query) + { + TransactionStatement.Parsed parsed = (TransactionStatement.Parsed) QueryProcessor.parseStatement(query); + Assert.assertNotNull(parsed); + TransactionStatement statement = (TransactionStatement) parsed.prepare(ClientState.forInternalCalls()); + return statement; + } + + public static Txn createTxn(int readKey, int... writeKeys) + { + StringBuilder sb = new StringBuilder("BEGIN TRANSACTION\n"); + sb.append(format("LET row1 = (SELECT * FROM ks.tbl WHERE k=%s AND c=0);\n", readKey)); + sb.append("SELECT row1.v;\n"); + sb.append("IF row1 IS NULL THEN\n"); + for (int key : writeKeys) + sb.append(format("INSERT INTO ks.tbl (k, c, v) VALUES (%s, 0, 1);\n", key)); + sb.append("END IF\n"); + sb.append("COMMIT TRANSACTION"); + return createTxn(sb.toString()); + } + + public static Txn createTxn(int key) + { + return createTxn(key, key); + } + + public static Ranges fullRange(Txn txn) + { + PartitionKey key = (PartitionKey) txn.keys().get(0); + return Ranges.of(TokenRange.fullRange(key.keyspace())); + } + + public static PartialTxn createPartialTxn(int key) + { + Txn txn = createTxn(key, key); + Ranges ranges = fullRange(txn); + return new PartialTxn.InMemory(ranges, txn.kind(), txn.keys(), txn.read(), txn.query(), txn.update()); + } + + private static class SingleEpochRanges extends CommandStores.RangesForEpochHolder + { + private final Ranges ranges; + + public SingleEpochRanges(Ranges ranges) + { + this.ranges = ranges; + this.current = new CommandStores.RangesForEpoch(1, ranges); + } + } + + public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongSupplier now, String keyspace, String table) + { + TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); + TokenRange range = TokenRange.fullRange(metadata.keyspace); + Node.Id node = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); + NodeTimeService time = new NodeTimeService() + { + @Override public Id id() { return node;} + @Override public long epoch() {return 1; } + @Override public long now() {return now.getAsLong(); } + @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } + }; + return new InMemoryCommandStore.Synchronized(0, + time, + new AccordAgent(), + null, + cs -> null, + new SingleEpochRanges(Ranges.of(range))); + } + + public static AccordCommandStore createAccordCommandStore(Node.Id node, LongSupplier now, Topology topology) + { + NodeTimeService time = new NodeTimeService() + { + @Override public Id id() { return node;} + @Override public long epoch() {return 1; } + @Override public long now() {return now.getAsLong(); } + @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } + }; + return new AccordCommandStore(0, + time, + new AccordAgent(), + null, + cs -> NOOP_PROGRESS_LOG, + new SingleEpochRanges(topology.rangesForNode(node))); + } + public static AccordCommandStore createAccordCommandStore(LongSupplier now, String keyspace, String table) + { + TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); + TokenRange range = TokenRange.fullRange(metadata.keyspace); + Node.Id node = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); + AccordCommandStore store = createAccordCommandStore(node, now, topology); + store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).setCacheSize(1 << 20)); + return store; + } + + public static void execute(AccordCommandStore commandStore, Runnable runnable) + { + try + { + commandStore.executor().submit(runnable).get(); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e.getCause()); + } + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java new file mode 100644 index 000000000000..9c8540ebda8c --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.topology.Topology; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.BufferDecoratedKey; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; + +public class AccordTopologyTest +{ + @BeforeClass + public static void beforeClass() throws Throwable + { + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + } + + @Test + public void minMaxTokenTest() + { + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + Topology topology = AccordTopologyUtils.createTopology(1); + Assert.assertNotEquals(0, topology.size()); + TableId tableId = Schema.instance.getTableMetadata("ks", "tbl").id; + Token minToken = partitioner.getMinimumToken(); + Token maxToken = partitioner.getMaximumToken(); + +// topology.forKey(new AccordKey.TokenKey(tableId, minToken.minKeyBound())); + topology.forKey(new PartitionKey("ks", tableId, new BufferDecoratedKey(minToken, ByteBufferUtil.bytes(0))).toUnseekable()); +// topology.forKey(new AccordKey.TokenKey(tableId, minToken.maxKeyBound())); +// topology.forKey(new AccordKey.TokenKey(tableId, maxToken.minKeyBound())); + topology.forKey(new PartitionKey("ks", tableId, new BufferDecoratedKey(maxToken, ByteBufferUtil.bytes(0))).toUnseekable()); +// topology.forKey(new AccordKey.TokenKey(tableId, maxToken.maxKeyBound())); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java b/test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java new file mode 100644 index 000000000000..c35fbce0d401 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import org.junit.Assert; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.Node; +import org.apache.cassandra.locator.InetAddressAndPort; + +public class EndpointMappingTest +{ + private static final Logger logger = LoggerFactory.getLogger(EndpointMappingTest.class); + + @Test + public void identityTest() throws Throwable + { + InetAddressAndPort endpoint = InetAddressAndPort.getByName("127.0.0.1"); + Node.Id id = EndpointMapping.endpointToId(endpoint); + Assert.assertEquals(endpoint, EndpointMapping.idToEndpoint(id)); + logger.info("{} -> {}", endpoint, id); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java new file mode 100644 index 000000000000..64feaedc982d --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.SerializerTestUtils; + +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; + +public class AccordKeyTest +{ + private static final TableId TABLE1 = TableId.fromString("00000000-0000-0000-0000-000000000001"); + private static final TableId TABLE2 = TableId.fromString("00000000-0000-0000-0000-000000000002"); + + @BeforeClass + public static void setupClass() + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl1 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE1), + parse("CREATE TABLE tbl2 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE2)); + + } + + public static IPartitioner partitioner(TableId tableId) + { + return Schema.instance.getTableMetadata(tableId).partitioner; + } + + @Test + public void partitionKeyTest() + { + DecoratedKey dk = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk = new PartitionKey("ks", TABLE1, dk); + SerializerTestUtils.assertSerializerIOEquality(pk, PartitionKey.serializer); + } + + @Test + public void tokenKeyTest() + { + DecoratedKey dk = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); + TokenKey pk = new TokenKey("", dk.getToken()); + SerializerTestUtils.assertSerializerIOEquality(pk, TokenKey.serializer); + } + + @Test + public void comparisonTest() + { + DecoratedKey dk = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk = new PartitionKey("", TABLE1, dk); + TokenKey tk = new TokenKey("", dk.getToken()); + TokenKey tkLow = new TokenKey("", dk.getToken().decreaseSlightly()); + TokenKey tkHigh = new TokenKey("", dk.getToken().increaseSlightly()); + + Assert.assertTrue(tk.compareTo(pk) > 0); + Assert.assertTrue(tkLow.compareTo(pk) < 0); + Assert.assertTrue(pk.compareTo(tkHigh) < 0); + } + + @Test + public void tableComparisonTest() + { + Assert.assertTrue(TABLE1.compareTo(TABLE2) < 0); + + DecoratedKey dk1 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk1 = new PartitionKey("", TABLE1, dk1); + + DecoratedKey dk2 = partitioner(TABLE2).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk2 = new PartitionKey("", TABLE2, dk2); + + Assert.assertTrue(pk1.compareTo(pk2) < 0); + } + + @Test + public void keyspaceComparisonTest() + { + DecoratedKey dk1 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk1 = new PartitionKey("a", TABLE1, dk1); + + DecoratedKey dk2 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk2 = new PartitionKey("b", TABLE1, dk2); + + Assert.assertTrue(pk1.compareTo(pk2) < 0); + } + + @Test + public void sentinelComparisonTest() + { + Assert.assertTrue(TABLE1.compareTo(TABLE2) < 0); + DecoratedKey dk1 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk1 = new PartitionKey("a", TABLE1, dk1); + + DecoratedKey dk2 = partitioner(TABLE2).decorateKey(ByteBufferUtil.bytes(5)); + PartitionKey pk2 = new PartitionKey("b", TABLE2, dk2); + + SentinelKey loSentinel = SentinelKey.min("a"); + SentinelKey hiSentinel = SentinelKey.max("a"); + Assert.assertTrue(loSentinel.compareTo(hiSentinel) < 0); + Assert.assertTrue(pk1.compareTo(loSentinel) > 0); + Assert.assertTrue(loSentinel.compareTo(pk1) < 0); + Assert.assertTrue(pk1.compareTo(hiSentinel) < 0); + Assert.assertTrue(hiSentinel.compareTo(pk1) > 0); + Assert.assertTrue(hiSentinel.compareTo(pk2) < 0); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java new file mode 100644 index 000000000000..c5bc97984d2e --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.util.Collections; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.Status; +import accord.primitives.PartialTxn; +import accord.primitives.TxnId; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCommand; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; + +import static com.google.common.collect.Iterables.getOnlyElement; +import static java.util.Collections.singleton; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; +import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.execute; +import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; + +public class AsyncLoaderTest +{ + @BeforeClass + public static void beforeClass() throws Throwable + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + } + + /** + * Loading a cached resource shoudln't block + */ + @Test + public void cachedTest() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + PartialTxn txn = createPartialTxn(0); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + + // acquire / release + AccordCommand command = commandCache.getOrCreate(txnId).initialize(); + command.setPartialTxn(txn); + commandCache.release(command); + AccordCommandsForKey cfk = cfkCacche.getOrCreate(key).initialize(); + cfkCacche.release(cfk); + + AsyncContext context = new AsyncContext(); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + + // everything is cached, so the loader should return immediately + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(result); + }); + + Assert.assertSame(command, context.commands.get(txnId)); + Assert.assertSame(cfk, context.commandsForKey.get(key)); + } + + /** + * Loading a cached resource should block + */ + @Test + public void loadTest() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + PartialTxn txn = createPartialTxn(0); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + + // create / persist + AccordCommand command = new AccordCommand(txnId).initialize(); + command.setPartialTxn(txn); + AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); + AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); + AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); + + // resources are on disk only, so the loader should suspend... + AsyncContext context = new AsyncContext(); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + AsyncPromise cbFired = new AsyncPromise<>(); + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> { + Assert.assertNull(t); + cbFired.setSuccess(null); + }); + Assert.assertFalse(result); + }); + + cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); + + // then return immediately after the callback has fired + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(result); + }); + } + + /** + * Test when some resources are cached and others need to be loaded + */ + @Test + public void partialLoadTest() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + PartialTxn txn = createPartialTxn(0); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + + // acquire /release, create / persist + AccordCommand command = commandCache.getOrCreate(txnId).initialize(); + command.setPartialTxn(txn); + commandCache.release(command); + AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); + AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); + + // resources are on disk only, so the loader should suspend... + AsyncContext context = new AsyncContext(); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + AsyncPromise cbFired = new AsyncPromise<>(); + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> { + Assert.assertNull(t); + cbFired.setSuccess(null); + }); + Assert.assertFalse(result); + }); + + cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); + + // then return immediately after the callback has fired + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(result); + }); + } + + /** + * If another process is loading a resource, piggyback on it's future + */ + @Test + public void inProgressLoadTest() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + PartialTxn txn = createPartialTxn(0); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + + // acquire / release + AccordCommand command = commandCache.getOrCreate(txnId).initialize(); + command.setPartialTxn(txn); + commandCache.release(command); + AccordCommandsForKey cfk = cfkCacche.getOrCreate(key).initialize(); + cfkCacche.release(cfk); + + AsyncContext context = new AsyncContext(); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + + // since there's a read future associated with the txnId, we'll wait for it to load + AsyncPromise readFuture = new AsyncPromise<>(); + commandCache.setLoadFuture(command.txnId(), readFuture); + + AsyncPromise cbFired = new AsyncPromise<>(); + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> { + Assert.assertNull(t); + cbFired.setSuccess(null); + }); + Assert.assertFalse(result); + }); + + Assert.assertFalse(cbFired.isSuccess()); + readFuture.setSuccess(null); + cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); + Assert.assertTrue(cbFired.isSuccess()); + + // then return immediately after the callback has fired + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(result); + }); + } + + @Test + public void pendingWriteOnlyApplied() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + TxnId blockApply = txnId(1, clock.incrementAndGet(), 1); + TxnId blockCommit = txnId(1, clock.incrementAndGet(), 1); + PartialTxn txn = createPartialTxn(0); + PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); + + AccordCommand command = new AccordCommand(txnId).initialize(); + command.setPartialTxn(txn); + command.setExecuteAt(txnId); + command.setStatus(Status.Committed); + AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); + command.clearModifiedFlag(); + + execute(commandStore, () -> { + AccordStateCache.Instance cache = commandStore.commandCache(); + AccordCommand.WriteOnly writeOnly1 = new AccordCommand.WriteOnly(txnId); + writeOnly1.blockingApplyOn.blindAdd(blockApply); + writeOnly1.future(new AsyncPromise<>()); + cache.addWriteOnly(writeOnly1); + + AccordCommand.WriteOnly writeOnly2 = new AccordCommand.WriteOnly(txnId); + writeOnly2.blockingCommitOn.blindAdd(blockCommit); + writeOnly2.future(new AsyncPromise<>()); + cache.addWriteOnly(writeOnly2); + + AsyncContext context = new AsyncContext(); + AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId), Collections.emptyList()); + while (true) + { + if (loader.load(context, (o, t) -> Assert.assertNull(t))) + break; + } + AccordCommand loaded = context.commands.get(txnId); + + Assert.assertEquals(txnId, loaded.executeAt()); + Assert.assertEquals(Status.Committed, loaded.status()); + Assert.assertEquals(blockApply, Iterables.getOnlyElement(loaded.blockingApplyOn.getView())); + Assert.assertEquals(blockCommit, Iterables.getOnlyElement(loaded.blockingCommitOn.getView())); + }); + } + + @Test + public void failedLoadTest() throws Throwable + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); + TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); + + AsyncPromise promise1 = new AsyncPromise<>(); + AsyncPromise promise2 = new AsyncPromise<>(); + AsyncPromise callback = new AsyncPromise<>(); + RuntimeException failure = new RuntimeException(); + + execute(commandStore, () -> { + AsyncContext context = new AsyncContext(); + AtomicInteger loadCalls = new AtomicInteger(); + AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Collections.emptyList()){ + @Override + Function> loadCommandFunction(Object callback) + { + return cmd -> { + TxnId txnId = cmd.txnId(); + loadCalls.incrementAndGet(); + if (txnId.equals(txnId1)) + return promise1; + if (txnId.equals(txnId2)) + return promise2; + throw new AssertionError("Unknown txnId: " + txnId); + }; + } + }; + + boolean result = loader.load(context, (u, t) -> { + Assert.assertFalse(callback.isDone()); + Assert.assertNull(u); + Assert.assertEquals(failure, t); + callback.trySuccess(null); + }); + Assert.assertFalse(result); + Assert.assertEquals(2, loadCalls.get()); + }); + + promise1.tryFailure(failure); + callback.get(); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java new file mode 100644 index 000000000000..4a71bff6162d --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.util.Collections; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Consumer; + +import com.google.common.collect.Iterables; +import com.google.common.util.concurrent.Futures; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.Command; +import accord.local.SafeCommandStore; +import accord.local.Status; +import accord.primitives.Keys; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.transform.FilteredPartitions; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCommand; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.FBUtilities; + +import static accord.local.PreLoadContext.contextFor; +import static java.util.Collections.emptyList; +import static java.util.Collections.singleton; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; +import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; + +public class AsyncOperationTest +{ + private static final AtomicLong clock = new AtomicLong(0); + + @BeforeClass + public static void beforeClass() throws Throwable + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + } + + @Before + public void before() + { + QueryProcessor.executeInternal("TRUNCATE system_accord.commands"); + QueryProcessor.executeInternal("TRUNCATE system_accord.commands_for_key"); + } + + /** + * Commands which were not previously on disk and were only accessed via `ifPresent`, and therefore, + * not initialized, should not be saved at the end of the operation + */ + @Test + public void optionalCommandTest() throws Throwable + { + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + Txn txn = createTxn((int)clock.incrementAndGet()); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + + commandStore.execute(contextFor(txnId), instance -> { + Command command = instance.ifPresent(txnId); + Assert.assertNull(command); + }).get(); + + UntypedResultSet result = AccordKeyspace.loadCommandRow(commandStore, txnId); + Assert.assertTrue(result.isEmpty()); + } + + @Test + public void optionalCommandsForKeyTest() throws Throwable + { + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + Txn txn = createTxn((int)clock.incrementAndGet()); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + + commandStore.execute(contextFor(Collections.emptyList(), Keys.of(key)),instance -> { + AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).maybeCommandsForKey(key); + Assert.assertNull(cfk); + }).get(); + + long nowInSeconds = FBUtilities.nowInSeconds(); + SinglePartitionReadCommand command = AccordKeyspace.getCommandsForKeyRead(commandStore, key, nowInSeconds); + try(ReadExecutionController controller = command.executionController(); + FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) + { + Assert.assertFalse(partitions.hasNext()); + } + } + + private static AccordCommand createCommittedAndPersist(AccordCommandStore commandStore, TxnId txnId, Timestamp executeAt) + { + AccordCommand command = new AccordCommand(txnId).initialize(); + command.setPartialTxn(createPartialTxn(0)); + command.setExecuteAt(executeAt); + command.setStatus(Status.Committed); + AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); + command.clearModifiedFlag(); + return command; + } + + private static AccordCommand createCommittedAndPersist(AccordCommandStore commandStore, TxnId txnId) + { + return createCommittedAndPersist(commandStore, txnId, txnId); + } + + private static void assertFutureState(AccordStateCache.Instance cache, TxnId txnId, boolean expectLoadFuture, boolean expectSaveFuture) + { + if (cache.hasLoadFuture(txnId) != expectLoadFuture) + throw new AssertionError(expectLoadFuture ? "Load future unexpectedly not found for " + txnId + : "Unexpectedly found load future for " + txnId); + if (cache.hasSaveFuture(txnId) != expectSaveFuture) + throw new AssertionError(expectSaveFuture ? "Save future unexpectedly not found for " + txnId + : "Unexpectedly found save future for " + txnId); + + } + + /** + * save and load futures should be cleaned up as part of the operation + */ + @Test + public void testFutureCleanup() + { + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + + AccordCommand command = createCommittedAndPersist(commandStore, txnId); + + Consumer consumer = instance -> ((AccordCommand)instance.command(txnId)).setStatus(Status.PreApplied); + AsyncOperation operation = new AsyncOperation.ForConsumer(commandStore, singleton(txnId), emptyList(), consumer) + { + + private AccordStateCache.Instance cache() + { + return commandStore.commandCache(); + } + + @Override + AsyncLoader createAsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) + { + return new AsyncLoader(commandStore, txnIds, keys) { + + @Override + void state(State state) + { + switch (state) + { + case SETUP: + case FINISHED: + assertFutureState(cache(), txnId, false, false); + break; + case LOADING: + assertFutureState(cache(), txnId, true, false); + } + super.state(state); + } + }; + } + + @Override + AsyncWriter createAsyncWriter(AccordCommandStore commandStore) + { + return new AsyncWriter(commandStore) { + + @Override + void setState(State state) + { + switch (state) + { + case SETUP: + case FINISHED: + assertFutureState(cache(), txnId, false, false); + break; + case SAVING: + assertFutureState(cache(), txnId, false, true); + + } + super.setState(state); + } + }; + } + }; + + commandStore.executor().submit(operation); + + Futures.getUnchecked(operation); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java new file mode 100644 index 000000000000..8526daa67552 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.Command; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCommand; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordPartialCommand; +import org.apache.cassandra.service.accord.api.PartitionKey; + +import static accord.local.PreLoadContext.contextFor; +import static com.google.common.collect.Iterables.getOnlyElement; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.execute; +import static org.apache.cassandra.service.accord.AccordTestUtils.fullRange; +import static org.apache.cassandra.service.accord.AccordTestUtils.timestamp; +import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; + +public class AsyncWriterTest +{ + @BeforeClass + public static void beforeClass() throws Throwable + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + } + + private static void save(AccordCommandStore commandStore, AsyncContext context) + { + execute(commandStore, () -> { + AsyncWriter writer = new AsyncWriter(commandStore); + while (true) + { + if (writer.save(context, (o, t) -> Assert.assertNull(t))) + break; + } + }); + context.commands.items.values().forEach(AccordCommand::clearModifiedFlag); + context.commandsForKey.items.values().forEach(AccordCommandsForKey::clearModifiedFlag); + } + + @Test + public void waitingOnDenormalization() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + TxnId blockingId = txnId(1, clock.incrementAndGet(), 1); + TxnId waitingId = txnId(1, clock.incrementAndGet(), 1); + Txn txn = createTxn(0); + Ranges ranges = fullRange(txn); + AccordCommand blocking = new AccordCommand(blockingId).initialize(); + blocking.setPartialTxn(txn.slice(ranges, true)); + blocking.setExecuteAt(blockingId); + blocking.setStatus(Status.Committed); + AccordKeyspace.getCommandMutation(commandStore, blocking, commandStore.nextSystemTimestampMicros()).apply(); + blocking.clearModifiedFlag(); + + AccordCommand waiting = new AccordCommand(waitingId).initialize(); + waiting.setPartialTxn(txn.slice(ranges, true)); + waiting.setExecuteAt(waitingId); + waiting.setStatus(Status.Committed); + AccordKeyspace.getCommandMutation(commandStore, waiting, commandStore.nextSystemTimestampMicros()).apply(); + waiting.clearModifiedFlag(); + + AsyncContext context = new AsyncContext(); + waiting.addWaitingOnApplyIfAbsent(blocking.txnId(), blocking.executeAt()); + context.commands.add(waiting); + save(commandStore, context); + + // load the blocking command and confirm the waiting command is listed as being blocked + blocking = AccordKeyspace.loadCommand(commandStore, blockingId); + Assert.assertTrue(blocking.blockingApplyOn.getView().contains(waitingId)); + + // now change the blocking command and check its changes are reflected in the waiting command + context = new AsyncContext(); + blocking.setStatus(Status.ReadyToExecute); + context.commands.add(blocking); + save(commandStore, context); + + waiting = AccordKeyspace.loadCommand(commandStore, waitingId); + AccordCommand waitingFinal = waiting; + execute(commandStore, () -> { + AsyncContext ctx = new AsyncContext(); + commandStore.setContext(ctx); + TxnId blockingSummary = waitingFinal.firstWaitingOnApply(null); + Assert.assertEquals(blockingId, blockingSummary); + commandStore.unsetContext(ctx); + }); + } + + @Test + public void commandsPerKeyDenormalization() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); + Txn txn = createTxn(0); + Ranges ranges = fullRange(txn); + PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); + + AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); + AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); + Assert.assertTrue(cfk.byExecuteAt.isEmpty()); + Assert.assertTrue(cfk.byId.isEmpty()); + + AccordCommand command = new AccordCommand(txnId).initialize(); + command.setPartialTxn(txn.slice(ranges, true)); + command.setExecuteAt(executeAt); + command.setSaveStatus(SaveStatus.AcceptedWithDefinition); + AsyncContext context = new AsyncContext(); + context.commands.add(command); + save(commandStore, context); + + AccordCommandsForKey cfkUncommitted = AccordKeyspace.loadCommandsForKey(commandStore, key); + execute(commandStore, () -> { + AsyncContext ctx = new AsyncContext(); + commandStore.setContext(ctx); + AccordPartialCommand summary = getOnlyElement(cfkUncommitted.byId().all().collect(Collectors.toList())); + Assert.assertTrue(cfkUncommitted.byId.map.getView().containsKey(txnId)); + Assert.assertTrue(cfkUncommitted.byExecuteAt.map.getView().containsKey(executeAt)); + Assert.assertEquals(Status.Accepted, summary.status()); + Assert.assertEquals(executeAt, summary.executeAt()); + commandStore.unsetContext(ctx); + }); + + // commit, summary should be moved to committed maps + command.setStatus(Status.Committed); + context = new AsyncContext(); + context.commands.add(command); + save(commandStore, context); + + AccordCommandsForKey cfkCommitted = AccordKeyspace.loadCommandsForKey(commandStore, key); + execute(commandStore, () -> { + AsyncContext ctx = new AsyncContext(); + commandStore.setContext(ctx); + AccordPartialCommand idSummary = getOnlyElement(cfkCommitted.byId().all().collect(Collectors.toList())); + AccordPartialCommand executeSummary = getOnlyElement(cfkCommitted.byExecuteAt().all().collect(Collectors.toList())); + + Assert.assertTrue(cfkCommitted.byId.map.getView().containsKey(txnId)); + Assert.assertTrue(cfkCommitted.byExecuteAt.map.getView().containsKey(executeAt)); + Assert.assertEquals(idSummary, executeSummary); + + Assert.assertEquals(Status.Committed, idSummary.status()); + Assert.assertEquals(executeAt, idSummary.executeAt()); + commandStore.unsetContext(ctx); + }); + } + + @Test + public void partialCommandDenormalization() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + TxnId blockingId = txnId(1, clock.incrementAndGet(), 1); + TxnId waitingId = txnId(1, clock.incrementAndGet(), 1); + Txn txn = createTxn(0); + Ranges ranges = fullRange(txn); + + { + AccordCommand blocking = new AccordCommand(blockingId).initialize(); + blocking.setPartialTxn(txn.slice(ranges, true)); + blocking.setExecuteAt(blockingId); + blocking.setStatus(Status.Committed); + + AccordCommand waiting = new AccordCommand(waitingId).initialize(); + waiting.setPartialTxn(txn.slice(ranges, true)); + waiting.setExecuteAt(waitingId); + waiting.setStatus(Status.Committed); + waiting.addWaitingOnApplyIfAbsent(blocking.txnId(), blocking.executeAt()); + + blocking.addListener(waiting); + + AccordKeyspace.getCommandMutation(commandStore, blocking, commandStore.nextSystemTimestampMicros()).apply(); + AccordKeyspace.getCommandMutation(commandStore, waiting, commandStore.nextSystemTimestampMicros()).apply(); + blocking.clearModifiedFlag(); + waiting.clearModifiedFlag(); + } + + // confirm the blocking operation has the waiting one as a listener + commandStore.execute(contextFor(blockingId), cs -> { + AccordCommand blocking = (AccordCommand) cs.command(blockingId); + Assert.assertTrue(blocking.hasListenerFor(waitingId)); + }); + + // remove listener from PartialCommand + commandStore.execute(contextFor(waitingId), cs -> { + Command waiting = cs.command(waitingId); + TxnId blocking = ((AccordCommand)waiting).firstWaitingOnApply(null); + Assert.assertNotNull(blocking); + Assert.assertEquals(blockingId, blocking); + }); + + // confirm it was propagated to the full command + commandStore.execute(contextFor(blockingId), cs -> { + AccordCommand blocking = (AccordCommand) cs.command(blockingId); + Assert.assertFalse(blocking.hasListenerFor(waitingId)); + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java new file mode 100644 index 000000000000..34fda94017a1 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.Txn; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.SerializerTestUtils; + +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; + +public class CommandSerializersTest +{ + @BeforeClass + public static void setupClass() + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + + } + + @Test + public void txnSerializer() + { + Txn txn = AccordTestUtils.createTxn("BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl WHERE k=0 AND c=0);\n" + + " SELECT row1.v;\n" + + " IF row1 IS NULL THEN\n" + + " INSERT INTO ks.tbl (k, c, v) VALUES (0, 0, 1);\n" + + " END IF\n" + + "COMMIT TRANSACTION"); + PartitionKey key = (PartitionKey) txn.keys().get(0); + PartialTxn expected = txn.slice(Ranges.of(TokenRange.fullRange(key.keyspace())), true); + SerializerTestUtils.assertSerializerIOEquality(expected, CommandSerializers.partialTxn); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/TopologySerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/TopologySerializersTest.java new file mode 100644 index 000000000000..835d36313d95 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/TopologySerializersTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import org.junit.Test; + +import accord.local.Node; +import org.apache.cassandra.utils.SerializerTestUtils; + + +public class TopologySerializersTest +{ + @Test + public void nodeId() + { + SerializerTestUtils.assertSerializerIOEquality(new Node.Id(1234567890), TopologySerializers.nodeId); + } + + @Test + public void requestScopeTest() + { + + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java b/test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java new file mode 100644 index 000000000000..d7062a5e0874 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import java.util.Collections; +import java.util.HashSet; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeMap; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.cassandra.service.accord.AccordState; + +import static org.apache.cassandra.service.accord.store.StoredValueTest.assertISE; + +public class StoredMapTest +{ + + private static NavigableMap getAdditions(StoredNavigableMap map) + { + NavigableMap result = new TreeMap<>(); + map.forEachAddition(result::put); + return result; + } + + private static Set getDeletions(StoredNavigableMap map) + { + Set result = new HashSet<>(); + map.forEachDeletion(result::add); + return result; + } + + @Test + public void loadMap() + { + NavigableMap expectedData = new TreeMap<>(); + expectedData.put(1, 2); + expectedData.put(5, 6); + + StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); + + // no values loaded, getView should fail + assertISE(map::getView); + + map.load(new TreeMap<>(expectedData)); + Assert.assertEquals(expectedData, map.getView()); + Assert.assertFalse(map.hasModifications()); + Assert.assertFalse(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + + // check additions + NavigableMap expectedAdditions = new TreeMap<>(); + expectedAdditions.put(3, 4); + expectedData.put(3, 4); + map.blindPut(3, 4); + Assert.assertEquals(expectedData, map.getView()); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + + Assert.assertEquals(expectedAdditions, getAdditions(map)); + + // check deletions + Set expectedDeletions = new HashSet<>(); + expectedDeletions.add(5); + expectedDeletions.add(6); + map.blindRemove(5); + map.blindRemove(6); + expectedData.remove(5); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertTrue(map.hasDeletions()); + + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + map.clearModifiedFlag(); + Assert.assertFalse(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + Assert.assertTrue(getAdditions(map).isEmpty()); + Assert.assertTrue(getDeletions(map).isEmpty()); + + map.unload(); + assertISE(map::getView); + Assert.assertFalse(map.hasModifications()); + Assert.assertFalse(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + } + + @Test + public void unloadedAddsAndRemoves() + { + StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); + assertISE(map::getView); + + // check additions + NavigableMap expectedAdditions = new TreeMap<>(); + expectedAdditions.put(3, 4); + map.blindPut(3, 4); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + + Assert.assertEquals(expectedAdditions, getAdditions(map)); + + // check deletions + Set expectedDeletions = new HashSet<>(); + expectedDeletions.add(5); + expectedDeletions.add(6); + map.blindRemove(5); + map.blindRemove(6); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertTrue(map.hasDeletions()); + + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + // still shouldn't be able to read a complete map + assertISE(map::getView); + } + + // deleting a key should remove it from additions + @Test + public void additionDeletionCanceling() + { + NavigableMap expectedData = new TreeMap<>(); + NavigableMap expectedAdditions = new TreeMap<>(); + Set expectedDeletions = new HashSet<>(); + + StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); + map.load(new TreeMap<>()); + Assert.assertEquals(expectedData, map.getView()); + + // add + map.blindPut(1, 2); + map.blindPut(3, 4); + + expectedData.put(1, 2); + expectedData.put(3, 4); + expectedAdditions.put(1, 2); + expectedAdditions.put(3, 4); + + Assert.assertEquals(expectedData, map.getView()); + Assert.assertEquals(expectedAdditions, getAdditions(map)); + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + // remove + map.blindRemove(3); + expectedData.remove(3); + expectedAdditions.remove(3); + expectedDeletions.add(3); + Assert.assertEquals(expectedData, map.getView()); + Assert.assertEquals(expectedAdditions, getAdditions(map)); + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + + } + + @Test + public void clearMap() + { + NavigableMap expectedData = new TreeMap<>(); + NavigableMap expectedAdditions = new TreeMap<>(); + + expectedData.put(1, 2); + StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); + map.load(new TreeMap<>(expectedData)); + Assert.assertEquals(expectedData, map.getView()); + + map.clear(); + expectedData.clear(); + Assert.assertEquals(expectedData, map.getView()); + + map.blindPut(3, 4); + map.blindPut(5, 6); + map.blindRemove(3); + + // since this will be written with a range tombstone, deletes shouldn't be tracked + expectedData.put(5, 6); + expectedAdditions.put(5, 6); + + Assert.assertEquals(expectedData, map.getView()); + Assert.assertEquals(expectedAdditions, getAdditions(map)); + Assert.assertEquals(Collections.emptySet(), getDeletions(map)); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java b/test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java new file mode 100644 index 000000000000..8099dc4e97c8 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import java.util.Collections; +import java.util.HashSet; +import java.util.NavigableSet; +import java.util.Set; +import java.util.TreeSet; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.cassandra.service.accord.AccordState; + +import static org.apache.cassandra.service.accord.store.StoredValueTest.assertISE; + +public class StoredSetTest +{ + private static NavigableSet getAdditions(StoredSet.Navigable set) + { + NavigableSet result = new TreeSet<>(); + set.forEachAddition(result::add); + return result; + } + + private static Set getDeletions(StoredSet.Navigable set) + { + Set result = new HashSet<>(); + set.forEachDeletion(result::add); + return result; + } + + @Test + public void loadMap() + { + NavigableSet expected = new TreeSet<>(); + expected.add(1); + expected.add(5); + + StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); + + // no values loaded, getView should fail + assertISE(map::getView); + + map.load(new TreeSet<>(expected)); + Assert.assertEquals(expected, map.getView()); + Assert.assertFalse(map.hasModifications()); + Assert.assertFalse(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + + // check additions + NavigableSet expectedAdditions = new TreeSet<>(); + expectedAdditions.add(3); + expected.add(3); + map.blindAdd(3); + Assert.assertEquals(expected, map.getView()); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + + Assert.assertEquals(expectedAdditions, getAdditions(map)); + + // check deletions + Set expectedDeletions = new HashSet<>(); + expectedDeletions.add(5); + expectedDeletions.add(6); + map.blindRemove(5); + map.blindRemove(6); + expected.remove(5); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertTrue(map.hasDeletions()); + + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + map.clearModifiedFlag(); + Assert.assertFalse(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + Assert.assertTrue(getAdditions(map).isEmpty()); + Assert.assertTrue(getDeletions(map).isEmpty()); + + map.unload(); + assertISE(map::getView); + Assert.assertFalse(map.hasModifications()); + Assert.assertFalse(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + } + + @Test + public void unloadedAddsAndRemoves() + { + StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); + assertISE(map::getView); + + // check additions + NavigableSet expectedAdditions = new TreeSet<>(); + expectedAdditions.add(3); + map.blindAdd(3); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertFalse(map.hasDeletions()); + + Assert.assertEquals(expectedAdditions, getAdditions(map)); + + // check deletions + Set expectedDeletions = new HashSet<>(); + expectedDeletions.add(5); + expectedDeletions.add(6); + map.blindRemove(5); + map.blindRemove(6); + Assert.assertTrue(map.hasModifications()); + Assert.assertTrue(map.hasAdditions()); + Assert.assertTrue(map.hasDeletions()); + + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + // still shouldn't be able to read a complete map + assertISE(map::getView); + } + + // deleting a key should remove it from additions + @Test + public void additionDeletionCanceling() + { + NavigableSet expectedData = new TreeSet<>(); + NavigableSet expectedAdditions = new TreeSet<>(); + Set expectedDeletions = new HashSet<>(); + + StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); + map.load(new TreeSet<>()); + Assert.assertEquals(expectedData, map.getView()); + + // add + map.blindAdd(1); + map.blindAdd(3); + + expectedData.add(1); + expectedData.add(3); + expectedAdditions.add(1); + expectedAdditions.add(3); + + Assert.assertEquals(expectedData, map.getView()); + Assert.assertEquals(expectedAdditions, getAdditions(map)); + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + // remove + map.blindRemove(3); + expectedData.remove(3); + expectedAdditions.remove(3); + expectedDeletions.add(3); + Assert.assertEquals(expectedData, map.getView()); + Assert.assertEquals(expectedAdditions, getAdditions(map)); + Assert.assertEquals(expectedDeletions, getDeletions(map)); + + + } + + @Test + public void clearMap() + { + NavigableSet expectedData = new TreeSet<>(); + NavigableSet expectedAdditions = new TreeSet<>(); + + expectedData.add(1); + StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); + map.load(new TreeSet<>(expectedData)); + Assert.assertEquals(expectedData, map.getView()); + + map.clear(); + expectedData.clear(); + Assert.assertEquals(expectedData, map.getView()); + + map.blindAdd(3); + map.blindAdd(5); + map.blindRemove(3); + + // since this will be written with a range tombstone, deletes shouldn't be tracked + expectedData.add(5); + expectedAdditions.add(5); + + Assert.assertEquals(expectedData, map.getView()); + Assert.assertEquals(expectedAdditions, getAdditions(map)); + Assert.assertEquals(Collections.emptySet(), getDeletions(map)); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java b/test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java new file mode 100644 index 000000000000..d0250a982764 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.store; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.cassandra.service.accord.AccordState; + +public class StoredValueTest +{ + static void assertISE(Runnable runnable) + { + try + { + runnable.run(); + Assert.fail("Expected IllegalStateException"); + } + catch (IllegalStateException e) + { + // noop + } + } + + @Test + public void storedValueTest() + { + StoredValue value = new StoredValue<>(AccordState.ReadWrite.FULL); + // value is unloaded, read should fail + assertISE(value::get); + + value.load(5); + Assert.assertFalse(value.hasModifications()); + Assert.assertEquals(Integer.valueOf(5), value.get()); + + value.set(6); + Assert.assertTrue(value.hasModifications()); + Assert.assertEquals(Integer.valueOf(6), value.get()); + + // loading into an unsaved field should fail + assertISE(() -> value.load(7)); + + value.clearModifiedFlag(); + Assert.assertFalse(value.hasModifications()); + Assert.assertEquals(Integer.valueOf(6), value.get()); + + value.unload(); + // value is unloaded again, read should fail + assertISE(() -> value.get()); + } + + @Test + public void historyPreservingTest() + { + StoredValue.HistoryPreserving value = new StoredValue.HistoryPreserving<>(AccordState.ReadWrite.FULL); + value.load(5); + + Assert.assertEquals(Integer.valueOf(5), value.get()); + Assert.assertEquals(Integer.valueOf(5), value.previous()); + + value.set(6); + Assert.assertEquals(Integer.valueOf(6), value.get()); + Assert.assertEquals(Integer.valueOf(5), value.previous()); + + value.clearModifiedFlag(); + Assert.assertEquals(Integer.valueOf(6), value.get()); + Assert.assertEquals(Integer.valueOf(6), value.previous()); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java b/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java new file mode 100644 index 000000000000..890c760b0dee --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +import com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.dht.ByteOrderedPartitioner; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.ByteBufferUtil; + +public class AbstractKeySortedTest +{ + private static final TableId TABLE1 = TableId.fromString("00000000-0000-0000-0000-000000000001"); + + @BeforeClass + public static void beforeClass() throws Exception + { + SchemaLoader.prepareServer(); + } + + static class Item + { + final PartitionKey key; + final int value; + + public Item(PartitionKey key, int value) + { + this.key = key; + this.value = value; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Item item = (Item) o; + return value == item.value && key.equals(item.key); + } + + @Override + public int hashCode() + { + return Objects.hash(key, value); + } + + @Override + public String toString() + { + return "Item{" + + "key=" + key + + ", value=" + value + + '}'; + } + } + + static class SortedItems extends AbstractKeySorted + { + public SortedItems(Item... items) + { + super(items); + } + + public SortedItems(List items) + { + super(items); + } + + @Override + int compareNonKeyFields(Item left, Item right) + { + return Integer.compare(left.value, right.value); + } + + @Override + PartitionKey getKey(Item item) + { + return item.key; + } + + @Override + Item[] newArray(int size) + { + return new Item[size]; + } + } + + private static PartitionKey key(int k) + { + DecoratedKey dk = ByteOrderedPartitioner.instance.decorateKey(ByteBufferUtil.bytes(k)); + return new PartitionKey("", TABLE1, dk); + } + + private static Item item(int k, int v) + { + return new Item(key(k), v); + } + + private static List itemList(Item... items) + { + return Lists.newArrayList(items); + } + + @Test + public void checkInitialSorting() + { + List initial = itemList(item(5, 4), item(3, 3), item(3, 1), item(6, 5)); + SortedItems expected = new SortedItems(item(3, 1), item(3, 3), item(5, 4), item(6, 5)); + expected.validateOrder(); + SortedItems actual = new SortedItems(initial); + actual.validateOrder(); + Assert.assertEquals(expected, actual); + } + + @Test + public void checkIterationForKey() + { + SortedItems source = new SortedItems(item(1, 5), item(3, 1), item(3, 3), item(5, 4), item(6, 5)); + source.validateOrder(); + + source.forEachWithKey(key(0), i -> Assert.fail()); + source.forEachWithKey(key(1), i -> Assert.assertEquals(item(1, 5), i)); + source.forEachWithKey(key(2), i -> Assert.fail()); + List actual = new ArrayList<>(); + source.forEachWithKey(key(3), actual::add); + Assert.assertEquals(itemList(item(3, 1), item(3, 3)), actual); + source.forEachWithKey(key(4), i -> Assert.fail()); + source.forEachWithKey(key(5), i -> Assert.assertEquals(item(5, 4), i)); + source.forEachWithKey(key(6), i -> Assert.assertEquals(item(6, 5), i)); + source.forEachWithKey(key(7), i -> Assert.fail()); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/txn/TxnUpdateTest.java b/test/unit/org/apache/cassandra/service/accord/txn/TxnUpdateTest.java new file mode 100644 index 000000000000..203e60ab16d2 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/txn/TxnUpdateTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import org.apache.cassandra.service.accord.AccordTestUtils; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.primitives.Txn; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.schema.KeyspaceParams; + +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.utils.SerializerTestUtils.assertSerializerIOEquality; + +public class TxnUpdateTest +{ + @BeforeClass + public static void setupClass() + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + + } + + @Test + public void predicateSerializer() + { + Txn txn = AccordTestUtils.createTxn(0, 0); + TxnUpdate update = (TxnUpdate) txn.update(); + assertSerializerIOEquality(update, TxnUpdate.serializer); + } +} diff --git a/test/unit/org/apache/cassandra/transport/CBUtilTest.java b/test/unit/org/apache/cassandra/transport/CBUtilTest.java index 4409655d334c..3ce860307855 100644 --- a/test/unit/org/apache/cassandra/transport/CBUtilTest.java +++ b/test/unit/org/apache/cassandra/transport/CBUtilTest.java @@ -22,13 +22,13 @@ import org.junit.Assert; import org.junit.Test; -import accord.utils.Gens; +import accord.utilsfork.Gens; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.PooledByteBufAllocator; import org.assertj.core.api.Assertions; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class CBUtilTest { diff --git a/test/unit/org/apache/cassandra/utils/AssertionUtils.java b/test/unit/org/apache/cassandra/utils/AssertionUtils.java index d5b1981fc142..c122a95315b2 100644 --- a/test/unit/org/apache/cassandra/utils/AssertionUtils.java +++ b/test/unit/org/apache/cassandra/utils/AssertionUtils.java @@ -18,8 +18,11 @@ package org.apache.cassandra.utils; +import java.util.stream.Stream; + import com.google.common.base.Throwables; +import org.assertj.core.api.Assertions; import org.assertj.core.api.Condition; public class AssertionUtils @@ -28,6 +31,16 @@ private AssertionUtils() { } + public static Condition anyOf(Stream> stream) { + Iterable> it = () -> stream.iterator(); + return Assertions.anyOf(it); + } + + public static Condition anyOfThrowable(Class... klasses) + { + return anyOf(Stream.of(klasses).map(AssertionUtils::isThrowable)); + } + /** * When working with jvm-dtest the thrown error is in a different {@link ClassLoader} causing type checks * to fail; this method relies on naming instead. @@ -100,6 +113,11 @@ public String toString() }; } + public static Condition isThrowableInstanceof(Class klass) + { + return (Condition) (Condition) isInstanceof(klass); + } + public static Condition rootCause(Condition other) { return new Condition() { @@ -119,6 +137,32 @@ public String toString() public static Condition rootCauseIs(Class klass) { - return rootCause((Condition) (Condition) is(klass)); + return rootCause(isThrowable(klass)); + } + + public static Condition hasCause(Class klass) + { + return hasCause(isThrowable(klass)); + } + + public static Condition hasCauseAnyOf(Class... matchers) + { + return hasCause(anyOfThrowable(matchers)); + } + + public static Condition hasCause(Condition matcher) + { + return new Condition() { + @Override + public boolean matches(Throwable value) + { + for (Throwable cause = value; cause != null; cause = cause.getCause()) + { + if (matcher.matches(cause)) + return true; + } + return false; + } + }; } } diff --git a/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java b/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java index c103a8dc6d8b..dcc54f5157f3 100644 --- a/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java +++ b/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java @@ -25,9 +25,9 @@ import com.google.common.collect.ImmutableMap; -import accord.utils.Gen; -import accord.utils.Gens; -import accord.utils.RandomSource; +import accord.utilsfork.Gen; +import accord.utilsfork.Gens; +import accord.utilsfork.RandomSource; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.dht.IPartitioner; diff --git a/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java b/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java index 6bae12c31987..9b5280025068 100644 --- a/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java +++ b/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java @@ -23,7 +23,7 @@ import com.google.common.jimfs.Jimfs; import org.junit.Test; -import accord.utils.Gen; +import accord.utilsfork.Gen; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.ParameterizedClass; @@ -31,7 +31,7 @@ import org.apache.cassandra.io.util.File; import org.apache.cassandra.locator.SimpleSeedProvider; -import static accord.utils.Property.qt; +import static accord.utilsfork.Property.qt; public class ConfigGenBuilderTest { diff --git a/test/unit/org/apache/cassandra/utils/Generators.java b/test/unit/org/apache/cassandra/utils/Generators.java index 6bb7f56a8d5c..2acedf174bd2 100644 --- a/test/unit/org/apache/cassandra/utils/Generators.java +++ b/test/unit/org/apache/cassandra/utils/Generators.java @@ -42,8 +42,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.DefaultRandom; -import accord.utils.RandomSource; +import accord.utilsfork.DefaultRandom; +import accord.utilsfork.RandomSource; import org.apache.cassandra.cql3.ReservedKeywords; import org.quicktheories.core.Gen; import org.quicktheories.core.RandomnessSource; @@ -613,7 +613,7 @@ public static Gen> forwardRanges(int min, int max) .map(end -> Range.closed(start, end))); } - public static accord.utils.Gen toGen(org.quicktheories.core.Gen qt) + public static accord.utilsfork.Gen toGen(org.quicktheories.core.Gen qt) { return rs -> { JavaRandom r = new JavaRandom(rs.asJdkRandom()); @@ -621,7 +621,7 @@ public static accord.utils.Gen toGen(org.quicktheories.core.Gen qt) }; } - public static org.quicktheories.core.Gen fromGen(accord.utils.Gen accord) + public static org.quicktheories.core.Gen fromGen(accord.utilsfork.Gen accord) { return rnd -> { RandomSource rs = new DefaultRandom(rnd.next(Constraint.none())); diff --git a/test/unit/org/apache/cassandra/utils/SerializerTestUtils.java b/test/unit/org/apache/cassandra/utils/SerializerTestUtils.java new file mode 100644 index 000000000000..37435a42a65b --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/SerializerTestUtils.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.io.IOException; + +import org.junit.Assert; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService; + +public class SerializerTestUtils +{ + private static int MS_VERSION = MessagingService.current_version; + + public static T serdes(IVersionedSerializer serializer, T message) + { + int expectedSize = (int) serializer.serializedSize(message, MS_VERSION); + try (DataOutputBuffer out = new DataOutputBuffer(expectedSize)) + { + serializer.serialize(message, out, MS_VERSION); + Assert.assertEquals(expectedSize, out.buffer().limit()); + try (DataInputBuffer in = new DataInputBuffer(out.buffer(), false)) + { + return serializer.deserialize(in, MS_VERSION); + } + } + catch (IOException e) + { + throw new AssertionError(e); + } + } + + public static void assertSerializerIOEquality(T expected, IVersionedSerializer serializer, int version) + { + int expectedSize = (int) serializer.serializedSize(expected, version); + try (DataOutputBuffer out = new DataOutputBuffer(expectedSize)) + { + serializer.serialize(expected, out, version); + Assert.assertEquals(expectedSize, out.buffer().limit()); + try (DataInputBuffer in = new DataInputBuffer(out.buffer(), false)) + { + Assert.assertEquals(expected, serializer.deserialize(in, version)); + } + } + catch (IOException e) + { + throw new AssertionError(e); + } + } + + public static void assertSerializerIOEquality(T expected, IVersionedSerializer serializer) + { + assertSerializerIOEquality(expected, serializer, MS_VERSION); + } +} diff --git a/tools/stress/src/org/apache/cassandra/io/sstable/StressCQLSSTableWriter.java b/tools/stress/src/org/apache/cassandra/io/sstable/StressCQLSSTableWriter.java index 3745f717be1b..c7b9cbef4bff 100644 --- a/tools/stress/src/org/apache/cassandra/io/sstable/StressCQLSSTableWriter.java +++ b/tools/stress/src/org/apache/cassandra/io/sstable/StressCQLSSTableWriter.java @@ -262,7 +262,6 @@ public StressCQLSSTableWriter rawAddRow(List values) // Note that we asks indexes to not validate values (the last 'false' arg below) because that triggers a 'Keyspace.open' // and that forces a lot of initialization that we don't want. UpdateParameters params = new UpdateParameters(insert.metadata(), - insert.updatedColumns(), ClientState.forInternalCalls(), options, insert.getTimestamp(TimeUnit.MILLISECONDS.toMicros(now), options), From e6a8c030c12f9cd17b0cdc5df6a0f73eead457a3 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Fri, 27 Jan 2023 09:41:59 -0800 Subject: [PATCH 044/340] add AsyncChain implementations and tests Patch by Blake Eggleston; Reviewed by David Capwell & Benedict Elliott Smith for CASSANDRA-18004 --- modules/accord | 2 +- .../service/accord/AccordCommand.java | 74 +++-- .../service/accord/AccordCommandStore.java | 18 +- .../service/accord/AccordCommandsForKey.java | 14 +- .../service/accord/AccordService.java | 7 +- .../cassandra/service/accord/AccordState.java | 6 +- .../service/accord/AccordStateCache.java | 128 ++++---- .../cassandra/service/accord/ReadFuture.java | 304 ------------------ .../service/accord/async/AsyncLoader.java | 92 +++--- .../service/accord/async/AsyncOperation.java | 46 ++- .../service/accord/async/AsyncWriter.java | 63 ++-- .../service/accord/txn/TxnNamedRead.java | 7 +- .../cassandra/service/accord/txn/TxnRead.java | 57 +--- .../service/accord/txn/TxnWrite.java | 21 +- .../service/accord/AccordCommandTest.java | 36 +-- .../service/accord/AccordStateCacheTest.java | 47 +-- .../service/accord/AccordTestUtils.java | 8 +- .../service/accord/async/AsyncLoaderTest.java | 21 +- .../accord/async/AsyncOperationTest.java | 16 +- 19 files changed, 333 insertions(+), 634 deletions(-) delete mode 100644 src/java/org/apache/cassandra/service/accord/ReadFuture.java diff --git a/modules/accord b/modules/accord index b9025e59395f..07e351462b14 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit b9025e59395f47535e4ed1fec20b1186cdb07db8 +Subproject commit 07e351462b147b831c2d416b8568449b06ccbb51 diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommand.java b/src/java/org/apache/cassandra/service/accord/AccordCommand.java index 8020b29ac7a0..88b39922cee7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommand.java @@ -58,14 +58,15 @@ import accord.utils.DeterministicIdentitySet; import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; import org.apache.cassandra.service.accord.api.PartitionKey; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.service.accord.async.AsyncContext; import org.apache.cassandra.service.accord.store.StoredNavigableMap; import org.apache.cassandra.service.accord.store.StoredSet; import org.apache.cassandra.service.accord.store.StoredValue; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.ObjectSizes; -import org.apache.cassandra.utils.concurrent.AsyncPromise; -import org.apache.cassandra.utils.concurrent.Future; import static accord.local.Status.Durability.Local; import static accord.local.Status.Durability.NotDurable; @@ -83,7 +84,7 @@ public class AccordCommand extends Command implements AccordState public static class WriteOnly extends AccordCommand implements AccordState.WriteOnly { - private Future future = null; + private AsyncResult asyncResult = null; public WriteOnly(TxnId txnId) { @@ -91,16 +92,16 @@ public WriteOnly(TxnId txnId) } @Override - public void future(Future future) + public void asyncResult(AsyncResult notifier) { - Preconditions.checkArgument(this.future == null); - this.future = future; + Preconditions.checkArgument(this.asyncResult == null); + this.asyncResult = notifier; } @Override - public Future future() + public AsyncResult asyncResult() { - return future; + return asyncResult; } @Override @@ -618,7 +619,7 @@ public void setDurability(Durability v) protected void postApply(SafeCommandStore safeStore) { AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); - cache.cleanupWriteFuture(txnId); + cache.cleanupWriteResult(txnId); super.postApply(safeStore); } @@ -640,58 +641,69 @@ private boolean canApplyWithCurrentScope(SafeCommandStore safeStore) return true; } - private Future applyWithCorrectScope(CommandStore unsafeStore) + private AsyncResult applyWithCorrectScope(CommandStore unsafeStore) { TxnId txnId = txnId(); - AsyncPromise promise = new AsyncPromise<>(); + AsyncResult.Settable result = AsyncResults.settable(); unsafeStore.execute(this, safeStore -> { AccordCommand command = (AccordCommand) safeStore.command(txnId); - command.apply(safeStore, false).addCallback((v, throwable) -> { - if (throwable != null) - promise.tryFailure(throwable); - else - promise.trySuccess(null); - }); + command.applyChain(safeStore, false).begin(result.settingCallback()); + }).begin((unused, throwable) -> { + if (throwable != null) + result.tryFailure(throwable); }); - return promise; + return result; } - private Future apply(SafeCommandStore safeStore, boolean canReschedule) + private AsyncChain applyChain(SafeCommandStore safeStore, boolean canReschedule) { AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); - Future future = cache.getWriteFuture(txnId); - if (future != null) - return future; + AsyncResult writeResult = cache.getWriteResult(txnId); + if (writeResult != null) + return writeResult; // this can be called via a listener callback, in which case we won't // have the appropriate commandsForKey in scope, so start a new operation // with the correct scope and notify the caller when that completes if (!canApplyWithCurrentScope(safeStore)) { + return writeResult; + } + + if (canApplyWithCurrentScope(safeStore)) + { + AsyncChain chain = super.applyChain(safeStore); + writeResult = AsyncResults.forChain(chain); + } + else + { + // this can be called via a listener callback, in which case we won't + // have the appropriate commandsForKey in scope, so start a new operation + // with the correct scope and notify the caller when that completes Preconditions.checkArgument(canReschedule); return applyWithCorrectScope(safeStore.commandStore()); } + cache.setWriteResult(txnId, writeResult); - future = super.apply(safeStore); - cache.setWriteFuture(txnId, future); - return future; + return writeResult; } @Override - public Future apply(SafeCommandStore safeStore) + protected AsyncChain applyChain(SafeCommandStore safeStore) { - return apply(safeStore, true); + + return applyChain(safeStore, true); } @Override - public Future read(SafeCommandStore safeStore) + public AsyncChain read(SafeCommandStore safeStore) { AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); - Future future = cache.getReadFuture(txnId); + AsyncResult future = cache.getReadResult(txnId); if (future != null) return future; - future = super.read(safeStore); - cache.setReadFuture(txnId, future); + future = AsyncResults.forChain(super.read(safeStore)); + cache.setReadResult(txnId, future); return future; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 87c9c752d9b9..b7c6e9754ea4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -54,10 +54,10 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.service.accord.api.PartitionKey; +import accord.utils.async.AsyncChain; import org.apache.cassandra.service.accord.async.AsyncContext; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; @@ -271,13 +271,13 @@ public Timestamp preaccept(TxnId txnId, Seekables keys) } @Override - public Future execute(PreLoadContext context, Consumer consumer) + public AsyncChain execute(PreLoadContext context, Consumer consumer) { return AccordCommandStore.this.execute(context, consumer); } @Override - public Future submit(PreLoadContext context, Function function) + public AsyncChain submit(PreLoadContext context, Function function) { return AccordCommandStore.this.submit(context, function); } @@ -445,11 +445,9 @@ private AccordCommandsForKey getCommandsForKeyInternal(Key key) } @Override - public Future submit(PreLoadContext loadCtx, Function function) + public AsyncChain submit(PreLoadContext loadCtx, Function function) { - AsyncOperation operation = AsyncOperation.create(this, loadCtx, function); - executor.execute(operation); - return operation; + return AsyncOperation.create(this, loadCtx, function); } @Override @@ -459,11 +457,9 @@ public Agent agent() } @Override - public Future execute(PreLoadContext preLoadContext, Consumer consumer) + public AsyncChain execute(PreLoadContext preLoadContext, Consumer consumer) { - AsyncOperation operation = AsyncOperation.create(this, preLoadContext, consumer); - executor.execute(operation); - return operation; + return AsyncOperation.create(this, preLoadContext, consumer); } public void executeBlocking(Runnable runnable) diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java index c58199139191..025f8f5d434a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java @@ -44,13 +44,13 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.service.accord.api.PartitionKey; +import accord.utils.async.AsyncResult; import org.apache.cassandra.service.accord.store.StoredLong; import org.apache.cassandra.service.accord.store.StoredNavigableMap; import org.apache.cassandra.service.accord.store.StoredSet; import org.apache.cassandra.service.accord.store.StoredValue; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; -import org.apache.cassandra.utils.concurrent.Future; import static accord.local.SafeCommandStore.TestDep.ANY_DEPS; import static accord.local.SafeCommandStore.TestDep.WITH; @@ -75,7 +75,7 @@ public static class Defaults public static class WriteOnly extends AccordCommandsForKey implements AccordState.WriteOnly { - private Future future = null; + private AsyncResult result = null; public WriteOnly(AccordCommandStore commandStore, PartitionKey key) { @@ -83,17 +83,17 @@ public WriteOnly(AccordCommandStore commandStore, PartitionKey key) } @Override - public void future(Future future) + public void asyncResult(AsyncResult result) { - Preconditions.checkArgument(this.future == null); - this.future = future; + Preconditions.checkArgument(this.result == null); + this.result = result; } @Override - public Future future() + public AsyncResult asyncResult() { - return future; + return result; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index c308afa2dee7..28f2d878c6a0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -37,6 +37,8 @@ import accord.primitives.Txn; import accord.topology.TopologyManager; import org.apache.cassandra.concurrent.Shutdownable; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; @@ -51,7 +53,6 @@ import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.config.DatabaseDescriptor.getConcurrentAccordOps; @@ -185,8 +186,8 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) try { metrics.keySize.update(txn.keys().size()); - Future future = node.coordinate(txn); - Result result = future.get(DatabaseDescriptor.getTransactionTimeout(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + AsyncResult asyncResult = node.coordinate(txn); + Result result = AsyncResults.getBlocking(asyncResult, DatabaseDescriptor.getTransactionTimeout(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); return (TxnData) result; } catch (ExecutionException e) diff --git a/src/java/org/apache/cassandra/service/accord/AccordState.java b/src/java/org/apache/cassandra/service/accord/AccordState.java index 2f5a9dc68c93..0e1a224cd1f2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordState.java @@ -21,9 +21,9 @@ import java.util.function.BiConsumer; import java.util.function.Function; +import accord.utils.async.AsyncResult; import org.apache.cassandra.service.accord.store.StoredNavigableMap; import org.apache.cassandra.service.accord.store.StoredSet; -import org.apache.cassandra.utils.concurrent.Future; public interface AccordState { @@ -69,9 +69,9 @@ default ReadWrite rw() return ReadWrite.WRITE_ONLY; } - void future(Future future); + void asyncResult(AsyncResult notifier); - Future future(); + AsyncResult asyncResult(); /** * Apply the write only changes to the full instance diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 37e7ba17cf72..5993b73d22f4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -35,9 +35,9 @@ import org.slf4j.LoggerFactory; import accord.api.Data; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.utils.ObjectSizes; -import org.apache.cassandra.utils.concurrent.Future; -import org.apache.cassandra.utils.concurrent.FutureCombiner; /** * Cache for AccordCommand and AccordCommandsForKey, available memory is shared between the two object types. @@ -85,7 +85,7 @@ void purge() AccordState.WriteOnly item = items.get(0); // we can't remove items out of order, so if we encounter a write is still pending, we stop - if (item.future() == null || !item.future().isDone()) + if (item.asyncResult() == null || !item.asyncResult().isDone()) break; items.remove(0); @@ -154,11 +154,11 @@ public NamedMap(String name) private final Map> pendingWriteOnly = new HashMap<>(); private final Set> instances = new HashSet<>(); - private final NamedMap> loadFutures = new NamedMap<>("loadFutures"); - private final NamedMap> saveFutures = new NamedMap<>("saveFutures"); + private final NamedMap> loadResults = new NamedMap<>("loadResults"); + private final NamedMap> saveResults = new NamedMap<>("saveResults"); - private final NamedMap> readFutures = new NamedMap<>("readFutures"); - private final NamedMap> writeFutures = new NamedMap<>("writeFutures"); + private final NamedMap> readResults = new NamedMap<>("readResults"); + private final NamedMap> writeResults = new NamedMap<>("writeResults"); Node head; Node tail; @@ -227,13 +227,13 @@ private void updateSize(Node node) bytesCached += node.estimatedSizeOnHeapDelta(); } - // don't evict if there's an outstanding save future. If an item is evicted then reloaded + // don't evict if there's an outstanding save result. If an item is evicted then reloaded // before it's mutation is applied, out of date info will be loaded private boolean canEvict(Object key) { - // getFuture only returns a future if it is running, so don't need to check if its still running - Future future = getFuture(saveFutures, key); - return future == null; + // getResult only returns a result if it is running, so don't need to check if its still running + AsyncResult result = getAsyncResult(saveResults, key); + return result == null || result.isDone(); } private void maybeEvict() @@ -248,7 +248,7 @@ private void maybeEvict() current = current.prev; // if there are any dangling write only groups, apply them and - // move their futures into write futures so we don't evict + // move their results into write results so we don't evict applyAndRemoveWriteOnlyGroup(evict.value); if (!canEvict(evict.key())) continue; @@ -260,9 +260,9 @@ private void maybeEvict() } } - private static > F getFuture(NamedMap futuresMap, K key) + private static > F getAsyncResult(NamedMap resultMap, K key) { - F r = futuresMap.get(key); + F r = resultMap.get(key); if (r == null) return null; @@ -270,36 +270,36 @@ private static > F getFuture(NamedMap futuresM return r; if (logger.isTraceEnabled()) - logger.trace("Clearing future for {} from {}: {}", key, futuresMap.name, r); - futuresMap.remove(key); + logger.trace("Clearing result for {} from {}: {}", key, resultMap.name, r); + resultMap.remove(key); return null; } - private static > void setFuture(Map futuresMap, K key, F future) + private static > void setAsyncResult(Map resultsMap, K key, F result) { - Preconditions.checkState(!futuresMap.containsKey(key)); - futuresMap.put(key, future); + Preconditions.checkState(!resultsMap.containsKey(key)); + resultsMap.put(key, result); } - private static void mergeFuture(Map> futuresMap, K key, Future future) + private static void mergeAsyncResult(Map> resultMap, K key, AsyncResult result) { - Future existing = futuresMap.get(key); + AsyncResult existing = resultMap.get(key); if (existing != null && !existing.isDone()) { - logger.trace("Merging future {} with existing {}", future, existing); - future = FutureCombiner.allOf(ImmutableList.of(existing, future)); + logger.trace("Merging result {} with existing {}", result, existing); + result = AsyncResults.reduce(ImmutableList.of(existing, result), (a, b) -> null).beginAsResult(); } - futuresMap.put(key, future); + resultMap.put(key, result); } - private void maybeClearFuture(K key) + private void maybeClearAsyncResult(K key) { // will clear if it's done - getFuture(loadFutures, key); - getFuture(saveFutures, key); - getFuture(readFutures, key); - getFuture(writeFutures, key); + getAsyncResult(loadResults, key); + getAsyncResult(saveResults, key); + getAsyncResult(readResults, key); + getAsyncResult(writeResults, key); } public > void applyAndRemoveWriteOnlyGroup(V instance) @@ -312,8 +312,8 @@ public > void applyAndRemoveWriteOnlyGroup(V instanc for (AccordState.WriteOnly writeOnly : group.items) { writeOnly.applyChanges(instance); - if (!writeOnly.future().isDone()) - mergeFuture(saveFutures, instance.key(), writeOnly.future()); + if (!writeOnly.asyncResult().isDone()) + mergeAsyncResult(saveResults, instance.key(), writeOnly.asyncResult()); } } @@ -402,7 +402,7 @@ public void release(V value) { K key = value.key(); logger.trace("Releasing resources for {}: {}", key, value); - maybeClearFuture(key); + maybeClearAsyncResult(key); Node node = (Node) active.get(key); Preconditions.checkState(node != null && node.references > 0); Preconditions.checkState(node.value == value); @@ -462,12 +462,12 @@ public void applyAndRemoveWriteOnlyGroup(V instance) public void addWriteOnly(AccordState.WriteOnly writeOnly) { K key = writeOnly.key(); - Preconditions.checkArgument(writeOnly.future() != null); + Preconditions.checkArgument(writeOnly.asyncResult() != null); WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.computeIfAbsent(key, k -> new WriteOnlyGroup<>()); - // if a load future exists for the key we're creating a write group for, we need to lock + // if a load result exists for the key we're creating a write group for, we need to lock // the group so the loading instance gets changes applied when it finishes loading - if (getLoadFuture(key) != null) + if (getLoadResult(key) != null) group.lock(); group.add(writeOnly); @@ -495,77 +495,77 @@ public int getWriteOnlyGroupSize(K key) return group != null ? group.items.size() : 0; } - public Future getLoadFuture(K key) + public AsyncResult getLoadResult(K key) { - return getFuture(loadFutures, key); + return getAsyncResult(loadResults, key); } - public void cleanupLoadFuture(K key) + public void cleanupLoadResult(K key) { - getLoadFuture(key); + getLoadResult(key); } @VisibleForTesting - public boolean hasLoadFuture(K key) + public boolean hasLoadResult(K key) { - return loadFutures.get(key) != null; + return loadResults.get(key) != null; } - public void setLoadFuture(K key, Future future) + public void setLoadResult(K key, AsyncResult result) { - setFuture(loadFutures, key, future); + setAsyncResult(loadResults, key, result); } - public Future getSaveFuture(K key) + public AsyncResult getSaveResult(K key) { - return getFuture(saveFutures, key); + return getAsyncResult(saveResults, key); } - public void addSaveFuture(K key, Future future) + public void addSaveResult(K key, AsyncResult result) { - logger.trace("Adding save future for {}: {}", key, future); - mergeFuture(saveFutures, key, future); + logger.trace("Adding save result for {}: {}", key, result); + mergeAsyncResult(saveResults, key, result); } - public void cleanupSaveFuture(K key) + public void cleanupSaveResult(K key) { - getSaveFuture(key); + getSaveResult(key); } @VisibleForTesting - public boolean hasSaveFuture(K key) + public boolean hasSaveResult(K key) { - return saveFutures.get(key) != null; + return saveResults.get(key) != null; } - public Future getReadFuture(K key) + public AsyncResult getReadResult(K key) { - return getFuture(readFutures, key); + return getAsyncResult(readResults, key); } - public void setReadFuture(K key, Future future) + public void setReadResult(K key, AsyncResult result) { - setFuture(readFutures, key, future); + setAsyncResult(readResults, key, result); } - public void cleanupReadFuture(K key) + public void cleanupReadResult(K key) { - getReadFuture(key); + getReadResult(key); } - public Future getWriteFuture(K key) + public AsyncResult getWriteResult(K key) { - return (Future) getFuture(writeFutures, key); + return getAsyncResult(writeResults, key); } - public void setWriteFuture(K key, Future future) + public void setWriteResult(K key, AsyncResult result) { - setFuture(writeFutures, key, future); + setAsyncResult(writeResults, key, result); } - public void cleanupWriteFuture(K key) + public void cleanupWriteResult(K key) { - getWriteFuture(key); + getWriteResult(key); } public long cacheQueries() diff --git a/src/java/org/apache/cassandra/service/accord/ReadFuture.java b/src/java/org/apache/cassandra/service/accord/ReadFuture.java deleted file mode 100644 index f28f4856b997..000000000000 --- a/src/java/org/apache/cassandra/service/accord/ReadFuture.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.function.BiConsumer; -import java.util.function.Consumer; -import java.util.function.Function; - -import com.google.common.util.concurrent.FutureCallback; - -import accord.api.Data; -import io.netty.util.concurrent.GenericFutureListener; -import org.apache.cassandra.utils.concurrent.Future; -import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; - -public class ReadFuture implements Future -{ - private final Future wrappped; - - public ReadFuture(Future wrappped) - { - this.wrappped = wrappped; - } - - @Override - public Future await() throws InterruptedException - { - return wrappped.await(); - } - - @Override - public Future awaitUninterruptibly() - { - return wrappped.awaitUninterruptibly(); - } - - @Override - public Future awaitThrowUncheckedOnInterrupt() - { - return wrappped.awaitThrowUncheckedOnInterrupt(); - } - - @Override - public void rethrowIfFailed() - { - wrappped.rethrowIfFailed(); - } - - @Override - public Future sync() throws InterruptedException - { - return wrappped.sync(); - } - - @Override - public Future syncUninterruptibly() - { - return wrappped.syncUninterruptibly(); - } - - @Override - public Future syncThrowUncheckedOnInterrupt() - { - return wrappped.syncThrowUncheckedOnInterrupt(); - } - - @Override - @Deprecated(since = "5.1", forRemoval = true) - public boolean await(long l) throws InterruptedException - { - return wrappped.await(l); - } - - @Override - @Deprecated(since = "5.1", forRemoval = true) - public boolean awaitUninterruptibly(long l) - { - return wrappped.awaitUninterruptibly(l); - } - - @Override - public Future addCallback(BiConsumer callback) - { - return wrappped.addCallback(callback); - } - - @Override - public Future addCallback(BiConsumer callback, Executor executor) - { - return wrappped.addCallback(callback, executor); - } - - @Override - public Future addCallback(FutureCallback callback) - { - return wrappped.addCallback(callback); - } - - @Override - public Future addCallback(FutureCallback callback, Executor executor) - { - return wrappped.addCallback(callback, executor); - } - - @Override - public Future addCallback(Consumer onSuccess, Consumer onFailure) - { - return wrappped.addCallback(onSuccess, onFailure); - } - - @Override - public Future addCallback(Consumer onSuccess, Consumer onFailure, Executor executor) - { - return wrappped.addCallback(onSuccess, onFailure, executor); - } - - @Override - public Future map(Function mapper) - { - return wrappped.map(mapper); - } - - @Override - public Future map(Function mapper, Executor executor) - { - return wrappped.map(mapper, executor); - } - - @Override - public Future flatMap(Function> flatMapper) - { - return wrappped.flatMap(flatMapper); - } - - @Override - public Future flatMap(Function> flatMapper, Executor executor) - { - return wrappped.flatMap(flatMapper, executor); - } - - @Override - public Future andThenAsync(Function> andThen) - { - throw new UnsupportedOperationException("git rebase: this goes away once AsyncChain comes in"); - } - - @Override - public Future andThenAsync(Function> andThen, Executor executor) - { - throw new UnsupportedOperationException("git rebase: this goes away once AsyncChain comes in"); - } - - @Override - public void addListener(Runnable runnable, Executor executor) - { - wrappped.addListener(runnable, executor); - } - - @Override - public void addListener(Runnable runnable) - { - wrappped.addListener(runnable); - } - - @Override - public Executor notifyExecutor() - { - return wrappped.notifyExecutor(); - } - - @Override - public Future addListener(GenericFutureListener> genericFutureListener) - { - return wrappped.addListener(genericFutureListener); - } - - @Override - public Future addListeners(GenericFutureListener>... genericFutureListeners) - { - return wrappped.addListeners(genericFutureListeners); - } - - @Override - public Future removeListener(GenericFutureListener> genericFutureListener) - { - return wrappped.removeListener(genericFutureListener); - } - - @Override - public Future removeListeners(GenericFutureListener>... genericFutureListeners) - { - return wrappped.removeListeners(genericFutureListeners); - } - - @Override - public boolean isSuccess() - { - return wrappped.isSuccess(); - } - - @Override - public boolean isCancellable() - { - return wrappped.isCancellable(); - } - - @Override - public Throwable cause() - { - return wrappped.cause(); - } - - @Override - public boolean await(long timeout, TimeUnit unit) throws InterruptedException - { - return wrappped.await(timeout, unit); - } - - @Override - public boolean awaitUninterruptibly(long timeout, TimeUnit unit) - { - return wrappped.awaitUninterruptibly(timeout, unit); - } - - @Override - public Data getNow() - { - return wrappped.getNow(); - } - - @Override - public boolean cancel(boolean mayInterruptIfRunning) - { - return wrappped.cancel(mayInterruptIfRunning); - } - - @Override - public boolean isCancelled() - { - return wrappped.isCancelled(); - } - - @Override - public boolean isDone() - { - return wrappped.isDone(); - } - - @Override - public Data get() throws InterruptedException, ExecutionException - { - return wrappped.get(); - } - - @Override - public Data get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException - { - return wrappped.get(timeout, unit); - } - - @Override - public boolean awaitUntil(long nanoTimeDeadline) throws InterruptedException - { - return wrappped.awaitUntil(nanoTimeDeadline); - } - - @Override - public boolean awaitUntilThrowUncheckedOnInterrupt(long nanoTimeDeadline) throws UncheckedInterruptedException - { - return wrappped.awaitUntilThrowUncheckedOnInterrupt(nanoTimeDeadline); - } - - @Override - public boolean awaitUntilUninterruptibly(long nanoTimeDeadline) - { - return wrappped.awaitUntilUninterruptibly(nanoTimeDeadline); - } - - @Override - public boolean awaitThrowUncheckedOnInterrupt(long time, TimeUnit units) throws UncheckedInterruptedException - { - return wrappped.awaitThrowUncheckedOnInterrupt(time, units); - } -} diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 3bbc9c2828a2..75c2d356a5ce 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -31,6 +31,9 @@ import org.slf4j.LoggerFactory; import accord.primitives.TxnId; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.service.accord.AccordCommand; import org.apache.cassandra.service.accord.AccordCommandStore; @@ -39,9 +42,8 @@ import org.apache.cassandra.service.accord.AccordStateCache; import org.apache.cassandra.service.accord.AccordState; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.utils.concurrent.Future; -import org.apache.cassandra.utils.concurrent.FutureCombiner; +import static accord.utils.async.AsyncResults.ofRunnable; public class AsyncLoader { @@ -60,7 +62,7 @@ enum State private final Iterable txnIds; private final Iterable keys; - protected Future readFuture; + protected AsyncResult readResult; public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) { @@ -69,23 +71,23 @@ public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iter this.keys = keys; } - private > Future referenceAndDispatch(K key, - AccordStateCache.Instance cache, - Map context, - Function> readFunction, - Object callback) + private > AsyncResult referenceAndDispatch(K key, + AccordStateCache.Instance cache, + Map context, + Function> readFunction, + Object callback) { V item; - Future future = cache.getLoadFuture(key); - if (future != null) + AsyncResult result = cache.getLoadResult(key); + if (result != null) { - // if a load future exists for this, it must be present in the cache + // if a load result exists for this, it must be present in the cache item = cache.getOrNull(key); Preconditions.checkState(item != null); context.put(key, item); if (logger.isTraceEnabled()) - logger.trace("Existing load future found for {} while loading for {}. ({})", item.key(), callback, item); - return future; + logger.trace("Existing load result found for {} while loading for {}. ({})", item.key(), callback, item); + return result; } item = cache.getOrCreate(key); @@ -97,40 +99,40 @@ private > Future referenceAndDispatch(K key, return null; } - future = readFunction.apply(item); - cache.setLoadFuture(item.key(), future); + result = readFunction.apply(item); + cache.setLoadResult(item.key(), result); if (logger.isTraceEnabled()) logger.trace("Loading new item for {} while loading for {}. ({})", item.key(), callback, item); - return future; + return result; } - private > List> referenceAndDispatchReads(Iterable keys, + private > List> referenceAndDispatchReads(Iterable keys, AccordStateCache.Instance cache, Map context, - Function> readFunction, - List> futures, + Function> readFunction, + List> results, Object callback) { for (K key : keys) { - Future future = referenceAndDispatch(key, cache, context, readFunction, callback); - if (future == null) + AsyncResult result = referenceAndDispatch(key, cache, context, readFunction, callback); + if (result == null) continue; - if (futures == null) - futures = new ArrayList<>(); + if (results == null) + results = new ArrayList<>(); - futures.add(future); + results.add(result); } - return futures; + return results; } @VisibleForTesting - Function> loadCommandFunction(Object callback) + Function> loadCommandFunction(Object callback) { - return command -> Stage.READ.submit(() -> { + return command -> ofRunnable(Stage.READ.executor(), () -> { try { logger.trace("Starting load of {} for {}", command.txnId(), callback); @@ -146,9 +148,9 @@ Function> loadCommandFunction(Object callback) } @VisibleForTesting - Function> loadCommandsPerKeyFunction(Object callback) + Function> loadCommandsPerKeyFunction(Object callback) { - return cfk -> Stage.READ.submit(() -> { + return cfk -> ofRunnable(Stage.READ.executor(), () -> { try { logger.trace("Starting load of {} for {}", cfk.key(), callback); @@ -163,25 +165,25 @@ Function> loadCommandsPerKeyFunction(Object call }); } - private Future referenceAndDispatchReads(AsyncContext context, Object callback) + private AsyncResult referenceAndDispatchReads(AsyncContext context, Object callback) { - List> futures = null; + List> results = null; - futures = referenceAndDispatchReads(txnIds, + results = referenceAndDispatchReads(txnIds, commandStore.commandCache(), context.commands.items, loadCommandFunction(callback), - futures, + results, callback); - futures = referenceAndDispatchReads(keys, + results = referenceAndDispatchReads(keys, commandStore.commandsForKeyCache(), context.commandsForKey.items, loadCommandsPerKeyFunction(callback), - futures, + results, callback); - return futures != null ? FutureCombiner.allOf(futures) : null; + return results != null ? AsyncResults.reduce(results, (a, b ) -> null).beginAsResult() : null; } @VisibleForTesting @@ -202,28 +204,28 @@ public boolean load(AsyncContext context, BiConsumer callback // notify any pending write only groups we're loading a full instance so the pending changes aren't removed txnIds.forEach(commandStore.commandCache()::lockWriteOnlyGroupIfExists); keys.forEach(commandStore.commandsForKeyCache()::lockWriteOnlyGroupIfExists); - readFuture = referenceAndDispatchReads(context, callback); + readResult = referenceAndDispatchReads(context, callback); state(State.LOADING); case LOADING: - if (readFuture != null) + if (readResult != null) { - if (readFuture.isSuccess()) + if (readResult.isSuccess()) { - logger.trace("Read future succeeded for {}", callback); + logger.trace("Read result succeeded for {}", callback); context.verifyLoaded(); - readFuture = null; + readResult = null; } else { - logger.trace("Adding callback for read future: {}", callback); - readFuture.addCallback(callback, commandStore.executor()); + logger.trace("Adding callback for read result: {}", callback); + readResult.addCallback(callback, commandStore.executor()); break; } } // apply any pending write only changes that may not have made it to disk in time to be loaded - context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupLoadFuture); + context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupLoadResult); context.commands.items.values().forEach(commandStore.commandCache()::applyAndRemoveWriteOnlyGroup); - context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupLoadFuture); + context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupLoadResult); context.commandsForKey.items.values().forEach(commandStore.commandsForKeyCache()::applyAndRemoveWriteOnlyGroup); // apply blindly reported timestamps context.commandsForKey.items.values().forEach(AccordCommandsForKey::applyBlindWitnessedTimestamps); diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index e302b4248976..b4b490fcdd13 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -23,6 +23,7 @@ import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; @@ -32,12 +33,12 @@ import accord.local.SafeCommandStore; import accord.primitives.Seekables; import accord.primitives.TxnId; +import accord.utils.async.AsyncChains; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.utils.concurrent.AsyncPromise; -public abstract class AsyncOperation extends AsyncPromise implements Runnable, Function, BiConsumer +public abstract class AsyncOperation extends AsyncChains.Head implements Runnable, Function { private static final Logger logger = LoggerFactory.getLogger(AsyncOperation.class); @@ -50,6 +51,7 @@ private static class LoggingProps enum State { INITIALIZED, + SUBMITTED, LOADING, RUNNING, SAVING, @@ -71,6 +73,7 @@ public interface Context private final AsyncContext context = new AsyncContext(); private R result; private final String loggingId; + private BiConsumer callback; private void setLoggingIds() { @@ -123,22 +126,32 @@ protected void setState(State state) this.state = state; } - /** - * callback for loader and writer - */ - @Override - public void accept(Object o, Throwable throwable) + private void callback(Object o, Throwable throwable) { if (throwable != null) { logger.error(String.format("Operation %s failed", this), throwable); state = State.FAILED; - tryFailure(throwable); + fail(throwable); } else run(); } + private void finish(R result) + { + Preconditions.checkArgument(state == State.COMPLETING); + callback.accept(result, null); + state = State.FINISHED; + } + + private void fail(Throwable throwable) + { + Preconditions.checkArgument(state != State.FINISHED && state != State.FAILED); + callback.accept(null, throwable); + state = State.FAILED; + } + protected void runInternal() { SafeAccordCommandStore safeStore = commandStore.safeStore(context); @@ -147,7 +160,7 @@ protected void runInternal() case INITIALIZED: state = State.LOADING; case LOADING: - if (!loader.load(context, this)) + if (!loader.load(context, this::callback)) return; state = State.RUNNING; @@ -156,7 +169,7 @@ protected void runInternal() state = State.SAVING; case SAVING: case AWAITING_SAVE: - boolean updatesPersisted = writer.save(context, this); + boolean updatesPersisted = writer.save(context, this::callback); if (state != State.AWAITING_SAVE) { @@ -170,8 +183,7 @@ protected void runInternal() return; state = State.COMPLETING; - setSuccess(result); - state = State.FINISHED; + finish(result); case FINISHED: break; default: @@ -196,7 +208,7 @@ public void run() catch (Throwable t) { logger.error(String.format("Operation %s failed", this), t); - tryFailure(t); + fail(t); } finally { @@ -210,6 +222,14 @@ public void run() } } + @Override + public void begin(BiConsumer callback) + { + Preconditions.checkArgument(this.callback == null); + this.callback = callback; + commandStore.executor().submit(this); + } + private static Iterable toPartitionKeys(Seekables keys) { switch (keys.domain()) diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java b/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java index c920a0f7bb29..fb7f38687c26 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java @@ -35,6 +35,9 @@ import accord.primitives.Seekable; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.service.accord.AccordCommand; @@ -46,8 +49,8 @@ import org.apache.cassandra.service.accord.AccordState; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.store.StoredSet; -import org.apache.cassandra.utils.concurrent.Future; -import org.apache.cassandra.utils.concurrent.FutureCombiner; + +import static accord.utils.async.AsyncResults.ofRunnable; import static accord.primitives.Routable.Domain.Range; @@ -64,7 +67,7 @@ enum State } private State state = State.INITIALIZED; - protected Future writeFuture; + protected AsyncResult writeResult; private final AccordCommandStore commandStore; final AccordStateCache.Instance commandCache; final AccordStateCache.Instance cfkCache; @@ -81,12 +84,12 @@ private interface StateMutationFunction> Mutation apply(AccordCommandStore commandStore, V state, long timestamp); } - private static > List> dispatchWrites(AsyncContext.Group ctxGroup, + private static > List> dispatchWrites(AsyncContext.Group ctxGroup, AccordStateCache.Instance cache, StateMutationFunction mutationFunction, long timestamp, AccordCommandStore commandStore, - List> futures, + List> results, Object callback) { for (V item : ctxGroup.items.values()) @@ -98,13 +101,13 @@ private static > List> dispatchWrites(Asyn continue; } - if (futures == null) - futures = new ArrayList<>(); + if (results == null) + results = new ArrayList<>(); K key = item.key(); Mutation mutation = mutationFunction.apply(commandStore, item, timestamp); if (logger.isTraceEnabled()) logger.trace("Dispatching mutation for {} for {}, {} -> {}", key, callback, item, mutation); - Future future = Stage.MUTATION.submit(() -> { + AsyncResult result = ofRunnable(Stage.MUTATION.executor(), () -> { try { if (logger.isTraceEnabled()) @@ -119,46 +122,46 @@ private static > List> dispatchWrites(Asyn throw t; } }); - cache.addSaveFuture(item.key(), future); - futures.add(future); + cache.addSaveResult(item.key(), result); + results.add(result); } for (AccordState.WriteOnly item : ctxGroup.writeOnly.values()) { Preconditions.checkState(item.hasModifications()); - if (futures == null) futures = new ArrayList<>(); + if (results == null) results = new ArrayList<>(); Mutation mutation = mutationFunction.apply(commandStore, (V) item, timestamp); - Future future = Stage.MUTATION.submit((Runnable) mutation::apply); - future.addListener(() -> cache.purgeWriteOnly(item.key()), commandStore.executor()); - item.future(future); - futures.add(future); + AsyncResult result = AsyncResults.ofRunnable(Stage.MUTATION.executor(), mutation::apply); + result.addListener(() -> cache.purgeWriteOnly(item.key()), commandStore.executor()); + item.asyncResult(result); + results.add(result); } - return futures; + return results; } - private Future maybeDispatchWrites(AsyncContext context, Object callback) throws IOException + private AsyncResult maybeDispatchWrites(AsyncContext context, Object callback) throws IOException { - List> futures = null; + List> results = null; long timestamp = commandStore.nextSystemTimestampMicros(); - futures = dispatchWrites(context.commands, + results = dispatchWrites(context.commands, commandStore.commandCache(), AccordKeyspace::getCommandMutation, timestamp, commandStore, - futures, + results, callback); - futures = dispatchWrites(context.commandsForKey, + results = dispatchWrites(context.commandsForKey, commandStore.commandsForKeyCache(), AccordKeyspace::getCommandsForKeyMutation, timestamp, commandStore, - futures, + results, callback); - return futures != null ? FutureCombiner.allOf(futures) : null; + return results != null ? AsyncResults.reduce(results, (a, b) -> null).beginAsResult() : null; } private void denormalizeBlockedOn(AccordCommand command, @@ -201,7 +204,7 @@ AccordState getForDenormalization(K key, return item; item = cache.getOrNull(key); - if (item != null && !cache.hasLoadFuture(key)) + if (item != null && !cache.hasLoadResult(key)) { ctxGroup.items.put(key, item); return item; @@ -303,18 +306,18 @@ public boolean save(AsyncContext context, BiConsumer callback setState(State.SETUP); case SETUP: denormalize(context, callback); - writeFuture = maybeDispatchWrites(context, callback); + writeResult = maybeDispatchWrites(context, callback); setState(State.SAVING); case SAVING: - if (writeFuture != null && !writeFuture.isSuccess()) + if (writeResult != null && !writeResult.isSuccess()) { - logger.trace("Adding callback for write future: {}", callback); - writeFuture.addCallback(callback, commandStore.executor()); + logger.trace("Adding callback for write result: {}", callback); + writeResult.addCallback(callback, commandStore.executor()); break; } - context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupSaveFuture); - context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupSaveFuture); + context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupSaveResult); + context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupSaveResult); setState(State.FINISHED); case FINISHED: break; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java index 534f4aa26295..acab7c89f792 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -26,6 +26,8 @@ import accord.api.Data; import accord.local.SafeCommandStore; import accord.primitives.Timestamp; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.ReadExecutionController; @@ -40,7 +42,6 @@ import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; -import org.apache.cassandra.utils.concurrent.Future; import static org.apache.cassandra.utils.ByteBufferUtil.readWithVIntLength; import static org.apache.cassandra.utils.ByteBufferUtil.serializedSizeWithVIntLength; @@ -110,7 +111,7 @@ public PartitionKey key() return key; } - public Future read(boolean isForWriteTxn, SafeCommandStore safeStore, Timestamp executeAt) + public AsyncChain read(boolean isForWriteTxn, SafeCommandStore safeStore, Timestamp executeAt) { SinglePartitionReadCommand command = (SinglePartitionReadCommand) get(); // TODO (required, safety): before release, double check reasoning that this is safe @@ -121,7 +122,7 @@ public Future read(boolean isForWriteTxn, SafeCommandStore safeStore, Time // immediately after the transaction executed, and this simplifies things a great deal int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(executeAt.hlc()); - return Stage.READ.submit(() -> + return AsyncChains.ofCallable(Stage.READ.executor(), () -> { SinglePartitionReadCommand read = command.withNowInSec(nowInSeconds); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index eeb57f13f5ae..5b5812aed81d 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.function.BiConsumer; import com.google.common.collect.ImmutableList; @@ -36,21 +35,18 @@ import accord.primitives.Timestamp; import accord.primitives.Txn; import org.apache.cassandra.db.SinglePartitionReadCommand; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.utils.ObjectSizes; -import org.apache.cassandra.utils.Simulate; -import org.apache.cassandra.utils.concurrent.AsyncPromise; -import org.apache.cassandra.utils.concurrent.Future; -import org.apache.cassandra.utils.concurrent.ImmediateFuture; import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializedArraySize; -import static org.apache.cassandra.utils.Simulate.With.MONITORS; public class TxnRead extends AbstractKeySorted implements Read { @@ -143,51 +139,18 @@ public Read merge(Read read) } @Override - public Future read(Seekable key, Txn.Kind kind, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) + public AsyncChain read(Seekable key, Txn.Kind kind, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) { - List> futures = new ArrayList<>(); - forEachWithKey((PartitionKey) key, read -> futures.add(read.read(kind.isWrite(), safeStore, executeAt))); + List> results = new ArrayList<>(); + forEachWithKey((PartitionKey) key, read -> results.add(read.read(kind.isWrite(), safeStore, executeAt))); - if (futures.isEmpty()) - return ImmediateFuture.success(new TxnData()); + if (results.isEmpty()) + return AsyncChains.success(new TxnData()); - if (futures.size() == 1) - return futures.get(0); + if (results.size() == 1) + return results.get(0); - return new MultiReadFuture(futures); - } - - @Simulate(with = MONITORS) - private static class MultiReadFuture extends AsyncPromise implements BiConsumer - { - private Data result = null; - private int pending; - - public MultiReadFuture(List> futures) - { - pending = futures.size(); - listen(futures); - } - - private synchronized void listen(List> futures) - { - for (int i=0, mi=futures.size(); i serializer = new IVersionedSerializer() diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 94593ffc520a..d6f3a71c8b14 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -37,6 +37,8 @@ import accord.primitives.Seekable; import accord.primitives.Timestamp; import accord.primitives.Writes; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.cql3.UpdateParameters; import org.apache.cassandra.db.Clustering; @@ -56,7 +58,6 @@ import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; -import org.apache.cassandra.utils.concurrent.*; import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; import static org.apache.cassandra.service.accord.AccordSerializers.partitionUpdateSerializer; @@ -122,11 +123,11 @@ public String toString() '}'; } - public Future write(long timestamp, int nowInSeconds) + public AsyncChain write(long timestamp, int nowInSeconds) { PartitionUpdate update = new PartitionUpdate.Builder(get(), 0).updateAllTimestampAndLocalDeletionTime(timestamp, nowInSeconds).build(); Mutation mutation = new Mutation(update); - return Stage.MUTATION.submit((Runnable) mutation::apply); + return AsyncChains.ofRunnable(Stage.MUTATION.executor(), mutation::apply); } @Override @@ -342,7 +343,7 @@ Update[] newArray(int size) } @Override - public Future apply(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) + public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) { AccordCommandsForKey cfk = ((SafeAccordCommandStore) safeStore).commandsForKey((Key)key); // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing @@ -352,16 +353,16 @@ public Future apply(Seekable key, SafeCommandStore safeStore, Timestamp ex // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) int nowInSeconds = cfk.nowInSecondsFor(executeAt, true); - List> futures = new ArrayList<>(); - forEachWithKey((PartitionKey) key, write -> futures.add(write.write(timestamp, nowInSeconds))); + List> results = new ArrayList<>(); + forEachWithKey((PartitionKey) key, write -> results.add(write.write(timestamp, nowInSeconds))); - if (futures.isEmpty()) + if (results.isEmpty()) return Writes.SUCCESS; - if (futures.size() == 1) - return futures.get(0).flatMap(o -> Writes.SUCCESS); + if (results.size() == 1) + return results.get(0).flatMap(o -> Writes.SUCCESS); - return FutureCombiner.allOf(futures).flatMap(objects -> Writes.SUCCESS); + return AsyncChains.all(results).flatMap(objects -> Writes.SUCCESS); } public long estimatedSizeOnHeap() diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 6d892465934a..02ccdc44aeab 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.accord; -import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicLong; import org.junit.Assert; @@ -52,6 +51,7 @@ import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.ByteBufferUtil; +import static accord.utils.async.AsyncChains.awaitUninterruptibly; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.*; @@ -82,10 +82,10 @@ private static PartitionKey key(int k) * disable cache and make sure correct values are coming in and out of the accord table */ @Test - public void basicCycleTest() throws ExecutionException, InterruptedException + public void basicCycleTest() { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); }).get(); + awaitUninterruptibly(commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); })); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); @@ -98,15 +98,15 @@ public void basicCycleTest() throws ExecutionException, InterruptedException PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, false, 1, partialTxn, fullRoute); // Check preaccept - commandStore.execute(preAccept, instance -> { + awaitUninterruptibly(commandStore.execute(preAccept, instance -> { PreAccept.PreAcceptReply reply = preAccept.apply(instance); Assert.assertTrue(reply.isOk()); PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; Assert.assertEquals(txnId, ok.witnessedAt); Assert.assertTrue(ok.deps.isEmpty()); - }).get(); + })); - commandStore.execute(preAccept, instance -> { + awaitUninterruptibly(commandStore.execute(preAccept, instance -> { Command command = instance.command(txnId); Assert.assertEquals(txnId, command.executeAt()); Assert.assertEquals(Status.PreAccepted, command.status()); @@ -116,7 +116,7 @@ public void basicCycleTest() throws ExecutionException, InterruptedException Assert.assertEquals(txnId, cfk.max()); Assert.assertNotNull((cfk.byId()).get(txnId)); Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); - }).get(); + })); // check accept TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); @@ -129,13 +129,13 @@ public void basicCycleTest() throws ExecutionException, InterruptedException } Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); - commandStore.execute(accept, instance -> { + awaitUninterruptibly(commandStore.execute(accept, instance -> { Accept.AcceptReply reply = accept.apply(instance); Assert.assertTrue(reply.isOk()); Assert.assertTrue(reply.deps.isEmpty()); - }).get(); + })); - commandStore.execute(accept, instance -> { + awaitUninterruptibly(commandStore.execute(accept, instance -> { Command command = instance.command(txnId); Assert.assertEquals(executeAt, command.executeAt()); Assert.assertEquals(Status.Accepted, command.status()); @@ -145,13 +145,13 @@ public void basicCycleTest() throws ExecutionException, InterruptedException Assert.assertEquals(executeAt, cfk.max()); Assert.assertNotNull((cfk.byId()).get(txnId)); Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); - }).get(); + })); // check commit Commit commit = Commit.SerializerSupport.create(txnId, route, 1, executeAt, partialTxn, deps, fullRoute, null); - commandStore.execute(commit, commit::apply).get(); + awaitUninterruptibly(commandStore.execute(commit, commit::apply)); - commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { + awaitUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { Command command = instance.command(txnId); Assert.assertEquals(commit.executeAt, command.executeAt()); Assert.assertTrue(command.hasBeen(Status.Committed)); @@ -160,14 +160,14 @@ public void basicCycleTest() throws ExecutionException, InterruptedException AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).commandsForKey(key(1)); Assert.assertNotNull((cfk.byId()).get(txnId)); Assert.assertNotNull((cfk.byExecuteAt()).get(commit.executeAt)); - }).get(); + })); } @Test public void computeDeps() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); }).get(); + awaitUninterruptibly(commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); })); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); Txn txn = createTxn(2); @@ -178,16 +178,16 @@ public void computeDeps() throws Throwable PartialTxn partialTxn = txn.slice(route.covering(), true); PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, false, 1, partialTxn, fullRoute); - commandStore.execute(preAccept1, preAccept1::apply).get(); + awaitUninterruptibly(commandStore.execute(preAccept1, preAccept1::apply)); // second preaccept should identify txnId1 as a dependency TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); PreAccept preAccept2 = PreAccept.SerializerSupport.create(txnId2, route, 1, 1, false, 1, partialTxn, fullRoute); - commandStore.execute(preAccept2, instance -> { + awaitUninterruptibly(commandStore.execute(preAccept2, instance -> { PreAccept.PreAcceptReply reply = preAccept2.apply(instance); Assert.assertTrue(reply.isOk()); PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; Assert.assertTrue(ok.deps.contains(txnId1)); - }).get(); + })); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index 2cc2b3de18bf..a686afad7ca7 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -21,12 +21,13 @@ import java.util.HashSet; import java.util.Set; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import org.junit.Assert; import org.junit.Test; -import org.apache.cassandra.utils.concurrent.AsyncPromise; -import org.apache.cassandra.utils.concurrent.Future; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; public class AccordStateCacheTest { @@ -248,8 +249,8 @@ public void evictionBlockedOnSaveFuture() assertCacheState(cache, 0, 4, DEFAULT_NODE_SIZE * 4); - AsyncPromise saveFuture = new AsyncPromise<>(); - instance.addSaveFuture(0, saveFuture); + AsyncResult saveFuture = AsyncResults.settable(); + instance.addSaveResult(0, saveFuture); cache.setMaxSize(0); // all should have been evicted except 0 @@ -269,7 +270,7 @@ static class SetItem implements AccordState static class WriteOnly extends SetItem implements AccordState.WriteOnly { - AsyncPromise promise = null; + AsyncResult.Settable promise = null; final Set added = new HashSet<>(); final Set remove = new HashSet<>(); @@ -279,14 +280,14 @@ public WriteOnly(Integer key) } @Override - public void future(Future future) + public void asyncResult(AsyncResult notifier) { - Assert.assertTrue(future instanceof AsyncPromise); - this.promise = (AsyncPromise) future; + Preconditions.checkArgument(notifier instanceof AsyncResult.Settable); + this.promise = (AsyncResult.Settable) notifier; } @Override - public Future future() + public AsyncResult asyncResult() { return promise; } @@ -353,17 +354,17 @@ public void writeOnlyCycle() SetItem.WriteOnly writeOnly1 = new SetItem.WriteOnly(5); writeOnly1.added.addAll(ImmutableSet.of(4, 5)); - writeOnly1.future(new AsyncPromise<>()); + writeOnly1.asyncResult(AsyncResults.settable()); instance.addWriteOnly(writeOnly1); Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); SetItem.WriteOnly writeOnly2 = new SetItem.WriteOnly(5); writeOnly2.remove.addAll(ImmutableSet.of(2, 4)); - writeOnly2.future(new AsyncPromise<>()); + writeOnly2.asyncResult(AsyncResults.settable()); instance.addWriteOnly(writeOnly2); Assert.assertEquals(2, instance.pendingWriteOnlyOperations(5)); - Assert.assertNull(instance.getSaveFuture(5)); + Assert.assertNull(instance.getSaveResult(5)); Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); instance.lockWriteOnlyGroupIfExists(5); @@ -377,7 +378,7 @@ public void writeOnlyCycle() // write only futures should have been merged and promoted to normal save futures, which would // prevent the cached object from being purged until they were completed - Future saveFuture = instance.getSaveFuture(5); + AsyncResult saveFuture = instance.getSaveResult(5); Assert.assertNotNull(saveFuture); Assert.assertFalse(saveFuture.isDone()); Assert.assertFalse(instance.canEvict(5)); @@ -402,7 +403,7 @@ public void writeOnlyPurging() { SetItem.WriteOnly item = new SetItem.WriteOnly(5); item.added.add(i); - item.future(new AsyncPromise<>()); + item.asyncResult(AsyncResults.settable()); instance.addWriteOnly(item); writeOnly[i] = item; } @@ -433,7 +434,7 @@ public void writeOnlyPurgedLock() SetItem.WriteOnly item = new SetItem.WriteOnly(5); item.added.add(0); - item.future(new AsyncPromise<>()); + item.asyncResult(AsyncResults.settable()); instance.addWriteOnly(item); instance.lockWriteOnlyGroupIfExists(5); @@ -452,8 +453,8 @@ public void testLoadFutureAutoLocksWriteOnlyInstances() AccordStateCache cache = new AccordStateCache(500); AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); - AsyncPromise loadfuture = new AsyncPromise<>(); - instance.setLoadFuture(5, loadfuture); + AsyncResult loadfuture = AsyncResults.settable(); + instance.setLoadResult(5, loadfuture); Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); Assert.assertEquals(0, instance.pendingWriteOnlyOperations(5)); @@ -461,7 +462,7 @@ public void testLoadFutureAutoLocksWriteOnlyInstances() // adding a write only object should immediately lock the group, since there's an existing load future SetItem.WriteOnly item = new SetItem.WriteOnly(5); item.added.add(0); - item.future(new AsyncPromise<>()); + item.asyncResult(AsyncResults.settable()); instance.addWriteOnly(item); Assert.assertTrue(instance.writeOnlyGroupIsLocked(5)); @@ -474,12 +475,12 @@ public void testFutureMerging() { AccordStateCache cache = new AccordStateCache(500); AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); - AsyncPromise promise1 = new AsyncPromise<>(); - AsyncPromise promise2 = new AsyncPromise<>(); - instance.addSaveFuture(5, promise1); - instance.addSaveFuture(5, promise2); + AsyncResult.Settable promise1 = AsyncResults.settable(); + AsyncResult.Settable promise2 = AsyncResults.settable(); + instance.addSaveResult(5, promise1); + instance.addSaveResult(5, promise2); - Future future = instance.getSaveFuture(5); + AsyncResult future = instance.getSaveResult(5); Assert.assertNotSame(future, promise1); Assert.assertNotSame(future, promise2); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 531b513fa6d2..0c457ae067aa 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -54,6 +54,7 @@ import accord.primitives.Writes; import accord.topology.Shard; import accord.topology.Topology; +import accord.utils.async.AsyncChains; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.statements.TransactionStatement; @@ -68,6 +69,7 @@ import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static accord.primitives.Routable.Domain.Key; +import static accord.utils.async.AsyncChains.awaitUninterruptibly; import static java.lang.String.format; public class AccordTestUtils @@ -113,7 +115,7 @@ public static Ballot ballot(long epoch, long hlc, int node) public static void processCommandResult(AccordCommandStore commandStore, Command command) throws Throwable { - commandStore.execute(PreLoadContext.contextFor(Collections.emptyList(), command.partialTxn().keys()), + awaitUninterruptibly(commandStore.execute(PreLoadContext.contextFor(Collections.emptyList(), command.partialTxn().keys()), instance -> { PartialTxn txn = command.partialTxn(); TxnRead read = (TxnRead) txn.read(); @@ -121,7 +123,7 @@ public static void processCommandResult(AccordCommandStore commandStore, Command .map(key -> { try { - return read.read(key, command.txnId().rw(), instance, command.executeAt(), null).get(); + return AsyncChains.getBlocking(read.read(key, command.txnId().rw(), instance, command.executeAt(), null)); } catch (InterruptedException e) { @@ -136,7 +138,7 @@ public static void processCommandResult(AccordCommandStore commandStore, Command Write write = txn.update().apply(readData); ((AccordCommand)command).setWrites(new Writes(command.executeAt(), (Keys)txn.keys(), write)); ((AccordCommand)command).setResult(txn.query().compute(command.txnId(), readData, txn.read(), txn.update())); - }).get(); + })); } public static Txn createTxn(String query) diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index c5bc97984d2e..3cdfceda182c 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -33,6 +33,8 @@ import accord.local.Status; import accord.primitives.PartialTxn; import accord.primitives.TxnId; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.StorageService; @@ -43,7 +45,6 @@ import org.apache.cassandra.service.accord.AccordStateCache; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.concurrent.AsyncPromise; -import org.apache.cassandra.utils.concurrent.Future; import static com.google.common.collect.Iterables.getOnlyElement; import static java.util.Collections.singleton; @@ -207,8 +208,8 @@ public void inProgressLoadTest() AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); // since there's a read future associated with the txnId, we'll wait for it to load - AsyncPromise readFuture = new AsyncPromise<>(); - commandCache.setLoadFuture(command.txnId(), readFuture); + AsyncResult.Settable readFuture = AsyncResults.settable(); + commandCache.setLoadResult(command.txnId(), readFuture); AsyncPromise cbFired = new AsyncPromise<>(); commandStore.executeBlocking(() -> { @@ -254,12 +255,12 @@ public void pendingWriteOnlyApplied() AccordStateCache.Instance cache = commandStore.commandCache(); AccordCommand.WriteOnly writeOnly1 = new AccordCommand.WriteOnly(txnId); writeOnly1.blockingApplyOn.blindAdd(blockApply); - writeOnly1.future(new AsyncPromise<>()); + writeOnly1.asyncResult(AsyncResults.settable()); cache.addWriteOnly(writeOnly1); AccordCommand.WriteOnly writeOnly2 = new AccordCommand.WriteOnly(txnId); writeOnly2.blockingCommitOn.blindAdd(blockCommit); - writeOnly2.future(new AsyncPromise<>()); + writeOnly2.asyncResult(AsyncResults.settable()); cache.addWriteOnly(writeOnly2); AsyncContext context = new AsyncContext(); @@ -286,9 +287,9 @@ public void failedLoadTest() throws Throwable TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); - AsyncPromise promise1 = new AsyncPromise<>(); - AsyncPromise promise2 = new AsyncPromise<>(); - AsyncPromise callback = new AsyncPromise<>(); + AsyncResult.Settable promise1 = AsyncResults.settable(); + AsyncResult.Settable promise2 = AsyncResults.settable(); + AsyncResult.Settable callback = AsyncResults.settable(); RuntimeException failure = new RuntimeException(); execute(commandStore, () -> { @@ -296,7 +297,7 @@ public void failedLoadTest() throws Throwable AtomicInteger loadCalls = new AtomicInteger(); AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Collections.emptyList()){ @Override - Function> loadCommandFunction(Object callback) + Function> loadCommandFunction(Object callback) { return cmd -> { TxnId txnId = cmd.txnId(); @@ -321,6 +322,6 @@ Function> loadCommandFunction(Object callback) }); promise1.tryFailure(failure); - callback.get(); + AsyncResults.awaitUninterruptibly(callback); } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 4a71bff6162d..53da7b29951b 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -23,7 +23,6 @@ import java.util.function.Consumer; import com.google.common.collect.Iterables; -import com.google.common.util.concurrent.Futures; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -54,6 +53,7 @@ import org.apache.cassandra.utils.FBUtilities; import static accord.local.PreLoadContext.contextFor; +import static accord.utils.async.AsyncChains.awaitUninterruptibly; import static java.util.Collections.emptyList; import static java.util.Collections.singleton; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; @@ -94,10 +94,10 @@ public void optionalCommandTest() throws Throwable Txn txn = createTxn((int)clock.incrementAndGet()); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); - commandStore.execute(contextFor(txnId), instance -> { + awaitUninterruptibly(commandStore.execute(contextFor(txnId), instance -> { Command command = instance.ifPresent(txnId); Assert.assertNull(command); - }).get(); + })); UntypedResultSet result = AccordKeyspace.loadCommandRow(commandStore, txnId); Assert.assertTrue(result.isEmpty()); @@ -110,10 +110,10 @@ public void optionalCommandsForKeyTest() throws Throwable Txn txn = createTxn((int)clock.incrementAndGet()); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); - commandStore.execute(contextFor(Collections.emptyList(), Keys.of(key)),instance -> { + awaitUninterruptibly(commandStore.execute(contextFor(Collections.emptyList(), Keys.of(key)),instance -> { AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).maybeCommandsForKey(key); Assert.assertNull(cfk); - }).get(); + })); long nowInSeconds = FBUtilities.nowInSeconds(); SinglePartitionReadCommand command = AccordKeyspace.getCommandsForKeyRead(commandStore, key, nowInSeconds); @@ -142,10 +142,10 @@ private static AccordCommand createCommittedAndPersist(AccordCommandStore comman private static void assertFutureState(AccordStateCache.Instance cache, TxnId txnId, boolean expectLoadFuture, boolean expectSaveFuture) { - if (cache.hasLoadFuture(txnId) != expectLoadFuture) + if (cache.hasLoadResult(txnId) != expectLoadFuture) throw new AssertionError(expectLoadFuture ? "Load future unexpectedly not found for " + txnId : "Unexpectedly found load future for " + txnId); - if (cache.hasSaveFuture(txnId) != expectSaveFuture) + if (cache.hasSaveResult(txnId) != expectSaveFuture) throw new AssertionError(expectSaveFuture ? "Save future unexpectedly not found for " + txnId : "Unexpectedly found save future for " + txnId); @@ -220,6 +220,6 @@ void setState(State state) commandStore.executor().submit(operation); - Futures.getUnchecked(operation); + awaitUninterruptibly(operation); } } From 34bd73e2a70242921919b70d2270dceec930c0d5 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Fri, 3 Feb 2023 11:52:06 -0800 Subject: [PATCH 045/340] CEP-15/Accord Immutable State Refactor Patch by Blake Eggleston; Reviewed by David Capwell and Benedict Elliott Smith for Cassandra-18192 --- .build/build-accord.xml | 6 + modules/accord | 2 +- .../cql3/statements/TransactionStatement.java | 3 +- .../exceptions/ReadTimeoutException.java | 6 + .../service/accord/AccordCommand.java | 836 ------------------ .../service/accord/AccordCommandStore.java | 390 ++------ .../service/accord/AccordCommandStores.java | 7 +- .../service/accord/AccordCommandsForKey.java | 433 --------- .../accord/AccordConfigurationService.java | 21 +- .../service/accord/AccordKeyspace.java | 643 ++++++++------ .../service/accord/AccordLoadingState.java | 163 ++++ .../service/accord/AccordObjectSizes.java | 162 +++- .../service/accord/AccordPartialCommand.java | 209 ----- .../service/accord/AccordSafeCommand.java | 124 +++ .../accord/AccordSafeCommandStore.java | 260 ++++++ .../accord/AccordSafeCommandsForKey.java | 125 +++ .../service/accord/AccordSafeState.java | 76 ++ .../service/accord/AccordService.java | 35 +- .../cassandra/service/accord/AccordState.java | 105 --- .../service/accord/AccordStateCache.java | 476 +++++----- .../service/accord/ListenerProxy.java | 275 ------ .../service/accord/api/AccordAgent.java | 7 +- .../service/accord/async/AsyncContext.java | 116 --- .../service/accord/async/AsyncLoader.java | 189 ++-- .../service/accord/async/AsyncOperation.java | 142 ++- .../service/accord/async/AsyncWriter.java | 276 ++---- .../exceptions/ReadPreemptedException.java | 36 + .../exceptions/WritePreemptedException.java | 37 + .../accord/serializers/AcceptSerializers.java | 1 - .../accord/serializers/ApplySerializers.java | 3 + .../serializers/CommandsForKeySerializer.java | 207 +++++ .../serializers/ListenerSerializers.java | 151 ++++ .../accord/store/AbstractStoredField.java | 152 ---- .../service/accord/store/StoredBoolean.java | 85 -- .../service/accord/store/StoredLong.java | 86 -- .../accord/store/StoredNavigableMap.java | 224 ----- .../service/accord/store/StoredSet.java | 249 ------ .../service/accord/store/StoredValue.java | 128 --- .../service/accord/txn/TxnWrite.java | 13 +- .../test/accord/AccordTestBase.java | 5 +- .../simulator/paxos/PaxosSimulation.java | 20 +- .../accord/AccordCommandStoreTest.java | 113 +-- .../service/accord/AccordCommandTest.java | 41 +- .../accord/AccordLoadingStateTest.java | 178 ++++ .../service/accord/AccordStateCacheTest.java | 524 +++++------ .../service/accord/AccordTestUtils.java | 175 +++- .../service/accord/async/AsyncLoaderTest.java | 194 ++-- .../accord/async/AsyncOperationTest.java | 439 ++++++++- .../service/accord/async/AsyncWriterTest.java | 241 ----- .../CommandsForKeySerializerTest.java | 95 ++ .../service/accord/store/StoredMapTest.java | 203 ----- .../service/accord/store/StoredSetTest.java | 202 ----- .../service/accord/store/StoredValueTest.java | 85 -- .../cassandra/utils/AccordGenerators.java | 84 ++ .../cassandra/utils/AssertionUtils.java | 31 + 55 files changed, 3625 insertions(+), 5464 deletions(-) delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommand.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordLoadingState.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeState.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordState.java delete mode 100644 src/java/org/apache/cassandra/service/accord/ListenerProxy.java delete mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncContext.java create mode 100644 src/java/org/apache/cassandra/service/accord/exceptions/ReadPreemptedException.java create mode 100644 src/java/org/apache/cassandra/service/accord/exceptions/WritePreemptedException.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java delete mode 100644 src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java delete mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java delete mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredLong.java delete mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java delete mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredSet.java delete mode 100644 src/java/org/apache/cassandra/service/accord/store/StoredValue.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java delete mode 100644 test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java delete mode 100644 test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java delete mode 100644 test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java delete mode 100644 test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java create mode 100644 test/unit/org/apache/cassandra/utils/AccordGenerators.java diff --git a/.build/build-accord.xml b/.build/build-accord.xml index eeadf4dd1883..eba85912d52e 100644 --- a/.build/build-accord.xml +++ b/.build/build-accord.xml @@ -33,5 +33,11 @@ + + + + + + diff --git a/modules/accord b/modules/accord index 07e351462b14..f607a05b76df 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 07e351462b147b831c2d416b8568449b06ccbb51 +Subproject commit f607a05b76df32b39c97a6e49068ae35057be98a diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index d8adb6787a92..a596a5a0e935 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -435,7 +435,8 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. } catch (Throwable t) { - logger.error("Unexpected error with transaction", t); + //TODO remove before merge to trunk + logger.error("Unexpected error with transaction: {}", t.toString()); throw t; } } diff --git a/src/java/org/apache/cassandra/exceptions/ReadTimeoutException.java b/src/java/org/apache/cassandra/exceptions/ReadTimeoutException.java index 05f3510e7b39..809f0a1780fd 100644 --- a/src/java/org/apache/cassandra/exceptions/ReadTimeoutException.java +++ b/src/java/org/apache/cassandra/exceptions/ReadTimeoutException.java @@ -28,4 +28,10 @@ public ReadTimeoutException(ConsistencyLevel consistency, int received, int bloc super(ExceptionCode.READ_TIMEOUT, consistency, received, blockFor); this.dataPresent = dataPresent; } + + public ReadTimeoutException(ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent, String msg) + { + super(ExceptionCode.READ_TIMEOUT, consistency, received, blockFor, msg); + this.dataPresent = dataPresent; + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommand.java b/src/java/org/apache/cassandra/service/accord/AccordCommand.java deleted file mode 100644 index 88b39922cee7..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordCommand.java +++ /dev/null @@ -1,836 +0,0 @@ -/* -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.Map; -import java.util.Objects; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.atomic.AtomicInteger; - -import javax.annotation.Nullable; - -import com.google.common.base.Preconditions; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import accord.api.Data; -import accord.api.Result; -import accord.api.RoutingKey; -import accord.local.Command; -import accord.local.CommandStore; -import accord.local.CommandListener; -import accord.local.Listeners; -import accord.local.PreLoadContext; -import accord.local.SafeCommandStore; -import accord.local.SaveStatus; -import accord.local.Status; -import accord.local.Status.Durability; -import accord.local.Status.Known; -import accord.primitives.Ballot; -import accord.primitives.PartialDeps; -import accord.primitives.PartialTxn; -import accord.primitives.Ranges; -import accord.primitives.Route; -import accord.primitives.Seekables; -import accord.primitives.Timestamp; -import accord.primitives.Txn; -import accord.primitives.TxnId; -import accord.primitives.Writes; -import accord.utils.DeterministicIdentitySet; -import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; -import org.apache.cassandra.service.accord.api.PartitionKey; -import accord.utils.async.AsyncChain; -import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; -import org.apache.cassandra.service.accord.async.AsyncContext; -import org.apache.cassandra.service.accord.store.StoredNavigableMap; -import org.apache.cassandra.service.accord.store.StoredSet; -import org.apache.cassandra.service.accord.store.StoredValue; -import org.apache.cassandra.service.accord.txn.TxnData; -import org.apache.cassandra.utils.ObjectSizes; - -import static accord.local.Status.Durability.Local; -import static accord.local.Status.Durability.NotDurable; -import static accord.local.Status.PreApplied; -import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applyMapChanges; -import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applySetChanges; - -public class AccordCommand extends Command implements AccordState -{ - private static final Logger logger = LoggerFactory.getLogger(AccordCommand.class); - - private static final AtomicInteger INSTANCE_COUNTER = new AtomicInteger(0); - - private static final long EMPTY_SIZE = ObjectSizes.measure(new AccordCommand(null)); - - public static class WriteOnly extends AccordCommand implements AccordState.WriteOnly - { - private AsyncResult asyncResult = null; - - public WriteOnly(TxnId txnId) - { - super(txnId); - } - - @Override - public void asyncResult(AsyncResult notifier) - { - Preconditions.checkArgument(this.asyncResult == null); - this.asyncResult = notifier; - } - - @Override - public AsyncResult asyncResult() - { - return asyncResult; - } - - @Override - public void applyChanges(AccordCommand instance) - { - applySetChanges(this, instance, cmd -> cmd.waitingOnCommit); - applyMapChanges(this, instance, cmd -> cmd.waitingOnApply); - applySetChanges(this, instance, cmd -> cmd.blockingCommitOn); - applySetChanges(this, instance, cmd -> cmd.blockingApplyOn); - } - } - - private final TxnId txnId; - private final int instanceCount = INSTANCE_COUNTER.getAndIncrement(); - public final StoredValue> route; - public final StoredValue homeKey; - public final StoredValue progressKey; - public final StoredValue partialTxn; - public final StoredValue kind; // TODO: store this in TxnId - public final StoredValue promised; - public final StoredValue accepted; - public final StoredValue executeAt; - public final StoredValue partialDeps; - public final StoredValue writes; - public final StoredValue result; - - public final StoredValue.HistoryPreserving status; - public final StoredValue durability; - - public final StoredSet.Navigable waitingOnCommit; - public final StoredNavigableMap waitingOnApply; - public final StoredSet.Navigable blockingCommitOn; - public final StoredSet.Navigable blockingApplyOn; - - public final StoredSet.DeterministicIdentity storedListeners; - private final Listeners transientListeners; - - public AccordCommand(TxnId txnId) - { - logger.trace("Instantiating new command {} @ {}", txnId, instanceHash()); - this.txnId = txnId; - homeKey = new StoredValue<>(rw()); - progressKey = new StoredValue<>(rw()); - route = new StoredValue<>(rw()); - partialTxn = new StoredValue<>(rw()); - kind = new StoredValue<>(rw()); - promised = new StoredValue<>(rw()); - accepted = new StoredValue<>(rw()); - executeAt = new StoredValue<>(rw()); - partialDeps = new StoredValue<>(rw()); - writes = new StoredValue<>(rw()); - result = new StoredValue<>(rw()); - status = new StoredValue.HistoryPreserving<>(rw()); - durability = new StoredValue<>(rw()); - waitingOnCommit = new StoredSet.Navigable<>(rw()); - waitingOnApply = new StoredNavigableMap<>(rw()); - storedListeners = new StoredSet.DeterministicIdentity<>(rw()); - transientListeners = new Listeners(); - blockingCommitOn = new StoredSet.Navigable<>(rw()); - blockingApplyOn = new StoredSet.Navigable<>(rw()); - } - - @Override - public String toString() - { - return "AccordCommand{" + - "txnId=" + txnId + - ", instanceHash=" + instanceHash() + - ", status=" + status + - ", executeAt=" + executeAt + - ", promised=" + promised + - ", accepted=" + accepted + -// ", deps=" + deps + -// ", homeKey=" + homeKey + -// ", progressKey=" + progressKey + -// ", txn=" + txn + -// ", writes=" + writes + -// ", result=" + result + - // TODO: Should we have to check for isLoaded() here? - ", txn is null?=" + (!partialTxn.isLoaded() || partialTxn.get() == null) + - ", durability=" + durability + - ", waitingOnCommit=" + waitingOnCommit + - ", waitingOnApply=" + waitingOnApply + - ", storedListeners=" + storedListeners + - ", transientListeners=" + transientListeners + - ", blockingCommitOn=" + blockingCommitOn + - ", blockingApplyOn=" + blockingApplyOn + - '}'; - } - - @Override - public boolean isEmpty() - { - return homeKey.isEmpty() - || progressKey.isEmpty() - || route.isEmpty() - || partialTxn.isEmpty() - || promised.isEmpty() - || accepted.isEmpty() - || executeAt.isEmpty() - || partialDeps.isEmpty() - || writes.isEmpty() - || result.isEmpty() - || status.isEmpty() - || durability.isEmpty() - || waitingOnCommit.isEmpty() - || blockingCommitOn.isEmpty() - || waitingOnApply.isEmpty() - || blockingApplyOn.isEmpty() - || storedListeners.isEmpty(); - } - - public void setEmpty() - { - homeKey.setEmpty(); - progressKey.setEmpty(); - route.setEmpty(); - partialTxn.setEmpty(); - promised.setEmpty(); - accepted.setEmpty(); - executeAt.setEmpty(); - partialDeps.setEmpty(); - writes.setEmpty(); - result.setEmpty(); - status.setEmpty(); - durability.setEmpty(); - waitingOnCommit.setEmpty(); - blockingCommitOn.setEmpty(); - waitingOnApply.setEmpty(); - blockingApplyOn.setEmpty(); - storedListeners.setEmpty();; - } - - public AccordCommand initialize() - { - logger.trace("Initializing command {} @ {}", txnId, instanceHash()); - status.set(SaveStatus.NotWitnessed); - homeKey.set(null); - progressKey.set(null); - route.set(null); - partialTxn.set(null); - kind.set(null); - executeAt.load(null); - promised.set(Ballot.ZERO); - accepted.set(Ballot.ZERO); - partialDeps.set(PartialDeps.NONE); - writes.load(null); - result.load(null); - durability.set(Durability.NotDurable); - waitingOnCommit.load(new TreeSet<>()); - waitingOnApply.load(new TreeMap<>()); - blockingCommitOn.load(new TreeSet<>()); - blockingApplyOn.load(new TreeSet<>()); - storedListeners.load(new DeterministicIdentitySet<>()); - return this; - } - - @Override - public boolean isLoaded() - { - return homeKey.isLoaded() - && progressKey.isLoaded() - && route.isLoaded() - && partialTxn.isLoaded() - && promised.isLoaded() - && accepted.isLoaded() - && executeAt.isLoaded() - && partialDeps.isLoaded() - && writes.isLoaded() - && result.isLoaded() - && status.isLoaded() - && durability.isLoaded() - && waitingOnCommit.isLoaded() - && blockingCommitOn.isLoaded() - && waitingOnApply.isLoaded() - && blockingApplyOn.isLoaded() - && storedListeners.isLoaded(); - } - - public boolean isPartiallyLoaded() - { - return homeKey.isLoaded() - || progressKey.isLoaded() - || route.isLoaded() - || partialTxn.isLoaded() - || promised.isLoaded() - || accepted.isLoaded() - || executeAt.isLoaded() - || partialDeps.isLoaded() - || writes.isLoaded() - || result.isLoaded() - || status.isLoaded() - || durability.isLoaded() - || waitingOnCommit.isLoaded() - || blockingCommitOn.isLoaded() - || waitingOnApply.isLoaded() - || blockingApplyOn.isLoaded() - || storedListeners.isLoaded(); - } - - @Override - public boolean hasModifications() - { - return homeKey.hasModifications() - || progressKey.hasModifications() - || route.hasModifications() - || partialTxn.hasModifications() - || promised.hasModifications() - || accepted.hasModifications() - || executeAt.hasModifications() - || partialDeps.hasModifications() - || writes.hasModifications() - || result.hasModifications() - || status.hasModifications() - || durability.hasModifications() - || waitingOnCommit.hasModifications() - || blockingCommitOn.hasModifications() - || waitingOnApply.hasModifications() - || blockingApplyOn.hasModifications() - || storedListeners.hasModifications(); - } - - @Override - public void clearModifiedFlag() - { - logger.trace("Clearing modified flag on command {} @ {}", txnId, instanceHash()); - homeKey.clearModifiedFlag(); - progressKey.clearModifiedFlag(); - route.clearModifiedFlag(); - partialTxn.clearModifiedFlag(); - promised.clearModifiedFlag(); - accepted.clearModifiedFlag(); - executeAt.clearModifiedFlag(); - partialDeps.clearModifiedFlag(); - writes.clearModifiedFlag(); - result.clearModifiedFlag(); - status.clearModifiedFlag(); - durability.clearModifiedFlag(); - waitingOnCommit.clearModifiedFlag(); - blockingCommitOn.clearModifiedFlag(); - waitingOnApply.clearModifiedFlag(); - blockingApplyOn.clearModifiedFlag(); - storedListeners.clearModifiedFlag(); - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - AccordCommand command = (AccordCommand) o; - return homeKey.equals(command.homeKey) - && progressKey.equals(command.progressKey) - && route.equals(command.route) - && txnId.equals(command.txnId) - && partialTxn.equals(command.partialTxn) - && promised.equals(command.promised) - && accepted.equals(command.accepted) - && executeAt.equals(command.executeAt) - && partialDeps.equals(command.partialDeps) - && writes.equals(command.writes) - && result.equals(command.result) - && status.equals(command.status) - && durability.equals(command.durability) - && waitingOnCommit.equals(command.waitingOnCommit) - && blockingCommitOn.equals(command.blockingCommitOn) - && waitingOnApply.equals(command.waitingOnApply) - && blockingApplyOn.equals(command.blockingApplyOn) - && storedListeners.equals(command.storedListeners) - && transientListeners.equals(command.transientListeners); - } - - boolean isReadOnly() - { - return false; - } - - private int instanceHash() - { -// return System.identityHashCode(this); - return instanceCount; - } - - @Override - public int hashCode() - { - return Objects.hash(txnId, - homeKey, - progressKey, - route, - partialTxn, - promised, - accepted, - executeAt, - partialDeps, - writes, - result, - status, - durability, - waitingOnCommit, - blockingCommitOn, - waitingOnApply, - blockingApplyOn, - storedListeners, - transientListeners); - } - - @Override - public TxnId key() - { - return txnId; - } - - @Override - public long estimatedSizeOnHeap() - { - long size = EMPTY_SIZE; - size += AccordObjectSizes.timestamp(txnId); - size += homeKey.estimatedSizeOnHeap(AccordObjectSizes::key); - size += progressKey.estimatedSizeOnHeap(AccordObjectSizes::key); - size += route.estimatedSizeOnHeap(AccordObjectSizes::route); - size += partialTxn.estimatedSizeOnHeap(AccordObjectSizes::txn); - size += promised.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += accepted.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += executeAt.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += partialDeps.estimatedSizeOnHeap(AccordObjectSizes::dependencies); - size += writes.estimatedSizeOnHeap(AccordObjectSizes::writes); - size += result.estimatedSizeOnHeap(r -> ((TxnData) r).estimatedSizeOnHeap()); - size += status.estimatedSizeOnHeap(s -> 0); - size += durability.estimatedSizeOnHeap(s -> 0); - size += waitingOnCommit.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += blockingCommitOn.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += waitingOnApply.estimatedSizeOnHeap(AccordObjectSizes::timestamp, AccordObjectSizes::timestamp); - size += blockingApplyOn.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += storedListeners.estimatedSizeOnHeap(ListenerProxy::estimatedSizeOnHeap); - return size; - } - - public boolean shouldUpdateDenormalizedWaitingOn() - { - if (blockingCommitOn.getView().isEmpty() && blockingApplyOn.getView().isEmpty()) - return false; - return AccordPartialCommand.serializer.needsUpdate(this); - } - - @Override - public TxnId txnId() - { - return txnId; - } - - @Override - public RoutingKey homeKey() - { - return homeKey.get(); - } - - @Override - protected void setHomeKey(RoutingKey key) - { - homeKey.set(key); - } - - @Override - public RoutingKey progressKey() - { - return progressKey.get(); - } - - @Override - protected void setProgressKey(RoutingKey key) - { - progressKey.set(key); - } - - @Override - public Route route() - { - return route.get(); - } - - @Override - protected void setRoute(Route newRoute) - { - route.set(newRoute); - } - - @Override - public PartialTxn partialTxn() - { - return partialTxn.get(); - } - - @Override - public void setPartialTxn(PartialTxn txn) - { - this.partialTxn.set(txn); - //TODO remove. This was added to fix tests after Partial Replication was added, this was added for tests - this.kind.set(txn.kind()); - } - - @Override - public Ballot promised() - { - return promised.get(); - } - - @Override - public void setPromised(Ballot ballot) - { - this.promised.set(ballot); - } - - @Override - public Ballot accepted() - { - return accepted.get(); - } - - @Override - public void setAccepted(Ballot ballot) - { - this.accepted.set(ballot); - } - - @Override - public Timestamp executeAt() - { - return executeAt.get(); - } - - @Override - public void setExecuteAt(Timestamp timestamp) - { - Preconditions.checkState(!status().hasBeen(Status.Committed) || executeAt().equals(timestamp)); - this.executeAt.set(timestamp); - } - - @Override - public PartialDeps partialDeps() - { - return partialDeps.get(); - } - - @Override - public void setPartialDeps(PartialDeps deps) - { - this.partialDeps.set(deps); - } - - @Override - public Writes writes() - { - return writes.get(); - } - - @Override - public void setWrites(Writes writes) - { - this.writes.set(writes); - } - - @Override - public Result result() - { - return result.get(); - } - - @Override - public void setResult(Result result) - { - this.result.set(result); - } - - @Override - public SaveStatus saveStatus() - { - return status.get(); - } - - @Override - public void setSaveStatus(SaveStatus status) - { - this.status.set(status); - } - - @Override - public void setStatus(Status status) - { - super.setStatus(status); - } - - @Override - public Known known() - { - return this.status.get().known; - } - - @Override - public Durability durability() - { - Durability durability = this.durability.get(); - if (status().hasBeen(PreApplied) && durability == NotDurable) - return Local; // not necessary anywhere, but helps for logical consistency - return durability; - } - - @Override - public void setDurability(Durability v) - { - durability.set(v); - } - - @Override - protected void postApply(SafeCommandStore safeStore) - { - AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); - cache.cleanupWriteResult(txnId); - super.postApply(safeStore); - } - - private boolean canApplyWithCurrentScope(SafeCommandStore safeStore) - { - Ranges ranges = safeStore.ranges().at(executeAt().epoch()); - Seekables keys = partialTxn().keys(); - for (int i=0,mi=keys.size(); i applyWithCorrectScope(CommandStore unsafeStore) - { - TxnId txnId = txnId(); - AsyncResult.Settable result = AsyncResults.settable(); - unsafeStore.execute(this, safeStore -> { - AccordCommand command = (AccordCommand) safeStore.command(txnId); - command.applyChain(safeStore, false).begin(result.settingCallback()); - }).begin((unused, throwable) -> { - if (throwable != null) - result.tryFailure(throwable); - }); - return result; - } - - private AsyncChain applyChain(SafeCommandStore safeStore, boolean canReschedule) - { - AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); - AsyncResult writeResult = cache.getWriteResult(txnId); - if (writeResult != null) - return writeResult; - - // this can be called via a listener callback, in which case we won't - // have the appropriate commandsForKey in scope, so start a new operation - // with the correct scope and notify the caller when that completes - if (!canApplyWithCurrentScope(safeStore)) - { - return writeResult; - } - - if (canApplyWithCurrentScope(safeStore)) - { - AsyncChain chain = super.applyChain(safeStore); - writeResult = AsyncResults.forChain(chain); - } - else - { - // this can be called via a listener callback, in which case we won't - // have the appropriate commandsForKey in scope, so start a new operation - // with the correct scope and notify the caller when that completes - Preconditions.checkArgument(canReschedule); - return applyWithCorrectScope(safeStore.commandStore()); - } - cache.setWriteResult(txnId, writeResult); - - return writeResult; - } - - @Override - protected AsyncChain applyChain(SafeCommandStore safeStore) - { - - return applyChain(safeStore, true); - } - - @Override - public AsyncChain read(SafeCommandStore safeStore) - { - AccordStateCache.Instance cache = ((SafeAccordCommandStore) safeStore).commandStore().commandCache(); - AsyncResult future = cache.getReadResult(txnId); - if (future != null) - return future; - future = AsyncResults.forChain(super.read(safeStore)); - cache.setReadResult(txnId, future); - return future; - } - - private CommandListener maybeWrapListener(CommandListener listener) - { - if (listener.isTransient()) - return listener; - - if (listener instanceof AccordCommand) - return new ListenerProxy.CommandListenerProxy(((AccordCommand) listener).txnId()); - - if (listener instanceof AccordCommandsForKey) - return new ListenerProxy.CommandsForKeyListenerProxy(((AccordCommandsForKey) listener).key()); - - //TODO - Support accord.messages.Defer - - throw new RuntimeException("Unhandled non-transient listener: " + listener); - } - - @Override - public Command addListener(CommandListener listener) - { - listener = maybeWrapListener(listener); - if (listener instanceof ListenerProxy) - storedListeners.blindAdd((ListenerProxy) listener); - else - transientListeners.add(listener); - return this; - } - - @Override - public void removeListener(CommandListener listener) - { - listener = maybeWrapListener(listener); - if (listener instanceof ListenerProxy) - storedListeners.blindRemove((ListenerProxy) listener); - else - transientListeners.remove(listener); - } - - public boolean hasListenerFor(TxnId txnId) - { - return storedListeners.getView().contains(new ListenerProxy.CommandListenerProxy(txnId)); - } - - @Override - public void notifyListeners(SafeCommandStore safeStore) - { - // TODO: efficiency (introduce BiConsumer method) - storedListeners.getView().forEach(l -> l.onChange(safeStore, this)); - transientListeners.forEach(listener -> { - PreLoadContext ctx = listener.listenerPreLoadContext(txnId()); - AsyncContext context = ((SafeAccordCommandStore)safeStore).context(); - if (context.containsScopedItems(ctx)) - { - logger.trace("{}: synchronously updating listener {}", txnId(), listener); - listener.onChange(safeStore, this); - } - else - { - logger.trace("{}: asynchronously updating listener {}", txnId(), listener); - safeStore.execute(ctx, reSafeStore -> { - listener.onChange(reSafeStore, reSafeStore.command(txnId())); - }); - } - }); - } - - @Override - public void addWaitingOnCommit(TxnId txnId) - { - waitingOnCommit.blindAdd(txnId); - } - - public boolean isWaitingOnCommit() - { - return !waitingOnCommit.getView().isEmpty(); - } - - @Override - public void removeWaitingOnCommit(TxnId txnId) - { - waitingOnCommit.blindRemove(txnId); - } - - @Override - public TxnId firstWaitingOnCommit() - { - if (!isWaitingOnCommit()) - return null; - return waitingOnCommit.getView().first(); - } - - @Override - public void addWaitingOnApplyIfAbsent(TxnId txnId, Timestamp executeAt) - { - waitingOnApply.blindPut(executeAt, txnId); - } - - public boolean isWaitingOnApply() - { - return !waitingOnApply.getView().isEmpty(); - } - - @Override - public void removeWaitingOn(TxnId txnId, Timestamp executeAt) - { - waitingOnCommit.blindRemove(txnId); - waitingOnApply.blindRemove(executeAt, txnId); - } - - @Override - public boolean isWaitingOnDependency() - { - return isWaitingOnCommit() || isWaitingOnApply(); - } - - @Override - public TxnId firstWaitingOnApply(@Nullable TxnId ifExecutesBefore) - { - if (!isWaitingOnApply()) - return null; - - Map.Entry first = waitingOnApply.getView().firstEntry(); - if (ifExecutesBefore == null || first.getKey().compareTo(ifExecutesBefore) < 0) - return first.getValue(); - - return null; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index b7c6e9754ea4..5c9f3e4e9da8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -18,289 +18,38 @@ package org.apache.cassandra.service.accord; -import java.util.Comparator; -import java.util.Objects; +import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; -import java.util.function.BiFunction; -import java.util.function.BinaryOperator; import java.util.function.Consumer; import java.util.function.Function; -import javax.annotation.Nullable; +import com.google.common.annotations.VisibleForTesting; import accord.api.Agent; import accord.api.DataStore; -import accord.api.Key; import accord.api.ProgressLog; import accord.impl.CommandsForKey; import accord.local.Command; -import accord.local.CommandListener; import accord.local.CommandStore; import accord.local.CommandStores.RangesForEpoch; import accord.local.CommandStores.RangesForEpochHolder; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; -import accord.local.Status; -import accord.primitives.Keys; -import accord.primitives.Ranges; -import accord.primitives.Routables; -import accord.primitives.Seekable; -import accord.primitives.Seekables; -import accord.primitives.Timestamp; -import accord.primitives.AbstractKeys; +import accord.primitives.RoutableKey; import accord.primitives.TxnId; import accord.utils.Invariants; -import org.apache.cassandra.service.accord.api.PartitionKey; import accord.utils.async.AsyncChain; -import org.apache.cassandra.service.accord.async.AsyncContext; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -public class AccordCommandStore extends CommandStore +public class AccordCommandStore implements CommandStore { - public class SafeAccordCommandStore implements SafeCommandStore - { - final RangesForEpoch rangesForEpoch; - final AsyncContext context; - - SafeAccordCommandStore(RangesForEpoch rangesForEpoch, AsyncContext context) - { - this.rangesForEpoch = rangesForEpoch; - this.context = context; - } - - public AsyncContext context() - { - return context; - } - - @Override - public Command command(TxnId txnId) - { - AccordCommand command = getCommandInternal(txnId); - if (command.isEmpty()) - command.initialize(); - return command; - } - - @Override - public Command ifPresent(TxnId txnId) - { - AccordCommand command = getCommandInternal(txnId); - return !command.isEmpty() ? command : null; - } - - @Override - public Command ifLoaded(TxnId txnId) - { - AccordCommand command = commandCache.getOrNull(txnId); - if (command != null && command.isLoaded()) - { - getContext().commands.add(command); - return command; - } - return null; - } - - public T mapReduce(Routables keysOrRanges, Function map, BinaryOperator reduce, T initialValue) - { - switch (keysOrRanges.domain()) { - default: - throw new AssertionError(); - case Key: - AbstractKeys keys = (AbstractKeys) keysOrRanges; - return keys.stream() - .map(this::commandsForKey) - .map(map) - .reduce(initialValue, reduce); - case Range: - // TODO: implement - throw new UnsupportedOperationException(); - } - } - - private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) - { - switch (keysOrRanges.domain()) { - default: - throw new AssertionError(); - case Key: - // TODO: efficiency - AbstractKeys keys = (AbstractKeys) keysOrRanges; - for (Key key : keys) - { - if (!slice.contains(key)) continue; - CommandsForKey forKey = commandsForKey(key); - accumulate = map.apply(forKey, accumulate); - if (accumulate.equals(terminalValue)) - return accumulate; - } - break; - case Range: - // TODO (required): implement - throw new UnsupportedOperationException(); - } - return accumulate; - } - - @Override - public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, T terminalValue) - { - accumulate = mapReduceForKey(keysOrRanges, slice, (forKey, prev) -> { - CommandsForKey.CommandTimeseries timeseries; - switch (testTimestamp) - { - default: throw new AssertionError(); - case STARTED_AFTER: - case STARTED_BEFORE: - timeseries = forKey.byId(); - break; - case EXECUTES_AFTER: - case MAY_EXECUTE_BEFORE: - timeseries = forKey.byExecuteAt(); - } - CommandsForKey.CommandTimeseries.TestTimestamp remapTestTimestamp; - switch (testTimestamp) - { - default: throw new AssertionError(); - case STARTED_AFTER: - case EXECUTES_AFTER: - remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.AFTER; - break; - case STARTED_BEFORE: - case MAY_EXECUTE_BEFORE: - remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.BEFORE; - } - return timeseries.mapReduce(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, prev, terminalValue); - }, accumulate, terminalValue); - - return accumulate; - } - - @Override - public void register(Seekables keysOrRanges, Ranges slice, Command command) - { - // TODO (required): support ranges - Routables.foldl((Keys)keysOrRanges, slice, (k, v, i) -> { commandsForKey(k).register(command); return v; }, null); - } - - @Override - public void register(Seekable keyOrRange, Ranges slice, Command command) - { - // TODO (required): support ranges - Key key = (Key) keyOrRange; - if (slice.contains(key)) - commandsForKey(key).register(command); - } - - public AccordCommandsForKey commandsForKey(Key key) - { - AccordCommandsForKey commandsForKey = getCommandsForKeyInternal(key); - if (commandsForKey.isEmpty()) - commandsForKey.initialize(); - return commandsForKey; - } - - public AccordCommandsForKey maybeCommandsForKey(Key key) - { - AccordCommandsForKey commandsForKey = getCommandsForKeyInternal(key); - return !commandsForKey.isEmpty() ? commandsForKey : null; - } - - @Override - public void addAndInvokeListener(TxnId txnId, CommandListener listener) - { - AccordCommand.WriteOnly command = (AccordCommand.WriteOnly) getContext().commands.getOrCreateWriteOnly(txnId, (ignore, id) -> new AccordCommand.WriteOnly(id), commandStore()); - command.addListener(listener); - execute(listener.listenerPreLoadContext(txnId), store -> { - listener.onChange(store, store.command(txnId)); - }); - } - - @Override - public AccordCommandStore commandStore() - { - return AccordCommandStore.this; - } - - @Override - public DataStore dataStore() - { - return dataStore; - } - - @Override - public Agent agent() - { - return agent; - } - - @Override - public ProgressLog progressLog() - { - return progressLog; - } - - @Override - public RangesForEpoch ranges() - { - return rangesForEpoch; - } - - @Override - public long latestEpoch() - { - return time.epoch(); - } - - @Override - public Timestamp preaccept(TxnId txnId, Seekables keys) - { - Timestamp max = maxConflict(keys); - long epoch = latestEpoch(); - if (txnId.compareTo(max) > 0 && txnId.epoch() >= epoch && !agent.isExpired(txnId, time.now())) - return txnId; - - return time.uniqueNow(max); - } - - @Override - public AsyncChain execute(PreLoadContext context, Consumer consumer) - { - return AccordCommandStore.this.execute(context, consumer); - } - - @Override - public AsyncChain submit(PreLoadContext context, Function function) - { - return AccordCommandStore.this.submit(context, function); - } - - @Override - public NodeTimeService time() - { - return time; - } - - public Timestamp maxConflict(Seekables keys) - { - // TODO: Seekables - // TODO: efficiency - return ((Keys)keys).stream() - .map(this::maybeCommandsForKey) - .filter(Objects::nonNull) - .map(CommandsForKey::max) - .max(Comparator.naturalOrder()) - .orElse(Timestamp.NONE); - } - } - private static long getThreadId(ExecutorService executor) { try @@ -317,13 +66,15 @@ private static long getThreadId(ExecutorService executor) } } + private final int id; private final long threadId; public final String loggingId; private final ExecutorService executor; private final AccordStateCache stateCache; - private final AccordStateCache.Instance commandCache; - private final AccordStateCache.Instance commandsForKeyCache; - private AsyncContext currentCtx = null; + private final AccordStateCache.Instance commandCache; + private final AccordStateCache.Instance commandsForKeyCache; + private AsyncOperation currentOperation = null; + private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; private final NodeTimeService time; @@ -339,7 +90,7 @@ public AccordCommandStore(int id, ProgressLog.Factory progressLogFactory, RangesForEpochHolder rangesForEpoch) { - super(id); + this.id = id; this.time = time; this.agent = agent; this.dataStore = dataStore; @@ -348,24 +99,26 @@ public AccordCommandStore(int id, this.loggingId = String.format("[%s]", id); this.executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); this.threadId = getThreadId(this.executor); - this.stateCache = new AccordStateCache(0); - this.commandCache = stateCache.instance(TxnId.class, - AccordCommand.class, - AccordCommand::new); - this.commandsForKeyCache = stateCache.instance(PartitionKey.class, - AccordCommandsForKey.class, - key -> new AccordCommandsForKey(this, key)); + this.stateCache = new AccordStateCache(8<<20); + this.commandCache = stateCache.instance(TxnId.class, accord.local.Command.class, AccordSafeCommand::new, AccordObjectSizes::command); + this.commandsForKeyCache = stateCache.instance(RoutableKey.class, CommandsForKey.class, AccordSafeCommandsForKey::new, AccordObjectSizes::commandsForKey); } - void setCacheSize(long bytes) + @Override + public int id() + { + return id; + } + + public void setCacheSize(long bytes) { checkInStoreThread(); stateCache.setMaxSize(bytes); } - public SafeAccordCommandStore safeStore(AsyncContext context) + public long getCacheSize() { - return new SafeAccordCommandStore(rangesForEpochHolder.get(), context); + return stateCache.getMaxSize(); } public void checkInStoreThread() @@ -383,32 +136,44 @@ public ExecutorService executor() return executor; } - public AccordStateCache.Instance commandCache() + public AccordStateCache.Instance commandCache() { return commandCache; } - public AccordStateCache.Instance commandsForKeyCache() + public AccordStateCache.Instance commandsForKeyCache() { return commandsForKeyCache; } - public void setContext(AsyncContext context) + @VisibleForTesting + public AccordStateCache cache() + { + return stateCache; + } + + @VisibleForTesting + public void clearCache() + { + stateCache.clear(); + } + + public void setCurrentOperation(AsyncOperation operation) { - Invariants.checkState(currentCtx == null); - currentCtx = context; + Invariants.checkState(currentOperation == null); + currentOperation = operation; } - public AsyncContext getContext() + public AsyncOperation getContext() { - Invariants.checkState(currentCtx != null); - return currentCtx; + Invariants.checkState(currentOperation != null); + return currentOperation; } - public void unsetContext(AsyncContext context) + public void unsetCurrentOperation(AsyncOperation operation) { - Invariants.checkState(currentCtx == context); - currentCtx = null; + Invariants.checkState(currentOperation == operation); + currentOperation = null; } public long nextSystemTimestampMicros() @@ -416,44 +181,36 @@ public long nextSystemTimestampMicros() lastSystemTimestampMicros = Math.max(TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()), lastSystemTimestampMicros + 1); return lastSystemTimestampMicros; } + @Override + public AsyncChain submit(PreLoadContext loadCtx, Function function) + { + return AsyncOperation.create(this, loadCtx, function); + } - private AccordCommand getCommandInternal(TxnId txnId) + public DataStore dataStore() { - Invariants.checkState(currentCtx != null); - AccordCommand command = currentCtx.commands.get(txnId); - if (command == null) - throw new IllegalArgumentException("No command in context for txnId " + txnId); - Invariants.checkState(command.isLoaded() || (command.isReadOnly() && command.isPartiallyLoaded())); - return command; + return dataStore; } - public boolean isCommandsForKeyInContext(PartitionKey key) + @Override + public Agent agent() { - return currentCtx.commandsForKey.get(key) != null; + return agent; } - private AccordCommandsForKey getCommandsForKeyInternal(Key key) + NodeTimeService time() { - Objects.requireNonNull(currentCtx, "current context"); - if (!(key instanceof PartitionKey)) - throw new IllegalArgumentException("Attempted to use non-PartitionKey; given " + key.getClass()); - AccordCommandsForKey commandsForKey = currentCtx.commandsForKey.get((PartitionKey) key); - if (commandsForKey == null) - throw new IllegalArgumentException("No commandsForKey in context for key " + key); - Invariants.checkState(commandsForKey.isLoaded()); - return commandsForKey; + return time; } - @Override - public AsyncChain submit(PreLoadContext loadCtx, Function function) + ProgressLog progressLog() { - return AsyncOperation.create(this, loadCtx, function); + return progressLog; } - @Override - public Agent agent() + RangesForEpoch ranges() { - return agent; + return rangesForEpochHolder.get(); } @Override @@ -478,6 +235,31 @@ public void executeBlocking(Runnable runnable) } } + public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, + Map commands, + Map commandsForKeys) + { + Invariants.checkState(current == null); + commands.values().forEach(AccordSafeState::preExecute); + commandsForKeys.values().forEach(AccordSafeState::preExecute); + current = new AccordSafeCommandStore(preLoadContext, commands, commandsForKeys, this); + return current; + } + + public void completeOperation(AccordSafeCommandStore store, + Map commands, + Map commandsForKeys) + { + Invariants.checkState(current == store); + current.complete(); + current = null; + } + + public void abortCurrentOperation() + { + current = null; + } + @Override public void shutdown() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 14dd0851c294..0708f092d274 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -21,13 +21,12 @@ import accord.api.Agent; import accord.api.DataStore; import accord.api.ProgressLog; -import accord.local.AsyncCommandStores; +import accord.local.CommandStores; import accord.local.NodeTimeService; import accord.local.ShardDistributor; import accord.topology.Topology; -import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; -public class AccordCommandStores extends AsyncCommandStores +public class AccordCommandStores extends CommandStores { private long cacheSize; AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, @@ -49,7 +48,7 @@ synchronized void refreshCacheSizes() return; long perStore = cacheSize / count(); // TODO (low priority, safety): we might transiently breach our limit if we increase one store before decreasing another - forEach(commandStore -> ((SafeAccordCommandStore) commandStore).commandStore().setCacheSize(perStore)); + forEach(commandStore -> ((AccordSafeCommandStore) commandStore).commandStore().setCacheSize(perStore)); } private static long maxCacheSize() diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java deleted file mode 100644 index 025f8f5d434a..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandsForKey.java +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.nio.ByteBuffer; -import java.util.Collection; -import java.util.Objects; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.TimeUnit; -import java.util.stream.Stream; - -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import accord.local.Command; -import accord.local.CommandStore; -import accord.impl.CommandsForKey; -import accord.local.SafeCommandStore; -import accord.local.SafeCommandStore.TestDep; -import accord.local.SafeCommandStore.TestKind; -import accord.local.Status; -import accord.primitives.Timestamp; -import accord.primitives.TxnId; -import org.apache.cassandra.service.accord.api.PartitionKey; -import accord.utils.async.AsyncResult; -import org.apache.cassandra.service.accord.store.StoredLong; -import org.apache.cassandra.service.accord.store.StoredNavigableMap; -import org.apache.cassandra.service.accord.store.StoredSet; -import org.apache.cassandra.service.accord.store.StoredValue; -import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.ObjectSizes; - -import static accord.local.SafeCommandStore.TestDep.ANY_DEPS; -import static accord.local.SafeCommandStore.TestDep.WITH; -import static accord.local.SafeCommandStore.TestKind.Ws; -import static accord.local.Status.KnownDeps.DepsUnknown; -import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applyMapChanges; -import static org.apache.cassandra.service.accord.AccordState.WriteOnly.applySetChanges; - -public class AccordCommandsForKey extends CommandsForKey implements AccordState -{ - private static final Logger logger = LoggerFactory.getLogger(AccordCommandsForKey.class); - - private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new AccordCommandsForKey(null, null)); - - public static class Defaults - { - public static final Timestamp maxTimestamp = Timestamp.NONE; - public static final Timestamp lastExecutedTimestamp = Timestamp.NONE; - public static final Timestamp lastWriteTimestamp = Timestamp.NONE; - public static final long lastExecutedMicros = 0; - } - - public static class WriteOnly extends AccordCommandsForKey implements AccordState.WriteOnly - { - private AsyncResult result = null; - - public WriteOnly(AccordCommandStore commandStore, PartitionKey key) - { - super(commandStore, key); - } - - @Override - public void asyncResult(AsyncResult result) - { - Preconditions.checkArgument(this.result == null); - this.result = result; - - } - - @Override - public AsyncResult asyncResult() - { - return result; - } - - @Override - public void applyChanges(AccordCommandsForKey instance) - { - applySetChanges(this, instance, cfk -> cfk.blindWitnessed); - applyMapChanges(this, instance, cfk -> cfk.byId.map); - applyMapChanges(this, instance, cfk -> cfk.byExecuteAt.map); - } - } - - public enum SeriesKind - { - BY_ID, BY_EXECUTE_AT; - } - - public class Series implements CommandTimeseries - { - public final SeriesKind kind; - public final StoredNavigableMap map; - - public Series(ReadWrite readWrite, SeriesKind kind) - { - this.kind = kind; - map = new StoredNavigableMap<>(readWrite); - } - - @Override - public void add(Timestamp timestamp, Command command) - { - map.blindPut(timestamp, AccordPartialCommand.serializer.serialize(new AccordPartialCommand(key, command))); - } - - @Override - public void remove(Timestamp timestamp) - { - map.blindRemove(timestamp); - } - - private Stream idsToCommands(Collection blobs) - { - return blobs.stream().map(blob -> AccordPartialCommand.serializer.deserialize(AccordCommandsForKey.this, commandStore, blob)); - } - - @Override - public boolean isEmpty() - { - return map.getView().isEmpty(); - } - - public T mapReduce(TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, - TestDep testDep, @Nullable TxnId depId, - @Nullable Status minStatus, @Nullable Status maxStatus, - SafeCommandStore.CommandFunction map, T initialValue, T terminalValue) - { - - for (ByteBuffer buffer : (testTimestamp == TestTimestamp.BEFORE ? this.map.getView().headMap(timestamp, false) : this.map.getView().tailMap(timestamp, false)).values()) - { - AccordPartialCommand cmd = AccordPartialCommand.serializer.deserialize(AccordCommandsForKey.this, commandStore, buffer); - if (testKind == Ws && cmd.txnId().isRead()) continue; - // If we don't have any dependencies, we treat a dependency filter as a mismatch - if (testDep != ANY_DEPS && (cmd.known().deps == DepsUnknown || (cmd.deps().contains(depId) != (testDep == WITH)))) - continue; - if (minStatus != null && minStatus.compareTo(cmd.status()) > 0) - continue; - if (maxStatus != null && maxStatus.compareTo(cmd.status()) < 0) - continue; - initialValue = map.apply(key, cmd.txnId(), cmd.executeAt(), initialValue); - if (initialValue.equals(terminalValue)) - break; - } - return initialValue; - } - - @VisibleForTesting - public Stream all() - { - return idsToCommands(map.getView().values()); - } - - public AccordPartialCommand get(Timestamp timestamp) - { - ByteBuffer blob = map.getView().get(timestamp); - if (blob == null) - return null; - return AccordPartialCommand.serializer.deserialize(AccordCommandsForKey.this, commandStore, blob); - } - } - - private final AccordCommandStore commandStore; - private final PartitionKey key; - public final StoredValue maxTimestamp; - public final StoredValue lastExecutedTimestamp; - public final StoredLong lastExecutedMicros; - public final StoredValue lastWriteTimestamp; - public final StoredSet.Navigable blindWitnessed; - public final Series byId; - public final Series byExecuteAt; - - public AccordCommandsForKey(AccordCommandStore commandStore, PartitionKey key) - { - this.commandStore = commandStore; - this.key = key; - maxTimestamp = new StoredValue<>(rw()); - lastExecutedTimestamp = new StoredValue<>(rw()); - lastExecutedMicros = new StoredLong(rw()); - lastWriteTimestamp = new StoredValue<>(rw()); - blindWitnessed = new StoredSet.Navigable<>(rw()); - byId = new Series(rw(), SeriesKind.BY_ID); - byExecuteAt = new Series(rw(), SeriesKind.BY_EXECUTE_AT); - } - - @Override - public boolean isEmpty() - { - return maxTimestamp.isEmpty() - && lastExecutedTimestamp.isEmpty() - && lastExecutedMicros.isEmpty() - && lastWriteTimestamp.isEmpty() - && blindWitnessed.isEmpty() - && byId.map.isEmpty() - && byExecuteAt.map.isEmpty(); - } - - public void setEmpty() - { - maxTimestamp.setEmpty(); - lastExecutedTimestamp.setEmpty(); - lastExecutedMicros.setEmpty(); - lastWriteTimestamp.setEmpty(); - blindWitnessed.setEmpty(); - byId.map.setEmpty(); - byExecuteAt.map.setEmpty(); - } - - public AccordCommandsForKey initialize() - { - maxTimestamp.set(Defaults.maxTimestamp); - lastExecutedTimestamp.load(Defaults.lastExecutedTimestamp); - lastExecutedMicros.load(Defaults.lastExecutedMicros); - lastWriteTimestamp.load(Defaults.lastWriteTimestamp); - blindWitnessed.load(new TreeSet<>()); - byId.map.load(new TreeMap<>()); - byExecuteAt.map.load(new TreeMap<>()); - return this; - } - - @Override - public boolean hasModifications() - { - return maxTimestamp.hasModifications() - || lastExecutedTimestamp.hasModifications() - || lastExecutedMicros.hasModifications() - || lastWriteTimestamp.hasModifications() - || blindWitnessed.hasModifications() - || byId.map.hasModifications() - || byExecuteAt.map.hasModifications(); - } - - @Override - public void clearModifiedFlag() - { - maxTimestamp.clearModifiedFlag(); - lastExecutedTimestamp.clearModifiedFlag(); - lastExecutedMicros.clearModifiedFlag(); - lastWriteTimestamp.clearModifiedFlag(); - blindWitnessed.clearModifiedFlag(); - byId.map.clearModifiedFlag(); - byExecuteAt.map.clearModifiedFlag(); - } - - @Override - public boolean isLoaded() - { - return maxTimestamp.isLoaded() - && lastExecutedTimestamp.isLoaded() - && lastExecutedMicros.isLoaded() - && lastWriteTimestamp.isLoaded() - && blindWitnessed.isLoaded() - && byId.map.isLoaded() - && byExecuteAt.map.isLoaded(); - } - - public CommandStore commandStore() - { - return commandStore; - } - - @Override - public PartitionKey key() - { - return key; - } - - @Override - public long estimatedSizeOnHeap() - { - long size = EMPTY_SIZE; - size += maxTimestamp.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += lastExecutedTimestamp.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += lastExecutedMicros.estimatedSizeOnHeap(); - size += lastWriteTimestamp.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += blindWitnessed.estimatedSizeOnHeap(AccordObjectSizes::timestamp); - size += byId.map.estimatedSizeOnHeap(AccordObjectSizes::timestamp, ByteBufferUtil::estimatedSizeOnHeap); - size += byExecuteAt.map.estimatedSizeOnHeap(AccordObjectSizes::timestamp, ByteBufferUtil::estimatedSizeOnHeap); - return size; - } - - @Override - public Series byId() - { - return byId; - } - - @Override - public Series byExecuteAt() - { - return byExecuteAt; - } - - @Override - public Timestamp max() - { - return maxTimestamp.get(); - } - - @Override - public void updateMax(Timestamp timestamp) - { - if (isFullInstance()) - { - if (maxTimestamp.get().compareTo(timestamp) >= 0) - return; - maxTimestamp.set(timestamp); - } - else - { - Preconditions.checkState(isWriteOnlyInstance()); - blindWitnessed.blindAdd(timestamp); - } - } - - public void applyBlindWitnessedTimestamps() - { - if (isEmpty() || blindWitnessed.getView().isEmpty()) - return; - - logger.trace("Applying blind witnessed timestamps for {}: {}", key(), blindWitnessed.getView()); - blindWitnessed.getView().forEach(this::updateMax); - blindWitnessed.clear(); - } - - public void updateSummaries(AccordCommand command) - { - ByteBuffer partialCommand = AccordPartialCommand.serializer.serialize(new AccordPartialCommand(key, command)); - byId.map.blindPut(command.txnId(), partialCommand); - byExecuteAt.map.blindPut(command.executeAt(), partialCommand); - } - - private static long getTimestampMicros(Timestamp timestamp) - { - return timestamp.hlc(); - } - - private void maybeUpdatelastTimestamp(Timestamp executeAt, boolean isForWriteTxn) - { - Timestamp lastWrite = lastWriteTimestamp.get(); - - if (executeAt.compareTo(lastWrite) < 0) - throw new IllegalArgumentException(String.format("%s is less than the most recent write timestamp %s", executeAt, lastWrite)); - - Timestamp lastExecuted = lastExecutedTimestamp.get(); - int cmp = executeAt.compareTo(lastExecuted); - // execute can be in the past if it's for a read and after the most recent write - if (cmp == 0 || (!isForWriteTxn && cmp < 0)) - return; - if (cmp < 0) - throw new IllegalArgumentException(String.format("%s is less than the most recent executed timestamp %s", executeAt, lastExecuted)); - - long micros = getTimestampMicros(executeAt); - long lastMicros = lastExecutedMicros.get(); - lastExecutedTimestamp.set(executeAt); - lastExecutedMicros.set(Math.max(micros, lastMicros + 1)); - if (isForWriteTxn) - lastWriteTimestamp.set(executeAt); - } - - public int nowInSecondsFor(Timestamp executeAt, boolean isForWriteTxn) - { - maybeUpdatelastTimestamp(executeAt, isForWriteTxn); - // we use the executeAt time instead of the monotonic database timestamp to prevent uneven - // ttl expiration in extreme cases, ie 1M+ writes/second to a key causing timestamps to overflow - // into the next second on some keys and not others. - return Math.toIntExact(TimeUnit.MICROSECONDS.toSeconds(getTimestampMicros(lastExecutedTimestamp.get()))); - } - - public long timestampMicrosFor(Timestamp executeAt, boolean isForWriteTxn) - { - maybeUpdatelastTimestamp(executeAt, isForWriteTxn); - return lastExecutedMicros.get(); - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - AccordCommandsForKey that = (AccordCommandsForKey) o; - return commandStore == that.commandStore - && key.equals(that.key) - && maxTimestamp.equals(that.maxTimestamp) - && lastExecutedTimestamp.equals(that.lastExecutedTimestamp) - && lastExecutedMicros.equals(that.lastExecutedMicros) - && lastWriteTimestamp.equals(that.lastWriteTimestamp) - && blindWitnessed.equals(that.blindWitnessed) - && byId.map.equals(that.byId.map) - && byExecuteAt.map.equals(that.byExecuteAt.map); - } - - @Override - public int hashCode() - { - return Objects.hash(commandStore, key, blindWitnessed, maxTimestamp, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp, byId, byExecuteAt); - } - - @Override - public String toString() - { - return "AccordCommandsForKey{" + - "key=" + key + - ", maxTs=" + maxTimestamp + - ", lastExecutedTimestamp=" + lastExecutedTimestamp + - ", lastExecutedMicros=" + lastExecutedMicros + - ", lastWriteTimestamp=" + lastWriteTimestamp + - '}'; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 2c212b8d311e..d62985b0a1e8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -21,6 +21,8 @@ import java.util.ArrayList; import java.util.List; +import com.google.common.base.Preconditions; + import accord.api.ConfigurationService; import accord.local.Node; import accord.topology.Topology; @@ -59,9 +61,15 @@ public Topology getTopologyForEpoch(long epoch) } @Override - public void fetchTopologyForEpoch(long epoch) + public synchronized void fetchTopologyForEpoch(long epoch) { - throw new UnsupportedOperationException(); + Topology current = currentTopology(); + Preconditions.checkArgument(epoch > current.epoch(), "Requested to fetch epoch %d which is <= %d (current epoch)", epoch, current.epoch()); + while (current.epoch() < epoch) + { + current = AccordTopologyUtils.createTopology(epochs.size()); + unsafeAddEpoch(current); + } } @Override @@ -77,9 +85,16 @@ public void acknowledgeEpoch(long epoch) } } - public void createEpochFromConfig() + public synchronized void createEpochFromConfig() { + Topology current = currentTopology(); Topology topology = AccordTopologyUtils.createTopology(epochs.size()); + if (current.equals(topology.withEpoch(current.epoch()))) return; + unsafeAddEpoch(topology); + } + + private void unsafeAddEpoch(Topology topology) + { epochs.add(topology); for (Listener listener : listeners) listener.onTopologyUpdate(topology); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 316a78a5a971..b718da793f21 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -20,18 +20,35 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeMap; import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.function.Supplier; -import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSortedMap; +import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Lists; - +import com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.Result; +import accord.impl.CommandsForKey; +import accord.impl.CommandsForKey.CommandTimeseries; +import accord.local.Command; +import accord.local.CommandListener; import accord.local.CommandStore; +import accord.local.CommonAttributes; +import accord.local.Listeners; import accord.local.Node; import accord.local.SaveStatus; import accord.local.Status; @@ -40,10 +57,9 @@ import accord.primitives.PartialTxn; import accord.primitives.Route; import accord.primitives.Timestamp; -import accord.primitives.Txn; import accord.primitives.TxnId; import accord.primitives.Writes; -import accord.utils.DeterministicIdentitySet; +import accord.utils.Invariants; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; @@ -72,7 +88,6 @@ import org.apache.cassandra.db.rows.BTreeRow; import org.apache.cassandra.db.rows.Cell; import org.apache.cassandra.db.rows.CellPath; -import org.apache.cassandra.db.rows.ComplexColumnData; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.transform.FilteredPartitions; @@ -90,21 +105,20 @@ import org.apache.cassandra.schema.UserFunctions; import org.apache.cassandra.schema.Views; import org.apache.cassandra.serializers.UUIDSerializer; -import org.apache.cassandra.service.accord.AccordCommandsForKey.SeriesKind; -import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.serializers.DepsSerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; -import org.apache.cassandra.service.accord.store.StoredNavigableMap; -import org.apache.cassandra.service.accord.store.StoredSet; +import org.apache.cassandra.service.accord.serializers.ListenerSerializers; import org.apache.cassandra.service.accord.txn.TxnData; -import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.Clock; import static java.lang.String.format; -import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; -import static org.apache.cassandra.db.rows.BufferCell.*; +import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; +import static org.apache.cassandra.db.rows.BufferCell.live; +import static org.apache.cassandra.db.rows.BufferCell.tombstone; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.utils.ByteBufferUtil.EMPTY_BYTE_BUFFER; import static org.apache.cassandra.utils.ByteBufferUtil.bytes; @@ -122,6 +136,28 @@ public class AccordKeyspace private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexSliceFilter(Slices.ALL, false); + public enum SeriesKind + { + BY_ID(CommandsForKey::byId), + BY_EXECUTE_AT(CommandsForKey::byExecuteAt); + + private final Function> getSeries; + + SeriesKind(Function> getSeries) + { + this.getSeries = getSeries; + } + + ImmutableSortedMap getValues(CommandsForKey cfk) + { + if (cfk == null) + return ImmutableSortedMap.of(); + + CommandTimeseries series = getSeries.apply(cfk); + return (ImmutableSortedMap) series.commands; + } + } + // TODO: store timestamps as blobs (confirm there are no negative numbers, or offset) private static final TableMetadata Commands = parse(COMMANDS, @@ -135,7 +171,6 @@ public class AccordKeyspace + "route blob," + "durability int," + "txn blob," - + "kind int," + format("execute_at %s,", TIMESTAMP_TUPLE) + format("promised_ballot %s,", TIMESTAMP_TUPLE) + format("accepted_ballot %s,", TIMESTAMP_TUPLE) @@ -145,8 +180,6 @@ public class AccordKeyspace + format("waiting_on_commit set<%s>,", TIMESTAMP_TUPLE) + format("waiting_on_apply map<%s, blob>,", TIMESTAMP_TUPLE) + "listeners set, " - + format("blocking_commit_on set<%s>, ", TIMESTAMP_TUPLE) - + format("blocking_apply_on set<%s>, ", TIMESTAMP_TUPLE) + "PRIMARY KEY((store_id, txn_id))" + ')'); @@ -159,6 +192,7 @@ private static class CommandsSerializers static final LocalVersionedSerializer partialDeps = localSerializer(DepsSerializer.partialDeps); static final LocalVersionedSerializer writes = localSerializer(CommandSerializers.writes); static final LocalVersionedSerializer result = localSerializer(TxnData.serializer); + static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); private static LocalVersionedSerializer localSerializer(IVersionedSerializer serializer) { @@ -183,7 +217,6 @@ private static class CommandsColumns static final ColumnMetadata route = getColumn(Commands, "route"); static final ColumnMetadata durability = getColumn(Commands, "durability"); static final ColumnMetadata txn = getColumn(Commands, "txn"); - static final ColumnMetadata kind = getColumn(Commands, "kind"); static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); static final ColumnMetadata promised_ballot = getColumn(Commands, "promised_ballot"); static final ColumnMetadata accepted_ballot = getColumn(Commands, "accepted_ballot"); @@ -193,11 +226,9 @@ private static class CommandsColumns static final ColumnMetadata waiting_on_commit = getColumn(Commands, "waiting_on_commit"); static final ColumnMetadata waiting_on_apply = getColumn(Commands, "waiting_on_apply"); static final ColumnMetadata listeners = getColumn(Commands, "listeners"); - static final ColumnMetadata blocking_commit_on = getColumn(Commands, "blocking_commit_on"); - static final ColumnMetadata blocking_apply_on = getColumn(Commands, "blocking_apply_on"); } - private static final TableMetadata CommandsForKey = + private static final TableMetadata CommandsForKeys = parse(COMMANDS_FOR_KEY, "accord commands per key", "CREATE TABLE %s (" @@ -216,17 +247,17 @@ private static class CommandsColumns private static class CommandsForKeyColumns { - static final ClusteringComparator keyComparator = CommandsForKey.partitionKeyAsClusteringComparator(); - static final ColumnFilter allColumns = ColumnFilter.all(CommandsForKey); - static final ColumnMetadata max_timestamp = getColumn(CommandsForKey, "max_timestamp"); - static final ColumnMetadata last_executed_timestamp = getColumn(CommandsForKey, "last_executed_timestamp"); - static final ColumnMetadata last_executed_micros = getColumn(CommandsForKey, "last_executed_micros"); - static final ColumnMetadata last_write_timestamp = getColumn(CommandsForKey, "last_write_timestamp"); - static final ColumnMetadata blind_witnessed = getColumn(CommandsForKey, "blind_witnessed"); + static final ClusteringComparator keyComparator = CommandsForKeys.partitionKeyAsClusteringComparator(); + static final ColumnFilter allColumns = ColumnFilter.all(CommandsForKeys); + static final ColumnMetadata max_timestamp = getColumn(CommandsForKeys, "max_timestamp"); + static final ColumnMetadata last_executed_timestamp = getColumn(CommandsForKeys, "last_executed_timestamp"); + static final ColumnMetadata last_executed_micros = getColumn(CommandsForKeys, "last_executed_micros"); + static final ColumnMetadata last_write_timestamp = getColumn(CommandsForKeys, "last_write_timestamp"); + static final ColumnMetadata blind_witnessed = getColumn(CommandsForKeys, "blind_witnessed"); - static final ColumnMetadata series = getColumn(CommandsForKey, "series"); - static final ColumnMetadata timestamp = getColumn(CommandsForKey, "timestamp"); - static final ColumnMetadata data = getColumn(CommandsForKey, "data"); + static final ColumnMetadata series = getColumn(CommandsForKeys, "series"); + static final ColumnMetadata timestamp = getColumn(CommandsForKeys, "timestamp"); + static final ColumnMetadata data = getColumn(CommandsForKeys, "data"); static final Columns statics = Columns.from(Lists.newArrayList(max_timestamp, last_executed_timestamp, last_executed_micros, last_write_timestamp, blind_witnessed)); static final Columns regulars = Columns.from(Lists.newArrayList(data)); @@ -234,25 +265,23 @@ private static class CommandsForKeyColumns private static final RegularAndStaticColumns justStatic = new RegularAndStaticColumns(statics, Columns.NONE); private static final RegularAndStaticColumns justRegular = new RegularAndStaticColumns(Columns.NONE, regulars); - static boolean hasStaticChanges(AccordCommandsForKey commandsForKey) + static boolean hasStaticChanges(CommandsForKey original, CommandsForKey current) { - return commandsForKey.maxTimestamp.hasModifications() - || commandsForKey.lastExecutedTimestamp.hasModifications() - || commandsForKey.lastExecutedMicros.hasModifications() - || commandsForKey.lastWriteTimestamp.hasModifications() - || commandsForKey.blindWitnessed.hasModifications(); + return valueModified(CommandsForKey::max, original, current) + || valueModified(CommandsForKey::lastExecutedTimestamp, original, current) + || valueModified(CommandsForKey::lastWriteTimestamp, original, current); } - private static boolean hasRegularChanges(AccordCommandsForKey commandsForKey) + private static boolean hasRegularChanges(CommandsForKey original, CommandsForKey current) { - return commandsForKey.byId.map.hasModifications() - || commandsForKey.byExecuteAt.map.hasModifications(); + return valueModified(CommandsForKey::byId, original, current) + || valueModified(CommandsForKey::byExecuteAt, original, current); } - static RegularAndStaticColumns columnsFor(AccordCommandsForKey commandsForKey) + static RegularAndStaticColumns columnsFor(CommandsForKey original, CommandsForKey current) { - boolean hasStaticChanges = hasStaticChanges(commandsForKey); - boolean hasRegularChanges = hasRegularChanges(commandsForKey); + boolean hasStaticChanges = hasStaticChanges(original, current); + boolean hasRegularChanges = hasRegularChanges(original, current); if (hasStaticChanges && hasRegularChanges) return all; @@ -261,7 +290,7 @@ else if (hasStaticChanges) else if (hasRegularChanges) return justRegular; else - throw new IllegalArgumentException("CommandsForKey has_modifications=" + commandsForKey.hasModifications() + ", but no Static or Regular columns changed!"); + throw new IllegalArgumentException("No Static or Regular columns changed for CFK " + current.key()); } } @@ -281,7 +310,7 @@ public static KeyspaceMetadata metadata() private static Tables tables() { - return Tables.of(Commands, CommandsForKey); + return Tables.of(Commands, CommandsForKeys); } private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException @@ -314,190 +343,194 @@ private static T deserializeOrNull(ByteBuffer bytes, LocalVersionedSerialize return bytes != null && ! ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, serializer) : null; } - private static NavigableMap deserializeWaitingOnApply(Map serialized) + private static ImmutableSortedMap deserializeWaitingOnApply(Map serialized) { if (serialized == null || serialized.isEmpty()) - return new TreeMap<>(); + return ImmutableSortedMap.of(); NavigableMap result = new TreeMap<>(); for (Map.Entry entry : serialized.entrySet()) result.put(deserializeTimestampOrNull(entry.getKey(), Timestamp::fromBits), deserializeTimestampOrNull(entry.getValue(), TxnId::fromBits)); - return result; + return ImmutableSortedMap.copyOf(result); } - private static > S deserializeTimestampSet(Set serialized, Supplier setFactory, TimestampFactory timestampFactory) + private static ImmutableSortedSet deserializeTimestampSet(Set serialized, TimestampFactory timestampFactory) { - S result = setFactory.get(); if (serialized == null || serialized.isEmpty()) - return result; + return ImmutableSortedSet.of(); + List result = new ArrayList<>(serialized.size()); for (ByteBuffer bytes : serialized) result.add(deserializeTimestampOrNull(bytes, timestampFactory)); - return result; + return ImmutableSortedSet.copyOf(result); } - private static NavigableSet deserializeTxnIdNavigableSet(UntypedResultSet.Row row, String name) + private static ImmutableSortedSet deserializeTxnIdNavigableSet(UntypedResultSet.Row row, String name) { - return deserializeTimestampSet(row.getSet(name, BytesType.instance), TreeSet::new, TxnId::fromBits); + return deserializeTimestampSet(row.getSet(name, BytesType.instance), TxnId::fromBits); } - private static DeterministicIdentitySet deserializeListeners(Set serialized) throws IOException + private static Listeners.Immutable deserializeListeners(Set serialized) throws IOException { if (serialized == null || serialized.isEmpty()) - return new DeterministicIdentitySet<>(); - DeterministicIdentitySet result = new DeterministicIdentitySet<>(); + return Listeners.Immutable.EMPTY; + Listeners result = new Listeners(); for (ByteBuffer bytes : serialized) { - result.add(ListenerProxy.deserialize(bytes, ByteBufferAccessor.instance, 0)); + result.add(deserialize(bytes, CommandsSerializers.listeners)); } - return result; + return new Listeners.Immutable(result); } - private static DeterministicIdentitySet deserializeListeners(UntypedResultSet.Row row, String name) throws IOException + private static Listeners.Immutable deserializeListeners(UntypedResultSet.Row row, String name) throws IOException { return deserializeListeners(row.getSet(name, BytesType.instance)); } - private static , V> void addStoredMapChanges(Row.Builder builder, - ColumnMetadata column, - long timestamp, - int nowInSec, - StoredNavigableMap map, - Function serializeKey, - Function serializeVal) + private interface SerializeFunction { - if (map.wasCleared()) - { - if (!map.hasAdditions()) - { - builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp, nowInSec)); - return; - } - else - builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp - 1, nowInSec)); - } - - map.forEachAddition((k, v) -> builder.addCell(live(column, timestamp, serializeVal.apply(v), CellPath.create(serializeKey.apply(k))))); - - if (!map.wasCleared()) - map.forEachDeletion(k -> builder.addCell(tombstone(column, timestamp, nowInSec, CellPath.create(serializeKey.apply(k))))); + ByteBuffer apply(V v) throws IOException; } - private static > void addStoredSetChanges(Row.Builder builder, - ColumnMetadata column, - long timestamp, - int nowInSec, - StoredSet map, - Function serialize) + private static boolean valueModified(Function get, C original, C current) { - if (map.wasCleared()) - { - if (!map.hasAdditions()) - { - builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp, nowInSec)); - return; - } - else - builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestamp - 1, nowInSec)); - } + V prev = original != null ? get.apply(original) : null; + V value = get.apply(current); - map.forEachAddition(i -> builder.addCell(live(column, timestamp, EMPTY_BYTE_BUFFER, CellPath.create(serialize.apply(i))))); - - if (!map.wasCleared()) - map.forEachDeletion(k -> builder.addCell(tombstone(column, timestamp, nowInSec, CellPath.create(serialize.apply(k))))); + return prev != value; } - public static Mutation getCommandMutation(AccordCommandStore commandStore, AccordCommand command, long timestampMicros) + private static void addCellIfModified(ColumnMetadata column, Function get, SerializeFunction serialize, Row.Builder builder, long timestampMicros, C original, C current) throws IOException { - try - { - Preconditions.checkArgument(command.hasModifications()); - - // TODO: convert to byte arrays - ValueAccessor accessor = ByteBufferAccessor.instance; + if (valueModified(get, original, current)) + builder.addCell(live(column, timestampMicros, serialize.apply(get.apply(current)))); + } - Row.Builder builder = BTreeRow.unsortedBuilder(); - builder.newRow(Clustering.EMPTY); - int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + private static void addCellIfModified(ColumnMetadata column, Function get, LocalVersionedSerializer serializer, Row.Builder builder, long timestampMicros, C original, C command) throws IOException + { + addCellIfModified(column, get, v -> serializeOrNull(v, serializer), builder, timestampMicros, original, command); + } + private static void addKeyCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, C original, C command) throws IOException + { + addCellIfModified(column, get, v -> serializeOrNull((AccordRoutingKey) v, CommandsSerializers.routingKey), builder, timestampMicros, original, command); + } - if (command.status.hasModifications()) - builder.addCell(live(CommandsColumns.status, timestampMicros, accessor.valueOf(command.status.get().ordinal()))); + private static > void addEnumCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, C original, C command) throws IOException + { + // TODO: convert to byte arrays + ValueAccessor accessor = ByteBufferAccessor.instance; + addCellIfModified(column, get, v -> accessor.valueOf(v.ordinal()), builder, timestampMicros, original, command); + } - if (command.homeKey.hasModifications()) - builder.addCell(live(CommandsColumns.home_key, timestampMicros, serializeOrNull((AccordRoutingKey) command.homeKey.get(), CommandsSerializers.routingKey))); + private static void addSetChanges(ColumnMetadata column, Function> get, SerializeFunction serialize, Row.Builder builder, long timestampMicros, int nowInSec, C original, C command) throws IOException + { + Set prev = original != null ? get.apply(original) : Collections.emptySet(); + if (prev == null) prev = Collections.emptySet(); + Set value = get.apply(command); + if (value == null) value = Collections.emptySet(); - if (command.progressKey.hasModifications()) - builder.addCell(live(CommandsColumns.progress_key, timestampMicros, serializeOrNull((AccordRoutingKey) command.progressKey.get(), CommandsSerializers.routingKey))); + if (value.isEmpty() && !prev.isEmpty()) + { + builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestampMicros, nowInSec)); + return; + } - if (command.route.hasModifications()) - builder.addCell(live(CommandsColumns.route, timestampMicros, serializeOrNull(command.route.get(), CommandsSerializers.route))); + for (V item : Sets.difference(value, prev)) + builder.addCell(live(column, timestampMicros, EMPTY_BYTE_BUFFER, CellPath.create(serialize.apply(item)))); - if (command.durability.hasModifications()) - builder.addCell(live(CommandsColumns.durability, timestampMicros, accessor.valueOf(command.durability.get().ordinal()))); + for (V item : Sets.difference(prev, value)) + builder.addCell(tombstone(column, timestampMicros, nowInSec, CellPath.create(serialize.apply(item)))); + } - if (command.partialTxn.hasModifications()) - builder.addCell(live(CommandsColumns.txn, timestampMicros, serializeOrNull(command.partialTxn.get(), CommandsSerializers.partialTxn))); + private static void addMapChanges(ColumnMetadata column, Function> get, SerializeFunction serializeKey, SerializeFunction serializeVal, Row.Builder builder, long timestampMicros, int nowInSec, C original, C command) throws IOException + { + Map prev = original != null ? get.apply(original) : Collections.emptyMap(); + if (prev == null) prev = Collections.emptyMap(); + Map value = get.apply(command); + if (value == null) value = Collections.emptyMap(); - if (command.kind.hasModifications() && command.kind.get() != null) // initialize sets hasModification(), and don't want to persist null - builder.addCell(live(CommandsColumns.kind, timestampMicros, accessor.valueOf(command.kind.get().ordinal()))); + if (value.isEmpty() && !prev.isEmpty()) + { + builder.addComplexDeletion(column, DeletionTime.buildUnsafeWithUnsignedInteger(timestampMicros, nowInSec)); + return; + } - if (command.executeAt.hasModifications()) - builder.addCell(live(CommandsColumns.execute_at, timestampMicros, serializeTimestamp(command.executeAt.get()))); + for (Map.Entry entry : value.entrySet()) + { + K key = entry.getKey(); + V pVal = prev.get(key); + if (pVal != null && pVal.equals(entry.getValue())) + continue; + builder.addCell(live(column, timestampMicros, serializeVal.apply(entry.getValue()), CellPath.create(serializeKey.apply(key)))); + } + for (K key : Sets.difference(prev.keySet(), value.keySet())) + builder.addCell(tombstone(column, timestampMicros, nowInSec, CellPath.create(serializeKey.apply(key)))); + } - if (command.promised.hasModifications()) - builder.addCell(live(CommandsColumns.promised_ballot, timestampMicros, serializeTimestamp(command.promised.get()))); + private static int estimateMapChanges(Map prev, Map value) + { + return Math.abs(prev.size() - value.size()); + } - if (command.accepted.hasModifications()) - builder.addCell(live(CommandsColumns.accepted_ballot, timestampMicros, serializeTimestamp(command.accepted.get()))); + private static int estimateMapChanges(Function> get, C original, C command) + { + Map prev = original != null ? get.apply(original) : Collections.emptyMap(); + if (prev == null) prev = Collections.emptyMap(); + Map value = get.apply(command); + if (value == null) value = Collections.emptyMap(); + return estimateMapChanges(prev, value); + } - if (command.partialDeps.hasModifications()) - builder.addCell(live(CommandsColumns.dependencies, timestampMicros, serializeOrNull(command.partialDeps.get(), CommandsSerializers.partialDeps))); + public static Mutation getCommandMutation(AccordCommandStore commandStore, AccordSafeCommand liveCommand, long timestampMicros) + { + try + { + Command command = liveCommand.current(); + Command original = liveCommand.original(); + Invariants.checkArgument(original != command); - if (command.writes.hasModifications()) - builder.addCell(live(CommandsColumns.writes, timestampMicros, serialize(command.writes.get(), CommandsSerializers.writes))); + Row.Builder builder = BTreeRow.unsortedBuilder(); + builder.newRow(Clustering.EMPTY); + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - if (command.result.hasModifications()) - builder.addCell(live(CommandsColumns.result, timestampMicros, serialize((TxnData) command.result.get(), CommandsSerializers.result))); + addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, original, command); + addKeyCellIfModified(CommandsColumns.home_key, Command::homeKey, builder, timestampMicros, original, command); + addKeyCellIfModified(CommandsColumns.progress_key, Command::progressKey, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.route, Command::route, CommandsSerializers.route, builder, timestampMicros, original, command); + addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.txn, Command::partialTxn, CommandsSerializers.partialTxn, builder, timestampMicros, original, command); - if (command.waitingOnCommit.hasModifications()) - { - addStoredSetChanges(builder, CommandsColumns.waiting_on_commit, - timestampMicros, nowInSeconds, command.waitingOnCommit, - AccordKeyspace::serializeTimestamp); - } + addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.accepted_ballot, Command::accepted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); - if (command.blockingCommitOn.hasModifications()) - { - addStoredSetChanges(builder, CommandsColumns.blocking_commit_on, - timestampMicros, nowInSeconds, command.blockingApplyOn, - AccordKeyspace::serializeTimestamp); - } + addCellIfModified(CommandsColumns.dependencies, Command::partialDeps, CommandsSerializers.partialDeps, builder, timestampMicros, original, command); - if (command.waitingOnApply.hasModifications()) - { - addStoredMapChanges(builder, CommandsColumns.waiting_on_apply, - timestampMicros, nowInSeconds, command.waitingOnApply, - AccordKeyspace::serializeTimestamp, AccordKeyspace::serializeTimestamp); - } + addSetChanges(CommandsColumns.listeners, cmd -> Sets.filter(cmd.listeners(), l -> !l.isTransient()), v -> serialize(v, CommandsSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); - if (command.blockingApplyOn.hasModifications()) + if (command.isCommitted()) { - addStoredSetChanges(builder, CommandsColumns.blocking_apply_on, - timestampMicros, nowInSeconds, command.blockingApplyOn, - AccordKeyspace::serializeTimestamp); + Command.Committed committed = command.asCommitted(); + Command.Committed originalCommitted = original != null && original.isCommitted() ? original.asCommitted() : null; + addSetChanges(CommandsColumns.waiting_on_commit, Command.Committed::waitingOnCommit, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, originalCommitted, committed); + addMapChanges(CommandsColumns.waiting_on_apply, Command.Committed::waitingOnApply, AccordKeyspace::serializeTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, originalCommitted, committed); + if (command.isExecuted()) + { + Command.Executed executed = command.asExecuted(); + Command.Executed originalExecuted = original != null && original.isExecuted() ? original.asExecuted() : null; + addCellIfModified(CommandsColumns.writes, Command.Executed::writes, v -> serialize(v, CommandsSerializers.writes), builder, timestampMicros, originalExecuted, executed); + addCellIfModified(CommandsColumns.result, Command.Executed::result, v -> serialize((TxnData) v, CommandsSerializers.result), builder, timestampMicros, originalExecuted, executed); + } } - if (command.storedListeners.hasModifications()) - { - addStoredSetChanges(builder, CommandsColumns.listeners, - timestampMicros, nowInSeconds, command.storedListeners, - ListenerProxy::identifier); - } ByteBuffer key = CommandsColumns.keyComparator.make(commandStore.id(), serializeTimestamp(command.txnId())).serializeAsPartitionKey(); - PartitionUpdate update = PartitionUpdate.singleRowUpdate(Commands, key, builder.build()); + Row row = builder.build(); + if (row.isEmpty()) + return null; + PartitionUpdate update = PartitionUpdate.singleRowUpdate(Commands, key, row); return new Mutation(update); } catch (IOException e) @@ -506,6 +539,7 @@ public static Mutation getCommandMutation(AccordCommandStore commandStore, Accor } } + private static ByteBuffer serializeKey(PartitionKey key) { return TupleType.pack(ByteBufferAccessor.instance, Arrays.asList(UUIDSerializer.instance.serialize(key.tableId().asUUID()), key.partitionKey().getKey())); @@ -534,11 +568,21 @@ private static T deserializeTimestampOrNull(UntypedResultS return deserializeTimestampOrNull(row.getBlob(name), factory); } - public static AccordCommand loadCommand(AccordCommandStore commandStore, TxnId txnId) + private static ByteBuffer bytesOrNull(Row row, ColumnMetadata column) + { + Cell cell = row.getCell(column); + return cell != null && !cell.isTombstone() ? cell.buffer() : null; + } + + private static T deserializeTimestampOrDefault(Row row, ColumnMetadata column, TimestampFactory factory, T valIfNull) { - AccordCommand command = new AccordCommand(txnId); - loadCommand(commandStore, command); - return command; + ByteBuffer bytes = bytesOrNull(row, column); + if (bytes == null) + return valIfNull; + T result = deserializeTimestampOrNull(bytes, factory); + if (result == null) + return valIfNull; + return result; } private static T deserializeWithVersionOr(UntypedResultSet.Row row, String dataColumn, LocalVersionedSerializer serializer, Supplier defaultSupplier) throws IOException @@ -555,154 +599,180 @@ public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId t "WHERE store_id = ? " + "AND txn_id=(?, ?, ?)"; - return executeOnceInternal(String.format(cql, ACCORD_KEYSPACE_NAME, COMMANDS), - commandStore.id(), - txnId.msb, txnId.lsb, txnId.node.id); + return executeInternal(String.format(cql, ACCORD_KEYSPACE_NAME, COMMANDS), + commandStore.id(), + txnId.msb, txnId.lsb, txnId.node.id); } - public static void loadCommand(AccordCommandStore commandStore, AccordCommand command) + public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) { - Preconditions.checkArgument(!command.isLoaded()); - TxnId txnId = command.txnId(); commandStore.checkNotInStoreThread(); - UntypedResultSet result = loadCommandRow(commandStore, command.txnId()); + UntypedResultSet rows = loadCommandRow(commandStore, txnId); - if (result.isEmpty()) + if (rows.isEmpty()) { - command.setEmpty(); - return; + return null; } try { - UntypedResultSet.Row row = result.one(); - Preconditions.checkState(deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits).equals(txnId)); - command.status.load(SaveStatus.values()[row.getInt("status")]); - command.homeKey.load(deserializeOrNull(row.getBlob("home_key"), CommandsSerializers.routingKey)); - command.progressKey.load(deserializeOrNull(row.getBlob("progress_key"), CommandsSerializers.routingKey)); - command.route.load(deserializeOrNull(row.getBlob("route"), CommandsSerializers.route)); + UntypedResultSet.Row row = rows.one(); + Invariants.checkState(deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits).equals(txnId)); + SaveStatus status = SaveStatus.values()[row.getInt("status")]; + CommonAttributes.Mutable attributes = new CommonAttributes.Mutable(txnId); // TODO: something less brittle than ordinal, more efficient than values() - command.durability.load(Status.Durability.values()[row.getInt("durability", 0)]); - command.partialTxn.load(deserializeOrNull(row.getBlob("txn"), CommandsSerializers.partialTxn)); - command.kind.load(row.has("kind") ? Txn.Kind.values()[row.getInt("kind")] : null); - command.executeAt.load(deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits)); - command.promised.load(deserializeTimestampOrNull(row, "promised_ballot", Ballot::fromBits)); - command.accepted.load(deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits)); - command.partialDeps.load(deserializeOrNull(row.getBlob("dependencies"), CommandsSerializers.partialDeps)); - command.writes.load(deserializeWithVersionOr(row, "writes", CommandsSerializers.writes, () -> null)); - command.result.load(deserializeWithVersionOr(row, "result", CommandsSerializers.result, () -> null)); - command.waitingOnCommit.load(deserializeTxnIdNavigableSet(row, "waiting_on_commit")); - command.blockingCommitOn.load(deserializeTxnIdNavigableSet(row, "blocking_commit_on")); - command.waitingOnApply.load(deserializeWaitingOnApply(row.getMap("waiting_on_apply", BytesType.instance, BytesType.instance))); - command.blockingApplyOn.load(deserializeTxnIdNavigableSet(row, "blocking_apply_on")); - command.storedListeners.load(deserializeListeners(row, "listeners")); + attributes.durability(Status.Durability.values()[row.getInt("durability", 0)]); + attributes.homeKey(deserializeOrNull(row.getBlob("home_key"), CommandsSerializers.routingKey)); + attributes.progressKey(deserializeOrNull(row.getBlob("progress_key"), CommandsSerializers.routingKey)); + attributes.route(deserializeOrNull(row.getBlob("route"), CommandsSerializers.route)); + attributes.partialTxn(deserializeOrNull(row.getBlob("txn"), CommandsSerializers.partialTxn)); + attributes.partialDeps(deserializeOrNull(row.getBlob("dependencies"), CommandsSerializers.partialDeps)); + attributes.setListeners(deserializeListeners(row, "listeners")); + + Timestamp executeAt = deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits); + Ballot promised = deserializeTimestampOrNull(row, "promised_ballot", Ballot::fromBits); + Ballot accepted = deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits); + ImmutableSortedSet waitingOnCommit = deserializeTxnIdNavigableSet(row, "waiting_on_commit"); + ImmutableSortedMap waitingOnApply = deserializeWaitingOnApply(row.getMap("waiting_on_apply", BytesType.instance, BytesType.instance)); + Writes writes = deserializeWithVersionOr(row, "writes", CommandsSerializers.writes, () -> null); + Result result = deserializeWithVersionOr(row, "result", CommandsSerializers.result, () -> null); + + switch (status.status) + { + case NotWitnessed: + return Command.SerializerSupport.notWitnessed(attributes, promised); + case PreAccepted: + return Command.SerializerSupport.preaccepted(attributes, executeAt, promised); + case AcceptedInvalidate: + case Accepted: + case PreCommitted: + return Command.SerializerSupport.accepted(attributes, status, executeAt, promised, accepted); + case Committed: + case ReadyToExecute: + return Command.SerializerSupport.committed(attributes, status, executeAt, promised, accepted, waitingOnCommit, waitingOnApply); + case PreApplied: + case Applied: + case Invalidated: + return Command.SerializerSupport.executed(attributes, status, executeAt, promised, accepted, waitingOnCommit, waitingOnApply, writes, result); + default: + throw new IllegalStateException("Unhandled status " + status); + } } catch (IOException e) { - logger.error("Exception loading AccordCommand " + command.txnId(), e); + logger.error("Exception loading AccordCommand " + txnId, e); throw new RuntimeException(e); } catch (Throwable t) { - logger.error("Exception loading AccordCommand " + command.txnId(), t); + logger.error("Exception loading AccordCommand " + txnId, t); throw t; } } - private static void addSeriesMutations(AccordCommandsForKey.Series series, + private static void addSeriesMutations(ImmutableSortedMap prev, + ImmutableSortedMap value, + SeriesKind kind, PartitionUpdate.Builder partitionBuilder, Row.Builder rowBuilder, long timestampMicros, int nowInSeconds) { - if (!series.map.hasModifications()) + if (prev == value) return; - Row.Deletion deletion = series.map.hasDeletions() ? + Set deletions = Sets.difference(prev.keySet(), value.keySet()); + + Row.Deletion deletion = !deletions.isEmpty() ? Row.Deletion.regular(DeletionTime.buildUnsafeWithUnsignedInteger(timestampMicros, nowInSeconds)) : null; - ByteBuffer ordinalBytes = bytes(series.kind.ordinal()); - series.map.forEachAddition((timestamp, bytes) -> { + ByteBuffer ordinalBytes = bytes(kind.ordinal()); + value.forEach((timestamp, bytes) -> { + if (bytes.equals(prev.get(timestamp))) + return; rowBuilder.newRow(Clustering.make(ordinalBytes, serializeTimestamp(timestamp))); rowBuilder.addCell(live(CommandsForKeyColumns.data, timestampMicros, bytes)); partitionBuilder.add(rowBuilder.build()); }); - series.map.forEachDeletion(timestamp -> { + deletions.forEach(timestamp -> { rowBuilder.newRow(Clustering.make(ordinalBytes, serializeTimestamp(timestamp))); rowBuilder.addRowDeletion(deletion); partitionBuilder.add(rowBuilder.build()); }); } + private static void addSeriesMutations(CommandsForKey original, + CommandsForKey cfk, + SeriesKind kind, + PartitionUpdate.Builder partitionBuilder, + Row.Builder rowBuilder, + long timestampMicros, + int nowInSeconds) + { + addSeriesMutations(kind.getValues(original), kind.getValues(cfk), kind, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); + } + private static DecoratedKey makeKey(CommandStore commandStore, PartitionKey key) { ByteBuffer pk = CommandsForKeyColumns.keyComparator.make(commandStore.id(), serializeKey(key)).serializeAsPartitionKey(); - return CommandsForKey.partitioner.decorateKey(pk); + return CommandsForKeys.partitioner.decorateKey(pk); } - private static DecoratedKey makeKey(AccordCommandsForKey cfk) + private static DecoratedKey makeKey(CommandStore commandStore, CommandsForKey cfk) { - return makeKey(cfk.commandStore(), cfk.key()); + return makeKey(commandStore, (PartitionKey) cfk.key()); } - public static Mutation getCommandsForKeyMutation(AccordCommandStore commandStore, AccordCommandsForKey cfk, long timestampMicros) + public static Mutation getCommandsForKeyMutation(AccordCommandStore commandStore, AccordSafeCommandsForKey liveCfk, long timestampMicros) { - Preconditions.checkArgument(cfk.hasModifications()); - - int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - - int expectedRows = (CommandsForKeyColumns.hasStaticChanges(cfk) ? 1 : 0) - + cfk.byId.map.totalModifications() - + cfk.byExecuteAt.map.totalModifications(); - - PartitionUpdate.Builder partitionBuilder = new PartitionUpdate.Builder(CommandsForKey, - makeKey(cfk), - CommandsForKeyColumns.columnsFor(cfk), - expectedRows); - - Row.Builder rowBuilder = BTreeRow.unsortedBuilder(); - boolean updateStaticRow = cfk.maxTimestamp.hasModifications() - || cfk.lastExecutedTimestamp.hasModifications() - || cfk.lastExecutedMicros.hasModifications() - || cfk.lastWriteTimestamp.hasModifications() - || cfk.blindWitnessed.hasModifications(); - if (updateStaticRow) + try { - rowBuilder.newRow(Clustering.STATIC_CLUSTERING); - - if (cfk.maxTimestamp.hasModifications()) - rowBuilder.addCell(live(CommandsForKeyColumns.max_timestamp, timestampMicros, serializeTimestamp(cfk.maxTimestamp.get()))); - - if (cfk.lastExecutedTimestamp.hasModifications()) - rowBuilder.addCell(live(CommandsForKeyColumns.last_executed_timestamp, timestampMicros, serializeTimestamp(cfk.lastExecutedTimestamp.get()))); + CommandsForKey cfk = liveCfk.current(); + CommandsForKey original = liveCfk.original(); + Invariants.checkArgument(original != cfk); + // TODO: convert to byte arrays + ValueAccessor accessor = ByteBufferAccessor.instance; - if (cfk.lastExecutedMicros.hasModifications()) - rowBuilder.addCell(live(CommandsForKeyColumns.last_executed_micros, timestampMicros, ByteBufferUtil.bytes(cfk.lastExecutedMicros.get()))); + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - if (cfk.lastWriteTimestamp.hasModifications()) - rowBuilder.addCell(live(CommandsForKeyColumns.last_write_timestamp, timestampMicros, serializeTimestamp(cfk.lastWriteTimestamp.get()))); + boolean hasStaticChanges = CommandsForKeyColumns.hasStaticChanges(original, cfk); + int expectedRows = (hasStaticChanges ? 1 : 0) + + estimateMapChanges(c -> c.byId().commands, original, cfk) + + estimateMapChanges(c -> c.byExecuteAt().commands, original, cfk); - if (cfk.blindWitnessed.hasModifications()) - addStoredSetChanges(rowBuilder, CommandsForKeyColumns.blind_witnessed, - timestampMicros, nowInSeconds, cfk.blindWitnessed, - AccordKeyspace::serializeTimestamp); + PartitionUpdate.Builder partitionBuilder = new PartitionUpdate.Builder(CommandsForKeys, + makeKey(commandStore, cfk), + CommandsForKeyColumns.columnsFor(original, cfk), + expectedRows); - partitionBuilder.add(rowBuilder.build()); - } + Row.Builder rowBuilder = BTreeRow.unsortedBuilder(); - addSeriesMutations(cfk.byId, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); - addSeriesMutations(cfk.byExecuteAt, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); + if (hasStaticChanges) + { + rowBuilder.newRow(Clustering.STATIC_CLUSTERING); + addCellIfModified(CommandsForKeyColumns.max_timestamp, CommandsForKey::max, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, original, cfk); + addCellIfModified(CommandsForKeyColumns.last_executed_timestamp, CommandsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, original, cfk); + addCellIfModified(CommandsForKeyColumns.last_executed_micros, CommandsForKey::lastExecutedMicros, accessor::valueOf, rowBuilder, timestampMicros, original, cfk); + addCellIfModified(CommandsForKeyColumns.last_write_timestamp, CommandsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, original, cfk); + Row row = rowBuilder.build(); + if (!row.isEmpty()) + partitionBuilder.add(row); + } - return new Mutation(partitionBuilder.build()); - } + addSeriesMutations(original, cfk, SeriesKind.BY_ID, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); + addSeriesMutations(original, cfk, SeriesKind.BY_EXECUTE_AT, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); - public static AccordCommandsForKey loadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) - { - AccordCommandsForKey commandsForKey = new AccordCommandsForKey(commandStore, key); - loadCommandsForKey(commandsForKey); - return commandsForKey; + PartitionUpdate update = partitionBuilder.build(); + if (update.isEmpty()) + return null; + return new Mutation(update); + } + catch (IOException e) + { + throw new RuntimeException(e); + } } private static ByteBuffer cellValue(Cell cell) @@ -724,7 +794,7 @@ private static ByteBuffer clusteringValue(Clustering clustering, int idx) public static SinglePartitionReadCommand getCommandsForKeyRead(CommandStore commandStore, PartitionKey key, long nowInSeconds) { - return SinglePartitionReadCommand.create(CommandsForKey, nowInSeconds, + return SinglePartitionReadCommand.create(CommandsForKeys, nowInSeconds, CommandsForKeyColumns.allColumns, RowFilter.none(), DataLimits.NONE, @@ -732,54 +802,43 @@ public static SinglePartitionReadCommand getCommandsForKeyRead(CommandStore comm FULL_PARTITION); } - public static void loadCommandsForKey(AccordCommandsForKey cfk) + public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) { - Preconditions.checkArgument(!cfk.isLoaded()); - ((AccordCommandStore) cfk.commandStore()).checkNotInStoreThread(); + commandStore.checkNotInStoreThread(); long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - SinglePartitionReadCommand command = getCommandsForKeyRead(cfk.commandStore(), cfk.key(), nowInSeconds); + SinglePartitionReadCommand command = getCommandsForKeyRead(commandStore, key, nowInSeconds); - EnumMap> seriesMaps = new EnumMap<>(SeriesKind.class); + EnumMap> seriesMaps = new EnumMap<>(SeriesKind.class); for (SeriesKind kind : SeriesKind.values()) - seriesMaps.put(kind, new TreeMap<>()); + seriesMaps.put(kind, new ImmutableSortedMap.Builder<>(Comparator.naturalOrder())); try(ReadExecutionController controller = command.executionController(); FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) { if (!partitions.hasNext()) { - cfk.setEmpty(); - return; + return null; } + Timestamp max = Timestamp.NONE; + Timestamp lastExecutedTimestamp = Timestamp.NONE; + long lastExecutedMicros = 0; + Timestamp lastWriteTimestamp = Timestamp.NONE; + try (RowIterator partition = partitions.next()) { // empty static row will be interpreted as all null cells which will cause everything to be initialized Row staticRow = partition.staticRow(); - Cell cell = staticRow.getCell(CommandsForKeyColumns.max_timestamp); - cfk.maxTimestamp.load(cell != null && !cell.isTombstone() ? deserializeTimestampOrNull(cellValue(cell), Timestamp::fromBits) - : AccordCommandsForKey.Defaults.maxTimestamp); - - cell = staticRow.getCell(CommandsForKeyColumns.last_executed_timestamp); - cfk.lastExecutedTimestamp.load(cell != null && !cell.isTombstone() ? deserializeTimestampOrNull(cellValue(cell), Timestamp::fromBits) - : AccordCommandsForKey.Defaults.lastExecutedTimestamp); - - cell = staticRow.getCell(CommandsForKeyColumns.last_executed_micros); - ByteBuffer microsBytes = cell != null && !cell.isTombstone() ? cellValue(cell) : null; - cfk.lastExecutedMicros.load(microsBytes != null ? microsBytes.getLong(microsBytes.position()) - : AccordCommandsForKey.Defaults.lastExecutedMicros); + max = deserializeTimestampOrDefault(staticRow, CommandsForKeyColumns.max_timestamp, Timestamp::fromBits, max); + lastExecutedTimestamp = deserializeTimestampOrDefault(staticRow, CommandsForKeyColumns.last_executed_timestamp, Timestamp::fromBits, lastExecutedTimestamp); - cell = staticRow.getCell(CommandsForKeyColumns.last_write_timestamp); - cfk.lastWriteTimestamp.load(cell != null && !cell.isTombstone() ? deserializeTimestampOrNull(cellValue(cell), Timestamp::fromBits) - : AccordCommandsForKey.Defaults.lastWriteTimestamp); + ByteBuffer microsBytes = bytesOrNull(staticRow, CommandsForKeyColumns.last_executed_micros); + if (microsBytes != null) + lastExecutedMicros = microsBytes.getLong(microsBytes.position()); - TreeSet blindWitnessed = new TreeSet<>(); - ComplexColumnData cmplx = staticRow.getComplexColumnData(CommandsForKeyColumns.blind_witnessed); - if (cmplx != null) - cmplx.forEach(c -> blindWitnessed.add(deserializeTimestampOrNull(c.path().get(0), Timestamp::fromBits))); - cfk.blindWitnessed.load(blindWitnessed); + lastWriteTimestamp = deserializeTimestampOrDefault(staticRow, CommandsForKeyColumns.last_write_timestamp, Timestamp::fromBits, lastWriteTimestamp); while (partition.hasNext()) { @@ -793,14 +852,16 @@ public static void loadCommandsForKey(AccordCommandsForKey cfk) seriesMaps.get(SeriesKind.values()[ordinal]).put(timestamp, data); } } - Preconditions.checkState(!partitions.hasNext()); + Invariants.checkState(!partitions.hasNext()); - cfk.byId.map.load(seriesMaps.get(SeriesKind.BY_ID)); - cfk.byExecuteAt.map.load(seriesMaps.get(SeriesKind.BY_EXECUTE_AT)); + return CommandsForKey.SerializerSupport.create(key, max, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp, + CommandsForKeySerializer.loader, + seriesMaps.get(SeriesKind.BY_ID).build(), + seriesMaps.get(SeriesKind.BY_EXECUTE_AT).build()); } catch (Throwable t) { - logger.error("Exception loading AccordCommandsForKey " + cfk.key(), t); + logger.error("Exception loading AccordCommandsForKey " + key, t); throw t; } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java b/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java new file mode 100644 index 000000000000..1316b02d4848 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.concurrent.Callable; +import java.util.function.Function; + +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncResults; + +/** + * Global state that manages loading states + */ +public class AccordLoadingState +{ + public enum LoadingState { UNINITIALIZED, PENDING, LOADED, FAILED } + private interface NonValueState {} + + private static final NonValueState UNINITIALIZED = new NonValueState() {}; + + private static class PendingLoad extends AsyncResults.RunnableResult implements NonValueState + { + public PendingLoad(Callable callable) + { + super(callable); + } + } + + private static class FailedLoad implements NonValueState + { + private final Throwable cause; + + public FailedLoad(Throwable cause) + { + this.cause = cause; + } + } + + private final K key; + private Object state = UNINITIALIZED; + + public AccordLoadingState(K key) + { + this.key = key; + } + + private LoadingState maybeCleanupLoad() + { + PendingLoad load = (PendingLoad) state; + if (!load.isDone()) + return LoadingState.PENDING; + + if (load.isSuccess()) + { + state = load.result(); + return LoadingState.LOADED; + } + else + { + state = new FailedLoad(load.failure()); + return LoadingState.FAILED; + } + } + + private static IllegalStateException unexpectedState(LoadingState expected, LoadingState actual) + { + return new IllegalStateException(String.format("Unexpected state. Expected %s, was %s", expected, actual)); + } + + /** + * Returns the current loading state. Since most calls here will be initiated by AsyncChain callbacks on + * load completion/failure, we attempt to complete any pending states so the caller doesn't have to remember + * to. The exception is the listen method, to prevent races where the caller found a pending load, attempts + * to register a callback, but gets an exception because the load completed in the meantime. + */ + private LoadingState state(boolean attemptLoadCompletion) + { + if (!(state instanceof NonValueState)) + return LoadingState.LOADED; + + if (state == UNINITIALIZED) + return LoadingState.UNINITIALIZED; + + if (state instanceof PendingLoad) + return attemptLoadCompletion + ? maybeCleanupLoad() + : LoadingState.PENDING; + + if (state instanceof FailedLoad) + return LoadingState.FAILED; + + throw new IllegalStateException("Unhandled state " + state); + } + + public LoadingState state() + { + return state(true); + } + + private void checkState(LoadingState expected, boolean attemptLoadCompletion) + { + LoadingState actual = state(attemptLoadCompletion); + if (actual != expected) + throw unexpectedState(expected, actual); + } + + public K key() + { + return key; + } + + public V value() + { + checkState(LoadingState.LOADED, true); + return (V) state; + } + + public void value(V value) + { + checkState(LoadingState.LOADED, true); + state = value; + } + + public Throwable failure() + { + checkState(LoadingState.FAILED, true); + return ((FailedLoad) state).cause; + } + + /** + * Return a runnable that will run the loadFunction in a separate thread. When the runnable + * has completed, the state load will have either completed, or failed. + */ + public AsyncResults.RunnableResult load(Function loadFunction) + { + checkState(LoadingState.UNINITIALIZED, true); + PendingLoad pendingLoad = new PendingLoad<>(() -> loadFunction.apply(key)); + state = pendingLoad; + return pendingLoad; + } + + public AsyncChain listen() + { + checkState(LoadingState.PENDING, false); + return (PendingLoad) state; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index bef7b43ad2bd..d56e88ba5775 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -18,11 +18,25 @@ package org.apache.cassandra.service.accord; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.function.ToLongFunction; + +import com.google.common.collect.ImmutableSortedMap; +import com.google.common.collect.ImmutableSortedSet; + import accord.api.Key; +import accord.api.Result; import accord.api.RoutingKey; +import accord.impl.CommandsForKey; +import accord.local.Command; +import accord.local.CommandListener; +import accord.local.CommonAttributes; import accord.local.Node; +import accord.local.SaveStatus; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; +import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullKeyRoute; import accord.primitives.FullRangeRoute; @@ -37,17 +51,22 @@ import accord.primitives.RoutingKeys; import accord.primitives.Seekables; import accord.primitives.Timestamp; +import accord.primitives.TxnId; import accord.primitives.Unseekables; import accord.primitives.Writes; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; +import static org.apache.cassandra.utils.ObjectSizes.measure; + public class AccordObjectSizes { public static long key(Key key) @@ -60,13 +79,13 @@ public static long key(RoutingKey key) return ((AccordRoutingKey) key).estimatedSizeOnHeap(); } - private static final long EMPTY_RANGE_SIZE = ObjectSizes.measure(TokenRange.fullRange("")); + private static final long EMPTY_RANGE_SIZE = measure(TokenRange.fullRange("")); public static long range(Range range) { return EMPTY_RANGE_SIZE + key(range.start()) + key(range.end()); } - private static final long EMPTY_RANGES_SIZE = ObjectSizes.measure(Ranges.of()); + private static final long EMPTY_RANGES_SIZE = measure(Ranges.of()); public static long ranges(Ranges ranges) { long size = EMPTY_RANGES_SIZE; @@ -77,7 +96,7 @@ public static long ranges(Ranges ranges) return size; } - private static final long EMPTY_KEYS_SIZE = ObjectSizes.measure(Keys.of()); + private static final long EMPTY_KEYS_SIZE = measure(Keys.of()); public static long keys(Keys keys) { long size = EMPTY_KEYS_SIZE; @@ -106,13 +125,13 @@ private static long routingKeysOnly(AbstractKeys keys) return size; } - private static final long EMPTY_ROUTING_KEYS_SIZE = ObjectSizes.measure(RoutingKeys.of()); + private static final long EMPTY_ROUTING_KEYS_SIZE = measure(RoutingKeys.of()); public static long routingKeys(RoutingKeys keys) { return EMPTY_ROUTING_KEYS_SIZE + routingKeysOnly(keys); } - private static final long EMPTY_FULL_KEY_ROUTE_SIZE = ObjectSizes.measure(new FullKeyRoute(new TokenKey(null, null), new RoutingKey[0])); + private static final long EMPTY_FULL_KEY_ROUTE_SIZE = measure(new FullKeyRoute(new TokenKey(null, null), new RoutingKey[0])); public static long fullKeyRoute(FullKeyRoute route) { return EMPTY_FULL_KEY_ROUTE_SIZE @@ -120,7 +139,7 @@ public static long fullKeyRoute(FullKeyRoute route) + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } - private static final long EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE = ObjectSizes.measure(new PartialKeyRoute(Ranges.EMPTY, new TokenKey(null, null), new RoutingKey[0])); + private static final long EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE = measure(new PartialKeyRoute(Ranges.EMPTY, new TokenKey(null, null), new RoutingKey[0])); public static long partialKeyRoute(PartialKeyRoute route) { return EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE @@ -137,7 +156,7 @@ private static long rangesOnly(AbstractRanges ranges) return size; } - private static final long EMPTY_FULL_RANGE_ROUTE_SIZE = ObjectSizes.measure(new FullRangeRoute(new TokenKey(null, null), new Range[0])); + private static final long EMPTY_FULL_RANGE_ROUTE_SIZE = measure(new FullRangeRoute(new TokenKey(null, null), new Range[0])); public static long fullRangeRoute(FullRangeRoute route) { return EMPTY_FULL_RANGE_ROUTE_SIZE @@ -145,7 +164,7 @@ public static long fullRangeRoute(FullRangeRoute route) + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } - private static final long EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE = ObjectSizes.measure(new PartialRangeRoute(Ranges.EMPTY, new TokenKey(null, null), new Range[0])); + private static final long EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE = measure(new PartialRangeRoute(Ranges.EMPTY, new TokenKey(null, null), new Range[0])); public static long partialRangeRoute(PartialRangeRoute route) { return EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE @@ -168,7 +187,7 @@ public static long route(Unseekables unseekables) } } - private static final long EMPTY_TXN = ObjectSizes.measure(new PartialTxn.InMemory(null, null, null, null, null, null)); + private static final long EMPTY_TXN = measure(new PartialTxn.InMemory(null, null, null, null, null, null)); public static long txn(PartialTxn txn) { long size = EMPTY_TXN; @@ -214,7 +233,7 @@ public static long dependencies(Deps dependencies) return size; } - private static final long EMPTY_WRITES_SIZE = ObjectSizes.measure(new Writes(null, null, null)); + private static final long EMPTY_WRITES_SIZE = measure(new Writes(null, null, null)); public static long writes(Writes writes) { long size = EMPTY_WRITES_SIZE; @@ -225,4 +244,127 @@ public static long writes(Writes writes) return size; } + private static final long EMPTY_COMMAND_LISTENER = measure(new Command.Listener(null)); + private static final long EMPTY_CFK_LISTENER = measure(new CommandsForKey.Listener((Key) null)); + public static long listener(CommandListener listener) + { + if (listener.isTransient()) + return 0; + if (listener instanceof Command.Listener) + return EMPTY_COMMAND_LISTENER + timestamp(((Command.Listener) listener).txnId()); + if (listener instanceof CommandsForKey.Listener) + return EMPTY_CFK_LISTENER + key(((CommandsForKey.Listener) listener).key()); + throw new IllegalArgumentException("Unhandled listener type: " + listener.getClass()); + } + + private static class CommandEmptySizes + { + private static final CommonAttributes EMPTY_ATTRS = new CommonAttributes.Mutable((TxnId) null); + final static long NOT_WITNESSED = measure(Command.SerializerSupport.notWitnessed(EMPTY_ATTRS, Ballot.ZERO)); + final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(EMPTY_ATTRS, null, null)); + final static long ACCEPTED = measure(Command.SerializerSupport.accepted(EMPTY_ATTRS, SaveStatus.Accepted, null, null, null)); + final static long COMMITTED = measure(Command.SerializerSupport.committed(EMPTY_ATTRS, SaveStatus.Committed, null, null, null, ImmutableSortedSet.of(), ImmutableSortedMap.of())); + final static long EXECUTED = measure(Command.SerializerSupport.executed(EMPTY_ATTRS, SaveStatus.Applied, null, null, null, ImmutableSortedSet.of(), ImmutableSortedMap.of(), null, null)); + + + private static long emptySize(Command command) + { + switch (command.status()) + { + case NotWitnessed: + return NOT_WITNESSED; + case PreAccepted: + return PREACCEPTED; + case AcceptedInvalidate: + case Accepted: + case PreCommitted: + return ACCEPTED; + case Committed: + case ReadyToExecute: + return COMMITTED; + case PreApplied: + case Applied: + case Invalidated: + return EXECUTED; + default: + throw new IllegalStateException("Unhandled status " + command.status()); + } + } + } + + private static long sizeNullable(T value, ToLongFunction measure) + { + if (value == null) + return 0; + return measure.applyAsLong(value); + } + + public static long command(Command command) + { + long size = CommandEmptySizes.emptySize(command); + size += sizeNullable(command.homeKey(), AccordObjectSizes::key); + size += sizeNullable(command.progressKey(), AccordObjectSizes::key); + size += sizeNullable(command.route(), AccordObjectSizes::route); + size += sizeNullable(command.promised(), AccordObjectSizes::timestamp); + for (CommandListener listener : command.listeners()) + size += listener(listener); + + if (!command.isWitnessed()) + return size; + + Command.PreAccepted preaccepted = command.asWitnessed(); + size += timestamp(preaccepted.executeAt()); + size += sizeNullable(preaccepted.partialTxn(), AccordObjectSizes::txn); + size += sizeNullable(preaccepted.partialDeps(), AccordObjectSizes::dependencies); + + if (!command.isAccepted()) + return size; + + Command.Accepted accepted = command.asAccepted(); + size += timestamp(accepted.accepted()); + + if (!command.isCommitted()) + return size; + + Command.Committed committed = command.asCommitted(); + size += TIMESTAMP_SIZE * committed.waitingOnCommit().size(); + size += TIMESTAMP_SIZE * 2 * committed.waitingOnApply().size(); + + if (!command.isExecuted()) + return size; + + Command.Executed executed = command.asExecuted(); + size += sizeNullable(executed.writes(), AccordObjectSizes::writes); + Result result = executed.result(); + if (result != null) + size += ((TxnData) result).estimatedSizeOnHeap(); + + return size; + } + + private static long cfkSeriesSize(ImmutableSortedMap series) + { + long size = 0; + for (Map.Entry entry : series.entrySet()) + { + size += timestamp(entry.getKey()); + size += ByteBufferUtil.estimatedSizeOnHeap(entry.getValue()); + } + return size; + } + + private static long EMPTY_CFK_SIZE = measure(CommandsForKey.SerializerSupport.create(null, null, null, 0, null, null, + ImmutableSortedMap.of(), + ImmutableSortedMap.of())); + public static long commandsForKey(CommandsForKey cfk) + { + long size = EMPTY_CFK_SIZE; + size += key(cfk.key()); + size += timestamp(cfk.max()); + size += timestamp(cfk.lastExecutedTimestamp()); + size += timestamp(cfk.lastWriteTimestamp()); + size += cfkSeriesSize((ImmutableSortedMap) cfk.byId().commands); + size += cfkSeriesSize((ImmutableSortedMap) cfk.byExecuteAt().commands); + return size; + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java b/src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java deleted file mode 100644 index 19f71da75c61..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordPartialCommand.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -import accord.api.Key; -import accord.local.Command; -import accord.local.SaveStatus; -import accord.local.Status; -import accord.local.Status.Known; -import accord.primitives.Timestamp; -import accord.primitives.TxnId; -import org.apache.cassandra.io.util.DataInputBuffer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputBuffer; -import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.async.AsyncContext; -import org.apache.cassandra.service.accord.serializers.CommandSerializers; - -import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; -import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; -import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; -import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; - -public class AccordPartialCommand -{ - public static final PartialCommandSerializer serializer = new PartialCommandSerializer(); - - private final TxnId txnId; - private final Timestamp executeAt; - - // TODO (soon): this should only be a list of TxnId (the deps for the key we are persisted against); but should also be stored separately and not brought into memory - private final List deps; - // TODO (soon): we only require this for Accepted; perhaps more tightly couple query API for efficiency - private final SaveStatus status; - - AccordPartialCommand(TxnId txnId, Timestamp executeAt, List deps, SaveStatus status) - { - this.txnId = txnId; - this.executeAt = executeAt; - this.deps = deps; - this.status = status; - } - - public AccordPartialCommand(Key key, Command command) - { - this(command.txnId(), command.executeAt(), - command.partialDeps() == null ? Collections.emptyList() : command.partialDeps().txnIds(key), - command.saveStatus()); - } - - public TxnId txnId() - { - return txnId; - } - - public Timestamp executeAt() - { - return executeAt; - } - - public List deps() - { - return deps; - } - - public boolean hasDep(TxnId txnId) - { - return Collections.binarySearch(deps, txnId) >= 0; - } - - public Status status() - { - return status.status; - } - - public Known known() - { - return status.known; - } - - @Override - public boolean equals(Object obj) - { - if (obj.getClass() != AccordPartialCommand.class) - return false; - AccordPartialCommand that = (AccordPartialCommand) obj; - return txnId.equals(that.txnId) - && Objects.equals(executeAt, that.executeAt) - && Objects.equals(deps, that.deps) - && status == that.status; - } - - public static class PartialCommandSerializer - { - public void serialize(AccordPartialCommand command, DataOutputPlus out, AccordSerializerVersion version) throws IOException - { - out.write(version.version); - CommandSerializers.txnId.serialize(command.txnId(), out, version.msgVersion); - serializeNullable(command.executeAt(), out, version.msgVersion, CommandSerializers.timestamp); - CommandSerializers.saveStatus.serialize(command.status, out, version.msgVersion); - serializeCollection(command.deps, out, version.msgVersion, CommandSerializers.txnId); - } - - public ByteBuffer serialize(AccordPartialCommand command) - { - AccordSerializerVersion version = AccordSerializerVersion.CURRENT; - int size = serializedSize(command, version); - try (DataOutputBuffer out = new DataOutputBuffer(size)) - { - serialize(command, out, version); - return out.buffer(false); - } - catch (IOException e) - { - throw new RuntimeException(e); - } - } - - public AccordSerializerVersion deserializeVersion(DataInputPlus in) throws IOException - { - return AccordSerializerVersion.serializer.deserialize(in); - } - - // check for cached command first, otherwise deserialize - private AccordPartialCommand deserialize(AccordCommandsForKey commandsForKey, AccordCommandStore commandStore, DataInputPlus in) throws IOException - { - AccordSerializerVersion version = deserializeVersion(in); - TxnId txnId = CommandSerializers.txnId.deserialize(in, version.msgVersion); - AsyncContext context = commandStore.getContext(); - AccordPartialCommand command = getCachedFull(commandsForKey, txnId, context); - if (command != null) - return command; - - Timestamp executeAt = deserializeNullable(in, version.msgVersion, CommandSerializers.timestamp); - SaveStatus status = CommandSerializers.saveStatus.deserialize(in, version.msgVersion); - List deps = deserializeList(in, version.msgVersion, CommandSerializers.txnId); - AccordPartialCommand partial = new AccordPartialCommand(txnId, executeAt, deps, status); - addToContext(partial, context); - return partial; - } - - public AccordPartialCommand deserialize(AccordCommandsForKey commandsForKey, AccordCommandStore commandStore, ByteBuffer bytes) - { - try (DataInputBuffer in = new DataInputBuffer(bytes, true)) - { - return deserialize(commandsForKey, commandStore, in); - } - catch (IOException e) - { - throw new RuntimeException(e); - } - } - - public int serializedSize(AccordPartialCommand command, AccordSerializerVersion version) - { - int size = Math.toIntExact(AccordSerializerVersion.serializer.serializedSize(version)); - size += CommandSerializers.txnId.serializedSize(); - size += serializedNullableSize(command.executeAt(), version.msgVersion, CommandSerializers.timestamp); - size += CommandSerializers.saveStatus.serializedSize(command.status, version.msgVersion); - size += serializedCollectionSize(command.deps, version.msgVersion, CommandSerializers.txnId); - return size; - } - - private AccordPartialCommand getCachedFull(AccordCommandsForKey commandsForKey, TxnId txnId, AsyncContext context) - { - AccordCommand command = context.commands.get(txnId); - if (command == null) - return null; - return new AccordPartialCommand(commandsForKey.key(), command); - } - - private void addToContext(AccordPartialCommand command, AsyncContext context) - { - context.commands.addPartialCommand(command); - } - - /** - * Determines if current modifications require updating command data duplicated elsewhere - */ - public boolean needsUpdate(AccordCommand command) - { - return command.executeAt.hasModifications() || command.status.hasModifications() || command.partialDeps.hasModifications(); - } - } -} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java new file mode 100644 index 000000000000..3dc37b2f3c23 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Objects; + +import com.google.common.annotations.VisibleForTesting; + +import accord.local.Command; +import accord.local.SafeCommand; +import accord.primitives.TxnId; + +public class AccordSafeCommand extends SafeCommand implements AccordSafeState +{ + private boolean invalidated; + private final AccordLoadingState global; + private Command original; + private Command current; + + public AccordSafeCommand(AccordLoadingState global) + { + super(global.key()); + this.global = global; + this.original = null; + this.current = null; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordSafeCommand that = (AccordSafeCommand) o; + return Objects.equals(original, that.original) && Objects.equals(current, that.current); + } + + @Override + public int hashCode() + { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() + { + return "AccordSafeCommand{" + + "invalidated=" + invalidated + + ", global=" + global + + ", original=" + original + + ", current=" + current + + '}'; + } + + @Override + public AccordLoadingState global() + { + checkNotInvalidated(); + return global; + } + + @Override + public Command current() + { + checkNotInvalidated(); + return current; + } + + @Override + @VisibleForTesting + public void set(Command command) + { + checkNotInvalidated(); + this.current = command; + } + + public Command original() + { + checkNotInvalidated(); + return original; + } + + @Override + public void preExecute() + { + checkNotInvalidated(); + original = global.value(); + current = original; + } + + @Override + public void postExecute() + { + checkNotInvalidated(); + global.value(current); + } + + @Override + public void invalidate() + { + invalidated = true; + } + + @Override + public boolean invalidated() + { + return invalidated; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java new file mode 100644 index 000000000000..c71ba18f69a3 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.function.BiFunction; + +import javax.annotation.Nullable; + +import accord.api.Agent; +import accord.api.DataStore; +import accord.api.Key; +import accord.api.ProgressLog; +import accord.impl.AbstractSafeCommandStore; +import accord.impl.CommandsForKey; +import accord.impl.SafeCommandsForKey; +import accord.local.CommandStores.RangesForEpoch; +import accord.local.CommonAttributes; +import accord.local.NodeTimeService; +import accord.local.PreLoadContext; +import accord.local.Status; +import accord.primitives.AbstractKeys; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.RoutableKey; +import accord.primitives.Routables; +import accord.primitives.Seekable; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; + +public class AccordSafeCommandStore extends AbstractSafeCommandStore +{ + private final Map commands; + private final Map commandsForKeys; + private final AccordCommandStore commandStore; + + public AccordSafeCommandStore(PreLoadContext context, + Map commands, + Map commandsForKey, + AccordCommandStore commandStore) + { + super(context); + this.commands = commands; + this.commandsForKeys = commandsForKey; + this.commandStore = commandStore; + } + + @Override + protected AccordSafeCommand getCommandInternal(TxnId txnId) + { + return commands.get(txnId); + } + + @Override + protected void addCommandInternal(AccordSafeCommand command) + { + commands.put(command.txnId(), command); + } + + @Override + protected AccordSafeCommandsForKey getCommandsForKeyInternal(RoutableKey key) + { + return commandsForKeys.get(key); + } + + @Override + protected void addCommandsForKeyInternal(AccordSafeCommandsForKey cfk) + { + commandsForKeys.put(cfk.key(), cfk); + } + + @Override + protected AccordSafeCommand getIfLoaded(TxnId txnId) + { + return commandStore.commandCache().referenceAndGetIfLoaded(txnId); + } + + @Override + protected AccordSafeCommandsForKey getIfLoaded(RoutableKey key) + { + return commandStore.commandsForKeyCache().referenceAndGetIfLoaded(key); + } + + @Override + public AccordCommandStore commandStore() + { + return commandStore; + } + + @Override + public DataStore dataStore() + { + return commandStore().dataStore(); + } + + @Override + public Agent agent() + { + return commandStore.agent(); + } + + @Override + public ProgressLog progressLog() + { + return commandStore().progressLog(); + } + + @Override + public NodeTimeService time() + { + return commandStore.time(); + } + + @Override + public RangesForEpoch ranges() + { + return commandStore().ranges(); + } + + @Override + public long latestEpoch() + { + return commandStore().time().epoch(); + } + + @Override + protected Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) + { + // TODO: Seekables + // TODO: efficiency + return ((Keys)keysOrRanges).stream() + .map(this::maybeCommandsForKey) + .filter(Objects::nonNull) + .map(SafeCommandsForKey::current) + .filter(Objects::nonNull) + .map(CommandsForKey::max) + .max(Comparator.naturalOrder()) + .orElse(Timestamp.NONE); + } + + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + { + switch (keysOrRanges.domain()) { + default: + throw new AssertionError(); + case Key: + // TODO: efficiency + AbstractKeys keys = (AbstractKeys) keysOrRanges; + for (Key key : keys) + { + if (!slice.contains(key)) continue; + SafeCommandsForKey forKey = commandsForKey(key); + accumulate = map.apply(forKey.current(), accumulate); + if (accumulate.equals(terminalValue)) + return accumulate; + } + break; + case Range: + // TODO (required): implement + throw new UnsupportedOperationException(); + } + return accumulate; + } + + @Override + public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, T terminalValue) + { + accumulate = mapReduceForKey(keysOrRanges, slice, (forKey, prev) -> { + CommandsForKey.CommandTimeseries timeseries; + switch (testTimestamp) + { + default: throw new AssertionError(); + case STARTED_AFTER: + case STARTED_BEFORE: + timeseries = forKey.byId(); + break; + case EXECUTES_AFTER: + case MAY_EXECUTE_BEFORE: + timeseries = forKey.byExecuteAt(); + } + CommandsForKey.CommandTimeseries.TestTimestamp remapTestTimestamp; + switch (testTimestamp) + { + default: throw new AssertionError(); + case STARTED_AFTER: + case EXECUTES_AFTER: + remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.AFTER; + break; + case STARTED_BEFORE: + case MAY_EXECUTE_BEFORE: + remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.BEFORE; + } + return timeseries.mapReduce(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, prev, terminalValue); + }, accumulate, terminalValue); + + return accumulate; + } + + @Override + public CommonAttributes completeRegistration(Seekables seekables, Ranges ranges, AccordSafeCommand liveCommand, CommonAttributes attrs) + { + for (Seekable seekable : seekables) + attrs = completeRegistration(seekable, ranges, liveCommand, attrs); + return attrs; + } + + @Override + public CommonAttributes completeRegistration(Seekable seekable, Ranges ranges, AccordSafeCommand liveCommand, CommonAttributes attrs) + { + Key key = (Key) seekable; + if (ranges.contains(key)) + { + AccordSafeCommandsForKey cfk = commandsForKey(key); + cfk.register(liveCommand.current()); + attrs = attrs.mutable().addListener(new CommandsForKey.Listener(key)); + } + return attrs; + } + + @Override + protected void invalidateSafeState() + { + commands.values().forEach(AccordSafeCommand::invalidate); + commandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); + } + + @Override + public CommandsForKey.CommandLoader cfkLoader() + { + return CommandsForKeySerializer.loader; + } + + public void postExecute(Map commands, + Map commandsForKeys) + { + postExecute(); + commands.values().forEach(AccordSafeState::postExecute); + commandsForKeys.values().forEach(AccordSafeState::postExecute); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java new file mode 100644 index 000000000000..a3b0595f2eb5 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Objects; + +import com.google.common.annotations.VisibleForTesting; + +import accord.api.Key; +import accord.impl.CommandsForKey; +import accord.impl.SafeCommandsForKey; +import accord.primitives.RoutableKey; + +public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState +{ + private boolean invalidated; + private final AccordLoadingState global; + private CommandsForKey original; + private CommandsForKey current; + + public AccordSafeCommandsForKey(AccordLoadingState global) + { + super((Key) global.key()); + this.global = global; + this.original = null; + this.current = null; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordSafeCommandsForKey that = (AccordSafeCommandsForKey) o; + return Objects.equals(original, that.original) && Objects.equals(current, that.current); + } + + @Override + public int hashCode() + { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() + { + return "AccordSafeCommandsForKey{" + + "invalidated=" + invalidated + + ", global=" + global + + ", original=" + original + + ", current=" + current + + '}'; + } + + @Override + public AccordLoadingState global() + { + checkNotInvalidated(); + return global; + } + + @Override + public CommandsForKey current() + { + checkNotInvalidated(); + return current; + } + + @Override + @VisibleForTesting + public void set(CommandsForKey cfk) + { + checkNotInvalidated(); + this.current = cfk; + } + + public CommandsForKey original() + { + checkNotInvalidated(); + return original; + } + + @Override + public void preExecute() + { + checkNotInvalidated(); + original = global.value(); + current = original; + } + + @Override + public void postExecute() + { + checkNotInvalidated(); + global.value(current); + } + + @Override + public void invalidate() + { + invalidated = true; + } + + @Override + public boolean invalidated() + { + return invalidated; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeState.java b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java new file mode 100644 index 000000000000..4c8f2ec1f983 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.function.Function; + +import accord.impl.SafeState; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncResults; +import org.apache.cassandra.service.accord.AccordLoadingState.LoadingState; + +public interface AccordSafeState extends SafeState +{ + void set(V update); + V original(); + void preExecute(); + void postExecute(); + AccordLoadingState global(); + + default boolean hasUpdate() + { + return original() != current(); + } + + default void revert() + { + set(original()); + } + + default K key() + { + return global().key(); + } + + default LoadingState loadingState() + { + return global().state(); + } + + default AsyncResults.RunnableResult load(Function loadFunction) + { + return global().load(loadFunction); + } + + default AsyncChain listen() + { + return global().listen(); + } + + default Throwable failure() + { + return global().failure(); + } + + default void checkNotInvalidated() + { + if (invalidated()) + throw new IllegalStateException("Cannot access invalidated " + this); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 28f2d878c6a0..7e68da36b042 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -26,6 +26,9 @@ import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.api.Result; import accord.coordinate.Preempted; import accord.coordinate.Timeout; @@ -35,10 +38,11 @@ import accord.local.ShardDistributor.EvenSplit; import accord.messages.Request; import accord.primitives.Txn; +import accord.primitives.TxnId; import accord.topology.TopologyManager; +import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Shutdownable; import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; @@ -49,6 +53,8 @@ import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; import org.apache.cassandra.service.accord.api.AccordScheduler; +import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; +import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; @@ -61,6 +67,8 @@ public class AccordService implements IAccordService, Shutdownable { + private static final Logger logger = LoggerFactory.getLogger(AccordService.class); + public static final AccordClientRequestMetrics readMetrics = new AccordClientRequestMetrics("AccordRead"); public static final AccordClientRequestMetrics writeMetrics = new AccordClientRequestMetrics("AccordWrite"); @@ -125,6 +133,7 @@ public static long uniqueNow() private AccordService() { Node.Id localId = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + logger.info("Starting accord with nodeId {}", localId); this.messageSink = new AccordMessageSink(); this.configService = new AccordConfigurationService(localId); this.scheduler = new AccordScheduler(); @@ -182,12 +191,14 @@ public TopologyManager topology() public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) { AccordClientRequestMetrics metrics = txn.isWrite() ? writeMetrics : readMetrics; + TxnId txnId = null; final long startNanos = nanoTime(); try { metrics.keySize.update(txn.keys().size()); - AsyncResult asyncResult = node.coordinate(txn); - Result result = AsyncResults.getBlocking(asyncResult, DatabaseDescriptor.getTransactionTimeout(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + txnId = node.nextTxnId(txn.kind(), txn.keys().domain()); + AsyncResult asyncResult = node.coordinate(txnId, txn); + Result result = AsyncChains.getBlocking(asyncResult, DatabaseDescriptor.getTransactionTimeout(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); return (TxnData) result; } catch (ExecutionException e) @@ -196,7 +207,7 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) if (cause instanceof Timeout) { metrics.timeouts.mark(); - throw throwTimeout(txn, consistencyLevel); + throw throwTimeout(txnId, txn, consistencyLevel); } if (cause instanceof Preempted) { @@ -204,7 +215,7 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) //TODO need to improve // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match - throw throwTimeout(txn, consistencyLevel); + throw throwPreempted(txnId, txn, consistencyLevel); } metrics.failures.mark(); throw new RuntimeException(cause); @@ -217,7 +228,7 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) catch (TimeoutException e) { metrics.timeouts.mark(); - throw throwTimeout(txn, consistencyLevel); + throw throwTimeout(txnId, txn, consistencyLevel); } finally { @@ -225,10 +236,16 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) } } - private static RuntimeException throwTimeout(Txn txn, ConsistencyLevel consistencyLevel) + private static RuntimeException throwTimeout(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) + { + throw txn.isWrite() ? new WriteTimeoutException(WriteType.CAS, consistencyLevel, 0, 0, txnId.toString()) + : new ReadTimeoutException(consistencyLevel, 0, 0, false, txnId.toString()); + } + + private static RuntimeException throwPreempted(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) { - throw txn.isWrite() ? new WriteTimeoutException(WriteType.CAS, consistencyLevel, 0, 0) - : new ReadTimeoutException(consistencyLevel, 0, 0, false); + throw txn.isWrite() ? new WritePreemptedException(WriteType.CAS, consistencyLevel, 0, 0, txnId.toString()) + : new ReadPreemptedException(consistencyLevel, 0, 0, false, txnId.toString()); } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/service/accord/AccordState.java b/src/java/org/apache/cassandra/service/accord/AccordState.java deleted file mode 100644 index 0e1a224cd1f2..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordState.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.function.BiConsumer; -import java.util.function.Function; - -import accord.utils.async.AsyncResult; -import org.apache.cassandra.service.accord.store.StoredNavigableMap; -import org.apache.cassandra.service.accord.store.StoredSet; - -public interface AccordState -{ - enum ReadWrite { FULL, WRITE_ONLY, READ_ONLY } - - K key(); - - boolean hasModifications(); - - void clearModifiedFlag(); - - boolean isEmpty(); - - boolean isLoaded(); - - long estimatedSizeOnHeap(); - - default ReadWrite rw() - { - return ReadWrite.FULL; - } - - default boolean isFullInstance() - { - return rw() == ReadWrite.FULL; - } - - default boolean isWriteOnlyInstance() - { - return rw() == ReadWrite.WRITE_ONLY; - } - - default boolean isReadOnlyInstance() - { - return rw() == ReadWrite.READ_ONLY; - } - - interface WriteOnly> extends AccordState - { - @Override - default ReadWrite rw() - { - return ReadWrite.WRITE_ONLY; - } - - void asyncResult(AsyncResult notifier); - - AsyncResult asyncResult(); - - /** - * Apply the write only changes to the full instance - */ - void applyChanges(V instance); - - static , V> void applyMapChanges(T from, T to, Function> getMap) - { - StoredNavigableMap fromMap = getMap.apply(from); - - if (!fromMap.hasModifications()) - return; - - StoredNavigableMap toMap = getMap.apply(to); - fromMap.forEachAddition(toMap::blindPut); - fromMap.forEachDeletion((BiConsumer) toMap::blindRemove); - } - - static > void applySetChanges(T from, T to, Function> getSet) - { - StoredSet fromSet = getSet.apply(from); - - if (!fromSet.hasModifications()) - return; - - StoredSet toSet = getSet.apply(to); - fromSet.forEachAddition(toSet::blindAdd); - fromSet.forEachDeletion(toSet::blindRemove); - } - } -} diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 5993b73d22f4..2bb852eb5411 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -18,117 +18,114 @@ package org.apache.cassandra.service.accord; -import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.function.Function; +import java.util.function.ToLongFunction; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.api.Data; +import accord.utils.Invariants; +import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; import org.apache.cassandra.utils.ObjectSizes; +import static org.apache.cassandra.service.accord.AccordLoadingState.LoadingState.FAILED; +import static org.apache.cassandra.service.accord.AccordLoadingState.LoadingState.LOADED; + /** * Cache for AccordCommand and AccordCommandsForKey, available memory is shared between the two object types. * * Supports dynamic object sizes. After each acquire/free cycle, the cacheable objects size is recomputed to * account for data added/removed during txn processing if it's modified flag is set - * - * TODO: explain how items move to and from the active pool and are evicted */ public class AccordStateCache { private static final Logger logger = LoggerFactory.getLogger(AccordStateCache.class); - private static class WriteOnlyGroup> + public static class Node extends AccordLoadingState { - private boolean locked = false; - private List> items = new ArrayList<>(); + static final long EMPTY_SIZE = ObjectSizes.measure(new AccordStateCache.Node(null)); - @Override - public String toString() + private Node prev; + private Node next; + private int references = 0; + private long lastQueriedEstimatedSizeOnHeap = 0; + + public Node(K key) { - return "WriteOnlyGroup{" + - "locked=" + locked + - ", items=" + items + - '}'; + super(key); } - void lock() + public int referenceCount() { - locked = true; + return references; } - void add(AccordState.WriteOnly item) + boolean isLoaded() { - items.add(item); + return state() == LOADED; } - void purge() + public boolean isComplete() { - if (locked) - return; - - while (!items.isEmpty()) + switch (state()) { - AccordState.WriteOnly item = items.get(0); - - // we can't remove items out of order, so if we encounter a write is still pending, we stop - if (item.asyncResult() == null || !item.asyncResult().isDone()) - break; - - items.remove(0); + case PENDING: + case UNINITIALIZED: + return false; + case FAILED: + case LOADED: + return true; + default: throw new UnsupportedOperationException("Unknown state: " + state()); } } - boolean isEmpty() + private boolean isInQueue() { - return items.isEmpty(); + return prev != null && next != null; } - } - static class Node> - { - static final long EMPTY_SIZE = ObjectSizes.measure(new AccordStateCache.Node<>(null)); - - final V value; - private Node prev; - private Node next; - private int references = 0; - private long lastQueriedEstimatedSizeOnHeap = 0; + long estimatedSizeOnHeap(ToLongFunction estimator) + { + long result = EMPTY_SIZE; + V v; + if (isLoaded() && (v = value()) != null) + result += estimator.applyAsLong(v); + lastQueriedEstimatedSizeOnHeap = result; + return result; + } - Node(V value) + long estimatedSizeOnHeapDelta(ToLongFunction estimator) { - this.value = value; + long prevSize = lastQueriedEstimatedSizeOnHeap; + return estimatedSizeOnHeap(estimator) - prevSize; } - long estimatedSizeOnHeap() + boolean shouldUpdateSize() { - long result = EMPTY_SIZE + value.estimatedSizeOnHeap(); - lastQueriedEstimatedSizeOnHeap = result; - return result; + return isLoaded() && lastQueriedEstimatedSizeOnHeap == EMPTY_SIZE; } - long estimatedSizeOnHeapDelta() + void maybeCleanupLoad() { - long prevSize = lastQueriedEstimatedSizeOnHeap; - return estimatedSizeOnHeap() - prevSize; + state(); } - K key() + @Override + public String toString() { - return value.key(); + return "Node{" + state() + + ", key=" + key() + + ", references=" + references + + "}@" + Integer.toHexString(System.identityHashCode(this)); } } @@ -149,17 +146,12 @@ public NamedMap(String name) } } - public final Map> active = new HashMap<>(); private final Map> cache = new HashMap<>(); - private final Map> pendingWriteOnly = new HashMap<>(); - private final Set> instances = new HashSet<>(); + private final Set> instances = new HashSet<>(); - private final NamedMap> loadResults = new NamedMap<>("loadResults"); private final NamedMap> saveResults = new NamedMap<>("saveResults"); - private final NamedMap> readResults = new NamedMap<>("readResults"); - private final NamedMap> writeResults = new NamedMap<>("writeResults"); - + private int unreferenced = 0; Node head; Node tail; private long maxSizeInBytes; @@ -177,6 +169,25 @@ public void setMaxSize(long size) maybeEvict(); } + public long getMaxSize() + { + return maxSizeInBytes; + } + + @VisibleForTesting + public void clear() + { + head = tail = null; + cache.clear(); + saveResults.clear(); + } + + @VisibleForTesting + public Map> saveResults() + { + return saveResults; + } + private void unlink(Node node) { Node prev = node.prev; @@ -204,6 +215,7 @@ private void unlink(Node node) node.prev = null; node.next = null; + unreferenced--; } private void push(Node node) @@ -220,20 +232,20 @@ private void push(Node node) head = node; tail = node; } + unreferenced++; } - private void updateSize(Node node) + private void updateSize(Node node, ToLongFunction estimator) { - bytesCached += node.estimatedSizeOnHeapDelta(); + bytesCached += node.estimatedSizeOnHeapDelta(estimator); } // don't evict if there's an outstanding save result. If an item is evicted then reloaded // before it's mutation is applied, out of date info will be loaded - private boolean canEvict(Object key) + private boolean canEvict(Node node) { - // getResult only returns a result if it is running, so don't need to check if its still running - AsyncResult result = getAsyncResult(saveResults, key); - return result == null || result.isDone(); + Invariants.checkState(node.references == 0); + return node.state() == FAILED || !hasActiveAsyncResult(saveResults, node.key()); } private void maybeEvict() @@ -247,26 +259,36 @@ private void maybeEvict() Node evict = current; current = current.prev; - // if there are any dangling write only groups, apply them and - // move their results into write results so we don't evict - applyAndRemoveWriteOnlyGroup(evict.value); - if (!canEvict(evict.key())) + // TODO (expected, efficiency): can this be reworked so we're not skipping unevictable nodes everytime we try to evict? + if (!canEvict(evict)) continue; - logger.trace("Evicting {} {}", evict.value.getClass().getSimpleName(), evict.key()); - unlink(evict); - cache.remove(evict.key()); - bytesCached -= evict.estimatedSizeOnHeap(); + evict(evict, true); } } - private static > F getAsyncResult(NamedMap resultMap, K key) + private void evict(Node evict, boolean unlink) + { + logger.trace("Evicting {} {} - {}", evict.state(), evict.key(), evict.isLoaded() ? evict.value() : null); + if (unlink) + unlink(evict); + else + Invariants.checkState(!evict.isInQueue()); + + Node self = cache.get(evict.key()); + Invariants.checkState(self == evict, "Leaked node detected; was attempting to remove %s but cache had %s", evict, self); + cache.remove(evict.key()); + bytesCached -= evict.lastQueriedEstimatedSizeOnHeap; + } + + private static > F getAsyncResult(NamedMap resultMap, K key) { F r = resultMap.get(key); if (r == null) return null; - if (!r.isDone()) + // if the result was a failure, can not remove from the map as this would allow eviction + if (!r.isSuccess()) return r; if (logger.isTraceEnabled()) @@ -281,54 +303,53 @@ private static > void setAsyncResult(Map resultsMap.put(key, result); } + private static boolean hasActiveAsyncResult(NamedMap> resultMap, K key) + { + // getResult only returns a result if it is not complete, so don't need to check if its been completed + return getAsyncResult(resultMap, key) != null; + } + private static void mergeAsyncResult(Map> resultMap, K key, AsyncResult result) { AsyncResult existing = resultMap.get(key); if (existing != null && !existing.isDone()) { logger.trace("Merging result {} with existing {}", result, existing); - result = AsyncResults.reduce(ImmutableList.of(existing, result), (a, b) -> null).beginAsResult(); + result = AsyncChains.reduce(existing, result, (a, b) -> null).beginAsResult(); } resultMap.put(key, result); } - private void maybeClearAsyncResult(K key) + @VisibleForTesting + private void maybeCleanupLoad(K key) { - // will clear if it's done - getAsyncResult(loadResults, key); - getAsyncResult(saveResults, key); - getAsyncResult(readResults, key); - getAsyncResult(writeResults, key); + Node node = cache.get(key); + if (node != null) + node.maybeCleanupLoad(); } - public > void applyAndRemoveWriteOnlyGroup(V instance) + private void maybeClearAsyncResult(K key) { - WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.remove(instance.key()); - if (group == null) - return; - - logger.trace("Applying and removing write only group for {} ({})", instance.key(), group); - for (AccordState.WriteOnly writeOnly : group.items) - { - writeOnly.applyChanges(instance); - if (!writeOnly.asyncResult().isDone()) - mergeAsyncResult(saveResults, instance.key(), writeOnly.asyncResult()); - } + maybeCleanupLoad(key); + // will clear if it's done + getAsyncResult(saveResults, key); } - public class Instance> + public class Instance> { private final Class keyClass; private final Class valClass; - private final Function factory; + private final Function, S> safeRefFactory; + private final ToLongFunction heapEstimator; private final Stats stats = new Stats(); - public Instance(Class keyClass, Class valClass, Function factory) + public Instance(Class keyClass, Class valClass, Function, S> safeRefFactory, ToLongFunction heapEstimator) { this.keyClass = keyClass; this.valClass = valClass; - this.factory = factory; + this.safeRefFactory = safeRefFactory; + this.heapEstimator = heapEstimator; } @Override @@ -336,7 +357,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - Instance instance = (Instance) o; + Instance instance = (Instance) o; return keyClass.equals(instance.keyClass) && valClass.equals(instance.valClass); } @@ -346,174 +367,138 @@ public int hashCode() return Objects.hash(keyClass, valClass); } - private V getOrCreate(K key, boolean createIfAbsent) + private Node reference(K key, boolean createIfAbsent) { stats.queries++; AccordStateCache.this.stats.queries++; - Node node = (Node) active.get(key); - if (node != null) - { - stats.hits++; - AccordStateCache.this.stats.hits++; - node.references++; - return node.value; - } - - node = (Node) cache.remove(key); - + Node node = (Node) cache.get(key); if (node == null) { stats.misses++; AccordStateCache.this.stats.misses++; if (!createIfAbsent) return null; - V value = factory.apply(key); - node = new Node<>(value); - updateSize(node); + node = new Node<>(key); + // need to store ref right away, so eviction can not remove + node.references++; + cache.put(key, node); + updateSize(node, heapEstimator); + maybeEvict(); } else { + if (node.state() == FAILED) + { + if (node.references != 0) + { + //TODO concurrent access to a failed node + // the API does not return Node but instead what node points to, this is a problem in this case as + // releasing 42 would attempt to release the retry and not the failed that is trying to cleanup + throw new UnsupportedOperationException("Attempted to reference failed node " + node); + } + + evict(node, true); + return reference(key, createIfAbsent); + } stats.hits++; AccordStateCache.this.stats.hits++; - unlink(node); + if (node.references == 0) + unlink(node); + else + Invariants.checkState(!node.isInQueue()); + node.references++; } - Preconditions.checkState(node.references == 0); - maybeEvict(); - - node.references++; - active.put(key, node); - - return node.value; - } - - public V getOrCreate(K key) - { - return getOrCreate(key, true); + return node; } - public V getOrNull(K key) + public S reference(K key) { - return getOrCreate(key, false); + Node node = reference(key, true); + return safeRefFactory.apply(node); } - public void release(V value) + public S referenceAndGetIfLoaded(K key) { - K key = value.key(); - logger.trace("Releasing resources for {}: {}", key, value); - maybeClearAsyncResult(key); - Node node = (Node) active.get(key); - Preconditions.checkState(node != null && node.references > 0); - Preconditions.checkState(node.value == value); - if (--node.references == 0) - { - logger.trace("Moving {} from active pool to cache", key); - active.remove(key); - cache.put(key, node); - push(node); - } - - if (value.hasModifications()) - { - value.clearModifiedFlag(); - updateSize(node); - } - maybeEvict(); + Node node = reference(key, false); + if (node == null || !node.isLoaded()) + return null; + S safeRef = safeRefFactory.apply(node); + safeRef.preExecute(); + return safeRef; } @VisibleForTesting - boolean canEvict(K key) + public Node getUnsafe(K key) { - return AccordStateCache.this.canEvict(key); + return (Node) cache.get(key); } @VisibleForTesting - boolean writeOnlyGroupIsLocked(K key) + public boolean isReferenced(K key) { - WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.get(key); - return group != null && group.locked; + Node node = (Node) cache.get(key); + return node != null && node.references > 0; } @VisibleForTesting - int pendingWriteOnlyOperations(K key) + public boolean isLoaded(K key) { - WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.get(key); - return group != null ? group.items.size() : 0; + Node node = (Node) cache.get(key); + return node != null && node.isLoaded(); } - public void lockWriteOnlyGroupIfExists(K key) + public void release(S safeRef) { - WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.get(key); - if (group == null) - return; - - logger.trace("Locking write only group for {} ({})", key, group); - group.purge(); - if (!group.isEmpty()) - group.lock(); - } - - public void applyAndRemoveWriteOnlyGroup(V instance) - { - AccordStateCache.this.applyAndRemoveWriteOnlyGroup(instance); - } - - public void addWriteOnly(AccordState.WriteOnly writeOnly) - { - K key = writeOnly.key(); - Preconditions.checkArgument(writeOnly.asyncResult() != null); - WriteOnlyGroup group = (WriteOnlyGroup) pendingWriteOnly.computeIfAbsent(key, k -> new WriteOnlyGroup<>()); - - // if a load result exists for the key we're creating a write group for, we need to lock - // the group so the loading instance gets changes applied when it finishes loading - if (getLoadResult(key) != null) - group.lock(); - - group.add(writeOnly); - } - - public void purgeWriteOnly(K key) - { - WriteOnlyGroup items = pendingWriteOnly.get(key); - if (items == null) - return; - - items.purge(); - if (items.isEmpty()) - pendingWriteOnly.remove(key); - } + K key = safeRef.global().key(); + logger.trace("Releasing resources for {}: {}", key, safeRef); + maybeClearAsyncResult(key); + Node node = (Node) cache.get(key); + Invariants.checkState(node != null, "node is null for %s", key); + Invariants.checkState(node.references > 0, "references (%d) are zero for %s (%s)", node.references, key, node); - public boolean writeOnlyGroupExists(K key) - { - return pendingWriteOnly.get(key) != null; - } + Invariants.checkState(safeRef.global() == node); + if (node.isLoaded() && (safeRef.hasUpdate() || node.shouldUpdateSize())) + { + node.value(safeRef.current()); + updateSize(node, heapEstimator); + } - public int getWriteOnlyGroupSize(K key) - { - WriteOnlyGroup group = pendingWriteOnly.get(key); - return group != null ? group.items.size() : 0; - } + if (--node.references == 0) + { + if (node.state() == FAILED) + { + logger.trace("Found failed node {}, evicting", key); + evict(node, false); + } + else + { + logger.trace("Moving {} from active pool to cache", key); + Invariants.checkState(!node.isInQueue()); + push(node); + } + } - public AsyncResult getLoadResult(K key) - { - return getAsyncResult(loadResults, key); + maybeEvict(); } - public void cleanupLoadResult(K key) + @VisibleForTesting + public boolean canEvict(K key) { - getLoadResult(key); + return AccordStateCache.this.canEvict(cache.get(key)); } @VisibleForTesting public boolean hasLoadResult(K key) { - return loadResults.get(key) != null; + Node node = cache.get(key); + return node != null && !node.isLoaded(); } - public void setLoadResult(K key, AsyncResult result) + public void cleanupLoadResult(K key) { - setAsyncResult(loadResults, key, result); + maybeCleanupLoad(key); } public AsyncResult getSaveResult(K key) @@ -538,36 +523,6 @@ public boolean hasSaveResult(K key) return saveResults.get(key) != null; } - public AsyncResult getReadResult(K key) - { - return getAsyncResult(readResults, key); - } - - public void setReadResult(K key, AsyncResult result) - { - setAsyncResult(readResults, key, result); - } - - public void cleanupReadResult(K key) - { - getReadResult(key); - } - - public AsyncResult getWriteResult(K key) - { - return getAsyncResult(writeResults, key); - } - - public void setWriteResult(K key, AsyncResult result) - { - setAsyncResult(writeResults, key, result); - } - - public void cleanupWriteResult(K key) - { - getWriteResult(key); - } - public long cacheQueries() { return stats.queries; @@ -584,9 +539,11 @@ public long cacheMisses() } } - public > Instance instance(Class keyClass, Class valClass, Function factory) + public > Instance instance(Class keyClass, Class valClass, + Function, S> safeRefFactory, + ToLongFunction heapEstimator) { - Instance instance = new Instance<>(keyClass, valClass, factory); + Instance instance = new Instance<>(keyClass, valClass, safeRefFactory, heapEstimator); if (!instances.add(instance)) throw new IllegalArgumentException(String.format("Cache instances for types %s -> %s already exists", keyClass.getName(), valClass.getName())); @@ -594,13 +551,19 @@ public > Instance instance(Class keyClass, } @VisibleForTesting - int numActiveEntries() + int numReferencedEntries() + { + return cache.size() - unreferenced; + } + + @VisibleForTesting + int numUnreferencedEntries() { - return active.size(); + return unreferenced; } @VisibleForTesting - int numCachedEntries() + int totalNumEntries() { return cache.size(); } @@ -612,9 +575,10 @@ long bytesCached() } @VisibleForTesting - boolean keyIsActive(Object key) + boolean keyIsReferenced(Object key) { - return active.containsKey(key); + Node node = cache.get(key); + return node != null && node.references > 0; } @VisibleForTesting @@ -626,7 +590,7 @@ boolean keyIsCached(Object key) @VisibleForTesting int references(Object key) { - Node node = active.get(key); + Node node = cache.get(key); return node != null ? node.references : 0; } diff --git a/src/java/org/apache/cassandra/service/accord/ListenerProxy.java b/src/java/org/apache/cassandra/service/accord/ListenerProxy.java deleted file mode 100644 index ea7a74c0c658..000000000000 --- a/src/java/org/apache/cassandra/service/accord/ListenerProxy.java +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Objects; - -import com.google.common.collect.ImmutableList; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import accord.local.Command; -import accord.local.CommandListener; -import accord.local.PreLoadContext; -import accord.local.SafeCommandStore; -import accord.primitives.Keys; -import accord.primitives.TxnId; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; -import org.apache.cassandra.db.marshal.ValueAccessor; -import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; -import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.async.AsyncContext; -import org.apache.cassandra.service.accord.serializers.CommandSerializers; -import org.apache.cassandra.utils.ObjectSizes; - -public abstract class ListenerProxy implements CommandListener, Comparable -{ - private static final Logger logger = LoggerFactory.getLogger(ListenerProxy.class); - - public enum Kind { COMMAND, COMMANDS_FOR_KEY } - - public abstract Kind kind(); - public abstract ByteBuffer identifier(); - - private ListenerProxy() - { - } - - @Override - public int compareTo(ListenerProxy that) - { - return kind().compareTo(that.kind()); - } - - protected abstract long estimatedSizeOnHeap(); - - static class CommandListenerProxy extends ListenerProxy - { - private static final long EMPTY_SIZE = ObjectSizes.measure(new CommandListenerProxy(null)); - private final TxnId txnId; - - public CommandListenerProxy(TxnId txnId) - { - this.txnId = txnId; - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - CommandListenerProxy that = (CommandListenerProxy) o; - return txnId.equals(that.txnId); - } - - @Override - public int hashCode() - { - return Objects.hash(txnId); - } - - @Override - public int compareTo(ListenerProxy that) - { - int cmp = super.compareTo(that); - if (cmp != 0) - return cmp; - - return this.txnId.compareTo(((CommandListenerProxy) that).txnId); - } - - @Override - public String toString() - { - return "CommandListenerProxy{" + - "txnId=" + txnId + - '}'; - } - - @Override - public PreLoadContext listenerPreLoadContext(TxnId caller) - { - throw new UnsupportedOperationException(); - } - - @Override - public Kind kind() - { - return Kind.COMMAND; - } - - @Override - public ByteBuffer identifier() - { - ByteBuffer bytes = ByteBuffer.allocate(1 + CommandSerializers.txnId.serializedSize()); - ByteBufferAccessor.instance.putByte(bytes, 0, (byte) kind().ordinal()); - CommandSerializers.txnId.serialize(txnId, bytes, ByteBufferAccessor.instance, 1); - return bytes; - } - - @Override - public void onChange(SafeCommandStore safeStore, Command c) - { - AccordCommand command = (AccordCommand) c; - SafeAccordCommandStore commandStore = (SafeAccordCommandStore) safeStore; - AsyncContext context = commandStore.context(); - PreLoadContext loadCtx = PreLoadContext.contextFor(ImmutableList.of(command.txnId(), txnId), Keys.EMPTY); - if (context.containsScopedItems(loadCtx)) - { - // TODO (soon): determine if this can break anything by not waiting for the current operation to denormalize it's data - // the summary loader may default to commands in context, in case it wouldn't - logger.trace("{}: synchronously updating listening command {}", c.txnId(), txnId); - commandStore.command(txnId).onChange(safeStore, c); - } - else - { - TxnId callingTxnId = command.txnId(); - logger.trace("{}: asynchronously updating listening command {}", c.txnId(), txnId); - commandStore.execute(loadCtx, reSafeStore -> { - Command caller = reSafeStore.command(callingTxnId); - commandStore.command(txnId).onChange(reSafeStore, caller); - }); - } - } - - @Override - protected long estimatedSizeOnHeap() - { - return EMPTY_SIZE + AccordObjectSizes.timestamp(txnId); - } - } - - /** - * These always need to be run in either the same task as the notifying command, or immediately afterwards, otherwise we - * may use stale max timestamps for preaccept - */ - static class CommandsForKeyListenerProxy extends ListenerProxy - { - private static final long EMPTY_SIZE = ObjectSizes.measure(new CommandsForKeyListenerProxy(null)); - private final PartitionKey key; - - public CommandsForKeyListenerProxy(PartitionKey key) - { - this.key = key; - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - CommandsForKeyListenerProxy that = (CommandsForKeyListenerProxy) o; - return key.equals(that.key); - } - - @Override - public int hashCode() - { - return Objects.hash(key); - } - - @Override - public int compareTo(ListenerProxy that) - { - int cmp = super.compareTo(that); - if (cmp != 0) - return cmp; - - return this.key.compareTo(((CommandsForKeyListenerProxy) that).key); - } - - @Override - public String toString() - { - return "CommandsForKeyListenerProxy{" + - "key=" + key + - '}'; - } - - @Override - public PreLoadContext listenerPreLoadContext(TxnId caller) - { - throw new UnsupportedOperationException(); - } - - @Override - public Kind kind() - { - return Kind.COMMANDS_FOR_KEY; - } - - @Override - public ByteBuffer identifier() - { - ByteBuffer bytes = ByteBuffer.allocate((int) (1 + PartitionKey.serializer.serializedSize(key))); - ByteBufferAccessor.instance.putByte(bytes, 0, (byte) kind().ordinal()); - PartitionKey.serializer.serialize(key, bytes, ByteBufferAccessor.instance, 1); - return bytes; - } - - @Override - public void onChange(SafeCommandStore safeStore, Command c) - { - AccordCommand command = (AccordCommand) c; - SafeAccordCommandStore commandStore = (SafeAccordCommandStore) safeStore; - AsyncContext context = commandStore.context(); - PreLoadContext loadCtx = PreLoadContext.contextFor(ImmutableList.of(command.txnId()), Keys.of(key)); - if (context.containsScopedItems(loadCtx)) - { - logger.trace("{}: synchronously updating listening cfk {}", c.txnId(), key); - commandStore.commandsForKey(key).onChange(safeStore, c); - } - else - { - TxnId callingTxnId = command.txnId(); - logger.trace("{}: asynchronously updating listening cfk {}", c.txnId(), key); - commandStore.execute(loadCtx, reSafeStore -> { - Command caller = reSafeStore.command(callingTxnId); - commandStore.commandsForKey(key).onChange(reSafeStore, caller); - }); - } - } - - @Override - protected long estimatedSizeOnHeap() - { - return EMPTY_SIZE + key.estimatedSizeOnHeap(); - } - } - - public static ListenerProxy deserialize(V src, ValueAccessor accessor, int offset) throws IOException - { - int ordinal = accessor.getByte(src, offset); - Kind kind = Kind.values()[ordinal]; - offset += 1; - switch (kind) - { - case COMMAND: - TxnId txnId = CommandSerializers.txnId.deserialize(src, accessor, offset); - return new CommandListenerProxy(txnId); - case COMMANDS_FOR_KEY: - PartitionKey key = PartitionKey.serializer.deserialize(src, accessor, offset); - return new CommandsForKeyListenerProxy(key); - default: - throw new IOException("Unknown kind ordinal " + ordinal); - } - } -} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 72aee3c92a13..232684565a79 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -24,6 +24,7 @@ import accord.local.Node; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import org.apache.cassandra.utils.JVMStabilityInspector; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getReadRpcTimeout; @@ -40,16 +41,20 @@ public void onRecover(Node node, Result success, Throwable fail) public void onInconsistentTimestamp(Command command, Timestamp prev, Timestamp next) { // TODO: this + AssertionError error = new AssertionError("Inconsistent execution timestamp detected for txnId " + command.txnId() + ": " + prev + " != " + next); + onUncaughtException(error); + throw error; } @Override public void onUncaughtException(Throwable t) { // TODO: this + JVMStabilityInspector.uncaughtException(Thread.currentThread(), t); } @Override - public void onHandledException(Throwable throwable) + public void onHandledException(Throwable t) { // TODO: this } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncContext.java b/src/java/org/apache/cassandra/service/accord/async/AsyncContext.java deleted file mode 100644 index 4a307333a433..000000000000 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncContext.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.async; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.BiFunction; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; - -import accord.local.PreLoadContext; -import accord.primitives.TxnId; -import org.apache.cassandra.service.accord.AccordCommand; -import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandsForKey; -import org.apache.cassandra.service.accord.AccordPartialCommand; -import org.apache.cassandra.service.accord.AccordState; -import org.apache.cassandra.service.accord.AccordStateCache; -import org.apache.cassandra.service.accord.AccordState.WriteOnly; -import org.apache.cassandra.service.accord.api.PartitionKey; - -public class AsyncContext -{ - public static class Group> - { - final Map items = new HashMap<>(); - final Map> writeOnly = new HashMap<>(); - - @VisibleForTesting - public void add(V item) - { - items.put(item.key(), item); - } - - public V get(K key) - { - return items.get(key); - } - - void releaseResources(AccordStateCache.Instance cache) - { - items.values().forEach(cache::release); - items.clear(); - writeOnly.clear(); - } - - public WriteOnly getOrCreateWriteOnly(K key, BiFunction> factory, AccordCommandStore commandStore) - { - Preconditions.checkState(!items.containsKey(key)); - WriteOnly command = writeOnly.get(key); - if (command == null) - { - command = factory.apply(commandStore, key); - writeOnly.put(key, command); - } - return command; - } - } - - public static class CommandGroup extends Group - { - List partials = new ArrayList<>(); - - public void addPartialCommand(AccordPartialCommand partial) - { - partials.add(partial); - } - - @Override - void releaseResources(AccordStateCache.Instance cache) - { - super.releaseResources(cache); - partials.clear(); - } - } - - public final CommandGroup commands = new CommandGroup(); - public final Group commandsForKey = new Group<>(); - - public boolean containsScopedItems(PreLoadContext loadContext) - { - return Iterables.all(loadContext.txnIds(), commands.items::containsKey) && Iterables.all(loadContext.keys(), commandsForKey.items::containsKey); - } - - void verifyLoaded() - { - commands.items.forEach((key, command) -> Preconditions.checkState(command.isLoaded())); - commandsForKey.items.forEach((key, cfk) -> Preconditions.checkState(cfk.isLoaded())); - } - - void releaseResources(AccordCommandStore commandStore) - { - commands.releaseResources(commandStore.commandCache()); - commandsForKey.releaseResources(commandStore.commandsForKeyCache()); - } -} diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 75c2d356a5ce..34ba0936c678 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -25,26 +25,27 @@ import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.impl.CommandsForKey; +import accord.local.Command; +import accord.primitives.RoutableKey; import accord.primitives.TxnId; +import accord.utils.Invariants; import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.service.accord.AccordCommand; import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandsForKey; import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordLoadingState; +import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordStateCache; -import org.apache.cassandra.service.accord.AccordState; import org.apache.cassandra.service.accord.api.PartitionKey; -import static accord.utils.async.AsyncResults.ofRunnable; - public class AsyncLoader { private static final Logger logger = LoggerFactory.getLogger(AsyncLoader.class); @@ -60,130 +61,91 @@ enum State private final AccordCommandStore commandStore; private final Iterable txnIds; - private final Iterable keys; + private final Iterable keys; protected AsyncResult readResult; - public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) + public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) { this.commandStore = commandStore; this.txnIds = txnIds; this.keys = keys; } - private > AsyncResult referenceAndDispatch(K key, - AccordStateCache.Instance cache, - Map context, - Function> readFunction, - Object callback) - { - V item; - AsyncResult result = cache.getLoadResult(key); - if (result != null) - { - // if a load result exists for this, it must be present in the cache - item = cache.getOrNull(key); - Preconditions.checkState(item != null); - context.put(key, item); - if (logger.isTraceEnabled()) - logger.trace("Existing load result found for {} while loading for {}. ({})", item.key(), callback, item); - return result; - } - - item = cache.getOrCreate(key); - context.put(key, item); - if (item.isLoaded()) - { - if (logger.isTraceEnabled()) - logger.trace("Cached item found for {} while loading for {}. ({})", item.key(), callback, item); - return null; - } - - result = readFunction.apply(item); - cache.setLoadResult(item.key(), result); - if (logger.isTraceEnabled()) - logger.trace("Loading new item for {} while loading for {}. ({})", item.key(), callback, item); - return result; - } - - - private > List> referenceAndDispatchReads(Iterable keys, - AccordStateCache.Instance cache, - Map context, - Function> readFunction, - List> results, - Object callback) + private > void referenceAndAssembleReads(Iterable keys, + Map context, + AccordStateCache.Instance cache, + Function loadFunction, + List loadRunnables, + List> listenChains) { for (K key : keys) { - AsyncResult result = referenceAndDispatch(key, cache, context, readFunction, callback); - if (result == null) - continue; - - if (results == null) - results = new ArrayList<>(); - - results.add(result); + S safeRef = cache.reference(key); + context.put(key, safeRef); + AccordLoadingState.LoadingState state = safeRef.loadingState(); + switch (state) + { + case UNINITIALIZED: + AsyncResults.RunnableResult load = safeRef.load(loadFunction); + listenChains.add(load); + loadRunnables.add(load); + break; + case PENDING: + listenChains.add(safeRef.listen()); + break; + case LOADED: + break; + case FAILED: + throw new RuntimeException(safeRef.failure()); + default: + throw new IllegalStateException("Unhandled loading state: " + state); + } } - - return results; } @VisibleForTesting - Function> loadCommandFunction(Object callback) + Function loadCommandFunction() { - return command -> ofRunnable(Stage.READ.executor(), () -> { - try - { - logger.trace("Starting load of {} for {}", command.txnId(), callback); - AccordKeyspace.loadCommand(commandStore, command); - logger.trace("Completed load of {} for {}", command.txnId(), callback); - } - catch (Throwable t) - { - logger.error("Exception loading {} for {}", command.txnId(), callback, t); - throw t; - } - }); + return txnId -> AccordKeyspace.loadCommand(commandStore, txnId); } @VisibleForTesting - Function> loadCommandsPerKeyFunction(Object callback) + Function loadCommandsPerKeyFunction() { - return cfk -> ofRunnable(Stage.READ.executor(), () -> { - try - { - logger.trace("Starting load of {} for {}", cfk.key(), callback); - AccordKeyspace.loadCommandsForKey(cfk); - logger.trace("Completed load of {} for {}", cfk.key(), callback); - } - catch (Throwable t) - { - logger.error("Exception loading {} for {}", cfk.key(), callback, t); - throw t; - } - }); + return key -> AccordKeyspace.loadCommandsForKey(commandStore, (PartitionKey) key); } - private AsyncResult referenceAndDispatchReads(AsyncContext context, Object callback) + private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) { - List> results = null; - - results = referenceAndDispatchReads(txnIds, - commandStore.commandCache(), - context.commands.items, - loadCommandFunction(callback), - results, - callback); - - results = referenceAndDispatchReads(keys, - commandStore.commandsForKeyCache(), - context.commandsForKey.items, - loadCommandsPerKeyFunction(callback), - results, - callback); - - return results != null ? AsyncResults.reduce(results, (a, b ) -> null).beginAsResult() : null; + List readRunnables = new ArrayList<>(); + List> chains = new ArrayList<>(); + + referenceAndAssembleReads(txnIds, + context.commands, + commandStore.commandCache(), + loadCommandFunction(), + readRunnables, + chains); + + referenceAndAssembleReads(keys, + context.commandsForKeys, + commandStore.commandsForKeyCache(), + loadCommandsPerKeyFunction(), + readRunnables, + chains); + + if (chains.isEmpty()) + { + Invariants.checkState(readRunnables.isEmpty()); + return null; + } + + // runnable results are already contained in the chains collection + if (!readRunnables.isEmpty()) + AsyncChains.ofRunnables(Stage.READ.executor(), readRunnables).begin(commandStore.agent()); + + return !chains.isEmpty() ? AsyncChains.reduce(chains, (a, b) -> null).beginAsResult() : null; } @VisibleForTesting @@ -192,7 +154,7 @@ void state(State state) this.state = state; } - public boolean load(AsyncContext context, BiConsumer callback) + public boolean load(AsyncOperation.Context context, BiConsumer callback) { logger.trace("Running load for {} with state {}: {} {}", callback, state, txnIds, keys); commandStore.checkInStoreThread(); @@ -201,10 +163,7 @@ public boolean load(AsyncContext context, BiConsumer callback case INITIALIZED: state(State.SETUP); case SETUP: - // notify any pending write only groups we're loading a full instance so the pending changes aren't removed - txnIds.forEach(commandStore.commandCache()::lockWriteOnlyGroupIfExists); - keys.forEach(commandStore.commandsForKeyCache()::lockWriteOnlyGroupIfExists); - readResult = referenceAndDispatchReads(context, callback); + readResult = referenceAndDispatchReads(context); state(State.LOADING); case LOADING: if (readResult != null) @@ -212,7 +171,6 @@ public boolean load(AsyncContext context, BiConsumer callback if (readResult.isSuccess()) { logger.trace("Read result succeeded for {}", callback); - context.verifyLoaded(); readResult = null; } else @@ -222,13 +180,6 @@ public boolean load(AsyncContext context, BiConsumer callback break; } } - // apply any pending write only changes that may not have made it to disk in time to be loaded - context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupLoadResult); - context.commands.items.values().forEach(commandStore.commandCache()::applyAndRemoveWriteOnlyGroup); - context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupLoadResult); - context.commandsForKey.items.values().forEach(commandStore.commandsForKeyCache()::applyAndRemoveWriteOnlyGroup); - // apply blindly reported timestamps - context.commandsForKey.items.values().forEach(AccordCommandsForKey::applyBlindWitnessedTimestamps); state(State.FINISHED); case FINISHED: break; diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index b4b490fcdd13..db659d623f83 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -18,12 +18,12 @@ package org.apache.cassandra.service.accord.async; +import java.util.HashMap; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; @@ -31,12 +31,16 @@ import accord.local.CommandStore; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; +import accord.primitives.RoutableKey; import accord.primitives.Seekables; import accord.primitives.TxnId; +import accord.utils.Invariants; import accord.utils.async.AsyncChains; import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.AccordSafeCommand; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; +import org.apache.cassandra.service.accord.AccordSafeCommandStore; +import org.apache.cassandra.service.accord.AccordSafeState; public abstract class AsyncOperation extends AsyncChains.Head implements Runnable, Function { @@ -48,29 +52,44 @@ private static class LoggingProps private static final String ASYNC_OPERATION = "async_op"; } + static class Context + { + final HashMap commands = new HashMap<>(); + final HashMap commandsForKeys = new HashMap<>(); + + void releaseResources(AccordCommandStore commandStore) + { + commands.values().forEach(commandStore.commandCache()::release); + commandsForKeys.values().forEach(commandStore.commandsForKeyCache()::release); + } + + void revertChanges() + { + commands.values().forEach(AccordSafeState::revert); + commandsForKeys.values().forEach(AccordSafeState::revert); + } + } + enum State { INITIALIZED, - SUBMITTED, LOADING, + PREPARING_OPERATION, // setup safe store for RUNNING RUNNING, - SAVING, - AWAITING_SAVE, + SAVING, // submits write to mutation stage + AWAITING_SAVE, // wait for writes to complete COMPLETING, FINISHED, FAILED } - public interface Context - { - - } - private State state = State.INITIALIZED; private final AccordCommandStore commandStore; + private final PreLoadContext preLoadContext; + private final Context context = new Context(); + private AccordSafeCommandStore safeStore; private final AsyncLoader loader; private final AsyncWriter writer; - private final AsyncContext context = new AsyncContext(); private R result; private final String loggingId; private BiConsumer callback; @@ -87,11 +106,12 @@ private void clearLoggingIds() MDC.remove(LoggingProps.ASYNC_OPERATION); } - public AsyncOperation(AccordCommandStore commandStore, Iterable commandsToLoad, Iterable keyCommandsToLoad) + public AsyncOperation(AccordCommandStore commandStore, PreLoadContext preLoadContext) { this.loggingId = "0x" + Integer.toHexString(System.identityHashCode(this)); this.commandStore = commandStore; - this.loader = createAsyncLoader(commandStore, commandsToLoad, keyCommandsToLoad); + this.preLoadContext = preLoadContext; + this.loader = createAsyncLoader(commandStore, preLoadContext); setLoggingIds(); this.writer = createAsyncWriter(commandStore); logger.trace("Created {} on {}", this, commandStore); @@ -109,9 +129,9 @@ AsyncWriter createAsyncWriter(AccordCommandStore commandStore) return new AsyncWriter(commandStore); } - AsyncLoader createAsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) + AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds, keys); + return new AsyncLoader(commandStore, preLoadContext.txnIds(), toRoutableKeys(preLoadContext.keys())); } @VisibleForTesting @@ -131,30 +151,67 @@ private void callback(Object o, Throwable throwable) if (throwable != null) { logger.error(String.format("Operation %s failed", this), throwable); - state = State.FAILED; fail(throwable); } else run(); } + private void finish(R result, Throwable failure) + { + try + { + if (callback != null) + callback.accept(result, failure); + } + finally + { + state = failure == null ? State.FINISHED : State.FAILED; + } + } + private void finish(R result) { - Preconditions.checkArgument(state == State.COMPLETING); - callback.accept(result, null); - state = State.FINISHED; + Invariants.checkArgument(state == State.COMPLETING, "Unexpected state %s", state); + finish(result, null); } private void fail(Throwable throwable) { - Preconditions.checkArgument(state != State.FINISHED && state != State.FAILED); - callback.accept(null, throwable); - state = State.FAILED; + Invariants.nonNull(throwable); + Invariants.checkArgument(state != State.FINISHED && state != State.FAILED, "Unexpected state %s", state); + try + { + switch (state) + { + case INITIALIZED: + case COMPLETING: + // nothing to cleanup, call callback + break; + case RUNNING: + context.revertChanges(); + case PREPARING_OPERATION: + commandStore.abortCurrentOperation(); + case LOADING: + context.releaseResources(commandStore); + break; + case SAVING: + case AWAITING_SAVE: + // TODO: revert changs + // TODO: panic? + break; + } + } + catch (Throwable cleanup) + { + commandStore.agent().onUncaughtException(cleanup); + throwable.addSuppressed(cleanup); + } + finish(null, throwable); } protected void runInternal() { - SafeAccordCommandStore safeStore = commandStore.safeStore(context); switch (state) { case INITIALIZED: @@ -163,19 +220,23 @@ protected void runInternal() if (!loader.load(context, this::callback)) return; + state = State.PREPARING_OPERATION; + safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.commandsForKeys); state = State.RUNNING; result = apply(safeStore); + safeStore.postExecute(context.commands, context.commandsForKeys); state = State.SAVING; case SAVING: case AWAITING_SAVE: boolean updatesPersisted = writer.save(context, this::callback); - if (state != State.AWAITING_SAVE) + if (state == State.SAVING) { + context.releaseResources(commandStore); + commandStore.completeOperation(safeStore, context.commands, context.commandsForKeys); // with any updates on the way to disk, release resources so operations waiting // to use these objects don't have issues with fields marked as unsaved - context.releaseResources(commandStore); state = State.AWAITING_SAVE; } @@ -185,9 +246,10 @@ protected void runInternal() state = State.COMPLETING; finish(result); case FINISHED: + case FAILED: break; default: - throw new IllegalStateException(); + throw new IllegalStateException("Unexpected state " + state); } } @@ -200,7 +262,7 @@ public void run() try { commandStore.checkInStoreThread(); - commandStore.setContext(context); + commandStore.setCurrentOperation(this); try { runInternal(); @@ -212,7 +274,7 @@ public void run() } finally { - commandStore.unsetContext(context); + commandStore.unsetCurrentOperation(this); } } finally @@ -223,20 +285,20 @@ public void run() } @Override - public void begin(BiConsumer callback) + public void start(BiConsumer callback) { - Preconditions.checkArgument(this.callback == null); + Invariants.checkArgument(this.callback == null); this.callback = callback; commandStore.executor().submit(this); } - private static Iterable toPartitionKeys(Seekables keys) + private static Iterable toRoutableKeys(Seekables keys) { switch (keys.domain()) { - default: throw new AssertionError(); + default: throw new AssertionError("Unexpected domain: " + keys.domain()); case Key: - return (Iterable) keys; + return (Iterable) keys; case Range: // TODO (required): implement throw new UnsupportedOperationException(); @@ -247,9 +309,9 @@ static class ForFunction extends AsyncOperation { private final Function function; - public ForFunction(AccordCommandStore commandStore, Iterable txnIds, Iterable keys, Function function) + public ForFunction(AccordCommandStore commandStore, PreLoadContext loadCtx, Function function) { - super(commandStore, txnIds, keys); + super(commandStore, loadCtx); this.function = function; } @@ -262,16 +324,16 @@ public R apply(SafeCommandStore commandStore) public static AsyncOperation create(CommandStore commandStore, PreLoadContext loadCtx, Function function) { - return new ForFunction<>((AccordCommandStore) commandStore, loadCtx.txnIds(), AsyncOperation.toPartitionKeys(loadCtx.keys()), function); + return new ForFunction<>((AccordCommandStore) commandStore, loadCtx, function); } static class ForConsumer extends AsyncOperation { private final Consumer consumer; - public ForConsumer(AccordCommandStore commandStore, Iterable txnIds, Iterable keys, Consumer consumer) + public ForConsumer(AccordCommandStore commandStore, PreLoadContext loadCtx, Consumer consumer) { - super(commandStore, txnIds, keys); + super(commandStore, loadCtx); this.consumer = consumer; } @@ -285,6 +347,6 @@ public Void apply(SafeCommandStore commandStore) public static AsyncOperation create(CommandStore commandStore, PreLoadContext loadCtx, Consumer consumer) { - return new ForConsumer((AccordCommandStore) commandStore, loadCtx.txnIds(), AsyncOperation.toPartitionKeys(loadCtx.keys()), consumer); + return new ForConsumer((AccordCommandStore) commandStore, loadCtx, consumer); } } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java b/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java index fb7f38687c26..177d17fa27cc 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java @@ -21,38 +21,28 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.function.BiConsumer; -import java.util.function.BiFunction; -import java.util.function.Consumer; -import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.primitives.Seekable; -import accord.primitives.Timestamp; +import accord.impl.CommandsForKey; +import accord.local.Command; +import accord.primitives.RoutableKey; import accord.primitives.TxnId; -import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.db.Mutation; -import org.apache.cassandra.service.accord.AccordCommand; import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandsForKey; import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordPartialCommand; +import org.apache.cassandra.service.accord.AccordSafeCommand; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; +import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordStateCache; -import org.apache.cassandra.service.accord.AccordState; -import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.store.StoredSet; - -import static accord.utils.async.AsyncResults.ofRunnable; - -import static accord.primitives.Routable.Domain.Range; public class AsyncWriter { @@ -69,8 +59,8 @@ enum State private State state = State.INITIALIZED; protected AsyncResult writeResult; private final AccordCommandStore commandStore; - final AccordStateCache.Instance commandCache; - final AccordStateCache.Instance cfkCache; + final AccordStateCache.Instance commandCache; + final AccordStateCache.Instance cfkCache; public AsyncWriter(AccordCommandStore commandStore) { @@ -79,213 +69,70 @@ public AsyncWriter(AccordCommandStore commandStore) this.cfkCache = commandStore.commandsForKeyCache(); } - private interface StateMutationFunction> + public interface StateMutationFunction> { - Mutation apply(AccordCommandStore commandStore, V state, long timestamp); + Mutation apply(AccordCommandStore commandStore, V value, long timestamp); } - private static > List> dispatchWrites(AsyncContext.Group ctxGroup, - AccordStateCache.Instance cache, - StateMutationFunction mutationFunction, - long timestamp, - AccordCommandStore commandStore, - List> results, - Object callback) + private static > void assembleWrites(Map context, + AccordStateCache.Instance cache, + StateMutationFunction mutationFunction, + long timestamp, + AccordCommandStore commandStore, + List> chains) { - for (V item : ctxGroup.items.values()) - { - if (!item.hasModifications()) - { - if (logger.isTraceEnabled()) - logger.trace("No modifications for {} for {}, {}", item.key(), callback, item); - continue; - } - - if (results == null) - results = new ArrayList<>(); - K key = item.key(); - Mutation mutation = mutationFunction.apply(commandStore, item, timestamp); + context.forEach((key, value) -> { + if (!value.hasUpdate()) + return; + Mutation mutation = mutationFunction.apply(commandStore, value, timestamp); + if (mutation == null) + return; if (logger.isTraceEnabled()) - logger.trace("Dispatching mutation for {} for {}, {} -> {}", key, callback, item, mutation); - AsyncResult result = ofRunnable(Stage.MUTATION.executor(), () -> { - try - { - if (logger.isTraceEnabled()) - logger.trace("Applying mutation for {} for {}: {}", key, callback, mutation); - mutation.apply(); - if (logger.isTraceEnabled()) - logger.trace("Completed applying mutation for {} for {}: {}", key, callback, mutation); - } - catch (Throwable t) - { - logger.error(String.format("Exception applying mutation for %s for %s: %s", key, callback, mutation), t); - throw t; - } - }); - cache.addSaveResult(item.key(), result); - results.add(result); - } - - for (AccordState.WriteOnly item : ctxGroup.writeOnly.values()) - { - Preconditions.checkState(item.hasModifications()); - if (results == null) results = new ArrayList<>(); - Mutation mutation = mutationFunction.apply(commandStore, (V) item, timestamp); - AsyncResult result = AsyncResults.ofRunnable(Stage.MUTATION.executor(), mutation::apply); - result.addListener(() -> cache.purgeWriteOnly(item.key()), commandStore.executor()); - item.asyncResult(result); - results.add(result); - } - - return results; - } - - private AsyncResult maybeDispatchWrites(AsyncContext context, Object callback) throws IOException - { - List> results = null; - - long timestamp = commandStore.nextSystemTimestampMicros(); - results = dispatchWrites(context.commands, - commandStore.commandCache(), - AccordKeyspace::getCommandMutation, - timestamp, - commandStore, - results, - callback); - - results = dispatchWrites(context.commandsForKey, - commandStore.commandsForKeyCache(), - AccordKeyspace::getCommandsForKeyMutation, - timestamp, - commandStore, - results, - callback); - - return results != null ? AsyncResults.reduce(results, (a, b) -> null).beginAsResult() : null; - } - - private void denormalizeBlockedOn(AccordCommand command, - AsyncContext context, - Function> waitingField, - Function> blockingField) - { - StoredSet.Changes waitingOn = waitingField.apply(command); - waitingOn.forEachDeletion(deletedId -> { - AccordCommand blockedOn = commandForDenormalization(deletedId, context); - blockingField.apply(blockedOn).blindRemove(command.txnId()); - }); - - waitingOn.forEachAddition(addedId -> { - AccordCommand blockedOn = commandForDenormalization(addedId, context); - blockingField.apply(blockedOn).blindAdd(command.txnId()); - }); - } - - private void denormalizeWaitingOnSummaries(AccordCommand command, - AsyncContext context, - Function> waitingField, - Function> blockingField) - { - blockingField.apply(command).getView().forEach(blockingId -> { - AccordCommand blocking = commandForDenormalization(blockingId, context); - waitingField.apply(blocking).accept(command.txnId(), command.executeAt()); + logger.trace("Dispatching mutation for {}, {} -> {}", key, value.current(), mutation); + AsyncResults.RunnableResult result = AsyncResults.runnableResult(() -> mutation.apply()); + cache.addSaveResult(key, result); + chains.add(result); }); } - private static > - AccordState getForDenormalization(K key, - AccordCommandStore commandStore, - AsyncContext.Group ctxGroup, - AccordStateCache.Instance cache, - BiFunction> factory) + protected StateMutationFunction writeCommandFunction() { - V item = ctxGroup.get(key); - if (item != null) - return item; - - item = cache.getOrNull(key); - if (item != null && !cache.hasLoadResult(key)) - { - ctxGroup.items.put(key, item); - return item; - } - - return ctxGroup.getOrCreateWriteOnly(key, factory, commandStore); - } - - private AccordCommand commandForDenormalization(TxnId txnId, AsyncContext context) - { - return (AccordCommand) getForDenormalization(txnId, commandStore, context.commands, commandCache, (ignore, id) -> new AccordCommand.WriteOnly(id)); + return AccordKeyspace::getCommandMutation; } - private AccordCommandsForKey cfkForDenormalization(PartitionKey key, AsyncContext context) + protected StateMutationFunction writeCommandForKeysFunction() { - return (AccordCommandsForKey) getForDenormalization(key, commandStore, context.commandsForKey, cfkCache, AccordCommandsForKey.WriteOnly::new); + return AccordKeyspace::getCommandsForKeyMutation; } - private void denormalize(AccordCommand command, AsyncContext context, Object callback) + private AsyncResult maybeDispatchWrites(AsyncOperation.Context context) throws IOException { - if (!command.hasModifications()) - return; - - // notify commands we're waiting on that they need to update the summaries in our maps - if (command.waitingOnCommit.hasModifications()) - { - denormalizeBlockedOn(command, context, cmd -> cmd.waitingOnCommit, cmd -> cmd.blockingCommitOn); - } - if (command.waitingOnApply.hasModifications()) - { - denormalizeBlockedOn(command, context, cmd -> new StoredSet.Changes() - { - @Override - public void forEachAddition(Consumer consumer) - { - cmd.waitingOnApply.forEachAddition((ignore, txnId) -> consumer.accept(txnId)); - } - - @Override - public void forEachDeletion(Consumer consumer) - { - cmd.waitingOnApply.forEachDeletion((ignore, txnId) -> consumer.accept(txnId)); - - } - }, cmd -> cmd.blockingApplyOn); - } - - if (command.shouldUpdateDenormalizedWaitingOn()) - { - denormalizeWaitingOnSummaries(command, context, cmd -> (txnId, ignore) -> cmd.waitingOnCommit.blindAdd(txnId), cmd -> cmd.blockingCommitOn); - denormalizeWaitingOnSummaries(command, context, cmd -> (txnId, executeAt) -> cmd.waitingOnApply.blindPut(executeAt, txnId), cmd -> cmd.blockingApplyOn); - } - - // There won't be a txn to denormalize against until the command has been preaccepted - // TODO (now): this maybe insufficient for correctness? on Accept we use the explicitly provided keys to register - // the transaction here. It's possible a sequence of two Accept, with second taking a higher timestamp - // might not reflect the update timestamp in the map? Probably best addressed following Blake's refactor. - if (command.known().isDefinitionKnown() && AccordPartialCommand.serializer.needsUpdate(command)) - { - for (Seekable key : command.partialTxn().keys()) - { - // TODO: implement - if (key.domain() == Range) - throw new UnsupportedOperationException(); - PartitionKey partitionKey = (PartitionKey) key; - AccordCommandsForKey cfk = cfkForDenormalization(partitionKey, context); - cfk.updateSummaries(command); - } - } + if (context.commands.isEmpty() && context.commandsForKeys.isEmpty()) + return null; - if (logger.isTraceEnabled()) - { - context.commands.items.forEach((txnId, cmd) -> logger.trace("Denormalized command {} for {}: {}", txnId, callback, cmd)); - context.commandsForKey.items.forEach((key, cfk) -> logger.trace("Denormalized cfk {} for {}: {}", key, callback, cfk)); - } - } + List> writes = new ArrayList<>(context.commands.size() + context.commandsForKeys.size()); - private void denormalize(AsyncContext context, Object callback) - { - // need to clone "values" as denormalize will mutate it - new ArrayList<>(context.commands.items.values()).forEach(command -> denormalize(command, context, callback)); + long timestamp = commandStore.nextSystemTimestampMicros(); + assembleWrites(context.commands, + commandStore.commandCache(), + writeCommandFunction(), + timestamp, + commandStore, + writes); + + assembleWrites(context.commandsForKeys, + commandStore.commandsForKeyCache(), + writeCommandForKeysFunction(), + timestamp, + commandStore, + writes); + + if (writes.isEmpty()) + return null; + + AsyncChains.ofRunnables(Stage.MUTATION.executor(), writes).begin(commandStore.agent()); + + return AsyncChains.reduce(writes, (a, b) -> null).beginAsResult(); } @VisibleForTesting @@ -294,7 +141,7 @@ void setState(State state) this.state = state; } - public boolean save(AsyncContext context, BiConsumer callback) + public boolean save(AsyncOperation.Context context, BiConsumer callback) { logger.trace("Running save for {} with state {}", callback, state); commandStore.checkInStoreThread(); @@ -305,8 +152,7 @@ public boolean save(AsyncContext context, BiConsumer callback case INITIALIZED: setState(State.SETUP); case SETUP: - denormalize(context, callback); - writeResult = maybeDispatchWrites(context, callback); + writeResult = maybeDispatchWrites(context); setState(State.SAVING); case SAVING: @@ -316,8 +162,8 @@ public boolean save(AsyncContext context, BiConsumer callback writeResult.addCallback(callback, commandStore.executor()); break; } - context.commands.items.keySet().forEach(commandStore.commandCache()::cleanupSaveResult); - context.commandsForKey.items.keySet().forEach(commandStore.commandsForKeyCache()::cleanupSaveResult); + context.commands.keySet().forEach(commandStore.commandCache()::cleanupSaveResult); + context.commandsForKeys.keySet().forEach(commandStore.commandsForKeyCache()::cleanupSaveResult); setState(State.FINISHED); case FINISHED: break; diff --git a/src/java/org/apache/cassandra/service/accord/exceptions/ReadPreemptedException.java b/src/java/org/apache/cassandra/service/accord/exceptions/ReadPreemptedException.java new file mode 100644 index 000000000000..c67256c157de --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/exceptions/ReadPreemptedException.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.exceptions; + +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.exceptions.ReadTimeoutException; + +// shim to allow tests to tell the difference between preemption and other protocol timeouts +public class ReadPreemptedException extends ReadTimeoutException +{ + public ReadPreemptedException(ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent) + { + super(consistency, received, blockFor, dataPresent); + } + + public ReadPreemptedException(ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent, String msg) + { + super(consistency, received, blockFor, dataPresent, msg); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/exceptions/WritePreemptedException.java b/src/java/org/apache/cassandra/service/accord/exceptions/WritePreemptedException.java new file mode 100644 index 000000000000..f2f28ef67df6 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/exceptions/WritePreemptedException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.exceptions; + +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.exceptions.WriteTimeoutException; + +// quick hack to allow tests to tell the difference between preemption and other protocol timeouts +public class WritePreemptedException extends WriteTimeoutException +{ + public WritePreemptedException(WriteType writeType, ConsistencyLevel consistency, int received, int blockFor) + { + super(writeType, consistency, received, blockFor); + } + + public WritePreemptedException(WriteType writeType, ConsistencyLevel consistency, int received, int blockFor, String msg) + { + super(writeType, consistency, received, blockFor, msg); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java index 609e7ee080d0..8377991354dc 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java @@ -29,7 +29,6 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import static accord.local.Command.AcceptOutcome.RejectedBallot; import static accord.messages.Accept.SerializerSupport.create; public class AcceptSerializers diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index 801c7d332b3d..8a1b22b6a339 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -37,6 +37,7 @@ public class ApplySerializers public void serializeBody(Apply apply, DataOutputPlus out, int version) throws IOException { out.writeUnsignedVInt(apply.untilEpoch); + KeySerializers.seekables.serialize(apply.keys(), out, version); CommandSerializers.timestamp.serialize(apply.executeAt, out, version); DepsSerializer.partialDeps.serialize(apply.deps, out, version); CommandSerializers.writes.serialize(apply.writes, out, version); @@ -47,6 +48,7 @@ public void serializeBody(Apply apply, DataOutputPlus out, int version) throws I public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, in.readUnsignedVInt(), + KeySerializers.seekables.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), CommandSerializers.writes.deserialize(in, version), @@ -57,6 +59,7 @@ public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, Partial public long serializedBodySize(Apply apply, int version) { return TypeSizes.sizeofUnsignedVInt(apply.untilEpoch) + + KeySerializers.seekables.serializedSize(apply.keys(), version) + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + DepsSerializer.partialDeps.serializedSize(apply.deps, version) + CommandSerializers.writes.serializedSize(apply.writes, version) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java new file mode 100644 index 000000000000..f65b1fb43788 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.annotations.VisibleForTesting; + +import accord.impl.CommandsForKey.CommandLoader; +import accord.local.Command; +import accord.local.SaveStatus; +import accord.primitives.PartialDeps; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.utils.Invariants; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.LocalVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.AccordSerializerVersion; + +public class CommandsForKeySerializer +{ + @VisibleForTesting + public static final IVersionedSerializer> depsIdSerializer = new IVersionedSerializer>() + { + @Override + public void serialize(List ids, DataOutputPlus out, int version) throws IOException + { + out.writeInt(ids.size()); + for (int i=0,mi=ids.size(); i deserialize(DataInputPlus in, int version) throws IOException + { + int size = in.readInt(); + List ids = new ArrayList<>(size); + for (int i=0; i ids, int version) + { + long size = TypeSizes.INT_SIZE; + for (int i=0,mi=ids.size(); i> depsIdsLocalSerializer = new LocalVersionedSerializer<>(AccordSerializerVersion.CURRENT, AccordSerializerVersion.serializer, depsIdSerializer); + + public static final CommandLoader loader = new AccordCFKLoader(); + private static class AccordCFKLoader implements CommandLoader + { + private static final int HAS_DEPS = 0x01; + private static final int HAS_EXECUTE_AT = 0x02; + + private static final long FIXED_SIZE; + private static final int FLAG_OFFSET; + private static final int STATUS_OFFSET; + private static final int TXNID_OFFSET; + private static final int EXECUTEAT_OFFSET; + private static final int DEPS_OFFSET; + + static + { + long size = 0; + + FLAG_OFFSET = (int) size; + size += TypeSizes.BYTE_SIZE; + + STATUS_OFFSET = (int) size; + size += TypeSizes.BYTE_SIZE; + + TXNID_OFFSET = (int) size; + size += CommandSerializers.txnId.serializedSize(); + + FIXED_SIZE = size; + + EXECUTEAT_OFFSET = (int) size; + size += CommandSerializers.timestamp.serializedSize(); + + DEPS_OFFSET = (int) size; + } + + private int serializedSize(Command command) + { + return (int) (FIXED_SIZE + + (command.executeAt() != null ? CommandSerializers.timestamp.serializedSize() : 0) + + (command.partialDeps() != null ? depsIdsLocalSerializer.serializedSize(command.partialDeps().txnIds()) : 0)); + } + + private static final ValueAccessor accessor = ByteBufferAccessor.instance; + + private static byte toByte(int v) + { + Invariants.checkArgument(v < Byte.MAX_VALUE, "Value %d is larger than %d", v, Byte.MAX_VALUE); + return (byte) v; + } + + private AccordCFKLoader() {} + + @Override + public ByteBuffer saveForCFK(Command command) + { + int flags = 0; + + PartialDeps deps = command.partialDeps(); + Timestamp executeAt = command.executeAt(); + if (deps != null) + flags |= HAS_DEPS; + if (executeAt != null) + flags |= HAS_EXECUTE_AT; + + int size = serializedSize(command); + ByteBuffer buffer = accessor.allocate(size); + accessor.putByte(buffer, FLAG_OFFSET, toByte(flags)); + accessor.putByte(buffer, STATUS_OFFSET, toByte(command.saveStatus().ordinal())); + CommandSerializers.txnId.serialize(command.txnId(), buffer, accessor, TXNID_OFFSET); + if (executeAt != null) + CommandSerializers.timestamp.serialize(executeAt, buffer, accessor, EXECUTEAT_OFFSET); + if (deps != null) + { + ByteBuffer duplicate = buffer.duplicate(); + duplicate.position(executeAt != null ? DEPS_OFFSET : EXECUTEAT_OFFSET); + try (DataOutputBuffer out = new DataOutputBuffer(duplicate)) + { + depsIdsLocalSerializer.serialize(deps.txnIds(), out); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + return buffer; + } + + @Override + public TxnId txnId(ByteBuffer data) + { + return CommandSerializers.txnId.deserialize(data, accessor, TXNID_OFFSET); + } + + @Override + public Timestamp executeAt(ByteBuffer data) + { + byte flags = accessor.getByte(data, FLAG_OFFSET); + if ((flags & HAS_EXECUTE_AT) == 0) + return null; + return CommandSerializers.timestamp.deserialize(data, accessor, EXECUTEAT_OFFSET); + } + + @Override + public SaveStatus saveStatus(ByteBuffer data) + { + return SaveStatus.values()[accessor.getByte(data, STATUS_OFFSET)]; + } + + @Override + public List depsIds(ByteBuffer data) + { + byte flags = accessor.getByte(data, FLAG_OFFSET); + if ((flags & HAS_DEPS) == 0) + return null; + ByteBuffer buffer = data.duplicate(); + int offset = (flags & HAS_EXECUTE_AT) == 0 ? EXECUTEAT_OFFSET : DEPS_OFFSET; + buffer.position(data.position() + offset); + try (DataInputBuffer in = new DataInputBuffer(buffer, false)) + { + return depsIdsLocalSerializer.deserialize(in); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java new file mode 100644 index 000000000000..37afe0a59a62 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.impl.CommandsForKey; +import accord.local.Command; +import accord.local.CommandListener; +import accord.utils.Invariants; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.api.PartitionKey; + +public class ListenerSerializers +{ + public enum Kind + { + COMMAND, COMMANDS_FOR_KEY; + + private static Kind of(CommandListener listener) + { + if (listener instanceof Command.Listener) + return COMMAND; + + if (listener instanceof CommandsForKey.Listener) + return COMMANDS_FOR_KEY; + + throw new IllegalArgumentException("Unsupported listener type: " + listener.getClass().getName()); + } + } + + + private static final IVersionedSerializer commandListener = new IVersionedSerializer() + { + @Override + public void serialize(Command.Listener listener, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(listener.txnId(), out, version); + } + + @Override + public Command.Listener deserialize(DataInputPlus in, int version) throws IOException + { + return new Command.Listener(CommandSerializers.txnId.deserialize(in, version)); + } + + @Override + public long serializedSize(Command.Listener listener, int version) + { + return CommandSerializers.txnId.serializedSize(listener.txnId(), version); + } + }; + + private static final IVersionedSerializer cfkListener = new IVersionedSerializer() + { + @Override + public void serialize(CommandsForKey.Listener listener, DataOutputPlus out, int version) throws IOException + { + PartitionKey.serializer.serialize((PartitionKey) listener.key(), out, version); + } + + @Override + public CommandsForKey.Listener deserialize(DataInputPlus in, int version) throws IOException + { + return CommandsForKey.SerializerSupport.listener(PartitionKey.serializer.deserialize(in, version)); + } + + @Override + public long serializedSize(CommandsForKey.Listener listener, int version) + { + return PartitionKey.serializer.serializedSize((PartitionKey) listener.key(), version); + } + }; + + public static final IVersionedSerializer listener = new IVersionedSerializer() + { + @Override + public void serialize(CommandListener listener, DataOutputPlus out, int version) throws IOException + { + Invariants.checkArgument(!listener.isTransient()); + Kind kind = Kind.of(listener); + out.write(kind.ordinal()); + switch (kind) + { + case COMMAND: + commandListener.serialize((Command.Listener) listener, out, version); + break; + case COMMANDS_FOR_KEY: + cfkListener.serialize((CommandsForKey.Listener) listener, out, version); + break; + default: + throw new IllegalArgumentException(); + } + } + + @Override + public CommandListener deserialize(DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.values()[in.readByte()]; + switch (kind) + { + case COMMAND: + return commandListener.deserialize(in, version); + case COMMANDS_FOR_KEY: + return cfkListener.deserialize(in, version); + default: + throw new IllegalArgumentException(); + } + } + + @Override + public long serializedSize(CommandListener listener, int version) + { + Invariants.checkArgument(!listener.isTransient()); + Kind kind = Kind.of(listener); + long size = TypeSizes.BYTE_SIZE; + switch (kind) + { + case COMMAND: + size += commandListener.serializedSize((Command.Listener) listener, version); + break; + case COMMANDS_FOR_KEY: + size += cfkListener.serializedSize((CommandsForKey.Listener) listener, version); + break; + default: + throw new IllegalArgumentException(); + } + + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java b/src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java deleted file mode 100644 index b09bc31827f3..000000000000 --- a/src/java/org/apache/cassandra/service/accord/store/AbstractStoredField.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import org.apache.cassandra.service.accord.AccordState; - -public abstract class AbstractStoredField -{ - private static final int LOADED_FLAG = 0x01; - private static final int EMPTY_FLAG = 0x01 << 1; - private static final int CHANGED_FLAG = 0x01 << 2; - private static final int CLEARED_FLAG = 0x01 << 3; - private static final int WRITE_ONLY_FLAG = 0x01 << 4; - private static final int READ_ONLY_FLAG = 0x01 << 5; - - private byte flag; - - public AbstractStoredField(AccordState.ReadWrite readWrite) - { - this.flag = 0; - if (readWrite == AccordState.ReadWrite.WRITE_ONLY) - set(WRITE_ONLY_FLAG); - if (readWrite == AccordState.ReadWrite.READ_ONLY) - set(READ_ONLY_FLAG); - } - - @Override - public String toString() - { - if (!hasValue()) - return ""; - if (check(WRITE_ONLY_FLAG)) - return ""; - preGet(); - if (hasModifications()) - return '*' + valueString(); - return valueString(); - } - - private void clear(int v) - { - flag &= ~v; - } - - private boolean check(int v) - { - return (flag & v) != 0; - } - - private void set(int v) - { - flag |= v; - } - - public boolean hasValue() - { - return isLoaded() && !isEmpty(); - } - - public boolean isLoaded() - { - return check(LOADED_FLAG); - } - - public void setEmpty() - { - if (check(0xFF)) - throw new IllegalStateException("Cannot set previously loaded/initialized commands to empty"); - set(LOADED_FLAG | EMPTY_FLAG); - } - - public boolean isEmpty() - { - return check(EMPTY_FLAG); - } - - void preUnload() - { - if (hasModifications()) - throw new IllegalStateException("Cannot unload a field with unsaved changes"); - flag = 0; - } - - void preLoad() - { - if (hasModifications()) - throw new IllegalStateException("Cannot load into a field with unsaved changes"); - clear(EMPTY_FLAG); - set(LOADED_FLAG); - } - - void preChange() - { - if (check(READ_ONLY_FLAG)) - throw new IllegalStateException("Cannot update a read only field"); - clear(EMPTY_FLAG); - set(LOADED_FLAG | CHANGED_FLAG); - } - - void preBlindChange() - { - set(CHANGED_FLAG); - } - - void preGet() - { - if (!check(LOADED_FLAG)) - throw new IllegalStateException("Cannot read unloaded fields"); - if (check(EMPTY_FLAG)) - throw new IllegalStateException("Cannot read empty fields"); - if (check(WRITE_ONLY_FLAG)) - throw new IllegalStateException("Cannot read write only fields"); - } - - void preClear() - { - set(CLEARED_FLAG | LOADED_FLAG | CHANGED_FLAG); - } - - public boolean hasModifications() - { - return check(CHANGED_FLAG); - } - - public void clearModifiedFlag() - { - clear(CHANGED_FLAG); - } - - public boolean wasCleared() - { - return check(CLEARED_FLAG); - } - - public abstract String valueString(); -} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java b/src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java deleted file mode 100644 index fb81da9df4bf..000000000000 --- a/src/java/org/apache/cassandra/service/accord/store/StoredBoolean.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import java.util.Objects; - -import org.apache.cassandra.service.accord.AccordState; -import org.apache.cassandra.utils.ObjectSizes; - -public class StoredBoolean extends AbstractStoredField -{ - public static final long EMPTY_SIZE = ObjectSizes.measure(new StoredBoolean(AccordState.ReadWrite.FULL)); - protected boolean value; - - public StoredBoolean(AccordState.ReadWrite readWrite) - { - super(readWrite); - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - StoredBoolean that = (StoredBoolean) o; - return value == that.value; - } - - @Override - public int hashCode() - { - return Objects.hash(value); - } - - @Override - public String valueString() - { - return Boolean.toString(value); - } - - public long estimatedSizeOnHeap() - { - return EMPTY_SIZE; - } - - public void unload() - { - preUnload(); - value = false; - } - - public void load(boolean value) - { - preLoad(); - this.value = value; - } - - public void set(boolean value) - { - preChange(); - this.value = value; - } - - public boolean get() - { - preGet(); - return value; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredLong.java b/src/java/org/apache/cassandra/service/accord/store/StoredLong.java deleted file mode 100644 index 3534d68b9397..000000000000 --- a/src/java/org/apache/cassandra/service/accord/store/StoredLong.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import java.util.Objects; - -import org.apache.cassandra.service.accord.AccordState; -import org.apache.cassandra.utils.ObjectSizes; - -public class StoredLong extends AbstractStoredField -{ - public static final long EMPTY_SIZE = ObjectSizes.measure(new StoredLong(AccordState.ReadWrite.FULL)); - - protected long value; - - public StoredLong(AccordState.ReadWrite readWrite) - { - super(readWrite); - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - StoredLong that = (StoredLong) o; - return value == that.value; - } - - @Override - public int hashCode() - { - return Objects.hash(value); - } - - @Override - public String valueString() - { - return Long.toString(value); - } - - public long estimatedSizeOnHeap() - { - return EMPTY_SIZE; - } - - public void unload() - { - preUnload(); - value = 0; - } - - public void load(long value) - { - preLoad(); - this.value = value; - } - - public void set(long value) - { - preChange(); - this.value = value; - } - - public long get() - { - preGet(); - return value; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java b/src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java deleted file mode 100644 index 81f7efdc0358..000000000000 --- a/src/java/org/apache/cassandra/service/accord/store/StoredNavigableMap.java +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import java.util.Collections; -import java.util.Map; -import java.util.NavigableMap; -import java.util.Objects; -import java.util.TreeMap; -import java.util.function.BiConsumer; -import java.util.function.Consumer; -import java.util.function.ToLongFunction; -import java.util.stream.Collectors; - -import org.apache.cassandra.service.accord.AccordState; -import org.apache.cassandra.utils.ObjectSizes; - -/** - * Navigable Map, capable of blind add/remove - */ -public class StoredNavigableMap, V> extends AbstractStoredField -{ - private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new StoredNavigableMap<>(AccordState.ReadWrite.FULL)); - private NavigableMap map = null; - private NavigableMap view = null; - private NavigableMap additions = null; - private NavigableMap deletions = null; - - public StoredNavigableMap(AccordState.ReadWrite readWrite) - { - super(readWrite); - } - - @Override - public boolean equals(Object o) - { - preGet(); - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - StoredNavigableMap that = (StoredNavigableMap) o; - return Objects.equals(map, that.map); - } - - @Override - public int hashCode() - { - preGet(); - return Objects.hash(map); - } - - @Override - public String valueString() - { - if (view == null) - return ""; - return view.entrySet().stream() - .map(e -> e.getKey() + "=" + e.getValue()) - .collect(Collectors.joining(", ", "{", "}")); - } - - public void unload() - { - preUnload(); - map = null; - view = null; - additions = null; - deletions = null; - } - - void setInternal(NavigableMap map) - { - this.map = map; - this.view = Collections.unmodifiableNavigableMap(map); - } - - public void load(NavigableMap map) - { - preLoad(); - setInternal(map); - } - - public NavigableMap getView() - { - preGet(); - return view; - } - - public void blindPut(K key, V val) - { - preBlindChange(); - if (hasValue()) - map.put(key, val); - - if (additions == null) - additions = new TreeMap<>(); - - additions.put(key, val); - if (deletions != null) - deletions.remove(key); - } - - public void blindRemove(K key) - { - preBlindChange(); - if (hasValue()) - map.remove(key); - - if (!wasCleared()) - { - if (deletions == null) - deletions = new TreeMap<>(); - deletions.put(key, null); - } - if (additions != null) - additions.remove(key); - } - - // TODO: this is a kludge, but will suffice until we can more fully rework efficiency of waitingOn collections - // this is semantically equivalent to blindRemove(key) but stores the value we believe was bound to key on removal - // so that it can be used by forEachDeletion - public void blindRemove(K key, V value) - { - preBlindChange(); - if (hasValue()) - map.remove(key); - - if (!wasCleared()) - { - if (deletions == null) - deletions = new TreeMap<>(); - deletions.put(key, value); - } - if (additions != null) - additions.remove(key); - } - - public void clear() - { - clearModifiedFlag(); - preClear(); - setInternal(new TreeMap<>()); - } - - @Override - public void clearModifiedFlag() - { - super.clearModifiedFlag(); - if (additions != null) additions.clear(); - if (deletions != null) deletions.clear(); - } - - public boolean hasAdditions() - { - return additions != null && !additions.isEmpty(); - } - - public int additionsSize() - { - return additions != null ? additions.size() : 0; - } - - public boolean hasDeletions() - { - return deletions != null && !deletions.isEmpty(); - } - - public int deletionsSize() - { - return deletions != null ? deletions.size() : 0; - } - - public int totalModifications() - { - return additionsSize() + deletionsSize(); - } - - public void forEachAddition(BiConsumer consumer) - { - if (additions != null) - additions.forEach(consumer); - } - - public void forEachDeletion(Consumer consumer) - { - if (deletions != null) - deletions.keySet().forEach(consumer); - } - - public void forEachDeletion(BiConsumer consumer) - { - if (deletions != null) - deletions.forEach(consumer); - } - - public long estimatedSizeOnHeap(ToLongFunction measureKey, ToLongFunction measureVal) - { - long size = EMPTY_SIZE; - if (hasValue() && ! map.isEmpty()) - { - for (Map.Entry entry : map.entrySet()) - { - size += measureKey.applyAsLong(entry.getKey()); - size += measureVal.applyAsLong(entry.getValue()); - } - } - return size; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredSet.java b/src/java/org/apache/cassandra/service/accord/store/StoredSet.java deleted file mode 100644 index 699c8bf48f68..000000000000 --- a/src/java/org/apache/cassandra/service/accord/store/StoredSet.java +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import java.util.Collections; -import java.util.HashSet; -import java.util.NavigableSet; -import java.util.Objects; -import java.util.Set; -import java.util.TreeSet; -import java.util.function.Consumer; -import java.util.function.ToLongFunction; -import java.util.stream.Collectors; - -import accord.utils.DeterministicIdentitySet; -import org.apache.cassandra.service.accord.AccordState; -import org.apache.cassandra.utils.ObjectSizes; - -public abstract class StoredSet> extends AbstractStoredField -{ - private S set = null; - private S view = null; - private Set additions = null; - private Set deletions = null; - - abstract S createDataSet(); - abstract Set createMetaSet(); - abstract S createView(S data); - abstract long emptySize(); - - public StoredSet(AccordState.ReadWrite readWrite) - { - super(readWrite); - } - - @Override - public boolean equals(Object o) - { - preGet(); - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - StoredSet that = (StoredSet) o; - return Objects.equals(set, that.set); - } - - @Override - public int hashCode() - { - preGet(); - return Objects.hash(set); - } - - @Override - public String valueString() - { - return view.stream() - .map(Object::toString) - .collect(Collectors.joining(", ", "{", "}")); - } - - public void unload() - { - preUnload(); - set = null; - view = null; - additions = null; - deletions = null; - } - - void setInternal(S set) - { - this.set = set; - this.view = createView(set); - } - - public void load(S set) - { - preLoad(); - setInternal(set); - } - - public S getView() - { - preGet(); - return view; - } - - public void blindAdd(T item) - { - preBlindChange(); - if (hasValue()) - set.add(item); - - if (additions == null) - additions = createMetaSet(); - - additions.add(item); - if (deletions != null) - deletions.remove(item); - } - - public void blindRemove(T item) - { - preBlindChange(); - if (hasValue()) - set.remove(item); - - if (!wasCleared()) - { - if (deletions == null) - deletions = createMetaSet(); - deletions.add(item); - } - if (additions != null) - additions.remove(item); - } - - public void clear() - { - clearModifiedFlag(); - preClear(); - setInternal(createDataSet()); - } - - @Override - public void clearModifiedFlag() - { - super.clearModifiedFlag(); - if (additions != null) additions.clear(); - if (deletions != null) deletions.clear(); - } - - public boolean hasAdditions() - { - return additions != null && !additions.isEmpty(); - } - - public boolean hasDeletions() - { - return deletions != null && !deletions.isEmpty(); - } - - public void forEachAddition(Consumer consumer) - { - if (additions != null) - additions.forEach(consumer); - } - - public void forEachDeletion(Consumer consumer) - { - if (deletions != null) - deletions.forEach(consumer); - } - - public long estimatedSizeOnHeap(ToLongFunction measure) - { - long size = emptySize(); - if (hasValue() && !set.isEmpty()) - { - for (T val : set) - size += measure.applyAsLong(val); - } - return size; - } - - public interface Changes - { - void forEachAddition(Consumer consumer); - void forEachDeletion(Consumer consumer); - } - - public static class Navigable> extends StoredSet> implements Changes - { - private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new Navigable<>(AccordState.ReadWrite.FULL)); - - public Navigable(AccordState.ReadWrite readWrite) { super(readWrite); } - - @Override - NavigableSet createDataSet() - { - return new TreeSet<>(); - } - - @Override - NavigableSet createMetaSet() - { - return new TreeSet<>(); - } - - @Override - NavigableSet createView(NavigableSet data) - { - return Collections.unmodifiableNavigableSet(data); - } - - @Override - long emptySize() - { - return EMPTY_SIZE; - } - } - - public static class DeterministicIdentity extends StoredSet> implements Changes - { - private static final long EMPTY_SIZE = ObjectSizes.measureDeep(new DeterministicIdentity<>(AccordState.ReadWrite.FULL)); - - public DeterministicIdentity(AccordState.ReadWrite readWrite) { super(readWrite); } - - @Override - DeterministicIdentitySet createDataSet() - { - return new DeterministicIdentitySet<>(); - } - - @Override - Set createMetaSet() - { - return new HashSet<>(); - } - - @Override - Set createView(Set data) - { - return Collections.unmodifiableSet(data); - } - - @Override - long emptySize() - { - return EMPTY_SIZE; - } - } -} diff --git a/src/java/org/apache/cassandra/service/accord/store/StoredValue.java b/src/java/org/apache/cassandra/service/accord/store/StoredValue.java deleted file mode 100644 index 3e08c8b43a13..000000000000 --- a/src/java/org/apache/cassandra/service/accord/store/StoredValue.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import java.util.Objects; -import java.util.function.ToLongFunction; - -import org.apache.cassandra.service.accord.AccordState; -import org.apache.cassandra.utils.ObjectSizes; - -public class StoredValue extends AbstractStoredField -{ - public static final long EMPTY_SIZE = ObjectSizes.measure(new StoredValue<>(AccordState.ReadWrite.FULL)); - protected T value; - - public StoredValue(AccordState.ReadWrite readWrite) - { - super(readWrite); - } - - @Override - public boolean equals(Object o) - { - this.preGet(); - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - StoredValue that = (StoredValue) o; - that.preGet(); - return Objects.equals(value, that.value); - } - - @Override - public int hashCode() - { - preGet(); - return Objects.hash(value); - } - - @Override - public String valueString() - { - return Objects.toString(value); - } - - public long estimatedSizeOnHeap(ToLongFunction measure) - { - if (!hasValue() || value == null) - return EMPTY_SIZE; - - return EMPTY_SIZE + measure.applyAsLong(value); - } - - public void unload() - { - preUnload(); - value = null; - } - - public void load(T value) - { - preLoad(); - this.value = value; - } - - public void set(T value) - { - preChange(); - this.value = value; - } - - public T get() - { - preGet(); - return value; - } - - public static class HistoryPreserving extends StoredValue - { - T previous; - - public HistoryPreserving(AccordState.ReadWrite readWrite) - { - super(readWrite); - } - - public T previous() - { - return previous; - } - - @Override - public void unload() - { - super.unload(); - previous = null; - } - - @Override - public void load(T value) - { - super.load(value); - previous = value; - } - - @Override - public void clearModifiedFlag() - { - super.clearModifiedFlag(); - previous = value; - } - } -} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index d6f3a71c8b14..b82013faa012 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -31,9 +31,9 @@ import com.google.common.collect.Iterables; import accord.api.DataStore; -import accord.api.Key; import accord.api.Write; import accord.local.SafeCommandStore; +import accord.primitives.RoutableKey; import accord.primitives.Seekable; import accord.primitives.Timestamp; import accord.primitives.Writes; @@ -53,8 +53,8 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandsForKey; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; +import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; @@ -345,13 +345,14 @@ Update[] newArray(int size) @Override public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) { - AccordCommandsForKey cfk = ((SafeAccordCommandStore) safeStore).commandsForKey((Key)key); // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; // any that aren't can just use executeAt.hlc - long timestamp = cfk.timestampMicrosFor(executeAt, true); + AccordSafeCommandsForKey cfk = ((AccordSafeCommandStore) safeStore).commandsForKey((RoutableKey) key); + cfk.updateLastExecutionTimestamps(executeAt, true); + long timestamp = cfk.current().timestampMicrosFor(executeAt, true); // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) - int nowInSeconds = cfk.nowInSecondsFor(executeAt, true); + int nowInSeconds = cfk.current().nowInSecondsFor(executeAt, true); List> results = new ArrayList<>(); forEachWithKey((PartitionKey) key, write -> results.add(write.write(timestamp, nowInSeconds))); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 1a7d59f8fe47..359882adadb7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -33,7 +33,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.coordinate.Preempted; import accord.primitives.Txn; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; @@ -52,6 +51,8 @@ import org.apache.cassandra.distributed.util.QueryResultUtil; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; +import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FailingConsumer; @@ -195,7 +196,7 @@ private SimpleQueryResult executeWithRetry0(int count, Cluster cluster, String c } catch (RuntimeException ex) { - if (count <= MAX_RETRIES && AssertionUtils.rootCauseIs(Preempted.class).matches(ex)) + if (count <= MAX_RETRIES && (AssertionUtils.rootCauseIs(ReadPreemptedException.class).matches(ex) || AssertionUtils.rootCauseIs(WritePreemptedException.class).matches(ex))) { logger.warn("[Retry attempt={}] Preempted failure for\n{}", count, check); return executeWithRetry0(count + 1, cluster, check, boundValues); diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java index 981c96576ec2..004dc5138247 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java @@ -98,12 +98,30 @@ public ActionList performAndRegister() return super.performAndRegister(); } + private boolean wasInterrupted(Throwable failure) + { + if (failure instanceof UncheckedInterruptedException) + return true; + + if (failure instanceof InterruptedException) + return true; + + Throwable cause = failure.getCause(); + while (cause != null && cause != failure) + { + if (cause instanceof InterruptedException) + return true; + cause = cause.getCause(); + } + return false; + } + @Override public void accept(SimpleQueryResult success, Throwable failure) { if (failure != null && !expectedException(failure)) { - if (!simulated.failures.hasFailure() || !(failure instanceof UncheckedInterruptedException)) + if (!simulated.failures.hasFailure() || !wasInterrupted(failure)) logger.error("Unexpected exception", failure); simulated.failures.accept(failure); return; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 915bad76d7d1..dce4312d687c 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -18,9 +18,10 @@ package org.apache.cassandra.service.accord; -import java.util.TreeSet; import java.util.concurrent.atomic.AtomicLong; +import com.google.common.collect.ImmutableSortedMap; +import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterables; import org.junit.Assert; import org.junit.Before; @@ -30,12 +31,17 @@ import org.slf4j.LoggerFactory; import accord.api.Key; +import accord.api.Result; +import accord.impl.CommandsForKey; import accord.local.Command; -import accord.local.Status; +import accord.local.CommonAttributes; +import accord.local.SaveStatus; +import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.primitives.Writes; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.db.ColumnFamilyStore; @@ -44,13 +50,16 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; +import org.apache.cassandra.utils.Pair; import static accord.local.Status.Durability.Durable; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.preaccepted; import static org.apache.cassandra.service.accord.AccordTestUtils.ballot; import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; -import static org.apache.cassandra.service.accord.AccordTestUtils.processCommandResult; +import static org.apache.cassandra.service.accord.AccordTestUtils.loaded; import static org.apache.cassandra.service.accord.AccordTestUtils.timestamp; import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; @@ -93,23 +102,26 @@ public void commandLoadSave() throws Throwable TxnId oldTxnId2 = txnId(1, clock.incrementAndGet(), 1); TxnId oldTimestamp = txnId(1, clock.incrementAndGet(), 1); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - AccordCommand command = new AccordCommand(txnId).initialize(); - command.setPartialTxn(createPartialTxn(0)); - command.homeKey(key.toUnseekable()); - command.progressKey(key.toUnseekable()); - command.setDurability(Durable); - command.setPromised(ballot(1, clock.incrementAndGet(), 1)); - command.setAccepted(ballot(1, clock.incrementAndGet(), 1)); - command.setExecuteAt(timestamp(1, clock.incrementAndGet(), 1)); - command.setPartialDeps(dependencies); - command.setStatus(Status.Accepted); - command.addWaitingOnCommit(oldTxnId1); - command.addWaitingOnApplyIfAbsent(oldTxnId2, oldTimestamp); - command.storedListeners.clear(); - command.addListener(new AccordCommand(oldTxnId1)); - processCommandResult(commandStore, command); - - AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); + + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + PartialTxn txn = createPartialTxn(0); + attrs.homeKey(key.toUnseekable()); + attrs.progressKey(key.toUnseekable()); + attrs.durability(Durable); + Ballot promised = ballot(1, clock.incrementAndGet(), 1); + Ballot accepted = ballot(1, clock.incrementAndGet(), 1); + Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); + attrs.partialDeps(dependencies); + ImmutableSortedSet waitingOnCommit = ImmutableSortedSet.of(oldTxnId1); + ImmutableSortedMap waitingOnApply = ImmutableSortedMap.of(oldTimestamp, oldTxnId2); + attrs.addListener(new Command.Listener(oldTxnId1)); + Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); + Command command = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, + waitingOnCommit, waitingOnApply, result.left, result.right); + + AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); + safeCommand.set(command); + AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); logger.info("E: {}", command); Command actual = AccordKeyspace.loadCommand(commandStore, txnId); logger.info("A: {}", actual); @@ -128,59 +140,32 @@ public void commandsForKeyLoadSave() PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); - AccordCommand command1 = new AccordCommand(txnId1).initialize(); - AccordCommand command2 = new AccordCommand(txnId2).initialize(); - command1.setPartialTxn(txn); - command2.setPartialTxn(txn); - command1.setExecuteAt(timestamp(1, clock.incrementAndGet(), 1)); - command2.setExecuteAt(timestamp(1, clock.incrementAndGet(), 1)); - - AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); + + Command command1 = preaccepted(txnId1, txn, timestamp(1, clock.incrementAndGet(), 1)); + Command command2 = preaccepted(txnId2, txn, timestamp(1, clock.incrementAndGet(), 1)); + + AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); + cfk.initialize(CommandsForKeySerializer.loader); cfk.updateMax(maxTimestamp); - Assert.assertEquals(txnId1.hlc(), cfk.timestampMicrosFor(txnId1, true)); - Assert.assertEquals(txnId2.hlc(), cfk.timestampMicrosFor(txnId2, true)); - Assert.assertEquals(txnId2, cfk.lastExecutedTimestamp.get()); - Assert.assertEquals(txnId2.hlc(), cfk.lastExecutedMicros.get()); + cfk.updateLastExecutionTimestamps(txnId1, true); + Assert.assertEquals(txnId1.hlc(), cfk.current().timestampMicrosFor(txnId1, true)); + + cfk.updateLastExecutionTimestamps(txnId2, true); + Assert.assertEquals(txnId2.hlc(), cfk.current().timestampMicrosFor(txnId2, true)); + + Assert.assertEquals(txnId2, cfk.current().lastExecutedTimestamp()); + Assert.assertEquals(txnId2.hlc(), cfk.current().lastExecutedMicros()); + cfk.register(command1); cfk.register(command2); AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); logger.info("E: {}", cfk); - AccordCommandsForKey actual = AccordKeyspace.loadCommandsForKey(commandStore, key); + CommandsForKey actual = AccordKeyspace.loadCommandsForKey(commandStore, key); logger.info("A: {}", actual); - Assert.assertEquals(cfk, actual); - } - - @Test - public void commandsForKeyBlindWitnessed() - { - AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - PartialTxn txn = createPartialTxn(1); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); - - AccordCommandsForKey.WriteOnly writeOnlyCfk = new AccordCommandsForKey.WriteOnly(commandStore, key); - Timestamp maxTimestamp = null; - TreeSet expected = new TreeSet<>(); - - for (int i=0; i<4; i++) - { - maxTimestamp = timestamp(1, clock.incrementAndGet(), 1); - expected.add(maxTimestamp); - writeOnlyCfk.updateMax(maxTimestamp); - } - - AccordKeyspace.getCommandsForKeyMutation(commandStore, writeOnlyCfk, commandStore.nextSystemTimestampMicros()).apply(); - AccordCommandsForKey fullCfk = AccordKeyspace.loadCommandsForKey(commandStore, key); - - Assert.assertEquals(expected, fullCfk.blindWitnessed.getView()); - - fullCfk.applyBlindWitnessedTimestamps(); - Assert.assertEquals(maxTimestamp, fullCfk.max()); - Assert.assertTrue(fullCfk.blindWitnessed.getView().isEmpty()); + Assert.assertEquals(cfk.current(), actual); } - } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 02ccdc44aeab..187753adb90f 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -26,6 +26,7 @@ import accord.api.Key; import accord.api.RoutingKey; +import accord.impl.CommandsForKey; import accord.local.Command; import accord.local.Node; import accord.local.PreLoadContext; @@ -47,11 +48,10 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.ByteBufferUtil; -import static accord.utils.async.AsyncChains.awaitUninterruptibly; +import static accord.utils.async.AsyncChains.getUninterruptibly; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.*; @@ -82,11 +82,10 @@ private static PartitionKey key(int k) * disable cache and make sure correct values are coming in and out of the accord table */ @Test - public void basicCycleTest() + public void basicCycleTest() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - awaitUninterruptibly(commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); })); - + getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCacheSize(0))); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); Txn txn = createTxn(1); @@ -98,7 +97,7 @@ public void basicCycleTest() PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, false, 1, partialTxn, fullRoute); // Check preaccept - awaitUninterruptibly(commandStore.execute(preAccept, instance -> { + getUninterruptibly(commandStore.execute(preAccept, instance -> { PreAccept.PreAcceptReply reply = preAccept.apply(instance); Assert.assertTrue(reply.isOk()); PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; @@ -106,13 +105,13 @@ public void basicCycleTest() Assert.assertTrue(ok.deps.isEmpty()); })); - awaitUninterruptibly(commandStore.execute(preAccept, instance -> { - Command command = instance.command(txnId); + getUninterruptibly(commandStore.execute(preAccept, instance -> { + Command command = instance.command(txnId).current(); Assert.assertEquals(txnId, command.executeAt()); Assert.assertEquals(Status.PreAccepted, command.status()); - Assert.assertTrue(command.partialDeps().isEmpty()); + Assert.assertTrue(command.partialDeps() == null || command.partialDeps().isEmpty()); - AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).commandsForKey(key(1)); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); Assert.assertEquals(txnId, cfk.max()); Assert.assertNotNull((cfk.byId()).get(txnId)); Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); @@ -129,19 +128,19 @@ public void basicCycleTest() } Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); - awaitUninterruptibly(commandStore.execute(accept, instance -> { + getUninterruptibly(commandStore.execute(accept, instance -> { Accept.AcceptReply reply = accept.apply(instance); Assert.assertTrue(reply.isOk()); Assert.assertTrue(reply.deps.isEmpty()); })); - awaitUninterruptibly(commandStore.execute(accept, instance -> { - Command command = instance.command(txnId); + getUninterruptibly(commandStore.execute(accept, instance -> { + Command command = instance.command(txnId).current(); Assert.assertEquals(executeAt, command.executeAt()); Assert.assertEquals(Status.Accepted, command.status()); Assert.assertEquals(deps, command.partialDeps()); - AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).commandsForKey(key(1)); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); Assert.assertEquals(executeAt, cfk.max()); Assert.assertNotNull((cfk.byId()).get(txnId)); Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); @@ -149,15 +148,15 @@ public void basicCycleTest() // check commit Commit commit = Commit.SerializerSupport.create(txnId, route, 1, executeAt, partialTxn, deps, fullRoute, null); - awaitUninterruptibly(commandStore.execute(commit, commit::apply)); + getUninterruptibly(commandStore.execute(commit, commit::apply)); - awaitUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { - Command command = instance.command(txnId); + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { + Command command = instance.command(txnId).current(); Assert.assertEquals(commit.executeAt, command.executeAt()); Assert.assertTrue(command.hasBeen(Status.Committed)); Assert.assertEquals(commit.partialDeps, command.partialDeps()); - AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).commandsForKey(key(1)); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); Assert.assertNotNull((cfk.byId()).get(txnId)); Assert.assertNotNull((cfk.byExecuteAt()).get(commit.executeAt)); })); @@ -167,7 +166,7 @@ public void basicCycleTest() public void computeDeps() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - awaitUninterruptibly(commandStore.execute(PreLoadContext.empty(), instance -> { ((SafeAccordCommandStore) instance).commandStore().setCacheSize(0); })); + getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCacheSize(0))); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); Txn txn = createTxn(2); @@ -178,12 +177,12 @@ public void computeDeps() throws Throwable PartialTxn partialTxn = txn.slice(route.covering(), true); PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, false, 1, partialTxn, fullRoute); - awaitUninterruptibly(commandStore.execute(preAccept1, preAccept1::apply)); + getUninterruptibly(commandStore.execute(preAccept1, preAccept1::apply)); // second preaccept should identify txnId1 as a dependency TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); PreAccept preAccept2 = PreAccept.SerializerSupport.create(txnId2, route, 1, 1, false, 1, partialTxn, fullRoute); - awaitUninterruptibly(commandStore.execute(preAccept2, instance -> { + getUninterruptibly(commandStore.execute(preAccept2, instance -> { PreAccept.PreAcceptReply reply = preAccept2.apply(instance); Assert.assertTrue(reply.isOk()); PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java b/test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java new file mode 100644 index 000000000000..94440226a94e --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.function.BiConsumer; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.cassandra.service.accord.AccordLoadingState.LoadingState; + +public class AccordLoadingStateTest +{ + static class LoadableState extends AccordLoadingState + { + public LoadableState(String key) + { + super(key); + } + } + + static class InspectableCallback implements BiConsumer + { + boolean called; + V result; + Throwable failure; + + @Override + public void accept(V result, Throwable failure) + { + Assert.assertFalse(called); + called = true; + this.result = result; + this.failure = failure; + } + } + + private static void assertIllegalState(Runnable runnable) + { + try + { + runnable.run(); + Assert.fail("Expected IllegalStateException"); + } + catch (IllegalStateException ise) + { + // expected + } + } + + + @Test + public void loadSuccessTest() + { + LoadableState state = new LoadableState("K"); + + Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); + assertIllegalState(() -> state.value()); + assertIllegalState(() -> state.value("VVVV")); + assertIllegalState(() -> state.listen()); + + Runnable runnable = state.load(k -> { + Assert.assertEquals("K", k); + return "V"; + }); + Assert.assertEquals(LoadingState.PENDING, state.state()); + + runnable.run(); + Assert.assertEquals(LoadingState.LOADED, state.state()); + Assert.assertEquals("V", state.value()); + + assertIllegalState(() -> state.load(k -> "CCC")); + assertIllegalState(() -> state.listen()); + } + + @Test + public void loadNullTest() + { + LoadableState state = new LoadableState("K"); + Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); + + assertIllegalState(() -> state.value()); + assertIllegalState(() -> state.value("VVVV")); + assertIllegalState(() -> state.listen()); + Runnable runnable = state.load(k -> { + Assert.assertEquals("K", k); + return null; + }); + + Assert.assertEquals(LoadingState.PENDING, state.state()); + + runnable.run(); + Assert.assertEquals(LoadingState.LOADED, state.state()); + Assert.assertEquals(null, state.value()); + + assertIllegalState(() -> state.load(k -> "CCC")); + assertIllegalState(() -> state.listen()); + } + + @Test + public void additionalCallbackTest() + { + LoadableState state = new LoadableState("K"); + Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); + + assertIllegalState(() -> state.value()); + assertIllegalState(() -> state.value("VVVV")); + assertIllegalState(() -> state.listen()); + Runnable runnable = state.load(k -> { + Assert.assertEquals("K", k); + return "V"; + }); + + Assert.assertEquals(LoadingState.PENDING, state.state()); + + // register other callbacks + InspectableCallback callback1 = new InspectableCallback<>(); + InspectableCallback callback2 = new InspectableCallback<>(); + + + Assert.assertEquals(LoadingState.PENDING, state.state()); + state.listen().addCallback(callback1); + runnable.run(); + state.listen().addCallback(callback2); + + Assert.assertTrue(callback1.called); + Assert.assertNull(callback1.failure); + + Assert.assertTrue(callback2.called); + Assert.assertNull(callback2.failure); + + Assert.assertEquals(LoadingState.LOADED, state.state()); + Assert.assertEquals("V", state.value()); + + assertIllegalState(() -> state.load(k -> "CCC")); + assertIllegalState(() -> state.listen()); + } + + @Test + public void loadFailureTest() + { + LoadableState state = new LoadableState("K"); + + Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); + assertIllegalState(() -> state.value()); + assertIllegalState(() -> state.value("VVVV")); + assertIllegalState(() -> state.listen()); + + Runnable runnable = state.load(k -> { + throw new RuntimeException(); + }); + Assert.assertEquals(LoadingState.PENDING, state.state()); + + runnable.run(); + Assert.assertEquals(LoadingState.FAILED, state.state()); + assertIllegalState(() -> state.value()); + Assert.assertTrue(state.failure() instanceof RuntimeException); + + assertIllegalState(() -> state.load(k -> "CCC")); + assertIllegalState(() -> state.listen()); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index a686afad7ca7..375f2c7e8c68 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -18,82 +18,125 @@ package org.apache.cassandra.service.accord; -import java.util.HashSet; -import java.util.Set; +import java.util.function.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; import org.junit.Assert; import org.junit.Test; +import accord.utils.async.AsyncChain; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; +import org.apache.cassandra.service.accord.AccordLoadingState.LoadingState; + +import static org.apache.cassandra.service.accord.AccordTestUtils.testLoad; public class AccordStateCacheTest { - private static final long DEFAULT_ITEM_SIZE = 100; - private static final long KEY_SIZE = 4; - private static final long DEFAULT_NODE_SIZE = nodeSize(DEFAULT_ITEM_SIZE); + private static final long DEFAULT_NODE_SIZE = nodeSize(0); - private static class Item implements AccordState + private static class SafeString implements AccordSafeState { - long size = DEFAULT_ITEM_SIZE; + private boolean invalidated = false; + private final AccordLoadingState global; + private String original = null; + + public SafeString(AccordLoadingState global) + { + this.global = global; + } + + public AccordLoadingState global() + { + return global; + } + + @Override + public String key() + { + return global.key(); + } + + @Override + public String current() + { + return global.value(); + } - final Integer key; - boolean modified = false; - boolean initialized = false; + @Override + public void set(String update) + { + global.value(update); + } - public Item(Integer key) + @Override + public String original() { - this.key = key; + return original; } @Override - public boolean isEmpty() + public void preExecute() { - return initialized; + original = global.value(); } @Override - public Integer key() + public void postExecute() { - return key; + } @Override - public boolean hasModifications() + public LoadingState loadingState() { - return modified; + return global.state(); } @Override - public void clearModifiedFlag() + public AsyncResults.RunnableResult load(Function loadFunction) { - modified = false; + return global.load(loadFunction); } @Override - public boolean isLoaded() + public AsyncChain listen() { - return true; + return global.listen(); } @Override - public long estimatedSizeOnHeap() + public Throwable failure() { - return size + KEY_SIZE; + return global.failure(); } + + @Override + public void invalidate() + { + invalidated = true; + } + + @Override + public boolean invalidated() + { + return invalidated; + } + } + + private static long emptyNodeSize() + { + return AccordStateCache.Node.EMPTY_SIZE; } private static long nodeSize(long itemSize) { - return itemSize + KEY_SIZE + AccordStateCache.Node.EMPTY_SIZE; + return itemSize + emptyNodeSize(); } - private static void assertCacheState(AccordStateCache cache, int active, int cached, long bytes) + private static void assertCacheState(AccordStateCache cache, int referencd, int total, long bytes) { - Assert.assertEquals(active, cache.numActiveEntries()); - Assert.assertEquals(cached, cache.numCachedEntries()); + Assert.assertEquals(referencd, cache.numReferencedEntries()); + Assert.assertEquals(total, cache.totalNumEntries()); Assert.assertEquals(bytes, cache.bytesCached()); } @@ -101,372 +144,187 @@ private static void assertCacheState(AccordStateCache cache, int active, int cac public void testAcquisitionAndRelease() { AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); assertCacheState(cache, 0, 0, 0); - Item item1 = instance.getOrCreate(1); - assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); + + SafeString safeString1 = instance.reference("1"); + assertCacheState(cache, 1, 1, emptyNodeSize()); + testLoad(safeString1, "1"); Assert.assertNull(cache.head); Assert.assertNull(cache.tail); - item1.size = 110; - item1.modified = true; - instance.release(item1); - assertCacheState(cache, 0, 1, nodeSize(110)); - Assert.assertSame(item1, cache.tail.value); - Assert.assertSame(item1, cache.head.value); + instance.release(safeString1); + assertCacheState(cache, 0, 1, nodeSize(1)); + Assert.assertSame(safeString1.global, cache.tail); + Assert.assertSame(safeString1.global, cache.head); - Item item2 = instance.getOrCreate(2); - assertCacheState(cache, 1, 1, DEFAULT_NODE_SIZE + nodeSize(110)); - instance.release(item2); - assertCacheState(cache, 0, 2, DEFAULT_NODE_SIZE + nodeSize(110)); + SafeString safeString2 = instance.reference("2"); + assertCacheState(cache, 1, 2, DEFAULT_NODE_SIZE + nodeSize(1)); + testLoad(safeString2, "2"); + instance.release(safeString2); + assertCacheState(cache, 0, 2, nodeSize(1) + nodeSize(1)); - Assert.assertSame(item1, cache.tail.value); - Assert.assertSame(item2, cache.head.value); + Assert.assertSame(safeString1.global, cache.tail); + Assert.assertSame(safeString2.global, cache.head); } @Test public void testRotation() { AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 5); - AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); assertCacheState(cache, 0, 0, 0); - Item[] items = new Item[3]; + SafeString[] items = new SafeString[3]; for (int i=0; i<3; i++) { - Item item = instance.getOrCreate(i); - items[i] = item; - instance.release(item); + SafeString safeString = instance.reference(Integer.toString(i)); + items[i] = safeString; + Assert.assertNotNull(safeString); + testLoad(safeString, Integer.toString(i)); + Assert.assertTrue(instance.isReferenced(safeString.key())); + instance.release(safeString); } - Assert.assertSame(items[0], cache.tail.value); - Assert.assertSame(items[2], cache.head.value); - assertCacheState(cache, 0, 3, DEFAULT_NODE_SIZE * 3); + Assert.assertSame(items[0].global, cache.tail); + Assert.assertSame(items[2].global, cache.head); + assertCacheState(cache, 0, 3, nodeSize(1) * 3); + + SafeString safeString = instance.reference("1"); + Assert.assertEquals(LoadingState.LOADED, safeString.loadingState()); - Item item = instance.getOrCreate(1); - assertCacheState(cache, 1, 2, DEFAULT_NODE_SIZE * 3); + assertCacheState(cache, 1, 3, nodeSize(1) * 3); // releasing item should return it to the head - instance.release(item); - assertCacheState(cache, 0, 3, DEFAULT_NODE_SIZE * 3); - Assert.assertSame(items[0], cache.tail.value); - Assert.assertSame(items[1], cache.head.value); + instance.release(safeString); + assertCacheState(cache, 0, 3, nodeSize(1) * 3); + Assert.assertSame(items[0].global, cache.tail); + Assert.assertSame(items[1].global, cache.head); } @Test public void testEvictionOnAcquire() { - AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 5); - AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + AccordStateCache cache = new AccordStateCache(nodeSize(1) * 5); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); assertCacheState(cache, 0, 0, 0); - Item[] items = new Item[5]; + SafeString[] items = new SafeString[5]; for (int i=0; i<5; i++) { - Item item = instance.getOrCreate(i); - items[i] = item; - instance.release(item); + SafeString safeString = instance.reference(Integer.toString(i)); + items[i] = safeString; + testLoad(safeString, Integer.toString(i)); + Assert.assertTrue(instance.isReferenced(safeString.key())); + instance.release(safeString); } - assertCacheState(cache, 0, 5, DEFAULT_NODE_SIZE * 5); - Assert.assertSame(items[0], cache.tail.value); - Assert.assertSame(items[4], cache.head.value); - - instance.getOrCreate(5); - assertCacheState(cache, 1, 4, DEFAULT_NODE_SIZE * 5); - Assert.assertSame(items[1], cache.tail.value); - Assert.assertSame(items[4], cache.head.value); - Assert.assertFalse(cache.keyIsCached(0)); - Assert.assertFalse(cache.keyIsActive(0)); + assertCacheState(cache, 0, 5, nodeSize(1) * 5); + Assert.assertSame(items[0].global, cache.tail); + Assert.assertSame(items[4].global, cache.head); + + SafeString safeString = instance.reference("5"); + Assert.assertTrue(instance.isReferenced(safeString.key())); + + // since it's not loaded, only the node size is counted here + assertCacheState(cache, 1, 5, nodeSize(1) * 4 + nodeSize(0)); + Assert.assertSame(items[1].global, cache.tail); + Assert.assertSame(items[4].global, cache.head); + Assert.assertFalse(cache.keyIsCached("0")); + Assert.assertFalse(cache.keyIsReferenced("0")); + + testLoad(safeString, "5"); + instance.release(safeString); + assertCacheState(cache, 0, 5, nodeSize(1) * 5); + Assert.assertSame(items[1].global, cache.tail); + Assert.assertSame(safeString.global, cache.head); } @Test public void testEvictionOnRelease() { - AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 4); - AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + AccordStateCache cache = new AccordStateCache(nodeSize(1) * 4); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); assertCacheState(cache, 0, 0, 0); - Item[] items = new Item[5]; + SafeString[] items = new SafeString[5]; for (int i=0; i<5; i++) { - Item item = instance.getOrCreate(i); - items[i] = item; + SafeString safeString = instance.reference(Integer.toString(i)); + items[i] = safeString; + testLoad(safeString, Integer.toString(i)); + Assert.assertTrue(instance.isReferenced(safeString.key())); } - assertCacheState(cache, 5, 0, DEFAULT_NODE_SIZE * 5); + assertCacheState(cache, 5, 5, nodeSize(0) * 5); Assert.assertNull(cache.head); Assert.assertNull(cache.tail); instance.release(items[2]); - assertCacheState(cache, 4, 0, DEFAULT_NODE_SIZE * 4); + assertCacheState(cache, 4, 4, nodeSize(0) * 4); Assert.assertNull(cache.head); Assert.assertNull(cache.tail); instance.release(items[4]); - assertCacheState(cache, 3, 1, DEFAULT_NODE_SIZE * 4); - Assert.assertSame(items[4], cache.tail.value); - Assert.assertSame(items[4], cache.head.value); + assertCacheState(cache, 3, 4, nodeSize(0) * 3 + nodeSize(1)); + Assert.assertSame(items[4].global, cache.tail); + Assert.assertSame(items[4].global, cache.head); } @Test public void testMultiAcquireRelease() { AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 4); - AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); assertCacheState(cache, 0, 0, 0); - Item item = instance.getOrCreate(0); - Assert.assertNotNull(item); - Assert.assertEquals(1, cache.references(0)); - assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); + SafeString safeString1 = instance.reference("0"); + testLoad(safeString1, "0"); + Assert.assertEquals(LoadingState.LOADED, safeString1.loadingState()); - Assert.assertNotNull(instance.getOrCreate(0)); - Assert.assertEquals(2, cache.references(0)); - assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); + Assert.assertEquals(1, cache.references("0")); + assertCacheState(cache, 1, 1, nodeSize(0)); - instance.release(item); - assertCacheState(cache, 1, 0, DEFAULT_NODE_SIZE); - instance.release(item); - assertCacheState(cache, 0, 1, DEFAULT_NODE_SIZE); + SafeString safeString2 = instance.reference("0"); + Assert.assertEquals(LoadingState.LOADED, safeString1.loadingState()); + Assert.assertEquals(2, cache.references("0")); + assertCacheState(cache, 1, 1, nodeSize(0)); + + instance.release(safeString1); + assertCacheState(cache, 1, 1, nodeSize(1)); + instance.release(safeString2); + assertCacheState(cache, 0, 1, nodeSize(1)); } @Test public void evictionBlockedOnSaveFuture() { - AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 4); - AccordStateCache.Instance instance = cache.instance(Integer.class, Item.class, Item::new); + AccordStateCache cache = new AccordStateCache(nodeSize(1) * 4); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); assertCacheState(cache, 0, 0, 0); - Item[] items = new Item[4]; + SafeString[] items = new SafeString[4]; for (int i=0; i<4; i++) { - Item item = instance.getOrCreate(i); - items[i] = item; + SafeString item = instance.reference(Integer.toString(i)); + testLoad(item, Integer.toString(i)); + Assert.assertTrue(instance.isReferenced(item.key())); instance.release(item); } - assertCacheState(cache, 0, 4, DEFAULT_NODE_SIZE * 4); + assertCacheState(cache, 0, 4, nodeSize(1) * 4); AsyncResult saveFuture = AsyncResults.settable(); - instance.addSaveResult(0, saveFuture); + instance.addSaveResult("0", saveFuture); cache.setMaxSize(0); // all should have been evicted except 0 - assertCacheState(cache, 0, 1, DEFAULT_NODE_SIZE); - Assert.assertTrue(cache.keyIsCached(0)); - Assert.assertFalse(cache.keyIsCached(1)); - Assert.assertFalse(cache.keyIsCached(2)); - Assert.assertFalse(cache.keyIsCached(3)); - } - - static class SetItem implements AccordState - { - final Integer key; - final Set set = new HashSet<>(); - boolean modified = false; - boolean initialized = false; - - static class WriteOnly extends SetItem implements AccordState.WriteOnly - { - AsyncResult.Settable promise = null; - final Set added = new HashSet<>(); - final Set remove = new HashSet<>(); - - public WriteOnly(Integer key) - { - super(key); - } - - @Override - public void asyncResult(AsyncResult notifier) - { - Preconditions.checkArgument(notifier instanceof AsyncResult.Settable); - this.promise = (AsyncResult.Settable) notifier; - } - - @Override - public AsyncResult asyncResult() - { - return promise; - } - - @Override - public void applyChanges(SetItem instance) - { - instance.set.addAll(added); - instance.set.removeAll(remove); - } - } - - - public SetItem(Integer key) - { - this.key = key; - } - - @Override - public boolean isEmpty() - { - return initialized; - } - - @Override - public Integer key() - { - return key; - } - - @Override - public boolean hasModifications() - { - return modified; - } - - @Override - public void clearModifiedFlag() - { - this.modified = false; - } - - @Override - public boolean isLoaded() - { - return true; - } - - @Override - public long estimatedSizeOnHeap() - { - return set.size() * 100L; - } - } - - @Test - public void writeOnlyCycle() - { - AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); - SetItem onDisk = new SetItem(5); - onDisk.set.addAll(ImmutableSet.of(1, 2, 3)); - Assert.assertEquals(0, instance.pendingWriteOnlyOperations(5)); - - SetItem.WriteOnly writeOnly1 = new SetItem.WriteOnly(5); - writeOnly1.added.addAll(ImmutableSet.of(4, 5)); - writeOnly1.asyncResult(AsyncResults.settable()); - instance.addWriteOnly(writeOnly1); - Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); - - SetItem.WriteOnly writeOnly2 = new SetItem.WriteOnly(5); - writeOnly2.remove.addAll(ImmutableSet.of(2, 4)); - writeOnly2.asyncResult(AsyncResults.settable()); - instance.addWriteOnly(writeOnly2); - Assert.assertEquals(2, instance.pendingWriteOnlyOperations(5)); - - Assert.assertNull(instance.getSaveResult(5)); - Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); - - instance.lockWriteOnlyGroupIfExists(5); - Assert.assertTrue(instance.writeOnlyGroupIsLocked(5)); - Assert.assertEquals(ImmutableSet.of(1, 2, 3), onDisk.set); - Assert.assertTrue(instance.canEvict(5)); - - instance.applyAndRemoveWriteOnlyGroup(onDisk); - Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); - Assert.assertEquals(ImmutableSet.of(1, 3, 5), onDisk.set); - - // write only futures should have been merged and promoted to normal save futures, which would - // prevent the cached object from being purged until they were completed - AsyncResult saveFuture = instance.getSaveResult(5); - Assert.assertNotNull(saveFuture); - Assert.assertFalse(saveFuture.isDone()); - Assert.assertFalse(instance.canEvict(5)); - - writeOnly1.promise.setSuccess(null); - Assert.assertFalse(saveFuture.isDone()); - Assert.assertFalse(instance.canEvict(5)); - - writeOnly2.promise.setSuccess(null); - Assert.assertTrue(saveFuture.isDone()); - Assert.assertTrue(instance.canEvict(5)); - } - - // write only operations should not be purged out of order - @Test - public void writeOnlyPurging() - { - AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); - SetItem.WriteOnly[] writeOnly = new SetItem.WriteOnly[4]; - for (int i=0; i instance = cache.instance(Integer.class, SetItem.class, SetItem::new); - - SetItem.WriteOnly item = new SetItem.WriteOnly(5); - item.added.add(0); - item.asyncResult(AsyncResults.settable()); - instance.addWriteOnly(item); - - instance.lockWriteOnlyGroupIfExists(5); - - // the write only item should not be purged, even though it's complete - item.promise.setSuccess(null); - instance.purgeWriteOnly(5); - Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); - } - - // if a load future exists for the key we're creating a write group for, we need to lock - // the group so the loading instance gets changes applied when it finishes loading - @Test - public void testLoadFutureAutoLocksWriteOnlyInstances() - { - AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); - - AsyncResult loadfuture = AsyncResults.settable(); - instance.setLoadResult(5, loadfuture); - - Assert.assertFalse(instance.writeOnlyGroupIsLocked(5)); - Assert.assertEquals(0, instance.pendingWriteOnlyOperations(5)); - - // adding a write only object should immediately lock the group, since there's an existing load future - SetItem.WriteOnly item = new SetItem.WriteOnly(5); - item.added.add(0); - item.asyncResult(AsyncResults.settable()); - instance.addWriteOnly(item); - - Assert.assertTrue(instance.writeOnlyGroupIsLocked(5)); - Assert.assertEquals(1, instance.pendingWriteOnlyOperations(5)); + assertCacheState(cache, 0, 1, nodeSize(1)); + Assert.assertTrue(cache.keyIsCached("0")); + Assert.assertFalse(cache.keyIsCached("1")); + Assert.assertFalse(cache.keyIsCached("2")); + Assert.assertFalse(cache.keyIsCached("3")); } // if a future is added and another one exists for the same key, they should be merged @@ -474,13 +332,13 @@ public void testLoadFutureAutoLocksWriteOnlyInstances() public void testFutureMerging() { AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(Integer.class, SetItem.class, SetItem::new); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); AsyncResult.Settable promise1 = AsyncResults.settable(); AsyncResult.Settable promise2 = AsyncResults.settable(); - instance.addSaveResult(5, promise1); - instance.addSaveResult(5, promise2); + instance.addSaveResult("5", promise1); + instance.addSaveResult("5", promise2); - AsyncResult future = instance.getSaveResult(5); + AsyncResult future = instance.getSaveResult("5"); Assert.assertNotSame(future, promise1); Assert.assertNotSame(future, promise2); @@ -492,4 +350,28 @@ public void testFutureMerging() promise2.setSuccess(null); Assert.assertTrue(future.isDone()); } + + @Test + public void testUpdates() + { + AccordStateCache cache = new AccordStateCache(500); + AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + assertCacheState(cache, 0, 0, 0); + + SafeString safeString = instance.reference("1"); + testLoad(safeString, "1"); + assertCacheState(cache, 1, 1, emptyNodeSize()); + Assert.assertNull(cache.head); + Assert.assertNull(cache.tail); + + Assert.assertTrue(instance.isReferenced(safeString.key())); + assertCacheState(cache, 1, 1, nodeSize(0)); + + safeString.set("11"); + instance.release(safeString); + assertCacheState(cache, 0, 1, nodeSize(2)); + Assert.assertSame(safeString.global, cache.tail); + Assert.assertSame(safeString.global, cache.head); + + } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 0c457ae067aa..031777764067 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -22,31 +22,41 @@ import java.util.List; import java.util.Set; import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; import java.util.function.LongSupplier; - +import java.util.stream.Collectors; +import java.util.stream.IntStream; import javax.annotation.Nullable; +import com.google.common.collect.ImmutableSortedMap; +import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Lists; import com.google.common.collect.Sets; - import org.junit.Assert; import accord.api.Data; +import accord.api.Key; import accord.api.ProgressLog; +import accord.api.Result; import accord.api.RoutingKey; import accord.api.Write; +import accord.impl.CommandsForKey; import accord.impl.InMemoryCommandStore; import accord.local.Command; import accord.local.CommandStores; +import accord.local.CommonAttributes; import accord.local.Node; import accord.local.Node.Id; import accord.local.NodeTimeService; import accord.local.PreLoadContext; +import accord.local.SaveStatus; import accord.local.Status.Known; import accord.primitives.Ballot; -import accord.primitives.Ranges; import accord.primitives.Keys; import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -58,18 +68,22 @@ import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.statements.TransactionStatement; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static accord.primitives.Routable.Domain.Key; -import static accord.utils.async.AsyncChains.awaitUninterruptibly; +import static accord.utils.async.AsyncChains.getUninterruptibly; import static java.lang.String.format; public class AccordTestUtils @@ -79,19 +93,90 @@ public static Id localNodeId() return EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); } + public static class Commands + { + public static Command notWitnessed(TxnId txnId, PartialTxn txn) + { + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + attrs.partialTxn(txn); + return Command.SerializerSupport.notWitnessed(attrs, Ballot.ZERO); + } + + public static Command preaccepted(TxnId txnId, PartialTxn txn, Timestamp executeAt) + { + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + attrs.partialTxn(txn); + return Command.SerializerSupport.preaccepted(attrs, executeAt, Ballot.ZERO); + } + + public static Command committed(TxnId txnId, PartialTxn txn, Timestamp executeAt) + { + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + attrs.partialTxn(txn); + return Command.SerializerSupport.committed(attrs, + SaveStatus.Committed, + executeAt, + Ballot.ZERO, + Ballot.ZERO, + ImmutableSortedSet.of(), + ImmutableSortedMap.of()); + } + } + + public static CommandsForKey commandsForKey(Key key) + { + return new CommandsForKey(key, CommandsForKeySerializer.loader); + } + + public static AccordLoadingState loaded(K key, V value) + { + AccordLoadingState global = new AccordLoadingState<>(key); + global.load(k -> { + Assert.assertEquals(key, k); + return value; + }).run(); + Assert.assertEquals(AccordLoadingState.LoadingState.LOADED, global.state()); + return global; + } + + public static AccordSafeCommand safeCommand(Command command) + { + AccordLoadingState global = loaded(command.txnId(), command); + return new AccordSafeCommand(global); + } + + public static Function testableLoad(K key, V val) + { + return k -> { + Assert.assertEquals(key, k); + return val; + }; + } + + public static void testLoad(AccordSafeState safeState, V val) + { + Assert.assertEquals(AccordLoadingState.LoadingState.UNINITIALIZED, safeState.loadingState()); + Runnable load = safeState.load(testableLoad(safeState.key(), val)); + Assert.assertEquals(AccordLoadingState.LoadingState.PENDING, safeState.loadingState()); + load.run(); + Assert.assertEquals(AccordLoadingState.LoadingState.LOADED, safeState.loadingState()); + safeState.preExecute(); + Assert.assertEquals(val, safeState.current()); + } + public static final ProgressLog NOOP_PROGRESS_LOG = new ProgressLog() { - @Override public void unwitnessed(TxnId txnId, RoutingKey homeKey, ProgressShard shard) {} + @Override public void unwitnessed(TxnId txnId, RoutingKey routingKey, ProgressShard progressShard) {} @Override public void preaccepted(Command command, ProgressShard progressShard) {} @Override public void accepted(Command command, ProgressShard progressShard) {} @Override public void committed(Command command, ProgressShard progressShard) {} @Override public void readyToExecute(Command command, ProgressShard progressShard) {} @Override public void executed(Command command, ProgressShard progressShard) {} @Override public void invalidated(Command command, ProgressShard progressShard) {} - @Override public void durable(Command command, Set persistedOn) {} - @Override public void durable(TxnId txnId, @Nullable Unseekables someKeys, ProgressShard shard) {} @Override public void durableLocal(TxnId txnId) {} - @Override public void waiting(TxnId blockedBy, Known blockedUntil, Unseekables blockedOn) {} + @Override public void durable(Command command, @Nullable Set set) {} + @Override public void durable(TxnId txnId, @Nullable Unseekables unseekables, ProgressShard progressShard) {} + @Override public void waiting(TxnId txnId, Known known, Unseekables unseekables) {} }; public static TxnId txnId(long epoch, long hlc, int node) @@ -109,38 +194,35 @@ public static Ballot ballot(long epoch, long hlc, int node) return Ballot.fromValues(epoch, hlc, new Node.Id(node)); } - /** - * does the reads, writes, and results for a command without the consensus - */ - public static void processCommandResult(AccordCommandStore commandStore, Command command) throws Throwable + public static Pair processTxnResult(AccordCommandStore commandStore, TxnId txnId, PartialTxn txn, Timestamp executeAt) throws Throwable { - - awaitUninterruptibly(commandStore.execute(PreLoadContext.contextFor(Collections.emptyList(), command.partialTxn().keys()), - instance -> { - PartialTxn txn = command.partialTxn(); - TxnRead read = (TxnRead) txn.read(); - Data readData = read.keys().stream() - .map(key -> { - try - { - return AsyncChains.getBlocking(read.read(key, command.txnId().rw(), instance, command.executeAt(), null)); - } - catch (InterruptedException e) - { - throw new UncheckedInterruptedException(e); - } - catch (ExecutionException e) - { - throw new RuntimeException(e); - } - }) - .reduce(null, TxnData::merge); - Write write = txn.update().apply(readData); - ((AccordCommand)command).setWrites(new Writes(command.executeAt(), (Keys)txn.keys(), write)); - ((AccordCommand)command).setResult(txn.query().compute(command.txnId(), readData, txn.read(), txn.update())); - })); + AtomicReference> result = new AtomicReference<>(); + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(Collections.emptyList(), txn.keys()), + safeStore -> { + TxnRead read = (TxnRead) txn.read(); + Data readData = read.keys().stream().map(key -> { + try + { + return AsyncChains.getBlocking(read.read(key, txn.kind(), safeStore, executeAt, null)); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); + } + }) + .reduce(null, TxnData::merge); + Write write = txn.update().apply(readData); + result.set(Pair.create(new Writes(executeAt, (Keys)txn.keys(), write), + txn.query().compute(txnId, readData, txn.read(), txn.update()))); + })); + return result.get(); } + public static Txn createTxn(String query) { return createTxn(query, QueryOptions.DEFAULT); @@ -187,7 +269,12 @@ public static Txn createTxn(int key) public static Ranges fullRange(Txn txn) { - PartitionKey key = (PartitionKey) txn.keys().get(0); + return fullRange(txn.keys()); + } + + public static Ranges fullRange(Seekables keys) + { + PartitionKey key = (PartitionKey) keys.get(0); return Ranges.of(TokenRange.fullRange(key.keyspace())); } @@ -246,6 +333,7 @@ public static AccordCommandStore createAccordCommandStore(Node.Id node, LongSupp cs -> NOOP_PROGRESS_LOG, new SingleEpochRanges(topology.rangesForNode(node))); } + public static AccordCommandStore createAccordCommandStore(LongSupplier now, String keyspace, String table) { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); @@ -272,4 +360,15 @@ public static void execute(AccordCommandStore commandStore, Runnable runnable) throw new RuntimeException(e.getCause()); } } + + public static PartitionKey key(TableMetadata table, int key) + { + DecoratedKey dk = table.partitioner.decorateKey(Int32Type.instance.decompose(key)); + return new PartitionKey(table.keyspace, table.id, dk); + } + + public static Keys keys(TableMetadata table, int... keys) + { + return Keys.of(IntStream.of(keys).mapToObj(key -> key(table, key)).collect(Collectors.toList())); + } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 3cdfceda182c..db0dbd6af55c 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -22,6 +22,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; import java.util.function.Function; import com.google.common.collect.ImmutableList; @@ -30,28 +32,37 @@ import org.junit.BeforeClass; import org.junit.Test; -import accord.local.Status; +import accord.impl.CommandsForKey; +import accord.local.Command; import accord.primitives.PartialTxn; +import accord.primitives.RoutableKey; import accord.primitives.TxnId; +import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordCommand; import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandsForKey; import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordLoadingState; +import org.apache.cassandra.service.accord.AccordSafeCommand; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordStateCache; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.async.AsyncOperation.Context; import org.apache.cassandra.utils.concurrent.AsyncPromise; -import static com.google.common.collect.Iterables.getOnlyElement; import static java.util.Collections.singleton; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.notWitnessed; +import static org.apache.cassandra.service.accord.AccordTestUtils.commandsForKey; import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; import static org.apache.cassandra.service.accord.AccordTestUtils.execute; +import static org.apache.cassandra.service.accord.AccordTestUtils.loaded; +import static org.apache.cassandra.service.accord.AccordTestUtils.testLoad; +import static org.apache.cassandra.service.accord.AccordTestUtils.testableLoad; import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; public class AsyncLoaderTest @@ -73,30 +84,37 @@ public void cachedTest() { AtomicLong clock = new AtomicLong(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + commandStore.executeBlocking(() -> commandStore.setCacheSize(1024)); + + AccordStateCache.Instance cfkCache = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // acquire / release - AccordCommand command = commandCache.getOrCreate(txnId).initialize(); - command.setPartialTxn(txn); - commandCache.release(command); - AccordCommandsForKey cfk = cfkCacche.getOrCreate(key).initialize(); - cfkCacche.release(cfk); - AsyncContext context = new AsyncContext(); + AccordSafeCommand safeCommand = commandCache.reference(txnId); + testLoad(safeCommand, notWitnessed(txnId, txn)); + commandCache.release(safeCommand); + + AccordSafeCommandsForKey safeCfk = cfkCache.reference(key); + testLoad(safeCfk, commandsForKey(key)); + cfkCache.release(safeCfk); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); // everything is cached, so the loader should return immediately commandStore.executeBlocking(() -> { + Context context = new Context(); boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertEquals(safeCommand.global(), context.commands.get(txnId).global()); + Assert.assertEquals(safeCfk.global(), context.commandsForKeys.get(key).global()); Assert.assertTrue(result); }); - Assert.assertSame(command, context.commands.get(txnId)); - Assert.assertSame(cfk, context.commandsForKey.get(key)); + Assert.assertSame(safeCommand.global(), commandCache.getUnsafe(txnId)); + Assert.assertSame(safeCfk.global(), cfkCache.getUnsafe(key)); } /** @@ -107,26 +125,32 @@ public void loadTest() { AtomicLong clock = new AtomicLong(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // create / persist - AccordCommand command = new AccordCommand(txnId).initialize(); - command.setPartialTxn(txn); - AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); - AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); + AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); + safeCommand.preExecute(); + safeCommand.set(notWitnessed(txnId, txn)); + AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); + + AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); + safeCommand.preExecute(); + cfk.set(commandsForKey(key)); AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncContext context = new AsyncContext(); AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); AsyncPromise cbFired = new AsyncPromise<>(); + Context context = new Context(); commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); + Assert.assertTrue(context.commands.containsKey(txnId)); + Assert.assertTrue(context.commandsForKeys.containsKey(key)); cbFired.setSuccess(null); }); Assert.assertFalse(result); @@ -137,6 +161,8 @@ public void loadTest() // then return immediately after the callback has fired commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(context.commands.containsKey(txnId)); + Assert.assertTrue(context.commandsForKeys.containsKey(key)); Assert.assertTrue(result); }); } @@ -149,26 +175,31 @@ public void partialLoadTest() { AtomicLong clock = new AtomicLong(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // acquire /release, create / persist - AccordCommand command = commandCache.getOrCreate(txnId).initialize(); - command.setPartialTxn(txn); - commandCache.release(command); - AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); - AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); + AccordSafeCommand safeCommand = commandCache.reference(txnId); + testLoad(safeCommand, notWitnessed(txnId, txn)); + commandCache.release(safeCommand); + + + AccordSafeCommandsForKey safeCfk = new AccordSafeCommandsForKey(loaded(key, null)); + safeCfk.set(commandsForKey(key)); + AccordKeyspace.getCommandsForKeyMutation(commandStore, safeCfk, commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncContext context = new AsyncContext(); AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); AsyncPromise cbFired = new AsyncPromise<>(); + Context context = new Context(); commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); + Assert.assertTrue(context.commands.containsKey(txnId)); + Assert.assertTrue(context.commandsForKeys.containsKey(key)); cbFired.setSuccess(null); }); Assert.assertFalse(result); @@ -178,7 +209,10 @@ public void partialLoadTest() // then return immediately after the callback has fired commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(context.commands.containsKey(txnId)); + Assert.assertTrue(context.commandsForKeys.containsKey(key)); Assert.assertTrue(result); }); } @@ -187,98 +221,59 @@ public void partialLoadTest() * If another process is loading a resource, piggyback on it's future */ @Test - public void inProgressLoadTest() + public void inProgressLoadTest() throws Throwable { AtomicLong clock = new AtomicLong(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); + commandStore.executor().submit(() -> commandStore.setCacheSize(1024)).get(); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + AccordStateCache.Instance cfkCache = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // acquire / release - AccordCommand command = commandCache.getOrCreate(txnId).initialize(); - command.setPartialTxn(txn); - commandCache.release(command); - AccordCommandsForKey cfk = cfkCacche.getOrCreate(key).initialize(); - cfkCacche.release(cfk); + AccordSafeCommand safeCommand = commandCache.reference(txnId); + Assert.assertEquals(AccordLoadingState.LoadingState.UNINITIALIZED, safeCommand.loadingState()); + Runnable load = safeCommand.load(testableLoad(safeCommand.key(), notWitnessed(txnId, txn))); + Assert.assertEquals(AccordLoadingState.LoadingState.PENDING, safeCommand.loadingState()); + Assert.assertTrue(commandCache.isReferenced(txnId)); + Assert.assertFalse(commandCache.isLoaded(txnId)); + + AccordSafeCommandsForKey safeCfk = cfkCache.reference(key); + testLoad(safeCfk, commandsForKey(key)); + cfkCache.release(safeCfk); - AsyncContext context = new AsyncContext(); AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); // since there's a read future associated with the txnId, we'll wait for it to load - AsyncResult.Settable readFuture = AsyncResults.settable(); - commandCache.setLoadResult(command.txnId(), readFuture); - AsyncPromise cbFired = new AsyncPromise<>(); + Context context = new Context(); commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); + Assert.assertTrue(context.commands.containsKey(txnId)); + Assert.assertTrue(context.commandsForKeys.containsKey(key)); cbFired.setSuccess(null); }); Assert.assertFalse(result); }); Assert.assertFalse(cbFired.isSuccess()); - readFuture.setSuccess(null); + load.run(); + Assert.assertEquals(AccordLoadingState.LoadingState.LOADED, safeCommand.loadingState()); cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); Assert.assertTrue(cbFired.isSuccess()); // then return immediately after the callback has fired commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(context.commands.containsKey(txnId)); + Assert.assertTrue(context.commandsForKeys.containsKey(key)); Assert.assertTrue(result); }); } - @Test - public void pendingWriteOnlyApplied() - { - AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - - TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - TxnId blockApply = txnId(1, clock.incrementAndGet(), 1); - TxnId blockCommit = txnId(1, clock.incrementAndGet(), 1); - PartialTxn txn = createPartialTxn(0); - PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); - - AccordCommand command = new AccordCommand(txnId).initialize(); - command.setPartialTxn(txn); - command.setExecuteAt(txnId); - command.setStatus(Status.Committed); - AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); - command.clearModifiedFlag(); - - execute(commandStore, () -> { - AccordStateCache.Instance cache = commandStore.commandCache(); - AccordCommand.WriteOnly writeOnly1 = new AccordCommand.WriteOnly(txnId); - writeOnly1.blockingApplyOn.blindAdd(blockApply); - writeOnly1.asyncResult(AsyncResults.settable()); - cache.addWriteOnly(writeOnly1); - - AccordCommand.WriteOnly writeOnly2 = new AccordCommand.WriteOnly(txnId); - writeOnly2.blockingCommitOn.blindAdd(blockCommit); - writeOnly2.asyncResult(AsyncResults.settable()); - cache.addWriteOnly(writeOnly2); - - AsyncContext context = new AsyncContext(); - AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId), Collections.emptyList()); - while (true) - { - if (loader.load(context, (o, t) -> Assert.assertNull(t))) - break; - } - AccordCommand loaded = context.commands.get(txnId); - - Assert.assertEquals(txnId, loaded.executeAt()); - Assert.assertEquals(Status.Committed, loaded.status()); - Assert.assertEquals(blockApply, Iterables.getOnlyElement(loaded.blockingApplyOn.getView())); - Assert.assertEquals(blockCommit, Iterables.getOnlyElement(loaded.blockingCommitOn.getView())); - }); - } - @Test public void failedLoadTest() throws Throwable { @@ -288,30 +283,35 @@ public void failedLoadTest() throws Throwable TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); AsyncResult.Settable promise1 = AsyncResults.settable(); + AtomicReference> consumer1 = new AtomicReference<>(); AsyncResult.Settable promise2 = AsyncResults.settable(); + AtomicReference> consumer2 = new AtomicReference<>(); AsyncResult.Settable callback = AsyncResults.settable(); RuntimeException failure = new RuntimeException(); execute(commandStore, () -> { - AsyncContext context = new AsyncContext(); AtomicInteger loadCalls = new AtomicInteger(); AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Collections.emptyList()){ + @Override - Function> loadCommandFunction(Object callback) + Function loadCommandFunction() { - return cmd -> { - TxnId txnId = cmd.txnId(); + return txnId -> { loadCalls.incrementAndGet(); if (txnId.equals(txnId1)) - return promise1; + { + throw failure; + } if (txnId.equals(txnId2)) - return promise2; + { + return notWitnessed(txnId, null); + } throw new AssertionError("Unknown txnId: " + txnId); }; } }; - boolean result = loader.load(context, (u, t) -> { + boolean result = loader.load(new Context(), (u, t) -> { Assert.assertFalse(callback.isDone()); Assert.assertNull(u); Assert.assertEquals(failure, t); @@ -322,6 +322,6 @@ Function> loadCommandFunction(Object callback) }); promise1.tryFailure(failure); - AsyncResults.awaitUninterruptibly(callback); + AsyncChains.getUninterruptibly(callback); } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 53da7b29951b..b2ef10660131 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -18,52 +18,91 @@ package org.apache.cassandra.service.accord.async; +import java.time.Duration; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.RoutingKey; +import accord.impl.SafeCommandsForKey; import accord.local.Command; +import accord.local.Commands; +import accord.local.PreLoadContext; +import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.local.Status; +import accord.primitives.Ballot; +import accord.primitives.FullRoute; import accord.primitives.Keys; +import accord.primitives.PartialDeps; +import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.RoutableKey; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.transform.FilteredPartitions; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordCommand; import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandStore.SafeAccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandsForKey; import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordSafeCommand; +import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FBUtilities; +import org.assertj.core.api.Assertions; +import org.awaitility.Awaitility; +import org.mockito.Mockito; import static accord.local.PreLoadContext.contextFor; -import static accord.utils.async.AsyncChains.awaitUninterruptibly; -import static java.util.Collections.emptyList; +import static accord.utils.Property.qt; +import static accord.utils.async.AsyncChains.getUninterruptibly; import static java.util.Collections.singleton; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.keys; +import static org.apache.cassandra.service.accord.AccordTestUtils.loaded; import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; public class AsyncOperationTest { + private static final Logger logger = LoggerFactory.getLogger(AsyncOperationTest.class); private static final AtomicLong clock = new AtomicLong(0); @BeforeClass @@ -94,8 +133,8 @@ public void optionalCommandTest() throws Throwable Txn txn = createTxn((int)clock.incrementAndGet()); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); - awaitUninterruptibly(commandStore.execute(contextFor(txnId), instance -> { - Command command = instance.ifPresent(txnId); + getUninterruptibly(commandStore.execute(contextFor(txnId), instance -> { + SafeCommand command = instance.ifPresent(txnId); Assert.assertNull(command); })); @@ -110,8 +149,8 @@ public void optionalCommandsForKeyTest() throws Throwable Txn txn = createTxn((int)clock.incrementAndGet()); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); - awaitUninterruptibly(commandStore.execute(contextFor(Collections.emptyList(), Keys.of(key)),instance -> { - AccordCommandsForKey cfk = ((SafeAccordCommandStore)instance).maybeCommandsForKey(key); + getUninterruptibly(commandStore.execute(contextFor(Collections.emptyList(), Keys.of(key)), instance -> { + SafeCommandsForKey cfk = ((AccordSafeCommandStore) instance).maybeCommandsForKey(key); Assert.assertNull(cfk); })); @@ -124,24 +163,65 @@ public void optionalCommandsForKeyTest() throws Throwable } } - private static AccordCommand createCommittedAndPersist(AccordCommandStore commandStore, TxnId txnId, Timestamp executeAt) + private static Command createCommittedAndPersist(AccordCommandStore commandStore, TxnId txnId, Timestamp executeAt) { - AccordCommand command = new AccordCommand(txnId).initialize(); - command.setPartialTxn(createPartialTxn(0)); - command.setExecuteAt(executeAt); - command.setStatus(Status.Committed); - AccordKeyspace.getCommandMutation(commandStore, command, commandStore.nextSystemTimestampMicros()).apply(); - command.clearModifiedFlag(); + Command command = AccordTestUtils.Commands.committed(txnId, createPartialTxn(0), executeAt); + AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); + safeCommand.set(command); + AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); return command; } - private static AccordCommand createCommittedAndPersist(AccordCommandStore commandStore, TxnId txnId) + private static Command createCommittedAndPersist(AccordCommandStore commandStore, TxnId txnId) { return createCommittedAndPersist(commandStore, txnId, txnId); } - private static void assertFutureState(AccordStateCache.Instance cache, TxnId txnId, boolean expectLoadFuture, boolean expectSaveFuture) + private static Command createCommittedUsingLifeCycle(AccordCommandStore commandStore, TxnId txnId) + { + return createCommittedUsingLifeCycle(commandStore, txnId, txnId); + } + + private static Command createCommittedUsingLifeCycle(AccordCommandStore commandStore, TxnId txnId, Timestamp executeAt) + { + PartialTxn partialTxn = createPartialTxn(0); + RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); + FullRoute route = partialTxn.keys().toRoute(routingKey); + Ranges ranges = AccordTestUtils.fullRange(partialTxn.keys()); + PartialRoute partialRoute = route.slice(ranges); + PartialDeps deps = PartialDeps.builder(ranges).build(); + try + { + return getUninterruptibly(commandStore.submit(PreLoadContext.contextFor(Collections.singleton(txnId), partialTxn.keys()), safe -> { + Commands.AcceptOutcome result = Commands.preaccept(safe, txnId, partialTxn, route, null); + if (result != Commands.AcceptOutcome.Success) throw new IllegalStateException("Command mutation rejected: " + result); + + result = Commands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); + if (result != Commands.AcceptOutcome.Success) throw new IllegalStateException("Command mutation rejected: " + result); + + Commands.CommitOutcome commit = Commands.commit(safe, txnId, route, null, partialTxn, executeAt, deps); + if (commit != Commands.CommitOutcome.Success) throw new IllegalStateException("Command mutation rejected: " + result); + + // clear cache + long cacheSize = commandStore.getCacheSize(); + commandStore.setCacheSize(0); + commandStore.setCacheSize(cacheSize); + + return safe.command(txnId).current(); + }).beginAsResult()); + } + catch (ExecutionException e) + { + throw new AssertionError(e); + } + } + + private static void assertFutureState(AccordStateCache.Instance cache, TxnId txnId, boolean referenceExpected, boolean expectLoadFuture, boolean expectSaveFuture) { + if (cache.isReferenced(txnId) != referenceExpected) + throw new AssertionError(referenceExpected ? "Cache reference unexpectedly not found for " + txnId + : "Unexpectedly found cache reference for " + txnId); + cache.cleanupLoadResult(txnId); if (cache.hasLoadResult(txnId) != expectLoadFuture) throw new AssertionError(expectLoadFuture ? "Load future unexpectedly not found for " + txnId : "Unexpectedly found load future for " + txnId); @@ -155,27 +235,28 @@ private static void assertFutureState(AccordStateCache.Instance consumer = instance -> ((AccordCommand)instance.command(txnId)).setStatus(Status.PreApplied); - AsyncOperation operation = new AsyncOperation.ForConsumer(commandStore, singleton(txnId), emptyList(), consumer) + Consumer consumer = safeStore -> safeStore.command(txnId).readyToExecute(); + PreLoadContext ctx = PreLoadContext.contextFor(singleton(txnId), Keys.EMPTY); + AsyncOperation operation = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) { - private AccordStateCache.Instance cache() + private AccordStateCache.Instance cache() { return commandStore.commandCache(); } @Override - AsyncLoader createAsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) + AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds, keys) { + return new AsyncLoader(commandStore, preLoadContext.txnIds(), (Iterable) preLoadContext.keys()) { @Override void state(State state) @@ -183,11 +264,14 @@ void state(State state) switch (state) { case SETUP: + assertFutureState(cache(), txnId, false, false, false); + break; case FINISHED: - assertFutureState(cache(), txnId, false, false); + assertFutureState(cache(), txnId, true, false, false); break; case LOADING: - assertFutureState(cache(), txnId, true, false); + assertFutureState(cache(), txnId, true, true, false); + break; } super.state(state); } @@ -205,11 +289,14 @@ void setState(State state) switch (state) { case SETUP: + assertFutureState(cache(), txnId, true, false, false); + break; case FINISHED: - assertFutureState(cache(), txnId, false, false); + assertFutureState(cache(), txnId, false, false, false); break; case SAVING: - assertFutureState(cache(), txnId, false, true); + assertFutureState(cache(), txnId, true, false, true); + break; } super.setState(state); @@ -220,6 +307,298 @@ void setState(State state) commandStore.executor().submit(operation); - awaitUninterruptibly(operation); + getUninterruptibly(operation); + } + + @Test + public void loadFail() + { + AtomicLong clock = new AtomicLong(0); + // all txn use the same key; 0 + Keys keys = keys(Schema.instance.getTableMetadata("ks", "tbl"), 0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + commandStore.executeBlocking(() -> commandStore.setCacheSize(0)); + Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); + + qt().withPure(false).withExamples(50).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { + before(); // truncate tables + + createCommand(commandStore, rs, ids); + + Map failed = selectFailedTxn(rs, ids); + + assertNoReferences(commandStore, ids, keys); + + PreLoadContext ctx = PreLoadContext.contextFor(ids, keys); + + Consumer consumer = Mockito.mock(Consumer.class); + + AsyncOperation o1 = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) + { + @Override + AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) + { + return new AsyncLoader(commandStore, preLoadContext.txnIds(), (Iterable) preLoadContext.keys()) + { + @Override + Function loadCommandFunction() + { + Function delegate = super.loadCommandFunction(); + return txnId -> { + logger.info("Attempting to load {}; expected to fail? {}", txnId, failed.get(txnId)); + if (!failed.get(txnId)) return delegate.apply(txnId); + + throw new NullPointerException("txn_id " + txnId); + }; + } + }; + } + }; + + AssertionUtils.assertThatThrownBy(() -> getUninterruptibly(o1)) + .hasRootCause() + .isInstanceOf(NullPointerException.class) + .hasNoSuppressedExceptions(); + + Mockito.verifyNoInteractions(consumer); + + assertNoReferences(commandStore, ids, keys); + // the first failed load causes the whole operation to fail, so some ids may still be pending + // to make sure the next operation does not see a PENDING that will fail, wait for all loads to complete + awaitDone(commandStore, ids, keys); + + // can we recover? + AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> store.command(id).readyToExecute())); + getUninterruptibly(o2); + }); + } + + @Test + public void consumerFails() + { + AtomicLong clock = new AtomicLong(0); + // all txn use the same key; 0 + Keys keys = keys(Schema.instance.getTableMetadata("ks", "tbl"), 0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); + + AtomicInteger counter = new AtomicInteger(); + qt().withPure(false).withSeed(3131884991952253478L).withExamples(100).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { + logger.info("Test #{}", counter.incrementAndGet()); + before(); // truncate tables + + createCommand(commandStore, rs, ids); + assertNoReferences(commandStore, ids, keys); + + PreLoadContext ctx = PreLoadContext.contextFor(ids, keys); + + Consumer consumer = Mockito.mock(Consumer.class); + String errorMsg = "txn_ids " + ids; + Mockito.doThrow(new NullPointerException(errorMsg)).when(consumer).accept(Mockito.any()); + + AsyncOperation operation = new AsyncOperation.ForConsumer(commandStore, ctx, consumer); + + AssertionUtils.assertThatThrownBy(() -> getUninterruptibly(operation)) + .hasRootCause() + .isInstanceOf(NullPointerException.class) + .hasMessage(errorMsg) + .hasNoSuppressedExceptions(); + + assertNoReferences(commandStore, ids, keys); + }); + } + + @Test + public void writeFail() + { + AtomicLong clock = new AtomicLong(0); + // all txn use the same key; 0 + Keys keys = keys(Schema.instance.getTableMetadata("ks", "tbl"), 0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); + + qt().withExamples(100).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { + before(); // truncate tables + + createCommand(commandStore, rs, ids); + + Map failed = selectFailedTxn(rs, ids); + + assertNoReferences(commandStore, ids, keys); + + PreLoadContext ctx = PreLoadContext.contextFor(ids, keys); + + Consumer consumer = store -> ids.forEach(id -> store.command(id).readyToExecute()); + + AsyncOperation o1 = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) + { + @Override + AsyncWriter createAsyncWriter(AccordCommandStore commandStore) + { + return new AsyncWriter(commandStore) + { + @Override + protected AsyncWriter.StateMutationFunction writeCommandFunction() + { + StateMutationFunction delegate = super.writeCommandFunction(); + return (store, updated, timestamp) -> { + if (!failed.get(updated.txnId())) return delegate.apply(store, updated, timestamp); + + + Mutation mutation = Mockito.mock(Mutation.class); + Mockito.doThrow(new NullPointerException("txn_id " + updated.txnId())).when(mutation).apply(); + return mutation; + }; + } + }; + } + }; + + Assertions.assertThatThrownBy(() -> getUninterruptibly(o1)); + + + assertNoReferences(commandStore, ids, keys); + assertCanNotEvict(commandStore.commandCache(), failed.entrySet().stream() + .filter(e -> e.getValue()) + .map(e -> e.getKey()) + .collect(Collectors.toList())); + // first write will fail the operation, so make sure to wait for all write results + awaitSaveResult(commandStore.cache()); + + // the command should be ReadyToExecute, so move it forward and allow the save + AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> { + SafeCommand command = store.command(id); + Command current = command.current(); + Assertions.assertThat(current.status()).isEqualTo(Status.ReadyToExecute); + Writes writes = current.partialTxn().execute(current.executeAt(), new TxnData()); + command.preapplied(current, current.txnId(), current.asCommitted().waitingOn(), writes, null); + })); + getUninterruptibly(o2); + + assertNoReferences(commandStore, ids, keys); + assertCanEvict(commandStore.commandCache(), ids); + assertCanEvict(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); + }); + } + + private static void createCommand(AccordCommandStore commandStore, Gen.Random rs, List ids) + { + // to simulate CommandsForKey not being found, use createCommittedAndPersist periodically as it does not update + if (rs.nextBoolean()) ids.forEach(id -> createCommittedAndPersist(commandStore, id)); + else ids.forEach(id -> createCommittedUsingLifeCycle(commandStore, id)); + commandStore.clearCache(); + } + + private static Map selectFailedTxn(Gen.Random rs, List ids) + { + Map failed = Maps.newHashMapWithExpectedSize(ids.size()); + for (TxnId id : ids) + failed.put(id, rs.nextBoolean()); + if (failed.values().stream().allMatch(b -> b == Boolean.FALSE)) + failed.put(ids.get(0), Boolean.TRUE); + return failed; + } + + private static void assertNoReferences(AccordCommandStore commandStore, List ids, Keys keys) + { + AssertionError error = null; + try + { + assertNoReferences(commandStore.commandCache(), ids); + } + catch (AssertionError e) + { + error = e; + } + try + { + //TODO this is due to bad typing for Instance, it doesn't use ? extends RoutableKey + assertNoReferences(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); + } + catch (AssertionError e) + { + if (error == null) error = e; + else error.addSuppressed(e); + } + if (error != null) throw error; + } + + private static void assertNoReferences(AccordStateCache.Instance cache, Iterable keys) + { + AssertionError error = null; + for (T key : keys) + { + AccordStateCache.Node node = cache.getUnsafe(key); + if (node == null) continue; + try + { + Assertions.assertThat(node.referenceCount()) + .describedAs("Key %s found referenced in cache", key) + .isEqualTo(0); + } + catch (AssertionError e) + { + if (error == null) + { + error = e; + } + else + { + error.addSuppressed(e); + } + } + } + if (error != null) throw error; + } + + private static void awaitDone(AccordCommandStore commandStore, List ids, Keys keys) + { + awaitDone(commandStore.commandCache(), ids); + //TODO this is due to bad typing for Instance, it doesn't use ? extends RoutableKey + awaitDone(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); + } + + private static void awaitDone(AccordStateCache.Instance cache, Iterable keys) + { + for (T key : keys) + { + AccordStateCache.Node node = cache.getUnsafe(key); + if (node == null) continue; + Awaitility.await("For node " + node.key() + " to complete") + .atMost(Duration.ofMinutes(1)) + .until(() -> node.isComplete()); + } + } + + private static void awaitSaveResult(AccordStateCache cache) + { + for (Map.Entry> e : cache.saveResults().entrySet()) + AsyncChains.awaitUninterruptibly(e.getValue()); + } + + private static void assertCanEvict(AccordStateCache.Instance cache, Iterable keys) + { + for (T key : keys) + { + AccordStateCache.Node node = cache.getUnsafe(key); + if (node == null) + continue; + Assert.assertTrue("Unable to evict " + node.key(), cache.canEvict(node.key())); + } + } + + private static void assertCanNotEvict(AccordStateCache.Instance cache, Iterable keys) + { + List errors = new ArrayList<>(); + for (T key : keys) + { + if (cache.getUnsafe(key) == null) + { + errors.add(String.format("Node %s was evicted, but should not be", key)); + continue; + } + if (cache.canEvict(key)) errors.add(String.format("Node %s is evictable but should not be", key)); + } + if (!errors.isEmpty()) throw new AssertionError(String.join("\n", errors)); } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java deleted file mode 100644 index 8526daa67552..000000000000 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncWriterTest.java +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.async; - -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; - -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import accord.local.Command; -import accord.local.SaveStatus; -import accord.local.Status; -import accord.primitives.Ranges; -import accord.primitives.Timestamp; -import accord.primitives.Txn; -import accord.primitives.TxnId; -import org.apache.cassandra.SchemaLoader; -import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordCommand; -import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordCommandsForKey; -import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordPartialCommand; -import org.apache.cassandra.service.accord.api.PartitionKey; - -import static accord.local.PreLoadContext.contextFor; -import static com.google.common.collect.Iterables.getOnlyElement; -import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; -import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; -import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; -import static org.apache.cassandra.service.accord.AccordTestUtils.execute; -import static org.apache.cassandra.service.accord.AccordTestUtils.fullRange; -import static org.apache.cassandra.service.accord.AccordTestUtils.timestamp; -import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; - -public class AsyncWriterTest -{ - @BeforeClass - public static void beforeClass() throws Throwable - { - SchemaLoader.prepareServer(); - SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); - StorageService.instance.initServer(); - } - - private static void save(AccordCommandStore commandStore, AsyncContext context) - { - execute(commandStore, () -> { - AsyncWriter writer = new AsyncWriter(commandStore); - while (true) - { - if (writer.save(context, (o, t) -> Assert.assertNull(t))) - break; - } - }); - context.commands.items.values().forEach(AccordCommand::clearModifiedFlag); - context.commandsForKey.items.values().forEach(AccordCommandsForKey::clearModifiedFlag); - } - - @Test - public void waitingOnDenormalization() - { - AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - - TxnId blockingId = txnId(1, clock.incrementAndGet(), 1); - TxnId waitingId = txnId(1, clock.incrementAndGet(), 1); - Txn txn = createTxn(0); - Ranges ranges = fullRange(txn); - AccordCommand blocking = new AccordCommand(blockingId).initialize(); - blocking.setPartialTxn(txn.slice(ranges, true)); - blocking.setExecuteAt(blockingId); - blocking.setStatus(Status.Committed); - AccordKeyspace.getCommandMutation(commandStore, blocking, commandStore.nextSystemTimestampMicros()).apply(); - blocking.clearModifiedFlag(); - - AccordCommand waiting = new AccordCommand(waitingId).initialize(); - waiting.setPartialTxn(txn.slice(ranges, true)); - waiting.setExecuteAt(waitingId); - waiting.setStatus(Status.Committed); - AccordKeyspace.getCommandMutation(commandStore, waiting, commandStore.nextSystemTimestampMicros()).apply(); - waiting.clearModifiedFlag(); - - AsyncContext context = new AsyncContext(); - waiting.addWaitingOnApplyIfAbsent(blocking.txnId(), blocking.executeAt()); - context.commands.add(waiting); - save(commandStore, context); - - // load the blocking command and confirm the waiting command is listed as being blocked - blocking = AccordKeyspace.loadCommand(commandStore, blockingId); - Assert.assertTrue(blocking.blockingApplyOn.getView().contains(waitingId)); - - // now change the blocking command and check its changes are reflected in the waiting command - context = new AsyncContext(); - blocking.setStatus(Status.ReadyToExecute); - context.commands.add(blocking); - save(commandStore, context); - - waiting = AccordKeyspace.loadCommand(commandStore, waitingId); - AccordCommand waitingFinal = waiting; - execute(commandStore, () -> { - AsyncContext ctx = new AsyncContext(); - commandStore.setContext(ctx); - TxnId blockingSummary = waitingFinal.firstWaitingOnApply(null); - Assert.assertEquals(blockingId, blockingSummary); - commandStore.unsetContext(ctx); - }); - } - - @Test - public void commandsPerKeyDenormalization() - { - AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - - TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); - Txn txn = createTxn(0); - Ranges ranges = fullRange(txn); - PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); - - AccordCommandsForKey cfk = new AccordCommandsForKey(commandStore, key).initialize(); - AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); - Assert.assertTrue(cfk.byExecuteAt.isEmpty()); - Assert.assertTrue(cfk.byId.isEmpty()); - - AccordCommand command = new AccordCommand(txnId).initialize(); - command.setPartialTxn(txn.slice(ranges, true)); - command.setExecuteAt(executeAt); - command.setSaveStatus(SaveStatus.AcceptedWithDefinition); - AsyncContext context = new AsyncContext(); - context.commands.add(command); - save(commandStore, context); - - AccordCommandsForKey cfkUncommitted = AccordKeyspace.loadCommandsForKey(commandStore, key); - execute(commandStore, () -> { - AsyncContext ctx = new AsyncContext(); - commandStore.setContext(ctx); - AccordPartialCommand summary = getOnlyElement(cfkUncommitted.byId().all().collect(Collectors.toList())); - Assert.assertTrue(cfkUncommitted.byId.map.getView().containsKey(txnId)); - Assert.assertTrue(cfkUncommitted.byExecuteAt.map.getView().containsKey(executeAt)); - Assert.assertEquals(Status.Accepted, summary.status()); - Assert.assertEquals(executeAt, summary.executeAt()); - commandStore.unsetContext(ctx); - }); - - // commit, summary should be moved to committed maps - command.setStatus(Status.Committed); - context = new AsyncContext(); - context.commands.add(command); - save(commandStore, context); - - AccordCommandsForKey cfkCommitted = AccordKeyspace.loadCommandsForKey(commandStore, key); - execute(commandStore, () -> { - AsyncContext ctx = new AsyncContext(); - commandStore.setContext(ctx); - AccordPartialCommand idSummary = getOnlyElement(cfkCommitted.byId().all().collect(Collectors.toList())); - AccordPartialCommand executeSummary = getOnlyElement(cfkCommitted.byExecuteAt().all().collect(Collectors.toList())); - - Assert.assertTrue(cfkCommitted.byId.map.getView().containsKey(txnId)); - Assert.assertTrue(cfkCommitted.byExecuteAt.map.getView().containsKey(executeAt)); - Assert.assertEquals(idSummary, executeSummary); - - Assert.assertEquals(Status.Committed, idSummary.status()); - Assert.assertEquals(executeAt, idSummary.executeAt()); - commandStore.unsetContext(ctx); - }); - } - - @Test - public void partialCommandDenormalization() - { - AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - - TxnId blockingId = txnId(1, clock.incrementAndGet(), 1); - TxnId waitingId = txnId(1, clock.incrementAndGet(), 1); - Txn txn = createTxn(0); - Ranges ranges = fullRange(txn); - - { - AccordCommand blocking = new AccordCommand(blockingId).initialize(); - blocking.setPartialTxn(txn.slice(ranges, true)); - blocking.setExecuteAt(blockingId); - blocking.setStatus(Status.Committed); - - AccordCommand waiting = new AccordCommand(waitingId).initialize(); - waiting.setPartialTxn(txn.slice(ranges, true)); - waiting.setExecuteAt(waitingId); - waiting.setStatus(Status.Committed); - waiting.addWaitingOnApplyIfAbsent(blocking.txnId(), blocking.executeAt()); - - blocking.addListener(waiting); - - AccordKeyspace.getCommandMutation(commandStore, blocking, commandStore.nextSystemTimestampMicros()).apply(); - AccordKeyspace.getCommandMutation(commandStore, waiting, commandStore.nextSystemTimestampMicros()).apply(); - blocking.clearModifiedFlag(); - waiting.clearModifiedFlag(); - } - - // confirm the blocking operation has the waiting one as a listener - commandStore.execute(contextFor(blockingId), cs -> { - AccordCommand blocking = (AccordCommand) cs.command(blockingId); - Assert.assertTrue(blocking.hasListenerFor(waitingId)); - }); - - // remove listener from PartialCommand - commandStore.execute(contextFor(waitingId), cs -> { - Command waiting = cs.command(waitingId); - TxnId blocking = ((AccordCommand)waiting).firstWaitingOnApply(null); - Assert.assertNotNull(blocking); - Assert.assertEquals(blockingId, blocking); - }); - - // confirm it was propagated to the full command - commandStore.execute(contextFor(blockingId), cs -> { - AccordCommand blocking = (AccordCommand) cs.command(blockingId); - Assert.assertFalse(blocking.hasListenerFor(waitingId)); - }); - } -} diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java new file mode 100644 index 000000000000..1cc800f5dc40 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.nio.ByteBuffer; +import java.util.List; + +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.impl.CommandsForKey; +import accord.primitives.TxnId; +import accord.utils.Gens; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.AccordGenerators; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.assertj.core.api.Assertions.assertThat; + +public class CommandsForKeySerializerTest +{ + @BeforeClass + public static void beforeClass() throws Throwable + { + // need to create the accord test table as generating random txn is not currently supported + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + } + + @Test + public void serdeDeps() + { + DataOutputBuffer buffer = new DataOutputBuffer(); + int version = MessagingService.Version.VERSION_40.value; + qt().forAll(Gens.lists(AccordGenerators.ids()).ofSizeBetween(0, 10)).check(ids -> { + buffer.clear(); + + long expectedSize = CommandsForKeySerializer.depsIdSerializer.serializedSize(ids, version); + + CommandsForKeySerializer.depsIdSerializer.serialize(ids, buffer, version); + assertThat(buffer.position()).isEqualTo(expectedSize); + try (DataInputBuffer in = new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)) + { + List read = CommandsForKeySerializer.depsIdSerializer.deserialize(in, version); + assertThat(read).isEqualTo(ids); + } + }); + } + + @Test + public void serde() + { + CommandsForKey.CommandLoader loader = CommandsForKeySerializer.loader; + qt().forAll(AccordGenerators.commands()).check(cmd -> { + ByteBuffer bb = loader.saveForCFK(cmd); + int size = bb.remaining(); + + assertThat(loader.txnId(bb)).isEqualTo(cmd.txnId()); + assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + + assertThat(loader.executeAt(bb)).isEqualTo(cmd.executeAt()); + assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + + assertThat(loader.saveStatus(bb)).isEqualTo(cmd.saveStatus()); + assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + + assertThat(loader.depsIds(bb)).isEqualTo(cmd.partialDeps() == null ? null : cmd.partialDeps().txnIds()); + assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + }); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java b/test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java deleted file mode 100644 index d7062a5e0874..000000000000 --- a/test/unit/org/apache/cassandra/service/accord/store/StoredMapTest.java +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import java.util.Collections; -import java.util.HashSet; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeMap; - -import org.junit.Assert; -import org.junit.Test; - -import org.apache.cassandra.service.accord.AccordState; - -import static org.apache.cassandra.service.accord.store.StoredValueTest.assertISE; - -public class StoredMapTest -{ - - private static NavigableMap getAdditions(StoredNavigableMap map) - { - NavigableMap result = new TreeMap<>(); - map.forEachAddition(result::put); - return result; - } - - private static Set getDeletions(StoredNavigableMap map) - { - Set result = new HashSet<>(); - map.forEachDeletion(result::add); - return result; - } - - @Test - public void loadMap() - { - NavigableMap expectedData = new TreeMap<>(); - expectedData.put(1, 2); - expectedData.put(5, 6); - - StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); - - // no values loaded, getView should fail - assertISE(map::getView); - - map.load(new TreeMap<>(expectedData)); - Assert.assertEquals(expectedData, map.getView()); - Assert.assertFalse(map.hasModifications()); - Assert.assertFalse(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - - // check additions - NavigableMap expectedAdditions = new TreeMap<>(); - expectedAdditions.put(3, 4); - expectedData.put(3, 4); - map.blindPut(3, 4); - Assert.assertEquals(expectedData, map.getView()); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - - Assert.assertEquals(expectedAdditions, getAdditions(map)); - - // check deletions - Set expectedDeletions = new HashSet<>(); - expectedDeletions.add(5); - expectedDeletions.add(6); - map.blindRemove(5); - map.blindRemove(6); - expectedData.remove(5); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertTrue(map.hasDeletions()); - - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - map.clearModifiedFlag(); - Assert.assertFalse(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - Assert.assertTrue(getAdditions(map).isEmpty()); - Assert.assertTrue(getDeletions(map).isEmpty()); - - map.unload(); - assertISE(map::getView); - Assert.assertFalse(map.hasModifications()); - Assert.assertFalse(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - } - - @Test - public void unloadedAddsAndRemoves() - { - StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); - assertISE(map::getView); - - // check additions - NavigableMap expectedAdditions = new TreeMap<>(); - expectedAdditions.put(3, 4); - map.blindPut(3, 4); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - - Assert.assertEquals(expectedAdditions, getAdditions(map)); - - // check deletions - Set expectedDeletions = new HashSet<>(); - expectedDeletions.add(5); - expectedDeletions.add(6); - map.blindRemove(5); - map.blindRemove(6); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertTrue(map.hasDeletions()); - - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - // still shouldn't be able to read a complete map - assertISE(map::getView); - } - - // deleting a key should remove it from additions - @Test - public void additionDeletionCanceling() - { - NavigableMap expectedData = new TreeMap<>(); - NavigableMap expectedAdditions = new TreeMap<>(); - Set expectedDeletions = new HashSet<>(); - - StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); - map.load(new TreeMap<>()); - Assert.assertEquals(expectedData, map.getView()); - - // add - map.blindPut(1, 2); - map.blindPut(3, 4); - - expectedData.put(1, 2); - expectedData.put(3, 4); - expectedAdditions.put(1, 2); - expectedAdditions.put(3, 4); - - Assert.assertEquals(expectedData, map.getView()); - Assert.assertEquals(expectedAdditions, getAdditions(map)); - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - // remove - map.blindRemove(3); - expectedData.remove(3); - expectedAdditions.remove(3); - expectedDeletions.add(3); - Assert.assertEquals(expectedData, map.getView()); - Assert.assertEquals(expectedAdditions, getAdditions(map)); - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - - } - - @Test - public void clearMap() - { - NavigableMap expectedData = new TreeMap<>(); - NavigableMap expectedAdditions = new TreeMap<>(); - - expectedData.put(1, 2); - StoredNavigableMap map = new StoredNavigableMap<>(AccordState.ReadWrite.FULL); - map.load(new TreeMap<>(expectedData)); - Assert.assertEquals(expectedData, map.getView()); - - map.clear(); - expectedData.clear(); - Assert.assertEquals(expectedData, map.getView()); - - map.blindPut(3, 4); - map.blindPut(5, 6); - map.blindRemove(3); - - // since this will be written with a range tombstone, deletes shouldn't be tracked - expectedData.put(5, 6); - expectedAdditions.put(5, 6); - - Assert.assertEquals(expectedData, map.getView()); - Assert.assertEquals(expectedAdditions, getAdditions(map)); - Assert.assertEquals(Collections.emptySet(), getDeletions(map)); - } -} diff --git a/test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java b/test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java deleted file mode 100644 index 8099dc4e97c8..000000000000 --- a/test/unit/org/apache/cassandra/service/accord/store/StoredSetTest.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import java.util.Collections; -import java.util.HashSet; -import java.util.NavigableSet; -import java.util.Set; -import java.util.TreeSet; - -import org.junit.Assert; -import org.junit.Test; - -import org.apache.cassandra.service.accord.AccordState; - -import static org.apache.cassandra.service.accord.store.StoredValueTest.assertISE; - -public class StoredSetTest -{ - private static NavigableSet getAdditions(StoredSet.Navigable set) - { - NavigableSet result = new TreeSet<>(); - set.forEachAddition(result::add); - return result; - } - - private static Set getDeletions(StoredSet.Navigable set) - { - Set result = new HashSet<>(); - set.forEachDeletion(result::add); - return result; - } - - @Test - public void loadMap() - { - NavigableSet expected = new TreeSet<>(); - expected.add(1); - expected.add(5); - - StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); - - // no values loaded, getView should fail - assertISE(map::getView); - - map.load(new TreeSet<>(expected)); - Assert.assertEquals(expected, map.getView()); - Assert.assertFalse(map.hasModifications()); - Assert.assertFalse(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - - // check additions - NavigableSet expectedAdditions = new TreeSet<>(); - expectedAdditions.add(3); - expected.add(3); - map.blindAdd(3); - Assert.assertEquals(expected, map.getView()); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - - Assert.assertEquals(expectedAdditions, getAdditions(map)); - - // check deletions - Set expectedDeletions = new HashSet<>(); - expectedDeletions.add(5); - expectedDeletions.add(6); - map.blindRemove(5); - map.blindRemove(6); - expected.remove(5); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertTrue(map.hasDeletions()); - - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - map.clearModifiedFlag(); - Assert.assertFalse(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - Assert.assertTrue(getAdditions(map).isEmpty()); - Assert.assertTrue(getDeletions(map).isEmpty()); - - map.unload(); - assertISE(map::getView); - Assert.assertFalse(map.hasModifications()); - Assert.assertFalse(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - } - - @Test - public void unloadedAddsAndRemoves() - { - StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); - assertISE(map::getView); - - // check additions - NavigableSet expectedAdditions = new TreeSet<>(); - expectedAdditions.add(3); - map.blindAdd(3); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertFalse(map.hasDeletions()); - - Assert.assertEquals(expectedAdditions, getAdditions(map)); - - // check deletions - Set expectedDeletions = new HashSet<>(); - expectedDeletions.add(5); - expectedDeletions.add(6); - map.blindRemove(5); - map.blindRemove(6); - Assert.assertTrue(map.hasModifications()); - Assert.assertTrue(map.hasAdditions()); - Assert.assertTrue(map.hasDeletions()); - - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - // still shouldn't be able to read a complete map - assertISE(map::getView); - } - - // deleting a key should remove it from additions - @Test - public void additionDeletionCanceling() - { - NavigableSet expectedData = new TreeSet<>(); - NavigableSet expectedAdditions = new TreeSet<>(); - Set expectedDeletions = new HashSet<>(); - - StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); - map.load(new TreeSet<>()); - Assert.assertEquals(expectedData, map.getView()); - - // add - map.blindAdd(1); - map.blindAdd(3); - - expectedData.add(1); - expectedData.add(3); - expectedAdditions.add(1); - expectedAdditions.add(3); - - Assert.assertEquals(expectedData, map.getView()); - Assert.assertEquals(expectedAdditions, getAdditions(map)); - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - // remove - map.blindRemove(3); - expectedData.remove(3); - expectedAdditions.remove(3); - expectedDeletions.add(3); - Assert.assertEquals(expectedData, map.getView()); - Assert.assertEquals(expectedAdditions, getAdditions(map)); - Assert.assertEquals(expectedDeletions, getDeletions(map)); - - - } - - @Test - public void clearMap() - { - NavigableSet expectedData = new TreeSet<>(); - NavigableSet expectedAdditions = new TreeSet<>(); - - expectedData.add(1); - StoredSet.Navigable map = new StoredSet.Navigable<>(AccordState.ReadWrite.FULL); - map.load(new TreeSet<>(expectedData)); - Assert.assertEquals(expectedData, map.getView()); - - map.clear(); - expectedData.clear(); - Assert.assertEquals(expectedData, map.getView()); - - map.blindAdd(3); - map.blindAdd(5); - map.blindRemove(3); - - // since this will be written with a range tombstone, deletes shouldn't be tracked - expectedData.add(5); - expectedAdditions.add(5); - - Assert.assertEquals(expectedData, map.getView()); - Assert.assertEquals(expectedAdditions, getAdditions(map)); - Assert.assertEquals(Collections.emptySet(), getDeletions(map)); - } -} diff --git a/test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java b/test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java deleted file mode 100644 index d0250a982764..000000000000 --- a/test/unit/org/apache/cassandra/service/accord/store/StoredValueTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.store; - -import org.junit.Assert; -import org.junit.Test; - -import org.apache.cassandra.service.accord.AccordState; - -public class StoredValueTest -{ - static void assertISE(Runnable runnable) - { - try - { - runnable.run(); - Assert.fail("Expected IllegalStateException"); - } - catch (IllegalStateException e) - { - // noop - } - } - - @Test - public void storedValueTest() - { - StoredValue value = new StoredValue<>(AccordState.ReadWrite.FULL); - // value is unloaded, read should fail - assertISE(value::get); - - value.load(5); - Assert.assertFalse(value.hasModifications()); - Assert.assertEquals(Integer.valueOf(5), value.get()); - - value.set(6); - Assert.assertTrue(value.hasModifications()); - Assert.assertEquals(Integer.valueOf(6), value.get()); - - // loading into an unsaved field should fail - assertISE(() -> value.load(7)); - - value.clearModifiedFlag(); - Assert.assertFalse(value.hasModifications()); - Assert.assertEquals(Integer.valueOf(6), value.get()); - - value.unload(); - // value is unloaded again, read should fail - assertISE(() -> value.get()); - } - - @Test - public void historyPreservingTest() - { - StoredValue.HistoryPreserving value = new StoredValue.HistoryPreserving<>(AccordState.ReadWrite.FULL); - value.load(5); - - Assert.assertEquals(Integer.valueOf(5), value.get()); - Assert.assertEquals(Integer.valueOf(5), value.previous()); - - value.set(6); - Assert.assertEquals(Integer.valueOf(6), value.get()); - Assert.assertEquals(Integer.valueOf(5), value.previous()); - - value.clearModifiedFlag(); - Assert.assertEquals(Integer.valueOf(6), value.get()); - Assert.assertEquals(Integer.valueOf(6), value.previous()); - } -} diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java new file mode 100644 index 000000000000..e85b6e3a51eb --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.function.ToIntFunction; +import java.util.function.ToLongFunction; + +import accord.local.Command; +import accord.local.Node; +import accord.primitives.PartialTxn; +import accord.primitives.Routable; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.service.accord.AccordTestUtils; + +import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; + +public class AccordGenerators +{ + private AccordGenerators() {} + + public static Gen.LongGen epochs() + { + return Gens.longs().between(0, Timestamp.MAX_EPOCH); + } + + public static Gen ids() + { + return ids(epochs()::nextLong, Gen.Random::nextLong, Gen.Random::nextInt); + } + + public static Gen ids(ToLongFunction epochs, ToLongFunction hlcs, ToIntFunction nodes) + { + Gen kinds = Gens.enums().all(Txn.Kind.class); + Gen domains = Gens.enums().all(Routable.Domain.class); + return rs -> new TxnId(epochs.applyAsLong(rs), hlcs.applyAsLong(rs), kinds.next(rs), domains.next(rs), new Node.Id(nodes.applyAsInt(rs))); + } + + private enum SupportedCommandTypes { notWitnessed, preaccepted, committed } + + public static Gen commands() + { + Gen ids = ids(); + //TODO switch to Status once all types are supported + Gen supportedTypes = Gens.enums().all(SupportedCommandTypes.class); + //TODO goes against fuzz testing, and also limits to a very specific table existing... + // There is a branch that can generate random transactions, so maybe look into that? + PartialTxn txn = createPartialTxn(0); + return rs -> { + TxnId id = ids.next(rs); + Timestamp executeAt = id; + if (rs.nextBoolean()) + executeAt = ids.next(rs); + SupportedCommandTypes targetType = supportedTypes.next(rs); + switch (targetType) + { + case notWitnessed: return AccordTestUtils.Commands.notWitnessed(id, txn); + case preaccepted: return AccordTestUtils.Commands.preaccepted(id, txn, executeAt); + case committed: return AccordTestUtils.Commands.committed(id, txn, executeAt); + default: throw new UnsupportedOperationException("Unexpected type: " + targetType); + } + }; + } + +} diff --git a/test/unit/org/apache/cassandra/utils/AssertionUtils.java b/test/unit/org/apache/cassandra/utils/AssertionUtils.java index c122a95315b2..2ec07f756553 100644 --- a/test/unit/org/apache/cassandra/utils/AssertionUtils.java +++ b/test/unit/org/apache/cassandra/utils/AssertionUtils.java @@ -22,8 +22,12 @@ import com.google.common.base.Throwables; +import org.assertj.core.api.AbstractThrowableAssert; import org.assertj.core.api.Assertions; import org.assertj.core.api.Condition; +import org.assertj.core.api.ThrowableAssert; +import org.assertj.core.error.BasicErrorMessageFactory; +import org.assertj.core.internal.Failures; public class AssertionUtils { @@ -165,4 +169,31 @@ public boolean matches(Throwable value) } }; } + + public static ThrowableAssertPlus assertThatThrownBy(ThrowableAssert.ThrowingCallable fn) + { + return new ThrowableAssertPlus(Assertions.catchThrowable(fn)).hasBeenThrown(); + } + + public static class ThrowableAssertPlus extends AbstractThrowableAssert + { + public ThrowableAssertPlus(Throwable actual) + { + super(actual, ThrowableAssertPlus.class); + } + + @Override + protected ThrowableAssertPlus hasBeenThrown() + { + return super.hasBeenThrown(); + } + + public ThrowableAssertPlus hasRootCause() + { + Throwable cause = Throwables.getRootCause(actual); + if (cause == actual) + throw Failures.instance().failure(this.info, new BasicErrorMessageFactory("%nExpected a root cause but cause was null", new Object[0])); + return new ThrowableAssertPlus(cause); + } + } } From 6c62cc4ffadf02cace7da180bfba38ab5a9adfaf Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 9 Mar 2023 15:20:48 -0800 Subject: [PATCH 046/340] Remove git hook for pre-push as it is redundant and causes issues when merging to mainline patch by David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-18309 --- .../git-hooks/pre-push/100-push-submodules.sh | 51 ------------------- .build/git/install-git-defaults.sh | 1 - 2 files changed, 52 deletions(-) delete mode 100755 .build/git/git-hooks/pre-push/100-push-submodules.sh diff --git a/.build/git/git-hooks/pre-push/100-push-submodules.sh b/.build/git/git-hooks/pre-push/100-push-submodules.sh deleted file mode 100755 index c3daa9559748..000000000000 --- a/.build/git/git-hooks/pre-push/100-push-submodules.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Redirect output to stderr. -exec 1>&2 - -#set -o xtrace -set -o errexit -set -o pipefail -set -o nounset - -bin="$(cd "$(dirname "$0")" > /dev/null; pwd)" - -_main() { - # In case the usage happens at a different layer, make sure to cd to the toplevel - local root_dir - root_dir="$(git rev-parse --show-toplevel)" - cd "$root_dir" - - if [[ ! -e .gitmodules ]]; then - # nothing to see here, look away! - return 0 - fi - - local -r cmd=' -branch="$(git rev-parse --abbrev-ref HEAD)" -[[ "$branch" == "HEAD" ]] && exit 0 - -default_remote="$(git config --local --get branch."${branch}".remote || true)" -remote="${default_remote:-origin}" - -git push --atomic "$remote" "$branch" -' - git submodule foreach --recursive "$cmd" -} - -_main "$@" diff --git a/.build/git/install-git-defaults.sh b/.build/git/install-git-defaults.sh index 00f1dc435dbe..7c26ed5eda7c 100755 --- a/.build/git/install-git-defaults.sh +++ b/.build/git/install-git-defaults.sh @@ -89,7 +89,6 @@ _install_hooks() { install_hook "$git_dir" "post-checkout" true install_hook "$git_dir" "post-switch" false install_hook "$git_dir" "pre-commit" false - install_hook "$git_dir" "pre-push" false } _git_config_set() { From f0924611144c18e4277e25ec34cafbbf62d55d41 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 9 Mar 2023 18:05:20 -0800 Subject: [PATCH 047/340] CEP-15: (Accord) Migrate Accord away from JDK random to a new interface RandomSource patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-18213 --- modules/accord | 2 +- .../service/accord/AccordService.java | 4 +-- .../accord/async/AsyncOperationTest.java | 5 ++-- .../CommandsForKeySerializerTest.java | 3 ++- .../cassandra/utils/AccordGenerators.java | 26 ++----------------- 5 files changed, 10 insertions(+), 30 deletions(-) diff --git a/modules/accord b/modules/accord index f607a05b76df..bc81f81c75f9 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit f607a05b76df32b39c97a6e49068ae35057be98a +Subproject commit bc81f81c75f93c73989a30bbc51b5c241a893c1a diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 7e68da36b042..a86cb70c5369 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -19,7 +19,6 @@ package org.apache.cassandra.service.accord; import java.util.Arrays; -import java.util.Random; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -40,6 +39,7 @@ import accord.primitives.Txn; import accord.primitives.TxnId; import accord.topology.TopologyManager; +import accord.utils.DefaultRandom; import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Shutdownable; import accord.utils.async.AsyncResult; @@ -144,7 +144,7 @@ private AccordService() () -> null, new KeyspaceSplitter(new EvenSplit<>(getConcurrentAccordOps(), getPartitioner().accordSplitter())), new AccordAgent(), - new Random(), + new DefaultRandom(), scheduler, SizeOfIntersectionSorter.SUPPLIER, SimpleProgressLog::new, diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index b2ef10660131..37b1f556af15 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -62,6 +62,7 @@ import accord.primitives.Writes; import accord.utils.Gen; import accord.utils.Gens; +import accord.utils.RandomSource; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import org.apache.cassandra.SchemaLoader; @@ -481,7 +482,7 @@ protected AsyncWriter.StateMutationFunction writeCommandFunct }); } - private static void createCommand(AccordCommandStore commandStore, Gen.Random rs, List ids) + private static void createCommand(AccordCommandStore commandStore, RandomSource rs, List ids) { // to simulate CommandsForKey not being found, use createCommittedAndPersist periodically as it does not update if (rs.nextBoolean()) ids.forEach(id -> createCommittedAndPersist(commandStore, id)); @@ -489,7 +490,7 @@ private static void createCommand(AccordCommandStore commandStore, Gen.Random rs commandStore.clearCache(); } - private static Map selectFailedTxn(Gen.Random rs, List ids) + private static Map selectFailedTxn(RandomSource rs, List ids) { Map failed = Maps.newHashMapWithExpectedSize(ids.size()); for (TxnId id : ids) diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 1cc800f5dc40..547b03c10ce9 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -26,6 +26,7 @@ import accord.impl.CommandsForKey; import accord.primitives.TxnId; +import accord.utils.AccordGens; import accord.utils.Gens; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.io.util.DataInputBuffer; @@ -56,7 +57,7 @@ public void serdeDeps() { DataOutputBuffer buffer = new DataOutputBuffer(); int version = MessagingService.Version.VERSION_40.value; - qt().forAll(Gens.lists(AccordGenerators.ids()).ofSizeBetween(0, 10)).check(ids -> { + qt().forAll(Gens.lists(AccordGens.txnIds()).ofSizeBetween(0, 10)).check(ids -> { buffer.clear(); long expectedSize = CommandsForKeySerializer.depsIdSerializer.serializedSize(ids, version); diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index e85b6e3a51eb..af5198538eb3 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -18,48 +18,26 @@ package org.apache.cassandra.utils; -import java.util.function.ToIntFunction; -import java.util.function.ToLongFunction; - import accord.local.Command; -import accord.local.Node; import accord.primitives.PartialTxn; -import accord.primitives.Routable; import accord.primitives.Timestamp; -import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.Gen; import accord.utils.Gens; import org.apache.cassandra.service.accord.AccordTestUtils; +import static accord.utils.AccordGens.txnIds; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; public class AccordGenerators { private AccordGenerators() {} - public static Gen.LongGen epochs() - { - return Gens.longs().between(0, Timestamp.MAX_EPOCH); - } - - public static Gen ids() - { - return ids(epochs()::nextLong, Gen.Random::nextLong, Gen.Random::nextInt); - } - - public static Gen ids(ToLongFunction epochs, ToLongFunction hlcs, ToIntFunction nodes) - { - Gen kinds = Gens.enums().all(Txn.Kind.class); - Gen domains = Gens.enums().all(Routable.Domain.class); - return rs -> new TxnId(epochs.applyAsLong(rs), hlcs.applyAsLong(rs), kinds.next(rs), domains.next(rs), new Node.Id(nodes.applyAsInt(rs))); - } - private enum SupportedCommandTypes { notWitnessed, preaccepted, committed } public static Gen commands() { - Gen ids = ids(); + Gen ids = txnIds(); //TODO switch to Status once all types are supported Gen supportedTypes = Gens.enums().all(SupportedCommandTypes.class); //TODO goes against fuzz testing, and also limits to a very specific table existing... From ea71336278f7a4a9ed129bcd15655bf8c91bb46b Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Thu, 16 Mar 2023 18:43:20 +0100 Subject: [PATCH 048/340] Improve transaction statement validation patch by Jacek Lewandowski; reviewed by David Capwell and Caleb Rackliffe for CASSANDRA-18302 --- src/antlr/Parser.g | 38 ++++++++-- .../cassandra/cql3/StatementSource.java | 76 +++++++++++++++++++ .../cql3/statements/BatchStatement.java | 50 +++++++++--- .../cql3/statements/DeleteStatement.java | 27 +++++-- .../statements/ModificationStatement.java | 53 ++++++++++--- .../cql3/statements/SelectStatement.java | 34 +++++++-- .../cql3/statements/TransactionStatement.java | 65 ++++++++-------- .../cql3/statements/UpdateStatement.java | 49 ++++++++---- .../org/apache/cassandra/db/view/View.java | 22 ++++-- .../cassandra/cql3/StatementSourceTest.java | 61 +++++++++++++++ .../statements/TransactionStatementTest.java | 26 ++++--- 11 files changed, 387 insertions(+), 114 deletions(-) create mode 100644 src/java/org/apache/cassandra/cql3/StatementSource.java create mode 100644 test/unit/org/apache/cassandra/cql3/StatementSourceTest.java diff --git a/src/antlr/Parser.g b/src/antlr/Parser.g index 8ac85ec1f4d7..f9fc2842df91 100644 --- a/src/antlr/Parser.g +++ b/src/antlr/Parser.g @@ -34,6 +34,8 @@ options { protected List references; + private Token statementBeginMarker; + public static final Set reservedTypeNames = new HashSet() {{ add("byte"); @@ -216,6 +218,19 @@ options { { // Do nothing. } + + public Token stmtBegins() + { + statementBeginMarker = input.LT(1); + return statementBeginMarker; + } + + public StatementSource stmtSrc() + { + StatementSource stmtSrc = StatementSource.create(statementBeginMarker); + statementBeginMarker = null; + return stmtSrc; + } } /** STATEMENTS **/ @@ -292,6 +307,7 @@ selectStatement returns [SelectStatement.RawStatement expr] List groups = new ArrayList<>(); boolean allowFiltering = false; boolean isJson = false; + stmtBegins(); } : K_SELECT // json is a valid column name. By consequence, we need to resolve the ambiguity for "json - json" @@ -311,7 +327,7 @@ selectStatement returns [SelectStatement.RawStatement expr] isJson, null); WhereClause where = wclause == null ? WhereClause.empty() : wclause.build(); - $expr = new SelectStatement.RawStatement(cf, params, $sclause.selectors, where, limit, perPartitionLimit); + $expr = new SelectStatement.RawStatement(cf, params, $sclause.selectors, where, limit, perPartitionLimit, stmtSrc()); } ; @@ -324,11 +340,12 @@ letStatement returns [SelectStatement.RawStatement expr] Term.Raw limit = null; } : K_LET txnVar=IDENT '=' - '(' K_SELECT assignments=letSelectors K_FROM cf=columnFamilyName K_WHERE wclause=whereClause ( K_LIMIT rows=intValue { limit = rows; } )? ')' + '(' { stmtBegins(); } K_SELECT assignments=letSelectors K_FROM cf=columnFamilyName K_WHERE wclause=whereClause ( K_LIMIT rows=intValue { limit = rows; } )? ')' { SelectStatement.Parameters params = new SelectStatement.Parameters(Collections.emptyList(), Collections.emptyList(), false, false, false, $txnVar.text); WhereClause where = wclause == null ? WhereClause.empty() : wclause.build(); - $expr = new SelectStatement.RawStatement(cf, params, assignments, where, limit, null); + + $expr = new SelectStatement.RawStatement(cf, params, assignments, where, limit, null, stmtSrc()); } ; @@ -535,6 +552,9 @@ groupByClause[List groups] * */ insertStatement returns [ModificationStatement.Parsed expr] + @init { + stmtBegins(); + } : K_INSERT K_INTO cf=columnFamilyName ( st1=normalInsertStatement[cf] { $expr = st1; } | K_JSON st2=jsonInsertStatement[cf] { $expr = st2; }) @@ -553,7 +573,7 @@ normalInsertStatement [QualifiedName qn] returns [UpdateStatement.ParsedInsert e ( K_IF K_NOT K_EXISTS { ifNotExists = true; } )? ( usingClause[attrs] )? { - $expr = new UpdateStatement.ParsedInsert(qn, attrs, columnNames, values, ifNotExists); + $expr = new UpdateStatement.ParsedInsert(qn, attrs, columnNames, values, ifNotExists, stmtSrc()); } ; @@ -573,7 +593,7 @@ jsonInsertStatement [QualifiedName qn] returns [UpdateStatement.ParsedInsertJson ( K_IF K_NOT K_EXISTS { ifNotExists = true; } )? ( usingClause[attrs] )? { - $expr = new UpdateStatement.ParsedInsertJson(qn, attrs, val, defaultUnset, ifNotExists); + $expr = new UpdateStatement.ParsedInsertJson(qn, attrs, val, defaultUnset, ifNotExists, stmtSrc()); } ; @@ -604,6 +624,7 @@ updateStatement returns [UpdateStatement.ParsedUpdate expr] Attributes.Raw attrs = new Attributes.Raw(); UpdateStatement.OperationCollector operations = new UpdateStatement.OperationCollector(); boolean ifExists = false; + stmtBegins(); } : K_UPDATE cf=columnFamilyName ( usingClause[attrs] )? @@ -617,7 +638,8 @@ updateStatement returns [UpdateStatement.ParsedUpdate expr] wclause.build(), conditions == null ? Collections.emptyList() : conditions, ifExists, - isParsingTxn); + isParsingTxn, + stmtSrc()); } ; @@ -638,6 +660,7 @@ deleteStatement returns [DeleteStatement.Parsed expr] Attributes.Raw attrs = new Attributes.Raw(); List columnDeletions = Collections.emptyList(); boolean ifExists = false; + stmtBegins(); } : K_DELETE ( dels=deleteSelection { columnDeletions = dels; } )? K_FROM cf=columnFamilyName @@ -650,7 +673,8 @@ deleteStatement returns [DeleteStatement.Parsed expr] columnDeletions, wclause.build(), conditions == null ? Collections.emptyList() : conditions, - ifExists); + ifExists, + stmtSrc()); } ; diff --git a/src/java/org/apache/cassandra/cql3/StatementSource.java b/src/java/org/apache/cassandra/cql3/StatementSource.java new file mode 100644 index 000000000000..2f07ec4f53d9 --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/StatementSource.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3; + +import java.util.Objects; + +import org.antlr.runtime.Token; + +import static java.lang.Math.max; +import static java.lang.Math.min; + +public class StatementSource +{ + public static final StatementSource INTERNAL = new StatementSource(0, 0); + + public final int line; + public final int charPositionInLine; + + public StatementSource(int line, int charPositionInLine) + { + this.line = line; + this.charPositionInLine = charPositionInLine; + } + + @Override + public String toString() + { + if (this == INTERNAL) + { + return "<<>>"; + } + else + { + if (!isEmpty()) + return String.format("at [%d:%d]", line + 1, charPositionInLine + 1); + else + return ""; + } + } + + public boolean isEmpty() + { + return line > Character.MAX_VALUE || line == Character.MAX_VALUE && charPositionInLine > Character.MAX_VALUE; + } + + // note - this can also reproduce the original statement raw text by getting TokenStream and calling toString(startToken, endToken) + public static StatementSource create(Token startToken) + { + Objects.requireNonNull(startToken); + + if (startToken.getType() == Token.EOF) + return new StatementSource(Character.MAX_VALUE + 1, 0); + + int startLine = min(max(startToken.getLine(), 1) - 1, Character.MAX_VALUE); + int startChar = min(max(startToken.getCharPositionInLine(), 0), Character.MAX_VALUE); + + return new StatementSource(startLine, startChar); + } + +} diff --git a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java index c99dac31219c..939d7df767bb 100644 --- a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java @@ -18,7 +18,16 @@ package org.apache.cassandra.cql3.statements; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import com.google.common.annotations.VisibleForTesting; @@ -32,19 +41,37 @@ import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; -import org.apache.cassandra.db.guardrails.Guardrails; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.*; -import org.apache.cassandra.db.*; +import org.apache.cassandra.cql3.Attributes; +import org.apache.cassandra.cql3.BatchQueryOptions; +import org.apache.cassandra.cql3.CQLStatement; +import org.apache.cassandra.cql3.ColumnSpecification; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.ResultSet; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.IMutation; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.Slice; +import org.apache.cassandra.db.Slices; +import org.apache.cassandra.db.guardrails.Guardrails; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.RowIterator; -import org.apache.cassandra.exceptions.*; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.exceptions.RequestExecutionException; +import org.apache.cassandra.exceptions.RequestValidationException; +import org.apache.cassandra.exceptions.UnauthorizedException; import org.apache.cassandra.metrics.BatchMetrics; import org.apache.cassandra.metrics.ClientRequestSizeMetrics; -import org.apache.cassandra.service.*; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.ClientWarn; +import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.transport.messages.ResultMessage; @@ -53,7 +80,6 @@ import org.apache.cassandra.utils.Pair; import static java.util.function.Predicate.isEqual; - import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; /** @@ -202,7 +228,7 @@ public void validate() throws InvalidRequestException for (ModificationStatement statement : statements) { if (timestampSet && statement.isTimestampSet()) - throw new InvalidRequestException("Timestamp must be set either on BATCH or individual statements"); + throw new InvalidRequestException("Timestamp must be set either on BATCH or individual statements: " + statement.source); if (statement.isCounter()) hasCounters = true; @@ -243,7 +269,7 @@ public void validate() throws InvalidRequestException for (ModificationStatement stmt : statements) { if (ksName != null && (!stmt.keyspace().equals(ksName) || !stmt.table().equals(cfName))) - throw new InvalidRequestException("Batch with conditions cannot span multiple tables"); + throw new InvalidRequestException("Batch with conditions cannot span multiple tables: " + stmt.source); ksName = stmt.keyspace(); cfName = stmt.table(); } diff --git a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java index 3ba34319a70e..4ce68fd75d2f 100644 --- a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java @@ -20,9 +20,19 @@ import java.util.Collections; import java.util.List; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; -import org.apache.cassandra.cql3.*; +import org.apache.cassandra.cql3.Attributes; +import org.apache.cassandra.cql3.Operation; +import org.apache.cassandra.cql3.Operations; +import org.apache.cassandra.cql3.QualifiedName; +import org.apache.cassandra.cql3.StatementSource; +import org.apache.cassandra.cql3.UpdateParameters; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.cql3.WhereClause; import org.apache.cassandra.cql3.conditions.ColumnCondition; import org.apache.cassandra.cql3.conditions.Conditions; import org.apache.cassandra.cql3.restrictions.StatementRestrictions; @@ -33,8 +43,6 @@ import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; @@ -49,9 +57,10 @@ private DeleteStatement(VariableSpecifications bindVariables, Operations operations, StatementRestrictions restrictions, Conditions conditions, - Attributes attrs) + Attributes attrs, + StatementSource source) { - super(StatementType.DELETE, bindVariables, cfm, operations, restrictions, conditions, attrs); + super(StatementType.DELETE, bindVariables, cfm, operations, restrictions, conditions, attrs, source); } @Override @@ -132,9 +141,10 @@ public Parsed(QualifiedName name, List deletions, WhereClause whereClause, List conditions, - boolean ifExists) + boolean ifExists, + StatementSource source) { - super(name, StatementType.DELETE, attrs, conditions, false, ifExists); + super(name, StatementType.DELETE, attrs, conditions, false, ifExists, source); this.deletions = deletions; this.whereClause = whereClause; } @@ -175,7 +185,8 @@ protected ModificationStatement prepareInternal(ClientState state, operations, restrictions, conditions, - attrs); + attrs, + source); if (stmt.hasConditions() && !restrictions.hasAllPrimaryKeyColumnsRestrictedByEqualities()) { diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index a4a3c50de022..d34b73a8a7ac 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -35,7 +35,6 @@ import com.google.common.collect.HashMultiset; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; - import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,6 +52,7 @@ import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.ResultSet; +import org.apache.cassandra.cql3.StatementSource; import org.apache.cassandra.cql3.UpdateParameters; import org.apache.cassandra.cql3.Validation; import org.apache.cassandra.cql3.VariableSpecifications; @@ -76,25 +76,48 @@ import org.apache.cassandra.cql3.selection.Selection; import org.apache.cassandra.cql3.selection.Selection.Selectors; import org.apache.cassandra.cql3.transactions.ReferenceOperation; -import org.apache.cassandra.db.*; -import org.apache.cassandra.db.filter.*; +import org.apache.cassandra.db.CBuilder; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.IMutation; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.SinglePartitionReadQuery; +import org.apache.cassandra.db.Slice; +import org.apache.cassandra.db.Slices; +import org.apache.cassandra.db.filter.ClusteringIndexFilter; +import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter; +import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.filter.DataLimits; +import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.marshal.BooleanType; -import org.apache.cassandra.db.partitions.*; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.partitions.Partition; +import org.apache.cassandra.db.partitions.PartitionIterator; +import org.apache.cassandra.db.partitions.PartitionIterators; +import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.view.View; -import org.apache.cassandra.exceptions.*; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.exceptions.RequestExecutionException; +import org.apache.cassandra.exceptions.RequestValidationException; +import org.apache.cassandra.exceptions.UnauthorizedException; import org.apache.cassandra.metrics.ClientRequestSizeMetrics; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageProxy; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperation; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperations; +import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.service.disk.usage.DiskUsageBroadcaster; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.BallotGenerator; import org.apache.cassandra.service.paxos.Commit.Proposal; import org.apache.cassandra.transport.Dispatcher; -import org.apache.cassandra.service.accord.txn.TxnReferenceOperation; -import org.apache.cassandra.service.accord.txn.TxnReferenceOperations; -import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.triggers.TriggerExecutor; import org.apache.cassandra.utils.ByteBufferUtil; @@ -140,13 +163,16 @@ public abstract class ModificationStatement implements CQLStatement.SingleKeyspa private final List functions; + public final StatementSource source; + public ModificationStatement(StatementType type, VariableSpecifications bindVariables, TableMetadata metadata, Operations operations, StatementRestrictions restrictions, Conditions conditions, - Attributes attrs) + Attributes attrs, + StatementSource source) { this.type = type; this.bindVariables = bindVariables; @@ -155,6 +181,7 @@ public ModificationStatement(StatementType type, this.operations = operations; this.conditions = conditions; this.attrs = attrs; + this.source = source; if (!conditions.isEmpty()) { @@ -1062,13 +1089,15 @@ public static abstract class Parsed extends QualifiedStatement private final List conditions; private final boolean ifNotExists; private final boolean ifExists; + protected final StatementSource source; protected Parsed(QualifiedName name, StatementType type, Attributes.Raw attrs, List conditions, boolean ifNotExists, - boolean ifExists) + boolean ifExists, + StatementSource source) { super(name); this.type = type; @@ -1076,6 +1105,7 @@ protected Parsed(QualifiedName name, this.conditions = conditions == null ? Collections.emptyList() : conditions; this.ifNotExists = ifNotExists; this.ifExists = ifExists; + this.source = source; } public ModificationStatement prepare(ClientState state) @@ -1202,6 +1232,7 @@ public SelectStatement createSelectForTxn() null, null, ONE, - null); + null, + StatementSource.INTERNAL); } } diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java index 26d8d3f467d3..4b104e0a79de 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java @@ -56,6 +56,7 @@ import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.ResultSet; +import org.apache.cassandra.cql3.StatementSource; import org.apache.cassandra.cql3.VariableSpecifications; import org.apache.cassandra.cql3.WhereClause; import org.apache.cassandra.cql3.functions.Function; @@ -187,6 +188,8 @@ public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement, */ private final ColumnComparator> orderingComparator; + public final StatementSource source; + // Used by forSelection below public static final Parameters defaultParameters = new Parameters(Collections.emptyList(), Collections.emptyList(), @@ -203,7 +206,8 @@ public SelectStatement(TableMetadata table, AggregationSpecification.Factory aggregationSpecFactory, ColumnComparator> orderingComparator, Term limit, - Term perPartitionLimit) + Term perPartitionLimit, + StatementSource source) { this.table = table; this.bindVariables = bindVariables; @@ -215,6 +219,7 @@ public SelectStatement(TableMetadata table, this.parameters = parameters; this.limit = limit; this.perPartitionLimit = perPartitionLimit; + this.source = source; } @Override @@ -281,7 +286,8 @@ static SelectStatement forSelection(TableMetadata table, Selection selection) null, null, null, - null); + null, + StatementSource.INTERNAL); } @Override @@ -761,6 +767,11 @@ public StatementRestrictions getRestrictions() return restrictions; } + public boolean isPartitionRangeQuery() + { + return isForPartitionRange(restrictions); + } + private ReadQuery getSliceCommands(QueryOptions options, ClientState state, ColumnFilter columnFilter, RowFilter rowFilter, DataLimits limit, long nowInSec) { @@ -1205,6 +1216,11 @@ private void orderResults(ResultSet cqlRows, QueryOptions options, ClientState s cqlRows.rows.sort(comparator); } + private static boolean isForPartitionRange(StatementRestrictions restrictions) + { + return restrictions.isKeyRange() || restrictions.usesSecondaryIndexing(); + } + public static class RawStatement extends QualifiedStatement { public final Parameters parameters; @@ -1213,13 +1229,15 @@ public static class RawStatement extends QualifiedStatement public final Term.Raw limit; public final Term.Raw perPartitionLimit; private ClientState state; + private final StatementSource source; public RawStatement(QualifiedName cfName, Parameters parameters, List selectClause, WhereClause whereClause, Term.Raw limit, - Term.Raw perPartitionLimit) + Term.Raw perPartitionLimit, + StatementSource source) { super(cfName); this.parameters = parameters; @@ -1227,6 +1245,7 @@ public RawStatement(QualifiedName cfName, this.whereClause = whereClause; this.limit = limit; this.perPartitionLimit = perPartitionLimit; + this.source = source; } public SelectStatement prepare(ClientState state) @@ -1315,7 +1334,8 @@ public SelectStatement prepare(ClientState state, boolean forView, VariableSpeci aggregationSpecFactory, orderingComparator, prepareLimit(variableSpecifications, limit, keyspace(), limitReceiver()), - prepareLimit(variableSpecifications, perPartitionLimit, keyspace(), perPartitionLimitReceiver())); + prepareLimit(variableSpecifications, perPartitionLimit, keyspace(), perPartitionLimitReceiver()), + source); } private Set getResultSetOrdering(StatementRestrictions restrictions, Map orderingColumns) @@ -1646,7 +1666,7 @@ private boolean isReversed(TableMetadata table, Map or private void checkNeedsFiltering(TableMetadata table, StatementRestrictions restrictions) throws InvalidRequestException { // non-key-range non-indexed queries cannot involve filtering underneath - if (!parameters.allowFiltering && (restrictions.isKeyRange() || restrictions.usesSecondaryIndexing())) + if (!parameters.allowFiltering && isForPartitionRange(restrictions)) { // We will potentially filter data if the row filter is not the identity and there isn't any index group // supporting all the expressions in the filter. @@ -1857,7 +1877,7 @@ public String toString() private String loggableTokens(QueryOptions options, ClientState state) { - if (restrictions.isKeyRange() || restrictions.usesSecondaryIndexing()) + if (isPartitionRangeQuery()) { AbstractBounds bounds = restrictions.getPartitionKeyBounds(options); return "token range: " + (bounds.inclusiveLeft() ? '[' : '(') + @@ -1894,7 +1914,7 @@ public String asCQL(QueryOptions options, ClientState state) sb.append("SELECT ").append(queriedColumns().toCQLString()); sb.append(" FROM ").append(table.keyspace).append('.').append(table.name); - if (restrictions.isKeyRange() || restrictions.usesSecondaryIndexing()) + if (isPartitionRangeQuery()) { // partition range ClusteringIndexFilter clusteringIndexFilter = makeClusteringIndexFilter(options, state, columnFilter); diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index a596a5a0e935..1625ab40e918 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -58,7 +58,6 @@ import org.apache.cassandra.cql3.transactions.ReferenceOperation; import org.apache.cassandra.cql3.transactions.RowDataReference; import org.apache.cassandra.cql3.transactions.SelectReferenceSource; -import org.apache.cassandra.db.ReadQuery; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.SinglePartitionReadQuery; import org.apache.cassandra.db.marshal.AbstractType; @@ -79,7 +78,6 @@ import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.LazyToString; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull; @@ -90,14 +88,13 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, private static final Logger logger = LoggerFactory.getLogger(TransactionStatement.class); public static final String DUPLICATE_TUPLE_NAME_MESSAGE = "The name '%s' has already been used by a LET assignment."; - public static final String INCOMPLETE_PRIMARY_KEY_LET_MESSAGE = "SELECT in LET assignment must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; CQL %s"; - public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "Normal SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; CQL %s"; - public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions."; - public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps."; + public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; %s %s"; + public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions; %s statement %s"; + public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps; %s statement %s"; public static final String EMPTY_TRANSACTION_MESSAGE = "Transaction contains no reads or writes"; public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; public static final String TRANSACTIONS_DISABLED_MESSAGE = "Accord transactions are disabled. (See accord_transactions_enabled in cassandra.yaml)"; - public static final String ILLEGAL_RANGE_QUERY_MESSAGE = "Range queries are not allowed for reads within a transaction"; + public static final String ILLEGAL_RANGE_QUERY_MESSAGE = "Range queries are not allowed for reads within a transaction; %s %s"; static class NamedSelect { @@ -208,12 +205,9 @@ public ResultSet.ResultMetadata getResultMetadata() TxnNamedRead createNamedRead(NamedSelect namedSelect, QueryOptions options, ClientState state) { SelectStatement select = namedSelect.select; - ReadQuery readQuery = select.getQuery(options, 0); - checkTrue(readQuery instanceof SinglePartitionReadQuery.Group, ILLEGAL_RANGE_QUERY_MESSAGE, select.asCQL(options, state)); - // We reject reads from both LET and SELECT that do not specify a single row. @SuppressWarnings("unchecked") - SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) readQuery; + SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) select.getQuery(options, 0); if (selectQuery.queries.size() != 1) throw new IllegalArgumentException("Within a transaction, SELECT statements must select a single partition; found " + selectQuery.queries.size() + " partitions"); @@ -224,12 +218,9 @@ TxnNamedRead createNamedRead(NamedSelect namedSelect, QueryOptions options, Clie List createNamedReads(NamedSelect namedSelect, QueryOptions options, ClientState state) { SelectStatement select = namedSelect.select; - ReadQuery readQuery = select.getQuery(options, 0); - checkTrue(readQuery instanceof SinglePartitionReadQuery.Group, ILLEGAL_RANGE_QUERY_MESSAGE, select.asCQL(options, state)); - // We reject reads from both LET and SELECT that do not specify a single row. @SuppressWarnings("unchecked") - SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) readQuery; + SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) select.getQuery(options, 0); if (selectQuery.queries.size() == 1) return Collections.singletonList(new TxnNamedRead(namedSelect.name, Iterables.getOnlyElement(selectQuery.queries))); @@ -340,24 +331,24 @@ public Txn createTxn(ClientState state, QueryOptions options) } } - private static void checkAtMostOneRowSpecified(ClientState clientState, @Nullable QueryOptions options, SelectStatement select, String failureMessage) + /** + * Returns {@code true} only if the statement selects multiple clusterings in a partition + */ + private static boolean isSelectingMultipleClusterings(SelectStatement select, @Nullable QueryOptions options) { if (select.getRestrictions().hasAllPrimaryKeyColumnsRestrictedByEqualities()) - return; + return false; if (options == null) { - // If the limit is a non-terminal marker (because we're preparing), defer validation until execution. + // if the limit is a non-terminal marker (because we're preparing), defer validation until execution (when options != null) if (select.isLimitMarker()) - return; + return false; - // The limit is already defined, so proceed with validation... options = QueryOptions.DEFAULT; } - int limit = select.getLimit(options); - QueryOptions finalOptions = options; // javac thinks this is mutable so requires a copy - checkTrue(limit == 1 && select.getRestrictions().hasAllPartitionKeyColumnsRestrictedByEqualities(), failureMessage, LazyToString.lazy(() -> select.asCQL(finalOptions, clientState))); + return select.getLimit(options) != 1; } @Override @@ -367,21 +358,19 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. try { + // check again since now we have query options; note that statements are quaranted to be single partition reads at this point for (NamedSelect assignment : assignments) - checkAtMostOneRowSpecified(state.getClientState(), options, assignment.select, INCOMPLETE_PRIMARY_KEY_LET_MESSAGE); + checkFalse(isSelectingMultipleClusterings(assignment.select, options), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment", assignment.select.source); if (returningSelect != null) - checkAtMostOneRowSpecified(state.getClientState(), options, returningSelect.select, INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE); + checkFalse(isSelectingMultipleClusterings(returningSelect.select, options), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "returning SELECT", returningSelect.select.source); TxnData data = AccordService.instance().coordinate(createTxn(state.getClientState(), options), options.getConsistency()); if (returningSelect != null) { - ReadQuery readQuery = returningSelect.select.getQuery(options, 0); - checkTrue(readQuery instanceof SinglePartitionReadQuery.Group, ILLEGAL_RANGE_QUERY_MESSAGE, returningSelect.select.asCQL(options, state.getClientState())); - @SuppressWarnings("unchecked") - SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) readQuery; + SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) returningSelect.select.getQuery(options, 0); Selection.Selectors selectors = returningSelect.select.getSelection().newSelectors(options); ResultSetBuilder result = new ResultSetBuilder(resultMetadata, selectors, false); if (selectQuery.queries.size() == 1) @@ -513,7 +502,7 @@ public CQLStatement prepare(ClientState state) SelectStatement prepared = select.prepare(bindVariables); NamedSelect namedSelect = new NamedSelect(name, prepared); - checkAtMostOneRowSpecified(state, null, namedSelect.select, INCOMPLETE_PRIMARY_KEY_LET_MESSAGE); + checkAtMostOneRowSpecified(namedSelect.select, "LET assignment " + name.name()); preparedAssignments.add(namedSelect); refSources.put(name, new SelectReferenceSource(prepared)); } @@ -526,7 +515,7 @@ public CQLStatement prepare(ClientState state) if (select != null) { returningSelect = new NamedSelect(TxnDataName.returning(), select.prepare(bindVariables)); - checkAtMostOneRowSpecified(state, null, returningSelect.select, INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE); + checkAtMostOneRowSpecified(returningSelect.select, "returning select"); } List returningReferences = null; @@ -547,8 +536,8 @@ public CQLStatement prepare(ClientState state) ModificationStatement.Parsed parsed = updates.get(i); ModificationStatement prepared = parsed.prepare(state, bindVariables); - checkFalse(prepared.hasConditions(), NO_CONDITIONS_IN_UPDATES_MESSAGE); - checkFalse(prepared.isTimestampSet(), NO_TIMESTAMPS_IN_UPDATES_MESSAGE); + checkFalse(prepared.hasConditions(), NO_CONDITIONS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); + checkFalse(prepared.isTimestampSet(), NO_TIMESTAMPS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); preparedUpdates.add(prepared); } @@ -560,5 +549,15 @@ public CQLStatement prepare(ClientState state) return new TransactionStatement(preparedAssignments, returningSelect, returningReferences, preparedUpdates, preparedConditions, bindVariables); } + + /** + * Do not use this method in execution!!! It is only allowed during prepare because it outputs a query raw text. + * We don't want it print it for a user who provided an identifier of someone's else prepared statement. + */ + private static void checkAtMostOneRowSpecified(SelectStatement select, String name) + { + checkFalse(select.isPartitionRangeQuery(), ILLEGAL_RANGE_QUERY_MESSAGE, name, select.source); + checkFalse(isSelectingMultipleClusterings(select, null), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, name, select.source); + } } } diff --git a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java index 2344850ceeb9..d5002731f56c 100644 --- a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java @@ -24,16 +24,29 @@ import java.util.List; import com.google.common.base.Preconditions; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; -import org.apache.cassandra.cql3.*; +import org.apache.cassandra.cql3.Attributes; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.Relation; +import org.apache.cassandra.cql3.terms.Constants; +import org.apache.cassandra.cql3.Json; +import org.apache.cassandra.cql3.Operation; +import org.apache.cassandra.cql3.Operations; +import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.cql3.QualifiedName; +import org.apache.cassandra.cql3.StatementSource; +import org.apache.cassandra.cql3.terms.Term; +import org.apache.cassandra.cql3.UpdateParameters; +import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.cql3.WhereClause; import org.apache.cassandra.cql3.conditions.ColumnCondition; import org.apache.cassandra.cql3.conditions.Conditions; import org.apache.cassandra.cql3.constraints.ColumnConstraint; import org.apache.cassandra.cql3.restrictions.StatementRestrictions; -import org.apache.cassandra.cql3.terms.Constants; -import org.apache.cassandra.cql3.terms.Term; import org.apache.cassandra.cql3.transactions.ReferenceOperation; import org.apache.cassandra.cql3.transactions.ReferenceValue; import org.apache.cassandra.db.Clustering; @@ -47,8 +60,6 @@ import org.apache.cassandra.service.accord.txn.TxnReferenceOperation; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.Pair; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import static org.apache.cassandra.cql3.statements.RequestValidations.checkContainsNoDuplicates; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; @@ -70,9 +81,10 @@ private UpdateStatement(StatementType type, Operations operations, StatementRestrictions restrictions, Conditions conditions, - Attributes attrs) + Attributes attrs, + StatementSource source) { - super(type, bindVariables, metadata, operations, restrictions, conditions, attrs); + super(type, bindVariables, metadata, operations, restrictions, conditions, attrs, source); } @Override @@ -145,9 +157,10 @@ public ParsedInsert(QualifiedName name, Attributes.Raw attrs, List columnNames, List columnValues, - boolean ifNotExists) + boolean ifNotExists, + StatementSource source) { - super(name, StatementType.INSERT, attrs, null, ifNotExists, false); + super(name, StatementType.INSERT, attrs, null, ifNotExists, false, source); this.columnNames = columnNames; this.columnValues = columnValues; } @@ -219,7 +232,8 @@ else if (value instanceof ReferenceValue.Raw) operations, restrictions, conditions, - attrs); + attrs, + source); } } @@ -231,9 +245,9 @@ public static class ParsedInsertJson extends ModificationStatement.Parsed private final Json.Raw jsonValue; private final boolean defaultUnset; - public ParsedInsertJson(QualifiedName name, Attributes.Raw attrs, Json.Raw jsonValue, boolean defaultUnset, boolean ifNotExists) + public ParsedInsertJson(QualifiedName name, Attributes.Raw attrs, Json.Raw jsonValue, boolean defaultUnset, boolean ifNotExists, StatementSource source) { - super(name, StatementType.INSERT, attrs, null, ifNotExists, false); + super(name, StatementType.INSERT, attrs, null, ifNotExists, false, source); this.jsonValue = jsonValue; this.defaultUnset = defaultUnset; } @@ -290,7 +304,8 @@ protected ModificationStatement prepareInternal(ClientState state, operations, restrictions, conditions, - attrs); + attrs, + source); } } @@ -364,9 +379,10 @@ public ParsedUpdate(QualifiedName name, WhereClause whereClause, List conditions, boolean ifExists, - boolean isForTxn) + boolean isForTxn, + StatementSource source) { - super(name, StatementType.UPDATE, attrs, conditions, false, ifExists); + super(name, StatementType.UPDATE, attrs, conditions, false, ifExists, source); this.updates = updates; this.whereClause = whereClause; this.isForTxn = isForTxn; @@ -413,7 +429,8 @@ protected ModificationStatement prepareInternal(ClientState state, operations, restrictions, conditions, - attrs); + attrs, + source); } } diff --git a/src/java/org/apache/cassandra/db/view/View.java b/src/java/org/apache/cassandra/db/view/View.java index e926edb3a970..36894127ae45 100644 --- a/src/java/org/apache/cassandra/db/view/View.java +++ b/src/java/org/apache/cassandra/db/view/View.java @@ -17,19 +17,26 @@ */ package org.apache.cassandra.db.view; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.stream.Collectors; - import javax.annotation.Nullable; import com.google.common.collect.Iterables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import org.apache.cassandra.cql3.*; +import org.apache.cassandra.cql3.QualifiedName; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.StatementSource; import org.apache.cassandra.cql3.selection.RawSelector; import org.apache.cassandra.cql3.selection.Selectable; import org.apache.cassandra.cql3.statements.SelectStatement; -import org.apache.cassandra.db.*; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.ReadQuery; +import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Schema; @@ -37,8 +44,6 @@ import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.utils.FBUtilities; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * A View copies data from a base table into a view table which can be queried independently from the @@ -174,7 +179,8 @@ SelectStatement getSelectStatement() selectClause(), definition.whereClause, null, - null); + null, + StatementSource.INTERNAL); rawSelect.setBindVariables(Collections.emptyList()); diff --git a/test/unit/org/apache/cassandra/cql3/StatementSourceTest.java b/test/unit/org/apache/cassandra/cql3/StatementSourceTest.java new file mode 100644 index 000000000000..b6362747d575 --- /dev/null +++ b/test/unit/org/apache/cassandra/cql3/StatementSourceTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3; + +import org.junit.Test; + +import org.antlr.runtime.Token; +import org.mockito.Mockito; + +import static org.apache.cassandra.cql3.StatementSource.create; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +public class StatementSourceTest +{ + private static Token token(int line, int pos) + { + Token token = Mockito.mock(Token.class); + when(token.getLine()).thenReturn(line); + when(token.getCharPositionInLine()).thenReturn(pos); + when(token.getType()).thenReturn(1); + return token; + } + + private static Token eof() + { + Token token = Mockito.mock(Token.class); + when(token.getLine()).thenThrow(UnsupportedOperationException.class); + when(token.getCharPositionInLine()).thenThrow(UnsupportedOperationException.class); + when(token.getType()).thenReturn(Token.EOF); + return token; + } + + @Test + public void test() + { + assertThat(create(token(1, 4))).hasToString("at [1:5]"); + assertThat(create(token(3, 8))).hasToString("at [3:9]"); + assertThat(create(token(6, 8))).hasToString("at [6:9]"); + assertThat(create(token(1, 0))).hasToString("at [1:1]"); + assertThat(create(eof()).toString()).isEmpty(); + + assertThat(StatementSource.INTERNAL).hasToString("<<>>"); + } +} diff --git a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java index 2b2750476d4c..95de83ff1b5e 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java @@ -36,7 +36,7 @@ import static org.apache.cassandra.cql3.statements.TransactionStatement.DUPLICATE_TUPLE_NAME_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.EMPTY_TRANSACTION_MESSAGE; -import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PRIMARY_KEY_LET_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.ILLEGAL_RANGE_QUERY_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_CONDITIONS_IN_UPDATES_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_TIMESTAMPS_IN_UPDATES_MESSAGE; @@ -162,7 +162,7 @@ public void shouldRejectIllegalLimitInLet() Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, letSelect)); + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment row1", "at [2:15]")); } @Test @@ -176,7 +176,7 @@ public void shouldRejectIllegalBindLimitInLet() Assertions.assertThatThrownBy(() -> execute(query, 2)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, letSelect.replace("?", "2"))); + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment", "at [2:15]")); } @Test @@ -190,7 +190,7 @@ public void shouldRejectIncompletePrimaryKeyInLet() Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, letSelect)); + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment row1", "at [2:15]")); } @Test @@ -201,7 +201,7 @@ public void shouldRejectIllegalLimitInSelect() Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, select)); + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "returning select", "at [2:1]")); } @Test @@ -212,7 +212,7 @@ public void shouldRejectIncompletePrimaryKeyInSelect() Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, select)); + .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "returning select", "at [2:1]")); } @Test @@ -224,7 +224,7 @@ public void shouldRejectUpdateWithCondition() Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(NO_CONDITIONS_IN_UPDATES_MESSAGE); + .hasMessageContaining(NO_CONDITIONS_IN_UPDATES_MESSAGE, "INSERT", "at [2:3]"); } @Test @@ -236,7 +236,7 @@ public void shouldRejectUpdateWithCustomTimestamp() Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(NO_TIMESTAMPS_IN_UPDATES_MESSAGE); + .hasMessageContaining(NO_TIMESTAMPS_IN_UPDATES_MESSAGE, "INSERT", "at [2:3]"); } @Test @@ -336,26 +336,28 @@ public void shouldRejectInsertPartiitonKeyReference() @Test public void shouldRejectNormalSelectWithIncompletePartitionKey() { + String select = "SELECT k, v FROM ks.tbl5 LIMIT 1"; String query = "BEGIN TRANSACTION\n" + - " SELECT k, v FROM ks.tbl5 LIMIT 1;\n" + + select + ";\n" + "COMMIT TRANSACTION;\n"; Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "SELECT v FROM ks.tbl5 LIMIT 1")); + .hasMessageContaining(String.format(ILLEGAL_RANGE_QUERY_MESSAGE, "returning select", "at [2:1]")); } @Test public void shouldRejectLetSelectWithIncompletePartitionKey() { + String select = "SELECT k, v FROM ks.tbl5 WHERE token(k) > token(123) LIMIT 1"; String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT k, v FROM ks.tbl5 WHERE token(k) > token(123) LIMIT 1); \n" + + " LET row1 = (" + select + "); \n" + " SELECT row1.k, row1.v;\n" + "COMMIT TRANSACTION;\n"; Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_LET_MESSAGE, "SELECT v FROM ks.tbl5 WHERE token(k) > 0000007b LIMIT 1")); + .hasMessageContaining(String.format(ILLEGAL_RANGE_QUERY_MESSAGE, "LET assignment row1", "at [2:15]")); } private static CQLStatement prepare(String query) From 68135aaf29dffb9d4836232daa9765d97e15658e Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Thu, 16 Mar 2023 18:35:31 -0500 Subject: [PATCH 049/340] fixing remaining (mostly compilation) issues after initial rebase of cep-15-accord on cep-21-tcm --- .../org/apache/cassandra/db/DeletionTime.java | 2 +- .../org/apache/cassandra/db/Keyspace.java | 2 +- .../cassandra/db/MutableDeletionInfo.java | 2 +- .../db/SinglePartitionReadCommand.java | 29 ++++--- .../apache/cassandra/db/SystemKeyspace.java | 5 +- .../apache/cassandra/db/rows/BTreeRow.java | 4 +- .../cassandra/db/rows/ComplexColumnData.java | 2 +- .../service/accord/AccordKeyspace.java | 6 +- .../service/accord/AccordTopologyUtils.java | 84 ++++++++++--------- .../cassandra/utils/btree/BTreeSet.java | 1 - .../test/accord/AccordCQLTest.java | 2 + .../test/accord/AccordTestBase.java | 2 +- .../systems/InterceptingGlobalMethods.java | 4 + .../statements/DescribeStatementTest.java | 10 +-- .../service/accord/api/AccordKeyTest.java | 2 +- 15 files changed, 81 insertions(+), 76 deletions(-) diff --git a/src/java/org/apache/cassandra/db/DeletionTime.java b/src/java/org/apache/cassandra/db/DeletionTime.java index 190f54d3437b..5970fbb042a4 100644 --- a/src/java/org/apache/cassandra/db/DeletionTime.java +++ b/src/java/org/apache/cassandra/db/DeletionTime.java @@ -61,7 +61,7 @@ public static DeletionTime build(long markedForDeleteAt, long localDeletionTime) // Do not use. This is a perf optimization where some data structures known to hold valid uints are allowed to use it. // You should use 'build' instead to not workaround validations, corruption detections, etc - public static DeletionTime buildUnsafeWithUnsignedInteger(long markedForDeleteAt, int localDeletionTimeUnsignedInteger) + static DeletionTime buildUnsafeWithUnsignedInteger(long markedForDeleteAt, int localDeletionTimeUnsignedInteger) { return CassandraUInt.compare(Cell.MAX_DELETION_TIME_UNSIGNED_INTEGER, localDeletionTimeUnsignedInteger) < 0 ? new InvalidDeletionTime(markedForDeleteAt) diff --git a/src/java/org/apache/cassandra/db/Keyspace.java b/src/java/org/apache/cassandra/db/Keyspace.java index f731139e442f..8b8cc9f2c912 100644 --- a/src/java/org/apache/cassandra/db/Keyspace.java +++ b/src/java/org/apache/cassandra/db/Keyspace.java @@ -696,7 +696,7 @@ public static Iterable nonLocalStrategy() public static Iterable system() { - return Iterables.transform(SchemaConstants.LOCAL_SYSTEM_KEYSPACE_NAMES, Keyspace::open); + return Iterables.transform(Schema.instance.localKeyspaces().names(), Keyspace::open); } @Override diff --git a/src/java/org/apache/cassandra/db/MutableDeletionInfo.java b/src/java/org/apache/cassandra/db/MutableDeletionInfo.java index 3076c5494669..f9636909c47e 100644 --- a/src/java/org/apache/cassandra/db/MutableDeletionInfo.java +++ b/src/java/org/apache/cassandra/db/MutableDeletionInfo.java @@ -233,7 +233,7 @@ public DeletionInfo updateAllTimestamp(long timestamp) public DeletionInfo updateAllTimestampAndLocalDeletionTime(long timestamp, int localDeletionTime) { if (partitionDeletion.markedForDeleteAt() != Long.MIN_VALUE) - partitionDeletion = DeletionTime.buildUnsafeWithUnsignedInteger(timestamp, localDeletionTime); + partitionDeletion = DeletionTime.build(timestamp, localDeletionTime); if (ranges != null) ranges.updateAllTimestampAndLocalDeletionTime(timestamp, localDeletionTime); diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index 6a129aeb5d0f..ab346bd47b6a 100644 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@ -434,22 +434,21 @@ public SinglePartitionReadCommand withUpdatedLimit(DataLimits newLimits) isTrackingWarnings()); } - public SinglePartitionReadCommand withNowInSec(int nowInSec) + public SinglePartitionReadCommand withNowInSec(long nowInSec) { - return new SinglePartitionReadCommand(serializedAtEpoch(), - isDigestQuery(), - digestVersion(), - acceptsTransient(), - metadata(), - nowInSec, - columnFilter(), - rowFilter(), - limits(), - partitionKey(), - clusteringIndexFilter(), - indexQueryPlan(), - isTrackingWarnings(), - dataRange()); + return create(serializedAtEpoch(), + isDigestQuery(), + digestVersion(), + acceptsTransient(), + metadata(), + nowInSec, + columnFilter(), + rowFilter(), + limits(), + partitionKey(), + clusteringIndexFilter(), + indexQueryPlan(), + isTrackingWarnings()); } @Override diff --git a/src/java/org/apache/cassandra/db/SystemKeyspace.java b/src/java/org/apache/cassandra/db/SystemKeyspace.java index 18bb9963936f..db82fe2386f2 100644 --- a/src/java/org/apache/cassandra/db/SystemKeyspace.java +++ b/src/java/org/apache/cassandra/db/SystemKeyspace.java @@ -1948,15 +1948,14 @@ private static Range byteBufferToRange(ByteBuffer rawRange, IPartitioner public static void writePreparedStatement(String loggedKeyspace, MD5Digest key, String cql) { - executeInternal(format("INSERT INTO %s (logged_keyspace, prepared_id, query_string) VALUES (?, ?, ?)", - PreparedStatements.toString()), + executeInternal(format("INSERT INTO %s (logged_keyspace, prepared_id, query_string) VALUES (?, ?, ?)", PreparedStatements), loggedKeyspace, key.byteBuffer(), cql); logger.debug("stored prepared statement for logged keyspace '{}': '{}'", loggedKeyspace, cql); } public static void removePreparedStatement(MD5Digest key) { - executeInternal(format("DELETE FROM %s WHERE prepared_id = ?", PreparedStatements.toString()), + executeInternal(format("DELETE FROM %s WHERE prepared_id = ?", PreparedStatements), key.byteBuffer()); } diff --git a/src/java/org/apache/cassandra/db/rows/BTreeRow.java b/src/java/org/apache/cassandra/db/rows/BTreeRow.java index 08888b14ea58..2fbee6a24a6e 100644 --- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java +++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java @@ -431,7 +431,7 @@ public boolean hasInvalidDeletions() * Returns a copy of the row where all timestamps for live data have replaced by {@code newTimestamp} and * all deletion timestamp by {@code newTimestamp - 1}. * - * This exists for the Paxos path, see {@link PartitionUpdate#updateAllTimestamp} for additional details. + * This exists for the Paxos path, see {@link PartitionUpdate.Builder#updateAllTimestamp} for additional details. */ public Row updateAllTimestamp(long newTimestamp) { @@ -453,7 +453,7 @@ public Row updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLoca // should get rid of said deletion. Deletion newDeletion = deletion.isLive() || (deletion.isShadowable() && !primaryKeyLivenessInfo.isEmpty()) ? Deletion.LIVE - : new Deletion(DeletionTime.buildUnsafeWithUnsignedInteger(newTimestamp - 1, newLocalDeletionTime), deletion.isShadowable()); + : new Deletion(DeletionTime.build(newTimestamp - 1, newLocalDeletionTime), deletion.isShadowable()); return transformAndFilter(newInfo, newDeletion, (cd) -> cd.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); } diff --git a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java index f8cc58a84cf3..f668edfd7a05 100644 --- a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java +++ b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java @@ -267,7 +267,7 @@ public ComplexColumnData updateAllTimestamp(long newTimestamp) @Override public ColumnData updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) { - DeletionTime newDeletion = complexDeletion.isLive() ? complexDeletion : DeletionTime.buildUnsafeWithUnsignedInteger(newTimestamp - 1, newLocalDeletionTime); + DeletionTime newDeletion = complexDeletion.isLive() ? complexDeletion : DeletionTime.build(newTimestamp - 1, newLocalDeletionTime); return transformAndFilter(newDeletion, (cell) -> (Cell) cell.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index b718da793f21..fc719dfdf831 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -433,7 +433,7 @@ private static void addSetChanges(ColumnMetadata column, Function void addMapChanges(ColumnMetadata column, Function Set deletions = Sets.difference(prev.keySet(), value.keySet()); Row.Deletion deletion = !deletions.isEmpty() ? - Row.Deletion.regular(DeletionTime.buildUnsafeWithUnsignedInteger(timestampMicros, nowInSeconds)) : + Row.Deletion.regular(DeletionTime.build(timestampMicros, nowInSeconds)) : null; ByteBuffer ordinalBytes = bytes(kind.ordinal()); value.forEach((timestamp, bytes) -> { diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index a4bd5d028c3e..1b69c27aa452 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -18,14 +18,21 @@ package org.apache.cassandra.service.accord; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; import java.util.stream.Collectors; import accord.topology.Shard; import accord.topology.Topology; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Token; import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.tcm.ClusterMetadata; public class AccordTopologyUtils { @@ -52,49 +59,44 @@ private static TokenRange range(String keyspace, Token left, Token right) return new TokenRange(new TokenKey(keyspace, left), new TokenKey(keyspace, right)); } -// private static List createShards(String keyspace, TokenMetadata tokenMetadata) -// { -// AbstractReplicationStrategy replication = Keyspace.open(keyspace).getReplicationStrategy(); -// Set tokenSet = new HashSet<>(tokenMetadata.sortedTokens()); -// tokenSet.addAll(tokenMetadata.getBootstrapTokens().keySet()); -// tokenMetadata.getMovingEndpoints().forEach(p -> tokenSet.add(p.left)); -// List tokens = new ArrayList<>(tokenSet); -// tokens.sort(Comparator.naturalOrder()); -// -// List shards = new ArrayList<>(tokens.size() + 1); -// Shard finalShard = null; -// for (int i=0, mi=tokens.size(); i createShards(String keyspace, ClusterMetadata clusterMetadata) + { + KeyspaceMetadata keyspaceMetadata = Keyspace.open(keyspace).getMetadata(); + List tokens = new ArrayList<>(clusterMetadata.tokenMap.tokens()); + tokens.sort(Comparator.naturalOrder()); + + List shards = new ArrayList<>(tokens.size() + 1); + Shard finalShard = null; + for (int i = 0, mi = tokens.size(); i < mi; i++) + { + Token token = tokens.get(i); + EndpointsForToken natural = clusterMetadata.placements.get(keyspaceMetadata.params.replication).reads.forToken(token).get(); + EndpointsForToken pending = clusterMetadata.pendingEndpointsFor(keyspaceMetadata, token).get(); + if (i == 0) + { + shards.add(createShard(minRange(keyspace, token), natural, pending)); + finalShard = createShard(maxRange(keyspace, tokens.get(mi - 1)), natural, pending); + } + else + { + Token prev = tokens.get(i - 1); + shards.add(createShard(range(keyspace, prev, token), natural, pending)); + } + } + shards.add(finalShard); + + return shards; + } public static Topology createTopology(long epoch) { - throw new UnsupportedOperationException("git rebase should rewrite the history so this logic is based off TCM... TokenMetadata doesn't exist on trunk anymore"); -// TokenMetadata tokenMetadata = StorageService.instance.getTokenMetadata(); -// List keyspaces = new ArrayList<>(Schema.instance.distributedKeyspaces().names()); -// keyspaces.sort(String::compareTo); -// -// List shards = new ArrayList<>(); -// for (String keyspace : keyspaces) -// shards.addAll(createShards(keyspace, tokenMetadata)); -// -// return new Topology(epoch, shards.toArray(new Shard[0])); + List keyspaces = new ArrayList<>(Schema.instance.distributedKeyspaces().names()); + keyspaces.sort(String::compareTo); + + List shards = new ArrayList<>(); + for (String keyspace : keyspaces) + shards.addAll(createShards(keyspace, ClusterMetadata.current())); + + return new Topology(epoch, shards.toArray(new Shard[0])); } } diff --git a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java index b8be5126baae..d890bcf6ee60 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java +++ b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java @@ -40,7 +40,6 @@ import static org.apache.cassandra.utils.btree.BTree.findIndex; - public class BTreeSet extends AbstractSet implements NavigableSet, List { protected final Comparator comparator; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index 17cfd4cada52..7e6ecc67c23b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -2493,6 +2493,8 @@ public void testReferenceArithmeticInUpdate() throws Exception }); } + // TODO: Re-enable when TrM integration is working + @Ignore @Test public void testCASAndSerialRead() throws Exception { diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 359882adadb7..750b003d4c6f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -139,7 +139,7 @@ private static Cluster createCluster() throws IOException .withoutVNodes() .withConfig(c -> c.with(Feature.NETWORK).set("write_request_timeout", "10s") .set("transaction_timeout", "15s") - .set("legacy_paxos_strategy", "accord")) + .set("legacy_paxos_strategy", "migration")) // TODO: switch back to "accord" when TrM integration works .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install) .withInstanceInitializer(BBAccordCoordinateCountHelper::install) .start()); diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java index 40582706d97e..75f525e640a4 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingGlobalMethods.java @@ -24,6 +24,9 @@ import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.simulator.RandomSource; import org.apache.cassandra.simulator.systems.InterceptedWait.CaptureSites.Capture; import org.apache.cassandra.simulator.systems.InterceptedWait.InterceptedConditionWait; @@ -42,6 +45,7 @@ @PerClassLoader public class InterceptingGlobalMethods extends InterceptingMonitors implements InterceptorOfGlobalMethods { + private static final Logger logger = LoggerFactory.getLogger(InterceptingGlobalMethods.class); private static final boolean isDeterminismCheckStrict = TEST_SIMULATOR_DETERMINISM_CHECK.convert(name -> name.equals("strict")); private final @Nullable LongConsumer onThreadLocalRandomCheck; diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index dce719a5cdb1..2d5d1aadb2b9 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -45,6 +45,11 @@ import org.apache.cassandra.transport.ProtocolVersion; import static java.lang.String.format; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.schema.SchemaConstants.AUTH_KEYSPACE_NAME; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; @@ -53,11 +58,6 @@ import static org.apache.cassandra.schema.SchemaConstants.SYSTEM_KEYSPACE_NAME; import static org.apache.cassandra.schema.SchemaConstants.TRACE_KEYSPACE_NAME; import static org.apache.cassandra.schema.SchemaConstants.VIRTUAL_SCHEMA; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; public class DescribeStatementTest extends CQLTester { diff --git a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java index 64feaedc982d..95b4ed0078d2 100644 --- a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java @@ -78,7 +78,7 @@ public void comparisonTest() PartitionKey pk = new PartitionKey("", TABLE1, dk); TokenKey tk = new TokenKey("", dk.getToken()); TokenKey tkLow = new TokenKey("", dk.getToken().decreaseSlightly()); - TokenKey tkHigh = new TokenKey("", dk.getToken().increaseSlightly()); + TokenKey tkHigh = new TokenKey("", dk.getToken().nextValidToken()); Assert.assertTrue(tk.compareTo(pk) > 0); Assert.assertTrue(tkLow.compareTo(pk) < 0); From 4e95e3a4406ae582ab9a7fc164c5c3a814abb95e Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 17 Mar 2023 12:03:07 -0700 Subject: [PATCH 050/340] Operations.migrateReadRequiredOperations fails due to concurrent access when TransactionStatement is prepared patch by David Capwell; reviewed by Ariel Weisberg, Caleb Rackliffe for CASSANDRA-18337 --- src/antlr/Parser.g | 7 ++- .../org/apache/cassandra/cql3/Operations.java | 57 ++++++++++++------- .../cql3/statements/CQL3CasRequest.java | 5 +- .../cql3/statements/DeleteStatement.java | 13 ++++- .../statements/ModificationStatement.java | 31 +++++++--- .../cql3/statements/UpdateStatement.java | 21 +++++-- .../transactions/SelectReferenceSource.java | 2 +- .../test/accord/AccordTestBase.java | 1 - 8 files changed, 96 insertions(+), 41 deletions(-) diff --git a/src/antlr/Parser.g b/src/antlr/Parser.g index f9fc2842df91..b3fb490dcaf0 100644 --- a/src/antlr/Parser.g +++ b/src/antlr/Parser.g @@ -573,7 +573,7 @@ normalInsertStatement [QualifiedName qn] returns [UpdateStatement.ParsedInsert e ( K_IF K_NOT K_EXISTS { ifNotExists = true; } )? ( usingClause[attrs] )? { - $expr = new UpdateStatement.ParsedInsert(qn, attrs, columnNames, values, ifNotExists, stmtSrc()); + $expr = new UpdateStatement.ParsedInsert(qn, attrs, columnNames, values, ifNotExists, stmtSrc(), isParsingTxn); } ; @@ -593,7 +593,7 @@ jsonInsertStatement [QualifiedName qn] returns [UpdateStatement.ParsedInsertJson ( K_IF K_NOT K_EXISTS { ifNotExists = true; } )? ( usingClause[attrs] )? { - $expr = new UpdateStatement.ParsedInsertJson(qn, attrs, val, defaultUnset, ifNotExists, stmtSrc()); + $expr = new UpdateStatement.ParsedInsertJson(qn, attrs, val, defaultUnset, ifNotExists, stmtSrc(), isParsingTxn); } ; @@ -674,7 +674,8 @@ deleteStatement returns [DeleteStatement.Parsed expr] wclause.build(), conditions == null ? Collections.emptyList() : conditions, ifExists, - stmtSrc()); + stmtSrc(), + isParsingTxn); } ; diff --git a/src/java/org/apache/cassandra/cql3/Operations.java b/src/java/org/apache/cassandra/cql3/Operations.java index 952305ec0d46..8de94015a7d3 100644 --- a/src/java/org/apache/cassandra/cql3/Operations.java +++ b/src/java/org/apache/cassandra/cql3/Operations.java @@ -21,6 +21,7 @@ import java.util.Iterator; import java.util.List; +import com.google.common.base.Preconditions; import com.google.common.collect.Iterators; import org.apache.cassandra.cql3.functions.Function; @@ -38,6 +39,10 @@ public final class Operations implements Iterable * The type of statement. */ private final StatementType type; + /** + * If this operation is for a Transaction; this causes Operations to "migrate" when they require-read + */ + private final boolean isForTxn; /** * The operations on regular columns. @@ -52,29 +57,26 @@ public final class Operations implements Iterable private final List regularSubstitutions = new ArrayList<>(); private final List staticSubstitutions = new ArrayList<>(); - public Operations(StatementType type) + public Operations(StatementType type, boolean isForTxn) { this.type = type; + this.isForTxn = isForTxn; } - public void migrateReadRequiredOperations() + private Operations(Operations other) { - migrateReadRequiredOperations(staticOperations, staticSubstitutions); - migrateReadRequiredOperations(regularOperations, regularSubstitutions); + Preconditions.checkState(!other.isForTxn, "Unable to migrate from txn to txn"); + Preconditions.checkState(other.regularSubstitutions.isEmpty() && other.staticSubstitutions.isEmpty(), "Transaction substitutions are defined for a non-transaction operations! regular=%s, static=%s", other.regularSubstitutions, other.staticSubstitutions); + + type = other.type; + isForTxn = true; + for (Operation opt : other) + add(opt); } - private static void migrateReadRequiredOperations(List src, List dest) + public Operations forTxn() { - Iterator it = src.iterator(); - while (it.hasNext()) - { - Operation next = it.next(); - if (next.requiresRead()) - { - it.remove(); - dest.add(ReferenceOperation.create(next)); - } - } + return new Operations(this); } /** @@ -84,7 +86,7 @@ private static void migrateReadRequiredOperations(List src, List staticOperations() */ public void add(Operation operation) { + if (isForTxn && operation.requiresRead()) + { + add(operation.column, ReferenceOperation.create(operation)); + return; + } if (operation.column.isStatic()) staticOperations.add(operation); else @@ -132,6 +139,7 @@ public void add(Operation operation) public void add(ColumnMetadata column, ReferenceOperation operation) { + Preconditions.checkState(isForTxn, "Unable to add a transaction reference to a non-transaction operation"); if (column.isStatic()) staticSubstitutions.add(operation); else @@ -159,7 +167,7 @@ public boolean requiresRead() */ public boolean isEmpty() { - return staticOperations.isEmpty() && regularOperations.isEmpty(); + return staticIsEmpty() && regularIsEmpty(); } /** @@ -175,6 +183,7 @@ public void addFunctionsTo(List functions) { regularOperations.forEach(p -> p.addFunctionsTo(functions)); staticOperations.forEach(p -> p.addFunctionsTo(functions)); + //TODO substitutions as well? } public List allSubstitutions() @@ -201,4 +210,14 @@ public List staticSubstitutions() { return staticSubstitutions; } + + private boolean regularIsEmpty() + { + return regularOperations.isEmpty() && regularSubstitutions.isEmpty(); + } + + private boolean staticIsEmpty() + { + return staticOperations.isEmpty() && staticSubstitutions.isEmpty(); + } } diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index 96d75d2651e0..e511af319056 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -520,7 +520,10 @@ private List createWriteFragments(ClientState state) int idx = 0; for (RowUpdate update : updates) { - ModificationStatement modification = update.stmt; + // Some operations may need to migrate to run in the transaction, so need to call forTxn to make sure this + // happens. + // see CASSANDRA-18337 + ModificationStatement modification = update.stmt.forTxn(); QueryOptions options = update.options; TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options); fragments.add(fragment); diff --git a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java index 4ce68fd75d2f..f2bbec1458d6 100644 --- a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java @@ -63,6 +63,12 @@ private DeleteStatement(VariableSpecifications bindVariables, super(StatementType.DELETE, bindVariables, cfm, operations, restrictions, conditions, attrs, source); } + @Override + protected ModificationStatement withOperations(Operations operations) + { + return new DeleteStatement(bindVariables, metadata, operations, restrictions, conditions, attrs, source); + } + @Override public void addUpdateForKey(PartitionUpdate.Builder updateBuilder, Clustering clustering, UpdateParameters params) throws InvalidRequestException @@ -135,6 +141,7 @@ public static class Parsed extends ModificationStatement.Parsed { private final List deletions; private final WhereClause whereClause; + private final boolean isForTxn; public Parsed(QualifiedName name, Attributes.Raw attrs, @@ -142,11 +149,13 @@ public Parsed(QualifiedName name, WhereClause whereClause, List conditions, boolean ifExists, - StatementSource source) + StatementSource source, + boolean isForTxn) { super(name, StatementType.DELETE, attrs, conditions, false, ifExists, source); this.deletions = deletions; this.whereClause = whereClause; + this.isForTxn = isForTxn; } @@ -157,7 +166,7 @@ protected ModificationStatement prepareInternal(ClientState state, Conditions conditions, Attributes attrs) { - Operations operations = new Operations(type); + Operations operations = new Operations(type, isForTxn); for (Operation.RawDeletion deletion : deletions) { diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index d34b73a8a7ac..51f51e640755 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -147,19 +147,23 @@ public abstract class ModificationStatement implements CQLStatement.SingleKeyspa protected final VariableSpecifications bindVariables; public final TableMetadata metadata; - private final Attributes attrs; + protected final Attributes attrs; - private final StatementRestrictions restrictions; + protected final StatementRestrictions restrictions; private final Operations operations; private final RegularAndStaticColumns updatedColumns; - private final Conditions conditions; + protected final Conditions conditions; private final RegularAndStaticColumns conditionColumns; private final RegularAndStaticColumns requiresRead; + /** + * Used by {@link #forTxn()} to only compute a migrated copy of this statement for transactions + */ + private ModificationStatement txnStmt; private final List functions; @@ -900,12 +904,24 @@ public TxnReferenceOperations getTxnReferenceOps(QueryOptions options, ClientSta return new TxnReferenceOperations(metadata, clustering, regularOps, staticOps); } - @VisibleForTesting - public void migrateReadRequiredOperations() + public ModificationStatement forTxn() { - operations.migrateReadRequiredOperations(); + if (requiresRead.isEmpty()) return this; + ModificationStatement migrated = txnStmt; + if (migrated == null) + { + synchronized (requiresRead) + { + migrated = txnStmt; + if (migrated == null) + txnStmt = migrated = withOperations(operations.forTxn()); + } + } + return migrated; } + protected abstract ModificationStatement withOperations(Operations operations); + @VisibleForTesting public List getSubstitutions() { @@ -914,9 +930,6 @@ public List getSubstitutions() public TxnWrite.Fragment getTxnWriteFragment(int index, ClientState state, QueryOptions options) { - // When an Operation requires a read, this cannot be done right away and must be done by the transaction itself, - // so migrate those Operations to a ReferenceOperation (which works properly in this case). - operations.migrateReadRequiredOperations(); PartitionUpdate baseUpdate = getTxnUpdate(state, options); TxnReferenceOperations referenceOps = getTxnReferenceOps(options, state); return new TxnWrite.Fragment(index, baseUpdate, referenceOps); diff --git a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java index d5002731f56c..0eaf58a9bbb3 100644 --- a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java @@ -87,6 +87,12 @@ private UpdateStatement(StatementType type, super(type, bindVariables, metadata, operations, restrictions, conditions, attrs, source); } + @Override + protected ModificationStatement withOperations(Operations operations) + { + return new UpdateStatement(type, bindVariables, metadata, operations, restrictions, conditions, attrs, source); + } + @Override public void addUpdateForKey(PartitionUpdate.Builder updateBuilder, Clustering clustering, UpdateParameters params) { @@ -143,6 +149,7 @@ public static class ParsedInsert extends ModificationStatement.Parsed { private final List columnNames; private final List columnValues; + private final boolean isForTxn; /** * A parsed INSERT statement. @@ -158,11 +165,13 @@ public ParsedInsert(QualifiedName name, List columnNames, List columnValues, boolean ifNotExists, - StatementSource source) + StatementSource source, + boolean isForTxn) { super(name, StatementType.INSERT, attrs, null, ifNotExists, false, source); this.columnNames = columnNames; this.columnValues = columnValues; + this.isForTxn = isForTxn; } @Override @@ -182,7 +191,7 @@ protected ModificationStatement prepareInternal(ClientState state, checkContainsNoDuplicates(columnNames, "The column names contains duplicates"); WhereClause.Builder whereClause = new WhereClause.Builder(); - Operations operations = new Operations(type); + Operations operations = new Operations(type, isForTxn); boolean hasClusteringColumnsSet = false; for (int i = 0; i < columnNames.size(); i++) @@ -244,12 +253,14 @@ public static class ParsedInsertJson extends ModificationStatement.Parsed { private final Json.Raw jsonValue; private final boolean defaultUnset; + private final boolean isForTxn; - public ParsedInsertJson(QualifiedName name, Attributes.Raw attrs, Json.Raw jsonValue, boolean defaultUnset, boolean ifNotExists, StatementSource source) + public ParsedInsertJson(QualifiedName name, Attributes.Raw attrs, Json.Raw jsonValue, boolean defaultUnset, boolean ifNotExists, StatementSource source, boolean isForTxn) { super(name, StatementType.INSERT, attrs, null, ifNotExists, false, source); this.jsonValue = jsonValue; this.defaultUnset = defaultUnset; + this.isForTxn = isForTxn; } @Override @@ -265,7 +276,7 @@ protected ModificationStatement prepareInternal(ClientState state, Json.Prepared prepared = jsonValue.prepareAndCollectMarkers(metadata, defs, bindVariables); WhereClause.Builder whereClause = new WhereClause.Builder(); - Operations operations = new Operations(type); + Operations operations = new Operations(type, isForTxn); boolean hasClusteringColumnsSet = false; for (ColumnMetadata def : defs) @@ -395,7 +406,7 @@ protected ModificationStatement prepareInternal(ClientState state, Conditions conditions, Attributes attrs) { - Operations operations = new Operations(type); + Operations operations = new Operations(type, isForTxn); for (Pair entry : updates.operations) { diff --git a/src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java b/src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java index ae5099aa3b11..231539c88834 100644 --- a/src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java +++ b/src/java/org/apache/cassandra/cql3/transactions/SelectReferenceSource.java @@ -29,7 +29,7 @@ public class SelectReferenceSource implements RowDataReference.ReferenceSource { - public static final String COLUMN_NOT_IN_SELECT_MESSAGE = "%s refererences a column not included in the select"; + public static final String COLUMN_NOT_IN_SELECT_MESSAGE = "%s references a column not included in the select"; private final SelectStatement statement; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 750b003d4c6f..f6ae047fab2b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -248,7 +248,6 @@ public static boolean isIdempotent(TransactionStatement statement) private static boolean isIdempotent(ModificationStatement update) { - update.migrateReadRequiredOperations(); // ReferenceValue.Constant is used during migration, which means a case like "a += 1" // ReferenceValue.Substitution uses a LET reference, so rerunning would always just see the new state long numConstants = update.getSubstitutions().stream() From 44824a6e45b2f9a9d9675d68f839b1ffd59e3120 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 31 Mar 2023 15:04:51 -0700 Subject: [PATCH 051/340] CEP-15 (Accord) Expected reply message with verb ACCORD_INFORM_OF_TXNID_RSP but got ACCORD_SIMPLE_RSP patch by David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-18375 --- src/java/org/apache/cassandra/net/Verb.java | 61 ++++++++----------- .../service/accord/AccordMessageSink.java | 22 +++++-- .../service/accord/async/AsyncOperation.java | 17 +++++- .../service/accord/AccordMessageSinkTest.java | 56 +++++++++++++++++ 4 files changed, 112 insertions(+), 44 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 377bb1dc5edc..fd82b76682e5 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -266,42 +266,31 @@ public enum Verb DATA_MOVEMENT_EXECUTED_REQ (817, P1, rpcTimeout, MISC, () -> DataMovement.Status.serializer, () -> DataMovements.instance, DATA_MOVEMENT_EXECUTED_RSP ), // accord - ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER), - - ACCORD_PREACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER), - ACCORD_PREACCEPT_REQ (120, P2, writeTimeout, ACCORD, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PREACCEPT_RSP), - - ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER), - ACCORD_ACCEPT_REQ (122, P2, writeTimeout, ACCORD, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), - ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, ACCORD, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), - - ACCORD_READ_RSP (128, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER), - ACCORD_READ_REQ (127, P2, writeTimeout, ACCORD, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_REQ (125, P2, writeTimeout, ACCORD, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_INVALIDATE_REQ (126, P2, writeTimeout, ACCORD, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler()), - - ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER), - ACCORD_APPLY_REQ (129, P2, writeTimeout, ACCORD, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP), - - ACCORD_RECOVER_RSP (134, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER), - ACCORD_RECOVER_REQ (133, P2, writeTimeout, ACCORD, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_RECOVER_RSP ), - ACCORD_BEGIN_INVALIDATE_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER), - ACCORD_BEGIN_INVALIDATE_REQ (135, P2, writeTimeout, ACCORD, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP), - ACCORD_WAIT_COMMIT_RSP (138, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER), - ACCORD_WAIT_COMMIT_REQ (137, P2, writeTimeout, ACCORD, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_COMMIT_RSP), - - ACCORD_INFORM_OF_TXNID_RSP(140, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER), - ACCORD_INFORM_OF_TXNID_REQ(139, P2, writeTimeout, ACCORD, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_INFORM_OF_TXNID_RSP), - - ACCORD_INFORM_HOME_DURABLE_REQ(141, P2, writeTimeout, ACCORD, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP), - - ACCORD_INFORM_DURABLE_REQ(143, P2, writeTimeout, ACCORD, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP), - - ACCORD_CHECK_STATUS_RSP (146, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER), - ACCORD_CHECK_STATUS_REQ (145, P2, writeTimeout, ACCORD, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP), - - ACCORD_GET_DEPS_RSP (148, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER), - ACCORD_GET_DEPS_REQ (147, P2, writeTimeout, ACCORD, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP), + ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), + ACCORD_PREACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), + ACCORD_PREACCEPT_REQ (120, P2, writeTimeout, ACCORD, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PREACCEPT_RSP ), + ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), + ACCORD_ACCEPT_REQ (122, P2, writeTimeout, ACCORD, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, ACCORD, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), + ACCORD_READ_RSP (128, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), + ACCORD_READ_REQ (127, P2, writeTimeout, ACCORD, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_REQ (125, P2, writeTimeout, ACCORD, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_INVALIDATE_REQ (126, P2, writeTimeout, ACCORD, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler() ), + ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), + ACCORD_APPLY_REQ (129, P2, writeTimeout, ACCORD, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP ), + ACCORD_RECOVER_RSP (132, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), + ACCORD_RECOVER_REQ (131, P2, writeTimeout, ACCORD, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_RECOVER_RSP ), + ACCORD_BEGIN_INVALIDATE_RSP (134, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), + ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, ACCORD, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP ), + ACCORD_WAIT_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), + ACCORD_WAIT_COMMIT_REQ (135, P2, writeTimeout, ACCORD, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_COMMIT_RSP ), + ACCORD_INFORM_OF_TXNID_REQ (137, P2, writeTimeout, ACCORD, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_HOME_DURABLE_REQ (138, P2, writeTimeout, ACCORD, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_DURABLE_REQ (139, P2, writeTimeout, ACCORD, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), + ACCORD_CHECK_STATUS_REQ (140, P2, writeTimeout, ACCORD, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP ), + ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_DEPS_REQ (142, P2, writeTimeout, ACCORD, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP ), // generic failure response diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index dc329e8807dd..265b17fe5539 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -24,6 +24,7 @@ import com.google.common.base.Preconditions; +import org.apache.cassandra.net.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,9 +36,6 @@ import accord.messages.ReplyContext; import accord.messages.Request; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.net.Message; -import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.net.Verb; import static org.apache.cassandra.service.accord.EndpointMapping.getEndpoint; @@ -92,6 +90,18 @@ private static Verb getVerb(MessageType type) return VerbMapping.instance.mapping.get(type); } + private final MessageDelivery messaging; + + public AccordMessageSink(MessageDelivery messaging) + { + this.messaging = messaging; + } + + public AccordMessageSink() + { + this(MessagingService.instance()); + } + @Override public void send(Node.Id to, Request request) { @@ -100,7 +110,7 @@ public void send(Node.Id to, Request request) Message message = Message.out(verb, request); InetAddressAndPort endpoint = getEndpoint(to); logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); - MessagingService.instance().send(message, endpoint); + messaging.send(message, endpoint); } @Override @@ -111,7 +121,7 @@ public void send(Node.Id to, Request request, Callback callback) Message message = Message.out(verb, request); InetAddressAndPort endpoint = getEndpoint(to); logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); - MessagingService.instance().sendWithCallback(message, endpoint, new AccordCallback<>((Callback) callback)); + messaging.sendWithCallback(message, endpoint, new AccordCallback<>((Callback) callback)); } @Override @@ -122,6 +132,6 @@ public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply Preconditions.checkArgument(replyMsg.verb() == getVerb(reply.type())); InetAddressAndPort endpoint = getEndpoint(replyingToNode); logger.debug("Replying {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); - MessagingService.instance().send(replyMsg, endpoint); + messaging.send(replyMsg, endpoint); } } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index db659d623f83..2f2dd2ce6d8c 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -80,7 +80,19 @@ enum State AWAITING_SAVE, // wait for writes to complete COMPLETING, FINISHED, - FAILED + FAILED; + + boolean isComplete() + { + switch (this) + { + case FAILED: + case FINISHED: + return true; + default: + return false; + } + } } private State state = State.INITIALIZED; @@ -179,7 +191,8 @@ private void finish(R result) private void fail(Throwable throwable) { Invariants.nonNull(throwable); - Invariants.checkArgument(state != State.FINISHED && state != State.FAILED, "Unexpected state %s", state); + if (state.isComplete()) + throw new IllegalStateException("Unexpected state " + state, throwable); try { switch (state) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java new file mode 100644 index 000000000000..d8e2ae898c1b --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import org.apache.cassandra.net.MessageDelivery; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.Node; +import accord.messages.InformOfTxnId; +import accord.messages.SimpleReply; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.Verb; +import org.mockito.Mockito; + +public class AccordMessageSinkTest +{ + @BeforeClass + public static void setup() + { + DatabaseDescriptor.clientInitialization(); + } + + @Test + public void informOfTxn() + { + // There was an issue where the reply was the wrong verb + // see CASSANDRA-18375 + InformOfTxnId info = Mockito.mock(InformOfTxnId.class); + Message req = Message.builder(Verb.ACCORD_INFORM_OF_TXNID_REQ, info).build(); + SimpleReply reply = SimpleReply.Ok; + + MessageDelivery messaging = Mockito.mock(MessageDelivery.class); + AccordMessageSink sink = new AccordMessageSink(messaging); + sink.reply(new Node.Id(1), req, reply); + + Mockito.verify(messaging).send(Mockito.any(), Mockito.any()); + } +} \ No newline at end of file From 7ad2bf672c013d1d9c9d168547950b7b0e60f958 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 7 Apr 2023 15:39:42 -0700 Subject: [PATCH 052/340] CEP-15 (Accord) Original and recover coordinators may hit a race condition with PreApply where reads and writes are interleaved, causing one of the coordinators to see the writes from the other patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-18422 --- modules/accord | 2 +- .../service/accord/AccordCommandStore.java | 8 + .../simulator/paxos/HistoryValidatorTest.java | 225 +++++++++++++----- .../accord/async/AsyncOperationTest.java | 13 +- 4 files changed, 181 insertions(+), 67 deletions(-) diff --git a/modules/accord b/modules/accord index bc81f81c75f9..08aaab6e33d4 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit bc81f81c75f93c73989a30bbc51b5c241a893c1a +Subproject commit 08aaab6e33d43406e0649146144e4df67648602a diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 5c9f3e4e9da8..70962298f455 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import java.util.Map; +import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; @@ -42,6 +43,7 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -187,6 +189,12 @@ public AsyncChain submit(PreLoadContext loadCtx, Function AsyncChain submit(Callable task) + { + return AsyncChains.ofCallable(executor, task); + } + public DataStore dataStore() { return dataStore; diff --git a/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java b/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java index 018affc43108..d7df4972cca6 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java @@ -44,6 +44,7 @@ import com.carrotsearch.hppc.IntIntMap; import com.carrotsearch.hppc.IntSet; import org.apache.cassandra.config.CassandraRelevantProperties; +import com.carrotsearch.hppc.cursors.IntCursor; import org.apache.cassandra.distributed.api.QueryResults; import org.apache.cassandra.utils.Clock; import org.assertj.core.api.AbstractThrowableAssert; @@ -282,6 +283,32 @@ public void seenBehavior() ); } + private static String trim(String log, int... keys) + { + // this is deaad code, but exists to help when new validation errors are detected + // the logic will shrink the history to only contain transactions that contain the set of keys + IntSet set = new IntHashSet(); + IntStream.of(keys).forEach(set::add); + Parsed parsed = parse(log); + StringBuilder sb = new StringBuilder(); + for (Witness w : parsed.witnesses) + { + boolean match = false; + for (IntCursor pk : w.pks()) + { + if (set.contains(pk.value)) + { + match = true; + break; + } + } + if (!match) continue; + sb.append(w).append("\n"); + } + return sb.toString(); + } + + private void requiresMultiKeySupport() { Assume.assumeTrue("Validator " + factory.getClass() + " does not support multi-key", factory instanceof StrictSerializabilityValidator.Factory); @@ -357,79 +384,146 @@ private static Event writeOnly(int pk) return new Event(EnumSet.of(Event.Type.WRITE), pk, null); } - private void fromLog(String log) + private interface Operation { - IntSet pks = new IntHashSet(); - class Read + int pk(); + void check(HistoryValidator.Checker check); + void appendString(StringBuilder sb); + } + + private static class Read implements Operation + { + final int pk, id, count; + final int[] seq; + + Read(int pk, int id, int count, int[] seq) { - final int pk, id, count; - final int[] seq; + this.pk = pk; + this.id = id; + this.count = count; + this.seq = seq; + } - Read(int pk, int id, int count, int[] seq) - { - this.pk = pk; - this.id = id; - this.count = count; - this.seq = seq; - } + @Override + public int pk() + { + return pk; } - class Write + + @Override + public void check(HistoryValidator.Checker check) { - final int pk, id; - final boolean success; + check.read(pk, id, count, seq); + } - Write(int pk, int id, boolean success) - { - this.pk = pk; - this.id = id; - this.success = success; - } + @Override + public void appendString(StringBuilder sb) + { + sb.append("read(pk=").append(pk).append(", id=").append(id).append(", count=").append(count).append(", seq=").append(Arrays.toString(seq)).append(")\n"); } - class Witness + } + + private static class Write implements Operation + { + final int pk, id; + final boolean success; + + Write(int pk, int id, boolean success) { - final int start, end; - final List actions = new ArrayList<>(); + this.pk = pk; + this.id = id; + this.success = success; + } - Witness(int start, int end) - { - this.start = start; - this.end = end; - } + @Override + public int pk() + { + return pk; + } - void read(int pk, int id, int count, int[] seq) - { - actions.add(new Read(pk, id, count, seq)); - } + @Override + public void check(HistoryValidator.Checker check) + { + check.write(pk, id, success); + } - void write(int pk, int id, boolean success) - { - actions.add(new Write(pk, id, success)); - } + @Override + public void appendString(StringBuilder sb) + { + sb.append("write(pk=").append(pk).append(", id=").append(id).append(", success=").append(success).append(")\n"); + } + } + + private static class Witness + { + final int start, end; + final List actions = new ArrayList<>(); + + Witness(int start, int end) + { + this.start = start; + this.end = end; + } + + void read(int pk, int id, int count, int[] seq) + { + actions.add(new Read(pk, id, count, seq)); + } - void process(HistoryValidator validator) + void write(int pk, int id, boolean success) + { + actions.add(new Write(pk, id, success)); + } + + void process(HistoryValidator validator) + { + try (HistoryValidator.Checker check = validator.witness(start, end)) { - try (HistoryValidator.Checker check = validator.witness(start, end)) - { - for (Object a : actions) - { - if (a instanceof Read) - { - Read read = (Read) a; - check.read(read.pk, read.id, read.count, read.seq); - } - else - { - Write write = (Write) a; - check.write(write.pk, write.id, write.success); - } - } - } + for (Operation a : actions) + a.check(check); } } + + IntSet pks() + { + IntSet pks = new IntHashSet(); + for (Operation action : actions) + pks.add(action.pk()); + return pks; + } + + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + sb.append("Witness(start=").append(start).append(", end=").append(end).append(")\n"); + for (Operation a : actions) + a.appendString(sb.append('\t')); + return sb.toString(); + } + } + + private static class Parsed + { + private final int[] keys; + private final List witnesses; + + private Parsed(int[] keys, List witnesses) + { + this.keys = keys; + this.witnesses = witnesses; + } + } + + private static Parsed parse(String log) + { + IntSet pks = new IntHashSet(); List witnesses = new ArrayList<>(); Witness current = null; for (String line : log.split("\n")) { + if (line.trim().isEmpty()) + continue; if (line.startsWith("Witness")) { if (current != null) @@ -469,9 +563,26 @@ else if (line.startsWith("\twrite")) witnesses.add(current); int[] keys = pks.toArray(); Arrays.sort(keys); - HistoryValidator validator = factory.create(keys); - for (Witness w : witnesses) - w.process(validator); + return new Parsed(keys, witnesses); + } + + private void fromLog(String log) + { + Parsed parsed = parse(log); + HistoryValidator validator = factory.create(parsed.keys); + for (Witness w : parsed.witnesses) + { + try + { + w.process(validator); + } + catch (HistoryViolation e) + { + HistoryViolation hv = new HistoryViolation(e.primaryKey, "Violation detected for witnessed action " + w + "; " + e.getMessage() + ";\n" + log); + hv.setStackTrace(e.getStackTrace()); + throw hv; + } + } } private static class Event diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 37b1f556af15..6eb1af434edc 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -42,8 +42,8 @@ import accord.api.RoutingKey; import accord.impl.SafeCommandsForKey; +import accord.local.CheckedCommands; import accord.local.Command; -import accord.local.Commands; import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; @@ -194,14 +194,9 @@ private static Command createCommittedUsingLifeCycle(AccordCommandStore commandS try { return getUninterruptibly(commandStore.submit(PreLoadContext.contextFor(Collections.singleton(txnId), partialTxn.keys()), safe -> { - Commands.AcceptOutcome result = Commands.preaccept(safe, txnId, partialTxn, route, null); - if (result != Commands.AcceptOutcome.Success) throw new IllegalStateException("Command mutation rejected: " + result); - - result = Commands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); - if (result != Commands.AcceptOutcome.Success) throw new IllegalStateException("Command mutation rejected: " + result); - - Commands.CommitOutcome commit = Commands.commit(safe, txnId, route, null, partialTxn, executeAt, deps); - if (commit != Commands.CommitOutcome.Success) throw new IllegalStateException("Command mutation rejected: " + result); + CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); + CheckedCommands.commit(safe, txnId, route, null, partialTxn, executeAt, deps); // clear cache long cacheSize = commandStore.getCacheSize(); From 8633a301f793c06d583c1d014d9495d12b87c856 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 27 Apr 2023 11:30:50 -0700 Subject: [PATCH 053/340] CEP-15: (C*) Accord message processing should avoid being passed on to a Stage and run directly in the messageing handler patch by David Capwell; reviewed by Ariel Weisberg, Benedict Elliott Smith for CASSANDRA-18364 --- .build/build-accord.xml | 4 +- .../100-verify-submodules-pushed.sh | 1 - modules/accord | 2 +- .../apache/cassandra/concurrent/Stage.java | 1 - .../org/apache/cassandra/config/Config.java | 5 +- .../cassandra/config/DatabaseDescriptor.java | 30 +++----- .../config/OptionaldPositiveInt.java | 73 +++++++++++++++++++ .../config/YamlConfigurationLoader.java | 26 ++++--- src/java/org/apache/cassandra/net/Verb.java | 30 ++++---- .../service/accord/AccordCallback.java | 21 ++++-- .../service/accord/AccordCommandStore.java | 33 ++------- .../service/accord/AccordCommandStores.java | 5 +- .../service/accord/AccordMessageSink.java | 23 +++--- .../service/accord/AccordService.java | 8 +- .../service/accord/async/AsyncOperation.java | 2 +- .../serializers/ReadDataSerializers.java | 6 +- .../cassandra/service/accord/txn/TxnData.java | 3 + .../apache/cassandra/utils/FBUtilities.java | 7 +- test/data/config/version=5.1-alpha1.yml | 2 +- .../simulator/ClusterSimulation.java | 7 +- ...bstractPairOfSequencesPaxosSimulation.java | 4 +- .../simulator/paxos/PaxosSimulation.java | 32 ++++++-- .../simulator/systems/SimulatedAction.java | 8 +- .../config/DatabaseDescriptorRefTest.java | 1 + .../config/YamlConfigurationLoaderTest.java | 51 +++++++++++++ .../service/accord/AccordMessageSinkTest.java | 12 ++- 26 files changed, 273 insertions(+), 124 deletions(-) create mode 100644 src/java/org/apache/cassandra/config/OptionaldPositiveInt.java diff --git a/.build/build-accord.xml b/.build/build-accord.xml index eba85912d52e..6fc716d2d0c2 100644 --- a/.build/build-accord.xml +++ b/.build/build-accord.xml @@ -27,8 +27,10 @@ - + + + diff --git a/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh b/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh index c54099ac0f9a..aee8f658a12a 100755 --- a/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh +++ b/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh @@ -84,7 +84,6 @@ _main() { _log "\t\tgit config --local cassandra.pre-commit.verify-submodules.enabled false" _log "\tOr" _log "\t\tgit config --local cassandra.pre-commit.verify-submodule-${file}.enabled false" - set -x git_sub_dir="${file}/.git" branch="$(git config -f .gitmodules "submodule.${file}.branch")" [[ -z "${branch:-}" ]] && error "Submodule ${file} does not define a branch" diff --git a/modules/accord b/modules/accord index 08aaab6e33d4..8226b2d77593 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 08aaab6e33d43406e0649146144e4df67648602a +Subproject commit 8226b2d7759319d7a0b0c823ab09b4344c5423f7 diff --git a/src/java/org/apache/cassandra/concurrent/Stage.java b/src/java/org/apache/cassandra/concurrent/Stage.java index d0ea2c10f597..808dc34b6844 100644 --- a/src/java/org/apache/cassandra/concurrent/Stage.java +++ b/src/java/org/apache/cassandra/concurrent/Stage.java @@ -47,7 +47,6 @@ public enum Stage MUTATION (true, "MutationStage", "request", DatabaseDescriptor::getConcurrentWriters, DatabaseDescriptor::setConcurrentWriters, Stage::multiThreadedLowSignalStage), COUNTER_MUTATION (true, "CounterMutationStage", "request", DatabaseDescriptor::getConcurrentCounterWriters, DatabaseDescriptor::setConcurrentCounterWriters, Stage::multiThreadedLowSignalStage), VIEW_MUTATION (true, "ViewMutationStage", "request", DatabaseDescriptor::getConcurrentViewWriters, DatabaseDescriptor::setConcurrentViewWriters, Stage::multiThreadedLowSignalStage), - ACCORD (true, "AccordStage", "request", DatabaseDescriptor::getConcurrentAccordOps, DatabaseDescriptor::setConcurrentAccordOps, Stage::multiThreadedLowSignalStage), GOSSIP (true, "GossipStage", "internal", () -> 1, null, Stage::singleThreadedStage), REQUEST_RESPONSE (false, "RequestResponseStage", "request", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedLowSignalStage), ANTI_ENTROPY (false, "AntiEntropyStage", "internal", () -> 1, null, Stage::singleThreadedStage), diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index b8cae7b18263..63e81481be8e 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -46,6 +46,7 @@ import org.apache.cassandra.utils.StorageCompatibilityMode; import static org.apache.cassandra.config.CassandraRelevantProperties.AUTOCOMPACTION_ON_STARTUP_ENABLED; +import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_AVAILABLE_PROCESSORS; import static org.apache.cassandra.config.CassandraRelevantProperties.FILE_CACHE_ENABLED; import static org.apache.cassandra.config.CassandraRelevantProperties.SKIP_PAXOS_REPAIR_ON_TOPOLOGY_CHANGE; import static org.apache.cassandra.config.CassandraRelevantProperties.SKIP_PAXOS_REPAIR_ON_TOPOLOGY_CHANGE_KEYSPACES; @@ -190,10 +191,9 @@ public static Set splitCommaDelimited(String src) public int concurrent_reads = 32; public int concurrent_writes = 32; - public int concurrent_accord_operations = 32; public int concurrent_counter_writes = 32; public int concurrent_materialized_view_writes = 32; - public int available_processors = -1; + public OptionaldPositiveInt available_processors = new OptionaldPositiveInt(CASSANDRA_AVAILABLE_PROCESSORS.getInt(OptionaldPositiveInt.UNDEFINED_VALUE)); public int memtable_flush_writers = 0; @Replaces(oldName = "memtable_heap_space_in_mb", converter = Converters.MEBIBYTES_DATA_STORAGE_INT, deprecated = true) @@ -624,6 +624,7 @@ public static class SSTableConfig public volatile boolean use_statements_enabled = true; public boolean accord_transactions_enabled = false; + public OptionaldPositiveInt accord_shard_count = OptionaldPositiveInt.UNDEFINED; /** * Optionally disable asynchronous UDF execution. diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 5e0b01426d62..0c15ba122798 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -643,9 +643,6 @@ else if (conf.disk_access_mode == DiskAccessMode.direct) if (conf.concurrent_counter_writes < 2) throw new ConfigurationException("concurrent_counter_writes must be at least 2, but was " + conf.concurrent_counter_writes, false); - if (conf.concurrent_accord_operations < 1) - throw new ConfigurationException("concurrent_accord_operations must be at least 1, but was " + conf.concurrent_accord_operations, false); - if (conf.networking_cache_size == null) conf.networking_cache_size = new DataStorageSpec.IntMebibytesBound(Math.min(128, (int) (Runtime.getRuntime().maxMemory() / (16 * 1048576)))); @@ -2670,20 +2667,6 @@ public static void setConcurrentViewWriters(int concurrent_materialized_view_wri conf.concurrent_materialized_view_writes = concurrent_materialized_view_writes; } - public static int getConcurrentAccordOps() - { - return conf.concurrent_accord_operations; - } - - public static void setConcurrentAccordOps(int concurrent_operations) - { - if (concurrent_operations < 0) - { - throw new IllegalArgumentException("Concurrent accord operations must be non-negative"); - } - conf.concurrent_accord_operations = concurrent_operations; - } - public static int getFlushWriters() { return conf.memtable_flush_writers; @@ -2691,7 +2674,13 @@ public static int getFlushWriters() public static int getAvailableProcessors() { - return conf == null ? -1 : conf.available_processors; + OptionaldPositiveInt ap = conf == null ? OptionaldPositiveInt.UNDEFINED : conf.available_processors; + return ap.or(Runtime.getRuntime()::availableProcessors); + } + + public static void setAvailableProcessors(int value) + { + conf.available_processors = new OptionaldPositiveInt(value); } public static int getConcurrentCompactors() @@ -5198,6 +5187,11 @@ public static void setAccordTransactionsEnabled(boolean b) conf.accord_transactions_enabled = b; } + public static int getAccordShardCount() + { + return conf.accord_shard_count.or(DatabaseDescriptor::getAvailableProcessors); + } + public static boolean getForceNewPreparedStatementBehaviour() { return conf.force_new_prepared_statement_behaviour; diff --git a/src/java/org/apache/cassandra/config/OptionaldPositiveInt.java b/src/java/org/apache/cassandra/config/OptionaldPositiveInt.java new file mode 100644 index 000000000000..ea33b7af98f6 --- /dev/null +++ b/src/java/org/apache/cassandra/config/OptionaldPositiveInt.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.config; + +import java.util.Objects; +import java.util.function.IntSupplier; + +public class OptionaldPositiveInt +{ + public static final int UNDEFINED_VALUE = -1; + public static final OptionaldPositiveInt UNDEFINED = new OptionaldPositiveInt(UNDEFINED_VALUE); + + private final int value; + + public OptionaldPositiveInt(int value) + { + if (!(value == -1 || value >= 1)) + throw new IllegalArgumentException(String.format("Only -1 (undefined) and positive values are allowed; given %d", value)); + this.value = value; + } + + public boolean isDefined() + { + return value != UNDEFINED_VALUE; + } + + public int or(int defaultValue) + { + return value == UNDEFINED_VALUE ? defaultValue : value; + } + + public int or(IntSupplier defaultValue) + { + return value == UNDEFINED_VALUE ? defaultValue.getAsInt() : value; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OptionaldPositiveInt that = (OptionaldPositiveInt) o; + return value == that.value; + } + + @Override + public int hashCode() + { + return Objects.hash(value); + } + + @Override + public String toString() + { + return !isDefined() ? "null" : Integer.toString(value); + } +} diff --git a/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java b/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java index 9bf4e415592c..f37a42e8fa54 100644 --- a/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java +++ b/src/java/org/apache/cassandra/config/YamlConfigurationLoader.java @@ -135,16 +135,7 @@ public Config loadConfig(URL url) throws ConfigurationException throw new AssertionError(e); } - SafeConstructor constructor = new CustomConstructor(Config.class, Yaml.class.getClassLoader()); - Map, Map> replacements = getNameReplacements(Config.class); - verifyReplacements(replacements, configBytes); - PropertiesChecker propertiesChecker = new PropertiesChecker(replacements); - constructor.setPropertyUtils(propertiesChecker); - Yaml yaml = new Yaml(constructor); - Config result = loadConfig(yaml, configBytes); - propertiesChecker.check(); - maybeAddSystemProperties(result); - return result; + return loadConfig(configBytes); } catch (YAMLException e) { @@ -152,6 +143,21 @@ public Config loadConfig(URL url) throws ConfigurationException } } + @VisibleForTesting + static Config loadConfig(byte[] configBytes) + { + SafeConstructor constructor = new CustomConstructor(Config.class, Yaml.class.getClassLoader()); + Map, Map> replacements = getNameReplacements(Config.class); + verifyReplacements(replacements, configBytes); + PropertiesChecker propertiesChecker = new PropertiesChecker(replacements); + constructor.setPropertyUtils(propertiesChecker); + Yaml yaml = new Yaml(constructor); + Config result = loadConfig(yaml, configBytes); + propertiesChecker.check(); + maybeAddSystemProperties(result); + return result; + } + private static void maybeAddSystemProperties(Object obj) { if (CassandraRelevantProperties.CONFIG_ALLOW_SYSTEM_PROPERTIES.getBoolean()) diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index fd82b76682e5..b1917d448930 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -268,29 +268,29 @@ public enum Verb // accord ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), ACCORD_PREACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_PREACCEPT_REQ (120, P2, writeTimeout, ACCORD, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PREACCEPT_RSP ), + ACCORD_PREACCEPT_REQ (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PREACCEPT_RSP ), ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_ACCEPT_REQ (122, P2, writeTimeout, ACCORD, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), - ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, ACCORD, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_REQ (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), ACCORD_READ_RSP (128, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), - ACCORD_READ_REQ (127, P2, writeTimeout, ACCORD, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_REQ (125, P2, writeTimeout, ACCORD, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_INVALIDATE_REQ (126, P2, writeTimeout, ACCORD, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler() ), + ACCORD_READ_REQ (127, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_REQ (125, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_INVALIDATE_REQ (126, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler() ), ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), - ACCORD_APPLY_REQ (129, P2, writeTimeout, ACCORD, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP ), + ACCORD_APPLY_REQ (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP ), ACCORD_RECOVER_RSP (132, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), - ACCORD_RECOVER_REQ (131, P2, writeTimeout, ACCORD, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_RECOVER_RSP ), + ACCORD_RECOVER_REQ (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_RECOVER_RSP ), ACCORD_BEGIN_INVALIDATE_RSP (134, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), - ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, ACCORD, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP ), + ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP ), ACCORD_WAIT_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), - ACCORD_WAIT_COMMIT_REQ (135, P2, writeTimeout, ACCORD, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_COMMIT_RSP ), - ACCORD_INFORM_OF_TXNID_REQ (137, P2, writeTimeout, ACCORD, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_HOME_DURABLE_REQ (138, P2, writeTimeout, ACCORD, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_DURABLE_REQ (139, P2, writeTimeout, ACCORD, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_WAIT_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_COMMIT_RSP ), + ACCORD_INFORM_OF_TXNID_REQ (137, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_HOME_DURABLE_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), - ACCORD_CHECK_STATUS_REQ (140, P2, writeTimeout, ACCORD, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP ), + ACCORD_CHECK_STATUS_REQ (140, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP ), ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_GET_DEPS_REQ (142, P2, writeTimeout, ACCORD, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP ), + ACCORD_GET_DEPS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP ), // generic failure response diff --git a/src/java/org/apache/cassandra/service/accord/AccordCallback.java b/src/java/org/apache/cassandra/service/accord/AccordCallback.java index 60b5d6988a90..20ed9fad69ac 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCallback.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCallback.java @@ -22,28 +22,29 @@ import org.slf4j.LoggerFactory; import accord.coordinate.Timeout; +import accord.local.AgentExecutor; import accord.messages.Callback; +import accord.messages.SafeCallback; import accord.messages.Reply; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.RequestCallback; -class AccordCallback implements RequestCallback +class AccordCallback extends SafeCallback implements RequestCallback { private static final Logger logger = LoggerFactory.getLogger(AccordCallback.class); - private final Callback callback; - public AccordCallback(Callback callback) + public AccordCallback(AgentExecutor executor, Callback callback) { - this.callback = callback; + super(executor, callback); } @Override public void onResponse(Message msg) { logger.debug("Received response {} from {}", msg.payload, msg.from()); - callback.onSuccess(EndpointMapping.endpointToId(msg.from()), msg.payload); + success(EndpointMapping.endpointToId(msg.from()), msg.payload); } private static Throwable convertReason(RequestFailureReason reason) @@ -56,9 +57,15 @@ private static Throwable convertReason(RequestFailureReason reason) @Override public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) { - logger.debug("Received failure {} from {} for {}", failureReason, from, callback); + logger.debug("Received failure {} from {} for {}", failureReason, from, this); // TODO (now): we should distinguish timeout failures with some placeholder Exception - callback.onFailure(EndpointMapping.endpointToId(from), convertReason(failureReason)); + failure(EndpointMapping.endpointToId(from), convertReason(failureReason)); + } + + @Override + public boolean trackLatencyForSnitch() + { + return true; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 70962298f455..1633dc793005 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -50,7 +50,7 @@ import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -public class AccordCommandStore implements CommandStore +public class AccordCommandStore extends CommandStore { private static long getThreadId(ExecutorService executor) { @@ -68,7 +68,6 @@ private static long getThreadId(ExecutorService executor) } } - private final int id; private final long threadId; public final String loggingId; private final ExecutorService executor; @@ -79,12 +78,6 @@ private static long getThreadId(ExecutorService executor) private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; - private final NodeTimeService time; - private final Agent agent; - private final DataStore dataStore; - private final ProgressLog progressLog; - private final RangesForEpochHolder rangesForEpochHolder; - public AccordCommandStore(int id, NodeTimeService time, Agent agent, @@ -92,24 +85,20 @@ public AccordCommandStore(int id, ProgressLog.Factory progressLogFactory, RangesForEpochHolder rangesForEpoch) { - this.id = id; - this.time = time; - this.agent = agent; - this.dataStore = dataStore; - this.progressLog = progressLogFactory.create(this); - this.rangesForEpochHolder = rangesForEpoch; + super(id, time, agent, dataStore, progressLogFactory, rangesForEpoch); this.loggingId = String.format("[%s]", id); this.executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); this.threadId = getThreadId(this.executor); this.stateCache = new AccordStateCache(8<<20); this.commandCache = stateCache.instance(TxnId.class, accord.local.Command.class, AccordSafeCommand::new, AccordObjectSizes::command); this.commandsForKeyCache = stateCache.instance(RoutableKey.class, CommandsForKey.class, AccordSafeCommandsForKey::new, AccordObjectSizes::commandsForKey); + executor.execute(() -> CommandStore.register(this)); } @Override - public int id() + public boolean inStore() { - return id; + return Thread.currentThread().getId() == threadId; } public void setCacheSize(long bytes) @@ -125,12 +114,12 @@ public long getCacheSize() public void checkInStoreThread() { - Invariants.checkState(Thread.currentThread().getId() == threadId); + Invariants.checkState(inStore()); } public void checkNotInStoreThread() { - Invariants.checkState(Thread.currentThread().getId() != threadId); + Invariants.checkState(!inStore()); } public ExecutorService executor() @@ -197,13 +186,7 @@ public AsyncChain submit(Callable task) public DataStore dataStore() { - return dataStore; - } - - @Override - public Agent agent() - { - return agent; + return store; } NodeTimeService time() diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 0708f092d274..45208e5c343c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -25,14 +25,15 @@ import accord.local.NodeTimeService; import accord.local.ShardDistributor; import accord.topology.Topology; +import accord.utils.RandomSource; public class AccordCommandStores extends CommandStores { private long cacheSize; - AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, + AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, RandomSource random, ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory) { - super(time, agent, store, shardDistributor, progressLogFactory, AccordCommandStore::new); + super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore::new); setCacheSize(maxCacheSize()); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 265b17fe5539..b95fb8b13860 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -20,7 +20,6 @@ import java.util.EnumMap; import java.util.Map; -import java.util.Objects; import com.google.common.base.Preconditions; @@ -28,7 +27,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.Agent; import accord.api.MessageSink; +import accord.local.AgentExecutor; import accord.local.Node; import accord.messages.Callback; import accord.messages.MessageType; @@ -90,23 +91,25 @@ private static Verb getVerb(MessageType type) return VerbMapping.instance.mapping.get(type); } + private final Agent agent; private final MessageDelivery messaging; - public AccordMessageSink(MessageDelivery messaging) + public AccordMessageSink(Agent agent, MessageDelivery messaging) { + this.agent = agent; this.messaging = messaging; } - public AccordMessageSink() + public AccordMessageSink(Agent agent) { - this(MessagingService.instance()); + this(agent, MessagingService.instance()); } @Override public void send(Node.Id to, Request request) { Verb verb = getVerb(request.type()); - Objects.requireNonNull(verb, "verb"); + Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); InetAddressAndPort endpoint = getEndpoint(to); logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); @@ -114,14 +117,14 @@ public void send(Node.Id to, Request request) } @Override - public void send(Node.Id to, Request request, Callback callback) + public void send(Node.Id to, Request request, AgentExecutor executor, Callback callback) { Verb verb = getVerb(request.type()); - Preconditions.checkArgument(verb != null); + Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); InetAddressAndPort endpoint = getEndpoint(to); logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); - messaging.sendWithCallback(message, endpoint, new AccordCallback<>((Callback) callback)); + messaging.sendWithCallback(message, endpoint, new AccordCallback<>(executor, (Callback) callback)); } @Override @@ -129,7 +132,9 @@ public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply { Message replyTo = (Message) replyContext; Message replyMsg = replyTo.responseWith(reply); - Preconditions.checkArgument(replyMsg.verb() == getVerb(reply.type())); + Verb verb = getVerb(reply.type()); + Preconditions.checkNotNull(verb, "Verb is null for type %s", reply.type()); + Preconditions.checkArgument(replyMsg.verb() == verb, "Expected reply message with verb %s but got %s; reply type was %s", replyMsg.verb(), verb, reply.type()); InetAddressAndPort endpoint = getEndpoint(replyingToNode); logger.debug("Replying {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); messaging.send(replyMsg, endpoint); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index a86cb70c5369..d8b9f4d89b43 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -61,7 +61,6 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; -import static org.apache.cassandra.config.DatabaseDescriptor.getConcurrentAccordOps; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; import static org.apache.cassandra.utils.Clock.Global.nanoTime; @@ -134,7 +133,8 @@ private AccordService() { Node.Id localId = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); logger.info("Starting accord with nodeId {}", localId); - this.messageSink = new AccordMessageSink(); + AccordAgent agent = new AccordAgent(); + this.messageSink = new AccordMessageSink(agent); this.configService = new AccordConfigurationService(localId); this.scheduler = new AccordScheduler(); this.node = new Node(localId, @@ -142,8 +142,8 @@ private AccordService() configService, AccordService::uniqueNow, () -> null, - new KeyspaceSplitter(new EvenSplit<>(getConcurrentAccordOps(), getPartitioner().accordSplitter())), - new AccordAgent(), + new KeyspaceSplitter(new EvenSplit<>(DatabaseDescriptor.getAccordShardCount(), getPartitioner().accordSplitter())), + agent, new DefaultRandom(), scheduler, SizeOfIntersectionSorter.SUPPLIER, diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 2f2dd2ce6d8c..ce5bb125f14b 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -302,7 +302,7 @@ public void start(BiConsumer callback) { Invariants.checkArgument(this.callback == null); this.callback = callback; - commandStore.executor().submit(this); + commandStore.executor().execute(this); } private static Iterable toRoutableKeys(Seekables keys) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 4899316cc570..a344028b4915 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -81,7 +81,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I out.writeByte(0); ReadOk readOk = (ReadOk) reply; - TxnData.serializer.serialize((TxnData) readOk.data, out, version); + TxnData.nullableSerializer.serialize((TxnData) readOk.data, out, version); } @Override @@ -91,7 +91,7 @@ public ReadReply deserialize(DataInputPlus in, int version) throws IOException if (id != 0) return nacks[id - 1]; - return new ReadOk(TxnData.serializer.deserialize(in, version)); + return new ReadOk(TxnData.nullableSerializer.deserialize(in, version)); } @Override @@ -101,7 +101,7 @@ public long serializedSize(ReadReply reply, int version) return TypeSizes.BYTE_SIZE; ReadOk readOk = (ReadOk) reply; - return TypeSizes.BYTE_SIZE + TxnData.serializer.serializedSize((TxnData) readOk.data, version); + return TypeSizes.BYTE_SIZE + TxnData.nullableSerializer.serializedSize((TxnData) readOk.data, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnData.java b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java index a095d8ba78de..c3d8f6e18df6 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnData.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java @@ -40,6 +40,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.NullableSerializer; import org.apache.cassandra.utils.ObjectSizes; public class TxnData implements Data, Result, Iterable @@ -196,4 +197,6 @@ public long serializedSize(TxnData data, int version) return size; } }; + + public static final IVersionedSerializer nullableSerializer = NullableSerializer.wrap(serializer); } diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 91b608d18553..79b5c621c6ff 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -137,7 +137,7 @@ public class FBUtilities public static void setAvailableProcessors(int value) { - availableProcessors = value; + DatabaseDescriptor.setAvailableProcessors(value); } @VisibleForTesting @@ -148,10 +148,7 @@ public static void setSystemInfoSupplier(Supplier supplier) public static int getAvailableProcessors() { - if (availableProcessors > 0) - return availableProcessors; - else - return Runtime.getRuntime().availableProcessors(); + return DatabaseDescriptor.getAvailableProcessors(); } public static final int MAX_UNSIGNED_SHORT = 0xFFFF; diff --git a/test/data/config/version=5.1-alpha1.yml b/test/data/config/version=5.1-alpha1.yml index e730adcdb9bf..de5801c30249 100644 --- a/test/data/config/version=5.1-alpha1.yml +++ b/test/data/config/version=5.1-alpha1.yml @@ -408,7 +408,7 @@ max_concurrent_automatic_sstable_upgrades: "java.lang.Integer" maximum_replication_factor_warn_threshold: "java.lang.Integer" denylist_reads_enabled: "java.lang.Boolean" permissions_cache_active_update: "java.lang.Boolean" -available_processors: "java.lang.Integer" +available_processors: "org.apache.cassandra.config.OptionaldPositiveInt" file_cache_round_up: "java.lang.Boolean" secondary_indexes_per_table_warn_threshold: "java.lang.Integer" tables_warn_threshold: "java.lang.Integer" diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index 4998e0e67ec7..9b31de369b0f 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -91,7 +91,6 @@ import org.apache.cassandra.simulator.utils.LongRange; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.Closeable; -import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.concurrent.Ref; import org.apache.cassandra.utils.memory.BufferPool; @@ -580,7 +579,7 @@ IInstanceConfig update(IInstanceConfig config) .set("concurrent_counter_writes", take(1, 4)) .set("concurrent_materialized_view_writes", take(1, 4)) .set("concurrent_reads", take(1, 4)) - .forceSet("available_processors", take(3, 4)); + .set("available_processors", take(3, 4)); } // begin allocating for a new node @@ -627,7 +626,7 @@ int take(int times, int min, int max) if (remaining * min <= allocationPool) return min; if (times == remaining) - return allocationPool / remaining; + return Math.max(allocationPool / remaining, min); if (times + 1 == remaining) return random.uniform(Math.max(min, (allocationPool - max) / times), Math.min(max, (allocationPool - min) / times)); @@ -638,7 +637,6 @@ int take(int times, int min, int max) } } - public final RandomSource random; public final SimulatedSystems simulated; public final Cluster cluster; @@ -772,7 +770,6 @@ public void initialise(ClassLoader classLoader, ThreadGroup threadGroup, int num @Override public void beforeStartup(IInstance i) { - ((IInvokableInstance) i).unsafeAcceptOnThisThread(FBUtilities::setAvailableProcessors, i.config().getInt("available_processors")); ((IInvokableInstance) i).unsafeAcceptOnThisThread(IfInterceptibleThread::setThreadLocalRandomCheck, (LongConsumer) threadLocalRandomCheck); int num = i.config().num(); diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java index 5a528468ea39..844a8df36882 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java @@ -209,6 +209,8 @@ public ActionPlan plan() public Action get() { int[] primaryKeyIndex = consume(simulated.random, available); + if (primaryKeyIndex == null) + return Actions.empty("All primary keys are taken, try again later"); long untilNanos = simulated.time.nanoTime() + SECONDS.toNanos(simulateKeyForSeconds.select(simulated.random)); int concurrency = withinKeyConcurrency.select(simulated.random); Supplier supplier = factory.apply(simulated, primaryKeyIndex); @@ -249,7 +251,7 @@ public String toString() private int[] consume(RandomSource random, List available) { if (available.isEmpty()) - throw new AssertionError("available partitions are empty!"); + return null; int numPartitions = available.size() == 1 || !allowMultiplePartitions() ? 1 : random.uniform(1, available.size()); int[] partitions = new int[numPartitions]; for (int counter = 0; counter < numPartitions; counter++) diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java index 004dc5138247..fbe7ba8835eb 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java @@ -18,7 +18,9 @@ package org.apache.cassandra.simulator.paxos; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; @@ -286,19 +288,39 @@ RuntimeException failWith(Throwable t) private RuntimeException logAndThrow() { - Integer causedByPrimaryKey = null; - Throwable causedByThrowable = null; + class Violation + { + final int primaryKey; + final Throwable cause; + + Violation(int primaryKey, Throwable cause) + { + this.primaryKey = primaryKey; + this.cause = cause; + } + } + List violations = new ArrayList<>(); for (Throwable t : simulated.failures.get()) { + Integer causedByPrimaryKey; if (null != (causedByPrimaryKey = causedBy(t))) { - causedByThrowable = t; + violations.add(new Violation(causedByPrimaryKey, t)); break; } } - log(causedByPrimaryKey); - Throwable t = (causedByPrimaryKey != null) ? causedByThrowable : simulated.failures.get().get(0); + if (!violations.isEmpty()) + { + AssertionError error = new AssertionError("History violations detected"); + violations.forEach(v -> { + log(v.primaryKey); + error.addSuppressed(v.cause); + }); + throw error; + } + + Throwable t = simulated.failures.get().get(0); Throwables.throwIfUnchecked(t); throw new RuntimeException(t); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java index a1d19b63751d..ca5407fe8995 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java @@ -385,11 +385,11 @@ List applyToMessage(IInvokableInstance from, IInvokableInstance to, IMes notify = from; } boolean isTimeout = deliver != FAILURE; - Executor callbackExecutor = notify.executorFor(verb.id); - if (callbackExecutor instanceof ImmediateExecutor) - callbackExecutor = to.executor(); + Executor notifierExecutor = notify.executorFor(verb.id); + if (notifierExecutor instanceof ImmediateExecutor) + notifierExecutor = notify.executor(); InterceptedExecution.InterceptedTaskExecution failTask = new InterceptedRunnableExecution( - (InterceptingExecutor) callbackExecutor, + (InterceptingExecutor) notifierExecutor, () -> notify.unsafeApplyOnThisThread((socketAddress, id, innerIsTimeout) -> { InetAddressAndPort address = InetAddressAndPort.getByAddress(socketAddress); RequestCallbacks.CallbackInfo callback = instance().callbacks.remove(id, address); diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 385ca62a337c..e7b808cceb6e 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -147,6 +147,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.GuardrailsOptions$ConsistencyLevels", "org.apache.cassandra.config.GuardrailsOptions$TableProperties", "org.apache.cassandra.config.JMXServerOptions", + "org.apache.cassandra.config.OptionaldPositiveInt", "org.apache.cassandra.config.ParameterizedClass", "org.apache.cassandra.config.RepairConfig", "org.apache.cassandra.config.RepairRetrySpec", diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 11833783a3ad..bbc7bf2c82e6 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -33,6 +33,9 @@ import com.google.common.collect.ImmutableMap; import org.junit.Test; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.io.util.File; import org.yaml.snakeyaml.error.YAMLException; @@ -426,6 +429,36 @@ public void converters() assertThat(from("compaction_tombstone_warning_threshold", "0").partition_tombstones_warn_threshold).isEqualTo(0); } + @Test + public void process() + { + for (Type type : Type.values()) + { + Config c = fromType(type, "available_processors", 4); + assertThat(c.available_processors).isEqualTo(new OptionaldPositiveInt(4)); + assertThat(c.accord_shard_count).isEqualTo(OptionaldPositiveInt.UNDEFINED); + + c = fromType(type, "available_processors", 3, "accord_shard_count", 1); + assertThat(c.available_processors).isEqualTo(new OptionaldPositiveInt(3)); + assertThat(c.accord_shard_count).isEqualTo(new OptionaldPositiveInt(1)); + } + } + + private enum Type { MAP, YAML } + + private static Config fromType(Type type, Object... values) + { + switch (type) + { + case MAP: + return from(values); + case YAML: + return fromYaml(values); + default: + throw new AssertionError("Unexpected type: " + type); + } + } + private static Config from(Object... values) { assert values.length % 2 == 0 : "Map can only be created with an even number of inputs: given " + values.length; @@ -469,6 +502,24 @@ public void testBackwardCompatibilityOfAuthenticatorPropertyAsString() throws IO assertTrue(config.authenticator.parameters.isEmpty()); } + private static Config fromYaml(Object... values) + { + assert values.length % 2 == 0 : "Map can only be created with an even number of inputs: given " + values.length; + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (int i = 0; i < values.length; i += 2) + builder.put((String) values[i], values[i + 1]); + ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); // checkstyle: permit this instantiation + try + { + byte[] bytes = mapper.writeValueAsBytes(builder.build()); + return YamlConfigurationLoader.loadConfig(bytes); + } + catch (JsonProcessingException e) + { + throw new AssertionError("Unable to convert map to YAML", e); + } + } + public static Config load(String path) { URL url = YamlConfigurationLoaderTest.class.getClassLoader().getResource(path); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index d8e2ae898c1b..40890d36732b 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -18,17 +18,21 @@ package org.apache.cassandra.service.accord; -import org.apache.cassandra.net.MessageDelivery; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.Mockito; + +import accord.api.Agent; import accord.local.Node; import accord.messages.InformOfTxnId; import accord.messages.SimpleReply; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.Verb; -import org.mockito.Mockito; +import org.apache.cassandra.tcm.ClusterMetadataService; public class AccordMessageSinkTest { @@ -36,6 +40,8 @@ public class AccordMessageSinkTest public static void setup() { DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + ClusterMetadataService.initializeForClients(); } @Test @@ -48,7 +54,7 @@ public void informOfTxn() SimpleReply reply = SimpleReply.Ok; MessageDelivery messaging = Mockito.mock(MessageDelivery.class); - AccordMessageSink sink = new AccordMessageSink(messaging); + AccordMessageSink sink = new AccordMessageSink(Mockito.mock(Agent.class), messaging); sink.reply(new Node.Id(1), req, reply); Mockito.verify(messaging).send(Mockito.any(), Mockito.any()); From da92eed2250c149eb9cf87bbfd5cc8c6217e9705 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 8 May 2023 11:56:37 -0700 Subject: [PATCH 054/340] CEP-15: (C*) Enhance in-memory FileSystem to work with mmap and support tests to add custom logic patch by David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-18485 --- .../cassandra/simulator/ClusterSimulation.java | 1 - test/unit/org/apache/cassandra/cql3/CQLTester.java | 13 +++++++++++++ .../cassandra/cql3/PreparedStatementsTest.java | 2 ++ .../validation/entities/FrozenCollectionsTest.java | 7 ++++++- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index 9b31de369b0f..7c8867b9c19d 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -727,7 +727,6 @@ public ClusterSimulation(RandomSource random, long seed, int uniqueNum, .set("failure_detector", SimulatedFailureDetector.Instance.class.getName()) .set("commitlog_compression", new ParameterizedClass(LZ4Compressor.class.getName(), emptyMap())) .set("commitlog_sync", "batch"); - // TODO: Add remove() to IInstanceConfig if (config instanceof InstanceConfig) { diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index e988b979abb3..a167be90e3b6 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -159,6 +159,7 @@ import org.apache.cassandra.index.Index; import org.apache.cassandra.index.SecondaryIndexManager; import org.apache.cassandra.io.filesystem.ListenableFileSystem; +import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileSystems; import org.apache.cassandra.io.util.FileUtils; @@ -3238,6 +3239,18 @@ public void cleanupFileSystemListeners() return; fs.clearListeners(); } + + protected ListenableFileSystem.PathFilter isCurrentTableIndexFile(String keyspace) + { + return path -> { + if (!path.getFileName().toString().endsWith("Index.db")) + return false; + Descriptor desc = Descriptor.fromFile(new File(path)); + if (!desc.ksname.equals(keyspace) && desc.cfname.equals(currentTable())) + return false; + return true; + }; + } } private static class ClusterSettings diff --git a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java index 2d09317333cc..db84d8fde3fc 100644 --- a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java +++ b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java @@ -37,6 +37,7 @@ import com.datastax.driver.core.Session; import com.datastax.driver.core.exceptions.SyntaxError; import com.datastax.driver.core.exceptions.WriteTimeoutException; +import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.exceptions.PreparedQueryNotFoundException; @@ -66,6 +67,7 @@ public class PreparedStatementsTest extends CQLTester @BeforeClass public static void setUpClass() { + ServerTestUtils.daemonInitialization(); DatabaseDescriptor.setAccordTransactionsEnabled(true); CQLTester.setUpClass(); } diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java index 9ea3e7b05890..1bdd1ca19b77 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java @@ -29,7 +29,12 @@ import org.apache.cassandra.Util; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.restrictions.StatementRestrictions; -import org.apache.cassandra.db.marshal.*; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.TupleType; import org.apache.cassandra.dht.ByteOrderedPartitioner; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.InvalidRequestException; From 8dc82a6369be1d29a47d455545e7b94e27089ee4 Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Thu, 16 Feb 2023 13:02:35 +0000 Subject: [PATCH 055/340] CEP-15: Store PreAccept, Accept, Commit, and Apply messages in a durable log before processing by CommandStores patch by Aleksey Yeschenko; reviewed by David Capwell for CASSANDRA-18344 --- .../ManyToOneConcurrentLinkedQueue.java | 12 +- .../org/apache/cassandra/config/Config.java | 4 + .../cassandra/config/DatabaseDescriptor.java | 43 +- .../io/util/TrackedDataOutputPlus.java | 181 +++++ .../cassandra/journal/ActiveSegment.java | 517 ++++++++++++++ .../cassandra/journal/AsyncWriteCallback.java | 23 + .../apache/cassandra/journal/Component.java | 43 ++ .../apache/cassandra/journal/Descriptor.java | 210 ++++++ .../cassandra/journal/EntrySerializer.java | 226 +++++++ .../org/apache/cassandra/journal/Flusher.java | 368 ++++++++++ .../cassandra/journal/InMemoryIndex.java | 137 ++++ .../org/apache/cassandra/journal/Index.java | 76 +++ .../org/apache/cassandra/journal/Journal.java | 638 ++++++++++++++++++ .../cassandra/journal/JournalReadError.java | 38 ++ .../cassandra/journal/JournalWriteError.java | 38 ++ .../apache/cassandra/journal/KeySupport.java | 47 ++ .../apache/cassandra/journal/Metadata.java | 216 ++++++ .../org/apache/cassandra/journal/Metrics.java | 75 ++ .../apache/cassandra/journal/OnDiskIndex.java | 295 ++++++++ .../org/apache/cassandra/journal/Params.java | 56 ++ .../RecordConsumer.java} | 18 +- .../org/apache/cassandra/journal/Segment.java | 78 +++ .../cassandra/journal/SegmentWriter.java | 115 ++++ .../apache/cassandra/journal/Segments.java | 200 ++++++ .../cassandra/journal/StaticSegment.java | 349 ++++++++++ .../cassandra/journal/SyncedOffsets.java | 243 +++++++ .../cassandra/journal/ValueSerializer.java | 36 + .../cassandra/journal/package-info.java | 22 + .../cassandra/net/AbstractMessageHandler.java | 3 +- .../apache/cassandra/net/FrameDecoderCrc.java | 2 +- .../apache/cassandra/net/FrameDecoderLZ4.java | 2 +- .../apache/cassandra/net/FrameEncoderCrc.java | 2 +- .../apache/cassandra/net/FrameEncoderLZ4.java | 2 +- .../cassandra/net/HandshakeProtocol.java | 2 +- .../cassandra/net/InboundMessageHandler.java | 1 + .../cassandra/net/InboundMessageHandlers.java | 1 + .../cassandra/net/OutboundMessageQueue.java | 1 + .../apache/cassandra/net/SocketFactory.java | 1 + .../service/accord/AccordCommandStores.java | 92 ++- .../service/accord/AccordJournal.java | 499 ++++++++++++++ .../service/accord/AccordService.java | 4 +- .../paxos/uncommitted/PaxosBallotTracker.java | 2 +- .../apache/cassandra/{net => utils}/Crc.java | 17 +- .../apache/cassandra/utils/FBUtilities.java | 35 +- .../utils/JVMStabilityInspector.java | 36 +- .../apache/cassandra/utils/NoSpamLogger.java | 5 + .../org/apache/cassandra/utils/TimeUUID.java | 7 +- test/conf/cassandra-mtls.yaml | 1 + test/conf/cassandra-murmur.yaml | 1 + test/conf/cassandra-old.yaml | 1 + .../cassandra-pem-jks-sslcontextfactory.yaml | 1 + ...slcontextfactory-invalidconfiguration.yaml | 1 + .../conf/cassandra-pem-sslcontextfactory.yaml | 1 + test/conf/cassandra-seeds.yaml | 1 + ...slcontextfactory-invalidconfiguration.yaml | 1 + test/conf/cassandra-sslcontextfactory.yaml | 1 + test/conf/cassandra.yaml | 1 + .../distributed/impl/InstanceConfig.java | 3 + .../asm/GlobalMethodTransformer.java | 9 +- .../test/AccordJournalSimulationTest.java | 264 ++++++++ .../org/apache/cassandra/ServerTestUtils.java | 1 + .../ManyToOneConcurrentLinkedQueueTest.java | 2 +- .../org/apache/cassandra/io/util/Files.java | 33 + .../cassandra/journal/DescriptorTest.java | 166 +++++ .../apache/cassandra/journal/IndexTest.java | 251 +++++++ .../apache/cassandra/journal/JournalTest.java | 102 +++ .../cassandra/journal/MetadataTest.java | 105 +++ .../apache/cassandra/journal/SegmentTest.java | 241 +++++++ .../cassandra/journal/SyncedOffsetsTest.java | 70 ++ .../apache/cassandra/journal/TestParams.java | 61 ++ .../cassandra/journal/TimeUUIDKeySupport.java | 85 +++ .../service/accord/AccordJournalTest.java | 120 ++++ 72 files changed, 6477 insertions(+), 64 deletions(-) rename src/java/org/apache/cassandra/{net => concurrent}/ManyToOneConcurrentLinkedQueue.java (97%) create mode 100644 src/java/org/apache/cassandra/io/util/TrackedDataOutputPlus.java create mode 100644 src/java/org/apache/cassandra/journal/ActiveSegment.java create mode 100644 src/java/org/apache/cassandra/journal/AsyncWriteCallback.java create mode 100644 src/java/org/apache/cassandra/journal/Component.java create mode 100644 src/java/org/apache/cassandra/journal/Descriptor.java create mode 100644 src/java/org/apache/cassandra/journal/EntrySerializer.java create mode 100644 src/java/org/apache/cassandra/journal/Flusher.java create mode 100644 src/java/org/apache/cassandra/journal/InMemoryIndex.java create mode 100644 src/java/org/apache/cassandra/journal/Index.java create mode 100644 src/java/org/apache/cassandra/journal/Journal.java create mode 100644 src/java/org/apache/cassandra/journal/JournalReadError.java create mode 100644 src/java/org/apache/cassandra/journal/JournalWriteError.java create mode 100644 src/java/org/apache/cassandra/journal/KeySupport.java create mode 100644 src/java/org/apache/cassandra/journal/Metadata.java create mode 100644 src/java/org/apache/cassandra/journal/Metrics.java create mode 100644 src/java/org/apache/cassandra/journal/OnDiskIndex.java create mode 100644 src/java/org/apache/cassandra/journal/Params.java rename src/java/org/apache/cassandra/{exceptions/ChecksumMismatchException.java => journal/RecordConsumer.java} (74%) create mode 100644 src/java/org/apache/cassandra/journal/Segment.java create mode 100644 src/java/org/apache/cassandra/journal/SegmentWriter.java create mode 100644 src/java/org/apache/cassandra/journal/Segments.java create mode 100644 src/java/org/apache/cassandra/journal/StaticSegment.java create mode 100644 src/java/org/apache/cassandra/journal/SyncedOffsets.java create mode 100644 src/java/org/apache/cassandra/journal/ValueSerializer.java create mode 100644 src/java/org/apache/cassandra/journal/package-info.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordJournal.java rename src/java/org/apache/cassandra/{net => utils}/Crc.java (90%) create mode 100644 test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java rename test/unit/org/apache/cassandra/{net => concurrent}/ManyToOneConcurrentLinkedQueueTest.java (99%) create mode 100644 test/unit/org/apache/cassandra/io/util/Files.java create mode 100644 test/unit/org/apache/cassandra/journal/DescriptorTest.java create mode 100644 test/unit/org/apache/cassandra/journal/IndexTest.java create mode 100644 test/unit/org/apache/cassandra/journal/JournalTest.java create mode 100644 test/unit/org/apache/cassandra/journal/MetadataTest.java create mode 100644 test/unit/org/apache/cassandra/journal/SegmentTest.java create mode 100644 test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java create mode 100644 test/unit/org/apache/cassandra/journal/TestParams.java create mode 100644 test/unit/org/apache/cassandra/journal/TimeUUIDKeySupport.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java diff --git a/src/java/org/apache/cassandra/net/ManyToOneConcurrentLinkedQueue.java b/src/java/org/apache/cassandra/concurrent/ManyToOneConcurrentLinkedQueue.java similarity index 97% rename from src/java/org/apache/cassandra/net/ManyToOneConcurrentLinkedQueue.java rename to src/java/org/apache/cassandra/concurrent/ManyToOneConcurrentLinkedQueue.java index 4c73bdc9cd2e..8615e99c22f8 100644 --- a/src/java/org/apache/cassandra/net/ManyToOneConcurrentLinkedQueue.java +++ b/src/java/org/apache/cassandra/concurrent/ManyToOneConcurrentLinkedQueue.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.net; +package org.apache.cassandra.concurrent; import java.util.Collection; import java.util.Iterator; @@ -37,12 +37,12 @@ * In addition to that, provides a {@link #relaxedPeekLastAndOffer(Object)} method that we use to avoid a CAS when * putting message handlers onto the wait queue. */ -class ManyToOneConcurrentLinkedQueue extends ManyToOneConcurrentLinkedQueueHead implements Queue +public class ManyToOneConcurrentLinkedQueue extends ManyToOneConcurrentLinkedQueueHead implements Queue { @SuppressWarnings("unused") // pad two cache lines after the head to prevent false sharing protected long p31, p32, p33, p34, p35, p36, p37, p38, p39, p40, p41, p42, p43, p44, p45; - ManyToOneConcurrentLinkedQueue() + public ManyToOneConcurrentLinkedQueue() { head = tail = new Node<>(null); } @@ -63,7 +63,7 @@ public boolean isEmpty() * - {@code false} result indicates that the queue MIGHT BE non-empty - the value of {@code head} might * not yet have been made externally visible by the consumer thread. */ - boolean relaxedIsEmpty() + public boolean relaxedIsEmpty() { return null == head.next; } @@ -156,7 +156,7 @@ public boolean remove(Object o) * Yields no performance benefit over invoking {@link #poll()} manually - there just isn't * anything to meaningfully amortise on the consumer side of this queue. */ - void drain(Consumer consumer) + public void drain(Consumer consumer) { E item; while ((item = poll()) != null) @@ -181,7 +181,7 @@ public boolean offer(E e) * * @return previously last tail item in the queue, potentially stale */ - E relaxedPeekLastAndOffer(E e) + public E relaxedPeekLastAndOffer(E e) { return internalOffer(e); } diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 63e81481be8e..704dda8aa9fa 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -397,6 +397,10 @@ public static class SSTableConfig @Replaces(oldName = "commitlog_total_space_in_mb", converter = Converters.MEBIBYTES_DATA_STORAGE_INT, deprecated = true) public DataStorageSpec.IntMebibytesBound commitlog_total_space; public CommitLogSync commitlog_sync; + + // Accord Journal + public String accord_journal_directory; + @Replaces(oldName = "commitlog_sync_group_window_in_ms", converter = Converters.MILLIS_DURATION_DOUBLE, deprecated = true) public DurationSpec.IntMillisecondsBound commitlog_sync_group_window = new DurationSpec.IntMillisecondsBound("0ms"); @Replaces(oldName = "commitlog_sync_period_in_ms", converter = Converters.MILLIS_DURATION_INT, deprecated = true) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 0c15ba122798..d251d3ef63d3 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -386,6 +386,14 @@ public static void clientInitialization(boolean failIfDaemonOrTool) clientInitialization(failIfDaemonOrTool, Config::new); } + // For simulator tests + public static void clientWithDaemonConfig() + { + clientInitialization(true, DatabaseDescriptor::loadConfig); + applyAll(); + AuthConfig.applyAuth(); + } + /** * Initializes this class as a client, which means that just an empty configuration will * be used. @@ -714,6 +722,11 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m if (commitLogWriteDiskAccessMode != conf.commitlog_disk_access_mode) logger.info("commitlog_disk_access_mode resolved to: {}", commitLogWriteDiskAccessMode); + if (conf.accord_journal_directory == null) + { + conf.accord_journal_directory = storagedirFor("accord_journal"); + } + if (conf.hints_directory == null) { conf.hints_directory = storagedirFor("hints"); @@ -789,6 +802,8 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m throw new ConfigurationException("local_system_data_file_directory must not be the same as any data_file_directories", false); if (datadir.equals(conf.commitlog_directory)) throw new ConfigurationException("commitlog_directory must not be the same as any data_file_directories", false); + if (datadir.equals(conf.accord_journal_directory)) + throw new ConfigurationException("accord_journal_directory must not be the same as any data_file_directories", false); if (datadir.equals(conf.hints_directory)) throw new ConfigurationException("hints_directory must not be the same as any data_file_directories", false); if (datadir.equals(conf.saved_caches_directory)) @@ -804,6 +819,8 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m { if (conf.local_system_data_file_directory.equals(conf.commitlog_directory)) throw new ConfigurationException("local_system_data_file_directory must not be the same as the commitlog_directory", false); + if (conf.local_system_data_file_directory.equals(conf.accord_journal_directory)) + throw new ConfigurationException("local_system_data_file_directory must not be the same as the accord_journal_directory", false); if (conf.local_system_data_file_directory.equals(conf.saved_caches_directory)) throw new ConfigurationException("local_system_data_file_directory must not be the same as the saved_caches_directory", false); if (conf.local_system_data_file_directory.equals(conf.hints_directory)) @@ -816,10 +833,18 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m FBUtilities.prettyPrintMemory(freeBytes)); } - if (conf.commitlog_directory.equals(conf.saved_caches_directory)) - throw new ConfigurationException("saved_caches_directory must not be the same as the commitlog_directory", false); + if (conf.commitlog_directory.equals(conf.accord_journal_directory)) + throw new ConfigurationException("accord_journal_directory must not be the same as the commitlog_directory", false); if (conf.commitlog_directory.equals(conf.hints_directory)) throw new ConfigurationException("hints_directory must not be the same as the commitlog_directory", false); + if (conf.commitlog_directory.equals(conf.saved_caches_directory)) + throw new ConfigurationException("saved_caches_directory must not be the same as the commitlog_directory", false); + + if (conf.accord_journal_directory.equals(conf.hints_directory)) + throw new ConfigurationException("hints_directory must not be the same as the accord_journal_directory", false); + if (conf.accord_journal_directory.equals(conf.saved_caches_directory)) + throw new ConfigurationException("saved_caches_directory must not be the same as the accord_journal_directory", false); + if (conf.hints_directory.equals(conf.saved_caches_directory)) throw new ConfigurationException("saved_caches_directory must not be the same as the hints_directory", false); @@ -2115,6 +2140,10 @@ public static void createAllDirectories() throw new ConfigurationException("commitlog_directory must be specified", false); FileUtils.createDirectory(conf.commitlog_directory); + if (conf.accord_journal_directory == null) + throw new ConfigurationException("accord_journal_directory must be specified", false); + FileUtils.createDirectory(conf.accord_journal_directory); + if (conf.hints_directory == null) throw new ConfigurationException("hints_directory must be specified", false); FileUtils.createDirectory(conf.hints_directory); @@ -3018,6 +3047,16 @@ public static void setCommitLogCompression(ParameterizedClass compressor) conf.commitlog_compression = compressor; } + public static String getAccordJournalDirectory() + { + return conf.accord_journal_directory; + } + + public static void setAccordJournalDirectory(String path) + { + conf.accord_journal_directory = path; + } + public static Config.FlushCompression getFlushCompression() { return conf.flush_compression; diff --git a/src/java/org/apache/cassandra/io/util/TrackedDataOutputPlus.java b/src/java/org/apache/cassandra/io/util/TrackedDataOutputPlus.java new file mode 100644 index 000000000000..34363e03b8ae --- /dev/null +++ b/src/java/org/apache/cassandra/io/util/TrackedDataOutputPlus.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.io.util; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.cassandra.utils.vint.VIntCoding; + +public class TrackedDataOutputPlus implements DataOutputPlus +{ + private final DataOutputPlus out; + private int position = 0; + + private TrackedDataOutputPlus(DataOutputPlus out) + { + this.out = out; + } + + public static TrackedDataOutputPlus wrap(DataOutputPlus out) + { + return new TrackedDataOutputPlus(out); + } + + @Override + public void write(int b) throws IOException + { + out.write(b); + position += 1; + } + + @Override + public void write(byte[] b) throws IOException + { + out.write(b); + position += b.length; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException + { + out.write(b, off, len); + position += len; + } + + @Override + public void writeBoolean(boolean v) throws IOException + { + out.writeBoolean(v); + position += 1; + } + + @Override + public void writeByte(int v) throws IOException + { + out.writeByte(v); + position += 1; + } + + @Override + public void writeShort(int v) throws IOException + { + out.writeShort(v); + position += 2; + } + + @Override + public void writeChar(int v) throws IOException + { + out.writeChar(v); + position += 2; + } + + @Override + public void writeInt(int v) throws IOException + { + out.writeInt(v); + position += 4; + } + + @Override + public void writeLong(long v) throws IOException + { + out.writeLong(v); + position += 8; + } + + @Override + public void writeFloat(float v) throws IOException + { + out.writeFloat(v); + position += 4; + } + + @Override + public void writeDouble(double v) throws IOException + { + out.writeDouble(v); + position += 8; + } + + @Override + public void writeBytes(String s) throws IOException + { + out.writeBytes(s); + position += s.length(); + } + + @Override + public void writeChars(String s) throws IOException + { + out.writeChars(s); + position += s.length() * 2; + } + + @Override + public void writeUTF(String s) throws IOException + { + UnbufferedDataOutputStreamPlus.writeUTF(s, this); + } + + @Override + public void write(ByteBuffer buffer) throws IOException + { + out.write(buffer); + position += buffer.remaining(); + } + + @Override + public void write(ReadableMemory memory, long offset, long length) throws IOException + { + out.write(memory, offset, length); + position += length; + } + + @Override + public void writeVInt(long i) throws IOException + { + VIntCoding.writeVInt(i, this); + } + + @Override + public void writeUnsignedVInt(long i) throws IOException + { + VIntCoding.writeUnsignedVInt(i, this); + } + + @Override + public void writeMostSignificantBytes(long register, int bytes) throws IOException + { + out.writeMostSignificantBytes(register, bytes); + position += bytes; + } + + @Override + public long position() + { + return position; + } + + @Override + public boolean hasPosition() + { + return true; + } +} diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java new file mode 100644 index 000000000000..0f4d0dc09c66 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -0,0 +1,517 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.Executor; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.LockSupport; + +import com.codahale.metrics.Timer; +import org.apache.cassandra.concurrent.ExecutionFailure; +import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; +import org.apache.cassandra.io.util.*; +import org.apache.cassandra.utils.*; +import org.apache.cassandra.utils.concurrent.OpOrder; +import org.apache.cassandra.utils.concurrent.Ref; +import org.apache.cassandra.utils.concurrent.WaitQueue; + +final class ActiveSegment extends Segment +{ + final FileChannel channel; + + // OpOrder used to order appends wrt flush + private final OpOrder appendOrder = new OpOrder(); + + // position in the buffer we are allocating from + private final AtomicInteger allocatePosition = new AtomicInteger(0); + + /* + * Everything before this offset has been written and flushed. + */ + private volatile int lastFlushedOffset = 0; + + /* + * End position of the buffer; initially set to its capacity and + * updated to point to the last written position as the segment is being closed + * no need to be volatile as writes are protected by appendOrder barrier. + */ + private int endOfBuffer; + + // a signal that writers can wait on to be notified of a completed flush in BATCH and GROUP FlushMode + private final WaitQueue flushComplete = WaitQueue.newWaitQueue(); + + private final Ref> selfRef; + + final InMemoryIndex index; + + private ActiveSegment( + Descriptor descriptor, Params params, SyncedOffsets syncedOffsets, InMemoryIndex index, Metadata metadata, KeySupport keySupport) + { + super(descriptor, syncedOffsets, metadata, keySupport); + this.index = index; + try + { + channel = FileChannel.open(file.toPath(), StandardOpenOption.WRITE, StandardOpenOption.READ, StandardOpenOption.CREATE); + buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, params.segmentSize()); + endOfBuffer = buffer.capacity(); + selfRef = new Ref<>(this, new Tidier(descriptor, channel, buffer, syncedOffsets)); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, file, e); + } + } + + @SuppressWarnings("resource") + static ActiveSegment create(Descriptor descriptor, Params params, KeySupport keySupport) + { + SyncedOffsets syncedOffsets = SyncedOffsets.active(descriptor, true); + InMemoryIndex index = InMemoryIndex.create(keySupport); + Metadata metadata = Metadata.create(); + return new ActiveSegment<>(descriptor, params, syncedOffsets, index, metadata, keySupport); + } + + @Override + InMemoryIndex index() + { + return index; + } + + /** + * Read the entry and specified offset into the entry holder. + * Expects the caller to acquire the ref to the segment and the record to exist. + */ + @Override + boolean read(int offset, EntrySerializer.EntryHolder into) + { + ByteBuffer duplicate = (ByteBuffer) buffer.duplicate().position(offset).limit(buffer.capacity()); + try + { + EntrySerializer.read(into, keySupport, duplicate, descriptor.userVersion); + } + catch (IOException e) + { + throw new JournalReadError(descriptor, file, e); + } + return true; + } + + /** + * Stop writing to this file, flush and close it. Does nothing if the file is already closed. + */ + @Override + public synchronized void close() + { + close(true); + } + + /** + * @return true if the closed segment was definitely empty, false otherwise + */ + private synchronized boolean close(boolean persistComponents) + { + boolean isEmpty = discardUnusedTail(); + if (!isEmpty) + { + flush(); + if (persistComponents) persistComponents(); + } + release(); + return isEmpty; + } + + /** + * Close and discard a pre-allocated, available segment, that's never been exposed + */ + void closeAndDiscard() + { + boolean isEmpty = close(false); + if (!isEmpty) throw new IllegalStateException(); + discard(); + } + + void closeAndIfEmptyDiscard() + { + boolean isEmpty = close(true); + if (isEmpty) discard(); + } + + void persistComponents() + { + index.persist(descriptor); + metadata.persist(descriptor); + SyncUtil.trySyncDir(descriptor.directory); + } + + private void discard() + { + selfRef.ensureReleased(); + + descriptor.fileFor(Component.DATA).deleteIfExists(); + descriptor.fileFor(Component.INDEX).deleteIfExists(); + descriptor.fileFor(Component.METADATA).deleteIfExists(); + descriptor.fileFor(Component.SYNCED_OFFSETS).deleteIfExists(); + } + + void release() + { + selfRef.release(); + } + + @Override + public Ref> tryRef() + { + return selfRef.tryRef(); + } + + @Override + public Ref> ref() + { + return selfRef.ref(); + } + + private static final class Tidier implements Tidy + { + private final Descriptor descriptor; + private final FileChannel channel; + private final ByteBuffer buffer; + private final SyncedOffsets syncedOffsets; + + Tidier(Descriptor descriptor, FileChannel channel, ByteBuffer buffer, SyncedOffsets syncedOffsets) + { + this.descriptor = descriptor; + this.channel = channel; + this.buffer = buffer; + this.syncedOffsets = syncedOffsets; + } + + @Override + public void tidy() + { + FileUtils.clean(buffer); + try + { + channel.close(); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, Component.DATA, e); + } + syncedOffsets.close(); + } + + @Override + public String name() + { + return descriptor.toString(); + } + } + + /* + * Flush logic; closing and component flushing + */ + + /** + * Possibly force a disk flush for this segment file. + * TODO FIXME: calls from outside Flusher + callbacks + * @return last synced offset + */ + synchronized int flush() + { + int allocatePosition = this.allocatePosition.get(); + if (lastFlushedOffset >= allocatePosition) + return lastFlushedOffset; + + waitForModifications(); + flushInternal(); + lastFlushedOffset = allocatePosition; + int syncedOffset = Math.min(allocatePosition, endOfBuffer); + syncedOffsets.mark(syncedOffset); + flushComplete.signalAll(); + return syncedOffset; + } + + private void waitForFlush(int position) + { + while (lastFlushedOffset < position) + { + WaitQueue.Signal signal = flushComplete.register(); + if (lastFlushedOffset < position) + signal.awaitThrowUncheckedOnInterrupt(); + else + signal.cancel(); + } + } + + /** + * Wait for any appends or discardUnusedTail() operations started before this method was called + */ + private void waitForModifications() + { + // issue a barrier and wait for it + appendOrder.awaitNewBarrier(); + } + + private void flushInternal() + { + try + { + SyncUtil.force((MappedByteBuffer) buffer); + } + catch (Exception e) // MappedByteBuffer.force() does not declare IOException but can actually throw it + { + throw new JournalWriteError(descriptor, file, e); + } + } + + boolean isFullyFlushed(int syncedOffset) + { + return syncedOffset >= endOfBuffer; + } + + /** + * Ensures no more of this segment is writeable, by allocating any unused section at the end + * and marking it discarded void discartUnusedTail() + * + * @return true if the segment was empty, false otherwise + */ + boolean discardUnusedTail() + { + try (OpOrder.Group ignored = appendOrder.start()) + { + while (true) + { + int prev = allocatePosition.get(); + int next = endOfBuffer + 1; + + if (prev >= next) + { + // already stopped allocating, might also be closed + assert buffer == null || prev == buffer.capacity() + 1; + return false; + } + + if (allocatePosition.compareAndSet(prev, next)) + { + // stopped allocating now; can only succeed once, no further allocation or discardUnusedTail can succeed + endOfBuffer = prev; + assert buffer != null && next == buffer.capacity() + 1; + return prev == 0; + } + } + } + } + + /* + * Entry/bytes allocation logic + */ + + @SuppressWarnings({ "resource", "RedundantSuppression" }) // op group will be closed by Allocation#write() + Allocation allocate(int entrySize, Set hosts) + { + int totalSize = totalEntrySize(hosts, entrySize); + OpOrder.Group opGroup = appendOrder.start(); + try + { + int position = allocateBytes(totalSize); + if (position < 0) + { + opGroup.close(); + return null; + } + return new Allocation(opGroup, (ByteBuffer) buffer.duplicate().position(position).limit(position + totalSize)); + } + catch (Throwable t) + { + opGroup.close(); + throw t; + } + } + + private int totalEntrySize(Set hosts, int recordSize) + { + return EntrySerializer.fixedEntrySize(keySupport, descriptor.userVersion) + + EntrySerializer.variableEntrySize(hosts.size(), recordSize); + } + + // allocate bytes in the segment, or return -1 if not enough space + private int allocateBytes(int size) + { + while (true) + { + int prev = allocatePosition.get(); + int next = prev + size; + if (next >= endOfBuffer) + return -1; + if (allocatePosition.compareAndSet(prev, next)) + { + assert buffer != null; + return prev; + } + LockSupport.parkNanos(1); // ConstantBackoffCAS Algorithm from https://arxiv.org/pdf/1305.5800.pdf + } + } + + final class Allocation + { + private final OpOrder.Group appendOp; + private final ByteBuffer buffer; + private final int position; + + Allocation(OpOrder.Group appendOp, ByteBuffer buffer) + { + this.appendOp = appendOp; + this.buffer = buffer; + this.position = buffer.position(); + } + + void write(K id, ByteBuffer record, Set hosts) + { + try (BufferedDataOutputStreamPlus out = new DataOutputBufferFixed(buffer)) + { + EntrySerializer.write(id, record, hosts, keySupport, out, descriptor.userVersion); + index.update(id, position); + metadata.update(hosts); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, file, e); + } + finally + { + appendOp.close(); + } + } + + void asyncWrite(K id, ByteBuffer record, Set hosts, Executor executor, AsyncWriteCallback callback) + { + try (BufferedDataOutputStreamPlus out = new DataOutputBufferFixed(buffer)) + { + int entrySize = totalEntrySize(hosts, record.remaining()); + EntrySerializer.write(id, record, hosts, keySupport, out, descriptor.userVersion); + index.update(id, position); + metadata.update(hosts); + writeCallbacksExternal.offer(new QueuedWriteCallback(position + entrySize, executor, callback)); + } + catch (Throwable t) + { + executor.execute(() -> callback.onFailure(t)); + } + finally + { + appendOp.close(); + } + } + + void awaitFlush(Timer waitingOnFlush) + { + try (Timer.Context ignored = waitingOnFlush.time()) + { + waitForFlush(position); + } + } + } + + // (external) MPSC queue for async write (flush) callbacks, to be executed in *write position order* + private final ManyToOneConcurrentLinkedQueue writeCallbacksExternal = + new ManyToOneConcurrentLinkedQueue<>(); + // (internal) single writer / single reader list of callbacks used to drain the callbacks into for sorting + private final ArrayList writeCallbacksInternal = + new ArrayList<>(); + + static final class QueuedWriteCallback implements Comparable + { + final long recordLimit; + final Executor executor; + final AsyncWriteCallback callback; + + QueuedWriteCallback(long recordLimit, Executor executor, AsyncWriteCallback callback) + { + this.recordLimit = recordLimit; + this.executor = executor; + this.callback = callback; + } + + @Override + public int compareTo(QueuedWriteCallback other) + { + // sort more recent callbacks first to simplify callback execution order later + return -Long.compare(this.recordLimit, other.recordLimit); + } + + void scheduleOnSuccess() + { + try + { + executor.execute(callback); + } + catch (Throwable t) + { + ExecutionFailure.handle(t); + } + } + + void scheduleOnFailure(Throwable error) + { + try + { + executor.execute(() -> callback.onFailure(error)); + } + catch (Throwable t) + { + ExecutionFailure.handle(t); + } + } + } + + void scheduleOnSuccessCallbacks(long syncedOffset) + { + // sort and execute callbacks in write position order, up until the furtherst synced offset + writeCallbacksExternal.drain(writeCallbacksInternal::add); + writeCallbacksInternal.sort(null); + + for (int i = writeCallbacksInternal.size() - 1; i >= 0; i--) + { + QueuedWriteCallback callback = writeCallbacksInternal.get(i); + if (callback.recordLimit > syncedOffset) + break; + callback.scheduleOnSuccess(); + writeCallbacksInternal.remove(i); + } + } + + void scheduleOnFailureCallbacks(Throwable t) + { + writeCallbacksExternal.drain(writeCallbacksInternal::add); + writeCallbacksInternal.sort(null); + + for (int i = writeCallbacksInternal.size() - 1; i >= 0; i--) + { + QueuedWriteCallback callback = writeCallbacksInternal.get(i); + callback.scheduleOnFailure(t); + } + + writeCallbacksInternal.clear(); + } +} diff --git a/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java b/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java new file mode 100644 index 000000000000..161b972b14db --- /dev/null +++ b/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +public interface AsyncWriteCallback extends Runnable +{ + void onFailure(Throwable error); +} diff --git a/src/java/org/apache/cassandra/journal/Component.java b/src/java/org/apache/cassandra/journal/Component.java new file mode 100644 index 000000000000..c9de451adb6f --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Component.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +enum Component +{ + DATA ("data"), + INDEX ("indx"), + METADATA ("meta"), + SYNCED_OFFSETS ("sync"); + //OFFSET_MAP (".offs"), + //INVLALIDATIONS (".invl"); + + final String extension; + + Component(String extension) + { + this.extension = extension; + } + + /** + * @return if this component for the provided descrtiptor exists on disk + */ + boolean existsFor(Descriptor descriptor) + { + return descriptor.fileFor(this).exists(); + } +} diff --git a/src/java/org/apache/cassandra/journal/Descriptor.java b/src/java/org/apache/cassandra/journal/Descriptor.java new file mode 100644 index 000000000000..ea7ce7a1c2af --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Descriptor.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.cassandra.io.util.File; + +import static java.lang.String.format; +import static java.util.stream.Collectors.toList; + +/** + * Timestamp and version encoded in the file name, e.g. + * log-1637159888484-2-1-1.data + * log-1637159888484-2-1-1.indx + * log-1637159888484-2-1-1.meta + * log-1637159888484-2-1-1.sync + */ +final class Descriptor implements Comparable +{ + private static final String SEPARATOR = "-"; + private static final String PREFIX = "log" + SEPARATOR; + private static final String TMP_SUFFIX = "tmp"; + + private static final Pattern DATA_FILE_PATTERN = + Pattern.compile( PREFIX + "(\\d+)" // timestamp + + SEPARATOR + "(\\d+)" // generation + + SEPARATOR + "(\\d+)" // journal version + + SEPARATOR + "(\\d+)" // user version + + "\\." + Component.DATA.extension); + + private static final Pattern TMP_FILE_PATTERN = + Pattern.compile( PREFIX + "\\d+" // timestamp + + SEPARATOR + "\\d+" // generation + + SEPARATOR + "\\d+" // journal version + + SEPARATOR + "\\d+" // user version + + "\\." + "[a-z]+" // component extension + + "\\." + TMP_SUFFIX); + + + /* + * NOTE: If and when another journal version is introduced, have implementations + * expose the version used via yaml. This way operators can force previous journal + * version on upgrade, temporarily, to allow easier downgrades if something goes wrong. + */ + static final int JOURNAL_VERSION_1 = 1; + static final int CURRENT_JOURNAL_VERSION = JOURNAL_VERSION_1; + + final File directory; + final long timestamp; + final int generation; + + /** + * Serialization version for journal components; bumped as journal + * implementation evolves over time. + */ + final int journalVersion; + + /** + * Serialization version for user content - specifically journal keys + * and journal values; bumped when user logic evolves. + */ + final int userVersion; + + Descriptor(File directory, long timestamp, int generation, int journalVersion, int userVersion) + { + this.directory = directory; + this.timestamp = timestamp; + this.generation = generation; + this.journalVersion = journalVersion; + this.userVersion = userVersion; + } + + static Descriptor create(File directory, long timestamp, int userVersion) + { + return new Descriptor(directory, timestamp, 1, CURRENT_JOURNAL_VERSION, userVersion); + } + + static Descriptor fromName(File directory, String name) + { + Matcher matcher = DATA_FILE_PATTERN.matcher(name); + if (!matcher.matches()) + throw new IllegalArgumentException("Provided filename " + new File(directory, name) + " is not valid for a data segment file"); + + long timestamp = Long.parseLong(matcher.group(1)); + int generation = Integer.parseInt(matcher.group(2)); + int journalVersion = Integer.parseInt(matcher.group(3)); + int userVersion = Integer.parseInt(matcher.group(4)); + + return new Descriptor(directory, timestamp, generation, journalVersion, userVersion); + } + + static Descriptor fromFile(File file) + { + return fromName(file.parent(), file.name()); + } + + Descriptor withIncrementedGeneration() + { + return new Descriptor(directory, timestamp, generation + 1, journalVersion, userVersion); + } + + File fileFor(Component component) + { + return new File(directory, formatFileName(component)); + } + + File tmpFileFor(Component component) + { + return new File(directory, formatFileName(component) + '.' + TMP_SUFFIX); + } + + static boolean isTmpFile(File file) + { + return TMP_FILE_PATTERN.matcher(file.name()).matches(); + } + + private String formatFileName(Component component) + { + return format("%s%d%s%d%s%d%s%d.%s", + PREFIX, timestamp, + SEPARATOR, generation, + SEPARATOR, journalVersion, + SEPARATOR, userVersion, + component.extension); + } + + static List list(File directory) + { + try + { + return Arrays.stream(directory.listNames((file, name) -> DATA_FILE_PATTERN.matcher(name).matches())) + .map(name -> fromName(directory, name)) + .collect(toList()); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + @Override + public int compareTo(Descriptor other) + { + assert this.directory.equals(other.directory) + : format("Descriptors have mismatching directories: %s and %s", this.directory, other.directory); + + int cmp = Long.compare(this.timestamp, other.timestamp); + if (cmp == 0) cmp = Integer.compare(this.generation, other.generation); + if (cmp == 0) cmp = Integer.compare(this.journalVersion, other.journalVersion); + if (cmp == 0) cmp = Integer.compare(this.userVersion, other.userVersion); + return cmp; + } + + @Override + public boolean equals(Object other) + { + if (this == other) + return true; + return (other instanceof Descriptor) && equals((Descriptor) other); + } + + boolean equals(Descriptor other) + { + assert this.directory.equals(other.directory) + : format("Descriptors have mismatching directories: %s and %s", this.directory, other.directory); + + return this.timestamp == other.timestamp + && this.generation == other.generation + && this.journalVersion == other.journalVersion + && this.userVersion == other.userVersion; + } + + @Override + public int hashCode() + { + int result = directory.hashCode(); + result = 31 * result + Long.hashCode(timestamp); + result = 31 * result + generation; + result = 31 * result + journalVersion; + result = 31 * result + userVersion; + return result; + } + + @Override + public String toString() + { + return format("dir: %s, ts: %d, gen: %d, journal ver: %d, user ver: %d", + directory, timestamp, generation, journalVersion, userVersion); + } +} diff --git a/src/java/org/apache/cassandra/journal/EntrySerializer.java b/src/java/org/apache/cassandra/journal/EntrySerializer.java new file mode 100644 index 000000000000..ab1d02649967 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/EntrySerializer.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Set; +import java.util.zip.CRC32; + +import org.agrona.collections.IntHashSet; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.Crc; + +import static org.apache.cassandra.journal.Journal.validateCRC; +import static org.apache.cassandra.utils.FBUtilities.updateChecksum; +import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; +import static org.apache.cassandra.utils.FBUtilities.updateChecksumShort; + +final class EntrySerializer +{ + static void write(K key, + ByteBuffer record, + Set hosts, + KeySupport keySupport, + DataOutputPlus out, + int userVersion) + throws IOException + { + CRC32 crc = Crc.crc32(); + + keySupport.serialize(key, out, userVersion); + keySupport.updateChecksum(crc, key, userVersion); + + out.writeShort(hosts.size()); + updateChecksumShort(crc, (short) hosts.size()); + + int recordSize = record.remaining(); + out.writeInt(recordSize); + updateChecksumInt(crc, recordSize); + + out.writeInt((int) crc.getValue()); + + for (int host : hosts) + { + out.writeInt(host); + updateChecksumInt(crc, host); + } + + out.write(record); + Crc.updateCrc32(crc, record, record.position(), record.limit()); + + out.writeInt((int) crc.getValue()); + } + + static void read(EntryHolder into, + KeySupport keySupport, + ByteBuffer buffer, + int userVersion) + throws IOException + { + CRC32 crc = Crc.crc32(); + into.clear(); + + try (DataInputBuffer in = new DataInputBuffer(buffer, false)) + { + K key = keySupport.deserialize(in, userVersion); + keySupport.updateChecksum(crc, key, userVersion); + into.key = key; + + int hostCount = in.readShort(); + updateChecksumShort(crc, (short) hostCount); + + int entrySize = in.readInt(); + updateChecksumInt(crc, entrySize); + + validateCRC(crc, in.readInt()); + + for (int i = 0; i < hostCount; i++) + { + int hostId = in.readInt(); + updateChecksumInt(crc, hostId); + into.hosts.add(hostId); + } + + ByteBuffer entry = ByteBufferUtil.read(in, entrySize); + updateChecksum(crc, entry); + into.value = entry; + + validateCRC(crc, in.readInt()); + } + } + + static boolean tryRead(EntryHolder into, + KeySupport keySupport, + ByteBuffer buffer, + DataInputBuffer in, + int syncedOffset, + int userVersion) + throws IOException + { + CRC32 crc = Crc.crc32(); + into.clear(); + + int fixedSize = EntrySerializer.fixedEntrySize(keySupport, userVersion); + if (buffer.remaining() < fixedSize) + return handleReadException(new EOFException(), buffer.limit(), syncedOffset); + + updateChecksum(crc, buffer, buffer.position(), fixedSize - TypeSizes.INT_SIZE); + int fixedCrc = buffer.getInt(buffer.position() + fixedSize - TypeSizes.INT_SIZE); + + try + { + validateCRC(crc, fixedCrc); + } + catch (IOException e) + { + return handleReadException(e, buffer.position() + fixedSize, syncedOffset); + } + + int hostCount, recordSize; + try + { + into.key = keySupport.deserialize(in, userVersion); + hostCount = in.readShort(); + recordSize = in.readInt(); + in.skipBytesFully(TypeSizes.INT_SIZE); + } + catch (IOException e) + { + throw new RuntimeException(); // can't happen unless deserializer is buggy + } + + int variableSize = EntrySerializer.variableEntrySize(hostCount, recordSize); + if (buffer.remaining() < variableSize) + return handleReadException(new EOFException(), buffer.limit(), syncedOffset); + + updateChecksum(crc, buffer, buffer.position(), variableSize - TypeSizes.INT_SIZE); + int variableCrc = buffer.getInt(buffer.position() + variableSize - TypeSizes.INT_SIZE); + + try + { + validateCRC(crc, variableCrc); + } + catch (IOException e) + { + return handleReadException(e, buffer.position() + variableSize, syncedOffset); + } + + for (int i = 0; i < hostCount; i++) + { + into.hosts.add(in.readInt()); + } + + try + { + in.skipBytesFully(recordSize); + } + catch (IOException e) + { + throw new AssertionError(); // can't happen + } + + into.value = (ByteBuffer) buffer.duplicate() + .position(buffer.position() - recordSize) + .limit(buffer.position()); + + in.skipBytesFully(TypeSizes.INT_SIZE); + return true; + } + + private static boolean handleReadException(IOException e, int bufferPosition, int fsyncedLimit) throws IOException + { + if (bufferPosition <= fsyncedLimit) + throw e; + else + return false; + } + + static int fixedEntrySize(KeySupport keySupport, int userVersion) + { + return keySupport.serializedSize(userVersion) // key/id + + TypeSizes.SHORT_SIZE // host count + + TypeSizes.INT_SIZE // record size + + TypeSizes.INT_SIZE; // CRC + } + + static int variableEntrySize(int hostCount, int recordSize) + { + return TypeSizes.INT_SIZE * hostCount // hosts + + recordSize // record + + TypeSizes.INT_SIZE; // CRC + } + + static final class EntryHolder + { + K key; + ByteBuffer value; + IntHashSet hosts = new IntHashSet(); + + void clear() + { + key = null; + value = null; + hosts.clear(); + } + } +} diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java new file mode 100644 index 000000000000..b6d5a29141f2 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -0,0 +1,368 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.util.ArrayList; +import java.util.concurrent.atomic.AtomicLong; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.Timer; +import org.apache.cassandra.concurrent.Interruptible; +import org.apache.cassandra.concurrent.Interruptible.TerminateException; +import org.apache.cassandra.utils.MonotonicClock; +import org.apache.cassandra.utils.NoSpamLogger; +import org.apache.cassandra.utils.concurrent.Semaphore; +import org.apache.cassandra.utils.concurrent.WaitQueue; + +import static java.lang.String.format; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; +import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; +import static org.apache.cassandra.concurrent.Interruptible.State.SHUTTING_DOWN; +import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; +import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; +import static org.apache.cassandra.utils.MonotonicClock.Global.preciseTime; +import static org.apache.cassandra.utils.concurrent.Semaphore.newSemaphore; +import static org.apache.cassandra.utils.concurrent.WaitQueue.newWaitQueue; + +final class Flusher +{ + private static final Logger logger = LoggerFactory.getLogger(Flusher.class); + + private final Journal journal; + private final Params params; + + private volatile Interruptible flushExecutor; + + // counts of total pending write and written entries + private final AtomicLong pending = new AtomicLong(0); + private final AtomicLong written = new AtomicLong(0); + + // all Allocations written before this time will be flushed + volatile long lastFlushedAt = currentTimeMillis(); + + // a signal that writers can wait on to be notified of a completed flush in PERIODIC FlushMode + private final WaitQueue flushComplete = newWaitQueue(); + + // a signal and flag that callers outside the flusher thread can use + // to signal they want the journal segments to be flushed to disk + private final Semaphore haveWork = newSemaphore(1); + private volatile boolean flushRequested; + + private final FlushMethod syncFlushMethod; + private final FlushMethod asyncFlushMethod; + + Flusher(Journal journal) + { + this.journal = journal; + this.params = journal.params; + this.syncFlushMethod = syncFlushMethod(params); + this.asyncFlushMethod = asyncFlushMethod(params); + } + + void start() + { + String flushExecutorName = journal.name + "-disk-flusher-" + toLowerCaseLocalized(params.flushMode().toString()); + flushExecutor = executorFactory().infiniteLoop(flushExecutorName, new FlushRunnable(preciseTime), SAFE, NON_DAEMON, SYNCHRONIZED); + } + + void shutdown() + { + flushExecutor.shutdown(); + } + + private class FlushRunnable implements Interruptible.Task + { + private final MonotonicClock clock; + private final NoSpamLogger noSpamLogger; + + private final ArrayList> segmentsToFlush = new ArrayList<>(); + + FlushRunnable(MonotonicClock clock) + { + this.clock = clock; + this.noSpamLogger = NoSpamLogger.wrap(logger, 5, MINUTES); + } + + @Override + public void run(Interruptible.State state) throws InterruptedException + { + try + { + doRun(state); + } + catch (Throwable t) + { + if (!journal.handleError("Failed to flush segments to disk", t)) + throw new TerminateException(); + else // sleep for full poll-interval after an error, so we don't spam the log file + haveWork.tryAcquire(1, flushPeriodNanos(), NANOSECONDS); + } + } + + public void doRun(Interruptible.State state) throws InterruptedException + { + long startedRunAt = clock.now(); + boolean flushToDisk = lastFlushedAt + flushPeriodNanos() <= startedRunAt || state != NORMAL || flushRequested; + + // synchronized to prevent thread interrupts while performing IO operations and also + // clear interrupted status to prevent ClosedByInterruptException in ActiveSegment::flush + synchronized (this) + { + boolean ignore = Thread.interrupted(); + if (flushToDisk) + { + flushRequested = false; + doFlush(); + lastFlushedAt = startedRunAt; + flushComplete.signalAll(); + } + } + + long now = clock.now(); + if (flushToDisk) + processFlushDuration(startedRunAt, now); + + if (state == SHUTTING_DOWN) + return; + + long wakeUpAt = startedRunAt + flushPeriodNanos(); + if (wakeUpAt > now) + haveWork.tryAcquireUntil(1, wakeUpAt); + } + + private void doFlush() + { + journal.selectSegmentToFlush(segmentsToFlush); + // only schedule onSuccess callbacks for a segment if the preceding segments + // have been fully flushed, to preserve 1:1 mapping between record's position + // in the journal and onSuccess callback scheduling order + boolean scheduleOnSuccessCallbacks = true; + try + { + for (ActiveSegment segment : segmentsToFlush) + { + try + { + scheduleOnSuccessCallbacks = doFlush(segment, scheduleOnSuccessCallbacks) && scheduleOnSuccessCallbacks; + } + catch (Throwable t) + { + segmentsToFlush.forEach(s -> s.scheduleOnFailureCallbacks(t)); + throw t; + } + } + } + finally + { + segmentsToFlush.clear(); + } + } + + // flush the segment, schedule write callbacks if requested, return whether the segment has been flushed fully + private boolean doFlush(ActiveSegment segment, boolean scheduleCallbacks) + { + int syncedOffset = segment.flush(); + if (scheduleCallbacks) + segment.scheduleOnSuccessCallbacks(syncedOffset); + return segment.isFullyFlushed(syncedOffset); + } + + private long firstLaggedAt = Long.MIN_VALUE; // first lag ever or since last logged warning + private int flushCount = 0; // flush count since firstLaggedAt + private int lagCount = 0; // lag count since firstLaggedAt + private long flushDuration = 0; // time spent flushing since firstLaggedAt + private long lagDuration = 0; // cumulative lag since firstLaggedAt + + private void processFlushDuration(long startedFlushAt, long finishedFlushAt) + { + flushCount++; + flushDuration += (finishedFlushAt - startedFlushAt); + + long lag = finishedFlushAt - (startedFlushAt + flushPeriodNanos()); + if (lag <= 0) + return; + + lagCount++; + lagDuration += lag; + + if (firstLaggedAt == Long.MIN_VALUE) + firstLaggedAt = finishedFlushAt; + + boolean logged = + noSpamLogger.warn(finishedFlushAt, + "Out of {} {} journal flushes over the past {}s with average duration of {}ms, " + + "{} have exceeded the configured flush period by an average of {}ms", + flushCount, + journal.name, + format("%.2f", (finishedFlushAt - firstLaggedAt) * 1e-9d), + format("%.2f", flushDuration * 1e-6d / flushCount), + lagCount, + format("%.2f", lagDuration * 1e-6d / lagCount)); + + if (logged) // reset metrics for next log statement + { + firstLaggedAt = Long.MIN_VALUE; + flushCount = lagCount = 0; + flushDuration = lagDuration = 0; + } + } + } + + @FunctionalInterface + private interface FlushMethod + { + void flush(ActiveSegment.Allocation allocation); + } + + private FlushMethod syncFlushMethod(Params params) + { + switch (params.flushMode()) + { + default: throw new IllegalArgumentException(); + case BATCH: return this::waitForFlushBatch; + case GROUP: return this::waitForFlushGroup; + case PERIODIC: return this::waitForFlushPeriodic; + } + } + + private FlushMethod asyncFlushMethod(Params params) + { + switch (params.flushMode()) + { + default: throw new IllegalArgumentException(); + case BATCH: return this::asyncFlushBatch; + case GROUP: return this::asyncFlushGroup; + case PERIODIC: return this::asyncFlushPeriodic; + } + } + + void waitForFlush(ActiveSegment.Allocation alloc) + { + syncFlushMethod.flush(alloc); + } + + void asyncFlush(ActiveSegment.Allocation alloc) + { + asyncFlushMethod.flush(alloc); + } + + private void waitForFlushBatch(ActiveSegment.Allocation alloc) + { + pending.incrementAndGet(); + requestExtraFlush(); + alloc.awaitFlush(journal.metrics.waitingOnFlush); + pending.decrementAndGet(); + written.incrementAndGet(); + } + + private void asyncFlushBatch(ActiveSegment.Allocation alloc) + { + pending.incrementAndGet(); + requestExtraFlush(); + // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO FIXME + pending.decrementAndGet(); + written.incrementAndGet(); + } + + private void waitForFlushGroup(ActiveSegment.Allocation alloc) + { + pending.incrementAndGet(); + alloc.awaitFlush(journal.metrics.waitingOnFlush); + pending.decrementAndGet(); + written.incrementAndGet(); + } + + private void asyncFlushGroup(ActiveSegment.Allocation alloc) + { + pending.incrementAndGet(); + // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO FIXME + pending.decrementAndGet(); + written.incrementAndGet(); + } + + private void waitForFlushPeriodic(ActiveSegment.Allocation alloc) + { + long expectedFlushTime = nanoTime() - periodicFlushLagBlockNanos(); + if (lastFlushedAt < expectedFlushTime) + { + pending.incrementAndGet(); + awaitFlushAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); + pending.decrementAndGet(); + } + written.incrementAndGet(); + } + + private void asyncFlushPeriodic(ActiveSegment.Allocation ignore) + { + pending.incrementAndGet(); + // awaitFlushAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); // TODO FIXME + pending.decrementAndGet(); + written.incrementAndGet(); + } + + /** + * Request an additional flush cycle without blocking + */ + void requestExtraFlush() + { + // note: cannot simply invoke executor.interrupt() as some filesystems don't like it (jimfs, at least) + flushRequested = true; + haveWork.release(1); + } + + private void awaitFlushAt(long flushTime, Timer.Context context) + { + do + { + WaitQueue.Signal signal = flushComplete.register(context, Timer.Context::stop); + if (lastFlushedAt < flushTime) + signal.awaitUninterruptibly(); + else + signal.cancel(); + } + while (lastFlushedAt < flushTime); + } + + private long flushPeriodNanos() + { + return 1_000_000L * params.flushPeriod(); + } + + private long periodicFlushLagBlockNanos() + { + return 1_000_000L * params.periodicFlushLagBlock(); + } + + long pendingEntries() + { + return pending.get(); + } + + long writtenEntries() + { + return written.get(); + } +} diff --git a/src/java/org/apache/cassandra/journal/InMemoryIndex.java b/src/java/org/apache/cassandra/journal/InMemoryIndex.java new file mode 100644 index 000000000000..e4ec73a679c5 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/InMemoryIndex.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.util.Arrays; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicReference; + +import javax.annotation.Nullable; + +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.FileOutputStreamPlus; + +/** + * An index for a segment that's still being updated by journal writers concurrently. + */ +final class InMemoryIndex extends Index +{ + private static final int[] EMPTY = new int[0]; + + private final NavigableMap index; + + // CSLM#lastKey() can be costly, so track lastId separately; + // TODO: this could easily be premature and misguided; + // benchmark to ensure it's not acitevly harmful + private final AtomicReference lastId; + + static InMemoryIndex create(KeySupport keySupport) + { + return new InMemoryIndex<>(keySupport, new ConcurrentSkipListMap<>(keySupport)); + } + + private InMemoryIndex(KeySupport keySupport, NavigableMap index) + { + super(keySupport); + this.index = index; + this.lastId = new AtomicReference<>(); + } + + public void update(K id, int offset) + { + index.merge(id, new int[] { offset }, (current, value) -> + { + int idx = Arrays.binarySearch(current, offset); + if (idx >= 0) // repeat update() call; shouldn't occur, but we might as well allow this NOOP + return current; + + /* Merge the new offset with existing values */ + int pos = -idx - 1; + int[] merged = new int[current.length + 1]; + System.arraycopy(current, 0, merged, 0, pos); + merged[pos] = offset; + System.arraycopy(current, pos, merged, pos + 1, current.length - pos); + return merged; + }); + + lastId.accumulateAndGet(id, (current, update) -> (null == current || keySupport.compare(current, update) < 0) ? update : current); + } + + @Override + @Nullable + public K firstId() + { + return index.isEmpty() ? null : index.firstKey(); + } + + @Override + @Nullable + public K lastId() + { + return lastId.get(); + } + + @Override + public int[] lookUp(K id) + { + return mayContainId(id) ? index.getOrDefault(id, EMPTY) : EMPTY; + } + + @Override + public int lookUpFirst(K id) + { + int[] offests = lookUp(id); + return offests.length == 0 ? -1 : offests[0]; + } + + public void persist(Descriptor descriptor) + { + File tmpFile = descriptor.tmpFileFor(Component.INDEX); + try (FileOutputStreamPlus out = new FileOutputStreamPlus(tmpFile)) + { + OnDiskIndex.write(index, keySupport, out, descriptor.userVersion); + + out.flush(); + out.sync(); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, tmpFile, e); + } + tmpFile.move(descriptor.fileFor(Component.INDEX)); + } + + static InMemoryIndex rebuild(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + InMemoryIndex index = new InMemoryIndex<>(keySupport, new TreeMap<>(keySupport)); + try (StaticSegment.SequentialReader reader = StaticSegment.reader(descriptor, keySupport, fsyncedLimit)) + { + while (reader.advance()) + index.update(reader.id(), reader.offset()); + } + return index; + } + + @Override + public void close() + { + } +} diff --git a/src/java/org/apache/cassandra/journal/Index.java b/src/java/org/apache/cassandra/journal/Index.java new file mode 100644 index 000000000000..cfce4407e9ba --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Index.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import javax.annotation.Nullable; + +import org.apache.cassandra.utils.Closeable; + +/** + * Mapping of client supplied ids to in-segment offsets + */ +abstract class Index implements Closeable +{ + final KeySupport keySupport; + + Index(KeySupport keySupport) + { + this.keySupport = keySupport; + } + + /** + * Look up offsets by id. It's possible, due to retries, for a segment + * to contain the same record with the same id more than once, at + * different offsets. + * + * @return the found offsets into the segment, if any; can be empty + */ + abstract int[] lookUp(K id); + + /** + * Look up offsets by id. It's possible, due to retries, for a segment + * to contain the same record with the same id more than once, at + * different offsets. Return the first offset for provided record id, or -1 if none. + * + * @return the first offset into the segment, or -1 is none were found + */ + abstract int lookUpFirst(K id); + + /** + * @return the first (smallest) id in the index + */ + @Nullable + abstract K firstId(); + + /** + * @return the last (largest) id in the index + */ + @Nullable + abstract K lastId(); + + /** + * @return whether the id falls within lower/upper bounds of the index + */ + boolean mayContainId(K id) + { + K firstId = firstId(); + K lastId = lastId(); + + return null != firstId && null != lastId && keySupport.compare(id, firstId) >= 0 && keySupport.compare(id, lastId) <= 0; + } +} diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java new file mode 100644 index 000000000000..ff90c58f7bfc --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -0,0 +1,638 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.file.FileStore; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BooleanSupplier; +import java.util.function.Function; +import java.util.zip.CRC32; + +import javax.annotation.Nonnull; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.Timer.Context; +import org.apache.cassandra.concurrent.Interruptible; +import org.apache.cassandra.concurrent.Interruptible.TerminateException; +import org.apache.cassandra.concurrent.SequentialExecutorPlus; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.PathUtils; +import org.apache.cassandra.journal.Segments.ReferencedSegments; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.Crc; +import org.apache.cassandra.utils.JVMStabilityInspector; +import org.apache.cassandra.utils.concurrent.WaitQueue; + +import static java.lang.String.format; +import static java.util.Comparator.comparing; +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; +import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; +import static org.apache.cassandra.concurrent.Interruptible.State.SHUTTING_DOWN; +import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; +import static org.apache.cassandra.utils.concurrent.WaitQueue.newWaitQueue; + +/** + * A generic append-only journal with some special features: + *

    + *
  • Records can be looked up by key + *
  • Records can be tagged with multiple owner node ids + *
  • Records can be invalidated by their owner ids + *
  • Fully invalidated records get purged during segment compaction + *

+ * + * Type parameters: + * @param the type of records stored in the journal + * @param the type of keys used to address the records; + must be fixed-size and byte-order comparable + */ +public class Journal +{ + private static final Logger logger = LoggerFactory.getLogger(Journal.class); + + final String name; + final File directory; + final Params params; + + final KeySupport keySupport; + final ValueSerializer valueSerializer; + + final Metrics metrics; + final Flusher flusher; + //final Invalidator invalidator; + //final Compactor compactor; + + volatile long replayLimit; + final AtomicLong nextSegmentId = new AtomicLong(); + + private volatile ActiveSegment currentSegment = null; + + // segment that is ready to be used; allocator thread fills this and blocks until consumed + private volatile ActiveSegment availableSegment = null; + + private final AtomicReference> segments = new AtomicReference<>(); + + Interruptible allocator; + private final WaitQueue segmentPrepared = newWaitQueue(); + private final WaitQueue allocatorThreadWaitQueue = newWaitQueue(); + private final BooleanSupplier allocatorThreadWaitCondition = () -> (availableSegment == null); + + SequentialExecutorPlus closer; + //private final Set invalidations = Collections.newSetFromMap(new ConcurrentHashMap<>()); + + public Journal(String name, + File directory, + Params params, + KeySupport keySupport, + ValueSerializer valueSerializer) + { + this.name = name; + this.directory = directory; + this.params = params; + + this.keySupport = keySupport; + this.valueSerializer = valueSerializer; + + this.metrics = new Metrics<>(name); + this.flusher = new Flusher<>(this); + //this.invalidator = new Invalidator<>(this); + //this.compactor = new Compactor<>(this); + } + + public void start() + { + metrics.register(flusher); + + deleteTmpFiles(); + + List descriptors = Descriptor.list(directory); + // find the largest existing timestamp + descriptors.sort(null); + long maxTimestamp = descriptors.isEmpty() + ? Long.MIN_VALUE + : descriptors.get(descriptors.size() - 1).timestamp; + nextSegmentId.set(replayLimit = Math.max(currentTimeMillis(), maxTimestamp + 1)); + + segments.set(Segments.ofStatic(StaticSegment.open(descriptors, keySupport))); + closer = executorFactory().sequential(name + "-closer"); + allocator = executorFactory().infiniteLoop(name + "-allocator", new AllocateRunnable(), SAFE, NON_DAEMON, SYNCHRONIZED); + advanceSegment(null); + flusher.start(); + //invalidator.start(); + //compactor.start(); + } + + /** + * Cleans up unfinished component files from previous run (metadata and index) + */ + private void deleteTmpFiles() + { + for (File tmpFile : directory.listUnchecked(Descriptor::isTmpFile)) + tmpFile.delete(); + } + + public void shutdown() + { + allocator.shutdown(); + //compactor.stop(); + //invalidator.stop(); + flusher.shutdown(); + closer.shutdown(); + closeAllSegments(); + metrics.deregister(); + } + + /** + * Looks up a record by the provided id. + *

+ * Looking up an invalidated record may or may not return a record, depending on + * compaction progress. + *

+ * In case multiple copies of the record exist in the log (e.g. because of user retries), + * only the first found record will be consumed. + * + * @param id user-provided record id, expected to roughly correlate with time and go up + * @param consumer function to consume the raw record (bytes and invalidation set) if found + * @return true if the record was found, false otherwise + */ + public boolean read(K id, RecordConsumer consumer) + { + try (ReferencedSegments segments = selectAndReference(id)) + { + for (Segment segment : segments.all()) + if (segment.read(id, consumer)) + return true; + } + + return false; + } + + /** + * Looks up a record by the provided id. + *

+ * Looking up an invalidated record may or may not return a record, depending on + * compaction progress. + *

+ * In case multiple copies of the record exist in the log (e.g. because of user retries), + * the first one found will be returned. + * + * @param id user-provided record id, expected to roughly correlate with time and go up + * @return deserialized record if found, null otherwise + */ + public V read(K id) + { + EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + + try (ReferencedSegments segments = selectAndReference(id)) + { + for (Segment segment : segments.all()) + { + if (segment.read(id, holder)) + { + try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) + { + return valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion); + + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + } + } + } + + return null; + } + + /** + * Synchronously write a record to the journal. + *

+ * Blocks until the record has been deemed durable according to the journal flush mode. + * + * @param id user-provided record id, expected to roughly correlate with time and go up + * @param record the record to store + * @param hosts hosts expected to invalidate the record + */ + public void write(K id, V record, Set hosts) + { + try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) + { + valueSerializer.serialize(record, dob, params.userVersion()); + ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); + alloc.write(id, dob.unsafeGetBufferAndFlip(), hosts); + flusher.waitForFlush(alloc); + } + catch (IOException e) + { + // exception during record serialization into the scratch buffer + throw new RuntimeException(e); + } + } + + /** + * Asynchronously write a record to the journal. Writes to the journal in the calling thread, + * but doesn't wait for flush. + *

+ * Executes the supplied callback on the executor provided once the record has been durably written to disk + * + * @param id user-provided record id, expected to roughly correlate with time and go up + * @param record the record to store + * @param hosts hosts expected to invalidate the record + * @param executor executor to run the callback on + * @param callback the callback to run on + */ + public void asyncWrite(K id, V record, Set hosts, @Nonnull Executor executor, @Nonnull AsyncWriteCallback callback) + { + try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) + { + valueSerializer.serialize(record, dob, params.userVersion()); + ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); + alloc.asyncWrite(id, dob.unsafeGetBufferAndFlip(), hosts, executor, callback); + flusher.asyncFlush(alloc); + } + catch (IOException e) + { + // exception during record serialization into the scratch buffer + executor.execute(() -> callback.onFailure(e)); + } + } + + private ActiveSegment.Allocation allocate(int entrySize, Set hosts) + { + ActiveSegment segment = currentSegment; + + ActiveSegment.Allocation alloc; + while (null == (alloc = segment.allocate(entrySize, hosts))) + { + // failed to allocate; move to a new segment with enough room + advanceSegment(segment); + segment = currentSegment; + } + return alloc; + } + + /* + * Segment allocation logic. + */ + + private void advanceSegment(ActiveSegment oldSegment) + { + while (true) + { + synchronized (this) + { + // do this in a critical section, so we can maintain the order of + // segment construction when moving to allocatingFrom/activeSegments + if (currentSegment != oldSegment) + return; + + // if a segment is ready, take it now, otherwise wait for the allocator thread to construct it + if (availableSegment != null) + { + // success - change allocatingFrom and activeSegments (which must be kept in order) before leaving the critical section + addNewActiveSegment(currentSegment = availableSegment); + availableSegment = null; + break; + } + } + + awaitAvailableSegment(oldSegment); + } + + // signal the allocator thread to prepare a new segment + wakeAllocator(); + + if (null != oldSegment) + closeActiveSegmentAndOpenAsStatic(oldSegment); + + // request that the journal be flushed out-of-band, as we've finished a segment + flusher.requestExtraFlush(); + } + + private void awaitAvailableSegment(ActiveSegment currentActiveSegment) + { + do + { + WaitQueue.Signal prepared = segmentPrepared.register(metrics.waitingOnSegmentAllocation.time(), Context::stop); + if (availableSegment == null && currentSegment == currentActiveSegment) + prepared.awaitUninterruptibly(); + else + prepared.cancel(); + } + while (availableSegment == null && currentSegment == currentActiveSegment); + } + + private void wakeAllocator() + { + allocatorThreadWaitQueue.signalAll(); + } + + private void discardAvailableSegment() + { + ActiveSegment next; + synchronized (this) + { + next = availableSegment; + availableSegment = null; + } + if (next != null) + next.closeAndDiscard(); + } + + private class AllocateRunnable implements Interruptible.Task + { + @Override + public void run(Interruptible.State state) throws InterruptedException + { + if (state == NORMAL) + runNormal(); + else if (state == SHUTTING_DOWN) + shutDown(); + } + + private void runNormal() throws InterruptedException + { + boolean interrupted = false; + try + { + if (availableSegment != null) + throw new IllegalStateException("availableSegment is not null"); + + // synchronized to prevent thread interrupts while performing IO operations and also + // clear interrupted status to prevent ClosedByInterruptException in createSegment() + synchronized (this) + { + interrupted = Thread.interrupted(); + availableSegment = createSegment(); + + segmentPrepared.signalAll(); + Thread.yield(); + } + } + catch (Throwable t) + { + if (!handleError("Failed allocating journal segments", t)) + { + discardAvailableSegment(); + throw new TerminateException(); + } + TimeUnit.SECONDS.sleep(1L); // sleep for a second to avoid log spam + } + + interrupted = interrupted || Thread.interrupted(); + if (!interrupted) + { + try + { + // If we offered a segment, wait for it to be taken before reentering the loop. + // There could be a new segment in next not offered, but only on failure to discard it while + // shutting down-- nothing more can or needs to be done in that case. + WaitQueue.waitOnCondition(allocatorThreadWaitCondition, allocatorThreadWaitQueue); + } + catch (InterruptedException e) + { + interrupted = true; + } + } + + if (interrupted) + { + discardAvailableSegment(); + throw new InterruptedException(); + } + } + + private void shutDown() throws InterruptedException + { + try + { + // if shutdown() started and finished during segment creation, we'll be left with a + // segment that no one will consume; discard it + discardAvailableSegment(); + } + catch (Throwable t) + { + handleError("Failed shutting down segment allocator", t); + throw new TerminateException(); + } + } + } + + private ActiveSegment createSegment() + { + Descriptor descriptor = Descriptor.create(directory, nextSegmentId.getAndIncrement(), params.userVersion()); + return ActiveSegment.create(descriptor, params, keySupport); + } + + private void closeAllSegments() + { + Segments segments = swapSegments(ignore -> Segments.none()); + + for (ActiveSegment segment : segments.onlyActive()) + segment.closeAndIfEmptyDiscard(); + for (StaticSegment segment : segments.onlyStatic()) + segment.close(); + } + + /** + * Select segments that could potentially have an entry with the specified id and + * attempt to grab references to them all. + * + * @return a subset of segments with references to them + */ + ReferencedSegments selectAndReference(K id) + { + while (true) + { + ReferencedSegments referenced = segments().selectAndReference(id); + if (null != referenced) + return referenced; + } + } + + private Segments segments() + { + return segments.get(); + } + + private Segments swapSegments(Function, Segments> transformation) + { + Segments currentSegments, newSegments; + do + { + currentSegments = segments(); + newSegments = transformation.apply(currentSegments); + } + while (!segments.compareAndSet(currentSegments, newSegments)); + return currentSegments; + } + + private void addNewActiveSegment(ActiveSegment activeSegment) + { + swapSegments(current -> current.withNewActiveSegment(activeSegment)); + } + + private void replaceCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) + { + swapSegments(current -> current.withCompletedSegment(activeSegment, staticSegment)); + } + + private void replaceCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) + { + swapSegments(current -> current.withCompactedSegment(oldSegment, newSegment)); + } + + void selectSegmentToFlush(Collection> into) + { + ActiveSegment current = currentSegment; + for (ActiveSegment segment : segments().onlyActive()) + { + // do not sync segments that became active after flush started + if (segment.descriptor.timestamp <= current.descriptor.timestamp) + into.add(segment); + } + } + + /** + * Take care of a finished active segment: + * 1. discard tail + * 2. flush to disk + * 3. persist index and metadata + * 4. open the segment as static + * 5. replace the finished active segment with the opened static one in Segments view + * 6. release the Ref so the active segment will be cleaned up by its Tidy instance + */ + private class CloseActiveSegmentRunnable implements Runnable + { + private final ActiveSegment activeSegment; + + CloseActiveSegmentRunnable(ActiveSegment activeSegment) + { + this.activeSegment = activeSegment; + } + + @Override + public void run() + { + activeSegment.discardUnusedTail(); + activeSegment.flush(); + activeSegment.persistComponents(); + replaceCompletedSegment(activeSegment, StaticSegment.open(activeSegment.descriptor, keySupport)); + activeSegment.release(); + } + } + + void closeActiveSegmentAndOpenAsStatic(ActiveSegment activeSegment) + { + closer.execute(new CloseActiveSegmentRunnable(activeSegment)); + } + + /* + * Replay logic + */ + + /** + * Iterate over and invoke the supplied callback on every record, + * with segments iterated in segment timestamp order. Only visits + * finished, on-disk segments. + */ + public void replayStaticSegments(RecordConsumer consumer) + { + List> staticSegments = new ArrayList<>(segments().onlyStatic()); + staticSegments.sort(comparing(segment -> segment.descriptor)); + + for (StaticSegment segment : staticSegments) + segment.forEachRecord(consumer); + } + + /* + * Static helper methods used by journal components + */ + + static void validateCRC(CRC32 crc, int readCRC) throws Crc.InvalidCrc + { + if (readCRC != (int) crc.getValue()) + throw new Crc.InvalidCrc(readCRC, (int) crc.getValue()); + } + + /* + * Error handling + */ + + /** + * @return true if the invoking thread should continue, or false if it should terminate itself + */ + boolean handleError(String message, Throwable t) + { + Params.FailurePolicy policy = params.failurePolicy(); + JVMStabilityInspector.inspectJournalThrowable(t, name, policy); + + switch (policy) + { + default: + throw new AssertionError(policy); + case DIE: + case STOP: + StorageService.instance.stopTransports(); + //$FALL-THROUGH$ + case STOP_JOURNAL: + message = format("%s. Journal %s failure policy is %s; terminating thread.", message, name, policy); + logger.error(maybeAddDiskSpaceContext(message), t); + return false; + case IGNORE: + message = format("%s. Journal %s failure policy is %s; ignoring excepton.", message, name, policy); + logger.error(maybeAddDiskSpaceContext(message), t); + return true; + } + } + + /** + * Add additional information to the error message if the journal directory does not have enough free space. + * + * @param message the original error message + * @return the message with additional information if possible + */ + private String maybeAddDiskSpaceContext(String message) + { + long availableDiskSpace = PathUtils.tryGetSpace(directory.toPath(), FileStore::getTotalSpace); + int segmentSize = params.segmentSize(); + + if (availableDiskSpace >= segmentSize) + return message; + + return format("%s. %d bytes required for next journal segment but only %d bytes available. " + + "Check %s to see if not enough free space is the reason for this error.", + message, segmentSize, availableDiskSpace, directory); + } +} diff --git a/src/java/org/apache/cassandra/journal/JournalReadError.java b/src/java/org/apache/cassandra/journal/JournalReadError.java new file mode 100644 index 000000000000..87366c8d7c6b --- /dev/null +++ b/src/java/org/apache/cassandra/journal/JournalReadError.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import org.apache.cassandra.io.FSReadError; +import org.apache.cassandra.io.util.File; + +public class JournalReadError extends FSReadError +{ + public final Descriptor descriptor; + + JournalReadError(Descriptor descriptor, File file, Throwable throwable) + { + super(throwable, file); + this.descriptor = descriptor; + } + + JournalReadError(Descriptor descriptor, Component component, Throwable throwable) + { + super(throwable, descriptor.fileFor(component)); + this.descriptor = descriptor; + } +} diff --git a/src/java/org/apache/cassandra/journal/JournalWriteError.java b/src/java/org/apache/cassandra/journal/JournalWriteError.java new file mode 100644 index 000000000000..03193af5455a --- /dev/null +++ b/src/java/org/apache/cassandra/journal/JournalWriteError.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import org.apache.cassandra.io.FSWriteError; +import org.apache.cassandra.io.util.File; + +public class JournalWriteError extends FSWriteError +{ + public final Descriptor descriptor; + + JournalWriteError(Descriptor descriptor, File file, Throwable throwable) + { + super(throwable, file); + this.descriptor = descriptor; + } + + JournalWriteError(Descriptor descriptor, Component component, Throwable throwable) + { + super(throwable, descriptor.fileFor(component)); + this.descriptor = descriptor; + } +} diff --git a/src/java/org/apache/cassandra/journal/KeySupport.java b/src/java/org/apache/cassandra/journal/KeySupport.java new file mode 100644 index 000000000000..13cb902ddf38 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/KeySupport.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Comparator; +import java.util.zip.Checksum; + +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +/** + * Record keys must satisfy two properties: + *

+ * 1. Must have a fixed serialized size + * 2. Must be byte-order comparable + */ +public interface KeySupport extends Comparator +{ + int serializedSize(int userVersion); + + void serialize(K key, DataOutputPlus out, int userVersion) throws IOException; + + K deserialize(DataInputPlus in, int userVersion) throws IOException; + + K deserialize(ByteBuffer buffer, int position, int userVersion); + + void updateChecksum(Checksum crc, K key, int userVersion); + + int compareWithKeyAt(K key, ByteBuffer buffer, int position, int userVersion); +} diff --git a/src/java/org/apache/cassandra/journal/Metadata.java b/src/java/org/apache/cassandra/journal/Metadata.java new file mode 100644 index 000000000000..bc521cc83c4c --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Metadata.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; +import java.util.zip.CRC32; + +import org.agrona.collections.Int2IntHashMap; +import org.agrona.collections.IntHashSet; +import org.apache.cassandra.io.util.*; +import org.apache.cassandra.utils.Crc; + +import static org.apache.cassandra.journal.Journal.validateCRC; +import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; + +/** + * Tracks and serializes the following information: + * - all the hosts with entries in the data segment and #of records each is tagged in; + * used for compaction prioritisation and to act in response to topology changes + * - total count of records in this segment file + * used for compaction prioritisation + */ +final class Metadata +{ + private final Set unmodifiableHosts; + private final Map recordsPerHost; + + private volatile int recordsCount; + private static final AtomicIntegerFieldUpdater recordsCountUpdater = + AtomicIntegerFieldUpdater.newUpdater(Metadata.class, "recordsCount"); + + static Metadata create() + { + return new Metadata(new ConcurrentHashMap<>(), 0); + } + + private Metadata(Map recordsPerHost, int recordsCount) + { + this.recordsPerHost = recordsPerHost; + this.recordsCount = recordsCount; + this.unmodifiableHosts = Collections.unmodifiableSet(recordsPerHost.keySet()); + } + + void update(Set hosts) + { + updateHosts(hosts); + incrementRecordsCount(); + } + + private void updateHosts(Set hosts) + { + for (int host : hosts) + recordsPerHost.compute(host, (k, v) -> null == v ? 1 : v + 1); + } + + private void incrementRecordsCount() + { + recordsCountUpdater.incrementAndGet(this); + } + + Set hosts() + { + return unmodifiableHosts; + } + + int count(int host) + { + return recordsPerHost.getOrDefault(host, 0); + } + + int totalCount() + { + return recordsCount; + } + + void write(DataOutputPlus out) throws IOException + { + CRC32 crc = Crc.crc32(); + + /* Write records count per host */ + + int size = recordsPerHost.size(); + out.writeInt(size); + updateChecksumInt(crc, size); + + out.writeInt((int) crc.getValue()); + + for (Map.Entry entry : recordsPerHost.entrySet()) + { + int host = entry.getKey(); + int count = entry.getValue(); + + out.writeInt(host); + out.writeInt(count); + + updateChecksumInt(crc, host); + updateChecksumInt(crc, count); + } + + /* Write records count */ + + out.writeInt(recordsCount); + updateChecksumInt(crc, recordsCount); + + out.writeInt((int) crc.getValue()); + } + + static Metadata read(DataInputPlus in) throws IOException + { + CRC32 crc = Crc.crc32(); + + /* Read records count per host */ + + int size = in.readInt(); + updateChecksumInt(crc, size); + validateCRC(crc, in.readInt()); + + Int2IntHashMap recordsPerHost = new Int2IntHashMap(Integer.MIN_VALUE); + for (int i = 0; i < size; i++) + { + int host = in.readInt(); + int count = in.readInt(); + + updateChecksumInt(crc, host); + updateChecksumInt(crc, count); + + recordsPerHost.put(host, count); + } + + /* Read records count */ + + int recordsCount = in.readInt(); + updateChecksumInt(crc, recordsCount); + + validateCRC(crc, in.readInt()); + return new Metadata(recordsPerHost, recordsCount); + } + + void persist(Descriptor descriptor) + { + File tmpFile = descriptor.tmpFileFor(Component.METADATA); + try (FileOutputStreamPlus out = new FileOutputStreamPlus(tmpFile)) + { + write(out); + + out.flush(); + out.sync(); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, tmpFile, e); + } + tmpFile.move(descriptor.fileFor(Component.METADATA)); + } + + static Metadata load(Descriptor descriptor) + { + File file = descriptor.fileFor(Component.METADATA); + try (FileInputStreamPlus in = new FileInputStreamPlus(file)) + { + return read(in); + } + catch (IOException e) + { + throw new JournalReadError(descriptor, file, e); + } + } + + static Metadata rebuild(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + Int2IntHashMap recordsPerHost = new Int2IntHashMap(Integer.MIN_VALUE); + int recordsCount = 0; + + try (StaticSegment.SequentialReader reader = StaticSegment.reader(descriptor, keySupport, fsyncedLimit)) + { + while (reader.advance()) + { + // iterator is cached and reused by IntHashSet + IntHashSet.IntIterator hosts = reader.hosts().iterator(); + while (hosts.hasNext()) + recordsPerHost.merge(hosts.nextValue(), 1, Integer::sum); + + ++recordsCount; + } + } + + return new Metadata(recordsPerHost, recordsCount); + } + + static Metadata rebuildAndPersist(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + Metadata metadata = rebuild(descriptor, keySupport, fsyncedLimit); + metadata.persist(descriptor); + return metadata; + } +} diff --git a/src/java/org/apache/cassandra/journal/Metrics.java b/src/java/org/apache/cassandra/journal/Metrics.java new file mode 100644 index 000000000000..befc3c2ddb32 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Metrics.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Timer; +import org.apache.cassandra.metrics.CassandraMetricsRegistry; +import org.apache.cassandra.metrics.DefaultNameFactory; +import org.apache.cassandra.metrics.MetricNameFactory; + +final class Metrics +{ + private static final String WAITING_ON_FLUSH = "WaitingOnFlush"; + private static final String WAITING_ON_ALLOCATION = "WaitingOnSegmentAllocation"; + private static final String WRITTEN_ENTRIES = "WrittenEntries"; + private static final String PENDING_ENTRIES = "PendingEntries"; + + /** + * The time spent waiting on journal flush; for {@link org.apache.cassandra.journal.Params.FlushMode#PERIODIC} + * this is only occurs when the flush is lagging its flush interval. + */ + Timer waitingOnFlush; + + /** Time spent waiting for a segment to be allocated - under normal conditions this should be zero */ + Timer waitingOnSegmentAllocation; + + /** Number of pending (flush) entries */ + Gauge pendingEntries; + + /** Number of written (flushed) entries */ + Gauge writtenEntries; + + private final MetricNameFactory factory; + + Metrics(String name) + { + this.factory = new DefaultNameFactory("Journal", name); + } + + void register(Flusher flusher) + { + waitingOnFlush = CassandraMetricsRegistry.Metrics.timer(createName(WAITING_ON_FLUSH)); + waitingOnSegmentAllocation = CassandraMetricsRegistry.Metrics.timer(createName(WAITING_ON_ALLOCATION)); + pendingEntries = CassandraMetricsRegistry.Metrics.register(createName(PENDING_ENTRIES), flusher::pendingEntries); + writtenEntries = CassandraMetricsRegistry.Metrics.register(createName(WRITTEN_ENTRIES), flusher::writtenEntries); + } + + void deregister() + { + CassandraMetricsRegistry.Metrics.remove(createName(WAITING_ON_FLUSH)); + CassandraMetricsRegistry.Metrics.remove(createName(WAITING_ON_ALLOCATION)); + CassandraMetricsRegistry.Metrics.remove(createName(PENDING_ENTRIES)); + CassandraMetricsRegistry.Metrics.remove(createName(WRITTEN_ENTRIES)); + } + + private CassandraMetricsRegistry.MetricName createName(String metricName) + { + return factory.createMetricName(metricName); + } +} diff --git a/src/java/org/apache/cassandra/journal/OnDiskIndex.java b/src/java/org/apache/cassandra/journal/OnDiskIndex.java new file mode 100644 index 000000000000..2bc40c6a5e99 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/OnDiskIndex.java @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Map; +import java.util.NavigableMap; +import java.util.zip.CRC32; + +import javax.annotation.Nullable; + +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.utils.Crc; + +import static org.apache.cassandra.journal.Journal.validateCRC; +import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; + +/** + * An on-disk (memory-mapped) index for a completed flushed segment. + *

+ * TODO (expected): block-level CRC + */ +final class OnDiskIndex extends Index +{ + private static final int[] EMPTY = new int[0]; + + private static final int FILE_PREFIX_SIZE = 4 + 4; // count of entries, CRC + private static final int VALUE_SIZE = 4; // int offset + + private final int KEY_SIZE; + private final int ENTRY_SIZE; + + private final Descriptor descriptor; + + private final FileChannel channel; + private volatile MappedByteBuffer buffer; + private final int entryCount; + + private volatile K firstId, lastId; + + private OnDiskIndex( + Descriptor descriptor, KeySupport keySupport, FileChannel channel, MappedByteBuffer buffer, int entryCount) + { + super(keySupport); + + this.descriptor = descriptor; + this.channel = channel; + this.buffer = buffer; + this.entryCount = entryCount; + + KEY_SIZE = keySupport.serializedSize(descriptor.userVersion); + ENTRY_SIZE = KEY_SIZE + VALUE_SIZE; + } + + /** + * Open the index for reading, validate CRC + */ + @SuppressWarnings({ "resource", "RedundantSuppression" }) + static OnDiskIndex open(Descriptor descriptor, KeySupport keySupport) + { + File file = descriptor.fileFor(Component.INDEX); + FileChannel channel = null; + MappedByteBuffer buffer = null; + try + { + channel = FileChannel.open(file.toPath(), StandardOpenOption.READ); + buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size()); + + int entryCount = buffer.getInt(0); + OnDiskIndex index = new OnDiskIndex<>(descriptor, keySupport, channel, buffer, entryCount); + index.validate(); + index.init(); + return index; + } + catch (Throwable e) + { + FileUtils.clean(buffer); + FileUtils.closeQuietly(channel); + throw new JournalReadError(descriptor, file, e); + } + } + + private void init() + { + if (entryCount > 0) + { + firstId = keyAtIndex(0); + lastId = keyAtIndex(entryCount - 1); + } + } + + @Override + public void close() + { + try + { + FileUtils.clean(buffer); + buffer = null; + channel.close(); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, Component.INDEX, e); + } + } + + void validate() throws IOException + { + CRC32 crc = Crc.crc32(); + + try (DataInputBuffer in = new DataInputBuffer(buffer, true)) + { + int entryCount = in.readInt(); + updateChecksumInt(crc, entryCount); + validateCRC(crc, in.readInt()); + + Crc.updateCrc32(crc, buffer, FILE_PREFIX_SIZE, FILE_PREFIX_SIZE + entryCount * ENTRY_SIZE); + in.skipBytesFully(entryCount * ENTRY_SIZE); + validateCRC(crc, in.readInt()); + + if (in.available() != 0) + throw new IOException("Trailing data encountered in segment index " + descriptor.fileFor(Component.INDEX)); + } + } + + static void write( + NavigableMap entries, KeySupport keySupport, DataOutputPlus out, int userVersion) throws IOException + { + CRC32 crc = Crc.crc32(); + + int size = entries.values() + .stream() + .mapToInt(offsets -> offsets.length) + .sum(); + out.writeInt(size); + updateChecksumInt(crc, size); + out.writeInt((int) crc.getValue()); + + for (Map.Entry entry : entries.entrySet()) + { + for (int offset : entry.getValue()) + { + K key = entry.getKey(); + keySupport.serialize(key, out, userVersion); + keySupport.updateChecksum(crc, key, userVersion); + + out.writeInt(offset); + updateChecksumInt(crc, offset); + } + } + + out.writeInt((int) crc.getValue()); + } + + @Override + @Nullable + public K firstId() + { + return firstId; + } + + @Override + @Nullable + public K lastId() + { + return lastId; + } + + @Override + public int[] lookUp(K id) + { + if (!mayContainId(id)) + return EMPTY; + + int keyIndex = binarySearch(id); + if (keyIndex < 0) + return EMPTY; + + int[] offsets = new int[] { offsetAtIndex(keyIndex) }; + + /* + * Duplicate entries are possible within one segment (but should be rare). + * Check and add entries before and after the found result (not guaranteed to be first). + */ + + for (int i = keyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) + { + int length = offsets.length; + offsets = Arrays.copyOf(offsets, length + 1); + offsets[length] = offsetAtIndex(i); + } + + for (int i = keyIndex + 1; i < entryCount && id.equals(keyAtIndex(i)); i++) + { + int length = offsets.length; + offsets = Arrays.copyOf(offsets, length + 1); + offsets[length] = offsetAtIndex(i); + } + + Arrays.sort(offsets); + return offsets; + } + + @Override + public int lookUpFirst(K id) + { + if (!mayContainId(id)) + return -1; + + int keyIndex = binarySearch(id); + + /* + * Duplicate entries are possible within one segment (but should be rare). + * Check and add entries before until we find the first occurrence of key. + */ + for (int i = keyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) + keyIndex = i; + + return keyIndex < 0 ? -1 : offsetAtIndex(keyIndex); + } + + private K keyAtIndex(int index) + { + return keySupport.deserialize(buffer, FILE_PREFIX_SIZE + index * ENTRY_SIZE, descriptor.userVersion); + } + + private int offsetAtIndex(int index) + { + return buffer.getInt(FILE_PREFIX_SIZE + index * ENTRY_SIZE + KEY_SIZE); + } + + /* + * This has been lifted from {@see IndexSummary}'s implementation, + * which itself was lifted from Harmony's Collections implementation. + */ + private int binarySearch(K key) + { + int low = 0, mid = entryCount, high = mid - 1, result = -1; + while (low <= high) + { + mid = (low + high) >> 1; + result = compareWithKeyAt(key, mid); + if (result > 0) + { + low = mid + 1; + } + else if (result == 0) + { + return mid; + } + else + { + high = mid - 1; + } + } + return -mid - (result < 0 ? 1 : 2); + } + + private int compareWithKeyAt(K key, int keyIndex) + { + int offset = FILE_PREFIX_SIZE + ENTRY_SIZE * keyIndex; + return keySupport.compareWithKeyAt(key, buffer, offset, descriptor.userVersion); + } + + static OnDiskIndex rebuildAndPersist(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + try (InMemoryIndex index = InMemoryIndex.rebuild(descriptor, keySupport, fsyncedLimit)) + { + index.persist(descriptor); + } + return open(descriptor, keySupport); + } +} diff --git a/src/java/org/apache/cassandra/journal/Params.java b/src/java/org/apache/cassandra/journal/Params.java new file mode 100644 index 000000000000..f462f450ac21 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Params.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +public interface Params +{ + enum FlushMode { BATCH, GROUP, PERIODIC } + + enum FailurePolicy { STOP, STOP_JOURNAL, IGNORE, DIE } + + /** + * @return maximum segment size + */ + int segmentSize(); + + /** + * @return this journal's {@link FailurePolicy} + */ + FailurePolicy failurePolicy(); + + /** + * @return journal flush (sync) mode + */ + FlushMode flushMode(); + + /** + * @return milliseconds between journal flushes + */ + int flushPeriod(); + + /** + * @return milliseconds to block writes for while waiting for a slow disk flush to complete + * when in {@link FlushMode#PERIODIC} mode + */ + int periodicFlushLagBlock(); + + /** + * @return user provided version to use for key and value serialization + */ + int userVersion(); +} diff --git a/src/java/org/apache/cassandra/exceptions/ChecksumMismatchException.java b/src/java/org/apache/cassandra/journal/RecordConsumer.java similarity index 74% rename from src/java/org/apache/cassandra/exceptions/ChecksumMismatchException.java rename to src/java/org/apache/cassandra/journal/RecordConsumer.java index a76c46c782bc..2a1adb9fa42d 100644 --- a/src/java/org/apache/cassandra/exceptions/ChecksumMismatchException.java +++ b/src/java/org/apache/cassandra/journal/RecordConsumer.java @@ -15,20 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.cassandra.journal; -package org.apache.cassandra.exceptions; +import java.nio.ByteBuffer; -import java.io.IOException; +import org.agrona.collections.IntHashSet; -public class ChecksumMismatchException extends IOException +@FunctionalInterface +public interface RecordConsumer { - public ChecksumMismatchException() - { - super(); - } - - public ChecksumMismatchException(String s) - { - super(s); - } + void accept(K key, ByteBuffer buffer, IntHashSet hosts, int userVersion); } diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java new file mode 100644 index 000000000000..e63f6f2cad21 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.nio.ByteBuffer; + +import accord.utils.Invariants; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.utils.*; +import org.apache.cassandra.utils.concurrent.RefCounted; + +abstract class Segment implements Closeable, RefCounted> +{ + final File file; + final Descriptor descriptor; + final SyncedOffsets syncedOffsets; + final Metadata metadata; + final KeySupport keySupport; + + ByteBuffer buffer; + + Segment(Descriptor descriptor, SyncedOffsets syncedOffsets, Metadata metadata, KeySupport keySupport) + { + this.file = descriptor.fileFor(Component.DATA); + this.descriptor = descriptor; + this.syncedOffsets = syncedOffsets; + this.metadata = metadata; + this.keySupport = keySupport; + } + + abstract Index index(); + + /* + * Reading entries (by id, by offset, iterate) + */ + + boolean read(K id, RecordConsumer consumer) + { + int offset = index().lookUpFirst(id); + if (offset == -1) + return false; + + EntrySerializer.EntryHolder into = new EntrySerializer.EntryHolder<>(); + if (read(offset, into)) + { + Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); + consumer.accept(id, into.value, into.hosts, descriptor.userVersion); + return true; + } + return false; + } + + boolean read(K id, EntrySerializer.EntryHolder into) + { + int offset = index().lookUpFirst(id); + if (offset == -1 || !read(offset, into)) + return false; + Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); + return true; + } + + abstract boolean read(int offset, EntrySerializer.EntryHolder into); +} diff --git a/src/java/org/apache/cassandra/journal/SegmentWriter.java b/src/java/org/apache/cassandra/journal/SegmentWriter.java new file mode 100644 index 000000000000..852e955b21e3 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/SegmentWriter.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Set; + +import com.google.common.primitives.Ints; + +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.FileOutputStreamPlus; +import org.apache.cassandra.io.util.TrackedDataOutputPlus; + +final class SegmentWriter implements Closeable +{ + private final Descriptor descriptor; + private final KeySupport keySupport; + + private final InMemoryIndex index; + private final Metadata metadata; + + private final File file; + private FileOutputStreamPlus untrackedOut; + private TrackedDataOutputPlus trackedOut; + + private SegmentWriter(Descriptor descriptor, KeySupport keySupport) + { + this.descriptor = descriptor; + this.keySupport = keySupport; + + index = InMemoryIndex.create(keySupport); + metadata = Metadata.create(); + + file = descriptor.fileFor(Component.DATA); + try + { + untrackedOut = new FileOutputStreamPlus(file); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, file, e); + } + trackedOut = TrackedDataOutputPlus.wrap(untrackedOut); + } + + static SegmentWriter create(Descriptor descriptor, KeySupport keySupport) + { + return new SegmentWriter<>(descriptor, keySupport); + } + + int write(K key, ByteBuffer record, Set hosts) + { + int position = position(); + try + { + index.update(key, position); + metadata.update(hosts); + + EntrySerializer.write(key, record, hosts, keySupport, trackedOut, descriptor.userVersion); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, file, e); + } + return position; + } + + int position() + { + return Ints.checkedCast(trackedOut.position()); + } + + @Override + public void close() + { + try + { + untrackedOut.flush(); + untrackedOut.sync(); + untrackedOut.close(); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, file, e); + } + + try (SyncedOffsets syncedOffsets = SyncedOffsets.active(descriptor, true)) + { + syncedOffsets.mark(position()); + } + + index.persist(descriptor); + metadata.persist(descriptor); + + untrackedOut = null; + trackedOut = null; + } +} diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java new file mode 100644 index 000000000000..8873ea7b3d85 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; + +import accord.utils.Invariants; +import org.apache.cassandra.utils.concurrent.Refs; + +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; + +/** + * Consistent, immutable view of active + static segments + *

+ * TODO: an interval/range structure for StaticSegment lookup based on min/max key bounds + */ +class Segments +{ + // active segments, containing unflushed data; the tail of this queue is the one we allocate writes from + private final List> activeSegments; + + // finalised segments, no longer written to + private final Map> staticSegments; + + // cached Iterable of concatenated active and static segments + private final Iterable> allSegments; + + Segments(List> activeSegments, Map> staticSegments) + { + this.activeSegments = activeSegments; + this.staticSegments = staticSegments; + this.allSegments = Iterables.concat(onlyActive(), onlyStatic()); + } + + static Segments ofStatic(Collection> segments) + { + HashMap> staticSegments = + Maps.newHashMapWithExpectedSize(segments.size()); + for (StaticSegment segment : segments) + staticSegments.put(segment.descriptor, segment); + return new Segments<>(new ArrayList<>(), staticSegments); + } + + static Segments none() + { + return new Segments<>(Collections.emptyList(), Collections.emptyMap()); + } + + Segments withNewActiveSegment(ActiveSegment activeSegment) + { + ArrayList> newActiveSegments = + new ArrayList<>(activeSegments.size() + 1); + newActiveSegments.addAll(activeSegments); + newActiveSegments.add(activeSegment); + return new Segments<>(newActiveSegments, staticSegments); + } + + Segments withCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) + { + Invariants.checkArgument(activeSegment.descriptor.equals(staticSegment.descriptor)); + + ArrayList> newActiveSegments = + new ArrayList<>(activeSegments.size() - 1); + for (ActiveSegment segment : activeSegments) + if (segment != activeSegment) + newActiveSegments.add(segment); + Invariants.checkState(newActiveSegments.size() == activeSegments.size() - 1); + + HashMap> newStaticSegments = + Maps.newHashMapWithExpectedSize(staticSegments.size() + 1); + newStaticSegments.putAll(staticSegments); + if (newStaticSegments.put(staticSegment.descriptor, staticSegment) != null) + throw new IllegalStateException(); + + return new Segments<>(newActiveSegments, newStaticSegments); + } + + Segments withCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) + { + Invariants.checkArgument(oldSegment.descriptor.timestamp == newSegment.descriptor.timestamp); + Invariants.checkArgument(oldSegment.descriptor.generation < newSegment.descriptor.generation); + + HashMap> newStaticSegments = new HashMap<>(staticSegments); + if (!newStaticSegments.remove(oldSegment.descriptor, oldSegment)) + throw new IllegalStateException(); + if (null != newStaticSegments.put(newSegment.descriptor, newSegment)) + throw new IllegalStateException(); + + return new Segments<>(activeSegments, newStaticSegments); + } + + Segments withoutInvalidatedSegment(StaticSegment staticSegment) + { + HashMap> newStaticSegments = new HashMap<>(staticSegments); + if (!newStaticSegments.remove(staticSegment.descriptor, staticSegment)) + throw new IllegalStateException(); + return new Segments<>(activeSegments, newStaticSegments); + } + + Iterable> all() + { + return allSegments; + } + + Collection> onlyActive() + { + return activeSegments; + } + + Collection> onlyStatic() + { + return staticSegments.values(); + } + + /** + * Select segments that could potentially have an entry with the specified id and + * attempt to grab references to them all. + * + * @return a subset of segments with references to them, or {@code null} if failed to grab the refs + */ + @SuppressWarnings("resource") + ReferencedSegments selectAndReference(K id) + { + List> selectedActive = null; + for (ActiveSegment segment : onlyActive()) + { + if (segment.index.mayContainId(id)) + { + if (null == selectedActive) + selectedActive = new ArrayList<>(); + selectedActive.add(segment); + } + } + if (null == selectedActive) selectedActive = emptyList(); + + Map> selectedStatic = null; + for (StaticSegment segment : onlyStatic()) + { + if (segment.index().mayContainId(id)) + { + if (null == selectedStatic) + selectedStatic = new HashMap<>(); + selectedStatic.put(segment.descriptor, segment); + } + } + if (null == selectedStatic) selectedStatic = emptyMap(); + + Refs> refs = null; + if (!selectedActive.isEmpty() || !selectedStatic.isEmpty()) + { + refs = Refs.tryRef(Iterables.concat(selectedActive, selectedStatic.values())); + if (null == refs) + return null; + } + return new ReferencedSegments<>(selectedActive, selectedStatic, refs); + } + + static class ReferencedSegments extends Segments implements AutoCloseable + { + public final Refs> refs; + + ReferencedSegments( + List> activeSegments, Map> staticSegments, Refs> refs) + { + super(activeSegments, staticSegments); + this.refs = refs; + } + + @Override + public void close() + { + if (null != refs) + refs.release(); + } + } +} diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java new file mode 100644 index 000000000000..52b8d954e155 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.NoSuchFileException; +import java.nio.file.StandardOpenOption; +import java.util.*; + +import org.agrona.collections.IntHashSet; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.utils.Closeable; +import org.apache.cassandra.utils.concurrent.Ref; + +/** + * An immutable data segment that is no longer written to. + *

+ * Can be compacted with input from {@code PersistedInvalidations} into a new smaller segment, + * with invalidated entries removed. + */ +final class StaticSegment extends Segment +{ + final FileChannel channel; + + private final Ref> selfRef; + + private final OnDiskIndex index; + + private StaticSegment(Descriptor descriptor, + FileChannel channel, + MappedByteBuffer buffer, + SyncedOffsets syncedOffsets, + OnDiskIndex index, + Metadata metadata, + KeySupport keySupport) + { + super(descriptor, syncedOffsets, metadata, keySupport); + this.index = index; + + this.channel = channel; + this.buffer = buffer; + + selfRef = new Ref<>(this, new Tidier<>(descriptor, channel, buffer, index)); + } + + /** + * Loads all segments matching the supplied desctiptors + * + * @param descriptors descriptors of the segments to load + * @return list of the loaded segments + */ + static List> open(Collection descriptors, KeySupport keySupport) + { + List> segments = new ArrayList<>(descriptors.size()); + for (Descriptor descriptor : descriptors) + segments.add(open(descriptor, keySupport)); + return segments; + } + + /** + * Load the segment corresponding to the provided desrciptor + * + * @param descriptor descriptor of the segment to load + * @return the loaded segment + */ + @SuppressWarnings({ "resource", "RedundantSuppression" }) + static StaticSegment open(Descriptor descriptor, KeySupport keySupport) + { + if (!Component.DATA.existsFor(descriptor)) + throw new IllegalArgumentException("Data file for segment " + descriptor + " doesn't exist"); + + SyncedOffsets syncedOffsets = Component.SYNCED_OFFSETS.existsFor(descriptor) + ? SyncedOffsets.load(descriptor) + : SyncedOffsets.absent(); + + Metadata metadata = Component.METADATA.existsFor(descriptor) + ? Metadata.load(descriptor) + : Metadata.rebuildAndPersist(descriptor, keySupport, syncedOffsets.syncedOffset()); + + OnDiskIndex index = Component.INDEX.existsFor(descriptor) + ? OnDiskIndex.open(descriptor, keySupport) + : OnDiskIndex.rebuildAndPersist(descriptor, keySupport, syncedOffsets.syncedOffset()); + + try + { + return internalOpen(descriptor, syncedOffsets, index, metadata, keySupport); + } + catch (IOException e) + { + throw new JournalReadError(descriptor, Component.DATA, e); + } + } + + @SuppressWarnings("resource") + private static StaticSegment internalOpen( + Descriptor descriptor, SyncedOffsets syncedOffsets, OnDiskIndex index, Metadata metadata, KeySupport keySupport) + throws IOException + { + File file = descriptor.fileFor(Component.DATA); + FileChannel channel = FileChannel.open(file.toPath(), StandardOpenOption.READ); + MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size()); + return new StaticSegment<>(descriptor, channel, buffer, syncedOffsets, index, metadata, keySupport); + } + + @Override + public void close() + { + selfRef.release(); + } + + @Override + public Ref> tryRef() + { + return selfRef.tryRef(); + } + + @Override + public Ref> ref() + { + return selfRef.ref(); + } + + private static final class Tidier implements Tidy + { + private final Descriptor descriptor; + private final FileChannel channel; + private final ByteBuffer buffer; + private final Index index; + + Tidier(Descriptor descriptor, FileChannel channel, ByteBuffer buffer, Index index) + { + this.descriptor = descriptor; + this.channel = channel; + this.buffer = buffer; + this.index = index; + } + + @Override + public void tidy() + { + FileUtils.clean(buffer); + FileUtils.closeQuietly(channel); + index.close(); + } + + @Override + public String name() + { + return descriptor.toString(); + } + } + + @Override + OnDiskIndex index() + { + return index; + } + + /** + * Read the entry and specified offset into the entry holder. + * Expects the record to have been written at this offset, but potentially not flushed and lost. + */ + @Override + boolean read(int offset, EntrySerializer.EntryHolder into) + { + ByteBuffer duplicate = (ByteBuffer) buffer.duplicate().position(offset); + try (DataInputBuffer in = new DataInputBuffer(duplicate, false)) + { + return EntrySerializer.tryRead(into, keySupport, duplicate, in, syncedOffsets.syncedOffset(), descriptor.userVersion); + } + catch (IOException e) + { + throw new JournalReadError(descriptor, file, e); + } + } + + /** + * Iterate over and invoke the supplied callback on every record. + */ + void forEachRecord(RecordConsumer consumer) + { + try (SequentialReader reader = reader(descriptor, keySupport, syncedOffsets.syncedOffset())) + { + while (reader.advance()) + { + consumer.accept(reader.id(), reader.record(), reader.hosts(), descriptor.userVersion); + } + } + } + + /* + * Sequential reading (replay and components rebuild) + */ + + static SequentialReader reader(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + return SequentialReader.open(descriptor, keySupport, fsyncedLimit); + } + + /** + * A sequential data segment reader to use for journal replay and rebuilding + * missing auxilirary components (index and metadata). + *

+ * Unexpected EOF and CRC mismatches in synced portions of segments are treated + * strictly, throwing {@link JournalReadError}. Errors encountered in unsynced portions + * of segments are treated as segment EOF. + */ + static final class SequentialReader implements Closeable + { + private final Descriptor descriptor; + private final KeySupport keySupport; + private final int fsyncedLimit; // exclusive + + private final File file; + private final FileChannel channel; + private final MappedByteBuffer buffer; + private final DataInputBuffer in; + + private int offset = -1; + private final EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + private State state = State.RESET; + + static SequentialReader open(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + return new SequentialReader<>(descriptor, keySupport, fsyncedLimit); + } + + SequentialReader(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + this.descriptor = descriptor; + this.keySupport = keySupport; + this.fsyncedLimit = fsyncedLimit; + + file = descriptor.fileFor(Component.DATA); + try + { + channel = file.newReadChannel(); + buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size()); + } + catch (NoSuchFileException e) + { + throw new IllegalArgumentException("Data file for segment " + descriptor + " doesn't exist"); + } + catch (IOException e) + { + throw new JournalReadError(descriptor, file, e); + } + in = new DataInputBuffer(buffer, false); + } + + @Override + public void close() + { + FileUtils.closeQuietly(channel); + FileUtils.clean(buffer); + } + + int offset() + { + ensureHasAdvanced(); + return offset; + } + + K id() + { + ensureHasAdvanced(); + return holder.key; + } + + IntHashSet hosts() + { + ensureHasAdvanced(); + return holder.hosts; + } + + ByteBuffer record() + { + ensureHasAdvanced(); + return holder.value; + } + + private void ensureHasAdvanced() + { + if (state != State.ADVANCED) + throw new IllegalStateException("Must call advance() before accessing entry content"); + } + + boolean advance() + { + if (state == State.EOF) + return false; + + reset(); + return buffer.hasRemaining() ? doAdvance() : eof(); + } + + private boolean doAdvance() + { + offset = buffer.position(); + try + { + if (!EntrySerializer.tryRead(holder, keySupport, buffer, in, fsyncedLimit, descriptor.userVersion)) + return eof(); + } + catch (IOException e) + { + throw new JournalReadError(descriptor, file, e); + } + + state = State.ADVANCED; + return true; + } + + private void reset() + { + offset = -1; + holder.clear(); + state = State.RESET; + } + + private boolean eof() + { + state = State.EOF; + return false; + } + + enum State { RESET, ADVANCED, EOF } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/journal/SyncedOffsets.java b/src/java/org/apache/cassandra/journal/SyncedOffsets.java new file mode 100644 index 000000000000..bee302d6d867 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/SyncedOffsets.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.NoSuchFileException; +import java.util.zip.CRC32; + +import org.apache.cassandra.io.FSWriteError; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.FileOutputStreamPlus; +import org.apache.cassandra.io.util.RandomAccessReader; +import org.apache.cassandra.utils.Closeable; +import org.apache.cassandra.utils.Crc; + +import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; + +/** + * Keeps track of fsynced limits of a data file. Enables us to treat invalid + * records that are known to have been fsynced to disk differently from those + * that aren't. + *

+ * On disk representation is a sequence of 2-int tuples of {synced offset, CRC32(synced offset)} + */ +interface SyncedOffsets extends Closeable +{ + /** + * @return furthest known synced offset + */ + int syncedOffset(); + + /** + * Record an offset as synced to disk. + * + * @param offset the offset into datafile, up to which contents have been fsynced (exclusive) + */ + void mark(int offset); + + @Override + default void close() + { + } + + /** + * @return a disk-backed synced offset tracker for a new {@link ActiveSegment} + */ + static Active active(Descriptor descriptor, boolean syncOnMark) + { + return new Active(descriptor, syncOnMark); + } + + /** + * Load an existing log of synced offsets from disk into an immutable instance. + */ + static Static load(Descriptor descriptor) + { + return Static.load(descriptor); + } + + /** + * @return a placeholder instance in case this component is missing + */ + static Absent absent() + { + return Absent.INSTANCE; + } + + /** + * Single-threaded, file-based list of synced offsets. + */ + final class Active implements SyncedOffsets + { + private final Descriptor descriptor; + private final boolean syncOnMark; + + private final FileOutputStreamPlus output; + private volatile int syncedOffset; + + private Active(Descriptor descriptor, boolean syncOnMark) + { + this.descriptor = descriptor; + this.syncOnMark = syncOnMark; + + File file = descriptor.fileFor(Component.SYNCED_OFFSETS); + if (file.exists()) + throw new IllegalArgumentException("Synced offsets file " + file + " already exists"); + + try + { + output = file.newOutputStream(File.WriteMode.OVERWRITE); + } + catch (UncheckedIOException | FSWriteError e) + { + // extract original cause and throw as JournalWriteError + throw new JournalWriteError(descriptor, file, e.getCause()); + } + catch (NoSuchFileException e) + { + throw new AssertionError(); // unreachable + } + } + + @Override + public int syncedOffset() + { + return syncedOffset; + } + + @Override + public void mark(int offset) + { + if (offset < syncedOffset) + throw new IllegalArgumentException("offset " + offset + " is smaller than previous mark " + offset); + + CRC32 crc = Crc.crc32(); + updateChecksumInt(crc, offset); + + try + { + output.writeInt(offset); + output.writeInt((int) crc.getValue()); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, Component.SYNCED_OFFSETS, e); + } + + syncedOffset = offset; + if (syncOnMark) sync(); + } + + private void sync() + { + try + { + output.sync(); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, Component.SYNCED_OFFSETS, e); + } + } + + @Override + public void close() + { + if (!syncOnMark) sync(); + + try + { + output.close(); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, Component.SYNCED_OFFSETS, e); + } + } + } + + final class Static implements SyncedOffsets + { + private final int syncedOffset; + + static Static load(Descriptor descriptor) + { + File file = descriptor.fileFor(Component.SYNCED_OFFSETS); + if (!file.exists()) + throw new IllegalArgumentException("Synced offsets file " + file + " doesn't exist"); + + int syncedOffset = 0; + try (RandomAccessReader reader = RandomAccessReader.open(file)) + { + CRC32 crc = Crc.crc32(); + while (reader.bytesRemaining() >= 8) + { + int offset = reader.readInt(); + updateChecksumInt(crc, offset); + int readCrc = reader.readInt(); + if (readCrc != (int) crc.getValue()) + break; + syncedOffset = offset; + Crc.initialize(crc); + } + } + catch (Throwable t) + { + throw new JournalReadError(descriptor, file, t); + } + + return new Static(syncedOffset); + } + + Static(int offset) + { + this.syncedOffset = offset; + } + + @Override + public int syncedOffset() + { + return syncedOffset; + } + + @Override + public void mark(int offset) + { + throw new UnsupportedOperationException(); + } + } + + final class Absent implements SyncedOffsets + { + static final Absent INSTANCE = new Absent(); + + @Override + public int syncedOffset() + { + return 0; + } + + @Override + public void mark(int offset) + { + throw new UnsupportedOperationException(); + } + } +} diff --git a/src/java/org/apache/cassandra/journal/ValueSerializer.java b/src/java/org/apache/cassandra/journal/ValueSerializer.java new file mode 100644 index 000000000000..f004c3a37aa7 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/ValueSerializer.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; + +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public interface ValueSerializer +{ + int serializedSize(V value, int userVersion); + + void serialize(V value, DataOutputPlus out, int userVersion) throws IOException; + + /** + * Deserialize the value given the key is known. Allows to avoid serializing + * redundant information in values, if it can be derived from keys. + */ + V deserialize(K key, DataInputPlus in, int userVersion) throws IOException; +} diff --git a/src/java/org/apache/cassandra/journal/package-info.java b/src/java/org/apache/cassandra/journal/package-info.java new file mode 100644 index 000000000000..5ae20b9274ad --- /dev/null +++ b/src/java/org/apache/cassandra/journal/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * TODO + */ +package org.apache.cassandra.journal; diff --git a/src/java/org/apache/cassandra/net/AbstractMessageHandler.java b/src/java/org/apache/cassandra/net/AbstractMessageHandler.java index 5b5b8b7f1ad7..895b277ad524 100644 --- a/src/java/org/apache/cassandra/net/AbstractMessageHandler.java +++ b/src/java/org/apache/cassandra/net/AbstractMessageHandler.java @@ -33,6 +33,7 @@ import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; import io.netty.channel.EventLoop; +import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.metrics.ClientMetrics; import org.apache.cassandra.net.FrameDecoder.CorruptFrame; import org.apache.cassandra.net.FrameDecoder.Frame; @@ -43,7 +44,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.cassandra.net.Crc.InvalidCrc; +import static org.apache.cassandra.utils.Crc.InvalidCrc; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; /** diff --git a/src/java/org/apache/cassandra/net/FrameDecoderCrc.java b/src/java/org/apache/cassandra/net/FrameDecoderCrc.java index 2a54f5f6636f..86f444245398 100644 --- a/src/java/org/apache/cassandra/net/FrameDecoderCrc.java +++ b/src/java/org/apache/cassandra/net/FrameDecoderCrc.java @@ -24,7 +24,7 @@ import io.netty.channel.ChannelPipeline; -import static org.apache.cassandra.net.Crc.*; +import static org.apache.cassandra.utils.Crc.*; /** * Framing format that protects integrity of data in movement with CRCs (of both header and payload). diff --git a/src/java/org/apache/cassandra/net/FrameDecoderLZ4.java b/src/java/org/apache/cassandra/net/FrameDecoderLZ4.java index 9cc100586e23..7dafb7fedf7c 100644 --- a/src/java/org/apache/cassandra/net/FrameDecoderLZ4.java +++ b/src/java/org/apache/cassandra/net/FrameDecoderLZ4.java @@ -26,7 +26,7 @@ import net.jpountz.lz4.LZ4Factory; import net.jpountz.lz4.LZ4SafeDecompressor; -import static org.apache.cassandra.net.Crc.*; +import static org.apache.cassandra.utils.Crc.*; /** * Framing format that compresses payloads with LZ4, and protects integrity of data in movement with CRCs diff --git a/src/java/org/apache/cassandra/net/FrameEncoderCrc.java b/src/java/org/apache/cassandra/net/FrameEncoderCrc.java index 364624816526..75b84aa14f53 100644 --- a/src/java/org/apache/cassandra/net/FrameEncoderCrc.java +++ b/src/java/org/apache/cassandra/net/FrameEncoderCrc.java @@ -24,7 +24,7 @@ import io.netty.buffer.ByteBuf; import io.netty.channel.ChannelHandler; -import static org.apache.cassandra.net.Crc.*; +import static org.apache.cassandra.utils.Crc.*; /** * Please see {@link FrameDecoderCrc} for description of the framing produced by this encoder. diff --git a/src/java/org/apache/cassandra/net/FrameEncoderLZ4.java b/src/java/org/apache/cassandra/net/FrameEncoderLZ4.java index 75f15c726b79..423e377a2770 100644 --- a/src/java/org/apache/cassandra/net/FrameEncoderLZ4.java +++ b/src/java/org/apache/cassandra/net/FrameEncoderLZ4.java @@ -28,7 +28,7 @@ import org.apache.cassandra.io.compress.BufferType; import org.apache.cassandra.utils.ByteBufferUtil; -import static org.apache.cassandra.net.Crc.*; +import static org.apache.cassandra.utils.Crc.*; /** * Please see {@link FrameDecoderLZ4} for description of the framing produced by this encoder. diff --git a/src/java/org/apache/cassandra/net/HandshakeProtocol.java b/src/java/org/apache/cassandra/net/HandshakeProtocol.java index 3217aeae8a8d..085c8b966c4c 100644 --- a/src/java/org/apache/cassandra/net/HandshakeProtocol.java +++ b/src/java/org/apache/cassandra/net/HandshakeProtocol.java @@ -35,7 +35,7 @@ import static org.apache.cassandra.locator.InetAddressAndPort.Serializer.inetAddressAndPortSerializer; import static org.apache.cassandra.net.MessagingService.VERSION_40; import static org.apache.cassandra.net.Message.validateLegacyProtocolMagic; -import static org.apache.cassandra.net.Crc.*; +import static org.apache.cassandra.utils.Crc.*; import static org.apache.cassandra.net.OutboundConnectionSettings.*; /** diff --git a/src/java/org/apache/cassandra/net/InboundMessageHandler.java b/src/java/org/apache/cassandra/net/InboundMessageHandler.java index edee108bb0d3..6c697d4848b8 100644 --- a/src/java/org/apache/cassandra/net/InboundMessageHandler.java +++ b/src/java/org/apache/cassandra/net/InboundMessageHandler.java @@ -40,6 +40,7 @@ import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tracing.TraceState; import org.apache.cassandra.tracing.Tracing; +import org.apache.cassandra.utils.Crc; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.NoSpamLogger; diff --git a/src/java/org/apache/cassandra/net/InboundMessageHandlers.java b/src/java/org/apache/cassandra/net/InboundMessageHandlers.java index c7b946350d09..f1b98bdbbe31 100644 --- a/src/java/org/apache/cassandra/net/InboundMessageHandlers.java +++ b/src/java/org/apache/cassandra/net/InboundMessageHandlers.java @@ -30,6 +30,7 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.InternodeInboundMetrics; import org.apache.cassandra.net.Message.Header; +import org.apache.cassandra.utils.Crc; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; diff --git a/src/java/org/apache/cassandra/net/OutboundMessageQueue.java b/src/java/org/apache/cassandra/net/OutboundMessageQueue.java index 8280055e6891..5098f32bc9e4 100644 --- a/src/java/org/apache/cassandra/net/OutboundMessageQueue.java +++ b/src/java/org/apache/cassandra/net/OutboundMessageQueue.java @@ -27,6 +27,7 @@ import com.google.common.annotations.VisibleForTesting; +import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.utils.concurrent.CountDownLatch; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/java/org/apache/cassandra/net/SocketFactory.java b/src/java/org/apache/cassandra/net/SocketFactory.java index 78256267ac67..bb4013dee59f 100644 --- a/src/java/org/apache/cassandra/net/SocketFactory.java +++ b/src/java/org/apache/cassandra/net/SocketFactory.java @@ -60,6 +60,7 @@ import io.netty.util.concurrent.ThreadPerTaskExecutor; import io.netty.util.internal.logging.InternalLoggerFactory; import io.netty.util.internal.logging.Slf4JLoggerFactory; +import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.concurrent.NamedThreadFactory; import org.apache.cassandra.config.EncryptionOptions; import org.apache.cassandra.locator.InetAddressAndPort; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 45208e5c343c..235cc1be571b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.cassandra.service.accord; import accord.api.Agent; @@ -23,20 +22,93 @@ import accord.api.ProgressLog; import accord.local.CommandStores; import accord.local.NodeTimeService; +import accord.local.PreLoadContext; +import accord.local.SafeCommandStore; import accord.local.ShardDistributor; +import accord.primitives.Routables; import accord.topology.Topology; +import accord.utils.MapReduceConsume; import accord.utils.RandomSource; +import org.apache.cassandra.concurrent.ImmediateExecutor; +import org.apache.cassandra.journal.AsyncWriteCallback; public class AccordCommandStores extends CommandStores { - private long cacheSize; + private final AccordJournal journal; + AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, RandomSource random, - ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory) + ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, AccordJournal journal) { super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore::new); + this.journal = journal; setCacheSize(maxCacheSize()); } + static Factory factory(AccordJournal journal) + { + return (time, agent, store, random, shardDistributor, progressLogFactory) -> + new AccordCommandStores(time, agent, store, random, shardDistributor, progressLogFactory, journal); + } + + @Override + public synchronized void shutdown() + { + super.shutdown(); + journal.shutdown(); + //TODO shutdown isn't useful by itself, we need a way to "wait" as well. Should be AutoCloseable or offer awaitTermination as well (think Shutdownable interface) + } + + @Override + protected void mapReduceConsume( + PreLoadContext context, + Routables keys, + long minEpoch, + long maxEpoch, + MapReduceConsume mapReduceConsume) + { + // append PreAccept, Accept, Commit, and Apply messages durably to AccordJournal before processing + if (journal.mustMakeDurable(context)) + mapReduceConsumeDurable(context, keys, minEpoch, maxEpoch, mapReduceConsume); + else + super.mapReduceConsume(context, keys, minEpoch, maxEpoch, mapReduceConsume); + } + + private void mapReduceConsumeDurable( + PreLoadContext context, + Routables keys, + long minEpoch, + long maxEpoch, + MapReduceConsume mapReduceConsume) + { + journal.append(context, ImmediateExecutor.INSTANCE, new AsyncWriteCallback() + { + @Override + public void run() + { + // TODO (performance, expected): do not retain references to messages beyond a certain total + // cache threshold; in case of flush lagging behind, read the messages from journal and + // deserialize instead before processing, to prevent memory pressure buildup from messages + // pending flush to disk. + AccordCommandStores.super.mapReduceConsume(context, keys, minEpoch, maxEpoch, mapReduceConsume); + } + + @Override + public void onFailure(Throwable error) + { + mapReduceConsume.accept(null, error); + } + }); + } + + @Override + public synchronized void updateTopology(Topology newTopology) + { + super.updateTopology(newTopology); + refreshCacheSizes(); + } + + private long cacheSize; + synchronized void setCacheSize(long bytes) { cacheSize = bytes; @@ -56,18 +128,4 @@ private static long maxCacheSize() { return 5 << 20; // TODO (required): make configurable } - - @Override - public synchronized void updateTopology(Topology newTopology) - { - super.updateTopology(newTopology); - refreshCacheSizes(); - } - - @Override - public synchronized void shutdown() - { - super.shutdown(); - //TODO shutdown isn't useful by itself, we need a way to "wait" as well. Should be AutoCloseable or offer awaitTermination as well (think Shutdownable interface) - } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java new file mode 100644 index 000000000000..382ab1e65906 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -0,0 +1,499 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.*; +import java.util.concurrent.Executor; +import java.util.zip.Checksum; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.primitives.Ints; + +import accord.local.Node.Id; +import accord.local.PreLoadContext; +import accord.messages.Accept; +import accord.messages.Apply; +import accord.messages.Commit; +import accord.messages.MessageType; +import accord.messages.PreAccept; +import accord.messages.TxnRequest; +import accord.primitives.*; +import accord.utils.Invariants; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.journal.AsyncWriteCallback; +import org.apache.cassandra.journal.Journal; +import org.apache.cassandra.journal.KeySupport; +import org.apache.cassandra.journal.Params; +import org.apache.cassandra.journal.ValueSerializer; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.service.accord.serializers.AcceptSerializers; +import org.apache.cassandra.service.accord.serializers.ApplySerializers; +import org.apache.cassandra.service.accord.serializers.CommitSerializers; +import org.apache.cassandra.service.accord.serializers.EnumSerializer; +import org.apache.cassandra.service.accord.serializers.PreacceptSerializers; + +import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; +import static org.apache.cassandra.db.TypeSizes.INT_SIZE; +import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; +import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; +import static org.apache.cassandra.utils.FBUtilities.updateChecksumLong; + +public class AccordJournal +{ + private static final Set SENTINEL_HOSTS = Collections.singleton(0); + + static final Params PARAMS = new Params() + { + @Override + public int segmentSize() + { + return 32 << 20; + } + + @Override + public FailurePolicy failurePolicy() + { + return FailurePolicy.STOP; + } + + @Override + public FlushMode flushMode() + { + return FlushMode.GROUP; + } + + @Override + public int flushPeriod() + { + return 1000; + } + + @Override + public int periodicFlushLagBlock() + { + return 1500; + } + + @Override + public int userVersion() + { + /* + * NOTE: when accord journal version gets bumped, expose it via yaml. + * This way operators can force previous version on upgrade, temporarily, + * to allow easier downgrades if something goes wrong. + */ + return 1; + } + }; + + final File directory; + final Journal> journal; + + @VisibleForTesting + public AccordJournal() + { + directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); + journal = new Journal<>("AccordJournal", directory, PARAMS, Key.SUPPORT, MESSAGE_SERIALIZER); + } + + public AccordJournal start() + { + // journal.start(); TODO: re-enable + return this; + } + + public void shutdown() + { + // journal.shutdown(); TODO: re-enable + } + + boolean mustMakeDurable(PreLoadContext context) + { + return false; + // return context instanceof TxnRequest && Type.mustMakeDurable((TxnRequest) context); TODO: re-enable + } + + public void append(PreLoadContext context, Executor executor, AsyncWriteCallback callback) + { + append((TxnRequest) context, executor, callback); + } + + public void append(TxnRequest message, Executor executor, AsyncWriteCallback callback) + { + Key key = new Key(message.txnId, Type.fromMsgType(message.type())); + journal.asyncWrite(key, message, SENTINEL_HOSTS, executor, callback); + } + + public TxnRequest read(TxnId txnId, Type type) + { + Key key = new Key(txnId, type); + return journal.read(key); + } + + PreAccept readPreAccept(TxnId txnId) + { + return (PreAccept) read(txnId, Type.PREACCEPT_REQ); + } + + Accept readAccept(TxnId txnId) + { + return (Accept) read(txnId, Type.ACCEPT_REQ); + } + + Commit readCommit(TxnId txnId) + { + return (Commit) read(txnId, Type.COMMIT_REQ); + } + + Apply readApply(TxnId txnId) + { + return (Apply) read(txnId, Type.APPLY_REQ); + } + + static class Key + { + final TxnId txnId; + final Type type; + + Key(TxnId txnId, Type type) + { + this.txnId = txnId; + this.type = type; + } + + /** + * Support for (de)serializing and comparing record keys. + *

+ * Implements its own serialization and comparison for {@link TxnId} to satisty + * {@link KeySupport} contract - puts hybrid logical clock ahead of epoch + * when ordering txn ids. This is done for more precise elimination of candidate + * segments by min/max record key in segment. + */ + static final KeySupport SUPPORT = new KeySupport() + { + private static final int HLC_OFFSET = 0; + private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; + private static final int NODE_OFFSET = EPOCH_AND_FLAGS_OFFSET + LONG_SIZE; + private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; + + @Override + public int serializedSize(int version) + { + return LONG_SIZE // txnId.hlc() + + 6 // txnId.epoch() + + 2 // txnId.flags() + + INT_SIZE // txnId.node + + BYTE_SIZE; // type + } + + @Override + public void serialize(Key key, DataOutputPlus out, int version) throws IOException + { + serializeTxnId(key.txnId, out); + out.writeByte(key.type.id); + } + + private void serializeTxnId(TxnId txnId, DataOutputPlus out) throws IOException + { + out.writeLong(txnId.hlc()); + out.writeLong(epochAndFlags(txnId)); + out.writeInt(txnId.node.id); + } + + @Override + public Key deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = deserializeTxnId(in); + int type = in.readByte(); + return new Key(txnId, Type.fromId(type)); + } + + private TxnId deserializeTxnId(DataInputPlus in) throws IOException + { + long hlc = in.readLong(); + long epochAndFlags = in.readLong(); + int nodeId = in.readInt(); + return TxnId.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); + } + + @Override + public Key deserialize(ByteBuffer buffer, int position, int version) + { + TxnId txnId = deserializeTxnId(buffer, position); + int type = buffer.get(position + TYPE_OFFSET); + return new Key(txnId, Type.fromId(type)); + } + + private TxnId deserializeTxnId(ByteBuffer buffer, int position) + { + long hlc = buffer.getLong(position + HLC_OFFSET); + long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); + int nodeId = buffer.getInt(position + NODE_OFFSET); + return TxnId.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); + } + + @Override + public void updateChecksum(Checksum crc, Key key, int version) + { + updateChecksum(crc, key.txnId); + crc.update(key.type.id & 0xFF); + } + + private void updateChecksum(Checksum crc, TxnId txnId) + { + updateChecksumLong(crc, txnId.hlc()); + updateChecksumLong(crc, epochAndFlags(txnId)); + updateChecksumInt(crc, txnId.node.id); + } + + @Override + public int compareWithKeyAt(Key k, ByteBuffer buffer, int position, int version) + { + int cmp = compareWithTxnIdAt(k.txnId, buffer, position); + if (cmp != 0) return cmp; + + byte type = buffer.get(position + TYPE_OFFSET); + cmp = Byte.compare((byte) k.type.id, type); + return cmp; + } + + private int compareWithTxnIdAt(TxnId txnId, ByteBuffer buffer, int position) + { + long hlc = buffer.getLong(position + HLC_OFFSET); + int cmp = Long.compareUnsigned(txnId.hlc(), hlc); + if (cmp != 0) return cmp; + + long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); + cmp = Long.compareUnsigned(epochAndFlags(txnId), epochAndFlags); + if (cmp != 0) return cmp; + + int nodeId = buffer.getInt(position + NODE_OFFSET); + cmp = Integer.compareUnsigned(txnId.node.id, nodeId); + return cmp; + } + + @Override + public int compare(Key k1, Key k2) + { + int cmp = compare(k1.txnId, k2.txnId); + if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); + return cmp; + } + + private int compare(TxnId txnId1, TxnId txnId2) + { + int cmp = Long.compareUnsigned(txnId1.hlc(), txnId2.hlc()); + if (cmp == 0) cmp = Long.compareUnsigned(epochAndFlags(txnId1), epochAndFlags(txnId2)); + if (cmp == 0) cmp = Integer.compareUnsigned(txnId1.node.id, txnId2.node.id); + return cmp; + } + + private long epochAndFlags(TxnId txnId) + { + return (txnId.epoch() << 16) | (long) txnId.flags(); + } + + private long epoch(long epochAndFlags) + { + return epochAndFlags >>> 16; + } + + private int flags(long epochAndFlags) + { + return (int) (epochAndFlags & ((1 << 16) - 1)); + } + }; + + @Override + public boolean equals(Object other) + { + if (this == other) + return true; + return (other instanceof Key) && equals((Key) other); + } + + boolean equals(Key other) + { + return this.type == other.type && this.txnId.equals(other.txnId); + } + + @Override + public int hashCode() + { + return type.hashCode() + 31 * txnId.hashCode(); + } + + @Override + public String toString() + { + return "Key{" + txnId + ", " + type + '}'; + } + } + + static final ValueSerializer> MESSAGE_SERIALIZER = new ValueSerializer>() + { + @Override + public int serializedSize(TxnRequest message, int version) + { + return Ints.checkedCast(Type.ofMessage(message).serializedSize(message, version)); + } + + @Override + public void serialize(TxnRequest message, DataOutputPlus out, int version) throws IOException + { + Type.ofMessage(message).serialize(message, out, version); + } + + @Override + public TxnRequest deserialize(Key key, DataInputPlus in, int version) throws IOException + { + return key.type.deserialize(in, version); + } + }; + + /** + * Accord Message type - consequently the kind of persisted record. + *

+ * Note: {@link EnumSerializer} is intentionally not being reused here, for two reasons: + * 1. This is an internal enum, fully under our control, not part of an external library + * 2. It's persisted in the record key, so has the additional constraint of being fixed size and + * shouldn't be using varint encoding + */ + public enum Type implements IVersionedSerializer> + { + PREACCEPT_REQ (0, MessageType.PREACCEPT_REQ, PreacceptSerializers.request), + ACCEPT_REQ (1, MessageType.ACCEPT_REQ, AcceptSerializers.request ), + COMMIT_REQ (2, MessageType.COMMIT_REQ, CommitSerializers.request ), + APPLY_REQ (3, MessageType.APPLY_REQ, ApplySerializers.request ); + + final int id; + final MessageType msgType; + final IVersionedSerializer> serializer; + + Type(int id, MessageType msgType, IVersionedSerializer> serializer) + { + if (id < 0) + throw new IllegalArgumentException("Negative Type id " + id); + if (id > Byte.MAX_VALUE) + throw new IllegalArgumentException("Type id doesn't fit in a single byte: " + id); + + this.id = id; + this.msgType = msgType; + + //noinspection unchecked + this.serializer = (IVersionedSerializer>) serializer; + } + + private static final Type[] idToTypeMapping; + private static final Map msgTypeToTypeMap; + + static + { + Type[] types = values(); + + int maxId = -1; + for (Type type : types) + maxId = Math.max(type.id, maxId); + + + Type[] idToType = new Type[maxId + 1]; + for (Type type : types) + { + if (null != idToType[type.id]) + throw new IllegalStateException("Duplicate Type id " + type.id); + idToType[type.id] = type; + } + idToTypeMapping = idToType; + + EnumMap msgTypeToType = new EnumMap<>(MessageType.class); + for (Type type : types) + { + if (null != msgTypeToType.put(type.msgType, type)) + throw new IllegalStateException("Duplicate MessageType " + type.msgType); + } + msgTypeToTypeMap = msgTypeToType; + } + + static Type fromId(int id) + { + if (id < 0 || id >= idToTypeMapping.length) + throw new IllegalArgumentException("Out or range Type id " + id); + Type type = idToTypeMapping[id]; + if (null == type) + throw new IllegalArgumentException("Unknown Type id " + id); + return type; + } + + static Type fromMsgType(MessageType msgType) + { + Type type = msgTypeToTypeMap.get(msgType); + if (null == type) + throw new IllegalArgumentException("Unsupported MessageType " + msgType); + return type; + } + + static Type ofMessage(TxnRequest request) + { + return fromMsgType(request.type()); + } + + static boolean mustMakeDurable(TxnRequest message) + { + return msgTypeToTypeMap.containsKey(message.type()); + } + + @Override + public void serialize(TxnRequest request, DataOutputPlus out, int version) throws IOException + { + serializer.serialize(request, out, msVersion(version)); + } + + @Override + public TxnRequest deserialize(DataInputPlus in, int version) throws IOException + { + return serializer.deserialize(in, msVersion(version)); + } + + @Override + public long serializedSize(TxnRequest request, int version) + { + return serializer.serializedSize(request, msVersion(version)); + } + + static + { + // make noise early if we forget to update our version mappings + Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_50); + } + + private static int msVersion(int version) + { + switch (version) + { + default: throw new IllegalArgumentException(); + case 1: return MessagingService.VERSION_50; + } + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index d8b9f4d89b43..4de78375bd14 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -77,7 +77,7 @@ public class AccordService implements IAccordService, Shutdownable private final AccordConfigurationService configService; private final AccordScheduler scheduler; private final AccordVerbHandler verbHandler; - + private static final IAccordService NOOP_SERVICE = new IAccordService() { @Override @@ -148,7 +148,7 @@ private AccordService() scheduler, SizeOfIntersectionSorter.SUPPLIER, SimpleProgressLog::new, - AccordCommandStores::new); + AccordCommandStores.factory(new AccordJournal().start())); this.nodeShutdown = toShutdownable(node); this.verbHandler = new AccordVerbHandler<>(this.node); } diff --git a/src/java/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTracker.java b/src/java/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTracker.java index 41314a2eafe5..06e4881fb4f0 100644 --- a/src/java/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTracker.java +++ b/src/java/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTracker.java @@ -38,7 +38,7 @@ import org.apache.cassandra.service.paxos.Commit; import static org.apache.cassandra.io.util.SequentialWriterOption.FINISH_ON_CLOSE; -import static org.apache.cassandra.net.Crc.crc32; +import static org.apache.cassandra.utils.Crc.crc32; /** * Tracks the highest paxos ballot we've seen, and the lowest ballot we can accept. diff --git a/src/java/org/apache/cassandra/net/Crc.java b/src/java/org/apache/cassandra/utils/Crc.java similarity index 90% rename from src/java/org/apache/cassandra/net/Crc.java rename to src/java/org/apache/cassandra/utils/Crc.java index 8f63e51a9353..f1a31584f364 100644 --- a/src/java/org/apache/cassandra/net/Crc.java +++ b/src/java/org/apache/cassandra/utils/Crc.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.net; +package org.apache.cassandra.utils; import java.io.IOException; import java.nio.ByteBuffer; @@ -48,26 +48,31 @@ public InvalidCrc(int read, int computed) public static CRC32 crc32() { CRC32 crc = crc32.get(); + return initialize(crc); + } + + public static CRC32 initialize(CRC32 crc) + { crc.reset(); - crc.update(initialBytes); + crc.update(initialBytes, 0, initialBytes.length); return crc; } - static int computeCrc32(ByteBuf buffer, int startReaderIndex, int endReaderIndex) + public static int computeCrc32(ByteBuf buffer, int startReaderIndex, int endReaderIndex) { CRC32 crc = crc32(); crc.update(buffer.internalNioBuffer(startReaderIndex, endReaderIndex - startReaderIndex)); return (int) crc.getValue(); } - static int computeCrc32(ByteBuffer buffer, int start, int end) + public static int computeCrc32(ByteBuffer buffer, int start, int end) { CRC32 crc = crc32(); updateCrc32(crc, buffer, start, end); return (int) crc.getValue(); } - static void updateCrc32(CRC32 crc, ByteBuffer buffer, int start, int end) + public static void updateCrc32(CRC32 crc, ByteBuffer buffer, int start, int end) { int savePosition = buffer.position(); int saveLimit = buffer.limit(); @@ -116,7 +121,7 @@ static void updateCrc32(CRC32 crc, ByteBuffer buffer, int start, int end) * @param len the number of bytes, greater than 0 and fewer than 9, to be read from bytes * @return the least-significant bit AND byte order crc24 using the CRC24_POLY polynomial */ - static int crc24(long bytes, int len) + public static int crc24(long bytes, int len) { int crc = CRC24_INIT; while (len-- > 0) diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 79b5c621c6ff..689277742259 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -131,7 +131,7 @@ public class FBUtilities private static volatile String previousReleaseVersionString; - private static int availableProcessors = CASSANDRA_AVAILABLE_PROCESSORS.getInt(DatabaseDescriptor.getAvailableProcessors()); + private static final int availableProcessors = CASSANDRA_AVAILABLE_PROCESSORS.getInt(DatabaseDescriptor.getAvailableProcessors()); private static volatile Supplier systemInfoSupplier = Suppliers.memoize(SystemInfo::new); @@ -1165,20 +1165,26 @@ public static String exec(Map env, Duration timeout, int outBufS { process.destroyForcibly(); logger.error("Command {} did not complete in {}, killed forcibly:\noutput:\n{}\n(truncated {} bytes)\nerror:\n{}\n(truncated {} bytes)", - Arrays.toString(cmd), timeout, out.asString(), outOverflow, err.asString(), errOverflow); + Arrays.toString(cmd), timeout, out.asString(), outOverflow, err.asString(), errOverflow); throw new TimeoutException("Command " + Arrays.toString(cmd) + " did not complete in " + timeout); } int r = process.exitValue(); if (r != 0) { logger.error("Command {} failed with exit code {}:\noutput:\n{}\n(truncated {} bytes)\nerror:\n{}\n(truncated {} bytes)", - Arrays.toString(cmd), r, out.asString(), outOverflow, err.asString(), errOverflow); + Arrays.toString(cmd), r, out.asString(), outOverflow, err.asString(), errOverflow); throw new IOException("Command " + Arrays.toString(cmd) + " failed with exit code " + r); } return out.asString(); } } + public static void updateChecksumShort(Checksum checksum, short v) + { + checksum.update((v >>> 8) & 0xFF); + checksum.update((v >>> 0) & 0xFF); + } + public static void updateChecksumInt(Checksum checksum, int v) { checksum.update((v >>> 24) & 0xFF); @@ -1187,6 +1193,12 @@ public static void updateChecksumInt(Checksum checksum, int v) checksum.update((v >>> 0) & 0xFF); } + public static void updateChecksumLong(Checksum checksum, long v) + { + updateChecksumInt(checksum, (int) (v >>> 32)); + updateChecksumInt(checksum, (int) (v & 0xFFFFFFFFL)); + } + /** * Updates checksum with the provided ByteBuffer at the given offset + length. * Resets position and limit back to their original values on return. @@ -1432,4 +1444,21 @@ public static SystemInfo getSystemInfo() { return systemInfoSupplier.get(); } + + public enum Order { LT, EQ, GT } + public static Order compare(T a, T b, Comparator comparator) + { + int rc = comparator.compare(a, b); + if (rc < 0) return Order.LT; + if (rc == 0) return Order.EQ; + return Order.GT; + } + + public static Order compare(A a, B b, AsymmetricOrdering comparator) + { + int rc = comparator.compareAsymmetric(a, b); + if (rc < 0) return Order.LT; + if (rc == 0) return Order.EQ; + return Order.GT; + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java b/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java index fdd678efd04b..220c7ff9fd71 100644 --- a/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java +++ b/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java @@ -30,10 +30,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; -import org.apache.cassandra.exceptions.UnrecoverableIllegalStateException; -import org.apache.cassandra.metrics.StorageMetrics; -import org.apache.cassandra.service.DiskErrorsHandlerService; -import org.apache.cassandra.tracing.Tracing; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,9 +37,14 @@ import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.exceptions.UnrecoverableIllegalStateException; import org.apache.cassandra.io.FSError; import org.apache.cassandra.io.sstable.CorruptSSTableException; +import org.apache.cassandra.journal.Params.FailurePolicy; +import org.apache.cassandra.service.DiskErrorsHandlerService; +import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.config.CassandraRelevantProperties.PRINT_HEAP_HISTOGRAM_ON_OUT_OF_MEMORY_ERROR; @@ -94,6 +95,11 @@ public static void inspectCommitLogThrowable(Throwable t) inspectThrowable(t, ex -> DiskErrorsHandlerService.get().inspectCommitLogError(ex)); } + public static void inspectJournalThrowable(Throwable t, String journalName, FailurePolicy failurePolicy) + { + inspectThrowable(t, th -> inspectJournalError(th, journalName, failurePolicy)); + } + public static void inspectThrowable(Throwable t, Consumer fn) throws OutOfMemoryError { boolean isUnstable = false; @@ -128,7 +134,14 @@ else if (t instanceof UnrecoverableIllegalStateException) } // Anything other than an OOM, we should try and heap dump to capture what's going on if configured to do so - HeapUtils.maybeCreateHeapDump(); + try + { + HeapUtils.maybeCreateHeapDump(); + } + catch (Throwable sub) + { + t.addSuppressed(sub); + } if (t instanceof InterruptedException) throw new UncheckedInterruptedException((InterruptedException) t); @@ -189,6 +202,19 @@ private static void forceHeapSpaceOomMaybe(OutOfMemoryError oom) } } + private static void inspectJournalError(Throwable t, String journalName, FailurePolicy failurePolicy) + { + if (!StorageService.instance.isDaemonSetupCompleted()) + { + logger.error("Exiting due to error while processing journal {} during initialization.", journalName, t); + killer.killCurrentJVM(t, true); + } + else if (failurePolicy == FailurePolicy.DIE) + { + killer.killCurrentJVM(t); + } + } + public static void killCurrentJVM(Throwable t, boolean quiet) { killer.killCurrentJVM(t, quiet); diff --git a/src/java/org/apache/cassandra/utils/NoSpamLogger.java b/src/java/org/apache/cassandra/utils/NoSpamLogger.java index 0a13f6b2a5ae..5c37a043d870 100644 --- a/src/java/org/apache/cassandra/utils/NoSpamLogger.java +++ b/src/java/org/apache/cassandra/utils/NoSpamLogger.java @@ -217,6 +217,11 @@ private NoSpamLogger(Logger wrapped, long minInterval, TimeUnit timeUnit) minIntervalNanos = timeUnit.toNanos(minInterval); } + public static NoSpamLogger wrap(Logger wrapped, long minInterval, TimeUnit timeUnit) + { + return new NoSpamLogger(wrapped, minInterval, timeUnit); + } + public boolean info(long nowNanos, String s, Object... objects) { return NoSpamLogger.this.log( Level.INFO, s, nowNanos, objects); diff --git a/src/java/org/apache/cassandra/utils/TimeUUID.java b/src/java/org/apache/cassandra/utils/TimeUUID.java index 6f4281c7ca64..b7930ed5828a 100644 --- a/src/java/org/apache/cassandra/utils/TimeUUID.java +++ b/src/java/org/apache/cassandra/utils/TimeUUID.java @@ -143,7 +143,12 @@ public static TimeUUID fromBytes(long msb, long lsb) public static TimeUUID deserialize(ByteBuffer buffer) { - return fromBytes(buffer.getLong(buffer.position()), buffer.getLong(buffer.position() + 8)); + return deserialize(buffer, buffer.position()); + } + + public static TimeUUID deserialize(ByteBuffer buffer, int position) + { + return fromBytes(buffer.getLong(position), buffer.getLong(position + 8)); } public static TimeUUID deserialize(DataInput in) throws IOException diff --git a/test/conf/cassandra-mtls.yaml b/test/conf/cassandra-mtls.yaml index d6f1b3e52c6b..356d3e918dde 100644 --- a/test/conf/cassandra-mtls.yaml +++ b/test/conf/cassandra-mtls.yaml @@ -25,6 +25,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal cdc_raw_directory: build/test/cassandra/cdc_raw cdc_enabled: false hints_directory: build/test/cassandra/hints diff --git a/test/conf/cassandra-murmur.yaml b/test/conf/cassandra-murmur.yaml index 2e5828fb56a0..c3f7442aa64e 100644 --- a/test/conf/cassandra-murmur.yaml +++ b/test/conf/cassandra-murmur.yaml @@ -8,6 +8,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal cdc_raw_directory: build/test/cassandra/cdc_raw cdc_enabled: false hints_directory: build/test/cassandra/hints diff --git a/test/conf/cassandra-old.yaml b/test/conf/cassandra-old.yaml index b8c3b028c519..0cda4138d717 100644 --- a/test/conf/cassandra-old.yaml +++ b/test/conf/cassandra-old.yaml @@ -9,6 +9,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size_in_mb: 5 commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw diff --git a/test/conf/cassandra-pem-jks-sslcontextfactory.yaml b/test/conf/cassandra-pem-jks-sslcontextfactory.yaml index 0bd034d5ae9a..0e8a83f0aa6d 100644 --- a/test/conf/cassandra-pem-jks-sslcontextfactory.yaml +++ b/test/conf/cassandra-pem-jks-sslcontextfactory.yaml @@ -27,6 +27,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw diff --git a/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml b/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml index 16cffb58bd8b..a3146b79ab3f 100644 --- a/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml +++ b/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml @@ -27,6 +27,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw diff --git a/test/conf/cassandra-pem-sslcontextfactory.yaml b/test/conf/cassandra-pem-sslcontextfactory.yaml index 229a0b03fdc3..d382d9d64bbf 100644 --- a/test/conf/cassandra-pem-sslcontextfactory.yaml +++ b/test/conf/cassandra-pem-sslcontextfactory.yaml @@ -27,6 +27,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw diff --git a/test/conf/cassandra-seeds.yaml b/test/conf/cassandra-seeds.yaml index 53f82dd6ecd7..3a0fbf1831b4 100644 --- a/test/conf/cassandra-seeds.yaml +++ b/test/conf/cassandra-seeds.yaml @@ -9,6 +9,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal cdc_raw_directory: build/test/cassandra/cdc_raw cdc_enabled: false hints_directory: build/test/cassandra/hints diff --git a/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml b/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml index 6b7488336076..6f83ec334143 100644 --- a/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml +++ b/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml @@ -27,6 +27,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw diff --git a/test/conf/cassandra-sslcontextfactory.yaml b/test/conf/cassandra-sslcontextfactory.yaml index a20d26e59bee..a570bdf5f626 100644 --- a/test/conf/cassandra-sslcontextfactory.yaml +++ b/test/conf/cassandra-sslcontextfactory.yaml @@ -27,6 +27,7 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog +accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw diff --git a/test/conf/cassandra.yaml b/test/conf/cassandra.yaml index ff017b3afc81..c0e732cec2a9 100644 --- a/test/conf/cassandra.yaml +++ b/test/conf/cassandra.yaml @@ -10,6 +10,7 @@ commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog commitlog_disk_access_mode: legacy +accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw diff --git a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java index 19194188ee25..66bd85689e9b 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java @@ -72,6 +72,7 @@ private InstanceConfig(int num, String commitlog_directory, String hints_directory, String cdc_raw_directory, + String accord_journal_directory, Collection initial_token, int storage_port, int native_transport_port, @@ -92,6 +93,7 @@ private InstanceConfig(int num, .set("commitlog_directory", commitlog_directory) .set("hints_directory", hints_directory) .set("cdc_raw_directory", cdc_raw_directory) + .set("accord_journal_directory", accord_journal_directory) .set("partitioner", "org.apache.cassandra.dht.Murmur3Partitioner") .set("start_native_transport", true) .set("concurrent_writes", 2) @@ -329,6 +331,7 @@ public static InstanceConfig generate(int nodeNum, String.format("%s/node%d/commitlog", root, nodeNum), String.format("%s/node%d/hints", root, nodeNum), String.format("%s/node%d/cdc", root, nodeNum), + String.format("%s/node%d/accord_journal", root, nodeNum), tokens, provisionStrategy.storagePort(nodeNum), provisionStrategy.nativeTransportPort(nodeNum), diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/GlobalMethodTransformer.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/GlobalMethodTransformer.java index 883b7a66a3cc..064d2056468c 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/GlobalMethodTransformer.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/GlobalMethodTransformer.java @@ -85,12 +85,17 @@ else if (globalMethods && ((opcode == Opcodes.INVOKESTATIC && ( || !deterministic && owner.equals("java/lang/System") && name.equals("identityHashCode") || owner.equals("java/util/UUID") && name.equals("randomUUID") || owner.equals("com/google/common/util/concurrent/Uninterruptibles") && name.equals("sleepUninterruptibly") - || owner.equals("sun/misc/Unsafe") && name.equals("getUnsafe"))) - )) + || owner.equals("sun/misc/Unsafe") && name.equals("getUnsafe")))) + ) { transformer.witness(GLOBAL_METHOD); super.visitMethodInsn(Opcodes.INVOKESTATIC, "org/apache/cassandra/simulator/systems/InterceptorOfSystemMethods$Global", name, descriptor, false); } + else if (owner.equals("java/util/concurrent/TimeUnit") && name.equals("sleep")) + { + transformer.witness(GLOBAL_METHOD); + super.visitMethodInsn(Opcodes.INVOKESTATIC, "org/apache/cassandra/simulator/systems/InterceptorOfSystemMethods$Global", "sleep", "(Ljava/util/concurrent/TimeUnit;J)V", false); + } else if ((globalMethods || deterministic) && opcode == Opcodes.INVOKESTATIC && ((owner.equals("java/util/concurrent/ThreadLocalRandom") && (name.equals("getProbe") || name.equals("advanceProbe") || name.equals("localInit"))) || (owner.equals("java/util/concurrent/atomic/Striped64") && (name.equals("getProbe") || name.equals("advanceProbe")))) diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java new file mode 100644 index 000000000000..2f9da8910c76 --- /dev/null +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.simulator.test; + +import java.io.IOException; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.CopyOnWriteArrayList; +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableMap; +import org.apache.cassandra.schema.*; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.Utils; +import accord.api.Data; +import accord.api.RoutingKey; +import accord.api.Update; +import accord.api.Write; +import accord.impl.TopologyUtils; +import accord.local.Node; +import accord.messages.PreAccept; +import accord.messages.TxnRequest; +import accord.primitives.FullKeyRoute; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.Seekables; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.topology.Topologies; +import org.apache.cassandra.concurrent.ExecutorFactory; +import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.ParameterizedClass; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.commitlog.CommitLog; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.Files; +import org.apache.cassandra.journal.AsyncWriteCallback; +import org.apache.cassandra.service.accord.AccordJournal; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.TxnNamedRead; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.Isolated; +import org.apache.cassandra.utils.concurrent.CountDownLatch; + +public class AccordJournalSimulationTest extends SimulationTestBase +{ + @Test + @Ignore // TODO: re-enable + public void test() throws IOException + { + simulate(arr(() -> run()), + () -> check()); + } + + private static void run() + { + for (int i = 0; i < State.events; i++) + { + int finalI = i; + State.executor.execute(() -> State.append(finalI)); + } + + try + { + State.eventsDurable.await(); + State.logger.info("All events are durable done!"); + } + catch (InterruptedException e) + { + throw new AssertionError(e); + } + + if (!State.exceptions.isEmpty()) + { + AssertionError error = new AssertionError("Exceptions found during test"); + State.exceptions.forEach(error::addSuppressed); + throw error; + } + + State.journal.shutdown(); + State.logger.info("Run complete"); + } + + private static void check() + { + State.logger.info("Check starting"); + State.journal.start(); // to avoid a while true deadlock + try + { + for (int i = 0; i < State.events; i++) + { + TxnRequest event = State.journal.read(State.toTxnId(i), AccordJournal.Type.PREACCEPT_REQ); + State.logger.info("Event {} -> {}", i, event); + if (event == null) + throw new AssertionError(String.format("Unable to read event %d", i)); + } + State.logger.info("Check complete"); + } + finally + { + State.journal.shutdown(); + } + } + + @Isolated + public static class State + { + private static final Logger logger = LoggerFactory.getLogger(State.class); + private static final String KEYSPACE = "test"; + + static + { + Files.newGlobalInMemoryFileSystem(); + DatabaseDescriptor.clientWithDaemonConfig(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + DatabaseDescriptor.setAccordJournalDirectory("/journal"); + new File("/journal").createDirectoriesIfNotExists(); + DatabaseDescriptor.setCommitLogCompression(new ParameterizedClass("LZ4Compressor", ImmutableMap.of())); + DatabaseDescriptor.setDumpHeapOnUncaughtException(false); + + // in order to do journal.read, we need all this setup first! + Keyspace.setInitialized(); + Schema.instance.submit(SchemaTransformations.addKeyspace(KeyspaceMetadata.create(State.KEYSPACE, KeyspaceParams.simple(1)), true)); + Keyspace ks = Keyspace.open(State.KEYSPACE); + ks.initCfCustom(ColumnFamilyStore.createColumnFamilyStore(ks, TableMetadataRef.forOfflineTools(TableMetadata.builder(State.KEYSPACE, State.KEYSPACE) + .addPartitionKeyColumn("pk", Int32Type.instance) + .build()).get(), false)); + + try + { + CommitLog.instance.shutdownBlocking(); + } + catch (InterruptedException e) + { + // ignore + } + } + private static final ExecutorPlus executor = ExecutorFactory.Global.executorFactory().pooled("name", 10); + private static final AccordJournal journal = new AccordJournal(); + private static final int events = 100; + private static final CountDownLatch eventsWritten = CountDownLatch.newCountDownLatch(events); + private static final CountDownLatch eventsDurable = CountDownLatch.newCountDownLatch(events); + private static final List exceptions = new CopyOnWriteArrayList<>(); + + static + { + journal.start(); + } + + public static void append(int event) + { + TxnRequest request = toRequest(event); + journal.append(request, executor, new AsyncWriteCallback() + { + @Override + public void run() + { + durable(event); + } + + @Override + public void onFailure(Throwable error) + { + eventsDurable.decrement(); // to make sure we don't block forever + exceptions.add(error); + } + }); + eventsWritten.decrement(); + logger.info("append({}); remaining {}", event, eventsWritten.count()); + } + + private static void durable(int event) + { + eventsDurable.decrement(); + logger.info("durable({}); remaining {}", event, eventsDurable.count()); + } + + private static TxnRequest toRequest(int event) + { + TxnId id = toTxnId(event); + Ranges ranges = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min("system"), AccordRoutingKey.SentinelKey.max("system"))); + Topologies topologies = Utils.topologies(TopologyUtils.initialTopology(new Node.Id[] {node}, ranges, 3)); + Keys keys = Keys.of(toKey(0)); + Txn txn = new Txn.InMemory(keys, new TxnRead(new TxnNamedRead[0], keys), TxnQuery.ALL, new NoopUpdate()); + FullRoute route = route(); + return new PreAccept(node, topologies, id, txn, route); + } + + private static TxnId toTxnId(int event) + { + return TxnId.fromValues(1, event, 0, node); + } + + private static PartitionKey toKey(int a) + { + return new PartitionKey(KEYSPACE, tableId, Murmur3Partitioner.instance.decorateKey(ByteBufferUtil.bytes(a))); + } + + private static final TableId tableId = TableId.fromUUID(new UUID(0, 0)); + private static final Node.Id node = new Node.Id(0); + + private static FullRoute route() + { + return new FullKeyRoute(key, new RoutingKey[]{ key }); + } + + private static final RoutingKey key = new AccordRoutingKey.TokenKey("system", new Murmur3Partitioner.LongToken(42)); + } + + public static class NoopUpdate implements Update + { + @Override + public Seekables keys() + { + return null; + } + + @Override + public Write apply(@Nullable Data data) + { + return null; + } + + @Override + public Update slice(Ranges ranges) + { + return null; + } + + @Override + public Update merge(Update other) + { + return null; + } + } +} diff --git a/test/unit/org/apache/cassandra/ServerTestUtils.java b/test/unit/org/apache/cassandra/ServerTestUtils.java index ead4a1a558cb..79b9efaa8936 100644 --- a/test/unit/org/apache/cassandra/ServerTestUtils.java +++ b/test/unit/org/apache/cassandra/ServerTestUtils.java @@ -208,6 +208,7 @@ public static void cleanup() if (cdcDir != null) cleanupDirectory(cdcDir); cleanupDirectory(DatabaseDescriptor.getHintsDirectory()); + cleanupDirectory(DatabaseDescriptor.getAccordJournalDirectory()); cleanupSavedCaches(); // clean up data directory which are stored as data directory/keyspace/data files diff --git a/test/unit/org/apache/cassandra/net/ManyToOneConcurrentLinkedQueueTest.java b/test/unit/org/apache/cassandra/concurrent/ManyToOneConcurrentLinkedQueueTest.java similarity index 99% rename from test/unit/org/apache/cassandra/net/ManyToOneConcurrentLinkedQueueTest.java rename to test/unit/org/apache/cassandra/concurrent/ManyToOneConcurrentLinkedQueueTest.java index 4f60d01f240d..d1baae7e10cf 100644 --- a/test/unit/org/apache/cassandra/net/ManyToOneConcurrentLinkedQueueTest.java +++ b/test/unit/org/apache/cassandra/concurrent/ManyToOneConcurrentLinkedQueueTest.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.net; +package org.apache.cassandra.concurrent; import java.util.BitSet; import java.util.NoSuchElementException; diff --git a/test/unit/org/apache/cassandra/io/util/Files.java b/test/unit/org/apache/cassandra/io/util/Files.java new file mode 100644 index 000000000000..4a0ec3285d41 --- /dev/null +++ b/test/unit/org/apache/cassandra/io/util/Files.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.nio.file.FileSystem; + +import com.google.common.jimfs.Jimfs; + +public class Files +{ + public static FileSystem newGlobalInMemoryFileSystem() + { + FileSystem fs = Jimfs.newFileSystem(); + File.unsafeSetFilesystem(fs); + return fs; + } +} diff --git a/test/unit/org/apache/cassandra/journal/DescriptorTest.java b/test/unit/org/apache/cassandra/journal/DescriptorTest.java new file mode 100644 index 000000000000..f0f2975be9b2 --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/DescriptorTest.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.nio.file.FileSystem; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import com.google.common.collect.Sets; +import org.junit.Test; + +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.Files; +import org.apache.cassandra.io.util.PathUtils; +import org.apache.cassandra.utils.Pair; +import org.assertj.core.api.Condition; + +import static accord.utils.Property.qt; +import static org.assertj.core.api.Assertions.assertThat; + +public class DescriptorTest +{ + private static final FileSystem FS = Files.newGlobalInMemoryFileSystem(); + + static + { + PathUtils.setDeletionListener(ignore -> {}); + } + + @Test + public void serde() + { + qt().forAll(descriptors()) + .check(desc -> + assertThat(Descriptor.fromFile(desc.fileFor(Component.DATA))).isEqualTo(desc)); + } + + @Test + public void isTmp() + { + Condition isTmp = new Condition("isTmpFile") + { + @Override + public boolean matches(File value) + { + return Descriptor.isTmpFile(value); + } + }; + qt().forAll(descriptors()).check(desc -> { + for (Component comp : Component.values()) + { + assertThat(desc.tmpFileFor(comp)).is(isTmp); + assertThat(desc.fileFor(comp)).isNot(isTmp); + } + }); + } + + @Test + public void list() + { + qt().withPure(false) + .forAll(children()) + .check(pair -> + assertThat(Descriptor.list(pair.left)).containsExactlyInAnyOrderElementsOf(pair.right)); + } + + @Test + public void order() + { + qt().withPure(false).forAll(children().filter(p -> p.right.size() >= 2)).check(pair -> + { + List list = new ArrayList<>(pair.right); + Collections.sort(list); + + Descriptor last = list.get(0); + for (int i = 1; i < list.size(); i++) + { + Descriptor current = list.get(i); + assertThat(current.directory).isEqualTo(last.directory); + assertThat(current.timestamp).isGreaterThanOrEqualTo(last.timestamp); + if (current.timestamp == last.timestamp) + assertThat(current.generation).isGreaterThanOrEqualTo(last.generation); + if (current.timestamp == last.timestamp + && current.generation == last.generation) + assertThat(current.journalVersion).isGreaterThanOrEqualTo(last.journalVersion); + if (current.timestamp == last.timestamp + && current.generation == last.generation + && current.journalVersion == last.journalVersion) + assertThat(current.userVersion).isGreaterThanOrEqualTo(last.userVersion); + last = current; + } + }); + } + + private static Gen>> children() + { + Gen dirs = dirs(); + return rs -> + { + File dir = dirs.next(rs); + if (dir.exists()) + dir.deleteRecursive(); + if (!dir.createDirectoriesIfNotExists()) + throw new AssertionError("Directory " + dir + " exists"); + int size = rs.nextInt(0, 10); + if (size == 0) + return Pair.create(dir, Collections.emptySet()); + Set uniq = Sets.newHashSetWithExpectedSize(size); + Gen descriptors = descriptors(Gens.constant(dir)); + for (int i = 0; i < size; i++) + { + Descriptor d = descriptors.next(rs); + while (!uniq.add(d)) + d = descriptors.next(rs); + } + for (Descriptor d : uniq) + d.fileFor(Component.DATA).createFileIfNotExists(); + return Pair.create(dir, uniq); + }; + } + + private static Gen descriptors() + { + Gen dir = dirs(); + return descriptors(dir); + } + + private static Gen descriptors(Gen dir) + { + Gen.LongGen longs = Gens.longs().between(0, 10); + Gen.IntGen ints = Gens.ints().between(0, 10); + return rs -> new Descriptor(dir.next(rs), longs.nextLong(rs), ints.next(rs), ints.next(rs), ints.next(rs)); + } + + private static Gen dirs() + { + Gen names = asciiVisible().ofLengthBetween(1, 100); + Gen gen = rs -> new File(FS.getPath('/' + names.next(rs))); + return gen.filter(f -> f.toCanonical().parent() != null); + } + + // TODO: replace with Gens.strings().asciiVisible() + public static Gens.SizeBuilder asciiVisible() + { + return new Gens.SizeBuilder<>(sizes -> Gens.strings().betweenCodePoints(sizes, 33, 127)); + } +} diff --git a/test/unit/org/apache/cassandra/journal/IndexTest.java b/test/unit/org/apache/cassandra/journal/IndexTest.java new file mode 100644 index 000000000000..8301ea988e31 --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/IndexTest.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import com.google.common.collect.Maps; +import org.junit.Test; + +import org.agrona.collections.IntHashSet; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.utils.Generators; +import org.apache.cassandra.utils.TimeUUID; +import org.quicktheories.core.Gen; +import org.quicktheories.impl.Constraint; + +import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.quicktheories.QuickTheory.qt; + +public class IndexTest +{ + private static final int[] EMPTY = new int[0]; + + @Test + public void testInMemoryIndexBasics() + { + InMemoryIndex index = InMemoryIndex.create(TimeUUIDKeySupport.INSTANCE); + + TimeUUID key0 = nextTimeUUID(); + TimeUUID key1 = nextTimeUUID(); + TimeUUID key2 = nextTimeUUID(); + TimeUUID key3 = nextTimeUUID(); + TimeUUID key4 = nextTimeUUID(); + + assertArrayEquals(EMPTY, index.lookUp(key0)); + assertArrayEquals(EMPTY, index.lookUp(key1)); + assertArrayEquals(EMPTY, index.lookUp(key2)); + assertArrayEquals(EMPTY, index.lookUp(key3)); + assertArrayEquals(EMPTY, index.lookUp(key4)); + + int val11 = 1100; + int val21 = 2100; + int val22 = 2200; + int val31 = 3100; + int val32 = 3200; + int val33 = 3300; + + index.update(key1, val11); + index.update(key2, val21); + index.update(key2, val22); + index.update(key3, val31); + index.update(key3, val32); + index.update(key3, val33); + + assertArrayEquals(EMPTY, index.lookUp(key0)); + assertArrayEquals(new int[] { val11 }, index.lookUp(key1)); + assertArrayEquals(new int[] { val21, val22 }, index.lookUp(key2)); + assertArrayEquals(new int[] { val31, val32, val33 }, index.lookUp(key3)); + assertArrayEquals(EMPTY, index.lookUp(key4)); + + assertEquals(key1, index.firstId()); + assertEquals(key3, index.lastId()); + + assertFalse(index.mayContainId(key0)); + assertTrue(index.mayContainId(key1)); + assertTrue(index.mayContainId(key2)); + assertTrue(index.mayContainId(key3)); + assertFalse(index.mayContainId(key4)); + } + + @Test + public void testInMemoryIndexPersists() throws IOException + { + InMemoryIndex inMemory = InMemoryIndex.create(TimeUUIDKeySupport.INSTANCE); + + TimeUUID key0 = nextTimeUUID(); + TimeUUID key1 = nextTimeUUID(); + TimeUUID key2 = nextTimeUUID(); + TimeUUID key3 = nextTimeUUID(); + TimeUUID key4 = nextTimeUUID(); + + int val11 = 1100; + int val21 = 2100; + int val22 = 2200; + int val31 = 3100; + int val32 = 3200; + int val33 = 3300; + + inMemory.update(key1, val11); + inMemory.update(key2, val21); + inMemory.update(key2, val22); + inMemory.update(key3, val31); + inMemory.update(key3, val32); + inMemory.update(key3, val33); + + File directory = new File(Files.createTempDirectory(null)); + directory.deleteOnExit(); + Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); + inMemory.persist(descriptor); + + try (OnDiskIndex onDisk = OnDiskIndex.open(descriptor, TimeUUIDKeySupport.INSTANCE)) + { + assertArrayEquals(EMPTY, onDisk.lookUp(key0)); + assertArrayEquals(new int[] { val11 }, onDisk.lookUp(key1)); + assertArrayEquals(new int[] { val21, val22 }, onDisk.lookUp(key2)); + assertArrayEquals(new int[] { val31, val32, val33 }, onDisk.lookUp(key3)); + assertArrayEquals(EMPTY, onDisk.lookUp(key4)); + + assertEquals(key1, onDisk.firstId()); + assertEquals(key3, onDisk.lastId()); + + assertFalse(onDisk.mayContainId(key0)); + assertTrue(onDisk.mayContainId(key1)); + assertTrue(onDisk.mayContainId(key2)); + assertTrue(onDisk.mayContainId(key3)); + assertFalse(onDisk.mayContainId(key4)); + } + } + + @Test + public void prop() throws IOException + { + Constraint sizeConstraint = Constraint.between(1, 10); + Constraint valueSizeConstraint = Constraint.between(0, 10); + Constraint positionConstraint = Constraint.between(0, Integer.MAX_VALUE); + Gen keyGen = Generators.timeUUID(); + Gen valueGen = rs -> { + int[] array = new int[(int) rs.next(valueSizeConstraint)]; + IntHashSet uniq = new IntHashSet(); + for (int i = 0; i < array.length; i++) + { + int value = (int) rs.next(positionConstraint); + while (!uniq.add(value)) + value = (int) rs.next(positionConstraint); + array[i] = value; + } + return array; + }; + Gen> gen = rs -> { + int size = (int) rs.next(sizeConstraint); + Map map = Maps.newHashMapWithExpectedSize(size); + for (int i = 0; i < size; i++) + { + TimeUUID key = keyGen.generate(rs); + while (map.containsKey(key)) + key = keyGen.generate(rs); + int[] value = valueGen.generate(rs); + map.put(key, value); + } + return map; + }; + gen = gen.describedAs(map -> { + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : map.entrySet()) + sb.append('\n').append(entry.getKey()).append('\t').append(Arrays.toString(entry.getValue())); + return sb.toString(); + }); + File directory = new File(Files.createTempDirectory(null)); + directory.deleteOnExit(); + qt().forAll(gen).checkAssert(map -> test(directory, map)); + } + + private static void test(File directory, Map map) + { + InMemoryIndex inMemory = InMemoryIndex.create(TimeUUIDKeySupport.INSTANCE); + for (Map.Entry e : map.entrySet()) + { + TimeUUID key = e.getKey(); + assertThat(inMemory.lookUp(key)).isEmpty(); + + int[] value = e.getValue(); + if (value.length == 0) + continue; + for (int i : value) + inMemory.update(key, i); + Arrays.sort(value); + } + assertIndex(map, inMemory); + + Descriptor descriptor = Descriptor.create(directory, System.nanoTime(), 1); + inMemory.persist(descriptor); + + try (OnDiskIndex onDisk = OnDiskIndex.open(descriptor, TimeUUIDKeySupport.INSTANCE)) + { + assertIndex(map, onDisk); + } + } + + private static void assertIndex(Map expected, Index actual) + { + List keys = expected.entrySet() + .stream() + .filter(e -> e.getValue().length != 0) + .map(Map.Entry::getKey) + .sorted() + .collect(Collectors.toList()); + + if (keys.isEmpty()) + { + assertThat(actual.firstId()).describedAs("Index %s had wrong firstId", actual).isNull(); + assertThat(actual.lastId()).describedAs("Index %s had wrong lastId", actual).isNull(); + } + else + { + assertThat(actual.firstId()).describedAs("Index %s had wrong firstId", actual).isEqualTo(keys.get(0)); + assertThat(actual.lastId()).describedAs("Index %s had wrong lastId", actual).isEqualTo(keys.get(keys.size() - 1)); + } + + for (Map.Entry e : expected.entrySet()) + { + TimeUUID key = e.getKey(); + int[] value = e.getValue(); + int[] read = actual.lookUp(key); + + if (value.length == 0) + { + assertThat(read).describedAs("Index %s returned wrong values for %s", actual, key).isEmpty(); + } + else + { + assertThat(read).describedAs("Index %s returned wrong values for %s", actual, key).isEqualTo(value); + assertThat(actual.mayContainId(key)).describedAs("Index %s expected %s to exist", key, actual).isTrue(); + } + } + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/journal/JournalTest.java b/test/unit/org/apache/cassandra/journal/JournalTest.java new file mode 100644 index 000000000000..cb2dd339111b --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/JournalTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.file.Files; +import java.util.Collections; + +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.utils.TimeUUID; + +import static org.junit.Assert.assertEquals; +import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; + +public class JournalTest +{ + @BeforeClass + public static void setUp() + { + DatabaseDescriptor.daemonInitialization(); + } + + @Test + public void testSimpleReadWrite() throws IOException + { + File directory = new File(Files.createTempDirectory("JournalTest")); + directory.deleteRecursiveOnExit(); + + Journal journal = + new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + + journal.start(); + + TimeUUID id1 = nextTimeUUID(); + TimeUUID id2 = nextTimeUUID(); + TimeUUID id3 = nextTimeUUID(); + TimeUUID id4 = nextTimeUUID(); + + journal.write(id1, 1L, Collections.singleton(1)); + journal.write(id2, 2L, Collections.singleton(1)); + journal.write(id3, 3L, Collections.singleton(1)); + journal.write(id4, 4L, Collections.singleton(1)); + + assertEquals(1L, (long) journal.read(id1)); + assertEquals(2L, (long) journal.read(id2)); + assertEquals(3L, (long) journal.read(id3)); + assertEquals(4L, (long) journal.read(id4)); + + journal.shutdown(); + + journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + journal.start(); + + assertEquals(1L, (long) journal.read(id1)); + assertEquals(2L, (long) journal.read(id2)); + assertEquals(3L, (long) journal.read(id3)); + assertEquals(4L, (long) journal.read(id4)); + + journal.shutdown(); + } + + static class LongSerializer implements ValueSerializer + { + static final LongSerializer INSTANCE = new LongSerializer(); + + public int serializedSize(Long value, int userVersion) + { + return Long.BYTES; + } + + public void serialize(Long value, DataOutputPlus out, int userVersion) throws IOException + { + out.writeLong(value); + } + + public Long deserialize(TimeUUID key, DataInputPlus in, int userVersion) throws IOException + { + return in.readLong(); + } + } +} diff --git a/test/unit/org/apache/cassandra/journal/MetadataTest.java b/test/unit/org/apache/cassandra/journal/MetadataTest.java new file mode 100644 index 000000000000..a10aaff82cb4 --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/MetadataTest.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; + +import org.junit.Test; + +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; + +import static org.junit.Assert.assertEquals; + +public class MetadataTest +{ + @Test + public void testUpdate() + { + Random rng = new Random(); + + int host1 = rng.nextInt(); + int host2 = host1 + 1; + int host3 = host2 + 1; + int host4 = host3 + 1; + int host5 = host4 + 1; + + Metadata metadata = Metadata.create(); + + metadata.update(set(host1)); + metadata.update(set(host2, host3)); + metadata.update(set(host1, host4)); + metadata.update(set(host1, host2, host3, host4)); + + assertEquals(set(host1, host2, host3, host4), metadata.hosts()); + assertEquals(3, metadata.count(host1)); + assertEquals(2, metadata.count(host2)); + assertEquals(2, metadata.count(host3)); + assertEquals(2, metadata.count(host4)); + assertEquals(0, metadata.count(host5)); + assertEquals(4, metadata.totalCount()); + } + + @Test + public void testWriteRead() throws IOException + { + Random rng = new Random(); + + int host1 = rng.nextInt(); + int host2 = host1 + 1; + int host3 = host2 + 1; + int host4 = host3 + 1; + int host5 = host4 + 1; + + Metadata metadata = Metadata.create(); + + metadata.update(set(host1)); + metadata.update(set(host2, host3)); + metadata.update(set(host1, host4)); + metadata.update(set(host1, host2, host3, host4)); + + try (DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get()) + { + metadata.write(out); + ByteBuffer serialized = out.buffer(); + + try (DataInputBuffer in = new DataInputBuffer(serialized, false)) + { + Metadata deserialized = Metadata.read(in); + + assertEquals(set(host1, host2, host3, host4), deserialized.hosts()); + assertEquals(3, deserialized.count(host1)); + assertEquals(2, deserialized.count(host2)); + assertEquals(2, deserialized.count(host3)); + assertEquals(2, deserialized.count(host4)); + assertEquals(0, deserialized.count(host5)); + assertEquals(4, deserialized.totalCount()); + } + } + } + + private static Set set(Integer... ids) + { + return new HashSet<>(Arrays.asList(ids)); + } +} diff --git a/test/unit/org/apache/cassandra/journal/SegmentTest.java b/test/unit/org/apache/cassandra/journal/SegmentTest.java new file mode 100644 index 000000000000..0294cfef094d --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/SegmentTest.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.util.*; + +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.TimeUUID; + +import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class SegmentTest +{ + @Test + public void testWriteReadActiveSegment() throws IOException + { + TimeUUID id1 = nextTimeUUID(); + TimeUUID id2 = nextTimeUUID(); + TimeUUID id3 = nextTimeUUID(); + TimeUUID id4 = nextTimeUUID(); + + ByteBuffer record1 = ByteBufferUtil.bytes("sample record 1"); + ByteBuffer record2 = ByteBufferUtil.bytes("sample record 2"); + ByteBuffer record3 = ByteBufferUtil.bytes("sample record 3"); + ByteBuffer record4 = ByteBufferUtil.bytes("sample record 4"); + + Random rng = new Random(); + + int host1 = rng.nextInt(); + int host2 = rng.nextInt(); + int host3 = rng.nextInt(); + int host4 = rng.nextInt(); + + Set hosts1 = set(host1); + Set hosts2 = set(host1, host2); + Set hosts3 = set(host1, host2, host3); + Set hosts4 = set(host4); + + File directory = new File(Files.createTempDirectory(null)); + directory.deleteRecursiveOnExit(); + + Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); + + ActiveSegment segment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); + + segment.allocate(record1.remaining(), hosts1).write(id1, record1, hosts1); + segment.allocate(record2.remaining(), hosts2).write(id2, record2, hosts2); + segment.allocate(record3.remaining(), hosts3).write(id3, record3, hosts3); + segment.allocate(record4.remaining(), hosts4).write(id4, record4, hosts4); + + // read all 4 entries by id and compare with originals + EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + + segment.read(id1, holder); + assertEquals(id1, holder.key); + assertEquals(hosts1, holder.hosts); + assertEquals(record1, holder.value); + + segment.read(id2, holder); + assertEquals(id2, holder.key); + assertEquals(hosts2, holder.hosts); + assertEquals(record2, holder.value); + + segment.read(id3, holder); + assertEquals(id3, holder.key); + assertEquals(hosts3, holder.hosts); + assertEquals(record3, holder.value); + + segment.read(id4, holder); + assertEquals(id4, holder.key); + assertEquals(hosts4, holder.hosts); + assertEquals(record4, holder.value); + } + + @Test + public void testReadClosedSegmentByID() throws IOException + { + DatabaseDescriptor.daemonInitialization(); + + TimeUUID id1 = nextTimeUUID(); + TimeUUID id2 = nextTimeUUID(); + TimeUUID id3 = nextTimeUUID(); + TimeUUID id4 = nextTimeUUID(); + + ByteBuffer record1 = ByteBufferUtil.bytes("sample record 1"); + ByteBuffer record2 = ByteBufferUtil.bytes("sample record 2"); + ByteBuffer record3 = ByteBufferUtil.bytes("sample record 3"); + ByteBuffer record4 = ByteBufferUtil.bytes("sample record 4"); + + Random rng = new Random(); + + int host1 = rng.nextInt(); + int host2 = rng.nextInt(); + int host3 = rng.nextInt(); + int host4 = rng.nextInt(); + + Set hosts1 = set(host1); + Set hosts2 = set(host1, host2); + Set hosts3 = set(host1, host2, host3); + Set hosts4 = set(host4); + + File directory = new File(Files.createTempDirectory(null)); + directory.deleteRecursiveOnExit(); + + Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); + + ActiveSegment activeSegment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); + + activeSegment.allocate(record1.remaining(), hosts1).write(id1, record1, hosts1); + activeSegment.allocate(record2.remaining(), hosts2).write(id2, record2, hosts2); + activeSegment.allocate(record3.remaining(), hosts3).write(id3, record3, hosts3); + activeSegment.allocate(record4.remaining(), hosts4).write(id4, record4, hosts4); + + activeSegment.close(); + + StaticSegment staticSegment = StaticSegment.open(descriptor, TimeUUIDKeySupport.INSTANCE); + + // read all 4 entries by id and compare with originals + EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + + staticSegment.read(id1, holder); + assertEquals(id1, holder.key); + assertEquals(hosts1, holder.hosts); + assertEquals(record1, holder.value); + + staticSegment.read(id2, holder); + assertEquals(id2, holder.key); + assertEquals(hosts2, holder.hosts); + assertEquals(record2, holder.value); + + staticSegment.read(id3, holder); + assertEquals(id3, holder.key); + assertEquals(hosts3, holder.hosts); + assertEquals(record3, holder.value); + + staticSegment.read(id4, holder); + assertEquals(id4, holder.key); + assertEquals(hosts4, holder.hosts); + assertEquals(record4, holder.value); + } + + @Test + public void testReadClosedSegmentSequentially() throws IOException + { + TimeUUID id1 = nextTimeUUID(); + TimeUUID id2 = nextTimeUUID(); + TimeUUID id3 = nextTimeUUID(); + TimeUUID id4 = nextTimeUUID(); + + ByteBuffer record1 = ByteBufferUtil.bytes("sample record 1"); + ByteBuffer record2 = ByteBufferUtil.bytes("sample record 2"); + ByteBuffer record3 = ByteBufferUtil.bytes("sample record 3"); + ByteBuffer record4 = ByteBufferUtil.bytes("sample record 4"); + + Random rng = new Random(); + + int host1 = rng.nextInt(); + int host2 = rng.nextInt(); + int host3 = rng.nextInt(); + int host4 = rng.nextInt(); + + Set hosts1 = set(host1); + Set hosts2 = set(host1, host2); + Set hosts3 = set(host1, host2, host3); + Set hosts4 = set(host4); + + File directory = new File(Files.createTempDirectory(null)); + directory.deleteRecursiveOnExit(); + + Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); + + ActiveSegment activeSegment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); + + activeSegment.allocate(record1.remaining(), hosts1).write(id1, record1, hosts1); + activeSegment.allocate(record2.remaining(), hosts2).write(id2, record2, hosts2); + activeSegment.allocate(record3.remaining(), hosts3).write(id3, record3, hosts3); + activeSegment.allocate(record4.remaining(), hosts4).write(id4, record4, hosts4); + + activeSegment.close(); + + StaticSegment.SequentialReader reader = StaticSegment.reader(descriptor, TimeUUIDKeySupport.INSTANCE, 0); + + // read all 4 entries sequentially and compare with originals + assertTrue(reader.advance()); + assertEquals(id1, reader.id()); + assertEquals(hosts1, reader.hosts()); + assertEquals(record1, reader.record()); + + assertTrue(reader.advance()); + assertEquals(id2, reader.id()); + assertEquals(hosts2, reader.hosts()); + assertEquals(record2, reader.record()); + + assertTrue(reader.advance()); + assertEquals(id3, reader.id()); + assertEquals(hosts3, reader.hosts()); + assertEquals(record3, reader.record()); + + assertTrue(reader.advance()); + assertEquals(id4, reader.id()); + assertEquals(hosts4, reader.hosts()); + assertEquals(record4, reader.record()); + + assertFalse(reader.advance()); + } + + private static Set set(Integer... ids) + { + return new HashSet<>(Arrays.asList(ids)); + } + + private static Params params() + { + return TestParams.INSTANCE; + } +} diff --git a/test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java b/test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java new file mode 100644 index 000000000000..5b83ee88f1c5 --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.file.Files; + +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.File; + +import static org.junit.Assert.assertEquals; + +public class SyncedOffsetsTest +{ + @BeforeClass + public static void setUp() + { + DatabaseDescriptor.clientInitialization(); + } + + @Test + public void testCommonCase() throws IOException + { + testReadWrite(512, true); + testReadWrite(512, false); + } + + @Test + public void testResize() throws IOException + { + testReadWrite(2048, true); + testReadWrite(2048, false); + } + + private void testReadWrite(int n, boolean syncOnMark) throws IOException + { + File directory = new File(Files.createTempDirectory(null)); + directory.deleteOnExit(); + + Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); + + SyncedOffsets active = SyncedOffsets.active(descriptor, syncOnMark); + for (int i = 0; i < n; i++) + active.mark(i); + assertEquals(n - 1, active.syncedOffset()); + active.close(); + + SyncedOffsets loaded = SyncedOffsets.load(descriptor); + assertEquals(n - 1, loaded.syncedOffset()); + loaded.close(); + } +} diff --git a/test/unit/org/apache/cassandra/journal/TestParams.java b/test/unit/org/apache/cassandra/journal/TestParams.java new file mode 100644 index 000000000000..3beb378536bf --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/TestParams.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import org.apache.cassandra.net.MessagingService; + +public class TestParams implements Params +{ + static final TestParams INSTANCE = new TestParams(); + + @Override + public int segmentSize() + { + return 32 << 20; + } + + @Override + public FailurePolicy failurePolicy() + { + return FailurePolicy.STOP; + } + + @Override + public FlushMode flushMode() + { + return FlushMode.GROUP; + } + + @Override + public int flushPeriod() + { + return 1000; + } + + @Override + public int periodicFlushLagBlock() + { + return 1500; + } + + @Override + public int userVersion() + { + return MessagingService.current_version; + } +} diff --git a/test/unit/org/apache/cassandra/journal/TimeUUIDKeySupport.java b/test/unit/org/apache/cassandra/journal/TimeUUIDKeySupport.java new file mode 100644 index 000000000000..3d04fad89b6c --- /dev/null +++ b/test/unit/org/apache/cassandra/journal/TimeUUIDKeySupport.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.zip.Checksum; + +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.TimeUUID; + +import static org.apache.cassandra.utils.FBUtilities.updateChecksumLong; + +class TimeUUIDKeySupport implements KeySupport +{ + static final TimeUUIDKeySupport INSTANCE = new TimeUUIDKeySupport(); + + @Override + public int serializedSize(int userVersion) + { + return 16; + } + + @Override + public void serialize(TimeUUID key, DataOutputPlus out, int userVersion) throws IOException + { + out.writeLong(key.uuidTimestamp()); + out.writeLong(key.lsb()); + } + + @Override + public TimeUUID deserialize(DataInputPlus in, int userVersion) throws IOException + { + long uuidTimestamp = in.readLong(); + long lsb = in.readLong(); + return new TimeUUID(uuidTimestamp, lsb); + } + + @Override + public TimeUUID deserialize(ByteBuffer buffer, int position, int userVersion) + { + long uuidTimestamp = buffer.getLong(position); + long lsb = buffer.getLong(position + 8); + return new TimeUUID(uuidTimestamp, lsb); + } + + @Override + public void updateChecksum(Checksum crc, TimeUUID key, int userVersion) + { + updateChecksumLong(crc, key.uuidTimestamp()); + updateChecksumLong(crc, key.lsb()); + } + + @Override + public int compareWithKeyAt(TimeUUID key, ByteBuffer buffer, int position, int userVersion) + { + long uuidTimestamp = buffer.getLong(position); + long lsb = buffer.getLong(position + 8); + return key.uuidTimestamp() != uuidTimestamp + ? Long.compare(key.uuidTimestamp(), uuidTimestamp) + : Long.compare(key.lsb(), lsb); + } + + @Override + public int compare(TimeUUID o1, TimeUUID o2) + { + return o1.compareTo(o2); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java new file mode 100644 index 000000000000..7059a12f2a64 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; + +import accord.primitives.TxnId; +import accord.utils.AccordGens; +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.service.accord.AccordJournal.Key; +import org.apache.cassandra.utils.AsymmetricOrdering; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.FBUtilities.Order; +import org.checkerframework.checker.nullness.qual.Nullable; + +import static accord.utils.Property.qt; +import static org.assertj.core.api.Assertions.assertThat; + +public class AccordJournalTest +{ + @Test + public void keySerde() + { + DataOutputBuffer buffer = new DataOutputBuffer(); + qt().forAll(keyGen()).check(key -> + { + buffer.clear(); + int expectedSize = Key.SUPPORT.serializedSize(1); + Key.SUPPORT.serialize(key, buffer, 1); + assertThat(buffer.getLength()).isEqualTo(expectedSize); + try (DataInputBuffer input = new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)) + { + Key read = Key.SUPPORT.deserialize(input, 1); + assertThat(read).isEqualTo(key); + } + }); + } + + @Test + public void compareKeys() + { + qt().forAll(Gens.lists(keyGen()).ofSizeBetween(2, 100)).check(keys -> + { + keys.sort(Key.SUPPORT); + + List buffers = new ArrayList<>(keys.size()); + for (Key k : keys) buffers.add(toBuffer(k)); + + for (int i = 0; i < keys.size(); i++) + { + Key outerKey = keys.get(i); + for (int j = 0; j < keys.size(); j++) + { + Key innerKey = keys.get(j); + ByteBuffer innerBuffer = buffers.get(j); + Order expected = FBUtilities.compare(outerKey, innerKey, Key.SUPPORT); + Order actual = FBUtilities.compare(outerKey, innerBuffer, new AsymmetricOrdering() + { + @Override + public int compareAsymmetric(Key left, ByteBuffer right) + { + return Key.SUPPORT.compareWithKeyAt(left, right, 0, 1); + } + + @Override + public int compare(@Nullable Key left, @Nullable Key right) + { + throw new UnsupportedOperationException(); + } + }); + assertThat(actual).isEqualTo(expected); + } + } + }); + } + + private static ByteBuffer toBuffer(Key k) + { + try (DataOutputBuffer buffer = new DataOutputBuffer(Key.SUPPORT.serializedSize(1))) + { + Key.SUPPORT.serialize(k, buffer, 1); + return buffer.unsafeGetBufferAndFlip(); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + private Gen keyGen() + { + Gen txnIdGen = AccordGens.txnIds(); + Gen typeGen = Gens.enums().all(AccordJournal.Type.class); + return rs -> new Key(txnIdGen.next(rs), typeGen.next(rs)); + } +} From 537c1f991afff4bb26fc60a8beadad79a228b49a Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 27 Apr 2023 18:01:11 +0100 Subject: [PATCH 056/340] (Accord only) Permit nodes to join a cluster without the full transaction history patch by Benedict; reviewed by Blake Eggleston for CASSANDRA-18523 --- modules/accord | 2 +- .../service/accord/AccordCommandStore.java | 6 +++ .../service/accord/AccordCommandStores.java | 45 ++++++++++++------- .../accord/AccordConfigurationService.java | 11 +++-- .../service/accord/AccordKeyspace.java | 5 +-- .../service/accord/AccordLoadingState.java | 29 ++++++++++++ .../service/accord/AccordObjectSizes.java | 15 +++---- .../service/accord/AccordSafeCommand.java | 19 ++++++++ .../accord/AccordSafeCommandStore.java | 2 +- .../service/accord/api/AccordAgent.java | 17 +++++++ .../service/accord/async/AsyncLoader.java | 13 ++++++ .../service/accord/async/AsyncOperation.java | 4 +- .../serializers/CommandSerializers.java | 6 ++- .../serializers/GetDepsSerializers.java | 8 +--- .../serializers/ListenerSerializers.java | 30 ++++++------- .../serializers/ReadDataSerializers.java | 26 +++++++---- .../serializers/RecoverySerializers.java | 8 +++- .../service/accord/txn/TxnQuery.java | 7 +-- .../cassandra/service/accord/txn/TxnRead.java | 1 + .../service/accord/txn/TxnUpdate.java | 3 +- .../test/AccordJournalSimulationTest.java | 3 +- .../accord/AccordCommandStoreTest.java | 2 +- .../service/accord/AccordTestUtils.java | 32 ++++++++----- .../accord/async/AsyncOperationTest.java | 21 +++++---- 24 files changed, 218 insertions(+), 97 deletions(-) diff --git a/modules/accord b/modules/accord index 8226b2d77593..b99c4671fa0b 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8226b2d7759319d7a0b0c823ab09b4344c5423f7 +Subproject commit b99c4671fa0b22bed7f5a37fc5acaa2d2579e5b2 diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 1633dc793005..b3db3f382238 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -39,6 +39,7 @@ import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; +import accord.primitives.Deps; import accord.primitives.RoutableKey; import accord.primitives.TxnId; import accord.utils.Invariants; @@ -101,6 +102,11 @@ public boolean inStore() return Thread.currentThread().getId() == threadId; } + @Override + protected void registerHistoricalTransactions(Deps deps) + { + } + public void setCacheSize(long bytes) { checkInStoreThread(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 235cc1be571b..3bad135715af 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -17,10 +17,14 @@ */ package org.apache.cassandra.service.accord; +import java.util.function.Supplier; + import accord.api.Agent; +import accord.api.ConfigurationService.EpochReady; import accord.api.DataStore; import accord.api.ProgressLog; import accord.local.CommandStores; +import accord.local.Node; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; @@ -32,7 +36,7 @@ import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.journal.AsyncWriteCallback; -public class AccordCommandStores extends CommandStores +public class AccordCommandStores extends CommandStores { private final AccordJournal journal; @@ -50,14 +54,6 @@ static Factory factory(AccordJournal journal) new AccordCommandStores(time, agent, store, random, shardDistributor, progressLogFactory, journal); } - @Override - public synchronized void shutdown() - { - super.shutdown(); - journal.shutdown(); - //TODO shutdown isn't useful by itself, we need a way to "wait" as well. Should be AutoCloseable or offer awaitTermination as well (think Shutdownable interface) - } - @Override protected void mapReduceConsume( PreLoadContext context, @@ -100,13 +96,6 @@ public void onFailure(Throwable error) }); } - @Override - public synchronized void updateTopology(Topology newTopology) - { - super.updateTopology(newTopology); - refreshCacheSizes(); - } - private long cacheSize; synchronized void setCacheSize(long bytes) @@ -128,4 +117,28 @@ private static long maxCacheSize() { return 5 << 20; // TODO (required): make configurable } + + @Override + public synchronized Supplier updateTopology(Node node, Topology newTopology) + { + Supplier start = super.updateTopology(node, newTopology); + return () -> { + EpochReady ready = start.get(); + ready.metadata.addCallback(() -> { + synchronized (this) + { + refreshCacheSizes(); + } + }); + return ready; + }; + } + + @Override + public synchronized void shutdown() + { + super.shutdown(); + journal.shutdown(); + //TODO shutdown isn't useful by itself, we need a way to "wait" as well. Should be AutoCloseable or offer awaitTermination as well (think Shutdownable interface) + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index d62985b0a1e8..e8f10336649a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -73,15 +73,18 @@ public synchronized void fetchTopologyForEpoch(long epoch) } @Override - public void acknowledgeEpoch(long epoch) + public void acknowledgeEpoch(EpochReady ready) { - Topology acknowledged = getTopologyForEpoch(epoch); + Topology acknowledged = getTopologyForEpoch(ready.epoch); for (Node.Id node : acknowledged.nodes()) { if (node.equals(localId)) continue; - for (Listener listener : listeners) - listener.onEpochSyncComplete(node, epoch); + + ready.coordination.addCallback(() -> { + for (Listener listener : listeners) + listener.onEpochSyncComplete(node, ready.epoch); + }); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index fc719dfdf831..9d38bd7133a7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -45,7 +45,6 @@ import accord.impl.CommandsForKey; import accord.impl.CommandsForKey.CommandTimeseries; import accord.local.Command; -import accord.local.CommandListener; import accord.local.CommandStore; import accord.local.CommonAttributes; import accord.local.Listeners; @@ -192,7 +191,7 @@ private static class CommandsSerializers static final LocalVersionedSerializer partialDeps = localSerializer(DepsSerializer.partialDeps); static final LocalVersionedSerializer writes = localSerializer(CommandSerializers.writes); static final LocalVersionedSerializer result = localSerializer(TxnData.serializer); - static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); + static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); private static LocalVersionedSerializer localSerializer(IVersionedSerializer serializer) { @@ -508,7 +507,7 @@ public static Mutation getCommandMutation(AccordCommandStore commandStore, Accor addCellIfModified(CommandsColumns.dependencies, Command::partialDeps, CommandsSerializers.partialDeps, builder, timestampMicros, original, command); - addSetChanges(CommandsColumns.listeners, cmd -> Sets.filter(cmd.listeners(), l -> !l.isTransient()), v -> serialize(v, CommandsSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); + addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, CommandsSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); if (command.isCommitted()) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java b/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java index 1316b02d4848..8dea7389c0c7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java @@ -18,9 +18,14 @@ package org.apache.cassandra.service.accord; +import java.util.Collection; +import java.util.Collections; +import java.util.Set; import java.util.concurrent.Callable; import java.util.function.Function; +import accord.local.Command; +import accord.utils.DeterministicIdentitySet; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncResults; @@ -54,6 +59,7 @@ public FailedLoad(Throwable cause) private final K key; private Object state = UNINITIALIZED; + private Set transientListeners; public AccordLoadingState(K key) { @@ -160,4 +166,27 @@ public AsyncChain listen() checkState(LoadingState.PENDING, false); return (PendingLoad) state; } + + + public void addListener(Command.TransientListener listener) + { + if (transientListeners == null) + transientListeners = new DeterministicIdentitySet<>(); + transientListeners.add(listener); + } + + public boolean removeListener(Command.TransientListener listener) + { + if (transientListeners == null) + return false; + + return transientListeners.remove(listener); + } + + public Collection transientListeners() + { + if (transientListeners == null) + return Collections.emptySet(); + return transientListeners; + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index d56e88ba5775..c79d1ba5b9b8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -30,7 +30,6 @@ import accord.api.RoutingKey; import accord.impl.CommandsForKey; import accord.local.Command; -import accord.local.CommandListener; import accord.local.CommonAttributes; import accord.local.Node; import accord.local.SaveStatus; @@ -233,7 +232,7 @@ public static long dependencies(Deps dependencies) return size; } - private static final long EMPTY_WRITES_SIZE = measure(new Writes(null, null, null)); + private static final long EMPTY_WRITES_SIZE = measure(new Writes(null, null, null, null)); public static long writes(Writes writes) { long size = EMPTY_WRITES_SIZE; @@ -244,14 +243,12 @@ public static long writes(Writes writes) return size; } - private static final long EMPTY_COMMAND_LISTENER = measure(new Command.Listener(null)); + private static final long EMPTY_COMMAND_LISTENER = measure(new Command.ProxyListener(null)); private static final long EMPTY_CFK_LISTENER = measure(new CommandsForKey.Listener((Key) null)); - public static long listener(CommandListener listener) + public static long listener(Command.DurableAndIdempotentListener listener) { - if (listener.isTransient()) - return 0; - if (listener instanceof Command.Listener) - return EMPTY_COMMAND_LISTENER + timestamp(((Command.Listener) listener).txnId()); + if (listener instanceof Command.ProxyListener) + return EMPTY_COMMAND_LISTENER + timestamp(((Command.ProxyListener) listener).txnId()); if (listener instanceof CommandsForKey.Listener) return EMPTY_CFK_LISTENER + key(((CommandsForKey.Listener) listener).key()); throw new IllegalArgumentException("Unhandled listener type: " + listener.getClass()); @@ -306,7 +303,7 @@ public static long command(Command command) size += sizeNullable(command.progressKey(), AccordObjectSizes::key); size += sizeNullable(command.route(), AccordObjectSizes::route); size += sizeNullable(command.promised(), AccordObjectSizes::timestamp); - for (CommandListener listener : command.listeners()) + for (Command.DurableAndIdempotentListener listener : command.durableListeners()) size += listener(listener); if (!command.isWitnessed()) diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index 3dc37b2f3c23..b44f684e83f9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -18,6 +18,7 @@ package org.apache.cassandra.service.accord; +import java.util.Collection; import java.util.Objects; import com.google.common.annotations.VisibleForTesting; @@ -121,4 +122,22 @@ public boolean invalidated() { return invalidated; } + + @Override + public void addListener(Command.TransientListener listener) + { + global.addListener(listener); + } + + @Override + public boolean removeListener(Command.TransientListener listener) + { + return global().removeListener(listener); + } + + @Override + public Collection transientListeners() + { + return global.transientListeners(); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index c71ba18f69a3..e1bf80895bc5 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -144,7 +144,7 @@ public long latestEpoch() } @Override - protected Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) + public Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) { // TODO: Seekables // TODO: efficiency diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 232684565a79..5154ca7d80fe 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -22,8 +22,13 @@ import accord.api.Result; import accord.local.Command; import accord.local.Node; +import accord.primitives.Ranges; +import accord.primitives.Seekables; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.JVMStabilityInspector; import static java.util.concurrent.TimeUnit.MICROSECONDS; @@ -46,6 +51,12 @@ public void onInconsistentTimestamp(Command command, Timestamp prev, Timestamp n throw error; } + @Override + public void onFailedBootstrap(String phase, Ranges ranges, Runnable retry, Throwable failure) + { + + } + @Override public void onUncaughtException(Throwable t) { @@ -65,4 +76,10 @@ public boolean isExpired(TxnId initiated, long now) // TODO: should distinguish between reads and writes return now - initiated.hlc() > getReadRpcTimeout(MICROSECONDS); } + + @Override + public Txn emptyTxn(Txn.Kind kind, Seekables keysOrRanges) + { + return new Txn.InMemory(kind, keysOrRanges, TxnRead.EMPTY, TxnQuery.ALL, null); + } } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 34ba0936c678..0a486ee40245 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -19,18 +19,22 @@ package org.apache.cassandra.service.accord.async; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Iterables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.impl.CommandsForKey; import accord.local.Command; +import accord.local.PreLoadContext; import accord.primitives.RoutableKey; import accord.primitives.TxnId; import accord.utils.Invariants; @@ -72,6 +76,15 @@ public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iter this.keys = keys; } + protected static Iterable txnIds(PreLoadContext context) + { + TxnId primaryid = context.primaryTxnId(); + Collection additionalIds = context.additionalTxnIds(); + if (primaryid == null) return additionalIds; + if (additionalIds.isEmpty()) return Collections.singleton(primaryid); + return Iterables.concat(Collections.singleton(primaryid), additionalIds); + } + private > void referenceAndAssembleReads(Iterable keys, Map context, AccordStateCache.Instance cache, diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index ce5bb125f14b..bf86f6777b74 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -42,6 +42,8 @@ import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.AccordSafeState; +import static org.apache.cassandra.service.accord.async.AsyncLoader.txnIds; + public abstract class AsyncOperation extends AsyncChains.Head implements Runnable, Function { private static final Logger logger = LoggerFactory.getLogger(AsyncOperation.class); @@ -143,7 +145,7 @@ AsyncWriter createAsyncWriter(AccordCommandStore commandStore) AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, preLoadContext.txnIds(), toRoutableKeys(preLoadContext.keys())); + return new AsyncLoader(commandStore, txnIds(preLoadContext), toRoutableKeys(preLoadContext.keys())); } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index 0441c640c818..366b469c2ffe 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -169,6 +169,7 @@ public long serializedSize(PartialTxn txn, int version) @Override public void serialize(Writes writes, DataOutputPlus out, int version) throws IOException { + txnId.serialize(writes.txnId, out, version); timestamp.serialize(writes.executeAt, out, version); KeySerializers.seekables.serialize(writes.keys, out, version); boolean hasWrites = writes.write != null; @@ -180,7 +181,7 @@ public void serialize(Writes writes, DataOutputPlus out, int version) throws IOE @Override public Writes deserialize(DataInputPlus in, int version) throws IOException { - return new Writes(timestamp.deserialize(in, version), + return new Writes(txnId.deserialize(in, version), timestamp.deserialize(in, version), KeySerializers.seekables.deserialize(in, version), in.readBoolean() ? TxnWrite.serializer.deserialize(in, version) : null); } @@ -188,7 +189,8 @@ public Writes deserialize(DataInputPlus in, int version) throws IOException @Override public long serializedSize(Writes writes, int version) { - long size = timestamp.serializedSize(writes.executeAt, version); + long size = txnId.serializedSize(writes.txnId, version); + size += timestamp.serializedSize(writes.executeAt, version); size += KeySerializers.seekables.serializedSize(writes.keys, version); boolean hasWrites = writes.write != null; size += TypeSizes.sizeof(hasWrites); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java index 1de5b96a5b21..37f9302ae36c 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java @@ -25,7 +25,6 @@ import accord.primitives.PartialRoute; import accord.primitives.Seekables; import accord.primitives.Timestamp; -import accord.primitives.Txn; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -40,7 +39,6 @@ public void serializeBody(GetDeps msg, DataOutputPlus out, int version) throws I { KeySerializers.seekables.serialize(msg.keys, out, version); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); - CommandSerializers.kind.serialize(msg.kind, out, version); } @Override @@ -48,16 +46,14 @@ public GetDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, Parti { Seekables keys = KeySerializers.seekables.deserialize(in, version); Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); - Txn.Kind kind = CommandSerializers.kind.deserialize(in, version); - return GetDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, keys, executeAt, kind); + return GetDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, keys, executeAt); } @Override public long serializedBodySize(GetDeps msg, int version) { return KeySerializers.seekables.serializedSize(msg.keys, version) - + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) - + CommandSerializers.kind.serializedSize(msg.kind, version); + + CommandSerializers.timestamp.serializedSize(msg.executeAt, version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java index 37afe0a59a62..4581ab8cadbf 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java @@ -22,8 +22,6 @@ import accord.impl.CommandsForKey; import accord.local.Command; -import accord.local.CommandListener; -import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -36,9 +34,9 @@ public enum Kind { COMMAND, COMMANDS_FOR_KEY; - private static Kind of(CommandListener listener) + private static Kind of(Command.DurableAndIdempotentListener listener) { - if (listener instanceof Command.Listener) + if (listener instanceof Command.ProxyListener) return COMMAND; if (listener instanceof CommandsForKey.Listener) @@ -49,22 +47,22 @@ private static Kind of(CommandListener listener) } - private static final IVersionedSerializer commandListener = new IVersionedSerializer() + private static final IVersionedSerializer commandListener = new IVersionedSerializer() { @Override - public void serialize(Command.Listener listener, DataOutputPlus out, int version) throws IOException + public void serialize(Command.ProxyListener listener, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(listener.txnId(), out, version); } @Override - public Command.Listener deserialize(DataInputPlus in, int version) throws IOException + public Command.ProxyListener deserialize(DataInputPlus in, int version) throws IOException { - return new Command.Listener(CommandSerializers.txnId.deserialize(in, version)); + return new Command.ProxyListener(CommandSerializers.txnId.deserialize(in, version)); } @Override - public long serializedSize(Command.Listener listener, int version) + public long serializedSize(Command.ProxyListener listener, int version) { return CommandSerializers.txnId.serializedSize(listener.txnId(), version); } @@ -91,18 +89,17 @@ public long serializedSize(CommandsForKey.Listener listener, int version) } }; - public static final IVersionedSerializer listener = new IVersionedSerializer() + public static final IVersionedSerializer listener = new IVersionedSerializer() { @Override - public void serialize(CommandListener listener, DataOutputPlus out, int version) throws IOException + public void serialize(Command.DurableAndIdempotentListener listener, DataOutputPlus out, int version) throws IOException { - Invariants.checkArgument(!listener.isTransient()); Kind kind = Kind.of(listener); out.write(kind.ordinal()); switch (kind) { case COMMAND: - commandListener.serialize((Command.Listener) listener, out, version); + commandListener.serialize((Command.ProxyListener) listener, out, version); break; case COMMANDS_FOR_KEY: cfkListener.serialize((CommandsForKey.Listener) listener, out, version); @@ -113,7 +110,7 @@ public void serialize(CommandListener listener, DataOutputPlus out, int version) } @Override - public CommandListener deserialize(DataInputPlus in, int version) throws IOException + public Command.DurableAndIdempotentListener deserialize(DataInputPlus in, int version) throws IOException { Kind kind = Kind.values()[in.readByte()]; switch (kind) @@ -128,15 +125,14 @@ public CommandListener deserialize(DataInputPlus in, int version) throws IOExcep } @Override - public long serializedSize(CommandListener listener, int version) + public long serializedSize(Command.DurableAndIdempotentListener listener, int version) { - Invariants.checkArgument(!listener.isTransient()); Kind kind = Kind.of(listener); long size = TypeSizes.BYTE_SIZE; switch (kind) { case COMMAND: - size += commandListener.serializedSize((Command.Listener) listener, version); + size += commandListener.serializedSize((Command.ProxyListener) listener, version); break; case COMMANDS_FOR_KEY: size += cfkListener.serializedSize((CommandsForKey.Listener) listener, version); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index a344028b4915..18b8b15e5136 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -20,10 +20,11 @@ import java.io.IOException; -import accord.messages.ReadData; import accord.messages.ReadData.ReadNack; import accord.messages.ReadData.ReadOk; import accord.messages.ReadData.ReadReply; +import accord.messages.ReadTxnData; +import accord.primitives.Ranges; import accord.primitives.Seekables; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; @@ -32,12 +33,16 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.txn.TxnData; +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + public class ReadDataSerializers { - public static final IVersionedSerializer request = new IVersionedSerializer() + public static final IVersionedSerializer request = new IVersionedSerializer() { @Override - public void serialize(ReadData read, DataOutputPlus out, int version) throws IOException + public void serialize(ReadTxnData read, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(read.txnId, out, version); KeySerializers.seekables.serialize(read.readScope, out, version); @@ -46,17 +51,17 @@ public void serialize(ReadData read, DataOutputPlus out, int version) throws IOE } @Override - public ReadData deserialize(DataInputPlus in, int version) throws IOException + public ReadTxnData deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); Seekables readScope = KeySerializers.seekables.deserialize(in, version); long waitForEpoch = in.readUnsignedVInt(); long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; - return ReadData.SerializerSupport.create(txnId, readScope, executeAtEpoch, waitForEpoch); + return ReadTxnData.SerializerSupport.create(txnId, readScope, executeAtEpoch, waitForEpoch); } @Override - public long serializedSize(ReadData read, int version) + public long serializedSize(ReadTxnData read, int version) { return CommandSerializers.txnId.serializedSize(read.txnId, version) + KeySerializers.seekables.serializedSize(read.readScope, version) @@ -81,6 +86,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I out.writeByte(0); ReadOk readOk = (ReadOk) reply; + serializeNullable(readOk.unavailable, out, version, KeySerializers.ranges); TxnData.nullableSerializer.serialize((TxnData) readOk.data, out, version); } @@ -91,7 +97,9 @@ public ReadReply deserialize(DataInputPlus in, int version) throws IOException if (id != 0) return nacks[id - 1]; - return new ReadOk(TxnData.nullableSerializer.deserialize(in, version)); + Ranges ranges = deserializeNullable(in, version, KeySerializers.ranges); + TxnData data = TxnData.nullableSerializer.deserialize(in, version); + return new ReadOk(ranges, data); } @Override @@ -101,7 +109,9 @@ public long serializedSize(ReadReply reply, int version) return TypeSizes.BYTE_SIZE; ReadOk readOk = (ReadOk) reply; - return TypeSizes.BYTE_SIZE + TxnData.nullableSerializer.serializedSize((TxnData) readOk.data, version); + return TypeSizes.BYTE_SIZE + + serializedNullableSize(readOk.unavailable, version, KeySerializers.ranges) + + TxnData.nullableSerializer.serializedSize((TxnData) readOk.data, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index fbd00f637e30..b4111320d074 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import accord.api.Result; @@ -90,6 +91,7 @@ void serializeOk(RecoverOk recoverOk, DataOutputPlus out, int version) throws IO CommandSerializers.ballot.serialize(recoverOk.accepted, out, version); serializeNullable(recoverOk.executeAt, out, version, CommandSerializers.timestamp); DepsSerializer.partialDeps.serialize(recoverOk.deps, out, version); + serializeNullable(recoverOk.acceptedDeps, out, version, DepsSerializer.partialDeps); DepsSerializer.deps.serialize(recoverOk.earlierCommittedWitness, out, version); DepsSerializer.deps.serialize(recoverOk.earlierAcceptedNoWitness, out, version); out.writeBoolean(recoverOk.rejectsFastPath); @@ -112,9 +114,9 @@ RecoverNack deserializeNack(Ballot supersededBy, DataInputPlus in, int version) return new RecoverNack(supersededBy); } - RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp executeAt, PartialDeps deps, Deps earlierCommittedWitness, Deps earlierAcceptedNoWitness, boolean rejectsFastPath, Writes writes, Result result, DataInputPlus in, int version) + RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp executeAt, @Nonnull PartialDeps deps, PartialDeps acceptedDeps, Deps earlierCommittedWitness, Deps earlierAcceptedNoWitness, boolean rejectsFastPath, Writes writes, Result result, DataInputPlus in, int version) { - return new RecoverOk(txnId, status, accepted, executeAt, deps, earlierCommittedWitness, earlierAcceptedNoWitness, rejectsFastPath, writes, result); + return new RecoverOk(txnId, status, accepted, executeAt, deps, acceptedDeps, earlierCommittedWitness, earlierAcceptedNoWitness, rejectsFastPath, writes, result); } @Override @@ -129,6 +131,7 @@ public final RecoverReply deserialize(DataInputPlus in, int version) throws IOEx CommandSerializers.ballot.deserialize(in, version), deserializeNullable(in, version, CommandSerializers.timestamp), DepsSerializer.partialDeps.deserialize(in, version), + deserializeNullable(in, version, DepsSerializer.partialDeps), DepsSerializer.deps.deserialize(in, version), DepsSerializer.deps.deserialize(in, version), in.readBoolean(), @@ -150,6 +153,7 @@ long serializedOkSize(RecoverOk recoverOk, int version) size += CommandSerializers.ballot.serializedSize(recoverOk.accepted, version); size += serializedNullableSize(recoverOk.executeAt, version, CommandSerializers.timestamp); size += DepsSerializer.partialDeps.serializedSize(recoverOk.deps, version); + size += serializedNullableSize(recoverOk.acceptedDeps, version, DepsSerializer.partialDeps); size += DepsSerializer.deps.serializedSize(recoverOk.earlierCommittedWitness, version); size += DepsSerializer.deps.serializedSize(recoverOk.earlierAcceptedNoWitness, version); size += TypeSizes.sizeof(recoverOk.rejectsFastPath); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java index 29b28606ba78..115864d8302b 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -28,6 +28,7 @@ import accord.api.Read; import accord.api.Result; import accord.api.Update; +import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -48,7 +49,7 @@ protected byte type() } @Override - public Result compute(TxnId txnId, Data data, @Nullable Read read, @Nullable Update update) + public Result compute(TxnId txnId, Timestamp executeAt, Data data, @Nullable Read read, @Nullable Update update) { return data != null ? (TxnData) data : new TxnData(); } @@ -63,7 +64,7 @@ protected byte type() } @Override - public Result compute(TxnId txnId, Data data, @Nullable Read read, @Nullable Update update) + public Result compute(TxnId txnId, Timestamp executeAt, Data data, @Nullable Read read, @Nullable Update update) { return new TxnData(); } @@ -78,7 +79,7 @@ protected byte type() } @Override - public Result compute(TxnId txnId, Data data, @Nullable Read read, Update update) + public Result compute(TxnId txnId, Timestamp executeAt, Data data, @Nullable Read read, Update update) { checkNotNull(txnId, "txnId should not be null"); checkNotNull(data, "data should not be null"); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index 5b5812aed81d..072a21c1dfab 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -54,6 +54,7 @@ public class TxnRead extends AbstractKeySorted implements Read public static final String SERIAL_READ_NAME = "SERIAL_READ"; public static final TxnDataName SERIAL_READ = TxnDataName.user(SERIAL_READ_NAME); private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnRead(new TxnNamedRead[0], null)); + public static final TxnRead EMPTY = new TxnRead(new TxnNamedRead[0], Keys.EMPTY); private final Keys txnKeys; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index f7b6565dbe89..a35ae36020b3 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -33,6 +33,7 @@ import accord.api.Write; import accord.primitives.Keys; import accord.primitives.Ranges; +import accord.primitives.Timestamp; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -170,7 +171,7 @@ private static ByteBuffer[] merge(Keys leftKeys, Keys rightKeys, ByteBuffer[] le } @Override - public Write apply(Data data) + public Write apply(Timestamp executeAt, Data data) { if (!checkCondition(data)) return TxnWrite.EMPTY; diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 2f9da8910c76..317d3711d402 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -44,6 +44,7 @@ import accord.primitives.Keys; import accord.primitives.Ranges; import accord.primitives.Seekables; +import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.topology.Topologies; @@ -244,7 +245,7 @@ public static class NoopUpdate implements Update } @Override - public Write apply(@Nullable Data data) + public Write apply(Timestamp executeAt, @Nullable Data data) { return null; } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index dce4312d687c..6031f01e7c2a 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -114,7 +114,7 @@ public void commandLoadSave() throws Throwable attrs.partialDeps(dependencies); ImmutableSortedSet waitingOnCommit = ImmutableSortedSet.of(oldTxnId1); ImmutableSortedMap waitingOnApply = ImmutableSortedMap.of(oldTimestamp, oldTxnId2); - attrs.addListener(new Command.Listener(oldTxnId1)); + attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); Command command = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, waitingOnCommit, waitingOnApply, result.left, result.right); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 031777764067..f09c2ab4d00c 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -44,6 +44,7 @@ import accord.impl.CommandsForKey; import accord.impl.InMemoryCommandStore; import accord.local.Command; +import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.CommonAttributes; import accord.local.Node; @@ -197,7 +198,7 @@ public static Ballot ballot(long epoch, long hlc, int node) public static Pair processTxnResult(AccordCommandStore commandStore, TxnId txnId, PartialTxn txn, Timestamp executeAt) throws Throwable { AtomicReference> result = new AtomicReference<>(); - getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(Collections.emptyList(), txn.keys()), + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txn.keys()), safeStore -> { TxnRead read = (TxnRead) txn.read(); Data readData = read.keys().stream().map(key -> { @@ -215,9 +216,9 @@ public static Pair processTxnResult(AccordCommandStore commandSt } }) .reduce(null, TxnData::merge); - Write write = txn.update().apply(readData); - result.set(Pair.create(new Writes(executeAt, (Keys)txn.keys(), write), - txn.query().compute(txnId, readData, txn.read(), txn.update()))); + Write write = txn.update().apply(executeAt, readData); + result.set(Pair.create(new Writes(txnId, executeAt, txn.keys(), write), + txn.query().compute(txnId, executeAt, readData, txn.read(), txn.update()))); })); return result.get(); } @@ -292,7 +293,11 @@ private static class SingleEpochRanges extends CommandStores.RangesForEpochHolde public SingleEpochRanges(Ranges ranges) { this.ranges = ranges; - this.current = new CommandStores.RangesForEpoch(1, ranges); + } + + private void set(CommandStore store) + { + this.current = new CommandStores.RangesForEpoch(1, ranges, store); } } @@ -309,12 +314,15 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS @Override public long now() {return now.getAsLong(); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } }; - return new InMemoryCommandStore.Synchronized(0, + + SingleEpochRanges holder = new SingleEpochRanges(Ranges.of(range)); + InMemoryCommandStore.Synchronized result = new InMemoryCommandStore.Synchronized(0, time, new AccordAgent(), null, - cs -> null, - new SingleEpochRanges(Ranges.of(range))); + cs -> null, holder); + holder.set(result); + return result; } public static AccordCommandStore createAccordCommandStore(Node.Id node, LongSupplier now, Topology topology) @@ -326,12 +334,16 @@ public static AccordCommandStore createAccordCommandStore(Node.Id node, LongSupp @Override public long now() {return now.getAsLong(); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } }; - return new AccordCommandStore(0, + + SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); + AccordCommandStore result = new AccordCommandStore(0, time, new AccordAgent(), null, cs -> NOOP_PROGRESS_LOG, - new SingleEpochRanges(topology.rangesForNode(node))); + holder); + holder.set(result); + return result; } public static AccordCommandStore createAccordCommandStore(LongSupplier now, String keyspace, String table) diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 6eb1af434edc..22f562146fc2 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -20,7 +20,6 @@ import java.time.Duration; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; @@ -92,7 +91,6 @@ import static accord.local.PreLoadContext.contextFor; import static accord.utils.Property.qt; import static accord.utils.async.AsyncChains.getUninterruptibly; -import static java.util.Collections.singleton; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; @@ -100,6 +98,7 @@ import static org.apache.cassandra.service.accord.AccordTestUtils.keys; import static org.apache.cassandra.service.accord.AccordTestUtils.loaded; import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; +import static org.apache.cassandra.service.accord.async.AsyncLoader.txnIds; public class AsyncOperationTest { @@ -150,7 +149,7 @@ public void optionalCommandsForKeyTest() throws Throwable Txn txn = createTxn((int)clock.incrementAndGet()); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); - getUninterruptibly(commandStore.execute(contextFor(Collections.emptyList(), Keys.of(key)), instance -> { + getUninterruptibly(commandStore.execute(contextFor(key), instance -> { SafeCommandsForKey cfk = ((AccordSafeCommandStore) instance).maybeCommandsForKey(key); Assert.assertNull(cfk); })); @@ -193,7 +192,7 @@ private static Command createCommittedUsingLifeCycle(AccordCommandStore commandS PartialDeps deps = PartialDeps.builder(ranges).build(); try { - return getUninterruptibly(commandStore.submit(PreLoadContext.contextFor(Collections.singleton(txnId), partialTxn.keys()), safe -> { + return getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys()), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); CheckedCommands.commit(safe, txnId, route, null, partialTxn, executeAt, deps); @@ -240,7 +239,7 @@ public void testFutureCleanup() throws Throwable createCommittedAndPersist(commandStore, txnId); Consumer consumer = safeStore -> safeStore.command(txnId).readyToExecute(); - PreLoadContext ctx = PreLoadContext.contextFor(singleton(txnId), Keys.EMPTY); + PreLoadContext ctx = contextFor(txnId); AsyncOperation operation = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) { @@ -252,7 +251,7 @@ private AccordStateCache.Instance cache() @Override AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, preLoadContext.txnIds(), (Iterable) preLoadContext.keys()) { + return new AsyncLoader(commandStore, txnIds(preLoadContext), (Iterable) preLoadContext.keys()) { @Override void state(State state) @@ -325,7 +324,7 @@ public void loadFail() assertNoReferences(commandStore, ids, keys); - PreLoadContext ctx = PreLoadContext.contextFor(ids, keys); + PreLoadContext ctx = contextFor(ids, keys); Consumer consumer = Mockito.mock(Consumer.class); @@ -334,7 +333,7 @@ public void loadFail() @Override AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, preLoadContext.txnIds(), (Iterable) preLoadContext.keys()) + return new AsyncLoader(commandStore, txnIds(preLoadContext), (Iterable) preLoadContext.keys()) { @Override Function loadCommandFunction() @@ -386,7 +385,7 @@ public void consumerFails() createCommand(commandStore, rs, ids); assertNoReferences(commandStore, ids, keys); - PreLoadContext ctx = PreLoadContext.contextFor(ids, keys); + PreLoadContext ctx = contextFor(ids, keys); Consumer consumer = Mockito.mock(Consumer.class); String errorMsg = "txn_ids " + ids; @@ -422,7 +421,7 @@ public void writeFail() assertNoReferences(commandStore, ids, keys); - PreLoadContext ctx = PreLoadContext.contextFor(ids, keys); + PreLoadContext ctx = contextFor(ids, keys); Consumer consumer = store -> ids.forEach(id -> store.command(id).readyToExecute()); @@ -466,7 +465,7 @@ protected AsyncWriter.StateMutationFunction writeCommandFunct SafeCommand command = store.command(id); Command current = command.current(); Assertions.assertThat(current.status()).isEqualTo(Status.ReadyToExecute); - Writes writes = current.partialTxn().execute(current.executeAt(), new TxnData()); + Writes writes = current.partialTxn().execute(current.txnId(), current.executeAt(), new TxnData()); command.preapplied(current, current.txnId(), current.asCommitted().waitingOn(), writes, null); })); getUninterruptibly(o2); From f54144d1c12770123e0eefd74dd07cbfe8783350 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Fri, 26 May 2023 11:25:52 -0500 Subject: [PATCH 057/340] - make sure workspace.xml specifies a storagedir - removing unnecessary calls to ServerTestUtils.daemonInitialization() in a handful of tests - minor cleanup in Verb and BTreeSet --- ide/idea/workspace.xml | 1 + .../db/filter/ClusteringIndexNamesFilter.java | 17 +++++++++++++++++ src/java/org/apache/cassandra/net/Verb.java | 19 ++++--------------- .../cassandra/utils/btree/BTreeSet.java | 9 --------- .../apache/cassandra/auth/TxnAuthTest.java | 3 +-- .../cassandra/cql3/CDCStatementTest.java | 8 +++----- .../validation/entities/VirtualTableTest.java | 12 ++++-------- 7 files changed, 30 insertions(+), 39 deletions(-) diff --git a/ide/idea/workspace.xml b/ide/idea/workspace.xml index 7f688b3d9626..1260f8613e82 100644 --- a/ide/idea/workspace.xml +++ b/ide/idea/workspace.xml @@ -189,6 +189,7 @@ -Dcassandra.reads.thresholds.coordinator.defensive_checks_enabled=true -Dcassandra.ring_delay_ms=10000 -Dcassandra.skip_sync=true + -Dcassandra.storagedir=$PROJECT_DIR$/data -Dcassandra.strict.runtime.checks=true -Dcassandra.test.flush_local_schema_changes=false -Dcassandra.test.messagingService.nonGracefulShutdown=true diff --git a/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java b/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java index d646511e164d..f3a0904eeea5 100644 --- a/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java +++ b/src/java/org/apache/cassandra/db/filter/ClusteringIndexNamesFilter.java @@ -150,6 +150,7 @@ public boolean intersects(ClusteringComparator comparator, Slice slice) return false; } + @Override public String toString(TableMetadata metadata) { StringBuilder sb = new StringBuilder(); @@ -197,6 +198,22 @@ public String toCQLString(TableMetadata metadata, RowFilter rowFilter) return sb.toString(); } + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ClusteringIndexNamesFilter that = (ClusteringIndexNamesFilter) o; + return Objects.equals(clusterings, that.clusterings) && + Objects.equals(reversed, that.reversed); + } + + @Override + public int hashCode() + { + return Objects.hash(clusterings, reversed); + } + public Kind kind() { return Kind.NAMES; diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index b1917d448930..9d90fba37efc 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -21,7 +21,6 @@ import java.lang.reflect.Modifier; import java.util.List; import java.util.concurrent.TimeUnit; -import java.util.function.Predicate; import java.util.function.Supplier; import java.util.function.ToLongFunction; @@ -364,30 +363,20 @@ public enum Kind */ Verb(int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler) { - this(NORMAL, id, priority, expiration, stage, serializer, handler, null, null); - } - - Verb(int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Predicate isFinalReply) - { - this(NORMAL, id, priority, expiration, stage, serializer, handler, null, isFinalReply); + this(NORMAL, id, priority, expiration, stage, serializer, handler, null); } Verb(int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Verb responseVerb) { - this(NORMAL, id, priority, expiration, stage, serializer, handler, responseVerb, null); + this(NORMAL, id, priority, expiration, stage, serializer, handler, responseVerb); } Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler) { - this(kind, id, priority, expiration, stage, serializer, handler, null, null); - } - - Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Predicate isFinalReply) - { - this(kind, id, priority, expiration, stage, serializer, handler, null, isFinalReply); + this(kind, id, priority, expiration, stage, serializer, handler, null); } - Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Verb responseVerb, Predicate isFinalReply) + Verb(Kind kind, int id, Priority priority, ToLongFunction expiration, Stage stage, Supplier> serializer, Supplier> handler, Verb responseVerb) { this.stage = stage; if (id < 0) diff --git a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java index d890bcf6ee60..20ee7cf0440b 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java +++ b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java @@ -27,13 +27,11 @@ import java.util.NavigableSet; import java.util.NoSuchElementException; import java.util.Objects; -import java.util.Set; import java.util.SortedSet; import java.util.Spliterator; import java.util.Spliterators; import java.util.function.Function; -import com.google.common.collect.Iterables; import com.google.common.collect.Ordering; import org.apache.cassandra.utils.btree.BTree.Dir; @@ -242,13 +240,6 @@ public boolean containsAll(Collection c) } @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || !(o instanceof Set)) return false; - return Iterables.elementsEqual(this, (Set) o); - } - public int hashCode() { // we can't just delegate to Arrays.deepHashCode(), diff --git a/test/unit/org/apache/cassandra/auth/TxnAuthTest.java b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java index 4743144eba85..8efd05ca1016 100644 --- a/test/unit/org/apache/cassandra/auth/TxnAuthTest.java +++ b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java @@ -47,11 +47,10 @@ public class TxnAuthTest extends CQLTester { @BeforeClass - public static void setUpAuthAndAccord() throws Exception + public static void setUpAuthAndAccord() { CassandraRelevantProperties.ENABLE_NODELOCAL_QUERIES.setBoolean(true); - SchemaLoader.prepareServer(); IRoleManager roleManager = new AuthTestUtils.LocalCassandraRoleManager(); SchemaLoader.setupAuth(roleManager, new AuthTestUtils.LocalPasswordAuthenticator(), diff --git a/test/unit/org/apache/cassandra/cql3/CDCStatementTest.java b/test/unit/org/apache/cassandra/cql3/CDCStatementTest.java index 44ada8e9176b..86bc1141516e 100644 --- a/test/unit/org/apache/cassandra/cql3/CDCStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/CDCStatementTest.java @@ -22,7 +22,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; public class CDCStatementTest extends CQLTester @@ -30,19 +29,18 @@ public class CDCStatementTest extends CQLTester @BeforeClass public static void enableCDC() { - ServerTestUtils.daemonInitialization(); DatabaseDescriptor.setCDCEnabled(true); } @Test - public void testEnableOnCreate() throws Throwable + public void testEnableOnCreate() { createTable("CREATE TABLE %s (key text, val int, primary key(key)) WITH cdc = true;"); Assert.assertTrue(currentTableMetadata().params.cdc); } @Test - public void testEnableOnAlter() throws Throwable + public void testEnableOnAlter() { createTable("CREATE TABLE %s (key text, val int, primary key(key));"); Assert.assertFalse(currentTableMetadata().params.cdc); @@ -51,7 +49,7 @@ public void testEnableOnAlter() throws Throwable } @Test - public void testDisableOnAlter() throws Throwable + public void testDisableOnAlter() { createTable("CREATE TABLE %s (key text, val int, primary key(key)) WITH cdc = true;"); Assert.assertTrue(currentTableMetadata().params.cdc); diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/VirtualTableTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/VirtualTableTest.java index 3b9ce8ac80b8..38a6df7c45c4 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/VirtualTableTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/VirtualTableTest.java @@ -39,7 +39,6 @@ import org.junit.Test; import com.datastax.driver.core.exceptions.InvalidQueryException; -import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.marshal.Int32Type; @@ -57,7 +56,6 @@ import org.apache.cassandra.service.StorageServiceMBean; import org.apache.cassandra.triggers.ITrigger; - import static java.lang.String.format; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -209,8 +207,6 @@ private static Pair updateColumn(Pair row, @BeforeClass public static void setUpVirtualTables() { - ServerTestUtils.daemonInitialization(); - TableMetadata vt1Metadata = TableMetadata.builder(KS_NAME, VT1_NAME) .kind(TableMetadata.Kind.VIRTUAL) .addPartitionKeyColumn("pk", UTF8Type.instance) @@ -1051,7 +1047,7 @@ public void testMBeansMethods() throws Throwable } @Test - public void testDisallowedFilteringOnRegularColumn() throws Throwable + public void testDisallowedFilteringOnRegularColumn() { try { @@ -1065,7 +1061,7 @@ public void testDisallowedFilteringOnRegularColumn() throws Throwable } @Test - public void testDisallowedFilteringOnClusteringColumn() throws Throwable + public void testDisallowedFilteringOnClusteringColumn() { try { @@ -1079,13 +1075,13 @@ public void testDisallowedFilteringOnClusteringColumn() throws Throwable } @Test - public void testAllowedFilteringOnRegularColumn() throws Throwable + public void testAllowedFilteringOnRegularColumn() { executeNet(format("SELECT * FROM %s.%s WHERE v2 = 5", KS_NAME, VT1_NAME)); } @Test - public void testAllowedFilteringOnClusteringColumn() throws Throwable + public void testAllowedFilteringOnClusteringColumn() { executeNet(format("SELECT * FROM %s.%s WHERE c = 'abc'", KS_NAME, VT1_NAME)); } From f76f806de1e0307d09c2e2ef93353a5b415c971a Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 25 May 2023 13:20:39 -0700 Subject: [PATCH 058/340] CEP-15: (C*) Add notion of CommandsForRanges and make this durable in C* patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-18519 --- modules/accord | 2 +- src/java/org/apache/cassandra/dht/Token.java | 17 +- .../service/accord/AccordCommandStore.java | 134 ++++- .../service/accord/AccordCommandStores.java | 22 + .../accord/AccordConfigurationService.java | 13 +- .../service/accord/AccordDataStore.java | 47 ++ .../service/accord/AccordKeyspace.java | 336 +++++++++++- .../service/accord/AccordObjectSizes.java | 3 + .../accord/AccordSafeCommandStore.java | 152 ++++-- .../service/accord/AccordService.java | 8 +- .../service/accord/AccordStateCache.java | 8 + .../service/accord/AccordVerbHandler.java | 15 +- .../service/accord/CommandsForRanges.java | 515 ++++++++++++++++++ .../service/accord/api/AccordRoutingKey.java | 17 +- .../service/accord/async/AsyncLoader.java | 107 +++- .../service/accord/async/AsyncOperation.java | 20 +- .../serializers/CommandsForKeySerializer.java | 2 +- .../serializers/ListenerSerializers.java | 35 +- .../apache/cassandra/utils/IntervalTree.java | 89 ++- .../test/accord/AccordCQLTest.java | 18 +- .../test/accord/AccordTestBase.java | 39 +- .../test/accord/NewSchemaTest.java | 85 +++ .../service/accord/async/AsyncLoaderTest.java | 12 +- .../accord/async/AsyncOperationTest.java | 4 +- .../CommandsForKeySerializerTest.java | 4 +- 25 files changed, 1571 insertions(+), 133 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordDataStore.java create mode 100644 src/java/org/apache/cassandra/service/accord/CommandsForRanges.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java diff --git a/modules/accord b/modules/accord index b99c4671fa0b..3d0ff07cd5c7 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit b99c4671fa0b22bed7f5a37fc5acaa2d2579e5b2 +Subproject commit 3d0ff07cd5c7db43390b85afa593e6f76471d886 diff --git a/src/java/org/apache/cassandra/dht/Token.java b/src/java/org/apache/cassandra/dht/Token.java index f5ff32e00edc..7cbe0ebccb77 100644 --- a/src/java/org/apache/cassandra/dht/Token.java +++ b/src/java/org/apache/cassandra/dht/Token.java @@ -26,11 +26,12 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.utils.bytecomparable.ByteComparable; -import org.apache.cassandra.utils.bytecomparable.ByteSource; +import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.tcm.serialization.PartitionerAwareMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; +import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; public abstract class Token implements RingPosition, Serializable { @@ -45,6 +46,16 @@ public static abstract class TokenFactory public abstract ByteBuffer toByteArray(Token token); public abstract Token fromByteArray(ByteBuffer bytes); + public byte[] toOrderedByteArray(Token token, ByteComparable.Version version) + { + return ByteSourceInverse.readBytes(asComparableBytes(token, version)); + } + + public Token fromOrderedByteArray(byte[] bytes, ByteComparable.Version version) + { + return fromComparableBytes(ByteSource.peekable(ByteSource.fixedLength(bytes)), version); + } + /** * Produce a byte-comparable representation of the token. * See {@link Token#asComparableBytes} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index b3db3f382238..d2f627fb9b09 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -18,19 +18,29 @@ package org.apache.cassandra.service.accord; +import java.util.Collections; +import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; +import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import accord.api.Agent; import accord.api.DataStore; +import accord.api.Key; import accord.api.ProgressLog; +import accord.impl.CommandTimeseriesHolder; import accord.impl.CommandsForKey; import accord.local.Command; import accord.local.CommandStore; @@ -39,20 +49,35 @@ import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; -import accord.primitives.Deps; +import accord.local.SaveStatus; +import accord.primitives.AbstractKeys; +import accord.primitives.AbstractRanges; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.RoutableKey; +import accord.primitives.Routables; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; +import accord.utils.async.Observable; +import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; public class AccordCommandStore extends CommandStore { + private static final Logger logger = LoggerFactory.getLogger(AccordCommandStore.class); + private static long getThreadId(ExecutorService executor) { try @@ -78,6 +103,7 @@ private static long getThreadId(ExecutorService executor) private AsyncOperation currentOperation = null; private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; + private CommandsForRanges commandsForRanges = new CommandsForRanges(); public AccordCommandStore(int id, NodeTimeService time, @@ -94,17 +120,67 @@ public AccordCommandStore(int id, this.commandCache = stateCache.instance(TxnId.class, accord.local.Command.class, AccordSafeCommand::new, AccordObjectSizes::command); this.commandsForKeyCache = stateCache.instance(RoutableKey.class, CommandsForKey.class, AccordSafeCommandsForKey::new, AccordObjectSizes::commandsForKey); executor.execute(() -> CommandStore.register(this)); + executor.execute(this::loadRangesToCommands); } - @Override - public boolean inStore() + private void loadRangesToCommands() { - return Thread.currentThread().getId() == threadId; + AsyncPromise future = new AsyncPromise<>(); + AccordKeyspace.findAllCommandsByDomain(id, Routable.Domain.Range, ImmutableSet.of("txn_id", "status", "txn", "execute_at", "dependencies"), new Observable() + { + private CommandsForRanges.Builder builder = new CommandsForRanges.Builder(); + @Override + public void onNext(UntypedResultSet.Row row) throws Exception + { + TxnId txnId = AccordKeyspace.deserializeTxnId(row); + SaveStatus status = AccordKeyspace.deserializeStatus(row); + Timestamp executeAt = AccordKeyspace.deserializeExecuteAt(row); + + PartialTxn txn = AccordKeyspace.deserializeTxn(row); + Seekables keys = txn.keys(); + if (keys.domain() != Routable.Domain.Range) + throw new AssertionError(String.format("Txn keys are not range", txn)); + Ranges ranges = (Ranges) keys; + + PartialDeps deps = AccordKeyspace.deserializeDependencies(row); + List dependsOn = deps == null ? Collections.emptyList() : deps.txnIds(); + builder.put(txnId, ranges, status, executeAt, dependsOn); + } + + @Override + public void onError(Throwable t) + { + builder = null; + future.tryFailure(t); + } + + @Override + public void onCompleted() + { + CommandsForRanges result = this.builder.build(); + builder = null; + future.trySuccess(result); + } + }); + try + { + commandsForRanges = future.get(); + logger.debug("Loaded {} intervals", commandsForRanges.size()); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e.getCause()); + } } @Override - protected void registerHistoricalTransactions(Deps deps) + public boolean inStore() { + return Thread.currentThread().getId() == threadId; } public void setCacheSize(long bytes) @@ -234,7 +310,7 @@ public void executeBlocking(Runnable runnable) public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, Map commands, - Map commandsForKeys) + NavigableMap commandsForKeys) { Invariants.checkState(current == null); commands.values().forEach(AccordSafeState::preExecute); @@ -252,6 +328,52 @@ public void completeOperation(AccordSafeCommandStore store, current = null; } + O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + { + keysOrRanges = keysOrRanges.slice(slice, Routables.Slice.Minimal); + switch (keysOrRanges.domain()) + { + case Key: + { + AbstractKeys keys = (AbstractKeys) keysOrRanges; + for (CommandTimeseriesHolder summary : commandsForRanges.search(keys)) + { + accumulate = map.apply(summary, accumulate); + if (accumulate.equals(terminalValue)) + return accumulate; + } + } + break; + case Range: + { + AbstractRanges ranges = (AbstractRanges) keysOrRanges; + for (Range range : ranges) + { + CommandTimeseriesHolder summary = commandsForRanges.search(range); + if (summary == null) + continue; + accumulate = map.apply(summary, accumulate); + if (accumulate.equals(terminalValue)) + return accumulate; + } + } + break; + default: + throw new AssertionError("Unknown domain: " + keysOrRanges.domain()); + } + return accumulate; + } + + CommandsForRanges commandsForRanges() + { + return commandsForRanges; + } + + CommandsForRanges.Updater updateRanges() + { + return commandsForRanges.update(); + } + public void abortCurrentOperation() { current = null; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 3bad135715af..38040b466be0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -29,12 +29,14 @@ import accord.local.PreLoadContext; import accord.local.SafeCommandStore; import accord.local.ShardDistributor; +import accord.primitives.Range; import accord.primitives.Routables; import accord.topology.Topology; import accord.utils.MapReduceConsume; import accord.utils.RandomSource; import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.journal.AsyncWriteCallback; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; public class AccordCommandStores extends CommandStores { @@ -96,6 +98,26 @@ public void onFailure(Throwable error) }); } + @Override + protected boolean shouldBootstrap(Node node, Topology previous, Topology updated, Range range) + { + if (!super.shouldBootstrap(node, previous, updated, range)) + return false; + // we see new ranges when a new keyspace is added, so avoid bootstrap in these cases + return contains(previous, ((AccordRoutingKey) range.start()).keyspace()); + } + + private static boolean contains(Topology previous, String searchKeyspace) + { + for (Range range : previous.ranges()) + { + String keyspace = ((AccordRoutingKey) range.start()).keyspace(); + if (keyspace.equals(searchKeyspace)) + return true; + } + return false; + } + private long cacheSize; synchronized void setCacheSize(long bytes) diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index e8f10336649a..3ab64c9bd097 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -20,8 +20,9 @@ import java.util.ArrayList; import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; -import com.google.common.base.Preconditions; +import javax.annotation.concurrent.GuardedBy; import accord.api.ConfigurationService; import accord.local.Node; @@ -33,7 +34,8 @@ public class AccordConfigurationService implements ConfigurationService { private final Node.Id localId; - private final List listeners = new ArrayList<>(); + private final List listeners = new CopyOnWriteArrayList<>(); + @GuardedBy("this") private final List epochs = new ArrayList<>(); public AccordConfigurationService(Node.Id localId) @@ -43,7 +45,7 @@ public AccordConfigurationService(Node.Id localId) } @Override - public synchronized void registerListener(Listener listener) + public void registerListener(Listener listener) { listeners.add(listener); } @@ -55,7 +57,7 @@ public synchronized Topology currentTopology() } @Override - public Topology getTopologyForEpoch(long epoch) + public synchronized Topology getTopologyForEpoch(long epoch) { return epochs.get((int) epoch); } @@ -64,7 +66,8 @@ public Topology getTopologyForEpoch(long epoch) public synchronized void fetchTopologyForEpoch(long epoch) { Topology current = currentTopology(); - Preconditions.checkArgument(epoch > current.epoch(), "Requested to fetch epoch %d which is <= %d (current epoch)", epoch, current.epoch()); + if (epoch < current.epoch()) + return; while (current.epoch() < epoch) { current = AccordTopologyUtils.createTopology(epochs.size()); diff --git a/src/java/org/apache/cassandra/service/accord/AccordDataStore.java b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java new file mode 100644 index 000000000000..b1f191a39678 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.api.DataStore; +import accord.local.Node; +import accord.local.SafeCommandStore; +import accord.primitives.Ranges; +import accord.primitives.SyncPoint; +import accord.primitives.Timestamp; +import accord.utils.async.AsyncResults; + +public enum AccordDataStore implements DataStore +{ + INSTANCE; + + @Override + public FetchResult fetch(Node node, SafeCommandStore safeStore, Ranges ranges, SyncPoint syncPoint, FetchRanges callback) + { + //TODO (implement): do real work + callback.starting(ranges).started(Timestamp.NONE); + callback.fetched(ranges); + return new ImmediateFetchFuture(ranges); + } + + private static class ImmediateFetchFuture extends AsyncResults.SettableResult implements FetchResult + { + ImmediateFetchFuture(Ranges ranges) { setSuccess(ranges); } + @Override public void abort(Ranges abort) { } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 9d38bd7133a7..d02c48badcf3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -30,10 +30,12 @@ import java.util.NavigableMap; import java.util.Set; import java.util.TreeMap; +import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.function.Supplier; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Lists; @@ -43,7 +45,7 @@ import accord.api.Result; import accord.impl.CommandsForKey; -import accord.impl.CommandsForKey.CommandTimeseries; +import accord.impl.CommandTimeseries; import accord.local.Command; import accord.local.CommandStore; import accord.local.CommonAttributes; @@ -54,11 +56,16 @@ import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; import accord.primitives.Writes; import accord.utils.Invariants; +import accord.utils.async.Observable; +import org.apache.cassandra.concurrent.DebuggableTask; +import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; @@ -77,11 +84,14 @@ import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.filter.DataLimits; import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.marshal.ByteArrayAccessor; import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.BytesType; +import org.apache.cassandra.db.marshal.CompositeType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.db.marshal.TupleType; +import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.BTreeRow; @@ -90,6 +100,10 @@ import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.transform.FilteredPartitions; +import org.apache.cassandra.dht.ByteOrderedPartitioner; +import org.apache.cassandra.dht.LocalPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.LocalVersionedSerializer; import org.apache.cassandra.io.util.DataInputBuffer; @@ -97,6 +111,7 @@ import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; @@ -113,6 +128,7 @@ import org.apache.cassandra.service.accord.serializers.ListenerSerializers; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static java.lang.String.format; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; @@ -129,9 +145,10 @@ public class AccordKeyspace public static final String COMMANDS = "commands"; public static final String COMMANDS_FOR_KEY = "commands_for_key"; - private static final String TIMESTAMP_TUPLE = "tuple"; private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); - private static final String KEY_TUPLE = "tuple"; + private static final String TIMESTAMP_TUPLE = TIMESTAMP_TYPE.asCQL3Type().toString(); + private static final TupleType KEY_TYPE = new TupleType(Arrays.asList(UUIDType.instance, BytesType.instance)); + private static final String KEY_TUPLE = KEY_TYPE.asCQL3Type().toString(); private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexSliceFilter(Slices.ALL, false); @@ -157,12 +174,36 @@ ImmutableSortedMap getValues(CommandsForKey cfk) } } + private enum TokenType + { + Murmur3((byte) 1), + ByteOrdered((byte) 2), + ; + + private final byte value; + + TokenType(byte b) + { + this.value = b; + } + + static TokenType valueOf(Token token) + { + if (token instanceof Murmur3Partitioner.LongToken) + return Murmur3; + if (token instanceof ByteOrderedPartitioner.BytesToken) + return ByteOrdered; + throw new IllegalArgumentException("Unexpected token type: " + token.getClass()); + } + } + // TODO: store timestamps as blobs (confirm there are no negative numbers, or offset) private static final TableMetadata Commands = parse(COMMANDS, "accord commands", "CREATE TABLE %s (" + "store_id int," + + "domain int," // this is stored as part of txn_id, used currently for more cheaper scans of the table + format("txn_id %s,", TIMESTAMP_TUPLE) + "status int," + "home_key blob," @@ -179,8 +220,10 @@ ImmutableSortedMap getValues(CommandsForKey cfk) + format("waiting_on_commit set<%s>,", TIMESTAMP_TUPLE) + format("waiting_on_apply map<%s, blob>,", TIMESTAMP_TUPLE) + "listeners set, " - + "PRIMARY KEY((store_id, txn_id))" - + ')'); + + "PRIMARY KEY((store_id, domain, txn_id))" + + ')') + .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, Int32Type.instance, TIMESTAMP_TYPE))) + .build(); // TODO: naming is not very clearly distinct from the base serializers private static class CommandsSerializers @@ -232,6 +275,7 @@ private static class CommandsColumns "accord commands per key", "CREATE TABLE %s (" + "store_id int, " + + "key_token blob, " // can't use "token" as this is restricted word in CQL + format("key %s, ", KEY_TUPLE) + format("max_timestamp %s static, ", TIMESTAMP_TUPLE) + format("last_executed_timestamp %s static, ", TIMESTAMP_TUPLE) @@ -241,8 +285,10 @@ private static class CommandsColumns + "series int, " + format("timestamp %s, ", TIMESTAMP_TUPLE) + "data blob, " - + "PRIMARY KEY((store_id, key), series, timestamp)" - + ')'); + + "PRIMARY KEY((store_id, key_token, key), series, timestamp)" + + ')') + .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) + .build(); private static class CommandsForKeyColumns { @@ -293,13 +339,12 @@ else if (hasRegularChanges) } } - private static TableMetadata parse(String name, String description, String cql) + private static TableMetadata.Builder parse(String name, String description, String cql) { return CreateTableStatement.parse(format(cql, name), ACCORD_KEYSPACE_NAME) .id(TableId.forSystemTable(ACCORD_KEYSPACE_NAME, name)) .comment(description) - .gcGraceSeconds((int) TimeUnit.DAYS.toSeconds(90)) - .build(); + .gcGraceSeconds((int) TimeUnit.DAYS.toSeconds(90)); } public static KeyspaceMetadata metadata() @@ -339,7 +384,7 @@ private static T deserialize(ByteBuffer bytes, LocalVersionedSerializer s private static T deserializeOrNull(ByteBuffer bytes, LocalVersionedSerializer serializer) throws IOException { - return bytes != null && ! ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, serializer) : null; + return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, serializer) : null; } private static ImmutableSortedMap deserializeWaitingOnApply(Map serialized) @@ -525,6 +570,7 @@ public static Mutation getCommandMutation(AccordCommandStore commandStore, Accor } ByteBuffer key = CommandsColumns.keyComparator.make(commandStore.id(), + command.txnId().domain().ordinal(), serializeTimestamp(command.txnId())).serializeAsPartitionKey(); Row row = builder.build(); if (row.isEmpty()) @@ -538,6 +584,20 @@ public static Mutation getCommandMutation(AccordCommandStore commandStore, Accor } } + public static ByteBuffer serializeToken(Token token) + { + return serializeToken(token, ByteBufferAccessor.instance); + } + + private static V serializeToken(Token token, ValueAccessor accessor) + { + TokenType type = TokenType.valueOf(token); + byte[] ordered = token.getPartitioner().getTokenFactory().toOrderedByteArray(token, ByteComparable.Version.OSS50); + V value = accessor.allocate(ordered.length + 1); + accessor.putByte(value, 0, type.value); + ByteArrayAccessor.instance.copyTo(ordered, 0, value, accessor, 1, ordered.length); + return value; + } private static ByteBuffer serializeKey(PartitionKey key) { @@ -596,13 +656,218 @@ public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId t { String cql = "SELECT * FROM %s.%s " + "WHERE store_id = ? " + + "AND domain = ? " + "AND txn_id=(?, ?, ?)"; return executeInternal(String.format(cql, ACCORD_KEYSPACE_NAME, COMMANDS), commandStore.id(), + txnId.domain().ordinal(), txnId.msb, txnId.lsb, txnId.node.id); } + public static void findAllCommandsByDomain(int commandStore, Routable.Domain domain, Set columns, Observable callback) + { + WalkCommandsForDomain work = new WalkCommandsForDomain(commandStore, domain, columns, Stage.READ.executor(), callback); + work.schedule(); + } + + private static abstract class TableWalk implements Runnable, DebuggableTask + { + private final long creationTimeNanos = Clock.Global.nanoTime(); + private final Executor executor; + private final Observable callback; + private long startTimeNanos = -1; + private int numQueries = 0; + private UntypedResultSet.Row lastSeen = null; + + private TableWalk(Executor executor, Observable callback) + { + this.executor = executor; + this.callback = callback; + } + + protected abstract UntypedResultSet query(UntypedResultSet.Row lastSeen); + + public final void schedule() + { + executor.execute(this); + } + + @Override + public final void run() + { + try + { + if (startTimeNanos == -1) + startTimeNanos = Clock.Global.nanoTime(); + numQueries++; + UntypedResultSet result = query(lastSeen); + if (result.isEmpty()) + { + callback.onCompleted(); + return; + } + UntypedResultSet.Row lastRow = null; + for (UntypedResultSet.Row row : result) + { + callback.onNext(row); + lastRow = row; + } + lastSeen = lastRow; + schedule(); + } + catch (Throwable t) + { + callback.onError(t); + } + } + + @Override + public long creationTimeNanos() + { + return creationTimeNanos; + } + + @Override + public long startTimeNanos() + { + return startTimeNanos; + } + + @Override + public String description() + { + return String.format("Table Walker for %s; queries = %d", getClass().getSimpleName(), numQueries); + } + } + + private static String selection(TableMetadata metadata, Set requiredColumns, Set forIteration) + { + StringBuilder selection = new StringBuilder(); + if (requiredColumns.isEmpty()) + selection.append("*"); + else + { + Sets.SetView other = Sets.difference(requiredColumns, forIteration); + for (String name : other) + { + ColumnMetadata meta = metadata.getColumn(new ColumnIdentifier(name, true)); + if (meta == null) + throw new IllegalArgumentException("Unknown column: " + name); + } + List names = new ArrayList<>(forIteration.size() + other.size()); + names.addAll(forIteration); + names.addAll(other); + // this sort is to make sure the CQL is determanistic + Collections.sort(names); + for (int i = 0; i < names.size(); i++) + { + if (i > 0) + selection.append(", "); + selection.append(names.get(i)); + } + } + return selection.toString(); + } + + private static class WalkCommandsForDomain extends TableWalk + { + private static final Set COLUMNS_FOR_ITERATION = ImmutableSet.of("txn_id", "store_id", "domain"); + private final String cql; + private final int storeId, domain; + + private WalkCommandsForDomain(int commandStore, Routable.Domain domain, Set requiredColumns, Executor executor, Observable callback) + { + super(executor, callback); + this.storeId = commandStore; + this.domain = domain.ordinal(); + cql = String.format("SELECT %s " + + "FROM %s " + + "WHERE store_id = ? " + + " AND domain = ? " + + " AND token(store_id, domain, txn_id) > token(?, ?, (?, ?, ?)) " + + "ALLOW FILTERING", selection(Commands, requiredColumns, COLUMNS_FOR_ITERATION), Commands); + } + + @Override + protected UntypedResultSet query(UntypedResultSet.Row lastSeen) + { + TxnId lastTxnId = lastSeen == null ? + new TxnId(0, 0, Txn.Kind.Read, Routable.Domain.Key, Node.Id.NONE) + : deserializeTxnId(lastSeen); + return executeInternal(cql, storeId, domain, storeId, domain, lastTxnId.msb, lastTxnId.lsb, lastTxnId.node.id); + } + } + + public static void findAllKeysBetween(int commandStore, + Token start, boolean startInclusive, + Token end, boolean endInclusive, + Observable callback) + { + //TODO (optimize) : CQL doesn't look smart enough to only walk Index.db, and ends up walking the Data.db file for each row in the partitions found (for frequent keys, this cost adds up) + // it would be possible to find all SSTables that "could" intersect this range, then have a merge iterator over the Index.db (filtered to the range; index stores partition liveness)... + KeysBetween work = new KeysBetween(commandStore, + AccordKeyspace.serializeToken(start), startInclusive, + AccordKeyspace.serializeToken(end), endInclusive, + ImmutableSet.of("key"), + Stage.READ.executor(), Observable.distinct(callback).map(value -> AccordKeyspace.deserializeKey(value))); + work.schedule(); + } + + private static class KeysBetween extends TableWalk + { + private static final Set COLUMNS_FOR_ITERATION = ImmutableSet.of("store_id", "key_token"); + + private final int storeId; + private final ByteBuffer start, end; + private final String cqlFirst; + private final String cqlContinue; + + private KeysBetween(int storeId, + ByteBuffer start, boolean startInclusive, + ByteBuffer end, boolean endInclusive, + Set requiredColumns, + Executor executor, Observable callback) + { + super(executor, callback); + this.storeId = storeId; + this.start = start; + this.end = end; + + String selection = selection(CommandsForKeys, requiredColumns, COLUMNS_FOR_ITERATION); + this.cqlFirst = String.format("SELECT DISTINCT %s\n" + + "FROM %s\n" + + "WHERE store_id = ?\n" + + (startInclusive ? " AND key_token >= ?\n" : " AND key_token > ?\n") + + (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + + "ALLOW FILTERING", + selection, CommandsForKeys); + this.cqlContinue = String.format("SELECT DISTINCT %s\n" + + "FROM %s\n" + + "WHERE store_id = ?\n" + + " AND key_token > ?\n" + + " AND key > ?\n" + + (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + + "ALLOW FILTERING", + selection, CommandsForKeys); + } + + @Override + protected UntypedResultSet query(UntypedResultSet.Row lastSeen) + { + if (lastSeen == null) + { + return executeInternal(cqlFirst, storeId, start, end); + } + else + { + ByteBuffer previousToken = lastSeen.getBytes("key_token"); + ByteBuffer previousKey = lastSeen.getBytes("key"); + return executeInternal(cqlContinue, storeId, previousToken, previousKey, end); + } + } + } + public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) { commandStore.checkNotInStoreThread(); @@ -617,19 +882,19 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) try { UntypedResultSet.Row row = rows.one(); - Invariants.checkState(deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits).equals(txnId)); - SaveStatus status = SaveStatus.values()[row.getInt("status")]; + Invariants.checkState(deserializeTxnId(row).equals(txnId)); + SaveStatus status = deserializeStatus(row); CommonAttributes.Mutable attributes = new CommonAttributes.Mutable(txnId); // TODO: something less brittle than ordinal, more efficient than values() attributes.durability(Status.Durability.values()[row.getInt("durability", 0)]); attributes.homeKey(deserializeOrNull(row.getBlob("home_key"), CommandsSerializers.routingKey)); attributes.progressKey(deserializeOrNull(row.getBlob("progress_key"), CommandsSerializers.routingKey)); attributes.route(deserializeOrNull(row.getBlob("route"), CommandsSerializers.route)); - attributes.partialTxn(deserializeOrNull(row.getBlob("txn"), CommandsSerializers.partialTxn)); - attributes.partialDeps(deserializeOrNull(row.getBlob("dependencies"), CommandsSerializers.partialDeps)); + attributes.partialTxn(deserializeTxn(row)); + attributes.partialDeps(deserializeDependencies(row)); attributes.setListeners(deserializeListeners(row, "listeners")); - Timestamp executeAt = deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits); + Timestamp executeAt = deserializeExecuteAt(row); Ballot promised = deserializeTimestampOrNull(row, "promised_ballot", Ballot::fromBits); Ballot accepted = deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits); ImmutableSortedSet waitingOnCommit = deserializeTxnIdNavigableSet(row, "waiting_on_commit"); @@ -670,6 +935,43 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) } } + public static PartialDeps deserializeDependencies(UntypedResultSet.Row row) throws IOException + { + return deserializeOrNull(row.getBlob("dependencies"), CommandsSerializers.partialDeps); + } + + public static Timestamp deserializeExecuteAt(UntypedResultSet.Row row) + { + return deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits); + } + + public static SaveStatus deserializeStatus(UntypedResultSet.Row row) + { + return SaveStatus.values()[row.getInt("status")]; + } + + public static TxnId deserializeTxnId(UntypedResultSet.Row row) + { + return deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits); + } + + public static PartialTxn deserializeTxn(UntypedResultSet.Row row) throws IOException + { + return deserializeOrNull(row.getBlob("txn"), CommandsSerializers.partialTxn); + } + + public static PartitionKey deserializeKey(UntypedResultSet.Row row) + { + List split = KEY_TYPE.unpack(row.getBytes("key"), ByteBufferAccessor.instance); + TableId tableId = TableId.fromUUID(UUIDSerializer.instance.deserialize(split.get(0))); + ByteBuffer key = split.get(1); + + TableMetadata metadata = Schema.instance.getTableMetadata(tableId); + if (metadata == null) + throw new IllegalStateException("Table with id " + tableId + " could not be found; was it deleted?"); + return new PartitionKey(metadata.keyspace, tableId, metadata.partitioner.decorateKey(key)); + } + private static void addSeriesMutations(ImmutableSortedMap prev, ImmutableSortedMap value, SeriesKind kind, @@ -714,7 +1016,9 @@ private static void addSeriesMutations(CommandsForKey original, private static DecoratedKey makeKey(CommandStore commandStore, PartitionKey key) { + Token token = key.token(); ByteBuffer pk = CommandsForKeyColumns.keyComparator.make(commandStore.id(), + serializeToken(token), serializeKey(key)).serializeAsPartitionKey(); return CommandsForKeys.partitioner.decorateKey(pk); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index c79d1ba5b9b8..eac47bfe4e7e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -245,12 +245,15 @@ public static long writes(Writes writes) private static final long EMPTY_COMMAND_LISTENER = measure(new Command.ProxyListener(null)); private static final long EMPTY_CFK_LISTENER = measure(new CommandsForKey.Listener((Key) null)); + private static final long EMPTY_CFR_LISTENER = measure(new CommandsForRanges.Listener(null)); public static long listener(Command.DurableAndIdempotentListener listener) { if (listener instanceof Command.ProxyListener) return EMPTY_COMMAND_LISTENER + timestamp(((Command.ProxyListener) listener).txnId()); if (listener instanceof CommandsForKey.Listener) return EMPTY_CFK_LISTENER + key(((CommandsForKey.Listener) listener).key()); + if (listener instanceof CommandsForRanges.Listener) + return EMPTY_CFR_LISTENER + timestamp(((CommandsForRanges.Listener) listener).txnId); throw new IllegalArgumentException("Unhandled listener type: " + listener.getClass()); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index e1bf80895bc5..5c6ac654f6cd 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -18,9 +18,8 @@ package org.apache.cassandra.service.accord; -import java.util.Comparator; import java.util.Map; -import java.util.Objects; +import java.util.NavigableMap; import java.util.function.BiFunction; import javax.annotation.Nullable; @@ -30,15 +29,20 @@ import accord.api.Key; import accord.api.ProgressLog; import accord.impl.AbstractSafeCommandStore; +import accord.impl.CommandTimeseries; +import accord.impl.CommandTimeseries.CommandLoader; +import accord.impl.CommandTimeseriesHolder; import accord.impl.CommandsForKey; import accord.impl.SafeCommandsForKey; +import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; import accord.local.CommonAttributes; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.Status; import accord.primitives.AbstractKeys; -import accord.primitives.Keys; +import accord.primitives.Deps; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.RoutableKey; import accord.primitives.Routables; @@ -51,12 +55,13 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore { private final Map commands; - private final Map commandsForKeys; + private final NavigableMap commandsForKeys; private final AccordCommandStore commandStore; + CommandsForRanges.Updater rangeUpdates = null; public AccordSafeCommandStore(PreLoadContext context, Map commands, - Map commandsForKey, + NavigableMap commandsForKey, AccordCommandStore commandStore) { super(context); @@ -146,24 +151,59 @@ public long latestEpoch() @Override public Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) { - // TODO: Seekables - // TODO: efficiency - return ((Keys)keysOrRanges).stream() - .map(this::maybeCommandsForKey) - .filter(Objects::nonNull) - .map(SafeCommandsForKey::current) - .filter(Objects::nonNull) - .map(CommandsForKey::max) - .max(Comparator.naturalOrder()) - .orElse(Timestamp.NONE); + return mapReduce(keysOrRanges, slice, (ts, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, null); } - private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + @Override + public void registerHistoricalTransactions(Deps deps) + { + // used in places such as accord.local.CommandStore.fetchMajorityDeps + // We find a set of dependencies for a range then update CommandsFor to know about them + CommandStores.RangesForEpochHolder rangesForEpochHolder = commandStore.rangesForEpochHolder(); + Ranges allRanges = rangesForEpochHolder.get().all(); + deps.keyDeps.keys().forEach(allRanges, key -> { + SafeCommandsForKey cfk = commandsForKey(key); + deps.keyDeps.forEach(key, txnId -> { + // TODO (desired, efficiency): this can be made more efficient by batching by epoch + if (rangesForEpochHolder.get().coordinates(txnId).contains(key)) + return; // already coordinates, no need to replicate + if (!rangesForEpochHolder.get().allBefore(txnId.epoch()).contains(key)) + return; + + cfk.registerNotWitnessed(txnId); + }); + }); + CommandsForRanges commandsForRanges = commandStore.commandsForRanges(); + deps.rangeDeps.forEachUniqueTxnId(allRanges, txnId -> { + if (commandsForRanges.containsLocally(txnId)) + return; + + Ranges ranges = deps.rangeDeps.ranges(txnId); + if (rangesForEpochHolder.get().coordinates(txnId).intersects(ranges)) + return; // already coordinates, no need to replicate + if (!rangesForEpochHolder.get().allBefore(txnId.epoch()).intersects(ranges)) + return; + + updateRanges().mergeRemote(txnId, ranges.slice(allRanges), Ranges::with); + }); + } + + private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + { + accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate, terminalValue); + if (accumulate.equals(terminalValue)) + return accumulate; + return mapReduceForKey(keysOrRanges, slice, map, accumulate, terminalValue); + } + + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) { - switch (keysOrRanges.domain()) { + switch (keysOrRanges.domain()) + { default: - throw new AssertionError(); + throw new AssertionError("Unknown domain: " + keysOrRanges.domain()); case Key: + { // TODO: efficiency AbstractKeys keys = (AbstractKeys) keysOrRanges; for (Key key : keys) @@ -174,10 +214,26 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunc if (accumulate.equals(terminalValue)) return accumulate; } - break; + } + break; case Range: - // TODO (required): implement - throw new UnsupportedOperationException(); + { + // Assuming the range provided is in the PreLoadContext, then AsyncLoader has populated commandsForKeys with keys that + // are contained within the ranges... so walk all keys found in commandsForKeys + Routables sliced = keysOrRanges.slice(slice, Routables.Slice.Minimal); + if (!context.keys().slice(slice, Routables.Slice.Minimal).containsAll(sliced)) + throw new AssertionError("Range(s) detected not present in the PreLoadContext: expected " + context.keys() + " but given " + keysOrRanges); + for (RoutableKey key : commandsForKeys.keySet()) + { + //TODO (duplicate code): this is a repeat of Key... only change is checking contains in range + if (!sliced.contains(key)) continue; + SafeCommandsForKey forKey = commandsForKey(key); + accumulate = map.apply(forKey.current(), accumulate); + if (accumulate.equals(terminalValue)) + return accumulate; + } + } + break; } return accumulate; } @@ -185,8 +241,8 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunc @Override public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, T terminalValue) { - accumulate = mapReduceForKey(keysOrRanges, slice, (forKey, prev) -> { - CommandsForKey.CommandTimeseries timeseries; + accumulate = mapReduce(keysOrRanges, slice, (forKey, prev) -> { + CommandTimeseries timeseries; switch (testTimestamp) { default: throw new AssertionError(); @@ -198,17 +254,17 @@ public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind test case MAY_EXECUTE_BEFORE: timeseries = forKey.byExecuteAt(); } - CommandsForKey.CommandTimeseries.TestTimestamp remapTestTimestamp; + CommandTimeseries.TestTimestamp remapTestTimestamp; switch (testTimestamp) { default: throw new AssertionError(); case STARTED_AFTER: case EXECUTES_AFTER: - remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.AFTER; + remapTestTimestamp = CommandTimeseries.TestTimestamp.AFTER; break; case STARTED_BEFORE: case MAY_EXECUTE_BEFORE: - remapTestTimestamp = CommandsForKey.CommandTimeseries.TestTimestamp.BEFORE; + remapTestTimestamp = CommandTimeseries.TestTimestamp.BEFORE; } return timeseries.mapReduce(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, prev, terminalValue); }, accumulate, terminalValue); @@ -227,16 +283,46 @@ public CommonAttributes completeRegistration(Seekables seekables, Ranges r @Override public CommonAttributes completeRegistration(Seekable seekable, Ranges ranges, AccordSafeCommand liveCommand, CommonAttributes attrs) { - Key key = (Key) seekable; - if (ranges.contains(key)) + switch (seekable.domain()) { - AccordSafeCommandsForKey cfk = commandsForKey(key); - cfk.register(liveCommand.current()); - attrs = attrs.mutable().addListener(new CommandsForKey.Listener(key)); + case Key: + { + Key key = seekable.asKey(); + if (ranges.contains(key)) + { + AccordSafeCommandsForKey cfk = commandsForKey(key); + cfk.register(liveCommand.current()); + attrs = attrs.mutable().addListener(new CommandsForKey.Listener(key)); + } + } + break; + case Range: + Range range = seekable.asRange(); + if (!ranges.intersects(range)) + return attrs; + // TODO (api) : cleaner way to deal with this? This is tracked at the Ranges level and not Range level + // but we register at the Range level... + if (!attrs.durableListeners().stream().anyMatch(l -> l instanceof CommandsForRanges.Listener)) + { + CommandsForRanges.Listener listener = new CommandsForRanges.Listener(liveCommand.txnId()); + attrs = attrs.mutable().addListener(listener); + // trigger to allow it to run right away + listener.onChange(this, liveCommand); + } + break; + default: + throw new UnsupportedOperationException("Unknown domain: " + seekable.domain()); } return attrs; } + protected CommandsForRanges.Updater updateRanges() + { + if (rangeUpdates == null) + rangeUpdates = commandStore.updateRanges(); + return rangeUpdates; + } + @Override protected void invalidateSafeState() { @@ -245,7 +331,7 @@ protected void invalidateSafeState() } @Override - public CommandsForKey.CommandLoader cfkLoader() + public CommandLoader cfkLoader() { return CommandsForKeySerializer.loader; } @@ -256,5 +342,7 @@ public void postExecute(Map commands, postExecute(); commands.values().forEach(AccordSafeState::postExecute); commandsForKeys.values().forEach(AccordSafeState::postExecute); + if (rangeUpdates != null) + rangeUpdates.apply(); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 4de78375bd14..3a979c133296 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -141,7 +141,7 @@ private AccordService() messageSink, configService, AccordService::uniqueNow, - () -> null, + () -> AccordDataStore.INSTANCE, new KeyspaceSplitter(new EvenSplit<>(DatabaseDescriptor.getAccordShardCount(), getPartitioner().accordSplitter())), agent, new DefaultRandom(), @@ -302,6 +302,12 @@ public void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedExcep ExecutorUtils.shutdownAndWait(timeout, unit, this); } + @VisibleForTesting + public Node node() + { + return node; + } + private static Shutdownable toShutdownable(Node node) { return new Shutdownable() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 2bb852eb5411..1cb4fb5a4503 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -25,6 +25,7 @@ import java.util.Set; import java.util.function.Function; import java.util.function.ToLongFunction; +import java.util.stream.Stream; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -352,6 +353,13 @@ public Instance(Class keyClass, Class valClass, Function> stream() + { + return cache.entrySet().stream() + .filter(e -> keyClass.isAssignableFrom(e.getKey().getClass())) + .map(e -> (Node) e.getValue()); + } + @Override public boolean equals(Object o) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 216d64df42e1..a35737040d9b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -43,6 +43,19 @@ public AccordVerbHandler(Node node) public void doVerb(Message message) throws IOException { logger.debug("Receiving {} from {}", message.payload, message.from()); - message.payload.process(node, EndpointMapping.getId(message.from()), message); + T request = message.payload; + Node.Id from = EndpointMapping.getId(message.from()); + long knownEpoch = request.knownEpoch(); + if (!node.topology().hasEpoch(knownEpoch)) + { + node.configService().fetchTopologyForEpoch(knownEpoch); + long waitForEpoch = request.waitForEpoch(); + if (!node.topology().hasEpoch(waitForEpoch)) + { + node.withEpoch(waitForEpoch, () -> request.process(node, from, message)); + return; + } + } + request.process(node, from, message); } } diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java new file mode 100644 index 000000000000..3eb3bd08324c --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -0,0 +1,515 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; +import java.util.function.BiFunction; +import java.util.function.Function; +import javax.annotation.Nullable; + +import com.google.common.collect.AbstractIterator; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSortedMap; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.impl.CommandTimeseries; +import accord.impl.CommandTimeseriesHolder; +import accord.local.Command; +import accord.local.PreLoadContext; +import accord.local.SafeCommand; +import accord.local.SafeCommandStore; +import accord.local.SaveStatus; +import accord.primitives.AbstractKeys; +import accord.primitives.PartialDeps; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.RoutableKey; +import accord.primitives.Seekable; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.utils.Invariants; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.Interval; +import org.apache.cassandra.utils.IntervalTree; + +public class CommandsForRanges +{ + public enum TxnType + { + UNKNOWN, LOCAL, REMOTE; + + private boolean isSafeToMix(TxnType other) + { + if (this == UNKNOWN || other == UNKNOWN) return true; + return this == other; + } + } + + public static final class RangeCommandSummary implements Comparable + { + public final TxnId txnId; + public final Ranges ranges; + public final SaveStatus status; + public final @Nullable Timestamp executeAt; + public final List deps; + + RangeCommandSummary(TxnId txnId, Ranges ranges, SaveStatus status, @Nullable Timestamp executeAt, List deps) + { + this.txnId = txnId; + this.ranges = ranges; + this.status = status; + this.executeAt = executeAt; + this.deps = deps; + } + + public boolean equalsDeep(RangeCommandSummary other) + { + return Objects.equals(txnId, other.txnId) + && Objects.equals(ranges, other.ranges) + && Objects.equals(status, other.status) + && Objects.equals(executeAt, other.executeAt) + && Objects.equals(deps, other.deps); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + RangeCommandSummary that = (RangeCommandSummary) o; + return txnId.equals(that.txnId); + } + + @Override + public int hashCode() + { + return Objects.hash(txnId); + } + + @Override + public String toString() + { + return "RangeCommandSummary{" + + "txnId=" + txnId + + ", status=" + status + + ", ranges=" + ranges + + '}'; + } + + public RangeCommandSummary withRanges(Ranges ranges, BiFunction remappingFunction) + { + return new RangeCommandSummary(txnId, remappingFunction.apply(this.ranges, ranges), status, executeAt, deps); + } + + @Override + public int compareTo(RangeCommandSummary other) + { + // Used in IntervalTree with the expecation that compareTo uniquely identifies an RangeCommandSummary + return txnId.compareTo(other.txnId); + } + } + + private enum RangeCommandSummaryLoader implements CommandTimeseries.CommandLoader + { + INSTANCE; + + @Override + public RangeCommandSummary saveForCFK(Command command) + { + //TODO split write from read? + throw new UnsupportedOperationException(); + } + + @Override + public TxnId txnId(RangeCommandSummary data) + { + return data.txnId; + } + + @Override + public Timestamp executeAt(RangeCommandSummary data) + { + return data.executeAt; + } + + @Override + public SaveStatus saveStatus(RangeCommandSummary data) + { + return data.status; + } + + @Override + public List depsIds(RangeCommandSummary data) + { + return data.deps; + } + } + + public static abstract class AbstractBuilder> + { + protected final Set localTxns = new HashSet<>(); + protected final TreeMap txnToRange = new TreeMap<>(); + protected final IntervalTree.Builder> rangeToTxn = new IntervalTree.Builder<>(); + + public TxnType type(TxnId txnId) + { + if (!txnToRange.containsKey(txnId)) return TxnType.UNKNOWN; + return localTxns.contains(txnId) ? TxnType.LOCAL : TxnType.REMOTE; + } + + public T put(TxnId txnId, Ranges ranges, SaveStatus status, Timestamp execteAt, List dependsOn) + { + remove(txnId); + RangeCommandSummary summary = new RangeCommandSummary(txnId, ranges, status, execteAt, dependsOn); + localTxns.add(txnId); + txnToRange.put(txnId, summary); + addRanges(summary); + return (T) this; + } + + private void addRanges(RangeCommandSummary summary) + { + for (Range range : summary.ranges) + { + rangeToTxn.add(Interval.create(normalize(range.start(), range.startInclusive(), true), + normalize(range.end(), range.endInclusive(), false), + summary)); + } + } + + public T putAll(CommandsForRanges other) + { + for (TxnId id : other.localCommands) + { + TxnType thisType = type(id); + TxnType otherType = other.type(id); + Invariants.checkArgument(thisType.isSafeToMix(otherType), "Attempted to add %s; expected %s but was %s", id, thisType, otherType); + } + localTxns.addAll(other.localCommands); + txnToRange.putAll(other.commandsToRanges); + // If "put" was called before for a txn present in "other", to respect the "put" semantics that update must + // be removed from "rangeToTxn" (as it got removed from "txnToRange"). + // The expected common case is that this method is called on an empty builder, so the removeIf is off an + // empty list (aka no-op) + rangeToTxn.removeIf(data -> other.commandsToRanges.containsKey(data.txnId)); + rangeToTxn.addAll(other.rangesToCommands); + return (T) this; + } + + public T mergeRemote(TxnId txnId, Ranges ranges, BiFunction remappingFunction) + { + // TODO (durability) : remote ranges are not made durable for now. If this command is stored in commands table, + // then we have a NotWitnessed command with Ranges, which is not expected in accord.local.Command.NotWitnessed. + // To properly handle this, the long term storage looks like it will need to store these as well. + Invariants.checkArgument(!localTxns.contains(txnId), "Attempted to merge remote txn %s, but this is a local txn", txnId); + // accord.impl.CommandTimeseries.mapReduce does the check on status and deps type, and NotWitnessed should match the semantics hard coded in InMemorySafeStore... + // in that store, the remote history is only ever included when minStauts == null and deps == ANY... but mapReduce sees accord.local.Status.KnownDeps.hasProposedOrDecidedDeps == false + // as a mis-match, so will be excluded... since NotWitnessed will return false it will only be included IFF deps = ANY. + // When it comes to the minStatus check, the current usage is "null", "Committed", "Accepted"... so NotWitnessed will only be included in the null case; + // the only subtle difference is if minStatus = NotWitnessed, this API will include these but InMemoryStore won't + RangeCommandSummary oldValue = txnToRange.get(txnId); + RangeCommandSummary newValue = oldValue == null ? + new RangeCommandSummary(txnId, ranges, SaveStatus.NotWitnessed, null, Collections.emptyList()) + : oldValue.withRanges(ranges, remappingFunction); + if (newValue == null) + { + remove(txnId); + } + else if (!oldValue.equalsDeep(newValue)) + { + // changes detected... have to update range index + rangeToTxn.removeIf(data -> data.txnId.equals(txnId)); + addRanges(newValue); + } + return (T) this; + } + + public T remove(TxnId txnId) + { + if (txnToRange.containsKey(txnId)) + { + localTxns.remove(txnId); + txnToRange.remove(txnId); + rangeToTxn.removeIf(data -> data.txnId.equals(txnId)); + } + return (T) this; + } + } + + public static class Builder extends AbstractBuilder + { + public CommandsForRanges build() + { + CommandsForRanges cfr = new CommandsForRanges(); + cfr.set(this); + return cfr; + } + } + + public class Updater extends AbstractBuilder + { + private Updater() + { + putAll(CommandsForRanges.this); + } + + public void apply() + { + CommandsForRanges.this.set(this); + } + } + + public static class Listener implements Command.DurableAndIdempotentListener + { + public final TxnId txnId; + private transient SaveStatus saveStatus; + + public Listener(TxnId txnId) + { + this.txnId = txnId; + } + + @Override + public void onChange(SafeCommandStore safeStore, SafeCommand safeCommand) + { + Command current = safeCommand.current(); + if (current.saveStatus() == saveStatus) + return; + saveStatus = current.saveStatus(); + PartialDeps deps = current.partialDeps(); + if (deps == null) + return; + Seekables keysOrRanges = current.partialTxn().keys(); + Invariants.checkArgument(keysOrRanges.domain() == Routable.Domain.Range, "Expected txn %s to be a Range txn, but was a %s", txnId, keysOrRanges.domain()); + + List dependsOn = deps.txnIds(); + ((AccordSafeCommandStore) safeStore).updateRanges() + .put(txnId, (Ranges) keysOrRanges, current.saveStatus(), current.executeAt(), dependsOn); + } + + @Override + public PreLoadContext listenerPreLoadContext(TxnId caller) + { + return caller.equals(txnId) ? PreLoadContext.contextFor(txnId) : PreLoadContext.contextFor(txnId, Collections.singletonList(caller)); + } + + @Override + public String toString() + { + return "Listener{" + + "txnId=" + txnId + + ", saveStatus=" + saveStatus + + '}'; + } + } + + private ImmutableSet localCommands; + private ImmutableSortedMap commandsToRanges; + private IntervalTree> rangesToCommands; + + public CommandsForRanges() + { + localCommands = ImmutableSet.of(); + commandsToRanges = ImmutableSortedMap.of(); + rangesToCommands = IntervalTree.emptyTree(); + } + + private void set(AbstractBuilder builder) + { + this.localCommands = ImmutableSet.copyOf(builder.localTxns); + this.commandsToRanges = ImmutableSortedMap.copyOf(builder.txnToRange); + this.rangesToCommands = builder.rangeToTxn.build(); + } + + public TxnType type(TxnId txnId) + { + if (!commandsToRanges.containsKey(txnId)) return TxnType.UNKNOWN; + return localCommands.contains(txnId) ? TxnType.LOCAL : TxnType.REMOTE; + } + + public boolean containsLocally(TxnId txnId) + { + return localCommands.contains(txnId); + } + + public Iterable search(AbstractKeys keys) + { + // group by the keyspace, as ranges are based off TokenKey, which is scoped to a range + Map> groupByKeyspace = new TreeMap<>(); + for (Key key : keys) + groupByKeyspace.computeIfAbsent(((PartitionKey) key).keyspace(), ignore -> new ArrayList<>()).add(key); + return () -> new AbstractIterator() + { + Iterator ksIt = groupByKeyspace.keySet().iterator(); + Iterator>> rangeIt; + + @Override + protected CommandTimeseriesHolder computeNext() + { + while (true) + { + if (rangeIt != null && rangeIt.hasNext()) + { + Map.Entry> next = rangeIt.next(); + return result(next.getKey(), next.getValue()); + } + rangeIt = null; + if (!ksIt.hasNext()) + { + ksIt = null; + return endOfData(); + } + String ks = ksIt.next(); + List keys = groupByKeyspace.get(ks); + Map> groupByRange = new TreeMap<>(Range::compare); + for (Key key : keys) + { + List> matches = rangesToCommands.matches(key); + if (matches.isEmpty()) + continue; + for (Interval interval : matches) + groupByRange.computeIfAbsent(toRange(interval), ignore -> new HashSet<>()).add(interval.data); + } + rangeIt = groupByRange.entrySet().iterator(); + } + } + }; + } + + private static Range toRange(Interval interval) + { + TokenKey start = (TokenKey) interval.min; + TokenKey end = (TokenKey) interval.max; + // TODO (correctness) : accord doesn't support wrap around, so decreaseSlightly may fail in some cases + // TODO (correctness) : this logic is mostly used for testing, so is it actually safe for all partitioners? + return new TokenRange(start.withToken(start.token().decreaseSlightly()), end); + } + + @Nullable + public CommandTimeseriesHolder search(Range range) + { + List matches = rangesToCommands.search(Interval.create(normalize(range.start(), range.startInclusive(), true), + normalize(range.end(), range.endInclusive(), false))); + return result(range, matches); + } + + private CommandTimeseriesHolder result(Seekable seekable, Collection matches) + { + if (matches.isEmpty()) + return null; + return new Holder(seekable, matches); + } + + public int size() + { + return rangesToCommands.intervalCount(); + } + + public Updater update() + { + return new Updater(); + } + + @Override + public String toString() + { + return rangesToCommands.unbuild().toString(); + } + + private static RoutingKey normalize(RoutingKey key, boolean inclusive, boolean upOrDown) + { + if (inclusive) return key; + AccordRoutingKey ak = (AccordRoutingKey) key; + switch (ak.kindOfRoutingKey()) + { + case SENTINEL: + return normalize(ak.asSentinelKey().toTokenKey(), inclusive, upOrDown); + case TOKEN: + TokenKey tk = ak.asTokenKey(); + return tk.withToken(upOrDown ? tk.token().nextValidToken() : tk.token().decreaseSlightly()); + default: + throw new IllegalArgumentException("Unknown kind: " + ak.kindOfRoutingKey()); + } + } + + private static class Holder implements CommandTimeseriesHolder + { + private final Seekable keyOrRange; + private final Collection matches; + + private Holder(Seekable keyOrRange, Collection matches) + { + this.keyOrRange = keyOrRange; + this.matches = matches; + } + + @Override + public CommandTimeseries byId() + { + return build(m -> m.txnId); + } + + @Override + public CommandTimeseries byExecuteAt() + { + return build(m -> m.executeAt != null ? m.executeAt : m.txnId); + } + + @Override + public Timestamp max() + { + return byExecuteAt().maxTimestamp(); + } + + private CommandTimeseries build(Function fn) + { + CommandTimeseries.Update builder = new CommandTimeseries.Update<>(keyOrRange, RangeCommandSummaryLoader.INSTANCE); + for (RangeCommandSummary m : matches) + { + if (m.status == SaveStatus.Invalidated) + continue; + builder.add(fn.apply(m), m); + } + return builder.build(); + } + + @Override + public String toString() + { + return "Holder{" + + "keyOrRange=" + keyOrRange + + ", matches=" + matches + + '}'; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index 7054fb3ba949..e7e65ee36594 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -45,7 +45,7 @@ public abstract class AccordRoutingKey extends AccordRoutableKey implements RoutingKey { - enum RoutingKeyKind + public enum RoutingKeyKind { TOKEN, SENTINEL } @@ -58,6 +58,16 @@ protected AccordRoutingKey(String keyspace) public abstract RoutingKeyKind kindOfRoutingKey(); public abstract long estimatedSizeOnHeap(); + public SentinelKey asSentinelKey() + { + return (SentinelKey) this; + } + + public TokenKey asTokenKey() + { + return (TokenKey) this; + } + public static AccordRoutingKey of(Key key) { return (AccordRoutingKey) key; @@ -191,6 +201,11 @@ public TokenKey(String keyspace, Token token) this.token = token; } + public TokenKey withToken(Token token) + { + return new TokenKey(keyspace, token); + } + @Override public Token token() { diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 0a486ee40245..a8bff385d4df 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -23,31 +23,41 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterables; +import com.google.common.collect.ImmutableSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.RoutingKey; import accord.impl.CommandsForKey; import accord.local.Command; import accord.local.PreLoadContext; +import accord.primitives.Range; +import accord.primitives.Ranges; import accord.primitives.RoutableKey; +import accord.primitives.Seekables; import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; +import accord.utils.async.Observable; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordLoadingState; import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; public class AsyncLoader @@ -65,15 +75,15 @@ enum State private final AccordCommandStore commandStore; private final Iterable txnIds; - private final Iterable keys; + private final Seekables keysOrRanges; protected AsyncResult readResult; - public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Iterable keys) + public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Seekables keysOrRanges) { this.commandStore = commandStore; this.txnIds = txnIds; - this.keys = keys; + this.keysOrRanges = keysOrRanges; } protected static Iterable txnIds(PreLoadContext context) @@ -85,7 +95,7 @@ protected static Iterable txnIds(PreLoadContext context) return Iterables.concat(Collections.singleton(primaryid), additionalIds); } - private > void referenceAndAssembleReads(Iterable keys, + private > void referenceAndAssembleReads(Iterable keys, Map context, AccordStateCache.Instance cache, Function loadFunction, @@ -140,13 +150,24 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) loadCommandFunction(), readRunnables, chains); - - referenceAndAssembleReads(keys, - context.commandsForKeys, - commandStore.commandsForKeyCache(), - loadCommandsPerKeyFunction(), - readRunnables, - chains); + switch (keysOrRanges.domain()) + { + case Key: + // cast to Keys fails... + Iterable keys = (Iterable) keysOrRanges; + referenceAndAssembleReads(keys, + context.commandsForKeys, + commandStore.commandsForKeyCache(), + loadCommandsPerKeyFunction(), + readRunnables, + chains); + break; + case Range: + chains.add(referenceAndDispatchReadsForRange(context)); + break; + default: + throw new UnsupportedOperationException("Unable to process keys of " + keysOrRanges.domain()); + } if (chains.isEmpty()) { @@ -161,6 +182,66 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) return !chains.isEmpty() ? AsyncChains.reduce(chains, (a, b) -> null).beginAsResult() : null; } + private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context context) + { + AsyncChain> overlappingKeys = findOverlappingKeys((Ranges) keysOrRanges); + return overlappingKeys.flatMap(keys -> { + if (keys.isEmpty()) + return AsyncChains.success(null); + // TODO (duplicate code): repeat of referenceAndDispatchReads + List readRunnables = new ArrayList<>(); + List> chains = new ArrayList<>(); + referenceAndAssembleReads(keys, + context.commandsForKeys, + commandStore.commandsForKeyCache(), + loadCommandsPerKeyFunction(), + readRunnables, + chains); + // all keys are already loaded + if (chains.isEmpty()) + return AsyncChains.success(null); + // runnable results are already contained in the chains collection + if (!readRunnables.isEmpty()) + AsyncChains.ofRunnables(Stage.READ.executor(), readRunnables).begin(commandStore.agent()); + return AsyncChains.reduce(chains, (a, b) -> null); + }, commandStore); + } + + private AsyncChain> findOverlappingKeys(Ranges ranges) + { + assert !ranges.isEmpty(); + + List>> chains = new ArrayList<>(ranges.size()); + for (Range range : ranges) + chains.add(findOverlappingKeys(range)); + return AsyncChains.reduce(chains, (a, b) -> ImmutableSet.builder().addAll(a).addAll(b).build()); + } + + private AsyncChain> findOverlappingKeys(Range range) + { + Set cached = commandStore.commandsForKeyCache().stream() + .map(n -> (PartitionKey) n.key()) + .filter(range::contains) + .collect(Collectors.toSet()); + // save to a variable as java gets confused when `.map` is called on the result of asChain + AsyncChain> map = Observable.asChain(callback -> + AccordKeyspace.findAllKeysBetween(commandStore.id(), + toTokenKey(range.start()).token(), range.startInclusive(), + toTokenKey(range.end()).token(), range.endInclusive(), + callback), + Collectors.toSet()); + return map.map(s -> ImmutableSet.builder().addAll(s).addAll(cached).build()); + } + + private static TokenKey toTokenKey(RoutingKey start) + { + if (start instanceof TokenKey) + return (TokenKey) start; + if (start instanceof AccordRoutingKey.SentinelKey) + return ((AccordRoutingKey.SentinelKey) start).toTokenKey(); + throw new IllegalArgumentException(String.format("Unable to convert RoutingKey %s (type %s) to TokenKey", start, start.getClass())); + } + @VisibleForTesting void state(State state) { @@ -169,7 +250,7 @@ void state(State state) public boolean load(AsyncOperation.Context context, BiConsumer callback) { - logger.trace("Running load for {} with state {}: {} {}", callback, state, txnIds, keys); + logger.trace("Running load for {} with state {}: {} {}", callback, state, txnIds, keysOrRanges); commandStore.checkInStoreThread(); switch (state) { @@ -200,7 +281,7 @@ public boolean load(AsyncOperation.Context context, BiConsumer commands = new HashMap<>(); - final HashMap commandsForKeys = new HashMap<>(); + final TreeMap commandsForKeys = new TreeMap<>(); void releaseResources(AccordCommandStore commandStore) { @@ -145,7 +145,7 @@ AsyncWriter createAsyncWriter(AccordCommandStore commandStore) AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds(preLoadContext), toRoutableKeys(preLoadContext.keys())); + return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()); } @VisibleForTesting @@ -268,7 +268,6 @@ protected void runInternal() } } - @Override public void run() { @@ -307,19 +306,6 @@ public void start(BiConsumer callback) commandStore.executor().execute(this); } - private static Iterable toRoutableKeys(Seekables keys) - { - switch (keys.domain()) - { - default: throw new AssertionError("Unexpected domain: " + keys.domain()); - case Key: - return (Iterable) keys; - case Range: - // TODO (required): implement - throw new UnsupportedOperationException(); - } - } - static class ForFunction extends AsyncOperation { private final Function function; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index f65b1fb43788..48ec6fae6c6d 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -25,7 +25,7 @@ import com.google.common.annotations.VisibleForTesting; -import accord.impl.CommandsForKey.CommandLoader; +import accord.impl.CommandTimeseries.CommandLoader; import accord.local.Command; import accord.local.SaveStatus; import accord.primitives.PartialDeps; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java index 4581ab8cadbf..f649a3b32520 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java @@ -26,13 +26,14 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.CommandsForRanges; import org.apache.cassandra.service.accord.api.PartitionKey; public class ListenerSerializers { public enum Kind { - COMMAND, COMMANDS_FOR_KEY; + COMMAND, COMMANDS_FOR_KEY, COMMANDS_FOR_RANGE; private static Kind of(Command.DurableAndIdempotentListener listener) { @@ -42,6 +43,9 @@ private static Kind of(Command.DurableAndIdempotentListener listener) if (listener instanceof CommandsForKey.Listener) return COMMANDS_FOR_KEY; + if (listener instanceof CommandsForRanges.Listener) + return COMMANDS_FOR_RANGE; + throw new IllegalArgumentException("Unsupported listener type: " + listener.getClass().getName()); } } @@ -68,6 +72,27 @@ public long serializedSize(Command.ProxyListener listener, int version) } }; + private static final IVersionedSerializer cfrListener = new IVersionedSerializer() + { + @Override + public void serialize(CommandsForRanges.Listener listener, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(listener.txnId, out, version); + } + + @Override + public CommandsForRanges.Listener deserialize(DataInputPlus in, int version) throws IOException + { + return new CommandsForRanges.Listener(CommandSerializers.txnId.deserialize(in, version)); + } + + @Override + public long serializedSize(CommandsForRanges.Listener listener, int version) + { + return CommandSerializers.txnId.serializedSize(listener.txnId, version); + } + }; + private static final IVersionedSerializer cfkListener = new IVersionedSerializer() { @Override @@ -104,6 +129,9 @@ public void serialize(Command.DurableAndIdempotentListener listener, DataOutputP case COMMANDS_FOR_KEY: cfkListener.serialize((CommandsForKey.Listener) listener, out, version); break; + case COMMANDS_FOR_RANGE: + cfrListener.serialize((CommandsForRanges.Listener) listener, out, version); + break; default: throw new IllegalArgumentException(); } @@ -119,6 +147,8 @@ public Command.DurableAndIdempotentListener deserialize(DataInputPlus in, int ve return commandListener.deserialize(in, version); case COMMANDS_FOR_KEY: return cfkListener.deserialize(in, version); + case COMMANDS_FOR_RANGE: + return cfrListener.deserialize(in, version); default: throw new IllegalArgumentException(); } @@ -137,6 +167,9 @@ public long serializedSize(Command.DurableAndIdempotentListener listener, int ve case COMMANDS_FOR_KEY: size += cfkListener.serializedSize((CommandsForKey.Listener) listener, version); break; + case COMMANDS_FOR_RANGE: + size += cfrListener.serializedSize((CommandsForRanges.Listener) listener, version); + break; default: throw new IllegalArgumentException(); } diff --git a/src/java/org/apache/cassandra/utils/IntervalTree.java b/src/java/org/apache/cassandra/utils/IntervalTree.java index bde80c5dfe0f..53f0fb716518 100644 --- a/src/java/org/apache/cassandra/utils/IntervalTree.java +++ b/src/java/org/apache/cassandra/utils/IntervalTree.java @@ -25,6 +25,9 @@ import java.util.Deque; import java.util.Iterator; import java.util.List; +import java.util.function.BiPredicate; +import java.util.function.Consumer; +import java.util.function.Predicate; import com.google.common.base.Joiner; import com.google.common.collect.Iterables; @@ -118,6 +121,16 @@ public static , D extends Comparable, return EMPTY_TREE; } + public static , D extends Comparable, I extends Interval> Builder builder() + { + return new Builder<>(); + } + + public Builder unbuild() + { + return new Builder().addAll(this); + } + public int intervalCount() { return intervalsByMinOrder.length; @@ -144,13 +157,28 @@ public C min() return head.low; } + public List> matches(Interval searchInterval) + { + if (head == null) + return Collections.emptyList(); + + List> results = new ArrayList<>(); + head.searchInternal(searchInterval, i -> results.add(i)); + return results; + } + + public List> matches(C point) + { + return matches(Interval.create(point, point, null)); + } + public List search(Interval searchInterval) { if (head == null) return Collections.emptyList(); List results = new ArrayList(); - head.searchInternal(searchInterval, results); + head.searchInternal(searchInterval, i -> results.add(i.data)); return results; } @@ -429,7 +457,7 @@ else if (candidate.min.compareTo(center) > 0) } - void searchInternal(Interval searchInterval, List results) + void searchInternal(Interval searchInterval, Consumer> results) { if (center.compareTo(searchInterval.min) < 0) { @@ -438,7 +466,7 @@ void searchInternal(Interval searchInterval, List results) return; while (i < intersectsRight.size()) - results.add(intersectsRight.get(i++).data); + results.accept(intersectsRight.get(i++)); if (right != null) right.searchInternal(searchInterval, results); @@ -450,7 +478,7 @@ else if (center.compareTo(searchInterval.max) > 0) return; for (int i = 0 ; i < j ; i++) - results.add(intersectsLeft.get(i).data); + results.accept(intersectsLeft.get(i)); if (left != null) left.searchInternal(searchInterval, results); @@ -460,7 +488,7 @@ else if (center.compareTo(searchInterval.max) > 0) // Adds every interval contained in this node to the result set then search left and right for further // overlapping intervals for (Interval interval : intersectsLeft) - results.add(interval.data); + results.accept(interval); if (left != null) left.searchInternal(searchInterval, results); @@ -510,4 +538,55 @@ private void gotoMinOf(IntervalNode node) } } + + public static class Builder, D extends Comparable, I extends Interval> + { + private final List intervals = new ArrayList<>(); + + public Builder addAll(IntervalTree other) + { + other.forEach(intervals::add); + return this; + } + + public Builder add(I interval) + { + intervals.add(interval); + return this; + } + + public interface TriPredicate + { + boolean test(A a, B b, C c); + } + + public Builder removeIf(TriPredicate predicate) + { + intervals.removeIf(i -> predicate.test(i.min, i.max, i.data)); + return this; + } + + public Builder removeIf(BiPredicate predicate) + { + intervals.removeIf(i -> predicate.test(i.min, i.max)); + return this; + } + + public Builder removeIf(Predicate predicate) + { + intervals.removeIf(i -> predicate.test(i.data)); + return this; + } + + public IntervalTree build() + { + return IntervalTree.build(intervals); + } + + @Override + public String toString() + { + return intervals.toString(); + } + } } \ No newline at end of file diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index 7e6ecc67c23b..8004a0eef316 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -26,15 +26,14 @@ import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.cassandra.distributed.Cluster; import org.assertj.core.api.Assertions; + import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; @@ -50,7 +49,6 @@ import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.marshal.UTF8Type; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.QueryResults; @@ -142,14 +140,8 @@ public void testMultipleShards() throws Exception String currentTable = keyspace + ".tbl"; List ddls = Arrays.asList("CREATE KEYSPACE " + keyspace + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}", "CREATE TABLE " + currentTable + " (k blob, c int, v int, primary key (k, c))"); - List tokens = SHARED_CLUSTER.stream() - .flatMap(i -> StreamSupport.stream(Splitter.on(",").split(i.config().getString("initial_token")).spliterator(), false)) - .collect(Collectors.toList()); - - List keys = tokens.stream() - .map(t -> (Murmur3Partitioner.LongToken) Murmur3Partitioner.instance.getTokenFactory().fromString(t)) - .map(Murmur3Partitioner.LongToken::keyForToken) - .collect(Collectors.toList()); + List tokens = tokens(); + List keys = tokensToKeys(tokens); List keyStrings = keys.stream().map(bb -> "0x" + ByteBufferUtil.bytesToHex(bb)).collect(Collectors.toList()); StringBuilder query = new StringBuilder("BEGIN TRANSACTION\n"); @@ -159,12 +151,12 @@ public void testMultipleShards() throws Exception query.append(" SELECT row0.v;\n") .append(" IF "); - for (int i = 0; i < keys.size(); i++) + for (int i = 0; i < keyStrings.size(); i++) query.append((i > 0 ? " AND row" : "row") + i + " IS NULL"); query.append(" THEN\n"); - for (int i = 0; i < keys.size(); i++) + for (int i = 0; i < keyStrings.size(); i++) query.append(" INSERT INTO " + currentTable + " (k, c, v) VALUES (" + keyStrings.get(i) + ", 0, " + i +");\n"); query.append(" END IF\n"); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index f6ae047fab2b..261620705e7e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -19,6 +19,7 @@ package org.apache.cassandra.distributed.test.accord; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -26,7 +27,9 @@ import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; +import com.google.common.base.Splitter; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -42,6 +45,7 @@ import org.apache.cassandra.cql3.statements.ModificationStatement; import org.apache.cassandra.cql3.statements.TransactionStatement; import org.apache.cassandra.cql3.transactions.ReferenceValue; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.Feature; @@ -56,6 +60,7 @@ import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FailingConsumer; +import org.apache.cassandra.utils.Shared; import static net.bytebuddy.matcher.ElementMatchers.named; import static net.bytebuddy.matcher.ElementMatchers.takesArguments; @@ -66,6 +71,12 @@ public abstract class AccordTestBase extends TestBaseImpl private static final Logger logger = LoggerFactory.getLogger(AccordTestBase.class); private static final int MAX_RETRIES = 10; + @Shared + public static class State + { + public static AtomicInteger coordinateCounts = new AtomicInteger(); + } + protected static final AtomicInteger COUNTER = new AtomicInteger(0); protected static Cluster SHARED_CLUSTER; @@ -128,7 +139,7 @@ protected void test(FailingConsumer fn) throws Exception protected int getAccordCoordinateCount() { - return SHARED_CLUSTER.get(1).callOnInstance(() -> BBAccordCoordinateCountHelper.count.get()); + return State.coordinateCounts.get(); } private static Cluster createCluster() throws IOException @@ -137,7 +148,7 @@ private static Cluster createCluster() throws IOException // disable vnode for now, but should enable before trunk return init(Cluster.build(2) .withoutVNodes() - .withConfig(c -> c.with(Feature.NETWORK).set("write_request_timeout", "10s") + .withConfig(c -> c.with(Feature.NETWORK, Feature.GOSSIP).set("write_request_timeout", "10s") .set("transaction_timeout", "15s") .set("legacy_paxos_strategy", "migration")) // TODO: switch back to "accord" when TrM integration works .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install) @@ -188,7 +199,7 @@ private SimpleQueryResult assertRowWithPreemptedRetry(Cluster cluster, SimpleQue return result; } - private SimpleQueryResult executeWithRetry0(int count, Cluster cluster, String check, Object... boundValues) + private static SimpleQueryResult executeWithRetry0(int count, Cluster cluster, String check, Object... boundValues) { try { @@ -206,7 +217,7 @@ private SimpleQueryResult executeWithRetry0(int count, Cluster cluster, String c } } - protected SimpleQueryResult executeWithRetry(Cluster cluster, String check, Object... boundValues) + protected static SimpleQueryResult executeWithRetry(Cluster cluster, String check, Object... boundValues) { check = wrapInTxn(check); @@ -218,7 +229,7 @@ protected SimpleQueryResult executeWithRetry(Cluster cluster, String check, Obje return executeWithRetry0(0, cluster, check, boundValues); } - private boolean isIdempotent(Cluster cluster, String cql) + private static boolean isIdempotent(Cluster cluster, String cql) { return cluster.get(1).callOnInstance(() -> { TransactionStatement stmt = AccordTestUtils.parse(cql); @@ -257,6 +268,21 @@ private static boolean isIdempotent(ModificationStatement update) return numConstants == 0; } + static List tokens() + { + return SHARED_CLUSTER.stream() + .flatMap(i -> StreamSupport.stream(Splitter.on(",").split(i.config().getString("initial_token")).spliterator(), false)) + .collect(Collectors.toList()); + } + + static List tokensToKeys(List tokens) + { + return tokens.stream() + .map(t -> (Murmur3Partitioner.LongToken) Murmur3Partitioner.instance.getTokenFactory().fromString(t)) + .map(Murmur3Partitioner.LongToken::keyForToken) + .collect(Collectors.toList()); + } + public static class EnforceUpdateDoesNotPerformRead { public static void install(ClassLoader classLoader, Integer num) @@ -285,7 +311,6 @@ public static void install(ClassLoader classLoader, Integer num) public static class BBAccordCoordinateCountHelper { - static AtomicInteger count = new AtomicInteger(); static void install(ClassLoader cl, int nodeNumber) { if (nodeNumber != 1) @@ -299,7 +324,7 @@ static void install(ClassLoader cl, int nodeNumber) public static TxnData coordinate(Txn txn, @SuperCall Callable actual) throws Exception { - count.incrementAndGet(); + State.coordinateCounts.incrementAndGet(); return actual.call(); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java new file mode 100644 index 000000000000..196638eb869e --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.nio.ByteBuffer; +import java.util.List; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.service.accord.AccordService; + +public class NewSchemaTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(NewSchemaTest.class); + + @Override + protected Logger logger() + { + return logger; + } + + @Test + public void test() + { + for (int i = 0; i < 20; i++) + { + String ks = "ks" + i; + String table = ks + ".tbl" + i; + SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + ks + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}"); + SHARED_CLUSTER.schemaChange(String.format("CREATE TABLE %s (pk blob primary key)", table)); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); + + List keys = tokensToKeys(tokens()); + + read(table, keys).exec(); + } + } + + private static Query read(String table, List keys) + { + assert !keys.isEmpty(); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < keys.size(); i++) + sb.append("let row").append(i).append(" = (select * from ").append(table).append(" where pk = ?);\n"); + sb.append("SELECT row0.pk;"); + return new Query(sb.toString(), keys.toArray()); + } + + private static class Query + { + final String cql; + final Object[] binds; + + private Query(String cql, Object[] binds) + { + this.cql = cql; + this.binds = binds; + } + + SimpleQueryResult exec() + { + return executeWithRetry(SHARED_CLUSTER, cql, binds); + } + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index db0dbd6af55c..4ffdf8db7ba4 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.accord.async; -import java.util.Collections; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -34,6 +33,7 @@ import accord.impl.CommandsForKey; import accord.local.Command; +import accord.primitives.Keys; import accord.primitives.PartialTxn; import accord.primitives.RoutableKey; import accord.primitives.TxnId; @@ -102,7 +102,7 @@ public void cachedTest() testLoad(safeCfk, commandsForKey(key)); cfkCache.release(safeCfk); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); // everything is cached, so the loader should return immediately commandStore.executeBlocking(() -> { @@ -143,7 +143,7 @@ public void loadTest() AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { @@ -192,7 +192,7 @@ public void partialLoadTest() AccordKeyspace.getCommandsForKeyMutation(commandStore, safeCfk, commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { @@ -244,7 +244,7 @@ public void inProgressLoadTest() throws Throwable testLoad(safeCfk, commandsForKey(key)); cfkCache.release(safeCfk); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), singleton(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); // since there's a read future associated with the txnId, we'll wait for it to load AsyncPromise cbFired = new AsyncPromise<>(); @@ -291,7 +291,7 @@ public void failedLoadTest() throws Throwable execute(commandStore, () -> { AtomicInteger loadCalls = new AtomicInteger(); - AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Collections.emptyList()){ + AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY){ @Override Function loadCommandFunction() diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 22f562146fc2..f3802d2bf291 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -251,7 +251,7 @@ private AccordStateCache.Instance cache() @Override AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds(preLoadContext), (Iterable) preLoadContext.keys()) { + return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()) { @Override void state(State state) @@ -333,7 +333,7 @@ public void loadFail() @Override AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds(preLoadContext), (Iterable) preLoadContext.keys()) + return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()) { @Override Function loadCommandFunction() diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 547b03c10ce9..13485f90b3fc 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -24,7 +24,7 @@ import org.junit.BeforeClass; import org.junit.Test; -import accord.impl.CommandsForKey; +import accord.impl.CommandTimeseries; import accord.primitives.TxnId; import accord.utils.AccordGens; import accord.utils.Gens; @@ -75,7 +75,7 @@ public void serdeDeps() @Test public void serde() { - CommandsForKey.CommandLoader loader = CommandsForKeySerializer.loader; + CommandTimeseries.CommandLoader loader = CommandsForKeySerializer.loader; qt().forAll(AccordGenerators.commands()).check(cmd -> { ByteBuffer bb = loader.saveForCFK(cmd); int size = bb.remaining(); From 8f156fc5dc2ba0d4a79446123763bd73f93060d8 Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Fri, 2 Jun 2023 11:45:41 +0100 Subject: [PATCH 059/340] CEP-15: Extend Accord MessageType with a side effect flag patch by Aleksey Yeschenko; reviewed by Benedic Elliott Smith for CASSANDRA-18561 --- modules/accord | 2 +- src/java/org/apache/cassandra/net/Verb.java | 14 +++++++------- .../cassandra/service/accord/AccordJournal.java | 10 +++++----- .../service/accord/AccordMessageSink.java | 16 ++++++++-------- .../service/accord/AccordMessageSinkTest.java | 2 +- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/accord b/modules/accord index 3d0ff07cd5c7..8830d97ba517 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3d0ff07cd5c7db43390b85afa593e6f76471d886 +Subproject commit 8830d97ba517fb2d0f7f22e8e6b886a98839e694 diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 9d90fba37efc..80afb387d90e 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -266,8 +266,8 @@ public enum Verb // accord ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), - ACCORD_PREACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_PREACCEPT_REQ (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PREACCEPT_RSP ), + ACCORD_PRE_ACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), + ACCORD_PRE_ACCEPT_REQ (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PRE_ACCEPT_RSP ), ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), ACCORD_ACCEPT_REQ (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), @@ -277,13 +277,13 @@ public enum Verb ACCORD_COMMIT_INVALIDATE_REQ (126, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler() ), ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), ACCORD_APPLY_REQ (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP ), - ACCORD_RECOVER_RSP (132, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), - ACCORD_RECOVER_REQ (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_RECOVER_RSP ), + ACCORD_BEGIN_RECOVER_RSP (132, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), + ACCORD_BEGIN_RECOVER_REQ (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_RECOVER_RSP ), ACCORD_BEGIN_INVALIDATE_RSP (134, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP ), - ACCORD_WAIT_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), - ACCORD_WAIT_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_COMMIT_RSP ), - ACCORD_INFORM_OF_TXNID_REQ (137, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_WAIT_ON_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), + ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_ON_COMMIT_RSP ), + ACCORD_INFORM_OF_TXN_REQ (137, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), ACCORD_INFORM_HOME_DURABLE_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), ACCORD_INFORM_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 382ab1e65906..5d097068a654 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -382,10 +382,10 @@ public TxnRequest deserialize(Key key, DataInputPlus in, int version) throws */ public enum Type implements IVersionedSerializer> { - PREACCEPT_REQ (0, MessageType.PREACCEPT_REQ, PreacceptSerializers.request), - ACCEPT_REQ (1, MessageType.ACCEPT_REQ, AcceptSerializers.request ), - COMMIT_REQ (2, MessageType.COMMIT_REQ, CommitSerializers.request ), - APPLY_REQ (3, MessageType.APPLY_REQ, ApplySerializers.request ); + PREACCEPT_REQ (0, MessageType.PRE_ACCEPT_REQ, PreacceptSerializers.request), + ACCEPT_REQ (1, MessageType.ACCEPT_REQ, AcceptSerializers.request ), + COMMIT_REQ (2, MessageType.COMMIT_REQ, CommitSerializers.request ), + APPLY_REQ (3, MessageType.APPLY_REQ, ApplySerializers.request ); final int id; final MessageType msgType; @@ -460,7 +460,7 @@ static Type ofMessage(TxnRequest request) static boolean mustMakeDurable(TxnRequest message) { - return msgTypeToTypeMap.containsKey(message.type()); + return message.type().hasSideEffects; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index b95fb8b13860..034504d7e7fa 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -52,24 +52,24 @@ private static class VerbMapping private VerbMapping() { - mapping.put(MessageType.PREACCEPT_REQ, Verb.ACCORD_PREACCEPT_REQ); - mapping.put(MessageType.PREACCEPT_RSP, Verb.ACCORD_PREACCEPT_RSP); + mapping.put(MessageType.PRE_ACCEPT_REQ, Verb.ACCORD_PRE_ACCEPT_REQ); + mapping.put(MessageType.PRE_ACCEPT_RSP, Verb.ACCORD_PRE_ACCEPT_RSP); mapping.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); mapping.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); mapping.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); mapping.put(MessageType.COMMIT_REQ, Verb.ACCORD_COMMIT_REQ); - mapping.put(MessageType.COMMIT_INVALIDATE, Verb.ACCORD_COMMIT_INVALIDATE_REQ); + mapping.put(MessageType.COMMIT_INVALIDATE_REQ, Verb.ACCORD_COMMIT_INVALIDATE_REQ); mapping.put(MessageType.APPLY_REQ, Verb.ACCORD_APPLY_REQ); mapping.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); mapping.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); mapping.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); - mapping.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_RECOVER_REQ); - mapping.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_RECOVER_RSP); + mapping.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_BEGIN_RECOVER_REQ); + mapping.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_BEGIN_RECOVER_RSP); mapping.put(MessageType.BEGIN_INVALIDATE_REQ, Verb.ACCORD_BEGIN_INVALIDATE_REQ); mapping.put(MessageType.BEGIN_INVALIDATE_RSP, Verb.ACCORD_BEGIN_INVALIDATE_RSP); - mapping.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_COMMIT_REQ); - mapping.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_COMMIT_RSP); - mapping.put(MessageType.INFORM_TXNID_REQ, Verb.ACCORD_INFORM_OF_TXNID_REQ); + mapping.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); + mapping.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); + mapping.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); mapping.put(MessageType.INFORM_HOME_DURABLE_REQ,Verb.ACCORD_INFORM_HOME_DURABLE_REQ); mapping.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); mapping.put(MessageType.CHECK_STATUS_REQ, Verb.ACCORD_CHECK_STATUS_REQ); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index 40890d36732b..bae5fb144b71 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -50,7 +50,7 @@ public void informOfTxn() // There was an issue where the reply was the wrong verb // see CASSANDRA-18375 InformOfTxnId info = Mockito.mock(InformOfTxnId.class); - Message req = Message.builder(Verb.ACCORD_INFORM_OF_TXNID_REQ, info).build(); + Message req = Message.builder(Verb.ACCORD_INFORM_OF_TXN_REQ, info).build(); SimpleReply reply = SimpleReply.Ok; MessageDelivery messaging = Mockito.mock(MessageDelivery.class); From 145c467c694c87136792afa1914ff9eb8bc958c0 Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Fri, 2 Jun 2023 13:29:08 +0100 Subject: [PATCH 060/340] CEP-15: Convert AccordStateCache cache from write-through to write-back patch by Aleksey Yeschenko; reviewed by Blake Eggleston for CASSANDRA-18563 --- .../service/accord/AccordCachingState.java | 640 ++++++++++++++++++ .../service/accord/AccordCommandStore.java | 77 ++- .../service/accord/AccordKeyspace.java | 41 +- .../service/accord/AccordLoadingState.java | 192 ------ .../service/accord/AccordSafeCommand.java | 17 +- .../accord/AccordSafeCommandStore.java | 8 +- .../accord/AccordSafeCommandsForKey.java | 10 +- .../service/accord/AccordSafeState.java | 20 +- .../service/accord/AccordStateCache.java | 629 +++++++---------- .../service/accord/async/AsyncLoader.java | 99 +-- .../service/accord/async/AsyncOperation.java | 128 ++-- .../service/accord/async/AsyncWriter.java | 183 ----- .../cassandra/concurrent/ManualExecutor.java | 177 +++++ .../accord/AccordCachingStateTest.java | 180 +++++ .../accord/AccordLoadingStateTest.java | 178 ----- .../service/accord/AccordStateCacheTest.java | 229 +++---- .../service/accord/AccordTestUtils.java | 56 +- .../service/accord/async/AsyncLoaderTest.java | 98 ++- .../accord/async/AsyncOperationTest.java | 184 +---- 19 files changed, 1651 insertions(+), 1495 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCachingState.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordLoadingState.java delete mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java create mode 100644 test/unit/org/apache/cassandra/concurrent/ManualExecutor.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java delete mode 100644 test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java new file mode 100644 index 000000000000..994e551f79d6 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -0,0 +1,640 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord; + +import java.util.Collections; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.function.ToLongFunction; + +import com.google.common.primitives.Ints; + +import accord.local.Command.TransientListener; +import accord.utils.DeterministicIdentitySet; +import accord.utils.IntrusiveLinkedListNode; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncResults.RunnableResult; +import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.utils.ObjectSizes; + +import static java.lang.String.format; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.UNINITIALIZED; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADING; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADED; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_LOAD; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.MODIFIED; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.SAVING; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_SAVE; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.EVICTED; + +/** + * Global (per CommandStore) state of a cached entity (Command or CommandsForKey). + */ +public class AccordCachingState extends IntrusiveLinkedListNode +{ + static final long EMPTY_SIZE = ObjectSizes.measure(new AccordCachingState<>(null, null)); + + private final K key; + private State state; + + int references = 0; + int lastQueriedEstimatedSizeOnHeap = 0; + private boolean shouldUpdateSize; + + /** + * Transient listeners aren't meant to survive process restart, but must survive cache eviction. + */ + private Set transientListeners; + + public AccordCachingState(K key) + { + this.key = key; + //noinspection unchecked + this.state = (State) Uninitialized.instance; + } + + AccordCachingState(K key, State state) + { + this.key = key; + this.state = state; + } + + void unlink() + { + remove(); + } + + boolean isLinked() + { + return !isFree(); + } + + public K key() + { + return key; + } + + public int referenceCount() + { + return references; + } + + boolean isLoaded() + { + return status().isLoaded(); + } + + public boolean isComplete() + { + return status().isComplete(); + } + + int estimatedSizeOnHeap(ToLongFunction estimator) + { + shouldUpdateSize = false; + return lastQueriedEstimatedSizeOnHeap = Ints.checkedCast(EMPTY_SIZE + estimateStateOnHeapSize(estimator)); + } + + long estimatedSizeOnHeapDelta(ToLongFunction estimator) + { + long prevSize = lastQueriedEstimatedSizeOnHeap; + return estimatedSizeOnHeap(estimator) - prevSize; + } + + boolean shouldUpdateSize() + { + return shouldUpdateSize; + } + + @Override + public String toString() + { + return "Node{" + state.status() + + ", key=" + key() + + ", references=" + references + + "}@" + Integer.toHexString(System.identityHashCode(this)); + } + + public Status status() + { + return complete().status(); + } + + public void addListener(TransientListener listener) + { + if (transientListeners == null) + transientListeners = new DeterministicIdentitySet<>(); + transientListeners.add(listener); + } + + public boolean removeListener(TransientListener listener) + { + return transientListeners != null && transientListeners.remove(listener); + } + + public void listeners(Set listeners) + { + transientListeners = listeners; + } + + public Set listeners() + { + return transientListeners == null ? Collections.emptySet() : transientListeners; + } + + public boolean hasListeners() + { + return !listeners().isEmpty(); + } + + State complete() + { + return state.isCompleteable() ? state(state.complete()) : state; + } + + /** + * Submits a load runnable to the specified executor. When the runnable + * has completed, the state load will have either completed or failed. + */ + public AsyncChain load(ExecutorPlus executor, Function loadFunction) + { + Loading loading = state.load(key, loadFunction); + executor.submit(loading); + state(loading); + return loading; + } + + private State state(State next) + { + State prev = state; + if (prev != next) + shouldUpdateSize = true; + return state = next; + } + + public AsyncChain loading() + { + // do *not* attempt to complete, to prevent races where the caller found a pending load, attempts + // to register a callback, but gets an exception because the load completed in the meantime + return state.loading(); + } + + public V get() + { + return complete().get(); + } + + public void set(V value) + { + shouldUpdateSize = true; + state(complete().set(value)); + } + + /** + * Submits a save runnable to the specified executor. When the runnable + * has completed, the state save will have either completed or failed. + */ + void save(ExecutorPlus executor, BiFunction saveFunction) + { + @SuppressWarnings("unchecked") + State savingOrLoaded = state.save((BiFunction) saveFunction); + if (savingOrLoaded.status() == SAVING) + executor.submit(savingOrLoaded.saving()); + state(savingOrLoaded); + } + + public AsyncChain saving() + { + // do *not* attempt to complete, to prevent races where the caller found a pending save, attempts + // to register a callback, but gets an exception because the save completed in the meantime + return state.saving(); + } + + public AccordCachingState reset() + { + state(state.reset()); + return this; + } + + public Throwable failure() + { + return complete().failure(); + } + + public void markEvicted() + { + state(complete().evict()); + lastQueriedEstimatedSizeOnHeap = 0; + shouldUpdateSize = false; + } + + long estimateStateOnHeapSize(ToLongFunction estimateFunction) + { + return state.estimateOnHeapSize(estimateFunction); + } + + public enum Status + { + UNINITIALIZED, + LOADING, + LOADED, + FAILED_TO_LOAD, + MODIFIED, + SAVING, + + /** + * Attempted to save but failed. Shouldn't normally happen unless we have a bug in serialization, + * or commit log has been stopped. + */ + FAILED_TO_SAVE, + + /** + * Entry has been successfully evicted, but there were transient listeners present, so we kept the + * Node around (transient listeners must survive cache eviction). + */ + EVICTED, + ; + + boolean isLoaded() + { + return this == LOADED || this == MODIFIED || this == FAILED_TO_SAVE; + } + + boolean isComplete() + { + return !(this == LOADING || this == SAVING); + } + } + + interface State + { + Status status(); + + default boolean isCompleteable() + { + return false; + } + + default State complete() + { + throw illegalState(this, "complete()"); + } + + default Loading load(K key, Function loadFunction) + { + throw illegalState(this, "load(key, loadFunction)"); + } + + default RunnableResult loading() + { + throw illegalState(this, "loading()"); + } + + default V get() + { + throw illegalState(this, "get()"); + } + + default State set(V value) + { + throw illegalState(this, "set(value)"); + } + + default State save(BiFunction saveFunction) + { + throw illegalState(this, "save(saveFunction)"); + } + + default RunnableResult saving() + { + throw illegalState(this, "saving()"); + } + + default Throwable failure() + { + throw illegalState(this, "failure()"); + } + + default Uninitialized reset() + { + throw illegalState(this, "reset()"); + } + + default Evicted evict() + { + throw illegalState(this, "evict()"); + } + + default long estimateOnHeapSize(ToLongFunction estimateFunction) + { + return 0; + } + } + + private static IllegalStateException illegalState(State state, String method) + { + return new IllegalStateException(format("%s invoked on %s", method, state.status())); + } + + static class Uninitialized implements State + { + static final Uninitialized instance = new Uninitialized<>(); + + @SuppressWarnings("unchecked") + static Uninitialized instance() + { + return (Uninitialized) instance; + } + + @Override + public Status status() + { + return UNINITIALIZED; + } + + @Override + public Loading load(K key, Function loadFunction) + { + return new Loading<>(() -> loadFunction.apply(key)); + } + + @Override + public Evicted evict() + { + return Evicted.instance(); + } + } + + static class Loading extends RunnableResult implements State + { + Loading(Callable callable) + { + super(callable); + } + + @Override + public Status status() + { + return LOADING; + } + + @Override + public boolean isCompleteable() + { + return isDone(); + } + + @Override + public State complete() + { + if (!isDone()) return this; + else if (isSuccess()) return new Loaded<>(result()); + else return new FailedToLoad<>(failure()); + } + + @Override + public RunnableResult loading() + { + return this; + } + } + + static class Loaded implements State + { + final V original; + + Loaded(V original) + { + this.original = original; + } + + @Override + public Status status() + { + return LOADED; + } + + @Override + public V get() + { + return original; + } + + @Override + public State set(V value) + { + return value == original ? this : new Modified<>(original, value); + } + + @Override + public Evicted evict() + { + return Evicted.instance(); + } + + @Override + public long estimateOnHeapSize(ToLongFunction estimateFunction) + { + return null == original ? 0 : estimateFunction.applyAsLong(original); + } + } + + static class FailedToLoad implements State + { + final Throwable cause; + + FailedToLoad(Throwable cause) + { + this.cause = cause; + } + + @Override + public Status status() + { + return FAILED_TO_LOAD; + } + + @Override + public Throwable failure() + { + return cause; + } + + @Override + public Uninitialized reset() + { + return Uninitialized.instance(); + } + + @Override + public Evicted evict() + { + return Evicted.instance(); + } + } + + static class Modified implements State + { + final V original; + V current; + + Modified(V original, V current) + { + this.original = original; + this.current = current; + } + + @Override + public Status status() + { + return MODIFIED; + } + + @Override + public V get() + { + return current; + } + + @Override + public State set(V value) + { + if (value == original) // change reverted + return new Loaded<>(original); + + current = value; + return this; + } + + @Override + public State save(BiFunction saveFunction) + { + Runnable runnable = saveFunction.apply(original, current); + if (null == runnable) // null mutation -> null Runnable -> no change on disk + return new Loaded<>(current); + else + return new Saving<>(current, runnable); + } + + @Override + public long estimateOnHeapSize(ToLongFunction estimateFunction) + { + return (null == original ? 0 : estimateFunction.applyAsLong(original)) + + (null == current ? 0 : estimateFunction.applyAsLong(current)); + } + } + + static class Saving extends RunnableResult implements State + { + V current; + + Saving(V current, Runnable saveRunnable) + { + super(() -> { saveRunnable.run(); return null; }); + this.current = current; + } + + @Override + public Status status() + { + return SAVING; + } + + @Override + public boolean isCompleteable() + { + return isDone(); + } + + @Override + public State complete() + { + if (!isDone()) return this; + else if (isSuccess()) return new Loaded<>(current); + else return new FailedToSave<>(current, failure()); + } + + @Override + public RunnableResult saving() + { + return this; + } + } + + static class FailedToSave implements State + { + V current; + final Throwable cause; + + FailedToSave(V current, Throwable cause) + { + this.current = current; + this.cause = cause; + } + + @Override + public Status status() + { + return FAILED_TO_SAVE; + } + + @Override + public V get() + { + return current; + } + + @Override + public State set(V value) + { + current = value; + return this; + } + + @Override + public Throwable failure() + { + return cause; + } + } + + static class Evicted implements State + { + static final Evicted instance = new Evicted<>(); + + @SuppressWarnings("unchecked") + static Evicted instance() + { + return (Evicted) instance; + } + + @Override + public Status status() + { + return EVICTED; + } + + @Override + public Uninitialized reset() + { + return Uninitialized.instance(); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index d2f627fb9b09..84869c179d11 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -30,6 +30,8 @@ import java.util.function.Consumer; import java.util.function.Function; +import javax.annotation.Nullable; + import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; @@ -66,7 +68,11 @@ import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.Observable; +import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.concurrent.AsyncPromise; @@ -111,14 +117,39 @@ public AccordCommandStore(int id, DataStore dataStore, ProgressLog.Factory progressLogFactory, RangesForEpochHolder rangesForEpoch) + { + this(id, time, agent, dataStore, progressLogFactory, rangesForEpoch, Stage.READ.executor(), Stage.MUTATION.executor()); + } + + @VisibleForTesting + public AccordCommandStore(int id, + NodeTimeService time, + Agent agent, + DataStore dataStore, + ProgressLog.Factory progressLogFactory, + RangesForEpochHolder rangesForEpoch, + ExecutorPlus loadExecutor, + ExecutorPlus saveExecutor) { super(id, time, agent, dataStore, progressLogFactory, rangesForEpoch); - this.loggingId = String.format("[%s]", id); - this.executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); - this.threadId = getThreadId(this.executor); - this.stateCache = new AccordStateCache(8<<20); - this.commandCache = stateCache.instance(TxnId.class, accord.local.Command.class, AccordSafeCommand::new, AccordObjectSizes::command); - this.commandsForKeyCache = stateCache.instance(RoutableKey.class, CommandsForKey.class, AccordSafeCommandsForKey::new, AccordObjectSizes::commandsForKey); + loggingId = String.format("[%s]", id); + executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); + threadId = getThreadId(this.executor); + stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20); + commandCache = + stateCache.instance(TxnId.class, + TxnId.class, + AccordSafeCommand::new, + this::loadCommand, + this::saveCommand, + AccordObjectSizes::command); + commandsForKeyCache = + stateCache.instance(RoutableKey.class, + PartitionKey.class, + AccordSafeCommandsForKey::new, + this::loadCommandsForKey, + this::saveCommandsForKey, + AccordObjectSizes::commandsForKey); executor.execute(() -> CommandStore.register(this)); executor.execute(this::loadRangesToCommands); } @@ -139,7 +170,7 @@ public void onNext(UntypedResultSet.Row row) throws Exception PartialTxn txn = AccordKeyspace.deserializeTxn(row); Seekables keys = txn.keys(); if (keys.domain() != Routable.Domain.Range) - throw new AssertionError(String.format("Txn keys are not range", txn)); + throw new AssertionError(String.format("Txn keys are not range for %s", txn)); Ranges ranges = (Ranges) keys; PartialDeps deps = AccordKeyspace.deserializeDependencies(row); @@ -219,6 +250,30 @@ public AccordStateCache.Instance operation) @@ -319,9 +374,7 @@ public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, return current; } - public void completeOperation(AccordSafeCommandStore store, - Map commands, - Map commandsForKeys) + public void completeOperation(AccordSafeCommandStore store) { Invariants.checkState(current == store); current.complete(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index d02c48badcf3..e67d9952ada3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -528,11 +528,14 @@ private static int estimateMapChanges(Function> get, C or } public static Mutation getCommandMutation(AccordCommandStore commandStore, AccordSafeCommand liveCommand, long timestampMicros) + { + return getCommandMutation(commandStore.id(), liveCommand.original(), liveCommand.current(), timestampMicros); + } + + public static Mutation getCommandMutation(int storeId, Command original, Command command, long timestampMicros) { try { - Command command = liveCommand.current(); - Command original = liveCommand.original(); Invariants.checkArgument(original != command); Row.Builder builder = BTreeRow.unsortedBuilder(); @@ -569,12 +572,13 @@ public static Mutation getCommandMutation(AccordCommandStore commandStore, Accor } } - ByteBuffer key = CommandsColumns.keyComparator.make(commandStore.id(), - command.txnId().domain().ordinal(), - serializeTimestamp(command.txnId())).serializeAsPartitionKey(); Row row = builder.build(); if (row.isEmpty()) return null; + + ByteBuffer key = CommandsColumns.keyComparator.make(storeId, + command.txnId().domain().ordinal(), + serializeTimestamp(command.txnId())).serializeAsPartitionKey(); PartitionUpdate update = PartitionUpdate.singleRowUpdate(Commands, key, row); return new Mutation(update); } @@ -1014,26 +1018,29 @@ private static void addSeriesMutations(CommandsForKey original, addSeriesMutations(kind.getValues(original), kind.getValues(cfk), kind, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); } - private static DecoratedKey makeKey(CommandStore commandStore, PartitionKey key) + private static DecoratedKey makeKey(int storeId, PartitionKey key) { Token token = key.token(); - ByteBuffer pk = CommandsForKeyColumns.keyComparator.make(commandStore.id(), - serializeToken(token), - serializeKey(key)).serializeAsPartitionKey(); + ByteBuffer pk = CommandsForKeyColumns.keyComparator.make(storeId, + serializeToken(token), + serializeKey(key)).serializeAsPartitionKey(); return CommandsForKeys.partitioner.decorateKey(pk); } - private static DecoratedKey makeKey(CommandStore commandStore, CommandsForKey cfk) + private static DecoratedKey makeKey(int storeId, CommandsForKey cfk) { - return makeKey(commandStore, (PartitionKey) cfk.key()); + return makeKey(storeId, (PartitionKey) cfk.key()); } public static Mutation getCommandsForKeyMutation(AccordCommandStore commandStore, AccordSafeCommandsForKey liveCfk, long timestampMicros) + { + return getCommandsForKeyMutation(commandStore.id(), liveCfk.original(), liveCfk.current(), timestampMicros); + } + + public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKey original, CommandsForKey cfk, long timestampMicros) { try { - CommandsForKey cfk = liveCfk.current(); - CommandsForKey original = liveCfk.original(); Invariants.checkArgument(original != cfk); // TODO: convert to byte arrays ValueAccessor accessor = ByteBufferAccessor.instance; @@ -1046,7 +1053,7 @@ public static Mutation getCommandsForKeyMutation(AccordCommandStore commandStore + estimateMapChanges(c -> c.byExecuteAt().commands, original, cfk); PartitionUpdate.Builder partitionBuilder = new PartitionUpdate.Builder(CommandsForKeys, - makeKey(commandStore, cfk), + makeKey(storeId, cfk), CommandsForKeyColumns.columnsFor(original, cfk), expectedRows); @@ -1095,13 +1102,13 @@ private static ByteBuffer clusteringValue(Clustering clustering, int idx) return clustering.accessor().toBuffer(clustering.get(idx)); } - public static SinglePartitionReadCommand getCommandsForKeyRead(CommandStore commandStore, PartitionKey key, long nowInSeconds) + public static SinglePartitionReadCommand getCommandsForKeyRead(int storeId, PartitionKey key, long nowInSeconds) { return SinglePartitionReadCommand.create(CommandsForKeys, nowInSeconds, CommandsForKeyColumns.allColumns, RowFilter.none(), DataLimits.NONE, - makeKey(commandStore, key), + makeKey(storeId, key), FULL_PARTITION); } @@ -1111,7 +1118,7 @@ public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - SinglePartitionReadCommand command = getCommandsForKeyRead(commandStore, key, nowInSeconds); + SinglePartitionReadCommand command = getCommandsForKeyRead(commandStore.id(), key, nowInSeconds); EnumMap> seriesMaps = new EnumMap<>(SeriesKind.class); for (SeriesKind kind : SeriesKind.values()) diff --git a/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java b/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java deleted file mode 100644 index 8dea7389c0c7..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordLoadingState.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.Collection; -import java.util.Collections; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.function.Function; - -import accord.local.Command; -import accord.utils.DeterministicIdentitySet; -import accord.utils.async.AsyncChain; -import accord.utils.async.AsyncResults; - -/** - * Global state that manages loading states - */ -public class AccordLoadingState -{ - public enum LoadingState { UNINITIALIZED, PENDING, LOADED, FAILED } - private interface NonValueState {} - - private static final NonValueState UNINITIALIZED = new NonValueState() {}; - - private static class PendingLoad extends AsyncResults.RunnableResult implements NonValueState - { - public PendingLoad(Callable callable) - { - super(callable); - } - } - - private static class FailedLoad implements NonValueState - { - private final Throwable cause; - - public FailedLoad(Throwable cause) - { - this.cause = cause; - } - } - - private final K key; - private Object state = UNINITIALIZED; - private Set transientListeners; - - public AccordLoadingState(K key) - { - this.key = key; - } - - private LoadingState maybeCleanupLoad() - { - PendingLoad load = (PendingLoad) state; - if (!load.isDone()) - return LoadingState.PENDING; - - if (load.isSuccess()) - { - state = load.result(); - return LoadingState.LOADED; - } - else - { - state = new FailedLoad(load.failure()); - return LoadingState.FAILED; - } - } - - private static IllegalStateException unexpectedState(LoadingState expected, LoadingState actual) - { - return new IllegalStateException(String.format("Unexpected state. Expected %s, was %s", expected, actual)); - } - - /** - * Returns the current loading state. Since most calls here will be initiated by AsyncChain callbacks on - * load completion/failure, we attempt to complete any pending states so the caller doesn't have to remember - * to. The exception is the listen method, to prevent races where the caller found a pending load, attempts - * to register a callback, but gets an exception because the load completed in the meantime. - */ - private LoadingState state(boolean attemptLoadCompletion) - { - if (!(state instanceof NonValueState)) - return LoadingState.LOADED; - - if (state == UNINITIALIZED) - return LoadingState.UNINITIALIZED; - - if (state instanceof PendingLoad) - return attemptLoadCompletion - ? maybeCleanupLoad() - : LoadingState.PENDING; - - if (state instanceof FailedLoad) - return LoadingState.FAILED; - - throw new IllegalStateException("Unhandled state " + state); - } - - public LoadingState state() - { - return state(true); - } - - private void checkState(LoadingState expected, boolean attemptLoadCompletion) - { - LoadingState actual = state(attemptLoadCompletion); - if (actual != expected) - throw unexpectedState(expected, actual); - } - - public K key() - { - return key; - } - - public V value() - { - checkState(LoadingState.LOADED, true); - return (V) state; - } - - public void value(V value) - { - checkState(LoadingState.LOADED, true); - state = value; - } - - public Throwable failure() - { - checkState(LoadingState.FAILED, true); - return ((FailedLoad) state).cause; - } - - /** - * Return a runnable that will run the loadFunction in a separate thread. When the runnable - * has completed, the state load will have either completed, or failed. - */ - public AsyncResults.RunnableResult load(Function loadFunction) - { - checkState(LoadingState.UNINITIALIZED, true); - PendingLoad pendingLoad = new PendingLoad<>(() -> loadFunction.apply(key)); - state = pendingLoad; - return pendingLoad; - } - - public AsyncChain listen() - { - checkState(LoadingState.PENDING, false); - return (PendingLoad) state; - } - - - public void addListener(Command.TransientListener listener) - { - if (transientListeners == null) - transientListeners = new DeterministicIdentitySet<>(); - transientListeners.add(listener); - } - - public boolean removeListener(Command.TransientListener listener) - { - if (transientListeners == null) - return false; - - return transientListeners.remove(listener); - } - - public Collection transientListeners() - { - if (transientListeners == null) - return Collections.emptySet(); - return transientListeners; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index b44f684e83f9..516cdead1294 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -30,11 +30,11 @@ public class AccordSafeCommand extends SafeCommand implements AccordSafeState { private boolean invalidated; - private final AccordLoadingState global; + private final AccordCachingState global; private Command original; private Command current; - public AccordSafeCommand(AccordLoadingState global) + public AccordSafeCommand(AccordCachingState global) { super(global.key()); this.global = global; @@ -69,7 +69,7 @@ public String toString() } @Override - public AccordLoadingState global() + public AccordCachingState global() { checkNotInvalidated(); return global; @@ -100,7 +100,7 @@ public Command original() public void preExecute() { checkNotInvalidated(); - original = global.value(); + original = global.get(); current = original; } @@ -108,7 +108,7 @@ public void preExecute() public void postExecute() { checkNotInvalidated(); - global.value(current); + global.set(current); } @Override @@ -126,18 +126,21 @@ public boolean invalidated() @Override public void addListener(Command.TransientListener listener) { + checkNotInvalidated(); global.addListener(listener); } @Override public boolean removeListener(Command.TransientListener listener) { - return global().removeListener(listener); + checkNotInvalidated(); + return global.removeListener(listener); } @Override public Collection transientListeners() { - return global.transientListeners(); + checkNotInvalidated(); + return global.listeners(); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 5c6ac654f6cd..cb084201646d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -97,13 +97,17 @@ protected void addCommandsForKeyInternal(AccordSafeCommandsForKey cfk) @Override protected AccordSafeCommand getIfLoaded(TxnId txnId) { - return commandStore.commandCache().referenceAndGetIfLoaded(txnId); + AccordSafeCommand command = commandStore.commandCache().acquireIfLoaded(txnId); + if (command != null) command.preExecute(); + return command; } @Override protected AccordSafeCommandsForKey getIfLoaded(RoutableKey key) { - return commandStore.commandsForKeyCache().referenceAndGetIfLoaded(key); + AccordSafeCommandsForKey cfk = commandStore.commandsForKeyCache().acquireIfLoaded(key); + if (cfk != null) cfk.preExecute(); + return cfk; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index a3b0595f2eb5..0df25ab6efbb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -30,11 +30,11 @@ public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState { private boolean invalidated; - private final AccordLoadingState global; + private final AccordCachingState global; private CommandsForKey original; private CommandsForKey current; - public AccordSafeCommandsForKey(AccordLoadingState global) + public AccordSafeCommandsForKey(AccordCachingState global) { super((Key) global.key()); this.global = global; @@ -69,7 +69,7 @@ public String toString() } @Override - public AccordLoadingState global() + public AccordCachingState global() { checkNotInvalidated(); return global; @@ -100,7 +100,7 @@ public CommandsForKey original() public void preExecute() { checkNotInvalidated(); - original = global.value(); + original = global.get(); current = original; } @@ -108,7 +108,7 @@ public void preExecute() public void postExecute() { checkNotInvalidated(); - global.value(current); + global.set(current); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeState.java b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java index 4c8f2ec1f983..b742efb9d103 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java @@ -15,15 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.cassandra.service.accord; -import java.util.function.Function; - import accord.impl.SafeState; import accord.utils.async.AsyncChain; -import accord.utils.async.AsyncResults; -import org.apache.cassandra.service.accord.AccordLoadingState.LoadingState; +import org.apache.cassandra.service.accord.AccordCachingState.Status; public interface AccordSafeState extends SafeState { @@ -31,7 +27,7 @@ public interface AccordSafeState extends SafeState V original(); void preExecute(); void postExecute(); - AccordLoadingState global(); + AccordCachingState global(); default boolean hasUpdate() { @@ -48,19 +44,19 @@ default K key() return global().key(); } - default LoadingState loadingState() + default Status globalStatus() { - return global().state(); + return global().status(); } - default AsyncResults.RunnableResult load(Function loadFunction) + default AsyncChain loading() { - return global().load(loadFunction); + return global().loading(); } - default AsyncChain listen() + default AsyncChain saving() { - return global().listen(); + return global().saving(); } default Throwable failure() diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 1cb4fb5a4503..02da5ca74e46 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -15,121 +15,42 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.cassandra.service.accord; import java.util.HashMap; -import java.util.HashSet; +import java.util.Iterator; import java.util.Map; -import java.util.Objects; -import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.ToLongFunction; import java.util.stream.Stream; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utils.Invariants; -import accord.utils.async.AsyncChains; -import accord.utils.async.AsyncResult; -import org.apache.cassandra.utils.ObjectSizes; +import accord.utils.IntrusiveLinkedList; +import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.service.accord.AccordCachingState.Status; -import static org.apache.cassandra.service.accord.AccordLoadingState.LoadingState.FAILED; -import static org.apache.cassandra.service.accord.AccordLoadingState.LoadingState.LOADED; +import static accord.utils.Invariants.checkState; +import static java.lang.String.format; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.EVICTED; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_LOAD; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADING; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.SAVING; /** * Cache for AccordCommand and AccordCommandsForKey, available memory is shared between the two object types. - * + *

* Supports dynamic object sizes. After each acquire/free cycle, the cacheable objects size is recomputed to * account for data added/removed during txn processing if it's modified flag is set */ -public class AccordStateCache +public class AccordStateCache extends IntrusiveLinkedList> { private static final Logger logger = LoggerFactory.getLogger(AccordStateCache.class); - public static class Node extends AccordLoadingState - { - static final long EMPTY_SIZE = ObjectSizes.measure(new AccordStateCache.Node(null)); - - private Node prev; - private Node next; - private int references = 0; - private long lastQueriedEstimatedSizeOnHeap = 0; - - public Node(K key) - { - super(key); - } - - public int referenceCount() - { - return references; - } - - boolean isLoaded() - { - return state() == LOADED; - } - - public boolean isComplete() - { - switch (state()) - { - case PENDING: - case UNINITIALIZED: - return false; - case FAILED: - case LOADED: - return true; - default: throw new UnsupportedOperationException("Unknown state: " + state()); - } - } - - private boolean isInQueue() - { - return prev != null && next != null; - } - - long estimatedSizeOnHeap(ToLongFunction estimator) - { - long result = EMPTY_SIZE; - V v; - if (isLoaded() && (v = value()) != null) - result += estimator.applyAsLong(v); - lastQueriedEstimatedSizeOnHeap = result; - return result; - } - - long estimatedSizeOnHeapDelta(ToLongFunction estimator) - { - long prevSize = lastQueriedEstimatedSizeOnHeap; - return estimatedSizeOnHeap(estimator) - prevSize; - } - - boolean shouldUpdateSize() - { - return isLoaded() && lastQueriedEstimatedSizeOnHeap == EMPTY_SIZE; - } - - void maybeCleanupLoad() - { - state(); - } - - @Override - public String toString() - { - return "Node{" + state() + - ", key=" + key() + - ", references=" + references + - "}@" + Integer.toHexString(System.identityHashCode(this)); - } - } - static class Stats { private long queries; @@ -137,37 +58,27 @@ static class Stats private long misses; } - private static class NamedMap extends HashMap - { - final String name; + private final Map> cache = new HashMap<>(); + private final HashMap, Instance> instances = new HashMap<>(); - public NamedMap(String name) - { - this.name = name; - } - } - - private final Map> cache = new HashMap<>(); - private final Set> instances = new HashSet<>(); - - private final NamedMap> saveResults = new NamedMap<>("saveResults"); + private final ExecutorPlus loadExecutor, saveExecutor; private int unreferenced = 0; - Node head; - Node tail; private long maxSizeInBytes; private long bytesCached = 0; private final Stats stats = new Stats(); - public AccordStateCache(long maxSizeInBytes) + public AccordStateCache(ExecutorPlus loadExecutor, ExecutorPlus saveExecutor, long maxSizeInBytes) { + this.loadExecutor = loadExecutor; + this.saveExecutor = saveExecutor; this.maxSizeInBytes = maxSizeInBytes; } public void setMaxSize(long size) { maxSizeInBytes = size; - maybeEvict(); + maybeEvictSomeNodes(); } public long getMaxSize() @@ -175,387 +86,372 @@ public long getMaxSize() return maxSizeInBytes; } - @VisibleForTesting - public void clear() - { - head = tail = null; - cache.clear(); - saveResults.clear(); - } - - @VisibleForTesting - public Map> saveResults() - { - return saveResults; - } - - private void unlink(Node node) + private void unlink(AccordCachingState node) { - Node prev = node.prev; - Node next = node.next; - - if (prev == null) - { - Preconditions.checkState(head == node, "previous is null but the head isnt the provided node!"); - head = next; - } - else - { - prev.next = next; - } - - if (next == null) - { - Preconditions.checkState(tail == node, "next is null but the tail isnt the provided node!"); - tail = prev; - } - else - { - next.prev = prev; - } - - node.prev = null; - node.next = null; + node.unlink(); unreferenced--; } - private void push(Node node) + private void link(AccordCachingState node) { - if (head != null) - { - node.prev = null; - node.next = head; - head.prev = node; - head = node; - } - else - { - head = node; - tail = node; - } + addLast(node); unreferenced++; } - private void updateSize(Node node, ToLongFunction estimator) - { - bytesCached += node.estimatedSizeOnHeapDelta(estimator); - } - - // don't evict if there's an outstanding save result. If an item is evicted then reloaded - // before it's mutation is applied, out of date info will be loaded - private boolean canEvict(Node node) + @SuppressWarnings("unchecked") + private void maybeUpdateSize(AccordCachingState node, ToLongFunction estimator) { - Invariants.checkState(node.references == 0); - return node.state() == FAILED || !hasActiveAsyncResult(saveResults, node.key()); + if (node.shouldUpdateSize()) + bytesCached += ((AccordCachingState) node).estimatedSizeOnHeapDelta((ToLongFunction) estimator); } - private void maybeEvict() + /* + * Roughly respects LRU semantics when evicting. Might consider prioritising keeping MODIFIED nodes around + * for longer to maximise the chances of hitting system tables fewer times (or not at all). + */ + private void maybeEvictSomeNodes() { if (bytesCached <= maxSizeInBytes) return; - Node current = tail; - while (current != null && bytesCached > maxSizeInBytes) + Iterator> iter = this.iterator(); + while (iter.hasNext() && bytesCached > maxSizeInBytes) { - Node evict = current; - current = current.prev; - - // TODO (expected, efficiency): can this be reworked so we're not skipping unevictable nodes everytime we try to evict? - if (!canEvict(evict)) - continue; + AccordCachingState node = iter.next(); + checkState(node.references == 0); - evict(evict, true); + /* + * TODO (expected, efficiency): + * can this be reworked so we're not skipping unevictable nodes everytime we try to evict? + */ + Status status = node.status(); // status() call completes (if completeable) + switch (status) + { + default: throw new IllegalStateException("Unhandled status " + status); + case LOADED: + unlink(node); + evict(node); + break; + case MODIFIED: + // schedule a save to disk, keep linked and in the cache map + Instance instance = instanceForNode(node); + node.save(saveExecutor, instance.saveFunction); + maybeUpdateSize(node, instance.heapEstimator); + break; + case SAVING: + // skip over until completes to LOADED or FAILED_TO_SAVE + break; + case FAILED_TO_SAVE: + // TODO (consider): panic when a save fails + // permanently unlink, but keep in the map + unlink(node); + } } } - private void evict(Node evict, boolean unlink) + private boolean isInQueue(AccordCachingState node) { - logger.trace("Evicting {} {} - {}", evict.state(), evict.key(), evict.isLoaded() ? evict.value() : null); - if (unlink) - unlink(evict); - else - Invariants.checkState(!evict.isInQueue()); - - Node self = cache.get(evict.key()); - Invariants.checkState(self == evict, "Leaked node detected; was attempting to remove %s but cache had %s", evict, self); - cache.remove(evict.key()); - bytesCached -= evict.lastQueriedEstimatedSizeOnHeap; + return node.isLinked(); } - private static > F getAsyncResult(NamedMap resultMap, K key) + private void evict(AccordCachingState node) { - F r = resultMap.get(key); - if (r == null) - return null; - - // if the result was a failure, can not remove from the map as this would allow eviction - if (!r.isSuccess()) - return r; - if (logger.isTraceEnabled()) - logger.trace("Clearing result for {} from {}: {}", key, resultMap.name, r); - resultMap.remove(key); - return null; - } + logger.trace("Evicting {} {} - {}", node.status(), node.key(), node.isLoaded() ? node.get() : null); - private static > void setAsyncResult(Map resultsMap, K key, F result) - { - Preconditions.checkState(!resultsMap.containsKey(key)); - resultsMap.put(key, result); - } + checkState(!isInQueue(node)); - private static boolean hasActiveAsyncResult(NamedMap> resultMap, K key) - { - // getResult only returns a result if it is not complete, so don't need to check if its been completed - return getAsyncResult(resultMap, key) != null; - } - - private static void mergeAsyncResult(Map> resultMap, K key, AsyncResult result) - { - AsyncResult existing = resultMap.get(key); - if (existing != null && !existing.isDone()) + bytesCached -= node.lastQueriedEstimatedSizeOnHeap; + if (!node.hasListeners()) { - logger.trace("Merging result {} with existing {}", result, existing); - result = AsyncChains.reduce(existing, result, (a, b) -> null).beginAsResult(); + AccordCachingState self = cache.remove(node.key()); + checkState(self == node, "Leaked node detected; was attempting to remove %s but cache had %s", node, self); + } + else + { + node.markEvicted(); // keep the node in the cache to prevent transient listeners from being GCd } - - resultMap.put(key, result); } - @VisibleForTesting - private void maybeCleanupLoad(K key) + private Instance instanceForNode(AccordCachingState node) { - Node node = cache.get(key); - if (node != null) - node.maybeCleanupLoad(); + return instances.get(node.key().getClass()); } - private void maybeClearAsyncResult(K key) + public > Instance instance( + Class keyClass, + Class realKeyClass, + Function, S> safeRefFactory, + Function loadFunction, + BiFunction saveFunction, + ToLongFunction heapEstimator) { - maybeCleanupLoad(key); - // will clear if it's done - getAsyncResult(saveResults, key); + Instance instance = + new Instance<>(keyClass, safeRefFactory, loadFunction, saveFunction, heapEstimator); + + if (instances.put(realKeyClass, instance) != null) + throw new IllegalArgumentException(format("Cache instances for key type %s already exists", realKeyClass.getName())); + + return instance; } public class Instance> { private final Class keyClass; - private final Class valClass; - private final Function, S> safeRefFactory; + private final Function, S> safeRefFactory; + private Function loadFunction; + private BiFunction saveFunction; private final ToLongFunction heapEstimator; private final Stats stats = new Stats(); - public Instance(Class keyClass, Class valClass, Function, S> safeRefFactory, ToLongFunction heapEstimator) + public Instance( + Class keyClass, + Function, S> safeRefFactory, + Function loadFunction, + BiFunction saveFunction, + ToLongFunction heapEstimator) { this.keyClass = keyClass; - this.valClass = valClass; this.safeRefFactory = safeRefFactory; + this.loadFunction = loadFunction; + this.saveFunction = saveFunction; this.heapEstimator = heapEstimator; } - public Stream> stream() + public Stream> stream() { return cache.entrySet().stream() .filter(e -> keyClass.isAssignableFrom(e.getKey().getClass())) - .map(e -> (Node) e.getValue()); + .map(e -> (AccordCachingState) e.getValue()); } - @Override - public boolean equals(Object o) + public S acquire(K key) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Instance instance = (Instance) o; - return keyClass.equals(instance.keyClass) && valClass.equals(instance.valClass); + AccordCachingState node = acquire(key, false); + return safeRefFactory.apply(node); } - @Override - public int hashCode() + public S acquireIfLoaded(K key) { - return Objects.hash(keyClass, valClass); + AccordCachingState node = acquire(key, true); + if (node == null) + return null; + return safeRefFactory.apply(node); } - private Node reference(K key, boolean createIfAbsent) + private AccordCachingState acquire(K key, boolean onlyIfLoaded) { - stats.queries++; - AccordStateCache.this.stats.queries++; + incrementCacheQueries(); + @SuppressWarnings("unchecked") + AccordCachingState node = (AccordCachingState) cache.get(key); + return node == null + ? acquireAbsent(key, onlyIfLoaded) + : acquireExisting(node, onlyIfLoaded); + } - Node node = (Node) cache.get(key); - if (node == null) - { - stats.misses++; - AccordStateCache.this.stats.misses++; - if (!createIfAbsent) - return null; - node = new Node<>(key); - // need to store ref right away, so eviction can not remove - node.references++; - cache.put(key, node); - updateSize(node, heapEstimator); - maybeEvict(); - } + /* + * Can only return a LOADING Node (or null) + */ + private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded) + { + incrementCacheMisses(); + if (onlyIfLoaded) + return null; + AccordCachingState node = new AccordCachingState<>(key); + node.load(loadExecutor, loadFunction); + node.references++; + cache.put(key, node); + maybeUpdateSize(node, heapEstimator); + maybeEvictSomeNodes(); + return node; + } + + /* + * Can't return EVICTED or INITIALIZED + */ + private AccordCachingState acquireExisting(AccordCachingState node, boolean onlyIfLoaded) + { + Status status = node.status(); // status() completes + + if (status.isLoaded()) + incrementCacheHits(); else + incrementCacheMisses(); + + if (onlyIfLoaded && !status.isLoaded()) + return null; + + if (node.references == 0) { - if (node.state() == FAILED) - { - if (node.references != 0) - { - //TODO concurrent access to a failed node - // the API does not return Node but instead what node points to, this is a problem in this case as - // releasing 42 would attempt to release the retry and not the failed that is trying to cleanup - throw new UnsupportedOperationException("Attempted to reference failed node " + node); - } - - evict(node, true); - return reference(key, createIfAbsent); - } - stats.hits++; - AccordStateCache.this.stats.hits++; - if (node.references == 0) + if (status == FAILED_TO_LOAD || status == EVICTED) + node.reset().load(loadExecutor, loadFunction); + + if (isInQueue(node)) unlink(node); - else - Invariants.checkState(!node.isInQueue()); - node.references++; } + node.references++; return node; } - public S reference(K key) + public void release(S safeRef) { - Node node = reference(key, true); - return safeRefFactory.apply(node); - } + K key = safeRef.global().key(); + logger.trace("Releasing resources for {}: {}", key, safeRef); - public S referenceAndGetIfLoaded(K key) - { - Node node = reference(key, false); - if (node == null || !node.isLoaded()) - return null; - S safeRef = safeRefFactory.apply(node); - safeRef.preExecute(); - return safeRef; + @SuppressWarnings("unchecked") + AccordCachingState node = (AccordCachingState) cache.get(key); + + checkState(node != null, "node is null for %s", key); + checkState(node.references > 0, "references (%d) are zero for %s (%s)", node.references, key, node); + checkState(safeRef.global() == node); + checkState(!isInQueue(node)); + + if (safeRef.hasUpdate()) + node.set(safeRef.current()); + + maybeUpdateSize(node, heapEstimator); + + if (--node.references == 0) + { + Status status = node.status(); // status() completes + switch (status) + { + default: throw new IllegalStateException("Unhandled status " + status); + case LOADING: + case FAILED_TO_LOAD: + logger.trace("Evicting {} with status {}", key, status); + evict(node); + break; + case LOADED: + case MODIFIED: + case SAVING: + logger.trace("Moving {} with status {} to eviction queue", key, status); + link(node); + break; + case FAILED_TO_SAVE: + break; // can never evict, so no point in adding to eviction queue either + } + } + + // TODO (performance, expected): triggering on every release is potentially heavy + maybeEvictSomeNodes(); } @VisibleForTesting - public Node getUnsafe(K key) + public AccordCachingState getUnsafe(K key) { - return (Node) cache.get(key); + //noinspection unchecked + return (AccordCachingState) cache.get(key); } @VisibleForTesting public boolean isReferenced(K key) { - Node node = (Node) cache.get(key); + //noinspection unchecked + AccordCachingState node = (AccordCachingState) cache.get(key); return node != null && node.references > 0; } @VisibleForTesting public boolean isLoaded(K key) { - Node node = (Node) cache.get(key); + //noinspection unchecked + AccordCachingState node = (AccordCachingState) cache.get(key); return node != null && node.isLoaded(); } - public void release(S safeRef) + @VisibleForTesting + public boolean hasLoadResult(K key) { - K key = safeRef.global().key(); - logger.trace("Releasing resources for {}: {}", key, safeRef); - maybeClearAsyncResult(key); - Node node = (Node) cache.get(key); - Invariants.checkState(node != null, "node is null for %s", key); - Invariants.checkState(node.references > 0, "references (%d) are zero for %s (%s)", node.references, key, node); - - Invariants.checkState(safeRef.global() == node); - if (node.isLoaded() && (safeRef.hasUpdate() || node.shouldUpdateSize())) - { - node.value(safeRef.current()); - updateSize(node, heapEstimator); - } - - if (--node.references == 0) - { - if (node.state() == FAILED) - { - logger.trace("Found failed node {}, evicting", key); - evict(node, false); - } - else - { - logger.trace("Moving {} from active pool to cache", key); - Invariants.checkState(!node.isInQueue()); - push(node); - } - } - - maybeEvict(); + AccordCachingState node = cache.get(key); + return node != null && node.status() == LOADING; } @VisibleForTesting - public boolean canEvict(K key) + public boolean hasSaveResult(K key) { - return AccordStateCache.this.canEvict(cache.get(key)); + AccordCachingState node = cache.get(key); + return node != null && node.status() == SAVING; } @VisibleForTesting - public boolean hasLoadResult(K key) + public void complete(K key) { - Node node = cache.get(key); - return node != null && !node.isLoaded(); + AccordCachingState node = cache.get(key); + if (node != null) + node.complete(); } - public void cleanupLoadResult(K key) + public long cacheQueries() { - maybeCleanupLoad(key); + return stats.queries; } - public AsyncResult getSaveResult(K key) + public long cacheHits() { - return getAsyncResult(saveResults, key); + return stats.hits; } - public void addSaveResult(K key, AsyncResult result) + public long cacheMisses() { - logger.trace("Adding save result for {}: {}", key, result); - mergeAsyncResult(saveResults, key, result); + return stats.misses; } - public void cleanupSaveResult(K key) + private void incrementCacheQueries() { - getSaveResult(key); + stats.queries++; + AccordStateCache.this.stats.queries++; } - @VisibleForTesting - public boolean hasSaveResult(K key) + private void incrementCacheHits() { - return saveResults.get(key) != null; + stats.hits++; + AccordStateCache.this.stats.hits++; } - public long cacheQueries() + private void incrementCacheMisses() { - return stats.queries; + stats.misses++; + AccordStateCache.this.stats.misses++; } - public long cacheHits() + @VisibleForTesting + public void unsafeSetLoadFunction(Function loadFunction) { - return stats.hits; + this.loadFunction = loadFunction; } - public long cacheMisses() + @VisibleForTesting + public void unsafeSetSaveFunction(BiFunction saveFunction) { - return stats.misses; + this.saveFunction = saveFunction; } } - public > Instance instance(Class keyClass, Class valClass, - Function, S> safeRefFactory, - ToLongFunction heapEstimator) + @VisibleForTesting + void unsafeClear() { - Instance instance = new Instance<>(keyClass, valClass, safeRefFactory, heapEstimator); - if (!instances.add(instance)) - throw new IllegalArgumentException(String.format("Cache instances for types %s -> %s already exists", - keyClass.getName(), valClass.getName())); - return instance; + cache.clear(); + //noinspection StatementWithEmptyBody + while (null != poll()); + } + + @VisibleForTesting + AccordCachingState head() + { + Iterator> iter = iterator(); + return iter.hasNext() ? iter.next() : null; + } + + @VisibleForTesting + AccordCachingState tail() + { + AccordCachingState last = null; + Iterator> iter = iterator(); + while (iter.hasNext()) + last = iter.next(); + return last; } @VisibleForTesting @@ -585,20 +481,21 @@ long bytesCached() @VisibleForTesting boolean keyIsReferenced(Object key) { - Node node = cache.get(key); + AccordCachingState node = cache.get(key); return node != null && node.references > 0; } @VisibleForTesting boolean keyIsCached(Object key) { - return cache.containsKey(key); + AccordCachingState node = cache.get(key); + return node != null && node.status() != EVICTED; } @VisibleForTesting int references(Object key) { - Node node = cache.get(key); + AccordCachingState node = cache.get(key); return node != null ? node.references : 0; } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index a8bff385d4df..c5715e745fe0 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.cassandra.service.accord.async; import java.util.ArrayList; @@ -25,7 +24,6 @@ import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; -import java.util.function.Function; import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; @@ -36,8 +34,6 @@ import org.slf4j.LoggerFactory; import accord.api.RoutingKey; -import accord.impl.CommandsForKey; -import accord.local.Command; import accord.local.PreLoadContext; import accord.primitives.Range; import accord.primitives.Ranges; @@ -48,21 +44,20 @@ import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; import accord.utils.async.Observable; -import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.service.accord.AccordCachingState; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordLoadingState; import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordStateCache; -import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.PartitionKey; public class AsyncLoader { private static final Logger logger = LoggerFactory.getLogger(AsyncLoader.class); + enum State { INITIALIZED, @@ -98,118 +93,72 @@ protected static Iterable txnIds(PreLoadContext context) private > void referenceAndAssembleReads(Iterable keys, Map context, AccordStateCache.Instance cache, - Function loadFunction, - List loadRunnables, List> listenChains) { for (K key : keys) { - S safeRef = cache.reference(key); + S safeRef = cache.acquire(key); context.put(key, safeRef); - AccordLoadingState.LoadingState state = safeRef.loadingState(); - switch (state) + AccordCachingState.Status status = safeRef.globalStatus(); // globalStatus() completes + switch (status) { - case UNINITIALIZED: - AsyncResults.RunnableResult load = safeRef.load(loadFunction); - listenChains.add(load); - loadRunnables.add(load); + default: throw new IllegalStateException("Unhandled global state: " + status); + case LOADING: + listenChains.add(safeRef.loading()); break; - case PENDING: - listenChains.add(safeRef.listen()); + case SAVING: + // make sure we work with a completed state that supports get() and set() + listenChains.add(safeRef.saving()); break; case LOADED: + case MODIFIED: + case FAILED_TO_SAVE: break; - case FAILED: + case FAILED_TO_LOAD: throw new RuntimeException(safeRef.failure()); - default: - throw new IllegalStateException("Unhandled loading state: " + state); } } } - @VisibleForTesting - Function loadCommandFunction() - { - return txnId -> AccordKeyspace.loadCommand(commandStore, txnId); - } - - @VisibleForTesting - Function loadCommandsPerKeyFunction() - { - return key -> AccordKeyspace.loadCommandsForKey(commandStore, (PartitionKey) key); - } - private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) { - List readRunnables = new ArrayList<>(); List> chains = new ArrayList<>(); - referenceAndAssembleReads(txnIds, - context.commands, - commandStore.commandCache(), - loadCommandFunction(), - readRunnables, - chains); + referenceAndAssembleReads(txnIds, context.commands, commandStore.commandCache(), chains); + switch (keysOrRanges.domain()) { case Key: // cast to Keys fails... Iterable keys = (Iterable) keysOrRanges; - referenceAndAssembleReads(keys, - context.commandsForKeys, - commandStore.commandsForKeyCache(), - loadCommandsPerKeyFunction(), - readRunnables, - chains); - break; + referenceAndAssembleReads(keys, context.commandsForKeys, commandStore.commandsForKeyCache(), chains); + break; case Range: chains.add(referenceAndDispatchReadsForRange(context)); - break; + break; default: throw new UnsupportedOperationException("Unable to process keys of " + keysOrRanges.domain()); } - if (chains.isEmpty()) - { - Invariants.checkState(readRunnables.isEmpty()); - return null; - } - - // runnable results are already contained in the chains collection - if (!readRunnables.isEmpty()) - AsyncChains.ofRunnables(Stage.READ.executor(), readRunnables).begin(commandStore.agent()); - return !chains.isEmpty() ? AsyncChains.reduce(chains, (a, b) -> null).beginAsResult() : null; } private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context context) { AsyncChain> overlappingKeys = findOverlappingKeys((Ranges) keysOrRanges); + return overlappingKeys.flatMap(keys -> { if (keys.isEmpty()) return AsyncChains.success(null); - // TODO (duplicate code): repeat of referenceAndDispatchReads - List readRunnables = new ArrayList<>(); List> chains = new ArrayList<>(); - referenceAndAssembleReads(keys, - context.commandsForKeys, - commandStore.commandsForKeyCache(), - loadCommandsPerKeyFunction(), - readRunnables, - chains); - // all keys are already loaded - if (chains.isEmpty()) - return AsyncChains.success(null); - // runnable results are already contained in the chains collection - if (!readRunnables.isEmpty()) - AsyncChains.ofRunnables(Stage.READ.executor(), readRunnables).begin(commandStore.agent()); - return AsyncChains.reduce(chains, (a, b) -> null); + referenceAndAssembleReads(keys, context.commandsForKeys, commandStore.commandsForKeyCache(), chains); + return chains.isEmpty() ? AsyncChains.success(null) : AsyncChains.reduce(chains, (a, b) -> null); }, commandStore); } private AsyncChain> findOverlappingKeys(Ranges ranges) { - assert !ranges.isEmpty(); + Invariants.checkArgument(!ranges.isEmpty()); List>> chains = new ArrayList<>(ranges.size()); for (Range range : ranges) diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 04bd716c1b8a..9a1ca51c6f93 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.cassandra.service.accord.async; import java.util.HashMap; @@ -24,7 +23,6 @@ import java.util.function.Consumer; import java.util.function.Function; -import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; @@ -43,6 +41,13 @@ import org.apache.cassandra.service.accord.AccordSafeState; import static org.apache.cassandra.service.accord.async.AsyncLoader.txnIds; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.INITIALIZED; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.LOADING; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.PREPARING; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.RUNNING; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.COMPLETING; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.FINISHED; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.FAILED; public abstract class AsyncOperation extends AsyncChains.Head implements Runnable, Function { @@ -74,36 +79,20 @@ void revertChanges() enum State { - INITIALIZED, - LOADING, - PREPARING_OPERATION, // setup safe store for RUNNING - RUNNING, - SAVING, // submits write to mutation stage - AWAITING_SAVE, // wait for writes to complete - COMPLETING, - FINISHED, - FAILED; + INITIALIZED, LOADING, PREPARING, RUNNING, COMPLETING, FINISHED, FAILED; boolean isComplete() { - switch (this) - { - case FAILED: - case FINISHED: - return true; - default: - return false; - } + return this == FINISHED || this == FAILED; } } - private State state = State.INITIALIZED; + private State state = INITIALIZED; private final AccordCommandStore commandStore; private final PreLoadContext preLoadContext; private final Context context = new Context(); private AccordSafeCommandStore safeStore; private final AsyncLoader loader; - private final AsyncWriter writer; private R result; private final String loggingId; private BiConsumer callback; @@ -126,10 +115,13 @@ public AsyncOperation(AccordCommandStore commandStore, PreLoadContext preLoadCon this.commandStore = commandStore; this.preLoadContext = preLoadContext; this.loader = createAsyncLoader(commandStore, preLoadContext); - setLoggingIds(); - this.writer = createAsyncWriter(commandStore); - logger.trace("Created {} on {}", this, commandStore); - clearLoggingIds(); + + if (logger.isTraceEnabled()) + { + setLoggingIds(); + logger.trace("Created {} on {}", this, commandStore); + clearLoggingIds(); + } } @Override @@ -138,28 +130,11 @@ public String toString() return "AsyncOperation{" + state + "}-" + loggingId; } - AsyncWriter createAsyncWriter(AccordCommandStore commandStore) - { - return new AsyncWriter(commandStore); - } - AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()); } - @VisibleForTesting - State state() - { - return state; - } - - @VisibleForTesting - protected void setState(State state) - { - this.state = state; - } - private void callback(Object o, Throwable throwable) { if (throwable != null) @@ -168,7 +143,14 @@ private void callback(Object o, Throwable throwable) fail(throwable); } else + { run(); + } + } + + private void state(State state) + { + this.state = state; } private void finish(R result, Throwable failure) @@ -180,41 +162,31 @@ private void finish(R result, Throwable failure) } finally { - state = failure == null ? State.FINISHED : State.FAILED; + state(failure == null ? FINISHED : FAILED); } } - private void finish(R result) - { - Invariants.checkArgument(state == State.COMPLETING, "Unexpected state %s", state); - finish(result, null); - } - private void fail(Throwable throwable) { Invariants.nonNull(throwable); if (state.isComplete()) throw new IllegalStateException("Unexpected state " + state, throwable); + try { switch (state) { - case INITIALIZED: case COMPLETING: - // nothing to cleanup, call callback - break; + break; // everything's cleaned up, invoke callback case RUNNING: context.revertChanges(); - case PREPARING_OPERATION: + case PREPARING: commandStore.abortCurrentOperation(); case LOADING: context.releaseResources(commandStore); break; - case SAVING: - case AWAITING_SAVE: - // TODO: revert changs - // TODO: panic? - break; + case INITIALIZED: + break; // nothing to clean up, call callback } } catch (Throwable cleanup) @@ -222,6 +194,7 @@ private void fail(Throwable throwable) commandStore.agent().onUncaughtException(cleanup); throwable.addSuppressed(cleanup); } + finish(null, throwable); } @@ -229,42 +202,27 @@ protected void runInternal() { switch (state) { + default: throw new IllegalStateException("Unexpected state " + state); case INITIALIZED: - state = State.LOADING; + state(LOADING); case LOADING: if (!loader.load(context, this::callback)) return; - - state = State.PREPARING_OPERATION; + state(PREPARING); + case PREPARING: safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.commandsForKeys); - state = State.RUNNING; + state(RUNNING); + case RUNNING: result = apply(safeStore); safeStore.postExecute(context.commands, context.commandsForKeys); - - state = State.SAVING; - case SAVING: - case AWAITING_SAVE: - boolean updatesPersisted = writer.save(context, this::callback); - - if (state == State.SAVING) - { - context.releaseResources(commandStore); - commandStore.completeOperation(safeStore, context.commands, context.commandsForKeys); - // with any updates on the way to disk, release resources so operations waiting - // to use these objects don't have issues with fields marked as unsaved - state = State.AWAITING_SAVE; - } - - if (!updatesPersisted) - return; - - state = State.COMPLETING; - finish(result); + context.releaseResources(commandStore); + commandStore.completeOperation(safeStore); + state(COMPLETING); + case COMPLETING: + finish(result, null); case FINISHED: case FAILED: break; - default: - throw new IllegalStateException("Unexpected state " + state); } } @@ -301,7 +259,7 @@ public void run() @Override public void start(BiConsumer callback) { - Invariants.checkArgument(this.callback == null); + Invariants.checkState(this.callback == null); this.callback = callback; commandStore.executor().execute(this); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java b/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java deleted file mode 100644 index 177d17fa27cc..000000000000 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncWriter.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.async; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.function.BiConsumer; - -import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import accord.impl.CommandsForKey; -import accord.local.Command; -import accord.primitives.RoutableKey; -import accord.primitives.TxnId; -import accord.utils.async.AsyncChains; -import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; -import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.db.Mutation; -import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordSafeCommand; -import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; -import org.apache.cassandra.service.accord.AccordSafeState; -import org.apache.cassandra.service.accord.AccordStateCache; - -public class AsyncWriter -{ - private static final Logger logger = LoggerFactory.getLogger(AsyncWriter.class); - - enum State - { - INITIALIZED, - SETUP, - SAVING, - FINISHED - } - - private State state = State.INITIALIZED; - protected AsyncResult writeResult; - private final AccordCommandStore commandStore; - final AccordStateCache.Instance commandCache; - final AccordStateCache.Instance cfkCache; - - public AsyncWriter(AccordCommandStore commandStore) - { - this.commandStore = commandStore; - this.commandCache = commandStore.commandCache(); - this.cfkCache = commandStore.commandsForKeyCache(); - } - - public interface StateMutationFunction> - { - Mutation apply(AccordCommandStore commandStore, V value, long timestamp); - } - - private static > void assembleWrites(Map context, - AccordStateCache.Instance cache, - StateMutationFunction mutationFunction, - long timestamp, - AccordCommandStore commandStore, - List> chains) - { - context.forEach((key, value) -> { - if (!value.hasUpdate()) - return; - Mutation mutation = mutationFunction.apply(commandStore, value, timestamp); - if (mutation == null) - return; - if (logger.isTraceEnabled()) - logger.trace("Dispatching mutation for {}, {} -> {}", key, value.current(), mutation); - AsyncResults.RunnableResult result = AsyncResults.runnableResult(() -> mutation.apply()); - cache.addSaveResult(key, result); - chains.add(result); - }); - } - - protected StateMutationFunction writeCommandFunction() - { - return AccordKeyspace::getCommandMutation; - } - - protected StateMutationFunction writeCommandForKeysFunction() - { - return AccordKeyspace::getCommandsForKeyMutation; - } - - private AsyncResult maybeDispatchWrites(AsyncOperation.Context context) throws IOException - { - if (context.commands.isEmpty() && context.commandsForKeys.isEmpty()) - return null; - - List> writes = new ArrayList<>(context.commands.size() + context.commandsForKeys.size()); - - long timestamp = commandStore.nextSystemTimestampMicros(); - assembleWrites(context.commands, - commandStore.commandCache(), - writeCommandFunction(), - timestamp, - commandStore, - writes); - - assembleWrites(context.commandsForKeys, - commandStore.commandsForKeyCache(), - writeCommandForKeysFunction(), - timestamp, - commandStore, - writes); - - if (writes.isEmpty()) - return null; - - AsyncChains.ofRunnables(Stage.MUTATION.executor(), writes).begin(commandStore.agent()); - - return AsyncChains.reduce(writes, (a, b) -> null).beginAsResult(); - } - - @VisibleForTesting - void setState(State state) - { - this.state = state; - } - - public boolean save(AsyncOperation.Context context, BiConsumer callback) - { - logger.trace("Running save for {} with state {}", callback, state); - commandStore.checkInStoreThread(); - try - { - switch (state) - { - case INITIALIZED: - setState(State.SETUP); - case SETUP: - writeResult = maybeDispatchWrites(context); - - setState(State.SAVING); - case SAVING: - if (writeResult != null && !writeResult.isSuccess()) - { - logger.trace("Adding callback for write result: {}", callback); - writeResult.addCallback(callback, commandStore.executor()); - break; - } - context.commands.keySet().forEach(commandStore.commandCache()::cleanupSaveResult); - context.commandsForKeys.keySet().forEach(commandStore.commandsForKeyCache()::cleanupSaveResult); - setState(State.FINISHED); - case FINISHED: - break; - default: - throw new IllegalStateException("Unexpected state: " + state); - } - } - catch (IOException e) - { - throw new RuntimeException(e); - } - - logger.trace("Exiting save for {} with state {}", callback, state); - return state == State.FINISHED; - } - -} diff --git a/test/unit/org/apache/cassandra/concurrent/ManualExecutor.java b/test/unit/org/apache/cassandra/concurrent/ManualExecutor.java new file mode 100644 index 000000000000..5600e3f3c11f --- /dev/null +++ b/test/unit/org/apache/cassandra/concurrent/ManualExecutor.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.concurrent; + +import accord.utils.Invariants; +import org.apache.cassandra.utils.Closeable; +import org.apache.cassandra.utils.WithResources; +import org.apache.cassandra.utils.concurrent.Future; + +import java.util.ArrayDeque; +import java.util.Collections; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; + +public class ManualExecutor implements ExecutorPlus +{ + private final Queue tasks = new ArrayDeque<>(); + private int completedCount = 0; + + public void runOne() + { + Task task = tasks.remove(); + task.run(); + completedCount++; + } + + public void runAll() + { + while (!tasks.isEmpty()) + runOne(); + } + + @Override + public Future submit(Callable callable) + { + return submit((WithResources) null, callable); + } + + @Override + public Future submit(WithResources withResources, Callable callable) + { + FutureImpl future = new FutureImpl<>(); + tasks.add(new Task(null, callable, withResources, null, future)); + return future; + } + + @Override + public Future submit(Runnable runnable) + { + return submit(null, runnable, null); + } + + @Override + public Future submit(WithResources withResources, Runnable runnable) + { + return submit(withResources, runnable, null); + } + + @Override + public Future submit(Runnable runnable, T result) + { + return submit(null, runnable, result); + } + + @Override + public Future submit(WithResources withResources, Runnable runnable, T result) + { + FutureImpl future = new FutureImpl<>(); + tasks.add(new Task(runnable, null, withResources, result, future)); + return future; + } + + @Override + public void execute(Runnable runnable) + { + execute(null, runnable); + } + + @Override + public void execute(WithResources withResources, Runnable runnable) + { + tasks.add(new Task(runnable, null, withResources, null, null)); + } + + private static class Task + { + private final Runnable runnable; + private final Callable callable; + private final WithResources withResources; + private final FutureImpl future; + + private Object result; + + Task(Runnable runnable, Callable callable, WithResources withResources, Object result, FutureImpl future) + { + Invariants.checkArgument(runnable != null ^ callable != null); + + this.runnable = runnable; + this.callable = callable; + this.withResources = withResources; + this.result = result; + this.future = future; + } + + void run() + { + try (Closeable ignored = withResources == null ? null : withResources.get()) + { + if (null != runnable) + runnable.run(); + else + result = callable.call(); + + if (null != future) + future.succeed(result); + } + catch (Throwable t) + { + ExecutionFailure.handle(t); + if (null != future) + future.fail(t); + } + } + } + + private static class FutureImpl extends org.apache.cassandra.utils.concurrent.AsyncFuture + { + @SuppressWarnings("unchecked") + void succeed(Object v) + { + trySuccess((V) v); + } + + void fail(Throwable throwable) + { + tryFailure(throwable); + } + } + + @Override + public boolean inExecutor() + { + return true; + } + + @Override public int getActiveTaskCount() { return 0; } + @Override public long getCompletedTaskCount() { return completedCount; } + @Override public int getPendingTaskCount() { return tasks.size(); } + + @Override public int getCorePoolSize() { return 0; } + @Override public int getMaximumPoolSize() { return 0; } + @Override public void setCorePoolSize(int newCorePoolSize) { throw new IllegalArgumentException("Cannot resize ManualExecutor"); } + @Override public void setMaximumPoolSize(int newMaximumPoolSize) { throw new IllegalArgumentException("Cannot resize ManualExecutor"); } + + @Override public void shutdown() {} + @Override public List shutdownNow() { return Collections.emptyList(); } + @Override public boolean isShutdown() { return false; } + @Override public boolean isTerminated() { return false; } + @Override public boolean awaitTermination(long timeout, TimeUnit unit) { return true; } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java new file mode 100644 index 000000000000..b270992cd099 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord; + +import java.util.function.BiConsumer; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.cassandra.concurrent.ManualExecutor; +import org.apache.cassandra.service.accord.AccordCachingState.Status; + +public class AccordCachingStateTest +{ + static class CachingState extends AccordCachingState + { + public CachingState(String key) + { + super(key); + } + } + + static class InspectableCallback implements BiConsumer + { + boolean called; + V result; + Throwable failure; + + @Override + public void accept(V result, Throwable failure) + { + Assert.assertFalse(called); + called = true; + this.result = result; + this.failure = failure; + } + } + + private static void assertIllegalState(Runnable runnable) + { + try + { + runnable.run(); + Assert.fail("Expected IllegalStateException"); + } + catch (IllegalStateException ise) + { + // expected + } + } + + @Test + public void loadSuccessTest() + { + ManualExecutor executor = new ManualExecutor(); + CachingState state = new CachingState("K"); + + Assert.assertEquals(Status.UNINITIALIZED, state.status()); + assertIllegalState(state::get); + assertIllegalState(() -> state.set("VVVV")); + assertIllegalState(state::loading); + + state.load(executor, k -> { + Assert.assertEquals("K", k); + return "V"; + }); + Assert.assertEquals(Status.LOADING, state.status()); + + executor.runOne(); + Assert.assertEquals(Status.LOADED, state.status()); + Assert.assertEquals("V", state.get()); + + assertIllegalState(() -> state.load(executor, k -> "CCC")); + assertIllegalState(state::loading); + } + + @Test + public void loadNullTest() + { + ManualExecutor executor = new ManualExecutor(); + CachingState state = new CachingState("K"); + Assert.assertEquals(Status.UNINITIALIZED, state.status()); + + assertIllegalState(state::get); + assertIllegalState(() -> state.set("VVVV")); + assertIllegalState(state::loading); + + state.load(executor, k -> { + Assert.assertEquals("K", k); + return null; + }); + Assert.assertEquals(Status.LOADING, state.status()); + + executor.runOne(); + Assert.assertEquals(Status.LOADED, state.status()); + Assert.assertNull(state.get()); + + assertIllegalState(() -> state.load(executor, k -> "CCC")); + assertIllegalState(state::loading); + } + + @Test + public void additionalCallbackTest() + { + ManualExecutor executor = new ManualExecutor(); + CachingState state = new CachingState("K"); + Assert.assertEquals(Status.UNINITIALIZED, state.status()); + + assertIllegalState(state::get); + assertIllegalState(() -> state.set("VVVV")); + assertIllegalState(state::loading); + + state.load(executor, k -> { + Assert.assertEquals("K", k); + return "V"; + }); + Assert.assertEquals(Status.LOADING, state.status()); + + // register other callbacks + InspectableCallback callback1 = new InspectableCallback<>(); + InspectableCallback callback2 = new InspectableCallback<>(); + + Assert.assertEquals(Status.LOADING, state.status()); + state.loading().addCallback(callback1); + executor.runOne(); + state.loading().addCallback(callback2); + + Assert.assertTrue(callback1.called); + Assert.assertNull(callback1.failure); + + Assert.assertTrue(callback2.called); + Assert.assertNull(callback2.failure); + + Assert.assertEquals(Status.LOADED, state.status()); + Assert.assertEquals("V", state.get()); + + assertIllegalState(() -> state.load(executor, k -> "CCC")); + assertIllegalState(state::loading); + } + + @Test + public void loadFailureTest() + { + ManualExecutor executor = new ManualExecutor(); + CachingState state = new CachingState("K"); + + Assert.assertEquals(Status.UNINITIALIZED, state.status()); + assertIllegalState(state::get); + assertIllegalState(() -> state.set("VVVV")); + assertIllegalState(state::loading); + + state.load(executor, k -> { + throw new RuntimeException(); + }); + Assert.assertEquals(Status.LOADING, state.status()); + + executor.runOne(); + Assert.assertEquals(Status.FAILED_TO_LOAD, state.status()); + assertIllegalState(state::get); + Assert.assertTrue(state.failure() instanceof RuntimeException); + + assertIllegalState(() -> state.load(executor, k -> "CCC")); + assertIllegalState(state::loading); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java b/test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java deleted file mode 100644 index 94440226a94e..000000000000 --- a/test/unit/org/apache/cassandra/service/accord/AccordLoadingStateTest.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.function.BiConsumer; - -import org.junit.Assert; -import org.junit.Test; - -import org.apache.cassandra.service.accord.AccordLoadingState.LoadingState; - -public class AccordLoadingStateTest -{ - static class LoadableState extends AccordLoadingState - { - public LoadableState(String key) - { - super(key); - } - } - - static class InspectableCallback implements BiConsumer - { - boolean called; - V result; - Throwable failure; - - @Override - public void accept(V result, Throwable failure) - { - Assert.assertFalse(called); - called = true; - this.result = result; - this.failure = failure; - } - } - - private static void assertIllegalState(Runnable runnable) - { - try - { - runnable.run(); - Assert.fail("Expected IllegalStateException"); - } - catch (IllegalStateException ise) - { - // expected - } - } - - - @Test - public void loadSuccessTest() - { - LoadableState state = new LoadableState("K"); - - Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); - assertIllegalState(() -> state.value()); - assertIllegalState(() -> state.value("VVVV")); - assertIllegalState(() -> state.listen()); - - Runnable runnable = state.load(k -> { - Assert.assertEquals("K", k); - return "V"; - }); - Assert.assertEquals(LoadingState.PENDING, state.state()); - - runnable.run(); - Assert.assertEquals(LoadingState.LOADED, state.state()); - Assert.assertEquals("V", state.value()); - - assertIllegalState(() -> state.load(k -> "CCC")); - assertIllegalState(() -> state.listen()); - } - - @Test - public void loadNullTest() - { - LoadableState state = new LoadableState("K"); - Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); - - assertIllegalState(() -> state.value()); - assertIllegalState(() -> state.value("VVVV")); - assertIllegalState(() -> state.listen()); - Runnable runnable = state.load(k -> { - Assert.assertEquals("K", k); - return null; - }); - - Assert.assertEquals(LoadingState.PENDING, state.state()); - - runnable.run(); - Assert.assertEquals(LoadingState.LOADED, state.state()); - Assert.assertEquals(null, state.value()); - - assertIllegalState(() -> state.load(k -> "CCC")); - assertIllegalState(() -> state.listen()); - } - - @Test - public void additionalCallbackTest() - { - LoadableState state = new LoadableState("K"); - Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); - - assertIllegalState(() -> state.value()); - assertIllegalState(() -> state.value("VVVV")); - assertIllegalState(() -> state.listen()); - Runnable runnable = state.load(k -> { - Assert.assertEquals("K", k); - return "V"; - }); - - Assert.assertEquals(LoadingState.PENDING, state.state()); - - // register other callbacks - InspectableCallback callback1 = new InspectableCallback<>(); - InspectableCallback callback2 = new InspectableCallback<>(); - - - Assert.assertEquals(LoadingState.PENDING, state.state()); - state.listen().addCallback(callback1); - runnable.run(); - state.listen().addCallback(callback2); - - Assert.assertTrue(callback1.called); - Assert.assertNull(callback1.failure); - - Assert.assertTrue(callback2.called); - Assert.assertNull(callback2.failure); - - Assert.assertEquals(LoadingState.LOADED, state.state()); - Assert.assertEquals("V", state.value()); - - assertIllegalState(() -> state.load(k -> "CCC")); - assertIllegalState(() -> state.listen()); - } - - @Test - public void loadFailureTest() - { - LoadableState state = new LoadableState("K"); - - Assert.assertEquals(LoadingState.UNINITIALIZED, state.state()); - assertIllegalState(() -> state.value()); - assertIllegalState(() -> state.value("VVVV")); - assertIllegalState(() -> state.listen()); - - Runnable runnable = state.load(k -> { - throw new RuntimeException(); - }); - Assert.assertEquals(LoadingState.PENDING, state.state()); - - runnable.run(); - Assert.assertEquals(LoadingState.FAILED, state.state()); - assertIllegalState(() -> state.value()); - Assert.assertTrue(state.failure() instanceof RuntimeException); - - assertIllegalState(() -> state.load(k -> "CCC")); - assertIllegalState(() -> state.listen()); - } -} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index 375f2c7e8c68..193facf13729 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -15,18 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.cassandra.service.accord; -import java.util.function.Function; - import org.junit.Assert; import org.junit.Test; import accord.utils.async.AsyncChain; -import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; -import org.apache.cassandra.service.accord.AccordLoadingState.LoadingState; +import org.apache.cassandra.concurrent.ManualExecutor; +import org.apache.cassandra.service.accord.AccordCachingState.Status; import static org.apache.cassandra.service.accord.AccordTestUtils.testLoad; @@ -37,15 +33,15 @@ public class AccordStateCacheTest private static class SafeString implements AccordSafeState { private boolean invalidated = false; - private final AccordLoadingState global; + private final AccordCachingState global; private String original = null; - public SafeString(AccordLoadingState global) + public SafeString(AccordCachingState global) { this.global = global; } - public AccordLoadingState global() + public AccordCachingState global() { return global; } @@ -59,13 +55,13 @@ public String key() @Override public String current() { - return global.value(); + return global.get(); } @Override public void set(String update) { - global.value(update); + global.set(update); } @Override @@ -77,31 +73,30 @@ public String original() @Override public void preExecute() { - original = global.value(); + original = global.get(); } @Override public void postExecute() { - } @Override - public LoadingState loadingState() + public Status globalStatus() { - return global.state(); + return global.status(); } @Override - public AsyncResults.RunnableResult load(Function loadFunction) + public AsyncChain loading() { - return global.load(loadFunction); + return global.loading(); } @Override - public AsyncChain listen() + public AsyncChain saving() { - return global.listen(); + return global.saving(); } @Override @@ -125,7 +120,7 @@ public boolean invalidated() private static long emptyNodeSize() { - return AccordStateCache.Node.EMPTY_SIZE; + return AccordCachingState.EMPTY_SIZE; } private static long nodeSize(long itemSize) @@ -133,9 +128,9 @@ private static long nodeSize(long itemSize) return itemSize + emptyNodeSize(); } - private static void assertCacheState(AccordStateCache cache, int referencd, int total, long bytes) + private static void assertCacheState(AccordStateCache cache, int referenced, int total, long bytes) { - Assert.assertEquals(referencd, cache.numReferencedEntries()); + Assert.assertEquals(referenced, cache.numReferencedEntries()); Assert.assertEquals(total, cache.totalNumEntries()); Assert.assertEquals(bytes, cache.bytesCached()); } @@ -143,151 +138,159 @@ private static void assertCacheState(AccordStateCache cache, int referencd, int @Test public void testAcquisitionAndRelease() { - AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor,500); + AccordStateCache.Instance instance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); assertCacheState(cache, 0, 0, 0); - - SafeString safeString1 = instance.reference("1"); + SafeString safeString1 = instance.acquire("1"); assertCacheState(cache, 1, 1, emptyNodeSize()); - testLoad(safeString1, "1"); - Assert.assertNull(cache.head); - Assert.assertNull(cache.tail); + testLoad(executor, safeString1, "1"); + Assert.assertTrue(cache.isEmpty()); instance.release(safeString1); assertCacheState(cache, 0, 1, nodeSize(1)); - Assert.assertSame(safeString1.global, cache.tail); - Assert.assertSame(safeString1.global, cache.head); + Assert.assertSame(safeString1.global, cache.head()); + Assert.assertSame(safeString1.global, cache.tail()); - SafeString safeString2 = instance.reference("2"); + SafeString safeString2 = instance.acquire("2"); assertCacheState(cache, 1, 2, DEFAULT_NODE_SIZE + nodeSize(1)); - testLoad(safeString2, "2"); + testLoad(executor, safeString2, "2"); instance.release(safeString2); assertCacheState(cache, 0, 2, nodeSize(1) + nodeSize(1)); - Assert.assertSame(safeString1.global, cache.tail); - Assert.assertSame(safeString2.global, cache.head); + Assert.assertSame(safeString1.global, cache.head()); + Assert.assertSame(safeString2.global, cache.tail()); } @Test public void testRotation() { - AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 5); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 5); + AccordStateCache.Instance instance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[3]; for (int i=0; i<3; i++) { - SafeString safeString = instance.reference(Integer.toString(i)); + SafeString safeString = instance.acquire(Integer.toString(i)); items[i] = safeString; Assert.assertNotNull(safeString); - testLoad(safeString, Integer.toString(i)); + testLoad(executor, safeString, Integer.toString(i)); Assert.assertTrue(instance.isReferenced(safeString.key())); instance.release(safeString); } - Assert.assertSame(items[0].global, cache.tail); - Assert.assertSame(items[2].global, cache.head); + Assert.assertSame(items[0].global, cache.head()); + Assert.assertSame(items[2].global, cache.tail()); assertCacheState(cache, 0, 3, nodeSize(1) * 3); - SafeString safeString = instance.reference("1"); - Assert.assertEquals(LoadingState.LOADED, safeString.loadingState()); + SafeString safeString = instance.acquire("1"); + Assert.assertEquals(Status.LOADED, safeString.globalStatus()); assertCacheState(cache, 1, 3, nodeSize(1) * 3); - // releasing item should return it to the head + // releasing item should return it to the tail instance.release(safeString); assertCacheState(cache, 0, 3, nodeSize(1) * 3); - Assert.assertSame(items[0].global, cache.tail); - Assert.assertSame(items[1].global, cache.head); + Assert.assertSame(items[0].global, cache.head()); + Assert.assertSame(items[1].global, cache.tail()); } @Test public void testEvictionOnAcquire() { - AccordStateCache cache = new AccordStateCache(nodeSize(1) * 5); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 5); + AccordStateCache.Instance instance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; for (int i=0; i<5; i++) { - SafeString safeString = instance.reference(Integer.toString(i)); + SafeString safeString = instance.acquire(Integer.toString(i)); items[i] = safeString; - testLoad(safeString, Integer.toString(i)); + testLoad(executor, safeString, Integer.toString(i)); Assert.assertTrue(instance.isReferenced(safeString.key())); instance.release(safeString); } assertCacheState(cache, 0, 5, nodeSize(1) * 5); - Assert.assertSame(items[0].global, cache.tail); - Assert.assertSame(items[4].global, cache.head); + Assert.assertSame(items[0].global, cache.head()); + Assert.assertSame(items[4].global, cache.tail()); - SafeString safeString = instance.reference("5"); + SafeString safeString = instance.acquire("5"); Assert.assertTrue(instance.isReferenced(safeString.key())); // since it's not loaded, only the node size is counted here assertCacheState(cache, 1, 5, nodeSize(1) * 4 + nodeSize(0)); - Assert.assertSame(items[1].global, cache.tail); - Assert.assertSame(items[4].global, cache.head); + Assert.assertSame(items[1].global, cache.head()); + Assert.assertSame(items[4].global, cache.tail()); Assert.assertFalse(cache.keyIsCached("0")); Assert.assertFalse(cache.keyIsReferenced("0")); - testLoad(safeString, "5"); + testLoad(executor, safeString, "5"); instance.release(safeString); assertCacheState(cache, 0, 5, nodeSize(1) * 5); - Assert.assertSame(items[1].global, cache.tail); - Assert.assertSame(safeString.global, cache.head); + Assert.assertSame(items[1].global, cache.head()); + Assert.assertSame(safeString.global, cache.tail()); } @Test public void testEvictionOnRelease() { - AccordStateCache cache = new AccordStateCache(nodeSize(1) * 4); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 4); + AccordStateCache.Instance instance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; for (int i=0; i<5; i++) { - SafeString safeString = instance.reference(Integer.toString(i)); + SafeString safeString = instance.acquire(Integer.toString(i)); items[i] = safeString; - testLoad(safeString, Integer.toString(i)); + testLoad(executor, safeString, Integer.toString(i)); Assert.assertTrue(instance.isReferenced(safeString.key())); } assertCacheState(cache, 5, 5, nodeSize(0) * 5); - Assert.assertNull(cache.head); - Assert.assertNull(cache.tail); + Assert.assertNull(cache.head()); + Assert.assertNull(cache.tail()); instance.release(items[2]); assertCacheState(cache, 4, 4, nodeSize(0) * 4); - Assert.assertNull(cache.head); - Assert.assertNull(cache.tail); + Assert.assertNull(cache.head()); + Assert.assertNull(cache.tail()); instance.release(items[4]); assertCacheState(cache, 3, 4, nodeSize(0) * 3 + nodeSize(1)); - Assert.assertSame(items[4].global, cache.tail); - Assert.assertSame(items[4].global, cache.head); + Assert.assertSame(items[4].global, cache.head()); + Assert.assertSame(items[4].global, cache.tail()); } @Test public void testMultiAcquireRelease() { - AccordStateCache cache = new AccordStateCache(DEFAULT_NODE_SIZE * 4); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 4); + AccordStateCache.Instance instance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); assertCacheState(cache, 0, 0, 0); - SafeString safeString1 = instance.reference("0"); - testLoad(safeString1, "0"); - Assert.assertEquals(LoadingState.LOADED, safeString1.loadingState()); + SafeString safeString1 = instance.acquire("0"); + testLoad(executor, safeString1, "0"); + Assert.assertEquals(Status.LOADED, safeString1.globalStatus()); Assert.assertEquals(1, cache.references("0")); assertCacheState(cache, 1, 1, nodeSize(0)); - SafeString safeString2 = instance.reference("0"); - Assert.assertEquals(LoadingState.LOADED, safeString1.loadingState()); + SafeString safeString2 = instance.acquire("0"); + Assert.assertEquals(Status.LOADED, safeString1.globalStatus()); Assert.assertEquals(2, cache.references("0")); assertCacheState(cache, 1, 1, nodeSize(0)); @@ -298,80 +301,64 @@ public void testMultiAcquireRelease() } @Test - public void evictionBlockedOnSaveFuture() + public void evictionBlockedOnSaving() { - AccordStateCache cache = new AccordStateCache(nodeSize(1) * 4); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 3 + nodeSize(3)); + AccordStateCache.Instance instance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); assertCacheState(cache, 0, 0, 0); - SafeString[] items = new SafeString[4]; - for (int i=0; i<4; i++) + SafeString item = instance.acquire(Integer.toString(0)); + testLoad(executor, item, Integer.toString(0)); + item.set("0*"); + Assert.assertTrue(instance.isReferenced(item.key())); + instance.release(item); + + for (int i=1; i<4; i++) { - SafeString item = instance.reference(Integer.toString(i)); - testLoad(item, Integer.toString(i)); + item = instance.acquire(Integer.toString(i)); + testLoad(executor, item, Integer.toString(i)); Assert.assertTrue(instance.isReferenced(item.key())); instance.release(item); } - assertCacheState(cache, 0, 4, nodeSize(1) * 4); + assertCacheState(cache, 0, 4, nodeSize(1) * 3 + nodeSize(3)); - AsyncResult saveFuture = AsyncResults.settable(); - instance.addSaveResult("0", saveFuture); + // force cache eviction cache.setMaxSize(0); // all should have been evicted except 0 - assertCacheState(cache, 0, 1, nodeSize(1)); + assertCacheState(cache, 0, 1, nodeSize(2)); + Assert.assertTrue(cache.keyIsCached("0")); Assert.assertFalse(cache.keyIsCached("1")); Assert.assertFalse(cache.keyIsCached("2")); Assert.assertFalse(cache.keyIsCached("3")); } - // if a future is added and another one exists for the same key, they should be merged - @Test - public void testFutureMerging() - { - AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); - AsyncResult.Settable promise1 = AsyncResults.settable(); - AsyncResult.Settable promise2 = AsyncResults.settable(); - instance.addSaveResult("5", promise1); - instance.addSaveResult("5", promise2); - - AsyncResult future = instance.getSaveResult("5"); - Assert.assertNotSame(future, promise1); - Assert.assertNotSame(future, promise2); - - Assert.assertFalse(future.isDone()); - - promise1.setSuccess(null); - Assert.assertFalse(future.isDone()); - - promise2.setSuccess(null); - Assert.assertTrue(future.isDone()); - } - @Test public void testUpdates() { - AccordStateCache cache = new AccordStateCache(500); - AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, String::length); + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor, 500); + AccordStateCache.Instance instance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); assertCacheState(cache, 0, 0, 0); - SafeString safeString = instance.reference("1"); - testLoad(safeString, "1"); + SafeString safeString = instance.acquire("1"); + testLoad(executor, safeString, "1"); assertCacheState(cache, 1, 1, emptyNodeSize()); - Assert.assertNull(cache.head); - Assert.assertNull(cache.tail); + Assert.assertNull(cache.head()); + Assert.assertNull(cache.tail()); Assert.assertTrue(instance.isReferenced(safeString.key())); - assertCacheState(cache, 1, 1, nodeSize(0)); + assertCacheState(cache, 1, 1, emptyNodeSize()); safeString.set("11"); instance.release(safeString); - assertCacheState(cache, 0, 1, nodeSize(2)); - Assert.assertSame(safeString.global, cache.tail); - Assert.assertSame(safeString.global, cache.head); - + assertCacheState(cache, 0, 1, nodeSize(3)); + Assert.assertSame(safeString.global, cache.head()); + Assert.assertSame(safeString.global, cache.tail()); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index f09c2ab4d00c..21ee94d2a576 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -66,6 +66,10 @@ import accord.topology.Shard; import accord.topology.Topology; import accord.utils.async.AsyncChains; +import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.concurrent.ImmediateExecutor; +import org.apache.cassandra.concurrent.ManualExecutor; +import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.statements.TransactionStatement; @@ -129,20 +133,20 @@ public static CommandsForKey commandsForKey(Key key) return new CommandsForKey(key, CommandsForKeySerializer.loader); } - public static AccordLoadingState loaded(K key, V value) + public static AccordCachingState loaded(K key, V value) { - AccordLoadingState global = new AccordLoadingState<>(key); - global.load(k -> { + AccordCachingState global = new AccordCachingState<>(key); + global.load(ImmediateExecutor.INSTANCE, k -> { Assert.assertEquals(key, k); return value; - }).run(); - Assert.assertEquals(AccordLoadingState.LoadingState.LOADED, global.state()); + }); + Assert.assertEquals(AccordCachingState.Status.LOADED, global.status()); return global; } public static AccordSafeCommand safeCommand(Command command) { - AccordLoadingState global = loaded(command.txnId(), command); + AccordCachingState global = loaded(command.txnId(), command); return new AccordSafeCommand(global); } @@ -154,13 +158,11 @@ public static Function testableLoad(K key, V val) }; } - public static void testLoad(AccordSafeState safeState, V val) + public static void testLoad(ManualExecutor executor, AccordSafeState safeState, V val) { - Assert.assertEquals(AccordLoadingState.LoadingState.UNINITIALIZED, safeState.loadingState()); - Runnable load = safeState.load(testableLoad(safeState.key(), val)); - Assert.assertEquals(AccordLoadingState.LoadingState.PENDING, safeState.loadingState()); - load.run(); - Assert.assertEquals(AccordLoadingState.LoadingState.LOADED, safeState.loadingState()); + Assert.assertEquals(AccordCachingState.Status.LOADING, safeState.globalStatus()); + executor.runOne(); + Assert.assertEquals(AccordCachingState.Status.LOADED, safeState.globalStatus()); safeState.preExecute(); Assert.assertEquals(val, safeState.current()); } @@ -325,7 +327,8 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS return result; } - public static AccordCommandStore createAccordCommandStore(Node.Id node, LongSupplier now, Topology topology) + public static AccordCommandStore createAccordCommandStore( + Node.Id node, LongSupplier now, Topology topology, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor) { NodeTimeService time = new NodeTimeService() { @@ -337,26 +340,39 @@ public static AccordCommandStore createAccordCommandStore(Node.Id node, LongSupp SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); AccordCommandStore result = new AccordCommandStore(0, - time, - new AccordAgent(), - null, - cs -> NOOP_PROGRESS_LOG, - holder); + time, + new AccordAgent(), + null, + cs -> NOOP_PROGRESS_LOG, + holder, + loadExecutor, + saveExecutor); holder.set(result); return result; } - public static AccordCommandStore createAccordCommandStore(LongSupplier now, String keyspace, String table) + public static AccordCommandStore createAccordCommandStore(Node.Id node, LongSupplier now, Topology topology) + { + return createAccordCommandStore(node, now, topology, Stage.READ.executor(), Stage.MUTATION.executor()); + } + + public static AccordCommandStore createAccordCommandStore( + LongSupplier now, String keyspace, String table, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor) { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); TokenRange range = TokenRange.fullRange(metadata.keyspace); Node.Id node = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); - AccordCommandStore store = createAccordCommandStore(node, now, topology); + AccordCommandStore store = createAccordCommandStore(node, now, topology, loadExecutor, saveExecutor); store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).setCacheSize(1 << 20)); return store; } + public static AccordCommandStore createAccordCommandStore(LongSupplier now, String keyspace, String table) + { + return createAccordCommandStore(now, keyspace, table, Stage.READ.executor(), Stage.MUTATION.executor()); + } + public static void execute(AccordCommandStore commandStore, Runnable runnable) { try diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 4ffdf8db7ba4..2a2eb481ac7d 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -21,9 +21,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Consumer; -import java.util.function.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; @@ -41,11 +38,12 @@ import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.concurrent.ManualExecutor; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordLoadingState; +import org.apache.cassandra.service.accord.AccordCachingState; import org.apache.cassandra.service.accord.AccordSafeCommand; import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordStateCache; @@ -62,7 +60,6 @@ import static org.apache.cassandra.service.accord.AccordTestUtils.execute; import static org.apache.cassandra.service.accord.AccordTestUtils.loaded; import static org.apache.cassandra.service.accord.AccordTestUtils.testLoad; -import static org.apache.cassandra.service.accord.AccordTestUtils.testableLoad; import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; public class AsyncLoaderTest @@ -77,13 +74,15 @@ public static void beforeClass() throws Throwable } /** - * Loading a cached resource shoudln't block + * Loading a cached resource shouldn't block */ @Test public void cachedTest() { AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + ManualExecutor executor = new ManualExecutor(); + AccordCommandStore commandStore = + createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); AccordStateCache.Instance commandCache = commandStore.commandCache(); commandStore.executeBlocking(() -> commandStore.setCacheSize(1024)); @@ -94,12 +93,14 @@ public void cachedTest() // acquire / release - AccordSafeCommand safeCommand = commandCache.reference(txnId); - testLoad(safeCommand, notWitnessed(txnId, txn)); + commandCache.unsafeSetLoadFunction(id -> notWitnessed(id, txn)); + AccordSafeCommand safeCommand = commandCache.acquire(txnId); + testLoad(executor, safeCommand, notWitnessed(txnId, txn)); commandCache.release(safeCommand); - AccordSafeCommandsForKey safeCfk = cfkCache.reference(key); - testLoad(safeCfk, commandsForKey(key)); + cfkCache.unsafeSetLoadFunction(k -> commandsForKey((PartitionKey) k)); + AccordSafeCommandsForKey safeCfk = cfkCache.acquire(key); + testLoad(executor, safeCfk, commandsForKey(key)); cfkCache.release(safeCfk); AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); @@ -125,8 +126,6 @@ public void loadTest() { AtomicLong clock = new AtomicLong(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); @@ -174,16 +173,18 @@ public void loadTest() public void partialLoadTest() { AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + ManualExecutor executor = new ManualExecutor(); + AccordCommandStore commandStore = + createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance cfkCacche = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // acquire /release, create / persist - AccordSafeCommand safeCommand = commandCache.reference(txnId); - testLoad(safeCommand, notWitnessed(txnId, txn)); + commandCache.unsafeSetLoadFunction(id -> notWitnessed(id, txn)); + AccordSafeCommand safeCommand = commandCache.acquire(txnId); + testLoad(executor, safeCommand, notWitnessed(txnId, txn)); commandCache.release(safeCommand); @@ -205,6 +206,7 @@ public void partialLoadTest() Assert.assertFalse(result); }); + executor.runOne(); cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); // then return immediately after the callback has fired @@ -224,7 +226,9 @@ public void partialLoadTest() public void inProgressLoadTest() throws Throwable { AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + ManualExecutor executor = new ManualExecutor(); + AccordCommandStore commandStore = + createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); commandStore.executor().submit(() -> commandStore.setCacheSize(1024)).get(); AccordStateCache.Instance commandCache = commandStore.commandCache(); AccordStateCache.Instance cfkCache = commandStore.commandsForKeyCache(); @@ -233,17 +237,17 @@ public void inProgressLoadTest() throws Throwable PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // acquire / release - AccordSafeCommand safeCommand = commandCache.reference(txnId); - Assert.assertEquals(AccordLoadingState.LoadingState.UNINITIALIZED, safeCommand.loadingState()); - Runnable load = safeCommand.load(testableLoad(safeCommand.key(), notWitnessed(txnId, txn))); - Assert.assertEquals(AccordLoadingState.LoadingState.PENDING, safeCommand.loadingState()); + cfkCache.unsafeSetLoadFunction(k -> commandsForKey((PartitionKey) k)); + AccordSafeCommandsForKey safeCfk = cfkCache.acquire(key); + testLoad(executor, safeCfk, commandsForKey(key)); + cfkCache.release(safeCfk); + + commandCache.unsafeSetLoadFunction(id -> { Assert.assertEquals(txnId, id); return notWitnessed(id, txn); }); + AccordSafeCommand safeCommand = commandCache.acquire(txnId); + Assert.assertEquals(AccordCachingState.Status.LOADING, safeCommand.globalStatus()); Assert.assertTrue(commandCache.isReferenced(txnId)); Assert.assertFalse(commandCache.isLoaded(txnId)); - AccordSafeCommandsForKey safeCfk = cfkCache.reference(key); - testLoad(safeCfk, commandsForKey(key)); - cfkCache.release(safeCfk); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); // since there's a read future associated with the txnId, we'll wait for it to load @@ -260,8 +264,8 @@ public void inProgressLoadTest() throws Throwable }); Assert.assertFalse(cbFired.isSuccess()); - load.run(); - Assert.assertEquals(AccordLoadingState.LoadingState.LOADED, safeCommand.loadingState()); + executor.runOne(); + Assert.assertEquals(AccordCachingState.Status.LOADED, safeCommand.globalStatus()); cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); Assert.assertTrue(cbFired.isSuccess()); @@ -282,34 +286,24 @@ public void failedLoadTest() throws Throwable TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); - AsyncResult.Settable promise1 = AsyncResults.settable(); - AtomicReference> consumer1 = new AtomicReference<>(); - AsyncResult.Settable promise2 = AsyncResults.settable(); - AtomicReference> consumer2 = new AtomicReference<>(); + AsyncResult.Settable promise = AsyncResults.settable(); AsyncResult.Settable callback = AsyncResults.settable(); RuntimeException failure = new RuntimeException(); execute(commandStore, () -> { AtomicInteger loadCalls = new AtomicInteger(); - AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY){ - - @Override - Function loadCommandFunction() - { - return txnId -> { - loadCalls.incrementAndGet(); - if (txnId.equals(txnId1)) - { - throw failure; - } - if (txnId.equals(txnId2)) - { - return notWitnessed(txnId, null); - } - throw new AssertionError("Unknown txnId: " + txnId); - }; - } - }; + + commandStore.commandCache().unsafeSetLoadFunction(txnId -> + { + loadCalls.incrementAndGet(); + if (txnId.equals(txnId1)) + throw failure; + else if (txnId.equals(txnId2)) + return notWitnessed(txnId, null); + throw new AssertionError("Unknown txnId: " + txnId); + }); + + AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY); boolean result = loader.load(new Context(), (u, t) -> { Assert.assertFalse(callback.isDone()); @@ -321,7 +315,7 @@ Function loadCommandFunction() Assert.assertEquals(2, loadCalls.get()); }); - promise1.tryFailure(failure); + promise.tryFailure(failure); AsyncChains.getUninterruptibly(callback); } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index f3802d2bf291..1060e34d48da 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -19,15 +19,12 @@ package org.apache.cassandra.service.accord.async; import java.time.Duration; -import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; -import java.util.function.Function; -import java.util.stream.Collectors; import com.google.common.collect.Iterables; import com.google.common.collect.Maps; @@ -46,7 +43,6 @@ import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; -import accord.local.Status; import accord.primitives.Ballot; import accord.primitives.FullRoute; import accord.primitives.Keys; @@ -58,22 +54,19 @@ import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; -import accord.primitives.Writes; import accord.utils.Gen; import accord.utils.Gens; import accord.utils.RandomSource; -import accord.utils.async.AsyncChains; -import accord.utils.async.AsyncResult; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; -import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.transform.FilteredPartitions; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCachingState; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordSafeCommand; @@ -81,7 +74,6 @@ import org.apache.cassandra.service.accord.AccordStateCache; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FBUtilities; import org.assertj.core.api.Assertions; @@ -155,7 +147,7 @@ public void optionalCommandsForKeyTest() throws Throwable })); long nowInSeconds = FBUtilities.nowInSeconds(); - SinglePartitionReadCommand command = AccordKeyspace.getCommandsForKeyRead(commandStore, key, nowInSeconds); + SinglePartitionReadCommand command = AccordKeyspace.getCommandsForKeyRead(commandStore.id(), key, nowInSeconds); try(ReadExecutionController controller = command.executionController(); FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) { @@ -216,7 +208,7 @@ private static void assertFutureState(AccordStateCache.Instance cache() @Override AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()) { - + return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()) + { @Override void state(State state) { @@ -272,32 +264,6 @@ void state(State state) } }; } - - @Override - AsyncWriter createAsyncWriter(AccordCommandStore commandStore) - { - return new AsyncWriter(commandStore) { - - @Override - void setState(State state) - { - switch (state) - { - case SETUP: - assertFutureState(cache(), txnId, true, false, false); - break; - case FINISHED: - assertFutureState(cache(), txnId, false, false, false); - break; - case SAVING: - assertFutureState(cache(), txnId, true, false, true); - break; - - } - super.setState(state); - } - }; - } }; commandStore.executor().submit(operation); @@ -328,27 +294,13 @@ public void loadFail() Consumer consumer = Mockito.mock(Consumer.class); - AsyncOperation o1 = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) + commandStore.commandCache().unsafeSetLoadFunction(txnId -> { - @Override - AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) - { - return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()) - { - @Override - Function loadCommandFunction() - { - Function delegate = super.loadCommandFunction(); - return txnId -> { - logger.info("Attempting to load {}; expected to fail? {}", txnId, failed.get(txnId)); - if (!failed.get(txnId)) return delegate.apply(txnId); - - throw new NullPointerException("txn_id " + txnId); - }; - } - }; - } - }; + logger.info("Attempting to load {}; expected to fail? {}", txnId, failed.get(txnId)); + if (!failed.get(txnId)) return AccordKeyspace.loadCommand(commandStore, txnId); + throw new NullPointerException("txn_id " + txnId); + }); + AsyncOperation o1 = new AsyncOperation.ForConsumer(commandStore, ctx, consumer); AssertionUtils.assertThatThrownBy(() -> getUninterruptibly(o1)) .hasRootCause() @@ -363,6 +315,7 @@ Function loadCommandFunction() awaitDone(commandStore, ids, keys); // can we recover? + commandStore.commandCache().unsafeSetLoadFunction(txnId -> AccordKeyspace.loadCommand(commandStore, txnId)); AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> store.command(id).readyToExecute())); getUninterruptibly(o2); }); @@ -403,85 +356,12 @@ public void consumerFails() }); } - @Test - public void writeFail() - { - AtomicLong clock = new AtomicLong(0); - // all txn use the same key; 0 - Keys keys = keys(Schema.instance.getTableMetadata("ks", "tbl"), 0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); - - qt().withExamples(100).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { - before(); // truncate tables - - createCommand(commandStore, rs, ids); - - Map failed = selectFailedTxn(rs, ids); - - assertNoReferences(commandStore, ids, keys); - - PreLoadContext ctx = contextFor(ids, keys); - - Consumer consumer = store -> ids.forEach(id -> store.command(id).readyToExecute()); - - AsyncOperation o1 = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) - { - @Override - AsyncWriter createAsyncWriter(AccordCommandStore commandStore) - { - return new AsyncWriter(commandStore) - { - @Override - protected AsyncWriter.StateMutationFunction writeCommandFunction() - { - StateMutationFunction delegate = super.writeCommandFunction(); - return (store, updated, timestamp) -> { - if (!failed.get(updated.txnId())) return delegate.apply(store, updated, timestamp); - - - Mutation mutation = Mockito.mock(Mutation.class); - Mockito.doThrow(new NullPointerException("txn_id " + updated.txnId())).when(mutation).apply(); - return mutation; - }; - } - }; - } - }; - - Assertions.assertThatThrownBy(() -> getUninterruptibly(o1)); - - - assertNoReferences(commandStore, ids, keys); - assertCanNotEvict(commandStore.commandCache(), failed.entrySet().stream() - .filter(e -> e.getValue()) - .map(e -> e.getKey()) - .collect(Collectors.toList())); - // first write will fail the operation, so make sure to wait for all write results - awaitSaveResult(commandStore.cache()); - - // the command should be ReadyToExecute, so move it forward and allow the save - AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> { - SafeCommand command = store.command(id); - Command current = command.current(); - Assertions.assertThat(current.status()).isEqualTo(Status.ReadyToExecute); - Writes writes = current.partialTxn().execute(current.txnId(), current.executeAt(), new TxnData()); - command.preapplied(current, current.txnId(), current.asCommitted().waitingOn(), writes, null); - })); - getUninterruptibly(o2); - - assertNoReferences(commandStore, ids, keys); - assertCanEvict(commandStore.commandCache(), ids); - assertCanEvict(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); - }); - } - private static void createCommand(AccordCommandStore commandStore, RandomSource rs, List ids) { // to simulate CommandsForKey not being found, use createCommittedAndPersist periodically as it does not update if (rs.nextBoolean()) ids.forEach(id -> createCommittedAndPersist(commandStore, id)); else ids.forEach(id -> createCommittedUsingLifeCycle(commandStore, id)); - commandStore.clearCache(); + commandStore.unsafeClearCache(); } private static Map selectFailedTxn(RandomSource rs, List ids) @@ -523,7 +403,7 @@ private static void assertNoReferences(AccordStateCache.Instance ca AssertionError error = null; for (T key : keys) { - AccordStateCache.Node node = cache.getUnsafe(key); + AccordCachingState node = cache.getUnsafe(key); if (node == null) continue; try { @@ -557,43 +437,11 @@ private static void awaitDone(AccordStateCache.Instance cache, Iter { for (T key : keys) { - AccordStateCache.Node node = cache.getUnsafe(key); + AccordCachingState node = cache.getUnsafe(key); if (node == null) continue; Awaitility.await("For node " + node.key() + " to complete") .atMost(Duration.ofMinutes(1)) - .until(() -> node.isComplete()); - } - } - - private static void awaitSaveResult(AccordStateCache cache) - { - for (Map.Entry> e : cache.saveResults().entrySet()) - AsyncChains.awaitUninterruptibly(e.getValue()); - } - - private static void assertCanEvict(AccordStateCache.Instance cache, Iterable keys) - { - for (T key : keys) - { - AccordStateCache.Node node = cache.getUnsafe(key); - if (node == null) - continue; - Assert.assertTrue("Unable to evict " + node.key(), cache.canEvict(node.key())); - } - } - - private static void assertCanNotEvict(AccordStateCache.Instance cache, Iterable keys) - { - List errors = new ArrayList<>(); - for (T key : keys) - { - if (cache.getUnsafe(key) == null) - { - errors.add(String.format("Node %s was evicted, but should not be", key)); - continue; - } - if (cache.canEvict(key)) errors.add(String.format("Node %s is evictable but should not be", key)); + .until(node::isComplete); } - if (!errors.isEmpty()) throw new AssertionError(String.join("\n", errors)); } } From 00bbf4ec8e680c9b49d17b16cb51d5fdf05785f2 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Wed, 7 Jun 2023 11:44:31 -0700 Subject: [PATCH 061/340] CEP-15: Accord Bootstrap Integration Patch by Blake Eggleston and Benedict Elliott Smith; Reviewed by David Capwell for CASSANDRA-17101 CEP-15: Accord TCM integration Patch by Blake Eggleston; Reviewed by David Capwell for CASSANDRA-18444 --- modules/accord | 2 +- .../cassandra/db/ColumnFamilyStore.java | 1 + .../db/virtual/AccordVirtualTables.java | 1 - .../apache/cassandra/dht/BootStrapper.java | 3 + src/java/org/apache/cassandra/net/Verb.java | 10 +- .../org/apache/cassandra/service/Rebuild.java | 21 +- .../cassandra/service/StorageProxy.java | 3 +- .../service/accord/AccordCallback.java | 8 +- .../service/accord/AccordCommandStore.java | 32 ++ .../service/accord/AccordCommandStores.java | 4 +- .../accord/AccordConfigurationService.java | 393 +++++++++++++-- .../service/accord/AccordDataStore.java | 19 +- .../accord/AccordEndpointMapper.java} | 20 +- .../accord/AccordFetchCoordinator.java | 408 +++++++++++++++ .../service/accord/AccordKeyspace.java | 415 +++++++++++++++- .../accord/AccordLocalSyncNotifier.java | 207 ++++++++ .../service/accord/AccordMessageSink.java | 20 +- .../service/accord/AccordService.java | 96 +++- .../service/accord/AccordTopologyUtils.java | 126 +++-- .../service/accord/AccordVerbHandler.java | 11 +- .../service/accord/EndpointMapping.java | 123 ++--- .../service/accord/IAccordService.java | 22 +- .../cassandra/service/accord/TokenRange.java | 22 + .../service/accord/async/AsyncOperation.java | 3 +- .../serializers/CommandSerializers.java | 43 +- .../serializers/CommandStoreSerializers.java | 115 +++++ .../accord/serializers/FetchSerializers.java | 131 +++++ .../serializers/TopologySerializers.java | 68 +++ .../apache/cassandra/tcm/ClusterMetadata.java | 6 + .../org/apache/cassandra/tcm/Startup.java | 2 + .../cassandra/tcm/membership/Directory.java | 5 + .../tcm/sequences/BootstrapAndJoin.java | 9 +- .../apache/cassandra/tcm/sequences/Move.java | 13 +- .../org/apache/cassandra/tools/NodeTool.java | 1 - .../cassandra/utils/CastingSerializer.java | 64 +++ .../utils/CollectionSerializers.java | 4 +- .../utils/concurrent/FutureCombiner.java | 6 + .../cassandra/distributed/impl/Instance.java | 1 + .../test/accord/AccordBootstrapTest.java | 464 ++++++++++++++++++ .../test/accord/AccordCQLTest.java | 2 - .../test/accord/AccordFeatureFlagTest.java | 4 +- .../test/accord/AccordTestBase.java | 7 +- .../test/accord/NewSchemaTest.java | 1 - ...bstractPairOfSequencesPaxosSimulation.java | 5 +- .../cassandra/audit/AuditLoggerTest.java | 2 - .../apache/cassandra/auth/TxnAuthTest.java | 2 - .../cql3/NodeLocalConsistencyTest.java | 3 - .../cql3/PreparedStatementsTest.java | 2 - .../hints/HintServiceBytemanTest.java | 4 +- .../cassandra/hints/HintsServiceTest.java | 2 +- .../apache/cassandra/hints/HintsTestUtil.java | 43 -- .../AccordConfigurationServiceTest.java | 268 ++++++++++ .../service/accord/AccordMessageSinkTest.java | 11 +- .../service/accord/AccordTestUtils.java | 10 +- .../service/accord/AccordTopologyTest.java | 173 ++++++- .../service/accord/EndpointMappingTest.java | 21 +- .../cassandra/utils/MockFailureDetector.java | 63 +++ 57 files changed, 3109 insertions(+), 416 deletions(-) rename src/java/org/apache/cassandra/{tools/nodetool/CreateEpochUnsafe.java => service/accord/AccordEndpointMapper.java} (62%) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java create mode 100644 src/java/org/apache/cassandra/utils/CastingSerializer.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java create mode 100644 test/unit/org/apache/cassandra/utils/MockFailureDetector.java diff --git a/modules/accord b/modules/accord index 8830d97ba517..03f937175dbc 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8830d97ba517fb2d0f7f22e8e6b886a98839e694 +Subproject commit 03f937175dbcf04243bb0ac48b64746c1a07bc9c diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 365bc98643dd..d47a651a0619 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -236,6 +236,7 @@ public enum FlushReason ANTICOMPACTION, SCHEMA_CHANGE, OWNED_RANGES_CHANGE, + ACCORD, UNIT_TESTS // explicitly requested flush needed for a test } diff --git a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java index 42a518b961a7..0dba15a4219e 100644 --- a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java +++ b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java @@ -64,7 +64,6 @@ protected Epoch(String keyspace) public DataSet data() { IAccordService accord = AccordService.instance(); - accord.createEpochFromConfigUnsafe(); long epoch = accord.currentEpoch(); diff --git a/src/java/org/apache/cassandra/dht/BootStrapper.java b/src/java/org/apache/cassandra/dht/BootStrapper.java index ebf04dddf89f..4ec9e4834f95 100644 --- a/src/java/org/apache/cassandra/dht/BootStrapper.java +++ b/src/java/org/apache/cassandra/dht/BootStrapper.java @@ -40,6 +40,7 @@ import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.streaming.StreamEvent; import org.apache.cassandra.streaming.StreamEventHandler; import org.apache.cassandra.streaming.StreamOperation; @@ -136,6 +137,8 @@ public Future bootstrap(StreamStateStore stateStore, boolean useStr logger.debug("Schema does not contain any non-local keyspaces to stream on bootstrap"); for (String keyspaceName : nonLocalStrategyKeyspaces) { + if (AccordService.instance().isAccordManagedKeyspace(keyspaceName)) + continue; KeyspaceMetadata ksm = metadata.schema.getKeyspaces().get(keyspaceName).get(); if (ksm.params.replication.isMeta()) continue; diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 80afb387d90e..537b857fce1f 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -79,10 +79,12 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; +import org.apache.cassandra.service.accord.AccordLocalSyncNotifier; import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; import org.apache.cassandra.service.accord.serializers.CheckStatusSerializers; import org.apache.cassandra.service.accord.serializers.CommitSerializers; import org.apache.cassandra.service.accord.serializers.EnumSerializer; +import org.apache.cassandra.service.accord.serializers.FetchSerializers; import org.apache.cassandra.service.accord.serializers.GetDepsSerializers; import org.apache.cassandra.service.accord.serializers.InformDurableSerializers; import org.apache.cassandra.service.accord.serializers.InformHomeDurableSerializers; @@ -91,6 +93,8 @@ import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; import org.apache.cassandra.service.accord.serializers.RecoverySerializers; import org.apache.cassandra.service.accord.serializers.WaitOnCommitSerializer; +import org.apache.cassandra.service.paxos.Commit; +import org.apache.cassandra.service.paxos.Commit.Agreed; import org.apache.cassandra.service.paxos.PaxosCommit; import org.apache.cassandra.service.paxos.PaxosCommitAndPrepare; import org.apache.cassandra.service.paxos.PaxosPrepare; @@ -117,8 +121,6 @@ import org.apache.cassandra.utils.BooleanSerializer; import org.apache.cassandra.service.EchoVerbHandler; import org.apache.cassandra.service.SnapshotVerbHandler; -import org.apache.cassandra.service.paxos.Commit; -import org.apache.cassandra.service.paxos.Commit.Agreed; import org.apache.cassandra.service.paxos.PrepareResponse; import org.apache.cassandra.service.paxos.v1.PrepareVerbHandler; import org.apache.cassandra.service.paxos.v1.ProposeVerbHandler; @@ -290,6 +292,10 @@ public enum Verb ACCORD_CHECK_STATUS_REQ (140, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP ), ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_DEPS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP ), + ACCORD_FETCH_DATA_RSP (145, P2, writeTimeout, REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), + ACCORD_FETCH_DATA_REQ (144, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_FETCH_DATA_RSP ), + ACCORD_SYNC_NOTIFY_RSP (147, P2, writeTimeout, REQUEST_RESPONSE, () -> AccordLocalSyncNotifier.Acknowledgement.serializer, RESPONSE_HANDLER ), + ACCORD_SYNC_NOTIFY_REQ (146, P2, writeTimeout, IMMEDIATE, () -> AccordLocalSyncNotifier.Notification.serializer, () -> AccordLocalSyncNotifier.verbHandler, ACCORD_SYNC_NOTIFY_RSP), // generic failure response diff --git a/src/java/org/apache/cassandra/service/Rebuild.java b/src/java/org/apache/cassandra/service/Rebuild.java index 421595bab637..673a6ca486a3 100644 --- a/src/java/org/apache/cassandra/service/Rebuild.java +++ b/src/java/org/apache/cassandra/service/Rebuild.java @@ -44,11 +44,15 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.streaming.StreamOperation; +import org.apache.cassandra.streaming.StreamResultFuture; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.MovementMap; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.utils.FBUtilities.getBroadcastAddressAndPort; @@ -119,11 +123,16 @@ public static void rebuild(String sourceDc, String keyspace, String tokens, Stri if (keyspace == null) { for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces().names()) + { + if (AccordService.instance().isAccordManagedKeyspace(keyspaceName)) + continue; streamer.addKeyspaceToFetch(keyspaceName); + } } else if (tokens == null) { - streamer.addKeyspaceToFetch(keyspace); + if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) + streamer.addKeyspaceToFetch(keyspace); } else { @@ -150,10 +159,16 @@ else if (tokens == null) streamer.addSourceFilter(new RangeStreamer.AllowedSourcesFilter(sources)); } - streamer.addKeyspaceToFetch(keyspace); + if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) + streamer.addKeyspaceToFetch(keyspace); } - streamer.fetchAsync().get(); + StreamResultFuture resultFuture = streamer.fetchAsync(); + // wait for result + Future accordReady = AccordService.instance().epochReady(metadata.epoch); + Future ready = FutureCombiner.allOf(resultFuture, accordReady); + // wait for result + ready.get(); } catch (InterruptedException e) { diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index 94eb38c96bc4..c8eb3be5978a 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -1884,7 +1884,8 @@ public static boolean hasJoined() private static PartitionIterator readWithConsensus(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException { - if (DatabaseDescriptor.getLegacyPaxosStrategy() == accord) + // TCM explicitly relies on paxos and doesn't work with accord + if (DatabaseDescriptor.getLegacyPaxosStrategy() == accord && !group.metadata().keyspace.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) { return readWithAccord(group, consistencyLevel); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCallback.java b/src/java/org/apache/cassandra/service/accord/AccordCallback.java index 20ed9fad69ac..d25d27f59304 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCallback.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCallback.java @@ -34,17 +34,19 @@ class AccordCallback extends SafeCallback implements RequestCallback { private static final Logger logger = LoggerFactory.getLogger(AccordCallback.class); + private final AccordEndpointMapper endpointMapper; - public AccordCallback(AgentExecutor executor, Callback callback) + public AccordCallback(AgentExecutor executor, Callback callback, AccordEndpointMapper endpointMapper) { super(executor, callback); + this.endpointMapper = endpointMapper; } @Override public void onResponse(Message msg) { logger.debug("Received response {} from {}", msg.payload, msg.from()); - success(EndpointMapping.endpointToId(msg.from()), msg.payload); + success(endpointMapper.mappedId(msg.from()), msg.payload); } private static Throwable convertReason(RequestFailureReason reason) @@ -59,7 +61,7 @@ public void onFailure(InetAddressAndPort from, RequestFailureReason failureReaso { logger.debug("Received failure {} from {} for {}", failureReason, from, this); // TODO (now): we should distinguish timeout failures with some placeholder Exception - failure(EndpointMapping.endpointToId(from), convertReason(failureReason)); + failure(endpointMapper.mappedId(from), convertReason(failureReason)); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 84869c179d11..e19f898915c2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -65,6 +65,7 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; +import accord.utils.ReducingRangeMap; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.Observable; @@ -150,6 +151,16 @@ public AccordCommandStore(int id, this::loadCommandsForKey, this::saveCommandsForKey, AccordObjectSizes::commandsForKey); + AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, bootstrapBeganAt, safeToRead) -> { + executor.submit(() -> { + if (rejectBefore != null) + super.setRejectBefore(rejectBefore); + if (bootstrapBeganAt != null) + super.setBootstrapBeganAt(bootstrapBeganAt); + if (safeToRead != null) + super.setSafeToRead(safeToRead); + }); + })); executor.execute(() -> CommandStore.register(this)); executor.execute(this::loadRangesToCommands); } @@ -437,4 +448,25 @@ public void shutdown() { executor.shutdown(); } + + protected void setRejectBefore(ReducingRangeMap newRejectBefore) + { + super.setRejectBefore(newRejectBefore); + // TODO (required, correctness): rework to persist via journal once available, this can lose updates in some edge cases + AccordKeyspace.updateRejectBefore(this, newRejectBefore); + } + + protected void setBootstrapBeganAt(NavigableMap newBootstrapBeganAt) + { + super.setBootstrapBeganAt(newBootstrapBeganAt); + // TODO (required, correctness): rework to persist via journal once available, this can lose updates in some edge cases + AccordKeyspace.updateBootstrapBeganAt(this, newBootstrapBeganAt); + } + + protected void setSafeToRead(NavigableMap newSafeToRead) + { + super.setSafeToRead(newSafeToRead); + // TODO (required, correctness): rework to persist via journal once available, this can lose updates in some edge cases + AccordKeyspace.updateSafeToRead(this, newSafeToRead); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 38040b466be0..8e9c5beac201 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -141,9 +141,9 @@ private static long maxCacheSize() } @Override - public synchronized Supplier updateTopology(Node node, Topology newTopology) + public synchronized Supplier updateTopology(Node node, Topology newTopology, boolean startSync) { - Supplier start = super.updateTopology(node, newTopology); + Supplier start = super.updateTopology(node, newTopology, startSync); return () -> { EpochReady ready = start.get(); ready.metadata.addCallback(() -> { diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 3ab64c9bd097..1a7a512d091d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -18,98 +18,385 @@ package org.apache.cassandra.service.accord; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.CopyOnWriteArrayList; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; -import javax.annotation.concurrent.GuardedBy; +import javax.annotation.Nullable; -import accord.api.ConfigurationService; +import com.google.common.annotations.VisibleForTesting; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.impl.AbstractConfigurationService; import accord.local.Node; import accord.topology.Topology; +import accord.utils.Invariants; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; +import org.agrona.collections.Long2ObjectHashMap; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.gms.FailureDetector; +import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.accord.AccordKeyspace.EpochDiskState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.listeners.ChangeListener; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; -/** - * Currently a stubbed out config service meant to be triggered from a dtest - */ -public class AccordConfigurationService implements ConfigurationService +// TODO: listen to FailureDetector and rearrange fast path accordingly +public class AccordConfigurationService extends AbstractConfigurationService implements ChangeListener, AccordEndpointMapper, AccordLocalSyncNotifier.Listener { - private final Node.Id localId; - private final List listeners = new CopyOnWriteArrayList<>(); - @GuardedBy("this") - private final List epochs = new ArrayList<>(); + private static final Logger logger = LoggerFactory.getLogger(AccordConfigurationService.class); + + private final MessageDelivery messagingService; + private final IFailureDetector failureDetector; + private EpochDiskState diskState = EpochDiskState.EMPTY; + private enum State { INITIALIZED, LOADING, STARTED } + + private State state = State.INITIALIZED; + private volatile EndpointMapping mapping = EndpointMapping.EMPTY; + private final Long2ObjectHashMap syncNotifiers = new Long2ObjectHashMap<>(); - public AccordConfigurationService(Node.Id localId) + public enum SyncStatus { NOT_STARTED, NOTIFYING, COMPLETED } + + static class EpochState extends AbstractConfigurationService.AbstractEpochState { - this.localId = localId; - epochs.add(Topology.EMPTY); + SyncStatus syncStatus = SyncStatus.NOT_STARTED; + protected final AsyncResult.Settable localSyncNotified = AsyncResults.settable(); + + public EpochState(long epoch) + { + super(epoch); + } + + void setSyncStatus(SyncStatus status) + { + this.syncStatus = status; + if (status == SyncStatus.COMPLETED) + localSyncNotified.trySuccess(null); + } + + AsyncResult received() + { + return received; + } + + AsyncResult acknowledged() + { + return acknowledged; + } + + @Nullable AsyncResult reads() + { + return reads; + } + + AsyncResult.Settable localSyncNotified() + { + return localSyncNotified; + } } - @Override - public void registerListener(Listener listener) + static class EpochHistory extends AbstractConfigurationService.AbstractEpochHistory + { + @Override + protected EpochState createEpochState(long epoch) + { + return new EpochState(epoch); + } + } + + public AccordConfigurationService(Node.Id node, MessageDelivery messagingService, IFailureDetector failureDetector) + { + super(node); + this.messagingService = messagingService; + this.failureDetector = failureDetector; + } + + public AccordConfigurationService(Node.Id node) { - listeners.add(listener); + this(node, MessagingService.instance(), FailureDetector.instance); } @Override - public synchronized Topology currentTopology() + protected EpochHistory createEpochHistory() { - return epochs.get(epochs.size() - 1); + return new EpochHistory(); + } + + public synchronized void start() + { + Invariants.checkState(state == State.INITIALIZED, "Expected state to be INITIALIZED but was %s", state); + state = State.LOADING; + updateMapping(ClusterMetadata.current()); + diskState = AccordKeyspace.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete) -> { + if (topology != null) + reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); + + getOrCreateEpochState(epoch).setSyncStatus(syncStatus); + if (syncStatus == SyncStatus.NOTIFYING) + syncNotifiers.put(epoch, new AccordLocalSyncNotifier(epoch, localId, pendingSyncNotify, this, messagingService, failureDetector, this)); + + remoteSyncComplete.forEach(id -> remoteSyncComplete(id, epoch)); + })); + syncNotifiers.values().forEach(AccordLocalSyncNotifier::start); + state = State.STARTED; } @Override - public synchronized Topology getTopologyForEpoch(long epoch) + public Node.Id mappedId(InetAddressAndPort endpoint) { - return epochs.get((int) epoch); + return Invariants.nonNull(mapping.mappedId(endpoint)); } @Override - public synchronized void fetchTopologyForEpoch(long epoch) + public InetAddressAndPort mappedEndpoint(Node.Id id) { - Topology current = currentTopology(); - if (epoch < current.epoch()) - return; - while (current.epoch() < epoch) + return Invariants.nonNull(mapping.mappedEndpoint(id)); + } + + @VisibleForTesting + EpochDiskState diskState() + { + return diskState; + } + + @VisibleForTesting + synchronized void updateMapping(EndpointMapping mapping) + { + if (mapping.epoch() > this.mapping.epoch()) + this.mapping = mapping; + } + + synchronized void updateMapping(ClusterMetadata metadata) + { + updateMapping(AccordTopologyUtils.directoryToMapping(metadata.epoch.getEpoch(), metadata.directory)); + } + + private void reportMetadata(ClusterMetadata metadata) + { + Stage.MISC.submit(() -> { + synchronized (AccordConfigurationService.this) + { + logger.info("Reporting metadata for epoch {}", metadata.epoch.getEpoch()); + updateMapping(metadata); + reportTopology(AccordTopologyUtils.createAccordTopology(metadata, this::isAccordManagedKeyspace)); + } + }); + } + + private void maybeReportMetadata(ClusterMetadata metadata) + { + // don't report metadata until the previous one has been acknowledged + synchronized (this) { - current = AccordTopologyUtils.createTopology(epochs.size()); - unsafeAddEpoch(current); + long epoch = metadata.epoch.getEpoch(); + if (epochs.maxEpoch() == 0) + { + getOrCreateEpochState(epoch); // touch epoch state so subsequent calls see it + reportMetadata(metadata); + return; + } + getOrCreateEpochState(epoch - 1).acknowledged().addCallback(() -> reportMetadata(metadata)); } } @Override - public void acknowledgeEpoch(EpochReady ready) + public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) + { + maybeReportMetadata(prev); + maybeReportMetadata(next); + } + + @Override + protected void fetchTopologyInternal(long epoch) + { + // TODO: need a non-blocking way to inform CMS of an unknown epoch +// ClusterMetadataService.instance().maybeCatchup(Epoch.create(epoch)); + } + + @Override + protected synchronized void localSyncComplete(Topology topology) { - Topology acknowledged = getTopologyForEpoch(ready.epoch); - for (Node.Id node : acknowledged.nodes()) + long epoch = topology.epoch(); + EpochState epochState = getOrCreateEpochState(epoch); + if (epochState.syncStatus != SyncStatus.NOT_STARTED) + return; + + Set pendingNotification = topology.nodes().stream().filter(i -> !localId.equals(i)).collect(Collectors.toSet()); + AccordLocalSyncNotifier notifier = new AccordLocalSyncNotifier(epoch, localId, pendingNotification, this, messagingService, failureDetector, this); + syncNotifiers.put(epoch, notifier); + diskState = AccordKeyspace.setNotifyingLocalSync(epoch, pendingNotification, diskState); + epochState.setSyncStatus(SyncStatus.NOTIFYING); + notifier.start(); + } + + @Override + public long currentEpoch() + { + return super.currentEpoch(); + } + + @Override + public synchronized void onEndpointAck(Node.Id id, long epoch) + { + EpochState epochState = getOrCreateEpochState(epoch); + if (epochState.syncStatus != SyncStatus.NOTIFYING) + return; + diskState = AccordKeyspace.markLocalSyncAck(id, epoch, diskState); + } + + @Override + public synchronized void onComplete(long epoch) + { + EpochState epochState = getOrCreateEpochState(epoch); + epochState.setSyncStatus(SyncStatus.COMPLETED); + diskState = AccordKeyspace.setCompletedLocalSync(epoch, diskState); + } + + @Override + protected synchronized void topologyUpdatePreListenerNotify(Topology topology) + { + if (state == State.STARTED) + diskState = AccordKeyspace.saveTopology(topology, diskState); + } + + @Override + protected void remoteSyncCompletePreListenerNotify(Node.Id node, long epoch) + { + if (state == State.STARTED) + diskState = AccordKeyspace.markRemoteTopologySync(node, epoch, diskState); + } + + @Override + protected void truncateTopologiesPreListenerNotify(long epoch) + { + Invariants.checkState(state == State.STARTED); + } + + @Override + protected void truncateTopologiesPostListenerNotify(long epoch) + { + if (state == State.STARTED) + diskState = AccordKeyspace.truncateTopologyUntil(epoch, diskState); + } + + @VisibleForTesting + public static class EpochSnapshot + { + public enum ResultStatus { - if (node.equals(localId)) - continue; + PENDING, SUCCESS, FAILURE; - ready.coordination.addCallback(() -> { - for (Listener listener : listeners) - listener.onEpochSyncComplete(node, ready.epoch); - }); + static ResultStatus of (AsyncResult result) + { + if (result == null || !result.isDone()) + return PENDING; + + return result.isSuccess() ? SUCCESS : FAILURE; + } + } + + public final long epoch; + public final SyncStatus syncStatus; + public final ResultStatus received; + public final ResultStatus acknowledged; + public final ResultStatus reads; + + private EpochSnapshot(EpochState state) + { + this.epoch = state.epoch(); + this.syncStatus = state.syncStatus; + this.received = ResultStatus.of(state.received()); + this.acknowledged = ResultStatus.of(state.acknowledged()); + this.reads = ResultStatus.of(state.reads()); + } + + public EpochSnapshot(long epoch, SyncStatus syncStatus, ResultStatus received, ResultStatus acknowledged, ResultStatus reads) + { + this.epoch = epoch; + this.syncStatus = syncStatus; + this.received = received; + this.acknowledged = acknowledged; + this.reads = reads; + } + + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + EpochSnapshot that = (EpochSnapshot) o; + return epoch == that.epoch && syncStatus == that.syncStatus && received == that.received && acknowledged == that.acknowledged && reads == that.reads; + } + + public int hashCode() + { + return Objects.hash(epoch, syncStatus, received, acknowledged, reads); + } + + public String toString() + { + return "EpochSnapshot{" + + "epoch=" + epoch + + ", syncStatus=" + syncStatus + + ", received=" + received + + ", acknowledged=" + acknowledged + + ", reads=" + reads + + '}'; + } + + public static EpochSnapshot completed(long epoch) + { + return new EpochSnapshot(epoch, SyncStatus.COMPLETED, ResultStatus.SUCCESS, ResultStatus.SUCCESS, ResultStatus.SUCCESS); } + + public static EpochSnapshot notStarted(long epoch) + { + return new EpochSnapshot(epoch, SyncStatus.NOT_STARTED, ResultStatus.SUCCESS, ResultStatus.SUCCESS, ResultStatus.SUCCESS); + } + } + + @VisibleForTesting + public synchronized EpochSnapshot getEpochSnapshot(long epoch) + { + if (epoch < epochs.minEpoch() || epoch > epochs.maxEpoch()) + return null; + + return new EpochSnapshot(getOrCreateEpochState(epoch)); } - public synchronized void createEpochFromConfig() + @VisibleForTesting + public synchronized long minEpoch() { - Topology current = currentTopology(); - Topology topology = AccordTopologyUtils.createTopology(epochs.size()); - if (current.equals(topology.withEpoch(current.epoch()))) return; - unsafeAddEpoch(topology); + return epochs.minEpoch(); } - private void unsafeAddEpoch(Topology topology) + @VisibleForTesting + public synchronized long maxEpoch() { - epochs.add(topology); - for (Listener listener : listeners) - listener.onTopologyUpdate(topology); + return epochs.maxEpoch(); + } - // TODO: This is a hack to enable simplistic cluster reuse for TxnAuthTest, AccordCQLTest, etc. - // Since we don't have a dist sys that sets this up, we have to just lie... - EndpointMapping.knownIds().forEach(id -> { - for (Listener listener : listeners) - listener.onEpochSyncComplete(id, topology.epoch()); + @VisibleForTesting + public synchronized Future localSyncNotified(long epoch) + { + AsyncPromise promise = new AsyncPromise<>(); + getOrCreateEpochState(epoch).localSyncNotified().addCallback((result, failure) -> { + if (failure != null) promise.tryFailure(failure); + else promise.trySuccess(result); }); + return promise; + } + + public boolean isAccordManagedKeyspace(String keyspace) + { + // TODO (required, interop) : replace with schema flag or other mechanism for classifying accord keyspaces + return !SchemaConstants.REPLICATED_SYSTEM_KEYSPACE_NAMES.contains(keyspace); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordDataStore.java b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java index b1f191a39678..fee2d633a152 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordDataStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java @@ -23,25 +23,14 @@ import accord.local.SafeCommandStore; import accord.primitives.Ranges; import accord.primitives.SyncPoint; -import accord.primitives.Timestamp; -import accord.utils.async.AsyncResults; -public enum AccordDataStore implements DataStore +public class AccordDataStore implements DataStore { - INSTANCE; - @Override public FetchResult fetch(Node node, SafeCommandStore safeStore, Ranges ranges, SyncPoint syncPoint, FetchRanges callback) { - //TODO (implement): do real work - callback.starting(ranges).started(Timestamp.NONE); - callback.fetched(ranges); - return new ImmediateFetchFuture(ranges); - } - - private static class ImmediateFetchFuture extends AsyncResults.SettableResult implements FetchResult - { - ImmediateFetchFuture(Ranges ranges) { setSuccess(ranges); } - @Override public void abort(Ranges abort) { } + AccordFetchCoordinator coordinator = new AccordFetchCoordinator(node, ranges, syncPoint, callback, safeStore.commandStore()); + coordinator.start(); + return coordinator.result(); } } diff --git a/src/java/org/apache/cassandra/tools/nodetool/CreateEpochUnsafe.java b/src/java/org/apache/cassandra/service/accord/AccordEndpointMapper.java similarity index 62% rename from src/java/org/apache/cassandra/tools/nodetool/CreateEpochUnsafe.java rename to src/java/org/apache/cassandra/service/accord/AccordEndpointMapper.java index c5ea1246d61e..16ff04437e30 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/CreateEpochUnsafe.java +++ b/src/java/org/apache/cassandra/service/accord/AccordEndpointMapper.java @@ -16,18 +16,16 @@ * limitations under the License. */ -package org.apache.cassandra.tools.nodetool; +package org.apache.cassandra.service.accord; -import io.airlift.airline.Command; -import org.apache.cassandra.tools.NodeProbe; -import org.apache.cassandra.tools.NodeTool; +import accord.local.Node; +import org.apache.cassandra.locator.InetAddressAndPort; -@Command(name="createepochunsafe", description = "manually create an Accord epoch from current topology") -public class CreateEpochUnsafe extends NodeTool.NodeToolCmd +/** + * Maps network addresses to accord ids + */ +public interface AccordEndpointMapper { - @Override - protected void execute(NodeProbe probe) - { - throw new UnsupportedOperationException("git rebase to pick up TCM removes this"); - } + Node.Id mappedId(InetAddressAndPort endpoint); + InetAddressAndPort mappedEndpoint(Node.Id id); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java new file mode 100644 index 000000000000..4e841a17ad82 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + +import accord.api.Data; +import accord.api.DataStore; +import accord.api.Query; +import accord.api.Read; +import accord.api.Update; +import accord.impl.AbstractFetchCoordinator; +import accord.local.CommandStore; +import accord.local.Node; +import accord.local.SafeCommandStore; +import accord.primitives.PartialTxn; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Seekable; +import accord.primitives.Seekables; +import accord.primitives.SyncPoint; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.utils.Invariants; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.RangesAtEndpoint; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.streaming.StreamCoordinator; +import org.apache.cassandra.streaming.StreamManager; +import org.apache.cassandra.streaming.StreamOperation; +import org.apache.cassandra.streaming.StreamPlan; +import org.apache.cassandra.streaming.StreamResultFuture; +import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.CastingSerializer; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.TimeUUID; + +import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializedMapSize; + +public class AccordFetchCoordinator extends AbstractFetchCoordinator implements StreamManager.StreamListener +{ + private static final Query noopQuery = (txnId, executeAt, data, read, update) -> null; + + public static class StreamData implements Data + { + public static class SessionInfo + { + final TimeUUID planId; + final boolean hasData; + + public SessionInfo(TimeUUID planId, boolean hasData) + { + this.planId = planId; + this.hasData = hasData; + } + + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + public void serialize(SessionInfo info, DataOutputPlus out, int version) throws IOException + { + TimeUUID.Serializer.instance.serialize(info.planId, out, version); + out.writeBoolean(info.hasData); + + } + + public SessionInfo deserialize(DataInputPlus in, int version) throws IOException + { + return new SessionInfo(TimeUUID.Serializer.instance.deserialize(in, version), in.readBoolean()); + } + + public long serializedSize(SessionInfo info, int version) + { + return TimeUUID.Serializer.instance.serializedSize(info.planId, version) + TypeSizes.BOOL_SIZE; + } + }; + } + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(StreamData data, DataOutputPlus out, int version) throws IOException + { + serializeMap(data.streams, out, version, TokenRange.serializer, SessionInfo.serializer); + } + + @Override + public StreamData deserialize(DataInputPlus in, int version) throws IOException + { + + return new StreamData(ImmutableMap.copyOf(deserializeMap(in, version, + TokenRange.serializer, + SessionInfo.serializer))); + } + + @Override + public long serializedSize(StreamData data, int version) + { + return serializedMapSize(data.streams, version, TokenRange.serializer, SessionInfo.serializer); + } + }; + + private final ImmutableMap streams; + + public StreamData(ImmutableMap streams) + { + this.streams = streams; + } + + public static StreamData of(TokenRange range, TimeUUID streamId, boolean hasData) + { + return new StreamData(ImmutableMap.of(range, new SessionInfo(streamId, hasData))); + } + + @Override + public Data merge(Data data) + { + StreamData that = (StreamData) data; + if (that.streams.keySet().stream().anyMatch(this.streams::containsKey)) + throw new IllegalStateException(String.format("Unable to merge: key found in multiple StreamData %s %s", + this.streams.keySet(), that.streams.keySet())); + Invariants.checkState(!that.streams.keySet().stream().anyMatch(this.streams::containsKey)); + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.putAll(this.streams); + builder.putAll(that.streams); + return new StreamData(builder.build()); + } + } + + // needs to be externally synchronized + private class IncomingStream + { + private final TimeUUID planId; + private Range range; + private Node.Id from; + private StreamResultFuture future; + + public IncomingStream(TimeUUID planId) + { + this.planId = planId; + } + + private void rangeReceived(Range range, Node.Id from) + { + Invariants.nonNull(range); + Invariants.nonNull(from); + Invariants.checkState(this.range == null, "range was not null: %s", this.range); + Invariants.checkState(this.from == null, "from was not null: %s", this.from); + this.range = range; + this.from = from; + maybeListen(); + } + + private void futureReceived(StreamResultFuture future) + { + Invariants.nonNull(future); + Invariants.checkState(this.future == null, "future was not null: %s", this.future); + this.future = future; + maybeListen(); + } + + private void maybeListen() + { + if (range == null || future == null) + return; + + Invariants.nonNull(from); + + future.addCallback((state, fail) -> { + if (fail == null) success(from, Ranges.of(range)); + else fail(from, Ranges.of(range), fail); + }, ((AccordCommandStore) commandStore()).executor()); + } + } + + public static class StreamingRead implements Read + { + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(StreamingRead read, DataOutputPlus out, int version) throws IOException + { + InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serialize(read.to, out, version); + KeySerializers.ranges.serialize(read.ranges, out, version); + } + + @Override + public StreamingRead deserialize(DataInputPlus in, int version) throws IOException + { + return new StreamingRead(InetAddressAndPort.Serializer.inetAddressAndPortSerializer.deserialize(in, version), + KeySerializers.ranges.deserialize(in, version)); + } + + @Override + public long serializedSize(StreamingRead read, int version) + { + return InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serializedSize(read.to, version) + + KeySerializers.ranges.serializedSize(read.ranges, version); + } + }; + + private final InetAddressAndPort to; + private final Ranges ranges; + + public StreamingRead(InetAddressAndPort to, Ranges ranges) + { + this.to = to; + this.ranges = ranges; + } + + @Override + public Seekables keys() { return ranges; } + + private static boolean hasDataToStream(StreamCoordinator coordinator, InetAddressAndPort to) + { + for (StreamSession session : coordinator.getAllStreamSessions()) + { + if (!session.peer.equals(to)) + continue; + + Invariants.checkState(session.getNumRequests() == 0, "Requested to send data: %s", session); + if (session.getNumTransfers() > 0) + return true; + } + return false; + } + + @Override + public AsyncChain read(Seekable key, Txn.Kind kind, SafeCommandStore commandStore, Timestamp executeAt, DataStore store) + { + try + { + Invariants.checkArgument(key.domain() == Routable.Domain.Range, "Required Range but saw %s: %s", key.domain(), key); + TokenRange range = (TokenRange) key; + + // TODO (correctness): check epoch + // TODO (correctness): handle dropped tables + KeyspaceMetadata ksm = ClusterMetadata.current().schema.getKeyspaceMetadata(range.keyspace()); + Invariants.checkState(ksm != null, "Keyspace %s not found", range.keyspace()); + Invariants.checkState(ksm.tables.size() > 0, "Keyspace '%s' has no tables", range.keyspace()); + + // FIXME: may also be relocation + StreamPlan plan = new StreamPlan(StreamOperation.BOOTSTRAP, 1, false, + null, PreviewKind.NONE).flushBeforeTransfer(true); + + RangesAtEndpoint ranges = RangesAtEndpoint.toDummyList(Collections.singleton(range.toKeyspaceRange())); + ksm.tables.forEach(table -> plan.transferRanges(to, table.keyspace, ranges, table.name)); + StreamResultFuture future = plan.execute(); + return AsyncChains.success(StreamData.of(range, future.planId, hasDataToStream(future.getCoordinator(), to))); + } + catch (Throwable t) + { + return AsyncChains.failure(t); + } + } + + @Override + public Read slice(Ranges ranges) { return new StreamingRead(to, this.ranges.slice(ranges)); } + + @Override + public Read merge(Read other) { throw new UnsupportedOperationException(); } + } + + public static class StreamingTxn + { + private static final IVersionedSerializer read = new CastingSerializer<>(StreamingRead.class, + StreamingRead.serializer); + + private static final IVersionedSerializer query = new IVersionedSerializer() + { + @Override + public void serialize(Query t, DataOutputPlus out, int version) + { + Invariants.checkArgument(t == noopQuery); + } + + @Override + public Query deserialize(DataInputPlus in, int version) + { + return noopQuery; + } + + @Override + public long serializedSize(Query t, int version) + { + Invariants.checkArgument(t == noopQuery); + return 0; + } + }; + + private static final IVersionedSerializer update = new IVersionedSerializer() + { + @Override + public void serialize(Update t, DataOutputPlus out, int version) + { + Invariants.checkArgument(t == null); + } + + @Override + public Update deserialize(DataInputPlus in, int version) + { + return null; + } + + @Override + public long serializedSize(Update t, int version) + { + Invariants.checkArgument(t == null); + return 0; + } + }; + + // TODO (desired): this could be serialized as an InetAddressAndPort and Ranges if we had a special case PartialTxn implementation + public static final IVersionedSerializer serializer = new CommandSerializers.PartialTxnSerializer(read, query, update); + } + + private final Map streams = new HashMap<>(); + + public AccordFetchCoordinator(Node node, Ranges ranges, SyncPoint syncPoint, DataStore.FetchRanges fetchRanges, CommandStore commandStore) + { + super(node, ranges, syncPoint, fetchRanges, commandStore); + } + + @Override + public void start() + { + StreamManager.instance.addListener(this); + super.start(); + } + + private IncomingStream stream(TimeUUID id) + { + return streams.computeIfAbsent(id, IncomingStream::new); + } + + // called from stream thread + @Override + public synchronized void onRegister(StreamResultFuture result) + { + stream(result.planId).futureReceived(result); + } + + protected void onDone(Ranges success, Throwable failure) + { + StreamManager.instance.removeListener(this); + super.onDone(success, failure); + } + + @Override + protected PartialTxn rangeReadTxn(Ranges ranges) + { + StreamingRead read = new StreamingRead(FBUtilities.getBroadcastAddressAndPort(), ranges); + return new PartialTxn.InMemory(ranges, Txn.Kind.Read, ranges, read, noopQuery, null); + } + + @Override + protected synchronized void onReadOk(Node.Id from, CommandStore commandStore, Data data, Ranges received) + { + if (data == null) + return; + + StreamData streamData = (StreamData) data; + streamData.streams.forEach((range, streamInfo) -> { + if (streamInfo.hasData) + { + stream(streamInfo.planId).rangeReceived(range, from); + } + else + { + // if there was no data to stream, no connection is initiated, and we aren't notified via the stream + // listener, so the stream initiator notifies us and we mark it complete here + success(from, Ranges.of(range)); + } + }); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index e67d9952ada3..b3bf0265d6a7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -31,13 +32,20 @@ import java.util.Set; import java.util.TreeMap; import java.util.concurrent.Executor; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Collectors; + +import javax.annotation.Nullable; import com.google.common.collect.ImmutableSet; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.ImmutableSortedSet; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.slf4j.Logger; @@ -56,24 +64,33 @@ import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.primitives.Writes; +import accord.topology.Topology; import accord.utils.Invariants; +import accord.utils.ReducingRangeMap; import accord.utils.async.Observable; import org.apache.cassandra.concurrent.DebuggableTask; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.cql3.statements.ModificationStatement; import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ClusteringComparator; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Columns; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.IMutation; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.RegularAndStaticColumns; @@ -119,19 +136,25 @@ import org.apache.cassandra.schema.UserFunctions; import org.apache.cassandra.schema.Views; import org.apache.cassandra.serializers.UUIDSerializer; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.accord.AccordConfigurationService.SyncStatus; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.serializers.DepsSerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.ListenerSerializers; +import org.apache.cassandra.service.accord.serializers.TopologySerializers; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static java.lang.String.format; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; +import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; import static org.apache.cassandra.db.rows.BufferCell.live; import static org.apache.cassandra.db.rows.BufferCell.tombstone; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; @@ -144,6 +167,9 @@ public class AccordKeyspace public static final String COMMANDS = "commands"; public static final String COMMANDS_FOR_KEY = "commands_for_key"; + public static final String TOPOLOGIES = "topologies"; + public static final String EPOCH_METADATA = "epoch_metadata"; + public static final String COMMAND_STORE_METADATA = "command_store_metadata"; private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); private static final String TIMESTAMP_TUPLE = TIMESTAMP_TYPE.asCQL3Type().toString(); @@ -226,7 +252,7 @@ static TokenType valueOf(Token token) .build(); // TODO: naming is not very clearly distinct from the base serializers - private static class CommandsSerializers + private static class LocalVersionedSerializers { static final LocalVersionedSerializer> route = localSerializer(KeySerializers.route); static final LocalVersionedSerializer routingKey = localSerializer(AccordRoutingKey.serializer); @@ -235,6 +261,10 @@ private static class CommandsSerializers static final LocalVersionedSerializer writes = localSerializer(CommandSerializers.writes); static final LocalVersionedSerializer result = localSerializer(TxnData.serializer); static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); + static final LocalVersionedSerializer topology = localSerializer(TopologySerializers.topology); + static final LocalVersionedSerializer> rejectBefore = localSerializer(CommandStoreSerializers.rejectBefore); + static final LocalVersionedSerializer> bootstrapBeganAt = localSerializer(CommandStoreSerializers.bootstrapBeganAt); + static final LocalVersionedSerializer> safeToRead = localSerializer(CommandStoreSerializers.safeToRead); private static LocalVersionedSerializer localSerializer(IVersionedSerializer serializer) { @@ -246,7 +276,7 @@ private static ColumnMetadata getColumn(TableMetadata metadata, String name) { ColumnMetadata column = metadata.getColumn(new ColumnIdentifier(name, true)); if (column == null) - throw new IllegalArgumentException(String.format("Unknown column %s for %s.%s", name, metadata.keyspace, metadata.name)); + throw new IllegalArgumentException(format("Unknown column %s for %s.%s", name, metadata.keyspace, metadata.name)); return column; } @@ -339,6 +369,39 @@ else if (hasRegularChanges) } } + private static final TableMetadata Topologies = + parse(TOPOLOGIES, + "accord topologies", + "CREATE TABLE %s (" + + "epoch bigint primary key, " + + "topology blob, " + + "sync_state int, " + + "pending_sync_notify set, " + // nodes that need to be told we're synced + "remote_sync_complete set " + // nodes that have told us they're synced + ')').build(); + + private static final TableMetadata EpochMetadata = + parse(EPOCH_METADATA, + "global epoch info", + "CREATE TABLE %s (" + + "key int primary key, " + + "min_epoch bigint, " + + "max_epoch bigint " + + ')').build(); + + private static final TableMetadata CommandStoreMetadata = + parse(COMMAND_STORE_METADATA, + "command store state", + "CREATE TABLE %s (" + + "store_id int, " + + "reject_before blob, " + + "bootstrap_began_at blob, " + + "safe_to_read blob, " + + "PRIMARY KEY(store_id)" + + ')').build(); + + private static final AtomicLong commandStoreMetadataTimestamp = new AtomicLong(); + private static TableMetadata.Builder parse(String name, String description, String cql) { return CreateTableStatement.parse(format(cql, name), ACCORD_KEYSPACE_NAME) @@ -347,6 +410,11 @@ private static TableMetadata.Builder parse(String name, String description, Stri .gcGraceSeconds((int) TimeUnit.DAYS.toSeconds(90)); } + private static void flush(TableMetadata table) + { + Keyspace.open(table.keyspace).getColumnFamilyStore(table.id).forceBlockingFlush(ColumnFamilyStore.FlushReason.ACCORD); + } + public static KeyspaceMetadata metadata() { return KeyspaceMetadata.create(ACCORD_KEYSPACE_NAME, KeyspaceParams.local(), tables(), Views.none(), Types.none(), UserFunctions.none()); @@ -354,7 +422,7 @@ public static KeyspaceMetadata metadata() private static Tables tables() { - return Tables.of(Commands, CommandsForKeys); + return Tables.of(Commands, CommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata); } private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException @@ -364,7 +432,7 @@ private static ByteBuffer serialize(T obj, LocalVersionedSerializer seria { serializer.serialize(obj, out); ByteBuffer bb = out.buffer(); - assert size == bb.limit() : String.format("Expected to write %d but wrote %d", size, bb.limit()); + assert size == bb.limit() : format("Expected to write %d but wrote %d", size, bb.limit()); return bb; } } @@ -422,7 +490,7 @@ private static Listeners.Immutable deserializeListeners(Set serializ Listeners result = new Listeners(); for (ByteBuffer bytes : serialized) { - result.add(deserialize(bytes, CommandsSerializers.listeners)); + result.add(deserialize(bytes, LocalVersionedSerializers.listeners)); } return new Listeners.Immutable(result); } @@ -458,7 +526,7 @@ private static void addCellIfModified(ColumnMetadata colu private static void addKeyCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, C original, C command) throws IOException { - addCellIfModified(column, get, v -> serializeOrNull((AccordRoutingKey) v, CommandsSerializers.routingKey), builder, timestampMicros, original, command); + addCellIfModified(column, get, v -> serializeOrNull((AccordRoutingKey) v, LocalVersionedSerializers.routingKey), builder, timestampMicros, original, command); } private static > void addEnumCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, C original, C command) throws IOException @@ -545,17 +613,17 @@ public static Mutation getCommandMutation(int storeId, Command original, Command addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, original, command); addKeyCellIfModified(CommandsColumns.home_key, Command::homeKey, builder, timestampMicros, original, command); addKeyCellIfModified(CommandsColumns.progress_key, Command::progressKey, builder, timestampMicros, original, command); - addCellIfModified(CommandsColumns.route, Command::route, CommandsSerializers.route, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, original, command); addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, original, command); - addCellIfModified(CommandsColumns.txn, Command::partialTxn, CommandsSerializers.partialTxn, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.txn, Command::partialTxn, LocalVersionedSerializers.partialTxn, builder, timestampMicros, original, command); addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); addCellIfModified(CommandsColumns.accepted_ballot, Command::accepted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); - addCellIfModified(CommandsColumns.dependencies, Command::partialDeps, CommandsSerializers.partialDeps, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.dependencies, Command::partialDeps, LocalVersionedSerializers.partialDeps, builder, timestampMicros, original, command); - addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, CommandsSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); + addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, LocalVersionedSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); if (command.isCommitted()) { @@ -567,8 +635,8 @@ public static Mutation getCommandMutation(int storeId, Command original, Command { Command.Executed executed = command.asExecuted(); Command.Executed originalExecuted = original != null && original.isExecuted() ? original.asExecuted() : null; - addCellIfModified(CommandsColumns.writes, Command.Executed::writes, v -> serialize(v, CommandsSerializers.writes), builder, timestampMicros, originalExecuted, executed); - addCellIfModified(CommandsColumns.result, Command.Executed::result, v -> serialize((TxnData) v, CommandsSerializers.result), builder, timestampMicros, originalExecuted, executed); + addCellIfModified(CommandsColumns.writes, Command.Executed::writes, v -> serialize(v, LocalVersionedSerializers.writes), builder, timestampMicros, originalExecuted, executed); + addCellIfModified(CommandsColumns.result, Command.Executed::result, v -> serialize((TxnData) v, LocalVersionedSerializers.result), builder, timestampMicros, originalExecuted, executed); } } @@ -663,7 +731,7 @@ public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId t "AND domain = ? " + "AND txn_id=(?, ?, ?)"; - return executeInternal(String.format(cql, ACCORD_KEYSPACE_NAME, COMMANDS), + return executeInternal(format(cql, ACCORD_KEYSPACE_NAME, COMMANDS), commandStore.id(), txnId.domain().ordinal(), txnId.msb, txnId.lsb, txnId.node.id); @@ -741,7 +809,7 @@ public long startTimeNanos() @Override public String description() { - return String.format("Table Walker for %s; queries = %d", getClass().getSimpleName(), numQueries); + return format("Table Walker for %s; queries = %d", getClass().getSimpleName(), numQueries); } } @@ -785,7 +853,7 @@ private WalkCommandsForDomain(int commandStore, Routable.Domain domain, Set= ?\n" : " AND key_token > ?\n") + (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + "ALLOW FILTERING", selection, CommandsForKeys); - this.cqlContinue = String.format("SELECT DISTINCT %s\n" + + this.cqlContinue = format("SELECT DISTINCT %s\n" + "FROM %s\n" + "WHERE store_id = ?\n" + " AND key_token > ?\n" + @@ -891,9 +959,9 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) CommonAttributes.Mutable attributes = new CommonAttributes.Mutable(txnId); // TODO: something less brittle than ordinal, more efficient than values() attributes.durability(Status.Durability.values()[row.getInt("durability", 0)]); - attributes.homeKey(deserializeOrNull(row.getBlob("home_key"), CommandsSerializers.routingKey)); - attributes.progressKey(deserializeOrNull(row.getBlob("progress_key"), CommandsSerializers.routingKey)); - attributes.route(deserializeOrNull(row.getBlob("route"), CommandsSerializers.route)); + attributes.homeKey(deserializeOrNull(row.getBlob("home_key"), LocalVersionedSerializers.routingKey)); + attributes.progressKey(deserializeOrNull(row.getBlob("progress_key"), LocalVersionedSerializers.routingKey)); + attributes.route(deserializeOrNull(row.getBlob("route"), LocalVersionedSerializers.route)); attributes.partialTxn(deserializeTxn(row)); attributes.partialDeps(deserializeDependencies(row)); attributes.setListeners(deserializeListeners(row, "listeners")); @@ -903,8 +971,8 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) Ballot accepted = deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits); ImmutableSortedSet waitingOnCommit = deserializeTxnIdNavigableSet(row, "waiting_on_commit"); ImmutableSortedMap waitingOnApply = deserializeWaitingOnApply(row.getMap("waiting_on_apply", BytesType.instance, BytesType.instance)); - Writes writes = deserializeWithVersionOr(row, "writes", CommandsSerializers.writes, () -> null); - Result result = deserializeWithVersionOr(row, "result", CommandsSerializers.result, () -> null); + Writes writes = deserializeWithVersionOr(row, "writes", LocalVersionedSerializers.writes, () -> null); + Result result = deserializeWithVersionOr(row, "result", LocalVersionedSerializers.result, () -> null); switch (status.status) { @@ -941,7 +1009,7 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) public static PartialDeps deserializeDependencies(UntypedResultSet.Row row) throws IOException { - return deserializeOrNull(row.getBlob("dependencies"), CommandsSerializers.partialDeps); + return deserializeOrNull(row.getBlob("dependencies"), LocalVersionedSerializers.partialDeps); } public static Timestamp deserializeExecuteAt(UntypedResultSet.Row row) @@ -961,7 +1029,7 @@ public static TxnId deserializeTxnId(UntypedResultSet.Row row) public static PartialTxn deserializeTxn(UntypedResultSet.Row row) throws IOException { - return deserializeOrNull(row.getBlob("txn"), CommandsSerializers.partialTxn); + return deserializeOrNull(row.getBlob("txn"), LocalVersionedSerializers.partialTxn); } public static PartitionKey deserializeKey(UntypedResultSet.Row row) @@ -1094,7 +1162,7 @@ private static ByteBuffer cellValue(Cell cell) private static ByteBuffer cellValue(Row row, ColumnMetadata column) { Cell cell = row.getCell(column); - return (cell != null && !cell.isTombstone()) ? cellValue(cell) : null; + return (cell != null && !cell.isTombstone()) ? cellValue(cell) : null; } private static ByteBuffer clusteringValue(Clustering clustering, int idx) @@ -1124,8 +1192,8 @@ public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, for (SeriesKind kind : SeriesKind.values()) seriesMaps.put(kind, new ImmutableSortedMap.Builder<>(Comparator.naturalOrder())); - try(ReadExecutionController controller = command.executionController(); - FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) + try (ReadExecutionController controller = command.executionController(); + FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) { if (!partitions.hasNext()) { @@ -1175,4 +1243,297 @@ public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, throw t; } } + + public static class EpochDiskState + { + public static final EpochDiskState EMPTY = new EpochDiskState(0, 0); + public final long minEpoch; + public final long maxEpoch; + + public EpochDiskState(long minEpoch, long maxEpoch) + { + Invariants.checkArgument(minEpoch >= 0, "Min Epoch %d < 0", minEpoch); + Invariants.checkArgument(maxEpoch >= minEpoch, "Max epoch %d < min %d", maxEpoch, minEpoch); + this.minEpoch = minEpoch; + this.maxEpoch = maxEpoch; + } + + private EpochDiskState withNewMaxEpoch(long epoch) + { + Invariants.checkArgument(epoch > maxEpoch, "Epoch %d <= %d (max)", epoch, maxEpoch); + return new EpochDiskState(Math.max(1, minEpoch), epoch); + } + + private EpochDiskState withNewMinEpoch(long epoch) + { + Invariants.checkArgument(epoch > minEpoch, "epoch %d <= %d (min)", epoch, minEpoch); + Invariants.checkArgument(epoch <= maxEpoch, "epoch %d > %d (max)", epoch, maxEpoch); + return new EpochDiskState(epoch, maxEpoch); + } + + @Override + public String toString() + { + return "EpochDiskState{" + + "minEpoch=" + minEpoch + + ", maxEpoch=" + maxEpoch + + '}'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + EpochDiskState diskState = (EpochDiskState) o; + return minEpoch == diskState.minEpoch && maxEpoch == diskState.maxEpoch; + } + + @Override + public int hashCode() + { + throw new UnsupportedOperationException(); + } + } + + private static void saveEpochDiskState(EpochDiskState diskState) + { + String cql = "INSERT INTO %s.%s (key, min_epoch, max_epoch) VALUES (0, ?, ?);"; + executeInternal(format(cql, ACCORD_KEYSPACE_NAME, EPOCH_METADATA), + diskState.minEpoch, diskState.maxEpoch); + } + + @Nullable + @VisibleForTesting + public static EpochDiskState loadEpochDiskState() + { + String cql = "SELECT * FROM %s.%s WHERE key=0"; + UntypedResultSet result = executeInternal(format(cql, ACCORD_KEYSPACE_NAME, EPOCH_METADATA)); + if (result.isEmpty()) + return null; + UntypedResultSet.Row row = result.one(); + return new EpochDiskState(row.getLong("min_epoch"), row.getLong("max_epoch")); + } + + /** + * Update the disk state for this epoch, if it's higher than the one we have one disk. + * + * This is meant to be called before any update involving the new epoch, not after. This way if the update + * fails, we can detect and cleanup. If we updated disk state after an update and it failed, we could "forget" + * about (now acked) topology updates after a restart. + */ + private static EpochDiskState maybeUpdateMaxEpoch(EpochDiskState diskState, long epoch) + { + Invariants.checkArgument(epoch >= diskState.minEpoch, "Epoch %d < %d (min)", epoch, diskState.minEpoch); + if (epoch > diskState.maxEpoch) + { + diskState = diskState.withNewMaxEpoch(epoch); + saveEpochDiskState(diskState); + } + return diskState; + } + + public static EpochDiskState saveTopology(Topology topology, EpochDiskState diskState) + { + diskState = maybeUpdateMaxEpoch(diskState, topology.epoch()); + + try + { + String cql = "UPDATE %s.%s SET topology=? WHERE epoch=?"; + executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + serialize(topology, LocalVersionedSerializers.topology), topology.epoch()); + flush(Topologies); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + + return diskState; + } + + public static EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, EpochDiskState diskState) + { + diskState = maybeUpdateMaxEpoch(diskState, epoch); + String cql = "UPDATE %s.%s SET remote_sync_complete = remote_sync_complete + ? WHERE epoch = ?"; + executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + Collections.singleton(node.id), epoch); + flush(Topologies); + return diskState; + } + + public static EpochDiskState setNotifyingLocalSync(long epoch, Set pending, EpochDiskState diskState) + { + diskState = maybeUpdateMaxEpoch(diskState, epoch); + String cql = "UPDATE %s.%s SET sync_state = ?, pending_sync_notify = ? WHERE epoch = ?"; + executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + SyncStatus.NOTIFYING.ordinal(), + pending.stream().map(i -> i.id).collect(Collectors.toSet()), + epoch); + return diskState; + } + + public static EpochDiskState markLocalSyncAck(Node.Id node, long epoch, EpochDiskState diskState) + { + diskState = maybeUpdateMaxEpoch(diskState, epoch); + String cql = "UPDATE %s.%s SET pending_sync_notify = pending_sync_notify - ? WHERE epoch = ?"; + executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + Collections.singleton(node.id), epoch); + return diskState; + } + + public static EpochDiskState setCompletedLocalSync(long epoch, EpochDiskState diskState) + { + diskState = maybeUpdateMaxEpoch(diskState, epoch); + String cql = "UPDATE %s.%s SET sync_state = ?, pending_sync_notify = {} WHERE epoch = ?"; + executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + SyncStatus.COMPLETED.ordinal(), + epoch); + return diskState; + } + + public static EpochDiskState truncateTopologyUntil(final long epoch, EpochDiskState diskState) + { + while (diskState.minEpoch < epoch) + { + long delete = diskState.minEpoch; + diskState = diskState.withNewMinEpoch(delete + 1); + saveEpochDiskState(diskState); + String cql = "DELETE FROM %s.%s WHERE epoch = ?"; + executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), delete); + } + return diskState; + } + + public interface TopologyLoadConsumer + { + void load(long epoch, Topology topology, SyncStatus syncStatus, Set pendingSyncNotify, Set remoteSyncComplete); + } + + @VisibleForTesting + public static void loadEpoch(long epoch, TopologyLoadConsumer consumer) throws IOException + { + String cql = format("SELECT * FROM %s.%s WHERE epoch=?", ACCORD_KEYSPACE_NAME, TOPOLOGIES); + + UntypedResultSet result = executeInternal(cql, epoch); + Invariants.checkState(!result.isEmpty(), "Nothing found for epoch %d", epoch); + UntypedResultSet.Row row = result.one(); + Topology topology = row.has("topology") + ? deserialize(row.getBytes("topology"), LocalVersionedSerializers.topology) + : null; + + SyncStatus syncStatus = row.has("sync_state") + ? SyncStatus.values()[row.getInt("sync_state")] + : SyncStatus.NOT_STARTED; + Set pendingSyncNotify = row.has("pending_sync_notify") + ? row.getSet("pending_sync_notify", Int32Type.instance).stream().map(Node.Id::new).collect(Collectors.toSet()) + : Collections.emptySet(); + Set remoteSyncComplete = row.has("remote_sync_complete") + ? row.getSet("remote_sync_complete", Int32Type.instance).stream().map(Node.Id::new).collect(Collectors.toSet()) + : Collections.emptySet(); + + consumer.load(epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete); + + } + + public static EpochDiskState loadTopologies(TopologyLoadConsumer consumer) + { + try + { + EpochDiskState diskState = loadEpochDiskState(); + if (diskState == null) + return EpochDiskState.EMPTY; + + for (long epoch=diskState.minEpoch; epoch<=diskState.maxEpoch; epoch++) + loadEpoch(epoch, consumer); + + return diskState; + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + private static IMutation getCommandStoreMetadataMutation(String cql, ByteBuffer... values) + { + ClientState clientState = ClientState.forInternalCalls(); + ModificationStatement statement = (ModificationStatement) QueryProcessor.parseStatement(cql).prepare(ClientState.forInternalCalls()); + QueryOptions options = QueryOptions.forInternalCalls(Arrays.asList(values)); + + long tsMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + + while (true) + { + long prev = commandStoreMetadataTimestamp.get(); + if (prev >= tsMicros) + tsMicros = prev + 1; + + if (commandStoreMetadataTimestamp.compareAndSet(prev, tsMicros)) + break; + } + + return Iterables.getOnlyElement(statement.getMutations(clientState, options, true, tsMicros, (int) TimeUnit.MICROSECONDS.toSeconds(tsMicros), Dispatcher.RequestTime.forImmediateExecution())); + } + + + private static Future updateCommandStoreMetadata(CommandStore commandStore, String column, T value, LocalVersionedSerializer serializer) + { + String cql = format("UPDATE %s.%s SET %s=? WHERE store_id=?", ACCORD_KEYSPACE_NAME, COMMAND_STORE_METADATA, column); + try + { + IMutation mutation = getCommandStoreMetadataMutation(cql, serialize(value, serializer), bytes(commandStore.id())); + return Stage.MUTATION.submit(mutation::apply); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + public static Future updateRejectBefore(CommandStore commandStore, ReducingRangeMap rejectBefore) + { + return updateCommandStoreMetadata(commandStore, "reject_before", rejectBefore, LocalVersionedSerializers.rejectBefore); + } + + public static Future updateBootstrapBeganAt(CommandStore commandStore, NavigableMap bootstrapBeganAt) + { + return updateCommandStoreMetadata(commandStore, "bootstrap_began_at", bootstrapBeganAt, LocalVersionedSerializers.bootstrapBeganAt); + } + + public static Future updateSafeToRead(CommandStore commandStore, NavigableMap safeToRead) + { + return updateCommandStoreMetadata(commandStore, "safe_to_read", safeToRead, LocalVersionedSerializers.safeToRead); + } + + public interface CommandStoreMetadataConsumer + { + void accept(ReducingRangeMap rejectBefore, NavigableMap bootstrapBeganAt, NavigableMap safeToRead); + + } + public static void loadCommandStoreMetadata(int id, CommandStoreMetadataConsumer consumer) + { + UntypedResultSet result = executeOnceInternal(format("SELECT * FROM %s.%s WHERE store_id=?", ACCORD_KEYSPACE_NAME, COMMAND_STORE_METADATA), id); + ReducingRangeMap rejectBefore = null; + NavigableMap bootstrapBeganAt = null; + NavigableMap safeToRead = null; + if (!result.isEmpty()) + { + UntypedResultSet.Row row = Iterables.getOnlyElement(result); + try + { + if (row.has("reject_before")) + rejectBefore = deserialize(row.getBlob("reject_before"), LocalVersionedSerializers.rejectBefore); + if (row.has("bootstrap_began_at")) + bootstrapBeganAt = deserialize(row.getBlob("bootstrap_began_at"), LocalVersionedSerializers.bootstrapBeganAt); + if (row.has("safe_to_read")) + safeToRead = deserialize(row.getBlob("safe_to_read"), LocalVersionedSerializers.safeToRead); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + consumer.accept(rejectBefore, bootstrapBeganAt, safeToRead); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java b/src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java new file mode 100644 index 000000000000..ade2f8e3e33d --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.Node; +import accord.utils.Invariants; +import org.apache.cassandra.concurrent.ScheduledExecutors; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.serializers.TopologySerializers; + +public class AccordLocalSyncNotifier implements RequestCallback +{ + public static final IVerbHandler verbHandler = message -> AccordService.instance().remoteSyncComplete(message); + private static final Logger logger = LoggerFactory.getLogger(AccordLocalSyncNotifier.class); + + interface Listener + { + void onEndpointAck(Node.Id id, long epoch); + void onComplete(long epoch); + } + + private final long epoch; + private final Node.Id from; + private final Set pendingNotifications; + private final AccordEndpointMapper endpointMapper; + private final IFailureDetector failureDetector; + private final Listener listener; + private final MessageDelivery messagingService; + + public AccordLocalSyncNotifier(long epoch, + Node.Id from, Set pendingNotifications, + AccordEndpointMapper endpointMapper, + MessageDelivery messagingService, IFailureDetector failureDetector, + Listener listener) + { + this.epoch = epoch; + this.from = from; + this.pendingNotifications = pendingNotifications; + this.endpointMapper = endpointMapper; + this.failureDetector = failureDetector; + this.listener = listener; + this.messagingService = messagingService; + } + + private void notify(Node.Id to) + { + InetAddressAndPort toEp = endpointMapper.mappedEndpoint(to); + if (failureDetector.isAlive(toEp)) + { + Message msg = Message.out(Verb.ACCORD_SYNC_NOTIFY_REQ, new Notification(epoch, from, to)); + messagingService.sendWithCallback(msg, toEp, this); + } + else + { + scheduleNotify(to); + } + } + + public void scheduleNotify(Node.Id to) + { + ScheduledExecutors.scheduledTasks.schedule(() -> notify(to), 1, TimeUnit.MINUTES); + } + + public synchronized void start() + { + if (pendingNotifications.isEmpty()) + { + listener.onComplete(epoch); + return; + } + pendingNotifications.forEach(this::notify); + } + + private synchronized void onResponse(InetAddressAndPort fromEp, Node.Id from) + { + try + { + Invariants.checkArgument(endpointMapper.mappedId(fromEp).equals(from), "%s != %s", from, endpointMapper.mappedId(fromEp)); + listener.onEndpointAck(from, epoch); + pendingNotifications.remove(from); + if (pendingNotifications.isEmpty()) + listener.onComplete(epoch); + } + catch (Throwable t) + { + logger.error("Unhandled exception handling sync ack on epoch {} from {}", epoch, fromEp, t); + scheduleNotify(from); + } + } + + @Override + public synchronized void onResponse(Message msg) + { + onResponse(msg.from(), msg.payload.from); + } + + @Override + public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + { + scheduleNotify(endpointMapper.mappedId(from)); + } + + public static class Notification + { + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(Notification notification, DataOutputPlus out, int version) throws IOException + { + out.writeLong(notification.epoch); + TopologySerializers.nodeId.serialize(notification.from, out, version); + TopologySerializers.nodeId.serialize(notification.to, out, version); + } + + @Override + public Notification deserialize(DataInputPlus in, int version) throws IOException + { + return new Notification(in.readLong(), + TopologySerializers.nodeId.deserialize(in, version), + TopologySerializers.nodeId.deserialize(in, version)); + } + + @Override + public long serializedSize(Notification notification, int version) + { + return TypeSizes.LONG_SIZE + + TopologySerializers.nodeId.serializedSize() + + TopologySerializers.nodeId.serializedSize(); + } + }; + final long epoch; + final Node.Id from; + final Node.Id to; + + public Notification(long epoch, Node.Id from, Node.Id to) + { + this.epoch = epoch; + this.from = from; + this.to = to; + } + } + + public static class Acknowledgement + { + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(Acknowledgement acknowledgement, DataOutputPlus out, int version) throws IOException + { + TopologySerializers.nodeId.serialize(acknowledgement.from, out, version); + } + + @Override + public Acknowledgement deserialize(DataInputPlus in, int version) throws IOException + { + return new Acknowledgement(TopologySerializers.nodeId.deserialize(in, version)); + } + + @Override + public long serializedSize(Acknowledgement acknowledgement, int version) + { + return TopologySerializers.nodeId.serializedSize(); + } + }; + + final Node.Id from; + + public Acknowledgement(Node.Id from) + { + this.from = from; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 034504d7e7fa..9169b9193675 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -38,8 +38,6 @@ import accord.messages.Request; import org.apache.cassandra.locator.InetAddressAndPort; -import static org.apache.cassandra.service.accord.EndpointMapping.getEndpoint; - public class AccordMessageSink implements MessageSink { private static final Logger logger = LoggerFactory.getLogger(AccordMessageSink.class); @@ -77,6 +75,8 @@ private VerbMapping() mapping.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); mapping.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); mapping.put(MessageType.SIMPLE_RSP, Verb.ACCORD_SIMPLE_RSP); + mapping.put(MessageType.FETCH_DATA_REQ, Verb.ACCORD_FETCH_DATA_REQ); + mapping.put(MessageType.FETCH_DATA_RSP, Verb.ACCORD_FETCH_DATA_RSP); for (MessageType type : MessageType.values()) { @@ -93,16 +93,18 @@ private static Verb getVerb(MessageType type) private final Agent agent; private final MessageDelivery messaging; + private final AccordEndpointMapper endpointMapper; - public AccordMessageSink(Agent agent, MessageDelivery messaging) + public AccordMessageSink(Agent agent, MessageDelivery messaging, AccordEndpointMapper endpointMapper) { this.agent = agent; this.messaging = messaging; + this.endpointMapper = endpointMapper; } - public AccordMessageSink(Agent agent) + public AccordMessageSink(Agent agent, AccordConfigurationService endpointMapper) { - this(agent, MessagingService.instance()); + this(agent, MessagingService.instance(), endpointMapper); } @Override @@ -111,7 +113,7 @@ public void send(Node.Id to, Request request) Verb verb = getVerb(request.type()); Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); - InetAddressAndPort endpoint = getEndpoint(to); + InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); messaging.send(message, endpoint); } @@ -122,9 +124,9 @@ public void send(Node.Id to, Request request, AgentExecutor executor, Callback c Verb verb = getVerb(request.type()); Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); - InetAddressAndPort endpoint = getEndpoint(to); + InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); - messaging.sendWithCallback(message, endpoint, new AccordCallback<>(executor, (Callback) callback)); + messaging.sendWithCallback(message, endpoint, new AccordCallback<>(executor, (Callback) callback, endpointMapper)); } @Override @@ -135,7 +137,7 @@ public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply Verb verb = getVerb(reply.type()); Preconditions.checkNotNull(verb, "Verb is null for type %s", reply.type()); Preconditions.checkArgument(replyMsg.verb() == verb, "Expected reply message with verb %s but got %s; reply type was %s", replyMsg.verb(), verb, reply.type()); - InetAddressAndPort endpoint = getEndpoint(replyingToNode); + InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); logger.debug("Replying {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); messaging.send(replyMsg, endpoint); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 3a979c133296..09db36757f9d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -40,6 +40,8 @@ import accord.primitives.TxnId; import accord.topology.TopologyManager; import accord.utils.DefaultRandom; +import accord.utils.Invariants; +import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Shutdownable; import accord.utils.async.AsyncResult; @@ -50,15 +52,22 @@ import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; import org.apache.cassandra.service.accord.api.AccordScheduler; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; -import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.ImmediateFuture; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; @@ -70,12 +79,14 @@ public class AccordService implements IAccordService, Shutdownable public static final AccordClientRequestMetrics readMetrics = new AccordClientRequestMetrics("AccordRead"); public static final AccordClientRequestMetrics writeMetrics = new AccordClientRequestMetrics("AccordWrite"); + private static final Future BOOTSTRAP_SUCCESS = ImmediateFuture.success(null); private final Node node; private final Shutdownable nodeShutdown; private final AccordMessageSink messageSink; private final AccordConfigurationService configService; private final AccordScheduler scheduler; + private final AccordDataStore dataStore; private final AccordVerbHandler verbHandler; private static final IAccordService NOOP_SERVICE = new IAccordService() @@ -86,9 +97,6 @@ public IVerbHandler verbHandler() return null; } - @Override - public void createEpochFromConfigUnsafe() { } - @Override public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) { @@ -110,15 +118,39 @@ public TopologyManager topology() throw new UnsupportedOperationException("Cannot return topology when accord_transactions_enabled = false in cassandra.yaml"); } + @Override + public void startup() {} + @Override public void shutdownAndWait(long timeout, TimeUnit unit) { } + + @Override + public Future epochReady(Epoch epoch) + { + return BOOTSTRAP_SUCCESS; + } + + @Override + public void remoteSyncComplete(Message message) {} + + public boolean isAccordManagedKeyspace(String keyspace) + { + return false; + } }; + private static Node.Id localId = null; private static class Handle { public static final AccordService instance = new AccordService(); } + public static void startup(NodeId tcmId) + { + localId = AccordTopologyUtils.tcmIdToAccord(tcmId); + instance().startup(); + } + public static IAccordService instance() { return DatabaseDescriptor.getAccordTransactionsEnabled() ? Handle.instance : NOOP_SERVICE; @@ -131,17 +163,18 @@ public static long uniqueNow() private AccordService() { - Node.Id localId = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + Invariants.checkState(localId != null, "static localId must be set before instantiating AccordService"); logger.info("Starting accord with nodeId {}", localId); AccordAgent agent = new AccordAgent(); - this.messageSink = new AccordMessageSink(agent); this.configService = new AccordConfigurationService(localId); + this.messageSink = new AccordMessageSink(agent, configService); this.scheduler = new AccordScheduler(); + this.dataStore = new AccordDataStore(); this.node = new Node(localId, messageSink, configService, AccordService::uniqueNow, - () -> AccordDataStore.INSTANCE, + () -> dataStore, new KeyspaceSplitter(new EvenSplit<>(DatabaseDescriptor.getAccordShardCount(), getPartitioner().accordSplitter())), agent, new DefaultRandom(), @@ -150,20 +183,20 @@ private AccordService() SimpleProgressLog::new, AccordCommandStores.factory(new AccordJournal().start())); this.nodeShutdown = toShutdownable(node); - this.verbHandler = new AccordVerbHandler<>(this.node); + this.verbHandler = new AccordVerbHandler<>(this.node, configService); } @Override - public IVerbHandler verbHandler() + public void startup() { - return verbHandler; + configService.start(); + ClusterMetadataService.instance().log().addListener(configService); } @Override - @VisibleForTesting - public void createEpochFromConfigUnsafe() + public IVerbHandler verbHandler() { - configService.createEpochFromConfig(); + return verbHandler; } public static long nowInMicros() @@ -248,12 +281,6 @@ private static RuntimeException throwPreempted(TxnId txnId, Txn txn, Consistency : new ReadPreemptedException(consistencyLevel, 0, 0, false, txnId.toString()); } - @VisibleForTesting - AccordMessageSink messageSink() - { - return messageSink; - } - @Override public void setCacheSize(long kb) { @@ -308,6 +335,26 @@ public Node node() return node; } + @Override + public Future epochReady(Epoch epoch) + { + AsyncPromise promise = new AsyncPromise<>(); + AsyncChain ready = configService.epochReady(epoch.getEpoch()); + ready.begin((result, failure) -> { + if (failure == null) promise.trySuccess(result); + else promise.tryFailure(failure); + }); + return promise; + } + + @Override + public void remoteSyncComplete(Message message) + { + Invariants.checkArgument(localId.equals(message.payload.to), "%s != %s", localId, message.payload.to); + configService.remoteSyncComplete(message.payload.from, message.payload.epoch); + MessagingService.instance().respond(new AccordLocalSyncNotifier.Acknowledgement(localId), message); + } + private static Shutdownable toShutdownable(Node node) { return new Shutdownable() { @@ -343,4 +390,15 @@ public boolean awaitTermination(long timeout, TimeUnit units) } }; } + + @VisibleForTesting + public AccordConfigurationService configurationService() + { + return configService; + } + + public boolean isAccordManagedKeyspace(String keyspace) + { + return configService.isAccordManagedKeyspace(keyspace); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index 1b69c27aa452..f234738402c0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -19,84 +19,128 @@ package org.apache.cassandra.service.accord; import java.util.ArrayList; -import java.util.Comparator; +import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Predicate; import java.util.stream.Collectors; +import com.google.common.collect.Sets; + +import accord.local.Node; import accord.topology.Shard; import accord.topology.Topology; -import org.apache.cassandra.db.Keyspace; +import accord.utils.Invariants; +import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.locator.EndpointsForRange; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.ownership.DataPlacements; public class AccordTopologyUtils { - private static Shard createShard(TokenRange range, EndpointsForToken natural, EndpointsForToken pending) + static Node.Id tcmIdToAccord(NodeId nodeId) { - return new Shard(range, - natural.stream().map(EndpointMapping::getId).collect(Collectors.toList()), - natural.stream().map(EndpointMapping::getId).collect(Collectors.toSet()), - pending.stream().map(EndpointMapping::getId).collect(Collectors.toSet())); + return new Node.Id(nodeId.id()); + } + + private static Shard createShard(TokenRange range, Directory directory, EndpointsForRange reads, EndpointsForRange writes) + { + Function endpointMapper = e -> { + NodeId tcmId = directory.peerId(e); + return tcmIdToAccord(tcmId); + }; + Set endpoints = reads.endpoints(); + Set writeEndpoints = writes.endpoints(); + List nodes = endpoints.stream().map(endpointMapper).sorted().collect(Collectors.toList()); + Set fastPath = new HashSet<>(nodes); // TODO: support fast path updates + Set pending = endpoints.equals(writeEndpoints) ? + Collections.emptySet() : + writeEndpoints.stream().filter(e -> !endpoints.contains(e)).map(endpointMapper).collect(Collectors.toSet()); + + Sets.SetView readOnly = Sets.difference(endpoints, writeEndpoints); + Invariants.checkState(readOnly.isEmpty(), "Read only replicas detected: %s", readOnly); + return new Shard(range, nodes, fastPath, pending); } - private static TokenRange minRange(String keyspace, Token token) + static TokenRange minRange(String keyspace, Token token) { return new TokenRange(SentinelKey.min(keyspace), new TokenKey(keyspace, token)); } - private static TokenRange maxRange(String keyspace, Token token) + static TokenRange maxRange(String keyspace, Token token) { return new TokenRange(new TokenKey(keyspace, token), SentinelKey.max(keyspace)); } - private static TokenRange range(String keyspace, Token left, Token right) + static TokenRange fullRange(String keyspace) { - return new TokenRange(new TokenKey(keyspace, left), new TokenKey(keyspace, right)); + return new TokenRange(SentinelKey.min(keyspace), SentinelKey.max(keyspace)); } - public static List createShards(String keyspace, ClusterMetadata clusterMetadata) + static TokenRange range(String keyspace, Range range) { - KeyspaceMetadata keyspaceMetadata = Keyspace.open(keyspace).getMetadata(); - List tokens = new ArrayList<>(clusterMetadata.tokenMap.tokens()); - tokens.sort(Comparator.naturalOrder()); + Token minToken = range.left.minValue(); + return new TokenRange(range.left.equals(minToken) ? SentinelKey.min(keyspace) : new TokenKey(keyspace, range.left), + range.right.equals(minToken) ? SentinelKey.max(keyspace) : new TokenKey(keyspace, range.right)); + } + + public static List createShards(KeyspaceMetadata keyspace, DataPlacements placements, Directory directory) + { + ReplicationParams replication = keyspace.params.replication; + DataPlacement placement = placements.get(replication); - List shards = new ArrayList<>(tokens.size() + 1); - Shard finalShard = null; - for (int i = 0, mi = tokens.size(); i < mi; i++) + List> ranges = placement.reads.ranges(); + List shards = new ArrayList<>(ranges.size()); + for (Range range : ranges) { - Token token = tokens.get(i); - EndpointsForToken natural = clusterMetadata.placements.get(keyspaceMetadata.params.replication).reads.forToken(token).get(); - EndpointsForToken pending = clusterMetadata.pendingEndpointsFor(keyspaceMetadata, token).get(); - if (i == 0) - { - shards.add(createShard(minRange(keyspace, token), natural, pending)); - finalShard = createShard(maxRange(keyspace, tokens.get(mi - 1)), natural, pending); - } - else - { - Token prev = tokens.get(i - 1); - shards.add(createShard(range(keyspace, prev, token), natural, pending)); - } + EndpointsForRange reads = placement.reads.forRange(range).get(); + EndpointsForRange writes = placement.reads.forRange(range).get(); + + // TCM doesn't create wrap around ranges + Invariants.checkArgument(!range.isWrapAround() || range.right.equals(range.right.minValue()), + "wrap around range %s found", range); + shards.add(createShard(range(keyspace.name, range), directory, reads, writes)); } - shards.add(finalShard); return shards; } - public static Topology createTopology(long epoch) + public static Topology createAccordTopology(Epoch epoch, DistributedSchema schema, DataPlacements placements, Directory directory, Predicate keyspacePredicate) { - List keyspaces = new ArrayList<>(Schema.instance.distributedKeyspaces().names()); - keyspaces.sort(String::compareTo); - List shards = new ArrayList<>(); - for (String keyspace : keyspaces) - shards.addAll(createShards(keyspace, ClusterMetadata.current())); + for (KeyspaceMetadata keyspace : schema.getKeyspaces()) + { + if (!keyspacePredicate.test(keyspace.name)) + continue; + shards.addAll(createShards(keyspace, placements, directory)); + } + shards.sort((a, b) -> a.range.compare(b.range)); + return new Topology(epoch.getEpoch(), shards.toArray(new Shard[0])); + } - return new Topology(epoch, shards.toArray(new Shard[0])); + public static EndpointMapping directoryToMapping(long epoch, Directory directory) + { + EndpointMapping.Builder builder = EndpointMapping.builder(epoch); + for (NodeId id : directory.peerIds()) + builder.add(directory.endpoint(id), tcmIdToAccord(id)); + return builder.build(); + } + + public static Topology createAccordTopology(ClusterMetadata metadata, Predicate keyspacePredicate) + { + return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, keyspacePredicate); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index a35737040d9b..5cf7e95aaa4f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -33,18 +33,21 @@ public class AccordVerbHandler implements IVerbHandler private static final Logger logger = LoggerFactory.getLogger(AccordVerbHandler.class); private final Node node; + private final AccordEndpointMapper endpointMapper; - public AccordVerbHandler(Node node) + public AccordVerbHandler(Node node, AccordEndpointMapper endpointMapper) { this.node = node; + this.endpointMapper = endpointMapper; } @Override public void doVerb(Message message) throws IOException { + // TODO (desired): need a non-blocking way to inform CMS of an unknown epoch and add callback to it's receipt +// ClusterMetadataService.instance().maybeCatchup(message.epoch()); logger.debug("Receiving {} from {}", message.payload, message.from()); T request = message.payload; - Node.Id from = EndpointMapping.getId(message.from()); long knownEpoch = request.knownEpoch(); if (!node.topology().hasEpoch(knownEpoch)) { @@ -52,10 +55,10 @@ public void doVerb(Message message) throws IOException long waitForEpoch = request.waitForEpoch(); if (!node.topology().hasEpoch(waitForEpoch)) { - node.withEpoch(waitForEpoch, () -> request.process(node, from, message)); + node.withEpoch(waitForEpoch, () -> request.process(node, endpointMapper.mappedId(message.from()), message)); return; } } - request.process(node, from, message); + request.process(node, endpointMapper.mappedId(message.from()), message); } } diff --git a/src/java/org/apache/cassandra/service/accord/EndpointMapping.java b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java index a863ca7410ac..4a746dc9a2c3 100644 --- a/src/java/org/apache/cassandra/service/accord/EndpointMapping.java +++ b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java @@ -18,119 +18,70 @@ package org.apache.cassandra.service.accord; -import java.net.Inet4Address; -import java.net.InetAddress; -import java.net.UnknownHostException; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableCollection; -import com.google.common.collect.ImmutableMap; -import com.google.common.primitives.Ints; +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; +import com.google.common.collect.ImmutableBiMap; import accord.local.Node; +import accord.utils.Invariants; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.Replica; -public class EndpointMapping +class EndpointMapping implements AccordEndpointMapper { - static Node.Id endpointToId(InetAddressAndPort endpoint) + public static final EndpointMapping EMPTY = new EndpointMapping(0, ImmutableBiMap.of()); + private final long epoch; + private final ImmutableBiMap mapping; + + private EndpointMapping(long epoch, + ImmutableBiMap mapping) { - Preconditions.checkArgument(endpoint.getAddress() instanceof Inet4Address); - Inet4Address address = (Inet4Address) endpoint.getAddress(); - int id = Ints.fromByteArray(address.getAddress()); - return new Node.Id(id); + this.epoch = epoch; + this.mapping = mapping; } - static InetAddressAndPort idToEndpoint(Node.Id node) + long epoch() { - byte[] bytes = Ints.toByteArray(node.id); - try - { - return InetAddressAndPort.getByAddress(InetAddress.getByAddress(bytes)); - } - catch (UnknownHostException e) - { - throw new RuntimeException(e); - } + return epoch; } - // TODO: Remove this if its one usage in AccordConfigurationService is removed. - public static ImmutableCollection knownIds() + @Override + public Node.Id mappedId(InetAddressAndPort endpoint) { - return mapping.endpointToId.values(); + return mapping.inverse().get(endpoint); } - private static class Mapping + @Override + public InetAddressAndPort mappedEndpoint(Node.Id id) { - private static final Mapping EMPTY = new Mapping(ImmutableMap.of(), ImmutableMap.of()); - final ImmutableMap idToEndpoint; - final ImmutableMap endpointToId; - - public Mapping(ImmutableMap idToEndpoint, - ImmutableMap endpointToId) - { - this.idToEndpoint = idToEndpoint; - this.endpointToId = endpointToId; - } + return mapping.get(id); + } - private static ImmutableMap put(ImmutableMap current, K key, V val) - { - return ImmutableMap.builderWithExpectedSize(current.size() + 1).putAll(current).put(key, val).build(); - } + static class Builder + { + private final long epoch; + private final BiMap mapping = HashBiMap.create(); - public Mapping add(InetAddressAndPort endpoint) + public Builder(long epoch) { - if (endpointToId.containsKey(endpoint)) - return this; - Node.Id id = endpointToId(endpoint); - return new Mapping(put(idToEndpoint, id, endpoint), put(endpointToId, endpoint, id)); + this.epoch = epoch; } - public Mapping add(Node.Id id) + public Builder add(InetAddressAndPort endpoint, Node.Id id) { - if (idToEndpoint.containsKey(id)) - return this; - - InetAddressAndPort endpoint = idToEndpoint(id); - return new Mapping(put(idToEndpoint, id, endpoint), put(endpointToId, endpoint, id)); + Invariants.checkArgument(!mapping.containsKey(id), "Mapping already exists for Node.Id %s", id); + Invariants.checkArgument(!mapping.containsValue(endpoint), "Mapping already exists for %s", endpoint); + mapping.put(id, endpoint); + return this; } - } - private static volatile Mapping mapping = Mapping.EMPTY; - - private EndpointMapping() {} - - public static Node.Id getId(InetAddressAndPort endpoint) - { - Node.Id id = mapping.endpointToId.get(endpoint); - if (id == null) + public EndpointMapping build() { - synchronized (EndpointMapping.class) - { - mapping = mapping.add(endpoint); - id = mapping.endpointToId.get(endpoint); - } + return new EndpointMapping(epoch, ImmutableBiMap.copyOf(mapping)); } - return id; } - // FIXME: put this stuff into the configuration service, where it will eventually live - public static Node.Id getId(Replica replica) + static Builder builder(long epoch) { - return getId(replica.endpoint()); - } - - public static InetAddressAndPort getEndpoint(Node.Id id) - { - InetAddressAndPort endpoint = mapping.idToEndpoint.get(id); - if (endpoint == null) - { - synchronized (EndpointMapping.class) - { - mapping = mapping.add(id); - endpoint = mapping.idToEndpoint.get(id); - } - } - return endpoint; + return new Builder(epoch); } } diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 7a01fc52ab38..8ba98e919d05 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -23,7 +23,10 @@ import accord.topology.TopologyManager; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -32,8 +35,6 @@ public interface IAccordService { IVerbHandler verbHandler(); - void createEpochFromConfigUnsafe(); - TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel); long currentEpoch(); @@ -42,5 +43,22 @@ public interface IAccordService TopologyManager topology(); + void startup(); + void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException; + + /** + * Return a future that will complete once the accord has completed it's local bootstrap process + * for any ranges gained in the given epoch + */ + Future epochReady(Epoch epoch); + + void remoteSyncComplete(Message message); + + /** + * Temporary method to avoid double-streaming keyspaces + * @param keyspace + * @return + */ + boolean isAccordManagedKeyspace(String keyspace); } diff --git a/src/java/org/apache/cassandra/service/accord/TokenRange.java b/src/java/org/apache/cassandra/service/accord/TokenRange.java index a0b6f67cccb3..3c8e0e76ea6b 100644 --- a/src/java/org/apache/cassandra/service/accord/TokenRange.java +++ b/src/java/org/apache/cassandra/service/accord/TokenRange.java @@ -23,6 +23,10 @@ import accord.api.RoutingKey; import accord.primitives.Range; import accord.primitives.Ranges; +import accord.utils.Invariants; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -34,6 +38,14 @@ public class TokenRange extends Range.EndInclusive public TokenRange(AccordRoutingKey start, AccordRoutingKey end) { super(start, end); + Invariants.checkArgument(start.keyspace().equals(end.keyspace()), + "Token ranges cannot cover more than one keyspace start:%s, end:%s", + start, end); + } + + public String keyspace() + { + return ((AccordRoutingKey) start()).keyspace(); } public static TokenRange fullRange(String keyspace) @@ -56,6 +68,16 @@ public RoutingKey someIntersectingRoutingKey(Ranges ranges) return pick; } + public org.apache.cassandra.dht.Range toKeyspaceRange () + { + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + AccordRoutingKey start = (AccordRoutingKey) start(); + AccordRoutingKey end = (AccordRoutingKey) end(); + Token left = start instanceof SentinelKey ? partitioner.getMinimumToken() : start.token(); + Token right = end instanceof SentinelKey ? partitioner.getMinimumToken() : end.token(); + return new org.apache.cassandra.dht.Range<>(left, right); + } + public static final IVersionedSerializer serializer = new IVersionedSerializer() { @Override diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 9a1ca51c6f93..b5eeaa3f3857 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -170,8 +170,7 @@ private void fail(Throwable throwable) { Invariants.nonNull(throwable); if (state.isComplete()) - throw new IllegalStateException("Unexpected state " + state, throwable); - + return; try { switch (state) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index 366b469c2ffe..4b20e9e7eaa1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -22,6 +22,9 @@ import com.google.common.base.Preconditions; +import accord.api.Query; +import accord.api.Read; +import accord.api.Update; import accord.local.Node; import accord.local.SaveStatus; import accord.local.Status; @@ -43,6 +46,7 @@ import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.utils.CastingSerializer; public class CommandSerializers { @@ -118,19 +122,30 @@ public int serializedSize() } } - public static final IVersionedSerializer partialTxn = new IVersionedSerializer() + public static class PartialTxnSerializer implements IVersionedSerializer { + private final IVersionedSerializer readSerializer; + private final IVersionedSerializer querySerializer; + private final IVersionedSerializer updateSerializer; + + public PartialTxnSerializer(IVersionedSerializer readSerializer, IVersionedSerializer querySerializer, IVersionedSerializer updateSerializer) + { + this.readSerializer = readSerializer; + this.querySerializer = querySerializer; + this.updateSerializer = updateSerializer; + } + @Override public void serialize(PartialTxn txn, DataOutputPlus out, int version) throws IOException { CommandSerializers.kind.serialize(txn.kind(), out, version); KeySerializers.ranges.serialize(txn.covering(), out, version); KeySerializers.seekables.serialize(txn.keys(), out, version); - TxnRead.serializer.serialize((TxnRead) txn.read(), out, version); - TxnQuery.serializer.serialize((TxnQuery) txn.query(), out, version); + readSerializer.serialize(txn.read(), out, version); + querySerializer.serialize(txn.query(), out, version); out.writeBoolean(txn.update() != null); if (txn.update() != null) - TxnUpdate.serializer.serialize((TxnUpdate) txn.update(), out, version); + updateSerializer.serialize(txn.update(), out, version); } @Override @@ -139,9 +154,9 @@ public PartialTxn deserialize(DataInputPlus in, int version) throws IOException Txn.Kind kind = CommandSerializers.kind.deserialize(in, version); Ranges covering = KeySerializers.ranges.deserialize(in, version); Seekables keys = KeySerializers.seekables.deserialize(in, version); - TxnRead read = TxnRead.serializer.deserialize(in, version); - TxnQuery query = TxnQuery.serializer.deserialize(in, version); - TxnUpdate update = in.readBoolean() ? TxnUpdate.serializer.deserialize(in, version) : null; + Read read = readSerializer.deserialize(in, version); + Query query = querySerializer.deserialize(in, version); + Update update = in.readBoolean() ? updateSerializer.deserialize(in, version) : null; return new PartialTxn.InMemory(covering, kind, keys, read, query, update); } @@ -151,14 +166,20 @@ public long serializedSize(PartialTxn txn, int version) long size = CommandSerializers.kind.serializedSize(txn.kind(), version); size += KeySerializers.ranges.serializedSize(txn.covering(), version); size += KeySerializers.seekables.serializedSize(txn.keys(), version); - size += TxnRead.serializer.serializedSize((TxnRead) txn.read(), version); - size += TxnQuery.serializer.serializedSize((TxnQuery) txn.query(), version); + size += readSerializer.serializedSize(txn.read(), version); + size += querySerializer.serializedSize(txn.query(), version); size += TypeSizes.sizeof(txn.update() != null); if (txn.update() != null) - size += TxnUpdate.serializer.serializedSize((TxnUpdate) txn.update(), version); + size += updateSerializer.serializedSize(txn.update(), version); return size; } - }; + } + + private static final IVersionedSerializer read = new CastingSerializer<>(TxnRead.class, TxnRead.serializer); + private static final IVersionedSerializer query = new CastingSerializer<>(TxnQuery.class, TxnQuery.serializer); + private static final IVersionedSerializer update = new CastingSerializer<>(TxnUpdate.class, TxnUpdate.serializer); + + public static final IVersionedSerializer partialTxn = new PartialTxnSerializer(read, query, update); public static final IVersionedSerializer saveStatus = new EnumSerializer<>(SaveStatus.class); public static final IVersionedSerializer status = new EnumSerializer<>(Status.class); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java new file mode 100644 index 000000000000..4cd322435c59 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.NavigableMap; +import java.util.TreeMap; + +import accord.api.RoutingKey; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.utils.ReducingRangeMap; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.CollectionSerializers; + +public class CommandStoreSerializers +{ + private CommandStoreSerializers() {} + + public static IVersionedSerializer> rejectBefore = new IVersionedSerializer>() + { + public void serialize(ReducingRangeMap map, DataOutputPlus out, int version) throws IOException + { + out.writeBoolean(map.inclusiveEnds()); + int size = map.size(); + out.writeUnsignedVInt32(size); + + for (int i=0; i deserialize(DataInputPlus in, int version) throws IOException + { + boolean inclusiveEnds = in.readBoolean(); + int size = in.readUnsignedVInt32(); + RoutingKey[] keys = new RoutingKey[size]; + Timestamp[] values = new Timestamp[size + 1]; + for (int i=0; i map, int version) + { + long size = TypeSizes.BOOL_SIZE; + size += TypeSizes.sizeofUnsignedVInt(size); + int mapSize = map.size(); + for (int i=0; i implements IVersionedSerializer> + { + private final IVersionedSerializer timestampSerializer; + + public TimestampToRangesSerializer(IVersionedSerializer timestampSerializer) + { + this.timestampSerializer = timestampSerializer; + } + + public void serialize(NavigableMap map, DataOutputPlus out, int version) throws IOException + { + CollectionSerializers.serializeMap(map, out, version, timestampSerializer, KeySerializers.ranges); + } + + public NavigableMap deserialize(DataInputPlus in, int version) throws IOException + { + return CollectionSerializers.deserializeMap(in, version, timestampSerializer, KeySerializers.ranges, i -> new TreeMap<>()); + + } + + public long serializedSize(NavigableMap map, int version) + { + return CollectionSerializers.serializedMapSize(map, version, timestampSerializer, KeySerializers.ranges); + } + } + + public static final IVersionedSerializer> bootstrapBeganAt = new TimestampToRangesSerializer<>(CommandSerializers.txnId); + public static final IVersionedSerializer> safeToRead = new TimestampToRangesSerializer<>(CommandSerializers.timestamp); +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java new file mode 100644 index 000000000000..e76df16335d1 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.api.Data; +import accord.impl.AbstractFetchCoordinator.FetchRequest; +import accord.impl.AbstractFetchCoordinator.FetchResponse; +import accord.local.Status; +import accord.messages.ReadData; +import accord.messages.ReadData.ReadReply; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.utils.Invariants; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.AccordFetchCoordinator.StreamData; +import org.apache.cassandra.service.accord.AccordFetchCoordinator.StreamingTxn; +import org.apache.cassandra.utils.CastingSerializer; + +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; + +public class FetchSerializers +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + @Override + public void serialize(FetchRequest request, DataOutputPlus out, int version) throws IOException + { + Invariants.checkArgument(request.txnId.equals(TxnId.NONE)); + Invariants.checkArgument(request.waitForStatus == Status.Applied); + Invariants.checkArgument(request.waitUntil.equals(Timestamp.MAX)); + + out.writeUnsignedVInt(request.waitForEpoch()); + CommandSerializers.txnId.serialize((TxnId) request.executeReadAt, out, version); + KeySerializers.ranges.serialize((Ranges) request.readScope, out, version); + + DepsSerializer.partialDeps.serialize(request.partialDeps, out, version); + StreamingTxn.serializer.serialize(request.read, out, version); + } + + @Override + public FetchRequest deserialize(DataInputPlus in, int version) throws IOException + { + return new FetchRequest(in.readUnsignedVInt(), + CommandSerializers.txnId.deserialize(in, version), + KeySerializers.ranges.deserialize(in, version), + DepsSerializer.partialDeps.deserialize(in, version), + StreamingTxn.serializer.deserialize(in, version)); + } + + @Override + public long serializedSize(FetchRequest request, int version) + { + return TypeSizes.sizeofUnsignedVInt(request.waitForEpoch()) + + CommandSerializers.txnId.serializedSize((TxnId) request.executeReadAt, version) + + KeySerializers.ranges.serializedSize((Ranges) request.readScope, version) + + DepsSerializer.partialDeps.serializedSize(request.partialDeps, version) + + StreamingTxn.serializer.serializedSize(request.read, version); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + final ReadData.ReadNack[] nacks = ReadData.ReadNack.values(); + final IVersionedSerializer streamDataSerializer = new CastingSerializer<>(StreamData.class, StreamData.serializer); + + @Override + public void serialize(ReadReply reply, DataOutputPlus out, int version) throws IOException + { + if (!reply.isOk()) + { + out.writeByte(1 + ((ReadData.ReadNack) reply).ordinal()); + return; + } + + out.writeByte(0); + FetchResponse response = (FetchResponse) reply; + serializeNullable(response.unavailable, out, version, KeySerializers.ranges); + serializeNullable(response.data, out, version, streamDataSerializer); + CommandSerializers.timestamp.serialize(response.maxApplied, out, version); + } + + @Override + public ReadReply deserialize(DataInputPlus in, int version) throws IOException + { + int id = in.readByte(); + if (id != 0) + return nacks[id - 1]; + + return new FetchResponse(deserializeNullable(in, version, KeySerializers.ranges), + deserializeNullable(in, version, streamDataSerializer), + CommandSerializers.timestamp.deserialize(in, version)); + } + + @Override + public long serializedSize(ReadReply reply, int version) + { + if (!reply.isOk()) + return TypeSizes.BYTE_SIZE; + + FetchResponse response = (FetchResponse) reply; + return TypeSizes.BYTE_SIZE + + serializedNullableSize(response.unavailable, version, KeySerializers.ranges) + + serializedNullableSize(response.data, version, streamDataSerializer) + + CommandSerializers.timestamp.serializedSize(response.maxApplied, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java index 04afa5b250ad..e1f4b5b28012 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java @@ -19,13 +19,21 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import java.util.List; +import java.util.Set; import accord.local.Node; +import accord.primitives.Range; +import accord.topology.Shard; +import accord.topology.Topology; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.utils.ArraySerializers; +import org.apache.cassandra.utils.CollectionSerializers; public class TopologySerializers { @@ -69,4 +77,64 @@ public int serializedSize() return TypeSizes.INT_SIZE; // id.id } }; + + public static final IVersionedSerializer shard = new IVersionedSerializer() + { + @Override + public void serialize(Shard shard, DataOutputPlus out, int version) throws IOException + { + TokenRange.serializer.serialize((TokenRange) shard.range, out, version); + CollectionSerializers.serializeList(shard.nodes, out, version, nodeId); + CollectionSerializers.serializeCollection(shard.fastPathElectorate, out, version, nodeId); + CollectionSerializers.serializeCollection(shard.joining, out, version, nodeId); + + } + + @Override + public Shard deserialize(DataInputPlus in, int version) throws IOException + { + Range range = TokenRange.serializer.deserialize(in, version); + List nodes = CollectionSerializers.deserializeList(in, version, nodeId); + Set fastPathElectorate = CollectionSerializers.deserializeSet(in, version, nodeId); + Set joining = CollectionSerializers.deserializeSet(in, version, nodeId); + return new Shard(range, nodes, fastPathElectorate, joining); + } + + @Override + public long serializedSize(Shard shard, int version) + { + long size = TokenRange.serializer.serializedSize((TokenRange) shard.range, version); + size += CollectionSerializers.serializedListSize(shard.nodes, version, nodeId); + size += CollectionSerializers.serializedCollectionSize(shard.fastPathElectorate, version, nodeId); + size += CollectionSerializers.serializedCollectionSize(shard.joining, version, nodeId); + return size; + } + }; + + public static final IVersionedSerializer topology = new IVersionedSerializer() + { + @Override + public void serialize(Topology topology, DataOutputPlus out, int version) throws IOException + { + out.writeLong(topology.epoch()); + ArraySerializers.serializeArray(topology.unsafeGetShards(), out, version, shard); + } + + @Override + public Topology deserialize(DataInputPlus in, int version) throws IOException + { + long epoch = in.readLong(); + Shard[] shards = ArraySerializers.deserializeArray(in, version, shard, Shard[]::new); + return new Topology(epoch, shards); + } + + @Override + public long serializedSize(Topology topology, int version) + { + long size = 0; + size += TypeSizes.LONG_SIZE; // epoch + size += ArraySerializers.serializedArraySize(topology.unsafeGetShards(), version, shard); + return size; + } + }; } diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 607978806e76..e0f1a1c2e125 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -434,6 +434,12 @@ public Transformer withVersion(NodeId nodeId, NodeVersion version) return this; } + public Transformer register(NodeId nodeId, NodeAddresses addresses, Location location, NodeVersion version) + { + directory = directory.with(nodeId, addresses, location, version); + return this; + } + public Transformer withNodeState(NodeId id, NodeState state) { directory = directory.withNodeState(id, state); diff --git a/src/java/org/apache/cassandra/tcm/Startup.java b/src/java/org/apache/cassandra/tcm/Startup.java index 6d3fe6b2afd5..7acfe643c6e3 100644 --- a/src/java/org/apache/cassandra/tcm/Startup.java +++ b/src/java/org/apache/cassandra/tcm/Startup.java @@ -53,6 +53,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.tcm.log.LocalLog; import org.apache.cassandra.tcm.log.LogStorage; import org.apache.cassandra.tcm.log.SystemKeyspaceStorage; @@ -415,6 +416,7 @@ public static void startup(Supplier initialTransformation, boole { ClusterMetadata metadata = ClusterMetadata.current(); NodeId self = metadata.myNodeId(); + AccordService.startup(self); // finish in-progress sequences first InProgressSequences.finishInProgressSequences(self, true); diff --git a/src/java/org/apache/cassandra/tcm/membership/Directory.java b/src/java/org/apache/cassandra/tcm/membership/Directory.java index 436ded0ab319..a4dab0bc2135 100644 --- a/src/java/org/apache/cassandra/tcm/membership/Directory.java +++ b/src/java/org/apache/cassandra/tcm/membership/Directory.java @@ -168,6 +168,11 @@ public Directory with(NodeAddresses addresses, Location location) return with(addresses, location, CURRENT); } + public Directory with(NodeId id, NodeAddresses addresses, Location location, NodeVersion nodeVersion) + { + return with(addresses, id, id.toUUID(), location, nodeVersion); + } + public Directory with(NodeAddresses addresses, Location location, NodeVersion nodeVersion) { NodeId id = new NodeId(nextId); diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java index 15182fc92b2a..607ee2f669c3 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java @@ -25,6 +25,7 @@ import java.util.stream.StreamSupport; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +42,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.streaming.StreamState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -60,6 +62,7 @@ import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; import org.apache.cassandra.utils.vint.VIntCoding; import static com.google.common.collect.ImmutableList.of; @@ -356,12 +359,14 @@ public static boolean bootstrap(final Collection tokens, StorageService.instance.repairPaxosForTopologyChange("bootstrap"); Future bootstrapStream = StorageService.instance.startBootstrap(metadata, beingReplaced, movements, strictMovements); + Future accordReady = AccordService.instance().epochReady(metadata.epoch); + Future ready = FutureCombiner.allOf(Lists.newArrayList(bootstrapStream, accordReady)); try { if (bootstrapTimeoutMillis > 0) - bootstrapStream.get(bootstrapTimeoutMillis, MILLISECONDS); + ready.get(bootstrapTimeoutMillis, MILLISECONDS); else - bootstrapStream.get(); + ready.get(); StorageService.instance.markViewsAsBuilt(); StorageService.instance.clearOngoingBootstrap(); logger.info("Bootstrap completed for tokens {}", tokens); diff --git a/src/java/org/apache/cassandra/tcm/sequences/Move.java b/src/java/org/apache/cassandra/tcm/sequences/Move.java index 99ada06f9356..8e83f2e7e3e4 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/Move.java +++ b/src/java/org/apache/cassandra/tcm/sequences/Move.java @@ -49,8 +49,10 @@ import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.streaming.StreamOperation; import org.apache.cassandra.streaming.StreamPlan; +import org.apache.cassandra.streaming.StreamResultFuture; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; @@ -68,6 +70,8 @@ import org.apache.cassandra.tcm.transformations.PrepareMove; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JVMStabilityInspector; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; import org.apache.cassandra.utils.vint.VIntCoding; import static com.google.common.collect.ImmutableList.of; @@ -211,11 +215,12 @@ public SequenceState executeNext() case MID_MOVE: try { + ClusterMetadata metadata = ClusterMetadata.current(); logger.info("fetching new ranges and streaming old ranges"); StreamPlan streamPlan = new StreamPlan(StreamOperation.RELOCATION); Keyspaces keyspaces = Schema.instance.getNonLocalStrategyKeyspaces(); Map movementMap = movementMap(FailureDetector.instance, - ClusterMetadata.current().placements, + metadata.placements, toSplitRanges, startMove.delta(), midMove.delta(), @@ -224,6 +229,8 @@ public SequenceState executeNext() for (KeyspaceMetadata ks : keyspaces) { + if (AccordService.instance().isAccordManagedKeyspace(ks.name)) + continue; ReplicationParams replicationParams = ks.params.replication; if (replicationParams.isMeta()) continue; @@ -248,7 +255,9 @@ else if (destination.isSelf()) } } - streamPlan.execute().get(); + StreamResultFuture streamResult = streamPlan.execute(); + Future accordReady = AccordService.instance().epochReady(metadata.epoch); + FutureCombiner.allOf(streamResult, accordReady).get(); StorageService.instance.repairPaxosForTopologyChange("move"); } catch (InterruptedException e) diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index f1535adcdc9a..5b149acd9a0b 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -105,7 +105,6 @@ public int execute(String... args) Compact.class, CompactionHistory.class, CompactionStats.class, - CreateEpochUnsafe.class, DataPaths.class, Decommission.class, Decommission.Abort.class, diff --git a/src/java/org/apache/cassandra/utils/CastingSerializer.java b/src/java/org/apache/cassandra/utils/CastingSerializer.java new file mode 100644 index 000000000000..6dccb40bbef3 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/CastingSerializer.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.io.IOException; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +/** + * Utility for serializing/deserializing from/into generic interface fields where we know (and require) the + * generic fields to be implementation specific classes + * @param + * @param + */ +public class CastingSerializer implements IVersionedSerializer +{ + private final Class specificClass; + private final IVersionedSerializer specificSerializer; + + public CastingSerializer(Class specificClass, IVersionedSerializer specificSerializer) + { + this.specificClass = specificClass; + this.specificSerializer = specificSerializer; + } + + @Override + public void serialize(Generic generic, DataOutputPlus out, int version) throws IOException + { + specificSerializer.serialize(specificClass.cast(generic), out, version); + } + + @Override + public Generic deserialize(DataInputPlus in, int version) throws IOException + { + Generic result = specificSerializer.deserialize(in, version); + if (result != null && !specificClass.isInstance(result)) + throw new IllegalStateException("Expected instance of " + specificClass.getName()); + return result; + } + + @Override + public long serializedSize(Generic generic, int version) + { + return specificSerializer.serializedSize(specificClass.cast(generic), version); + } +} diff --git a/src/java/org/apache/cassandra/utils/CollectionSerializers.java b/src/java/org/apache/cassandra/utils/CollectionSerializers.java index 93fa02a04dee..7e672c431f82 100644 --- a/src/java/org/apache/cassandra/utils/CollectionSerializers.java +++ b/src/java/org/apache/cassandra/utils/CollectionSerializers.java @@ -75,10 +75,10 @@ public static Set deserializeSet(DataInputPlus in, int version, IVersione return deserializeCollection(in, version, serializer, newHashSet()); } - public static Map deserializeMap(DataInputPlus in, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer, IntFunction> factory) throws IOException + public static > M deserializeMap(DataInputPlus in, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer, IntFunction factory) throws IOException { int size = checkedCast(in.readUnsignedVInt32()); - Map result = factory.apply(size); + M result = factory.apply(size); while (size-- > 0) { K key = keySerializer.deserialize(in, version); diff --git a/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java b/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java index 325b4baca1e6..709e5da60811 100644 --- a/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java +++ b/src/java/org/apache/cassandra/utils/concurrent/FutureCombiner.java @@ -20,6 +20,7 @@ import io.netty.util.concurrent.GenericFutureListener; import io.netty.util.concurrent.GlobalEventExecutor; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -242,6 +243,11 @@ public static Future> allOf(Collection(futures, () -> futures.stream().map(f -> f.getNow()).collect(Collectors.toList()), FailFastListener::new); } + public static Future> allOf(io.netty.util.concurrent.Future... futures) + { + return allOf(Arrays.asList(futures)); + } + /** * Waits for all futures to complete, returning a list containing values of all successful input futures. This * emulates Guava's Futures::successfulAsList in that results will be in the same order as inputs and any diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index a3918443e4a2..fcbddc259627 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -821,6 +821,7 @@ protected void partialStartup(ICluster cluster) throws IOException, NoSuchFie ClusterMetadataService.instance().processor().fetchLogAndWait(); NodeId self = Register.maybeRegister(); RegistrationStatus.instance.onRegistration(); + AccordService.startup(self); boolean joinRing = config.get(Constants.KEY_DTEST_JOIN_RING) == null || (boolean) config.get(Constants.KEY_DTEST_JOIN_RING); if (ClusterMetadata.current().directory.peerState(self) != NodeState.JOINED && joinRing) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java new file mode 100644 index 000000000000..97071c6678b1 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -0,0 +1,464 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +import org.junit.Assert; +import org.junit.Test; + +import accord.local.CommandStore; +import accord.local.PreLoadContext; +import accord.primitives.Timestamp; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.IInstanceConfig; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.distributed.shared.NetworkTopology; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordConfigurationService; +import org.apache.cassandra.service.accord.AccordConfigurationService.EpochSnapshot; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.streaming.StreamManager; +import org.apache.cassandra.streaming.StreamResultFuture; +import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FBUtilities; +import org.assertj.core.api.Assertions; + +import static accord.utils.async.AsyncChains.awaitUninterruptiblyAndRethrow; +import static com.google.common.collect.Iterables.getOnlyElement; +import static org.apache.cassandra.distributed.api.Feature.GOSSIP; +import static org.apache.cassandra.distributed.api.Feature.NETWORK; + +public class AccordBootstrapTest extends TestBaseImpl +{ + private static DecoratedKey dk(int key) + { + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + return partitioner.decorateKey(ByteBufferUtil.bytes(key)); + } + + private static PartitionKey pk(int key, String keyspace, String table) + { + TableId tid = Schema.instance.getTableMetadata(keyspace, table).id; + return new PartitionKey(keyspace, tid, dk(key)); + } + + protected void bootstrapAndJoinNode(Cluster cluster) + { + IInstanceConfig config = cluster.newInstanceConfig(); + config.set("auto_bootstrap", true); + IInvokableInstance newInstance = cluster.bootstrap(config); + newInstance.startup(cluster); + // todo: re-add once we fix write survey/join ring = false mode +// withProperty(BOOTSTRAP_SCHEMA_DELAY_MS.getKey(), Integer.toString(90 * 1000), +// () -> withProperty("cassandra.join_ring", false, () -> newInstance.startup(cluster))); +// newInstance.nodetoolResult("join").asserts().success(); + newInstance.nodetoolResult("describecms").asserts().success(); // just make sure we're joined, remove later + } + + private static AccordService service() + { + return (AccordService) AccordService.instance(); + } + + private static void awaitEpoch(long epoch) + { + try + { + boolean completed = service().epochReady(Epoch.create(epoch)).await(60, TimeUnit.SECONDS); + Assertions.assertThat(completed) + .describedAs("Epoch %s did not become ready within timeout on %s -> %s", + epoch, FBUtilities.getBroadcastAddressAndPort(), + service().configurationService().getEpochSnapshot(epoch)) + .isTrue(); + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + } + + private static void awaitLocalSyncNotification(long epoch) + { + try + { + AccordConfigurationService configService = service().configurationService(); + boolean completed = configService.localSyncNotified(epoch).await(5, TimeUnit.SECONDS); + Assert.assertTrue(String.format("Local sync notification for epoch %s did not become ready within timeout on %s", + epoch, FBUtilities.getBroadcastAddressAndPort()), completed); + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + } + + private static long maxEpoch(Cluster cluster) + { + return cluster.stream().mapToLong(node -> node.callOnInstance(() -> ClusterMetadata.current().epoch.getEpoch())).max().getAsLong(); + } + + private static class StreamListener implements StreamManager.StreamListener + { + private static boolean isRegistered = false; + private static final StreamListener listener = new StreamListener(); + + private final List registered = new ArrayList<>(); + + static synchronized void register() + { + if (isRegistered) + return; + StreamManager.instance.addListener(listener); + isRegistered = true; + } + + public synchronized void onRegister(StreamResultFuture result) + { + registered.add(result); + } + + public synchronized void forSession(Consumer consumer) + { + registered.forEach(future -> { + future.getCoordinator().getAllStreamSessions().forEach(consumer); + }); + } + } + + @Test + public void bootstrapTest() throws Throwable + { + + int originalNodeCount = 2; + int expandedNodeCount = originalNodeCount + 1; + + try (Cluster cluster = Cluster.build().withNodes(originalNodeCount) + .withoutVNodes() + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(expandedNodeCount)) + .withNodeIdTopology(NetworkTopology.singleDcNetworkTopology(expandedNodeCount, "dc0", "rack0")) + .withConfig(config -> config.with(NETWORK, GOSSIP)) + .start()) + { + long initialMax = maxEpoch(cluster); + for (IInvokableInstance node : cluster) + { + + node.runOnInstance(() -> { + Assert.assertEquals(initialMax, ClusterMetadata.current().epoch.getEpoch()); + awaitEpoch(initialMax); + AccordConfigurationService configService = service().configurationService(); + long minEpoch = configService.minEpoch(); + + Assert.assertEquals(initialMax, configService.maxEpoch()); + + for (long epoch = minEpoch; epoch < initialMax; epoch++) + { + awaitEpoch(epoch); + Assert.assertEquals(EpochSnapshot.completed(epoch), configService.getEpochSnapshot(epoch)); + } + + awaitLocalSyncNotification(initialMax); + Assert.assertEquals(EpochSnapshot.completed(initialMax), configService.getEpochSnapshot(initialMax)); + }); + } + + for (IInvokableInstance node : cluster) + { + node.runOnInstance(StreamListener::register); + } + + cluster.schemaChange("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2}"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key(k, c))"); + + long schemaChangeMax = maxEpoch(cluster); + for (IInvokableInstance node : cluster) + { + node.runOnInstance(() -> { + ClusterMetadataService.instance().fetchLogFromCMS(Epoch.create(schemaChangeMax)); + awaitEpoch(schemaChangeMax); + AccordConfigurationService configService = service().configurationService(); + + for (long epoch = initialMax + 1; epoch <= schemaChangeMax; epoch++) + { + awaitLocalSyncNotification(epoch); + Assert.assertEquals(EpochSnapshot.completed(epoch), configService.getEpochSnapshot(epoch)); + } + }); + } + + for (int key = 0; key < 100; key++) + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl WHERE k = " + key + " AND c = 0);\n" + + " SELECT row1.v;\n" + + " IF row1 IS NULL THEN\n" + + " INSERT INTO ks.tbl (k, c, v) VALUES (" + key + ", " + key + ", " + key + ");\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + AccordTestBase.executeWithRetry(cluster, query); + } + + for (IInvokableInstance node : cluster) + { + node.runOnInstance(() -> { + Assert.assertTrue(StreamListener.listener.registered.isEmpty()); + }); + } + + bootstrapAndJoinNode(cluster); + long bootstrapMax = maxEpoch(cluster); + for (IInvokableInstance node : cluster) + { + node.runOnInstance(() -> { + ClusterMetadataService.instance().fetchLogFromCMS(Epoch.create(bootstrapMax)); + Assert.assertEquals(bootstrapMax, ClusterMetadata.current().epoch.getEpoch()); + AccordService service = (AccordService) AccordService.instance(); + awaitEpoch(bootstrapMax); + AccordConfigurationService configService = service.configurationService(); + + awaitLocalSyncNotification(bootstrapMax); + Assert.assertEquals(EpochSnapshot.completed(bootstrapMax), configService.getEpochSnapshot(bootstrapMax)); + }); + } + + InetAddress node3Addr = cluster.get(3).broadcastAddress().getAddress(); + for (IInvokableInstance node : cluster.get(1, 2)) + { + node.runOnInstance(() -> { + + StreamListener.listener.forSession(session -> { + Assert.assertEquals(node3Addr, session.peer.getAddress()); + Assert.assertEquals(0, session.getNumRequests()); + Assert.assertTrue(session.getNumTransfers() > 0); + }); + + awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { + CommandStore commandStore = safeStore.commandStore(); + Assert.assertEquals(0, commandStore.maxBootstrapEpoch()); + Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.bootstrapBeganAt().keySet())); + Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.safeToRead().keySet())); + })); + }); + } + + cluster.get(3).runOnInstance(() -> { + List> ranges = StorageService.instance.getLocalRanges("ks"); + for (int key = 0; key < 100; key++) + { + UntypedResultSet result = QueryProcessor.executeInternal("SELECT * FROM ks.tbl WHERE k=?", key); + PartitionKey partitionKey = pk(key, "ks", "tbl"); + if (ranges.stream().anyMatch(range -> range.contains(partitionKey.token()))) + { + UntypedResultSet.Row row = getOnlyElement(result); + Assert.assertEquals(key, row.getInt("c")); + Assert.assertEquals(key, row.getInt("v")); + + awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { + if (safeStore.ranges().currentRanges().contains(partitionKey)) + { + CommandStore commandStore = safeStore.commandStore(); + Assert.assertTrue(commandStore.maxBootstrapEpoch() > 0); + Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); + Assert.assertFalse(commandStore.safeToRead().isEmpty()); + + Assert.assertEquals(1, commandStore.bootstrapBeganAt().entrySet().stream() + .filter(entry -> entry.getValue().contains(partitionKey)) + .map(entry -> { + Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); + return entry; + }).count()); + Assert.assertEquals(1, commandStore.safeToRead().entrySet().stream() + .filter(entry -> entry.getValue().contains(partitionKey)) + .map(entry -> { + Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); + return entry; + }).count()); + } + })); + } + else + { + Assert.assertTrue(result.isEmpty()); + } + } + }); + } + } + + @Test + public void moveTest() throws Throwable + { + try (Cluster cluster = Cluster.build().withNodes(3) + .withoutVNodes() + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(3)) + .withNodeIdTopology(NetworkTopology.singleDcNetworkTopology(3, "dc0", "rack0")) + .withConfig(config -> config.with(NETWORK, GOSSIP)) + .start()) + { + long initialMax = maxEpoch(cluster); + long[] tokens = new long[3]; + for (int i=0; i<3; i++) + { + tokens[i] = cluster.get(i+1).callOnInstance(() -> Long.valueOf(getOnlyElement(StorageService.instance.getTokens()))); + } + + for (IInvokableInstance node : cluster) + { + + node.runOnInstance(() -> { + Assert.assertEquals(initialMax, ClusterMetadata.current().epoch.getEpoch()); + awaitEpoch(initialMax); + AccordConfigurationService configService = service().configurationService(); + long minEpoch = configService.minEpoch(); + + Assert.assertEquals(initialMax, configService.maxEpoch()); + + for (long epoch = minEpoch; epoch < initialMax; epoch++) + { + awaitEpoch(epoch); + Assert.assertEquals(EpochSnapshot.completed(epoch), configService.getEpochSnapshot(epoch)); + } + + awaitLocalSyncNotification(initialMax); + Assert.assertEquals(EpochSnapshot.completed(initialMax), configService.getEpochSnapshot(initialMax)); + }); + } + + cluster.schemaChange("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2}"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key(k, c))"); + + long schemaChangeMax = maxEpoch(cluster); + for (IInvokableInstance node : cluster) + { + node.runOnInstance(() -> { + Assert.assertEquals(schemaChangeMax, ClusterMetadata.current().epoch.getEpoch()); + AccordService service = (AccordService) AccordService.instance(); + awaitEpoch(schemaChangeMax); + AccordConfigurationService configService = service.configurationService(); + + for (long epoch = initialMax + 1; epoch <= schemaChangeMax; epoch++) + { + awaitLocalSyncNotification(epoch); + Assert.assertEquals(EpochSnapshot.completed(epoch), configService.getEpochSnapshot(epoch)); + } + }); + } + + for (int key = 0; key < 100; key++) + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl WHERE k = " + key + " AND c = 0);\n" + + " SELECT row1.v;\n" + + " IF row1 IS NULL THEN\n" + + " INSERT INTO ks.tbl (k, c, v) VALUES (" + key + ", " + key + ", " + key + ");\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + AccordTestBase.executeWithRetry(cluster, query); + } + + long token = ((tokens[1] - tokens[0]) / 2) + tokens[0]; + long preMove = maxEpoch(cluster); + + cluster.get(1).runOnInstance(() -> StorageService.instance.move(Long.toString(token))); + + long moveMax = maxEpoch(cluster); + for (IInvokableInstance node : cluster) + { + node.runOnInstance(() -> { + ClusterMetadataService.instance().fetchLogFromCMS(Epoch.create(moveMax)); + Assert.assertEquals(moveMax, ClusterMetadata.current().epoch.getEpoch()); + AccordService service = (AccordService) AccordService.instance(); + awaitEpoch(moveMax); + AccordConfigurationService configService = service.configurationService(); + + awaitLocalSyncNotification(moveMax); + Assert.assertEquals(EpochSnapshot.completed(moveMax), configService.getEpochSnapshot(moveMax)); + }); + } + + for (IInvokableInstance node : cluster) + { + node.runOnInstance(() -> { + // validate streaming + List> ranges = StorageService.instance.getLocalRanges("ks"); + TableId tableId = Schema.instance.getTableMetadata("ks", "tbl").id; + for (int key = 0; key < 100; key++) + { + DecoratedKey dk = dk(key); + UntypedResultSet result = QueryProcessor.executeInternal("SELECT * FROM ks.tbl WHERE k=?", key); + if (ranges.stream().anyMatch(range -> range.contains(dk.getToken()))) + { + UntypedResultSet.Row row = getOnlyElement(result); + Assert.assertEquals(key, row.getInt("c")); + Assert.assertEquals(key, row.getInt("v")); + + PartitionKey partitionKey = new PartitionKey("ks", tableId, dk); + + awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(PreLoadContext.contextFor(partitionKey), + partitionKey.toUnseekable(), moveMax, moveMax, + safeStore -> { + if (!safeStore.ranges().allAt(preMove).contains(partitionKey)) + { + CommandStore commandStore = safeStore.commandStore(); + Assert.assertTrue(commandStore.maxBootstrapEpoch() > 0); + Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); + Assert.assertFalse(commandStore.safeToRead().isEmpty()); + + Assert.assertEquals(1, commandStore.bootstrapBeganAt().entrySet().stream() + .filter(entry -> entry.getValue().contains(partitionKey)) + .map(entry -> { + Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); + return entry; + }).count()); + Assert.assertEquals(1, commandStore.safeToRead().entrySet().stream() + .filter(entry -> entry.getValue().contains(partitionKey)) + .map(entry -> { + Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); + return entry; + }).count()); + } + })); + } + } + }); + } + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index 8004a0eef316..a113d69aecb9 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -2235,7 +2235,6 @@ public void testScalarUpdateSubstitution() SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 2}"); SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + "1 (k int, c int, v int, primary key (k, c))"); SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + "2 (k int, c int, v int, primary key (k, c))"); - SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + "1 (k, c, v) VALUES (1, 2, 3);", ConsistencyLevel.ALL); SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + "2 (k, c, v) VALUES (2, 2, 4);", ConsistencyLevel.ALL); @@ -2413,7 +2412,6 @@ public void demoTest() throws Throwable SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_users ( org_name text, user text, members_version int static, permissions int, PRIMARY KEY (org_name, user) );"); SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.user_docs ( user text, doc_id int, title text, org_name text, permissions int, PRIMARY KEY (user, doc_id) );"); - SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.org_users (org_name, user, members_version, permissions) VALUES ('demo', 'blake', 5, 777);\n", ConsistencyLevel.ALL); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java index 6118f765a56b..e4bf7ccb7cb4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java @@ -25,6 +25,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.cassandra.schema.Schema; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -38,7 +39,6 @@ import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.InvalidRequestException; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.AssertionUtils; @@ -71,7 +71,7 @@ public void shouldHideAccordTransactions() throws IOException // The Accord system keyspace should not be present: assertEquals("The Accord system keyspace should not exist", Optional.empty(), cluster.get(1).callOnInstance(() -> Schema.instance.localKeyspaces().get(ACCORD_KEYSPACE_NAME))); - + // Make sure virtual tables don't exist: IIsolatedExecutor.SerializableCallable> hasAccordVirtualTables = () -> SystemViewsKeyspace.instance.tables().stream().filter(t -> t.getClass().equals(AccordVirtualTables.Epoch.class)); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 261620705e7e..9146192ba25a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -117,7 +117,6 @@ protected void test(List ddls, FailingConsumer fn) throws Excep { for (String ddl : ddls) SHARED_CLUSTER.schemaChange(ddl); - SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); // Evict commands from the cache immediately to expose problems loading from disk. SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); @@ -182,7 +181,7 @@ protected static SimpleQueryResult assertRowEquals(Cluster cluster, Object[] row } // TODO: Retry on preemption may become unnecessary after the Unified Log is integrated. - protected SimpleQueryResult assertRowEqualsWithPreemptedRetry(Cluster cluster, Object[] row, String check, Object... boundValues) + protected static SimpleQueryResult assertRowEqualsWithPreemptedRetry(Cluster cluster, Object[] row, String check, Object... boundValues) { return assertRowWithPreemptedRetry(cluster, QueryResults.builder().row(row).build(), check, boundValues); } @@ -192,7 +191,7 @@ protected SimpleQueryResult assertEmptyWithPreemptedRetry(Cluster cluster, Strin return assertRowWithPreemptedRetry(cluster, QueryResults.builder().build(), check, boundValues); } - private SimpleQueryResult assertRowWithPreemptedRetry(Cluster cluster, SimpleQueryResult expected, String check, Object... boundValues) + private static SimpleQueryResult assertRowWithPreemptedRetry(Cluster cluster, SimpleQueryResult expected, String check, Object... boundValues) { SimpleQueryResult result = executeWithRetry(cluster, check, boundValues); QueryResultUtil.assertThat(result).isEqualTo(expected); @@ -217,7 +216,7 @@ private static SimpleQueryResult executeWithRetry0(int count, Cluster cluster, S } } - protected static SimpleQueryResult executeWithRetry(Cluster cluster, String check, Object... boundValues) + public static SimpleQueryResult executeWithRetry(Cluster cluster, String check, Object... boundValues) { check = wrapInTxn(check); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java index 196638eb869e..0b1cee3e0a23 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java @@ -47,7 +47,6 @@ public void test() String table = ks + ".tbl" + i; SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + ks + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}"); SHARED_CLUSTER.schemaChange(String.format("CREATE TABLE %s (pk blob primary key)", table)); - SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().createEpochFromConfigUnsafe())); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); List keys = tokensToKeys(tokens()); diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java index 844a8df36882..8ed4556194f5 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java @@ -43,7 +43,6 @@ import org.apache.cassandra.distributed.impl.FileLogAction; import org.apache.cassandra.distributed.impl.Instance; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.simulator.Action; import org.apache.cassandra.simulator.ActionList; import org.apache.cassandra.simulator.ActionPlan; @@ -191,9 +190,7 @@ public ActionPlan plan() plan = plan.encapsulate(ActionPlan.setUpTearDown( ActionList.of( - cluster.stream().map(i -> simulated.run("Insert Partitions", i, executeForPrimaryKeys(preInsertStmt(), primaryKeys))), - // TODO (now): this is temporary until we have correct epoch handling - cluster.stream().map(i -> simulated.run("Create Accord Epoch", i, () -> AccordService.instance().createEpochFromConfigUnsafe())) + cluster.stream().map(i -> simulated.run("Insert Partitions", i, executeForPrimaryKeys(preInsertStmt(), primaryKeys))) ), ActionList.of( cluster.stream().map(i -> checkErrorLogs(i)), diff --git a/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java b/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java index 4721a7b4ebe2..94bc2d937bd4 100644 --- a/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java +++ b/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java @@ -64,7 +64,6 @@ import org.apache.cassandra.db.ColumnFamilyStoreMBean; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.JMXServerUtils; import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_JMX_AUTHORIZER; @@ -444,7 +443,6 @@ public void testCqlBatch_MultipleTablesAuditing() public void testTransactionAuditing() { createTable("CREATE TABLE %s (key int PRIMARY KEY, val int)"); - AccordService.instance().createEpochFromConfigUnsafe(); Session session = sessionNet(); String fqTableName = KEYSPACE + "." + currentTable(); diff --git a/test/unit/org/apache/cassandra/auth/TxnAuthTest.java b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java index 8efd05ca1016..d60c3cadc3d4 100644 --- a/test/unit/org/apache/cassandra/auth/TxnAuthTest.java +++ b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java @@ -36,7 +36,6 @@ import org.apache.cassandra.exceptions.UnauthorizedException; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; -import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.transport.messages.ResultMessage; import static org.junit.Assert.assertEquals; @@ -68,7 +67,6 @@ public static void setUpAuthAndAccord() public void setUpTest() { createTable("CREATE TABLE %s (k int, v int, PRIMARY KEY(k))"); - AccordService.instance().createEpochFromConfigUnsafe(); } @Test diff --git a/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java b/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java index 4638970628dc..360b92ded53b 100644 --- a/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java +++ b/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java @@ -21,7 +21,6 @@ import org.junit.Test; import org.apache.cassandra.config.CassandraRelevantProperties; -import org.apache.cassandra.service.accord.AccordService; import static org.junit.Assert.assertEquals; @@ -97,8 +96,6 @@ public void testTransaction() createTable("CREATE TABLE %s (key text, val int, PRIMARY KEY(key))"); QueryProcessor.process(formatQuery("INSERT INTO %s (key, val) VALUES ('foo', 0)"), NODE_LOCAL); - AccordService.instance().createEpochFromConfigUnsafe(); - String query = "BEGIN TRANSACTION\n" + " SELECT * FROM %s WHERE key = 'foo';\n" + "COMMIT TRANSACTION"; diff --git a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java index db84d8fde3fc..8819a1c7c6d6 100644 --- a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java +++ b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java @@ -871,8 +871,6 @@ private static List columnNames(ResultSet rs) private static void updateTxnState() { - //TODO Remove this method once CEP-21 and CEP-15 integrate - AccordService.instance().createEpochFromConfigUnsafe(); AccordService.instance().setCacheSize(0); } } diff --git a/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java b/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java index b7f431dd81be..1beaa123a8bf 100644 --- a/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java +++ b/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java @@ -42,7 +42,7 @@ import org.jboss.byteman.contrib.bmunit.BMRule; import org.jboss.byteman.contrib.bmunit.BMUnitRunner; -import static org.apache.cassandra.hints.HintsTestUtil.MockFailureDetector; +import org.apache.cassandra.utils.MockFailureDetector; import static org.apache.cassandra.hints.HintsTestUtil.sendHintsAndResponses; import static org.junit.Assert.assertEquals; @@ -52,7 +52,7 @@ public class HintServiceBytemanTest private static final String KEYSPACE = "hints_service_test"; private static final String TABLE = "table"; - private final MockFailureDetector failureDetector = new HintsTestUtil.MockFailureDetector(); + private final MockFailureDetector failureDetector = new MockFailureDetector(); private static TableMetadata metadata; @BeforeClass diff --git a/test/unit/org/apache/cassandra/hints/HintsServiceTest.java b/test/unit/org/apache/cassandra/hints/HintsServiceTest.java index dd0eb5a6edde..1e12d3911ebb 100644 --- a/test/unit/org/apache/cassandra/hints/HintsServiceTest.java +++ b/test/unit/org/apache/cassandra/hints/HintsServiceTest.java @@ -41,7 +41,7 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; -import static org.apache.cassandra.hints.HintsTestUtil.MockFailureDetector; +import org.apache.cassandra.utils.MockFailureDetector; import static org.apache.cassandra.hints.HintsTestUtil.sendHintsAndResponses; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/test/unit/org/apache/cassandra/hints/HintsTestUtil.java b/test/unit/org/apache/cassandra/hints/HintsTestUtil.java index 727404e6e8e5..8ab65998c4ae 100644 --- a/test/unit/org/apache/cassandra/hints/HintsTestUtil.java +++ b/test/unit/org/apache/cassandra/hints/HintsTestUtil.java @@ -24,9 +24,6 @@ import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.partitions.AbstractBTreePartition; import org.apache.cassandra.db.partitions.PartitionUpdate; -import org.apache.cassandra.gms.IFailureDetectionEventListener; -import org.apache.cassandra.gms.IFailureDetector; -import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MockMessagingService; import org.apache.cassandra.net.MockMessagingSpy; @@ -91,44 +88,4 @@ static MockMessagingSpy sendHintsAndResponses(TableMetadata metadata, int noOfHi } return spy; } - - static class MockFailureDetector implements IFailureDetector - { - boolean isAlive = true; - - public boolean isAlive(InetAddressAndPort ep) - { - return isAlive; - } - - public void interpret(InetAddressAndPort ep) - { - throw new UnsupportedOperationException(); - } - - public void report(InetAddressAndPort ep) - { - throw new UnsupportedOperationException(); - } - - public void registerFailureDetectionEventListener(IFailureDetectionEventListener listener) - { - throw new UnsupportedOperationException(); - } - - public void unregisterFailureDetectionEventListener(IFailureDetectionEventListener listener) - { - throw new UnsupportedOperationException(); - } - - public void remove(InetAddressAndPort ep) - { - throw new UnsupportedOperationException(); - } - - public void forceConviction(InetAddressAndPort ep) - { - throw new UnsupportedOperationException(); - } - } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java new file mode 100644 index 000000000000..b43209c2cda6 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.api.ConfigurationService.EpochReady; +import accord.impl.AbstractConfigurationServiceTest; +import accord.local.Node; +import accord.local.Node.Id; +import accord.topology.Shard; +import accord.topology.Topology; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.ConnectionType; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordKeyspace.EpochDiskState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.MockFailureDetector; +import org.apache.cassandra.utils.concurrent.Future; + +import static accord.impl.AbstractConfigurationServiceTest.TestListener; +import static com.google.common.collect.ImmutableSet.of; +import static java.lang.String.format; +import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.apache.cassandra.service.accord.AccordKeyspace.EPOCH_METADATA; +import static org.apache.cassandra.service.accord.AccordKeyspace.TOPOLOGIES; +import static org.apache.cassandra.service.accord.AccordKeyspace.loadEpoch; + +public class AccordConfigurationServiceTest +{ + private static final Id ID1 = new Id(1); + private static final Id ID2 = new Id(2); + private static final Id ID3 = new Id(3); + private static final List ID_LIST = ImmutableList.of(ID1, ID2, ID3); + private static final Set ID_SET = ImmutableSet.copyOf(ID_LIST); + private static final TableId TBL1 = TableId.fromUUID(new UUID(0, 1)); + private static final TableId TBL2 = TableId.fromUUID(new UUID(0, 2)); + + private static EndpointMapping mappingForEpoch(long epoch) + { + try + { + EndpointMapping.Builder builder = EndpointMapping.builder(epoch); + builder.add(InetAddressAndPort.getByName("127.0.0.1"), ID1); + builder.add(InetAddressAndPort.getByName("127.0.0.2"), ID2); + builder.add(InetAddressAndPort.getByName("127.0.0.3"), ID3); + return builder.build(); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + + private static EndpointMapping mappingForTopology(Topology topology) + { + try + { + EndpointMapping.Builder builder = EndpointMapping.builder(topology.epoch()); + for (Node.Id id : topology.nodes()) + builder.add(InetAddressAndPort.getByName("127.0.0." + id.id), id); + return builder.build(); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + + private static class Messaging implements MessageDelivery + { + static class Request + { + final Message message; + final InetAddressAndPort to; + final RequestCallback callback; + + public Request(Message message, InetAddressAndPort to, RequestCallback callback) + { + this.message = message; + this.to = to; + this.callback = callback; + } + } + + final List requests = new ArrayList<>(); + + @Override + public void send(Message message, InetAddressAndPort to) + { + requests.add(new Request(message, to, null)); + } + + @Override + public void sendWithCallback(Message message, InetAddressAndPort to, RequestCallback cb) + { + requests.add(new Request(message, to, cb)); + } + + @Override + public void sendWithCallback(Message message, InetAddressAndPort to, RequestCallback cb, ConnectionType specifyConnection) + { + throw new UnsupportedOperationException(); + } + + @Override + public Future> sendWithResult(Message message, InetAddressAndPort to) + { + throw new UnsupportedOperationException(); + } + + @Override + public void respond(V response, Message message) + { + throw new UnsupportedOperationException(); + } + } + + @BeforeClass + public static void beforeClass() throws Throwable + { + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + ServerTestUtils.daemonInitialization(); + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + } + + @Before + public void setup() + { + Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStore(TOPOLOGIES).truncateBlocking(); + Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStore(EPOCH_METADATA).truncateBlocking(); + } + + @Test + public void initialEpochTest() throws Throwable + { + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + Assert.assertEquals(null, AccordKeyspace.loadEpochDiskState()); + service.start(); + Assert.assertEquals(null, AccordKeyspace.loadEpochDiskState()); + Assert.assertTrue(executeInternal(format("SELECT * FROM %s.%s WHERE epoch=1", ACCORD_KEYSPACE_NAME, TOPOLOGIES)).isEmpty()); + + Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); + service.reportTopology(topology1); + loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync) -> { + Assert.assertEquals(topology1, topology); + Assert.assertTrue(remoteSync.isEmpty()); + }); + Assert.assertEquals(new EpochDiskState(1, 1), service.diskState()); + + service.remoteSyncComplete(ID1, 1); + service.remoteSyncComplete(ID2, 1); + loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync) -> { + Assert.assertEquals(topology1, topology); + Assert.assertEquals(Sets.newHashSet(ID1, ID2), remoteSync); + }); + } + + @Test + public void loadTest() throws Throwable + { + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + service.start(); + + Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); + service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + service.reportTopology(topology1); + service.acknowledgeEpoch(EpochReady.done(1)); + service.remoteSyncComplete(ID1, 1); + service.remoteSyncComplete(ID2, 1); + service.remoteSyncComplete(ID3, 1); + + Topology topology2 = new Topology(2, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + service.reportTopology(topology2); + service.acknowledgeEpoch(EpochReady.done(2)); + service.remoteSyncComplete(ID1, 2); + + Topology topology3 = new Topology(3, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + service.reportTopology(topology3); + service.acknowledgeEpoch(EpochReady.done(3)); + + AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + loaded.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + AbstractConfigurationServiceTest.TestListener listener = new AbstractConfigurationServiceTest.TestListener(loaded, true); + loaded.registerListener(listener); + loaded.start(); + + listener.assertNoTruncates(); + listener.assertTopologiesFor(1L, 2L, 3L); + listener.assertTopologyForEpoch(1, topology1); + listener.assertTopologyForEpoch(2, topology2); + listener.assertTopologyForEpoch(3, topology3); + listener.assertSyncsFor(1L, 2L); + listener.assertSyncsForEpoch(1, ID1, ID2, ID3); + listener.assertSyncsForEpoch(2, ID1); + } + + @Test + public void truncateTest() + { + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + TestListener serviceListener = new TestListener(service, true); + service.registerListener(serviceListener); + service.start(); + + Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); + service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + service.reportTopology(topology1); + + Topology topology2 = new Topology(2, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + service.reportTopology(topology2); + + Topology topology3 = new Topology(3, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + service.reportTopology(topology3); + service.truncateTopologiesUntil(3); + Assert.assertEquals(new EpochDiskState(3, 3), service.diskState()); + serviceListener.assertTruncates(3L); + + AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + loaded.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + TestListener loadListener = new TestListener(loaded, true); + loaded.registerListener(loadListener); + loaded.start(); + loadListener.assertTopologiesFor(3L); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index bae5fb144b71..358aac687f14 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -20,7 +20,6 @@ import org.junit.BeforeClass; import org.junit.Test; - import org.mockito.Mockito; import accord.api.Agent; @@ -29,6 +28,7 @@ import accord.messages.SimpleReply; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.Verb; @@ -45,8 +45,11 @@ public static void setup() } @Test - public void informOfTxn() + public void informOfTxn() throws Throwable { + Node.Id id = new Node.Id(1); + InetAddressAndPort endpoint = InetAddressAndPort.getByName("127.0.0.1"); + EndpointMapping mapping = EndpointMapping.builder(5).add(endpoint, id).build(); // There was an issue where the reply was the wrong verb // see CASSANDRA-18375 InformOfTxnId info = Mockito.mock(InformOfTxnId.class); @@ -54,8 +57,8 @@ public void informOfTxn() SimpleReply reply = SimpleReply.Ok; MessageDelivery messaging = Mockito.mock(MessageDelivery.class); - AccordMessageSink sink = new AccordMessageSink(Mockito.mock(Agent.class), messaging); - sink.reply(new Node.Id(1), req, reply); + AccordMessageSink sink = new AccordMessageSink(Mockito.mock(Agent.class), messaging, mapping); + sink.reply(id, req, reply); Mockito.verify(messaging).send(Mockito.any(), Mockito.any()); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 21ee94d2a576..77611f4f1afb 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -83,7 +83,6 @@ import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnRead; -import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -93,11 +92,6 @@ public class AccordTestUtils { - public static Id localNodeId() - { - return EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); - } - public static class Commands { public static Command notWitnessed(TxnId txnId, PartialTxn txn) @@ -307,7 +301,7 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); TokenRange range = TokenRange.fullRange(metadata.keyspace); - Node.Id node = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + Node.Id node = new Id(1); Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); NodeTimeService time = new NodeTimeService() { @@ -361,7 +355,7 @@ public static AccordCommandStore createAccordCommandStore( { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); TokenRange range = TokenRange.fullRange(metadata.keyspace); - Node.Id node = EndpointMapping.endpointToId(FBUtilities.getBroadcastAddressAndPort()); + Node.Id node = new Id(1); Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); AccordCommandStore store = createAccordCommandStore(node, now, topology, loadExecutor, saveExecutor); store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).setCacheSize(1 << 20)); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java index 9c8540ebda8c..018595713975 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java @@ -18,54 +18,177 @@ package org.apache.cassandra.service.accord; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import accord.local.Node.Id; +import accord.topology.Shard; import accord.topology.Topology; -import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.BufferDecoratedKey; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.AbstractReplicationStrategy; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.membership.NodeState; +import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.ownership.DataPlacements; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; public class AccordTopologyTest { + private static final Id ID1 = new Id(1); + private static final Id ID2 = new Id(2); + private static final Id ID3 = new Id(3); + private static final List NODE_LIST = ImmutableList.of(ID1, ID2, ID3); + private static final Set NODE_SET = ImmutableSet.copyOf(NODE_LIST); + + private static final InetAddressAndPort EP1 = ep(1); + private static final InetAddressAndPort EP2 = ep(2); + private static final InetAddressAndPort EP3 = ep(3); + + private static final IPartitioner partitioner = Murmur3Partitioner.instance; + private static Tables tables = null; + private static KeyspaceMetadata keyspace = null; + private static final Location LOCATION = new Location("DC1", "RACK1"); + @BeforeClass public static void beforeClass() throws Throwable { DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - SchemaLoader.prepareServer(); - SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); - StorageService.instance.initServer(); + tables = Tables.of(parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks").build()); + keyspace = KeyspaceMetadata.create("ks", KeyspaceParams.simple(3), tables); + } + + private static InetAddressAndPort ep(int i) + { + try + { + return InetAddressAndPort.getByAddressOverrideDefaults(InetAddress.getByAddress(new byte[]{127, 0, 0, (byte)i}), 7012); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + + private static NodeId nodeId(int id) + { + return new NodeId(id); + } + + private static void addNode(ClusterMetadata.Transformer transformer, int node, Token token) + { + NodeId nodeId = nodeId(node); + InetAddressAndPort ep = ep(node); + NodeAddresses addresses = new NodeAddresses(nodeId.toUUID(), ep, ep, ep); + transformer.register(nodeId, addresses, LOCATION, NodeVersion.CURRENT); + transformer.withNodeState(nodeId, NodeState.JOINED); + transformer.proposeToken(nodeId, Collections.singleton(token)); + } + + private static ClusterMetadata configureCluster(List> ranges, Keyspaces keyspaces) + { + assert ranges.size() == 3; + + IPartitioner partitioner = Murmur3Partitioner.instance; + ClusterMetadata empty = new ClusterMetadata(partitioner); + ClusterMetadata.Transformer transformer = empty.transformer(); + transformer.with(new DistributedSchema(Keyspaces.of(keyspace))); + addNode(transformer, 1, ranges.get(0).right); + addNode(transformer, 2, ranges.get(1).right); + addNode(transformer, 3, ranges.get(2).right); + ClusterMetadata metadata = transformer.build().metadata; + + for (KeyspaceMetadata keyspace : keyspaces) + { + ReplicationParams replication = keyspace.params.replication; + AbstractReplicationStrategy strategy = AbstractReplicationStrategy.createReplicationStrategy(keyspace.name, replication); + DataPlacements.Builder placements = metadata.placements.unbuild(); + DataPlacement placement = strategy.calculateDataPlacement(metadata.epoch, metadata.tokenMap.toRanges(), metadata); + placements.with(replication, placement); + metadata = transformer.with(placements.build()).build().metadata; + } + + return metadata; + } + + private static Token token(long t) + { + return new Murmur3Partitioner.LongToken(t); + } + + private static Range range(Token left, Token right) + { + return new Range<>(left, right); + } + + private static Range range(long left, long right) + { + return range(token(left), token(right)); } + /** + * Check converter does the right thing if the ring is constructed with min and max tokens + */ @Test - public void minMaxTokenTest() + public void minMaxTokens() { - IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); - Topology topology = AccordTopologyUtils.createTopology(1); - Assert.assertNotEquals(0, topology.size()); - TableId tableId = Schema.instance.getTableMetadata("ks", "tbl").id; - Token minToken = partitioner.getMinimumToken(); - Token maxToken = partitioner.getMaximumToken(); - -// topology.forKey(new AccordKey.TokenKey(tableId, minToken.minKeyBound())); - topology.forKey(new PartitionKey("ks", tableId, new BufferDecoratedKey(minToken, ByteBufferUtil.bytes(0))).toUnseekable()); -// topology.forKey(new AccordKey.TokenKey(tableId, minToken.maxKeyBound())); -// topology.forKey(new AccordKey.TokenKey(tableId, maxToken.minKeyBound())); - topology.forKey(new PartitionKey("ks", tableId, new BufferDecoratedKey(maxToken, ByteBufferUtil.bytes(0))).toUnseekable()); -// topology.forKey(new AccordKey.TokenKey(tableId, maxToken.maxKeyBound())); + List> ranges = ImmutableList.of(range(partitioner.getMinimumToken(), token(-100)), + range(-100, 100), + range(token(100), partitioner.getMaximumToken())); + Assert.assertEquals(partitioner.getMinimumToken(), ranges.get(0).left); + Assert.assertEquals(partitioner.getMaximumToken(), ranges.get(2).right); + ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); + + Topology topology = AccordTopologyUtils.createAccordTopology(metadata, ks -> true); + Topology expected = new Topology(1, + new Shard(AccordTopologyUtils.minRange("ks", ranges.get(0).right), NODE_LIST, NODE_SET), + new Shard(AccordTopologyUtils.range("ks", ranges.get(1)), NODE_LIST, NODE_SET), + new Shard(AccordTopologyUtils.range("ks", ranges.get(2)), NODE_LIST, NODE_SET), + new Shard(AccordTopologyUtils.maxRange("ks", ranges.get(2).right), NODE_LIST, NODE_SET)); + + Assert.assertEquals(expected, topology); + } + + @Test + public void wrapAroundRanges() + { + List> ranges = ImmutableList.of(range(-100, 0), + range(0, 100), + range(100, -100)); + + ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); + Topology topology = AccordTopologyUtils.createAccordTopology(metadata, ks -> true); + Topology expected = new Topology(1, + new Shard(AccordTopologyUtils.minRange("ks", ranges.get(0).left), NODE_LIST, NODE_SET), + new Shard(AccordTopologyUtils.range("ks", ranges.get(0)), NODE_LIST, NODE_SET), + new Shard(AccordTopologyUtils.range("ks", ranges.get(1)), NODE_LIST, NODE_SET), + new Shard(AccordTopologyUtils.maxRange("ks", ranges.get(2).left), NODE_LIST, NODE_SET)); + + Assert.assertEquals(expected, topology); } } diff --git a/test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java b/test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java index c35fbce0d401..7054b35066cf 100644 --- a/test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java +++ b/test/unit/org/apache/cassandra/service/accord/EndpointMappingTest.java @@ -18,25 +18,24 @@ package org.apache.cassandra.service.accord; -import org.junit.Assert; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import accord.local.Node; -import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.utils.CassandraGenerators; +import org.assertj.core.api.Assertions; +import static org.quicktheories.QuickTheory.qt; + +import org.quicktheories.generators.SourceDSL; + public class EndpointMappingTest { - private static final Logger logger = LoggerFactory.getLogger(EndpointMappingTest.class); - @Test public void identityTest() throws Throwable { - InetAddressAndPort endpoint = InetAddressAndPort.getByName("127.0.0.1"); - Node.Id id = EndpointMapping.endpointToId(endpoint); - Assert.assertEquals(endpoint, EndpointMapping.idToEndpoint(id)); - logger.info("{} -> {}", endpoint, id); + qt().forAll(CassandraGenerators.INET_ADDRESS_AND_PORT_GEN, SourceDSL.integers().between(1, Integer.MAX_VALUE).map(Node.Id::new)).checkAssert((endpoint, id) -> { + EndpointMapping mapping = EndpointMapping.builder(1).add(endpoint, id).build(); + Assertions.assertThat(mapping.mappedEndpoint(id)).isEqualTo(endpoint); + }); } } diff --git a/test/unit/org/apache/cassandra/utils/MockFailureDetector.java b/test/unit/org/apache/cassandra/utils/MockFailureDetector.java new file mode 100644 index 000000000000..bee0fbb5b40c --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/MockFailureDetector.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import org.apache.cassandra.gms.IFailureDetectionEventListener; +import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; + +public class MockFailureDetector implements IFailureDetector +{ + public boolean isAlive = true; + + public boolean isAlive(InetAddressAndPort ep) + { + return isAlive; + } + + public void interpret(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } + + public void report(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } + + public void registerFailureDetectionEventListener(IFailureDetectionEventListener listener) + { + throw new UnsupportedOperationException(); + } + + public void unregisterFailureDetectionEventListener(IFailureDetectionEventListener listener) + { + throw new UnsupportedOperationException(); + } + + public void remove(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } + + public void forceConviction(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } +} From a6bb08d92619fac6e5e41c38e096e8b18b723679 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Tue, 15 Aug 2023 17:08:53 +0100 Subject: [PATCH 062/340] Support state eviction (i.e. permit the state machine to erase transactions that are known to be applied across the cluster) patch by Benedict Elliott Smith; reviewed by Ariel Weisberg, Aleksey Yeschenko, and David Capwell for CASSANDRA-18883 Co-authored-by: Benedict Elliott Smith Co-authored-by: Ariel Weisberg Co-authored-by: Aleksey Yeschenko Co-authored-by: David Capwell --- modules/accord | 2 +- .../cassandra/cql3/UntypedResultSet.java | 158 +++--- .../db/compaction/CompactionIterator.java | 262 ++++++++- .../db/compaction/CompactionManager.java | 8 + .../repair/CassandraValidationIterator.java | 4 +- .../apache/cassandra/db/rows/ArrayCell.java | 6 + .../apache/cassandra/db/rows/BufferCell.java | 6 + .../org/apache/cassandra/db/rows/Cell.java | 2 + .../apache/cassandra/db/rows/NativeCell.java | 8 +- .../cassandra/dht/AccordBytesSplitter.java | 5 +- .../apache/cassandra/dht/AccordSplitter.java | 16 +- .../cassandra/dht/LocalPartitioner.java | 2 +- .../dht/OrderPreservingPartitioner.java | 5 +- .../cassandra/net/MessagingService.java | 1 + src/java/org/apache/cassandra/net/Verb.java | 41 +- .../service/accord/AccordCommandStore.java | 56 +- .../service/accord/AccordCommandStores.java | 4 +- .../accord/AccordConfigurationService.java | 75 ++- .../service/accord/AccordKeyspace.java | 416 +++++++++++--- .../accord/AccordLocalSyncNotifier.java | 207 ------- .../service/accord/AccordMessageSink.java | 87 +-- .../service/accord/AccordObjectSizes.java | 95 ++-- .../accord/AccordSafeCommandStore.java | 36 +- .../accord/AccordSafeCommandsForKey.java | 22 + .../service/accord/AccordService.java | 85 ++- .../service/accord/AccordStateCache.java | 9 + .../service/accord/AccordSyncPropagator.java | 352 ++++++++++++ .../service/accord/CommandsForRanges.java | 112 +++- .../service/accord/IAccordService.java | 21 +- .../cassandra/service/accord/TokenRange.java | 8 + .../service/accord/api/AccordAgent.java | 21 +- .../service/accord/api/AccordRoutableKey.java | 12 + .../service/accord/api/AccordRoutingKey.java | 77 ++- .../service/accord/api/PartitionKey.java | 7 +- .../service/accord/async/AsyncOperation.java | 2 + .../accord/serializers/ApplySerializers.java | 11 +- .../serializers/CheckStatusSerializers.java | 33 +- .../serializers/CommandSerializers.java | 8 +- .../serializers/CommandStoreSerializers.java | 116 +++- .../accord/serializers/CommitSerializers.java | 2 +- .../accord/serializers/EnumSerializer.java | 16 + .../accord/serializers/FetchSerializers.java | 25 +- .../InformHomeDurableSerializers.java | 6 +- .../serializers/InformOfTxnIdSerializers.java | 6 +- .../accord/serializers/KeySerializers.java | 102 +++- .../QueryDurableBeforeSerializers.java | 72 +++ .../serializers/ReadDataSerializers.java | 42 +- .../serializers/SetDurableSerializers.java | 107 ++++ .../serializers/WaitOnCommitSerializer.java | 8 +- .../serializers/WaitingOnSerializer.java | 118 ++++ .../service/accord/txn/TxnWrite.java | 4 +- .../utils/CollectionSerializers.java | 25 +- .../apache/cassandra/utils/btree/BTree.java | 8 + .../cassandra/distributed/impl/Instance.java | 2 +- .../test/CompactionDiskSpaceTest.java | 2 +- .../distributed/test/MessageFiltersTest.java | 3 +- .../test/accord/AccordBootstrapTest.java | 31 +- .../test/accord/AccordCQLTest.java | 5 +- .../simulator/asm/ClassTransformer.java | 3 +- .../test/AccordJournalSimulationTest.java | 2 +- .../AdaptingScheduledExecutorPlus.java | 257 +++++++++ .../org/apache/cassandra/cql3/CQLTester.java | 18 +- .../cassandra/cql3/UntypedResultSetTest.java | 106 ++++ .../db/RecoveryManagerFlushedTest.java | 1 + .../org/apache/cassandra/db/RowCacheTest.java | 3 - .../CompactionAccordIteratorsTest.java | 406 ++++++++++++++ .../cassandra/dht/AccordSplitterTest.java | 132 +++++ .../cassandra/index/CustomIndexTest.java | 41 +- .../cassandra/schema/ValidationTest.java | 18 +- .../accord/AccordCommandStoreTest.java | 42 +- .../service/accord/AccordCommandTest.java | 16 +- .../AccordConfigurationServiceTest.java | 20 +- .../service/accord/AccordMessageSinkTest.java | 104 +++- .../accord/AccordSyncPropagatorTest.java | 508 ++++++++++++++++++ .../service/accord/AccordTestUtils.java | 114 ++-- .../service/accord/CommandsForRangesTest.java | 113 ++++ .../accord/SimpleAccordEndpointMapper.java | 54 ++ .../service/accord/async/AsyncLoaderTest.java | 16 +- .../accord/async/AsyncOperationTest.java | 16 +- .../serializers/WaitingOnSerializerTest.java | 102 ++++ .../tcm/sequences/ProgressBarrierTest.java | 1 + .../cassandra/utils/AccordGenerators.java | 173 +++++- 82 files changed, 4384 insertions(+), 864 deletions(-) delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/QueryDurableBeforeSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java create mode 100644 test/unit/org/apache/cassandra/concurrent/AdaptingScheduledExecutorPlus.java create mode 100644 test/unit/org/apache/cassandra/cql3/UntypedResultSetTest.java create mode 100644 test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java create mode 100644 test/unit/org/apache/cassandra/dht/AccordSplitterTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java diff --git a/modules/accord b/modules/accord index 03f937175dbc..7c15f3a62039 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 03f937175dbcf04243bb0ac48b64746c1a07bc9c +Subproject commit 7c15f3a6203939bc6cb398e538df1ca3557cbe03 diff --git a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java index f82ff3eb835e..6d2848dad2d2 100644 --- a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java +++ b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java @@ -20,7 +20,6 @@ import java.net.InetAddress; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Iterator; @@ -28,23 +27,33 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import javax.annotation.Nonnull; import java.util.stream.Stream; import java.util.stream.StreamSupport; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import org.apache.cassandra.cql3.functions.types.LocalDate; import org.apache.cassandra.cql3.statements.SelectStatement; -import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ConsistencyLevel; -import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.ReadExecutionController; -import org.apache.cassandra.db.marshal.*; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.BooleanType; +import org.apache.cassandra.db.marshal.ByteType; +import org.apache.cassandra.db.marshal.DoubleType; +import org.apache.cassandra.db.marshal.InetAddressType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.ShortType; +import org.apache.cassandra.db.marshal.TimestampType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UUIDType; +import org.apache.cassandra.db.marshal.VectorType; import org.apache.cassandra.db.partitions.PartitionIterator; -import org.apache.cassandra.db.rows.Cell; -import org.apache.cassandra.db.rows.ComplexColumnData; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.pager.QueryPager; import org.apache.cassandra.transport.Dispatcher; @@ -60,11 +69,6 @@ public static UntypedResultSet create(ResultSet rs) return new FromResultSet(rs); } - public static UntypedResultSet create(List> results) - { - return new FromResultList(results); - } - public static UntypedResultSet create(SelectStatement select, QueryPager pager, int pageSize) { return new FromPager(select, pager, pageSize); @@ -142,48 +146,6 @@ public List metadata() } } - private static class FromResultList extends UntypedResultSet - { - private final List> cqlRows; - - private FromResultList(List> cqlRows) - { - this.cqlRows = cqlRows; - } - - public int size() - { - return cqlRows.size(); - } - - public Row one() - { - if (cqlRows.size() != 1) - throw new IllegalStateException("One row required, " + cqlRows.size() + " found"); - return new Row(cqlRows.get(0)); - } - - public Iterator iterator() - { - return new AbstractIterator() - { - final Iterator> iter = cqlRows.iterator(); - - protected Row computeNext() - { - if (!iter.hasNext()) - return endOfData(); - return new Row(iter.next()); - } - }; - } - - public List metadata() - { - throw new UnsupportedOperationException(); - } - } - private static class FromPager extends UntypedResultSet { private final SelectStatement select; @@ -307,52 +269,18 @@ public List metadata() public static class Row { + @Nonnull private final Map data = new HashMap<>(); - private final List columns = new ArrayList<>(); + @Nonnull + private final List columns; - public Row(Map data) + public Row(@Nonnull List names, @Nonnull List columns) { - this.data.putAll(data); - } - - public Row(List names, List columns) - { - this.columns.addAll(names); + this.columns = ImmutableList.copyOf(names); for (int i = 0; i < names.size(); i++) data.put(names.get(i).name.toString(), columns.get(i)); } - public static Row fromInternalRow(TableMetadata metadata, DecoratedKey key, org.apache.cassandra.db.rows.Row row) - { - Map data = new HashMap<>(); - - ByteBuffer[] keyComponents = SelectStatement.getComponents(metadata, key); - for (ColumnMetadata def : metadata.partitionKeyColumns()) - data.put(def.name.toString(), keyComponents[def.position()]); - - Clustering clustering = row.clustering(); - for (ColumnMetadata def : metadata.clusteringColumns()) - data.put(def.name.toString(), clustering.bufferAt(def.position())); - - for (ColumnMetadata def : metadata.regularAndStaticColumns()) - { - if (def.isSimple()) - { - Cell cell = row.getCell(def); - if (cell != null) - data.put(def.name.toString(), cell.buffer()); - } - else - { - ComplexColumnData complexData = row.getComplexColumnData(def); - if (complexData != null) - data.put(def.name.toString(), ((CollectionType) def.type).serializeForNativeProtocol(complexData.iterator())); - } - } - - return new Row(data); - } - public boolean has(String column) { // Note that containsKey won't work because we may have null values @@ -509,7 +437,47 @@ public List getColumns() @Override public String toString() { - return data.toString(); + StringBuilder sb = new StringBuilder(); + toString(sb); + return sb.toString(); + } + + public void toString(StringBuilder sb) + { + for (int i = 0; i < columns.size(); i++) + { + ColumnSpecification cspec = columns.get(i); + ByteBuffer v = data.get(cspec.name.toString()); + if (i != 0) + sb.append(" | "); + if (v == null) + { + sb.append("null"); + } + else + { + sb.append(cspec.type.getString(v)); + } + } + } + } + + /** + * When UntypedResultSet is from a pager calling toString will consume the pager. + * toString shouldn't mutate the object and this of course breaks things waiting to consume + * the results so if you want to get a pretty printed string you need to call this method explicitly. + */ + @SuppressWarnings("unused") + public String toStringUnsafe() + { + StringBuilder sb = new StringBuilder(); + sb.append(metadata()).append('\n'); + for (Row row : this) + { + row.toString(sb); + sb.append('\n'); } + sb.append("---"); + return sb.toString(); } } diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index eb1e761493d0..85f524596963 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.db.compaction; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -24,10 +25,21 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.function.LongPredicate; +import javax.annotation.Nonnull; +import com.google.common.base.Supplier; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Ordering; +import accord.local.Commands; +import accord.local.DurableBefore; +import accord.local.RedundantBefore; +import accord.local.SaveStatus; +import accord.local.Status.Durability; +import accord.primitives.Route; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.AbstractCompactionController; import org.apache.cassandra.db.ColumnFamilyStore; @@ -38,7 +50,6 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.SystemKeyspace; -import org.apache.cassandra.db.transform.DuplicateRowChecker; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.partitions.PurgeFunction; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; @@ -51,6 +62,7 @@ import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.db.rows.UnfilteredRowIterators; import org.apache.cassandra.db.rows.WrappingUnfilteredRowIterator; +import org.apache.cassandra.db.transform.DuplicateRowChecker; import org.apache.cassandra.db.transform.Transformation; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.transactions.CompactionTransaction; @@ -63,13 +75,25 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; +import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.paxos.PaxosRepairHistory; import org.apache.cassandra.service.paxos.uncommitted.PaxosRows; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; +import static accord.impl.CommandsForKey.NO_LAST_EXECUTED_HLC; +import static accord.local.Commands.Cleanup.TRUNCATE_WITH_OUTCOME; +import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.cassandra.config.Config.PaxosStatePurging.legacy; import static org.apache.cassandra.config.DatabaseDescriptor.paxosStatePurging; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.truncatedApply; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows.truncateStaticRow; /** * Merge multiple iterators over the content of sstable into a "compacted" iterator. @@ -116,7 +140,7 @@ public class CompactionIterator extends CompactionInfo.Holder implements Unfilte public CompactionIterator(OperationType type, List scanners, AbstractCompactionController controller, long nowInSec, TimeUUID compactionId) { - this(type, scanners, controller, nowInSec, compactionId, ActiveCompactionsTracker.NOOP, null); + this(type, scanners, controller, nowInSec, compactionId, ActiveCompactionsTracker.NOOP, null, AccordService::instance); } public CompactionIterator(OperationType type, @@ -126,6 +150,19 @@ public CompactionIterator(OperationType type, TimeUUID compactionId, ActiveCompactionsTracker activeCompactions, TopPartitionTracker.Collector topPartitionCollector) + { + this(type, scanners, controller, nowInSec, compactionId, activeCompactions, topPartitionCollector, + AccordService::instance); + } + + public CompactionIterator(OperationType type, + List scanners, + AbstractCompactionController controller, + long nowInSec, + TimeUUID compactionId, + ActiveCompactionsTracker activeCompactions, + TopPartitionTracker.Collector topPartitionCollector, + @Nonnull Supplier accordService) { this.controller = controller; this.type = type; @@ -152,8 +189,11 @@ public CompactionIterator(OperationType type, merged = Transformation.apply(merged, new TopPartitionTracker.TombstoneCounter(topPartitionCollector, nowInSec)); merged = Transformation.apply(merged, new GarbageSkipper(controller)); Transformation purger = isPaxos(controller.cfs) && paxosStatePurging() != legacy - ? new PaxosPurger(nowInSec) - : new Purger(controller, nowInSec); + ? new PaxosPurger() + : isAccordCommands(controller.cfs) + ? new AccordCommandsPurger(accordService) + : isAccordCommandsForKey(controller.cfs) ? new AccordCommandsForKeyPurger(accordService) + : new Purger(controller, nowInSec); merged = Transformation.apply(merged, purger); merged = DuplicateRowChecker.duringCompaction(merged, type); compacted = Transformation.apply(merged, new AbortableUnfilteredPartitionTransformation(this)); @@ -634,19 +674,9 @@ protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition } } - private class PaxosPurger extends Transformation + private abstract class AbstractPurger extends Transformation { - - private final long nowInSec; - private final long paxosPurgeGraceMicros = DatabaseDescriptor.getPaxosPurgeGrace(MICROSECONDS); - private final Map tableIdToHistory = new HashMap<>(); - private Token currentToken; - private int compactedUnfiltered; - - private PaxosPurger(long nowInSec) - { - this.nowInSec = nowInSec; - } + int compactedUnfiltered; protected void onEmptyPartitionPostPurge(DecoratedKey key) { @@ -663,7 +693,7 @@ protected void updateProgress() @Override protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition) { - currentToken = partition.partitionKey().getToken(); + beginPartition(partition); UnfilteredRowIterator purged = Transformation.apply(partition, this); if (purged.isEmpty()) { @@ -675,10 +705,27 @@ protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition return purged; } + protected abstract void beginPartition(UnfilteredRowIterator partition); + } + + private class PaxosPurger extends AbstractPurger + { + private final long paxosPurgeGraceMicros = DatabaseDescriptor.getPaxosPurgeGrace(MICROSECONDS); + private final Map tableIdToHistory = new HashMap<>(); + + private Token token; + + @Override + protected void beginPartition(UnfilteredRowIterator partition) + { + this.token = partition.partitionKey().getToken(); + } + @Override protected Row applyToRow(Row row) { updateProgress(); + TableId tableId = PaxosRows.getTableId(row); switch (paxosStatePurging()) @@ -700,12 +747,176 @@ protected Row applyToRow(Row row) }); return history == null ? row : - row.purgeDataOlderThan(history.ballotForToken(currentToken).unixMicros() - paxosPurgeGraceMicros, false); + row.purgeDataOlderThan(history.ballotForToken(token).unixMicros() - paxosPurgeGraceMicros, false); } } } } + class AccordCommandsPurger extends AbstractPurger + { + final Int2ObjectHashMap redundantBefores; + final DurableBefore durableBefore; + + int storeId; + TxnId txnId; + + AccordCommandsPurger(Supplier accordService) + { + Pair, DurableBefore> redundantBeforesAndDurableBefore = accordService.get().getRedundantBeforesAndDurableBefore(); + this.redundantBefores = redundantBeforesAndDurableBefore.left; + this.durableBefore = redundantBeforesAndDurableBefore.right; + } + + protected void beginPartition(UnfilteredRowIterator partition) + { + ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); + storeId = CommandRows.getStoreId(partitionKeyComponents); + txnId = CommandRows.getTxnId(partitionKeyComponents); + } + + @Override + protected Row applyToRow(Row row) + { + updateProgress(); + + RedundantBefore redundantBefore = redundantBefores.get(storeId); + // TODO (expected): if the store has been retired, this should return null + if (redundantBefore == null) + return row; + + Timestamp executeAt = CommandRows.getExecuteAt(row); + Durability durability = CommandRows.getDurability(row); + SaveStatus saveStatus = CommandRows.getStatus(row); + Route route = CommandRows.getRoute(row); + + Commands.Cleanup cleanup = Commands.shouldCleanup(txnId, saveStatus.status, durability, executeAt, route, redundantBefore, durableBefore); + switch (cleanup) + { + default: throw new AssertionError(String.format("Unexpected cleanup task: %s", cleanup)); + case ERASE: + return null; + + case TRUNCATE_WITH_OUTCOME: + if (saveStatus.compareTo(cleanup.appliesIfNot) >= 0) + return row; + + case TRUNCATE: + if (saveStatus.compareTo(cleanup.appliesIfNot) >= 0) + return row; + return truncatedApply(cleanup.appliesIfNot, row, nowInSec, cleanup == TRUNCATE_WITH_OUTCOME); + + case NO: + return row; + } + } + + @Override + protected Row applyToStatic(Row row) + { + checkState(row.isStatic() && row.isEmpty()); + return row; + } + } + + class AccordCommandsForKeyPurger extends AbstractPurger + { + final Int2ObjectHashMap redundantBefores; + int storeId; + PartitionKey partitionKey; + + AccordCommandsForKeyPurger(Supplier accordService) + { + this.redundantBefores = accordService.get().getRedundantBeforesAndDurableBefore().left; + } + + protected void beginPartition(UnfilteredRowIterator partition) + { + ByteBuffer[] partitionKeyComponents = CommandsForKeyRows.splitPartitionKey(partition.partitionKey()); + storeId = CommandsForKeyRows.getStoreId(partitionKeyComponents); + partitionKey = CommandsForKeyRows.getKey(partitionKeyComponents); + } + + @Override + protected Row applyToStatic(Row row) + { + updateProgress(); + + RedundantBefore redundantBefore = redundantBefores.get(storeId); + // TODO (expected): if the store has been retired, this should return null + if (redundantBefore == null) + return row; + + RedundantBefore.Entry redundantBeforeEntry = redundantBefore.get(partitionKey.toUnseekable()); + if (redundantBeforeEntry == null) + return row; + + TxnId redundantBeforeTxnId = redundantBeforeEntry.redundantBefore; + + boolean updatedColumn = false; + Timestamp max_timestamp = CommandsForKeyRows.getMaxTimestamp(row); + if (max_timestamp.compareTo(redundantBeforeTxnId) < 0) + { + max_timestamp = Timestamp.NONE; + updatedColumn = true; + } + + Timestamp last_execute = CommandsForKeyRows.getLastExecutedTimestamp(row); + if (last_execute.compareTo(redundantBeforeTxnId) < 0) + { + last_execute = Timestamp.NONE; + updatedColumn = true; + } + + Timestamp last_write = CommandsForKeyRows.getLastWriteTimestamp(row); + if (last_write.compareTo(redundantBeforeTxnId) < 0) + { + last_write = Timestamp.NONE; + updatedColumn = true; + } + + long last_execute_micros = CommandsForKeyRows.getLastExecutedMicros(row); + if (last_execute_micros < redundantBeforeTxnId.hlc()) + { + last_execute_micros = NO_LAST_EXECUTED_HLC; + updatedColumn = true; + } + + if (max_timestamp == Timestamp.NONE && + last_execute == Timestamp.NONE && + last_write == Timestamp.NONE && + last_execute_micros == NO_LAST_EXECUTED_HLC) + return null; + + if (updatedColumn) + return truncateStaticRow(nowInSec, row, last_execute_micros, last_execute, last_write, max_timestamp); + + return row; + } + + @Override + protected Row applyToRow(Row row) + { + updateProgress(); + + RedundantBefore redundantBefore = redundantBefores.get(storeId); + // TODO (expected): if the store has been retired, this should return null + if (redundantBefore == null) + return row; + + RedundantBefore.Entry redundantBeforeEntry = redundantBefore.get(partitionKey.toUnseekable()); + if (redundantBeforeEntry == null) + return row; + + TxnId redundantBeforeTxnId = redundantBeforeEntry.redundantBefore; + Timestamp timestamp = CommandsForKeyRows.getTimestamp(row); + if (timestamp.compareTo(redundantBeforeTxnId) < 0) + return null; + + return row; + } + } + private static class AbortableUnfilteredPartitionTransformation extends Transformation { private final AbortableUnfilteredRowTransformation abortableIter; @@ -745,4 +956,19 @@ private static boolean isPaxos(ColumnFamilyStore cfs) { return cfs.name.equals(SystemKeyspace.PAXOS) && cfs.getKeyspaceName().equals(SchemaConstants.SYSTEM_KEYSPACE_NAME); } + + private static boolean isAccordCommands(ColumnFamilyStore cfs) + { + return cfs.name.equals(AccordKeyspace.COMMANDS) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); + } + + private static boolean isAccordCommandsForKey(ColumnFamilyStore cfs) + { + return cfs.name.equals(AccordKeyspace.COMMANDS_FOR_KEY) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); + } + + private static boolean isAccordCommandsOrAccordCommandsForKey(ColumnFamilyStore cfs) + { + return isAccordCommands(cfs) || isAccordCommandsForKey(cfs); + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java index 1a3b2e705b3b..7e052cd8c412 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java @@ -83,6 +83,8 @@ import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.db.view.ViewBuilderTask; import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.LocalPartitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.ConfigurationException; @@ -102,6 +104,7 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.MetaStrategy; import org.apache.cassandra.locator.RangesAtEndpoint; +import org.apache.cassandra.locator.Replica; import org.apache.cassandra.metrics.CompactionMetrics; import org.apache.cassandra.metrics.TableMetrics; import org.apache.cassandra.repair.NoSuchRepairSessionException; @@ -768,6 +771,11 @@ public AllSSTableOpStatus performCleanup(final ColumnFamilyStore cfStore, int jo DataPlacement placement = cm.placements.get(keyspace.getMetadata().params.replication); InetAddressAndPort local = FBUtilities.getBroadcastAddressAndPort(); RangesAtEndpoint localWrites = placement.writes.byEndpoint().get(local); + // TODO review: Hack to get local partitioner not to fail out because it's handled very poorly with data placements + IPartitioner partitioner = cfStore.getPartitioner(); + if (partitioner.getClass() == LocalPartitioner.class) + localWrites = RangesAtEndpoint.of(Replica.fullReplica(local, new Range<>(partitioner.getMinimumToken(), partitioner.getMinimumToken()))); + final Set> allRanges = new HashSet<>(localWrites.ranges()); final Set> transientRanges = new HashSet<>(localWrites.onlyTransient().ranges()); final Set> fullRanges = new HashSet<>(localWrites.onlyFull().ranges()); diff --git a/src/java/org/apache/cassandra/db/repair/CassandraValidationIterator.java b/src/java/org/apache/cassandra/db/repair/CassandraValidationIterator.java index 5d4d88ed9efc..a46cbe58ca92 100644 --- a/src/java/org/apache/cassandra/db/repair/CassandraValidationIterator.java +++ b/src/java/org/apache/cassandra/db/repair/CassandraValidationIterator.java @@ -52,11 +52,11 @@ import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.metrics.TopPartitionTracker; +import org.apache.cassandra.repair.NoSuchRepairSessionException; import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.repair.ValidationPartitionIterator; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ActiveRepairService; -import org.apache.cassandra.repair.NoSuchRepairSessionException; import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.service.snapshot.TableSnapshot; import org.apache.cassandra.utils.TimeUUID; @@ -161,7 +161,6 @@ else if (isIncremental) } private final ColumnFamilyStore cfs; - private final SharedContext ctx; private final Refs sstables; private final String snapshotName; private final boolean isGlobalSnapshotValidation; @@ -179,7 +178,6 @@ else if (isIncremental) public CassandraValidationIterator(ColumnFamilyStore cfs, SharedContext ctx, Collection> ranges, TimeUUID parentId, TimeUUID sessionID, boolean isIncremental, long nowInSec, boolean dontPurgeTombstones, TopPartitionTracker.Collector topPartitionCollector) throws IOException, NoSuchRepairSessionException { this.cfs = cfs; - this.ctx = ctx; isGlobalSnapshotValidation = SnapshotManager.instance.exists(cfs.getKeyspaceName(), cfs.getTableName(), parentId.toString()); if (isGlobalSnapshotValidation) diff --git a/src/java/org/apache/cassandra/db/rows/ArrayCell.java b/src/java/org/apache/cassandra/db/rows/ArrayCell.java index 07823d2be515..e9b1a983f069 100644 --- a/src/java/org/apache/cassandra/db/rows/ArrayCell.java +++ b/src/java/org/apache/cassandra/db/rows/ArrayCell.java @@ -94,6 +94,12 @@ public Cell withUpdatedValue(ByteBuffer newValue) return new ArrayCell(column, timestamp, ttl, localDeletionTimeUnsignedInteger, ByteBufferUtil.getArray(newValue), path); } + @Override + public Cell withUpdatedTimestamp(long newTimestamp) + { + return new ArrayCell(column, newTimestamp, ttl, localDeletionTimeUnsignedInteger, value, path); + } + public Cell withUpdatedTimestampAndLocalDeletionTime(long newTimestamp, long newLocalDeletionTime) { return new ArrayCell(column, newTimestamp, ttl, newLocalDeletionTime, value, path); diff --git a/src/java/org/apache/cassandra/db/rows/BufferCell.java b/src/java/org/apache/cassandra/db/rows/BufferCell.java index d6918533e868..68496afa69c8 100644 --- a/src/java/org/apache/cassandra/db/rows/BufferCell.java +++ b/src/java/org/apache/cassandra/db/rows/BufferCell.java @@ -127,6 +127,12 @@ public Cell withUpdatedValue(ByteBuffer newValue) return new BufferCell(column, timestamp, ttl, localDeletionTimeUnsignedInteger, newValue, path); } + @Override + public Cell withUpdatedTimestamp(long newTimestamp) + { + return new BufferCell(column, newTimestamp, ttl, localDeletionTimeUnsignedInteger, value, path); + } + public Cell withUpdatedTimestampAndLocalDeletionTime(long newTimestamp, long newLocalDeletionTime) { return new BufferCell(column, newTimestamp, ttl, newLocalDeletionTime, value, path); diff --git a/src/java/org/apache/cassandra/db/rows/Cell.java b/src/java/org/apache/cassandra/db/rows/Cell.java index d60fdda5a012..3ddfeae39a1f 100644 --- a/src/java/org/apache/cassandra/db/rows/Cell.java +++ b/src/java/org/apache/cassandra/db/rows/Cell.java @@ -184,6 +184,8 @@ public long localDeletionTime() public abstract Cell withUpdatedValue(ByteBuffer newValue); + public abstract Cell withUpdatedTimestamp(long newTimestamp); + public abstract Cell withUpdatedTimestampAndLocalDeletionTime(long newTimestamp, long newLocalDeletionTime); /** diff --git a/src/java/org/apache/cassandra/db/rows/NativeCell.java b/src/java/org/apache/cassandra/db/rows/NativeCell.java index b0613f33f6da..b8eaa557ce39 100644 --- a/src/java/org/apache/cassandra/db/rows/NativeCell.java +++ b/src/java/org/apache/cassandra/db/rows/NativeCell.java @@ -165,6 +165,12 @@ public Cell withUpdatedValue(ByteBuffer newValue) throw new UnsupportedOperationException(); } + @Override + public Cell withUpdatedTimestamp(long newTimestamp) + { + return new BufferCell(column, newTimestamp, ttl(), localDeletionTime(), value(), path()); + } + public Cell withUpdatedTimestampAndLocalDeletionTime(long newTimestamp, long newLocalDeletionTime) { return new BufferCell(column, newTimestamp, ttl(), newLocalDeletionTime, value(), path()); @@ -175,7 +181,7 @@ public Cell withUpdatedColumn(ColumnMetadata column) return new BufferCell(column, timestamp(), ttl(), localDeletionTimeAsUnsignedInt(), value(), path()); } - public Cell withSkippedValue() + public Cell withSkippedValue() { return new BufferCell(column, timestamp(), ttl(), localDeletionTimeAsUnsignedInt(), ByteBufferUtil.EMPTY_BYTE_BUFFER, path()); } diff --git a/src/java/org/apache/cassandra/dht/AccordBytesSplitter.java b/src/java/org/apache/cassandra/dht/AccordBytesSplitter.java index c27bc4359927..89602c55b74a 100644 --- a/src/java/org/apache/cassandra/dht/AccordBytesSplitter.java +++ b/src/java/org/apache/cassandra/dht/AccordBytesSplitter.java @@ -79,7 +79,10 @@ Token tokenForValue(BigInteger value) private static int byteLength(RoutingKey routingKey) { - return byteLength(((AccordRoutingKey) routingKey).token()); + AccordRoutingKey accordKey = (AccordRoutingKey) routingKey; + if (accordKey.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL) + return 0; + return byteLength(accordKey.token()); } private static int byteLength(Token token) diff --git a/src/java/org/apache/cassandra/dht/AccordSplitter.java b/src/java/org/apache/cassandra/dht/AccordSplitter.java index 232a47d4542f..c5971dc89fe0 100644 --- a/src/java/org/apache/cassandra/dht/AccordSplitter.java +++ b/src/java/org/apache/cassandra/dht/AccordSplitter.java @@ -56,7 +56,7 @@ public accord.primitives.Range subRange(accord.primitives.Range range, BigIntege String keyspace = startBound.keyspace(); return new TokenRange(startOffset.equals(ZERO) ? startBound : new TokenKey(keyspace, tokenForValue(start.add(startOffset))), - endOffset.equals(sizeOfRange) ? endBound : new TokenKey(keyspace, tokenForValue(start.add(endOffset)))); + endOffset.compareTo(sizeOfRange) >= 0 ? endBound : new TokenKey(keyspace, tokenForValue(start.add(endOffset)))); } @Override @@ -65,6 +65,13 @@ public BigInteger zero() return ZERO; } + @Override + public BigInteger valueOf(int v) + { + return BigInteger.valueOf(v); + } + + @Override public BigInteger add(BigInteger a, BigInteger b) { @@ -83,6 +90,13 @@ public BigInteger divide(BigInteger a, int i) return a.divide(BigInteger.valueOf(i)); } + @Override + public BigInteger divide(BigInteger a, BigInteger i) + { + return a.divide(i); + } + + @Override public BigInteger multiply(BigInteger a, int i) { diff --git a/src/java/org/apache/cassandra/dht/LocalPartitioner.java b/src/java/org/apache/cassandra/dht/LocalPartitioner.java index e485ccba597a..b0b8d558ad93 100644 --- a/src/java/org/apache/cassandra/dht/LocalPartitioner.java +++ b/src/java/org/apache/cassandra/dht/LocalPartitioner.java @@ -164,7 +164,7 @@ public String toString() public int compareTo(Token o) { // todo (tcm); seems partitioner got mutated on alter type (for example) before tcm, now we create a new one - not sure its enough just making sure that its the same type of partitioner - assert o.getPartitioner().getClass().equals(getPartitioner().getClass()); + assert o.getPartitioner().getClass().equals(getPartitioner().getClass()) : String.format("partitioners do not match; %s != %s", getPartitioner(), o.getPartitioner()); // assert getPartitioner() == o.getPartitioner() : String.format("partitioners do not match; %s != %s", getPartitioner(), o.getPartitioner()); return comparator.compare(token, ((LocalToken) o).token); } diff --git a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java index 227d043382bc..d2419049dbda 100644 --- a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java +++ b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java @@ -358,7 +358,10 @@ BigInteger maximumValue() private static int charLength(RoutingKey routingKey) { - return charLength(((AccordRoutingKey) routingKey).token()); + AccordRoutingKey accordKey = (AccordRoutingKey) routingKey; + if (accordKey.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL) + return 0; + return charLength(accordKey.token()); } private static int charLength(Token token) diff --git a/src/java/org/apache/cassandra/net/MessagingService.java b/src/java/org/apache/cassandra/net/MessagingService.java index bb672e5b0f20..9215b7c97546 100644 --- a/src/java/org/apache/cassandra/net/MessagingService.java +++ b/src/java/org/apache/cassandra/net/MessagingService.java @@ -480,6 +480,7 @@ public void send(Message message, InetAddressAndPort to) * @param message messages to be sent. * @param response */ + @Override public void respond(V response, Message message) { send(message.responseWith(response), message.respondTo()); diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 537b857fce1f..be2d86ad59cb 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -77,9 +77,10 @@ import org.apache.cassandra.schema.SchemaPushVerbHandler; import org.apache.cassandra.schema.SchemaVersionVerbHandler; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; +import org.apache.cassandra.service.accord.AccordSyncPropagator; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; -import org.apache.cassandra.service.accord.AccordLocalSyncNotifier; import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; import org.apache.cassandra.service.accord.serializers.CheckStatusSerializers; import org.apache.cassandra.service.accord.serializers.CommitSerializers; @@ -90,8 +91,10 @@ import org.apache.cassandra.service.accord.serializers.InformHomeDurableSerializers; import org.apache.cassandra.service.accord.serializers.InformOfTxnIdSerializers; import org.apache.cassandra.service.accord.serializers.PreacceptSerializers; +import org.apache.cassandra.service.accord.serializers.QueryDurableBeforeSerializers; import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; import org.apache.cassandra.service.accord.serializers.RecoverySerializers; +import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; import org.apache.cassandra.service.accord.serializers.WaitOnCommitSerializer; import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.Commit.Agreed; @@ -273,10 +276,10 @@ public enum Verb ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), ACCORD_ACCEPT_REQ (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), - ACCORD_READ_RSP (128, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), - ACCORD_READ_REQ (127, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_REQ (125, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_INVALIDATE_REQ (126, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler() ), + ACCORD_READ_RSP (126, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), + ACCORD_READ_REQ (125, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler() ), ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), ACCORD_APPLY_REQ (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP ), ACCORD_BEGIN_RECOVER_RSP (132, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), @@ -285,18 +288,22 @@ public enum Verb ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP ), ACCORD_WAIT_ON_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_ON_COMMIT_RSP ), - ACCORD_INFORM_OF_TXN_REQ (137, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_HOME_DURABLE_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), - ACCORD_CHECK_STATUS_REQ (140, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP ), - ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_GET_DEPS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP ), - ACCORD_FETCH_DATA_RSP (145, P2, writeTimeout, REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), - ACCORD_FETCH_DATA_REQ (144, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_FETCH_DATA_RSP ), - ACCORD_SYNC_NOTIFY_RSP (147, P2, writeTimeout, REQUEST_RESPONSE, () -> AccordLocalSyncNotifier.Acknowledgement.serializer, RESPONSE_HANDLER ), - ACCORD_SYNC_NOTIFY_REQ (146, P2, writeTimeout, IMMEDIATE, () -> AccordLocalSyncNotifier.Notification.serializer, () -> AccordLocalSyncNotifier.verbHandler, ACCORD_SYNC_NOTIFY_RSP), - + ACCORD_WAIT_ON_APPLY_REQ (137, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitOnApply, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), + ACCORD_INFORM_OF_TXN_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_HOME_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_CHECK_STATUS_RSP (142, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), + ACCORD_CHECK_STATUS_REQ (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP ), + ACCORD_GET_DEPS_RSP (144, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_DEPS_REQ (143, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP ), + ACCORD_FETCH_DATA_RSP (146, P2, repairTimeout,REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), + ACCORD_FETCH_DATA_REQ (145, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_FETCH_DATA_RSP ), + ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,() -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_QUERY_DURABLE_BEFORE_RSP (150, P2, writeTimeout, REQUEST_RESPONSE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), + ACCORD_QUERY_DURABLE_BEFORE_REQ (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,() -> AccordService.instance().verbHandler(), ACCORD_QUERY_DURABLE_BEFORE_RSP), + + ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), // generic failure response FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailureReason.serializer, RESPONSE_HANDLER ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index e19f898915c2..727fee7baf54 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -46,10 +46,10 @@ import accord.impl.CommandsForKey; import accord.local.Command; import accord.local.CommandStore; -import accord.local.CommandStores.RangesForEpoch; -import accord.local.CommandStores.RangesForEpochHolder; +import accord.local.DurableBefore; import accord.local.NodeTimeService; import accord.local.PreLoadContext; +import accord.local.RedundantBefore; import accord.local.SafeCommandStore; import accord.local.SaveStatus; import accord.primitives.AbstractKeys; @@ -117,9 +117,9 @@ public AccordCommandStore(int id, Agent agent, DataStore dataStore, ProgressLog.Factory progressLogFactory, - RangesForEpochHolder rangesForEpoch) + EpochUpdateHolder epochUpdateHolder) { - this(id, time, agent, dataStore, progressLogFactory, rangesForEpoch, Stage.READ.executor(), Stage.MUTATION.executor()); + this(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder, Stage.READ.executor(), Stage.MUTATION.executor()); } @VisibleForTesting @@ -128,11 +128,11 @@ public AccordCommandStore(int id, Agent agent, DataStore dataStore, ProgressLog.Factory progressLogFactory, - RangesForEpochHolder rangesForEpoch, + EpochUpdateHolder epochUpdateHolder, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor) { - super(id, time, agent, dataStore, progressLogFactory, rangesForEpoch); + super(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder); loggingId = String.format("[%s]", id); executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); threadId = getThreadId(this.executor); @@ -151,10 +151,14 @@ public AccordCommandStore(int id, this::loadCommandsForKey, this::saveCommandsForKey, AccordObjectSizes::commandsForKey); - AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, bootstrapBeganAt, safeToRead) -> { + AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead) -> { executor.submit(() -> { if (rejectBefore != null) super.setRejectBefore(rejectBefore); + if (durableBefore != null) + super.setDurableBefore(durableBefore); + if (redundantBefore != null) + super.setRedundantBefore(redundantBefore); if (bootstrapBeganAt != null) super.setBootstrapBeganAt(bootstrapBeganAt); if (safeToRead != null) @@ -347,11 +351,6 @@ ProgressLog progressLog() return progressLog; } - RangesForEpoch ranges() - { - return rangesForEpochHolder.get(); - } - @Override public AsyncChain execute(PreLoadContext preLoadContext, Consumer consumer) { @@ -385,6 +384,11 @@ public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, return current; } + public boolean hasSafeStore() + { + return current != null; + } + public void completeOperation(AccordSafeCommandStore store) { Invariants.checkState(current == store); @@ -392,14 +396,14 @@ public void completeOperation(AccordSafeCommandStore store) current = null; } - O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) { keysOrRanges = keysOrRanges.slice(slice, Routables.Slice.Minimal); switch (keysOrRanges.domain()) { case Key: { - AbstractKeys keys = (AbstractKeys) keysOrRanges; + AbstractKeys keys = (AbstractKeys) keysOrRanges; for (CommandTimeseriesHolder summary : commandsForRanges.search(keys)) { accumulate = map.apply(summary, accumulate); @@ -410,7 +414,7 @@ O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction ranges = (AbstractRanges) keysOrRanges; + AbstractRanges ranges = (AbstractRanges) keysOrRanges; for (Range range : ranges) { CommandTimeseriesHolder summary = commandsForRanges.search(range); @@ -469,4 +473,26 @@ protected void setSafeToRead(NavigableMap newSafeToRead) // TODO (required, correctness): rework to persist via journal once available, this can lose updates in some edge cases AccordKeyspace.updateSafeToRead(this, newSafeToRead); } + + @Override + public void setDurableBefore(DurableBefore newDurableBefore) + { + super.setDurableBefore(newDurableBefore); + AccordKeyspace.updateDurableBefore(this, newDurableBefore); + } + + @Override + protected void setRedundantBefore(RedundantBefore newRedundantBefore) + { + super.setRedundantBefore(newRedundantBefore); + // TODO (required): this needs to be synchronous, or at least needs to take effect before we rely upon it + AccordKeyspace.updateRedundantBefore(this, newRedundantBefore); + } + + @Override + public void markShardDurable(SafeCommandStore safeStore, TxnId globalSyncId, Ranges ranges) + { + super.markShardDurable(safeStore, globalSyncId, ranges); + commandsForRanges.prune(globalSyncId, ranges); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 8e9c5beac201..e6678a9a153a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -59,7 +59,7 @@ static Factory factory(AccordJournal journal) @Override protected void mapReduceConsume( PreLoadContext context, - Routables keys, + Routables keys, long minEpoch, long maxEpoch, MapReduceConsume mapReduceConsume) @@ -73,7 +73,7 @@ protected void mapReduceConsume( private void mapReduceConsumeDurable( PreLoadContext context, - Routables keys, + Routables keys, long minEpoch, long maxEpoch, MapReduceConsume mapReduceConsume) diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 1a7a512d091d..df77cae97583 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -21,21 +21,20 @@ import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.impl.AbstractConfigurationService; import accord.local.Node; +import accord.primitives.Ranges; import accord.topology.Topology; import accord.utils.Invariants; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; -import org.agrona.collections.Long2ObjectHashMap; +import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.gms.FailureDetector; import org.apache.cassandra.gms.IFailureDetector; @@ -50,18 +49,17 @@ import org.apache.cassandra.utils.concurrent.Future; // TODO: listen to FailureDetector and rearrange fast path accordingly -public class AccordConfigurationService extends AbstractConfigurationService implements ChangeListener, AccordEndpointMapper, AccordLocalSyncNotifier.Listener +public class AccordConfigurationService extends AbstractConfigurationService implements ChangeListener, AccordEndpointMapper, AccordSyncPropagator.Listener { private static final Logger logger = LoggerFactory.getLogger(AccordConfigurationService.class); + private final AccordSyncPropagator syncPropagator; - private final MessageDelivery messagingService; - private final IFailureDetector failureDetector; private EpochDiskState diskState = EpochDiskState.EMPTY; + private enum State { INITIALIZED, LOADING, STARTED } private State state = State.INITIALIZED; private volatile EndpointMapping mapping = EndpointMapping.EMPTY; - private final Long2ObjectHashMap syncNotifiers = new Long2ObjectHashMap<>(); public enum SyncStatus { NOT_STARTED, NOTIFYING, COMPLETED } @@ -115,8 +113,7 @@ protected EpochState createEpochState(long epoch) public AccordConfigurationService(Node.Id node, MessageDelivery messagingService, IFailureDetector failureDetector) { super(node); - this.messagingService = messagingService; - this.failureDetector = failureDetector; + this.syncPropagator = new AccordSyncPropagator(localId, this, messagingService, failureDetector, ScheduledExecutors.scheduledTasks, this); } public AccordConfigurationService(Node.Id node) @@ -135,24 +132,26 @@ public synchronized void start() Invariants.checkState(state == State.INITIALIZED, "Expected state to be INITIALIZED but was %s", state); state = State.LOADING; updateMapping(ClusterMetadata.current()); - diskState = AccordKeyspace.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete) -> { + diskState = AccordKeyspace.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { if (topology != null) reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); getOrCreateEpochState(epoch).setSyncStatus(syncStatus); if (syncStatus == SyncStatus.NOTIFYING) - syncNotifiers.put(epoch, new AccordLocalSyncNotifier(epoch, localId, pendingSyncNotify, this, messagingService, failureDetector, this)); + syncPropagator.reportSyncComplete(epoch, pendingSyncNotify, localId); - remoteSyncComplete.forEach(id -> remoteSyncComplete(id, epoch)); + remoteSyncComplete.forEach(id -> receiveRemoteSyncComplete(id, epoch)); + // TODO (now): disk doesn't get updated until we see our own notification, so there is an edge case where this instance notified others and fails in the middle, but Apply was already sent! This could leave partial closed/redudant accross the cluster + receiveClosed(closed, epoch); + receiveRedundant(redundant, epoch); })); - syncNotifiers.values().forEach(AccordLocalSyncNotifier::start); state = State.STARTED; } @Override public Node.Id mappedId(InetAddressAndPort endpoint) { - return Invariants.nonNull(mapping.mappedId(endpoint)); + return Invariants.nonNull(mapping.mappedId(endpoint), "Unable to map address %s to a Node.Id", endpoint); } @Override @@ -184,7 +183,6 @@ private void reportMetadata(ClusterMetadata metadata) Stage.MISC.submit(() -> { synchronized (AccordConfigurationService.this) { - logger.info("Reporting metadata for epoch {}", metadata.epoch.getEpoch()); updateMapping(metadata); reportTopology(AccordTopologyUtils.createAccordTopology(metadata, this::isAccordManagedKeyspace)); } @@ -229,18 +227,10 @@ protected synchronized void localSyncComplete(Topology topology) if (epochState.syncStatus != SyncStatus.NOT_STARTED) return; - Set pendingNotification = topology.nodes().stream().filter(i -> !localId.equals(i)).collect(Collectors.toSet()); - AccordLocalSyncNotifier notifier = new AccordLocalSyncNotifier(epoch, localId, pendingNotification, this, messagingService, failureDetector, this); - syncNotifiers.put(epoch, notifier); - diskState = AccordKeyspace.setNotifyingLocalSync(epoch, pendingNotification, diskState); + Set notify = topology.nodes().stream().filter(i -> !localId.equals(i)).collect(Collectors.toSet()); + diskState = AccordKeyspace.setNotifyingLocalSync(epoch, notify, diskState); epochState.setSyncStatus(SyncStatus.NOTIFYING); - notifier.start(); - } - - @Override - public long currentEpoch() - { - return super.currentEpoch(); + syncPropagator.reportSyncComplete(epoch, notify, localId); } @Override @@ -268,12 +258,43 @@ protected synchronized void topologyUpdatePreListenerNotify(Topology topology) } @Override - protected void remoteSyncCompletePreListenerNotify(Node.Id node, long epoch) + protected void receiveRemoteSyncCompletePreListenerNotify(Node.Id node, long epoch) { if (state == State.STARTED) diskState = AccordKeyspace.markRemoteTopologySync(node, epoch, diskState); } + @Override + public synchronized void reportEpochClosed(Ranges ranges, long epoch) + { + Invariants.checkState(state == State.STARTED); + Topology topology = getTopologyForEpoch(epoch); + syncPropagator.reportClosed(epoch, topology.nodes(), ranges); + } + + @Override + public synchronized void reportEpochRedundant(Ranges ranges, long epoch) + { + Invariants.checkState(state == State.STARTED); + // TODO (expected): ensure we aren't fetching a truncated epoch; otherwise this should be non-null + Topology topology = getTopologyForEpoch(epoch); + syncPropagator.reportRedundant(epoch, topology.nodes(), ranges); + } + + @Override + public synchronized void receiveClosed(Ranges ranges, long epoch) + { + diskState = AccordKeyspace.markClosed(ranges, epoch, diskState); + super.receiveClosed(ranges, epoch); + } + + @Override + public synchronized void receiveRedundant(Ranges ranges, long epoch) + { + diskState = AccordKeyspace.markClosed(ranges, epoch, diskState); + super.receiveRedundant(ranges, epoch); + } + @Override protected void truncateTopologiesPreListenerNotify(long epoch) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index b3bf0265d6a7..d845c74c53cb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -30,7 +30,6 @@ import java.util.Map; import java.util.NavigableMap; import java.util.Set; -import java.util.TreeMap; import java.util.concurrent.Executor; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -38,11 +37,10 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; - import javax.annotation.Nullable; -import com.google.common.collect.ImmutableSet; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterables; @@ -52,16 +50,20 @@ import org.slf4j.LoggerFactory; import accord.api.Result; -import accord.impl.CommandsForKey; import accord.impl.CommandTimeseries; +import accord.impl.CommandsForKey; import accord.local.Command; +import accord.local.Command.WaitingOn; import accord.local.CommandStore; import accord.local.CommonAttributes; +import accord.local.DurableBefore; import accord.local.Listeners; import accord.local.Node; +import accord.local.RedundantBefore; import accord.local.SaveStatus; import accord.local.Status; import accord.primitives.Ballot; +import accord.primitives.Deps; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Ranges; @@ -91,6 +93,7 @@ import org.apache.cassandra.db.DeletionTime; import org.apache.cassandra.db.IMutation; import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.LivenessInfo; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.RegularAndStaticColumns; @@ -112,6 +115,7 @@ import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.BTreeRow; +import org.apache.cassandra.db.rows.BufferCell; import org.apache.cassandra.db.rows.Cell; import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.db.rows.Row; @@ -147,9 +151,11 @@ import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.ListenerSerializers; import org.apache.cassandra.service.accord.serializers.TopologySerializers; +import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static java.lang.String.format; @@ -158,6 +164,7 @@ import static org.apache.cassandra.db.rows.BufferCell.live; import static org.apache.cassandra.db.rows.BufferCell.tombstone; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.apache.cassandra.service.accord.serializers.KeySerializers.blobMapToRanges; import static org.apache.cassandra.utils.ByteBufferUtil.EMPTY_BYTE_BUFFER; import static org.apache.cassandra.utils.ByteBufferUtil.bytes; @@ -232,8 +239,6 @@ static TokenType valueOf(Token token) + "domain int," // this is stored as part of txn_id, used currently for more cheaper scans of the table + format("txn_id %s,", TIMESTAMP_TUPLE) + "status int," - + "home_key blob," - + "progress_key blob," + "route blob," + "durability int," + "txn blob," @@ -243,8 +248,7 @@ static TokenType valueOf(Token token) + "dependencies blob," + "writes blob," + "result blob," - + format("waiting_on_commit set<%s>,", TIMESTAMP_TUPLE) - + format("waiting_on_apply map<%s, blob>,", TIMESTAMP_TUPLE) + + "waiting_on blob," + "listeners set, " + "PRIMARY KEY((store_id, domain, txn_id))" + ')') @@ -263,6 +267,8 @@ private static class LocalVersionedSerializers static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); static final LocalVersionedSerializer topology = localSerializer(TopologySerializers.topology); static final LocalVersionedSerializer> rejectBefore = localSerializer(CommandStoreSerializers.rejectBefore); + static final LocalVersionedSerializer durableBefore = localSerializer(CommandStoreSerializers.durableBefore); + static final LocalVersionedSerializer redundantBefore = localSerializer(CommandStoreSerializers.redundantBefore); static final LocalVersionedSerializer> bootstrapBeganAt = localSerializer(CommandStoreSerializers.bootstrapBeganAt); static final LocalVersionedSerializer> safeToRead = localSerializer(CommandStoreSerializers.safeToRead); @@ -283,9 +289,10 @@ private static ColumnMetadata getColumn(TableMetadata metadata, String name) private static class CommandsColumns { static final ClusteringComparator keyComparator = Commands.partitionKeyAsClusteringComparator(); + static final CompositeType partitionKeyType = (CompositeType) Commands.partitionKeyType; + static final ColumnMetadata txn_id = getColumn(Commands, "txn_id"); + static final ColumnMetadata store_id = getColumn(Commands, "store_id"); static final ColumnMetadata status = getColumn(Commands, "status"); - static final ColumnMetadata home_key = getColumn(Commands, "home_key"); - static final ColumnMetadata progress_key = getColumn(Commands, "progress_key"); static final ColumnMetadata route = getColumn(Commands, "route"); static final ColumnMetadata durability = getColumn(Commands, "durability"); static final ColumnMetadata txn = getColumn(Commands, "txn"); @@ -295,9 +302,102 @@ private static class CommandsColumns static final ColumnMetadata dependencies = getColumn(Commands, "dependencies"); static final ColumnMetadata writes = getColumn(Commands, "writes"); static final ColumnMetadata result = getColumn(Commands, "result"); - static final ColumnMetadata waiting_on_commit = getColumn(Commands, "waiting_on_commit"); - static final ColumnMetadata waiting_on_apply = getColumn(Commands, "waiting_on_apply"); + static final ColumnMetadata waiting_on = getColumn(Commands, "waiting_on"); static final ColumnMetadata listeners = getColumn(Commands, "listeners"); + + static ColumnMetadata[][] TRUNCATE_FIELDS = new ColumnMetadata[][] { + new ColumnMetadata[] { durability, execute_at, route, status }, + new ColumnMetadata[] { durability, execute_at, result, route, status, writes }, + }; + + static + { + for (ColumnMetadata[] cds : TRUNCATE_FIELDS) + { + for (int i = 1 ; i < cds.length ; ++i) + Invariants.checkState(cds[i - 1].compareTo(cds[i]) < 0); + } + } + + } + + public static class CommandRows extends CommandsColumns + { + public static ByteBuffer[] splitPartitionKey(DecoratedKey key) + { + return partitionKeyType.split(key.getKey()); + } + + public static int getStoreId(ByteBuffer[] partitionKeyComponents) + { + return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); + } + + public static TxnId getTxnId(ByteBuffer[] partitionKeyComponents) + { + return deserializeTimestampOrNull(partitionKeyComponents[txn_id.position()], ByteBufferAccessor.instance, TxnId::fromBits); + } + + public static Timestamp getExecuteAt(Row row) + { + Cell cell = row.getCell(execute_at); + return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); + } + + public static SaveStatus getStatus(Row row) + { + Cell cell = row.getCell(status); + int ordinal = cell.accessor().getInt(cell.value(), 0); + return CommandSerializers.saveStatus.forOrdinal(ordinal); + } + + public static Status.Durability getDurability(Row row) + { + Cell cell = row.getCell(durability); + int ordinal = cell.accessor().getInt(cell.value(), 0); + return CommandSerializers.durability.forOrdinal(ordinal); + } + + public static Route getRoute(Row row) + { + Cell cell = row.getCell(route); + try + { + return deserializeOrNull(cell.buffer(), LocalVersionedSerializers.route); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, boolean withOutcome) + { + long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); + long newTimestamp = oldTimestamp + 1; + + ColumnMetadata[] fields = TRUNCATE_FIELDS[withOutcome ? 1 : 0]; + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(fields.length); + for (int i = 0 ; i < fields.length ; ++i) + { + if (fields[i] == status) newLeaf[i] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); + else newLeaf[i] = row.getCell(fields[i]).withUpdatedTimestamp(newTimestamp); + } + + return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), + new Row.Deletion(DeletionTime.build(oldTimestamp, nowInSec), false), + newLeaf); + } + + public static Result getResult(Row row) throws IOException + { + return deserializeWithVersionOr(row, result, LocalVersionedSerializers.result, () -> null); + } + + public static Writes getWrites(Row row) throws IOException + { + return deserializeWithVersionOr(row, writes, LocalVersionedSerializers.writes, () -> null); + } } private static final TableMetadata CommandsForKeys = @@ -311,7 +411,6 @@ private static class CommandsColumns + format("last_executed_timestamp %s static, ", TIMESTAMP_TUPLE) + "last_executed_micros bigint static, " + format("last_write_timestamp %s static, ", TIMESTAMP_TUPLE) - + format("blind_witnessed set<%s> static, ", TIMESTAMP_TUPLE) + "series int, " + format("timestamp %s, ", TIMESTAMP_TUPLE) + "data blob, " @@ -323,18 +422,23 @@ private static class CommandsColumns private static class CommandsForKeyColumns { static final ClusteringComparator keyComparator = CommandsForKeys.partitionKeyAsClusteringComparator(); + static final CompositeType partitionKeyType = (CompositeType) CommandsForKeys.partitionKeyType; static final ColumnFilter allColumns = ColumnFilter.all(CommandsForKeys); + static final ColumnMetadata store_id = getColumn(CommandsForKeys, "store_id"); + static final ColumnMetadata key_token = getColumn(CommandsForKeys, "key_token"); + static final ColumnMetadata key = getColumn(CommandsForKeys, "key"); + static final ColumnMetadata timestamp = getColumn(CommandsForKeys, "timestamp"); static final ColumnMetadata max_timestamp = getColumn(CommandsForKeys, "max_timestamp"); static final ColumnMetadata last_executed_timestamp = getColumn(CommandsForKeys, "last_executed_timestamp"); static final ColumnMetadata last_executed_micros = getColumn(CommandsForKeys, "last_executed_micros"); static final ColumnMetadata last_write_timestamp = getColumn(CommandsForKeys, "last_write_timestamp"); - static final ColumnMetadata blind_witnessed = getColumn(CommandsForKeys, "blind_witnessed"); - static final ColumnMetadata series = getColumn(CommandsForKeys, "series"); - static final ColumnMetadata timestamp = getColumn(CommandsForKeys, "timestamp"); static final ColumnMetadata data = getColumn(CommandsForKeys, "data"); - static final Columns statics = Columns.from(Lists.newArrayList(max_timestamp, last_executed_timestamp, last_executed_micros, last_write_timestamp, blind_witnessed)); + // Ordered by columnn name because it will be used to construct btree leaf arrays + static final ColumnMetadata[] static_columns_metadata = new ColumnMetadata[] { last_executed_micros, last_executed_timestamp, last_write_timestamp, max_timestamp }; + + static final Columns statics = Columns.from(Lists.newArrayList(max_timestamp, last_executed_timestamp, last_executed_micros, last_write_timestamp)); static final Columns regulars = Columns.from(Lists.newArrayList(data)); private static final RegularAndStaticColumns all = new RegularAndStaticColumns(statics, regulars); private static final RegularAndStaticColumns justStatic = new RegularAndStaticColumns(statics, Columns.NONE); @@ -344,7 +448,8 @@ static boolean hasStaticChanges(CommandsForKey original, CommandsForKey current) { return valueModified(CommandsForKey::max, original, current) || valueModified(CommandsForKey::lastExecutedTimestamp, original, current) - || valueModified(CommandsForKey::lastWriteTimestamp, original, current); + || valueModified(CommandsForKey::lastWriteTimestamp, original, current) + || valueModified(CommandsForKey::rawLastExecutedHlc, original, current); } private static boolean hasRegularChanges(CommandsForKey original, CommandsForKey current) @@ -369,6 +474,78 @@ else if (hasRegularChanges) } } + public static class CommandsForKeyRows extends CommandsForKeyColumns + { + public static ByteBuffer[] splitPartitionKey(DecoratedKey key) + { + return partitionKeyType.split(key.getKey()); + } + + public static int getStoreId(ByteBuffer[] partitionKeyComponents) + { + return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); + } + + public static Timestamp getMaxTimestamp(Row row) + { + Cell cell = row.getCell(max_timestamp); + if (cell == null) + return null; + return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); + } + + public static Timestamp getLastExecutedTimestamp(Row row) + { + Cell cell = row.getCell(last_executed_timestamp); + if (cell == null) + return null; + return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); + } + + public static long getLastExecutedMicros(Row row) + { + Cell cell = row.getCell(last_executed_micros); + if (cell == null || cell.accessor().isEmpty(cell.value())) + return Long.MIN_VALUE; + return cell.accessor().getLong(cell.value(), 0); + } + + public static PartitionKey getKey(ByteBuffer[] partitionKeyComponents) + { + return deserializeKey(partitionKeyComponents[key.position()]); + } + + public static Timestamp getTimestamp(Row row) + { + return deserializeTimestampOrNull(row.clustering().bufferAt(CommandsForKeyColumns.timestamp.position()), Timestamp::fromBits); + } + + public static Timestamp getLastWriteTimestamp(Row row) + { + Cell cell = row.getCell(last_write_timestamp); + if (cell == null) + return null; + return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); + } + + public static Row truncateStaticRow(long nowInSec, Row row, long last_execute_micros, Timestamp last_execute, Timestamp last_write, Timestamp max_timestamp) + { + long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); + long newTimestamp = oldTimestamp + 1; + + ColumnMetadata[] fields = CommandsForKeyColumns.static_columns_metadata; + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(fields.length); + newLeaf[0] = BufferCell.live(fields[0], newTimestamp, ByteBufferAccessor.instance.valueOf(last_execute_micros)); + newLeaf[1] = BufferCell.live(fields[1], newTimestamp, serializeTimestamp(last_execute)); + newLeaf[2] = BufferCell.live(fields[2], newTimestamp, serializeTimestamp(last_write)); + newLeaf[3] = BufferCell.live(fields[3], newTimestamp, serializeTimestamp(max_timestamp)); + + return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), + new Row.Deletion(DeletionTime.build(oldTimestamp, nowInSec), false), + newLeaf); + } + } + private static final TableMetadata Topologies = parse(TOPOLOGIES, "accord topologies", @@ -377,7 +554,9 @@ else if (hasRegularChanges) "topology blob, " + "sync_state int, " + "pending_sync_notify set, " + // nodes that need to be told we're synced - "remote_sync_complete set " + // nodes that have told us they're synced + "remote_sync_complete set, " + // nodes that have told us they're synced + "closed map, " + + "redundant map" + ')').build(); private static final TableMetadata EpochMetadata = @@ -397,6 +576,8 @@ else if (hasRegularChanges) "reject_before blob, " + "bootstrap_began_at blob, " + "safe_to_read blob, " + + "redundant_before blob, " + + "durable_before blob, " + "PRIMARY KEY(store_id)" + ')').build(); @@ -455,15 +636,12 @@ private static T deserializeOrNull(ByteBuffer bytes, LocalVersionedSerialize return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, serializer) : null; } - private static ImmutableSortedMap deserializeWaitingOnApply(Map serialized) + private static WaitingOn deserializeWaitingOn(Deps deps, ByteBuffer bytes) throws IOException { - if (serialized == null || serialized.isEmpty()) - return ImmutableSortedMap.of(); + if (bytes == null || !bytes.hasRemaining()) + return WaitingOn.EMPTY; - NavigableMap result = new TreeMap<>(); - for (Map.Entry entry : serialized.entrySet()) - result.put(deserializeTimestampOrNull(entry.getKey(), Timestamp::fromBits), deserializeTimestampOrNull(entry.getValue(), TxnId::fromBits)); - return ImmutableSortedMap.copyOf(result); + return WaitingOnSerializer.deserialize(deps, new DataInputBuffer(bytes, false)); } private static ImmutableSortedSet deserializeTimestampSet(Set serialized, TimestampFactory timestampFactory) @@ -513,27 +691,31 @@ private static boolean valueModified(Function get, C original, C cu return prev != value; } - private static void addCellIfModified(ColumnMetadata column, Function get, SerializeFunction serialize, Row.Builder builder, long timestampMicros, C original, C current) throws IOException + private static void addCellIfModified(ColumnMetadata column, Function get, SerializeFunction serialize, Row.Builder builder, long timestampMicros, int nowInSeconds, C original, C current) throws IOException { if (valueModified(get, original, current)) - builder.addCell(live(column, timestampMicros, serialize.apply(get.apply(current)))); + { + V newValue = get.apply(current); + if (newValue == null) builder.addCell(tombstone(column, timestampMicros, nowInSeconds)); + else builder.addCell(live(column, timestampMicros, serialize.apply(newValue))); + } } - private static void addCellIfModified(ColumnMetadata column, Function get, LocalVersionedSerializer serializer, Row.Builder builder, long timestampMicros, C original, C command) throws IOException + private static void addCellIfModified(ColumnMetadata column, Function get, LocalVersionedSerializer serializer, Row.Builder builder, long timestampMicros, int nowInSeconds, C original, C command) throws IOException { - addCellIfModified(column, get, v -> serializeOrNull(v, serializer), builder, timestampMicros, original, command); + addCellIfModified(column, get, v -> serializeOrNull(v, serializer), builder, timestampMicros, nowInSeconds, original, command); } - private static void addKeyCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, C original, C command) throws IOException + private static void addKeyCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, int nowInSeconds, C original, C command) throws IOException { - addCellIfModified(column, get, v -> serializeOrNull((AccordRoutingKey) v, LocalVersionedSerializers.routingKey), builder, timestampMicros, original, command); + addCellIfModified(column, get, v -> serializeOrNull((AccordRoutingKey) v, LocalVersionedSerializers.routingKey), builder, timestampMicros, nowInSeconds, original, command); } - private static > void addEnumCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, C original, C command) throws IOException + private static > void addEnumCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, int nowInSeconds, C original, C command) throws IOException { // TODO: convert to byte arrays ValueAccessor accessor = ByteBufferAccessor.instance; - addCellIfModified(column, get, v -> accessor.valueOf(v.ordinal()), builder, timestampMicros, original, command); + addCellIfModified(column, get, v -> accessor.valueOf(v.ordinal()), builder, timestampMicros, nowInSeconds, original, command); } private static void addSetChanges(ColumnMetadata column, Function> get, SerializeFunction serialize, Row.Builder builder, long timestampMicros, int nowInSec, C original, C command) throws IOException @@ -610,34 +792,30 @@ public static Mutation getCommandMutation(int storeId, Command original, Command builder.newRow(Clustering.EMPTY); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, original, command); - addKeyCellIfModified(CommandsColumns.home_key, Command::homeKey, builder, timestampMicros, original, command); - addKeyCellIfModified(CommandsColumns.progress_key, Command::progressKey, builder, timestampMicros, original, command); - addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, original, command); - addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, original, command); - addCellIfModified(CommandsColumns.txn, Command::partialTxn, LocalVersionedSerializers.partialTxn, builder, timestampMicros, original, command); + addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); + addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, nowInSeconds, original, command); + addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, nowInSeconds, original, command); + addCellIfModified(CommandsColumns.txn, Command::partialTxn, LocalVersionedSerializers.partialTxn, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); - addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); - addCellIfModified(CommandsColumns.accepted_ballot, Command::accepted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); + addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); + addCellIfModified(CommandsColumns.accepted_ballot, Command::accepted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.dependencies, Command::partialDeps, LocalVersionedSerializers.partialDeps, builder, timestampMicros, original, command); + addCellIfModified(CommandsColumns.dependencies, Command::partialDeps, LocalVersionedSerializers.partialDeps, builder, timestampMicros, nowInSeconds, original, command); addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, LocalVersionedSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); - if (command.isCommitted()) + addCellIfModified(CommandsColumns.writes, Command::writes, v -> serialize(v, LocalVersionedSerializers.writes), builder, timestampMicros, nowInSeconds, original, command); + addCellIfModified(CommandsColumns.result, Command::result, v -> serialize((TxnData) v, LocalVersionedSerializers.result), builder, timestampMicros, nowInSeconds, original, command); + + // TODO review this is just to work around Truncated not being committed but having a status after committed + // so status claims it is committed. + if (!command.isTruncated() && command.isCommitted()) { Command.Committed committed = command.asCommitted(); Command.Committed originalCommitted = original != null && original.isCommitted() ? original.asCommitted() : null; - addSetChanges(CommandsColumns.waiting_on_commit, Command.Committed::waitingOnCommit, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, originalCommitted, committed); - addMapChanges(CommandsColumns.waiting_on_apply, Command.Committed::waitingOnApply, AccordKeyspace::serializeTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, originalCommitted, committed); - if (command.isExecuted()) - { - Command.Executed executed = command.asExecuted(); - Command.Executed originalExecuted = original != null && original.isExecuted() ? original.asExecuted() : null; - addCellIfModified(CommandsColumns.writes, Command.Executed::writes, v -> serialize(v, LocalVersionedSerializers.writes), builder, timestampMicros, originalExecuted, executed); - addCellIfModified(CommandsColumns.result, Command.Executed::result, v -> serialize((TxnData) v, LocalVersionedSerializers.result), builder, timestampMicros, originalExecuted, executed); - } + if (originalCommitted == null || committed.waitingOn != originalCommitted.waitingOn) + builder.addCell(live(CommandsColumns.waiting_on, timestampMicros, WaitingOnSerializer.serialize(committed.waitingOn))); } Row row = builder.build(); @@ -694,6 +872,14 @@ public static T deserializeTimestampOrNull(ByteBuffer byte return factory.create(split.get(0).getLong(), split.get(1).getLong(), new Node.Id(split.get(2).getInt())); } + public static T deserializeTimestampOrNull(V value, ValueAccessor accessor, TimestampFactory factory) + { + if (value == null || accessor.isEmpty(value)) + return null; + List split = TIMESTAMP_TYPE.unpack(value, accessor); + return factory.create(accessor.getLong(split.get(0), 0), accessor.getLong(split.get(1), 0), new Node.Id(accessor.getInt(split.get(2), 0))); + } + private static T deserializeTimestampOrNull(UntypedResultSet.Row row, String name, TimestampFactory factory) { return deserializeTimestampOrNull(row.getBlob(name), factory); @@ -724,6 +910,15 @@ private static T deserializeWithVersionOr(UntypedResultSet.Row row, String d return deserialize(row.getBlob(dataColumn), serializer); } + private static T deserializeWithVersionOr(Row row, ColumnMetadata metadata, LocalVersionedSerializer serializer, Supplier defaultSupplier) throws IOException + { + Cell cell = row.getCell(metadata); + if (cell == null) + return defaultSupplier.get(); + + return deserialize(cell.buffer(), serializer); + } + public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId txnId) { String cql = "SELECT * FROM %s.%s " + @@ -959,25 +1154,23 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) CommonAttributes.Mutable attributes = new CommonAttributes.Mutable(txnId); // TODO: something less brittle than ordinal, more efficient than values() attributes.durability(Status.Durability.values()[row.getInt("durability", 0)]); - attributes.homeKey(deserializeOrNull(row.getBlob("home_key"), LocalVersionedSerializers.routingKey)); - attributes.progressKey(deserializeOrNull(row.getBlob("progress_key"), LocalVersionedSerializers.routingKey)); attributes.route(deserializeOrNull(row.getBlob("route"), LocalVersionedSerializers.route)); attributes.partialTxn(deserializeTxn(row)); - attributes.partialDeps(deserializeDependencies(row)); + PartialDeps deps = deserializeDependencies(row); + attributes.partialDeps(deps); attributes.setListeners(deserializeListeners(row, "listeners")); + WaitingOn waitingOn = deserializeWaitingOn(deps, row.getBlob("waiting_on")); Timestamp executeAt = deserializeExecuteAt(row); Ballot promised = deserializeTimestampOrNull(row, "promised_ballot", Ballot::fromBits); Ballot accepted = deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits); - ImmutableSortedSet waitingOnCommit = deserializeTxnIdNavigableSet(row, "waiting_on_commit"); - ImmutableSortedMap waitingOnApply = deserializeWaitingOnApply(row.getMap("waiting_on_apply", BytesType.instance, BytesType.instance)); Writes writes = deserializeWithVersionOr(row, "writes", LocalVersionedSerializers.writes, () -> null); Result result = deserializeWithVersionOr(row, "result", LocalVersionedSerializers.result, () -> null); switch (status.status) { - case NotWitnessed: - return Command.SerializerSupport.notWitnessed(attributes, promised); + case NotDefined: + return Command.SerializerSupport.notDefined(attributes, promised); case PreAccepted: return Command.SerializerSupport.preaccepted(attributes, executeAt, promised); case AcceptedInvalidate: @@ -986,11 +1179,14 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) return Command.SerializerSupport.accepted(attributes, status, executeAt, promised, accepted); case Committed: case ReadyToExecute: - return Command.SerializerSupport.committed(attributes, status, executeAt, promised, accepted, waitingOnCommit, waitingOnApply); + return Command.SerializerSupport.committed(attributes, status, executeAt, promised, accepted, waitingOn); case PreApplied: case Applied: + return Command.SerializerSupport.executed(attributes, status, executeAt, promised, accepted, waitingOn, writes, result); + case Truncated: + return Command.SerializerSupport.truncatedApply(attributes, status, executeAt, writes, result); case Invalidated: - return Command.SerializerSupport.executed(attributes, status, executeAt, promised, accepted, waitingOnCommit, waitingOnApply, writes, result); + return Command.SerializerSupport.invalidated(txnId, attributes.durableListeners()); default: throw new IllegalStateException("Unhandled status " + status); } @@ -1032,9 +1228,9 @@ public static PartialTxn deserializeTxn(UntypedResultSet.Row row) throws IOExcep return deserializeOrNull(row.getBlob("txn"), LocalVersionedSerializers.partialTxn); } - public static PartitionKey deserializeKey(UntypedResultSet.Row row) + public static PartitionKey deserializeKey(ByteBuffer buffer) { - List split = KEY_TYPE.unpack(row.getBytes("key"), ByteBufferAccessor.instance); + List split = KEY_TYPE.unpack(buffer, ByteBufferAccessor.instance); TableId tableId = TableId.fromUUID(UUIDSerializer.instance.deserialize(split.get(0))); ByteBuffer key = split.get(1); @@ -1044,6 +1240,11 @@ public static PartitionKey deserializeKey(UntypedResultSet.Row row) return new PartitionKey(metadata.keyspace, tableId, metadata.partitioner.decorateKey(key)); } + public static PartitionKey deserializeKey(UntypedResultSet.Row row) + { + return deserializeKey(row.getBytes("key")); + } + private static void addSeriesMutations(ImmutableSortedMap prev, ImmutableSortedMap value, SeriesKind kind, @@ -1130,10 +1331,10 @@ public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKey ori if (hasStaticChanges) { rowBuilder.newRow(Clustering.STATIC_CLUSTERING); - addCellIfModified(CommandsForKeyColumns.max_timestamp, CommandsForKey::max, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, original, cfk); - addCellIfModified(CommandsForKeyColumns.last_executed_timestamp, CommandsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, original, cfk); - addCellIfModified(CommandsForKeyColumns.last_executed_micros, CommandsForKey::lastExecutedMicros, accessor::valueOf, rowBuilder, timestampMicros, original, cfk); - addCellIfModified(CommandsForKeyColumns.last_write_timestamp, CommandsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, original, cfk); + addCellIfModified(CommandsForKeyColumns.max_timestamp, CommandsForKey::max, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); + addCellIfModified(CommandsForKeyColumns.last_executed_timestamp, CommandsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); + addCellIfModified(CommandsForKeyColumns.last_executed_micros, CommandsForKey::rawLastExecutedHlc, accessor::valueOf, rowBuilder, timestampMicros, nowInSeconds, original, cfk); + addCellIfModified(CommandsForKeyColumns.last_write_timestamp, CommandsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); Row row = rowBuilder.build(); if (!row.isEmpty()) partitionBuilder.add(row); @@ -1250,7 +1451,7 @@ public static class EpochDiskState public final long minEpoch; public final long maxEpoch; - public EpochDiskState(long minEpoch, long maxEpoch) + private EpochDiskState(long minEpoch, long maxEpoch) { Invariants.checkArgument(minEpoch >= 0, "Min Epoch %d < 0", minEpoch); Invariants.checkArgument(maxEpoch >= minEpoch, "Max epoch %d < min %d", maxEpoch, minEpoch); @@ -1258,17 +1459,34 @@ public EpochDiskState(long minEpoch, long maxEpoch) this.maxEpoch = maxEpoch; } + public static EpochDiskState create(long minEpoch, long maxEpoch) + { + if (minEpoch == maxEpoch && minEpoch == 0) + return EMPTY; + return new EpochDiskState(minEpoch, maxEpoch); + } + + public static EpochDiskState create(long epoch) + { + return create(epoch, epoch); + } + + public boolean isEmpty() + { + return minEpoch == maxEpoch && maxEpoch == 0; + } + private EpochDiskState withNewMaxEpoch(long epoch) { Invariants.checkArgument(epoch > maxEpoch, "Epoch %d <= %d (max)", epoch, maxEpoch); - return new EpochDiskState(Math.max(1, minEpoch), epoch); + return EpochDiskState.create(Math.max(1, minEpoch), epoch); } private EpochDiskState withNewMinEpoch(long epoch) { Invariants.checkArgument(epoch > minEpoch, "epoch %d <= %d (min)", epoch, minEpoch); Invariants.checkArgument(epoch <= maxEpoch, "epoch %d > %d (max)", epoch, maxEpoch); - return new EpochDiskState(epoch, maxEpoch); + return EpochDiskState.create(epoch, maxEpoch); } @Override @@ -1296,11 +1514,12 @@ public int hashCode() } } - private static void saveEpochDiskState(EpochDiskState diskState) + private static EpochDiskState saveEpochDiskState(EpochDiskState diskState) { String cql = "INSERT INTO %s.%s (key, min_epoch, max_epoch) VALUES (0, ?, ?);"; executeInternal(format(cql, ACCORD_KEYSPACE_NAME, EPOCH_METADATA), diskState.minEpoch, diskState.maxEpoch); + return diskState; } @Nullable @@ -1312,7 +1531,7 @@ public static EpochDiskState loadEpochDiskState() if (result.isEmpty()) return null; UntypedResultSet.Row row = result.one(); - return new EpochDiskState(row.getLong("min_epoch"), row.getLong("max_epoch")); + return EpochDiskState.create(row.getLong("min_epoch"), row.getLong("max_epoch")); } /** @@ -1324,6 +1543,8 @@ public static EpochDiskState loadEpochDiskState() */ private static EpochDiskState maybeUpdateMaxEpoch(EpochDiskState diskState, long epoch) { + if (diskState.isEmpty()) + return saveEpochDiskState(EpochDiskState.create(epoch)); Invariants.checkArgument(epoch >= diskState.minEpoch, "Epoch %d < %d (min)", epoch, diskState.minEpoch); if (epoch > diskState.maxEpoch) { @@ -1362,6 +1583,26 @@ public static EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, Ep return diskState; } + public static EpochDiskState markClosed(Ranges ranges, long epoch, EpochDiskState diskState) + { + diskState = maybeUpdateMaxEpoch(diskState, epoch); + String cql = "UPDATE %s.%s SET closed = closed + ? WHERE epoch = ?"; + executeInternal(String.format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + KeySerializers.rangesToBlobMap(ranges), epoch); + flush(Topologies); + return diskState; + } + + public static EpochDiskState markRedundant(Ranges ranges, long epoch, EpochDiskState diskState) + { + diskState = maybeUpdateMaxEpoch(diskState, epoch); + String cql = "UPDATE %s.%s SET redundant = redundant + ? WHERE epoch = ?"; + executeInternal(String.format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + KeySerializers.rangesToBlobMap(ranges), epoch); + flush(Topologies); + return diskState; + } + public static EpochDiskState setNotifyingLocalSync(long epoch, Set pending, EpochDiskState diskState) { diskState = maybeUpdateMaxEpoch(diskState, epoch); @@ -1407,7 +1648,7 @@ public static EpochDiskState truncateTopologyUntil(final long epoch, EpochDiskSt public interface TopologyLoadConsumer { - void load(long epoch, Topology topology, SyncStatus syncStatus, Set pendingSyncNotify, Set remoteSyncComplete); + void load(long epoch, Topology topology, SyncStatus syncStatus, Set pendingSyncNotify, Set remoteSyncComplete, Ranges closed, Ranges redundant); } @VisibleForTesting @@ -1431,8 +1672,10 @@ public static void loadEpoch(long epoch, TopologyLoadConsumer consumer) throws I Set remoteSyncComplete = row.has("remote_sync_complete") ? row.getSet("remote_sync_complete", Int32Type.instance).stream().map(Node.Id::new).collect(Collectors.toSet()) : Collections.emptySet(); + Ranges closed = row.has("closed") ? blobMapToRanges(row.getMap("closed", BytesType.instance, BytesType.instance)) : Ranges.EMPTY; + Ranges redundant = row.has("redundant") ? blobMapToRanges(row.getMap("redundant", BytesType.instance, BytesType.instance)) : Ranges.EMPTY; - consumer.load(epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete); + consumer.load(epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant); } @@ -1496,6 +1739,16 @@ public static Future updateRejectBefore(CommandStore commandStore, ReducingRa return updateCommandStoreMetadata(commandStore, "reject_before", rejectBefore, LocalVersionedSerializers.rejectBefore); } + public static Future updateDurableBefore(CommandStore commandStore, DurableBefore durableBefore) + { + return updateCommandStoreMetadata(commandStore, "durable_before", durableBefore, LocalVersionedSerializers.durableBefore); + } + + public static Future updateRedundantBefore(CommandStore commandStore, RedundantBefore redundantBefore) + { + return updateCommandStoreMetadata(commandStore, "redundant_before", redundantBefore, LocalVersionedSerializers.redundantBefore); + } + public static Future updateBootstrapBeganAt(CommandStore commandStore, NavigableMap bootstrapBeganAt) { return updateCommandStoreMetadata(commandStore, "bootstrap_began_at", bootstrapBeganAt, LocalVersionedSerializers.bootstrapBeganAt); @@ -1508,13 +1761,15 @@ public static Future updateSafeToRead(CommandStore commandStore, NavigableMap public interface CommandStoreMetadataConsumer { - void accept(ReducingRangeMap rejectBefore, NavigableMap bootstrapBeganAt, NavigableMap safeToRead); + void accept(ReducingRangeMap rejectBefore, DurableBefore durableBefore, RedundantBefore redundantBefore, NavigableMap bootstrapBeganAt, NavigableMap safeToRead); } public static void loadCommandStoreMetadata(int id, CommandStoreMetadataConsumer consumer) { UntypedResultSet result = executeOnceInternal(format("SELECT * FROM %s.%s WHERE store_id=?", ACCORD_KEYSPACE_NAME, COMMAND_STORE_METADATA), id); ReducingRangeMap rejectBefore = null; + DurableBefore durableBefore = null; + RedundantBefore redundantBefore = null; NavigableMap bootstrapBeganAt = null; NavigableMap safeToRead = null; if (!result.isEmpty()) @@ -1524,6 +1779,10 @@ public static void loadCommandStoreMetadata(int id, CommandStoreMetadataConsumer { if (row.has("reject_before")) rejectBefore = deserialize(row.getBlob("reject_before"), LocalVersionedSerializers.rejectBefore); + if (row.has("durable_before")) + durableBefore = deserialize(row.getBlob("durable_before"), LocalVersionedSerializers.durableBefore); + if (row.has("redundant_before")) + redundantBefore = deserialize(row.getBlob("redundant_before"), LocalVersionedSerializers.redundantBefore); if (row.has("bootstrap_began_at")) bootstrapBeganAt = deserialize(row.getBlob("bootstrap_began_at"), LocalVersionedSerializers.bootstrapBeganAt); if (row.has("safe_to_read")) @@ -1534,6 +1793,7 @@ public static void loadCommandStoreMetadata(int id, CommandStoreMetadataConsumer throw new UncheckedIOException(e); } } - consumer.accept(rejectBefore, bootstrapBeganAt, safeToRead); + consumer.accept(rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead); } + } diff --git a/src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java b/src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java deleted file mode 100644 index ade2f8e3e33d..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordLocalSyncNotifier.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.io.IOException; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import accord.local.Node; -import accord.utils.Invariants; -import org.apache.cassandra.concurrent.ScheduledExecutors; -import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.exceptions.RequestFailureReason; -import org.apache.cassandra.gms.IFailureDetector; -import org.apache.cassandra.io.IVersionedSerializer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.net.IVerbHandler; -import org.apache.cassandra.net.Message; -import org.apache.cassandra.net.MessageDelivery; -import org.apache.cassandra.net.RequestCallback; -import org.apache.cassandra.net.Verb; -import org.apache.cassandra.service.accord.serializers.TopologySerializers; - -public class AccordLocalSyncNotifier implements RequestCallback -{ - public static final IVerbHandler verbHandler = message -> AccordService.instance().remoteSyncComplete(message); - private static final Logger logger = LoggerFactory.getLogger(AccordLocalSyncNotifier.class); - - interface Listener - { - void onEndpointAck(Node.Id id, long epoch); - void onComplete(long epoch); - } - - private final long epoch; - private final Node.Id from; - private final Set pendingNotifications; - private final AccordEndpointMapper endpointMapper; - private final IFailureDetector failureDetector; - private final Listener listener; - private final MessageDelivery messagingService; - - public AccordLocalSyncNotifier(long epoch, - Node.Id from, Set pendingNotifications, - AccordEndpointMapper endpointMapper, - MessageDelivery messagingService, IFailureDetector failureDetector, - Listener listener) - { - this.epoch = epoch; - this.from = from; - this.pendingNotifications = pendingNotifications; - this.endpointMapper = endpointMapper; - this.failureDetector = failureDetector; - this.listener = listener; - this.messagingService = messagingService; - } - - private void notify(Node.Id to) - { - InetAddressAndPort toEp = endpointMapper.mappedEndpoint(to); - if (failureDetector.isAlive(toEp)) - { - Message msg = Message.out(Verb.ACCORD_SYNC_NOTIFY_REQ, new Notification(epoch, from, to)); - messagingService.sendWithCallback(msg, toEp, this); - } - else - { - scheduleNotify(to); - } - } - - public void scheduleNotify(Node.Id to) - { - ScheduledExecutors.scheduledTasks.schedule(() -> notify(to), 1, TimeUnit.MINUTES); - } - - public synchronized void start() - { - if (pendingNotifications.isEmpty()) - { - listener.onComplete(epoch); - return; - } - pendingNotifications.forEach(this::notify); - } - - private synchronized void onResponse(InetAddressAndPort fromEp, Node.Id from) - { - try - { - Invariants.checkArgument(endpointMapper.mappedId(fromEp).equals(from), "%s != %s", from, endpointMapper.mappedId(fromEp)); - listener.onEndpointAck(from, epoch); - pendingNotifications.remove(from); - if (pendingNotifications.isEmpty()) - listener.onComplete(epoch); - } - catch (Throwable t) - { - logger.error("Unhandled exception handling sync ack on epoch {} from {}", epoch, fromEp, t); - scheduleNotify(from); - } - } - - @Override - public synchronized void onResponse(Message msg) - { - onResponse(msg.from(), msg.payload.from); - } - - @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) - { - scheduleNotify(endpointMapper.mappedId(from)); - } - - public static class Notification - { - public static final IVersionedSerializer serializer = new IVersionedSerializer() - { - @Override - public void serialize(Notification notification, DataOutputPlus out, int version) throws IOException - { - out.writeLong(notification.epoch); - TopologySerializers.nodeId.serialize(notification.from, out, version); - TopologySerializers.nodeId.serialize(notification.to, out, version); - } - - @Override - public Notification deserialize(DataInputPlus in, int version) throws IOException - { - return new Notification(in.readLong(), - TopologySerializers.nodeId.deserialize(in, version), - TopologySerializers.nodeId.deserialize(in, version)); - } - - @Override - public long serializedSize(Notification notification, int version) - { - return TypeSizes.LONG_SIZE - + TopologySerializers.nodeId.serializedSize() - + TopologySerializers.nodeId.serializedSize(); - } - }; - final long epoch; - final Node.Id from; - final Node.Id to; - - public Notification(long epoch, Node.Id from, Node.Id to) - { - this.epoch = epoch; - this.from = from; - this.to = to; - } - } - - public static class Acknowledgement - { - public static final IVersionedSerializer serializer = new IVersionedSerializer() - { - @Override - public void serialize(Acknowledgement acknowledgement, DataOutputPlus out, int version) throws IOException - { - TopologySerializers.nodeId.serialize(acknowledgement.from, out, version); - } - - @Override - public Acknowledgement deserialize(DataInputPlus in, int version) throws IOException - { - return new Acknowledgement(TopologySerializers.nodeId.deserialize(in, version)); - } - - @Override - public long serializedSize(Acknowledgement acknowledgement, int version) - { - return TopologySerializers.nodeId.serializedSize(); - } - }; - - final Node.Id from; - - public Acknowledgement(Node.Id from) - { - this.from = from; - } - } -} diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 9169b9193675..a6d8e4f3417b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -18,10 +18,14 @@ package org.apache.cassandra.service.accord; +import java.util.Collections; import java.util.EnumMap; +import java.util.EnumSet; import java.util.Map; +import java.util.Set; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; import org.apache.cassandra.net.*; import org.slf4j.Logger; @@ -47,36 +51,45 @@ private static class VerbMapping private static final VerbMapping instance = new VerbMapping(); private final Map mapping = new EnumMap<>(MessageType.class); + private final Map> overrideReplyVerbs = ImmutableMap.>builder() + // read takes Result | Nack + .put(Verb.ACCORD_FETCH_DATA_REQ, EnumSet.of(Verb.ACCORD_FETCH_DATA_RSP, Verb.ACCORD_READ_RSP /* nack */)) + .build(); private VerbMapping() { - mapping.put(MessageType.PRE_ACCEPT_REQ, Verb.ACCORD_PRE_ACCEPT_REQ); - mapping.put(MessageType.PRE_ACCEPT_RSP, Verb.ACCORD_PRE_ACCEPT_RSP); - mapping.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); - mapping.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); - mapping.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); - mapping.put(MessageType.COMMIT_REQ, Verb.ACCORD_COMMIT_REQ); - mapping.put(MessageType.COMMIT_INVALIDATE_REQ, Verb.ACCORD_COMMIT_INVALIDATE_REQ); - mapping.put(MessageType.APPLY_REQ, Verb.ACCORD_APPLY_REQ); - mapping.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); - mapping.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); - mapping.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); - mapping.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_BEGIN_RECOVER_REQ); - mapping.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_BEGIN_RECOVER_RSP); - mapping.put(MessageType.BEGIN_INVALIDATE_REQ, Verb.ACCORD_BEGIN_INVALIDATE_REQ); - mapping.put(MessageType.BEGIN_INVALIDATE_RSP, Verb.ACCORD_BEGIN_INVALIDATE_RSP); - mapping.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); - mapping.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); - mapping.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); - mapping.put(MessageType.INFORM_HOME_DURABLE_REQ,Verb.ACCORD_INFORM_HOME_DURABLE_REQ); - mapping.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); - mapping.put(MessageType.CHECK_STATUS_REQ, Verb.ACCORD_CHECK_STATUS_REQ); - mapping.put(MessageType.CHECK_STATUS_RSP, Verb.ACCORD_CHECK_STATUS_RSP); - mapping.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); - mapping.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); - mapping.put(MessageType.SIMPLE_RSP, Verb.ACCORD_SIMPLE_RSP); - mapping.put(MessageType.FETCH_DATA_REQ, Verb.ACCORD_FETCH_DATA_REQ); - mapping.put(MessageType.FETCH_DATA_RSP, Verb.ACCORD_FETCH_DATA_RSP); + mapping.put(MessageType.PRE_ACCEPT_REQ, Verb.ACCORD_PRE_ACCEPT_REQ); + mapping.put(MessageType.PRE_ACCEPT_RSP, Verb.ACCORD_PRE_ACCEPT_RSP); + mapping.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); + mapping.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); + mapping.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); + mapping.put(MessageType.COMMIT_REQ, Verb.ACCORD_COMMIT_REQ); + mapping.put(MessageType.COMMIT_INVALIDATE_REQ, Verb.ACCORD_COMMIT_INVALIDATE_REQ); + mapping.put(MessageType.APPLY_REQ, Verb.ACCORD_APPLY_REQ); + mapping.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); + mapping.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); + mapping.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); + mapping.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_BEGIN_RECOVER_REQ); + mapping.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_BEGIN_RECOVER_RSP); + mapping.put(MessageType.BEGIN_INVALIDATE_REQ, Verb.ACCORD_BEGIN_INVALIDATE_REQ); + mapping.put(MessageType.BEGIN_INVALIDATE_RSP, Verb.ACCORD_BEGIN_INVALIDATE_RSP); + mapping.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); + mapping.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); + mapping.put(MessageType.WAIT_ON_APPLY_REQ, Verb.ACCORD_WAIT_ON_APPLY_REQ); + mapping.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); + mapping.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); + mapping.put(MessageType.INFORM_HOME_DURABLE_REQ, Verb.ACCORD_INFORM_HOME_DURABLE_REQ); + mapping.put(MessageType.CHECK_STATUS_REQ, Verb.ACCORD_CHECK_STATUS_REQ); + mapping.put(MessageType.CHECK_STATUS_RSP, Verb.ACCORD_CHECK_STATUS_RSP); + mapping.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); + mapping.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); + mapping.put(MessageType.SIMPLE_RSP, Verb.ACCORD_SIMPLE_RSP); + mapping.put(MessageType.FETCH_DATA_REQ, Verb.ACCORD_FETCH_DATA_REQ); + mapping.put(MessageType.FETCH_DATA_RSP, Verb.ACCORD_FETCH_DATA_RSP); + mapping.put(MessageType.SET_SHARD_DURABLE_REQ, Verb.ACCORD_SET_SHARD_DURABLE_REQ); + mapping.put(MessageType.SET_GLOBALLY_DURABLE_REQ, Verb.ACCORD_SET_GLOBALLY_DURABLE_REQ); + mapping.put(MessageType.QUERY_DURABLE_BEFORE_REQ, Verb.ACCORD_QUERY_DURABLE_BEFORE_REQ); + mapping.put(MessageType.QUERY_DURABLE_BEFORE_RSP, Verb.ACCORD_QUERY_DURABLE_BEFORE_RSP); for (MessageType type : MessageType.values()) { @@ -134,11 +147,25 @@ public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply { Message replyTo = (Message) replyContext; Message replyMsg = replyTo.responseWith(reply); - Verb verb = getVerb(reply.type()); - Preconditions.checkNotNull(verb, "Verb is null for type %s", reply.type()); - Preconditions.checkArgument(replyMsg.verb() == verb, "Expected reply message with verb %s but got %s; reply type was %s", replyMsg.verb(), verb, reply.type()); + checkReplyType(reply, replyTo); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); logger.debug("Replying {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); messaging.send(replyMsg, endpoint); } + + private static void checkReplyType(Reply reply, Message replyTo) + { + Verb verb = getVerb(reply.type()); + Preconditions.checkNotNull(verb, "Verb is null for type %s", reply.type()); + Set allowedVerbs = expectedReplyTypes(replyTo.verb()); + Preconditions.checkArgument(allowedVerbs.contains(verb), "Expected reply message with verbs %s but got %s; reply type was %s, request verb was %s", allowedVerbs, verb, reply.type(), replyTo.verb()); + } + + private static Set expectedReplyTypes(Verb verb) + { + Set extra = VerbMapping.instance.overrideReplyVerbs.get(verb); + if (extra != null) return extra; + Verb v = verb.responseVerb; + return v == null ? Collections.emptySet() : Collections.singleton(v); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index eac47bfe4e7e..a0a5202c5615 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -23,13 +23,13 @@ import java.util.function.ToLongFunction; import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.ImmutableSortedSet; import accord.api.Key; import accord.api.Result; import accord.api.RoutingKey; import accord.impl.CommandsForKey; import accord.local.Command; +import accord.local.Command.WaitingOn; import accord.local.CommonAttributes; import accord.local.Node; import accord.local.SaveStatus; @@ -41,21 +41,25 @@ import accord.primitives.FullRangeRoute; import accord.primitives.KeyDeps; import accord.primitives.Keys; +import accord.primitives.PartialDeps; import accord.primitives.PartialKeyRoute; import accord.primitives.PartialRangeRoute; import accord.primitives.PartialTxn; import accord.primitives.Range; import accord.primitives.RangeDeps; import accord.primitives.Ranges; +import accord.primitives.Routable.Domain; import accord.primitives.RoutingKeys; import accord.primitives.Seekables; import accord.primitives.Timestamp; +import accord.primitives.Txn.Kind; import accord.primitives.TxnId; import accord.primitives.Unseekables; import accord.primitives.Writes; -import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; @@ -115,7 +119,7 @@ public static long seekables(Seekables seekables) } } - private static long routingKeysOnly(AbstractKeys keys) + private static long routingKeysOnly(AbstractKeys keys) { // TODO: many routing keys are fixed size, can compute by multiplication long size = ObjectSizes.sizeOfReferenceArray(keys.size()); @@ -130,7 +134,7 @@ public static long routingKeys(RoutingKeys keys) return EMPTY_ROUTING_KEYS_SIZE + routingKeysOnly(keys); } - private static final long EMPTY_FULL_KEY_ROUTE_SIZE = measure(new FullKeyRoute(new TokenKey(null, null), new RoutingKey[0])); + private static final long EMPTY_FULL_KEY_ROUTE_SIZE = measure(new FullKeyRoute(new TokenKey(null, null), true, new RoutingKey[0])); public static long fullKeyRoute(FullKeyRoute route) { return EMPTY_FULL_KEY_ROUTE_SIZE @@ -138,7 +142,7 @@ public static long fullKeyRoute(FullKeyRoute route) + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } - private static final long EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE = measure(new PartialKeyRoute(Ranges.EMPTY, new TokenKey(null, null), new RoutingKey[0])); + private static final long EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE = measure(new PartialKeyRoute(Ranges.EMPTY, new TokenKey(null, null), true, new RoutingKey[0])); public static long partialKeyRoute(PartialKeyRoute route) { return EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE @@ -147,7 +151,7 @@ public static long partialKeyRoute(PartialKeyRoute route) + key(route.homeKey()); } - private static long rangesOnly(AbstractRanges ranges) + private static long rangesOnly(AbstractRanges ranges) { long size = ObjectSizes.sizeOfReferenceArray(ranges.size()); for (int i=0, mi=ranges.size(); i ranges) return size; } - private static final long EMPTY_FULL_RANGE_ROUTE_SIZE = measure(new FullRangeRoute(new TokenKey(null, null), new Range[0])); + private static final long EMPTY_FULL_RANGE_ROUTE_SIZE = measure(new FullRangeRoute(new TokenKey(null, null), true, new Range[0])); public static long fullRangeRoute(FullRangeRoute route) { return EMPTY_FULL_RANGE_ROUTE_SIZE @@ -163,7 +167,7 @@ public static long fullRangeRoute(FullRangeRoute route) + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } - private static final long EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE = measure(new PartialRangeRoute(Ranges.EMPTY, new TokenKey(null, null), new Range[0])); + private static final long EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE = measure(new PartialRangeRoute(Ranges.EMPTY, new TokenKey(null, null), true, new Range[0])); public static long partialRangeRoute(PartialRangeRoute route) { return EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE @@ -172,7 +176,7 @@ public static long partialRangeRoute(PartialRangeRoute route) + key(route.homeKey()); } - public static long route(Unseekables unseekables) + public static long route(Unseekables unseekables) { switch (unseekables.kind()) { @@ -205,6 +209,7 @@ public static long timestamp() { return TIMESTAMP_SIZE; } + public static long timestamp(Timestamp timestamp) { return TIMESTAMP_SIZE; @@ -243,6 +248,11 @@ public static long writes(Writes writes) return size; } + public static long results(Result result) + { + return ((TxnData) result).estimatedSizeOnHeap(); + } + private static final long EMPTY_COMMAND_LISTENER = measure(new Command.ProxyListener(null)); private static final long EMPTY_CFK_LISTENER = measure(new CommandsForKey.Listener((Key) null)); private static final long EMPTY_CFR_LISTENER = measure(new CommandsForRanges.Listener(null)); @@ -259,20 +269,26 @@ public static long listener(Command.DurableAndIdempotentListener listener) private static class CommandEmptySizes { - private static final CommonAttributes EMPTY_ATTRS = new CommonAttributes.Mutable((TxnId) null); - final static long NOT_WITNESSED = measure(Command.SerializerSupport.notWitnessed(EMPTY_ATTRS, Ballot.ZERO)); - final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(EMPTY_ATTRS, null, null)); - final static long ACCEPTED = measure(Command.SerializerSupport.accepted(EMPTY_ATTRS, SaveStatus.Accepted, null, null, null)); - final static long COMMITTED = measure(Command.SerializerSupport.committed(EMPTY_ATTRS, SaveStatus.Committed, null, null, null, ImmutableSortedSet.of(), ImmutableSortedMap.of())); - final static long EXECUTED = measure(Command.SerializerSupport.executed(EMPTY_ATTRS, SaveStatus.Applied, null, null, null, ImmutableSortedSet.of(), ImmutableSortedMap.of(), null, null)); - + private final static TokenKey EMPTY_KEY = new TokenKey("doesnotexist", null); + private final static TxnId EMPTY_TXNID = new TxnId(42, 42, Kind.Read, Domain.Key, new Node.Id(42)); + private final static CommonAttributes.Mutable EMPTY_ATTRS = new CommonAttributes.Mutable(EMPTY_TXNID) + .partialDeps(PartialDeps.NONE) + .route(new FullKeyRoute(EMPTY_KEY, true, new RoutingKey[] {EMPTY_KEY} )); + + final static long NOT_DEFINED = measure(Command.SerializerSupport.notDefined(EMPTY_ATTRS, Ballot.ZERO)); + final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(EMPTY_ATTRS, EMPTY_TXNID, null));; + final static long ACCEPTED = measure(Command.SerializerSupport.accepted(EMPTY_ATTRS, SaveStatus.Accepted, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO)); + final static long COMMITTED = measure(Command.SerializerSupport.committed(EMPTY_ATTRS, SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY)); + final static long EXECUTED = measure(Command.SerializerSupport.executed(EMPTY_ATTRS, SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY, null, null)); + final static long TRUNCATED = measure(Command.SerializerSupport.truncatedApply(EMPTY_ATTRS, SaveStatus.TruncatedApply, EMPTY_TXNID, null, null)); + final static long INVALIDATED = measure(Command.SerializerSupport.invalidated(EMPTY_TXNID, null)); private static long emptySize(Command command) { switch (command.status()) { - case NotWitnessed: - return NOT_WITNESSED; + case NotDefined: + return NOT_DEFINED; case PreAccepted: return PREACCEPTED; case AcceptedInvalidate: @@ -284,8 +300,11 @@ private static long emptySize(Command command) return COMMITTED; case PreApplied: case Applied: - case Invalidated: return EXECUTED; + case Truncated: + return TRUNCATED; + case Invalidated: + return INVALIDATED; default: throw new IllegalStateException("Unhandled status " + command.status()); } @@ -302,42 +321,22 @@ private static long sizeNullable(T value, ToLongFunction measure) public static long command(Command command) { long size = CommandEmptySizes.emptySize(command); - size += sizeNullable(command.homeKey(), AccordObjectSizes::key); - size += sizeNullable(command.progressKey(), AccordObjectSizes::key); size += sizeNullable(command.route(), AccordObjectSizes::route); size += sizeNullable(command.promised(), AccordObjectSizes::timestamp); for (Command.DurableAndIdempotentListener listener : command.durableListeners()) size += listener(listener); - - if (!command.isWitnessed()) - return size; - - Command.PreAccepted preaccepted = command.asWitnessed(); - size += timestamp(preaccepted.executeAt()); - size += sizeNullable(preaccepted.partialTxn(), AccordObjectSizes::txn); - size += sizeNullable(preaccepted.partialDeps(), AccordObjectSizes::dependencies); - - if (!command.isAccepted()) - return size; - - Command.Accepted accepted = command.asAccepted(); - size += timestamp(accepted.accepted()); - - if (!command.isCommitted()) + size += sizeNullable(command.executeAt(), AccordObjectSizes::timestamp); + size += sizeNullable(command.partialTxn(), AccordObjectSizes::txn); + size += sizeNullable(command.partialDeps(), AccordObjectSizes::dependencies); + size += sizeNullable(command.accepted(), AccordObjectSizes::timestamp); + size += sizeNullable(command.writes(), AccordObjectSizes::writes); + size += sizeNullable(command.result(), AccordObjectSizes::results); + + if (!(command instanceof Command.Committed)) return size; Command.Committed committed = command.asCommitted(); - size += TIMESTAMP_SIZE * committed.waitingOnCommit().size(); - size += TIMESTAMP_SIZE * 2 * committed.waitingOnApply().size(); - - if (!command.isExecuted()) - return size; - - Command.Executed executed = command.asExecuted(); - size += sizeNullable(executed.writes(), AccordObjectSizes::writes); - Result result = executed.result(); - if (result != null) - size += ((TxnData) result).estimatedSizeOnHeap(); + size += WaitingOnSerializer.serializedSize(committed.waitingOn); return size; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index cb084201646d..e9bfc0d15d76 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -21,7 +21,6 @@ import java.util.Map; import java.util.NavigableMap; import java.util.function.BiFunction; - import javax.annotation.Nullable; import accord.api.Agent; @@ -34,11 +33,11 @@ import accord.impl.CommandTimeseriesHolder; import accord.impl.CommandsForKey; import accord.impl.SafeCommandsForKey; -import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; import accord.local.CommonAttributes; import accord.local.NodeTimeService; import accord.local.PreLoadContext; +import accord.local.SafeCommand; import accord.local.Status; import accord.primitives.AbstractKeys; import accord.primitives.Deps; @@ -57,6 +56,7 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore commands; private final NavigableMap commandsForKeys; private final AccordCommandStore commandStore; + private final RangesForEpoch ranges; CommandsForRanges.Updater rangeUpdates = null; public AccordSafeCommandStore(PreLoadContext context, @@ -68,6 +68,7 @@ public AccordSafeCommandStore(PreLoadContext context, this.commands = commands; this.commandsForKeys = commandsForKey; this.commandStore = commandStore; + this.ranges = commandStore.updateRangesForEpoch(); } @Override @@ -143,7 +144,7 @@ public NodeTimeService time() @Override public RangesForEpoch ranges() { - return commandStore().ranges(); + return commandStore().unsafeRangesForEpoch(); } @Override @@ -155,7 +156,8 @@ public long latestEpoch() @Override public Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) { - return mapReduce(keysOrRanges, slice, (ts, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, null); + Timestamp maxConflict = mapReduce(keysOrRanges, slice, (ts, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, null); + return Timestamp.nonNullOrMax(maxConflict, commandStore.commandsForRanges().maxRedundant()); } @Override @@ -163,15 +165,14 @@ public void registerHistoricalTransactions(Deps deps) { // used in places such as accord.local.CommandStore.fetchMajorityDeps // We find a set of dependencies for a range then update CommandsFor to know about them - CommandStores.RangesForEpochHolder rangesForEpochHolder = commandStore.rangesForEpochHolder(); - Ranges allRanges = rangesForEpochHolder.get().all(); + Ranges allRanges = ranges.all(); deps.keyDeps.keys().forEach(allRanges, key -> { SafeCommandsForKey cfk = commandsForKey(key); deps.keyDeps.forEach(key, txnId -> { // TODO (desired, efficiency): this can be made more efficient by batching by epoch - if (rangesForEpochHolder.get().coordinates(txnId).contains(key)) + if (ranges.coordinates(txnId).contains(key)) return; // already coordinates, no need to replicate - if (!rangesForEpochHolder.get().allBefore(txnId.epoch()).contains(key)) + if (!ranges.allBefore(txnId.epoch()).contains(key)) return; cfk.registerNotWitnessed(txnId); @@ -183,16 +184,21 @@ public void registerHistoricalTransactions(Deps deps) return; Ranges ranges = deps.rangeDeps.ranges(txnId); - if (rangesForEpochHolder.get().coordinates(txnId).intersects(ranges)) + if (this.ranges.coordinates(txnId).intersects(ranges)) return; // already coordinates, no need to replicate - if (!rangesForEpochHolder.get().allBefore(txnId.epoch()).intersects(ranges)) + if (!this.ranges.allBefore(txnId.epoch()).intersects(ranges)) return; updateRanges().mergeRemote(txnId, ranges.slice(allRanges), Ranges::with); }); } - private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + @Override + public void erase(SafeCommand safeCommand) + { + } + + private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) { accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate, terminalValue); if (accumulate.equals(terminalValue)) @@ -200,7 +206,7 @@ private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) { switch (keysOrRanges.domain()) { @@ -209,7 +215,7 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunc case Key: { // TODO: efficiency - AbstractKeys keys = (AbstractKeys) keysOrRanges; + AbstractKeys keys = (AbstractKeys) keysOrRanges; for (Key key : keys) { if (!slice.contains(key)) continue; @@ -224,7 +230,7 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunc { // Assuming the range provided is in the PreLoadContext, then AsyncLoader has populated commandsForKeys with keys that // are contained within the ranges... so walk all keys found in commandsForKeys - Routables sliced = keysOrRanges.slice(slice, Routables.Slice.Minimal); + Routables sliced = keysOrRanges.slice(slice, Routables.Slice.Minimal); if (!context.keys().slice(slice, Routables.Slice.Minimal).containsAll(sliced)) throw new AssertionError("Range(s) detected not present in the PreLoadContext: expected " + context.keys() + " but given " + keysOrRanges); for (RoutableKey key : commandsForKeys.keySet()) @@ -335,7 +341,7 @@ protected void invalidateSafeState() } @Override - public CommandLoader cfkLoader() + public CommandLoader cfkLoader(RoutableKey key) { return CommandsForKeySerializer.loader; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index 0df25ab6efbb..33b02e95167d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import java.util.Objects; +import java.util.concurrent.TimeUnit; import com.google.common.annotations.VisibleForTesting; @@ -26,6 +27,7 @@ import accord.impl.CommandsForKey; import accord.impl.SafeCommandsForKey; import accord.primitives.RoutableKey; +import accord.primitives.Timestamp; public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState { @@ -111,6 +113,26 @@ public void postExecute() global.set(current); } + public long lastExecutedMicros() + { + return current().lastExecutedHlc(); + } + + public long timestampMicrosFor(Timestamp executeAt, boolean isForWriteTxn) + { + return current().hlcFor(executeAt, isForWriteTxn); + } + + public int nowInSecondsFor(Timestamp executeAt, boolean isForWriteTxn) + { + CommandsForKey current = current(); + current.validateExecuteAtTime(executeAt, isForWriteTxn); + // we use the executeAt time instead of the monotonic database timestamp to prevent uneven + // ttl expiration in extreme cases, ie 1M+ writes/second to a key causing timestamps to overflow + // into the next second on some keys and not others. + return Math.toIntExact(TimeUnit.MICROSECONDS.toSeconds(current.lastExecutedTimestamp().hlc())); + } + @Override public void invalidate() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 09db36757f9d..9c4a6b5a189f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -19,21 +19,27 @@ package org.apache.cassandra.service.accord; import java.util.Arrays; +import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; import com.google.common.annotations.VisibleForTesting; - +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.Result; import accord.coordinate.Preempted; import accord.coordinate.Timeout; +import accord.impl.AbstractConfigurationService; import accord.impl.SimpleProgressLog; import accord.impl.SizeOfIntersectionSorter; +import accord.local.DurableBefore; import accord.local.Node; +import accord.local.NodeTimeService; +import accord.local.RedundantBefore; import accord.local.ShardDistributor.EvenSplit; import accord.messages.Request; import accord.primitives.Txn; @@ -43,8 +49,9 @@ import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; -import org.apache.cassandra.concurrent.Shutdownable; import accord.utils.async.AsyncResult; +import org.agrona.collections.Int2ObjectHashMap; +import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; @@ -53,7 +60,9 @@ import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; import org.apache.cassandra.service.accord.api.AccordScheduler; @@ -65,11 +74,13 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.ImmediateFuture; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static accord.messages.SimpleReply.Ok; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; import static org.apache.cassandra.utils.Clock.Global.nanoTime; @@ -124,6 +135,12 @@ public void startup() {} @Override public void shutdownAndWait(long timeout, TimeUnit unit) { } + @Override + public AccordScheduler scheduler() + { + return null; + } + @Override public Future epochReady(Epoch epoch) { @@ -131,12 +148,19 @@ public Future epochReady(Epoch epoch) } @Override - public void remoteSyncComplete(Message message) {} + public void receive(Message> message) {} + @Override public boolean isAccordManagedKeyspace(String keyspace) { return false; } + + @Override + public Pair, DurableBefore> getRedundantBeforesAndDurableBefore() + { + return Pair.create(new Int2ObjectHashMap<>(), DurableBefore.EMPTY); + } }; private static Node.Id localId = null; @@ -158,9 +182,16 @@ public static IAccordService instance() public static long uniqueNow() { + // TODO (correctness, now): This is not unique it's just currentTimeMillis as microseconds return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); } + public static long unix(TimeUnit timeUnit) + { + Preconditions.checkArgument(timeUnit != TimeUnit.NANOSECONDS, "Nanoseconds since the epoch doesn't fit in a long"); + return timeUnit.convert(Clock.Global.currentTimeMillis(), TimeUnit.MILLISECONDS); + } + private AccordService() { Invariants.checkState(localId != null, "static localId must be set before instantiating AccordService"); @@ -174,6 +205,7 @@ private AccordService() messageSink, configService, AccordService::uniqueNow, + NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, AccordService::uniqueNow), () -> dataStore, new KeyspaceSplitter(new EvenSplit<>(DatabaseDescriptor.getAccordShardCount(), getPartitioner().accordSplitter())), agent, @@ -199,11 +231,6 @@ public IVerbHandler verbHandler() return verbHandler; } - public static long nowInMicros() - { - return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); - } - @Override public long currentEpoch() { @@ -326,9 +353,16 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted @Override public void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { + scheduler.shutdownNow(); ExecutorUtils.shutdownAndWait(timeout, unit, this); } + @Override + public AccordScheduler scheduler() + { + return scheduler; + } + @VisibleForTesting public Node node() { @@ -348,11 +382,23 @@ public Future epochReady(Epoch epoch) } @Override - public void remoteSyncComplete(Message message) + public void receive(Message> message) { - Invariants.checkArgument(localId.equals(message.payload.to), "%s != %s", localId, message.payload.to); - configService.remoteSyncComplete(message.payload.from, message.payload.epoch); - MessagingService.instance().respond(new AccordLocalSyncNotifier.Acknowledgement(localId), message); + receive(MessagingService.instance(), configService, message); + } + + @VisibleForTesting + public static void receive(MessageDelivery sink, AbstractConfigurationService configService, Message> message) + { + List notifications = message.payload; + notifications.forEach(notification -> { + notification.syncComplete.forEach(id -> configService.receiveRemoteSyncComplete(id, notification.epoch)); + if (!notification.closed.isEmpty()) + configService.receiveClosed(notification.closed, notification.epoch); + if (!notification.redundant.isEmpty()) + configService.receiveRedundant(notification.redundant, notification.epoch); + }); + sink.respond(Ok, message); } private static Shutdownable toShutdownable(Node node) @@ -401,4 +447,19 @@ public boolean isAccordManagedKeyspace(String keyspace) { return configService.isAccordManagedKeyspace(keyspace); } + + @Override + public Pair, DurableBefore> getRedundantBeforesAndDurableBefore() + { + Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); + AtomicReference durableBefore = new AtomicReference<>(DurableBefore.EMPTY); + AsyncChains.getBlockingAndRethrow(node.commandStores().forEach(safeStore -> { + synchronized (redundantBefores) + { + redundantBefores.put(safeStore.commandStore().id(), safeStore.commandStore().redundantBefore()); + } + durableBefore.set(DurableBefore.merge(durableBefore.get(), safeStore.commandStore().durableBefore())); + })); + return Pair.create(redundantBefores, durableBefore.get()); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 02da5ca74e46..47ac64a11522 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -31,6 +31,7 @@ import org.slf4j.LoggerFactory; import accord.utils.IntrusiveLinkedList; +import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.service.accord.AccordCachingState.Status; @@ -454,6 +455,14 @@ void unsafeClear() return last; } + @VisibleForTesting + public void awaitSaveResults() + { + for (AccordCachingState node : this) + if (node.status() == SAVING) + AsyncChains.awaitUninterruptibly(node.saving()); + } + @VisibleForTesting int numReferencedEntries() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java new file mode 100644 index 000000000000..2af9c9472b91 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java @@ -0,0 +1,352 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; + +import accord.local.Node; +import accord.messages.SimpleReply; +import accord.primitives.Ranges; +import accord.utils.Invariants; +import org.agrona.collections.Int2ObjectHashMap; +import org.agrona.collections.Long2ObjectHashMap; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.serializers.TopologySerializers; +import org.apache.cassandra.utils.CollectionSerializers; + +import static org.apache.cassandra.utils.CollectionSerializers.newListSerializer; + +/** + * Notifies remote replicas that the local replica has synchronised coordination information for this epoch + */ +public class AccordSyncPropagator +{ + public static final IVerbHandler> verbHandler = message -> AccordService.instance().receive(message); + + interface Listener + { + void onEndpointAck(Node.Id id, long epoch); + void onComplete(long epoch); + } + + private interface ReportPending + { + Notification report(PendingEpoch epoch, T value); + } + + static class PendingEpoch + { + final long epoch; + ImmutableSet syncComplete = ImmutableSet.of(); // TODO (desired): propagate ack's for other nodes + Ranges closed = Ranges.EMPTY, redundant = Ranges.EMPTY; + + PendingEpoch(long epoch) + { + this.epoch = epoch; + } + + Notification syncComplete(Node.Id newSyncComplete) + { + if (syncComplete.contains(newSyncComplete)) + return null; + + syncComplete = ImmutableSet.builder() + .addAll(syncComplete) + .add(newSyncComplete) + .build(); + + return new Notification(epoch, Collections.singleton(newSyncComplete), Ranges.EMPTY, Ranges.EMPTY); + } + + Notification closed(Ranges addClosed) + { + if (closed.containsAll(addClosed)) + return null; + + addClosed = addClosed.subtract(closed); + closed = closed.with(addClosed); + return new Notification(epoch, Collections.emptySet(), addClosed, Ranges.EMPTY); + } + + Notification redundant(Ranges addRedundant) + { + if (redundant.containsAll(addRedundant)) + return null; + + addRedundant = addRedundant.subtract(redundant); + redundant = redundant.with(addRedundant); + return new Notification(epoch, Collections.emptySet(), Ranges.EMPTY, addRedundant); + } + + boolean ack(Notification notification) + { + if (!notification.syncComplete.isEmpty()) + { + if (notification.syncComplete.containsAll(syncComplete)) syncComplete = ImmutableSet.of(); + else syncComplete = ImmutableSet.copyOf(Iterables.filter(syncComplete, v -> !notification.syncComplete.contains(v))); + } + closed = closed.subtract(notification.closed); + redundant = redundant.subtract(notification.redundant); + return syncComplete.isEmpty() && closed.isEmpty() && redundant.isEmpty(); + } + + @Override + public String toString() + { + return "PendingEpoch{" + + "epoch=" + epoch + + ", syncComplete=" + syncComplete + + ", closed=" + closed + + ", redundant=" + redundant + + '}'; + } + } + + static class PendingEpochs extends Long2ObjectHashMap + { + boolean ack(List notifications) + { + for (Notification notification : notifications) + { + PendingEpoch epoch = get(notification.epoch); + if (epoch == null) + continue; + if (epoch.ack(notification)) + remove(notification.epoch); + } + return isEmpty(); + } + } + + static class PendingNodes extends Int2ObjectHashMap + { + boolean ack(Node.Id id, List notifications) + { + PendingEpochs node = get(id.id); + if (node == null) + return true; + + if (!node.ack(notifications)) + return false; + + remove(id.id); + return true; + } + } + + private final PendingNodes pending = new PendingNodes(); + private final Node.Id localId; + private final AccordEndpointMapper endpointMapper; + private final MessageDelivery messagingService; + private final IFailureDetector failureDetector; + private final ScheduledExecutorPlus scheduler; + private final Listener listener; + + public AccordSyncPropagator(Node.Id localId, AccordEndpointMapper endpointMapper, + MessageDelivery messagingService, IFailureDetector failureDetector, ScheduledExecutorPlus scheduler, + Listener listener) + { + this.localId = localId; + this.endpointMapper = endpointMapper; + this.messagingService = messagingService; + this.failureDetector = failureDetector; + this.scheduler = scheduler; + this.listener = listener; + } + + boolean hasPending() + { + return !pending.isEmpty(); + } + + @Override + public String toString() + { + return "AccordSyncPropagator{" + + "localId=" + localId + + ", pending=" + pending + + '}'; + } + + public void reportSyncComplete(long epoch, Collection notify, Node.Id syncCompleteId) + { + if (notify.isEmpty()) + { + listener.onComplete(epoch); + return; + } + report(epoch, notify, PendingEpoch::syncComplete, syncCompleteId); + } + + public void reportClosed(long epoch, Collection notify, Ranges closed) + { + report(epoch, notify, PendingEpoch::closed, closed); + } + + public void reportRedundant(long epoch, Collection notify, Ranges redundant) + { + report(epoch, notify, PendingEpoch::redundant, redundant); + } + + private synchronized void report(long epoch, Collection notify, ReportPending report, T param) + { + // TODO (efficiency, now): for larger clusters this can be a problem as we trigger 1 msg for each instance, so in a 1k cluster its 1k messages; this can cause a thundering herd problem + // this is mostly a problem for reportSyncComplete as we include every node in the cluster, for reportClosed/reportRedundant these tend to use only the nodes that are replicas of the range, + // and there is currently an assumption that sub-ranges are done, so only impacting a handful of nodes. + // TODO (correctness, now): during a host replacement multiple epochs are generated (move the range, remove the node), so its possible that notify will never be able to send the notification as the node is leaving the cluster + notify.forEach(id -> { + PendingEpoch pendingEpoch = pending.computeIfAbsent(id.id, ignore -> new PendingEpochs()) + .computeIfAbsent(epoch, PendingEpoch::new); + Notification notification = report.report(pendingEpoch, param); + if (notification != null) + notify(id, Collections.singletonList(notification)); + }); + } + + private boolean hasSyncCompletedFor(long epoch) + { + return pending.values().stream().noneMatch(node -> { + PendingEpoch pending = node.get(epoch); + if (pending == null) + return false; + return !pending.syncComplete.isEmpty(); + }); + } + + private boolean notify(Node.Id to, List notifications) + { + InetAddressAndPort toEp = endpointMapper.mappedEndpoint(to); + if (!failureDetector.isAlive(toEp)) + { + scheduler.schedule(() -> notify(to, notifications), 1, TimeUnit.MINUTES); + return false; + } + Message> msg = Message.out(Verb.ACCORD_SYNC_NOTIFY_REQ, notifications); + messagingService.sendWithCallback(msg, toEp, new RequestCallback(){ + @Override + public void onResponse(Message msg) + { + Invariants.checkState(msg.payload == SimpleReply.Ok, "Unexpected message: %s", msg); + Set completedEpochs = new HashSet<>(); + // TODO review is it a good idea to call the listener while not holding the `AccordSyncPropagator` lock? + synchronized (AccordSyncPropagator.this) + { + pending.ack(to, notifications); + for (Notification notification : notifications) + { + long epoch = notification.epoch; + if (notification.syncComplete.contains(localId)) + { + if (hasSyncCompletedFor(epoch)) + completedEpochs.add(epoch); + } + } + } + for (Notification notification : notifications) + { + long epoch = notification.epoch; + listener.onEndpointAck(to, epoch); + if (completedEpochs.contains(epoch)) + listener.onComplete(epoch); + } + } + + @Override + public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + { + scheduler.schedule(() -> AccordSyncPropagator.this.notify(to, notifications), 1, TimeUnit.MINUTES); + } + + @Override + public boolean invokeOnFailure() + { + return true; + } + }); + return true; + } + + public static class Notification + { + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(Notification notification, DataOutputPlus out, int version) throws IOException + { + out.writeLong(notification.epoch); + CollectionSerializers.serializeCollection(notification.syncComplete, out, version, TopologySerializers.nodeId); + KeySerializers.ranges.serialize(notification.closed, out, version); + KeySerializers.ranges.serialize(notification.redundant, out, version); + } + + @Override + public Notification deserialize(DataInputPlus in, int version) throws IOException + { + return new Notification(in.readLong(), + CollectionSerializers.deserializeList(in, version, TopologySerializers.nodeId), + KeySerializers.ranges.deserialize(in, version), + KeySerializers.ranges.deserialize(in, version)); + } + + @Override + public long serializedSize(Notification notification, int version) + { + return TypeSizes.LONG_SIZE + + CollectionSerializers.serializedCollectionSize(notification.syncComplete, version, TopologySerializers.nodeId) + + KeySerializers.ranges.serializedSize(notification.closed, version) + + KeySerializers.ranges.serializedSize(notification.redundant, version); + } + }; + public static final IVersionedSerializer> listSerializer = newListSerializer(serializer); + + final long epoch; + final Collection syncComplete; + final Ranges closed, redundant; + + public Notification(long epoch, Collection syncComplete, Ranges closed, Ranges redundant) + { + this.epoch = epoch; + this.syncComplete = syncComplete; + this.closed = closed; + this.redundant = redundant; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index 3eb3bd08324c..64ac67fb4a4f 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -28,10 +28,12 @@ import java.util.Objects; import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; import java.util.function.BiFunction; import java.util.function.Function; import javax.annotation.Nullable; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.AbstractIterator; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; @@ -96,7 +98,7 @@ public boolean equalsDeep(RangeCommandSummary other) { return Objects.equals(txnId, other.txnId) && Objects.equals(ranges, other.ranges) - && Objects.equals(status, other.status) + && status == other.status && Objects.equals(executeAt, other.executeAt) && Objects.equals(deps, other.deps); } @@ -190,21 +192,25 @@ public TxnType type(TxnId txnId) public T put(TxnId txnId, Ranges ranges, SaveStatus status, Timestamp execteAt, List dependsOn) { remove(txnId); - RangeCommandSummary summary = new RangeCommandSummary(txnId, ranges, status, execteAt, dependsOn); + put(new RangeCommandSummary(txnId, ranges, status, execteAt, dependsOn)); + //noinspection unchecked + return (T) this; + } + + private void put(RangeCommandSummary summary) + { + TxnId txnId = summary.txnId; localTxns.add(txnId); txnToRange.put(txnId, summary); addRanges(summary); - return (T) this; } private void addRanges(RangeCommandSummary summary) { for (Range range : summary.ranges) - { rangeToTxn.add(Interval.create(normalize(range.start(), range.startInclusive(), true), normalize(range.end(), range.endInclusive(), false), summary)); - } } public T putAll(CommandsForRanges other) @@ -223,6 +229,7 @@ public T putAll(CommandsForRanges other) // empty list (aka no-op) rangeToTxn.removeIf(data -> other.commandsToRanges.containsKey(data.txnId)); rangeToTxn.addAll(other.rangesToCommands); + //noinspection unchecked return (T) this; } @@ -239,18 +246,15 @@ public T mergeRemote(TxnId txnId, Ranges ranges, BiFunction data.txnId.equals(txnId)); addRanges(newValue); } + //noinspection unchecked return (T) this; } @@ -262,6 +266,23 @@ public T remove(TxnId txnId) txnToRange.remove(txnId); rangeToTxn.removeIf(data -> data.txnId.equals(txnId)); } + //noinspection unchecked + return (T) this; + } + + public T map(Function mapper) + { + for (TxnId id : new TreeSet<>(txnToRange.keySet())) + { + RangeCommandSummary summary = txnToRange.get(id); + RangeCommandSummary update = mapper.apply(summary); + if (summary.equals(update)) + continue; + remove(summary.txnId); + if (update != null) + put(update); + } + //noinspection unchecked return (T) this; } } @@ -336,6 +357,8 @@ public String toString() private ImmutableSet localCommands; private ImmutableSortedMap commandsToRanges; private IntervalTree> rangesToCommands; + @Nullable + private Timestamp maxRedundant; public CommandsForRanges() { @@ -357,12 +380,29 @@ public TxnType type(TxnId txnId) return localCommands.contains(txnId) ? TxnType.LOCAL : TxnType.REMOTE; } + @VisibleForTesting + Set knownIds() + { + return commandsToRanges.keySet(); + } + + @VisibleForTesting + IntervalTree> tree() + { + return rangesToCommands; + } + + public @Nullable Timestamp maxRedundant() + { + return maxRedundant; + } + public boolean containsLocally(TxnId txnId) { return localCommands.contains(txnId); } - public Iterable search(AbstractKeys keys) + public Iterable search(AbstractKeys keys) { // group by the keyspace, as ranges are based off TokenKey, which is scoped to a range Map> groupByKeyspace = new TreeMap<>(); @@ -448,17 +488,21 @@ public String toString() private static RoutingKey normalize(RoutingKey key, boolean inclusive, boolean upOrDown) { - if (inclusive) return key; - AccordRoutingKey ak = (AccordRoutingKey) key; - switch (ak.kindOfRoutingKey()) - { - case SENTINEL: - return normalize(ak.asSentinelKey().toTokenKey(), inclusive, upOrDown); - case TOKEN: - TokenKey tk = ak.asTokenKey(); - return tk.withToken(upOrDown ? tk.token().nextValidToken() : tk.token().decreaseSlightly()); - default: - throw new IllegalArgumentException("Unknown kind: " + ak.kindOfRoutingKey()); + while (true) + { + if (inclusive) return key; + AccordRoutingKey ak = (AccordRoutingKey) key; + switch (ak.kindOfRoutingKey()) + { + case SENTINEL: + key = ak.asSentinelKey().toTokenKey(); + continue; + case TOKEN: + TokenKey tk = ak.asTokenKey(); + return tk.withToken(upOrDown ? tk.token().nextValidToken() : tk.token().decreaseSlightly()); + default: + throw new IllegalArgumentException("Unknown kind: " + ak.kindOfRoutingKey()); + } } } @@ -512,4 +556,26 @@ public String toString() '}'; } } + + public void prune(TxnId pruneBefore, Ranges pruneRanges) + { + class MaxErased { Timestamp v; } + MaxErased maxErased = new MaxErased(); + Updater update = update(); + update.map(summary -> { + if (summary.txnId.compareTo(pruneBefore) >= 0) + return summary; + + Ranges newRanges = summary.ranges.subtract(pruneRanges); + if (newRanges == summary.ranges || newRanges.equals(summary.ranges)) + return summary; + + maxErased.v = Timestamp.nonNullOrMax(maxErased.v, summary.executeAt); + if (newRanges.isEmpty()) + return null; + return new RangeCommandSummary(summary.txnId, newRanges, summary.status, summary.executeAt, summary.deps); + }).apply(); + maxRedundant = Timestamp.nonNullOrMax(maxRedundant, maxErased.v); + } + } diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 8ba98e919d05..5ca68d10fccf 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -18,19 +18,25 @@ package org.apache.cassandra.service.accord; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import accord.local.DurableBefore; +import accord.local.RedundantBefore; import accord.messages.Request; import accord.primitives.Txn; import accord.topology.TopologyManager; +import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; +import org.apache.cassandra.service.accord.api.AccordScheduler; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - public interface IAccordService { IVerbHandler verbHandler(); @@ -47,13 +53,15 @@ public interface IAccordService void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException; + AccordScheduler scheduler(); + /** * Return a future that will complete once the accord has completed it's local bootstrap process * for any ranges gained in the given epoch */ Future epochReady(Epoch epoch); - void remoteSyncComplete(Message message); + void receive(Message> message); /** * Temporary method to avoid double-streaming keyspaces @@ -61,4 +69,9 @@ public interface IAccordService * @return */ boolean isAccordManagedKeyspace(String keyspace); + + /** + * Fetch the redundnant befores for every command store + */ + Pair, DurableBefore> getRedundantBeforesAndDurableBefore(); } diff --git a/src/java/org/apache/cassandra/service/accord/TokenRange.java b/src/java/org/apache/cassandra/service/accord/TokenRange.java index 3c8e0e76ea6b..613c30f97f13 100644 --- a/src/java/org/apache/cassandra/service/accord/TokenRange.java +++ b/src/java/org/apache/cassandra/service/accord/TokenRange.java @@ -20,6 +20,8 @@ import java.io.IOException; +import com.google.common.annotations.VisibleForTesting; + import accord.api.RoutingKey; import accord.primitives.Range; import accord.primitives.Ranges; @@ -48,6 +50,12 @@ public String keyspace() return ((AccordRoutingKey) start()).keyspace(); } + @VisibleForTesting + public Range withKeyspace(String ks) + { + return new TokenRange(((AccordRoutingKey) start()).withKeyspace(ks), ((AccordRoutingKey) end()).withKeyspace(ks)); + } + public static TokenRange fullRange(String keyspace) { return new TokenRange(SentinelKey.min(keyspace), SentinelKey.max(keyspace)); diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 5154ca7d80fe..3bfaed2d5082 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -18,6 +18,11 @@ package org.apache.cassandra.service.accord.api; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.api.Agent; import accord.api.Result; import accord.local.Command; @@ -27,15 +32,28 @@ import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.JVMStabilityInspector; import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getReadRpcTimeout; +// TODO (expected): merge with AccordService public class AccordAgent implements Agent { + private static final Logger logger = LoggerFactory.getLogger(AccordAgent.class); + + // TODO (required): this should be configurable and have exponential back-off, escaping to operator input past a certain number of retries + private long retryBootstrapDelayMicros = SECONDS.toMicros(1L); + + public void setRetryBootstrapDelay(long delay, TimeUnit units) + { + retryBootstrapDelayMicros = units.toMicros(delay); + } + @Override public void onRecover(Node node, Result success, Throwable fail) { @@ -54,7 +72,8 @@ public void onInconsistentTimestamp(Command command, Timestamp prev, Timestamp n @Override public void onFailedBootstrap(String phase, Ranges ranges, Runnable retry, Throwable failure) { - + logger.error("Failed bootstrap at {} for {}", phase, ranges, failure); + AccordService.instance().scheduler().once(retry, retryBootstrapDelayMicros, MICROSECONDS); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java index d19f832ace16..6ce68db83820 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java @@ -37,6 +37,18 @@ protected AccordRoutableKey(String keyspace) public final String keyspace() { return keyspace; } public abstract Token token(); + @Override + public Object prefix() + { + return keyspace; + } + + @Override + public String toString() + { + return prefix() + ":" + suffix(); + } + @Override public int hashCode() { diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index e7e65ee36594..b18dcafecae3 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -35,7 +36,9 @@ import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.utils.ByteBufferUtil; @@ -57,6 +60,7 @@ protected AccordRoutingKey(String keyspace) public abstract RoutingKeyKind kindOfRoutingKey(); public abstract long estimatedSizeOnHeap(); + public abstract AccordRoutingKey withKeyspace(String ks); public SentinelKey asSentinelKey() { @@ -104,6 +108,12 @@ public long estimatedSizeOnHeap() return EMPTY_SIZE; } + @Override + public AccordRoutingKey withKeyspace(String ks) + { + return new SentinelKey(ks, isMin); + } + public static SentinelKey min(String keyspace) { return new SentinelKey(keyspace, true); @@ -134,12 +144,9 @@ int asInt() } @Override - public String toString() + public String suffix() { - return "SentinelKey{" + - "keyspace=" + keyspace + - ", key=" + (isMin ? "min": "max") + - '}'; + return isMin ? "-Inf" : "+Inf"; } public static final IVersionedSerializer serializer = new IVersionedSerializer() @@ -219,12 +226,9 @@ public RoutingKeyKind kindOfRoutingKey() } @Override - public String toString() + public String suffix() { - return "TokenKey{" + - "keyspace=" + keyspace() + - ", key=" + token() + - '}'; + return token.toString(); } public long estimatedSizeOnHeap() @@ -232,6 +236,12 @@ public long estimatedSizeOnHeap() return EMPTY_SIZE + token().getHeapSize(); } + @Override + public AccordRoutingKey withKeyspace(String ks) + { + return new TokenKey(ks, token); + } + public static final Serializer serializer = new Serializer(); public static class Serializer implements IVersionedSerializer { @@ -260,9 +270,10 @@ public long serializedSize(TokenKey key, int version) } } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static class Serializer implements IVersionedSerializer { - final RoutingKeyKind[] kinds = RoutingKeyKind.values(); + static final RoutingKeyKind[] kinds = RoutingKeyKind.values(); + @Override public void serialize(AccordRoutingKey key, DataOutputPlus out, int version) throws IOException { @@ -280,6 +291,37 @@ public void serialize(AccordRoutingKey key, DataOutputPlus out, int version) thr } } + public ByteBuffer serialize(AccordRoutingKey key) + { + try (DataOutputBuffer buffer = new DataOutputBuffer((int)serializedSize(key, 0))) + { + try + { + serialize(key, buffer, 0); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + return buffer.asNewBuffer(); + } + } + + public AccordRoutingKey deserialize(ByteBuffer buffer) + { + try (DataInputBuffer in = new DataInputBuffer(buffer, true)) + { + try + { + return deserialize(in, 0); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + } + @Override public AccordRoutingKey deserialize(DataInputPlus in, int version) throws IOException { @@ -312,7 +354,10 @@ public long serializedSize(AccordRoutingKey key, int version) } return size; } - }; + + } + + public static final Serializer serializer = new Serializer(); public static class KeyspaceSplitter implements ShardDistributor { @@ -345,5 +390,11 @@ public List split(Ranges ranges) } return results; } + + @Override + public Range splitRange(Range range, int from, int to, int numSplits) + { + return subSplitter.splitRange(range, from, to, numSplits); + } } } diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index 13e54398029e..2c4e58ee8302 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -105,12 +105,9 @@ public long estimatedSizeOnHeap() } @Override - public String toString() + public String suffix() { - return "PartitionKey{" + - "tableId=" + tableId() + - ", key=" + partitionKey() + - '}'; + return partitionKey().toString(); } // TODO: callers to this method are not correctly handling ranges diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index b5eeaa3f3857..5682df2e3eca 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -187,6 +187,8 @@ private void fail(Throwable throwable) case INITIALIZED: break; // nothing to clean up, call callback } + if (commandStore.hasSafeStore()) + commandStore.agent().onUncaughtException(new IllegalStateException(String.format("Failure to cleanup safe store for %s; status=%s", this, state), throwable)); } catch (Throwable cleanup) { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index 8a1b22b6a339..99c8410403ef 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -23,11 +23,11 @@ import accord.messages.Apply; import accord.primitives.PartialRoute; import accord.primitives.TxnId; -import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.utils.NullableSerializer; public class ApplySerializers { @@ -36,10 +36,10 @@ public class ApplySerializers @Override public void serializeBody(Apply apply, DataOutputPlus out, int version) throws IOException { - out.writeUnsignedVInt(apply.untilEpoch); KeySerializers.seekables.serialize(apply.keys(), out, version); CommandSerializers.timestamp.serialize(apply.executeAt, out, version); DepsSerializer.partialDeps.serialize(apply.deps, out, version); + NullableSerializer.serializeNullable(apply.txn, out, version, CommandSerializers.partialTxn); CommandSerializers.writes.serialize(apply.writes, out, version); TxnData.serializer.serialize((TxnData) apply.result, out, version); } @@ -47,10 +47,11 @@ public void serializeBody(Apply apply, DataOutputPlus out, int version) throws I @Override public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { - return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, in.readUnsignedVInt(), + return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, KeySerializers.seekables.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), + NullableSerializer.deserializeNullable(in, version, CommandSerializers.partialTxn), CommandSerializers.writes.deserialize(in, version), TxnData.serializer.deserialize(in, version)); } @@ -58,10 +59,10 @@ public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, Partial @Override public long serializedBodySize(Apply apply, int version) { - return TypeSizes.sizeofUnsignedVInt(apply.untilEpoch) - + KeySerializers.seekables.serializedSize(apply.keys(), version) + return KeySerializers.seekables.serializedSize(apply.keys(), version) + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + DepsSerializer.partialDeps.serializedSize(apply.deps, version) + + NullableSerializer.serializedNullableSize(apply.txn, version, CommandSerializers.partialTxn) + CommandSerializers.writes.serializedSize(apply.writes, version) + TxnData.serializer.serializedSize((TxnData) apply.result, version); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 803a25b9c27d..37d42395e72f 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -23,6 +23,7 @@ import accord.api.Result; import accord.api.RoutingKey; import accord.local.SaveStatus; +import accord.local.Status; import accord.local.Status.Durability; import accord.messages.CheckStatus; import accord.messages.CheckStatus.CheckStatusNack; @@ -32,6 +33,7 @@ import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Ranges; import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -59,8 +61,7 @@ public void serialize(CheckStatus check, DataOutputPlus out, int version) throws { CommandSerializers.txnId.serialize(check.txnId, out, version); KeySerializers.unseekables.serialize(check.query, out, version); - out.writeUnsignedVInt(check.startEpoch); - out.writeUnsignedVInt(check.endEpoch - check.startEpoch); + out.writeUnsignedVInt(check.sourceEpoch); out.writeByte(check.includeInfo.ordinal()); } @@ -68,11 +69,10 @@ public void serialize(CheckStatus check, DataOutputPlus out, int version) throws public CheckStatus deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Unseekables query = KeySerializers.unseekables.deserialize(in, version); - long startEpoch = in.readUnsignedVInt(); - long endEpoch = in.readUnsignedVInt() + startEpoch; + Unseekables query = KeySerializers.unseekables.deserialize(in, version); + long sourceEpoch = in.readUnsignedVInt(); CheckStatus.IncludeInfo info = infos[in.readByte()]; - return new CheckStatus(txnId, query, startEpoch, endEpoch, info); + return new CheckStatus(txnId, query, sourceEpoch, info); } @Override @@ -80,8 +80,7 @@ public long serializedSize(CheckStatus check, int version) { return CommandSerializers.txnId.serializedSize(check.txnId, version) + KeySerializers.unseekables.serializedSize(check.query, version) - + TypeSizes.sizeofUnsignedVInt(check.startEpoch) - + TypeSizes.sizeofUnsignedVInt(check.endEpoch - check.startEpoch) + + TypeSizes.sizeofUnsignedVInt(check.sourceEpoch) + TypeSizes.BYTE_SIZE; } }; @@ -103,7 +102,10 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t CheckStatusOk ok = (CheckStatusOk) reply; out.write(reply instanceof CheckStatusOkFull ? FULL : OK); + KeySerializers.ranges.serialize(ok.truncated, out, version); + CommandSerializers.status.serialize(ok.invalidIfNotAtLeast, out, version); CommandSerializers.saveStatus.serialize(ok.saveStatus, out, version); + CommandSerializers.saveStatus.serialize(ok.maxSaveStatus, out, version); CommandSerializers.ballot.serialize(ok.promised, out, version); CommandSerializers.ballot.serialize(ok.accepted, out, version); serializeNullable(ok.executeAt, out, version, CommandSerializers.timestamp); @@ -130,10 +132,13 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce { default: throw new IOException("Unhandled CheckStatusReply kind: " + Integer.toHexString(Byte.toUnsignedInt(kind))); case NACK: - return CheckStatusNack.nack(); + return CheckStatusNack.NotOwned; case OK: case FULL: + Ranges truncated = KeySerializers.ranges.deserialize(in, version); + Status invalidIfNotAtLeast = CommandSerializers.status.deserialize(in, version); SaveStatus status = CommandSerializers.saveStatus.deserialize(in, version); + SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in, version); Ballot promised = CommandSerializers.ballot.deserialize(in, version); Ballot accepted = CommandSerializers.ballot.deserialize(in, version); Timestamp executeAt = deserializeNullable(in, version, CommandSerializers.timestamp); @@ -143,14 +148,15 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce RoutingKey homeKey = deserializeNullable(in, version, KeySerializers.routingKey); if (kind == OK) - return createOk(status, promised, accepted, executeAt, isCoordinating, durability, route, homeKey); + return createOk(truncated, invalidIfNotAtLeast, status, maxStatus, promised, accepted, executeAt, + isCoordinating, durability, route, homeKey); PartialTxn partialTxn = deserializeNullable(in, version, CommandSerializers.partialTxn); PartialDeps committedDeps = deserializeNullable(in, version, DepsSerializer.partialDeps); Writes writes = deserializeNullable(in, version, CommandSerializers.writes); Result result = deserializeNullable(in, version, TxnData.serializer); - return createOk(status, promised, accepted, executeAt, isCoordinating, durability, route, homeKey, - partialTxn, committedDeps, writes, result); + return createOk(truncated, invalidIfNotAtLeast, status, maxStatus, promised, accepted, executeAt, + isCoordinating, durability, route, homeKey, partialTxn, committedDeps, writes, result); } } @@ -162,6 +168,9 @@ public long serializedSize(CheckStatusReply reply, int version) return size; CheckStatusOk ok = (CheckStatusOk) reply; + size += KeySerializers.ranges.serializedSize(ok.truncated, version); + size += CommandSerializers.status.serializedSize(ok.invalidIfNotAtLeast, version); + size += CommandSerializers.saveStatus.serializedSize(ok.saveStatus, version); size += CommandSerializers.saveStatus.serializedSize(ok.saveStatus, version); size += CommandSerializers.ballot.serializedSize(ok.promised, version); size += CommandSerializers.ballot.serializedSize(ok.accepted, version); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index 4b20e9e7eaa1..497fa11a2937 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -47,6 +47,7 @@ import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.utils.CastingSerializer; +import org.apache.cassandra.utils.NullableSerializer; public class CommandSerializers { @@ -54,6 +55,7 @@ private CommandSerializers() {} public static final TimestampSerializer txnId = new TimestampSerializer<>(TxnId::fromBits); public static final TimestampSerializer timestamp = new TimestampSerializer<>(Timestamp::fromBits); + public static final IVersionedSerializer nullableTimestamp = NullableSerializer.wrap(timestamp); public static final TimestampSerializer ballot = new TimestampSerializer<>(Ballot::fromBits); public static final EnumSerializer kind = new EnumSerializer<>(Txn.Kind.class); @@ -181,9 +183,9 @@ public long serializedSize(PartialTxn txn, int version) public static final IVersionedSerializer partialTxn = new PartialTxnSerializer(read, query, update); - public static final IVersionedSerializer saveStatus = new EnumSerializer<>(SaveStatus.class); - public static final IVersionedSerializer status = new EnumSerializer<>(Status.class); - public static final IVersionedSerializer durability = new EnumSerializer<>(Durability.class); + public static final EnumSerializer saveStatus = new EnumSerializer<>(SaveStatus.class); + public static final EnumSerializer status = new EnumSerializer<>(Status.class); + public static final EnumSerializer durability = new EnumSerializer<>(Durability.class); public static final IVersionedSerializer writes = new IVersionedSerializer() { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java index 4cd322435c59..1dd60db79677 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java @@ -21,25 +21,44 @@ import java.io.IOException; import java.util.NavigableMap; import java.util.TreeMap; +import java.util.function.IntFunction; import accord.api.RoutingKey; +import accord.local.DurableBefore; +import accord.local.RedundantBefore; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.utils.Invariants; import accord.utils.ReducingRangeMap; +import accord.utils.TriFunction; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.utils.CollectionSerializers; +import org.apache.cassandra.utils.NullableSerializer; public class CommandStoreSerializers { private CommandStoreSerializers() {} - public static IVersionedSerializer> rejectBefore = new IVersionedSerializer>() + public static class ReducingRangeMapSerializer> implements IVersionedSerializer { - public void serialize(ReducingRangeMap map, DataOutputPlus out, int version) throws IOException + final IVersionedSerializer valueSerializer; + final IntFunction newValueArray; + final TriFunction constructor; + + public ReducingRangeMapSerializer(IVersionedSerializer valueSerializer, IntFunction newValueArray, TriFunction constructor) + { + this.valueSerializer = valueSerializer; + this.newValueArray = newValueArray; + this.constructor = constructor; + } + + public void serialize(R map, DataOutputPlus out, int version) throws IOException { out.writeBoolean(map.inclusiveEnds()); int size = map.size(); @@ -47,42 +66,107 @@ public void serialize(ReducingRangeMap map, DataOutputPlus out, int v for (int i=0; i deserialize(DataInputPlus in, int version) throws IOException + public R deserialize(DataInputPlus in, int version) throws IOException { boolean inclusiveEnds = in.readBoolean(); int size = in.readUnsignedVInt32(); - RoutingKey[] keys = new RoutingKey[size]; - Timestamp[] values = new Timestamp[size + 1]; + RoutingKey[] keys = new RoutingKey[size + 1]; + T[] values = newValueArray.apply(size); for (int i=0; i map, int version) + public long serializedSize(R map, int version) { long size = TypeSizes.BOOL_SIZE; size += TypeSizes.sizeofUnsignedVInt(size); int mapSize = map.size(); for (int i=0; i> rejectBefore = new ReducingRangeMapSerializer<>(CommandSerializers.nullableTimestamp, Timestamp[]::new, ReducingRangeMap.SerializerSupport::create); + public static IVersionedSerializer durableBefore = new ReducingRangeMapSerializer<>(NullableSerializer.wrap(new IVersionedSerializer() + { + @Override + public void serialize(DurableBefore.Entry t, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(t.majorityBefore, out, version); + CommandSerializers.txnId.serialize(t.universalBefore, out, version); + } + + @Override + public DurableBefore.Entry deserialize(DataInputPlus in, int version) throws IOException + { + TxnId majorityBefore = CommandSerializers.txnId.deserialize(in, version); + TxnId universalBefore = CommandSerializers.txnId.deserialize(in, version); + return new DurableBefore.Entry(majorityBefore, universalBefore); + } + @Override + public long serializedSize(DurableBefore.Entry t, int version) + { + return CommandSerializers.txnId.serializedSize(t.majorityBefore, version) + + CommandSerializers.txnId.serializedSize(t.universalBefore, version); + } + }), DurableBefore.Entry[]::new, DurableBefore.SerializerSupport::create); + + public static IVersionedSerializer redundantBefore = new ReducingRangeMapSerializer<>(NullableSerializer.wrap(new IVersionedSerializer() + { + @Override + public void serialize(RedundantBefore.Entry t, DataOutputPlus out, int version) throws IOException + { + TokenRange.serializer.serialize((TokenRange) t.range, out, version); + Invariants.checkState(t.startEpoch <= t.endEpoch); + out.writeUnsignedVInt(t.startEpoch); + if (t.endEpoch == Long.MAX_VALUE) out.writeUnsignedVInt(0L); + else out.writeUnsignedVInt(1 + t.endEpoch - t.startEpoch); + CommandSerializers.txnId.serialize(t.redundantBefore, out, version); + CommandSerializers.txnId.serialize(t.bootstrappedAt, out, version); + } + + @Override + public RedundantBefore.Entry deserialize(DataInputPlus in, int version) throws IOException + { + Range range = TokenRange.serializer.deserialize(in, version); + long startEpoch = in.readUnsignedVInt(); + long endEpoch = in.readUnsignedVInt(); + if (endEpoch == 0) endEpoch = Long.MAX_VALUE; + else endEpoch = startEpoch + 1 + endEpoch; + TxnId redundantBefore = CommandSerializers.txnId.deserialize(in, version); + TxnId bootstrappedAt = CommandSerializers.txnId.deserialize(in, version); + return new RedundantBefore.Entry(range, startEpoch, endEpoch, redundantBefore, bootstrappedAt); + } + + @Override + public long serializedSize(RedundantBefore.Entry t, int version) + { + long size = TokenRange.serializer.serializedSize((TokenRange) t.range, version); + size += TypeSizes.sizeofUnsignedVInt(t.startEpoch); + size += TypeSizes.sizeofUnsignedVInt(t.endEpoch == Long.MAX_VALUE ? 0 : 1 + t.endEpoch - t.startEpoch); + size += CommandSerializers.txnId.serializedSize(t.redundantBefore, version); + size += CommandSerializers.txnId.serializedSize(t.bootstrappedAt, version); return size; } - }; + }), RedundantBefore.Entry[]::new, RedundantBefore.SerializerSupport::create); private static class TimestampToRangesSerializer implements IVersionedSerializer> { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index 6b349e7ee91e..a57f684b874d 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -85,7 +85,7 @@ public void serialize(Commit.Invalidate invalidate, DataOutputPlus out, int vers public Commit.Invalidate deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Unseekables scope = KeySerializers.unseekables.deserialize(in, version); + Unseekables scope = KeySerializers.unseekables.deserialize(in, version); long waitForEpoch = in.readUnsignedVInt(); long invalidateUntilEpoch = in.readUnsignedVInt() + waitForEpoch; return Commit.Invalidate.SerializerSupport.create(txnId, scope, waitForEpoch, invalidateUntilEpoch); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java index 1bad94da824b..dc8ea5a3f9dc 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/EnumSerializer.java @@ -19,12 +19,14 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import java.nio.ByteBuffer; import accord.messages.SimpleReply; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.vint.VIntCoding; public class EnumSerializer> implements IVersionedSerializer { @@ -38,6 +40,11 @@ public EnumSerializer(Class clazz) this.values = clazz.getEnumConstants(); } + public E forOrdinal(int ordinal) + { + return values[ordinal]; + } + @Override public void serialize(E t, DataOutputPlus out, int version) throws IOException { @@ -50,6 +57,15 @@ public E deserialize(DataInputPlus in, int version) throws IOException return values[in.readUnsignedVInt32()]; } + public ByteBuffer serialize(E e) + { + int len = TypeSizes.sizeofUnsignedVInt(e.ordinal()); + ByteBuffer out = ByteBuffer.allocate(len); + VIntCoding.writeUnsignedVInt32(e.ordinal(), out); + out.flip(); + return out; + } + @Override public long serializedSize(E t, int version) { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index e76df16335d1..f6fccf5af350 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -23,12 +23,9 @@ import accord.api.Data; import accord.impl.AbstractFetchCoordinator.FetchRequest; import accord.impl.AbstractFetchCoordinator.FetchResponse; -import accord.local.Status; import accord.messages.ReadData; import accord.messages.ReadData.ReadReply; import accord.primitives.Ranges; -import accord.primitives.Timestamp; -import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -49,16 +46,14 @@ public class FetchSerializers @Override public void serialize(FetchRequest request, DataOutputPlus out, int version) throws IOException { - Invariants.checkArgument(request.txnId.equals(TxnId.NONE)); - Invariants.checkArgument(request.waitForStatus == Status.Applied); - Invariants.checkArgument(request.waitUntil.equals(Timestamp.MAX)); + Invariants.checkArgument(request.txnId.epoch() == request.executeAt.epoch()); out.writeUnsignedVInt(request.waitForEpoch()); - CommandSerializers.txnId.serialize((TxnId) request.executeReadAt, out, version); + CommandSerializers.txnId.serialize(request.txnId, out, version); KeySerializers.ranges.serialize((Ranges) request.readScope, out, version); - DepsSerializer.partialDeps.serialize(request.partialDeps, out, version); StreamingTxn.serializer.serialize(request.read, out, version); + out.writeBoolean(request.collectMaxApplied); } @Override @@ -68,17 +63,19 @@ public FetchRequest deserialize(DataInputPlus in, int version) throws IOExceptio CommandSerializers.txnId.deserialize(in, version), KeySerializers.ranges.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), - StreamingTxn.serializer.deserialize(in, version)); + StreamingTxn.serializer.deserialize(in, version), + in.readBoolean()); } @Override public long serializedSize(FetchRequest request, int version) { return TypeSizes.sizeofUnsignedVInt(request.waitForEpoch()) - + CommandSerializers.txnId.serializedSize((TxnId) request.executeReadAt, version) + + CommandSerializers.txnId.serializedSize(request.txnId, version) + KeySerializers.ranges.serializedSize((Ranges) request.readScope, version) + DepsSerializer.partialDeps.serializedSize(request.partialDeps, version) - + StreamingTxn.serializer.serializedSize(request.read, version); + + StreamingTxn.serializer.serializedSize(request.read, version) + + TypeSizes.BYTE_SIZE; } }; @@ -100,7 +97,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I FetchResponse response = (FetchResponse) reply; serializeNullable(response.unavailable, out, version, KeySerializers.ranges); serializeNullable(response.data, out, version, streamDataSerializer); - CommandSerializers.timestamp.serialize(response.maxApplied, out, version); + serializeNullable(response.maxApplied, out, version, CommandSerializers.timestamp); } @Override @@ -112,7 +109,7 @@ public ReadReply deserialize(DataInputPlus in, int version) throws IOException return new FetchResponse(deserializeNullable(in, version, KeySerializers.ranges), deserializeNullable(in, version, streamDataSerializer), - CommandSerializers.timestamp.deserialize(in, version)); + deserializeNullable(in, version, CommandSerializers.timestamp)); } @Override @@ -125,7 +122,7 @@ public long serializedSize(ReadReply reply, int version) return TypeSizes.BYTE_SIZE + serializedNullableSize(response.unavailable, version, KeySerializers.ranges) + serializedNullableSize(response.data, version, streamDataSerializer) - + CommandSerializers.timestamp.serializedSize(response.maxApplied, version); + + serializedNullableSize(response.maxApplied, version, CommandSerializers.timestamp); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java index 38e88ea15768..c6a349028b2b 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java @@ -37,7 +37,7 @@ public class InformHomeDurableSerializers public void serialize(InformHomeDurable inform, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(inform.txnId, out, version); - KeySerializers.routingKey.serialize(inform.homeKey, out, version); + KeySerializers.route.serialize(inform.route, out, version); CommandSerializers.timestamp.serialize(inform.executeAt, out, version); CommandSerializers.durability.serialize(inform.durability, out, version); serializeCollection(inform.persistedOn, out, version, TopologySerializers.nodeId); @@ -48,7 +48,7 @@ public void serialize(InformHomeDurable inform, DataOutputPlus out, int version) public InformHomeDurable deserialize(DataInputPlus in, int version) throws IOException { return new InformHomeDurable(CommandSerializers.txnId.deserialize(in, version), - KeySerializers.routingKey.deserialize(in, version), + KeySerializers.route.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), CommandSerializers.durability.deserialize(in, version), deserializeSet(in, version, TopologySerializers.nodeId)); @@ -58,7 +58,7 @@ public InformHomeDurable deserialize(DataInputPlus in, int version) throws IOExc public long serializedSize(InformHomeDurable inform, int version) { return CommandSerializers.txnId.serializedSize(inform.txnId, version) - + KeySerializers.routingKey.serializedSize(inform.homeKey, version) + + KeySerializers.route.serializedSize(inform.route, version) + CommandSerializers.timestamp.serializedSize(inform.executeAt, version) + CommandSerializers.durability.serializedSize(inform.durability, version) + serializedCollectionSize(inform.persistedOn, version, TopologySerializers.nodeId); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java index e773a405e280..c6f2098a16b1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java @@ -33,21 +33,21 @@ public class InformOfTxnIdSerializers public void serialize(InformOfTxnId inform, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(inform.txnId, out, version); - KeySerializers.routingKey.serialize(inform.homeKey, out, version); + KeySerializers.route.serialize(inform.someRoute, out, version); } @Override public InformOfTxnId deserialize(DataInputPlus in, int version) throws IOException { return new InformOfTxnId(CommandSerializers.txnId.deserialize(in, version), - KeySerializers.routingKey.deserialize(in, version)); + KeySerializers.route.deserialize(in, version)); } @Override public long serializedSize(InformOfTxnId inform, int version) { return CommandSerializers.txnId.serializedSize(inform.txnId, version) - + KeySerializers.routingKey.serializedSize(inform.homeKey, version); + + KeySerializers.route.serializedSize(inform.someRoute, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index 078051bfe121..9fb95cedf2a0 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -19,7 +19,10 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.EnumSet; +import java.util.Map; +import java.util.TreeMap; import java.util.function.IntFunction; import accord.api.Key; @@ -33,6 +36,7 @@ import accord.primitives.PartialKeyRoute; import accord.primitives.PartialRangeRoute; import accord.primitives.PartialRoute; +import accord.primitives.Participants; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.RoutableKey; @@ -87,15 +91,17 @@ public Ranges deserialize(DataInputPlus in, int version, Range[] ranges) { Ranges covering = ranges.deserialize(in, version); RoutingKey homeKey = routingKey.deserialize(in, version); - return PartialKeyRoute.SerializationSupport.create(covering, homeKey, keys); + boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; + return PartialKeyRoute.SerializationSupport.create(covering, homeKey, isParticipatingHomeKey, keys); } @Override - public void serialize(PartialKeyRoute keys, DataOutputPlus out, int version) throws IOException + public void serialize(PartialKeyRoute route, DataOutputPlus out, int version) throws IOException { - super.serialize(keys, out, version); - ranges.serialize(keys.covering, out, version); - routingKey.serialize(keys.homeKey, out, version); + super.serialize(route, out, version); + ranges.serialize(route.covering, out, version); + routingKey.serialize(route.homeKey, out, version); + out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override @@ -103,7 +109,8 @@ public long serializedSize(PartialKeyRoute keys, int version) { return super.serializedSize(keys, version) + ranges.serializedSize(keys.covering, version) - + routingKey.serializedSize(keys.homeKey, version); + + routingKey.serializedSize(keys.homeKey, version) + + 1; } }; @@ -112,21 +119,24 @@ public long serializedSize(PartialKeyRoute keys, int version) @Override FullKeyRoute deserialize(DataInputPlus in, int version, RoutingKey[] keys) throws IOException { RoutingKey homeKey = routingKey.deserialize(in, version); - return FullKeyRoute.SerializationSupport.create(homeKey, keys); + boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; + return FullKeyRoute.SerializationSupport.create(homeKey, isParticipatingHomeKey, keys); } @Override - public void serialize(FullKeyRoute keys, DataOutputPlus out, int version) throws IOException + public void serialize(FullKeyRoute route, DataOutputPlus out, int version) throws IOException { - super.serialize(keys, out, version); - routingKey.serialize(keys.homeKey, out, version); + super.serialize(route, out, version); + routingKey.serialize(route.homeKey, out, version); + out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override - public long serializedSize(FullKeyRoute keys, int version) + public long serializedSize(FullKeyRoute route, int version) { - return super.serializedSize(keys, version) - + routingKey.serializedSize(keys.homeKey, version); + return super.serializedSize(route, version) + + routingKey.serializedSize(route.homeKey, version) + + 1; } }; @@ -136,15 +146,17 @@ public long serializedSize(FullKeyRoute keys, int version) { Ranges covering = ranges.deserialize(in, version); RoutingKey homeKey = routingKey.deserialize(in, version); - return PartialRangeRoute.SerializationSupport.create(covering, homeKey, rs); + boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; + return PartialRangeRoute.SerializationSupport.create(covering, homeKey, isParticipatingHomeKey, rs); } @Override - public void serialize(PartialRangeRoute rs, DataOutputPlus out, int version) throws IOException + public void serialize(PartialRangeRoute route, DataOutputPlus out, int version) throws IOException { - super.serialize(rs, out, version); - ranges.serialize(rs.covering, out, version); - routingKey.serialize(rs.homeKey, out, version); + super.serialize(route, out, version); + ranges.serialize(route.covering, out, version); + routingKey.serialize(route.homeKey, out, version); + out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override @@ -152,7 +164,9 @@ public long serializedSize(PartialRangeRoute rs, int version) { return super.serializedSize(rs, version) + ranges.serializedSize(rs.covering, version) - + routingKey.serializedSize(rs.homeKey, version); + + routingKey.serializedSize(rs.homeKey, version) + + 1; + } }; @@ -160,22 +174,25 @@ public long serializedSize(PartialRangeRoute rs, int version) { @Override FullRangeRoute deserialize(DataInputPlus in, int version, Range[] Ranges) throws IOException { - RoutingKey homeRange = routingKey.deserialize(in, version); - return FullRangeRoute.SerializationSupport.create(homeRange, Ranges); + RoutingKey homeKey = routingKey.deserialize(in, version); + boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; + return FullRangeRoute.SerializationSupport.create(homeKey, isParticipatingHomeKey, Ranges); } @Override - public void serialize(FullRangeRoute Ranges, DataOutputPlus out, int version) throws IOException + public void serialize(FullRangeRoute route, DataOutputPlus out, int version) throws IOException { - super.serialize(Ranges, out, version); - routingKey.serialize(Ranges.homeKey, out, version); + super.serialize(route, out, version); + routingKey.serialize(route.homeKey, out, version); + out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override public long serializedSize(FullRangeRoute ranges, int version) { return super.serializedSize(ranges, version) - + routingKey.serializedSize(ranges.homeKey(), version); + + routingKey.serializedSize(ranges.homeKey(), version) + + 1; } }; @@ -191,11 +208,15 @@ public long serializedSize(FullRangeRoute ranges, int version) EnumSet.of(UnseekablesKind.FullKeyRoute, UnseekablesKind.FullRangeRoute) ); - public static final IVersionedSerializer> unseekables = new AbstractRoutablesSerializer<>( + public static final IVersionedSerializer> unseekables = new AbstractRoutablesSerializer<>( + EnumSet.allOf(UnseekablesKind.class) + ); + + public static final IVersionedSerializer> participants = new AbstractRoutablesSerializer<>( EnumSet.allOf(UnseekablesKind.class) ); - static class AbstractRoutablesSerializer> implements IVersionedSerializer + static class AbstractRoutablesSerializer> implements IVersionedSerializer { final EnumSet permitted; protected AbstractRoutablesSerializer(EnumSet permitted) @@ -328,7 +349,7 @@ public long serializedSize(Seekables t, int version) } }; - public static abstract class AbstractKeysSerializer> implements IVersionedSerializer + public static abstract class AbstractKeysSerializer> implements IVersionedSerializer { final IVersionedSerializer keySerializer; final IntFunction allocate; @@ -368,7 +389,7 @@ public long serializedSize(KS keys, int version) } } - public static abstract class AbstractRangesSerializer> implements IVersionedSerializer + public static abstract class AbstractRangesSerializer implements IVersionedSerializer { @Override public void serialize(RS ranges, DataOutputPlus out, int version) throws IOException @@ -398,4 +419,27 @@ public long serializedSize(RS ranges, int version) return size; } } + + public static Map rangesToBlobMap(Ranges ranges) + { + TreeMap result = new TreeMap<>(); + for (Range range : ranges) + { + result.put(AccordRoutingKey.serializer.serialize((AccordRoutingKey) range.start()), + AccordRoutingKey.serializer.serialize((AccordRoutingKey) range.end())); + } + return result; + } + + public static Ranges blobMapToRanges(Map blobMap) + { + int i = 0; + Range[] ranges = new Range[blobMap.size()]; + for (Map.Entry e : blobMap.entrySet()) + { + ranges[i++] = new TokenRange(AccordRoutingKey.serializer.deserialize(e.getKey()), + AccordRoutingKey.serializer.deserialize(e.getValue())); + } + return Ranges.of(ranges); + } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/QueryDurableBeforeSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/QueryDurableBeforeSerializers.java new file mode 100644 index 000000000000..833c161ed0c3 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/QueryDurableBeforeSerializers.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.QueryDurableBefore; +import accord.messages.QueryDurableBefore.DurableBeforeReply; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class QueryDurableBeforeSerializers +{ + public static final IVersionedSerializer request = new IVersionedSerializer() + { + @Override + public void serialize(QueryDurableBefore msg, DataOutputPlus out, int version) throws IOException + { + out.writeLong(msg.waitForEpoch()); + } + + @Override + public QueryDurableBefore deserialize(DataInputPlus in, int version) throws IOException + { + return new QueryDurableBefore(in.readLong()); + } + + @Override + public long serializedSize(QueryDurableBefore msg, int version) + { + return TypeSizes.LONG_SIZE; + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(DurableBeforeReply msg, DataOutputPlus out, int version) throws IOException + { + CommandStoreSerializers.durableBefore.serialize(msg.durableBeforeMap, out, version); + } + + @Override + public DurableBeforeReply deserialize(DataInputPlus in, int version) throws IOException + { + return new DurableBeforeReply(CommandStoreSerializers.durableBefore.deserialize(in, version)); + } + + @Override + public long serializedSize(DurableBeforeReply msg, int version) + { + return CommandStoreSerializers.durableBefore.serializedSize(msg.durableBeforeMap, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 18b8b15e5136..e9705d3198dd 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -24,8 +24,10 @@ import accord.messages.ReadData.ReadOk; import accord.messages.ReadData.ReadReply; import accord.messages.ReadTxnData; +import accord.messages.WaitUntilApplied; +import accord.primitives.Participants; import accord.primitives.Ranges; -import accord.primitives.Seekables; +import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -45,7 +47,7 @@ public class ReadDataSerializers public void serialize(ReadTxnData read, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(read.txnId, out, version); - KeySerializers.seekables.serialize(read.readScope, out, version); + KeySerializers.participants.serialize(read.readScope, out, version); out.writeUnsignedVInt(read.waitForEpoch()); out.writeUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); } @@ -54,7 +56,7 @@ public void serialize(ReadTxnData read, DataOutputPlus out, int version) throws public ReadTxnData deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Seekables readScope = KeySerializers.seekables.deserialize(in, version); + Participants readScope = KeySerializers.participants.deserialize(in, version); long waitForEpoch = in.readUnsignedVInt(); long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; return ReadTxnData.SerializerSupport.create(txnId, readScope, executeAtEpoch, waitForEpoch); @@ -64,7 +66,7 @@ public ReadTxnData deserialize(DataInputPlus in, int version) throws IOException public long serializedSize(ReadTxnData read, int version) { return CommandSerializers.txnId.serializedSize(read.txnId, version) - + KeySerializers.seekables.serializedSize(read.readScope, version) + + KeySerializers.participants.serializedSize(read.readScope, version) + TypeSizes.sizeofUnsignedVInt(read.waitForEpoch()) + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); } @@ -114,4 +116,36 @@ public long serializedSize(ReadReply reply, int version) + TxnData.nullableSerializer.serializedSize((TxnData) readOk.data, version); } }; + + // TODO (consider): duplicates ReadTxnData ser/de logic; conside deduplicating if another instance of this is added + public static final IVersionedSerializer waitOnApply = new IVersionedSerializer() + { + @Override + public void serialize(WaitUntilApplied msg, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(msg.txnId, out, version); + KeySerializers.participants.serialize(msg.readScope, out, version); + out.writeUnsignedVInt(msg.waitForEpoch()); + CommandSerializers.timestamp.serialize(msg.executeAt, out , version); + } + + @Override + public WaitUntilApplied deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Participants readScope = KeySerializers.participants.deserialize(in, version); + long waitForEpoch = in.readUnsignedVInt(); + Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); + return WaitUntilApplied.SerializerSupport.create(txnId, readScope, executeAt, waitForEpoch); + } + + @Override + public long serializedSize(WaitUntilApplied msg, int version) + { + return CommandSerializers.txnId.serializedSize(msg.txnId, version) + + KeySerializers.participants.serializedSize(msg.readScope, version) + + TypeSizes.sizeofUnsignedVInt(msg.waitForEpoch()) + + CommandSerializers.timestamp.serializedSize(msg.executeAt, version); + } + }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java new file mode 100644 index 000000000000..1b55252d245e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.api.RoutingKey; +import accord.messages.SetGloballyDurable; +import accord.messages.SetShardDurable; +import accord.primitives.Deps; +import accord.primitives.Ranges; +import accord.primitives.SyncPoint; +import accord.primitives.TxnId; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class SetDurableSerializers +{ + public static final IVersionedSerializer shardDurable = new IVersionedSerializer() + { + @Override + public void serialize(SetShardDurable msg, DataOutputPlus out, int version) throws IOException + { + syncPoint.serialize(msg.exclusiveSyncPoint, out, version); + } + + @Override + public SetShardDurable deserialize(DataInputPlus in, int version) throws IOException + { + return new SetShardDurable(syncPoint.deserialize(in, version)); + } + + @Override + public long serializedSize(SetShardDurable msg, int version) + { + return syncPoint.serializedSize(msg.exclusiveSyncPoint, version); + } + }; + + public static final IVersionedSerializer globallyDurable = new IVersionedSerializer() + { + @Override + public void serialize(SetGloballyDurable msg, DataOutputPlus out, int version) throws IOException + { + CommandStoreSerializers.durableBefore.serialize(msg.durableBefore, out, version); + } + + @Override + public SetGloballyDurable deserialize(DataInputPlus in, int version) throws IOException + { + return new SetGloballyDurable(CommandStoreSerializers.durableBefore.deserialize(in, version)); + } + + @Override + public long serializedSize(SetGloballyDurable msg, int version) + { + return CommandStoreSerializers.durableBefore.serializedSize(msg.durableBefore, version); + } + }; + + public static final IVersionedSerializer syncPoint = new IVersionedSerializer() + { + @Override + public void serialize(SyncPoint sp, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(sp.syncId, out, version); + DepsSerializer.deps.serialize(sp.waitFor, out, version); + KeySerializers.ranges.serialize(sp.ranges, out, version); + KeySerializers.routingKey.serialize(sp.homeKey, out, version); + } + + @Override + public SyncPoint deserialize(DataInputPlus in, int version) throws IOException + { + TxnId syncId = CommandSerializers.txnId.deserialize(in, version); + Deps waitFor = DepsSerializer.deps.deserialize(in, version); + Ranges ranges = KeySerializers.ranges.deserialize(in, version); + RoutingKey homeKey = KeySerializers.routingKey.deserialize(in, version); + return SyncPoint.SerializationSupport.construct(syncId, waitFor, ranges, homeKey); + } + + @Override + public long serializedSize(SyncPoint sp, int version) + { + return CommandSerializers.txnId.serializedSize(sp.syncId, version) + + DepsSerializer.deps.serializedSize(sp.waitFor, version) + + KeySerializers.ranges.serializedSize(sp.ranges, version) + + KeySerializers.routingKey.serializedSize(sp.homeKey, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java index a56b1b29bf4d..821b8d9a3025 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java @@ -22,8 +22,8 @@ import accord.messages.WaitOnCommit; import accord.messages.WaitOnCommit.WaitOnCommitOk; +import accord.primitives.Participants; import accord.primitives.TxnId; -import accord.primitives.Unseekables; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -36,14 +36,14 @@ public class WaitOnCommitSerializer public void serialize(WaitOnCommit wait, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(wait.txnId, out, version); - KeySerializers.unseekables.serialize(wait.scope, out, version); + KeySerializers.participants.serialize(wait.scope, out, version); } @Override public WaitOnCommit deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Unseekables scope = KeySerializers.unseekables.deserialize(in, version); + Participants scope = KeySerializers.participants.deserialize(in, version); return WaitOnCommit.SerializerSupport.create(txnId, scope); } @@ -51,7 +51,7 @@ public WaitOnCommit deserialize(DataInputPlus in, int version) throws IOExceptio public long serializedSize(WaitOnCommit wait, int version) { return CommandSerializers.txnId.serializedSize(wait.txnId, version) - + KeySerializers.unseekables.serializedSize(wait.scope, version); + + KeySerializers.participants.serializedSize(wait.scope, version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java new file mode 100644 index 000000000000..00735ff7e356 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import accord.local.Command.WaitingOn; +import accord.primitives.Deps; +import accord.utils.ImmutableBitSet; +import accord.utils.Invariants; +import accord.utils.SimpleBitSet; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class WaitingOnSerializer +{ + public static void serialize(WaitingOn waitingOn, DataOutputPlus out) throws IOException + { + // TODO (expected): use run length encoding; we know that at most 1/3rd of bits will be set between the three bitsets + int length = (waitingOn.deps.txnIdCount() + 63) / 64; + serialize(length, waitingOn.waitingOnCommit, out); + serialize(length, waitingOn.waitingOnApply, out); + serialize(length, waitingOn.appliedOrInvalidated, out); + } + + public static WaitingOn deserialize(Deps deps, DataInputPlus in) throws IOException + { + int length = (deps.txnIdCount() + 63) / 64; + ImmutableBitSet waitingOnCommit = deserialize(length, in); + ImmutableBitSet waitingOnApply = deserialize(length, in); + ImmutableBitSet appliedOrInvalidated = deserialize(length, in); + return new WaitingOn(deps, waitingOnCommit, waitingOnApply, appliedOrInvalidated); + } + + public static long serializedSize(WaitingOn waitingOn) + { + int length = (waitingOn.deps.txnIdCount() + 63) / 64; + return serializedSize(length, waitingOn.waitingOnCommit) + + serializedSize(length, waitingOn.waitingOnApply) + + serializedSize(length, waitingOn.appliedOrInvalidated); + } + + private static void serialize(int length, SimpleBitSet write, DataOutputPlus out) throws IOException + { + long[] bits = SimpleBitSet.SerializationSupport.getArray(write); + Invariants.checkState(length == bits.length); + for (long v : bits) + out.writeLong(v); + } + + private static ImmutableBitSet deserialize(int length, DataInputPlus in) throws IOException + { + long[] bits = new long[length]; + for (int i = 0 ; i < length ; ++i) + bits[i] = in.readLong(); + return ImmutableBitSet.SerializationSupport.construct(bits); + } + + public static long serializedSize(int length, SimpleBitSet write) + { + long[] bits = SimpleBitSet.SerializationSupport.getArray(write); + Invariants.checkState(length == bits.length); + return (long) TypeSizes.LONG_SIZE * length; + } + + public static ByteBuffer serialize(WaitingOn waitingOn) throws IOException + { + int length = (waitingOn.deps.txnIdCount() + 63) / 64; + ByteBuffer out = ByteBuffer.allocate(TypeSizes.LONG_SIZE * length * 3); + serialize(length, waitingOn.waitingOnCommit, out); + serialize(length, waitingOn.waitingOnApply, out); + serialize(length, waitingOn.appliedOrInvalidated, out); + return (ByteBuffer) out.flip(); + } + + private static void serialize(int length, SimpleBitSet write, ByteBuffer out) + { + long[] bits = SimpleBitSet.SerializationSupport.getArray(write); + Invariants.checkState(length == bits.length); + for (int i = 0; i < length; i++) + out.putLong(bits[i]); + } + + public static WaitingOn deserialize(Deps deps, ByteBuffer in) throws IOException + { + int length = (deps.txnIdCount() + 63) / 64; + ImmutableBitSet waitingOnCommit = deserialize(length, in); + ImmutableBitSet waitingOnApply = deserialize(length, in); + ImmutableBitSet appliedOrInvalidated = deserialize(length, in); + return new WaitingOn(deps, waitingOnCommit, waitingOnApply, appliedOrInvalidated); + } + + private static ImmutableBitSet deserialize(int length, ByteBuffer in) + { + long[] bits = new long[length]; + for (int i = 0 ; i < length ; ++i) + bits[i] = in.getLong(); + return ImmutableBitSet.SerializationSupport.construct(bits); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index b82013faa012..7c5794a66d67 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -350,9 +350,9 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam // any that aren't can just use executeAt.hlc AccordSafeCommandsForKey cfk = ((AccordSafeCommandStore) safeStore).commandsForKey((RoutableKey) key); cfk.updateLastExecutionTimestamps(executeAt, true); - long timestamp = cfk.current().timestampMicrosFor(executeAt, true); + long timestamp = cfk.timestampMicrosFor(executeAt, true); // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) - int nowInSeconds = cfk.current().nowInSecondsFor(executeAt, true); + int nowInSeconds = cfk.nowInSecondsFor(executeAt, true); List> results = new ArrayList<>(); forEachWithKey((PartitionKey) key, write -> results.add(write.write(timestamp, nowInSeconds))); diff --git a/src/java/org/apache/cassandra/utils/CollectionSerializers.java b/src/java/org/apache/cassandra/utils/CollectionSerializers.java index 7e672c431f82..0cdd5685abe5 100644 --- a/src/java/org/apache/cassandra/utils/CollectionSerializers.java +++ b/src/java/org/apache/cassandra/utils/CollectionSerializers.java @@ -39,7 +39,6 @@ public class CollectionSerializers { - public static void serializeCollection(Collection values, DataOutputPlus out, int version, IVersionedSerializer valueSerializer) throws IOException { out.writeUnsignedVInt32(values.size()); @@ -151,4 +150,28 @@ private static > C deserializeCollection(Data result.add(serializer.deserialize(in, version)); return result; } + + public static IVersionedSerializer> newListSerializer(IVersionedSerializer itemSerializer) + { + return new IVersionedSerializer>() + { + @Override + public void serialize(List list, DataOutputPlus out, int version) throws IOException + { + serializeList(list, out, version, itemSerializer); + } + + @Override + public List deserialize(DataInputPlus in, int version) throws IOException + { + return deserializeList(in, version, itemSerializer); + } + + @Override + public long serializedSize(List t, int version) + { + return serializedListSize(t, version, itemSerializer); + } + }; + } } diff --git a/src/java/org/apache/cassandra/utils/btree/BTree.java b/src/java/org/apache/cassandra/utils/btree/BTree.java index 8674d714daf8..8ccecb1066cc 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTree.java +++ b/src/java/org/apache/cassandra/utils/btree/BTree.java @@ -29,6 +29,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Ordering; +import accord.utils.Invariants; import org.apache.cassandra.utils.BiLongAccumulator; import org.apache.cassandra.utils.BulkIterator; import org.apache.cassandra.utils.LongAccumulator; @@ -140,6 +141,13 @@ public static Object[] build(BulkIterator sourc return buildRoot(source, size, updateF); } + public static Object[] unsafeAllocateNonEmptyLeaf(int size) + { + Invariants.checkArgument(size > 0, "size should be non-zero"); + Invariants.checkArgument(size <= MAX_KEYS, "size (%s) should be no more than %s", size, MAX_KEYS); + return new Object[size | 1]; + } + /** * Build a leaf with {@code size} elements taken in bulk from {@code insert}, and apply {@code updateF} to these elements */ diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index fcbddc259627..4b8b3fbb146b 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -504,7 +504,7 @@ public static Message.Header deserializeHeader(IMessage message) @Override public void receiveMessage(IMessage message) { - sync(receiveMessageRunnable(message)).accept(false); + async(receiveMessageRunnable(message)).apply(false); } @Override diff --git a/test/distributed/org/apache/cassandra/distributed/test/CompactionDiskSpaceTest.java b/test/distributed/org/apache/cassandra/distributed/test/CompactionDiskSpaceTest.java index 6cc0fdfe4970..d6418e696bab 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/CompactionDiskSpaceTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/CompactionDiskSpaceTest.java @@ -58,7 +58,7 @@ public void testNoSpaceLeft() throws IOException cluster.schemaChange("create table "+KEYSPACE+".tbl (id int primary key, x int) with compaction = {'class':'SizeTieredCompactionStrategy'}"); cluster.coordinator(1).execute("insert into "+KEYSPACE+".tbl (id, x) values (1,1)", ConsistencyLevel.ALL); cluster.get(1).flush(KEYSPACE); - cluster.setUncaughtExceptionsFilter((t) -> t.getMessage() != null && t.getMessage().contains("Not enough space for compaction")); + cluster.setUncaughtExceptionsFilter((t) -> t.getMessage() != null && t.getMessage().contains("Not enough space for compaction") && (t.getMessage().contains(KEYSPACE+".tbl") || t.getMessage().contains("system_"))); cluster.get(1).runOnInstance(() -> { ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl"); BB.estimatedRemaining.set(2000); diff --git a/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java b/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java index e1a0a91e32b9..ffe65c49dc66 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java @@ -45,6 +45,7 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.NoPayload; import org.apache.cassandra.net.Verb; +import org.assertj.core.api.Assertions; import static org.apache.cassandra.distributed.api.Feature.GOSSIP; import static org.apache.cassandra.distributed.api.Feature.NETWORK; @@ -214,7 +215,7 @@ public void testMessageMatching() throws Throwable Message decoded = Instance.deserializeMessage(msg); return (Integer) decoded.verb().id; }).call(); - Assert.assertTrue(verbs.contains(id)); + Assertions.assertThat(verbs).describedAs("Unexpected verb %s", Verb.fromId(id)).contains(id); counter.incrementAndGet(); return false; }).drop(); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index 97071c6678b1..cd202fc72935 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -21,6 +21,7 @@ import java.net.InetAddress; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; @@ -30,6 +31,7 @@ import accord.local.CommandStore; import accord.local.PreLoadContext; import accord.primitives.Timestamp; +import accord.topology.TopologyManager; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; @@ -165,7 +167,6 @@ public synchronized void forSession(Consumer consumer) @Test public void bootstrapTest() throws Throwable { - int originalNodeCount = 2; int expandedNodeCount = originalNodeCount + 1; @@ -173,7 +174,7 @@ public void bootstrapTest() throws Throwable .withoutVNodes() .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(expandedNodeCount)) .withNodeIdTopology(NetworkTopology.singleDcNetworkTopology(expandedNodeCount, "dc0", "rack0")) - .withConfig(config -> config.with(NETWORK, GOSSIP)) + .withConfig(config -> config.set("accord_shard_count", 2).with(NETWORK, GOSSIP)) .start()) { long initialMax = maxEpoch(cluster); @@ -271,15 +272,37 @@ public void bootstrapTest() throws Throwable awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { CommandStore commandStore = safeStore.commandStore(); - Assert.assertEquals(0, commandStore.maxBootstrapEpoch()); Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.bootstrapBeganAt().keySet())); Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.safeToRead().keySet())); +// +// Assert.assertTrue(commandStore.maxBootstrapEpoch() > 0); +// Assert.assertTrue(commandStore.bootstrapBeganAt().isEmpty()); +// Assert.assertTrue(commandStore.safeToRead().isEmpty()); })); }); } cluster.get(3).runOnInstance(() -> { List> ranges = StorageService.instance.getLocalRanges("ks"); + TopologyManager topologyManager = service().node().topology(); + for (long epoch = topologyManager.minEpoch() ; epoch <= topologyManager.epoch() ; ++epoch) + { + CountDownLatch latch = new CountDownLatch(1); + topologyManager.epochReady(epoch).data.addCallback(latch::countDown); + while (true) + { + try + { + if (latch.await(1L, TimeUnit.SECONDS)) + break; + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + } + } + for (int key = 0; key < 100; key++) { UntypedResultSet result = QueryProcessor.executeInternal("SELECT * FROM ks.tbl WHERE k=?", key); @@ -294,7 +317,6 @@ public void bootstrapTest() throws Throwable if (safeStore.ranges().currentRanges().contains(partitionKey)) { CommandStore commandStore = safeStore.commandStore(); - Assert.assertTrue(commandStore.maxBootstrapEpoch() > 0); Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); Assert.assertFalse(commandStore.safeToRead().isEmpty()); @@ -437,7 +459,6 @@ public void moveTest() throws Throwable if (!safeStore.ranges().allAt(preMove).contains(partitionKey)) { CommandStore commandStore = safeStore.commandStore(); - Assert.assertTrue(commandStore.maxBootstrapEpoch() > 0); Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); Assert.assertFalse(commandStore.safeToRead().isEmpty()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index a113d69aecb9..8dada5ba9e8e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -173,8 +173,9 @@ public void testMultipleShards() throws Exception sb.append(String.format("SELECT * FROM %s WHERE k=%s AND c=0;\n", currentTable, keyStrings.get(keyStrings.size() - 1))); sb.append("COMMIT TRANSACTION"); - Unseekables routables = AccordTestUtils.createTxn(sb.toString()).keys().toUnseekables(); - Topologies topology = AccordService.instance().topology().withUnsyncedEpochs(routables, AccordService.instance().topology().epoch()); + Unseekables routables = AccordTestUtils.createTxn(sb.toString()).keys().toParticipants(); + long epoch = AccordService.instance().topology().epoch(); + Topologies topology = AccordService.instance().topology().withUnsyncedEpochs(routables, epoch, epoch); // we don't detect out-of-bounds read/write yet, so use this to validate we reach different shards Assertions.assertThat(topology.totalShards()).isEqualTo(2); }); diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java index 778e44c80894..4e5dd730d7b4 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java @@ -44,8 +44,8 @@ import static org.apache.cassandra.simulator.asm.TransformationKind.HASHCODE; import static org.apache.cassandra.simulator.asm.TransformationKind.SYNCHRONIZED; import static org.apache.cassandra.simulator.asm.Utils.deterministicToString; -import static org.apache.cassandra.simulator.asm.Utils.visitEachRefType; import static org.apache.cassandra.simulator.asm.Utils.generateTryFinallyProxyCall; +import static org.apache.cassandra.simulator.asm.Utils.visitEachRefType; import static org.objectweb.asm.Opcodes.ACC_PRIVATE; import static org.objectweb.asm.Opcodes.ACC_STATIC; import static org.objectweb.asm.Opcodes.ACC_SYNTHETIC; @@ -182,7 +182,6 @@ private static boolean contains(int value, int mask) public void visit(int version, int access, String name, String signature, String superName, String[] interfaces) { super.visit(version, makePublic(access), name, signature, superName, interfaces); - } @Override diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 317d3711d402..1093c347e11d 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -230,7 +230,7 @@ private static PartitionKey toKey(int a) private static FullRoute route() { - return new FullKeyRoute(key, new RoutingKey[]{ key }); + return new FullKeyRoute(key, true, new RoutingKey[]{ key }); } private static final RoutingKey key = new AccordRoutingKey.TokenKey("system", new Murmur3Partitioner.LongToken(42)); diff --git a/test/unit/org/apache/cassandra/concurrent/AdaptingScheduledExecutorPlus.java b/test/unit/org/apache/cassandra/concurrent/AdaptingScheduledExecutorPlus.java new file mode 100644 index 000000000000..9a84b2600df5 --- /dev/null +++ b/test/unit/org/apache/cassandra/concurrent/AdaptingScheduledExecutorPlus.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.concurrent; + +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.Executors; // checkstyle: permit this import +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + +import accord.utils.async.AsyncChain; +import org.apache.cassandra.utils.WithResources; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; + +import static com.google.common.primitives.Longs.max; +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +public class AdaptingScheduledExecutorPlus implements ScheduledExecutorPlus +{ + private final ScheduledExecutorService delegate; + + public AdaptingScheduledExecutorPlus(ScheduledExecutorService delegate) + { + this.delegate = delegate; + } + + protected ScheduledExecutorService delegate() + { + return delegate; + } + + @Override + public ScheduledFuture scheduleSelfRecurring(Runnable run, long delay, TimeUnit units) + { + return schedule(run, delay, units); + } + + @Override + public ScheduledFuture scheduleAt(Runnable run, long deadline) + { + return schedule(run, max(0, deadline - nanoTime()), NANOSECONDS); + } + + @Override + public ScheduledFuture scheduleTimeoutAt(Runnable run, long deadline) + { + return scheduleTimeoutWithDelay(run, max(0, deadline - nanoTime()), NANOSECONDS); + } + + @Override + public ScheduledFuture scheduleTimeoutWithDelay(Runnable run, long delay, TimeUnit units) + { + return schedule(run, delay, units); + } + + @Override + public ScheduledFuture schedule(Runnable command, long delay, TimeUnit unit) + { + return delegate().schedule(command, delay, unit); + } + + @Override + public ScheduledFuture schedule(Callable callable, long delay, TimeUnit unit) + { + return delegate().schedule(callable, delay, unit); + } + + @Override + public ScheduledFuture scheduleAtFixedRate(Runnable command, long initialDelay, long period, TimeUnit unit) + { + return delegate().scheduleAtFixedRate(command, initialDelay, period, unit); + } + + @Override + public ScheduledFuture scheduleWithFixedDelay(Runnable command, long initialDelay, long delay, TimeUnit unit) + { + return delegate().scheduleWithFixedDelay(command, initialDelay, delay, unit); + } + + @Override + public void shutdown() + { + delegate().shutdown(); + } + + @Override + public List shutdownNow() + { + return delegate().shutdownNow(); + } + + @Override + public boolean isShutdown() + { + return delegate().isShutdown(); + } + + @Override + public boolean isTerminated() + { + return delegate().isTerminated(); + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException + { + return delegate().awaitTermination(timeout, unit); + } + + @Override + public Future submit(Callable task) + { + return wrap(delegate().submit(task)); + } + + @Override + public Future submit(Runnable task, T result) + { + return wrap(delegate().submit(task, result)); + } + + @Override + public Future submit(Runnable task) + { + return wrap(delegate().submit(task)); + } + + @Override + public void execute(WithResources withResources, Runnable task) + { + execute(TaskFactory.standard().toExecute(withResources, task)); + } + + @Override + public Future submit(WithResources withResources, Callable task) + { + class Catch { T value;} + Catch c = new Catch(); + Runnable exec = TaskFactory.standard().toExecute(withResources, () -> { + try + { + c.value = task.call(); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + }); + return submit(() -> { + exec.run(); + return c.value; + }); + } + + @Override + public Future submit(WithResources withResources, Runnable task) + { + return submit(TaskFactory.standard().toExecute(withResources, task)); + } + + @Override + public Future submit(WithResources withResources, Runnable task, T result) + { + return submit(Executors.callable(TaskFactory.standard().toSubmit(withResources, task), result)); + } + + @Override + public boolean inExecutor() + { + return false; + } + + @Override + public void execute(Runnable command) + { + delegate().execute(command); + } + + @Override + public int getCorePoolSize() + { + return 0; + } + + @Override + public void setCorePoolSize(int newCorePoolSize) + { + + } + + @Override + public int getMaximumPoolSize() + { + return 0; + } + + @Override + public void setMaximumPoolSize(int newMaximumPoolSize) + { + + } + + @Override + public int getActiveTaskCount() + { + return 0; + } + + @Override + public long getCompletedTaskCount() + { + return 0; + } + + @Override + public int getPendingTaskCount() + { + return 0; + } + + private static org.apache.cassandra.utils.concurrent.Future wrap(java.util.concurrent.Future future) + { + if (future instanceof org.apache.cassandra.utils.concurrent.Future) + return (Future) future; + if (future instanceof AsyncChain) + { + AsyncChain chain = (AsyncChain) future; + AsyncPromise promise = new AsyncPromise<>(); + chain.begin((s, f) -> { + if (f != null) promise.setFailure(f); + else promise.setSuccess(s); + }); + + return promise; + } + throw new IllegalStateException("Unexpected future type: " + future.getClass()); + } +} diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index a167be90e3b6..3bdaa62f0a33 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -163,7 +163,6 @@ import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileSystems; import org.apache.cassandra.io.util.FileUtils; -import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.CassandraMetricsRegistry; import org.apache.cassandra.metrics.ClientMetrics; import org.apache.cassandra.net.MessagingService; @@ -195,6 +194,10 @@ import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_CONNECTION_TIMEOUT_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_READ_TIMEOUT_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_RANDOM_SEED; @@ -209,10 +212,6 @@ import static org.apache.cassandra.metrics.CassandraMetricsRegistry.createMetricsKeyspaceTables; import static org.apache.cassandra.schema.SchemaConstants.VIRTUAL_METRICS; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; /** * Base class for CQL tests. @@ -252,7 +251,6 @@ public abstract class CQLTester protected static int nativePort; protected static final InetAddress nativeAddr; - protected static final Set remoteAddrs = new HashSet<>(); private static final Map clusters = new HashMap<>(); private static final Map sessions = new HashMap<>(); @@ -445,7 +443,7 @@ public static void setUpClass() protected static void prePrepareServer() { CassandraRelevantProperties.SUPERUSER_SETUP_DELAY_MS.setLong(0); - ServerTestUtils.daemonInitialization(); + daemonInitialization(); if (ROW_CACHE_SIZE_IN_MIB > 0) DatabaseDescriptor.setRowCacheSizeInMiB(ROW_CACHE_SIZE_IN_MIB); StorageService.instance.registerMBeans(); @@ -453,6 +451,12 @@ protected static void prePrepareServer() SnapshotManager.instance.registerMBean(); } + // So derived classes can get enough intialization to start setting DatabaseDescriptor options + public static void daemonInitialization() + { + ServerTestUtils.daemonInitialization(); + } + @AfterClass public static void tearDownClass() { diff --git a/test/unit/org/apache/cassandra/cql3/UntypedResultSetTest.java b/test/unit/org/apache/cassandra/cql3/UntypedResultSetTest.java new file mode 100644 index 000000000000..4973cae87de2 --- /dev/null +++ b/test/unit/org/apache/cassandra/cql3/UntypedResultSetTest.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.Test; + +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.DoubleType; +import org.apache.cassandra.utils.AbstractTypeGenerators; +import org.apache.cassandra.utils.Generators; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.utils.AccordGenerators.fromQT; +import static org.assertj.core.api.Assertions.assertThat; + +public class UntypedResultSetTest +{ + @Test + public void rowToString() + { + qt().forAll(row()).check(row -> { + String str = row.toString(); + assertThat(str.split(" \\| ")).hasSize(row.getColumns().size()); + assertThat(str).doesNotContain("null"); + }); + } + + @Test + public void resultSetToString() + { + qt().forAll(resultSet().map(UntypedResultSet::create)).check(rs -> { + String str = rs.toStringUnsafe(); + assertThat(str.split("\n")).describedAs("toStringUnsafe expected to return different size of rows", str).hasSize(rs.size() + 2); // header + footer + }); + } + + private static Gen> columns() + { + Gen identifierGen = fromQT(Generators.IDENTIFIER_GEN); + // this is testing toString so don't really need a complex type... + return rs -> { + int numColumns = rs.nextInt(1, 10); + String ks = identifierGen.next(rs); + String tableName = identifierGen.next(rs); + List names = Gens.lists(identifierGen).unique().ofSize(numColumns).next(rs); + // rather than generate the type, use a simple type like double as it doesn't matter... the type is not epxected to be parsable, so conflicts in output format doen't matter + List> types = names.stream().map(ignore -> DoubleType.instance).collect(Collectors.toList()); + List columns = new ArrayList<>(numColumns); + for (int i = 0; i < numColumns; i++) + columns.add(new ColumnSpecification(ks, tableName, new ColumnIdentifier(names.get(i), true), types.get(i))); + return columns; + }; + } + + private static Gen row() + { + return columns().flatMap(columns -> rs -> { + List data = new ArrayList<>(columns.size()); + for (int i = 0; i < columns.size(); i++) + { + AbstractTypeGenerators.TypeSupport support = AbstractTypeGenerators.getTypeSupport(columns.get(i).type); + data.add(fromQT(support.bytesGen()).next(rs)); + } + return new UntypedResultSet.Row(columns, data); + }); + } + + private static Gen resultSet() + { + Gen> columnsGen = columns(); + return rs -> { + ResultSet result = new ResultSet(new ResultSet.ResultMetadata(columnsGen.next(rs))); + List> dataGens = result.metadata.names.stream().map(c -> fromQT(AbstractTypeGenerators.getTypeSupport(c.type).bytesGen())).collect(Collectors.toList()); + int numRows = rs.nextInt(0, 10); + for (int i = 0; i < numRows; i++) + { + List row = dataGens.stream().map(g -> g.next(rs)).collect(Collectors.toList()); + result.addRow(row); + } + return result; + }; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/db/RecoveryManagerFlushedTest.java b/test/unit/org/apache/cassandra/db/RecoveryManagerFlushedTest.java index 4db364f24ed2..42ba2ed6f174 100644 --- a/test/unit/org/apache/cassandra/db/RecoveryManagerFlushedTest.java +++ b/test/unit/org/apache/cassandra/db/RecoveryManagerFlushedTest.java @@ -97,6 +97,7 @@ public static void defineSchema() throws ConfigurationException /* test that commit logs do not replay flushed data */ public void testWithFlush() throws Exception { + // Flush everything that may be in the commit log now to start fresh CompactionManager.instance.disableAutoCompaction(); for (String ks : Schema.instance.getKeyspaces()) Util.flush(Keyspace.open(ks)); diff --git a/test/unit/org/apache/cassandra/db/RowCacheTest.java b/test/unit/org/apache/cassandra/db/RowCacheTest.java index ec5e6176c168..3ecc0007070d 100644 --- a/test/unit/org/apache/cassandra/db/RowCacheTest.java +++ b/test/unit/org/apache/cassandra/db/RowCacheTest.java @@ -55,7 +55,6 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaTestUtil; import org.apache.cassandra.service.CacheService; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.reads.range.TokenUpdater; import org.apache.cassandra.utils.ByteBufferUtil; @@ -299,7 +298,6 @@ public void testRowCacheLoad() throws Exception @Test public void testRowCacheCleanup() throws Exception { - StorageService.instance.initServer(); CacheService.instance.setRowCacheCapacityInMB(1); rowCacheLoad(100, Integer.MAX_VALUE, 1000); @@ -321,7 +319,6 @@ public void testRowCacheCleanup() throws Exception @Test public void testInvalidateRowCache() throws Exception { - StorageService.instance.initServer(); CacheService.instance.setRowCacheCapacityInMB(1); rowCacheLoad(100, Integer.MAX_VALUE, 1000); diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java new file mode 100644 index 000000000000..e09a183b15e2 --- /dev/null +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -0,0 +1,406 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.compaction; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.Result; +import accord.api.RoutingKey; +import accord.local.CheckedCommands; +import accord.local.CommandStore; +import accord.local.DurableBefore; +import accord.local.RedundantBefore; +import accord.local.SaveStatus; +import accord.local.Status.Durability; +import accord.primitives.Ballot; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.PartialDeps; +import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.Seekable; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.Txn.Kind; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import org.agrona.collections.Int2ObjectHashMap; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ColumnFamilyStore.FlushReason; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.partitions.ImmutableBTreePartition; +import org.apache.cassandra.db.partitions.Partition; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.io.sstable.ISSTableScanner; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; +import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows; +import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Pair; + +import static accord.impl.CommandsForKey.NO_LAST_EXECUTED_HLC; +import static accord.local.PreLoadContext.contextFor; +import static accord.utils.async.AsyncChains.getUninterruptibly; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.MAJORITY; +import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.NOT_DURABLE; +import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.UNIVERSAL; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.apache.cassandra.service.accord.AccordKeyspace.COMMANDS; +import static org.apache.cassandra.service.accord.AccordKeyspace.COMMANDS_FOR_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class CompactionAccordIteratorsTest +{ + private static final Logger logger = LoggerFactory.getLogger(CompactionAccordIteratorsTest.class); + private static final long CLOCK_START = 44; + private static final long HLC_START = 41; + private static final int NODE = 1; + private static final int EPOCH = 1; + private static final AtomicLong clock = new AtomicLong(CLOCK_START); + private static final TxnId LT_TXN_ID = AccordTestUtils.txnId(EPOCH, HLC_START, NODE); + private static final TxnId TXN_ID = AccordTestUtils.txnId(EPOCH, LT_TXN_ID.hlc() + 1, NODE); + private static final TxnId SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 1, NODE, Kind.Read); + private static final TxnId GT_TXN_ID = SECOND_TXN_ID; + // For CommandsForKey where we test with two commands + private static final TxnId[] TXN_IDS = new TxnId[] {TXN_ID, SECOND_TXN_ID}; + private static final TxnId GT_SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, SECOND_TXN_ID.hlc() + 1, NODE); + + static TableMetadata table; + static FullRoute route; + + @BeforeClass + public static void beforeClass() throws Throwable + { + SchemaLoader.prepareServer(); + // Schema doesn't matter since this is a metadata only test + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + StorageService.instance.initServer(); + table = ColumnFamilyStore.getIfExists("ks", "tbl").metadata(); + route = AccordTestUtils.keys(table, 42).toRoute(AccordTestUtils.key(table, 42).toUnseekable()); + } + + // This isn't attempting to be an exhaustive test of Commands.shouldCleanup just that the return values + // are handled correctly and that the interaction between the CompactionIterator and shoudCleanup seems reasonable + @Test + public void testAccordCommandsPurger() throws Throwable + { + // Null redudnant before should make no change since we have no information on this CommandStore + testAccordCommandsPurger(null, DurableBefore.EMPTY, expectAccordCommandsNoChange()); + // Universally durable (and global to boot) should be erased since literally everyone knows about it + // The way Commands.shouldCleanup was implemented (when this was written) it doesn't check redundantBefore + // at all for this + testAccordCommandsPurger(redundantBefore(LT_TXN_ID), durableBefore(UNIVERSAL), expectAccordCommandsErase()); + // With redundantBefore at the txnId there should be no change because it is < not <= + testAccordCommandsPurger(redundantBefore(TXN_ID), durableBefore(MAJORITY), expectAccordCommandsNoChange()); + testAccordCommandsPurger(redundantBefore(LT_TXN_ID), durableBefore(MAJORITY), expectAccordCommandsNoChange()); + // Durable at a majority can be truncated with minimal data preserved, it must be redundant for this to occur + testAccordCommandsPurger(redundantBefore(GT_TXN_ID), durableBefore(MAJORITY), expectAccordCommandsTruncated()); + // Not durable can be truncated, but needs the outcome preserved, it must be redundant for this to occur + testAccordCommandsPurger(redundantBefore(GT_TXN_ID), durableBefore(NOT_DURABLE), expectAccordCommandsTruncatedWithOutcome()); + // Since it is redudnant but not known durable (outside of local) + testAccordCommandsPurger(redundantBefore(GT_TXN_ID), durableBefore(DurableBeforeType.EMPTY), expectAccordCommandsTruncatedWithOutcome()); + // Never makes it past redundantBefore being LT_TXN_ID + testAccordCommandsPurger(redundantBefore(LT_TXN_ID), durableBefore(DurableBeforeType.EMPTY), expectAccordCommandsNoChange()); + } + + private static void testAccordCommandsPurger(RedundantBefore redundantBefore, DurableBefore durableBefore, Consumer> expectedResult) throws Throwable + { + testWithCommandStore((commandStore) -> { + IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, durableBefore); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, COMMANDS); + List result = compactCFS(mockAccordService, cfs); + expectedResult.accept(result); + }, false); + } + + @Test + public void testAccordCommandsForKeyPurger() throws Throwable + { + testAccordCommandsForKeyPurger(null, expectedAccordCommandsForKeyNoChange()); + testAccordCommandsForKeyPurger(redundantBefore(LT_TXN_ID), expectedAccordCommandsForKeyNoChange()); + testAccordCommandsForKeyPurger(redundantBefore(TXN_ID), expectedAccordCommandsForKeyNoChange()); + testAccordCommandsForKeyPurger(redundantBefore(GT_TXN_ID), expectedAccordCommandsForKeyEraseOne()); + testAccordCommandsForKeyPurger(redundantBefore(GT_SECOND_TXN_ID), expectedAccordCommandsForKeyEraseAll()); + } + + private static Consumer> expectedAccordCommandsForKeyNoChange() + { + return partitions -> { + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + Row staticRow = partition.getRow(Clustering.STATIC_CLUSTERING); + assertEquals(4, Iterables.size(staticRow)); + assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); + assertEquals(TXN_ID, CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); + assertEquals(TXN_ID, CommandsForKeyRows.getLastWriteTimestamp(staticRow)); + assertEquals(TXN_ID.hlc(), CommandsForKeyRows.getLastExecutedMicros(staticRow)); + assertEquals(4, Iterators.size(partition.unfilteredIterator())); + UnfilteredRowIterator rows = partition.unfilteredIterator(); + // One row per series + for (int i = 0; i < 2; i++) + for (TxnId txnId : TXN_IDS) + assertEquals(txnId, CommandsForKeyRows.getTimestamp((Row)rows.next())); + }; + } + + private static Consumer> expectedAccordCommandsForKeyEraseOne() + { + return partitions -> { + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + Row staticRow = partition.getRow(Clustering.STATIC_CLUSTERING); + assertEquals(4, Iterables.size(staticRow)); + assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); + assertEquals(Timestamp.NONE, CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); + assertEquals(Timestamp.NONE, CommandsForKeyRows.getLastWriteTimestamp(staticRow)); + assertEquals(NO_LAST_EXECUTED_HLC, CommandsForKeyRows.getLastExecutedMicros(staticRow)); + assertEquals(2, Iterators.size(partition.unfilteredIterator())); + UnfilteredRowIterator rows = partition.unfilteredIterator(); + assertEquals(TXN_IDS[1], CommandsForKeyRows.getTimestamp((Row)rows.next())); + assertEquals(TXN_IDS[1], CommandsForKeyRows.getTimestamp((Row)rows.next())); + }; + } + + private static Consumer> expectedAccordCommandsForKeyEraseAll() + { + return partitions -> assertEquals(0, partitions.size()); + } + + private static void testAccordCommandsForKeyPurger(RedundantBefore redundantBefore, Consumer> expectedResult) throws Throwable + { + testWithCommandStore((commandStore) -> { + IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, DurableBefore.EMPTY); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, COMMANDS_FOR_KEY); + List result = compactCFS(mockAccordService, cfs); + expectedResult.accept(result); + }, true); + } + + Consumer> expectAccordCommandsErase() + { + return partitions -> assertTrue(partitions.isEmpty()); + } + + Consumer> expectAccordCommandsTruncatedWithOutcome() + { + return partitions -> { + try + { + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + assertEquals(1, Iterators.size(partition.unfilteredIterator())); + ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); + Row row = (Row) partition.unfilteredIterator().next(); + assertEquals(6, row.columnCount()); + assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); + assertEquals(1, ((TxnData)CommandRows.getResult(row)).entrySet().size()); + assertNotNull(CommandRows.getWrites(row)); + assertEquals(Durability.Local, CommandRows.getDurability(row)); + assertEquals(TXN_ID, CommandRows.getExecuteAt(row)); + assertEquals(route, CommandRows.getRoute(row)); + assertEquals(SaveStatus.TruncatedApplyWithOutcome, AccordKeyspace.CommandRows.getStatus(row)); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + }; + } + + Consumer> expectAccordCommandsTruncated() + { + return partitions -> { + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + assertEquals(1, Iterators.size(partition.unfilteredIterator())); + ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); + Row row = (Row)partition.unfilteredIterator().next(); + assertEquals(4, row.columnCount()); + assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); + assertEquals(Durability.Local, CommandRows.getDurability(row)); + assertEquals(TXN_ID, CommandRows.getExecuteAt(row)); + assertEquals(route, CommandRows.getRoute(row)); + assertEquals(SaveStatus.TruncatedApply, AccordKeyspace.CommandRows.getStatus(row)); + }; + } + + Consumer> expectAccordCommandsNoChange() + { + return partitions -> { + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + assertEquals(1, Iterators.size(partition.unfilteredIterator())); + ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); + Row row = (Row)partition.unfilteredIterator().next(); + assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); + assertEquals(SaveStatus.Applied, AccordKeyspace.CommandRows.getStatus(row)); + }; + } + + + private static RedundantBefore redundantBefore(TxnId txnId) + { + Ranges ranges = AccordTestUtils.fullRange(AccordTestUtils.keys(table, 42)); + return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, LT_TXN_ID); + } + + enum DurableBeforeType + { + UNIVERSAL, + MAJORITY, + NOT_DURABLE, + EMPTY + } + + private static DurableBefore durableBefore(DurableBeforeType durableBeforeType) + { + Ranges ranges = AccordTestUtils.fullRange(AccordTestUtils.keys(table, 42)); + switch (durableBeforeType) + { + case UNIVERSAL: + return DurableBefore.create(ranges, GT_TXN_ID, GT_TXN_ID); + case MAJORITY: + return DurableBefore.create(ranges, GT_TXN_ID, LT_TXN_ID); + case NOT_DURABLE: + return DurableBefore.create(ranges, LT_TXN_ID, LT_TXN_ID); + case EMPTY: + return DurableBefore.EMPTY; + default: + throw new IllegalStateException(); + } + } + + private static IAccordService mockAccordService(CommandStore commandStore, RedundantBefore redundantBefore, DurableBefore durableBefore) + { + IAccordService mockAccordService = mock(IAccordService.class); + Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); + if (redundantBefore != null) + redundantBefores.put(commandStore.id(), redundantBefore); + when(mockAccordService.getRedundantBeforesAndDurableBefore()).thenReturn(Pair.create(redundantBefores, durableBefore)); + return mockAccordService; + } + + interface TestWithCommandStore + { + void test(AccordCommandStore commandStore) throws Throwable; + } + + private static void testWithCommandStore(TestWithCommandStore test, boolean additionalCommand) throws Throwable + { + Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores().forEach(ColumnFamilyStore::truncateBlocking); + clock.set(CLOCK_START); + AccordCommandStore commandStore = AccordTestUtils.createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + TxnId[] txnIds = additionalCommand ? TXN_IDS : new TxnId[] {TXN_ID}; + for (TxnId txnId : txnIds) + { + Txn txn = txnId.rw().isWrite() ? AccordTestUtils.createWriteTxn(42) : AccordTestUtils.createTxn(42); + Seekable key = txn.keys().get(0); + PartialDeps partialDeps = Deps.NONE.slice(AccordTestUtils.fullRange(txn)); + PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); + RoutingKey homeKey = key.someIntersectingRoutingKey(commandStore.unsafeRangesForEpoch().currentRanges()); + PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); + getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys()), safe -> { + CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, txnId, partialDeps); + CheckedCommands.commit(safe, txnId, route, null, partialTxn, txnId, partialDeps); + Pair result = AccordTestUtils.processTxnResultDirect(safe, txnId, partialTxn, txnId); + CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right); + return safe.get(txnId, homeKey).current(); + }).beginAsResult()); + } + + commandStore.executeBlocking(() -> { + // clear cache and wait for post-eviction writes to complete + long cacheSize = commandStore.getCacheSize(); + commandStore.setCacheSize(0); + commandStore.setCacheSize(cacheSize); + commandStore.cache().awaitSaveResults(); + }); + + UntypedResultSet commandsTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS + ";"); + assertEquals(txnIds.length, commandsTable.size()); + Iterator commandsTableIterator = commandsTable.iterator(); + for (TxnId txnId : txnIds) + assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); + UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS_FOR_KEY + ";"); + assertEquals(txnIds.length * 2, commandsForKeyTable.size()); + Iterator commandsForKeyTableIterator = commandsTable.iterator(); + for (TxnId txnId : txnIds) + assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsForKeyTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); + System.out.println(commandsForKeyTable); + test.test(commandStore); + } + + private static List compactCFS(IAccordService mockAccordService, ColumnFamilyStore cfs) + { + cfs.forceBlockingFlush(FlushReason.UNIT_TESTS); + List scanners = cfs.getLiveSSTables().stream().map(SSTableReader::getScanner).collect(Collectors.toList()); + List result = new ArrayList<>(); + try (CompactionController controller = new CompactionController(ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, cfs.name), Collections.emptySet(), 0); + CompactionIterator compactionIterator = new CompactionIterator(OperationType.COMPACTION, scanners, controller, FBUtilities.nowInSeconds(), null, ActiveCompactionsTracker.NOOP, null, () -> mockAccordService)) + { + while (compactionIterator.hasNext()) + { + try (UnfilteredRowIterator partition = compactionIterator.next()) + { + result.add(ImmutableBTreePartition.create(partition)); + } + } + } + verify(mockAccordService, times(1)).getRedundantBeforesAndDurableBefore(); + return result; + } +} diff --git a/test/unit/org/apache/cassandra/dht/AccordSplitterTest.java b/test/unit/org/apache/cassandra/dht/AccordSplitterTest.java new file mode 100644 index 000000000000..2ddec3906fd1 --- /dev/null +++ b/test/unit/org/apache/cassandra/dht/AccordSplitterTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.dht; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; + +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.ShardDistributor; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.utils.Gens; +import accord.utils.RandomSource; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.AccordGenerators; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +public class AccordSplitterTest +{ + @BeforeClass + public static void setup() throws NoSuchFieldException, IllegalAccessException + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + @Test + public void split() + { + qt().forAll(AccordGenerators.range(), Gens.random()).check((range, rs) -> { + AccordRoutingKey startKey = (AccordRoutingKey) range.start(); + AccordRoutingKey endKey = (AccordRoutingKey) range.end(); + IPartitioner partitioner = getPartitioner(range, rs); + // this section is filtering out known bugs + // TODO (now): fix the fact accordSplitter returns AccordBytesSplitter which will fail for java.lang.ClassCastException: org.apache.cassandra.dht.LocalPartitioner$LocalToken cannot be cast to org.apache.cassandra.dht.ByteOrderedPartitioner$BytesToken + // spoke with Benedict and he agrees that it doesn't make sense to split a local partitioner range, but this requires pushing this back into the API (similar to how C* returns Optional) + if (partitioner instanceof LocalPartitioner) + return; + // TODO (now): java.lang.AssertionError: [size is not larger than 0 for partitioner org.apache.cassandra.dht.OrderPreservingPartitioner@54a67a45] + if (partitioner instanceof OrderPreservingPartitioner && endKey.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL) + return; + // TODO (now): [size is not larger than 0 for partitioner org.apache.cassandra.dht.ByteOrderedPartitioner@44e3a2b2] + if (partitioner instanceof ByteOrderedPartitioner && endKey.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL) + return; + // TODO (now): [num splits not as expected for partitioner org.apache.cassandra.dht.ByteOrderedPartitioner@4c550889]\nExpected size to be between: <47> and <48> but was:<62> in: + if (partitioner instanceof ByteOrderedPartitioner && startKey.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL) + return; + // TODO (now): [num splits not as expected for partitioner org.apache.cassandra.dht.ByteOrderedPartitioner@13518f37]\nExpected size to be between: <11> and <12> but was:<13> in: + if (partitioner instanceof ByteOrderedPartitioner) + return; + // TODO (now): [num splits not as expected for partitioner org.apache.cassandra.dht.OrderPreservingPartitioner@4233e892]\nExpected size to be between: <51> and <52> but was:<54> in: + if (partitioner instanceof OrderPreservingPartitioner) + return; + AccordSplitter splitter = partitioner.accordSplitter().apply(Ranges.of(range)); + + BigInteger size = splitter.sizeOf(range); + Assertions.assertThat(size).describedAs("size is not larger than 0 for partitioner %s", partitioner).isGreaterThan(BigInteger.ZERO); + int maxSplits = 100; + int minSplits = 10; + if (size.compareTo(BigInteger.valueOf(maxSplits)) < 0) + maxSplits = size.intValue(); + if (size.compareTo(BigInteger.TEN) < 0) + minSplits = Math.min(2, maxSplits - 1); + int numSplits = rs.nextInt(minSplits, maxSplits); + List ranges = new ArrayList<>(numSplits); + BigInteger update = splitter.divide(size, numSplits); + BigInteger offset = BigInteger.ZERO; + while (offset.compareTo(size) < 0) + { + BigInteger end = offset.add(update); + ranges.add(splitter.subRange(range, offset, end)); + offset = end; + } + + // accord.local.ShardDistributor.EvenSplit.split attempts to detect this and work around it; a splitter is allowed to return slightly more in this case + Assertions.assertThat(ranges).describedAs("num splits not as expected for partitioner %s", partitioner).hasSizeBetween(numSplits, numSplits + 1); + + Ranges split = Ranges.of(ranges.toArray(new Range[0])).mergeTouching(); + Ranges missing = Ranges.of(range).subtract(split); + Assertions.assertThat(missing).isEmpty(); + + testEventSplit(partitioner, range, rs, numSplits); + }); + } + + private static void testEventSplit(IPartitioner partitioner, Range range, RandomSource rs, int numSplits) + { + ShardDistributor.EvenSplit splitter = new ShardDistributor.EvenSplit<>(numSplits, partitioner.accordSplitter()); + + Ranges topLevel = Ranges.of(range); + List ranges = splitter.split(topLevel); + + Assertions.assertThat(ranges).describedAs("num splits not as expected for partitioner %s", partitioner).hasSize(numSplits); + + Ranges split = ranges.stream().reduce(Ranges.EMPTY, Ranges::with).mergeTouching(); + Ranges missing = topLevel.subtract(split); + Assertions.assertThat(missing).isEmpty(); + } + + private static IPartitioner getPartitioner(Range range, RandomSource rs) + { + AccordRoutingKey key = (AccordRoutingKey) range.start(); + if (key.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL) + key = (AccordRoutingKey) range.end(); + if (key.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL) + return AccordGenerators.partitioner().next(rs); + + return key.token().getPartitioner(); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/index/CustomIndexTest.java b/test/unit/org/apache/cassandra/index/CustomIndexTest.java index 9f568a877394..483d109ec53c 100644 --- a/test/unit/org/apache/cassandra/index/CustomIndexTest.java +++ b/test/unit/org/apache/cassandra/index/CustomIndexTest.java @@ -20,7 +20,16 @@ */ package org.apache.cassandra.index; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -33,6 +42,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.junit.Assume; +import org.junit.BeforeClass; import org.junit.Test; import com.datastax.driver.core.exceptions.QueryValidationException; @@ -43,12 +53,21 @@ import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.cql3.restrictions.IndexRestrictions; import org.apache.cassandra.cql3.restrictions.StatementRestrictions; -import org.apache.cassandra.cql3.statements.schema.IndexTarget; import org.apache.cassandra.cql3.statements.ModificationStatement; -import org.apache.cassandra.db.*; import org.apache.cassandra.db.ColumnFamilyStore.FlushReason; import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; +import org.apache.cassandra.cql3.statements.schema.IndexTarget; +import org.apache.cassandra.db.CassandraWriteContext; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.RangeTombstone; +import org.apache.cassandra.db.ReadCommand; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.WriteContext; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.UTF8Type; @@ -80,6 +99,20 @@ public class CustomIndexTest extends CQLTester { + @BeforeClass + public static void setUpClass() // overrides CQLTester.setUpClass() + { + // Accord breaks indexBuildingPagesLargePartitions because it introduces blocking OpOrder.Group + // when it sees the schema change and forces a flush of the Accord keyspace topologies table + // which creates a blocking OpOrder.Group. + // The test is explicitly trying to assert none of the created groups are blocking and that is pretty + // fragile as implemented since any background things could create mark a group blocking becuase Keyspace.writeOrder + // is global + CQLTester.daemonInitialization(); + DatabaseDescriptor.setAccordTransactionsEnabled(false); + CQLTester.setUpClass(); + } + @Test public void testInsertsOnCfsBackedIndex() throws Throwable { @@ -1183,7 +1216,7 @@ public void removeRow(Row row) { } @Test - public void testFlushObserver() throws Throwable + public void testFlushObserver() { createTable("CREATE TABLE %s (k int, c int, s int static, v int, PRIMARY KEY (k, c))"); String indexName = "test_index_with_flush_observer"; diff --git a/test/unit/org/apache/cassandra/schema/ValidationTest.java b/test/unit/org/apache/cassandra/schema/ValidationTest.java index 9d072755dd77..630e815c00e4 100644 --- a/test/unit/org/apache/cassandra/schema/ValidationTest.java +++ b/test/unit/org/apache/cassandra/schema/ValidationTest.java @@ -18,17 +18,29 @@ */ package org.apache.cassandra.schema; -import java.util.*; - -import org.apache.cassandra.db.marshal.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.marshal.AbstractType; + import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class ValidationTest { + @BeforeClass + public static void beforeClass() + { + DatabaseDescriptor.daemonInitialization(); + } + @Test public void testIsNameValidPositive() { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 6031f01e7c2a..f5148fa48647 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -20,8 +20,6 @@ import java.util.concurrent.atomic.AtomicLong; -import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterables; import org.junit.Assert; import org.junit.Before; @@ -39,9 +37,12 @@ import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.RoutingKeys; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; +import accord.utils.ImmutableBitSet; +import accord.utils.SimpleBitSet; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.db.ColumnFamilyStore; @@ -53,7 +54,7 @@ import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.Pair; -import static accord.local.Status.Durability.Durable; +import static accord.local.Status.Durability.Majority; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.preaccepted; import static org.apache.cassandra.service.accord.AccordTestUtils.ballot; @@ -90,34 +91,37 @@ public void commandLoadSave() throws Throwable Key key = (Key)depTxn.keys().get(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - PartialDeps dependencies; - try (PartialDeps.Builder builder = PartialDeps.builder(depTxn.covering())) - { - builder.add(key, txnId(1, clock.incrementAndGet(), 1)); - dependencies = builder.build(); - } - QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, c, v) VALUES (0, 0, 1)"); TxnId oldTxnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId oldTxnId2 = txnId(1, clock.incrementAndGet(), 1); TxnId oldTimestamp = txnId(1, clock.incrementAndGet(), 1); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + PartialDeps dependencies; + try (PartialDeps.Builder builder = PartialDeps.builder(depTxn.covering())) + { + builder.add(key, oldTxnId1); + builder.add(key, oldTxnId2); + dependencies = builder.build(); + } + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); PartialTxn txn = createPartialTxn(0); - attrs.homeKey(key.toUnseekable()); - attrs.progressKey(key.toUnseekable()); - attrs.durability(Durable); + attrs.route(RoutingKeys.of(key.toUnseekable()).toRoute(key.toUnseekable())); + attrs.durability(Majority); Ballot promised = ballot(1, clock.incrementAndGet(), 1); Ballot accepted = ballot(1, clock.incrementAndGet(), 1); Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); attrs.partialDeps(dependencies); - ImmutableSortedSet waitingOnCommit = ImmutableSortedSet.of(oldTxnId1); - ImmutableSortedMap waitingOnApply = ImmutableSortedMap.of(oldTimestamp, oldTxnId2); + SimpleBitSet waitingOnCommit = new SimpleBitSet(2); + waitingOnCommit.set(0); + SimpleBitSet waitingOnApply = new SimpleBitSet(2); + waitingOnApply.set(1); + Command.WaitingOn waitingOn = new Command.WaitingOn(dependencies, new ImmutableBitSet(waitingOnCommit), new ImmutableBitSet(waitingOnApply), new ImmutableBitSet(2)); attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); Command command = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, - waitingOnCommit, waitingOnApply, result.left, result.right); + waitingOn, result.left, result.right); AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); safeCommand.set(command); @@ -149,13 +153,13 @@ public void commandsForKeyLoadSave() cfk.updateMax(maxTimestamp); cfk.updateLastExecutionTimestamps(txnId1, true); - Assert.assertEquals(txnId1.hlc(), cfk.current().timestampMicrosFor(txnId1, true)); + Assert.assertEquals(txnId1.hlc(), cfk.timestampMicrosFor(txnId1, true)); cfk.updateLastExecutionTimestamps(txnId2, true); - Assert.assertEquals(txnId2.hlc(), cfk.current().timestampMicrosFor(txnId2, true)); + Assert.assertEquals(txnId2.hlc(), cfk.timestampMicrosFor(txnId2, true)); Assert.assertEquals(txnId2, cfk.current().lastExecutedTimestamp()); - Assert.assertEquals(txnId2.hlc(), cfk.current().lastExecutedMicros()); + Assert.assertEquals(txnId2.hlc(), cfk.lastExecutedMicros()); cfk.register(command1); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 187753adb90f..0b9dcc38b3b0 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -53,7 +53,11 @@ import static accord.utils.async.AsyncChains.getUninterruptibly; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; -import static org.apache.cassandra.service.accord.AccordTestUtils.*; +import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; +import static org.apache.cassandra.service.accord.AccordTestUtils.createWriteTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.fullRange; +import static org.apache.cassandra.service.accord.AccordTestUtils.timestamp; +import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; public class AccordCommandTest { @@ -88,7 +92,7 @@ public void basicCycleTest() throws Throwable getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCacheSize(0))); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - Txn txn = createTxn(1); + Txn txn = createWriteTxn(1); Key key = (Key)txn.keys().get(0); RoutingKey homeKey = key.toUnseekable(); FullRoute fullRoute = txn.keys().toRoute(homeKey); @@ -106,7 +110,7 @@ public void basicCycleTest() throws Throwable })); getUninterruptibly(commandStore.execute(preAccept, instance -> { - Command command = instance.command(txnId).current(); + Command command = instance.ifInitialised(txnId).current(); Assert.assertEquals(txnId, command.executeAt()); Assert.assertEquals(Status.PreAccepted, command.status()); Assert.assertTrue(command.partialDeps() == null || command.partialDeps().isEmpty()); @@ -135,7 +139,7 @@ public void basicCycleTest() throws Throwable })); getUninterruptibly(commandStore.execute(accept, instance -> { - Command command = instance.command(txnId).current(); + Command command = instance.ifInitialised(txnId).current(); Assert.assertEquals(executeAt, command.executeAt()); Assert.assertEquals(Status.Accepted, command.status()); Assert.assertEquals(deps, command.partialDeps()); @@ -151,7 +155,7 @@ public void basicCycleTest() throws Throwable getUninterruptibly(commandStore.execute(commit, commit::apply)); getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { - Command command = instance.command(txnId).current(); + Command command = instance.ifInitialised(txnId).current(); Assert.assertEquals(commit.executeAt, command.executeAt()); Assert.assertTrue(command.hasBeen(Status.Committed)); Assert.assertEquals(commit.partialDeps, command.partialDeps()); @@ -169,7 +173,7 @@ public void computeDeps() throws Throwable getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCacheSize(0))); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); - Txn txn = createTxn(2); + Txn txn = createWriteTxn(2); Key key = (Key)txn.keys().get(0); RoutingKey homeKey = key.toUnseekable(); FullRoute fullRoute = txn.keys().toRoute(homeKey); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index b43209c2cda6..333bca572b21 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -184,15 +184,15 @@ public void initialEpochTest() throws Throwable Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); service.reportTopology(topology1); - loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync) -> { + loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { Assert.assertEquals(topology1, topology); Assert.assertTrue(remoteSync.isEmpty()); }); - Assert.assertEquals(new EpochDiskState(1, 1), service.diskState()); + Assert.assertEquals(EpochDiskState.create(1), service.diskState()); - service.remoteSyncComplete(ID1, 1); - service.remoteSyncComplete(ID2, 1); - loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync) -> { + service.receiveRemoteSyncComplete(ID1, 1); + service.receiveRemoteSyncComplete(ID2, 1); + loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { Assert.assertEquals(topology1, topology); Assert.assertEquals(Sets.newHashSet(ID1, ID2), remoteSync); }); @@ -208,14 +208,14 @@ public void loadTest() throws Throwable service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); service.reportTopology(topology1); service.acknowledgeEpoch(EpochReady.done(1)); - service.remoteSyncComplete(ID1, 1); - service.remoteSyncComplete(ID2, 1); - service.remoteSyncComplete(ID3, 1); + service.receiveRemoteSyncComplete(ID1, 1); + service.receiveRemoteSyncComplete(ID2, 1); + service.receiveRemoteSyncComplete(ID3, 1); Topology topology2 = new Topology(2, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); service.reportTopology(topology2); service.acknowledgeEpoch(EpochReady.done(2)); - service.remoteSyncComplete(ID1, 2); + service.receiveRemoteSyncComplete(ID1, 2); Topology topology3 = new Topology(3, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); service.reportTopology(topology3); @@ -255,7 +255,7 @@ public void truncateTest() Topology topology3 = new Topology(3, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); service.reportTopology(topology3); service.truncateTopologiesUntil(3); - Assert.assertEquals(new EpochDiskState(3, 3), service.diskState()); + Assert.assertEquals(EpochDiskState.create(3), service.diskState()); serviceListener.assertTruncates(3L); AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index 358aac687f14..a35050b017fd 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -20,22 +20,48 @@ import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; +import accord.Utils; import accord.api.Agent; +import accord.impl.AbstractFetchCoordinator; +import accord.impl.IntKey; +import accord.impl.TopologyUtils; import accord.local.Node; import accord.messages.InformOfTxnId; +import accord.messages.MessageType; +import accord.messages.ReadData; +import accord.messages.ReadTxnData; +import accord.messages.Reply; +import accord.messages.Request; import accord.messages.SimpleReply; +import accord.primitives.Keys; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.topology.Topologies; +import accord.topology.Topology; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; -import org.apache.cassandra.net.Verb; import org.apache.cassandra.tcm.ClusterMetadataService; public class AccordMessageSinkTest { + private static final Node.Id node = new Node.Id(1); + private static final AccordEndpointMapper mapping = SimpleAccordEndpointMapper.INSTANCE; + private static final Topology topology = TopologyUtils.initialTopology(new Node.Id[] {node}, Ranges.of(IntKey.range(0, 100)), 1); + private static final Topologies topologies = new Topologies.Single((a, b, ignore) -> 0, topology); + + private static final MessageDelivery messaging = Mockito.mock(MessageDelivery.class); + private static final AccordMessageSink sink = new AccordMessageSink(Mockito.mock(Agent.class), messaging, mapping); + @BeforeClass public static void setup() { @@ -45,21 +71,73 @@ public static void setup() } @Test - public void informOfTxn() throws Throwable + public void informOfTxn() { - Node.Id id = new Node.Id(1); - InetAddressAndPort endpoint = InetAddressAndPort.getByName("127.0.0.1"); - EndpointMapping mapping = EndpointMapping.builder(5).add(endpoint, id).build(); // There was an issue where the reply was the wrong verb // see CASSANDRA-18375 - InformOfTxnId info = Mockito.mock(InformOfTxnId.class); - Message req = Message.builder(Verb.ACCORD_INFORM_OF_TXN_REQ, info).build(); - SimpleReply reply = SimpleReply.Ok; + InformOfTxnId request = Mockito.mock(InformOfTxnId.class); + Mockito.when(request.type()).thenReturn(MessageType.INFORM_OF_TXN_REQ); + checkRequestReplies(request, SimpleReply.Ok); + } + + @Test + public void bootstrapRead() + { + long epoch = 42; + Txn txn = Utils.readTxn(Keys.of(IntKey.key(42))); + TxnId id = nextTxnId(epoch, txn); + PartialTxn partialTxn = txn.slice(Ranges.of(IntKey.range(40, 50)), true); + Request request = new AbstractFetchCoordinator.FetchRequest(epoch, id, partialTxn.covering(), PartialDeps.NONE, partialTxn, true); + + checkRequestReplies(request, + new AbstractFetchCoordinator.FetchResponse(null, null, id), + ReadData.ReadNack.NotCommitted); + + } + + @Test + public void txnRead() + { + TxnId txnId = nextTxnId(42, Txn.Kind.Read, Routable.Domain.Key); + Request request = new ReadTxnData(node, topologies, txnId, topology.ranges(), txnId); + checkRequestReplies(request, + new ReadData.ReadOk(null, null), + ReadData.ReadNack.NotCommitted); + } - MessageDelivery messaging = Mockito.mock(MessageDelivery.class); - AccordMessageSink sink = new AccordMessageSink(Mockito.mock(Agent.class), messaging, mapping); - sink.reply(id, req, reply); + private static void checkRequestReplies(Request request, Reply... replies) + { + Message requestMessage = send(request); + for (Reply reply : replies) + { + Mockito.clearInvocations(messaging); + try + { + sink.reply(node, requestMessage, reply); + } + catch (Throwable t) + { + throw new AssertionError(String.format("Expected reply type %s (type=%s) to be allowed", reply.getClass().getCanonicalName(), reply.type()), t); + } + } + } - Mockito.verify(messaging).send(Mockito.any(), Mockito.any()); + private static Message send(Request request) + { + Mockito.clearInvocations(messaging); + ArgumentCaptor> captor = ArgumentCaptor.forClass(Message.class); + Mockito.doNothing().when(messaging).send(captor.capture(), Mockito.any()); + sink.send(node, request); + return captor.getValue(); + } + + private static TxnId nextTxnId(long epoch, Txn txn) + { + return nextTxnId(epoch, txn.kind(), txn.keys().domain()); + } + + private static TxnId nextTxnId(long epoch, Txn.Kind rw, Routable.Domain domain) + { + return new TxnId(Timestamp.fromValues(epoch, System.nanoTime(), node), rw, domain); } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java new file mode 100644 index 000000000000..4159e6364c36 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -0,0 +1,508 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import com.google.common.collect.ImmutableBiMap; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.api.Agent; +import accord.impl.AbstractConfigurationService; +import accord.impl.TestAgent; +import accord.impl.basic.PendingQueue; +import accord.impl.basic.PropagatingPendingQueue; +import accord.impl.basic.RandomDelayQueue; +import accord.impl.basic.SimulatedDelayedExecutorService; +import accord.local.Node; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.topology.Topology; +import accord.utils.AccordGens; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; +import org.apache.cassandra.concurrent.AdaptingScheduledExecutorPlus; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.gms.IFailureDetectionEventListener; +import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.ConnectionType; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.StubClusterMetadataService; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.concurrent.Future; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +public class AccordSyncPropagatorTest +{ + @BeforeClass + public static void setup() throws NoSuchFieldException, IllegalAccessException + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + ClusterMetadataService.unsetInstance(); + ClusterMetadataService.setInstance(StubClusterMetadataService.forTesting()); + } + + @Test + public void burnTest() + { + Gen rangesGen = AccordGenerators.ranges().filter(r -> !r.isEmpty()); + Gen> nodesGen = Gens.lists(AccordGens.nodes()).unique().ofSizeBetween(1, 40); + qt().withExamples(100).check(rs -> { + List nodes = nodesGen.next(rs); + Set nodesAsSet = ImmutableSet.copyOf(nodes); + + List failures = new ArrayList<>(); + RandomDelayQueue delayQueue = new RandomDelayQueue.Factory(rs).get(); + PendingQueue queue = new PropagatingPendingQueue(failures, delayQueue); + Agent agent = new TestAgent.RethrowAgent(); + SimulatedDelayedExecutorService globalExecutor = new SimulatedDelayedExecutorService(queue, agent, rs.fork()); + ScheduledExecutorPlus scheduler = new AdaptingScheduledExecutorPlus(globalExecutor); + + Cluster cluster = new Cluster(nodes, rs, scheduler); + + long epochOffset = rs.nextLong(1, 1024); + int numEpochs = rs.nextInt(1, 10); + Map allRanges = new HashMap<>(); + for (int i = 0; i < numEpochs; i++) + { + long epoch = epochOffset + i; + Ranges ranges = rangesGen.next(rs); + allRanges.put(epoch, ranges); + scheduler.schedule(() -> { + for (Node.Id nodeId : nodes) + cluster.node(nodeId).propagator.reportSyncComplete(epoch, nodes, nodeId); + + for (int j = 0, attempts = rs.nextInt(1, 4); j < attempts; j++) + { + for (Range range : ranges) + { + Cluster.Instace inst = cluster.node(rs.pick(nodes)); + scheduler.schedule(() -> { + Ranges subrange = Ranges.of(range); + inst.propagator.reportClosed(epoch, nodes, subrange); + scheduler.schedule(() -> inst.propagator.reportRedundant(epoch, nodes, subrange), 1, TimeUnit.MINUTES); + }, rs.nextInt(30, 300), TimeUnit.SECONDS); + } + } + }, rs.nextInt(30, 300), TimeUnit.SECONDS); + } + + while (queue.size() > 0) + { + Runnable next = (Runnable) queue.poll(); + if (next == null) + break; + next.run(); + if (!failures.isEmpty()) + { + RuntimeException e = new RuntimeException("Failures detected"); + failures.forEach(e::addSuppressed); + throw e; + } + } + if (hasPending(cluster)) + throw new AssertionError("Unable to make progress: pending syncs on \n" + cluster.instances.values().stream().filter(i -> i.propagator.hasPending()).map(i -> i.propagator.toString()).collect(Collectors.joining("\n"))); + + for (Cluster.Instace inst : cluster.instances.values()) + { + Cluster.ConfigService cs = inst.configurationService; + assertSetsEqual(cs.completedEpochs, allRanges.keySet(), "completedEpochs %s", inst.id); + assertSetsEqual(cs.syncCompletes.keySet(), allRanges.keySet(), "syncCompletes %s", inst.id); + for (Map.Entry> e : cs.syncCompletes.entrySet()) + assertSetsEqual(e.getValue(), nodesAsSet, "syncCompletes values on %s", inst.id); + + assertMapEquals(cs.closed, allRanges, "Unexpected state for closed on %s", inst.id); + assertMapEquals(cs.redundant, allRanges, "Unexpected state for redundant on %s", inst.id); + } + }); + } + + private static void assertSetsEqual(Set actual, Set expected, String msg, Object... args) + { + Set notExpected = Sets.difference(actual, expected); + Assertions.assertThat(notExpected).describedAs("Unexpected values detected; " + msg, args).isEmpty(); + Set missing = Sets.difference(expected, actual); + Assertions.assertThat(missing).describedAs("Missing values detected; " + msg, args).isEmpty(); + } + + private static void assertMapEquals(Map actual, Map expected, String msg, Object... args) + { + assertSetsEqual(actual.keySet(), expected.keySet(), msg, args); + List errors = new ArrayList<>(); + for (Map.Entry e : actual.entrySet()) + { + V value = e.getValue(); + V other = expected.get(e.getKey()); + if (!Objects.equals(value, other)) + errors.add(String.format("Missmatch at key %s: expected %s but given %s", e.getKey(), other, value)); + } + if (!errors.isEmpty()) + throw new AssertionError(String.join("\n", errors)); + } + + private static boolean hasPending(Cluster cluster) + { + return cluster.instances.values().stream().anyMatch(i -> i.propagator.hasPending()); + } + + private static class Cluster implements AccordEndpointMapper + { + private final ImmutableBiMap nodeToAddress; + private final ImmutableMap instances; + private final RandomSource rs; + private final ScheduledExecutorPlus scheduler; + + private Cluster(List nodes, + RandomSource rs, + ScheduledExecutorPlus scheduler) + { + this.rs = rs; + this.scheduler = scheduler; + ImmutableBiMap.Builder nodeToAddress = ImmutableBiMap.builder(); + ImmutableMap.Builder instances = ImmutableMap.builder(); + for (Node.Id id : nodes) + { + InetAddressAndPort address = addressFromInt(id.id); + nodeToAddress.put(id, address); + ConfigService cs = new ConfigService(id); + Sink sink = new Sink(id); + IFailureDetector fd = new FailureDetector(address); + instances.put(id, new Instace(id, address, cs, sink, fd, cs, new AccordSyncPropagator(id, Cluster.this, sink, fd, scheduler, cs))); + } + this.nodeToAddress = nodeToAddress.build(); + this.instances = instances.build(); + } + + private InetAddressAndPort addressFromInt(int value) + { + byte[] array = ByteBufferUtil.bytes(value).array(); + try + { + InetAddress address = InetAddress.getByAddress(array); + return InetAddressAndPort.getByAddressOverrideDefaults(address, 1); + } + catch (UnknownHostException e) + { + throw new AssertionError(e); + } + } + + public Cluster.Instace node(Node.Id id) + { + Instace instace = instances.get(id); + if (instace == null) + throw new NullPointerException("Unknown id: " + id); + return instace; + } + + public Cluster.Instace node(InetAddressAndPort address) + { + return node(mappedId(address)); + } + + @Override + public Node.Id mappedId(InetAddressAndPort endpoint) + { + Node.Id id = nodeToAddress.inverse().get(endpoint); + if (id == null) + throw new NullPointerException("Unable to map endpoint: " + endpoint); + return id; + } + + @Override + public InetAddressAndPort mappedEndpoint(Node.Id id) + { + return nodeToAddress.get(id); + } + + private enum Action + { + DELIVER, TIMEOUT, ERROR + } + + private class Sink implements MessageDelivery + { + private final Node.Id from; + private final Map> callbacks = new HashMap<>(); + private final Map> nodeActions = new HashMap<>(); + + private Sink(Node.Id from) + { + this.from = from; + } + + @Override + public void send(Message message, InetAddressAndPort to) + { + throw new UnsupportedOperationException(); + } + + @Override + public void sendWithCallback(Message message, InetAddressAndPort to, RequestCallback cb) + { + Action action = action(to); + switch (action) + { + case ERROR: + cb.onFailure(to, RequestFailureReason.UNKNOWN); + return; + case TIMEOUT: + cb.onFailure(to, RequestFailureReason.TIMEOUT); + return; + case DELIVER: + break; + default: + throw new IllegalStateException("Unknown action: " + action); + } + callbacks.put(message.id(), cb); + scheduler.schedule(() -> AccordService.receive(this, node(to).configurationService, (Message>) message.withFrom(mappedEndpoint(from))), 500, TimeUnit.MILLISECONDS); + scheduler.schedule(() -> { + RequestCallback removed = callbacks.remove(message.id()); + if (removed != null) + removed.onFailure(to, RequestFailureReason.TIMEOUT); + }, 1, TimeUnit.MINUTES); + } + + @Override + public void sendWithCallback(Message message, InetAddressAndPort to, RequestCallback cb, ConnectionType specifyConnection) + { + throw new UnsupportedOperationException(); + } + + @Override + public Future> sendWithResult(Message message, InetAddressAndPort to) + { + throw new UnsupportedOperationException(); + } + + @Override + public void respond(V response, Message message) + { + Action action = action(message.respondTo()); + switch (action) + { + case ERROR: + case TIMEOUT: + return; + case DELIVER: + break; + default: + throw new IllegalStateException("Unknown action: " + action); + } + + RequestCallback cb = node(message.respondTo()).messagingService.callbacks.remove(message.id()); + if (cb != null) + cb.onResponse(message.responseWith(response)); + } + + private Action action(InetAddressAndPort to) + { + return nodeActions.computeIfAbsent(to, ignore -> Gens.enums().allWithWeights(Action.class, 81, 10, 1)).next(rs); + } + } + + private class FailureDetector implements IFailureDetector + { + private final InetAddressAndPort self; + private final Map> nodeRuns = new HashMap<>(); + + private FailureDetector(InetAddressAndPort self) + { + this.self = self; + } + + @Override + public boolean isAlive(InetAddressAndPort ep) + { + if (self.equals(ep)) return true; + + return !nodeRuns.computeIfAbsent(ep, ignore -> Gens.bools().runs(.01)).next(rs); + } + + @Override + public void interpret(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } + + @Override + public void report(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } + + @Override + public void remove(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } + + @Override + public void forceConviction(InetAddressAndPort ep) + { + throw new UnsupportedOperationException(); + } + + @Override + public void registerFailureDetectionEventListener(IFailureDetectionEventListener listener) + { + throw new UnsupportedOperationException(); + } + + @Override + public void unregisterFailureDetectionEventListener(IFailureDetectionEventListener listener) + { + throw new UnsupportedOperationException(); + } + } + + private class ConfigService extends AbstractConfigurationService.Minimal implements AccordSyncPropagator.Listener + { + private final Map> syncCompletes = new HashMap<>(); + private final Map> endpointAcks = new HashMap<>(); + private final NavigableSet completedEpochs = Collections.synchronizedNavigableSet(new TreeSet<>()); + private final Map closed = new HashMap<>(); + private final Map redundant = new HashMap<>(); + + private ConfigService(Node.Id node) + { + super(node); + } + + @Override + protected void receiveRemoteSyncCompletePreListenerNotify(Node.Id node, long epoch) + { + syncCompletes.computeIfAbsent(epoch, ignore -> new HashSet<>()).add(node); + } + + @Override + protected void fetchTopologyInternal(long epoch) + { + // TODO + } + + @Override + protected void localSyncComplete(Topology topology) + { + Set notify = topology.nodes().stream().filter(i -> !localId.equals(i)).collect(Collectors.toSet()); + instances.get(localId).propagator.reportSyncComplete(topology.epoch(), notify, localId); + } + + @Override + public void reportEpochClosed(Ranges ranges, long epoch) + { + Topology topology = getTopologyForEpoch(epoch); + instances.get(localId).propagator.reportClosed(epoch, topology.nodes(), ranges); + } + + @Override + public void reportEpochRedundant(Ranges ranges, long epoch) + { + Topology topology = getTopologyForEpoch(epoch); + instances.get(localId).propagator.reportRedundant(epoch, topology.nodes(), ranges); + } + + @Override + public void onEndpointAck(Node.Id id, long epoch) + { + endpointAcks.computeIfAbsent(epoch, ignore -> new HashSet<>()).add(id); + } + + @Override + public void onComplete(long epoch) + { + completedEpochs.add(epoch); + // TODO why do we see multiple calls? +// if (!completedEpochs.add(epoch)) +// throw new IllegalStateException("Completed epoch " + epoch + " multiple times"); + } + + @Override + public synchronized void receiveClosed(Ranges ranges, long epoch) + { + super.receiveClosed(ranges, epoch); + closed.merge(epoch, ranges, Ranges::with); + } + + @Override + public synchronized void receiveRedundant(Ranges ranges, long epoch) + { + super.receiveRedundant(ranges, epoch); + redundant.merge(epoch, ranges, Ranges::with); + } + } + + public class Instace + { + private final Node.Id id; + private final InetAddressAndPort address; + private final ConfigService configurationService; + private final Sink messagingService; + private final IFailureDetector failureDetector; + private final AccordSyncPropagator.Listener listener; + private final AccordSyncPropagator propagator; + + private Instace(Node.Id id, + InetAddressAndPort address, + ConfigService configurationService, + Sink messagingService, + IFailureDetector failureDetector, + AccordSyncPropagator.Listener listener, + AccordSyncPropagator propagator) + { + this.id = id; + this.address = address; + this.configurationService = configurationService; + this.messagingService = messagingService; + this.failureDetector = failureDetector; + this.listener = listener; + this.propagator = propagator; + } + } + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 77611f4f1afb..c8a5872d9f03 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -20,17 +20,14 @@ import java.util.Collections; import java.util.List; -import java.util.Set; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.LongSupplier; import java.util.stream.Collectors; import java.util.stream.IntStream; -import javax.annotation.Nullable; -import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.junit.Assert; @@ -40,7 +37,6 @@ import accord.api.ProgressLog; import accord.api.Result; import accord.api.RoutingKey; -import accord.api.Write; import accord.impl.CommandsForKey; import accord.impl.InMemoryCommandStore; import accord.local.Command; @@ -51,17 +47,23 @@ import accord.local.Node.Id; import accord.local.NodeTimeService; import accord.local.PreLoadContext; +import accord.local.SafeCommand; +import accord.local.SafeCommandStore; import accord.local.SaveStatus; -import accord.local.Status.Known; +import accord.local.SaveStatus.LocalExecution; import accord.primitives.Ballot; +import accord.primitives.FullKeyRoute; import accord.primitives.Keys; +import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Participants; import accord.primitives.Ranges; +import accord.primitives.Route; +import accord.primitives.Seekable; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; -import accord.primitives.Unseekables; import accord.primitives.Writes; import accord.topology.Shard; import accord.topology.Topology; @@ -94,11 +96,11 @@ public class AccordTestUtils { public static class Commands { - public static Command notWitnessed(TxnId txnId, PartialTxn txn) + public static Command notDefined(TxnId txnId, PartialTxn txn) { CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); attrs.partialTxn(txn); - return Command.SerializerSupport.notWitnessed(attrs, Ballot.ZERO); + return Command.SerializerSupport.notDefined(attrs, Ballot.ZERO); } public static Command preaccepted(TxnId txnId, PartialTxn txn, Timestamp executeAt) @@ -110,15 +112,17 @@ public static Command preaccepted(TxnId txnId, PartialTxn txn, Timestamp execute public static Command committed(TxnId txnId, PartialTxn txn, Timestamp executeAt) { - CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId).partialDeps(PartialDeps.NONE); attrs.partialTxn(txn); + Seekable key = txn.keys().get(0); + RoutingKey routingKey = key.asKey().toUnseekable(); + attrs.route(new FullKeyRoute(routingKey, true, new RoutingKey[]{ routingKey})); return Command.SerializerSupport.committed(attrs, SaveStatus.Committed, executeAt, Ballot.ZERO, Ballot.ZERO, - ImmutableSortedSet.of(), - ImmutableSortedMap.of()); + Command.WaitingOn.EMPTY); } } @@ -163,22 +167,26 @@ public static void testLoad(ManualExecutor executor, AccordSafeState set) {} - @Override public void durable(TxnId txnId, @Nullable Unseekables unseekables, ProgressShard progressShard) {} - @Override public void waiting(TxnId txnId, Known known, Unseekables unseekables) {} + @Override public void clear(TxnId txnId) {} + @Override public void durable(Command command) {} + @Override + public void waiting(SafeCommand blockedBy, LocalExecution blockedUntil, Route blockedOnRoute, Participants blockedOnParticipants) {} }; public static TxnId txnId(long epoch, long hlc, int node) { - return new TxnId(epoch, hlc, Txn.Kind.Write, Key, new Node.Id(node)); + return txnId(epoch, hlc, node, Txn.Kind.Write); + } + + public static TxnId txnId(long epoch, long hlc, int node, Txn.Kind kind) + { + return new TxnId(epoch, hlc, kind, Key, new Node.Id(node)); } public static Timestamp timestamp(long epoch, long hlc, int node) @@ -195,30 +203,32 @@ public static Pair processTxnResult(AccordCommandStore commandSt { AtomicReference> result = new AtomicReference<>(); getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txn.keys()), - safeStore -> { - TxnRead read = (TxnRead) txn.read(); - Data readData = read.keys().stream().map(key -> { - try - { - return AsyncChains.getBlocking(read.read(key, txn.kind(), safeStore, executeAt, null)); - } - catch (InterruptedException e) - { - throw new UncheckedInterruptedException(e); - } - catch (ExecutionException e) - { - throw new RuntimeException(e); - } - }) - .reduce(null, TxnData::merge); - Write write = txn.update().apply(executeAt, readData); - result.set(Pair.create(new Writes(txnId, executeAt, txn.keys(), write), - txn.query().compute(txnId, executeAt, readData, txn.read(), txn.update()))); - })); + safeStore -> result.set(processTxnResultDirect(safeStore, txnId, txn, executeAt)))); return result.get(); } + public static Pair processTxnResultDirect(SafeCommandStore safeStore, TxnId txnId, PartialTxn txn, Timestamp executeAt) + { + TxnRead read = (TxnRead) txn.read(); + Data readData = read.keys().stream().map(key -> { + try + { + return AsyncChains.getBlocking(read.read(key, txn.kind(), safeStore, executeAt, null)); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); + } + }) + .reduce(null, TxnData::merge); + return Pair.create(txn.execute(txnId, executeAt, readData), + txn.query().compute(txnId, executeAt, readData, txn.read(), txn.update())); + + } public static Txn createTxn(String query) { @@ -251,15 +261,18 @@ public static Txn createTxn(int readKey, int... writeKeys) StringBuilder sb = new StringBuilder("BEGIN TRANSACTION\n"); sb.append(format("LET row1 = (SELECT * FROM ks.tbl WHERE k=%s AND c=0);\n", readKey)); sb.append("SELECT row1.v;\n"); - sb.append("IF row1 IS NULL THEN\n"); - for (int key : writeKeys) - sb.append(format("INSERT INTO ks.tbl (k, c, v) VALUES (%s, 0, 1);\n", key)); - sb.append("END IF\n"); + if (writeKeys.length > 0) + { + sb.append("IF row1 IS NULL THEN\n"); + for (int key : writeKeys) + sb.append(format("INSERT INTO ks.tbl (k, c, v) VALUES (%s, 0, 1);\n", key)); + sb.append("END IF\n"); + } sb.append("COMMIT TRANSACTION"); return createTxn(sb.toString()); } - public static Txn createTxn(int key) + public static Txn createWriteTxn(int key) { return createTxn(key, key); } @@ -282,7 +295,7 @@ public static PartialTxn createPartialTxn(int key) return new PartialTxn.InMemory(ranges, txn.kind(), txn.keys(), txn.read(), txn.query(), txn.update()); } - private static class SingleEpochRanges extends CommandStores.RangesForEpochHolder + private static class SingleEpochRanges extends CommandStore.EpochUpdateHolder { private final Ranges ranges; @@ -293,7 +306,7 @@ public SingleEpochRanges(Ranges ranges) private void set(CommandStore store) { - this.current = new CommandStores.RangesForEpoch(1, ranges, store); + add(1, new CommandStores.RangesForEpoch(1, ranges, store), ranges); } } @@ -309,6 +322,8 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } + @Override + public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } }; SingleEpochRanges holder = new SingleEpochRanges(Ranges.of(range)); @@ -330,6 +345,8 @@ public static AccordCommandStore createAccordCommandStore( @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } + @Override + public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } }; SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); @@ -342,6 +359,7 @@ public static AccordCommandStore createAccordCommandStore( loadExecutor, saveExecutor); holder.set(result); + result.updateRangesForEpoch(); return result; } diff --git a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java new file mode 100644 index 000000000000..fcdd5ad1bac5 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.SaveStatus; +import accord.primitives.Ranges; +import accord.primitives.RoutableKey; +import accord.primitives.TxnId; +import accord.utils.AccordGens; +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.RandomPartitioner; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.Interval; +import org.apache.cassandra.utils.IntervalTree; + +import static accord.utils.Property.qt; +import static org.assertj.core.api.Assertions.assertThat; + +public class CommandsForRangesTest +{ + private static Ranges FULL_RANGE = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min("test"), AccordRoutingKey.SentinelKey.max("test"))); + + @BeforeClass + public static void setup() throws NoSuchFieldException, IllegalAccessException + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + @Test + public void prune() + { + qt().forAll(cfr()).check(cfr -> { + // public void prune(TxnId pruneBefore, Ranges pruneRanges) + // private Timestamp maxRedundant; + List knownIds = new ArrayList<>(cfr.knownIds()); + knownIds.sort(Comparator.naturalOrder()); + + assertThat(cfr.maxRedundant()).isNull(); + + TxnId min = knownIds.get(0); + TxnId max = knownIds.get(knownIds.size() - 1); + + // should do nothing + IntervalTree> tree = cfr.tree(); + cfr.prune(min, FULL_RANGE); + assertThat(cfr.maxRedundant()).isNull(); + assertThat(cfr.tree()).isEqualTo(tree); + + cfr.prune(max, FULL_RANGE); + assertThat(cfr.knownIds()).containsExactly(max); + assertThat(cfr.maxRedundant()).isEqualTo(knownIds.size() == 1 ? null : knownIds.get(knownIds.size() - 2)); + + cfr.prune(new TxnId(max.logicalNext(max.node), max.rw(), max.domain()), FULL_RANGE); + assertThat(cfr.knownIds()).isEmpty(); + assertThat(cfr.maxRedundant()).isEqualTo(max); + }); + } + + private static Gen cfr() + { + // TODO (coverage): once all partitioners work with regard to splitting, then should test all + Gen partitionerGen = rs -> rs.pick(Murmur3Partitioner.instance, RandomPartitioner.instance); + Gen statusGen = Gens.enums().all(SaveStatus.class); + return rs -> { + IPartitioner partitioner = partitionerGen.next(rs); + // some code reaches to the DD for partitioner... + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + Gen rangesGen = AccordGenerators.ranges(ignore -> Collections.singleton("test"), ignore -> partitioner); + CommandsForRanges.Builder builder = new CommandsForRanges.Builder(); + int numTxn = rs.nextInt(1, 10); + Set uniq = new HashSet<>(); + for (int i = 0; i < numTxn; i++) + { + TxnId id; + while (!uniq.add(id = AccordGens.txnIds().next(rs))) {} + builder.put(id, rangesGen.next(rs), statusGen.next(rs), id, Collections.emptyList()); + } + return builder.build(); + }; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java b/test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java new file mode 100644 index 000000000000..425169c34e75 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.nio.ByteBuffer; + +import accord.local.Node; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.utils.ByteBufferUtil; + +public enum SimpleAccordEndpointMapper implements AccordEndpointMapper +{ + INSTANCE; + + @Override + public Node.Id mappedId(InetAddressAndPort endpoint) + { + if (endpoint.addressBytes.length != 4) + throw new IllegalArgumentException("Only IPV4 is allowed: given " + endpoint.toString(true)); + return new Node.Id(ByteBuffer.wrap(endpoint.addressBytes).getInt()); + } + + @Override + public InetAddressAndPort mappedEndpoint(Node.Id id) + { + byte[] array = ByteBufferUtil.bytes(id.id).array(); + try + { + return InetAddressAndPort.getByAddressOverrideDefaults(InetAddress.getByAddress(array), 1); + } + catch (UnknownHostException e) + { + throw new AssertionError("Unable to convert " + id + " to an IPV4 address", e); + } + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 2a2eb481ac7d..66a0d00784a1 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -53,7 +53,7 @@ import static java.util.Collections.singleton; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; -import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.notWitnessed; +import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.notDefined; import static org.apache.cassandra.service.accord.AccordTestUtils.commandsForKey; import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; @@ -93,9 +93,9 @@ public void cachedTest() // acquire / release - commandCache.unsafeSetLoadFunction(id -> notWitnessed(id, txn)); + commandCache.unsafeSetLoadFunction(id -> notDefined(id, txn)); AccordSafeCommand safeCommand = commandCache.acquire(txnId); - testLoad(executor, safeCommand, notWitnessed(txnId, txn)); + testLoad(executor, safeCommand, notDefined(txnId, txn)); commandCache.release(safeCommand); cfkCache.unsafeSetLoadFunction(k -> commandsForKey((PartitionKey) k)); @@ -133,7 +133,7 @@ public void loadTest() // create / persist AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); safeCommand.preExecute(); - safeCommand.set(notWitnessed(txnId, txn)); + safeCommand.set(notDefined(txnId, txn)); AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); @@ -182,9 +182,9 @@ public void partialLoadTest() PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // acquire /release, create / persist - commandCache.unsafeSetLoadFunction(id -> notWitnessed(id, txn)); + commandCache.unsafeSetLoadFunction(id -> notDefined(id, txn)); AccordSafeCommand safeCommand = commandCache.acquire(txnId); - testLoad(executor, safeCommand, notWitnessed(txnId, txn)); + testLoad(executor, safeCommand, notDefined(txnId, txn)); commandCache.release(safeCommand); @@ -242,7 +242,7 @@ public void inProgressLoadTest() throws Throwable testLoad(executor, safeCfk, commandsForKey(key)); cfkCache.release(safeCfk); - commandCache.unsafeSetLoadFunction(id -> { Assert.assertEquals(txnId, id); return notWitnessed(id, txn); }); + commandCache.unsafeSetLoadFunction(id -> { Assert.assertEquals(txnId, id); return notDefined(id, txn); }); AccordSafeCommand safeCommand = commandCache.acquire(txnId); Assert.assertEquals(AccordCachingState.Status.LOADING, safeCommand.globalStatus()); Assert.assertTrue(commandCache.isReferenced(txnId)); @@ -299,7 +299,7 @@ public void failedLoadTest() throws Throwable if (txnId.equals(txnId1)) throw failure; else if (txnId.equals(txnId2)) - return notWitnessed(txnId, null); + return notDefined(txnId, null); throw new AssertionError("Unknown txnId: " + txnId); }); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 1060e34d48da..2f32acce0839 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -32,7 +32,6 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -86,7 +85,6 @@ import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; -import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; import static org.apache.cassandra.service.accord.AccordTestUtils.keys; import static org.apache.cassandra.service.accord.AccordTestUtils.loaded; import static org.apache.cassandra.service.accord.AccordTestUtils.txnId; @@ -122,11 +120,13 @@ public void optionalCommandTest() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - Txn txn = createTxn((int)clock.incrementAndGet()); + Txn txn = AccordTestUtils.createWriteTxn((int)clock.incrementAndGet()); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); getUninterruptibly(commandStore.execute(contextFor(txnId), instance -> { - SafeCommand command = instance.ifPresent(txnId); + // TODO review: This change to `ifInitialized` was done in a lot of places and it doesn't preserve this property + // I fixed this reference to point to `ifLoadedAndInitialised` and but didn't update other places + SafeCommand command = instance.ifLoadedAndInitialised(txnId); Assert.assertNull(command); })); @@ -138,7 +138,7 @@ public void optionalCommandTest() throws Throwable public void optionalCommandsForKeyTest() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - Txn txn = createTxn((int)clock.incrementAndGet()); + Txn txn = AccordTestUtils.createWriteTxn((int)clock.incrementAndGet()); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); getUninterruptibly(commandStore.execute(contextFor(key), instance -> { @@ -194,7 +194,7 @@ private static Command createCommittedUsingLifeCycle(AccordCommandStore commandS commandStore.setCacheSize(0); commandStore.setCacheSize(cacheSize); - return safe.command(txnId).current(); + return safe.ifInitialised(txnId).current(); }).beginAsResult()); } catch (ExecutionException e) @@ -230,7 +230,7 @@ public void testFutureCleanup() throws Throwable createCommittedAndPersist(commandStore, txnId); - Consumer consumer = safeStore -> safeStore.command(txnId).readyToExecute(); + Consumer consumer = safeStore -> safeStore.ifInitialised(txnId).readyToExecute(); PreLoadContext ctx = contextFor(txnId); AsyncOperation operation = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) { @@ -316,7 +316,7 @@ public void loadFail() // can we recover? commandStore.commandCache().unsafeSetLoadFunction(txnId -> AccordKeyspace.loadCommand(commandStore, txnId)); - AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> store.command(id).readyToExecute())); + AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> store.ifInitialised(id).readyToExecute())); getUninterruptibly(o2); }); } diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java new file mode 100644 index 000000000000..dccf0e14b691 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.Command; +import accord.primitives.Deps; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.SimpleBitSet; +import accord.utils.Utils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +public class WaitingOnSerializerTest +{ + @BeforeClass + public static void setup() + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + @Test + public void serde() + { + DataOutputBuffer buffer = new DataOutputBuffer(); + qt().forAll(waitingOnGen()).check(waitingOn -> { + buffer.clear(); + long expectedSize = WaitingOnSerializer.serializedSize(waitingOn); + WaitingOnSerializer.serialize(waitingOn, buffer); + Assertions.assertThat(buffer.getLength()).isEqualTo(expectedSize); + Command.WaitingOn read = WaitingOnSerializer.deserialize(waitingOn.deps, new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)); + Assertions.assertThat(read) + .isEqualTo(waitingOn) + .isEqualTo(WaitingOnSerializer.deserialize(waitingOn.deps, WaitingOnSerializer.serialize(waitingOn))); + }); + } + + private enum WaitingOnSets + {COMMIT, APPLY, APPLYED_OR_INVALIDATED} + + private static Gen waitingOnGen() + { + Gen depsGen = AccordGenerators.fromQT(CassandraGenerators.nonLocalPartitioners()) + .flatMap(AccordGenerators::depsGen); + Gen sets = Gens.enums().all(WaitingOnSets.class); + return rs -> { + Deps deps = depsGen.next(rs); + if (deps.isEmpty()) return Command.WaitingOn.EMPTY; + int[] selected = Gens.arrays(Gens.ints().between(0, deps.txnIdCount() - 1)).unique().ofSizeBetween(0, deps.txnIdCount() - 1).next(rs); + SimpleBitSet waitingOnCommit = new SimpleBitSet(deps.txnIdCount(), false); + SimpleBitSet waitingOnApply = new SimpleBitSet(deps.txnIdCount(), false); + SimpleBitSet appliedOrInvalidated = new SimpleBitSet(deps.txnIdCount(), false); + for (int i : selected) + { + WaitingOnSets set = sets.next(rs); + switch (set) + { + case COMMIT: + waitingOnCommit.set(i); + break; + case APPLY: + waitingOnApply.set(i); + break; + case APPLYED_OR_INVALIDATED: + appliedOrInvalidated.set(i); + break; + default: + throw new IllegalStateException("Unexpected set: " + set); + } + } + + return new Command.WaitingOn(deps, Utils.ensureImmutable(waitingOnCommit), Utils.ensureImmutable(waitingOnApply), Utils.ensureImmutable(appliedOrInvalidated)); + }; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java b/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java index fe73677e2575..ace1da7f1a90 100644 --- a/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java +++ b/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java @@ -154,6 +154,7 @@ public void sendWithCallback(Message message, InetAddressAndPort public void send(Message message, InetAddressAndPort to) {} public void sendWithCallback(Message message, InetAddressAndPort to, RequestCallback cb, ConnectionType specifyConnection) {} public Future> sendWithResult(Message message, InetAddressAndPort to) { return null; } + @Override public void respond(V response, Message message) {} }; ProgressBarrier progressBarrier = ((MultiStepOperation)metadata.inProgressSequences.get(node.nodeId())) diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index af5198538eb3..f42dcd230090 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -18,22 +18,53 @@ package org.apache.cassandra.utils; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + import accord.local.Command; +import accord.primitives.Deps; +import accord.primitives.KeyDeps; import accord.primitives.PartialTxn; +import accord.primitives.Range; +import accord.primitives.RangeDeps; +import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.utils.AccordGens; import accord.utils.Gen; import accord.utils.Gens; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.dht.AccordSplitter; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.quicktheories.impl.JavaRandom; import static accord.utils.AccordGens.txnIds; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; public class AccordGenerators { - private AccordGenerators() {} + private static final Gen PARTITIONER_GEN = fromQT(CassandraGenerators.partitioners()); - private enum SupportedCommandTypes { notWitnessed, preaccepted, committed } + private AccordGenerators() + { + } + + public static Gen partitioner() + { + return PARTITIONER_GEN; + } + + private enum SupportedCommandTypes + {notDefined, preaccepted, committed} public static Gen commands() { @@ -51,12 +82,142 @@ public static Gen commands() SupportedCommandTypes targetType = supportedTypes.next(rs); switch (targetType) { - case notWitnessed: return AccordTestUtils.Commands.notWitnessed(id, txn); - case preaccepted: return AccordTestUtils.Commands.preaccepted(id, txn, executeAt); - case committed: return AccordTestUtils.Commands.committed(id, txn, executeAt); - default: throw new UnsupportedOperationException("Unexpected type: " + targetType); + case notDefined: + return AccordTestUtils.Commands.notDefined(id, txn); + case preaccepted: + return AccordTestUtils.Commands.preaccepted(id, txn, executeAt); + case committed: + return AccordTestUtils.Commands.committed(id, txn, executeAt); + default: + throw new UnsupportedOperationException("Unexpected type: " + targetType); } }; } + public static Gen keys() + { + return keys(fromQT(Generators.IDENTIFIER_GEN), + fromQT(CassandraGenerators.TABLE_ID_GEN), + fromQT(CassandraGenerators.decoratedKeys())); + } + + public static Gen keys(IPartitioner partitioner) + { + return keys(fromQT(Generators.IDENTIFIER_GEN), + fromQT(CassandraGenerators.TABLE_ID_GEN), + fromQT(CassandraGenerators.decoratedKeys(ignore -> partitioner))); + } + + public static Gen keys(Gen keyspace, Gen tableId, Gen key) + { + return rs -> new PartitionKey(keyspace.next(rs), tableId.next(rs), key.next(rs)); + } + + public static Gen routingKeyGen(Gen keyspace, Gen tokenGen) + { + return rs -> { + String ks = keyspace.next(rs); + if (rs.nextBoolean()) return new AccordRoutingKey.TokenKey(ks, tokenGen.next(rs)); + else return rs.nextBoolean() ? AccordRoutingKey.SentinelKey.min(ks) : AccordRoutingKey.SentinelKey.max(ks); + }; + } + + public static Gen range() + { + return PARTITIONER_GEN.flatMap(partitioner -> range(fromQT(Generators.IDENTIFIER_GEN), fromQT(CassandraGenerators.token(partitioner)))); + } + + public static Gen range(IPartitioner partitioner) + { + return range(fromQT(Generators.IDENTIFIER_GEN), fromQT(CassandraGenerators.token(partitioner))); + } + + public static Gen range(Gen keyspace, Gen tokenGen) + { + return rs -> { + String ks = keyspace.next(rs); + Gen gen = routingKeyGen(Gens.constant(ks), tokenGen); + AccordRoutingKey a = gen.next(rs); + AccordRoutingKey b = gen.next(rs); + while (a.equals(b)) + b = gen.next(rs); + if (a.compareTo(b) < 0) return new TokenRange(a, b); + else return new TokenRange(b, a); + }; + } + + public static Gen ranges() + { + // javac couldn't pick the right constructor with HashSet::new, so had to create new lambda... + return ranges(Gens.lists(fromQT(Generators.IDENTIFIER_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), PARTITIONER_GEN); + } + + public static Gen ranges(Gen> keyspaceGen, Gen partitionerGen) + { + return rs -> { + Set keyspaces = keyspaceGen.next(rs); + IPartitioner partitioner = partitionerGen.next(rs); + List ranges = new ArrayList<>(); + int numSplits = rs.nextInt(10, 100); + TokenRange range = new TokenRange(AccordRoutingKey.SentinelKey.min(""), AccordRoutingKey.SentinelKey.max("")); + AccordSplitter splitter = partitioner.accordSplitter().apply(Ranges.of(range)); + BigInteger size = splitter.sizeOf(range); + BigInteger update = splitter.divide(size, numSplits); + BigInteger offset = BigInteger.ZERO; + while (offset.compareTo(size) < 0) + { + BigInteger end = offset.add(update); + TokenRange r = (TokenRange) splitter.subRange(range, offset, end); + for (String ks : keyspaces) + { + ranges.add(r.withKeyspace(ks)); + } + offset = end; + } + return Ranges.of(ranges.toArray(new Range[0])); + }; + } + + public static Gen ranges(IPartitioner partitioner) + { + return ranges(Gens.lists(fromQT(Generators.IDENTIFIER_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), ignore -> partitioner); + } + + public static Gen keyDepsGen() + { + return AccordGens.keyDeps(AccordGenerators.keys()); + } + + public static Gen keyDepsGen(IPartitioner partitioner) + { + return AccordGens.keyDeps(AccordGenerators.keys(partitioner)); + } + + public static Gen rangeDepsGen() + { + return AccordGens.rangeDeps(AccordGenerators.range()); + } + + public static Gen rangeDepsGen(IPartitioner partitioner) + { + return AccordGens.rangeDeps(AccordGenerators.range(partitioner)); + } + + public static Gen depsGen() + { + return AccordGens.deps(keyDepsGen(), rangeDepsGen()); + } + + public static Gen depsGen(IPartitioner partitioner) + { + return AccordGens.deps(keyDepsGen(partitioner), rangeDepsGen(partitioner)); + } + + public static Gen fromQT(org.quicktheories.core.Gen qt) + { + return rs -> { + JavaRandom r = new JavaRandom(rs.asJdkRandom()); + return qt.generate(r); + }; + } } From 76994f81967bcbeee296448d12b2e3e87468bb18 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 16 Aug 2023 14:14:03 -0700 Subject: [PATCH 063/340] CEP-15 (C*) When a host replacement happens don't loose the peer mapping right away (#3575) patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-18764 --- modules/accord | 2 +- src/java/org/apache/cassandra/net/Verb.java | 40 +++++++++---------- .../accord/AccordConfigurationService.java | 15 ++++--- .../service/accord/AccordService.java | 14 ++++++- .../service/accord/AccordSyncPropagator.java | 6 ++- .../service/accord/AccordTopologyUtils.java | 7 +++- .../service/accord/EndpointMapping.java | 13 ++++++ .../cassandra/distributed/impl/Instance.java | 5 ++- .../AccordConfigurationServiceTest.java | 6 +-- .../accord/AccordSyncPropagatorTest.java | 2 +- 10 files changed, 76 insertions(+), 34 deletions(-) diff --git a/modules/accord b/modules/accord index 7c15f3a62039..2ad55e03c43c 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 7c15f3a6203939bc6cb398e538df1ca3557cbe03 +Subproject commit 2ad55e03c43ce074cdf5e36cfa14cb4278c2dc0f diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index be2d86ad59cb..dc8e6d415798 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -272,36 +272,36 @@ public enum Verb // accord ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), ACCORD_PRE_ACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_PRE_ACCEPT_REQ (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_PRE_ACCEPT_RSP ), + ACCORD_PRE_ACCEPT_REQ (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_PRE_ACCEPT_RSP ), ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_ACCEPT_REQ (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), - ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, () -> AccordService.instance().verbHandler(), ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_REQ (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), ACCORD_READ_RSP (126, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), - ACCORD_READ_REQ (125, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, () -> AccordService.instance().verbHandler() ), + ACCORD_READ_REQ (125, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, AccordService::verbHandlerOrNoop ), ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), - ACCORD_APPLY_REQ (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_APPLY_RSP ), + ACCORD_APPLY_REQ (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), ACCORD_BEGIN_RECOVER_RSP (132, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), - ACCORD_BEGIN_RECOVER_REQ (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_RECOVER_RSP ), + ACCORD_BEGIN_RECOVER_REQ (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_RECOVER_RSP ), ACCORD_BEGIN_INVALIDATE_RSP (134, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), - ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_BEGIN_INVALIDATE_RSP ), + ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_INVALIDATE_RSP ), ACCORD_WAIT_ON_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), - ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, () -> AccordService.instance().verbHandler(), ACCORD_WAIT_ON_COMMIT_RSP ), - ACCORD_WAIT_ON_APPLY_REQ (137, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitOnApply, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP ), - ACCORD_INFORM_OF_TXN_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_HOME_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, AccordService::verbHandlerOrNoop, ACCORD_WAIT_ON_COMMIT_RSP ), + ACCORD_WAIT_ON_APPLY_REQ (137, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitOnApply, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_INFORM_OF_TXN_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_HOME_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_CHECK_STATUS_RSP (142, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), - ACCORD_CHECK_STATUS_REQ (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_CHECK_STATUS_RSP ), + ACCORD_CHECK_STATUS_REQ (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), ACCORD_GET_DEPS_RSP (144, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_GET_DEPS_REQ (143, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_GET_DEPS_RSP ), + ACCORD_GET_DEPS_REQ (143, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_DEPS_RSP ), ACCORD_FETCH_DATA_RSP (146, P2, repairTimeout,REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), - ACCORD_FETCH_DATA_REQ (145, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, () -> AccordService.instance().verbHandler(), ACCORD_FETCH_DATA_RSP ), - ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, () -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), - ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,() -> AccordService.instance().verbHandler(), ACCORD_SIMPLE_RSP ), + ACCORD_FETCH_DATA_REQ (145, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), + ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_QUERY_DURABLE_BEFORE_RSP (150, P2, writeTimeout, REQUEST_RESPONSE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), - ACCORD_QUERY_DURABLE_BEFORE_REQ (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,() -> AccordService.instance().verbHandler(), ACCORD_QUERY_DURABLE_BEFORE_RSP), + ACCORD_QUERY_DURABLE_BEFORE_REQ (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,AccordService::verbHandlerOrNoop, ACCORD_QUERY_DURABLE_BEFORE_RSP), ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index df77cae97583..610147e3411a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -24,6 +24,7 @@ import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -132,13 +133,17 @@ public synchronized void start() Invariants.checkState(state == State.INITIALIZED, "Expected state to be INITIALIZED but was %s", state); state = State.LOADING; updateMapping(ClusterMetadata.current()); + EndpointMapping snapshot = mapping; diskState = AccordKeyspace.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { if (topology != null) reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); getOrCreateEpochState(epoch).setSyncStatus(syncStatus); if (syncStatus == SyncStatus.NOTIFYING) - syncPropagator.reportSyncComplete(epoch, pendingSyncNotify, localId); + { + // TODO (expected, correctness): since this is loading old topologies, might see nodes no longer present (host replacement, decom, shrink, etc.); attempt to remove unknown nodes + syncPropagator.reportSyncComplete(epoch, Sets.filter(pendingSyncNotify, snapshot::containsId), localId); + } remoteSyncComplete.forEach(id -> receiveRemoteSyncComplete(id, epoch)); // TODO (now): disk doesn't get updated until we see our own notification, so there is an edge case where this instance notified others and fails in the middle, but Apply was already sent! This could leave partial closed/redudant accross the cluster @@ -157,7 +162,7 @@ public Node.Id mappedId(InetAddressAndPort endpoint) @Override public InetAddressAndPort mappedEndpoint(Node.Id id) { - return Invariants.nonNull(mapping.mappedEndpoint(id)); + return Invariants.nonNull(mapping.mappedEndpoint(id), "Unable to map node id %s to a InetAddressAndPort", id); } @VisibleForTesting @@ -175,7 +180,7 @@ synchronized void updateMapping(EndpointMapping mapping) synchronized void updateMapping(ClusterMetadata metadata) { - updateMapping(AccordTopologyUtils.directoryToMapping(metadata.epoch.getEpoch(), metadata.directory)); + updateMapping(AccordTopologyUtils.directoryToMapping(mapping, metadata.epoch.getEpoch(), metadata.directory)); } private void reportMetadata(ClusterMetadata metadata) @@ -220,11 +225,11 @@ protected void fetchTopologyInternal(long epoch) } @Override - protected synchronized void localSyncComplete(Topology topology) + protected synchronized void localSyncComplete(Topology topology, boolean startSync) { long epoch = topology.epoch(); EpochState epochState = getOrCreateEpochState(epoch); - if (epochState.syncStatus != SyncStatus.NOT_STARTED) + if (!startSync ||epochState.syncStatus != SyncStatus.NOT_STARTED) return; Set notify = topology.nodes().stream().filter(i -> !localId.equals(i)).collect(Collectors.toSet()); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 9c4a6b5a189f..cf1bd2923d16 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -163,12 +163,24 @@ public Pair, DurableBefore> getRedundantBefor } }; - private static Node.Id localId = null; + private static volatile Node.Id localId = null; + private static class Handle { public static final AccordService instance = new AccordService(); } + public static boolean isSetup() + { + return localId != null; + } + + public static IVerbHandler verbHandlerOrNoop() + { + if (!isSetup()) return ignore -> {}; + return instance().verbHandler(); + } + public static void startup(NodeId tcmId) { localId = AccordTopologyUtils.tcmIdToAccord(tcmId); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java index 2af9c9472b91..3e215a7e6810 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java @@ -59,7 +59,11 @@ */ public class AccordSyncPropagator { - public static final IVerbHandler> verbHandler = message -> AccordService.instance().receive(message); + public static final IVerbHandler> verbHandler = message -> { + if (!AccordService.isSetup()) + return; + AccordService.instance().receive(message); + }; interface Listener { diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index f234738402c0..a41e9732d95a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -131,11 +131,16 @@ public static Topology createAccordTopology(Epoch epoch, DistributedSchema schem return new Topology(epoch.getEpoch(), shards.toArray(new Shard[0])); } - public static EndpointMapping directoryToMapping(long epoch, Directory directory) + public static EndpointMapping directoryToMapping(EndpointMapping mapping, long epoch, Directory directory) { EndpointMapping.Builder builder = EndpointMapping.builder(epoch); for (NodeId id : directory.peerIds()) builder.add(directory.endpoint(id), tcmIdToAccord(id)); + + // There are cases where nodes are removed from the cluster (host replacement, decom, etc.), but inflight events may still be happening; + // keep the ids around so pending events do not fail with a mapping error + for (Node.Id id : mapping.differenceIds(builder)) + builder.add(mapping.mappedEndpoint(id), id); return builder.build(); } diff --git a/src/java/org/apache/cassandra/service/accord/EndpointMapping.java b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java index 4a746dc9a2c3..0c964d3204a0 100644 --- a/src/java/org/apache/cassandra/service/accord/EndpointMapping.java +++ b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java @@ -18,9 +18,12 @@ package org.apache.cassandra.service.accord; +import java.util.Set; + import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; import com.google.common.collect.ImmutableBiMap; +import com.google.common.collect.Sets; import accord.local.Node; import accord.utils.Invariants; @@ -44,6 +47,16 @@ long epoch() return epoch; } + public boolean containsId(Node.Id id) + { + return mapping.containsKey(id); + } + + public Set differenceIds(Builder builder) + { + return Sets.difference(mapping.keySet(), builder.mapping.keySet()); + } + @Override public Node.Id mappedId(InetAddressAndPort endpoint) { diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index 4b8b3fbb146b..a2d62eb11379 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -981,7 +981,10 @@ public Future shutdown(boolean runOnExitThreads, boolean shutdownMessaging () -> SharedExecutorPool.SHARED.shutdownAndWait(1L, MINUTES) ); - error = parallelRun(error, executor, () -> AccordService.instance().shutdownAndWait(1l, MINUTES)); + error = parallelRun(error, executor, () -> { + if (!AccordService.isSetup()) return; + AccordService.instance().shutdownAndWait(1l, MINUTES); + }); // CommitLog must shut down after Stage, or threads from the latter may attempt to use the former. // (ex. A Mutation stage thread may attempt to add a mutation to the CommitLog.) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index 333bca572b21..3cf9da57c179 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -207,19 +207,19 @@ public void loadTest() throws Throwable Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); service.reportTopology(topology1); - service.acknowledgeEpoch(EpochReady.done(1)); + service.acknowledgeEpoch(EpochReady.done(1), true); service.receiveRemoteSyncComplete(ID1, 1); service.receiveRemoteSyncComplete(ID2, 1); service.receiveRemoteSyncComplete(ID3, 1); Topology topology2 = new Topology(2, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); service.reportTopology(topology2); - service.acknowledgeEpoch(EpochReady.done(2)); + service.acknowledgeEpoch(EpochReady.done(2), true); service.receiveRemoteSyncComplete(ID1, 2); Topology topology3 = new Topology(3, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); service.reportTopology(topology3); - service.acknowledgeEpoch(EpochReady.done(3)); + service.acknowledgeEpoch(EpochReady.done(3), true); AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); loaded.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index 4159e6364c36..ab6e2790ce2c 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -427,7 +427,7 @@ protected void fetchTopologyInternal(long epoch) } @Override - protected void localSyncComplete(Topology topology) + protected void localSyncComplete(Topology topology, boolean startSync) { Set notify = topology.nodes().stream().filter(i -> !localId.equals(i)).collect(Collectors.toSet()); instances.get(localId).propagator.reportSyncComplete(topology.epoch(), notify, localId); From bfa0e59f7f1791d6a1adb4f1c51e6f3db66a8ade Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Thu, 17 Aug 2023 12:31:57 +0200 Subject: [PATCH 064/340] CASSANDRA-18774: Fix pre-commit hook --- .../git/git-hooks/pre-commit/100-verify-submodules-pushed.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh b/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh index aee8f658a12a..ec10bba04a5d 100755 --- a/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh +++ b/.build/git/git-hooks/pre-commit/100-verify-submodules-pushed.sh @@ -88,8 +88,9 @@ _main() { branch="$(git config -f .gitmodules "submodule.${file}.branch")" [[ -z "${branch:-}" ]] && error "Submodule ${file} does not define a branch" git_sha="$(git --git-dir "${git_sub_dir}" rev-parse HEAD)" - git --git-dir "${git_sub_dir}" fetch origin - git --git-dir "${git_sub_dir}" branch "origin/${branch}" --contains "${git_sha}" || error "Git commit ${git_sha} not found in $(git remote get-url origin) on branch ${branch}" + local remote="$(git --git-dir "${git_sub_dir}" config --get "branch.${branch}.remote" || error "Git branch ${branch} is not set up to track any remote in submodule ${file}")" + git --git-dir "${git_sub_dir}" fetch "${remote}" + git --git-dir "${git_sub_dir}" branch "${remote}/${branch}" --contains "${git_sha}" || error "Git commit ${git_sha} not found in $(git remote get-url "${remote}") on branch ${branch}" fi done < <(git diff --cached --name-status) } From 82acd3e9503452998988c0dc661f691ef94f7b4d Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Fri, 18 Aug 2023 16:48:39 -0400 Subject: [PATCH 065/340] Allow exceptions to be propagated remotely https://github.com/apache/cassandra-accord/pull/56 Patch by Ariel Weisberg; Reviewed by David Capwell for CASSANDRA-18779 --- modules/accord | 2 +- .../exceptions/ExceptionSerializer.java | 222 ++++++++++++++++++ .../cassandra/exceptions/RequestFailure.java | 164 +++++++++++++ .../exceptions/RequestFailureReason.java | 7 + .../cassandra/hints/HintsDispatcher.java | 17 +- .../apache/cassandra/locator/ReplicaPlan.java | 23 +- .../org/apache/cassandra/net/InboundSink.java | 10 +- .../org/apache/cassandra/net/Message.java | 12 +- .../apache/cassandra/net/MessageDelivery.java | 20 +- .../cassandra/net/MessagingService.java | 17 +- .../apache/cassandra/net/RequestCallback.java | 3 +- .../net/RequestCallbackWithFailure.java | 4 +- .../cassandra/net/RequestCallbacks.java | 8 +- .../cassandra/net/ResponseVerbHandler.java | 4 +- src/java/org/apache/cassandra/net/Verb.java | 71 ++++-- .../apache/cassandra/repair/SnapshotTask.java | 8 +- .../repair/messages/RepairMessage.java | 13 +- .../service/AbstractWriteResponseHandler.java | 5 +- .../service/ActiveRepairService.java | 7 +- .../service/BatchlogResponseHandler.java | 6 +- .../service/FailureRecordingCallback.java | 5 +- .../cassandra/service/StorageProxy.java | 14 +- .../service/TruncateResponseHandler.java | 10 +- .../service/accord/AccordCallback.java | 15 +- .../service/accord/AccordMessageSink.java | 20 +- .../service/accord/AccordSyncPropagator.java | 4 +- .../apache/cassandra/service/paxos/Paxos.java | 19 +- .../cassandra/service/paxos/PaxosCommit.java | 9 +- .../cassandra/service/paxos/PaxosPrepare.java | 48 +++- .../service/paxos/PaxosPrepareRefresh.java | 13 +- .../cassandra/service/paxos/PaxosPropose.java | 6 +- .../cassandra/service/paxos/PaxosRepair.java | 4 +- .../service/paxos/PaxosRequestCallback.java | 8 +- .../paxos/cleanup/PaxosCleanupComplete.java | 11 +- .../paxos/cleanup/PaxosCleanupSession.java | 8 +- .../cleanup/PaxosFinishPrepareCleanup.java | 8 +- .../cleanup/PaxosStartPrepareCleanup.java | 14 +- .../cassandra/service/reads/ReadCallback.java | 7 +- .../reads/thresholds/WarningContext.java | 14 +- .../cassandra/tcm/PaxosBackedProcessor.java | 8 +- .../apache/cassandra/tcm/RemoteProcessor.java | 5 +- .../cassandra/tcm/migration/Election.java | 1 - .../tcm/sequences/ProgressBarrier.java | 8 +- .../apache/cassandra/utils/FBUtilities.java | 3 - test/data/config/version=5.0-alpha1.yml | 2 +- .../simulator/systems/SimulatedAction.java | 4 +- .../config/ConfigCompatibilityTest.java | 1 + ...nterMutationVerbHandlerOutOfRangeTest.java | 14 +- .../db/MutationVerbHandlerOutOfRangeTest.java | 14 +- .../ReadCommandVerbHandlerOutOfRangeTest.java | 14 +- .../exceptions/RemoteExceptionTest.java | 112 +++++++++ .../apache/cassandra/net/ConnectionTest.java | 6 +- .../cassandra/net/MessageDeliveryTest.java | 4 +- .../org/apache/cassandra/net/MessageTest.java | 78 ++++-- .../net/SimulatedMessageDelivery.java | 12 +- .../apache/cassandra/repair/FuzzTestBase.java | 10 +- .../repair/messages/RepairMessageTest.java | 10 +- .../service/WriteResponseHandlerTest.java | 14 +- .../service/accord/AccordJournalTest.java | 9 + .../accord/AccordSyncPropagatorTest.java | 8 +- .../paxos/PaxosVerbHandlerOutOfRangeTest.java | 12 +- .../service/reads/ReadExecutorTest.java | 19 +- .../tcm/DiscoverySimulationTest.java | 4 +- .../tcm/sequences/ProgressBarrierTest.java | 4 +- 64 files changed, 959 insertions(+), 277 deletions(-) create mode 100644 src/java/org/apache/cassandra/exceptions/ExceptionSerializer.java create mode 100644 src/java/org/apache/cassandra/exceptions/RequestFailure.java create mode 100644 test/unit/org/apache/cassandra/exceptions/RemoteExceptionTest.java diff --git a/modules/accord b/modules/accord index 2ad55e03c43c..91336705bde8 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 2ad55e03c43ce074cdf5e36cfa14cb4278c2dc0f +Subproject commit 91336705bde8332954e849219d73205d68fa168a diff --git a/src/java/org/apache/cassandra/exceptions/ExceptionSerializer.java b/src/java/org/apache/cassandra/exceptions/ExceptionSerializer.java new file mode 100644 index 000000000000..de379739a383 --- /dev/null +++ b/src/java/org/apache/cassandra/exceptions/ExceptionSerializer.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.exceptions; + +import java.io.IOException; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.ArraySerializers; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.NullableSerializer; + +import static java.util.Collections.newSetFromMap; +import static org.apache.cassandra.db.TypeSizes.sizeof; +import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; + +/** + * Support for serializing exceptions without a dependency on being able to instantiate the exception class + * on the other side to eliminate dependencies across versions. + * + * This is still slightly more flexible than sending a string representation of the exception because it's still an exception so using it + * as a cause or suppressed exception works and it is formatted nicely as if it were another local exception. + */ +public class ExceptionSerializer +{ + public static class RemoteException extends RuntimeException + { + private final String originalClass; + + public RemoteException(String originalClass, String originalMessage, StackTraceElement[] stackTrace) + { + super(originalMessage); + this.originalClass = originalClass; + setStackTrace(stackTrace); + } + + private void initSuppressedAndCause(RemoteException cause, RemoteException[] suppressed) + { + initCause(cause); + for (RemoteException e : suppressed) + addSuppressed(e); + } + + @Override + public String toString() + { + String message = getMessage(); + return message != null ? originalClass + ": " + message : originalClass; + } + } + + static String getMessageWithOriginatingHost(Throwable t, boolean isFirstException) + { + if (isFirstException) + return "Remote exception from host " + FBUtilities.getBroadcastAddressAndPort().toString() + (t.getLocalizedMessage() != null ? " - " + t.getLocalizedMessage() : ""); + else + return t.getLocalizedMessage(); + } + + private static final IVersionedSerializer stackTraceElementSerializer = new IVersionedSerializer() + { + @Override + public void serialize(StackTraceElement t, DataOutputPlus out, int version) throws IOException + { + out.writeUTF(t.getClassName()); + out.writeUTF(t.getMethodName()); + out.writeBoolean(t.getFileName() != null); + if (t.getFileName() != null) + out.writeUTF(t.getFileName()); + out.writeUnsignedVInt32(t.getLineNumber()); + } + + @Override + public StackTraceElement deserialize(DataInputPlus in, int version) throws IOException + { + String className = in.readUTF(); + String methodName = in.readUTF(); + String fileName = null; + if (in.readBoolean()) + fileName = in.readUTF(); + int lineNumber = in.readUnsignedVInt32(); + return new StackTraceElement(className, methodName, fileName, lineNumber); + } + + @Override + public long serializedSize(StackTraceElement t, int version) + { + long size = sizeof(t.getClassName()) + + sizeof(t.getMethodName()) + + sizeof(t.getFileName() != null) + + sizeofUnsignedVInt(t.getLineNumber()); + if (t.getFileName() != null) + size += sizeof(t.getFileName()); + return size; + } + }; + + public static final IVersionedSerializer remoteExceptionSerializer = new IVersionedSerializer() + { + @Override + public void serialize(Throwable t, DataOutputPlus out, int version) throws IOException + { + Map alreadySerialized = new IdentityHashMap<>(); + serializeNextException(t, out, true, version, 0, alreadySerialized); + } + + private int serializeNextException(Throwable t, DataOutputPlus out, boolean isFirstException, int version, int nextExceptionId, Map alreadySerialized) throws IOException + { + if (alreadySerialized.containsKey(t)) + { + out.writeInt(alreadySerialized.get(t)); + return nextExceptionId; + } + else + { + alreadySerialized.put(t, nextExceptionId); + out.writeInt(nextExceptionId); + nextExceptionId++; + } + + out.writeUTF(t.getClass().getName()); + String message = getMessageWithOriginatingHost(t, isFirstException); + out.writeBoolean(message != null); + if (message != null) + out.writeUTF(message); + ArraySerializers.serializeArray(t.getStackTrace(), out, version, stackTraceElementSerializer); + + // Do cause and suppressed last so they can reference back to previously partially deserialized exceptions + out.writeBoolean(t.getCause() != null); + if (t.getCause() != null) + nextExceptionId = serializeNextException(t.getCause(), out, false, version, nextExceptionId, alreadySerialized); + out.writeUnsignedVInt32(t.getSuppressed().length); + for (Throwable suppressed : t.getSuppressed()) + nextExceptionId = serializeNextException(suppressed, out, false, version, nextExceptionId, alreadySerialized); + + return nextExceptionId; + } + + @Override + public Throwable deserialize(DataInputPlus in, int version) throws IOException + { + Map alreadyDeserialized = new HashMap<>(); + return deserializeNextException(in, version, alreadyDeserialized); + } + + private Throwable deserializeNextException(DataInputPlus in, int version, Map alreadyDeserialized) throws IOException + { + int nextExceptionId = in.readInt(); + Throwable alreadyDeserializedThrowable = alreadyDeserialized.get(nextExceptionId); + if (alreadyDeserializedThrowable != null) + return alreadyDeserializedThrowable; + + String originalClass = in.readUTF(); + String originalMessage = null; + if (in.readBoolean()) + originalMessage = in.readUTF(); + + StackTraceElement[] stackTrace = ArraySerializers.deserializeArray(in, version, stackTraceElementSerializer, size -> new StackTraceElement[size]); + RemoteException deserializedException = new RemoteException(originalClass, originalMessage, stackTrace); + deserializedException.setStackTrace(stackTrace); + alreadyDeserialized.put(nextExceptionId, deserializedException); + + // Do cause and suppressed last after alreadyDeserialized contains the exception we just processsed + RemoteException cause = in.readBoolean() ? (RemoteException)deserializeNextException(in, version, alreadyDeserialized) : null; + RemoteException[] suppressed = new RemoteException[in.readUnsignedVInt32()]; + for (int i = 0; i < suppressed.length; i++) + suppressed[i] = (RemoteException)deserializeNextException(in, version, alreadyDeserialized); + deserializedException.initSuppressedAndCause(cause, suppressed); + + return deserializedException; + } + + @Override + public long serializedSize(Throwable t, int version) + { + Set alreadySeen = newSetFromMap(new IdentityHashMap<>()); + return nextExceptionSerializedSize(t, version, true, alreadySeen); + } + + private long nextExceptionSerializedSize(Throwable t, int version, boolean isFirstException, Set alreadySeen) + { + if (!alreadySeen.add(t)) + return sizeof(42); // Exception ID from the last time it was serialized + + String message = getMessageWithOriginatingHost(t, isFirstException); + long size = sizeof(42) + // Exception ID generated during serialization + sizeof(t.getClass().getName()) + + sizeof(message != null) + + (message != null ? sizeof(message) : 0) + + sizeof(t.getCause() != null) + + (t.getCause() != null ? nextExceptionSerializedSize(t.getCause(), version, false, alreadySeen) : 0) + + sizeofUnsignedVInt(t.getSuppressed().length); + size += ArraySerializers.serializedArraySize(t.getStackTrace(), version, stackTraceElementSerializer); + for (Throwable suppressed : t.getSuppressed()) + size += nextExceptionSerializedSize(suppressed, version, false, alreadySeen); + return size; + } + }; + + public static final IVersionedSerializer nullableRemoteExceptionSerializer = NullableSerializer.wrap(remoteExceptionSerializer); +} diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailure.java b/src/java/org/apache/cassandra/exceptions/RequestFailure.java new file mode 100644 index 000000000000..39700d795c3b --- /dev/null +++ b/src/java/org/apache/cassandra/exceptions/RequestFailure.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.exceptions; + +import java.io.IOException; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import org.apache.cassandra.db.filter.TombstoneOverwhelmingException; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.tcm.NotCMSException; + +import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.cassandra.exceptions.ExceptionSerializer.nullableRemoteExceptionSerializer; + +/** + * Allow inclusion of a serialized exception in failure response messages + * This continues to use the same verb as the old failure response (whether a message payload or parameter) + * and has a nullable failure field that may contain a serialized in later versions. + */ +public class RequestFailure +{ + public static final RequestFailure UNKNOWN = new RequestFailure(RequestFailureReason.UNKNOWN); + public static final RequestFailure READ_TOO_MANY_TOMBSTONES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_TOMBSTONES); + public static final RequestFailure READ_TOO_MANY_INDEXES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_INDEXES); + public static final RequestFailure TIMEOUT = new RequestFailure(RequestFailureReason.TIMEOUT); + public static final RequestFailure INCOMPATIBLE_SCHEMA = new RequestFailure(RequestFailureReason.INCOMPATIBLE_SCHEMA); + public static final RequestFailure READ_SIZE = new RequestFailure(RequestFailureReason.READ_SIZE); + public static final RequestFailure NODE_DOWN = new RequestFailure(RequestFailureReason.NODE_DOWN); + public static final RequestFailure NOT_CMS = new RequestFailure(RequestFailureReason.NOT_CMS); + public static final RequestFailure INVALID_ROUTING = new RequestFailure(RequestFailureReason.INVALID_ROUTING); + public static final RequestFailure INDEX_NOT_AVAILABLE = new RequestFailure(RequestFailureReason.INDEX_NOT_AVAILABLE); + public static final RequestFailure COORDINATOR_BEHIND = new RequestFailure(RequestFailureReason.COORDINATOR_BEHIND); + + static + { + // Validate all reasons are handled + for (RequestFailureReason reason : RequestFailureReason.values()) + forReason(reason); + } + + // Allow RequestFailureReason to force class load to check failure reasons are handled + public static void init() {} + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(RequestFailure t, DataOutputPlus out, int version) throws IOException + { + RequestFailureReason.serializer.serialize(t.reason, out, version); + if (version >= MessagingService.VERSION_50) + nullableRemoteExceptionSerializer.serialize(t.failure, out, version); + } + + @Override + public RequestFailure deserialize(DataInputPlus in, int version) throws IOException + { + RequestFailureReason reason = RequestFailureReason.serializer.deserialize(in, version); + Throwable failure = null; + if (version >= MessagingService.VERSION_50) + failure = nullableRemoteExceptionSerializer.deserialize(in, version); + if (failure == null) + return forReason(reason); + else + return new RequestFailure(reason, failure); + } + + @Override + public long serializedSize(RequestFailure t, int version) + { + long size = RequestFailureReason.serializer.serializedSize(t.reason, version); + if (version >= MessagingService.VERSION_50) + size += nullableRemoteExceptionSerializer.serializedSize(t.failure, version); + return size; + } + }; + + @Nonnull + public final RequestFailureReason reason; + + @Nullable + public final Throwable failure; + + public static RequestFailure forException(Throwable t) + { + if (t instanceof TombstoneOverwhelmingException) + return READ_TOO_MANY_TOMBSTONES; + + if (t instanceof IncompatibleSchemaException) + return INCOMPATIBLE_SCHEMA; + + if (t instanceof NotCMSException) + return NOT_CMS; + + if (t instanceof InvalidRoutingException) + return INVALID_ROUTING; + + return UNKNOWN; + } + + public static RequestFailure forReason(RequestFailureReason reason) + { + switch (reason) + { + default: throw new IllegalStateException("Unhandled request failure reason " + reason); + case UNKNOWN: return UNKNOWN; + case READ_TOO_MANY_TOMBSTONES: return READ_TOO_MANY_TOMBSTONES; + case READ_TOO_MANY_INDEXES: return READ_TOO_MANY_INDEXES; + case TIMEOUT: return TIMEOUT; + case INCOMPATIBLE_SCHEMA: return INCOMPATIBLE_SCHEMA; + case READ_SIZE: return READ_SIZE; + case NODE_DOWN: return NODE_DOWN; + case NOT_CMS: return NOT_CMS; + case INVALID_ROUTING: return INVALID_ROUTING; + case INDEX_NOT_AVAILABLE: return INDEX_NOT_AVAILABLE; + case COORDINATOR_BEHIND: return COORDINATOR_BEHIND; + } + } + + private RequestFailure(RequestFailureReason reason) + { + this(reason, null); + } + + public RequestFailure(@Nonnull Throwable failure) + { + this(RequestFailureReason.UNKNOWN, failure); + } + + public RequestFailure(@Nonnull RequestFailureReason reason, @Nullable Throwable failure) + { + checkNotNull(reason); + this.reason = reason; + this.failure = failure; + } + + @Override + public String toString() + { + return "RequestFailure{" + + "reason=" + reason + + ", failure='" + failure + '\'' + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java index 9faff584f140..560b8d68e0ad 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java @@ -49,6 +49,13 @@ public enum RequestFailureReason COORDINATOR_BEHIND (10), // The following codes have been ported from an external fork, where they were offset explicitly to avoid conflicts. INDEX_BUILD_IN_PROGRESS (503); + + static + { + // Load RequestFailure class to check that all request failure reasons are handled + RequestFailure.init(); + } + public static final Serializer serializer = new Serializer(); public final int code; diff --git a/src/java/org/apache/cassandra/hints/HintsDispatcher.java b/src/java/org/apache/cassandra/hints/HintsDispatcher.java index b6273385435b..ce1f7282a6d7 100644 --- a/src/java/org/apache/cassandra/hints/HintsDispatcher.java +++ b/src/java/org/apache/cassandra/hints/HintsDispatcher.java @@ -18,7 +18,10 @@ package org.apache.cassandra.hints; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.UUID; import java.util.function.BooleanSupplier; import java.util.function.Function; @@ -26,17 +29,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.net.RequestCallback; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.io.util.File; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.HintsServiceMetrics; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.utils.concurrent.Condition; - -import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.*; +import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.FAILURE; +import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.INTERRUPTED; +import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.SUCCESS; +import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.TIMEOUT; import static org.apache.cassandra.metrics.HintsServiceMetrics.updateDelayMetrics; import static org.apache.cassandra.net.Verb.HINT_REQ; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; @@ -246,7 +251,7 @@ public boolean invokeOnFailure() } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failureMessage) { outcome = FAILURE; condition.signalAll(); diff --git a/src/java/org/apache/cassandra/locator/ReplicaPlan.java b/src/java/org/apache/cassandra/locator/ReplicaPlan.java index 7d08b341b8eb..ee8198aac3f4 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaPlan.java +++ b/src/java/org/apache/cassandra/locator/ReplicaPlan.java @@ -18,22 +18,23 @@ package org.apache.cassandra.locator; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.function.Supplier; + import com.google.common.collect.Iterables; + import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.tcm.Epoch; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.tcm.ClusterMetadata; - -import java.util.List; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.function.BiFunction; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.function.Supplier; +import org.apache.cassandra.tcm.Epoch; public interface ReplicaPlan, P extends ReplicaPlan> { @@ -49,7 +50,7 @@ public interface ReplicaPlan, P extends ReplicaPlan P withContacts(E contacts); void collectSuccess(InetAddressAndPort inetAddressAndPort); - void collectFailure(InetAddressAndPort inetAddressAndPort, RequestFailureReason t); + void collectFailure(InetAddressAndPort inetAddressAndPort, RequestFailure t); boolean stillAppliesTo(ClusterMetadata newMetadata); interface ForRead, P extends ReplicaPlan.ForRead> extends ReplicaPlan @@ -115,7 +116,7 @@ public void collectSuccess(InetAddressAndPort addr) contacted.add(addr); } - public void collectFailure(InetAddressAndPort inetAddressAndPort, RequestFailureReason t) {} + public void collectFailure(InetAddressAndPort inetAddressAndPort, RequestFailure t) {} } diff --git a/src/java/org/apache/cassandra/net/InboundSink.java b/src/java/org/apache/cassandra/net/InboundSink.java index 2e8c8413dcb7..7fbc50a2019c 100644 --- a/src/java/org/apache/cassandra/net/InboundSink.java +++ b/src/java/org/apache/cassandra/net/InboundSink.java @@ -30,11 +30,11 @@ import org.apache.cassandra.db.filter.TombstoneOverwhelmingException; import org.apache.cassandra.exceptions.CoordinatorBehindException; import org.apache.cassandra.exceptions.InvalidRoutingException; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.index.IndexNotAvailableException; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.NotCMSException; import org.apache.cassandra.utils.NoSpamLogger; @@ -109,9 +109,9 @@ public void fail(Message.Header header, Throwable failure) if (header.callBackOnFailure()) { InetAddressAndPort to = header.respondTo() != null ? header.respondTo() : header.from; - Message response = Message.failureResponse(header.id, - header.expiresAtNanos, - RequestFailureReason.forException(failure)); + Message response = Message.failureResponse(header.id, + header.expiresAtNanos, + RequestFailure.forException(failure)); messaging.send(response, to); } } diff --git a/src/java/org/apache/cassandra/net/Message.java b/src/java/org/apache/cassandra/net/Message.java index f6ca39cd853d..146f8c1119fb 100644 --- a/src/java/org/apache/cassandra/net/Message.java +++ b/src/java/org/apache/cassandra/net/Message.java @@ -36,6 +36,7 @@ import accord.messages.ReplyContext; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.io.IVersionedAsymmetricSerializer; import org.apache.cassandra.io.IVersionedSerializer; @@ -353,12 +354,17 @@ public Message emptyResponse() } /** Builds a failure response Message with an explicit reason, and fields inferred from request Message */ - public Message failureResponse(RequestFailureReason reason) + public Message failureResponse(RequestFailureReason reason) { - return failureResponse(id(), expiresAtNanos(), reason); + return failureResponse(reason, null); } - static Message failureResponse(long id, long expiresAtNanos, RequestFailureReason reason) + public Message failureResponse(RequestFailureReason reason, @Nullable Throwable failure) + { + return failureResponse(id(), expiresAtNanos(), new RequestFailure(reason, failure)); + } + + static Message failureResponse(long id, long expiresAtNanos, RequestFailure reason) { return outWithParam(id, Verb.FAILURE_RSP, expiresAtNanos, reason, null, null); } diff --git a/src/java/org/apache/cassandra/net/MessageDelivery.java b/src/java/org/apache/cassandra/net/MessageDelivery.java index 0d052cb3d8c1..7c36d73c14a6 100644 --- a/src/java/org/apache/cassandra/net/MessageDelivery.java +++ b/src/java/org/apache/cassandra/net/MessageDelivery.java @@ -29,6 +29,7 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.utils.Backoff; @@ -63,7 +64,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason reason) + public void onFailure(InetAddressAndPort from, RequestFailure reason) { logger.info("Received failure in response to {} from {}: {}", verb, from, reason); cdl.decrement(); @@ -110,6 +111,11 @@ public default void sendWithRetries(Backoff backoff, RetryScheduler r } public void respond(V response, Message message); public default void respondWithFailure(RequestFailureReason reason, Message message) + { + respondWithFailure(RequestFailure.forReason(reason), message); + } + + public default void respondWithFailure(RequestFailure reason, Message message) { send(Message.failureResponse(message.id(), message.expiresAtNanos(), reason), message.respondTo()); } @@ -121,12 +127,12 @@ interface OnResult interface RetryPredicate { - boolean test(int attempt, InetAddressAndPort from, RequestFailureReason failure); + boolean test(int attempt, InetAddressAndPort from, RequestFailure failure); } interface RetryErrorMessage { - String apply(int attempt, ResponseFailureReason retryFailure, @Nullable InetAddressAndPort from, @Nullable RequestFailureReason reason); + String apply(int attempt, ResponseFailureReason retryFailure, @Nullable InetAddressAndPort from, @Nullable RequestFailure reason); } private static void sendWithRetries(MessageDelivery messaging, @@ -157,7 +163,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failure) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { if (!backoff.mayRetry(attempt)) { @@ -212,11 +218,11 @@ public NoMoreCandidatesException(String s) class FailedResponseException extends IllegalStateException { public final InetAddressAndPort from; - public final RequestFailureReason failure; + public final RequestFailure failure; - public FailedResponseException(InetAddressAndPort from, RequestFailureReason failure, String message) + public FailedResponseException(InetAddressAndPort from, RequestFailure failure, String message) { - super(message); + super(message, failure.failure); this.from = from; this.failure = failure; } diff --git a/src/java/org/apache/cassandra/net/MessagingService.java b/src/java/org/apache/cassandra/net/MessagingService.java index 9215b7c97546..12d3f17cd4a0 100644 --- a/src/java/org/apache/cassandra/net/MessagingService.java +++ b/src/java/org/apache/cassandra/net/MessagingService.java @@ -39,6 +39,7 @@ import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.SystemKeyspace; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; @@ -71,7 +72,7 @@ * message is received, {@link RequestCallback#onResponse(Message)} method will be invoked on the * provided callback - in case of a success response. In case of a failure response (see {@link Verb#FAILURE_RSP}), * or if a response doesn't arrive within verb's configured expiry time, - * {@link RequestCallback#onFailure(InetAddressAndPort, RequestFailureReason)} will be invoked instead. + * {@link RequestCallback#onFailure(InetAddressAndPort, RequestFailure)} will be invoked instead. * 2. To send a response back, or a message that expects no response, use {@link #send(Message, InetAddressAndPort)} * method. * @@ -381,9 +382,9 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - promise.tryFailure(new FailureResponseException(from, failureReason)); + promise.tryFailure(new FailureResponseException(from, failure)); } @Override @@ -400,11 +401,11 @@ public static class FailureResponseException extends IOException private final InetAddressAndPort from; private final RequestFailureReason failureReason; - public FailureResponseException(InetAddressAndPort from, RequestFailureReason failureReason) + public FailureResponseException(InetAddressAndPort from, RequestFailure failureReason) { - super(String.format("Failure from %s: %s", from, failureReason.name())); + super(String.format("Failure from %s: %s", from, failureReason.reason.name()), failureReason.failure); this.from = from; - this.failureReason = failureReason; + this.failureReason = failureReason.reason; } public InetAddressAndPort from() @@ -499,7 +500,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failureReason) { future.setFailure(new RuntimeException(failureReason.toString())); } @@ -510,7 +511,7 @@ public void onFailure(InetAddressAndPort from, RequestFailureReason failureReaso public void respondWithFailure(RequestFailureReason reason, Message message) { - Message r = Message.failureResponse(message.id(), message.expiresAtNanos(), reason); + Message r = Message.failureResponse(message.id(), message.expiresAtNanos(), new RequestFailure(reason, null)); if (r.header.hasFlag(MessageFlag.URGENT)) r = r.withFlag(MessageFlag.URGENT); send(r, message.respondTo()); diff --git a/src/java/org/apache/cassandra/net/RequestCallback.java b/src/java/org/apache/cassandra/net/RequestCallback.java index 14e0169b858a..1265b1ea6c60 100644 --- a/src/java/org/apache/cassandra/net/RequestCallback.java +++ b/src/java/org/apache/cassandra/net/RequestCallback.java @@ -19,6 +19,7 @@ import java.util.Map; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; @@ -38,7 +39,7 @@ public interface RequestCallback /** * Called when there is an exception on the remote node or timeout happens */ - default void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + default void onFailure(InetAddressAndPort from, RequestFailure failure) { } diff --git a/src/java/org/apache/cassandra/net/RequestCallbackWithFailure.java b/src/java/org/apache/cassandra/net/RequestCallbackWithFailure.java index 685797abebd1..a7d807380eb1 100644 --- a/src/java/org/apache/cassandra/net/RequestCallbackWithFailure.java +++ b/src/java/org/apache/cassandra/net/RequestCallbackWithFailure.java @@ -18,7 +18,7 @@ package org.apache.cassandra.net; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; public interface RequestCallbackWithFailure extends RequestCallback @@ -26,7 +26,7 @@ public interface RequestCallbackWithFailure extends RequestCallback /** * Called when there is an exception on the remote node or timeout happens */ - void onFailure(InetAddressAndPort from, RequestFailureReason failureReason); + void onFailure(InetAddressAndPort from, RequestFailure failure); /** * @return true if the callback should be invoked on failure diff --git a/src/java/org/apache/cassandra/net/RequestCallbacks.java b/src/java/org/apache/cassandra/net/RequestCallbacks.java index ee63c5a3e652..485efc30b1ea 100644 --- a/src/java/org/apache/cassandra/net/RequestCallbacks.java +++ b/src/java/org/apache/cassandra/net/RequestCallbacks.java @@ -21,17 +21,15 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeoutException; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; - -import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.metrics.InternodeOutboundMetrics; @@ -154,7 +152,7 @@ private void onExpired(CallbackInfo info) messagingService.markExpiredCallback(info.peer); if (info.invokeOnFailure()) - INTERNAL_RESPONSE.submit(() -> info.callback.onFailure(info.peer, RequestFailureReason.TIMEOUT)); + INTERNAL_RESPONSE.submit(() -> info.callback.onFailure(info.peer, RequestFailure.TIMEOUT)); } void shutdownNow(boolean expireCallbacks) diff --git a/src/java/org/apache/cassandra/net/ResponseVerbHandler.java b/src/java/org/apache/cassandra/net/ResponseVerbHandler.java index 36e5cf067040..6cecd2a415da 100644 --- a/src/java/org/apache/cassandra/net/ResponseVerbHandler.java +++ b/src/java/org/apache/cassandra/net/ResponseVerbHandler.java @@ -24,7 +24,7 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tracing.Tracing; @@ -74,7 +74,7 @@ public void doVerb(Message message) RequestCallback cb = callbackInfo.callback; if (message.isFailureResponse()) { - cb.onFailure(message.from(), (RequestFailureReason) message.payload); + cb.onFailure(message.from(), (RequestFailure) message.payload); } else { diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index dc8e6d415798..c87fa0f16376 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -41,13 +41,13 @@ import org.apache.cassandra.db.ReadRepairVerbHandler; import org.apache.cassandra.db.ReadResponse; import org.apache.cassandra.db.SnapshotCommand; +import org.apache.cassandra.db.TruncateRequest; import org.apache.cassandra.db.TruncateResponse; import org.apache.cassandra.db.TruncateVerbHandler; -import org.apache.cassandra.db.TruncateRequest; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.gms.GossipDigestAck; import org.apache.cassandra.gms.GossipDigestAck2; import org.apache.cassandra.gms.GossipDigestAck2VerbHandler; -import org.apache.cassandra.gms.GossipDigestAck; import org.apache.cassandra.gms.GossipDigestAckVerbHandler; import org.apache.cassandra.gms.GossipDigestSyn; import org.apache.cassandra.gms.GossipDigestSynVerbHandler; @@ -68,17 +68,19 @@ import org.apache.cassandra.repair.messages.SnapshotMessage; import org.apache.cassandra.repair.messages.StatusRequest; import org.apache.cassandra.repair.messages.StatusResponse; -import org.apache.cassandra.repair.messages.SyncResponse; import org.apache.cassandra.repair.messages.SyncRequest; -import org.apache.cassandra.repair.messages.ValidationResponse; +import org.apache.cassandra.repair.messages.SyncResponse; import org.apache.cassandra.repair.messages.ValidationRequest; +import org.apache.cassandra.repair.messages.ValidationResponse; import org.apache.cassandra.schema.SchemaMutationsSerializer; import org.apache.cassandra.schema.SchemaPullVerbHandler; import org.apache.cassandra.schema.SchemaPushVerbHandler; import org.apache.cassandra.schema.SchemaVersionVerbHandler; +import org.apache.cassandra.service.EchoVerbHandler; +import org.apache.cassandra.service.SnapshotVerbHandler; import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; import org.apache.cassandra.service.accord.AccordSyncPropagator; +import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; @@ -104,14 +106,18 @@ import org.apache.cassandra.service.paxos.PaxosPrepareRefresh; import org.apache.cassandra.service.paxos.PaxosPropose; import org.apache.cassandra.service.paxos.PaxosRepair; +import org.apache.cassandra.service.paxos.PrepareResponse; +import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupComplete; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupHistory; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupRequest; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupResponse; -import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupComplete; -import org.apache.cassandra.service.paxos.cleanup.PaxosStartPrepareCleanup; import org.apache.cassandra.service.paxos.cleanup.PaxosFinishPrepareCleanup; +import org.apache.cassandra.service.paxos.cleanup.PaxosStartPrepareCleanup; +import org.apache.cassandra.service.paxos.v1.PrepareVerbHandler; +import org.apache.cassandra.service.paxos.v1.ProposeVerbHandler; import org.apache.cassandra.streaming.DataMovement; import org.apache.cassandra.streaming.DataMovementVerbHandler; +import org.apache.cassandra.streaming.ReplicationDoneVerbHandler; import org.apache.cassandra.tcm.Discovery; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.FetchCMSLog; @@ -122,26 +128,49 @@ import org.apache.cassandra.tcm.sequences.DataMovements; import org.apache.cassandra.tcm.serialization.MessageSerializers; import org.apache.cassandra.utils.BooleanSerializer; -import org.apache.cassandra.service.EchoVerbHandler; -import org.apache.cassandra.service.SnapshotVerbHandler; -import org.apache.cassandra.service.paxos.PrepareResponse; -import org.apache.cassandra.service.paxos.v1.PrepareVerbHandler; -import org.apache.cassandra.service.paxos.v1.ProposeVerbHandler; -import org.apache.cassandra.streaming.ReplicationDoneVerbHandler; import org.apache.cassandra.utils.ReflectionUtils; import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.UUIDSerializer; import static java.util.concurrent.TimeUnit.NANOSECONDS; -import static org.apache.cassandra.concurrent.Stage.*; +import static org.apache.cassandra.concurrent.Stage.ANTI_ENTROPY; +import static org.apache.cassandra.concurrent.Stage.COUNTER_MUTATION; +import static org.apache.cassandra.concurrent.Stage.FETCH_LOG; +import static org.apache.cassandra.concurrent.Stage.GOSSIP; +import static org.apache.cassandra.concurrent.Stage.IMMEDIATE; +import static org.apache.cassandra.concurrent.Stage.INTERNAL_METADATA; +import static org.apache.cassandra.concurrent.Stage.INTERNAL_RESPONSE; +import static org.apache.cassandra.concurrent.Stage.MIGRATION; +import static org.apache.cassandra.concurrent.Stage.MISC; +import static org.apache.cassandra.concurrent.Stage.MUTATION; +import static org.apache.cassandra.concurrent.Stage.PAXOS_REPAIR; +import static org.apache.cassandra.concurrent.Stage.READ; +import static org.apache.cassandra.concurrent.Stage.REQUEST_RESPONSE; +import static org.apache.cassandra.concurrent.Stage.TRACING; import static org.apache.cassandra.net.ResponseHandlerSupplier.RESPONSE_HANDLER; -import static org.apache.cassandra.net.VerbTimeouts.*; -import static org.apache.cassandra.net.Verb.Kind.*; -import static org.apache.cassandra.net.Verb.Priority.*; +import static org.apache.cassandra.net.Verb.Kind.CUSTOM; +import static org.apache.cassandra.net.Verb.Kind.NORMAL; +import static org.apache.cassandra.net.Verb.Priority.P0; +import static org.apache.cassandra.net.Verb.Priority.P1; +import static org.apache.cassandra.net.Verb.Priority.P2; +import static org.apache.cassandra.net.Verb.Priority.P3; +import static org.apache.cassandra.net.Verb.Priority.P4; +import static org.apache.cassandra.net.VerbTimeouts.counterTimeout; +import static org.apache.cassandra.net.VerbTimeouts.longTimeout; +import static org.apache.cassandra.net.VerbTimeouts.noTimeout; +import static org.apache.cassandra.net.VerbTimeouts.pingTimeout; +import static org.apache.cassandra.net.VerbTimeouts.rangeTimeout; +import static org.apache.cassandra.net.VerbTimeouts.readTimeout; +import static org.apache.cassandra.net.VerbTimeouts.repairTimeout; +import static org.apache.cassandra.net.VerbTimeouts.repairValidationRspTimeout; +import static org.apache.cassandra.net.VerbTimeouts.repairWithBackoffTimeout; +import static org.apache.cassandra.net.VerbTimeouts.rpcTimeout; +import static org.apache.cassandra.net.VerbTimeouts.truncateTimeout; +import static org.apache.cassandra.net.VerbTimeouts.writeTimeout; import static org.apache.cassandra.tcm.ClusterMetadataService.commitRequestHandler; import static org.apache.cassandra.tcm.ClusterMetadataService.currentEpochRequestHandler; -import static org.apache.cassandra.tcm.ClusterMetadataService.logNotifyHandler; import static org.apache.cassandra.tcm.ClusterMetadataService.fetchLogRequestHandler; +import static org.apache.cassandra.tcm.ClusterMetadataService.logNotifyHandler; import static org.apache.cassandra.tcm.ClusterMetadataService.replicationHandler; /** @@ -306,7 +335,7 @@ public enum Verb ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), // generic failure response - FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailureReason.serializer, RESPONSE_HANDLER ), + FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailure.serializer, RESPONSE_HANDLER ), // dummy verbs _TRACE (30, P1, rpcTimeout, TRACING, () -> NoPayload.serializer, () -> null ), @@ -438,7 +467,7 @@ public long expiresAfterNanos() // this is a little hacky, but reduces the number of parameters up top public boolean isResponse() { - return handler == RESPONSE_HANDLER; + return handler.get() == ResponseVerbHandler.instance; } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/repair/SnapshotTask.java b/src/java/org/apache/cassandra/repair/SnapshotTask.java index ad45070cfb27..a95e0668d81e 100644 --- a/src/java/org/apache/cassandra/repair/SnapshotTask.java +++ b/src/java/org/apache/cassandra/repair/SnapshotTask.java @@ -19,10 +19,10 @@ import java.util.concurrent.RunnableFuture; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.repair.messages.RepairMessage; import org.apache.cassandra.repair.messages.SnapshotMessage; import org.apache.cassandra.utils.concurrent.AsyncFuture; @@ -81,9 +81,9 @@ public boolean invokeOnFailure() } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - task.tryFailure(new RuntimeException("Could not create snapshot at " + from + "; " + failureReason)); + task.tryFailure(new RuntimeException("Could not create snapshot at " + from + "; " + failure.reason)); } } } diff --git a/src/java/org/apache/cassandra/repair/messages/RepairMessage.java b/src/java/org/apache/cassandra/repair/messages/RepairMessage.java index 835f90fc6804..e38a930bcca0 100644 --- a/src/java/org/apache/cassandra/repair/messages/RepairMessage.java +++ b/src/java/org/apache/cassandra/repair/messages/RepairMessage.java @@ -36,6 +36,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RepairRetrySpec; import org.apache.cassandra.config.RetrySpec; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.metrics.RepairMetrics; import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.exceptions.RepairException; @@ -73,7 +74,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failureReason) { } }; @@ -217,9 +218,9 @@ else if (reason == RequestFailureReason.TIMEOUT) finalCallback.onFailure(from, failure); return false; case RETRY: - if (failure == RequestFailureReason.TIMEOUT && allowRetry.get()) + if (failure.reason == RequestFailureReason.TIMEOUT && allowRetry.get()) return true; - maybeRecordRetry.accept(attempt, failure); + maybeRecordRetry.accept(attempt, failure.reason); finalCallback.onFailure(from, failure); return false; default: @@ -230,7 +231,7 @@ else if (reason == RequestFailureReason.TIMEOUT) switch (retryReason) { case MaxRetries: - maybeRecordRetry.accept(attempt, failure); + maybeRecordRetry.accept(attempt, failure.reason); finalCallback.onFailure(from, failure); return null; case Interrupted: @@ -255,9 +256,9 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - failureCallback.onFailure(RepairException.error(request.desc, PreviewKind.NONE, String.format("Got %s failure from %s: %s", verb, from, failureReason))); + failureCallback.onFailure(RepairException.error(request.desc, PreviewKind.NONE, String.format("Got %s failure from %s: %s", verb, from, failure.reason))); } @Override diff --git a/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java b/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java index 343bac1c4f80..d085174043df 100644 --- a/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java +++ b/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java @@ -36,6 +36,7 @@ import org.apache.cassandra.db.IMutation; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.exceptions.WriteFailureException; import org.apache.cassandra.exceptions.WriteTimeoutException; @@ -295,7 +296,7 @@ protected void signal() } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { logger.trace("Got failure from {}", from); @@ -309,7 +310,7 @@ public void onFailure(InetAddressAndPort from, RequestFailureReason failureReaso if (failureReasonByEndpoint == null) failureReasonByEndpoint = new ConcurrentHashMap<>(); } - failureReasonByEndpoint.put(from, failureReason); + failureReasonByEndpoint.put(from, failure.reason); logFailureOrTimeoutToIdealCLDelegate(); diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index 1a60bd306745..1592718bf117 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -64,6 +64,7 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.gms.ApplicationState; import org.apache.cassandra.gms.EndpointState; @@ -731,10 +732,10 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { failedNodes.add(from.toString()); - if (failureReason == RequestFailureReason.TIMEOUT) + if (failure.reason == RequestFailureReason.TIMEOUT) { pending.set(-1); promise.setFailure(failRepairException(parentRepairSession, "Did not get replies from all endpoints.")); @@ -787,7 +788,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { logger.debug("Failed to clean up parent repair session {} on {}. The uncleaned sessions will " + "be removed on a node restart. This should not be a problem unless you see thousands " + diff --git a/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java b/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java index 0fa284770080..dd2ebae915c5 100644 --- a/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java +++ b/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java @@ -20,7 +20,7 @@ import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.WriteFailureException; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.locator.InetAddressAndPort; @@ -55,9 +55,9 @@ public void onResponse(Message msg) cleanup.ackMutation(); } - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - wrapped.onFailure(from, failureReason); + wrapped.onFailure(from, failure); } public boolean invokeOnFailure() diff --git a/src/java/org/apache/cassandra/service/FailureRecordingCallback.java b/src/java/org/apache/cassandra/service/FailureRecordingCallback.java index c4ca8e22f5ed..b672c4bf3f9c 100644 --- a/src/java/org/apache/cassandra/service/FailureRecordingCallback.java +++ b/src/java/org/apache/cassandra/service/FailureRecordingCallback.java @@ -25,6 +25,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.RequestCallbackWithFailure; @@ -136,9 +137,9 @@ public int failureCount() private static final AtomicReferenceFieldUpdater responsesUpdater = AtomicReferenceFieldUpdater.newUpdater(FailureRecordingCallback.class, FailureResponses.class, "failureResponses"); @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - FailureResponses.push(responsesUpdater, this, from, failureReason); + FailureResponses.push(responsesUpdater, this, from, failure.reason); } protected void onFailureWithMutex(InetAddressAndPort from, RequestFailureReason failureReason) diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index c8eb3be5978a..e61c9187a309 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -91,7 +91,7 @@ import org.apache.cassandra.exceptions.ReadFailureException; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.RequestFailureException; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.UnavailableException; import org.apache.cassandra.exceptions.WriteFailureException; @@ -861,7 +861,7 @@ public void runMayThrow() { if (!(ex instanceof WriteTimeoutException)) logger.error("Failed to apply paxos commit locally : ", ex); - responseHandler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailureReason.forException(ex)); + responseHandler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailure.forException(ex)); } } @@ -1373,7 +1373,7 @@ private static void asyncWriteBatchedMutations(List } catch (OverloadedException | WriteTimeoutException e) { - wrapper.handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailureReason.forException(e)); + wrapper.handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailure.forException(e)); } } } @@ -1723,7 +1723,7 @@ public void runMayThrow() { if (!(ex instanceof WriteTimeoutException)) logger.error("Failed to apply mutation locally : ", ex); - handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailureReason.forException(ex)); + handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailure.forException(ex)); } } @@ -2246,7 +2246,7 @@ protected void runMayThrow() { // We track latency based on request processing time MessagingService.instance().metrics.recordSelfDroppedMessage(verb, MonotonicClock.Global.preciseTime.now() - requestTime.startedAtNanos(), NANOSECONDS); - handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailureReason.UNKNOWN); + handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailure.UNKNOWN); } if (!readRejected) @@ -2257,12 +2257,12 @@ protected void runMayThrow() { if (t instanceof TombstoneOverwhelmingException) { - handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailureReason.READ_TOO_MANY_TOMBSTONES); + handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailure.READ_TOO_MANY_TOMBSTONES); logger.error(t.getMessage()); } else { - handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailureReason.UNKNOWN); + handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailure.UNKNOWN); throw t; } } diff --git a/src/java/org/apache/cassandra/service/TruncateResponseHandler.java b/src/java/org/apache/cassandra/service/TruncateResponseHandler.java index 54b1241006d7..566d9fa02240 100644 --- a/src/java/org/apache/cassandra/service/TruncateResponseHandler.java +++ b/src/java/org/apache/cassandra/service/TruncateResponseHandler.java @@ -23,19 +23,19 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.cassandra.utils.concurrent.Condition; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.db.TruncateResponse; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.exceptions.TruncateException; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; - import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getTruncateRpcTimeout; import static org.apache.cassandra.utils.Clock.Global.nanoTime; @@ -100,10 +100,10 @@ public void onResponse(Message message) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { // If the truncation hasn't succeeded on some replica, abort and indicate this back to the client. - failureReasonByEndpoint.put(from, failureReason); + failureReasonByEndpoint.put(from, failure.reason); condition.signalAll(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCallback.java b/src/java/org/apache/cassandra/service/accord/AccordCallback.java index d25d27f59304..955e034d11a9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCallback.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCallback.java @@ -24,8 +24,9 @@ import accord.coordinate.Timeout; import accord.local.AgentExecutor; import accord.messages.Callback; -import accord.messages.SafeCallback; import accord.messages.Reply; +import accord.messages.SafeCallback; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; @@ -49,19 +50,19 @@ public void onResponse(Message msg) success(endpointMapper.mappedId(msg.from()), msg.payload); } - private static Throwable convertReason(RequestFailureReason reason) + private static Throwable convertFailureMessage(RequestFailure failure) { - return reason == RequestFailureReason.TIMEOUT ? + return failure.reason == RequestFailureReason.TIMEOUT ? new Timeout(null, null) : - new RuntimeException(reason.toString()); + new RuntimeException(failure.failure); } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - logger.debug("Received failure {} from {} for {}", failureReason, from, this); + logger.debug("Received failure {} from {} for {}", failure, from, this); // TODO (now): we should distinguish timeout failures with some placeholder Exception - failure(endpointMapper.mappedId(from), convertReason(failureReason)); + failure(endpointMapper.mappedId(from), convertFailureMessage(failure)); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index a6d8e4f3417b..1692e85fe8fe 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -26,8 +26,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; - -import org.apache.cassandra.net.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,7 +38,12 @@ import accord.messages.Reply; import accord.messages.ReplyContext; import accord.messages.Request; +import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.Verb; public class AccordMessageSink implements MessageSink { @@ -93,6 +96,9 @@ private VerbMapping() for (MessageType type : MessageType.values()) { + // Any request can receive a generic failure response + if (type == MessageType.FAILURE_RSP) + continue; if (!mapping.containsKey(type)) throw new AssertionError("Missing mapping for Accord MessageType " + type); } @@ -153,6 +159,16 @@ public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply messaging.send(replyMsg, endpoint); } + @Override + public void replyWithUnknownFailure(Node.Id replyingToNode, ReplyContext replyContext, Throwable failure) + { + Message replyTo = (Message) replyContext; + Message replyMsg = replyTo.failureResponse(RequestFailureReason.UNKNOWN, failure); + InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); + logger.debug("Replying with failure {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); + messaging.send(replyMsg, endpoint); + } + private static void checkReplyType(Reply reply, Message replyTo) { Verb verb = getVerb(reply.type()); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java index 3e215a7e6810..e16facee4a0c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java @@ -37,7 +37,7 @@ import org.agrona.collections.Long2ObjectHashMap; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.gms.IFailureDetector; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -294,7 +294,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { scheduler.schedule(() -> AccordSyncPropagator.this.notify(to, notifications), 1, TimeUnit.MINUTES); } diff --git a/src/java/org/apache/cassandra/service/paxos/Paxos.java b/src/java/org/apache/cassandra/service/paxos/Paxos.java index a82396cd2449..75392640a06c 100644 --- a/src/java/org/apache/cassandra/service/paxos/Paxos.java +++ b/src/java/org/apache/cassandra/service/paxos/Paxos.java @@ -28,13 +28,11 @@ import java.util.function.Function; import java.util.function.Predicate; import java.util.function.Supplier; - import javax.annotation.Nullable; import com.google.common.base.Preconditions; import com.google.common.collect.Iterators; import com.google.common.collect.Maps; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -73,6 +71,7 @@ import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.exceptions.RequestFailureException; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.UnavailableException; @@ -91,6 +90,8 @@ import org.apache.cassandra.service.paxos.Commit.Proposal; import org.apache.cassandra.service.paxos.cleanup.PaxosRepairState; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteAccepted; +import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteCommitted; import org.apache.cassandra.service.reads.DataResolver; import org.apache.cassandra.service.reads.repair.NoopReadRepair; import org.apache.cassandra.tcm.Epoch; @@ -101,8 +102,6 @@ import org.apache.cassandra.triggers.TriggerExecutor; import org.apache.cassandra.utils.CassandraVersion; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteAccepted; -import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteCommitted; import org.apache.cassandra.utils.NoSpamLogger; import static java.util.Collections.emptyMap; @@ -111,10 +110,14 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.PAXOS_LOG_TTL_LINEARIZABILITY_VIOLATIONS; import static org.apache.cassandra.config.CassandraRelevantProperties.PAXOS_MODERN_RELEASE; import static org.apache.cassandra.config.Config.PaxosVariant.v2_without_linearizable_reads_or_rejected_writes; +import static org.apache.cassandra.config.DatabaseDescriptor.getCasContentionTimeout; +import static org.apache.cassandra.config.DatabaseDescriptor.getWriteRpcTimeout; +import static org.apache.cassandra.db.ConsistencyLevel.LOCAL_QUORUM; +import static org.apache.cassandra.db.ConsistencyLevel.LOCAL_SERIAL; +import static org.apache.cassandra.db.ConsistencyLevel.QUORUM; +import static org.apache.cassandra.db.ConsistencyLevel.SERIAL; import static org.apache.cassandra.db.Keyspace.openAndGetStore; import static org.apache.cassandra.exceptions.RequestFailureReason.TIMEOUT; -import static org.apache.cassandra.config.DatabaseDescriptor.*; -import static org.apache.cassandra.db.ConsistencyLevel.*; import static org.apache.cassandra.locator.InetAddressAndPort.Serializer.inetAddressAndPortSerializer; import static org.apache.cassandra.locator.ReplicaLayout.forTokenWriteLiveAndDown; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casReadMetrics; @@ -126,9 +129,9 @@ import static org.apache.cassandra.service.paxos.Ballot.Flag.LOCAL; import static org.apache.cassandra.service.paxos.BallotGenerator.Global.nextBallot; import static org.apache.cassandra.service.paxos.BallotGenerator.Global.staleBallot; -import static org.apache.cassandra.service.paxos.ContentionStrategy.*; import static org.apache.cassandra.service.paxos.ContentionStrategy.Type.READ; import static org.apache.cassandra.service.paxos.ContentionStrategy.Type.WRITE; +import static org.apache.cassandra.service.paxos.ContentionStrategy.waitForContention; import static org.apache.cassandra.service.paxos.PaxosCommit.commit; import static org.apache.cassandra.service.paxos.PaxosCommitAndPrepare.commitAndPrepare; import static org.apache.cassandra.service.paxos.PaxosPrepare.prepare; @@ -439,7 +442,7 @@ public void collectSuccess(InetAddressAndPort inetAddressAndPort) } @Override - public void collectFailure(InetAddressAndPort inetAddressAndPort, RequestFailureReason t) + public void collectFailure(InetAddressAndPort inetAddressAndPort, RequestFailure t) { } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosCommit.java b/src/java/org/apache/cassandra/service/paxos/PaxosCommit.java index b5ce86794dbb..b79e032fe233 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosCommit.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosCommit.java @@ -29,7 +29,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.Mutation; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.EndpointsForToken; import org.apache.cassandra.locator.InOurDc; import org.apache.cassandra.locator.InetAddressAndPort; @@ -44,13 +44,12 @@ import org.apache.cassandra.utils.concurrent.ConditionAsConsumer; import static java.util.Collections.emptyMap; -import static org.apache.cassandra.exceptions.RequestFailureReason.NODE_DOWN; import static org.apache.cassandra.exceptions.RequestFailureReason.UNKNOWN; import static org.apache.cassandra.net.Verb.PAXOS2_COMMIT_REMOTE_REQ; import static org.apache.cassandra.net.Verb.PAXOS_COMMIT_REQ; import static org.apache.cassandra.service.StorageProxy.shouldHint; import static org.apache.cassandra.service.StorageProxy.submitHint; -import static org.apache.cassandra.service.paxos.Commit.*; +import static org.apache.cassandra.service.paxos.Commit.Agreed; import static org.apache.cassandra.utils.concurrent.ConditionAsConsumer.newConditionAsConsumer; // Does not support EACH_QUORUM, as no such thing as EACH_SERIAL @@ -186,7 +185,7 @@ void start(Participants participants, boolean async) executeOnSelf |= isSelfOrSend(commitMessage, mutationMessage, participants.allLive.endpoint(i)); for (int i = 0, mi = participants.allDown.size(); i < mi ; ++i) - onFailure(participants.allDown.endpoint(i), NODE_DOWN); + onFailure(participants.allDown.endpoint(i), RequestFailure.NODE_DOWN); if (executeOnSelf) { @@ -223,7 +222,7 @@ private static boolean isInLocalDc(InetAddressAndPort destination) * Record a failure or timeout, and maybe submit a hint to {@code from} */ @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason reason) + public void onFailure(InetAddressAndPort from, RequestFailure reason) { if (logger.isTraceEnabled()) logger.trace("{} {} from {}", commit, reason, from); diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java index f293f1dba9e1..4a3d69eee10b 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java @@ -34,9 +34,15 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.ReadCommand; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.ReadResponse; +import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.UnavailableException; import org.apache.cassandra.gms.EndpointState; import org.apache.cassandra.gms.Gossiper; @@ -44,7 +50,6 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.metrics.PaxosMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -53,6 +58,7 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tracing.Tracing; @@ -64,13 +70,33 @@ import static org.apache.cassandra.net.Verb.PAXOS2_PREPARE_REQ; import static org.apache.cassandra.net.Verb.PAXOS2_PREPARE_RSP; import static org.apache.cassandra.service.paxos.Ballot.Flag.NONE; -import static org.apache.cassandra.service.paxos.Commit.*; -import static org.apache.cassandra.service.paxos.Paxos.*; -import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.*; +import static org.apache.cassandra.service.paxos.Commit.Accepted; +import static org.apache.cassandra.service.paxos.Commit.Committed; +import static org.apache.cassandra.service.paxos.Commit.CompareResult; +import static org.apache.cassandra.service.paxos.Commit.isAfter; +import static org.apache.cassandra.service.paxos.Paxos.Electorate; +import static org.apache.cassandra.service.paxos.Paxos.LOG_TTL_LINEARIZABILITY_VIOLATIONS; +import static org.apache.cassandra.service.paxos.Paxos.Participants; +import static org.apache.cassandra.service.paxos.Paxos.consistency; +import static org.apache.cassandra.service.paxos.Paxos.getPaxosVariant; +import static org.apache.cassandra.service.paxos.Paxos.isInRangeAndShouldProcess; +import static org.apache.cassandra.service.paxos.Paxos.newBallot; +import static org.apache.cassandra.service.paxos.Paxos.verifyElectorate; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.ELECTORATE_MISMATCH; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.FOUND_INCOMPLETE_ACCEPTED; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.FOUND_INCOMPLETE_COMMITTED; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.MAYBE_FAILURE; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.PROMISED; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.READ_PERMITTED; +import static org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome.SUPERSEDED; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PERMIT_READ; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PROMISE; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.REJECT; +import static org.apache.cassandra.service.paxos.PaxosState.Snapshot; +import static org.apache.cassandra.service.paxos.PaxosState.get; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.Clock.Global.nanoTime; -import static org.apache.cassandra.service.paxos.PaxosState.*; -import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.*; import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; import static org.apache.cassandra.utils.CollectionSerializers.serializedMapSize; @@ -804,7 +830,7 @@ private void addReadResponse(ReadResponse response, InetAddressAndPort from) } @Override - public synchronized void onFailure(InetAddressAndPort from, RequestFailureReason reason) + public synchronized void onFailure(InetAddressAndPort from, RequestFailure reason) { if (logger.isTraceEnabled()) logger.trace("{} {} failure from {}", request, reason, from); @@ -812,7 +838,7 @@ public synchronized void onFailure(InetAddressAndPort from, RequestFailureReason if (isDone()) return; - super.onFailureWithMutex(from, reason); + super.onFailureWithMutex(from, reason.reason); ++failures; if (failures + participants.sizeOfConsensusQuorum == 1 + participants.sizeOfPoll()) @@ -875,7 +901,7 @@ private void refreshStaleParticipants() } @Override - public void onRefreshFailure(InetAddressAndPort from, RequestFailureReason reason) + public void onRefreshFailure(InetAddressAndPort from, RequestFailure reason) { onFailure(from, reason); } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java b/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java index fbdab4c6fdfe..ad909c6a004b 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPrepareRefresh.java @@ -24,6 +24,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.io.IVersionedSerializer; @@ -38,8 +39,6 @@ import org.apache.cassandra.service.paxos.Commit.Committed; import org.apache.cassandra.tracing.Tracing; -import static org.apache.cassandra.exceptions.RequestFailureReason.TIMEOUT; -import static org.apache.cassandra.exceptions.RequestFailureReason.UNKNOWN; import static org.apache.cassandra.net.Verb.PAXOS2_PREPARE_REFRESH_REQ; import static org.apache.cassandra.service.paxos.Commit.isAfter; import static org.apache.cassandra.service.paxos.PaxosRequestCallback.shouldExecuteOnSelf; @@ -65,7 +64,7 @@ public class PaxosPrepareRefresh implements RequestCallbackWithFailure refresh) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason reason) + public void onFailure(InetAddressAndPort from, RequestFailure reason) { callbacks.onRefreshFailure(from, reason); } @@ -124,8 +123,8 @@ private void executeOnSelf() } catch (Exception ex) { - RequestFailureReason reason = UNKNOWN; - if (ex instanceof WriteTimeoutException) reason = TIMEOUT; + RequestFailure reason = RequestFailure.UNKNOWN; + if (ex instanceof WriteTimeoutException) reason = RequestFailure.TIMEOUT; else logger.error("Failed to apply paxos refresh-prepare locally", ex); onFailure(getBroadcastAddressAndPort(), reason); @@ -167,7 +166,7 @@ public void doVerb(Message message) { Response response = execute(message.payload, message.from()); if (response == null) - MessagingService.instance().respondWithFailure(UNKNOWN, message); + MessagingService.instance().respondWithFailure(RequestFailureReason.UNKNOWN, message); else MessagingService.instance().respond(response, message); } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java b/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java index db702af7d43c..77a6fb4971a7 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java @@ -29,7 +29,7 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -43,8 +43,8 @@ import static java.util.Collections.emptyMap; import static org.apache.cassandra.exceptions.RequestFailureReason.UNKNOWN; import static org.apache.cassandra.net.Verb.PAXOS2_PROPOSE_REQ; -import static org.apache.cassandra.service.paxos.PaxosPropose.Superseded.SideEffects.NO; import static org.apache.cassandra.service.paxos.PaxosPropose.Superseded.SideEffects.MAYBE; +import static org.apache.cassandra.service.paxos.PaxosPropose.Superseded.SideEffects.NO; import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.concurrent.ConditionAsConsumer.newConditionAsConsumer; @@ -263,7 +263,7 @@ public void onResponse(Response response, InetAddressAndPort from) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason reason) + public void onFailure(InetAddressAndPort from, RequestFailure reason) { if (logger.isTraceEnabled()) logger.trace("{} {} failure from {}", proposal, reason, from); diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java b/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java index 0242a5110e66..ed369539ba65 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java @@ -45,7 +45,7 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.UnavailableException; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -189,7 +189,7 @@ private class Querying extends State implements RequestCallbackWithFailure i1.onFailure()); } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java b/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java index aad32ace0503..ff5f2d406839 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java @@ -24,14 +24,14 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.CassandraRelevantProperties; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.service.FailureRecordingCallback; -import static org.apache.cassandra.exceptions.RequestFailureReason.TIMEOUT; -import static org.apache.cassandra.exceptions.RequestFailureReason.UNKNOWN; +import static org.apache.cassandra.exceptions.RequestFailure.TIMEOUT; +import static org.apache.cassandra.exceptions.RequestFailure.UNKNOWN; import static org.apache.cassandra.utils.FBUtilities.getBroadcastAddressAndPort; public abstract class PaxosRequestCallback extends FailureRecordingCallback @@ -58,7 +58,7 @@ protected void executeOnSelf(I parameter, BiFunction message) InetAddressAndPort from = message.from(); if (WarningContext.isSupported(params.keySet())) { - RequestFailureReason reason = getWarningContext().updateCounters(params, from); + RequestFailure reason = getWarningContext().updateCounters(params, from); replicaPlan().collectFailure(message.from(), reason); if (reason != null) { @@ -236,11 +237,11 @@ public boolean trackLatencyForSnitch() } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { assertWaitingFor(from); - failureReasonByEndpoint.put(from, failureReason); + failureReasonByEndpoint.put(from, failure.reason); if (replicaPlan().readQuorum() + failuresUpdater.incrementAndGet(this) > replicaPlan().contacts().size()) condition.signalAll(); diff --git a/src/java/org/apache/cassandra/service/reads/thresholds/WarningContext.java b/src/java/org/apache/cassandra/service/reads/thresholds/WarningContext.java index dd6ee2f1a6e8..5bb5deb99a90 100644 --- a/src/java/org/apache/cassandra/service/reads/thresholds/WarningContext.java +++ b/src/java/org/apache/cassandra/service/reads/thresholds/WarningContext.java @@ -22,7 +22,7 @@ import java.util.Map; import java.util.Set; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.ParamType; @@ -43,31 +43,31 @@ public static boolean isSupported(Set keys) return !Collections.disjoint(keys, SUPPORTED); } - public RequestFailureReason updateCounters(Map params, InetAddressAndPort from) + public RequestFailure updateCounters(Map params, InetAddressAndPort from) { for (Map.Entry entry : params.entrySet()) { WarnAbortCounter counter = null; - RequestFailureReason reason = null; + RequestFailure reason = null; switch (entry.getKey()) { case ROW_INDEX_READ_SIZE_FAIL: - reason = RequestFailureReason.READ_SIZE; + reason = RequestFailure.READ_SIZE; case ROW_INDEX_READ_SIZE_WARN: counter = rowIndexReadSize; break; case LOCAL_READ_SIZE_FAIL: - reason = RequestFailureReason.READ_SIZE; + reason = RequestFailure.READ_SIZE; case LOCAL_READ_SIZE_WARN: counter = localReadSize; break; case TOMBSTONE_FAIL: - reason = RequestFailureReason.READ_TOO_MANY_TOMBSTONES; + reason = RequestFailure.READ_TOO_MANY_TOMBSTONES; case TOMBSTONE_WARNING: counter = tombstones; break; case TOO_MANY_REFERENCED_INDEXES_FAIL: - reason = RequestFailureReason.READ_TOO_MANY_INDEXES; + reason = RequestFailure.READ_TOO_MANY_INDEXES; case TOO_MANY_REFERENCED_INDEXES_WARN: counter = indexReadSSTablesCount; break; diff --git a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java index 19bdc7ae9bbe..45b5945cbc5c 100644 --- a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java @@ -32,8 +32,8 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.exceptions.ReadTimeoutException; -import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.EndpointsForRange; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.metrics.TCMMetrics; @@ -202,10 +202,10 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - logger.debug("Error response from {} with {}", from, failureReason); - condition.tryFailure(new TimeoutException(failureReason.toString())); + logger.debug("Error response from {} with {}", from, failure.reason); + condition.tryFailure(new TimeoutException(failure.reason.toString())); } public void retry() diff --git a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java index 0ea055b908f9..e5cb0568fed4 100644 --- a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java +++ b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java @@ -34,6 +34,7 @@ import com.codahale.metrics.Timer; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.gms.FailureDetector; import org.apache.cassandra.locator.InetAddressAndPort; @@ -213,7 +214,7 @@ public static void sendWithCallbackAsync(Promise promise, Verb v (attempt, from, failure) -> { if (promise.isDone() || promise.isCancelled()) return false; - if (failure == RequestFailureReason.NOT_CMS) + if (failure.reason == RequestFailureReason.NOT_CMS) { logger.debug("{} is not a member of the CMS, querying it to discover current membership", from); DiscoveredNodes cms = tryDiscover(from); @@ -257,7 +258,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { // "success" - this lets us just try the next one in cmsIter promise.setSuccess(new DiscoveredNodes(Collections.emptySet(), DiscoveredNodes.Kind.KNOWN_PEERS)); diff --git a/src/java/org/apache/cassandra/tcm/migration/Election.java b/src/java/org/apache/cassandra/tcm/migration/Election.java index 507a55d31c82..6ada116323d6 100644 --- a/src/java/org/apache/cassandra/tcm/migration/Election.java +++ b/src/java/org/apache/cassandra/tcm/migration/Election.java @@ -29,7 +29,6 @@ import java.util.stream.Collectors; import com.google.common.collect.Sets; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/java/org/apache/cassandra/tcm/sequences/ProgressBarrier.java b/src/java/org/apache/cassandra/tcm/sequences/ProgressBarrier.java index af504d35d362..8212a57d9dd3 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ProgressBarrier.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ProgressBarrier.java @@ -39,7 +39,7 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; @@ -544,10 +544,10 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - logger.debug("Error response from {} with {}", from, failureReason); - condition.tryFailure(new TimeoutException(String.format("Watermark request did returned %s.", failureReason))); + logger.debug("Error response from {} with {}", from, failure); + condition.tryFailure(new TimeoutException(String.format("Watermark request did returned %s.", failure.reason))); } public void retry() diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 689277742259..7417f757bc88 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -94,7 +94,6 @@ import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import org.objectweb.asm.Opcodes; -import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_AVAILABLE_PROCESSORS; import static org.apache.cassandra.config.CassandraRelevantProperties.BUILD_DATE; import static org.apache.cassandra.config.CassandraRelevantProperties.GIT_SHA; import static org.apache.cassandra.config.CassandraRelevantProperties.LINE_SEPARATOR; @@ -131,8 +130,6 @@ public class FBUtilities private static volatile String previousReleaseVersionString; - private static final int availableProcessors = CASSANDRA_AVAILABLE_PROCESSORS.getInt(DatabaseDescriptor.getAvailableProcessors()); - private static volatile Supplier systemInfoSupplier = Suppliers.memoize(SystemInfo::new); public static void setAvailableProcessors(int value) diff --git a/test/data/config/version=5.0-alpha1.yml b/test/data/config/version=5.0-alpha1.yml index 8dad0f60acc2..19995ce52b88 100644 --- a/test/data/config/version=5.0-alpha1.yml +++ b/test/data/config/version=5.0-alpha1.yml @@ -407,7 +407,7 @@ max_concurrent_automatic_sstable_upgrades: "java.lang.Integer" maximum_replication_factor_warn_threshold: "java.lang.Integer" denylist_reads_enabled: "java.lang.Boolean" permissions_cache_active_update: "java.lang.Boolean" -available_processors: "java.lang.Integer" +available_processors: "org.apache.cassandra.config.OptionaldPositiveInt" file_cache_round_up: "java.lang.Boolean" secondary_indexes_per_table_warn_threshold: "java.lang.Integer" tables_warn_threshold: "java.lang.Integer" diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java index ca5407fe8995..b30626136b29 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedAction.java @@ -32,7 +32,7 @@ import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IMessage; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.net.RequestCallbacks; @@ -396,7 +396,7 @@ List applyToMessage(IInvokableInstance from, IInvokableInstance to, IMes if (callback != null) { RequestCallback invokeOn = (RequestCallback) callback.callback; - RequestFailureReason reason = innerIsTimeout ? RequestFailureReason.TIMEOUT : RequestFailureReason.UNKNOWN; + RequestFailure reason = innerIsTimeout ? RequestFailure.TIMEOUT : RequestFailure.UNKNOWN; invokeOn.onFailure(address, reason); } return null; diff --git a/test/unit/org/apache/cassandra/config/ConfigCompatibilityTest.java b/test/unit/org/apache/cassandra/config/ConfigCompatibilityTest.java index f5d10a6b526d..2d38db65308a 100644 --- a/test/unit/org/apache/cassandra/config/ConfigCompatibilityTest.java +++ b/test/unit/org/apache/cassandra/config/ConfigCompatibilityTest.java @@ -119,6 +119,7 @@ public class ConfigCompatibilityTest .add("Property role_manager used to be a value-type, but now is nested type class org.apache.cassandra.config.ParameterizedClass") .add("Property network_authorizer used to be a value-type, but now is nested type class org.apache.cassandra.config.ParameterizedClass") .add("require_client_auth types do not match; java.lang.String != java.lang.Boolean") + .add("available_processors types do not match; org.apache.cassandra.config.OptionaldPositiveInt != java.lang.Integer") .build(); /** diff --git a/test/unit/org/apache/cassandra/db/CounterMutationVerbHandlerOutOfRangeTest.java b/test/unit/org/apache/cassandra/db/CounterMutationVerbHandlerOutOfRangeTest.java index 9ae05e3e4ee1..da58eccfa02e 100644 --- a/test/unit/org/apache/cassandra/db/CounterMutationVerbHandlerOutOfRangeTest.java +++ b/test/unit/org/apache/cassandra/db/CounterMutationVerbHandlerOutOfRangeTest.java @@ -37,7 +37,7 @@ import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.InvalidRoutingException; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; @@ -51,12 +51,18 @@ import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.utils.FBUtilities; -import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.*; -import static org.apache.cassandra.utils.ByteBufferUtil.bytes; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.MessageDelivery; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.broadcastAddress; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.bytesToken; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.node1; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.randomInt; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.registerOutgoingMessageSink; +import static org.apache.cassandra.utils.ByteBufferUtil.bytes; + public class CounterMutationVerbHandlerOutOfRangeTest { private static final String KEYSPACE = "CounterCacheTest"; @@ -168,7 +174,7 @@ private static void verifyFailureResponse(ListenableFuture mess MessageDelivery response = messageSink.get(100, TimeUnit.MILLISECONDS); assertEquals(Verb.FAILURE_RSP, response.message.verb()); assertEquals(broadcastAddress, response.message.from()); - assertTrue(response.message.payload instanceof RequestFailureReason); + assertTrue(response.message.payload instanceof RequestFailure); assertEquals(messageId, response.message.id()); assertEquals(node1, response.to); } diff --git a/test/unit/org/apache/cassandra/db/MutationVerbHandlerOutOfRangeTest.java b/test/unit/org/apache/cassandra/db/MutationVerbHandlerOutOfRangeTest.java index 0421e26aa575..77ef47192ebb 100644 --- a/test/unit/org/apache/cassandra/db/MutationVerbHandlerOutOfRangeTest.java +++ b/test/unit/org/apache/cassandra/db/MutationVerbHandlerOutOfRangeTest.java @@ -37,7 +37,7 @@ import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.InvalidRoutingException; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -51,11 +51,17 @@ import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.utils.FBUtilities; -import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.*; -import static org.apache.cassandra.utils.ByteBufferUtil.bytes; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.MessageDelivery; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.broadcastAddress; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.bytesToken; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.node1; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.randomInt; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.registerOutgoingMessageSink; +import static org.apache.cassandra.utils.ByteBufferUtil.bytes; + public class MutationVerbHandlerOutOfRangeTest { private static final String TEST_NAME = "mutation_vh_test_"; @@ -190,7 +196,7 @@ private void getAndVerifyResponse(ListenableFuture messageSink, MessageDelivery response = messageSink.get(100, TimeUnit.MILLISECONDS); assertEquals(isOutOfRange ? Verb.FAILURE_RSP : Verb.MUTATION_RSP, response.message.verb()); assertEquals(broadcastAddress, response.message.from()); - assertEquals(isOutOfRange, response.message.payload instanceof RequestFailureReason); + assertEquals(isOutOfRange, response.message.payload instanceof RequestFailure); assertEquals(messageId, response.message.id()); assertEquals(node1, response.to); assertEquals(startingTotalMetricCount + (isOutOfRange ? 1 : 0), StorageMetrics.totalOpsForInvalidToken.getCount()); diff --git a/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java b/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java index 847326ca4d5a..419772d3044f 100644 --- a/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java +++ b/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java @@ -37,7 +37,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.InvalidRoutingException; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; @@ -50,10 +50,16 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; -import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.*; -import static org.apache.cassandra.net.Verb.READ_REQ; import static org.junit.Assert.assertEquals; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.MessageDelivery; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.broadcastAddress; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.bytesToken; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.node1; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.randomInt; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.registerOutgoingMessageSink; +import static org.apache.cassandra.net.Verb.READ_REQ; + public class ReadCommandVerbHandlerOutOfRangeTest { private static ReadCommandVerbHandler handler; @@ -184,7 +190,7 @@ private void getAndVerifyResponse(ListenableFuture messageSink, MessageDelivery response = messageSink.get(100, TimeUnit.MILLISECONDS); assertEquals(isOutOfRange ? Verb.FAILURE_RSP : Verb.READ_RSP, response.message.verb()); assertEquals(broadcastAddress, response.message.from()); - assertEquals(isOutOfRange, response.message.payload instanceof RequestFailureReason); + assertEquals(isOutOfRange, response.message.payload instanceof RequestFailure); assertEquals(messageId, response.message.id()); assertEquals(node1, response.to); assertEquals(startingTotalMetricCount + (isOutOfRange ? 1 : 0), StorageMetrics.totalOpsForInvalidToken.getCount()); diff --git a/test/unit/org/apache/cassandra/exceptions/RemoteExceptionTest.java b/test/unit/org/apache/cassandra/exceptions/RemoteExceptionTest.java new file mode 100644 index 000000000000..42e211e16236 --- /dev/null +++ b/test/unit/org/apache/cassandra/exceptions/RemoteExceptionTest.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.exceptions; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.Test; + +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService; + +import static com.google.common.base.Throwables.getStackTraceAsString; +import static org.apache.cassandra.exceptions.ExceptionSerializer.getMessageWithOriginatingHost; +import static org.apache.cassandra.exceptions.ExceptionSerializer.nullableRemoteExceptionSerializer; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class RemoteExceptionTest +{ + @Test + public void testRoundtrip() throws Exception + { + testRoundtrip(null); + Throwable root = new Throwable(); + testRoundtrip(root); + Throwable suppressed = new Throwable(); + Throwable causedByRoot = new Throwable(root); + testRoundtrip(causedByRoot); + causedByRoot.addSuppressed(root); + testRoundtrip(causedByRoot); + root.addSuppressed(causedByRoot); + testRoundtrip(root); + root.addSuppressed(suppressed); + testRoundtrip(root); + } + + public void testRoundtrip(Throwable original) throws Exception + { + Throwable normalizedOriginal = normalizeThrowable(original); + + DataOutputBuffer dob = new DataOutputBuffer(); + nullableRemoteExceptionSerializer.serialize(original, dob, MessagingService.current_version); + assertEquals(nullableRemoteExceptionSerializer.serializedSize(original, MessagingService.current_version), dob.toByteArray().length); + DataInputBuffer dib = new DataInputBuffer(dob.toByteArray()); + Throwable test = nullableRemoteExceptionSerializer.deserialize(dib, MessagingService.current_version); + if (original == null) + { + assertNull(test); + } + else + { + String originalString = getStackTraceAsString(normalizedOriginal); + String testString = getStackTraceAsString(test); + assertEquals(originalString, testString); + } + } + + public static Throwable normalizeThrowable(Throwable t) throws Exception + { + return normalizeThrowable(t, true, new HashMap<>()); + } + + private static Throwable normalizeThrowable(Throwable t, boolean isFirstException, Map alreadyNormalized) throws Exception + { + if (t == null) + return null; + + if (alreadyNormalized.containsKey(t)) + return alreadyNormalized.get(t); + + // Classloader, module name, and module version are difficult to get right because STE doesn't + // expose enough parameters to serialize the formatting correctly so settle for something close, but not exact + // Alternatives look fragile across different JVM versions and yield only moderate additional debugability + // when using class loaders and modules + StackTraceElement[] originalStack = t.getStackTrace(); + StackTraceElement[] normalizedStack = new StackTraceElement[originalStack.length]; + for (int i = 0; i < originalStack.length; i++) + { + StackTraceElement originalSTE = originalStack[i]; + normalizedStack[i] = new StackTraceElement(originalSTE.getClassName(), originalSTE.getMethodName(), originalSTE.getFileName(), originalSTE.getLineNumber()); + } + + Throwable normalized; + if (t.getCause() == null) + normalized = t.getClass().getConstructor(String.class).newInstance(getMessageWithOriginatingHost(t, isFirstException)); + else + normalized = t.getClass().getConstructor(String.class, Throwable.class).newInstance(getMessageWithOriginatingHost(t, isFirstException), normalizeThrowable(t.getCause(), false, alreadyNormalized)); + alreadyNormalized.put(t, normalized); + normalized.setStackTrace(normalizedStack); + for (Throwable suppressed : t.getSuppressed()) + normalized.addSuppressed(normalizeThrowable(suppressed, false, alreadyNormalized)); + return normalized; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/net/ConnectionTest.java b/test/unit/org/apache/cassandra/net/ConnectionTest.java index 42c137cc5f63..70bb0c8046e3 100644 --- a/test/unit/org/apache/cassandra/net/ConnectionTest.java +++ b/test/unit/org/apache/cassandra/net/ConnectionTest.java @@ -59,7 +59,7 @@ import org.apache.cassandra.config.EncryptionOptions; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.UnknownColumnException; import org.apache.cassandra.io.IVersionedAsymmetricSerializer; import org.apache.cassandra.io.IVersionedSerializer; @@ -78,7 +78,7 @@ import static org.apache.cassandra.net.MessagingService.current_version; import static org.apache.cassandra.net.ConnectionType.LARGE_MESSAGES; import static org.apache.cassandra.net.ConnectionType.SMALL_MESSAGES; -import static org.apache.cassandra.net.ConnectionUtils.*; +import static org.apache.cassandra.net.ConnectionUtils.check; import static org.apache.cassandra.net.OutboundConnectionSettings.Framing.LZ4; import static org.apache.cassandra.net.OutboundConnections.LARGE_MESSAGE_THRESHOLD; import static org.apache.cassandra.utils.Clock.Global.nanoTime; @@ -388,7 +388,7 @@ public long serializedSize(Object o, int version) MessagingService.instance().callbacks.addWithExpiration(new RequestCallback() { @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { done.countDown(); } diff --git a/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java b/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java index c2d4656ddceb..e8fcf286a62e 100644 --- a/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java +++ b/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java @@ -35,7 +35,7 @@ import org.apache.cassandra.concurrent.SimulatedExecutorFactory; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.MessageDelivery.FailedResponseException; import org.apache.cassandra.net.MessageDelivery.MaxRetriesException; @@ -170,7 +170,7 @@ public void sendWithRetryDontAllowRetry() assertThat(result).isDone(); FailedResponseException e = getFailedResponseException(result); assertThat(e.from).isEqualTo(ID1); - assertThat(e.failure).isEqualTo(RequestFailureReason.TIMEOUT); + assertThat(e.failure).isEqualTo(RequestFailure.TIMEOUT); Mockito.verify(backoff, Mockito.times(1)).mayRetry(Mockito.anyInt()); Mockito.verify(backoff, Mockito.never()).computeWaitTime(Mockito.anyInt()); Mockito.verify(backoff, Mockito.never()).unit(); diff --git a/test/unit/org/apache/cassandra/net/MessageTest.java b/test/unit/org/apache/cassandra/net/MessageTest.java index 8e89973aa75e..ddc5f6b9c6b2 100644 --- a/test/unit/org/apache/cassandra/net/MessageTest.java +++ b/test/unit/org/apache/cassandra/net/MessageTest.java @@ -19,8 +19,8 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.charset.CharacterCodingException; import java.nio.charset.StandardCharsets; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; @@ -30,6 +30,7 @@ import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputBuffer; @@ -37,6 +38,7 @@ import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.MessagingService.Version; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.tracing.Tracing.TraceType; @@ -44,16 +46,22 @@ import org.apache.cassandra.utils.FreeRunningClock; import org.apache.cassandra.utils.TimeUUID; +import static com.google.common.base.Throwables.getStackTraceAsString; +import static org.apache.cassandra.exceptions.RemoteExceptionTest.normalizeThrowable; import static org.apache.cassandra.net.Message.serializer; import static org.apache.cassandra.net.MessagingService.VERSION_40; +import static org.apache.cassandra.net.MessagingService.VERSION_50; import static org.apache.cassandra.net.NoPayload.noPayload; import static org.apache.cassandra.net.ParamType.RESPOND_TO; import static org.apache.cassandra.net.ParamType.TRACE_SESSION; import static org.apache.cassandra.net.ParamType.TRACE_TYPE; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; - -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; public class MessageTest { @@ -165,7 +173,7 @@ public void testBuilder() } @Test - public void testCycleNoPayload() throws IOException + public void testCycleNoPayload() throws Exception { Message msg = Message.builder(Verb._TEST_1, noPayload) @@ -190,15 +198,20 @@ public void testCycleWithPayload() throws Exception } @Test - public void testFailureResponse() throws IOException + public void testFailureResponse() throws Exception { long expiresAt = approxTime.now(); - Message msg = Message.failureResponse(1, expiresAt, RequestFailureReason.INCOMPATIBLE_SCHEMA); + ExecutionException cause = new ExecutionException("test", new NullPointerException()); + Throwable root = new Throwable(cause); + Throwable suppressed = new Throwable(); + root.addSuppressed(suppressed); + Message msg = Message.failureResponse(1, expiresAt, new RequestFailure(RequestFailureReason.INCOMPATIBLE_SCHEMA, root)); assertEquals(1, msg.id()); assertEquals(Verb.FAILURE_RSP, msg.verb()); assertEquals(expiresAt, msg.expiresAtNanos()); - assertEquals(RequestFailureReason.INCOMPATIBLE_SCHEMA, msg.payload); + assertEquals(RequestFailureReason.INCOMPATIBLE_SCHEMA, msg.payload.reason); + assertEquals(getStackTraceAsString(root), getStackTraceAsString(msg.payload.failure)); assertTrue(msg.isFailureResponse()); testCycle(msg); @@ -218,19 +231,26 @@ public void testBuilderNotAddTraceHeaderWithNoTraceSession() } @Test - public void testCustomParams() throws CharacterCodingException, IOException + public void testCustomParams() throws IOException + { + for (Version version : MessagingService.Version.values()) + if (version.value >= VERSION_40) + testCustomParams(version.value); + } + + private void testCustomParams(int version) throws IOException { long id = 1; InetAddressAndPort from = FBUtilities.getLocalAddressAndPort(); Message msg = - Message.builder(Verb._TEST_1, noPayload) - .withEpoch(Epoch.EMPTY) - .withId(1) - .from(from) - .withCustomParam("custom1", "custom1value".getBytes(StandardCharsets.UTF_8)) - .withCustomParam("custom2", "custom2value".getBytes(StandardCharsets.UTF_8)) - .build(); + Message.builder(Verb._TEST_1, noPayload) + .withEpoch(Epoch.EMPTY) + .withId(1) + .from(from) + .withCustomParam("custom1", "custom1value".getBytes(StandardCharsets.UTF_8)) + .withCustomParam("custom2", "custom2value".getBytes(StandardCharsets.UTF_8)) + .build(); assertEquals(id, msg.id()); assertEquals(from, msg.from()); @@ -239,9 +259,10 @@ public void testCustomParams() throws CharacterCodingException, IOException assertEquals("custom2value", new String(msg.header.customParams().get("custom2"), StandardCharsets.UTF_8)); DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get(); - Message.serializer.serialize(msg, out, VERSION_40); + out.clear(); + Message.serializer.serialize(msg, out, version); DataInputBuffer in = new DataInputBuffer(out.buffer(), true); - msg = Message.serializer.deserialize(in, from, VERSION_40); + msg = Message.serializer.deserialize(in, from, version); assertEquals(id, msg.id()); assertEquals(from, msg.from()); @@ -265,13 +286,13 @@ private void testAddTraceHeaderWithType(TraceType traceType) } } - private void testCycle(Message msg) throws IOException + private void testCycle(Message msg) throws Exception { testCycle(msg, VERSION_40); } // serialize (using both variants, all in one or header then rest), verify serialized size, deserialize, compare to the original - private void testCycle(Message msg, int version) throws IOException + private void testCycle(Message msg, int version) throws Exception { try (DataOutputBuffer out = new DataOutputBuffer()) { @@ -283,7 +304,7 @@ private void testCycle(Message msg, int version) throws IOException { Message msgOut = serializer.deserialize(in, msg.from(), version); assertEquals(0, in.available()); - assertMessagesEqual(msg, msgOut); + assertMessagesEqual(msg, msgOut, version); } // extract header first, then deserialize the rest of the message and compare outcomes @@ -293,12 +314,12 @@ private void testCycle(Message msg, int version) throws IOException Message.Header headerOut = serializer.extractHeader(buffer, msg.from(), approxTime.now(), version); Message msgOut = serializer.deserialize(in, headerOut, version); assertEquals(0, in.available()); - assertMessagesEqual(msg, msgOut); + assertMessagesEqual(msg, msgOut, version); } } } - private static void assertMessagesEqual(Message msg1, Message msg2) + private static void assertMessagesEqual(Message msg1, Message msg2, int version) throws Exception { assertEquals(msg1.id(), msg2.id()); assertEquals(msg1.verb(), msg2.verb()); @@ -316,6 +337,19 @@ private static void assertMessagesEqual(Message msg1, Message msg2) assertTrue(payload2 == noPayload || payload2 == null); else if (null == payload2) assertSame(payload1, noPayload); + else if (msg1.verb() == Verb.FAILURE_RSP) + { + RequestFailure reason1 = (RequestFailure)msg1.payload; + RequestFailure reason2 = (RequestFailure)msg2.payload; + assertEquals(reason1.reason, reason2.reason); + if (version >= VERSION_50) + { + if (reason1.failure == null) + assertNull(reason2.failure); + else + assertEquals(getStackTraceAsString(normalizeThrowable(reason1.failure)), getStackTraceAsString(reason2.failure)); + } + } else assertEquals(payload1, payload2); } diff --git a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java index 6a335f4aab90..6cbabe37cca9 100644 --- a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java +++ b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java @@ -31,7 +31,7 @@ import accord.utilsfork.Gens; import accord.utilsfork.RandomSource; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; @@ -184,7 +184,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failure) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { promise.tryFailure(new MessagingService.FailureResponseException(from, failure)); } @@ -237,7 +237,7 @@ private void maybeEnqueue(Message message, InetAddressAndPort to if (action == Action.FAILURE) onDropped.onDrop(action, to, message); if (callback != null) - scheduler.schedule(() -> callback.onFailure(to, RequestFailureReason.UNKNOWN), + scheduler.schedule(() -> callback.onFailure(to, RequestFailure.UNKNOWN), message.verb().expiresAfterNanos(), TimeUnit.NANOSECONDS); return; default: @@ -252,7 +252,7 @@ private void maybeEnqueue(Message message, InetAddressAndPort to assert ctx == cb; try { - ctx.onFailure(to, RequestFailureReason.TIMEOUT); + ctx.onFailure(to, RequestFailure.TIMEOUT); } catch (Throwable t) { @@ -302,7 +302,7 @@ public void recieve(Message msg) try { if (msg.isFailureResponse()) - callback.onFailure(msg.from(), (RequestFailureReason) msg.payload); + callback.onFailure(msg.from(), (RequestFailure) msg.payload); else callback.onResponse(msg); } catch (Throwable t) @@ -364,7 +364,7 @@ public void onResponse(Message msg) callback.onResponse(msg); } - public void onFailure(InetAddressAndPort from, RequestFailureReason failure) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { if (callback.invokeOnFailure()) callback.onFailure(from, failure); } diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 9169fb4e88e8..593a4941287c 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -85,7 +85,7 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.gms.ApplicationState; import org.apache.cassandra.gms.EndpointState; import org.apache.cassandra.gms.HeartBeatState; @@ -816,7 +816,7 @@ public void onResponse(Message msg) callback.onResponse(msg); } - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failureReason) { if (callback.invokeOnFailure()) callback.onFailure(from, failureReason); } @@ -950,7 +950,7 @@ private void maybeEnqueue(Message message, InetAddressAndPort to assert ctx == cb; try { - ctx.onFailure(to, RequestFailureReason.TIMEOUT); + ctx.onFailure(to, RequestFailure.TIMEOUT); } catch (Throwable t) { @@ -992,7 +992,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failureReason) { promise.tryFailure(new MessagingService.FailureResponseException(from, failureReason)); } @@ -1177,7 +1177,7 @@ void handle(Message msg) try { if (msg.isFailureResponse()) - callback.onFailure(msg.from(), (RequestFailureReason) msg.payload); + callback.onFailure(msg.from(), (RequestFailure) msg.payload); else callback.onResponse(msg); } catch (Throwable t) diff --git a/test/unit/org/apache/cassandra/repair/messages/RepairMessageTest.java b/test/unit/org/apache/cassandra/repair/messages/RepairMessageTest.java index fb3ce470f581..7bbea89f1252 100644 --- a/test/unit/org/apache/cassandra/repair/messages/RepairMessageTest.java +++ b/test/unit/org/apache/cassandra/repair/messages/RepairMessageTest.java @@ -23,7 +23,7 @@ import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.gms.IGossiper; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.RepairMetrics; @@ -87,7 +87,7 @@ public void noRetries() public void noRetriesRequestFailed() { test(NO_RETRY_ATTEMPTS, ((ignore, callback) -> { - callback.onFailure(ADDRESS, RequestFailureReason.UNKNOWN); + callback.onFailure(ADDRESS, RequestFailure.UNKNOWN); assertNoRetries(); })); } @@ -105,7 +105,7 @@ public void retryWithSuccess() public void retryWithTimeout() { test((maxAttempts, callback) -> { - callback.onFailure(ADDRESS, RequestFailureReason.TIMEOUT); + callback.onFailure(ADDRESS, RequestFailure.TIMEOUT); assertMetrics(maxAttempts, true, false); }); } @@ -114,7 +114,7 @@ public void retryWithTimeout() public void retryWithFailure() { test((maxAttempts, callback) -> { - callback.onFailure(ADDRESS, RequestFailureReason.UNKNOWN); + callback.onFailure(ADDRESS, RequestFailure.UNKNOWN); assertMetrics(maxAttempts, false, true); }); } @@ -208,7 +208,7 @@ private void test(int[] attempts, TestCase fn) sendMessageWithRetries(ctx, backoff(maxAttempts), always(), PAYLOAD, VERB, ADDRESS, RepairMessage.NOOP_CALLBACK); for (int i = 0; i < maxAttempts; i++) - callback(messaging).onFailure(ADDRESS, RequestFailureReason.TIMEOUT); + callback(messaging).onFailure(ADDRESS, RequestFailure.TIMEOUT); fn.test(maxAttempts, callback(messaging)); Mockito.verifyNoInteractions(messaging); } diff --git a/test/unit/org/apache/cassandra/service/WriteResponseHandlerTest.java b/test/unit/org/apache/cassandra/service/WriteResponseHandlerTest.java index 03785f3c305b..63562b35eb80 100644 --- a/test/unit/org/apache/cassandra/service/WriteResponseHandlerTest.java +++ b/test/unit/org/apache/cassandra/service/WriteResponseHandlerTest.java @@ -35,8 +35,8 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.WriteType; import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.EndpointsForToken; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.NodeProximity; @@ -238,8 +238,8 @@ public void failedIdealCLIncrementsStatForExplicitOnFailure() //Fail in remote DC - awr.onFailure(targets.get(3).endpoint(), RequestFailureReason.TIMEOUT); - awr.onFailure(targets.get(4).endpoint(), RequestFailureReason.TIMEOUT); + awr.onFailure(targets.get(3).endpoint(), RequestFailure.TIMEOUT); + awr.onFailure(targets.get(4).endpoint(), RequestFailure.TIMEOUT); awr.onResponse(createDummyMessage(5)); assertEquals(startingCountForWriteFailedIdealCL + 1, ks.metric.writeFailedIdealCL.getCount()); @@ -281,14 +281,14 @@ public void failedIdealCLDoesNotIncrementsStatOnExplicitQueryFailure() //Fail in local DC - awr.onFailure(targets.get(0).endpoint(), RequestFailureReason.TIMEOUT); - awr.onFailure(targets.get(1).endpoint(), RequestFailureReason.TIMEOUT); + awr.onFailure(targets.get(0).endpoint(), RequestFailure.TIMEOUT); + awr.onFailure(targets.get(1).endpoint(), RequestFailure.TIMEOUT); awr.onResponse(createDummyMessage(2)); //Fail in remote DC - awr.onFailure(targets.get(3).endpoint(), RequestFailureReason.TIMEOUT); - awr.onFailure(targets.get(4).endpoint(), RequestFailureReason.TIMEOUT); + awr.onFailure(targets.get(3).endpoint(), RequestFailure.TIMEOUT); + awr.onFailure(targets.get(4).endpoint(), RequestFailure.TIMEOUT); awr.onResponse(createDummyMessage(5)); assertEquals(startingCountForWriteFailedIdealCL, ks.metric.writeFailedIdealCL.getCount()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java index 7059a12f2a64..b24424b05518 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java @@ -23,18 +23,21 @@ import java.util.ArrayList; import java.util.List; +import org.junit.BeforeClass; import org.junit.Test; import accord.primitives.TxnId; import accord.utils.AccordGens; import accord.utils.Gen; import accord.utils.Gens; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.service.accord.AccordJournal.Key; import org.apache.cassandra.utils.AsymmetricOrdering; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.FBUtilities.Order; +import org.apache.cassandra.utils.StorageCompatibilityMode; import org.checkerframework.checker.nullness.qual.Nullable; import static accord.utils.Property.qt; @@ -42,6 +45,12 @@ public class AccordJournalTest { + @BeforeClass + public static void setCompatibilityMode() + { + CassandraRelevantProperties.TEST_STORAGE_COMPATIBILITY_MODE.setEnum(StorageCompatibilityMode.NONE); + } + @Test public void keySerde() { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index ab6e2790ce2c..5115dfdc47ce 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -59,7 +59,7 @@ import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.gms.IFailureDetectionEventListener; import org.apache.cassandra.gms.IFailureDetector; import org.apache.cassandra.locator.InetAddressAndPort; @@ -289,10 +289,10 @@ public void sendWithCallback(Message message, InetAddressAndPort switch (action) { case ERROR: - cb.onFailure(to, RequestFailureReason.UNKNOWN); + cb.onFailure(to, RequestFailure.UNKNOWN); return; case TIMEOUT: - cb.onFailure(to, RequestFailureReason.TIMEOUT); + cb.onFailure(to, RequestFailure.TIMEOUT); return; case DELIVER: break; @@ -304,7 +304,7 @@ public void sendWithCallback(Message message, InetAddressAndPort scheduler.schedule(() -> { RequestCallback removed = callbacks.remove(message.id()); if (removed != null) - removed.onFailure(to, RequestFailureReason.TIMEOUT); + removed.onFailure(to, RequestFailure.TIMEOUT); }, 1, TimeUnit.MINUTES); } diff --git a/test/unit/org/apache/cassandra/service/paxos/PaxosVerbHandlerOutOfRangeTest.java b/test/unit/org/apache/cassandra/service/paxos/PaxosVerbHandlerOutOfRangeTest.java index 1ed2bef52ed9..a788c0ff9ddf 100644 --- a/test/unit/org/apache/cassandra/service/paxos/PaxosVerbHandlerOutOfRangeTest.java +++ b/test/unit/org/apache/cassandra/service/paxos/PaxosVerbHandlerOutOfRangeTest.java @@ -33,7 +33,7 @@ import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; @@ -48,9 +48,15 @@ import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.utils.ByteBufferUtil; -import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.*; import static org.junit.Assert.assertEquals; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.MessageDelivery; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.broadcastAddress; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.bytesToken; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.node1; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.randomInt; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.registerOutgoingMessageSink; + public class PaxosVerbHandlerOutOfRangeTest // PaxosV1 out of range tests - V2 implements OOTR checks at the protocol level { // For the purposes of this testing, the details of the Commit don't really matter @@ -175,7 +181,7 @@ private void getAndVerifyResponse(ListenableFuture messageSink, MessageDelivery response = messageSink.get(100, TimeUnit.MILLISECONDS); assertEquals(verb, response.message.verb()); Assert.assertEquals(broadcastAddress, response.message.from()); - assertEquals(isOutOfRange, response.message.payload instanceof RequestFailureReason); + assertEquals(isOutOfRange, response.message.payload instanceof RequestFailure); assertEquals(messageId, response.message.id()); Assert.assertEquals(node1, response.to); assertEquals(startingTotalMetricCount + (isOutOfRange ? 1 : 0), StorageMetrics.totalOpsForInvalidToken.getCount()); diff --git a/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java b/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java index e23c7078b40d..989130adb4f2 100644 --- a/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java +++ b/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java @@ -22,32 +22,33 @@ import org.apache.commons.lang3.exception.ExceptionUtils; -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.locator.ReplicaPlan; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.Util; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ReadFailureException; import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.EndpointsForToken; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.NoPayload; import org.apache.cassandra.net.Verb; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.tcm.Epoch; import static java.util.concurrent.TimeUnit.DAYS; import static java.util.concurrent.TimeUnit.MILLISECONDS; @@ -149,8 +150,8 @@ public void testSpeculateSucceeded() throws Throwable public void run() { //Failures end the read promptly but don't require mock data to be suppleid - executor.handler.onFailure(targets.get(0).endpoint(), RequestFailureReason.READ_TOO_MANY_TOMBSTONES); - executor.handler.onFailure(targets.get(1).endpoint(), RequestFailureReason.READ_TOO_MANY_TOMBSTONES); + executor.handler.onFailure(targets.get(0).endpoint(), RequestFailure.READ_TOO_MANY_TOMBSTONES); + executor.handler.onFailure(targets.get(1).endpoint(), RequestFailure.READ_TOO_MANY_TOMBSTONES); executor.handler.condition.signalAll(); } }.start(); @@ -221,7 +222,7 @@ public void testRaceWithNonSpeculativeFailure() { // Fail the first request. When this fails the number of contacts has already been increased // to 2, so the failure won't actally signal. However... - executor.handler.onFailure(targets.get(0).endpoint(), RequestFailureReason.READ_TOO_MANY_TOMBSTONES); + executor.handler.onFailure(targets.get(0).endpoint(), RequestFailure.READ_TOO_MANY_TOMBSTONES); // ...speculative retries are fired after a short wait, and it is possible for the failure to // reach the handler just before one is fired and the number of contacts incremented... diff --git a/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java b/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java index 564eb98f8457..60213554420f 100644 --- a/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java +++ b/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java @@ -36,7 +36,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.ConnectionType; import org.apache.cassandra.net.IVerbHandler; @@ -194,7 +194,7 @@ public void sendWithCallback(Message message, InetAddressAndPort else { logger.info("{} simulating failure sending request to {}", addr, to); - cb.onFailure(to, RequestFailureReason.TIMEOUT); + cb.onFailure(to, RequestFailure.TIMEOUT); } } catch (IOException e) diff --git a/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java b/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java index ace1da7f1a90..1006bc941fcf 100644 --- a/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java +++ b/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java @@ -37,7 +37,7 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.distributed.api.IIsolatedExecutor; import org.apache.cassandra.distributed.test.log.CMSTestBase; -import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.harry.gen.EntropySource; import org.apache.cassandra.harry.gen.Surjections; import org.apache.cassandra.harry.gen.rng.PCGFastPure; @@ -147,7 +147,7 @@ public void sendWithCallback(Message message, InetAddressAndPort } else { - cb.onFailure(message.from(), RequestFailureReason.TIMEOUT); + cb.onFailure(message.from(), RequestFailure.TIMEOUT); } } From 6167e436c302b41569e2a53c08c2351a8ac04be2 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 21 Aug 2023 13:32:42 -0700 Subject: [PATCH 066/340] CEP-15 (C*): when loading commands that have empty waiting_on, make sure not to loose the partial deps (#3590) patch by David Capwell; reviewed by Aleksey Yeschenko for CASSANDRA-18783 --- modules/accord | 2 +- .../service/accord/AccordKeyspace.java | 4 +- .../service/accord/AccordKeyspaceTest.java | 90 +++++++++++++++++++ .../service/accord/AccordTestUtils.java | 9 ++ 4 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java diff --git a/modules/accord b/modules/accord index 91336705bde8..8c7a3c9ef420 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 91336705bde8332954e849219d73205d68fa168a +Subproject commit 8c7a3c9ef4209d635b186189e17a2d9e728e9871 diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index d845c74c53cb..5f31f8627630 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -636,10 +636,10 @@ private static T deserializeOrNull(ByteBuffer bytes, LocalVersionedSerialize return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, serializer) : null; } - private static WaitingOn deserializeWaitingOn(Deps deps, ByteBuffer bytes) throws IOException + private static WaitingOn deserializeWaitingOn(@Nullable Deps deps, @Nullable ByteBuffer bytes) throws IOException { if (bytes == null || !bytes.hasRemaining()) - return WaitingOn.EMPTY; + return deps == null ? WaitingOn.EMPTY : WaitingOn.none(deps); return WaitingOnSerializer.deserialize(deps, new DataInputBuffer(bytes, false)); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java new file mode 100644 index 000000000000..d868850845fc --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Collections; +import java.util.concurrent.atomic.AtomicLong; + +import org.junit.Test; + +import accord.api.RoutingKey; +import accord.local.Command; +import accord.local.CommonAttributes; +import accord.local.Node; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.Ballot; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.KeyDeps; +import accord.primitives.Keys; +import accord.primitives.PartialTxn; +import accord.primitives.RangeDeps; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.assertj.core.api.Assertions; + +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; +import static org.apache.cassandra.service.accord.AccordTestUtils.wrapInTxn; + +public class AccordKeyspaceTest extends CQLTester.InMemory +{ + private static final Ranges GLOBAL_SCOPE = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(KEYSPACE), AccordRoutingKey.SentinelKey.max(KEYSPACE))); + + @Test + public void serde() + { + AtomicLong now = new AtomicLong(); + + String tableName = createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))"); + + AccordCommandStore store = AccordTestUtils.createAccordCommandStore(now::incrementAndGet, KEYSPACE, tableName); + + TxnId id = new TxnId(Timestamp.fromValues(1, 42, new Node.Id(1)), Txn.Kind.Read, Routable.Domain.Key); + + Txn txn = createTxn(wrapInTxn(String.format("SELECT * FROM %s.%s WHERE k=? LIMIT 1", KEYSPACE, tableName)), Collections.singletonList(42)); + + PartialTxn partialTxn = txn.slice(GLOBAL_SCOPE, true); + RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); + FullRoute route = partialTxn.keys().toRoute(routingKey); + Deps deps = new Deps(KeyDeps.none((Keys) txn.keys()), RangeDeps.NONE); + + + CommonAttributes.Mutable common = new CommonAttributes.Mutable(id); + common.partialTxn(partialTxn); + common.route(route); + common.partialDeps(deps.slice(GLOBAL_SCOPE)); + common.durability(Status.Durability.NotDurable); + Command.WaitingOn waitingOn = Command.WaitingOn.none(deps.slice(GLOBAL_SCOPE)); + Command.Committed committed = Command.SerializerSupport.committed(common, SaveStatus.Committed, id, Ballot.ZERO, Ballot.ZERO, waitingOn); + + AccordSafeCommand safeCommand = new AccordSafeCommand(AccordTestUtils.loaded(id, null)); + safeCommand.set(committed); + Mutation mutation = AccordKeyspace.getCommandMutation(store, safeCommand, 42); + mutation.apply(); + + Assertions.assertThat(AccordKeyspace.loadCommand(store, id)).isEqualTo(committed); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index c8a5872d9f03..628a09425714 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -230,6 +230,15 @@ public static Pair processTxnResultDirect(SafeCommandStore safeS } + public static String wrapInTxn(String query) + { + if (!query.endsWith(";")) + query += ";"; + return "BEGIN TRANSACTION\n" + + query + + "\nCOMMIT TRANSACTION"; + } + public static Txn createTxn(String query) { return createTxn(query, QueryOptions.DEFAULT); From ba02bd0bc74d7ab3f28230666040db248e0ad9f2 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Tue, 26 Mar 2024 14:44:17 -0700 Subject: [PATCH 067/340] Bootstrap/TCM integration test fixes --- .../cql3/statements/TransactionStatement.java | 46 +++++++- .../accord/AccordConfigurationService.java | 9 +- .../service/accord/AccordService.java | 10 +- .../service/accord/AccordTopologyUtils.java | 5 + .../apache/cassandra/tcm/ClusterMetadata.java | 29 +++++ .../apache/cassandra/tcm/MetadataKeys.java | 2 + .../tcm/StubClusterMetadataService.java | 2 + .../apache/cassandra/tcm/Transformation.java | 1 + .../tcm/compatibility/GossipHelper.java | 4 + .../tcm/ownership/AccordKeyspaces.java | 108 ++++++++++++++++++ .../transformations/AddAccordKeyspace.java | 78 +++++++++++++ .../distributed/test/IPMembershipTest.java | 23 +++- .../test/log/ClusterMetadataTestHelper.java | 5 +- .../org/apache/cassandra/db/RowCacheTest.java | 2 + .../cassandra/locator/MetaStrategyTest.java | 2 + .../cassandra/schema/ValidationTest.java | 3 +- .../ClusterMetadataTransformationTest.java | 3 + 17 files changed, 318 insertions(+), 14 deletions(-) create mode 100644 src/java/org/apache/cassandra/tcm/ownership/AccordKeyspaces.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/AddAccordKeyspace.java diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index 1625ab40e918..fa7250f969c0 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -44,6 +44,7 @@ import accord.api.Key; import accord.primitives.Keys; import accord.primitives.Txn; +import accord.utils.Invariants; import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; import org.apache.cassandra.config.DatabaseDescriptor; @@ -62,10 +63,12 @@ import org.apache.cassandra.db.SinglePartitionReadQuery; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.exceptions.ExceptionCode; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.AccordRoutableKey; import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnDataName; @@ -76,6 +79,9 @@ import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.transformations.AddAccordKeyspace; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.FBUtilities; @@ -351,6 +357,40 @@ private static boolean isSelectingMultipleClusterings(SelectStatement select, @N return select.getLimit(options) != 1; } + private void maybeConvertTablesToAccord(Txn txn) + { + Set allKeyspaces = new HashSet<>(); + Set newKeyspaces = new HashSet<>(); + txn.keys().forEach(key -> { + String keyspace = ((AccordRoutableKey) key).keyspace(); + if (allKeyspaces.add(keyspace) && !AccordService.instance().isAccordManagedKeyspace(keyspace)) + newKeyspaces.add(keyspace); + }); + + if (newKeyspaces.isEmpty()) + return; + + for (String keyspace : newKeyspaces) + { + ClusterMetadataService.instance().commit(new AddAccordKeyspace(keyspace), + metadata -> null, + (code, message) -> { + Invariants.checkState(code == ExceptionCode.ALREADY_EXISTS, + "Expected %s, got %s", ExceptionCode.ALREADY_EXISTS, code); + return null; + }); + } + + // we need to avoid creating a txnId in an epoch when no one has any ranges + FBUtilities.waitOnFuture(AccordService.instance().epochReady(ClusterMetadata.current().epoch)); + + for (String keyspace : allKeyspaces) + { + if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) + throw new IllegalStateException(keyspace + " is not an accord managed keyspace"); + } + } + @Override public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher.RequestTime requestTime) { @@ -365,7 +405,11 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. if (returningSelect != null) checkFalse(isSelectingMultipleClusterings(returningSelect.select, options), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "returning SELECT", returningSelect.select.source); - TxnData data = AccordService.instance().coordinate(createTxn(state.getClientState(), options), options.getConsistency()); + Txn txn = createTxn(state.getClientState(), options); + + maybeConvertTablesToAccord(txn); + + TxnData data = AccordService.instance().coordinate(txn, options.getConsistency()); if (returningSelect != null) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 610147e3411a..5b9ea24ab8c0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -42,7 +42,6 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.accord.AccordKeyspace.EpochDiskState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.listeners.ChangeListener; @@ -189,7 +188,7 @@ private void reportMetadata(ClusterMetadata metadata) synchronized (AccordConfigurationService.this) { updateMapping(metadata); - reportTopology(AccordTopologyUtils.createAccordTopology(metadata, this::isAccordManagedKeyspace)); + reportTopology(AccordTopologyUtils.createAccordTopology(metadata)); } }); } @@ -419,10 +418,4 @@ public synchronized Future localSyncNotified(long epoch) }); return promise; } - - public boolean isAccordManagedKeyspace(String keyspace) - { - // TODO (required, interop) : replace with schema flag or other mechanism for classifying accord keyspaces - return !SchemaConstants.REPLICATED_SYSTEM_KEYSPACE_NAMES.contains(keyspace); - } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index cf1bd2923d16..b85d0b570454 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -69,6 +69,7 @@ import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; @@ -187,6 +188,13 @@ public static void startup(NodeId tcmId) instance().startup(); } + public static void shutdownServiceAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException + { + if (localId == null) + return; + instance().shutdownAndWait(timeout, unit); + } + public static IAccordService instance() { return DatabaseDescriptor.getAccordTransactionsEnabled() ? Handle.instance : NOOP_SERVICE; @@ -457,7 +465,7 @@ public AccordConfigurationService configurationService() public boolean isAccordManagedKeyspace(String keyspace) { - return configService.isAccordManagedKeyspace(keyspace); + return ClusterMetadata.current().accordKeyspaces.contains(keyspace); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index a41e9732d95a..3884a19d00c6 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -148,4 +148,9 @@ public static Topology createAccordTopology(ClusterMetadata metadata, Predicate< { return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, keyspacePredicate); } + + public static Topology createAccordTopology(ClusterMetadata metadata) + { + return createAccordTopology(metadata, metadata.accordKeyspaces::contains); + } } diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index e0f1a1c2e125..5d51cfd3e338 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -62,6 +62,7 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.AccordKeyspaces; import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PrimaryRangeComparator; @@ -93,6 +94,7 @@ public class ClusterMetadata public final Directory directory; public final TokenMap tokenMap; public final DataPlacements placements; + public final AccordKeyspaces accordKeyspaces; public final LockedRanges lockedRanges; public final InProgressSequences inProgressSequences; public final ImmutableMap, ExtensionValue> extensions; @@ -127,6 +129,7 @@ public ClusterMetadata(IPartitioner partitioner, Directory directory, Distribute directory, new TokenMap(partitioner), DataPlacements.EMPTY, + AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ImmutableMap.of()); @@ -138,6 +141,7 @@ public ClusterMetadata(Epoch epoch, Directory directory, TokenMap tokenMap, DataPlacements placements, + AccordKeyspaces accordKeyspaces, LockedRanges lockedRanges, InProgressSequences inProgressSequences, Map, ExtensionValue> extensions) @@ -149,6 +153,7 @@ public ClusterMetadata(Epoch epoch, directory, tokenMap, placements, + accordKeyspaces, lockedRanges, inProgressSequences, extensions); @@ -161,6 +166,7 @@ private ClusterMetadata(int metadataIdentifier, Directory directory, TokenMap tokenMap, DataPlacements placements, + AccordKeyspaces accordKeyspaces, LockedRanges lockedRanges, InProgressSequences inProgressSequences, Map, ExtensionValue> extensions) @@ -176,6 +182,7 @@ private ClusterMetadata(int metadataIdentifier, this.directory = directory; this.tokenMap = tokenMap; this.placements = placements; + this.accordKeyspaces = accordKeyspaces; this.lockedRanges = lockedRanges; this.inProgressSequences = inProgressSequences; this.extensions = ImmutableMap.copyOf(extensions); @@ -231,6 +238,7 @@ public ClusterMetadata forceEpoch(Epoch epoch) capLastModified(directory, epoch), capLastModified(tokenMap, epoch), capLastModified(placements, epoch), + capLastModified(accordKeyspaces, epoch), capLastModified(lockedRanges, epoch), capLastModified(inProgressSequences, epoch), capLastModified(extensions, epoch)); @@ -251,6 +259,7 @@ public ClusterMetadata initializeClusterIdentifier(int clusterIdentifier) directory, tokenMap, placements, + accordKeyspaces, lockedRanges, inProgressSequences, extensions); @@ -376,6 +385,7 @@ public static class Transformer private Directory directory; private TokenMap tokenMap; private DataPlacements placements; + private AccordKeyspaces accordKeyspaces; private LockedRanges lockedRanges; private InProgressSequences inProgressSequences; private final Map, ExtensionValue> extensions; @@ -390,6 +400,7 @@ private Transformer(ClusterMetadata metadata, Epoch epoch) this.directory = metadata.directory; this.tokenMap = metadata.tokenMap; this.placements = metadata.placements; + this.accordKeyspaces = metadata.accordKeyspaces; this.lockedRanges = metadata.lockedRanges; this.inProgressSequences = metadata.inProgressSequences; extensions = new HashMap<>(metadata.extensions); @@ -509,6 +520,12 @@ public Transformer with(DataPlacements placements) return this; } + public Transformer withAccordKeyspace(String keyspace) + { + accordKeyspaces = accordKeyspaces.with(keyspace); + return this; + } + public Transformer with(LockedRanges lockedRanges) { this.lockedRanges = lockedRanges; @@ -595,6 +612,12 @@ public Transformed build() placements = placements.withLastModified(epoch); } + if (accordKeyspaces != base.accordKeyspaces) + { + modifiedKeys.add(MetadataKeys.ACCORD_KEYSPACES); + accordKeyspaces = accordKeyspaces.withLastModified(epoch); + } + if (lockedRanges != base.lockedRanges) { modifiedKeys.add(MetadataKeys.LOCKED_RANGES); @@ -614,6 +637,7 @@ public Transformed build() directory, tokenMap, placements, + accordKeyspaces, lockedRanges, inProgressSequences, extensions), @@ -629,6 +653,7 @@ public ClusterMetadata buildForGossipMode() directory, tokenMap, placements, + accordKeyspaces, lockedRanges, inProgressSequences, extensions); @@ -882,6 +907,7 @@ public void serialize(ClusterMetadata metadata, DataOutputPlus out, Version vers Directory.serializer.serialize(metadata.directory, out, version); TokenMap.serializer.serialize(metadata.tokenMap, out, version); DataPlacements.serializer.serialize(metadata.placements, out, version); + AccordKeyspaces.serializer.serialize(metadata.accordKeyspaces, out, version); LockedRanges.serializer.serialize(metadata.lockedRanges, out, version); InProgressSequences.serializer.serialize(metadata.inProgressSequences, out, version); out.writeInt(metadata.extensions.size()); @@ -918,6 +944,7 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE Directory dir = Directory.serializer.deserialize(in, version); TokenMap tokenMap = TokenMap.serializer.deserialize(in, version); DataPlacements placements = DataPlacements.serializer.deserialize(in, version); + AccordKeyspaces accordKeyspaces = AccordKeyspaces.serializer.deserialize(in, version); LockedRanges lockedRanges = LockedRanges.serializer.deserialize(in, version); InProgressSequences ips = InProgressSequences.serializer.deserialize(in, version); int items = in.readInt(); @@ -936,6 +963,7 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE dir, tokenMap, placements, + accordKeyspaces, lockedRanges, ips, extensions); @@ -958,6 +986,7 @@ public long serializedSize(ClusterMetadata metadata, Version version) Directory.serializer.serializedSize(metadata.directory, version) + TokenMap.serializer.serializedSize(metadata.tokenMap, version) + DataPlacements.serializer.serializedSize(metadata.placements, version) + + AccordKeyspaces.serializer.serializedSize(metadata.accordKeyspaces, version) + LockedRanges.serializer.serializedSize(metadata.lockedRanges, version) + InProgressSequences.serializer.serializedSize(metadata.inProgressSequences, version); diff --git a/src/java/org/apache/cassandra/tcm/MetadataKeys.java b/src/java/org/apache/cassandra/tcm/MetadataKeys.java index bead377c0bfb..18a9e6d0231d 100644 --- a/src/java/org/apache/cassandra/tcm/MetadataKeys.java +++ b/src/java/org/apache/cassandra/tcm/MetadataKeys.java @@ -39,6 +39,7 @@ public class MetadataKeys public static final MetadataKey NODE_DIRECTORY = make(CORE_NS, "membership", "node_directory"); public static final MetadataKey TOKEN_MAP = make(CORE_NS, "ownership", "token_map"); public static final MetadataKey DATA_PLACEMENTS = make(CORE_NS, "ownership", "data_placements"); + public static final MetadataKey ACCORD_KEYSPACES = make(CORE_NS, "ownership", "accord_keyspaces"); public static final MetadataKey LOCKED_RANGES = make(CORE_NS, "sequences", "locked_ranges"); public static final MetadataKey IN_PROGRESS_SEQUENCES = make(CORE_NS, "sequences", "in_progress"); @@ -46,6 +47,7 @@ public class MetadataKeys NODE_DIRECTORY, TOKEN_MAP, DATA_PLACEMENTS, + ACCORD_KEYSPACES, LOCKED_RANGES, IN_PROGRESS_SEQUENCES); diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index 855cce0d363a..1e115a537992 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -32,6 +32,7 @@ import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LocalLog; import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.ownership.AccordKeyspaces; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PlacementProvider; import org.apache.cassandra.tcm.ownership.TokenMap; @@ -172,6 +173,7 @@ public StubClusterMetadataService build() Directory.EMPTY, new TokenMap(partitioner), DataPlacements.EMPTY, + AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ImmutableMap.of()); diff --git a/src/java/org/apache/cassandra/tcm/Transformation.java b/src/java/org/apache/cassandra/tcm/Transformation.java index 864d9a5d94fa..f90a0da63490 100644 --- a/src/java/org/apache/cassandra/tcm/Transformation.java +++ b/src/java/org/apache/cassandra/tcm/Transformation.java @@ -221,6 +221,7 @@ enum Kind ADVANCE_CMS_RECONFIGURATION(33, () -> AdvanceCMSReconfiguration.serializer), CANCEL_CMS_RECONFIGURATION(34, () -> CancelCMSReconfiguration.serializer), ALTER_TOPOLOGY(35, () -> AlterTopology.serializer), + ADD_ACCORD_KEYSPACE(36, () -> AddAccordKeyspace.serializer) ; private final Supplier> serializer; diff --git a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java index 1a555fce4d36..a5530e6faa8b 100644 --- a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java +++ b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java @@ -66,6 +66,7 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.AccordKeyspaces; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.ownership.UniformRangePlacement; @@ -295,6 +296,7 @@ public static ClusterMetadata emptyWithSchemaFromSystemTables(Set allKno Directory.EMPTY, new TokenMap(DatabaseDescriptor.getPartitioner()), DataPlacements.empty(), + AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, Collections.emptyMap()); @@ -382,6 +384,7 @@ public static ClusterMetadata fromEndpointStates(Map +{ + public static final AccordKeyspaces EMPTY = new AccordKeyspaces(Epoch.EMPTY, ImmutableSet.of()); + private final Epoch lastModified; + private final ImmutableSet keyspaces; + + public AccordKeyspaces(Epoch lastModified, ImmutableSet keyspaces) + { + this.lastModified = lastModified; + this.keyspaces = keyspaces; + } + + public String toString() + { + return "AccordKeyspaces{" + lastModified + keyspaces + '}'; + } + + public AccordKeyspaces withLastModified(Epoch epoch) + { + return new AccordKeyspaces(epoch, keyspaces); + } + + public Epoch lastModified() + { + return lastModified; + } + + public boolean contains(String keyspace) + { + return keyspaces.contains(keyspace); + } + + public AccordKeyspaces with(String keyspace) + { + if (keyspaces.contains(keyspace)) + return this; + + return new AccordKeyspaces(lastModified, ImmutableSet.builder().addAll(keyspaces).add(keyspace).build()); + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + public void serialize(AccordKeyspaces accordKeyspaces, DataOutputPlus out, Version version) throws IOException + { + int size = accordKeyspaces.keyspaces.size(); + out.writeInt(size); + String[] keyspaces = new String[size]; + accordKeyspaces.keyspaces.toArray(keyspaces); + Arrays.sort(keyspaces); + for (String keyspace : keyspaces) + out.writeUTF(keyspace); + Epoch.serializer.serialize(accordKeyspaces.lastModified, out, version); + } + + public AccordKeyspaces deserialize(DataInputPlus in, Version version) throws IOException + { + int size = in.readInt(); + ImmutableSet.Builder builder = ImmutableSet.builder(); + for (int i=0; i serializer = new AsymmetricMetadataSerializer() + { + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + assert t instanceof AddAccordKeyspace; + AddAccordKeyspace addKeyspace = (AddAccordKeyspace) t; + out.writeUTF(addKeyspace.keyspace); + } + + public AddAccordKeyspace deserialize(DataInputPlus in, Version version) throws IOException + { + return new AddAccordKeyspace(in.readUTF()); + } + + public long serializedSize(Transformation t, Version version) + { + assert t instanceof AddAccordKeyspace; + AddAccordKeyspace addKeyspace = (AddAccordKeyspace) t; + return TypeSizes.sizeof(addKeyspace.keyspace); + } + }; +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/IPMembershipTest.java b/test/distributed/org/apache/cassandra/distributed/test/IPMembershipTest.java index 9f242b0212d0..c51e8163a6fe 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/IPMembershipTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/IPMembershipTest.java @@ -18,6 +18,7 @@ package org.apache.cassandra.distributed.test; +import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Set; @@ -33,7 +34,6 @@ import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.impl.InstanceConfig; import org.apache.cassandra.distributed.shared.ClusterUtils; -import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.locator.NoOpProximity; import org.apache.cassandra.locator.SimpleLocationProvider; import org.apache.cassandra.tools.ToolRunner; @@ -46,6 +46,25 @@ public class IPMembershipTest extends TestBaseImpl { + + private static void deleteRecursiveNoStaticInit(File file) + { + if (file.isDirectory()) + { + for (File entry : file.listFiles()) + deleteRecursiveNoStaticInit(entry); + } + else + { + file.delete(); + } + } + + private static void deleteRecursiveNoStaticInit(org.apache.cassandra.io.util.File file) + { + deleteRecursiveNoStaticInit(new File(file.absolutePath())); + } + /** * Port of replace_address_test.py::fail_without_replace_test to jvm-dtest */ @@ -64,7 +83,7 @@ public void sameIPFailWithoutReplace() throws IOException for (boolean auto_bootstrap : Arrays.asList(true, false)) { stopUnchecked(nodeToReplace); - getDirectories(nodeToReplace).forEach(FileUtils::deleteRecursive); + getDirectories(nodeToReplace).forEach(IPMembershipTest::deleteRecursiveNoStaticInit); nodeToReplace.config().set("auto_bootstrap", auto_bootstrap); diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java index 3c822864c570..33402af732cb 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java @@ -33,7 +33,6 @@ import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; - import org.apache.cassandra.ServerTestUtils.ResettableClusterMetadataService; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; @@ -69,6 +68,7 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.AccordKeyspaces; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.ownership.UniformRangePlacement; @@ -149,6 +149,7 @@ public static ClusterMetadata minimalForTesting(Epoch epoch, IPartitioner partit Directory.EMPTY, new TokenMap(partitioner), DataPlacements.empty(), + AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ImmutableMap.of()); @@ -162,6 +163,7 @@ public static ClusterMetadata minimalForTesting(IPartitioner partitioner) null, null, DataPlacements.empty(), + AccordKeyspaces.EMPTY, null, null, ImmutableMap.of()); @@ -175,6 +177,7 @@ public static ClusterMetadata minimalForTesting(Keyspaces keyspaces) null, null, DataPlacements.empty(), + AccordKeyspaces.EMPTY, null, null, ImmutableMap.of()); diff --git a/test/unit/org/apache/cassandra/db/RowCacheTest.java b/test/unit/org/apache/cassandra/db/RowCacheTest.java index 3ecc0007070d..f7a87b134144 100644 --- a/test/unit/org/apache/cassandra/db/RowCacheTest.java +++ b/test/unit/org/apache/cassandra/db/RowCacheTest.java @@ -55,6 +55,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaTestUtil; import org.apache.cassandra.service.CacheService; +import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.reads.range.TokenUpdater; import org.apache.cassandra.utils.ByteBufferUtil; @@ -82,6 +83,7 @@ public static void defineSchema() throws ConfigurationException SchemaLoader.standardCFMD(KEYSPACE_CACHED, CF_CACHED).caching(CachingParams.CACHE_EVERYTHING), SchemaLoader.standardCFMD(KEYSPACE_CACHED, CF_CACHEDINT, 1, IntegerType.instance) .caching(new CachingParams(true, 100))); + StorageService.instance.initServer(); } @AfterClass diff --git a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java index c6a96fa71eb2..8fc3498bfb51 100644 --- a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java +++ b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java @@ -38,6 +38,7 @@ import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.AccordKeyspaces; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.sequences.InProgressSequences; @@ -87,6 +88,7 @@ public static ClusterMetadata metadata(NodeConfiguration... configurations) directory, tokenMap, DataPlacements.EMPTY, + AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ImmutableMap.of()); diff --git a/test/unit/org/apache/cassandra/schema/ValidationTest.java b/test/unit/org/apache/cassandra/schema/ValidationTest.java index 630e815c00e4..75727f218f24 100644 --- a/test/unit/org/apache/cassandra/schema/ValidationTest.java +++ b/test/unit/org/apache/cassandra/schema/ValidationTest.java @@ -24,10 +24,11 @@ import java.util.Map; import java.util.Set; +import org.apache.cassandra.config.DatabaseDescriptor; + import org.junit.BeforeClass; import org.junit.Test; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.marshal.AbstractType; import static org.junit.Assert.assertFalse; diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java index bd91832d00bb..318c0c6e3e22 100644 --- a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java @@ -50,6 +50,7 @@ import org.apache.cassandra.tcm.sequences.LockedRanges; import org.mockito.Mockito; +import static org.apache.cassandra.tcm.MetadataKeys.ACCORD_KEYSPACES; import static org.apache.cassandra.tcm.MetadataKeys.DATA_PLACEMENTS; import static org.apache.cassandra.tcm.MetadataKeys.IN_PROGRESS_SEQUENCES; import static org.apache.cassandra.tcm.MetadataKeys.LOCKED_RANGES; @@ -302,6 +303,8 @@ else if (key == LOCKED_RANGES) return metadata.lockedRanges; else if (key == IN_PROGRESS_SEQUENCES) return metadata.inProgressSequences; + else if (key == ACCORD_KEYSPACES) + return metadata.accordKeyspaces; throw new IllegalArgumentException("Unknown metadata key " + key); } From 10e065ffe5453a3ba9453d5fa1333ab74ead312f Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 16 Aug 2023 15:36:01 -0700 Subject: [PATCH 068/340] Fix Accord compaction purger tombstone logic Accord compaction purgers see random slices of Accord state during compaction (based on randomly selected compaction inputs). For at least the `durability` column in the `commands` table the tombstone being created when truncating was deleting the latest value since we can get enough information to truncate without actuall yhaving the latest `durability` value. To fix we can wait to emit a tombstone until we are erasing the entire command row when truncating or truncating with outcome and meanwhile we can drop the extra columns that are no longer needed instead of using a tombstone. We don't need to emit cell tombstones we can drop them from the purger when processing each row. patch by Ariel Weisberg; reviewed by David Capwell for CASSANDRA-18795 --- modules/accord | 2 +- .../db/compaction/CompactionIterator.java | 112 ++++---- .../cassandra/db/marshal/TupleType.java | 5 + .../service/accord/AccordKeyspace.java | 243 ++++++++++++++---- test/unit/org/apache/cassandra/Util.java | 37 ++- .../CompactionAccordIteratorsTest.java | 183 ++++++++++--- .../db/compaction/CompactionIteratorTest.java | 56 ++-- .../ByteSourceComparisonTest.java | 2 +- .../ByteSourceConversionTest.java | 2 +- 9 files changed, 486 insertions(+), 156 deletions(-) diff --git a/modules/accord b/modules/accord index 8c7a3c9ef420..1d6028ca2055 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8c7a3c9ef4209d635b186189e17a2d9e728e9871 +Subproject commit 1d6028ca20553d1c1a6fe2809b204254955da3b3 diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 85f524596963..200d8687f395 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -25,9 +25,9 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.function.LongPredicate; +import java.util.function.Supplier; import javax.annotation.Nonnull; -import com.google.common.base.Supplier; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Ordering; @@ -54,6 +54,8 @@ import org.apache.cassandra.db.partitions.PurgeFunction; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; +import org.apache.cassandra.db.rows.BTreeRow; +import org.apache.cassandra.db.rows.Cell; import org.apache.cassandra.db.rows.RangeTombstoneBoundMarker; import org.apache.cassandra.db.rows.RangeTombstoneMarker; import org.apache.cassandra.db.rows.Row; @@ -77,6 +79,7 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; +import org.apache.cassandra.service.accord.AccordKeyspace.CommandsColumns; import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; @@ -86,14 +89,23 @@ import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; -import static accord.impl.CommandsForKey.NO_LAST_EXECUTED_HLC; import static accord.local.Commands.Cleanup.TRUNCATE_WITH_OUTCOME; +import static accord.local.Status.Durability.Universal; import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.cassandra.config.Config.PaxosStatePurging.legacy; import static org.apache.cassandra.config.DatabaseDescriptor.paxosStatePurging; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.maybeDropTruncatedCommandColumns; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.truncatedApply; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.last_executed_micros; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.last_executed_timestamp; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.last_write_timestamp; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.max_timestamp; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows.truncateStaticRow; +import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeDurabilityOrNull; +import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeRouteOrNull; +import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeSaveStatusOrNull; +import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeTimestampOrNull; /** * Merge multiple iterators over the content of sstable into a "compacted" iterator. @@ -785,26 +797,43 @@ protected Row applyToRow(Row row) if (redundantBefore == null) return row; - Timestamp executeAt = CommandRows.getExecuteAt(row); - Durability durability = CommandRows.getDurability(row); - SaveStatus saveStatus = CommandRows.getStatus(row); - Route route = CommandRows.getRoute(row); + // When commands end up being sliced by compaction we need this to discard tombstones and slices + // without enough information to run the rest of the cleanup logic + if (durableBefore.min(txnId) == Universal) + return null; + + Cell durabilityCell = row.getCell(CommandsColumns.durability); + Durability durability = deserializeDurabilityOrNull(durabilityCell); + Cell executeAtCell = row.getCell(CommandsColumns.execute_at); + Timestamp executeAt = deserializeTimestampOrNull(executeAtCell); + Cell routeCell = row.getCell(CommandsColumns.route); + Route route = deserializeRouteOrNull(routeCell); + Cell statusCell = row.getCell(CommandsColumns.status); + SaveStatus saveStatus = deserializeSaveStatusOrNull(statusCell); + + // With a sliced row we might not have enough columns to determine what to do so output the + // the row unmodified and we will try again later once it merges with the rest of the command state + // or is dropped by `durableBefore.min(txnId) == Universal` + if (executeAt == null || durability == null || saveStatus == null || route == null) + return row; - Commands.Cleanup cleanup = Commands.shouldCleanup(txnId, saveStatus.status, durability, executeAt, route, redundantBefore, durableBefore); + Commands.Cleanup cleanup = Commands.shouldCleanup(txnId, saveStatus.status, durability, executeAt, route, redundantBefore, durableBefore, false); switch (cleanup) { default: throw new AssertionError(String.format("Unexpected cleanup task: %s", cleanup)); case ERASE: - return null; + // Emit a tombstone so if this is slicing the command and making it not possible to determine if it + // can be truncated later it can still be dropped via the tombstone. + // Eventually the tombstone can be dropped by `durableBefore.min(txnId) == Universal` + // We can still encounter sliced command state just because compaction inputs are random + return BTreeRow.emptyDeletedRow(row.clustering(), new Row.Deletion(DeletionTime.build(row.primaryKeyLivenessInfo().timestamp(), nowInSec), false)); case TRUNCATE_WITH_OUTCOME: - if (saveStatus.compareTo(cleanup.appliesIfNot) >= 0) - return row; - case TRUNCATE: if (saveStatus.compareTo(cleanup.appliesIfNot) >= 0) - return row; - return truncatedApply(cleanup.appliesIfNot, row, nowInSec, cleanup == TRUNCATE_WITH_OUTCOME); + return maybeDropTruncatedCommandColumns(row, cleanup == TRUNCATE_WITH_OUTCOME, durabilityCell, executeAtCell, routeCell, statusCell); + return truncatedApply(cleanup.appliesIfNot, + row, nowInSec, durability, durabilityCell, executeAtCell, routeCell, cleanup == TRUNCATE_WITH_OUTCOME); case NO: return row; @@ -853,45 +882,45 @@ protected Row applyToStatic(Row row) TxnId redundantBeforeTxnId = redundantBeforeEntry.redundantBefore; - boolean updatedColumn = false; - Timestamp max_timestamp = CommandsForKeyRows.getMaxTimestamp(row); - if (max_timestamp.compareTo(redundantBeforeTxnId) < 0) + Cell lastExecuteMicrosCell = row.getCell(last_executed_micros); + Long last_execute_micros = null; + if (lastExecuteMicrosCell != null && !lastExecuteMicrosCell.accessor().isEmpty(lastExecuteMicrosCell.value())) + last_execute_micros = lastExecuteMicrosCell.accessor().getLong(lastExecuteMicrosCell.value(), 0); + if (last_execute_micros != null && last_execute_micros < redundantBeforeTxnId.hlc()) { - max_timestamp = Timestamp.NONE; - updatedColumn = true; + lastExecuteMicrosCell = null; } - Timestamp last_execute = CommandsForKeyRows.getLastExecutedTimestamp(row); - if (last_execute.compareTo(redundantBeforeTxnId) < 0) + Cell lastExecuteCell = row.getCell(last_executed_timestamp); + Timestamp last_execute = deserializeTimestampOrNull(lastExecuteCell); + if (last_execute != null && last_execute.compareTo(redundantBeforeTxnId) < 0) { - last_execute = Timestamp.NONE; - updatedColumn = true; + lastExecuteCell = null; } - Timestamp last_write = CommandsForKeyRows.getLastWriteTimestamp(row); - if (last_write.compareTo(redundantBeforeTxnId) < 0) + Cell lastWriteCell = row.getCell(last_write_timestamp); + Timestamp last_write = deserializeTimestampOrNull(lastWriteCell); + if (last_write != null && last_write.compareTo(redundantBeforeTxnId) < 0) { - last_write = Timestamp.NONE; - updatedColumn = true; + lastWriteCell = null; } - long last_execute_micros = CommandsForKeyRows.getLastExecutedMicros(row); - if (last_execute_micros < redundantBeforeTxnId.hlc()) + Cell maxTimestampCell = row.getCell(max_timestamp); + Timestamp max_timestamp = deserializeTimestampOrNull(maxTimestampCell); + if (max_timestamp != null && max_timestamp.compareTo(redundantBeforeTxnId) < 0) { - last_execute_micros = NO_LAST_EXECUTED_HLC; - updatedColumn = true; + maxTimestampCell = null; } - if (max_timestamp == Timestamp.NONE && - last_execute == Timestamp.NONE && - last_write == Timestamp.NONE && - last_execute_micros == NO_LAST_EXECUTED_HLC) + // No need to emit a tombstone as earlier versions of the row will also be nulled out + // when compacted later or loaded into a commands for key + if (lastExecuteMicrosCell == null && + lastExecuteCell == null && + lastWriteCell == null && + maxTimestampCell == null) return null; - if (updatedColumn) - return truncateStaticRow(nowInSec, row, last_execute_micros, last_execute, last_write, max_timestamp); - - return row; + return truncateStaticRow(nowInSec, row, lastExecuteMicrosCell, lastExecuteCell, lastWriteCell, maxTimestampCell); } @Override @@ -910,7 +939,7 @@ protected Row applyToRow(Row row) TxnId redundantBeforeTxnId = redundantBeforeEntry.redundantBefore; Timestamp timestamp = CommandsForKeyRows.getTimestamp(row); - if (timestamp.compareTo(redundantBeforeTxnId) < 0) + if (timestamp != null && timestamp.compareTo(redundantBeforeTxnId) < 0) return null; return row; @@ -966,9 +995,4 @@ private static boolean isAccordCommandsForKey(ColumnFamilyStore cfs) { return cfs.name.equals(AccordKeyspace.COMMANDS_FOR_KEY) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); } - - private static boolean isAccordCommandsOrAccordCommandsForKey(ColumnFamilyStore cfs) - { - return isAccordCommands(cfs) || isAccordCommandsForKey(cfs); - } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/marshal/TupleType.java b/src/java/org/apache/cassandra/db/marshal/TupleType.java index d6ce2da0f48d..ff0b943078df 100644 --- a/src/java/org/apache/cassandra/db/marshal/TupleType.java +++ b/src/java/org/apache/cassandra/db/marshal/TupleType.java @@ -351,6 +351,11 @@ protected String componentOrFieldName(int i) return "component"; } + public static V pack(ValueAccessor accessor, V... components) + { + return pack(accessor, Arrays.asList(components)); + } + public static V pack(ValueAccessor accessor, Collection components) { int totalLength = 0; diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 5f31f8627630..bfaf76459220 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -62,6 +62,7 @@ import accord.local.RedundantBefore; import accord.local.SaveStatus; import accord.local.Status; +import accord.local.Status.Durability; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.PartialDeps; @@ -119,6 +120,7 @@ import org.apache.cassandra.db.rows.Cell; import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.Row.Deletion; import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.transform.FilteredPartitions; import org.apache.cassandra.dht.ByteOrderedPartitioner; @@ -158,6 +160,8 @@ import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import static accord.utils.Invariants.checkArgument; +import static accord.utils.Invariants.checkState; import static java.lang.String.format; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; @@ -286,17 +290,17 @@ private static ColumnMetadata getColumn(TableMetadata metadata, String name) return column; } - private static class CommandsColumns + public static class CommandsColumns { static final ClusteringComparator keyComparator = Commands.partitionKeyAsClusteringComparator(); static final CompositeType partitionKeyType = (CompositeType) Commands.partitionKeyType; static final ColumnMetadata txn_id = getColumn(Commands, "txn_id"); static final ColumnMetadata store_id = getColumn(Commands, "store_id"); - static final ColumnMetadata status = getColumn(Commands, "status"); - static final ColumnMetadata route = getColumn(Commands, "route"); - static final ColumnMetadata durability = getColumn(Commands, "durability"); + public static final ColumnMetadata status = getColumn(Commands, "status"); + public static final ColumnMetadata route = getColumn(Commands, "route"); + public static final ColumnMetadata durability = getColumn(Commands, "durability"); static final ColumnMetadata txn = getColumn(Commands, "txn"); - static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); + public static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); static final ColumnMetadata promised_ballot = getColumn(Commands, "promised_ballot"); static final ColumnMetadata accepted_ballot = getColumn(Commands, "accepted_ballot"); static final ColumnMetadata dependencies = getColumn(Commands, "dependencies"); @@ -305,7 +309,7 @@ private static class CommandsColumns static final ColumnMetadata waiting_on = getColumn(Commands, "waiting_on"); static final ColumnMetadata listeners = getColumn(Commands, "listeners"); - static ColumnMetadata[][] TRUNCATE_FIELDS = new ColumnMetadata[][] { + public static ColumnMetadata[][] TRUNCATE_FIELDS = new ColumnMetadata[][] { new ColumnMetadata[] { durability, execute_at, route, status }, new ColumnMetadata[] { durability, execute_at, result, route, status, writes }, }; @@ -315,10 +319,9 @@ private static class CommandsColumns for (ColumnMetadata[] cds : TRUNCATE_FIELDS) { for (int i = 1 ; i < cds.length ; ++i) - Invariants.checkState(cds[i - 1].compareTo(cds[i]) < 0); + checkState(cds[i - 1].compareTo(cds[i]) < 0); } } - } public static class CommandRows extends CommandsColumns @@ -338,29 +341,41 @@ public static TxnId getTxnId(ByteBuffer[] partitionKeyComponents) return deserializeTimestampOrNull(partitionKeyComponents[txn_id.position()], ByteBufferAccessor.instance, TxnId::fromBits); } + @Nullable public static Timestamp getExecuteAt(Row row) { Cell cell = row.getCell(execute_at); + if (cell == null) + return null; return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); } + @Nullable public static SaveStatus getStatus(Row row) { Cell cell = row.getCell(status); + if (cell == null) + return null; int ordinal = cell.accessor().getInt(cell.value(), 0); return CommandSerializers.saveStatus.forOrdinal(ordinal); } + @Nullable public static Status.Durability getDurability(Row row) { Cell cell = row.getCell(durability); + if (cell == null) + return null; int ordinal = cell.accessor().getInt(cell.value(), 0); return CommandSerializers.durability.forOrdinal(ordinal); } + @Nullable public static Route getRoute(Row row) { Cell cell = row.getCell(route); + if (cell == null) + return null; try { return deserializeOrNull(cell.buffer(), LocalVersionedSerializers.route); @@ -371,22 +386,94 @@ public static Route getRoute(Row row) } } - public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, boolean withOutcome) - { + private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSaveStatus, Cell durabilityCell, Cell executeAtCell, @Nullable Cell resultCell, Cell routeCell, @Nullable Cell writesCell, boolean updateTimestamps) + { + checkArgument(durabilityCell.column() == CommandsColumns.durability); + checkArgument(executeAtCell.column() == CommandsColumns.execute_at); + checkArgument(resultCell == null || resultCell.column() == CommandsColumns.result); + checkArgument(routeCell.column() == CommandsColumns.route); + checkArgument(writesCell == null || writesCell.column() == CommandsColumns.writes); + boolean includeOutcome = resultCell != null; + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS[includeOutcome ? 1 : 0].length); + int colIndex = 0; + newLeaf[colIndex++] = updateTimestamps ? durabilityCell.withUpdatedTimestamp(newTimestamp) : durabilityCell; + newLeaf[colIndex++] = updateTimestamps ? executeAtCell.withUpdatedTimestamp(newTimestamp) : executeAtCell; + if (includeOutcome) + newLeaf[colIndex++] = updateTimestamps ? resultCell.withUpdatedTimestamp(newTimestamp) : resultCell; + newLeaf[colIndex++] = updateTimestamps ? routeCell.withUpdatedTimestamp(newTimestamp) : routeCell; + // Status always needs to use the new timestamp since we are replacing the existing value + // All the other columns are being retained unmodified with at most updated timestamps to accomdate deletion + newLeaf[colIndex++] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); + if (includeOutcome) + newLeaf[colIndex++] = updateTimestamps ? writesCell.withUpdatedTimestamp(newTimestamp) : writesCell; + return newLeaf; + } + + public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, Durability durability, Cell durabilityCell, Cell executeAtCell, Cell routeCell, boolean withOutcome) + { + checkArgument(durabilityCell.column() == CommandsColumns.durability); + checkArgument(executeAtCell.column() == CommandsColumns.execute_at); + checkArgument(routeCell.column() == CommandsColumns.route); long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); long newTimestamp = oldTimestamp + 1; - - ColumnMetadata[] fields = TRUNCATE_FIELDS[withOutcome ? 1 : 0]; - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(fields.length); - for (int i = 0 ; i < fields.length ; ++i) - { - if (fields[i] == status) newLeaf[i] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); - else newLeaf[i] = row.getCell(fields[i]).withUpdatedTimestamp(newTimestamp); - } - + Cell resultCell = withOutcome ? row.getCell(CommandsColumns.result) : null; + Cell writesCell = withOutcome ? row.getCell(CommandsColumns.writes) : null; + checkState((resultCell != null) == (writesCell != null), "result and writes should always be set together"); + boolean doDeletion = true; + // If durability is not universal we don't want to delete older versions of the row that might have recorded + // a higher durability value. maybeDropTruncatedCommandColumns will take care of dropping things even if we don't drop via tombstones. + // durability should be the only column that could have an older value that is insufficient for propagating forward + if (durability != Durability.Universal) + doDeletion = false; + // We may not have what we need to generate a deletion and include the outcome in the truncated row + // so need to wait until we can have the outcome to issue the deletion otherwise it would be shadowed and lost + if (withOutcome && resultCell == null) + doDeletion = false; + + Object[] newLeaf = truncatedApplyLeaf(newTimestamp, newSaveStatus, durabilityCell, executeAtCell, resultCell, routeCell, writesCell, doDeletion); + + // Including a deletion allows future compactions to drop data before it gets to the purger + // but it is pretty optional because maybeDropTruncatedCommandColumns will drop the extra columns + // regardless + Row.Deletion deletion = doDeletion ? new Row.Deletion(DeletionTime.build(oldTimestamp, nowInSec), false) : Deletion.LIVE; return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), - new Row.Deletion(DeletionTime.build(oldTimestamp, nowInSec), false), - newLeaf); + deletion, newLeaf); + } + + public static Row maybeDropTruncatedCommandColumns(Row row, boolean withOutcome, Cell durabilityCell, Cell executeAtCell, Cell routeCell, Cell statusCell) + { + checkArgument(durabilityCell.column() == CommandsColumns.durability); + checkArgument(executeAtCell.column() == CommandsColumns.execute_at); + checkArgument(routeCell.column() == CommandsColumns.route); + checkArgument(statusCell.column() == CommandsColumns.status); + int colCount = row.columnCount(); + // If it's the exact length of the post truncate column count without outcome fields + // then it is exactly the columns needed for getting this far and withOutcome doesn't matter since + // nothing additional is available to include anyways + if (colCount == TRUNCATE_FIELDS[0].length) + return row; + + Cell resultCell = row.getCell(CommandsColumns.result); + Cell writesCell = row.getCell(CommandsColumns.writes); + checkState((resultCell != null) == (writesCell != null), "result and writes should always be set together"); + boolean includeOutcome = withOutcome && resultCell != null; + // This has just the columns needed for truncation with outcome so return it unmodified + if (colCount == TRUNCATE_FIELDS[1].length && includeOutcome) + return row; + + // Construct a replacement with just the available columns that are still needed + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS[includeOutcome ? 1 : 0].length); + int colIndex = 0; + newLeaf[colIndex++] = durabilityCell; + newLeaf[colIndex++] = executeAtCell; + if (includeOutcome) + newLeaf[colIndex++] = resultCell; + newLeaf[colIndex++] = routeCell; + newLeaf[colIndex++] = statusCell; + if (includeOutcome) + newLeaf[colIndex++] = writesCell; + + return BTreeRow.create(row.clustering(), row.primaryKeyLivenessInfo(), row.deletion(), newLeaf); } public static Result getResult(Row row) throws IOException @@ -419,7 +506,7 @@ public static Writes getWrites(Row row) throws IOException .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) .build(); - private static class CommandsForKeyColumns + public static class CommandsForKeyColumns { static final ClusteringComparator keyComparator = CommandsForKeys.partitionKeyAsClusteringComparator(); static final CompositeType partitionKeyType = (CompositeType) CommandsForKeys.partitionKeyType; @@ -428,10 +515,10 @@ private static class CommandsForKeyColumns static final ColumnMetadata key_token = getColumn(CommandsForKeys, "key_token"); static final ColumnMetadata key = getColumn(CommandsForKeys, "key"); static final ColumnMetadata timestamp = getColumn(CommandsForKeys, "timestamp"); - static final ColumnMetadata max_timestamp = getColumn(CommandsForKeys, "max_timestamp"); - static final ColumnMetadata last_executed_timestamp = getColumn(CommandsForKeys, "last_executed_timestamp"); - static final ColumnMetadata last_executed_micros = getColumn(CommandsForKeys, "last_executed_micros"); - static final ColumnMetadata last_write_timestamp = getColumn(CommandsForKeys, "last_write_timestamp"); + public static final ColumnMetadata max_timestamp = getColumn(CommandsForKeys, "max_timestamp"); + public static final ColumnMetadata last_executed_timestamp = getColumn(CommandsForKeys, "last_executed_timestamp"); + public static final ColumnMetadata last_executed_micros = getColumn(CommandsForKeys, "last_executed_micros"); + public static final ColumnMetadata last_write_timestamp = getColumn(CommandsForKeys, "last_write_timestamp"); static final ColumnMetadata data = getColumn(CommandsForKeys, "data"); @@ -486,6 +573,7 @@ public static int getStoreId(ByteBuffer[] partitionKeyComponents) return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); } + @Nullable public static Timestamp getMaxTimestamp(Row row) { Cell cell = row.getCell(max_timestamp); @@ -494,6 +582,7 @@ public static Timestamp getMaxTimestamp(Row row) return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); } + @Nullable public static Timestamp getLastExecutedTimestamp(Row row) { Cell cell = row.getCell(last_executed_timestamp); @@ -515,11 +604,13 @@ public static PartitionKey getKey(ByteBuffer[] partitionKeyComponents) return deserializeKey(partitionKeyComponents[key.position()]); } + @Nullable public static Timestamp getTimestamp(Row row) { return deserializeTimestampOrNull(row.clustering().bufferAt(CommandsForKeyColumns.timestamp.position()), Timestamp::fromBits); } + @Nullable public static Timestamp getLastWriteTimestamp(Row row) { Cell cell = row.getCell(last_write_timestamp); @@ -528,21 +619,41 @@ public static Timestamp getLastWriteTimestamp(Row row) return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); } - public static Row truncateStaticRow(long nowInSec, Row row, long last_execute_micros, Timestamp last_execute, Timestamp last_write, Timestamp max_timestamp) + public static Row truncateStaticRow(long nowInSec, Row row, Cell lastExecuteMicrosCell, Cell lastExecuteCell, Cell lastWriteCell, Cell maxTimestampCell) { - long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); - long newTimestamp = oldTimestamp + 1; + checkArgument(lastExecuteMicrosCell == null || lastExecuteMicrosCell.column() == CommandsForKeyColumns.last_executed_micros); + checkArgument(lastExecuteCell == null || lastExecuteCell.column() == CommandsForKeyColumns.last_executed_timestamp); + checkArgument(lastWriteCell == null || lastWriteCell.column() == CommandsForKeyColumns.last_write_timestamp); + checkArgument(maxTimestampCell == null || maxTimestampCell.column() == CommandsForKeyColumns.max_timestamp); + + long timestamp = row.primaryKeyLivenessInfo().timestamp(); + + int colCount = 0; + if (lastExecuteMicrosCell != null) + colCount++; + if (lastExecuteCell != null) + colCount++; + if (lastWriteCell != null) + colCount++; + if (maxTimestampCell != null) + colCount++; ColumnMetadata[] fields = CommandsForKeyColumns.static_columns_metadata; - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(fields.length); - newLeaf[0] = BufferCell.live(fields[0], newTimestamp, ByteBufferAccessor.instance.valueOf(last_execute_micros)); - newLeaf[1] = BufferCell.live(fields[1], newTimestamp, serializeTimestamp(last_execute)); - newLeaf[2] = BufferCell.live(fields[2], newTimestamp, serializeTimestamp(last_write)); - newLeaf[3] = BufferCell.live(fields[3], newTimestamp, serializeTimestamp(max_timestamp)); + checkState(fields.length >= colCount, "CommandsForKeyColumns.static_columns_metadata should include all the columns"); + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(colCount); + int colIndex = 0; - return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), - new Row.Deletion(DeletionTime.build(oldTimestamp, nowInSec), false), - newLeaf); + if (lastExecuteMicrosCell != null) + newLeaf[colIndex++] = lastExecuteMicrosCell; + if (lastExecuteCell != null) + newLeaf[colIndex++] = lastExecuteCell; + if (lastWriteCell != null) + newLeaf[colIndex++] = lastWriteCell; + if (maxTimestampCell != null) + newLeaf[colIndex++] = maxTimestampCell; + + return BTreeRow.create(row.clustering(), LivenessInfo.create(timestamp, nowInSec), + Deletion.LIVE, newLeaf); } } @@ -791,6 +902,7 @@ public static Mutation getCommandMutation(int storeId, Command original, Command Row.Builder builder = BTreeRow.unsortedBuilder(); builder.newRow(Clustering.EMPTY); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + builder.addPrimaryKeyLivenessInfo(LivenessInfo.create(timestampMicros, nowInSeconds)); addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, nowInSeconds, original, command); @@ -864,6 +976,7 @@ public interface TimestampFactory T create(long msb, long lsb, Node.Id node); } + @Nullable public static T deserializeTimestampOrNull(ByteBuffer bytes, TimestampFactory factory) { if (bytes == null || ByteBufferAccessor.instance.isEmpty(bytes)) @@ -872,6 +985,18 @@ public static T deserializeTimestampOrNull(ByteBuffer byte return factory.create(split.get(0).getLong(), split.get(1).getLong(), new Node.Id(split.get(2).getInt())); } + public static Timestamp deserializeTimestampOrNull(Cell cell) + { + if (cell == null) + return null; + ValueAccessor accessor = cell.accessor(); + V value = cell.value(); + if (accessor.isEmpty(value)) + return null; + List split = TIMESTAMP_TYPE.unpack(value, accessor); + return Timestamp.fromBits(accessor.getLong(split.get(0), 0), accessor.getLong(split.get(1), 0), new Node.Id(accessor.getInt(split.get(2), 0))); + } + public static T deserializeTimestampOrNull(V value, ValueAccessor accessor, TimestampFactory factory) { if (value == null || accessor.isEmpty(value)) @@ -902,6 +1027,30 @@ private static T deserializeTimestampOrDefault(Row row, Co return result; } + public static Durability deserializeDurabilityOrNull(Cell cell) + { + return cell == null ? null : CommandSerializers.durability.forOrdinal(cell.accessor().getInt(cell.value(), 0)); + } + + public static SaveStatus deserializeSaveStatusOrNull(Cell cell) + { + return cell == null ? null : CommandSerializers.saveStatus.forOrdinal(cell.accessor().getInt(cell.value(), 0)); + } + + public static Route deserializeRouteOrNull(Cell cell) + { + if (cell == null) + return null; + try + { + return deserializeOrNull(cell.buffer(), LocalVersionedSerializers.route); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + private static T deserializeWithVersionOr(UntypedResultSet.Row row, String dataColumn, LocalVersionedSerializer serializer, Supplier defaultSupplier) throws IOException { if (!row.has(dataColumn)) @@ -1149,7 +1298,7 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) try { UntypedResultSet.Row row = rows.one(); - Invariants.checkState(deserializeTxnId(row).equals(txnId)); + checkState(deserializeTxnId(row).equals(txnId)); SaveStatus status = deserializeStatus(row); CommonAttributes.Mutable attributes = new CommonAttributes.Mutable(txnId); // TODO: something less brittle than ordinal, more efficient than values() @@ -1250,12 +1399,13 @@ private static void addSeriesMutations(ImmutableSortedMap SeriesKind kind, PartitionUpdate.Builder partitionBuilder, Row.Builder rowBuilder, - long timestampMicros, + LivenessInfo livenessInfo, int nowInSeconds) { if (prev == value) return; + long timestampMicros = livenessInfo.timestamp(); Set deletions = Sets.difference(prev.keySet(), value.keySet()); Row.Deletion deletion = !deletions.isEmpty() ? @@ -1267,6 +1417,7 @@ private static void addSeriesMutations(ImmutableSortedMap return; rowBuilder.newRow(Clustering.make(ordinalBytes, serializeTimestamp(timestamp))); rowBuilder.addCell(live(CommandsForKeyColumns.data, timestampMicros, bytes)); + rowBuilder.addPrimaryKeyLivenessInfo(livenessInfo); partitionBuilder.add(rowBuilder.build()); }); deletions.forEach(timestamp -> { @@ -1281,10 +1432,10 @@ private static void addSeriesMutations(CommandsForKey original, SeriesKind kind, PartitionUpdate.Builder partitionBuilder, Row.Builder rowBuilder, - long timestampMicros, + LivenessInfo livenessInfo, int nowInSeconds) { - addSeriesMutations(kind.getValues(original), kind.getValues(cfk), kind, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); + addSeriesMutations(kind.getValues(original), kind.getValues(cfk), kind, partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); } private static DecoratedKey makeKey(int storeId, PartitionKey key) @@ -1315,6 +1466,7 @@ public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKey ori ValueAccessor accessor = ByteBufferAccessor.instance; int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + LivenessInfo livenessInfo = LivenessInfo.create(timestampMicros, nowInSeconds); boolean hasStaticChanges = CommandsForKeyColumns.hasStaticChanges(original, cfk); int expectedRows = (hasStaticChanges ? 1 : 0) @@ -1335,13 +1487,14 @@ public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKey ori addCellIfModified(CommandsForKeyColumns.last_executed_timestamp, CommandsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); addCellIfModified(CommandsForKeyColumns.last_executed_micros, CommandsForKey::rawLastExecutedHlc, accessor::valueOf, rowBuilder, timestampMicros, nowInSeconds, original, cfk); addCellIfModified(CommandsForKeyColumns.last_write_timestamp, CommandsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); + rowBuilder.addPrimaryKeyLivenessInfo(livenessInfo); Row row = rowBuilder.build(); if (!row.isEmpty()) partitionBuilder.add(row); } - addSeriesMutations(original, cfk, SeriesKind.BY_ID, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); - addSeriesMutations(original, cfk, SeriesKind.BY_EXECUTE_AT, partitionBuilder, rowBuilder, timestampMicros, nowInSeconds); + addSeriesMutations(original, cfk, SeriesKind.BY_ID, partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); + addSeriesMutations(original, cfk, SeriesKind.BY_EXECUTE_AT, partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); PartitionUpdate update = partitionBuilder.build(); if (update.isEmpty()) @@ -1431,7 +1584,7 @@ public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, seriesMaps.get(SeriesKind.values()[ordinal]).put(timestamp, data); } } - Invariants.checkState(!partitions.hasNext()); + checkState(!partitions.hasNext()); return CommandsForKey.SerializerSupport.create(key, max, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp, CommandsForKeySerializer.loader, @@ -1657,7 +1810,7 @@ public static void loadEpoch(long epoch, TopologyLoadConsumer consumer) throws I String cql = format("SELECT * FROM %s.%s WHERE epoch=?", ACCORD_KEYSPACE_NAME, TOPOLOGIES); UntypedResultSet result = executeInternal(cql, epoch); - Invariants.checkState(!result.isEmpty(), "Nothing found for epoch %d", epoch); + checkState(!result.isEmpty(), "Nothing found for epoch %d", epoch); UntypedResultSet.Row row = result.one(); Topology topology = row.has("topology") ? deserialize(row.getBytes("topology"), LocalVersionedSerializers.topology) diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 60203f5e25eb..22b0eb9999be 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -55,12 +55,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; - -import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.gms.ApplicationState; -import org.apache.cassandra.gms.Gossiper; -import org.apache.cassandra.gms.VersionedValue; -import org.apache.cassandra.io.util.File; import org.apache.commons.lang3.StringUtils; import org.junit.Assume; import org.slf4j.Logger; @@ -91,12 +85,6 @@ import org.apache.cassandra.db.compaction.CompactionTasks; import org.apache.cassandra.db.compaction.OperationType; import org.apache.cassandra.db.lifecycle.LifecycleTransaction; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.ReplicaCollection; -import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; import org.apache.cassandra.db.marshal.Int32Type; @@ -123,6 +111,10 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.internal.CassandraIndex; +import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; +import org.apache.cassandra.gms.ApplicationState; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.gms.VersionedValue; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.SSTableId; import org.apache.cassandra.io.sstable.SSTableLoader; @@ -131,8 +123,15 @@ import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.SSTableReaderWithFilter; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.locator.ReplicaCollection; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableMetadataRef; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.pager.PagingState; @@ -735,6 +734,20 @@ public static void spinAssert(String message, Matcher matcher, Supplier assertThat(message, actualSupplier.get(), matcher)); } + public static void spinAssertEquals(Object expected, int timeoutInSeconds, Callable call) + { + spinAssertEquals(null, expected, timeoutInSeconds, TimeUnit.SECONDS, call); + } + + public static void spinAssertEquals(String message, T expected, long timeout, TimeUnit timeUnit, Callable call) + { + Awaitility.await() + .pollInterval(Duration.ofMillis(100)) + .pollDelay(0, TimeUnit.MILLISECONDS) + .atMost(timeout, timeUnit) + .untilAsserted(() -> assertThat(message, call.call(), equalTo(expected))); + } + public static void joinThread(Thread thread) throws InterruptedException { thread.join(10000); diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index e09a183b15e2..c51e161d4d8c 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -24,24 +24,27 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Random; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; import java.util.stream.Collectors; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.Result; -import accord.api.RoutingKey; import accord.local.CheckedCommands; import accord.local.CommandStore; import accord.local.DurableBefore; import accord.local.RedundantBefore; import accord.local.SaveStatus; +import accord.local.Status; import accord.local.Status.Durability; import accord.primitives.Ballot; import accord.primitives.Deps; @@ -51,7 +54,6 @@ import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.Seekable; -import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; import accord.primitives.TxnId; @@ -64,18 +66,22 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ColumnFamilyStore.FlushReason; import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.compaction.CompactionIteratorTest.Scanner; import org.apache.cassandra.db.partitions.ImmutableBTreePartition; import org.apache.cassandra.db.partitions.Partition; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; +import org.apache.cassandra.service.accord.AccordKeyspace.CommandsColumns; import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.IAccordService; @@ -86,6 +92,7 @@ import static accord.impl.CommandsForKey.NO_LAST_EXECUTED_HLC; import static accord.local.PreLoadContext.contextFor; import static accord.utils.async.AsyncChains.getUninterruptibly; +import static org.apache.cassandra.Util.spinAssertEquals; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.MAJORITY; import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.NOT_DURABLE; @@ -95,6 +102,7 @@ import static org.apache.cassandra.service.accord.AccordKeyspace.COMMANDS_FOR_KEY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -104,6 +112,7 @@ public class CompactionAccordIteratorsTest { private static final Logger logger = LoggerFactory.getLogger(CompactionAccordIteratorsTest.class); + private static final long CLOCK_START = 44; private static final long HLC_START = 41; private static final int NODE = 1; @@ -117,8 +126,17 @@ public class CompactionAccordIteratorsTest private static final TxnId[] TXN_IDS = new TxnId[] {TXN_ID, SECOND_TXN_ID}; private static final TxnId GT_SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, SECOND_TXN_ID.hlc() + 1, NODE); + static ColumnFamilyStore commands; + static ColumnFamilyStore commandsForKey; static TableMetadata table; static FullRoute route; + Random random; + + /* + * Whether to compact all tables at once in a single merge or forcing two random tables + * to merge at a time + */ + private boolean singleCompaction; @BeforeClass public static void beforeClass() throws Throwable @@ -128,15 +146,40 @@ public static void beforeClass() throws Throwable SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); StorageService.instance.initServer(); + commands = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, COMMANDS); + commands.disableAutoCompaction(); + commandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, COMMANDS_FOR_KEY); + commandsForKey.disableAutoCompaction(); table = ColumnFamilyStore.getIfExists("ks", "tbl").metadata(); route = AccordTestUtils.keys(table, 42).toRoute(AccordTestUtils.key(table, 42).toUnseekable()); } + @Before + public void setUp() + { + // This attempt at determinism doesn't work because the order of the SSTableScanners is not determinisitc + long seed = System.nanoTime(); + logger.info("Seed " + seed + "L"); + random = new Random(seed); + } + // This isn't attempting to be an exhaustive test of Commands.shouldCleanup just that the return values // are handled correctly and that the interaction between the CompactionIterator and shoudCleanup seems reasonable @Test - public void testAccordCommandsPurger() throws Throwable + public void testAccordCommandsPurgerSingleCompaction() throws Throwable { + testAccordCommandsPurger(true); + } + + @Test + public void testAccordCommandsPurgerMultipleCompactions() throws Throwable + { + testAccordCommandsPurger(false); + } + + private void testAccordCommandsPurger(boolean singleCompaction) throws Throwable + { + this.singleCompaction = singleCompaction; // Null redudnant before should make no change since we have no information on this CommandStore testAccordCommandsPurger(null, DurableBefore.EMPTY, expectAccordCommandsNoChange()); // Universally durable (and global to boot) should be erased since literally everyone knows about it @@ -156,7 +199,7 @@ public void testAccordCommandsPurger() throws Throwable testAccordCommandsPurger(redundantBefore(LT_TXN_ID), durableBefore(DurableBeforeType.EMPTY), expectAccordCommandsNoChange()); } - private static void testAccordCommandsPurger(RedundantBefore redundantBefore, DurableBefore durableBefore, Consumer> expectedResult) throws Throwable + private void testAccordCommandsPurger(RedundantBefore redundantBefore, DurableBefore durableBefore, Consumer> expectedResult) throws Throwable { testWithCommandStore((commandStore) -> { IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, durableBefore); @@ -167,8 +210,20 @@ private static void testAccordCommandsPurger(RedundantBefore redundantBefore, Du } @Test - public void testAccordCommandsForKeyPurger() throws Throwable + public void testAccordCommandsForKeyPurgerSingleCompaction() throws Throwable { + testAccordCommandsForKeyPurger(true); + } + + @Test + public void testAccordCommandsForKeyPurgerMultipleCompactions() throws Throwable + { + testAccordCommandsForKeyPurger(false); + } + + private void testAccordCommandsForKeyPurger(boolean singleCompaction) throws Throwable + { + this.singleCompaction = singleCompaction; testAccordCommandsForKeyPurger(null, expectedAccordCommandsForKeyNoChange()); testAccordCommandsForKeyPurger(redundantBefore(LT_TXN_ID), expectedAccordCommandsForKeyNoChange()); testAccordCommandsForKeyPurger(redundantBefore(TXN_ID), expectedAccordCommandsForKeyNoChange()); @@ -189,7 +244,7 @@ private static Consumer> expectedAccordCommandsForKeyNoChange() assertEquals(TXN_ID.hlc(), CommandsForKeyRows.getLastExecutedMicros(staticRow)); assertEquals(4, Iterators.size(partition.unfilteredIterator())); UnfilteredRowIterator rows = partition.unfilteredIterator(); - // One row per series + // One row per txn per series for (int i = 0; i < 2; i++) for (TxnId txnId : TXN_IDS) assertEquals(txnId, CommandsForKeyRows.getTimestamp((Row)rows.next())); @@ -202,10 +257,11 @@ private static Consumer> expectedAccordCommandsForKeyEraseOne() assertEquals(1, partitions.size()); Partition partition = partitions.get(0); Row staticRow = partition.getRow(Clustering.STATIC_CLUSTERING); - assertEquals(4, Iterables.size(staticRow)); + // Only expect one column to remain because the second transaction is a read + assertEquals(1, Iterables.size(staticRow)); assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); - assertEquals(Timestamp.NONE, CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); - assertEquals(Timestamp.NONE, CommandsForKeyRows.getLastWriteTimestamp(staticRow)); + assertNull(CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); + assertNull(CommandsForKeyRows.getLastWriteTimestamp(staticRow)); assertEquals(NO_LAST_EXECUTED_HLC, CommandsForKeyRows.getLastExecutedMicros(staticRow)); assertEquals(2, Iterators.size(partition.unfilteredIterator())); UnfilteredRowIterator rows = partition.unfilteredIterator(); @@ -219,7 +275,7 @@ private static Consumer> expectedAccordCommandsForKeyEraseAll() return partitions -> assertEquals(0, partitions.size()); } - private static void testAccordCommandsForKeyPurger(RedundantBefore redundantBefore, Consumer> expectedResult) throws Throwable + private void testAccordCommandsForKeyPurger(RedundantBefore redundantBefore, Consumer> expectedResult) throws Throwable { testWithCommandStore((commandStore) -> { IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, DurableBefore.EMPTY); @@ -244,7 +300,9 @@ Consumer> expectAccordCommandsTruncatedWithOutcome() assertEquals(1, Iterators.size(partition.unfilteredIterator())); ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); Row row = (Row) partition.unfilteredIterator().next(); - assertEquals(6, row.columnCount()); + assertEquals(CommandsColumns.TRUNCATE_FIELDS[1].length, row.columnCount()); + for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS[1]) + assertNotNull(row.getColumnData(cm)); assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); assertEquals(1, ((TxnData)CommandRows.getResult(row)).entrySet().size()); assertNotNull(CommandRows.getWrites(row)); @@ -268,7 +326,9 @@ Consumer> expectAccordCommandsTruncated() assertEquals(1, Iterators.size(partition.unfilteredIterator())); ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); Row row = (Row)partition.unfilteredIterator().next(); - assertEquals(4, row.columnCount()); + assertEquals(CommandsColumns.TRUNCATE_FIELDS[0].length, row.columnCount()); + for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS[0]) + assertNotNull(row.getColumnData(cm)); assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); assertEquals(Durability.Local, CommandRows.getDurability(row)); assertEquals(TXN_ID, CommandRows.getExecuteAt(row)); @@ -285,6 +345,9 @@ Consumer> expectAccordCommandsNoChange() assertEquals(1, Iterators.size(partition.unfilteredIterator())); ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); Row row = (Row)partition.unfilteredIterator().next(); + assertEquals(commands.metadata().regularColumns().size(), row.columnCount()); + for (ColumnMetadata cm : commands.metadata().regularColumns()) + assertNotNull(row.getColumnData(cm)); assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); assertEquals(SaveStatus.Applied, AccordKeyspace.CommandRows.getStatus(row)); }; @@ -338,7 +401,21 @@ interface TestWithCommandStore void test(AccordCommandStore commandStore) throws Throwable; } - private static void testWithCommandStore(TestWithCommandStore test, boolean additionalCommand) throws Throwable + + private static void flush(AccordCommandStore commandStore) + { + commandStore.executeBlocking(() -> { + // clear cache and wait for post-eviction writes to complete + long cacheSize = commandStore.getCacheSize(); + commandStore.setCacheSize(0); + commandStore.setCacheSize(cacheSize); + commandStore.cache().awaitSaveResults(); + }); + commands.forceBlockingFlush(FlushReason.UNIT_TESTS); + commandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); + } + + private void testWithCommandStore(TestWithCommandStore test, boolean additionalCommand) throws Throwable { Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores().forEach(ColumnFamilyStore::truncateBlocking); clock.set(CLOCK_START); @@ -350,57 +427,93 @@ private static void testWithCommandStore(TestWithCommandStore test, boolean addi Seekable key = txn.keys().get(0); PartialDeps partialDeps = Deps.NONE.slice(AccordTestUtils.fullRange(txn)); PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); - RoutingKey homeKey = key.someIntersectingRoutingKey(commandStore.unsafeRangesForEpoch().currentRanges()); PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); - getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys()), safe -> { + long originalCacheSize = getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys()), safe -> { + // clear cache + long cacheSize = commandStore.getCacheSize(); + commandStore.setCacheSize(0); CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); + return cacheSize; + }).beginAsResult()); + flush(commandStore); + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, txnId, partialDeps); + }).beginAsResult()); + flush(commandStore); + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { CheckedCommands.commit(safe, txnId, route, null, partialTxn, txnId, partialDeps); + }).beginAsResult()); + flush(commandStore); + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { Pair result = AccordTestUtils.processTxnResultDirect(safe, txnId, partialTxn, txnId); CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right); - return safe.get(txnId, homeKey).current(); + }).beginAsResult()); + flush(commandStore); + // The apply chain is asychronous so it is easiest to just spin until it is applied + // in order to have the updated state in the system table + spinAssertEquals(true, 5, () -> + getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys()), safe -> safe.get(txnId, route.homeKey()).current().hasBeen(Status.Applied) + ).beginAsResult())); + flush(commandStore); + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + commandStore.setCacheSize(originalCacheSize); }).beginAsResult()); } - commandStore.executeBlocking(() -> { - // clear cache and wait for post-eviction writes to complete - long cacheSize = commandStore.getCacheSize(); - commandStore.setCacheSize(0); - commandStore.setCacheSize(cacheSize); - commandStore.cache().awaitSaveResults(); - }); - UntypedResultSet commandsTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS + ";"); + logger.info(commandsTable.toStringUnsafe()); assertEquals(txnIds.length, commandsTable.size()); Iterator commandsTableIterator = commandsTable.iterator(); for (TxnId txnId : txnIds) assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS_FOR_KEY + ";"); + logger.info(commandsForKeyTable.toStringUnsafe()); assertEquals(txnIds.length * 2, commandsForKeyTable.size()); Iterator commandsForKeyTableIterator = commandsTable.iterator(); for (TxnId txnId : txnIds) assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsForKeyTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); - System.out.println(commandsForKeyTable); test.test(commandStore); } - private static List compactCFS(IAccordService mockAccordService, ColumnFamilyStore cfs) + private List compactCFS(IAccordService mockAccordService, ColumnFamilyStore cfs) { - cfs.forceBlockingFlush(FlushReason.UNIT_TESTS); List scanners = cfs.getLiveSSTables().stream().map(SSTableReader::getScanner).collect(Collectors.toList()); - List result = new ArrayList<>(); - try (CompactionController controller = new CompactionController(ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, cfs.name), Collections.emptySet(), 0); - CompactionIterator compactionIterator = new CompactionIterator(OperationType.COMPACTION, scanners, controller, FBUtilities.nowInSeconds(), null, ActiveCompactionsTracker.NOOP, null, () -> mockAccordService)) + int numScanners = scanners.size(); + List result = null; + do { - while (compactionIterator.hasNext()) + List outputPartitions = new ArrayList<>(); + List nextInputScanners = new ArrayList<>(); + if (singleCompaction) + { + nextInputScanners = ImmutableList.copyOf(scanners); + scanners.clear(); + } + else + { + // Process the rows only two sstables at a time to force compacting random slices of command state + nextInputScanners.add(scanners.remove(random.nextInt(scanners.size()))); + nextInputScanners.add(scanners.remove(random.nextInt(scanners.size()))); + } + try (CompactionController controller = new CompactionController(ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, cfs.name), Collections.emptySet(), 0); + CompactionIterator compactionIterator = new CompactionIterator(OperationType.COMPACTION, nextInputScanners, controller, FBUtilities.nowInSeconds(), null, ActiveCompactionsTracker.NOOP, null, () -> mockAccordService)) { - try (UnfilteredRowIterator partition = compactionIterator.next()) + while (compactionIterator.hasNext()) { - result.add(ImmutableBTreePartition.create(partition)); + try (UnfilteredRowIterator partition = compactionIterator.next()) + { + outputPartitions.add(ImmutableBTreePartition.create(partition)); + } } } - } - verify(mockAccordService, times(1)).getRedundantBeforesAndDurableBefore(); + + if (scanners.isEmpty()) + result = outputPartitions; + else + scanners.add(random.nextInt(scanners.size()), new Scanner(cfs.metadata(), outputPartitions.stream().map(Partition::unfilteredIterator).collect(Collectors.toList()))); + } while (!scanners.isEmpty()); + + verify(mockAccordService, times(singleCompaction ? 1 : numScanners - 1)).getRedundantBeforesAndDurableBefore(); return result; } } diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionIteratorTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionIteratorTest.java index 076ef9876f87..d09c9551730c 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionIteratorTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionIteratorTest.java @@ -17,18 +17,25 @@ */ package org.apache.cassandra.db.compaction; -import static org.apache.cassandra.config.CassandraRelevantProperties.DIAGNOSTIC_SNAPSHOT_INTERVAL_NANOS; -import static org.apache.cassandra.db.transform.DuplicateRowCheckerTest.assertCommandIssued; -import static org.apache.cassandra.db.transform.DuplicateRowCheckerTest.makeRow; -import static org.apache.cassandra.db.transform.DuplicateRowCheckerTest.partition; -import static org.junit.Assert.*; - -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Random; +import java.util.Set; +import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.google.common.collect.*; - +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; import org.junit.BeforeClass; import org.junit.Test; @@ -44,17 +51,30 @@ import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.partitions.AbstractUnfilteredPartitionIterator; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.rows.RangeTombstoneBoundaryMarker; +import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.db.rows.UnfilteredRowsGenerator; import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import static org.apache.cassandra.config.CassandraRelevantProperties.DIAGNOSTIC_SNAPSHOT_INTERVAL_NANOS; +import static org.apache.cassandra.db.transform.DuplicateRowCheckerTest.assertCommandIssued; +import static org.apache.cassandra.db.transform.DuplicateRowCheckerTest.makeRow; +import static org.apache.cassandra.db.transform.DuplicateRowCheckerTest.partition; + public class CompactionIteratorTest extends CQLTester { @@ -274,7 +294,7 @@ private List compact(Iterable> sources, Iterable
  • listToIterator(list, kk))); try (CompactionController controller = new Controller(Keyspace.openAndGetStore(metadata), transformedSources, GC_BEFORE); CompactionIterator iter = new CompactionIterator(OperationType.COMPACTION, - Lists.transform(content, x -> new Scanner(x)), + Lists.transform(content, x -> new Scanner(metadata, x)), controller, NOW, null)) { List result = new ArrayList<>(); @@ -336,7 +356,7 @@ public void transformTest() transformedSources.put(kk, Iterables.transform(tombstoneLists, list -> listToIterator(list, kk))); try (CompactionController controller = new Controller(Keyspace.openAndGetStore(metadata), transformedSources, GC_BEFORE); CompactionIterator iter = new CompactionIterator(OperationType.COMPACTION, - Lists.transform(content, x -> new Scanner(x)), + Lists.transform(content, x -> new Scanner(metadata, x)), controller, NOW, null)) { assertTrue(iter.hasNext()); @@ -369,7 +389,7 @@ public void transformPartitionTest() transformedSources.put(kk, Iterables.transform(tombstoneLists, list -> listToIterator(list, kk))); try (CompactionController controller = new Controller(Keyspace.openAndGetStore(metadata), transformedSources, GC_BEFORE); CompactionIterator iter = new CompactionIterator(OperationType.COMPACTION, - Lists.transform(content, x -> new Scanner(x)), + Lists.transform(content, x -> new Scanner(metadata, x)), controller, NOW, null)) { iter.stop(); @@ -404,12 +424,14 @@ public Iterable shadowSources(DecoratedKey key, boolean t } } - class Scanner extends AbstractUnfilteredPartitionIterator implements ISSTableScanner + static class Scanner extends AbstractUnfilteredPartitionIterator implements ISSTableScanner { Iterator iter; + TableMetadata metadata; - Scanner(Iterable content) + Scanner(TableMetadata metadata, Iterable content) { + this.metadata = metadata; iter = content.iterator(); } @@ -500,7 +522,7 @@ private void iterate(Unfiltered...unfiltereds) DecoratedKey key = cfs.getPartitioner().decorateKey(ByteBufferUtil.bytes("key")); try (CompactionController controller = new CompactionController(cfs, Integer.MAX_VALUE); UnfilteredRowIterator rows = partition(cfs.metadata(), key, false, unfiltereds); - ISSTableScanner scanner = new Scanner(Collections.singletonList(rows)); + ISSTableScanner scanner = new Scanner(cfs.metadata(), Collections.singletonList(rows)); CompactionIterator iter = new CompactionIterator(OperationType.COMPACTION, Collections.singletonList(scanner), controller, FBUtilities.nowInSeconds(), null)) diff --git a/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceComparisonTest.java b/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceComparisonTest.java index 496f6355c1d3..e1f11ac15a0b 100644 --- a/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceComparisonTest.java +++ b/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceComparisonTest.java @@ -494,7 +494,7 @@ public void testTupleTypeNonFull() tt.pack(decomposeAndRandomPad(UTF8Type.instance, ""), decomposeAndRandomPad(Int32Type.instance, 0)), // Note: a decomposed null (e.g. decomposeAndRandomPad(Int32Type.instance, null)) should not reach a tuple tt.pack(decomposeAndRandomPad(UTF8Type.instance, ""), null), - tt.pack(null, decomposeAndRandomPad(Int32Type.instance, 0)), + tt.pack((ByteBuffer) null, decomposeAndRandomPad(Int32Type.instance, 0)), tt.pack(decomposeAndRandomPad(UTF8Type.instance, "")), tt.pack((ByteBuffer) null), tt.pack() diff --git a/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java b/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java index 7ab30adfa792..a6f34acadfb3 100644 --- a/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java +++ b/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java @@ -442,7 +442,7 @@ public void testTupleTypeNonFull() tt.pack(decomposeAndRandomPad(UTF8Type.instance, ""), decomposeAndRandomPad(Int32Type.instance, 0)), // Note: a decomposed null (e.g. decomposeAndRandomPad(Int32Type.instance, null)) should not reach a tuple tt.pack(decomposeAndRandomPad(UTF8Type.instance, ""), null), - tt.pack(null, decomposeAndRandomPad(Int32Type.instance, 0)), + tt.pack((ByteBuffer) null, decomposeAndRandomPad(Int32Type.instance, 0)), tt.pack(decomposeAndRandomPad(UTF8Type.instance, "")), tt.pack((ByteBuffer) null), tt.pack() From cfc63edcb483af43291ffa2c120d4b8a287bb872 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 14 Sep 2023 13:54:28 -0700 Subject: [PATCH 069/340] CEP-15: (C*) accord.messages.BeginRecovery.RecoverNack#supersededBy is nullable but C* serializer doesn't expect null --- .../service/accord/serializers/CommandSerializers.java | 1 + .../service/accord/serializers/RecoverySerializers.java | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index 497fa11a2937..d25d5cffa290 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -57,6 +57,7 @@ private CommandSerializers() {} public static final TimestampSerializer timestamp = new TimestampSerializer<>(Timestamp::fromBits); public static final IVersionedSerializer nullableTimestamp = NullableSerializer.wrap(timestamp); public static final TimestampSerializer ballot = new TimestampSerializer<>(Ballot::fromBits); + public static final IVersionedSerializer nullableBallot = NullableSerializer.wrap(ballot); public static final EnumSerializer kind = new EnumSerializer<>(Txn.Kind.class); public static class TimestampSerializer implements IVersionedSerializer diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index b4111320d074..5d3a5a93daf9 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -81,7 +81,7 @@ public long serializedBodySize(BeginRecovery recover, int version) { void serializeNack(RecoverNack recoverNack, DataOutputPlus out, int version) throws IOException { - CommandSerializers.ballot.serialize(recoverNack.supersededBy, out, version); + CommandSerializers.nullableBallot.serialize(recoverNack.supersededBy, out, version); } void serializeOk(RecoverOk recoverOk, DataOutputPlus out, int version) throws IOException @@ -124,7 +124,7 @@ public final RecoverReply deserialize(DataInputPlus in, int version) throws IOEx { boolean isOk = in.readBoolean(); if (!isOk) - return deserializeNack(CommandSerializers.ballot.deserialize(in, version), in, version); + return deserializeNack(CommandSerializers.nullableBallot.deserialize(in, version), in, version); return deserializeOk(CommandSerializers.txnId.deserialize(in, version), CommandSerializers.status.deserialize(in, version), @@ -143,7 +143,7 @@ public final RecoverReply deserialize(DataInputPlus in, int version) throws IOEx long serializedNackSize(RecoverNack recoverNack, int version) { - return CommandSerializers.ballot.serializedSize(recoverNack.supersededBy, version); + return CommandSerializers.nullableBallot.serializedSize(recoverNack.supersededBy, version); } long serializedOkSize(RecoverOk recoverOk, int version) From 08428a27508fd5c2150eef7aad364882c75a2b5c Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Wed, 20 Sep 2023 11:25:51 -0500 Subject: [PATCH 070/340] - Avoid persisting fragments that do not require completion as Updates in TxnWrite, as they can simply be pulled from PartialTxn when needed in Write#apply() - Avoid serializing full TxnData instances to Accord state tables patch by Caleb Rackliffe; reviewed by David Capwell, Benedict Elliot Smith, and Ariel Weisberg for CASSANDRA-18355 --- modules/accord | 2 +- .../service/accord/AccordKeyspace.java | 62 +++++++------------ .../service/accord/AccordObjectSizes.java | 10 +-- .../accord/serializers/ApplySerializers.java | 8 +-- .../serializers/CheckStatusSerializers.java | 12 ++-- .../serializers/RecoverySerializers.java | 24 ++++--- .../service/accord/txn/TxnUpdate.java | 21 ++++++- .../service/accord/txn/TxnWrite.java | 54 ++++++++++++---- .../test/accord/AccordCQLTest.java | 26 ++++++++ .../test/accord/AccordIntegrationTest.java | 1 - .../CompactionAccordIteratorsTest.java | 4 +- .../accord/AccordCommandStoreTest.java | 2 +- .../accord/AccordSyncPropagatorTest.java | 6 +- 13 files changed, 144 insertions(+), 88 deletions(-) diff --git a/modules/accord b/modules/accord index 1d6028ca2055..df492dfd2ffe 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 1d6028ca20553d1c1a6fe2809b204254955da3b3 +Subproject commit df492dfd2ffe993c33761d0531ac5b979b80f080 diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index bfaf76459220..837ad19c0abe 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -154,7 +154,6 @@ import org.apache.cassandra.service.accord.serializers.ListenerSerializers; import org.apache.cassandra.service.accord.serializers.TopologySerializers; import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; -import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.btree.BTree; @@ -251,7 +250,6 @@ static TokenType valueOf(Token token) + format("accepted_ballot %s,", TIMESTAMP_TUPLE) + "dependencies blob," + "writes blob," - + "result blob," + "waiting_on blob," + "listeners set, " + "PRIMARY KEY((store_id, domain, txn_id))" @@ -267,7 +265,6 @@ private static class LocalVersionedSerializers static final LocalVersionedSerializer partialTxn = localSerializer(CommandSerializers.partialTxn); static final LocalVersionedSerializer partialDeps = localSerializer(DepsSerializer.partialDeps); static final LocalVersionedSerializer writes = localSerializer(CommandSerializers.writes); - static final LocalVersionedSerializer result = localSerializer(TxnData.serializer); static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); static final LocalVersionedSerializer topology = localSerializer(TopologySerializers.topology); static final LocalVersionedSerializer> rejectBefore = localSerializer(CommandStoreSerializers.rejectBefore); @@ -305,13 +302,12 @@ public static class CommandsColumns static final ColumnMetadata accepted_ballot = getColumn(Commands, "accepted_ballot"); static final ColumnMetadata dependencies = getColumn(Commands, "dependencies"); static final ColumnMetadata writes = getColumn(Commands, "writes"); - static final ColumnMetadata result = getColumn(Commands, "result"); static final ColumnMetadata waiting_on = getColumn(Commands, "waiting_on"); static final ColumnMetadata listeners = getColumn(Commands, "listeners"); public static ColumnMetadata[][] TRUNCATE_FIELDS = new ColumnMetadata[][] { new ColumnMetadata[] { durability, execute_at, route, status }, - new ColumnMetadata[] { durability, execute_at, result, route, status, writes }, + new ColumnMetadata[] { durability, execute_at, route, status, writes }, }; static @@ -386,61 +382,56 @@ public static Route getRoute(Row row) } } - private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSaveStatus, Cell durabilityCell, Cell executeAtCell, @Nullable Cell resultCell, Cell routeCell, @Nullable Cell writesCell, boolean updateTimestamps) + private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSaveStatus, Cell durabilityCell, Cell executeAtCell, Cell routeCell, @Nullable Cell writesCell, boolean updateTimestamps) { checkArgument(durabilityCell.column() == CommandsColumns.durability); checkArgument(executeAtCell.column() == CommandsColumns.execute_at); - checkArgument(resultCell == null || resultCell.column() == CommandsColumns.result); checkArgument(routeCell.column() == CommandsColumns.route); checkArgument(writesCell == null || writesCell.column() == CommandsColumns.writes); - boolean includeOutcome = resultCell != null; + boolean includeOutcome = writesCell != null; Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS[includeOutcome ? 1 : 0].length); int colIndex = 0; newLeaf[colIndex++] = updateTimestamps ? durabilityCell.withUpdatedTimestamp(newTimestamp) : durabilityCell; newLeaf[colIndex++] = updateTimestamps ? executeAtCell.withUpdatedTimestamp(newTimestamp) : executeAtCell; - if (includeOutcome) - newLeaf[colIndex++] = updateTimestamps ? resultCell.withUpdatedTimestamp(newTimestamp) : resultCell; newLeaf[colIndex++] = updateTimestamps ? routeCell.withUpdatedTimestamp(newTimestamp) : routeCell; // Status always needs to use the new timestamp since we are replacing the existing value // All the other columns are being retained unmodified with at most updated timestamps to accomdate deletion newLeaf[colIndex++] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); if (includeOutcome) + //noinspection UnusedAssignment newLeaf[colIndex++] = updateTimestamps ? writesCell.withUpdatedTimestamp(newTimestamp) : writesCell; return newLeaf; } - public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, Durability durability, Cell durabilityCell, Cell executeAtCell, Cell routeCell, boolean withOutcome) + public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, Durability durability, Cell durabilityCell, Cell executeAtCell, Cell routeCell, boolean withOutcome) { checkArgument(durabilityCell.column() == CommandsColumns.durability); checkArgument(executeAtCell.column() == CommandsColumns.execute_at); checkArgument(routeCell.column() == CommandsColumns.route); long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); long newTimestamp = oldTimestamp + 1; - Cell resultCell = withOutcome ? row.getCell(CommandsColumns.result) : null; - Cell writesCell = withOutcome ? row.getCell(CommandsColumns.writes) : null; - checkState((resultCell != null) == (writesCell != null), "result and writes should always be set together"); - boolean doDeletion = true; + Cell writesCell = withOutcome ? row.getCell(CommandsColumns.writes) : null; + // If durability is not universal we don't want to delete older versions of the row that might have recorded // a higher durability value. maybeDropTruncatedCommandColumns will take care of dropping things even if we don't drop via tombstones. // durability should be the only column that could have an older value that is insufficient for propagating forward - if (durability != Durability.Universal) - doDeletion = false; + boolean doDeletion = durability == Durability.Universal; + // We may not have what we need to generate a deletion and include the outcome in the truncated row // so need to wait until we can have the outcome to issue the deletion otherwise it would be shadowed and lost - if (withOutcome && resultCell == null) + if (withOutcome && writesCell == null) doDeletion = false; - Object[] newLeaf = truncatedApplyLeaf(newTimestamp, newSaveStatus, durabilityCell, executeAtCell, resultCell, routeCell, writesCell, doDeletion); + Object[] newLeaf = truncatedApplyLeaf(newTimestamp, newSaveStatus, durabilityCell, executeAtCell, routeCell, writesCell, doDeletion); // Including a deletion allows future compactions to drop data before it gets to the purger // but it is pretty optional because maybeDropTruncatedCommandColumns will drop the extra columns // regardless Row.Deletion deletion = doDeletion ? new Row.Deletion(DeletionTime.build(oldTimestamp, nowInSec), false) : Deletion.LIVE; - return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), - deletion, newLeaf); + return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), deletion, newLeaf); } - public static Row maybeDropTruncatedCommandColumns(Row row, boolean withOutcome, Cell durabilityCell, Cell executeAtCell, Cell routeCell, Cell statusCell) + public static Row maybeDropTruncatedCommandColumns(Row row, boolean withOutcome, Cell durabilityCell, Cell executeAtCell, Cell routeCell, Cell statusCell) { checkArgument(durabilityCell.column() == CommandsColumns.durability); checkArgument(executeAtCell.column() == CommandsColumns.execute_at); @@ -449,38 +440,29 @@ public static Row maybeDropTruncatedCommandColumns(Row row, boolean withOutcome, int colCount = row.columnCount(); // If it's the exact length of the post truncate column count without outcome fields // then it is exactly the columns needed for getting this far and withOutcome doesn't matter since - // nothing additional is available to include anyways + // nothing additional is available to include anyway if (colCount == TRUNCATE_FIELDS[0].length) return row; - Cell resultCell = row.getCell(CommandsColumns.result); - Cell writesCell = row.getCell(CommandsColumns.writes); - checkState((resultCell != null) == (writesCell != null), "result and writes should always be set together"); - boolean includeOutcome = withOutcome && resultCell != null; + Cell writesCell = row.getCell(CommandsColumns.writes); // This has just the columns needed for truncation with outcome so return it unmodified - if (colCount == TRUNCATE_FIELDS[1].length && includeOutcome) + if (colCount == TRUNCATE_FIELDS[1].length && withOutcome) return row; // Construct a replacement with just the available columns that are still needed - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS[includeOutcome ? 1 : 0].length); + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS[withOutcome ? 1 : 0].length); int colIndex = 0; newLeaf[colIndex++] = durabilityCell; newLeaf[colIndex++] = executeAtCell; - if (includeOutcome) - newLeaf[colIndex++] = resultCell; newLeaf[colIndex++] = routeCell; newLeaf[colIndex++] = statusCell; - if (includeOutcome) + if (withOutcome && writesCell != null) + //noinspection UnusedAssignment newLeaf[colIndex++] = writesCell; return BTreeRow.create(row.clustering(), row.primaryKeyLivenessInfo(), row.deletion(), newLeaf); } - public static Result getResult(Row row) throws IOException - { - return deserializeWithVersionOr(row, result, LocalVersionedSerializers.result, () -> null); - } - public static Writes getWrites(Row row) throws IOException { return deserializeWithVersionOr(row, writes, LocalVersionedSerializers.writes, () -> null); @@ -918,7 +900,6 @@ public static Mutation getCommandMutation(int storeId, Command original, Command addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, LocalVersionedSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.writes, Command::writes, v -> serialize(v, LocalVersionedSerializers.writes), builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.result, Command::result, v -> serialize((TxnData) v, LocalVersionedSerializers.result), builder, timestampMicros, nowInSeconds, original, command); // TODO review this is just to work around Truncated not being committed but having a status after committed // so status claims it is committed. @@ -1314,7 +1295,6 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) Ballot promised = deserializeTimestampOrNull(row, "promised_ballot", Ballot::fromBits); Ballot accepted = deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits); Writes writes = deserializeWithVersionOr(row, "writes", LocalVersionedSerializers.writes, () -> null); - Result result = deserializeWithVersionOr(row, "result", LocalVersionedSerializers.result, () -> null); switch (status.status) { @@ -1331,9 +1311,9 @@ public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) return Command.SerializerSupport.committed(attributes, status, executeAt, promised, accepted, waitingOn); case PreApplied: case Applied: - return Command.SerializerSupport.executed(attributes, status, executeAt, promised, accepted, waitingOn, writes, result); + return Command.SerializerSupport.executed(attributes, status, executeAt, promised, accepted, waitingOn, writes, Result.APPLIED); case Truncated: - return Command.SerializerSupport.truncatedApply(attributes, status, executeAt, writes, result); + return Command.SerializerSupport.truncatedApply(attributes, status, executeAt, writes, Result.APPLIED); case Invalidated: return Command.SerializerSupport.invalidated(txnId, attributes.durableListeners()); default: diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index a0a5202c5615..bff25a9c05f8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -330,7 +330,9 @@ public static long command(Command command) size += sizeNullable(command.partialDeps(), AccordObjectSizes::dependencies); size += sizeNullable(command.accepted(), AccordObjectSizes::timestamp); size += sizeNullable(command.writes(), AccordObjectSizes::writes); - size += sizeNullable(command.result(), AccordObjectSizes::results); + + if (command.result() instanceof TxnData) + size += sizeNullable(command.result(), AccordObjectSizes::results); if (!(command instanceof Command.Committed)) return size; @@ -352,9 +354,9 @@ private static long cfkSeriesSize(ImmutableSortedMap seri return size; } - private static long EMPTY_CFK_SIZE = measure(CommandsForKey.SerializerSupport.create(null, null, null, 0, null, null, - ImmutableSortedMap.of(), - ImmutableSortedMap.of())); + private static final long EMPTY_CFK_SIZE = measure(CommandsForKey.SerializerSupport.create(null, null, null, 0, null, null, + ImmutableSortedMap.of(), + ImmutableSortedMap.of())); public static long commandsForKey(CommandsForKey cfk) { long size = EMPTY_CFK_SIZE; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index 99c8410403ef..9ad1fcd9531e 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -20,13 +20,13 @@ import java.io.IOException; +import accord.api.Result; import accord.messages.Apply; import accord.primitives.PartialRoute; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.NullableSerializer; public class ApplySerializers @@ -41,7 +41,6 @@ public void serializeBody(Apply apply, DataOutputPlus out, int version) throws I DepsSerializer.partialDeps.serialize(apply.deps, out, version); NullableSerializer.serializeNullable(apply.txn, out, version, CommandSerializers.partialTxn); CommandSerializers.writes.serialize(apply.writes, out, version); - TxnData.serializer.serialize((TxnData) apply.result, out, version); } @Override @@ -53,7 +52,7 @@ public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, Partial DepsSerializer.partialDeps.deserialize(in, version), NullableSerializer.deserializeNullable(in, version, CommandSerializers.partialTxn), CommandSerializers.writes.deserialize(in, version), - TxnData.serializer.deserialize(in, version)); + Result.APPLIED); } @Override @@ -63,8 +62,7 @@ public long serializedBodySize(Apply apply, int version) + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + DepsSerializer.partialDeps.serializedSize(apply.deps, version) + NullableSerializer.serializedNullableSize(apply.txn, version, CommandSerializers.partialTxn) - + CommandSerializers.writes.serializedSize(apply.writes, version) - + TxnData.serializer.serializedSize((TxnData) apply.result, version); + + CommandSerializers.writes.serializedSize(apply.writes, version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 37d42395e72f..4ce9eb7e925f 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -43,7 +43,6 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.txn.TxnData; import static accord.messages.CheckStatus.SerializationSupport.createOk; import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; @@ -121,7 +120,6 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t serializeNullable(okFull.partialTxn, out, version, CommandSerializers.partialTxn); serializeNullable(okFull.committedDeps, out, version, DepsSerializer.partialDeps); serializeNullable(okFull.writes, out, version, CommandSerializers.writes); - serializeNullable((TxnData) okFull.result, out, version, TxnData.serializer); } @Override @@ -154,7 +152,14 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce PartialTxn partialTxn = deserializeNullable(in, version, CommandSerializers.partialTxn); PartialDeps committedDeps = deserializeNullable(in, version, DepsSerializer.partialDeps); Writes writes = deserializeNullable(in, version, CommandSerializers.writes); - Result result = deserializeNullable(in, version, TxnData.serializer); + + Result result = null; + if (status == SaveStatus.PreApplied || status == SaveStatus.Applied + || status == SaveStatus.TruncatedApply || status == SaveStatus.TruncatedApplyWithOutcome || status == SaveStatus.TruncatedApplyWithDeps) + result = Result.APPLIED; + else if (status == SaveStatus.Invalidated) + result = Result.INVALIDATED; + return createOk(truncated, invalidIfNotAtLeast, status, maxStatus, promised, accepted, executeAt, isCoordinating, durability, route, homeKey, partialTxn, committedDeps, writes, result); } @@ -187,7 +192,6 @@ public long serializedSize(CheckStatusReply reply, int version) size += serializedNullableSize(okFull.partialTxn, version, CommandSerializers.partialTxn); size += serializedNullableSize(okFull.committedDeps, version, DepsSerializer.partialDeps); size += serializedNullableSize(okFull.writes, version, CommandSerializers.writes); - size += serializedNullableSize((TxnData) okFull.result, version, TxnData.serializer); return size; } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index 5d3a5a93daf9..19e13cbe0132 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -41,7 +41,6 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.txn.TxnData; import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; @@ -96,11 +95,10 @@ void serializeOk(RecoverOk recoverOk, DataOutputPlus out, int version) throws IO DepsSerializer.deps.serialize(recoverOk.earlierAcceptedNoWitness, out, version); out.writeBoolean(recoverOk.rejectsFastPath); serializeNullable(recoverOk.writes, out, version, CommandSerializers.writes); - serializeNullable((TxnData) recoverOk.result, out, version, TxnData.serializer); } @Override - public final void serialize(RecoverReply reply, DataOutputPlus out, int version) throws IOException + public void serialize(RecoverReply reply, DataOutputPlus out, int version) throws IOException { out.writeBoolean(reply.isOk()); if (!reply.isOk()) @@ -120,14 +118,23 @@ RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp e } @Override - public final RecoverReply deserialize(DataInputPlus in, int version) throws IOException + public RecoverReply deserialize(DataInputPlus in, int version) throws IOException { boolean isOk = in.readBoolean(); if (!isOk) return deserializeNack(CommandSerializers.nullableBallot.deserialize(in, version), in, version); - return deserializeOk(CommandSerializers.txnId.deserialize(in, version), - CommandSerializers.status.deserialize(in, version), + TxnId id = CommandSerializers.txnId.deserialize(in, version); + Status status = CommandSerializers.status.deserialize(in, version); + + Result result = null; + if (status == Status.PreApplied || status == Status.Applied || status == Status.Truncated) + result = Result.APPLIED; + else if (status == Status.Invalidated) + result = Result.INVALIDATED; + + return deserializeOk(id, + status, CommandSerializers.ballot.deserialize(in, version), deserializeNullable(in, version, CommandSerializers.timestamp), DepsSerializer.partialDeps.deserialize(in, version), @@ -136,7 +143,7 @@ public final RecoverReply deserialize(DataInputPlus in, int version) throws IOEx DepsSerializer.deps.deserialize(in, version), in.readBoolean(), deserializeNullable(in, version, CommandSerializers.writes), - deserializeNullable(in, version, TxnData.serializer), + result, in, version); } @@ -158,12 +165,11 @@ long serializedOkSize(RecoverOk recoverOk, int version) size += DepsSerializer.deps.serializedSize(recoverOk.earlierAcceptedNoWitness, version); size += TypeSizes.sizeof(recoverOk.rejectsFastPath); size += serializedNullableSize(recoverOk.writes, version, CommandSerializers.writes); - size += serializedNullableSize((TxnData) recoverOk.result, version, TxnData.serializer); return size; } @Override - public final long serializedSize(RecoverReply reply, int version) + public long serializedSize(RecoverReply reply, int version) { return TypeSizes.sizeof(reply.isOk()) + (reply.isOk() ? serializedOkSize((RecoverOk) reply, version) : serializedNackSize((RecoverNack) reply, version)); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index a35ae36020b3..af0b6b771e9f 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -33,6 +33,7 @@ import accord.api.Write; import accord.primitives.Keys; import accord.primitives.Ranges; +import accord.primitives.RoutableKey; import accord.primitives.Timestamp; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.db.TypeSizes; @@ -174,7 +175,7 @@ private static ByteBuffer[] merge(Keys leftKeys, Keys rightKeys, ByteBuffer[] le public Write apply(Timestamp executeAt, Data data) { if (!checkCondition(data)) - return TxnWrite.EMPTY; + return TxnWrite.EMPTY_CONDITION_FAILED; List fragments = deserialize(this.fragments, TxnWrite.Fragment.serializer); List updates = new ArrayList<>(fragments.size()); @@ -182,9 +183,23 @@ public Write apply(Timestamp executeAt, Data data) AccordUpdateParameters parameters = new AccordUpdateParameters((TxnData) data, options); for (TxnWrite.Fragment fragment : fragments) - updates.add(fragment.complete(parameters)); + // Filter out fragments that already constitute complete updates to avoid persisting them via TxnWrite: + if (!fragment.isComplete()) + updates.add(fragment.complete(parameters)); - return new TxnWrite(updates); + return new TxnWrite(updates, true); + } + + public List completeUpdatesForKey(RoutableKey key) + { + List fragments = deserialize(this.fragments, TxnWrite.Fragment.serializer); + List updates = new ArrayList<>(fragments.size()); + + for (TxnWrite.Fragment fragment : fragments) + if (fragment.isComplete() && fragment.key.equals(key)) + updates.add(fragment.toUpdate()); + + return updates; } public static final IVersionedSerializer serializer = new IVersionedSerializer() diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 7c5794a66d67..89599bc90ecb 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -27,16 +27,13 @@ import java.util.Objects; import java.util.Set; +import accord.primitives.*; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; import accord.api.DataStore; import accord.api.Write; import accord.local.SafeCommandStore; -import accord.primitives.RoutableKey; -import accord.primitives.Seekable; -import accord.primitives.Timestamp; -import accord.primitives.Writes; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Stage; @@ -56,6 +53,7 @@ import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.BooleanSerializer; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; @@ -66,9 +64,9 @@ public class TxnWrite extends AbstractKeySorted implements Write { - public static final TxnWrite EMPTY = new TxnWrite(Collections.emptyList()); + public static final TxnWrite EMPTY_CONDITION_FAILED = new TxnWrite(Collections.emptyList(), false); - private static final long EMPTY_SIZE = ObjectSizes.measure(EMPTY); + private static final long EMPTY_SIZE = ObjectSizes.measure(EMPTY_CONDITION_FAILED); public static class Update extends AbstractSerialized { @@ -218,10 +216,20 @@ public String toString() return "Fragment{key=" + key + ", index=" + index + ", baseUpdate=" + baseUpdate + ", referenceOps=" + referenceOps + '}'; } + public boolean isComplete() + { + return referenceOps.isEmpty(); + } + + public Update toUpdate() + { + return new Update(key, index, baseUpdate); + } + public Update complete(AccordUpdateParameters parameters) { - if (referenceOps.isEmpty()) - return new Update(key, index, baseUpdate); + if (isComplete()) + return toUpdate(); DecoratedKey key = baseUpdate.partitionKey(); PartitionUpdate.Builder updateBuilder = new PartitionUpdate.Builder(baseUpdate.metadata(), @@ -314,14 +322,18 @@ public long serializedSize(Fragment fragment, int version) }; } - private TxnWrite(Update[] items) + private final boolean isConditionMet; + + private TxnWrite(Update[] items, boolean isConditionMet) { super(items); + this.isConditionMet = isConditionMet; } - public TxnWrite(List items) + public TxnWrite(List items, boolean isConditionMet) { super(items); + this.isConditionMet = isConditionMet; } @Override @@ -343,7 +355,7 @@ Update[] newArray(int size) } @Override - public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) + public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store, PartialTxn txn) { // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; @@ -355,8 +367,22 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam int nowInSeconds = cfk.nowInSecondsFor(executeAt, true); List> results = new ArrayList<>(); + + // Apply updates not specified fully by the client but built from fragments completed by data from reads. + // This occurs, for example, when an UPDATE statement uses a value assigned by a LET statement. forEachWithKey((PartitionKey) key, write -> results.add(write.write(timestamp, nowInSeconds))); + if (isConditionMet) + { + // Apply updates that are fully specified by the client and not reliant on data from reads. + // ex. INSERT INTO tbl (a, b, c) VALUES (1, 2, 3) + // These updates are persisted only in TxnUpdate and not in TxnWrite to avoid duplication. + TxnUpdate txnUpdate = (TxnUpdate) txn.update(); + assert txnUpdate != null : "PartialTxn should contain an update if we're applying a write!"; + List updates = txnUpdate.completeUpdatesForKey((RoutableKey) key); + updates.forEach(update -> results.add(update.write(timestamp, nowInSeconds))); + } + if (results.isEmpty()) return Writes.SUCCESS; @@ -379,19 +405,21 @@ public long estimatedSizeOnHeap() @Override public void serialize(TxnWrite write, DataOutputPlus out, int version) throws IOException { + BooleanSerializer.serializer.serialize(write.isConditionMet, out, version); serializeArray(write.items, out, version, Update.serializer); } @Override public TxnWrite deserialize(DataInputPlus in, int version) throws IOException { - return new TxnWrite(deserializeArray(in, version, Update.serializer, Update[]::new)); + boolean isConditionMet = BooleanSerializer.serializer.deserialize(in, version); + return new TxnWrite(deserializeArray(in, version, Update.serializer, Update[]::new), isConditionMet); } @Override public long serializedSize(TxnWrite write, int version) { - return serializedArraySize(write.items, version, Update.serializer); + return BooleanSerializer.serializer.serializedSize(write.isConditionMet, version) + serializedArraySize(write.items, version, Update.serializer); } }; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index 8dada5ba9e8e..8a515bb614d4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -508,6 +508,32 @@ public void testReturningLetReferences() throws Throwable }); } + @Test + public void testFailedConditionWithCompleteInsert() throws Throwable + { + test(cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + + String query = "BEGIN TRANSACTION\n" + + " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " SELECT row1.v;\n" + + " IF row0 IS NULL AND row1.v = ? THEN\n" + + " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1, 0, 2, 0, 0, 1); + assertEquals(ImmutableList.of("row1.v"), result.names()); + assertThat(result).hasSize(1).contains(3); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + assertEmptyWithPreemptedRetry(cluster, check); + }); + } + @Test public void testReversedClusteringReference() throws Exception { diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java index d128ead48976..ba9e1b801ce2 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java @@ -28,7 +28,6 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.Verb; -@SuppressWarnings("Convert2MethodRef") public class AccordIntegrationTest extends AccordTestBase { private static final Logger logger = LoggerFactory.getLogger(AccordIntegrationTest.class); diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index c51e161d4d8c..2f9328b7fcf1 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -85,7 +85,6 @@ import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.IAccordService; -import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; @@ -129,7 +128,7 @@ public class CompactionAccordIteratorsTest static ColumnFamilyStore commands; static ColumnFamilyStore commandsForKey; static TableMetadata table; - static FullRoute route; + static FullRoute route; Random random; /* @@ -304,7 +303,6 @@ Consumer> expectAccordCommandsTruncatedWithOutcome() for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS[1]) assertNotNull(row.getColumnData(cm)); assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); - assertEquals(1, ((TxnData)CommandRows.getResult(row)).entrySet().size()); assertNotNull(CommandRows.getWrites(row)); assertEquals(Durability.Local, CommandRows.getDurability(row)); assertEquals(TXN_ID, CommandRows.getExecuteAt(row)); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index f5148fa48647..021cbd4897d9 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -121,7 +121,7 @@ public void commandLoadSave() throws Throwable attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); Command command = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, - waitingOn, result.left, result.right); + waitingOn, result.left, Result.APPLIED); AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); safeCommand.set(command); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index 5115dfdc47ce..73830bda44a6 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -100,7 +100,7 @@ public void burnTest() RandomDelayQueue delayQueue = new RandomDelayQueue.Factory(rs).get(); PendingQueue queue = new PropagatingPendingQueue(failures, delayQueue); Agent agent = new TestAgent.RethrowAgent(); - SimulatedDelayedExecutorService globalExecutor = new SimulatedDelayedExecutorService(queue, agent, rs.fork()); + SimulatedDelayedExecutorService globalExecutor = new SimulatedDelayedExecutorService(queue, agent); ScheduledExecutorPlus scheduler = new AdaptingScheduledExecutorPlus(globalExecutor); Cluster cluster = new Cluster(nodes, rs, scheduler); @@ -361,7 +361,7 @@ public boolean isAlive(InetAddressAndPort ep) { if (self.equals(ep)) return true; - return !nodeRuns.computeIfAbsent(ep, ignore -> Gens.bools().runs(.01)).next(rs); + return !nodeRuns.computeIfAbsent(ep, ignore -> Gens.bools().biasedRepeatingRuns(.01)).next(rs); } @Override @@ -505,4 +505,4 @@ private Instace(Node.Id id, } } } -} \ No newline at end of file +} From c0a63c9164da09c102f195cae7bb8507d83eee6a Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Wed, 28 Jun 2023 16:00:02 +0100 Subject: [PATCH 071/340] CEP-15: Minimize transaction state kept in system tables patch by Aleksey Yeschenko; reviewed by Ariel Weisberg for CASSANDRA-18573 --- modules/accord | 2 +- .../db/compaction/CompactionIterator.java | 2 +- .../cassandra/journal/AsyncWriteCallback.java | 6 + .../org/apache/cassandra/journal/Flusher.java | 6 +- .../org/apache/cassandra/journal/Index.java | 9 + .../org/apache/cassandra/journal/Journal.java | 148 ++++- .../org/apache/cassandra/journal/Segment.java | 4 +- .../apache/cassandra/journal/Segments.java | 8 +- .../cassandra/journal/ValueSerializer.java | 4 +- .../service/accord/AccordCommandStore.java | 83 ++- .../service/accord/AccordCommandStores.java | 54 +- .../service/accord/AccordJournal.java | 624 ++++++++++++++---- .../service/accord/AccordKeyspace.java | 340 ++++------ .../service/accord/AccordMessageSink.java | 17 +- .../service/accord/AccordService.java | 49 +- .../service/accord/AccordStateCache.java | 31 +- .../service/accord/AccordVerbHandler.java | 36 +- .../service/accord/async/AsyncOperation.java | 45 +- .../service/accord/async/ExecutionOrder.java | 149 +++++ .../accord/serializers/ApplySerializers.java | 15 +- .../BeginInvalidationSerializers.java | 28 +- .../serializers/CheckStatusSerializers.java | 39 +- .../serializers/CommandSerializers.java | 39 ++ .../accord/serializers/CommitSerializers.java | 11 +- .../accord/serializers/DepsSerializer.java | 3 + .../accord/serializers/FetchSerializers.java | 94 ++- .../accord/serializers/KeySerializers.java | 3 + .../serializers/RecoverySerializers.java | 18 +- .../service/accord/txn/AbstractKeySorted.java | 2 +- .../cassandra/service/accord/txn/TxnRead.java | 18 + .../service/accord/txn/TxnUpdate.java | 16 + .../distributed/test/TestBaseImpl.java | 2 + .../test/AccordJournalSimulationTest.java | 4 +- .../org/apache/cassandra/cql3/CQLTester.java | 2 + .../CompactionAccordIteratorsTest.java | 68 +- .../apache/cassandra/journal/JournalTest.java | 20 +- .../apache/cassandra/journal/SegmentTest.java | 16 +- .../accord/AccordCommandStoreTest.java | 23 +- .../service/accord/AccordCommandTest.java | 7 +- .../service/accord/AccordKeyspaceTest.java | 13 +- .../service/accord/AccordStateCacheTest.java | 14 +- .../service/accord/AccordTestUtils.java | 4 + .../accord/async/AsyncOperationTest.java | 42 +- 43 files changed, 1521 insertions(+), 597 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java diff --git a/modules/accord b/modules/accord index df492dfd2ffe..79fc1ebf7db6 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit df492dfd2ffe993c33761d0531ac5b979b80f080 +Subproject commit 79fc1ebf7db6aa5e616dbef1bc61b616fea3c2c6 diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 200d8687f395..ccefefbb8b95 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -831,7 +831,7 @@ protected Row applyToRow(Row row) case TRUNCATE_WITH_OUTCOME: case TRUNCATE: if (saveStatus.compareTo(cleanup.appliesIfNot) >= 0) - return maybeDropTruncatedCommandColumns(row, cleanup == TRUNCATE_WITH_OUTCOME, durabilityCell, executeAtCell, routeCell, statusCell); + return maybeDropTruncatedCommandColumns(row, durabilityCell, executeAtCell, routeCell, statusCell); return truncatedApply(cleanup.appliesIfNot, row, nowInSec, durability, durabilityCell, executeAtCell, routeCell, cleanup == TRUNCATE_WITH_OUTCOME); diff --git a/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java b/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java index 161b972b14db..53932ec4ef25 100644 --- a/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java +++ b/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java @@ -19,5 +19,11 @@ public interface AsyncWriteCallback extends Runnable { + AsyncWriteCallback NOOP = new AsyncWriteCallback() + { + @Override public void onFailure(Throwable error) {} + @Override public void run() {} + }; + void onFailure(Throwable error); } diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index b6d5a29141f2..ebad946f85ac 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -282,7 +282,7 @@ private void asyncFlushBatch(ActiveSegment.Allocation alloc) { pending.incrementAndGet(); requestExtraFlush(); - // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO FIXME + // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO (expected): collect async flush metrics pending.decrementAndGet(); written.incrementAndGet(); } @@ -298,7 +298,7 @@ private void waitForFlushGroup(ActiveSegment.Allocation alloc) private void asyncFlushGroup(ActiveSegment.Allocation alloc) { pending.incrementAndGet(); - // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO FIXME + // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO (expected): collect async flush metrics pending.decrementAndGet(); written.incrementAndGet(); } @@ -318,7 +318,7 @@ private void waitForFlushPeriodic(ActiveSegment.Allocation alloc) private void asyncFlushPeriodic(ActiveSegment.Allocation ignore) { pending.incrementAndGet(); - // awaitFlushAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); // TODO FIXME + // awaitFlushAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); // TODO (expected): collect async flush metrics pending.decrementAndGet(); written.incrementAndGet(); } diff --git a/src/java/org/apache/cassandra/journal/Index.java b/src/java/org/apache/cassandra/journal/Index.java index cfce4407e9ba..ef75d867e193 100644 --- a/src/java/org/apache/cassandra/journal/Index.java +++ b/src/java/org/apache/cassandra/journal/Index.java @@ -21,6 +21,7 @@ import org.apache.cassandra.utils.Closeable; +import static com.google.common.collect.Iterables.any; /** * Mapping of client supplied ids to in-segment offsets */ @@ -73,4 +74,12 @@ boolean mayContainId(K id) return null != firstId && null != lastId && keySupport.compare(id, firstId) >= 0 && keySupport.compare(id, lastId) <= 0; } + + /** + * @return whether any of the ids falls within lower/upper bounds of the index + */ + boolean mayContainIds(Iterable ids) + { + return any(ids, this::mayContainId); + } } diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index ff90c58f7bfc..2deb10b9336e 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -21,6 +21,7 @@ import java.nio.file.FileStore; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Set; import java.util.concurrent.Executor; @@ -29,6 +30,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BooleanSupplier; import java.util.function.Function; +import java.util.function.Predicate; import java.util.zip.CRC32; import javax.annotation.Nonnull; @@ -37,9 +39,11 @@ import org.slf4j.LoggerFactory; import com.codahale.metrics.Timer.Context; +import org.agrona.collections.ObjectHashSet; import org.apache.cassandra.concurrent.Interruptible; import org.apache.cassandra.concurrent.Interruptible.TerminateException; import org.apache.cassandra.concurrent.SequentialExecutorPlus; +import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.File; @@ -75,7 +79,7 @@ * @param the type of keys used to address the records; must be fixed-size and byte-order comparable */ -public class Journal +public class Journal implements Shutdownable { private static final Logger logger = LoggerFactory.getLogger(Journal.class); @@ -160,6 +164,12 @@ private void deleteTmpFiles() tmpFile.delete(); } + @Override + public boolean isTerminated() + { + return false; + } + public void shutdown() { allocator.shutdown(); @@ -171,6 +181,19 @@ public void shutdown() metrics.deregister(); } + @Override + public Object shutdownNow() + { + shutdown(); + return null; + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException + { + return false; + } + /** * Looks up a record by the provided id. *

    @@ -178,37 +201,50 @@ public void shutdown() * compaction progress. *

    * In case multiple copies of the record exist in the log (e.g. because of user retries), - * only the first found record will be consumed. + * the first one found will be returned. * * @param id user-provided record id, expected to roughly correlate with time and go up - * @param consumer function to consume the raw record (bytes and invalidation set) if found - * @return true if the record was found, false otherwise + * @return deserialized record if found, null otherwise */ - public boolean read(K id, RecordConsumer consumer) + public V readFirst(K id) { + EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + try (ReferencedSegments segments = selectAndReference(id)) { for (Segment segment : segments.all()) - if (segment.read(id, consumer)) - return true; + { + if (segment.readFirst(id, holder)) + { + try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) + { + return valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + } + } } - - return false; + return null; } /** - * Looks up a record by the provided id. + * Looks up a record by the provided id, if the value satisfies the provided condition. *

    * Looking up an invalidated record may or may not return a record, depending on * compaction progress. *

    * In case multiple copies of the record exist in the log (e.g. because of user retries), - * the first one found will be returned. + * and more than one of them satisfy the provided condition, the first one found will be returned. * * @param id user-provided record id, expected to roughly correlate with time and go up + * @param condition predicate to test the record against * @return deserialized record if found, null otherwise */ - public V read(K id) + public V readFirstMatching(K id, Predicate condition) { EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); @@ -216,23 +252,78 @@ public V read(K id) { for (Segment segment : segments.all()) { - if (segment.read(id, holder)) + int[] offsets = segment.index().lookUp(id); + for (int offset : offsets) { - try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) + holder.clear(); + if (segment.read(offset, holder)) { - return valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion); - + try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) + { + V record = valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion); + if (condition.test(record)) + return record; + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } } - catch (IOException e) + } + } + } + return null; + } + + /** + * Looks up a record by the provided id. + *

    + * Looking up an invalidated record may or may not return a record, depending on + * compaction progress. + *

    + * In case multiple copies of the record exist in the log (e.g. because of user retries), + * only the first found record will be consumed. + * + * @param id user-provided record id, expected to roughly correlate with time and go up + * @param consumer function to consume the raw record (bytes and invalidation set) if found + * @return true if the record was found, false otherwise + */ + public boolean readFirst(K id, RecordConsumer consumer) + { + try (ReferencedSegments segments = selectAndReference(id)) + { + for (Segment segment : segments.all()) + if (segment.readFirst(id, consumer)) + return true; + } + return false; + } + + /** + * Test for existence of entries with specified ids. + * + * @return subset of ids to test that have been found in the journal + */ + public Set test(Set test) + { + Set present = new ObjectHashSet<>(test.size() + 1, 0.9f); + try (ReferencedSegments segments = selectAndReference(test)) + { + for (Segment segment : segments.all()) + { + for (K id : test) + { + if (segment.index().lookUpFirst(id) != -1) { - // can only throw if serializer is buggy - throw new RuntimeException(e); + present.add(id); + if (test.size() == present.size()) + return present; } } } } - - return null; + return present; } /** @@ -248,7 +339,7 @@ public void write(K id, V record, Set hosts) { try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) { - valueSerializer.serialize(record, dob, params.userVersion()); + valueSerializer.serialize(id, record, dob, params.userVersion()); ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); alloc.write(id, dob.unsafeGetBufferAndFlip(), hosts); flusher.waitForFlush(alloc); @@ -276,7 +367,7 @@ public void asyncWrite(K id, V record, Set hosts, @Nonnull Executor exe { try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) { - valueSerializer.serialize(record, dob, params.userVersion()); + valueSerializer.serialize(id, record, dob, params.userVersion()); ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); alloc.asyncWrite(id, dob.unsafeGetBufferAndFlip(), hosts, executor, callback); flusher.asyncFlush(alloc); @@ -466,21 +557,26 @@ private void closeAllSegments() } /** - * Select segments that could potentially have an entry with the specified id and + * Select segments that could potentially have any entry with the specified ids and * attempt to grab references to them all. * * @return a subset of segments with references to them */ - ReferencedSegments selectAndReference(K id) + ReferencedSegments selectAndReference(Iterable ids) { while (true) { - ReferencedSegments referenced = segments().selectAndReference(id); + ReferencedSegments referenced = segments().selectAndReference(ids); if (null != referenced) return referenced; } } + ReferencedSegments selectAndReference(K id) + { + return selectAndReference(Collections.singleton(id)); + } + private Segments segments() { return segments.get(); diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java index e63f6f2cad21..6a5604b0d973 100644 --- a/src/java/org/apache/cassandra/journal/Segment.java +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -49,7 +49,7 @@ abstract class Segment implements Closeable, RefCounted> * Reading entries (by id, by offset, iterate) */ - boolean read(K id, RecordConsumer consumer) + boolean readFirst(K id, RecordConsumer consumer) { int offset = index().lookUpFirst(id); if (offset == -1) @@ -65,7 +65,7 @@ boolean read(K id, RecordConsumer consumer) return false; } - boolean read(K id, EntrySerializer.EntryHolder into) + boolean readFirst(K id, EntrySerializer.EntryHolder into) { int offset = index().lookUpFirst(id); if (offset == -1 || !read(offset, into)) diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java index 8873ea7b3d85..96256e623806 100644 --- a/src/java/org/apache/cassandra/journal/Segments.java +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -137,18 +137,18 @@ Collection> onlyStatic() } /** - * Select segments that could potentially have an entry with the specified id and + * Select segments that could potentially have an entry with the specified ids and * attempt to grab references to them all. * * @return a subset of segments with references to them, or {@code null} if failed to grab the refs */ @SuppressWarnings("resource") - ReferencedSegments selectAndReference(K id) + ReferencedSegments selectAndReference(Iterable ids) { List> selectedActive = null; for (ActiveSegment segment : onlyActive()) { - if (segment.index.mayContainId(id)) + if (segment.index.mayContainIds(ids)) { if (null == selectedActive) selectedActive = new ArrayList<>(); @@ -160,7 +160,7 @@ ReferencedSegments selectAndReference(K id) Map> selectedStatic = null; for (StaticSegment segment : onlyStatic()) { - if (segment.index().mayContainId(id)) + if (segment.index().mayContainIds(ids)) { if (null == selectedStatic) selectedStatic = new HashMap<>(); diff --git a/src/java/org/apache/cassandra/journal/ValueSerializer.java b/src/java/org/apache/cassandra/journal/ValueSerializer.java index f004c3a37aa7..a6a2c7d452ca 100644 --- a/src/java/org/apache/cassandra/journal/ValueSerializer.java +++ b/src/java/org/apache/cassandra/journal/ValueSerializer.java @@ -24,9 +24,9 @@ public interface ValueSerializer { - int serializedSize(V value, int userVersion); + int serializedSize(K key, V value, int userVersion); - void serialize(V value, DataOutputPlus out, int userVersion) throws IOException; + void serialize(K key, V value, DataOutputPlus out, int userVersion) throws IOException; /** * Deserialize the value given the key is known. Allows to avoid serializing diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 727fee7baf54..1913b3213fa4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Objects; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -52,10 +53,12 @@ import accord.local.RedundantBefore; import accord.local.SafeCommandStore; import accord.local.SaveStatus; +import accord.local.SerializerSupport; +import accord.local.SerializerSupport.MessageProvider; +import accord.messages.Message; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; -import accord.primitives.PartialDeps; -import accord.primitives.PartialTxn; +import accord.primitives.Ballot; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routable; @@ -75,6 +78,7 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation; +import org.apache.cassandra.service.accord.async.ExecutionOrder; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -103,7 +107,9 @@ private static long getThreadId(ExecutorService executor) private final long threadId; public final String loggingId; + private final AccordJournal journal; private final ExecutorService executor; + private final ExecutionOrder executionOrder; private final AccordStateCache stateCache; private final AccordStateCache.Instance commandCache; private final AccordStateCache.Instance commandsForKeyCache; @@ -117,9 +123,10 @@ public AccordCommandStore(int id, Agent agent, DataStore dataStore, ProgressLog.Factory progressLogFactory, - EpochUpdateHolder epochUpdateHolder) + EpochUpdateHolder epochUpdateHolder, + AccordJournal journal) { - this(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder, Stage.READ.executor(), Stage.MUTATION.executor()); + this(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder, journal, Stage.READ.executor(), Stage.MUTATION.executor()); } @VisibleForTesting @@ -129,13 +136,16 @@ public AccordCommandStore(int id, DataStore dataStore, ProgressLog.Factory progressLogFactory, EpochUpdateHolder epochUpdateHolder, + AccordJournal journal, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor) { super(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder); + this.journal = journal; loggingId = String.format("[%s]", id); executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); - threadId = getThreadId(this.executor); + executionOrder = new ExecutionOrder(); + threadId = getThreadId(executor); stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20); commandCache = stateCache.instance(TxnId.class, @@ -143,6 +153,7 @@ public AccordCommandStore(int id, AccordSafeCommand::new, this::loadCommand, this::saveCommand, + this::validateCommand, AccordObjectSizes::command); commandsForKeyCache = stateCache.instance(RoutableKey.class, @@ -150,6 +161,7 @@ public AccordCommandStore(int id, AccordSafeCommandsForKey::new, this::loadCommandsForKey, this::saveCommandsForKey, + this::validateCommandsForKey, AccordObjectSizes::commandsForKey); AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead) -> { executor.submit(() -> { @@ -169,27 +181,36 @@ public AccordCommandStore(int id, executor.execute(this::loadRangesToCommands); } + static Factory factory(AccordJournal journal) + { + return (id, time, agent, dataStore, progressLogFactory, rangesForEpoch) -> + new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, rangesForEpoch, journal); + } + private void loadRangesToCommands() { AsyncPromise future = new AsyncPromise<>(); - AccordKeyspace.findAllCommandsByDomain(id, Routable.Domain.Range, ImmutableSet.of("txn_id", "status", "txn", "execute_at", "dependencies"), new Observable() + AccordKeyspace.findAllCommandsByDomain(id, Routable.Domain.Range, ImmutableSet.of("txn_id", "status", "accepted_ballot", "execute_at"), new Observable() { private CommandsForRanges.Builder builder = new CommandsForRanges.Builder(); + @Override public void onNext(UntypedResultSet.Row row) throws Exception { TxnId txnId = AccordKeyspace.deserializeTxnId(row); SaveStatus status = AccordKeyspace.deserializeStatus(row); - Timestamp executeAt = AccordKeyspace.deserializeExecuteAt(row); + Timestamp executeAt = AccordKeyspace.deserializeExecuteAtOrNull(row); + Ballot accepted = AccordKeyspace.deserializeAcceptedOrNull(row); + + MessageProvider messageProvider = journal.makeMessageProvider(txnId); - PartialTxn txn = AccordKeyspace.deserializeTxn(row); - Seekables keys = txn.keys(); + SerializerSupport.TxnAndDeps txnAndDeps = SerializerSupport.extractTxnAndDeps(status, accepted, messageProvider); + Seekables keys = txnAndDeps.txn.keys(); if (keys.domain() != Routable.Domain.Range) - throw new AssertionError(String.format("Txn keys are not range for %s", txn)); + throw new AssertionError(String.format("Txn keys are not range for %s", txnAndDeps.txn)); Ranges ranges = (Ranges) keys; - PartialDeps deps = AccordKeyspace.deserializeDependencies(row); - List dependsOn = deps == null ? Collections.emptyList() : deps.txnIds(); + List dependsOn = txnAndDeps.deps == null ? Collections.emptyList() : txnAndDeps.deps.txnIds(); builder.put(txnId, ranges, status, executeAt, dependsOn); } @@ -270,11 +291,6 @@ Command loadCommand(TxnId txnId) return AccordKeyspace.loadCommand(this, txnId); } - CommandsForKey loadCommandsForKey(RoutableKey key) - { - return AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); - } - @Nullable Runnable saveCommand(Command before, Command after) { @@ -282,6 +298,17 @@ Runnable saveCommand(Command before, Command after) return null != mutation ? mutation::apply : null; } + boolean validateCommand(TxnId txnId, Command evicting) + { + Command reloaded = AccordKeyspace.unsafeLoadCommand(this, txnId); + return (evicting == null && reloaded == null) || (evicting != null && reloaded != null && reloaded.isEqualOrFuller(evicting)); + } + + CommandsForKey loadCommandsForKey(RoutableKey key) + { + return AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); + } + @Nullable private Runnable saveCommandsForKey(CommandsForKey before, CommandsForKey after) { @@ -289,6 +316,12 @@ private Runnable saveCommandsForKey(CommandsForKey before, CommandsForKey after) return null != mutation ? mutation::apply : null; } + boolean validateCommandsForKey(RoutableKey key, CommandsForKey evicting) + { + CommandsForKey reloaded = AccordKeyspace.unsafeLoadCommandsForKey(this, (PartitionKey) key); + return Objects.equals(evicting, reloaded); + } + @VisibleForTesting public AccordStateCache cache() { @@ -351,6 +384,11 @@ ProgressLog progressLog() return progressLog; } + public ExecutionOrder executionOrder() + { + return executionOrder; + } + @Override public AsyncChain execute(PreLoadContext preLoadContext, Consumer consumer) { @@ -495,4 +533,15 @@ public void markShardDurable(SafeCommandStore safeStore, TxnId globalSyncId, Ran super.markShardDurable(safeStore, globalSyncId, ranges); commandsForRanges.prune(globalSyncId, ranges); } + + MessageProvider makeMessageProvider(TxnId txnId) + { + return journal.makeMessageProvider(txnId); + } + + @VisibleForTesting + public void appendToJournal(Message message) + { + journal.appendMessageBlocking(message); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index e6678a9a153a..bbae3c3d54e6 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -26,27 +26,18 @@ import accord.local.CommandStores; import accord.local.Node; import accord.local.NodeTimeService; -import accord.local.PreLoadContext; -import accord.local.SafeCommandStore; import accord.local.ShardDistributor; import accord.primitives.Range; -import accord.primitives.Routables; import accord.topology.Topology; -import accord.utils.MapReduceConsume; import accord.utils.RandomSource; -import org.apache.cassandra.concurrent.ImmediateExecutor; -import org.apache.cassandra.journal.AsyncWriteCallback; import org.apache.cassandra.service.accord.api.AccordRoutingKey; public class AccordCommandStores extends CommandStores { - private final AccordJournal journal; - AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, RandomSource random, ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, AccordJournal journal) { - super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore::new); - this.journal = journal; + super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore.factory(journal)); setCacheSize(maxCacheSize()); } @@ -56,48 +47,6 @@ static Factory factory(AccordJournal journal) new AccordCommandStores(time, agent, store, random, shardDistributor, progressLogFactory, journal); } - @Override - protected void mapReduceConsume( - PreLoadContext context, - Routables keys, - long minEpoch, - long maxEpoch, - MapReduceConsume mapReduceConsume) - { - // append PreAccept, Accept, Commit, and Apply messages durably to AccordJournal before processing - if (journal.mustMakeDurable(context)) - mapReduceConsumeDurable(context, keys, minEpoch, maxEpoch, mapReduceConsume); - else - super.mapReduceConsume(context, keys, minEpoch, maxEpoch, mapReduceConsume); - } - - private void mapReduceConsumeDurable( - PreLoadContext context, - Routables keys, - long minEpoch, - long maxEpoch, - MapReduceConsume mapReduceConsume) - { - journal.append(context, ImmediateExecutor.INSTANCE, new AsyncWriteCallback() - { - @Override - public void run() - { - // TODO (performance, expected): do not retain references to messages beyond a certain total - // cache threshold; in case of flush lagging behind, read the messages from journal and - // deserialize instead before processing, to prevent memory pressure buildup from messages - // pending flush to disk. - AccordCommandStores.super.mapReduceConsume(context, keys, minEpoch, maxEpoch, mapReduceConsume); - } - - @Override - public void onFailure(Throwable error) - { - mapReduceConsume.accept(null, error); - } - }); - } - @Override protected boolean shouldBootstrap(Node node, Topology previous, Topology updated, Range range) { @@ -160,7 +109,6 @@ public synchronized Supplier updateTopology(Node node, Topology newT public synchronized void shutdown() { super.shutdown(); - journal.shutdown(); //TODO shutdown isn't useful by itself, we need a way to "wait" as well. Should be AutoCloseable or offer awaitTermination as well (think Shutdownable interface) } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 5d097068a654..23554c7feb1c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -19,23 +19,41 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.*; +import java.util.Collections; +import java.util.EnumMap; +import java.util.EnumSet; +import java.util.Map; +import java.util.Set; import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import java.util.zip.Checksum; import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.local.Node.Id; -import accord.local.PreLoadContext; +import accord.local.SerializerSupport; +import accord.messages.AbstractEpochRequest; import accord.messages.Accept; import accord.messages.Apply; +import accord.messages.BeginRecovery; import accord.messages.Commit; +import accord.messages.LocalMessage; +import accord.messages.Message; import accord.messages.MessageType; import accord.messages.PreAccept; +import accord.messages.Propagate; import accord.messages.TxnRequest; -import accord.primitives.*; +import accord.primitives.Ballot; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; import accord.utils.Invariants; +import org.agrona.collections.ObjectHashSet; +import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -49,20 +67,32 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; +import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; import org.apache.cassandra.service.accord.serializers.CommitSerializers; import org.apache.cassandra.service.accord.serializers.EnumSerializer; +import org.apache.cassandra.service.accord.serializers.FetchSerializers; +import org.apache.cassandra.service.accord.serializers.InformDurableSerializers; +import org.apache.cassandra.service.accord.serializers.InformOfTxnIdSerializers; import org.apache.cassandra.service.accord.serializers.PreacceptSerializers; +import org.apache.cassandra.service.accord.serializers.RecoverySerializers; +import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; +import org.apache.cassandra.utils.ByteArrayUtil; +import static accord.messages.MessageType.*; import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; -import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; -import static org.apache.cassandra.utils.FBUtilities.updateChecksumLong; -public class AccordJournal +public class AccordJournal implements Shutdownable { + private static final Logger logger = LoggerFactory.getLogger(AccordJournal.class); + + private static final boolean LOG_MESSAGE_PROVIDER = false; + private static final Set SENTINEL_HOSTS = Collections.singleton(0); + private static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[21]); + static final Params PARAMS = new Params() { @Override @@ -80,7 +110,7 @@ public FailurePolicy failurePolicy() @Override public FlushMode flushMode() { - return FlushMode.GROUP; + return FlushMode.BATCH; } @Override @@ -108,86 +138,105 @@ public int userVersion() }; final File directory; - final Journal> journal; + final Journal journal; + + enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } + private volatile Status status = Status.INITIALIZED; @VisibleForTesting public AccordJournal() { directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); - journal = new Journal<>("AccordJournal", directory, PARAMS, Key.SUPPORT, MESSAGE_SERIALIZER); + journal = new Journal<>("AccordJournal", directory, PARAMS, Key.SUPPORT, RECORD_SERIALIZER); } public AccordJournal start() { - // journal.start(); TODO: re-enable + Invariants.checkState(status == Status.INITIALIZED); + status = Status.STARTING; + journal.start(); + status = Status.STARTED; return this; } - public void shutdown() + @Override + public boolean isTerminated() { - // journal.shutdown(); TODO: re-enable + return status == Status.TERMINATED; } - boolean mustMakeDurable(PreLoadContext context) + @Override + public void shutdown() { - return false; - // return context instanceof TxnRequest && Type.mustMakeDurable((TxnRequest) context); TODO: re-enable + Invariants.checkState(status == Status.STARTED); + status = Status.TERMINATING; + journal.shutdown(); + status = Status.TERMINATED; } - public void append(PreLoadContext context, Executor executor, AsyncWriteCallback callback) + @Override + public Object shutdownNow() { - append((TxnRequest) context, executor, callback); + shutdown(); + return null; } - public void append(TxnRequest message, Executor executor, AsyncWriteCallback callback) + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { - Key key = new Key(message.txnId, Type.fromMsgType(message.type())); - journal.asyncWrite(key, message, SENTINEL_HOSTS, executor, callback); + // TODO (expected) + return true; } - public TxnRequest read(TxnId txnId, Type type) + void appendAuxiliaryRecord(AuxiliaryRecord record) { - Key key = new Key(txnId, type); - return journal.read(key); + Key key = new Key(record.timestamp, record.type()); + journal.write(key, record, SENTINEL_HOSTS); } - PreAccept readPreAccept(TxnId txnId) + public void appendMessage(Message message, Executor executor, AsyncWriteCallback callback) { - return (PreAccept) read(txnId, Type.PREACCEPT_REQ); + Type type = Type.fromMessageType(message.type()); + Key key = new Key(type.txnId(message), type); + journal.asyncWrite(key, message, SENTINEL_HOSTS, executor, callback); } - Accept readAccept(TxnId txnId) + @VisibleForTesting + public void appendMessageBlocking(Message message) { - return (Accept) read(txnId, Type.ACCEPT_REQ); + Type type = Type.fromMessageType(message.type()); + Key key = new Key(type.txnId(message), type); + journal.write(key, message, SENTINEL_HOSTS); } - Commit readCommit(TxnId txnId) + @VisibleForTesting + public M readMessage(TxnId txnId, Type type, Class clazz) { - return (Commit) read(txnId, Type.COMMIT_REQ); + return clazz.cast(journal.readFirst(new Key(txnId, type))); } - Apply readApply(TxnId txnId) + private M readMessage(TxnId txnId, Type type, Class clazz, Predicate condition) { - return (Apply) read(txnId, Type.APPLY_REQ); + return clazz.cast(journal.readFirstMatching(new Key(txnId, type), condition)); } static class Key { - final TxnId txnId; + final Timestamp timestamp; final Type type; - Key(TxnId txnId, Type type) + Key(Timestamp timestamp, Type type) { - this.txnId = txnId; + this.timestamp = timestamp; this.type = type; } /** * Support for (de)serializing and comparing record keys. *

    - * Implements its own serialization and comparison for {@link TxnId} to satisty + * Implements its own serialization and comparison for {@link Timestamp} to satisty * {@link KeySupport} contract - puts hybrid logical clock ahead of epoch - * when ordering txn ids. This is done for more precise elimination of candidate + * when ordering timestamps. This is done for more precise elimination of candidate * segments by min/max record key in segment. */ static final KeySupport SUPPORT = new KeySupport() @@ -198,79 +247,86 @@ static class Key private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; @Override - public int serializedSize(int version) + public int serializedSize(int userVersion) { - return LONG_SIZE // txnId.hlc() - + 6 // txnId.epoch() - + 2 // txnId.flags() - + INT_SIZE // txnId.node + return LONG_SIZE // timestamp.hlc() + + 6 // timestamp.epoch() + + 2 // timestamp.flags() + + INT_SIZE // timestamp.node + BYTE_SIZE; // type } @Override - public void serialize(Key key, DataOutputPlus out, int version) throws IOException + public void serialize(Key key, DataOutputPlus out, int userVersion) throws IOException { - serializeTxnId(key.txnId, out); + serializeTimestamp(key.timestamp, out); out.writeByte(key.type.id); } - private void serializeTxnId(TxnId txnId, DataOutputPlus out) throws IOException + private void serializeTimestamp(Timestamp timestamp, DataOutputPlus out) throws IOException { - out.writeLong(txnId.hlc()); - out.writeLong(epochAndFlags(txnId)); - out.writeInt(txnId.node.id); + out.writeLong(timestamp.hlc()); + out.writeLong(epochAndFlags(timestamp)); + out.writeInt(timestamp.node.id); + } + + private void serialize(Key key, byte[] out) + { + serializeTimestamp(key.timestamp, out); + out[20] = (byte) (key.type.id & 0xFF); + } + + private void serializeTimestamp(Timestamp timestamp, byte[] out) + { + ByteArrayUtil.putLong(out, 0, timestamp.hlc()); + ByteArrayUtil.putLong(out, 8, epochAndFlags(timestamp)); + ByteArrayUtil.putInt(out, 16, timestamp.node.id); } @Override - public Key deserialize(DataInputPlus in, int version) throws IOException + public Key deserialize(DataInputPlus in, int userVersion) throws IOException { - TxnId txnId = deserializeTxnId(in); + Timestamp timestamp = deserializeTimestamp(in); int type = in.readByte(); - return new Key(txnId, Type.fromId(type)); + return new Key(timestamp, Type.fromId(type)); } - private TxnId deserializeTxnId(DataInputPlus in) throws IOException + private Timestamp deserializeTimestamp(DataInputPlus in) throws IOException { long hlc = in.readLong(); long epochAndFlags = in.readLong(); int nodeId = in.readInt(); - return TxnId.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); + return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); } @Override - public Key deserialize(ByteBuffer buffer, int position, int version) + public Key deserialize(ByteBuffer buffer, int position, int userVersion) { - TxnId txnId = deserializeTxnId(buffer, position); + Timestamp timestamp = deserializeTimestamp(buffer, position); int type = buffer.get(position + TYPE_OFFSET); - return new Key(txnId, Type.fromId(type)); + return new Key(timestamp, Type.fromId(type)); } - private TxnId deserializeTxnId(ByteBuffer buffer, int position) + private Timestamp deserializeTimestamp(ByteBuffer buffer, int position) { long hlc = buffer.getLong(position + HLC_OFFSET); long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); int nodeId = buffer.getInt(position + NODE_OFFSET); - return TxnId.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); + return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); } @Override - public void updateChecksum(Checksum crc, Key key, int version) + public void updateChecksum(Checksum crc, Key key, int userVersion) { - updateChecksum(crc, key.txnId); - crc.update(key.type.id & 0xFF); - } - - private void updateChecksum(Checksum crc, TxnId txnId) - { - updateChecksumLong(crc, txnId.hlc()); - updateChecksumLong(crc, epochAndFlags(txnId)); - updateChecksumInt(crc, txnId.node.id); + byte[] out = keyCRCBytes.get(); + serialize(key, out); + crc.update(out, 0, out.length); } @Override - public int compareWithKeyAt(Key k, ByteBuffer buffer, int position, int version) + public int compareWithKeyAt(Key k, ByteBuffer buffer, int position, int userVersion) { - int cmp = compareWithTxnIdAt(k.txnId, buffer, position); + int cmp = compareWithTimestampAt(k.timestamp, buffer, position); if (cmp != 0) return cmp; byte type = buffer.get(position + TYPE_OFFSET); @@ -278,40 +334,40 @@ public int compareWithKeyAt(Key k, ByteBuffer buffer, int position, int version) return cmp; } - private int compareWithTxnIdAt(TxnId txnId, ByteBuffer buffer, int position) + private int compareWithTimestampAt(Timestamp timestamp, ByteBuffer buffer, int position) { long hlc = buffer.getLong(position + HLC_OFFSET); - int cmp = Long.compareUnsigned(txnId.hlc(), hlc); + int cmp = Long.compareUnsigned(timestamp.hlc(), hlc); if (cmp != 0) return cmp; long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); - cmp = Long.compareUnsigned(epochAndFlags(txnId), epochAndFlags); + cmp = Long.compareUnsigned(epochAndFlags(timestamp), epochAndFlags); if (cmp != 0) return cmp; int nodeId = buffer.getInt(position + NODE_OFFSET); - cmp = Integer.compareUnsigned(txnId.node.id, nodeId); + cmp = Integer.compareUnsigned(timestamp.node.id, nodeId); return cmp; } @Override public int compare(Key k1, Key k2) { - int cmp = compare(k1.txnId, k2.txnId); + int cmp = compare(k1.timestamp, k2.timestamp); if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); return cmp; } - private int compare(TxnId txnId1, TxnId txnId2) + private int compare(Timestamp timestamp1, Timestamp timestamp2) { - int cmp = Long.compareUnsigned(txnId1.hlc(), txnId2.hlc()); - if (cmp == 0) cmp = Long.compareUnsigned(epochAndFlags(txnId1), epochAndFlags(txnId2)); - if (cmp == 0) cmp = Integer.compareUnsigned(txnId1.node.id, txnId2.node.id); + int cmp = Long.compareUnsigned(timestamp1.hlc(), timestamp2.hlc()); + if (cmp == 0) cmp = Long.compareUnsigned(epochAndFlags(timestamp1), epochAndFlags(timestamp2)); + if (cmp == 0) cmp = Integer.compareUnsigned(timestamp1.node.id, timestamp2.node.id); return cmp; } - private long epochAndFlags(TxnId txnId) + private long epochAndFlags(Timestamp timestamp) { - return (txnId.epoch() << 16) | (long) txnId.flags(); + return (timestamp.epoch() << 16) | (long) timestamp.flags(); } private long epoch(long epochAndFlags) @@ -335,43 +391,88 @@ public boolean equals(Object other) boolean equals(Key other) { - return this.type == other.type && this.txnId.equals(other.txnId); + return this.type == other.type && this.timestamp.equals(other.timestamp); } @Override public int hashCode() { - return type.hashCode() + 31 * txnId.hashCode(); + return type.hashCode() + 31 * timestamp.hashCode(); } @Override public String toString() { - return "Key{" + txnId + ", " + type + '}'; + return "Key{" + timestamp + ", " + type + '}'; } } - static final ValueSerializer> MESSAGE_SERIALIZER = new ValueSerializer>() + static final ValueSerializer RECORD_SERIALIZER = new ValueSerializer() { @Override - public int serializedSize(TxnRequest message, int version) + public int serializedSize(Key key, Object record, int userVersion) { - return Ints.checkedCast(Type.ofMessage(message).serializedSize(message, version)); + return Ints.checkedCast(key.type.serializedSize(key, record, userVersion)); } @Override - public void serialize(TxnRequest message, DataOutputPlus out, int version) throws IOException + public void serialize(Key key, Object record, DataOutputPlus out, int userVersion) throws IOException { - Type.ofMessage(message).serialize(message, out, version); + key.type.serialize(key, record, out, userVersion); } @Override - public TxnRequest deserialize(Key key, DataInputPlus in, int version) throws IOException + public Object deserialize(Key key, DataInputPlus in, int userVersion) throws IOException { - return key.type.deserialize(in, version); + return key.type.deserialize(key, in, userVersion); } }; + /* Adapts vanilla message serializers to journal-expected signatures; converts user version to MS version */ + static final class MessageSerializer implements ValueSerializer + { + final IVersionedSerializer wrapped; + + private MessageSerializer(IVersionedSerializer wrapped) + { + this.wrapped = wrapped; + } + + static MessageSerializer wrap(IVersionedSerializer wrapped) + { + return new MessageSerializer(wrapped); + } + + @Override + public int serializedSize(Key key, Object message, int userVersion) + { + return Ints.checkedCast(wrapped.serializedSize((Message) message, msVersion(userVersion))); + } + + @Override + public void serialize(Key key, Object message, DataOutputPlus out, int userVersion) throws IOException + { + wrapped.serialize((Message) message, out, msVersion(userVersion)); + } + + @Override + public Object deserialize(Key key, DataInputPlus in, int userVersion) throws IOException + { + return wrapped.deserialize(in, msVersion(userVersion)); + } + } + + @FunctionalInterface + interface TxnIdProvider + { + TxnId txnId(Message message); + } + + private static final TxnIdProvider EPOCH = msg -> ((AbstractEpochRequest) msg).txnId; + private static final TxnIdProvider TXN = msg -> ((TxnRequest) msg).txnId; + private static final TxnIdProvider LOCAL = msg -> ((LocalMessage) msg).primaryTxnId(); + private static final TxnIdProvider INVL = msg -> ((Commit.Invalidate) msg).primaryTxnId(); + /** * Accord Message type - consequently the kind of persisted record. *

    @@ -380,18 +481,51 @@ public TxnRequest deserialize(Key key, DataInputPlus in, int version) throws * 2. It's persisted in the record key, so has the additional constraint of being fixed size and * shouldn't be using varint encoding */ - public enum Type implements IVersionedSerializer> + public enum Type implements ValueSerializer { - PREACCEPT_REQ (0, MessageType.PRE_ACCEPT_REQ, PreacceptSerializers.request), - ACCEPT_REQ (1, MessageType.ACCEPT_REQ, AcceptSerializers.request ), - COMMIT_REQ (2, MessageType.COMMIT_REQ, CommitSerializers.request ), - APPLY_REQ (3, MessageType.APPLY_REQ, ApplySerializers.request ); + /* Auxiliary journal records */ + REPLAY (0, ReplayRecord.SERIALIZER), + + /* Accord protocol requests */ + PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), + ACCEPT (65, ACCEPT_REQ, AcceptSerializers.request, TXN ), + ACCEPT_INVALIDATE (66, ACCEPT_INVALIDATE_REQ, AcceptSerializers.invalidate, EPOCH), + COMMIT_MINIMAL (67, COMMIT_MINIMAL_REQ, CommitSerializers.request, TXN ), + COMMIT_MAXIMAL (68, COMMIT_MAXIMAL_REQ, CommitSerializers.request, TXN ), + COMMIT_INVALIDATE (69, COMMIT_INVALIDATE_REQ, CommitSerializers.invalidate, INVL), + APPLY_MINIMAL (70, APPLY_MINIMAL_REQ, ApplySerializers.request, TXN ), + APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), + BEGIN_RECOVER (72, BEGIN_RECOVER_REQ, RecoverySerializers.request, TXN ), + BEGIN_INVALIDATE (73, BEGIN_INVALIDATE_REQ, BeginInvalidationSerializers.request, EPOCH), + INFORM_OF_TXN (74, INFORM_OF_TXN_REQ, InformOfTxnIdSerializers.request, EPOCH), + INFORM_DURABLE (75, INFORM_DURABLE_REQ, InformDurableSerializers.request, TXN ), + SET_SHARD_DURABLE (76, SET_SHARD_DURABLE_REQ, SetDurableSerializers.shardDurable, EPOCH), + SET_GLOBALLY_DURABLE (77, SET_GLOBALLY_DURABLE_REQ, SetDurableSerializers.globallyDurable, EPOCH), + + /* Accord local messages */ + PROPAGATE_PRE_ACCEPT (78, PROPAGATE_PRE_ACCEPT_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_COMMIT (79, PROPAGATE_COMMIT_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_APPLY (80, PROPAGATE_APPLY_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_OTHER (81, PROPAGATE_OTHER_MSG, FetchSerializers.propagate, LOCAL), + ; final int id; - final MessageType msgType; - final IVersionedSerializer> serializer; + final MessageType type; + final TxnIdProvider txnIdProvider; + final ValueSerializer serializer; + + Type(int id, ValueSerializer serializer) + { + this(id, null, serializer, null); + } - Type(int id, MessageType msgType, IVersionedSerializer> serializer) + Type(int id, MessageType type, IVersionedSerializer serializer, TxnIdProvider txnIdProvider) + { + //noinspection unchecked + this(id, type, MessageSerializer.wrap((IVersionedSerializer) serializer), txnIdProvider); + } + + Type(int id, MessageType type, ValueSerializer serializer, TxnIdProvider txnIdProvider) { if (id < 0) throw new IllegalArgumentException("Negative Type id " + id); @@ -399,10 +533,10 @@ public enum Type implements IVersionedSerializer> throw new IllegalArgumentException("Type id doesn't fit in a single byte: " + id); this.id = id; - this.msgType = msgType; - + this.type = type; //noinspection unchecked - this.serializer = (IVersionedSerializer>) serializer; + this.serializer = (ValueSerializer) serializer; + this.txnIdProvider = txnIdProvider; } private static final Type[] idToTypeMapping; @@ -416,7 +550,6 @@ public enum Type implements IVersionedSerializer> for (Type type : types) maxId = Math.max(type.id, maxId); - Type[] idToType = new Type[maxId + 1]; for (Type type : types) { @@ -429,8 +562,8 @@ public enum Type implements IVersionedSerializer> EnumMap msgTypeToType = new EnumMap<>(MessageType.class); for (Type type : types) { - if (null != msgTypeToType.put(type.msgType, type)) - throw new IllegalStateException("Duplicate MessageType " + type.msgType); + if (null != type.type && null != msgTypeToType.put(type.type, type)) + throw new IllegalStateException("Duplicate MessageType " + type.type); } msgTypeToTypeMap = msgTypeToType; } @@ -445,7 +578,7 @@ static Type fromId(int id) return type; } - static Type fromMsgType(MessageType msgType) + static Type fromMessageType(MessageType msgType) { Type type = msgTypeToTypeMap.get(msgType); if (null == type) @@ -453,47 +586,290 @@ static Type fromMsgType(MessageType msgType) return type; } - static Type ofMessage(TxnRequest request) + @Override + public int serializedSize(Key key, Object record, int userVersion) { - return fromMsgType(request.type()); + return serializer.serializedSize(key, record, userVersion); } - static boolean mustMakeDurable(TxnRequest message) + @Override + public void serialize(Key key, Object record, DataOutputPlus out, int userVersion) throws IOException { - return message.type().hasSideEffects; + serializer.serialize(key, record, out, userVersion); } @Override - public void serialize(TxnRequest request, DataOutputPlus out, int version) throws IOException + public Object deserialize(Key key, DataInputPlus in, int userVersion) throws IOException { - serializer.serialize(request, out, msVersion(version)); + return serializer.deserialize(key, in, userVersion); } - @Override - public TxnRequest deserialize(DataInputPlus in, int version) throws IOException + TxnId txnId(Message message) { - return serializer.deserialize(in, msVersion(version)); + return txnIdProvider.txnId(message); } + } - @Override - public long serializedSize(TxnRequest request, int version) + static + { + // make noise early if we forget to update our version mappings + Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_50); + } + + private static int msVersion(int version) + { + switch (version) { - return serializer.serializedSize(request, msVersion(version)); + default: throw new IllegalArgumentException(); + case 1: return MessagingService.VERSION_50; } + } - static + static abstract class AuxiliaryRecord + { + final Timestamp timestamp; + + AuxiliaryRecord(Timestamp timestamp) { - // make noise early if we forget to update our version mappings - Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_50); + this.timestamp = timestamp; } - private static int msVersion(int version) + abstract Type type(); + } + + /* + * Placeholder for future record. + */ + static final class ReplayRecord extends AuxiliaryRecord + { + ReplayRecord(Timestamp timestamp) { - switch (version) + super(timestamp); + } + + @Override + Type type() + { + return Type.REPLAY; + } + + static final ValueSerializer SERIALIZER = new ValueSerializer() + { + @Override + public int serializedSize(Key key, ReplayRecord record, int userVersion) + { + return 0; + } + + @Override + public void serialize(Key key, ReplayRecord record, DataOutputPlus out, int userVersion) { - default: throw new IllegalArgumentException(); - case 1: return MessagingService.VERSION_50; } + + @Override + public ReplayRecord deserialize(Key key, DataInputPlus in, int userVersion) + { + return new ReplayRecord(key.timestamp); + } + }; + } + + SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) + { + return LOG_MESSAGE_PROVIDER ? new LoggingMessageProvider(txnId, new MessageProvider(txnId)) : new MessageProvider(txnId); + } + + final class MessageProvider implements SerializerSupport.MessageProvider + { + final TxnId txnId; + + private MessageProvider(TxnId txnId) + { + this.txnId = txnId; + } + + @Override + public Set test(Set messages) + { + Set keys = new ObjectHashSet<>(messages.size() + 1, 0.9f); + for (MessageType message : messages) + keys.add(new Key(txnId, Type.fromMessageType(message))); + Set presentKeys = journal.test(keys); + EnumSet presentMessages = EnumSet.noneOf(MessageType.class); + for (Key key : presentKeys) + presentMessages.add(key.type.type); + return presentMessages; + } + + @Override + public PreAccept preAccept() + { + return readMessage(txnId, Type.PRE_ACCEPT, PreAccept.class); + } + + @Override + public BeginRecovery beginRecover() + { + return readMessage(txnId, Type.BEGIN_RECOVER, BeginRecovery.class); + } + + @Override + public Propagate propagatePreAccept() + { + return readMessage(txnId, Type.PROPAGATE_PRE_ACCEPT, Propagate.class); + } + + @Override + public Accept accept(Ballot ballot) + { + return readMessage(txnId, Type.ACCEPT, Accept.class, (accept) -> ((Accept) accept).ballot.equals(ballot)); + } + + @Override + public Commit commitMinimal() + { + return readMessage(txnId, Type.COMMIT_MINIMAL, Commit.class); + } + + @Override + public Commit commitMaximal() + { + return readMessage(txnId, Type.COMMIT_MAXIMAL, Commit.class); + } + + @Override + public Propagate propagateCommit() + { + return readMessage(txnId, Type.PROPAGATE_COMMIT, Propagate.class); + } + + @Override + public Apply applyMinimal() + { + return readMessage(txnId, Type.APPLY_MINIMAL, Apply.class); + } + + @Override + public Apply applyMaximal() + { + return readMessage(txnId, Type.APPLY_MAXIMAL, Apply.class); + } + + @Override + public Propagate propagateApply() + { + return readMessage(txnId, Type.PROPAGATE_APPLY, Propagate.class); + } + } + + final class LoggingMessageProvider implements SerializerSupport.MessageProvider + { + private final TxnId txnId; + private final MessageProvider provider; + + LoggingMessageProvider(TxnId txnId, MessageProvider provider) + { + this.txnId = txnId; + this.provider = provider; + } + + @Override + public Set test(Set messages) + { + logger.debug("Checking {} messages for {}", messages, txnId); + Set confirmed = provider.test(messages); + logger.debug("Confirmed {} messages for {}", confirmed, txnId); + return confirmed; + } + + @Override + public PreAccept preAccept() + { + logger.debug("Fetching {} message for {}", PRE_ACCEPT_REQ, txnId); + PreAccept preAccept = provider.preAccept(); + logger.debug("Fetched {} message for {}: {}", PRE_ACCEPT_REQ, txnId, preAccept); + return preAccept; + } + + @Override + public BeginRecovery beginRecover() + { + logger.debug("Fetching {} message for {}", BEGIN_RECOVER_REQ, txnId); + BeginRecovery beginRecover = provider.beginRecover(); + logger.debug("Fetched {} message for {}: {}", BEGIN_RECOVER_REQ, txnId, beginRecover); + return beginRecover; + } + + @Override + public Propagate propagatePreAccept() + { + logger.debug("Fetching {} message for {}", PROPAGATE_PRE_ACCEPT_MSG, txnId); + Propagate propagate = provider.propagatePreAccept(); + logger.debug("Fetched {} message for {}: {}", PROPAGATE_PRE_ACCEPT_MSG, txnId, propagate); + return propagate; + } + + @Override + public Accept accept(Ballot ballot) + { + logger.debug("Fetching {} message (with accepted: {}) for {}", ACCEPT_REQ, ballot, txnId); + Accept accept = provider.accept(ballot); + logger.debug("Fetched {} message (with accepted: {}) for {}: {}", ACCEPT_REQ, ballot, txnId, accept); + return accept; + } + + @Override + public Commit commitMinimal() + { + logger.debug("Fetching {} message for {}", COMMIT_MINIMAL_REQ, txnId); + Commit commit = provider.commitMinimal(); + logger.debug("Fetched {} message for {}: {}", COMMIT_MINIMAL_REQ, txnId, commit); + return commit; + } + + @Override + public Commit commitMaximal() + { + logger.debug("Fetching {} message for {}", COMMIT_MAXIMAL_REQ, txnId); + Commit commit = provider.commitMaximal(); + logger.debug("Fetched {} message for {}: {}", COMMIT_MAXIMAL_REQ, txnId, commit); + return commit; + } + + @Override + public Propagate propagateCommit() + { + logger.debug("Fetching {} message for {}", PROPAGATE_COMMIT_MSG, txnId); + Propagate propagate = provider.propagateCommit(); + logger.debug("Fetched {} message for {}: {}", PROPAGATE_COMMIT_MSG, txnId, propagate); + return propagate; + } + + @Override + public Apply applyMinimal() + { + logger.debug("Fetching {} message for {}", APPLY_MINIMAL_REQ, txnId); + Apply apply = provider.applyMinimal(); + logger.debug("Fetched {} message for {}: {}", APPLY_MINIMAL_REQ, txnId, apply); + return apply; + } + + @Override + public Apply applyMaximal() + { + logger.debug("Fetching {} message for {}", APPLY_MAXIMAL_REQ, txnId); + Apply apply = provider.applyMaximal(); + logger.debug("Fetched {} message for {}: {}", APPLY_MAXIMAL_REQ, txnId, apply); + return apply; + } + + @Override + public Propagate propagateApply() + { + logger.debug("Fetching {} message for {}", PROPAGATE_APPLY_MSG, txnId); + Propagate propagate = provider.propagateApply(); + logger.debug("Fetched {} message for {}: {}", PROPAGATE_APPLY_MSG, txnId, propagate); + return propagate; } } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 837ad19c0abe..798b529dad92 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -35,21 +35,18 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; -import java.util.function.Supplier; import java.util.stream.Collectors; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.api.Result; import accord.impl.CommandTimeseries; import accord.impl.CommandsForKey; import accord.local.Command; @@ -61,19 +58,18 @@ import accord.local.Node; import accord.local.RedundantBefore; import accord.local.SaveStatus; +import accord.local.SerializerSupport; +import accord.local.SerializerSupport.MessageProvider; +import accord.local.SerializerSupport.WaitingOnProvider; import accord.local.Status; import accord.local.Status.Durability; import accord.primitives.Ballot; -import accord.primitives.Deps; -import accord.primitives.PartialDeps; -import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; -import accord.primitives.Writes; import accord.topology.Topology; import accord.utils.Invariants; import accord.utils.ReducingRangeMap; @@ -144,12 +140,10 @@ import org.apache.cassandra.serializers.UUIDSerializer; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.AccordConfigurationService.SyncStatus; -import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; -import org.apache.cassandra.service.accord.serializers.DepsSerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.ListenerSerializers; import org.apache.cassandra.service.accord.serializers.TopologySerializers; @@ -157,6 +151,7 @@ import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.btree.BTree; +import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static accord.utils.Invariants.checkArgument; @@ -239,17 +234,14 @@ static TokenType valueOf(Token token) "accord commands", "CREATE TABLE %s (" + "store_id int," - + "domain int," // this is stored as part of txn_id, used currently for more cheaper scans of the table + + "domain int," // this is stored as part of txn_id, used currently for cheaper scans of the table + format("txn_id %s,", TIMESTAMP_TUPLE) + "status int," + "route blob," + "durability int," - + "txn blob," + format("execute_at %s,", TIMESTAMP_TUPLE) + format("promised_ballot %s,", TIMESTAMP_TUPLE) + format("accepted_ballot %s,", TIMESTAMP_TUPLE) - + "dependencies blob," - + "writes blob," + "waiting_on blob," + "listeners set, " + "PRIMARY KEY((store_id, domain, txn_id))" @@ -261,10 +253,6 @@ static TokenType valueOf(Token token) private static class LocalVersionedSerializers { static final LocalVersionedSerializer> route = localSerializer(KeySerializers.route); - static final LocalVersionedSerializer routingKey = localSerializer(AccordRoutingKey.serializer); - static final LocalVersionedSerializer partialTxn = localSerializer(CommandSerializers.partialTxn); - static final LocalVersionedSerializer partialDeps = localSerializer(DepsSerializer.partialDeps); - static final LocalVersionedSerializer writes = localSerializer(CommandSerializers.writes); static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); static final LocalVersionedSerializer topology = localSerializer(TopologySerializers.topology); static final LocalVersionedSerializer> rejectBefore = localSerializer(CommandStoreSerializers.rejectBefore); @@ -296,27 +284,18 @@ public static class CommandsColumns public static final ColumnMetadata status = getColumn(Commands, "status"); public static final ColumnMetadata route = getColumn(Commands, "route"); public static final ColumnMetadata durability = getColumn(Commands, "durability"); - static final ColumnMetadata txn = getColumn(Commands, "txn"); public static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); static final ColumnMetadata promised_ballot = getColumn(Commands, "promised_ballot"); static final ColumnMetadata accepted_ballot = getColumn(Commands, "accepted_ballot"); - static final ColumnMetadata dependencies = getColumn(Commands, "dependencies"); - static final ColumnMetadata writes = getColumn(Commands, "writes"); static final ColumnMetadata waiting_on = getColumn(Commands, "waiting_on"); static final ColumnMetadata listeners = getColumn(Commands, "listeners"); - public static ColumnMetadata[][] TRUNCATE_FIELDS = new ColumnMetadata[][] { - new ColumnMetadata[] { durability, execute_at, route, status }, - new ColumnMetadata[] { durability, execute_at, route, status, writes }, - }; + public static final ColumnMetadata[] TRUNCATE_FIELDS = new ColumnMetadata[] { durability, execute_at, route, status }; static { - for (ColumnMetadata[] cds : TRUNCATE_FIELDS) - { - for (int i = 1 ; i < cds.length ; ++i) - checkState(cds[i - 1].compareTo(cds[i]) < 0); - } + for (int i = 1 ; i < TRUNCATE_FIELDS.length ; ++i) + Invariants.checkState(TRUNCATE_FIELDS[i - 1].compareTo(TRUNCATE_FIELDS[i]) < 0); } } @@ -369,37 +348,24 @@ public static Status.Durability getDurability(Row row) @Nullable public static Route getRoute(Row row) { - Cell cell = row.getCell(route); - if (cell == null) - return null; - try - { - return deserializeOrNull(cell.buffer(), LocalVersionedSerializers.route); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + Cell cell = row.getCell(route); + return deserializeRouteOrNull(cell); } - private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSaveStatus, Cell durabilityCell, Cell executeAtCell, Cell routeCell, @Nullable Cell writesCell, boolean updateTimestamps) + private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSaveStatus, Cell durabilityCell, Cell executeAtCell, Cell routeCell, boolean updateTimestamps) { checkArgument(durabilityCell.column() == CommandsColumns.durability); checkArgument(executeAtCell.column() == CommandsColumns.execute_at); checkArgument(routeCell.column() == CommandsColumns.route); - checkArgument(writesCell == null || writesCell.column() == CommandsColumns.writes); - boolean includeOutcome = writesCell != null; - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS[includeOutcome ? 1 : 0].length); + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS.length); int colIndex = 0; newLeaf[colIndex++] = updateTimestamps ? durabilityCell.withUpdatedTimestamp(newTimestamp) : durabilityCell; newLeaf[colIndex++] = updateTimestamps ? executeAtCell.withUpdatedTimestamp(newTimestamp) : executeAtCell; newLeaf[colIndex++] = updateTimestamps ? routeCell.withUpdatedTimestamp(newTimestamp) : routeCell; // Status always needs to use the new timestamp since we are replacing the existing value // All the other columns are being retained unmodified with at most updated timestamps to accomdate deletion + //noinspection UnusedAssignment newLeaf[colIndex++] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); - if (includeOutcome) - //noinspection UnusedAssignment - newLeaf[colIndex++] = updateTimestamps ? writesCell.withUpdatedTimestamp(newTimestamp) : writesCell; return newLeaf; } @@ -410,8 +376,6 @@ public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSe checkArgument(routeCell.column() == CommandsColumns.route); long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); long newTimestamp = oldTimestamp + 1; - Cell writesCell = withOutcome ? row.getCell(CommandsColumns.writes) : null; - // If durability is not universal we don't want to delete older versions of the row that might have recorded // a higher durability value. maybeDropTruncatedCommandColumns will take care of dropping things even if we don't drop via tombstones. // durability should be the only column that could have an older value that is insufficient for propagating forward @@ -419,10 +383,10 @@ public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSe // We may not have what we need to generate a deletion and include the outcome in the truncated row // so need to wait until we can have the outcome to issue the deletion otherwise it would be shadowed and lost - if (withOutcome && writesCell == null) + if (withOutcome) doDeletion = false; - Object[] newLeaf = truncatedApplyLeaf(newTimestamp, newSaveStatus, durabilityCell, executeAtCell, routeCell, writesCell, doDeletion); + Object[] newLeaf = truncatedApplyLeaf(newTimestamp, newSaveStatus, durabilityCell, executeAtCell, routeCell, doDeletion); // Including a deletion allows future compactions to drop data before it gets to the purger // but it is pretty optional because maybeDropTruncatedCommandColumns will drop the extra columns @@ -431,7 +395,7 @@ public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSe return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), deletion, newLeaf); } - public static Row maybeDropTruncatedCommandColumns(Row row, boolean withOutcome, Cell durabilityCell, Cell executeAtCell, Cell routeCell, Cell statusCell) + public static Row maybeDropTruncatedCommandColumns(Row row, Cell durabilityCell, Cell executeAtCell, Cell routeCell, Cell statusCell) { checkArgument(durabilityCell.column() == CommandsColumns.durability); checkArgument(executeAtCell.column() == CommandsColumns.execute_at); @@ -441,31 +405,19 @@ public static Row maybeDropTruncatedCommandColumns(Row row, boolean withOutcome, // If it's the exact length of the post truncate column count without outcome fields // then it is exactly the columns needed for getting this far and withOutcome doesn't matter since // nothing additional is available to include anyway - if (colCount == TRUNCATE_FIELDS[0].length) - return row; - - Cell writesCell = row.getCell(CommandsColumns.writes); - // This has just the columns needed for truncation with outcome so return it unmodified - if (colCount == TRUNCATE_FIELDS[1].length && withOutcome) + if (colCount == TRUNCATE_FIELDS.length) return row; // Construct a replacement with just the available columns that are still needed - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS[withOutcome ? 1 : 0].length); + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS.length); int colIndex = 0; newLeaf[colIndex++] = durabilityCell; newLeaf[colIndex++] = executeAtCell; newLeaf[colIndex++] = routeCell; + //noinspection UnusedAssignment newLeaf[colIndex++] = statusCell; - if (withOutcome && writesCell != null) - //noinspection UnusedAssignment - newLeaf[colIndex++] = writesCell; - return BTreeRow.create(row.clustering(), row.primaryKeyLivenessInfo(), row.deletion(), newLeaf); - } - - public static Writes getWrites(Row row) throws IOException - { - return deserializeWithVersionOr(row, writes, LocalVersionedSerializers.writes, () -> null); + return BTreeRow.create(row.clustering(), row.primaryKeyLivenessInfo(), row.deletion(), newLeaf); } } @@ -724,53 +676,6 @@ private static T deserialize(ByteBuffer bytes, LocalVersionedSerializer s } } - private static T deserializeOrNull(ByteBuffer bytes, LocalVersionedSerializer serializer) throws IOException - { - return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, serializer) : null; - } - - private static WaitingOn deserializeWaitingOn(@Nullable Deps deps, @Nullable ByteBuffer bytes) throws IOException - { - if (bytes == null || !bytes.hasRemaining()) - return deps == null ? WaitingOn.EMPTY : WaitingOn.none(deps); - - return WaitingOnSerializer.deserialize(deps, new DataInputBuffer(bytes, false)); - } - - private static ImmutableSortedSet deserializeTimestampSet(Set serialized, TimestampFactory timestampFactory) - { - if (serialized == null || serialized.isEmpty()) - return ImmutableSortedSet.of(); - - List result = new ArrayList<>(serialized.size()); - for (ByteBuffer bytes : serialized) - result.add(deserializeTimestampOrNull(bytes, timestampFactory)); - - return ImmutableSortedSet.copyOf(result); - } - - private static ImmutableSortedSet deserializeTxnIdNavigableSet(UntypedResultSet.Row row, String name) - { - return deserializeTimestampSet(row.getSet(name, BytesType.instance), TxnId::fromBits); - } - - private static Listeners.Immutable deserializeListeners(Set serialized) throws IOException - { - if (serialized == null || serialized.isEmpty()) - return Listeners.Immutable.EMPTY; - Listeners result = new Listeners(); - for (ByteBuffer bytes : serialized) - { - result.add(deserialize(bytes, LocalVersionedSerializers.listeners)); - } - return new Listeners.Immutable(result); - } - - private static Listeners.Immutable deserializeListeners(UntypedResultSet.Row row, String name) throws IOException - { - return deserializeListeners(row.getSet(name, BytesType.instance)); - } - private interface SerializeFunction { ByteBuffer apply(V v) throws IOException; @@ -799,11 +704,6 @@ private static void addCellIfModified(ColumnMetadata colu addCellIfModified(column, get, v -> serializeOrNull(v, serializer), builder, timestampMicros, nowInSeconds, original, command); } - private static void addKeyCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, int nowInSeconds, C original, C command) throws IOException - { - addCellIfModified(column, get, v -> serializeOrNull((AccordRoutingKey) v, LocalVersionedSerializers.routingKey), builder, timestampMicros, nowInSeconds, original, command); - } - private static > void addEnumCellIfModified(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, int nowInSeconds, C original, C command) throws IOException { // TODO: convert to byte arrays @@ -886,21 +786,14 @@ public static Mutation getCommandMutation(int storeId, Command original, Command int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); builder.addPrimaryKeyLivenessInfo(LivenessInfo.create(timestampMicros, nowInSeconds)); - addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, nowInSeconds, original, command); addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.txn, Command::partialTxn, LocalVersionedSerializers.partialTxn, builder, timestampMicros, nowInSeconds, original, command); - + addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, nowInSeconds, original, command); + addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, LocalVersionedSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); + addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.accepted_ballot, Command::accepted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.dependencies, Command::partialDeps, LocalVersionedSerializers.partialDeps, builder, timestampMicros, nowInSeconds, original, command); - - addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, LocalVersionedSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); - - addCellIfModified(CommandsColumns.writes, Command::writes, v -> serialize(v, LocalVersionedSerializers.writes), builder, timestampMicros, nowInSeconds, original, command); - // TODO review this is just to work around Truncated not being committed but having a status after committed // so status claims it is committed. if (!command.isTruncated() && command.isCommitted()) @@ -1018,37 +911,6 @@ public static SaveStatus deserializeSaveStatusOrNull(Cell cell) return cell == null ? null : CommandSerializers.saveStatus.forOrdinal(cell.accessor().getInt(cell.value(), 0)); } - public static Route deserializeRouteOrNull(Cell cell) - { - if (cell == null) - return null; - try - { - return deserializeOrNull(cell.buffer(), LocalVersionedSerializers.route); - } - catch (IOException e) - { - throw new RuntimeException(e); - } - } - - private static T deserializeWithVersionOr(UntypedResultSet.Row row, String dataColumn, LocalVersionedSerializer serializer, Supplier defaultSupplier) throws IOException - { - if (!row.has(dataColumn)) - return defaultSupplier.get(); - - return deserialize(row.getBlob(dataColumn), serializer); - } - - private static T deserializeWithVersionOr(Row row, ColumnMetadata metadata, LocalVersionedSerializer serializer, Supplier defaultSupplier) throws IOException - { - Cell cell = row.getCell(metadata); - if (cell == null) - return defaultSupplier.get(); - - return deserialize(cell.buffer(), serializer); - } - public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId txnId) { String cql = "SELECT * FROM %s.%s " + @@ -1207,7 +1069,7 @@ public static void findAllKeysBetween(int commandStore, AccordKeyspace.serializeToken(start), startInclusive, AccordKeyspace.serializeToken(end), endInclusive, ImmutableSet.of("key"), - Stage.READ.executor(), Observable.distinct(callback).map(value -> AccordKeyspace.deserializeKey(value))); + Stage.READ.executor(), Observable.distinct(callback).map(AccordKeyspace::deserializeKey)); work.schedule(); } @@ -1268,93 +1130,130 @@ protected UntypedResultSet query(UntypedResultSet.Row lastSeen) public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) { commandStore.checkNotInStoreThread(); + return unsafeLoadCommand(commandStore, txnId); + } + static Command unsafeLoadCommand(AccordCommandStore commandStore, TxnId txnId) + { UntypedResultSet rows = loadCommandRow(commandStore, txnId); - if (rows.isEmpty()) - { return null; - } + UntypedResultSet.Row row = rows.one(); try { - UntypedResultSet.Row row = rows.one(); checkState(deserializeTxnId(row).equals(txnId)); + + CommonAttributes.Mutable attrs = + new CommonAttributes.Mutable(txnId) + .durability(deserializeDurability(row)) + .route(deserializeRouteOrNull(row)) + .setListeners(deserializeListeners(row)); SaveStatus status = deserializeStatus(row); - CommonAttributes.Mutable attributes = new CommonAttributes.Mutable(txnId); - // TODO: something less brittle than ordinal, more efficient than values() - attributes.durability(Status.Durability.values()[row.getInt("durability", 0)]); - attributes.route(deserializeOrNull(row.getBlob("route"), LocalVersionedSerializers.route)); - attributes.partialTxn(deserializeTxn(row)); - PartialDeps deps = deserializeDependencies(row); - attributes.partialDeps(deps); - attributes.setListeners(deserializeListeners(row, "listeners")); - WaitingOn waitingOn = deserializeWaitingOn(deps, row.getBlob("waiting_on")); - - Timestamp executeAt = deserializeExecuteAt(row); - Ballot promised = deserializeTimestampOrNull(row, "promised_ballot", Ballot::fromBits); - Ballot accepted = deserializeTimestampOrNull(row, "accepted_ballot", Ballot::fromBits); - Writes writes = deserializeWithVersionOr(row, "writes", LocalVersionedSerializers.writes, () -> null); - - switch (status.status) - { - case NotDefined: - return Command.SerializerSupport.notDefined(attributes, promised); - case PreAccepted: - return Command.SerializerSupport.preaccepted(attributes, executeAt, promised); - case AcceptedInvalidate: - case Accepted: - case PreCommitted: - return Command.SerializerSupport.accepted(attributes, status, executeAt, promised, accepted); - case Committed: - case ReadyToExecute: - return Command.SerializerSupport.committed(attributes, status, executeAt, promised, accepted, waitingOn); - case PreApplied: - case Applied: - return Command.SerializerSupport.executed(attributes, status, executeAt, promised, accepted, waitingOn, writes, Result.APPLIED); - case Truncated: - return Command.SerializerSupport.truncatedApply(attributes, status, executeAt, writes, Result.APPLIED); - case Invalidated: - return Command.SerializerSupport.invalidated(txnId, attributes.durableListeners()); - default: - throw new IllegalStateException("Unhandled status " + status); - } - } - catch (IOException e) - { - logger.error("Exception loading AccordCommand " + txnId, e); - throw new RuntimeException(e); + + Timestamp executeAt = deserializeExecuteAtOrNull(row); + Ballot promised = deserializePromisedOrNull(row); + Ballot accepted = deserializeAcceptedOrNull(row); + + WaitingOnProvider waitingOn = deserializeWaitingOn(row); + MessageProvider messages = commandStore.makeMessageProvider(txnId); + + return SerializerSupport.reconstruct(attrs, status, executeAt, promised, accepted, waitingOn, messages); } catch (Throwable t) { logger.error("Exception loading AccordCommand " + txnId, t); - throw t; + throw Throwables.unchecked(t); } } - public static PartialDeps deserializeDependencies(UntypedResultSet.Row row) throws IOException + public static TxnId deserializeTxnId(UntypedResultSet.Row row) + { + return deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits); + } + + public static Status.Durability deserializeDurability(UntypedResultSet.Row row) { - return deserializeOrNull(row.getBlob("dependencies"), LocalVersionedSerializers.partialDeps); + // TODO (performance, expected): something less brittle than ordinal, more efficient than values() + return Status.Durability.values()[row.getInt("durability", 0)]; } - public static Timestamp deserializeExecuteAt(UntypedResultSet.Row row) + private static Route deserializeRouteOrNull(ByteBuffer bytes) throws IOException { - return deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits); + return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, LocalVersionedSerializers.route) : null; + } + + private static Route deserializeRouteOrNull(UntypedResultSet.Row row) throws IOException + { + return deserializeRouteOrNull(row.getBlob("route")); + } + + public static Route deserializeRouteOrNull(Cell cell) + { + if (cell == null) + return null; + + try + { + return deserializeRouteOrNull(cell.buffer()); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + private static Listeners.Immutable deserializeListeners(UntypedResultSet.Row row) throws IOException + { + Set serialized = row.getSet("listeners", BytesType.instance); + if (serialized == null || serialized.isEmpty()) + return Listeners.Immutable.EMPTY; + + Listeners result = new Listeners<>(); + for (ByteBuffer bytes : serialized) + result.add(deserialize(bytes, LocalVersionedSerializers.listeners)); + return new Listeners.Immutable(result); } public static SaveStatus deserializeStatus(UntypedResultSet.Row row) { + // TODO (performance, expected): something less brittle than ordinal, more efficient than values() return SaveStatus.values()[row.getInt("status")]; } - public static TxnId deserializeTxnId(UntypedResultSet.Row row) + public static Timestamp deserializeExecuteAtOrNull(UntypedResultSet.Row row) { - return deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits); + return deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits); } - public static PartialTxn deserializeTxn(UntypedResultSet.Row row) throws IOException + public static Ballot deserializePromisedOrNull(UntypedResultSet.Row row) { - return deserializeOrNull(row.getBlob("txn"), LocalVersionedSerializers.partialTxn); + return deserializeTimestampOrNull(row.getBlob("promised_ballot"), Ballot::fromBits); + } + + public static Ballot deserializeAcceptedOrNull(UntypedResultSet.Row row) + { + return deserializeTimestampOrNull(row.getBlob("accepted_ballot"), Ballot::fromBits); + } + + private static WaitingOnProvider deserializeWaitingOn(UntypedResultSet.Row row) + { + ByteBuffer bytes = row.getBlob("waiting_on"); + + return (deps) -> + { + if (bytes == null || !bytes.hasRemaining()) + return deps == null ? WaitingOn.EMPTY : WaitingOn.none(deps); + + try + { + return WaitingOnSerializer.deserialize(deps, new DataInputBuffer(bytes, false)); + } + catch (IOException e) + { + throw Throwables.unchecked(e); + } + }; } public static PartitionKey deserializeKey(ByteBuffer buffer) @@ -1517,6 +1416,11 @@ public static SinglePartitionReadCommand getCommandsForKeyRead(int storeId, Part public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) { commandStore.checkNotInStoreThread(); + return unsafeLoadCommandsForKey(commandStore, key); + } + + static CommandsForKey unsafeLoadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + { long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 1692e85fe8fe..c51f202d54a9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -66,9 +66,11 @@ private VerbMapping() mapping.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); mapping.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); mapping.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); - mapping.put(MessageType.COMMIT_REQ, Verb.ACCORD_COMMIT_REQ); + mapping.put(MessageType.COMMIT_MINIMAL_REQ, Verb.ACCORD_COMMIT_REQ); + mapping.put(MessageType.COMMIT_MAXIMAL_REQ, Verb.ACCORD_COMMIT_REQ); mapping.put(MessageType.COMMIT_INVALIDATE_REQ, Verb.ACCORD_COMMIT_INVALIDATE_REQ); - mapping.put(MessageType.APPLY_REQ, Verb.ACCORD_APPLY_REQ); + mapping.put(MessageType.APPLY_MINIMAL_REQ, Verb.ACCORD_APPLY_REQ); + mapping.put(MessageType.APPLY_MAXIMAL_REQ, Verb.ACCORD_APPLY_REQ); mapping.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); mapping.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); mapping.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); @@ -99,8 +101,15 @@ private VerbMapping() // Any request can receive a generic failure response if (type == MessageType.FAILURE_RSP) continue; - if (!mapping.containsKey(type)) - throw new AssertionError("Missing mapping for Accord MessageType " + type); + + if (mapping.containsKey(type)) + { + if (type.isLocal()) throw new AssertionError("Extraneous mapping for LOCAL Accord MessageType " + type); + } + else + { + if (type.isRemote()) throw new AssertionError("Missing mapping for REMOTE Accord MessageType " + type); + } } } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index b85d0b570454..ae6901118d80 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -41,22 +41,26 @@ import accord.local.NodeTimeService; import accord.local.RedundantBefore; import accord.local.ShardDistributor.EvenSplit; +import accord.messages.LocalMessage; import accord.messages.Request; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.topology.TopologyManager; import accord.utils.DefaultRandom; import accord.utils.Invariants; +import accord.utils.MapReduceConsume; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import org.agrona.collections.Int2ObjectHashMap; +import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.journal.AsyncWriteCallback; import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -99,6 +103,7 @@ public class AccordService implements IAccordService, Shutdownable private final AccordConfigurationService configService; private final AccordScheduler scheduler; private final AccordDataStore dataStore; + private final AccordJournal journal; private final AccordVerbHandler verbHandler; private static final IAccordService NOOP_SERVICE = new IAccordService() @@ -221,8 +226,10 @@ private AccordService() this.messageSink = new AccordMessageSink(agent, configService); this.scheduler = new AccordScheduler(); this.dataStore = new AccordDataStore(); + this.journal = new AccordJournal(); this.node = new Node(localId, messageSink, + this::handleLocalMessage, configService, AccordService::uniqueNow, NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, AccordService::uniqueNow), @@ -233,14 +240,15 @@ private AccordService() scheduler, SizeOfIntersectionSorter.SUPPLIER, SimpleProgressLog::new, - AccordCommandStores.factory(new AccordJournal().start())); + AccordCommandStores.factory(journal)); this.nodeShutdown = toShutdownable(node); - this.verbHandler = new AccordVerbHandler<>(this.node, configService); + this.verbHandler = new AccordVerbHandler<>(node, configService, journal); } @Override public void startup() { + journal.start(); configService.start(); ClusterMetadataService.instance().log().addListener(configService); } @@ -316,6 +324,37 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) } } + private void handleLocalMessage(LocalMessage message, Node node) + { + if (!message.type().hasSideEffects()) + { + message.process(node); + return; + } + + journal.appendMessage(message, ImmediateExecutor.INSTANCE, new AsyncWriteCallback() + { + @Override + public void run() + { + // TODO (performance, expected): do not retain references to messages beyond a certain total + // cache threshold; in case of flush lagging behind, read the messages from journal and + // deserialize instead before processing, to prevent memory pressure buildup from messages + // pending flush to disk. + message.process(node); + } + + @Override + public void onFailure(Throwable error) + { + if (message instanceof MapReduceConsume) + ((MapReduceConsume) message).accept(null, error); + else + node.agent().onUncaughtException(error); + } + }); + } + private static RuntimeException throwTimeout(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) { throw txn.isWrite() ? new WriteTimeoutException(WriteType.CAS, consistencyLevel, 0, 0, txnId.toString()) @@ -345,13 +384,13 @@ public boolean isTerminated() @Override public void shutdown() { - ExecutorUtils.shutdown(Arrays.asList(scheduler, nodeShutdown)); + ExecutorUtils.shutdown(Arrays.asList(scheduler, nodeShutdown, journal)); } @Override public Object shutdownNow() { - ExecutorUtils.shutdownNow(Arrays.asList(scheduler, nodeShutdown)); + ExecutorUtils.shutdownNow(Arrays.asList(scheduler, nodeShutdown, journal)); return null; } @@ -360,7 +399,7 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted { try { - ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(scheduler, nodeShutdown)); + ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(scheduler, nodeShutdown, journal)); return true; } catch (TimeoutException e) diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 47ac64a11522..15faee4c001e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -39,6 +39,7 @@ import static java.lang.String.format; import static org.apache.cassandra.service.accord.AccordCachingState.Status.EVICTED; import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_LOAD; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADED; import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADING; import static org.apache.cassandra.service.accord.AccordCachingState.Status.SAVING; @@ -52,6 +53,16 @@ public class AccordStateCache extends IntrusiveLinkedList node) checkState(!isInQueue(node)); bytesCached -= node.lastQueriedEstimatedSizeOnHeap; + + if (node.status() == LOADED && VALIDATE_LOAD_ON_EVICT) + instanceForNode(node).validateLoadEvicted(node); + if (!node.hasListeners()) { AccordCachingState self = cache.remove(node.key()); @@ -185,10 +200,11 @@ public > Instance instance( Function, S> safeRefFactory, Function loadFunction, BiFunction saveFunction, + BiFunction validateFunction, ToLongFunction heapEstimator) { Instance instance = - new Instance<>(keyClass, safeRefFactory, loadFunction, saveFunction, heapEstimator); + new Instance<>(keyClass, safeRefFactory, loadFunction, saveFunction, validateFunction, heapEstimator); if (instances.put(realKeyClass, instance) != null) throw new IllegalArgumentException(format("Cache instances for key type %s already exists", realKeyClass.getName())); @@ -202,6 +218,7 @@ public class Instance> private final Function, S> safeRefFactory; private Function loadFunction; private BiFunction saveFunction; + private final BiFunction validateFunction; private final ToLongFunction heapEstimator; private final Stats stats = new Stats(); @@ -210,12 +227,14 @@ public Instance( Function, S> safeRefFactory, Function loadFunction, BiFunction saveFunction, + BiFunction validateFunction, ToLongFunction heapEstimator) { this.keyClass = keyClass; this.safeRefFactory = safeRefFactory; this.loadFunction = loadFunction; this.saveFunction = saveFunction; + this.validateFunction = validateFunction; this.heapEstimator = heapEstimator; } @@ -339,6 +358,16 @@ public void release(S safeRef) maybeEvictSomeNodes(); } + void validateLoadEvicted(AccordCachingState node) + { + @SuppressWarnings("unchecked") + AccordCachingState state = (AccordCachingState) node; + K key = state.key(); + V evicted = state.get(); + if (!validateFunction.apply(key, evicted)) + throw new IllegalStateException("Reloaded value for key " + key + " is not equal to or fuller than evicted value " + evicted); + } + @VisibleForTesting public AccordCachingState getUnsafe(K key) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 5cf7e95aaa4f..27302b73b5af 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -25,6 +25,9 @@ import accord.local.Node; import accord.messages.Request; +import accord.utils.MapReduceConsume; +import org.apache.cassandra.concurrent.ImmediateExecutor; +import org.apache.cassandra.journal.AsyncWriteCallback; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -34,11 +37,13 @@ public class AccordVerbHandler implements IVerbHandler private final Node node; private final AccordEndpointMapper endpointMapper; + private final AccordJournal journal; - public AccordVerbHandler(Node node, AccordEndpointMapper endpointMapper) + public AccordVerbHandler(Node node, AccordEndpointMapper endpointMapper, AccordJournal journal) { this.node = node; this.endpointMapper = endpointMapper; + this.journal = journal; } @Override @@ -59,6 +64,33 @@ public void doVerb(Message message) throws IOException return; } } - request.process(node, endpointMapper.mappedId(message.from()), message); + + if (!request.type().hasSideEffects()) + { + request.process(node, endpointMapper.mappedId(message.from()), message); + return; + } + + journal.appendMessage(request, ImmediateExecutor.INSTANCE, new AsyncWriteCallback() + { + @Override + public void run() + { + // TODO (performance, expected): do not retain references to messages beyond a certain total + // cache threshold; in case of flush lagging behind, read the messages from journal and + // deserialize instead before processing, to prevent memory pressure buildup from messages + // pending flush to disk. + request.process(node, endpointMapper.mappedId(message.from()), message); + } + + @Override + public void onFailure(Throwable error) + { + if (request instanceof MapReduceConsume) + ((MapReduceConsume) request).accept(null, error); + else + node.agent().onUncaughtException(error); + } + }); } } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 5682df2e3eca..a9413cd8e2ba 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -17,12 +17,15 @@ */ package org.apache.cassandra.service.accord.async; +import java.util.Collections; import java.util.HashMap; import java.util.TreeMap; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; +import javax.annotation.Nullable; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; @@ -31,6 +34,7 @@ import accord.local.PreLoadContext; import accord.local.SafeCommandStore; import accord.primitives.RoutableKey; +import accord.primitives.Seekables; import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.async.AsyncChains; @@ -135,7 +139,7 @@ AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext pr return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()); } - private void callback(Object o, Throwable throwable) + private void onLoaded(Object o, Throwable throwable) { if (throwable != null) { @@ -148,6 +152,11 @@ private void callback(Object o, Throwable throwable) } } + void onUnblocked() + { + commandStore.executor().execute(this); + } + private void state(State state) { this.state = state; @@ -166,11 +175,36 @@ private void finish(R result, Throwable failure) } } + @Nullable + TxnId primaryTxnId() + { + return preLoadContext.primaryTxnId(); + } + + @SuppressWarnings("unchecked") + Iterable keys() + { + Seekables keys = preLoadContext.keys(); + switch (keys.domain()) + { + default: + throw new IllegalStateException("Unhandled domain " + keys.domain()); + case Key: + return (Iterable) keys; + case Range: + // TODO (expected): handle ranges + return Collections.emptyList(); + } + } + private void fail(Throwable throwable) { + commandStore.checkInStoreThread(); Invariants.nonNull(throwable); + if (state.isComplete()) return; + try { switch (state) @@ -183,7 +217,7 @@ private void fail(Throwable throwable) commandStore.abortCurrentOperation(); case LOADING: context.releaseResources(commandStore); - break; + commandStore.executionOrder().unregister(this); case INITIALIZED: break; // nothing to clean up, call callback } @@ -201,13 +235,17 @@ private void fail(Throwable throwable) protected void runInternal() { + Boolean canRun = null; switch (state) { default: throw new IllegalStateException("Unexpected state " + state); case INITIALIZED: + canRun = commandStore.executionOrder().register(this); state(LOADING); case LOADING: - if (!loader.load(context, this::callback)) + if (null == canRun) + canRun = commandStore.executionOrder().canRun(this); + if (!loader.load(context, this::onLoaded) || !canRun) return; state(PREPARING); case PREPARING: @@ -218,6 +256,7 @@ protected void runInternal() safeStore.postExecute(context.commands, context.commandsForKeys); context.releaseResources(commandStore); commandStore.completeOperation(safeStore); + commandStore.executionOrder().unregister(this); state(COMPLETING); case COMPLETING: finish(result, null); diff --git a/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java b/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java new file mode 100644 index 000000000000..5a3c28ca9488 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord.async; + +import java.util.ArrayDeque; + +import accord.primitives.RoutableKey; +import accord.primitives.TxnId; +import accord.utils.Invariants; +import org.agrona.collections.Object2ObjectHashMap; + +/** + * Assists with correct ordering of {@link AsyncOperation} execution wrt each other, + * preventing reordering of overlapping operations by {@link AsyncLoader}. + */ +public class ExecutionOrder +{ + private final Object2ObjectHashMap queues = new Object2ObjectHashMap<>(); + + /** + * Register an operation as having a dependency on its keys and TxnIds + * @return true if no other operation depends on the keys or TxnIds, false otherwise + */ + boolean register(AsyncOperation operation) + { + boolean canRun = true; + for (RoutableKey key : operation.keys()) + canRun &= register(key, operation); + TxnId primaryTxnId = operation.primaryTxnId(); + if (null != primaryTxnId) + canRun &= register(primaryTxnId, operation); + return canRun; + } + + /** + * Register an operation as having a dependency on a key or a TxnId + * @return true if no other operation depends on the key/TxnId, false otherwise + */ + private boolean register(Object keyOrTxnId, AsyncOperation operation) + { + Object operationOrQueue = queues.get(keyOrTxnId); + if (null == operationOrQueue) + { + queues.put(keyOrTxnId, operation); + return true; + } + + if (operationOrQueue instanceof AsyncOperation) + { + ArrayDeque> queue = new ArrayDeque<>(4); + queue.add((AsyncOperation) operationOrQueue); + queue.add(operation); + queues.put(keyOrTxnId, queue); + } + else + { + @SuppressWarnings("unchecked") + ArrayDeque> queue = (ArrayDeque>) operationOrQueue; + queue.add(operation); + } + return false; + } + + /** + * Unregister the operation as being a dependency for its keys and TxnIds + */ + void unregister(AsyncOperation operation) + { + for (RoutableKey key : operation.keys()) + unregister(key, operation); + TxnId primaryTxnId = operation.primaryTxnId(); + if (null != primaryTxnId) + unregister(primaryTxnId, operation); + } + + /** + * Unregister the operation as being a dependency for key or TxnId + */ + private void unregister(Object keyOrTxnId, AsyncOperation operation) + { + Object operationOrQueue = queues.get(keyOrTxnId); + Invariants.nonNull(operationOrQueue); + + if (operationOrQueue instanceof AsyncOperation) + { + Invariants.checkState(operationOrQueue == operation); + queues.remove(keyOrTxnId); + } + else + { + @SuppressWarnings("unchecked") + ArrayDeque> queue = (ArrayDeque>) operationOrQueue; + AsyncOperation head = queue.poll(); + Invariants.checkState(head == operation); + + if (queue.isEmpty()) + { + queues.remove(keyOrTxnId); + } + else + { + head = queue.peek(); + if (canRun(head)) + head.onUnblocked(); + } + } + } + + boolean canRun(AsyncOperation operation) + { + for (RoutableKey key : operation.keys()) + if (!canRun(key, operation)) + return false; + + TxnId primaryTxnId = operation.primaryTxnId(); + return primaryTxnId == null || canRun(primaryTxnId, operation); + } + + private boolean canRun(Object keyOrTxnId, AsyncOperation operation) + { + Object operationOrQueue = queues.get(keyOrTxnId); + Invariants.nonNull(operationOrQueue); + + if (operationOrQueue instanceof AsyncOperation) + { + Invariants.checkState(operationOrQueue == operation); + return true; + } + + @SuppressWarnings("unchecked") + ArrayDeque> queue = (ArrayDeque>) operationOrQueue; + return queue.peek() == operation; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index 9ad1fcd9531e..e8ccc6258fcd 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -24,10 +24,10 @@ import accord.messages.Apply; import accord.primitives.PartialRoute; import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.utils.NullableSerializer; public class ApplySerializers { @@ -36,21 +36,23 @@ public class ApplySerializers @Override public void serializeBody(Apply apply, DataOutputPlus out, int version) throws IOException { + out.writeBoolean(apply.kind == Apply.Kind.Maximal); KeySerializers.seekables.serialize(apply.keys(), out, version); CommandSerializers.timestamp.serialize(apply.executeAt, out, version); DepsSerializer.partialDeps.serialize(apply.deps, out, version); - NullableSerializer.serializeNullable(apply.txn, out, version, CommandSerializers.partialTxn); + CommandSerializers.nullablePartialTxn.serialize(apply.txn, out, version); CommandSerializers.writes.serialize(apply.writes, out, version); } @Override - public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, + in.readBoolean() ? Apply.Kind.Maximal : Apply.Kind.Minimal, KeySerializers.seekables.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), - NullableSerializer.deserializeNullable(in, version, CommandSerializers.partialTxn), + CommandSerializers.nullablePartialTxn.deserialize(in, version), CommandSerializers.writes.deserialize(in, version), Result.APPLIED); } @@ -58,10 +60,11 @@ public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, Partial @Override public long serializedBodySize(Apply apply, int version) { - return KeySerializers.seekables.serializedSize(apply.keys(), version) + return TypeSizes.BOOL_SIZE + + KeySerializers.seekables.serializedSize(apply.keys(), version) + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + DepsSerializer.partialDeps.serializedSize(apply.deps, version) - + NullableSerializer.serializedNullableSize(apply.txn, version, CommandSerializers.partialTxn) + + CommandSerializers.nullablePartialTxn.serializedSize(apply.txn, version) + CommandSerializers.writes.serializedSize(apply.writes, version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java index 5568b9b01ed0..54c130ac2baa 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java @@ -31,10 +31,6 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; - public class BeginInvalidationSerializers { public static final IVersionedSerializer request = new IVersionedSerializer() @@ -69,35 +65,35 @@ public long serializedSize(BeginInvalidation begin, int version) @Override public void serialize(InvalidateReply reply, DataOutputPlus out, int version) throws IOException { - serializeNullable(reply.supersededBy, out, version, CommandSerializers.ballot); + CommandSerializers.nullableBallot.serialize(reply.supersededBy, out, version); CommandSerializers.ballot.serialize(reply.accepted, out, version); CommandSerializers.status.serialize(reply.status, out, version); out.writeBoolean(reply.acceptedFastPath); - serializeNullable(reply.route, out, version, KeySerializers.route); - serializeNullable(reply.homeKey, out, version, KeySerializers.routingKey); + KeySerializers.nullableRoute.serialize(reply.route, out, version); + KeySerializers.nullableRoutingKey.serialize(reply.homeKey, out, version); } @Override public InvalidateReply deserialize(DataInputPlus in, int version) throws IOException { - Ballot supersededBy = deserializeNullable(in, version, CommandSerializers.ballot); + Ballot supersededBy = CommandSerializers.nullableBallot.deserialize(in, version); Ballot accepted = CommandSerializers.ballot.deserialize(in, version); Status status = CommandSerializers.status.deserialize(in, version); boolean acceptedFastPath = in.readBoolean(); - Route route = deserializeNullable(in, version, KeySerializers.route); - RoutingKey homeKey = deserializeNullable(in, version, KeySerializers.routingKey); + Route route = KeySerializers.nullableRoute.deserialize(in, version); + RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); return new InvalidateReply(supersededBy, accepted, status, acceptedFastPath, route, homeKey); } @Override public long serializedSize(InvalidateReply reply, int version) { - return serializedNullableSize(reply.supersededBy, version, CommandSerializers.ballot) - + CommandSerializers.ballot.serializedSize(reply.accepted, version) - + CommandSerializers.status.serializedSize(reply.status, version) - + TypeSizes.BOOL_SIZE - + serializedNullableSize(reply.route, version, KeySerializers.route) - + serializedNullableSize(reply.homeKey, version, KeySerializers.routingKey); + return CommandSerializers.nullableBallot.serializedSize(reply.supersededBy, version) + + CommandSerializers.ballot.serializedSize(reply.accepted, version) + + CommandSerializers.status.serializedSize(reply.status, version) + + TypeSizes.BOOL_SIZE + + KeySerializers.nullableRoute.serializedSize(reply.route, version) + + KeySerializers.nullableRoutingKey.serializedSize(reply.homeKey, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 4ce9eb7e925f..74ed2eb70386 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -45,9 +45,6 @@ import org.apache.cassandra.io.util.DataOutputPlus; import static accord.messages.CheckStatus.SerializationSupport.createOk; -import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; -import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; public class CheckStatusSerializers { @@ -107,19 +104,19 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t CommandSerializers.saveStatus.serialize(ok.maxSaveStatus, out, version); CommandSerializers.ballot.serialize(ok.promised, out, version); CommandSerializers.ballot.serialize(ok.accepted, out, version); - serializeNullable(ok.executeAt, out, version, CommandSerializers.timestamp); + CommandSerializers.nullableTimestamp.serialize(ok.executeAt, out, version); out.writeBoolean(ok.isCoordinating); CommandSerializers.durability.serialize(ok.durability, out, version); - serializeNullable(ok.route, out, version, KeySerializers.route); - serializeNullable(ok.homeKey, out, version, KeySerializers.routingKey); + KeySerializers.nullableRoute.serialize(ok.route, out, version); + KeySerializers.nullableRoutingKey.serialize(ok.homeKey, out, version); if (!(reply instanceof CheckStatusOkFull)) return; CheckStatusOkFull okFull = (CheckStatusOkFull) ok; - serializeNullable(okFull.partialTxn, out, version, CommandSerializers.partialTxn); - serializeNullable(okFull.committedDeps, out, version, DepsSerializer.partialDeps); - serializeNullable(okFull.writes, out, version, CommandSerializers.writes); + CommandSerializers.nullablePartialTxn.serialize(okFull.partialTxn, out, version); + DepsSerializer.nullablePartialDeps.serialize(okFull.committedDeps, out, version); + CommandSerializers.nullableWrites.serialize(okFull.writes, out, version); } @Override @@ -139,19 +136,19 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in, version); Ballot promised = CommandSerializers.ballot.deserialize(in, version); Ballot accepted = CommandSerializers.ballot.deserialize(in, version); - Timestamp executeAt = deserializeNullable(in, version, CommandSerializers.timestamp); + Timestamp executeAt = CommandSerializers.nullableTimestamp.deserialize(in, version); boolean isCoordinating = in.readBoolean(); Durability durability = CommandSerializers.durability.deserialize(in, version); - Route route = deserializeNullable(in, version, KeySerializers.route); - RoutingKey homeKey = deserializeNullable(in, version, KeySerializers.routingKey); + Route route = KeySerializers.nullableRoute.deserialize(in, version); + RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); if (kind == OK) return createOk(truncated, invalidIfNotAtLeast, status, maxStatus, promised, accepted, executeAt, isCoordinating, durability, route, homeKey); - PartialTxn partialTxn = deserializeNullable(in, version, CommandSerializers.partialTxn); - PartialDeps committedDeps = deserializeNullable(in, version, DepsSerializer.partialDeps); - Writes writes = deserializeNullable(in, version, CommandSerializers.writes); + PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); + PartialDeps committedDeps = DepsSerializer.nullablePartialDeps.deserialize(in, version); + Writes writes = CommandSerializers.nullableWrites.deserialize(in, version); Result result = null; if (status == SaveStatus.PreApplied || status == SaveStatus.Applied @@ -179,19 +176,19 @@ public long serializedSize(CheckStatusReply reply, int version) size += CommandSerializers.saveStatus.serializedSize(ok.saveStatus, version); size += CommandSerializers.ballot.serializedSize(ok.promised, version); size += CommandSerializers.ballot.serializedSize(ok.accepted, version); - size += serializedNullableSize(ok.executeAt, version, CommandSerializers.timestamp); + size += CommandSerializers.nullableTimestamp.serializedSize(ok.executeAt, version); size += TypeSizes.BOOL_SIZE; size += CommandSerializers.durability.serializedSize(ok.durability, version); - size += serializedNullableSize(ok.homeKey, version, KeySerializers.routingKey); - size += serializedNullableSize(ok.route, version, KeySerializers.route); + size += KeySerializers.nullableRoutingKey.serializedSize(ok.homeKey, version); + size += KeySerializers.nullableRoute.serializedSize(ok.route, version); if (!(reply instanceof CheckStatusOkFull)) return size; CheckStatusOkFull okFull = (CheckStatusOkFull) ok; - size += serializedNullableSize(okFull.partialTxn, version, CommandSerializers.partialTxn); - size += serializedNullableSize(okFull.committedDeps, version, DepsSerializer.partialDeps); - size += serializedNullableSize(okFull.writes, version, CommandSerializers.writes); + size += CommandSerializers.nullablePartialTxn.serializedSize(okFull.partialTxn, version); + size += DepsSerializer.nullablePartialDeps.serializedSize(okFull.committedDeps, version); + size += CommandSerializers.nullableWrites.serializedSize(okFull.writes, version); return size; } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index d25d5cffa290..77f414e0cdcc 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -29,6 +29,7 @@ import accord.local.SaveStatus; import accord.local.Status; import accord.local.Status.Durability; +import accord.local.Status.Known; import accord.primitives.Ballot; import accord.primitives.PartialTxn; import accord.primitives.Ranges; @@ -183,6 +184,7 @@ public long serializedSize(PartialTxn txn, int version) private static final IVersionedSerializer update = new CastingSerializer<>(TxnUpdate.class, TxnUpdate.serializer); public static final IVersionedSerializer partialTxn = new PartialTxnSerializer(read, query, update); + public static final IVersionedSerializer nullablePartialTxn = NullableSerializer.wrap(partialTxn); public static final EnumSerializer saveStatus = new EnumSerializer<>(SaveStatus.class); public static final EnumSerializer status = new EnumSerializer<>(Status.class); @@ -223,4 +225,41 @@ public long serializedSize(Writes writes, int version) return size; } }; + + public static final IVersionedSerializer nullableWrites = NullableSerializer.wrap(writes); + + public static final EnumSerializer definition = new EnumSerializer<>(Status.Definition.class); + public static final EnumSerializer knownExecuteAt = new EnumSerializer<>(Status.KnownExecuteAt.class); + public static final EnumSerializer knownDeps = new EnumSerializer<>(Status.KnownDeps.class); + public static final EnumSerializer outcome = new EnumSerializer<>(Status.Outcome.class); + + public static final IVersionedSerializer known = new IVersionedSerializer() + { + @Override + public void serialize(Known known, DataOutputPlus out, int version) throws IOException + { + definition.serialize(known.definition, out, version); + knownExecuteAt.serialize(known.executeAt, out, version); + knownDeps.serialize(known.deps, out, version); + outcome.serialize(known.outcome, out, version); + } + + @Override + public Known deserialize(DataInputPlus in, int version) throws IOException + { + return new Known(definition.deserialize(in, version), + knownExecuteAt.deserialize(in, version), + knownDeps.deserialize(in, version), + outcome.deserialize(in, version)); + } + + @Override + public long serializedSize(Known known, int version) + { + return definition.serializedSize(known.definition, version) + + knownExecuteAt.serializedSize(known.executeAt, version) + + knownDeps.serializedSize(known.deps, version) + + outcome.serializedSize(known.outcome, version); + } + }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index a57f684b874d..c2ea5e6a24b0 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -40,8 +40,9 @@ public class CommitSerializers @Override public void serializeBody(Commit msg, DataOutputPlus out, int version) throws IOException { + out.writeBoolean(msg.kind == Commit.Kind.Maximal); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); - serializeNullable(msg.partialTxn, out, version, CommandSerializers.partialTxn); + CommandSerializers.nullablePartialTxn.serialize(msg.partialTxn, out, version); DepsSerializer.partialDeps.serialize(msg.partialDeps, out, version); serializeNullable(msg.route, out, version, KeySerializers.fullRoute); serializeNullable(msg.read, out, version, ReadDataSerializers.request); @@ -51,8 +52,9 @@ public void serializeBody(Commit msg, DataOutputPlus out, int version) throws IO public Commit deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, + in.readBoolean() ? Commit.Kind.Maximal : Commit.Kind.Minimal, CommandSerializers.timestamp.deserialize(in, version), - deserializeNullable(in, version, CommandSerializers.partialTxn), + CommandSerializers.nullablePartialTxn.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), deserializeNullable(in, version, KeySerializers.fullRoute), deserializeNullable(in, version, ReadDataSerializers.request) @@ -62,8 +64,9 @@ public Commit deserializeBody(DataInputPlus in, int version, TxnId txnId, Partia @Override public long serializedBodySize(Commit msg, int version) { - return CommandSerializers.timestamp.serializedSize(msg.executeAt, version) - + serializedNullableSize(msg.partialTxn, version, CommandSerializers.partialTxn) + return TypeSizes.BOOL_SIZE + + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + + CommandSerializers.nullablePartialTxn.serializedSize(msg.partialTxn, version) + DepsSerializer.partialDeps.serializedSize(msg.partialDeps, version) + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) + serializedNullableSize(msg.read, version, ReadDataSerializers.request); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java index 5e4e806b251e..3530e06936c9 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java @@ -33,6 +33,7 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.utils.NullableSerializer; import static accord.primitives.KeyDeps.SerializerSupport.keysToTxnIds; import static accord.primitives.KeyDeps.SerializerSupport.keysToTxnIdsCount; @@ -75,6 +76,8 @@ public long serializedSize(PartialDeps partialDeps, int version) } }; + public static final IVersionedSerializer nullablePartialDeps = NullableSerializer.wrap(partialDeps); + @Override public void serialize(D deps, DataOutputPlus out, int version) throws IOException { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index f6fccf5af350..4b184b49fc58 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -21,11 +21,23 @@ import java.io.IOException; import accord.api.Data; +import accord.api.Result; +import accord.api.RoutingKey; import accord.impl.AbstractFetchCoordinator.FetchRequest; import accord.impl.AbstractFetchCoordinator.FetchResponse; +import accord.local.SaveStatus; +import accord.local.Status.Durability; +import accord.local.Status.Known; +import accord.messages.Propagate; import accord.messages.ReadData; import accord.messages.ReadData.ReadReply; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; import accord.primitives.Ranges; +import accord.primitives.Route; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.primitives.Writes; import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -97,7 +109,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I FetchResponse response = (FetchResponse) reply; serializeNullable(response.unavailable, out, version, KeySerializers.ranges); serializeNullable(response.data, out, version, streamDataSerializer); - serializeNullable(response.maxApplied, out, version, CommandSerializers.timestamp); + CommandSerializers.nullableTimestamp.serialize(response.maxApplied, out, version); } @Override @@ -109,7 +121,7 @@ public ReadReply deserialize(DataInputPlus in, int version) throws IOException return new FetchResponse(deserializeNullable(in, version, KeySerializers.ranges), deserializeNullable(in, version, streamDataSerializer), - deserializeNullable(in, version, CommandSerializers.timestamp)); + CommandSerializers.nullableTimestamp.deserialize(in, version)); } @Override @@ -122,7 +134,83 @@ public long serializedSize(ReadReply reply, int version) return TypeSizes.BYTE_SIZE + serializedNullableSize(response.unavailable, version, KeySerializers.ranges) + serializedNullableSize(response.data, version, streamDataSerializer) - + serializedNullableSize(response.maxApplied, version, CommandSerializers.timestamp); + + CommandSerializers.nullableTimestamp.serializedSize(response.maxApplied, version); + } + }; + + public static final IVersionedSerializer propagate = new IVersionedSerializer() + { + @Override + public void serialize(Propagate p, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(p.txnId, out, version); + KeySerializers.route.serialize(p.route, out, version); + CommandSerializers.saveStatus.serialize(p.saveStatus, out, version); + CommandSerializers.saveStatus.serialize(p.maxSaveStatus, out, version); + CommandSerializers.durability.serialize(p.durability, out, version); + KeySerializers.nullableRoutingKey.serialize(p.homeKey, out, version); + KeySerializers.nullableRoutingKey.serialize(p.progressKey, out, version); + CommandSerializers.known.serialize(p.achieved, out, version); + CommandSerializers.nullablePartialTxn.serialize(p.partialTxn, out, version); + DepsSerializer.nullablePartialDeps.serialize(p.partialDeps, out, version); + out.writeLong(p.toEpoch); + CommandSerializers.nullableTimestamp.serialize(p.executeAt, out, version); + CommandSerializers.nullableWrites.serialize(p.writes, out, version); + } + + @Override + public Propagate deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Route route = KeySerializers.route.deserialize(in, version); + SaveStatus saveStatus = CommandSerializers.saveStatus.deserialize(in, version); + SaveStatus maxSaveStatus = CommandSerializers.saveStatus.deserialize(in, version); + Durability durability = CommandSerializers.durability.deserialize(in, version); + RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); + RoutingKey progressKey = KeySerializers.nullableRoutingKey.deserialize(in, version); + Known achieved = CommandSerializers.known.deserialize(in, version); + PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); + PartialDeps partialDeps = DepsSerializer.nullablePartialDeps.deserialize(in, version); + long toEpoch = in.readLong(); + Timestamp executeAt = CommandSerializers.nullableTimestamp.deserialize(in, version); + Writes writes = CommandSerializers.nullableWrites.deserialize(in, version); + + Result result = null; + switch (saveStatus) + { + case PreApplied: + case Applying: + case Applied: + case TruncatedApply: + case TruncatedApplyWithOutcome: + case TruncatedApplyWithDeps: + result = Result.APPLIED; + break; + case Invalidated: + result = Result.INVALIDATED; + break; + } + + return Propagate.SerializerSupport.create(txnId, route, saveStatus, maxSaveStatus, durability, homeKey, progressKey, achieved, partialTxn, partialDeps, toEpoch, executeAt, writes, result); + } + + @Override + public long serializedSize(Propagate p, int version) + { + return CommandSerializers.txnId.serializedSize(p.txnId, version) + + KeySerializers.route.serializedSize(p.route, version) + + CommandSerializers.saveStatus.serializedSize(p.saveStatus, version) + + CommandSerializers.saveStatus.serializedSize(p.maxSaveStatus, version) + + CommandSerializers.durability.serializedSize(p.durability, version) + + KeySerializers.nullableRoutingKey.serializedSize(p.homeKey, version) + + KeySerializers.nullableRoutingKey.serializedSize(p.progressKey, version) + + CommandSerializers.known.serializedSize(p.achieved, version) + + CommandSerializers.nullablePartialTxn.serializedSize(p.partialTxn, version) + + DepsSerializer.nullablePartialDeps.serializedSize(p.partialDeps, version) + + TypeSizes.sizeof(p.toEpoch) + + CommandSerializers.nullableTimestamp.serializedSize(p.executeAt, version) + + CommandSerializers.nullableWrites.serializedSize(p.writes, version) + ; } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index 9fb95cedf2a0..52e534142ad2 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -52,6 +52,7 @@ import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.NullableSerializer; public class KeySerializers { @@ -59,6 +60,7 @@ private KeySerializers() {} public static final IVersionedSerializer key = (IVersionedSerializer) (IVersionedSerializer) PartitionKey.serializer; public static final IVersionedSerializer routingKey = (IVersionedSerializer) (IVersionedSerializer) AccordRoutingKey.serializer; + public static final IVersionedSerializer nullableRoutingKey = NullableSerializer.wrap(routingKey); public static final IVersionedSerializer routingKeys = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) { @@ -199,6 +201,7 @@ public long serializedSize(FullRangeRoute ranges, int version) public static final IVersionedSerializer> route = new AbstractRoutablesSerializer<>( EnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.FullKeyRoute, UnseekablesKind.PartialRangeRoute, UnseekablesKind.FullRangeRoute) ); + public static final IVersionedSerializer> nullableRoute = NullableSerializer.wrap(route); public static final IVersionedSerializer> partialRoute = new AbstractRoutablesSerializer<>( EnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.PartialRangeRoute) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index 19e13cbe0132..99e54b5b4552 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -88,13 +88,13 @@ void serializeOk(RecoverOk recoverOk, DataOutputPlus out, int version) throws IO CommandSerializers.txnId.serialize(recoverOk.txnId, out, version); CommandSerializers.status.serialize(recoverOk.status, out, version); CommandSerializers.ballot.serialize(recoverOk.accepted, out, version); - serializeNullable(recoverOk.executeAt, out, version, CommandSerializers.timestamp); + CommandSerializers.nullableTimestamp.serialize(recoverOk.executeAt, out, version); DepsSerializer.partialDeps.serialize(recoverOk.deps, out, version); - serializeNullable(recoverOk.acceptedDeps, out, version, DepsSerializer.partialDeps); + DepsSerializer.nullablePartialDeps.serialize(recoverOk.acceptedDeps, out, version); DepsSerializer.deps.serialize(recoverOk.earlierCommittedWitness, out, version); DepsSerializer.deps.serialize(recoverOk.earlierAcceptedNoWitness, out, version); out.writeBoolean(recoverOk.rejectsFastPath); - serializeNullable(recoverOk.writes, out, version, CommandSerializers.writes); + CommandSerializers.nullableWrites.serialize(recoverOk.writes, out, version); } @Override @@ -136,13 +136,13 @@ else if (status == Status.Invalidated) return deserializeOk(id, status, CommandSerializers.ballot.deserialize(in, version), - deserializeNullable(in, version, CommandSerializers.timestamp), + CommandSerializers.nullableTimestamp.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), - deserializeNullable(in, version, DepsSerializer.partialDeps), + DepsSerializer.nullablePartialDeps.deserialize(in, version), DepsSerializer.deps.deserialize(in, version), DepsSerializer.deps.deserialize(in, version), in.readBoolean(), - deserializeNullable(in, version, CommandSerializers.writes), + CommandSerializers.nullableWrites.deserialize(in, version), result, in, version); @@ -158,13 +158,13 @@ long serializedOkSize(RecoverOk recoverOk, int version) long size = CommandSerializers.txnId.serializedSize(recoverOk.txnId, version); size += CommandSerializers.status.serializedSize(recoverOk.status, version); size += CommandSerializers.ballot.serializedSize(recoverOk.accepted, version); - size += serializedNullableSize(recoverOk.executeAt, version, CommandSerializers.timestamp); + size += CommandSerializers.nullableTimestamp.serializedSize(recoverOk.executeAt, version); size += DepsSerializer.partialDeps.serializedSize(recoverOk.deps, version); - size += serializedNullableSize(recoverOk.acceptedDeps, version, DepsSerializer.partialDeps); + size += DepsSerializer.nullablePartialDeps.serializedSize(recoverOk.acceptedDeps, version); size += DepsSerializer.deps.serializedSize(recoverOk.earlierCommittedWitness, version); size += DepsSerializer.deps.serializedSize(recoverOk.earlierAcceptedNoWitness, version); size += TypeSizes.sizeof(recoverOk.rejectsFastPath); - size += serializedNullableSize(recoverOk.writes, version, CommandSerializers.writes); + size += CommandSerializers.nullableWrites.serializedSize(recoverOk.writes, version); return size; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java b/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java index e5381aacdfef..e88f8fbf5fe0 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java +++ b/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java @@ -111,7 +111,7 @@ public Keys keys() abstract PartitionKey getKey(T item); abstract T[] newArray(int size); - private int compare(T left, T right) + public int compare(T left, T right) { int cmp = getKey(left).compareTo(getKey(right)); return cmp != 0 ? cmp : compareNonKeyFields(left, right); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index 072a21c1dfab..de50b9652462 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -34,6 +34,7 @@ import accord.primitives.Seekable; import accord.primitives.Timestamp; import accord.primitives.Txn; +import accord.utils.SortedArrays; import org.apache.cassandra.db.SinglePartitionReadCommand; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; @@ -44,6 +45,7 @@ import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.utils.ObjectSizes; +import static accord.utils.SortedArrays.Search.CEIL; import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializedArraySize; @@ -139,6 +141,22 @@ public Read merge(Read read) return new TxnRead(reads, txnKeys.with((Keys)read.keys())); } + @Override + public boolean isEqualOrFuller(Read other) + { + TxnRead that = (TxnRead) other; + + int j = 0; + for (int i = 0; i < that.items.length; ++i) + { + j = SortedArrays.exponentialSearch(this.items, j, this.items.length, that.items[i], this::compare, CEIL); + if (j < 0 || !that.items[i].equals(this.items[j])) + return false; + } + + return this.txnKeys.containsAll(that.txnKeys); + } + @Override public AsyncChain read(Seekable key, Txn.Kind kind, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) { diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index af0b6b771e9f..01a87a0e9d39 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -109,6 +109,22 @@ public boolean equals(Object o) return Arrays.equals(fragments, txnUpdate.fragments) && Objects.equals(condition, txnUpdate.condition); } + @Override + public boolean isEqualOrFuller(Update other) + { + TxnUpdate that = (TxnUpdate) other; + + int j = 0; + for (int i = 0; i < that.keys.size(); ++i) + { + j = this.keys.findNext(j, that.keys.get(i), CEIL); + if (j < 0 || !that.fragments[i].equals(this.fragments[j])) + return false; + } + + return this.condition.equals(that.condition); + } + @Override public int hashCode() { diff --git a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java index d09275733746..5507ea9dd35c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java @@ -59,6 +59,7 @@ import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.shared.DistributedTestBase; +import org.apache.cassandra.service.accord.AccordStateCache; import static org.apache.cassandra.config.CassandraRelevantProperties.JOIN_RING; import static org.apache.cassandra.config.CassandraRelevantProperties.RESET_BOOTSTRAP_PROGRESS; @@ -81,6 +82,7 @@ public static void beforeClass() throws Throwable { ICluster.setup(); SKIP_GC_INSPECTOR.setBoolean(true); + AccordStateCache.validateLoadOnEvict(true); } @Override diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 1093c347e11d..5cb392d88a82 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -118,7 +118,7 @@ private static void check() { for (int i = 0; i < State.events; i++) { - TxnRequest event = State.journal.read(State.toTxnId(i), AccordJournal.Type.PREACCEPT_REQ); + TxnRequest event = State.journal.readMessage(State.toTxnId(i), AccordJournal.Type.PRE_ACCEPT, PreAccept.class); State.logger.info("Event {} -> {}", i, event); if (event == null) throw new AssertionError(String.format("Unable to read event %d", i)); @@ -179,7 +179,7 @@ public static class State public static void append(int event) { TxnRequest request = toRequest(event); - journal.append(request, executor, new AsyncWriteCallback() + journal.appendMessage(request, executor, new AsyncWriteCallback() { @Override public void run() diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index 3bdaa62f0a33..6f1fe5b79467 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -177,6 +177,7 @@ import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordStateCache; import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.transport.Event; @@ -438,6 +439,7 @@ public static void setUpClass() // Once per-JVM is enough prepareServer(); + AccordStateCache.validateLoadOnEvict(true); } protected static void prePrepareServer() diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 2f9328b7fcf1..c09989eb0ab7 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.db.compaction; -import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; @@ -46,6 +45,10 @@ import accord.local.SaveStatus; import accord.local.Status; import accord.local.Status.Durability; +import accord.messages.Accept; +import accord.messages.Apply; +import accord.messages.Commit; +import accord.messages.PreAccept; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; @@ -292,27 +295,19 @@ Consumer> expectAccordCommandsErase() Consumer> expectAccordCommandsTruncatedWithOutcome() { return partitions -> { - try - { - assertEquals(1, partitions.size()); - Partition partition = partitions.get(0); - assertEquals(1, Iterators.size(partition.unfilteredIterator())); - ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); - Row row = (Row) partition.unfilteredIterator().next(); - assertEquals(CommandsColumns.TRUNCATE_FIELDS[1].length, row.columnCount()); - for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS[1]) - assertNotNull(row.getColumnData(cm)); - assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); - assertNotNull(CommandRows.getWrites(row)); - assertEquals(Durability.Local, CommandRows.getDurability(row)); - assertEquals(TXN_ID, CommandRows.getExecuteAt(row)); - assertEquals(route, CommandRows.getRoute(row)); - assertEquals(SaveStatus.TruncatedApplyWithOutcome, AccordKeyspace.CommandRows.getStatus(row)); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + assertEquals(1, Iterators.size(partition.unfilteredIterator())); + ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); + Row row = (Row) partition.unfilteredIterator().next(); + assertEquals(CommandsColumns.TRUNCATE_FIELDS.length, row.columnCount()); + for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS) + assertNotNull(row.getColumnData(cm)); + assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); + assertEquals(Durability.Local, CommandRows.getDurability(row)); + assertEquals(TXN_ID, CommandRows.getExecuteAt(row)); + assertEquals(route, CommandRows.getRoute(row)); + assertEquals(SaveStatus.TruncatedApplyWithOutcome, AccordKeyspace.CommandRows.getStatus(row)); }; } @@ -324,8 +319,8 @@ Consumer> expectAccordCommandsTruncated() assertEquals(1, Iterators.size(partition.unfilteredIterator())); ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); Row row = (Row)partition.unfilteredIterator().next(); - assertEquals(CommandsColumns.TRUNCATE_FIELDS[0].length, row.columnCount()); - for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS[0]) + assertEquals(CommandsColumns.TRUNCATE_FIELDS.length, row.columnCount()); + for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS) assertNotNull(row.getColumnData(cm)); assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); assertEquals(Durability.Local, CommandRows.getDurability(row)); @@ -425,37 +420,42 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC Seekable key = txn.keys().get(0); PartialDeps partialDeps = Deps.NONE.slice(AccordTestUtils.fullRange(txn)); PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); - PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); - long originalCacheSize = getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys()), safe -> { - // clear cache - long cacheSize = commandStore.getCacheSize(); - commandStore.setCacheSize(0); + PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + PreAccept preAccept = + PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); + commandStore.appendToJournal(preAccept); CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); - return cacheSize; }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + Accept accept = + Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, txnId, partialTxn.keys(), partialDeps); + commandStore.appendToJournal(accept); CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, txnId, partialDeps); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + Commit commit = + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Minimal, txnId, partialTxn, partialDeps, route, null); + commandStore.appendToJournal(commit); CheckedCommands.commit(safe, txnId, route, null, partialTxn, txnId, partialDeps); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { Pair result = AccordTestUtils.processTxnResultDirect(safe, txnId, partialTxn, txnId); + Apply apply = + Apply.SerializationSupport.create(txnId, partialRoute, txnId.epoch(), Apply.Kind.Minimal, partialTxn.keys(), txnId, partialDeps, partialTxn, result.left, result.right); + commandStore.appendToJournal(apply); CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right); }).beginAsResult()); flush(commandStore); - // The apply chain is asychronous so it is easiest to just spin until it is applied + // The apply chain is asychronous, so it is easiest to just spin until it is applied // in order to have the updated state in the system table spinAssertEquals(true, 5, () -> getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys()), safe -> safe.get(txnId, route.homeKey()).current().hasBeen(Status.Applied) ).beginAsResult())); flush(commandStore); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { - commandStore.setCacheSize(originalCacheSize); - }).beginAsResult()); } UntypedResultSet commandsTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS + ";"); diff --git a/test/unit/org/apache/cassandra/journal/JournalTest.java b/test/unit/org/apache/cassandra/journal/JournalTest.java index cb2dd339111b..f6a059051700 100644 --- a/test/unit/org/apache/cassandra/journal/JournalTest.java +++ b/test/unit/org/apache/cassandra/journal/JournalTest.java @@ -62,20 +62,20 @@ public void testSimpleReadWrite() throws IOException journal.write(id3, 3L, Collections.singleton(1)); journal.write(id4, 4L, Collections.singleton(1)); - assertEquals(1L, (long) journal.read(id1)); - assertEquals(2L, (long) journal.read(id2)); - assertEquals(3L, (long) journal.read(id3)); - assertEquals(4L, (long) journal.read(id4)); + assertEquals(1L, (long) journal.readFirst(id1)); + assertEquals(2L, (long) journal.readFirst(id2)); + assertEquals(3L, (long) journal.readFirst(id3)); + assertEquals(4L, (long) journal.readFirst(id4)); journal.shutdown(); journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); journal.start(); - assertEquals(1L, (long) journal.read(id1)); - assertEquals(2L, (long) journal.read(id2)); - assertEquals(3L, (long) journal.read(id3)); - assertEquals(4L, (long) journal.read(id4)); + assertEquals(1L, (long) journal.readFirst(id1)); + assertEquals(2L, (long) journal.readFirst(id2)); + assertEquals(3L, (long) journal.readFirst(id3)); + assertEquals(4L, (long) journal.readFirst(id4)); journal.shutdown(); } @@ -84,12 +84,12 @@ static class LongSerializer implements ValueSerializer { static final LongSerializer INSTANCE = new LongSerializer(); - public int serializedSize(Long value, int userVersion) + public int serializedSize(TimeUUID key, Long value, int userVersion) { return Long.BYTES; } - public void serialize(Long value, DataOutputPlus out, int userVersion) throws IOException + public void serialize(TimeUUID key, Long value, DataOutputPlus out, int userVersion) throws IOException { out.writeLong(value); } diff --git a/test/unit/org/apache/cassandra/journal/SegmentTest.java b/test/unit/org/apache/cassandra/journal/SegmentTest.java index 0294cfef094d..c5e1dff04de7 100644 --- a/test/unit/org/apache/cassandra/journal/SegmentTest.java +++ b/test/unit/org/apache/cassandra/journal/SegmentTest.java @@ -76,22 +76,22 @@ public void testWriteReadActiveSegment() throws IOException // read all 4 entries by id and compare with originals EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - segment.read(id1, holder); + segment.readFirst(id1, holder); assertEquals(id1, holder.key); assertEquals(hosts1, holder.hosts); assertEquals(record1, holder.value); - segment.read(id2, holder); + segment.readFirst(id2, holder); assertEquals(id2, holder.key); assertEquals(hosts2, holder.hosts); assertEquals(record2, holder.value); - segment.read(id3, holder); + segment.readFirst(id3, holder); assertEquals(id3, holder.key); assertEquals(hosts3, holder.hosts); assertEquals(record3, holder.value); - segment.read(id4, holder); + segment.readFirst(id4, holder); assertEquals(id4, holder.key); assertEquals(hosts4, holder.hosts); assertEquals(record4, holder.value); @@ -143,22 +143,22 @@ public void testReadClosedSegmentByID() throws IOException // read all 4 entries by id and compare with originals EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - staticSegment.read(id1, holder); + staticSegment.readFirst(id1, holder); assertEquals(id1, holder.key); assertEquals(hosts1, holder.hosts); assertEquals(record1, holder.value); - staticSegment.read(id2, holder); + staticSegment.readFirst(id2, holder); assertEquals(id2, holder.key); assertEquals(hosts2, holder.hosts); assertEquals(record2, holder.value); - staticSegment.read(id3, holder); + staticSegment.readFirst(id3, holder); assertEquals(id3, holder.key); assertEquals(hosts3, holder.hosts); assertEquals(record3, holder.value); - staticSegment.read(id4, holder); + staticSegment.readFirst(id4, holder); assertEquals(id4, holder.key); assertEquals(hosts4, holder.hosts); assertEquals(record4, holder.value); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 021cbd4897d9..9ee1efe9c04d 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -34,9 +34,12 @@ import accord.local.Command; import accord.local.CommonAttributes; import accord.local.SaveStatus; +import accord.messages.Apply; import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Ranges; +import accord.primitives.Route; import accord.primitives.RoutingKeys; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -107,7 +110,8 @@ public void commandLoadSave() throws Throwable CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); PartialTxn txn = createPartialTxn(0); - attrs.route(RoutingKeys.of(key.toUnseekable()).toRoute(key.toUnseekable())); + Route route = RoutingKeys.of(key.toUnseekable()).toRoute(key.toUnseekable()); + attrs.route(route); attrs.durability(Majority); Ballot promised = ballot(1, clock.incrementAndGet(), 1); Ballot accepted = ballot(1, clock.incrementAndGet(), 1); @@ -120,12 +124,27 @@ public void commandLoadSave() throws Throwable Command.WaitingOn waitingOn = new Command.WaitingOn(dependencies, new ImmutableBitSet(waitingOnCommit), new ImmutableBitSet(waitingOnApply), new ImmutableBitSet(2)); attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); + + Command command = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, waitingOn, result.left, Result.APPLIED); - AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); safeCommand.set(command); + + Apply apply = + Apply.SerializationSupport.create(txnId, + route.slice(Ranges.of(TokenRange.fullRange("ks"))), + 1L, + Apply.Kind.Minimal, + depTxn.keys(), + executeAt, + dependencies, + null, + result.left, + Result.APPLIED); + commandStore.appendToJournal(apply); AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); + logger.info("E: {}", command); Command actual = AccordKeyspace.loadCommand(commandStore, txnId); logger.info("A: {}", actual); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 0b9dcc38b3b0..9dcc72b84d6d 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -99,6 +99,7 @@ public void basicCycleTest() throws Throwable PartialRoute route = fullRoute.slice(fullRange(txn)); PartialTxn partialTxn = txn.slice(route.covering(), true); PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, false, 1, partialTxn, fullRoute); + commandStore.appendToJournal(preAccept); // Check preaccept getUninterruptibly(commandStore.execute(preAccept, instance -> { @@ -131,6 +132,7 @@ public void basicCycleTest() throws Throwable deps = builder.build(); } Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); + commandStore.appendToJournal(accept); getUninterruptibly(commandStore.execute(accept, instance -> { Accept.AcceptReply reply = accept.apply(instance); @@ -151,7 +153,8 @@ public void basicCycleTest() throws Throwable })); // check commit - Commit commit = Commit.SerializerSupport.create(txnId, route, 1, executeAt, partialTxn, deps, fullRoute, null); + Commit commit = Commit.SerializerSupport.create(txnId, route, 1, Commit.Kind.Maximal, executeAt, partialTxn, deps, fullRoute, null); + commandStore.appendToJournal(commit); getUninterruptibly(commandStore.execute(commit, commit::apply)); getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { @@ -180,12 +183,14 @@ public void computeDeps() throws Throwable PartialRoute route = fullRoute.slice(fullRange(txn)); PartialTxn partialTxn = txn.slice(route.covering(), true); PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, false, 1, partialTxn, fullRoute); + commandStore.appendToJournal(preAccept1); getUninterruptibly(commandStore.execute(preAccept1, preAccept1::apply)); // second preaccept should identify txnId1 as a dependency TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); PreAccept preAccept2 = PreAccept.SerializerSupport.create(txnId2, route, 1, 1, false, 1, partialTxn, fullRoute); + commandStore.appendToJournal(preAccept2); getUninterruptibly(commandStore.execute(preAccept2, instance -> { PreAccept.PreAcceptReply reply = preAccept2.apply(instance); Assert.assertTrue(reply.isOk()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index d868850845fc..1cf6a6e68029 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -29,11 +29,13 @@ import accord.local.Node; import accord.local.SaveStatus; import accord.local.Status; +import accord.messages.Commit; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.KeyDeps; import accord.primitives.Keys; +import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.RangeDeps; import accord.primitives.Ranges; @@ -70,18 +72,23 @@ public void serde() RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); FullRoute route = partialTxn.keys().toRoute(routingKey); Deps deps = new Deps(KeyDeps.none((Keys) txn.keys()), RangeDeps.NONE); + PartialDeps partialDeps = deps.slice(GLOBAL_SCOPE); CommonAttributes.Mutable common = new CommonAttributes.Mutable(id); common.partialTxn(partialTxn); common.route(route); - common.partialDeps(deps.slice(GLOBAL_SCOPE)); + common.partialDeps(partialDeps); common.durability(Status.Durability.NotDurable); - Command.WaitingOn waitingOn = Command.WaitingOn.none(deps.slice(GLOBAL_SCOPE)); - Command.Committed committed = Command.SerializerSupport.committed(common, SaveStatus.Committed, id, Ballot.ZERO, Ballot.ZERO, waitingOn); + Command.WaitingOn waitingOn = Command.WaitingOn.none(partialDeps); + Command.Committed committed = Command.SerializerSupport.committed(common, SaveStatus.Committed, id, Ballot.ZERO, Ballot.ZERO, waitingOn); AccordSafeCommand safeCommand = new AccordSafeCommand(AccordTestUtils.loaded(id, null)); safeCommand.set(committed); + + Commit commit = Commit.SerializerSupport.create(id, route.slice(GLOBAL_SCOPE), 1, Commit.Kind.Maximal, id, partialTxn, partialDeps, route, null); + store.appendToJournal(commit); + Mutation mutation = AccordKeyspace.getCommandMutation(store, safeCommand, 42); mutation.apply(); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index 193facf13729..0ac350b92f87 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -141,7 +141,7 @@ public void testAcquisitionAndRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor,500); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = instance.acquire("1"); @@ -170,7 +170,7 @@ public void testRotation() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 5); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[3]; @@ -206,7 +206,7 @@ public void testEvictionOnAcquire() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 5); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; @@ -246,7 +246,7 @@ public void testEvictionOnRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 4); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; @@ -279,7 +279,7 @@ public void testMultiAcquireRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 4); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = instance.acquire("0"); @@ -306,7 +306,7 @@ public void evictionBlockedOnSaving() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 3 + nodeSize(3)); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString item = instance.acquire(Integer.toString(0)); @@ -343,7 +343,7 @@ public void testUpdates() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, 500); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, String::length); + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString = instance.acquire("1"); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 628a09425714..192aeacfc118 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -358,6 +358,9 @@ public static AccordCommandStore createAccordCommandStore( public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } }; + AccordJournal journal = new AccordJournal(); + journal.start(); + SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); AccordCommandStore result = new AccordCommandStore(0, time, @@ -365,6 +368,7 @@ public static AccordCommandStore createAccordCommandStore( null, cs -> NOOP_PROGRESS_LOG, holder, + journal, loadExecutor, saveExecutor); holder.set(result); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 2f32acce0839..14d4f5d409db 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -42,6 +42,9 @@ import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; +import accord.messages.Accept; +import accord.messages.Commit; +import accord.messages.PreAccept; import accord.primitives.Ballot; import accord.primitives.FullRoute; import accord.primitives.Keys; @@ -50,6 +53,7 @@ import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.RoutableKey; +import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -160,7 +164,20 @@ private static Command createCommittedAndPersist(AccordCommandStore commandStore Command command = AccordTestUtils.Commands.committed(txnId, createPartialTxn(0), executeAt); AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); safeCommand.set(command); + AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); + Commit commit = + Commit.SerializerSupport.create(txnId, + command.route().slice(AccordTestUtils.fullRange(command.partialTxn().keys())), + txnId.epoch(), + Commit.Kind.Maximal, + executeAt, + command.partialTxn(), + command.partialDeps(), + Route.castToFullRoute(command.route()), + null); + commandStore.appendToJournal(commit); + return command; } @@ -182,20 +199,37 @@ private static Command createCommittedUsingLifeCycle(AccordCommandStore commandS Ranges ranges = AccordTestUtils.fullRange(partialTxn.keys()); PartialRoute partialRoute = route.slice(ranges); PartialDeps deps = PartialDeps.builder(ranges).build(); + + // create and write messages to the journal for loading to succeed + PreAccept preAccept = + PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); + Accept accept = + Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); + Commit commit = + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Minimal, executeAt, partialTxn, deps, route, null); + + commandStore.appendToJournal(preAccept); + commandStore.appendToJournal(accept); + commandStore.appendToJournal(commit); + try { - return getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys()), safe -> { + Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys()), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); CheckedCommands.commit(safe, txnId, route, null, partialTxn, executeAt, deps); + return safe.ifInitialised(txnId).current(); + }).beginAsResult()); - // clear cache + // clear cache + commandStore.executeBlocking(() -> { long cacheSize = commandStore.getCacheSize(); commandStore.setCacheSize(0); commandStore.setCacheSize(cacheSize); + commandStore.cache().awaitSaveResults(); + }); - return safe.ifInitialised(txnId).current(); - }).beginAsResult()); + return command; } catch (ExecutionException e) { From 19fcbcab61b2fb227abc6aced2f0f16c02de1358 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Wed, 27 Sep 2023 16:27:05 -0500 Subject: [PATCH 072/340] ninja-fix: checkstyle fixes in VirtualTableTest, HistoryValidatorTest, SeedDefiner, RunStartDefiner, and Config --- .../apache/cassandra/simulator/logging/RunStartDefiner.java | 6 ++++-- .../org/apache/cassandra/simulator/logging/SeedDefiner.java | 1 - .../cassandra/simulator/paxos/HistoryValidatorTest.java | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java b/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java index 92066c182470..1c522f11bf74 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java @@ -18,7 +18,8 @@ package org.apache.cassandra.simulator.logging; -import accord.utils.Invariants; +import java.util.concurrent.TimeUnit; + import ch.qos.logback.core.PropertyDefinerBase; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -26,7 +27,8 @@ public class RunStartDefiner extends PropertyDefinerBase { static { - Invariants.checkState(CassandraRelevantProperties.SIMULATOR_STARTED.getString() != null); + if (CassandraRelevantProperties.SIMULATOR_STARTED.getString() == null) + CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); } @Override diff --git a/test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java b/test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java index a3e11abe3c32..12d1ca8d2d8a 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/logging/SeedDefiner.java @@ -23,7 +23,6 @@ public class SeedDefiner extends PropertyDefinerBase { - public static void setSeed(long seed) { CassandraRelevantProperties.SIMULATOR_SEED.setString("0x" + Long.toHexString(seed)); diff --git a/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java b/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java index d7df4972cca6..a52345d0807d 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/paxos/HistoryValidatorTest.java @@ -30,6 +30,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.junit.Assume; import org.junit.FixMethodOrder; import org.junit.Test; @@ -43,7 +44,6 @@ import com.carrotsearch.hppc.IntIntHashMap; import com.carrotsearch.hppc.IntIntMap; import com.carrotsearch.hppc.IntSet; -import org.apache.cassandra.config.CassandraRelevantProperties; import com.carrotsearch.hppc.cursors.IntCursor; import org.apache.cassandra.distributed.api.QueryResults; import org.apache.cassandra.utils.Clock; From cffe1cc61a9915f80466271c130b91abad1e50e4 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Fri, 29 Sep 2023 16:33:09 -0500 Subject: [PATCH 073/340] ninja-fix: minor post-TCM-rebase cleanup --- modules/accord | 2 +- src/java/org/apache/cassandra/net/Verb.java | 18 +++++++++--------- .../service/accord/AccordTopologyUtils.java | 8 ++++---- .../apache/cassandra/repair/FuzzTestBase.java | 8 ++++---- .../accord/AccordSyncPropagatorTest.java | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/modules/accord b/modules/accord index 79fc1ebf7db6..b1befa3cc0a8 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 79fc1ebf7db6aa5e616dbef1bc61b616fea3c2c6 +Subproject commit b1befa3cc0a8496451bb48ec3bb1c0f56b8c7653 diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index c87fa0f16376..54849389f078 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -232,7 +232,7 @@ public enum Verb // repair; mostly doesn't use callbacks and sends responses as their own request messages, with matching sessions by uuid; should eventually harmonize and make idiomatic // for the repair messages that implement retry logic, use rpcTimeout so the single request fails faster, then retries can be used to recover - REPAIR_RSP (100, P1, repairTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + REPAIR_RSP (100, P1, repairTimeout, REQUEST_RESPONSE, () -> NoPayload.serializer, RESPONSE_HANDLER ), VALIDATION_RSP (102, P1, repairValidationRspTimeout, ANTI_ENTROPY, () -> ValidationResponse.serializer, () -> RepairMessageVerbHandler.instance(), REPAIR_RSP ), VALIDATION_REQ (101, P1, repairWithBackoffTimeout, ANTI_ENTROPY, () -> ValidationRequest.serializer, () -> RepairMessageVerbHandler.instance(), REPAIR_RSP ), SYNC_RSP (104, P1, repairWithBackoffTimeout, ANTI_ENTROPY, () -> SyncResponse.serializer, () -> RepairMessageVerbHandler.instance(), REPAIR_RSP ), @@ -277,25 +277,25 @@ public enum Verb PAXOS2_CLEANUP_COMPLETE_REQ (48, P2, repairTimeout, PAXOS_REPAIR, () -> PaxosCleanupComplete.serializer, () -> PaxosCleanupComplete.verbHandler, PAXOS2_CLEANUP_COMPLETE_RSP ), // transactional cluster metadata - TCM_COMMIT_RSP (801, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::commitResultSerializer, () -> ResponseVerbHandler.instance ), + TCM_COMMIT_RSP (801, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::commitResultSerializer, RESPONSE_HANDLER ), TCM_COMMIT_REQ (802, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::commitSerializer, () -> commitRequestHandler(), TCM_COMMIT_RSP ), - TCM_FETCH_CMS_LOG_RSP (803, P0, rpcTimeout, FETCH_LOG, MessageSerializers::logStateSerializer, () -> ResponseVerbHandler.instance ), + TCM_FETCH_CMS_LOG_RSP (803, P0, rpcTimeout, FETCH_LOG, MessageSerializers::logStateSerializer, RESPONSE_HANDLER ), TCM_FETCH_CMS_LOG_REQ (804, P0, rpcTimeout, FETCH_LOG, () -> FetchCMSLog.serializer, () -> fetchLogRequestHandler(), TCM_FETCH_CMS_LOG_RSP ), TCM_REPLICATION (805, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::logStateSerializer, () -> replicationHandler() ), - TCM_NOTIFY_RSP (806, P0, rpcTimeout, INTERNAL_METADATA, () -> Epoch.messageSerializer, () -> ResponseVerbHandler.instance ), + TCM_NOTIFY_RSP (806, P0, rpcTimeout, INTERNAL_METADATA, () -> Epoch.messageSerializer, RESPONSE_HANDLER ), TCM_NOTIFY_REQ (807, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::logStateSerializer, () -> logNotifyHandler(), TCM_NOTIFY_RSP ), TCM_CURRENT_EPOCH_REQ (808, P0, rpcTimeout, INTERNAL_METADATA, () -> Epoch.messageSerializer, () -> currentEpochRequestHandler(), TCM_NOTIFY_RSP ), - TCM_INIT_MIG_RSP (809, P0, rpcTimeout, INTERNAL_METADATA, () -> CMSInitializationResponse.serializer, () -> ResponseVerbHandler.instance ), + TCM_INIT_MIG_RSP (809, P0, rpcTimeout, INTERNAL_METADATA, () -> CMSInitializationResponse.serializer, RESPONSE_HANDLER ), TCM_INIT_MIG_REQ (810, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::initRequestSerializer, () -> Election.instance.prepareHandler, TCM_INIT_MIG_RSP ), TCM_ABORT_MIG (811, P0, rpcTimeout, INTERNAL_METADATA, () -> CMSInitializationRequest.Initiator.serializer,() -> Election.instance.abortHandler, TCM_INIT_MIG_RSP ), - TCM_DISCOVER_RSP (812, P0, rpcTimeout, INTERNAL_METADATA, () -> Discovery.serializer, () -> ResponseVerbHandler.instance ), + TCM_DISCOVER_RSP (812, P0, rpcTimeout, INTERNAL_METADATA, () -> Discovery.serializer, RESPONSE_HANDLER ), TCM_DISCOVER_REQ (813, P0, rpcTimeout, INTERNAL_METADATA, () -> NoPayload.serializer, () -> Discovery.instance.requestHandler, TCM_DISCOVER_RSP ), - TCM_FETCH_PEER_LOG_RSP (818, P0, rpcTimeout, FETCH_LOG, MessageSerializers::logStateSerializer, () -> ResponseVerbHandler.instance ), + TCM_FETCH_PEER_LOG_RSP (818, P0, rpcTimeout, FETCH_LOG, MessageSerializers::logStateSerializer, RESPONSE_HANDLER ), TCM_FETCH_PEER_LOG_REQ (819, P0, rpcTimeout, FETCH_LOG, () -> FetchPeerLog.serializer, () -> FetchPeerLog.Handler.instance, TCM_FETCH_PEER_LOG_RSP ), - INITIATE_DATA_MOVEMENTS_RSP (814, P1, rpcTimeout, MISC, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + INITIATE_DATA_MOVEMENTS_RSP (814, P1, rpcTimeout, MISC, () -> NoPayload.serializer, RESPONSE_HANDLER ), INITIATE_DATA_MOVEMENTS_REQ (815, P1, rpcTimeout, MISC, () -> DataMovement.serializer, () -> DataMovementVerbHandler.instance, INITIATE_DATA_MOVEMENTS_RSP ), - DATA_MOVEMENT_EXECUTED_RSP (816, P1, rpcTimeout, MISC, () -> NoPayload.serializer, () -> ResponseVerbHandler.instance ), + DATA_MOVEMENT_EXECUTED_RSP (816, P1, rpcTimeout, MISC, () -> NoPayload.serializer, RESPONSE_HANDLER ), DATA_MOVEMENT_EXECUTED_REQ (817, P1, rpcTimeout, MISC, () -> DataMovement.Status.serializer, () -> DataMovements.instance, DATA_MOVEMENT_EXECUTED_RSP ), // accord diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index 3884a19d00c6..d385cf5d3185 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -35,7 +35,6 @@ import accord.utils.Invariants; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; @@ -48,6 +47,7 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; +import org.apache.cassandra.tcm.ownership.VersionedEndpoints; public class AccordTopologyUtils { @@ -56,7 +56,7 @@ static Node.Id tcmIdToAccord(NodeId nodeId) return new Node.Id(nodeId.id()); } - private static Shard createShard(TokenRange range, Directory directory, EndpointsForRange reads, EndpointsForRange writes) + private static Shard createShard(TokenRange range, Directory directory, VersionedEndpoints.ForRange reads, VersionedEndpoints.ForRange writes) { Function endpointMapper = e -> { NodeId tcmId = directory.peerId(e); @@ -106,8 +106,8 @@ public static List createShards(KeyspaceMetadata keyspace, DataPlacements List shards = new ArrayList<>(ranges.size()); for (Range range : ranges) { - EndpointsForRange reads = placement.reads.forRange(range).get(); - EndpointsForRange writes = placement.reads.forRange(range).get(); + VersionedEndpoints.ForRange reads = placement.reads.forRange(range); + VersionedEndpoints.ForRange writes = placement.reads.forRange(range); // TCM doesn't create wrap around ranges Invariants.checkArgument(!range.isWrapAround() || range.right.equals(range.right.minValue()), diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 593a4941287c..50716c80b92e 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -816,9 +816,9 @@ public void onResponse(Message msg) callback.onResponse(msg); } - public void onFailure(InetAddressAndPort from, RequestFailure failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - if (callback.invokeOnFailure()) callback.onFailure(from, failureReason); + if (callback.invokeOnFailure()) callback.onFailure(from, failure); } } @@ -992,9 +992,9 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailure failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - promise.tryFailure(new MessagingService.FailureResponseException(from, failureReason)); + promise.tryFailure(new MessagingService.FailureResponseException(from, failure)); } @Override diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index 73830bda44a6..f3bb71707d49 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -361,7 +361,7 @@ public boolean isAlive(InetAddressAndPort ep) { if (self.equals(ep)) return true; - return !nodeRuns.computeIfAbsent(ep, ignore -> Gens.bools().biasedRepeatingRuns(.01)).next(rs); + return !nodeRuns.computeIfAbsent(ep, ignore -> Gens.bools().biasedRepeatingRuns(.01, rs.nextInt(3, 15))).next(rs); } @Override From 27a931397049fcb56fe83236608594063d1a91d1 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Tue, 10 Oct 2023 12:09:44 +0200 Subject: [PATCH 074/340] CEP-15: Accord metrics Patch by Jacek Lewandowski, reviewed by Caleb Rackliffe, David Capwell and Henrik Ingo for CASSANDRA-18580 --- modules/accord | 2 +- .../metrics/AccordClientRequestMetrics.java | 4 - .../cassandra/metrics/AccordMetrics.java | 312 ++++++++++++++++++ .../metrics/AccordStateCacheMetrics.java | 50 +++ .../cassandra/metrics/CacheAccessMetrics.java | 73 ++++ .../cassandra/metrics/CacheMetrics.java | 2 +- .../cassandra/metrics/CacheSizeMetrics.java | 63 ++++ .../cassandra/metrics/RatioGaugeSet.java | 121 +++++++ .../service/accord/AccordCommandStore.java | 44 ++- .../service/accord/AccordCommandStores.java | 39 ++- .../service/accord/AccordService.java | 3 +- .../service/accord/AccordStateCache.java | 133 ++++---- .../service/accord/api/AccordAgent.java | 8 + .../test/accord/AccordMetricsTest.java | 273 +++++++++++++++ .../CompactionAccordIteratorsTest.java | 6 +- .../io/sstable/LargePartitionsTest.java | 6 +- .../service/accord/AccordCommandTest.java | 4 +- .../service/accord/AccordStateCacheTest.java | 179 ++++++++-- .../service/accord/AccordTestUtils.java | 6 +- .../service/accord/async/AsyncLoaderTest.java | 4 +- .../accord/async/AsyncOperationTest.java | 8 +- 21 files changed, 1217 insertions(+), 123 deletions(-) create mode 100644 src/java/org/apache/cassandra/metrics/AccordMetrics.java create mode 100644 src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java create mode 100644 src/java/org/apache/cassandra/metrics/CacheAccessMetrics.java create mode 100644 src/java/org/apache/cassandra/metrics/CacheSizeMetrics.java create mode 100644 src/java/org/apache/cassandra/metrics/RatioGaugeSet.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java diff --git a/modules/accord b/modules/accord index b1befa3cc0a8..0419858bd1f6 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit b1befa3cc0a8496451bb48ec3bb1c0f56b8c7653 +Subproject commit 0419858bd1f6761f08fd1369477f7c142f5bbb4f diff --git a/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java index c95c3bd11fc3..a9d1f28c47be 100644 --- a/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java @@ -19,20 +19,17 @@ package org.apache.cassandra.metrics; import com.codahale.metrics.Histogram; -import com.codahale.metrics.Meter; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; public class AccordClientRequestMetrics extends ClientRequestMetrics { - public final Meter preempts; public final Histogram keySize; public AccordClientRequestMetrics(String scope) { super(scope); - preempts = Metrics.meter(factory.createMetricName("Preempts")); keySize = Metrics.histogram(factory.createMetricName("KeySizeHistogram"), false); } @@ -40,7 +37,6 @@ public AccordClientRequestMetrics(String scope) public void release() { super.release(); - Metrics.remove(factory.createMetricName("Preempts")); Metrics.remove(factory.createMetricName("KeySizeHistogram")); } } diff --git a/src/java/org/apache/cassandra/metrics/AccordMetrics.java b/src/java/org/apache/cassandra/metrics/AccordMetrics.java new file mode 100644 index 000000000000..5601b9fa406b --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/AccordMetrics.java @@ -0,0 +1,312 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.metrics; + +import java.lang.reflect.Field; +import java.util.concurrent.TimeUnit; + +import accord.api.EventsListener; +import accord.local.Command; +import accord.primitives.Deps; +import accord.primitives.PartialDeps; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import com.codahale.metrics.Counting; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.Timer; +import org.apache.cassandra.service.accord.AccordService; + +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +public class AccordMetrics +{ + public final static AccordMetrics readMetrics = new AccordMetrics("ro"); + public final static AccordMetrics writeMetrics = new AccordMetrics("rw"); + + public static final String COMMIT_LATENCY = "CommitLatency"; + public static final String EXECUTE_LATENCY = "ExecuteLatency"; + public static final String APPLY_LATENCY = "ApplyLatency"; + public static final String APPLY_DURATION = "ApplyDuration"; + public static final String PARTIAL_DEPENDENCIES = "PartialDependencies"; + public static final String PROGRESS_LOG_SIZE = "ProgressLogSize"; + + public static final String DEPENDENCIES = "Dependencies"; + public static final String FAST_PATHS = "FastPaths"; + public static final String SLOW_PATHS = "SlowPaths"; + public static final String PREEMPTS = "Preempts"; + public static final String TIMEOUTS = "Timeouts"; + public static final String INVALIDATIONS = "Invalidations"; + public static final String RECOVERY_DELAY = "RecoveryDelay"; + public static final String RECOVERY_TIME = "RecoveryTime"; + public static final String FAST_PATH_TO_TOTAL = "FastPathToTotal"; + public static final String ACCORD_REPLICA = "accord-replica"; + public static final String ACCORD_COORDINATOR = "accord-coordinator"; + + /** + * The time between start on the coordinator and commit on this replica. + */ + public final Timer commitLatency; + + /** + * The time between start on the coordinator and execution on this replica. + */ + public final Timer executeLatency; + + /** + * The time between start on the coordinator and application on this replica. + */ + public final Timer applyLatency; + + /** + * Duration of applying changes. + */ + public final Timer applyDuration; + + /** + * A histogram of the number of dependencies per partial transaction at this replica. + */ + public final Histogram partialDependencies; + + public final Meter progressLogSize; + + /** + * A histogram of the number of dependencies per transaction at this coordinator. + */ + public final Histogram dependencies; + + /** + * The number of fast path transactions executed on this coordinator. + */ + public final Meter fastPaths; + + /** + * The number of slow path transactions executed on this coordinator. + */ + public final Meter slowPaths; + + /** + * The number of preempted transactions on this coordinator. + */ + public final Meter preempts; + + /** + * The number of timed out transactions on this coordinator. + */ + public final Meter timeouts; + + /** + * The number of invalidated transactions on this coordinator. + */ + public final Meter invalidations; + + /** + * The time between the start of the transaction and the start of the recovery, if the transaction is recovered. + */ + public final Timer recoveryDelay; + + /** + * The time between the start of the recovery and the execution of the transaction, if the transaction is recovered. + */ + public final Timer recoveryDuration; + + /** + * The ratio of the number of fast path transactions to the total number of transactions. + */ + public final RatioGaugeSet fastPathToTotal; + + private AccordMetrics(String scope) + { + DefaultNameFactory replica = new DefaultNameFactory(ACCORD_REPLICA, scope); + commitLatency = Metrics.timer(replica.createMetricName(COMMIT_LATENCY)); + executeLatency = Metrics.timer(replica.createMetricName(EXECUTE_LATENCY)); + applyLatency = Metrics.timer(replica.createMetricName(APPLY_LATENCY)); + applyDuration = Metrics.timer(replica.createMetricName(APPLY_DURATION)); + partialDependencies = Metrics.histogram(replica.createMetricName(PARTIAL_DEPENDENCIES), true); + progressLogSize = Metrics.meter(replica.createMetricName(PROGRESS_LOG_SIZE)); + + DefaultNameFactory coordinator = new DefaultNameFactory(ACCORD_COORDINATOR, scope); + dependencies = Metrics.histogram(coordinator.createMetricName(DEPENDENCIES), true); + fastPaths = Metrics.meter(coordinator.createMetricName(FAST_PATHS)); + slowPaths = Metrics.meter(coordinator.createMetricName(SLOW_PATHS)); + preempts = Metrics.meter(coordinator.createMetricName(PREEMPTS)); + timeouts = Metrics.meter(coordinator.createMetricName(TIMEOUTS)); + invalidations = Metrics.meter(coordinator.createMetricName(INVALIDATIONS)); + recoveryDelay = Metrics.timer(coordinator.createMetricName(RECOVERY_DELAY)); + recoveryDuration = Metrics.timer(coordinator.createMetricName(RECOVERY_TIME)); + fastPathToTotal = new RatioGaugeSet(fastPaths, RatioGaugeSet.sum(fastPaths, slowPaths), coordinator, FAST_PATH_TO_TOTAL + ".%s"); + } + + @Override + public String toString() + { + StringBuilder builder = new StringBuilder(); + builder.append("AccordMetrics ["); + + try + { + for (Field f : getClass().getDeclaredFields()) + { + f.setAccessible(true); + if (Counting.class.isAssignableFrom(f.getType())) + { + Counting metric = (Counting) f.get(this); + builder.append(String.format("%s: count=%d, ", f.getName(), metric.getCount())); + } + } + } + catch (IllegalAccessException e) + { + throw new RuntimeException(e); + } + builder.append("]"); + return builder.toString(); + } + + public static class Listener implements EventsListener + { + public final static Listener instance = new Listener(AccordMetrics.readMetrics, AccordMetrics.writeMetrics); + + private final AccordMetrics readMetrics; + private final AccordMetrics writeMetrics; + + public Listener(AccordMetrics readMetrics, AccordMetrics writeMetrics) + { + this.readMetrics = readMetrics; + this.writeMetrics = writeMetrics; + } + + private AccordMetrics forTransaction(TxnId txnId) + { + if (txnId.isWrite()) + return writeMetrics; + else if (txnId.isRead()) + return readMetrics; + else + return null; + } + + @Override + public void onCommitted(Command cmd) + { + long now = AccordService.uniqueNow(); + AccordMetrics metrics = forTransaction(cmd.txnId()); + if (metrics != null) + { + long trxTimestamp = cmd.txnId().hlc(); + metrics.commitLatency.update(now - trxTimestamp, TimeUnit.MICROSECONDS); + } + } + + @Override + public void onExecuted(Command cmd) + { + long now = AccordService.uniqueNow(); + AccordMetrics metrics = forTransaction(cmd.txnId()); + if (metrics != null) + { + Timestamp trxTimestamp = cmd.txnId(); + metrics.executeLatency.update(now - trxTimestamp.hlc(), TimeUnit.MICROSECONDS); + PartialDeps deps = cmd.partialDeps(); + metrics.partialDependencies.update(deps != null ? deps.txnIdCount() : 0); + } + } + + @Override + public void onApplied(Command cmd, long applyStartTimestamp) + { + long now = AccordService.uniqueNow(); + AccordMetrics metrics = forTransaction(cmd.txnId()); + if (metrics != null) + { + Timestamp trxTimestamp = cmd.txnId(); + metrics.applyLatency.update(now - trxTimestamp.hlc(), TimeUnit.MICROSECONDS); + metrics.applyDuration.update(now - applyStartTimestamp, TimeUnit.MICROSECONDS); + } + } + + @Override + public void onFastPathTaken(TxnId txnId, Deps deps) + { + AccordMetrics metrics = forTransaction(txnId); + if (metrics != null) + { + metrics.fastPaths.mark(); + metrics.dependencies.update(deps.txnIdCount()); + } + } + + @Override + public void onSlowPathTaken(TxnId txnId, Deps deps) + { + AccordMetrics metrics = forTransaction(txnId); + if (metrics != null) + { + metrics.slowPaths.mark(); + metrics.dependencies.update(deps.txnIdCount()); + } + } + + @Override + public void onRecover(TxnId txnId, Timestamp recoveryTimestamp) + { + AccordMetrics metrics = forTransaction(txnId); + if (metrics != null) + { + long now = AccordService.uniqueNow(); + + metrics.recoveryDuration.update(now - recoveryTimestamp.hlc(), MICROSECONDS); + metrics.recoveryDelay.update(recoveryTimestamp.hlc() - txnId.hlc(), MICROSECONDS); + } + } + + @Override + public void onPreempted(TxnId txnId) + { + AccordMetrics metrics = forTransaction(txnId); + if (metrics != null) + metrics.preempts.mark(); + } + + @Override + public void onTimeout(TxnId txnId) + { + AccordMetrics metrics = forTransaction(txnId); + if (metrics != null) + metrics.timeouts.mark(); + } + + @Override + public void onInvalidated(TxnId txnId) + { + AccordMetrics metrics = forTransaction(txnId); + if (metrics != null) + metrics.invalidations.mark(); + } + + @Override + public void onProgressLogSizeChange(TxnId txnId, int delta) + { + AccordMetrics metrics = forTransaction(txnId); + if (metrics != null) + metrics.progressLogSize.mark(delta); + } + } +} diff --git a/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java b/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java new file mode 100644 index 000000000000..fd4308a356d2 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.metrics; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import com.codahale.metrics.Histogram; + +import static org.apache.cassandra.metrics.CacheMetrics.TYPE_NAME; +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +public class AccordStateCacheMetrics extends CacheAccessMetrics +{ + public static final String OBJECT_SIZE = "ObjectSize"; + + public final Histogram objectSize; + + private final Map, CacheAccessMetrics> instanceMetrics = new ConcurrentHashMap<>(2); + + private final String type; + + public AccordStateCacheMetrics(String type) + { + super(new DefaultNameFactory(TYPE_NAME, type)); + objectSize = Metrics.histogram(factory.createMetricName(OBJECT_SIZE), false); + this.type = type; + } + + public CacheAccessMetrics forInstance(Class klass) + { + return instanceMetrics.computeIfAbsent(klass, k -> new CacheAccessMetrics(new DefaultNameFactory(TYPE_NAME, String.format("%s-%s", type, k.getSimpleName())))); + } +} diff --git a/src/java/org/apache/cassandra/metrics/CacheAccessMetrics.java b/src/java/org/apache/cassandra/metrics/CacheAccessMetrics.java new file mode 100644 index 000000000000..59d76a9f4904 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/CacheAccessMetrics.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.metrics; + +import com.google.common.annotations.VisibleForTesting; + +import com.codahale.metrics.Meter; + +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +public class CacheAccessMetrics +{ + /** + * Total number of cache hits + */ + public final Meter hits; + + /** + * Total number of cache misses + */ + public final Meter misses; + + /** + * Total number of cache requests + */ + public final Meter requests; + + public final RatioGaugeSet hitRate; + + public final RatioGaugeSet missRate; + + protected final MetricNameFactory factory; + + public CacheAccessMetrics(MetricNameFactory factory) + { + this.factory = factory; + + this.hits = Metrics.meter(factory.createMetricName("Hits")); + this.misses = Metrics.meter(factory.createMetricName("Misses")); + this.requests = Metrics.meter(factory.createMetricName("Requests")); + + this.hitRate = new RatioGaugeSet(hits, requests, factory, "%sHitRate"); + this.missRate = new RatioGaugeSet(misses, requests, factory, "%sMissRate"); + } + + @VisibleForTesting + public void reset() + { + // No actual reset happens. The Meter counter is put to zero but will not reset the moving averages + // It rather injects a weird value into them. + // This method is being only used by CacheMetricsTest and CachingBench so fixing this issue was acknowledged + // but not considered mandatory to be fixed now (CASSANDRA-16228) + hits.mark(-hits.getCount()); + misses.mark(-misses.getCount()); + requests.mark(-requests.getCount()); + } +} diff --git a/src/java/org/apache/cassandra/metrics/CacheMetrics.java b/src/java/org/apache/cassandra/metrics/CacheMetrics.java index 574b0f065c20..13a59f88e658 100644 --- a/src/java/org/apache/cassandra/metrics/CacheMetrics.java +++ b/src/java/org/apache/cassandra/metrics/CacheMetrics.java @@ -38,7 +38,7 @@ public class CacheMetrics extends AbstractCacheMetrics public final Gauge entries; /** - * Create metrics for given cache. + * Create metrics for the given cache supporting entity. * * @param type Type of Cache to identify metrics * @param cache Weighted Cache to measure metrics diff --git a/src/java/org/apache/cassandra/metrics/CacheSizeMetrics.java b/src/java/org/apache/cassandra/metrics/CacheSizeMetrics.java new file mode 100644 index 000000000000..fb34cfcc19f7 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/CacheSizeMetrics.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.metrics; + +import com.codahale.metrics.Gauge; +import org.apache.cassandra.cache.CacheSize; + +import static org.apache.cassandra.metrics.CacheMetrics.TYPE_NAME; +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +public class CacheSizeMetrics +{ + public static final String CAPACITY = "Capacity"; + public static final String SIZE = "Size"; + public static final String ENTRIES = "Entries"; + /** + * Cache capacity in bytes + */ + public final Gauge capacity; + + /** + * Total size of cache, in bytes + */ + public final Gauge size; + + /** + * Total number of cache entries + */ + public final Gauge entries; + + /** + * Create metrics for the given cache supporting entity. + * + * @param type Type of Cache to identify metrics. + * @param cache Cache to measure metrics + */ + public CacheSizeMetrics(String type, CacheSize cache) + { + this(new DefaultNameFactory(TYPE_NAME, type), cache); + } + + public CacheSizeMetrics(MetricNameFactory factory, CacheSize cache) + { + capacity = Metrics.register(factory.createMetricName(CAPACITY), cache::capacity); + size = Metrics.register(factory.createMetricName(SIZE), cache::weightedSize); + entries = Metrics.register(factory.createMetricName(ENTRIES), cache::size); + } +} diff --git a/src/java/org/apache/cassandra/metrics/RatioGaugeSet.java b/src/java/org/apache/cassandra/metrics/RatioGaugeSet.java new file mode 100644 index 000000000000..057a69011b2b --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/RatioGaugeSet.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.metrics; + +import java.util.function.DoubleSupplier; +import java.util.function.ToDoubleFunction; + +import com.codahale.metrics.Metered; +import com.codahale.metrics.RatioGauge; +import org.apache.cassandra.metrics.CassandraMetricsRegistry.MetricName; + +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +public class RatioGaugeSet +{ + public static final String ONE_MINUTE = "OneMinute"; + public static final String FIVE_MINUTE = "FiveMinute"; + public static final String FIFTEEN_MINUTE = "FifteenMinute"; + public static final String MEAN_RATIO = ""; + + public final RatioGauge oneMinute; + public final RatioGauge fiveMinute; + public final RatioGauge fifteenMinute; + public final RatioGauge mean; + + public RatioGaugeSet(Metered numerator, Metered denominator, MetricNameFactory factory, String namePattern) + { + this.oneMinute = ratioGauge(factory.createMetricName(String.format(namePattern, ONE_MINUTE)), numerator::getOneMinuteRate, denominator::getOneMinuteRate); + this.fiveMinute = ratioGauge(factory.createMetricName(String.format(namePattern, FIVE_MINUTE)), numerator::getFiveMinuteRate, denominator::getFiveMinuteRate); + this.fifteenMinute = ratioGauge(factory.createMetricName(String.format(namePattern, FIFTEEN_MINUTE)), numerator::getFifteenMinuteRate, denominator::getFifteenMinuteRate); + this.mean = ratioGauge(factory.createMetricName(String.format(namePattern, MEAN_RATIO)), numerator::getCount, denominator::getCount); + } + + private static RatioGauge ratioGauge(DoubleSupplier numerator, DoubleSupplier denominator) + { + return new RatioGauge() + { + protected Ratio getRatio() + { + return Ratio.of(numerator.getAsDouble(), denominator.getAsDouble()); + } + }; + } + + private RatioGauge ratioGauge(MetricName name, DoubleSupplier numerator, DoubleSupplier denominator) + { + return Metrics.register(name, ratioGauge(numerator, denominator)); + } + + public static Metered sum(Metered... meters) + { + return new SummingMeter(meters); + } + + private static class SummingMeter implements Metered + { + private final Metered[] meters; + + public SummingMeter(Metered... meters) + { + this.meters = meters; + } + + @Override + public long getCount() + { + long count = 0; + for (Metered meter : meters) + count += meter.getCount(); + return count; + } + + private double getRate(ToDoubleFunction rateSupplier) + { + double rate = 0; + for (Metered meter : meters) + rate += rateSupplier.applyAsDouble(meter); + return rate; + } + + @Override + public double getMeanRate() + { + return getRate(Metered::getMeanRate); + } + + @Override + public double getFifteenMinuteRate() + { + return getRate(Metered::getFifteenMinuteRate); + } + + @Override + public double getFiveMinuteRate() + { + return getRate(Metered::getFiveMinuteRate); + } + + @Override + public double getOneMinuteRate() + { + return getRate(Metered::getOneMinuteRate); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 1913b3213fa4..c9a3224b9a1d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -30,12 +30,10 @@ import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,10 +70,12 @@ import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.Observable; +import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.ExecutorPlus; -import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.service.accord.async.ExecutionOrder; @@ -85,7 +85,7 @@ import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -public class AccordCommandStore extends CommandStore +public class AccordCommandStore extends CommandStore implements CacheSize { private static final Logger logger = LoggerFactory.getLogger(AccordCommandStore.class); @@ -124,9 +124,10 @@ public AccordCommandStore(int id, DataStore dataStore, ProgressLog.Factory progressLogFactory, EpochUpdateHolder epochUpdateHolder, - AccordJournal journal) + AccordJournal journal, + AccordStateCacheMetrics cacheMetrics) { - this(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder, journal, Stage.READ.executor(), Stage.MUTATION.executor()); + this(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder, journal, Stage.READ.executor(), Stage.MUTATION.executor(), cacheMetrics); } @VisibleForTesting @@ -138,7 +139,8 @@ public AccordCommandStore(int id, EpochUpdateHolder epochUpdateHolder, AccordJournal journal, ExecutorPlus loadExecutor, - ExecutorPlus saveExecutor) + ExecutorPlus saveExecutor, + AccordStateCacheMetrics cacheMetrics) { super(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder); this.journal = journal; @@ -146,7 +148,7 @@ public AccordCommandStore(int id, executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); executionOrder = new ExecutionOrder(); threadId = getThreadId(executor); - stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20); + stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20, cacheMetrics); commandCache = stateCache.instance(TxnId.class, TxnId.class, @@ -181,10 +183,10 @@ public AccordCommandStore(int id, executor.execute(this::loadRangesToCommands); } - static Factory factory(AccordJournal journal) + static Factory factory(AccordJournal journal, AccordStateCacheMetrics cacheMetrics) { return (id, time, agent, dataStore, progressLogFactory, rangesForEpoch) -> - new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, rangesForEpoch, journal); + new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, rangesForEpoch, journal, cacheMetrics); } private void loadRangesToCommands() @@ -250,15 +252,29 @@ public boolean inStore() return Thread.currentThread().getId() == threadId; } - public void setCacheSize(long bytes) + @Override + public void setCapacity(long bytes) { checkInStoreThread(); - stateCache.setMaxSize(bytes); + stateCache.setCapacity(bytes); } - public long getCacheSize() + @Override + public long capacity() + { + return stateCache.capacity(); + } + + @Override + public int size() + { + return stateCache.size(); + } + + @Override + public long weightedSize() { - return stateCache.getMaxSize(); + return stateCache.weightedSize(); } public void checkInStoreThread() diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index bbae3c3d54e6..d5f1ed3b3534 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -30,15 +30,24 @@ import accord.primitives.Range; import accord.topology.Topology; import accord.utils.RandomSource; +import org.apache.cassandra.cache.CacheSize; +import org.apache.cassandra.metrics.AccordStateCacheMetrics; +import org.apache.cassandra.metrics.CacheSizeMetrics; import org.apache.cassandra.service.accord.api.AccordRoutingKey; -public class AccordCommandStores extends CommandStores +public class AccordCommandStores extends CommandStores implements CacheSize { + public static final String ACCORD_STATE_CACHE = "accord-state-cache"; + + private final CacheSizeMetrics cacheSizeMetrics; + private long cacheSize; + AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, RandomSource random, ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, AccordJournal journal) { - super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore.factory(journal)); - setCacheSize(maxCacheSize()); + super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore.factory(journal, new AccordStateCacheMetrics(ACCORD_STATE_CACHE))); + setCapacity(maxCacheSize()); + this.cacheSizeMetrics = new CacheSizeMetrics(ACCORD_STATE_CACHE, this); } static Factory factory(AccordJournal journal) @@ -67,21 +76,37 @@ private static boolean contains(Topology previous, String searchKeyspace) return false; } - private long cacheSize; - - synchronized void setCacheSize(long bytes) + public synchronized void setCapacity(long bytes) { cacheSize = bytes; refreshCacheSizes(); } + @Override + public long capacity() + { + return cacheSize; + } + + @Override + public int size() + { + return unsafeFoldLeft(0, (size, commandStore) -> size + ((AccordCommandStore) commandStore).size()); + } + + @Override + public long weightedSize() + { + return unsafeFoldLeft(0L, (size, commandStore) -> size + ((AccordCommandStore) commandStore).weightedSize()); + } + synchronized void refreshCacheSizes() { if (count() == 0) return; long perStore = cacheSize / count(); // TODO (low priority, safety): we might transiently breach our limit if we increase one store before decreasing another - forEach(commandStore -> ((AccordSafeCommandStore) commandStore).commandStore().setCacheSize(perStore)); + forEach(commandStore -> ((AccordSafeCommandStore) commandStore).commandStore().setCapacity(perStore)); } private static long maxCacheSize() diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index ae6901118d80..95ef73f0e793 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -299,7 +299,6 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) } if (cause instanceof Preempted) { - metrics.preempts.mark(); //TODO need to improve // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match @@ -372,7 +371,7 @@ public void setCacheSize(long kb) { long bytes = kb << 10; AccordCommandStores commandStores = (AccordCommandStores) node.commandStores(); - commandStores.setCacheSize(bytes); + commandStores.setCapacity(bytes); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 15faee4c001e..97bf5b1586e4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -26,13 +26,15 @@ import java.util.stream.Stream; import com.google.common.annotations.VisibleForTesting; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.utils.IntrusiveLinkedList; import accord.utils.async.AsyncChains; +import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.metrics.AccordStateCacheMetrics; +import org.apache.cassandra.metrics.CacheAccessMetrics; import org.apache.cassandra.service.accord.AccordCachingState.Status; import static accord.utils.Invariants.checkState; @@ -49,7 +51,7 @@ * Supports dynamic object sizes. After each acquire/free cycle, the cacheable objects size is recomputed to * account for data added/removed during txn processing if it's modified flag is set */ -public class AccordStateCache extends IntrusiveLinkedList> +public class AccordStateCache extends IntrusiveLinkedList> implements CacheSize { private static final Logger logger = LoggerFactory.getLogger(AccordStateCache.class); @@ -63,13 +65,6 @@ public static void validateLoadOnEvict(boolean value) VALIDATE_LOAD_ON_EVICT = value; } - static class Stats - { - private long queries; - private long hits; - private long misses; - } - private final Map> cache = new HashMap<>(); private final HashMap, Instance> instances = new HashMap<>(); @@ -78,22 +73,27 @@ static class Stats private int unreferenced = 0; private long maxSizeInBytes; private long bytesCached = 0; - private final Stats stats = new Stats(); - public AccordStateCache(ExecutorPlus loadExecutor, ExecutorPlus saveExecutor, long maxSizeInBytes) + @VisibleForTesting + final AccordStateCacheMetrics metrics; + + public AccordStateCache(ExecutorPlus loadExecutor, ExecutorPlus saveExecutor, long maxSizeInBytes, AccordStateCacheMetrics metrics) { this.loadExecutor = loadExecutor; this.saveExecutor = saveExecutor; this.maxSizeInBytes = maxSizeInBytes; + this.metrics = metrics; } - public void setMaxSize(long size) + @Override + public void setCapacity(long sizeInBytes) { - maxSizeInBytes = size; + maxSizeInBytes = sizeInBytes; maybeEvictSomeNodes(); } - public long getMaxSize() + @Override + public long capacity() { return maxSizeInBytes; } @@ -114,7 +114,11 @@ private void link(AccordCachingState node) private void maybeUpdateSize(AccordCachingState node, ToLongFunction estimator) { if (node.shouldUpdateSize()) - bytesCached += ((AccordCachingState) node).estimatedSizeOnHeapDelta((ToLongFunction) estimator); + { + long delta = ((AccordCachingState) node).estimatedSizeOnHeapDelta((ToLongFunction) estimator); + bytesCached += delta; + instanceForNode(node).bytesCached += delta; + } } /* @@ -174,6 +178,8 @@ private void evict(AccordCachingState node) checkState(!isInQueue(node)); bytesCached -= node.lastQueriedEstimatedSizeOnHeap; + Instance instance = instanceForNode(node); + instance.bytesCached -= node.lastQueriedEstimatedSizeOnHeap; if (node.status() == LOADED && VALIDATE_LOAD_ON_EVICT) instanceForNode(node).validateLoadEvicted(node); @@ -181,6 +187,8 @@ private void evict(AccordCachingState node) if (!node.hasListeners()) { AccordCachingState self = cache.remove(node.key()); + if (self != null) + instance.itemsCached--; checkState(self == node, "Leaked node detected; was attempting to remove %s but cache had %s", node, self); } else @@ -212,7 +220,7 @@ public > Instance instance( return instance; } - public class Instance> + public class Instance> implements CacheSize { private final Class keyClass; private final Function, S> safeRefFactory; @@ -220,7 +228,11 @@ public class Instance> private BiFunction saveFunction; private final BiFunction validateFunction; private final ToLongFunction heapEstimator; - private final Stats stats = new Stats(); + private long bytesCached; + private int itemsCached; + + @VisibleForTesting + final CacheAccessMetrics instanceMetrics; public Instance( Class keyClass, @@ -236,6 +248,7 @@ public Instance( this.saveFunction = saveFunction; this.validateFunction = validateFunction; this.heapEstimator = heapEstimator; + this.instanceMetrics = metrics.forInstance(keyClass); } public Stream> stream() @@ -280,8 +293,11 @@ private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded) AccordCachingState node = new AccordCachingState<>(key); node.load(loadExecutor, loadFunction); node.references++; - cache.put(key, node); + + if (cache.put(key, node) == null) + itemsCached++; maybeUpdateSize(node, heapEstimator); + metrics.objectSize.update(node.lastQueriedEstimatedSizeOnHeap); maybeEvictSomeNodes(); return node; } @@ -413,37 +429,22 @@ public void complete(K key) node.complete(); } - public long cacheQueries() - { - return stats.queries; - } - - public long cacheHits() - { - return stats.hits; - } - - public long cacheMisses() - { - return stats.misses; - } - private void incrementCacheQueries() { - stats.queries++; - AccordStateCache.this.stats.queries++; + instanceMetrics.requests.mark(); + metrics.requests.mark(); } private void incrementCacheHits() { - stats.hits++; - AccordStateCache.this.stats.hits++; + instanceMetrics.hits.mark(); + metrics.hits.mark(); } private void incrementCacheMisses() { - stats.misses++; - AccordStateCache.this.stats.misses++; + instanceMetrics.misses.mark(); + metrics.misses.mark(); } @VisibleForTesting @@ -457,12 +458,43 @@ public void unsafeSetSaveFunction(BiFunction saveFunction) { this.saveFunction = saveFunction; } + + @Override + public long capacity() + { + return AccordStateCache.this.capacity(); + } + + @Override + public void setCapacity(long capacity) + { + throw new UnsupportedOperationException("Capacity is shared between all instances. Please set the capacity on the global cache"); + } + + @Override + public int size() + { + return itemsCached; + } + + @Override + public long weightedSize() + { + return bytesCached; + } } @VisibleForTesting void unsafeClear() { cache.clear(); + bytesCached = 0; + metrics.reset();; + instances.values().forEach(i -> { + i.itemsCached = 0; + i.bytesCached = 0; + i.instanceMetrics.reset(); + }); //noinspection StatementWithEmptyBody while (null != poll()); } @@ -504,14 +536,14 @@ int numUnreferencedEntries() return unreferenced; } - @VisibleForTesting - int totalNumEntries() + @Override + public int size() { return cache.size(); } - @VisibleForTesting - long bytesCached() + @Override + public long weightedSize() { return bytesCached; } @@ -536,19 +568,4 @@ int references(Object key) AccordCachingState node = cache.get(key); return node != null ? node.references : 0; } - - public long cacheQueries() - { - return stats.queries; - } - - public long cacheHits() - { - return stats.hits; - } - - public long cacheMisses() - { - return stats.misses; - } } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 3bfaed2d5082..e01587e61b3e 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory; import accord.api.Agent; +import accord.api.EventsListener; import accord.api.Result; import accord.local.Command; import accord.local.Node; @@ -33,6 +34,7 @@ import accord.primitives.Txn; import accord.primitives.TxnId; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.metrics.AccordMetrics; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.JVMStabilityInspector; @@ -101,4 +103,10 @@ public Txn emptyTxn(Txn.Kind kind, Seekables keysOrRanges) { return new Txn.InMemory(kind, keysOrRanges, TxnRead.EMPTY, TxnQuery.ALL, null); } + + @Override + public EventsListener metricsEventsListener() + { + return AccordMetrics.Listener.instance; + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java new file mode 100644 index 000000000000..4db878da47c2 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Function; + +import com.google.common.base.Throwables; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IMessageFilters; +import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.metrics.AccordMetrics; +import org.apache.cassandra.metrics.DefaultNameFactory; +import org.apache.cassandra.metrics.RatioGaugeSet; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; +import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; +import org.assertj.core.data.Offset; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.fail; + + +public class AccordMetricsTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordMetricsTest.class); + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws IOException + { + AccordTestBase.setupClass(); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); + for (int i = 0; i < SHARED_CLUSTER.size(); i++) // initialize metrics + logger.trace(SHARED_CLUSTER.get(i + 1).callOnInstance(() -> AccordMetrics.readMetrics.toString() + AccordMetrics.writeMetrics.toString())); + } + + String writeCql() + { + return "BEGIN TRANSACTION\n" + + " LET val = (SELECT v FROM " + currentTable + " WHERE k=? AND c=?);\n" + + " SELECT val.v;\n" + + " UPDATE " + currentTable + " SET v = v + 1 WHERE k=? AND c=?;\n" + + "COMMIT TRANSACTION"; + } + + String readCql() + { + return "BEGIN TRANSACTION\n" + + " LET val = (SELECT v FROM " + currentTable + " WHERE k=? AND c=?);\n" + + " SELECT val.v;\n" + + "COMMIT TRANSACTION"; + } + + Map> countingMetrics0; + + @Before + public void beforeTest() + { + SHARED_CLUSTER.filters().reset(); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))"); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + } + + @Test + public void testRegularMetrics() throws Exception + { + countingMetrics0 = getMetrics(); + SHARED_CLUSTER.coordinator(1).executeWithResult(writeCql(), ConsistencyLevel.ALL, 0, 0, 0, 0); + assertCoordinatorMetrics(0, "rw", 1, 0, 0, 0, 0); + assertCoordinatorMetrics(1, "rw", 0, 0, 0, 0, 0); + assertReplicaMetrics(0, "rw", 1, 1, 1); + assertReplicaMetrics(1, "rw", 1, 1, 1); + assertZeroMetrics("ro"); + + countingMetrics0 = getMetrics(); + SHARED_CLUSTER.coordinator(1).executeWithResult(readCql(), ConsistencyLevel.ALL, 0, 0); + assertCoordinatorMetrics(0, "ro", 1, 0, 0, 0, 0); + assertCoordinatorMetrics(1, "ro", 0, 0, 0, 0, 0); + assertReplicaMetrics(0, "ro", 1, 1, 0); + assertReplicaMetrics(1, "ro", 1, 1, 0); + assertZeroMetrics("rw"); + } + + @Test + public void testPreemptionMetrics() + { + IMessageFilters.Filter commitFilter1 = SHARED_CLUSTER.filters().outbound().verbs(Verb.ACCORD_COMMIT_REQ.id).from(1).to(1).drop(); + IMessageFilters.Filter commitFilter2 = SHARED_CLUSTER.filters().outbound().verbs(Verb.ACCORD_COMMIT_REQ.id).from(1).to(2).drop(); + commitFilter1.on(); + commitFilter2.on(); + + countingMetrics0 = getMetrics(); + try + { + SHARED_CLUSTER.coordinator(1).executeWithResult(writeCql(), ConsistencyLevel.ALL, 0, 0, 0, 0); + fail("expected to fail"); + } + catch (RuntimeException ex) + { + assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(WritePreemptedException.class.getName()); + } + + assertCoordinatorMetrics(0, "rw", 1, 0, 1, 0, 0); + assertCoordinatorMetrics(1, "rw", 0, 0, 0, 0, 1); + assertReplicaMetrics(0, "rw", 1, 1, 1); + assertReplicaMetrics(1, "rw", 1, 1, 1); + + assertZeroMetrics("ro"); + + countingMetrics0 = getMetrics(); + try + { + SHARED_CLUSTER.coordinator(1).executeWithResult(readCql(), ConsistencyLevel.ALL, 0, 0); + fail("expected to fail"); + } + catch (RuntimeException ex) + { + assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(ReadPreemptedException.class.getName()); + } + + assertCoordinatorMetrics(0, "ro", 1, 0, 1, 0, 0); + assertCoordinatorMetrics(1, "ro", 0, 0, 0, 0, 1); + assertReplicaMetrics(0, "ro", 1, 1, 0); + assertReplicaMetrics(1, "ro", 1, 1, 0); + + assertZeroMetrics("rw"); + } + + @Test + public void testTimeoutMetrics() + { + IMessageFilters.Filter preAcceptFilter = SHARED_CLUSTER.filters().outbound().verbs(Verb.ACCORD_PRE_ACCEPT_REQ.id).from(1).to(2).drop(); + preAcceptFilter.on(); + + countingMetrics0 = getMetrics(); + try + { + SHARED_CLUSTER.coordinator(1).executeWithResult(readCql(), ConsistencyLevel.ALL, 0, 0); + fail("expected to fail"); + } + catch (RuntimeException ex) + { + assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(ReadTimeoutException.class.getName()); + } + + assertCoordinatorMetrics(0, "ro", 0, 0, 0, 1, 0); + assertCoordinatorMetrics(1, "ro", 0, 0, 0, 0, 0); + assertReplicaMetrics(0, "ro", 0, 0, 0); + assertReplicaMetrics(1, "ro", 0, 0, 0); + + assertZeroMetrics("rw"); + + countingMetrics0 = getMetrics(); + try + { + SHARED_CLUSTER.coordinator(1).executeWithResult(writeCql(), ConsistencyLevel.ALL, 0, 0, 0, 0); + fail("expected to fail"); + } + catch (RuntimeException ex) + { + assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(WriteTimeoutException.class.getName()); + } + + assertCoordinatorMetrics(0, "rw", 0, 0, 0, 1, 0); + assertCoordinatorMetrics(1, "rw", 0, 0, 0, 0, 0); + assertReplicaMetrics(0, "rw", 0, 0, 0); + assertReplicaMetrics(1, "rw", 0, 0, 0); + + assertZeroMetrics("ro"); + } + + private void assertZeroMetrics(String scope) + { + for (int i = 0; i < SHARED_CLUSTER.size(); i++) + { + assertCoordinatorMetrics(i, scope, 0, 0, 0, 0, 0); + assertReplicaMetrics(i, scope, 0, 0, 0); + } + } + + private void assertCoordinatorMetrics(int node, String scope, long fastPaths, long slowPaths, long preempts, long timeouts, long recoveries) + { + DefaultNameFactory nameFactory = new DefaultNameFactory(AccordMetrics.ACCORD_COORDINATOR, scope); + Map metrics = diff(countingMetrics0).get(node); + logger.info("Metrics for node {} / {}: {}", node, scope, metrics); + Function metric = n -> metrics.get(nameFactory.createMetricName(n).getMetricName()); + assertThat(metric.apply(AccordMetrics.FAST_PATHS)).isEqualTo(fastPaths); + assertThat(metric.apply(AccordMetrics.SLOW_PATHS)).isEqualTo(slowPaths); + assertThat(metric.apply(AccordMetrics.PREEMPTS)).isEqualTo(preempts); + assertThat(metric.apply(AccordMetrics.TIMEOUTS)).isEqualTo(timeouts); + assertThat(metric.apply(AccordMetrics.RECOVERY_DELAY)).isEqualTo(recoveries); + assertThat(metric.apply(AccordMetrics.RECOVERY_TIME)).isEqualTo(recoveries); + assertThat(metric.apply(AccordMetrics.DEPENDENCIES)).isEqualTo(fastPaths + slowPaths); + + if ((fastPaths + slowPaths) > 0) + { + String fastPathToTotalName = nameFactory.createMetricName(AccordMetrics.FAST_PATH_TO_TOTAL + "." + RatioGaugeSet.MEAN_RATIO).getMetricName(); + assertThat((double) SHARED_CLUSTER.get(1).metrics().getGauge(fastPathToTotalName)).isEqualTo((double) fastPaths / (double) (fastPaths + slowPaths), Offset.offset(0.01d)); + } + } + + private void assertReplicaMetrics(int node, String scope, long commits, long executions, long applications) + { + DefaultNameFactory nameFactory = new DefaultNameFactory(AccordMetrics.ACCORD_REPLICA, scope); + Map metrics = diff(countingMetrics0).get(node); + Function metric = n -> metrics.get(nameFactory.createMetricName(n).getMetricName()); + assertThat(metric.apply(AccordMetrics.COMMIT_LATENCY)).isEqualTo(commits); + assertThat(metric.apply(AccordMetrics.EXECUTE_LATENCY)).isEqualTo(executions); + assertThat(metric.apply(AccordMetrics.APPLY_LATENCY)).isEqualTo(applications); + assertThat(metric.apply(AccordMetrics.APPLY_DURATION)).isEqualTo(applications); + assertThat(metric.apply(AccordMetrics.PARTIAL_DEPENDENCIES)).isEqualTo(executions); + } + + private Map> getMetrics() + { + Map> metrics = new HashMap<>(); + for (int i = 0; i < SHARED_CLUSTER.size(); i++) + metrics.put(i, SHARED_CLUSTER.get(i + 1).metrics().getCounters(name -> name.startsWith("org.apache.cassandra.metrics.accord-"))); + return metrics; + } + + private Map> diff(Map> prev) + { + Map> curr = getMetrics(); + Map> diff = new HashMap<>(); + for (int i = 0; i < SHARED_CLUSTER.size(); i++) + { + Map prevNode = prev.get(i); + Map currNode = curr.get(i); + Map diffNode = new HashMap<>(); + for (Map.Entry currEntry : currNode.entrySet()) + { + Long prevVal = prevNode.get(currEntry.getKey()); + if (prevVal != null) + diffNode.put(currEntry.getKey(), currEntry.getValue() - prevVal); + } + diff.put(i, diffNode); + } + return diff; + } +} diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index c09989eb0ab7..33472cf09f26 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -399,9 +399,9 @@ private static void flush(AccordCommandStore commandStore) { commandStore.executeBlocking(() -> { // clear cache and wait for post-eviction writes to complete - long cacheSize = commandStore.getCacheSize(); - commandStore.setCacheSize(0); - commandStore.setCacheSize(cacheSize); + long cacheSize = commandStore.capacity(); + commandStore.setCapacity(0); + commandStore.setCapacity(cacheSize); commandStore.cache().awaitSaveResults(); }); commands.forceBlockingFlush(FlushReason.UNIT_TESTS); diff --git a/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java b/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java index 17fb0f28ec21..a4c1b8608d67 100644 --- a/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java @@ -132,10 +132,10 @@ private static void keyCacheMetrics(String title) { CacheMetrics metrics = CacheService.instance.keyCache.getMetrics(); System.out.println("Key cache metrics " + title + ": capacity:" + metrics.capacity.getValue() + - " size:"+metrics.size.getValue()+ + " size:" + metrics.size.getValue() + " entries:" + metrics.entries.getValue() + - " hit-rate:"+metrics.hitRate.getValue() + - " one-min-rate:"+metrics.oneMinuteHitRate.getValue()); + " hit-rate:" + metrics.hitRate.getValue() + + " one-min-rate:" + metrics.hitRate.getValue()); } @Test diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 9dcc72b84d6d..a13f17d49e99 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -89,7 +89,7 @@ private static PartitionKey key(int k) public void basicCycleTest() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCacheSize(0))); + getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCapacity(0))); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); Txn txn = createWriteTxn(1); @@ -173,7 +173,7 @@ public void basicCycleTest() throws Throwable public void computeDeps() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCacheSize(0))); + getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCapacity(0))); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); Txn txn = createWriteTxn(2); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index 0ac350b92f87..5fc6f2305f48 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -17,55 +17,66 @@ */ package org.apache.cassandra.service.accord; +import java.util.UUID; + import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import accord.utils.async.AsyncChain; +import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.ManualExecutor; +import org.apache.cassandra.metrics.AccordStateCacheMetrics; +import org.apache.cassandra.metrics.CacheAccessMetrics; import org.apache.cassandra.service.accord.AccordCachingState.Status; import static org.apache.cassandra.service.accord.AccordTestUtils.testLoad; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatExceptionOfType; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class AccordStateCacheTest { private static final long DEFAULT_NODE_SIZE = nodeSize(0); + private AccordStateCacheMetrics cacheMetrics; - private static class SafeString implements AccordSafeState + private static abstract class TestSafeState implements AccordSafeState { - private boolean invalidated = false; - private final AccordCachingState global; - private String original = null; + protected boolean invalidated = false; + protected final AccordCachingState global; + private T original = null; - public SafeString(AccordCachingState global) + public TestSafeState(AccordCachingState global) { this.global = global; } - public AccordCachingState global() + public AccordCachingState global() { return global; } @Override - public String key() + public T key() { return global.key(); } @Override - public String current() + public T current() { return global.get(); } @Override - public void set(String update) + public void set(T update) { global.set(update); } @Override - public String original() + public T original() { return original; } @@ -118,6 +129,22 @@ public boolean invalidated() } } + private static class SafeString extends TestSafeState + { + public SafeString(AccordCachingState global) + { + super(global); + } + } + + private static class SafeInt extends TestSafeState + { + public SafeInt(AccordCachingState global) + { + super(global); + } + } + private static long emptyNodeSize() { return AccordCachingState.EMPTY_SIZE; @@ -131,15 +158,35 @@ private static long nodeSize(long itemSize) private static void assertCacheState(AccordStateCache cache, int referenced, int total, long bytes) { Assert.assertEquals(referenced, cache.numReferencedEntries()); - Assert.assertEquals(total, cache.totalNumEntries()); - Assert.assertEquals(bytes, cache.bytesCached()); + Assert.assertEquals(total, cache.size()); + Assert.assertEquals(bytes, cache.weightedSize()); + } + + private void assertCacheMetrics(CacheAccessMetrics metrics, int hits, int misses, int requests) + { + Assert.assertEquals(hits, metrics.hits.getCount()); + Assert.assertEquals(misses, metrics.misses.getCount()); + Assert.assertEquals(requests, metrics.requests.getCount()); + if (metrics instanceof AccordStateCacheMetrics) + { + AccordStateCacheMetrics ascMetrics = (AccordStateCacheMetrics) metrics; + Assert.assertEquals(misses, ascMetrics.objectSize.getCount()); + assertThat(ascMetrics.objectSize.getSnapshot().getMax()).isGreaterThanOrEqualTo(DEFAULT_NODE_SIZE); + } + } + + @Before + public void before() + { + String type = String.format("%s-%s", AccordCommandStores.ACCORD_STATE_CACHE, UUID.randomUUID()); + cacheMetrics = new AccordStateCacheMetrics(type); } @Test public void testAcquisitionAndRelease() { ManualExecutor executor = new ManualExecutor(); - AccordStateCache cache = new AccordStateCache(executor, executor,500); + AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); @@ -162,13 +209,56 @@ public void testAcquisitionAndRelease() Assert.assertSame(safeString1.global, cache.head()); Assert.assertSame(safeString2.global, cache.tail()); + + assertCacheMetrics(cache.metrics, 0, 2, 2); + assertCacheMetrics(instance.instanceMetrics, 0, 2, 2); + } + + @Test + public void testCachingMetricsWithTwoInstances() + { + ManualExecutor executor = new ManualExecutor(); + AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); + AccordStateCache.Instance stringInstance = + cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true,String::length); + AccordStateCache.Instance intInstance = + cache.instance(Integer.class, Integer.class, SafeInt::new, key -> key, (original, current) -> null, (k, v) -> true,ignored -> Integer.BYTES); + assertCacheState(cache, 0, 0, 0); + + SafeString safeString1 = stringInstance.acquire("1"); + testLoad(executor, safeString1, "1"); + stringInstance.release(safeString1); + SafeString safeString2 = stringInstance.acquire("2"); + testLoad(executor, safeString2, "2"); + stringInstance.release(safeString2); + + SafeInt safeInt1 = intInstance.acquire(3); + testLoad(executor, safeInt1, 3); + intInstance.release(safeInt1); + SafeInt safeInt2 = intInstance.acquire(4); + testLoad(executor, safeInt2, 4); + intInstance.release(safeInt2); + SafeInt safeInt3 = intInstance.acquire(5); + testLoad(executor, safeInt3, 5); + intInstance.release(safeInt3); + + assertCacheState(cache, 0, 5, nodeSize(Integer.BYTES) * 3 + nodeSize(1) * 2); + assertThat(stringInstance.size()).isEqualTo(2); + assertThat(stringInstance.weightedSize()).isEqualTo(nodeSize(1) * 2); + assertThat(stringInstance.capacity()).isEqualTo(cache.capacity()); + assertThat(intInstance.size()).isEqualTo(3); + assertThat(intInstance.weightedSize()).isEqualTo(nodeSize(Integer.BYTES) * 3); + assertThat(intInstance.capacity()).isEqualTo(cache.capacity()); + + assertThatExceptionOfType(UnsupportedOperationException.class).isThrownBy(() -> stringInstance.setCapacity(123)); + assertThatExceptionOfType(UnsupportedOperationException.class).isThrownBy(() -> intInstance.setCapacity(123)); } @Test public void testRotation() { ManualExecutor executor = new ManualExecutor(); - AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 5); + AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 5, cacheMetrics); AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); @@ -187,11 +277,15 @@ public void testRotation() Assert.assertSame(items[0].global, cache.head()); Assert.assertSame(items[2].global, cache.tail()); assertCacheState(cache, 0, 3, nodeSize(1) * 3); + assertCacheMetrics(cache.metrics, 0, 3, 3); + assertCacheMetrics(instance.instanceMetrics, 0, 3, 3); SafeString safeString = instance.acquire("1"); Assert.assertEquals(Status.LOADED, safeString.globalStatus()); assertCacheState(cache, 1, 3, nodeSize(1) * 3); + assertCacheMetrics(cache.metrics, 1, 3, 4); + assertCacheMetrics(instance.instanceMetrics, 1, 3, 4); // releasing item should return it to the tail instance.release(safeString); @@ -204,7 +298,7 @@ public void testRotation() public void testEvictionOnAcquire() { ManualExecutor executor = new ManualExecutor(); - AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 5); + AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 5, cacheMetrics); AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); @@ -222,6 +316,8 @@ public void testEvictionOnAcquire() assertCacheState(cache, 0, 5, nodeSize(1) * 5); Assert.assertSame(items[0].global, cache.head()); Assert.assertSame(items[4].global, cache.tail()); + assertCacheMetrics(cache.metrics, 0, 5, 5); + assertCacheMetrics(instance.instanceMetrics, 0, 5, 5); SafeString safeString = instance.acquire("5"); Assert.assertTrue(instance.isReferenced(safeString.key())); @@ -232,19 +328,23 @@ public void testEvictionOnAcquire() Assert.assertSame(items[4].global, cache.tail()); Assert.assertFalse(cache.keyIsCached("0")); Assert.assertFalse(cache.keyIsReferenced("0")); + assertCacheMetrics(cache.metrics, 0, 6, 6); + assertCacheMetrics(instance.instanceMetrics, 0, 6, 6); testLoad(executor, safeString, "5"); instance.release(safeString); assertCacheState(cache, 0, 5, nodeSize(1) * 5); Assert.assertSame(items[1].global, cache.head()); Assert.assertSame(safeString.global, cache.tail()); + assertCacheMetrics(cache.metrics, 0, 6, 6); + assertCacheMetrics(instance.instanceMetrics, 0, 6, 6); } @Test public void testEvictionOnRelease() { ManualExecutor executor = new ManualExecutor(); - AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 4); + AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 4, cacheMetrics); AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); @@ -259,16 +359,22 @@ public void testEvictionOnRelease() } assertCacheState(cache, 5, 5, nodeSize(0) * 5); + assertCacheMetrics(cache.metrics, 0, 5, 5); + assertCacheMetrics(instance.instanceMetrics, 0, 5, 5); Assert.assertNull(cache.head()); Assert.assertNull(cache.tail()); instance.release(items[2]); assertCacheState(cache, 4, 4, nodeSize(0) * 4); + assertCacheMetrics(cache.metrics, 0, 5, 5); + assertCacheMetrics(instance.instanceMetrics, 0, 5, 5); Assert.assertNull(cache.head()); Assert.assertNull(cache.tail()); instance.release(items[4]); assertCacheState(cache, 3, 4, nodeSize(0) * 3 + nodeSize(1)); + assertCacheMetrics(cache.metrics, 0, 5, 5); + assertCacheMetrics(instance.instanceMetrics, 0, 5, 5); Assert.assertSame(items[4].global, cache.head()); Assert.assertSame(items[4].global, cache.tail()); } @@ -277,7 +383,7 @@ public void testEvictionOnRelease() public void testMultiAcquireRelease() { ManualExecutor executor = new ManualExecutor(); - AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 4); + AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 4, cacheMetrics); AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); @@ -285,6 +391,8 @@ public void testMultiAcquireRelease() SafeString safeString1 = instance.acquire("0"); testLoad(executor, safeString1, "0"); Assert.assertEquals(Status.LOADED, safeString1.globalStatus()); + assertCacheMetrics(cache.metrics, 0, 1, 1); + assertCacheMetrics(instance.instanceMetrics, 0, 1, 1); Assert.assertEquals(1, cache.references("0")); assertCacheState(cache, 1, 1, nodeSize(0)); @@ -293,6 +401,8 @@ public void testMultiAcquireRelease() Assert.assertEquals(Status.LOADED, safeString1.globalStatus()); Assert.assertEquals(2, cache.references("0")); assertCacheState(cache, 1, 1, nodeSize(0)); + assertCacheMetrics(cache.metrics, 1, 1, 2); + assertCacheMetrics(instance.instanceMetrics, 1, 1, 2); instance.release(safeString1); assertCacheState(cache, 1, 1, nodeSize(1)); @@ -304,7 +414,7 @@ public void testMultiAcquireRelease() public void evictionBlockedOnSaving() { ManualExecutor executor = new ManualExecutor(); - AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 3 + nodeSize(3)); + AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 3 + nodeSize(3), cacheMetrics); AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); @@ -324,9 +434,11 @@ public void evictionBlockedOnSaving() } assertCacheState(cache, 0, 4, nodeSize(1) * 3 + nodeSize(3)); + assertCacheMetrics(cache.metrics, 0, 4, 4); + assertCacheMetrics(instance.instanceMetrics, 0, 4, 4); // force cache eviction - cache.setMaxSize(0); + cache.setCapacity(0); // all should have been evicted except 0 assertCacheState(cache, 0, 1, nodeSize(2)); @@ -341,7 +453,7 @@ public void evictionBlockedOnSaving() public void testUpdates() { ManualExecutor executor = new ManualExecutor(); - AccordStateCache cache = new AccordStateCache(executor, executor, 500); + AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance instance = cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); @@ -360,5 +472,32 @@ public void testUpdates() assertCacheState(cache, 0, 1, nodeSize(3)); Assert.assertSame(safeString.global, cache.head()); Assert.assertSame(safeString.global, cache.tail()); + + assertCacheMetrics(cache.metrics, 0, 1, 1); + assertCacheMetrics(instance.instanceMetrics, 0, 1, 1); + } + + private CacheSize mockCacheSize(long capacity, long size, int entries) + { + CacheSize cacheSize = mock(CacheSize.class); + when(cacheSize.capacity()).thenReturn(capacity); + when(cacheSize.weightedSize()).thenReturn(size); + when(cacheSize.size()).thenReturn(entries); + return cacheSize; + } + + @Test + public void testAccorStateCacheMetrics() + { + CacheAccessMetrics stringInstance1 = cacheMetrics.forInstance(String.class); + CacheAccessMetrics stringInstance1Dup = cacheMetrics.forInstance(String.class); + CacheAccessMetrics stringInstance2 = cacheMetrics.forInstance(String.class); + CacheAccessMetrics integerInstance1 = cacheMetrics.forInstance(Integer.class); + CacheAccessMetrics integerInstance2 = cacheMetrics.forInstance(Integer.class); + + assertThat(stringInstance1).isSameAs(stringInstance1Dup); + assertThat(stringInstance1).isSameAs(stringInstance2); + assertThat(integerInstance1).isSameAs(integerInstance2); + assertThat(stringInstance1).isNotSameAs(integerInstance1); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 192aeacfc118..4e21b5096c79 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -77,6 +77,7 @@ import org.apache.cassandra.cql3.statements.TransactionStatement; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; @@ -370,7 +371,8 @@ public static AccordCommandStore createAccordCommandStore( holder, journal, loadExecutor, - saveExecutor); + saveExecutor, + new AccordStateCacheMetrics(AccordCommandStores.ACCORD_STATE_CACHE + System.currentTimeMillis())); holder.set(result); result.updateRangesForEpoch(); return result; @@ -389,7 +391,7 @@ public static AccordCommandStore createAccordCommandStore( Node.Id node = new Id(1); Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); AccordCommandStore store = createAccordCommandStore(node, now, topology, loadExecutor, saveExecutor); - store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).setCacheSize(1 << 20)); + store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).setCapacity(1 << 20)); return store; } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 66a0d00784a1..e1f5e317f031 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -84,7 +84,7 @@ public void cachedTest() AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); AccordStateCache.Instance commandCache = commandStore.commandCache(); - commandStore.executeBlocking(() -> commandStore.setCacheSize(1024)); + commandStore.executeBlocking(() -> commandStore.setCapacity(1024)); AccordStateCache.Instance cfkCache = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); @@ -229,7 +229,7 @@ public void inProgressLoadTest() throws Throwable ManualExecutor executor = new ManualExecutor(); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); - commandStore.executor().submit(() -> commandStore.setCacheSize(1024)).get(); + commandStore.executor().submit(() -> commandStore.setCapacity(1024)).get(); AccordStateCache.Instance commandCache = commandStore.commandCache(); AccordStateCache.Instance cfkCache = commandStore.commandsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 14d4f5d409db..086c9537fea8 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -223,9 +223,9 @@ private static Command createCommittedUsingLifeCycle(AccordCommandStore commandS // clear cache commandStore.executeBlocking(() -> { - long cacheSize = commandStore.getCacheSize(); - commandStore.setCacheSize(0); - commandStore.setCacheSize(cacheSize); + long cacheSize = commandStore.capacity(); + commandStore.setCapacity(0); + commandStore.setCapacity(cacheSize); commandStore.cache().awaitSaveResults(); }); @@ -312,7 +312,7 @@ public void loadFail() // all txn use the same key; 0 Keys keys = keys(Schema.instance.getTableMetadata("ks", "tbl"), 0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - commandStore.executeBlocking(() -> commandStore.setCacheSize(0)); + commandStore.executeBlocking(() -> commandStore.setCapacity(0)); Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); qt().withPure(false).withExamples(50).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { From dfd1e99fd11f8952abd6799871647a30808eca98 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Mon, 9 Oct 2023 15:05:57 +0200 Subject: [PATCH 075/340] CEP-15: Add Accord configuration stub Patch by Jacek Lewandowski; reviewed by David Capwell for CASSANDRA-18221 --- conf/cassandra.yaml | 16 +++++-- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 30 +++++++++++++ .../org/apache/cassandra/config/Config.java | 8 +--- .../cassandra/config/DatabaseDescriptor.java | 43 ++++++++++--------- .../apache/cassandra/config/DurationSpec.java | 6 +++ .../cql3/statements/TransactionStatement.java | 2 +- .../service/accord/AccordConfiguration.java | 40 +++++++++++++++++ .../service/accord/AccordService.java | 12 ++++-- test/conf/cassandra-mtls.yaml | 3 +- test/conf/cassandra-murmur.yaml | 3 +- test/conf/cassandra-old.yaml | 3 +- .../cassandra-pem-jks-sslcontextfactory.yaml | 3 +- ...slcontextfactory-invalidconfiguration.yaml | 3 +- .../conf/cassandra-pem-sslcontextfactory.yaml | 3 +- test/conf/cassandra-seeds.yaml | 3 +- ...slcontextfactory-invalidconfiguration.yaml | 3 +- test/conf/cassandra-sslcontextfactory.yaml | 3 +- test/conf/cassandra.yaml | 8 +++- .../distributed/impl/InstanceConfig.java | 16 +++++-- .../test/accord/AccordBootstrapTest.java | 2 +- .../test/accord/AccordFeatureFlagTest.java | 11 +++-- .../config/DatabaseDescriptorRefTest.java | 24 ++++++----- .../config/YamlConfigurationLoaderTest.java | 6 +-- 24 files changed, 181 insertions(+), 72 deletions(-) create mode 100644 src/java/org/apache/cassandra/config/AccordSpec.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordConfiguration.java diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 89582a23de1a..a507dcd94312 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2199,9 +2199,6 @@ drop_compact_storage_enabled: false # Whether or not USE is allowed. This is enabled by default to avoid failure on upgrade. #use_statements_enabled: true -# Enables the execution of Accord (multi-key) transactions on this node. -accord_transactions_enabled: false - # When the client triggers a protocol exception or unknown issue (Cassandra bug) we increment # a client metric showing this; this logic will exclude specific subnets from updating these # metrics @@ -2651,3 +2648,16 @@ accord_transactions_enabled: false # compatibility mode would no longer toggle behaviors as when it was running in the UPGRADING mode. # storage_compatibility_mode: NONE + +#accord: +# # Enables the execution of Accord (multi-key) transactions on this node. +# enabled: false +# +# # Journal directory for Accord +# journal_directory: +# +# # The number of Accord shards on this node; -1 means use the number of cores +# shard_count: -1 +# +# # Progress log scheduling delay +# progress_log_schedule_delay: 1s diff --git a/modules/accord b/modules/accord index 0419858bd1f6..5ffe3d504bb5 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 0419858bd1f6761f08fd1369477f7c142f5bbb4f +Subproject commit 5ffe3d504bb5aa1ff1c2b96d817791e40f7ced0f diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java new file mode 100644 index 000000000000..b025d561b66d --- /dev/null +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.config; + +public class AccordSpec +{ + public volatile boolean enabled = false; + + public volatile String journal_directory; + + public volatile OptionaldPositiveInt shard_count = OptionaldPositiveInt.UNDEFINED; + + public volatile DurationSpec.IntSecondsBound progress_log_schedule_delay = new DurationSpec.IntSecondsBound(1); +} diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 704dda8aa9fa..d23abdccff46 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -398,9 +398,6 @@ public static class SSTableConfig public DataStorageSpec.IntMebibytesBound commitlog_total_space; public CommitLogSync commitlog_sync; - // Accord Journal - public String accord_journal_directory; - @Replaces(oldName = "commitlog_sync_group_window_in_ms", converter = Converters.MILLIS_DURATION_DOUBLE, deprecated = true) public DurationSpec.IntMillisecondsBound commitlog_sync_group_window = new DurationSpec.IntMillisecondsBound("0ms"); @Replaces(oldName = "commitlog_sync_period_in_ms", converter = Converters.MILLIS_DURATION_INT, deprecated = true) @@ -627,9 +624,6 @@ public static class SSTableConfig public volatile boolean use_statements_enabled = true; - public boolean accord_transactions_enabled = false; - public OptionaldPositiveInt accord_shard_count = OptionaldPositiveInt.UNDEFINED; - /** * Optionally disable asynchronous UDF execution. * Disabling asynchronous UDF execution also implicitly disables the security-manager! @@ -1184,6 +1178,8 @@ public enum PaxosOnLinearizabilityViolation */ public ParameterizedClass default_compaction = null; + public final AccordSpec accord = new AccordSpec(); + public static Supplier getOverrideLoadConfig() { return overrideLoadConfig; diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index d251d3ef63d3..8912803564e4 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -718,13 +718,14 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m conf.commitlog_directory = storagedirFor("commitlog"); } + if (conf.accord.journal_directory == null) initializeCommitLogDiskAccessMode(); if (commitLogWriteDiskAccessMode != conf.commitlog_disk_access_mode) logger.info("commitlog_disk_access_mode resolved to: {}", commitLogWriteDiskAccessMode); - if (conf.accord_journal_directory == null) + if (conf.accord.journal_directory == null) { - conf.accord_journal_directory = storagedirFor("accord_journal"); + conf.accord.journal_directory = storagedirFor("accord_journal"); } if (conf.hints_directory == null) @@ -802,8 +803,8 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m throw new ConfigurationException("local_system_data_file_directory must not be the same as any data_file_directories", false); if (datadir.equals(conf.commitlog_directory)) throw new ConfigurationException("commitlog_directory must not be the same as any data_file_directories", false); - if (datadir.equals(conf.accord_journal_directory)) - throw new ConfigurationException("accord_journal_directory must not be the same as any data_file_directories", false); + if (datadir.equals(conf.accord.journal_directory)) + throw new ConfigurationException("accord.journal_directory must not be the same as any data_file_directories", false); if (datadir.equals(conf.hints_directory)) throw new ConfigurationException("hints_directory must not be the same as any data_file_directories", false); if (datadir.equals(conf.saved_caches_directory)) @@ -819,8 +820,8 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m { if (conf.local_system_data_file_directory.equals(conf.commitlog_directory)) throw new ConfigurationException("local_system_data_file_directory must not be the same as the commitlog_directory", false); - if (conf.local_system_data_file_directory.equals(conf.accord_journal_directory)) - throw new ConfigurationException("local_system_data_file_directory must not be the same as the accord_journal_directory", false); + if (conf.local_system_data_file_directory.equals(conf.accord.journal_directory)) + throw new ConfigurationException("local_system_data_file_directory must not be the same as the accord.journal_directory", false); if (conf.local_system_data_file_directory.equals(conf.saved_caches_directory)) throw new ConfigurationException("local_system_data_file_directory must not be the same as the saved_caches_directory", false); if (conf.local_system_data_file_directory.equals(conf.hints_directory)) @@ -833,17 +834,17 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m FBUtilities.prettyPrintMemory(freeBytes)); } - if (conf.commitlog_directory.equals(conf.accord_journal_directory)) - throw new ConfigurationException("accord_journal_directory must not be the same as the commitlog_directory", false); + if (conf.commitlog_directory.equals(conf.accord.journal_directory)) + throw new ConfigurationException("accord.journal_directory must not be the same as the commitlog_directory", false); if (conf.commitlog_directory.equals(conf.hints_directory)) throw new ConfigurationException("hints_directory must not be the same as the commitlog_directory", false); if (conf.commitlog_directory.equals(conf.saved_caches_directory)) throw new ConfigurationException("saved_caches_directory must not be the same as the commitlog_directory", false); - if (conf.accord_journal_directory.equals(conf.hints_directory)) - throw new ConfigurationException("hints_directory must not be the same as the accord_journal_directory", false); - if (conf.accord_journal_directory.equals(conf.saved_caches_directory)) - throw new ConfigurationException("saved_caches_directory must not be the same as the accord_journal_directory", false); + if (conf.accord.journal_directory.equals(conf.hints_directory)) + throw new ConfigurationException("hints_directory must not be the same as the accord.journal_directory", false); + if (conf.accord.journal_directory.equals(conf.saved_caches_directory)) + throw new ConfigurationException("saved_caches_directory must not be the same as the accord.journal_directory", false); if (conf.hints_directory.equals(conf.saved_caches_directory)) throw new ConfigurationException("saved_caches_directory must not be the same as the hints_directory", false); @@ -1145,7 +1146,7 @@ else if (conf.max_value_size.toMebibytes() >= 2048) if (conf.audit_logging_options != null) setAuditLoggingOptions(conf.audit_logging_options); - if (conf.legacy_paxos_strategy == Config.LegacyPaxosStrategy.accord && !conf.accord_transactions_enabled) + if (conf.legacy_paxos_strategy == Config.LegacyPaxosStrategy.accord && !conf.accord.enabled) throw new ConfigurationException(NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE); } @@ -2140,9 +2141,9 @@ public static void createAllDirectories() throw new ConfigurationException("commitlog_directory must be specified", false); FileUtils.createDirectory(conf.commitlog_directory); - if (conf.accord_journal_directory == null) - throw new ConfigurationException("accord_journal_directory must be specified", false); - FileUtils.createDirectory(conf.accord_journal_directory); + if (conf.accord.journal_directory == null) + throw new ConfigurationException("accord.journal_directory must be specified", false); + FileUtils.createDirectory(conf.accord.journal_directory); if (conf.hints_directory == null) throw new ConfigurationException("hints_directory must be specified", false); @@ -3049,12 +3050,12 @@ public static void setCommitLogCompression(ParameterizedClass compressor) public static String getAccordJournalDirectory() { - return conf.accord_journal_directory; + return conf.accord.journal_directory; } public static void setAccordJournalDirectory(String path) { - conf.accord_journal_directory = path; + conf.accord.journal_directory = path; } public static Config.FlushCompression getFlushCompression() @@ -5218,17 +5219,17 @@ public static void setUseStatementsEnabled(boolean enabled) public static boolean getAccordTransactionsEnabled() { - return conf.accord_transactions_enabled; + return conf.accord.enabled; } public static void setAccordTransactionsEnabled(boolean b) { - conf.accord_transactions_enabled = b; + conf.accord.enabled = b; } public static int getAccordShardCount() { - return conf.accord_shard_count.or(DatabaseDescriptor::getAvailableProcessors); + return conf.accord.shard_count.or(DatabaseDescriptor::getAvailableProcessors); } public static boolean getForceNewPreparedStatementBehaviour() diff --git a/src/java/org/apache/cassandra/config/DurationSpec.java b/src/java/org/apache/cassandra/config/DurationSpec.java index bf0fc21334ad..01a7d70b5f46 100644 --- a/src/java/org/apache/cassandra/config/DurationSpec.java +++ b/src/java/org/apache/cassandra/config/DurationSpec.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.config; +import java.time.Duration; import java.util.Arrays; import java.util.Objects; import java.util.concurrent.TimeUnit; @@ -137,6 +138,11 @@ public TimeUnit unit() return unit; } + public Duration toDuration() + { + return Duration.of(quantity(), unit().toChronoUnit()); + } + /** * @param symbol the time unit symbol * @return the time unit associated to the specified symbol diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index fa7250f969c0..e56fd1ec1d22 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -99,7 +99,7 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps; %s statement %s"; public static final String EMPTY_TRANSACTION_MESSAGE = "Transaction contains no reads or writes"; public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; - public static final String TRANSACTIONS_DISABLED_MESSAGE = "Accord transactions are disabled. (See accord_transactions_enabled in cassandra.yaml)"; + public static final String TRANSACTIONS_DISABLED_MESSAGE = "Accord transactions are disabled. (See accord.enabled in cassandra.yaml)"; public static final String ILLEGAL_RANGE_QUERY_MESSAGE = "Range queries are not allowed for reads within a transaction; %s %s"; static class NamedSelect diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java b/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java new file mode 100644 index 000000000000..a17a9fc84478 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.time.Duration; + +import accord.config.LocalConfig; +import org.apache.cassandra.config.Config; + +public class AccordConfiguration implements LocalConfig +{ + private final Config config; + + public AccordConfiguration(Config config) + { + this.config = config; + } + + @Override + public Duration getProgressLogScheduleDelay() + { + return config.accord.progress_log_schedule_delay.toDuration(); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 95ef73f0e793..2a4ed5a2a76a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -31,6 +31,7 @@ import org.slf4j.LoggerFactory; import accord.api.Result; +import accord.config.LocalConfig; import accord.coordinate.Preempted; import accord.coordinate.Timeout; import accord.impl.AbstractConfigurationService; @@ -105,6 +106,7 @@ public class AccordService implements IAccordService, Shutdownable private final AccordDataStore dataStore; private final AccordJournal journal; private final AccordVerbHandler verbHandler; + private final LocalConfig configuration; private static final IAccordService NOOP_SERVICE = new IAccordService() { @@ -117,13 +119,13 @@ public IVerbHandler verbHandler() @Override public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) { - throw new UnsupportedOperationException("No accord transaction should be executed when accord_transactions_enabled = false in cassandra.yaml"); + throw new UnsupportedOperationException("No accord transaction should be executed when accord.enabled = false in cassandra.yaml"); } @Override public long currentEpoch() { - throw new UnsupportedOperationException("Cannot return epoch when accord_transactions_enabled = false in cassandra.yaml"); + throw new UnsupportedOperationException("Cannot return epoch when accord.enabled = false in cassandra.yaml"); } @Override @@ -132,7 +134,7 @@ public void setCacheSize(long kb) { } @Override public TopologyManager topology() { - throw new UnsupportedOperationException("Cannot return topology when accord_transactions_enabled = false in cassandra.yaml"); + throw new UnsupportedOperationException("Cannot return topology when accord.enabled = false in cassandra.yaml"); } @Override @@ -227,6 +229,7 @@ private AccordService() this.scheduler = new AccordScheduler(); this.dataStore = new AccordDataStore(); this.journal = new AccordJournal(); + this.configuration = new AccordConfiguration(DatabaseDescriptor.getRawConfig()); this.node = new Node(localId, messageSink, this::handleLocalMessage, @@ -240,7 +243,8 @@ private AccordService() scheduler, SizeOfIntersectionSorter.SUPPLIER, SimpleProgressLog::new, - AccordCommandStores.factory(journal)); + AccordCommandStores.factory(journal), + configuration); this.nodeShutdown = toShutdownable(node); this.verbHandler = new AccordVerbHandler<>(node, configService, journal); } diff --git a/test/conf/cassandra-mtls.yaml b/test/conf/cassandra-mtls.yaml index 356d3e918dde..e80c2ac296e4 100644 --- a/test/conf/cassandra-mtls.yaml +++ b/test/conf/cassandra-mtls.yaml @@ -25,7 +25,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal cdc_raw_directory: build/test/cassandra/cdc_raw cdc_enabled: false hints_directory: build/test/cassandra/hints @@ -88,3 +87,5 @@ authenticator: class_name : org.apache.cassandra.auth.MutualTlsAuthenticator parameters : validator_class_name: org.apache.cassandra.auth.SpiffeCertificateValidator +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-murmur.yaml b/test/conf/cassandra-murmur.yaml index c3f7442aa64e..75a208e5fa61 100644 --- a/test/conf/cassandra-murmur.yaml +++ b/test/conf/cassandra-murmur.yaml @@ -8,7 +8,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal cdc_raw_directory: build/test/cassandra/cdc_raw cdc_enabled: false hints_directory: build/test/cassandra/hints @@ -43,3 +42,5 @@ user_defined_functions_enabled: true scripted_user_defined_functions_enabled: false sasi_indexes_enabled: true materialized_views_enabled: true +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-old.yaml b/test/conf/cassandra-old.yaml index 0cda4138d717..000df1148ff4 100644 --- a/test/conf/cassandra-old.yaml +++ b/test/conf/cassandra-old.yaml @@ -9,7 +9,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size_in_mb: 5 commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw @@ -57,3 +56,5 @@ internode_send_buff_size_in_bytes: 5 internode_recv_buff_size_in_bytes: 5 max_hint_window_in_ms: 10800000 cache_load_timeout_seconds: 35 +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-pem-jks-sslcontextfactory.yaml b/test/conf/cassandra-pem-jks-sslcontextfactory.yaml index 0e8a83f0aa6d..5c27c7b9f951 100644 --- a/test/conf/cassandra-pem-jks-sslcontextfactory.yaml +++ b/test/conf/cassandra-pem-jks-sslcontextfactory.yaml @@ -27,7 +27,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw @@ -150,3 +149,5 @@ stream_throughput_outbound: 24MiB/s sasi_indexes_enabled: true materialized_views_enabled: true file_cache_enabled: true +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml b/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml index a3146b79ab3f..58c2e9293a45 100644 --- a/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml +++ b/test/conf/cassandra-pem-sslcontextfactory-invalidconfiguration.yaml @@ -27,7 +27,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw @@ -147,3 +146,5 @@ stream_throughput_outbound: 24MiB/s sasi_indexes_enabled: true materialized_views_enabled: true file_cache_enabled: true +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-pem-sslcontextfactory.yaml b/test/conf/cassandra-pem-sslcontextfactory.yaml index d382d9d64bbf..ef68105e94c8 100644 --- a/test/conf/cassandra-pem-sslcontextfactory.yaml +++ b/test/conf/cassandra-pem-sslcontextfactory.yaml @@ -27,7 +27,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw @@ -151,3 +150,5 @@ stream_throughput_outbound: 24MiB/s sasi_indexes_enabled: true materialized_views_enabled: true file_cache_enabled: true +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-seeds.yaml b/test/conf/cassandra-seeds.yaml index 3a0fbf1831b4..1049e27fa891 100644 --- a/test/conf/cassandra-seeds.yaml +++ b/test/conf/cassandra-seeds.yaml @@ -9,7 +9,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal cdc_raw_directory: build/test/cassandra/cdc_raw cdc_enabled: false hints_directory: build/test/cassandra/hints @@ -42,3 +41,5 @@ row_cache_class_name: org.apache.cassandra.cache.OHCProvider row_cache_size: 16MiB user_defined_functions_enabled: true scripted_user_defined_functions_enabled: false +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml b/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml index 6f83ec334143..26db768f5cb7 100644 --- a/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml +++ b/test/conf/cassandra-sslcontextfactory-invalidconfiguration.yaml @@ -27,7 +27,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw @@ -81,3 +80,5 @@ stream_throughput_outbound: 23841858MiB/s sasi_indexes_enabled: true materialized_views_enabled: true file_cache_enabled: true +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra-sslcontextfactory.yaml b/test/conf/cassandra-sslcontextfactory.yaml index a570bdf5f626..153d2f924532 100644 --- a/test/conf/cassandra-sslcontextfactory.yaml +++ b/test/conf/cassandra-sslcontextfactory.yaml @@ -27,7 +27,6 @@ commitlog_sync: periodic commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog -accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw @@ -84,3 +83,5 @@ stream_throughput_outbound: 23841858MiB/s sasi_indexes_enabled: true materialized_views_enabled: true file_cache_enabled: true +accord: + journal_directory: build/test/cassandra/accord_journal diff --git a/test/conf/cassandra.yaml b/test/conf/cassandra.yaml index c0e732cec2a9..e78416c883cd 100644 --- a/test/conf/cassandra.yaml +++ b/test/conf/cassandra.yaml @@ -10,7 +10,6 @@ commitlog_sync_period: 10s commitlog_segment_size: 5MiB commitlog_directory: build/test/cassandra/commitlog commitlog_disk_access_mode: legacy -accord_journal_directory: build/test/cassandra/accord_journal # commitlog_compression: # - class_name: LZ4Compressor cdc_raw_directory: build/test/cassandra/cdc_raw @@ -57,7 +56,6 @@ file_cache_enabled: true full_query_logging_options: allow_nodetool_archive_command: true auto_hints_cleanup_enabled: true -accord_transactions_enabled: true heap_dump_path: build/test dump_heap_on_uncaught_exception: false @@ -116,3 +114,9 @@ memtable: class_name: TrieMemtable # Note: keep the memtable configuration at the end of the file, so that the default mapping can be changed without # duplicating the whole section above. + +accord: + enabled: true + journal_directory: build/test/cassandra/accord_journal + shard_count: 4 + progress_log_schedule_delay: 1s diff --git a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java index 66bd85689e9b..4d0741679cb1 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java @@ -31,6 +31,8 @@ import com.vdurmont.semver4j.Semver; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.AccordSpec; +import org.apache.cassandra.config.OptionaldPositiveInt; import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.shared.NetworkTopology; @@ -72,7 +74,7 @@ private InstanceConfig(int num, String commitlog_directory, String hints_directory, String cdc_raw_directory, - String accord_journal_directory, + AccordSpec accord, Collection initial_token, int storage_port, int native_transport_port, @@ -93,7 +95,10 @@ private InstanceConfig(int num, .set("commitlog_directory", commitlog_directory) .set("hints_directory", hints_directory) .set("cdc_raw_directory", cdc_raw_directory) - .set("accord_journal_directory", accord_journal_directory) + .set("accord.enabled", accord.enabled) + .set("accord.journal_directory", accord.journal_directory) + .set("accord.shard_count", accord.shard_count.toString()) + .set("accord.progress_log_schedule_delay", accord.progress_log_schedule_delay.toString()) .set("partitioner", "org.apache.cassandra.dht.Murmur3Partitioner") .set("start_native_transport", true) .set("concurrent_writes", 2) @@ -115,7 +120,6 @@ private InstanceConfig(int num, // required settings for dtest functionality .set("diagnostic_events_enabled", true) .set("auto_bootstrap", false) - .set("accord_transactions_enabled", true) // capacities that are based on `totalMemory` that should be fixed size .set("index_summary_capacity", "50MiB") .set("counter_cache_size", "50MiB") @@ -318,6 +322,10 @@ public static InstanceConfig generate(int nodeNum, int datadirCount) { int seedNode = provisionStrategy.seedNodeNum(); + AccordSpec accordSpec = new AccordSpec(); + accordSpec.enabled = true; + accordSpec.journal_directory = String.format("%s/node%d/accord_journal", root, nodeNum); + accordSpec.shard_count = new OptionaldPositiveInt(4); return new InstanceConfig(nodeNum, networkTopology, provisionStrategy.ipAddress(nodeNum), @@ -331,7 +339,7 @@ public static InstanceConfig generate(int nodeNum, String.format("%s/node%d/commitlog", root, nodeNum), String.format("%s/node%d/hints", root, nodeNum), String.format("%s/node%d/cdc", root, nodeNum), - String.format("%s/node%d/accord_journal", root, nodeNum), + accordSpec, tokens, provisionStrategy.storagePort(nodeNum), provisionStrategy.nativeTransportPort(nodeNum), diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index cd202fc72935..377a9199403a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -174,7 +174,7 @@ public void bootstrapTest() throws Throwable .withoutVNodes() .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(expandedNodeCount)) .withNodeIdTopology(NetworkTopology.singleDcNetworkTopology(expandedNodeCount, "dc0", "rack0")) - .withConfig(config -> config.set("accord_shard_count", 2).with(NETWORK, GOSSIP)) + .withConfig(config -> config.set("accord.shard_count", 2).with(NETWORK, GOSSIP)) .start()) { long initialMax = maxEpoch(cluster); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java index e4bf7ccb7cb4..06b44805b365 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java @@ -25,8 +25,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import org.apache.cassandra.schema.Schema; -import org.assertj.core.api.Assertions; import org.junit.Test; import org.apache.cassandra.db.virtual.AccordVirtualTables; @@ -39,14 +37,15 @@ import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.AssertionUtils; - -import static org.junit.Assert.assertEquals; +import org.assertj.core.api.Assertions; import static org.apache.cassandra.config.DatabaseDescriptor.NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.TRANSACTIONS_DISABLED_MESSAGE; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.junit.Assert.assertEquals; public class AccordFeatureFlagTest extends TestBaseImpl { @@ -55,7 +54,7 @@ public void shouldHideAccordTransactions() throws IOException { try (Cluster cluster = init(Cluster.build(1) .withoutVNodes() - .withConfig(c -> c.with(Feature.NETWORK).set("accord_transactions_enabled", "false")) + .withConfig(c -> c.with(Feature.NETWORK).set("accord.enabled", "false")) .start())) { cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (k int, c int, v int, primary key (k, c))"); @@ -91,7 +90,7 @@ public void shouldFailOnAccordMigrationWithAccordDisabled() throws IOException try (Cluster cluster = Cluster.build(1) .withoutVNodes() .withConfig(c -> c.with(Feature.NETWORK) - .set("accord_transactions_enabled", "false") + .set("accord.enabled", "false") .set("legacy_paxos_strategy", "accord")).createWithoutStarting()) { diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index e7b808cceb6e..a3f9d2325ff2 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -72,11 +72,12 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.auth.Cacheable", "org.apache.cassandra.auth.IAuthenticator", "org.apache.cassandra.auth.IAuthorizer", - "org.apache.cassandra.auth.IInternodeAuthenticator", "org.apache.cassandra.auth.ICIDRAuthorizer", "org.apache.cassandra.auth.ICIDRAuthorizer$CIDRAuthorizerMode", + "org.apache.cassandra.auth.IInternodeAuthenticator", "org.apache.cassandra.auth.INetworkAuthorizer", "org.apache.cassandra.auth.IRoleManager", + "org.apache.cassandra.config.AccordSpec", "org.apache.cassandra.config.CassandraRelevantProperties", "org.apache.cassandra.config.CassandraRelevantProperties$PropertyConverter", "org.apache.cassandra.config.Config", @@ -129,8 +130,8 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.DurationSpec$IntMillisecondsBound", "org.apache.cassandra.config.DurationSpec$IntMinutesBound", "org.apache.cassandra.config.DurationSpec$IntSecondsBound", - "org.apache.cassandra.config.DurationSpec$LongMillisecondsBound", "org.apache.cassandra.config.DurationSpec$LongMicrosecondsBound", + "org.apache.cassandra.config.DurationSpec$LongMillisecondsBound", "org.apache.cassandra.config.DurationSpec$LongNanosecondsBound", "org.apache.cassandra.config.DurationSpec$LongSecondsBound", "org.apache.cassandra.config.EncryptionOptions", @@ -151,24 +152,24 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.ParameterizedClass", "org.apache.cassandra.config.RepairConfig", "org.apache.cassandra.config.RepairRetrySpec", + "org.apache.cassandra.config.ReplicaFilteringProtectionOptions", "org.apache.cassandra.config.RetrySpec", "org.apache.cassandra.config.RetrySpec$MaxAttempt", "org.apache.cassandra.config.RetrySpec$Type", - "org.apache.cassandra.config.ReplicaFilteringProtectionOptions", "org.apache.cassandra.config.StartupChecksOptions", + "org.apache.cassandra.config.StartupChecksOptions", + "org.apache.cassandra.config.StorageAttachedIndexOptions", + "org.apache.cassandra.config.SubnetGroups", "org.apache.cassandra.config.SubnetGroups", "org.apache.cassandra.config.TrackWarnings", + "org.apache.cassandra.config.TrackWarnings", + "org.apache.cassandra.config.TransparentDataEncryptionOptions", "org.apache.cassandra.config.TransparentDataEncryptionOptions", "org.apache.cassandra.config.YamlConfigurationLoader", "org.apache.cassandra.config.YamlConfigurationLoader$CustomConstructor", + "org.apache.cassandra.config.YamlConfigurationLoader$CustomConstructor", "org.apache.cassandra.config.YamlConfigurationLoader$PropertiesChecker", "org.apache.cassandra.config.YamlConfigurationLoader$PropertiesChecker$1", - "org.apache.cassandra.config.YamlConfigurationLoader$CustomConstructor", - "org.apache.cassandra.config.TransparentDataEncryptionOptions", - "org.apache.cassandra.config.StartupChecksOptions", - "org.apache.cassandra.config.SubnetGroups", - "org.apache.cassandra.config.TrackWarnings", - "org.apache.cassandra.config.StorageAttachedIndexOptions", "org.apache.cassandra.db.ConsistencyLevel", "org.apache.cassandra.db.commitlog.AbstractCommitLogSegmentManager", "org.apache.cassandra.db.commitlog.CommitLog", @@ -236,9 +237,9 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.io.sstable.format.SSTableReaderLoadingBuilder", "org.apache.cassandra.io.sstable.format.SSTableReaderWithFilter", "org.apache.cassandra.io.sstable.format.SSTableReaderWithFilter$Builder", - "org.apache.cassandra.io.sstable.format.SortedTableReaderLoadingBuilder", "org.apache.cassandra.io.sstable.format.SSTableWriter", "org.apache.cassandra.io.sstable.format.SSTableWriter$Builder", + "org.apache.cassandra.io.sstable.format.SortedTableReaderLoadingBuilder", "org.apache.cassandra.io.sstable.format.SortedTableWriter", "org.apache.cassandra.io.sstable.format.SortedTableWriter$Builder", "org.apache.cassandra.io.sstable.format.Version", @@ -282,6 +283,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.locator.SeedProvider", "org.apache.cassandra.locator.SimpleSeedProvider", "org.apache.cassandra.locator.SnitchAdapter", + "org.apache.cassandra.security.AbstractCryptoProvider", "org.apache.cassandra.security.EncryptionContext", "org.apache.cassandra.security.ISslContextFactory", "org.apache.cassandra.security.SSLFactory", @@ -296,12 +298,12 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.utils.LocalizeString", "org.apache.cassandra.utils.SystemInfo", "org.apache.cassandra.utils.Pair", + "org.apache.cassandra.utils.StorageCompatibilityMode", "org.apache.cassandra.utils.binlog.BinLogOptions", "org.apache.cassandra.utils.concurrent.RefCounted", "org.apache.cassandra.utils.concurrent.SelfRefCounted", "org.apache.cassandra.utils.concurrent.Transactional", "org.apache.cassandra.utils.concurrent.UncheckedInterruptedException", - "org.apache.cassandra.utils.StorageCompatibilityMode" }; static final Set checkedClasses = new HashSet<>(Arrays.asList(validClasses)); diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index bbc7bf2c82e6..372619d1457d 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -436,11 +436,11 @@ public void process() { Config c = fromType(type, "available_processors", 4); assertThat(c.available_processors).isEqualTo(new OptionaldPositiveInt(4)); - assertThat(c.accord_shard_count).isEqualTo(OptionaldPositiveInt.UNDEFINED); + assertThat(c.accord.shard_count).isEqualTo(OptionaldPositiveInt.UNDEFINED); - c = fromType(type, "available_processors", 3, "accord_shard_count", 1); + c = fromType(type, "available_processors", 3, "accord.shard_count", 1); assertThat(c.available_processors).isEqualTo(new OptionaldPositiveInt(3)); - assertThat(c.accord_shard_count).isEqualTo(new OptionaldPositiveInt(1)); + assertThat(c.accord.shard_count).isEqualTo(new OptionaldPositiveInt(1)); } } From 18d3aa47bc7cd9dbb7bcf86cf012a1096369847f Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 18 Oct 2023 11:41:46 -0700 Subject: [PATCH 076/340] CEP-15: (C*) Implement TopologySorter to prioritise hosts based on DynamicSnitch and/or topology layout patch by Blake Eggleston, David Capwell; reviewed by Blake Eggleston for CASSANDRA-18929 --- modules/accord | 2 +- .../AbstractNetworkTopologySnitch.java | 16 +++ .../locator/DynamicEndpointSnitch.java | 47 +++++-- .../apache/cassandra/locator/Endpoint.java | 24 ++++ .../cassandra/locator/IEndpointSnitch.java | 13 +- .../locator/NetworkTopologyProximity.java | 22 +++ .../cassandra/locator/NoOpProximity.java | 23 ++++ .../cassandra/locator/NodeProximity.java | 14 ++ .../org/apache/cassandra/locator/Replica.java | 3 +- .../cassandra/locator/ReplicaCollection.java | 4 +- .../cassandra/locator/SimpleSnitch.java | 16 +++ .../cassandra/locator/SnitchAdapter.java | 14 ++ .../service/accord/AccordJournal.java | 2 +- .../service/accord/AccordService.java | 17 ++- .../service/accord/AccordTopologyUtils.java | 10 +- .../accord/api/AccordTopologySorter.java | 130 ++++++++++++++++++ .../accord/api/CompositeTopologySorter.java | 84 +++++++++++ .../org/apache/cassandra/utils/Sortable.java | 28 ++++ .../paxos/AccordClusterSimulation.java | 2 +- .../paxos/PaxosClusterSimulation.java | 1 + .../simulator/systems/SimulatedSnitch.java | 49 +++++-- .../org/apache/cassandra/ServerTestUtils.java | 15 ++ .../config/DatabaseDescriptorRefTest.java | 1 + .../NodeProximityEndpointCompareTest.java | 72 ++++++++++ 24 files changed, 580 insertions(+), 29 deletions(-) create mode 100644 src/java/org/apache/cassandra/locator/Endpoint.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java create mode 100644 src/java/org/apache/cassandra/utils/Sortable.java create mode 100644 test/unit/org/apache/cassandra/locator/NodeProximityEndpointCompareTest.java diff --git a/modules/accord b/modules/accord index 5ffe3d504bb5..d99ad84cc49a 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 5ffe3d504bb5aa1ff1c2b96d817791e40f7ced0f +Subproject commit d99ad84cc49a96299a9ae55183e38ee6f1aa3f47 diff --git a/src/java/org/apache/cassandra/locator/AbstractNetworkTopologySnitch.java b/src/java/org/apache/cassandra/locator/AbstractNetworkTopologySnitch.java index b6901e27660c..ff0842fdf59d 100644 --- a/src/java/org/apache/cassandra/locator/AbstractNetworkTopologySnitch.java +++ b/src/java/org/apache/cassandra/locator/AbstractNetworkTopologySnitch.java @@ -17,6 +17,10 @@ */ package org.apache.cassandra.locator; +import java.util.Comparator; + +import org.apache.cassandra.utils.Sortable; + /** * An endpoint snitch tells Cassandra information about network topology that it can use to route * requests more efficiently. @@ -31,4 +35,16 @@ public int compareEndpoints(InetAddressAndPort address, Replica r1, Replica r2) { return proximity.compareEndpoints(address, r1, r2); } + + @Override + public boolean supportCompareByEndpoint() + { + return proximity.supportCompareByEndpoint(); + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + return proximity.endpointComparator(address, addresses); + } } diff --git a/src/java/org/apache/cassandra/locator/DynamicEndpointSnitch.java b/src/java/org/apache/cassandra/locator/DynamicEndpointSnitch.java index 1d3810613f6d..003347ef29cd 100644 --- a/src/java/org/apache/cassandra/locator/DynamicEndpointSnitch.java +++ b/src/java/org/apache/cassandra/locator/DynamicEndpointSnitch.java @@ -42,6 +42,7 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MBeanWrapper; +import org.apache.cassandra.utils.Sortable; import static org.apache.cassandra.config.CassandraRelevantProperties.IGNORE_DYNAMIC_SNITCH_SEVERITY; @@ -170,10 +171,13 @@ private > C sortedByProximityWithBadnes // TODO: avoid copy replicas = delegate.sortedByProximity(address, replicas); - HashMap scores = this.scores; // Make sure the score don't change in the middle of the loop below - // (which wouldn't really matter here but its cleaner that way). - ArrayList subsnitchOrderedScores = new ArrayList<>(replicas.size()); - for (Replica replica : replicas) + return shouldSortByScore(scores, replicas) ? sortedByProximityWithScore(address, replicas) : replicas; + } + + private > boolean shouldSortByScore(HashMap scores, C sortedReplicas) + { + ArrayList subsnitchOrderedScores = new ArrayList<>(sortedReplicas.size()); + for (Endpoint replica : sortedReplicas) { Double score = scores.get(replica.endpoint()); if (score == null) @@ -193,12 +197,10 @@ private > C sortedByProximityWithBadnes for (Double subsnitchScore : subsnitchOrderedScores) { if (subsnitchScore > (sortedScoreIterator.next() * badnessThreshold)) - { - return sortedByProximityWithScore(address, replicas); - } + return true; } - return replicas; + return false; } private static double defaultStore(InetAddressAndPort target) @@ -208,6 +210,11 @@ private static double defaultStore(InetAddressAndPort target) // Compare endpoints given an immutable snapshot of the scores private int compareEndpoints(InetAddressAndPort target, Replica a1, Replica a2, Map scores) + { + return compareEndpoints(a1, a2, scores, (a, b) -> delegate.compareEndpoints(target, a, b)); + } + + private int compareEndpoints(T a1, T a2, Map scores, Comparator subCompare) { Double scored1 = scores.get(a1.endpoint()); Double scored2 = scores.get(a2.endpoint()); @@ -223,7 +230,7 @@ private int compareEndpoints(InetAddressAndPort target, Replica a1, Replica a2, } if (scored1.equals(scored2)) - return delegate.compareEndpoints(target, a1, a2); + return subCompare.compare(a1, a2); if (scored1 < scored2) return -1; else @@ -409,4 +416,26 @@ private double maxScore(ReplicaCollection endpoints) } return maxScore; } + + @Override + public boolean supportCompareByEndpoint() + { + return delegate.supportCompareByEndpoint(); + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + if (!delegate.supportCompareByEndpoint()) + throw new UnsupportedOperationException(); + assert address.equals(FBUtilities.getBroadcastAddressAndPort()); // we only know about ourself + Comparator compare = delegate.endpointComparator(address, addresses); + if (addresses.size() < 2) + return compare; + HashMap scores = this.scores; + Comparator compareWithScore = (r1, r2) -> compareEndpoints(r1, r2, scores, compare); + return dynamicBadnessThreshold == 0 || shouldSortByScore(scores, addresses.sorted(compare)) ? + compareWithScore : + compare; + } } diff --git a/src/java/org/apache/cassandra/locator/Endpoint.java b/src/java/org/apache/cassandra/locator/Endpoint.java new file mode 100644 index 000000000000..5a44bfd61c8c --- /dev/null +++ b/src/java/org/apache/cassandra/locator/Endpoint.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.locator; + +public interface Endpoint +{ + InetAddressAndPort endpoint(); +} diff --git a/src/java/org/apache/cassandra/locator/IEndpointSnitch.java b/src/java/org/apache/cassandra/locator/IEndpointSnitch.java index 4d5033681083..ef275ec12ff7 100644 --- a/src/java/org/apache/cassandra/locator/IEndpointSnitch.java +++ b/src/java/org/apache/cassandra/locator/IEndpointSnitch.java @@ -18,9 +18,11 @@ package org.apache.cassandra.locator; import java.net.InetSocketAddress; +import java.util.Comparator; import java.util.Set; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Sortable; /** * This interface helps determine location of node in the datacenter relative to another node. @@ -101,5 +103,14 @@ default boolean preferLocalConnections() { return false; } -} + default boolean supportCompareByEndpoint() + { + return false; + } + + default > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + throw new UnsupportedOperationException(); + } +} diff --git a/src/java/org/apache/cassandra/locator/NetworkTopologyProximity.java b/src/java/org/apache/cassandra/locator/NetworkTopologyProximity.java index eddcb3630364..f7f30dafb7e4 100644 --- a/src/java/org/apache/cassandra/locator/NetworkTopologyProximity.java +++ b/src/java/org/apache/cassandra/locator/NetworkTopologyProximity.java @@ -20,10 +20,18 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.utils.Sortable; + +import java.util.Comparator; public class NetworkTopologyProximity extends BaseProximity { public int compareEndpoints(InetAddressAndPort address, Replica r1, Replica r2) + { + return compareByEndpoints(address, r1, r2); + } + + public int compareByEndpoints(InetAddressAndPort address, Endpoint r1, Endpoint r2) { InetAddressAndPort a1 = r1.endpoint(); InetAddressAndPort a2 = r2.endpoint(); @@ -48,4 +56,18 @@ public int compareEndpoints(InetAddressAndPort address, Replica r1, Replica r2) return 1; return 0; } + + @Override + public boolean supportCompareByEndpoint() + { + return true; + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + if (!supportCompareByEndpoint()) + throw new UnsupportedOperationException(); + return (a, b) -> compareByEndpoints(address, a, b); + } } diff --git a/src/java/org/apache/cassandra/locator/NoOpProximity.java b/src/java/org/apache/cassandra/locator/NoOpProximity.java index 342f12e61958..69f5250d705d 100644 --- a/src/java/org/apache/cassandra/locator/NoOpProximity.java +++ b/src/java/org/apache/cassandra/locator/NoOpProximity.java @@ -18,6 +18,10 @@ package org.apache.cassandra.locator; +import org.apache.cassandra.utils.Sortable; + +import java.util.Comparator; + public class NoOpProximity extends BaseProximity { @Override @@ -34,4 +38,23 @@ public int compareEndpoints(InetAddressAndPort target, Replica r1, Replica r2) // Collections.sort is guaranteed to be stable) return 0; } + + @Override + public boolean supportCompareByEndpoint() + { + return true; + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + return this::compareByEndpoint; + } + + private int compareByEndpoint(Endpoint a, Endpoint b) + { + // Making all endpoints equal ensures we won't change the original ordering (since + // Collections.sort is guaranteed to be stable) + return 0; + } } diff --git a/src/java/org/apache/cassandra/locator/NodeProximity.java b/src/java/org/apache/cassandra/locator/NodeProximity.java index cbb7158aafa8..af7c17eaacdf 100644 --- a/src/java/org/apache/cassandra/locator/NodeProximity.java +++ b/src/java/org/apache/cassandra/locator/NodeProximity.java @@ -18,6 +18,10 @@ package org.apache.cassandra.locator; +import org.apache.cassandra.utils.Sortable; + +import java.util.Comparator; + public interface NodeProximity { /** @@ -35,4 +39,14 @@ public interface NodeProximity * to be faster than 2 sequential queries, one against l1 followed by one against l2. */ public boolean isWorthMergingForRangeQuery(ReplicaCollection merged, ReplicaCollection l1, ReplicaCollection l2); + + default boolean supportCompareByEndpoint() + { + return false; + } + + default > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + throw new UnsupportedOperationException(); + } } diff --git a/src/java/org/apache/cassandra/locator/Replica.java b/src/java/org/apache/cassandra/locator/Replica.java index b1f68b2101e3..41a16a459be3 100644 --- a/src/java/org/apache/cassandra/locator/Replica.java +++ b/src/java/org/apache/cassandra/locator/Replica.java @@ -51,7 +51,7 @@ * and such and what the result is WRT to transientness. Definitely avoid creating fake Replicas with misinformation * about endpoints, ranges, or transientness. */ -public final class Replica implements Comparable +public final class Replica implements Comparable, Endpoint { public static final IPartitionerDependentSerializer serializer = new Serializer(); @@ -105,6 +105,7 @@ public String toString() return (full ? "Full" : "Transient") + '(' + endpoint() + ',' + range + ')'; } + @Override public final InetAddressAndPort endpoint() { return endpoint; diff --git a/src/java/org/apache/cassandra/locator/ReplicaCollection.java b/src/java/org/apache/cassandra/locator/ReplicaCollection.java index b679b506b01e..f1dac0042b78 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaCollection.java +++ b/src/java/org/apache/cassandra/locator/ReplicaCollection.java @@ -24,11 +24,13 @@ import java.util.function.Predicate; import java.util.stream.Stream; +import org.apache.cassandra.utils.Sortable; + /** * A collection like class for Replica objects. Represents both a well defined order on the contained Replica objects, * and efficient methods for accessing the contained Replicas, directly and as a projection onto their endpoints and ranges. */ -public interface ReplicaCollection> extends Iterable +public interface ReplicaCollection> extends Sortable { /** * @return a Set of the endpoints of the contained Replicas. diff --git a/src/java/org/apache/cassandra/locator/SimpleSnitch.java b/src/java/org/apache/cassandra/locator/SimpleSnitch.java index e06316fa2666..953f2e5c897c 100644 --- a/src/java/org/apache/cassandra/locator/SimpleSnitch.java +++ b/src/java/org/apache/cassandra/locator/SimpleSnitch.java @@ -17,6 +17,10 @@ */ package org.apache.cassandra.locator; +import java.util.Comparator; + +import org.apache.cassandra.utils.Sortable; + /** * A simple endpoint snitch implementation that treats Strategy order as proximity, * allowing non-read-repaired reads to prefer a single endpoint, which improves @@ -58,4 +62,16 @@ public boolean isWorthMergingForRangeQuery(ReplicaCollection merged, ReplicaC { return sorter.isWorthMergingForRangeQuery(merged, l1, l2); } + + @Override + public boolean supportCompareByEndpoint() + { + return sorter.supportCompareByEndpoint(); + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + return sorter.endpointComparator(address, addresses); + } } diff --git a/src/java/org/apache/cassandra/locator/SnitchAdapter.java b/src/java/org/apache/cassandra/locator/SnitchAdapter.java index 1a32dd64970c..b90e6fed1615 100644 --- a/src/java/org/apache/cassandra/locator/SnitchAdapter.java +++ b/src/java/org/apache/cassandra/locator/SnitchAdapter.java @@ -18,12 +18,14 @@ package org.apache.cassandra.locator; +import java.util.Comparator; import java.util.HashSet; import java.util.Set; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.utils.Sortable; public class SnitchAdapter implements InitialLocationProvider, NodeProximity, NodeAddressConfig { @@ -81,4 +83,16 @@ public boolean preferLocalConnections() { return snitch.preferLocalConnections(); } + + @Override + public boolean supportCompareByEndpoint() + { + return snitch.supportCompareByEndpoint(); + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + return snitch.endpointComparator(address, addresses); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 23554c7feb1c..a323edaf1742 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -613,7 +613,7 @@ TxnId txnId(Message message) static { // make noise early if we forget to update our version mappings - Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_50); + Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_50, "Expected current version to be %d but given %d", MessagingService.VERSION_50, MessagingService.current_version); } private static int msVersion(int version) diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 2a4ed5a2a76a..5de4daa233a8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -71,6 +71,8 @@ import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; import org.apache.cassandra.service.accord.api.AccordScheduler; +import org.apache.cassandra.service.accord.api.AccordTopologySorter; +import org.apache.cassandra.service.accord.api.CompositeTopologySorter; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.accord.txn.TxnData; @@ -138,7 +140,17 @@ public TopologyManager topology() } @Override - public void startup() {} + public void startup() + { + try + { + AccordTopologySorter.checkSnitchSupported(DatabaseDescriptor.getNodeProximity()); + } + catch (Throwable t) + { + logger.warn("Current snitch is not compatable with Accord, make sure to fix the snitch before enabling Accord; {}", t.toString()); + } + } @Override public void shutdownAndWait(long timeout, TimeUnit unit) { } @@ -241,7 +253,8 @@ private AccordService() agent, new DefaultRandom(), scheduler, - SizeOfIntersectionSorter.SUPPLIER, + CompositeTopologySorter.create(SizeOfIntersectionSorter.SUPPLIER, + new AccordTopologySorter.Supplier(configService, DatabaseDescriptor.getNodeProximity())), SimpleProgressLog::new, AccordCommandStores.factory(journal), configuration); diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index d385cf5d3185..88a0d18b7fc6 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -35,6 +35,7 @@ import accord.utils.Invariants; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; @@ -47,7 +48,6 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; -import org.apache.cassandra.tcm.ownership.VersionedEndpoints; public class AccordTopologyUtils { @@ -56,7 +56,7 @@ static Node.Id tcmIdToAccord(NodeId nodeId) return new Node.Id(nodeId.id()); } - private static Shard createShard(TokenRange range, Directory directory, VersionedEndpoints.ForRange reads, VersionedEndpoints.ForRange writes) + private static Shard createShard(TokenRange range, Directory directory, EndpointsForRange reads, EndpointsForRange writes) { Function endpointMapper = e -> { NodeId tcmId = directory.peerId(e); @@ -106,8 +106,10 @@ public static List createShards(KeyspaceMetadata keyspace, DataPlacements List shards = new ArrayList<>(ranges.size()); for (Range range : ranges) { - VersionedEndpoints.ForRange reads = placement.reads.forRange(range); - VersionedEndpoints.ForRange writes = placement.reads.forRange(range); + // TODO (consider, low priority): flesh out how Accord and Transient Replicas work together + // Accord needs to be able to read the full data from a single replica, but with transient ones they may only have a hash. + EndpointsForRange reads = placement.reads.forRange(range).get().filter(r -> r.isFull()); + EndpointsForRange writes = placement.writes.forRange(range).get().filter(r -> r.isFull()); // TCM doesn't create wrap around ranges Invariants.checkArgument(!range.isWrapAround() || range.right.equals(range.right.minValue()), diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java b/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java new file mode 100644 index 000000000000..4f9eff850b0a --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Set; + +import accord.api.TopologySorter; +import accord.local.Node; +import accord.topology.ShardSelection; +import accord.topology.Topologies; +import accord.topology.Topology; +import org.apache.cassandra.locator.*; +import org.apache.cassandra.service.accord.AccordEndpointMapper; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Sortable; + +public class AccordTopologySorter implements TopologySorter +{ + public static class Supplier implements TopologySorter.Supplier + { + private final AccordEndpointMapper mapper; + private final NodeProximity proximity; + + public Supplier(AccordEndpointMapper mapper, NodeProximity proximity) + { + checkSnitchSupported(proximity); + this.mapper = mapper; + this.proximity = proximity; + } + + @Override + public TopologySorter get(Topology topologies) + { + return create(topologies.nodes()); + } + + @Override + public TopologySorter get(Topologies topologies) + { + return create(topologies.nodes()); + } + + private AccordTopologySorter create(Set nodes) + { + SortableEndpoints endpoints = SortableEndpoints.from(nodes, mapper); + Comparator comparator = proximity.endpointComparator(FBUtilities.getBroadcastAddressAndPort(), endpoints); + return new AccordTopologySorter(mapper, comparator); + } + } + + private final AccordEndpointMapper mapper; + + private final Comparator comparator; + private AccordTopologySorter(AccordEndpointMapper mapper, Comparator comparator) + { + this.mapper = mapper; + this.comparator = comparator; + } + + public static void checkSnitchSupported(NodeProximity proximity) + { + if (!proximity.supportCompareByEndpoint()) + { + if (proximity instanceof DynamicEndpointSnitch) + proximity = ((DynamicEndpointSnitch) proximity).delegate; + throw new IllegalArgumentException("Unsupported snitch " + proximity.getClass() + "; supportCompareByEndpoint returned false"); + } + } + + @Override + public int compare(Node.Id node1, Node.Id node2, ShardSelection shards) + { + return comparator.compare(() -> mapper.mappedEndpoint(node1), () -> mapper.mappedEndpoint(node2)); + } + + private static class EndpointTuple implements Endpoint + { + final InetAddressAndPort endpoint; + + private EndpointTuple(InetAddressAndPort endpoint) + { + this.endpoint = endpoint; + } + + @Override + public InetAddressAndPort endpoint() + { + return endpoint; + } + } + + private static class SortableEndpoints extends ArrayList implements Sortable + { + public SortableEndpoints(int initialCapacity) + { + super(initialCapacity); + } + + public SortableEndpoints sorted(Comparator comparator) + { + sort(comparator); + return this; + } + + static SortableEndpoints from(Set nodes, AccordEndpointMapper mapper) + { + SortableEndpoints result = new SortableEndpoints(nodes.size()); + nodes.forEach(id -> result.add(new EndpointTuple(mapper.mappedEndpoint(id)))); + return result; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java b/src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java new file mode 100644 index 000000000000..3886cde12d9b --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import accord.api.TopologySorter; +import accord.local.Node; +import accord.topology.ShardSelection; +import accord.topology.Topologies; +import accord.topology.Topology; + +public class CompositeTopologySorter implements TopologySorter +{ + public static class Supplier implements TopologySorter.Supplier + { + private final TopologySorter.Supplier[] delegates; + + private Supplier(TopologySorter.Supplier[] delegates) + { + this.delegates = delegates; + } + + @Override + public TopologySorter get(Topology topologies) + { + TopologySorter[] sorters = new TopologySorter[delegates.length]; + for (int i = 0; i < sorters.length; i++) + sorters[i] = delegates[i].get(topologies); + return new CompositeTopologySorter(sorters); + } + + @Override + public TopologySorter get(Topologies topologies) + { + TopologySorter[] sorters = new TopologySorter[delegates.length]; + for (int i = 0; i < sorters.length; i++) + sorters[i] = delegates[i].get(topologies); + return new CompositeTopologySorter(sorters); + } + } + + private final TopologySorter[] delegates; + + private CompositeTopologySorter(TopologySorter[] delegates) + { + this.delegates = delegates; + } + + public static TopologySorter.Supplier create(TopologySorter.Supplier... delegates) + { + switch (delegates.length) + { + case 0: throw new IllegalArgumentException("Can not create an empty sorter"); + case 1: return delegates[0]; + default: return new CompositeTopologySorter.Supplier(delegates); + } + } + + @Override + public int compare(Node.Id node1, Node.Id node2, ShardSelection shards) + { + for (int i = 0; i < delegates.length; i++) + { + int rc = delegates[i].compare(node1, node2, shards); + if (rc != 0) return rc; + } + return 0; + } +} diff --git a/src/java/org/apache/cassandra/utils/Sortable.java b/src/java/org/apache/cassandra/utils/Sortable.java new file mode 100644 index 000000000000..145967cb24f6 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/Sortable.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.Comparator; + +public interface Sortable> extends Iterable +{ + int size(); + + S sorted(Comparator comparator); +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java index 7f7cab1110bb..78e04454faba 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java @@ -63,7 +63,7 @@ public void applyHandicaps() AccordClusterSimulation(RandomSource random, long seed, int uniqueNum, Builder builder) throws IOException { super(random, seed, uniqueNum, builder, - config -> {}, + config -> config.set("storage_compatibility_mode", "NONE"), (simulated, schedulers, cluster, options) -> { int[] primaryKeys = primaryKeys(seed, builder.primaryKeyCount()); KindOfSequence.Period jitter = RandomSource.Choices.uniform(KindOfSequence.values()).choose(random) diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java index 03d7e61e7ff2..a0c66822116d 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java @@ -79,6 +79,7 @@ public PaxosClusterSimulation create(long seed) throws IOException .set("paxos_cache_size", (builder.stateCache != null ? builder.stateCache : random.uniformFloat() < 0.5) ? null : "0MiB") .set("paxos_state_purging", "repaired") .set("paxos_on_linearizability_violations", "log") + .set("storage_compatibility_mode", "NONE") , (simulated, schedulers, cluster, options) -> { int[] primaryKeys = primaryKeys(seed, builder.primaryKeyCount()); diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java index 55fe73c301f4..7692e0ae5c46 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java @@ -28,16 +28,49 @@ import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.IInstanceConfig; -import org.apache.cassandra.locator.IEndpointSnitch; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.Replica; -import org.apache.cassandra.locator.ReplicaCollection; +import org.apache.cassandra.locator.*; import org.apache.cassandra.simulator.cluster.NodeLookup; +import org.apache.cassandra.utils.Sortable; public class SimulatedSnitch extends NodeLookup { + private static class SimulatedProximity implements NodeProximity + { + @Override + public > C sortedByProximity(InetAddressAndPort address, C addresses) + { + return addresses.sorted(Comparator.comparingInt(SimulatedSnitch::asInt)); + } + + @Override + public int compareEndpoints(InetAddressAndPort target, Replica r1, Replica r2) + { + return Comparator.comparingInt(SimulatedSnitch::asInt).compare(r1, r2); + } + + @Override + public boolean isWorthMergingForRangeQuery(ReplicaCollection merged, ReplicaCollection l1, ReplicaCollection l2) + { + return false; + } + + @Override + public boolean supportCompareByEndpoint() + { + return true; + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + return Comparator.comparingInt(SimulatedSnitch::asInt); + } + } + public static class Instance implements IEndpointSnitch { + private final NodeProximity proximity = new SimulatedProximity(); + private static volatile Function LOOKUP_DC; public String getRack(InetAddressAndPort endpoint) @@ -52,12 +85,12 @@ public String getDatacenter(InetAddressAndPort endpoint) public > C sortedByProximity(InetAddressAndPort address, C addresses) { - return addresses.sorted(Comparator.comparingInt(SimulatedSnitch::asInt)); + return proximity.sortedByProximity(address, addresses); } public int compareEndpoints(InetAddressAndPort target, Replica r1, Replica r2) { - return Comparator.comparingInt(SimulatedSnitch::asInt).compare(r1, r2); + return proximity.compareEndpoints(target, r1, r2); } public void gossiperStarting() @@ -66,7 +99,7 @@ public void gossiperStarting() public boolean isWorthMergingForRangeQuery(ReplicaCollection merged, ReplicaCollection l1, ReplicaCollection l2) { - return false; + return proximity.isWorthMergingForRangeQuery(merged, l1, l2); } public static void setup(Function lookupDc) @@ -127,7 +160,7 @@ public List dcs() return Arrays.asList(nameOfDcs); } - private static int asInt(Replica address) + private static int asInt(Endpoint address) { byte[] bytes = address.endpoint().addressBytes; return bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24); diff --git a/test/unit/org/apache/cassandra/ServerTestUtils.java b/test/unit/org/apache/cassandra/ServerTestUtils.java index 79b9efaa8936..130053658e01 100644 --- a/test/unit/org/apache/cassandra/ServerTestUtils.java +++ b/test/unit/org/apache/cassandra/ServerTestUtils.java @@ -21,6 +21,7 @@ import java.net.UnknownHostException; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -44,6 +45,7 @@ import org.apache.cassandra.io.sstable.format.big.BigTableReader; import org.apache.cassandra.io.sstable.indexsummary.IndexSummarySupport; import org.apache.cassandra.io.util.File; +import org.apache.cassandra.locator.Endpoint; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.BaseProximity; @@ -70,6 +72,7 @@ import org.apache.cassandra.tcm.transformations.UnsafeJoin; import org.apache.cassandra.tcm.transformations.cms.Initialize; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Sortable; import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; @@ -108,6 +111,18 @@ public int compareEndpoints(InetAddressAndPort target, Replica a1, Replica a2) { return 0; } + + @Override + public boolean supportCompareByEndpoint() + { + return true; + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + return (a, b) -> 0; + } }); } diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index a3f9d2325ff2..9ab3dab6bf8a 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -272,6 +272,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.io.util.PathUtils$IOToLongFunction", "org.apache.cassandra.io.util.RebufferingInputStream", "org.apache.cassandra.io.util.SpinningDiskOptimizationStrategy", + "org.apache.cassandra.locator.Endpoint", "org.apache.cassandra.locator.IEndpointSnitch", "org.apache.cassandra.locator.InetAddressAndPort", "org.apache.cassandra.locator.Locator", diff --git a/test/unit/org/apache/cassandra/locator/NodeProximityEndpointCompareTest.java b/test/unit/org/apache/cassandra/locator/NodeProximityEndpointCompareTest.java new file mode 100644 index 000000000000..472104697883 --- /dev/null +++ b/test/unit/org/apache/cassandra/locator/NodeProximityEndpointCompareTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.locator; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Modifier; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.reflections.Reflections; +import org.reflections.scanners.Scanners; +import org.reflections.util.ConfigurationBuilder; + +import org.apache.cassandra.config.DatabaseDescriptor; + +public class NodeProximityEndpointCompareTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + } + + @Test + public void allSupportEndpoint() throws InvocationTargetException, InstantiationException, IllegalAccessException + { + Reflections reflections = new Reflections(new ConfigurationBuilder() + .forPackage("org.apache.cassandra") + .setScanners(Scanners.SubTypes) + .setExpandSuperTypes(true)); + + for (Class klass : reflections.getSubTypesOf(NodeProximity.class)) + { + if (Modifier.isAbstract(klass.getModifiers()) + || Modifier.isPrivate(klass.getModifiers()) // private can not be created normally, so these are scoped to tests and can be ignored + || klass.isAnonymousClass()) + continue; + Constructor declaredConstructor; + try + { + declaredConstructor = klass.getDeclaredConstructor(); + } + catch (NoSuchMethodException e) + { + // DynamicEndpointSnitch or test snitch... we can not create this normally + continue; + } + if (Modifier.isPrivate(declaredConstructor.getModifiers())) + continue; + NodeProximity proximity = declaredConstructor.newInstance(); + Assertions.assertThat(proximity.supportCompareByEndpoint()) + .describedAs("Snitch %s does not support compare by endpoint!", proximity.getClass()) + .isTrue(); + } + } +} \ No newline at end of file From 169f790cb193f2c691fdf45b0ab626e5096e5c05 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Thu, 19 Oct 2023 14:47:29 +0200 Subject: [PATCH 077/340] Use pinned Harry version --- .build/build-resolver.xml | 4 ++++ lib/harry-core-0.0.2-CASSANDRA-18768.jar | Bin 0 -> 458194 bytes 2 files changed, 4 insertions(+) create mode 100644 lib/harry-core-0.0.2-CASSANDRA-18768.jar diff --git a/.build/build-resolver.xml b/.build/build-resolver.xml index 29031b33a115..c9a47c4f96c7 100644 --- a/.build/build-resolver.xml +++ b/.build/build-resolver.xml @@ -253,6 +253,10 @@ + + + + diff --git a/lib/harry-core-0.0.2-CASSANDRA-18768.jar b/lib/harry-core-0.0.2-CASSANDRA-18768.jar new file mode 100644 index 0000000000000000000000000000000000000000..292db01f06942d8442204e776b7f25d158c9527b GIT binary patch literal 458194 zcma&O19YX^vNjyswr$(CZQHh!j%{_^F*;5<>Daby+xW8YIsZ9l?|bk5?;2x`vBnzD zs(03Ws%lo%Tklek1_nU^0Dyo1knm(vpnLh=SqT6DAPNWo@bxW#tf-10t)!e7y{w>| zq?o9(3Z1N2OspVOFav_nq7rl?D|~4O^`#xae`)un(7#@gU!VS^-Tw{)`+`{* zIyrgL|L+*Ye_@R6?TlTWoJ{Rp{ss>HA8;qrzX2lrC(y~&!NuO`Z_xi1YhvhP_&30> z{rIO!CMGujH=Og|;r^|@xv8D0li}Ago&QEG-oGyYA8->(XBQ_+BUcwod%M5+EzG}u zYin;}`ge;$`U}$7#@^WaZzTVFum2(0+4b)zMEi^MKhTzTR&KU`bA^9zlfAEE#r`>z!_x!QfP@HY$jx1oG(_uuUy{J&(nSvp((|Iqpe_+S2H zYv^YBcRZ5(cd)&aIh~<{p|OQ2ow1>_v!R`d)882}{r^$IfB7!0vHd?C?cb3OP?)}n z1Q-AS^vkAD|5KQVsDh}Rh^U;fq^R<^ny&o{1B&mV`csUm6%Noj+jC6*jAHi!?3`^J znIqC{--HeL%EycDVQ{=sgWUO2yw=ryEBoOZ{9tudHo8Aobrf18)Htj@GfHiAL;&P> z<`|Q3nK9OpWF5xMt`bA4{UA^Z6{b*GvvuV*Gayrgc&dUR{;rRkPCbR*;x4pDC=+h{ zGNnfg64X-{E@J{gg*2$-C~ZRag8MN$a6<5yRtUbm1UEG;Sg*q)SsNKK9d?jcq%)0C zoo5;l`7luC3&~f6`&q%xUnJ&nCM|fLK8#ZhiXn2@7_avms^{nOa=axjoC`dI>|7=V$r3(r{gn!&@Dpu9ek1*mTH7+;w`aM~)ZUzQ5i|f!( zOgXQk_^Bqntw30m4}RJbb5Jey?dNu8cnW4$WWv1Pl$Ia9Hy$32Yob*|>PpgV19^1- zpR{4!llzPn79gt&tu$t!(nNVk{EITIsEDL=SbcKa<)x7}O( zH&X9!+=a*e@NUD)aAJ%)i+&6ZXiT%tXIdKzyHqHtmn`&Z2{fzJHYOaXt9}Q(_&vyb z==3B3{fnLFtkJdTCXgJG76`cOgA<^qPBQ6X)x-<(<9+ZWVQ3XnqJSoFg%>s7x0AmH zI}?O&-q08_a#VUUU&7Qr+9Y!Y*;h>Vm*`8eBRVX5XLOm@VBkbtg^zlk)weSuu=(>H zkH-T3kqExz2yU`CB|u$q(L9P}pmJ^KHEl5e`+2zU`hT2@ z|8}c}hMh3muUMc*0|4+xO#N3#kdYJ?l~Wd7*V1-g9YFPatXXm@QMLpt$zid?K8&c_ z3#$^g6lt|1OZrAyOVA8fJ48ni`F_uv))81_;{Kv0Tf!=Nwp@RBlXIEf4}IPGTa5RHAszhMlR(D zYWE8&#u57HdMJF_mh2iGnAhHfIi(KLsuAOYm8!!+oj}hE(W^O6a_Lxn79JDH#uaVb z8uZ5UDAcn4wO;MmAk*s>;y)J8N0A&k*f~k{JgUNljFo%J!d_!zl3~1?wS!kq4TK!UK&Riehzg8sH#-6sI|Tl)?fE?6E%U(3EODC3F~h^U@q$WF)fmjD;|jCh(o169CKu zYRJJSrlI9tx{l5j;12C7sA&5gj&6a;Wg z<9Q_3hh&j&_34wO^6*So zdnalU@^E{y(RY%fI@N1$iAj`=AJT@Q)JIOhYj=W?ED+eaV*%E7f5E;TcaSs*D)7R64*%`-S5TP_0F-g|^mi9y_H40&-XfU;$)U&IgmJ8=(YS`s6}@qOr0003mLS zd$M^NYV0QmOAww)FO9JzqtwdSyMni8zr3Nu0n5!N-O=~eJ^#9A$n3b3_*H|Y)~Gc2 zrqa=XLTQ1{C00>s1s4y#PR_IjRsgzNlvLFQBew`3(v(Vy8bMT%FkMP6mG7EdmbAD@ zl>Cb2zpy*ViJ;83N{=-~u2Wj92#>;c4cYIopCJ)%vc%Q^<>xQ;Oe7r%sJPj+ARA!I zZU?~4;{mcEI4zo)F3*SD}Moi%;O^5({LUTZP8VPZ(u@O-vDh-Onq3v;zxI}3E^w|&ax6p z9rY&ghcp);(Fqtjy8xx2mfBAuR3~uu(uI8lI97KR4I!!WgbzqJelICb3F?uG9 z3=u&y+XifhH76I9eapertC`uMxDbyrma;*EkYi0@o*qS=n$hWKxWwj^2V|k|vS$q?$k(BIIK2bKLu(=+z<8751uTW zkxAF%q@b!uCSXbsgal0V?S!KPaE2%Y^`6cHq(W|kPc5o=$LJI?uS?(j3P#=?OzZMG zL&GL_jvsgM(9{O~&VtYmu1pmFUB!9xb2}`5H3&7{n0MeBa)y-SauBu>lv*h7Atpp1 zZZlv)#|qbXH$rryd7p*fh%sS533oEtxd+W{3ZEH#T?%CV6Zlf6i)kJ{-C)WICXgsF zC>FawdiqwA`4UhU+>rY@FMJ;~v^0**HHHU1GQbMJ@7W(vF&cM?5fPk6@u4!_fGitE zwu=v{eWcUAcAW*6Bd0QYX#KOLkdPTj2ZDGzG4e ze(n?c1=5PaETujo;734$h?@LpEP4pAyE$OIMn!EpAcML* zpB)hK(kGlQMOlOX9Q($kY$nb_gjm$KX(VM(Mw$MS1Plsg3o+l45HaHYbb8M~RY%t@ z6w%92jRZP`Ca6xBZuBbXnOgZu45-eGDDD*bu!9CyYIw-ySVag21#mm&?m}}7yjBoP zVjDJ~Aw?X*!oW}VIwJX1GrDbg0?h<1LM92QDmBUnCtIweh?Y+9GcV+mv(R6ANwEl) z+H>ghyMDlH<=f#9TAcSW>x3UUI4VlM92X>=KbWu{)=a!_w#XDM&kAe(5A$RD5rQ5B z-7(15c-w5eLs~FqD3G-uX)%TCvX3WBo9b18Ni+~WoELhO-2t$0@#$7M?9zb>x?rT| z>#nAC324-yJe3m?AfcuSi?jH#;|L~>oMFnI($cXD&Eo|}RKp->RWNzTGLx7m;L04X zCFP{==Pc$)gVgSR+rLfC*Uyi?-<;pWBwjiacpI-g^Ts=wzwRxk2zF)2aMsa0QzN5l z$wTrbqu!@e9?fai6H!QJ-snK7@e>=*8J9M66Ag)tD%A9(|bvF#x|6w-qCc77?eax{J zj7P*;Go(;*^0Y?O$0cKSTW2#=-s7)*=6(}CRs=HN5!y+D4As;LEWT%6;0HB`ty zWdm%RBO(KUcJUd+9mg00gA;~f5LQ1dGD~%okq*9e&GE6sAiu7i&8m1p72Y&cn+w@B zy+!Q|3Uz%rq&kq0Ew;dA*lLd9gLR(M%4Ed2tea3aX`x)yP*$eN!!sSZH`|&nMFQVE zQQ|2Xn6z3hi5gppZ9Xhg%os~%oUv4f;|hfQ%}07X_BZ5~9&4QdJ32mBX+dmu37%o0 zlJToFQayAc$0i3({g6$1N=#iYjxK3Q95)`kQ~HVR&GD`OY^GW8He>r1Af*OllZf`wF~t=; zpKvL$a$NZsL2dv$R$qBlgHMgin)g$C06!oK7|;#^8Z(|QrqDznrMnw(NZ2E~m5hE5 zYnY9r9U3OATt~8RS4QV21lKdh!8hS4uo&T@BDVSRO|f(PxY|7KE67wACrT8**`0e9 zD_}LS4aFY74r-&>z@(L=XPcptEp~m_iDZDLx$lQNQruQKS2Im$_<{aGx^Rq;1?5@5 z?6q^A#!)E-0ly^1r89$2hYk52b(QXcT>JC89iuY&V|GU)oar5_LF4(gEBl>zk@;yB zMqL>$2KV3N*zqs=2m9C8ja+~K(u>M!f7ALn$kC|uL-W_Kt`4}c=mhlZJ1&(bpAMl` zWh0Zt6meF&eG*01-v~T>Nn8(%yjsp>oy{pkJ?^s;g7D2#40O~Vs(>bo5QuLhXU{8TNnONbmdc4J5u4MZLx zKBkM+s)d1maU0E!tve&;{=o30`R;pA1kl7Cb_b!5_=v3|9^xf!PbzR{;MS4bn~xuZ zm)r_yOpaPp0y`5wP&hGHYMJ4vaeJheIuQR09d?JqnS%hDpU0dVl-PqLNfx(X9AnzUs zho*goZIyT|x`_)P_pn0%_90hJ!RV8iSL-@7Qk9Svp#i^IZi!G&XejwE;@jkB@1K5N zEuS`Z36173@_-gg9`60jF{|!cwG#CJQV_=)saEj`&mGLedxt}DFF(JUPh>$bu4fZ; zH)Mf?QoA2m!6mQljvz9`6 z^I$&*gs}NS%ES`5p0z4UoPeM}NVtPP!0aheMGnr=^vw~UESR<{)(Jt7^UE(^e#&{# z(N)VletYT^NG%k6x}*bjJ)JAz%0owh6u2(L>*O99adTsjYTbbgIE=d{O3w6B18@LB zV}qkFeX`^yhoz94bXpTgCN$n}in1`nk92I&S}(mDfm)9Tlf|LS4s>E^5rfG!tV-YT zc}de(kcHsEz1;2C@9D6{nJgmw*02v}U&)CvW4c2R{`9K)Cgc^IE8blBduEqUqP)^P zGfY0=Hv$#iRp8Ua{{ABc(H$`ah)(C*6VH^#`ZHT{9;28_5eJd$yP5DT6DQ_!s6eVP z#N%X#(+gh9tCjCNl1d4we$UkR9lVbmW|X9uIk`=T9_iq>z#kY-pAx@;ugU7GQZh>s zg0d}O`(SXYR$~zy1R(QhTn6nQo-HMJPA~3sZqYvh|6H0kP7dI^ewE|}U++J4>*VBB zzKV0-u{tOqMwozYUik#Yw1<{yS-8G+@4%@6>un3EOs$J^)b9CXeLlaIq?5oXie94o zsb8eMo|)u2$h(b7PkV&&*hSIo$E1m-lmONvuoY5g-3)29k3+DlabkYgbScD%-Jj^W ziJBcA`~wbmrgAQ~=HLMKe_o+Fyt?H*5C8xcH~;|C*DC+fc>j;4`j_Ebx)>VSn34z@ zeKm{?ja~lr4V|&gSKBZ}Ra$vg0p)`(qy#pfM+5~0s=-1#@TCi#Abg)N2aW~=?PBK6 z3YgGvMrDt`bqdx^r^R_UT04zSx6NhP!|tUh)J=8Emt_5c2{_*qo(lkbv}@#{5VY z8*P&c$E>Bk&{SpxQ)0Ev?7}jZaA99|cpTF+>?1{H!%%3(Y;TBz4l+ zDMR*%Q)#F)mRB!_LLSd~Ty5>aKMGCt{g8$yp1Yt1CO5W|)*d#LJPKcQnM z7GV`X3|LPpOM@c`yi94zTf#_D32BBss&_yWnbcG(4R!l%Wg?PbQ4xQkNHZlZ8Ki%F zuOMF5+m;>o%37K@bIw7+YRwLeRJ*NN2r#cm6YiJ|sZKpN5@-qCQvDF4PN!=u`pu>r z+tk+d083$L+99*i z>#y5ZYsdfqe{{+J&WW5o?Tr88LG$XT&d91LpX+8xva{#G@e6zc6e^N40sxA6RR&Uu z-AbXUsc8DE<4R~V>n-hok@7yjH{}OvPSE*LaO<|BUm(qJK1l|y*V;$P1SF;30p!dc z_$E1jZ*m^ydfzVcqx*x`g6hw21B4l4bPHk@xGS zo0Y%aV1B!$(N}o>7{=goNQbs`oUy+09eg_E_(U<088qlgRE#!)MfZ>k8=rDeV%6!* z=F%9esFp)s1&=EupR*Zeh0+{nYD}~}Ey+^VwO*O}$vKTP6?eo^aSV^`MR*zYz%&Pk ztz2@FYLkOUe)*5=fTJTT-^#4pqK;1lL<3-8jqcq0)GsA(h`Hwb&@;ZNJ7P zCjBbyC3(3n7-0IHw>Q#C8#C42ajD5#qlDRjL&q~WP?kZ&)_24KA`K>ukcdaY;EAi% zgkKE;k1_+RIm5SS;WkWc^j3jtR#ug8dY_P(ZibFp{@MbEMaH+3g|}>#STAHJIJE># zOm%h3*q_OW12|POr@dmg;HYBXU~+eYNyZ6>v};8)(6$wAR_22aBu|6roBvEflvm+bfceMybJr0pbbk`!uWil*1( z67+t@WJRN=5C1Kf1X{-JxnMF|GY(H~o_2Oo)rw&iS?j*^1#lNueTJuJGjGh!_Srn`4?-XM1c#TKbnG^*-2F{X4 zwrdM|X%J4>Xfa6bfquzoQVuEO6rHJkEu+OYsdE^#pJ}A?u!fg}G)9N2%Zr~Z*_K#= zOP{OJCfwh-JZX`CbUkd`cJOd>WUp(KtNb8yT=cl{e7C_NIw_*ek6GI;d7{=icEW1i zR=VOfF$V7}yn4vkkd7&LlUqJ8!fq_096`pZOIP~CJY?B5iYW^#X4}`NOMefa$wp$M zhw|V_ja(U;w+CyKtzS7yPr-AY(L&rI_o!nh)q+qg7p}lWbq77MW1cKbH!F9%ql`37 z4-=pA3FABE(xCi&=1;3$9z)#4`rwm06TRZu3cUPYgJouwl#)?mOnn6$Fxpd(vY{(? zQZM~L_bYtWaUVhVwqH573N0A~{#JCh)6AN>Y_7W}bmdLGV4#WvX~~gMJI>>E-|fH8 zNtP`Leu4{I9Ht!%un;`Z6XJJbOVLV_z3sbTMwi7ts(*bjIgOht(Ya@ZhoM^-td8BvqI=tLN$`GtVqHaR#&rQn9I(2yHk( zoM|iJ&hu(RcqpNt;w#YhphhPG7MgTXXm*#vz*#gp!(s`f~qC0SwfDw4{&D7dd z+R-pgHX?dCabIT~{d>{D=>)#Je7>a_rl!je)`_Q;IzRXNWf6E zv99i_eRso|&1s0KqD>l1+Gp)`b&pIbS1xO4n#gjk{ED7LttK8z=2%;OfCW%hW@TTCOcWqRY`Hm ztgGW(u_`TRI$l|sRdGAfu$xh>y=u~#E+ai?x0yhRrM?i^LCmny)Z^p*5HtsV;$z#} zss&>>J|9oK?)AA@kuxs#yC4er*M-Ja^vFwP9i~Pj=xMpJ2W#5v+f(0>1vgSLj`Q9- zkFa21vFHyHF!kX0Bs~dV`}LqJ!Z+zXBeY4$C}WRY&E7sn!h7)j<`Dx4MRR#n35|E!=6ll8^Q5)oGaJUlKL69dEXW|ECuKU3rlI3GI5W$K|Nb+lNrpD^B9lkQgbf# z!lwt++Y_gUeNGFAe?O>?`az`L&8pS{e9g^{h_lUnflbRZ_(RWKfIauH+z~2vMjC8# zT7El|@{G`_?USuwK$k*sjVIo`cak}v^v?8+@w)`rpkvSP$vk+?jR)tNsYHSifp8@_ zg&Emo-aa^^7FvvB1Y%sHB1%E(uz^hi#=a!R9dO2`?})-b@pq`0_07W|7IS-aOM&pu zh3GGZU!UL-;h~z}7*HisE1v{9P_UkYnrbXMVo^Ftju!bG=#h($IHQyNbmRrLRhCFB zJtM3qCb2RU6C@f`vL8<=9R}UG6x?%oO!sv|KfEpvu+D(&AU_KDnj7?=*m?X%Q8q)3 zEC9(pS5Zf1LT5W^THAcmOiR?RpG6`2HqA}4PYoJS;yEhLh&Zv7rP|{@E?xmw@RXYK zA1T-|30Wn+Wy>)lU-4G)&wL=;`yFt+_q6Mm4cOaU(J9(L=}3!6GFFGH4-@rb3~Qb6 zV*JE|(Gb^Iw!VvEk`_lGq&^P0K|pj2o#zqJnfqZuF?9n5Tnw>%Z>7K6w`SL45(fxgNQvUC$ zeT>RKi(^@AEFntxiXhPPa9a_o$jJhTCyew+Xgp~5W=k+2OQl&A6PKD;y7i%GYjzr! z;8FmB^(ss6l4Ex(lZ3o|u$n~syVuya%Ft$hC2 z1B@OW5SaJlF~UKopt;?b7hgR%FU1R<>6oA9TyCNRq>=AJ(oN=Ca~587xABB_&a0X% z(gU`^=(Jb~f#Zw~87ZL7{RgS0D87UcShid_5{HXV#<`6x9f`gQ8q6;Ahl@$la1nUc9hnW)Q`2u}O zet)~07KR{wyY1d?X$nrIr7#h?boaNwr$E0s?HdZ>jMv0u zd?+o`<0*BI_CuPt9(3F+d=4*!CkKipEf-*&qUG2iqOr9G{!VCn?gwwnZh-<6%qyDA zdrOH#B<3za_>1JF%q6GNHBKh6&qxskl^ZqQZmMOchEP()(3lI=_ZDt@swwqEFyt|> zu>n|grBEJ%!8;bh8!)A#NK!vPuH!))^R8Wf2PN!HEs^8A8Ho_}+c5OAY{CXVVp1lH zMK|Ur+sqN+s&ormbiQ_EqMtDaId>#=OW4t0+C^reuUesBgQ}KaV_XD(>ka=nqJ0yG z<$wec!airF0nz(V9R)?<<1t!2ZbfvEVj{rzcB(A3F>$bGXNhmD@O%L9M>K-DY<`q^Uxg= zh-p%-16hSu@Vvjpb11_T-F>COh>At!RfovgJ*wBS9k=XddXzhZ?wkeF;QLc63LvG#W4WWYqRKq2y`HWJlGgXh3 zPszLyQbVps3C)OVel=Jvr0NuUi=m9?8)MhI@=vxYW)?+lR|RAxJ41>pNQTCDnnG>L zP4suh$0Z92i4_#l2k>BEYB<3U^05vavAEe3%`Pw-IWE=Oerw{c_;HyIEXibc@BZK= zkRgyF`ZL@3NVmV_uO|$34anCIX@1uC!D*z`F?Cfxa3Z!3Ou%Ph8Zisogt;SYTvlqz z)E#hg4Lc4>8fDI#%GQ!;nroORyn+3>SQgQ4B<=jtp>iG5TeEdLkwO=bLUB54EC;0o}wKtlu$7I!Mvv> z&%>|cSJ0?Xx0sm!S(^qp2t(Shx&%Eyw7wOTpX!=8g`LYLHye$B zc6OB5d@r;Yz1I9?sGE?s#1PxL=)xV?Gc-G+in&E{y~M18=#iyg@587HzlkquT)uuC z(*`c8jLtA{Ek9Fq{g&ddc`t#7Z&&FT5ffiT5dZH)h}+m38QT1R?xZV^EQk`eY1zIG z-y~3xEc9HfgtXT)9|DaOk(q2USAdRyL~lL_SOQZ&>}I_acO%q6V6X#!C5||0!@e&x zqL=M_u;)AJNmlP~U;PQN)NKX;Z;MZ-W>y=?ioKxA1GSZRzEc+k3nrgoI1m#LQ}ux< zKWm>L5sgt?#-cel8^PhukT1ZY?zCCTeeNSw>S zmR$L~cRuF9d4YtpGnQ{8xKezvXEP|S7h#-UwBa(7qH2C70_DOHilSL^&bf3gzDhEK zbX_KUdwL;8ENJ>5@JMHelHo=CirSW_zNtV@|>p^i2s~8=%OqSu3wxT-8qtdr?As*IDK7r#If++^X zK+s@K^j)f11OI||a@ecjZ#3SOd zDLQ`=~0akMIEI6d6-> z8G&I{6f0~BEVb4K-wAQUwTE@hhq~-Alw1i#mkbo-M|{Dm&S7!Gbkrpdt$g?$-mLi? zAcx&~v)X+xqA0!xYH=U|25-QS&6w44DRKF&?h9@q_{`xtZhlv>m0vO5bT`qk^>(5V z+8*B3P%<8h%tObH#7&48nw=vGUcMdZJkO3QU3Mbk>)3@~69Ov%1-66+O@}G|Y{TR* zp!hUm#czcu^em|)DUQ7rxV}V-ii|+|)&Pmif|;YXm(f&&Bu0#oSX>DgO4L>PD^PV# zonkVXvi3-ijBbj+F9rpcM$t@mSw*`z?IE~vSzgtvFjLPn@Fv+OF|vCBzAiT$8rsIf zB=A@1EgPMLyCfSIv<-zu+$C(~RMroP!cI=3)1~g*)aSpPkay8n0?}Wzb$-!C`uDW` zk@EkOwFGJDUIB!`&!m!pfjM#C-&I~ycw_xpM9Dz}D7-3ON21Ng<-77R9}x=kkMQ$OqG8hULq>pYj_wDdHH9no0RA%P(XT<~wD8 z7-YmMxe2ckjstDZ0|>wjFfu>fz;@51eV9EjFw_t0LQw0kAph)zW0d*7t-t8B{-TrM zZ|VGJ2v*dU1`r{p6y2^kp1#^{9( z#bUFR7rCQ!YRGbiyjdcWF~sJMx`8U|O?eA=xQbkIHu=={7d&iFIK-O|Tw}SJPAHNg zmLa|e{xeZG#~^owUnJ>(0sv6|&qTS}xro@i+sPQZ7+d^j%*;{#uf&MAe%)=^4O$5V zRY65yr5q&%UKWv-H9fN&UKQ`_p4Y0RrcQ}Td&-r%jL$g#u3DA#of?mPTra@itJ~t-ivT3iUJXdpkPMO;!;+f{L>Dh$3 z(5LV z<9Wr>rWzazN0wUabg3rqoq2k2=G==zhd&iPe$|n#)t0_wCL1+Fdv9PD(HqnaDnbLx zvqiQV>px8UyThU%wJsCzGBA{f3A<(Dl>{Pds2USsvI=sJ+Q$}QK9wJ+9i@kQNnZfu~R}wc-3SP2JTnb0*gsy`u`x1?_%p16f zDI)qF)c4X5?}+csEUPdcTI)cRN~Uo#FNq|@X4V~3UH0`asXK_ykPDWqGWMJ z6DCcrkygph*DsL%$}dyNKh<^|QSd2pNt`ssUSRZ+<(LSYjbSUkFZ}i$+H}IFJK@90 zc68_!hjtBH_ylowNXmZ)cREcUY?V_DILMjsY19Aw2H6`qflj7_8!`#@O=;>{H|5~E z;#e(_f#vfehWoCX?dbW4`EL7YoOY%C%A!i4S=KqY9Nh;PdZDxHNhAmG`$9c~rLSCc zI0kpAlDlD8iT`1M(GOKjHU;61xiwO}!W9_4-lu?hgYV_6XGu@o9Np^aE4?~b9)YT7 zg@JzC+mlPhgU>*tSSH&DHfD%Q^90)3*<;A`sJ&^>RBmNPiZ@ZD&x(@ORyW=p3*s6F z!#p!GCJh8iy5UB>r95-Zo@EZU2BB6lZfJ;UWzBW5u%qv=e~y!VmN8&rU$#U1zvn0a z87KdlpD2w=^$MWy&Xz=!O?Ub%ES3mG8KAW(hae$B38JS2jUKzyQXYelMHKt*%PCv| zzfoM1otLy69q>)~X16|0OkN(c2UuroEl~#>ZuSI*3Pa06i^GWdSAzu4Doa_>?>W*K zhbgg77DX*xC*_CAdu^ApYsZFVz?iF9^n>hOH;}gl*2D z-@7_;=;RQM99GXd3^QL@f$emE!Us#PG_^MFFS?EipV&{WeR_mG%4QFYO5)xCtTo?N zWZIKAntUWD8hp@4Ca}_tM#$Erm~WACMm=JJ`2hMb^CqyJlMQ4=GS1@=$R%l>AkvmF z=1tia zC{i;Bm>s9)e$8RWa#f}3%~^r7E@x}dK2-W`ta0lss4AVdX<+;H9r}rM3JrxhFQ&w- zXiEK>N)AVoJ8eq)0ouD#y@v(8a#_ME(0~i0Ii$#JlQHc}Rz+%-Vg;yFY;lyPYD<-l zc3g!}@91hDiFqaKJ~~g}w2NNrMTbe#k%{kOBMv=hyQ!&f&Cx^|&F5oSc^jpjB<d1^K)xH*+A&`DABocb@^r|!wSvK>@~wKJ@G zDAkp<>lUqHCqqAf{KSSXMSW6ITlw*Vc(ZdCF{z1m%1 zYbXDW?AQ(W_>lWiCU3K@b7T&`UnFXZ_3&omI+_OEm)&}NS#X6<6_@Oe3VCuXBx2TM zxQ|>y!pCZd*@w>I?wD8hYgj`zyL*%jMNQVEI~vz+l+rd7Law2&jC>Jf=Uk0B&?M=E z2;Q#x=V9za(0-ZK_x)l+mjeQ2AYxl|`oKClq+fx{QUIP8S6HpRZ@i~M^vI#mEN|jotrb+O`mg8N!x2|JpC*OL)LAGST`?l zJ10%oN1WUpj3GmIUg$n~{kCk^dVYM?AK`WSA$@3I*DaT^sqlC3+hyU!TnN+}i(}nVh0UL_Rdt_U5Q@+L3DZ|p+j$Xi@GWh_dys(Bjh{Pjnax_V6 zg$lw41|g@mwt@gFhhQN`()DFOB2%pfyV~q`Y*NnV5Zj#$yD$8p$lcu$Jm{NB!I(fE z039Lc>>Rp2wb?Yl-XHbH?D*@Qt666Rn(#j!xBmJo2HO8A#rSJ<`p-^dj;gIYvM7pQ ziR4!!QE~|2X6%B8c~t=HV|+$&|u zV`qtDN!6QLMh(`ldA1jIHclaD(!>Uo!{brZr~p`;OaTHHY-}X`E;+&zhga% z823`7&zSNW$bnRz-7i}y0Wc1I?|AX0p(hWYsau>FIDC>a=31q&-HVCwAg_;?v}&~ z&CXB~+$-zsg|nuCEmU?Q_IELi%5(Ll2bGQZ>>-=wL=04W=n9p7tST43|Q`Xa7?V$QlqFk=L~c0n1hw8hHsX)t@^Fr${;ptn?pLM z>+_EtLPHz_%^TAx-cr8oUkzqkouQ~a4GwUJuq+k|aWFr`OYBcQxSga`PPS1L%%5L= z7w0Zr_+x4mlNZqKt4*0cSu@n96SScueA6TRi6c{%U%HAt-y1OAQT^oqEn(E%SQ=d3 zdd(F(c4jvnzd9Q*^K6;8c*x;l-K|CF(Kbl$l`-8b+|vAnlPm*hwqCa4gS|A~gk*5E zrYkozV7Csj%P~lenyE`Nn<%5(@)v&aWC*#y*JP(3=`dJr7R50;aZ}}h9n(HKhIFSu z!6j7`NEMQ#OTxe;FkK@xt|uh^j*UzM4zlTY;%n#uWcktGgc&|knIc1MqC5fuf@cV; z$VMx;24e%!_@NIpIqdhMv1K($II|ggJ`va3O5yRQTufasd~nG4;(+xoxdY_(R9xa( z?^z$XWrNBC(nZVGQlK>kN*9x1BP0~D(D^k)8^oN#*VuC@mXR!?H&DZ%b9Lz>Ti`6$ z0iD0YZ=Vxu7q3vO-~Un}pXrW03V&sNJzq%&=l{i}l`U;uZT|Rs5M@(aLpv8s5l|7FfDMV1FfI!98rMhBhBp9&|yS-$8;>Fa0w5%aOIBzPqLrGp$8h{GGee2+WbDbDJ0GsGrtGEcqR87 zMOHg{ZGyO2#kZv{D#Eo(;L&W;Djv$SogcF@wU7Ep%}jY&wN2W7ZYPZDk88!2Vx#}r zYG_@mdf73~py(dbf$jGHD0{~!%hsh!xGHViwry70wry8Bv(mOJZQHhO+jgZhd*6Gz z@7wqEdB5&&jJ?L#f7i1j<`Xd^VuohZS8%iT+HBrl6g*6$^+zoJeaE;6w;fm)@!H@< ztbJOWnM3oaw^-goj&d(|(}Rv*E}m4hy}wLw`f^Rl;qOFT91dqNiFpfXyg)t`#k#pT z3JqKO7Zbg!CJ64r+!H$_{LKfU?K)Q+39qy20@g4?0%9}UxgO*SE4Q&4MvzoL3u_F3 zcYzJXGR2C98+g^g1|yC?1{%jtJ~pnHk z$WTs;`+PfXSSZz4<#$&mVGP2~DJQmyYtZ&UzhE{*m*?HvJC#>B@C%_gn zgcql_5UOppopWWza)=YgOhPQIMr`DKz^x&fB5JR9X}G`x%>6N+A&crqDx^iil>+fJ zn*92mEDr+9@PI)#>*X&wNym-v$vMC>GXa;)-%{OwxorNGlw_)|Dq;bsw#t#vXsH1j zvRRD5Q0gI4#&{7>Wkrzkfr2_7)|C;83H3>7q%UIp_&drYLj2g6QSt&uPXZwzQUc}2 z%dFO?ta;b+mzTFU_;2H(Z!&en#K)rYHO5$C88HF{(W3j#J6xjzu`)_7$^&nUd2S3Q&r0!_zhTS*+t~DiT)A z;v68DfIUa~M&y!rr_5ve z>=^V-`Jx&vIG9D{vE{;4Ge>BYPPLz*LOTpsxSEnLsmw=9a|&{{1i4@h%^|6uswK$6>QA?2wwfvzTTN*`JTMng=vk-!8=5MiHQB}W)S=Lqg zMwU^GWGu_2DNfjR7(f$bYcOu`EtzH&UhdGTvATI<6h_hSDlXO`cEjqQ2{8ASF6@hL zs+yp|o%+xsTO}?)hpH#unbMt7qCfxqOt^ijrRX<2pAR(1qr?u58<%jWl$^=&!Nn^Y z%law#YJpquA>}qBtC*dY#tY0XmJY6Wx)FTd;2{C6J_xHRCgsr*rd7p{E8KF~wd?@y zIvQKb=lB6{RCn^3#KTj}S1-Y1>1K(GIf&fo*WD1v6m9);O} zyhC%OO@DudC4@dI<2<(wjjvI54I$DHVtoCI;B8T{i#3YKGss&VL)RGm=8Q+oDeLH; z#eehq`RzY5M_kvYMHYa&Bmsc@Z^v#VY)vg}Eu2lH?d5rTzE0xPhMSAZ*ThmQRL$UM21k|5$5ZQ2+9Tp;}YH}(3BlaKo9Iw zAPJ$PpkNZwKb=*t(cb2#1BWo1!%oS#ZZCxhJB`Sjgda9TB~&Rt*1J@(!H1I?3efAw#yqSwSDs!q6E;c>RYZ`=`iCm z5$UwPGt8UI9@9m*q$6v5hHh~dStrFFvrF#XE4SJaPt>dK=OpRTx$8QcjXz!;VAIZl zm7c2(i>u0r*6I4!BJZNCQ}Q~EBHE%|D)^Rpw+p2eMLF_R`tiV*$qdgo@Rg7EJE^Zq z_n)f_@YMYsEYaEsEX~Ek4K!zOKuAI5Fz@aY=W3$Oux$6EgGB~dNf6A`f16Gtuvv#7 z$1F#wq~Xe=)2T>3>oju$E76!p02wO_fxC^cnII+# zQ{M8ujjiq~0Zj_m&s*>WY0j(;!!|w=qzh?xISy-o90Ii6QgO;#YKOqV1^zD771UV^x_%P^X`7Z25%@x=deC(&d_uUh-wCIo zU+$P+EyAZ@GE3eqccPg3Nk%}Xj+HEyneQ*>oaR~_gyLNhG^xe~{QtDU6+1{48GsGW z{&&H|-`b#rt+R=vt%0?Yvw<@}H|ie~VE+szV_^me-?#HP+YGORg9GSU%zg zH&6wPQwAbec)lMDa2mKN@E!=>lp4SFMf>+48jPZVSJ?{}pS|Hz;p1{c zedw1#7f$4`Q`v2&Ha3cy1PG>ER;e^oqxFxw^6rt!wxUZ2Ro?~aGA_7$@w#v*y96mk zL_cNb^gi*v<`3>X0)sK@@$MCO95~EUX&CH;tG3P=QP;&D)TYyoH+d$%pekY`KKNY82*Cls9m$Vi6F+9#Ys~6{L{-U7I zonA$f)lO-lo6HoOgw0{NgNeq%W2SzQo9D+!gVhr91~!Mm7Te3vukT6^^U-t+oyYu} zUi-AZ%lBY~P$BEHdaPkdjU0#G^Q9=_x=L49C#TqW=1ob}DV{`ti}>-T1qaAoxqu;D zD@Tgt9MQu4fo%o-K}YCC8m%8UK5T_u?AB<+WiMq0{cEw_@|OCQK6mW7tLoVyerTIw zWAMjCacNv41ho7KaKqlj04S2#m-zCHfZ}jJhbfXo9``C?okY|K^B3-!t*R1TERPr! zn=>mL*sE`BVS@#x>mnOig;Pzv)g&eE!*FW&BTKO1Uc(rLzCG&`_2FvySO&hStRR;!514^>kVLmBnFz$!pelf-hk>(h(eXK8$7EW0{Y- zh2)A4JLJwo+U~Q^)VQ(-(YL~ju4a;nWgAo0cirJVyU1u^a?4*{_9VH#%d#=dM5h?l z*_R($Cff!dNVIR;tgvLqcbalAZYU;BG>Co%1hlOqhPESJMaRiYHJ7Y$HCrjHeSbLRGz+WG?X>TXJV+uq4IF;8E&9CTN70qO7$Jt zoI2AOXbl7NEHFltL}TQ!aOdlYr!wKD#3c_ZdzADCOof?v6u4e?n-3nqsDSV5;5>DU ztwt2`@M2}^{??X+i2JshS~F|n0F0u5&`Wgn%Lm_GZX~==N8mMbhV?-1K%zzL%^6|6 z=$(`V>;*K?Ux~KLxJ&;GD|`e#>VST}>T9&my8^_ajdcxF2#?A8*KZBI*1m@q{xKCE zq`6y;5e<>lgDSW{_d`drN=EqE)7yqG}N^SijcgWLsQK!3a zAC&QaI{jV*8NnD96G=$jf0-oq(fVbHIoyL6EQ9x^-VaVCtq1^FZ*W*yyb&AaD@Tu!{_%+Mc9#ebwHM z))J5D;0cy86~H}J-!vXBHMy_UU~F~Zqn{^w#*y4N$>X34Z{G6DyDAk?XMB__|MYk5 zxSWkb3&3~A_zij=n0;3_EYfTpYoS+dMdtbZI{~NO504Br)xv=sx=_i{7zAmsiTH8y`9`Z zCxtmO8kEj@gM^Kx%V-^~Fx$h!2FueU46GrIo!ZONe_&1zBe8xDI-5p~QvK*Y!{=}j zOHuu9HPzU#VMZH#5$7(n|T*K>`_@73bk02u+?&rnFWm03zs+p!&iRQt#P?tmn!oML0x@LvFl{SSbg zJKFWCX4nH%iP1sR1cgy!`U~_w0rJTj4vrsCeyGO(Z))a$2ax}|wkI{*y-*f$ zzq&RjW$V²I$hsEn72?0Tf$TSEb5wL(C{qk4IuqGSrGiAH#&9y;Y&l_A*)mqQ% zw5Bc<*9!Fy)TFnxs_eX8$NR%q(OF6R(Z!xHXX!Ziv`a*^LH%*1)ls8&q~mdUmp57XiNB z!VM{YS7f#Mvn3f8>Ebermrg%Bh*y;AMC6Y>;M$U3Tjm<31$4$g@=cZd*+DxVgIDpJ zSiGbNig{P?;W2CN3pP&}S-sMw!q=FDE*XhU9X%;F)~!TA-eq>h?Oa$E>J7zQ5;V&R z-%2(XIcFxxpR_j``d3VphmCsWT?ni}_u@h=2FoaO_%TX!^Ml#yhYu`!w9#3FH{mEX zOd7#GX^GKdV;CdGHL;+*gyE2)$ak!JXpbK&Fr(pYY>bL1mIUU_&zfVMFn;UkG&o zE+)=~q3SB0$H71Pn$?0D3fiygqO%e{^oFW9PWGnDt?Od(8!z%_YTPFQBAdld$8A}H zpsRdcyugCw2p#eXwUps99)MHZ`fZ`X%gdj*Qmd}im6o#<)DEert2D!;wfCW8TF_6x zjq3KDxM4LEO1ew(Bw@)HH0!H$et-vBZ0xh;A_k0tl(?t z4aH=-{w1(w&iOA!8p`0K6_v0tQRO%th9F0vAR}he>mKnX>&%P>wkVx^Y8!{~cx%f! z6%1H@1UEydjZf%Ldg{ILu_3jyJIe67A#u3)6oLL#MLGF8Ucxep9l8Xn)>3|G)R>VP z9)gcB!>t-}eK_%-K`nAz>>;yCFP1$Pqwk zvt4c#+#TtfCNa`leeIx7V`KA~{w$8z(?Ukgh>=m|rPiA2O^n|3Gl&=L++l7}Bo71r zbuAH58AGicY!Y({jFoMW&f<7%!KkK?x1^EB5onQBkf5${W`ZLp*Jo^!@f8#rbT#IN zcyXDr(sy~zgu0{}=Ff4_ z`1|BsKH=pq)_p2>j5+&eZmMF$Y?jZh(Rw>6T$(dw`QNcm5*v5zX|N(NfG15W=r?dD z@AuSRFh=HYtZ=(~?OfjBx|bC~JaBzuz4U_Q`Lf2xd*Z#`6^8Gh6SS@7Zq43<-W8(= zySvA92yuP;w_PrJAGtpL%s?fJLd}XrCgt-N4nc9h5(CGlxDeg`#N?kg0C}707q4z| zU|mXp97kit`}9w+jOOnv&KQg%uZJ7mC8%sPHgoKu^o$u&Awp|9B)9Hu_NwdER)ApX zF5J?;YYk)Okq~_R;mMl6m8_T=_T#BW_@Umyvbz){W^CU}{o&S0#^e%5>h*hMDTvO| zIga|yd4tBF3$B8paGomu_sH@f%r5@EAh&=EqY?^2?5F~|x<}KZdd{6S7i4nJsSUo< zWg45x*rfR`3mk#pUa3a~quZKUde4SLG_)=P8L!DOh^``sE<+L->ACPO9r1vXsd2nn z(V1r7D{s^mQ*(Wxx0TdpaMM!qkPYmk@#@^#Qb$o(kW-nkWUdt2v9ybycCWNhsXV*;mIR!e?dTYaDMM)Z8RYtVQKrnnnAnu$ z>UH%a-|ja@_U`b;D&}$Pr*mBr9B_L`{oAB@SSR!5p(T)zdy1C@zD-=hd~(c}rdgoS zWd(SkJz*v;jK>FJ+c9%Tbcw;SPp&N^aXj=H$LHLNKeb?EL3bIzx6w$y#iRf2NY zKXqZ=lU`=k6YhM=Y`czp>(5QFx^5gtsu_Xm+O5VARp7gPMdoWS4-O!QNAi~7W~hH` zoei0DOto$-@8X#$w+l_RMwcrOw0B1!N!GoYX-?+VbBz%z@)8L2NOAsIQ{V5^i3 z9TNpoeoOe-vL*oWO75sM3G5PqS08BH67Zl3k8r@z+bl#A*Bg#H7%RktD-wQ5ZpRhU{oLBDDsL@C5PL^XiqIs#?+sp=F;_o)H=EBN?`RuMcwtMeL4s(-Rt4 zMmmQJN<+$K$fYw7(&-Cq3xsruhIDMgRa0^gxR=`H5-!q(Rl#aw0b!|_uaP9?^S3e+ zkG)Els&t{|k%o$QGqwlxFZxjN1T*$WiS4TWaw7v7NB|irA`ze?C$LeUsHTPxhMlY? z{+7>IDfdk=2JeAN;0*cK0{OR6H3qkt;nu5*Gr?G9$O$S)X;`MjTyD`wd=Zn45Jd3{ z>lF!M_qY1oUM8j_AQm77WRBmVlGU19MGeg^Gc(okJtK$Sch+1 z6D~vUmO5SQ`#g%oZVw}yl31_|&9VMi6_K_5 zF|sP6OEmWR!@%Dh z1^Z$YdJF)-E`+P|4OwhubGj;idRsxYh0SJ^P=h&!?FSZb*Q7HM%27^5+nHNldQ*U70K2;Qipr*Pd9dsKav3b zoGC){EJ3#oF{G`g{m&~Bz85(BFgq~q)^d}f4T0uW%!X~srb1M5`JV`wS;XWDUB0wE+^5rX?6ni1RyR1_*Sd%UVd<>9|!vY|CD7pyBR_nF?`ZuYX9Ef{ZlK zE1ERwwVc$Yn#CVXRjqA&ylx?BN_i&FHWl49lfeKT3i5Hord?PzoIWCspxJ6jLFLa~ zI-@kog52|zUXEmMu-uDBBTN~f;W$ z=QAF5X#09g#4mc6(2N6*>*{t{f{0lKrqcvc8PrqA(9fT`$3HyqO*--Mcd|1!`^|Bj zO~o=+jodPP|6DM;UHnA?r#0OrcnCnykOBJA{#F9#pSf-pG9j@WSG2=F_Y<_8A>R;3W{Dxc4t#&vaZ^iC2?cu^Lsub=){~qL+~rxInLF`1 zeX%flTH5hrc&Q;8?o;VyCBiw|i@9-7@VVmYUTu*HRx4nK_cJ-NiNt{nI8b-PUK%`i z?5`q=^wC0>)76Pq?)_?K2FeXWSy&>URrM*SF(^XLcY$_2p*#{lgKN_`{tI_?EJ)<8 z4qysm08GK(ws`*KNdAjZjZ(6a14wH5TrE{jxPbS;suboR_V>Umx$l`k+=06eCvZ zJ$d1fs4=XOh7zelFh*FU&_K5&!HGBHj0HiyWqX*y4VvZMn`I0?V74D@z&a;dFKyb7 zX5+jE7}QqTd9ST{&+Mshc+%g3n4WB=p}U@^s;taRG~x&qs@NnSJ??e#8~PaxJ1p3+ z4$sbGxEr5Tz*yJU%2=ep`rktgF!P6tupwYyw0%y9Oxo|miZIGKHmblv+MAXcDCM9? zv@V(Z$F&auxjS^#=o1v6aNt#)il5A{ajIUl&crJ=^QQ&X7H^HnH7un_-L+-kjzL;` zwkg9-p|SgVCefjPb-xJTZ2)B|?1%NHqvxQNcOxktP8EEyYXFG3DESS|TM}_18Q=@~ zD-X-mvLbU;&Xt89!>k&#xJd5?MYp#U8^K86QX0&OGeyE#7H-yUz!hJeYIoU1uU+}s zycF0Uz52kEe`L#6nckM^ajZ18v`TC7Nvi!=%|Jo=epi|$2hE>_SPu=d*Ald@pIUIj z=kCxzx8fQ&1rVJ=6b`mlK4J>Y?sJCk_K)={Crt{%56uupe4hFC38(P^6off3#^cRh z=Wl>%;PK7Hz~L2UHobeC`yn$QO8M@FTd{F(%-SMUnu9@7j8CW zm@xy=hm1mtRQa-Mn>fa$c#7>6<;rNGdw*xJQ)&ud2(}}drXEk<^U=Xy#?t)nVt(lY z>@y$GZvVHs8~g*!TLc|VX#adVzV65WmX*UM!I1429HbE0 zKT}h(%O|Y(r~@k!92Y$8-J4V0RweHe;+b`WAt~aNLhT~oSYY*-gjA)ct-5A7()DHxIVZV1FK;MrL%q5vPbSrW^#^D6KsRIo(K-wNB~_U^^BCJh;iAo2 z36XRD=b%UTI#Su_=mdj>WCXXC_G<5|c-kY_T$wfjZZKnrtCo!|2r1S$%w&Wza(5xI zE=$s&8_HM&NO}E`r>~UtNLSr}C%1JU<$8;atf<9I0Cl zaTFvjJR`EW6jm7wG$WayLcoqnNIUmp)KXTB7Z|ZDBuqW|gi*UWK0Ym1d@Xk7Ad{+S zTpw}p!|Tw|8g_!L7o+*b#i}3+&(i4<&$0tBJLZv`W8J3rDxwNA4!=vJozr;A*)M|z z&$_vr_$HRZiw94b?(ie|UnQ&fE$cr6&3Lr!NF*~F8sh1Imw zy3b9qGyxv=zb^m)>QVo`SCma0Z7gg7l_?Q7BNM<~^Y3b@d@rE;8NoXpu*_oC z@{z)SN01kzV2egKT!_%E_MuA#yglC5xQ_BwBMSuH=NCSyY+eo##8qFq`~4PQu(!9T z&v*OqO9)zLz^wHNB?z!l6t|i=E{-h9y8$C{kgX++j!FdOt-733+QS^t{L7E}(wE~I zS0|-#HZom|a$KDaGv{Mz1Ju2^#({qA6w?ziJ;doTG^3>5bV`E%KwY`*@k8Mmd zs96^U2ho%wjosmZeXY&~Mz028Ys!pwZe4@sXmLjyyRHeAH``}!6wjLj`t$sQF)Dw* znu1+{w-d-S1uXbn{TUe78X^o|jG4HqrDZJb;j#sB7I@jC?czth1jZWMD#rxL1 z?YWlop_<8Fn(R;bkX}?G3=J8GcFpz$0~+XJeN!4VDRN0@%p=$n;RHtvF{CmSTHEgu zRqN^g{1@_-AfC!(9e|)p0PR24zdJ-(0~-@#WphUp1LObI0xo0XZ16w-|Las6RW<=L z-3VVcTAmLc5Xp9fa`NlCQis9<0w|#rwIWEdfdeUrZ5Ht3xSUl`*PrwqWrz&%&lF+4 zG1Ge&jRIB{f#7<80YbB*r}Wj;_U!I%Z*Yb{3)HC4pd7*~Au*xwf^e{h(auB^c$A0n z%{K-(MA>pavr!O;&`zQanOwug$S;QKHJ?AH)k-d8Jw zQI_3>^T2oKG@4-aM|W5Cw8GXcMK+x~s!aqqpv2CvZBvv1Y-SpLw*rG?0lEP*TJ*{C z0mwxrZ3!@Tc`j5X!p}eywI;zmk(sD4G8Xel_k)iryEM0pK$kz}ntGwfHy8_bwqRP^ z6C@$c0>{l-ch{!agnhZDam~0zF4@DQiD}qbr08VWI2Tc1+&D(%1Sc z1e;2AR6`kHlvjAqx=2&A^PzLY;|PM#tR=ZD6zI2Tx4nT5H{i~blCTZu(5;CKBh(jF zG~~0HWlI<20>`juizfIp!tSTe++ZYNWr@#5QezRBo0)Up|1^G33eIv;CeSIyP(;6= zeY{0^Sv>pApdGx@YL*x7wkUijcG$x5l^Ir5Cw;ME8#QEUD&NFIzEL=azU&gbeVYy) zcMp`Jfr&KhE9dcINGkkRk@gZJ2hn_*z95cI&ky8CBMv>nd83L2s_C8ME<`>`SI3ut zo6|_&1&_Aux+{f~cTWB831N)n+42Bv&}#fM+Gh~C9bzblH3@D0moto!eSlY5rYxtR2V=6=^7qd-tBpv@>O z6z1Otf0cD89esTdPf>~S6}&-!S{I!Ej-^^qdQHsY`^`n^&{Wh@UOi)&V zxriF#j2y&;DtDQuLw`;X}cUSPOL%}{Z z=gIP^?S99FCyVVtKj=d@d5(wCOWq~K>>}z{bK%JlpvH^cwPNBSO2JE7TsC5NOQ^DLxdHrw$U2Q^5^8TjIuOmvl-@Nx=0lD%!t3#6oF0a zTA$#asESXgcQ(Q0)}lhAUB6_~G2_U-T+1oajfc}1WF5e!;&_|rtIBKS_f;1zcWu>Q z6V~~#Wwe8jE27JuN5H2%$~4?6gwdpjguK)Y(IJ({HaZG?>?>PW0S`2HhDV`gZf-qE z6tz4BBZCt!Ud5uLTh*v%aqj8dwO_(=xR34}Y;XcCFWDF4>9qJgFK|z_(sUX+T4u4} z$i7U$&(Esz_JQ@XTthm|!mBHqTdjJ4h$b z*SbxxKdSU8nYjfONy%UW8#B5vE_{Ky*b$;z_4<`vvtJf{0qi6$PBK;$49sm~CL6y_ zrdj!5D!vhTQ;Be?P<*8-G>XPmyPRk@AKf%S1H1<&D~qZsI{Sp9YM+#>p8P6U(z{}0 z6Bi4<-g)k~hVB;jRe6R5EO&+dnl5ZR{e;gh&K;q_{a0-RnA7X87rh!~Ms!zPIX&1~ z>t4*H5t+v8SqJ}Hb%v0q!wI${kM%{YIl{8#D~^zk1v9@H?N10c{=_`Ax3aZqYRbhm z>t6bmFrQ-O3C;AocxC8fv{My-JAKwFz{0Nr_qtH9*P4X4W$tM1Gqg!WzCYj@8|F&9 zvTVXdDX@{bG|ufWtW(>RFFVw0eCMvZbngp2T-ruE0WC{CAvXrVo9agTw0wp;T-tJ{ z2;$tU?8e@Y937 zJ*;=5wX*JZ+O|6#3G`vB*5{ zFdutpTjSKpX7$Rzn^PuGt$m((`kSY;-M`9(uiHcD_mW7dMOl)$GbFvtb%%?PL&jj+p8OXX+f7sX?`glhG?U{O zaA4m7Js6y!5CFnV0y{OdQI&5Nyu8DPp1h*)AM&IN`cpob@L)NXU1qaYg>Uvj}4{2VScr?X)Ixv@X0_ z0DVuLWH9ziv3YF0r&R=s`B=4d($dsj^{rwi<5=QR^^ImJ13m9M?}~T>_VeOk8a#~Q zX~K-|ULIUJ$5QzuoAr^o8^}uD3QMd_wXb`zZ|f;YgUu;BqFp6-u=y)TGZ7S-Ajb~W zk2t8DJoAU#rn1a7R|c`+rpNc&2Kv|wF2fNU-psHVVlKjT@U{e|E5#gW9wzHyIDyIm zg5N;vrvaBlE`j$O&Zf0RnMG_EVjsp6-zR!ypqxs@aP_Ai8BIt{tCUXKy&gJ$#CV+C zMo-Nr62N%Jo_L~qtmVUrpdT-mCbiJ#J#PS{Jx9pN{uH5Qj-FVE3S;DJxxJ_ko?ylB z=r5o!Sl%thCe=ii03NF3cJJiroAxXxF%KNrQ)Y1((cxJRFyc(i{)~q`MygFt(XjYg zMwa4)TE+(Hk$P>C0X{=(5$3wo7@Ym(K9iLgR16_7@WhOB9z4Gc+<*VNGV0<}qvPtb zr|wCb-1ADNa$M|j&KIvZhdFHU3P?uAdjCia%!Ps*92mtea7IMx|aiSr{!+8cPU4@ zPnfVvfKxg%r({9~w>9x`T`p!GeZ3cdHI-SKFKO81(TtbaCS3b@F7YwP1XwpaO&qt7 zFw2xoVzkAjI%Fc@EhI_La#hfKCB%JYO4t^%>p`#Mg>vqIU)Zf@QL~=VLvLDlheJR; z{k<-pAgm+_L_)c;V~DoL#ziWU2FjQeRVG|?h4W;@yo#vVw*>ZkeyjU{-^e~& zIH{p3lYTHK?K(mfF*KD7Kp zqD}5U)thd5uV0x=?Z2T(`+ zf*LB<$!SZ{l^!h8S5KekepH$XO-?hfDvCjOb$QfCx! z$j%Zwt8f~%t7>xSBK)cACC*i$>(ok>Tpj?UI;!CSmZzIel zr^?+}l1XFmj@$I1?o<+?i%QY(@GhSPNCHGZ|-w`h4vq7YQGF+5e?3Pu#!MAfFj*ve}c498*zIi^IR!b20L6Uq6b zO+J+|0~)vP{wT+nhSd5vEJmC_MD`<2gj%#PM#c3V(FpNy&(-!BRjdS<%8N{1NX`bT zrD2%%$D3&Wo1E!Kn;YVx(oyKFNb{*FJ^kQ4mkbQT>7;p;Hy5w@xR;Uqoe3`Cd*3@u zw(bLM(zP@D<9fQ?77|C4ZX@jsK>1-zL6z|_e9zu!>F z!VJ)0{;$`xkpozkAVP&K8@K5_!|!zV-T>W=2z~@K5IG`fs4cR3877C5pfsIcz57`B z5iwA5sR*x$0Nqq>k98qHfb`7Y9_T^8#d4I6hKqP~_vQhkq_o?l7nn$qXIQVH2V*b8;S zqE%1nG5N!4=riE_fDL}yk7g#Te0FTqgdJ;t)KXiPS{qrt*tv~KLI&zq_`LH;KtO`U z4O|eH5oZ}-pz<2);P3Jn2KWELn0J2u%YYvRFqV=TU}o1Az$kJ2Urxyxu&~*{(fHqc zS^oE9OH!6{LQ%osWuIrxJaz6f_DAd|h5?C>93c=zfEZ)ok%m!_4xBS|nst1>Ff>Z~HpKsr!24<#KUf@7u&q zUH~BqF3g@cGmibucevjwqrHS)=1n2EUV)g%Z!1HH;IilM3fS2z9Jc!f0;WsLKJTRh zmX@#azf>U0CH2Sss}~LL*TTb-4<&oVeFz_n?IlFJR+|Ggx8QOg*liR5B%YJbe zH-=OBS+gh$>z8!Qg%Y0*N`0!(0{f)}sbGrmCwItuPEsgx(di`nc-5Bvw#)-&zMZZ> z2C{GuD5hu$sJF9x)r;c_3mAB?L^tlFUE_QUC_byjw=8o)TCj7zK%V{mJNq+QS_O4L zQD6f4pkZOR&Q~T=p$@Btf+i8p?k1qQtYa-BS_OsmymoKU)rPFZ`UaLUi9^bO`EG%8 zdj#W5lJ~i@gHGpw;bMpKXjzHeDRF*gC_-0WZ5fwZ9{ZUYjP>2%SM%F+m%u zpiA=%alSJz;&O%3{`o6qV<@l3kA1t?vjZCu!-v*%U&^n~w@E_(T8jb&;|9Ajum>e0 z*`)msIjVVGKW5{AMRX zwLrVB2yZ^_@QEhVvi4Gss|GYu&2JG0fazWt`WO>Hvh}IOnq9tK>qpizOzl!8cok-L zR6ZkkH#3SyE?`x(pHE|o>(Ol;H!J(Ik9qA-TJ!XO%S!kJHKO;tX>PoAy){GR54klS zJnr4LMOn+>S4f|9cl3fyqb|1F>ivWErrOV^D=#*mf6BVzy6nqNbmc-C$%yzIu;3ea zhWd1lL4J=%h=eH;AEj`FcOf$%+XR}n2x4T7Aab!!l@!`Qc_N2WftTrh6xQV8Gr789 zWeoe#Toa^O$KU_*2NBH2duB1wjOYg#<}AFJaU41@FDd3Jr34gFcW)wfJ#>OiQ4|_q zpywAYK;y)Xij#lxE*+|1HBqA@g64xM{HcNlmpKAAjb_schP;J8keMUoV*5WKj;V;m ziS8eR3IHK{uK!2pRWvcPZ~{~?OpO0`_svwDR$3QD`BY7&H3~EW7lH^vU_j84wFZ{` z4T41=YavaDBAs(7GomHWz(hQn_Gj)ZAj9LF6g-tOa5UFf;HyM-d#r9BL711*t<3ZJzI~uxi)F*VO9?#IJ~Gc)f1jocp7<((MPJ* zJk};7YdKrjqEm7b-H#g_mc+Z+m9>^60BK#?ZWSn|6VB;|CV6$nDEjt?l zF095HU~uEeI{7w;6roX;&Uzq9Z}x*8pM9wI=y#H;^kyYqs_4(zHx$~GYe&z~gQUCi zex^WBbKeL%a$~2%*e*fp^u>W8Bphtq4N57$&~MKU-&!5$Tyg&W=q81V(5CY zt$NR2P^!~!NE`^gJJ@48?_|u)PCO#woPIR?@(R8peo%xkA@zn+F;bp>VC1$P!YmNi zcby$q#L&kIR>#V}z}uXPL~qX)0H3*Zk*`TLH$eP`yjqa+f^+91OL&&m;9YCS3{mGL zC6sHrJvUkQE~EjWlb1DElM-}bg7`F|M0LmM0d##3(|A_w@rySPCrci#9>c&XvENIo zWT3c0?DR%A;qlsM0%kB0Pms?7%sV2LER%@FKWlgGCv`xA=`&*>h|lXd4nm zENP*(=u1q#Lx;K!NTITpV)Y8-b;GR_)6lH9p%c;X2tUJoG9re3-hB6>*q`|0zoL$f z^ffUBkR{q?-_B^@_wjjy)5nUL7vFb;Pd{bxa1iMzH>`Txsr}lw%&pwyr zQn%ngNbnG+T;wGS<1%79=S1+IM3 zD}Tag>Lz&n-pX&_!6)K)8U2MOaTk~WEui5AdqW<2Lmv0a@JAvTv;L&mSqmKd`JUYv z4lefg_sg;k+QA1dOJ1FE3#xSg(YO2&>`JSQKJ|iHxBZp@Z&%8KGZd^fS$(Eg+1exh zLgbNMI4}a=dCrUj#CxCwiCfRH++Qp7{}~S4r@b7V0G#j+>A$IW{Z~#07^eMaEdMXB zD^x#s#WBJ4*}5=cOCK$)wUEKKv`~^}gY|2YmzZ0tky+dMDV5Gr+X$7gX~8sUYMPb? z8|J7`8tNw2p%Ual?EfL~T}zTT6rbkU?Ir-f6IbvHir3Ima? zdz?FA`|GK3`TIpL{r*azG%Nio2b-{(Jy|q^vjnCZ17Zk}u;LU|Y9w)VhPJds9i<3u z>%_7?1pG(@ExjzU%Jf*2-1xW|3XgICtQ$wL*q$3ow#p4Bpq23^Rls^=aiYT140J(o ze&Nzs)(K}5(Hw_qNQRc(vIUv4-@)Yyk_BgZ_^@xO{k_=RL6Ff<_t2O%_ULS2Dcu~>Sc^j$MM<51D9W-Pd;SaKCpnDj zc(mliyy^rTPtK}gM^h}!#WjcPa5xrTH=-=UCW9&WG zE%!Q&EG5{yp3+hgv>@W3}fnnEFry2??RK|-z)yqFQ$wqWvz}y%wlHM zxX2_2F@)w#4rnOxMFp9-snm5%%nK^66e49UKpn6)|MI7&aLYg@Pnc)D|G`(9hPnB~ z#+S>f7-q{n@Z54W7Gx0Z8Rt|NB=bKgd&lTZw{2^*DyjHQ#kOtRwr$(CZC7mDsMxmc zRBR_VYwvSgJ9~ZStoz-cZ)@+r`OMkJ7`>0)y#|q%wc%9bB9&-Iu$Ji2Htf`0ww%+u zwJJUur5p3|xa8$DG=#WFH9E|L-)c7{sZHOWVwGwbUVQNoR8-*GlwJz;QQMUqb&JEb zmZ@S(JV{HBfvC>aB(w_}NfbRHWa@S(kNJb}639!mD>q$)&er!)>K?bW4k9q`1o>T(nJc@C9+453l_0n!U`~Pyy@oe6Hx(hN!2ycO>g9AB z7f9NoW3!TTkOr#Jar{AFmkH2StFqHUR6PHpPePrwK0v%ilzx^KuBpjwxT^BYosQsJ z+Mf3eA}NL=B$(`KI-Q(Gc3sgut9NAf@KRUE!I^44OYweBCC7Z5Q``=``(ymdaE;?T zg9bV_XJ4C*My49MGN9Op{Bja<9#wjU^tXYOLiAJmOF-ek$<6HbrqbA$-f$yZy2m27 ztcG$q5(tmdHN*JzoffTG>fB<%X3}iYU>$l$h;uNN@e2J*X;6~?QCfGRkreU`kWCU8 z9O>23q@FQlF-Il$5oGS~lpIe!)wLoUd#9C$gLX0OBY_GX>@1ouJEA<-A59gLA?eRT z^=X$MRH0X99O};<0Xo1+EkUO+DV1w2)-)OR``!H{gZX{9KnV_JuF>%MmfITcj?zB-1XD zo=P1oS^@U-cvZN`;vN2`Lq>`*&K)g+Cwp(vulU@)B%Fm9o;|!~8PN~=I1A$UudO5t zJYO=1Zy42h-t;rI383b)v%JNO(`3+rqX142w;HA$vnk$Q6llP3am=nS%82kCTo}3i ztFVA*GUk`ft%T-#!L#Vk*u@9|zIBEUiHq~uv@zK&-}pZ*(XK<~CBLK9xyg6f0tHZx z4!5Y;R*u>308!i$1}HU243(jR&>P%m12TnvV*zjFCSq@3QjmxTzE8~|A6x^L;o6Yo z1)0*C!W(XZG(qkLuY#+OrJWq>n4RIEJ;0vXp6VpLEbRmnaO^8sR?*r+hli#|4xy8| z2N|&G8q~O(m{QGwCY&F_eyz`2!=vCH7cjb5)l+l3P@y~UGag|rU6L8vdLTN)(kjOb zN=29GbZ<;>-kF`9egsccr~59Rmo3pU+fp?9ee<&;qe;giqzmCOcmswE$?|-d#RV0P zNRRPdNwtf36I+No>%-%iL|kV^(u>71ardNpYr&$HS#NM4>4CDgUn#Cv)bQ}4nANW+ zWu-PP^C<9Q5MG)67fg$!dulvw(@1tSYc{#aKBT9iZSo=+q)NhFD!p}m9h8ijG|K#? zfV|BQ9F=3r6Sp5qPGAM$aFL+oTl-qdliEBJanl31eWlZ#(YSq7SbY901sv7-`Q2`U zQ%Yg2&o1iJ$@HnZnBCc-@7ZXdI;m4#^z&VI7jKyfhgQkM&x7 zqXZ^pb``#xsQ8)&GDuJs5b9C|(D}b|KI*r0B`%+9@B01{F1!w?&U-de)Kb^CO0G4_ zTBWnx`JMh`ndid-NATQL1XF8AWJJNXU$BW>WjxlD!=XN5M?8npH+2og8DiYx#{lvt zV=y4~`EPYQDU*KnUkMTMUvgaBf2`a8T_r>*u6@FZ+7#lmUHd>?kuNd*@)DcVMmyBmulYJS#A6HTu zmnz1Wva>tCsq)J8Sz-cKejLV1@b(4$a2zjR5`s=)q%>_A&!LeZ%ZMM)QfUFqgbx`- zaWFxvpyDIdQ@0A~iOfqaA^hw!WLMbSkG`ceS)RSZ5)q|oo|L+*qVc4gsTgLcC>+yx zgP2m&Ty!YM(CEFonFzg1Q@+Thfu3qGsxp*V+CrA@tZ%CAzVQ^H7wZq)p#(~ zZ82OTt!}myw;S5pgq=j+s!=oysZ%DRl4Dq42Nd3YHg%Y-C2!a5vY;o6H8f!qBOg<& zfi{<|(=B8y(h847x;$`20`nqdZtER;ukY)(a3P4r=Utlyv8bo+HfN#d<+`__SH#!Bm!oE0fYoIULK?9>#ovI3UMPuLn8P3ny0|iA5{(0FQ`=a68Ae|m| z3&G-W;$Up;Jd zdlptXa7Kp!@?Nk{at8Q^N%#lF5b0_V_f1SbPiS^GuWgp)b1|(b8y#SW@c<^lE1KNj zI=kn>OK{1f4sCLpgP&wN=L{~RkTU5#Ss#zRe$E4&uA^^$JnK*U&5+FGMQ*yD7Ybb+ z`1wV(4NeIzVgqJgk?s_n0Zptv4c(+RUZ1u>@^3s`NXbxSw0_b-AXT+U1AO0<0m35F zC%3u;@IA0W#^bG-jUdLLUdN=|_!D10wEh4+yk2P;a>xO7f6mNFL&Ar@aRS%FFrWBe zt!mF#mCXO&K6!g18UdRx10)kCd%b_BV^~_s>e)N~D=OnFY}C@s@Gnoh|M<^;mCi~h z@?T=gk6_EX!*8uWB1vEwf9UQt%6yzA)5RpFdjvDQ+q2}LG%#7t2(g|7X7BGUe5|N{x$hD_O@d$ zcIGgPgfo~Uh>F^1fL4nvlAtQh7m4iB;g-sc5bhKXX_Tb@2l}y#{kE+fa3C zGMn@c@hl_)pq`b3I*CjvLJ3;Pgs1ZKODcKN-@?9qxUg^#On73eu0%0Fq3QQ{p$!>o z^{|yi=Bu@r7s;24yfEj&?%Qeib!9xMdX~^hG3L;HA6p)+GVMluZ4Z~uFbvV!^5G%J zloLSPfgag3BZVIiVYyq5R(wjr-#;b?eMmqn`64>jTj<;;ZOyMtGOUXrZVX$Su@Do7 zn>LY*hVD92UnzGv6i_00eh&u-W3k4SqB97>uq&W6SnC*%k#a1oBS1FG3|!HuN>;0| zU(?%^ z=SUx%U2UTEY7>%zgDF!3n|XJ+twh*@3ywq3EZh5u3X!qh`T8-kOqsxXI+{bH|il zWsuDJfsvV;-}C+bPJ)mZ2MGUIRwRu(@u!oofLQS_L#TLTx5j}}3%_(QRNk?*DRI)1v*0icmr*1?QLAmtiGi6Qa08$jyBe2 z2LB2Q`=2hMqJ_--UsmJP8qQj(n=pKGns#LTUO769@O5Djh9LN&yTuc-U`^A`%yY># z-jAQAKzK9#INLGwnD`{y#jxwqo6`M(S#S8FXCV3>*`uC5~0slE_uRX_d5maTcCRQl@R5kFpMk0Jkzdz}&r23l!n=$lmKOKn)1PpFuSCJ$n8^ zD-%z7ZP#>j_oPb;6JzXKH_y-A4@0?9?TE{bG&*iY(rFVxLc8z)3)`(F5m=p8M}<_n zc0+_1)u>!-m6w9g51ji>QyhxEiNLb0a!uF1qa)^^{Up1<3Hi9^lXX(djU} zkuMtj(Yr9wlTIWRn3aLr--U2qNQia>9NPQd4= z2=(mdEeP#9mhYt{b5jtFW;{|7du@NpR&tpAfCUX#>Q9m6J2;n0yW$mCj0T}y%!8EZ zCI2lXDAHZA?{4d)IXUUvcMOzorSl7U85UC#44jQM<25(gArrK*-d)@zMU zCcMsLG-&tphJW)!Hz~oXo$5I&Wk7wnLzDEl2BguDxreUOy{vVk3J;?4O`-~Q(FN-n zVD>c_H{YC~fz6688JQJOV3-wHq^IVvN?0V!Zxy&K7W5$go`W-L?EBJUiEL1<=PH+L ziPihf*c`jrWXCTnq-K|lWDvZzQn7%bDU^2bQ|$0(i6NWKTapWnbu5)B8H0H>PHYv)sD*ykv-i$gDT8Qbisa0l;_T-1!Vb06{vac`LLA3u5y+4R$Y+qf<*Ees00?{}!T9;liu zd&Dn31lm-<14AZJ6tj@adt09IX5`W$rd^hkT+MN7Hgm#9Z;&Z4xkN@C3F4rik}3;#*b%L$v6&h3&Hons5O zGaNo7n;c1(O&tUnBO%B3A>yqirVKz5$4I0SaL6RoAyHDE!u zSY6`RfLF*SRlVq8`SLZbboC~0Yw$O*1_&s{+8O*{1Nr5T1ktHaR$P6owy-E+Or*6~ z(V(Y-xKHFtavyGWl2+!5M3qLt4#s6SH(ybbt4yvg)cY*R*I1FVV>vS0+sLLgR?F1n z;1p=9d?Ey0h?b%% z?F7b-TBGDzRbm#4QoFgGNn7Bip}_$FMLg9^%eHUR>eyUwfyID)eKZE4Bc;RT28)wI zvyDV7@DaAeapY{|a*MUO!-BqWJ@R~H51dV2qEggQSz*M{i*B*jwArzsW1ZjXZdSX} z#;{p(;ORM6%cDF!_Fk2RWIRRvgx&m^2SiYJXWst$chpSce7E3b;0bf$`B9VD1mlH! zuqayzwi#yzSRQbDn^EO@3rF|&w-AWg-Gc$)eQB_!pbI|T5OuH5y7jBi7{5>2O_<)A z7%(`ec%$Uj-ggzA__X2p0NCr;;rUm@b|h8q*uHjpexqP1a|-i?nl;K;SctRl^^f|! zEeeHYNee}eBy$>@+OR{%e5fc$72++k-ZbkqmA@XWLEcSYQ+n@yfo1O|f)xv~gwsJR zt@bvwl-Fig@;i7|FJYsSy6fhh^|+9dZ;J{=_Un4sf|{hQO8J?gs%tY5S9ms-q&Zu1 zl_y!4cm=ziZ?%cM?P6c6Ko*w8-I_4^v?}gAIY?Pn{NV#P;bM(}g7172{R9`7Qe$7G z?y7c}Qf}&IH*A28oq_!Dr zG@x~u6-lHIReR@YFV|~=qx#KNsI1i;%gC>iY;{t!9Yxe~%hIq~(Z)a&@t%Fcl>XY( zS-N-NeRZ1%J6mGl5QPv>1F-xwyk=sMx*nF&_TkLJd2lhfu(0>Uh&WGy+tGRVxvCf; zq+KV*wGH2@8+wjfpO7(JBr?a!V`Ph87>TPu)|F**kENQwdt<|3$a04nQe|7w*`1R) z&I29)!Ga6IGE?h_dIQ?Fnb&?G2JvvTnHOSERS4a(wVg2Oa@D~{BeaEq`DaSDQ{Ynr z@V5I++ZZPhBUa!zajK6($5-d)UWF%I%7H`+wa#FmCrHALlBB~KZeT37PchqHPG7wZXQ>Z)34ah{cClafUr`hHq+mO#z_a(H zl$A{i=SA+3tsUaK-Ipajb4J{wo-o5m)?lj#1~V%KkO_J!k5P-6YdIoqz{Y{}up9j# z&V3>1?$BvnN`>fJ(=mdbTXrDylGnOJMUkyTceFQ_DM+gPOs`Y`?_mk)sM>j}6Gb)m zHPAKB%(naY#KO*AB+1s>$FJ`Xa{S()LX*0R+E(E+rkRo(Qu&6iTRS)***DZH8M?UH zXUINsP^>K6y>FR4Odmd2MHzW#IT?kHaz)EAvc_a6WD>T2_cN#Pa(DX+ZohrOE!{u9 z*nefx{!d67_b=gL@TaO(h0AH2y$U zNXiJ05W@8r))pq2L`HS}`NG=gCX&_J0dn^fvzerignHrOY+|{>4z}a+aI-arV98y? zA>uw&BHh$d)cm~o-}c) zHoKI5ku%#5{3@hSM|jbxkVq>74o>26iq;C=lgq?#lQ2Jdp(R0G4DJZhW}9VG1AXE7 zg;qciV5dGr>qj|YG|Og`0LhR#kkN~OlUg*&eSc8nS9%L{NJcGdsM;&eEE36tSl24^ zpVXEJE#3_4*O`TWHSYg?@$vsGlWeRFob2t5tR3Z@tiNdGe;?t$d-Y5u3k3u*L@q8d zRQNsM9`Lqy{5*bMVb3ZI<&-FO@xsJ-ktC&4eFkP)*0fxAW76k+9sBZ}3j6X0u(O!e z)wu*zF+!pzy|&Bq3y+s92OIX!kH-_7Z_QUFzS`^KeS=gC>&$&v7(6JkeKgF*mT%^J zo!!?~G&j4l)s{iII1m|SR-;xYgtbQ;Of=yXK+hmHT(Nd}Nl{xrKvOW_ZUH61J-?c> zOwl~_aAiHwe~7Dip$Ubn#<15In`6kIVhuqz+OP79wCgEfF!<%7h{1X|8U3t$F=qzv zW!*TylAeG*o%|K4U%0j4Wunk#lTT}FcpA$f;-ej+AwI+*IA$fmBM0=G9YPsU;hUNd zo4t`lc%|{=TVgVp&j)Q2?G(=Z7`1(EDVM z3bVl4MIzkAw3H3`VPaGC)yC^s zH>_GPh-7DfngFMx=RZaED_#j&d=DW_n;9R#D#bDDDU@reW*r~Eim z=+rkm7a%%%hSMfxpin~G*T~W{U|!4+T_2(8UPXDDXADA8FmUJs!@HQj9|MQ&xD`IY z<+7Fol(07iiZ~ND1RiLt4Q;awMlkC>co*G+I_^oNRVh$wwD-U+k|2b>y@f*Ynj_5u zBALdg#495@I3wUz2M)FUR>F-pQ)W9nj$`aQ$8G5Avjgd<41R8l2bSx+#a|B{$1uRe z$*UJIqb1yK4gbzarM!%vqb57Ye}&`1nSGsPu?Z-Y|JfH6iW~w@Oq+J7*2^SGhYw5= z)v+n>+7`DCow45wK7#e7XVv{sqIe2(_~~L7tF}@WQNO@&&;Q;k2h^11^syGTpzyB($Bq(?w zVW#A&eC4b1qu16W&V7b%%me1jraR?xYXmk}K>WZbQQ)9wWA>1{Bi=@XQvV+!UhCDB?lk`!7TOh;?ZTeTSs zG=598SwnZ<#W5I69MlDy4IqRi_WsH>+sl|qr=LyuO*bT6M6yy$r)N@ktse<+mCPM! zpz5ndUQ=ds=C%@#Fo-Os3{q!EqBCLiw%=9|m}h zqOl(`XWgudYyOZF>t;=h58Oeu8)6{4d!bCpkRo1oIO15vV>CqVbwD8^Qo$N zNYtR&xwtw~K9<@4v#vQ=YrOZ&sKR!k$|ll>0!dZdq2PbWcfNHsZLUv(X+JXV_$dNgW9ffY%6IB)!mXs!Oc6|vU8NWD{st?T=OL|?*3S& zC}EN>2x(>#4tv5+(m%b$jWn$(`p9I6%A=*FatUB0Nx-DPEavuruEMS$hTY>Nk@I+% zKt>(NZ^O%p1WJoaWx__Mg5fL_;~b3LsUA$jb_^XCq)<5B6(dt*>V^he7B7v9dh z`J}%<*fyWcJgCqdXx>2{@+KcwZ51O~2El}yT=zCOqpkV=gcHR#J?G31C< z>{~-m6-P?W0yW~n-#?LQsVvbRsDE5WGS-xqXh2^3#*`IU?Fg6`Hzz>VF}0B5K^5!t z0Kj`>QhP&YcC6DtsGu-HvelLBmaOjejU+ZKgxlfn1x zg*-cr<%l0cvO+ER8mN^HTF-i`IIs|vd={`_W=#Yr+0vLZI;2=qfKBc{pGu|QQ;Jx6 z{@^4+20d;f-{bTx#i>00@ z)l4l-U5|%Z>e{c=O2S}q)RGJbpcMG%|Ld2FZg%hH0q4`9@p%uHAywz19So?`dLR|M zwSeM~^Lo`D^?GGKf);yQ;c=Wk&x_M~2$(-U=gc)#=Nw%Wj+3Nrd{{G%MBZyy zmvG=}PDff55fI5>Fo9KT+TOUR5aHo{WE@4q0MBR#jDKU1#YR{~B`;;5nL;%oKgz2DIRW#FVd6 zqmNCYR%z5|jg4M@U%@<1&}+VDDp5dW2lB%C%X_aI9Z+jY1Zqy8T*k~;UfCjN)yIr9 zK-W1J@zWNQ#0&|$mu%`8uVNeKIL`pPyv%DmJ8$pI$ zM#ZlDVL=vvMSSLslX`~Y#}zz8(W9Kw6AaFA2%IoMRKpOO(!)GqAcb~@s7Lqp7h69r zIkcuGfb*{!IULnyFb49z#iX7|SmZ~)d)FGf)TG0+V0u-5+A%OAerc_0>(0YRyL66Sz1?3oh==sN}o&SGuU8*%SYD$y3O5tD5}dK zVOpi-PH9#qQ{XrIRJ~8ny{&@XSzZYAc$4vPb@7{|Uh>&p9{q4Tz(S2>v%X?wSm}kZ z@+^V+Iw9-fnNUvXxxkDS+p>3oJLi*pRk$5eF#{+lulu+9b}glCI#LS<vnd{05A&bMa^}WYhOL$X>#cnaa7gx z+$Tx$G)|M@rPX`tpyW*ifCE-(zcTuL;L5@8So);spRUUX!mH1uX)-?z$7QXeSan8N zzk)l6@?*A9b4{F!xh?~4sK zm|sLw4MiJ*46uuuDB*YtrYjaij9>l5bsG(QE;HCZI}}!Ryvr33LZ#A>-uUf)0eg=Z zbI)B|iCV@LLcXlv!35p&%KjOk=fr3O!3X^&71y(d!PKsRG^)9JXeF}3B5#PRlklJ0q z^UtA>dADR{u9#$-7(X07X5(~y>*#j?W1~Ev3o^o>_*wT=TK5ksU`(Ng$Hiqv(}r|l zLG*S10kpEnj~r74C{?NaGBke?r5$H1fakEAhBwY`_6#3qY8ZMaW}&|p;iF(r#_Q9U z!a#V$FJ+fP8x>9erEIkl1|3tN3p^*c2qScf=X?Mh=tJABkf z8sQ}zO6*IPXBVFeb;;j3>9)F!9eu@ovQF_J$yKJt30Qa8Uy9uBmo%hEFl=AH;+;N# zP=0i;XcvAA3|Xdu}pF)TZ%zmMNHHbOjxu>fqy)jG0R=$kK}J!1eS!LTr7 zEXVGch*#@o4!pe21s}nZ;+yHFL+otYhWT1+d|65LE!0&9X_8hBtGo zc>_dsgFW!@NJYlyevw-Dh_i+B#Br5)3mCs6E~eC)@&lb-MXK6SD)$3ZPP-w)D7@t> zI{(6t5GWi{Bm&RKZCj$1NLw)+YkkxC&zFy-@4C|9Yjy(u)rbFgXRQC@^!ld*V*FH-ui~Am+-|_49n??YNZdi{aL?75~wnC9BPlL0C43$w%w@F zBVxq;{GHWy4OD$VA!32xeeyk^#a2qj(GZsWb@o>gokVE{%V8Wg z256wR7q+|sZ5F9AiRzgi9C7F8P@k2`s$fQoJ@pu1eXa_p5u#Jv_6UcO1XumAX=ncQ zU{LG4L7ke3Z4vb+c0r@p4tjY%O-R#}A!&MY3uAvxx9pR~=uc0i&iTi@#XsaM`(Lca zv$cM{AA$byy={WSN$}vqNwIJOCI;Yj51sWP!LL533-BB#JfypfRW9f6_Yvoe8GtnO zBa==NpN~$)O}d#Zh|xG!E>+;?T!E#W7W|lKCL)~#okW}-HdGiPU9zksADjc$?+P7T6dNed3SUJ-hzIfI z*@dsyN+=Gl^Q22P1O(;Mytt|L<6WuJuw1F|5NuyWC{0UKvs?(449D8p*!p3`EvxnS zBhH6Am@dTBXp8><{ODEG^J3~{VHEc-uL+S}SRvm9urZ0qAD-3Xbn;uuK#Kifmn*evQZ#Qxrw_3eKkZ9s8uQqS zNPet6CWoZKQSXZMfyYch5N3G8@pRTRKKYkvBORkV#&qe zM;I@sT-euGN>)v>s$JPO}UwIW6C9YJ>T`hWnpsQ2IolcO^4ghGf!u>@OC zXDQZ%7gLu3EV2!{0V?N97&oq4KD7>_;nN{5)Hk2s0q-m)`cZlh_V#lN84N0hJ6i{S zZhP4hez`5h4TU3D!52FN&9?1}9G_6qIet;u#XxS`u(h_C+}8q*9eb9%2lD*d*oeSSnRaf1kd8Ka^_Rab;N@c0+h(zS&gO!SLEF zSrhKTplI50M>t>(@6t#;H8hdKcY$y_!WN(Oy5oSM-K8{`bqh5+f-}w>YT2vUmMHTI z%3}j(p@mkRg4JDQK(t?EAiP;*V0@`C32j+qP?VMfq|rkFA%gEAS2mxl*pj8@9-RhG zPp$h^aV$H9v|orWVV$=bG>q$KRm7DEAEj9r|3)sI$B$Ddl#VDci=(ANJQ?{6o3%0d zlRCN0S$jrN?7?y7aAmpRZ=@^iesnk7o|@NBh?H0xSuMJ0b473nNd%-iD4u%<_n!}M zdHx)|{MU+~>es~OAF*H1$XL(G((!*`8~;57{jH+rim-(AF+^ZNY4#1HL-2b>A*dl5 zY;58HC}b+(@-nX+B;J=1#q2URWz;+x#2#a86P`v2rIo3vxTu6iMxm8x`1if!3#s&? z*I!L1$rrB%S^S?IbyQ=?m}s5VFuy!5T20zb+C^wOoIikkPp@@ofR1l<;eLk7bUm#T zb6>(|uX(YC3TUtUxkOBPAa*Q4(Y$yb_h)ir4r*}=+BQF}=WlC+JU+`tKa#n2p&&i^ zg2{PKgJC_s*1*DmAM?>*_lEpf2tnec+{a9l62MlvqN73LJ{+*)ybu89z7V3~#silo zeJX%)dP?+{p+J?r3iKx-QXD{#%O49hB_q&V#S+{26Iy5qA|sLdd!^M26DYeL&X^3%c(F#Ih)tzzd4~!6eGWO_n0hNgIhT#QqSv|ZL8r9ytyox5rbliMJ zBHja5lU+JBpLv{YCT^|3{ES8FOf>h#@r9V&ZAFvUsyCJ3b**fBa)G7H@-cIx;wt`W z(_7!hSVbx1W?AJW|Tg=|FHY`asr85h$;7@_`2 z%L?4m*=DpffzfGi-k`)>ZL?CSV>Oc`L8Er>xu039qk2CTf+w?{Mwf(tdDa7N-V6<==x(O7y!d`j_% z{6Z$0+~?r)sB~;JMM4G&4)$}hA&vekTf2|h^*pL5P1Gn{sdu(X6d(J~>{+5UR_GA_ zYlCQYCau6Ks$5CZtFgpViCHDG>3xV@PxKL_`XZ|qPcNo! zc{T-}3#P;k?erlxH%4QG%371Eu}g?Oi279o}&@y8gNOYKOSYY)f|ROE3DBl!Uqh z+zQ1@e;#!7t+bL4_^%^v!%HQTq=DtG)IhQot|I*v&J67;Q%X$Iia92%L8ihUWts_+ z;|i)Q&CP=lB0Qz$Ce}vCO^YqlJk5?|N=5mlBqLme8{5|X%M79#L5LrocRAP9vxl5K zf5DH86SS3xvPqtmw4{{83a*A9v!G8x#m_mVKq5p~Gv}S#7*yw38WmKQs0K`xgX@=u z$39W^*&|WdW`--ZNcWf_1+bQHXN2QrsL(~yh?Weq8%G#FPP37BhC)Gh7VM5ag@;$~ zT99`}+g!y>B^iuK;~J|8@h>kz*} zWtA?6TS0o4?k>5i388osT-<9Qb@g4Qfe0<;hVyJAxqHfslRf1JDy6W(PGhaGIC~UN z6X~h(>46l^BXXLh4rbu$>Df&I(q*LCYtidolNARO;K^?rVYp*kC$xIk$H=m5r!6to3!q(Fhqe(*s=8#wET}6!$YdN6@3#x!9 z%mga93}@CE2b$!-Ms^SYd9ITiWt>D~1CPfr@T;1{!;$(nv*FI@k?xIq@Z(UqQ=+K8 z6tfT~<+0b-+RL%S9z?;$9Q^>AGAx^K| z>N}m|SxJzF+bQhN5lK&t;yCL2Y72=bN{XB9QF)N_D*8Y?vr!T&xfKnNBCq8I2vo`@WD zY^N4r>lE$nXa|@3E=;GUwW=q4FyC>XAd|k5+j|x$1^Lk@R#NgRT9Wze=TeO_ux?Z? zB9cDokCtFz<>=ta(UVxlB?Rgo@e7rxwd&xAD|AMCEBr%5JL@VHuql2>W*;W+W4cFD!<1YC#cLl-3OQd8K z#y2qdCh?zy6;yV(devvVz4|G@5E>G-akls13y_<7ca}_3^pDGd##Pp%>*M#JAB#!$ z4g@|dEAz~t!I(9Iz8cf|`Ai-S_)t}OkkJ9_(`2(_>!pZnn4Hj$wF1rsVN5m8Pd;`Zg7Ngtj ziTHk#@9KLB_lz1n60?K$%$k*eGZgFD8@8vh(_=YkO3^eI^d33#vja{0Y>;X>jP~6J z;9$$(Oh@tyj=&p+fH8Yn15E^OZKoC!bx?a0VUrv!jwBDWAPLRP1|#9&fzAxeV0M$w z4E=6Ti)CIe4?S{MyJHtuHRCoRaywV5 zgieQDKsMGP;bwnJv)z`oCl+?EMoV%&LbL5Tac4+*k5l$opSNu++)WKhX1_xJRu2~i z$WZG?`03VHx(~3oXW)w>xlf=lj!_ck``@-IAkY-%9lyFQ+ApS>`=7h5e-&&0AI-9& zmBJTO=52jjCl-oGE*lEz-w;GvVX8zJs%+XQ8B8xl!r-k>JpxU+ab|pR;XF$689(Ve zejJ&-K-1|}`+<7jn$8Hj3W!WQYkkb{m~_a#t$Q8g+4=*}m?5V$_0F|xQLJ(^+TFl`mzZs1I3Z$=~JDh6nU4Kv}+c*)R{@hk| zH)%a@N-C9oh9sJ?(#fo5j9rZiIYSV zL3DgV1tk8|J2O{hEV{TU@33k%X1_{etg%if!CJ|D=org@e%(dX(p-Saa6cG>J~5`D zf=k(b;}LyQyTIu92^*o~(v7k0t3L-;9(|QR6=*-SFnVm6)cz(knD3CDeuTg>Kje;v z-IP_|aj)MkA+Co@j4iBv%&4>gff>Ac&B+s4O>{Is->%2 zzIUoLJhr&WI00EpR%c$3*a$G!`RSzbN>$h=&|8!x%A&9Q?A|WF(By8U7xsRIzjQF0 z7?TtP?YHjyJ5N)IfWYsMX7QQS!*U}|LlzE5eX(_6w9M~|)uh!<^HFh$1@T*~`e;dl zbB1%gLe0~0s``}HKsr8@^@fYW6i8LQS9re@hacLg2mxx>1X>D@8Lqk>hsHB^ud@ft zef0y@XrP(&M|9z01cNuI{R^DIk*I@A(VrmWH!TrjP>(;7KQEVWwRo_W!P>zp^19`H z8aiqB-lHi!d*Xh#I02;vPvNIbpqO>w0Q;SK<+n)Db5t_5 zmnrS_6WrJo$(#4E9Ko;|Ib3A?JhT+6=jCT>kWJeHIwRtM&Tyy+DvD3GSlP5xnJ)r84Xq~>JlQWmY`Ie#jVR7=G=dVf$dt&$Ia4Dx+*zZ=;VLAmQbxsZyma~nldFrDw}}7)yyhkFclpu^?6Z)ZLZ(Xa=XjSx zbU#IWir#K?R0(<<*`sF~UhGMK5tuQRiKz$8m`6z*o)yPXw@rUg1Q6I|lxF&K56v~VP6&&=gt1*54SgFoh$Gmc zPDZegu|tao*SpmKnK2^_BQj>9^b{Zarh`cU-}fb*E~b z2kdSx-kW>e&(gGaiz4Z|?x0drd;I7fbYl!hmf8gnC}{X0R6{p0w1FXmLoQSFC1}l} z4D9vEeN)Qf4w@MW7k~}5X5H2LqM8D248?b$2>%tf!J~=r^P@rImIJr8!E*> zvL!>>L9CF7NUA>EPv8|Hh0xOze+t*O5hq)R0s%lRoi=iZ?hAd;ljFd3C46=xX9N)Y z1#1-!>ny(4io_ujgH`$@e;Zv0th9U?hNaq)8ta{olUNcSIej@N-9$B=W$RW*?nbfg zndd?bzj^lcMDKxEy=J}*p@yx*iQ8O$xs>5QtA3fGQ)SSl$!MAHX8M9Y^akB@2gl^4ixK%r~r zM-3w_J%-{jD6Y-IC&grDkyod8^q7gMNLT;|?t(*@_~{K5$pU8jK-!fXbMac?YSF&D zcHa__=FJp(B8R!$OvNX)Xm0EKNvZM{N@!~kM4<0-x4L~Tcb5mJXF3)@;g^kdIh<($ zjFLl3FdJfcgkt$5nm%-`wdV0fUltNbVzY>xWKSAM5?6VeZ%)Cj852@5BoueHV>8jb3%<0 zEY3bqtie4KG4ns(2@OpQa<{z;BdUkF)VS_%@->F4U3V51YrTQPf8hH-Dw)C-XuxFGGnl610g^ z%Pr)kg@f<%EAxcXdj5$u`gkno-OYR;a)$3w2N^VU^#fLvo)B4-o)km|bWh+;(A1OV zo<3EP3H9N7VeZVbyaR1tdFbE*(zqwkJLt<-JXBlwLwl&!v2{D}-dOiE4vfUmu=vC* zH5GkGi#rAu(I{ckS8k7U%;2NGJf6&nU3PB@zwS4cz~ds{JZ3JJypFX!!tR zzmxXz2;r74V&Zr8jo=qB*J>$pYd(#{L=s(k&xn@yctclR>jT7-bHqM+S?}Ra8;8E|`gCOxGtNoRH;u5Ne@{ z9G%1}G;mSsrG3?87D8VQ?xG;LUhNMdk2R)Rlv)svsv%zl3^-Map@kF1A=ayJAq`^M z@i50{+h?|8R*IJr)XE@nQZKRd!XaVb5H@z$C90xXnT4+=hE*!FWAnk@7)%fP-7!Q8 zj{5p7lz2cIrnVwRY{oin3vBK0a?Pr?WTQeHs<0qEcy7#6A4gr;6!X2&^bR7%RW|TL%HHF2!pEJGdcAkJJ|%sz{s;< zeX5_WV5l=@5R+1t$IxxmI`{ugos#qwy#w^6P65gIE+};M{Wv~J*dmexfq+l!|6%MK!|U$5 ztlO}$ZQHhOHnwe}X&O6?ZQHhO+qTiXrw`_u`<|IT@0b7QbDfRf#@cHQJc-XW(Ft{T zXOmbD4plZ^0l$&&iXZ|*kj~^;Pc#=E^1eLZ|N6WI(#pHw9p`PuxQFLtAh;ZAFf4Cj zF(2U(f&lnqs_1k;h3 z@KVNj$P8d0!nGiJmbSeQ8{$N&|H+UfT=ed4WX}zX3pKy%dfow@9);Pv+}A`_?6qB2 z-NtmEAcr-cXAu%Tr)=mBdkVn3n@uCoEB`u))xZ33X2hnBTQSVi%}XC}%a6%s3LtTF zN(_F4FeZUi>n`cXObDrrp$)R$n=H*E&)M4#UhYardHnHc(v`G-_Ygd@+AVd;0?5e1$imiP`i47${sVo?1k=o41OYun z`D~3bmz0Ss)@x;75pl{B@SDN_lhiwTLEsa;2pKG z!FBw{%?oSL#@JLjc=M0>48WZz82eHDi@*AN&(L)8IUzhs5c)pfXPV=XsE3zf-K);s zt}@ETxvxUKS6Jcn#XRiOMSpzv87EH`4m2vwHCmmivgYRQImwK0UKI`+AqJk2y)#Z) zS-9K#JnqoUNZlxhLpk^h4-`>%EkbctvB81WJ#PobQ^<5e-`1|EK@EsQ`xR1>JV&vI z7!tPxX#QF~&4#P;V=i((<)(`EYOC`?R`1}^(sTK&M3vCamU`YTg=$wwcDZA-|JwNG zj$Neeif^A_%U-LBXIFRBe!S+nSMrYa_|PnD-Z1#&MQksUag1Z)FvRZ_w!-tr_ty+w zdf)E@%qD(T#_FK74%YFs(-Zb_AnERrqYMaR2vBL|J3=*(VNXDZ&iB9$gQ~lQ1^eG# zDP}3tw*oa_QJLoEr7c6N^O0UGsbQcwsa;^0qzAiU%4R(7MxT4&{_1!>DqjQ;08R%D z_30DepB>K)Kn?nzUb1o3RR=5)cpl4f$%L|{Y6Bw*h!97no0l3$Vtxa|FF<4rp@xKm zmBrT<)RpZkD(C&i?;@oDS`1w|wNCbDvmcXw$Q8S`>< zK2Hc;Fjje25D7}`0dh*{{iYIvuCY(pBq5TB-UKn29dW&(*cT()IzXimUh(|uy;m7# zU@$fiFv&Nxn}-Fx>nZ9G-mB#_7E?kU`_T?{-S1Pd)|1|Zu4d?#nqt}KNpNod(mzpJ z4{qphhTlv&U5BMf?#qn->op)C&wA%L?k&6NXPh%CC5 z6jH;i*~htfm8sQm9+PxUe;Y`&T7*qxQ40_dTz`BxFS(M+XtiD>%#=D{$pbtDG@RUDK+e_9 zv=(o?^dFLSynoW|5HBnd;W0=7=uHzKNLTK_V#ho5QX_i5WoCZs`z*CZvq}6j)fc%r zTN^FaMGpzbRcjFnrBHZTaQWusfHjfDOWt)%TlUVQVz^cGfO42b_iE~v0wVD>z1%T# z5!CwP)&lk*9&sJsXDQCQbuwMN!MgBk%lK{6)d}izCCbMszt3iiS4Ptpwe(Ab&PJ%x zbkD`&T^y!h!P=QT?s>e{IR`AlFKDKbo^fjU!Ycj^lG@@}ELtA9bRL(KI(wQ1{i>4^ zx^i?wYTy;9%afMMRy7(8`<5|X8klg^83XQGiM@;FY5mP0xcT;&yXHoFc3?>6(jx?9 zLENN>MP^OTJBi}G$GC%VjV=Kq(rfGs>y|v}p5+f`)7)z)cPS&Hc>`0&z-aLK;2$60 zJ53(JC`QIIZIYz~uGmCP!kKppQ;T2_;iJ;ict7lusacdd#2;a|35+B%xCsv5!j=1H=pSj;?-E8(+yFg5kuKXN5kBj9 z;`Mu>$cqn@QV!z-Dkz$+c`2(}1<3fhmsvvRgrA|GdaH&_DXkW7a=yorEo znFo*iI8qLezsB|ey^nn}IPQ^#%Q)^H#vQxYQ;2a)Uz2k1Dd3QGoOTOLVps)YGRYLQ zC7?YNhUy*Af|4-o2#AX@r3puOpDFOnlOdAGdxstP#UINv$o`7t{(weXh1S&bYXEwE zbdE*j$lQ*%2i%cv zQGtzCj{3^cfKruhTz5b0=u|SKqh#}JtNuw-2%Jh1CvVSSUc`37io2$<*$<~MYnMd3 z093|2&Vbh@>9z3bH~fsZzbVmXiDgZ10*2sfS|uzLQ&{~l-fIrmYA4+%M%y3HeqBF4@_lv##T%giE~S9p zRJL1>K3u0^WY82SlHbl7Zcj~d4s-9u1O}syV|&~*0l%J*a1J$D&h&%T5jrN^E}*l> zFo2+g`lJI{69#n1sPxrcs~_lM1F0LT_Tm9+BS$7&Dm9Y3Ine9bIWsW zJl>EHCs8ez3uTdd5iAdLzNUyDGD9Qd1_IC8iL{gIZ`F#UqP$tkz-7d) z73R#rqKI8`YlD$M{aBa69!$7_^dL;2RJ(zT&iwleqceS1mrR?b9;T_&8lxT7B?fzh zDNJ6KzMi7FlAA2aZAT;=I-?vD6FTi9Z=lzz1-p;jWO45&N8mKOz+3Nhp}<-2GZvwe zkKNv@<&Q@i1+-(~C@l=pRHhp1Y5O3nWm>FWMTaS>xp96mpNFZ84=fumyw#Y4w8J`@ z0@I6P_qX*coe6Qk?AB~VI!WJ5|i4`~n|Tn24wvrj8Me~P~m`xjR9QZmnU+CZrH z`0q@2tt^6k=FA;)NrW`QFACg3H|*sd!XTW(ZWcB^j0m#OJfEaU2siQXSkzghryS#m z>%`C(e7c$*h-9O!^N$05vV($P!Vnv%{{DQ$lp)!d!W^gl0entvF638m6S9SUj70Os zOna7-L8i<&T;5e^Z7?04z?x9?34RER6K?4|FKpf$xa1*vzf`N?jd>A(PA2s$LG;9u@2tEKFs z4)KVXgN!=}_qMI!AmyfZpofWwNkklS4`IY;J(Sl}O_G;lUmjV9oi#dzE-PcG@n8N{ ztl+FE;LQQLmmWY0;rLVfkT3!qdYU`A|6g59y3)D@k^nl-&vm0&<`s?7hWo~qum(Y} zG98Lg;UAJkStMKgMocnf>*?{!>joQ=m5`9U-yS`-dxK3IvxUX+-54C%Hz%Jb#wLC} z-QIJ38(gx;3cdfthE;^@M%K0d=@G=Tjj zGN)L15$%2H?m+=QF4p3%0UzDfF{9kCq~mGF`^gekT-9PG_)}|*s;w@0Gs?9O5{GIW zY_a+~b=;N1>6DlaCZ*gcYQBj~7^f8yFs7Y#tivO$77d*Gh@(XDCM({dL>$W75-DoM zw4r-gHx09tQrP`(YUxuxmY$|(CRxYT;srH)4BE09`ntQ`v90o*tospfyB#dbg_8Ue zru%kx3V20oO0^y(IcW~gOuRlwpHDttK(!@puFZ`isimXBUh(T)^6~n2IoL~LP;f=t zdJ5r~0Ku6;${?vFqpqcYqybsh9_Sv{zYSrXGmpT|HLI6p$~NG03Y(ftB- z7sH){)~?Pl%FrZ@(pQe=0H_iTxmvJ1XaM0i7!nZd<@N+e#-E`B5-=d339_K}ls-F#cdo>1Is-&Zw zu_o_N7i^9SZTp@g>gZBV8eP$9RUjp@9x0sFc%RV<5TDYvu^(r#yIj*XT~}i#ClYPT zDf}KJX7Fc%{nzsvXV#SO$~G-`&??_soGcZ&1}->frxS!qkiyT|eNJP-_mlu^&XScN zKC|Zi8ltjWwR@LdnX+F4vkiI-jHN8vEf;~y&oatq3;C-9OWUPgR>7?*IyGlKX}gs< zUcAr5A`2~KkLd5SFToXnN8E|Vbzuh3b7cUiPlGQ&^LB@J{m6W?+xP^Z(I7kdE}O8 zsgM!*tw=w5@K_wL$gO~9+r5}+Hn$c|+nrl@OS#vqsNUaLEtVS+C3#DODULKor89ab z2qhume#biA@GSa<;tY$BbVhPz!-YO(0e{-!aYh`mJd1ZLddZ~QXr94RVvOFD{u2_t zrx$10WUHal^EFfohx2p)rDv*{3Uv972aJb6%C!=0(rA8(hA zWZg%Bk+>$i*`W&$>5PhJ@ljh~H#tTcJ^`~~WSaC$>*aP-szNH8{K+_BzX-&~_zINk z6mH*(kIEnz!`ln2C3l$A7QAnDq5IdE)q_Z^R{+GU?w7yHR{W1cg5R?bz%KO56G ziM%0eTOjMmDI?CsX9biY8E}Q@HKZmM2dBqr_s@j|Xz~2ZaD*yynouKc2~Y$qh2HDY z+ilE#@m!K*>agPX-z!tc@hvXW7tc4OAFd4<4|Veu5bNU7O`5cFBkN;Uc0ieG0hNTi`RKM0bHdN9x)UY+A#B-|DyTwz?O zPrcH1pe1omXfS=38HF;QyE8#kRVG4cnnp6W4QBEzC5voM6u0~v0Tt{y2ByYog1{Jv#Ie7Ut_U! z517@eA#(-kg54G%>uK{JheY{B3tlcq6%Qa4y|i6X~2f+FMdkw}$Pj_=I z&DoU65`k9FVVbKQM{Q>;0=c4NJSk@ZLVqo1*l24)6sIRNVgWFeQM5GG<+WPwRk~k+Kjnt|gA<8LS4nUs zyE9$pS9*K;x`Wz(dyy_6Ji9sD3b2nsdy*DqNx zNh&ojx8jJ_FABaeV{B0@mkA%1D%8>6>Uj9xB(s+)eDUT#4iQ4*dCEC%(%){Gmi~2Q z5nB^_5N>A0?06f^WJ%yOl8dMTc2KdtU^5)#!+=c={So zL*-ax02ckGbAiJJ9hS`5aVI8wJiz{9Y7xT|EATxeP7(U9qz(za0wmT+Rv(ihl%vFP z#02?(KB}w-F@Zs92=_j}I&#b$0SXz7g0T!gQxa(qGqm{bZ|%1)SK5#^z-~MLY5Ff? zYh-NoznLpWlMYw_dGI55ULn!qbfG0qeDenafKtwavY1@4CpUbZLbH{Lv6bp&p8M$& zZybAR2t4;3F%#M00~gbWySJy$=c+K4NcBkfNXXu&W{I*=*mr)0I4n`u_tn0UB{q*z zGCUHY`kzNjaP#?i&uo4Ub*$H7yaw=t6%Zm{MXPJ%^zl7r+M~izZ)(M{V|=H5MvG8myJR;l zorP<$5L|400CW(tcObp4flkXX{;5E|-I^Q43o0hK6Ev(i0bnWBw2iwsEXO%?3t+n> z*5Igy&)4!T)z#iJhoz&{BULj8^4pnq;bNSa8bF5!)90Sy*<#@Q^InnNTv60!?;RRe3QqM`* zgs@|RL$U^GPu3?H+zK>bIFNGI%Y;7SZ%5JlVj7xW%JH)vZ@v+VsXJ@Gg2kR0x`o0Htvy((LraF zX{z2gZpC0$7^mj(;AK<9#j&-X0RnokP5A=*nBjE4I?^FS;Fa>cEne|d{oq(GB$mP#6dSd`C(9k8k~|-Iz9dbPP`+#)72rb zQ6*=&l2O@yLe+6t*dCbR07OHHZ~`vx(*{&E<<*n#;1C3Iicy)B?!_G#=LiVyco}Q$ z$bE8h|7~Z`X+3i(sK%G3>@~3p%aDZHE%jV7b5QI3Ule_Z3M~`08X*3cO`#_*dl(l6_(eu~_1oj@F zXkwfFT{O+k@;CyDrVF}Hvq^MWaJlYH=#KW~-zFJ+5}+2Lj{4)muxLA+?3J?2?oKls z-j?L*>=|h2@3cB~tcF=|%AZP$V?rv4{APB^6$?;@*j1~$#$QsHO;#-}a@$~Mr)l9; z{MN$po`<#lGVp^9i(tmkHXGn=C^o5meCp8utf@3o1+qO&ci3?=Vik-cPW|tU)ryn$82d!xdAW1X2;H@-~Yyl{Sk4BQee4 zzi44qUYbrY@nunN;a)Ith%F9sOHt)sZ$(%XG$@8935@pxGp-nOQ>DJGqpESq6yjkx z-SHDIO!Wev$@v>9d1hl7qt%jd_aSL2yJ~Uo^SIr>pFOMtU**!YXScOLCnk`Teyw=3 zRfcvYL33>5In}VuO9lEkmOjJhiC(l~UU{b;na8+iDEvjV~OLtpEl#Xn~3 zm!xQx`1ri)*yPn`6qN6%+u3oozHr=LL(+R3J|ga5d^=o`QSyU^4jENt$6DJJWY?gi zAAeiE?_2pFumYZ5eZanr{7>U6S$%6`6(@5m$A1Q9gwmzltUNN0EzPOwXk`czSm;i) zH94sNv=J1BIz9EmwLZP!+uXQKoKxjNImvY=qIjPfv)2QF#rP{~jYwSmu($hVhT}zZ zWn-t0?<<5J(qycB%zSnDAZ7u}p3~9LsJwXB6oqu4_NLt)hCe^Z;% zSX4nq&0E4bi`BN|bVtny1c9S(u_8E0Vrw_5&`V0ryqEt{ceWG`>8VZ7ssOpT#PG0i z&fLw*UL)`t4z7z-ZqquYM#oW7kGZF;)PaYIID`_R3(jRO(NLz&qtLX2PIFDnT=ZN@ znYo_i#BiM-OA_V+MuH8kHGTvP@U0rXo?fiJ!4hxp4$K7lfJVmT@T@c~-_)DVZ7}+t z=YuXSdIsEv6LW6=X~$z#zJYuCfev^tP2Y&)3@d$n%=&CpE@23=jgm{Ci&Wi+7$4~C z_a)G{$vVR(AqL!i9VFg}R_`}bgX!W7MJQbcm(MCYhC)y3p#vn4jzhxkEqtMh`oNIw z-&EDK*~Q-W_?%!K^PJbB=)=UfKKDxz(G?*)kNF0@(B@Mdil-42|H?{tkVJ9=b8(mt zk#*XF8IaI99_7i~*njzggz&upjS8L4^w;0`wS_c~l9vJHgdyOmp#P)q*~-}o{X=H? zU-kOGeDGfdMD4PX4CZ;_%EF}u)O+-We}@Vv#1}~`0jVrGM)ksR;#MYom&PUEd^u3i z&2xV~3GP-1l6uq^0H`3t^CH=k?(uH&G4oS~VZ9&a1z6MKx1iV0xL|I`dg@sZf>`P8 zh(%@Pt{`s-?`0qbt1Ct6+{cuSWUUM_%?%MoYcaBDztHb$Gh{(cT+c45=9Q-Tn-f^T zU*xl%7tlSbrIkcRW?kesU9m+MitYM_)5q#qsL)~?K@)iQNM}P%j+Ac;7YCNgjFc=& zah8d>qs#wMgS?GdmSRCZ<$=JQ}w*RSP z-;9v-oJc+OWafxg%+``N*i72+aw87;<13Z*rCX7UQBILMPprLcb4hjPSyjlVMG#cd z(G+w!1Rq2_5xw1$StBYDdwuG0{ifCPQgWoFHs;Q>9**GlUvO!i?6Mb=JH|a~fs-hE zJ@$A!8ZMHWg^%GGS>-lq{9aVOFdO{^J+GjfT>Z%J2f@NzGF-h%M2u3SkPMTp@SY)( zF+neIZm+3iMP58e)JIo<9M>q8SFLv45~3OY6`kS8Ndq(s1AHCiiUKu&7!i_)Px)O%!hTbyJYC0RC0HW>Qi%g8}<-%!`p5w&&D+Zf2D zsWLkV1+@N_gfN6{&W+eBESj%% z!h{~ZG(>6cIBU(8de)`P&t(=Z(WDW3_4@iFh8fQ*DoTQ2E;Nh+pg_=So)=t4z%YvT zIy1l%IP*|N#d8d(?Q~P)4rlSXHz0h1dQR{(40|wOc{ZFyXVBL9IU`>BtatMCeR7aS zzCy|x6=#q^tV0y;K!^FtNtRVaMi6Z_9do5)t}Liwe*yF9*uH}WUPkR89)I(@GKd|e zYR~m&gsifj8!0lJ9qrv2mh2oJ)FMN9pr%m2F;uf;k&nA zf8-H#T$DsoKS>XV2I$Qg5~J3D$_i5T*B=~$r%Wcivh|3hOO48==j^=G60(a`Nfw%F zf&n&rRcRYq618KBXPo8p@N9N5*i^zp3Vg<~I$hdY)uGA@oEONUf9K1_l?MVU1|%o` z|J3XJ_PYO%nZiE-P`@3I(f~9cpRfn`sjCe@>XL+xh$w9h6WbAc%-I#Uo^0Zh*wqQ7 zBklE%;X+|OLtGxowf^Bn`(v6TpM&rFuWO_()&UljSMoApuKVej;8*(dmfu}ST=MAC^t_2`Kxf3lx7Z}TZ)Wb24I!~ zNHyZ(F^4cW70Qpl(UDZsvY%E1oPZhN1b>9U{dR)?0D)7Kv7P0I_g-aZkN}RmiVsvI zw=?5#3IN&pBq$gk&9B-c7H?V4uT2@XUeT1dK)u`}WBPd51);NW=jSyRDY# zb|W?U{_XAl9sFBR)2iUNQyHgfT!1P>1Uwrkrd$NeGn3!S!-PEklH+B4SO1XOI^9gn z!>dIc{Y;2fS#0)=$#!BlJ|AOpm*^%;fi2|uVGLGO{ifr?R5(Ykoa6Ddi~h zbW|?U{=3$|grz&A$0c<-8FgZbq3ZhD1_g-;DO2-pGPeL>#r*^d?*iRe&9xF`D-w$m z(5Wp{YN|3EMVCZ|=IM=b#``e2OEI^ydc}JXa|{v+p6P_-JsPp@^k$}(cm#0 zELi2U=cLcMjf&KgU<8#I!Nn#d&1S$M$8S$qxb*dZs*z@MXGQJaBGPDU{PH9xK~CBD8tCfa_&)z1y# zToBZ<%~9Pchi?>GL->+`Zh2!0v~i%S+jW`esi9YFS#|*+n`+f&3C2)AE{F+z9Z8Oj z^7rx4WAvNXIYC6}q*uQf%Nmlg;3{5c6Y^W2vaCPt1NZ=wYYX^Dvmr*l?C5M-PjQW& zVZwl+Uwvc1xJ=F~Dbv~O+6r@JXwlMzs5#-IN`mDhL&(0kNRYZkKBrV{G^nhbz*iw?+wFGPo(wzJd_2N&HR9{J1HfZUAoKj7*0hX2VSbyK|Cipl9ZHt zA{4YZD69KtR6%CJH550_&Uy;}+)iJAf(r=lr^{<7>cq~U)ipxTCiF83w06D)V}LD$ zdH4CV?+|F}JlyG3^HLgqW1HySrw}M3E~D>io4kHcT0Cs-iLY>%;9i$xsKc6t5`82) zjB;H=po9^-gqMnE`XsQ-9XSubKsE}Q+8MDnDj~`T)JA*rDNh~pTZS5`xW6)$?QjDx zCT;DKPE-p-oPjam>%yV_y`&97Ycf>^xXBQpUio9fkh24TiGI`UNtxRi{aY-lsI+a-*RTXl-8?Fn)@#)t8Euf#Hh5GjyU0)P5CPuSfp-pE)np1qZT z20Em1%5aDRX^KzLrBI8ezT`5=#Gv%^pKO7iC8vQ%bIu}7E%gIEH&cAZxcMT?Yfzqr zzN|tWTaK8~#aYwGI7AymlNG*SJ3{u?ab|`nXSxZ%SU`Z7{-d7BpRd+p{((LZ{0LRN z{vOJ9h1)w2uz>DrgN=X~4k?f$rDOsL3rmwcf$q+N#`_J^l{e#&HNC%7#$Gbp5guhB4)OReg%+ z1#H%?g(X`s&v#q+wP*10YT?lhmNbv{wWi8+?Kl9NYS&t##Cxho1eaKp5qMkojZ)HoNxn~5@Sbr}LF7cQr5v+F_F0{3jPhAhi$mT~_0E>eQ z|2kGAziP=h2eVs771q*Z`aFUb&_DxQ^OD`t;C$HrXKyJ`h(Q3as_Rcv2M&gXkc!83 z>qujM2T|Yq#-Zwgs@ci_Z@yzU%`;;Nq2@ygf9<8bT*dI|eb!d~iwTTlb)oH#nKzcG@2tikPU0u1L5Fx(%x z@fDoSjU5dEh^haj@BjBB`mer!LAA7AcS_@k@I@7c7@xd4C*j+_`+m+=&9ly#!tW^8 z%Yd!Hdf&_f>K&k#=5ah&^MrdmtgPJp%-v-Htg;48zX>2o`-}>-22n*W?^zTvfeWR) zu)qc6GurVLXxG|$&LzVsel=!2X-Iihl<`8mFw6_&MP05Ch^f+{d`3LE>|dRKtA||H z^BB6x@6Lat{deaN|GV=a`e)~VNM!{v);=LqfHXf{Y%)#;=={9|+s9pue%gI4NOs7K zWj`A=i`GCIF;sj?b04KhRu^O7ivezEZ*i$XKwZ+n`wUSS)r;0I;nBY@s<%U}o^?HN zK&vR{d^}bx!TRc;3;E+?VP&Elyq*S0Wc;Kf+;K$4=gqLc)$B+F=`$_FSf10xj|5yy4zh zxUzRVhNUkx@|6fqF2HV7^<>3qo5X zR#1@_FY52^KhA(jXhyv3sWsHHt@5g1OP3BR93gq1EpZ2|0zPRcK5DgT44h>kU*nza zuSq(3NQ~MP;4OTBU4rnRhD3@0m2g|@|58HyqY$F%GC&9snMWot+^>k5_8Yp0La!eB zBMq$?GK{Elo#MVy2?PwZtVxxrV6thZw(c8Dd8|X}4d8fabTu7akRElb*J-oEJJB)m zA_K5`djDhxri63UH;m3{CkazHZ&!e1YA>sBD5G`MhpTn=EkufxHnquMhq8BqgjR^m z$*WTSZd7Nes|Na@wkA6A3%ZBYuQEIH$yCRK(O4=8q+Mv9$2rl0m`V6akBn%HL~uVv z*?rWeIF!~SDg8QDA>@6ILpOsGtrUbChD`gA*`K#zYvYc`Ma~iCKcHYqE{2#NP2Okx z`Ra!q7nAq1H`DyD7qVE5{19g7WaeEZ2`@ol1)4A1eTs~~OsG{HC7yBM{t)D) zYknZ4Ja8res9<4G#DOzOZ)*%1$oNoIvSV1UjINn zC7Qj_rMzZwD32-iMlq~DJn#lza3>y7E#39SLhrB?1S4vQG5N@1LGQ?IFVR>*n!Uxs z@`kNpDDgRv$X!bvQ7P_A4M&w$c@Vj{cHunvclH;1qR8_lgh5#rC;h|_X>2xos>t;t zLvdzMg>vWiKKpPz!Q|nTB%`JmUmigY1%(yb$Aj}%rm!6GMB$SBaSg##>9ZFuPbVbs z=D8A|;0I zusUshw+&)tr%I$Q_&G=6E&VLbd&YK! z^^){^X`FIX3zL*p@d`ZPU_HEYovD`egZBkfddp47m9lOW;j%eM=+e!n?++-#d=~Wr znBefB;feI&UxP6!t9#x2b#`gMB3zd$@kFhmA+SN1^SkI}DA=u2FbVk)WGKYJU=eUg zihG&iW)m6B?0< z8aNs3T?C%D*5g*$eOYX=&`m}kx?6`QAUPUko<%w7GJg+ohuE8w)EXly$1F-ZvxKG6 z`N7yM@3rkumfLl-__@g z#3sGQY#*k737Hs!)$FHh|9vS6T`i{D3km8tG&;Kzvkg&);0kT6U(LbTYzOuj&vu3> zFRX*ksC!qx$jdRGgw}Z71&HDdrob?@{dTV+XMswms);-=gd)(-R8nasv_TDx)8Ex0 z2Ywi<;!-rOaR@bRHK)_5E)esmK(b` z_RS#7R$QzGzR1wtVs1K+(w$HlBtQxz3Dt}835%SFmK!4!Fa)=npABR%Ka?2b!6uRC z*~2}~vIl<-l-5LyFlp3C8(=R$k)G~@?mhp&Mx+dtoPB8OG~-LwMTKoAb-)owMX~m0 zzjD3rW4*xdZMN1Gw6IiN@6B!qEw43QDz8{aa+jmNTFPBZsNrfp=kg#hu1O_zNO#^4 z4%rB6L7rX9&OC*lT+z|C5KlGrj<>evZP=jrs6;Q}22O8Ebz8%1ls$s>PKBlix)&h4 zp`R)YOzu$hw#c}bB`SAKF>1vTYUOvNcL*%c-aD z7b?Vj7LEEELD3-LZ1m6}aR>%c2>P7}hD$};MZcIW=#A{Kr(y|v>++7A%*}s%A*stF z$3Q3HkWeKv!^%RD7FLhp>R z@JuJLDhqTThE+J>l}!u!poC!dfAtqrnYRWoDu*r9qZxu(v(t&M?Y_5XM#)6#X{tU_>YHxQt^mjiynXJAf!Tthw z+!4eSJ!sRR_mp916#%0$jzZ@tfKhp=KwFF`LzG5!4sX6Fd+n=@@KmD$%5f~9Vfxj@ z?qaqsA(GZ)Ty(Kwn?nzvL4oew5C%qqzAJPxM9)CqF)E~w0C}d~PRqdvU_PL)tO_O> zQ#46DiB%IjA&vsQ!MK4QRg)kNAqw&$eE!=YvwoUK@;A!r1TdFi{9_gM-;}8SB{2Oj zB*wp~uz>(n*viURoddZVhHdWlB++`x(1<)KHON|88nf#xHx{^FpYTT5O9#wyz^!Iv zO*n5)#}*S83r#-Qcp@!6(#HoCmxfeuVF*=+J( zb++og_sy((9$&u+q_vpf0|>O@y|JHi=+;}i%m_MT`yignx7$ljD>#hAMg4Zv48-TS zpqpD3MpV5pKtkT0`R^2VGG4IlOGo6LsiKh7Zgt{PJOA?Bh0vU?`r1=fAs)+6- zzJZ)g>IPZ2Qq%zp8^Voe5NY5~!0gcibhn^!v&nxed8;z+G%*2&A^|MC{y1w?0$idQ z8~vZN#(y*CH#SSn)P`J_m(y9-TR6;P=VW>ThkOZ??@r+7v!Pvenq32&^qGIe3rhW- zHbx-=?n#uT@qL<7lTA%COigSq7cVdQKDTwvLSnB`^$wKylyfxcejWrUvBBbUnbE6d zhoY;AdF}*v`;-&Lw;PkQ0>&y)evP|+Mnx_v#EmyqbU$&X@sJcR2mH_#&^Kf5Bpqf)s+`7B!5)W77^@ZF`I^GB$eUDJb zu0&7ZJ5(qTTvjS3Tz6GKHZ2H7*IdVkedmcWVdQ`u{3Q+oZjSxDLpXdXzb2bEwDV)g zjU2Yd?~o@F{E!F`IZ| zCma1x*tF2Z4ULu?%~)BBI%`Mo=l1xUjI26=^9x;(aRs|u&(&FHCzy~7+-Z~u^Bv#4 z7~>E+tLcn1QIQ;Qd0>b4##`@9d1`*BbIUs{Isi*ku_)0grfnN%5u#Zl zVH-8!sGLL>6S?U9)8@Ub_%+A^MyM_w^EiiVM5Jd1*K~E2RgEh2D^U7+!J%wuYm;af zB*~JSnaM$20JxD9GTZsh-;lGC^186LfZPBA$m@SZ&i=-3mSSxf=sfli`03P~D` zGDQn(=<5mDS-ZmsO(2K?RG@-(=G`;=u+i}3b(a14wtsNuXOqqkSO}6Ob2$Ebmpw>5 zO&z8B1mlHV^ra1D37;BP>!99TteT*VH62H^F!`(b-dSOFi~J7@zFKo;2EH6(&JY>B zaw*?Kr0?8wHiSWx5)mIPA^U2_wSDTvzmSK4AEF8l3k-v7qM}C;iR6i>odgAo4a={` z)ks40*Ak!Y?q!Gi-X6C!!|(}9cd3n@JOjZ$1fUQLXgNx%-xU!P~eBhf$!fC&Tt zlkA|9vAy%}Q#5_6|5bL-7D)`5r`E>`dyFZPAWl$RNjxo@rgCP1z}gOLdI(4u7ZHz= zd0DzA@Sb+;LUa$wH;cdX&bLmHFi-UaZcB_{qAVgrTT|wxyY_J-+4CU7Q;zg<U<@*ixna@L;~`c>Q3q#6S{=wFb1VS(~3JHiS5nKlEMG<^x{H|*Es zsS1gl+t75JBhjL`rN1v(k~`{+f4voeA2_DHPH(|tO@qPcm+Ulqnn-ZU6xH7vdLS^3 z`2~!_tHilMpkbr=5`u_s7DGDpP4@fX0odnia=W|SS9XXtwXGgrSR3@FkOj)%bmI)Q z?au}WraYLAK#OE8uMJOl!#_)bmNl9tTB8=*`PZc|EhXus5K-7w;Ixk`B?0P#C}RV1 z+^E_*ijzo!?lY#o(v!0T5L)M(I~x?ldZ#8~ppkWD@o^r_wcpZB@xs(DNj;vrar2m% zz_Dy)sD`n`jmng$;PMh+`uAX`kr(F!rQh7@i$UxoN}4=<_qP#KSEJ=HQZkq1f|BMt z?n-`^)1)`dnY$$98==$tifi*Lhpnc}E&s{_@|V)j(XQXh7d%=@XNcgcdKjO(sD=V# zvVh78Mp&aTfENoz7i|unWjD+vrQh%$@BAU8vrYi~lpB$$wn%Umk$qdZBu{lH)6i~J9 z)k#UV4t0q9in{A?EML`o&u~DuHmT!?V~C(KPd6Ux(ia5-pme{k;&d!fUvNe;(Th5W zF@zbq-|{)5qOqr8cXYeHp&ori(|P?3=`<9s-c|*8(fI+g7Jn?Al>ne=E8~B_VgF}v z#;g33lF3A~rEboFA|Z@TcuJ8rEVvO_LL4TfK!7mtXtxU1Sg#d8q7w3kecvx8!~Aa^ zmDc*~e3>I5QsCc=j;4nc4ZdY{wjoVl00;C^jSj8c~h0+G?jG1SK#V>3FnD zDRr$t1u|25UdX4XoIW~ICEiEgMEr6nWz=6D0;v=_x7A+0-|^9yMwvT0V>H@(yG2k1 z`{EVTG#AFS+@mX=v))P$qLjp`07=I^9Z{|gcAbAAz-7s?FYweuz)9*!b^hUGf%g$O zn|BW9Gkp^NRf@-ostWsK2Ts+vtsne93N%w|#!)JvAZ?CiYbK(CcSWzc$snm+_ti2i zwTg>;s1p2cNQSZoYhZ#?Oxc{$&tONQm#+nl!(=e-%lBO*PF;^BW@c;^iaBfX>Z!uG zaZRS9>O#bzYjK`*t*CpiKvTgytac!J9V(FoM}(EO>>5qlSGbT}x~is7_Pxw*$H!gk zXdjqm<_dFg3%$S*&@=Ao!P9m6u5C7&WON&l(WPHtPF1`313SFH9MJQ;znuxaObmX( z2*DSP=~HR(kTUgukUoYkWBvdhVB;i_&tehd3Xc-U%#3E`H4L{3Si;&(0?8>vkQ}s& z$f&IwOu**p2dNMPVQ$8%+nm!mt1hpLZOSg(%z?hgmN@PWb zlTZBr5%!Kzl5N}8XjQ7xwr$(CZ9CGoZQEw0ZL`w0ot4f? z_%+&`W34$x@1ytPe{v}(Vg~E}Djy%d@__DtD?yZ;o&M{X$LbeWgZwY7=C#^c%%d<5 z;bsbJL@|F)RO^r8o^*wh8KMNEq244EUT-L(914g)Jf=9T*;s1gXJ|eDzV}t>8h}=&7{=VDl04$FX~1yW1~Uwh%pI?mNT~?Oa@PNSFlwoJPnRXvL-*4`V%>v58YLFF_qMf z9fJ~bVdH%EA}Q9}w*4_#dn@(`ovG86YL-_%JC)zya8lgx!+SI_yMUdnM$Z6z09^rn z1O$wtLDU&^!bra-OrhS$7=x4{n-1Oxb;xTM=kJFK17hS0?XN|<|5~*F)j;*{4E}#Z z+y77D_#b=}NKv7afY2>=w)kK0CW-SU{K_DyVMZ`2W)Jpx)ujTBN_<&W1;92M5)&1E5fNY3o5&T1-jv1yS|O=&gH z`R4QZ>{?K+`!D<{op~f2fDj7zqt|TmWg&~^XfC0>sE_4!kwv>v3`Rn{3U(!QxMn7S z*1RUDiJoR9xGOuZP3aP;;n`eU8NHochtC4(6J}qw?+mt7<7#Hfyt#5l%b)oM$+n@o zsoF>UKz)2|Byxs=8QGr_<^l2eLLZOUUO!8(i@VYObX0%UjW8jb1f6_Vb$gYMSUSnH z*}#}q%)NDEcJ$TSMlFM!sGlCZLc5Q<67_Y}5==9>v$BM`5=AG{`apns_Xx*ZQk43% zzJH^l6Re9SwE^Xch)1JSi6-iUieGi3Nz(s7MU9Xy?$u zz(fxR2tviH`@p!u+RlNSgW`+c6Y{W4kV-~Dkg*u*fqgim>ZEI}*BD1@rLsDeJgYJ= za$m-3x$q6w_6@SqFLP;LuHqAK49#@ydhb2>bQmH?dhh1~%@i`F<{>&dGk%6M-B zp|TmKVyA>Eg}MvVve!P`FJr)%7loPq-Bdok3Y+s(oV`8#g`qTXf57 z*Pk?na6u9=;Vy=~g$tK`-&uO?p7>yQa#<@Izsvw5&=|Q&>K3i3#s;8SxwQpmg&<7e za7%Qo7`a=WSfo@8(s-}#olD93gAC3Uvl$O89xN+AXrX>NU1_(cb5w|rLrkjQq?5cG zD-!rU;|87)+w|Pq94)ov~a;>nyN}h{T%T zK#^?UgC~)Yd1Hh%4cYM+6{JcH%(V35llvKsi`nm~P11j*5o!@Qp;)F-ui&j<+T=a` zqvwCzx1nJFwIgl6B!d6fbB~${z|7p~OU%o}%Eai)2=MPr7-h?UtH)VuXr_pHH^d>;W6a3yUw1lH;xMzX_LI%)nN6OuDoL`4)$R;f3?{;+b?dXyFzQfpq_#==!2h zdQNR!XoEMgx?H#u89{U2MNuuc)ED8L z(=(Zowu~g`zFZ0tOpS5F^Qr)^nV@1dskUlWv9Yz6t7uJ(dWDf_Fm4$r7p@|N%fKCLcr>FJOSmVn zdu#(dwq@3tT_xx5#l=nVnPoimPB)pY=ZIzInI*1XZRUM*a!9o1JXgHD)>d1%n7ID> zbQpEt^my`#38PVVC~pso!nwKe_4G=q-R7Y+0ngnCQs^p= z(H*6_`7;*XChKPWYEMPReUNARQ4B6&AkIKIREBJ%7*yOU84;)`T!fxVNGT#6(VFVi zt!bik7&Is(M2}S9LI;tB)POra+-fIrK}}r!6((35Bvp@leejz|*7Y)+u_p z4C$3UUh_By<`X#6t)N+5zQgAJtm)$j+EW~`t=VT=xznO{%>}ZXt=K6Q;ad5oOV@P$ zN_C3+!B2v7c;4y7;ai(q`bWS`CoI52nCM@(MPtp&#gdkYG|zU+##<`i z6p~}z0qDPG3%Hue zwoHg8?P{09Mkb03nuiOBc4w2y$p-4k6PFO83m6K>5W!IQ8>FRlu2s5Y%WcG-!Vu1& zVleh!beUYpu_=3Bu;(jSjfCYy5~|%Ejo8(Ke$a^^S4WBfRa7N=e@*N%=xnv@q>wpZdj4X>zJ%=0w`N556+P2 zYnc+zAql)7mzMyKLGY$vB>L>#q(-wi3bhQ7GE(kyRAqqgzJ_S$jF_&m*mU?P#;sT z0cQL_A(NBolT6L)4#RG!7K_7I%F`RdiGT1y^sffftVPuw;j~1 zg1ic-3@}w@ab_a3(rjKM|3lb!h>&llY6}Nk8zWmF(q?y zkW|Bv{gnkFh;>itmVmlUL#C9f_rngg z`e*GNsP1TBI;N)_vB{mFe!ymJqUJ7k=WLQ@47u<{PV&6Iv%PjLvird#U_%Y?r?M*= ziFl{^Fml@jL(xxVRS^%~A@c!BR+enMS|a3P#}pbYIGKb8*n9Z7qb>DKwGDXLdlJNk z67lAUC{|A&P8r;1@<=>=q;tp@OBgUwm%Ph&iJP&(eptpy>n!&ie6dhI}wG0 zx^$AsWH>y0*NRqa?!?p&xf?fY0MJea?#(TgbiEZy|Aji5+9m@nxy(qqV3}JLiAjoK zCcDmGBIIR>iV!kKZ|&9?UgM%-@ZBREL*(Zhm9mG3$S1zcH|Qy ze>lv&iCzYRUN{VLNT`H$X&i3%cb04er(t4iNRJ@TW9Pc~jBa|4k4y7u55^%X@Z_LJ z_N_=EQY2B8!UUJ_=b~U=Ca%=t*V09IUH{Q!J_Im{VI&a#AmUGdQ<; z{xft5Bgm6g9#NT1bFmS|%ZojCjYS4P{o@61!|8(H+{gzxOh_M2Mt z=diEZGalw=;%D(q&odw9XX+=}&|QWgWMxDF1&L&NNfO0j7_y`PEG-X1$PTy_vWlCt z#Xay#z9`&mEb|KlPr>ZKy;1a+AX%v*w`i>8J;=LsA=&9MCXYzZ%n62vYT@yT8YYiS z&%%l4y?694jUiGxA9(#M8JFJ@B>UyWZ{_*1axSzvXC$SBVM!>8ZbjD?+P|P!iWO&> ziy;%J99xkd9|&P}l#VDD6Lsd{awIgUfnLj|UB>AbGZ^K{{gq`+ODXP>L9hFjictk+ z_KQ2EJuDGKjJ>os4``ZTxfoF(77c8dVX&NMm5B=|((x^4T$~eIatnwjt5&h*p#jX*`_`B>dmhoLcaSPK3{`ec zdsSSnCk|{X7a1q^;U&~6Q}KrBwI@ky4kgi33>B~YAu}~`1+o#zia!I?vM+0mcNQ}g zb1^QpH(JdOop+pWy}$^|HCnOF*DySVahVhS3&UiA0%>%zxtwOV6jV6mlrT|xN|-4u zSxmAQmZT8k_PiPmWGBX||~=(JHW3j2CCrORWBG<~N(b(spBYB`cA_8JkK{)?f1?b#y3pL0w6n&=4^b zm(suE+b%Y#p-#8TS z00uy2BXW?aZnZ%@PKwMFqT-FZv*0W?OJi;2kO?h{UKf`w$8L$wLgKa?o6774Z@#;o z!pAr;RlB4hREum%(bhQ^}Y>Ut862;Dm*#L7SzUlC&13*w}GiqzA)g`55 zQlOBwr(v4Cp&qlUg%+h{J`E|E*mzD+_#d%KTFy3N(vCN{*BV3n{4;c8Ks_@26Ii@RRJ(_||6J{b-Aj4mb{R(ebpW&9e?-k;ZvoaO3! zMBwH<;J;d@u_*7S`&6VpJG;5OJ-4}*3Ic%-BynJ zsyw^t$ND9s8psAtaFsSDmyyTl+(tska6svcbm^ZtY$g%D-t}2|Zu?{5jgzF~I}Uop z{j}827@oL+Y0tU3m!spC-@<;4DL5(Z#*7ivnfI~6is5zA<6Pf6DS3!P@Yte-1+C2N zg=R}VsUQsNaN|I}JV9>1`^pH_!Ke>raKm#m$Bj*CD+;rhQeP?$gNxtYkunuz+%l#D z5_OFBCSWr~U8&csdHSuW850dl^nyMKe;ki0uhp)5EMhOi8E#cJ$Td#Y3`%F#LnxX3 z6fJfAP(3EQ+M;MnX6n3$V|PpsP}2?(Z2|GSq|M>_vaAClCOD9m{{pqGUMk%`xcE}1 z#CMornW3Dg%u|L?4u3u~VJ1Z){!FA|NRbRN3@u!u^3_E(Y(9lnS?zChKccheiZ0uz zFf@IK9r(ZkW6`QyU0&v0*qqyDnBiUEYi;o^D~vv6Ocfr)X#2H_4oSwiDnCzCYHz9< z-H_LehkZCL@bKtz-vZZk0l*RLoLT2v_)?k`@uZU-UP`&-*~Iz6vxGb!{#ujvte^j{*_eaUiypp!7nu<`4>qR zdB!Tma^UU9^a4i*^X$Ta(;zJ7-~?j1{jXwW87RD3ml?|r+(8Vr@R%{+*E^`phszG#@c;19rdZk0RODPo>eyr@el|JRH)4uYxdJN+& ze^&z1X!P(GXw&$H(Qx^_zJ~J&=u_GCFIBe+F+%S!nA!$c#%!*_XL=gfk;3E!X-I_0 ze3FDWLF*jjS?VYF_t5~j`B<7KB#*&>*tKoGrd^}^R?ytMT#ajqmp0Jc0zLI>z)LY` zZoV1K6ZSh~ZARxUZ|tmZWDQqYmgF}&J{fLx4?K!M5*89~vhqgbfGA5Dq(LFxDQZ`X!vay?>} zI;P|=!0!ll4B2A71$;<_+d{tiJ%U9&8raBK=U0B|-M)Djxlf8GdcB`)??1BedG$cp z(JpvCkeH`?2xVA4oBaGOvyt$;$m|wN@%{EWbgGjgyjOu|@lYiN8b1Mp>D|l%} z3qaEagXGO+UF7=hGkZbws$lcpW=-m|vXay#PlKh-kFt`Kl|D2_tSd_$q&C7; z^*iNLGBLhJ^NW1-i(T`0aoduZW0gtU(ul(%nZshK<06^kVyVNTnZx4oiRKvCI%_+W ze9=#KSt|WdXIHc6Nz3%{A9<$<#cCg2j2LI#;|h zR6peADc@A#`m@gDX4=TiWtP`U5}md{Sbk>biN3v!lr?h!Q?Vsi-zCe=e9epF6;l2V z{!@u^weIK4WL>CL z9%$2YbPy^-UEtUjajf6Gj{mA%-F(!Qd2AnTBG=*0OC9O%{{}}z>5D5<N2ut`S`7 zPnelALM_!GV_vJd=!(Ajmo}kWJYo|RJN%Mv&#aUkzxOkB;%kz7m%2sM%0+iQtqpR|+nhRHMx8xH(0@$t|F=OZ^kEC#OJGkz87C2`{cY zbwiLlbevk0`S;BKgrrtMKojzN?*p7IrQ`PDMF1PC9>Qq6JWOl~`CxzZq zP2Nb7nusBXxB638yUl=;dF4FIv^$|~X;j>!$5SgiQ_56|f8>sM zB}=CsdVdIQMcFUku&xtx{>EXElNMaEaZmh2zM#p^a#!cgwJeAJuyFYF>t&r{99`*O zAF6LnUbkwFUALN?UkG%*ys$#_lu*j;eFmJNwc^XXf-IE)_;>o5FVI@aW$55rnPmcj zyaAl}cJ3Lv&88wANZvqwja0V!~RIeo872cZSX_sU3$9 zcnO_wami9q$o`8UyI~CFew`(a&FED`-|t^|SI4e}x+&1tw6DMZq!ajd==a!fxS(DX zYkFW$4qiP#*U3*lC_9xm@0h*IK;5v{f6RS>^+^jXMTL(AgqNejrUJvNF%YtUAm}jd zvBD9yn+iMO3Oh~S+st(RxbAxL`O28@eBaz~c7A;419>v&J@xTkO3d~i>oy>vi65Epu2LAL#X1!+Ct4TvY_UfXXzNY?_r zw%==hT(3WU!0x!V(Co6j;qumO3|M|&PWEsE-U;?Y1J(t4L+yPuQ1X@5C}g&}qgYRN zDNgqCk{iyxGplU|Z`hr8^*j7~r!O z=d((8{F7JI)jh#K{K?hB^7P>M9pD5B$0(jGQGNXINRDdj?NYK0R@2?P@Z`Qp-Gc`4 z$Hrk_EK+PGWk3|-ELSW4=vG&2$W*%}g&1g3SbLc}*wtBS;1^6oo@?_g`MLj_jYSI{ zOv`Acs_{ODhrl|5kwzxxLJp-egZ?8^t`xuVT7(O_A0;1&S&+>Rn-v1r0vho44|Z^Y z7}tK~S6oP6Nbf(U^8OLu-+zi4{DsxH|0iY{2FQN?ub9))&;{V3ptzu{JBOzf!NDW) z=-6XLkcbM%1CcL_9*!)OdVB29mK+a2Qq|+vj)+13Vk|cr` zV;mtS{jbUosHp;6rSS~e!T0C5fBuRT=W7Da&BG)ramF-HELDUTh4J-GB&I%BH^VRs z5>UxE%+6Z;K&~5u*dM0NhrgPK4Y3}|dq4cQIPkr2)wt{bt4(OXsbLOQVz zFEz}{pL6>bvO_$Ks`^u-Pd}pPpwJi%r`Q@nUqKsw ze1N%rd>cZiE)MJn-x1_|Vnv`X4QvvwO@IXp-!bG=mjlTy4vdzlJ+PP#-SKSRx~3)D zVFPY`!AHM5>m;Fut3?BThs>${axmW2jDN03Og86=G{?fiK#bJehxaiU#OaR6k+F1LxEqkxs>0= zW9Ut+kVVGKNguCu5iG<^j%H?^X1zAt#9jPgnGgnavN2`emOtQK46W4IEL0J8QeRZw zeX0(*MVK+9ARytUq9SVgTo5M?C-hy1C;=QhHje$`l0IoT(ss(seR{Nfn6m>1wp8%3 zf=Jp(5>~}h6_km7>Z8E3LN!BHqiEBu&_4AdA+QDog)&~|L4FSM3E-V_QEtn>zf7Ac zZTZ8y`9tDDQ0?JBwiJG?ZdW#Jd{A96)Pll13vXBQYOi#3UGBs>Zv{b&nPq*}ZZw+k^e?JplsVh%*M%#hsG{YQ zFe}=z$gDcXEJ6o)5U7g%J6#^7(O_sm9N zyz=lF^XG?!Vf51W{W8$J&^dHL*|4w@sXruFBj$pK>}%8G1L#r6k{4fwMJy%XT1wzO zR@Z$iQF3mGeUe*EyW&O7SQwjAOfm>p{RJB*BYf+MlsT)GZefp{f@#H@zMSHuR2TfB z320|V?n@0^kAgbwX-S|FoW^|0@`{||>IbWZ+HAj{ZO$3Tu6V|ueEHwP;T2>07YB`T?uzrsGGkm&BD*`omgc?Zgp=D1G-#QL4E=}2xos>oIAYXm_9o+ zSdoPW#eXxpHv*Du?FB+;@5KbjF#_Zy0dj@_IS+tb0zj?;AlDy{m#?Rb;b?pe_`@`% zV68t+Gd;;QTm!$&;SO{N9L4Tlx|1e*^Q}$KX%9#vlG_e#*$xjk{|avYhJqKE*)T(p zh?KJcL1_{&XbcmbW~xg>syP!@o`K1*N@z0w{#)TaP!;t#!t>hzjoE02BL?K97udXW z6tu|lm>GGSEJ5HQr)h{|wbF!shb+awNnR+Z1*SS7?sV8YV{~f-oIFtqmD4TdjYgqk zu9Dcfn;%;HG#>2q<)g;w|Mi`%KVaF5fx@&ODG}YUS0K7<6?W!b=DtE8k}nbHU!+$c z-z9vOxJ@0^W0fr8hfxwerTcp#W@p;227D!8@z+K8_eA`Qd}L^4B5LcPY~uKrNbtYY zlIxG{GA|ey7!sJQ3mB^ln2QS-tSDIZ&f8qUfpW`4ma-^VUG2o)L{XM<%UeO*!d}8e zr&HI@!Fpj)u=J?Gf&$6+gSP@@Z)lb%Xdrx*7mjfE)0Ynkjq)*)DCk@8{K3x7w}ioj zi8pRj2Rh_G}Fe!qBsP;o|vFLv(VBM|u|gh}{CrLTbf_KouYj=;g$?(5J) zCtz$W;^yRF@HOlvWb5$noBxiaSk2NCX#~YbOw+P)&Y_`>A2wVBYB0rGSUW;%y%^p< zGPt&nk#+FCUKi#eW3jrvquzcF%6h)&^gTg0uZ7rJaz3G%fYrYl?tRPl8PxlwiZ5S| zHz-F#3VB^oI!7~Bt&quV%hB_A;y2&c>o1*eKkBdtY_gPQWA1KmlDwqHMnGfaM&SF} z*m(n|hrgyxl=6-K;wl@G3zoHprK6h~3^RmqJhvhXfu%@}lW^1@GK$G2Wx~G&e^%`Q zS{>#RRo}=d?I(-~Vqmpn3=pR6xUmL~Z$M+{9K6B_3^|yX`c2HIxGtrK8)$K}ejNa> zew*|d{z-lc9sc(7vOO~0aCRISNF$#bdd{4YdZcym#w2926np&!XPvoTaX1fVz1glI z<<>0RNP3yWt)5xeidP!4jA`aqiANGe8KSd^ z8c@HLy%Hxw3e{Z681Rhv4mz#NYu&4Z`+EFnt1YEm|WiF?~XEEuVtD-VlW_eS%Y2Is?>L z-Y}QheF}G5TB~=7eM)x+RtD^9&x!!kMQG=`&DDn}7r{;7-_yfVO$<}qz8Rf% zUy81061-n`q?s$_PD5W{U8jjRM5sZpm!8>VXGgEWFoc84O*nchK#z(rZyj+_O~q$T zKxmnk7dpN-|KcqE%5^urSZSP<-eVpOO+dmZE9#x0?o7#AyDlTho@Mqv33ZRp6@6ySK7^{^QriMI4xt z(T2tBusOCGEffZi_;x$7YDeqBg7Fg(LP<+8oF8RulsV5);BL&)&LkGu)y^}5^azym zdu5gDBBe>iyF@r`+V9fhgsjO<+i4`qw?VyyAx5w50Hu!2b#l4FCdFK4>;CP=BUfX$ zB0DdutlgWCbls+aFUc_4EmsGkKVWsE0|w`7mB>RTYiKyPhkeGVUa4!_{Mq-iqrl3z zGK@GNt(G`#WaX|An8&OLts=yDCA3F#uv4sg+O%vUxf?<@S2&i1=yN8xWg6v7{WYM? zk9}bQ9k{a%Qz{N(fj{R!+p<3}c!NkA$CLtT>nCrqp}J4Wzb@vli$4)fXYOi9?8H6J zM@Ins7HlKM2a9^MDN4TG6JytDnlZ^qQA2b-lNqaDtQc0OT>u@jN)5t*iJ);t8Zq>Q z=<4@(#Fy*39m!2pxBh_mdlKZRBDP)qM!sIdC-xV$PKfLvQM}r%9zur?>+AS?#3ZgT z#(RoQotC^U;oq=(2KQnK@g1RVPx6f{YvX!DAI3#2YwI-$K4BkUv(2oHAL??9%|S9P zbc$I3PC#rE=Gj%D&55hRNO;RmE4XB+D<;%)Bm~i!by^TthK*#oCJP6RZH#|-N(u5=L7udIbk`|k!D0WUiqE{N2w*6 zMc)j+hi@44gw88aR`FB0YK{4YSi_+2gnSi3VRddap^08ool66o5RT_9J5Clp@#uyP zIx#AdtmfA_;%=9eI@wq%`!kRsP^DoM{4lvhd)}51rb3KM%sjRk`!I405K+Tt9}&m~Fl%3Ey) zL*X>~8WNWL7UvGS+nZRTCS2`$E&jmwAN6-`WIh(_7eTZf;=jLXh0G0X%uK%YT%1k* z@urPd)pGqZTKt$Io{Td#2T*kJ6BVP1OBxF+4B8>gp(4%&$WREXWKCt2D3LPRn*vq2 zl}9|D2f{t=1=4k2=T$qDx3_ye__n6>^abznN@M_8*J)54*FW;U-d=b=r1O2en;`W1 zc%cZO#e~$s(1q=$MeQSi#G__@(1X#F?+XuLSMUk2V~B#Sg3A)14Z(*Dutzf~bcP{d zJkw_|VD@Q5L2R{=>-M|Xb>Yj#MD8AvyX+)`>kf%v)Qrs-OVbB&dq<$Lgfwp?9kn-9 z7^qux3;s%N%`r5ep%LpL9?&<(cCZ?8wNjFU!VFBPv%V~$`EolQfv-6w=LBn6m@npT zX}#pCUVhyXn8UQY_!>mG;P+3&lo78W%CKf1LXrYOgu!97p6Bbgqwa2xJ#WfDHv%t_ zh{&ffTk~qw&f2*wV>Li1COJOIU@2)USy>Jf@EtE|by=i7i8)V^D$4ijqB{xkM2C6)IQGp zzx*K#KUv*<>pdCz@b~gCrUI^)&E!sVPZfa0UWKKU}WcI@i zGlfq8kBx4-ieT#Kl51sX$-u9!C22M1GVv%RM8heP*8-K=Ls3leK)Xt6udX7h9DyMm znVYonv}lst(?ZOouMmNv_Xi)&^pD6Ggny}05Psp=y=PuEE#1gI5aN^^MV|L015?g8 z#w$ND=1r#YYQ2{!CLg#&CL2M2u7c=H!hR7bj&8AY8JtUZEQ%=-+tXOIsE`D8DHZlp zoP?ZE6;O4kUV1r}@1G@X5P~xz0dkrPaE#3(I=5_HqSKS6BMk+(OUtTj%j7qUYyFJS z_qx`FVhOi_-!z5k@>6R?8WYE*F9MFf-MA9T8U?t_$Atqq{n-RNUw1@O)Vn1G^6nvY z<3((X4#@<67xseiW^T^+A0|6l-ISvH86nCof%<3wr_fR95l75RS=nWrtD@Qu{56$1 zrgkwE4@eY^nTJefB&cV8oG)Id?R>p?UPg{N*1roV}LUpiSWbOfuQ|K%ZdD% zDT!-kg)LUzfx(jOuR0Hg~c$SCG9 z3UUj|LhW;St*i0%`iy)*E6<>C!_h{AzBnVG%_cw5#$^@;-?PG76%=qgVyoBS|qr zKLp^Q5_WbaUt)LcGg=a!U{6jUf={)|F&g^lz1h1WTLgDM|54}o>6ADoe6_vczMjCO z|E10m2N;|D&l)FMNlR|wOLwP)M$3f)4eh&vygDsyhj`Zx3i1RdaHa_2QryYCc>eK0fbY`UsUA2lUw@s7!Mc zruH}@fFqBY*aYT7>?3=jkpNhw5t$Ha zbH~ZLp%hF~*|2EfhF*e>`9}yY`KT1e3B++RVq--T-k?am5OP-mWb)F- zoLQ~nGq@z+1N@9kk^)_JjkzAQUI~O#2A>H%e?qttCY2tNc*~@~Ae<~5`L;%%x>%!B zC7(s@16IcqPUbx>iV%ZQLXAQI$3~?|!4Z8#Wvxfl2QBGRMbW>EOlig1i3}!qjv(ol zfcd3;RPE3;3VMLS2}N(_z0NxiHQPO+dZvV`$O+QD-Ukvbf#BqM9~F#oRqyZBd)CkS zN5rokeff2!Apif^)&K28QKc$lhpYnsv2{so^D)mv5V*eubHUr<{~foQPmpoWc*9PZb9n~7+$IaP>ILsyeFsz4BS zc9qR~u059@9__D_nYo|OA7FlyPs|~-R<`_PFUYI;UoOtfzm)}jf#+6=ja)aGDfx<5 zkVbT&$rAwls5K)DVfpM22I}FjD32laSm6p2LpRK@tl44pwF+;hRp}3cu=OoGWjkVQM z=3-7?=g`EJjgUJ#ofHR#vYd)g+(>-?c!`Ndr)=2!=Tr=|8lx*|l!!{RU7sO;9g9vwVRP8lnzGzsT_-B{|Jc-5Vj>=*HrYTJb`GNYpsV8;lo zs};_&>lMz`PmjTqC|Mry%oAUO5aBv`4>cos^!Z!faAZ)+g1S8&jLmRkDkjreB2zf1 zgPhN3B*g5k;7fy8)RjbjNS&=dut_ z>L&GYD=1xna30p0gDecOFYiIyN#9ZH|3rA842Ez^!L2uxZ3=C&jkLMdbV+%SrDxEl z*`Ziv{`5w8*7%DF;pWe#*6c5Xsn2{{VQe~m4!Y~)12qFF7@mHztuPsFjFuEJxGFZJ zKO(mb!dLi&oF9h70@JCKt6_L;g@)UvkTy zf-}BIR)38~vC~ESV#w-g|3`}ajsW||B|`LH9mhh~J$nZ9R5c3`+ySa{*5(3uYctad z5=uRpbGkkk#&%J;46tnt=)=$t`#{grcHpUY=xTPzY=9f6n0>aWUDqvQ-#9fG%$+`?6mIbTNxALQKP-%)h;A2QzrdANgm2#@{>yXqUwXTWCXRNtHje*qwMMK2 zu(q=@`R{-JyJ=dbZfS?KjPg=ZE8fJENJZlp&MbInJA%pkuyq5T=x%=+X4hlVY?UgN`ufxQCv~L>yVE1bK@95V=Q3A8(1sHqmWPOo75M47QS%Oos z+|Ue^%KOAqQuGEYg>WLXNy3i#m}b{O=ClsbYWou0B)dqh#OOYAp`}9y4lKOEa*@f? zcG@?)4cNJf<_}RpW;gB-(N91lXe5o`vouc$aC)fkYV0|Q*WBIXH!NVQ%sJxdB^s2- zFo_064)P3>yXGbJ$ULlOzGQ*RXjo2w2$JM>ML-06vfNiGYB|T%|8V8+Bu1*(nKalwPMXv z5|$RJfw@F5oh{v~S~K@Eg;~_r-y?QdCiH`|ops9ZJq-~Z;MKwjKEFA5_1!Rs!&RJOrIe$4 z=*Bt*ja~PEy_Z371|?c~cHtHnImUp@iO4afgd!Li=}mjs`Au+`?j|Bi?It4%%kZAlhViEs zkXVcs`ekvr5B_DWJx>$?Z-lwFq~&?8cNK=|`g1vPf6z{xh zfd%=Dj__(Iyu61%<_7c2lJ2@ctm>9xTCX4#+O42!mg_XFTI;rbcbL{{y9O4^{X!S8 zoHN0kFX~YEA%6xbWh<-Zt!j}vzIhT4GI+8Pfv*a1m=lz|08B8v*<#` zE#SkRo8CWz@?Q6*ZnnI~MLI8KSHw+w8K6FbuP&{htd50L=n>MPj&tm^hH~Ka$wIs4 z(I@de_x~{Vm0@+J%d!axzHo;?aCawIaCdiicXwF0ySr;}cbDMq65KuLU2|skxo74% zd*Az$^^@+eySm=)Dyn^#fdXIDTS@Ic>tnCEA^$AjIjmbbrl@YHG#g#3*KZznU*LY- z0ii!bAEms9397Gm_}vj50v<)C_}W7zlmgFlepGjk+>o-~w)kh+SxVlu1n^8p@Aj;k zdXIT|IhuE!v|0)1MR9$E0b2{UoSN5>Z6BLL%5e8p?`g-4+;Rn`AXGWXXgvtlCxs z;St)EjTZzR_X7d>s#7u!K87~GS@7C<>fA`3ThzXV-e!Qo^cHhO@BR?cOdkBJMGw+U zPY`yTTSC3P0n<&KG34-e)GR^jVR0!QSs6^ifnZA^d&8=Vb&fdW>IrIZDTq&Ls>A#?Aw1rWa&^@Gu-xx!Qs87%ygUx&7khd&x8JiibFXNXe_WI z$t+bu;)}wn6w%H7y#rx&I1t8+khUFONdeUnPva&jRe}{o4Lh~ARm-{iWBGAYgx(2M zvj?j6;)yCRPM>yq!TkOaUt)W3C%-7ek4k;cI}o{J_4qAMn>Xk&KE!RHnpk6asGp9G z%+5;Y(&z8P9NfqP)*sf+(jc0G`u`hr{D~_f<)kFQ_%OYNC!8i`PWwh_`9ihR!r2PL z@jpw*DF<8cezT%_9vHW-;iYO74EFxcOXm+cRK@=W~7&yXC**ERK0 zYQ&(|Ik(bn^5;HPk@X*uO$T?7`3&EMDN`ub2ltkIcxdB}!>jfMCLKT#a5TJ>7Vovz z*oiajJbm)A-!*Jn4X)0T|FhHz)Yl0i%L@`^d$83O#CVB0IE^zq@E;;x6wnn5n^IEM zhjUqBhadoPM*J#==z8A&koW%_^JDG;6rpBNwEm{|@YkdH|0I%#|K}fn+a!#X>G{K! z!ArqnXF+|{{c`6=h;{^|jfW;Vlz2}EECc7#0<&7JMq$@}55i@Be}!U6&H%@FyW5Rx zyW1b}vJJ}Z@9h7`m(ty894f$nvB0V0!N!z&w;wvy;T$Dk?4)ET)XdwKdW@)CMqV*G zk$~}yZn?n1c^kITTz=njd5%XhKim80x4Nk()EFb>T&B-RXL7>S$SWfz-Xt{&MEX#8 zi}wPuSMM*@=;?jN1*tH!?L(EuO;yVB_53uhaRdF(kK4CCU5L_+zwp1jZxcZUqNnAt z>OnH*q2G79gCQNz=drpkp^42oPoa(c!<3{HC3K$_w8FFjk_!9V=EVQ@u;WtZ-_cx_d=O=-H<-M zyrWmdMZ;5I-!fYsZY?)mes&z}A5>rPevs7t@hO<$kWPRCU_cxySJKJ6;5UXT6Spl6 zhSIrj4q3I`8i+%&)=vZN2Hiv=$>>P4ass|;cg!;Byx%Ns5y0nds@6-9vgQ;thRAj% zF2oMNnsBBpv!Io!C{xcFV=8d84g&E;R4Xtp89?=YJQa)}4YrQFODK|Tj|u!Esi7mU zF9P9ODDV$}Rr`5=txBvSat!YC(6Sn?-o)~DIpo#hcn>lFoKhRL#m`A_wWq5%TT_@k zPo^^^g;joIC))vrj54~#pcNR47>M+gg+IC&t@ULDJ~QHN zJMVfaFS};PUqMBam@;PRW^M1B!&A=%&E^nV%yIsh_};64HCgk$$-FCxp}?!GY@{;m zslJ}l(fsFWZ>1RgHG|7f+kxG-jM1f;6AtMJ1RhH=sMtr@cxjE0x}OD_NQC zC9GNs({Hm^eWoyJDb9LVS$I31&GJH-DvZ*%joTR2;~M>Pf3vr6p4>Z$DF-foZmmHf zaAgfO7*oJ1cI!uSZ#8Ru^&_A_H+Tz-?S3JEwOlteIwbi`q<{N~6f(H6oAA*WS|?19 zo|o$yZkjG++SwHL3RBz5FG1{4U8f;*4_zMhl>(%?C}y@$YuO#9kNPpknj{;Zuzf`Gq&5UY{Xo`WhA)pj@kn zW(W9tp1SIe$`|l^1j18o~dtz%vK zvSoqSNq~>P4A0Y!#Me2B**tg|p5H#~yPI2%6$jS*FW;$CM_L|0ez+W6H+GY6*Fy-; zAF8^gw*XZGcG!#HZYn(<(8I1eUj}zmx(obE4~go%?bf;>p(S^Xk=@3j7^xEWKer_I zyRD%Y#C%6`K^>7!D$) zsW02oR6C#e@|FF7UTp}V03MC|7;sfB#=-<-kkJ6M+*W_VPOLyYBuBypvv=HNnX#kw zH4U^(Bm|U6*}*k&r(|8QofxXmmTyQ8*rsv$bQ9);3PYmcO7>a7V$I<03G12~ZJ2)Z z`x#K%#Kco8JW^8$38=uI**T-n;FM3_4#dOUe~pu~5l|feM8@_uB&M-5FQjZWX=UnG zt=xob9oyo&*o+fn)nFB_ffnAIv=9z6NRGO0@kE+k7t}SkJw<1OlZ`_Oag`f}`lOiA z_d(!2cZN1q!^q3%r|D<@mWmxEP1&4sRdj9X*N!j}KD3IG5?!Sp5z2FYsh-xb=&R5$ z89SK)6ICTWs9J(3QfhF`0a8JOj&@xD8MyTX!y1j1ok6-g5a#Z}BcbCb9JmrNe8_^8 z(n$DLgIR$(jKxXNOmq^Od=UfQ$O5RyIqAzBM+N(Idfkz##;sFiOo6v$1JRf-_1t+C zldvbXYIitdxS(MTsdL|1;-~qBPu`5$op=+hn>Ziz^Uq+R_gJl~*UurAP_MbehVHk} z39r@DWDE7q*jIF2{EFT$LH(U>R~CDAA;0M^3`A#|auHO_QleuQPjD}gg+NO%aW$(w z41$<+v5RB%GWgeDkR@KFlQ5av=Y?;W`h&q$VRK&+sDE{eTw~3hqV(H8*ER&BAQuJN z<^){~*tC2kSQB9DLp~!$zMpzGjGCq(eDh!*s0rSH%Gyq};grh|i8%b+N*{{1Rxmz} zc+8>Mmq{OI2`dKqu8!k_G3uI5m~=R3h-pF`Q3`v?;f~ra5J#m~SS3FwR1x7xlgEqx zj`DY?nnmkUn+>Wb37~fe-2aBEhI)>AbRbsD$t~%VE{IFk&QJsF-#n&|vi)vI7=oU}J9#1C zXhH0>Absnd+OPrkdxB~`v3)ASDs$BZ*%@1!s1w!c%O9$|)3ySA#n{R$o<-XZa5|=s zj8U_X;#-bz7Pa)u2}Z2oZ$)7mF#HX*V1?fYa*#JD7fY+`aT~SDwp%d?ootXejg+V# zBK+SRb8E0)7^&7KRmO_^Uxmu`X<1npB@fa%jc4PimEqR4r*S0~{1|qsGz2Bk7M+Hc zeG#} z#g*gWB;TdEG=w>UFFWG=usjeSY;7q`HZQWPJ@~D3ZHO~-@7Of zhSN_<@$(-0daFRHjjHpqV+x|lWT_*V(wv~@ggAx?DVi@=spXM#Uq!JJNG%u^U1B7} z-XAM9)sU_L?bfLswIsEXC4pd!k8n;4v=fyW1xeF6^uY_RP0mG%!1>!p47i?+l}}

    ^#U$=Zl0CNylkLpQ`tvj_<_%@LX}_cgQljda8xdA* zX*k&q*q<7?!1C2swOdDGX3B$tBDfM$w98Y`c(Rvo>!fZib-)L7!y}4eCs*vV!EN2N4eCOpY{lEzl)OlVg%!+6u8yu_c`v5HzdY?gW8;;mfoUv+RnbbfPSR9 zcK3oG{m?SD@hY<9Vt^59=mYzyw=n2+gii-S0YkitPXhkcx3tk>W;2SrUt%t<50xh3 zQMh5%jk$(*H-l^ILeM`m`PGP7wa8N>l;L!Mec$LFl@wDemSMf`OXmPrXYrX?#Y2GV z=FLf4uKwmG`)45|IDX9%SDzoj6JryQeAb-?tO8u`d*Jp=_dfOxm*5Ue(stuyc83Jo zNV9^vVfFM!>Gw71mOAWct^?}qryk%Ba4sg#ArCdQjH@q1y_?*c#*G+PA0(D~&|bpI z9|NA_e#`M{WU5?$CR0NhDDMH&Kf?y;c8v1kX&D_dzK0sw7_@sB*ha}50;=Q45{TLp zIMb}Q=Zi1!_pyfVZv~ijosx+7$2g)mCVk%ip%SkL-XYWt8tZI8v&onLTiFaP=>%*n zovf@CY>f>5b^aEq@Tc4HrxRKY6yiJHuMfyO#| zPLiYlvOC5n?feb$*)DFtwh|>vUG_L{c^L0tssk+tbbP7_zx(o~;?WhTgfPk7u50^H z7`N}Mrkv)UEYVIJm0SYn~hDcYv^8;<>*ArGkjWO{AOrn@?)>Ggj`zw_%)vXtl((d$QROqEb)&0$oc%}Vym7QnrrSW)q4>swMl(B(7X#-DnF zUfwy3CI+y%tL$?Clc3xw&>~H_rlKR8JZnK|q$_YhL#* zDVN-(VpVofjqd>gaZQP?SPx@>{cz+}J3U&vAym~NB)@~xn4zpf5yfdi53UG103{Wy z>1>z15GK7X>8zhFF7pbOsb`81Ny^J+5>f57PFI%#2!BLwq?wd2C?$Nukub6#oOdW~Cxwi1D z_4u|8JahCxNn+1ln9B_7S6(U&w3VlI5^Jt$c7&AnEn<}{Q7)}1gCNngh6T#_v0Hqb zQMzxk_4CG#62w^Ge7V{Ha+8qom!q`ACb!a@S?x#z;GpO?T$1@zKAAMUyk*N7=LMQ) z+$LPU6Ix0g6BDWOwO^}4B!uibI@vdY7N$Y9%F7G=t+a{Ol^twEbt84g=D=UF_dr`% zW$7L&G%C?DQ{lESc*nBCH~}_gw2D&IARi z80lO=?>Iu(QxXQg*FC@~GKUeeA#cEkEGL8jRFmdtf-teQ<|YVigUy~!ZGWsg-8pf7 zG~=Z)EP_q)!LmK&&X8G@W(`0aaSpKM0(?Y z*oxp^qKsD2IfoUUR%NTQW=yLXs9fKPod9yN&+xfq=IF3wWQb7KA?A`4{Mp$!O1Oz+&Rit3Z{JzDuUt zY!Z@y87EZW#-$r7G$YxGRn=YTs^!x;@OoEz3Mz=Lt5w~orak(N_M*V|ow-}q5}JQU z*bG*}urqF`mq{KRkZ4~RzB(4Vrp=XhQ3@?7&SeK}J z>@-LY<9^$0h}(-RMtAKD3n^>rtyh}uwTJ#huI{SXYfwQind#7q0V(V(c-D`iH>k@| zy33pxlJOmjEUZt~Y}eJ!P;qDkLh48l^L-M>Fr~$w5*TD*H$iWY1>bjzwu%K256xpa z?`+{g_po^e(n`G%?dJ3A4Bs1#MKr;06!VDUY4z)!xyD+;n0c}#`auDIk4iE#R0~+^ zmkpEcrdcau&(w>Mr|wY`v%-Hz^U9{n;gXOOS8ESvOxll#w*iY^*WQq|d97dBjP>Bn z;1zhEgcYTeCnn46IlnV7fSILejchCURU!`b0Xe75_XO16Q*%R#6_FSFp0)~fg~js6 zE~O4gc0T^&xScX7DpnhmeTDxq`xyV?8WsPvAOI%OYLySalD6_sqX(kx#FG$%p{<}w zni5=v0#?O14cqF@2fDlDbwrt`U3?O*POT}R@q3oX&s)3?GHe*1pC=!(KWykq1p|Lm zU#m`C>VKefDxj~<&B+~(QA>^juwo9vG$_2Krn9=J8*^V4%u=F7W6o%lQD8z?%pg7M z9SFL@dF#g;H5XA^f3=elc3(_^Uzsr(R{A31s=@l;#doY&Il+CgX31*hAhE1BRNB@% zT3wZh$$S#uG0DlW6$yb^OT#D1C7T5ll4tu>S_4-41*o69G;lWXXgqmw;GzFnA(y+S$c4A;@J!Yfs}gLuD#4$e`>wr*siq(GQvFvND@b#Mj10js!e>= z0CQ>oqfJH|DEpkzX+K$47A-0rs_k?{4CA(ss)y>2$U9SO{1%+gf?6pn)uRahXvoq& zs)tpLRaDK+4VC%bS@8EbvURU6L74$_;w792IPhz0)iqcj%y#8e@cXl)F?L(R4$AD$ ztXn2KcHA|xTvpG~5vLvgk(u?GM0~{c932OD90xrAs)`V+fqsQx;;O8dd0cVj0@IFy3_Z|FWT-f!UAzC-#lhN&S zq(#QY=bx5Ki5~^*7H8U=ezSP=Fv(Q=6_3j))M1tHUaeg>gX7y{p28aFUl;C1><%yUfhYJQg_L= zJ!);NAhd#14HB0e7Mgd@kjP3qXr{z3yBAtgvif8pv?oV@*&ITqYLe2iRgdJBdm6e@ zO7xP@W4|_JPKBhJ1yhNC?Nul+7V0m1SK*is%?(g^cMAaAES`ty{T9|_iqHw_qj!T#>`iD0p3BvyJlDl3BBg7_Qp0g=O zlh;6FoTB-csPX6NQx2eyHoto^zS;=hcn=q;F+*}rdb5l=u(jY3p(dsR9gnmFBk*o#jryHI( z6P}!PZ4a*p$<-gS^xHllSP-1+ad*FdK)`${Q=Gx0r@mnR`3N4ZP}OQThF#j4c+C)? zGK#>KrayzjbqYbj$4n9h~&os-Wp^4}8AABt>=;LK#M zI&z#+sdR`%YE4|Z}6h~HmiJ4oYvUYpE&Pcc?YXS{C_TIULkfcb}M&6n?J~Q{% zy4Q0`amNv@K2(-6E?}0z%@DlZ%7MY;45(z;Bz;eb^YoT7^DKQc#JIo)s+rRBKI7++ zx_Gub=XIpMd9@)^}3$}QxV=vUTi6+W4@NIIWtHE1}h`Eths=@Xw6YpE}B)z zuh(Hlfj<>n|IRf??zsHNmK#!Dt#>~t@Xr4!@c$}2@tf8lw}e4n8v-?PT?3!dB}~9V zC|ZG#ep{Xb2|2hDWoEr{n~=77VV1m!!i2G<7_(RPHLej0GTRP&M0t{dEU z&r^0CAI~>1-LI?Q?5YoTUvcp8TwRB^@I*&5XIcLNN8Sqj?26GhA z7z|8&pZzv(c7XdLCo6!31glkwYw&r`XZRbmqfEgA)8Lzjy;*Qx^yu#6UO$^`n#4=lYE?i#s z@bbFqUhaKc>)_gdX*fz3C^bGaKfVfZTA?~LpE?9T;@aWyx`#dmsj}Kj@m#|3aEJE+ zP!U!A!!??vc+dicuI+zT&HLUUF{${-b!v#y--;xVq}NHhvs3O=c~$RZO{nJ|NF7@! zK2QU#{NBnrt8CWMhTft7{@EW2Ww`Z%qHp`3qR;$S+fmT^LkWa?wyJI2vEgf@nN{UI zQDG!M2EpT0g8v|S5K9p0tx{1sL27MkX&p}PySNvUCrAkd_VatVt4o~lv@9ci-NTy0 zMXHC1ao79P6Wn+3wm-sug$N42o9p-%o)}4;0r1!BVVuvFUau0FrLl0Cbc5&5tvN>x z`f(xoP~k2Y0?BY4;ytwp$MJXvYs08uX|j`15Z56mo}%9S2iFnVx|VWMvo776CjG=c z-;QioDsObT^B@-RP_g0j%=C!M$Imr8*hkuvd@bLfIv1APu7fukr9WfaUtAcQI>u}y za#dktntfd0CgOvJYDRgGl3=WT712~xEqb{8SY-`1ScH+*YNLOpz8}o;Kz4#3O(LT; z5Gq~z&_sB0+!$Q>!DY*R3BD!(S-|c^jL~vU9ng0!$jw9@Ijg7IMUr{UV^*J~ve8k> zc5FhYUTgyJK-qv}Wsr(y{HnlQsxp<2e-dMRZkNsV3U)to&BQ^9_xLe;VFw=!{Ihb; zfYusO7ECXpUCU3u0pA-eg|F{&zs;2VKk_QJ*cc>1Ayg15pSqwSSk7Tr)-f-ZoSOj# zxnAX9t8A8yhTh@+9{c=XgyUi$%%K3n9RHT(V)+;3_><)VQ5{wDFkplsRi2`-E1g^6 zB*eZ-X6EU#{5$bjjq%RI&c>PRe_##_WU1SA9lf3Ku3lFcjc`8V@%^7`)f^8$mzQ7f zPi8SboRF{9)T9g`C(&?y=?jZQMW;cfM#W;pksq;**DI6d-MMgG-YrwG4g#e>p=)={ z#%k%7mSWUv+%{ho6ovhmO^AO3JgsY?q3*)Qi+ zstZ%Piz{8vb1{!VBN0?c@QSJ`iGv0nG+Tl?11jvm$5VPTa)jNU z8QDlZ_1b(_Osujxw|OrdAC~&c2D0WdOXQ`9at|K1Jld$ja!w!!bx69B0rG>?E1So< zx#F-x^)NjK1v-6p6prq~-5%pFyZoIcLx?Ss<~C2TLRV?qto~73M4PkbZB5mI2IS zbcq3fOGFRWc#{0YBBkrqOKCIY?0a;sQ>(US*=R3eE%VTLp5O*1Qfo|9u!uhv|xs%Xa3gNCSfu~e_juEU=JziE2=;*gv zBcTyedU|}Y~_yy^+$j|_#ffRQ>Zmt+%QG-6aRA>;qE`h0r9=%GWr zu+~R~Bl*PSG0FXq)#74&zdA_=vg#R-hQ?zRSQ}LK<@|Wg_~=wxxN4%hDkc5f1COfe z*=|U!^ts`fAz+c7yQd~Ylx8J=JG4g2J8!ryE+wJdz}hH^OnMRy^hVsVpuPm$Q6q(_ zBKu^0=C*cH+N?KcTEb~USjkn+@cD>ye!q_&gbJ~GJMjt5Bj9SJE`zwxO5=r@6kWLaHVibWm3k=J=g@|q-gtkAkp?9*z)#%Qp6msn~c*t={3i6-ak&$b4r5m z+Xp{VbfMzlI_&hCw`7ZkZw(d!ARM&gS{lS=8S$W-%2(%@@GUs>whZ~)J~Ay0`Rj}_ zTy?ty)+Y@ikrPm&dTRD4?UdqIASR!jqG4Y0=z(m6D0yKLO~+*vtx*}u1BaBc*upQq zS)<9})N2>D@Hx@xb5%|AI;|tXI#0m<^Ay_Ufn;U?6nm%t)Oy(dihca_AHW0X^C$4I zG_nK?{sBCGfe{mbBmL2cUoa(Ru}pI|(3+g;{mL|ew+Li9(TXRk-^y4D( zp~^h$NJV`z$YIpN-}Xls*lMTpbK>ueMiJW;#Z%ltohL>!^l2^Sre;;Dw^jPu4+K1# zubT(A25jQdzp8HVo-jRf^WkTi?dk>W`EjAykv8a;oGXef*-2wZz1B10p{F#SQe^!C zF1UR=YwzG?otxsAS6L-n& z#_wJ8EQ?L&wY%@1|K5GnJ~adj{zb+7+wQ~uukQ0F4|u-NP@zl;h2j>QE6|v$2#yj@ z@bycYWvM3?!PRbI+mU#yb7Q96`v^jsDSg`GrxzJynN>Apw&Aq3EQd+2bB9R>cWWP= zmq&12gh?fG*AiU`A&5jSLq(;zn#!$M{~>^yL^F&d@P+DGG9~w{1)x6b6trpUiWuUn zH+E>h`StiS8V>?D@05bAtn~V~*m|q6U)$XnfV(5-GaQqbPm8$c>h7GkR*Pj{5#!r! z+MHdxYgDly6=g6BfQz{bFu=30e7+0C$Dx6fyt0%qmLZTaw)Oapj ztl|y;SoVp^cQ*VS2QwG zO%W7|)Cp}hy|0A=9;nOV5Z@WAwgA79w2H!L(Syuake1V#$8^!vY84T~#$y)?+<>Q= zR?lB5?om=*4u-u0Zb^WXvl@{fePa5+t>aZBS#>{#e?LXD3qCBtuuWbdROin6)UpEF zYEq!&fT@*~%qQ_hl8Rer98j+LHhPALTsd2H7kM%4^%N}>swVyek3&j$5c_;!oN-}f z3|XYN!|(ZrmoK9lboy;4pFt|U+v)ej{A`(P@_O~S>wgv>7WF*&|7Z4b{44vUY-YbB zdZn{FqpSfFSQEUbQ03DKWIxeBCd_=INkmT7mRH(oc2bU_8iKcgjD7F_NGyva`trdW zRo^mLk)Cn3b)oD1zp{_ce(F-RPZ^#%ggS%-kg_A|Lmpc~l{WCzrI9P&%9*U9}!3ztM$W|6l>O^qH-ZDDaivsg__H2cX z?h(?ud95x|nH+br3Y2}V7p~HOW#1%bf};JublK*_@83RcV%iF~mD`E-sQ0$$u((ZO@W%J&b88OavEz05)l$ja2; z%r~V2i-vI~?O%PTY=oAsR8%p{F^zae98XC0;ZRc*IKX;m`8*m&J zYldn#l+>`$N#*2rRq|-l+9>^xJe=ns#5%B)t+zx%Y2$=3B3eo01)Y=HQTJm=CBSn< z{fyzR6%1h;&LnD9WC}L42w&kwlQDT@E)MnmkA`d1)(E;%EW5$b{als%wes?{ySdfF zO(T~e85^o)d7?|^$cJGVBC|dKnZ=yPP&Imv>6?hi>!sTk*gA=Bj(C&w$K}KBO7aaA zE4IDIEA%ss&2~ha*)H)L5#&hAb>lahOsP>w!*TcK4iDnFuq%@iPIdlX`fNc8XCmjywKG>UGvMt>#YUw*hn>t~G;E#gSkA%u-W8g#03x@h8};*{8{h`Xzf zsR{FE*6Xyww>=nNUp|>fL=T!qAjFYxAba~(zu_44*R2*R(g(7(IaFwUm$Qh~ ztFX3RSct8b=QfM#{B!HB6@T)(p5N-dyu0^?=mE9C4|8-2ejN4a;;5&$0h0V^?s~6d z*HY%BLQkdYO~i*lEsPsLkLwP)K$o$S#Tq)eoz7xK*n|b_w|V*`%)Q5PNkSPv^@hQ2 zQT=b+5Zp|+OMo+>U<;S3>$FYb0zy;#bJa$FJ9)CZRelG@F+H$J{!%=V`Q}qhLukGM>`;fZZ2ofB&$NM5` zU#C@>c!1-gVyj!i*-x1`u+YOpdkZqhXG*6-cwOR2%8M9Koi=&FTw@Xl4Y634a?fu+m}~C! zv!feeQjI?_YKaR+jnKtnhm2@#2L*S;;BjMF#&+Q+Ko9WKeXo9q{?)){!Q1f^r{Y8B z^r1a(MB4zj@PuT&t0#_ zB5d7V=3G}^zn3aMpNXO-cHU$%jC=BPvNX~hTOGs1-MqT5EzQl7>sYT@EBnQ%Sx!gp zraZRa;L|^B^Ybd>CxpODns%XLqO{9BZmZ0a?LFwo<4%ksW)GZX(e^nwxhjLkeO=i) zWCZTG4EwBVvqpz=O?!{dAa%^NdBlVucN<)L)O8@{u{sUtX2`NOX<k+IeZA#q|KbZ zwq?;*JkkysDcdfjMS=9j^ss46qvuH1h4uzC_9t8&e>f+q#xzN zg|`@=>G18|hCjT+3!TLUwaxi09{nb_4?_EV_-XHUXN`luSRiW5wYzdRcZ+GP}sJFPiwuCGK6_MYB&j-OjF zf~jVfS0+bZXNySdIZ#sPNtA%b7K!b1z&-|+AR9{8Kjz|#iWiMGP`=KCWF08}U4xJ} zayGJeFf#n3N&NRVQo@NeXh+p-?yFH@)Py*T^e*A7(q7}JLY!Zq zk#MOcbuW)F$=Fr=0Xi3MJW8%S3)9B@)?rM}-Irs%+Nm(50+8cCGK|8#6p%Jt zrc7zHP<_T}RjCcfZnYKYVS<}YF*xk9Mfstt-3OLs;p)c$7tU`Oq>8MMBG=}%QO(J?O!Az-3F>#VtWDSw;XA~;X7T5Q3s)L8EJy^m_z zRyqVu&hrwPZG4K7mhqPdFjalm+AKf?^IKY_VF-2-8_5F260uFFhvv`5rr-29(9DB= zd*Bp1EU-$R(R!4rf)JNf8-B00wh399Oey}`oe#%ajAmxF1_pLNQjJWKUJECc#-$IUXICP!Rb=z=52yEO* z!IZ@50czsZNHb-=D6f8x{jHFe$9^tPW1h|*0x!lQ%~B`KZn%eoS~CQ<3zYY{*^Vim zImOD{{P+0h_`(=e+3a`(Lf-zj>_=E(lRyJi5|BQ^^FNG8Kf$VNRD-JZ2f&99WdA>3 zz@L>nQCZ9ZM+o&zJzRt+QePtqWhm-LJoyiFgIHZw4JaFEC{##7J=rBjJhc9T#`ctwZlG56@pe zkp3Xe`Y>Sc*k%T7wrY&-Ek=5F7R zCMsMN!{ncmj07r+u{bc3{rjvK5A4y4R256Lmn)9JHi#UgHI&d4%9Sw(EkBd>in5mr zFQp<6yPCvCW_am&x3)I4=3rghi_y^6SOB!dD)Uk)fU~iz;KIq6TsQOn^Ej1xvYhUz~pG6zu3bCX)K13_Ov^X=s zU4EBinMLbMpn@SuF{5f|Oyw?eqDUP<98asQ6=vI97`rpHE_)+R*b`^4GfKtymU%3R zotd~RK}qtwGNmV1S9CwQjFrZ#QF#>RF1CBx<^o2>o)dVIlg&UafjMXZhT8f9$+Cbh zHCsMYw@RXiDq}YemYo@U3CK#M_eB9yguciZUAOT#czg>9tYaUduTgli+|6t$IZ?`N z?K2P=OOs2IG77R$*JE$}+WJANmy-0T$26VGGmB_~0r1te7huBK`l-PCSHMi3W}|j( zrhaWvl~UMT-rKM?-Np0N$4kJEVFK+~-7%GjKp_?2W=%(J*tyf0{t^`N(m8JRU1(u& zV-9v&6?%sB^eck}g!FV%TQ<)-UdERChdz8Y+6=)Es>vTB!=&@<&dnyi<-Yqo*RkZo z7$g<}F`CTqLpuoFjay*@Uwd&^fn;Ad16^RX5D3-^Kd|ZF;^f)ZL~k2p;}7!;h}~oD z?C~l?_ORnYR;Xo;3y!Jxs^Qp$aO+KT*v$UU5N3%!rzR)wLz~7DKy<)6r-td@9TI z$Q$Upq4!QdBQ&w1wmJP6$l12lY36@(?sv0d>l7vvAFU0ZzT*#fLZtdY>e&|Ry&8DB z8gS|m0>9hh71HX0@_LRaSd6caErxT?x@{{axeQ>xR@7_67Hia=z`DlaZzk`d8SnA9#AH=Z8JNe99j79I+STT6Z}Yfa;;iDgPkhL# zVUH|hvDK067SQS*e$+@+J`Cjewriw30tbxY9M6FV6_wqxB`zqr8i-t7-MgKW(D%QZ zY;Sq`+ZI@VPEH1-0`VG+^*pxYj1LEDVh>7nJljKdnFmcde6}w=-k5Q#q9Hh<`^QV6H`g%U zto>-Z(8pR|6XL)tOAhVu>X65>kZQ4JxXCMS9gIsj=*|A7$KBP8GfuEhs-9@F-@9&a zk69rX4v*A2Z+zhAg9yKOMKm{}N#w6lpp$pNquvsDy-BwYkj*b)KX##BQ9*}aS$yn5 zy3&Adc*JEpc>sIE-w}d#B7KYV-_sJNSP|NiGGMI|cQ%fK1{?2m5ejZ_SG|_Is7PKtjPF!- zmQn{PCA+6hAzB2wf1XGG&IMO4x^#kpWn=Y=IN6=+1%$c;jy}3Gu5$cH&%P=1!dg~{ z(bG<@H%+afDmFt>@a;x(+E3z8kCCOd%I!;_l;*vhz$=%FwKWcdk|02y9&a83Q#HnO zYWK*g$c1W1Ztv1`Ojn($K56zo(PHAYoV1hIkv&#&6->FB5xibf^E-Xkf_p;&jlR`b zf!Cq9tn}ksSe77RQR$T!&X0jL6Kp&3B-z<`_&gV{uh(J80ZN)>#M^ zwCOuBiJB;~Wc;Frk7Ljb@zeS7(B`C-Lun6Zu+Erqd-WT|XLUk~_RNXO&rf7Dai&Ec z@**0|XV*2gE*2qrMF9by`|Cy3W%@%;2p5AE>~8067!?y5j_tTRLiJ>H9eoE&R~TTb zt$F87JP5_3WR7NbSI#AMqLTpS3G5pb$Ve=WI#S19nnE^i^|%K*98p^R=pvQ`pDH8I z?|9dUd*hwQ);OYUIunwtU7XQv@Q07^xWz-Nn{?+4g#nWI3Gz4;tDMJQr^6U2_;-u+z00H?jk(w==WmerF|l6$Hn z4Ia6T?OSN~7ukqWzqU=agZ)Cp5#o6Dj4wu+s5Coym4Z>`_rl4|NESy7EJ?Xe(LFlo z_}wEUuOe9f@pQmeeMpz9#~@dp`JsoF6K0Ym!QtqiBhPj@B}5TX{Tny6bs>_oQ=7k`!@zP!iX~rPq;CzhF{(KK-KTiTs7d=W$`Ja>$e`B9_DAIej?WHc|;+AWIk@VY=$Lfl{0?+mdlJ z+KeYN%pNjkr1_CQf6DL+m{Ox`dReK|ynI3#OyOuVY}2SEx@exPK~d00#`KB(VN2R{ zk;?H2DZhziJI?Z?Ww1us!OTa*z6$=}Cc|4*MTyZ0Ak9wnfMfTup(p0o*eixLs$G<3 z9b*hz&g!9b!^Xh*QA>(kORT11V?`kKkG_#yE9&p=S~oTJ;PYK}E?bsP%vx*9!x!~< zUkgsq56YA6FwGhEl@huXv$4am)SaZar#;{rUF{lK40D{4$V>LAEOls%RRd0da80(z7xFZPTWk8f5TMCH z{h07#c{e#&1IjUDLV|2F%@@n!WPwn_uh~Z@S}Qf-=!+J>VLGyxfGBDlJ0ymXe)_vL zr6>2Sqwzxy9aR1b|#!2#P_ zS?kFcYh_bx@GV#ovu+_G_ zigx1d+Wib*Qnt~fU;D^LMMX3iOW})L0Sa2~VL$mQEfR1yKV?K(W^dmIb!&K&ZX|~s z2S;!=P}tk$*tUhYyBV+pg)aF%&?kDE?O@t4J9bYAT#$qyO5Z@f1Pdc_A6#~tSDyra zwc8Sbj^?=J>D`6r;&bi#)D?GG@JR+K%Xtf{7xeog3XiNW!nezEsIJxzU4wpIezwQB ztZ%}S`7YS%x89%RqEK#;GqkfmmD{if#XSuwT+Xp&`aclldDs$ho8%>A!SyeP*fYxk zlH)Us+fz&K_PyJ|ZCLVH*F&h7He3xQBG~X05X^RK#i`%Yso$u`k4cie*k*66TshAU z7S2du4rt4>U0@0iPGbSb=Cx%5H9ZMx#;dYG7uoZaT$%qv*gFST7H#{&v2EM7ZQHhO zc5K@=J9g5^jytyPj-7P$a^5+w?sv}j?z>g%k6mk5ty(qL9AnNg#}6EK)XfjfJ*d}B zrR~Uvwq=iqB@drUSrwcdRPff zZiWhkPR0EhxB=D^@!8nCDXu`vFNs>WYQNT_*uYEo z9jn6qM5_2?s&$m>CisgYrQ6mr@0T>Za%WyeQy=wRHVdtY6s0dYpWKmJ5kv%@2YUQ6 z$EvynhEWYg>Kh}EqM@-+KBl(;$tVd<2U(ZQ!yC|pW->-UFjqdR)PU7FRKKUWyFIWg z-0xqcZ2q}cZguT!_$@p$4dvmIOzmKZXjXq{i_cOoGo z7vJ}H(%#=tKf;SZIoFpTpr7c4pI36SMp5RsRS<(7LqQMi}Qg=n5-7-ycpiTV( zRT;!qQ$0$a22WS`DS8rEqG7qgW2wvYx{>F#TCKiToVo5DR)1Vq1w&8*8g}+3awuJ% z?zEwOwZXJzn*!ZBDPW$d&n5t?x9@uCJ$mBiEDo!Ww&k=P?J|s>)=kCpV>>#2FShKI zYJ7h!CrvVrC1ZN2Sl^pY;~kUD#cZ5AZN$v{p12V_zH|maI-(!>i{D00BWm&J7F%*& z{nt2UqU6H(hprhUJx_ae<_88_VoNup%lenpI=C`Oz&8};WAH=AW6NkUq5 zfI31=v(>n1+`eJXV3>C8glX^oFRBQ5?Uv(4k)l@c)aqjEa)DNtV38epLa#_3D2d?>5kkXAB5dkUc>|x{q=CdaEN`t9 z6(^y+#9Owd5m&9?lSUB6LUhJ?GQTW@ciN6%b)pd+rv7y&(v3LXRdX~y*35#z6ZjW! zJR&bveHF$sjbf$vK`_SqyD#vEJ@=fG*|hQZv!-1HScW#E2=EQMj=A7-`i_xcTKZM9 z0Up?8of`)Z(C5^&FT#KC$WX?@JU)FNxb6L$63IUfh*eFk%sBW zTbi1!>Vgm&|AwYjoQn5v-A0of5O85-T_h?*S}MZdOd$nu)>a|b^U|mipXk#bcFfhvr*gxQ?OnPyW(KPlefk} zh>Wi{G{Hm$uu_s#^rS}0{mFrxaKR=#;dMH2bfZYz>5p2&Du}I&aQYjPca6F8<|UcG zH_|Q87U1yt4w@~{!01@P+NeyBzK@c0wDs#XmS_fNJ*3-eQ`CwIE1Jr$Qsex@)lK*^ zutcz@W-rLYv%H#xa()jHoS9Dr|DNrv!~O$V;a8~BeHT>k^0S=e+_$Oh))NtKl|1eu z)_{15mb{hDvD!?tO}+*vhg?#)Q)N!k%HFn#{yJZ+(v5g~eY_ynt@y|*C4TTYdNmI`VPO@V?8sUaq&nfvQWp?AZI~I(f=p7x5vQ zCX%i>(qc?d5{O2MeSt14ADuJi2Sdjpb;$+TYH|~NT)$PcTMX{$hj`=C%O){CfNA)i zhx{l~Z(oX={kk3bV1FEEi1CdDeYite(sm$r+vokAx9c#3bG5Jp4{oa|-U^fU2(TJ9S@cz+$N|SnGJy9}g)IQivZg+j>yu=CH zP8@%C0>uzXX1=1=sBi(vd=)fLe~Mz-L(#isc1ft8h_d5^NVXlm>soU9rGDnb#)@RS z%H&bmf)z}c#nh9#LEZL0n$X3*?ttvYKPczlyolcaLs7&wnjaeNyN~(sKS#~~;nb+> z|1)YZL_|~XsCS4!(gV?e#p#HY1ZoTUQjH<}ouP}sMCYYdB@Au`1VdSgV&mig88@}y zGIP^xzOK*PuG3fBy|1t56P`c^TF?g=Ci~1Jq5x6jI2o)OKu4$~3xTP~2&&xg%6?U7 z=d9xjGCA2y<3u)I%g(OB{$#j2U`}7w_&(Df%Szp?14up#7g~JT&UoXp12}b({XAoP zS+5Z!9pV^aX`S>dGP(SzKic7KOZrz_5sjDjtmzF?n|*1&Pj!Aql864L9%RugCWta1 zr%K^&5cY%i)wQhDW~cj5cLp3I5UP7Q~=YSa=U3F^pgL5tCUuos7{d zURP2oq2pcL_h~LE-id-7Oon%58b;4mYR1M^_RbP9dkk{X_KhLS#36n|5mA{&p?;_tD zcwRu43X1SB(^Dw9X>?>(DWPALK$Uu(kwo*KftA^lmSk9PPj#2LHbivuzSI|dJ*oaP zrra0{Pq)5TBHsTwrv8P>&-nXRNiE2g*UB& zZYta8{&_(ws-XRoS&DmM@xolRYKhGU&w!D(t(OrKCOlVjLPy-YEJ@#)t~))G7{j505V}iFP0R@3e-H%&+Lir=qn* z5^>3UpK&$3mGZza+OfOHWxsPR)TybR)wLzHhf4+)5 z+2~|6?e`4z9IaQkmEYGt#6cXl9;em?2UegQ-^z(#nxJ$gB{{*b zSTB#*9xOD2hp)bpfSHlgeXrTB(2NaayJex~wM=lW^g;l8I3~C@T*mMq4t$f-UwoPK zHQ~A??#25G3!HdZ<+}E9JM$`_k`7`@iXqZ{8e0{{-0LeSJ6b_)OG_NlY+2)a_vg?2 zES5Z)GZOmrIv(QASjW0f-YY-qaYls{YdBcJ77^h(F*?55bFj)vXyAAVkqQ~%!I5Ha znTtRfrG7aKjr)X@g3o}S%9JGxsO%s~fhQopi&O*ooYMFlgNofk_mWAYNnM=|(s!{6 zU>yz=!HMzhM0ruYgYI1#{ehzUg)d`INc_U_kZ+_onvQ6eeDZ$jx!4D4*{|SFrHB8Z=XVj`)oy+V2haZ;9RDIMOI^+lL4GuyiYIeH2BQ5O1Dwu4SVOGkWTQXZ#{P3;x<0#o{3|$G_*bQ! z)t|O<%4l~|N89owE!VJzr4|Nra<6gjW{0CCc6qn5ulJf9Lmx<-j1M8>qJma>l`3`(~c4L$&D4Qxynzrg^HagjJ9TZsF zxu)f%I1+dZwvE3}`U#6!BkWDesxjs2dmMuzxD3}k3F`M~r?xBR5Q>kWqHXY3Uv(5C zXQg)`@sM7pvaGtfqYDEO&eA@y-c=Ey?!PB4iV`39sdd> z>?a~94FjIc3*i)VB{vD~4Ml|yJnBhvasL$bk;TZ0(MqlG>iET`|l+T_rbii&^KOS>|2hB z)q!9ArKGDTR@!0^RoeGOZ^ z@@-oe4Ba{N``!B!j81qKiwS<(Ik6~7$g z+r8QXqfR~n9t_XlCJ*P%I0y8zS&Vfnl7_n@HdQ0?sa~|xU|RiMj6Bv*ZcRQ&tSDQ1 zUA(=Ou7@-uIA5u=p(5cAwl$re+|W_FvzkgOqt%F3|GLYBu)^g&N#)B!%mID^py7!I0oNCPs~Fi0bAWF&Y6j7SQ}cSL&}z$N7~^`%V*Wg;w&NdQm6-)x-P+~4es^z@+-lvvuSo- zrrv?01f6s=|LMSfp?PJaN51)}2EnvM_qrhmgOSP9a6(u>u|S*L?+NAv%c1v5Md%GT zzcH96eBz7fHaDxLm4}P+4LJ{#kucUA~S!2!k5_ zvJaYeQy$!Y(-!K9))VtAmbim?>z`|U5W=bffP`V#6%CJ1o6j!t5A!p&a^a`EcHW_6 z4_Qw+^OxG<=v2M`tQv~HTLT~Lv-w=&5&n~iK?l|{D?qCt9h;+;sY| zo_-(-PcAXF#%9GE&dweJ=l-`x3OK4<_WcE{wdM7|&?>19`oT=6fT8)E+_sc|EaAfW=(NRr*E&7Po^O zC)(r-&WGk5=8?~SvNu$QH(1~|+=8Y6k?km$cA9H0fdSNSc<$za0-v<944^n6f2w8F zp~*_(5P>^ww*PZ@cKM~SL#sMTF$qn%Rxx$%hqu5YyohvBeOPB-mp+_tLwlE(CKqU} z^%<4vg1FS2Y@|e=$axxhyhjAXpHxt{c2h&HUcxh_r-L&lVjiYI{D)9A5) zk{;BrW+XaPR6=tJ@i}y}i!zB)jEJ?(s2^r>Cbxm@caL}%HWW4l`33Yvb-?WjkHUJg zI@r0>$@?vD`{A?k4Ltk;YKR)8fLo-bs&#JQ=G(oQ6+=#dtHpH!b0HKY11XjtGgXWI z7&M?V4VFm$&2|HYZUCX2=&+#CEB~w2AGmkv@&!b-TlnzTSsvcYEw4Ay{Pd*5O3Gee zEa-HzJU>4S3Mjma*Agrso)A{+lIre2gDPdHfmBS;!3@^ZgCvSG` zKK>o`b-ltR%j2Ll-FlHhwGB+}%=<_3qTUMnYOSHRah%?)!j_;2{Ikk@Pp(}@eNcIS zHH(>A#R$emSsn)$8@HX)Dw-CKe2Incmchj0+QzD^guRlwQqM_)W3Y04)>uo{+>7@zu~=3V*XUA;UOFL0q2 zFG4o0P3|gn;8uPvvpuQ)MRbbi!T#~Vy{nSj^$$fGO9Cz)+Sj7IB?0VuCph~ed_fHP zCL(;I(tM+}UP0cC;(y+csmLeyF$$3sNznGS*}}ZCb4BhucQe-VE@ z7CQ%V4tLp@z{50*$w%s8%PfzQYDOfx9wcRiC$~C}6;d|jChaURyuvIP)T&MVb|HKVXsv|gnv%G^50atI$?suG{1?kN8qfwYMUQ+ z{P&%~mlChu>Nl>y?i*|VU&62d*TcPs zz4di*Tl?0No@XysxCG-d$v($v-jhw&8J`T-*SlR?%4?7Y1-pzJZA@cBk3xJy=R#1! z)&mGgus#j3+@BlHq^LVa`#BzdHBjG8m}5N4X?u^&>0D$A3G7@WrV*pu0cnT|tj&R4 z1qe^(7mkqbwNrisKgb>P3*Xv51{yyIQ=VD90=;)EFN7fkjc$-a2)m@Nj7JEZi4g@E zM;LFgQhw1DI{Cc$EDvQp(qi7-hJwm_C68BN`RV{1qi=QJ6#&;+Kf(tH@w6fYF%A@8 znnNwK?sG#=@K-0Hy@P%Q7|(qE>EF2_NeQsLL4^KFy5=2opwKpaw z&^!=p6CWHOkkVztmPWEBREjtdl~F=>ayKZ~tA7E=l(vW8L=S4ydjX24v10V+{(i(@`2Eu5EATAPx z5yUdHSh{$L+0ZKfMpZzbR}|Sr%e`9jElJ6u*Jh;zeXVIiTn=AO@609kW3il^2#K#2 zfyRRAR>w3_aC^cR1zRnj+)<2kEaw@XkrbL?{br|%0yE0sL~tf()VXUz8Jg|VVu-hR ze23Pg$C{!Mgw7gylZ?5_+#ErdS&~9uN?%u7N>^(WHl;4y`Dhm;pANd5EyLA$g298rp}C1ufwU6x`(YXcdD8byDA{ z(eJscZaG;reO=~cG%bNE<|w!>nK! ziq`~3&bDmXk(4ICXBF6;xz;fmsfPk`z0w4}-1jZ8zXokt5cZKmR?KYzLu2_@X=C_C zKvrc9aDk;r3^Blql`xVUCQgqVMfgdy(U1w>tVq6{o3JriPK}+ALqOpeES#W2WU+G- z@L6~oRVAQ|rMb+M>CCW|^@wAXWp_u$rvi@JU?1S6e#()VbuzRPOsF zO-@nZEb)6h6;`V8N)D* zvC+n@k>tt~<34FB+1gavxdSlMWPu_3bkbgC%9K4IecB9o`F+XjC32t^6NZ)SmyqUj zL>l7&N)6d!WenRPo#Wb+VV_O896O7el;G8^iP2m#j#*Hmb9tJmxW%F)FAJWbwf)@5 zu(}+Z;+2$Dv5gYLvK*(g(4m%@ct1qWICYDHbQk)|bUphe14}`*3^Ri#Y?`g~wN>Q( zl*#gd()y>%Hy+(?Fq*nFoJFR)Tp?V4y z6J_Gn7bA2$9PP(0b5n=wS^3t@>DIm4g+fKGyp!}24(0Ue0TDrLSts3TmMuVtVQE7O z=2C^@De5zDz`vucd}v$eU4LBlqDl5g^9mK-ddAj66SOKHr+k$xFl zG@?4ST@pAfS^p_tKcTHv7+PPmEL9+g5?d8Q6CsU>N*_p*Aea;2v966{t74k;c~kSd zZH~;jo~|vD1G`|~C7}I&9UTeelf!j%U>?Z%5nB%>4C|%cre7&;M~%Yn_E){~Z2QrR^;X<5YaSk$5?Q_c+iZPs2y z4TBrqv3ikwQFnNu=Io)eer#)sSOS!w6h(Lo)cd++`@2%=rONUxCjDfL)$|Q@&+g z#NGiDZ+eDh<26u<=WHFUsFbUylB-w*qFBLw#F6KcqtGc|v3Uqcl2%~0Ty^^+Kh*7# z4WOT(+hk=$yOKEv&>x=uE=Rl4hB@ViIepEA^;Ay>(9e8pley}2)YQUShA!_m5st#t zImJ_l`9|&gW$nLnqF<$P({C;(3Bs(vELoIb4jT?yjpxL!`77Rk}{x2t>f?a z+c4`{axGQndF8IA+yG@oZ53xNEy}w>pNC=X7<@_9^s4YTk=!i2Rr=v%U2pa z--R?MEqp0Mn=vt~Bc%DsTqm6Eq<7LYY?rkyv$}DkFt{%<|Jz`SG=>(l*ch+xo%+f! z`}ng*`s&w2)~J44V^22x^W`RgU4C}>mNml0)WycFK@Vaun{thVl}|3<@Qb5i$CtM> z?eV)QFp49?pM*fMVsl8bL0x-JylwJVJ>Impe`|Giw+)%QV3@rEOikl zip9dbp+o$ZS^O3luDHw$D(SrQU14G$@A29bi#6-M%@E(DnuU*Lac71WHEe z4Ghjplgk1t^bIOHP65k{vze4>#@(2g@G!J;s0J3AZryT8_c!s- zoby3{I}6NU9PX|-86)&0Eb-ByG=?`~o(Q2Yv-G1FFrYhU;p-!Cp=aX;JEJcyTzQ2hg&+aPnf{ZEZ;d%AR0g}?(!h7@EUQ$-hJCNPc68< z6v87|0e;@A92ZeT`k#6h22c5|VG!uxWA(e@yXA27i7O3xK-Lc8X$*@%hBHm#c`#j{ ztT6)um66pc_;%u%AsvR~JvJwMJlAjiwr9p6ff;x28O>hxhX#jxdO{-{@0_R+Gj4C_ z2^dcHV8Y)xX8K6?)jfLFgI0Di2PFBXE(1#2bZXe~KZIX;PJn*#x_=Rf{2y zbl9dZEo(B?*{|+Cu`O$?I6Bcw^dCWWvhOLWrHTH*;l@&J8UxFD-=+~6habO;ro1@2 zQ2KL(5Zcq_dKBkkE(Z@rYi-IP(#AWBlDwoPCfvZ7uAXI#Y_kA-ayC(jXH(@?lO$AW z?x0_T7v-k$_haC9wiDAbfr}?q-#%BEw>j)F1JNC~z$N~-?*4&lC5npDZB>v{WdCty zUnXZ0tAytmGJ8K1ZK}dSsh=SehC2E&TPJmui>gu;^r#3;OgrC^k|Eb&j5vvmR9)(l zlW)JKb{)SS*m|+pX{vLKZHC&<`g=Pq<#YF+)%8_dvr77gvXt-`@PJCLo9dBu@_JV3 zs=k-15_Sq4DaI=;NlB)D-xL)t>-eyi3C$5h)K2WI?VQu_G8PIfgw|R(a9(W-=oFU@ z($-t3hRvfEqC>oxWt~~q# z?cq>1S1eMdUmsw7{_bNx9m}G2J35S(Ar_Y<%u7(BXsm(AL!4BJKHMKzfOXQ5)pqcS zx5SNG*@1a}p!QZFKx9y5+e+Aq+OhrcX{>fWnjg_=-)JrO02L2jAF-MFEy3yB^a+*P zX*JZ3-NMb`aB$Vipg7e8za-1o8i$skS9(fjiEE=#?{JfNJzRvV$%)i$8qHlP-g1EP zZBu&bgdc2gK!m1*Nu(tsvqGe03-}QRs!3GJHng6T8W|C^b`22V?vRKF^RljKGT!W+W?wEn;L{jqikXr6VZ1Q7B6@qIHGX+&? z`lA?%@h6hs-9{1<+epUE*AOqVBiUbC5Mq`Bbc#Q02?TJ0o9cG>$YtCOS(WY9iwRRg zFh#Eco^9UiF_ygEP^@o;1|L_%a+YMTFJDq{Dap`%77gk|xw!+a5gn~ruXy|+sHo~u z)B&a+GR=6f)#HUSjN{@GBZG-a{IW}+BY(ca6~YS(7LcLal|-U#z{QlKYTPpC!)Tnw zqEvY9TQxXs!4pZn1;`H`2X|+tp%Q}@^xA3O)+;@hL@J?vI(N)WIzZU0*=T8+vQ?Fc zq2~?suapsJVMf}_DJ{tm+NcxNaFj~$RCUNzXw}-g!WDP5aL~aH@HagK}FTftPF(0g|i( zrru)LKXslpmpP>1#% zhF~dGit=P)XtAwmEbtuFVF>u)6@f<~UC)D(#Ji?N_D&!$?gUBZ2UCD?`-bufRFHI| z?WX`Zakj+KA&HT8;?^VHgY8HGzbghq92kT`O%_nYcjUJvMXtlBNfM>364%LORM9;v z02e*!BioyJ!E?V-E%~0J^vUDeP+AkC(Z8hNG1;lbokxj6a89dH`htJXV_WDX-bDC8 zpY#I(EWe}xga?ZANGW?#fC3Qx3wJM3K*1bb!1N%a*ERAxIe;0IngXxM$tq8&vq(Jw zm+T$=QztlQyz_>$6j-N^V$X4p6*+)%@A?*zq>o__w>Grbj?@w0f&4}F?M5+!@(I3U zLa{A@mA5#>!o4o@7x@++`YRfe{2f>K-1kLb81om(V^W|j`8$xH7!|&=lL%IszJ(7T zcl1kAXg~xbk`N$4+e;efm6=4Uot{A~>8rAAdE;Atk4tP0#3$zp@*l?*%tUll&Ydg)ZaBxZ%cm_fhdj!@M5X!Vl#h1V z)0ug>Gqp0)mNP8JQv(gPIOsuwc&q|?w-I%fj7YO5s*!W^lT`(s%)i$TbwkZVJAYtJ zA{#7MWdu|$;?JRF&@*x;D{K^nrx4yGFk&klrO4`K>FR6TaMQmqaGwj-(Gh8(&3)OlODI{;Dyr+W4iXA!ghXLB@x$@r^FWGUrIExp(y*q#VWMFjvUEBfl^V&jc1*47P=sxJNO`4D!pb;dNr1a ztP#(ZPF8D`sbre;@kv}SGC!`bzIE#Ctw)Rd@=V;NBfn+SUePwU)Q~E+rq%7{uJ2BI zd-9~_tH%ySFS%Z7-TRSLpXHddIalN~U5TZ9o$Qvumu&7nb`A4NBE!3ex#SvkO<4}* zaR+4&TV&M{D%y$vX3TQbLuP(Ssn{e>0}p~cTu`~PdDzc<<*N=@B8_ns+8R@j z#al|*SGz}C4H3xXGA(*)5S*>`cVAJa%z#TiGU^E~>U&99Rb*-{A8CP*kuf|@?AKqv z?$|tGq~i{*v?Ne$H4MlIC9UabI0{`wR)u^&MZOl6JRb9|3Z9pVE)LoO$UxRE)NClo za9sYC*Axffm#W{X=eQWiAxLo75Xi8>9}{yKsM#!{m=}to<~C9>GvBi{%Uj!njxZj7 zw*dEyg%fvoZpy$41~E~-b43z1*&Ikw+#W|+lfwCVm3C^5IboyZ`$-F>X0cHj>{8gE zJ#+GOO;ryT*n@y8cBiOP6vla?XxFEa7{40+l2nS>i* z=mE>%0JIY1Jot@fV#zO<5plAJJ(J3Jlf8O4TKBC6Hlx5GdUdCMqV*6gxexH-T z$9mv%Ux?bOI)z$K)UQQEtLIfyKElHaSCIDR76SQ(g#Hq?t1@+R{;JEM+9m@ef(5A? zs|Ah(4v8!1Q|3g=U~%7!Ekq1P4$LS19K@G!XKRn1_Bt%rpXdyz3ru3p8OS~nQW#5CQNW=m z9%{xmd3O!=#ze%E(R#GU4sStXlqrnae4V&+`_8(TiN-mS{3sEY2$Lby&*zRqH)&@KY^q77{kXvcCoEg%DvWE zKJO_Z@+o1;>cqBXlQes^&r~s!$5Ud-bIi#)$b|mR@6cl@hSpzq3 z85H#;YXuaXqg&Mv1*SKhfou7))nh=cowEj(qO%vwDR+a`W8l`|XD@8@ra{G&KsNjX zm?xqfh1lB|^6*@6_^EsJzE!udsSF|-C8Lf+I^1c5b7eo7U68~@AUjrIjWg?JbY27e zOvt5|z@|KtH1S1XK^fHE30bINKO;QxrZ|Yjp1Rw>BB&xD%02xRbpr7s^!5nV+R|i6 zfD3xygC5wEP+%32d|RYxS4E=51}={B0Mrlo;)R6kmfYlzcxLcVvWp2};}cCZjn!P* z-4&g}#zD0g_#TPc;Y-<_SNf?R6_-#E{ltLlTi8q^rVmJyzu#+^C64-?yo-8A`vo`= zcKYRIgbQO>xV6-u8mq!Jx6)PRD5g;zqb*2EQoAsP{3#)qm1^$N&@6E6HzSOYJd#<1 zJv^213;RSvBVS;bw>>q-PwQT%zpCpNb3^t_js44@u5AFV%PqZ$6sDhu$E;G~f^;|E zdQ!}{Vc;An%8vqCWc}+~5sWobFtRJ#x<17D?tYf;o6Kp6)SrJV^z zm_KTvoq+Y52{vyivRdI4A~0S^-?P9KlHM6p2ZeIZl7cK9l{Yb@$X5n7ZV;e<8S*aJ z%zAMOAN+*Ifn-N#dEbG`4>JVPeiBup0{D{j1VCX#Dv^uq9nKgf7eygTp%MZl!(V%3 zG@IsJbp#ic+;Wz|W9a>rWS3h)3&73pWm}=8{je2%2GU|>!eV0#MGy#rnP1T5h6b6^ z8- z@8;xeaafOyg8^MmnC|OqEdIkEkbRZ*z)V^NT{eC_RAr}>dNWbLQ_E=Hk^(f&h4X^b8z@9hPWANXm@ps|27DW`G> zrRoI;UO&r`Bd-bHlBIi!V^WaT=+Vk!pe1hP@|IDn+u#K$w1s{{HmEjct@HE1x&T)* z?%kKHAFnUB__4uF{b_`IY3!qW%O<2H@uDJYPme|R-eDL#y1&yCo-KAa|C@=oIAX49 zBO-^ERsNSdEqB8Tz}Kzc&otn9@56j3JYJ}o#I!uGKyw??g>&*Eg5qZ@rr%cJ>!*NH z7x!R-FM-4n=Rz~2PFpbluQU(r227Bx1{c9xST)R6?5uLwb*?F`x^TYu5+Oo4!#3T# z_{GOVVB2ZfEhLmYgx{!JND3vnX=!Od=F$;aeQOj2EM6<`weiR-iYkQ6Unls;8*;B! zTnTt#PRZ?9XVh~g-4jWKrRB82enZp4(Uq9Ap~dz?Y^!m4opvVgi4v`%_nr4_1u<3v zm`i>NOa2Ze-MHn)j^!?`&5+aA;f_w~v+$U1w-+EyXRw4ZmaKTB<><#SCWPVMRVPA-I*hI^xzJa4>ywXeRtlSQ@4rwc?byQKS6?xv`ig*aoq#w+^Q@Wjtn2Yx#hE25dL!u?7iFB ztA%{`C!0(Ov{4F%N^wuhJ!^Do@vlJh0#W)UC##c1q50m-eEC#Ox=$27tZKt*rax%R7wErt_@OTx)>^)`R0qHDC;u*{`A_WM|JSh0UU@@dLkNjK zL_Qa+0vZj4;oB1z-JQHxrbqd2aC9h|kPQui7m)q3tR0i>;cDgg-XaLBdN&Ahw}i%` zPD-u%+1O)7Z#qBA`}6Gu%m}oA25YpNM#jt0fRJL>Xl@`m#2k_~wCQi1SvpODkYwmB zEpjU=!0lO1FG_*zq41t{LFa+ARhr5l);;qFWy^9jiU1+X#pSAIs1N6&jbQg_&0$6q za(P!H+FX&8NQk5YFFb&FU~5T-d?N`90ex`IfhhT~>)=hff)do7*e64#?N<{B?Ks;8 z`@MQy28?$fC4;2dl0h!H%4I0Tq9N=`oZ8d-EK77m5y3?jTckf^4a54&X>l|(CWD1u zs*}~-1(Ql+9X&sWCLN&#QZDcG!Hwt{p5O`e$EOCR7xBF>(oF_Gt*Apj2M>7E#iA z`SY~gOs?;Nsj5#s13})&DT9|s8Ockd% zBqrJ%FdC%PYAZUN90p57u7#X<)74yfjFqAAF;4x3-Z*o9|nYaHI~ELYIBuO zewfVGWq)(j8tezN+QmwUXB5$E0M7Y`-wt4g^WSI$swH@S{sEL?_t8q&Q7c8vu`yC# z+4@;R#>g(k-|G21R=$Y?a!-Ma-AC-Ao|dKA2*Xa}daYp_Zkc4M;|%o9^qyF&14G&) z)66_7)$I6N9i}q)9|5fi;~4duZSFP8OqWaGgLTU^Fy-ocqp{JzaMe<~snG@xRb131 zq>(kJqYZi{Fc#{Y{khS|xx{3=yke0bv>BHCwHf|rV*N1mjRy6REe6f*d*!2RfnR** z)0)&juw+h|pA+OQcsNY3?g01lLWzbTPe6BruD-myz(XeUzpwB7VnsToLoEpuoSA7Be)rg zyFDz#fUtt3_K73*iBa~6pk5!O_elW%8|eFhO#V0Y?cp`>2Ho(*dM2&D$h!V6>{fE@ zo>M~dQBm~HfmwzFlRKqtWc_Y1w77+>y>(_`^iFsOxrp%MOfi9R z?dtZG#BUjDnniW}9LGHA@)U2{*B3%tu`3)e-WUHJ!8-vj0O;SNLcva$0^~afLVZUD z!~dm^@xODRu$h^Pxt+0_wWEWmv7McXv8nApyBcNjGctohNF%?h*llfETQ7%wll~0o z^{9hIL750M&Az!cw^VJ&uaX7-JRX3&DH@V*R3i)C1vK1G6qjp7sAR*Gwz)z@<@L6VZ7MqE)8r<`k;kyRltmilfl}44mrJGQ{;V#~YC1pS zTU~|w^%h4d`{P)sISUf8xh#43#ZBg>#LS_Cs*XhN96G~N^DDHaxTQs9i(*}wCrPgo zU32q+v(4)Vue8EIOmr5v% zApvLn6*xEgEz4^He9zB%%VL<_oP9ag2ebEr|Diy)q>_O(_&tg^-y=%*-;L&fEiMwq zrr+dP?|+V^ee8e@KnP9zx|x}It{7ZUSZ2$OLqE%$xDc%?@*~js&Pt9_Itx4OT?g|6 zNKi&j%qgY4 zt;zlNZh4G_UM0-X^V-$9JxZ;f1AedSghN{s zJ;#&2&RjOrU9HIckzT+PoK`aPdCwojIM88?l&{}*3uv(BZ{#|D(K#5@$IX8v+#!)o zlCtkMF|NFr(cQXFp6k%Z}Cv%5yc;`O{BTiNRI{~Bdx955Am=`JuR~817q*H&x zzDS9MQBa^|OyDW52o@;e5Ed*fs+(p{MYc{6Z!kMJkfrpnR=!y%{tLi3GEq6t>?ipN zzN`VCpYPyC2%fARSz$pY7}|0;eoc!izez%ZY zFxLFrRBT_`^qf;4H>Gtg)sTI2Smt7ab^gj_o^s5Es_{{vbW+r@8}8pOuM%P(sfr_ zg(z9yJ#!FjkA*rlxRh$-XCzs$F_35vZM2A7Yq})yVON06Bk|zMlHcYfANeovrk7RSwBU5q7SukGF`K6p^Emg zIbOKAH;gPmqZ}}4$K-?NOD%EIgjleq>A+~LLUoVE8IX_lAE^@TGD-guM5#GRx4DYQ zfR-r1V@}f_?l?z6)dQxZ@C7lr`SCpNT{{I?ud=eg7UY+=$vwR2Ei&VDO|;5(#u~;^ zQP_18H$;qv+LAj6ibkAC*D0B>qV9f0vb2x=r%RrSMlvoI8W2zdHK z`}(OprU_EAX7RFi5|1YWkwSrj{NELlXusuwVt)!=O-0HN0biEh=^3J zp<&ewTWry6){Me$6|7QD#P4-&vwCgmYJGjvvQe|*;@Wv_(|LWfl2UN>{Be_QdV}{A zyFF`)>{IJG^P2t4-n*}P%l%9MuRTTEW{&weBaL~6jjGaHn>Tt6i;?Z1*lzzv6kTTW zu14gxeO%gfOKOU3pMkGY+ORXTqD@N%i@}V64fqi|TD#3}8(C=-8QCe$8Jz;yHj7W$ zT_nFzg$A&ZoVbkZWCY9@7ANnJWIS(dJ3l5S91 zkW2BXh0Q%bu{P-)1$;{xhd_s7vjxHH81ak{%Kbl~h z8N;>nRbz)}&>3g#6ePN}%^Wl|yK8wG{zkt7To`n4+>RsO3LPtI#Ir!0q?;QjmPGS? zuw03$*gnbJOHy{W!_1crTXINq@bgo1FrFv672Lzf(+Lj?BT6JCm@uy)v)rK0)*T5b zsBEM=ua%;M({q5S)XZn3>xIh0E$^g$6 zz8o2!;Jv>lr5#P)&9Es7l zOiqJxCO++aHCwJM9EsmtYLD*u<6g3|zK)g!zGiqd4YW{H8g*c4hXkL3 zBA%gdgW`ZlpvmP-O=#mc+jAy2hBk;1CFvD^Xhs2Oz>spXvN2^PMMt5lEjdt_&UQ-BC4pRtA3!~LZk&A--8xrKWPUb=tJ(yOJ79ce508fFp!7GN6h zdyE#TX|HH2@0qkpAJK1wNK8+5%n!zDs@E^Kj+X#lE0i&Qe9Q%t1urECsPO0asZVoU z@>DbVEa8gPHhgp#M25uLPz0}izqDbJol6vKN&bDw%IZlnmr#_iaUk(SGl`p}4}4%7 zyjeYsW#51Y%Y1J+5c6gYju-7_)6dUTQhT=|_b-EZzUuR~^xQZ&H!^@j-RQ@w+v1*A zB0|ShQr-=+nC6eZ4cCA$?T?bHWbPjYTTOje8QK++)F6m6Nn+?*N-rDsu+J!llcWxC zAtqV(bz!mlQ?+T~rHstT0T&&4GoheK?2KHRMJ!nF<7Af5!RLsVWw~Aj*zP0c*viKN z)4YQwX&ZXv?plQzF-iX z6B*~VOuHU4b}UuTKYA1Y8cn1j^bE@IM8mCK$cSV1!b@K@HR4$rvhyVN*fE|M5hMng z6x~AkbKk$3d*4Ga?XCTmG)98qV$Q6pvhigC_>$HrgQ4ZoxJ!m1!co6y_iB18e@Ko2 zLA!*jlj79L5lFYx^IRW2=Hd~(L;V!sjeW5Q>pVqjb2UYZB!OdF0C(YB1>I$F8tewE zmwMdDt<^{*?yzkHJR8)#RHk+QnnU6jmAs|$vy;q2oD41i(slBFp%Z78QI&(!o9bKe zOy`S8w(Y?-$L>_?*$4hxxr{5=#L&kF)@|fdl^<>X-nkd$Zpq^4*&#+?BJ2ZB_JUKP z46FWL?477{MezpXpcHnT1x&VvJCi zYANfvXn}w1rBg!A>WfS^Z-4I6tKx@wQA7f>V5#VteD*EMoTpHo_Xm3R?eV))Pp;1V zYvYIC4D6p%28Wnt&Iqf^j+T@f7G2fJgcugxcLv?l3>w`{=Ii{LIN8(B54hiUFbOHM zu}H2q?qFVvw$46jK0&AXw$~j`%hZ=0-jy32{3Y|Hi@XAAa$j6tPT~adGR2Ae{mb)@ zPeQU46aBGDi}F%&myjQ6GPd)wbp?fXnJdhdt1V3~uL9bZD?iRD56wZheIV$8c>z}Uf>i?kB)KX13*0IjplwVmdH(tA~B9}S`VT+2lHg)Kv0B(o1Z z+pph(MiQfK13cu2lfdlIu~YQX=KOP}5fE3&5!+GYm$Q=5V>q959Y3s%tBb0hF!Mm0p)u(NkmkxH%t;8=eqDli z%QD;S`OJliu6&%0LkmTg0F(EJ+HSFOS~$tg5)`T}7y=`?7D zISSIBZiUdSnRmtE{&rB@xh5?A#E(%S?E^t%?z-V3oBB+ZZffdW;F?<(8TR z?0C^<$fshX6|XsF+ftH$8}Jp&7hMbtMx0<_)UAMm^^aRx#s&2ghC zXJa<$o+^)#RB#Rs*J!B|+_uXh}32U!F@pkk~Z)uU{FAy60EH)Y}USp);fz%HDI zElCWSG$Zs+E=dTrmM4Q*t5U+ZYE=u#fnJYt==pR)|2ElsHroH!@oNX}1Aq8y5ahij z^p__8&@Ob8+&caGY{lYJ;;bbvzsm?RV%M>SF|QahX?d4t5EdxolJY55$wjvekJpKH@IkYwodr^i z`#Au|tYzzf$7G*_Q5_dZ3Ss0i8>EUmh-H_*a?UVC>kvW_d`0%{l1^TSjizGR=1p^G zhyJcy>!2nMMlm?48Ns4g>Z)@9Nq9}k*uV~6aVyK$4e+8xL8c4Gfzl1;5+ z&Z0hTh?;zx58T15Jj;ER%)QUm zc~m;wf@!C$n$p@u=ttMI-8||?$sa8D#so6b3C6|#1KEV?BFv7Ew-@(cM_TB+=%l?( z^SIk0B3?dopM(&%B%7nubCNHR>l0OYVFWN&LL!cM)F1SU zBB2&DB8OUfeqb{CKQ8bb4GC3+`~mP{o}4y`b{ax34>hAxol|`ZOHW4S*W(I;dg)A0 zej}*-hV05&{sq|>){AF;Zx3!yuxZ@Vi?2wpGKrvzQ3L1N7IJo=^(=>f;o8r@IiTo? zx3xml9fXOu^c+Uyb5#5cEJNSiG8bxXek~+QXhM1WjuS0dA&`T@`_li};W~0qT#iyQ z3&tW7>6t`u&n0#+r2n&xGG_DYKPa>VDkv7yNhkYM0<)Q{SchvX;;0RmxvtofmB((} z)C%SQ(YUv`uEoWFJouiM^llCz^O1JCw*so9(TcAqqXK~HhJ)xYD(sw9tu{@$qmje_eFqxy0R1hHwn1q(%-I2`*hDJ9Jtq-#<h(#7+p~ukIJ6s14o$ znE?`sFkb0=vWtumws@PIP;>Ne83`_aIn10P07TdNTY-;(m?HRRUsW03kH*7^taWc6b$~WaUcc8lf&o0-^G& z%BIQquagm$bePJ!fs`?qYJj5n+KD;>3Z0DU%WY5N9gM}>`|ffn6@2x@Ck%xzmJy12 zY!coi@3QfY$Icj-Th7ej%X?}0FKVR#diBLELXgW@cS4s~&oxng%fP}cDi0U!Q1DB9 zh>3Y`CM-cwDts#iEV)W4_)7aK3S37p(f+wBt*~3T3Z-DY z4TfKSZ%tYb?7f!|lUM4TmSU4M6_%%SVLs^Bthh3HG%2nD(}!G1>|F*lziw0#IxM~3 zGPeU}+zi_Nq_06P22%I98aoeUWwCUE?*PhOdGx-|NXw@5O};x95D_yM6ydcF!{ixT z`^X(S`@tsp312az?HOkgkOLEM9Hc@CP>C5*?r3<&t|2%#u_4pf@ubLXraO)FSjD)` zV#Y0jI1SqeyRZ27cR!D~_O_m0_cibaod9O;j7g8S6VJ%Ez@Ja-iy_XV;@v`%4}X*V zYAZfVrqrF`ZqqWzm5v;AYeV$y5tfa?W2ONbkDTpCh>clW2jcE&Ww!>;Gwd4U6Y#4e zajuwdtx-ic4%uT!H;h?hT-^c26&LVe-Ua7YEhO?LTYwl=>o#oN%*(@?BZvg+5yVZxuu8-eped}= zRr-U*Hq4z1#ceDML$pfat)$frY%zu)qUymA5NHGFYYHfIq!iu2qvj>}btJC4qBz{y zwFfE95L*waTM%sPmYo=Di$U*Q+!F*R_)rBQuhmgu5BBxY&iK1Jj-(_e>Z{6ovP>!L z@1Lp+G4Ep2r3aN9xrAgKO!NnWr_9$hn$BJ-Nv?_my-Ce z0#GN6{_ZC&+wviLfrtjwluu*x2%E4L4WM0Qxd*L=_Z2O7<<%w@$Kfp9m_`mXR4sR5 z99~*mF4N97GQJwA%r@bba@mBRa4H{z**n(ZV!sVk+>$+gFg+*Wiqm}I*BHz#obJkP zIiy+?4ji}gkC>m8lS6NB4E#bjHJ(6A4vVz+%HV*JVyRy3YFI-+?swSEY#)ufwb?Y&sC1gwo?ygMLt=SAoa zODF-hddPv{0tFyD-N8wA#1%c~0HeklaD1`b(e`;G{=_`_1Sb8K5h2x;X`z@lb}8?z zaG9)aG0Zj$WfRq{cjQvv!@59$dx8F3A)=d28biN(l(gef4ARXEy;TAreDF2dr9rKv z6=Ke@9%Qn|BbZDNs9bDy&zT3PsO!$EnOE62t5|E}jHX_-E>}%YdUVA$IKfBXAB4#B*KP-b*w%v-cZA`PVn z4arBUw0eMBh(!Ps7gxZ-q4CDC?b=-S>e{JZZ1?BEsreNBAoW^$mV~Ur3+0hbWWBwA z&gnUt_4WCJ?lX3yZ#rZK#&*+|Q$-@OYerwCBO=5du8U$t(i+W#5TUEU3!ulxO0^d> zlL?!gM%x*vHjy%rz(=*6oI{d2Y>7fgzojEm=s2}sWyRj?)c1G`7?y;(f)O0jW$0e| zC79Y$)Oqt`q^jPoS#rvGGorTKB(XJ}xKd}H`Q;qqc^I?@7=C>m5s=n48%@D| z(roOlJ*(`!I67rtKx4wz{@B*t;r&g7{LVxy^AX4lG!|| zMiNCs#qMg5htpc!#kRbKo^8ee(R`X$ub*u3&y3gPJ=qJ>b81#Xge!gJ0kBg!TVO!M z;+YbC=(7=$a#t)Y8b{y|^)}6^NNcItfHQU~ehvF^evVFUZD<@KzaaANa*n#%NeHc> z&M@{?vc2qZ=!@9UtJ*Lr9#i*dMN|YZ8im9j1B!Bz{)C#mzP@sBw2y$B+^8`d1%+DS zJ&zk1T7qbwUfgPH`TXO)qN7o3`K+Pn3)@N!S%<#etRytd+|v_wLu&fzmN^l@FH#Kl zCW)L^bXXBf2ThD7IU(frOO@nVss*`ldmE%vOb?wqZN)Sp6l_;z(%IF84IT5Znif~F zKk1(uP}k?*@Fcz+)KkjxX^o-CAt=@Etr4Or^&|{fsi!8DEo6p-p)ENs zlWO?=O@^F3hM6O7(e>A^$uq2!tcv7w_(owfr5MZ)3}z!VShDFGD*IlNJHR@-%uU= zz$6wh`F@2av|m6e*PV}03x@ikj!w~WC`6dz1&z$ye?AD{3u{B}2T zLr?bzvV|jt$@`%&3Tln>0Bj1mK}tjE661P>?&!j$+a_lE2s`m8rJ|e+j!O?gWXk2_ zzdm!YzWzl~0{6{3_WXki0Y^bl(-eX~4lU;b#MXqJgt6ZvCieCll11o<_;EGAT3+I} zMg_{=kl6Lk8_+#neZ?W1HE>_{G90@G>U*S8903?!0k%g%fp<(iFD8Bk(Xx9qsG-`u<-APr zKaM$NGzm?GPnP&}=0j(N2I!LUYrg;N4NGvtTCo2_p9{=izbOB^oB6*Zkbu6Eq1iu8 zFx1VRFjbMia*WKa+`E<}4LHrkA`-_Wuu9F~mM3I+82q0;Wl?%Toe=7F}kGNm*}8L6|KzVy8B zY`yM0cf0OpmnNcqKO8Ooia9`aJ{#=RW{ZkMxUuh#iRAi&!*XlPVaUPPBC^ zz)jO}2jC?)5UOQ1Bzkc#?^Oi@@3kdNpR4yEe&Z9-E!+5v4xa0*&&i`2J9`Mp^&yPo zDKqcs2<)sXP)yb|spIsjGGI%fCq_ zA&LOgYV7tJc`h>1m2X~51alkHl4bN|JS7(sR#Bqa!KaPs zjlE3yTYL$v)VNOM?I2!t3}O5d)H>hvtnA;rBn>Yf(&3^AktZxqZJrb;4rL2#%p)^W zK46b2o{I-Y%s`3~FCXB<<~2!!+lE=SB}D?{BlU*!YD~kNDZB&cq7cI8hp#hE3*Lkq<<>Rnv#(+^iON4*Yv;%=vkTq%xVUjY$^E{~4V zQbS5U^AAk}@NLI3q81b_<-F?B6is(ObC=ylPOQ91xq@T654Q^h?f=%ia| z8yPXsc-;*O{BpZc&a>H0G;cvhKMR+vHAGo$#+|amg`AZmMH)`idX~$ZY?e4O+&sa> z*91}j4#GLLrp`?X2Yje5$|7Lu2zpcCu7CadTkd1@K4?^)rrl!SsH*-VkqX^wM zD5*vmdT}wTgB1>B88Gpk-U40GAjdgJwDD2vqrwdb=01R5bS5a>>9;!}mp%xWkf9Fb z9R}r{F2YMK3%CzLsnlKQ9-V=g>Rv|>$}FNCkKdc}(&E4*tD6XE^s;t9B*rK+lbm8_ z#0a_tFl=?+IY)*yp00rfGyCQr%cjWPJjo9f&!O8rl-|+07eN?hU3leOapw)bhUV6^ zqFu3{onw>?CN*N!rw}$B5h;I(?f{Rumfydxn6DE(_wI{ z&U1LzN^5=H@^W2XXt0$J<>H0&3YN48e+;L^Cyt7Y?oGQz#xq#2H(3s9noAwknN7-T zfNoCKh5P1@6FG+n%xFwlH^#lMN>3XrB6W2oZ+{oN@k!{6J?d!H zxdE1J_oO;{vQel}k-4ERKElCoZ;js~BzitunJmVoG)hO>)q6qyB9mM_ZwM6VZ#@Nb zK?Zj*F0pt@m}^Z>BV>Y9bn+;}a6(1dKE~oQh0kG(boIAELFcL+pYNwsftis9IvpDz zu0Fr6PSF`i*txMdAO87JnX*$r+^xO55UIYnsY~7ckn-rFB5B*7k<+Q+(U|hsQBho~ zws={dvZF5PalO32X)QggNZpMt=~2DB@Srh(U4~*afpDXzY^QX)8$*nEQ+5y3R$lx} z0izwD9{MY(2waRoar911107 zf${ZSqmzY~2Udf}u@#Q=cX7$K=xMO-^MCLR`TajmZ+@)Or+%^!vj1-eqUdC7_aC)T zwu+@9rZSq349yR02hL0!vq^!@8c_mMf|5*5BELE~WPycQr{WBuMgOv)GjZDqgqO3> zXRe;(5cV!o*LlRkUgXEimdLb=%e0j)C_Rk~@V)1h`}W%XncejDvsTvyKq~-86rz?S zZp2a{Q^o+K5#pq%t6dmX$fk&Rt@@RLm_KT_rIcwx^#FptTQ z;$0h?>dWZ&jSg{1-}M^v9|AO*!{-)S{50Hqz{xTTVAboB-c$FQUJLVfOJnLEhy8C^ z(cf}ho3({|D7&e(5LHMisj09>-fK-33^htE)gHy?Y$g5fNV%#WR&5}YZN;7)h8CM4 z%C#UVzx=UkuYt@qWuN67vy!i=tii&J{pF-7K)%Py;|7J-9@#|eg=@rGyHkKdI9;ao zG#wavbmjD}_(jRycckJf+%&ESk*Du>{R1L@hevE%Ej?m}c#(=~cZD(|-yQ<`$Iomb zvd#Z+MPq>ulvIu6^1z`!A)#qrs-#*SGK94b31wKc$P#`FVoS`|)-amo)hR)}=vkI_3R8ert1N1>^0M>P6VUVjb`a|5MF-hLGB zF9rMN(?kpOD*61Bjq59&dO_FOqnIQc*%R&pdda9l9)NRBW(NF;ngJK2=9nTmvMWgF zKHQ$h*YmjWJN!a>szRlP`{OTsT&SC$C=U*Xcs+qRa6W+y*<<9KVc=54?f4-fjpMm+ zxe~_)fcI7unN!3${=MT9qxSdPyPfN1G*3_$&+lL&aEA|$1g`;qdykgyfDm6GRPUH? z=2V7Rc`8DO)P~Tr2FxuGhg?8_iOyF=J^d4}2c4r1s1Cl4p5|)xqdEsxXXGK@H{31c zbt(0;!WOHX0Z`W`iG3yVdFM;_kncfj>;)3PvP$fN95QwwzH84)iG|$*9Dq{q87@W0 zNy!$QXhjjo2w&VmFWu;J-~Wd*$Fjq^R?pA;W#CWTAo+JOZQq825X3vG$bZty9ZmCGz+lpOuCR=gzBiHBxckb&~7Wk_Lv59Vv6zIGBqR@ zp@fjcX{ef>g6T(nSRp>1DIx?NKUzuyYM5pjCph|97!v67tDHv11j`SXpgb$D#$Tdu zY+#Pq5A|~7G%;wp8*-csu)B$rzJm$catCwH$6P7QO%7!H{`kKTa7E=^+{1vQDUyeVBB11F*OX){b6EoM=JBAO` zP^z7N(4y{wFm-7i-us$oZI{!hTfRiy#P0uLc|CYSd%^B^67d=COrDOr$=O%Jjr(37 zfyyarT1WauN_L__(Otz7QFoyj-bejs-74Q6)f4>l4}kko&whsSewpbzIJnaq{qMR} z*6v?;nxCoWf7GtYapN+-`Qbt;0ZfhL;aIm=ne+&>xWf1MP{gI^;!({^Io6D_@VB{1 zdNM$9`$8F3`oh#ks%e$Kzm=VxS0^tQ8Gm67y}K+IP{r(Q%KEsYZiLrjoL zV0fy1Dd2JQTtt_567lupov@eHsN`B&Np19e@_N6%S?4|-ovqykm@=RYK_hu#AQu=a zhLPe&i63n8L#L^|GnVP=@~eb`pWG?m$ne92%HC`_ACi}796dSuGX zcc&;v6r0e!t3n=Dd}#jnKMx671LYYw9Q#F$+->Otr(j{K+l@^{cus&_FVOTb|<609i3awK&5O)^ZNPAIa`Q1TAHyY3lEDctT`tO|0q zoDKQS^#(iov@2_ySaoh|;d6{uaI08!%fweGZU`}S=%p}@AQWcBt;Z(V`5Qlg=x{xp z5UAmbF<^b1unxB8R$W8(^>jA;8u-3TD8L1rgAn(z0QZg=hmbD|5Uq-_mk>7mNUHY^ z7(e9I?1}LA_`c91;=qFe0oGf{*r1+3Rqi4=XNVk^$fGx*+i!ghU3vOiao<4ru3)JI z(s}^@Mb?D0eqbVUw$%_Z$hQqTbA4URe&Ii1_>oBK1w-^=w@l#lIKyZom*$g1?4oI> z52W#7wq7ToDHgwqKKlTSKA0t0fwf4Af-cXrE=uLtALqx}8*)mugN~|pQ?Y7-g|;3S zNB##7g7xnhocE8jEjHf&CqByQJ2;vDulV?b(RI^OM*g}rGSOjhw@xV`HqW#Hr(@J! zFonWhZUoeB{HR6_C&*C5txl0?|wsdER&on`yK8unY^M z*AwTW<@hMDFc^Oa?(Btu^BW}T#j)TbZB9qKR}Zm8%v8v!8Ov0tj@5*WM-G4YosUTlFHN&V>u|8^W`eTe!FGw`WQTEA;J*KdN2{Q6m1Q4fRaFkRZvRAUtH85d#xZ zDB`>nRix9#T8+((l^k|<^5~oxs!NFL zM>Mm58tcvZhd2$v^&hio@ zmq&Mvth=Weo?rAGr}HH@Mqx2TQ)C#jYU^GO^QSUI?zg{7QOk1#@HpdsJWTj`eY_ut zROd~+&o}IrJisNpez5P-y&~ienH}$~h)X92$T?S%2jIjE<#ir0Bd!i3;Rusxp;4}R zq9S0di(WTlN~-6zdNEMlzC!iUQ|aB4e-K#>m1B6isrMjX zw1?7Ispkxk{%--Rf+#TZ1dye6$`Go-l5NKL;R@Sj}Yd?Yw0(oD_rIE$zjntaF%WHv~(kwmWMJ zn=(>)luJ8{lC=rVWu=|56=Ic(E7)AUjh33ad9I5sa<-DTdDKGeUQWi6tL@kAHe_gK zg*Q`J87)Fdu=7<$MBZeKbbYcplvv8h9FHCnGr(M31ARIb>ih!iE1WC&h5duzUSji| zOOp#DmNLVQj5badT`Zz9n8$#Ne@7Zq6x+3Q_rScA(o>;+I1w;!J8%zYl71i>k9q_= zWOF*dKFxylFafD>CWsQ{r^5irv+HamOIr&I4Hdw3!zah9I7X5(IM6 zUWAX9nzt+V)A?lPEmWGS=30Zb^G_toIXv5?B107|qIx{qu$DIs$C;AXF-d!g5?APq ziU>{t4l|<%$O|qWO@tc8Lo1G*LP$Zs`#G7t?>m^d_?T~g31xs!fOhgbe9;%NMk zW{m$f5e3U~ig+|(ZIM_qKG4t;-e~=3k6G|8y|2{2N`V6&CE`aUQ<@~V1aofLa>qec z1pB1|*2FJByec!43)tuXBl+6={w{QwFzc*p$=saKpiwESKFWv^@g~9)pq+mxcO{nC zJCgrC2!#6*w0nuoH@d`UgB>^~Juo47m4qo+>fOVefI%~9h07Mz^TW-Dip>LwEr5xg zUqdwOOT0TEf1JPituRYM+ZPN%lnWq$hBfE{_bsC(MIC4!y7|+&fy$}=dIy7rUtJj_ zyXAyHXA3fSHJ#3s%w?GxD@ijRd7SDrv%;jQe`=qg19l`$;4K{yq`t$_a`TCz7*`-3 zOGi{@yH@JrqtIzm&R;9Ela;$iD0f>|5uU>t7DBF#k65)-WW#zO^VgD`DFb;}^m4QB`0(v6ya+#ISIIZ(n*ZFuiQ9LT(9@k&vx^^}mJ3QrjVV9FSB%TXC&`#u8vcK_oSBsfWc0gfGn+MhOj9THTj!wW(!0jd` zt!Kw-4+y4^Pz_twLV^jPYUS#+p!Ay2umVl8gF{-6gA7T&2^`QvrmHAo6fE|nKC6<~ zIAinmE7^iZYUE9#9q-|@DC$O(Mfu9aHX|$33n5PM-B#ODm)gNK&<8o8b{F7ZF0)ir zbfBX2#^I@)^I}hmyVWeMdiyo3SSDlg#Pw#W4L|$Ovy*7pqmGO0hkcS197lkiWp1#3 z3|nmx*Fs0CSc|z9K%=x9M>Wpp@C(S0>6OCSPQrlPBW#l9=*bw`#IFW)?8$8qQx9S6 zfp3sh4`}R>MTVH&q^|lI4>$%ZFVBWl7p1qNC&LM6jRDWpD_Pg(xPHO0WHFG#Ok+uui16J1R}lwL{qcC#)1OTDI< zaa!1UR3A>4S<9%Qo^jgRdAvA~8nu$1rI>MA+Ih@4kh-m7)>h0qzkVvojAX1{wN z&f3o7#(~uHW=0LwjMJRT%hf>;vgW?CFxX<@gZ`EkcrBBOU&X~=!nHcs1V`R1;^%^| zJ|JPi5Qxw&{y9Yx9zNWD(dZ`>tAZBLkHII8tjilFG@ISK~sPdb9*T_qq5WW4& zd%KI@ey~<$g|w@G+s3s;*j5%5&+=}O4s+GWWRN4z7VlZoW*1M-@Vl{dM^T|}^u0h~ zdx8I17f;iQ_LKam94?J$%lrBN}MGuiFKZ( zSlT>qflou%yb@^`M6nN~T;aWeNK&oiYJ>>yEOM*ZdCcGm~g4trOG&aa#d#GWuBvkiGr^hIs(8f@T&NMN>99%M=c ze$3yR*W1|Rin^gDjL^B_A_pWhE<_)Vb7li`S&%3Mw5Ory1exwXO5bFv}Q zL|C7!)kcY;^W0!rW|zH=wRp~c(4lIBI{0acEK!irW?spS+LZm|W96hmU7bP2+QY-a zroS#VI)d187ri2062C)jDzil$uOT4E8LN@O5OcX=yjzlbJn>z9XEuuY=UrQ7_oRkFuxKTq|M#=~Yr%1+mWJMW` zyN1u26OlnGf$7#p8RDdhxRlG0?yf>BGKQ6|=uo<=c)us6_O3Yu85{O4IwX`zINNyh zz`X1R*bq?GdCkB&;7_I__*~m?)Yd_}q0Ri|*1U zQD}-Z!ECNd5{swbp?b|i7V_!cTCLf8x(Mcs{ncHTIuYB&-(Ojy!ZY~pH$Dz1*B`)d zBTsoc1>E<8ltCMJpaY&~ZZ2R}^MIG7vEV#uYJ_c9=3>Bgya_}@?1Ximb0Y|}bFT>T zs$mdrRQu~4YxnNVUx-aQPTZKj{xu_D`L6xz9kRzrUhpbA?b3 zN7_7tcVx1yt4Aw2hdFFZbxeqd9#@UJ+EtBHut(YWQ87I~DnsrdVs5=`bE!8pVo zUhvbv^qCL|U}1=V^dBG+V8B$!G{8hKKu?7d7(v}UH1DmL?xTRX2-EJ|No3vQQh;v2 zLC1lB7Qgrp1ol#EZ|StE!D(>%h66)AsQo&}(flKqc74B}L-SoS^9TTxg1(>NDmoEw z_q1^>BKQ!BqqWsvfiGU;v(-Qs?s8y@_Ptx0nw~&)K0@32+{5Gmmsp@)aV;y{~2tu1zR!`e`MA3pFRDA|1Q}4xAqpT@Gn&zDeIJtzAfJiSxk{ksX$@RPqQ2q zX$YXGA{r_gY{L7{Dp7dTxk;z+WDa*(u^8tK@U4&{CtHgGUJv%RI~#j;X6@_!_8IpV z?j4B`a5Nkb20#XY4!i~F1evQuSrd#0&^H&h*nmS-RM%AR-{Ni*@98m(EE0j|Amw2n zkT;tm1{-bC(V#a|!y@igf=Qyn&U-O6&Tx?Q7@qQ}Vi$)LO71w9-n~|Ef63V(Nf7`S zuw@4#ttO6a881Q`;Uv!D!rFH6ED33(Qj@jmQN!vYYuu?JTjdfp_Q;wewa9RW!5yS^ z+T#rXoyNo!FUs3ZVXNDB&VeUEDuFd=$IP*xoWJLbcfagTQxv)RS7%T*Xd_!O7$!bpc0P$y<1+wiKX+0$24@g>$C#MU0Hs+fz6)`>Xn%5dTA9!1s} zC8isXHjD2)v5#VJJ?r{UAQ?A|5zPFycp11EfTz?PBMFFacyb)qN_eR^Ma>3_0 z$$1hD+$6k8hVJ0eGwn163K@*DP#9yy-LyoE9qxjHWGBmHV0aGsp>mV&*uiBd%0|u6 zonO3wd;Dl6+$2W4yu{_WsCjg0g8w3NYWbVqA*1UKuNrw_@0Rtk-dCl)r{KM}M4jGc zYmj`%1N%hfioKNgy{89Z21oFs2AU3^q9XW^8>0IV4@T-rzC=eIzf|YFW=EagdxHBW z-I=1J?$I)+?AIC`+ai3?1pTX!zOU7Q`yw=?{-WIXWmq|M0@fL7M9@h97eZ(!qKF@p zF2e~k53vaOTgD3?6T}plXB8>JY-lJHJ=)wRRLpg#I=#oA$ydM-&?GM_B+akgEy+A( zhv7GLSLUP#Sb~yRmbF*Ummmn+PwfHZu1c0C+>}?&s1hC@#$fQxzf3p65!jLMd?O>& z>5g9j8yPHkQMQpzHyA(jWw&(%EQ`m5r92&!4=({acwZ;3mMFYpco-dFydCCxvtj`6 zOofGwBQ?{cX8p_|8Ha8P`PZHnRnLxm3!A(PS6BVjNM1N-J`wQvw60t@nb{(brd zhB*biAF->8+mQs6z}mf(0|Q6pS+wb+AYq^cmd~swnyytiD@%Xc3St!4HB9vhe!^p5 zzOFq+ylb7s2K$I`y)2X|-;S-~S}N*lHHteRU2d}$ba^b#B4~Im$0-wZkjmaa8Yx1Q zqqU*g*>K7QXr-D#ucCjjOEglH;Hl?N9XXdEmTO=uvvuX>mAi>%uIs+kOg%#dHa+$% zkrSN(KDTBAciO8qJxluL=ITU_9M0k)OBChUqVY}-k7a-LXv=+@7IgUGk7@kr_2iSh zzM6Tjop*Hga-8{i{aO>VWc8UF`-~;MMrIG0sK@=5!1Thr(Jx@Pz*N-nb&aEZJHB&z z7Qa?CJrz^m1tJ{MR5*}sVY8Dt)coxxIt=`#TyQ4Nz?t+a0`667SW&^07;lz2Fi@o; zM?9A7g9B>xCgHPGnmVai@L2pTRU)p&ZoqYNoi>^IY!?oYyixcQ|05O8{ii{z94lrALgu8h=clf~;-+K{=dfPXgRh z9Hu%ev6V`oLl~AiU8LSab;{wulTTp$e;9km@JicdTev$;dZuIBwr$(CZKKn{j5@ZB zj+2gU+qTUPJ2~@RYoBxWdcSqPwXe({`TvZ%t7_D!8d+Ks2`&>Uj}Rok;njB0V!^GT zS!+HJdAvMUf4DJSw`7hC5JKMvBh5Dki_K0QXru&@+EjX8oGtdU@N5quAZt*rDzTsvfR#Vwc(77R8RWFS*`V*8|uCYLQ zruH!#z)e=upyxAyMpa5J*yYQXvlaK03|6hjOJ&Upqo!L~F?P5$nl5->tq76_xkK0? zwW`De9NRQ+_#sVw0J^kmqi|HSdcGJO+l+B|IvRKAq*`eRI(2>UWWsQ{;SX&t>_4sc zgK{EK7t`ZG-jw=k;+@vk*Ur_U)+;xdulf7khbWU7z+ zOV2&?>VQ)qL1JD|_De)logD72^jI1OgA~cA!$thY?wY3KQd{M#?+#$hcWnq@muQf_ zxgXR?pCd>gHb0#wnu+puSZZ}#TYHklbp>_Ps7wNDw;e!P`J(NTgO-FPMnW?cgZE2VMa9Kois2TPQ#+NQE1K{%Q0BvZzhQw>D= zn7ZI%EqbtItJB4f@uaUQ+%4q8EJJGu;;ajZYNuGJ`ndFZ7Mk$z5g1xG1F#7U>CczM z6{A#Sb!g6lDj~hO4Q70vn)#o<7UOxVay(daHY<(S9>7tjO08r$@sU-IggZs&=_SD6 ztjhDXvJk=L6g~ISZgE`~{HhN0w#2}Y?@M37QGj~jERvY0)u*TL~$DX`kEQ(Q2Z*IFN3-&mqP8yaOrqC zzcOApP56u@P^8GoT}H6T{aP6qWAN?tvl^o_s@ji2ab&&00iCWY0(nPQfh_+rcEQ`7 zX0<3Fv35I|R9bczSTT~j^`Ne}l3rK)K;dwjmyU}DM_HUCp{?~fdi?P7A9LM&B7Z#8 z29~c=X(!VF0ru~A1f=s3Wf0rRX9(FiyPaQB?iiLR7kM@CC*Ky<_X7K{pr2F5Fmt`L zOW8fEzArsxjckeVdLxs`eg-^~Z{NrX;in4@z*4SP?q6o2l-@RE1w;-l@K$kscg{J2 zSnrHSq4dLSkWkuk$92gO+)z@L&C9mJ#z$d^DVX#?wz%*M>ydFx?cyJP)BD1qo(orA z^J&9U7-7mFSiYi9+E!4&+bh9elva#xfHAQ2!QzEz3=tyhRV_Cqb(yS(foKqg6pp|g z(MNlPWLNz+p2RchArgMQEm0!nFZD)Eutd@%Izp>z&Z#uX76HRrz_193EN8PL^o-mzW^FUv|hBwv*9Pu87L=+BBa|%;#tDG-V`ezhBD! zvZ*dG2K^ZH`{;E7&+uZ9S*rKa3MD%5Il_q-*%b(IBygLVL(D`X(Wg8Va#_ZB^n7MX zsofJJvty6zyg~fU;vIQ=__u`Sv-5<+l5ITgdH(NJw)1)QB#AfZIBMn zZo2kop8o8sYFOOST-^Fc$4~U_8E&NXB_%F$4Ao(5G1YdsG=sIbdE?7YkV=lgJ*0Fp3fG3w-l)UhW^liz$I{@Nc z(DvJjc`~$_z6NP+dVY~UQCFR{)0Au~S?QQTS_ms>p1bW3Z;$EgH6t5ARy1?t;txPr zo#nacrv$6x<)LsdJm^>Ul_X@j3w27FXkS$CkS*k+;)&&%l~qjMZ}cWWQtIU-n21qv zFXklZ+wfvn@pe$kXn`j1NY(rLi>K@B%6LvUMOMX0<81W4&1tIOh=|upf&?j-PHenR zjdu<*Y!K1a0dGbdmIe|bd(b`deECkAs* z!uXR3q5yq*FEz@~!2EnN)vudSm?O$#^bKvAy8{A+9uM0H*AiNtTI`mBbcc*A8-HHh zrZV1d?$$g$#aw&BepzOU6WJ8~?6J^jvz`5&4<2~Y=_X3aR&ikc?5)s_A|}ziu{)-& z(AZakjgI5KgtjwHK`tiyOR8ihE}?#<&{s|fRyD=d26Ytp=lilLy@8qi+8IJ{zRvK1 z_2@5Z?hYfwPDha~oF891I&1Xcxg(hj1g^RJ%}*O?Vt{{=?=$ibrd{$b8$w)YeRWo9 zRS*=;1LOpn)F>Jy@XJqIX9XNZ5ha1{rxd-95(8C*8o}8Fs1)0VHqHkm6VZUxPCi2M zph5LI3z}%C5;c#bz4a1OGvt!jMjzVD9ZlpSC4=@Dypuc}MaPwJV+Yp#-x&(#Wb_{w z<>Nq(`?Wgb@@(cVo@EuyFJr@)l-kWP@!vs$uY6M{erfE+BWr`cQCB5v=8Vyg;>oeo zErW2z#7-S0xYL$0@3WV0);F+;z~h_t)I>6N}Z?08e z>TVavqm#V~!=zWSO#=~$_>u;#Cpfu23{rmo;;eq`M^vS57Sk4s5iwl)9c$I4UhXQU zPNaf8n3gCs}hAjuKP7EMXC#6(_ zh|!+z3{Z>JH-99J!?oc*;sVM2GpF{py=K@>TZ<(E(10~YeZ1~loY6XgH)cLv zETUfXxZKb(tsDE*xWwSX&5ZnENzE}P!b zg^kWGM5@ztt@VVlhW3CEPk(t3xPz%f%yX-297fB@`(d5zFj|;o))B}Lok^OZm zI_+q`oxa{!z<^oprcrMd7v4}iVz&y7vQoim1o;vNfcN$4P<17j8){^4qNsqkPd(_T zjA*UExNEFEfm>9~8uOL6+$)}SZS{Q)CTcgVNMEN4FWreq2Dn&6F)Y4&a=FS^lAucJ z)kwSR=P(Dm*P=|}8&wejwc(c>_4+6WSxp%y5i5z~RI$04dK5i?)kM+k@N~`jpv&*C z-!5U?)^#XFjs(=C;ay&Nx`?Z~XP=v7#*$JZ4>AO!^pP(nOhtWk7{Y?wz59p>(T?!| zbFy$Vni6fS6F&|OPZ7@(JjIk0OosX1qM@)G>ONyTrJ0l0$4&NK&!xovd?NCdxc+wk z$-gk&7?Yg3wiE-3;qqdsGA?K)k-13Ga${{1t&wd7U5c(I+=o z5)~0&8imX!ao9_==oT=|6HN7~cEJzfRRfJj-DpU%Era5G*fM~l&bq+dhC}PN;GJQCjua2zjG(@Lffn0w(#~q$n_*D{1 z#OWbw^pQ`rsH|Tx1XDPLa4|a0d?S92g6&I5YbUYqr-dMHH#SoX*y+GFNPci0PEliHEL~Z-0|)c0Tj}_6I%qM4(%t|5vj4 z|A;m(SF^u`T9k^6!<-=EJ3GVDpq=n)utc6!ZV0cmilrBOgediAcqBfEgHEZ9`J^+# zp`WVsnX%2c0)l}uLb!+)Rk+QLJH1n%%sK2GWz`nqJIw%OUUwU|V7cwjm;P2l%=x^1!HV>j+NHk~mZpRqsNpkF-z;4seC)}4OK zWcB^f|7m@RX}5H~+R+lXss=Ss-$Z+f{9;;_Tzz%Tr(f5+K@~2FpZK_$M$FfR&tQsX(jNyjBMoGNS*IrVMGWOQQp&r5`JR95W zSPXL>xKi7)hOvEGqX8WHpdhH8$kX{cTRlyuU9je{ItZ@vgZRDLR%!nfb0|1kr@3

    *Z-`4<9gG2@~&#sgfj$1rCPs+b&mJZjU_Xp~5^IEQ3W|Et$DGpK)(lY!_E5&WQmU zc$xYDZgEV@XRR=TfKL9GL$tbifl8;ZJC;KadoOiMYxV&g!&p)dn#%XEaeA-D-32XPn>3-Si zn+0lO({JbZrwa}Cd5b3Ye^essx>Eb6IiU+Cu^tmGP-5hsS9o0K0%Fu znDKuWg@d0)e*D?QRqdQ64{6h&_y%_H$)soTjk;VUfc20sfGnT5bPz5Lyi_Re&jM3* z8e`RP1@7%EI-?l7R5lPNwNS^kPQ&EX;9nDmeYK7aJScGlfD#AOe@Gmn4sOP_e+P3? z0!Xkb2+A8@I!MV9;R4}C#e~+Jso?ZuK_a?Bb)~WU&pE<4(y6hWRuMrtQW7&*roN88 zzeTvRH|X=A24_Z0A5EvYx;E|vGIF{){h%46gaH?+(L#M2n5yf0eSZAB+kF56{IdP5 zDB`h#!zV_9fjvs}d`(jrv?cZwtYvHwQk&Icj;ZN%*`(oL`>Ft7bf6W^)R~VCur1F) ztA19?akN!!Rxjb(*g$NP$s#59(fo!4i4C{+VaR^jWtJX8-N{}Q6lzIfVad_ zad~ayPW$W@x+zqb3YU{fd<*WC=azwd2+_;IY7b%w$Dmoh0Ealcm9z z(n#ZCc{~27)cB%3*NR~h!o29l`e7k3*)ZMlhzca0=snCLrMkZ!>`c1_J^L0k zd-i)mC1|Wfu|DQ?GN2)|8j28cUg6keCfF4j#(cl=03Uq&?xE}=p)D+!+bH;L_xU_iWTaDtmjm~AC&w^=@M z31%Y38-!38MR{>(3?i@iad`|OJuGlof-puoSUe|S09ph3*OL3o_5IM2%q%}^>L))PL9%Rf0=d@*SS z0$bm}G{w~hAjf|yDx!~VknUeSY4T3cl$I*i5+;j5%76;4bqh%~xwZQx%-s;feUxBE6yqg zh{~!n?8d2X>-&1M`vdS@V%T_}_0!p#P|WSKdQB+ty(4*fN{=l*zOod)f)yv}))gFjc#u$5i9ZY1A~qk>~S z6L2u}W78XJ_QBy>dWK10li#GOB1HpjQl}ZC$Kb*=xj*VLnP-eK&8O7hAM_{9Tyy~P z0(O&S6q(`<-mDOMvQkR}jO3He;B$yMU>(5w7aG%_(Dl*NDj~Ea8MJ}&&r%&1xXF`n0mS6E2z@EQ`yMwkT+Sq zoNgU<)dlIG{UXQ;>bLU92fXXYQOfMd!*uiY;@nzUJzx$#iOkIQ>d8NT-b;o5^d&Ju zl1*)tfDq5lnUfn06=lfoCv`5R9juAzul=9Rp)#(K;=kpo|NO0cK>u}QXn42YLlrqVcEA~>?0~9Fl zVF={btmH{ghQ7Z+hq8)g=)NGQyIziW9A&pWu4fr^b$xuQ2{QxNU^EdF5u$%A*dHKx z8JVj9=6?ovarVkJ66(tbh5~BQ#%kM`QvsV@FiO9HLQ|82MRz*A=fOZ?YC9qzy~ zdGA6ag%S?a0;O{)mz^`62ht!ptjE1||c{t#ik z#vY6LmBgGDL47S3UV%FlZD{YBTL17pzChGszd$Z(F{#WcS40Hq^&CH9Crl4C>d4GG zuOIPv#aZ9IeoWJmulUqH{jGbz()$*q1A_APzw9nr{BLIIUzkr>2YZXZBRPKRFC-6j z^)9*VBeAO+dXe~^-wxgcY8d4 z$1r!M(HH5(IWBuS9CbXVJ$ABmeY`!vZqwrz8}-VQgvaZSI=5IEGDV>#?<+Gou65&q zj{#mH_PBPagVgJB0O4%kR2WUIqbc;=JTizqCo5>C_}MbM8d=?GYY!f4E5oK`4U=N3v`xza~ylfDtH1mWk{XzI9pO6_fcag3}fTT;%0CZWG{eAr>d zK2%$DTUTrh!(GAkqRP)IQkgZyaJ~E64%r}8LUR=er%e9?dQ{oW-OSm=>~BPUlG=>? zKOWuLWcuuMxraY*Q?t^o2PYDt2A|WN~S&Kxn~V5h6}wDWv-a-?G~vE*)(ILJ+%cWVVj0! z*u8*X8kn0rT4)xy9e+Rj#NT3h(_{BO^#A;UF*SeQKj$>G`W*8$hAnw=z%|paai~zo zGs#PsyV};zrQoM;Yt0l6uZ>Gdx}UnT<}NFBFHPHR>@cQgwzRBZM9Fdlj7b~G97Ggb zv)+=~8?=*ZLcfYBYGaq{ek9&AaC=78JL7n)AxNakn=pCwC@FO1zC;DpWnjU6@WFoR z*?VaD=fXzaa{MV6ltOG|#8N-wqHHQAs}mlF?0P>P3RR(;DPxXcwDtq9hWMX3FJ0hD z_GI}NQ4bC$DD1iW0C-04pQ`?Dwsj)+-6S=LbDc7*1Vjqqw!sx~&tH?%K7hj*+G0%7 zdt!Bhcz%|MTQ)yttSdqW&2%B}D+c&C8#$0JweoJ+(v>d2HZWYj4%nM@5;KE*e_2IQ zYAN|cZ=K*iZDz+e2y1;wNQE<%i~0q&lDy3Nd>jqNB(KYz?V7mhi*f@4pcilp>v)Y> z2AHoo*Z1NsY+f)9aiuG1OqZHA4KwTM%-Rz7T-SXd=;s8z(wzUH5B)Fn|9h$@e#RbI2o?D8y?4|dtsi}$OtW${(7j=x z9J>JBMmmhmx|zUmRbJ11;ymysi*+!=xLvW@4?CyZ(_GU=B!fKO(flyy*H-4;)Z5#i z2b=(?%QZ(wk6nT{b`;W;(($QV^USp^qts!3u45Gp*&+pcJTD- zSKbJS1Gd#R-Jg$}lUqNAeTW}L8Ip|<>F^M=Oelv9*N$pE^Y#S?oeL4F8rR8#7I3J^ z0#m;?ST10Y#~5|4Ztjs>KJzZ$kax4CFf)@}BFM2wG_uj+wjV?^vMi{BO8-)YZ)_oJ zr>7C{lm2L5)f8+cFT~yrXvVnLdaN>?6nnj(J)EX`;MDPS`vZe$N)L) zg8Yi5;26l& zYa3nsMbZ*SbWUqP+m8Ti;wA7LGkHQlo@s%|{3bxFpG=oswsk7_IWZLryK>do?1u_L zvEUWpxXh}T`;%5-qTC$%v$}Zeb?Rt)GNDFzF2dx(sX**nD4*Vj(ULPPYseWnz0ss< zdj45+luP~9^fA%n-_BgP%jRFxfFMW-f*{BL41)iT5lM>I`HMLZnL0ciWSa;n)`ctl z4%P_HM&>6-jUgmfCXOT)aN8-xQAsi+ZbO0W@J1-OTO`E$Y3hx#zXC_tv`m_Gbo~7M zrs>zv^Yhsm$0wcT+1fBSG<);TE@)TU?PRznwJFt}S;Mn>BkSgZ9eR<=ytB3$6JWiS z8zwVP&v!%0dOd@7O!zk8&K$h=gUZe zATy>9{{jy_f{0L^_-mNV^CZ_>0Sw6SNbsJRqHGjaVCPv|@|!l*)tuh>(@qWEV%sbGRASDD!bnt;*;-Ms{I7p+2@s zQYazNDq@D;xTUr?$oj{PF`jS6H3=c4>v37p8XRg8tJH8HAjk)b-D#^E^sODymLzjh z1x134?z%RNYNou&sr04U?>nts#kjRGk_Js&er?B?__{_rpC% zvWu0Tg3%9BB;2ViyO1jbfOnZ-R()i0}ZaDri6UFH{h6*DeY!Hxccfgd! z1I%c(`&e9h9cB4kj=#M)H;20e(Nj5K5FTc^+H_H=EMwNq7d~EG2LwL$MTlgu6mN=pPnciI({WT1y)o7K> zNR_n0J_XC0myB(r)LLPzPDevV;aAQgXe*Vp40y>}n5a322OKKtv1nb9wfHkJIYV_5 zzfwc}mKO_dE3m>&UrWtBJreumk`n=_tGgEz!@r}Z-??QPR#Rh*DvJ;*RJ}JL41@&M zex!524{rTMc(a{EMQv_$QhP+`HsPHVXkwskI39+Ww3(1>K~HY?A^Krl5aQkc6s3ws zsCS>q5|2|-b$&H8poGoMSgi3K-pmbket-$k&ny`(gxx&k-3dfp>m<77bqg@GCdixO zk8h%vV&Egml_380rTI8pZqVCxE;x?T%oSm@D&|n{OVn(kQf%=dDZJc7ftD!DZ4;tA zJRV{D-k#UhZK2-A4!HtmUlndzlp=cJ8(fw550h-E9@BY+$Gn03TnUT>hs34_tZ>*7 zf_X#!ji_UqmO+D~If@InQSR5UPisPn7sNgM4fW}w(fg9NU*R&qTK6+l zQW4smzj6MR%AW9&kBtV!4;v_cSpNs{2WJ0FkZvj4UCQLW7a}m~X|AMXqOwu%_if1<*P_0nZh7}X(9_8)vkeV_w)K<~$oTkIfsqulQ#)9BQmh$kX7Uu0452r5Uq8!yzCq<(&ju zD?VtE|D_Hz1+_jwJO}5Zx}Qsr%Qz>g+s}}I<&1POO;aVI))&Qw)B5v8x%xI#ag*Xx zD3M{S9}MZw`>2!Fr$L-{hucK|0Xv?B}^&IgNC48zc%cx=xPEZ1r#w*UBAJI zbrVn$A%y|m7Qlsfo!qI$VrSDEWdim8;xPpB!}ZY%!f(zA%zRa(waSQa-8VD5wjNJ> z9L&FcJinuK)8bMe4kiAD>>0-Lov$MH`pv|;hKkY$Zou>S zCL@;nr4Sndhbvaw4>AW%S7yb>^mVPH&e+!{MjZR_fIJU7JG1BTi&cIEzMLLirx62Z zPsC-^szs15>y=ieh?pT=gk2jJ+!Xv^^Z~!=ba30rb?k9ak#&+ee&~Gc4@n(^d76I6CVis7ww1qkPFI{>iR^vA_8SyCAAo<<-1XMav@+L zsTYn-W*wHpt0dk;KWlh;bE_kg$NE5smZ}CQe4yoUX7pz%q;v@y`~~T)dV()rXn!QE z5|nLvMHxi4Zg{=Cm-_kEIh-Ih5cZUW+@|c(n!{ws;V@?zhZOlA24;fiDw%%dlr?$2 z-#-Dh-Jiz42y^k(Ql+oMV~AmjHVMw*^M4Li@?L_%oSg0wB$3&eTb1C6q$Md=|J~CW zpCBYq$>lY?NvzQ6McA#CNit^o;dBdjf?79-V5*N6`Ks7i2y;c;i@-#z9i%~8%oK(ZjE=1y^RrW8Mr zbO`1Rc~jDj3`e;MPV`m4eyp|OAN@@K5$4^BUozRxcan6*t1`sqhK%$3OrT z=op)N1m1mN=vtQFa#Y}rq#H`na59smdjK!qoT-~P_*`?`+{fRnY>|02^`bz9HY;dV zis=9UvHqv_t4PB}1GHbeL1G1zg$6?9JJLERGzq%u& z6&4m&2Ar!eQF_G>I!OOQaLfQA6CI&$G9G4UIMtN%(P0{x*8arWlH3V0gV7pP*XBaP zKK1F7ct0#?IB(tCq#qP;FS^c@dp`ps>7TU+IheDhEM>6Tyn7biAJOmI?X4QEq!dlO zR)29-Do%ev))pH-wYHr^nN=My^Q9rG&nGxm7s_FIoa5R+!Kt_zev%iK;PU3Tux_Mi zZdsgRHc98w8}o^r!d=}2o2YH)<5t?kB)Bv0pYUCr`+oIP=d$}pZKZ{6-}u;*kZ`VW z$f)w^;-IiARV%U-nL4R%l!H~W8|GvuDs?c;PRRENE~&J_xCxuzokcRue(e=MpBmkB zLtM1^anQFoykfY8+>Km&*Ft$HG-T}4lJ(+n2<}GZvUH>j18SOq_jS6*}^u#48g$T_dB`Aq_j!jZ&A@b>aE?1EUJXqf|30`K+!Q3 z)x_*wHgdAKu+q#Oy`iJ7v#y-)Cn{iwy&C#sBduuL^4<8 zf=<&Yk$Te)0yW;D)`*-MdZW@yLGLMcv?eEsUdN8$(>{fPjXhIOc?q;fG4Ty~HM|^Z zVu|EZ{sFb|h#xi&%+~ai}x>=(qT?hALumWcy6ZpwcHqCs+~*GPSKT zYIL!m+1y&Zwfb}!qT0=5KNe{2zHwY0GsRn9t+bNp`DjQtJ6H4tR0l5YHFrN(T6}3I z#%SoBYtA&t5 z665{E`y3ehO%VU?8hSs|epSyHjOLx}$O1s=y-*3jArAYXRTSEn$PmVD*W_lgYWW*3|he!Gx8`#M&~J1#VSgzmd5u zLn1ueg5|aK#9ULRMb3PYI>_#4P8oKEVKJ=qsCVly{YOsT8J7^I;ctlThrIYhe3*va z0e^I*FgRaRmIuagD?Ynl*Rt4QS0U4}^4MJCeKjX=id`1LCH{(1jl>hS6wVPw($8+3 zx-8|Ja!e78JP_pbqf#2zgWMwE0|4e3!ztvO<2gp~^tYS3?O;dKi=Z{$S&&AF;@`ey z|6a|iYzLYwAo>?Gcnm!x1}Y;eQi&Z$*7`HP^c2IgTg#@$YYKyHHJZ^$M0c3BApg-o zM!6xofqp6gxQu-+9$@ufPFoonzvE?T`)j5Exs8S2=2(;2=f~@KgGfVPN4csQ_2(8f zBR+}^CkAG+P2R)j*POQ;WdlO+V$Lb}&q8#i(wMXAUdCAAj*jtmQcZY+XQhN#dJT7& zH1%c%*f~3ck;&^)iigHy$jmgmP(P$|n2VuD(zNieZzkk&l$>|@ypu2brGKuE)9YvX zQ7Q8AVc42g`-Nw<;bF=X)?lYT5=_fIC|{8{xTE}IfAB0kKue2?OyukAHS9rXOh@7D zb6~As07W)Z9Ns)xTnftlkvY7s9mfV+L*WzcRC!gE?ieF^f_M9uZnudghGQbgXy0|{ zjLc@MV7SNx#9_0ZVSR$rYN=pC%5zjoBJ7{h z8!Ub!H$g+LDA7gH&{|m4Fco)tS2rf({CZeDNSUTj_=vKy!2()5%fYT)LcG(hGc>$1 znt3X6`PZ7w$&m{4CkPs4pt=qB-$DZ$6!hO<-FlfePaOcQM5C$P_naiV?_*e$Rt zISKjI?FNYng0h5d zEwKYUWR<+fa5tLz&Ce9sFANCjp_nM zjQ*l{J=b|i!MjH*UCFUS?F$s<`A7|3AHidWo{NQX_kpQ-v&T+U(DoX^!ndUHwQW3- zqHkG7?PA?~QpBds1XA(yUr$*V!-Bpr8$tt2oTlMpJl`}nGS)6SSPmsx(#NI`EFYao}#hGP02|{}Oy`Ib6s8?b6=lsX&Ie;g5SVTdLUMiKd zBSn*?N68L7RWPR4an|~AX5)%nmW219zWK;HdlltsQ*q z?#ox{YzJ=4N8b#}3~VQqO?YMvYYqk!8r2llh-!s@{lp5bW5Q{m&ua{t+mQadPb}hK z``4k6f2>ygm$OijnjJ`xi0B_33`~YWqawbt{?6VGfr@4)24PTavLnOu=X}Oono-U= zoq5~gHfU1Q?d`9H3a-ndJm%iu@l<*DtKaT98$R|f{~Xfbc;ZXojtN)5)TRr9wIbTU zT54i6`oLY%EGu5e3YFnB2Fd}SqP3ep#@=@ zW4_4f&eWa(Sa#}ZJO>%^S}`Ek0C#birm1i;5*Ewb6ZUYxrnHZX0h+G3c`|~4<892O zo7G%wd1j2QxN#4U_b(!f03Kg1Zr=XWmbHCJ7}2ao{v`Gf|`f;N$skj3f=j z8~I-y=Pan>{EyIP{;zEzRUOcd2IBkI4qF}d3K;zW;YzW_gSv=7VVj^unRHcTmEItS zPMjY!`T5Yjc-_(66wN`qW$MabV9?VKma&&N4fwqz%-1O&c)_6%0WW!gWe z%@o9?xz&^t>`Sjd0U_HncbiD0)JhYM9dmbTAWrEq*p8_bZHzgyZbWa%5i7YYblpkb z)tL?VawWR_F>AxtfEH0wj+O4R0WT>zyPpRH?B4nE-iHXE*AV1CSj2et<~)E} z5UPDZ*DYEQ4cW1a^&2PL$I9?<9?pM)Lq?6|qJJo&5RBxgVdje&!B+Rz(o9apC$ni3gSJ zyMe0WTZKO(M~zeQ5Mx+77@WL=gi{j>z$Q}bnMf0)xKv^ZhfaM0%zYAg?c?sF?-@7J z-lQ>Tb54zEyWa5feBWu7If75HU%yLY%rNk8+Hs?}g6kw2;jr+h&$FZs;BL!Mjic}< zoQZ6i;%_W{hxsfn@R*n1S-am?$`|^nSmPb9&dia6(w*%SESX8pdk|&LcNj>-olm5Z zVjrEMWLsrKp){luu}7R8tJbXY>y;+Vv<6lmkAKrK2||=Cegh$R;(vl*mj8ob2jssH zETffcBWBxiACa^gqOV^005w3VFD#rvSxDZQ)eE~}zEautrS_eifVI}z_Zn1}{cuU9 zV{a@H{2PFqLzTgtKmbk@1sl87loB2)tH+|Y2c6wFjuNth)fwOx*-dMq!0tR*251j7 zhUhdlAHl{L)wYi5J$4gn>^N^Z@fBR1o4p!;7qm7XJM-^_?&Zdm9XXy39Jt#ZY|J& znltrsUHvsxkIY)@_E>CNY!~V~xb4#R8ARId>A`Nb)~m+lbDK!^Sy!5-QW{xMO^q!! z(tNDqfAkV4$1-iRku@bYs+E3P?{zc`tbLvi?+qLErxcN&g69jQ=zGbQYF|B4m~rXf zelbj*?3Y^1j9x=zkQ-Fp3mgbx)ELW64 zJc$X2}i;>DN$k~HiQsT9&50)rC3>jsw9=1Y-Dnb z{}UIf3OAc<9k~?7(>cKmM#E`*D;%9W-B}RB3&tA-zvg3>6flO6hJSyPsNuH~qS>5_g(vxu2rS303c>0m z41HwNU8*TH(jxRUU{HO+Q^NPqmHJ3u=dT}U@pqXOBX%RUNbQRm^dwkcbMb$^$OY zr6s(Ga=UoYc^oosPc9$eB~`ta2?wn=@)PM+8&t<=Z-_~9m({ahod?R)@+|ZSS7FLyzYCsE3wJ~> z4n$@M1eRx@_3^Pv`-TmPkD)EewDS6W_arCb z%kL&Qix|KeOF?9cSr#(7l&zFyeJR1wlly%(jBWb7GhnR@Mfx&IpcW&IPTR&UbciF6gNZ(G%JNB+aOQ2qTq%@o?)qo!m`{>IK5xn1kp z$vnogYj~+1A*+6weiggY{(kzAIj1ZWZPG4$2r+!e?g^Q1O%Z>#Xd5m1;@agcdCvo8 z=d=8_&;9IOSzFYdO@K+uNg)}^^Rz-+&@UXeZ810RLisska6Rh$C@$gqsYm|r-x?L9 zcmr=CzTb4Kls4guWGE&%(G?GX8JOfM02#ZNnj&)p!wpkb(bmO=LqYaeyBSB{+>)H`JcC1({_5hDr zJ^B_F#Q=Y0H1#4zvwg+drwcN3O3Pcyew5;)^c9LF#IJPa5NhvPV;quiPns8e4eZZzgfeYHsDMho6QOyFXxn1}$q>pP7K-)yK$OkwNlAZxjA}fO=uLH~z|8sWb$6_f? zeeqKa@v~lv7UCQx6pQWzlR7d*62>QNH8;oIK|YKA6)~O_0p2|Uc=kEzAASC~Oge4hK?_$UvJAx2X4{F@)X- zl_kT+EqfBWT8zcCe;mfP@4OQ;KaYAGm@Z3=;U9y(c(|sM7NS6=zrS94$a&$M@tj`U zdjGhw?*Zlrx*%*aA`3?&8B9Q#2tiP4qezjaG80x{TnIn8lOxV>&>Yf%h)(-9k~bn8 zycMl4Qq@s(R2}L-O3EBwZZedFkV|8ak{O$vTSiJTMImeSs<%KPeJoyn5;COjHk++X zbqX=sgr>{6okR?r%(PC?qe6#)1b)iK(rsg48ZHdb_G+818e;KY_(hc4)=QYX87%bsa#JdBzU$$aS~;+lQuQ! zusyJOJWi&@Ci!#?0k^D=%(%X!9j|=5y6mg{PeM++WNxLAIP4U_z;)@iV(r`81;sqq zQOO2Otwk!7ImL-&yibL>J-U-F2gaWy9R-ha0-xF@auJG%V4cTqNofnSi$Ui?{^kI? z(sgNV-du%D1#;?SJS56-8|95veR7l1QEY%wXTbqOoW&*lh~l?OXX&1=Pu`yJDUz03 zz4_xl*Bz_KT}vpc$!p76ai<09#2=p;1tvA~hd{bU8C$cc&W8-E^=O%+Dps~uKvSq; z6vSZ~l$?Ci^+<^7RkAFoHLqr>i(AhhieT%4ld5J>rvvOhbIeQ}6MXDRzQ0dKLX*u% zl4M9m42M4Ht>!P7k-lK2J&>GU3e`wl@Dw1t??O#N`{E)cLwxgo0Xw;1!Ja#cjysZ( zTgBD$T%mSvF%Q?+dcK`_F}%Sj7dv9YHU`*ERo2+u`lqSsz!qV-7ih%x#Nn~8UB3si z@DD`2jHf+2gA;$*a`FU|Vz8{HjM5gSK0E4Vzw*)ZT@JZ3Z5!g`$3E=dCJm>NihMb} z5WkXp9j?~i@&+=U`U5?k3xkBBdMel|Z@lvxl!*s3LbHlBvqqHg?A*&XLfU2e^1qg{1zm7 zQQ_~vMXS-vfp$lO8~N}79zhWE=1ukb#lv%GXF7KA6WDMYz{dWbO5y0ZSKb@R*PMJl z%a&cyK3Tk1I@n_Z6nXIjT;Xe5%tfp^;jN9fqmImG&AqWrlqFUbI%~Qy-P`Uy}p}Rm)kHmKV3x(*FiG- zw#Pxplc)QDkc6pI{0lGVjc4vd)=AdXai)LID^f4YHgqBO7{YG{Il4&0EO9A1X{>p} zGKrRa@N~BC_0zCx4E+*TZFze}X?qh+p1K31u$-`!Kz-O=^=3sd(gaNgdhGWtngzI$ z+9`^XXirv4Wl&k8cIV0Caz+Dr>aEhrR{hX5F~y&rx&7LUJb>adO8c# zCGUFfZ&$L3N1{%l+3TX_!^gp6O7(4-j{XdmFRVZ}N$!2*+F@(S1`R`6TOOqbQ%&3S z=PgwX84;$n7wnHBQ*4ot`^b#DkLoEmStzd7#PywYh8`3$0S zopQA?>=Ri^#g*>9v^aDkj9x7e`8@hSKc4bEClGZ9)8YKN(XGuw|Xz6<7bsR>7oHAAx0^Z~U?;Wc+z z9~pzE3LzjsSi>l-CwFVNw|^x2Bi}cPf07TUa7b-#*#g!euDkTLdRTXF8?y%*5x)mk z-{+PsN!x|eBf}d0U8m>ZjaTUPC+;hv@FUNG-oYo$u7(|#<(LRx)a>=awOF*5;s}q@ zh-JM$9qQZ|4&j@!@XM|~hT49vaC=y<%kIy`O85M!vhk+n$|o?s;nO-8!(aBVe1TAA zfge`>w|qi;I-e5AxjbI0wc8{~JCuf#0lwis{0jK^6SwI}UZ@LRqid^n7-w!yZo`z9 zk)G1`wJ8A@fhv>$j>tI@+S)~wVaJ3;V2xhqCX|AlVUPo$!5jggM4yG|MNy}@9P*4v z!Vrf-ImUBP^&)YZVyn~pzagP+6|eB7j-nYxWO80z`s!`cl0onYB^0*$p3zFUcV8|Q z4yN#4Q9&jQV-NA6jfyvyNh=8pO?v$kI;Jhmo1_)}kA8=fJ+;PVnzJtQD zgTi=+Y06!}I=jA-Qqk~Rf+thLw~J&v&2+&^D=(^9uFs&4T=1!}xu`gbM?J;tukWFs z|LEX_zg*NpekUn2xF0`={)guDzjr@|)U};AL{L82+-S7jMnez?BsW+%7Ttto?UR;= zXN#JG+tf3EQKs79>`^5jMUj|eg_6JWPWD0@j?T7r-xgv_*VwGM@g(5bOrW zoxaY|J&NV`_XgW#a#IPeFWEIi!KZ2~Hu~AB>J#jWO8lzPCx#-5s<^4>pf0Mj>P4EVjp>=Sn5+9h`XyPO*| zRDr5%3_;KO(gNl{bZhaN?P4QC^>_p9Nd^^L)y~FA8vUkvy6!=P9I!Anve^LF(Cntc z-LcF7Q+<94yD-f-cO6&>rVWNnZot6smNctVU=q@#(o&Y+BEwSE`{Fy5UTx0oh)Qz@?*wWc8O@b|D?SR z{q?tS3FR~8afb>t7ak;A2DO)Ta7j+jJ)y)btSV6Iro%~ zfQEP;ejEw!9GnEwKzlFUoUa`V>sb@hWVtR?z@FV04}P}{5G1CU=xq5T75U*Cz`Ioik^Rn9D@+L~pSMK2>N`{sdOLc{tH2TS_$O@^e9tX)SmR{+_~Fb+m5Pw&f(U(f8zZ1UcKR8QE~>(e&{9)vF%oP$S_XgEy_ zGQDs5_KwOADwBl6Yko#6of3~bg%kK86OH)2GLVRS#vmsL68)Dlhb)^|S7b2K^p0X3 z!rBmVluC(EVGpqmq^Vz@MLCil+OfI_)5JrVf*+Ae8TEZaROhbO0W3*zljynp_?>8H zg!5oR!842|dh&H1Q?OQwmAb^7Hph-qlPCbh>ltEhT!rRcC_awK9imx(tV1bcf|2?U zp>|-dq#yg2<`@9YIz?j#Zx9QAG~?8qHYfZEs9tX{8Ld*xn&_C`FSs6%b)5QU>9N8x zatBuF1EM+xzIHCJ zkW6z00zG*DNsuBr#uEFU4H$Qw3(?q#A&?3AgCgm>PQtTRYUDJcK1JUXRm0OcKybj z?YoKnU;4KF6L$qe%m3I&5Un^S1^n&X_N|pBiy<3sqaPDw9P1OjE%9(5WuVIg8H|>j zC&3-YygZY5tmO&c?SsE996VP9k@q_FQjPVht?1a{*W39~RhT6TrHrYO#%4G?Touh3 zT4V!{p^@F!eH5hphXP|hlPS(Cl2EmLr4km*n%o9C_rZi9=B-sWf7)Y#yks|o_L{vd zq2!Z{b}JdN4>A?dR}LPaz|9v+BzOr}Ig;XS!xf}i$p>ue*8J&{Z*MYl!G>8|sH@d7 z(#M*)+mWs0^_#KG@5(z0i-+L~6LKGDiS`7C)^Ldv923N92_jsDnfSsQk7MtynF)&V z-n@)RF{Ah(G{sQduc1%2u;;gM9x@t=w0+Zht05=uQtZlmkyWdJO**?$ek6oEobpOK znhFh$k^|H1HyocXCzedB?E1BIoD*L*#ZUYPMH&|X$^=A5sTY5VRoK=C!FWWExnwMm zmCi^rJxO4!9j-g9NYca*AgKj(XF+{EK>n{$!*ODy#`HZE0s;R27B&ArhnD;wn$wPr zt9d2=fL(jUJ8|vVidSrrx;#oWmVh`6IJ4Cn29u7BOS_0C`*{)MzQMA)7)=1=s{`D>m9aC!>b7pf+cYDhvrXWPn zJH_i;gQ%8;JvY`}x0=xp1Cqu(f)sb@lt5k4YmLUG`-G#ZM$$|gUC|YjXP}2R4e?=( zx_;jTWe1#ziH*q@S}iyf^|X^ErwKGo&0Q})y9CfnIbY9xAV@};m6{aX`<*Qs5y6{f zo91z|;XbmRISTc1laqTS4)yxUxy#j23`3WlV=~dRv$Lo&#Rii~(dK_^S~j19E6eS+C$AT2n{6WgDQ<`<)-ss7BR?!HW(L*lWh#e_Wv8V9$iin= zk#BLy!^dk$%1pPb&NSIMEACxa@0?fNmWHM|$!t8eH}V*RQrnI7R-?EXsY6M+%#+)c zl*IMNpv;dJIY5=7KDJ&#f(l>xTx&JI0oLs<8fs-(V&+h#W_gw8NL3c9q|1sJMd5M5 zTo5wS5A)|&6LEb&Nh`a~biy*0wuQ9CFF|cBR*8XGy!?N~#DctvFyyhrh_LgcT+i9*)SQD}HQlP;bp{?y0n^}?+>V2F{JRD2Xon~1k#y{n42 zYYxa?#-vg$Qf7K{?4&)|V2k>h6&IEG)%Us`DwsWAKvI94@NS`ubpE?V$%g@Rb(R(B6g#<(zw!?J_C3~vC@mZL(Pi2D`q4Qzi({B4$i98n0;0q~hA)XOPFjx^lpi_MV1FJtV& zlTfsm@c285nq}A_dE5JFOZkj1v`Bn^J7LazSHAzH)8IdnrnIT& ze_mbxUIiyBf3w(vD4%8>E*%q45~%cL3ZS;oZm9O~EF^#c6l@t9OBs(0x_0ZnyTbcM#4J&<~BO)>_jrV{^sP&dDM>?IIf`yYkx-=}!XRlO|88Mnj zhe$roo&14ol~aPy!3%nMqXI$kSiEzrC&bw91a|8#EI`kWV$>ZM)~|YCx;pj9d+naR zf(u%kYioysPaD1Y_NiIIB~yt?l26@IWL4|oMZ-E&#~2OHCZoZIXEfDIoO!K3HjJDE z!(*!Pk-ImqibEe{_$QZ30$hZc)&;EIV(evY5X%LBCSN&-#o)I;W?#Wx3>&mRr4Iev z1d{;ZajFbbV1R^|AC`?SZVH)CbUDvHgNQ|k-%PmeR_wXrl3s>~ECC@0?Og{h?%qo?MLAD0O2%**vjGOc5r=sphE3 z9--NH)%oWitRKC?ECtVZU&$Tu#}DrR>e=>>11#%m<6`-5yG8%clbo$??V_TN`YCrj zy_r5X#f@ASU}ga7xiY1D?NYfWYb@h=r3dV94ns zIo^%4>p(KPQaidQO6e{6_Fe&~1yu}4eW-`kSHFY(<0JaUmA9X%$Kmg1`B+WR9j$(J zo1yX+gVT*YSn8lM1Xk`K16N!5s)t~|`lko);GobSmE?}H?t;m!tD&{ae zoe_LrOq(ga!DT{*wA{&mB`!xUmQ76Avao(dQ6p8#o|P#j9mfU{LPI6yWj6m;UPpr! zZzAJ-%O5P}LS_EpWO7iSYP0z!HHG35>r}o1b~9S$?Nw8=s+4uTpT)eWg|P*6%$7Ej zWMEN`YL+w#>&bxgJ3U_8ad-7HlS^Dw>rs=W{A-&{CiV+U>mbJ8+Y(roTT;cKO)(@V zSB{uSOBq`xVC1Y!(bg`~Gngh7=1&7OOBPf)vJ&57^$|G^TL=)UYZw<752n@MmTc8_ zIQ1Hx^;>S6%r+w0#MD}9v$}$@#p3Sn7XvNL0AwnxaOYUgw8d(hj88Oq@LM}L(iW@9 zRm>II47J5a?iNR0&eb+cgH%M)_iUbQMBbcag5;?J0eGI9(>R{-&QCeit)#TEteNR` z&FDZzVr1FOfoDw!JEbg4DboYS6f9~HvO(mkvocdhkI7ZN7#>X2(zGip~YAJ0X%ymz;C= zFr^%C;W4PV3BNzGn_K9t!=e4m1^16*onmeJbI2x5YJ)cyL1AX2gj>hd$zbb0kjm5OyMxh?vHTHtLH%5b* z#h8qPt5rBA!tmrqrxVLUoWqdW+(X5c*KXtZjDnF#^6FTN$Op=c5Z%`6`FP@*OH^3i z5%?1ZEMe{UMw~`+Ej+NzSr!Wg2L?Pu2B4}I9nkofZkcW75JY7Lls&WtII~t9e%?SN z%`D4c(d{RMw#oE3P>s~6I4KNZ4By1n9Pe44_Nk#FW6=#PI*7svw^bOn;v`5|df?z< zu^BcBER7%}0<5vNlRIesilC4wd^x%;q&-Zp^mG3a8*&W@d0CXUf%ElPm1wxcwxd@m z<uP>?lj$D`@;8PD>QA7<=V5yV~vDT^w6vdh$LJ z_h(BdJd=VlI!Sb;Hq+t?Ka}pW<8AcL_I*o;V!KE&ElOJz2 zh2nly^>J19L&)uBf@WoSStrRpn$RsLz0i=B;0HVd_(fSp*b@s;y90@sgyR05Gh8B# z(eTjLQ%|thPf~N@Cu0g;Yhik3D$^u{zX%9_6$x@gBRCAi@!ck!8D|8^*=9X|gq#Yo zZ%sgaAmA5>42jjmEpAS!GwWOGQn1$oRHE~3fR0Ws88WwwLpdyNlm#(ao&TcjL)z^9 zj#D{*MP6Erci;VX47z6$^7Md5HBMX5^~ZDuRU1vL(%CFh!D~DT8Tve+$i`Qa>#NMl zpY{bZzvQCKzm+|lz`_(if;B(GP&r~jiLLZR(VM|M$IV6gc2<@|E8q}{(mQOSC#2C6 zYxW3HZ&t4t=^O@l09WsmjD4WEg?}#;Z>^K=v^{BG93x0Qf#91+M5plw?m9INb-rWC zDVh|%^q!kZPpE_zzi_5IKZ4jCWJ28tzD>5Ql;8|dMPl?@&>55pcZ|yjLQ20_xo4MB zfn#r+_a)4w%sy<8PlN`luW$++9o#J`ar={`FDoC1pKozN9%4?W{!+NRHW z>N7O1w!deM(LxRG&nn3Y*7VW%nPMokBb=Wjp6KP2=ROe{Kw5)pc#Q^GgT7`L(lI)} zM>?FY7lGZz1J7KXo`j=8QzcL}43`F&f1(k5rVW-Q4cLB`7Ky*v1+5TV)Ta68I0|;2 zl80SEl4*JPm!tI&@MI}l&tE#8cY!f6G5x%$g`!mt3PwqwTH~&0fJwmbxYl~j=-^H| z469M|pnNXo!2D27RltXE;Y@1?N`hFT)2R2wV>R_&KB#f&lvEG9?k# z`xh_|XUY)#`#$z-!^3;Fh6$e9F)p?MwYy(*+{12R(0n2y`93dQgAco1!w>Gm5Aeee z^1~0c@+k1NzIHs(Z1_L*)AeG?yr^;3I7-pUTD!btN%)v&tP1XOE}>k<>nty?1+}kDJtV(%F+4c(fZRF`&V2c z2yM6(mb`uo-C$y67hG!h>tA0LCnK;^5Z^=I+~1Nlp8toUOxe=T+{X0(QkAK#sUw@C zeEF`027iYpl!ZWD0!W0bz2JC_VItNLkrp?JF$M-12XnsB%lg#yRc80=X=F}eOM%N% z^~t0Cxuy4w2?PYr(<9-I&qUXYd+N?d_xsb#%=fUvFMJ`3L2N;mT_MJpU0Fye2tJuR zB2q%!K#=5EdbC1F%pBP;i&>A+V1^*goHHM;k)Xbbs$fq&odkpX6(}Qco1CLp55i!o zPSQOPdb)=?iajX^`vY9a8z~QY0)|RDJY`r&d?*m_CQMdSYfWJRIgC)N=W~E*c}nl~ zp5+h=`T=IL8!ET8)?HxTl1^GnA|kgnwU<&y0aSRffGKk(18yJRlv0Tct1P)W8g}qzty%*B2j{pCNH~r+*8AoAJT5(G|hA^ z2Z}cN#vWIk+{K;Vh=G%ytg8t32KShZhIa~! ze8&Dv3xM)L8x%h#7@(lltW?5wb0{<`*4x0!%{qmY7=*ZfJEMGvQW!QAdCX$0Gv}PO zngW9F-TY{}txnhRH^EG25jE3nDJuR&UOh_YeGsKFKbvtCvcfH$HGAspEVy1kWVdcq zPdb=_pm-@TURH*~kM{76iai7YdNNEF4p4$-%Q_VPT%NLzi%t=2`n$I?tT zB`fKUkq>dTK{LYC2QXuDIT*<#M?`7Nu4MvaB+-AuRFf}1jQ*-5M&F&l*elG zm-nOzkOxdyry_k{(oZ@oOH%J4EkwVyJ*Q1nGBM*;lYaJ1Qn?k8-fy}aKZyw%QL^gR zi^?( zRbpOvC&uEEaM0JoE|01OhThW`d8p@mYaPNTe*xnS6A&IxXj5oh~h+0jtWp&jb^ZR`ayU4IRJ7i$n+wdvgNrti_#{+lX-N<(e53iu+*bl zb6`T*2Oe34h>Q;n>6m;r@EXR^?m25zU46&zWA` z@G8|Pc3oLX8l#j-@1^n!Hx&h^8k}Q|Uq*Mhn3KPx5YZA`2sjC&+JRUSR_L067Hv&{ zc8xJQDq40rq7w7%$#?3XAn@+IGqt%R@?0V8rJ8jF5iydbvxXLEwtE6v4FoYw?tRU1 zjUSfCTvHQYj+_A46)@pTH}}FybZg&w+M^pm)sK2%@x5&#NkW!TJO`|16f)}!N0}ly zt!&6{1_J;YPO%?l+!{p&)^qr$H*PKl z>yNI@CQHGTkfSX-EaVdSl4F*{G(&?c$ zM1L2EPOj1!t`GS^PwDY(2L(gzAu3vrp|9$QR7>fhD@qyj`=-<2t39^=Q~5R&zBYyTb zkyo0TUBd2~jR8y;mo!YHfnGs3 z=PJS*M_shSVmR`4+K!^IHrT{~n`Re@%VC6JI<=rh49f80G;A07dGXi_>C;I?#Q*NF zfz}ktB}a_RBubR#?x_zHb?lNio)-7U)$fAjBGyqvP@5&r_aY3}a4eT|YCJ!k$XeB= z6U)r1o7a*hFA1kKa)IvAalBLc4ZiQlYb9l&6RwTUTF#Yj)PBxaBnl=GFoI<{wPFl& zZaB9d4y|d09WH_dwQcm_KH}=V(e&A3N;d_4C6f&e+qNA0!9i*$iq%~`cD;rY8`KOy){j8@ACEPE5`-1p;HL4lP zB2O?~gaHe=Anehj-yjpQuk&hzJ5A-b249&dN7T}|y56ETu+gtG?1i-IuUIp&iXVYW z^V^+vlaz*CSXvo$jkF4=u-ZDHJ#gpGop+Y*W_%ZQP;thB6> z>P)E5(~;a;$O!FJh?@3crDt>g&%mCyddwAxvh10q$*@L;J!MGs$=1I_?7s>;pDAB6 z@%=~4d7w^0?y*M~DPF_ya}RQnsL&H`B>~FKCfVng@-=$d4+56Xnk6ZD9GGMzT1?U$ zMUW9N*q3rt?6W_Sj9 zzF^QJy6c-X{Vif#drbX7V;RkLYgnrHFHt z%p%!m;gdz|#0f4f25<)*ghJiYEM3Iy7C=G@cRWexjiYSD7wc4`s&;|}x$izy$w18r7u$xC|tutzj<5M}HWwd8bAD|y!o92k%H8+Y z&a5b^(zpVyjza8($Ba@Rm&Z^;mP&WM-#OlsLrOieUwaL+=ehX3H6Gy4E=hU;!l^zXOc7_bw=7Be6yd2?JJeK~nRbq5w0rio4GAQkMUyQEq}XSe0{vun(75y+fWW*{8z^@OUjBDP|h!z;$|sB0Z4X^3u>V8 z8t<*mH_biAHfk}Yzu!A6F^Aa*)lSMOZWUUjcv5o(B3QYs_xdfRs*5u#fyQZDmg@m5 zrH(12;hu46smMncd5+6NHW$YSd#)m$%1pO(&7H9z z^msw9AYpfue!phg9-wjh{2j@=2Nfmzb(P@2G!I`iJss2D-5Mq@Z9HN)n{%1rpN{v@ zrcrnX=aKZcHhTs&{|17V&T+#}Gtyty68Z?>50EqhC(fQ(quKHZ`G*`Ed;9|37`I4= zk{d2=n9!0mTj6>Q>YnHxW&#Zj44U_>`odaE7?e!@D{Uglsb1`SrFN_pjIavonVM~l z(G1nFDVGd;SSf*Gq8E^98<~!J9><>5H$IE0!xp1zUaEf6I9e|PJp}F@;eCD?g_r@D z0KQhi^oBb8)zRBXxZBc^PVlrNs;*S02Fa1`1$1%0WnT9t#U7G;|0`iL`=;fYV)MZ> zAZt2U>+seyO7>RJtWUUVENpDHP~?UYXa60JN0KggKeaR+bi^^?0g;Ptb#v+#-T?6- zKVfmeKh~5DCBY2C6}hTj?T%#5sF*Fi>hRh1E2p)&ta-! z?2>Jb@=}#tfjy`gO0iffzzGgdy;`OPDhYni_yRXnYnM`?ij2CA8$B*JR2yq8&aCif z5$m?>C3g_}I<_5?me^g)njV27y z@l}2B)yhB886z1}@b!}QP6;%TbUw(K?b-T`7PU)z;>KTiCMT(-^-fM$)PMJ?bsc|B zh&baREr#RC=GRuSRrfjvj2&fvPU(rIc(nIQmc-KC5q-hq%{0OJr%eqNpua3I?nzuV z!RaUD57hF7uo5F{Z1hQwwQ^9RCAwOcJ%~8>f@tre^m)Zn{_ShZl)AbsF{~y5b{{}> z@x*<(CfF8Na?jVhNIHN_YxRR69myXZh|?_`%@Cl3KWk7dn^53`+F??O0Tgyr9KckJ z++8VFN$O|c)267^Q`eXYkh(M^om_K{c%=A8iuB=Y?YrOSfl7-TqFi|T-x<=5M7vla zk5HT7!#n(?HxT_<(eQDGBAmF83=&~U7V|U*W@K1plJ12wBj*Mlx-xoJyTRqJ&J96&9rIAj zr8JqT$b=gl9qQ_+resdrYCbp!@|pQG)ub<%^V1abBlxs~!)cD#<0V`Fy^Oc*;$ivYf9 z)_UXbM_ZX}0h;DC{^=G~byO9U^O}fm76P4K5#t5H2QOui?df~{^kc!YWHM^T>{h*d zhBJFRjkGG#TgL!oseh{*L@-*0sBQnx^-JzcEr->QOC20Pb@&k-Ulw;C7u=hF(y?Y- zLZ9CVO8?IYDw`Tx{40XZ|B7ITBk`&ojO_wlR2$+qglYck|dGK%FG?M&g)OV zq}PJe0^C$Mo$XC#_w7mVFArZ}go2y>UeVau_)67oG8?cL=|40Yf)NTNqnWG*q4ju%sPLT!vh&-KQWTxrMtD7}oDD-$UpeeFO`N}B zzIsdF`@aEstT~mk4SnH}}8bapMdYOH^ z=}I@DlpOr|d=Pxlqs1U*&2W(aTljO#|raoO!o_uNwvCS1dp!OizO<^}R!m5cpl;2;|~GHc<4?^oOhT+?UuD_8;mPZ5AD;h+0mKlJ3EO=LWRwOyi^y zL1JgDQPKDOM_&;TJ0-})(27-q8{-iY6~ZY;XmBD+GsAJc2+1A>)EKLS8nH%=txg-R zvG~WOuq%`0{$y=eGmVcEHKryniRL=o5mj%2j(w8D`^g9!K2@+d<}(Xhd?{sf57bDoN3T$!zYZKdXtVk*WNBu)IEnVt#zEuoi_|9Nhlz0JIPd=P8Wr}VGA=6 z8|G7{S!8c7%QSPQlRdLUc{dYPGhgd48TMMkXb0%yIz^Qn4OWbW_lp$Wg-1jy&Gg*BONEaz0Fy`QPGba04f9RkRaxmyWJI+pVzG)r ze&{&^fHQDvxz8v9r!S#w1cB=9S#7pX8j4w{-EXI=ogFVUAog5zAh70gTI>5mQpN6pkT;IqHe%w3|1tu|i?11ym{1<@| zgyBzgNX%kqsR2?d$g>D18Lxu8K*sw6l@d(XVpLWMlq9rtM?h&iA$6G#fzBblf7sU$ zpO;Y=Y*_^TEh|DeXlD2PujStKm+_6Dmn}f7n`1$Z1Mb(4tUCQ%X~!NE{?7<=GS~PB<()Zq;Cbp6MJ#-Zcxe>&!%8CkqAaZNAuyxxPR+}zm`XiWoBf<&mWAJqeH%%k0gXO0tT=GLMn zyPOiL$%iB->UvC8USt8$mq^h3Tudp^%P2P|=E2xAVd#uqq_9DJA2U?7m=SsoBkRm0 z%CL)<5lGhQg~ISnnzJTN-qeju-aK3Moli=dAJ&con%>`UO+zi0r$sv#Q3H!_Vzx02 z=lHI3ZBc`6>L#B85Qbp|H3D-Lu(z!A7${y+Y=Eq#N|qUOz96ielwlY;3nX>|*-o}E ziPhb8&xMi!H}RY7F+E*`XZ$9Hy4ty{uUP1)oU`-^Gj#h`y8UPSIV+-$4r3)PII~UN z@978;B0@p2o5)})N&ZcOQd4_-2}4JEV8bD znNu4qKwaH*6F%%ZIcU*#qO%LQv^IhcN}lmy*(hLOc~OV*aFZjBX8<>_PSL`QzYU>4 zPybb1-dau-Pmtx+T{$kMk`LQj+eWtmpcZPZ<{-xBrUGS|@Fbb1t+?*>EdMR>GNcJ$ za+j9RlI;U*?yp6b8_;>I?K)ZqQX}Oj-MV%LU%E{QSHbe9?(P|QHl-8CMNM|o{BR11q9NB)G$p585Uqe0UJpm-$({j-)Af_2V#QA;Yl$R#Uy zDr=X{bC?^Wb=uq^h==aKVbvaJC~DxbiXxvmS-dc3lA;Iom&mNKbuBNWUWJYq6F2I? z-ivR@1KHj5+p5#so2z=}KGsgp2M^x+&Zx=b4%8>^LNb?tpqU?Hkr2)#+nxANZo1YoxTh!ht=5V(>1Yg0bPE$@dKu&SK)#W ztdx&cG#QJcx@sBQ>Q?zZN}r`t?UOt#S9xqW@tu)a?|blP4Ai%$8a@f~6`BYqa{j8* zF<|PVzkBUop~qykXZOY_*|i+!Q{CsmI&@R1`$H49+4L4W)En(Dl!F`^hIVL=Zol@^ zAN>*AvoAZyn{N}bnTF(N?@)hQt##22{*6wp->w;WW|(%Y=`=GtmNbO`A!qUW3s62XnhTVD&DB zAh)Zh_LuZJ0rpND*P>vHcbmaY=#-NljIc!Y-3Z@<>^WR$S2&2e6s~S>hQ0{}3Xm&u z=rJts$?JjfD(4z@p?haWut>cYQ0{X`8R2GHFf;p6(WVP`CRMGQ0-xk3L%J+NTCujY zUB8v(Nrv~=ENVl07yMpZ7@@W$+%J75S>xdB!Xp*klJ*iw|BRTt;E20+dnT?v#FS zlNvF2($Xy6VbB*kZ`2D9Iba+F**b%0BN>;;L}d%Pw_~W!wf}?}$2b^bY?{aL9v{T$ zU1Ad3)6&r0D7G>PuQGhFP%WMt9vA5tCR%63P@Hp1YzAK$w^+ExQw86k)N!?Jb40YO z-Ye=?HKx~;(Ex6X>HU4|8IrtGc8^6&A0#BA(59!<_Ps4T)l{~wu-X@nwHU_^1t%~x zbzh;7W6=O#6V4FUpoeBV&|l+EEY+9ek^irESUq+KJ^Iwn>ynHk_+<9gLo4f>G+|rH zM#!gMcS7UL4^-bp<78xz31X7VG%9~D?h&=I)nb+M;1FfdzdrOpo9vUZ|w>4_#GgcO0)UEH20Nu?aMjg)z2u z!Z&R5d%0QLL#Q}b>E6Qlp~0enD(5L8Ag%6s9Op6Ih9ksP;W$s+WPp|?2R63uwWthL z7QidUCIR^bp)?kBj~5~lAx?ECL7LNvB(E1C4yAETsSFTX7nTx`NWwr?`(KouV|%92 zmaQvE#kOsuVy9wMY`?K>+qP}n72CFLn=)8Tb1x}wT=X8{ss z$zKZ#5p_uv1vC(g#W-@~#IeJSESkK39EYP7w;HZ#Xe6pnW~w}&HXt<|kXXD}QNjZK zyNc-dL>NM2aGb2fc6atT}1c(SlQsF~sp_Ceh+E#ABwN`o9|rjh{sNKU7#zy$hxqn#1; zcpeB-QZ-4E%m620T35X~x`HsYg%(75nrCQc(t)-&7%ccAOLlJ6$g z&TuR3zhO%B8jMzdqiD!omBW*2TN2I=GE#Mttp;iJyr~;AJm6+*C|{Ue%WC5u%T2n{ z9?iKsGAoAESF7_s+Y2ZB|x1QJr~?x^>m@`fz>y%j8xfW3B}Hd&wsY_5X;o{-;3okFd{5l@}Fc6_ihH zBqk&gwV!3XU<0h%NQwmuIcTL~7X6C^XeYY}A{darm{V12-ObV4uinhi+nb%_rtRHc z?Pb0~vOY0CdLBEIM|h=(ifvL_?>7H#eO&&%+IV{2J3ZL?;jkMAKW0-AWv~#uhw-$- zDWxdT2TAv5(O2N9r@JBG?OCh?jxNQ~X3qeZn_VdoGoz{(#H9ubeg^;<{VW@&qu?4g3>?cGveRa&}+zp0wl3CbsB_4 zB%_*%o|tn@GT$W>EJo%-`_xqFJ5!433p^Z{OCtZ#5Y{ScBPMB1kPUw>c{4^~e0Urj zo?MZM?W*5alnUg2rq>gnv3eayv9yhhx!jQmBz=>v7`H2S5K826`H&0a5y&ASgjoIx7$TZ2$f+ zm>`s=h>LksLCdsD-JnaOl`i-qa6x*7mP)jBP~Dxg6>3=e-X5_BL!IfEHm1wSqVrcG zm9EBNV^S^|N<)WX1N2r4h2=h=^w`KLSyOPRq~09EYKFe;D70YQ32}TwZYbVuejbJg zL0np9h?zw@Y>7%#KM1Y(g*h;GR>Ko-%?`LCl2RdGU&5U8t1=S?@hB%;4xaKgg)uR* zW1}}UPj=#=|c)IZofH(9Ah$i*u9r_MfWQG5b(DR!eM&(Pq!Q?Ftmvm|( z$5SzKDRWWOVPNLa1g1Rf)s%z14Lj&_^%$8pobwqIvqUl(JXjVdiD#MEj3z{LCeu%1L!VHu7> zp9NLHD~U}X@o?-X^+EYJL#&=?k~aJCbo+7AhHW>u=EKN}{3qDnlSp>SZZGI(WoQ?< zH=6bOYZRh4`xtaYX$?FkeA6j{YVuKRri7k2ckJpEd zCq?VV*&yG20{Nuat+I-W5))8L*(o!D-7FEC@#u_o|qtHk4m4`AJN=f! ze<`&R5+@+0QSmLy`LzW&h|#d&e~6eY3VNON*SvrJfk2wFTf9P@htv7J5e4O(+}yLa z%@NEpM$KAqbW}4h9PNf4m2p#d_x0%r3fnhEO_v)xcMp+9P2n-EeJf{{G+U)A{H9C& zw|VaMsNrqIkoaPnFmDf)R2qCb9WqDd9FQC{3uGY!%AbPE?#C7C);3!j3&7%#AP)ad z>=z*e1bq0Tg(XLcQ*VA0^a^0FEg54eu~zlIHtx??&zv@B?go5s>+}5n(tal~>sy+p zZ{aWY(OWP|aLB{R?@B=FOCTj>l4ttm&X@P~FVl*ttn?elZU3Ee3k6g&g~5QHF9QwZjV*dMY=$2|uk zW7Nl3Lr@oH4pT3zT}0{AJ`G&+!@v(*yCUh3T+)BN>FqQJqdR)w^NHoWoFSBp z_8yM)%RM7glMeuRLi3`SFL5=<1DZ%4wFoUCOcjRExM8C~XOUqKNSaU`=qIP(a^x$aFPwEMCOFTc!ClQ-*RcJQFfhj39=aV zOrU+4`MALPW`x%G=ozN-E#b6Z_ZPh-xy@5Yn2kheCPSR^t81vXZLk`IaQy`odEg4y4pW`8gF3{RRmB8@m*W20^rGwMneZtJ{U zTM@~d*i~EWx-Jy64NkhWOBbltmI`>)%Fi|5f(A&x6F=;hEjadkU+KFbsO&m-};-pEeMWrqlbx z&Wb+kB)9-6!?ooy*sUtVb!JgR%FhL*>*dt*!ZG5#c;r2O9>X&xSlh&!+Zjl>keSd83LMUAunx;c9loj{H*DmxyfGB4dcAQ#mM=MufB^a6 zgt~-XAu_?=`-oKoS!M(9_ebQ2^bH4v64tRx%eLBK(@t{DMMv|B(#WBtc5YxJ8GvFe zu@5{B4ZeBrJ19&A7Fhb?G%RujXOoKbpi37HX}zJEU(?tOMmqQ(eTuYG^ElroXe0Yj z4xbzS1jWIte)g?%97*1hU6~o9G;*RyMt=^?0ivpraXx+b6x_=6M%8e$TC_!?AXLpF zlGAn}A#ALfyObP~PUt1f9;17i<9Lzt5J9HR_u6^Sib0foR&&=x7h5^{l|nGiFVSeL z(GICpf5ZdBVGGWXN7H~uN}iwUqoXeKW(ljqW`T?xO8hnOLS2Gch04 zy|aF8?%q>pSpH&vP$|Ioi2UWv_V5funR^md%6gLL%5m{9ga0RAIGFl4iB{1z`ogVy zLD1kDbn^CYJy=v~h0rmaL?ZNJrJIWNhJ$a1)a6>vDu0kJeUbZ=&PdSgJZS!Skkgc; zB=*~sTrj96(neQ0Gqn5*k%dXjM3jAFGSdDp&6Kwf?>KK4!`qNU21Kla`I~}5invK{c_$tT;zvibVh_UfS^D$}Dgb95kePI!5 zZ@{y+advre0LQkPcgWqdoJI?f@^I4QQNE4GbDz!J?dgxS1^dP>UwiVC`#n|?*ez)D zirD(*-vt8*Cpz~C$?hDCH49(hbsJbNkGqkZd@;*ZxW`s>R;f3A8B_~K87eyigj;YE zirG`sVrj>IVhP|WApdeWn*N!+f5&) zj@^YjJ4Co2QKgHg*rvZw$M1bI+%bw%aPGdr(Iu&^yi8{8q7mivn^%>MX zZ9ZIb81tROf4$$J`=Fj<4DXejX7s@NkN@%a(7C%qOSgtuAH-#KewkBZeR_7yp1}HM zVt6F?fPkoZROc7%PGsFYG@>CL6x}D zhGmv>Re5lpUWtFTE}zAc>pZ39HB);8Z^iz?bjZvNl6dRP!NpHViJP@^J_(nQ>e+-BW(Y@NsE2WjX)d z3GLRNvODm-ha4=-CvFY1gbj>t5EHbjuyMa?w1^wyOuUwAp{?$^$rkK;nIp7165hTy zsag?qbIx?KF%J|G?Ln7;F0o{In;b4aOrJ4g?)~^P7Z&6X_aw-%uo)+V?<9J`8g1dW zKlt=w=cy@7e2v>&uf8H2+R5c=!XEy!$i?CQBYpQ|CiQ4%HZ^x@YKMCT`u-J>@)cxq z^(uFza36c`$T_7hTvY76Dh`^8MSPJ+qn9R(;Tl~ zk6AS1-5O;7o8tOv0K+ddWnbK#?=dUnSg8>na#op}Q$(V2Ya^s#tEw8DWNaN&BbU&6 zF-(-YK>_LO*MZ9TXXttP`?~x7r%_g6OM4hO#IrNCh})A{Z3)5W5ZoFWwljc zt#XOuhkuAppse>-%S2Ur%BtqZ}66;f@vf`ULb z{aOwvU6#2~Y0WRtkA>A#R>}wMVhKrE^N*7~D|Yne(|z!L5!(evhBm=s)~C-W<`qvd z{!~XhtYxzAv8GKH)9ty$HMv48g95DncFA1tK#FWBfsgybZ zJn|j+@q+6K0d(%#8%>(1UK)b(9+CnfRJcvR?c!L>?pJP~X&X>to>b}XsJAB)Yp@wX zS`PS}w80I@noH0X-8Y(k5yA*8Q_XkS0DeN6ofa;sY_dQVN`1v|wq!kY6wHgk(#Ol& zqTpCRgNQ>#T1#M9NLdIA^4T&2VaHMkvUBK_Je)g*Xdp$|Dd1 zRD<21IE1yjjfiTHuw%g{AZY5Nh1gazey2n1ahi~A%A4pxRyDAM+tZuZ7h^l#46{xV z;}fE&%1}K{MeqqTMIjs)!-eIxwS#5Gf^R%JKRs=8-`>Z zJ?5GAt`UrKu5g?PJfpU@s9Lc`2j8#E^pH%e|C12)E zp~C(H_hip!!7KGnP!+MEOTjG6gvypzGZIq?r;sf|(S#KAI1tb-bNC_x-D%kit9~Je z=V(kTQtB{}QEj#MBp|x86ImDOZy98`&IVwHfx|p0F9t{|Bso$P!W`LR zwS~>e>CqHnWBbyRL{hpQ3!lw-MyB=OD$*1^TIh<8TK zkGAVJ@)(#kM(ef@cjlXb{k?BK{@Eng{p&u~P@%Gy-Oq~?GfFQfut|8WVp;@(*jz0e zAk`^q1zuPO)v+mXq_U4s=?;JBESyT)wZQXWCglG)6?z34bdAK)G3()(=3Ovgyn_N} zili=gd#lUGXpjRtBxmKwSoH?d*0#o*TLUuZ0z&)-)aoC5%W0?~xF1tXF3vA6qm@nK z!B3baB7QfcVaELq_P;Awv0wJl^ShO)!hS0&|L%hNUlsfh^O({%HDU>M!(Rl;6a#3G zV9g`M)(qLkq6Qb;OdlJYfmX;z9|E?=5~H7B5RI575~n0S4t{Qy%=B#VLrgh&?V!j} z#r(DQRku-m?ddbP^Qq9+5OC8`K8M*^wC_ z?T^asakAx1Hv46=AQ$M$NWwz%Q|_H-5uh&zLs{EM0wy~2jgW{Pylq+u+}8gp?pc`e ziD1w~?1TMIGPc9w#>KkjwAv-beXCWEELTM~*O{}0rWZAN3j@~gcf2^)T^&6*&+wDw zjF(tZ1APdUVqM6Tq|a(w%#1PjgzEK2-iud2A1X$n$cM`F$7DNFt75Y(ykpIr;;h=s zR6#gC&QQ*QGeT=oPq)!@=kDURHZHDsX`ro*S9V(+<=oxSQT$6944j56sm$8Gm{~_T zcFVM)`Gm^$ka1LCkCM~&STuA=ncNvxo@3^u*k@X$-s;OD6F?~Dw*oYIH}F)p0tCygT4Exn zNbp#>9&!ea6>CMT2S=k(zAL0=tVSo%_&cEMtmU?`1&`2(EWU}hIpod{g(+iFmG_zf z#QGqJ=|wN2g-Iyhbx5PhZVB~RZA@#J2A&-+uh$Wl~vKha80-Uzv$|Un?B8Yv!n5iSnYWb_#LYeQmxfH)$9qOg@Aleb0G8 zORycjYgnu>9w5nTBH3YH#{me}e?U|iEVtDJW*Y?UNUz)`>pV?aOshi#sWEWI_vfIC z(r={jCs>7JzQ;@T^@pK*Qz_&1nP*I*LM(@q!yER&=uv3{B-e5R8*l^t?<`y=n9Kdm z%pPmq=`NBKZfo7n*IFQ3eK|o~V4cybWXyi<4C6Ps&{!9oCbdvF3(?; zl=fP(N|H;ZO26+V^Q^T4LP>0VdH&B|r^yGT9=}4T&%dF9tNr*{CsTxv7 z=%732a6?BoI#+}YBVIZAObc^vzMi3YViPX3#v$!id{n0ZMt&>E=nguDY}ifTI{=M& z8okzT!DG3GG6exL_X0QuXBK+sXvDwzi^*_AxKOw$RX|FPewAnJAyzrqE3<<~f8#E| zO>a?#Vl*v!(kW#M26XOrj9AZ>h@2jVpqs?MDh zq1xCneRK`DRp5PJyGOvUXi9=F9|GYq%U`v_cQ55$_O6hN)ak6Gg2($hTfUMUGT8;< z_oO1sJ0>)wf4pf$Ehb$+yfg{fxm!opTW#5*>k77f23{&_S(O4R4t187D1U`#hBZ!z zreO2q&3GuZj1wooDTDULz=O`Pu7uti`pmE7ws-Non^ffPbS?jK@_GGqZIIt`#VgBx zrITO#F}D;=&Oq{z+Avqbe~b*tv3~|Fzs2#YbRoYWqx477(N|1ZNvi?jMRqUU8BaN3 zL`0pJ2@aK(eFJ zF!zW4B|vziORSFrY?F%ASDN&g2SzKpV2$_ztm(>+EHL`jLOj6G341(c)E$kvdE7;c z85jmXP&D?=jfsSOwgI3D< zQ$ut9CS5Y@t1QYQ9lsrkfn-K}#b`X$?+sQ`>QhouLNzKm)8{S(<0VqZ(4q81gCE_m zg*hvSV)zR2-<>N-IAD<$*GX*kiseyOjOpbf_5N4PfOv%?N6_~y#Nyjo@NXZY3J!Lr zR>r?g%?$s?&P$|ZkVML`Uifh}Ca0gt(Z1i4hS52AVoCB8lG+5)c?Qr;srlv(YC+n~CDSU~pyG?^ShUvqn zG1nujqpZ7)s|@#}NO)~dNL|Lyi(qW$@aF)0)RrO9i+He07MpB*yy6c^kf*mDd#r4< z8=q`RzP%w$4_0*e-JE;R$?l_%;XWQPJS2B`$vwmwJk$$D5PvInZ{zo8U_A7j02oiX z3S1;X8Mm7R-{QiL2n%mj&S;G1IE#ma*n_u%hYIv5*PD(C@1E{(uld~)UqZf98yPyQ zo|}? zlRsEwEQ9(Z38<2mwRs-^^lmGRS}|i|=8Sm;NB1#l^JpdXM!gM3%TghdMNh^d;#!^^ zA*KpI*st~MFrhP6RZpC|M&UMnd7XSqGHM<3q;wMfb;VRRV-Y173+Bd))B!L&BV1f=HAxE?6qoS=EK&Z&T2EX%0xsL4#iogp*G=awCnVYvP-y8 zfyl~KSz<*S)*bh%oNZAd#$TTzSf2Z=ik? zi7VGm!oS@yohUNSC4^p&e^|2E1B|^%u%POd92U#KjK$!}xiXNp`v(q-B(hr34!nzl zxXY{QaiAmnnwVXr6(*4UsS%0mI)z%6h-zxBR^bDGU1nc+kbc~=7gG`#L(>tyVQJlL z@>1`(tk7t;wYj|=ljq+#&9VhUa|^?{D%cvg2KnhK_ZO+obmEo&$}qzP*a2!vnW$46 zGuIU`635w1kQaT0e@Lw=#P6+aqDk9IMHl@1@?!+~Lm5nhLyIF*=G%#=H-TqOA*m;Z z_Oo7ff^(sp4W@$a`ZeI!@-pRn!;~&?<1BH2ov;dXCSfN`rW$3M-oBf@;%u0&HM{%C z3P-G3Rz|$s6k=)@?Pm5dlnxpq0~8s_oQIm89()zsOc*T@Ynn+980^6pHDaoFK6Vvm zDWo}as7tSqzk$XkMbTM>0sOC-{z6rAJzVAVXRjm;3)k?rBhn?Sm zsY3xN-We+(cdq~G+7lUY&Jg>(Vjv!81m>l?8~hv_ioZwB^ioXb6_lO+47y1Ij$I+! zD%g5(U?0!&zLZ0@YrUKNoXCG?j%>ZdMr<2dSwQARJ=1^PRNz)ejRYB$w2*jSPKmFh zrV>Wqg}q`Sg*42272$7XPlQ8e`{!M|$z;$C*}EHm=;h=GspOZzu2u)-YWAOZQ18U; zFC!wqOtL0GyKh(eGx(?Jwhq?er%r<&E^acv(8akTU?((4f1?zy=o+tegZ0v8ls`$* zI77hBFR`dP+bG#cyqE?uiMF0w+HpZPy^VsATPhPS<77WnE`j0k5mfr1xW{zLrDkGO5%Xatzo zXybjU1_>;z2CBSNr*}yoZjnGI4Cle4)bzY-{IV;GcRS#x>|!-L)%zPYnemJOBJi3 zBqcaNsy05)zgeeOw4R#AO1#R+#Xi%2fwOjxHq>x9m1}ml&*aM{J$NosI_F z+Dh31&VaPI91bJPX7go{e8%8(d2vC|7zUw3kAEis5X+tkss(wH0F?gfgBm$_?I9-k zQ|9p>WBn7UV>O9DC%ax`>l=?NrcF(ga4O7@>w)DZ=-;+DO9v1u5L_(stU>0=a~`Q9 zqQx2JefsH&;4M<}m?4u}jSHVdXM{pSBw2yK?MXmxAXY)RHo)%1Wla_sfsKMGj}~NY zsr%@|l=L4+yb}cZhjP`q&d?+2u_i{>@6|=gR(W!%TCf(9b|S zqTk35$;w|ry5J4o{@%EBUcd$--p1_j@FBcTQbIq?dsufF@^odlpF@yn#=Fko&_&p0 zgU@vJOklgipg)cZ9^OZeH;wQj+pEskhjlQZeNet%22|`;K-EduB;GBj^f=~678#lQ z*}xgI=BB4fms7WTvAB~AQ%-M*c20_sBVS025NmgyaHsvLN4m)DWWg+2SDXl$@Y&q+ zxIc3XVz6&rlg7XtrM1HsRfB`eQ?w)NTK|Be;?xwrY=ZmSc7b68tN&OU>uPUzO?3zd z*tC@72ErPqy+_&h(@1t1h2|N>p3Wr3TkaYx+WovG#>^;F#qyybK0C-AU<_At0JSFy z_z5B=FA3lqvO3bY{aLOiWQe}h!XX*;j`i63QtlFv>}+)19$z9%DC*5+3E}0ZztXspQnC%V`~R61`}=?bm|v`Wnohz+^CH)%+glG_ylB^bU1b5^5D zu4%Hawlmh(^QVF>GSmz0;Do^$@3@gSsTQtyh2G>vJ^YODs70ef#F`OsNmFa(%Bqza zT)rB*OdqLZ&qVD{fZcjkqxb3@`~gn}We;Ba*(0OUdSdCWN85K9)k)GLNF^t{Qz^sc zf>CDaTpWkBmErvU(-sub=>3H8z4+{z&3!$G;^9*zF@nYIy9%exa;G7k6NOE?ENhR0 ze!Ei&K#(-ZJ)?=Hka156}@1ffIq|(d9Jl?gumb3h?Ht2y7)tQ z#4jLNRUVIQ3C}Q#S7Z`SkE1Jj+xGAPIpQ4wen%W8hVAW!`Lg$y<^!`aey!X}zhIU> z(s|>HS`U|@4_iGn_%fT_zW>SES}=I(78^n%_qt@AM+yae43nL8i^*Rfj67`?`TnWX)3yn+rK=LX`JK zZk3dfqX}vFn08_$hj8kqI1I3sgQAOIX>!8lV|kPe1zTK*ga@l03n+I<8Ruk>ESobs zrqk{=1XXPjg1jKw!kshK-|O1@3M(Grv>cn9Ta;AXgGB>ZxnNMOuCB{)FK|cm+bCfd z1W%L@C9+q=PgL3|?3EH`75^s0D@NGWg(*nOc_v6)6pbrDj(Q{(WR%G#fR<%5TmrtXTte{-tm{6~Xd|dA4r7W^e*|I33dX@5Q9&HrVjQSw( zlxj4v%-=3{b@3AzP1++kF>d;4?cRoXs3SmT=KkzP|GDMkZ#b#~+1S{fc}w5k8~y** z;IQAOGp)W=$* z304yj$vpcFa?mY8cV;lp^Wm`kMn<#QEcNryVAQ~_8;fPxIIDxgq@E)3?KqVLn5ziq zNHjE2ap#W0_y;}?v}y(Q7j<1Rz$gYDSp*NcN3HL;)4AFybxm$)(FJC)WkNbqP(&)- zaEn^CaYNvw9hL`cj>0?j5z?!4!knv9L}#>p=fO$Q%cBNe!)aE9?5YFIFFU@y0I+Wd zS%aoRJPETmC?IQkVoO4OdsGQ1u>~QSxPAo*ItubGutB-K!g2QWgyYKEL6MWp8_Se-)ZmuXd*eGA&|~wLPT*9>d=BF|L9N{1=58 z6F6Dy7trLNc0ns^KEE(SxDfC&O&WU{Rt26$%{#~#P-pu5!_a<0=E%=%yY$Ja3nWUS zCYYlC6vlhLO;H$^$W@@T<`SnvM7?!DgpEP#X2SHWa^Ty$f5^@50GY9joPkamc)8LY zzz^{(RD;39iVpxniz%E5vq0v6naO7L1N=4(vg|dTtled+8EL_rLMT?g$R@H4%EIVu zV5Ad?=#$&5CChKB(hpb^CBDZ{V`JE?s0+xVU`m%H^XhRyHZ08~T9E$hk-KaW&pA_& z(QR-)wWqxQHnK`$GkxU#UqTlyFtJRoz8AF*zNa4l7I61J>-E3C{|o&R zsjy}>$B)2+OwJez3mcfAK^O>TMve*=3SOY>SsEf_Xk|A4^yidQBh_(Vvd{w<2_FOr zq6Y-OTa?f!GpR^8uXaCkY%+t#-RWuR3ZD;%yUz{Qu;H{scc7Pao$aQ(zbuqBH!ad& z)k|*|F}E|FJh+D;zLsd2J`pPJLae|v21(PKB+g(r5Xe{M zST9kWFr8PDUM4p6skT><-r(iRPtmnJy#EDTGucjR2K{~3rGKAw|39_!UjP)v8LK%4 z6dvv0^-Y-2pkNV%|u{8G3C$`2csf6^%JW8#S(@x|W3#0Z%2o`3K{ z+2XENtwEU=EHAM=W!QUq?&)-SzyG|(z*f`bXAFk9+^_X(cQuf!vzhAkgf98-Ii0=(b#+B5UFM34k{oi%WM>1xB+-B= zznm|T2=7^sYV!khB8uJAc~7I2jjR#6LPeJJ^`WWwYQtv~O=je>RtlXU7B<<$om_9k zSUUNT-;D%RY}u?t!eznZK1k?icO7XD7he4}GY2JNOSs2Wo)rCM@C2-LTTm@l%^j&- zFJ2N^UuK+3JWU2@I(=2m?>r4L_T4Q0rJTVsA@blw#RWhCpWQ+gr$1dS$urVNYl`Hk zP9>e3NlsEbaW=ed!7A45{dongb4v3w{;LdDbC1shC?6Wl36rYH*!vEhr z6e0-dod3I~w!Rgyf173U-{+7$$$x+U=Txgw`5SGgi12C9u1rH6de*cP*8p3(HYy!@ z_5;)mc{Qj_+=jm2$|MIR%5p%CN>U^l#`Q4J+3Uyq&rMEK<7Xh4u24WUi4Ky^E`!P7 zrv1vDXS=(T%IjJ7k~i=(Lc2Ipv+zD?fGomIY5+tsHB#O)Hzd&0uId1~?QjdeC?dR2 zMffrTL86wqOeIH}DMZW?tYD+EqtRL!E&qH3ZYD>W5A>)=Tf@+!nzFG7@mtGi^gd|+ zsCu7nlzFUlf&&$*It-hdWOv#wz76i5smqoi(O6nAe~-3?F`6@iHQCTujv_Rf?ti+`?)5R!_kmh6al5KlI&oOprxz>cuE+gvH_YC{ji8$q7bB z)Ged;qF!<=1AHV$e~fXyxn_H}{Ki(a8?Re4J4usBNe0KBQ{Kij#e@Zd{^(90FG-)! z{+u_dX#%QGui7#B+x|Fe@wgViuW4)I2D5UuktC=>NYYLK-P@ut$}OkX~(Z^)lyhof_^ z@+r^W$FCzXoh?>%s(mOvRkXK(HlPVnag z%Cw5<1yqgctm(u@c>tK;t9Au#TW>Wgg&Ml=hh-%aAMzZ{OKX~WPCu{K5*rjYM&zWVQt^q zT{e~C{PBHdcX4#+1G0w%A8(s~d&}LwT<2H4T!VGK87(X5|Hq=je;+^p*!e3|3DrS4 z!t^q727F`0Nz~>c5K+?&;!%lvh4VyA9%* z&h>4wh=-R@)>@`DmGaCI|GlFAK)-!kxOX)UG6aAVjb06nvu!=%LrC7pf(}J1^7pJM;Xo7m>=DxRaP8krBJoO>uw(v)#Dm(=a%m2G3dk85((F(X zxhnU8$EO&`cd0`4CB?_D_QB6aDUb>ECI9`*%$vVE zg1{R-1-X87a1$t>F^I0@A)eG#u}e+WSw=7)+X#n!6Ok%GHLW*N0B9t;cUTppZmCiR zOR6YjOEvwF?LAh`S+C4#S&>RhMFag5+rR_$T~ut&&8gUs!s4p?2!<(F2tZYADBg9` zi?&L=y^scD@+6zFwz!(3H806s%K5_+UpEW8*IAH`@^jC__})OIXHo18;3;c8Y&0$; zD8p(xChMU~0KigK*l3V#+|$ofmI*J&vCA711ktym&yucft*(| zl{$Y+PMp?Q+Hl3xjObk*S_lk{o<1rlj^Z6`1zj1>8esh6os`uEbNpU=2M)P|^+qCQ z#-N-(GVR_hRT@7UUrtD@CMdMONhYv2e^*?C$@Lai=l;yQ%V2I+$!#!ZiZ8yTcWo?-ap=4iHmMN4R_^C4W%%cJ zCqjgDl*JmJ#7lA`Q9ha>TNwtgQ*2ove)^l1TioRxyLlq#_RCbRo~!`)tcN$76Q;CQ zObd^_m@I6KNnnmt#VL)RSENnRGCOM%u%2D(EpQDkOknD*iGM>ktWB)ntOFzy1zS22 zuEe<1geh3})J4^@QExe6a5vzl?pLN;`h=)xb48OrACL;|AYS)(!BWUnyoPgXZ228{ z$cfzjiip!O?A}4@Ro%9L7A?Ng%(l-!|J1P>(lIO9>#AI^ggjP4t!mfLRGrB5B9TDZ z3EOmzsm=Z)D0xwewQ|=Q>odyRl>IyZ#vo=v*LQ)#hwX-ZwuR5gj={0t#H3k+X)SLv zf2fsDo&EyX(yfjQP+*Qb@@{rKtZGnTN+taA;p7+| zHFDy32Bo6$Ae841+AaMAWq%y@*rxz+Rf+=5B{Lf1o8QItT@+%{@<&uwfIB8prLJXlDNGwHJ^o z(rH~AxoHNnbqI7$|1(MoSX<-AdIE?Q8ACK0y2~$Hp`v|Y;J4O;ypid#yBtXyu0?;3)6OQEhUG0@$qowO7_m$PBS^z3E71kQ77Cx%3>Y& zLa+5x*m>X$YNA^jJr#t;u)TqPRGB;o8#hb)Tt?tVgsU}?+6`-7 zub<#b15O7@dfEF*y2b5|uHo9^4nGGTh-JyGd5o|t90D|LFfV#DHK)h{=uYJMb)#YZR)y{D*v*oO@u2$86za4)sITJ(oo%@9-{01ULwB>bxEPK3d^K z;R6=3;bGbHZ?BJhQ75uo1dnRSShVR4WvC~d*XW)`IIzL=w}wB9f5&uhbk@@W(i<3h=8 z9uf=iOGn6r7%{X9M^`-};VD2=@0x0$AMV)H4ku`|J}LMTbxX+gD*wWJH;3Zr&^ln8 ztEH+lY<^_j1GIPZWQk6xQE@5Tn0FtsIYr}V+Y57wHjh*T?_%tTBBxXxl2o!+dI^gI zc^KAuusj(RA$dk%GI{-)b9gWE+|4xl|g-UU(yj>Fm(m0flPJ4b?mqU1Wh=Xwv@2u1pty0p&$q2w^h;)C!_yr!RwQn$ZuDUUP z`22k*&`Pp{sM$^4F4{#(=ly?_eFKnWjgsz6bJ|8Vr)}G|ZQHhO+qP}ncK5WWZDYD$ z|9kI!yZ7IX_hKXJR76!pMO2-WnJ2$5GrxR++Ly!HdE&XYc~96noU&H^#lIDDqx`{9 z&~hkzH50|{7xH&ShMX?d5``}Y0`p5XPw0PQTMf+|zB(uLogB?ec9$ zl0hWTm>`lBlGywA57Pbfhh7P@s9`u32M(GUP7a2*{iBwvZ{(^gcBl?y=&faWr3B&i z!Jyc7lSFnBCqhKYiL^@gc&QD0E^9Yv%M835UiEp8L3j%56lbxe^JLxB zN(l+hvu^7jN1!&2qw!T6Qoq&?qZN;O(eECMCSof*wWO8yq+GVYJFuQ4wHwQ!7L3)5YgEgo^LNZy2qyjsn zK$5R6K!9L4=!CTdz{-lG-cOv%W(wJ|`E%^pw65(cZ(ymCiEP((NGn*1Ed@)WJBtZn zj_MBe63%0kLr)k$M71B?NZsydLGnz{GEJU~sW_|m>$TF8KmEBm$Hiz3c@wWBuQT4k zsAjNyKk`MN?}6a5!3rg|QhT}L1E7BB*+b9etFN8du{CbJU(2de1rP0=;ALl9%yMDa z_&M*!T&}yaJj~~UkJIw$2w!~bT2KAVA##hB*0?t=D15NTt>nBziJNf@|0!bM|D;H2 z07|B4mo1GpDl%6{1N4!@U!*03lL9MFT0h+}TNcKA2D#}F@wc34;V69!qWs_2Z1)Ot!q$ITEAJQYD)_&%+W%^; z!g>adHui4+97>Vmfc58v_bT||pK99aAEaIzl@%5hJUBSmk3`wZ2amU=CE1!D1b!_$ zJQUvVPiFkpGc@yd=lNaL;9U1uSFrZ-o%lzezFVg~nMHCazi72Y*(#Beda+#5LcQ)A z>MDGvgW8yCpqcgJ!Xv}za{$LR#KvXx9rF)2%T*@5$EQJD)`pZ2D4Bbk=ubY<%0CZTW+wyYTZ*Wsj`+#)Fj~I;{DNz7f6FXf2~$e#G!h@Ka~ChiSp<+O+K%^8Zl74P)?-AC+wgBqtUcSu{GIgU%2(&LZO zExj2rO5JiYQ1ziD+&HjKCsvgjxU!g1a8L+b{CAk0Nf`ipI* zla@sJ5!cG;YH~E0$-vQ|?(^sI16l(JogMb=u0f) zhAP;zsd-43Q89y%M1vkVLu?1~k8cMz-U8L3olk^C+AF|xPzxYushZZO4JVL?`PX`N zHj66i759~sAN+2Wj`w%FU}FH)@`grc7I&VyW4 znwR1_!yP9^Ocqj<^Jk>~m*L$fj=A^#w~Zwgc~0@28(g@gD^IP z)zxh}KfVw5pUKc}cm5IDQ*S^-(y0KQtFkvy3 z7-W%MW^=Ku=>nq^W(a@O0!>P;XlLX_R6|n(X4T4}#JgYTeGm!GGk6@Yc|v*8^Y?ei zj~}579UIGokU+a394?n#4$}{59d{c!Gv1)K-z@;hc4ScylN1hXgGtbinp3t7iGueM zhPE&(Qk0lW+XbO@$Q;DwfCF2yAe1KW%E>5XZn?9P@l8fKhMTmocHid1R-pW1&X*=% zyCJ)*opRoK#fqtFh-0N1E2x9hXp3zOOD3z&Zp%mXv6Zgm^D41zxi{8Q@C@mkHZTQe zkD(Lr?gcE6Xc5G-;RGGbYNG+h#xUV(BMecaQH?@iQsg1U%N0OU$wrCDO zBH9mXPl{Ja;Fvl%tX<6)3=H9K*dyk1{alQRZ8A60k`CSdvqz{Q?*e{9pH81Z1~nM% zTb{ZtKz_G9A;_t3Z~=WY1%=(gdwM1x#H;1d%x?V5Y`=pye>H1KkFL!hFU3d;P?Cr2 zfskC|E-Vc!u#UCl|L|jpDty z>Y_`9jX7y_F}=c{I^*yM+wIFNj^4HE#N_5^ixm#c#-BgV8Mc&k00^gIdcb;hy{A&f zVWs%17nQ#Cr;#!5u59HCsNGFTu-%Lk=hxQFU%B?c7(`}39)a#8(D($v4XJBKk~W1v`)Op6`?uJ%3HE|f*9gsU8+8H@5Wb* z_rDACObA?Xd#Kx6k?2-Fh(shbsS`U?-G8P>=ZF~yvxnJz{d_f6-)=Zgfx+7}o-F~S zRPm?x1!h2_$aDMm((O_POau<=SVr)U;^DM$>H(wp)`hEM@%?ex>L=d^rL3gofXi?J z=~LBhbVCKKpi4xaB}YkNrVgpb*ESUzn8?y#?Z);hR@PP&&@$W z&+0!GYnuH9-V}^1zgpjI>1&cd%eHD@Q({urKt5+2FSILTuoK+)B77f zP?LDsc>WEH!5~JY6tI*;`n6%P5h+wh!_+zIMbEsTKzDJb=;oA1 z#epESXt&Z+`l+$SVIiEW$l6kNF|9^EiPY0g&-j)RXf9T;0dz-DMyd(dUR=by^~{JW zGHLoCizDH;c31F{5_W#iZXSWCh@0Zo*Mg^osm97ojYm-H*(VTXF$`M%`GyIf8rgE@ zD&LZI@f!~Tbs1z?I!N75Zo19i^I-`-{{#;J6$AQP7z>tXpeKhRrVW0(JxwpF&JT}L zb#F;ppm&b}WhQHdyN^jk++tzlph#xkID!OzC}>91^W1>?&Z{VAjTe$eO)Vd8(=ggp zG8XQS(j>WUlto4ll;)8GfuSNi$B0Yitv{As)*qCytvwP#>DX7=*)R?e?aw|NxM!=) zVa_|T&69n!T2YSa(fKl2SQWsioNi>U7&AjuNUE3#t%L_f((Z4S>z`RKe_L;W_49`8 z^(&S@esK=}S)Km>8ty+~TKvRUzYKElXPs7uvlMVqqb&6a(QILa^LPFvUQwk&QAGqn z$W!lh$rHtHL4%S;xFLT~e*He+YeG)Yz13(civB7qv)`IMYz#;5A8&VPzADG8v~^|% z-!-5_dH%!%SwT@j(V(x3{6h1_hH0Y#(;T8t)}uwvMjy&M4em>O=TzdPqi?+m;4Q5l zDG(KeUN@ZVjRs7%(8u0zfE9RcvgK7_MuQ3z&;Z})o#sG3gypy!s-1)oTxQu4A$b`y z?m37xN`GcjICZG(3eqXcD$nQVT^ZhwIG98hbXx7*eZ)@t18qpm}5 z9PY#M)bFQFhs`OmOe7WNqk|k@|0LDVp6?8**UmS5qEpVBQBX%gHW7AH z-B)5qskt>BUM=lgtrp+#fhMAGC61xMrHLEq{MiXPG3pfh? z^EWBc1bp1@tzVbHpzGf*akKelKi&1TnX@Gm}%!Gm;8>hLTC%z)jnd1?=$ zxnGBes(4%rA?P699O8Hqg#tg9ht^#5<9G@TvIH0owg`H1gbK*M?03B1a3O4?-2fx- zAkxOqDRtx)rH<1lTThO?y9x9Lj8)t+u~}<`W6Z35$q_nIaUUrcgWeY>b`one_krA# zoYI1b-lY#m29jqGGk|h#K%ObuYcWS+-dHL}mC&D6{wxWnWVmQXy{A;zml&dN22>xV za|<*>p9mR`VpW>=&&nHUOd>Vv(dJz)o%5#yB-)Jr2ssGTQ3;4`ct-jXP|IeUdE(#= zHZTG2H5EXZbw)Pknj~e$+od}5!lE9Wp&1RxV^JigkXH6{w#6|Rl1LhlQ==*c#bJ_M z5@cH3amsF$AWPLv|2Phn)JC06%jNxnE=rD};V7vYw?N($81F8uOfwPOzr8gqZK0Jn zpQ^Rw&7fJ;!nbLl?A%vq=>ADB4w35Zr8S5Exse@Oo@acOsDqGwOl9 zS|{W!7}5=f*miv~+6}{a`0xvm;Z^BE=gDt1x)ZjNIDgAC+D1+uT@8dd;#3r%YBi}a z&~ueTes+ow%y=G7BHo)C!VC6hg6$I=Ii|w=m)!y&*w|AKUk9ZtYPT^W*_J5i425TN z?0e1ik>~8BYd%SdXEvM^TjUrh)(aku0Ww6NcPCq0>X?)?4CpN7X(`HRg-R1UdCh+!~3BFG0p&GCo3y7XRo3f7%zW3~v z&!2eXC35QNnD`&0yE4xWzH_MIgK)a+6@u?Ugqtn&vt*f74os&!iiD_4+!f>#6Q^nf zHfX8FQ`$4N`ew6w>*WSRx7yeYW)0D4XYEzSBlDJ(r5TIfQ8%t1O1+6s(qm)I=1dz2 zZ=7$gq&jG{yEGDQ5I#uPr9E@j{c^@ERw>tDzY?!eKco?^i^LNNmu#tvUZ#<5ntcn3 z)&N_&H(yhjiJe;N1Gf6b&~hu(x-lX#p;GO+FPnFX)>O!)@nqhDQG_1G&X?y!2jfXa z5i0TxV{3!s^Yy;fqd`y!DFLlPRO&=ey;V0kAjNrPqPT2Vv9&_#rNSwaE_&>%BUuzi z6&83R9xA;G+NK|%m!2Fe^~>AR0!x?i3yvcfEUQq>`n5t@8jVUyy<^0?B* zJMy0D#yCxnE=FJ2S>@vJRw_NrE3r`nEg*qv=|+@`^zObru_M#s0I}($n!ygpB^Cr1 zQbRGV?b--wa>j3o(~Woa$x3Me>L|S zauG0b=DlIZsDZ~CKP>YJ+M}kI^c-OXHuGL!zFnHxa7mYaQhV$Az33Fsv#Hx;dcn~>o$IT2f_3z@W*s~ z)mOx^vvp1xZfEUG{t42@ms!Le{fvMn7dbqxi_N5~aGW2i)Hev3Dyqw=?0^#BHj$8Y z%q7)y_gpLC(6)RH-nu&gqe)WAd7|fv2<4>NX;jzWB-j*zlA;WeG=m#D%ujHo`-FXN zChUUYC8%74Svm_Nt9YulPjv&iMZYAkYPWK5g3;auyTzT2&XGdI=g5HL$Vu^1ubI!v zk@p*E1$Oel8xSB%$YJ6)#x z9aPwETQO@5!`E>xbf~@9kqZ*we_oH#3b%JqajIXJt|sHhdKj&k*6?sB@RUMHO1Um5A3^9x z@oCWy#I@F3iD`a}Yc2)0DVYmAQlL|Jr8kI`GvMPfV_Cta2=D+XYbMq15qJ&-! zhA!!XWKuzu6x%}A@0Z^-`;C-p66XxtA;ma9>Ek9T91n4N`$OG-KzH#-96Bg!pPU!X z#^nF_z`e?NW={$!eTPj-c*gUV-2eEJoRr~rn=|T>)?~Rx&AcmbrAk70g(C&1QGtn^S^QYMv-)o{k-r(pQe(N7<@FkMY&NMt3}8_{`kib%dMq$ z4Nw_^qf3;Wz9EPSx<^nvKN6Jn*J%;$tS!ZUd`w3z&sQgn(x)K^J0Ym> z>pXuXjkcEWT&ka_J-mIW;$`AGjltky#KcOGL{gDq zV~rgBJI*a>S%_ZuJkKJY-wXZ&!*w<^`ptULTGzWwcQHI|I$TYEeBAA$`GR3ETeIo5 zUg!cSz_!MAtzj$JDgjs70TiSgsx5VWv0nJVWiIHyRMbV@Q&+rjuogGe$98gGeBxZt z_&v4=<`$+NU;~;k-Tn2NiGXj+SFa$OW;(cq!mDaW{=lltKNsW{!5IzGuIzT-w zwwA81I?C}+4WXVwyP14S-rF0V@;HmO+__iYaH`rq3bT*Up*`L7%5pc>P1QciDCn!Q zqdgg;o35duF)yIpQQf6HFmQ^u@`$nEnH{KcqlfkyloG{C8b<~Tg|T5rpUtQsmZ8O? zmj(0>($)gqnWTp7LP=SrF9UA7`}vpO)iHVcer4k;)r+THiRSc@8;>f&jK;4q_7W=; zJb|<+NO~1?A{&pfHv$=17KC%=Wid!2)8ejJ>V8)#%jb&-l&5PKCdDp5#G??FXpM%x z*bZ&8(Wf>p!3>5BPksH{w!R8p5?b&t6c+GxeC5#JO!e&T-Dv)S!v4di{*f$O#Z3LR z*)V9P%Bna@>1`hlzg2!VYOU3Qs3=I>ASv0}NHQcH(PW&E%=pNm?fSbX=?$?O>7rkn z!_2kaOqb3!ZqB!AJHlCnW5{zTEckW?K#)UPpzzZ+P}4XRze8zLeV6EPT{tN0%Lt1D z3XEYadPZFrE{{}*w-0^S1MUw;k<}9bHixx2bzYeMP`Q>Lo$BLFgy+Bagyd;DjQGnRnZEoH`)}t?+D67!z|_dV;$Lth>0ch0Os%yn--}8<9-@34QXl#pNJlIs zDhUa*xCbn=aWP13ZE|*QZQzp(#`L-acP#?AsziYbVl-}OaJb6$xa>0d{qg?r2i#YS zbAc|(6B6UHur{)T{Zfx8NU3s47yF5cb5?ftIu?&wpf9$}iINPw_(IGYCqzF6?`0D~ z`r2PF&-KCNBoI&TVLel?`S%ZRbY~6C?q+prx8D}oR&6B7j)`23#QxFy3BOR)H%-v^_KXIycR zyBHJBET=8CK%=kH<{AhO6#L^O8&F+p)llkTa?(-%@u2wQOFdtZsTMva)kyn8#O=7_o4aAj%Q&N>P${Z@9UiL5~i z)l;DGH%XI+mCD?=aLDGmdlAka0|$f1h1d@eA9IJ_$`dAACtQg@Qj}J1y`)DnIL()1 zGQgoZ5?(P1O5&AC25C*-fm;Ne1U4H3ui8)5SqTrs|o~HZsIcj4uzYOuA_HHwe=Wv zCkfEi=`^X$tE)=-jiik0}h2e&Ru_^ ze+Ylfe$6xa2fdN1w*pP@Ll{Awx%S4c5TT59{SjYjA(<{cfn>8JJ*0qwb6O{`VwN=? z4Kk??G^x(yxFQ^Et?9+Ejv~PNL%~{L#sMmBYolG}I zeWDd~M&hMuGEKMQ>W1wF4(C>MC+ehybHhm~iO-AzR3 zEIEFh$)Z>XZ%nnv?iO(C+o2~FxOhB9C~5~`VPpUKw*ez(TY`f1uK;E873BYOljwi9 zBLN#rC#(NU5&x5Elb#R<=7X<{wi~Gw)9e(zBzL!?{>9IW2=7@J^2gt9;dpRNGBpk5 zoe9pHms>m&1QZ73T)@RiZfR*^<_Ydw3Mt4VsC!_ib8*@RIEpsNYoKR%bcVq8~e**OT)I(Ra@*VxDlVoukLaiJ*1{M18j=u0c4f7;T6v+ z7^HBW$A~m{B_Qy?1*`7~4<$Stc|Bst6`1p+Z-f&gYWNT&pRdWbjY261Kd36h*8{Ob z1JzeGo?z=4;gHM;MlP3re)TUkO#e-l*m@bTYyH)s&V&B#8^hm%`G57be?Le6#wazZ zLbxh^Em6G6I3!9Cp$Ci#!-HeZkMRSM0Zn}a63d;1@RI}rQ$L%Sl(-$u7;ExgzuGi(NNb&Sy#5QK-+kIY4h}cEn7n6eCkRWlcWa*Ce{0u+4hx! zO@D4Xd?jNg(zrYjx&Uqaz7RbG0(?a@kaKDwsA2IsfI1*QG^sD3HDVZ%di6S4o-ogW z>@e@}jZ^m>Pge)X=>lNCKAN}Hf z0Bq_f{^nIk>jYfKHG-$&;E4FO9Rf?6Ed%#%KgkUl8tUGH<^?LwFO$}LwU1V%!6S6l z#60{~4V;vXc`_x7vpA&h!Sk`~hzSZOh5fV@EO~^7i5M|jdBgb7^Tn!?V$SCE)fDUj z6p7N8%Ke_%dYmisYWHWp7>XiaD^a$g!6!yV(ler7_$hR;@I~s#8|u&cyUJtlmO4B}MXGwV z{NA$BHB z$%Fbynio96Ot`-|Yvzcd+OQ zA&JP(450N{v!jb#MKVzb<^co}&u#EBBm?7MNh)O=P~H?g_=N1DIy16tPMVG*^K#|< z2rYD!$V=T!xzR%)e4vc1^03Fe7^hFz@z%L=V&oTEXLa2s$}I*m;`Uo^?B?nPn7YVy+zPY`W}TZC2!r;4CG1*(YP4aj?#96bNIq zS`q)0BxMk}-6?`uZzNjWTA87}vfPywzD@|GCQSV8291^jUtN8v>*DjYh_wOsY9rbt|$aklf;D zv4g;pTg-b_n&q^92@NwnuXyHiYJJEGCeh~aF?PmHuzk9A^l;&qd)S4WoQxVz_Sl6T z{<6aHlL$o3v5-vCeWlxwDLOI8OlUb0fEk=R|<7zIb z-3A`n3jTm}PhLQCu6@%pfy<50>KDgIm{e+o{ys24UfE1({nM(ic6HMeecAvXKrCwy zQUuO`n&rvlv0AHWWv-Fj+6ZQo>t^jhH5Dh-oNk#syZHgp+DysawrQ<#S!%DiW;1X( zq!j%KYa$Wi`oqX+njlvFirz<3)npK~AKbM{1*%KAnGtt{GFO1pSDiW~qkixZl9~lO zxL{h11Seo47<+|d)sKON1)EXwW=BI#I2l)n$^-i09HL#4D-7om+|tM=Kk);u+OUQn zVLUHkTD#B6R}&2UlDm_y4kc=S1_xoJcvndf3}5q z{!*G93yNld*qq7_J|#R5ZT{LFe}wxiD82_r5SSxgc=Ev9Vt9XJ{#pCJAN)uBUu&TI z<{f?>kHAjs$?r4#WX2Z3rrnyot<=U#58ed2dSAZ%?mu?c5RAlod<1#lRPf6K`p%Nj z^)}<~);Ta;Qj=-WAZ0f{`1Dl#hcz0H9wnMj;c%aZh|dY6N0S$6wM-SA4ZQ;8G-3-5^oRO{d)Xrvl z-&#UYL;v6lBr3wF59ZH-gr8|sKy2Bd^*-y5uQ76hAa(TMwJ8V1nQ6cI6*tu^fjEV} zq-qp71-@d@;W-<~+TF1cO>RzCXx2HA=8$)e(8wz^_CA1pn87j9xHNE}upsHU z6e$#5J`_DEUzQ8#!f-ZNj#5oSWb2c?k>kbYwct2+3nTaGLi&S(o8}miGh%y}t$efb zd!ID5RV$cB?MnQkNnwlne3UDV_50sCQ%VdwfOo&gGnOzmNqZ&r zt%JN+-s%E>tJ13KO`WbD#O^|0qbWi|qp8Y5%R-~7SzV(i+u`WYsNU>2#b$bIn(K+@ ztcwP&(`RJsTk;+kuJVqAwyP~f0u93pWI_%73r2zjisu$6a?s-ABsQpN9}DEQlxIY- z=1ivI?Pd}8-b*uF$}Y!dN9LGInrDn#&(#3R^Y^FfgSLuR7pA9bJ^L#EGtSmuA#SaG z5yqRNn=Eye8Bn%R-l8V1DOj?X-T@*lub~Dt{u{{9Znho%Ufky+-*I8}S3`nY^S+NPk5>{=5)(YB?Gc}aQ?9*jtO_aBr{zIZP`m_T|{=hbp`mQgG&qJ%ZG zur!Lrs+(5RB{5E_&Yy=8)u&6aJyuryNL=l}pQ@UQn)~{V;Gt}p%k*># zGDHOG{1F*tkswTolICf{f|In20{!{R-KxY54b?nLRYa%#CkVp(sSqA2qPB+dU6441 z(jN6H{c{S|#YXx$Y3Z&Xk*?%%7cC^noPrKf2EPdvWyoA9NwVblINf7#&_>YDnj|aC z7*B%J=2{Hh#QCd;Dpe%tG2-P{vPNW6W~6|C(-xhR7m{gQDhKyIR-+&4BFmtb?j4wJ zT}Aypin14{6o@z(nY@`N9g{eKRhA&~aW~Hh; z9;EFpoFpV9roI-tII_gC#`T8+B5bT8im0-<(p2j?mbOJDsQ0S#?o}EK)5MC*J}Vu^ z;o}g+*ZCOmlhCuu97BbQN5B#z`ei3K*pRo!B zXAvLk`;3!p6(34z!+iy}%(o&!UUgY`936;VUEtsE@K=O0 zS1Q%PVk$Ngg{Q;wZ%Ni4U#iE_|gI zBLk4$6?-m|&#Z{#jM*PtPhhoLL4Gn~r5Fz?q;a$E24TUHHqAM=g>E965)qBDsPcZB zqQ%VI@e*Y9tq7qe3;=zJb7=H6NA{$=GR1h|HOF2_&}1qYOTB7EncnfXb>BEh%N3v- zRr*-TwC@C2vs1p<>EGigmAGSf&|y{~j6vMx(|CQz3qU8i*Twzsmi39J%o`^|Q)JOW zhBrC22G@j^f)R#*+R6v@Cw;9!5X&yw;^4&Sk=Y zIKD5P)!#aQB2nZFD^Yy^?xT1^Qi#l1wFls=*kj|Y*#mt~tJjMsjH7jNZ05>feDJnf z1QlgbH*T>05p_StW*}H~(Y{6!IhPmEJaue8+DX?3zBZNY;YXn+T>WWfR#tP>ncf&4npriB-?BA%wIn{zE#85{afEevH_n*vm;G_366}D8z{LRUL1U2c)q3 z?O|t1N4wQUvBRfB)pohnj8=(a!#J@ewwOwLt-dg+S$G5$@?OPK+O#RK;;yjQ1}ei_ zY2{G!HBtPcvn@)^G{rtb!qqN{=0h|FCIfOu`do^*tY2cfse|SMQe=5quyFUBJYUOv zpuJS=9w2H@RnsPK460p%1*D>ZeNd$O!Z`lXB9-ABDhNU%j4mpK6vAr0p(J9HX>ZF3^69crBhO)$PQ(S2p zPv1kFW$?^QS4-kW`mc=#IX?^roR)@Fb{#4~{)O6Em}%^0mmF+I#W*=;mM_lG;}Q0!z8 z@;@xQmxMNqqfV^RsI;!~qtvOiZWmD#BBfXpOi7}uNz@>lM!?|sA*t8l*jinAu=>{j z*n&-DzU@LLw>=JE-*-F-v`J-pq0W)d1K%duj;r)h=mY?6n^K8%V(4GM6h~R~{XCJF zk+#<}9X+9Qz6r+6iRZ5fu^P=89Kg;QOiIrgEK<)IY+A1i&_F;rQADNrNZaCm-vn>d zXYLG<-RZ|Gg+1?H8y$hxVXpg+}R&A zSw&0ZiM7hPlnpURsSH&pa3DW40Xt;Uie*PteKj`rbyi z8<$4f+BY8HB6Nq7^eNE5FM6$qBC@1B;fODE;eSYEDR|P*-X55T-7yWitjgUBL4{e8 z<2Be#MetA)_Blv<%;^Fck8>qW1ju7QA|S)0((0DnHsnjZBH5dX9W;MFS7z})+uV52 z1kBVtb8T%tc~tvOP_V1y%Syg=nH(vH_9@?igyW6MB&5&8rqAeizjKlwqLB{i2+Ddx zj7-q%6TYATUY$+Arzj0Xe0Q?SmMu%YI2G9_slB;KSD83oN8UL z`eX=Z1WxW!Q4oeWU{a3IHjfdbkqkFI1w#Y!dODP~)Nsla(RVq0>|10pvRvl?QN%cE zuL(_Wm7DSkIal;Mc8TA(gL6-z{&HmFL-y4;(O#ra9n_obk>L`o<6wWP`{bW!fhM|& zw0kVjlRT^#QW{WiISlt)=rp|H4q@GuSg}-3Z1E3lkJ20iV0`|1fcC^%2VomBqM^zi zZi+crx0}4Ps-!Xs_Q=d@P8S-ce&%L!v%L&V$lrtR?f9OKmep4*Hd!#2+hf>ivWasU ziI-VC;b9T%e?vX5dcqp+pN=;_q+WDnCl5x5bY!Ox+9!7kTh=NT46Zx=X?}&Eik6>V zdw3XI5NqBGt;i9M;Fze}X29IP7;Va#@xd)s%5~7K?g^H2*b#CMsN)Z@3WD7T!%jwO zWAUAdLzdX*4w_Cr=GgU=*uSenE>Gt-AUQc>3mlS;3{ET+j&2Kd&*pU_x1bYZ!r)Uh zA}H^t*8F*&u6IJD2cOe&o!Yq9VEwaA@n*Ww zlY3}AwLoVWJvJsr7evtCV=V>QxXkj}O|7ZOovNiTLYdU&xf!SPIF)$0REycWkNA=o z?kvFD0Wkre+dbgF%jw;JUG&jp{`&j=)O@h#uPOX*-#)*3Q0e~vs6p27Up1G1>9!>) zZpeSlxN;>xNhA{m!iOQEEar~ci^W!i|FBO`Y(=gsoo~TY>+d&aY3j(k=J9-3L&I$p zNA|h~ekYyCmTHXv)nM9S;NmLjka?QC;^Fc6j?O2|@AIPy9lnPTpl3KpNfDqY`i=v> z>$S9n3T+@bOgd~>kDfw{w;L8%0zIWPF{iaf4^WNHvx>nSgWT5PSY~e9Xiu1=Z=5mH z=rnG%DDpQp6X}fij5jt^t`G^9zU^N#3FIuxXy+J; z71P?+r0(j2PbHEFXRgUu7q6->TdR2~=1VvIMQ(2pu+gr{lK4FiXrOngi!g+g+bn~F zV!eA}N2X|-z7bVZdsFN85`pT%Mr85)3wHMi0~pIhTBlL+No9??erVR3+SVqXY1DXB@cCAv6Jh^Q!?{qg=Dtuk7+ zFzIBscOdWuXnl#~n24X$ImIWvoda?!5~=sv}14rJ~Pz+#hQfW`VLs=x@H97$9F zPWz{xJLdXMh)g0i0L|k}R^`>OIdx2&AE>;w`(u$kiykPf&!IbqOWL1Np!!L{InOh1FO05`auo z0NM&Meeui`RLwLj5Ur)VufI_UD}^b6z`y(+H-yh{z_50mc7s@8yJ#aTy>nXkloigk z>I;#((%>E{k}ZjMVFqAp=WC#b45UV^Bi6ouWsy8PDW3gXbIQ7zBaO#b{lpmQzlzxX z_Zrhb0r-Eq(m#NAlFFL~mI}Zd?)>r`c7Q0*QrsFE!fcfYMl3SV%9IkXP>t}rpQfq$ zNi9cjtdTPV!vs;?a(UHK^D;S8I&odox;m3~c8Q-U&Vx(N2v6VkAA-h*w`u5sUL8x_a%N5y53xBUutITe3Atvap8l$5WVV#^64 zXQ=Oxtsk3TH>!)Ia+;%qdMy)g36Eym-`-ogb?%3YCq#OJ6v#HPqhf3<9@pQwikB;o zn@X?Vu)p5n^Lvb9FtXAgt5urBKG>awoW%n>{e#b2Hd89^R4Qv<|C=m=OIF3%+xKG5 z_J;^#r0<^moo>9DE0z|g=E}8Eqs8&gE-75h?KL_amuKCDLxUCDpfAsYEdl_PrzG%m zQ-x`>cLReOuVA;r^Q|~ogL8ZN8mg0E_CSD*$U5+XD>xzNDy^Y2yRA|!F^qs6ycEPx z{CJ`*PX?o!`wV?3KWzafgr1~D#|6rLxz?#;=ft4+5@X-kTbriM_|lhK5?k;|X|%$i z*z62x89K_t!QBq9y;Y+?cCy^cZ6WUJ6KwEInK4kk30dRA0DR zKXMm5QY79;$zcK3Rz0Ks(wUe|E`_GHn!)IF3dD$sLjW)cibuFKXwtG%*# z_6@jaG#Of6da%zoJal+hSH;}4u+S!wF@1*NKG*Ot< zVdaZUxdLj*3e7HP0oD_NVHv2RUS7`_D;zxK262dXo0gVLg!pepV^E+DCn=us^%9zj1J91ds9m6ji zeQN21nF}Ds__IE6HHYXPiLsnfz%qcid7qW51VK;F6I5nAX4t$;a}=ebB9?*}G0wal zi0KMnT$Pu+3xsIP84W)xa#M)2YFh|jD!r;hc42H)OR*^7reju_^jtWT^qgOi5f9IC zG2ECjbZZswQMbjEuVR&=3_HcSkl(y4R-L>s8M5>yylZb;w`Wl77NlJ)`~((J>w~gd zf`uyob_N_!IS`3ePUcN(GXP6Mzyk z+MOclBZ#ACktA{ok3gM%Oo+bd!DAlW&hOj244jbb~Tm;4Dep{;Pb zi?V(w`TGB1>>Y!2?Urr9Rkm&0wryLhY+I{r+g@ecwr$(CyH4!g_n!UT?$aId{Ca=A zk&!cJ&XGCBxPWudETlQgt2Nh>{hNUn*HJ$f;(gH1WGvYrA10c8=Z&6l2968#qUrld zR~7_3d2AHPX)vm8unXG6B3ej?Wc){aV2P_^H8XPbc+5zeMXxm^Bzg$wl;$=?burv=k z@8Zzn6k^jQ%LOGJ&T(SFoKIF&tbqaprofbXLo~@t7Tq;o0b`SD$YN_6{@S$EC_8`_ znlh=jj*=Jqju-nzwPa8`w!`w2k0sht*+CiqD`ZJLuu2D70v41*wOP6@QqbLtEVS!ADk|?Re6^<+TO8*$quVpitG3;7S5XI5OTi&TKI6Db1b(2SBv!BnIwZVh9;f*>Htde(9bvgD!-R!Y2cL?+I3j;B- zyvPc|GW-&5LHb)=)hdc0ZKx*A1~!Aj+b4KXo8n#d-1`DULs05-ON4tO9JRK(Yf+EP zOy~K7P%%Qhx&!<(^Hcn6!3$c?R0o@M2;Rm8}Qd^lUl;en7A-dDx`L@A#TiwxF|X3a!rmyDJXdQHj5WUaxSYKjI9XbH`ssf zoK?>DKq7+w`sE7$-zmQTPkr1!*;W5) zmU0!3^GQXYlY@33L87Z8mL^TPuo7Zdcrl3=%=C*r;&ju_Idrx~FPkto*)DT7 z+N}&gmKO53qKp=klO=zB;;DZ+623Hq+0~-cGgy(Y^r zM!j!zL!1gEQg~^tbJ9tpDNDo?s4|tM1<5)+lZC(}A~JrllMc%})CB#lmDDt45y!R; z@n_K0tYXycTAgBx7{B`QkeDefI)}CBIOT?O9Xg)u-;sQUx*_HJF&O}Bk;9i%L4i7B zQLenZ+)%C(jn7ty$vG>QWi~m6;gFCJBxEn8{fpF_2Tnj9Vjy;6<*6!i|4?8 zdZX0+GGj}R?1zTb@NA~=Pa`Lx56$TCGMfz1u)X_1oN*aP;)T8sblKWQQ(Dt7H$Oy1 zO+}fj-pzDb?tskWGL*9YtJZL^kP-fR1<U z;#G`AQq-8Ff=>RaOC-%pdv4yc2?|(O&H9UM&^;J&imUQv!shey;}?a>95PGflA8ry zf6>v&B`+&2n?z+4lymfmKPJKXc^yojac@9E=B2plW0^4QhCaU%!17a9pcQCQPb5JOyP$j(^50+BOnG(`6R^s z{Wa=XIK2}Ggg@kmVr~o(`1Egz38%(hF#b&Wdua@CU{vq8AXJaB$LNie?D+e#M7;?P z{n;Qf1gH*k(;32&K8ueWcT*bBV*CR!_W9&JXilTYI;L7tq4YhR3~NDMeKuhVX&9_Z z-NjKTuWI}eF0s`{TxuDlG6JFQnM8|l+Z5?~Q{{Vbc8vj`rMBqgJgSlI?j%FmT3|eW zH4U>QO4puq;uQFEYtoVzajkNmLC9{0>_@e`%kbQrIadJFxL6wLWL-L2iW!^YRFYz7 zTEsTw9h+y~Iz>;#{^H22+9ritOChJ&RK2G>q(qd{8PzC!UTD-CVr41nP8(BI9=Xf9 zeYo4q7CJrrTuV?N9h{r@wa-V5;+kC{KTh#}57(aiJwpw^bJ3=_>-;LCdDW7FS;kJf)_QRv zM7_Xc{{!?|lb~ne$~DDFW{W8v1H0SjH~qX%Y*j#6Rr;Mxk(dcsr&CaqvsDYi5`dqK#2cykqKrKLiIlFdU34tzhV-_HtQY3`2^AOg z3%m_T2Vh1d_Lgc8mmOCKmpvGh@ci>s*&UHt9v?n3N`g)XQ5R%7@ei8OJ2xz$c$8Kh#?-y#O`_&4}7OC>v@=^o{ARUb1oi`3fxfG+Jr&q+eiRp2h2sz z;ZwrRox0_d@p4yWZ<%+R)}wn`5JC9rIOsX#ImO@dB|ZLFBVXY?BI~0)XIVIEdxWz;j)H#hsRvov=ImND|3|6@bW| zX@;W?VN5myWJB=Shhf$0vH@)v_Np-$?M`7x+HX9a!u9lv*m4=RIaxA$nlAqxbL{X? zi6sq!FY7GthlBf-Z_YJRuC+m_lhg;{0T3q>(oSb>DD{N22M*$BHZCk^J^mL!LR-;K zv|803EI%EA=_z@3WEkB)5HTT#b|)iKOJ?{Rm`St-Szyxyc#k1MXb9GQTI${9mFW$3 zmLOR9*2lHA{^zJQr+Hj!t^IM}y;TXt2nV`Dsg;FMlUGBJ8)3iMkHl%Low}OJVdxmUp@wfU7UvOl1pV>qBX3k<>u)YGj z_osjC!~A^^x4~Yhs@D^o%48Lihwb6axy6dz_*9CroHBcW&22;gN|uSoitS@ZXiUe`C(boF`#h^=b_G) z46oQXdiQ^=aoLpo9yd>?H{E&c2;Ui-ExE~CA-*omflS0`()nu-`6h6km*)v&bNj|1 zeRI{R@aEl(i`Mz|4fvn^#dP>)>gf;Ey704${68;h{IkFK4-Qw9f{YZ90JL}Md9;?A z&bh!sIoRV4zT0|1AwD?BGupuF44f%nszRR6WWbLO1<6#npnzt&knV_-F5=zm;}^gV zsu-jvBv@d{o~gnJxK_+#VC>d%V++^#R)iWmF?8@=R7N9R6xBdDg!oQG2nAR3jEYkD z;{H?2-LU}aH_7NgmnYpG+^H|BPM~>?c<24_FNj{p%O^{{UEvn;SdmI~bao|Hvu*zwNp#s*bB6 zsiA*wjuHz6@8+eN{Q@Xmype81#e1QC0T<01$VZKUO~67=oft@i$j`7j`Dn+B*g!VAP1&9ErGO@#96ND{|5vhJj%_zln<`m9~6Q?}rxZ zILS~o^R(&t8B1XpkXbLo%KaDx!KXPLqeoVpJNOJR1o(nsN(T=iOqV8ZaJ#mkcytA8 z1mo-*UuF24U#9LhJK1bKe7>i}tL?~|bac3&T#y~@o?a9!W1&FXQa*>zvCMK@g=58! zB46M?T&{-0k4ILmTS>95GY#fO$AUo1aI7DyQ59-j=`oq&aIU7^Q_rU{dTU*jhgh)N zDyehPt_uIi9kPZ+-u2vFGE#UV6d7i;H9wB)kHyk4Hn@({D|J=C>>90b)6s!s-<@@) zFLzlV1mq)S(`_E`quT9 zw4jvt%w*$YVU=4@BWFr)r5}?p=^ZWEO{aoW6#r!qq%o$81=eUD62Ug!3uPyODAnbV z%23D0K##9BH8vx+Z75uIpcEMKUeCg!Lj}T(7w2WJ9HcU7)#N6dTh;XG_~}NPhX2C3 z=%;zr1X)L0+plgIVootM$f$12(B4+G8-cNwavrzonyHHMizTh!d1a?*qO|P1F1x|6 z)g>Abx(NnlKT~!M(yjEZ9675()22!l`4LkCwa3oL)AuMW(3}3oZSr%q+v)*Cg@F#g zq{jxb-S_oF)VB8=^CgqVVAc=>LZN18fLw|+Y;#O213FHj(bbiakovP#&JObYdxd4Z zy+jRL7anzEo?jpR&31Fp&a6J9puO<FkuuSBGnH|uEZT^H9XD<4knnizJ?!}DCpki#Pjgo zDJ$8S7PU=noVS=VvPdmL&-s;6VL>YbZP}{$n)p<=8OLj}U4e<&60!H|@va|J-2B@m zJM^e1&tMX&y{J$p%;`!b=RgP{Rv)2xGvgp8_`ch0fcFh=R?tWA1dD7fEmo2}4EF8v z@8YTD;iM zqF=R;#LN_kIKrA8Bntt$b2RdJZwN_dzVg!f0!AZ|^d-plq8j|N z8UvAv@eJgG!xA2@=+}s23Xw6fc)=YS5mE@(vor~2q4L7{1%dg9IQ{l_9ashI0=DPm zvc+a4CL`e(Yn6;S#1cSpPsb-F7jjM|PErZU2GkBOToSR9EwkOIC3xp@1B61Ff>MLo z-eLO?l`+XfOI?AiC8%RDxlOQ(VbXB&>Y(u!f=buiUXVnP7)@bvEKeubIboFqC|Uyh z5i1ShRull3iSV9@B+%r~c40RvhXR$Ppfkbqa|tGz(<2@6){Km?;>utN;xAR=#1>?P zaiPhQY2A_a>|$?$*D~@s1seVD)XvGD0=<}d{pfL#WBx^QijA3MVZtTzKK%2OS(9=# z`^B0FQ10Y8;zDXt3e_YM|NclNdO=3*5hyg_kfv=m`m)tC`|P2t8=B_Z(DtU^X*xtK zp(>lrJEMQOF23;nXQe+&Y9aysxg*E^KR7!wwl=m-wl?O5LRPkhmj526$p1@b%X+?r zqA>8rj4xNPH1;M4Ygk-dS%l{SN}8UevX*LCKJDLM z;`ES!LWe+$1r|Az5Qq;K)Z!_#(9`H`{5?xI9|WbMNC|9WN}Gin%-ju(H5xk@hpBDq zLlH%4W7Cw%pioGjTX*M7=Z3V@bGNBD;2Y@g{-P2zHCbX!Sq_R1bZbkOAaOLca<7G- z_TNpkYm)F{d{(@*4d1Pt9m3E#aAwSV&Im1st_sY)Ki%wWM}G^v_xXsO*{;o^Ur9nj z1cy;xg#3^}_=T&b1>I|5Wy{dlrN%l_z76EWL)vw|gYKz+P;Wr2|L_lC)MT}Bgub5} zQ}-vH{}aFGKjZnIJx8PrWFI|z(6*Vi)hCs|8vp`I1C}dokbiE=4R9^!_nhz?r5g(x zZjVq{DU}ZRl)=&M%aJFyt?MsCU}%(J%wCBX1^NYu#~@YCc?&$b!swX5GieI_qWI!c zafkq2pRWBO?S5g~Kq*%hK1_`e1Y9#;eIa8zvGdd807k)9YKfBN{eK&j>1NTpo zW}hbk8|ac%O}h@>HL!xud@OmNKu!>WfOiBT^1$__zIUFUc9+&`2R&k=kJRS{XCD99 znT56hw2Atm7H#D8(grOa)N{*_@-%Db}bs>olFSpyI@XGPGE z0X9LuB+L>NPgF67Mg++May$cg#?md$8_bn_u3?@S=cig- zFiukwg*kJ&9j898IF8@mrn`MUfqEFb!h{4k{9u#~Muu<5{V1$zOC1iRP}q(=}FW ziM8SW=3J^(R+<*6)Rh@0bO^lgRCg&bP$stPtkssOHtHtCg6nWSdY91>AGX!L2l^v< zfABC|ZjLElDje!rU@kJwmXkGY+n;VfK_5?;OT@ww56pRz7&Z%*Gs%J3K2F#;yK}Zm zyWFTqrYV-d_CIE7KEKS|(!AG*WT(cK`%vYjGU%?Ey5t#qihb9UDD~rX(YnLDzi}YWw9M|>x`V_Z+s6!gRs-&BP$;r2kXyTpn1_+!dmVU2q2O z(T$(%ap3wO*3(G{QxYzHHS{5t8EY2TsUG2+z?gLbS9Bs+wn_o;ZWAB>7+9bqMB<+1 zuiEi&!Z3Z51%SjWE9oKE$f!GaEYwQkbzeN%Y~poqO3F=VMmjM3e#5%P`x&_*Notdu z7c#@qFEuIy^wOGv0RGId6{LI_Fd&dPCHNZ}FT({bo7b<(7rk4~T~aKb^C}@1P}|$P zAFo%s`(kqHj%9J-rCsNz-$p~Yo-;kKT93V^_or?qoAoLHSHgTWcLtPk%Wf#~oOp#R z?1(4{hQdy{QyGYMooG#HM*4r0CG3;osx@vor!?%5ZEtpEAZ#@hBJrfyZ3QTf*zJ4$ zBT}1Vvq8GtCHvMiKH+Otqi!|4fDGsozL_NcM%G zdIgRnR)Lpf-<>P4lm)ZoFjfY76eDj=m;NF78!6J?7N`eR^Dz9NB#fx%yOC!ezJ+=x zMkp!lJC62n{#L=3YvyIC9^X1>$HRxckR(ozO{f&;0#L}Z!MDlYp%e8YCIqR+ zLD{oWVPVlA(&Niyxm=HG@^sqK8s$nU^bxdpDs1XOfpSMy~)M>#qfw&=)zzrM(NTvvF%0qnCDA zzmb5+x>gNS8bThw=&?=28IX>Z~=EtdRCXxmw7iEs)~?q zyxCaa9t;8#g{E>&;c?f?j}~O*&~L}U~e4KQud+Gx?JIvrz_DlFP3?<_R}g5MU$^5-*j$^HakmZ|^~NGaMUf zG#%S=_4Jeb5~Z0LR{jgbbMI)Dw`S=!Qv~x3 ze8@zzbI_@=OScM~I+^s8!|`*{17a~~Z^OkAc9ytMB$_;qcSi=NaKjG_#@%D(EoK2- z*4loNpweS@dAdW6UMqfhV=EEc5N@PYTt*%-Zn{G%+;q7Hn}5*~o@aP?lE+2P-DuqID^MfIBdLjj4RXN9t}Q+W#7k&ks)a*0hRe zzAdmFeSyvy|Jew7pp0|&7)KB3L)ku*)QznTG=`vJ?CKe2yxNt5&>K}B^lm%q7Ag&_ zHvRxwriee6eTcrZ3`zG)Zbo8xX0|&C*DdDCznxr2$;zVLK2jp@heQ$xFS5bNxL4rV zPr^4qCOjlyS=KJNe5Nuot93M-LMo@2Bo-o7lfh8TCe3D!)dLXl`76eO!xEjFf~~+5 z%4jOu?g%95gs*$uT9%xd9=2XM!=n7UJwwECY!?#2R#X=?g-MqfTpd3v=^^Lf$fce3 zKB^p9v7Q7_0exe9mwVezyJe+oI2r7BAR&U8X}v2?PNI|?CZ|P~=(eNo8`GH9mo>WO z@2uO*hxzwUd5B?0TKJR;nt(9h?L>pVVI^+ufKB#Q}JFVGXFb0!@~hd;OLYfhu1R)}ZG#XJum>%9N8@M+`D{HkmdKnUF<=E0BH@ua!?=+ zgzXDPm5kXca|4wMT@{wJOlMeSGv1F=Yg(S7Ofe3f$KCdd5WKIHX;y!+ipOyXDq#3W z;SAEcK!dH=cTC9cfO~@UTMgK`knOfY!8-Ms;&VmYJj^h`^)DR4#jh8#3wQJ0jfiIc z!QO&zewj*9;sQGvC8#Abg^}wg)|c-?fDrN*-tKYZ$J*1!8Cw7N3JpOLV_rr8h&eo)Zsf38i_=P?Po=WHu$`wv9_Q|J&;Yry>M=o-kfg6~kw> z=d~L+$&IJ$Z-{g8sVx5+(hr-T1*&gQHZNMHXIyY)lDVOlzHPOfeaZ=Vh1p~UdTMqJ zn=3%-g2ih@ci7qE?l(5k$~yUVG|~uSl6VdPn_Mmy@FxTjmMwa3E}fTlzSRW*wT>H9 z^-N?Q>@mF{m#~{X>Ywy9@iNSU;AEqjJuWasE-*7PxHZ*OE6c>y_&^58G=z2mf!qO~ z(5yb7L_U0H_m6N{BUdCG;@Kgn(w3JMT*9vYjz^w(6ktOyQ?#IpwJk*9>l#+P;QJQ&h!UE$#OdQ^&WQkf2Ec z6Et#6v|B)}9(+$Q%=SRG)pqvjEhczd0_`*MXIVo2c9ukZ@u<;2fj&_d%FGR-3wH^1 z@OdR>Bz(iA9%fS^pnXLkic*01y;N2H4^$LvF-l)Bw>yH0p}#B&U`S3b;0a+*P4g#= z{Pq%;I^3Qhh$Q7Bydh5G@LO3YOTTG?9grLjIv-g0UG5L*-2uxu<2YAD+$KKslp1fY z8s`_~BTNJfQ$Nlak3au0fL22ConZRe$tCdk;$>`q{F8&X zWw6`rvl;?pK-c0yA&CJjc*j!swf+6U5h}sYyc4Xj)S`8kT!8t)fe=GOf%trL0I^%_ zc7IK9_Lc!6jI6nux|n8T#C+Zkzr+1vwI2=)b3oO|WUxm)mz<*Ds6OzI505YNivXp? zX!)@k5_~{cPY~!nc9V6>nWCILgSB?Z*bKYYhq5`7()&idPg^K{_4v`- zf-Y1vXQf)oa3t?2Kn{XOe*kpLji{^k5pB@GEZ316zvhQ51X_P7R!4;yeP($pUmE4j zdw_aZL!&-|(PFSmS*6huEd#}txS@PS8IEd`XSTyCbc*M#1QFDzb9K3@mF3-Lte&0p zu&{0@9@Cg#)WI0Nfy7K*C3peh$`!xRHRvfMYr78XE?y-|6lQ7w{AuRO_9H{eJ0hy< zz`X3}FK--ID!TWA;O$q4!ILNL6A2OQt_i||{+7XLbX?2dCaw!)?(wVs3ARGjBVxhw zPLW+OJ;s(-px<=QivZBh|B4(_Y*f&Sg3_CcI*(nSH&|B1SNjr1vG5N36rn*4LbaHF z$1+kYLh1lf8`qI1&=WowU{(78n3+4D~T5EX-^u{md=za zrtONFgaf*c_mIyMV$(urN$Z7gh&ziYtWVmQhwT$%3meAW^(qUKv93!%8`B#!cBt#G zbgWPAP}p00EM3bqrlM73-&qpkte+}UqTDJO9{Qu+F>1VN&|bQq+JLS~i{%qtdRKnU zevd=s-2Y;615m9w+hymUH;CB1`F0i6XT_IpX@jH_MMA`2y*dm0B8?%d-+0(>9y_t} z=&yN<0lWg>iKV5 z?Ij@QX#T})A8ZNn6oKsIVuo*d%=u4P&aF9}dHE+ll7DjLe|vTRpX)LId%yWVAfzDq zUW*^;L2sn_irUgB5BM_~ZswG$L9J+id2;eM5X-uiv-Ct6t&!@-9k3ULpYmZ2H#DKY zb+a{*p)aeKBlb%P8rTNd2I5NKvXNihtg?=Vz-@xj+Ef%Jh!)r*aEejKvLK;(1JUGJ zZhCA@5%*3RtJUo@DI6kZSfTRG6tVu0)2BHsyBl2w{JW9oVj%?a1S#L7pta0F;v{Rg zZ%hI+$@#7)&L0ygG0?DPT&jD|py#xFX`0S~gtoii>wP z#LVM|?SIAyKtN23^(Q_^KUJLIe>h8uHm3is-m%lNKT#R#_WP>!^)1=SL8rr=3FvfV zA|)jzg(nO{zm{6%muMfPA$gPM^~Lr2g*V2o#c)e#568r{?RB3q`NylP6R4#}00ZQr z&8&5WNV!3NpSWvsFKz+2ee*o)34BqR*F+KIu2`SqwP9yrYF(8KLrSm`uPkX_HNR4uHl&XoPSv*+myI z{55I4ebr&SJSJ%x?179~%cI`YeVTeWr1lE!6GVt*3AfqAAL4}pyhxJ`f`%`2yHKBU za7XNj9007Hi&W@6Z!Bz{F1=aDGwSO&yr3O;nII?i{?KtNr8b~+ZgtyYn9)2|a z_R~SNEVu%u2EA6_D?S}vo?c(i&*6Sqt|+TZ3fal*w)NHZh2!6E*f*%v!E7&g2`bR5 znww3VX;#&jTU)d0$n6IIZrT6yL*ofj5wMZ!X47#FrM(T_vqZzLkC%)**f!*BAWzq_KV&-*F}ixcs&6 zscY9gWUx(Z-lcZ#h3NJe4AxI&k3QE)|Ly9@$Vb^-K#_%`TNFh#Kj z7Zs_q)PK+iOK(8t-9?Qasfb6@tXI*k=?j+#6tV6N;qs{ytaoE{EtCiA#eooXy&O$C z2x8rqx>lb6E2*=&O8c%%ak2gd9^cSf;I z5Qp#%aov2M^vpaSDP7Xz5vs5s8i*7SF!Ilk8T^;HJF9#t(>K-e&vDNAxf%ujahw&L zjUC+0ZA|}5?(pB@qd(pf|NYm0iy;2XQ{rFQSE#6E`C}&WRn%G`QBYZ#(xh8J(SX_p z_m*D^0s$^5tsvDymS$BvO5Dz*@6w#=Qz!=l;^WIF`Qd^jSxy7VJHg>N^*ZHBn~BHQ z{Riv@_*^B?ZoL1J^2lzuUkC1)djVu@x^Z^2ieKNbo=VmulZ zy3rcHzsiZT-gL@C&-&wwEWZI7aq z8cpEWPH94qC4@-fF6l}Tzyb1%JV7;sLQNle0lZPEY2Y)y$~%T@chBQmAMIv($z)a? zgg?&89peeD&jt_O*$UL}T;6utqP-0>04X$FMndfqwQF+s)~_5#5br#enMY;{(S@Qc zx#4Y?Xm$5+j6Giy5o{^Zz8m=hh8ics7E76Ed7dJI3bRg$)wPf4cr3A*+flo{F@@j? zv+WS5CRc7AiaSo!f0N@6KJG**mOMEQ9d=Y=wXeTL)&z;hYh2<^W+&67FR@ub!!m{U zqgENrOKou?Q0-3w^ITi8;};y__IIZ?n9Qy0w@5!zg}Ga_GYB1Xhj22%Rukw0FL8hk zmVw?jEsn#y=}CKL6NOrEQj>gGx04_{MFjgon~U%Xm%9zpv%538NqnhKxxw>dG65r9 z?#D|KD1J(IwMU{dys-uT)#yU$6ELtT?))d1_>@TDU6&LZNnj8wF+`cKX`EE*A~s)K z(uK8)^96E~D^v!w)TJ=n?&uJeM!vK|$vId;Y3Ifxnj`^|^=}O1FTV`G4|vMaqlmBW z3RFf#Pnyio`2mIA8IkWltNYARf=*XN5u|_`SVkdcYV&$Mb7|Ay-W7w;Giy=wmGz-B zk>LbE0(HS>vklvT7Gz;#Cv1^%phYqmjE>xn!8ayv$azY_M^o^hfv{_d)hCfVmmt&d z`3+O^WeJ_)Bb-;iONZDr#5j+!hRKIQ$t10sDJ0AAstIIbb!R#SXT&dIMn=kHj|rF`@XeRs0Vr*g;LG-^zzcyvHVtu8ge``0&4h?cL-^ zk^Dsv1P@U7d*E^Vg?g5ajs!(Xne4Bi;qX+bmNeTJui8{E3b9d zZQ7gKuPoZv8#kAl*2^vh<36W+uDRo-1y9zhhCHV~uV1I%Gru#rx;}c5`7_>dSt4c|p z^%VU6gyip@KYi9s7p{g{`Ud~Hh2#|QiFGA&e#`w3t@}o}mGc8bqE92hvee}qFivs` zTqbI*EnU(4En|NDB#JoROx8U(1v@Yin~MDLgH#NM%!>Nnezyw1K7dd8k#|gA^ys zhHYNjlp|wKBX6;RJwB|WejA{2V$39!^dUc@u;|8y%Fv}!ZfN3xbAIgCd){F?dE#jkUEnDg;ib5TA5P!%2Ghl^0+y;^b5i{T@e;TXT`A_gD=hDq249h6lB^D&dXu%gOUBV}E-p!&_tF!mJ~(M|DU7flXn z`_cWw+VBKU`c_mKBY~69#93T$-;6Ao0K7^f>|Hb%qek=a{n!f~rliBayIN@FIpIKM zC+9Jnt3yBytHqSUtkmY>8z`&-F6hN-o+YK?aw{!Tz}FBPXuO|uv}1a$xKqO$<5pBO zsOp)Mnj#2wiI8fi!(=$FduimHrBe<9UY1=*gzt|r=+1>LbP#60plGsLEReVV()qy125GVRQZ$7$jc zi=I*8ctR(Z*>K284uv&JqHuqGZ&chMEI{7v$KAJC8L63d2&wkR`Bcjg4@PB*p=)F# zU9wzAwlp@BxS1u0>D0|3BeSWgM4Bi9Svwn{P*HI$icmHg`4AAb;}970hMR-i%KnAZ zpCt*O+btT$TjhsT(ejjf4#CRZx5^>{ zgHzV2TQl>;xgslX9Yc+=h<9m46}6!0TvJ8Q;8`r^0-C!5s)!JaeYSM=l|TO}Gab!u z2?*{}FXKDWd*m5erxoZlPAMh~hwN-W`UD{F{$}E}k1n$S5fKTtPjyzOqKrv_o9DR3oJ_+iifQY%gi+NkkXt!3A@R9awqN@xSd0P3-kL(Wz&v18N# zg9Y;$7fJL55s833rz8P6rDg#N?H9pp(;F{cUqcgxAD)!_RJ&Sw+LZD{nf-mAen*Mu z@sfVW$_`1#Lhkt~X#m|G2q#@ak_h{^$GLHCga(0|4*(R3E=`1b7JYFq+B^+t^R1+S z`^=7uuJMh9h89suBR1=ag(Ox-basEKI{ka!2678=mTY0rvqu2ijc67fgIj`Gt%eIU zUG}!>*kbOnJJdvP3Z@r^p3M!4;Fn}DMKQbc`gWe|q{%lj)xmV7NYfsD97$>s{!m$T zIeZ(atKGs5dIl&DNSr$r{ub#GIuAcG#>S}KL*k!1nXTW$F(pk@7B1_hO!)(4W{AT< zy2hHF%t2s=ILk+QemWBz7YUYxtAig zSMAn?L8)(tHm>0jnj?`|(nrD-!znUbSQnOLF|CT)1WWFXDT|{kZAyFyulZwCS=6W| z7K0tmk4y zC$@x*$>;aay#4rOx>B01X=wRWqsaSwPlvR{*;&i|HR1)WC7}M1p!TUM@&Xao+*6GD zASI}>gjdzn#u)PAchOwQGOJ2hgDFf`dNva~O{=p~wCU8=m-N^+#;QJLYlV2qsazQx z&DX*bw&cim++-;KQ$636li7-?K$6pz>%5fb)b_ZmG}j$(eZlqg0cVlrn`dPpun*#K zUJ0~a{Qi!a-j+$tAV$&M>aAcb~!0DIYk0zM?nS*?DqPmxxZ6 z&f`}g7?aERu&9$V3l8Z}y`X}L1BmK^?aiID7f&ki*?U4lOfO!LvLB+D&noMzv-kCN z!zV8rY+(xdV_4Og^ia464i`8lW|W(QC1CQUHU&&9ncV%X9~2&pSE zs#a=`^chwzT;(>jme#GM^dbpOh>OeCA%$}Br1h#Wah6^z+@VA+7@*MCU->4Y;3#Lm zLz0hBQ5Y@8I5)+-2QhevteQ;|(#xLu#75dNzI++AQ}@9=lHH}o-=Xo#Y&LgUhrUIz zF+;arW-(_d8X}Gz*{rUq=2E6x=}0GZQeo5-6t%d`$xhCu zE>w8E*qmdY?X6JCIug$F`L+31Zsgt{c9G;|59D~x1T^yLcepf=9ZSk;Y#$io`}+5X zBu=mUs*)P~Lkd3+eYy}7>UR39=!@hM)@o4>uv|ln{*5M$KLT;^jv8aPSK_X0@rk9i zfw`ZP3g{a{Ad=92LCc2W=V8`e#wPh%bJ}dKX|ol?O^fXpiEkLDCEa*gy$woAv6`Q< zK`( z6&I901rADjJ6Y0tI{(wNapeIP!1F-^RbmLSh-GyMn_Jq$IuVcdt0jeXy!UzO*=Idm z5mQtowfugVK^s~Eujn>le-`*GH*G*!&IXeLh#2826?+k<{%@OMrn2iqoN)ip$V%`r z<8ZqN z`S+u_8nIu1ce7Sz++b_0R2Uzuj~&ngVxT`RjW9Ql4+>)G);*|c#UOBsBrhs>h3SkV zOE4D`yFqA;#>()8sDkj50cZCFeSqnbP!ZDfG}C`DH-9B5a2P!eap}L2q&8wd z%l9i__M%gXHU&tDfNKY8m4uw7>-d%(j2>G+}X%nyuCf60u1sO~6<{H?@&>L3$*>F+pP29FzmPl?6 zS=(9j32e^o=(ZyZPoCf|*ZTMlj~|0L=IML}>Tml~9F&P5X;Atj=X0krkslF)V=AUv z_+=3jizntFw+s(+L2I9yed5$$dh-*}q1^0*j4>}y$LJzFui){I7u(a!>+>8|%cMtk z*Y!PiArv}eH{pzxL?Opy@HD&lB>k(-^PC_E)RRDZ#Ia+H!Z@j(F;S*xp73eM+N;f= zkbKXeX&@UxuBVy63q4JsPx)C{;6Q#cUcL*+f(i>x?o#`cVkI~25D_=zSnr?26!tkR zK{h!DITWvNa7AHuP@x{}GbXRYuCd=)@8CL#^Pxn%)j-x4g&E6!DJ*KQW&U}L7q(Sb z#kJLIORgq-rOE4WZ)LR&`Q1a3Vyis2;5r>C$- zyu(wTrIeXnSoa?ln=t`yV(# zW2^{dwm?%`8tef_L_GcC!|#8Ef7T9Z%$)p`tq^ml7Tto_@OXTSeCv-TBw0bqfK6Pcn)06jCbc z?ueS7Fc>R**erKH|CFAtcKHP|TS%`*6J@wH>=-5mcW5~4v z$TWTj)g^5yucT^r}JyN*Ix$NBTg6dq=$>pd1DZL>Dw56+J zF<-UP@LWzolxi*#8*7#zItD|IrJs-ZwllA$}HHEhZb2d zXU@OjCNUjzr!}%-3n(|(gr$G|O34d{%_qQ|fR^NSZ(aSDa&jjN{>be&AQaE&pcfa5 zq2OLBe3Tx3Y%6@+QlQ(DBX#Jk3CJdsjhMa8dGc`eR7&vEX6`PP{HD#N-{&Wcx%c8=0v z@KB;LI^{0weUus4&|*&iBW!p&e1fnodi8}2c0Fwd^NBsYLpQ_H6>#;E5?ZrOsI)!E z@r+A&`(0Cj1m?sB(xW! z66h|@C144U8jY6iht?HMssCJC?N??8j#m(6CPQmZ)o!N^AF|Z9!+zXx$GTv1mN$vD z*vCw*%(7q;2LkK^gz9-R7qyQI-!N|E924w#hJn4F!k27&a6x;>F3jNv^hKN{f|vWq zDFPGw=(ThrNLvteA!zF0zqXoXJKWizo&+TG$iAI>oXCk*Ani4-V1wQ&7q32AkJ8TRM(}c^^XfaqT}tl`D9OBH6BF8;DJ!9b}W( zc_6@&HK4*AQJ$15P$B1!PuFL;m)H7$XaPboVpGhWTBaL3q8S$ge6Fx@tu`O+5^s?U z{6nQvTzZwm*|H+f8wNj75JPfD zo;;X!50DF_rsHH_!$RE3hvpTMm=>8EZ{1Z>~xeYgOr0JS(mB&Og`g^a}$L8So zLG6e1fvWH^+N6NWtNGutv;gi0>$hm3EAAjU=8mRqJP-Cpf7?Bt4+f`dJ_qD4hNmws$w#@be@wyc zno!XYA%Fey{;`7k-~QhJnrT(5b4Y%jX%Hyo!A;)<_*;CmqREL#|$ z{WAL0gVBO(vFRXp2i+t>rJnB<8#^>6ZB(g47A}%GH^SE!F6_`PZ|msaZX6olAw@im zL&wz?cNg8F-a1q-yJRoBcq3&y1Zp>M2e4Q=C?dITjyV4>%HAo+(r@kZt*S(&ZQHhO z+qSvVs>Z{qU`Hhvm(G`R}vyn#txB-x%g!1e{f{s^+hjeyl4Hy;uY zBZWD{Vt17$A->3DIEtAwe6ncK)f<9IWu6o77uQUU6q&6h)7Z)w5o_OIa$rwIRAZJ| zm$BqCX_F!(|7|?9Qg$4o06KUt7D z25ce{{yQ#-dXMuqwW&z^SC=ymYRY2l>^?6 zd?nkdVq+R{!F5FNVLKy_ZYdZs5C7Qs(Q;=VLOQPscVFr~yv)M#1Y)N*D8|gZ+lM0Y zOnKqEG?<;R2_c(eMlTE;yr^0G)Kz_>SET?I9>T^#M@Ji!((!DTtM$@wW^|TDX_WIr@IGmv$Y+>j@?J8H zw~b>3Jv-@Hs-!`?zR@-DR{F|Ylb!uWt5Svho%mnYjckOmIOAz`qd@ege%RQ_dy1#^ z>RyYM&ka$MH44KN4JT^;P8_eXi#O<=GW|zRoBcX`fHJ;p zZ~S&819d~eOLrtC$Qd^;oEHLXeJexH{ca1RXmbt%C5pMT7;-~*9_Tn;nlxQp>egO&VYnD!pE>k zMS?H55XT|1nTwXwLllS1_O9o?wi{|5NTyw_AWu2##3GvZ0;-5|1p^zrJYjHj;X}kG zool;Ay+-pv<#IQ}Y|b@RA3q?t+c6;7g+Swi+PIj&j_Fk@?e6QVe=B{8XhDZ7c8URD zLs1zeGfJmU$ZfdJNq3q`5fHKz9=(#o|D>g1_+!|yR!Q}2h9V*Jqp4B%A)AKI_SW6e zifAm(kNZR_Oc7;JeWs{vLYhsrtLnf*rKZkKlSQ1doWZe7`_$H1bYJV1gUZyt%{Dc}^Lz8TZ-NJ1T*wUnCZ64833fD>_O@5lPNWoCQ5Kf!r|?WU@)T zUVondVQ&AN1(Jf4ca6!rd4p~O7*Z~s=j9qa|(iR__fmwN>$BFm=^vcgdm4)rR-F+~d z5JzFE0IpenMY)O@LkhD9%j&=<*B!rF)qK|_@EDI{U-ifD2^QA{=#XdGbt0VN!xohR zGbH1M&8RcaVd*Hz8x3endZ-UCwLOFT`YPAWqCb6O25^a%^wW4OHN(TiKHyAX^<~Ph zbjB-f!MOgoZES+-=B*V!${2X(t!>4j4YlepJqOw2ygB;#`=A{`u7^4^dnEPrhctj1 z)IwtZns`#ACHwFrY{^mP3>>8fD^6uIEu%LgC^`v+>+~@P6dE{CY86R; z#<%YcFd(_9&XGlI5W5lM!u86)7NE7+lUnHxMS0la;=*iam)s^e?{$&_T~&J=*~ycg zuh>T9wHb1};d-?pxlOQ;^N7aelrJC>m!IiC>ng5Q5tIPJ{Mckv%QI+?kU-yIP&dR1 z2nsB=|1@{?yHODOkUz1D2{FV=ig+s!w zwk_c+6@UKZ+#@{%69q2<*FhNiq|Efau^z(Rj&@jYX8^0D>v_g^@$hJ$;#pbie0z9% zll{>wCx#ywi_)N{1nrEmWG^VrDjiX158A9iXSX(hjp!o1ZYsvUH;px=45xew82*JMLzHx$C8pl3P)58F|ic^Cv} zsUtY}(z+FlAOQt?5YuQkZmO_|meU+qo5@r-sCR`%U zRU0NHa|z(gzI1BTY21Wk?r@ae>`Lkx)KD49D5@~m!<8Q?-npe)&oArqV)g{r!l1(> zr3BiQ5dX5`8%h{xcK#N$;0Y-tEYfnK@%mk81BkS}7QE9bsR}oLwT7@4x0n~cq@}f) zp9V(e1yO@i4wx1o#nGslw%8$JAi+_0jUYvl$@jWP9qZwW$bS3A)`d=C;`d(azvNR$R}uZfiEh!Dh~X1iVB=OtS3Yzvf+%hcedL@GM%ie9Fx!xFG`)mWyIe;} zb69GM`5Wmee1{U3-6#kfi7ht!929Xx_Ls|-CgzfQGL}WvzsF4rxS>FRk@`Vln}oXb z8eEHDC8v`o?QJ1BXVCZ+RN!_M-s3B**c@X383TF8{8q)4n^&&`bJ7}NM|!tCbe}Z^ zO4n|84#rf_(Y2V}dA`G$?Qxr;t|7Z8rfJID#C#TNsZU3+{j-rsW(Q|A0>yppYNsN1 z(;W!aOlt1+XJxiNgj@ar?7u5j>~P|D^|w{W8T!W$n*R%>{-eK46>|?nWh8G>ZIUrC z;<#TRxr0h`2@H5314@NR6ux|tiebpe#qsHY)$i`eLX9V{t*7kVkj35n=xncdX`U<( zQ?WHb?lpd^%O&?w$CbM2=gZZi?vK|S!?>g!w5+}rHif^#wTLfhlRF&|0ueGa*WKar z5kz!hlJ*S-RK5g5@iEtcU-7OaVdYw9>gZF#H=amK2(CZVBFJszqVs`C@~QF(a!n%^ znEL4}BzhN_>yOQG(x;`qD;Am{rZPmxo8JL%hs{&N!s+3X)@zrZG z!%ouh><<^pv<-?*U$ZRK7#)?FaV28z7wd`;x8G|n zA{dt(jnM*Jd?}%Rvoe?vTV>N}88X+_N_m3>OiR)(O+EztYP+Cw3Uqg;P8TY53%L(f z|E>@d?&cWGu(uvH>eCLJ4>&}7XXlbSZYq3>>ZkJ3@k=0mTrhr)C{mgv5wN}v#|4k)KtRD_8o6Hf?XZ?xwf4WXS3hw^V@lc zo=N^jc(fv-$ra8(?1fU@FtAuCjY~oaKIumQWym{jNGp7x_VrOK~!xmXVtC#MNdZk#e@fMSDNBK5)2Q7a63eGkR;tVX3XGk;=fVDtxno0Cb>r{ftfyI)y^W-Px-d@n3 z>MC^fY_a3^a(R^D;^&B>zA|r}=JjXIT}KR3W$UETa>EanLYhuC2Wz8*SI5Hd-nV4g z&no5kZni7;3ilNX`WXtPRqIGv*g6dZMV>;|L~^D>EkJceI!`M8NPlXUbkmhwj=&!; zmqNZqVN|JdiDhX$zvwIU{YbWh2D1_780^$E{Weh}H>z$t%(SX{3}K@!2u$OxS+^)x z4DD`;;|sOle(N-AljwQUwDIFXBCZipTYF0R?1u79Tx%@E4W?C9u|M5gFU+JgKDbfK z>8_k{+LMEYJX!bM7hhGDo63&OF)CE|8M>6Nk-L=X{tP2?g&`t5()OaQ;}@X}=oRS~ zC)zSfDkWzbdCH2DFF#i3#$R7KWaawm$z&DQq>fAfj%!`$Rg<;%>>9efVED7H)@=H4 z5o3}j+;7N^&MUXidH@@l9(gb=e9#`>B}#a_t(fI3&U0&?2ro$5c9MRF?~Z{N3RXE2 zwW$dR$PSq7zuy|gwFg6gwS_gB_NT&g?quV8LdfpTEu=K#XRHjhy2|Rp457+7IclqI zp|eX^H0;x3feV!Q0oUH|D)fT{SWM`gO)UNgJA&Ub@T`DknE$zj4xeMw6N$#B$XRtv z@^(`U#xv-uABl|jbHy?ZWHNuuUBW|(fZ3fE=B)^Or5o}bAv3TZ7y5?H`J zO^(+^h6|{{Lk4MI#@z-YQY)BpZ~VhDAQTfXN$MjCyBlxRO?)iEWMp0Z=QpRf;j(!9 zGLOzhF0PF(@}rWTD+Im3`~zca2eXYEIcXE_Bx?x$^=89ZYJ2qS-?#pZ{;wvQ??YPd zHx>f#KeqmVvf^UKR^Ld7|Ji|1sH7?TFFS8#1$8){pO*4`nDI}_kfmLaFfuhJduSrA z6e$_soY1NTdfn(*;`#e zDZLU1;u-OhEP+UqlIEv)MT#wdQjvn}YO#~;)ccX4RhjqEm`Jq%esl(UXHdVJTGDA4 zppdezsk59~n`4>tdu~_`DWfdI|4;!6_gq;vf}pgO7bNyuR{Rh}3T9Z&Tx9_M`UG>c zQ{LsZ%0WV#%^-rNy*C3N5~XB*q8-Px(k{KtE$yQ~qFI*7p=NXVo?at&LNP9GXb>DQ zs4Q-9R;H5sqZ4U;$SMHU7GwB=Vj^Z6^*k(dJm*)zo!h@RogPR~{;8#@t4-+>Z#ISd zsmA&T7R@}Za;y5zOElV7c&ezU{=0~Nl|x8zutE6~UUN^y5L|cFWD6L|N(vZ1h$nGV zS81+HEOW?x)%QTrol@YNv#PEgq*={5Fma+OevXt+)Du8|(bAL8RRbrf0f=^YUIjAa zU*|!axyK#^Pg9ZMtzVmh(m8DOWrpzqr^kKz*c&NJx%vKe3=A$l`a*&_!@q3K)CRDj zU7V=&Q}t_{WaWUaii>w}kd0_9=2nr2Y4T;JRvvB4+%DUMB{2JANt!Vrpz^{(h3M{nXHIpc61j*1TZo%Zx!ysO#d%=%C?i?;ukg>l@=q z0$a|ksDx{^0kZIIx3${g&Okv~X_iQ;lkv;AE7aPcjOIq!pC}p$BAq2(WNgv)Tm)=@J+oNV1dt&g)yc=Ir6)&_gw5fn#j< z-!oOn2|kw7q-y5%884iGjIB~u_FxxF)zN2|)s^S|rF*pfMW10Y7=6mn`L}S6eQ7z^ zOXNj#PjpT0nnF2lA+ISYk3fqZ?Ir@rD`(LDyEaM#Ic{uzhA|!YYs@KT9SJ|Pi!6r) zgF0$@#ueh6!xacl*aqI)r)Z>OdSQO4!ZYnG3BxdfrW*;31Fm>1wg~w6B(^_)lZm$h zGv9|+CPo!kJ-i{Q%j;WRDfF%WDL2UWgth(yrUx6cQwd@2(y5!<1*79eP@r^TwrMOG zB(SMhK*o~OoTi@qyT}|7Y{c%tG8%Q8P1+5%VwMgDs2X1I>90YC^j2aG0Ror;oZMRi z{hS7$S#&&lP`LE|vJe|3`13P`PN1g9`B>z3GWHWYTr_aW_qa{7X9Vho5TuKl1H{U@ zK^;Sh#(Dx?=>teIzz==;Mp<`%XAW@wH-BdT z^Zp9b6>GuiGy0LfK0nrKL}$%XQ>`g5Rt4b~<~{+szGf1?NK=H--g3hBg9Smg`bF3V zvKrSa(t1Jz-&z5FNW(WL|I(j?k1wg#-aKAFY_^CffFz(Gml~W&oP0m9l>g;w{9KH( z(kiw(lzJkgHQI6NyAJe=T~*ip=mzlzO-v@Jh244&BAfm;#Jd+*^Nu+5mmc1qJ&sTL zPTuN0$g+=Y3HqSo`8{)JU8=$0>xd}YK)E2=^2_RyEFYRd>Sxr4=dviDg`0}gm$azr z@+U`*uH4P3vu9T5Ehf3ZJ&Z2n@49%p>6j!#AD zPg>>+W9Ti`=|8ASW3Ye@gXu8Uh5Ij+Xx!8Pn)g#8W<%`wT&MYd4W40af_30d@gA!7-aZ^fQks4r6dFlka`n? zNZh0VD_0lca3G6`Aqwg_7b&O|aifo!R94`%(8NmYuL5_iwo!t7J_53dQ|1!I)YpPQ zAJtQ_YrVvql$;CqYWEXsr3Ad%BR|(aft{!>S8>OTg3>|RSXbsPi7Q<%87SG!j5R}jbDLLm)iQ@UfI~XwlG2-U^H$Pshw;*(hv<>g*XzeEo5P#Na9pf zk(K~)nY9s{Ve_I3tMcpSUM`G=*}%(d1D(U2P}X1qYH`w)%>zkPTnkY#p20zdO47BW zER&_H(Z(HdCm{@aSxKd$dP<>f^wD^>mWOh|&^S1b$>q$P`d&{37{*B$9{~^}!E3~A zY&Fp`2BNqK%^j8`IhQK*^#~wHtOsNDpSTPrIlzE{c$^xonajMZux3GAkzRar-G||) zE}>>|l+D%c969*L_l`vYWM4t059_QgIT9xe6+>M;7_hsYO*Se1Qf~Omn24L!F$Y3~ z*$`L9ny^0XTjzj;u~ezL0+OmSFEH-=^{C<{$wG8(;5&>9?x|!0p6$Ep12{#GN<%oW za3zKqbo_bK$8VG!kQw8n^MYh!tt$LgJjq16O(3O5U=GsUgIjxgCXsTx`w8qP7Z*?P z%@uWdC`d>taF^hzc=u-Smz`j|Xwt;nx2v|^Q5Gj$C&TbQ&t**dR&IEt?u z3kzS>=Sfjp3#P`T`Y6xzRQRBFO&18nOF(DO8D;66vtA_6!OygFD&(_rCxmLIBZh@j zkf5d?Q81IZ*9ykJ6a^UjhIY0rCIq&~)Yt`O2=G2R1DqPJN?43zh5o|GE20qEj3TI8 zNF-(mf1xsNCKxMB*x8h@-286Y7$BW5IH>DONK`N!U8xIO+{}^a!jkOLu&83R=SA95 zwY@yq90i%`y|Ch0O3)xSTWF^_J3h&obwk9_?}W!xvC+@uR!r4)94H2^P5{~SWSRjFo`_9%#z0D zUchv5G+^ZR-x~fcmPFaT`W$=(X|t) zpK0!-$Uhy6eXL+H|(KK z!zj99?QSjAQ6#3JjVRl$yG?XfiN`hyg!b}!Npj{e(v`vnXkIjETFqFL75%mHpr=l5 z5)T@bo0+XLyPR=6;gW#t+*Y1%6pm_1ao3PzX{}TEJw9jT=*Ay7=u8-UVSl;ch12s^ zncyh3QX~!aF4-xIGQ-*Y;C$Y4I0~)J$qBJH1_QFoTJ(r53+r$-LvVdeHEX1aA`$0a z6+s+~g=Qm9Vk=+m=H*;yWlc+l9G*O`$pwl0_AniLx&PSL7xy#+gKUZjaS=f+zp*)4!9mVCfiT+$%lz9iPLWzuf zxtr@Sx_%gyG{Njwi=vb>{35r95lZCsyxdns-0E||a|gbq3q;TwF`c&2^40cCoBYx7 zyZr@5AGXSY^gCvfjRCJsVn}xLGQ90!78a%46a`|XnC6YCx+aq1N?WDx6Edi(quE|= zEdx-$mwz@@P~#Qf!IIti?(;M}SA?inLd%d_MtckfsuI{D3PfFnpxuTECs&%~8NeQ! z{tfA^>5g0Y;dr1=vmX{EA zOvnqtA@@RZOmzJl)4!Ju{af@}ysV5ZOoBfgK37DCUu3$d;6*{}>A6`uFj1f9mVy#> zxhX0;rz`L)`*pcBgkV8L>CQk@CvU&d;xE0@jE`v}+Ke&$7d_S5gf)h!9Cz5kOh}cC z7l?921Gb)~5G;>xT0UDlFm>T@O&Fqxy-dM9m<2M|I|~=iaI5DxPCjT&%StH4w+<{F zeI_{%Cey43B_o=qqTks{7}j?#oM?}8VujuBR3zx45_m83{EKb}0E=&*K=(qhb8jqg z!Y*(u{Og3c@7UPy2pKB|F0Kv?s1BIClwSjbj_l~p4}i}=)fHTj8F*>j!rx{u=d7o; z>7Py%q)8OcxlgiA-n>K8is@q|_=*!Ww7?e40&NGIJY9!m&1fd*KR2|XCGN`z>m@;R zPY%7y0pcY=0ruWBZz`xOP66gO??}s#z(kcS81aF%vx(P_W}q3eXx;aaVL4sJ-tzNz zYZ^p4%Os!z(HI2AXD;cYQyRd>JL#|9U6n8+KlxzR?=`l6b8an78p30xh% zMgAGsspk8Palx#i+42V34K6?74BCz8d*Cr7**!DGHHg+V`}%wY1J74Pk!OI-8yNxD zWIdXQDWgu_Gg|Ho>iVZHLK3bh>lXJj)yBs(5zUypR+1B}5-DNn-i`es#+%%GhN2Db zC~z{n7i;a5T(a^Kx#S5+VWZCp7?sVPmIvmOzKfS}{Ay=)<)~#R?uA6vZOe$R6K;fi zu~X&iQq}Itk7)W|(R%np&mU}v-#+?)J*Efn zp?Ng>=Yr-N$kbvR;-U}0iVEgUSlb)7>HQiFoTh>p%?-=tIM(5K`h}I~xll-va)1PF zFSE>_-%RU8b|Mi8xdWd(fH=^g`O1}vB5S46`6-tjL}f7b{@<`}77`zQ1zfzkADMlV zceq@gRght4?nNaun?adWO1E6BBB~M|UObSE%B}1rXLKm;rG=klG)Q?A)7LReheTiy z9@rMD2>9!LQYd<#b$2TT>+>c@BomxIojiGKTD*RvA-q^tV83BNjGU^y?sv8c%`Ul- z5(F`1KW{p!!nhLpe1j053^Og#5=UcRkz{ua-2x$tDYc#{$s?Xg9R?zyB(}?;5>3&? zbPb*Eo#x|K@?Eo1chaFIZ0VVF^-4O$*1dwI?z9j6ujos34fwiyez>FOKmW`kc;gLM zUAYtV#O2MqXVB@l99TK4gR}h$@gCr|ZCZ!In~c&_JuLp#0t+9k9@mF0)G=vz*#KWH zza`Opfw+9wATQf>5$T!k=l@zCHt}}BvKCIgAuK_LZ>np=*dFci0#ZN9f7yn~JD6=} z^XJVS+ed=jOjItxD~h;Zt5-ccuD#5{QKco8vgEmK{=i1@Fw(KkMszWl$y06qo3f>- zHbyrmBj94z8L8`Qu1gchDdpCT$;#Sl)yS}_?6Yg|%j@JrO-Z-0E%05k!3*Q(mOsl( z$EnYB{yNBGf{R>lBO#L>_`$*MUD+l?)-85{f%1>4eHobBN@oR_EzW7H@}o|1Z2X5M z(qXtSu%i+w=bkYrsB~)_#Z~%!Uu`cCu7a{^J^$*NC07F|OH2$|q4%zkt$5k$uPhp4CWFR`Gmqn{PK@Ojzw$S2m&t+ z8$oB}htdo}(MsYKDiRqgk{tlxv{($u3(Cah#cnOyeJ!FWdI`7oq1)){5l1QaIot6|_1K>F}ZG!CWmMWVkk>nKYm&`ok@dZ);Of5wA#RZ>tf6Nb&Enn=xg z4aH`?+AB?V;D6=_p~my>-EWx>zhu~d+9x5@W4VGxdb{9>PNqKm?Ku$zy(N6H7ikRi zKe+ezj|VNkf`i$sUiWZ7^(`7LxHbDWz~#5he^T}sD^fs(uG&>Qi|kh4usZWAX9d$2 zb`Sbz?9knAtx$RnsrybtQxFJs4apn*4-5ha=g z+w{GDgv6B!gz5Q>_P9ToMW7ijvjkn{FYEYev$T2=B4qrXG+jZj3ROSmQlj^%id%^I zWM1xn(rNA4lz}xg zU5+)Z$@r6tWK)5uDYU3uSk9$~nINj!R9i(!O${h4tT~?n_zpSaB}`XppFl#KWmzvp zRau>>skb|8lE-{@=UcwOlM;SdlXUaKSOV}0KXK06|c zUI&b9#)ImrO_i=vtj68r7+2g-!=H|k(Av;Ky~3w)X!#w#JwgUolC*fvA^BHFr=yaO`-R4?f;*bRP8rt>!1Remq` z^+DzM(%*IHKpFB0x1x{SN_S}_c6KH!p$SyK zuxZ*}QMXObB>v=wD2g>!~s0}`H^ zpll$rX?$9t7N*TKN4Kj78uZ*8ai<>f#V1uBXDfyAF`XT`l7wZeG%_U9-X7>RdSwZ= z0>r&UsJ65*mDV_z_+8js>d8K~d9FIH-nzZ%`Mrr##Yw@MSXZZzmQqfA?gz~-Mr2}J ziMOouE=Cq8OOxH|=(;bT$iVJz_zB=;a`YsflQT_9Z9J~;-1AAMf0`*ks7#I&Z_KkL zN{}AGK*3voAir|fo6F|vn}qFQikV$9w9DDu>ZDn$#?dUdgOlO6^W!(*Pf%kbPuiZl z|7MVGNBaIzVPMLnv+Aaz_O8wGSBM5mwtd(&jP2*18bhB()A?aYDx^W@o}YSSbW?GF zA?)R~*`%?94WJYKHOID%inG?0d%pIHm^C?ZtF&O%Bztwkqm}EWwdDmj822j<%DkcJ z7U{tv2{$@c8uQbP%gkPj$*A~i9MTh~+(;~juvaoGV|A0C#;6R}Ofm%0*e1f6mGS5&n4_HES#5p3 zEY>+28aG8Zs%&suw7<`_15#~+?|9qGK;9nu3~w|O8v>^cKA^)#0we+EKM(ip}2g=wG(toG$# zbs;ZN&RaQaS8pF&Xf^Law|>)n#U?6y4d2J>l`6x~ExHO(N;n{|h9;O>s#pJ*mj+tI z=Avkp=}_0!s6LPAEh|8O-!I(FD?qBZKtPi8SYpE7V83*vH(&EObrmMTb9TT!F-l6E zpN|`5@N)|KKV;Ocs9ChO^(as(U29yJ(fw zRwcnlYLD9Nlc_vxGF0lKM`LIPy9p-cEG$ZC>#zA|hT1H{p)iDk;k5}Hwr287R9$cU zqeLA72~?Ad7YEuefCMtNS7!qd+?}^hps$|6$ao_*U;u=%BvO*W^1Qt z&Z*Tb>NTuOof`+`u#;BHn$S-z+#mOU%*0D_p!=O3LXH(77$&6g)ZStt3?qFG_q%?e zE3)>xxR?^mq@J5yt<(~;C-(90vR2Ry^D0p=Goq6>rH~Gt36IpaOQ8&DS9~rwA1ayn zA-E%Xa)D&_{n@NpKtY3HsG?1^Uvg!iwA?L7D-fazxt~##a3)uoJgl0d0|b%s;pq}J zT|^!!?J-ScEURSC9aNM;ek!ClGkf)jKirZ8rkhCBXi4iEoNSFy5ni{wlq!2Xo7Rs$Hv+sEl3A(>B5C5|3!Jb@kTXZWekgK*jKegCh z!`CY|6RSnPiZiI9Q@U5U^tK=~I&zRnN>^pg(%8(mf1Zw1Mvzzx|{Hp zrk1j`?Bx9Lj>KjnHwSPN97ky|3mh|rdmpOLWyWq0e}yp4IM)-m(S*1buy^T&Ri^LN zBhhdqK}L?a_E&%zC`wDda;@-Ex&Lk zi8opqCk163zpalm%&z#^nRC*?!iJA3AdMoGZA25Gn*U z;iXt;{;d0PJ>Qj%j?4G&M^toAfl&?T>Mq6&2W8#}Ig0&PQ0&}IGenjRg z0sO+8epFFnuEre{M#}WzH+};N!1uejUJfVBF%4fX_L*m>)pexm1nMBnA;-NZZ!htC z|6G`UC;BxEhb4TFw+HqR-5UbD>(omxls2pk{cBl9wcRTux`=QMXZ0B|dnEGq$SPYHS8r1Qt zPY|>dkHBXRRK*!6j2tnSC(#FIo1VEczIp`x&K(FmdcjkV*2CnPxP|&3T(CJ_4E4?B zx%xUq_TdEV=N8>9P~v)r?NTq-Il~eQQxgE#&PP1g;#5c+9cD=713lS&79g)9X|s!( zB20?wH3U`F+9cyxqLAoqbZyQ^s4rF#ySwrgnEk}0BpG^yWhd9+hZuM7^7A~;E?lX8F8nP*m8p=IPS}Isu2bGyPjN(2U;I?t(ofV6wzEwyx(R9DtKZQCgf6Y4<|5_5N zLDwYfRU$Q$?)2u(yF%lV(Ye`#<_{}8Bm^UFpQxvaP7Yz%0_or=3l%~;Z_vY(lGAUN z#cG2A^_DW7h_n&9P)bapWN8bB-l?N zb)N(%+nPd;!VTNRt6O7c!_DY+x)euAWWp_Tuyx|P0TSoa@-j} zmWzn@Q)3Y+GkU`mnf*_|%jztw_O z?U21gtTWDr$~CSd_a7-Py<*xJTP*Q2WgWuA?LnYs;#8yba5fYGR&;-P3UU$X%7_Be zB&=Qieeh zz_&Aq@K}Tw=evQPH%ax{Kw-a24ObblRnLI<_&vC0s(mCVd_~wiI+9fD(wTKmj=zjC zc(aGS&_`}PK&io6ylar|Jh`HG4?Eo|6mX#PzV@v)!`=L>h{4VG)O261{@WzlOadH7 zd4e16W$T6%*(FEm#R%t;OD@!$Xe42l?dE2T*8{EkTXA@rJ**1+i_CbhnVn+1De(hYznRi@Ce*PD4eLbzTh~x;dZ_l`vPu;)2{uWo*z@*5@|b zhrtYawqRwY$hZCYji9|)jLo`-vB(Bx= zAhy({rYy83rQC#?F*;-&Hdp>LRgg6?aWn?}tHzAz|MUF6 zh<9WvTK<>gkb7?FyA!(v7RL`vg-TeRc?gZ3q$o608U$FOfB1wUFmMCAuATU{nkhEM zY!~)g6n)AncA6~cS^sj~<8agQDr4j2=I+ephwM!OFr-U3S&qdxH{>1hzTK9{xvtc4wND*HZp`q}3A*%ZbrLSk@ik2DT_toU?K@w_NqRzDZOwswX5C!#xL^n0z z)IhNw&B}v;@6AF&>yD*McW!@G^IW*m?ulHFH)arp#WO<`GGlhLvoR7j`_K00C`*V8 z$H9g0kR%Y2MGyU5kg|(Gob+xO!OQ~F0&R;{4!br68Kryr>rC#G%V7?{zIxo8(RxUN z6hNr1mA{S|eT1*p_0K{p)|o~DtWpA2EQ=4>2aZ#1v$E>rd%*_mAfW{EUoT^QalPkE zlTH|J*3M$S&L%p%vD4n5+r%;Py;D6J%>)(zt-}K5)G@DvOm!i*8yPM4{o#zS#!hCk z^|GB6l6)O~KwQr~Z+DD%H^ue&7Y`vRiy}uCluO3KBo3{)?=IEji551wX->{7Yt(3V zPT=sbyJSRpJWa;#*JojAkRckqKB}!G5MRo_dh;Pe!>ks7y)1kg`N zA+JNU)Yo(Ry6m|D2J65nM74Y`**qr{YrTNK*eG=o4NJcBjMhS@sUqs@Il6cb@o#8L z2`5~W%A@{L*G#d!vC-j z{r^_&Khv*BCGCF}XLaEjfm}n*uR@I)>c0fPD6;X1Nn$8q(GWJ*X#~}mFPYX#dvT5P zLd6I|zbk5uJL)&|$P=y|Ob^F19H%<#>wP{yU!nDI&c6L)P)JVq1`_=ZQD{0u;HmPx zN@MECIuZv{Nkwy&YO}&t| zID;-vE3c;U@21r`l&p<=uS@Z{xV*#VLlL@!Kviv%5akL`Q@u5)P~>69eGL7Vi#{A^ zVtYPKFXQ1o+_XlBCteNYI?u0gbwV0q)u;)Urv&<} zEtvg>pa#!Y(^hE%QLh6{ViLl%VOeAKpEDgA!>1Y9boXL26gvHy4EBnwkAm@m%1QK723`xGSaX%iI?5gqX$7QH3us^)$)#DqyhRR;qdOD8+CY+vL&ed z(D3)=HnZuHr9rbL@G{axY(nKu1Mtdb^r`Hk`<&8*XhkJ+U{ZL#;#_&^9BP)|StUL? zMF%Q4Oz=UTST(Io>MTNy4nC`Frae<3Vdovr?4BrL=LY0p+a$8#ULFrL|;Gueb;97ysYW^WfCADYQ z!?YsWn)hjA9w#k-o!nM|a@yUn(s@J8IMrm=`AUtQH3>X@fMKk&qf9IP*9yGaXN;yT z0ykZ%{+8-uQC9?f>q_{n5Kh^+$q1i*g=#MP?}*DJ55CjRx2Z zoLcIqE^yQ5S26fFGUBM(3gL?l+iWcCjam*Pzi{c?ZM5J2pzIyIEbo?W!K$=vR@%00 z+qP|6J8j#xZQIVQRHf~#m#5Etqx*dC_UPMV?EMFQ8jQ z`H)Se{43=jnCj9dkmeQ#xZ7eFHVN-l4%64=3}S`NC5{|Up4PH09_kxfeJv6k zb8jJ&zc|aYsa>9Dey`Y-4*eAeuIVnNMKg@9sb&m=NVW?Mv+{jTL9l<^{ly;B|G_k$ z$6h}o{2j%4q5seQfdBj^{tbx3s#;3eCK$dw#KuDEGTKn;Rs2@w$Z)U+^gV!Z_QZ`@`a@`xQ+h8J(1+fuwlD*8OZP$b-EY301YkLFXPJ5 zs=tczH;p3LZ=xgqB3k81m3jdX$c%7?9y;c;NqOgH!aQ1AsBNy^m5^F`ERzg33!>+4 z5KDjNl*M&#IWUbbqqJ}(ZBbl1Zqxj3@zQ35;j9tuXtVbz?JUD2xg(8RH4#!4 zhYYc9BWuMOF&wsl%HHqqJ@!1GaDrv3)e?NR&it50ul5}LU6Z@1GW%$K6(+=DDR>VL z)BCaeMExka&Zxm*X!+vZSE$y`VfMPfxuc3dnrP3go+# z{$xh=EklV#mUs~qg={6>N^tf}-#mKd(j8Rc1Bs+)_4=KS8+4EefRp_--~(?4@TD2Pt7H1B^GVJ4XSMxeibV(K_Hf) zhjML`G41g|`WspMZTA zP0SfXZjjlKJ@JQCqp@;0@V(B2)AIW2l=T;b6b?I~whO|NqG)|1Shm@^ErCu~D%#HgQxo zadQ6Oi%KE?Tfg^3tvXPrO|8FK;0CN;{f{mr21L5`(Vg2uhopM&M4XbTA0W z`%2~#v{ptF66>QOIoEnZz>{X(_ zzi^(q?_Ywm^1#BM|5(Ib$ygk9`OZLLzB5qP|0V+e^Q_;&*U94h__jtK(k8AZ*8kO# zRkD#=lt=NUyBJAMTePaFkv8;lEL#d_laCFtl8`M71j-BL92D2S!u(IJ2 zxc6(<+_uw~*ILBm-Tx+v1Mow4Mpkp#W z%$!thwq}~@?p3(<8MT8nfnMK=#wsHbWO~rRb6eGlrV-@S;`Ny~KA8{;U9M_Ax#05H z-RJV)g5vH`8ru%0GGRH7={*C<$veF%+p=gQbc+Zoq4L~{Lc?586IY+KQFLiKpMQYd z+}xL4f+|Z)K!xm9f94vdGP9iQS|-=kdnb%wxG<$E>#WR?=dsT>mXuFxqhX~tIv~-t zaG$34(IG`HBu{e>cR>Ecpl2YL)=VRSp?}E14BvhP%q?SJqgiyF9CJ4@JFnix5MlR|uFnE#bcB-Bd+3m}A= z8BL{QJR$)TLa?H3v_ZKdDgc28_zzQ1DB92pH5k~*xtay39aDItL*w^_A+gO1#_-o6 zL>YwQf92YRdi-_w0rsad2dzWrC-Nr^0`?vmLVO~Tc@sqv6{Iwy_xAxaAX`+`6wLn3 zla+5Me{C6EQW)H9j#*UNU7iM66O4DC23R_DwIc+Kdt^k*JDi7)w#SO&KSE3cYjRSQ zw1D#oUWF-1m-(=&6EL#BjdVA*%SKMY9Q%ddlW-(NlBLULF_cBPH9SZVMC1>Q#f#m9 zFRRBi=aI}#`rT{SavnyPRt={!L5eDROsW3d_ck!FZ;xR{l2uTn6Z|x-CjyH`)l?_e zx9=j&=pVM5RW|6$`)xJI=TXib#%TTXMK>fvtreYe$>)-~R0qP_?#*J6i?TIL%1pi7 z_a86ZU&)6K)8AL({!RP#tzhuO+`!S%gYMsyZ~q;Z{!8iQ|Go^>ZO2VvlrL4d_(O<` z4rUV24N?~+E{V#fqgwVNzLNan8F&>WbFM)EwH;FP-2_p!pwD{gUIO|akgfkX3^#w6 znIStB^r#V8{iO81;0BM`tXof`*{sIdk>0!CkCz{@jN(STK@3yp?73vy&DjXgB~lZcHfKrPwLbXYT9bG|aLBj0*-Myv8 zb2XMiIii@gUHapi&*c{aikr(Yg#sSx-du$x?A6m}ZvbIbVLllP6h_xsp|W#T-&R1# zOb-|I=QZYfQXb6JDP&To7td6zK51~eDn^TAi`5=H8HaZ^X1^w2QGMk$B*3e!Hkb(} zHPo7s@Q)1iVEa$~&=55quF&#;v++GC?z}1#sZX)unlj2k;oI;XsY`pP^RRxhr`e-B z{BOq~#yNW%_*NBIbn@VL$hoo&4vR;q5H1fNsSi|bgt<|1_QM-jX%s;Rp zQ5$f*U7G`&gWG=QT!E8KB8^1Ag+du-p>}fNL>KGjq+`OwZ;sI)#x4iRfT`S?n)B2-Vy_RFvuBa3J z`h}H;1V@;Eh#eXwWRtvqaqF5^kb_wm>mAzj#L4VZ8}~peK^hGhVKem$d18`b?h?74 zS8orb?v8Hx>BufY4Ch!N#DzZj@Stex`cnhQJD;`zq)qTJ z9E$9bOOfxwbAWXw;}A=AqBzG749USa8)UdG}`eq%^oT}H#@VptG49b zIBb`GWAfexU9M*^@%rXB7x!!MiQq+k+@dlav6hD1%R=Ne`srwW5f}9toK(kPf^Fk( zesC{}Ti+m28jiIdYQAtCIpT75ab-jjO*zE!hTNbxuB?(D3(6d1Wq~{S4tFcWrb!;O z(Y19UVQE80Lz}1r8R~t@?ylKithIk({}Zks;g*0KzVQIz8?ygN1^M6Mf|82ze=6Mp zx}gU65rWN(q@|VX>RRUnX}U&c6cIrB3EzI0HIy;%*}Ea$S)%d!!;_Gz8Ya@XnysZ@ zKl*b2q@H)1cbF$xEAL`3;Lg6JRV-I5@)(Mk3qcephb~x+dn!*-O+;d+WoBLf+mCjevI?bqhE^IlSL0=Y67{9J@A;%CFN^hyp1riwI zIY!!~uQnn0-iiCN{HS22?H=>|kIF~H&-t*%@AFfBpZ}lr>i_oq|Cw|Cr!8bU|7DA; zM~6~|z>qaf^Ze-5@aW=S5P>2Oz%y+xzjXJmQ0}bIzDe~+NK<|~L~Uc5x*q?_7Si)6 z=Hg8dge_G($Y6!HZ$;J9ODsJ01zXdVOU^*Ts=9OvC= z5Rpv%5<*FOmg?nfL{C4F29g=9(IZ@AaPyUv%Y`!wH!kRKzU5iuN`E0xl80IbQ(AIV z}`~G81neDgxgZ+1qc>LyVBKiLg68{ycQ!X7BMNvX`o1;y*3WB}o zG zH=>4BW2J63ml-qI-C^^>`^@4!ZQo%4(qhi}+)hDl)`|-OW;3|GRhSdXIcUG_;ysDJ z#qdt^<-dv5V~}S4Jigp{@!a8rQ!pA$)3*~lJm!7QKpb7l4a4!vVALj5X@sZ0yJh`|r|?X@h*gP<7`BNz z%nqa*cL0-%)k9RX#$v4%b~eHF_f&YzkrJV|NSPa~S!&ne@QTl2fI|IvRa1yqdfB+S zhaLuT0+--HjC8*8?QR<;gy4fAg2Xj9Q`+Xn6T#SV47Fr+6YimLT@>Q>_IF{Ono#AB zt=(*cN9kwlAHV!P+GAyBqCd`x={A5!^z^Q+@a1c@zHAAzK>J{2+K_60a@I=NJq6?hk(F8qrk8Smjyl z376y{?JA2cr`pxyP8VP~cjbr)?1>F`(Y1he(t# zP7-wZ+uWQzVp5|ZGxX9VRr#GH5t=4tq|*`Y;B@${$}m(CQU}*Ep^gUwLaCOX-OGk) zvb^!k%OAVt zL@cf`IWYK2e!A-qUV<`Ex$>DRF&feem0>~bwergP(qhndgoQw`sVBLC#d<`KKxZ(% zzLSt4Ne+>L;w!@Je1_vYpi}-PPe*tv4^e4Rr`W=BVGk!`)bdV(`}pu>Rg`zH_E&sk zA}qWBBnokZT;?g!i`?++__v9B=&1T7R1uo6M!2#N`Px4r%IjKvgal7VB^`vH8}glCd*3u_hL>v#~dDG*Pzuw*yDZ_R9|Nqhx^>7lYQLBVW0ffp0^T zvFM6Ri!&!Q?e{FDLfN{Fx3Ju&3=k9ObFIi5B)WyH_5yZ%uU4h#bXfH1I~ zsBX%_9FTBqji*y2FNeOZ+pyQw%bw0A(?QoI1zZc&jHlr1GV{BtR|Rd0^SJC zMyUdt1X5rmCK?^t%qpY%z7f#A5ze~Jmx(Wkn3)cxrz31oVI9q;6&b!oT33vId_zJq zvk0Pns?9=Bi;Bpkg`JCxcuXLRpfWDefy4Cg6+OI34W0a!bxsyMCyaF{LZuZJZ}pW| z>2I8f>Z`FX%OqH8q3)WjZjNX$Vdss%Z{r$Pp+?DuoCI7T2OV9!ij~{6=Jz|swC#yS z6%|pMNNm*KByY{-TG`lt32~9x+LpKUKkrD5Ph*3yy{72HLJl%+VImcjD_gop^_!iP zjdp#F3(gk|1iz5GJO0vSv{Y(b_n^w0I^Qs$Y?ZGIA1^fjm@MXt9ahGROi^oEWP~M zBv~F`Ojo+WtC@Ha>__%4-(xj3M#$tbXKa9SHixP?1TLV}mRa72olTWn#NDJ_g9&n} zONm#igQt|TVwwB16HtR#ZQiZaPw$y7pl@CiNKli_Hrz9S%>yqrv`_}&zFR1qp- z*1(j}Go}o1T%>YC6Db-LKIyikvQ-GB{S$3)zAo8pPP|D~ahx;~qnfBGGf)_*e0F~`;uDPTvxo`A7fnIb!e-C2 zB^X{pRk8&`NLv_h6qatL^+t(UpV&1tbdBS8HWP?kzc@@h2Cs-wRqJ0j@=}ob%~YO{ zhc2Lbpa&wSY%g67HN=K%%Ff6zHUse%XQY^k(X*VB)sT9mg zw4HJ5M!{>y&J4*lP_T$8LRptakw*-ZaCKwS)C|y=PpF@n&GoZc8QRm5HKe@#rG#pU zhH^?ZkPJkMWv`xb#2r9gU2YPimtejEn@mmj>u-3`C+4LSdaqG-xdJJ^!M|%8j&>>Z z<>tp_Est&bgy;xfxdxr8Si&`?Jj5M59Xo&2P7U{W>mZZ+dmxgB&&EAukp+X9TV1;c z%rF+~%|CrK^FtyIlupgtM1$t{1o1M^`@Xs4rS^2x%-{Rm#8$$=6rTK}_VSaYT~Osa z5U_t25*h!8822w4PG<`{+kb;wvVxQpkOInAOiYZC60hgFkphY_=}d{vyeI^amp@@s zR}4BX?sXqKShd$jE*etPDfmNSfm3Ng2u&pLiBHeUnen_j3TovfD`F zm<6a%d5!n}M7;8R`^>d(?EUe&DLtf)#~+KA1-faL?iN_+bJhiV8)N;9wO`7A2ZhJ} za{p~$inA3KPkDsD`1lFF=^ciGPZkik6WAT78%$HLKGu+Pc$&eLVC|G_CV!+KDO}7c zaz}yJtbvXR;S3&hWiStnc8YkPwflxM2n0;@#0)!}Ln>K^Vw5EEd)-nHxKg{B_a8~N zQW_pa%y*I<2mXIjF8=Gq@!v)4e}$j_x*!!Dr|)c=hiW{I5-wX5-0A6*fyV^ zzyb?GkHOV{qZrml5l4nFu2?yRYccgJ^N9l9xr8Cz_4J_}{)4>axkdycCY=F(!|VBK z+i7N}J3+Vi^&J0)-hl-gSdD4i2n$}%!9M4x?Wy|Iffe=HOX#H|8k>XEs3h8`{c@j8 ziyO?AiC1jcyT(RmMIm)ubZeRj^dEKmL%JBJ z?);6CjL7ugG?>w<-lfZqT0r;7He`{$6fA=>t8~p94&^g9vEHTotlPh89J&9xa{Vkt z@9v9A1+mpSDjdQOQ*|fItCpA6Z#w|J((G*C7j-ckbbD1#I_7gAj6IQ%k80&ntgarH zdE$4@njF~9-H4IP9IY1Wz_;ag!oC&Ur$A-)?oAm-ss~NDbfke1FjG=w=MCAA+IBA!VrGn5$>`e`LEiW@6EwO8LK zer+J_9hDK&5pm65?Nrbz*}9R_dnO7*f6L>WN2OzGwk-^qo0&GqtfkLs z&xMi)iuwr?SqUyKF55ba&L{B`T$_=iDd!$0)z`tv7eJ2&SQ)E4!-S83-mro#tVKE3 zSc!!XRPKU%imA+mbUbdWp5U6M= zkcP;Oa6}3Kc><Eu}s-P!c?b(k7kpKo#tAwUxIoM<;AX(-#PuC zQxphXKVg4;Z`8Sxe|OFQhwv-w?DS1~Y;Eyx5dL>L;oqJ+jCJt_BdwOYI%+zkk%s;kk7EfeFXqB208F51KC8?EE3>f$bUr(VW@-{M z0)I-?kbwk&jJc$c6a@(+0RmeRGbj=eSXIvb&Z4;}R(nIjQ+I`wyV}=G zyHe@Tsv`K#8y)zKnY(JDGnl-FCo&L_yaTDtOM3C?QSr(C?rvvNgC&c0Qo!%(w=E+5&lQ4D)(whZmO!d8leu)gLEkaMgq zdA)%AXq7%THLS^4PNo>|PTkIsummUkNFVTokOdpBPTZWb2XSRXyDd`3HZs@&ffR98 z?ZX59AlxZUhSEGFr0JZR5eBMxs9Z&}bY7Ej71`D?gcydEkCBbK!FJR+JvaA(JK zAZt(fQwEoOfRB4{J#&tWh%NjA*%aw+Q&IC2VRL>?B;XVS$ILHC6W+kJ^B^K=$m`dl8usCQ8D0Vc zllMH{qMdj|ZtSa!QCF#&RCs38r64yS4{r`ote7IF*hdxM*7RzAYOmSp7oKUamEB3K z2ZsgoOlK$(-Q8M(9&VN0wkl;>z*LJPwLT?1$(THy&O|r;vw%K)oIXf7&R+$5yhZT_qxi>Aj(hLf6&t*(6XVF~i=cmsl?72*X z1=p9h`OJ+$T2c~WH}=GsgOpbr(ix+n;Hj_RrF>lJ=cBAITrguo0V zk+LW2At0i8}t+jAp&}S`60$g? zav@S02`{AC6n>3hWX){n`>)*@trsSalTABgQgiY)VTJuU8&Meo2v>Brj8> zNG?~FMTJtSP!?IvGPesKRT@_oJm0P;?Ks>w&QDBvF9TGf@>VFEDQcF_nNTh-R48aL z-U<~jpUxsR@Z_JT#PLw-#Q37J9XL@gD|APGQ)Al6)_}+tvoxuSfIpQz5NM7bV`Vn` zKD>2O#K&b?9I9KYBgLGLC#Z9)BrHAVBq(?N_Sa2dRy?jqAQ?xZL1V25)DVF25N!!dTGtS*H$d=y1}UKB;u+Ye+_K-TMb`T>ws3uo=Q<;q(ywUmDa z^CHRrwJF9fFvYnCkt#d*C_F?%**vG3tZ9*1w;>B5%QB_BA;6{LjR*ef$(k(U=*7B7 zwQ8H2yq>6%vQN5YbBC^LaGl%c~GH(ix?6ez|Pqouzv0{=I-viZ(}ofqMUT4?`8!Y@XW_ zZ70>(NETD_3y{+7L*apGfr+2+i0mav=30LN2H(RlHSMW0-tl{8VtG~y^V7LHY@5)k ze4T2-UgbI;A%rK5qP6x-9}d^i`ABuSUp0o&kaqlFWgVSqsC)+@!Xo0!ru%I2IZHw~ zfenpk3*9gvgRFVe!{UUwhzd#S5=pB;i0IjWhN`uSzMR9xDvGa<)QzN1SXm^A-yj=upe{eJM?@s?A=_)h$ zVK0pqmHu)P1?1X?uL}g+`zZ4_gt82zv-w29o!H+Uvg#>DLQX$mH%&s(;A&$-usa{; z@{ebt9oh{qvNcmof5)Sf>Yw4gviula9BvnC7`Gf_YC(fK8~=T9G|6G7&Ggm&+7(5{ znW5B_RGKKE_w*$0$T>Y}^h9I5%C4Nt?9Qmred|3~SfWl)LOwNi`uiCa2K7P~5vQvEkiuNU!m=1Mv&`bJcA_zS#2t3;ZlT?A>nIb)Bn=3-}K<(Xb07pfuawr z{(c07jVgrmR%6KxM6@hFJhTGX%O7|0#A@IOHLN|Z@5hoW)0f^`Qxfki+>SJU)j`o> zKh-o4lXHc4f<`WUv^@b|VflNA+N0I$$J`fU5cHY`r_v&2E}BcSgEM{sB@K}RYqA5n ztT1Au672W^hFV;`2M-R)DGcKVZEdSMygz2W-@kskgXgJE)nW;MAwXHTy_H4F0=TIf z)?hSf0~03&jyZzUvkYbi>3N49Nxiq@ssZh9^*B1G$EZ_$G$zP|>|+ZOQopAM36xS1 zb@kQeO7gUShLOz_WSrc(gr0=ct}@BYw>4FHLxc=v`E=P)A5cvF@Izg{AvcB7Pbt%w zfr^RH(ucSuN84urxpfbHm0S!=CN#B{%i63U$a}buwNt12lCKF%G5DM2H3Sl64QrxB zC0pCPyz%+;I?7NvU{_n=GIh`O#D>${IIo-Q=BEi*x3_;Krt^v1+%>J_8UDI`4rM3d z78@BhWVAv|u*Nm$LZG-J8kF^|PXvdnkNc=gyS`x*#D~)|tuPTJ?B;~=3%j_KnDYH~ zDJ*Mr;RPn8B(>b&Uw-ts7da55bYyiJl53SJ$&$* z3R}>Bw$Dr7FD?@uQFG1e7?DfhR@HWzY%Au?S?zqaZV!&Sx(Z`p*y9Bn1-`JT(EPiD z+g3EFI=^C2YmGfrkf{DKJyA_2iibYifXzv5a@7aFHyq2}rzgnZ3m|!c?7X77;Hqu- z;xlQVDGDmUI(h-&wJ`65&o#^is={hRpkF;kg$3@P&1=opETFSi4vbZZ!{B0=_=Rnhi)N#2|3sxS{`!f%~9K1z7A_|}J zi>0ypCwDFsZ31O1TJ_O&cZZrm@y zp1jxJ>BVYm)SH771|e`B34yTi(L^^B9KAcdcRvLd^Z{q^fVSn9+7nD~KLt2;p74XF zX99sNU~3Xb+9#&y(Zl~;dL3VS@{1*9G3kmRW#+dh@wAIu#<`G14SZ=kZ^AKWpEf95 zPwW69FEl00sVlF^uv(hs^b$kRsX~#PI-!L%e(@q#Uhf z&=$q$62u?S4ARUzdTa`r^%=uua0m+Z>bzqe_MPDix8J#g0}TW%2NW~x(qnuhtOy7_ zu@1h-``G4|_0{z^`N{7!7_g3BeYT*lj|&ZfTMU3{?&kFT83Pgb!eqUhas$Ea`Z9IR zkpfCEp3=RIWDg)9x@2_xX(A1qUm{9cN9tPqi{VT0TA}sDgSD>>ZEE?s6}J?cFb&!)JkQ zy~Oa(4hPRFheh0`*@cv`;gu&2bACT}e=B6}XHa?cNL-3%Y{WA+i!CmJwXv%4j!+GE z2i8S-aa7?lQ|@#)saw3jU*Fl7IgE3CbLeG0u}j^8^q82S$!rmsdDJI+pxNNkHv)b( zz40>o+hqC2Olq2|h}I}rn?mu%YMWALEUUI0kYf)$-9~+4M&VXn@raImpqLpLmB)Of zD>NDX;JWrV!o}YAm>l;vpcNQvRYrSv=n<{P zFZO&{`!DZmF%FFz=XTlt^`RHt z4j#1>cd1!JghY{kWlQJm+10(zXZB+E`yrp6AM_r=4%1wCU;t^B^qjf&Xz&x_Kx~Xe z5Eh3+Wac86p?eg1?!3Yh!|;;^@QJOsk1uJCQ1F*0Dp3bvKBV7xsK5aWothB3EiVKrFv7H%q~OGJcG5{tUY=g zNpvXS6A;6~BaX;;95-&Sh7M+{ccm`vUo5(`8dn^XZfh}F_4@tgbH_|6Y;q+W#~v+w ze;Bda8xNsVu6uK}3gT>Q6k#2ejzl)OE;UKK#-9QqpRVJ*?uA1zrszQn8&0WX#Rq_* zhx;gLWYs4T4ZD<|fF^Iry{UCD@`%iTf}@HjO-N0q*vTb>K9%PF@YDx1<;hgc7J+KO zJ~dYyCCumG^;+t*I?~ciWoL9-rUr5wm9%t3gyUBr=+#j|=L0M=f<}WJ8E$axbn|F< z>I|$-r_bB&_~g4h+J1|26qpms6`W3asmec6e63e|vAT;E|Ek)&K^}8Ri!nr*ROvO& zwO7BJv01q?LqYb6KRj-W;Bi;Yv%vN0ni{>-b^}zY>^bbxy_3G(03v*CkL&dZ`c(n~ zMolT^x?X>iqV4}tXWS2HXm@Iy603~iGJ8+jZ;CL{CgLF*A@vaFHL+*gBtl=SNn;O0 zM5fyl3HTb_D+-#cY?FQ6mi=UDRhmJy>ENrm7dTxhOo*<*H}l$o`~8_O^`5I3FFOJ7 z-FB>eDyQPsTWj+NEqz5hD+i0g9v?stfmX~T@^N@avZ_l-J>sgT>uzRmU9loI_Fa5y z_K#KHO~&(Ca&}yFh`7traq#x4(dB-?0$Gmif?`q@jD84)x(Jn|BS1yzZl=*z;&&ob zBVLEh*o`_VlSx$J2WI|rn7+gNh&N<|MTL>Kl81L-c6xwrJa(Mc1gCM4YUi#!g6=p3 zQ31$|I@l?C?hJU#pW-3c6Vwrq>ud=uee<;261LLAZoO1s;3G+e5h-1Bp%wc6A)@2K&U>)np6KTErSdS0Lz{;|x~0WOT_CC-FXjkI4~dswW+ zZ{+y3TE6`UvBS{Yjj-<*(1k%4wa>NDsFeI)#{U-v|JP(ex7xZJvI@pu8OCwuv$>TN zRbi<9mbfGmDD`F@@ErtDg6w?OKC>WLusCn8c0{AA!(k@~}=P!khA zS_lP3nMZ*EAPllbahWS*56ve>`AuoC*A}=h={x|?XSg%0Ls`pLBL6}G2;WlUfYt=p z6>11|RQ4qzY|yPBE$O5bb8VL#cW}m5@g_}J#4x2yZE9>wcYxBZo57#2=qgQ~dTif$ zf^@H&UB$_BF$%b~ZpuPBD*S0YA%r(z5;J zR!y5TvF0&5W>RY-&NoMCN$Fcm?%K2Nxc3RnkPGp7r9mD+%P0qJ(jv9-Oq| z=6_hDlp*H|Iu1WQ z*}?}Dpk-+I(v~wykDTv=npk@}0ZRz{1f^>LwdCblDo!TT@=eggg0=#S&OPIY%h0k| zk;b|jR5deOEsgDQO{=s|MSTjmGn95i)|T}**?a|*ys?gPers=g zqAH$OP(d`+^aSpa;8X4oj^VI5UKIGtlnt0RB_L5X7@)4a=z%TwsE_MG_W!Ip(#$Sm zj#pGBk=O`wFDvR}E4v|iXwntcUx%&l36)=UgUA&rxW&=*4PMaGLVSaa|H@-`icS^p z?Yh9CTwKl59S+Qucyz;B)e%$q(VO>nQ1qtKlM(B5CBpS2t<-I%_=s0E=t-#}a?iSF zaMgnP`9QWo_VEnyj6ZBdQXB#~P#1^bGtU=C-i~&9AA&*ypB#)Luz`r0s*EVCIiW$| zV`gu3F2k|eTy(3|L}!5$r)Qq1b$;Y>nXmF3Qd3Ck-(v6cfo`+l>)|bvsY7Z3i%cVY9>)b zXK+O<9BcPVxQBFvQlk?-r6W&phIWt;5LW!luTQ9@$f7XM>8{AWlP^o?E+eI;H^O7q zBPx!EKDohkqCj#SqV9f7E!cnC_hJmk{8G@$`Bpw9;$kMcRu=x9?kNQA+ z^6z03Z$4Dbnb)7;a`#|W$6MnbM(lxdw;VHbPI9S?<+&K(4iI7gY-rl`4T5xi&yrYu zk1BBe{|s&4BPIXOZf3X2r7W@n0#B=+)M~RoVc^Sf*an(`fWVyUhk&59;+(vo^hYyw zW<%RW!nPLj&ItHT7^&|t{`c6p3AY<`GnzCM*5boV_If&#+3{8O=hxvm+YcxA0--@; zG|a7Kh7b!}Qd8%OMyxA~HoCm`{uX2#kcB%CVUb@-u0CqCkVSXN1Ej-@xFDXDHP=-v zPc2#YgBy3|?eehJlbcO~YG+|1Nm~v?0bed=yG}Mz9Rax>YffXHjE{RT6Uc%jsg zKQXAG;Kz9TJWQ72PDJiUFS9(8u1`36HI(l|3|R^Mnyl-j_n>z&TJsgnvq`UlE~}n4 z>9A*>q%sdvD_b3QiP?6=OuiDn{PaQwtH~^ej}tSTvZ#I|87$oT>-7Z~T^Dd7?WtcHtGgdE8-M4CcUCp$AZ*NrRa$LMgYztg zC;VeTfqBL()tPS~tnJqOjHof&0=aRNXx_F7-rPN6-D94xo-we4vl!j7%}5!&{G6RfuObFyu&HrlVZ%x zqTgfDKs|ttmo2rA(}|ii*|w zoinW$LoP|JS(6X&&6C0Pd=&7Pdgu#zPs0}!OAi5bWv8d)oV@J3H{ar_aEI!%+cKNP zMNAf4MoimY*X<|XCtLT~Ue{M_e|NHg>X6k$sz&RfxG4;AW1>z)W5bb8j|LNga*zn} zm_Jwl8ncO$RG_#Ik%|N&(NgB{M=jeW2^}xK(h7F3}gc0iV^Cu9&M?&zU z+ROHNfXQW6RGM-HQLSmLO38>(=F?NhmUk>9rZ(%Y%fGUdIj09% zh8noEawe@L9ZG5{9X6DgJbXHCNn^q&mex>-hLM^vLQ!1Bz)plRAmI^`X|gA>4DyuU zNBT-99!|s+WQw(UPjZ_pfVHMGnnVYsc`R_!n0_|g?7paEho-%OO)DDN(_g$AOn~XOY^e(IUP?}9B<_*!^Ykn6iFW{ z(&>|fNeFzH+&ijFG~edC)(kC204vF6TECM6A2B%o!a_U9QO#S3o2m@Q(=VYo@+RaQ zbx&3#v=An?G<*}0P<3C^kSIXkHJ=ulrVg2$6nru!BNM?p7^a-`yHaJPMrzaC9VjEC zODxw_*82)faRGjUp@3l1ZuY)pV^r4Mh@T*vi|bmPt6( zev`h5ddMedI})j(qhT^ec6)&<{7;8I@Jw8)pA`edU_**Z%)&@bK6Lj`_sCvu(3!YJ z2GU8uf&!t5IkHt``T~s|O+WW};pd4sooDawug=sODbLt7?UBHUyQX}&@ey%aBbdwn z119I6*eQC8H2}{D$5ZJ$kXLA>IOz12=qzz(xU9KlXpM#F?d!&yJ{rHn`{Yih66-i8 z*YE*xGnp^227hTZ(T?U|3yI;4D%sm|;1wAib=K+rSGdQHSGjlKC>}~^+y_IV?$Mf7 zUD0j*kEMbWFs$Z50lq$DO)uaJ|6M=f}StLnRR=_S`6T7c4y3SRfsa}eOT1?37x!@f@u>% z_P-G@aO=1n4YC3geVc;cz_?)GWBb#?ZjwXl`d2b^62xZK8%)bVs><~m$>tk8|$a}c8itV`8y8)F5mD)Q>-HU8^>*IRIBTCh*PP#+#C-v%9I7*3 z7_FAxa%lNDX=Z)!^>Xz7wfnWtRjFpMoMl(k!HAOSNSfL15Q9vP4Kg78jAd!DgyG00 zr_Tye-h5nyH?YPSA)r_8fMaE=Pdi&VIdMU#tfXpJ5a<&VbEOr$W^<*#OE}RpiuVK} zTaRC$U{#lApYj>qTJDp!-5+*^zGd8dfc6g3n{=(?x@u_Z|F=7i%78s39YKSNp%ZryC*X64@04BG zs?Gsn>Od57wIw>fq{c7)fm^~8ZO5WIm|!p7Uz2Q);SpnBZc!~ynFO^?Mj%>Bd>;?% zJ0NIVXghPlqMGX0J-bSxj)B-&@8kvUu#jr9?M9zj9oWa=U)UBU?EIFD4Ju3g5?9{l z%*l8B|5YT&#dI~PfdvA>Bm@GY{69N1{=)^B_tq=mbcfFuxQ~ZzHwqEnb3S;Khhm5v zh5;&%N&p1@ex8tz>#E<~TXW!W=6de?Zk-swH^`YAZ@&~p#4|!3hbk}#xs(F*(ClME>Fs;r9R8d2WJH>k`0-JG)A7P++9}EfclI}8t^%yz04z{U+wg6ETnoi0RBuk({P9=cNV#ycQ!Iv3vIXh*Y ziYHNG15(V0(NyYvV{Ie**5OAes9pmK%~Fz-IxMM5EcA@{JFU=#SYx6vrGZ-vp6UBs zT@j_u1AyDBji^zKEQdwexxsMh}g0u^Tpe>`P>L$zgtACs0LTvH6sgN8N?gt{dzJcacd%U3ZPkguA*Z6f z(jgwOQj>>5NmB`CW{TelqfIihYU~Qek-qUHtaS_qCyIKAQdrzmV~l86OSg+XRG2dr z0Afp2J0#ZV_NhWsJS_MoJonEPJk_Y@QNm8Lomb@@#N|$6D%8`}KHiQ;2#kzmr5!}Z zk9g=)m)2z7okUFE0~Nl*Q!x)7AV2{m8?3qPK(#S;#KMuggwFrn9}s zM4oW5PzxG9GW-^|JgsnQM(wqY7R&aOdQW3bGH7=p8&yg(jhL8@XNHo5-_FCl;cFDU z_ft)~Ont~!y&KA9!`GlMb)bFyOK$_!Rz%B=he;d&VUmujB@ zmrbS73^}ewj>p`|&WxudWce6rXG5m0FF#f)u^xomq^Uq>L~oA#@Q=IB$nMMG?MqlR zyTC@wIM1lb2QoS4$Lj&jAvB%IR^5pdi_H1y#pDb6PZK-ZPO3xX4OcWHHtw3#;k_|Y z1!ecOrK3|9x5AO(li>-4Ba<1U)3~&uZH7mBS^?LssDLS$G?|nJVNRWD87u1$W-TFa z!+zR2hAK&;AzLa}iMK0WN7%iO)Qb_0PEM%X`GT^q_E>dtiV2W$DqKd2QZNRz87>|Z zwj3{H76CJU7Db~(HIdK@n51DNB436~!ad&uYGVZGk;brlypwdwI*Tgjkv4O_@m0SK zAZtcPLF9-zvR-fGeLjs?OMrzJ0fotb34tlJ)O!#6K9HLo|EAiCesG|RXyrh6NHFu=TI zP+L+P^`>)Sikv{L;xJy;8KDz1pX`Gi7sBKx)vRK784Fu=BuB3=RQXrcvoY~Z{ykCf zM$&=@MkF8w&X09ikz4e3H} z9Wp~LNdqk#t--w4z8xQNnLFk0*QSaK=2{%cEUMg(c0av`Np24wI?qRc`A3ZbNx zePi~IC9$p>Q_#T9WY(XMSC4Hb*`m$cG&(;xq(}1XXG4)@))U#JjzxcZSPV}72Sxe95BwY=z_$I>ZmskPvj zfiny$wu}%~fgy24+e~odynG(@b;zg8n))8e_=?M}aEG0!@M_LB0=o}L973V_v;sT=U4&S3P0GA4iNZkPga4ajNO1k3J`F4+o0Hh!v=E39$tNR z>t~(-D5>ihS+Sz3DO3jeO1R|qg%x!1ex_4+a_pfO*2RgEqC1yVDJi!$$TIYxe&kr& zdV$WrDSL#-ZNn6Wr*urm;D6U*(xbBM>1;)*Z3(}(C4sKVlAh&-uZF`MxOt!V;oo`? z+zuFS06>38ZX_eI@L*gEsfN+4LxlZKVdivjNI;y~@kEkEql^EDAsB*|O4^S^Y{+sq z7p8G!TOd8Q?E`ZDIT9CE?zzK=0I*2R$r(RU(E3Pw&dB%mk0z5)fI2o4k4jNA@;k3h z9jm4Pv8UTwA>^uC+c_t5lJWcZ=w}oW9zAeg!^s&KjvYI3nKMw5yk^T{DF<)yl}W+! z@L2#QVG8Qzt=_$O^8x7oK%#5R}&5xBKK4rMse}2nF0t!9#eQ zZS>U%S&H-ekFJ1}_l+_JLA&4{j|1ZnjPeMR@kZw~x$^cif_{~x%bst90T=ebezqkN z{2}%)N-hOBxZY&}&F4h@b61nISC&Wd15%5qZVzxY=>&2wvj;HuubOBJGCedug1=nb zXA3fB18ULy^``InLP99Svl8rMf+GNA66J{eSY;Ly2cCn{wFn9O&h)I%gI1srYPBwi zi{nx%*U6a77@!BZ&EPwa&EQ+iLb3U?jQ$!QgZO)cma^JLMPhw33BHW9XClq|tAo?7 z{j;`1j;_d>6by8L7w)}L6zD@M3t{NJqBdFODAfUPbizGRW&_tEg{RPSJEn6xwsU^d zgfg8ivX1Ykp^Dl2b*$ZulnF?=1C(`u;AS8rizRTnc@W(i~amRHSDWTSo;Lh zh1y*@=`5oR7Wf&?VHOLQBw6oHD$|TpHq2L+2rRFFfNK!w&w)31x&6xSY|IC+%g_6W!}2UKDR^7 zMhBkQyn(@SBO2jIZU{+kSQFeg>}@9@XKd;mZHvygB;K}wKY{q|FJ5ub`Gx6x)3rY! zXUrfLwY4oe+J>F)0g5z>!?dMQuNyw8bh`Q&u734tZtBsInVq6K`H1tH=l;&nysoCbau`Gm*c>r-}TF(OT#`QZ7`mp#AI zA&3ZvGd$>nb!*|`O%)6MBc?;BGJ&gx=ks2{GXs3a0a2=n9aom20(!0VYyGu6?NrEZa-c3DS?+_fTMNi zq?~k|jk|aimAdPDB@${SDwY=x`slsDA|K1tDi1dCkWQg3ZZRT)h4M`%^;|;;YQ{to zUkotH8=ukQRZePpLEd`)2{w9b?$fr64|uQgCS7W{3)1dH=y$7d0arB?=dCNjjafV0sLEDQFjdx~R^99$&V;pQ7bavWroI@lFuIBy^nKMum%5 z$8>SuaWGqN6QHWEpSnL%ul977c78Q~k(IOK4dRrrI&R1RxEbE_ttKn)a&d~}NdrqW zr{z9YsIeCskfw>tpV(S|V`umET4{cQsKOQcg?XzvbyzMMc~5(0x%EbJoH_#OGlTD@ z51YoAbh<>g-G|ZV zHrU`s!pO;ibp{XD5N2HN0BRPwdEM9}l{YwY^Lvob+Sg!AnOPiJ2=QqBWYOs-_6fPe zdkCzG#)iabEA{um?OQJL0E`_~~83O|m@gAvUp(QFH!)%Yx6C zcq0Uhkt|SYDjfT~ubJ!-;)l|O|pJ{Vp(|GoPB>S>MZTJSoZpUzwA~c^?Qzqu=}+pn@0Wm_~^J5 z1A~OqCuPudaLl0FF9Mb}6sK^1*kB$LG1}%XE&OtC30m?J8fcfguZ2W2+)C^Y$FCZS zuXcxw^g-fF6?nT>M*M>LQH}An4e1NNiyU_=cK7ck1QKtQKJ`%HBhme%Ay5yh2%;CO z|IhyF@FDw9@ow~9>)mDw!AFn!Gs4g;f3+~umvYk=%@o#$2j>^CpGIge^x>cTAjc25 zFSfwddpDDpg23K5eVDJ`G;fA+;vk6_N#) zsG2hB3)UGvqJA?!L|rB*%PNZtS0_;?oNYoBZq77BE9^OIFA_%Ml1J&lPC%%Kr8~vE z%1PXSz&zU z1O*E5Pd)Ldz4o?rOw;+GXK0;G?A#F~0tT0HsY|Dp+t$Uple%t264qQ8%K(+1D_4#L ziHROlRhhf6hu304l8#qrr{*&m)Y1jvQCsHulX*tA?o@O%VlrRFLAc;$$n;7E-XLS! z;G%c%T})f=K;3h{gtMs&!cq70Sp|-p%526;oqZM>l#R?LrH#7Okv4JC*<=RgJ>~oX zJ4UEHvmA+X6K3g70`|r6aq|xBvvP|DCGBF-dg#lw8(rfmaGek16Y_vW(eBb#_Ozkj z-8aUdsGj*m!=td;jCHzAu=JVoD*xD@VxWus>rqT#{}Kun2btcaBlfhZZ<-orJV=w&vJwOCK(mkHjSwf zrdAYL^E&-gso4Yz#X2h_=CL?N#t+U5%i0!MEQK704aOU@4&+&;ehSlXh<__n@w6*e>V>NMQSZ) zS*J58=||-*Uw{n-@=Y##4|KJ61_3$cB|BE9tqor9m^V1r;+gc=(A1h)-)en5KYQkKo+oHy^ zC1aTz(&v6)n#%pyz)HT3@Q>g$BfK0Ar5l|l@TwzoOggScRV(t1SEHL#{A}r?GlKmm zfw8B;8rqgN(v{gkTL+W<#YgHk&v(2{@PiOVJ{`7k7xtWyBnzU zSD?C1kA-1~5}KUcC9N1lyEUDfDf09T#?nvfsgpFT4c8sq`XxD$iW`U}F&`b2k`5`B zb5)A8Hi{0BY*iew$k59bXT_canN@j)?kMn;$$gBdV#9}svE^+r+f)s0dL^_&vKrjS zlPl7qw?CKbWv^R_bB$Zj<`E(GROGu6?H}452csFm>BzzgrV>SqOhJ~)hAcLe%XB6t zGcEfGGGQt`fgyHw`pRiTG~Bs5AF8OClSBb7)wWF|4F0nQiLwCx)`BQi4se>yl4p_lS%`H z)Y|3bWiu`9w88>!Mad$LcD7)MSr<-=Hx&xodJ7wo#d8C$s<}=T&)}Jg2RzR|f^pFC@JJWvsgcT__`IbH zhY!9Xx$@^A55GhBiWkDx2_KMnDi)F+<|%SayYdtb78ap>q)Xx!Jwo^j7Rb+)s^`mI zm3zi;h`JszQ~0Fv4uxI@CVfQVuPV%5>KLdwnlwH?vr9ksRCxcL9yx2!L-@7%>`y4q zc6_eYm$#M8ownzyo5!o1!FT|n17@J4%u-7{NYGV>Mb0!lW9u04#(%s&_XvX$j(2GF zSy4%C<^8JQ%FTxLCQ7mQ`iK7AODl4xuvt|ijl|@YM<$C*kPx}P3Zm452euVS)aNSq zE9YnL#8{@FzIGxG0?k*1F`vE60(toxoaN%WNX|$jzh@cVnWR&LGOw|KmqFgb=WP9C z^f?cu5=w6|9Ug_Y5J$A|V7F7xj$&o*`!aVfnc<{lcaknIlh~*!>B?xrG?dU0@RXcF zG8P{bvVKzxX(tEd+1v1_W8r^Jk|QM35|z@J1)bti3&lG8p&Or@kmECfl57IM!WxJH zvz9qg7dyrEl525cV|Sp_<@Zw=uPT-O*M)j7kXIWG;q?=|7varIQq;}ki@HpL=VtRd zw@^3$`<+Cs+&w!*TMN@h;F$#TH{Yt$I5sGmutKTylv$GK1%>2Ef@@TeoK^bbsg6)i z?Kl>Zg=o^lgQ9Cgvwxv%Ky80jmK}_Pvd@NhfM1WqI+p%SShcc5*hR|SRpDon{0R+Y z*ft7($iz;>Q=WxAGj+=CuL;miXEXXGKwTjn+Zb{rYVfGk$R2RILTe7}L}{Yoy1~FY zKCC6|PbojJ#FM}YlgaXoKr@_!>ovSy4|5~f~otBB3u_l7mHRVx!)?T;E)}! zeAyz*;~?yT!oos8ZqF32MVey^(CD$`I?t?$=7AmLAgYF4k8dQ_(eq(viRdwZxB080_sH#I&&+r`AlEDhFPw+8Q? zWmu#QrG3z41<9B7KrV=oQRi$;li$B(QqbOLC@bg#W^&(5E$|C;L;!s-cUHX`unYq%+Nc+dp72 z!p+$TOkF_I)@H5_mAuRc5=VT_9J2AMq|3Vrb zwZi(h3sTY9OCBWo!S4&5UjZM9YU1;+s6O7o*t#fT!^0(#!_AouqRe*T{q3>Y&kId` ztRRZV7gamKbwVfeA&6PG4ApaLDsNcb#Rnc;`3ErWs>5Y>7Ad-=YRcX?r#~Uy=PGyj z(^pWYV?v=|Dc*UqKZTna(LPD7h{r-oy(k?+2Kmzs;7%F?&!wGf=zBV=H%8)29ZJ_R z){x@&YUvbTG#zbmJBey}TFlQ@jj7`#aG0ZmzNuF&L!8cpn5qQ|1EGO+du=X{4(eg5 zI6(4&k`4;fHIfdTw7X&U!aZ>Z>E8IGP>4JI*F;^&R0A9ly0&F)r?MzQA9^(V8!}ns z?kIF)1{S`lk$L9+GqdUJkUnP+#!@tb4Wy~G1OC}&_Pdm@f0ZD=Nd&72>QX}Nu*4LQ z=!)9du-b#=uS=sSJm5;w^21HnNmuf307T6y1yU4#lz)b>C_b!BYuxHc-Sk<;z{WaFBGMqrXhVkNtU?6PQr^3&l=`j)zG|>rQ*}pQpw=K z)hLhD5pjm57?RAw|G^%v)G=u;YAda@hFy&jUTeX)`E6s^p&XA|1di1w|yGmGd+PaEg77JNKcSs#-?X<8Ck~%-bIf7MI1ls zMih4|jJF%X(}T>{Q}_rptD@c$GDN|x@(iJ|KR z$Autp9Xb2Bsw@kvI zW~e@U*6sk1bDaUNb?itmfMCUQB}YYfgE?wBmVI&@Q3Gam&2Ce9*|?z+-nPw?-hyZVD0K$9knv*hBABX$bm)u zG1qxr*jj9uhgRM&t2?{x9%`jWNd>KHUYP)|)@yOG0PmmAaA!Tm*4*J#k)Igi~wC^Gajk&b4^m3XwOvrc{u+g$lCa9{+=_ow zN}QNrd7T=ie6(CSR;7}lRz>tjRYbQ2T8>sX-3rEUzf1#@vB!bSj7`Q~Aa6P&)nRMg z`2dJSZ~^1V!6|2pNeNbyKIY75Q3=*E6SJ5(=&h|6-(ja7ZYy^}JH`VB%LId99k7AM zXqBr)Vzt1m?&OMQMO4`K_t^Cx?7sV>N)ukqp|d;Ydk}L?tKG79smi|$)ko$U^9WFK zz%zPDH2wVzMA9Ev8*Q|v#-PqSuEYWcPjg!MUEPpQ3*4p!2C>*xkFt<^j^5qbTM%d5 zZGQ%z9J&QC<^4$-Gkl{r)YMO+p2$DrE4O=Y_g)Q;VlPG?5!Ro`Uzp!STHg`h198Gp zsdI{&2aog6A7_65g`GuB?W33d12`N10XdTV|4E9jHcJ0VlJ8EEkppH#3@!7gDWoVo zK*u^Rj$ae*HsBFAYzTEn92{0!w<#WTm1&h4nEh*f(BJt*ap;WBN+K$Z%zi$zdEPYJ zwAjrb$=a9@stT=IqfVB#c%T?sK_hDxet;@%tz1GvGmQbBZeulIA zHE3hHZ6x-%b2;LbjC+$GVOAxMtfu7VRD-ja!--))xQxJ;(G1d;$5ai6$**tTqi;5& z$UK|30>_sOhE=-fo}f><%SH|He&B$bTXv~@o-Y=C2*6A7bQ+Mi4nqiAOt;hn*_X%`|%XeSSX30X3>) zih#LvvRpB#^b=}3GU)U(&}mE_3*9jqEmOnqxY;lFL%>@ zO*NxUylfRmzg=qhJ%7(hy3C$tWz+bI^R`8@JQHVN(ss5ZA#s-AH-WW#7Haeg_4UkK3@{ig zYFUr=@8Q=02KQt4_gigx+F_~}B%E*~Q7uyL(Gi^2cR1l}H9{ORC6=vV7UhT`R<*r@ zOQVY17l)CQb)q+qF;31*pJY(b!x&HYl+ai2Gon0b5D}i3O$!z0Nlq$H|G2OVo8!Me(9!XU`*kW`mqR+2~}@R z;M^)+%I?}kJ{1_rH4s18X{5pN?q_F6x=%AsckRU&>f~y1vGv!(llby)!&qc%oIyd8$jQQV>z5ny+a}vtG zvn^?u^oV&(pHpZhGBC|WTVbyfu$rvCTwiLGW3X%8>p)z#yZ2|B4;!Yv1g9rPwd(tyP;71FlBTPCpeRx+9sW+?@wjWGp_5si^~GUl{h%41J+~vhLk% zYaO5Gxs{srK)2eqL5%`${ae#9FI2U~CUoJu_`-M2iS(H)sZ2$j_qr+91&d|K^fLc# z!B%BLnQPrr6;pM#JWs1AXLRd?%o+(&E9VFp5|yKqi6&#ir~Cddr?-N2D?`waa~}W4 z2r2k~Hs61?ga6|w|G$U*f4dA^SHcm&_Pt`8LmylQKN{AKXT%!?-7z1^qJVso zz_c50LmQRVwi|-$qBW8n;dW>`^u)ngWLn~Q%Eur#m9d`+SCJU0hSeG25y~+^XH5$* z!OXPXnCUu^_GpY9iE5(cY`JD^Iyrd(R)e)oN$xs5Q>C1i=g-&bU>~a^&zQC~KFiI& z#2hd+pE*xuJIGqhRN7k%B?Gca!A9Pi2g9{xdUI9rch(w@h2K?dZYbMCgG%bs$az># zYo?=5_t@?1)1fueV)h(&YuhsrH>QT)&`f;m;PM4c4*@X7Fg{n={p+lh zHK&vF6iPGs6!>nuQITz#H393!JKb)AHG!k!Y9qDJN_NaNj|p99#vQ!dj%FRry|r}q z7fad1s@y16X9dSDLH`o1Z9ho}AYv2x9?GsjsDUqZG-Uy0*<9))N4-!`Ys`%5zCeORu2Xh3YP@~Z`(xQ{5hx0<3d4{^5%RptEOv>T9$Ogsm-neYXSh4w(8Jc z5{P^bj5%PCBe72br$D#8M^5+7rHDeZn2;v50+eRWkU}Xg2fD)v_%={I79x#SvL7?C z%$~l9c5JMGiZtkj_k{@!a5lsk+9Kmg#Tp`3X$&Vs?O-%UsSu&A8zWKfr!-Onie{kd zutEkh_$Ah9kX5mrcCQP3dkA8QR23!G=bJ8o1p?(w^6`&ph8dPeMy;e+6JHz?pV`@t zGu;Z2s=i=CHqxv*H*GRg#rRCd_6H%b1}!AHKA$kfciz_DSQWUDgpIkV61Pf3mo1jl zA%mKuLv3#QT6hy*(b71^&=TRe>Zm1|QHjLb!+0~D__7ky3G0=V#Rj%tzGH(*$V*Ur z@|t{-^NsK6NiwKb3SbQ#R7vAKQ7E`+)@T!#$;b>thTM5?dx(!+uv_@efFp=PTj+mo zS0l1y&!D6+Pc!d<9*aE6{DU}j!ksRvFsWM#;0`b5x*^zjM}1SGjwee~Sk>$WeeDBa zX&N=D3-Y{yVcilrAaIw16`r$gyqBO}Wp9zw79IbtOnc%ZCagIdg zene#S4kGiG!r4WdjT?I8RarzBJ0OqVGb0Jgo8sDtD$25z6#V_Lhk{rgY(S$9wMf)2 ziGdy>cq#0@612#YS766p_??I-Aqru{lnVwo9RqkftIlnkUHHK9Aukf&1DfcM5qM<(@enM(;HSbTxG{*>+P1w3MZ zN`C(~^D9L61H0iCEMww)BMJ?A_bGUn6=;mn$o@8j;19%A0GhFRSc+?Ci~d?DRHPJ0ARQv`xX&;|U+TM{h^F zPjj7mZoS_f_ulbyeS*SO@1M1s(tOVfA0Kjsd-sX3yHAIv>?9ww=%@;J(eQhT@p>-{ zcZ+t>i}(>?{3t~INJRZ;=JVRhgLj-3^A_^j&u7o+p(!65=Op|j{76fEN#^yW9@yzJ z#r&wBK65zEZ->wDL4WbP&hbY!=W(1ny#4sjW^Lcl-_#&Jv!cFozJ#Ve%GO@=Y~SqO zDuidlmKhd!|NXI{XCuSJ;K+z*yluDzH&j>+21|F@0{&{L)bYw5Zi@Pv`O%^hSz+=P=Rx11u6&9wX(Ose6e}KjsutwygOT_kh6jaUyYdFJ%4b_w}tbrzfl)^vJ5=}Rzw>TJjd(inMbM4pwSU#*mAQRR`~ za~Uaujva_{hE5y>&SFc5$hU#j7lWE>Ykf@%+%hwju1xFLMuc@L2MAV{R!kc;7uUe8 zl&|@K^@=ltdmEXJm>&DUiiO>>v$uG9=(TYA1Qu@tn_GdUv>Jsu9y6ZEf)|X;Wsknq z*47pq+e)b@FA&6uorkjg&y91YEt-6iB004b#I-%s(gRpYCzp^`%Z;b~Csh-zkXC3- z2a<)z5l}?tm)smh58-)FpKBjaYx!^xg5h66P*1HC4pMxMz-D48BYf5<`a3jjK;{07 zP{}jF1sImfsH$ffU0~IvI3g+T*h-Y%G{4A{X*rJ6AeJDNSRf80DxqM1k#z{c^s_7l zD{vyoY~Rq3{NRF@^A*&ZxkpKqej|p}W`98;nz^aN1n`*Kn943~j1EXTeRVMe-@uO; za*ZiYsk5vmx{9rxUCs@Lc8EZUP303l1`{)&BKDHW9a<{XFICw}^g}J$tt^}%@vPh&v zs|)dLLE>LL%j&s%j5OoSkk2f;n1olbaj&xuvV;#9rIL383Gf-0oU-9DTh;)j--ok-bw4AgNPA*XjkXc{4LS~eNAhH7F2TO z+P@5r*VNi^B5M#ttFel$qxLE_kOc$dh|`$6b?KQrY_3_xs~Qz8cl=caTcE?>T(HTi zt_yXUBfJ%w!3-sMDGV78erveKG)UtREaEzg^WX_`X6jK=EdJ^Usi3+AMm+QAlx|(H zVS<~>ANrNC=@BOV(+jdAfiU6B?IU^u|3$Lc1-l7n#A=jOJL(+4ZL%YRAkn2&N+0oz zmtl7zH@-ETCu%sYklpu(sc|zY6DhV*<~_(#n*dcg;8W~OA)}L%!YN!O1AeiQA%&4v zH5eJS5Tz2aIX)in^fb3fkm*Hwlf*gYr7#Gd`5_Z?4S1f!*)Nx#Wn|0CPXde@$NuBX zke#>KGu1f>7Vnn*Em2OC%3|ypkOV*&;~5hI;K#qBiI!J(xkqTU3)v}HxQ`f&irC8_0E@d$@VE^6VU-FvwIr7oQ7PFcFj`-N6orYSq-~MUcejO)9o4% zv$tSDO5X=YTn{Q;$CS62fp)aD61&$1f{EFzm0wX`~e+Ft#_NcZ($~h06SUl2i-#TX^nhT<_N%;Y+=&J_nt+UI5J|78{ zc)FujGQU)8W?XW39Y!01p`vOwm#hUV#muqgTpi8M_{NYKotsPi-HO-Caj>AK4yy-3 zqwb9tuOt@_Jz$xOMKv90oGxF1<<9%m1r7HkH@6*8Mm(*O5Rj#%Z>l@{1w39f)P9D% z_a4)KU)3y`11aVT(FBSZkzG#ulOF&IyV=(141uH&f?c|&0iyb_2X!1)0n3o{H>o8T zlMA?LmtJ5Fr2xprkd915iK24yQZdxQEAW4jBJI_!&Ic)+cUjBSN-lJ9leir^A6UJ7 z^Fq}|!>r%f6bfDFclCoFm{Z^VG4gLur18SN!P(&0}Z!PL8qBd(Fd>;|Ng^2zsgT@!u!7{2p`LrN|hC6pbK;kADcP?h>^i*+ee zFz?#m5{c*)-GlHoysk-i=}@lvHtcTU8 zJHsWsu=P$#H|M|6`a~$e@Jf6po33bSU+-%zzqe@_Q|&-s#=cbDjAsA2psHk=y9YyTJ{CYQ4wT>8f_Lp~PIq(5?BY zw4vP)t5>7?FOtMvgogD-#9}&PTTmY)R)vg>3~FILAo&lM+XXZwFF04$(Hw=(%)GJ-H+uu{R^;vX=O`#j_dT zt$TGdon~7b-RI2(EVR!V*i%BsOW>DtUL(`tAd2R_;q6-Izj?qmrqMJZ z=D0#7d`*B&{m@KDNv4b(O&z2x4ZN`hk*sZae-DRQ)b?35A}mG^9`T8hB08LV^bb&^ z6iji6+$j7l25&S!fy(5Sbh=t{2#)+bHP?5xPJmDQRMbzB&HFiF~}N#a{`E{Dfri2to%Zzl8?v|DckQQ8W!YP-*EoG9EY|7cywtXQ3zqc3@2nKMxYX2z72*O!OagN_qRRH zuzC>A!O!wL9QK)j4l=^lALXE~`p=;0ke?{79Noc zQ(G{kaiW{sKwaTFuMXDen75dkTGsdI^!zWJuy*dM1MOHEl(lJ}l@s%y3~!)2MXmn1!XP_pvWHH2Y62drxQ z0}ai1v=XuNwy+*g(s7iD1=qxab8^W!MV7GGdfV9t739lF2;`dmZ(!Yb3@Cd+mQfmD zv`lXM35_9DF4NGrsS5r^{aBC_(43~ib)|g4YH-=PkqZM$^{2#$z|^cc;|2AAUC9s6 z`>3^s^ED{TpUeL%^bfQ0W1bkPzAG|%^hH4K@5y#h zRUOI8wWO4KV6sh0Tq)(V=6#5!+`>h)pH?}oxyC1)=7X-5)C+};5yXutQjBeqEn}}= z3-e<9wHi#4zeyVkm zi2I^OBE6l_ZZ!N`L8x$)q)fjr&p+*opY!By{gB^jFMaZG0Nt#!3!|GQbP8XZZ!btK zPzE^FPgf7l(F^1@N9Q=$S6J9rc-U8%*jrbx%9eAiY%k9Hkdze$Zl)N{#bE_g^0Y1` zqr5Fh7p!5pqcXrDLzWicx%{L5(Cs^)4FLH0KyUG?3u3{s0azIoDKOT@29Ex*>8OJrMBOy+^y$b#f zD2aqg@gUsZmQYv6q$;I)J;5t|8zf$djoETb4Ci(eqi{-CF$>BrlTTKl7^0Rcii7_a2q) z&ApS?--&oTd+Y-l%rofhh~2d?QldSAXT>s#HGS-%{U!CkZfI9Fnh<1v6nqA_ z|Gq8wKja-53wIM^RTD?29|qKa3i$sI%|LC_3t0r^TL#cz(GcGvpRY`+Eh9Nm=nqW0 zD#`kPG4_^Gbw?>DK4)V6$ga4<-oy}bUuyt?@A z-l0v~&a248w>&=8u{K&vWsP5Dar^xJhl3)-Z^Dh}LMX|#nV3Rru9C1m@U7oB;9=J# zVS4y5@U*w<%m?hz15RL6Xr544fEHC;SQv6Ao%!3N)J4P!UBfdQOw!moqk4%?`fl2m%3CvWm_&YFsr{Yw&9uGSjOqj*#@1 z9sOop$oR+X_Nk#ps0r)h;dtU8@7Szser2`|77t5n8OK%*tRhMchdJ!EJJqf!gzco% zgi>JBx<))jrcVzU_AOisRBG*C_B#apX_;%0X(T`Xfy@eYv1Je~Q zliDWPavM$+4&m$!*9wgLJ$0)huxDyK1qC^n`FTT7+3zK#MP(p`845<=+xU=VI9%c> zt#C4KrEU=sFidhmwdZ0qlFNS(ot^R8f|uc}Ey#*Mvp&z)VWE+xfyd~B^eB6oR+3vgQ!_07;E+?d9|F=9WhSyr zPCUgXS973oA*-k8zh`9?)dKfZ*|TlP@(eS~_-CfUVtUEn$ObD8p7kReXz$MVi6lP~ z4rQ!0Q9B!KJT>hC^-{o(J~?(|cwH-kevUmu*Y)ouZW+|MGd^NY_m`G&+uzP6x5Ud@ zuNeA0z^u}Rg7!v)L>XPrg`gUm!1(*53rqtI#Y0nzrcV(kt&o6u_iL2JNSzsancsZO z>3l*hYmGT$OtJ7p(P@AB)3VIWzwnbHR2e=(=FanMpwVz3BkXcol4+BM_zRuW>OErG zW#61kOGStKT#7PAOD-4~`HHv9{2pUU{*?o%Q64;s=7-S4$~M#I_?IIQid4|(-qYiN zaOrg|%`T4?d{1lK5l>&{nukJV)W*;cJeS%1Wp{y~!7W+`&Q~$NHTNE3YH`HTCCwSK2iJ8Jhta zjZvZ|l{D-ka?gN2J~UI;qF8TkPW8|CP-Gve3R2vTbVzwJRm)2`KB3#MJcRM**pTMR zlvZ><^>i&B{A?c)?Eoiqjug1IcIj}_!p;hl;RyxY3Z1xHq=g;-3G2J49P0yq8h6iQ ze)+=jKepbL-K@-9{^4o+58p>yRaYZdGf_)36B{$<{~G3NdFbIS6MP6rJ4)#x3~|6H z*nGmsQDrGxOsLxI5pKSioDGC=!2vlazI<1(wRjyIQA+YtyUcm zTRyd}ud|8)SDRnKU<49(fSaA$Z$l4nOD_aS?+$;ze&M~@hAwC;4qPHLE)K89kcQxk z1{7H{N7mzF8|nRS3v#7rVIr%GukNdcJ%`rlD-Vs*`-V+9cq%Q@mqfM_SPBg)4Um=*_2CCm%&r`%MpikIB~fm9i~*9asco4Eo>CZ zB^|{c$%f3TfDK-&BUPY$HO%vG1c}FlBX-RlyO|URfrqpG1{0&4=^p1X{N>4pYq4$r zqTUwo(7ltr2P^nR)~Hf1I(Ts6oa=G}6-HN}(LM5X34*>0!Z;r_2;Mr^%DI!Xnj6sr zy(@xRGSuwqjT{KF{F-NEla)C9jsOp zHYKPy+Sy}-NfM(gosWAMm?WCPJx0UeQ&8W~G%bxf1sEsTZ}~Y5VJ8xGtVc#b#Dt$_ z5Q?8eNd(^Pnvw71q$6kEy`XfBX^fxT-sBMx*-c@D+OYXDqW8&7KI2Y<3t}7kU6?b* zmLEf^w^3;o@a*vmtv|uh>jyrVVyH zl@CQ)i(B^?3SqXKgJC<0M;u;lU`B@8hi<&M+H$pUgP<`{L7#m~C$4q&;sUT0$RAXz zJIH&(5H>m3X^^`rm>_+yK{&Y!*ot|e_NW@~2DH`^Top>77PcfzrJ@Po+Vu>-ZqXibvF*i5YDJmGVc}o{Bx2 z$BrO}vPa6E;+=IsPw-X6BVkYN4iq5lS`!p4wsJ+pYZ!%lbUqGGp$SBch@{P$hQ36fH~(ovEM7L+ zV3lVWZ#$TF34ZxMT-v~ z;Z!b1cD*3<)5`cFYKHoaDophdKZ(L+=vfR!E4|8GHMu5s2y++1RPf>-f(8h@3=3p? z=yjLHDMzl31#JwZ;KgJly>TYJM(AuwjCL_jV_nUG?0VJ$=GSy1$)?0nk^^Uy$#)jM z+&HEbRcjH0z17Fus=%>(hC0K~!CP=M;{cBxWsmcb?sRH zu_Emd$}wboV(oL7ylGF=*Jr|`evFFj-6H}h>_

  • =|KDIYmOAK(L2#3-|p*8i@nX z=x$!vNLJ0%_Efoch#dZ57IWMHk94?yjp1qXVsz_}!AnnQ;gJyRfFpeKRd-c(`e3Y~ zzQ48m4@NeV>37(d@ZWV54INqE#QFj(uQif(V6W$IfN$h2#xLR30)(+b0#-NuRFSEr& z{G>>=IB{Ra8GRwp-@zB;qVFVIO9G)gDsiAqY5 zHyf+5P&$GtqRJ2_g<$gXj|4)3AIMmO*N{u{5z!s_jA4dn$F+Sl=@}`>aI^ z^I^VNL2)L85$ZrlRX}t%!J>-Rn5iOT1<#Nz0oO+@W_I}48Jg%DSfnVgMINU(Ipl$0U1^II7T zQ0t!zqu?YnfVruK)kS?FBBNfH8h6^D`^Fj)f^?fl^Z^O>G!m0`U1nwbYiP)dZM&O1 z>G@8j!D!3)8}~i9mY-M?$#;w1+^rb&Qu%0(c5T~RorW1_7~M;qf>tU;he9!gc5i$m z4Da&U$uxN>XK{wlUP;RmjV*1?MNZAn+zaEwVrygziVtoX7?fAQ`H#@-e5T@Iy!uX_ zXvz6Ifu8{nItS@Y?_mMESC;+zZ#%iAy;MSlg7q1{(VQ~%_z_z7K_5{6{$z!&IZf*O z+-Z#dWElUijRXfrvwuM)To^?i>>RCZ%}oDU3)y{EM*k{=V$>#8@yxJ4x}R3`TWEIX ze)u-2L{6H2Lr9gWN7l)g7lh6WGMkyFTurqT<*>)hKJ}cr*&DqX84Ski(kA#?nhlE9 z{2(26vdy*N3<)V(!{0jg3YdQJ>ER4`e>uSUA^WH__=UlPILJz1E5S*ef}6}lBQ#_j zmWA&4v8`eU7jOG;yekL-XBlH-M^|G=9-`HBL&H&Ws30gSngU{+MFYG)R8a;@=Q8lA zJe?u^@CBqk-8}wa0iN8Y&yc8taq`r*+b8oLsD6DC$Yzsf=b#|8tovk#J=zJD08wfQ z<9c}B(A?FX(z?OU`n`b*qSP?;GNWel0T=s*!wyz3W1UU^L3yoQs|K{H(4Jx7*aC1L z1=M(#SlDTsO&ura*lJ4ZDYhu~?&yeNYWWWS`A!cv=mJIrwi!;KS1Ah9U46`9HO}sQ z=X$ytUf~t^j=A^Rq7TqvxVx}ka<9z!@hG)JUaaNWW~%WzHO57jSHrVz-#50l0oqDc zVW>O9-umss3~x*wHZONTKI0neG^tVWmBO ztap~D0Y}SclsU_lJ6)2W|j#SO6+F}|j?k8a8?A6@fL%?0#*d5a3kySbK{ z_w*;H@EyDh%;WqG*$tg^&VooBy+9~3v_jb5`?!FTi{EBaA<$ zzC)J2ry<3YIQ=raPss2{=pC%tY+gU4?r2zOAih-#`^yv7YX3_^$$4QgE$M9bN_a&! z^pESSpUi(Lj9$3B(Cg=~*2uklGRHD(NBjzMcrUz>6@4Y>((9<2J=>**v~nj|EL-ujFR@Mue;Z4i6+MPWpHOjN zZ4%Th-?L#W^{RN|DRKJ7ZCW3eNxg$fxW^$dOyC>ynRrtP=ys{9b}aTTXG^EDSuI`HfkaeIBFYt@u<5gYFjidOxD06 zq8`uM4}+;Hk|aC{)YIakwYwrPY>-gSt>Z4+?w-j(@}orhnEHE-(msHi4K zi%kND;Ym%phUmI9);TeGR13{|Gmsa4zdY6HuTM-3%YbtZ1$GB)3$4bdz0ft$O`>SV zR*;3WMaoMH-*QU5BaqV9ug@(s*ToYVnkR?G)^U4V#s_2fZszX>aW+;g^kA2xR7zIz zlhZtUiqf~@GC}m%2Hk>4?8x$tUNde`*0I{SeLd-ft;vFZ@4?oWRsMScYJL>+?z$hw zO!#($nLUE)JLSOz3%Kg)6Kr2(vII}oD$Z3nX4nF0_7S<=Fh%VJ^p8?H?7A##JNc@f z$!j?*#ChI{m?H`_@8y{-WCwD(V>+lWtT4^>=WoDpv&>NJJyXLvc`l7~hK*c7pX};b zgNc)wS?MZx#oB{#_5;@g=1LR92lk0Gt>2`;89~Tk39o=y+|)AKURlNryn+cc(5Th; zS~cS^L8;gV^B?wEYig!w{Ah}>zFtb{Gi&d|NMqWK^`z|xGpq*(jZHZk^Yc3DPolIW z9&dW3kv1DnJ~f=?lm;8ci+k0e&iH19T1b^?fzv}{VL-da-Wh2TeGQ~q=BzOs-rMNS zb?&`NyqW?0w^@lHG=Z@Odz#>1DmCgM{AbgeF+M=UC4X)30b9o zqWKA(-Ahcp9g~SK9?CvzfrKqyecaeHG(Xzd8MnB1U?G4dNq{Ttt2b-bZ({8cPa%ffa-)BvMQb z<^1r5!DB1=3I;4LJWyMm07O*g4n`I9?`Z#i>7K0Vu#ta?2_`;I-2eHs{g;j)ROR!u z31jom>uaso%!u^CfoagTz%@iZ7Tp9^)EfYRccQ ztR&*ESw@vSqK3HNdCh0egfo-ANlN)A|Iy*|J#O)>pyLiaejc``UhvXe*ssaUoD56m zt9r3Vr)e}RJqz2#;yt;Wi4i9zdx3cHn)vRLE0e;1IC0On6(?P=ru1N4@Rm-p44*i& zUmJqT_%6MsZZNtm3wjb3yWGBR!)rn~|K<5ZA5F8~&=>^*O?-v&mI;B6?FQNhzBnP* zeZBR}N@r5wL5^=o0C*J8rBai$v(=)|E?L4!IqD}>gG<9G-`HiVRoa;PgM=cTW-9LX#3;N`LX`=@4u#8a z1&c|IuC@h|Nl;}ME&NF#LO*}`V+vNcp(WgKX$l$V43A9~Pvj$`K$h)TW$A}&m{xwa zwC$cD#nqrC6mF^F~{%RtBb){0ll;JEP4tgf)bfurWL5<**WIDSJp!h)`}J zA#gEM004mWH8fCPGVlpMvg~mPs%QJLzcnTN!{t)`SxSep?#+F={n-Eb<$e3%t@kSH z{058A8~wM;V71Yw#pQW_D1bqwUN;!ei;)3yKXY)Oe(y#DfkCg}lS2$3bhx{@amPrr zDiblN(z^r_c8LNF^hnG#t=nL))SQ~ba7i2yYVP;+{q@2CSSk;mdRCMD&+%d=R8xvJbZZU0cDp8MHBK*yZ)@5;?T6v?xiLYw{&Eg$X8PZ z>$q}CK7J2*36qzxC9LL4@f-yN=5tLG_vVSR)gfqUR~H~tf3D)+(OkTHH>ORD9F~Z9 zemJa49Y(>8IjyLpCmkV+7CO~tsT^#nI<|^P7O1Y3(=2o@D{|Uwy6H(B7z?SbrFo=e zdv$?HpZ-L0VCY|IO;X+w@!Cnk05-}8)H;y}_t-d8*f;4}-fn)ktNG{h-^+HAarQk2 zP#T(P>deIgdOig=3GP(RX8FCS4h`mW?^VQ1o9JFAr>l<-bg2nG(t_3jdM zp<0v5W_hhkS@`s-rm0Z}9zh=+=$?Mwi{5#8?>MR@ZZ-2HQoH1B^O2GP6ruTt1%6B;%!#vG2uO;N1FPUggHhwIvywl>+kiB2%cv!(-e8+K#^Fy1eR`QfE-wz(lX>>pCvUZ*!4H)yKhY4d%#WFDy7;mz~X zQgy*C+VPG?3Z2{C)7YAXcV@wFhNpnu_f#I6yPLMKUa_zF$iAz<$VJfG=a9ahc1a?# zedYQ}I4!4aV(~<=TZO#-k+&cBm=r6xuZJz0+cR~G3pBLZWo`(*B~doFiYy(#_e zDGp6zz4d8i+nKiZJhnM&7O8!0s;EHX(I6xSUGzka1d~Rg=Z)P$-G7TLL1Be8vbXhTz)hO{IIir+yITHi-sK2fP>?Y&HVS@=Q$4>1`RwXES2Mm|$ zbse_Wohoc;V>ek7sr*l&AEGd`xqn0)ImVW?6`sRJ6I}yt5~ObzKzvCLxiHHIj3SVv z_D85ihBs~K@`AZZG^+--7pyG^7}Frn97N(!wm>4Q+fqFm)N*|g3F{htEgzq3OtwVy z*yy*9>w(M)wYrwhc~HKuL?2qx{3Uf=-0r;uAcxHbp~dt}Zk2A6+FzurE3Rd9^?@VR zAEWO?w(eGauz2T@>^A)2_v&F?{G<)e7YfJdHgu;FQIutFf%Qks!!pM|Yy>}?1*tH& zLZ@Zg+rE_Y;8Fi*)*7A-cX#E=ygJB?Gm#x}!CoX$EW0HJ036{?hj-REARk)3U4zp~ z=7sT*UB~|R__CH?@*FDCq(Kn+A#mv~9i@&RNbxHeI%Afcz7++kxe%@NtFt@Lk}Mnj z$=(Eo1zugk$v{;mtB)j7Z^g<$J06D1Df#In;;HVV_+eYc6H`Tos6EtO5a?GXm1v1x zwFUaK>u;)jTv`~9X7DZ*Fl}}4t_o<+ln_uN>(_aD3$T*XFOAPy{e0gnc$i)uDGi@- z7{yVB%$g{VOXYFx631S}3*QTr1DIv}hm+%$_XSRoAuj{d0;B`qxU7jzMK6bUFe0j8 z+L5AGO|IkeBX`SM-jKYkX(dtG6bmqj*#KaR3CuHsQb1>(fYwtX5fkJ8SI7II4hoaAEpbDWA+_o=P$+Dr2VJa1OWf9XijMe|A;GGrea^ z816eyX4@fx)D|3bbGL`GD=crqy82ACa4g`$&QqIUH5|z>cn8)@_XZ9E?KtrE5gAQK{$+P7FHwA^Jh2@M=9QkcE6Ee zA4yTjwc_fTPJbsmLhxN^aE?P_z)V!Y)bTaF!cMhC10WHz1WBA)CU*FeYzxz=Uqh!7 zh`B)MG%?ea5|0ZM5eT^kovka6ADcyg9NH@TJhouPv<1O;CqA{o@;b4HIU?-)(lhM7 z5b6!Lo;k7F8)kN)JnZqW#APz|r-+_&DDE-vg?8S+_x_wP#RhR2GjIJam58C*ljl-Q z^y&IlePi4*6m`#LP*r+k%pU`czdryro=8S(^9@W99U~H5r~}D$s$NtZ+Uu;0T(ss52F9#Z+3*RA^!awdkf2xA!gfsZ!e5iOr;}xBogb|?F zobDY@3zKL?D20Nm*5VdhMcuZ)ww~pXB6bqi{&Jmw_!R-iwU%94aH?j2ongjMIeU?d+Pb;8 znmJq9Tl~{m^q)WfRryY8o_XL&VtiD0FUeHO7qr!*C8;>*qM1ICsLHAAkvE%A*`agW zSH~MeL7y#3Y-yzq)EumF9m#toa~`7HK~H1K{Ge>cD7C^}6M_pAQ0%?o^9AMphI0Pl zBbjK%+p_N~Drs>vmF;73Y2(l8b1$eU2$x4tTWJY|A-e4U8A%`oz*Z+A8_Z(k#IY9` zh$$ql6cL{aXXYE2+ppr=9S&z6U65uhLH89Kl)PcaAR4A>dm^v&9}5S54!^>0a4>QQ zkF^f(U1ry3Dd?Pu9b5k zAQh>k&S$gfEjv?y7XrifuIfi8EU|QL(VPlm*_4+2%Rf*48ikxft=g^}ojf_Aubp=hehueU@c0{Kjd+n>8De4m{3^E!Ey zjfL!qcJ1+c)d+vGl)4aBR2>ax_fPxm8a~n>*Wnlw@_bU!#7L7E?O53`O9>Wj=X7{j zIybGL(j6?mDi#Vw-PEc!o5K~4^Zd}I!GYyeWK)W&B``zG#lpO;AzkW$Inlf*Wv-S7+h_aUn=qqt_lFT zG2>$H4CU*#ko#Fzz-%_*B(|yaS^~1pVEJG}lYcQVeD&-|s_lFHsw>^NxC8{QEnQnS zxk*0+L%F9%y0nv$H1CK)dA;itbM+fXsUC)xr5Qhh&owJTXPqu^`X(fV{&2BF2D@(B zX%11AlC2Bv0(Wb0(*1m4CNLY;pTy`6cGswy71qCeS2J%ic5fLsXFwa`n2j%#>ernB z%m*qY$ceY6bWWRJ^rR54!zLaIjmK;l-%6dTRE>X#WTreesl!CMzPRd9!OyuuJHYZ6 z{`_RtzE*h^vSE+)(EXePXUr2(R+hEelVdCFC)URyn(>J7$EHTYF|k>THs}7Nw^zAD zej&q`xfIiwn*zG*W9PP)Fa<4hjlSqG?E{RgV}Au81Yh+o@-v&>00l%E`zz!$ji-Sq zz9!o%W^ds&V|8FRH!n_W*<1@9lyp)$*Wn99R~bovx$oEB(hAF^s4`I(C=&2@p)UjH zr0>XA0B^6oxR4?^N@zsOOGVF!>wIm-Z*%ql@xR&qtctk%Vl@s+Sm4bg)RGFe?<_u2 zhdP~n8PuNQz)HPFdCtHg)Rt-lMe-9)5#|PO=7Sk8Jw%NYN`o_}w3$xKKV1<%(+-_b zDwN{=gpJ_1$UJskmrW|pK?x%}h9T;w8cnJ8<8OX%NbdI!_-B`A-My8)XO7={L$0kE zV<2IMdOTSEqY`DsJPmY38PR6io0MaGbI@b!yX6a{4vbtAGG%=s;af=Sdm)% zT@DOr%`omt37RM|;dP@Dq?5Jm+lEJQ?@;Z5B2S1AwbGV7I2aw?8%1`vZsLClVv?D} z$y>*rqG8g$Ik|eRSMK2}0A_aX8zl>6`=rs6heu557~WIOTx+(Aeu@zc+PF4@`W8$4 z6rM1eERPedDo?E80wcG7973aro^N(%yocQ;dwyDq@2A@EYWTj#7HKMTNTS_yju6=DAVgV?RJdlh(* zK=^`+d%kp&%|fpdh<>i$lxxibZBev2EVZ_D>lVOevpEz_vyo4 zCYWjWAwGXPiNX4nCcIsdl*8haOhPdW(L*3GuN5XfKSwD+iLj!F;K265b15x2Wt~m~ zKZ=wNg**p_7-7S0LS^so5SLBj^$M;?{JYRO;-})h4vLC$g57^&*_R*N-D-KjW_m#} zYYb>55QXct>*oNU;Z}%^aC2~<b`9%Lr2Oth|!tPV#W z^*mlM)>YOr!fYgExJi2a%j2s zewpe#0OSfP#QM}kd&2U9OtRQKt@>6pgnFc%kTmk~ehTmiX++Jg9$p z-(NsH$MRiP*;CIrjGO4*ufw^We6(82g=pnCu`C(Ko6s#Q+U)|%`E>4} z8LK-vL#IRc1hxOH%0e+JH?`|ee8H<+zx5n;tuM$C8C<1+%SIt76A4|C-X=#k%W0u` z&fCA}LyX?NBqX{d5j#DvLK|tyMJ$2Ex+;Fho(!1T2Li#Nrbl}IbI*+Q?=(;0Cx7eh zv+?$SRfwqCTA7&r_d-O&=5w-&?r&haovh$l+9u%5CAMao^&_1AlsTQXV?ZAI*S@n~Lo#2@MOL}ubkIdxh>vq^aT!vk( zIE(h?Ur(yNiz|RxL~LA~13aJvR55|IX|>_%tE@Xq%Dv1)<^*(#`c&bvCQ31!SkWhZ znpNh|1a@dq7Q>xK;-&g?q~$DLz^zqD%Q59hTOTExg0X8yqVI#w7XGru^hjBmiypY^-NyOee5 z9g=PR#9Ij}l;!4`txL4^3cYog9#n;#Wc<)AYq!1|JSw6sHw;SrtsUoTdvcKsr&cC5 zn$XS2Y_z(%`an{)dmG85Rd8p8NoyA3fqi(CmY>Xsd!&2dNEls}eoOAlR_Q8$M>qc*No+F^>8IxkWyyCKc3{^z-KEa4!M*9y2y!23_&@fxyrGqL#Xl zR`ssn2Xf!MJyBosT@#GBJ+xD`xV?xhW1_v4Bf?^l^8pEF>X{-n==2dZ>;uVil)zj>U^l0oNqVq6)FUlGzJF+f@ zF0Et{0PMmAlC?I3O0)pyNM-fY^Od($YK#V2n%M!3{Cuj#CI*$py3G|XFZ*PNiK2lV6bslY4nnVT zaLsL*HLvYo^~7mwcB%8ZjV;kG^Bu@NPyF!<2cj*Fy=LwBgq4m-GLS5*)0goXpcml^L3|SP}GU5}lDeXd4K$9d3u2BnF5%t*pmD!trw1@Di&$=zU znk*ziqM>H(i4`K0shb_PCY0|pC%W4Eg-LW}j{oO|aHv|q-elu)zdP%=MP$M)#MgS8 zu`Ro8$3I~A_fX!q!#v*C(BQ0Q;BYu}tKMiKtEUBMwiEi$X<5;r&lmco3)5fx_@13% zm)AG=>W0ey_x1n*J^QKou97v=r9Rb+WDC~)i7;X}kOaT)?u6*jVGRGWnCyhIQ^paxGbxl6qBUL1o58$bG|;2nZyKx!I5nrbFH3 z;4Yo!Kju4bZ!P^E5bz2{9Ak@(dRg8Ur;X!;gQObm{Hp4Ad!xw$LLWa&&J+emJko&SnS z-N(f16Rp{PpDdNb#W>3J=Q)|%ZTz!crv zp-?x4om85O(Lfl#Swv19&o(qe;=`J$Vu$ZaAkKhyX{%glM>+IXe!;8St%hyY?;MeP98j@(?4KN^bKJ%?_Iep#3377Z| z0ZG`psVLu2kMcW{#Oo^?k60*fN{Jw`Cv3#AN7G^4T&H_#DDOhxFbHhaIgA_p54%AH0Ibj3zVRl5zl!X%RW1pYA!Wu)*oq4#{6JeVQG7tKb$m4$NDIqTj&Fw>Q z(!;eWLe`J{&kH;=-3&VAGn_p?g*+_(b2zJh_7H5%{#QTYzun;3pHu&NA$0zIE+_oN z5~K`N>OaCi$o)bjwTmkN>U$`2GHleStMb{(CZ{|J7izF{S~n1Ndr&KNUtrW=$OV4H ziEN~Id|eAu!Th_C-u|%Ra(RFMxg62+RjYp!+?YD!r}5Np5ik@wTxy)Q(L8N=NDZT( z^y@VQRoHEVh60zcwGSiiAcm)zPY%YB<2IH6P@-pTFK3yynYVvGaMS%Q@pkIoWGqFd z@HzVGMuoF3TV?TYurv08El+XAB5G&X5{gEcT~td$V(I0@YdYXCEqe zta8I6%x3U6d~VgsGz-k4Xat2YrA7(O#IzzU5$J62vzP4I8BMVm$0fri+GKd!P_*WM zre_2I$86o0t$v8;=5kQR`T+tNE!4wouimOq)5dFy8pqTv?n4c#GpKyK+RQ_$@NCmI07f7?_0`sI4j;Ri^-P^a9aP1qe>$2vto z>i{DS3Y50XgtszTn~UIg6{hdEmsG_nt$R3r0=;+CjG2+G7^LKc-G~IQ6;sDte zQKNH08j;+qrLiCs5Pw~m6#m*RaR1R%&~jnnQOi zEpYzNn)n5O2Zky@^FqCyAu9W~Q&h`5qKvX!_a!E6Ge$EdExkHz3_a|?QHC4Ej00~@ z$FjDr#yeU2c_O6+r_G#xau4uW3I|RDAB3}gxn+=_xLTGz$G#9r$}vGqJ|!K-fN%sCz9 zYQ#HVe=gRRNv_~RP=qANHP#d~*MP!W35dU8$63%QD-XTJW*;h3?w!bMC+>z6j*xtT z)-J^FD9~t?ydTm>EQ|Qe4=}#CI0zj~nEfuJpGceb(iHRI+VV_959(@0)=7Iz0U!;U zB6#ejs_4#)yFe$1o$G+=_Nv;sd%w-?u<^+v(8P!NBu?WkGW7fPLO&O!)^o~?+!0w& zydA1)Rq6nmh9nyakR{TVSmMtW)5JLD>M3z@`sgc%gOt}-EbbjnI87#6rkk%~RL&O{ z^zi{i{NN3pxYFMM$>WRG>7`qmJM1L7^n+Wfp_}__{C#Du#Xk|D(kr3TRp0+IUS*Fx zq-mDqDLX2{_B!h zJraQXJ^li96tahW)67?#Hn>C1hv>{Cz=oDg`lPHBcQC%j#5SMlntibZ3UNCR{jg1V zgO&ri1LzkF71>OwV*yYg_+cVn`}k7q6e4Z>~?S82D%Dtgx}g;!}i)7KS|f>>MuBy}HigK^7QKlf|(+K2+^^LkM#Yomsd)LyI6;I2J_ zF0UXm0~~jWZbSp`6~Uep;69=`)n22}bgyd}Ua>!p`c=Bh_qGQo0-zH=T#dNjWd{Q~ zXMSMmKf?H*LhK$JXkEJmUc+H+^{<0H7e|G5s|MBX=;u%R>@oKt1<4!`N5M^+s!JPf znY!eMo-?S7k`O9DwS${Scv=EdV6#}T)<$bo+M_lOlt(L7Ao|+Ii}Wm%mjj6@0KQUSvC|}VOj*I=BldCG2VdcyLAB#j^bfp5RA?7B{yyl z^9EN|#2}uJF$i9jTeLna13D(4@3m{Jugyd+WYF~ivv2Ns*|Fso+#v+7+E z5wEY@icBKg2k8hyHdzGo*kjN>x{T5V@gNz=*puMhnRr5yNBKXNY+Kl4lW>)>$7D^e zJc^*{H9hq7-#sL$Kh0`LY^yuDydQeX{po5X-xz}C5~5H0IqO5Vc=7U&_4;gCC5`cCo>-}AnPR2S-jW8h@W&zn2xk2WCioanH7dn2 z0{ycOChzCUnX8;FeU6mQA?np06%waFrd$|7O`IQ0GR@#zGd zNYXm^Oh=}x+too-VCe^!T70-Lr3EDXHIgj0o)Sb0XgK}19nT;zp2T3$`nFGLQ#~ht z`T^S>hTF@L)I}@>>CJ9g4=={}39oVR`FlwR7**IGxidYl@&r>~Po=D*TN-?5gd__N z^tK*R6_{lQOD2W6za8x+)9T#r^OWwY61fK@m)t74syH07ljLTnYF31j>Osb22fw8J z4qZDm5aqOUQu{_Jud8jgpC@1=!9G!z$ZDN%G*C~1%1K+*tZ?A3NzI)Vd7{s(9HpmHy%^LQW_L30_$_DIksr}| zCDtE-l9c*YhZm0ao#Z06o!FkE}K zd<}ze_BSdAPVUUh5&)p~Zl+c?k+L4Coa4V%L!ug)5EHs{2bz*}Nf>*zv~sN4x?;I5 z-D5H%m^wyc|_-46Jk=lL!G!gi;Khx#id-Y=yx3AQ|wtbU+ zh#hOnMma^1 z$L)S|Z{ZeUW|X&`p1uJs|0D6GILf_AWK8)|^;Uqis+M+(9ShE~Z?e0i4G25sc-a`4 zK*v~1+YugRu5%QSCnXiR-@|yXU+>y`<3$6Q>OFT7?^xZY?@*YnzJeVNcPrH86#ia+ zlo)(5b=4c3pJOOVwF~F6rnH%6xhHTtF>Au*Y` zhU^hah7U>_`ouL0p3>9)Nqo&)`F|*T#~{&yG+Xo(PuaF@yH44*ZQHhOyXur}+qP}n zuexV?-kpwlH{L|#-m(8?Fcp~vs;l&9PrAT^iphaEk+Up?J(LdTUtNbV_MQ2h&apUf(vSV zE~)cXj2JGXcx^Ka$IfZ>`LGC6#e{Dy+H#5_mzuyGI9R_=E-@1}`Ad-sjuj)AYmb6Z zq-$~{j+P1U!g$4>`(x@kb>rRJ`}jO)VBZ5SW-o)a`JuxZEH|m9Z?yh4*rA50f(z0@ zU{Z|?n&*(p5*lD#Z3S~mVABI8y51Jla^DtSeZiL_8lf*hl@KrL20F2c4I^6*X_#y* zTZo+wra9@D8(^5N1LXoItz`zTCP!#6fl7uiFbCT9_8!7pV_dyL0DY+dL8Y!zN;TNn zvQKHojyXCHL~dxK&wC|skjn)PCXX4^7C2TmvXT%S{9BcXh^tUk^kswue;1lFnxP`? zg>%({B^iLq-w3cg@9&9Xtq?vk4ze9`K+k$b{O%lZ!mU+A&cz?)iLvh<>%TYxhXn`% zsm3W2)k)^>MI29=bP8iFUwL7yBU7y1Q(Dflth8K`d$edHkJLO;>@O8P%Bq843pJH4haqL~2i%ESE$? zNVK~Z^5Vigl0+PK)uIr*o$CtPWc#OVL}FPDSijHpT%Pq)n-8Qq$YpNdtS&6|&Y3N> zB7xF&;mRcKl1|_3o7~V?$80yI6oW~vS-JCW$G|fUaSeeCWQ~CoK;GNQRP4gHb~k@d zbK2S}(s)27H=_sKz;9R%83dfVRo}pWBkhGuas&^`NQO<)7IqDh+@#myZ^Z8}48BD) z{-q-AKVRfp0g*9+VpeeL(&(xG>w8?C#`T_BVuX?nS#nP#(TnqU>@tsHV=N?h=Mev=>S+|2T`ArEKp~d^mQ|AyQf6*s6DN9g)RGDwe6W)Q@PU= zsxh-R`A{td{8SNu^mX@$-bRB3z*vFwFup|8>v?CsIF{(znKNebxHdDWqEvgYz81cV zxL_{ten@th;JhTfLf?*Ab;3Z=m?_VM*?S&IEelbNy17k>pT8w4=o{sVIk249$Qm;3 z*6-eQ$FRL_FbR26ey>>#h?tgamR@t~!1$y!9wmm}wNeVX3nF-WMg)+!D}3lkoNtNX z@lnaJUI4OR>K@2@`@^I*TtT_#mkMJ`kIJq?4xd2}bJ~Cod$2H}&2O$Q2p=Pz9?B4S z3V0&e{RDs%nMd}a8e@(_WRk-Uxe6~h3y|MAZ+PNBkVCc6bbuM7vJ)I1Nrbi_fc9lJ zz%hoilOO>kA=>8p7?Q%a_0zX_D3Iy=6I|^;^4u=U+-0*6VGL5`6$s1E^jepOnSWqR z_)w0gATZpnqX{m?z0S-^$H0&*5-0!>ZX)%)>Ph&je~o)y&6l!9E~J*Jq7Nobrsq1o zjN2Nszkxq3yQDNV(T5699Ss#oPcvY09juz*$u+wIZ5zl23DyKe-~Jk*feF;D~LV zU)hr4ev4<-pJK|{C62F|o8_A1yg%F!QsL+e_@JjQ5mK6#zre@w=)mciI_Jm=Jx{nG zEi)(fhLV-9a$@YBWu3=*qU@f)BaZ0UJ}80PtT<0f_2`&V6vkt8TIfObeoEt3ryUgY z$Pv7Kq@XyucG?1)b4XPeamd@#XoX|ZhF#JcJ7MD0e#8C&16biiiKA}{P?fuG2=EuK zgvx^lone|(;ON!x~A2B@d!S?oqLw8I)7b5tE?XQ*q#!9wFAW$!Mync z=r~8eEV?-r<_&T3`4Gf-(hI}BTg_S>M)!CuYlxDjiLWK!K^1)IKLqi zK}p~cR5pSWT&ix+@uYuVX-m$!u=gj#H~2@iL~nCwE4cC{f@y`lwSQ%oBj1w*s$tfj z)8B+<{>_uW_c$#cftO%(RqRkD)HM*Cy^iqtpC_wUs-}4&)gYI;L-!-#;lKtx3u5qj$$3J)yEL za%c$|x`B(@M|Ve|y><1Q+7Efa^6uNGJ$0*_fjlTg9hKQG#BdLzDX?*P^-Sw~K!cuf zX|2%SxqF51itLifDPBIg*Ocy-q{J+wv1JJiREf=Nbc< z4|r^lP8MulaaoNYiB&^Dcb^~&gYugEd4=tjil5l2fq^^#3($av?Wy+hpv4W!8OU%E zMQD@1?`x|)jIB^MkHxD>lXubbjDBo5Lh5|y#HZsh)BX<5mZ{GMZRVbciYN$@rT2Rz zW0qF`Dh-?HRqqafk>&vG*Fp+FMHVtNjhUFK$5#VEFmR&@A3Ror$jZuz?>|6d`~Gh9 zw$V#6@@JJ;l{{I+I8Fs2*5LH|Mz!2ZR@&97kVR^i@N}B+@~uV7QNcjDJN334(_&;h z1&TaD!^fW`MYu<&o+`1T$e~8vb$~G-$!Uxvyz+oyjFb5kv{mXQ1^n$&weP#`XJWTi zSRGH9shEv0Eau5GGEZX=etgpr);ciY=*~Gj^T>2EN`6B5a?yo1=!3n<B?zSvHePJ2`BEM7r%MUS^(<0BRPBu%Y^0>(>2+ z`1MKN6{14)!1w|C4RzvkTdxD~-;==gzs6PJekvVdKZ#({|Ie(Eh>`U_$lx~rMf;43 z|7YcH_}hi@oFyXix7`j95QJr}WaOK$f#y*YlOtvoqV|pdhSQ{SDRt{Poj3OOi@H=K zeegeAq_Wqk&?Q7`%%<796C9?#x3=Doud5|~P1mFhX&EYr>X8f0)(v=2TuHd%O$wMP zD;_9T&|jP^iLf@jJ9$?}8mDF3&p&5>J!!k zJ9nRPXg3oG;xTb08@10N)lSx6T(+=n+1bf!ZyU@&<&<9B_V%FD84m4dWGM{8petLa z=I?2QQjGHb@jE(Y@EwZc$*Z0;u@p_r#wN*`b zo=~0Inus`z9eI>%nZeIo&}XU^%T-Ahs}~7J$ww@g8`(P|&pR@!kx)*H3ybPwG>=Zh zc?o9{Te%legg=ZdHdDj>iOH6M^fnNM~d+?_f^Y^+W1wyAl0c3le|)M z4LaP>%Jym8*%^w`!k0bKNw5fSV|hi0;*=@M~#tD@GR^vy&V12wNcqyrixnOGxT zBD-K{Xv=D@R=WGCSTN@sM{g1P`5o7yUQT*M{q(WwX4l&Yjf5xrvbKA)qRDhQ{a;P_hcnHIY-Ur_y=w+T_u9LZd2^5tTWC zF0($hKK%_Fkxg_RR(2I41Jo>JzI+!E@${n|pP4Jv3u2?j+xjl!6(gRZpB1fnWG6W?v{Xw(H*4jBJ&vM$ zQv_vaSZ}TetU}chRF1MF+ejDXw8{nF@`yW@=0qhU3$;!@#vrYHK~w1QJzBd;B5+ST9>U#lX6d0 zKHr+_kaoUVevld7VJ}P(ci5~Si($p;@f1IkPYJ z3G=NOCO1d80xyaZ7bwd@{;hA zzercUc44JzwjKiL9IXe@uqN-x0cM}`9oppyQ+ZUJOyVk2kgt|ay?v^&Ur^f48iRHH z_!Hdg#KeY^R?l!)r+3a)Wgm|_zq2oh=*!<(=XeEcI{K_zeX3B!je)KM+E^5Ux;zqNdWPaQ>%VolFax`iF|24CjCLT=veE1A>V9m23>T&)Lc z04uCU3Ely@&-~xdjY8TT2$G-QTk5Cc@ITU*|EJ$MNkzjEaT$5jKn!QnG_n`Z&#K3n z;tvXF@5z&vTLP*ON1gZAvwUfx5EBqlwn%?BTBCOpl{CnQymeuV3vqexR@*eBu6y zz^%Ck+%#t^krEiK`FO;9B*438B{zbhZ8|WyAIm>NL|yrWVbu_e)u$^Fq#)0UcrxX# z!X4c-BZqihd4!Ra9NB}cAg{^cGAg|fM4liAJ)8KbNeBPz|BCgsmK74}_ z6x#B-SWa5w=Pnccy%eP{wf>8Qh;EekqN8^tpwv}qgPA3{v!h1!C~3)tQ{yBMQ)yZz zh+^!`B7Q7vcdq7!t~)*SAY-xq#b22kWa-wog{O%XuhkeU z(P`PNacI*01Sc{gO z6R!feX(}@c%SR$bX6?1HIQ2iGDRJT2B>P12_B2V&bNu?%murJwk+u~C@l$Tf`Yi`n zr#xjn+cF{$=?P61+?p7?R&InxX2bFDG8t21^Yw!*45zfws~{j+$P%p`qKT8Gq*qfL zIPfR&<+_KRJjj}1)G&|U{;%J_CZiMKmE}zyL-Y|pUdeUI2cS50o00xrwRw3QoRP|d zPmas*yC|4*4G1?C`Kkc}3<$K&sTF#)IJP*!QN_W4KOiq#hKOl@tbj~b(M3p-!%0${mCWHZKB5e>qFJ$C zs+CUFo<(6nu84t-qNyEBG79o8Q6y8(rk+OVZ%Q0XcT{3KTrrcFqFfPeBz{akJc6ZO ziP)xrLP5uCr?c%cpSbZy z$C+C>(Pudh%7-jEC2V26RD{W+C83a!ITY7k;#3;6^+gGn+{!^-c1439!&Jp*<}V=zO?|-=|P450IkM!#nNHW-D9#aennuq zfx}_31)Vq;TTrb!Fx;RXoT6vUY1Z7mheo!+c%j}x2=*CXp_vC#l*IgDmjm{#_uWtf zF{U!xYQ(bhZS_`!yFhn@Y-6s3rI;c> zt>00L@qK)G#QRSjPF00JBU^TL&&k*9U0schQ7B^m^>134*{f{Iz?JB>(9(AV;=1IlpX#X$w8Kwd9kK zL>|mbf@ZICwH0)U2hg0!a$a~1rE*(xSyE7!FK_IK&#J~G>7V3LZxC9ao#^Y;oT`=N zzZ(v2TUGNo=Rn8MYOt7iKP|6ri7d9UHT=VO@r5bhzXR47&tHx&=6u3g~$`?wb4a<7pRGcs62xxD$5_~Be3)igZnYoV* zIV5EYj~(Y3aT0eo$wIzu8Rcn_$n6_Y_=$xr;H&pHzBCsDVF=do4dWTF3VqO4wUPew>lVyFcc^2(D%@1RnB|z8n9IC;!+y~Ts z(i;;)?0x50NV7jcmG7h+yd!D(3c#&ACHe2y+JQ0BXqXj?sI=t*DLXtM@A7K7Id=RWVSl5BdkHBcN+}~wixQU*lodDF-C^~mP2uz3kuypip(&|l-?@P! zb4JNI5pdDGBtf{;e7u_MyV{Vt*U?U7H|T;fxt ztqCtwlzJdvQ!*dUOOl*lk;5Y(Vec_$gXG4q_lX;zm)oK5pYKUN_{G5M-x1JYe^sxa z%SGQlAuI%hUUKNNcgAxuw_qrYJOWRV!Iy*rIawz1tlHFzqY!2s?CY}-;5*X-9?I`G zDa(m+$0^eKgST|u{EM`*=Elef{A}qef)?2=EA-Uxxit;cIwHxm<5Ht*zGv)_6>R=F zXy*<9;}K63)mH}kI0g>{SG}n0^Pg`>l!PpAem@8*0*I%uEk;v}&2Wu{Vl4IN)v$y_ zd)Re}^D3x@EE7%Xz2$dOOl4C{;ix(Dj22O_c@XI<*%u&Wi4Pdmg)Y@Dt{(7ba*r3t z3TN(XG^9^8iB*{A1Ue&TEzi0lYRwekf&4}#PfJ`?jiG(I1)h`(7 zv*OnsBr6~j1i8&k@38I7v(Yp%!7uI-fGoA)HWrHa0^DH>PV!J7N}H*qqW!IZC@pBQ55=fVG< zVhMR8OTB+ICjCp1$SG0^27nJPNQeFbZNPGHs83yF50qaYA3=IX*vNbCH2DhOjv4Z| zE!k{nk$c&*{OI%9ZO10S#4Iu#AU1zpvq{SB-v!B&Yt{0$24nTI2m+{NlKG_+l5Jb9 zm4ZL~Xvkx&Nd)AFYs2!MP zff+Yn@BST+e_zr~VG_OapPa+ePj}`2ebWD0+kdnS|7(5!p*r`|S@~@h&tfH(r(8im z78+{y3Kp7!gl!C$wbaLl2D6nMYZid77oXT0fJ*y?xYc3+MvgS&%!_`(v^h;+fk z*#7=>`QG`w{;d79e2vQsurbsRrFddSBsY;8n1*Eh+ZM!u;z)^#j!4CF83vag>0ynB zcQ3eqv#l2Stx1DcC7cs#EWO!;9%|8YdCd~%Lj8+n+etK`+Kt4;xy2^Jt3t6a7@N*? znmQz;>6oW}lY32BbNOx=3kMh_E7=_S>b)B3dghL>yywM&cM&{Oor)1^rMFn=SZ}<$ z0#!$_E!=`*vWJI2YYzFCDDgo*CwtUptueYE8YdlEzfvVa3G`?#ml@f;5{eEgVO+sY zPZSF*H(V&VH~n~Nxg$V88lF3+cg7=@%}ldOdC-Z$_;-hjecQ~yQXXIKh4RQ`3g!nL zjezmE#ar>?07}M%&PPWJc#?gV{V|AR0yL$is|SX>>RhSXRcTB4F3wfLJTf$F%|!0A z1_knfNgMBZwzZ75{2)aCoZ93np-hC)Z0p>-6~uy%2-H^upBJB19CUhn-t?sHXDZ3x zHtnc4L94~_aAxG-xNlHBw98%xC%-RAi6M_&seBr5K(L=~W8LgFl zn2Na}6kGZsn&D@uXp=6!uazKg>^#Uq#B>yO(G)s_!H&q;m1)fhzA4I0q%B9;*ma~K zIvfFNq7j6VX-TBSYf+*bn7?;8;)!6b{qyT13SndkP8rfTh%G2t$Mj+Yg`#cJ@&4rT zh_UwN5B|Htf|eMp1)8C4u(PR=hCl%^^YG9WM<;nKw#;e8*15DPZ~rF6qw7D_UG@X} z+5a8v|5N%YDp5k=r&oUXyICxu0k{;o91Ma$Uma(6HzB_tYERNa4<8EBC535TSfH=a zueW-qFA9^j&9-0IWFaah2)76J(A663pU4-JzUJ=!cH2C9&dKTiwb2*Aq=-gNHex~c zpbM8^CaI@B7#XexYBYMAd4n%7RmyezzTw!a!pPd!b89{*?;XLXeOt~V;4^r8@z|MV z#kq2@PE%<}*)_OHjl;I*(m@vDRMMNv+o*-Z(e*rV`}d*#z|Fa4?@qYCc$2?qDqyGR z^m>i&V3987b6BJI_LV>&W?r?~f{SY|CGw-*wDLghO1poxgn1D0-4H3#Gi9u>Jm^a< z4GL%=iy|ApwO^pTNm5%M(M|q3%3iz04au^98_5YgXASInzN;RFFX2O)sU_YI|)MEDWCH?x{mZ*rFNjkc4_xwrmr=htu<_n z1mx^Xuv7#wKbjaiT`g?ri6uM8g~ z9aHT-7C$Vh?||+d^eN~9*!`p|_V*102lwO1(o6(YC;AjV=DorLiU;_s0p@F4kR;RS zeTi6EmZt*=yDua?J7U6}3K36pt&N6i>*$d!y>W?w@|gJLNjr+7d438VQUOc+0k|Ql zh{GN1h*(Dm1ulOyM67xukDxe+xNnC6u^#lkctg&hQlzYPQuUcJw*vq#dEEjX^p z3Y1Cr8Blk1?N$sAO4P%e78jOjTcF!@r$ib~(AGBg(+JzMc~}e7IeZ?*Yg{#&$7P3> z^Cqiyk~XVHT{d%8dFsd6&wo#@(zQzuQDYv5)q87O5d>lmtJ9cquvSwZKJITSF3_C& zo1ac3?FP8PLAbYzjOsGLHVT-~-@@KW-GX0WU?bbY)q=PFo-ZVCW@$+qYsKW1y4*Ob zww-9S6nwaao+^SmQ*E`-I7|DjVT^9Y_C9YSp!qP zHwc9*KKgxg=%HP}oye!{($Noe`Fatt2ji?E&;8osFVKQK z1fCn2$=JoJgXa~qu|rLOT^o6(HmnfKzV`p{Bw3AtglET&yCWUoX{ZgMuGlbiXhEx0 zfTuJna(?iDq+p(vK#P<`w3`F2LnLH>r95EM7D(FulMKVZH!wFnBku=@cR3Ot8*EG5 zF}+6-JlY&9EE;bYb-<>dMhw46RwY7nTyRC!UnVfH-3anQ>6icsO)kX>F*HIHMQk1T zw%>xj#Q)G4vt&w!I{QwBUK~&a^{IpUgZVk8nFO(qf5SX(>||}_59a6pcbI4T7v?AB z|7)X1`qxI!1L&`Vgl7E9L>&@RPqn4JFpUKRfdm?gCjxc{^kiuViP~py_pwc={nb9|{fn}Hz5ZjS+&c3uh9#iD0VJ5F zy-QEY%{HI*wDovbeU>@vv(4*|ML6Zt=*W31Op=%iWJiJl!vp;D^e*~dI~Sb=g)y4@ zShj}Nt@AV6s5?(UUUQh^)(=%HEHl9vajOcDnhD6kG|?1pr4!xu{*Yrk;w;OyvR_y0 zK&)|Df_?W$zDCMso3)a(>E@ItDH|cAP`OI(vV#NLeiOo6n-~PTp1h6f8C_o@f}E>I z069k9HS!1i^FQF%9Ok&Uru69gMS+55Oxu=aWzMDmr=mh(Qz=(CbsgwF_aER#F1t0+ z`vJdUlnRAK$OaSvE`{s2&m%hM+QEW2ehmTExd-ExK*<4_8I6mA zf8>7$eCGcI{y(H-`H;Sh+b^4n{#42e$x;wlj~8upDnb|Q+d3FO-dtTvRduNt=# zzo>iwb(!YN2ueL{=IjN~cD>}^PB@bn>GqS_Cwgp`wkg&3!g-jhZZKQjt{}BHP=o2Qxka#IfMywsD_yZw z*E?_8a#agI&G^(IvbDw=oG@Z$2nV%OfB9y6@2*1RTQXxF;jHrWXD7tZ&YKvLY=zQ2 zZ9-oqmnSxAOQNEu<_a-ftX0C$LBSt}bE#+mh;LObvI{j}2UlznhV4(a z>dPe~QoHncFqc3|t*SSGEp#8=-IYUYKM%ug)@4Yqx*Cp2edBveEAtm?vci2)12*9v9X^pc}Z?1Y4JCRL@rTA?T${9Oy(-woBmS zJvU|E&`;&aaFJQtmzuTB&^v7y-zcbtWOthnk>L|-U&4TOVkn*Ofq)|XF49N4gg3Ur z!SIT`n>7`J1JN>`L&+of!;qPVWAIxSS4;)2z$KK+7x$#ZWn#V`t%p7S0c5WuCI_E< zI-Jb5F2i_Bk=h~_e+lib07BFR%QT+@*5$V6CXI3gK5`YiFk zIdBkr&`giiiq&C$FeH-(V>7WM?OD=*D)hUxq_$BA3S3t4OvuY(u>4`wv$T*3+WM@y z35uPs!)8*hZ(N|b!X4|rjclK3$MiVix?)%RyZ)8PI8 z&{6(ZR7>=)kN=kuo20Cvf}w)+6};YKsK~#lj5u$~3`l}sYKECNBp5XaXXRgueQ7lS zKLo>w0$SdA*jfFF>SdlQQFkV;l6|l6M%dOiJs~f4cin?g)=Zj%ig9`ZMpZ%{)`VxwxBl z0wr=$UvGleKHMbAq5UhJ-UYc4q`FupT2zyI30SwFtBtc&+z>H~I|{DDj>z)0Q4Y-! z+W-saF~r5VdJ;Q5JaH!mRebIQ{r8EN6{>V&28nCt*A(S`(0P&jA3W& zNScunZJ~bFHQXqMo0>+-I0d2eidUPaeCidnaX3-s*Nz1H?$$as+V0&}E zlu7fn)DF4gn-oJvoddqHtBO3-qo}!TNf->pHo^A3Owf|#6a&lw9a+i0R*n20%bE#0 zdtM)g8JAYn^fso8@!9JNcUQVPL5fJF?cfTTW1G8MVqc(|)N{9L3~jqt019m!c6^l; zWFV^zY2=3wfNkI4TlwcqjTG7<^bnKF?$c@lu41*Bo z>@9=AM89eJaY)#q8kzUtB#)U(;*8asG#AQ?9836W93j{X)Tbz66_c~75Fe$6$o9NE z@$~&S#g>H>Yp3@)&yuEAgoy0^{I;sFPx5wQ5B$Ejcd}3!%^-le<&MD>{0Pl){h*o) za@82nnwVvtUnMCJgEwedV)vFGcyYT?Ajm)Nm5B(=`Ym%igx>Y;2)>Sx$^DkH<_MvM z;Wjx_*3>z>w6CY)d&YayJ%nZ5%u@e|A==nYl6qdjD%G!?z1iqS)^Dyu)V9sG zVaLv&VbgXswtnu+Oa(O$X&t!L?AHpgMEz^jPDB_R-N7((^ak(Ty$%k(4|)T5vziGL zf0TzGI-Af{r}vq2l5InOBe_cSg@^?EgNbYDFWl|04&Dufckpv&pNv|&D%sLka&|O! zR3tt)trq|fk6kOkmm8)74lcv8-EiN;3tJ(bMXSi~F*`DWu$$P62;fgz`1lS)_EA*c ze2E; zeB}1zGd+hK!O?a?;T_|wwvExdI}$I%HqfZ#_skE6L_W|!?BufjveJ7NXj6;3Zd;b+ zeD=62La5$($J?b=Itl<^#S5sTLYy+^FAMQGdWi>K_8T#@H(UInUmLR~G?K|p7cPba zahuVSh+hSNz$h(oV7?&#`$2whQxX9M{_B_Yk5Lxx|7{EVkCVGm)lC(18UC9ZLETJ^ zE;-78Ump^+25!Y%9hDSR!C{47H5CoM9j)YwfVxK5^l)*ES$bbv?E1aRqfiR=SyUs( zS4wKGU26Va{QA9X{rm0Fg7f-P9Fkv_-R#q=!gxD_^*&>Na^fw{M=R@h4YW_njBf)h zGuTs`!8ltwy`dKm?H@-sND#>b1nm zevx*^L_`YX9kO8(^MGd(zR`}rml=6Qmm6@ zqw1M2CT6bJ`eQQtgwj;qe2dpwB@vV0R8F8SznPPUB^Sk493dHcLQiZbOV9LVP1`nU zM@^k;>^4|Akrg2=T~t@%!b{&Xv5e(#TBtuX1TIz69EU|fWO^F-)6f^**gG^re;9}$ zO*UF<`#nO>Y>Yq@gM9VzS!!lFO277;&QYXEQfWV@PBK?R56*TM=+&7&9{okjRi6(k z#?2F3Z>(`+m$#w)q6)t@j&+&HC4bepl zx8j_C@E`@A=C_g#d};Wksdms@2OK2E=VL5lelbGEiGsZIZp-zz>BeKvL(?FF^Q{xe zsz1^qqp;*=0e#}3+E=AycO*@rq2+KTgLOX)I_E6SJ{L`R3(-DbXJ>n zv6O(=W7$y)j!gC4&VEu$(^!ZbI_$0CVX>Q(bP zovMn}c8N5|+!{GETty5k=-j?L2|R4+Frzqu?pYH%K(a8*^3pF_G)}qv9Jx&IDZmFvbeUnAvxxEI zIclbCr1dWlCZv?a3Va2RqJax#Nu$^VOi1m>HY%W`)C?8FTy>6O5+#qNH{?!d^9ikw zEmqKaC!XO1nB%8;C-(S-3g_Dp|SJ=p;!fz4{tXA^{lW=wyHbtgXLjLM`V$@kZeUy&}TvysWp`q&biBI5i$WkOhM{q9kX$SR6 ze&~T4*2W>RLe{nH5HK`2#7XjM1eWAyfm&GV_b-raJUQRRgD8hKDr_1P%nP)VUe)dl zoMKAI9Fs-Pl|+q(&j2ikk2UfdwJoLf*?iBLsC(I#b$0mNVhY#>AhHLHdf#MHg+v5( zfdx)-awGhYk!PTjDS+0LJF~m`nM-3A)IFlYEo4KM%_X|dEv3UW4#{;EN%dpAF(XtI zYmRAqt?FV{^Am-wGBEYDY;DAoz2~Ct@ZAMr1*Ppod(RACzP$B!F4QWo5I6`mqkdJy zGAD@UZ26MW1IXqsW2Fd$-WU&KBw4KRK1HH}j$Q-_2sbDEF)M%-(yc0DLx|>WQ+t+L z7~zL7ck(YSEYwMqIaZo&>qSHea+8Xq!#;$#s75iq(3JOi$z2(kCmP*I<*S8!3+61A zGo((Gv&5!^;YTp}UKIiWN#|hi``07RXD-k%p3Q#dUx^s&`kPMuFnCp_^vW!_Zb0dL+Y!2y(WVk zcedWoZoH!GP3e!BhpN_K>#w7@v?uo6WGd~t(S8xSj z4Pil%ZS~>fNpuj}S&+_#jn22P0&B|eFEm9Hu@oW^GYtQ5SfN8o&*>fNl2y2;0-TIy z8CQ`LMM@LD3pXgmb=ghREXNhOrJ5s=qahPi3szrQ*%Gy;O>RfALh8C!r`JiGE3E5a8DUry?uLqmTy~S6FXdlB)_xY8P@pnT7j-t2U zbWLupqo|EZ@epP@{}tlL-}u@I(&YOSjd5x+YW>e&K0H>08*Jd(j6XPd$B}T{(pP$* z&4?{Pia;^&BIF<~;E19(i3|49Ge5(UlrG#TUnwnho(kQnW6O+v*tE%jq%}d4^D7Kt z7gzC>N`#o)l0*I}X=704*Wy>YnI$*StQU&)vXEt4IGruL3{OtGf%1a2V3F*!P)fdF zaSS+Wv;#_jI20u#4u#M|^;|@W{NnbJ-Z#8PK(aNibbJ=-1`kGrPZ+xTH{q8*Zyv^U zZGZ>tC`Z)sT#k?zTFLjn)qJX=^i0owE>gtEzkU(?PZuc_d$WI8>ioxDs#C?wQNaZL zdy{0cQ_MO8P82_YuZMn4_5=;LExA{md}a+YD}zaDt%uEolQZ#|=X&}6EBpAh zbNc>hEQ3}XxCYqAp0D?l=3$R8|FzT<2W~PzngzBsZ(k1`H{sR}HM2#hH}=JwU(#D% zGzVd>1ld#A6Ibp=4J5}w?)q5hTW;2qk12-yCEuTxoGo`}Tz^{yiHl}G7cK|xJjYRw z1g7-{sq61fjXxnobogyG`zjapeyYETCfM#dM0CWh338_F4K!(2m<@RBcCy`En2_sK z(DhC`L$=EEfWB7#OkB>r0vU;C!tyD2 zG?PRreSAA&dRa3m2pD)}USG5-)ht@r(>ahjQKd)*Cr8u0j>xB=S7n)B1K$jMf;s3z zp;%>n@AoAWzJ*z=tcqHB;NOOtrU3p#e=D8H-14S+n{kL)GYla2r3KJAgoOrS)-ZwP z^8m+~qCFTuFjM*hc0@m2VzO_i4Y2pS+ zyFaIl@$^;2WRGHCiDXf!{F8q(<3bO;nA;kdlO*O;n3>qynk6JOcv5DG_d(E^)(Zxo z;DjVd3ZR7}KcPgtN&(lN-oi;%B=s&$b=~Lan-DtGsZeGo^HaiD5!z^0Aj0nn+2=)> zoH5ku=i;B-{Zk4Gs&1pV`+TqF5!kaF7QG($B!QbEJp)eV0H<)9-Q2)|?xs+oRB^Ot zJMz_r(1SVj2NKfL#%&$$E>)ci5~@rZVzw?Xml@s5eA>6|zO4~W7Oz+sR%`!=i*{x# zsl#=sb5xQV6b)@XPhoQM>D>>&`xUPdB#9a#TL%!z4BsI!7?Y{AGz#h~-6C+-bKD0S z7c(3}Kpx-f6E^~QNl`H7rcEI34e3n#A;5H$$F-{YkdGIH@tfHS$+t=>>KKGl9#e~5 zLJmqJU-xTKbcN_4n+P%bJ5XpNO}bsk-+F28RNaaaNp?ohPV`=45?E!l2V7#dv4SdT z4oFaV|56a;RlFs%bx@)!+#6mQ^jz!N+NcNz237nJ%xn>3!W=9t=V>V_sxP5NfU;8( zh#gm9(0}aB7JUzH?7e!bY7%vkSSC47%&|}2O}$?l6d5GeRIWu{54`0~xOG5Qxq&Qh z%$6$GXj^xkDg+ma_3stcV2yc$b1qd4eilz8&?1yiRsTa52D9D{nYcz?h< z^gp{G3*G4qUd)mp)%2;<*1Or!9Zs){djw7Y5NDDyR6lJtUW5ly^@E9VQwnQ72|e88tT1 zEXcJgOOaFAP*=AV^vB)paWXf2a=3p4L1zprnV?=f*BVT#HzrQw-!_bt(1AmeY|S3M zVJg}gRr91uS;(2E!<%=`j+ry+jG2q$;_N}oJq?XOaAUJ_%}O4IoTSbS1Zl$m)fE8Y z4KvXlb&7&Q82qJ>O@f0ltC*e*8$w~23?XpIpVHl@PtQXsliZTMP$`*irCFJ_wx*xL z-W=kV>rvtqfF9Rk4PkZpn51uk3f_$6+KU;zsf>|qyDYV)GwPCETe}3jJ!fbkIgNqN zt=!S33+k z_yM>Ph5W%$i|~BFs*-jgm4BttUmq;DcmOqx6I?lpY^xbemF3HbWee56bpZ4GZ;0Gx zVeoX1U9>{WucEReu1t;TzC}gM(#O^id}E83TN*53Y9m;qoD`{QtF+;3@C8tFHV*Qg z2agM$9Gg3^;tPtjo8Ow=F_nFST7RfBz*=bagzcQod$f2o&NGsE=g*(txQFTHuekwP zLxA*Cw>1V_Mkupdo;W<_SIZMZ==)vD(RgFhG=LiWQ;|z=J%r)7B37s8%~;0E#3W5P zQ5md7sE_neeEv)dph?XBS*H2JiJ}igI2XWK6UCeDtV_={WW%e1rTZYw;9Y{&PU-kH z4mD}9@yOAC%L;mcd-07~8M$Cy_G3eUXi>StJ(i$7h}EEr+(DpdI%wV#zg8n0m^`O0 z&LW{Mg0z^tz&ENVcFs=8Xz6q!h$^5>=@wcKNuz#4a;|~8(Xg*W<)TG2F8_giU7{%9 z=rd$282ZQH_L2k;)(R->;03pMY^AT8<*QqG+}!YC?UI3~hu6dV@4>WKs>*IWNjG1k zwA?ksgA@XlpTXI~&0xWe6lrZ+UlZv11N?-UDlX2wS5{5<3%1}D$!IGJxm+N)oZ;z_}ul4YQbj~jv)e4;6OAvk; zYEnnr`N^GX zD`L{7h60IPp>Yx%+}#=3k&LdNnapf$A@YD8^uE8*$M6epfO#U1V|OX1h^8G3XLvq9 zp4IIz^lwhDPv3?1Vgp5hHZ zVh;m|U%62zROA0psLe@V)V**%guJ$GfXk-0gR8i6U|`u5C!rywq!cS=J(;}8NV?mhchF#^spq;QA!W%4BWk+O z4zLp1`$+ufh#DDE%KF(zYDtvGrRb8I5i*}UjSw<^hb&M z3GvSGj&-zsckgUa^gg)7Yz&67gfoawXn4@g;VwD?6&sb7gGSp?vtx%1-^gtqD><=% z$iZkrzfHfjF`DD9pqmVra;KcpQqSy*{_3T)8Aq89KZc&}N~<2XpJ(erCQ|xxTj9X5rFo-QM1~S>Vx- zl>hMTvz?MA$&lB3<==7L`R#G-qhsT_Utpq*mml;ID$IK$OW0X3yX`(AeBDx(@FgAW zV)T+H>~oEu0{-=zb(8=5qzHG`1LHFxqiHtA=oXRnlkqd+_qdPKhVT(CY|@;};nX|B zhh%V$2i-?Ti23O@rR|9ZR7uAJzSlvKmm>yH!Vq!v z{1Pthq+Z65^Nzvqnfm#`^dTDB_Eo}?&YqsO4DnXQCQ>uaqdj|AnIi~T>K$_a9d6sH zB>SP?Xr23e6kr1i%-Ki9Mivz&j2dj+rLlRCwbUs5z8G(+NO7YOx{5 z5U$x&0SpW7t&xLu zjM%5FpnZ}`*q^(K<|+zI7&Z{(s-`ocJMkuD$sK-I{+(j&HXZTN~P5RPQ3n|@z5+Du0B=F|e z5x@x-Fi25 z7cy|p85DN{g*6xuW9^{J3>#bhU#0z^8CRoC?1j*!!Lf!e$w!DAffg&OFzfU)ajS9k zICPF|zi}@!oP{92ktVk`Ft#^$X&tpj_s~PwNwf~qTHD`)g>?M+fVUj%qq3!0~|&h zI1?mp3?F0SaR}#B^}+YWGupq73Ii4Tkp^oWfg@S@2Q!L+byW&*++bk09Ca8H`@(98 zM9%5{@Rc>Z^7x5p5_>iW^h=vOEcL+Wi^C zlDZPUc`gq5I_)%v|K{b0Lo|wuVg@(OPMUe9D8w; z$_#M0C*XlW-5z_9W=xhCLG+P!8E}An4WG8i1tHXugw&Gg6nh*#|D&>@&RvOFFY$2C zFUQQSl~TWfCeEpx@cEGV5UgSqCERxA*~X1BVQbnLv@b>EP^Bht8k*!5%ihy_#;M+G z0xaBtNx81k9!QovT}n8bnZDdZ5faL!N6#d~ngElJC0LQDP)6X1HTGoN&X@vum*`xs z#5dv`IR#J943_j1)~OWAOv9-oN0FnZBz?rf$s-KMG?NQH#HlB$khA8PHG0FTM;isJ z3ufD{RQ3zd6o~$^gf|DbfN$A@$Id@gjU*|kFQXTnOEn|$($#%_B z+q+~8+hun|-J(a+kBI(|>g1(&3-sDOYfjr!3uc&$XK3z9M;PuZ$Do1?X9|S~Wyfp} z^aU9TrrH_r?HYyj{rG501Nk<6{pTq^JCRWw0)*L;DE$8qpr zAn??FmCYZHd;)Ek zUlO}T4`u15e`2#q5ddy^Yo5&BU32)Rbjo3ljM10JV@{1`mIG(=j<}3}zRB7L?Xxhh z2U7YR-4pm^yGHT|dAmT}5*DYQF5#(d&Rs%yhC^uh!aEe_-jv?3l8Ln=`Meu%kYzU- zVf2zD91$hCdB-$;p|()GvjH1JMZL~oiTvB-ZR_ey4USr9rI0#iBi_w}x#5nxvW5*sGLUm}Y?s7VOkfNllOHE@g{IXS>1)c!y8bC3uBbrSNS`UQp*-lkVeID)!52tyL(hQXowAds(Q*aX(1!bU2`dCzDSr`G?$G@@J9VGJwu}b{c8m8joxr1}%rj{T7uMF4HO-;pB6`+DmW3@s3i3(4H?t-F zg-j{vApGSa1S0LKdECJQ9ZiiVHPPO?!FB_mb~A+*yP?GifKt#%F4bcy zZD})v2{ic_LIl>qa=pT146Q^c%_FIU&26XH>?XF!A z6guCcU2hS7JWK+u9IY}@#Kb7!W6X=cH`C5L7q`Rn$DsJ+$5QcfxtYDjcl_^Ph=2lY z4ao>IEo_O`jgMN(=kuCKvHAAm=!1Vli)?1aMNuU0HIC0VCX6f#N8hfx*lh5<(!r>2U~pzWSRKn&lx%*0+UOVOR~jylI)*fG}w+<2|Fer>#gWK zQdWGU-eAQe`&4i|E}QMt5R(|%3~Bi!_xOq>I_44JT=MONzh9hj3A0E9p)ul=K9O5G z{_0GmtmlS*1K}nMVy%Mg^5ieFuzYRfU(nY2%mMBLsm8*8=#^r+ zx-tR$w^R^EUdgB5HM>**$>V{HLUb)r)CM`wNQojGl=etTvS zhfpp~2IXF%^=Tovt{|uUI5xWcwQi#2M9_ z9XTgBB_ENXClAYQYb5yiFxD-g%cF9*oct#n;`7XO*j!)X<2I^~tOAch_~=cUl4bjQ&oBS$sUYpP(n*tSQ7G`G{5_2q{UjZI`f=|ENH|l#i4eGiYh%&k2m$rX(ruCLH zyY)A&B1U^%NacIu<*WFov-L%1o$mfwN6J!-?|DXEv1q)W_+>#Y}WqH+At=Dks{eC6_~EgiWXaz60QR8u33YWa+R^n9pily}00By}I6 z>h39gkJOxg@u0xq`6nFxMg3Xo_zZ!mUkW$$GTx%S>YL>U`qOcr*|=um#n~jRosE1c zbgtjsdt|{bA+~QR_idfkINIKO86)k(fLTQL$1ftoSJ)2W)|pF|3}E(+-H8@nv5WI5 zRMrNj>3sp-^GfR~yg69hNoTEe z%oJ^GH8H@L3RLC@Fjokg^9f9il%lqJediG~?;s?~-gki6*??p4%DW9?yFOCfoIAOX&Ul8WD-H7i*%`4q zukz;biD8=;F!{hT(80SLDtbnleY}UEvh?e2GjUdahaw0roXOA z_W|;quzW?Yw2a8yv0X)+CWhf$mge55Fiiy2>8NL=_(C90Xa8Q8Tsc$N5IYXCi}ISI z;*yAkEBD@{pggD~k&-&?1hCfvIdP!Bm+>5Q{LG41(s`e)&LWA`D;Ac<6PhbB;+v~7;*-<)1jT9s7eiZ&lXhb23gXvmqF2cU zUdU?#hcH}+X7;ai<6;~X;LieW?5(2r# z$_*Kg_%2?9P1tss;9a;ha$TX<%$D_wlp_j{^?YA1)m zZLAMzSBck-g_{`Q?!(4yhWG=Mk%1hM%)%M_7oB%B5jjsZg@xt=ogOkrzPC{}iLi5& zGS5__V_MdQKf9nl=ifWwT`B))=8P{5hwf)s^pbh!mr*i#Wx?} zzj{JAeDanRinvIV6hy`pN2V!ql7LSPexQ}CSj|xPC?XtJ)??*9&S9^&;n#Q6=kvKr z>bY(8!wra)Pp0?-S%A8>#Fc(3&HLn)&d-U>*UT=|&=zxz6n--D13j0eZiunA z_byz1iX?!h9W3~n>iB}4BJ)M_O66n)@FVFVw6V)xt+9ZQ7FEvS6m-stR%K=FnX@>R z%)EN`;Uu_3v+{q!u-B&Evt7*B^80z-KWD*sY{-!PCfSAW6O{c85c5}Jc-`(tdyU7v z8=4P@HXy1e@H`_2iW;>vLA&^OQ?p_jVoiDZTC+-Dbpj zdBJeA3ZtM0v~Q==T$jWB_YP?OIMubb1f8}ZZEKN&E50j>=A1a@b;0MlR9rzKEY%Hf zH$tBc#Z5%^Vy-+LDQIrpQr0tC>LO{<4RQ&~C8tPIep?cATeq+;a((xaXt9OFa*B&@ zS_V}us~)cVb@Xeq>`1?+$;Kr1#-w(ZdWM&c@~JLX%Bq!+sojE}hFc%hsm}W+kLT8* zQOF?waost$=o&QHbJ_V|{~+V@h4ld@}(pG zr`-@&zl__XBr}z5X3*#{Zdq2@5+iP7mj@r;I9z<*_fTZf-(cvz{J({NziIwSCv#M! zFe~}tg>U_^Vu}9q6!hO}TZ;c#=K9Yr&*XmzOr^!RWaVV}KN3eW;;Mj#zXK3DA=|72 zX+U-FgQV6Pj4USB4!zF`^Z7^k0w5WA*N~2)4}skaVa(l>iVy}H|M@$2_qy$5H|P2J zwDtxBurxlh5GKVy?7$(az&Az(SnG(spLohRZW2C4|3e5W(jINsMzYuGXCD&Q^f2}G zY{|bVsmi?KmAm6a8InwUMIK^wo^!B5#;QHdi0!f7Lz zjXh-`ek!#=zUtBnt+LcYjcGo?(P>&`UOV2#mL|_FfXoX=jvuMdw zFcJrWN^5m3cZ2RBQ-;5fH>gk8f8%x%oI?_0Z>-aB={;W`KNtO07BRILic-{7ESYg^ zeHS-G=Y0_#aG)x!(fH1z0Sig)&Mp#s;B%3>S`He5y&ZtU!!t%_qq zVX2zr2+1vAkQ}(2G*m6&youQt*OhD1uT9pYL5|y&{&GqxcuE(Od|C34Sokogp#2`b z2Uln=3i{#}fFOwUhuiCi^&RfZKVkYC(o2X<>m%&GF$Qj6^n zLjDPTCUik~CiDR@+u%Rv82kAJ;V^7|m18b~21+l?8%@CpQj=bg=9Zv62#M^7BA1_} zRv(7h5r*b2@eNt`5c);4d%)uXj&}Y)B)IiLksPG$JM&GjFlkJ~bUQAlY!U+ukFo5~*yzl=@mdiWZ8JRda**X44 zA-6;W%3XN`<=YIeqbn;5UQk_~z?4XUO+Z4J8WlMNo}eChTGTu+oGmNM*r3U+$1AN& zzvjI*PIEqRB~sOB#j*gNAGx7rRkNl>(q~oEVx{s$p$4TP!t8W+$2vE-?xUB>%k*@b z3`To2+uqU43GVc;sH_gbfh1hx^sGvEM<&}^y8}(_lRif8 z-9SnEZVZ-v5g6M6818m7o~aQMtC#*hDz8ITFbyuUJ{4X^3>)7mblX#k8+%tZ-|lAw zl$+%G>3~i<7Z%?L58@^d`KS+~@96-$_3_p1oG2jN{iy)k%%5yFxQ9~%+C#MNjoSg( zuSBcOiHB>)f*~0IS0`jI?e(cLJO`xTKHVP4OuVBp8*?e#x1+zf2Q++72rk#WUnO{7 zv37kT-TbHyEFk)f$jwzUQ7JnS7zj13Q!!OO=zy&B<6THr3nLj=2IA4QYpazflSGL% zaifW&Eue4Zi%Ck+C3H;eR~>CNRdAvzJWZ4(h~FzAdQ@{TtW}(SVmRddRb#|bSQ6By2%UaAbw~;TGTe;-;yMn zh;kd)jL1RyXj+0UEY%}}3FVb(bk_reGbd1VvX#FvLl;o{QWaCk`Qqm6iK17<3yT8_ z>g%RAnI|yjqK64bokM^3=VyzBu)?Ph-6zEjQJqpbOo`lzwrDJ^;IADKYq1NaEn1g%qBmMNqpPOOJrJAu8XJ(Q_C8ZQg@3d zef%Rtq|#uoECnC_QMpjbBJo}n&36ETL9jfb?jOQ9D^wHx)SxU%V#*MGzU#r{tfMj1 zmKRN#O=W~@XK7y>jAqgu9K`q?oo3P{!RM&1!0VsHF|o;Sq-z0gIx(QtwHi2O25D<`IkR&x2e z?*;iddtjZ(7hDcwXE~xFN2W>hqbW!sqtP@i6PS}EwUjBM)5P=)H{+BkqeQMqV7s(=CKG_E3aS z6%4eI7l+n+yJZWetfHDB3G`7|GeL!|%b9V@fLZ%{VNtU9W|~wvDRv&h06NWTl7o)-k&Kme*FccACC6Zep-BRiVqG^GmgbTU?aGg$F~o5RTIG#Pd3VCUC?% z6i;vcg}$I?zPe>%rEhbNZ=z^B;|a~KV@`M>0vgzTvDBp4>1)g!jtObRr_Yyn;5Y81 zL$0nReuKGd@|T5us=&4Dj_48F%BAvkq?k~{nY30TZ8 zZA&IAoJmSc18Ye|b=aA>X=N-Q(HwVh&XcT!%Dl)vh`r*FfqL|WB1lJ;i@7DvpoPNV zROz*l`u%WN=o|~Jeu5zji@jtsj3S|iW0%jo-iC?bSfL`ZTwVp!&_|ven|_WAEcqFD z?Ye7x+x?|sNmwm*NK76QXs|oqYOMF}6Jm1C6?4VqV_uk*S$I-lU$t2pd}x^-oD8Ew z;z6(=455y~9vY?h*Hg*^We%kdSeJk zwL6SZ>2jR^Z#U8OCj6wf0NZB&_NDLoCkEo{rKJ>O<#`YF)E$TQAvyia%4<5K+X;g? zHjBU4e%aW!GytOYm#maBZ(o0)0qX+mbjf#j^tborn(^z?REaN9F`_wBzl`mVdj=f9Lt?goy&iN;7 zi!OVGPBemfzd3aQ*&mSf@RD*gC)U~!Q(6Gvk%i)US`uh9!2(``#vL#?$6u`t)@0dQ z`|G_x8@<=C<4AXBQX7uD^JfKikUrghTt0FOZx(ZLG{<-o243+>$1ZpLwqT($DL#%0 zV2`Dz^U+gjO=v?CSi)Q09L%|UvyDt?(dd(i-HmV{FS=!G7xu_EqvO5Nx?~KVTMzVi z@Enm$*CXbXl3WL}2)If1@mE_73&96e{SH~J`ju-IFKjj=6khbKee#~rV{2PIvpnRG zD_q_i1)gRJ`Ayxn-(6_FUGRNyBiPwnX+2nE>FJvcxmQc(P&Lrt&Kz417}84t_8J6khP~&k2qUl*P5mripC3Do;fY}87`_x zDiL@G7lW}EGcdQZakuhH$&(a4#F8UTb9sbY;7!nwd0gFZ5m5*v5s`ZW_fdZy@iDBL zxlnOe*Ahv9CSvqcXG!E%M#eScn!j5jzUXkNtG&fucImUkXG}MH+w`GrOJe>K8C2#8 z5j81dm9m3mlOweUWm{9Yr6MphF6*XjGM3&xKr7|&p6OXaiY{lgN>Tv8ukSdgDz*!KL3!L}NwODgSkpO10i2vpNaC`GJb4km+~~Q5 zO|~29o(8_vym2W*YPSNB+mED9=wB_*7%>Ip>QUqj*y<|Hl0A8qr=Y(P2vi!(1?=T< zJ6tZ5X)72Yb2>?C)i)@8oF&4T&?B<;!(*ozjv$^XOy*7NqKb50J{DZ^( z2lrU>xa~IN*gR}#H-O>>eSj_XUgB0;W*L~ z*wgR^4trUTlaSj?Q821mR4|u0)pms9b_nFQOxY|?dwAuRD0g&LeNH)!dq_K+5!}=O z5w*Wh^{>wjMQnW<_rM*|KDT{|Rv5X48~4ncVWw;RTob!}4p;EGEp7J!R&2V>(S4FF zVeDp^(r@H$>#E_ZR;Y)}8_sSEFL=8R*nOL=D|X|#0j^i>ZZo`~x0|DffIraQ=6SMz zFy7aBq4L535z8`N(r1-*flQR^r-Vh4=tF8yUS?wyU0GJlaWsg3Fs>c_%CS*cIXF14 zYrKdIudco#NCbHmK7t70-sB%uk{$)_BKzJ_!&dWC_Sm0465=!{XWEiMFxJ0cuX z$QyIPa8tuO;U!V@QfLOLoIK%?;Y&jTWcvyuz7Erd7&vKCHB+0wSBFbt*9HWtt-|Ss zYl~<3yPSw_5&L>_WHfkP$a^T@Wr(AIoVKU_f6>l#fe%ncd?9?M&bxchvGHn8-XFEw zf3Y38T;AnRw{UFl0sIq#eAom8zWktUf}h?~%Ks12RHvUbH-C+ zbcthyWP{T9LGB>BarNV%ktv;6d$~ZG`IhpuIc83iqCPaU@6Hr!qRmdo=I4C)D8Aq;*}t8*^J+ z%m}nr5<^f)bg4Ht0vpU}FG3P6pxIbtlYxK&W=~}SwLqp)oD}^j9MOFAGg%@87nhn$@dnXj;DBm$L z)$#x}g>SLEr)u|B4iabH-USBYy1T=c0J*TBdL>X?M8>R}qAvU@rX1#E+I6c)hiGcFv&y_zHXa(F<%|UBQl!I5pX)nY?pj+7 z-Qk9t&sdbCmUIc%eJM!`J^Bm78n+0OFV?%90uSFj-*Wxh?}?Pes_pu zB9+}oh8Lff07h)Ggdjdefg&~??JhqN(oKp7@_NB2*4m_G&?FJ85)R8f!-}_%yB~T~ zBF0;LCs~)nhHxiWiH;B37l3(?i)ZPRvOJ_KUTvsB*cZm(Z-muvhn#oJye5Zn_`*om zl56PcvBBX=Kc{r9HB$5PdO;`ZsJwZ{a{Rm%Ll4J(UB>1lFN1Mj?C_zZxpKccsS>TL z^{g94tpjHn^y>1gX2XUih!c(9Az0z7zf0RY-{FdzXH`Gx^#kHzxgLK|K2F~~Mt>h( zTlfUu5^Sj34$J_PzVhiLhfC>f!RAdbLIU0Z7`U=;Pb(rUmx{Y%8tuy`WjBrM<$Nw<_& zq%-0l9YA+U^)QyR#v#_j_61 zZ9Un~y9b1qdN=`Y#o#y3S`I(nZn(R%A+T=uWGTMLvG!@B?BDo!w`X~HiTCYrDev{c zaqs(M#tgHEMToqoL%!bK=x>Ke-tL1^+0~^<*%D1WchOA-tvnU74$meOW?~yDLaS z@6^L!5|7|QX*{rnIWVQNN-fu$#qo?L*?*2&qQpxC}XX;8^Jomf$j8ynzN7jF`d;xFV`wJo_S=l&(T~lIct<; zfo*KH&tH8AtZM8=!HznYiK76!8-5f{G-D`nvE(k5aO{KTPBXHcm}1npCq;{1CoII0 zVt_q}RXr+Po7Sw zYY%2eQ?*kbad6Z9RF~QcMMl{=7Psuf3p`__4={C&*Hgx1SiBl#(wzLH2HQG9l%8B` zkc$|5gVKz*gU(F6(e{!ae7^Wc;T{@;@h03s_);H`zLZDj-XG^45`*=|(MRbhQd9Lg z6Us0msGY%lMKp583?cebAF#eu2k(r(a=ZSRGAC?_#H>{Gg?fhlc&;gijeFTzZhpL; z<&hwM_rq)ZkuzLte^T(HIJ8ts#F(5=zZ}H9Bk7ppH$E35UrwdIC*99M(*&$3MQ}?} zmMDF|(%fxYb<;~af{G-H22#2TwlFQrK*7Pc$aH?C-VY+#v+9DnWP%~{)I>%XGb%N% zgG(w31j%C^m6ctf~`r zBR#ZqS;%MyV4~GjS_gO$Wtv2#j_E9LP4CH5byOKQ0NPAsG-fp0%*@BzD+Z7>)Oqb`$b4%6*F<5c*OzSFbB( zKc=PI>7|YrS+fxt_3-E&lI2 zW8}8Ax>LWA9OxaVh%<6z$})5}&bxx)FM-|4N4M8%vS7@no=DVZ;`?cS`(s*oNY4O3XE^G7Qmq&`B1P@_PoQv$uwP>c2;7bWrx&?Bb{=E?VN2TWSt zG`S(9ZJrp&+Ft(hJ&7?;3fCiYO7RO+MK7X?YN8UE;(OI+$fi~^WYd|=yY!I?UB^Hu zvEYU$Dx2m+hbaZkDGm9NoXqDV&O|lm7q_{<&Uv#cVD%V3S^G{Ga z(;-LGF$kvt6W#9N6Ykk5kYFzmVzQ<<%n`nh_(;nP(BN+}hqTyaYH3FT(1*6};5-3H z?o1*hyxk$+S>rr4!+vlIy4l1THug!5$P()(srGfiB7!BslxpjNYc3I z+eO6L!jx3klaO6`8fo`F^f(7t!?63k;efU2G|42gP_K}zovUU>m}P44rdRN#_Z&eE zV(qf-@Z>amEKzLQf&(JJv<0t%tJ~5m=A;FGoI{WRziW_X@+Vcq58+prl}xPi!5^T! z+BeM_=59zb%pCkr`wsHnxhn7dji!TwU*LB`-2Q7PL0!H*?*CLwp8L7+|2GD7vHkJz zp%XT6Hc)mnFtRfFPcI*?7rQlHP*6}LP+M0}S65J3QPA4`uf@V+<+kZ;Wl_*yX~T)b z@rA?3*UN-G)x(X#qM*KO-N)Hqhtu88d&`N@U&yYYw4$ID(Zhv>l79*pyStYOeTZc! zL!+U&bMY0l^w5nBp4)JgTFs z2QWhcVZ|Q<7}DlripB%}g8eT5ck|7r;q)h{tY80bTJQMZgZsBYMGcI8tcyJUAfuBIG+0jITeA(ze(T4)l!v zDc$34X8P>)4^Tf1VhB-)k)Zs&AhIk(&loonm8lAc;NM>AksXa%s|tvvsW1$#wgr)t zsFkD>xF+21%0(QqxtiH&X~@}B!^J9{tr{#ToLIS%Mkdx8QasZ zQAmb^P8r_qAtPJbb@;@-Ob4S^Mx>AK`T8k5A}s79xUVTNYLfaDrPI)DP*vCgJH zcKy>jWj}$M>je*^S<8J|!)tPCy?5{ji8e%&Y(je-=~n^V*U(#?`*<9Q+ipiKW#anwa9t}oCb4tLfSozlQ3lA4`tRSsW+l)Nhjna33mIC>2RkFuy`S)Ct zPhGdQw?K1#Y!!E2QwC}|5vJ+kbskIZbBd^q)Gd@D$aq3lI===+i!25{Y54g)`kXPV1ab zS@9b=Lo9V8^}lzo7?w#|k~gCu4bpK0V4ZBwtdjG}cFmNFlZE5tr&)XXJ|%E5KT|v^ zPa)%*N(g{-(r1N4(}m)o0hBDy3pwI1 zRAB3$Tiy;VQs{_OteUVN$Qqku?Qm^rL-f0e5>_1l0q9*JT*Igz1Y?+hD=Wjp&U8=u z@#Jc34gmKkA+`$?y&9A9{Jg!)NNCI_^!F3~z46?c!m$W+i~u{g=cth{e#hr%5+^Qw zvtul;+mSpj6E#d|T^Jp@Y7R%*<9ccYVWU%pTsRMOHv48=M|2NFkOfEB7foA|`ui(0 z#I|~Hz)kQ#H5#ZwBfhlRusy5DiwJ&UM<7jHPX_qwU_?4(8WptA$fimK3|)FQEc4m-K1|}sdJ>rTbyC?$FzuU|HslMBlj>myK7pi|a{7%TlQO}aDfN3- z9vr0>jwFL{t;WhaRj4KQVpgeP>5!@UneoT1tnp}@jSC~lj=bk&Nrp27?vTgn-WXAvhxzbp?Nt@Bj}P#4Q?aAA@~h2Pxi8$%BTXQRzf zAtas9SRn&^Yf@uEp|^fhgZVecnbjI0UKXxPG*)N}R?NcG3v;0djfw%bq9Lfi&Hx%? zrdb=ZH&Yvvy>h9CKsxP~%BsD|oRjSnWMc&TX{zTt1bIQ)lNGD9vLF{)4PN$;Pxr{n zv#8oLU0q<-l)6G)BGoFIGZeBY807x~zb5+dU+_$lQH<45Y7P5w)p&z(Zn!84E9IM_ zNaj#l%Ks$}RULw{J}g!J1pQ~Gz4D066#rz{@z4FgE$;r;>StnV;9~9kZ;RglVjzV` zb3qRNq+HR$FnRi{B8U~*02Vq3RKq+}w8PAp2~bzW{t7j>e;6X0x+eQj_t-se-P;j_ zenc&|_?&K!1V-l4F`pDy8OQm7$yE%J@Bop2HXz+n1S!HvFtXt#cWK&!`{mgoL z7k$g>V))**wE(&`mP5(9rUit85*hN9^0i!uH09vCoD}n~f{?rBok1U5&R8C29MD_A zvw~505WIf|iqpDB00RmD0R2Nk;ree=>c0mn>}X(Nt7u~9Xl&y6W7p(jqHN$~^&j&s zM^(!WNd@^^Hp_U?*kZfEG~VCud-#6TZAu~N z8wAg8QnLzrOrr23dyd%s@@$->rX$4zVGhmXu z>QM{|U!}pGlKmTcW{@7mTCsT0xO_+-#U8%sXh5RCCib#^<#-ctjaI8?7^Z4j6J~DfLcQt0ffi9hD_ILLRv~Pon9+Ozax$G_1$c;MS~3|C6}NP!gEBcB$Riau1?My` z48|FL)5Ii7B+7^JnW?kMFDYSODrEFk?b&yDr%#eqhUS?f4jW@cOHO%mC07iG9SEwgcNvqeQmyQt?W zcmfKQK5Hz4u}-}Bsh`l5uZL@*Tr~*HIzrA?!P_gh-z!7w%a6!1SD^#uWRW;6%Z+e3 z{?X|!+YgLf!CvNx++4 zz&%gRTh0NTbks>TftuA7*eTaaV=%e>Z(?i9G zy0`tPm5U96Zz@*mRLH%f?`uVoybui3t{pqEsXE`?_l2xt1x(_7F-wT)bLP+-{T`tT zl#Z!F?4aWxRU#s?IJDVPI3WV4Wx44L@=Pl;7@mzoIA1P<^?UD3WHUauKjsxKH1HpV zw@<%+8;`>{ZE=x8YoXuX^DeE_=p=B|1M zMkhr+zE0q=RUSV#`VB1P#Gl3zxO-Lx$Y_s3>cw){L@LRlGo(5g9^bH8zwK&m z*x!Wx9VT%V9q1UM>MF}Q*(qK-e`$AYc)>%BJ~Y_#aZ$)1>^E>o6w_za>;R3@5bo*) zyoda3sK^{jvIV$T;0P?E5`eJ$H@xyTvSc8yzPig9(++Tos}qQULt1}?UGM?d*QVgPL zy9Thjc$SPcN#O5+NdLf*IQu{zL92Z`3D!v;sQ_pj;Sj}%Zs9-dlBE_Xk5X0 z`7hpnnEFjPyuuNkYKBp^O9-giYEys!VeaomtjXm7C{OR1<0)T&$I`w zGHK+J7_~NC%-C2qzcGN*^lEEfQT8gYsaZjlX9X6%F{x^K^Vyi%ecP#A>1AWupu#E) z#oO~9``mr=d)pez<$1vOe=(Fmx3ZTX#2UIk&?nJ+AjP#!yg%J{+SNlR=niq)n;hCA z?&g8UxZ5A$BI%}zrtHo_3v++X1>cQ&dw4X3XbWb50~=~aKe~An%Rf}bwN1JY07pFv z@_r@2<<*vP=be<>85tTId}qHsb>kh-zKht`^-hH29q|hL7)5}1W4JUjU?$XxaKNtX z4-Q5$$Q}lg_Nv4L%sWlv8*1aFI&dJ~VI; z*^Wn-mv&;dHB~2ips~zLl-SZj*1BL#InK9aO< zRIk6CI+xI5at2fGD$vRxSaI$Tf$_;IwOR#m%Gv6FWyo0EEYy&+%XAy@4VV`tY}5kz zY8d4K3~(h9sjQGR@9bn^TAmt2XyA7l6sNr4`6;1qZb7dDVQv)rl1@OxTG!t=8L|$W zA_pD-!>k!yniZ4I*_%Hx&RwW{viO80EdFjT9%KWzuTI)fCGN2zcFp{VlI@h+PYmKp zHV|Wn*dt~s_pD48NI8+5FW9QApt4|8&wDfUjrt;#5Z&bzycK2~=6vIddp*hLSfHQA{E}4$nj*szwZI_6EV@9 zXI)Yj)zHM?s#h8{S*NgHeUeI=zVVNyS*J9@YK3Ix`P)6})<<)Ms4Y4BSonzM^d1|1 zBR>94+ezXrH~RJfgpqax`8d;UxJAk|ue!u6jqwNici4MEG~N*e2H$`j$fYT`69(Ur z77(|2duX9ulfinb4doD*vRA*jh6=$Kcck9@nFL1PaGmiNW?#aC)|c$yoypfPzvKt9 z&^NOm>ZYE_7t9XhcPs(M-+db#A;fPA`+;WzCJ_b_0W~%t5UXoF?r8D%w&=h6eSm0* zPNWm$Q{Cm3x~+sQmKp;39athGajn%T&$`TYM>;)g%QWgS$t<^BKs_P*F9hLTXemzVfx0msWjOhyc!4XKY- zJ5)&w9~T^rDbBx%+4Zjv4~1~Ysp4`B1=+Da8GV6{xz~$p=-qz z5^@XdY^IQX%$=+Jonowr(>hcrDZKNzD1lR)E~KG@fh!x=s)6kb7~{;S1G8I0b$VWV zfDPE8*Ru7ze0z{Q2vXp%si^0juyj#P+>vCcNGs=3(VAxZCU;4p1}Hv>K$4^2rVyB$ zC6rnRzs=r$V~~E*R2}MKMPYH5yWqRjZ2m!*IaTU~GLvgfi6-MGIg>MjW7bI{6BE#v z7jk$zRBBe89VF)>8n7wY(t>r-d`Uj*kQW$!huLE75)P#hsy^p*XAWxSwNqNIno^KD zA*s|YGm3YLa$#>|ZTYLaF&avCXveNW-7L*vlaZVA{4J^+RUMbAMf0MwA?W&=1Gu7c z{D08(PC>SH+qQ1nwr$(aoN3#(X4;r(+qP}nwr$&-H`iJFM67@9b7MS=m+{bJM6_OO ztEJTP?QLY)%HYoo-@6tt`obh)fWYJt818? z&4f1PT(|aaxKca=P^Kc$K4rhzoV=is`XyRg6sM0TsajH9MUsZK)t4ed8+U@Sj~nKd z7F|>OYAlFF3bJO~VPR8cNanDl5K4P?@z9{~ZD^jWG zE3!^2Z#|=PuzKBx+aj0qZLvSM=!lnfRRBBE)6Uj%#?BNd=E^3SMo_=&Bq`Q?cyuZ) z$vLz-eR&>=l^?Ld`pT}^lbw1~y!d5?%kq+!u)HWf$J1-0Z|jR*MNh3QhVJ0`>w)4* zfnMUkR&y(wVCs*z%mIQ7`Q8O)$X^VgH;dTfylgD-gHoPgyE!T$>NfIptdUBiRGEnK zY%<2^Q^rY)c6`VBQojAp?vUXId5Ht6epF#ptU8Dai)OGB;%Jvj(#DHg^pja%0T;%( z9=vy`%aJ6!ZQ8-95tKpxS_dFU@6`-n;>%1M0T6T!uj7tJJpAk~5H&<{zlsS57})`P zWP9r@zXK`z-UB!AF^t0B9}0_+2C_Owd0L2^VeW-9Z}6;3CUt-M&(Ar}g8Oy{yXhX_4$@Q7RC@a6BiGXjX zs*1K)f_XAe5G;$!^)odxoqx-o$2Ryn3bv{o_RG+F@+%&H+nC&7V?1B$9Z`Bq=)#)C z2v%E-6SgPN+xBfCrZjRfxM?e?S=T$|K(l{72)>}@=e&#fMW zUF02W4+9JfYSt$?Ja&81E4$!o?^Ob;6(Q zVXTklH6OGFRE8R|lKC5**6gjRo_Z^wIYMyI{g^#z zr@RgmY9^A}pWt{IO4?Z?B8GWRI^bEwB-NKasUF4-4ShPdIemalw3He)0i|aJMA-f)5q}TXYk_u zBTkV1_W_Tm?oyi`^7ftjWR+D-tb#$ z?S-@?Ww5P5&p{#0HDBQG^DTTOU-4dFTbOIB>H-@#7XioHoQ}^A+$XwUXJ6E6fINTr z$!bv#>@!0T!a%YImF#(NN5(+1{jz&?rAcN3IyX_z+nD!LrSn4-aZGui_mqK342nw6 zHO5ZIL(oI$Ra8)Jlgtm$&E4p)2@ z*uHdt;+$3({Z3<*7(lQbiftlQi^*VKLir3Z{p@sGqR*-+cPa);^_Y15Ivh#2oXzTO zhkn)pJq5aP;GS6!2vzir+Q$=IwTZb8R})O@_RqV|R=JC|+9kFcBmMty^x>+C|W2MMbb~rt}GdKXY=#srvy-- zda3rEfFONjPhgvr?Id&x1_G@MX`}8m3W>>dP)vb*kIx3uBe+(enC>?NH5zex7?ffe zi@k@Dx??=(++SZpNfamGvH{1HUZb5Yu48pBqLWG2LshnEH2U3&TI^WO8|HJhN@lwRx2o zc+=t%d~|0%jvMXgDY2VRG?1KagG}B!!|8V;fV!#;5Hq?JM%_redWW2tn)g1KF?W`1 zYA#!ShFwIR%YG3|kd2<2@ai!!QCA_}wR_LAh8LfNm@D3rqUsH+DSM&nDcNHfQQ`Ho zp<)_?rjJOX?wA!f0o4@*agG_Ic-u4n2^Dgs<4k_WeMnv3CrFz;H)W z$R>z`^&Fsdg>kz#LTzB5Ub7otyiq^w1%?oDU~|nSlQ7GU!Cwib^a?c0KTJ7bRHZ8j zHu6n4%-O4cGW&PtB0OtZRaNJqqe;ssmZ}+@J(M(I?kiAZUnicq*H^HP zEU$vJV3|Pq6zl`h)vK}#XBjM;L%M9-V>U=TSO;^YB8hth`;L+}>-dW9?mdp9bsqt! zZ=Yb>vcjc}@Hp;bL)%u#xo#O!Eu+Hn+p#o_)*B13ZF_muqRM#A?{dauCk9Zqb{DH+9*Jci0UELF&eKVA-yHr zmfW}bRMnSzj~?#0W{52S>xhzi*SC{DGnppDQ`WK?>hxE>;@}KP+dvF>C?1{GPN~)y zXuS#a^i!YJ>uc@8C9=hdns4*8MTXQdBgmGfiSfv;hjlX7?{};W|BW;M75^uGmF~%Abf3WlOm&kHWwJbVpwoY+?A4p$=(l*=<#D& znE&)#i5wkq8;odX5=xwR9ERabjT^-w9CXTTZlg^)|slm znCbZN-)BY^ch9xzB)t%)2`pxB$d+a9-6V>*165f~P+xt-?}A-Wwyo?R7ry72>b9ag z$7n$mcaUfHxR>pTR&vEI)*y5Ku2L-n!?oa~R z_%zL=aBZPSH!4+~i~c4vv~lqpn*D-&`t>OpUf(eGIO(#KIuBIM{FOYB8DcDVhGi7) z5Y;yencFhYiw$#(p1|P)H9K^M2xZsbQch@Aq!aCbiYTACkmjV!x!_5WXnZds?jCrS zV`dD5%T3T+TLw+-;a4;^H;_6aqYxVh-}C?S!)KpWRFr93D1KWVjXNCCI8;>Fp;dW(+)eJNns%GtpI?R=ltZ?8OG@10C1N+jJiPN^|fn+T5XhfgkS@ zZhNU!6Jey;UzLk_AN06H!97K{xJ25h`@y*VaYG5%rwAT|ygxr=9o6dh1F)0J&rSxv zgSZ+rZ%Sx>Z|ms($*{945x-UOqjbeDWbc#?#o=8Sj%|+k3cLVtoTn@t_Bs9?TGmK8lQu*=?9Vgh$TmSjQTRFg#?W=glkR)PJp2okuTRz1iER0w^#hX$m zaht!yKs2$ojty{QI4kB1Q{ARnXbJ`z8P6tqPDtVgZ?kVAPbNq#2PA=3obycbn^pRn zFHo$^io*O@1^i4q{%OpB#9B(b*h=y6?)oCsRgX7h>0x$Q$;Q#Vhb<=h?t1i@(|Wp& zlY1u508eYUt?M`3nU!|Obe>E-J&OW+hL{hWr+E2kTWx5)%i6bF_%wf1(`+3ia;0ka zYC1`MI7hIOAj4E1Kg<(jb%uJI@T(nbAW8ny(~V()fAmy@`%=Ep_ShTI;Zf=$M-nYe zRWm_?fOq)`lE;qFy&*%YElf4ryDJZdX$*3iJ`pJ09u@O~f7!Ws!`MMC%*oC_g|X}^ z07hwW0Dud40055v?#rZq8(CS{n*FB&sAyvFBZ)cwpKNvq$AA6cf2NozR@OL6s3U(} z^%}UdS531x`Z(6Mh9xeW`eNr<7F}+KB{VZ&1Fi93VH*#&!kEWAT)4ThS7e0$po!-_ z3R2J_T2_>+Xi`=&^tmXJl$NXR6ABKIhXdRd1<`kQ&#GEV%(QXi$r+Bgi;QP|q&xb& z_)fPq_?#{9^MUV*ev16U@-$Kp;BIS+$f$96UCkg-J zVQ{zmlM&!dVVJnn?3P!UA6c_vz-(M#K6uI zx}VYMp#D>VaQe2OqDAe2H@d|7x`5XC7AVkMlA+s;R)N?};ZH*zugi_lIvz-=^_px3 z9NL{rSS*h_nD66gHHfvmem_ABIvj@3g@ZN=e?LhDUYJw);qRt znK7EpK~hU`g0P_4b(x)4XIYmr$w%b)wb*8xrkT54H`4G8*-tQp?RB$stgPWO9L~!usKL-NuBF3hKC>zv^WwH}9=exlWLwL2ViWp&9AUe-q+Mot5=BdvV}7;? zpgpp0*$N4awS||nP#c|rU8_1^R+p^HsrMPB(xAS(ZoOWt)cq50xioMjR8L>FxA6+c z{-PAx9)FE^)K7PL90Y#o?@8Bod$iskfb|}yU?)A`dKVik>tr!%kl`P~>IENT{>sPv z4Zy!xi@d?|85+`J|C^gVXT-zLq@gh_&12PiLt8gO+d2N1QBJ}kGCx&ptGEXhNA(_~ zHLAMjTUh1Zh+F2odGcBF9e0rh|clB=-geBlZ2h?k^KhO6Qj6dwN|clWrExWgB86R0puJw#6EGnS<;+Lb@Sw z-IjZ>vNm;m;uCJ#1tMsdrpwfUA$vd62Ln?#YCXVD(lQ;&G@>a8t0D3%EgU9Rf>024i?Zm3`+}@ATU<+_djp4MKI>)-6{XaAP6=PH zk$tRS8AB{+vK};YRyjhd-ym{o4>$la6h24p--vBz-~;&!&xrR8E?ZEWU_w33DSk6Y zU}C{sl6+yzf+o8^o;=qjTd<)Qgf|)LlF$9w#Dkqef-iugi9aAbrvR|0SF!#luO{E^ z3u|SMon-VksY?8F1vuD8^nE+JFX%( z=@fDD;yUSh)&=8Tm}<1rtY9u=@|o^y!eyd&6bVK@QjRE8w_pVFxo8WyS_V}~f@q@> zcnT3KEA!`M_af*_a-9IZ_GGDqYfi4m2a?eo@pk@%nx0rEU(z#Of-~Jt!?XI;^ZV%b z`JzH6rXpGWFmi(^CLAtQo+H1ZBS1Yu1>K^ZcHaURp*O8N#(oiQ+zqVT^&ebv9a z)EBGo^3gKGGi`4bFZ>w?)S7tK%53tzVR!=Ae}!Z*$VUpo=3eh{RB zU@%5jIOZAu&>p9QJT6(BPNGpssAS~i6Kj$iee8ia-zi-8^%Bs*lTQI}gyKF{uqhdN z@Ju0ZA&AMeNhq({ZA*)AA>AsWY&SW>$`ak6X||_xk(rQ#Jo$NeO&cU9*swqFyeD}l zJdc&|TT2eCw6k)eDZB@|-;66abxPYR)6UR&1Vs7WL=o<~hSlhO{46?-!qsi>B1ZRi zf3-87I;qeIY0eeMvAkR^csrh|wWC zio+cH?Ag8DZ{on=iF2Qz(>sN2-=Nw}u49j7)F+e>f== zKft;FH97g;75d-hxJn(|31tPfij7q3&n^ew08nXccbqQB0>d0Bn_Y`B^dc| z!Ip-Feq%#g7HEQD$;Qvy4C1!V-A1Z0lu|M4S^T^mB*mA zbHzy6#X}wG58V^zjh6Bi5|A5Tkr2$5XT%()%v7}}i*(Nao9N%16`akId8s?%vVn9W zDHDl#;K5@=g3<_X7B4}KHZ$X>Rxn#A3LMR=Ygyj9=iqIBCLQE1vIIsb7pc?ibR zGs1f1)Vwo!PMq8-uqNkg&9G41A@M_=D^SSg+B@<@34C{xZB4f@(z5j$v~vl=5Cx5@ z(eiW}NwjCcbQqbeJhg}8SBzOkG+LB~-;iK7A#+mj8zBxLYFKuF%!i>eA3t4`Mf)n% z^5z2+N^PA`0n-*}SQ96gB4ziqem+f3gE6&aVlhExN0VE|e+C~+MaQIZn2|+7vYs)0 zl4Ve|6Kvug#AVgqW7Fpr&cD#4^6@Dz14Jrncge|;O)H?fChp2l_;#9xOl)PU=gGm3 zMx@rbO21ZbAt$)zBKg$>p&&n*a}2>_XaKGiDjAijx>WP=h`(m8!3YKi7Q0Sj=nIR_ zKgRM@uqK$#;=S(I0PAWwVUPkWen5o*cC!`6U=tQ!^cNLh-d@6MVz`{uD-2$)6%^KE zrp(S%H1U$@k!+8EQ?WWBSA&tuw4_V1O^c&6zqM0*ZUr-ZLY7ntSqc-67L@pg7#vfQ z3Qy@?gPVBbM`p0W-mN=Nt^s}Zid_MVUhE!v&~YllcRa>ke3y^RnbTETAQ>wT0)$RF zeZYKo{$2%;Gjg{mTUYaysgBqT4EA+u)+F1?-UaQeXhZn#=3OnU?x>s6*XrP#y$`5r zw@bm?eQ>B{8SM5X3=_W{`)FJ&pU@DMPw|16J7ie?p#hq@px0A-oUhZ^w4!eQftjJf zp4j7*$hv#&)2TGHF=k%iI4qwaJu3_`&FIfy?diPk@EyxnT%Q^Ok7$S}d?x1paCA{l z>z=rI$7C&_4{%VaX>VeR)=$P12vF$a9vAnE;wa)@1HXQ$l)ls2yn)U`1CTk-kCNdq zAH-$oXo8rVp&e(|@%{>tpsXrIlp#f@wh~2HuIxe=4IW=8^?dt|KjhMSK38mJu6}q` zi5tIn6-G_9{8eAh)Asa8qSQXkID9p**g~3u2K-p~65kv6kb%{xm&eQ;Q-j-$#|gZG(Ky5 zCPU%vj9|6&?D->*fF}D+2E{viO@q;9_#?N^JVR;_fNvYOsBBqH(dLPrH64q}Gq`K6 zbjIo5bLgB8Yez{H-t8tsX$4zU-&Oi&1@j?KUdP_*ll^ts{Aw3-WWOmX+3dS$qstxgzTa3ex@sj5{SsJVmj5 zmdSr65t>s|9$QxRt^B;HN75dcJjGUN@rnX~juLHq;MfMQh^)}^bRjbx}40)v)^9;0DcV)66hWwOp zYKmp~2c#KAA^L=Jqz?W=qBgq2wimhWon(f0oOrd=5L3;zvbxtxFXFF40O;bM;MP)Y zKf+0(JkfV22@+lrkazmEyOV+h7nOW*#_<50z)hCx| z^3oCJ-?6I`c(afAbCLf@V7S&;WVfx`r-oR!*PZ%r8+f)49+bQAJIkX|0t6CDX)Sg9&rjW0lI{d4QDwZ<;J_a1Z>6CxB>*$`9wIHB@~YGMGJ2dHHF*C>XSAt_Bde16pG z1V1-jpo)n-G_<~{r#%dtPIBPZt4;E2i>!Vo1C^1#R9)UC&P(18&BrRgr#|YFWbIlt zV`w%3yh6+<^er~hAuGFZEl=SBEn|c;`neo>&Nkg3z|vp;IG}avS6e zbf!W89JZSWXbzDy8Zhdt)K{_9DfuJ)%gIu)Q9~9(@geINBU?j7 zWk97>L17_;JoBU_K&4b*`GaH`X`tU7-d-J@8?8ZdjJCL6>lb*4{a_!fwyS(!egG*AGzy&1d>`zlQ>{PM(OO7k z08qHd>X9iD#}j`P+3X5!bnl(6-Klj8&*QHyM;iGd(x-JgZ=L$3cFXVOv-*9&^*^Li z@4tafT})0TMTFFy*EH|1Lo|=MNwjj&x9VwYww|mXT`Aw4)@>KjrQDxZ3G>X?tur@OK07Qnwk}~oTG=gv-huHCjF|5ZBu@MR_ zoi2RLh6Du9Fq}+!%O0O_cgJr4H}nu>QwU>Cj5{y`@A~}~K$JE*n<|(YuUC?Rih~+K z5k@*0Vd9m)^W(t`+~bhFr%b$~Q}5jCI$ljfXMGzcMCB+kd-ny zGVAsJ8eeLtUTLWOS?F#|amWCgL%!z2K%a68Rp-_KEoK)1T{Bpr%@MAaQk%TecI(lZ z2NK8*o!pX00Doh5cgaQ`eUK%)8STnx559dhoYg92?iG{kS>x6@6NT5=1H!gR%fUy} z?ISYDcOK}EY@0baLgJ;}PYyvkCr+}6HTTiv*P55ADH4OolLekMAZfI@C=`{YIcZJC z@wNV^I@Mm7H1WlpaVipnUD}Lv6t}$^tBQ~mO-Vz3gGCUV8ys~2!v;nKrin4Ci7I3A zSf+BTPGN!75qa;AYk6RneHUlN;PIKPbxu7(=&4_;L23J1s}yuahlYov41C`<7G!#Jut|+tR4AIAa@}3@okEH^7W#*Q-9qC9qzFdw7qN{ z1#mD4&JdN5A918{GCXtacJ6{&gvUpeZ@?6PO8p6tx5E!z1RY z)dKTlxH}b;GB$&RK)Jdn)hQ+{YEy#>Nd(51^vbx&{f5#>p`J33;P%AQBu*qg+C5#! z7es9vVn9|)Rusq=$5kQ8R{Bz98AzoiOUN996ZOEc=_a=ZPzyNWavDI@=)<4%t8z!9 zIJ4_o~Ss>*@42cSU$pp43cN&>VjKTJp zC$~Su?mfYOk-Q;e5USH%kw#y6+D)tn&`ntUroQ4M=?|zcPsY^;B&VfjjOSfmaFv0r zhYMPD`_)XmU}Gd++apKrU?F!Be|Xv; zn7(nhYM8zQJ0_pNe-rB|z`2NIE&P=!o?mET2z!Q}s z9gP_acZj$MsnA6wX6TSH&GGpoqg*$(m^i9}~ko;gSt)4$gXtECFyww8A=82r93Y1RfjZJiLv-cMjZJQgKGl7J0^{qh)sm+SA zE}20!p|b14bhE)S%)J97CrhGJRmpd#(;J`Sw?B|EXBLmvw2w7 zN!VD>*6s~x~@~3fwb9(zz(jRR!RQJDNwn}6VIU_7#S>7mnqr( zoJpivIArM<6-3G#&bR>U4bi5G9Uc2~qi84`Xw`5{iMd!ZfdZ6H-rI7|3<&jmC$j`9yJ(X5}!6 zU;F8~E}}$pVakRa19!2fR6a#|s3%E}t{pulR@esKWL=IDM%w3&MjQF126JFI$jB)* z8?Fb*$<5Wa_y#J`2qHF}>PtRJN{?45>xR?n)DC!Kn5MT6@go~JLh5qzS66TO- zQY;WPKBSSBlu9X+Ets?lsPkcp%kbl1Dq?e2S;6KMCw+%ix`vSNkd*6#sISUrLhs)F z&;U?kIYNIcJ?e``sgP1+FfU4=iP~HBG{!(CH9Ra&ey*1X3i*JQaxS>1V!O*r;ke_} z9Wr|C8Ot_`3K6n0+-*r}g`qiOI0%tmab$+UO~^-Z0eE4dudv~%-D%-f`)8e4)h3@@ zxE(dw3GB})X{qU%*4Sl&F0o|_Zhv)}4tV&W-9czVpr1%ECK&+N*cj3s_bs)8)tY*d z8?_lWBhnkJ8InD&S7PpXs@J669fi2p9JFN?)|h2jOI&EW+d!|sQd%RcLSt!#UGZHs zFpLl;n}?Z*3I)j0IK(;p#gaK=$>mpi7R zwud8Tg7>_6o|}Lklhs+u$}q$9O^ZJQ#r?cUx>f=5AxZcfXDP5_7wLgfn{wZz1z>#| z5nQD{VB!_24W~#s$XPiG?cNZ4NuWe?QyP|I`T$Zh$n$LjQ!|QDGeFFOHapjuL+;{^ z#imwZ8=l-{*Tn1TdRKam(`Un;7s}3mg?nyjrotZD>(q2~WskyE|J)1En~4ULBPL6g zX6R}yMh7Qfktb}+jWWcR2pLJai)UkkdTF#Lv}iO%p4uQM@C(U<1P35ntPO-+r#J2- zc4btxZ~O$Cmu~JMp7ZKN!B}5mi_&dva@~Frq1em`mt2?^3KMeAM8kjJ9%-2dg4H4l zRykF1X{H8Pl?~32HOAYz8=H|u=yT5tUU$+6KA?Go@Y|3tsCx=kJM?8Y!G&Bj^%}a< zCz1JDtF#yS^GhP+75jtzm3Cok`ryXBGhoDwU2PVKssyRn6ZWFaePdE4PuYUw1*hDh^Yv1+P z-@(*WbAekB{~$J7qZCOHK?D%qJ}gz4$d1}EDBY=kU^fcJ9}hOu(B~Vo@AiYUFF-2c z{oLIgX)C%9THu?T_rB(idVZt67V&{zkNkZsUevKkxiQaMc4M5PId(M(Ss=HG6H&5- zE?~!#NBSN0%N#KbcD85}jk_Y1H?fFscT2rX+(SMBXx?uFnv?rcwhw6%QiEB6CkPPm z=26ndUDbqu=Mql{YFGniJ|KFx|01pjXJUa`|KuDD{4Bx#M*`wMdRZkCd*`1<*22L0 zKYQ6Cwg0>GYC3QHr%|9yLjfkXSwJ2!Xb}nos>%XUvpheyWULXFD-AM1B5(V~t>=DU zv->qLrac*bJYd2s1^ydSPy2O00Uhl%EA4SR(Z7P4`6IL8h4W+M=OgXg?Q}=bE(=UcM)R37M5d(bqC>P*VRXvXVJ3Zu z!>#6`Gw`hVW!lHlx3xeE43rm{P~u2kUu^T33>^@>K@Uzl&0?CB*$7r>Rc>#>bQqNi zWszgwJXs}eF-N>%2l`0&G_78*%v4%ev|n^VUjkG$sLc#iRTK=hg%cYoFg-%{Ln_k8 zQq5nXbo5F;H;>}>2&5or2Q^HJ1EQ(DBSSsL>DwWpkOlKyPR6{??dg;`BYdrVzM(>UG-WK~g@ zS`^1_yiHUClKkY(#J)}(k)(eNk{>dC5}7ZgHXlpwN27r;IT2`+DRqg$p-Rie9^IBq+A%7Xf31Zd-0N^zzRFFb-EKKBYZawnNh9l;ySzJDl1-H#DA zaCjk&Q@}-o!m`SRgTk7*w3lBvAe@$&o~3~+qduS{rIeJvmlbeJYR&W>_fHhpz}27R`CfMl`$m`JoB$$8VKuD=<3(8h;k1xb+fv$qyeR$=Fy z?QKK5!~JI32Q}vT`u9}%`cvlTQqn!bEcBIut;+YM&ursim7ZfyS}|7j5QL+rRe?9S zX|FH_enP+I|{wCl-y}W4O-oA;@W}gTQ z-}e`FvKRZ#yM6T4zOYe{WavjG1Zohz5ltw2!B<#M;R_*TbCg!6gBB@vpDegv2|Xuz zp$EUY{uRfB#ngKJ;ehV!!aNRtTb7Gg>4sV&2am+Jk;4zABb?a-2J87P=m`7lplU*m z8&l3yF7TTi4s}pm>jFe|S)cB^;yX>oFB;$LRS_I-&x7{)?fqj6f#J~kaKyqxeI}Mo zU#vB|k`orQ{DdFdh~sAq_D=C-^@HhebY6xEen>AO$~brPTlJ z&-$N=rS#*`vNln*aJ2Zx0HR2B!(~GRWy^?M)sT7xt%ap=c3CD<3-`K67PyL#Ww)@A z4shJLk|GRAgQ*^8l>8Rb3$UhERb!V=z zwP7Et1sC-Bvq933Z1m;`q*jkOX~)a#T5)~&G7VkbU|F@qd!|LFrnw(O5AB4yi3)~K zp&8w#T$3ns(@e9m2!pO8ygo7Cu*jq6@^*nnjY*|3oo)jL$yjLQ0rc*V1>Ib^BrP~mby*HZK&jhA%mz%H!V3FrE{(|c*{ zI#6FF;JVFRw!kzST-~kt26FWAz0nBoy1DQ?8cCVas@?M8)ZaMz+eRzx$w;8N!{La`aY?>0JD8cBu(!TLdq zS^D}Jf$yd-!|w%!II&-WUUwI&>c1nVE3!)ZC#KaCG1M<;`whrfcti^Gg~`-5i8gqy z--x;ou((AvR>XG_p50nL|8#|)0RE>d2gABgOic4;sQOmSQ?WNGW}p_iMbDi649 zVl(zR4^J1w-6@)>U;h`&!s8O|mAssYeF^;xV4V9h0Zc3e8qd${F0;2y=Onq|$Tq#r z(5~uMd;IuJPQ}Fx3xiF;X={qh^|)`FE&?Y|?7iJd)w&@WGVblUqc$P$9g1h$5aZr% zEDq+lN25!&9FHV})D{PC7(tgsG!!!a^%tAEY96xR`3iyY zDG^?f@o{Rnw0HDfZhgn81p%derZoZSE$cjrQ{UhRS_#%E`s?uyzZUfeZpq!Bq%Xuk z3Fb!&u&s$HK2Rz^S-h{BUM z$1(?B#@?R6UIHe(%TuM(BRwO0QRJ@D{NS0ho21Z?iNQn6r*YqZzM*0zG}gR+`suo# z#G?N{X+zn<#>DtP8_Fcb3E6%D6kcFj2xa#-ApF3=pxuHWiW55t8J1JtXm=u&(fA@v zRoIU4Vk-d|-6sJ5xSN^2U}SLX>111t<4o4ZOw9{C0C;JkL4KmYUp?1yytD(%3j7Za2Vwp8G^lf*RQ;+&9C+~X{O}~p*Trmd|xlGa3 z6Jf;~3Xer?xA;CT1=j`FVBr}5r7(iBfoBdYt8)M0Fr`?+7e|!zg(YN$#J_kEgx;7U z^H0%j^NE-Z*A*JHg>@E$euY7tg{LNF<^~ks@qM)~F=o}ykyJM8g-;`~YEzI$D!%O& zTZNRuCBK$&_ff#1p0c*sYdxF;)w_(N5qy`H9;igMdx?*D6}FfeyYvPBY8FSEo&|{h zqCu(=l2R|W3{rk+6)o?P*hZb|l;j~Gm8PD;Ec>spDXK>}3926~lb!$o0OtR@!2H`% zG=v4|hO+ebSI@aRMjCfuU%G+D=$`dg+*;b|6zn$fxaa_OMyxy>EI@7V4q|WZ?zU0inS%bd z--ol^KKEyJ%YeqZb@AEU^JnW^2y?yru)SV8>(_C^YPq)GH%pC}p>nH1zH#gm?c522 zuO-1Z7R6V*H2`&`ehr1@vWtOUzjK5+evthH>cKx6F_QJl#PsoRx*G!WLV9VR4f}X~ zCjv+!>3iRJrC&>qodX3B{^LC8v{?a7aX3ZbNw}wBx z_*Cd&mFZ)LaA@d}dKu#OiDa0xu9@u=rI-)cg$P{V!2Hv}hhk9^3ATQ0U0#{uy#(PN zrye`ImzTr`v1jPXFoZUjJaXfVo zUU~VW04(Z={cR&49Wmk*VqqZ|J$(m|c2#$66A{wcstT~ngHc%f6#hMG2m-a}OvHrT zc$f=BOJ-Co73$(ku#0ecsaz}_v-EJI0(98|k>6J+Wl&N!xCU$k(NN@lK3T@sT7 z3qt9ArgVwOFMGYpr@*B`4PHwTW7*hzds;ASL+-jEAd;djV(5o3i!9UCSh6GoXK8&jpQ9acNqT~IsZS14UoiHcr9 zh%jQo@_gRZDJ1d}If4mhFGv3TH;NasL2F~b{N^aiCuP65lG|75FLit~lZZAVO$%Du zGx&TDQPpHACIB)I6lfysm09^%!+gjVp;Nq{wp=N4CBtq>(xtPhuqH;3D=kp4LV}uF z2AGjX9U=i-Fu3-O1BZBvI%Hh3oxgP;Pd*plygp^_3pXNir2a1`aHqGqzv+_~ka7E#jl`y@_Df3X{gzrZr$>}YGIe$wyW~7LRj38T`Br@W zEQ}}G{uc%Afa7KmCx_|n3?>Yg2)A@_8;_Ef)P#;{;Y5t%vZli(UpYAmr*dFfoOsK) zbNyk2puVzg#TpSVMAp=}Afk{n6|!Sum4F&F#zbi-$WhL9nIRy_Bsg`wgwojiNNR}_ z=>iFeP)gO4BA6yCq1&?4`C>>x0%#`W5TdCGRy>|vP9J|o{nrrSRB8txL_4}!P7@-^ zxF}@c!>lphL~&SB4AU6mJ4k@&-pibpTjmYRMO4jw#`h!}7x3XO_^=jDiiDS+(1bL& znkz365Q)9(s_ldnrt(mI$WDx*1NoKPXqK0Y#hiBDLvuK)rXeI%bBilvFV%Pz3|4XG zIz4OzZiwhY3Fjp5F*0D={d;nWk|pw+g7L`B=E%k0>l1AX%4z))3>@0l$PB2f7=3$3 zHbPp)@;~}CO{3-IC-IP(X?N=vPKm$~-TOkTQ=q<~^4=1RKQAqDacA2# zy)`BHg=x~H^6954`yA`5(h_h^(MKgssb8%m6%`dAHvWp$Su>O(bs4WE0C6*ly%gE1 zTq-22CIo{e38f9W^9&_rhSxSanM_Zzs_Vvb_T{V(HNw}2;;_m}=7-XAg6^&VRmJqh z*_fhX1&J@nP1$6ZE!|{H%8Z<4R3_r*M4S%V{?ioX!Wv|VSY!JWi|RN`gXRwv(|bU* z3H?%L$ItVJfw;ijQfM!GI2T4*iWspfL`OCVe2)+{J6IR>x$mIS0MppFvfpgC0pi!D z#Qp6lkI{0a2GqCc(9S+L6%Lu^t?R2N>R$kGl~3YtNCWA2I2Ob(vZcy0optRC&W7e9 zi>LW?O}Q;7IboxQlIP3z;$yOyaNpsc(pR<541Zs>JA6BYyy=GS>NglU5%Pye$uoB} zce?cJBjD6CmFzQ_?`k^%avtORE8c|(-UX#%Tl0kSJoko{pA|wmj{Myo#aghSQP*q zWsq{AP#%?4_11$7%L^;3bY7EeR4XjCG%MaG=Zf=m-V%!bYN%fvDslAM;S};gRKbMS zI*-2ZC82xe6oal|ff2laQ+(0k5ue_Rtgz0pWNYr_VtDOp_f_q6Fzc!3 za2$*|2d^6z@KkQfXahcKuz>Pvi zm=0ooOT>Sy>0}#SVT+0uk0!R#z=aAKq8JrvZSa;kw3K^TFnL;lpnhkYkrD~;XtvZK zHh;*pu zOab(Wj|*@4ci(4sw|j zPT-N&wMwrRb>)}f4Q@?(sDu)wsX|XN0M+n$B|AuLd0}jitPVP?N9UutvehbDl_80U zibH9++AyUGbGWGF@%`F*62BB8^iCLpxrwEYc%F^2))?U%b*&%<+ONFk?2SZL9!wZ? z1%#5D__4D;JlaC43QK@06$ex~oScrv7%6{nOe0Zt)AYA!gjksp0vF({ElgkLyCMt& z$rx0t8~Q}X2rf7!p+ch8VNFwC95MNw{h5pTPT)#)x>;3`D0Vf()2ajDf{H>Fca?qQ zPQs1$vARf^LM6bj`I5+dNP=Xoc6G47`MKBdgmXs1p2UOkVIsqJslX54@Oh1{Au{t3 z^jYSvce- z_Z==El+aLl)-1C$lBQ4#QEOeTl)O+3fHtTgqFo#rHNwg`nki4cSHy4bZf=vtH*Qg- zVpoJIJeh98hQd_IEpmW#KXN5za7vQ(IFhLb>vFC>t?0~;VndKN5UMv}J1oS>Kr+xjps zPMZ0Tcpj@Tf12REc-Tgfyjlz5v6zSBBFVC0`fzx_EJ3{|3*?@$GDdLk*8RA1fL(lNWG)nR5s1+om@ln!L?jv-ki5ph>1Bl@+Sz%+5R1B3H@N<)wYEM)CEX^gTe z0+LEcwahp%+13z$TJP8WATZu?B2=5=$Gj%3+i?k5$J*l-FJ_rbj?@;u?NO7zT&Ic4 z0K5ZohF022h10vK2mWVY*TI347*q4OyIOKKEn9~IJgNe?2uBff*L|7aHdQJlv2bp+Dx zRLXK{W9$W4qz^}!wIFYZ{(8^K9wUe+BSb=r9WM{|HXu2ul2@afcAR|rXi2*{8joHX z85-2NR&)&|Cf*`4iIE{WonmfcChln7zNi*0F5|Z z>PpU1>>C|(itd`pHAXg2lt{^-xTU7J0ly2d8vFmAlhekS= zEF)bJ;<4&WpzD@4FvEHHUO2embXgMAWU^;v@6?mm&7hx_6zqu9@$sz+LFKKGKwsPF zU-GN8Vay?!PLmLlPcv1j1KLXVP*x38UQ&|C_N++F^(<92)e~c*eqfJ7Nlm7Gz4%XC z_Qj1uv~+g>iZKh^Zr)o|GHUw;?VOdO>bMZr8TiWX;8lvDK32=%b$X@Kk zap+Sc^mr3*WXC@`G(>40+4M{^P>$aiX(~@_XWJ(igY_E2#5y}Xx4k}#_nrJiy#fIj-Us9=e6<;zcZo}L!_nB>i8I4}t5(<&i8n*H+%k|9#4Hd1W4F(>_ zA$MEvc#R(dzLXj9=e7qf@7{I?h;FAgRR(Xh7jQ$!{=Oq#=%Sv>rN-^=;s zRuqjwmiONKXlbJ9`HW1ZrKd8AlY~^C8NdIUr+d^0GK@|~ldNON)y$6;-$){JeR((L zyo$PLo&#-WB%I%nikb1s$Pe>WzDBppjZ~-?9vDB8ANT@%r^ytGm{t`jW5=KKY0hBI=8>FCpZuvcb8HB{1J=i|m=#WRxs3MAV^ATP z|CyAq)x+C1UJK3K7QGcYq%HeIx~Ichi2r@znxA5xV+C{dhXAsskEL5EcLCnroOt=X z_4nq3MEEs0w1irZ6MdoQey!H5EkP4-?z# zxN~mNiFxh3BhWqv{+~TKY#3)?D^f3i4=yq^H+0;uaa5&%#t~;o%Uzhb#jFNhi6*d~ zveYXw?RNoe(2{DjY_I%!U%d7$Fs66@8ITLi)-+U9eu8!=lU!mWB=Se={GFz9Kb#om z0daMy&&z#bcCAFF%>KAy47q+Z5=14M!QA3JV8a;Te_-`$lW+wCriWdUmL#5MFDQT8 zjFlzqzzxkL^Q6Ov2)O9tUzME#LrC&&_-004f6oxj=L<%K-2~4(Qhs16Pj4ZJgc*QN ztX<)*!;t-XDgUI;)VyYRJPT>?k*5}y5#dfko3P7WfjRUv_@GQw!tMvvRKV3^QJAHq zOnb1{FV3M4;@V2On)5FsnHyYMGGx+H%7h<`aM%;^p$C`@=_DGx@usJW75UgF!o^Z& z+S5&MK`d^U^w0~_2Hl+hl_mj5PB>8=VrB<|SZ|eY_?xFyO4ADu z_yZAUo}6+9{O;Y-_yKa;=!ziq+0?pQkK%=cW!RBSrgiRHz6Q0u|w6dDGsDt3N z2c8bYCIqu3$kSG)e&|BMdIQAp;-fA^_@)>*{{)(HDkE+%IrB6Y(P6jp?jKU4-&<(eb!A`inr{O*z=o)z}B)LvNh_TRP?gz~c)(cHHd8`+dR()KC ze9U@Hm8Iw)z6;!6Q#Ul_$c?8PP{cgRuh&v2A3S`nzXCtho!XdjtQ2_# zv8Nmm8&J~A5TNtpDGA6_vT9o z_u*fC7*vne1Rd*#1nm=aPs7kW?Q37(TGzA~2IsPJS9N z3_jg!7ODpw|4fCxpEHp{+D@9lx2z5*Zt2Jn+X5nTAc_`%n{1#0a{&Vj%vGD>5CQ!#^TPOC*de@k*+h2&n;>InLP^R;6JOaD3x*{8vwvEJ&9K- zOGhbDrG-Go(ng^(AYVoas&OF$(sD!eyYNP_Ls@99OYggR%})wKLF%8^kJN-jQf64^ zcqeUX-G7s&#WjvWRAB=~Vs+Kk{+hQh#CpYkt_6>nwm)lR05l|{DE5JM;gD}J)5BH= z6uWU#_hamT?SSIAGigV2?O}EUYR5MBY`Z~R4|8tGcfjQSy!dXBlyrqw``}94(A}yf zKG-I%?Uo<=V;OLM=>Z=vy5RU3h7gTUeSl!fKM_$V{_uYvdf6wIOump$_9Eb^r#`rO zI+dU0XSaTdpzDs_=(2kutS5GbU>>oY%I|hmO~z5rW|?7`WWC}`ljE6iwM^=&THk~` zRCB0ph_~BmHO=`UiY9l^?Gb!&LSni5`$@>ri}>P^yYsc*37|(IB)z%~S#N>B+&R?; zZ&2OB)Fa`M)bPSz-a)PTdr?$f1-W`t8vJQ8K=&4hN*dYxH72jg+M*6eE}{hL+V)9= zUW@r8$Mpqn7}*)o7A{=!kh1-MJK#EC0B%C_|aj)GeFw&qaj)2 zrC;RSitE@4^T0pYAwFopjCK>OUcj+CsOFspBml>~GQv|W6`jOVLZaS6MdiYt>U zY3w^i^29wjQIXo+4XyY4j8*Cws?Xdl4Pnuq83x0JSB~KAgG-& zu&yJBz%8!SRE65hCcij)bNchU>e)Yvo^^}UlmrjmQLR7?<2@gHFh!?I3oEtfL6z$& zul-CNdHGpaPa2g6$4T7`dceZT@jXwZJ6i{N5ifrb@KlMhofiK{4T?T(h=Myo$(9X0 z4qbrc7QEAZbsZXV59MbrzE_LFMUF7$dlj|F2?^tIw{U5;nTe#O0fUBUZt{$HeZnK1 z8JsuJ$fI|hdX!kkRK(RC`#|FQ6!V~U9Z{JktHdtE>eIjzYvag2ih+~G?mG!obT)g} zZiWn;Lj!X{shax9QQQgIDcT9)+I+ab$Eob24&GvT(fm0F=cMIBu94Ed>QOlnm6y^; z>wU!U!fDmq{H;65l7*!$bYN$KqkV~mfzrhl&^=YcPE6N+ zo1>|Rb03JF)A@H<&t2)8t}`FD9=DBqiAAnGTrpnmFus7(^G}mVX1#(JxARY)%oo;P z=sxG8qNGACUXpT!SISsj#psxJVfM2gmE=gq3qQqdvRQ|tC^&U;)`hN|9h}FnYIpf9 zm=R^|$xKkK2vxebGqcu>YE=}tXpI_}nw63E7~ic<6uP0QniUz<%dA(Lk7vLCk|j#s z@`r~1S$*37kzZi^?*`M4%C6b}?hF$DpFcu&wx$+l|HGD1v65T*X%;rg;YQVStKw23 zC>RJ-`p=1k#Q!G2;zt#K)gPCKMsd|*w2AP(r?UVim>>^Za+FkmZoQ7VjEqXFSVxVl6e-t|G3auz_sG zXU+xI1J`(ZQBUzYpyY$7a0J1GLH`MAE{&^mkn?`$&nvK@ZvOf7HYD~Y+F1>uerGHO zxd}&a!(g}y_-_UVlSOwLdIo2O`hR}dxVp&UGo4z~+u3b6Rs`fWnwBDorTkVqEG7;z zlv?j8Bk6rsO=l~CwZ2uFi~$Xit;eF6%LGAwqfPh4lvGrI)EiWVW-~UX#URYqXQE*1 zwr8uz8C_dSXDOu|l_!kyT?JE%6G@`NtVBs#Ol+p|&+1a%=|(wV?9kNGU{G3!UpknX zlxITIJEYc+L#;%CUBh@C?7(W0@*GWe_7E)#{727mdCI6yr0{g-#qbit`v|I7y!|LiE? z{olOne_TmbMhRO5g~zUx$&NSkHi13?ABl*;8&+C596CH25(<(ST#y*^X?`&vv;fP? zllcSE&9>Uns)0o%Gh9`pR;Tlh>=n6MQscmbXJ~9M8XRrW=fsE5DXc~jtp_Y7??u3gg)4cqC4{Pjgf{SNf7(y>{uhBgZ(6_Js^wod7HTQAaz&} zNk%=noyI{d--JNE%JoZi!o|jK(CLP&3i!nN44~X#js_Mza-5X}o1>$aS7|NKQMDe@ z@3ys!7y!6_6;T{HbuAT;$q%_G=P~Ccqxk)&Z;6ni$isGx> zF_Qr2d=F&scV?zv)%ZA$UOK-}tpBmG(?4Gs&E9}klI$jHpmtC@@QFYG<^BFPWM@b( zLz3&i0GoAvS;|C``P+=O)>{Sr*RajEu%+|>?7&0o7-lJ--|;@c#LfamB*Ny;0yQS7@b(^S6^L}?k+-(C?2ml%N9ylYZ-e7Rl2f0ce z*}ST1>2sA|*Kk|-R~9LFJj?zdQ8%VZ7Wx1=K+UL1YSBBMP373+g7B>JNs~*b20g&+ zB4u{Xat6}*qYC^rk$o%AvyEtU9L)h?6fe@KR z?!F?r+7j4SSVq!nH>`8)8MQwZ$#UyL0%Dgd^#-%dQ-rqAqO|Yf`=$8`6=M&yzz*Tc z`zT%ONPN~7_SF==aNY0J%++xZVJQBiE3%m;fo0P$_Pd9Vu<-+Z{zm`t<{-Jh-a4r( z9LhUFY9kStkBArh<`Y#^tDCMVN*JX}T-4so?4~Vj+H?=N&LaDOC5ZTKe)7&8Ohft* zx&e5V;5=_Y#QDx@_Q3&)4!FPtV|%!~2KN!Eto2P@h`E_@+B6syBKEBA4=dH3k9&jy z-Qu;dah!={1PVeTdNMjAajoQ6SU)m&>bZLiLT{{rE(^wI_oeVW>O+>jmKq{V8o1EX zu~Q+SIu!RDI!>=iRmbMqi#a}Rv71M*2=lb(A*wEZUOU8i_C>8+`b90>8k2P+TFiWB z{oTjxW;Ja8SJ)$5Q9@Yu5c>B`cZ9={AU+y8)9`1MI}tIFTENoy!Pp=tGn5SdmoSmv z4D~zI2GMlB!NaA+>f_^i*z9Eq2K`P zQ3o)+!8$7Tz}?$@w!7jpo{~L_kJRYhR0qKUrdJ#=#$IjE_dY7yioF|{z-tjazh1(_ zDn?mK^}s{k#5U?o+?NKRc-BBt4zIP8W(wF!E z^hbmKN2vUWBW{0Q_*@>PJC0vze-vA!179xz@8Q9N4A|~q8zA`Zl<`YYfGF1MF0JsB z>ds>^s%`i{FKWOIbZC?Ds}x=Au1(ilZS)1jdiW(Ii|_UzOmH!-`;`mt3-<=)aO@Rg z;Q6O``6`C-U7WA;r}Z$;cjn21pkKv>1^&>1g51Csly4b*mq&?8( z6Vy$NvJ`o8P*m||hV=O|V|(L^eNH)8Nr7gk&IFcWR;IJYV2HRH7QrfGTAe&ehV)6= z+-(${rP-H2H1QJQLzgqcX99C`<{R|;!c(1C^nB~rbccZsrja$cF$;v7Tp55c&m zC^w1>NzuBrsGKIjj3i04dHIa(Acd(>slpI*H42kX?qXgRz0jwu*l##Xh3gnZN~a{8 zilcmn*-h=zKln`e=kezOb=R;j&+gcZirZzOl0 zbL@0C1T#el;l0T4B}SocZ;b$dW24itH+4_W9ksbJ8>uf`Ta%NRs6du96INzWJ)c}M zEmUYr0d1s)=Cg5l8XGLq!}#d$Gl|>p_2rP#%Zu%#vLR1 zAT*KQ{Mf|y)HS?COX05y4KuD>v?3lC9uH$RCqXFGtk?kR@{?RUlEg{V!cHem zs)q&rragrQkRHa_%2&N?r90l)sdPmR$_nBHDbhAcks|`?43{=3rm%Gr-tPyWR0-7y zDc)2G=)B-(+ z78Aye^l<2uO8UwL#!hV#J{q-iXiWk0H+~7_5-JOlfjl(5?7+7&Chx0~ZRqUE`{3~C z)D!h_iA0MO$zPf}vR0Fk<(eg!np(0g(eo;lU=;%T>*b2b6_hNC%ak=mPmDT63! zB{Iz#C70qU6(S|78-)j}w+@4iAN^Kyn0M*zGm@Om#`qbHz@%A-^5>o#l)P;AkqX0# zlo)tUE7bUmET)jxE}X^Bjsbf)TqFH6FiN4z$p%?yN;ph=&k0e(-lX=+9vM4v9`?_V@>)7jne9L`Ha z+_ya7I}-Y8zHb!{^1T3Gh>sk4E-^Ig74R(Om3eE<^3tAL)RiZ>6j1-J?ouZ?j%rj( zwH*@^sjuZ1IsV%z-A0e^^zb09&Pu1P0LVwf%HZPL{HwVB+oXOqJ)*DDLsV|8guEt< zrAdv?d|eKr7h_w(OCF=T>o?Rp6#ap)dajfU4BCwot$-N3mTKny=EV3}Pld4^EADAk z!SCA;YJDoNR~9jl==*rvmWy466yE9YBG+>bLx!S zlR@?5_3wtksZm1lZdS`j?iybp1Ku0!R*g@E&pMU4-S;$onTPgn77gj-Ek=ks*kFj{ zlDpXgGO{Xxc}d+~=jVVX(^N@iX;MB0Bu(NPF_HDF+hN0MGt*v`(oIM!TH07-pG+G~ z+`kmmVkn;X<*0KPToC{M-iqqHfTEAN9rb=f}d#4|^+;2=i@h=q%z8Zx)4DVE>^%5MpT6Q#cIB!|g7^cLc3Pc{1 z);!h^(m$qOD2mzXI1|J7-22Lw#8YzLY$xD?vi=X*XA6l9?Y1lI$u($!^}mbUe`0S` zoRp}C6`l5tf{{bSpUnbaBW zp#e0JyKYEnGKGnC1%`fi#IFRe9A|}UBJO}GP369e7uf?xC>oNl#JYiR4sAE^sf_o? zRK-3=a4PT*lDfegI-Kc zYi$xWit7tTdcrk*Yd1U*7e6B@EQjh`0UHw9BiJgHg*J+E(mrD9sK!R?)!7Yv$UK|s zc2W{a@(N^k&8Ndl2v-UEc8^TtPre~f()j_uxPwGhca`V4W8?dbvDMV74|NwoSDct) z;b3cLykl%Uqj^$3OOo1E#d{mz2|drpfz>>TG$(2tA$A;_SN!(oV7{G&y%RyK zFbuP8M!f4f&Ez5)2q2kPoUKnj)wf6oiIO3Wr*MmJ7&*#cPp*>&OlqR3xl+6y86JJa zsY`D{RYNruhB`RcQ`_1)nl7i36dvl`Pe$F|5A+IGEj z>#3Tt?b7Bc4AIKgt`tLf$@P9We=ef$5jEM zun~*C4VME?FqMih-$Q?fj9zdk{%28qfoaPR7sC^V^b@EkF&IP<5ne*dUR^WNs~GDR zu>$8slFK2l8A7aN^UmKH1wk_Iz(I7+Xu<5x;aq~7m?ti*M9E#)6D%)T_wer0UHXk9 zR^s$7*a{B51M{|+D!2^2hOV7;ul>@~dt!OiDTjM?#D=bo?a+d&t0$u5uN#k78(K-U zFq-WEP}vV$-wV(7^zVxf^8x%8RLW-uGG1t?;`5i+S{%9LTB(6ot+u8lNjQT@#Zign zSs;i=ttPBtI8*t7=1elU*{o$6nwtSAt#!O_1pCX@8uceI)Cu{4`ag<9aRJe?N91A$ zw$nA;^Y`)ogRL$#Ph^eg``KO^5qp1+sP!Sm*1i1kg<2i!7lP*twP8Mi24=C3_c~OV>IKF4T_SZ}_4scicZsoH&-!$%i=wjT0_?eH)9A^~8ZM}({ssH2@(%Oz zM2k7{`XsS*bX*zf@e*s_Dm(Yc&jV`P>QZoGIK~2F7TqN6XfK(=w`daG+majy$x3%D z?eDDPxL9Ma=%!yg7e@b9RX-7La>O!b3*zLxux5|Sup7N&>F-|HbMQ_O;CKHWDr2AR zZ?^2VsHbgKsWPD!UAou~Nj}`f%+~)i9dYgsb%-jZHP|~8l?eB@9MbY{? zHW1UdqY(>WUU~|>Wdo-`YFAv1^PHPl{Pt|Ym!JX;S=Q%Dv?xu9s_Y*ZovOqXi_zy= zWQ02q&wnM3Os)-Olz^RGpvVr{kg9mValvwQ(wDR$l)$EAmFICyp8E zT)qCI&y^tJA^{nTP=wSwDi(;1#kM2E2F9PSYX$iW=Myx#iH>qiEsGE%`ZITm$b%(d z`m>HqJhq-tX~hyfHX%E)x4K03`=dJoy9`eiDv6ir4^y%=rwS9L`F4AqcLlG_4cZ)%6q?-vI8?=?k2C(<0VGX55!Wk+*c=hkhxqbH7lI|^SKL!c4D zP2nlp?9ARfZV^`Rc*izn~0Wz6y&RrpL`H(w%Z>+VO zd17?|gJ!S5^$YEA6XM9Qg=3OzxdKL#=N?)OIc^NxG1 zb=kY*3Lnhc5s|v=;7H=$ZuKJdiuq!mFvPavxC74H-(#osYiA7LMJ+b!J8ClaA%o*B z3;$m6eSq@yUtUU%cfrB+(`<775lq7Wzk2Easx#H7S^Z%4(0#!LQ4;}4{wUSq8|A-3 z^yykzRw~_tz>C2Zte4>@fT^-{wyen?ed|_qYkJl;yyot#ykct%d3Zjz7W?M;<`h{D*V&CZfjSRn% z-MO!Z9I}5eOi*%zT#w-=8fM-L4aMVmF;L?YK|AvavL6qT!{XV9Q`Mtno0fBMub7SG zOczvJSh|Lpz@HDnT9##Fa3{?(!shk{?DW7Eyi)Ma7J;*c6+w-y%PaCha zvMfeCpIcnjS8z3)yh65JNoY2;m=8GYBmEwpUo;WumQ z=PW&+H#(sn8D{m9mUhuhO|Y}#U9My!Ql)5IkGKQPEzlPjB3+8R%R*`?zfJSh6(Uwq zh@H+kSQ@N1B*nTc*fT=wD&H~cD$GZt0uZZJZ!05LZ#yH!Zp#DNl)EF^P`YalqSste zu>jpg2iQ9*cep!hcfWXxpS3vqDR~EF!A@1YqhSJfgrIp1Ak%I|*b8!I6P;Xar59nR zeU@4KH1?73binc$t#zg8jMV$`wM+D8ibcs(5X;(ly+h|PTxklyl4oHYvf9PLvQLUK zXgYHl#J7hRZ5ze#~e&@jPuo+n-2(Srmb^ylr_(4cL9D_hI@qV}? z1r#gp&(ZxkDNWdjiLvBvI1M>FJfS$XM4A>TUXwyQh7f<#f*r+@yMobi$p$%=v5XzT zV#MkMEII-u-&8)K@pdHwzoC~XsEzQ~o%WGh7jj84H_dfW0_L43(UuzBBMSS~(pO-ZKd_nKtSwX{9$lrbz)9mOuC{uz zSjd`(+VZct++|Tv9D+QYBSsdwQ1EQ2T>G&(3U|6_4rH4bhBKWmR3Dc!Jq-S&AjsoP zA3k&C@P5m-QZfFQ?v@bOC56^%|EQ)Npo2YF74uR*(W;W(AMF+?M(>czTS$|Kk=TS? z-0Vq_+*5}lVCTQXLw5c1s=}WKXW{1%{eKUSi?f9_ouGxYl8H0%&*vu=baJuz-#rel zC%aW%aBy&Ba9vk$cvo;+QSj%ZkM4toEoD(~Rk%6w8G2Fh+P#m3!XxFjY3zLbHYHOD zd&>x2RYMIzE)WEwAZt-W1%(n{3klO$USU}4327QVBcx&a*ywowc>gdEa5Qx^>@+MjKPoZ^Rl2d!9`J%TEI!L$ z*lg?U|7tgHO+ti~{zSj?KXO6;zb5`z%F6s>>i;~isy9m7Y8bxPUwgDuBmp5L%>0c7 z!menFnlkdi#Wvy_w27i5G9(N(smDmP8m<#x{iG@uKl%-y`|L%o{dYoz@OaZI*Yh9n{v)()Y}Mm#y&t9(xsA`eDKJQ)1w z6^|~CPqcQg&4kzw2oprB23UwN1dj=cPt>Cbas+!I>p>oJXfyv#x0e(F84VB6C)Jxx zVVA`)zmPht}dO~2%^qpJZs6>Uemmoujhx%OYG zv~241*Z)gDQ~8ed~|Tr z#gT_(Et3pIC@#|c(wtdor;TBfw0HU;#oQJcaLy`xHx+eQXt|*RvNGalt?-9wxZn|t zG8OadxVYKk)5Y3<${cA4(T^pLZif&#g$Ed2X$=8iUcjnRC_d_fQeYa04_))FqVnS~`v2`L**x_fR{7^8;H3bAO*euH;Md;^|Sv8QtjFTNMe@LfFj zD2+FwUgOXHGL5rAT}WL1CjVI2N^QW33vgU*PJK~`V6VObIuUAbG#E0Sx;r9N#=ppr zLw3`>y@)}P#+a1*1{jkc&Xj6qs}Xl+!|RSAVJcl(VCW7TL#W(E9=(?b$$L$Kr1wVgacH^LC9+WZV%2D(_k(}w*+pw`_O!W= z@*k4Ukm@$waz5Ju;+`HB>*vwrurlzJ#uT9=VOqdGizyaIo=_zg}36fqFD%F$qY#0(zr$ns{!4M5DjqZiXS*Ern$W5Y} zo2N`3|44%#ir;1&Z4N>H0j5uCv1!xalE?CVg{?ZrSlZ+Ef$qh60P6mc$LDKV$ zEuWf(wZp<9)uVtd}OKn$0hc7#>fw|-Z@u26Fs&w6j*?PmGI zp-Emt_gftnZf{ohRI*bD5^WJqo%_U{0U104B4PCq^2z|Fr!Y)a$j$C8#?2wGEO+Ek z<`C8vVSNs6SWe_XL99ry*T!z>u~Wo{Q_i@oB$zHq!s2BHiNY@fW9Po4mhtr$xiAJSOzYnR4N57%W6K!fk^4T5M8?YE9s zp|re@pG>nUq6^P_eieZ@oS)X}pNTxn{sQgYkmxsMQQwDO_}8KCJBK7+V-+vQ?C-Em+TL;BhK8SA@`GA6>`Ko$5uV&XGUUes zrRlN_mfAtqqd!X&mHoXUU~H87SUhW@2&ri;p*_V05Xw1!t)y!6_bHzx3xqL)Ztxke z?Yd%08}9t8$In|P<#qP`& zxVi(XHHHQGR37`bl$$6kIEt?vySo)hOWo=1(WlGEQ4mN}77aU-)vzn*?Y* zv?i&RdA_?Vp4=(EruEN|yDzGwcifVXZK@jlpxzbgS<4RJ>;WbA^9eX#|NMxu7&DpX zTE`W+f_vIXEsnE9UZlZO`r6}bwS=RIm|@j=v#X5{C`()G@#EnQ9*eu&$NgkY-kFc7 z<u4j2&p_Qa}E(Zz!IpwTx8X8;u6I1|f6PBFWUyce3_Q%NiqXRl&ByA?!I?V(y4 zja)#26ZTYAb$YMeKNgb#Xwgs#0P)<3Da@&v9g3;hteo`jc#cxOG{qUpb-+2OlWB=1 z#h)lwdYd;X=w@fzaPW=duKrIPkIe>JwX~>IHY-%Bt@Ft)k|sto&sO!YEX-{T%?ZSg z-{~bK7EUC}A~TX9&M1$++55~rcQi{e*HWZoOjQS1HML#54UlO(QZfEm$$3`BxEQk- zTW6Grcnk+TiVh8w!kUDgpbUp_02U7QIsK-zS*+mMs$?nZb;y#Obe%CgN>pG12j_n` z97Py2v+yv}#qLgKLaXFz7w6O_SkfROZqilT%|$sMZMcgG7lKCi*k@G~KNM@Z1j-M1 zlCvs!zr!n`QY>hE@;y0tqpB$aOi@5Y zIa95fjKNmoHiWUcacvV`qcXg?3ux%pN>u({jZiX}6;T~MQJ1I{b z9R{$gL11y!>??JvNw+W-HSE){JS=iFmYpW8&(yl2in>(ljCJZqNMLS080PDspIf@& zFPR4VasRT&4+aJty30jFuEKrvGpUu<=+Eoix(OSQI3yKj4;TKzIs_l3uhl;VH^ym$ zr7kEmTK5N8?FWKKQrgLmJWpm)izJM=Qm@%1!J&fwf!T4aDVN!d!H*8I=0zP^x!;f4 z65P=#O+~Ixp0jk4+@vB?KkAbxUS(wl5!mZluT`vCtbNuQjz#^53|XON6Lq^Wnd4Xo zQ`%y+gYNtb5LL7`Th?i0^jKg23;9s0QTT${W!=%R;1H%V-gT{*67o)zvkN3|RVr1i z-taMyY0^4T{oB1&X|~lVTSu$mE3XmN(P_6JW<%nxpb>QDP&BrN_xH!@DKTEzf4GNg_+-;$Yl^ zu>oIAJpN?2nSKH1{ogxBwF*M~bk7RUck_P`0o&Vhahll*Z!lG}dIh?2JfW#?UB;}jb#KllZO>^W7i_=iIc}kuH)724lqzK6>!HQ7kH~V9*M(p=t6Ojz8~~h z?T-1jBj|WR%6wMnMq@6Xla@-{S24)kgBnv#{T3K|W-3IMH6#Y@6N6Nk@O6>KneS}b zfjGOqT)`#s#GCJ`bW~Hq<@IZ=L!=rp?}tT%5#OWCvWUG>(vP-c{4zl08g#`xjwSWK zlzwxNWIW&de)(6Ps04h$7fwDo+lRP+CwBmn5t=kE=|QQTHyXxBgZ&Nqds^-m^kB{N zg2H+4xRb}KvOAWk@g8~MgxY}Afgl)z1YupzdeEi|(GOT?8|ZlzCT0!Rl>q}SiE8?U zeL(mPSu8g{N$8cKwGZ~mOeqD4AlW@@P;9AcTRT0ha+P1I;Y4?k`atKT^2ICCjY%_ure@nYU<&h^MTIss z%ECo0gf%4yo1#`}HRAoP&jwdPmb)zoTJQC7es5y$|9nDnnV5{549fO5-`QWXcX?ji zW;@*6p9Z6hfKm_f7*qFaCA}ftrXY>fLlIu=o#@zJ9Gud#-a9<}vk;sf5HSe6{~yBM zF**`{3-j%yI~}8ARcza~ZQHhOc5K_WZQHi(WOD8~Gk4B=@2vS$Yt^^fwb%M@Jp1`6 z461RE=;Lw?PVmD=RqyY?VFmV8bCKp@ zN#E=@2NW6pvh^d*ioG^PgS>{4=EC10c*;t+X1(O5-ho`_Cf^B%@7y0qGGx$I=(UH> zeA*?4*V@kWOps1UszJ;KL9dwbF{U5%MvX#7m%A1h)x9zCzr#TGCJT zDbWm#Z=__NFEVUh=p+y|YrXekY>}TJaIu2i7}UWd&u%oAnJ|E1I*JgeZMn$Kd6>p& zQ}bCCMvzm%JY6ljR8EUCBiqDeqzhH>c+eA%7dtK#RkzaN8T%Xg_h5ob9%WkVcYer> zG?f8kUdzTQV24m$mM5dG#z!@WNf`9)cxjMKRycu#+L?`t_;_iAOdV5E?tZP83=l>3 z^ZraHK{ASr_SF)8+CV-jm)=RPjuv^$WTl*5w6vf*htixxCkEPdu)V0rJ`O*bJxTm8 zcIU_|Fqytu7gzjZUA!1Q)zBJ0L0j-W#ekTE8QBvOMH)Qg{HLe-LVZdg{4Q|mLuZFt zSIouYT6u&y)J4i)%C!kaJ_oicODY{VGjnEMUsHl? z4LT!wpj44PV`<4>d%pAZz~1149Np-He1wp?0IW%#!8fyc5^RNe(Z4x7O8Is4h9j9^zezhd>A4lobax8sV-RmEWLruQD_} z^=iL+7#4ETmsmQzv&~2@Q4Yi#b@?b_MJX$Zn2mN@a48KXmP>LpB?}zzm#c~%&C&3i zwIvVhw-Z*fx!@RJ3SCv~_{u~O-~U*XKO=mWZ&PD#cI^T{n)cv|=C+zo$fJZ{)n2#EwS2Y?%s#$q>;g`3rn) zWEGyJRv83$bQp&QNV708FN1_naW7-@V$g9t?qaMH$XIXW0T6P^kA}op=d> zSc-ADOi3&?pjS<}R`s}Q=l!=qcQT{>j$^;vskwa8EZ#Q9&LNZK2EE)vJHoi;GCN5u z-tO{=WxXjcv^#uLODwsz#5Wal%NF*Yp4*A532u6+5bj(-VF6sT060&rt|I48 zft6}u7S|gOp8XHa!%ygXY!WRzS!#qAF1*et5(|RY#W@#x+Zm%E-hKx*(4pnN%w;V* zLQ?}_6hx%mACVdXHLFM43i%+R_#^MZB{&vz6zh z!Jb&LsB|eX)24Fu2iYTz1Kv9c*^pOyzilP4jS}g5NoXf*o8i3B81ai?viK#j<{*=4 z#o)ULZ;oWuyCo9G(R(Cf$4BvLfJZ#xt(N|{mrTkPj;3gKVw5t~Q>f~QBJbM@0x8!T?0{{E9aUHR2y#HH&dC6UyP`K1#;ZkgL`LY0nlfBes@g$J z7>izKjsS0g5$>`+3rXvrT&5!5KqX!{1?IJF#H)IhR%kLPHz^l&E{mt`kiaYRR@d34kjM~A;?5}Y7-L#{s<_Z*S zliK?UaoKC$oYv`e)sxnYV!cE283=l9eHxBr>-)EE-hSPTrom`60@}|V{XtE;fE^6ufWr98e}rbbAy$ zqX_a_C|eAJ`9zF>(}Tc1P~9^OTn1@erl){ed{n0&j5o#zxkM};+(CH!Jl+xWSIEFu z_9tFpw|=O{-mKAVp0!*Xe2Yhm$!Tw$=canm&MrXsIb>&LuWz?b&2sTvejd9h-^HzW zPRaNc%HARPB#WoVs15d?hW`d%8!({XyWqI*Mi%hjwU(T&m9e2c!T6G)QjH~sjt8{B-?~l8E=O66Xj&NQC=5Ree5&95O zV=Ot_oFK&hsQsiMSLCP=dMHw$vXS)VEaT!~Nq@8^Z|O#74gMC}pI^5}K=PWPLgGb6 zKyij36Ci2qd!{Fq1M~O%ic3P=N0gv5z115@hS;UE03XfQeN=I!$LBpg%5!S4Q4pOq zF0^+YN?sIA)k}10h)~=^k89mCi<5^(Ut=7av0SQ_II7+;(Pz!@`1OcZxPyh$RrC3& zP}*fL;#aja`q$QTmgYhKWrTKvYhV?=SCqWsAc$dY+urxYzAgyA$6Jnm=%n~DrO@i> zkOj47iLfr!Qu}POO{h}6nB_umjXcL`dSN!{cRlHjXb^s1fxv9yiCK{7V5f>VF%r`F zvD7M4p+@-jwS4Ayxt%C(w)UhsD;{3WegIR2#JxMM0SX zBk|dSI^gUaKgb-qwNW=pTm|Edg8re|p!8h)2qHTMdPU6hU^3Xmr9& zm^gKz@S)herd8I$4Ua9+IZv6vSh~LNWSD@B4iF$+3=iyQ6p)!5(H>^1f$J8ZDTSnL z8+rH1Gce{NC>W6>)vxGq6T7{Kb*^X&z!o$4jJ_Og?y=&Iy{QS4z9uzQ!dr=BwnNJp ztwKRw3fJ${9x!em8KdN*|3f+X<~5ryzcLItv4+)VaV{z=DIgw4UC*U#i)bxwq7qXT zL9O5sT9v4)knS@WvtU$Y6`EA9BAK~p=lbz1NS7TdNre)}yPaz&Z5HryxjgEtM%Z7f z-*g%>cTc+4N?wT}Ox^UIH5Mv*4=PGhOA_FcH3}_ezS?rg&n~pj=(hmfv<0~Wa)jx^ zLI*EgOU;7zga$-(=^iF3Venea8~ zE&m_rF1Mj(+dC-i6J5(~5bl02Imn$W4vDxkfhUCziaf91hYKNRgV_A9Ad2=ZGpkJW z!Ji@C$YKOE!T!>+oS2UzT|L|yl+8Qcp3i-^+tpQ$e@3^fyCf}qMSKDoDWG7e^d|I+ zr*Cn022HOc8n@Rs;2A=l#_O^c13{9*DJoK&3I# z@r*w|rAKd>qiSJN(BX8*9*|s|_5xIU#pSU8t71Mmmr8E-p)&vKg;cSEq(-Q%hqvfz z>z{dvo*6s@#$6VoeXDVLYLrSTGj;m6pi^SnJ4swb?n{nFS0BRpnEI!44Y;C)W=Z%{ zwX{P_+g!-%qd$BjVx|x)AGv?s*3aPIy~L?&nAVy+dlVCk_zd;3Y6&a9`Zd)e{FQMGUrG_FTD7KM+Gd0xvqv+#$R#z_xz!82I># z(y-Vsk1P|QX0bZZMF}iz4Twh&#^kh_><5He;6#q5_aVaE^ZvDs`!w3XLYhFyRZtmlfT56k5KgUZw z*0Qhki)B?zq{>Z+D<}K;7$qX{(2fir_e`%8!XimpNAZG4$-s5idxlc&yGNy??-D1x zMEemdyhNSNi1}~~H)n~p!NZn=0lD1nL&>eYe=VsBGr|W0vB|5A8w}v%tLnFT17-`e z`3fryMv96Z_$&)hM|ur>);!PW6oL3tA6J#?{SP&fMT~(-*!K&9_@4c@7gO4McswKJ?fKIAO5*X!+v)>Y2jA;97v=-_ zce{;F7H9+N{ajBD9_C=57G{qu^#!L~fEdT9;%kdTIoKM`jjjk!zdyV{2gC5YMKUbD z7fl2bZcsA{83PsJFAgG}Cq3|2$e8`Qo?t|5sq63dn}JG)uX_)w(leF~k3M9hl3R9?A^j$;e|k!g{&m~ zI*^Z@<*@KP@7+r?Xb}J?0{+#8(TetqkgZes>XI1CMA5{=Kd7=8pF?}iIzobfkt*d) zU(SPsY@ujop~_eV`FsRzZeNwTSe8EV;S9CZzeTbbPXg}~v>_fVrVUXm0#CdkR3H9N zKZ1Zb(Up^C5|PX8`*D4UTY3KF)ieXX={Ge@ndw`4Qac|>?CBC#SmX&7?(%yI!bPh8 z&V{!-98arv46jkAEaV$`67ntg31M!&C)(629`{x5Ut1>F1z5n{P)&xggg<$(@XKyK z?fm?My_@cHhex%I4}=}w93qa#)l&pTXb&ljdq}?-Bc~^;TBK1CQe>ogb%Dp#cjWQx z2df=++)MRGTNj}(m!e!ic<^O2XU{HR_l5pPE41pjw1*Gr*GnMyc&BcL8immBu)ie(d~0AfK*9g*|b+Aw)2X(S>^4@pFQxA z-wK$tC<4I;m4q%Ro9Mleta&)$C}QEqwsvM{=(Si)fdJd6UxY36nKVQalf_ez2@Ih~ z_HvoHNxhJHa<$S}MIurQ>4#bjX(G2s%JVRk1+$Qfs51)uaxBBZr05g8oCD7Cbz^er zx+m$!i&R%6scJ6Y^eFEyn7b#{LuggzCs)z9jjAzOLOe9PF^M@NVLXmLvGMJVHpiYY z!}NV2@fHKXis76CUcaTu;(}_Y%&4iPlt;fBO*!P@oJKXe)v0mHlAa~ao8q_^$%XkE zowRqMg6n;mf5uA`XApc*uZZod#Sx9Wk^x z2fNuqu8R@jHu?ebVttK*-%j+3iY4gcSc|$`9_=ZtcpYJypGMF-_1hOf4fk%1$so%~ ztY#TUlmB(?BL*w8Z8SKQxC_hk=i)5v{lek4HzmWCI60_N&fk3;bP=n306;3%XLS%m zC$edWrT+CdgjZ1A4O@ECHn3H%CqggdENQnH`iB9~I=?=r$9_u?M^#P+#2D;iE;voW zUo>aH2;RivWGO@J1HncxG=m+cFexGkzpK9F1|me!jxGyq4=cEF&w+zQ(@D`xgtEAM zepZyS6n0nI_?_Llv>Xsq+__)hmXP=4yY#Hoi}Wf*zN?S1`_D}ylqpAaL8ysKA9r-T zK30ouIoiG|5>sU0UdQ;w^bW8mf>L1M;A8gST_18B3H_L#pu}Gvl-sRhQW->(NJc(z z9Pys`hAlC->k`SU;44_MuVCZoclH~CM~)^!kiTF^GbVW0(ty}8U^kK}`-{mZiW};} zoLmYMY(69HfD(WkKLU|}#V)hYVGb;2Ix?WtYWfbh!B{D1#;gi2jf-0X>2%N!U`n5N zO%+&39!ARwDWyyL=dB6t){6HlSj9Gt{2K8Z`@&%yRzWreA2|Rur@$JDNdU!3{k)W3 zwavDWPECb!E63|{HX1BjHd zHka%6U6UQI+TLDl-)RcDaHZifv&=>(gWZu}BaL=U1z96WG9fgvw_Q3S2ibt!%pp$XHi zZyg+MNBR7Dk^=X2b+3|jgzZ?$L~z;pWQk$ryZ!nLu`Gbou$0X%gHW_T$)sO(cN?=L zjd)xPv7g$ZJAZ>MQcz4o>m|RCMng;f!*{mp;u6+y`T-w-aUQ8WtvLc3#yO!$bYO02 zH@;3OdM((C_%r;q&SvrK>7$ywS~pAk;GJ;Lt~uXOnsr{)Wir!EXl(P-8!0gdE9kYr z1#&aMB_@-9><&ksJ?Qywz+JxupBAsLNhZ-Yd{{j_$LxCZGo*H`&|7=5T|=AF#XcB& zR!#?tP%D?7_!t^4yWgeMUqSzDmcpVoVG_xHh%@~0;^nn?m!~uR4^DUU&`c!M{<3@I zmY_h0%DWnBT}VR?+@)PVG4nt)G>(E>S1Ue45?$Pr+Obo?(@r63*b7iPHi{cSq%{Sc zMUj&BEA)TwdP%dQhF`ujqxd^ZsQx=c6b$YELlh!VFtM|C{!SM@)BhH*Sw{7U_3^+3 zeU;Ff&zhNsTjQWo2O&! zx_W#A?Pe5$8iM-iM;$BBnG_IyY)Z)`kPe8|)artSNP@@}h>6VWT?r|rlCcceytJT0!gS}r?Nt{G zTpJl>b}`>|c5-t8Y3ShD;Ls5Dmy+Dq;Y-vF2z3;aLxxFF#l+spo z|G`yNDv+I2pE#5lg&D0pFx>K*OLEz&y=k0wR-Awol54u`!N|LC)d7hDFETg0aKF@4 zJX-rgXla}5)p^}-$CO%q4^B?787=T*VP#Rr=b12ylzlY zSH@UE`kXwb<3q;?v+kkocdDlRg~|^QqYPXnS|;*cZZOzoky_)0q7f2NK>x$uQI!AW z?q;3&I(yUz%jkM+wsb15oOKJ@`ntF^z0mqGnKC27^Hz_Kgsnk3}F&|pEkDV zweXLl8!yZa8b=BT9)2$04;6{-UO8rGi)h&QG?voCae!Cq$+DDXy0g53#aTeHVXCK(d>0%IA4&NyPYl544s5}A_V&>FBN zV@<1!!|6lG%1OCT@}FhmB|zQrcv@g7Qm$e+@J_AjzKxyJ07VjZ1ckbS8C!v!;XV6My1fxlCuqzE_u?%dPeVyl2I z%5!?R_9I)Z`_9s3dTdV1suYF^`M2VGp0eM)2kJ5xYBeBs@xyf~C9|Tk3dQ1#l!jGE5Fj@F{pF9ar+`i5ODeVv`d#pL=v@TuaYdnPrv=Ul}h^ zGB?pazJFNR#lZOuc1?qf1tw(LT)#*an)3Vop|YdJoPjeOG$q7z4M4ehR%D?T$SPTP0@5NNTk8O zOvd^eVlbiZ2Ffw(BOICN^lNFXHhCOlUjMW|1vpb{={DBk_S8dnZVb}B_&wEPO(m8d zv^x$YUioA=dKw>?{`7Q_+Lpn|_0>OSyms~A9YTTj0M|Yg9z2j6DPb|R_nfp-5(`h5 zB{QBtp_iSn$TN1h4X6jb%6Nk{Qjb3{n3PAVlv>dHaJ zYnN-I5OU@Hy!&y3v`%JtHA>LTOAvYORyez=1yicpYM(Uc=-EDP=Kj;>Gk^uP-Sc>k@{5I7LgNRe$D94eHp96Ca z!W#CzOyAl3k!?ge8ITIC7b4e}j{Q_H`6`FsInOE;YOwOR8v(pMTfzYsdW**YFExpX|KGi6G^6Jr(URyFIfVGv zW<0@dKVx~dmG~PAx2d`k>ib>Vk!yPY61Eyz=rr`u595qwE2L;h0w#ZaOn2H7b@DhK z_|lT$#q&$~MF_(SUGJ;%t|z=FEc~lura0n+kt`cY^SQxoqv)|1~+s3o_PHMVn%0(IESMTU94YnJ!}#ugw?kLZ$PS~ePPK85?;XkIl8ef zV_MG`#jnsehzG$?ubAefZ1UGjGAx3{^jaCb_EUU~r$1KDU|IKntX{!Xv2l;^*YW(I z_=xL;S+euex}XMHEOU~TiP{eXd$)`2J$oSqSs1Wb5o2&!v|xE1LfVBj7yHuws22Be z?ogtt{XFm#n=)rOXR<$sxjTi9bxsp}B)WT%4?8uFKQYwOR+uMfzl{{}$T|fVtd^k{ zEh{vlpGVBSv=1Av#$ZX^j21D8c;}cE#I5_4z4QZJefK43_ey~fmeD?3WO6efGSU!$&)Hz*E{8T| zWCxc4dlR#TPw)`_`{SEvHwSS?&yvq(7@}IXqf^hHaP%jNdw}|yUak$QiM2{*aFBEP z>mN!MryVk1k#Fo<_O}5Y?mr&z|I~~BI)~NOvoQS6luSX>8lLwX`{vN3+30uQ*SHKz zu4vMru*Mt#%Y>e0VU8EjKLj*bYq)~jCv!o4tC$tRIODu$O}D8ESE|HtNo4=D+47Wb z|2-5NYr2#E@mQUNTbCXXh=ftbw1`Py(>K@}nFeLM0BJgbQVKP>x8yi8(+SnKZ}QTP z64Z_i7PQg6v@e!{I1&`5O)Xt(JFybP9Grwn`JB zbT)X*rFPPvOu6$0v`3UNUma^~kOtw}siMaoKePo9)X)`@4}(yov03YT=1Q*p3hG{XFoN0N{=d4^=)GXHeSD7a>4r5Rtb&_QuPgTKlD}8 zaZTl>^aQ!?f*VzW9@oYs;b0I`gC}(slG;U6;G0`#V!S1C={NJ0AqAqua4zn^oLhGB|pDYmDYw(Nmccv5B;i8y(`N;c8sl;5GngJ z#IeJIm@%NCz`SOh*Q^@_SPsF+Hjn8BK^$R=cS&MfCQ1qanIaw^M|gbh{NHtCdvo}e z>i38?`D3T$47A6kKZ&)0ag+WJE%mG z>5N)J*NnRClOvbyHLYkKvxlVMoR;mcH{ng{!bG2D5ns+9$CLWpstMO1E0SoAph57H z?|XMxgiw)MF4}C(B$1gd#`UHanV%0^U!IQ*(vjRB2T$5fTdzw2p~jLL@F7^YV_p~4 zJOpd@Qas#hDG$G5_qFVj(Z+~q!OE1 z>FzYUw{+$xRw|%u#Cl6wWdh=0`jMET4%?O8do{@*EZ%}P3QY=75<-a%j7YD#92{-7 z<+Q*4cKbhRGqbTVF%s$LQQnVKX1%>wlu%|S?!UL_^cSa9I*@BWw)PJc=w?u$Zqahz!y4!?^VU?@jPZ{o{T z%!~Q|WEhPi)$gf3vk&LG00A&dDkPSHT-j<1)fCtYyB}DAb3_2Zo0E6qS3CC;kL>pn zZasiMmb1FG_Tfo%hQYk0Cr)ux;;@#awxH;7u2dLI(NP5wNjii4x6(6ubq`-A_`MX5> z1qm+;BP~Y>uMh+OJQqmJ3&gr#M9Io+NFTfR`f&j>&T#**2`zx%0-b!89ztSFiRnv~ z>;h(63zlJ24cW$eDcguQ*}qvvB6>BHJ;$x%JaeV)tW)sE9n?+vm_?_>KAa!pZCcsc zgvXpUcaD%ckC&xXqc9S7Z?D5(gCIq{jR|h+!D{asXlrBL3s#PRV1vDx&nq|3CW*O`38}kPoqBd_r=tN}_G zV=+!yj#qI|8P;m8~~vWCEeM{ZT2(nhrKCx!I<0iY~G!s z2=V6U^$1eE0pXwwW1Z+}UyRIUil}wMOllD2sFKtbP2-ikvlVQMK&eNm&ea@Zw1lQE zjd(-bTDMxqS>qaL8eAOmu;`n*0bs^c%eARa36lcwh~n=x#ZqHo>;(lx`#3s)_|*?W zVQV4!>#?ubJ=+r>A^5!GjIHC>y_6zUo4`eGMC1J{?<-;@E zN`P(j@9`ZCeRTC_RN2TbSw39UT;#vk$~<&w1$7bty%6t3P987x0br$IPdm^!b2G)z zFUl0|gL<3x;A!}}m3SC_ts?3Kbvu3k2RLOv+{aQqfoctBro-AcNqlQnewI-e zqwXu)0GqpDF{f-Y&x1h!GH!MTi9c25F;&p?jas4+BIU!b+7%UJo0rz%gz1a$r)apT zoJ^p46FEA6z`za6ZvHY6e~4TPZ;O{IhMuo7*+?OWGv(DlCFiRf$sPB|9+t=RKZm=mK;A}q3(=a`M`i? zP3jDAzX6Fg6%)u|_z?tRkImz()k7*Ac+Sbb1XG^0z=)|(rXoMYaToKNjTNnM_Iia*>B zDjqDfu6K*B|6s%H6?rZPk!!VwM7y!R7h^~77%N;EFRV|MY)+MJ4k=vemp<7pyfvSC z@;#gn5ieX5F4SQwGkS~$3u)Z!nYNc>(W6(sz{$`r(Vq3;SeZ1R^ciW~>>z91Y|o)< zJ?zjwgJ5%oPc86Y3u1Q$XqO8kb!tSLy{E!ScLr_U?w&nca(DV}-RzOR zS7OgScepiQ3x{1sFRW)3Z)TB^e@XOZnLk5fKkiU>xFCBU3~BFK$3J%|w!UY$y?4NV z`FP*&F}}xT9lzJOh&{n?$hVfOSem&We>bTo7q7E(Y=2eE$%*Eg1vMifP`&=kcM zTFX(POZ4QaW>zU*)1g|asbwrG(RQL6oL*_60b}30K@eiYkT)`r!njeHA9n9Z?Sz}B zL?mLdeS@Gtc@b+F40r7@3T2wdEmqoC8L2lFIMOBS=VzMFFqkf|FR_SdmSYVqsjfYt z7kj%K6vU4o8{EyY8INi~&m2i-^+peOiA1eA7i|!uv$lw;3HmC4;I9eI?@_4sOtGAB z#XF#|D#(D^2(Wz#NC-XnDeZ=eGuimEq7xGFQ$jr{f^}4pUlnVR3*Rt&?%ERCnkEUe1Cv%hs?5yd zXjaHr8qoRTA$5^^df zu57+VC-wm|tUrTb7o*nhB*c}X(9c-^1+!XxXNvwl+t84He4fe@ z;5! znsOl_*`Hn)poJA&T%@ z*`AP8K*lj~#dxfp8dAWhep?!Q?xs20aVDX|pj5|~5`h8*elT8DreS?xB7KSb`nek% zU1c=Qw>rOh7GgDtRAG!FH1@Cn7>0pjx%M6ULNpB_K%TT#Uv>4x-1S3FkVtVyP*Ov+ zSLmYFpSgj2IWILMoCUkXLdY(l4?O)acy}?`uHL<}Zk3`STYSv4*#{O$L;h3}!!WYR za1t{cCCzVxaz_vmK)7NOGT;*XX&39aO2U0R$n zuW)mK&P2x%gAJynV)E&|d22{RrKo(OH@slNi+m+3{2NjYk|aEm zay#alQmb1j&=otL<}jK&XcwyzRt}q3k#`=u98!!x1qOnW*i0#iu-9TBuv1~DN|aRX zZ)iD!i&~GNbZmnJYtqV z-AI^0`K$l_8V1@>^cpPm))oZ|bMKtR>L$>Ey-p=;R^wO@F^~@+-^FvOkf<%8i0VYR zTE{AFitZMxDb3X)7+0}Lxx7a%GlAruvj9J5iku$k!Awl+r5fP!L?mJ zCW)VuZIpMPLPIVM!QY}lhE*5mM@*kGCR8DgSk2f>Q8CJ;Mq$qoGb**lOn9L&5Xbp# zYMJL0s~4t9p?y&P>D$veNdtLC*o8&QIMsoHdfE=9_z zOwOS@XnXykdK)9XwL5LJYdFSwuo60)U;~3GxRI<>902Sz4mN|aqLAZ^2+tg@-dlbvdP=#1V02P%fW*QE8BsuP#ug)#{YP@K#Dr6PLtoHBT}Uw@_m0a zHNXu?S~)}T!okoB>~V>VE?kvI$`S79*=D8Fk^O}XoW5!^z3@us=#6G>t&_Kn&II_@ zLQH6^+R2n{UYXw8SWZB?I3PRETRVE9uL+f5a8}yaHQSpYuc`l3lb3vxWcYcJSGP=S zhu{nZlr*;mK91*tM5#REh7@3zgaO)N33LTrudKT-`fWNs7`$e!)1;F>v4SAyehJfT zwT;!;-;H21v8xrBKPFTQdWvoJd(R+3M5sEkE-j?js`d5HmSzIoJ$~i)>%kU_YkFoA@Maf0aq0i#G=g zI+&58V8;}C?!1}7qA*W;XxXW>R01W)U*;elp+X+90yw+#0sXN(L~%!bnR(R-@Wy=? z3&5j-*b+}IuJD1#KVo^pPygU0yX#ZoH=OXZ(`v_ z64ohdco50hBY{?Kiw*|A{k}w~*X;WMU6;J9nDT~0V618uD%0%Ct32KD&-6K3_}D!C z=oHexl-C(90QF;Z$B8Qv(B+LGO9nq6{;)ea$*LjW~!iQHb zi~k2VkB)=f=Jzs!!|PHKp|Xiqd1kQ}&13MfY?<@mjI6B)ugCBybI3g;8rYg$!&(Qn zz0sJt&7zBI?69MEV~fvv;mm;ZXzgj?W&Y#*nQ0u!G2h&J=lZx3+VUkrnak5^LLPb6 zxFU{A@#<#)pX<{Qglye}7Et>`u<^_bDMxX29A4qu@TGraHmt5s1)IL-GtujY7T$v- zR7<-HRMn1c)YMLwZgs|Cs2k*(@yPZYET|3S)?cDu1{iVA8R=i8>y%G+pXL@%bsubN z{=q-(c&WrwhrgQR&b<6;xg)anr60JWsjrLNx&pKhr60UNw0o*nJGO?fw_*P&n6W-6 z{5HAy@gwoij~_(;Yo`6bdN+P+J3B`khi@f>(!ble|Fmt&;ogYKNS{_tbxHb%-Y?z94Ud}5+fBqG37!Q5nx$$zbV{(z!xDr z|KK9I!40JcrX52=r_=B?ShNkM%VcbRQ6a*ZUIoFkT0vGx1uSYhQ^YxI;Lpi7vi{Xs zqfGJ`dibRlEkPU5Ki*Tj6ZL?qHB!TwXYVJ$JK-$J=sJ{LRUbyhw`{dq*u}ry=%8Ft zxX9&eQY26FE73r5ZCRNL2Gy#H>)StMzuoHLq_X4{NpBvTDb!Tcuuyi69%)npB)}B* zA&|hY1XieJynYtH!;Z_^1ra&;C9adgT>^W7KG6!+&@EW=s97#&T<;~UENxkHgNj8k zkWh#6tuWyP++;so8eIUV!v^1Z!4C8zP1aP=rXD0SaS&|=!n1{;lpHN{C^v0u# zumgD3qVvPZjlx=7Mj7tbX5w~L5;fdRBx5D zTFbtRdWR``j?icm7pQ3SXA&+c(Do@?k16MeS*<>7Cy32hRIVDtER9(oY13y0uCm3) zd>(Pi=_tOPPlWh0+bHSM*=tr0tv>nZ*w|OPBbU@KWd;MAr2Zua55Sa+;e`{~p|!~~ zQRc!mW+&wh0D5bH9)4@?Uw4&#YuEgt*rqJuM~v-0_UMy)enHH7{@qb=MRE=vd4F#+ z6hO4Zq!S#Lm_6renguN%|t8RQ_XZ6AYE7X&rYL)eIxNTWIMsNjGGe@WJaN*(0KCY@cgZ~hJN#KThJvv-FvE2@?^ET za@{O5Q_)dYi6JuSYL9#5wY8QpT#&2Q9llwXvQe3adD7)s#|~-_A!JaJvm_xW5YH}i z{#7b3$}$b{e#(0KTg=x~uW^qjJ&W2KybIbA{4-^xXgeKRwRL``v(5;Oke_*^vI4^f zAbEjF>RGd~3QV)2-3g(fH{KT>w9rxQP=Iw{LCgKRs3Lam=J#I1!S^nq+L1X^GMl#j z&TklAkLW((iTY;J~xI6%=^GSC-l8+QHO&KQLtYf%0eS zLAVNsR`hUdSi5aFiQn6^p?f!sr#!WtfFNsVwr*Y{n35vxSp(#ca44%(e|TtPM8>k1 z8JR$c`;8HfNl8LTcEO^`&?8fQFgbh`pPgiP)#_wU)a#&ac=sgZx!@$n4MVPR70yal zbBOv=Oua)MS*2FDWm=;kzptV00!tK;FQiImLnq(XvceY$+El>CGD!I{SsFQC4G~&P zz+rWr9cGhoHQ^_R5}~#*f8}K~ML2h0(0=NXOqRrqkK`@<;;e?bAK4;ihmT{6Z97-5 z##f@H@Am8AGs|TNO&1CYO;c9@aw>0%86dWK#z*Ebj~&*%2)Fv-pcyNuX*EF<-!30gv=gq!8K~VJkr^;pU{Y#!)*X zR1Mkh(1oQMp+7|~JZ58fcvDgqbTy&fNV_A<8ho!H<#L$>z<2$AIS&CH%-5Bk3_!=! zw0=>!N#THg<(nE6%&eW!KBEuHMepviXZ$Hy?|~|hJj-3eCigb_yR?*ymA9L_Gv^=nF0sCWwip8puQuHNzr>iZmR*K* zIFmY|P%I>9p(;x9pwWh9PQq@R4y5v3NfM_1mc4dPC$PZ+|DoYXE5ER-z=$fVw^4Ht zN1-_^VnUTedXS|-G@C^sk7**jXWJ^Bo79nPY47Z5U^qK8B3n<`FK_P8b5nVE3cwKa zF0%JoFyaX=n(aO@QE0_vocg_e^Q4G6Mv|41TE=0V z2AVTHq_wfKcj=Yw5u8QJT4ka+DH-J3n!9ouHd>Daby+qUjF9o^~Jww-ir+qP{d z9Vc(*{~N4lX5O`Ct@EL(K31Kx>%!jq5_dU%zrcV?Wv800(ugbiMS>sgWfo0%TLr3T zkM|pWGqY*= zrkd8g&k*a0L}!;S5`OeSM|CayCGCaCtepeH&V-3SB=qX?T9k1mrBHj8JJIl*i6mm~ zb2PvM#h_?if>^v9T#Le@adeKtAluj@Yfz^BHK}5AhstMzXF=#YDB}Q=tw6!q4SQdt z2IMfK7TgRsUJS&Be#Eo|4=ph|^X*oQJHPHnz&^B6_9Pw-)b$R%`7^V*N5n4MNPIKu z*;eF+ndQ-DX)$9MCS_a*b&9|$liv~8ODy?07`Y{#F~pqO3+6w!@VvP2D#}+ttAzUP z8`b}D3;%0>4rsgr&_ppls8VIjfxsY%JVzoh_8CbKgdeSRUmODHC>ba-31UQ7Zr;RH zVAf>~A@s^a2mRv?pZ9s#6+54ss)E2h9>DJwa_U3U>#uuxD+Qe7P^vq}i!{J>p<}CK zt5Un^a(wCY^G*6o8Y_u%=BUu4fdbEw&>ig{kFqx$Id-+&92 zF4{^z;)iJKY2BU2|UT*lj+z zTlS7RA29$rvdE-@md52BHz?GxXJ?}J?}?{g$)~Ph$(+|ySzY8tZ*+mC?!qv>f^?B< z&O~R(vc*R{eIF{v@pjxiyqDU6JC8O$j*Esyd7$LL+s&cJ4Yo*yZBjHC>rLRp3Gb!n z`X$oYEQ@HZ&(b(cIe||I6vlcpIZvC-%Ub&cp*dW97jkDt=Aj973G-1`z5Y921N6M3 zx$RGsf$DfiS;oIvu%tyHeV6GHWiO$nNIH+W+fjJijHVNrATDGl=Pb%4rNyW%xy5?^ zu(VXV<};J7!@vp4^A2KhRYiPyx70!kyQXcpo@@NDv{EDDWus*?caY$SXND2+7r}Dx zAWaf1XLksxI#t6%kGa9--~OY;EbC<_!YuPrU~4XJ85Q&ppRc6}A=^WbZPp1{Hd|w- zOf7rVkY##XYmVDjT8tijT@a4)vbNBoEc zI6QYo65cxjqB9_SY<^tc#s?c5qT*s49}uF-6Ckd7zbghFUE2ITn%X_Ua4oF7e)>Z3 zqzG5(hWahdaL@GcOUwm6ks!DUkUBcp&O3n-X!G%3y+F2U+Hq?W-8!3Gb56RxW2f>B z-}>pUG;nnT3GSzIgZ5FeOaD|J*0p1aKDBL073O8T=FBTj9qm+YakP=dk!3sJObhd| z8Rl2wPEf4c{tHw425F+Bz=&E;?K&(>{U$A7NwxheVQ&v^DlM2-j>2Z6K^a>O7J7L3 z@HFtW+8Tn{G+geyHIl=W_9+gilC zx%%gJKZ+V;8dEc8k-mygIcn$T&tGO90_3JdFBC(MRFlxwvjr>6c#c2-ww;uZb#N6{ zDq7^d7hJkBHPwluEGBr^PO6GeaUg4bQCGC(Jcy;$KB zS6o+nkOQlfUaEs!E|uJcmQi>}ibGUx6CGcomxCs!Oixq0+ml`FMEo@Wv zo+QJQ$rE8ZSvoHA{_=uw*&Dgy6P5+9<@ey-?+11tVA0+0M?`O!1AYs>=zdKUn{G2t zHzc?}@Qxr_;k3v7ZcI+FkpgC9KkUSCV)83VNA@kaM_7it@azhk9JV=(lsZ%Ke(MU{ zR=DUbOGu4;Ss+}fHc!AdPc%oWrQ&TwWyjsi;gs);+hP^W(4E{&6%6Ed6Ke}&1V281 z$j3GZslpZ4yug--ud3z|}xY>rj<&6uGdiWpfUw55ZoF%rM@Ggr6A0V|(d8@K-%Zf^Hy zSj=xld-HQ@?0{wyS6a)gKG)>wm7r2)2{@aUUuVU1_L}J|9_*zAKYH^MP$0=_@V{%Q zN~s**(@mcY_^zWD$pINa#EmVEt%#ROtb(c{RmfzFQy2u52FZ5)x#A0vHA^|ezCH*A zBK!2piBJG((e<7?$j|Y7db*Fa=B2qL=*?3x%6 zQ-LKqfS|9bqz~dcCIngNNG^9 zvKCMA9&t-&dre-vF3k=3fw+V6mk;l#fD}8DOZGg2H4@RA=2I!f?K6GhCFZpMy5kLV`t>$bELk9ftkS8s!Y6T*xRs#8zyl0!837qR)mT$og$B`I7_VkNMh zJ9hE}A_Y+Z9nyDMrN4r$@gDQ`zY>iDg(klVcz1;1ht~XvclmS-R#zu)HRN zMzZ3dlf-t$+3cslu`|%?ll3m!VgC7w7Xs#HTi*a;OD}M1sIc(!DPW`KW*q9?#up0g zubovNSX$Ntp5DMW1JX|AW2PfFP9&Tbd zJ@F0UF7)c`$oXK$7bm>e-*!g{OIqGEqzH%>g(WwH`wx^oetSNv5E46PKjEB{$mT}X zqiUbnbH88Y-hcah2RRt=MjUDPW7o3;6?rLK{X}s42h#$NSdPe(iM;p}Xg`@GP0kK- z{ecBEU*Q9W)>U-glE_s97E$O@Oko$!vu()}I+=w2DceYC&8X2QTk677ANW}dfOuJM+(uI8plA%8#*jLIeqD$MW^h6=EeY^=cX9f}bR8t($n{G%tNls~QaQ3qD zqYeM|TzCBwslUkwG<^R;YL_pDFxUUgfn@B>&HoFgN&k5(>TYW6`k(x)Of_wF)Gzc# zXvsi=5Gi5oSrvh06(MWXE6Z67Gw2bB`N7uEM3mZX!4xk?Uea(O{25X0TDjOfYW2F= zd5_ufJ{c1Prl={ac6eS_y~XEs#r%Ait?vivfWRN*2zH0kBs3f}69^tE8d}k>goV=7 z2W7+=@y)3Nt0$}Gtr-XPi&knzZiMdi zSb3{)nNR%Br|_nk@ibH888`EbR@Dp-6^61=Zo%gb_&RbVq9@lmE&UC#X_&G5Lz*#% zqA|}(Gli>OyMhh3e6Kb`ylmwrFbDtz9g^F5h^(vso;@EFxf%-Au~fwPM@Bg&0nM>0 zy;tZf3L==0XQ+9$V<=BdMz(s$F?D%n3g&Ss1~@|F7`wUR!t59ddaE^Lr>T~Tb`dMT ztULN*u8a+5z6~1MG>8ep*>V;o%zUl+KU!~Ki%?!INWAhpj z5m?P4pK@_W^jc&$U&;)~9TiEu{76xF!Pykdh+NcEz{trQ0K1{WZvZ}0UKKs44%)$aJ`X-WNn~sol$-8Xk`8lXa@RkQ#Ggf z(3)KlqtavAx{Qxipdy?zoLhFhh-_eZEAT93@tTMneDczbp`(MweTs%#OcM2CmVD64 zFjN+v{aT@q1Zl$9dyLusfH~O~G|`>d;qo)0wjV&RO*apn*H>@d<(!P?3GSEks)Qub zeu5G6q-S8GFWmdbtG@O0EO6vfTI9xvyM0pb#-xwC_As-mc}DJv8n=C79(+F(!VL_2 zZFhP^wnK-#mh8aVmuHSP>-03-HUlL2FK4k=q}g_Ez(|8i)zVYhEg5=Qel=}dH+MCC zRrij126xm=q(>0F;U=0@g2c0=bL+E=!y~4Uw+Bf4S0e%kN&9#N@fqiuzNist*q&%O zdpz6j$Gm_&=#KvMBN`<6yY}(a&>KA5+f76kL~O66#Wf?rvFYlAF$g zu$bu=i+)CXaxLhEe-D+`8eT!cfP8%(UxgZH8N+3suRvU~Ru$Ya80p?USx z9&v)vprP=lnt{;Q_7K`A*aGkV$?#n4Tl*{l{v|SC1Q;>XEbf{0cBLdOtBVE0lhVES z50vXg0})SZ_;yybIVXCgo0$M8<5hXv2xR&~T%K5LHCK!|9ZZM0zSeOmCO6zOrLTor zo>wFR&U4E2^F&cy;=yxX^>9P{YU$}cW#z5pveV2>zTL*(;?(>;lSlN|l%vMX6td4K z(Bp6bhkQ_@4zY2DZdN|&4gJ!7kKxiK!=^rxZM4FS2T}3RHU7CwtFFF0?xXp=pF4;% zW1CB84_WPrx3}{*6P<4O4acHk-Xs4%ffdY^9Vy{U^IZ33FYsUUK>o8^`cEYNS5+xf z*_Iwu5XrZwGAqlo{P8gl3?05y9$g~^rte>ol_ zf2#_RRS!<5@KVWBdc8xD)*R%?a_dU!*B|KR3pmjz(DfwIb;VVrdP@xSv?Elf(B{%D z#?|!I{E=8Y$z51$_pM}{S)V^+D3&3s?D1L(LCkm`(2^zx8VvRcJBnCy(R|`y-d2QL z+6bXCk~J&tPKka7(x|Lnwxrljs^oYXpN)+{hT!=)U{k#O%bbcaS1qlCD0e zZ{NNc%>Qms{9hl{#rYq-??e@A^>r1r4FEyVNCA%$p2#0i7}J#9ekvv83LY5RIfeQ_ zC_XJ=a%M1Llz6zLgFW^8c)~YCzA#(K`J7PGy?3;bEW0@MUj;Kww8lsxyW3~kOcczi_fT!BYVoYBW#gZ{a!#A`EF@JkUkQ)DVldX`U}#<1s6oNEpF zXK>-cb-DUstb(ouN$pg~fU`#H4a@T@jUHglo<<(soC8~A_uGW)PE=>S(B3`UqTQ;1(HNv(NRHDm3=_n= zcCX1o?3~Ocpw}CDq1=3=?cWm~|3{ScY-nh$QToSd*_l>PoN-XDr~nQkZSvmRW2u|C z_9Hb5Z9FE^i%z|TLN94T!cp~Xe%~ECf{&EXTgErIT1sNMcII_k5!Ea1qI?BW2T2r74UP#C?woS-RmZ-099{b_k&)T%~p!lEif zzfa>*mALD~0Iz2H9cyq1H?;Q9fBR>srUB;~88c)}lKC=?#^l@>(%Y(x@qhD*%T zMVed;8NdcP4_#8x)2Xf^*dX{ zq3pxDV19y8>v;r;INl=1b;bSkZ-|^F9-;7PN1myBk_=S?pW6?DC zyQ{ArDW5Hcx_}^?6g!i@Wt{p4m~G66QID|bP_H;Lxw=0DiA22%Jh_USulF)sdiP71 z4ibi=@dQ288fX}fN_T@%7e*_s4pIWIs;22AhvSP$^ex2oi1yQu#ERru#5My&`~)nC zk7mV>t%eq32hzIf!d8CL+7t#?_Ah%E&(O(cxj^>Q@W-g`)bX2*NVbd%{Ds=M&;eME zqW8E;?-w>U&toL7?kZUZB#WA>^sGjt6zPhcSIwx@C5((~KK!je`XkD?_SvR4XwMaP zL^J2ItCyK^0M&5;_edG?8X#!-G5}X6^cAF-JpU#(p23{$Wgtf=jqrR?ft&&i*2=F4nn2PuSqs=(s{qUbUDF1pT-h|XYPRY| z%Q^EG-$KiDw%p~Q`iYaN_k2&!t8-7?S2Q1l0~le* z&6f@5Aqu;w9Y)IxiTk%gC?h1pS{J*le}CxZfGur^)r`_3+`JmtM=7r0Kf)L8quQ8d zls%yqsm@`u;NijRdA9!1UKgp_47EAb#iQBV*j{3Kp-rIYB`lrCbm)?BUqP`E@`BW+xTSQ z)_aESX~h{LgDh~qZ7ICoW*$zCj!_x#yUK&pTsIxy6SQu-Bn#e_rik8-YyA$v?L(0* zwnq#84tArF{VuY?NJVVxR7=_$B&oBm00V)mKGlXQr|Bj*uK};N#bDj*hSqO9cW>{) zoSC3niesucs;r=hT+}z3h!0aGLw#J9R5DWvt+Bx3x-mwEbJ{9k3}~srgfkDt#N~o_ zS#sx4sZOHAk#&rOS7uR?7FQbFUIer*kz|YU%u;lRQ(E&*`iaa&9-z$T^ z>Ai&N@LUzE@~N`mhA1vJU|IB7<<%ptg8uxe&*tJ8wqMy=8LkXr-`A1;xE81$HYSR` zwQ$WFVaX9RW${E4W#NvMy~GeD&cYKGzusO-6vc?XwZst2uKLB|`Eo~x$9~i3lfPC* z=byV~^R3#!JkK(Hq+fYKfAyVMwC5PMUwOgzt=>h%`S=Y`d6N`IxUza;j(!8bvUsAF z*Xj1c`50W>y6FTWV%-CQ=^BbPWjh#rDvsKH;!njuKf_=-f7#|7}&B$dr`l^b5kR<8Tx1opmb&hx}IzNYVCR&_(15^o zgb&eiDKO_UQTZvUX;W|EM8zx`uT+=Xl1kGd4saJE02wmQq-I~eWjV1 zo4rUFo;+RkXv1Juxs5Qk4Fq_SkI^SIH1}27&mph0qGQ|r0TePhKliH8W%Bwkvd8ku zrr#Q*!+P+7`6JjmTFzA-T-Z)d`&DNn)rmGb#dN=DM^7J|XxMFb61a@_bD>u;K6;a9 z=B+%L|<&+ck{m?o2mKy=#JC$vj`cH_=d}iA=QOXtBH?P7#d= z@A1eJ62!52bW6uu-Pzsyw!#4ruz~>;uvlVlkvodgVrTqf9>z^EL_?6hoAhNj+_e+B zw$Lol_nute2MD)mVRv-yXyD2ZdzOkAbnMnh-Hj(CK*tqDjW9^INJ6+` z*ikHwKs>F&&QdlWJz-I^Rz<5LnvcPZGA^J({+?f?KIFwlIFq8S5y#*k)tio|uzaqNYZeicoXIaa1(R^NEAW+d&hi^nl^#;HT*NaF zK5Bt{<$~D@Ia5~C#)Z>Wp6kYcM9_Ou;hW)rybzI3kZCu`(4T4j(b5eN-f!Ot{kn$; zV@d>X`F8Tvc;}HZA96$%fG>gU4p(LOp2y|Q)!6UYLVk_kefq@-Z)U{3?~w`Lg(!|6 z?Snsfa=WfeXWFjGu#d)6f^#~Mx((Z-AcYNiMBa};+Sl}kv?=n?K26{DMD9QTnef3a z(pxjAce0mTV$?-!bWAcJ!g^4D7Os2EtbTGs>v*fG!8Ps;jVFR zm7IH(Nzc0UVYfYPcH4Y4b8{n8TNLWM8e-pi=)CIqy_{ELkHOPt1F^MzeoRWeQWp{=FJxELk`## z;`h6Uzn5QhC;Pd=FSNJoHp*`gR&v|V{Pc4h^)o*GGpq-3yDR^i-}?a3^hw{H6^4Iz zWNG)ChMTICxbDlHR zCrBtaib~fqu_qi;Zj!PXCmJc%1}p2cEYHRc*|{RsQS17vDhuUKT_&BfrzdYbY`Iz0 zajzFGv}T=vsHg5AUka8C7mcvjBda$JAm9l~B%NrTtNyf)`yo<0)JjZGfJM@` zH&IW!+rlXHZgWjh=K)^P1py$BKLLy&|FzN-q%{N~It*PAq-#G&Z~1P?+Bf~$PiKGF9Z^p zh~5+shzx~oQiTZZ%I%3|${if=rCl!Y_~ilv<|=UwZGr%lM#|-|Cdy?X?k&wEP_9S2 z20AzQ&wS>{alV#V3H6_EL7j5c$&%@3Itz<+t%qgc=;f$ES%zg*j8)p4>iCULW~(it zy76ir7Lg89^xqbQ@=8<7aiQ&%uie&5w#nBkwzaPjdNHn4{@`b+{Q<>Af5PIa+Qw*? z?x}QVCkkI9U9Z|kI8SgW4$%EFooM?b*}DOgNSc7TK*ub4u0S(CS7uiB;=Y)v62eJW zmQNxjqIZ5bWpg?>K3C&gM8@an)naF4Is*6Oto*um@5=H_~#01?MpR`(ddO?Cg6YP;GZ6jLZIRh)1!ZK;Kw=y{_9i;JO?Lg^s=Oup9z0u zk_a4e4>@R24yCVNXQ4@-by(;WM;zTS`{pm`?*Q=|EW?hzTAdK7*>k-O{JL2=SFo${ zo+dTvr$!u}svKY+Oc4WZZdJE(#3NLc(r{KfgWGYo?Z+%ed;0>(GpttE*8& zkgBd%99wA>x?+3!GeiCs*;tFMJiWJ7 zWoNyBZeeC*caC)FeYv=s#GSo&@a*R)l3kiGo3*CU>Wq%>K=hv-t#Yo>A#nCfw-tw{ z3|7;H?{(`#%JA4qv6yuuYb_OHQ#swCB(I2n@j05=yPWO5jljy+)NRgFfFvr^PdD82 zjqNrG)wZ3aZGCZLmxiIn4!uwmLs=@KaSrvUZ6mOLPV8|fpownpuE>jzYKnc+_Nh&& z|d#|US#d($=Mhh%_ey@w~3yaHxS1{q0xWeM9>?*IT22gl-3tOVyYCq2~ zl%Wm`?(>*R{HY@)LGD|zOhqG49#=c1G$zg5rBK^~3g6I`xxD)E5@XDh1Do2`V%K!; z7@5+-?BNQnK`hCZNQIUzJw2Ho*;g7-gF+HwsEP_eRPx@a|lPKvtr zRvwAaQ}1q?8~xMe07hPOQh$NnT3Oiqo66?rPp*x8y89p!_-VAj(FW_(L~hJyQ?;wO z9T&$uVsW3l<-uTIDp8_l$37rDL0J;->Y<4kTb9LHjTo=ChE7cF(5`@@^v682SDYVx zinNp(OYTZRovVG-3y;g24!euqj~wzUYDXr*x&i}@%^ed!xNY3xp9j6M^NE`GN?OzD!YB2x zaHSwXLhpN;A8*Q54`x-5sml|jzbhFSC3ra;Xo|64ThT0UFIz^T^(0006=YiWad|#g zdTho|V$n5(K*NbhSU)B5L6I-&!}~X{yI$xc zDC5ZibnvxBC7{njg8rlH_zJ&$C*^r`t6C=?w6dhpiVaCI5<#1u?x)Dss%V6=q0vj1 zvnW2HLy+-^lJ=;~$=xaAG+>xPoTjYqNRkLej?x`xit^gy2_3u>3iG5?aeIhCUX$XX zk>Dn$a8wU>CMCki%%+_o?waPLZKzi6uw?ebtw!ME`)0mDlUWh-SBu)sEdh%64(S%r ze5vVJ_xZ41R=wKIDpiJm!51H;Gn4ADc0W~>AbAzC+~6C+?bIUDQ3&@BygWs4yrP@f zycM~?(DjjC<)AmIN_gQ^`)9x&RO~4-fROEePXWx=KQjAxJL`lpychYO zO;_>rt_OKdtpa{IoTpwPs@RVv3~>3wqwuA$2%vGjr!9`AKIIsu|@y57)b&)Uw= zNi0n|0q)LcWF@o?9=MeAo8kahVb*j11JXI1<6yPkN4MUx!0A)cic4_#zdZ~SML{eR z1OR`Qz^~QGvTh7VZTa`&I);8p59h8cbs#M}_!Nk(p#|+sgp2wM?MFCxa{qSmHvVoL z)*H*i({c6)VI(0)=;3#|>6sbq5@8EOie6>~d1N?(-A#BXeW2+@tcb}^(ky@6;_af` z@SFze#PoddyfTqQ`T77azSxYIVuJ%?BoA{m`p%no>S+g#Dqt5_IfDA4XtAN|=S~cu ze*%gp12`Dug1y-8a1&X#*{el0&YQbbGcBRE4ufj|PNX>K!agyY%yzPzQ~+!wqqY2wcWdEWGu8!)rhNb%b<>^aE2C~!eSEHMVyIbU_u z5G$+F$C4K+l{cg`J#_MSKKHKb7c?KTmqiqqSRTB%V>B6q8inV+eb(rhtB&&FMJv!r z>#yZA+4A`PG}!WQ;I-jrF9r0B=)JA2orWr5Rq|;$O3hD+M(S|^q11pPY>TR2L}q~@ zXT`auCEZdwIc;w(7RMj>7x7V{n`@7Av>4hleA?<*+eIsEYdg1 z_L}=BeMw8|dly*tkFfVY;g`>*wZX_v4FK49Q%I%Xv*{iIX3Mr^ zIlF^P2ALw~K~LvfFlEL|9{igavv!kz&F>Q{ApjXM5{{*9VNCY*t(e*82s~bh3B(0RsF?%^e{T!{%cJy+a0)*5Sa!OCH3=n}+4DxhI2Oy<75_so#H3t!Hs<|n zQD|FLm|AGaC!7vJgC6~)aXIa-H4$`U^y5A8x`Ja5r4X?QE)e*@fVcZudk>1Wm6RiO zjqY?t^@KF>`PK*~w;TBM?d0_H&E_vx<_!c0@uLGW-JmG{e;J$tP z%Ip7o!|xwS{C^k~{TmTxPvt9D)yDC}{&~QZ4+>|{Gs}dE#hq`AoCpJ_CGZ{lhCoUS zgs9Xq;C9zOZ3Z$ybL^mQY-`^sJ6^$)WZ6Tawr*;3tMh9A%8&j7EG7_L5blaR)R$js zJ}(H-2x}ljG3Xlkh=#1^Plm|1a>aW?$=*4v#p}=@`Fu(v-Cl^k>DI4-d}bv-=lPc7 z^FC4;z$vYek=Zc9c@BZkQCg!9shX$APqBDzx}=>J2sPuL?8Z8k4M$a;IIS>BWzbi< zi*$N*E9&zAcMgrRROw+`FN>QUlqJ(%n7Sl@#A{ju3N}N;=lb6H=Q!=J72{jW%3mbE z$8|2-tOiCubSp<)&J}(ICB5>3LlOGd#C%&g?lh$B2%g_mg>hQtg@=bwo-wVJeKC)8 z2`={IftCbfE^96oajkhb#E6X%N1&Iwc#?22uhK=9OE%J_RpozkJrvGhjk*0ZJN3Mg zhv>2f7=L@Q2!5bupEj3qbR$4M#ci3PE`Lq8_H`6N*54C^q zeu4@=c{%@*y*wpoe1cD(jp{Q`_30vJr%L{e`JQs2Ob*s#6&G2Tk?QvFBUY;xx-)XX zp{-%Al)hmm2l8|h||(GvT+#uQb%1MS_@B{Bi7*Bjv!1JLUw;Y(Qtx; zHCnZW5(6YMUa9k*)y?UFJXcEpAkUJ3TjXu-xCL@sZdZALV|qbrG;y5$x9u7RtMdn! zXy>N>#Wd?lHTRsamV%3qcAcu!bw6vJ_d*JaZ-M7~b&4hH)0km^XI1n^E!O7tSwx9m zUMYtQVNnxEz|+Usr+_>*;5Uw&=na>@paOtVNC59T3-SpL zMG!RfvyM5^gt=dLCLlvHRT)R zKMRn0fkbIVUy8i*FTA4spI9aD@c%g1_ICeKY$Rp;7wsf6!CHRK0Bz_K5%-KOTt>id z6KPP@^w)1pq||(iKDrc8C`W~}Mo7(dZzUrK52O4GrcVqL7-uXsem+coA;HPS;{>KR? z%T>Cm7*OzVbg)6p2^7;Q4Z76&8(K}?8ybM&xr7IqK20|7bdRl-996hGBG=X_$BFWn zB35(e`j1lu51Uj~oWAR#8e;{=0apBKn>Jj|a;4ia6eScfQ=Nh42xW^nM|f1Gsy(0p zMR5UM--g5iZ>3ezq0QJ&s?MutCOfBP+9b)OpGj*pb2I=AGmY64q+^6dvIgU1TfL}x z!hGo{nBgzxf$t#WsA)M(Ov!_u##VtV+VP7%HZ%a-^D!?#R^r_l6$io5WXPdx{cgH@ z+v=}|DEUw63G2~3x@o_2w2`~;Mr)3q&N;!~G_>PIuEC8CY9mywwT&7Y$QGN_?TvY- zk4r4PmF3ux?Ort@?V<1e!S{Jpr&dJrb!z}qn3Y02$0K`hO8t)&M=n44(%d9f<{(>f z8JZbUOiDpaDbm&Fa|c*Omi*vz%K#9ed5}$C;PN}p=kHd;w_rXZJi2yacfM{YLmzmu zUJ3a8ASHO?pc$M&XjI5Nc?LqLJ6^Yp;If{M@Eof{SB8!WG|$8K06N zF#J8uS{RI+*oB*Kfx8bcc zV=0LrF-Hi=RH{izWr^BrRe9N2F=icoyFt+hXzs0Dshv?eH&5Jbfr!+2(55i<>l8Sb}kP%ZH7l*c69Hyx3x2NZ-q6U=GG@0hzoy!F%IK)$TbTZU*FH zV48`9hEKaZsl`j3KAok`e2~q>Sq~7rzhg45k3=AB-u$S<>m~o{{MHY{Zp9+XEp8K% zYaN2k%#zA?FZfxyrcIq%0~Uf!5q#9m3ydxWdad2otgimB4wlnO=1~e9V77AT z4Usyv_eT=t3hUB#yin4Qj>27^@Rzu%U5GHMB8JLcmR1!LvU{mv$zkc^z@j{DEnbut zGW=hI4mx(ZsV>cGEV3UO?1Ag#YH*js`6}bvmd*> z`O2H*J>z*qi(ouShVvo8Ug2+E0URhSZGQ_I4oy9mUHmnLIn;?~K2#&eL$ zOJ>EllX}SB@@>UN6&ar#%3Fl(Z9s&x{n3S|S?o@56?3br^&s2aPF~(mR$;SAJg1k~ z`24-GMVU`tkBT5fcHYm?${=0&81rHq`{DqOlciYK(T$|H1Gi}emhDKe#TeFy#t2NQ zfQj0+P>5#5>$}Mzwm$BL76B=budp!4BrPY1B`U`*9<~#_r5IFpB0WitV0uRTHoKy5 zB$SEh3Mlas+Q7B){ONy*`0#5!q2J(_@Un1gCSQx};p%EchszTmm3Cu`#t#GBq}}bNNpXQ_}yR=l>GF|LZJPs$D4IiK6jN z_`<=4f0sc+`5}drJist05@{y_`rA&2PspMCtzkh12aAb2u%k}J{d?Ev(^~R0RSBt9 zBII>0jr^nCO-(bJ!bi>GJJ;&t;~U*Ws1lhd1;#!l>?X&jnm7QykB>=x|aG2V67dL z_orIr(`)F*)nd`A47kB(A7Z-FdxzW3r5(s1UCkHhU;kyDYY4=#f!h&7a8s%FIj{yH}&0~RT?x|;alL7>6Eaf z=FkHb$!`j@i{;i*)uRo+(ms^-M{;Ny>=$KCnKtiJTXsxKO_53rL_Me?#+a@N$2hEd zVaM4=&YxJbPt(>0ea}_!4oOV)tGl;hsC%l*n7&Ky`(laJMEaW)N$IJ$E}7D*y;!n3 zG!;A+U<6n3FgOu9@Rld~lOG>DGW6w^9ecwq@Zz4aJ4`F7m+0Qa)7Qige~+6O>^@t~@?x~2u;oHIq0rL45a32khv_`y=z7LqBv(bLOD zEfezJzQ%3P8Y`9trD2WJAvg22SiP84&r%)LJ(I=~9yv2*AyxYO8scplij=8Y5 zq=3BEXbnufid(&g)U;%RCXd4EdOe?_-E?a&{N_d9^Ah!)dSRIth}BrYHLGS;6x}qS z_nZAK$s%YcR)NN?4XE$Oi_FbhOBZJy;`unn_PXFJ5gds>a+U91#^IaW2e!z^Dj(Oa z@Tdnx;Pd8Fk>d63~xwX^dvt`w`&xM1BY)2Y3I?<9i-Az?9iLYb~a;_{xt-k z70mkS46BML=(k#;H_X;xv!6b7IR4t)of);IGbP7 zrFY4TnzMgdUu-qBx0SXmaD4E#{+d^6j=i-iMG~<~A?7rgfv9I`>>HiK4%3qYde;)#osB$5P`ZWYXdDX6sMv0o3DqE{&W0CIZ z9i5~nMWBqxDD7#=Ftt8#ZNtn>`ue*!=;sU68$*o&%guLha^rSpwzwFi)9e*)z!m4w zWNO>%-D4BsH@iJU0=B9X)0siaDs9TOlJ16BG1X#gu2w^3m8NY?4YOg_d<}9Y80=-P zzyt9P2IR2Z_2r$m^)oW2>o>}KFR4(58+R1ff~(3DaS|BqXslXE?ZHB_NK5C!O|P~% z4ia=PT%DUANIld$sLdmwEwW%z#sp&uw%uVTxtTe0 z@11jIE`IH2|NNe}-dgppT2-qzOXrg05Z`Dp`#Pv?JKj&_J#r#U#N@4(UquS~7%rRM zo+y?bNoK+bA~7kQ!l&R2%E-LL;0Ov@w(u0mw)wxCnF1NI`~qSrXv4mpEkGuxtaU_ z0AK$BTpBO`?m-3~EIy72dK+jHO*A9~InrOZ&l=k9Mkr78(}s{$UZ-V$mJnQq#FMq4 z#4dweuD#eh(M6`kMJly12Bl%hHlxJ5{ra5iU7+3n+7dByaBiM}^}5^r>Fvt*>8#ja z;EDIwz_T@CwPe@~I8Qiiy-_CGjM#TQ*ghe7L13hm<6m(!6(Zf?%%P(##6XWI!EKCT zs7^J(r>JRABG!Fd5HCeRgpiqt78rng@U48OOl=oWbl7bU<5tuYRQnUeO#mW)I6}y+ zAusWTr|OWgq0bKjvknU6tp?$mz1@szAO{R|$pUiG?`Jv}T@) z(yHJqv#hIVJ~J{ji>_u{?oBGbR2FCGicq~xHneAzbdxz3)P+ChSth)x*gU*Q*Qw^& z&=n2s``WUgcM{ORsG~C(h1aJufI+>1c(Mk1Iq3_=DtA^ES86~PTts&e!=w- zU^HqzfVcSwu}Ynk-vkr%Kb`2JRFm@C0d~5GV{V?X^7_H7TZJ93lHVnm{ZmbKu9(nr z%_oZMC|rpQrTY{cTUwU)*_Z)X1Y_&jmD$!>V~cGm#js6?JYsG6n-7-}Hq7k0D|$VB7%&duc$lxOB@L>Rc%yVcV--{PiFX z2KYl2hFr`Yp1a8g)1M!+<^}I4uSNLN9qQEju$eeL?+KO%RDLTiwmxsv^zmdf zrbQ<44fqAZJ2UNT9qbt2k%nd*$geSvq-0FAMX+8$iT`4AJ17n~`X~)0VgvTjumQWt z)ZIn9d2BOA8k@{{ z2S^UvuRX-b?Z>!3{m-86h^!_@wey_V~XW4E?Evpa0w#F?eoWNkL z>X%Sf<~br1G@NGTT&%A?Q7-k`*2%;qBNC6MbgvZKFEqFEIV99zPXl{u4`|09C8sdvi&i zwZf{B1UXg8oNfVqpQqs>{vvk5C(U zcJHxf`vNTq9%&d8IW)xIV87d*9k$X5YY2_r)7|*x81BhgP8kaDqcp+vX2}q8aVxkn zaw_NUw9yUM8i$B-+dN<<&toJH(!+tLC!tL>mD2KPmhT|fE(R`($tk$Q&#;el-IaKI zdYdjf7}e~yt2T`koEEPIzI#R7w(S5@*!0!t{m518C*>gU6e3HBBH9Gw0~4V_uA=n_ z*)6seAaLrpPG_hf>-6B3D=em#0pv$_On=t{kb zPW^SZVpvyY(z6}RdA|0U&Q#@@fQ;mOz989S7HI8M@_WWY@;S>}lbcRSyz=($jh*pv zb2Nk~$59gkNM~p6#0Kc0djA2sW(CUCwz-)-9aPzjtH8@QX| z+d{Jre>PJ>ih2eXIAQZcsmcuytnF{ZRxP9Fg^&}}F;n7;sjib$ z8{~-1fR-&}N554lyn+VP*m3hShP?HrQNt61`!pAy2@%g&lrDsy%|dC7nFQjHu%3}p z8K^NryCVh$2RB3pdqJs5N^|S-AiX2KgQS{_LdGtw{nB2x@YxiVX~~suo7XuriA`0Crg6ctk89YYtaE z!3A6WqeGwaa#mPiMOg+|L^a;~AB-enVG}dx;Eqo&C`INZeQU@M1~vyZ3h#1fd_Mcx zbMM?kS17^4QAR1_yecj0uvt$!$Vn-^H9ur}bbe+({YX*Y%uy(EWp}+Dd2*1eK z%0?C^h3axXs(sX6PTJTuEhEZSpey{W`7B{0jm(ch%spX53I`z|)y46egpN^Fnj3JB zt2KT>=)f^;Tb?(7-XJdGj?wNOB3;t9HU5o}ephaUA!^+J_}i9I z8vr{;BCVn@@Dvltj5p(tA$`A3Fj!~>?NSUnY)ylR-^6@QBK2gS*xEyBZ9KbQ#_Fc@Pawmxg=c5!Lps)!D7#MVHCqRmM2JSlb1*`mEP7oKr#Fcv;scj- zk_33fI^!uZcdjGy9T>)o;P6}dF(8Slj?ddlK}#O4q;NA=NXAOsX{70wW*T~1pSstc z>1Rz4xiM?Y`t*(l9+F3T^2+I-tJ^AAqgAL;g7fGG`L)>7%e7 zBa4Vr=s7GBEn4|u``b!hP$nixk}e7(V5ImaF<^y<`}ApV$sl-_D%38@;vCIb^J9J=dX6IL0<#2{9IBuMq#_BNX9kuMZTmCIqar4gT{mny|nk~VZ;_i;WU?1NnZFA|J29w4uv3030 zbD8J*zCCW4=ia~982G~c#Nd}UWPz?;31<4!MiJtOtJL5Phb!mUSrg@hP!cyNa%N1m z7My_wmjpYCx~8NKlB||y6BL7D%Lzph@XD|pN!1#8P+SY9M)pzrT_EIJen3WmB`6EH zJQOK(KAQA*=RwzEfdfq;LePMseKqPijFT~MKQbK7Lp_RmPdOY^i(6};pt;}N-TSJ| z?7ivZRmONsbH$+!fCP^x4H=OHr1F4Dt11LvO)pu%P)=u6+kz*_F^X+F)h=z5fC;ro zz!rn7iPX8-I?NlaeV!!B_8}H#Fsu_;kvmt{Pk-%}LY(&^qAejs3 z3SH2yr2(QX`ACz`s}_fsKnK}!DMd1$eweAv#kdGGDM}z}`*E_L%2Woa2^#?nHu;?V@)O`c7gs&Z?)psK61d)(!z zEuOOWu2FyVki=KsbEW9~26(%b>%(CqZAw-KLz_8ng0PniWh!Bhwo#{5g^<~YLeMmQ zsw%0AtUH5(HlR9Il}98jwlF69E`M2+Gt_~JOHjy<8kNx=MasL(3_BZpXDl|kmjj0i z_WwZH7D4MzGK?AO;1%SMQP(I{$eK9xiV`$uwnA&*Ja}MIzOJ%mM0H2tIHGvZ zRp(%U=i93xj5*BFfZ<8;E=7#+!15aIN%5NQ5qPW(vUn^F;=@v}?SojS{*Dm5=1s1q zXe>1$VH!I=meq~0y-*&AHe#fb(J4J|J&W+(8ietf?UfirJNkoipUVY-LbX|=cg z92;?7C-d{IVCB*oHIMwh?FWe~3T zdbuK>mJNh?#G!6~aBAjoEZy)eiFQ%ux%QaL2IDjeuXYH6DTL$Jz?FhlW#TN*JF2vb zIAT%%MU%FC1q~kXSgq?E1#Q)=SgDSuvmI<3kTHD)W42i!$mJ{bD6^0I7nF`e$XsF^ zB*bNY^AW+KhqlWmRvE9#`PTZ^Q{`m}AA5O-aL7!{W@O5VHF+CIW~wR9etO4<8r%Zq z1pzjni;mFG@jJmI%~StUHsjUZw1-lg|$Y16>E3COiJ*Tf^Su@Oy{^r1!+lJeS!?tDScDHoEGbQ_|}T zf5&GM^pR0_3*T22*J|f zE}M4;k6v&FX`1b0HJY&Ji2Q{`&ULB;<9WFZ@UlXf>4k4hm;+c#j^Dx9R9EhtX}@C{ z)W|{esmKpH;2oQZ{@!B{LHJvs+hvO7nD9a;nIGjH)FVyUlY*;*Vv8 z)KP5J^VHGTof_M$w9&_}B7R1;GCBZi#>|oT9_$4!dBYYcP$R65y_=rxgjjF2<(|1c8wu4IlmzW`oaNq zezS>sK^XNEZ1&oMH-{V4GWB=ll7~7HN{{KFMmfrwGaC^0`Hz2j?}d0Cu&N*Th`-GS zFwEX=nx9-~JLBsy`BdOt3~E<(f4fnd!zcWM`JKQ#fLHPxf@Lq?!uBNZ766GZ6Yy>N zm_tRTD z=A!eHpi>7k_yLR zR|46Br7&^L>d6z;@qHgsAKLUY)R*Sm{Z-HTMI<3I+F#>XoV(w%eIMP^89w>n&h5X! zJSz?S;7@qg6!{T_oGVEyMQa_tTN3J5WpOf4>KMJt5}||K!gexhIlAKv#ie?+ znb*;k-t--4*h>AO=0{|&$Jxqn_AIaF3-1&agZ5^+nN(+Yxq$q@!?dBLKESCh7Ed$Q zh6dY(?Lfmqw=wEpy|b;97Mq5XSI?Hi3*d(?8&3BjcW2}Dc0a+Ox13XMN<^$UZ zQ&*NPYbH4!a|qwSJ$%Dcb;)6eJ`0Xhx6cuyC4?0#*RT-bRq;BK z<)+0CBuO7SLLA=fQYRj^fd18(sVtucUZAszNWtt z4NGc+j5Za=OSH$hOKwVfLq4l-f9nZ_XVQ(dz)#SZSv0%&1li)bC-2PSr_VD*+*cV- z*{*i50M4gt6V20zQciU6(0nN3{NAtPT1)r6|Mtq}O!75#XB;PeI)Xw*7s+$PS1i3? zG&|gZ*gSwOy<8=^GH>YUc;~zcvi4|?#t#9!E#Ocu*$RP|}@XFsjP+7HRGe9mI&Nv(9%>>=|cH+JUb zs13fMZzF^bB%7g}!bQx>;x2r?!QEp>uP;$*%(kQK;A`QZP?tpDQ4%vW_qMPf_u-^L zt&yzAMO~s9QE|HV2~HuSbVi;9ToZ=L1F!IA;Uy7MMDp?Wx2QG4>mCr0TR>tohZ2Vv zKcv@y0MQ(0lw{1rQAzC(QDT4rW9)f0qaRr!v@!QPL}S|oN|twY{+Bb$M`*l>(+8D! zp`umasX*{9U=8X+=-YXt?VNUjZx9u=yBa(TP|E}~d0sDh-h)H2d~0jq4KH~?Edf)W zF_(aG!C0#y00&V4z}NSDjp;cU19=4in;}-%-eCzua12q1Sz=1LWZwJymv;)yig#+{ z*E{9=*9HH7TyA-L1&9B7xs}J|`vp!->TAG7vPYFKyW*&vq|c;3CHu0$;9THMjeo(n`*aUI&)b zYPDW$($u7!q?>hQZ=-@@T8Qq-S+bDO{8?4JAuAg+>M%TtL($;Y07jDU_Kw-Et2!F)s4a(Q{ zjF!+aD8@)LMx5OHX2xmAeh~Ce;tnj4=HSENs%SLvqFQW{Wlg5mSWbD8*m8z{PTKeS z3=oxH$r|qKY+3&AN&7#Y=|4$7T;=+p0|imD%#TX2L`UGWxfk*u=c$m>{pt99$RdCc${pg`qO&SCxdbdeHB)rvn5 z_WZS;s5O6&{EoU#hMK>xnsfNZGMYb^s~`kh`>oFwXPB;)ct;Txd2^MSuDV+FXHtiX z5_oLU#EZHYx$po7)~)+DQ~pee{&+$jMWnLQ9P*WRF@8Cku<`XXqbnOYHfm7S-t)*s z#l&8!1?CkQf+C>$?9SGQ%K4Q4lo`-?)*gV}TD}3_G8u)Xr*O5=V^xha^)Rm;a&RAE zkJ=qWy!joYasxN8XN`~>8TP#7jo4inU~!Ar9(w+I%5HXd2s*$x2b{O5o(sm`3L~;z zLpM#;(NnTw%v#i#A68;(<8`K_S_9s!GnZq$#RqJFcGPoxG|B=kpw>-W9|}^s8=>nL zOU#xHcJ3s^i(5<^=iyEpIl0nfg4C% z1KaEw{#za&t;0HDSs-V2gKt@d_0?WNZ_O4fs9d;4&6=4&(vTkv^NBTNh@~bdh z8|=QyIy)(9?0WGUg8Q6r(tl?5S#UA(HPDG(d6+vdFWIxjkp@T|8yg2V$;QI1(tD7+ z&+wjD-Uh=Q0NwlDmt?_A5NO;bd?pC7?v6Pp-ux-$ySbfD0|+kPF+h*mQMZNI6LBMW zMaq!JJ|b_vi;pMYw1J9_CC`58)yE*hLnYrKjEH!N{)Q9il0nn%Fr&ZsvhW#5zL-01pbq9dvI#z$m%aJv=S+t)WQ!zqnt>jO3BV(MqTG&M&-ewr9;Ee| zLi-LW*x;jxvMh*QzhIW?lZWRJWsrKYBM<0k_My%aD1w&q9|z{p#l?#G{2KaKkVATQ zxBvV~2tr>8f$qO11Z6`z6MNe)UgjUZy8lAVe5ut{*2Pgjet;VmI>3<8ArexNHbYSo z5m|!R3g^uzqCl3|o%qJ1BaSAJjLW`YWuK~iikY>-UdonEzbj??(a=zFgvk^nlsdE}*0SfxwvQI%ve z1x+vZRT&+-@TxqFqprot7=E8dn=oQYxe*?z2Xq3^;NZlzQ-Go4ZE{FII?M($V63ud zX6`D8gEqNNe+vft2{t8fCNvskC16kUUX|)8(T~@M-eQh3P=Sp+jU7ov^*Jkg==dt$ zvNjGar4UFux9#2LMd)*#0}j+qH&RxR%*b4HtvuApAd4_EL@BvbxX|LWu5D*3X8&-o zC&}4Ix9%t5ZJ~N6M#(2{7435o&A+0NL=fkqq5pAXqb*V3ab}kb8j&O4a#btIBVv-X zg{$ToNt~t`V#BHmXG={0WZ6Tt5Q0eD)-fX4am!j9v7Xi6*z~lL2F@tG&_OQO?UP~; zp&-HP9x&<<2AaOkj|ti8i|GAca-brBw)PfEGBQy1>_OIaT2h)=)W(mlxWmNk#{&iP zvE3nYIrk{vz4|WgZ8r4-Gq1XqWt&hiC=^50dO{F4zM%XE3;HQ7ZCWK~Q zYq7KYqNdSw!Z|zIMx9`{%VS|w;K2D_11UR7D4Vqk^I0Y{x!gzJ7|LKvKWLG8iVurq zgYdyGFxeb8>-Zru2#=y6hot{RKly5&NAszp3N%~>Dy}IB)MM!2O+%Z+yXM?oaQz-E zk{+o@;MOnb)2;UUC>4VMF3b>XudXGD#WP4nfrZc7th3$QA2!;<6>y7##XlnJT}n+) z=hjpUxBAs-U?;%xVWgNZ^2g_BxV7(BcMzLUc}L`y->MahT$+!K*&oR_brjtJQ!Gn* zrB3&P1}gHtXIp8%K)Y7NM^Tp27i5)&I~7Tumk9*3F2lANJOfE3uT2p0P3_?b@`3Pg zWz!!4gJ(BH*pIVmcw1reLi1-t32&075|6WgxdX?XE`z4n?+e&mljz;QNLa=$`g_y9-oblc7Ty2NxXV8>$-gELjW>6kMWhd4=aTjvIgy*u z#fl&@Zk!lOQ{0d6uf28{%uahN!T2?|ub#|8=Hcy0Hv zLLArynD?Hc@yeCfpGHthHwthL-i`JYf8UO%c(v%{^QTU8YK0tCq~(ymnjF|Q#$mbC z%V$JF(>!rfTqv`jrt=-e@B z^nb(4X!>)OQZeZxq`uLq0cWUP~sm z$5;7aN%in=!eGjZF~p;yX{-?(>?GfAq%JL+S#CW_aqr|@NhVe6K5AaK8B5e1KHr_O9BRE8RjBIjXf9b$3KlaZr>r$$;g}cvToJ zmuJlhfDz!8N4X+(dSx4;R>g}n<_|xgt3jeMo9LPy{_GvUYJgcBJ#(d9BUXxlKa%lV zTRk3Is_>k(O$e^7g$bkE0N+}0M$Z|+g5fc}3k;!Ku`)!_XTOaUUN-GPn|(=+&_J_$ z#j@|~xAvGH)CqPmyp-qLI`tAIWS-RVz`0KISnl_Jp6SnZP$zzC2*Iga?U#67>1U{& z6~^VzY{hEPJQ;QiIj6r?(riUrm-R8)Rq-*~{c^sD=+S!C&3ohNX1qm!`5eSR?;g}Z z{|IqLA4_9&-0BCD&3Inx=Xh=j;m1{M^|~!GZni9~Z{|qg^zeyJyF;6r#{x3W4_g)E z<(@{@@$+DsnjKs~q*3?;B{+{ClQ&1$r?WL!o0(4%bCq)IYl%qjW{@QaZl#U*xx`QW z;wW%Xn?!+d@wC!fA#}u%#b3!?^RDVK|4GS}#4A~~yzC*(A|)Dxhn5~s6&o4Rm%z}f zfnTcRFi#=(S0ZQrp}illGQ03M>g-CI&aBcoG&OWjWj0;NYg4&8h}*bMtgr22K9yH` zc_GU;!PQvA#_!L$cJ2EYMVqg*UH8JDs%EgT;G;p%e-_7qHV&EBh*P10Ops{A$ z)dj;LScjAEL``{>xX$vmTiK+2D}wgjNL{I^(v8eOr6d1V?asT*sbO)H+vvA;$w7&_ zz7xU4y9%NdY7|wC!E3<$AhBhs!@KDb-qP&=1GpQC9|F4+(-DMCqNFH9Ep3Ho9#?tH+{l!shSVQjfDi%52@U+O#`Z(kePc%#!%|N(4Z`dny zzwqSsjG{Q{2V8*!r!8xDh-=Xm&Eq1B=2QsUv7;Ck&A#pMuoP>E=2GUqt*SoRSmn3{ zl9wq~b>ocN0q;`M><3vaoZ**fLvz>>2G$8Fbaqg1i7^@u_5eM!E|mzk;64nipNU%_ zaDA60*mq$~$&ujA5h9NZO2bkf1m2Q9LLZ!dBU|zoclXDV*Hy#1YOkW>n$^x$di&+& ztutiy&_2ctQ$Xd`Fz4~Ely0?Pi7&|VL!9`tb^4$-yP;Dxil%#mQVUxr_e8oQI!|d* zi&MAqflf43l>wfg6Uee1OIxj#YwKi|sCl-gExMZsWa+@!d@g;CIxYb^1OIX=Ij5~K z$q3KPgatRI6ov6=$>%#KWLTj>68`RAs4?D{j*?r)8R-bWw5-cT$`mM84R6N2Ig zzBPm;#~l!kNg^GY$gNcbhF+xSNj5vf9<$Ev%i&vGQzSS4u**Luq1_wDdiv&g zhw!T5B!A}DkU-Xp{`_!~8QtW$ ztX4s+Has?BZDb(nP4tJtexfknd~_5xKUzO6Ki;sp168#7?JDEybjYoz4^be)o(y(( z)h-QmJdEkQo-7v&OG#xd1Dkd0m61^&wKmxZ9r@Ifdm^-)Cx!J9>RP1{ekNvoe8;XZ zyY0~gEE=0n@?VP>jhQds9Tx~^@j451G%t2dPL)=2&5LrHS(B(^y-n%2XQRFeFL^$T zxt!hVoHA@@=z1eAx5euW$!WL+3tx#X+BS8w30h7&ss3#pwj@Vg7N&Ybm4fMmtclLh z8YSn!y6gmFopd2Bm2mcG^TMJmZN`lRGCO!~(~hFNP*hf^1!R*z-xS;AS2myQJkOaN z_QAP={?(J2883v9F}J(d%oDAeq>VFSV-)BBA!(Y8gnFC%cqhLxmywuqT#Yql`QcGH zOdy$$E0bbI(~0quS*5YraH9;0_|%ws-JcFq9m8WYcPte@5!uakusnx#;US+DJ1x8mG`0|v_+Xb&Ey3>UCbOI*m#qdmCvTM?U z!)Y!SAF|neaf%2<7UQ}SI;k>;HL4D*`$n=7H(TM;m8`m-CnqDp0Mb`hACVD`+fIOY z@Jk8+rB!7_sncLYkF$7p9lGmNqTu`)UVr9>xURd-EtZ4dW>qUhF=pw;7f+yfoZBy0 zV{n|{{P`Q;{29uxV!w;CfF!gPkh@zC$lb@l_AB4l@K776xy8ZjNlxBA^^hxgA<`d^?<0?Axkw*OFX!SX!_#n3AE*GZ zbpm(c(m%`gv0!$e>+Niu@-OMl;8%(#3~IWSobw|+Hm!ztgytV?7{3a;Kpz5PwIjT`5Y^>JR? zlI@}@v1Y5&Y;~TCUb+|uAHW8zs9ydOUN&1LT(Y*Sr9Yt60qeoYNpTKP}E7;b}8y>0#_9sQd;&|C5l;QM`^1*dojmqA>UwGqrS(fCD6zTNrHH zY1}`8!{%z^@1g1VJ))dxtW?$xNb7#8Tc;@PQ=L^0){EWQ7)xL*7+b7{D z!A<@%TG{iK>GR$jNaYO!38FxIaEa*;)1{PIT4xKa-DjL|*@UHaZS}gKK{}0EXRPYm zsoq((#+Q|!Yx~&?8-K9j-WW%3F?$jMejxZ02)k%xZ3gM%7L7Y{`vgvH#U^?L)5p&5 zLQrlRIKmPPTJ{!_A|5IsM@|>w4~pD)FCd0m8c=>Oi+HnfN@i3`@?m+RA5O&_Mees= zsjdwKd}r?d0M6Vq?EQXc*u7QTXb)W17u5dIp+4iO(d%#S;2vkQgZAJA4K$1eiC6;k zUhOn|^#P=#uHoIRG-_ijFSFOMAKx6P%IFPzJ7{d+b?*$ieEyhnnfpJgz3x0Aa)sdG zLd-~ia_puGAkIJ?s2!^|oAUmS~zq~N?zW;&0@(Xh>4Br-I;{rINcLvD< z)(NV&%~l5d64G++eP(Ht7w3KMij|ri0=j=+l45=Un}Iu-CpixUTKxfPxUvf1kH9^? z)88c|G$&FudvUqz)|?k3M}<=?0Xa8-GhkXJnOLBmY69`D(C+?qR_QyYirmkovNv4h z07R7;QeL8uY;SmqAR^&+R&dUio7~k6L5lA2KiBd85uZ^0n6U2%|4cQTOktJMeo4G2 zzeXBp{u^e=)WOEk^FR562phT>Tl@n&lT~f4zXl8a9utld?q%0jv<)gOt*z-T6Dqa0 zXP}!2F$2M9`Y@$eNhO>QoiHBhLkK@8z%nEMUFS8WZv<1`_-BBhgN27>`U2Lu$It&0 z>L;868);oyujxj7ZIO|d$X6?SBYC^(2MHi6lvjO^QnIV8U$t(mm)m6wHeRB>7Mu9#4N;jTIKgYJVUf7eZdkuFZEgpn`aX4R# z9*;*~tyR(|1+#`}xSbbW6p>n;-nm@OeC;65mcwWQ&_nJ@wXL?8pIJPpS?-UV%}w5_ z*=UXoI=6DfZSOk#4?RX%oVGE3ft`|pO+IG}G6dW8^N5l^V=25W`w`O2!Xs)M%klk1 z0ICVb3Bd8{$*TM~@*oDX|3%fg$1gT01gwORmqe_pWi{S8N1Rx!q-+J{u`-YTTAzFl zCfS@gd$z}Fv);H4hife2P#gbf_l}fxBTDSjOs95*!Y8GUqv$)W<$*n~fXg(7kR4xa z?WHn}VVMsIFfj8ePjGY#sDwr==$L>Kp}8bnNee^PBCUSwqD91fxo^rZB(*+Ah|=92 z;iIWDx7>!@oqh_MA|M2mmUtYa%nAXhp_#2n>UW$KhVxEiR|ioSzbr?AMj37dOd7n5`bm-l z*qw%ALF*vFKL_1*cTl$USJ1_O1s%ixN6`JR$<}`aUW|&hJ+cxaU+OZ59n8>Nq`GC3 z=E@up!KYb`Btj})k{B++Ko-5#vPH{n87u>F-+yj!P87xem%tcjL)=nx-Qjq;v-#1~ zM0Voa+jOkKH)#(n3FJ0R(OI*tOpOiJCh!8fO7#hL8s`MBl#Z_J zXsP(iVtVA9KDWI^NQ^^n{h|wz@`xrG${r1yRV6*)2eR6H%Jpk@$%9*TiCB*6q9vC4 zz>Vye6+r%0j+F5b;2g3FkcpS3m*{yjm08Anq;35^X>g!L3VL{dFA2z#9`q}c9q){ zTBY=3I#s{5)IvT-NJ-;Ji`06~$Y`Q9$m+BS>XZU>Bo}kx27S&PU@DM6dgowp*&S zJcYtnSQ`9)=E8qp^!gtmscLIKr}))@v!`q756yJu4mpieURC7aa5av|lOZjOU?DZT z9RSO4Oa+n^(-IAT7DX@?Mtmt=O?g`oQyKT4ou=lUr^lVA=O34^uLZu_{L~nLt#>4K z)X2zU4GruJrL3oxJ-wDvQLL*lk`qCCQsa%w4})e$!xGL$4?1$AjeYBP zZHH#ZDlom`SciR;8L8`xob1qxx{C?Y&h*yr@gQ#Rcki;htzNA2G~f*TqhvxxxnUN5 z2`Ta10b42F#BHn9J%w1_`jr&5Pym10-8X7G-8&6cNY$J(H-OFxqsNYndo5r{QQ*Fq zP8jgeytgX8?%Pi%zYM2FmjTaE3~L))w19%7{L2o$8p}&4E_1*7*^H{0x^OzY2FcS_ z;z?W}6Rj*e%~SAZJ+BoOs(;aDstfNhie(P+n{dfIH4`axIt!jO1az}2t#KIZ>y1>w zIy5B>&mtO@*_1`6C9am7f8<=`{V6rpo-}ev50S97}`V` z3!p)3YNhlO<|L{|zV_dstf0}xDIjHxaO+8b@GixR|NR$?<`0KJsr>5xjDHPpk^R5Z z<$pw^nw$!b1fp+{dWW_r9t3Vc@*ud44NQJ5{7)ki2%K;{5|~;fCH}0M#rj_I@hh_? z;@v9|`0~HMo{wav%D>nq_`kn&|GN7ypW;*XE2$An62V@qS>L!MaY zqiEIPJcA6^U-Qf+m@l5o>xpO)wpk240z%QS)i>L5V9Hu_w_}dM(47zQXd)jc`Sc+R z+NqG-{hcnr(}(WdmvT(mjh0zju)8O9(hhsKfvo*A&e>(0`e=$NfpQW%va5fx7l-X8 zBiO4lBv?QXdz(8e(AsQT$df%rs$rtt>^iT4bDFMS*B6gul?a zIr+p58TdGgUPcARZpk|O6lgxeNQ6PB`*fw9rlwHYc9LG)WdK6IFcH1~nl6vzMyyA(iXf2Psi2 znN+vT5;O7xSmR&OU9Ld_){6np1k1TzM%CyYkQng{#tf`@L)Pd$VzAv=vS_4?`oU!N z6){TW+^OR*gxM$Dh^f)ke3T3l1Zkd4K-LirNIqmY>$g)V)FO_}Tr;1#Z$S>d^M41w zqrQ?MZ~DU}>0*h;c{{#qb%SvuuntHhtQida=8PoB-$)mEP@ zDmi*UY4rQ0G8ZBN9yK54u`eb4*Y9ru%TYQ7wA|;NHoN^@0LopjXZjgwu}@4{M!|?f z#?=R`AK^O*aNj)R6OH2r$4*-KPIQZec%aBLn-{lJL0^jB?rR;r=niW<6A6LFAzCP_ zF2-J3>m1vIH7A8FToASySn?@ZNo@MrQ9;o?D3v{-$Sd)sXNuPW-Oc=ZEYVHWMl9_q z!wnt&_U{k4e=d%uTAyE7zfg$d3x$~e>upytv~{rgQY!w(??1K+xc-~hs=eKR)a5GH z3ja1^JWr=bRjpVBxd&6V7FL__3}-AXDQSgGDvW&_WJam7QXgyP)>8K=jRF@z{_)K} z>Gq=8(xMv5FD>2sI>&pe%iY}X{o@0r7skdczVDUR>}E!k3I;S*mY4l@P*f}rQL^Q{ zELN{vgs0J$0TIzPU_xfSVL$`q)TwLkpl&~xGTa5xB!}IOO5e?Zk=v*^zoGnS@;4j z#!>T=@F1PE3B*zS=XkqvJsx~-JI=aEz{PKxRNs~B3O+YEc4r417kI4&r`AxdfZ|hs zicns(viFf5V^BxK6p}8L^GQdECSequ&Tpyh*fNxG5R84wS@`T@w_M?X=*#*mzw3H_v9J@3UuGMp~E%dYlT1A)!Rnu*GXd{?gW0~E>;-Cw+p`BOLCck z@arm1#G>A}26g3S13bX!Y>E|2{r)x1mq`5fps&I zGj|T^fr_IKk^;=3j3aLlg9O4mUSDorH}QBwJ#T97J!j>92xoUBhO3X<2p68gaxIg2 z%2;#7L)%o5T9vx&wi6!Q@UMMD%gmfoUO1(7I&?KL5$mODRF)h*&+D)DTkjt$NKZ9` zfvJlmt?~&|X*;1+tODjXu4#?pM0p`KdCF-D^G@ASDxE{$mV$lL9EqB2w#FJ|MrHjH zk4qZe1*G5Wc7EC;9IPAc2i(AGZuM>j2kbcxwYrQ%E0-Rlqtc<$YS(;eHJ_=6%x=#D zaT1+{afR}Paw4r`YSar^`rgAaY6So~L-fd!kCy2}m;Vkt8;xb5>zSJB{4J{O3Xn z*@8bt%Nt`KjWLmkZdrUBnEGdESO^a8FmQ0bn!Csv-P49BMEZy}0xVD>&?nP05nsi! zT=bKfY(94__10&88Z{}kM$>dVLn-|OjZ-4y@I`$K1yr^CV)_&uf%+`Q3^) zt>xS?@_56FtOlSJ$Z-a^Nw{Bs@(c*L_ZaF0>#W$F`8*8-Tw_1u>$BZp>vP=r28iE;0thfMhcdbSEOthoBLKPB z&scs_^{9m`&8MiVB=WPp41*~xH!`5^`}lB)#tBGjl6-$$DtxMKluxyZz)lEFs9MfS zH>7fSuB7=4UsTEyn6s12`O2+My2^>vYGscF;hIyvJF9`w?K{y}9f;*}Ul2gnIl`~i zBWzC^z4#4I?_8#_h9ljjv&L|_`Zze0M_`TgXS2FHEX-`j(w0qUz4E-!eXT+BhK-_P zINv}*Ib$_8GH-Nyj#_5%?n54A%VKa;WWCwWiDRk9T#HS!){0VtO<(vaoFt9-To5Q; z7Euh@xM2f6k(ITV!~1uaY&|vu>m2^7i~5y6f?mFLeE{w)x=L+}F4V`LH<=2WL z-1HUqZKJXl7C!tpQLdM^sthIe$}4eI3zKIexXl(XB0EKKP`3@?&_vzI3ZKQ}h1z+7 zq4LkoNPtRdy|qq6$&ig3hfW@K&;%{&*-y?*hD7z;`2EqM&~>eEtBWiFb2#dZvhC10 z%O+ksUQ^uq`jrKDVJm-y$R2CXF8o@sqvzy0**nnXX2~M~)bi>h2;!A#5^zeh7F@1^ z;C|z4rP-Y+ET?x`at5WG#~QTUweq5a=abgSxGcq&oFB%W=33WR=V;&tSY3GMzFIcj zQYgk0J%P<419~%cY9-V?9@>YVkZ#?GeNz&|D=+dtcnl(b9*gwxiM|w`?5|DF%UBSC zpImi!M@-q?Gtw(pd`EEH3=s!D5z5NIuGPn< z*z7~-@jTAY;VWN_FgX=jaqOyia6YS_4}Sx<3==fc0o|2|tJW%cPt@j!d(530pGlo5 z+GWg#{4|znmD^9V-hK=Ck-{AsQZ3MY{Sp( zJB~N96yFZ?4HPf}%;wCJ7%@Sl-V*7z%t~i49Z_(*$fm#eI!N;)D@CU9v4Ojf;pb*FXJaHrDT&Q6CaE1PGV9gMbYW4;2@r@{QIss~?JuboJkm-4BQV zG>?k=_^lsHO%-i^uF!WO$L{^kVLkPcnc+cN#chomt=c5z#naf zo)V;h5-Ih3ej2$?z$u?4j`K167HX--^AU-{83B3u0yAu(lBDkYYdWrCzai;N_=I}# zl)Js(5RXoq6V@kue%Ildx@WqO#)Fx|$cO8#%7v~hUekJnn0tGOJt7Q!kOVvKu~wHK zBWZ`qB#;>P{;OqNy!cdC^%Q3O_we?Aly)ZYP>~EQO+^B762^ER~jb zFqmS78EZw2l#&*-Z;H{XMf<+0kQPN!Dj}&e4l=-q$EVyqSwgZ37%^bn=BLG9WY63 z${04o=*yXRL(48*vtIZ0j^1J^?HyJNK2&dBaOupqq^#mUCq6#mP+onE=sS7T;LH7H zG71?p`{(vs{Vlnf<}>I+pHTxYk4d+v?N+!XSWi0add!Q=@Io2yz!>l4`!sitQcg)T zzbXG>j8S2#-NM3*7n|x%7xy)HTb8@N^!%*g|Dt5)7-x?evFu@ePQy5(sg>!TJuM3( zn`c)=`rVKZka?-GjOu%8Ax&9t{H!`V+u3`<&CLC+zMKzo2rU^q^1I2_lhf52PG8qN z7clcdblP++Gp6$8y?Omt%F<(JulOMMeEXd5^yRyDOqXS>3mLQ}b9V%<vYfvb7*9OIn9eynujSNhS$@HfUuUFPmqTI_@H^G07@)f6jk{+}^*=dr*;ogIZTy@MB-b=IDtU+3sa7W8Xd5 z;Fn{Uy51qM?P5!L)GmeNO>_I$Ut=$IzB=hpd7u8PdJp*GIBd_p?)Jlz4(zDP@9%r6 zfwA&Gt#i{2RjDSbFV4Pu@>P5Bq2Cu8*Qzw8E^v2g+P*Zb{>25OU40HT=D0IFi&gr_ zbt`#hy5fxg@uLam3HBaqZugz_*=*Rpj|Gnn#;6X>+qc1R`x7I%$!}DaQ(N|MROH^R zy{Bi~`{#$b%9SYxUiXe1^HiF5p}uk{Gn1A1MZ?jVcW&5}%i1?9YgV*;v0hraWQ=o0 zXfu6&_T@{mLtf9azh?ib#YQVZE-90iX0?Q}>qNg9!w+t=RkX6+&69S}y zI5lBA&%|8KY<#J~^(|TxRb!wqzb0Vg=-CF_GtR&EdH#7*{i+{!7uU8>3NEhwFtc=y ze2&f*!~JJ+=UO|t6}1^DoH6~D*z~IHPfOeChv{+ajhoYtr>))c@nG$d87miXBYPPC zxG-u+ZAyK@;?)NW?OK1uwuOXzv$Xy5PP*;my*&4*05zQi1uePz9H~>z#x+Vwm+7r3 zv{t+MtC~UwMg3y$xHw66pCT)0ana9=y_SIoO@)>!jip%&Oi1XYJVz2 zcfpqEslA1>Q)>HqT+D|_LKA#P;7k_gF(M1a9wtt*hiY2y=hXY0D&6MeB%4vwRGOSzdcNgIX>Dm{ ztM-Nb!lMqcvO}C?Pox*-*Np57?Y=Y+2BXa>jy7G z#tyPGik+^no2R=Y!Ng+eu{#zX<6TtVDOp&Y9=87ZM7c9hGDA$0>=Nf5-;>wJep__J zs1MspKHeTB7x(blfs=0*wJPq{;IKSp^y$}e^>b5u|60H1;_4ppdxHkOigvH< zeeL?9xLa#m53G};?#W7{{U~x+Sd?3tQ@?$#zU$VDo>H%U^EnTc=LN`hDzC{YqCw>?j}T z&}Lq)Z#E^Oey{w`Z4X%a9y-sNdQ&{6P+uH3iPOw&3|{Cxy`@Yoca!Hy3#H#eS*uHK z?=w>2sMb1Y6WXHfp?#^O^IB?tYgje)mZvma60-h^RN+AD=HF?0jaREqKNvKu%t7bq zy(1>}>$eoxkJl)_%cj>}Rcy{pKh=GVvH6{xJ+E4-7fhMJ`e1UH^7Y768`+iC8y1c* zlN;)+dt1wX_tB@p&HJMQOw%j#lm@<@<<@BQEX~=VqHgVEk3)GcyOo(~Y&Oj=pD^?7 z&7H%Q8G+knJkD7r^rg8*XKOuIsw@}~W8V;-=XWfN-TJuaJ6^`0kE1MTsY)4FEI&p0 zhcScm7v_&nA8Y;kj8DX}+Rff}{=;_YByW*9neV6Wa3*)2gKh2t1MPl~-M3EK|1F{| z+0As*7Ps;H7&pJAwPdAwMIRjVvB0Tyv}cUIXWoL;p>a$-=uKq;+Z zrSB&$u__B$F1K@-(T0rN6OSx3*GShS4AQz@+eo=-`aV+nfkNou&`mM+B|dNa-I@9- zp>mNFHKqG~y@KTXWx_X%>&lhfJqXp;9jX#%J3xCive-HuKataR5$V`zyFn?hOdgo8 z^t7_IjR`Y(I#JfHuT1x+r|1T^{T}&V@qc@-TZ>o5hCR)bO67*OmA?HQFFh@&ap#Bs zY>vEd)$4WZ$NQ2K#+#i_1gFmIzEV~tW5rP)_1K?F7uRS!nAu0w$)%!2{bE|e6^-vH z(-%ZNjxTb|)1|d2wjT8MW+^e#%~PT-6e%4t4qvo7uc&9%SH@PG#+}wjd(N8X`)21i zU-j89hrAiuIl!t09bHJiSP4|cCuRdCRErDnXh zmkj4)L%}qC87uuWtjz1o61oq1Y0FR#=Bd8AI)l+`JAKyh<0i&IS66TMrTd+qn`~x0+NE)_ z>#bjJbTb-UbS%xT?R9n>(kJ)LLi_PA-9H|$3)4%VzVy}?Z*}I$=Sr>TPWc$S`i8xI zvHbJRoYDKX?LBMtsPWLNK!cvS@2EYOoE_3&Hz`%Ic;@bfA3wi)e5~z*_3wGx-*~H0 zN4jqQJ-bA=?~J`uZfTuv9#^YK3*U9?qi@!+3zbUWo8YAJ72{w3%?hp#YAUeY_GP+d zSj(?Q)1Rd$ss^9>)eX&I52%$=%Y_vyy*?DmSa2r~M<3gfE}l*%5wuu3(}cwiHKs+< zg2LcdOd5v+rDcu4fPx=cgrp3!MFF;-t-E_I*HugzarMySW;Jyy5937MbIWI+G7Xm={B<-$Dd6HG>3#i7bo-${ay|D&E?nG2xS>6BKy@CHJ_UggfX0t86`N_ByS;Gc4*iXE) z<>{Rl*S^}BUdD%pCbJCahK*S&r+Z0Do#r4AM+%zZTt%`W6k zugQrCcu3nMKl<+e-3GU&r*;1n=+V3P(#zDVx8Ilh__=SkG)gI*xiiz({fRGC@qk(J z!bRqT98RbhZJB$Sdb8q8r2J=x+gk0{IdlD&_mzBE1A@x^j`{Jqug)VxIRV?)15-)ef5G&+-)0Ndgo z{^`4A!KX#3uZFBP{QRI|$Xe^v3ak6;|7;lC?6Ex6r{Yh;m(2lr zYla?wdBAGx@0ptE8ubng!;fcs`NdnWuaQc+x#Cyykj0hbe@qy@$Zh|pNm4PBKe{aM zbK_XsF!dMTv!xR1QzojjPBb*_v>pHS8E04X9QpC1D_vF2UTU-4YT4J#JMX^J?|?$j zH(Bhv$yTdcCak=$?c<-g=W|EY8==j9^>LX(H)!tIrP)^hTeFX3v*0Zj(F{7rZG>U5 zyPLc5NB4r;&La%>mwVhXaee7(QerZa();DWytldobo4@bZ*+$b7&yEkPeqr#K5sz( z(1ur4YK^Lms;^~#S<1>R_?{Xm1w(f!X_am&6iUyY6z7DoUdBJO+ovp!5bxn{1s@(4 z`&AIEJZwE(oE)h>#vV?5LT?~Y+6jV3;1@{tbbR_!-q}8b-vtD+$6(^nCK=4cuiy0KdX2W&lq>n>1?jmwpm&YOKlF9Ac`CPkPZr2CdF_G}H z1=6Qcv4;gu2g5B3qz{4-y6F4p?}NeN|L`0IhNOJc26M=tL(Qu%crKLA#NMpJVsl8v z4(Zgk-Aev=j|Ve2(QHOw47OTJ%7T@*E=A80up(H&bn-&L?Y>=}ydd;sP&g}S9;sxs zuHOkg&xs*-u>qaJ+ijo@V*tMC`FdkdGnjK@BS*v^A@^1|7}>>s2md}E!vrrNRUCEnM}kM8y2S@IP2Y2@ri4cdSN6#5Q#PC2p6H*j`@#=#LNo8?B}6O-^F3J?Rc@ zqJJkFe)70)y22eCy&OFq96jw_9I3*HWKS zr%&9yF*F7Q=D_!_v9YJC2X=YdbBYfXK z8%JZn-812I9os+%74Rk-!Qb%bAHw{RMIpC7LSIs?%->7NjNUR1T1K0ZFeD z13!7(C!_F4EM_nR>&SF$F;~K8wxl=PqyzIJFtNRhogW@I6K?ep)lR^~su~|zYOw3l zJV4F^q-G}%e)71qF=R-5+NaktExACN*`1&9=mhe(y)AHQG0bQORvgnEI|xBm@~q=$ zl3GB`KyXCxJi&*@{c0&5OkDgz#wuAS;0_b9=;(=Jx%f6Pt{<@48`|Osh_GmTJO{&g zkxJ^!<8`Ee$o>5Rr|*d6V1n))SC@3!fybS0Cjpzx^P6UsezONFTm{v^q=FBRJHkl< zG?ft%6ON7QsPqULGnx@Z5UtKPFKT=XVzxsG&}MPNS-cp0i)+N4tiU$;tq0Z7Uf~N& zphPV0!VF;@P{58CYCu@M%oZUo|j!RnYf7}y{H6o=iO zJ~YleeE2I{?SyGyoHbw^G{{s1iHHAvFO9G@l0U6kJR7*V5EapmKOsacmss6>y$l_@ zfgMi57ow}+!{ZKNh{w{|5ez20O2;uihz|B4^8Hgs+&nNL)$r93h`8U1AV81Ezr@1cLENU8Ws~B44oS`0L2GB3v8Hxy^_%o%waAt1ZLdD z^KpQ_Oi1zu&7%jm2NTS#>A_;EPs1}SA_vH2@YO~nbYF-g!5_xUZ~!%d(x&tQ%MV+g^A z$Msq!nH)adpAjAAj>&_GGQQnDZ~oEH2vR;lmI93{RVyTtgpbu96L=UK|7JqC(iEtV z8LDR{85W=Ru}HB^8_aP96tzQWzt;VwLCVaY1^sep$L4z-+wgF~6>J>AE-&Q&!Izgm z?W4uP4(%Ngd5N3I47j)jCLj`syi1EPVnBmXf?}c__o}TV zBJCZh^l&=%c2x;vOzVHA)*mWlILw)`Y;fm?$93KQ?=tLS7~#Q0Ze^4>KLINa1&RYx z6~TwcZOtJQC84z@n`)Jf1N3rm2sHj5=aJzBSy}J)+!5bq_&&hBvUJxW2qsyO6o845 z;KSoi+Dk6tuLqKv=*|6@`I_Kl!4OZ-!Ls#XaseC&S80hTy*2Qg@+ zpC}*|!yof7Lpv5*CGancYab5=L8bSEnn4E>6UpWirh|aD zP&clETEQYq^9eE`^uU-<+y6%cJ&^lgv^RwPY2Xj29ShEo%5BfP{8KyarAjT2h9a7R zcUvPxoPABasDCqOZ|TW56u~{xz&+4VXM9tcmE z3~T#?hV515Me=&NrX8wVC{h&6vhamh5u9Q(x{#~v>YnwD#%3RZVoIJ z5>}E+Ff8UVq7x*oto1poYNvsS!6J?Lb`6P`|EWd&*K6a6sUT|(Bt+0*DYu?fR%qzo zji3_vh0W~Nd;fvy27oM7j$tE-9GGjd8A0CkNERDLz7oh;G+|rOERa(x(vE6hlgYuB z;v~RxPCPwT3V1dY1C6h%-;&{DV0sSr`ukpc31rD_50R#V9GXbgoP=p0p|^Q-$U-~c zE@w)GCsi-Qmo$t8bJ3vL>Laa)_K`#mHZ%5x#V~?Yk9|Pn<|?pM5R?GB%fIu(<39aN zMumI*?c1lB9kq==#JZc)?V1MY93>LZWWI`_|67kU#7kOw9~8qzq{8Qa7bEDOeYe1P z+*|B5ZRQ{g4bqE#ijmdj+I$H-OWC$9`#uOd34+kk){0hALF@=xxTH}mb>Qhx9}rRm z)r6Mj$R83Rg8O0I7|h_#84BrWc!$&Y?I9pY8cKuCe`Zmnh}+3OjRwAJY7Pzt8FC^P z8z@a8Kty;16IS5$-LeQznJ;;F(Hu$MAu)_v>eZ2(jW^K{r5m>Hc62ugT&L#;z zd0c5(5)nj~l1t!go>luV%m#_r;w(B!yw!_LV*3@3|JxjVfvlfF7}!w-#G($74sW0! zwBtVo=M!aCu2?_w2D}?h`a3I<$>=h9L=O532jwF>S_0I7tqTDTZK7ZLlJKc8q@f2( z+C*Kw<|weB5HesWv{MdNCKd9JgIl{sr)4f6W0}aD+o(U89C}m?Hgckci=T5BrB6S9 z1ngx4b33%oZw?|80i7}&>8QM$3Gfzx zqe+9s8YFPEm+yLYgs?{i$Hb;u0lZGcqec!DFTjh%l4N}J>Pq$0C=kMeNg7%Wm$XQP z_|qAoVUSLiDBf?lGEe;i0Bi^n!%}pJW#Eq^co|v>yMR#+UoC{SY&^n3J9DD^ zbk5`nbq4?gtw$(=hfE;Dp>)#41=pEpsmB2C2i1rMp5TdOBCr=Gp@YO~L zr|l5})(~K&!->WAp>wdYHa^esmc5!bgrWTk!i#M49lG#3c0$?)!wLuN+<~sap4Fb! zVi6?6vsdm?9}7;u5Ar;Qh@SOsqFnnh8Z#6-Jsd+9!(BXUHzyOSLJ`<{2sd!5DAyUX z4dQj(mjB?J67b%GuIPY&91Pe5wbEe&j46qxeTIFiP=IsBf&Nv!H37~9w@0;e6q~ig7Oc*I}LO+mK}yv zrEr0|nq8;sA4psCKIIh+#?U~XLAQmv_a_)`lF7i=*?&Rcncff?yuroMs{IdoE@2)u zj1?zI)$TBkJDLsDb>J`>0t;i!_(*ssH5l6f3>6AH#i;aXFaW-SwC)!iFM~l@Q0T!3 zDlJNYf-S-Jtnio!CN+{CL=5uw<=GAwO@T0Jp| z07alwznr5D^AGI&^pxTDte}u0(#5 z4n|wdIDFEW89Cngr0`V)Buji!mHru1Qz)4yw7p?SD}J+vfYhFLNr)uair}k|XvTp; zMne^%qmF8rn9!q2f! z794^gD(t*8t6tdr8G8$ZHlo+jd{Q1k+=w>W+4vOnn;^oIJwnPOh#qIPKIK;fZ@dU^ z7>tfd%1aPKEbHzwY`|ZNpo>GxI|(*8k?;s2NpjR9smY*UPhg_CoJ;3Oc^#=P9exLB z&w?$g1R2&guL(0Bf$M}r3q;*4M33h#}Bu7TCmfRBzwSJe}+`7IJFI_XHU(5_lL4>X<)@b=489|W_rHIUOlL(qywd@_F7pE-r?c7XVJ1SSEf=Tx-{ zIB;sq8MYs=*a@AEPB4&iwNjjG38Xjb!h1Laq@+6=q$sWZPJ7aHu`Rdt)q99+r2t2p z&C_1uz|?4ZB!1y5*i|#3)wSnt^^j)MPl1Hm-oA-;!c0Arei1wV&=-t~=9`rJk}w4s zbW$^1(_g*{%lrL+0E+t4%YKr;iA`bgrKoQr#3c=I0#t3Y{v>2Uu9sBp+a{B*=z%?A zAx@)x`)^p^?ugT(Zs8?JE#X(+cKefCVW2^I4J{19so=xo&ebKs5*vZ1vP2^Q1`0F4 z2xyYZdN?rR6UpHYtQg1?iNTvK$eQ~LboT{3x_2mH*$E#5MGSStG;}6c_i@1f%4;!wiE*yBr?-<@FjAdSofS}Bani< zi3MG~`R*l=6iIwYUh&6<4HG|=8K{5+G(c>cPA)+L^_3=0o3#omz#PJ`7NR~KHg=IzHGVKV zFa7G?L+B#zL(D;^f1_x4IEf-;RHVTmYl*W+c<9hgHsfs2r5oJ4_6bVB~Ehv*+O=Al?=smd+JJB$x(9 z<%d;ar`@K*Z~%>?t0Ke@vCZvb+*5r(`n!WbUk%$0QTH6l7DM--iSzybtc{M-0PY5= zqwwKzV&ITq6{Gp`Z*LFx0(b~4b7&*pP`ZE|j_>>fQ=e&>fuH_>FY5gL!9_&c5|-={ zXviVTF<9(BdM5^eFxC(n*R0}70QI6V*!bFxdll$68k%WO$bzHVx-Ah8CUD-hdS7g@ zrOmrwR&=m*b(wgm8$Cf_5PYRgyzc2w1Yd}Q^%Hcz%#s!2`5usWXJA8U@q%96l)v2* zqQnGdy8!}B3jiIIeFC1Yc0!%v{}7w{W6 zV=egbxLG_Lq`>AnQ)6h+PBF~(kcW>RJ4CztHvnovC8OwjF!2`Awkw*!BuATsjJJbL zPZSu!2%(kskfN#dpfGYY+;) ztldXSZeLQuxA9vw<4<$IBNV{Enh0}aJ{c3A=DO{{-agRAcff7{Ere!%ii}3kh2Gt< z^{5@L&FU4mTFWDUoA(8R`xTuOsxABaHw6hysIm5U{u&C%nTJX{8#^VZ#xspdkccN4pPO_ z>hPE`Y{6WbV3hPuRW0m;6ov8y9-2SnsTNDGJ*fmmc$ z`J@@%*;!4xJdl1ZRHX{R;7u2aDc?2$rl{PRyDJz2tAgYgCa z*zn9R9?EwXLLw@#zf`w>v8w1sM|^=Zeuo`&1fya%PoWcp_mKURyc+T8DTy{Uj$ps9 zU_U>^eq7m3I{&sT+weGA0ta67L>J^aJ2$(*VN=3?lVIcEV9k#PKcR%Zp>3eecziF3 zb-*uPmiADo(-6+Bp&1(?#aq+|MLWO_WHtl4J+8AT(mD`@VB7RM_HpB*ZkZqaiWM{p92#9IpQVPP zrbKqu1>p=VHgZ_*V<=K66b>Cc<_#u>b?nXQI6aK7Ci9`{3nugclSIzI7HddKCUv1b z!RNZYfS#+5s&I;J`EX(Z9oIRDx+{Jua%8_hg4j!GgSLg{m@k@%74mn@fv9_b%!oEw z07eRd^;Wcd?`821czoRtmR;_x1Z!MzeT5@m860Nm>bPAUMN-S$O?$fOHFyqo9L_?} zD6q-FDA?&OIi0jouTuD}z7|a1Q)H&fawQhh^?0A8#d)*-q%ItQpuiHhP(zsRMJx$D zs*hg{8~ktey9x%H4P-P+yARIY5g7_4lNw2Gt~7HM1cT?$rJ|AVyf?7`!6G5?HkyeC zXHhxeK+lzgy))BK0&r*W!8Z$4d2uN$UQx6mBI+Xk<2{EM-ZACENB%x)RaA@B9~c8& z*e#-wAjPZ<#-G|^UI_}q&LN>T+6+hAi0bce+b69%Dci{bV*#uTme4}1ph72xdlPML zF5hx;)=)s5hc<(DGb=&}arnmEXVzx)3N*_GW7;B`)rFy`_RYF5^TK+rj$RdRmGAnu z5}ZpKUbavJd)>e?(0n)|&S0Jd;_%DxIR5kVbZ}m87#3+GP-&(x6g@(P&x<@cV4xBt zFA`xKfQ}{aEGFQ=ncR*`w>peqN8=D=3rf4^yv9!c4TB~(4bj$kB}#|mu#ox$lmDgv zH9XlrN-&yqZn)j!FjPV;%;3?@F^^VDt_8mRwq0vGiEVb?1(8Y}{IcpZ?@9F`plTb)> z)bJo5W$|zDCA?b#A8&k)HwqT8+P4^^`CEevc=#zHAsl)%|2oSqzf0f@nfsEWFM*^R z;1I)*3VVJ@j3j*3Ef&5%g6)zS3-yMszCMRD^<9H$``yIg>LV_^FG}2 z;ARY1Rt_wSo-Lr2h=+m=IFRC?GlLS`VdE;ny;d8>{x{eTj19wKp&^>#ZSgXkusdH} znCDFzI8O$*F^!15)W|^oqb-pBVGvBXAq*lD3o`@GB9x7Q7GJ2!5XYM!&7~I zNW0z@Qe~;ffNltMv=voBGE^)bt!7e57WMHh*e3Tc5ZHA=V&{j)Z73Hj3E%2=6BQL7 z0fz?4ql3!Lk3^9^ta)^%S2!(*9s$QG`SV|URzxI?P4{8p3kX!&Ux%%opM}{8deg<# zD#-+3OMuv+WIEdurtkzAcPZU7cD_&s>`uthopynrJnrsil8M2(Kx`I|UkvJ|Aw8kn zqO9#9+?%=8NXEyM$7lDQASJH}Q9c&Nn%0Q!y7dyWqdCx9hr=}_SPA$(gfGi5xoz`E z2o*n|I?##Yt`^B;!D>J#K|wZzS^F-5PV}{+@F`&RpAy15NXcC24%JxtL71VcY6)*i zw1J~o!jS~0T=;b!Ik?qwC;09tsBX1OEB7;8h*+YTUH$kZexe&>u4qs7b?2l z9GLPF)N5^axAU7}UqK}#`OvAczNIAc@ays6hC9&SWb4+dQH(EVDc6gLF1QNg@63oancDNvo2)BPx9Jd)P`jL+-YKKY zn<}tQ6W%!y3>SaGAP+h>e9cel{)jgM>P_%QU8Hue%)+5MKq57oPCnUbd_2Qk2OO~! zuqe`mCJss9gDKVdlMcYB?dBli!Tp9HF-ZJkzbTrRIUXu30UQmT_9QcLfbAIq9sBSY z7|3Fi+4ioZ{V(zH+F$RE&xMHb8+1nTec~j*Q=y-wli*WNm0w*67RG{-9#Uz)7ZBpx zdBhpCt()mR*8?*i8VNeQDNQ0|@;BK0wF`;P=ide?L*{|99^4l-NdZ?3U$6>Af>-q3 z_O2Bwa~wnsbaUe3r9^oAx;sX>H4Tm|P*#Al!x7`2+(<+c>|7uU&Pg&C&ZYx!Gx!`j zM(wjjJQO?W7u;TWfgpU{c_uZz^+1FNSORT&@8N|g{O-L=jF1Fre0}4eY%s$e_`;w; z@ZoX&cjF*Cj~)>j!l%h-*gk-~fiIe~8MB8Bhi%8^&jYZV^YKgn)Jcza0BQ~!PoZqX zy=3f8ow3G2r1yWxcP_zBB1)METkC+tY=Y=E=%-l5@8QGBr%&{)9KK)w+pa4 zejkXidlyK|COC)aG_KE~v%oGAVRt(&F`M8_qR;O1qX`h+vLS*(lNNk<++L?7W#fCv zXV#vTaxhNyguWVGlq`kU35c|jPUpaf9a=>Tdjl|C*!QC?P!gLI_P>q;^Jl}$6!^U- zKJSs@tihWh4O{^|ye`6<1baF8%5{C86l|hwe-%o5FOQF8dmDtpo$g;@sLF@{jOcPb}J>p~{Y=v*rN_1YP#e}hOP-6}qZHXLVBGH!uda z>l{UPQx!rw(%9h%j`56WG5bU6H02t!p&ckgDhRDFD>WgcGxkg_m48RZUR zxwkL{N2lLgwS`zP!I=kx=C1YS4ezLciB)?%WOK0p0bk0RppfQdh_Ct(tWhIa8H$yH zkJMN^VF7j{h#x4ABBjlRko@xNzzs;?2klJ|AGugY+1p!j%igtYN zHC<}$8E^q*Z~?SUtgyxB^0g!l2+Ovqbr0_j$K0V!pvtD&;X|oHciZ7Bo0rsE116%B zTxbU9+WrJ*0xmcOa-khQc&*Fys)b;I0*Ewdg*|o02V-Z(g2^h(utmYe5W259HjoMf z19yIS+-;B~=;#pmF_#Rpu6l8A1#qPo5Mt0d)9)}L4D?Q+ojU1s{R2NEf?x8H=j8pv z!K6I!Ep(_705yoH*%gTo3#e`EmjYSFZqV|zI|~dyd0d$&AsB48kA<5IsbMhJh3o3^ zc}Ce!wN69S7C z0(x{71$C4eGXe37na-bH`C&R#?oJ5%sK3p*fa0)~J*-;r)#CUnOyfXuHU$D5+I=)% z>*U0CzLpmMl{ffc*2~?{QeglLIT1aiy!kMh=eH^WxZ{RHsOgT&O7WS>M=h+e*Pj(b zYxO{wXRAb*=<0L_R|ofR`O%Q{vfc8@_K%gIINmUqLA&q7YRN?77wMj>>gW(CjxDrz z)J&CiBHWHNlsGd{rX-(`0%kP$#(2a`+}9#ZcUISH`gPDhb}kIz>n;;x(RUrRxML6- z&`A9Fy%;(8K<${%J+NIFSWX}9cn>#=0;4&k&N6iSXgO@KIUj0lIAR6UFCwtc8}VU6 zOA3{%$o-KB9nVi4Vbk7fC4_eRUZ;@uKa-m_D0S$8xnPOGFj@prf)9_oQ%+X6^&niL zN&f65T3dEQ+i3zSWH|&M9@o8}2vM;0vr|i=ncl=5C0Q4Mg&q7wrz7K3B%uB^H|f}U z(iAsY9dI&t2zY2CaqBN$3cjzc%Kv@70GP$#Ypw!oV1fPEAQ2{{!r@LQdN`ygU`gli zPCA0W=f@L#xzn@Ek~@!=p@Y}b!4k_QNxJCjCZDJWx8Dc@5wt5z8!9OuUvv4xx81RK z57dJfq2$xU3CVwLMWS;d%ke@wR=3cy(E2@QAfAk`tsx~tc@5NW9dOZ=$0epB%=U~t z7W!Z>q0kF$@aaa67Hov86)9360j2+EPD;lTJYA&W(?8EP>2Uyj60V{b+PgDuEGfMU znY2O82-D7hEliYC;QX41^P5|c3BXsk-}g9H8_;Zl2D<_TA0F2c<`F_&Jq%cbSrLM@ zC4qn7qf45uUfu*XSPMfGG!CwrM1U4UR2=od8oRMD5Jtr)k>MmtMB6*Qj=^_S_l$rx zf!*YS4xzF=aEOuNw1mGsxK8_RWcc9c7nf{z1#kv{O$1iOLX?V^c(5Rb~_;l%&E;PzNb{ z6I{9?tSEQ^W5h_ZY$B2yV;#)BJ_hESvT!9)_dF1#V zFsdb#9M!clOdJ`%^q1V;XQV>N!glFvBc*>ePlPDJAV^oPbO$EduntAw^WaoL z$6$*z{9PC@DR>}255X6$a2~wjRj8UefcQloP%EXD3(l?w!*_HC$`_U_#lz9ZcBG4^ zlSu?Emd-R`u|th%k+h&N*kwoKa3Bz}X(sK`Mh3Cqy={@K2;=yOaN)NytUfMN=muuM rMk!W^S(`ZjlMHx|C~TUKhVu+wZr!2wVRMUAx|HDxc%f=E_Q(GNF?mt` literal 0 HcmV?d00001 From 9e766f46a3f400f52a763b3c7af8f67ca6f4cc13 Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Fri, 7 Jul 2023 08:48:50 -0600 Subject: [PATCH 078/340] Several simulator fixes not yet merged to cep-15-accord s c4bf1533b4 Add more JVM arg checks to the paxos simulation runner s 1b650a0636 break circular dependency between FileSystemOwnershipCheck and CassandraRelevantProperties s e0111748dd fix verify with-rng and interrupts s f8475f2a18 make SimulatedFailureDetector deterministic s ee77734bbe Deterministic transformation s 890f23696a Add mechanism forsimulator to intercept password salting s 5205b99874 Specify Paxos V2 because it is the only linearizable option Patch by Ariel Weisberg; Reviewed by David Capwell for CASSANDRA-19008 --- modules/accord | 2 +- .../cassandra/auth/CassandraRoleManager.java | 16 +--- .../cassandra/auth/PasswordSaltSupplier.java | 62 ++++++++++++++ .../config/CassandraRelevantProperties.java | 17 +++- .../service/FileSystemOwnershipCheck.java | 5 +- .../DirectStreamingConnectionFactory.java | 6 ++ .../simulator/asm/ClassTransformer.java | 1 + .../asm/DeterministicChanceSupplier.java | 25 ++++++ .../simulator/asm/InterceptClasses.java | 38 +++++---- .../simulator/ClusterSimulation.java | 23 ++++- .../cassandra/simulator/SimulatorUtils.java | 37 ++++++++ .../asm/InterceptAsClassTransformer.java | 7 +- .../simulator/debug/SelfReconcile.java | 25 ++++-- .../paxos/PaxosSimulationRunner.java | 7 ++ .../systems/InterceptingMonitors.java | 5 +- .../systems/SimulatedFailureDetector.java | 10 +-- .../test/ShortPaxosSimulationTest.java | 2 +- .../simulator/test/SimulationTestBase.java | 2 +- .../auth/PasswordAuthenticatorTest.java | 2 +- .../AbstractFilesystemOwnershipCheckTest.java | 84 ++++++++++--------- 20 files changed, 277 insertions(+), 99 deletions(-) create mode 100644 src/java/org/apache/cassandra/auth/PasswordSaltSupplier.java create mode 100644 test/simulator/asm/org/apache/cassandra/simulator/asm/DeterministicChanceSupplier.java diff --git a/modules/accord b/modules/accord index d99ad84cc49a..3056d13bc8c4 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit d99ad84cc49a96299a9ae55183e38ee6f1aa3f47 +Subproject commit 3056d13bc8c45a22ec794e0979d02f469cc4e209 diff --git a/src/java/org/apache/cassandra/auth/CassandraRoleManager.java b/src/java/org/apache/cassandra/auth/CassandraRoleManager.java index 1e1a9ec310e9..45e9b0dd4d53 100644 --- a/src/java/org/apache/cassandra/auth/CassandraRoleManager.java +++ b/src/java/org/apache/cassandra/auth/CassandraRoleManager.java @@ -75,7 +75,6 @@ import org.apache.cassandra.utils.NoSpamLogger; import org.mindrot.jbcrypt.BCrypt; -import static org.apache.cassandra.config.CassandraRelevantProperties.AUTH_BCRYPT_GENSALT_LOG2_ROUNDS; import static org.apache.cassandra.service.QueryState.forInternalCalls; /** @@ -147,17 +146,6 @@ public class CassandraRoleManager implements IRoleManager, CassandraRoleManagerM } }; - private static final int GENSALT_LOG2_ROUNDS = getGensaltLogRounds(); - - static int getGensaltLogRounds() - { - int rounds = AUTH_BCRYPT_GENSALT_LOG2_ROUNDS.getInt(10); - if (rounds < 4 || rounds > 30) - throw new ConfigurationException(String.format("Bad value for system property %s." + - "Please use a value between 4 and 30 inclusively", AUTH_BCRYPT_GENSALT_LOG2_ROUNDS.getKey())); - return rounds; - } - private SelectStatement loadRoleStatement; private SelectStatement loadIdentityStatement; @@ -689,9 +677,11 @@ private String optionsToAssignments(Map options) .collect(Collectors.joining(",")); } + + private static String hashpw(String password) { - return BCrypt.hashpw(password, BCrypt.gensalt(GENSALT_LOG2_ROUNDS)); + return BCrypt.hashpw(password, PasswordSaltSupplier.get()); } private static String escape(String name) diff --git a/src/java/org/apache/cassandra/auth/PasswordSaltSupplier.java b/src/java/org/apache/cassandra/auth/PasswordSaltSupplier.java new file mode 100644 index 000000000000..9c9bd1d0f813 --- /dev/null +++ b/src/java/org/apache/cassandra/auth/PasswordSaltSupplier.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.auth; + +import java.util.function.Supplier; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.exceptions.ConfigurationException; +import org.mindrot.jbcrypt.BCrypt; + +import static org.apache.cassandra.config.CassandraRelevantProperties.AUTH_BCRYPT_GENSALT_LOG2_ROUNDS; + +public class PasswordSaltSupplier +{ + // 2 ** GENSALT_LOG2_ROUNDS rounds of hashing will be performed. + private static final int GENSALT_LOG2_ROUNDS = getGensaltLogRounds(); + + @VisibleForTesting + static int getGensaltLogRounds() + { + int rounds = AUTH_BCRYPT_GENSALT_LOG2_ROUNDS.getInt(); + if (rounds < 4 || rounds > 30) + throw new ConfigurationException(String.format("Bad value for system property -D%s." + + "Please use a value between 4 and 30 inclusively", + AUTH_BCRYPT_GENSALT_LOG2_ROUNDS.getKey())); + return rounds; + } + private static Supplier DEFAULT_SALT_SUPPLIER = () -> BCrypt.gensalt(GENSALT_LOG2_ROUNDS); + private static Supplier saltSupplier = DEFAULT_SALT_SUPPLIER; + + public static void unsafeSet(Supplier newSaltSupplier) + { + assert newSaltSupplier != null; + saltSupplier = newSaltSupplier; + } + public static void unsafeReset() + { + saltSupplier = DEFAULT_SALT_SUPPLIER; + } + + public static String get() + { + return saltSupplier.get(); + } +} diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index c8f57f4e4c70..51d270c19217 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -27,7 +27,6 @@ import org.apache.cassandra.db.virtual.LogMessagesTable; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.service.FileSystemOwnershipCheck; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.StorageCompatibilityMode; @@ -53,7 +52,7 @@ public enum CassandraRelevantProperties ALLOW_UNSAFE_TRANSIENT_CHANGES("cassandra.allow_unsafe_transient_changes"), APPROXIMATE_TIME_PRECISION_MS("cassandra.approximate_time_precision_ms", "2"), /** 2 ** GENSALT_LOG2_ROUNDS rounds of hashing will be performed. */ - AUTH_BCRYPT_GENSALT_LOG2_ROUNDS("cassandra.auth_bcrypt_gensalt_log2_rounds"), + AUTH_BCRYPT_GENSALT_LOG2_ROUNDS("cassandra.auth_bcrypt_gensalt_log2_rounds", "4"), /** We expect default values on cache retries and interval to be sufficient for everyone but have this escape hatch just in case. */ AUTH_CACHE_WARMING_MAX_RETRIES("cassandra.auth_cache.warming.max_retries"), AUTH_CACHE_WARMING_RETRY_INTERVAL_MS("cassandra.auth_cache.warming.retry_interval_ms"), @@ -236,11 +235,11 @@ public enum CassandraRelevantProperties /** @deprecated should be removed in favor of flags in relevant startup check (FileSystemOwnershipCheck) */ /** @deprecated See CASSANDRA-17797 */ @Deprecated(since = "4.1") - FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME("cassandra.fs_ownership_filename", FileSystemOwnershipCheck.DEFAULT_FS_OWNERSHIP_FILENAME), + FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME("cassandra.fs_ownership_filename", ".cassandra_fs_ownership"), /** @deprecated should be removed in favor of flags in relevant startup check (FileSystemOwnershipCheck) */ /** @deprecated See CASSANDRA-17797 */ @Deprecated(since = "4.1") - FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN(FileSystemOwnershipCheck.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN), + FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN("CassandraOwnershipToken"), FORCE_DEFAULT_INDEXING_PAGE_SIZE("cassandra.force_default_indexing_page_size"), /** Used when running in Client mode and the system and schema keyspaces need to be initialized outside of their normal initialization path **/ FORCE_LOAD_LOCAL_KEYSPACES("cassandra.schema.force_load_local_keyspaces"), @@ -332,6 +331,16 @@ public enum CassandraRelevantProperties /** Java Virtual Machine implementation name */ JAVA_VM_NAME("java.vm.name"), JOIN_RING("cassandra.join_ring", "true"), + + /** + * {@link StorageCompatibilityMode} mode sets how the node will behave, sstable or messaging versions to use etc according to a yaml setting. + * But many tests don't load the config hence we need to force it otherwise they would run always under the default. Config is null for junits + * that don't load the config. Get from env var that CI/build.xml sets. + * + * This is a dev/CI only property. Do not use otherwise. + */ + JUNIT_STORAGE_COMPATIBILITY_MODE("cassandra.junit_storage_compatibility_mode", StorageCompatibilityMode.CASSANDRA_4.toString()), + /** startup checks properties */ LIBJEMALLOC("cassandra.libjemalloc"), /** Line separator ("\n" on UNIX). */ diff --git a/src/java/org/apache/cassandra/service/FileSystemOwnershipCheck.java b/src/java/org/apache/cassandra/service/FileSystemOwnershipCheck.java index 3d69c9e7631c..6e6ebd67b7a6 100644 --- a/src/java/org/apache/cassandra/service/FileSystemOwnershipCheck.java +++ b/src/java/org/apache/cassandra/service/FileSystemOwnershipCheck.java @@ -75,9 +75,6 @@ public class FileSystemOwnershipCheck implements StartupCheck { private static final Logger logger = LoggerFactory.getLogger(FileSystemOwnershipCheck.class); - public static final String FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN = "CassandraOwnershipToken"; - public static final String DEFAULT_FS_OWNERSHIP_FILENAME = ".cassandra_fs_ownership"; - // Ownership file properties static final String VERSION = "version"; static final String VOLUME_COUNT = "volume_count"; @@ -230,7 +227,7 @@ protected String constructTokenFromProperties(Map config) throws { String cluster = getOwnershipToken(config); if (null == cluster || cluster.isEmpty()) - throw exception(String.format(MISSING_PROPERTY, FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN)); + throw exception(String.format(MISSING_PROPERTY, CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.getKey())); return cluster; } diff --git a/test/distributed/org/apache/cassandra/distributed/impl/DirectStreamingConnectionFactory.java b/test/distributed/org/apache/cassandra/distributed/impl/DirectStreamingConnectionFactory.java index 72105d8f403c..bd674b3b1e49 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/DirectStreamingConnectionFactory.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/DirectStreamingConnectionFactory.java @@ -325,6 +325,12 @@ public synchronized void onClose(Runnable runOnClose) else if (onClose == null) onClose = runOnClose; else { Runnable tmp = onClose; onClose = () -> { tmp.run(); runOnClose.run(); }; } } + + @Override + public int hashCode() + { + return id; + } } private final DirectStreamingChannel outToRecipient, outToOriginator; diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java index 4e5dd730d7b4..f9bab8eaed04 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java @@ -300,6 +300,7 @@ void witness(TransformationKind kind) { case FIELD_NEMESIS: case SIGNAL_NEMESIS: + // TODO: this isn't correct: we will share any class we choose not to insert nemesis points into on first transformation isCacheablyTransformed = false; } methodLogger.witness(kind); diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/DeterministicChanceSupplier.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/DeterministicChanceSupplier.java new file mode 100644 index 000000000000..dae021a128b8 --- /dev/null +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/DeterministicChanceSupplier.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.asm; + +import java.util.function.IntFunction; + +public interface DeterministicChanceSupplier extends IntFunction +{ +} diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java index 90792dee5564..dd53ce067fbe 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java @@ -96,19 +96,24 @@ static class PeerGroup class SubTransformer implements BiFunction { private final Map isolatedCache = new ConcurrentHashMap<>(); + private final int id; + SubTransformer(int id) + { + this.id = id; + } @Override public byte[] apply(String name, byte[] bytes) { - return transformTransitiveClosure(name, bytes, isolatedCache); + return transformTransitiveClosure(name, bytes, isolatedCache, id); } } private final Map cache = new ConcurrentHashMap<>(); private final int api; - private final ChanceSupplier nemesisChance; - private final ChanceSupplier monitorDelayChance; + private final DeterministicChanceSupplier nemesisChance; + private final DeterministicChanceSupplier monitorDelayChance; private final Hashcode insertHashcode; private final NemesisFieldKind.Selector nemesisFieldSelector; private final ClassLoader prewarmClassLoader; @@ -116,12 +121,12 @@ public byte[] apply(String name, byte[] bytes) private final byte[] bufIn = new byte[4096]; private final ByteArrayOutputStream bufOut = new ByteArrayOutputStream(); - public InterceptClasses(ChanceSupplier monitorDelayChance, ChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) + public InterceptClasses(DeterministicChanceSupplier monitorDelayChance, DeterministicChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) { this(BYTECODE_VERSION, monitorDelayChance, nemesisChance, nemesisFieldSelector, prewarmClassLoader, prewarm); } - public InterceptClasses(int api, ChanceSupplier monitorDelayChance, ChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) + public InterceptClasses(int api, DeterministicChanceSupplier monitorDelayChance, DeterministicChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) { this.api = api; this.nemesisChance = nemesisChance; @@ -135,10 +140,10 @@ public InterceptClasses(int api, ChanceSupplier monitorDelayChance, ChanceSuppli @Override public byte[] apply(String name, byte[] bytes) { - return transformTransitiveClosure(name, bytes, null); + return transformTransitiveClosure(name, bytes, null, 0); } - private synchronized byte[] transformTransitiveClosure(String externalName, byte[] input, Map isolatedCache) + private synchronized byte[] transformTransitiveClosure(String externalName, byte[] input, Map isolatedCache, int id) { if (input == null) return maybeSynthetic(externalName); @@ -164,12 +169,12 @@ private synchronized byte[] transformTransitiveClosure(String externalName, byte case UNMODIFIED: return input; case UNSHAREABLE: - return transform(internalName, externalName, null, input, null, null); + return transform(internalName, externalName, null, input, null, id, null); } } for (String peer : cached.uncacheablePeers) - transform(peer, slashesToDots(peer), null, cache.get(peer).bytes, isolatedCache, null); + transform(peer, slashesToDots(peer), null, cache.get(peer).bytes, isolatedCache, id, null); switch (cached.kind) { @@ -192,13 +197,13 @@ private synchronized byte[] transformTransitiveClosure(String externalName, byte }; final PeerGroup peerGroup = new PeerGroup(); - byte[] result = transform(internalName, externalName, peerGroup, input, isolatedCache, dependentTypeConsumer); + byte[] result = transform(internalName, externalName, peerGroup, input, isolatedCache, id, dependentTypeConsumer); for (String next = load.pollFirst(); next != null; next = load.pollFirst()) { // TODO (now): otherwise merge peer groups Cached existing = cache.get(next); if (existing == null) - transform(next, slashesToDots(next), peerGroup, read(next), isolatedCache, dependentTypeConsumer); + transform(next, slashesToDots(next), peerGroup, read(next), isolatedCache, id, dependentTypeConsumer); } return result; @@ -222,7 +227,7 @@ private byte[] read(String name) } } - private byte[] transform(String internalName, String externalName, PeerGroup peerGroup, byte[] input, Map isolatedCache, Consumer dependentTypes) + private byte[] transform(String internalName, String externalName, PeerGroup peerGroup, byte[] input, Map isolatedCache, int id, Consumer dependentTypes) { Hashcode hashcode = insertHashCode(externalName); @@ -247,7 +252,8 @@ private byte[] transform(String internalName, String externalName, PeerGroup pee return input; } - ClassTransformer transformer = new ClassTransformer(api, internalName, flags, monitorDelayChance, new NemesisGenerator(api, internalName, nemesisChance), nemesisFieldSelector, hashcode, dependentTypes); + int chanceSeed = internalName.hashCode() * 31 + id; + ClassTransformer transformer = new ClassTransformer(api, internalName, flags, monitorDelayChance.apply(chanceSeed), new NemesisGenerator(api, internalName, nemesisChance.apply(chanceSeed + 1)), nemesisFieldSelector, hashcode, dependentTypes); transformer.setUpdateVisibility(true); transformer.readAndTransform(input); @@ -371,13 +377,15 @@ protected byte[] maybeSynthetic(String externalName) EnumSet flags = EnumSet.of(Flag.GLOBAL_METHODS, Flag.MONITORS, Flag.LOCK_SUPPORT); if (NEMESIS.matcher(externalName).matches()) flags.add(Flag.NEMESIS); - NemesisGenerator nemesis = new NemesisGenerator(api, externalName, nemesisChance); + + int hashCode = externalName.hashCode(); + NemesisGenerator nemesis = new NemesisGenerator(api, externalName, nemesisChance.apply(hashCode)); ShadowingTransformer transformer; transformer = new ShadowingTransformer(InterceptClasses.BYTECODE_VERSION, originalType, shadowType, originalRootType, shadowRootType, originalOuterTypePrefix, shadowOuterTypePrefix, - flags, monitorDelayChance, nemesis, nemesisFieldSelector, null); + flags, monitorDelayChance.apply(hashCode), nemesis, nemesisFieldSelector, null); transformer.readAndTransform(Utils.readDefinition(originalType + ".class")); return transformer.toBytes(); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index 7c8867b9c19d..fac497707602 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -38,6 +38,7 @@ import com.google.common.util.concurrent.AsyncFunction; import com.google.common.util.concurrent.FutureCallback; +import org.apache.cassandra.auth.PasswordSaltSupplier; import org.apache.cassandra.concurrent.ExecutorFactory; import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.distributed.Cluster; @@ -62,6 +63,7 @@ import org.apache.cassandra.service.paxos.BallotGenerator; import org.apache.cassandra.service.paxos.PaxosPrepare; import org.apache.cassandra.simulator.RandomSource.Choices; +import org.apache.cassandra.simulator.asm.DeterministicChanceSupplier; import org.apache.cassandra.simulator.asm.InterceptAsClassTransformer; import org.apache.cassandra.simulator.asm.NemesisFieldSelectors; import org.apache.cassandra.simulator.cluster.ClusterActions; @@ -703,7 +705,24 @@ public ClusterSimulation(RandomSource random, long seed, int uniqueNum, }); Predicate sharedClassPredicate = getSharedClassPredicate(ISOLATE, SHARE, ANY, SIMULATION); - InterceptAsClassTransformer interceptClasses = new InterceptAsClassTransformer(builder.monitorDelayChance.asSupplier(random), builder.nemesisChance.asSupplier(random), NemesisFieldSelectors.get(), ClassLoader.getSystemClassLoader(), sharedClassPredicate.negate()); + DeterministicChanceSupplier monitorDelayChance; { + long monitorDelayChanceSeed = random.uniform(0, Long.MAX_VALUE); + monitorDelayChance = hash -> { + RandomSource subRandom = new RandomSource.Default(); + subRandom.reset(monitorDelayChanceSeed * 31 + hash); + return builder.monitorDelayChance.asSupplier(subRandom); + }; + } + DeterministicChanceSupplier nemesisChance; { + long nemesisChanceSeed = random.uniform(0, Long.MAX_VALUE); + nemesisChance = hash -> { + RandomSource subRandom = new RandomSource.Default(); + subRandom.reset(nemesisChanceSeed * 31 + hash); + return builder.nemesisChance.asSupplier(subRandom); + }; + } + + InterceptAsClassTransformer interceptClasses = new InterceptAsClassTransformer(monitorDelayChance, nemesisChance, NemesisFieldSelectors.get(), ClassLoader.getSystemClassLoader(), sharedClassPredicate.negate()); threadLocalRandomCheck = new ThreadLocalRandomCheck(builder.onThreadLocalRandomCheck); Failures failures = builder.failures; @@ -769,8 +788,10 @@ public void initialise(ClassLoader classLoader, ThreadGroup threadGroup, int num @Override public void beforeStartup(IInstance i) { + ((IInvokableInstance) i).unsafeAcceptOnThisThread(PasswordSaltSupplier::unsafeSet, () -> "$2a$05$rT01y27MnvpE7NgzwvYNFe"); ((IInvokableInstance) i).unsafeAcceptOnThisThread(IfInterceptibleThread::setThreadLocalRandomCheck, (LongConsumer) threadLocalRandomCheck); + int num = i.config().num(); if (builder.memoryListener != null) { diff --git a/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java b/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java index 23fe5eaed65e..622e35ab5d85 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java +++ b/test/simulator/main/org/apache/cassandra/simulator/SimulatorUtils.java @@ -18,9 +18,13 @@ package org.apache.cassandra.simulator; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import org.slf4j.Logger; @@ -46,4 +50,37 @@ public static void dumpStackTraces(Logger logger) logger.error("{}:\n{}", thread, Threads.prettyPrint(ste, false, prefix, delimiter, ""))); FastThreadLocal.destroy(); } + + public static void verifyAndlogSimulatorArgs(Logger logger, String[] args) + { + RuntimeMXBean runtimeMxBean = ManagementFactory.getRuntimeMXBean(); + final List jvmArgs = runtimeMxBean.getInputArguments(); + logger.error("JVM Args: {}", jvmArgs.stream().collect(Collectors.joining("\" \"", "\"", "\""))); + logger.error("Command Args: {}", Arrays.stream(args).collect(Collectors.joining("\" \"", "\"", "\""))); + + assert jvmArgs.stream().anyMatch(arg -> arg.startsWith("-Xbootclasspath/a") && arg.endsWith("simulator-bootstrap.jar")) : + "must launch JVM with -Xbootclasspath/a:simulator-bootstrap.jar"; + assert jvmArgs.stream().anyMatch(arg -> arg.startsWith("-javaagent:") && arg.endsWith("simulator-asm.jar")) : + "must launch JVM with -javaagent:simulator-asm.jar"; + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-XX:-BackgroundCompilation"))) + logger.warn("JVM Argument -XX:-BackgroundCompilation not set, non-determinism possible"); + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-XX:-TieredCompilation"))) + logger.warn("JVM Argument -XX:-TieredCompilation not set, non-determinism possible"); + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-XX:CICompilerCount=1"))) + logger.warn("JVM Argument -XX:CICompilerCount=1 not set, non-determinism possible"); + if (!jvmArgs.stream().anyMatch(arg -> arg.startsWith("-XX:Tier4CompileThreshold="))) + logger.warn("JVM Argument -XX:Tier4CompileThreshold not set, non-determinism possible. Typically set -XX:Tier4CompileThreshold=1000"); + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-Dcassandra.disable_tcactive_openssl=true"))) + logger.warn("JVM Argument -Dcassandra.disable_tcactive_openssl=true not set, non-determinism possible. Typically set -XX:Tier4CompileThreshold=1000"); + + // log4j support + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-Dlog4j2.disableJmx=true"))) + logger.warn("JVM Argument -Dlog4j2.disableJmx=true not set, non-determinism possible"); + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-Dlog4j2.disable.jmx=true"))) + logger.warn("JVM Argument -Dlog4j2.disable.jmx=true not set, non-determinism possible"); + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-Dlog4j.shutdownHookEnabled=false"))) + logger.warn("JVM Argument -Dlog4j.shutdownHookEnabled=false not set, non-determinism possible"); + if (!jvmArgs.stream().anyMatch(arg -> arg.equals("-Dcassandra.simulator.skiplog4jreload=true"))) + logger.warn("JVM Argument -Dcassandra.simulator.skiplog4jreload=true not set, non-determinism possible"); + } } diff --git a/test/simulator/main/org/apache/cassandra/simulator/asm/InterceptAsClassTransformer.java b/test/simulator/main/org/apache/cassandra/simulator/asm/InterceptAsClassTransformer.java index 200f98409438..b3f04c157f32 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/asm/InterceptAsClassTransformer.java +++ b/test/simulator/main/org/apache/cassandra/simulator/asm/InterceptAsClassTransformer.java @@ -25,12 +25,13 @@ // an adapter to IClassTransformer that is loaded by the system classloader public class InterceptAsClassTransformer extends InterceptClasses implements IClassTransformer { - public InterceptAsClassTransformer(ChanceSupplier monitorDelayChance, ChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) + private int subTransformerCount = 0; + public InterceptAsClassTransformer(DeterministicChanceSupplier monitorDelayChance, DeterministicChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) { super(monitorDelayChance, nemesisChance, nemesisFieldSelector, prewarmClassLoader, prewarm); } - public InterceptAsClassTransformer(int api, ChanceSupplier monitorDelayChance, ChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) + public InterceptAsClassTransformer(int api, DeterministicChanceSupplier monitorDelayChance, DeterministicChanceSupplier nemesisChance, NemesisFieldKind.Selector nemesisFieldSelector, ClassLoader prewarmClassLoader, Predicate prewarm) { super(api, monitorDelayChance, nemesisChance, nemesisFieldSelector, prewarmClassLoader, prewarm); } @@ -44,6 +45,6 @@ public byte[] transform(String name, byte[] bytecode) @Override public IClassTransformer initialise() { - return new SubTransformer()::apply; + return new SubTransformer(++subTransformerCount)::apply; } } diff --git a/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java b/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java index 390bb82a9a8c..78ee783bd7c9 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java +++ b/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java @@ -124,18 +124,27 @@ synchronized void verify(Object event) if (events.size() == 1) { - int cur = counter; - while (cur == counter) + boolean restoreInterrupt = Thread.interrupted(); + try { - try - { - wait(); - } - catch (InterruptedException e) + int cur = counter; + while (cur == counter) { - throw new UncheckedInterruptedException(e); + try + { + wait(); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } } } + finally + { + if (restoreInterrupt) + Thread.currentThread().interrupt(); + } } else { diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java index 50a0ee5b51c5..373a3767145b 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java @@ -22,15 +22,21 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import io.airlift.airline.Cli; import io.airlift.airline.Command; import io.airlift.airline.Option; import org.apache.cassandra.config.Config; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.simulator.SimulationRunner; +import org.apache.cassandra.simulator.SimulatorUtils; public class PaxosSimulationRunner extends SimulationRunner { + private static Logger logger = LoggerFactory.getLogger(PaxosSimulationRunner.class); + @Command(name = "run") public static class Run extends SimulationRunner.Run { @@ -134,6 +140,7 @@ static void propagateTo(String consistency, boolean withStateCache, boolean with */ public static void main(String[] args) throws IOException { + SimulatorUtils.verifyAndlogSimulatorArgs(logger, args); PaxosClusterSimulation.Builder builder = new PaxosClusterSimulation.Builder(); builder.unique(uniqueNum.getAndIncrement()); diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java index 489ff15b253e..626a267a47ff 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java @@ -662,8 +662,9 @@ public void preMonitorEnter(Object monitor, float preMonitorDelayChance) if (!(anyThread instanceof InterceptibleThread)) return; - boolean restoreInterrupt = false; + // save any interrupt before testing random.decide, in case we are trapping these for verification InterceptibleThread thread = (InterceptibleThread) anyThread; + boolean restoreInterrupt = Thread.interrupted(); try { if ( !thread.isEvaluationDeterministic() @@ -674,8 +675,6 @@ public void preMonitorEnter(Object monitor, float preMonitorDelayChance) InterceptedConditionWait signal = new InterceptedConditionWait(NEMESIS, 0L, thread, captureWaitSite(thread), null); thread.interceptWait(signal); - // save interrupt state to restore afterwards - new ones only arrive if terminating simulation - restoreInterrupt = Thread.interrupted(); while (true) { try diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedFailureDetector.java b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedFailureDetector.java index 6bdc74c1d651..52bd7b6cf586 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedFailureDetector.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedFailureDetector.java @@ -19,12 +19,12 @@ package org.apache.cassandra.simulator.systems; import java.net.InetSocketAddress; -import java.util.Collections; -import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CopyOnWriteArraySet; import java.util.function.Consumer; import java.util.function.Function; @@ -41,7 +41,7 @@ public static class Instance implements IFailureDetector private static volatile FailureDetector wrapped; private static volatile Function OVERRIDE; - private static final Map LISTENERS = Collections.synchronizedMap(new IdentityHashMap<>()); + private static final Set LISTENERS = new CopyOnWriteArraySet<>(); private static FailureDetector wrapped() { @@ -92,7 +92,7 @@ public void forceConviction(InetAddressAndPort ep) public void registerFailureDetectionEventListener(IFailureDetectionEventListener listener) { - LISTENERS.put(listener, Boolean.TRUE); + LISTENERS.add(listener); } public void unregisterFailureDetectionEventListener(IFailureDetectionEventListener listener) @@ -103,7 +103,7 @@ public void unregisterFailureDetectionEventListener(IFailureDetectionEventListen synchronized static void setup(Function override, Consumer> register) { OVERRIDE = override; - register.accept(ep -> LISTENERS.keySet().forEach(c -> c.convict(InetAddressAndPort.getByAddress(ep), Double.MAX_VALUE))); + register.accept(ep -> LISTENERS.forEach(c -> c.convict(InetAddressAndPort.getByAddress(ep), Double.MAX_VALUE))); } } diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java index 7eee1a69c8ab..474f16861579 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java @@ -98,7 +98,7 @@ public class ShortPaxosSimulationTest @Test public void simulationTest() throws IOException { - PaxosSimulationRunner.main(new String[] { "run", "-n", "3..6", "-t", "1000", "-c", "2", "--cluster-action-limit", "2", "-s", "30" }); + PaxosSimulationRunner.main(new String[] { "run", "--variant", "v2", "-n", "3..6", "-t", "1000", "-c", "2", "--cluster-action-limit", "2", "-s", "30" }); } @Test diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/SimulationTestBase.java b/test/simulator/test/org/apache/cassandra/simulator/test/SimulationTestBase.java index 88aaf5374cb6..406725bb1c33 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/SimulationTestBase.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/SimulationTestBase.java @@ -251,7 +251,7 @@ public static void simulate(IIsolatedExecutor.SerializableRunnable[] runnables, InstanceClassLoader classLoader = new InstanceClassLoader(1, 1, AbstractCluster.CURRENT_VERSION.classpath, Thread.currentThread().getContextClassLoader(), sharedClassPredicate, - new InterceptClasses(() -> 1.0f, () -> 1.0f, + new InterceptClasses((x) -> () -> 1.0f, (x) -> () -> 1.0f, NemesisFieldSelectors.get(), ClassLoader.getSystemClassLoader(), sharedClassPredicate.negate())::apply); diff --git a/test/unit/org/apache/cassandra/auth/PasswordAuthenticatorTest.java b/test/unit/org/apache/cassandra/auth/PasswordAuthenticatorTest.java index fadfa82c6c40..fb6adb827650 100644 --- a/test/unit/org/apache/cassandra/auth/PasswordAuthenticatorTest.java +++ b/test/unit/org/apache/cassandra/auth/PasswordAuthenticatorTest.java @@ -44,9 +44,9 @@ import static org.apache.cassandra.auth.AuthTestUtils.ALL_ROLES; import static org.apache.cassandra.auth.CassandraRoleManager.DEFAULT_SUPERUSER_PASSWORD; -import static org.apache.cassandra.auth.CassandraRoleManager.getGensaltLogRounds; import static org.apache.cassandra.auth.PasswordAuthenticator.SaslNegotiator; import static org.apache.cassandra.auth.PasswordAuthenticator.checkpw; +import static org.apache.cassandra.auth.PasswordSaltSupplier.getGensaltLogRounds; import static org.apache.cassandra.config.CassandraRelevantProperties.AUTH_BCRYPT_GENSALT_LOG2_ROUNDS; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; diff --git a/test/unit/org/apache/cassandra/service/AbstractFilesystemOwnershipCheckTest.java b/test/unit/org/apache/cassandra/service/AbstractFilesystemOwnershipCheckTest.java index cae7de060b62..7c0d78dbd543 100644 --- a/test/unit/org/apache/cassandra/service/AbstractFilesystemOwnershipCheckTest.java +++ b/test/unit/org/apache/cassandra/service/AbstractFilesystemOwnershipCheckTest.java @@ -33,14 +33,15 @@ import org.junit.Ignore; import org.junit.Test; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.StartupChecksOptions; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.exceptions.StartupException; import org.apache.cassandra.io.util.File; -import static org.apache.cassandra.service.FileSystemOwnershipCheck.DEFAULT_FS_OWNERSHIP_FILENAME; +import static org.apache.cassandra.config.CassandraRelevantProperties.FILE_SYSTEM_CHECK_ENABLE; +import static org.apache.cassandra.config.CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME; +import static org.apache.cassandra.config.CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN; import static org.apache.cassandra.service.FileSystemOwnershipCheck.ERROR_PREFIX; import static org.apache.cassandra.service.FileSystemOwnershipCheck.INCONSISTENT_FILES_FOUND; import static org.apache.cassandra.service.FileSystemOwnershipCheck.INVALID_FILE_COUNT; @@ -69,15 +70,20 @@ public abstract class AbstractFilesystemOwnershipCheckTest static WithProperties properties; + protected static String ownershipCheckFileName() + { + return FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME.getDefaultValue(); + } + protected void setup() { cleanTempDir(); tempDir = new File(com.google.common.io.Files.createTempDir()); token = makeRandomString(10); properties = new WithProperties(); - System.clearProperty(CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME.getKey()); - System.clearProperty(CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.getKey()); - System.clearProperty(CassandraRelevantProperties.FILE_SYSTEM_CHECK_ENABLE.getKey()); + System.clearProperty(FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME.getKey()); + System.clearProperty(FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.getKey()); + System.clearProperty(FILE_SYSTEM_CHECK_ENABLE.getKey()); } static File writeFile(File dir, String filename, Properties props) throws IOException @@ -118,7 +124,7 @@ private static Properties makeProperties(int version, int volumeCount, String to private static File writeFile(File dir, int volumeCount, String token) throws IOException { - return AbstractFilesystemOwnershipCheckTest.writeFile(dir, DEFAULT_FS_OWNERSHIP_FILENAME, 1, volumeCount, token); + return AbstractFilesystemOwnershipCheckTest.writeFile(dir, ownershipCheckFileName(), 1, volumeCount, token); } private static File writeFile(File dir, final String filename, int version, int volumeCount, String token) @@ -201,7 +207,7 @@ public void skipCheckDisabledIfSystemPropertyIsEmpty() throws Exception { // no exceptions thrown from the supplier because the check is skipped options.disable(check_filesystem_ownership); - System.clearProperty(CassandraRelevantProperties.FILE_SYSTEM_CHECK_ENABLE.getKey()); + System.clearProperty(FILE_SYSTEM_CHECK_ENABLE.getKey()); AbstractFilesystemOwnershipCheckTest.checker(() -> { throw new RuntimeException("FAIL"); }).execute(options); } @@ -210,23 +216,23 @@ public void skipCheckDisabledIfSystemPropertyIsFalseButOptionsEnabled() throws E { // no exceptions thrown from the supplier because the check is skipped options.enable(check_filesystem_ownership); - CassandraRelevantProperties.FILE_SYSTEM_CHECK_ENABLE.setBoolean(false); + FILE_SYSTEM_CHECK_ENABLE.setBoolean(false); AbstractFilesystemOwnershipCheckTest.checker(() -> { throw new RuntimeException("FAIL"); }).execute(options); } @Test public void checkEnabledButClusterPropertyIsEmpty() { - CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.setString(""); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(tempDir), options, MISSING_PROPERTY, CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.getKey()); + FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.setString(""); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(tempDir), options, MISSING_PROPERTY, FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.getKey()); } @Test public void checkEnabledButClusterPropertyIsUnset() { Assume.assumeFalse(options.getConfig(check_filesystem_ownership).containsKey("ownership_token")); - CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.clearValue(); // checkstyle: suppress nearby 'clearValueSystemPropertyUsage' - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(tempDir), options, MISSING_PROPERTY, CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.getKey()); + FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.clearValue(); // checkstyle: suppress nearby 'clearValueSystemPropertyUsage' + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(tempDir), options, MISSING_PROPERTY, FILE_SYSTEM_CHECK_OWNERSHIP_TOKEN.getKey()); } // tests for presence/absence of files in dirs @@ -319,7 +325,7 @@ public void propsFileUnreadable() throws Exception public void propsFileIllegalContent() throws Exception { File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - File propsFile = new File(leafDir, DEFAULT_FS_OWNERSHIP_FILENAME); //checkstyle: permit this instantiation + File propsFile = new File(leafDir, ownershipCheckFileName()); //checkstyle: permit this instantiation assertTrue(propsFile.createFileIfNotExists()); try (OutputStream os = Files.newOutputStream(propsFile.toPath())) { @@ -360,9 +366,9 @@ public void overrideFilename() throws Exception { File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); writeFile(leafDir.parent(), "other_file", AbstractFilesystemOwnershipCheckTest.makeProperties(1, 1, token)); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), options, NO_OWNERSHIP_FILE, quote(leafDir.absolutePath())); - CassandraRelevantProperties.FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME.setString("other_file"); - AbstractFilesystemOwnershipCheckTest.checker(leafDir).execute(options); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, NO_OWNERSHIP_FILE, quote(leafDir.absolutePath())); + FILE_SYSTEM_CHECK_OWNERSHIP_FILENAME.setString("other_file"); + checker(leafDir).execute(options); } // check consistency between discovered files @@ -404,11 +410,11 @@ public void differentExpectedCountsFoundInTrees() throws Exception public void emptyPropertiesFile() throws Exception { File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - writeFile(leafDir.parent(), DEFAULT_FS_OWNERSHIP_FILENAME, new Properties()); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), + writeFile(leafDir.parent(), ownershipCheckFileName(), new Properties()); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, String.format(INVALID_PROPERTY_VALUE, VERSION), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -418,11 +424,11 @@ public void missingVersionProp() throws Exception p.setProperty(VOLUME_COUNT, "1"); p.setProperty(TOKEN, "foo"); File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - writeFile(leafDir.parent(), DEFAULT_FS_OWNERSHIP_FILENAME, p); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), + writeFile(leafDir.parent(), ownershipCheckFileName(), p); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, String.format(INVALID_PROPERTY_VALUE, VERSION), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -431,11 +437,11 @@ public void nonNumericVersionProp() throws Exception Properties p = new Properties(); p.setProperty(VERSION, "abc"); File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - writeFile(leafDir.parent(), DEFAULT_FS_OWNERSHIP_FILENAME, p); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), + writeFile(leafDir.parent(), ownershipCheckFileName(), p); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, String.format(INVALID_PROPERTY_VALUE, VERSION), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -444,11 +450,11 @@ public void unsupportedVersionProp() throws Exception Properties p = new Properties(); p.setProperty(VERSION, "99"); File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - writeFile(leafDir.parent(), DEFAULT_FS_OWNERSHIP_FILENAME, p); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), + writeFile(leafDir.parent(), ownershipCheckFileName(), p); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, String.format(UNSUPPORTED_VERSION, "99"), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -458,11 +464,11 @@ public void missingVolumeCountProp() throws Exception p.setProperty(VERSION, "1"); p.setProperty(TOKEN, token); File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - writeFile(leafDir.parent(), DEFAULT_FS_OWNERSHIP_FILENAME, p); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), + writeFile(leafDir.parent(), ownershipCheckFileName(), p); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, String.format(INVALID_PROPERTY_VALUE, VOLUME_COUNT), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -473,11 +479,11 @@ public void nonNumericVolumeCountProp() throws Exception p.setProperty(VOLUME_COUNT, "bar"); p.setProperty(TOKEN, token); File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - writeFile(leafDir.parent(), DEFAULT_FS_OWNERSHIP_FILENAME, p); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), + writeFile(leafDir.parent(), ownershipCheckFileName(), p); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, String.format(INVALID_PROPERTY_VALUE, VOLUME_COUNT), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -487,11 +493,11 @@ public void missingTokenProp() throws Exception p.setProperty(VERSION, "1"); p.setProperty(VOLUME_COUNT, "1"); File leafDir = AbstractFilesystemOwnershipCheckTest.mkdirs(tempDir, "cassandra/data"); - writeFile(leafDir.parent(), DEFAULT_FS_OWNERSHIP_FILENAME, p); - AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), + writeFile(leafDir.parent(), ownershipCheckFileName(), p); + AbstractFilesystemOwnershipCheckTest.executeAndFail(checker(leafDir), options, String.format(INVALID_PROPERTY_VALUE, TOKEN), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -502,7 +508,7 @@ public void emptyTokenProp() throws Exception AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), options, String.format(INVALID_PROPERTY_VALUE, TOKEN), - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } @Test @@ -514,7 +520,7 @@ public void mismatchingTokenProp() throws Exception AbstractFilesystemOwnershipCheckTest.executeAndFail(AbstractFilesystemOwnershipCheckTest.checker(leafDir), options, MISMATCHING_TOKEN, - leafDir.parent().toPath().resolve(DEFAULT_FS_OWNERSHIP_FILENAME)); + leafDir.parent().toPath().resolve(ownershipCheckFileName())); } // Validate volume_count prop values match number of files found From c10c84b9cd3839404184df3669f8cd9a20a46524 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Mon, 23 Jan 2023 14:58:21 -0500 Subject: [PATCH 079/340] Accord/non-Accord interoperability and support for live migration Patch by Ariel Weisberg; Reviewed by Blake Eggleston for CASSANDRA-18129 Co-authored-by: Blake Eggleston --- .gitmodules | 2 +- build.xml | 12 +- ide/idea/workspace.xml | 1 - modules/accord | 2 +- .../apache/cassandra/concurrent/Stage.java | 1 + .../cassandra/concurrent/SyncFutureTask.java | 6 +- .../config/CassandraRelevantProperties.java | 1 + .../org/apache/cassandra/config/Config.java | 113 ++- .../cassandra/config/DatabaseDescriptor.java | 93 +- .../cassandra/cql3/UpdateParameters.java | 37 +- .../cql3/statements/BatchStatement.java | 2 +- .../statements/BatchUpdatesCollector.java | 2 +- .../cql3/statements/CQL3CasRequest.java | 54 +- .../statements/ModificationStatement.java | 65 +- .../cql3/statements/TransactionStatement.java | 67 +- .../apache/cassandra/cql3/terms/Lists.java | 68 +- .../db/AbstractMutationVerbHandler.java | 6 +- .../cassandra/db/ColumnFamilyStore.java | 5 + .../org/apache/cassandra/db/IMutation.java | 5 + .../org/apache/cassandra/db/Mutation.java | 72 +- .../db/PartitionRangeReadCommand.java | 18 +- .../org/apache/cassandra/db/ReadCommand.java | 84 +- .../cassandra/db/ReadCommandVerbHandler.java | 37 +- .../cassandra/db/ReadRepairVerbHandler.java | 7 +- .../db/SinglePartitionReadCommand.java | 16 +- .../apache/cassandra/db/SystemKeyspace.java | 57 +- .../db/partitions/AbstractBTreePartition.java | 34 +- .../db/partitions/AtomicBTreePartition.java | 17 +- .../db/partitions/FilteredPartition.java | 11 +- .../db/partitions/PartitionUpdate.java | 39 +- .../cassandra/db/rows/AbstractCell.java | 14 +- .../apache/cassandra/db/rows/BTreeRow.java | 16 +- .../apache/cassandra/db/rows/ColumnData.java | 21 +- .../cassandra/db/rows/ComplexColumnData.java | 17 +- .../org/apache/cassandra/db/rows/Row.java | 21 +- .../db/streaming/CassandraOutgoingFile.java | 8 + .../db/streaming/CassandraStreamManager.java | 4 +- .../db/streaming/CassandraStreamReceiver.java | 19 +- .../db/virtual/LocalRepairTables.java | 6 +- .../apache/cassandra/dht/AccordSplitter.java | 2 +- .../cassandra/dht/ByteOrderedPartitioner.java | 39 +- .../cassandra/dht/LocalPartitioner.java | 18 +- .../cassandra/dht/Murmur3Partitioner.java | 21 +- .../dht/OrderPreservingPartitioner.java | 10 +- .../cassandra/dht/RandomPartitioner.java | 19 +- src/java/org/apache/cassandra/dht/Range.java | 291 +++++- .../cassandra/exceptions/RequestFailure.java | 6 +- .../cassandra/locator/ReplicaLayout.java | 7 +- .../cassandra/locator/ReplicaPlans.java | 26 +- .../metrics/AccordClientRequestMetrics.java | 20 + .../metrics/CASClientRequestMetrics.java | 12 + .../metrics/ClientRequestsMetricsHolder.java | 2 + .../cassandra/metrics/KeyspaceMetrics.java | 10 + .../cassandra/metrics/TableMetrics.java | 13 + .../org/apache/cassandra/net/Message.java | 6 + .../org/apache/cassandra/net/MessageFlag.java | 5 +- .../cassandra/net/MessagingService.java | 4 +- .../cassandra/net/ResponseVerbHandler.java | 6 +- src/java/org/apache/cassandra/net/Verb.java | 79 +- .../cassandra/repair/AbstractRepairJob.java | 66 ++ .../cassandra/repair/AbstractRepairTask.java | 7 +- .../cassandra/repair/AccordRepairJob.java | 173 ++++ ...RepairJob.java => CassandraRepairJob.java} | 49 +- .../repair/IncrementalRepairTask.java | 2 +- .../cassandra/repair/NormalRepairTask.java | 5 +- .../cassandra/repair/PreviewRepairTask.java | 6 +- .../cassandra/repair/RepairCoordinator.java | 4 +- .../repair/RepairMessageVerbHandler.java | 14 +- .../apache/cassandra/repair/RepairResult.java | 6 +- .../cassandra/repair/RepairSession.java | 32 +- .../repair/messages/RepairMessage.java | 3 +- .../repair/messages/RepairOption.java | 52 +- .../repair/messages/SyncResponse.java | 5 +- .../schema/SystemDistributedKeyspace.java | 4 +- .../org/apache/cassandra/schema/TableId.java | 47 + .../cassandra/schema/TableMetadata.java | 9 +- .../service/ActiveRepairService.java | 8 +- .../apache/cassandra/service/CASRequest.java | 10 +- .../cassandra/service/StorageProxy.java | 266 +++-- .../cassandra/service/StorageService.java | 87 +- .../service/StorageServiceMBean.java | 19 + .../service/accord/AccordCachingState.java | 24 +- .../service/accord/AccordCommandStore.java | 8 +- .../accord/AccordFetchCoordinator.java | 6 +- .../service/accord/AccordJournal.java | 147 ++- .../service/accord/AccordKeyspace.java | 4 +- .../service/accord/AccordMessageSink.java | 162 +++- .../service/accord/AccordObjectSizes.java | 10 +- .../service/accord/AccordSafeCommand.java | 5 +- .../accord/AccordSafeCommandStore.java | 29 +- .../service/accord/AccordSerializers.java | 25 + .../service/accord/AccordService.java | 186 +++- .../service/accord/AccordTopologyUtils.java | 15 +- .../service/accord/IAccordService.java | 70 +- .../service/accord/api/AccordAgent.java | 20 +- .../service/accord/api/PartitionKey.java | 6 + .../accord/interop/AccordInteropApply.java | 269 ++++++ .../accord/interop/AccordInteropCommit.java | 73 ++ .../interop/AccordInteropExecution.java | 412 ++++++++ .../accord/interop/AccordInteropPersist.java | 167 ++++ .../accord/interop/AccordInteropRead.java | 209 ++++ .../interop/AccordInteropReadCallback.java | 88 ++ .../interop/AccordInteropReadRepair.java | 182 ++++ .../accord/serializers/ApplySerializers.java | 43 +- .../serializers/CheckStatusSerializers.java | 2 +- .../serializers/CommandSerializers.java | 4 +- .../accord/serializers/CommitSerializers.java | 53 +- .../InformHomeDurableSerializers.java | 1 - .../accord/serializers/KeySerializers.java | 2 +- .../serializers/ReadDataSerializers.java | 139 ++- .../serializers/SetDurableSerializers.java | 18 +- .../service/accord/txn/AccordUpdate.java | 126 +++ .../accord/txn/AccordUpdateParameters.java | 20 +- .../txn/RetryWithNewProtocolResult.java | 75 ++ .../service/accord/txn/TxnCondition.java | 5 +- .../cassandra/service/accord/txn/TxnData.java | 38 +- .../service/accord/txn/TxnDataName.java | 11 +- .../service/accord/txn/TxnNamedRead.java | 18 +- .../service/accord/txn/TxnQuery.java | 101 +- .../cassandra/service/accord/txn/TxnRead.java | 78 +- .../service/accord/txn/TxnResult.java | 88 ++ .../service/accord/txn/TxnUpdate.java | 54 +- .../service/accord/txn/TxnWrite.java | 38 +- .../accord/txn/UnrecoverableRepairUpdate.java | 206 ++++ .../migration/ConsensusKeyMigrationState.java | 365 +++++++ .../migration/ConsensusRequestRouter.java | 237 +++++ .../ConsensusTableMigrationState.java | 909 ++++++++++++++++++ .../service/paxos/AbstractPaxosRepair.java | 26 +- .../apache/cassandra/service/paxos/Paxos.java | 158 +-- .../service/paxos/PaxosCommitAndPrepare.java | 20 +- .../cassandra/service/paxos/PaxosPrepare.java | 106 +- .../cassandra/service/paxos/PaxosPropose.java | 155 +-- .../cassandra/service/paxos/PaxosRepair.java | 35 +- .../service/paxos/PaxosRequestCallback.java | 28 + .../cassandra/service/paxos/PaxosState.java | 69 +- .../cleanup/PaxosStartPrepareCleanup.java | 2 + .../service/reads/AbstractReadExecutor.java | 47 +- .../cassandra/service/reads/DataResolver.java | 18 +- .../service/reads/DigestResolver.java | 8 +- .../service/reads/ReadCoordinator.java | 78 ++ .../reads/ReplicaFilteringProtection.java | 9 +- .../service/reads/ResponseResolver.java | 4 +- .../reads/ShortReadPartitionsProtection.java | 23 +- .../service/reads/ShortReadProtection.java | 3 +- .../reads/range/RangeCommandIterator.java | 5 +- .../range/ScanAllRangesCommandIterator.java | 3 +- .../reads/repair/AbstractReadRepair.java | 16 +- .../reads/repair/BlockingPartitionRepair.java | 47 +- .../reads/repair/BlockingReadRepair.java | 170 +++- .../service/reads/repair/NoopReadRepair.java | 8 + .../reads/repair/ReadOnlyReadRepair.java | 12 +- .../service/reads/repair/ReadRepair.java | 18 +- .../reads/repair/ReadRepairStrategy.java | 9 +- .../cassandra/streaming/OutgoingStream.java | 4 + .../cassandra/streaming/SessionSummary.java | 11 +- .../streaming/StreamDeserializingTask.java | 3 +- .../cassandra/streaming/StreamOperation.java | 26 +- .../streaming/StreamReceiveTask.java | 18 +- .../cassandra/streaming/StreamSession.java | 23 +- .../cassandra/streaming/StreamSummary.java | 33 +- .../cassandra/streaming/StreamTask.java | 8 +- .../streaming/StreamTransferTask.java | 14 + .../streaming/TableStreamManager.java | 5 +- .../StreamSummaryCompositeData.java | 10 +- .../streaming/messages/CompleteMessage.java | 5 +- .../messages/IncomingStreamMessage.java | 8 +- .../streaming/messages/KeepAliveMessage.java | 5 +- .../messages/OutgoingStreamMessage.java | 5 +- .../streaming/messages/PrepareAckMessage.java | 5 +- .../messages/PrepareSynAckMessage.java | 7 +- .../streaming/messages/PrepareSynMessage.java | 9 +- .../streaming/messages/ReceivedMessage.java | 7 +- .../messages/SessionFailedMessage.java | 5 +- .../streaming/messages/StreamInitMessage.java | 9 +- .../streaming/messages/StreamMessage.java | 9 +- .../apache/cassandra/tcm/ClusterMetadata.java | 59 +- src/java/org/apache/cassandra/tcm/Epoch.java | 5 + .../apache/cassandra/tcm/MetadataKeys.java | 4 +- .../tcm/StubClusterMetadataService.java | 2 + .../apache/cassandra/tcm/Transformation.java | 25 +- .../tcm/compatibility/GossipHelper.java | 5 +- .../cassandra/tcm/ownership/TokenMap.java | 13 +- .../tcm/transformations/AlterSchema.java | 30 +- ...ginConsensusMigrationForTableAndRange.java | 134 +++ ...ishConsensusMigrationForTableAndRange.java | 162 ++++ .../SetConsensusMigrationTargetProtocol.java | 131 +++ .../org/apache/cassandra/tools/NodeProbe.java | 38 +- .../org/apache/cassandra/tools/NodeTool.java | 47 +- .../apache/cassandra/tools/RepairRunner.java | 16 +- .../nodetool/ConsensusMigrationAdmin.java | 146 +++ .../cassandra/tools/nodetool/Repair.java | 9 +- .../utils/AbstractBiMultiValMap.java | 134 +++ .../apache/cassandra/utils/BiMultiValMap.java | 103 +- .../utils/CollectionSerializers.java | 192 +++- .../apache/cassandra/utils/PojoToString.java | 182 ++++ .../cassandra/utils/SortedBiMultiValMap.java | 27 +- .../org/apache/cassandra/utils/TimeUUID.java | 5 + .../5.0/service.SyncComplete.bin | Bin 256 -> 258 bytes .../5.0/service.ValidationComplete.bin | Bin 597 -> 597 bytes .../distributed/impl/AbstractCluster.java | 2 +- .../test/OptimiseStreamsRepairTest.java | 12 +- .../distributed/test/ReadRepairTest.java | 34 +- .../distributed/test/ReadSpeculationTest.java | 4 +- .../test/SSTableIdGenerationTest.java | 9 +- .../test/ShortReadProtectionTest.java | 37 +- .../test/accord/AccordCQLTest.java | 227 ++++- .../test/accord/AccordFeatureFlagTest.java | 5 +- .../test/accord/AccordIntegrationTest.java | 22 +- .../test/accord/AccordInteropReadTest.java | 94 ++ .../accord/AccordInteroperabilityTest.java | 66 ++ .../test/accord/AccordMetricsTest.java | 2 +- .../test/accord/AccordMigrationTest.java | 655 +++++++++++++ .../test/accord/AccordTestBase.java | 197 +++- .../test/accord/NewSchemaTest.java | 10 + ...drailCollectionSizeOnSSTableWriteTest.java | 44 +- .../test/log/ClusterMetadataTestHelper.java | 3 + ...ur3ReplicationAwareTokenAllocatorTest.java | 4 +- .../NoReplicationTokenAllocatorTest.java | 10 +- ...domReplicationAwareTokenAllocatorTest.java | 4 +- .../microbench/ZeroCopyStreamingBench.java | 7 +- .../AtomicBTreePartitionUpdateBench.java | 4 +- .../simulator/ClusterSimulation.java | 34 +- .../cassandra/simulator/SimulationRunner.java | 32 +- .../simulator/cluster/ClusterActions.java | 29 +- .../simulator/cluster/KeyspaceActions.java | 72 +- .../cluster/OnClusterMigrateConsensus.java | 87 ++ .../OnClusterMigrateConsensusOneRange.java | 50 + .../simulator/cluster/OnInstanceRepair.java | 2 +- .../OnInstanceStartConsensusMigration.java | 53 + .../cassandra/simulator/debug/Reconcile.java | 2 +- .../simulator/debug/SelfReconcile.java | 1 - ...bstractPairOfSequencesPaxosSimulation.java | 8 +- .../paxos/PairOfSequencesPaxosSimulation.java | 4 + .../paxos/PaxosClusterSimulation.java | 7 +- .../paxos/PaxosSimulationRunner.java | 17 +- .../test/AccordJournalSimulationTest.java | 2 +- .../test/ShortPaxosSimulationTest.java | 14 + .../apache/cassandra/CassandraTestBase.java | 261 +++++ test/unit/org/apache/cassandra/Util.java | 29 +- .../auth/CassandraAuthorizerTest.java | 1 + .../batchlog/BatchlogManagerTest.java | 21 +- .../config/DatabaseDescriptorRefTest.java | 3 +- .../cql3/statements/TxnDataNameTest.java | 4 +- .../validation/operations/CQLVectorTest.java | 2 +- .../cassandra/db/CleanupTransientTest.java | 12 +- .../ReadCommandVerbHandlerOutOfRangeTest.java | 2 + .../db/ReadCommandVerbHandlerTest.java | 1 + .../apache/cassandra/db/ReadResponseTest.java | 1 + .../CompactionStrategyManagerTest.java | 23 +- .../db/compaction/PartialCompactionsTest.java | 4 +- .../cassandra/db/filter/ColumnFilterTest.java | 2 +- ...assandraEntireSSTableStreamWriterTest.java | 7 +- .../streaming/CassandraStreamManagerTest.java | 8 +- ...StreamConcurrentComponentMutationTest.java | 7 +- .../cassandra/db/view/ViewUtilsTest.java | 20 +- .../db/virtual/StreamingVirtualTableTest.java | 13 +- .../cassandra/dht/BootStrapperTest.java | 16 +- .../cassandra/dht/KeyCollisionTest.java | 28 +- .../cassandra/dht/LengthPartitioner.java | 2 + .../org/apache/cassandra/dht/RangeTest.java | 167 +++- .../apache/cassandra/dht/SplitterTest.java | 24 +- .../cassandra/dht/StreamStateStoreTest.java | 8 +- .../tokenallocator/TokenAllocationTest.java | 25 +- .../io/sstable/CQLSSTableWriterTest.java | 93 +- .../cassandra/io/sstable/ScrubTest.java | 2 +- .../format/bti/PartitionIndexTest.java | 2 +- .../indexsummary/IndexSummaryTest.java | 2 +- .../AssureSufficientLiveNodesTest.java | 23 +- .../cassandra/locator/MetaStrategyTest.java | 2 + .../locator/NetworkTopologyStrategyTest.java | 155 +-- .../locator/PropertyFileSnitchTest.java | 15 +- .../cassandra/locator/SimpleStrategyTest.java | 63 +- .../org/apache/cassandra/net/MessageTest.java | 4 +- .../cassandra/repair/RepairJobTest.java | 81 +- .../cassandra/repair/RepairSessionTest.java | 5 +- .../RepairMessageSerializationsTest.java | 25 +- .../cassandra/schema/ValidationTest.java | 7 +- .../service/BootstrapTransientTest.java | 7 +- .../apache/cassandra/service/RemoveTest.java | 26 +- .../cassandra/service/SerializationsTest.java | 21 +- .../service/StorageServiceServerTest.java | 88 +- .../service/accord/AccordReadRepairTest.java | 117 +++ .../service/accord/AccordTestUtils.java | 4 +- ...nUpdateTest.java => AccordUpdateTest.java} | 10 +- .../paxos/AbstractPaxosRepairTest.java | 2 +- .../paxos/cleanup/PaxosTableRepairsTest.java | 2 +- .../uncommitted/PaxosBallotTrackerTest.java | 19 +- .../uncommitted/PaxosUncommittedTests.java | 17 +- ...axosUncommittedTrackerIntegrationTest.java | 11 +- .../reads/AbstractReadResponseTest.java | 12 +- .../service/reads/DataResolverTest.java | 43 +- .../service/reads/DigestResolverTest.java | 12 +- .../service/reads/ReadExecutorTest.java | 12 +- .../reads/repair/AbstractReadRepairTest.java | 7 +- .../reads/repair/BlockingReadRepairTest.java | 5 +- .../DiagEventsBlockingReadRepairTest.java | 5 +- .../reads/repair/ReadOnlyReadRepairTest.java | 3 +- .../service/reads/repair/ReadRepairTest.java | 3 +- .../repair/RepairedDataVerifierTest.java | 1 + .../reads/repair/TestableReadRepair.java | 9 +- .../cassandra/streaming/SessionInfoTest.java | 6 +- .../cassandra/streaming/StreamReaderTest.java | 7 +- .../async/StreamingInboundHandlerTest.java | 6 +- .../ClusterMetadataTransformationTest.java | 3 + .../tools/nodetool/NetStatsTest.java | 3 +- .../cassandra/utils/BloomFilterTest.java | 2 +- .../cassandra/utils/SerializationsTest.java | 2 +- 307 files changed, 11621 insertions(+), 1933 deletions(-) create mode 100644 src/java/org/apache/cassandra/repair/AbstractRepairJob.java create mode 100644 src/java/org/apache/cassandra/repair/AccordRepairJob.java rename src/java/org/apache/cassandra/repair/{RepairJob.java => CassandraRepairJob.java} (95%) create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/RetryWithNewProtocolResult.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/TxnResult.java create mode 100644 src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java create mode 100644 src/java/org/apache/cassandra/service/reads/ReadCoordinator.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java create mode 100644 src/java/org/apache/cassandra/utils/AbstractBiMultiValMap.java create mode 100644 src/java/org/apache/cassandra/utils/PojoToString.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensus.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensusOneRange.java create mode 100644 test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceStartConsensusMigration.java create mode 100644 test/unit/org/apache/cassandra/CassandraTestBase.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java rename test/unit/org/apache/cassandra/service/accord/txn/{TxnUpdateTest.java => AccordUpdateTest.java} (91%) diff --git a/.gitmodules b/.gitmodules index 616dacf610a7..6e0094316221 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "modules/accord"] path = modules/accord - url = https://github.com/apache/cassandra-accord.git + url = https://github.com/apache/cassandra-accord branch = trunk diff --git a/build.xml b/build.xml index 563ac70ea238..7544c664c1aa 100644 --- a/build.xml +++ b/build.xml @@ -328,6 +328,7 @@ -XX:-CMSClassUnloadingEnabled -Dio.netty.tryReflectionSetAccessible=true + -XX:MaxMetaspaceSize=2G @@ -1155,6 +1156,7 @@ + @@ -1174,7 +1176,7 @@ - + @@ -1353,7 +1355,7 @@ - @@ -1664,12 +1666,14 @@ + + testtag="@{testtag}" showoutput="@{showoutput}" + maxmemory="@{maxmemory}"> @@ -1804,7 +1808,7 @@ - + diff --git a/ide/idea/workspace.xml b/ide/idea/workspace.xml index 1260f8613e82..08a8a73ae49c 100644 --- a/ide/idea/workspace.xml +++ b/ide/idea/workspace.xml @@ -213,7 +213,6 @@ -XX:CICompilerCount=1 -XX:HeapDumpPath=build/test -XX:MaxMetaspaceSize=2G - -Xmx4G -XX:ReservedCodeCacheSize=256M -XX:Tier4CompileThreshold=1000 -ea" /> diff --git a/modules/accord b/modules/accord index 3056d13bc8c4..6c6872270e16 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3056d13bc8c45a22ec794e0979d02f469cc4e209 +Subproject commit 6c6872270e16d2e777f1fa2c510b8f15396be3f3 diff --git a/src/java/org/apache/cassandra/concurrent/Stage.java b/src/java/org/apache/cassandra/concurrent/Stage.java index 808dc34b6844..135b5d078eac 100644 --- a/src/java/org/apache/cassandra/concurrent/Stage.java +++ b/src/java/org/apache/cassandra/concurrent/Stage.java @@ -47,6 +47,7 @@ public enum Stage MUTATION (true, "MutationStage", "request", DatabaseDescriptor::getConcurrentWriters, DatabaseDescriptor::setConcurrentWriters, Stage::multiThreadedLowSignalStage), COUNTER_MUTATION (true, "CounterMutationStage", "request", DatabaseDescriptor::getConcurrentCounterWriters, DatabaseDescriptor::setConcurrentCounterWriters, Stage::multiThreadedLowSignalStage), VIEW_MUTATION (true, "ViewMutationStage", "request", DatabaseDescriptor::getConcurrentViewWriters, DatabaseDescriptor::setConcurrentViewWriters, Stage::multiThreadedLowSignalStage), + ACCORD_MIGRATION (false, "AccordMigrationReadStage", "request", DatabaseDescriptor::getConcurrentAccordOps, DatabaseDescriptor::setConcurrentAccordOps, Stage::multiThreadedLowSignalStage), GOSSIP (true, "GossipStage", "internal", () -> 1, null, Stage::singleThreadedStage), REQUEST_RESPONSE (false, "RequestResponseStage", "request", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedLowSignalStage), ANTI_ENTROPY (false, "AntiEntropyStage", "internal", () -> 1, null, Stage::singleThreadedStage), diff --git a/src/java/org/apache/cassandra/concurrent/SyncFutureTask.java b/src/java/org/apache/cassandra/concurrent/SyncFutureTask.java index 422da99fb806..8176913de7e1 100644 --- a/src/java/org/apache/cassandra/concurrent/SyncFutureTask.java +++ b/src/java/org/apache/cassandra/concurrent/SyncFutureTask.java @@ -71,7 +71,11 @@ public void run() catch (Throwable t) { tryFailure(t); - ExecutionFailure.handle(t); + // A lot of exceptions are expected and will be handled by Cassandra + // by consuming the result of the future task so only treat Error + // as uncaught + if (t instanceof Error) + ExecutionFailure.handle(t); } } diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 51d270c19217..c79e7e27469e 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -603,6 +603,7 @@ public enum CassandraRelevantProperties TEST_ORG_CAFFINITAS_OHC_SEGMENTCOUNT("org.caffinitas.ohc.segmentCount"), TEST_PRESERVE_THREAD_CREATION_STACKTRACE("cassandra.test.preserve_thread_creation_stacktrace", "false"), TEST_RANDOM_SEED("cassandra.test.random.seed"), + TEST_RANGE_EXPENSIVE_CHECKS("cassandra.test.range_expensive_checks"), TEST_READ_ITERATION_DELAY_MS("cassandra.test.read_iteration_delay_ms", "0"), TEST_REUSE_PREPARED("cassandra.test.reuse_prepared", "true"), TEST_ROW_CACHE_SIZE("cassandra.test.row_cache_size"), diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index d23abdccff46..2dd95248feaa 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -44,6 +44,7 @@ import org.apache.cassandra.io.sstable.format.big.BigFormat; import org.apache.cassandra.service.StartupChecks.StartupCheckType; import org.apache.cassandra.utils.StorageCompatibilityMode; +import org.apache.cassandra.service.accord.IAccordService; import static org.apache.cassandra.config.CassandraRelevantProperties.AUTOCOMPACTION_ON_STARTUP_ENABLED; import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_AVAILABLE_PROCESSORS; @@ -191,6 +192,7 @@ public static Set splitCommaDelimited(String src) public int concurrent_reads = 32; public int concurrent_writes = 32; + public int concurrent_accord_operations = 32; public int concurrent_counter_writes = 32; public int concurrent_materialized_view_writes = 32; public OptionaldPositiveInt available_processors = new OptionaldPositiveInt(CASSANDRA_AVAILABLE_PROCESSORS.getInt(OptionaldPositiveInt.UNDEFINED_VALUE)); @@ -500,6 +502,8 @@ public static class SSTableConfig public DataStorageSpec.LongMebibytesBound paxos_cache_size = null; + public DataStorageSpec.LongMebibytesBound consensus_migration_cache_size = null; + @Replaces(oldName = "cache_load_timeout_seconds", converter = Converters.NEGATIVE_SECONDS_DURATION, deprecated = true) public DurationSpec.IntSecondsBound cache_load_timeout = new DurationSpec.IntSecondsBound("30s"); @@ -1163,7 +1167,23 @@ public enum PaxosOnLinearizabilityViolation public volatile boolean client_request_size_metrics_enabled = true; - public LegacyPaxosStrategy legacy_paxos_strategy = LegacyPaxosStrategy.migration; + public LWTStrategy lwt_strategy = LWTStrategy.migration; + public NonSerialWriteStrategy non_serial_write_strategy = NonSerialWriteStrategy.normal; + + /** + * When a barrier transaction is requested how many times to repeat attempting the barrier before giving up + */ + public int accord_barrier_retry_attempts = 5; + + /** + * When a barrier transaction fails how long the initial backoff should be before being increased + * as part of exponential backoff on each attempt + */ + public DurationSpec.IntMillisecondsBound accord_barrier_retry_inital_backoff_millis = new DurationSpec.IntMillisecondsBound("1s"); + + public DurationSpec.IntMillisecondsBound accord_barrier_max_backoff = new DurationSpec.IntMillisecondsBound("10m"); + + public DurationSpec.IntMillisecondsBound accord_range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); public volatile int max_top_size_partition_count = 10; public volatile int max_top_tombstone_partition_count = 10; @@ -1387,7 +1407,7 @@ public enum TombstonesMetricGranularity * and serial read operations. Transaction statements * will always run on Accord. Legacy in this context includes PaxosV2. */ - public enum LegacyPaxosStrategy + public enum LWTStrategy { /** * Allow both Accord and PaxosV1/V2 to run on the same cluster @@ -1405,6 +1425,95 @@ public enum LegacyPaxosStrategy accord } + /* + * Configure how non-serial writes should be executed. For Accord transactions to function correctly + * when mixed with non-SERIAL writes it's necessary for the writes to occur through Accord. + * + * Accord will also use this configuration to determine what consistency level to perform its reads + * at since it will need to be able to read data written at non-SERIAL consistency levels. + * + * BlockingReadRepair will also use this configuration to determine how BRR mutations are applied. For migration + * and accord the BRR mutations will be applied as Accord transactions so that BRR doesn't expose Accord to + * uncommitted Accord data that is being RRed. This can occur when Accord has applied a transaction at some, but not + * all replica since Accord defaults to asynchronous commit. + * + * By routing repairs through Accord it is guaranteed that the Accord derived contents of the repair have already been applied at any + * replica where Accord applies the transaction. This also prevents BRR from breaking atomicity of Accord writes. + * + * If they are not written through Accord then reads through Accord will be required to occur at + * consistency level compatible with the non-serial writes preventing single replica reads from being performed + * by Accord. It will also require Accord to perform read repair of non-serial writes. + * + * Even then there is the potential for Accord to inconsistently execute transactions at different replicas + * because different coordinators for an Accord transaction may encounter different non-SERIAL write state and + * race to commit different outcomes for the transaction. + * + * This is different from Paxos because Paxos performs consensus on the actual values to be applied so recovery + * coordinators will always produce a consistent state when applying a transaction. Accord performs consensus on + * the execution order of transaction and different coordinators witnessing different states not managed by Accord + * can produce multiple outcomes for a transaction. + * + * // TODO (maybe): To safely migrate you would have to route all writes through Accord with the current implementation + * // We could do it by range instead in the migration version, but then we need to know when all in flight writes + * // are done before marking a range as migrated. Would waiting out the timeout be enough (timeout bugs!)? + */ + public enum NonSerialWriteStrategy + { + /* + * Execute writes through Cassandra via StorageProxy's normal write path. This can lead Accord to compute + * multiple outcomes for a transaction that depends on data written by non-SERIAL writes. + */ + normal(false, false, false), + /* + * Allow mixing of non-SERIAL writes and Accord, but still force BRR through Accord + */ + mixed(false, false, true), + /* + * Execute writes through Accord skipping StorageProxy's normal write path, but commit + * writes at the provided consistency level so they can be read via non-SERIAL consistency levels. + */ + migration(false, true, true), + /* + * Execute writes through Accord skipping StorageProxy's normal write path. Ignores the provided consistency level + * which makes Accord commit writes at ANY similar to Paxos with commit consistency level ANY. + */ + accord(true, true, true); + + public final boolean ignoresSuppliedConsistencyLevel; + public final boolean writesThroughAccord; + + public final boolean blockingReadRepairThroughAccord; + + NonSerialWriteStrategy(boolean ignoresSuppliedConsistencyLevel, boolean writesThroughAccord, boolean blockingReadRepairThroughAccord) + { + this.ignoresSuppliedConsistencyLevel = ignoresSuppliedConsistencyLevel; + this.writesThroughAccord = writesThroughAccord; + this.blockingReadRepairThroughAccord = blockingReadRepairThroughAccord; + } + + public ConsistencyLevel commitCLForStrategy(ConsistencyLevel consistencyLevel) + { + if (ignoresSuppliedConsistencyLevel) + return null; + + if (!IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(consistencyLevel)) + throw new UnsupportedOperationException("Consistency level " + consistencyLevel + " is unsupported with Accord for write/commit, supported are ANY, ONE, QUORUM, and ALL"); + + return consistencyLevel; + } + + public ConsistencyLevel readCLForStrategy(ConsistencyLevel consistencyLevel) + { + if (ignoresSuppliedConsistencyLevel) + return null; + + if (!IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS.contains(consistencyLevel)) + throw new UnsupportedOperationException("Consistency level " + consistencyLevel + " is unsupported with Accord for read, supported are ONE, QUORUM, and SERIAL"); + + return consistencyLevel; + } + } + private static final Set SENSITIVE_KEYS = new HashSet() {{ add("client_encryption_options"); add("server_encryption_options"); diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 8912803564e4..757407ad7bac 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -78,6 +78,8 @@ import org.apache.cassandra.auth.IRoleManager; import org.apache.cassandra.config.Config.CommitLogSync; import org.apache.cassandra.config.Config.DiskAccessMode; +import org.apache.cassandra.config.Config.LWTStrategy; +import org.apache.cassandra.config.Config.NonSerialWriteStrategy; import org.apache.cassandra.config.Config.PaxosOnLinearizabilityViolation; import org.apache.cassandra.config.Config.PaxosStatePurging; import org.apache.cassandra.db.ConsistencyLevel; @@ -167,7 +169,7 @@ public class DatabaseDescriptor { public static final String NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE = - "Cannot use legacy_paxos_strategy \"accord\" while Accord transactions are disabled."; + "Cannot use lwt_strategy \"accord\" while Accord transactions are disabled."; static { @@ -226,6 +228,7 @@ public class DatabaseDescriptor private static long keyCacheSizeInMiB; private static long paxosCacheSizeInMiB; + private static long consensusMigrationCacheSizeInMiB; private static long counterCacheSizeInMiB; private static long indexSummaryCapacityInMiB; @@ -651,6 +654,9 @@ else if (conf.disk_access_mode == DiskAccessMode.direct) if (conf.concurrent_counter_writes < 2) throw new ConfigurationException("concurrent_counter_writes must be at least 2, but was " + conf.concurrent_counter_writes, false); + if (conf.concurrent_accord_operations < 1) + throw new ConfigurationException("concurrent_accord_operations must be at least 1, but was " + conf.concurrent_accord_operations, false); + if (conf.networking_cache_size == null) conf.networking_cache_size = new DataStorageSpec.IntMebibytesBound(Math.min(128, (int) (Runtime.getRuntime().maxMemory() / (16 * 1048576)))); @@ -718,7 +724,6 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m conf.commitlog_directory = storagedirFor("commitlog"); } - if (conf.accord.journal_directory == null) initializeCommitLogDiskAccessMode(); if (commitLogWriteDiskAccessMode != conf.commitlog_disk_access_mode) logger.info("commitlog_disk_access_mode resolved to: {}", commitLogWriteDiskAccessMode); @@ -959,6 +964,22 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m + conf.paxos_cache_size + "', supported values are >= 0.", false); } + try + { + // if consensusMigrationCacheSizeInMiB option was set to "auto" then size of the cache should be "min(1% of Heap (in MB), 50MB) + consensusMigrationCacheSizeInMiB = (conf.consensus_migration_cache_size == null) + ? Math.min(Math.max(1, (int) (Runtime.getRuntime().totalMemory() * 0.01 / 1024 / 1024)), 50) + : conf.consensus_migration_cache_size.toMebibytes(); + + if (consensusMigrationCacheSizeInMiB < 0) + throw new NumberFormatException(); // to escape duplicating error message + } + catch (NumberFormatException e) + { + throw new ConfigurationException("consensus_migration_cache_size option was set incorrectly to '" + + conf.consensus_migration_cache_size + "', supported values are >= 0.", false); + } + // we need this assignment for the Settings virtual table - CASSANDRA-17735 conf.counter_cache_size = new DataStorageSpec.LongMebibytesBound(counterCacheSizeInMiB); @@ -1146,8 +1167,13 @@ else if (conf.max_value_size.toMebibytes() >= 2048) if (conf.audit_logging_options != null) setAuditLoggingOptions(conf.audit_logging_options); - if (conf.legacy_paxos_strategy == Config.LegacyPaxosStrategy.accord && !conf.accord.enabled) - throw new ConfigurationException(NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE); + if (conf.lwt_strategy == LWTStrategy.accord) + { + if (!conf.accord.enabled) + throw new ConfigurationException(NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE); + if (conf.non_serial_write_strategy == Config.NonSerialWriteStrategy.normal) + throw new ConfigurationException("If Accord is used for LWTs then regular writes needs to be routed through Accord for interoperability by setting non_serial_write_strategy to \"accord\" or \"migration\""); + } } @VisibleForTesting @@ -2697,6 +2723,20 @@ public static void setConcurrentViewWriters(int concurrent_materialized_view_wri conf.concurrent_materialized_view_writes = concurrent_materialized_view_writes; } + public static int getConcurrentAccordOps() + { + return conf.concurrent_accord_operations; + } + + public static void setConcurrentAccordOps(int concurrent_operations) + { + if (concurrent_operations < 0) + { + throw new IllegalArgumentException("Concurrent accord operations must be non-negative"); + } + conf.concurrent_accord_operations = concurrent_operations; + } + public static int getFlushWriters() { return conf.memtable_flush_writers; @@ -3613,9 +3653,45 @@ public static boolean paxoTopologyRepairStrictEachQuorum() return conf.paxos_topology_repair_strict_each_quorum; } - public static Config.LegacyPaxosStrategy getLegacyPaxosStrategy() + // TODO (desired): This configuration should come out of TrM to force the cluster to agree on it + public static LWTStrategy getLWTStrategy() + { + return conf.lwt_strategy; + } + + public static void setLWTStrategy(LWTStrategy lwtStrategy) + { + conf.lwt_strategy = lwtStrategy; + } + + public static Config.NonSerialWriteStrategy getNonSerialWriteStrategy() + { + return conf.non_serial_write_strategy; + } + + public static void setNonSerialWriteStrategy(NonSerialWriteStrategy nonSerialWriteStrategy) + { + conf.non_serial_write_strategy = nonSerialWriteStrategy; + } + + public static int getAccordBarrierRetryAttempts() { - return conf.legacy_paxos_strategy; + return conf.accord_barrier_retry_attempts; + } + + public static long getAccordBarrierRetryInitialBackoffMillis() + { + return conf.accord_barrier_retry_inital_backoff_millis.toMilliseconds(); + } + + public static long getAccordBarrierRetryMaxBackoffMillis() + { + return conf.accord_barrier_max_backoff.toMilliseconds(); + } + + public static long getAccordRangeBarrierTimeoutNanos() + { + return conf.accord_range_barrier_timeout.to(TimeUnit.NANOSECONDS); } public static void setNativeTransportMaxRequestDataInFlightPerIpInBytes(long maxRequestDataInFlightInBytes) @@ -4205,6 +4281,11 @@ public static long getPaxosCacheSizeInMiB() return paxosCacheSizeInMiB; } + public static long getConsensusMigrationCacheSizeInMiB() + { + return consensusMigrationCacheSizeInMiB; + } + public static long getCounterCacheSizeInMiB() { return counterCacheSizeInMiB; diff --git a/src/java/org/apache/cassandra/cql3/UpdateParameters.java b/src/java/org/apache/cassandra/cql3/UpdateParameters.java index 5ec6d44d31b5..8b0e5cdf7c74 100644 --- a/src/java/org/apache/cassandra/cql3/UpdateParameters.java +++ b/src/java/org/apache/cassandra/cql3/UpdateParameters.java @@ -20,14 +20,26 @@ import java.nio.ByteBuffer; import java.util.Map; -import org.apache.cassandra.db.guardrails.Guardrails; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ClusteringComparator; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.DeletionPurger; +import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.LivenessInfo; +import org.apache.cassandra.db.RangeTombstone; +import org.apache.cassandra.db.Slice; import org.apache.cassandra.db.context.CounterContext; +import org.apache.cassandra.db.guardrails.Guardrails; import org.apache.cassandra.db.partitions.Partition; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.rows.BTreeRow; +import org.apache.cassandra.db.rows.BufferCell; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.Rows; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.utils.TimeUUID; @@ -39,6 +51,7 @@ public class UpdateParameters public final TableMetadata metadata; public final ClientState clientState; public final QueryOptions options; + public final boolean constructingAccordBaseUpdate; private final long nowInSec; private final long timestamp; @@ -62,6 +75,18 @@ public UpdateParameters(TableMetadata metadata, long nowInSec, int ttl, Map prefetchedRows) throws InvalidRequestException + { + this(metadata, clientState, options, timestamp, nowInSec, ttl, prefetchedRows, false); + } + + public UpdateParameters(TableMetadata metadata, + ClientState clientState, + QueryOptions options, + long timestamp, + long nowInSec, + int ttl, + Map prefetchedRows, + boolean constructingAccordBaseUpdate) throws InvalidRequestException { this.metadata = metadata; this.clientState = clientState; @@ -79,6 +104,8 @@ public UpdateParameters(TableMetadata metadata, // it to avoid potential confusion. if (timestamp == Long.MIN_VALUE) throw new InvalidRequestException(String.format("Out of bound timestamp, must be in [%d, %d]", Long.MIN_VALUE + 1, Long.MAX_VALUE)); + + this.constructingAccordBaseUpdate = constructingAccordBaseUpdate; } public void newRow(Clustering clustering) throws InvalidRequestException diff --git a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java index 939d7df767bb..65c7f56662fb 100644 --- a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java @@ -343,7 +343,7 @@ public List getMutations(ClientState state, } QueryOptions statementOptions = options.forStatement(i); long timestamp = attrs.getTimestamp(batchTimestamp, statementOptions); - statement.addUpdates(collector, partitionKeys.get(i), state, statementOptions, local, timestamp, nowInSeconds, requestTime); + statement.addUpdates(collector, partitionKeys.get(i), state, statementOptions, local, timestamp, nowInSeconds, requestTime, false); } if (tablesWithZeroGcGs != null) diff --git a/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java b/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java index 521cd2afa6e2..aabcecec72f8 100644 --- a/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java +++ b/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java @@ -223,7 +223,7 @@ public Mutation build() PartitionUpdate update = updateEntry.getValue().build(); updates.put(updateEntry.getKey(), update); } - return new Mutation(keyspaceName, key, updates.build(), createdAt); + return new Mutation(keyspaceName, key, updates.build(), createdAt, false); } public PartitionUpdate.Builder get(TableId tableId) diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index e511af319056..b2edb6cc3366 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -31,14 +31,18 @@ import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import accord.api.Update; import accord.primitives.Txn; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.UpdateParameters; import org.apache.cassandra.cql3.conditions.ColumnCondition; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.Columns; +import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.SinglePartitionReadCommand; @@ -53,7 +57,6 @@ import org.apache.cassandra.db.partitions.Partition; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.Row; -import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.CASRequest; import org.apache.cassandra.service.ClientState; @@ -63,6 +66,7 @@ import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnReference; +import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.service.paxos.Ballot; @@ -70,13 +74,21 @@ import org.apache.cassandra.utils.TimeUUID; import static com.google.common.base.Preconditions.checkState; -import static org.apache.cassandra.service.accord.txn.TxnDataName.Kind.USER; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.RETRY_NEW_PROTOCOL; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.casResult; +import static org.apache.cassandra.service.accord.txn.TxnDataName.Kind.CAS_READ; +import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; + /** * Processed CAS conditions and update on potentially multiple rows of the same partition. */ public class CQL3CasRequest implements CASRequest { + @SuppressWarnings("unused") + private static final Logger logger = LoggerFactory.getLogger(CQL3CasRequest.class); + public final TableMetadata metadata; public final DecoratedKey key; private final RegularAndStaticColumns conditionColumns; @@ -410,7 +422,7 @@ public String toCQL() public TxnCondition asTxnCondition() { - TxnDataName txnDataName = new TxnDataName(USER, clustering, TxnRead.SERIAL_READ_NAME); + TxnDataName txnDataName = new TxnDataName(CAS_READ, clustering, TxnRead.CAS_READ_NAME); TxnReference txnReference = new TxnReference(txnDataName, null); return new TxnCondition.Exists(txnReference, TxnCondition.Kind.IS_NULL); } @@ -436,7 +448,7 @@ public String toCQL() public TxnCondition asTxnCondition() { - TxnDataName txnDataName = new TxnDataName(USER, clustering, TxnRead.SERIAL_READ_NAME); + TxnDataName txnDataName = new TxnDataName(CAS_READ, clustering, TxnRead.CAS_READ_NAME); TxnReference txnReference = new TxnReference(txnDataName, null); return new TxnCondition.Exists(txnReference, TxnCondition.Kind.IS_NOT_NULL); } @@ -484,20 +496,26 @@ public String toString() } @Override - public Txn toAccordTxn(ClientState clientState, long nowInSecs) + public Txn toAccordTxn(ConsistencyLevel consistencyLevel, ConsistencyLevel commitConsistencyLevel, ClientState clientState, long nowInSecs) { SinglePartitionReadCommand readCommand = readCommand(nowInSecs); - Update update = createUpdate(clientState); - // In a CAS request only one key is supported and writes + Update update = createUpdate(clientState, commitConsistencyLevel); + // If the write strategy is sending all writes through Accord there is no need to use the supplied consistency + // level since Accord will manage reading safely + consistencyLevel = DatabaseDescriptor.getNonSerialWriteStrategy().readCLForStrategy(consistencyLevel); + TxnRead read = TxnRead.createCasRead(readCommand, consistencyLevel); + // In a CAS requesting only one key is supported and writes // can't be dependent on any data that is read (only conditions) // so the only relevant keys are the read key - TxnRead read = TxnRead.createSerialRead(readCommand); return new Txn.InMemory(read.keys(), read, TxnQuery.CONDITION, update); } - private Update createUpdate(ClientState clientState) + private Update createUpdate(ClientState clientState, ConsistencyLevel commitConsistencyLevel) { - return new TxnUpdate(createWriteFragments(clientState), createCondition()); + // Potentially ignore commit consistency level if non-SERIAL write strategy is Accord + // since it is safe to match what non-SERIAL writes do + commitConsistencyLevel = DatabaseDescriptor.getNonSerialWriteStrategy().commitCLForStrategy(commitConsistencyLevel); + return new TxnUpdate(createWriteFragments(clientState), createCondition(), commitConsistencyLevel); } private TxnCondition createCondition() @@ -528,13 +546,23 @@ private List createWriteFragments(ClientState state) TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options); fragments.add(fragment); } + for (RangeDeletion rangeDeletion : rangeDeletions) + { + ModificationStatement modification = rangeDeletion.stmt; + QueryOptions options = rangeDeletion.options; + TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options); + fragments.add(fragment); + } return fragments; } @Override - public RowIterator toCasResult(TxnData txnData) + public ConsensusAttemptResult toCasResult(TxnResult txnResult) { - FilteredPartition partition = txnData.get(TxnRead.SERIAL_READ); - return partition != null ? partition.rowIterator() : null; + if (txnResult.kind() == retry_new_protocol) + return RETRY_NEW_PROTOCOL; + TxnData txnData = (TxnData)txnResult; + FilteredPartition partition = txnData.get(TxnRead.CAS_READ); + return casResult(partition != null ? partition.rowIterator(false) : null); } } diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index 51f51e640755..19556a766b27 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -57,19 +57,10 @@ import org.apache.cassandra.cql3.Validation; import org.apache.cassandra.cql3.VariableSpecifications; import org.apache.cassandra.cql3.WhereClause; -import org.apache.cassandra.cql3.constraints.ConstraintViolationException; -import org.apache.cassandra.db.guardrails.Guardrails; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.locator.Replica; -import org.apache.cassandra.locator.ReplicaLayout; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.cql3.conditions.ColumnCondition; import org.apache.cassandra.cql3.conditions.ColumnConditions; import org.apache.cassandra.cql3.conditions.Conditions; +import org.apache.cassandra.cql3.constraints.ConstraintViolationException; import org.apache.cassandra.cql3.functions.Function; import org.apache.cassandra.cql3.restrictions.StatementRestrictions; import org.apache.cassandra.cql3.selection.ResultSetBuilder; @@ -94,6 +85,7 @@ import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.filter.DataLimits; import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.guardrails.Guardrails; import org.apache.cassandra.db.marshal.BooleanType; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.db.partitions.Partition; @@ -102,11 +94,19 @@ import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.view.View; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.exceptions.RequestValidationException; import org.apache.cassandra.exceptions.UnauthorizedException; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.locator.ReplicaLayout; import org.apache.cassandra.metrics.ClientRequestSizeMetrics; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageProxy; @@ -641,7 +641,8 @@ private ResultMessage executeWithoutCondition(QueryState queryState, QueryOption false, options.getTimestamp(queryState), options.getNowInSeconds(queryState), - requestTime); + requestTime, + false); if (!mutations.isEmpty()) { StorageProxy.mutateWithTriggers(mutations, cl, false, requestTime); @@ -806,7 +807,7 @@ public ResultMessage executeInternalWithoutCondition(QueryState queryState, Quer { long timestamp = options.getTimestamp(queryState); long nowInSeconds = options.getNowInSeconds(queryState); - for (IMutation mutation : getMutations(queryState.getClientState(), options, true, timestamp, nowInSeconds, requestTime)) + for (IMutation mutation : getMutations(queryState.getClientState(), options, true, timestamp, nowInSeconds, requestTime, false)) mutation.apply(); return null; } @@ -834,7 +835,7 @@ static RowIterator casInternal(ClientState state, CQL3CasRequest request, long t } if (!request.appliesTo(current)) - return current.rowIterator(); + return current.rowIterator(false); PartitionUpdate updates = request.makeUpdates(current, state, ballot); updates = TriggerExecutor.instance.execute(updates); @@ -859,19 +860,22 @@ public List getMutations(ClientState state, boolean local, long timestamp, long nowInSeconds, - Dispatcher.RequestTime requestTime) + Dispatcher.RequestTime requestTime, + boolean constructingAccordBaseUpdate) { List keys = buildPartitionKeyNames(options, state); - if(keys.size() == 1) + + if (keys.size() == 1) { SingleTableSinglePartitionUpdatesCollector collector = new SingleTableSinglePartitionUpdatesCollector(metadata, updatedColumns); - addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime); + addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime, constructingAccordBaseUpdate); return collector.toMutations(state); - } else + } + else { HashMultiset perPartitionKeyCounts = HashMultiset.create(keys); SingleTableUpdatesCollector collector = new SingleTableUpdatesCollector(metadata, updatedColumns, perPartitionKeyCounts); - addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime); + addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime, constructingAccordBaseUpdate); return collector.toMutations(state); } } @@ -879,7 +883,7 @@ public List getMutations(ClientState state, @VisibleForTesting public PartitionUpdate getTxnUpdate(ClientState state, QueryOptions options) { - List mutations = getMutations(state, options, false, 0, 0, new Dispatcher.RequestTime(0, 0)); + List mutations = getMutations(state, options, false, 0, 0, new Dispatcher.RequestTime(0, 0), true); if (mutations.size() != 1) throw new IllegalArgumentException("When running withing a transaction, modification statements may only mutate a single partition"); return Iterables.getOnlyElement(mutations.get(0).getPartitionUpdates()); @@ -942,7 +946,8 @@ final void addUpdates(UpdatesCollector collector, boolean local, long timestamp, long nowInSeconds, - Dispatcher.RequestTime requestTime) + Dispatcher.RequestTime requestTime, + boolean constructingAccordBaseUpdate) { if (hasSlices()) { @@ -960,7 +965,8 @@ final void addUpdates(UpdatesCollector collector, local, timestamp, nowInSeconds, - requestTime); + requestTime, + constructingAccordBaseUpdate); for (ByteBuffer key : keys) { Validation.validateKey(metadata(), key); @@ -984,7 +990,7 @@ final void addUpdates(UpdatesCollector collector, if (restrictions.hasClusteringColumnsRestrictions() && clusterings.isEmpty()) return; - UpdateParameters params = makeUpdateParameters(keys, clusterings, state, options, local, timestamp, nowInSeconds, requestTime); + UpdateParameters params = makeUpdateParameters(keys, clusterings, state, options, local, timestamp, nowInSeconds, requestTime, constructingAccordBaseUpdate); for (ByteBuffer key : keys) { @@ -1042,7 +1048,8 @@ private UpdateParameters makeUpdateParameters(Collection keys, boolean local, long timestamp, long nowInSeconds, - Dispatcher.RequestTime requestTime) + Dispatcher.RequestTime requestTime, + boolean constructingAccordBaseUpdate) { if (clusterings.contains(Clustering.STATIC_CLUSTERING)) return makeUpdateParameters(keys, @@ -1053,7 +1060,8 @@ private UpdateParameters makeUpdateParameters(Collection keys, local, timestamp, nowInSeconds, - requestTime); + requestTime, + constructingAccordBaseUpdate); return makeUpdateParameters(keys, new ClusteringIndexNamesFilter(clusterings, false), @@ -1063,7 +1071,8 @@ private UpdateParameters makeUpdateParameters(Collection keys, local, timestamp, nowInSeconds, - requestTime); + requestTime, + constructingAccordBaseUpdate); } private UpdateParameters makeUpdateParameters(Collection keys, @@ -1074,7 +1083,8 @@ private UpdateParameters makeUpdateParameters(Collection keys, boolean local, long timestamp, long nowInSeconds, - Dispatcher.RequestTime requestTime) + Dispatcher.RequestTime requestTime, + boolean constructingAccordBaseUpdate) { // Some lists operation requires reading Map lists = @@ -1092,7 +1102,8 @@ private UpdateParameters makeUpdateParameters(Collection keys, getTimestamp(timestamp, options), nowInSeconds, getTimeToLive(options), - lists); + lists, + constructingAccordBaseUpdate); } public static abstract class Parsed extends QualifiedStatement diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index e56fd1ec1d22..cff94f952648 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -44,7 +44,6 @@ import accord.api.Key; import accord.primitives.Keys; import accord.primitives.Txn; -import accord.utils.Invariants; import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; import org.apache.cassandra.config.DatabaseDescriptor; @@ -63,12 +62,11 @@ import org.apache.cassandra.db.SinglePartitionReadQuery; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.partitions.FilteredPartition; -import org.apache.cassandra.exceptions.ExceptionCode; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.accord.api.AccordRoutableKey; +import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnDataName; @@ -76,18 +74,18 @@ import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnReference; +import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.transport.Dispatcher; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tcm.transformations.AddAccordKeyspace; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull; import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; +import static org.apache.cassandra.service.accord.txn.TxnRead.createTxnRead; +import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; public class TransactionStatement implements CQLStatement.CompositeCQLStatement, CQLStatement.ReturningCQLStatement { @@ -302,9 +300,9 @@ List createWriteFragments(ClientState state, QueryOptions opt return fragments; } - TxnUpdate createUpdate(ClientState state, QueryOptions options, Map autoReads, Consumer keyConsumer) + AccordUpdate createUpdate(ClientState state, QueryOptions options, Map autoReads, Consumer keyConsumer) { - return new TxnUpdate(createWriteFragments(state, options, autoReads, keyConsumer), createCondition(options)); + return new TxnUpdate(createWriteFragments(state, options, autoReads, keyConsumer), createCondition(options), null); } Keys toKeys(SortedSet keySet) @@ -323,16 +321,16 @@ public Txn createTxn(ClientState state, QueryOptions options) Preconditions.checkState(conditions.isEmpty(), "No condition should exist without updates present"); List reads = createNamedReads(options, state, ImmutableMap.of(), keySet::add); Keys txnKeys = toKeys(keySet); - TxnRead read = new TxnRead(reads, txnKeys); + TxnRead read = createTxnRead(reads, txnKeys, null); return new Txn.InMemory(txnKeys, read, TxnQuery.ALL); } else { Map autoReads = new HashMap<>(); - TxnUpdate update = createUpdate(state, options, autoReads, keySet::add); + AccordUpdate update = createUpdate(state, options, autoReads, keySet::add); List reads = createNamedReads(options, state, autoReads, keySet::add); Keys txnKeys = toKeys(keySet); - TxnRead read = new TxnRead(reads, txnKeys); + TxnRead read = createTxnRead(reads, txnKeys, null); return new Txn.InMemory(txnKeys, read, TxnQuery.ALL, update); } } @@ -357,40 +355,6 @@ private static boolean isSelectingMultipleClusterings(SelectStatement select, @N return select.getLimit(options) != 1; } - private void maybeConvertTablesToAccord(Txn txn) - { - Set allKeyspaces = new HashSet<>(); - Set newKeyspaces = new HashSet<>(); - txn.keys().forEach(key -> { - String keyspace = ((AccordRoutableKey) key).keyspace(); - if (allKeyspaces.add(keyspace) && !AccordService.instance().isAccordManagedKeyspace(keyspace)) - newKeyspaces.add(keyspace); - }); - - if (newKeyspaces.isEmpty()) - return; - - for (String keyspace : newKeyspaces) - { - ClusterMetadataService.instance().commit(new AddAccordKeyspace(keyspace), - metadata -> null, - (code, message) -> { - Invariants.checkState(code == ExceptionCode.ALREADY_EXISTS, - "Expected %s, got %s", ExceptionCode.ALREADY_EXISTS, code); - return null; - }); - } - - // we need to avoid creating a txnId in an epoch when no one has any ranges - FBUtilities.waitOnFuture(AccordService.instance().epochReady(ClusterMetadata.current().epoch)); - - for (String keyspace : allKeyspaces) - { - if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) - throw new IllegalStateException(keyspace + " is not an accord managed keyspace"); - } - } - @Override public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher.RequestTime requestTime) { @@ -407,9 +371,12 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. Txn txn = createTxn(state.getClientState(), options); - maybeConvertTablesToAccord(txn); + AccordService.instance().maybeConvertKeyspacesToAccord(txn); - TxnData data = AccordService.instance().coordinate(txn, options.getConsistency()); + TxnResult txnResult = AccordService.instance().coordinate(txn, options.getConsistency(), requestTime); + if (txnResult.kind() == retry_new_protocol) + throw new IllegalStateException("Transaction statement should never be required to switch consensus protocols"); + TxnData data = (TxnData)txnResult; if (returningSelect != null) { @@ -420,8 +387,9 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. if (selectQuery.queries.size() == 1) { FilteredPartition partition = data.get(TxnDataName.returning()); + boolean reversed = selectQuery.queries.get(0).isReversed(); if (partition != null) - returningSelect.select.processPartition(partition.rowIterator(), options, result, FBUtilities.nowInSeconds()); + returningSelect.select.processPartition(partition.rowIterator(reversed), options, result, FBUtilities.nowInSeconds()); } else { @@ -429,8 +397,9 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. for (int i = 0; i < selectQuery.queries.size(); i++) { FilteredPartition partition = data.get(TxnDataName.returning(i)); + boolean reversed = selectQuery.queries.get(i).isReversed(); if (partition != null) - returningSelect.select.processPartition(partition.rowIterator(), options, result, nowInSec); + returningSelect.select.processPartition(partition.rowIterator(reversed), options, result, nowInSec); } } return new ResultMessage.Rows(result.build()); diff --git a/src/java/org/apache/cassandra/cql3/terms/Lists.java b/src/java/org/apache/cassandra/cql3/terms/Lists.java index 153316226757..e82f0f5ac092 100644 --- a/src/java/org/apache/cassandra/cql3/terms/Lists.java +++ b/src/java/org/apache/cassandra/cql3/terms/Lists.java @@ -27,6 +27,10 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.cql3.AssignmentTestable; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.ColumnSpecification; @@ -34,21 +38,25 @@ import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.UpdateParameters; import org.apache.cassandra.cql3.VariableSpecifications; +import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.guardrails.Guardrails; -import org.apache.cassandra.db.marshal.MultiElementType; -import org.apache.cassandra.schema.ColumnMetadata; -import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.db.*; -import org.apache.cassandra.db.rows.*; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MultiElementType; +import org.apache.cassandra.db.marshal.TimeUUIDType; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.TimeUUID; -import static org.apache.cassandra.cql3.terms.Constants.UNSET_VALUE; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest; +import static org.apache.cassandra.cql3.terms.Constants.UNSET_VALUE; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.TimeUUID.Generator.atUnixMillisAsBytes; @@ -57,6 +65,16 @@ */ public abstract class Lists { + @SuppressWarnings("unused") + private static final Logger logger = LoggerFactory.getLogger(Lists.class); + + /** + * Sentinel value indicating the cell path should be replaced by Accord with one based on the transaction executeAt + */ + private static final TimeUUID ACCORD_CELL_PATH_SENTINEL_UUID = TimeUUID.atUnixMicrosWithLsb(0, 0); + public static final CellPath ACCORD_DUMMY_CELL_PATH = CellPath.create(ACCORD_CELL_PATH_SENTINEL_UUID.toBytes()); + private static final long ACCORD_CELL_PATH_SENTINEL_MSB = ACCORD_CELL_PATH_SENTINEL_UUID.msb(); + private Lists() {} public static ColumnSpecification indexSpecOf(ColumnSpecification column) @@ -142,6 +160,33 @@ public static ListType getPreferredCompatibleType(List items, return type == null ? null : ListType.getInstance(type, false); } + /** + * Return a function that given a cell with an ACCORD_CELL_PATH_SENTINEL_MSB will + * return a new CellPath with a TimeUUID that increases monotonically every time it is called or + * the existing cell path if path does not contain ACCORD_CELL_PATH_SENTINEL_MSB. + * + * Only intended to work with list cell paths where list append needs a timestamp based on the executeAt + * of the Accord transaction appending the cell. + * @param timestampMicros executeAt timestamp to use as the MSB for generated cell paths + */ + public static com.google.common.base.Function accordListPathSupplier(long timestampMicros) + { + return new com.google.common.base.Function() + { + final long timeUuidMsb = TimeUUID.unixMicrosToMsb(timestampMicros); + long cellIndex = 0; + @Override + public CellPath apply(Cell cell) + { + CellPath path = cell.path(); + if (ACCORD_CELL_PATH_SENTINEL_MSB == path.get(0).getLong(0)) + return CellPath.create(ByteBuffer.wrap(TimeUUID.toBytes(timeUuidMsb, TimeUUIDType.signedBytesToNativeLong(cellIndex++)))); + else + return path; + } + }; + } + public static class Literal extends Term.Raw { private final List elements; @@ -406,11 +451,18 @@ static void doAppend(Term.Terminal value, ColumnMetadata column, UpdateParameter // during SSTable write. Guardrails.itemsPerCollection.guard(type.collectionSize(elements), column.name.toString(), false, params.clientState); + long cellIndex = 0; int dataSize = 0; for (ByteBuffer buffer : elements) { - ByteBuffer uuid = ByteBuffer.wrap(params.nextTimeUUIDAsBytes()); - Cell cell = params.addCell(column, CellPath.create(uuid), buffer); + ByteBuffer cellPath; + // Accord will need to replace this value later once it knows the executeAt timestamp + // so just put a TimeUUID with MSB sentinel for now + if (params.constructingAccordBaseUpdate) + cellPath = TimeUUID.atUnixMicrosWithLsb(0, cellIndex++).toBytes(); + else + cellPath = ByteBuffer.wrap(params.nextTimeUUIDAsBytes()); + Cell cell = params.addCell(column, CellPath.create(cellPath), buffer); dataSize += cell.dataSize(); } Guardrails.collectionListSize.guard(dataSize, column.name.toString(), false, params.clientState); diff --git a/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java b/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java index fe3acdba06c3..76b765ae7073 100644 --- a/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java +++ b/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java @@ -85,7 +85,9 @@ private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message } } - if (!forToken.get().containsSelf()) + // Mutations may intentionally be sent against an older Epoch so out of range checking doesn't work + // and could cause data to not end up where it needs to be for future operations + if (!message.payload.allowsOutOfRangeMutations() && !forToken.get().containsSelf()) { StorageService.instance.incOutOfRangeOperationCount(); Keyspace.open(message.payload.getKeyspaceName()).metric.outOfRangeTokenWrites.inc(); @@ -93,7 +95,7 @@ private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message throw InvalidRoutingException.forWrite(respondTo, key.getToken(), metadata.epoch, message.payload); } - if (forToken.lastModified().isAfter(message.epoch())) + if (!message.payload.allowsOutOfRangeMutations() && forToken.lastModified().isAfter(message.epoch())) { TCMMetrics.instance.coordinatorBehindPlacements.mark(); throw new CoordinatorBehindException(String.format("Routing is correct, but coordinator needs to catch-up at least to epoch %s to maintain consistency. Current coordinator epoch is %s", diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index d47a651a0619..0fcdb192fd16 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -3339,4 +3339,9 @@ public TableMetrics getMetrics() { return metric; } + + public TableId getTableId() + { + return metadata().id; + } } diff --git a/src/java/org/apache/cassandra/db/IMutation.java b/src/java/org/apache/cassandra/db/IMutation.java index 1998e2c0353c..ba8d586deae3 100644 --- a/src/java/org/apache/cassandra/db/IMutation.java +++ b/src/java/org/apache/cassandra/db/IMutation.java @@ -70,4 +70,9 @@ static long dataSize(Collection mutations) } return size; } + + default boolean allowsOutOfRangeMutations() + { + return false; + } } diff --git a/src/java/org/apache/cassandra/db/Mutation.java b/src/java/org/apache/cassandra/db/Mutation.java index 0861bb64c41f..1f9d2c86be86 100644 --- a/src/java/org/apache/cassandra/db/Mutation.java +++ b/src/java/org/apache/cassandra/db/Mutation.java @@ -18,7 +18,13 @@ package org.apache.cassandra.db; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLongFieldUpdater; import java.util.function.Supplier; @@ -56,6 +62,7 @@ public class Mutation implements IMutation, Supplier { public static final MutationSerializer serializer = new MutationSerializer(); + public static final int ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG = 0x01; // todo this is redundant // when we remove it, also restore SerializationsTest.testMutationRead to not regenerate new Mutations each test @@ -84,23 +91,26 @@ public class Mutation implements IMutation, Supplier /** @see CassandraRelevantProperties#CACHEABLE_MUTATION_SIZE_LIMIT */ private static final long CACHEABLE_MUTATION_SIZE_LIMIT = CassandraRelevantProperties.CACHEABLE_MUTATION_SIZE_LIMIT.getLong(); + private boolean allowOutOfRangeMutations; + public Mutation(PartitionUpdate update) { this(update.metadata().keyspace, update.partitionKey(), ImmutableMap.of(update.metadata().id, update), approxTime.now(), update.metadata().params.cdc); } - public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos) + public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos, boolean allowOutOfRangeMutations) { - this(keyspaceName, key, modifications, approxCreatedAtNanos, cdcEnabled(modifications.values())); + this(keyspaceName, key, modifications, approxCreatedAtNanos, cdcEnabled(modifications.values()), allowOutOfRangeMutations); } - public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos, boolean cdcEnabled) + public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos, boolean cdcEnabled, boolean allowOutOfRangeMutations) { this.keyspaceName = keyspaceName; this.key = key; this.modifications = modifications; this.cdcEnabled = cdcEnabled; this.approxCreatedAtNanos = approxCreatedAtNanos; + this.allowOutOfRangeMutations = allowOutOfRangeMutations; } private static boolean cdcEnabled(Iterable modifications) @@ -125,7 +135,7 @@ public Mutation without(Set tableIds) } } - return new Mutation(keyspaceName, key, builder.build(), approxCreatedAtNanos); + return new Mutation(keyspaceName, key, builder.build(), approxCreatedAtNanos, allowOutOfRangeMutations); } public Mutation without(TableId tableId) @@ -201,18 +211,22 @@ public boolean isEmpty() * @throws IllegalArgumentException if not all the mutations are on the same * keyspace and key. */ - public static Mutation merge(List mutations) + public static Mutation merge(Collection mutations) { assert !mutations.isEmpty(); - if (mutations.size() == 1) - return mutations.get(0); + if (mutations.size() == ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG) + return mutations.iterator().next(); Set updatedTables = new HashSet<>(); String ks = null; DecoratedKey key = null; + Boolean allowOutOfRangeMutations = null; for (Mutation mutation : mutations) { + if (allowOutOfRangeMutations != null && allowOutOfRangeMutations != mutation.allowOutOfRangeMutations) + throw new IllegalArgumentException("Can't merge mutations with differing policies on allowing out of range mutations"); + allowOutOfRangeMutations = mutation.allowOutOfRangeMutations; updatedTables.addAll(mutation.modifications.keySet()); if (ks != null && !ks.equals(mutation.keyspaceName)) throw new IllegalArgumentException(); @@ -236,10 +250,10 @@ public static Mutation merge(List mutations) if (updates.isEmpty()) continue; - modifications.put(table, updates.size() == 1 ? updates.get(0) : PartitionUpdate.merge(updates)); + modifications.put(table, updates.size() == ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG ? updates.get(0) : PartitionUpdate.merge(updates)); updates.clear(); } - return new Mutation(ks, key, modifications.build(), approxTime.now()); + return new Mutation(ks, key, modifications.build(), approxTime.now(), allowOutOfRangeMutations); } public Future applyFuture() @@ -296,6 +310,27 @@ public boolean trackedByCDC() return cdcEnabled; } + public Mutation allowOutOfRangeMutations() + { + allowOutOfRangeMutations = true; + return this; + } + + public boolean allowsOutOfRangeMutations() + { + return allowOutOfRangeMutations; + } + + private static int allowsOutOfRangeMutationsFlag(boolean allowOutOfRangeMutations) + { + return allowOutOfRangeMutations ? ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG : 0; + } + + private static boolean allowsOutOfRangeMutations(int flags) + { + return (flags & ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG) != 0; + } + public String toString() { return toString(false); @@ -481,6 +516,9 @@ static void serializeInternal(PartitionUpdate.PartitionUpdateSerializer serializ { Map modifications = mutation.modifications; + if (version >= VERSION_51) + out.write(allowsOutOfRangeMutationsFlag(mutation.allowsOutOfRangeMutations())); + /* serialize the modifications in the mutation */ int size = modifications.size(); out.writeUnsignedVInt32(size); @@ -500,6 +538,12 @@ public Mutation deserialize(DataInputPlus in, int version, DeserializationHelper { teeIn = new TeeDataInputPlus(in, dob, CACHEABLE_MUTATION_SIZE_LIMIT); + boolean allowsOutOfRangeMutations = false; + if (version >= VERSION_51) + { + int flags = in.readByte(); + allowsOutOfRangeMutations = allowsOutOfRangeMutations(flags); + } int size = teeIn.readUnsignedVInt32(); assert size > 0; @@ -519,7 +563,7 @@ public Mutation deserialize(DataInputPlus in, int version, DeserializationHelper update = PartitionUpdate.serializer.deserialize(teeIn, version, flag); modifications.put(update.metadata().id, update); } - m = new Mutation(update.metadata().keyspace, dk, modifications.build(), approxTime.now()); + m = new Mutation(update.metadata().keyspace, dk, modifications.build(), approxTime.now(), allowsOutOfRangeMutations); } //Only cache serializations that don't hit the limit @@ -597,7 +641,9 @@ long serializedSize(PartitionUpdate.PartitionUpdateSerializer serializer, Mutati long size = this.size; if (size == 0L) { - size = TypeSizes.sizeofUnsignedVInt(mutation.modifications.size()); + if (version >= VERSION_51) + size += ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG; // flags + size += TypeSizes.sizeofUnsignedVInt(mutation.modifications.size()); for (PartitionUpdate partitionUpdate : mutation.modifications.values()) size += serializer.serializedSize(partitionUpdate, version); this.size = size; @@ -650,7 +696,7 @@ public boolean isEmpty() public Mutation build() { - return new Mutation(keyspaceName, key, modifications.build(), approxCreatedAtNanos); + return new Mutation(keyspaceName, key, modifications.build(), approxCreatedAtNanos, false); } } } diff --git a/src/java/org/apache/cassandra/db/PartitionRangeReadCommand.java b/src/java/org/apache/cassandra/db/PartitionRangeReadCommand.java index 4926061cb870..6a5301946abd 100644 --- a/src/java/org/apache/cassandra/db/PartitionRangeReadCommand.java +++ b/src/java/org/apache/cassandra/db/PartitionRangeReadCommand.java @@ -71,6 +71,7 @@ protected PartitionRangeReadCommand(Epoch serializedAtEpoch, boolean isDigest, int digestVersion, boolean acceptsTransient, + boolean allowOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -80,7 +81,7 @@ protected PartitionRangeReadCommand(Epoch serializedAtEpoch, Index.QueryPlan indexQueryPlan, boolean trackWarnings) { - super(serializedAtEpoch, Kind.PARTITION_RANGE, isDigest, digestVersion, acceptsTransient, metadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan, trackWarnings, dataRange); + super(serializedAtEpoch, Kind.PARTITION_RANGE, isDigest, digestVersion, acceptsTransient, allowOutOfRangeReads, metadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan, trackWarnings, dataRange); this.requestedSlices = dataRange.clusteringIndexFilter.getSlices(metadata()); } @@ -88,6 +89,7 @@ private static PartitionRangeReadCommand create(Epoch serializedAtEpoch, boolean isDigest, int digestVersion, boolean acceptsTransient, + boolean allowsOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -115,6 +117,7 @@ private static PartitionRangeReadCommand create(Epoch serializedAtEpoch, isDigest, digestVersion, acceptsTransient, + allowsOutOfRangeReads, metadata, nowInSec, columnFilter, @@ -136,6 +139,7 @@ public static PartitionRangeReadCommand create(TableMetadata metadata, false, 0, false, + false, metadata, nowInSec, columnFilter, @@ -160,6 +164,7 @@ public static PartitionRangeReadCommand allDataRead(TableMetadata metadata, long false, 0, false, + false, metadata, nowInSec, ColumnFilter.all(metadata), @@ -206,6 +211,7 @@ public PartitionRangeReadCommand forSubRange(AbstractBounds r isDigestQuery(), digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -222,6 +228,7 @@ public PartitionRangeReadCommand copy() isDigestQuery(), digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -239,6 +246,7 @@ protected PartitionRangeReadCommand copyAsDigestQuery() true, digestVersion(), false, + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -256,6 +264,7 @@ protected PartitionRangeReadCommand copyAsTransientQuery() false, 0, true, + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -273,6 +282,7 @@ public PartitionRangeReadCommand withUpdatedLimit(DataLimits newLimits) isDigestQuery(), digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -290,6 +300,7 @@ public PartitionRangeReadCommand withUpdatedLimitsAndDataRange(DataLimits newLim isDigestQuery(), digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -525,6 +536,7 @@ public ReadCommand deserialize(DataInputPlus in, boolean isDigest, int digestVersion, boolean acceptsTransient, + boolean allowsOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -534,7 +546,7 @@ public ReadCommand deserialize(DataInputPlus in, throws IOException { DataRange range = DataRange.serializer.deserialize(in, version, metadata); - return PartitionRangeReadCommand.create(serializedAtEpoch, isDigest, digestVersion, acceptsTransient, metadata, nowInSec, columnFilter, rowFilter, limits, range, indexQueryPlan, false); + return PartitionRangeReadCommand.create(serializedAtEpoch, isDigest, digestVersion, acceptsTransient, allowsOutOfRangeReads, metadata, nowInSec, columnFilter, rowFilter, limits, range, indexQueryPlan, false); } } @@ -552,7 +564,7 @@ private VirtualTablePartitionRangeReadCommand(boolean isDigest, Index.QueryPlan indexQueryPlan, boolean trackWarnings) { - super(metadata.epoch, isDigest, digestVersion, acceptsTransient, metadata, nowInSec, columnFilter, rowFilter, limits, dataRange, indexQueryPlan, trackWarnings); + super(metadata.epoch, isDigest, digestVersion, acceptsTransient, true, metadata, nowInSec, columnFilter, rowFilter, limits, dataRange, indexQueryPlan, trackWarnings); } @Override diff --git a/src/java/org/apache/cassandra/db/ReadCommand.java b/src/java/org/apache/cassandra/db/ReadCommand.java index e4ea5f12d74b..d6b0f4734d21 100644 --- a/src/java/org/apache/cassandra/db/ReadCommand.java +++ b/src/java/org/apache/cassandra/db/ReadCommand.java @@ -18,44 +18,62 @@ package org.apache.cassandra.db; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.BiFunction; -import java.util.function.LongPredicate; import java.util.function.Function; +import java.util.function.LongPredicate; import java.util.stream.Collectors; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.netty.util.concurrent.FastThreadLocal; -import org.apache.cassandra.config.*; -import org.apache.cassandra.db.filter.*; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.Config; +import org.apache.cassandra.config.DataStorageSpec; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.filter.ClusteringIndexFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.filter.DataLimits; +import org.apache.cassandra.db.filter.LocalReadSizeTooLargeException; +import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.filter.TombstoneOverwhelmingException; +import org.apache.cassandra.db.partitions.PurgeFunction; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.RangeTombstoneBoundMarker; +import org.apache.cassandra.db.rows.RangeTombstoneMarker; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.Rows; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.db.rows.UnfilteredRowIterators; import org.apache.cassandra.db.transform.BasePartitions; import org.apache.cassandra.db.transform.BaseRows; +import org.apache.cassandra.db.transform.RTBoundCloser; +import org.apache.cassandra.db.transform.RTBoundValidator; +import org.apache.cassandra.db.transform.RTBoundValidator.Stage; +import org.apache.cassandra.db.transform.StoppingTransformation; +import org.apache.cassandra.db.transform.Transformation; import org.apache.cassandra.exceptions.CoordinatorBehindException; import org.apache.cassandra.exceptions.QueryCancelledException; +import org.apache.cassandra.exceptions.UnknownIndexException; import org.apache.cassandra.exceptions.UnknownTableException; import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.net.MessageFlag; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.ParamType; import org.apache.cassandra.net.Verb; -import org.apache.cassandra.db.partitions.*; -import org.apache.cassandra.db.rows.*; -import org.apache.cassandra.db.transform.RTBoundCloser; -import org.apache.cassandra.db.transform.RTBoundValidator; -import org.apache.cassandra.db.transform.RTBoundValidator.Stage; -import org.apache.cassandra.db.transform.StoppingTransformation; -import org.apache.cassandra.db.transform.Transformation; -import org.apache.cassandra.exceptions.UnknownIndexException; import org.apache.cassandra.index.Index; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.sstable.format.SSTableReader; @@ -67,16 +85,16 @@ import org.apache.cassandra.schema.IndexMetadata; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SchemaProvider; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.SchemaProvider; import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.ClientWarn; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tracing.Tracing; -import org.apache.cassandra.utils.CassandraUInt; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.CassandraUInt; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.NoSpamLogger; import org.apache.cassandra.utils.ObjectSizes; @@ -84,8 +102,8 @@ import static com.google.common.collect.Iterables.any; import static com.google.common.collect.Iterables.filter; -import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.db.partitions.UnfilteredPartitionIterators.MergeListener.NOOP; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; /** @@ -110,6 +128,7 @@ public abstract class ReadCommand extends AbstractReadQuery private final boolean isDigestQuery; private final boolean acceptsTransient; private final Epoch serializedAtEpoch; + private boolean allowsOutOfRangeReads; // if a digest query, the version for which the digest is expected. Ignored if not a digest. private int digestVersion; @@ -128,6 +147,7 @@ public abstract ReadCommand deserialize(DataInputPlus in, boolean isDigest, int digestVersion, boolean acceptsTransient, + boolean allowsOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -154,6 +174,7 @@ protected ReadCommand(Epoch serializedAtEpoch, boolean isDigestQuery, int digestVersion, boolean acceptsTransient, + boolean allowsOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -172,6 +193,7 @@ protected ReadCommand(Epoch serializedAtEpoch, this.digestVersion = digestVersion; this.acceptsTransient = acceptsTransient; this.indexQueryPlan = indexQueryPlan; + this.allowsOutOfRangeReads = allowsOutOfRangeReads; this.trackWarnings = trackWarnings; this.serializedAtEpoch = serializedAtEpoch; this.dataRange = dataRange; @@ -528,6 +550,17 @@ public ReadExecutionController executionController() return ReadExecutionController.forCommand(this, false); } + public ReadCommand allowOutOfRangeReads() + { + allowsOutOfRangeReads = true; + return this; + } + + public boolean allowsOutOfRangeReads() + { + return allowsOutOfRangeReads; + } + /** * Wraps the provided iterator so that metrics on what is scanned by the command are recorded. * This also log warning/trow TombstoneOverwhelmingException if appropriate. @@ -874,7 +907,7 @@ protected boolean hasPartitionLevelDeletions(SSTableReader sstable) // Skip purgeable tombstones. We do this because it's safe to do (post-merge of the memtable and sstable at least), it // can save us some bandwith, and avoid making us throw a TombstoneOverwhelmingException for purgeable tombstones (which // are to some extend an artefact of compaction lagging behind and hence counting them is somewhat unintuitive). - protected UnfilteredPartitionIterator withoutPurgeableTombstones(UnfilteredPartitionIterator iterator, + protected UnfilteredPartitionIterator withoutPurgeableTombstones(UnfilteredPartitionIterator iterator, ColumnFamilyStore cfs, ReadExecutionController controller) { @@ -1217,6 +1250,7 @@ public static class Serializer implements IVersionedSerializer private static final int HAS_INDEX = 0x04; private static final int ACCEPTS_TRANSIENT = 0x08; private static final int NEEDS_RECONCILIATION = 0x10; + private static final int ALLOWS_OUT_OF_RANGE_READS = 0x20; private final SchemaProvider schema; @@ -1281,6 +1315,16 @@ private static boolean needsReconciliation(int flags) return (flags & NEEDS_RECONCILIATION) != 0; } + private static int allowsOutOfRangeReadsFlag(boolean allowsOutOfRangeReads) + { + return allowsOutOfRangeReads ? ALLOWS_OUT_OF_RANGE_READS: 0; + } + + private static boolean allowsOutOfRangeReads(int flags) + { + return (flags & ALLOWS_OUT_OF_RANGE_READS) != 0; + } + public void serialize(ReadCommand command, DataOutputPlus out, int version) throws IOException { out.writeByte(command.kind.ordinal()); @@ -1289,6 +1333,7 @@ public void serialize(ReadCommand command, DataOutputPlus out, int version) thro | indexFlag(null != command.indexQueryPlan()) | acceptsTransientFlag(command.acceptsTransient()) | needsReconciliationFlag(command.rowFilter().needsReconciliation()) + | allowsOutOfRangeReadsFlag(command.allowsOutOfRangeReads) ); if (command.isDigestQuery()) out.writeUnsignedVInt32(command.digestVersion()); @@ -1314,6 +1359,7 @@ public ReadCommand deserialize(DataInputPlus in, int version) throws IOException int flags = in.readByte(); boolean isDigest = isDigest(flags); boolean acceptsTransient = acceptsTransient(flags); + boolean allowsOutOfRangeReads = allowsOutOfRangeReads(flags); // Shouldn't happen or it's a user error (see comment above) but // better complain loudly than doing the wrong thing. if (isForThrift(flags)) @@ -1359,7 +1405,7 @@ public ReadCommand deserialize(DataInputPlus in, int version) throws IOException indexQueryPlan = indexGroup.queryPlanFor(rowFilter); } - return kind.selectionDeserializer.deserialize(in, version, schemaVersion, isDigest, digestVersion, acceptsTransient, tableMetadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan); + return kind.selectionDeserializer.deserialize(in, version, schemaVersion, isDigest, digestVersion, acceptsTransient, allowsOutOfRangeReads, tableMetadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan); } private IndexMetadata deserializeIndexMetadata(DataInputPlus in, int version, TableMetadata metadata) throws IOException diff --git a/src/java/org/apache/cassandra/db/ReadCommandVerbHandler.java b/src/java/org/apache/cassandra/db/ReadCommandVerbHandler.java index 5d430f32cf72..0094a0df9243 100644 --- a/src/java/org/apache/cassandra/db/ReadCommandVerbHandler.java +++ b/src/java/org/apache/cassandra/db/ReadCommandVerbHandler.java @@ -22,21 +22,21 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; -import org.apache.cassandra.exceptions.CoordinatorBehindException; -import org.apache.cassandra.exceptions.InvalidRoutingException; -import org.apache.cassandra.exceptions.QueryCancelledException; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.CoordinatorBehindException; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.exceptions.InvalidRoutingException; +import org.apache.cassandra.exceptions.QueryCancelledException; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.metrics.TCMMetrics; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.FBUtilities; @@ -49,6 +49,18 @@ public class ReadCommandVerbHandler implements IVerbHandler private static final Logger logger = LoggerFactory.getLogger(ReadCommandVerbHandler.class); + public ReadResponse doRead(ReadCommand command, boolean trackRepairedData) + { + ReadResponse response; + try (ReadExecutionController controller = command.executionController(trackRepairedData); + UnfilteredPartitionIterator iterator = command.executeLocally(controller)) + { + response = command.createResponse(iterator, controller.getRepairedDataInfo()); + } + + return response; + } + public void doVerb(Message message) { if (message.epoch().isAfter(Epoch.EMPTY)) @@ -68,10 +80,9 @@ public void doVerb(Message message) command.trackWarnings(); ReadResponse response; - try (ReadExecutionController controller = command.executionController(message.trackRepairedData()); - UnfilteredPartitionIterator iterator = command.executeLocally(controller)) + try { - response = command.createResponse(iterator, controller.getRepairedDataInfo()); + response = doRead(command, message.trackRepairedData()); } catch (RejectException e) { @@ -147,15 +158,21 @@ else if (localComparisonEpoch.isAfter(readCommand.serializedAtEpoch())) private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message message) { ReadCommand command = message.payload; + if (command.metadata().isVirtual()) return metadata; + // Some read commands may be sent using an older Epoch intentionally so validating using the current Epoch + // doesn't work + if (command.allowsOutOfRangeReads()) + return metadata; + if (command.isTopK()) return metadata; if (command instanceof SinglePartitionReadCommand) { - Token token = ((SinglePartitionReadCommand) command).partitionKey().getToken(); + Token token = ((SinglePartitionReadCommand)command).partitionKey().getToken(); Replica localReplica = getLocalReplica(metadata, token, command.metadata().keyspace); if (localReplica == null) { diff --git a/src/java/org/apache/cassandra/db/ReadRepairVerbHandler.java b/src/java/org/apache/cassandra/db/ReadRepairVerbHandler.java index 8ca29eba1351..d40359c14472 100644 --- a/src/java/org/apache/cassandra/db/ReadRepairVerbHandler.java +++ b/src/java/org/apache/cassandra/db/ReadRepairVerbHandler.java @@ -25,9 +25,14 @@ public class ReadRepairVerbHandler extends AbstractMutationVerbHandler { public static final ReadRepairVerbHandler instance = new ReadRepairVerbHandler(); + public void applyMutation(Mutation mutation) + { + mutation.apply(); + } + void applyMutation(Message message, InetAddressAndPort respondToAddress) { - message.payload.apply(); + applyMutation(message.payload); MessagingService.instance().send(message.emptyResponse(), respondToAddress); } } diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index ab346bd47b6a..921d90a47b9e 100644 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@ -98,6 +98,7 @@ protected SinglePartitionReadCommand(Epoch serializedAtEpoch, boolean isDigest, int digestVersion, boolean acceptsTransient, + boolean allowsOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -109,7 +110,7 @@ protected SinglePartitionReadCommand(Epoch serializedAtEpoch, boolean trackWarnings, DataRange dataRange) { - super(serializedAtEpoch, Kind.SINGLE_PARTITION, isDigest, digestVersion, acceptsTransient, metadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan, trackWarnings, dataRange); + super(serializedAtEpoch, Kind.SINGLE_PARTITION, isDigest, digestVersion, acceptsTransient, allowsOutOfRangeReads, metadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan, trackWarnings, dataRange); assert partitionKey.getPartitioner() == metadata.partitioner; this.partitionKey = partitionKey; this.clusteringIndexFilter = clusteringIndexFilter; @@ -119,6 +120,7 @@ private static SinglePartitionReadCommand create(Epoch serializedAtEpoch, boolean isDigest, int digestVersion, boolean acceptsTransient, + boolean allowsOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -152,6 +154,7 @@ private static SinglePartitionReadCommand create(Epoch serializedAtEpoch, isDigest, digestVersion, acceptsTransient, + allowsOutOfRangeReads, metadata, nowInSec, columnFilter, @@ -191,6 +194,7 @@ public static SinglePartitionReadCommand create(TableMetadata metadata, false, 0, false, + false, metadata, nowInSec, columnFilter, @@ -369,6 +373,7 @@ public SinglePartitionReadCommand copy() isDigestQuery(), digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -387,6 +392,7 @@ protected SinglePartitionReadCommand copyAsDigestQuery() true, digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -405,6 +411,7 @@ protected SinglePartitionReadCommand copyAsTransientQuery() false, 0, true, + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -423,6 +430,7 @@ public SinglePartitionReadCommand withUpdatedLimit(DataLimits newLimits) isDigestQuery(), digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec(), columnFilter(), @@ -440,6 +448,7 @@ public SinglePartitionReadCommand withNowInSec(long nowInSec) isDigestQuery(), digestVersion(), acceptsTransient(), + allowsOutOfRangeReads(), metadata(), nowInSec, columnFilter(), @@ -1342,6 +1351,7 @@ public ReadCommand deserialize(DataInputPlus in, boolean isDigest, int digestVersion, boolean acceptsTransient, + boolean allowsOutOfRangeReads, TableMetadata metadata, long nowInSec, ColumnFilter columnFilter, @@ -1352,7 +1362,7 @@ public ReadCommand deserialize(DataInputPlus in, { DecoratedKey key = metadata.partitioner.decorateKey(metadata.partitionKeyType.readBuffer(in, DatabaseDescriptor.getMaxValueSize())); ClusteringIndexFilter filter = ClusteringIndexFilter.serializer.deserialize(in, version, metadata); - return SinglePartitionReadCommand.create(serializedAtEpoch, isDigest, digestVersion, acceptsTransient, metadata, nowInSec, columnFilter, rowFilter, limits, key, filter, indexQueryPlan, false); + return SinglePartitionReadCommand.create(serializedAtEpoch, isDigest, digestVersion, acceptsTransient, allowsOutOfRangeReads, metadata, nowInSec, columnFilter, rowFilter, limits, key, filter, indexQueryPlan, false); } } @@ -1400,7 +1410,7 @@ protected VirtualTableSinglePartitionReadCommand(boolean isDigest, boolean trackWarnings, DataRange dataRange) { - super(metadata.epoch, isDigest, digestVersion, acceptsTransient, metadata, nowInSec, columnFilter, + super(metadata.epoch, isDigest, digestVersion, acceptsTransient, true, metadata, nowInSec, columnFilter, rowFilter, limits, partitionKey, clusteringIndexFilter, indexQueryPlan, trackWarnings, dataRange); } diff --git a/src/java/org/apache/cassandra/db/SystemKeyspace.java b/src/java/org/apache/cassandra/db/SystemKeyspace.java index db82fe2386f2..8f155773c8a9 100644 --- a/src/java/org/apache/cassandra/db/SystemKeyspace.java +++ b/src/java/org/apache/cassandra/db/SystemKeyspace.java @@ -136,6 +136,8 @@ import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.cql3.QueryProcessor.executeInternalWithNowInSec; import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; import static org.apache.cassandra.gms.ApplicationState.DC; import static org.apache.cassandra.gms.ApplicationState.HOST_ID; import static org.apache.cassandra.gms.ApplicationState.INTERNAL_ADDRESS_AND_PORT; @@ -162,6 +164,7 @@ private SystemKeyspace() public static final String BATCHES = "batches"; public static final String PAXOS = "paxos"; + public static final String CONSENSUS_MIGRATION_STATE = "consensus_migration_state"; public static final String PAXOS_REPAIR_HISTORY = "paxos_repair_history"; public static final String PAXOS_REPAIR_STATE = "_paxos_repair_state"; public static final String BUILT_INDEXES = "IndexInfo"; @@ -190,6 +193,7 @@ private SystemKeyspace() */ public static final Set TABLES_SPLIT_ACROSS_MULTIPLE_DISKS = ImmutableSet.of(BATCHES, PAXOS, + CONSENSUS_MIGRATION_STATE, COMPACTION_HISTORY, PREPARED_STATEMENTS, REPAIRS); @@ -215,14 +219,14 @@ private SystemKeyspace() TABLE_ESTIMATES_TYPE_LOCAL_PRIMARY, AVAILABLE_RANGES_V2, TRANSFERRED_RANGES_V2, VIEW_BUILDS_IN_PROGRESS, BUILT_VIEWS, PREPARED_STATEMENTS, REPAIRS, TOP_PARTITIONS, LEGACY_PEERS, LEGACY_PEER_EVENTS, LEGACY_TRANSFERRED_RANGES, LEGACY_AVAILABLE_RANGES, LEGACY_SIZE_ESTIMATES, LEGACY_SSTABLE_ACTIVITY, - METADATA_LOG, SNAPSHOT_TABLE_NAME); + METADATA_LOG, SNAPSHOT_TABLE_NAME, CONSENSUS_MIGRATION_STATE); public static final Set TABLE_NAMES = ImmutableSet.of( - BATCHES, PAXOS, PAXOS_REPAIR_HISTORY, BUILT_INDEXES, LOCAL, PEERS_V2, PEER_EVENTS_V2, - COMPACTION_HISTORY, SSTABLE_ACTIVITY_V2, TABLE_ESTIMATES, AVAILABLE_RANGES_V2, TRANSFERRED_RANGES_V2, VIEW_BUILDS_IN_PROGRESS, - BUILT_VIEWS, PREPARED_STATEMENTS, REPAIRS, TOP_PARTITIONS, LEGACY_PEERS, LEGACY_PEER_EVENTS, + BATCHES, PAXOS, PAXOS_REPAIR_HISTORY, BUILT_INDEXES, LOCAL, PEERS_V2, PEER_EVENTS_V2, + COMPACTION_HISTORY, SSTABLE_ACTIVITY_V2, TABLE_ESTIMATES, AVAILABLE_RANGES_V2, TRANSFERRED_RANGES_V2, VIEW_BUILDS_IN_PROGRESS, + BUILT_VIEWS, PREPARED_STATEMENTS, REPAIRS, TOP_PARTITIONS, LEGACY_PEERS, LEGACY_PEER_EVENTS, LEGACY_TRANSFERRED_RANGES, LEGACY_AVAILABLE_RANGES, LEGACY_SIZE_ESTIMATES, LEGACY_SSTABLE_ACTIVITY, - METADATA_LOG, SNAPSHOT_TABLE_NAME); + METADATA_LOG, SNAPSHOT_TABLE_NAME, CONSENSUS_MIGRATION_STATE); public static final TableMetadata Batches = parse(BATCHES, @@ -255,6 +259,25 @@ private SystemKeyspace() .indexes(PaxosUncommittedIndex.indexes()) .build(); + private static final TableMetadata ConsensusMigrationState = + parse(CONSENSUS_MIGRATION_STATE, + "Keys that have been migrated to another consensus protocol", + "CREATE TABLE %s (" + + "row_key blob, " + + "cf_id UUID, " + + "consensus_migrated_at_epoch bigint, " + + "consensus_target tinyint, " + + "PRIMARY KEY ((row_key), cf_id, consensus_migrated_at_epoch)) " + + "WITH CLUSTERING ORDER BY (cf_id ASC, consensus_migrated_at_epoch DESC)") + .compaction(CompactionParams.twcs( + ImmutableMap.of( + "compaction_window_unit", "MINUTES", + "compaction_window_size", + // 7 days divided into 30 windows + String.valueOf((7 * 24 * 60) / 30)))) + .defaultTimeToLive((int)TimeUnit.DAYS.toSeconds(7)) + .build(); + private static final TableMetadata BuiltIndexes = parse(BUILT_INDEXES, "built column indexes", @@ -602,7 +625,8 @@ private static Tables tables() Repairs, TopPartitions, LocalMetadataLog, - Snapshots); + Snapshots, + ConsensusMigrationState); } private static volatile Map> truncationRecords; @@ -1598,6 +1622,27 @@ public static PaxosRepairHistory loadPaxosRepairHistory(String keyspace, String return PaxosRepairHistory.fromTupleBufferList(keyspace, table, points); } + public static void saveConsensusKeyMigrationState(ByteBuffer partitionKey, UUID cfId, ConsensusMigratedAt consensusMigratedAt) + { + String cql = "UPDATE system." + CONSENSUS_MIGRATION_STATE + " SET consensus_target = ? WHERE row_key = ? AND cf_id = ? AND consensus_migrated_at_epoch = ?"; + executeInternal(cql, consensusMigratedAt.migratedAtTarget.value, partitionKey, cfId, consensusMigratedAt.migratedAtEpoch.getEpoch()); + } + + public static ConsensusMigratedAt loadConsensusKeyMigrationState(ByteBuffer partitionKey, UUID cfId) + { + String cql = "SELECT consensus_migrated_at_epoch, consensus_target FROM system." + CONSENSUS_MIGRATION_STATE + " WHERE row_key = ? AND cf_id = ? LIMIT 1"; + UntypedResultSet results = executeInternal(cql, partitionKey, cfId); + + if (results.isEmpty()) + return null; + + UntypedResultSet.Row row = results.one(); + // TODO Period won't be necessary eventually + Epoch migratedAtEpoch = Epoch.create(row.getLong("consensus_migrated_at_epoch")); + ConsensusMigrationTarget target = ConsensusMigrationTarget.fromValue(row.getByte("consensus_target")); + return new ConsensusMigratedAt(migratedAtEpoch, target); + } + /** * Returns a RestorableMeter tracking the average read rate of a particular SSTable, restoring the last-seen rate * from values in system.sstable_activity if present. diff --git a/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java b/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java index 857e0dfde909..923ef54ab621 100644 --- a/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java +++ b/src/java/org/apache/cassandra/db/partitions/AbstractBTreePartition.java @@ -24,12 +24,32 @@ import com.google.common.collect.Iterators; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ClusteringBound; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.DeletionInfo; +import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.MutableDeletionInfo; +import org.apache.cassandra.db.RangeTombstone; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.Slice; +import org.apache.cassandra.db.Slices; import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.rows.AbstractUnfilteredRowIterator; +import org.apache.cassandra.db.rows.BTreeRow; +import org.apache.cassandra.db.rows.EncodingStats; +import org.apache.cassandra.db.rows.RangeTombstoneMarker; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.RowAndDeletionMergeIterator; +import org.apache.cassandra.db.rows.RowIterator; +import org.apache.cassandra.db.rows.Rows; +import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.db.rows.UnfilteredRowIterators; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.SearchIterator; import org.apache.cassandra.utils.btree.BTree; +import org.apache.cassandra.utils.btree.BTree.Dir; import static org.apache.cassandra.utils.btree.BTree.Dir.desc; @@ -403,9 +423,15 @@ public int rowCount() return BTree.size(holder().tree); } + @Override public Iterator iterator() { - return BTree.iterator(holder().tree); + return iterator(false); + } + + public Iterator iterator(boolean reverse) + { + return BTree.iterator(holder().tree, reverse ? Dir.DESC : Dir.ASC); } public Row lastRow() diff --git a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java index c9035befbde5..994ef1ac7b90 100644 --- a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java +++ b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java @@ -25,12 +25,17 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.cassandra.index.transactions.UpdateTransaction; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.TableMetadataRef; + import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.DeletionInfo; +import org.apache.cassandra.db.Slices; import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.TableMetadataRef; import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.concurrent.OpOrder; import org.apache.cassandra.utils.memory.Cloner; @@ -223,9 +228,9 @@ public UnfilteredRowIterator unfilteredIterator(BTreePartitionData current, Colu } @Override - public Iterator iterator() + public Iterator iterator(boolean reverse) { - return allocator.ensureOnHeap().applyToPartition(super.iterator()); + return allocator.ensureOnHeap().applyToPartition(super.iterator(reverse)); } private boolean shouldLock(OpOrder.Group writeOp) diff --git a/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java b/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java index 138c853224f6..a66781dd7ef0 100644 --- a/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java +++ b/src/java/org/apache/cassandra/db/partitions/FilteredPartition.java @@ -19,11 +19,12 @@ import java.util.Iterator; -import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.DeletionInfo; import org.apache.cassandra.db.RegularAndStaticColumns; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.RowIterator; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.btree.BTree; public class FilteredPartition extends ImmutableBTreePartition @@ -49,9 +50,9 @@ public Row getAtIdx(int idx) return BTree.findByIndex(holder.tree, idx); } - public RowIterator rowIterator() + public RowIterator rowIterator(boolean reverse) { - final Iterator iter = iterator(); + final Iterator iter = iterator(reverse); return new RowIterator() { public TableMetadata metadata() @@ -61,7 +62,7 @@ public TableMetadata metadata() public boolean isReverseOrder() { - return false; + return reverse; } public RegularAndStaticColumns columns() diff --git a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java index cb0fdfb9ffe9..1b543bd2c7df 100644 --- a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java +++ b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java @@ -24,17 +24,45 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import javax.annotation.Nonnull; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Function; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.primitives.Ints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.Columns; +import org.apache.cassandra.db.CounterMutation; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.DeletionInfo; +import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.MutableDeletionInfo; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.RangeTombstone; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.SimpleBuilders; +import org.apache.cassandra.db.Slices; import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.rows.BTreeRow; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.db.rows.ColumnData; +import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.db.rows.DeserializationHelper; +import org.apache.cassandra.db.rows.EncodingStats; +import org.apache.cassandra.db.rows.RangeTombstoneMarker; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.RowIterator; +import org.apache.cassandra.db.rows.RowIterators; +import org.apache.cassandra.db.rows.Rows; +import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.db.rows.UnfilteredRowIteratorSerializer; +import org.apache.cassandra.db.rows.UnfilteredRowIterators; import org.apache.cassandra.exceptions.CoordinatorBehindException; import org.apache.cassandra.exceptions.UnknownTableException; import org.apache.cassandra.index.IndexRegistry; @@ -1109,11 +1137,11 @@ public Builder updateAllTimestamp(long newTimestamp) return this; } - public Builder updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + public Builder updateTimesAndPathsForAccord(@Nonnull Function cellToMaybeNewListPath, long newTimestamp, int newLocalDeletionTime) { deletionInfo.updateAllTimestampAndLocalDeletionTime(newTimestamp - 1, newLocalDeletionTime); - tree = BTree.transformAndFilter(tree, (x) -> x.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); - staticRow = this.staticRow.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime); + tree = BTree.transformAndFilter(tree, (x) -> x.updateTimesAndPathsForAccord(cellToMaybeNewListPath, newTimestamp, newLocalDeletionTime)); + staticRow = this.staticRow.updateTimesAndPathsForAccord(cellToMaybeNewListPath, newTimestamp, newLocalDeletionTime); return this; } @@ -1130,6 +1158,5 @@ public String toString() ", isBuilt=" + isBuilt + '}'; } - } } diff --git a/src/java/org/apache/cassandra/db/rows/AbstractCell.java b/src/java/org/apache/cassandra/db/rows/AbstractCell.java index c3df806b8c25..d30489a10921 100644 --- a/src/java/org/apache/cassandra/db/rows/AbstractCell.java +++ b/src/java/org/apache/cassandra/db/rows/AbstractCell.java @@ -19,9 +19,12 @@ import java.nio.ByteBuffer; import java.util.Objects; +import javax.annotation.Nonnull; + +import com.google.common.base.Function; -import org.apache.cassandra.db.Digest; import org.apache.cassandra.db.DeletionPurger; +import org.apache.cassandra.db.Digest; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.context.CounterContext; import org.apache.cassandra.db.marshal.AbstractType; @@ -118,12 +121,19 @@ public Cell updateAllTimestamp(long newTimestamp) } @Override - public ColumnData updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + public ColumnData updateTimesAndPathsForAccord(@Nonnull Function cellToMaybeNewListPath, long newTimestamp, int newLocalDeletionTime) { long localDeletionTime = localDeletionTime() != NO_DELETION_TIME ? newLocalDeletionTime : NO_DELETION_TIME; return new BufferCell(column, isTombstone() ? newTimestamp - 1 : newTimestamp, ttl(), localDeletionTime, buffer(), path()); } + @Override + public Cell updateAllTimesWithNewCellPathForComplexColumnData(@Nonnull CellPath maybeNewPath, long newTimestamp, int newLocalDeletionTime) + { + long localDeletionTime = localDeletionTime() != NO_DELETION_TIME ? newLocalDeletionTime : NO_DELETION_TIME; + return new BufferCell(column, isTombstone() ? newTimestamp - 1 : newTimestamp, ttl(), localDeletionTime, buffer(), maybeNewPath); + } + public int dataSize() { CellPath path = path(); diff --git a/src/java/org/apache/cassandra/db/rows/BTreeRow.java b/src/java/org/apache/cassandra/db/rows/BTreeRow.java index 2fbee6a24a6e..ed445e8b6bd7 100644 --- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java +++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java @@ -18,7 +18,6 @@ package org.apache.cassandra.db.rows; import java.nio.ByteBuffer; - import java.util.AbstractCollection; import java.util.Arrays; import java.util.Collection; @@ -28,9 +27,10 @@ import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Consumer; -import java.util.function.Function; import java.util.function.Predicate; +import javax.annotation.Nonnull; +import com.google.common.base.Function; import com.google.common.collect.Collections2; import com.google.common.collect.Iterators; import com.google.common.primitives.Ints; @@ -40,15 +40,13 @@ import org.apache.cassandra.db.DeletionPurger; import org.apache.cassandra.db.DeletionTime; import org.apache.cassandra.db.LivenessInfo; +import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.UTF8Type; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.TableMetadata; - -import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.DroppedColumn; - +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.AbstractIterator; import org.apache.cassandra.utils.BiLongAccumulator; import org.apache.cassandra.utils.BulkIterator; @@ -446,7 +444,7 @@ public Row updateAllTimestamp(long newTimestamp) } @Override - public Row updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + public Row updateTimesAndPathsForAccord(@Nonnull Function cellToMaybeNewListPath, long newTimestamp, int newLocalDeletionTime) { LivenessInfo newInfo = primaryKeyLivenessInfo.isEmpty() ? primaryKeyLivenessInfo : primaryKeyLivenessInfo.withUpdatedTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime); // If the deletion is shadowable and the row has a timestamp, we'll forced the deletion timestamp to be less than the row one, so we @@ -454,7 +452,7 @@ public Row updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLoca Deletion newDeletion = deletion.isLive() || (deletion.isShadowable() && !primaryKeyLivenessInfo.isEmpty()) ? Deletion.LIVE : new Deletion(DeletionTime.build(newTimestamp - 1, newLocalDeletionTime), deletion.isShadowable()); - return transformAndFilter(newInfo, newDeletion, (cd) -> cd.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); + return transformAndFilter(newInfo, newDeletion, (cd) -> cd.updateTimesAndPathsForAccord(cellToMaybeNewListPath, newTimestamp, newLocalDeletionTime)); } public Row withRowDeletion(DeletionTime newDeletion) diff --git a/src/java/org/apache/cassandra/db/rows/ColumnData.java b/src/java/org/apache/cassandra/db/rows/ColumnData.java index 18530b2d3929..8f055e1d148c 100644 --- a/src/java/org/apache/cassandra/db/rows/ColumnData.java +++ b/src/java/org/apache/cassandra/db/rows/ColumnData.java @@ -18,13 +18,16 @@ package org.apache.cassandra.db.rows; import java.util.Comparator; +import javax.annotation.Nonnull; + +import com.google.common.base.Function; import org.apache.cassandra.cache.IMeasurableMemory; -import org.apache.cassandra.db.Digest; -import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.db.DeletionPurger; import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.Digest; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.serializers.MarshalException; import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.btree.UpdateFunction; @@ -284,7 +287,19 @@ public static void digest(Digest digest, ColumnData cd) * This exists for the Paxos path, see {@link PartitionUpdate#updateAllTimestamp} for additional details. */ public abstract ColumnData updateAllTimestamp(long newTimestamp); - public abstract ColumnData updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime); + + /** + * @param cellToMaybeNewListPath If the cell is a list append cell a new cell path is returned generated based on the Accord executeAt timestamp + */ + public abstract ColumnData updateTimesAndPathsForAccord(@Nonnull Function cellToMaybeNewListPath, long newTimestamp, int newLocalDeletionTime); + + /** + * List paths are time UUIDs that increment for each item in the list and for Accord and Paxos + * should be based on the transaction's ballot/timestamp. + * + * @param maybeNewPath If this cell is a list append for a non-frozen list (multi-cell) then it will be new path generated using the executeAt timestamp, otherwise it will be the existing path + */ + public abstract ColumnData updateAllTimesWithNewCellPathForComplexColumnData(@Nonnull CellPath maybeNewPath, long newTimestamp, int newLocalDeletionTime); public abstract ColumnData markCounterLocalToBeCleared(); diff --git a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java index f668edfd7a05..f032fd5b6d33 100644 --- a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java +++ b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.util.Iterator; import java.util.Objects; +import javax.annotation.Nonnull; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; @@ -30,6 +31,7 @@ import org.apache.cassandra.db.LivenessInfo; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.marshal.ByteType; +import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.DroppedColumn; @@ -265,10 +267,21 @@ public ComplexColumnData updateAllTimestamp(long newTimestamp) } @Override - public ColumnData updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime) + public ColumnData updateTimesAndPathsForAccord(@Nonnull Function cellToMaybeNewListPath, long newTimestamp, int newLocalDeletionTime) { DeletionTime newDeletion = complexDeletion.isLive() ? complexDeletion : DeletionTime.build(newTimestamp - 1, newLocalDeletionTime); - return transformAndFilter(newDeletion, (cell) -> (Cell) cell.updateAllTimestampAndLocalDeletionTime(newTimestamp, newLocalDeletionTime)); + Function maybeNewListPath; + if (column.type instanceof ListType && column.type.isMultiCell()) + maybeNewListPath = cellToMaybeNewListPath; + else + maybeNewListPath = cell -> cell.path(); + return transformAndFilter(newDeletion, (cell) -> (Cell) cell.updateAllTimesWithNewCellPathForComplexColumnData(maybeNewListPath.apply(cell), newTimestamp, newLocalDeletionTime)); + } + + @Override + public ColumnData updateAllTimesWithNewCellPathForComplexColumnData(@Nonnull CellPath maybeNewPath, long newTimestamp, int newLocalDeletionTime) + { + throw new UnsupportedOperationException(); } public long maxTimestamp() diff --git a/src/java/org/apache/cassandra/db/rows/Row.java b/src/java/org/apache/cassandra/db/rows/Row.java index ee836446d491..3820e8c3a44d 100644 --- a/src/java/org/apache/cassandra/db/rows/Row.java +++ b/src/java/org/apache/cassandra/db/rows/Row.java @@ -17,13 +17,26 @@ */ package org.apache.cassandra.db.rows; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; import java.util.function.BiConsumer; import java.util.function.Consumer; -import java.util.function.Function; +import javax.annotation.Nonnull; + +import com.google.common.base.Function; import org.apache.cassandra.cache.IMeasurableMemory; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.DeletionPurger; +import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.Digest; +import org.apache.cassandra.db.LivenessInfo; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; @@ -299,7 +312,7 @@ public interface Row extends Unfiltered, Iterable, IMeasurableMemory */ public Row updateAllTimestamp(long newTimestamp); - public Row updateAllTimestampAndLocalDeletionTime(long newTimestamp, int newLocalDeletionTime); + public Row updateTimesAndPathsForAccord(@Nonnull Function cellToMaybeNewListPath, long newTimestamp, int newLocalDeletionTime); /** * Returns a copy of this row with the new deletion as row deletion if it is more recent diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraOutgoingFile.java b/src/java/org/apache/cassandra/db/streaming/CassandraOutgoingFile.java index 7572749d37e2..dc24a3bc0fea 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraOutgoingFile.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraOutgoingFile.java @@ -49,6 +49,7 @@ public class CassandraOutgoingFile implements OutgoingStream private final boolean shouldStreamEntireSSTable; private final StreamOperation operation; private final CassandraStreamHeader header; + private final List> ranges; public CassandraOutgoingFile(StreamOperation operation, Ref ref, List sections, List> normalizedRanges, @@ -60,6 +61,7 @@ public CassandraOutgoingFile(StreamOperation operation, Ref ref, this.ref = ref; this.estimatedKeys = estimatedKeys; this.sections = sections; + this.ranges = normalizedRanges; SSTableReader sstable = ref.get(); @@ -131,6 +133,12 @@ public int getNumFiles() return shouldStreamEntireSSTable ? header.componentManifest.components().size() : 1; } + @Override + public List> ranges() + { + return ranges; + } + @Override public long getRepairedAt() { diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamManager.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamManager.java index 6940f11b57fc..505bd6b9287e 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamManager.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamManager.java @@ -81,9 +81,9 @@ public IncomingStream prepareIncomingStream(StreamSession session, StreamMessage } @Override - public StreamReceiver createStreamReceiver(StreamSession session, int totalStreams) + public StreamReceiver createStreamReceiver(StreamSession session, List> ranges, int totalStreams) { - return new CassandraStreamReceiver(cfs, session, totalStreams); + return new CassandraStreamReceiver(cfs, session, ranges, totalStreams); } @Override diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 50f87c799ece..62af76127741 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -41,18 +41,24 @@ import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.db.view.View; import org.apache.cassandra.dht.Bounds; +import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableMultiWriter; import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.streaming.IncomingStream; import org.apache.cassandra.streaming.StreamReceiver; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.CassandraVersion; import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.concurrent.Refs; +import static com.google.common.base.Preconditions.checkNotNull; import static org.apache.cassandra.config.CassandraRelevantProperties.REPAIR_MUTATION_REPAIR_ROWS_PER_BATCH; public class CassandraStreamReceiver implements StreamReceiver @@ -74,14 +80,17 @@ public class CassandraStreamReceiver implements StreamReceiver private final boolean requiresWritePath; + private final List> ranges; - public CassandraStreamReceiver(ColumnFamilyStore cfs, StreamSession session, int totalFiles) + + public CassandraStreamReceiver(ColumnFamilyStore cfs, StreamSession session, List> ranges, int totalFiles) { this.cfs = cfs; this.session = session; // this is an "offline" transaction, as we currently manually expose the sstables once done; // this should be revisited at a later date, so that LifecycleTransaction manages all sstable state changes this.txn = LifecycleTransaction.offline(OperationType.STREAM); + this.ranges = ranges; this.sstables = new ArrayList<>(totalFiles); this.requiresWritePath = requiresWritePath(cfs); } @@ -233,6 +242,14 @@ public synchronized void finishTransaction() @Override public void finished() { + CassandraVersion minVersion = ClusterMetadata.current().directory.clusterMinVersion.cassandraVersion; + checkNotNull(minVersion, "Unable to determine minimum cluster version"); + IAccordService accordService = AccordService.instance(); + if (session.streamOperation().requiresBarrierTransaction() + && accordService.isAccordManagedKeyspace(cfs.keyspace.getName()) + && CassandraVersion.CASSANDRA_5_0.compareTo(minVersion) >= 0) + accordService.postStreamReceivingBarrier(cfs, ranges); + boolean requiresWritePath = requiresWritePath(cfs); Collection readers = sstables; diff --git a/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java b/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java index 1c8c24b1716a..0012382bd9cc 100644 --- a/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java +++ b/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java @@ -144,7 +144,7 @@ private void updateDataset(SimpleDataSet result, CoordinatorState state) result.column("options_primary_range", state.options.isPrimaryRange()); result.column("options_trace", state.options.isTraced()); result.column("options_job_threads", state.options.getJobThreads()); - result.column("options_subrange_repair", state.options.isSubrangeRepair()); + result.column("options_subrange_repair", false); result.column("options_pull_repair", state.options.isPullRepair()); result.column("options_force_repair", state.options.isForcedRepair()); result.column("options_preview_kind", state.options.getPreviewKind().name()); @@ -183,6 +183,10 @@ private String getType(CoordinatorState state) default: throw new AssertionError("Unknown preview kind: " + state.options.getPreviewKind()); } } + else if (state.options.accordRepair()) + { + return "accord repair"; + } else if (state.options.isIncremental()) { return "incremental"; diff --git a/src/java/org/apache/cassandra/dht/AccordSplitter.java b/src/java/org/apache/cassandra/dht/AccordSplitter.java index c5971dc89fe0..b0868b47e810 100644 --- a/src/java/org/apache/cassandra/dht/AccordSplitter.java +++ b/src/java/org/apache/cassandra/dht/AccordSplitter.java @@ -45,7 +45,7 @@ public BigInteger sizeOf(accord.primitives.Range range) } @Override - public accord.primitives.Range subRange(accord.primitives.Range range, BigInteger startOffset, BigInteger endOffset) + public TokenRange subRange(accord.primitives.Range range, BigInteger startOffset, BigInteger endOffset) { AccordRoutingKey startBound = (AccordRoutingKey)range.start(); AccordRoutingKey endBound = (AccordRoutingKey)range.end(); diff --git a/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java b/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java index 9b3f63b82097..9e3c3cf848c8 100644 --- a/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java +++ b/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java @@ -17,37 +17,36 @@ */ package org.apache.cassandra.dht; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Function; + +import com.google.common.collect.Maps; +import org.apache.commons.lang3.ArrayUtils; + import accord.primitives.Ranges; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.db.BufferDecoratedKey; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.bytecomparable.ByteComparable; -import org.apache.cassandra.utils.bytecomparable.ByteSource; -import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Hex; import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.Pair; - -import org.apache.commons.lang3.ArrayUtils; - -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.concurrent.ThreadLocalRandom; -import java.util.function.Function; - -import com.google.common.collect.Maps; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; +import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; public class ByteOrderedPartitioner implements IPartitioner { @@ -194,6 +193,8 @@ public Token decreaseSlightly() } } + private ByteOrderedPartitioner() {} + public BytesToken getToken(ByteBuffer key) { if (key.remaining() == 0) diff --git a/src/java/org/apache/cassandra/dht/LocalPartitioner.java b/src/java/org/apache/cassandra/dht/LocalPartitioner.java index b0b8d558ad93..c2886fd53986 100644 --- a/src/java/org/apache/cassandra/dht/LocalPartitioner.java +++ b/src/java/org/apache/cassandra/dht/LocalPartitioner.java @@ -21,12 +21,13 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Random; import java.util.function.Function; import accord.primitives.Ranges; -import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.CachedHashDecoratedKey; +import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.utils.ByteBufferUtil; @@ -140,6 +141,21 @@ public AbstractType partitionOrdering() return comparator; } + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + LocalPartitioner that = (LocalPartitioner) o; + return comparator.equals(that.comparator) && tokenFactory.equals(that.tokenFactory); + } + + @Override + public int hashCode() + { + return Objects.hash(comparator, tokenFactory); + } + public class LocalToken extends ComparableObjectToken { static final long serialVersionUID = 8437543776403014875L; diff --git a/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java b/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java index f80d6d4843a2..410dbfcd0b00 100644 --- a/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java +++ b/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java @@ -21,28 +21,33 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; -import java.util.*; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.function.Function; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.primitives.Longs; + import accord.primitives.Ranges; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PreHashedDecoratedKey; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.AbstractType; -import org.apache.cassandra.db.marshal.PartitionerDefinedOrder; import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.marshal.PartitionerDefinedOrder; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.MurmurHash; +import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import org.apache.cassandra.utils.bytecomparable.ByteSource; import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; -import org.apache.cassandra.utils.MurmurHash; -import org.apache.cassandra.utils.ObjectSizes; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.primitives.Longs; /** * This class generates a BigIntegerToken using a Murmur3 hash. @@ -85,6 +90,8 @@ BigInteger maximumValue() } }; + protected Murmur3Partitioner() {} + public DecoratedKey decorateKey(ByteBuffer key) { long[] hash = getHash(key); diff --git a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java index d2419049dbda..741a2b0c7f8d 100644 --- a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java +++ b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java @@ -20,14 +20,18 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.function.Function; import accord.api.RoutingKey; import accord.primitives.Ranges; -import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.CachedHashDecoratedKey; +import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.exceptions.ConfigurationException; @@ -70,6 +74,8 @@ public int compareTo(Token o) public static final OrderPreservingPartitioner instance = new OrderPreservingPartitioner(); + private OrderPreservingPartitioner() {} + public DecoratedKey decorateKey(ByteBuffer key) { return new CachedHashDecoratedKey(getToken(key), key); diff --git a/src/java/org/apache/cassandra/dht/RandomPartitioner.java b/src/java/org/apache/cassandra/dht/RandomPartitioner.java index 44f1893f0bb6..a21815c0c5fb 100644 --- a/src/java/org/apache/cassandra/dht/RandomPartitioner.java +++ b/src/java/org/apache/cassandra/dht/RandomPartitioner.java @@ -22,28 +22,33 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.security.MessageDigest; -import java.util.*; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; import accord.primitives.Ranges; import org.apache.cassandra.db.CachedHashDecoratedKey; -import org.apache.cassandra.db.marshal.ByteArrayAccessor; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; -import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteArrayAccessor; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.IntegerType; import org.apache.cassandra.db.marshal.PartitionerDefinedOrder; +import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.bytecomparable.ByteComparable; -import org.apache.cassandra.utils.bytecomparable.ByteSource; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.GuidGenerator; import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; /** * This class generates a BigIntegerToken using MD5 hash. @@ -108,6 +113,8 @@ BigInteger maximumValue() } }; + private RandomPartitioner() {} + public DecoratedKey decorateKey(ByteBuffer key) { return new CachedHashDecoratedKey(getToken(key), key); diff --git a/src/java/org/apache/cassandra/dht/Range.java b/src/java/org/apache/cassandra/dht/Range.java index b5d06967ac01..9100254508bf 100644 --- a/src/java/org/apache/cassandra/dht/Range.java +++ b/src/java/org/apache/cassandra/dht/Range.java @@ -19,13 +19,27 @@ import java.io.IOException; import java.io.Serializable; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; import java.util.function.Predicate; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; import org.apache.commons.lang3.ObjectUtils; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.dht.Token.TokenFactory; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.net.MessagingService; @@ -34,6 +48,10 @@ import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.utils.Pair; +import static com.google.common.base.Preconditions.checkState; +import static java.util.Collections.emptyList; +import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_RANGE_EXPENSIVE_CHECKS; + /** * A representation of the range that a node is responsible for on the DHT ring. * @@ -48,6 +66,34 @@ public class Range> extends AbstractBounds implemen public static final Serializer serializer = new Serializer(); public static final long serialVersionUID = 1L; + public static final boolean EXPENSIVE_CHECKS = TEST_RANGE_EXPENSIVE_CHECKS.getBoolean(); + + public static final IPartitionerDependentSerializer rangeSerializer = new RangeSerializer(); + + public static class RangeSerializer> implements IPartitionerDependentSerializer> + { + @Override + public void serialize(Range range, DataOutputPlus out, int version) throws IOException + { + Token.compactSerializer.serialize(range.left.getToken(), out, version); + Token.compactSerializer.serialize(range.right.getToken(), out, version); + } + + @Override + public Range deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException + { + return new Range(Token.compactSerializer.deserialize(in, p, version), + Token.compactSerializer.deserialize(in, p, version)); + } + + @Override + public long serializedSize(Range range, int version) + { + return Token.compactSerializer.serializedSize(range.left.getToken(), version) + + Token.compactSerializer.serializedSize(range.right.getToken(), version); + } + } + public Range(T left, T right) { super(left, right); @@ -349,6 +395,43 @@ public int compareTo(Range rhs) return right.compareTo(rhs.right); } + /* + * Compares ranges by right token. Used for intersecting normalized ranges. + * + * Assumes no wrap around ranges except for RHS = minValue which is essentialy synonymous with the maximal value. + * This shows up coming out of unwrap because Range is not left inclusive so the only way to include minValue + * in the range is by wrapping from maxValue. + */ + private int compareNormalized(Range rhs) + { + // otherwise compare by right. + int cmp = right.compareTo(rhs.right); + // minValue on the RHS is maxValue, but doesn't work with compare so check for it explicitly + boolean rhsRMin = rhs.right.isMinimum(); + boolean lhsRMin = right.isMinimum(); + + if (rhsRMin && lhsRMin) + return 0; + + if (cmp < 0) + { + if (lhsRMin) + { + return 1; + } + return -1; + } + else if (cmp > 0) + { + if (rhsRMin) + { + return -1; + } + return 1; + } + return 0; + } + /** * Subtracts a portion of this range. * @param contained The range to subtract from this. It must be totally @@ -361,7 +444,7 @@ private List> subtractContained(Range contained) // both ranges cover the entire ring, their difference is an empty set if(isFull(left, right) && isFull(contained.left, contained.right)) { - return Collections.emptyList(); + return emptyList(); } // a range is subtracted from another range that covers the entire ring @@ -472,6 +555,190 @@ public static > boolean isInRanges(T token, Iterable { + Range range = (Range)o1; + RingPosition key = (RingPosition) o2; + boolean rangeRightIsMin = range.right.isMinimum(); + boolean keyIsMinimum = key.isMinimum(); + + if (keyIsMinimum & rangeRightIsMin) + return 0; + + int lc = key.compareTo(range.left); + int rc = key.compareTo(range.right); + if ((lc < 0 & !keyIsMinimum) | lc == 0) return 1; + if (rc > 0 & !rangeRightIsMin) return -1; + return 0; + }; + + public static > boolean isInNormalizedRanges(T token, List> ranges) + { + if (ranges.size() == 1 && ranges.get(0).isFull()) + return true; + boolean isIn = Collections.binarySearch((List)ranges, token, NORMALIZED_TOKEN_RANGE_COMPARATOR) >= 0; + if (EXPENSIVE_CHECKS) + checkState(isInRanges(token, ranges) == isIn); + return isIn; + } + + public static > List> subtractNormalizedRanges(List> a, List> b) + { + if (b.size() == 1 && b.get(0).isFull()) + return emptyList(); + + if (a.size() == 1 && a.get(0).isFull()) + return invertNormalizedRanges(b); + + List> remaining = new ArrayList<>(); + Iterator> aIter = a.iterator(); + Iterator> bIter = b.iterator(); + Range aRange = aIter.hasNext() ? aIter.next() : null; + Range bRange = bIter.hasNext() ? bIter.next() : null; + while (aRange != null && bRange != null) + { + boolean aRMin = aRange.right.isMinimum(); + boolean bRMin = bRange.right.isMinimum(); + + if (aRMin && bRMin) + { + if (aRange.left.compareTo(bRange.left) < 0) + remaining.add(new Range<>(aRange.left, bRange.left)); + checkState(!aIter.hasNext() && !bIter.hasNext()); + aRange = null; + break; + } + + if (!aRMin && aRange.right.compareTo(bRange.left) <= 0) + { + remaining.add(aRange); + aRange = aIter.hasNext() ? aIter.next() : null; + } + else if (!bRMin && aRange.left.compareTo(bRange.right) >= 0) + { + bRange = bIter.hasNext() ? bIter.next() : null; + } + else + { + // Handle what remains to the left of the intersection + if (aRange.left.compareTo(bRange.left) < 0) + { + remaining.add(new Range(aRange.left, bRange.left)); + } + + // Handle what remains to the right of the intersection + if (!aRMin && (aRange.right.compareTo(bRange.right) <= 0 | bRMin)) + aRange = aIter.hasNext() ? aIter.next() : null; + else + aRange = new Range(bRange.right, aRange.right); + } + } + + while (aRange != null) + { + remaining.add(aRange); + aRange = aIter.hasNext() ? aIter.next() : null; + } + + List> result = ImmutableList.copyOf(normalize(remaining)); + if (EXPENSIVE_CHECKS) + checkState(result.equals(normalize(subtract(a, b)))); + return result; + } + + private boolean isFull() + { + return isFull(left, right); + } + + @VisibleForTesting + static > List> invertNormalizedRanges(List> ranges) + { + if (ranges.isEmpty()) + return ranges; + + List> result = new ArrayList<>(ranges.size() + 2); + T minValue = ranges.get(0).left.minValue(); + T left = minValue; + for (Range r : ranges) + { + if (!r.left.equals(left)) + { + result.add(new Range<>(left, r.left)); + } + left = r.right; + } + + // Loop doesn't add the range to the right of the last one + Range last = ranges.get(ranges.size() - 1); + if (!last.right.isMinimum()) + result.add(new Range<>(last.right, minValue)); + + result = normalize(result); + if (EXPENSIVE_CHECKS) + checkState(result.equals(normalize(subtract(ImmutableList.of(new Range<>(minValue, minValue)), ranges)))); + return result; + } + + public static > List> intersectionOfNormalizedRanges(List> a, List> b) + { + if (a.size() == 1 && a.get(0).isFull()) + return b; + if (b.size() == 1 && b.get(0).isFull()) + return a; + + List> merged = new ArrayList<>(); + PeekingIterator> aIter = Iterators.peekingIterator(a.iterator()); + PeekingIterator> bIter = Iterators.peekingIterator(b.iterator()); + while (aIter.hasNext() && bIter.hasNext()) + { + Range aRange = aIter.peek(); + Range bRange = bIter.peek(); + + int cmp = aRange.compareNormalized(bRange); + if (aRange.intersects(bRange)) + { + merged.addAll(aRange.intersectionWith(bRange)); + if (cmp == 0) + { + aIter.next(); + bIter.next(); + } + else if(cmp < 0) + { + aIter.next(); + } + else + { + bIter.next(); + } + } + else + { + if (cmp <= 0) + aIter.next(); + if (cmp >= 0) + bIter.next(); + } + } + + List> result = ImmutableList.copyOf(normalize(merged)); + + if (EXPENSIVE_CHECKS) + { + List> expensiveResult = new ArrayList<>(); + for (Range r1 : a) + { + for (Range r2 : b) + { + expensiveResult.addAll(r1.intersectionWith(r2)); + } + } + checkState(result.equals(normalize(expensiveResult))); + } + + return result; + } + @Override public boolean equals(Object o) { @@ -670,6 +937,26 @@ else if (t.compareTo(currentRange.right) <= 0 || currentRange.right.compareTo(cu } } + public static > boolean equals(Collection> a, Collection> b) + { + return normalize(a).equals(normalize(b)); + } + + // Helper to convert a range string to POJO so you can copy toString from a debugger + public static Range fromString(String value) + { + return fromString(value, DatabaseDescriptor.getPartitioner()); + } + + public static Range fromString(String value, IPartitioner partitioner) + { + TokenFactory tokenFactory = partitioner.getTokenFactory(); + String[] parts = value.split(","); + Token left = tokenFactory.fromString(parts[0].substring(1)); + Token right = tokenFactory.fromString(parts[1].substring(0, parts[1].length() -1)); + return new Range<>(left, right); + } + public static > void assertNormalized(List> ranges) { Range lastRange = null; diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailure.java b/src/java/org/apache/cassandra/exceptions/RequestFailure.java index 39700d795c3b..d2c8a2e61c56 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailure.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailure.java @@ -67,7 +67,7 @@ public static void init() {} public void serialize(RequestFailure t, DataOutputPlus out, int version) throws IOException { RequestFailureReason.serializer.serialize(t.reason, out, version); - if (version >= MessagingService.VERSION_50) + if (version >= MessagingService.VERSION_51) nullableRemoteExceptionSerializer.serialize(t.failure, out, version); } @@ -76,7 +76,7 @@ public RequestFailure deserialize(DataInputPlus in, int version) throws IOExcept { RequestFailureReason reason = RequestFailureReason.serializer.deserialize(in, version); Throwable failure = null; - if (version >= MessagingService.VERSION_50) + if (version >= MessagingService.VERSION_51) failure = nullableRemoteExceptionSerializer.deserialize(in, version); if (failure == null) return forReason(reason); @@ -88,7 +88,7 @@ public RequestFailure deserialize(DataInputPlus in, int version) throws IOExcept public long serializedSize(RequestFailure t, int version) { long size = RequestFailureReason.serializer.serializedSize(t.reason, version); - if (version >= MessagingService.VERSION_50) + if (version >= MessagingService.VERSION_51) size += nullableRemoteExceptionSerializer.serializedSize(t.failure, version); return size; } diff --git a/src/java/org/apache/cassandra/locator/ReplicaLayout.java b/src/java/org/apache/cassandra/locator/ReplicaLayout.java index 30a52be73ade..6b464237d04e 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaLayout.java +++ b/src/java/org/apache/cassandra/locator/ReplicaLayout.java @@ -26,6 +26,7 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.FBUtilities; @@ -356,11 +357,11 @@ static EndpointsForToken resolveWriteConflictsInPending(EndpointsForToken natura * @return the read layout for a token - this includes natural replicas, i.e. those that are not pending. * They are reverse sorted by the badness score of the configured snitch */ - static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token) + static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token, ReadCoordinator coordinator) { EndpointsForToken replicas = keyspace.getMetadata().params.replication.isLocal() ? forLocalStrategyToken(metadata, replicationStrategy, token) - : forNonLocalStrategyTokenRead(metadata, keyspace.getMetadata(), token); + : coordinator.forNonLocalStrategyTokenRead(metadata, keyspace.getMetadata(), token); replicas = DatabaseDescriptor.getNodeProximity().sortedByProximity(FBUtilities.getBroadcastAddressAndPort(), replicas); @@ -386,7 +387,7 @@ static EndpointsForRange forNonLocalStategyRangeRead(ClusterMetadata metadata, K return metadata.placements.get(keyspace.params.replication).reads.forRange(range.right.getToken()).get(); } - static EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token) + public static EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token) { return metadata.placements.get(keyspace.params.replication).reads.forToken(token).get(); } diff --git a/src/java/org/apache/cassandra/locator/ReplicaPlans.java b/src/java/org/apache/cassandra/locator/ReplicaPlans.java index 53b32797c6fb..5aef13a6d3cd 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaPlans.java +++ b/src/java/org/apache/cassandra/locator/ReplicaPlans.java @@ -63,6 +63,7 @@ import org.apache.cassandra.index.IndexStatusManager; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.service.reads.AlwaysSpeculativeRetryPolicy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; @@ -533,7 +534,7 @@ public static List sortByProximity(Collection forRead, ClusterMetadata metadata, Keyspace keyspace, ConsistencyLevel consistencyLevel, Token token, Predicate isAlive) throws UnavailableException + public static ReplicaPlan.ForWrite forReadRepair(ReplicaPlan forRead, ClusterMetadata metadata, Keyspace keyspace, ConsistencyLevel consistencyLevel, Token token, Predicate isAlive, ReadCoordinator coordinator) throws UnavailableException { AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy(); Selector selector = writeReadRepair(forRead); @@ -550,7 +551,7 @@ public static ReplicaPlan.ForWrite forReadRepair(ReplicaPlan forRead, Clus liveAndDown.all(), live.all(), contacts, - (newClusterMetadata) -> forReadRepair(forRead, newClusterMetadata, keyspace, consistencyLevel, token, isAlive), + (newClusterMetadata) -> forReadRepair(forRead, newClusterMetadata, keyspace, consistencyLevel, token, isAlive, coordinator), metadata.epoch); } @@ -882,9 +883,10 @@ public static ReplicaPlan.ForTokenRead forRead(Keyspace keyspace, Token token, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, - SpeculativeRetryPolicy retry) + SpeculativeRetryPolicy retry, + ReadCoordinator coordinator) { - return forRead(ClusterMetadata.current(), keyspace, token, indexQueryPlan, consistencyLevel, retry, false); + return forRead(ClusterMetadata.current(), keyspace, token, indexQueryPlan, consistencyLevel, retry, coordinator, false); } public static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, @@ -892,9 +894,10 @@ public static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, Token token, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, - SpeculativeRetryPolicy retry) + SpeculativeRetryPolicy retry, + ReadCoordinator coordinator) { - return forRead(metadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, true); + return forRead(metadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, coordinator, true); } private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, @@ -903,10 +906,11 @@ private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, SpeculativeRetryPolicy retry, + ReadCoordinator coordinator, boolean throwOnInsufficientLiveReplicas) { AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy(); - ReplicaLayout.ForTokenRead forTokenReadLiveAndDown = ReplicaLayout.forTokenReadSorted(metadata, keyspace, replicationStrategy, token); + ReplicaLayout.ForTokenRead forTokenReadLiveAndDown = ReplicaLayout.forTokenReadSorted(metadata, keyspace, replicationStrategy, token, coordinator); ReplicaLayout.ForTokenRead forTokenReadLive = forTokenReadLiveAndDown.filter(FailureDetector.isReplicaAlive); EndpointsForToken candidates = candidatesForRead(keyspace, indexQueryPlan, consistencyLevel, forTokenReadLive.all()); EndpointsForToken contacts = contactForRead(metadata.locator, replicationStrategy, consistencyLevel, retry.equals(AlwaysSpeculativeRetryPolicy.INSTANCE), candidates); @@ -915,8 +919,8 @@ private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, assureSufficientLiveReplicasForRead(metadata.locator, replicationStrategy, consistencyLevel, contacts); return new ReplicaPlan.ForTokenRead(keyspace, replicationStrategy, consistencyLevel, candidates, contacts, forTokenReadLiveAndDown.all(), - (newClusterMetadata) -> forRead(newClusterMetadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, false), - (self) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive), + (newClusterMetadata) -> forRead(newClusterMetadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, coordinator, false), + (self) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive, coordinator), metadata.epoch); } @@ -962,7 +966,7 @@ public static ReplicaPlan.ForRangeRead forRangeRead(ClusterMetadata metadata, forRangeReadLiveAndDown.all(), vnodeCount, (newClusterMetadata) -> forRangeRead(newClusterMetadata, keyspace, indexQueryPlan, consistencyLevel, range, vnodeCount, false), - (self, token) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive), + (self, token) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive, ReadCoordinator.DEFAULT), metadata.epoch); } @@ -1041,7 +1045,7 @@ public static ReplicaPlan.ForRangeRead maybeMerge(ClusterMetadata metadata, (self, token) -> { // It might happen that the ring has moved forward since the operation has started, but because we'll be recomputing a quorum // after the operation is complete, we will catch inconsistencies either way. - return forReadRepair(self, ClusterMetadata.current(), keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive); + return forReadRepair(self, ClusterMetadata.current(), keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive, ReadCoordinator.DEFAULT); }, left.epoch); } diff --git a/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java index a9d1f28c47be..33f7e8f20100 100644 --- a/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java @@ -19,6 +19,7 @@ package org.apache.cassandra.metrics; import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; @@ -26,11 +27,27 @@ public class AccordClientRequestMetrics extends ClientRequestMetrics { public final Histogram keySize; + // During migration back to Paxos it's possible a transaction runs + // in an Epoch where Accord is no longer accepting transactions + // and we still run it to completion, but we do skip the read from Cassandra + // although it would be harmless. This should only occur briefly when coordinators + // start transactions on the wrong protocol due to temporarily out of data cluster metadata. + public final Meter migrationSkippedReads; + + // Number of times a key had to be run through PaxosRepair for migration to Accord + public final Meter paxosKeyMigrations; + + // Number of times a query was rejected by Accord in TxnQuery due to a migration back to Paxos + public final Meter accordMigrationRejects; + public AccordClientRequestMetrics(String scope) { super(scope); keySize = Metrics.histogram(factory.createMetricName("KeySizeHistogram"), false); + migrationSkippedReads = Metrics.meter(factory.createMetricName("MigrationSkippedReads")); + paxosKeyMigrations = Metrics.meter(factory.createMetricName("PaxosKeyMigrations")); + accordMigrationRejects = Metrics.meter(factory.createMetricName("AccordMigrationRejects")); } @Override @@ -38,5 +55,8 @@ public void release() { super.release(); Metrics.remove(factory.createMetricName("KeySizeHistogram")); + Metrics.remove(factory.createMetricName("MigrationSkippedReads")); + Metrics.remove(factory.createMetricName("PaxosKeyMigrations")); + Metrics.remove(factory.createMetricName("AccordMigrationRejects")); } } diff --git a/src/java/org/apache/cassandra/metrics/CASClientRequestMetrics.java b/src/java/org/apache/cassandra/metrics/CASClientRequestMetrics.java index 654bb059d16e..f3ae6c89248f 100644 --- a/src/java/org/apache/cassandra/metrics/CASClientRequestMetrics.java +++ b/src/java/org/apache/cassandra/metrics/CASClientRequestMetrics.java @@ -29,6 +29,12 @@ public class CASClientRequestMetrics extends ClientRequestMetrics public final Histogram contention; public final Counter unfinishedCommit; public final Meter unknownResult; + // CAS request rejected after Prepare/Promise due to migration from Paxos to Accord + public final Meter beginMigrationRejects; + // Number of times a CAS request was rejected after Propose/Accept due to migration from Paxos to Accord + public final Meter acceptMigrationRejects; + // Number of times a key was migrated from Accord to Paxos + public final Meter accordKeyMigrations; public CASClientRequestMetrics(String scope) { @@ -36,6 +42,9 @@ public CASClientRequestMetrics(String scope) contention = Metrics.histogram(factory.createMetricName("ContentionHistogram"), false); unfinishedCommit = Metrics.counter(factory.createMetricName("UnfinishedCommit")); unknownResult = Metrics.meter(factory.createMetricName("UnknownResult")); + beginMigrationRejects = Metrics.meter(factory.createMetricName("PaxosBeginMigrationRejects")); + acceptMigrationRejects = Metrics.meter(factory.createMetricName("PaxosAcceptMigrationRejects")); + accordKeyMigrations = Metrics.meter(factory.createMetricName("AccordKeyMigrations")); } public void release() @@ -44,5 +53,8 @@ public void release() Metrics.remove(factory.createMetricName("ContentionHistogram")); Metrics.remove(factory.createMetricName("UnfinishedCommit")); Metrics.remove(factory.createMetricName("UnknownResult")); + Metrics.remove(factory.createMetricName("PaxosBeginMigrationRejects")); + Metrics.remove(factory.createMetricName("PaxosAcceptMigrationRejects")); + Metrics.remove(factory.createMetricName("AccordKeyMigrations")); } } diff --git a/src/java/org/apache/cassandra/metrics/ClientRequestsMetricsHolder.java b/src/java/org/apache/cassandra/metrics/ClientRequestsMetricsHolder.java index 26f2913263e6..05d17a0fda95 100644 --- a/src/java/org/apache/cassandra/metrics/ClientRequestsMetricsHolder.java +++ b/src/java/org/apache/cassandra/metrics/ClientRequestsMetricsHolder.java @@ -29,6 +29,8 @@ public final class ClientRequestsMetricsHolder public static final CASClientWriteRequestMetrics casWriteMetrics = new CASClientWriteRequestMetrics("CASWrite"); public static final CASClientRequestMetrics casReadMetrics = new CASClientRequestMetrics("CASRead"); public static final ViewWriteMetrics viewWriteMetrics = new ViewWriteMetrics("ViewWrite"); + public static final AccordClientRequestMetrics accordReadMetrics = new AccordClientRequestMetrics("AccordRead"); + public static final AccordClientRequestMetrics accordWriteMetrics = new AccordClientRequestMetrics("AccordWrite"); public static final Map readMetricsMap = new EnumMap<>(ConsistencyLevel.class); public static final Map writeMetricsMap = new EnumMap<>(ConsistencyLevel.class); diff --git a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java index e603381affa6..c290c321f313 100644 --- a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java @@ -101,6 +101,12 @@ public class KeyspaceMetrics public final LatencyMetrics casPropose; /** CAS Commit metrics */ public final LatencyMetrics casCommit; + /** Latency for locally run key migrations **/ + public final LatencyMetrics keyMigration; + /** Latency for range migrations run by locally coordinated Accord repairs **/ + public final LatencyMetrics rangeMigration; + public final Meter rangeMigrationUnexpectedFailures; + public final Meter rangeMigrationDependencyLimitFailures; /** Writes failed ideal consistency **/ public final Counter writeFailedIdealCL; /** Ideal CL write latency metrics */ @@ -247,6 +253,10 @@ public KeyspaceMetrics(final Keyspace ks) casPrepare = createLatencyMetrics("CasPrepare"); casPropose = createLatencyMetrics("CasPropose"); casCommit = createLatencyMetrics("CasCommit"); + keyMigration = createLatencyMetrics("KeyMigration"); + rangeMigration = createLatencyMetrics("RangeMigration"); + rangeMigrationUnexpectedFailures = createKeyspaceMeter("RangeMigrationUnexpectedFailures"); + rangeMigrationDependencyLimitFailures = createKeyspaceMeter("RangeMigratingDependencyLimitFailures"); writeFailedIdealCL = createKeyspaceCounter("WriteFailedIdealCL"); idealCLWriteLatency = createLatencyMetrics("IdealCLWrite"); diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index fd3a6bac2e75..3729fe607486 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -87,6 +87,8 @@ public class TableMetrics public final static LatencyMetrics GLOBAL_READ_LATENCY = new LatencyMetrics(GLOBAL_FACTORY, GLOBAL_ALIAS_FACTORY, "Read"); public final static LatencyMetrics GLOBAL_WRITE_LATENCY = new LatencyMetrics(GLOBAL_FACTORY, GLOBAL_ALIAS_FACTORY, "Write"); public final static LatencyMetrics GLOBAL_RANGE_LATENCY = new LatencyMetrics(GLOBAL_FACTORY, GLOBAL_ALIAS_FACTORY, "Range"); + public final static LatencyMetrics GLOBAL_KEY_MIGRATION_LATENCY = new LatencyMetrics(GLOBAL_FACTORY, GLOBAL_ALIAS_FACTORY, "KeyMigration"); + public final static LatencyMetrics GLOBAL_RANGE_MIGRATION_LATENCY = new LatencyMetrics(GLOBAL_FACTORY, GLOBAL_ALIAS_FACTORY, "RangeMigration"); /** Total amount of data stored in the memtable that resides on-heap, including column related overhead and partitions overwritten. */ public final Gauge memtableOnHeapDataSize; @@ -188,6 +190,12 @@ public class TableMetrics public final LatencyMetrics casPropose; /** CAS Commit metrics */ public final LatencyMetrics casCommit; + /** Latency for locally run key migrations **/ + public final LatencyMetrics keyMigration; + /** Latency for range migrations run by locally coordinated Accord repairs **/ + public final LatencyMetrics rangeMigration; + public final TableMeter rangeMigrationUnexpectedFailures; + public final TableMeter rangeMigrationDependencyLimitFailures; /** percent of the data that is repaired */ public final Gauge percentRepaired; /** Reports the size of sstables in repaired, unrepaired, and any ongoing repair buckets */ @@ -624,6 +632,7 @@ public Long getValue() readLatency = createLatencyMetrics("Read", cfs.keyspace.metric.readLatency, GLOBAL_READ_LATENCY); writeLatency = createLatencyMetrics("Write", cfs.keyspace.metric.writeLatency, GLOBAL_WRITE_LATENCY); rangeLatency = createLatencyMetrics("Range", cfs.keyspace.metric.rangeLatency, GLOBAL_RANGE_LATENCY); + pendingFlushes = createTableCounter("PendingFlushes"); bytesFlushed = createTableCounter("BytesFlushed"); flushSizeOnDisk = ExpMovingAverage.decayBy1000(); @@ -804,6 +813,10 @@ public Long getValue() casPrepare = createLatencyMetrics("CasPrepare", cfs.keyspace.metric.casPrepare); casPropose = createLatencyMetrics("CasPropose", cfs.keyspace.metric.casPropose); casCommit = createLatencyMetrics("CasCommit", cfs.keyspace.metric.casCommit); + keyMigration = createLatencyMetrics("KeyMigration", cfs.keyspace.metric.keyMigration, GLOBAL_KEY_MIGRATION_LATENCY); + rangeMigration = createLatencyMetrics("RangeMigration", cfs.keyspace.metric.rangeMigration, GLOBAL_RANGE_MIGRATION_LATENCY); + rangeMigrationUnexpectedFailures = createTableMeter("RangeMigrationUnexpectedFailures", cfs.keyspace.metric.rangeMigrationUnexpectedFailures); + rangeMigrationDependencyLimitFailures = createTableMeter("RangeMigrationDependencyLimitFaiures", cfs.keyspace.metric.rangeMigrationDependencyLimitFailures); repairsStarted = createTableCounter("RepairJobsStarted"); repairsCompleted = createTableCounter("RepairJobsCompleted"); diff --git a/src/java/org/apache/cassandra/net/Message.java b/src/java/org/apache/cassandra/net/Message.java index 146f8c1119fb..57359cc039e0 100644 --- a/src/java/org/apache/cassandra/net/Message.java +++ b/src/java/org/apache/cassandra/net/Message.java @@ -303,6 +303,7 @@ public static Message internalResponse(Verb verb, T payload) * Used by the {@code MultiRangeReadCommand} to split multi-range responses from a replica * into single-range responses. */ + @VisibleForTesting public static Message remoteResponse(InetAddressAndPort from, Verb verb, T payload) { assert verb.isResponse(); @@ -574,6 +575,11 @@ boolean trackWarnings() return MessageFlag.TRACK_WARNINGS.isIn(flags); } + boolean isFinal() + { + return !MessageFlag.NOT_FINAL.isIn(flags); + } + @Nullable ForwardingInfo forwardTo() { diff --git a/src/java/org/apache/cassandra/net/MessageFlag.java b/src/java/org/apache/cassandra/net/MessageFlag.java index 1c2db557c340..4c5762f9796e 100644 --- a/src/java/org/apache/cassandra/net/MessageFlag.java +++ b/src/java/org/apache/cassandra/net/MessageFlag.java @@ -31,7 +31,10 @@ public enum MessageFlag /** allow creating warnings or aborting queries based off query - see CASSANDRA-16850 */ TRACK_WARNINGS(2), /** whether this message should be sent on an URGENT channel despite its Verb default priority */ - URGENT(3); + URGENT(3), + /** Allow a single callback to receive multiple responses until a final response is received **/ + NOT_FINAL(4) + ; private final int id; diff --git a/src/java/org/apache/cassandra/net/MessagingService.java b/src/java/org/apache/cassandra/net/MessagingService.java index 12d3f17cd4a0..2457f2dbcea2 100644 --- a/src/java/org/apache/cassandra/net/MessagingService.java +++ b/src/java/org/apache/cassandra/net/MessagingService.java @@ -500,9 +500,9 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailure failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { - future.setFailure(new RuntimeException(failureReason.toString())); + future.setFailure(new RuntimeException(failure.toString())); } }); diff --git a/src/java/org/apache/cassandra/net/ResponseVerbHandler.java b/src/java/org/apache/cassandra/net/ResponseVerbHandler.java index 6cecd2a415da..f89362715b74 100644 --- a/src/java/org/apache/cassandra/net/ResponseVerbHandler.java +++ b/src/java/org/apache/cassandra/net/ResponseVerbHandler.java @@ -58,7 +58,11 @@ class ResponseVerbHandler implements IVerbHandler @Override public void doVerb(Message message) { - RequestCallbacks.CallbackInfo callbackInfo = MessagingService.instance().callbacks.remove(message.id(), message.from()); + RequestCallbacks.CallbackInfo callbackInfo; + if (message.header.isFinal()) + callbackInfo = MessagingService.instance().callbacks.remove(message.id(), message.from()); + else + callbackInfo = MessagingService.instance().callbacks.get(message.id(), message.from()); if (callbackInfo == null) { String msg = "Callback already removed for {} (from {})"; diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 54849389f078..97e77eccbbff 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -81,6 +81,10 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordSyncPropagator; import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; +import org.apache.cassandra.service.accord.interop.AccordInteropApply; +import org.apache.cassandra.service.accord.interop.AccordInteropCommit; +import org.apache.cassandra.service.accord.interop.AccordInteropRead; +import org.apache.cassandra.service.accord.interop.AccordInteropReadRepair; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; @@ -98,6 +102,8 @@ import org.apache.cassandra.service.accord.serializers.RecoverySerializers; import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; import org.apache.cassandra.service.accord.serializers.WaitOnCommitSerializer; +import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.ConsensusKeyMigrationFinished; import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.Commit.Agreed; import org.apache.cassandra.service.paxos.PaxosCommit; @@ -260,7 +266,7 @@ public enum Verb PAXOS2_PREPARE_REQ (40, P2, writeTimeout, MUTATION, () -> PaxosPrepare.requestSerializer, () -> PaxosPrepare.requestHandler, PAXOS2_PREPARE_RSP ), PAXOS2_PREPARE_REFRESH_RSP (51, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepareRefresh.responseSerializer, RESPONSE_HANDLER ), PAXOS2_PREPARE_REFRESH_REQ (41, P2, writeTimeout, MUTATION, () -> PaxosPrepareRefresh.requestSerializer, () -> PaxosPrepareRefresh.requestHandler, PAXOS2_PREPARE_REFRESH_RSP ), - PAXOS2_PROPOSE_RSP (52, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPropose.responseSerializer, RESPONSE_HANDLER ), + PAXOS2_PROPOSE_RSP (52, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPropose.ACCEPT_RESULT_SERIALIZER, RESPONSE_HANDLER ), PAXOS2_PROPOSE_REQ (42, P2, writeTimeout, MUTATION, () -> PaxosPropose.requestSerializer, () -> PaxosPropose.requestHandler, PAXOS2_PROPOSE_RSP ), PAXOS2_COMMIT_AND_PREPARE_RSP (53, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepare.responseSerializer, RESPONSE_HANDLER ), PAXOS2_COMMIT_AND_PREPARE_REQ (43, P2, writeTimeout, MUTATION, () -> PaxosCommitAndPrepare.requestSerializer, () -> PaxosCommitAndPrepare.requestHandler, PAXOS2_COMMIT_AND_PREPARE_RSP ), @@ -300,40 +306,51 @@ public enum Verb // accord ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), - ACCORD_PRE_ACCEPT_RSP (121, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_PRE_ACCEPT_REQ (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_PRE_ACCEPT_RSP ), - ACCORD_ACCEPT_RSP (124, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_ACCEPT_REQ (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), - ACCORD_ACCEPT_INVALIDATE_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), - ACCORD_READ_RSP (126, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), - ACCORD_READ_REQ (125, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), - ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), - ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, AccordService::verbHandlerOrNoop ), - ACCORD_APPLY_RSP (130, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), - ACCORD_APPLY_REQ (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), - ACCORD_BEGIN_RECOVER_RSP (132, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), - ACCORD_BEGIN_RECOVER_REQ (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_RECOVER_RSP ), - ACCORD_BEGIN_INVALIDATE_RSP (134, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), - ACCORD_BEGIN_INVALIDATE_REQ (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_INVALIDATE_RSP ), + ACCORD_PRE_ACCEPT_RSP (120, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), + ACCORD_PRE_ACCEPT_REQ (121, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_PRE_ACCEPT_RSP ), + ACCORD_ACCEPT_RSP (122, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), + ACCORD_ACCEPT_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_INVALIDATE_REQ (124, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), + ACCORD_READ_RSP (125, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), + ACCORD_READ_REQ (126, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, AccordService::verbHandlerOrNoop ), + ACCORD_APPLY_RSP (129, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), + ACCORD_APPLY_REQ (130, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), + ACCORD_BEGIN_RECOVER_RSP (131, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), + ACCORD_BEGIN_RECOVER_REQ (132, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_RECOVER_RSP ), + ACCORD_BEGIN_INVALIDATE_RSP (133, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), + ACCORD_BEGIN_INVALIDATE_REQ (134, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_INVALIDATE_RSP ), ACCORD_WAIT_ON_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), - ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, AccordService::verbHandlerOrNoop, ACCORD_WAIT_ON_COMMIT_RSP ), - ACCORD_WAIT_ON_APPLY_REQ (137, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitOnApply, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), - ACCORD_INFORM_OF_TXN_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_HOME_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_CHECK_STATUS_RSP (142, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), - ACCORD_CHECK_STATUS_REQ (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), - ACCORD_GET_DEPS_RSP (144, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_GET_DEPS_REQ (143, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_DEPS_RSP ), - ACCORD_FETCH_DATA_RSP (146, P2, repairTimeout,REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), - ACCORD_FETCH_DATA_REQ (145, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), - ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_QUERY_DURABLE_BEFORE_RSP (150, P2, writeTimeout, REQUEST_RESPONSE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), - ACCORD_QUERY_DURABLE_BEFORE_REQ (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,AccordService::verbHandlerOrNoop, ACCORD_QUERY_DURABLE_BEFORE_RSP), + ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, AccordService::verbHandlerOrNoop, ACCORD_WAIT_ON_COMMIT_RSP ), + ACCORD_WAIT_UNTIL_APPLIED_REQ (137, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitUntilApplied, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_INFORM_OF_TXN_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_HOME_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), + ACCORD_CHECK_STATUS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), + ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_DEPS_REQ (144, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_DEPS_RSP ), + ACCORD_FETCH_DATA_RSP (145, P2, repairTimeout,REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), + ACCORD_FETCH_DATA_REQ (146, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), + ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_QUERY_DURABLE_BEFORE_RSP (149, P2, writeTimeout, REQUEST_RESPONSE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), + ACCORD_QUERY_DURABLE_BEFORE_REQ (150, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,AccordService::verbHandlerOrNoop, ACCORD_QUERY_DURABLE_BEFORE_RSP ), ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), + ACCORD_APPLY_AND_WAIT_UNTIL_APPLIED_REQ(152, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData,() -> AccordSyncPropagator.verbHandler, ACCORD_READ_RSP), + + CONSENSUS_KEY_MIGRATION (153, P1, writeTimeout, MUTATION, () -> ConsensusKeyMigrationFinished.serializer,() -> ConsensusKeyMigrationState.consensusKeyMigrationFinishedHandler), + + ACCORD_INTEROP_READ_RSP (154, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.replySerializer, RESPONSE_HANDLER), + ACCORD_INTEROP_READ_REQ (155, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.requestSerializer, () -> AccordService.instance().verbHandler(), ACCORD_INTEROP_READ_RSP), + ACCORD_INTEROP_COMMIT_REQ (156, P2, writeTimeout, IMMEDIATE, () -> AccordInteropCommit.serializer, () -> AccordService.instance().verbHandler(), ACCORD_INTEROP_READ_RSP), + ACCORD_INTEROP_READ_REPAIR_RSP (157, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.replySerializer, RESPONSE_HANDLER), + ACCORD_INTEROP_READ_REPAIR_REQ (158, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.requestSerializer, () -> AccordService.instance().verbHandler(), ACCORD_INTEROP_READ_REPAIR_RSP), + ACCORD_INTEROP_APPLY_REQ (160, P2, writeTimeout, IMMEDIATE, () -> AccordInteropApply.serializer, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP), + // generic failure response FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailure.serializer, RESPONSE_HANDLER ), diff --git a/src/java/org/apache/cassandra/repair/AbstractRepairJob.java b/src/java/org/apache/cassandra/repair/AbstractRepairJob.java new file mode 100644 index 000000000000..7c4346e3bf5e --- /dev/null +++ b/src/java/org/apache/cassandra/repair/AbstractRepairJob.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair; + +import java.util.concurrent.Executor; +import javax.annotation.Nullable; + +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.repair.state.JobState; +import org.apache.cassandra.utils.concurrent.AsyncFuture; + +public abstract class AbstractRepairJob extends AsyncFuture implements Runnable +{ + private final SharedContext ctx; + public final JobState state; + protected final RepairJobDesc desc; + protected final RepairSession session; + protected final Executor taskExecutor; + + protected final Keyspace ks; + protected final ColumnFamilyStore cfs; + + /** + * Create repair job to run on specific columnfamily + * @param session RepairSession that this RepairJob belongs + * @param columnFamily name of the ColumnFamily to repair + */ + public AbstractRepairJob(RepairSession session, String columnFamily) + { + this.ctx = session.ctx; + this.session = session; + this.taskExecutor = session.taskExecutor; + this.desc = new RepairJobDesc(session.state.parentRepairSession, session.getId(), session.state.keyspace, columnFamily, session.state.commonRange.ranges); + this.state = new JobState(ctx.clock(), desc, session.state.commonRange.endpoints); + this.ks = Keyspace.open(desc.keyspace); + this.cfs = ks.getColumnFamilyStore(columnFamily); + } + + public void run() + { + state.phase.start(); + cfs.metric.repairsStarted.inc(); + runRepair(); + } + + abstract protected void runRepair(); + + abstract void abort(@Nullable Throwable reason); +} diff --git a/src/java/org/apache/cassandra/repair/AbstractRepairTask.java b/src/java/org/apache/cassandra/repair/AbstractRepairTask.java index f27e72deb177..e6ba28aee61a 100644 --- a/src/java/org/apache/cassandra/repair/AbstractRepairTask.java +++ b/src/java/org/apache/cassandra/repair/AbstractRepairTask.java @@ -24,7 +24,6 @@ import com.google.common.collect.Lists; import com.google.common.util.concurrent.FutureCallback; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,6 +58,7 @@ private List submitRepairSessions(TimeUUID parentSession, ExecutorPlus executor, Scheduler validationScheduler, List commonRanges, + boolean excludedDeadNodes, String... cfnames) { List futures = new ArrayList<>(options.getRanges().size()); @@ -68,6 +68,7 @@ private List submitRepairSessions(TimeUUID parentSession, logger.info("Starting RepairSession for {}", commonRange); RepairSession session = coordinator.ctx.repair().submitRepairSession(parentSession, commonRange, + excludedDeadNodes, keyspace, options.getParallelism(), isIncremental, @@ -77,6 +78,7 @@ private List submitRepairSessions(TimeUUID parentSession, options.repairPaxos(), options.paxosOnly(), options.dontPurgeTombstones(), + options.accordRepair(), executor, validationScheduler, cfnames); @@ -93,9 +95,10 @@ protected Future runRepair(TimeUUID parentSession, ExecutorPlus executor, Scheduler validationScheduler, List commonRanges, + boolean excludedDeadNodes, String... cfnames) { - List allSessions = submitRepairSessions(parentSession, isIncremental, executor, validationScheduler, commonRanges, cfnames); + List allSessions = submitRepairSessions(parentSession, isIncremental, executor, validationScheduler, commonRanges, excludedDeadNodes, cfnames); List>> ranges = Lists.transform(allSessions, RepairSession::ranges); Future> f = FutureCombiner.successfulOf(allSessions); return f.map(results -> { diff --git a/src/java/org/apache/cassandra/repair/AccordRepairJob.java b/src/java/org/apache/cassandra/repair/AccordRepairJob.java new file mode 100644 index 000000000000..1736d799fa7b --- /dev/null +++ b/src/java/org/apache/cassandra/repair/AccordRepairJob.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair; + +import java.math.BigInteger; +import java.util.List; +import javax.annotation.Nullable; + +import accord.api.BarrierType; +import accord.api.RoutingKey; +import accord.primitives.Ranges; +import accord.primitives.Seekables; +import org.apache.cassandra.dht.AccordSplitter; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; + +import static com.google.common.base.Preconditions.checkState; +import static java.util.Collections.emptyList; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +/* + * Accord repair consists of creating a barrier transaction for all the ranges which ensure that all Accord transactions + * before the Epoch and point in time at which the repair started have their side effects visible to Paxos and regular quorum reads. + */ +public class AccordRepairJob extends AbstractRepairJob +{ + public static final BigInteger TWO = BigInteger.valueOf(2); + + private final Ranges ranges; + + private final AccordSplitter splitter; + + private BigInteger rangeStep; + + private Epoch minEpoch = ClusterMetadata.current().epoch; + + public AccordRepairJob(RepairSession repairSession, String cfname) + { + super(repairSession, cfname); + List> normalizedRanges = Range.normalize(desc.ranges); + IPartitioner partitioner = normalizedRanges.get(0).left.getPartitioner(); + TokenRange[] tokenRanges = new TokenRange[normalizedRanges.size()]; + for (int i = 0; i < normalizedRanges.size(); i++) + tokenRanges[i] = new TokenRange(new TokenKey(ks.getName(), normalizedRanges.get(i).left), new TokenKey(ks.getName(), normalizedRanges.get(i).right)); + this.ranges = Ranges.of(tokenRanges); + this.splitter = partitioner.accordSplitter().apply(Ranges.of(tokenRanges)); + } + + @Override + protected void runRepair() + { + try + { + for (accord.primitives.Range range : ranges) + repairRange((TokenRange)range); + state.phase.success(); + cfs.metric.repairsCompleted.inc(); + trySuccess(new RepairResult(desc, emptyList(), ConsensusMigrationRepairResult.fromAccordRepair(minEpoch))); + } + catch (Throwable t) + { + state.phase.fail(t); + cfs.metric.repairsCompleted.inc(); + tryFailure(t); + } + } + + @Override + void abort(@Nullable Throwable reason) + { + throw new UnsupportedOperationException("Have not implemented this yet, and the job runs synchronously so it isn't abortable"); + } + + private void repairRange(TokenRange range) + { + RoutingKey remainingStart = range.start(); + BigInteger rangeSize = splitter.sizeOf(range); + if (rangeStep == null) + rangeStep = BigInteger.ONE.max(splitter.divide(rangeSize, 1000)); + + BigInteger offset = BigInteger.ZERO; + + TokenRange lastRepaired = null; + int iteration = 0; + while (true) + { + iteration++; + if (iteration % 100 == 0) + rangeStep = rangeStep.multiply(TWO); + + BigInteger remaining = rangeSize.subtract(offset); + BigInteger length = remaining.min(rangeStep); + + long start = nanoTime(); + boolean dependencyOverflow = false; + try + { + // Splitter is approximate so it can't work right up to the end + TokenRange toRepair; + if (splitter.compare(offset, rangeSize) >= 0) + { + if (remainingStart.equals(range.end())) + return; + + // Final repair is whatever remains + toRepair = range.newRange(remainingStart, range.end()); + } + else + { + toRepair = splitter.subRange(range, offset, splitter.add(offset, length)); + checkState(iteration > 1 || toRepair.start().equals(range.start())); + } + checkState(!toRepair.equals(lastRepaired), "Shouldn't repair the same range twice"); + checkState(lastRepaired == null || toRepair.start().equals(lastRepaired.end()), "Next range should directly follow previous range"); + lastRepaired = toRepair; + AccordService.instance().barrierWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false); + remainingStart = toRepair.end(); + } + catch (RuntimeException e) + { + // TODO Placeholder for dependency limit overflow +// dependencyOverflow = true; + cfs.metric.rangeMigrationDependencyLimitFailures.mark(); + throw e; + } + catch (Throwable t) + { + // unexpected error + cfs.metric.rangeMigrationUnexpectedFailures.mark(); + throw new RuntimeException(t); + } + finally + { + cfs.metric.rangeMigration.addNano(start); + } + + // TODO when dependency limits are added to Accord need to test repair overflow + if (dependencyOverflow) + { + offset = offset.subtract(rangeStep); + if (rangeStep.equals(BigInteger.ONE)) + throw new IllegalStateException("Unable to repair without overflowing with range step of 1"); + rangeStep = BigInteger.ONE.max(rangeStep.divide(TWO)); + continue; + } + + offset = offset.add(length); + } + } +} diff --git a/src/java/org/apache/cassandra/repair/RepairJob.java b/src/java/org/apache/cassandra/repair/CassandraRepairJob.java similarity index 95% rename from src/java/org/apache/cassandra/repair/RepairJob.java rename to src/java/org/apache/cassandra/repair/CassandraRepairJob.java index 7b60df94160c..95a55cd4d5d4 100644 --- a/src/java/org/apache/cassandra/repair/RepairJob.java +++ b/src/java/org/apache/cassandra/repair/CassandraRepairJob.java @@ -17,7 +17,14 @@ */ package org.apache.cassandra.repair; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.Executor; import java.util.function.Function; @@ -28,18 +35,11 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; -import com.google.common.util.concurrent.*; - -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.repair.state.JobState; -import org.apache.cassandra.utils.concurrent.AsyncFuture; +import com.google.common.util.concurrent.FutureCallback; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.locator.InetAddressAndPort; @@ -47,9 +47,15 @@ import org.apache.cassandra.repair.asymmetric.HostDifferences; import org.apache.cassandra.repair.asymmetric.PreferedNodeFilter; import org.apache.cassandra.repair.asymmetric.ReduceHelper; +import org.apache.cassandra.repair.state.JobState; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanup; import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MerkleTrees; @@ -65,9 +71,9 @@ /** * RepairJob runs repair on given ColumnFamily. */ -public class RepairJob extends AsyncFuture implements Runnable +public class CassandraRepairJob extends AbstractRepairJob { - private static final Logger logger = LoggerFactory.getLogger(RepairJob.class); + private static final Logger logger = LoggerFactory.getLogger(CassandraRepairJob.class); private final SharedContext ctx; public final JobState state; @@ -87,8 +93,9 @@ public class RepairJob extends AsyncFuture implements Runnable * @param session RepairSession that this RepairJob belongs * @param columnFamily name of the ColumnFamily to repair */ - public RepairJob(RepairSession session, String columnFamily) + public CassandraRepairJob(RepairSession session, String columnFamily) { + super(session, columnFamily); this.ctx = session.ctx; this.session = session; this.taskExecutor = session.taskExecutor; @@ -116,17 +123,16 @@ public long getNowInSeconds() * This sets up necessary task and runs them on given {@code taskExecutor}. * After submitting all tasks, waits until validation with replica completes. */ - public void run() + @Override + protected void runRepair() { - state.phase.start(); - Keyspace ks = Keyspace.open(desc.keyspace); - ColumnFamilyStore cfs = ks.getColumnFamilyStore(desc.columnFamily); - cfs.metric.repairsStarted.inc(); List allEndpoints = new ArrayList<>(session.state.commonRange.endpoints); allEndpoints.add(ctx.broadcastAddressAndPort()); Future paxosRepair; - if (paxosRepairEnabled() && (((useV2() || isMetadataKeyspace()) && session.repairPaxos) || session.paxosOnly)) + Epoch repairStartingEpoch = ClusterMetadata.current().epoch; + boolean doPaxosRepair = paxosRepairEnabled() && (((useV2() || isMetadataKeyspace()) && session.repairPaxos) || session.paxosOnly); + if (doPaxosRepair) { logger.info("{} {}.{} starting paxos repair", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); TableMetadata metadata = Schema.instance.getTableMetadata(desc.keyspace, desc.columnFamily); @@ -142,10 +148,10 @@ public void run() { paxosRepair.addCallback(new FutureCallback<>() { - public void onSuccess(Void v) + public void onSuccess(Void ignored) { logger.info("{} {}.{} paxos repair completed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); - trySuccess(new RepairResult(desc, Collections.emptyList())); + trySuccess(new RepairResult(desc, Collections.emptyList(), ConsensusMigrationRepairResult.fromCassandraRepair(repairStartingEpoch, false))); } /** @@ -211,7 +217,8 @@ public void onSuccess(List stats) SystemDistributedKeyspace.successfulRepairJob(session.getId(), desc.keyspace, desc.columnFamily); } cfs.metric.repairsCompleted.inc(); - trySuccess(new RepairResult(desc, stats)); + logger.info("Completing repair with excludedDeadNodes {}", session.excludedDeadNodes); + trySuccess(new RepairResult(desc, stats, ConsensusMigrationRepairResult.fromCassandraRepair(repairStartingEpoch, doPaxosRepair && !session.excludedDeadNodes))); } /** diff --git a/src/java/org/apache/cassandra/repair/IncrementalRepairTask.java b/src/java/org/apache/cassandra/repair/IncrementalRepairTask.java index 347846cf6b4e..956973c49418 100644 --- a/src/java/org/apache/cassandra/repair/IncrementalRepairTask.java +++ b/src/java/org/apache/cassandra/repair/IncrementalRepairTask.java @@ -64,7 +64,7 @@ public Future performUnsafe(ExecutorPlus executor, Sche CoordinatorSession coordinatorSession = coordinator.ctx.repair().consistent.coordinated.registerSession(parentSession, allParticipants, neighborsAndRanges.shouldExcludeDeadParticipants); - return coordinatorSession.execute(() -> runRepair(parentSession, true, executor, validationScheduler, allRanges, cfnames)); + return coordinatorSession.execute(() -> runRepair(parentSession, true, executor, validationScheduler, allRanges, neighborsAndRanges.shouldExcludeDeadParticipants, cfnames)); } } diff --git a/src/java/org/apache/cassandra/repair/NormalRepairTask.java b/src/java/org/apache/cassandra/repair/NormalRepairTask.java index e304280c5822..05b721d17c61 100644 --- a/src/java/org/apache/cassandra/repair/NormalRepairTask.java +++ b/src/java/org/apache/cassandra/repair/NormalRepairTask.java @@ -27,16 +27,19 @@ public class NormalRepairTask extends AbstractRepairTask { private final TimeUUID parentSession; private final List commonRanges; + private final boolean excludedDeadNodes; private final String[] cfnames; protected NormalRepairTask(RepairCoordinator coordinator, TimeUUID parentSession, List commonRanges, + boolean excludedDeadNodes, String[] cfnames) { super(coordinator); this.parentSession = parentSession; this.commonRanges = commonRanges; + this.excludedDeadNodes = excludedDeadNodes; this.cfnames = cfnames; } @@ -49,6 +52,6 @@ public String name() @Override public Future performUnsafe(ExecutorPlus executor, Scheduler validationScheduler) { - return runRepair(parentSession, false, executor, validationScheduler, commonRanges, cfnames); + return runRepair(parentSession, false, executor, validationScheduler, commonRanges, excludedDeadNodes, cfnames); } } diff --git a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java index 95c7a63f9466..872199156ee5 100644 --- a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java +++ b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java @@ -42,14 +42,16 @@ public class PreviewRepairTask extends AbstractRepairTask { private final TimeUUID parentSession; private final List commonRanges; + private final boolean excludedDeadNodes; private final String[] cfnames; private volatile String successMessage = name() + " completed successfully"; - protected PreviewRepairTask(RepairCoordinator coordinator, TimeUUID parentSession, List commonRanges, String[] cfnames) + protected PreviewRepairTask(RepairCoordinator coordinator, TimeUUID parentSession, List commonRanges, boolean excludedDeadNodes, String[] cfnames) { super(coordinator); this.parentSession = parentSession; this.commonRanges = commonRanges; + this.excludedDeadNodes = excludedDeadNodes; this.cfnames = cfnames; } @@ -68,7 +70,7 @@ public String successMessage() @Override public Future performUnsafe(ExecutorPlus executor, Scheduler validationScheduler) { - Future f = runRepair(parentSession, false, executor, validationScheduler, commonRanges, cfnames); + Future f = runRepair(parentSession, false, executor, validationScheduler, commonRanges, excludedDeadNodes, cfnames); return f.map(result -> { if (result.hasFailed()) return result; diff --git a/src/java/org/apache/cassandra/repair/RepairCoordinator.java b/src/java/org/apache/cassandra/repair/RepairCoordinator.java index c4b7a9fd55c9..79159c9fe10e 100644 --- a/src/java/org/apache/cassandra/repair/RepairCoordinator.java +++ b/src/java/org/apache/cassandra/repair/RepairCoordinator.java @@ -478,7 +478,7 @@ private Future>> repair(String[] RepairTask task; if (state.options.isPreview()) { - task = new PreviewRepairTask(this, state.id, neighborsAndRanges.filterCommonRanges(state.keyspace, cfnames), cfnames); + task = new PreviewRepairTask(this, state.id, neighborsAndRanges.filterCommonRanges(state.keyspace, cfnames), neighborsAndRanges.shouldExcludeDeadParticipants, cfnames); } else if (state.options.isIncremental()) { @@ -486,7 +486,7 @@ else if (state.options.isIncremental()) } else { - task = new NormalRepairTask(this, state.id, neighborsAndRanges.filterCommonRanges(state.keyspace, cfnames), cfnames); + task = new NormalRepairTask(this, state.id, neighborsAndRanges.filterCommonRanges(state.keyspace, cfnames), neighborsAndRanges.shouldExcludeDeadParticipants, cfnames); } ExecutorPlus executor = createExecutor(); diff --git a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java index f7771260195a..5f69da549d2d 100644 --- a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java +++ b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java @@ -17,7 +17,8 @@ */ package org.apache.cassandra.repair; -import java.util.*; +import java.util.ArrayList; +import java.util.List; import java.util.function.BiFunction; import java.util.function.Function; @@ -28,7 +29,14 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; -import org.apache.cassandra.repair.messages.*; +import org.apache.cassandra.repair.messages.CleanupMessage; +import org.apache.cassandra.repair.messages.FailSession; +import org.apache.cassandra.repair.messages.PrepareMessage; +import org.apache.cassandra.repair.messages.RepairMessage; +import org.apache.cassandra.repair.messages.StatusRequest; +import org.apache.cassandra.repair.messages.StatusResponse; +import org.apache.cassandra.repair.messages.SyncRequest; +import org.apache.cassandra.repair.messages.ValidationRequest; import org.apache.cassandra.repair.state.AbstractCompletable; import org.apache.cassandra.repair.state.AbstractState; import org.apache.cassandra.repair.state.Completable; @@ -39,6 +47,7 @@ import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.TimeUUID; @@ -86,6 +95,7 @@ private PreviewKind previewKind(TimeUUID sessionID) throws NoSuchRepairSessionEx public void doVerb(final Message message) { + ClusterMetadataService.instance().fetchLogFromCMS(message.epoch()); // TODO add cancel/interrupt message RepairJobDesc desc = message.payload.desc; try diff --git a/src/java/org/apache/cassandra/repair/RepairResult.java b/src/java/org/apache/cassandra/repair/RepairResult.java index 333b48ad33e7..4899448c7196 100644 --- a/src/java/org/apache/cassandra/repair/RepairResult.java +++ b/src/java/org/apache/cassandra/repair/RepairResult.java @@ -19,6 +19,8 @@ import java.util.List; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; + /** * RepairJob's result */ @@ -26,10 +28,12 @@ public class RepairResult { public final RepairJobDesc desc; public final List stats; + public final ConsensusMigrationRepairResult consensusMigrationRepairResult; - public RepairResult(RepairJobDesc desc, List stats) + public RepairResult(RepairJobDesc desc, List stats, ConsensusMigrationRepairResult consensusMigrationRepairResult) { this.desc = desc; this.stats = stats; + this.consensusMigrationRepairResult = consensusMigrationRepairResult; } } diff --git a/src/java/org/apache/cassandra/repair/RepairSession.java b/src/java/org/apache/cassandra/repair/RepairSession.java index 92d56390fe3a..ae46877a6b39 100644 --- a/src/java/org/apache/cassandra/repair/RepairSession.java +++ b/src/java/org/apache/cassandra/repair/RepairSession.java @@ -30,13 +30,11 @@ import java.util.concurrent.Executor; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicBoolean; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; -import com.google.common.util.concurrent.*; - +import com.google.common.util.concurrent.FutureCallback; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,7 +46,9 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.RepairException; -import org.apache.cassandra.gms.*; +import org.apache.cassandra.gms.EndpointState; +import org.apache.cassandra.gms.IEndpointStateChangeSubscriber; +import org.apache.cassandra.gms.IFailureDetectionEventListener; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.repair.consistent.ConsistentSession; @@ -59,6 +59,7 @@ import org.apache.cassandra.repair.state.SessionState; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.FBUtilities; @@ -73,7 +74,7 @@ * * A given RepairSession repairs a set of replicas for a given set of ranges on a list * of column families. For each of the column family to repair, RepairSession - * creates a {@link RepairJob} that handles the repair of that CF. + * creates a {@link AbstractRepairJob} that handles the repair of that CF. * * A given RepairJob has the 3 main phases: *
      @@ -122,6 +123,7 @@ public class RepairSession extends AsyncFuture implements I public final boolean repairPaxos; public final boolean paxosOnly; public final boolean dontPurgeTombstones; + public final boolean excludedDeadNodes; private final AtomicBoolean isFailed = new AtomicBoolean(false); @@ -135,7 +137,8 @@ public class RepairSession extends AsyncFuture implements I public final boolean optimiseStreams; public final SharedContext ctx; public final Scheduler validationScheduler; - private volatile List jobs = Collections.emptyList(); + private volatile List jobs = Collections.emptyList(); + private final boolean accordRepair; private volatile boolean terminated = false; @@ -143,6 +146,7 @@ public class RepairSession extends AsyncFuture implements I * Create new repair session. * @param parentRepairSession the parent sessions id * @param commonRange ranges to repair + * @param excludedDeadNodes Was the repair started for --force and were dead nodes excluded as a result * @param keyspace name of keyspace * @param parallelismDegree specifies the degree of parallelism when calculating the merkle trees * @param pullRepair true if the repair should be one way (from remote host to this host and only applicable between two hosts--see RepairOption) @@ -154,6 +158,7 @@ public RepairSession(SharedContext ctx, Scheduler validationScheduler, TimeUUID parentRepairSession, CommonRange commonRange, + boolean excludedDeadNodes, String keyspace, RepairParallelism parallelismDegree, boolean isIncremental, @@ -163,6 +168,7 @@ public RepairSession(SharedContext ctx, boolean repairPaxos, boolean paxosOnly, boolean dontPurgeTombstones, + boolean accordRepair, String... cfnames) { this.ctx = ctx; @@ -178,6 +184,8 @@ public RepairSession(SharedContext ctx, this.optimiseStreams = optimiseStreams; this.dontPurgeTombstones = dontPurgeTombstones; this.taskExecutor = new SafeExecutor(createExecutor(ctx)); + this.accordRepair = accordRepair; + this.excludedDeadNodes = excludedDeadNodes; } @VisibleForTesting @@ -338,10 +346,14 @@ public void start(ExecutorPlus executor) // Create and submit RepairJob for each ColumnFamily state.phase.jobsSubmitted(); - List jobs = new ArrayList<>(state.cfnames.length); + List jobs = new ArrayList<>(state.cfnames.length); for (String cfname : state.cfnames) { - RepairJob job = new RepairJob(this, cfname); + AbstractRepairJob job = accordRepair ? + new AccordRepairJob(this, cfname) : + new CassandraRepairJob(this, cfname); + // Repairs can drive forward progress for consensus migration so always check + job.addCallback(ConsensusTableMigrationState.completedRepairJobHandler); state.register(job.state); executor.execute(job); jobs.add(job); @@ -381,10 +393,10 @@ public void onFailure(Throwable t) public synchronized void terminate(@Nullable Throwable reason) { terminated = true; - List jobs = this.jobs; + List jobs = this.jobs; if (jobs != null) { - for (RepairJob job : jobs) + for (AbstractRepairJob job : jobs) job.abort(reason); } this.jobs = null; diff --git a/src/java/org/apache/cassandra/repair/messages/RepairMessage.java b/src/java/org/apache/cassandra/repair/messages/RepairMessage.java index e38a930bcca0..2eab88ef91f2 100644 --- a/src/java/org/apache/cassandra/repair/messages/RepairMessage.java +++ b/src/java/org/apache/cassandra/repair/messages/RepairMessage.java @@ -25,7 +25,6 @@ import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.function.Supplier; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; @@ -74,7 +73,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailure failureReason) + public void onFailure(InetAddressAndPort from, RequestFailure failure) { } }; diff --git a/src/java/org/apache/cassandra/repair/messages/RepairOption.java b/src/java/org/apache/cassandra/repair/messages/RepairOption.java index bc9231dcc142..bef7acfe16d7 100644 --- a/src/java/org/apache/cassandra/repair/messages/RepairOption.java +++ b/src/java/org/apache/cassandra/repair/messages/RepairOption.java @@ -17,7 +17,13 @@ */ package org.apache.cassandra.repair.messages; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; @@ -57,6 +63,8 @@ public class RepairOption public static final String NO_TOMBSTONE_PURGING = "nopurge"; + public static final String ACCORD_REPAIR_KEY = "accordRepair"; + // we don't want to push nodes too much for repair public static final int MAX_JOB_THREADS = 4; @@ -86,6 +94,7 @@ public static Set> parseRanges(String rangesStr, IPartitioner parti } return ranges; } + /** * Construct RepairOptions object from given map of Strings. *

      @@ -167,6 +176,12 @@ public static Set> parseRanges(String rangesStr, IPartitioner parti * ranges to the same host multiple times *

  • * + * + * + * + * + * * *
    false
    accordRepair"true" if the repair should be of Accord in flight transactions. Will ensure + * that once repair completes all Accord transactions are replicated at quorumfalse
    * @@ -188,11 +203,21 @@ public static RepairOption parse(Map options, IPartitioner parti boolean repairPaxos = Boolean.parseBoolean(options.get(REPAIR_PAXOS_KEY)); boolean paxosOnly = Boolean.parseBoolean(options.get(PAXOS_ONLY_KEY)); boolean dontPurgeTombstones = Boolean.parseBoolean(options.get(NO_TOMBSTONE_PURGING)); + boolean accordRepair = Boolean.parseBoolean(options.get(ACCORD_REPAIR_KEY)); if (previewKind != PreviewKind.NONE) { Preconditions.checkArgument(!repairPaxos, "repairPaxos must be set to false for preview repairs"); Preconditions.checkArgument(!paxosOnly, "paxosOnly must be set to false for preview repairs"); + Preconditions.checkArgument(!accordRepair, "accordRepair must be set to false for preview repairs"); + } + + if (accordRepair) + { + Preconditions.checkArgument(!paxosOnly, "paxosOnly must be set to false for Accord repairs"); + Preconditions.checkArgument(previewKind == PreviewKind.NONE, "Can't perform preview repair with an Accord repair"); + Preconditions.checkArgument(!force, "Accord repair only requires a quorum to work so force is not supported"); + incremental = false; } int jobThreads = 1; @@ -212,7 +237,7 @@ public static RepairOption parse(Map options, IPartitioner parti boolean asymmetricSyncing = Boolean.parseBoolean(options.get(OPTIMISE_STREAMS_KEY)); - RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, !ranges.isEmpty(), pullRepair, force, previewKind, asymmetricSyncing, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones); + RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, pullRepair, force, previewKind, asymmetricSyncing, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); // data centers String dataCentersStr = options.get(DATACENTERS_KEY); @@ -286,7 +311,6 @@ else if (ranges.isEmpty()) private final boolean incremental; private final boolean trace; private final int jobThreads; - private final boolean isSubrangeRepair; private final boolean pullRepair; private final boolean forceRepair; private final PreviewKind previewKind; @@ -296,12 +320,17 @@ else if (ranges.isEmpty()) private final boolean paxosOnly; private final boolean dontPurgeTombstones; + private final boolean accordRepair; + private final Collection columnFamilies = new HashSet<>(); private final Collection dataCenters = new HashSet<>(); private final Collection hosts = new HashSet<>(); private final Collection> ranges = new HashSet<>(); - public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean incremental, boolean trace, int jobThreads, Collection> ranges, boolean isSubrangeRepair, boolean pullRepair, boolean forceRepair, PreviewKind previewKind, boolean optimiseStreams, boolean ignoreUnreplicatedKeyspaces, boolean repairPaxos, boolean paxosOnly, boolean dontPurgeTombstones) + public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean incremental, boolean trace, int jobThreads, + Collection> ranges, boolean pullRepair, boolean forceRepair, + PreviewKind previewKind, boolean optimiseStreams, boolean ignoreUnreplicatedKeyspaces, boolean repairPaxos, + boolean paxosOnly, boolean dontPurgeTombstones, boolean accordRepair) { this.parallelism = parallelism; @@ -310,7 +339,6 @@ public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean this.trace = trace; this.jobThreads = jobThreads; this.ranges.addAll(ranges); - this.isSubrangeRepair = isSubrangeRepair; this.pullRepair = pullRepair; this.forceRepair = forceRepair; this.previewKind = previewKind; @@ -319,6 +347,7 @@ public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean this.repairPaxos = repairPaxos; this.paxosOnly = paxosOnly; this.dontPurgeTombstones = dontPurgeTombstones; + this.accordRepair = accordRepair; } public RepairParallelism getParallelism() @@ -381,11 +410,6 @@ public boolean isGlobal() return dataCenters.isEmpty() && hosts.isEmpty(); } - public boolean isSubrangeRepair() - { - return isSubrangeRepair; - } - public PreviewKind getPreviewKind() { return previewKind; @@ -439,6 +463,11 @@ public boolean dontPurgeTombstones() return dontPurgeTombstones; } + public boolean accordRepair() + { + return accordRepair; + } + @Override public String toString() { @@ -459,6 +488,7 @@ public String toString() ", repairPaxos: " + repairPaxos + ", paxosOnly: " + paxosOnly + ", dontPurgeTombstones: " + dontPurgeTombstones + + ", accordRepair: " + accordRepair + ')'; } @@ -472,7 +502,6 @@ public Map asMap() options.put(COLUMNFAMILIES_KEY, Joiner.on(",").join(columnFamilies)); options.put(DATACENTERS_KEY, Joiner.on(",").join(dataCenters)); options.put(HOSTS_KEY, Joiner.on(",").join(hosts)); - options.put(SUB_RANGE_REPAIR_KEY, Boolean.toString(isSubrangeRepair)); options.put(TRACE_KEY, Boolean.toString(trace)); options.put(RANGES_KEY, Joiner.on(",").join(ranges)); options.put(PULL_REPAIR_KEY, Boolean.toString(pullRepair)); @@ -482,6 +511,7 @@ public Map asMap() options.put(REPAIR_PAXOS_KEY, Boolean.toString(repairPaxos)); options.put(PAXOS_ONLY_KEY, Boolean.toString(paxosOnly)); options.put(NO_TOMBSTONE_PURGING, Boolean.toString(dontPurgeTombstones)); + options.put(ACCORD_REPAIR_KEY, Boolean.toString(accordRepair)); return options; } } diff --git a/src/java/org/apache/cassandra/repair/messages/SyncResponse.java b/src/java/org/apache/cassandra/repair/messages/SyncResponse.java index e7e7985fff34..0c528a379640 100644 --- a/src/java/org/apache/cassandra/repair/messages/SyncResponse.java +++ b/src/java/org/apache/cassandra/repair/messages/SyncResponse.java @@ -23,12 +23,13 @@ import java.util.Objects; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.SyncNodePair; import org.apache.cassandra.repair.RepairJobDesc; +import org.apache.cassandra.repair.SyncNodePair; import org.apache.cassandra.streaming.SessionSummary; /** @@ -103,7 +104,7 @@ public SyncResponse deserialize(DataInputPlus in, int version) throws IOExceptio List summaries = new ArrayList<>(numSummaries); for (int i=0; i serializer = new IVersionedSerializer() + { + @Override + public void serialize(TableId t, DataOutputPlus out, int version) throws IOException + { + t.serialize(out); + } + + @Override + public TableId deserialize(DataInputPlus in, int version) throws IOException + { + return TableId.deserialize(in); + } + + @Override + public long serializedSize(TableId t, int version) + { + return t.serializedSize(); + } + }; + + public static final MetadataSerializer metadataSerializer = new MetadataSerializer() + { + @Override + public void serialize(TableId t, DataOutputPlus out, Version version) throws IOException + { + t.serialize(out); + } + + @Override + public TableId deserialize(DataInputPlus in, Version version) throws IOException + { + return TableId.deserialize(in); + } + + @Override + public long serializedSize(TableId t, Version version) + { + return t.serializedSize(); + } + }; } diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index 1c22d955b7d8..90c56e47bba7 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -70,10 +70,10 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.serialization.UDTAndFunctionsAwareMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.utils.AbstractIterator; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; @@ -320,6 +320,11 @@ public TableMetadata withSwapped(Indexes indexes) return unbuild().indexes(indexes).build(); } + public TableId id() + { + return id; + } + public boolean isView() { return kind == Kind.VIEW; @@ -344,7 +349,7 @@ public boolean isCompactTable() { return false; } - + public boolean isIncrementalBackupsEnabled() { return params.incrementalBackups; diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index 1592718bf117..24b2966bdf6a 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -448,6 +448,7 @@ public int parentRepairSessionsCount() */ public RepairSession submitRepairSession(TimeUUID parentRepairSession, CommonRange range, + boolean excludedDeadNodes, String keyspace, RepairParallelism parallelismDegree, boolean isIncremental, @@ -457,6 +458,7 @@ public RepairSession submitRepairSession(TimeUUID parentRepairSession, boolean repairPaxos, boolean paxosOnly, boolean dontPurgeTombstones, + boolean accordRepair, ExecutorPlus executor, Scheduler validationScheduler, String... cfnames) @@ -470,9 +472,11 @@ public RepairSession submitRepairSession(TimeUUID parentRepairSession, if (cfnames.length == 0) return null; - final RepairSession session = new RepairSession(ctx, validationScheduler, parentRepairSession, range, keyspace, + final RepairSession session = new RepairSession(ctx, validationScheduler, parentRepairSession, + range, excludedDeadNodes, keyspace, parallelismDegree, isIncremental, pullRepair, - previewKind, optimiseStreams, repairPaxos, paxosOnly, dontPurgeTombstones, cfnames); + previewKind, optimiseStreams, repairPaxos, paxosOnly, + dontPurgeTombstones, accordRepair, cfnames); repairs.getIfPresent(parentRepairSession).register(session.state); sessions.put(session.getId(), session); diff --git a/src/java/org/apache/cassandra/service/CASRequest.java b/src/java/org/apache/cassandra/service/CASRequest.java index f118dcf84722..fb78daa2a597 100644 --- a/src/java/org/apache/cassandra/service/CASRequest.java +++ b/src/java/org/apache/cassandra/service/CASRequest.java @@ -18,15 +18,17 @@ package org.apache.cassandra.service; import accord.primitives.Txn; +import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.db.partitions.PartitionUpdate; -import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.exceptions.InvalidRequestException; -import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.transport.Dispatcher; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult; + /** * Abstract the conditions and updates for a CAS operation. */ @@ -51,7 +53,7 @@ public interface CASRequest */ PartitionUpdate makeUpdates(FilteredPartition current, ClientState clientState, Ballot ballot) throws InvalidRequestException; - Txn toAccordTxn(ClientState clientState, long nowInSecs); + Txn toAccordTxn(ConsistencyLevel consistencyLevel, ConsistencyLevel commitConsistencyLevel, ClientState clientState, long nowInSecs); - RowIterator toCasResult(TxnData data); + ConsensusAttemptResult toCasResult(TxnResult txnResult); } diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index e61c9187a309..a40426c00130 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -39,14 +39,16 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; -import com.google.common.base.Preconditions; import com.google.common.cache.CacheLoader; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.primitives.Keys; import accord.primitives.Txn; import org.apache.cassandra.batchlog.Batch; import org.apache.cassandra.batchlog.BatchlogManager; @@ -54,6 +56,7 @@ import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.Config; +import org.apache.cassandra.config.Config.NonSerialWriteStrategy; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; @@ -90,8 +93,8 @@ import org.apache.cassandra.exceptions.ReadAbortException; import org.apache.cassandra.exceptions.ReadFailureException; import org.apache.cassandra.exceptions.ReadTimeoutException; -import org.apache.cassandra.exceptions.RequestFailureException; import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.exceptions.RequestFailureException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.UnavailableException; import org.apache.cassandra.exceptions.WriteFailureException; @@ -125,9 +128,18 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.AccordUpdate; +import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperations; +import org.apache.cassandra.service.accord.txn.TxnResult; +import org.apache.cassandra.service.accord.txn.TxnUpdate; +import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.ContentionStrategy; @@ -137,6 +149,7 @@ import org.apache.cassandra.service.paxos.v1.ProposeCallback; import org.apache.cassandra.service.reads.AbstractReadExecutor; import org.apache.cassandra.service.reads.ReadCallback; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.service.reads.range.RangeCommands; import org.apache.cassandra.service.reads.repair.ReadRepair; import org.apache.cassandra.tcm.ClusterMetadata; @@ -155,10 +168,10 @@ import org.apache.cassandra.utils.concurrent.CountDownLatch; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.collect.Iterables.concat; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; -import static org.apache.cassandra.config.Config.LegacyPaxosStrategy.accord; import static org.apache.cassandra.db.ConsistencyLevel.SERIAL; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casWriteMetrics; @@ -177,6 +190,11 @@ import static org.apache.cassandra.net.Verb.SCHEMA_VERSION_REQ; import static org.apache.cassandra.net.Verb.TRUNCATE_REQ; import static org.apache.cassandra.service.BatchlogResponseHandler.BatchlogCleanup; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.RETRY_NEW_PROTOCOL; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.casResult; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.serialReadResult; +import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision; import static org.apache.cassandra.service.paxos.Ballot.Flag.GLOBAL; import static org.apache.cassandra.service.paxos.Ballot.Flag.LOCAL; import static org.apache.cassandra.service.paxos.BallotGenerator.Global.nextBallot; @@ -322,6 +340,7 @@ public static RowIterator cas(String keyspaceName, Dispatcher.RequestTime requestTime) throws UnavailableException, IsBootstrappingException, RequestFailureException, RequestTimeoutException, InvalidRequestException, CasWriteUnknownResultException { + TableMetadata metadata = Schema.instance.validateTable(keyspaceName, cfName); if (DatabaseDescriptor.getPartitionDenylistEnabled() && DatabaseDescriptor.getDenylistWritesEnabled() && !partitionDenylist.isKeyPermitted(keyspaceName, cfName, key.getKey())) { denylistMetrics.incrementWritesRejected(); @@ -329,34 +348,61 @@ public static RowIterator cas(String keyspaceName, key, keyspaceName, cfName)); } - if (DatabaseDescriptor.getLegacyPaxosStrategy() == accord) + ConsensusAttemptResult lastAttemptResult; + do { - TxnData data = AccordService.instance().coordinate(request.toAccordTxn(clientState, nowInSeconds), consistencyForPaxos); - return request.toCasResult(data); - } - else - { - return (Paxos.useV2() || keyspaceName.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) - ? Paxos.cas(key, request, consistencyForPaxos, consistencyForCommit, clientState) - : legacyCas(keyspaceName, cfName, key, request, consistencyForPaxos, consistencyForCommit, clientState, nowInSeconds, requestTime); - } - } - - public static RowIterator legacyCas(String keyspaceName, - String cfName, - DecoratedKey key, - CASRequest request, - ConsistencyLevel consistencyForPaxos, - ConsistencyLevel consistencyForCommit, - ClientState clientState, - long nowInSeconds, - Dispatcher.RequestTime requestTime) + ConsensusRoutingDecision decision = consensusRouting(metadata, key, consistencyForPaxos, requestTime, true); + switch (decision) + { + case paxosV2: + lastAttemptResult = Paxos.cas(key, + request, + consistencyForPaxos, + consistencyForCommit, + clientState, + requestTime); + break; + case paxosV1: + lastAttemptResult = legacyCas(metadata, + key, + request, + consistencyForPaxos, + consistencyForCommit, + clientState, + nowInSeconds, + requestTime); + break; + case accord: + Txn txn = request.toAccordTxn(consistencyForPaxos, + consistencyForCommit, + clientState, + nowInSeconds); + IAccordService accordService = AccordService.instance(); + accordService.maybeConvertKeyspacesToAccord(txn); + TxnResult txnResult = accordService.coordinate(txn, + consistencyForPaxos, + requestTime); + lastAttemptResult = request.toCasResult(txnResult); + break; + default: + throw new IllegalStateException("Unsupported consensus " + decision); + } + } while (lastAttemptResult.shouldRetryOnNewConsensusProtocol); + return lastAttemptResult.casResult; + } + + private static ConsensusAttemptResult legacyCas(TableMetadata metadata, + DecoratedKey key, + CASRequest request, + ConsistencyLevel consistencyForPaxos, + ConsistencyLevel consistencyForCommit, + ClientState clientState, + long nowInSeconds, + Dispatcher.RequestTime requestTime) throws UnavailableException, IsBootstrappingException, RequestFailureException, RequestTimeoutException, InvalidRequestException { try { - TableMetadata metadata = Schema.instance.validateTable(keyspaceName, cfName); - Function> updateProposer = ballot -> { // read the current values and check they validate the conditions @@ -374,7 +420,7 @@ public static RowIterator legacyCas(String keyspaceName, { Tracing.trace("CAS precondition does not match current values {}", current); casWriteMetrics.conditionNotMet.inc(); - return Pair.create(PartitionUpdate.emptyUpdate(metadata, key), current.rowIterator()); + return Pair.create(PartitionUpdate.emptyUpdate(metadata, key), current.rowIterator(false)); } // Create the desired updates @@ -399,15 +445,14 @@ public static RowIterator legacyCas(String keyspaceName, return Pair.create(updates, null); }; - return doPaxos(metadata, - key, - consistencyForPaxos, - consistencyForCommit, - consistencyForCommit, - requestTime, - casWriteMetrics, - updateProposer); - + return casResult(doPaxos(metadata, + key, + consistencyForPaxos, + consistencyForCommit, + consistencyForCommit, + requestTime, + casWriteMetrics, + updateProposer)); } catch (CasWriteUnknownResultException e) { @@ -1165,6 +1210,7 @@ public static void mutateWithTriggers(List mutations, Collection augmented = TriggerExecutor.instance.execute(mutations); + String keyspaceName = mutations.iterator().next().getKeyspaceName(); boolean updatesView = Keyspace.open(mutations.iterator().next().getKeyspaceName()) .viewManager .updatesAffectView(mutations, true); @@ -1172,8 +1218,10 @@ public static void mutateWithTriggers(List mutations, long size = IMutation.dataSize(mutations); writeMetrics.mutationSize.update(size); writeMetricsForLevel(consistencyLevel).mutationSize.update(size); - - if (augmented != null) + NonSerialWriteStrategy nonSerialWriteStrategy = DatabaseDescriptor.getNonSerialWriteStrategy(); + if (nonSerialWriteStrategy.writesThroughAccord && !SchemaConstants.getSystemKeyspaces().contains(keyspaceName)) + mutateWithAccord(augmented != null ? augmented : mutations, consistencyLevel, requestTime, nonSerialWriteStrategy); + else if (augmented != null) mutateAtomically(augmented, consistencyLevel, updatesView, requestTime); else { @@ -1184,6 +1232,29 @@ public static void mutateWithTriggers(List mutations, } } + private static void mutateWithAccord(Collection iMutations, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, Config.NonSerialWriteStrategy nonSerialWriteStrategy) + { + int fragmentIndex = 0; + List fragments = new ArrayList<>(iMutations.size()); + List partitionKeys = new ArrayList<>(iMutations.size()); + for (IMutation mutation : iMutations) + { + for (PartitionUpdate update : mutation.getPartitionUpdates()) + { + PartitionKey pk = PartitionKey.of(update); + partitionKeys.add(pk); + fragments.add(new TxnWrite.Fragment(PartitionKey.of(update), fragmentIndex++, update, TxnReferenceOperations.empty())); + } + } + // Potentially ignore commit consistency level if the strategy specifies accord and not migration + ConsistencyLevel clForCommit = nonSerialWriteStrategy.commitCLForStrategy(consistencyLevel); + AccordUpdate update = new TxnUpdate(fragments, TxnCondition.none(), clForCommit); + Txn.InMemory txn = new Txn.InMemory(Keys.of(partitionKeys), TxnRead.EMPTY, TxnQuery.EMPTY, update); + IAccordService accordService = AccordService.instance(); + accordService.maybeConvertKeyspacesToAccord(txn); + accordService.coordinate(txn, consistencyLevel, requestTime); + } + /** * See mutate. Adds additional steps before and after writing a batch. * Before writing the batch (but after doing availability check against the FD for the row replicas): @@ -1602,7 +1673,7 @@ public static void sendToHintedReplicas(final Mutation mutation, if (insertLocal) { - Preconditions.checkNotNull(localReplica); + checkNotNull(localReplica); performLocally(stage, localReplica, mutation::apply, responseHandler, mutation, requestTime); } @@ -1881,43 +1952,68 @@ public static boolean hasJoined() return metadata.myNodeState() == NodeState.JOINED; } - private static PartitionIterator readWithConsensus(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) - throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException + private static ConsensusRoutingDecision consensusRouting(TableMetadata metadata, DecoratedKey partitionKey, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, boolean isForWrite) { - // TCM explicitly relies on paxos and doesn't work with accord - if (DatabaseDescriptor.getLegacyPaxosStrategy() == accord && !group.metadata().keyspace.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) - { - return readWithAccord(group, consistencyLevel); - } - else - { - return readWithPaxos(group, consistencyLevel, requestTime); - } + if (metadata.keyspace.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) + return ConsensusRoutingDecision.paxosV2; + return ConsensusRequestRouter.instance.routeAndMaybeMigrate(partitionKey, + metadata.id, + consistencyLevel, + requestTime, + DatabaseDescriptor.getCasContentionTimeout(NANOSECONDS), + isForWrite); } - private static PartitionIterator readWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + private static PartitionIterator readWithConsensus(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException { - return (Paxos.useV2() || group.metadata().keyspace.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) - ? Paxos.read(group, consistencyLevel, requestTime) - : legacyReadWithPaxos(group, consistencyLevel, requestTime); + ConsensusAttemptResult lastResult; + do + { + SinglePartitionReadCommand command = group.queries.get(0); + ConsensusRoutingDecision decision = consensusRouting(group.metadata(), command.partitionKey(), consistencyLevel, requestTime, false); + switch (decision) + { + case paxosV2: + lastResult = Paxos.read(group, consistencyLevel, requestTime); + break; + case paxosV1: + lastResult = legacyReadWithPaxos(group, consistencyLevel, requestTime); + break; + case accord: + lastResult = readWithAccord(group, consistencyLevel, requestTime); + break; + default: + throw new IllegalStateException("Unsupported consensus " + decision); + } + } while (lastResult.shouldRetryOnNewConsensusProtocol); + return lastResult.serialReadResult; } - private static PartitionIterator readWithAccord(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel) + private static ConsensusAttemptResult readWithAccord(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { if (group.queries.size() > 1) throw new InvalidRequestException("SERIAL/LOCAL_SERIAL consistency may only be requested for one partition at a time"); - TxnRead read = TxnRead.createSerialRead(group.queries.get(0)); + SinglePartitionReadCommand readCommand = group.queries.get(0); + // If the non-SERIAL write strategy is sending all writes through Accord there is no need to use the supplied consistency + // level since Accord will manage reading safely + consistencyLevel = DatabaseDescriptor.getNonSerialWriteStrategy().readCLForStrategy(consistencyLevel); + TxnRead read = TxnRead.createSerialRead(readCommand, consistencyLevel); Txn txn = new Txn.InMemory(read.keys(), read, TxnQuery.ALL); - TxnData data = AccordService.instance().coordinate(txn, consistencyLevel); + IAccordService accordService = AccordService.instance(); + accordService.maybeConvertKeyspacesToAccord(txn); + TxnResult txnResult = accordService.coordinate(txn, consistencyLevel, requestTime); + if (txnResult.kind() == retry_new_protocol) + return RETRY_NEW_PROTOCOL; + TxnData data = (TxnData)txnResult; FilteredPartition partition = data.get(TxnRead.SERIAL_READ); if (partition != null) - return PartitionIterators.singletonIterator(partition.rowIterator()); + return serialReadResult(PartitionIterators.singletonIterator(partition.rowIterator(readCommand.isReversed()))); else - return EmptyIterators.partition(); + return serialReadResult(EmptyIterators.partition()); } - private static PartitionIterator legacyReadWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + private static ConsensusAttemptResult legacyReadWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException { long start = nanoTime(); @@ -1930,7 +2026,6 @@ private static PartitionIterator legacyReadWithPaxos(SinglePartitionReadCommand. // calculate the blockFor before repair any paxos round to avoid RS being altered in between. int blockForRead = consistencyLevel.blockFor(Keyspace.open(metadata.keyspace).getReplicationStrategy()); - PartitionIterator result = null; try { final ConsistencyLevel consistencyForReplayCommitsOrFetch = consistencyLevel == ConsistencyLevel.LOCAL_SERIAL @@ -1967,7 +2062,7 @@ private static PartitionIterator legacyReadWithPaxos(SinglePartitionReadCommand. throw new ReadFailureException(consistencyLevel, e.received, e.blockFor, false, e.failureReasonByEndpoint); } - result = fetchRows(group.queries, consistencyForReplayCommitsOrFetch, requestTime); + return serialReadResult(fetchRows(group.queries, consistencyForReplayCommitsOrFetch, ReadCoordinator.DEFAULT, requestTime)); } catch (UnavailableException e) { @@ -2011,18 +2106,16 @@ private static PartitionIterator legacyReadWithPaxos(SinglePartitionReadCommand. readMetricsForLevel(consistencyLevel).addNano(latency); Keyspace.open(metadata.keyspace).getColumnFamilyStore(metadata.name).metric.coordinatorReadLatency.update(latency, TimeUnit.NANOSECONDS); } - - return result; } @SuppressWarnings("resource") - private static PartitionIterator readRegular(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + public static PartitionIterator readRegular(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, ReadCoordinator coordinator, Dispatcher.RequestTime requestTime) throws UnavailableException, ReadFailureException, ReadTimeoutException { long start = nanoTime(); try { - PartitionIterator result = fetchRows(group.queries, consistencyLevel, requestTime); + PartitionIterator result = fetchRows(group.queries, consistencyLevel, coordinator, requestTime); // Note that the only difference between the command in a group must be the partition key on which // they applied. boolean enforceStrictLiveness = group.queries.get(0).metadata().enforceStrictLiveness(); @@ -2072,6 +2165,11 @@ private static PartitionIterator readRegular(SinglePartitionReadCommand.Group gr } } + public static PartitionIterator readRegular(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + return readRegular(group, consistencyLevel, ReadCoordinator.DEFAULT, requestTime); + } + public static void recordReadRegularAbort(ConsistencyLevel consistencyLevel, Throwable cause) { readMetrics.markAbort(cause); @@ -2119,6 +2217,7 @@ public RowIterator next() */ private static PartitionIterator fetchRows(List commands, ConsistencyLevel consistencyLevel, + ReadCoordinator coordinator, Dispatcher.RequestTime requestTime) throws UnavailableException, ReadFailureException, ReadTimeoutException { @@ -2131,7 +2230,7 @@ private static PartitionIterator fetchRows(List comm // for type of speculation we'll use in this read for (int i=0; i anyOutOfRangeOpsRecorded = keyspace -> keyspace.metric.outOfRangeTokenReads.getCount() > 0 || keyspace.metric.outOfRangeTokenWrites.getCount() > 0 @@ -1664,6 +1676,49 @@ public void abortBootstrap(String nodeStr, String endpointStr) } } + @Override + public void migrateConsensusProtocol(@Nonnull String targetProtocol, + @Nonnull List keyspaceNames, + @Nullable List maybeTableNames, + @Nullable String maybeRangesStr) + { + checkNotNull(targetProtocol, "targetProtocol is null"); + checkArgument(!keyspaceNames.contains(SchemaConstants.METADATA_KEYSPACE_NAME)); + startMigrationToConsensusProtocol(targetProtocol, keyspaceNames, Optional.ofNullable(maybeTableNames), Optional.ofNullable(maybeRangesStr)); + } + + @Override + public List finishConsensusMigration(@Nonnull String keyspace, + @Nullable List maybeTableNames, + @Nullable String maybeRangesStr) + { + checkArgument(!keyspace.equals(SchemaConstants.METADATA_KEYSPACE_NAME)); + return finishMigrationToConsensusProtocol(keyspace, Optional.ofNullable(maybeTableNames), Optional.ofNullable(maybeRangesStr)); + } + + @Override + public void setConsensusMigrationTargetProtocol(@Nonnull String targetProtocol, + @Nullable List keyspaceNames, + @Nullable List maybeTableNames) + { + checkNotNull(targetProtocol, "targetProtocol is null"); + checkNotNull(keyspaceNames, "keyspaceNames is null"); + checkArgument(!keyspaceNames.contains(SchemaConstants.METADATA_KEYSPACE_NAME)); + + ConsensusTableMigrationState.setConsensusMigrationTargetProtocol(targetProtocol, keyspaceNames, Optional.ofNullable(maybeTableNames)); + } + + @Override + public String listConsensusMigrations(@Nullable Set keyspaceNames, + @Nullable Set tableNames, + @Nonnull String format) + { + ClusterMetadata cm = ClusterMetadata.current(); + ConsensusMigrationState snapshot = cm.consensusMigrationState; + Map snapshotAsMap = snapshot.toMap(keyspaceNames, tableNames); + return pojoMapToString(snapshotAsMap, format); + } + public Map> getConcurrency(List stageNames) { Stream stageStream = stageNames.isEmpty() ? stream(Stage.values()) : stageNames.stream().map(Stage::fromPoolName); @@ -3978,11 +4033,21 @@ synchronized void checkServiceAllowedToStart(String service) // Never ever do this at home. Used by tests. @VisibleForTesting - public IPartitioner setPartitionerUnsafe(IPartitioner newPartitioner) + public void setPartitionerUnsafe(IPartitioner newPartitioner) { - IPartitioner oldPartitioner = DatabaseDescriptor.setPartitionerUnsafe(newPartitioner); + checkNotNull(newPartitioner, "newPartitioner is null"); + checkState(originalPartitioner == null, "Already changed the partitioner without resetting"); + originalPartitioner = DatabaseDescriptor.setPartitionerUnsafe(newPartitioner); valueFactory = new VersionedValue.VersionedValueFactory(newPartitioner); - return oldPartitioner; + } + + @VisibleForTesting + public void resetPartitionerUnsafe() + { + checkState(originalPartitioner != null, "Original partitioner was never changed"); + DatabaseDescriptor.setPartitionerUnsafe(originalPartitioner); + valueFactory = new VersionedValueFactory(originalPartitioner); + originalPartitioner = null; } public void truncate(String keyspace, String table) throws TimeoutException, IOException @@ -4165,6 +4230,16 @@ public List getNonLocalStrategyKeyspaces() return Lists.newArrayList(Schema.instance.distributedKeyspaces().names()); } + @Override + public List getAccordManagedKeyspaces() + { + // TODO (review) These are really just the ones Accord is aware of not necessarily managed + Set keyspaces = Schema.instance.getNonLocalStrategyKeyspaces().names(); + return keyspaces.stream() + .filter(AccordService.instance()::isAccordManagedKeyspace) + .collect(toList()); + } + public Map getViewBuildStatuses(String keyspace, String view, boolean withPort) { Map coreViewStatus = SystemDistributedKeyspace.viewStatus(keyspace, view); @@ -4994,7 +5069,7 @@ public void enableFullQueryLogger(String path, String rollCycle, Boolean blockin archiveCommand = archiveCommand != null ? archiveCommand : fqlOptions.archive_command; maxArchiveRetries = maxArchiveRetries != Integer.MIN_VALUE ? maxArchiveRetries : fqlOptions.max_archive_retries; - Preconditions.checkNotNull(path, "cassandra.yaml did not set log_dir and not set as parameter"); + checkNotNull(path, "cassandra.yaml did not set log_dir and not set as parameter"); FullQueryLogger.instance.enableWithoutClean(File.getPath(path), rollCycle, blocking, maxQueueWeight, maxLogSize, archiveCommand, maxArchiveRetries); } @@ -5359,7 +5434,7 @@ public Long getRepairRpcTimeout() public void setRepairRpcTimeout(Long timeoutInMillis) { - Preconditions.checkState(timeoutInMillis > 0); + checkState(timeoutInMillis > 0); DatabaseDescriptor.setRepairRpcTimeout(timeoutInMillis); logger.info("RepairRpcTimeout set to {}ms via JMX", timeoutInMillis); } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index 6e8a2ad449f7..d11ca1bfb4f6 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -27,6 +27,7 @@ import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeoutException; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.management.NotificationEmitter; import javax.management.openmbean.CompositeData; @@ -1141,6 +1142,23 @@ default int upgradeSSTables(String keyspaceName, boolean excludeCurrentVersion, public String getBootstrapState(); void abortBootstrap(String nodeId, String endpoint); + void migrateConsensusProtocol(@Nonnull String targetProtocol, + @Nullable List keyspaceNames, + @Nullable List maybeTableNames, + @Nullable String maybeRangesStr); + + List finishConsensusMigration(@Nonnull String keyspace, + @Nullable List maybeTableNames, + @Nullable String maybeRangesStr); + + void setConsensusMigrationTargetProtocol(@Nonnull String targetProtocol, + @Nullable List keyspaceNames, + @Nullable List maybeTableNames); + + String listConsensusMigrations(@Nullable Set keyspaceNames, @Nullable Set tableNames, @Nonnull String format); + + List getAccordManagedKeyspaces(); + /** Gets the concurrency settings for processing stages*/ static class StageConcurrency implements Serializable { @@ -1188,6 +1206,7 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e /** * Start the fully query logger. + * * @param path Path where the full query log will be stored. If null cassandra.yaml value is used. * @param rollCycle How often to create a new file for query data (MINUTELY, DAILY, HOURLY) * @param blocking Whether threads submitting queries to the query log should block if they can't be drained to the filesystem or alternatively drops samples and log diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index 994e551f79d6..d7bce189d2fe 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -17,8 +17,6 @@ */ package org.apache.cassandra.service.accord; -import java.util.Collections; -import java.util.Set; import java.util.concurrent.Callable; import java.util.function.BiFunction; import java.util.function.Function; @@ -27,7 +25,7 @@ import com.google.common.primitives.Ints; import accord.local.Command.TransientListener; -import accord.utils.DeterministicIdentitySet; +import accord.local.Listeners; import accord.utils.IntrusiveLinkedListNode; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncResults.RunnableResult; @@ -35,14 +33,14 @@ import org.apache.cassandra.utils.ObjectSizes; import static java.lang.String.format; -import static org.apache.cassandra.service.accord.AccordCachingState.Status.UNINITIALIZED; -import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADING; -import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADED; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.EVICTED; import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_LOAD; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_SAVE; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADED; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADING; import static org.apache.cassandra.service.accord.AccordCachingState.Status.MODIFIED; import static org.apache.cassandra.service.accord.AccordCachingState.Status.SAVING; -import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_SAVE; -import static org.apache.cassandra.service.accord.AccordCachingState.Status.EVICTED; +import static org.apache.cassandra.service.accord.AccordCachingState.Status.UNINITIALIZED; /** * Global (per CommandStore) state of a cached entity (Command or CommandsForKey). @@ -61,7 +59,7 @@ public class AccordCachingState extends IntrusiveLinkedListNode /** * Transient listeners aren't meant to survive process restart, but must survive cache eviction. */ - private Set transientListeners; + private Listeners transientListeners; public AccordCachingState(K key) { @@ -140,7 +138,7 @@ public Status status() public void addListener(TransientListener listener) { if (transientListeners == null) - transientListeners = new DeterministicIdentitySet<>(); + transientListeners = new Listeners<>(); transientListeners.add(listener); } @@ -149,14 +147,14 @@ public boolean removeListener(TransientListener listener) return transientListeners != null && transientListeners.remove(listener); } - public void listeners(Set listeners) + public void listeners(Listeners listeners) { transientListeners = listeners; } - public Set listeners() + public Listeners listeners() { - return transientListeners == null ? Collections.emptySet() : transientListeners; + return transientListeners == null ? Listeners.EMPTY : transientListeners; } public boolean hasListeners() diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index c9a3224b9a1d..696cfe227c10 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -30,6 +30,8 @@ import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; +import java.util.function.Predicate; + import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; @@ -450,7 +452,7 @@ public void completeOperation(AccordSafeCommandStore store) current = null; } - O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) { keysOrRanges = keysOrRanges.slice(slice, Routables.Slice.Minimal); switch (keysOrRanges.domain()) @@ -461,7 +463,7 @@ O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction null; + private static final Query noopQuery = (txnId, executeAt, keys, data, read, update) -> null; public static class StreamData implements Data { @@ -145,7 +145,7 @@ public static StreamData of(TokenRange range, TimeUUID streamId, boolean hasData } @Override - public Data merge(Data data) + public StreamData merge(Data data) { StreamData that = (StreamData) data; if (that.streams.keySet().stream().anyMatch(this.streams::containsKey)) @@ -258,7 +258,7 @@ private static boolean hasDataToStream(StreamCoordinator coordinator, InetAddres } @Override - public AsyncChain read(Seekable key, Txn.Kind kind, SafeCommandStore commandStore, Timestamp executeAt, DataStore store) + public AsyncChain read(Seekable key, SafeCommandStore commandStore, Timestamp executeAt, DataStore store) { try { diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index a323edaf1742..e3e1dd03f9d9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -20,8 +20,8 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collections; -import java.util.EnumMap; -import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.Executor; @@ -30,8 +30,12 @@ import java.util.zip.Checksum; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ImmutableListMultimap; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ListMultimap; +import com.google.common.collect.Multimap; import com.google.common.primitives.Ints; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,6 +69,8 @@ import org.apache.cassandra.journal.Params; import org.apache.cassandra.journal.ValueSerializer; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.service.accord.interop.AccordInteropApply; +import org.apache.cassandra.service.accord.interop.AccordInteropCommit; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; @@ -78,10 +84,31 @@ import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; import org.apache.cassandra.utils.ByteArrayUtil; -import static accord.messages.MessageType.*; +import static accord.messages.MessageType.ACCEPT_INVALIDATE_REQ; +import static accord.messages.MessageType.ACCEPT_REQ; +import static accord.messages.MessageType.APPLY_MAXIMAL_REQ; +import static accord.messages.MessageType.APPLY_MINIMAL_REQ; +import static accord.messages.MessageType.BEGIN_INVALIDATE_REQ; +import static accord.messages.MessageType.BEGIN_RECOVER_REQ; +import static accord.messages.MessageType.COMMIT_INVALIDATE_REQ; +import static accord.messages.MessageType.COMMIT_MAXIMAL_REQ; +import static accord.messages.MessageType.COMMIT_MINIMAL_REQ; +import static accord.messages.MessageType.INFORM_DURABLE_REQ; +import static accord.messages.MessageType.INFORM_OF_TXN_REQ; +import static accord.messages.MessageType.PRE_ACCEPT_REQ; +import static accord.messages.MessageType.PROPAGATE_APPLY_MSG; +import static accord.messages.MessageType.PROPAGATE_COMMIT_MSG; +import static accord.messages.MessageType.PROPAGATE_OTHER_MSG; +import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; +import static accord.messages.MessageType.SET_GLOBALLY_DURABLE_REQ; +import static accord.messages.MessageType.SET_SHARD_DURABLE_REQ; import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; +import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MAXIMAL_REQ; +import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MINIMAL_REQ; +import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; +import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ; public class AccordJournal implements Shutdownable { @@ -487,45 +514,68 @@ public enum Type implements ValueSerializer REPLAY (0, ReplayRecord.SERIALIZER), /* Accord protocol requests */ - PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), - ACCEPT (65, ACCEPT_REQ, AcceptSerializers.request, TXN ), - ACCEPT_INVALIDATE (66, ACCEPT_INVALIDATE_REQ, AcceptSerializers.invalidate, EPOCH), - COMMIT_MINIMAL (67, COMMIT_MINIMAL_REQ, CommitSerializers.request, TXN ), - COMMIT_MAXIMAL (68, COMMIT_MAXIMAL_REQ, CommitSerializers.request, TXN ), - COMMIT_INVALIDATE (69, COMMIT_INVALIDATE_REQ, CommitSerializers.invalidate, INVL), - APPLY_MINIMAL (70, APPLY_MINIMAL_REQ, ApplySerializers.request, TXN ), - APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), - BEGIN_RECOVER (72, BEGIN_RECOVER_REQ, RecoverySerializers.request, TXN ), - BEGIN_INVALIDATE (73, BEGIN_INVALIDATE_REQ, BeginInvalidationSerializers.request, EPOCH), - INFORM_OF_TXN (74, INFORM_OF_TXN_REQ, InformOfTxnIdSerializers.request, EPOCH), - INFORM_DURABLE (75, INFORM_DURABLE_REQ, InformDurableSerializers.request, TXN ), - SET_SHARD_DURABLE (76, SET_SHARD_DURABLE_REQ, SetDurableSerializers.shardDurable, EPOCH), - SET_GLOBALLY_DURABLE (77, SET_GLOBALLY_DURABLE_REQ, SetDurableSerializers.globallyDurable, EPOCH), + PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), + ACCEPT (65, ACCEPT_REQ, AcceptSerializers.request, TXN ), + ACCEPT_INVALIDATE (66, ACCEPT_INVALIDATE_REQ, AcceptSerializers.invalidate, EPOCH), + COMMIT_MINIMAL (67, COMMIT_MINIMAL_REQ, CommitSerializers.request, TXN ), + COMMIT_MAXIMAL (68, COMMIT_MAXIMAL_REQ, CommitSerializers.request, TXN ), + COMMIT_INVALIDATE (69, COMMIT_INVALIDATE_REQ, CommitSerializers.invalidate, INVL ), + APPLY_MINIMAL (70, APPLY_MINIMAL_REQ, ApplySerializers.request, TXN ), + APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), + + INTEROP_COMMIT_MINIMAL (90, INTEROP_COMMIT_MINIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropCommit.serializer, TXN), + INTEROP_COMMIT_MAXIMAL (91, INTEROP_COMMIT_MAXIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropCommit.serializer, TXN), + INTEROP_APPLY_MINIMAL (92, INTEROP_APPLY_MINIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropApply.serializer, TXN), + INTEROP_APPLY_MAXIMAL (93, INTEROP_APPLY_MAXIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropApply.serializer, TXN), + + BEGIN_RECOVER (72, BEGIN_RECOVER_REQ, RecoverySerializers.request, TXN ), + BEGIN_INVALIDATE (73, BEGIN_INVALIDATE_REQ, BeginInvalidationSerializers.request, EPOCH), + INFORM_OF_TXN (74, INFORM_OF_TXN_REQ, InformOfTxnIdSerializers.request, EPOCH), + INFORM_DURABLE (75, INFORM_DURABLE_REQ, InformDurableSerializers.request, TXN ), + SET_SHARD_DURABLE (76, SET_SHARD_DURABLE_REQ, SetDurableSerializers.shardDurable, EPOCH), + SET_GLOBALLY_DURABLE (77, SET_GLOBALLY_DURABLE_REQ, SetDurableSerializers.globallyDurable, EPOCH), /* Accord local messages */ - PROPAGATE_PRE_ACCEPT (78, PROPAGATE_PRE_ACCEPT_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_COMMIT (79, PROPAGATE_COMMIT_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_APPLY (80, PROPAGATE_APPLY_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_OTHER (81, PROPAGATE_OTHER_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_PRE_ACCEPT (78, PROPAGATE_PRE_ACCEPT_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_COMMIT (79, PROPAGATE_COMMIT_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_APPLY (80, PROPAGATE_APPLY_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_OTHER (81, PROPAGATE_OTHER_MSG, FetchSerializers.propagate, LOCAL), ; final int id; - final MessageType type; + /** + * An incoming message of a given type from Accord's perspective might have multiple + * concrete implementations some of which are supplied by the Cassandra integration. + * The incoming type specifies the handling for writing out a message to the journal. + */ + final MessageType incomingType; + /** + * The outgoing type is the type that will be returned to Accord and it must be a subclass of the incoming type. + * + * This type will always be from accord.messages.MessageType and never from the extended types in the integration. + */ + final MessageType outgoingType; final TxnIdProvider txnIdProvider; final ValueSerializer serializer; Type(int id, ValueSerializer serializer) { - this(id, null, serializer, null); + this(id, null, null, serializer, null); + } + + Type(int id, MessageType incomingType, MessageType outgoingType, IVersionedSerializer serializer, TxnIdProvider txnIdProvider) + { + //noinspection unchecked + this(id, incomingType, outgoingType, MessageSerializer.wrap((IVersionedSerializer) serializer), txnIdProvider); } Type(int id, MessageType type, IVersionedSerializer serializer, TxnIdProvider txnIdProvider) { //noinspection unchecked - this(id, type, MessageSerializer.wrap((IVersionedSerializer) serializer), txnIdProvider); + this(id, type, type, MessageSerializer.wrap((IVersionedSerializer) serializer), txnIdProvider); } - Type(int id, MessageType type, ValueSerializer serializer, TxnIdProvider txnIdProvider) + Type(int id, MessageType incomingType, MessageType outgoingType, ValueSerializer serializer, TxnIdProvider txnIdProvider) { if (id < 0) throw new IllegalArgumentException("Negative Type id " + id); @@ -533,7 +583,8 @@ public enum Type implements ValueSerializer throw new IllegalArgumentException("Type id doesn't fit in a single byte: " + id); this.id = id; - this.type = type; + this.incomingType = incomingType; + this.outgoingType = outgoingType; //noinspection unchecked this.serializer = (ValueSerializer) serializer; this.txnIdProvider = txnIdProvider; @@ -542,6 +593,8 @@ public enum Type implements ValueSerializer private static final Type[] idToTypeMapping; private static final Map msgTypeToTypeMap; + private static final ListMultimap msgTypeToSynonymousTypesMap; + static { Type[] types = values(); @@ -559,13 +612,26 @@ public enum Type implements ValueSerializer } idToTypeMapping = idToType; - EnumMap msgTypeToType = new EnumMap<>(MessageType.class); + Map msgTypeToType = new HashMap<>(); + for (Type type : types) + { + if (null != type.incomingType && null != msgTypeToType.put(type.incomingType, type)) + throw new IllegalStateException("Duplicate MessageType " + type.incomingType); + } + msgTypeToTypeMap = ImmutableMap.copyOf(msgTypeToType); + + Multimap msgTypeToSynonymousTypes = ArrayListMultimap.create(); for (Type type : types) { - if (null != type.type && null != msgTypeToType.put(type.type, type)) - throw new IllegalStateException("Duplicate MessageType " + type.type); + if (null != type.outgoingType) + { + Type incomingType = msgTypeToTypeMap.get(type.incomingType); + if (msgTypeToSynonymousTypes.get(type.outgoingType).contains(incomingType)) + throw new IllegalStateException("Duplicate synonymous Type " + type.incomingType); + msgTypeToSynonymousTypes.put(type.outgoingType, incomingType); + } } - msgTypeToTypeMap = msgTypeToType; + msgTypeToSynonymousTypesMap = ImmutableListMultimap.copyOf(msgTypeToSynonymousTypes); } static Type fromId(int id) @@ -578,6 +644,14 @@ static Type fromId(int id) return type; } + static List synonymousTypesFromMessageType(MessageType msgType) + { + List synonymousTypes = msgTypeToSynonymousTypesMap.get(msgType); + if (null == synonymousTypes) + throw new IllegalArgumentException("Unsupported MessageType " + msgType); + return synonymousTypes; + } + static Type fromMessageType(MessageType msgType) { Type type = msgTypeToTypeMap.get(msgType); @@ -613,7 +687,7 @@ TxnId txnId(Message message) static { // make noise early if we forget to update our version mappings - Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_50, "Expected current version to be %d but given %d", MessagingService.VERSION_50, MessagingService.current_version); + Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_51, "Expected current version to be %d but given %d", MessagingService.VERSION_51, MessagingService.current_version); } private static int msVersion(int version) @@ -621,7 +695,7 @@ private static int msVersion(int version) switch (version) { default: throw new IllegalArgumentException(); - case 1: return MessagingService.VERSION_50; + case 1: return MessagingService.VERSION_51; } } @@ -693,11 +767,12 @@ public Set test(Set messages) { Set keys = new ObjectHashSet<>(messages.size() + 1, 0.9f); for (MessageType message : messages) - keys.add(new Key(txnId, Type.fromMessageType(message))); + for (Type synonymousType : Type.synonymousTypesFromMessageType(message)) + keys.add(new Key(txnId, synonymousType)); Set presentKeys = journal.test(keys); - EnumSet presentMessages = EnumSet.noneOf(MessageType.class); + Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); for (Key key : presentKeys) - presentMessages.add(key.type.type); + presentMessages.add(key.type.outgoingType); return presentMessages; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 798b529dad92..fc63c620495d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -150,8 +150,8 @@ import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.Throwables; +import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static accord.utils.Invariants.checkArgument; @@ -1753,7 +1753,7 @@ private static IMutation getCommandStoreMetadataMutation(String cql, ByteBuffer. break; } - return Iterables.getOnlyElement(statement.getMutations(clientState, options, true, tsMicros, (int) TimeUnit.MICROSECONDS.toSeconds(tsMicros), Dispatcher.RequestTime.forImmediateExecution())); + return Iterables.getOnlyElement(statement.getMutations(clientState, options, true, tsMicros, (int) TimeUnit.MICROSECONDS.toSeconds(tsMicros), Dispatcher.RequestTime.forImmediateExecution(), false)); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index c51f202d54a9..6c9622cffd0b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -18,14 +18,18 @@ package org.apache.cassandra.service.accord; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; import java.util.Collections; -import java.util.EnumMap; import java.util.EnumSet; +import java.util.List; import java.util.Map; import java.util.Set; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,61 +46,122 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.MessageFlag; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.Verb; +import static accord.messages.MessageType.Kind.REMOTE; + public class AccordMessageSink implements MessageSink { private static final Logger logger = LoggerFactory.getLogger(AccordMessageSink.class); + public static final class AccordMessageType extends MessageType + { + public static final MessageType INTEROP_READ_REQ = amt(REMOTE, false); + public static final MessageType INTEROP_READ_RSP = amt(REMOTE, false); + public static final MessageType INTEROP_READ_REPAIR_REQ = amt(REMOTE, false); + public static final MessageType INTEROP_READ_REPAIR_RSP = amt(REMOTE, false); + public static final MessageType INTEROP_COMMIT_MINIMAL_REQ = amt(REMOTE, true ); + public static final MessageType INTEROP_COMMIT_MAXIMAL_REQ = amt(REMOTE, true ); + public static final MessageType INTEROP_APPLY_MINIMAL_REQ = amt(REMOTE, true ); + public static final MessageType INTEROP_APPLY_MAXIMAL_REQ = amt(REMOTE, true ); + + + public static final List values; + + static + { + ImmutableList.Builder builder = ImmutableList.builder(); + for (Field f : AccordMessageType.class.getDeclaredFields()) + { + if (f.getType().equals(AccordMessageType.class) && Modifier.isStatic(f.getModifiers())) + { + try + { + builder.add((MessageType) f.get(null)); + } + catch (IllegalAccessException e) + { + throw new RuntimeException(e); + } + } + } + values = builder.build(); + } + + private static MessageType amt(MessageType.Kind kind, boolean hasSideEffects) + { + return new AccordMessageType(kind, hasSideEffects); + } + + private AccordMessageType(MessageType.Kind kind, boolean hasSideEffects) + { + super(kind, hasSideEffects); + } + } + private static class VerbMapping { private static final VerbMapping instance = new VerbMapping(); - private final Map mapping = new EnumMap<>(MessageType.class); + private final Map mapping; private final Map> overrideReplyVerbs = ImmutableMap.>builder() - // read takes Result | Nack - .put(Verb.ACCORD_FETCH_DATA_REQ, EnumSet.of(Verb.ACCORD_FETCH_DATA_RSP, Verb.ACCORD_READ_RSP /* nack */)) - .build(); + // read takes Result | Nack + .put(Verb.ACCORD_FETCH_DATA_REQ, EnumSet.of(Verb.ACCORD_FETCH_DATA_RSP, Verb.ACCORD_READ_RSP /* nack */)) + .put(Verb.ACCORD_INTEROP_COMMIT_REQ, EnumSet.of(Verb.ACCORD_INTEROP_READ_RSP, Verb.ACCORD_READ_RSP)) + .put(Verb.ACCORD_INTEROP_READ_REPAIR_REQ, EnumSet.of(Verb.ACCORD_INTEROP_READ_REPAIR_RSP, Verb.ACCORD_READ_RSP)) + .build(); private VerbMapping() { - mapping.put(MessageType.PRE_ACCEPT_REQ, Verb.ACCORD_PRE_ACCEPT_REQ); - mapping.put(MessageType.PRE_ACCEPT_RSP, Verb.ACCORD_PRE_ACCEPT_RSP); - mapping.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); - mapping.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); - mapping.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); - mapping.put(MessageType.COMMIT_MINIMAL_REQ, Verb.ACCORD_COMMIT_REQ); - mapping.put(MessageType.COMMIT_MAXIMAL_REQ, Verb.ACCORD_COMMIT_REQ); - mapping.put(MessageType.COMMIT_INVALIDATE_REQ, Verb.ACCORD_COMMIT_INVALIDATE_REQ); - mapping.put(MessageType.APPLY_MINIMAL_REQ, Verb.ACCORD_APPLY_REQ); - mapping.put(MessageType.APPLY_MAXIMAL_REQ, Verb.ACCORD_APPLY_REQ); - mapping.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); - mapping.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); - mapping.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); - mapping.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_BEGIN_RECOVER_REQ); - mapping.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_BEGIN_RECOVER_RSP); - mapping.put(MessageType.BEGIN_INVALIDATE_REQ, Verb.ACCORD_BEGIN_INVALIDATE_REQ); - mapping.put(MessageType.BEGIN_INVALIDATE_RSP, Verb.ACCORD_BEGIN_INVALIDATE_RSP); - mapping.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); - mapping.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); - mapping.put(MessageType.WAIT_ON_APPLY_REQ, Verb.ACCORD_WAIT_ON_APPLY_REQ); - mapping.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); - mapping.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); - mapping.put(MessageType.INFORM_HOME_DURABLE_REQ, Verb.ACCORD_INFORM_HOME_DURABLE_REQ); - mapping.put(MessageType.CHECK_STATUS_REQ, Verb.ACCORD_CHECK_STATUS_REQ); - mapping.put(MessageType.CHECK_STATUS_RSP, Verb.ACCORD_CHECK_STATUS_RSP); - mapping.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); - mapping.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); - mapping.put(MessageType.SIMPLE_RSP, Verb.ACCORD_SIMPLE_RSP); - mapping.put(MessageType.FETCH_DATA_REQ, Verb.ACCORD_FETCH_DATA_REQ); - mapping.put(MessageType.FETCH_DATA_RSP, Verb.ACCORD_FETCH_DATA_RSP); - mapping.put(MessageType.SET_SHARD_DURABLE_REQ, Verb.ACCORD_SET_SHARD_DURABLE_REQ); - mapping.put(MessageType.SET_GLOBALLY_DURABLE_REQ, Verb.ACCORD_SET_GLOBALLY_DURABLE_REQ); - mapping.put(MessageType.QUERY_DURABLE_BEFORE_REQ, Verb.ACCORD_QUERY_DURABLE_BEFORE_REQ); - mapping.put(MessageType.QUERY_DURABLE_BEFORE_RSP, Verb.ACCORD_QUERY_DURABLE_BEFORE_RSP); - - for (MessageType type : MessageType.values()) + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.put(MessageType.SIMPLE_RSP, Verb.ACCORD_SIMPLE_RSP); + builder.put(MessageType.PRE_ACCEPT_REQ, Verb.ACCORD_PRE_ACCEPT_REQ); + builder.put(MessageType.PRE_ACCEPT_RSP, Verb.ACCORD_PRE_ACCEPT_RSP); + builder.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); + builder.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); + builder.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); + builder.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); + builder.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); + builder.put(MessageType.COMMIT_MINIMAL_REQ, Verb.ACCORD_COMMIT_REQ); + builder.put(MessageType.COMMIT_MAXIMAL_REQ, Verb.ACCORD_COMMIT_REQ); + builder.put(MessageType.COMMIT_INVALIDATE_REQ, Verb.ACCORD_COMMIT_INVALIDATE_REQ); + builder.put(MessageType.APPLY_MINIMAL_REQ, Verb.ACCORD_APPLY_REQ); + builder.put(MessageType.APPLY_MAXIMAL_REQ, Verb.ACCORD_APPLY_REQ); + builder.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); + builder.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); + builder.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); + builder.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_BEGIN_RECOVER_REQ); + builder.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_BEGIN_RECOVER_RSP); + builder.put(MessageType.BEGIN_INVALIDATE_REQ, Verb.ACCORD_BEGIN_INVALIDATE_REQ); + builder.put(MessageType.BEGIN_INVALIDATE_RSP, Verb.ACCORD_BEGIN_INVALIDATE_RSP); + builder.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); + builder.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); + builder.put(MessageType.WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_WAIT_UNTIL_APPLIED_REQ); + builder.put(MessageType.APPLY_AND_WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_APPLY_AND_WAIT_UNTIL_APPLIED_REQ); + builder.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); + builder.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); + builder.put(MessageType.INFORM_HOME_DURABLE_REQ, Verb.ACCORD_INFORM_HOME_DURABLE_REQ); + builder.put(MessageType.CHECK_STATUS_REQ, Verb.ACCORD_CHECK_STATUS_REQ); + builder.put(MessageType.CHECK_STATUS_RSP, Verb.ACCORD_CHECK_STATUS_RSP); + builder.put(MessageType.FETCH_DATA_REQ, Verb.ACCORD_FETCH_DATA_REQ); + builder.put(MessageType.FETCH_DATA_RSP, Verb.ACCORD_FETCH_DATA_RSP); + builder.put(MessageType.SET_SHARD_DURABLE_REQ, Verb.ACCORD_SET_SHARD_DURABLE_REQ); + builder.put(MessageType.SET_GLOBALLY_DURABLE_REQ, Verb.ACCORD_SET_GLOBALLY_DURABLE_REQ); + builder.put(MessageType.QUERY_DURABLE_BEFORE_REQ, Verb.ACCORD_QUERY_DURABLE_BEFORE_REQ); + builder.put(MessageType.QUERY_DURABLE_BEFORE_RSP, Verb.ACCORD_QUERY_DURABLE_BEFORE_RSP); + builder.put(AccordMessageType.INTEROP_READ_REQ, Verb.ACCORD_INTEROP_READ_REQ); + builder.put(AccordMessageType.INTEROP_READ_RSP, Verb.ACCORD_INTEROP_READ_RSP); + builder.put(AccordMessageType.INTEROP_READ_REPAIR_REQ, Verb.ACCORD_INTEROP_READ_REPAIR_REQ); + builder.put(AccordMessageType.INTEROP_READ_REPAIR_RSP, Verb.ACCORD_INTEROP_READ_REPAIR_RSP); + builder.put(AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ, Verb.ACCORD_INTEROP_COMMIT_REQ); + builder.put(AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ, Verb.ACCORD_INTEROP_COMMIT_REQ); + builder.put(AccordMessageType.INTEROP_APPLY_MINIMAL_REQ, Verb.ACCORD_APPLY_REQ); + builder.put(AccordMessageType.INTEROP_APPLY_MAXIMAL_REQ, Verb.ACCORD_APPLY_REQ); + mapping = builder.build(); + + for (MessageType type : Iterables.concat(AccordMessageType.values, MessageType.values)) { // Any request can receive a generic failure response if (type == MessageType.FAILURE_RSP) @@ -119,6 +184,15 @@ private static Verb getVerb(MessageType type) return VerbMapping.instance.mapping.get(type); } + private static Verb getVerb(Request request) + { + MessageType type = request.type(); + if (type != null) + return getVerb(request.type()); + + return null; + } + private final Agent agent; private final MessageDelivery messaging; private final AccordEndpointMapper endpointMapper; @@ -138,7 +212,7 @@ public AccordMessageSink(Agent agent, AccordConfigurationService endpointMapper) @Override public void send(Node.Id to, Request request) { - Verb verb = getVerb(request.type()); + Verb verb = getVerb(request); Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); @@ -149,7 +223,7 @@ public void send(Node.Id to, Request request) @Override public void send(Node.Id to, Request request, AgentExecutor executor, Callback callback) { - Verb verb = getVerb(request.type()); + Verb verb = getVerb(request); Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); @@ -162,6 +236,8 @@ public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply { Message replyTo = (Message) replyContext; Message replyMsg = replyTo.responseWith(reply); + if (!reply.isFinal()) + replyMsg = replyMsg.withFlag(MessageFlag.NOT_FINAL); checkReplyType(reply, replyTo); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); logger.debug("Replying {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index bff25a9c05f8..8630377a5275 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -60,10 +60,10 @@ import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; -import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; -import org.apache.cassandra.service.accord.txn.TxnUpdate; +import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; @@ -197,7 +197,7 @@ public static long txn(PartialTxn txn) size += seekables(txn.keys()); size += ((TxnRead) txn.read()).estimatedSizeOnHeap(); if (txn.update() != null) - size += ((TxnUpdate) txn.update()).estimatedSizeOnHeap(); + size += ((AccordUpdate) txn.update()).estimatedSizeOnHeap(); if (txn.query() != null) size += ((TxnQuery) txn.query()).estimatedSizeOnHeap(); return size; @@ -250,7 +250,7 @@ public static long writes(Writes writes) public static long results(Result result) { - return ((TxnData) result).estimatedSizeOnHeap(); + return ((TxnResult) result).estimatedSizeOnHeap(); } private static final long EMPTY_COMMAND_LISTENER = measure(new Command.ProxyListener(null)); @@ -331,7 +331,7 @@ public static long command(Command command) size += sizeNullable(command.accepted(), AccordObjectSizes::timestamp); size += sizeNullable(command.writes(), AccordObjectSizes::writes); - if (command.result() instanceof TxnData) + if (command.result() instanceof TxnResult) size += sizeNullable(command.result(), AccordObjectSizes::results); if (!(command instanceof Command.Committed)) diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index 516cdead1294..6009e52c3aa9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -18,12 +18,13 @@ package org.apache.cassandra.service.accord; -import java.util.Collection; import java.util.Objects; import com.google.common.annotations.VisibleForTesting; import accord.local.Command; +import accord.local.Command.TransientListener; +import accord.local.Listeners; import accord.local.SafeCommand; import accord.primitives.TxnId; @@ -138,7 +139,7 @@ public boolean removeListener(Command.TransientListener listener) } @Override - public Collection transientListeners() + public Listeners transientListeners() { checkNotInvalidated(); return global.listeners(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index e9bfc0d15d76..4fb9bc203cfd 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -21,8 +21,11 @@ import java.util.Map; import java.util.NavigableMap; import java.util.function.BiFunction; +import java.util.function.Predicate; import javax.annotation.Nullable; +import com.google.common.base.Predicates; + import accord.api.Agent; import accord.api.DataStore; import accord.api.Key; @@ -156,7 +159,7 @@ public long latestEpoch() @Override public Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) { - Timestamp maxConflict = mapReduce(keysOrRanges, slice, (ts, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, null); + Timestamp maxConflict = mapReduce(keysOrRanges, slice, (ts, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, Predicates.isNull()); return Timestamp.nonNullOrMax(maxConflict, commandStore.commandsForRanges().maxRedundant()); } @@ -198,15 +201,15 @@ public void erase(SafeCommand safeCommand) { } - private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) { - accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate, terminalValue); - if (accumulate.equals(terminalValue)) + accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate, terminate); + if (terminate.test(accumulate)) return accumulate; - return mapReduceForKey(keysOrRanges, slice, map, accumulate, terminalValue); + return mapReduceForKey(keysOrRanges, slice, map, accumulate, terminate); } - private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, O terminalValue) + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) { switch (keysOrRanges.domain()) { @@ -221,7 +224,7 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio if (!slice.contains(key)) continue; SafeCommandsForKey forKey = commandsForKey(key); accumulate = map.apply(forKey.current(), accumulate); - if (accumulate.equals(terminalValue)) + if (terminate.test((accumulate))) return accumulate; } } @@ -239,7 +242,7 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio if (!sliced.contains(key)) continue; SafeCommandsForKey forKey = commandsForKey(key); accumulate = map.apply(forKey.current(), accumulate); - if (accumulate.equals(terminalValue)) + if (terminate.test(accumulate)) return accumulate; } } @@ -251,6 +254,12 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio @Override public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, T terminalValue) { + Predicate terminate = Predicates.equalTo(terminalValue); + return mapReduceWithTerminate(keysOrRanges, slice, testKind, testTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, accumulate, terminate); + } + + @Override + public T mapReduceWithTerminate(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, Predicate terminate) { accumulate = mapReduce(keysOrRanges, slice, (forKey, prev) -> { CommandTimeseries timeseries; switch (testTimestamp) @@ -276,8 +285,8 @@ public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind test case MAY_EXECUTE_BEFORE: remapTestTimestamp = CommandTimeseries.TestTimestamp.BEFORE; } - return timeseries.mapReduce(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, prev, terminalValue); - }, accumulate, terminalValue); + return timeseries.mapReduceWithTerminate(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, prev, terminate); + }, accumulate, terminate); return accumulate; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordSerializers.java index 43dc7c84f59d..557941afd03d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSerializers.java @@ -27,6 +27,7 @@ import org.apache.cassandra.db.ArrayClustering; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ClusteringPrefix; +import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.CollectionType; import org.apache.cassandra.db.marshal.ListType; @@ -46,6 +47,7 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.NullableSerializer; import static org.apache.cassandra.db.TypeSizes.sizeof; import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; @@ -127,6 +129,8 @@ public long serializedSize(PartitionUpdate upd, int version) } }; + public static final IVersionedSerializer nullablePartitionUpdateSerializer = NullableSerializer.wrap(partitionUpdateSerializer); + public static final IVersionedSerializer columnMetadataSerializer = new IVersionedSerializer() { @Override @@ -246,4 +250,25 @@ private long computeSerializedSize(Clustering clustering) return size; } }; + + public static final IVersionedSerializer consistencyLevelSerializer = new IVersionedSerializer() + { + @Override + public void serialize(ConsistencyLevel t, DataOutputPlus out, int version) throws IOException + { + out.writeByte(t.code); + } + + @Override + public ConsistencyLevel deserialize(DataInputPlus in, int version) throws IOException + { + return ConsistencyLevel.fromCode(in.readByte()); + } + + @Override + public long serializedSize(ConsistencyLevel t, int version) + { + return 1; + } + }; } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 5de4daa233a8..fd115f8943bb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -24,14 +24,17 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.Nonnull; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.BarrierType; import accord.api.Result; import accord.config.LocalConfig; +import accord.coordinate.CoordinationFailed; import accord.coordinate.Preempted; import accord.coordinate.Timeout; import accord.impl.AbstractConfigurationService; @@ -39,12 +42,16 @@ import accord.impl.SizeOfIntersectionSorter; import accord.local.DurableBefore; import accord.local.Node; +import accord.local.Node.Id; import accord.local.NodeTimeService; import accord.local.RedundantBefore; import accord.local.ShardDistributor.EvenSplit; import accord.messages.LocalMessage; import accord.messages.Request; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; import accord.primitives.Txn; +import accord.primitives.Txn.Kind; import accord.primitives.TxnId; import accord.topology.TopologyManager; import accord.utils.DefaultRandom; @@ -59,7 +66,9 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.exceptions.ExceptionCode; import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.journal.AsyncWriteCallback; import org.apache.cassandra.metrics.AccordClientRequestMetrics; @@ -75,29 +84,38 @@ import org.apache.cassandra.service.accord.api.CompositeTopologySorter; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; -import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.interop.AccordInteropApply; +import org.apache.cassandra.service.accord.interop.AccordInteropExecution; +import org.apache.cassandra.service.accord.interop.AccordInteropPersist; +import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.transformations.AddAccordKeyspace; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.ImmediateFuture; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static accord.messages.SimpleReply.Ok; +import static accord.utils.Invariants.checkState; +import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; +import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordReadMetrics; +import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordWriteMetrics; import static org.apache.cassandra.utils.Clock.Global.nanoTime; public class AccordService implements IAccordService, Shutdownable { private static final Logger logger = LoggerFactory.getLogger(AccordService.class); - public static final AccordClientRequestMetrics readMetrics = new AccordClientRequestMetrics("AccordRead"); - public static final AccordClientRequestMetrics writeMetrics = new AccordClientRequestMetrics("AccordWrite"); private static final Future BOOTSTRAP_SUCCESS = ImmediateFuture.success(null); private final Node node; @@ -119,7 +137,13 @@ public IVerbHandler verbHandler() } @Override - public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) + public long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + { + throw new UnsupportedOperationException("No accord barriers should be executed when accord_transactions_enabled = false in cassandra.yaml"); + } + + @Override + public @Nonnull TxnResult coordinate(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, @Nonnull Dispatcher.RequestTime requestTime) { throw new UnsupportedOperationException("No accord transaction should be executed when accord.enabled = false in cassandra.yaml"); } @@ -181,6 +205,9 @@ public Pair, DurableBefore> getRedundantBefor { return Pair.create(new Int2ObjectHashMap<>(), DurableBefore.EMPTY); } + + @Override + public void ensureKeyspaceIsAccordManaged(String keyspace) {} }; private static volatile Node.Id localId = null; @@ -257,6 +284,9 @@ private AccordService() new AccordTopologySorter.Supplier(configService, DatabaseDescriptor.getNodeProximity())), SimpleProgressLog::new, AccordCommandStores.factory(journal), + new AccordInteropExecution.Factory(agent, configService), + AccordInteropPersist.FACTORY, + AccordInteropApply.FACTORY, configuration); this.nodeShutdown = toShutdownable(node); this.verbHandler = new AccordVerbHandler<>(node, configService, journal); @@ -276,35 +306,136 @@ public IVerbHandler verbHandler() return verbHandler; } + @Override + public long barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + { + AccordClientRequestMetrics metrics = isForWrite ? accordWriteMetrics : accordReadMetrics; + TxnId txnId = null; + try + { + logger.debug("Starting barrier key: {} epoch: {} barrierType: {} isForWrite {}", keysOrRanges, epoch, barrierType, isForWrite); + txnId = node.nextTxnId(Kind.SyncPoint, keysOrRanges.domain()); + AsyncResult asyncResult = node.barrier(keysOrRanges, epoch, barrierType); + long deadlineNanos = requestTime.startedAtNanos() + timeoutNanos; + Timestamp barrierExecuteAt = AsyncChains.getBlocking(asyncResult, deadlineNanos - nanoTime(), NANOSECONDS); + logger.debug("Completed in {}ms barrier key: {} epoch: {} barrierType: {} isForWrite {}", + NANOSECONDS.toMillis(nanoTime() - requestTime.startedAtNanos()), + keysOrRanges, epoch, barrierType, isForWrite); + return barrierExecuteAt.epoch(); + } + catch (ExecutionException e) + { + Throwable cause = e.getCause(); + if (cause instanceof Timeout) + { + metrics.timeouts.mark(); + throw newBarrierTimeout(txnId, barrierType.global); + } + if (cause instanceof Preempted) + { + //TODO need to improve + // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. + // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match + throw newBarrierPreempted(txnId, barrierType.global); + } + metrics.failures.mark(); + throw new RuntimeException(cause); + } + catch (InterruptedException e) + { + metrics.failures.mark(); + throw new UncheckedInterruptedException(e); + } + catch (TimeoutException e) + { + metrics.timeouts.mark(); + throw newBarrierTimeout(txnId, barrierType.global); + } + finally + { + // TODO Should barriers have a dedicated latency metric? Should it be a read/write metric? + // What about counts for timeouts/failures/preempts? + metrics.addNano(nanoTime() - requestTime.startedAtNanos()); + } + } + + private static ReadTimeoutException newBarrierTimeout(TxnId txnId, boolean global) + { + return new ReadTimeoutException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); + } + + private static ReadTimeoutException newBarrierPreempted(TxnId txnId, boolean global) + { + return new ReadPreemptedException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); + } + + @Override + public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + { + // Since we could end up having the barrier transaction or the transaction it listens to invalidated + CoordinationFailed existingFailures = null; + Long success = null; + long backoffMillis = 0; + for (int attempt = 0; attempt < DatabaseDescriptor.getAccordBarrierRetryAttempts(); attempt++) + { + try + { + Thread.sleep(backoffMillis); + } + catch (InterruptedException e) + { + if (existingFailures != null) + e.addSuppressed(existingFailures); + throw e; + } + backoffMillis = backoffMillis == 0 ? DatabaseDescriptor.getAccordBarrierRetryInitialBackoffMillis() : Math.min(backoffMillis * 2, DatabaseDescriptor.getAccordBarrierRetryMaxBackoffMillis()); + try + { + success = AccordService.instance().barrier(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite); + break; + } + catch (CoordinationFailed newFailures) + { + existingFailures = Throwables.merge(existingFailures, newFailures); + } + } + if (success == null) + { + checkState(existingFailures != null, "Didn't have success, but also didn't have failures"); + throw existingFailures; + } + return success; + } + @Override public long currentEpoch() { return configService.currentEpoch(); } - + @Override public TopologyManager topology() { return node.topology(); } - + /** * Consistency level is just echoed back in timeouts, in the future it may be used for interoperability * with non-Accord operations. */ @Override - public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) + public @Nonnull TxnResult coordinate(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { - AccordClientRequestMetrics metrics = txn.isWrite() ? writeMetrics : readMetrics; + AccordClientRequestMetrics metrics = txn.isWrite() ? accordWriteMetrics : accordReadMetrics; TxnId txnId = null; - final long startNanos = nanoTime(); try { metrics.keySize.update(txn.keys().size()); txnId = node.nextTxnId(txn.kind(), txn.keys().domain()); + long deadlineNanos = requestTime.startedAtNanos() + DatabaseDescriptor.getTransactionTimeout(NANOSECONDS); AsyncResult asyncResult = node.coordinate(txnId, txn); - Result result = AsyncChains.getBlocking(asyncResult, DatabaseDescriptor.getTransactionTimeout(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); - return (TxnData) result; + Result result = AsyncChains.getBlocking(asyncResult, deadlineNanos - nanoTime(), NANOSECONDS); + return (TxnResult) result; } catch (ExecutionException e) { @@ -312,14 +443,14 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) if (cause instanceof Timeout) { metrics.timeouts.mark(); - throw throwTimeout(txnId, txn, consistencyLevel); + throw newTimeout(txnId, txn, consistencyLevel); } if (cause instanceof Preempted) { //TODO need to improve // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match - throw throwPreempted(txnId, txn, consistencyLevel); + throw newPreempted(txnId, txn, consistencyLevel); } metrics.failures.mark(); throw new RuntimeException(cause); @@ -332,11 +463,11 @@ public TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel) catch (TimeoutException e) { metrics.timeouts.mark(); - throw throwTimeout(txnId, txn, consistencyLevel); + throw newTimeout(txnId, txn, consistencyLevel); } finally { - metrics.addNano(nanoTime() - startNanos); + metrics.addNano(nanoTime() - requestTime.startedAtNanos()); } } @@ -371,13 +502,13 @@ public void onFailure(Throwable error) }); } - private static RuntimeException throwTimeout(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) + private static RequestTimeoutException newTimeout(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) { throw txn.isWrite() ? new WriteTimeoutException(WriteType.CAS, consistencyLevel, 0, 0, txnId.toString()) : new ReadTimeoutException(consistencyLevel, 0, 0, false, txnId.toString()); } - private static RuntimeException throwPreempted(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) + private static RuntimeException newPreempted(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) { throw txn.isWrite() ? new WritePreemptedException(WriteType.CAS, consistencyLevel, 0, 0, txnId.toString()) : new ReadPreemptedException(consistencyLevel, 0, 0, false, txnId.toString()); @@ -438,6 +569,11 @@ public AccordScheduler scheduler() return scheduler; } + public Id nodeId() + { + return node.id(); + } + @VisibleForTesting public Node node() { @@ -523,6 +659,22 @@ public boolean isAccordManagedKeyspace(String keyspace) return ClusterMetadata.current().accordKeyspaces.contains(keyspace); } + @Override + public void ensureKeyspaceIsAccordManaged(String keyspace) + { + if (isAccordManagedKeyspace(keyspace)) + return; + ClusterMetadataService.instance().commit(new AddAccordKeyspace(keyspace), + metadata -> null, + (code, message) -> { + Invariants.checkState(code == ExceptionCode.ALREADY_EXISTS, + "Expected %s, got %s", ExceptionCode.ALREADY_EXISTS, code); + return null; + }); + // we need to avoid creating a txnId in an epoch when no one has any ranges + FBUtilities.waitOnFuture(AccordService.instance().epochReady(ClusterMetadata.current().epoch)); + } + @Override public Pair, DurableBefore> getRedundantBeforesAndDurableBefore() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index 88a0d18b7fc6..d8b757941ad9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -37,13 +37,11 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.locator.EndpointsForRange; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.ownership.DataPlacement; @@ -120,17 +118,17 @@ public static List createShards(KeyspaceMetadata keyspace, DataPlacements return shards; } - public static Topology createAccordTopology(Epoch epoch, DistributedSchema schema, DataPlacements placements, Directory directory, Predicate keyspacePredicate) + public static Topology createAccordTopology(ClusterMetadata cm, Predicate keyspacePredicate) { List shards = new ArrayList<>(); - for (KeyspaceMetadata keyspace : schema.getKeyspaces()) + for (KeyspaceMetadata keyspace : cm.schema.getKeyspaces()) { if (!keyspacePredicate.test(keyspace.name)) continue; - shards.addAll(createShards(keyspace, placements, directory)); + shards.addAll(createShards(keyspace, cm.placements, cm.directory)); } shards.sort((a, b) -> a.range.compare(b.range)); - return new Topology(epoch.getEpoch(), shards.toArray(new Shard[0])); + return new Topology(cm.epoch.getEpoch(), shards.toArray(new Shard[0])); } public static EndpointMapping directoryToMapping(EndpointMapping mapping, long epoch, Directory directory) @@ -146,11 +144,6 @@ public static EndpointMapping directoryToMapping(EndpointMapping mapping, long e return builder.build(); } - public static Topology createAccordTopology(ClusterMetadata metadata, Predicate keyspacePredicate) - { - return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, keyspacePredicate); - } - public static Topology createAccordTopology(ClusterMetadata metadata) { return createAccordTopology(metadata, metadata.accordKeyspaces::contains); diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 5ca68d10fccf..5610ebd9c2c8 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -18,30 +18,74 @@ package org.apache.cassandra.service.accord; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import com.google.common.collect.ImmutableSet; + +import accord.api.BarrierType; import accord.local.DurableBefore; +import accord.local.Node.Id; import accord.local.RedundantBefore; import accord.messages.Request; +import accord.primitives.Ranges; +import accord.primitives.Seekables; import accord.primitives.Txn; import accord.topology.TopologyManager; import org.agrona.collections.Int2ObjectHashMap; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; +import org.apache.cassandra.service.accord.api.AccordRoutableKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.AccordScheduler; -import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.Future; public interface IAccordService { + Set SUPPORTED_COMMIT_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); + Set SUPPORTED_READ_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL); + IVerbHandler verbHandler(); - TxnData coordinate(Txn txn, ConsistencyLevel consistencyLevel); + default long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + { + throw new UnsupportedOperationException(); + } + + long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite); + + default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List> ranges) + { + String ks = cfs.keyspace.getName(); + Ranges accordRanges = Ranges.of(ranges + .stream() + .map(r -> new TokenRange(new TokenKey(ks, r.left), new TokenKey(ks, r.right))) + .collect(Collectors.toList()) + .toArray(new accord.primitives.Range[0])); + try + { + barrierWithRetries(accordRanges, Epoch.FIRST.getEpoch(), BarrierType.global_async, true); + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + } + + @Nonnull TxnResult coordinate(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime); long currentEpoch(); @@ -74,4 +118,26 @@ public interface IAccordService * Fetch the redundnant befores for every command store */ Pair, DurableBefore> getRedundantBeforesAndDurableBefore(); + + default Id nodeId() { throw new UnsupportedOperationException(); } + + default void maybeConvertKeyspacesToAccord(Txn txn) + { + Set allKeyspaces = new HashSet<>(); + txn.keys().forEach(key -> allKeyspaces.add(((AccordRoutableKey) key).keyspace())); + + for (String keyspace : allKeyspaces) + { + + ensureKeyspaceIsAccordManaged(keyspace); + } + + for (String keyspace : allKeyspaces) + { + if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) + throw new IllegalStateException(keyspace + " is not an accord managed keyspace"); + } + } + + void ensureKeyspaceIsAccordManaged(String keyspace); } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index e01587e61b3e..f6a57247670e 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -23,6 +23,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nonnull; + import accord.api.Agent; import accord.api.EventsListener; import accord.api.Result; @@ -32,16 +34,20 @@ import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; +import accord.primitives.Txn.Kind; import accord.primitives.TxnId; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.metrics.AccordMetrics; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.JVMStabilityInspector; +import static accord.primitives.Routable.Domain.Key; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getReadRpcTimeout; +import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.maybeSaveAccordKeyMigrationLocally; // TODO (expected): merge with AccordService public class AccordAgent implements Agent @@ -78,6 +84,16 @@ public void onFailedBootstrap(String phase, Ranges ranges, Runnable retry, Throw AccordService.instance().scheduler().once(retry, retryBootstrapDelayMicros, MICROSECONDS); } + @Override + public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull Timestamp executeAt) + { + if (keysOrRanges.domain() == Key) + { + PartitionKey key = (PartitionKey)keysOrRanges.get(0); + maybeSaveAccordKeyMigrationLocally(key, Epoch.create(executeAt.epoch())); + } + } + @Override public void onUncaughtException(Throwable t) { @@ -99,9 +115,9 @@ public boolean isExpired(TxnId initiated, long now) } @Override - public Txn emptyTxn(Txn.Kind kind, Seekables keysOrRanges) + public Txn emptyTxn(Kind kind, Seekables seekables) { - return new Txn.InMemory(kind, keysOrRanges, TxnRead.EMPTY, TxnQuery.ALL, null); + return new Txn.InMemory(kind, seekables, TxnRead.EMPTY, TxnQuery.EMPTY, null); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index 2c4e58ee8302..a8dac58234dd 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -33,6 +33,7 @@ import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.db.partitions.Partition; +import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -70,6 +71,11 @@ public static PartitionKey of(Key key) return (PartitionKey) key; } + public static PartitionKey of(PartitionUpdate update) + { + return new PartitionKey(update.metadata().keyspace, update.metadata().id, update.partitionKey()); + } + public static PartitionKey of(Partition partition) { return new PartitionKey(partition.metadata().keyspace, partition.metadata().id, partition.partitionKey()); diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java new file mode 100644 index 000000000000..22821bab392a --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import java.util.BitSet; +import javax.annotation.Nullable; + +import accord.api.Result; +import accord.local.Command; +import accord.local.Node.Id; +import accord.local.PreLoadContext; +import accord.local.SafeCommand; +import accord.local.SafeCommandStore; +import accord.local.Status; +import accord.messages.Apply; +import accord.messages.MessageType; +import accord.primitives.Deps; +import accord.primitives.Keys; +import accord.primitives.PartialDeps; +import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Route; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.topology.Topologies; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType; +import org.apache.cassandra.service.accord.serializers.ApplySerializers.ApplySerializer; +import org.apache.cassandra.service.accord.txn.AccordUpdate; + +import static accord.utils.Invariants.checkState; +import static accord.utils.MapReduceConsume.forEach; +import static com.google.common.base.Preconditions.checkArgument; + +/** + * Apply that waits until the transaction is actually applied before sending a response + * // TODO (desired): At this point there are a plethora of do X to Command, then wait until state Y before maybe doing Z and returning a response, potentially returning insufficient along the way + * // and these all are a bit copy pasta in terms of managing things like waiting on, obsoletion, cancellation/listeners, insufficient etc. and it would be less fragile + * // in the long run to not duplicate these kind of difficult to get right mechanism and have a single pluggable framework to request each specific behavior + */ +public class AccordInteropApply extends Apply implements Command.TransientListener +{ + public static final Apply.Factory FACTORY = new Apply.Factory() + { + @Override + public Apply create(Kind kind, Id to, Topologies participates, Topologies executes, TxnId txnId, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) + { + checkArgument(kind != Kind.Maximal, "Shouldn't need to send a maximal commit with interop support"); + ConsistencyLevel commitCL = txn.update() instanceof AccordUpdate ? ((AccordUpdate) txn.update()).cassandraCommitCL() : null; + // Any asynchronous apply option should use the regular Apply that doesn't wait for writes to complete + if (commitCL == null || commitCL == ConsistencyLevel.ANY) + return Apply.FACTORY.create(kind, to, participates, executes, txnId, route, txn, executeAt, deps, writes, result); + return new AccordInteropApply(kind, to, participates, executes, txnId, route, txn, executeAt, deps, writes, result); + } + }; + + public static final IVersionedSerializer serializer = new ApplySerializer() + { + @Override + protected AccordInteropApply deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, Writes writes, Result result) + { + return new AccordInteropApply(kind, txnId, scope, waitForEpoch, keys, executeAt, deps, txn, writes, result); + } + }; + + transient BitSet waitingOn; + transient int waitingOnCount; + + private AccordInteropApply(Kind kind, TxnId txnId, PartialRoute route, long waitForEpoch, Seekables keys, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, Writes writes, Result result) + { + super(kind, txnId, route, waitForEpoch, keys, executeAt, deps, txn, writes, result); + } + + private AccordInteropApply(Kind kind, Id to, Topologies participates, Topologies executes, TxnId txnId, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) + { + super(kind, to, participates, executes, txnId, route, txn, executeAt, deps, writes, result); + } + + @Override + public void process() + { + waitingOn = new BitSet(); + super.process(); + } + + + @Override + public ApplyReply apply(SafeCommandStore safeStore) + { + ApplyReply reply = super.apply(safeStore); + checkState(reply == ApplyReply.Redundant || reply == ApplyReply.Applied || reply == ApplyReply.Insufficient, "Unexpected ApplyReply"); + + // Hasn't necessarily finished applying yet so need to check and maybe add a listener + // Redundant means we are competing with a recovery coordinator which is fine + // we don't need to return an error we can wait for the Apply + // Insufficient means it is safe to install the listener and wait for Apply to happen + // once the coordinator sends a maximal commit + // Applied doesn't actually mean the command is in the Applied state so we still need to check and maybe install + // the listener + SafeCommand safeCommand = safeStore.get(txnId, executeAt, scope); + Command current = safeCommand.current(); + // Don't actually think it is possible for this to reach applied while we are stll running, but just to be safe + // check anyways + Status status = current.status(); + switch (status) + { + default: throw new AssertionError(); + case NotDefined: + case PreAccepted: + case Accepted: + case AcceptedInvalidate: + case PreCommitted: + case Committed: + case PreApplied: + case ReadyToExecute: + synchronized (this) + { + waitingOn.set(safeStore.commandStore().id()); + ++waitingOnCount; + } + safeCommand.addListener(this); + break; + + case Applied: + case Invalidated: + case Truncated: + } + + return reply; + } + + private synchronized void ack() + { + // wait for -1 to ensure the setup phase has also completed. Setup calls ack in its callback + // and prevents races where we respond before dispatching all the required reads (if the reads are + // completing faster than the reads can be setup on all required shards) + if (-1 == --waitingOnCount) + { + node.reply(replyTo, replyContext, ApplyReply.Applied, null); + } + } + + @Override + public ApplyReply reduce(ApplyReply r1, ApplyReply r2) + { + return r1 == null || r2 == null + ? r1 == null ? r2 : r1 + : r1.compareTo(r2) >= 0 ? r1 : r2; + } + + @Override + public void accept(ApplyReply reply, Throwable failure) + { + if (reply == ApplyReply.Insufficient) + { + // Respond with insufficient which should make the coordinator send us the commit + // we need to respond + node.reply(replyTo, replyContext, reply, failure); + } + else if (failure != null) + { + node.reply(replyTo, replyContext, null, failure); + node.agent().onUncaughtException(failure); + cancel(); + } + + // Unless failed always ack to indicate setup has completed otherwise the counter never gets to -1 + if (failure == null) + ack(); + } + + private void cancel() + { + node.commandStores().mapReduceConsume(this, waitingOn.stream(), forEach(safeStore -> { + SafeCommand safeCommand = safeStore.ifInitialised(txnId); + if (safeCommand != null) + safeCommand.removeListener(this); + }, node.agent())); + } + + @Override + public TxnId primaryTxnId() + { + return txnId; + } + + @Override + public Seekables keys() + { + if (txn == null) return Keys.EMPTY; + return txn.keys(); + } + + @Override + public MessageType type() + { + switch (kind) + { + case Minimal: return AccordMessageType.INTEROP_APPLY_MINIMAL_REQ; + case Maximal: return AccordMessageType.INTEROP_APPLY_MAXIMAL_REQ; + default: throw new IllegalStateException(); + } + } + + @Override + public String toString() + { + return "AccordInteropApply{" + + "txnId:" + txnId + + ", deps:" + deps + + ", executeAt:" + executeAt + + ", writes:" + writes + + ", result:" + result + + '}'; + } + + @Override + public void onChange(SafeCommandStore safeStore, SafeCommand safeCommand) + { + Command command = safeCommand.current(); + + switch (command.status()) + { + default: throw new AssertionError(); + case NotDefined: + case PreAccepted: + case Accepted: + case AcceptedInvalidate: + case PreCommitted: + case Committed: + case PreApplied: + case ReadyToExecute: + return; + + case Applied: + case Invalidated: + case Truncated: + } + + if (safeCommand.removeListener(this)) + ack(); + } + + @Override + public PreLoadContext listenerPreLoadContext(TxnId caller) + { + return PreLoadContext.contextFor(txnId); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java new file mode 100644 index 000000000000..e3051bf64455 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import accord.local.Node; +import accord.messages.Commit; +import accord.messages.MessageType; +import accord.messages.ReadData; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.PartialDeps; +import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.topology.Topologies; +import accord.topology.Topology; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType; +import org.apache.cassandra.service.accord.serializers.CommitSerializers.CommitSerializer; + +public class AccordInteropCommit extends Commit +{ + public static final IVersionedSerializer serializer = new CommitSerializer(AccordInteropRead.class, AccordInteropRead.requestSerializer) + { + @Override + protected AccordInteropCommit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Kind kind, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + { + return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, executeAt, partialTxn, partialDeps, fullRoute, read); + } + }; + + public AccordInteropCommit(Kind kind, TxnId txnId, PartialRoute scope, long waitForEpoch, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) + { + super(kind, txnId, scope, waitForEpoch, executeAt, partialTxn, partialDeps, fullRoute, readData); + } + + public AccordInteropCommit(Kind kind, Node.Id to, Topology coordinateTopology, Topologies topologies, TxnId txnId, Txn txn, FullRoute route, Timestamp executeAt, Deps deps, AccordInteropRead read) + { + super(kind, to, coordinateTopology, topologies, txnId, txn, route, executeAt, deps, (t, u, p) -> read); + } + + @Override + public MessageType type() + { + switch (kind) + { + case Minimal: return AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ; + case Maximal: return AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; + default: throw new IllegalStateException(); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java new file mode 100644 index 000000000000..71757ed106fa --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -0,0 +1,412 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.Agent; +import accord.api.Data; +import accord.api.Result; +import accord.coordinate.Execute; +import accord.coordinate.Persist; +import accord.coordinate.TxnExecute; +import accord.local.AgentExecutor; +import accord.local.CommandStore; +import accord.local.Node; +import accord.local.Node.Id; +import accord.messages.Commit; +import accord.messages.Commit.Kind; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.Participants; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.topology.Shard; +import accord.topology.Topologies; +import accord.topology.Topology; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.ReadCommand; +import org.apache.cassandra.db.ReadResponse; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.SinglePartitionReadCommand.Group; +import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.partitions.PartitionIterator; +import org.apache.cassandra.db.rows.RowIterator; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.metrics.AccordClientRequestMetrics; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.service.StorageProxy; +import org.apache.cassandra.service.accord.AccordEndpointMapper; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordAgent; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.interop.AccordInteropReadCallback.MaximalCommitSender; +import org.apache.cassandra.service.accord.txn.AccordUpdate; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.UnrecoverableRepairUpdate; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import org.apache.cassandra.service.reads.ReadCoordinator; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.transport.Dispatcher; + +import static accord.utils.Invariants.checkArgument; +import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordReadMetrics; +import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordWriteMetrics; + +/* + * The core interoperability problem between Accord and C* writes (regular, and read repair) + * is that when the writes don't go through Accord then Accord can read data that is not yet committed + * because Accord replicas can lag behind and multiple coordinators can be attempting to compute the result of a + * transaction and they can compute different results depending on what they consider to be the inputs to the Accord + * transaction. + * + * We generally solve this by forcing non-Accord writes through Accord as well as by having Accord perform read repair + * on its inputs. + * + */ +public class AccordInteropExecution implements Execute, ReadCoordinator, MaximalCommitSender +{ + private static final Logger logger = LoggerFactory.getLogger(AccordInteropExecution.class); + + private static class InteropExecutor implements AgentExecutor + { + private final AccordAgent agent; + + public InteropExecutor(AccordAgent agent) + { + this.agent = agent; + } + + @Override + public Agent agent() + { + return agent; + } + + @Override + public AsyncChain submit(Callable task) + { + try + { + return AsyncChains.success(task.call()); + } + catch (Throwable e) + { + return AsyncChains.failure(e); + } + } + } + + public static class Factory implements Execute.Factory + { + private final InteropExecutor executor; + private final AccordEndpointMapper endpointMapper; + + public Factory(AccordAgent agent, AccordEndpointMapper endpointMapper) + { + this.executor = new InteropExecutor(agent); + this.endpointMapper = endpointMapper; + } + + @Override + public Execute create(Node node, TxnId txnId, Txn txn, FullRoute route, Participants readScope, Timestamp executeAt, Deps deps, BiConsumer callback) + { + // Unrecoverable repair always needs to be run by AccordInteropExecution + AccordUpdate.Kind updateKind = AccordUpdate.kind(txn.update()); + ConsistencyLevel consistencyLevel = txn.read() instanceof TxnRead ? ((TxnRead) txn.read()).cassandraConsistencyLevel() : null; + if (updateKind != AccordUpdate.Kind.UNRECOVERABLE_REPAIR && (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ONE || txn.read().keys().isEmpty())) + return TxnExecute.FACTORY.create(node, txnId, txn, route, readScope, executeAt, deps, callback); + return new AccordInteropExecution(node, txnId, txn, updateKind, route, readScope, executeAt, deps, callback, executor, consistencyLevel, endpointMapper); + } + } + + private final Node node; + private final TxnId txnId; + private final Txn txn; + private final FullRoute route; + private final Participants readScope; + private final Timestamp executeAt; + private final Deps deps; + private final BiConsumer callback; + private final AgentExecutor executor; + private final ConsistencyLevel consistencyLevel; + private final AccordEndpointMapper endpointMapper; + + private final Topologies executes; + private final Topologies allTopologies; + private final Topology executeTopology; + private final Topology coordinateTopology; + + private final AtomicInteger readsCurrentlyUnderConstruction; + + private final Set contacted; + private final AccordUpdate.Kind updateKind; + + public AccordInteropExecution(Node node, TxnId txnId, Txn txn, AccordUpdate.Kind updateKind, FullRoute route, Participants readScope, Timestamp executeAt, Deps deps, BiConsumer callback, + AgentExecutor executor, ConsistencyLevel consistencyLevel, AccordEndpointMapper endpointMapper) + { + checkArgument(!txn.read().keys().isEmpty() || updateKind == AccordUpdate.Kind.UNRECOVERABLE_REPAIR); + this.node = node; + this.txnId = txnId; + this.txn = txn; + this.route = route; + this.readScope = readScope; + this.executeAt = executeAt; + this.deps = deps; + this.callback = callback; + this.executor = executor; + + checkArgument(updateKind == AccordUpdate.Kind.UNRECOVERABLE_REPAIR || consistencyLevel == ConsistencyLevel.QUORUM || consistencyLevel == ConsistencyLevel.ALL || consistencyLevel == ConsistencyLevel.SERIAL); + this.consistencyLevel = consistencyLevel; + this.endpointMapper = endpointMapper; + + this.executes = node.topology().forEpoch(route, executeAt.epoch()); + this.allTopologies = txnId.epoch() != executeAt.epoch() + ? node.topology().preciseEpochs(route, txnId.epoch(), executeAt.epoch()) + : executes; + this.executeTopology = executes.forEpoch(executeAt.epoch()); + this.coordinateTopology = allTopologies.forEpoch(txnId.epoch()); + if (consistencyLevel != ConsistencyLevel.ALL) + { + readsCurrentlyUnderConstruction = new AtomicInteger(txn.read().keys().size()); + contacted = Collections.newSetFromMap(new ConcurrentHashMap<>()); + } + else + { + readsCurrentlyUnderConstruction = null; + contacted = null; + } + this.updateKind = updateKind; + } + + @Override + public boolean localReadSupported() + { + return false; + } + + @Override + public EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata doNotUse, KeyspaceMetadata keyspace, Token token) + { + AccordRoutingKey.TokenKey key = new AccordRoutingKey.TokenKey(keyspace.name, token); + Shard shard = executeTopology.forKey(key); + Range range = ((TokenRange) shard.range).toKeyspaceRange(); + + Replica[] replicas = new Replica[shard.nodes.size()]; + for (int i=0; i message, InetAddressAndPort to, RequestCallback callback) + { + Node.Id id = endpointMapper.mappedId(to); + SinglePartitionReadCommand command = (SinglePartitionReadCommand) message.payload; + AccordInteropRead read = new AccordInteropRead(id, executes, txnId, readScope, executeAt, command); + AccordInteropCommit commit = new AccordInteropCommit(Commit.Kind.Minimal, id, coordinateTopology, allTopologies, + txnId, txn, route, executeAt, deps, read); + node.send(id, commit, executor, new AccordInteropRead.ReadCallback(id, to, message, callback, this)); + } + + @Override + public void sendReadRepairMutation(Message message, InetAddressAndPort to, RequestCallback callback) + { + Node.Id id = endpointMapper.mappedId(to); + Mutation mutation = message.payload; + AccordInteropReadRepair readRepair = new AccordInteropReadRepair(id, executes, txnId, readScope, executeAt, mutation); + node.send(id, readRepair, executor, new AccordInteropReadRepair.ReadRepairCallback(id, to, message, callback, this)); + } + + private AsyncChain readChains() + { + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(executeAt.hlc()); + // TODO (expected): use normal query nano time + Dispatcher.RequestTime requestTime = Dispatcher.RequestTime.forImmediateExecution(); + + TxnRead read = (TxnRead) txn.read(); + List> results = new ArrayList<>(); + Seekables keys = txn.read().keys(); + keys.forEach(key -> { + read.forEachWithKey((PartitionKey) key, fragment -> { + SinglePartitionReadCommand command = (SinglePartitionReadCommand) fragment.command(); + + // This should only rarely occur when coordinators start a transaction in a migrating range + // because they haven't yet updated their cluster metadata. + // It would be harmless to do the read, but we can respond faster skipping it + // and getting the transaction on the correct protocol + TableMigrationState tms = ConsensusTableMigrationState.getTableMigrationState(command.metadata().id); + AccordClientRequestMetrics metrics = txn.kind().isWrite() ? accordWriteMetrics : accordReadMetrics; + if (ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(tms, command.partitionKey())) + { + metrics.migrationSkippedReads.mark(); + results.add(AsyncChains.success(TxnData.emptyPartition(fragment.txnDataName(), command))); + return; + } + + Group group = Group.one(command.withNowInSec(nowInSeconds)); + results.add(AsyncChains.ofCallable(Stage.ACCORD_MIGRATION.executor(), () -> { + TxnData result = new TxnData(); + try (PartitionIterator iterator = StorageProxy.readRegular(group, consistencyLevel, this, requestTime)) + { + if (iterator.hasNext()) + { + try (RowIterator partition = iterator.next()) + { + FilteredPartition filtered = FilteredPartition.create(partition); + if (filtered.hasRows() || command.selectsFullPartition()) + result.put(fragment.txnDataName(), filtered); + } + } + } + return result; + })); + }); + }); + + if (results.isEmpty()) + return AsyncChains.success(new TxnData()); + + if (results.size() == 1) + return results.get(0); + + return AsyncChains.reduce(results, Data::merge); + } + + /* + * Any nodes not contacted for read need to be sent commits + */ + @Override + public void notifyOfInitialContacts(EndpointsForToken fullDataRequests, EndpointsForToken transientRequests, EndpointsForToken digestRequests) + { + if (readsCurrentlyUnderConstruction == null) + return; + + for (int i = 0; i < fullDataRequests.size(); i++) + contacted.add(fullDataRequests.endpoint(i)); + for (int i = 0; i < transientRequests.size(); i++) + contacted.add(transientRequests.endpoint(i)); + for (int i = 0; i < digestRequests.size(); i++) + contacted.add(digestRequests.endpoint(i)); + if (readsCurrentlyUnderConstruction.decrementAndGet() == 0) + sendCommitsToUncontacted(); + } + + private void sendCommitsToUncontacted() + { + for (Node.Id to : executeTopology.nodes()) + if (!contacted.contains(endpointMapper.mappedEndpoint(to))) + node.send(to, new Commit(Kind.Minimal, to, coordinateTopology, allTopologies, txnId, txn, route, readScope, executeAt, deps, false)); + } + + @Override + public void start() + { + if (coordinateTopology != executeTopology) + { + for (Node.Id to : allTopologies.nodes()) + { + if (!executeTopology.contains(to)) + node.send(to, new Commit(Commit.Kind.Minimal, to, coordinateTopology, allTopologies, txnId, txn, route, readScope, executeAt, deps, false)); + } + } + AsyncChain result; + if (updateKind == AccordUpdate.Kind.UNRECOVERABLE_REPAIR) + result = executeUnrecoverableRepairUpdate(); + else + result = readChains(); + + CommandStore cs = node.commandStores().select(route.homeKey()); + result.beginAsResult().withExecutor(cs).begin((data, failure) -> { + if (failure == null) + Persist.persist(node, executes, txnId, route, txn, executeAt, deps, txn.execute(txnId, executeAt, data), txn.result(txnId, executeAt, data), callback); + else + callback.accept(null, failure); + }); + } + + private AsyncChain executeUnrecoverableRepairUpdate() + { + return AsyncChains.ofCallable(Stage.ACCORD_MIGRATION.executor(), () -> { + UnrecoverableRepairUpdate repairUpdate = (UnrecoverableRepairUpdate)txn.update(); + // TODO (expected): We should send the read in the same message as the commit. This requires refactor ReadData.Kind so that it doesn't specify the ordinal encoding + // and can be extended similar to MessageType which allows additional types not from Accord to be added + for (Node.Id to : executeTopology.nodes()) + node.send(to, new Commit(Kind.Minimal, to, coordinateTopology, allTopologies, txnId, txn, route, readScope, executeAt, deps, false)); + repairUpdate.runBRR(AccordInteropExecution.this); + return new TxnData(); + }); + } + + @Override + public boolean isEventuallyConsistent() + { + return false; + } + + @Override + public ReadCommand maybeAllowOutOfRangeReads(ReadCommand readCommand) + { + return readCommand.allowOutOfRangeReads(); + } + + @Override + public Mutation maybeAllowOutOfRangeMutations(Mutation m) + { + return m.allowOutOfRangeMutations(); + } + + // Prrovide request callbacks with a way to send maximal commits on Insufficient responses + @Override + public void sendMaximalCommit(Id to) + { + Commit.commitMaximal(node, to, txn, txnId, executeAt, route, deps, readScope); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java new file mode 100644 index 000000000000..7ef158153661 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import java.util.function.BiConsumer; + +import accord.api.Result; +import accord.api.Update; +import accord.coordinate.Persist; +import accord.coordinate.TxnPersist; +import accord.coordinate.tracking.AppliedTracker; +import accord.coordinate.tracking.QuorumTracker; +import accord.coordinate.tracking.RequestStatus; +import accord.coordinate.tracking.ResponseTracker; +import accord.local.Node; +import accord.messages.Apply; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.topology.Topologies; +import accord.utils.Invariants; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.service.accord.txn.AccordUpdate; +import org.apache.cassandra.utils.Throwables; + +/** + * Similar to Accord persist, but can wait on a configurable number of responses and sends AccordInteropApply messages + * that only return a response when the Apply has actually occurred. Regular Apply messages only get the transaction + * to PreApplied. + */ +public class AccordInteropPersist extends Persist +{ + public static Persist.Factory FACTORY = new Persist.Factory() + { + @Override + public Persist create(Node node, Topologies topologies, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) + { + Update update = txn.update(); + ConsistencyLevel consistencyLevel = update instanceof AccordUpdate ? ((AccordUpdate) update).cassandraCommitCL() : null; + if (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ANY || writes.isEmpty()) + return TxnPersist.FACTORY.create(node, topologies, txnId, route, txn, executeAt, deps, writes, result); + return new AccordInteropPersist(node, topologies, txnId, route, txn, executeAt, deps, writes, result, consistencyLevel); + } + }; + + private static class CallbackHolder + { + private final ResponseTracker tracker; + private final Result result; + private final BiConsumer clientCallback; + private Throwable failure = null; + + public CallbackHolder(ResponseTracker tracker, Result result, BiConsumer clientCallback) + { + this.tracker = tracker; + this.result = result; + this.clientCallback = clientCallback; + } + + private void handleStatus(RequestStatus status) + { + switch (status) + { + default: throw new IllegalStateException("Unhandled request status " + status); + case Success: + clientCallback.accept(result, null); + return; + case Failed: + clientCallback.accept(null, failure); + return; + case NoChange: + // noop + } + } + + public void recordSuccess(Node.Id node) + { + handleStatus(tracker.recordSuccess(node)); + } + + + public void recordFailure(Node.Id node, Throwable throwable) + { + failure = Throwables.merge(failure, throwable); + handleStatus(tracker.recordFailure(node)); + } + } + + private final ConsistencyLevel consistencyLevel; + private CallbackHolder holder = null; + + public AccordInteropPersist(Node node, Topologies topologies, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, ConsistencyLevel consistencyLevel) + { + super(node, topologies, txnId, route, txn, executeAt, deps, writes, result); + Invariants.checkArgument(consistencyLevel == ConsistencyLevel.QUORUM || consistencyLevel == ConsistencyLevel.ALL || consistencyLevel == ConsistencyLevel.SERIAL || consistencyLevel == ConsistencyLevel.ONE); + this.consistencyLevel = consistencyLevel; + } + + @Override + public void registerClientCallback(Writes writes, Result result, BiConsumer clientCallback) + { + + Invariants.checkState(holder == null); + switch (consistencyLevel) + { + case ONE: // Can safely upgrade ONE to QUORUM/SERIAL to get a synchronous commit + case SERIAL: + case QUORUM: + holder = new CallbackHolder(new QuorumTracker(topologies), result, clientCallback); + break; + case ALL: + holder = new CallbackHolder(new AppliedTracker(topologies), result, clientCallback); + break; + default: + throw new IllegalArgumentException("Unhandled consistency level: " + consistencyLevel); + } + } + + @Override + public void onSuccess(Node.Id from, Apply.ApplyReply reply) + { + super.onSuccess(from, reply); + switch (reply) + { + case Redundant: + case Applied: + holder.recordSuccess(from); + return; + case Insufficient: + // On insufficient Persist will send a commit with the missing information + // which will allow a final response to be returned later that could be successful + return; + default: throw new IllegalArgumentException("Unhandled apply response " + reply); + } + } + + @Override + public void onFailure(Node.Id from, Throwable failure) + { + holder.recordFailure(from, failure); + } + + @Override + public void onCallbackFailure(Node.Id from, Throwable failure) + { + holder.recordFailure(from, failure); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java new file mode 100644 index 000000000000..97ace2556605 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import java.io.IOException; +import javax.annotation.Nullable; + +import accord.api.Data; +import accord.local.Node; +import accord.local.SafeCommandStore; +import accord.messages.AbstractExecute; +import accord.messages.MessageType; +import accord.primitives.PartialTxn; +import accord.primitives.Participants; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.topology.Topologies; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.ReadCommandVerbHandler; +import org.apache.cassandra.db.ReadResponse; +import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; +import org.apache.cassandra.service.accord.serializers.ReadDataSerializers.ReadDataSerializer; + +public class AccordInteropRead extends AbstractExecute +{ + public static final IVersionedSerializer requestSerializer = new ReadDataSerializer() + { + @Override + public void serialize(AccordInteropRead read, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(read.txnId, out, version); + KeySerializers.participants.serialize(read.readScope, out, version); + out.writeUnsignedVInt(read.waitForEpoch()); + out.writeUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); + SinglePartitionReadCommand.serializer.serialize(read.command, out, version); + } + + @Override + public AccordInteropRead deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Participants readScope = KeySerializers.participants.deserialize(in, version); + long waitForEpoch = in.readUnsignedVInt(); + long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; + SinglePartitionReadCommand command = (SinglePartitionReadCommand) SinglePartitionReadCommand.serializer.deserialize(in, version); + return new AccordInteropRead(txnId, readScope, waitForEpoch, executeAtEpoch, command); + } + + @Override + public long serializedSize(AccordInteropRead read, int version) + { + return CommandSerializers.txnId.serializedSize(read.txnId, version) + + KeySerializers.participants.serializedSize(read.readScope, version) + + TypeSizes.sizeofUnsignedVInt(read.waitForEpoch()) + + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()) + + SinglePartitionReadCommand.serializer.serializedSize(read.command, version); + } + }; + + public static final IVersionedSerializer replySerializer = new ReadDataSerializers.ReplySerializer<>(LocalReadData.serializer); + + private static class LocalReadData implements Data + { + static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(LocalReadData data, DataOutputPlus out, int version) throws IOException + { + ReadResponse.serializer.serialize(data.response, out, version); + } + + @Override + public LocalReadData deserialize(DataInputPlus in, int version) throws IOException + { + return new LocalReadData(ReadResponse.serializer.deserialize(in, version)); + } + + @Override + public long serializedSize(LocalReadData data, int version) + { + return ReadResponse.serializer.serializedSize(data.response, version); + } + }; + + final ReadResponse response; + + public LocalReadData(ReadResponse response) + { + this.response = response; + } + + @Override + public String toString() + { + return "LocalReadData{" + response + '}'; + } + + @Override + public Data merge(Data data) + { + throw new IllegalStateException("Should only ever be a single partition"); + } + } + + static class ReadCallback extends AccordInteropReadCallback + { + public ReadCallback(Node.Id id, InetAddressAndPort endpoint, Message message, RequestCallback wrapped, MaximalCommitSender maximalCommitSender) + { + super(id, endpoint, message, wrapped, maximalCommitSender); + } + + @Override + ReadResponse convertResponse(ReadOk ok) + { + return ((LocalReadData) ok.data).response; + } + } + + private final SinglePartitionReadCommand command; + + public AccordInteropRead(Node.Id to, Topologies topologies, TxnId txnId, Participants readScope, Timestamp executeAt, SinglePartitionReadCommand command) + { + super(to, topologies, txnId, readScope, executeAt); + this.command = command; + } + + public AccordInteropRead(TxnId txnId, Participants readScope, long executeAtEpoch, long waitForEpoch, SinglePartitionReadCommand command) + { + super(txnId, readScope, executeAtEpoch, waitForEpoch); + this.command = command; + } + + @Override + protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn) + { + return AsyncChains.ofCallable(Stage.READ.executor(), () -> new LocalReadData(ReadCommandVerbHandler.instance.doRead(command, false))); + } + + @Override + protected boolean canExecutePreApplied() + { + return true; + } + + @Override + protected ReadOk constructReadOk(Ranges unavailable, Data data) + { + return new InteropReadOk(unavailable, data); + } + + @Override + public MessageType type() + { + return AccordMessageType.INTEROP_READ_REQ; + } + + @Override + public String toString() + { + return "AccordInteropRead{" + + "txnId=" + txnId + + "command=" + command + + '}'; + } + + private static class InteropReadOk extends ReadOk + { + public InteropReadOk(@Nullable Ranges unavailable, @Nullable Data data) + { + super(unavailable, data); + } + + @Override + public MessageType type() + { + return AccordMessageType.INTEROP_READ_RSP; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java new file mode 100644 index 000000000000..6bf006b3a84f --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import javax.annotation.Nonnull; + +import accord.local.Node; +import accord.messages.Callback; +import accord.messages.ReadData.ReadOk; +import accord.messages.ReadData.ReadReply; +import accord.utils.Invariants; +import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.RequestCallback; + +import static accord.messages.ReadData.ReadNack.NotCommitted; + +public abstract class AccordInteropReadCallback implements Callback +{ + interface MaximalCommitSender + { + void sendMaximalCommit(@Nonnull Node.Id to); + } + + private final Node.Id id; + private final InetAddressAndPort endpoint; + private final Message message; + private final RequestCallback wrapped; + private final MaximalCommitSender maximalCommitSender; + + public AccordInteropReadCallback(Node.Id id, InetAddressAndPort endpoint, Message message, RequestCallback wrapped, MaximalCommitSender maximalCommitSender) + { + this.id = id; + this.message = message; + this.endpoint = endpoint; + this.wrapped = wrapped; + this.maximalCommitSender = maximalCommitSender; + } + + abstract T convertResponse(ReadOk ok); + + public void onSuccess(Node.Id from, ReadReply reply) + { + Invariants.checkArgument(from.equals(id)); + if (reply.isOk()) + { + wrapped.onResponse(message.responseWith(convertResponse((ReadOk) reply)).withFrom(endpoint)); + } + else if (reply == NotCommitted) + { + // Might still send a response if we send a maximal commit. Accord would tryAlternative and send + // both the commit and an additional repair, but Cassandra doesn't have tryAlternative unless we add + // it and instead opts to trigger additional repair messages based on time. + maximalCommitSender.sendMaximalCommit(id); + } + else + { + wrapped.onFailure(endpoint, RequestFailure.UNKNOWN); + } + } + + public void onFailure(Node.Id from, Throwable failure) + { + wrapped.onFailure(endpoint, RequestFailure.UNKNOWN); + } + + public void onCallbackFailure(Node.Id from, Throwable failure) + { + wrapped.onFailure(endpoint, RequestFailure.UNKNOWN); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java new file mode 100644 index 000000000000..c16c99e33ed5 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import java.io.IOException; +import javax.annotation.Nullable; + +import accord.api.Data; +import accord.local.Node; +import accord.local.SafeCommandStore; +import accord.messages.AbstractExecute; +import accord.messages.MessageType; +import accord.primitives.PartialTxn; +import accord.primitives.Participants; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.topology.Topologies; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.ReadRepairVerbHandler; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.NoPayload; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; +import org.apache.cassandra.service.accord.serializers.ReadDataSerializers.ReadDataSerializer; + +/** + * Applies a read repair mutation from inside the context of a CommandStore via AbstractExecute + * ensuring that the contents of the read repair consist of data that isn't from transactions that + * haven't been committed yet at this command store. + */ +public class AccordInteropReadRepair extends AbstractExecute +{ + public static final IVersionedSerializer requestSerializer = new ReadDataSerializer() + { + @Override + public void serialize(AccordInteropReadRepair repair, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(repair.txnId, out, version); + KeySerializers.participants.serialize(repair.readScope, out, version); + out.writeUnsignedVInt(repair.waitForEpoch()); + out.writeUnsignedVInt(repair.executeAtEpoch - repair.waitForEpoch()); + Mutation.serializer.serialize(repair.mutation, out, version); + } + + @Override + public AccordInteropReadRepair deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Participants readScope = KeySerializers.participants.deserialize(in, version); + long waitForEpoch = in.readUnsignedVInt(); + long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; + Mutation mutation = Mutation.serializer.deserialize(in, version); + return new AccordInteropReadRepair(txnId, readScope, waitForEpoch, executeAtEpoch, mutation); + } + + @Override + public long serializedSize(AccordInteropReadRepair repair, int version) + { + return CommandSerializers.txnId.serializedSize(repair.txnId, version) + + KeySerializers.participants.serializedSize(repair.readScope, version) + + TypeSizes.sizeofUnsignedVInt(repair.waitForEpoch()) + + TypeSizes.sizeofUnsignedVInt(repair.executeAtEpoch - repair.waitForEpoch()) + + Mutation.serializer.serializedSize(repair.mutation, version); + } + }; + + static class ReadRepairCallback extends AccordInteropReadCallback + { + public ReadRepairCallback(Node.Id id, InetAddressAndPort endpoint, Message message, RequestCallback wrapped, MaximalCommitSender maximalCommitSender) + { + super(id, endpoint, message, wrapped, maximalCommitSender); + } + + @Override + Object convertResponse(ReadOk ok) + { + return NoPayload.noPayload; + } + } + + private final Mutation mutation; + + private static final IVersionedSerializer noop_data_serializer = new IVersionedSerializer() + { + @Override + public void serialize(Data t, DataOutputPlus out, int version) throws IOException {} + @Override + public Data deserialize(DataInputPlus in, int version) throws IOException { return Data.NOOP_DATA; } + + public long serializedSize(Data t, int version) { return 0; } + }; + + public static final IVersionedSerializer replySerializer = new ReadDataSerializers.ReplySerializer<>(noop_data_serializer); + + public AccordInteropReadRepair(Node.Id to, Topologies topologies, TxnId txnId, Participants readScope, Timestamp executeAt, Mutation mutation) + { + super(to, topologies, txnId, readScope, executeAt); + this.mutation = mutation; + } + + public AccordInteropReadRepair(TxnId txnId, Participants readScope, long executeAtEpoch, long waitForEpoch, Mutation mutation) + { + // TODO (review): remove followup read - Is there anything left to be done for this or can I remove it? + super(txnId, readScope, executeAtEpoch, waitForEpoch); + this.mutation = mutation; + } + + @Override + protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn) + { + return AsyncChains.ofCallable(Verb.READ_REPAIR_REQ.stage.executor(), () -> { + ReadRepairVerbHandler.instance.applyMutation(mutation); + return Data.NOOP_DATA; + }); + } + + @Override + protected boolean canExecutePreApplied() + { + return true; + } + + @Override + protected boolean executeIfObsoleted() + { + return true; + } + + @Override + protected ReadOk constructReadOk(Ranges unavailable, Data data) + { + return new InteropReadRepairOk(unavailable, data); + } + + @Override + public MessageType type() + { + return AccordMessageType.INTEROP_READ_REPAIR_REQ; + } + + private static class InteropReadRepairOk extends ReadOk + { + public InteropReadRepairOk(@Nullable Ranges unavailable, @Nullable Data data) + { + super(unavailable, data); + } + + @Override + public MessageType type() + { + return AccordMessageType.INTEROP_READ_REPAIR_RSP; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index e8ccc6258fcd..4decbdf1f8fc 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -22,19 +22,25 @@ import accord.api.Result; import accord.messages.Apply; +import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.primitives.Writes; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; + public class ApplySerializers { - public static final IVersionedSerializer request = new TxnRequestSerializer() + public abstract static class ApplySerializer extends TxnRequestSerializer { @Override - public void serializeBody(Apply apply, DataOutputPlus out, int version) throws IOException + public void serializeBody(A apply, DataOutputPlus out, int version) throws IOException { out.writeBoolean(apply.kind == Apply.Kind.Maximal); KeySerializers.seekables.serialize(apply.keys(), out, version); @@ -44,21 +50,24 @@ public void serializeBody(Apply apply, DataOutputPlus out, int version) throws I CommandSerializers.writes.serialize(apply.writes, out, version); } + protected abstract A deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, + Timestamp executeAt, PartialDeps deps, PartialTxn txn, Writes writes, Result result); + @Override - public Apply deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public A deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { - return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, - in.readBoolean() ? Apply.Kind.Maximal : Apply.Kind.Minimal, - KeySerializers.seekables.deserialize(in, version), - CommandSerializers.timestamp.deserialize(in, version), - DepsSerializer.partialDeps.deserialize(in, version), - CommandSerializers.nullablePartialTxn.deserialize(in, version), - CommandSerializers.writes.deserialize(in, version), - Result.APPLIED); + return deserializeApply(txnId, scope, waitForEpoch, + in.readBoolean() ? Apply.Kind.Maximal : Apply.Kind.Minimal, + KeySerializers.seekables.deserialize(in, version), + CommandSerializers.timestamp.deserialize(in, version), + DepsSerializer.partialDeps.deserialize(in, version), + CommandSerializers.nullablePartialTxn.deserialize(in, version), + CommandSerializers.writes.deserialize(in, version), + Result.APPLIED); } @Override - public long serializedBodySize(Apply apply, int version) + public long serializedBodySize(A apply, int version) { return TypeSizes.BOOL_SIZE + KeySerializers.seekables.serializedSize(apply.keys(), version) @@ -67,6 +76,16 @@ public long serializedBodySize(Apply apply, int version) + CommandSerializers.nullablePartialTxn.serializedSize(apply.txn, version) + CommandSerializers.writes.serializedSize(apply.writes, version); } + } + + public static final IVersionedSerializer request = new ApplySerializer() + { + @Override + protected Apply deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, + Timestamp executeAt, PartialDeps deps, PartialTxn txn, Writes writes, Result result) + { + return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, kind, keys, executeAt, deps, txn, writes, result); + } }; public static final IVersionedSerializer reply = new IVersionedSerializer() diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 74ed2eb70386..734186ea0be8 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -173,7 +173,7 @@ public long serializedSize(CheckStatusReply reply, int version) size += KeySerializers.ranges.serializedSize(ok.truncated, version); size += CommandSerializers.status.serializedSize(ok.invalidIfNotAtLeast, version); size += CommandSerializers.saveStatus.serializedSize(ok.saveStatus, version); - size += CommandSerializers.saveStatus.serializedSize(ok.saveStatus, version); + size += CommandSerializers.saveStatus.serializedSize(ok.maxSaveStatus, version); size += CommandSerializers.ballot.serializedSize(ok.promised, version); size += CommandSerializers.ballot.serializedSize(ok.accepted, version); size += CommandSerializers.nullableTimestamp.serializedSize(ok.executeAt, version); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index 77f414e0cdcc..0c15515206a0 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -43,9 +43,9 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; -import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.utils.CastingSerializer; import org.apache.cassandra.utils.NullableSerializer; @@ -181,7 +181,7 @@ public long serializedSize(PartialTxn txn, int version) private static final IVersionedSerializer read = new CastingSerializer<>(TxnRead.class, TxnRead.serializer); private static final IVersionedSerializer query = new CastingSerializer<>(TxnQuery.class, TxnQuery.serializer); - private static final IVersionedSerializer update = new CastingSerializer<>(TxnUpdate.class, TxnUpdate.serializer); + private static final IVersionedSerializer update = new CastingSerializer<>(AccordUpdate.class, AccordUpdate.serializer); public static final IVersionedSerializer partialTxn = new PartialTxnSerializer(read, query, update); public static final IVersionedSerializer nullablePartialTxn = NullableSerializer.wrap(partialTxn); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index c2ea5e6a24b0..e67368d00d62 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -19,15 +19,22 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import javax.annotation.Nullable; import accord.messages.Commit; +import accord.messages.ReadData; +import accord.primitives.FullRoute; +import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; +import accord.primitives.PartialTxn; +import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Unseekables; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.CastingSerializer; import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; @@ -35,41 +42,61 @@ public class CommitSerializers { - public static final IVersionedSerializer request = new TxnRequestSerializer() + public abstract static class CommitSerializer extends TxnRequestSerializer { + private final IVersionedSerializer read; + + public CommitSerializer(Class klass, IVersionedSerializer read) + { + this.read = new CastingSerializer<>(klass, read); + } + @Override - public void serializeBody(Commit msg, DataOutputPlus out, int version) throws IOException + public void serializeBody(C msg, DataOutputPlus out, int version) throws IOException { out.writeBoolean(msg.kind == Commit.Kind.Maximal); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); CommandSerializers.nullablePartialTxn.serialize(msg.partialTxn, out, version); DepsSerializer.partialDeps.serialize(msg.partialDeps, out, version); serializeNullable(msg.route, out, version, KeySerializers.fullRoute); - serializeNullable(msg.read, out, version, ReadDataSerializers.request); + serializeNullable(msg.readData, out, version, read); } + protected abstract C deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Timestamp executeAt, + @Nullable PartialTxn partialTxn, PartialDeps partialDeps, + @Nullable FullRoute fullRoute, @Nullable ReadData read); + @Override - public Commit deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public C deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { - return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, - in.readBoolean() ? Commit.Kind.Maximal : Commit.Kind.Minimal, - CommandSerializers.timestamp.deserialize(in, version), - CommandSerializers.nullablePartialTxn.deserialize(in, version), - DepsSerializer.partialDeps.deserialize(in, version), - deserializeNullable(in, version, KeySerializers.fullRoute), - deserializeNullable(in, version, ReadDataSerializers.request) + return deserializeCommit(txnId, scope, waitForEpoch, + in.readBoolean() ? Commit.Kind.Maximal : Commit.Kind.Minimal, + CommandSerializers.timestamp.deserialize(in, version), + CommandSerializers.nullablePartialTxn.deserialize(in, version), + DepsSerializer.partialDeps.deserialize(in, version), + deserializeNullable(in, version, KeySerializers.fullRoute), + deserializeNullable(in, version, read) ); } @Override - public long serializedBodySize(Commit msg, int version) + public long serializedBodySize(C msg, int version) { return TypeSizes.BOOL_SIZE + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + CommandSerializers.nullablePartialTxn.serializedSize(msg.partialTxn, version) + DepsSerializer.partialDeps.serializedSize(msg.partialDeps, version) + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) - + serializedNullableSize(msg.read, version, ReadDataSerializers.request); + + serializedNullableSize(msg.readData, version, read); + } + } + + public static final IVersionedSerializer request = new CommitSerializer(ReadData.class, ReadDataSerializers.readData) + { + @Override + protected Commit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + { + return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, kind, executeAt, partialTxn, partialDeps, fullRoute, read); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java index c6a349028b2b..50f53a04f61f 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java @@ -41,7 +41,6 @@ public void serialize(InformHomeDurable inform, DataOutputPlus out, int version) CommandSerializers.timestamp.serialize(inform.executeAt, out, version); CommandSerializers.durability.serialize(inform.durability, out, version); serializeCollection(inform.persistedOn, out, version, TopologySerializers.nodeId); - } @Override diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index 52e534142ad2..8b102ae13dd1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -50,8 +50,8 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.TokenRange; -import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.NullableSerializer; public class KeySerializers diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index e9705d3198dd..db7a4f7bf5a9 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -20,9 +20,13 @@ import java.io.IOException; +import accord.api.Data; +import accord.messages.ApplyThenWaitUntilApplied; +import accord.messages.ReadData; import accord.messages.ReadData.ReadNack; import accord.messages.ReadData.ReadOk; import accord.messages.ReadData.ReadReply; +import accord.messages.ReadData.ReadType; import accord.messages.ReadTxnData; import accord.messages.WaitUntilApplied; import accord.primitives.Participants; @@ -34,14 +38,80 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnResult; +import static org.apache.cassandra.db.TypeSizes.sizeof; import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; public class ReadDataSerializers { - public static final IVersionedSerializer request = new IVersionedSerializer() + public static final IVersionedSerializer readData = new IVersionedSerializer() + { + @Override + public void serialize(ReadData t, DataOutputPlus out, int version) throws IOException + { + out.writeByte(t.kind().val); + serializerFor(t).serialize(t, out, version); + } + + @Override + public ReadData deserialize(DataInputPlus in, int version) throws IOException + { + return serializerFor(ReadType.valueOf(in.readByte())).deserialize(in, version); + } + + @Override + public long serializedSize(ReadData t, int version) + { + return sizeof(t.kind().val) + serializerFor(t).serializedSize(t, version); + } + }; + + private static final ApplyThenWaitUntilAppliedSerializer applyThenWaitUntilApplied = new ApplyThenWaitUntilAppliedSerializer(); + + private static class ApplyThenWaitUntilAppliedSerializer implements ReadDataSerializer + { + @Override + public void serialize(ApplyThenWaitUntilApplied applyThenWaitUntilApplied, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(applyThenWaitUntilApplied.txnId, out, version); + KeySerializers.partialRoute.serialize(applyThenWaitUntilApplied.route, out, version); + DepsSerializer.partialDeps.serialize(applyThenWaitUntilApplied.deps, out, version); + KeySerializers.seekables.serialize(applyThenWaitUntilApplied.partialTxnKeys, out, version); + CommandSerializers.writes.serialize(applyThenWaitUntilApplied.writes, out, version); + TxnResult.serializer.serialize((TxnResult) applyThenWaitUntilApplied.txnResult, out, version); + out.writeBoolean(applyThenWaitUntilApplied.notifyAgent); + } + + @Override + public ApplyThenWaitUntilApplied deserialize(DataInputPlus in, int version) throws IOException + { + return ApplyThenWaitUntilApplied.SerializerSupport.create( + CommandSerializers.txnId.deserialize(in, version), + KeySerializers.partialRoute.deserialize(in, version), + DepsSerializer.partialDeps.deserialize(in, version), + KeySerializers.seekables.deserialize(in, version), + CommandSerializers.writes.deserialize(in, version), + TxnResult.serializer.deserialize(in, version), + in.readBoolean()); + } + + @Override + public long serializedSize(ApplyThenWaitUntilApplied applyThenWaitUntilApplied, int version) + { + return CommandSerializers.txnId.serializedSize(applyThenWaitUntilApplied.txnId, version) + + KeySerializers.partialRoute.serializedSize(applyThenWaitUntilApplied.route, version) + + DepsSerializer.partialDeps.serializedSize(applyThenWaitUntilApplied.deps, version) + + KeySerializers.seekables.serializedSize(applyThenWaitUntilApplied.partialTxnKeys, version) + + CommandSerializers.writes.serializedSize(applyThenWaitUntilApplied.writes, version) + + TxnResult.serializer.serializedSize((TxnData)applyThenWaitUntilApplied.txnResult, version) + + sizeof(applyThenWaitUntilApplied.notifyAgent); + } + } + + private static final ReadDataSerializer readTxnData = new ReadDataSerializer() { @Override public void serialize(ReadTxnData read, DataOutputPlus out, int version) throws IOException @@ -72,10 +142,43 @@ public long serializedSize(ReadTxnData read, int version) } }; - public static final IVersionedSerializer reply = new IVersionedSerializer() + public interface ReadDataSerializer extends IVersionedSerializer + { + void serialize(T bound, DataOutputPlus out, int version) throws IOException; + T deserialize(DataInputPlus in, int version) throws IOException; + long serializedSize(T condition, int version); + } + + private static ReadDataSerializer serializerFor(ReadData toSerialize) + { + return serializerFor(toSerialize.kind()); + } + + private static ReadDataSerializer serializerFor(ReadType type) + { + switch (type) + { + case readTxnData: + return readTxnData; + case applyThenWaitUntilApplied: + return applyThenWaitUntilApplied; + case waitUntilApplied: + return waitUntilApplied; + default: + throw new IllegalStateException("Unsupported ExecuteType " + type); + } + } + + public static final class ReplySerializer implements IVersionedSerializer { // TODO (now): use something other than ordinal final ReadNack[] nacks = ReadNack.values(); + private final IVersionedSerializer dataSerializer; + + public ReplySerializer(IVersionedSerializer dataSerializer) + { + this.dataSerializer = dataSerializer; + } @Override public void serialize(ReadReply reply, DataOutputPlus out, int version) throws IOException @@ -89,7 +192,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I out.writeByte(0); ReadOk readOk = (ReadOk) reply; serializeNullable(readOk.unavailable, out, version, KeySerializers.ranges); - TxnData.nullableSerializer.serialize((TxnData) readOk.data, out, version); + dataSerializer.serialize((D) readOk.data, out, version); } @Override @@ -100,7 +203,7 @@ public ReadReply deserialize(DataInputPlus in, int version) throws IOException return nacks[id - 1]; Ranges ranges = deserializeNullable(in, version, KeySerializers.ranges); - TxnData data = TxnData.nullableSerializer.deserialize(in, version); + D data = dataSerializer.deserialize(in, version); return new ReadOk(ranges, data); } @@ -113,20 +216,22 @@ public long serializedSize(ReadReply reply, int version) ReadOk readOk = (ReadOk) reply; return TypeSizes.BYTE_SIZE + serializedNullableSize(readOk.unavailable, version, KeySerializers.ranges) - + TxnData.nullableSerializer.serializedSize((TxnData) readOk.data, version); + + dataSerializer.serializedSize((D) readOk.data, version); } - }; + } + + public static final IVersionedSerializer reply = new ReplySerializer<>(TxnData.nullableSerializer); // TODO (consider): duplicates ReadTxnData ser/de logic; conside deduplicating if another instance of this is added - public static final IVersionedSerializer waitOnApply = new IVersionedSerializer() + public static final ReadDataSerializer waitUntilApplied = new ReadDataSerializer() { @Override - public void serialize(WaitUntilApplied msg, DataOutputPlus out, int version) throws IOException + public void serialize(WaitUntilApplied waitUntilApplied, DataOutputPlus out, int version) throws IOException { - CommandSerializers.txnId.serialize(msg.txnId, out, version); - KeySerializers.participants.serialize(msg.readScope, out, version); - out.writeUnsignedVInt(msg.waitForEpoch()); - CommandSerializers.timestamp.serialize(msg.executeAt, out , version); + CommandSerializers.txnId.serialize(waitUntilApplied.txnId, out, version); + KeySerializers.participants.serialize(waitUntilApplied.readScope, out, version); + out.writeUnsignedVInt(waitUntilApplied.waitForEpoch()); + CommandSerializers.timestamp.serialize(waitUntilApplied.executeAt, out , version); } @Override @@ -140,12 +245,12 @@ public WaitUntilApplied deserialize(DataInputPlus in, int version) throws IOExce } @Override - public long serializedSize(WaitUntilApplied msg, int version) + public long serializedSize(WaitUntilApplied waitUntilApplied, int version) { - return CommandSerializers.txnId.serializedSize(msg.txnId, version) - + KeySerializers.participants.serializedSize(msg.readScope, version) - + TypeSizes.sizeofUnsignedVInt(msg.waitForEpoch()) - + CommandSerializers.timestamp.serializedSize(msg.executeAt, version); + return CommandSerializers.txnId.serializedSize(waitUntilApplied.txnId, version) + + KeySerializers.participants.serializedSize(waitUntilApplied.readScope, version) + + TypeSizes.sizeofUnsignedVInt(waitUntilApplied.waitForEpoch()) + + CommandSerializers.timestamp.serializedSize(waitUntilApplied.executeAt, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java index 1b55252d245e..e9cabbbd1512 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java @@ -23,9 +23,10 @@ import accord.messages.SetGloballyDurable; import accord.messages.SetShardDurable; import accord.primitives.Deps; -import accord.primitives.Ranges; +import accord.primitives.Seekables; import accord.primitives.SyncPoint; import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -81,8 +82,9 @@ public void serialize(SyncPoint sp, DataOutputPlus out, int version) throws IOEx { CommandSerializers.txnId.serialize(sp.syncId, out, version); DepsSerializer.deps.serialize(sp.waitFor, out, version); - KeySerializers.ranges.serialize(sp.ranges, out, version); + KeySerializers.seekables.serialize(sp.keysOrRanges, out, version); KeySerializers.routingKey.serialize(sp.homeKey, out, version); + out.writeBoolean(sp.finishedAsync); } @Override @@ -90,18 +92,20 @@ public SyncPoint deserialize(DataInputPlus in, int version) throws IOException { TxnId syncId = CommandSerializers.txnId.deserialize(in, version); Deps waitFor = DepsSerializer.deps.deserialize(in, version); - Ranges ranges = KeySerializers.ranges.deserialize(in, version); + Seekables keysOrRanges = KeySerializers.seekables.deserialize(in, version); RoutingKey homeKey = KeySerializers.routingKey.deserialize(in, version); - return SyncPoint.SerializationSupport.construct(syncId, waitFor, ranges, homeKey); + boolean finishedAsync = in.readBoolean(); + return SyncPoint.SerializationSupport.construct(syncId, waitFor, keysOrRanges, homeKey, finishedAsync); } @Override public long serializedSize(SyncPoint sp, int version) { return CommandSerializers.txnId.serializedSize(sp.syncId, version) - + DepsSerializer.deps.serializedSize(sp.waitFor, version) - + KeySerializers.ranges.serializedSize(sp.ranges, version) - + KeySerializers.routingKey.serializedSize(sp.homeKey, version); + + DepsSerializer.deps.serializedSize(sp.waitFor, version) + + KeySerializers.seekables.serializedSize(sp.keysOrRanges, version) + + KeySerializers.routingKey.serializedSize(sp.homeKey, version) + + TypeSizes.sizeof(sp.finishedAsync); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java new file mode 100644 index 000000000000..ae63f9d26e62 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import javax.annotation.Nullable; + +import accord.api.Data; +import accord.api.Update; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public abstract class AccordUpdate implements Update +{ + public enum Kind + { + TXN(0), + UNRECOVERABLE_REPAIR(1), + NONE(2), + ; + + int val; + + Kind(int val) + { + this.val = val; + } + + public static Kind valueOf(int val) + { + switch(val) + { + case 0: + return TXN; + case 1: + return UNRECOVERABLE_REPAIR; + default: + throw new IllegalArgumentException("Unrecognized AccordUpdate.Kind value " + val); + } + } + } + + public static Kind kind(@Nullable Update update) + { + if (update == null) + return Kind.NONE; + return ((AccordUpdate)update).kind(); + } + + public boolean checkCondition(Data data) + { + throw new UnsupportedOperationException(); + } + + public abstract ConsistencyLevel cassandraCommitCL(); + + public abstract Kind kind(); + + public abstract long estimatedSizeOnHeap(); + + public interface AccordUpdateSerializer extends IVersionedSerializer + { + void serialize(T update, DataOutputPlus out, int version) throws IOException; + T deserialize(DataInputPlus in, int version) throws IOException; + long serializedSize(T update, int version); + } + + private static AccordUpdateSerializer serializerFor(AccordUpdate toSerialize) + { + return serializerFor(toSerialize.kind()); + } + + private static AccordUpdateSerializer serializerFor(Kind kind) + { + switch (kind) + { + case TXN: + return TxnUpdate.serializer; + case UNRECOVERABLE_REPAIR: + return UnrecoverableRepairUpdate.serializer; + default: + throw new IllegalStateException("Unsupported AccordUpdate Kind " + kind); + } + } + + public static final AccordUpdateSerializer serializer = new AccordUpdateSerializer() + { + @Override + public void serialize(AccordUpdate update, DataOutputPlus out, int version) throws IOException + { + out.writeByte(update.kind().val); + serializerFor(update).serialize(update, out, version); + } + + @Override + public AccordUpdate deserialize(DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.valueOf(in.readByte()); + return serializerFor(kind).deserialize(in, version); + } + + @Override + public long serializedSize(AccordUpdate update, int version) + { + return 1 + serializerFor(update).serializedSize(update, version); + } + }; +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java index afe7e87e0ae8..e6a977955ad0 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java +++ b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java @@ -31,6 +31,8 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; +import static com.google.common.base.Preconditions.checkState; + public class AccordUpdateParameters { private final TxnData data; @@ -47,7 +49,7 @@ public TxnData getData() return data; } - public UpdateParameters updateParameters(TableMetadata metadata, int rowIndex) + public UpdateParameters updateParameters(TableMetadata metadata, DecoratedKey dk, int rowIndex) { // This is currently only used by Guardrails, but this logically have issues with Accord as drifts in config // values could cause unexpected issues in Accord. (ex. some nodes reject writes while others accept) @@ -67,16 +69,24 @@ public UpdateParameters updateParameters(TableMetadata metadata, int rowIndex) timestamp, nowInSeconds, ttl, - prefetchRow(metadata, rowIndex)); + prefetchRow(metadata, dk, rowIndex)); } - private Map prefetchRow(TableMetadata metadata, int index) + private Map prefetchRow(TableMetadata metadata, DecoratedKey dk, int index) { for (Map.Entry e : data.entrySet()) { TxnDataName name = e.getKey(); - if (name.isAutoRead() && name.atIndex(index)) - return ImmutableMap.of(name.getDecoratedKey(metadata), e.getValue()); + switch (name.getKind()) + { + case CAS_READ: + checkState(data.entrySet().size() == 1, "CAS read should only have one entry"); + return ImmutableMap.of(dk, e.getValue()); + case AUTO_READ: + if (name.atIndex(index)) + return ImmutableMap.of(name.getDecoratedKey(metadata), e.getValue()); + default: + } } return Collections.emptyMap(); } diff --git a/src/java/org/apache/cassandra/service/accord/txn/RetryWithNewProtocolResult.java b/src/java/org/apache/cassandra/service/accord/txn/RetryWithNewProtocolResult.java new file mode 100644 index 000000000000..d5c02a596b78 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/RetryWithNewProtocolResult.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; + +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.ObjectSizes; + +/** + * Potentially returned by any transaction that tries to execute in an Epoch + * where the range has migrated away from Accord + */ +public class RetryWithNewProtocolResult extends TxnResult +{ + private static final long SIZE = ObjectSizes.measure(new RetryWithNewProtocolResult(Epoch.FIRST)); + + public final Epoch epoch; + + RetryWithNewProtocolResult(Epoch epoch) + { + this.epoch = epoch; + } + + @Override + public Kind kind() + { + return Kind.retry_new_protocol; + } + + @Override + public long estimatedSizeOnHeap() + { + return SIZE; + } + + public static final TxnResultSerializer serializer = new TxnResultSerializer() + { + @Override + public void serialize(RetryWithNewProtocolResult retry, DataOutputPlus out, int version) throws IOException + { + Epoch.messageSerializer.serialize(retry.epoch, out, version); + } + + @Override + public RetryWithNewProtocolResult deserialize(DataInputPlus in, int version) throws IOException + { + return new RetryWithNewProtocolResult(Epoch.messageSerializer.deserialize(in, version)); + } + + @Override + public long serializedSize(RetryWithNewProtocolResult retry, int version) + { + return Epoch.messageSerializer.serializedSize(retry.epoch, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java b/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java index 905667d35c69..46f59f093668 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java @@ -51,9 +51,8 @@ import org.apache.cassandra.utils.ByteBufferUtil; import static com.google.common.base.Preconditions.checkNotNull; - import static org.apache.cassandra.service.accord.AccordSerializers.clusteringSerializer; -import static org.apache.cassandra.service.accord.txn.TxnRead.SERIAL_READ; +import static org.apache.cassandra.service.accord.txn.TxnRead.CAS_READ; import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; import static org.apache.cassandra.utils.CollectionSerializers.serializeList; @@ -333,7 +332,7 @@ public ColumnConditionsAdapter(Clustering clustering, Collection bound public boolean applies(@Nonnull TxnData data) { checkNotNull(data); - FilteredPartition partition = data.get(SERIAL_READ); + FilteredPartition partition = data.get(CAS_READ); Row row = partition != null ? partition.getRow(clustering) : null; for (Bound bound : bounds) { diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnData.java b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java index c3d8f6e18df6..9c2ae88f838c 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnData.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java @@ -24,11 +24,15 @@ import java.util.Map; import java.util.Set; +import com.google.common.collect.Maps; + import accord.api.Data; -import accord.api.Result; +import org.apache.cassandra.db.EmptyIterators; +import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.db.partitions.PartitionIterators; import org.apache.cassandra.db.rows.DeserializationHelper; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.UnfilteredRowIterator; @@ -43,7 +47,9 @@ import org.apache.cassandra.utils.NullableSerializer; import org.apache.cassandra.utils.ObjectSizes; -public class TxnData implements Data, Result, Iterable +import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.txn_data; + +public class TxnData extends TxnResult implements Data, Iterable { private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnData()); @@ -74,8 +80,13 @@ public Set> entrySet() return data.entrySet(); } + public boolean isEmpty() + { + return data.isEmpty(); + } + @Override - public Data merge(Data data) + public TxnData merge(Data data) { TxnData that = (TxnData) data; TxnData merged = new TxnData(); @@ -94,6 +105,7 @@ public static Data merge(Data left, Data right) return left.merge(right); } + @Override public long estimatedSizeOnHeap() { long size = EMPTY_SIZE; @@ -124,6 +136,14 @@ public boolean equals(Object o) return data.equals(that.data); } + public static TxnData emptyPartition(TxnDataName name, SinglePartitionReadCommand command) + { + TxnData result = new TxnData(); + FilteredPartition empty = FilteredPartition.create(PartitionIterators.getOnlyElement(EmptyIterators.partition(), command)); + result.put(name, empty); + return result; + } + private static final IVersionedSerializer partitionSerializer = new IVersionedSerializer() { @Override @@ -158,7 +178,13 @@ public long serializedSize(FilteredPartition partition, int version) } }; - public static final IVersionedSerializer serializer = new IVersionedSerializer() + @Override + public Kind kind() + { + return txn_data; + } + + public static final TxnResultSerializer serializer = new TxnResultSerializer() { @Override public void serialize(TxnData data, DataOutputPlus out, int version) throws IOException @@ -174,8 +200,8 @@ public void serialize(TxnData data, DataOutputPlus out, int version) throws IOEx @Override public TxnData deserialize(DataInputPlus in, int version) throws IOException { - Map data = new HashMap<>(); - long size = in.readUnsignedVInt(); + int size = in.readUnsignedVInt32(); + Map data = Maps.newHashMapWithExpectedSize(size); for (int i=0; i getParts() return Collections.unmodifiableList(Arrays.asList(parts)); } - public boolean isAutoRead() - { - return kind == Kind.AUTO_READ; - } - public DecoratedKey getDecoratedKey(TableMetadata metadata) { checkKind(Kind.AUTO_READ); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java index acab7c89f792..df41e78b53e8 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -23,8 +23,10 @@ import java.util.Objects; import java.util.concurrent.TimeUnit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.api.Data; -import accord.local.SafeCommandStore; import accord.primitives.Timestamp; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; @@ -49,6 +51,9 @@ public class TxnNamedRead extends AbstractSerialized { + @SuppressWarnings("unused") + private static final Logger logger = LoggerFactory.getLogger(TxnNamedRead.class); + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnNamedRead(null, null, null)); private final TxnDataName name; @@ -111,7 +116,7 @@ public PartitionKey key() return key; } - public AsyncChain read(boolean isForWriteTxn, SafeCommandStore safeStore, Timestamp executeAt) + public AsyncChain read(Timestamp executeAt) { SinglePartitionReadCommand command = (SinglePartitionReadCommand) get(); // TODO (required, safety): before release, double check reasoning that this is safe @@ -121,7 +126,16 @@ public AsyncChain read(boolean isForWriteTxn, SafeCommandStore safeStore, // this simply looks like the transaction witnessed TTL'd data and the data then expired // immediately after the transaction executed, and this simplifies things a great deal int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(executeAt.hlc()); + return performLocalRead(command, nowInSeconds); + } + public ReadCommand command() + { + return get(); + } + + private AsyncChain performLocalRead(SinglePartitionReadCommand command, int nowInSeconds) + { return AsyncChains.ofCallable(Stage.READ.executor(), () -> { SinglePartitionReadCommand read = command.withNowInSec(nowInSeconds); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java index 115864d8302b..6005673fa6d8 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -22,21 +22,33 @@ import javax.annotation.Nullable; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; import accord.api.Data; import accord.api.Query; import accord.api.Read; import accord.api.Result; import accord.api.Update; +import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import org.apache.cassandra.config.Config.LWTStrategy; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.EmptyIterators; +import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.ObjectSizes; import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.cassandra.service.accord.txn.TxnRead.CAS_READ; public abstract class TxnQuery implements Query { @@ -49,7 +61,7 @@ protected byte type() } @Override - public Result compute(TxnId txnId, Timestamp executeAt, Data data, @Nullable Read read, @Nullable Update update) + public Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, @Nullable Data data, @Nullable Read read, @Nullable Update update) { return data != null ? (TxnData) data : new TxnData(); } @@ -64,7 +76,7 @@ protected byte type() } @Override - public Result compute(TxnId txnId, Timestamp executeAt, Data data, @Nullable Read read, @Nullable Update update) + public Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, @Nullable Data data, @Nullable Read read, @Nullable Update update) { return new TxnData(); } @@ -79,19 +91,52 @@ protected byte type() } @Override - public Result compute(TxnId txnId, Timestamp executeAt, Data data, @Nullable Read read, Update update) + public Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, @Nullable Data data, @Nullable Read read, Update update) { checkNotNull(txnId, "txnId should not be null"); checkNotNull(data, "data should not be null"); checkNotNull(update, "update should not be null"); - TxnUpdate txnUpdate = (TxnUpdate)update; - boolean conditionCheck = txnUpdate.checkCondition(data); + + AccordUpdate accordUpdate = (AccordUpdate)update; + TxnData txnData = (TxnData)data; + boolean conditionCheck = accordUpdate.checkCondition(data); // If the condition applied an empty result indicates success if (conditionCheck) return new TxnData(); + else if (txnData.isEmpty()) + { + TxnRead txnRead = (TxnRead)read; + SinglePartitionReadCommand command = (SinglePartitionReadCommand)txnRead.iterator().next().get(); + // For CAS must return a non-empty result to indicate error even if there was no partition found + return new TxnData(ImmutableMap.of(CAS_READ, FilteredPartition.create(EmptyIterators.row(command.metadata(), command.partitionKey(), command.isReversed())))); + } else - // If it failed to apply the partition contents (if present) are returned and it indicates failure - return (TxnData)data; + // If it failed to apply the partition contents are returned and it indicates failure + return ((TxnData)data); + } + }; + + public static final TxnQuery EMPTY = new TxnQuery() + { + + @Override + protected byte type() + { + return 4; + } + + @Override + public Result compute(TxnId txnId, Timestamp executeAt, Seekables keys, @Nullable Data data, @Nullable Read read, @Nullable Update update) + { + // Skip the migration checks in the base class for empty transactions, we don't + // want/need the RetryWithNewProtocolResult + return new TxnData(); + } + + @Override + protected Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, @Nullable Data data, @Nullable Read read, @Nullable Update update) + { + throw new UnsupportedOperationException(); } }; @@ -101,6 +146,27 @@ private TxnQuery() {} abstract protected byte type(); + abstract protected Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, @Nullable Data data, @Nullable Read read, @Nullable Update update); + + @Override + public Result compute(TxnId txnId, Timestamp executeAt, Seekables keys, @Nullable Data data, @Nullable Read read, @Nullable Update update) + { + Epoch epoch = Epoch.create(executeAt.epoch()); + if (transactionIsInMigratingOrMigratedRange(epoch, keys)) + { + // Fail fast because we can't be sure where this request should really run or what was intended + if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) + throw new IllegalStateException("Mixing a hard coded strategy with migration is unsupported"); + + if (txnId.isWrite()) + ClientRequestsMetricsHolder.accordWriteMetrics.accordMigrationRejects.mark(); + else + ClientRequestsMetricsHolder.accordReadMetrics.accordMigrationRejects.mark(); + return new RetryWithNewProtocolResult(epoch); + } + return doCompute(txnId, executeAt, keys, data, read, update); + } + public long estimatedSizeOnHeap() { return SIZE; @@ -111,7 +177,7 @@ public long estimatedSizeOnHeap() @Override public void serialize(TxnQuery query, DataOutputPlus out, int version) throws IOException { - Preconditions.checkArgument(query == null || query == ALL || query == NONE || query == CONDITION); + Preconditions.checkArgument(query == null | query == ALL | query == NONE | query == CONDITION | query == EMPTY); out.writeByte(query == null ? 0 : query.type()); } @@ -125,14 +191,31 @@ public TxnQuery deserialize(DataInputPlus in, int version) throws IOException case 1: return ALL; case 2: return NONE; case 3: return CONDITION; + case 4: return EMPTY; } } @Override public long serializedSize(TxnQuery query, int version) { - Preconditions.checkArgument(query == null || query == ALL || query == NONE || query == CONDITION); + Preconditions.checkArgument(query == null | query == ALL | query == NONE | query == CONDITION | query == EMPTY); return TypeSizes.sizeof((byte)2); } }; + + private static boolean transactionIsInMigratingOrMigratedRange(Epoch epoch, Seekables keys) + { + // Whatever this transaction might be it isn't one supported for migration anyways + if (!keys.domain().isKey()) + return false; + + if (keys.size() > 1) + // It has to be a transaction statement and we don't support migration with those + return false; + // Could be a transaction statement, but this check does no additional harm + // and transaction statement will generate an error when it sees + // the RetryOnNewProtocolResult + PartitionKey partitionKey = (PartitionKey)keys.get(0); + return ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(epoch, partitionKey.tableId(), partitionKey.partitionKey()); + } } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index de50b9652462..122336ad122b 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -22,6 +22,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import com.google.common.collect.ImmutableList; @@ -33,49 +35,89 @@ import accord.primitives.Ranges; import accord.primitives.Seekable; import accord.primitives.Timestamp; -import accord.primitives.Txn; import accord.utils.SortedArrays; import org.apache.cassandra.db.SinglePartitionReadCommand; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; +import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.txn.TxnDataName.Kind; import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.Simulate; import static accord.utils.SortedArrays.Search.CEIL; +import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.cassandra.service.accord.AccordSerializers.consistencyLevelSerializer; +import static org.apache.cassandra.service.accord.IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS; import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializedArraySize; +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; +import static org.apache.cassandra.utils.Simulate.With.MONITORS; public class TxnRead extends AbstractKeySorted implements Read { // There is only potentially one partition in a CAS and SERIAL/LOCAL_SERIAL read public static final String SERIAL_READ_NAME = "SERIAL_READ"; public static final TxnDataName SERIAL_READ = TxnDataName.user(SERIAL_READ_NAME); - private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnRead(new TxnNamedRead[0], null)); - public static final TxnRead EMPTY = new TxnRead(new TxnNamedRead[0], Keys.EMPTY); + public static final TxnRead EMPTY = new TxnRead(new TxnNamedRead[0], Keys.EMPTY, null); + private static final long EMPTY_SIZE = ObjectSizes.measure(EMPTY); + + public static final String CAS_READ_NAME = "CAS_READ"; + public static final TxnDataName CAS_READ = new TxnDataName(Kind.CAS_READ, CAS_READ_NAME); + + @Nonnull private final Keys txnKeys; - - public TxnRead(TxnNamedRead[] items, Keys txnKeys) + + // Cassandra's consistency level used by Accord to safely read data written outside of Accord + @Nullable + private final ConsistencyLevel cassandraConsistencyLevel; + + public TxnRead(@Nonnull TxnNamedRead[] items, @Nonnull Keys txnKeys, @Nullable ConsistencyLevel cassandraConsistencyLevel) { super(items); + checkArgument(cassandraConsistencyLevel == null || SUPPORTED_READ_CONSISTENCY_LEVELS.contains(cassandraConsistencyLevel), "Unsupported consistency level for read"); this.txnKeys = txnKeys; + this.cassandraConsistencyLevel = cassandraConsistencyLevel; } - public TxnRead(List items, Keys txnKeys) + public TxnRead(@Nonnull List items, @Nonnull Keys txnKeys, @Nullable ConsistencyLevel cassandraConsistencyLevel) { super(items); + checkArgument(cassandraConsistencyLevel == null || SUPPORTED_READ_CONSISTENCY_LEVELS.contains(cassandraConsistencyLevel), "Unsupported consistency level for read"); this.txnKeys = txnKeys; + this.cassandraConsistencyLevel = cassandraConsistencyLevel; + } + + public static TxnRead createTxnRead(@Nonnull List items, @Nonnull Keys txnKeys, @Nullable ConsistencyLevel consistencyLevel) + { + return new TxnRead(items, txnKeys, consistencyLevel); } - public static TxnRead createSerialRead(SinglePartitionReadCommand readCommand) + public static TxnRead createSerialRead(SinglePartitionReadCommand readCommand, ConsistencyLevel consistencyLevel) { TxnNamedRead read = new TxnNamedRead(SERIAL_READ, readCommand); - return new TxnRead(ImmutableList.of(read), Keys.of(read.key())); + return new TxnRead(ImmutableList.of(read), Keys.of(read.key()), consistencyLevel); + } + + public static TxnRead createCasRead(SinglePartitionReadCommand readCommand, ConsistencyLevel consistencyLevel) + { + TxnNamedRead read = new TxnNamedRead(CAS_READ, readCommand); + return new TxnRead(ImmutableList.of(read), Keys.of(read.key()), consistencyLevel); + } + + // A read that declares it will read from keys but doesn't actually read any data so dependent transactions will + // still be applied first + public static TxnRead createNoOpRead(Keys keys) + { + return new TxnRead(ImmutableList.of(), keys, null); } public long estimatedSizeOnHeap() @@ -110,9 +152,9 @@ public Keys keys() return txnKeys; } - public Keys readKeys() + public ConsistencyLevel cassandraConsistencyLevel() { - return itemKeys; + return cassandraConsistencyLevel; } @Override @@ -125,7 +167,7 @@ public Read slice(Ranges ranges) if (keys.contains(read.key())) reads.add(read); - return new TxnRead(reads, txnKeys.slice(ranges)); + return createTxnRead(reads, txnKeys.slice(ranges), cassandraConsistencyLevel); } @Override @@ -138,7 +180,7 @@ public Read merge(Read read) if (!reads.contains(namedRead)) reads.add(namedRead); - return new TxnRead(reads, txnKeys.with((Keys)read.keys())); + return createTxnRead(reads, txnKeys.with((Keys)read.keys()), cassandraConsistencyLevel); } @Override @@ -158,12 +200,13 @@ public boolean isEqualOrFuller(Read other) } @Override - public AsyncChain read(Seekable key, Txn.Kind kind, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) + public AsyncChain read(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) { List> results = new ArrayList<>(); - forEachWithKey((PartitionKey) key, read -> results.add(read.read(kind.isWrite(), safeStore, executeAt))); + forEachWithKey((PartitionKey) key, read -> results.add(read.read(executeAt))); if (results.isEmpty()) + // Result type must match everywhere return AsyncChains.success(new TxnData()); if (results.size() == 1) @@ -172,6 +215,7 @@ public AsyncChain read(Seekable key, Txn.Kind kind, SafeCommandStore safeS return AsyncChains.reduce(results, Data::merge); } + @Simulate(with = MONITORS) public static final IVersionedSerializer serializer = new IVersionedSerializer() { @Override @@ -179,13 +223,16 @@ public void serialize(TxnRead read, DataOutputPlus out, int version) throws IOEx { KeySerializers.keys.serialize(read.txnKeys, out, version); serializeArray(read.items, out, version, TxnNamedRead.serializer); + serializeNullable(read.cassandraConsistencyLevel, out, version, consistencyLevelSerializer); } @Override public TxnRead deserialize(DataInputPlus in, int version) throws IOException { Keys keys = KeySerializers.keys.deserialize(in, version); - return new TxnRead(deserializeArray(in, version, TxnNamedRead.serializer, TxnNamedRead[]::new), keys); + TxnNamedRead[] items = deserializeArray(in, version, TxnNamedRead.serializer, TxnNamedRead[]::new); + ConsistencyLevel consistencyLevel = deserializeNullable(in, version, consistencyLevelSerializer); + return new TxnRead(items, keys, consistencyLevel); } @Override @@ -193,6 +240,7 @@ public long serializedSize(TxnRead read, int version) { long size = KeySerializers.keys.serializedSize(read.txnKeys, version); size += serializedArraySize(read.items, version, TxnNamedRead.serializer); + size += serializedNullableSize(read.cassandraConsistencyLevel, version, consistencyLevelSerializer); return size; } }; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnResult.java b/src/java/org/apache/cassandra/service/accord/txn/TxnResult.java new file mode 100644 index 000000000000..38eeb88aca7e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnResult.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; + +import accord.api.Result; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +import static org.apache.cassandra.db.TypeSizes.sizeof; + +public abstract class TxnResult implements Result +{ + public interface TxnResultSerializer extends IVersionedSerializer {} + + public enum Kind + { + txn_data(0), + retry_new_protocol(1); + + int id; + + Kind(int id) + { + this.id = id; + } + + public TxnResultSerializer serializer() + { + switch (this) + { + case txn_data: + return TxnData.serializer; + case retry_new_protocol: + return RetryWithNewProtocolResult.serializer; + default: + throw new IllegalStateException("Unrecognized kind " + this); + } + } + } + + public abstract Kind kind(); + + public abstract long estimatedSizeOnHeap(); + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @SuppressWarnings("unchecked") + @Override + public void serialize(TxnResult txnResult, DataOutputPlus out, int version) throws IOException + { + out.writeByte(txnResult.kind().ordinal()); + txnResult.kind().serializer().serialize(txnResult, out, version); + } + + @Override + public TxnResult deserialize(DataInputPlus in, int version) throws IOException + { + TxnResult.Kind kind = TxnResult.Kind.values()[in.readByte()]; + return (TxnResult)kind.serializer().deserialize(in, version); + } + + @SuppressWarnings("unchecked") + @Override + public long serializedSize(TxnResult txnResult, int version) + { + return sizeof((byte)txnResult.kind().ordinal()) + txnResult.kind().serializer().serializedSize(txnResult, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index 01a87a0e9d39..cd2aa8a3327e 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -26,16 +26,17 @@ import java.util.List; import java.util.Objects; import java.util.function.Function; +import javax.annotation.Nullable; import accord.api.Data; import accord.api.Key; import accord.api.Update; -import accord.api.Write; import accord.primitives.Keys; import accord.primitives.Ranges; import accord.primitives.RoutableKey; import accord.primitives.Timestamp; import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputBuffer; @@ -43,13 +44,17 @@ import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.service.accord.AccordObjectSizes; import org.apache.cassandra.service.accord.AccordSerializers; +import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; +import static accord.utils.Invariants.checkArgument; import static accord.utils.SortedArrays.Search.CEIL; +import static org.apache.cassandra.service.accord.AccordSerializers.consistencyLevelSerializer; import static org.apache.cassandra.service.accord.AccordSerializers.serialize; import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializeArray; @@ -57,39 +62,50 @@ import static org.apache.cassandra.utils.ByteBufferUtil.readWithVIntLength; import static org.apache.cassandra.utils.ByteBufferUtil.serializedSizeWithVIntLength; import static org.apache.cassandra.utils.ByteBufferUtil.writeWithVIntLength; +import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; +import static org.apache.cassandra.utils.NullableSerializer.serializedNullableSize; -public class TxnUpdate implements Update +public class TxnUpdate extends AccordUpdate { - private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnUpdate(null, new ByteBuffer[0], null)); + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnUpdate(null, new ByteBuffer[0], null, null)); private final Keys keys; private final ByteBuffer[] fragments; private final ByteBuffer condition; + @Nullable + private final ConsistencyLevel cassandraCommitCL; + // Memoize computation of condition private Boolean conditionResult; - public TxnUpdate(List fragments, TxnCondition condition) + public TxnUpdate(List fragments, TxnCondition condition, @Nullable ConsistencyLevel cassandraCommitCL) { + checkArgument(cassandraCommitCL == null || IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(cassandraCommitCL)); // TODO: Figure out a way to shove keys into TxnCondition, and have it implement slice/merge. this.keys = Keys.of(fragments, fragment -> fragment.key); fragments.sort(TxnWrite.Fragment::compareKeys); this.fragments = toSerializedValuesArray(keys, fragments, fragment -> fragment.key, TxnWrite.Fragment.serializer); this.condition = serialize(condition, TxnCondition.serializer); + this.cassandraCommitCL = cassandraCommitCL; } - private TxnUpdate(Keys keys, ByteBuffer[] fragments, ByteBuffer condition) + private TxnUpdate(Keys keys, ByteBuffer[] fragments, ByteBuffer condition, ConsistencyLevel cassandraCommitCL) { this.keys = keys; this.fragments = fragments; this.condition = condition; + this.cassandraCommitCL = cassandraCommitCL; } + @Override public long estimatedSizeOnHeap() { long size = EMPTY_SIZE + ByteBufferUtil.estimatedSizeOnHeap(condition); for (ByteBuffer update : fragments) size += ByteBufferUtil.estimatedSizeOnHeap(update); + size += AccordObjectSizes.keys(keys); return size; } @@ -145,7 +161,7 @@ public Update slice(Ranges ranges) { Keys keys = this.keys.slice(ranges); // TODO: Slice the condition. - return new TxnUpdate(keys, select(this.keys, keys, fragments), condition); + return new TxnUpdate(keys, select(this.keys, keys, fragments), condition, cassandraCommitCL); } private static ByteBuffer[] select(Keys in, Keys out, ByteBuffer[] from) @@ -167,7 +183,7 @@ public Update merge(Update update) TxnUpdate that = (TxnUpdate) update; Keys mergedKeys = this.keys.with(that.keys); ByteBuffer[] mergedFragments = merge(this.keys, that.keys, this.fragments, that.fragments, mergedKeys.size()); - return new TxnUpdate(mergedKeys, mergedFragments, condition); + return new TxnUpdate(mergedKeys, mergedFragments, condition, cassandraCommitCL); } private static ByteBuffer[] merge(Keys leftKeys, Keys rightKeys, ByteBuffer[] left, ByteBuffer[] right, int outputSize) @@ -188,7 +204,7 @@ private static ByteBuffer[] merge(Keys leftKeys, Keys rightKeys, ByteBuffer[] le } @Override - public Write apply(Timestamp executeAt, Data data) + public TxnWrite apply(Timestamp executeAt, Data data) { if (!checkCondition(data)) return TxnWrite.EMPTY_CONDITION_FAILED; @@ -198,6 +214,7 @@ public Write apply(Timestamp executeAt, Data data) QueryOptions options = QueryOptions.forProtocolVersion(ProtocolVersion.CURRENT); AccordUpdateParameters parameters = new AccordUpdateParameters((TxnData) data, options); + // First completes all fragments and join them with the repairs pending for those partitions for (TxnWrite.Fragment fragment : fragments) // Filter out fragments that already constitute complete updates to avoid persisting them via TxnWrite: if (!fragment.isComplete()) @@ -218,7 +235,7 @@ public List completeUpdatesForKey(RoutableKey key) return updates; } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final AccordUpdateSerializer serializer = new AccordUpdateSerializer() { @Override public void serialize(TxnUpdate update, DataOutputPlus out, int version) throws IOException @@ -226,6 +243,7 @@ public void serialize(TxnUpdate update, DataOutputPlus out, int version) throws KeySerializers.keys.serialize(update.keys, out, version); writeWithVIntLength(update.condition, out); serializeArray(update.fragments, out, version, ByteBufferUtil.byteBufferSerializer); + serializeNullable(update.cassandraCommitCL, out, version, consistencyLevelSerializer); } @Override @@ -234,7 +252,8 @@ public TxnUpdate deserialize(DataInputPlus in, int version) throws IOException Keys keys = KeySerializers.keys.deserialize(in, version); ByteBuffer condition = readWithVIntLength(in); ByteBuffer[] fragments = deserializeArray(in, version, ByteBufferUtil.byteBufferSerializer, ByteBuffer[]::new); - return new TxnUpdate(keys, fragments, condition); + ConsistencyLevel consistencyLevel = deserializeNullable(in, version, consistencyLevelSerializer); + return new TxnUpdate(keys, fragments, condition, consistencyLevel); } @Override @@ -243,6 +262,7 @@ public long serializedSize(TxnUpdate update, int version) long size = KeySerializers.keys.serializedSize(update.keys, version); size += serializedSizeWithVIntLength(update.condition); size += serializedArraySize(update.fragments, version, ByteBufferUtil.byteBufferSerializer); + size += serializedNullableSize(update.cassandraCommitCL, version, consistencyLevelSerializer); assert(ByteBufferUtil.serialized(this, update, version).remaining() == size); return size; } @@ -323,7 +343,7 @@ private static List deserialize(ByteBuffer[] buffers, IVersionedSerialize return result; } - // maybeCheckCondition? checkConditionMemoized? + @Override public boolean checkCondition(Data data) { // Assert data that was memoized is same as data that is provided? @@ -333,4 +353,16 @@ public boolean checkCondition(Data data) conditionResult = condition.applies((TxnData) data); return conditionResult; } + + @Override + public Kind kind() + { + return Kind.TXN; + } + + @Override + public ConsistencyLevel cassandraCommitCL() + { + return cassandraCommitCL; + } } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 89599bc90ecb..27406036d93d 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -26,14 +26,22 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import javax.annotation.Nonnull; -import accord.primitives.*; +import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import accord.api.DataStore; import accord.api.Write; import accord.local.SafeCommandStore; +import accord.primitives.PartialTxn; +import accord.primitives.RoutableKey; +import accord.primitives.Seekable; +import accord.primitives.Timestamp; +import accord.primitives.Writes; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Stage; @@ -45,25 +53,31 @@ import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordSafeCommandStore; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.BooleanSerializer; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; -import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; +import static org.apache.cassandra.cql3.terms.Lists.accordListPathSupplier; import static org.apache.cassandra.service.accord.AccordSerializers.partitionUpdateSerializer; +import static org.apache.cassandra.utils.ArraySerializers.deserializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializeArray; import static org.apache.cassandra.utils.ArraySerializers.serializedArraySize; public class TxnWrite extends AbstractKeySorted implements Write { + @SuppressWarnings("unused") + private static final Logger logger = LoggerFactory.getLogger(TxnWrite.class); + public static final TxnWrite EMPTY_CONDITION_FAILED = new TxnWrite(Collections.emptyList(), false); private static final long EMPTY_SIZE = ObjectSizes.measure(EMPTY_CONDITION_FAILED); @@ -121,9 +135,9 @@ public String toString() '}'; } - public AsyncChain write(long timestamp, int nowInSeconds) + public AsyncChain write(@Nonnull Function cellToMaybeNewListPath, long timestamp, int nowInSeconds) { - PartitionUpdate update = new PartitionUpdate.Builder(get(), 0).updateAllTimestampAndLocalDeletionTime(timestamp, nowInSeconds).build(); + PartitionUpdate update = new PartitionUpdate.Builder(get(), 0).updateTimesAndPathsForAccord(cellToMaybeNewListPath, timestamp, nowInSeconds).build(); Mutation mutation = new Mutation(update); return AsyncChains.ofRunnable(Stage.MUTATION.executor(), mutation::apply); } @@ -142,7 +156,6 @@ public void serialize(Update write, DataOutputPlus out, int version) throws IOEx PartitionKey.serializer.serialize(write.key, out, version); out.writeInt(write.index); ByteBufferUtil.writeWithVIntLength(write.bytes(), out); - } @Override @@ -220,12 +233,12 @@ public boolean isComplete() { return referenceOps.isEmpty(); } - + public Update toUpdate() { return new Update(key, index, baseUpdate); } - + public Update complete(AccordUpdateParameters parameters) { if (isComplete()) @@ -238,7 +251,7 @@ public Update complete(AccordUpdateParameters parameters) baseUpdate.rowCount(), baseUpdate.canHaveShadowedData()); - UpdateParameters up = parameters.updateParameters(baseUpdate.metadata(), index); + UpdateParameters up = parameters.updateParameters(baseUpdate.metadata(), key, index); TxnData data = parameters.getData(); Row staticRow = applyUpdates(baseUpdate.staticRow(), referenceOps.statics, key, Clustering.STATIC_CLUSTERING, up, data); @@ -323,7 +336,7 @@ public long serializedSize(Fragment fragment, int version) } private final boolean isConditionMet; - + private TxnWrite(Update[] items, boolean isConditionMet) { super(items); @@ -370,7 +383,8 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam // Apply updates not specified fully by the client but built from fragments completed by data from reads. // This occurs, for example, when an UPDATE statement uses a value assigned by a LET statement. - forEachWithKey((PartitionKey) key, write -> results.add(write.write(timestamp, nowInSeconds))); + Function accordListPathSuppler = accordListPathSupplier(timestamp); + forEachWithKey((PartitionKey) key, write -> results.add(write.write(accordListPathSuppler, timestamp, nowInSeconds))); if (isConditionMet) { @@ -380,7 +394,7 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam TxnUpdate txnUpdate = (TxnUpdate) txn.update(); assert txnUpdate != null : "PartialTxn should contain an update if we're applying a write!"; List updates = txnUpdate.completeUpdatesForKey((RoutableKey) key); - updates.forEach(update -> results.add(update.write(timestamp, nowInSeconds))); + updates.forEach(update -> results.add(update.write(accordListPathSuppler, timestamp, nowInSeconds))); } if (results.isEmpty()) diff --git a/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java new file mode 100644 index 000000000000..310a3e6a5890 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.txn; + +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import accord.api.Data; +import accord.api.Update; +import accord.api.Write; +import accord.local.Node; +import accord.primitives.Ranges; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.Endpoints; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.service.reads.ReadCoordinator; +import org.apache.cassandra.service.reads.repair.BlockingReadRepair; + +/** + * This update is used to support blocking read repair from non-transactional Cassandra reads. Cassandra creates + * a read repair mutation per node and this enables some partitiosn to be readable that would otherwise run into messages + * size limits. + * + * This update is used during the `Execute` phase to apply the repair mutations directly in AccordInteropExecution similar + * to how Accord applies read repair mutations for normal Accord transactions. It will always produce an empty update + * for Accord to use in the Apply phase because Accord doesn't support a per replica Apply and adding it would be redundant + * with the support that exists in AccordInteropExecution. + * + * The state for this update is always kept in memory and is never serialized. Only the Id is propagated so the cache + * can evict the update and then load it back. We don't need to persist it or have it be recoverable because if the original + * coordinator fails to complete the transaction then the dependent Cassandra read that triggered the read repair will + * also fail and it doesn't matter if the read repair is partially applied or not applied at all. + */ +public class UnrecoverableRepairUpdate, P extends ReplicaPlan.ForRead> extends AccordUpdate +{ + private static final ConcurrentHashMap inflightUpdates = new ConcurrentHashMap<>(); + + public static UnrecoverableRepairUpdate removeInflightUpdate(Key updateKey) + { + return inflightUpdates.remove(updateKey); + } + + private static class Key + { + final int nodeId; + final long counter; + + private Key(@Nonnull int nodeId, long counter) + { + this.nodeId = nodeId; + this.counter = counter; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + Key key = (Key) o; + + if (nodeId != key.nodeId) return false; + return counter == key.counter; + } + + @Override + public int hashCode() + { + int result = nodeId; + result = 31 * result + (int) (counter ^ (counter >>> 32)); + return result; + } + } + + private static final AtomicLong nextCounter = new AtomicLong(0); + + public final BlockingReadRepair parent; + public final Seekables keys; + public final DecoratedKey dk; + public final Map mutations; + public final ReplicaPlan.ForWrite writePlan; + public final Key updateKey; + + private UnrecoverableRepairUpdate(Node.Id nodeId, BlockingReadRepair parent, + Seekables keys, DecoratedKey dk, Map mutations, ReplicaPlan.ForWrite writePlan) + { + this.parent = parent; + this.keys = keys; + this.dk = dk; + this.mutations = mutations; + this.writePlan = writePlan; + this.updateKey = new Key(nodeId.id, nextCounter.getAndIncrement()); + } + + public static , P extends ReplicaPlan.ForRead> UnrecoverableRepairUpdate create(Node.Id nodeId, BlockingReadRepair parent, + Seekables keys, DecoratedKey dk, Map mutations, + ReplicaPlan.ForWrite writePlan) + { + UnrecoverableRepairUpdate update = new UnrecoverableRepairUpdate<>(nodeId, parent, keys, dk, mutations, writePlan); + inflightUpdates.put(update.updateKey, update); + return update; + } + + @Override + public Seekables keys() + { + return keys; + } + + @Override + public Write apply(Timestamp executeAt, @Nullable Data data) + { + return TxnWrite.EMPTY_CONDITION_FAILED; + } + + @Override + public Update slice(Ranges ranges) + { + return this; + } + + @Override + public Update merge(Update other) + { + return this; + } + + @Override + public ConsistencyLevel cassandraCommitCL() + { + // Leads to standard async persist/commit which is fine since the repair mutations were applied + // as part of execute/read + return null; + } + + @Override + public Kind kind() + { + return Kind.UNRECOVERABLE_REPAIR; + } + + @Override + public long estimatedSizeOnHeap() + { + return 0; + } + + public void runBRR(ReadCoordinator readCoordinator) + { + // This read repair is effectively running as a delegate of the read repair instance that did the reads + // to generate the mutations, but since we already have the mutations we can go ahead and apply them + // now that we are inside a transaction that guarantees that the contents of the mutations consist + // of committed data everywhere we go to apply it + parent.repairPartitionDirectly(readCoordinator, dk, mutations, writePlan); + } + + public static final AccordUpdateSerializer serializer = new AccordUpdateSerializer() + { + @Override + public void serialize(UnrecoverableRepairUpdate update, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(update.updateKey.nodeId); + out.writeUnsignedVInt(update.updateKey.counter); + } + + @Override + public UnrecoverableRepairUpdate deserialize(DataInputPlus in, int version) throws IOException + { + return inflightUpdates.get(new Key(in.readUnsignedVInt32(), in.readUnsignedVInt())); + } + + @Override + public long serializedSize(UnrecoverableRepairUpdate update, int version) + { + return TypeSizes.sizeofUnsignedVInt(update.updateKey.nodeId) + TypeSizes.sizeofUnsignedVInt(update.updateKey.counter); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java new file mode 100644 index 000000000000..512943e4633c --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.primitives.Ints; + +import accord.api.BarrierType; +import accord.primitives.Seekables; +import com.github.benmanes.caffeine.cache.CacheLoader; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.LoadingCache; +import com.github.benmanes.caffeine.cache.Weigher; +import org.apache.cassandra.concurrent.ImmediateExecutor; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.SystemKeyspace; +import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.exceptions.CasWriteTimeoutException; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; +import org.apache.cassandra.service.paxos.AbstractPaxosRepair.Failure; +import org.apache.cassandra.service.paxos.AbstractPaxosRepair.Result; +import org.apache.cassandra.service.paxos.PaxosRepair; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.UUIDSerializer; + +import static org.apache.cassandra.net.Verb.CONSENSUS_KEY_MIGRATION; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.paxos; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +/** + * Tracks the migration state of individual keys storing the migration (or not) in system.consensus_migration_state + * with an in-memory cache in front. Only locally replicated keys are tracked here to avoid storing too much + * state when token aware routing is not used. + * + * It is safe to migrate keys multiple times so no effort is made to ensure exactly once behavior and the system table + * expires key migration state after 7 days. + */ +public abstract class ConsensusKeyMigrationState +{ + /* + * Used to notify other replicas when key migration has occurred so they can + * also cache that the key migration was done + */ + public static class ConsensusKeyMigrationFinished + { + @Nonnull + private final UUID tableId; + @Nonnull + private final ByteBuffer partitionKey; + @Nonnull + private final ConsensusMigratedAt consensusMigratedAt; + + private ConsensusKeyMigrationFinished(@Nonnull UUID tableId, + @Nonnull ByteBuffer partitionKey, + @Nonnull ConsensusMigratedAt consensusMigratedAt) + { + this.tableId = tableId; + this.partitionKey = partitionKey; + this.consensusMigratedAt = consensusMigratedAt; + } + + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(ConsensusKeyMigrationFinished t, DataOutputPlus out, int version) throws IOException + { + UUIDSerializer.serializer.serialize(t.tableId, out, version); + ByteBufferUtil.writeWithVIntLength(t.partitionKey, out); + ConsensusMigratedAt.serializer.serialize(t.consensusMigratedAt, out, version); + } + + @Override + public ConsensusKeyMigrationFinished deserialize(DataInputPlus in, int version) throws IOException + { + UUID tableId = UUIDSerializer.serializer.deserialize(in, version); + ByteBuffer partitionKey = ByteBufferUtil.readWithVIntLength(in); + ConsensusMigratedAt consensusMigratedAt = ConsensusMigratedAt.serializer.deserialize(in, version); + return new ConsensusKeyMigrationFinished(tableId, partitionKey, consensusMigratedAt); + } + + @Override + public long serializedSize(ConsensusKeyMigrationFinished t, int version) + { + return UUIDSerializer.serializer.serializedSize(t.tableId, version) + + ByteBufferUtil.serializedSizeWithVIntLength(t.partitionKey) + + ConsensusMigratedAt.serializer.serializedSize(t.consensusMigratedAt, version); + } + }; + } + + /* + * Bundles various aspects of key migration state together to avoid multiple lookups + * and to communicate multiple result values and state + */ + public static class KeyMigrationState + { + static final KeyMigrationState MIGRATION_NOT_NEEDED = new KeyMigrationState(null, null, null, null); + + public final ConsensusMigratedAt consensusMigratedAt; + + public final Epoch currentEpoch; + + public final TableMigrationState tableMigrationState; + + public final DecoratedKey key; + + private KeyMigrationState(ConsensusMigratedAt consensusMigratedAt, Epoch currentEpoch, + TableMigrationState tableMigrationState, DecoratedKey key) + { + this.consensusMigratedAt = consensusMigratedAt; + this.currentEpoch = currentEpoch; + this.tableMigrationState = tableMigrationState; + this.key = key; + } + + /* + * This will trigger a distributed migration for the key, but will only block on local completion + * so Paxos reads can return a result as soon as the local state is ready + */ + public void maybePerformAccordToPaxosKeyMigration(boolean isForWrite) + { + if (paxosReadSatisfiedByKeyMigration()) + return; + + // TODO (desired): Better query start time + TableMigrationState tms = tableMigrationState; + repairKeyAccord(key, tms.keyspaceName, tms.tableId, tms.minMigrationEpoch(key.getToken()).getEpoch(), Dispatcher.RequestTime.forImmediateExecution(), false, isForWrite); + } + + private boolean paxosReadSatisfiedByKeyMigration() + { + // No migration in progress, it's safe + if (tableMigrationState == null) + return true; + + return tableMigrationState.paxosReadSatisfiedByKeyMigrationAtEpoch(key, consensusMigratedAt); + } + } + + private static final int EMPTY_KEY_SIZE = Ints.checkedCast(ObjectSizes.measureDeep(Pair.create(null, UUID.randomUUID()))); + private static final int VALUE_SIZE = Ints.checkedCast(ObjectSizes.measureDeep(new ConsensusMigratedAt(Epoch.EMPTY, ConsensusMigrationTarget.accord))); + + private static final CacheLoader, ConsensusMigratedAt> LOADING_FUNCTION = k -> SystemKeyspace.loadConsensusKeyMigrationState(k.left, k.right); + private static final Weigher, ConsensusMigratedAt> WEIGHER_FUNCTION = (k, v) -> EMPTY_KEY_SIZE + Ints.checkedCast(ByteBufferUtil.estimatedSizeOnHeap(k.left)) + VALUE_SIZE; + private static final LoadingCache, ConsensusMigratedAt> MIGRATION_STATE_CACHE = + Caffeine.newBuilder() + .maximumWeight(DatabaseDescriptor.getConsensusMigrationCacheSizeInMiB() << 20) + .weigher(WEIGHER_FUNCTION) + .executor(ImmediateExecutor.INSTANCE) + .build(LOADING_FUNCTION); + + public static final IVerbHandler consensusKeyMigrationFinishedHandler = message -> { + saveConsensusKeyMigrationLocally(message.payload.partitionKey, message.payload.tableId, message.payload.consensusMigratedAt); + }; + + private ConsensusKeyMigrationState() {} + + @VisibleForTesting + public static void reset() + { + MIGRATION_STATE_CACHE.invalidateAll(); + } + + public static void maybeSaveAccordKeyMigrationLocally(PartitionKey partitionKey, Epoch epoch) + { + TableId tableId = partitionKey.tableId(); + UUID tableUUID = tableId.asUUID(); + DecoratedKey dk = partitionKey.partitionKey(); + ByteBuffer key = dk.getKey(); + + TableMigrationState tms = ClusterMetadata.current().consensusMigrationState.tableStates.get(tableId); + if (tms == null) + return; + + ConsensusMigratedAt migratedAt = new ConsensusMigratedAt(epoch, paxos); + if (!tms.paxosReadSatisfiedByKeyMigrationAtEpoch(dk, migratedAt)) + return; + + saveConsensusKeyMigrationLocally(key, tableUUID, migratedAt); + } + + /* + * Should be called where we know we replicate the key so that the system table contains useful information + * about whether the migration already occurred. + * + * This is a more expensive check that might read from the system table to determine if migration occurred. + */ + public static KeyMigrationState getKeyMigrationState(TableId tableId, DecoratedKey key) + { + ClusterMetadata cm = ClusterMetadata.current(); + TableMigrationState tms = cm.consensusMigrationState.tableStates.get(tableId); + // No state means no migration for this table + if (tms == null) + return KeyMigrationState.MIGRATION_NOT_NEEDED; + + if (Range.isInNormalizedRanges(key.getToken(), tms.migratingRanges)) + { + ConsensusMigratedAt consensusMigratedAt = getConsensusMigratedAt(tableId, key); + if (consensusMigratedAt == null) + return new KeyMigrationState(null, cm.epoch, tms, key); + return new KeyMigrationState(consensusMigratedAt, cm.epoch, tms, key); + } + + return KeyMigrationState.MIGRATION_NOT_NEEDED; + } + + public static @Nullable ConsensusMigratedAt getConsensusMigratedAt(TableId tableId, DecoratedKey key) + { + return MIGRATION_STATE_CACHE.get(Pair.create(key.getKey(), tableId.asUUID())); + } + + /* + * Trigger a distributed repair of Accord state for this key. + */ + static void repairKeyAccord(DecoratedKey key, + String keyspace, + TableId tableId, + long minEpoch, + Dispatcher.RequestTime requestTime, + boolean global, + boolean isForWrite) + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tableId); + if (isForWrite) + ClientRequestsMetricsHolder.casWriteMetrics.accordKeyMigrations.mark(); + else + ClientRequestsMetricsHolder.casReadMetrics.accordKeyMigrations.mark(); + long start = nanoTime(); + try + { + // Global will always create a transaction to effect the barrier so all replicas + // will soon be ready to execute, but only waits for the local replica to be ready + // Local will only create a transaction if it can't find an existing one to wait on + BarrierType barrierType = global ? BarrierType.global_async : BarrierType.local; + AccordService.instance().barrier(Seekables.of(new PartitionKey(keyspace, tableId, key)), minEpoch, requestTime, DatabaseDescriptor.getTransactionTimeout(TimeUnit.NANOSECONDS), barrierType, isForWrite); + // We don't save the state to the cache here. Accord will notify the agent every time a barrier happens. + } + finally + { + cfs.metric.keyMigration.addNano(nanoTime() - start); + } + } + + static void repairKeyPaxos(EndpointsForToken naturalReplicas, + Epoch currentEpoch, + DecoratedKey key, + ColumnFamilyStore cfs, + ConsistencyLevel consistencyLevel, + Dispatcher.RequestTime requestTime, + long timeoutNanos, + boolean isLocallyReplicated, + boolean isForWrite) + { + if (isForWrite) + ClientRequestsMetricsHolder.accordWriteMetrics.paxosKeyMigrations.mark(); + else + ClientRequestsMetricsHolder.accordReadMetrics.paxosKeyMigrations.mark(); + TableMetadata tableMetadata = cfs.metadata(); + PaxosRepair repair = PaxosRepair.create(consistencyLevel, key, tableMetadata, timeoutNanos); + long start = nanoTime(); + repair.start(requestTime.startedAtNanos()); + Result result; + try + { + result = repair.await(); + switch (result.outcome) + { + default: + case CANCELLED: + throw new IllegalStateException("Unexpected PaxosRepair outcome " + result.outcome); + case DONE: + // Don't want to repeatedly save this in the non-token aware case + if (isLocallyReplicated) + saveConsensusKeyMigration(naturalReplicas, + new ConsensusKeyMigrationFinished(tableMetadata.id.asUUID(), + key.getKey(), + new ConsensusMigratedAt(currentEpoch, ConsensusMigrationTarget.accord))); + return; + case FAILURE: + Failure failure = (Failure)result; + if (failure.failure == null) + throw new CasWriteTimeoutException(WriteType.CAS, consistencyLevel, 0, 0, 0); + throw new RuntimeException(failure.failure); + } + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + finally + { + cfs.metric.keyMigration.addNano(nanoTime() - start); + } + } + + private static void saveConsensusKeyMigration(EndpointsForToken replicas, ConsensusKeyMigrationFinished finished) + { + Message out = Message.out(CONSENSUS_KEY_MIGRATION, finished); + replicas.endpoints(); + for (Replica replica : replicas) + { + if (replica.isSelf()) + saveConsensusKeyMigrationLocally(finished.partitionKey, finished.tableId, finished.consensusMigratedAt); + else + MessagingService.instance().send(out, replica.endpoint()); + } + } + + private static void saveConsensusKeyMigrationLocally(ByteBuffer partitionKey, UUID tableId, ConsensusMigratedAt consensusMigratedAt) + { + // Order doesn't matter, existing values don't matter, version doesn't matter + // If any of this races or goes backwards the result is that key migration is + // reattempted and it should be very rare + MIGRATION_STATE_CACHE.put(Pair.create(partitionKey, tableId), consensusMigratedAt); + Stage.MUTATION.execute(() -> SystemKeyspace.saveConsensusKeyMigrationState(partitionKey, tableId, consensusMigratedAt)); + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java new file mode 100644 index 000000000000..ac63cc4bd95a --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import javax.annotation.Nonnull; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.config.Config.LWTStrategy; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.locator.ReplicaLayout; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; +import org.apache.cassandra.service.paxos.Paxos; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.FBUtilities; + +import static com.google.common.base.Preconditions.checkState; +import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.getConsensusMigratedAt; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.accord; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV1; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV2; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.paxos; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; + +/** + * Helper class to decide where to route a request that requires consensus, migrating a key if necessary + * before rerouting. + */ +public class ConsensusRequestRouter +{ + public enum ConsensusRoutingDecision + { + paxosV1, + paxosV2, + accord, + } + + public static volatile ConsensusRequestRouter instance = new ConsensusRequestRouter(); + + @VisibleForTesting + public static void setInstance(ConsensusRequestRouter testInstance) + { + instance = testInstance; + } + + @VisibleForTesting + public static void resetInstance() + { + instance = new ConsensusRequestRouter(); + } + + protected ConsensusRequestRouter() {} + + public ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull TableId tableId, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + // In accord mode there might be migration state in CM (unless cleanup gets added), but it doesn't + // matter. All other consensus protocols are not used. + if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) + return accord; + + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tableId); + if (cfs == null) + throw new IllegalStateException("Can't route consensus request for nonexistent table %s".format(tableId.toString())); + return routeAndMaybeMigrate(key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + } + + protected ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + ClusterMetadata cm = ClusterMetadata.current(); + + TableMigrationState tms = cm.consensusMigrationState.tableStates.get(cfs.getTableId()); + if (tms == null) + return pickPaxos(); + + if (Range.isInNormalizedRanges(key.getToken(), tms.migratedRanges)) + return pickMigrated(tms.targetProtocol); + + if (Range.isInNormalizedRanges(key.getToken(), tms.migratingRanges)) + return pickBasedOnKeyMigrationStatus(cm, tms, key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + + // It's not migrated so infer the protocol from the target + return pickNotMigrated(tms.targetProtocol); + } + + /** + * If the key was already migrated then we can pick the target protocol otherwise + * we have to run a repair operation on the key to migrate it. + */ + private static ConsensusRoutingDecision pickBasedOnKeyMigrationStatus(ClusterMetadata cm, TableMigrationState tms, DecoratedKey key, ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + checkState(pickPaxos() != paxosV1, "Can't migrate from PaxosV1 to anything"); + + // If it is locally replicated we can check our local migration state to see if it was already migrated + EndpointsForToken naturalReplicas = ReplicaLayout.forNonLocalStrategyTokenRead(cm, cfs.keyspace.getMetadata(), key.getToken()); + boolean isLocallyReplicated = naturalReplicas.lookup(FBUtilities.getBroadcastAddressAndPort()) != null; + if (isLocallyReplicated) + { + ConsensusMigratedAt consensusMigratedAt = getConsensusMigratedAt(tms.tableId, key); + // Check that key migration that was performed satisfies the requirements of the current in flight migration + // for the range + // Be aware that for Accord->Paxos the cache only tells us if the key was repaired locally + // This ends up still being safe because every single Paxos read (in a migrating range) during migration will check + // locally to see if repair is necessary + if (consensusMigratedAt != null && tms.satisfiedByKeyMigrationAtEpoch(key, consensusMigratedAt)) + return pickMigrated(tms.targetProtocol); + + if (tms.targetProtocol == paxos) + { + // Run the Accord barrier txn now so replicas don't start independent + // barrier transactions to accomplish the migration + // They still might need to go through the fast local path for barrier txns + // at each replica, but they won't create their own txn since we created it here + ConsensusKeyMigrationState.repairKeyAccord(key, tms.keyspaceName, tms.tableId, tms.minMigrationEpoch(key.getToken()).getEpoch(), requestTime, true, isForWrite); + return paxosV2; + } + // Fall through for repairKeyPaxos + } + + // If it's not locally replicated then: + // Accord -> Paxos - Paxos will ask Accord to migrate in the read at each replica if necessary + // Paxos -> Accord - Paxos needs to be repaired before Accord runs so do it here + if (tms.targetProtocol == paxos) + return paxosV2; + else + // Should exit exceptionally if the repair is not done + ConsensusKeyMigrationState.repairKeyPaxos(naturalReplicas, cm.epoch, key, cfs, consistencyLevel, requestTime, timeoutNanos, isLocallyReplicated, isForWrite); + + return pickMigrated(tms.targetProtocol); + } + + // Allows tests to inject specific responses + public boolean isKeyInMigratingOrMigratedRangeDuringPaxosBegin(TableId tableId, DecoratedKey key) + { + return isKeyInMigratingOrMigratedRangeFromPaxos(tableId, key); + } + + // Allows tests to inject specific responses + public boolean isKeyInMigratingOrMigratedRangeDuringPaxosAccept(TableId tableId, DecoratedKey key) + { + return isKeyInMigratingOrMigratedRangeFromPaxos(tableId, key); + } + + /* + * A lightweight check against cluster metadata that doesn't check if the key has already been migrated + * using local system table state + */ + public boolean isKeyInMigratingOrMigratedRangeFromPaxos(TableId tableId, DecoratedKey key) + { + TableMigrationState tms = ClusterMetadata.current().consensusMigrationState.tableStates.get(tableId); + // No state means no migration for this table + if (tms == null) + return false; + + if (tms.targetProtocol == ConsensusMigrationTarget.paxos) + return false; + + // The coordinator will need to retry either on Accord if they are trying + // to propose their own value, or by setting the consensus migration epoch to recover an incomplete transaction + if (Range.isInNormalizedRanges(key.getToken(), tms.migratingAndMigratedRanges)) + return true; + + return false; + } + + public boolean isKeyInMigratingOrMigratedRangeFromAccord(Epoch epoch, TableId tableId, DecoratedKey key) + { + ClusterMetadata cm = ClusterMetadataService.instance().fetchLogFromCMS(epoch); + TableMigrationState tms = cm.consensusMigrationState.tableStates.get(tableId); + return isKeyInMigratingOrMigratedRangeFromAccord(tms, key); + } + + /* + * A lightweight check against cluster metadata that doesn't check if the key has already been migrated + * using local system table state. + */ + public boolean isKeyInMigratingOrMigratedRangeFromAccord(TableMigrationState tms, DecoratedKey key) + { + // No state means no migration for this table + if (tms == null) + return false; + + if (tms.targetProtocol == ConsensusMigrationTarget.accord) + return false; + + if (Range.isInNormalizedRanges(key.getToken(), tms.migratingAndMigratedRanges)) + return true; + + return false; + } + + private static ConsensusRoutingDecision pickMigrated(ConsensusMigrationTarget targetProtocol) + { + if (targetProtocol.equals(ConsensusMigrationTarget.accord)) + return accord; + else + return pickPaxos(); + } + + private static ConsensusRoutingDecision pickNotMigrated(ConsensusMigrationTarget targetProtocol) + { + if (targetProtocol.equals(ConsensusMigrationTarget.accord)) + return pickPaxos(); + else + return accord; + } + + private static ConsensusRoutingDecision pickPaxos() + { + return Paxos.useV2() ? paxosV2 : paxosV1; + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java new file mode 100644 index 000000000000..62b03eefa98c --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java @@ -0,0 +1,909 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import java.io.IOException; +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.base.Predicates; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableMap.Builder; +import com.google.common.collect.ImmutableSortedMap; +import com.google.common.primitives.SignedBytes; +import com.google.common.util.concurrent.FutureCallback; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.config.Config.LWTStrategy; +import org.apache.cassandra.config.Config.NonSerialWriteStrategy; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.repair.RepairJobDesc; +import org.apache.cassandra.repair.RepairParallelism; +import org.apache.cassandra.repair.RepairResult; +import org.apache.cassandra.repair.messages.RepairOption; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.paxos.Paxos; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.MetadataValue; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.tcm.transformations.BeginConsensusMigrationForTableAndRange; +import org.apache.cassandra.tcm.transformations.MaybeFinishConsensusMigrationForTableAndRange; +import org.apache.cassandra.tcm.transformations.SetConsensusMigrationTargetProtocol; +import org.apache.cassandra.utils.LocalizeString; +import org.apache.cassandra.utils.NullableSerializer; +import org.apache.cassandra.utils.PojoToString; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Collections.emptyList; +import static org.apache.cassandra.db.TypeSizes.sizeof; +import static org.apache.cassandra.dht.Range.intersectionOfNormalizedRanges; +import static org.apache.cassandra.dht.Range.normalize; +import static org.apache.cassandra.dht.Range.subtract; +import static org.apache.cassandra.dht.Range.subtractNormalizedRanges; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.reset; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeSet; +import static org.apache.cassandra.utils.CollectionSerializers.newHashMap; +import static org.apache.cassandra.utils.CollectionSerializers.newListSerializer; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; +import static org.apache.cassandra.utils.CollectionSerializers.serializedMapSize; + +/** + * Track and update the migration state of individual table and ranges within those tables + */ +public abstract class ConsensusTableMigrationState +{ + private static final Logger logger = LoggerFactory.getLogger(ConsensusTableMigrationState.class); + + public static final MetadataSerializer>> rangesSerializer = newListSerializer(Range.serializer); + + public static final FutureCallback completedRepairJobHandler = new FutureCallback() + { + @Override + public void onSuccess(@Nullable RepairResult repairResult) + { + checkNotNull(repairResult, "repairResult should not be null"); + ConsensusMigrationRepairResult migrationResult = repairResult.consensusMigrationRepairResult; + + // Need to repair both Paxos and base table state + // Could track them separately, but doesn't seem worth the effort + if (migrationResult.type == ConsensusMigrationRepairType.ineligible) + return; + + RepairJobDesc desc = repairResult.desc; + TableMetadata tm = Schema.instance.getTableMetadata(desc.keyspace, desc.columnFamily); + if (tm == null) + return; + TableMigrationState tms = ClusterMetadata.current().consensusMigrationState.tableStates.get(tm.id); + if (tms == null || !Range.intersects(tms.migratingRanges, desc.ranges)) + return; + + if (tms.targetProtocol == ConsensusMigrationTarget.paxos && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.accord) + return; + if (tms.targetProtocol == ConsensusMigrationTarget.accord && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.paxos) + return; + + logger.info("Repair {} is going to trigger migration completion for ranges {} and epoch {}", desc.sessionId, desc.ranges, migrationResult.minEpoch); + + ClusterMetadataService.instance().commit( + new MaybeFinishConsensusMigrationForTableAndRange( + desc.keyspace, desc.columnFamily, ImmutableList.copyOf(desc.ranges), + migrationResult.minEpoch, migrationResult.type)); + } + + @Override + public void onFailure(Throwable throwable) + { + // Only successes drive forward progress + } + }; + + public static void reset() + { + ClusterMetadata cm = ClusterMetadata.current(); + for (TableMigrationState tms : cm.consensusMigrationState.tableStates.values()) + setConsensusMigrationTargetProtocol("reset", + ImmutableList.of(tms.keyspaceName), + Optional.of(ImmutableList.of(tms.tableName))); + } + + public enum ConsensusMigrationRepairType + { + ineligible(0), + paxos(1), + accord(2); + + public final byte value; + + ConsensusMigrationRepairType(int value) + { + this.value = SignedBytes.checkedCast(value); + } + + public static ConsensusMigrationRepairType fromString(String repairType) + { + return ConsensusMigrationRepairType.valueOf(LocalizeString.toLowerCaseLocalized(repairType)); + } + + public static ConsensusMigrationRepairType fromValue(byte value) + { + switch (value) + { + default: + throw new IllegalArgumentException(value + " is not recognized"); + case 0: + return ConsensusMigrationRepairType.ineligible; + case 1: + return ConsensusMigrationRepairType.paxos; + case 2: + return ConsensusMigrationRepairType.accord; + } + } + } + + public enum ConsensusMigrationTarget + { + paxos(0), + accord(1), + reset(2); + + public final byte value; + + ConsensusMigrationTarget(int value) + { + this.value = SignedBytes.checkedCast(value); + } + + public static ConsensusMigrationTarget fromString(String targetProtocol) + { + return ConsensusMigrationTarget.valueOf(LocalizeString.toLowerCaseLocalized(targetProtocol)); + } + + public static ConsensusMigrationTarget fromValue(byte value) + { + switch (value) + { + default: + throw new IllegalArgumentException(value + " is not recognized"); + case 0: + return paxos; + case 1: + return accord; + case 2: + return reset; + } + } + } + + public static class ConsensusMigrationRepairResult + { + private final ConsensusMigrationRepairType type; + private final Epoch minEpoch; + + private ConsensusMigrationRepairResult(ConsensusMigrationRepairType type, Epoch minEpoch) + { + this.type = type; + this.minEpoch = minEpoch; + } + + public static ConsensusMigrationRepairResult fromCassandraRepair(Epoch minEpoch, boolean migrationEligibleRepair) + { + checkArgument(!migrationEligibleRepair || minEpoch.isAfter(Epoch.EMPTY), "Epoch should not be empty if Paxos and regular repairs were performed"); + if (migrationEligibleRepair) + return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.paxos, minEpoch); + else + return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.ineligible, Epoch.EMPTY); + } + + public static ConsensusMigrationRepairResult fromAccordRepair(Epoch minEpoch) + { + checkArgument(minEpoch.isAfter(Epoch.EMPTY), "Accord repairs should always occur at an Epoch"); + return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.accord, minEpoch); + } + } + + public static class ConsensusMigratedAt + { + public static final IVersionedSerializer serializer = NullableSerializer.wrap(new IVersionedSerializer() + { + @Override + public void serialize(ConsensusMigratedAt t, DataOutputPlus out, int version) throws IOException + { + Epoch.messageSerializer.serialize(t.migratedAtEpoch, out, version); + out.writeByte(t.migratedAtTarget.value); + } + + @Override + public ConsensusMigratedAt deserialize(DataInputPlus in, int version) throws IOException + { + Epoch migratedAtEpoch = Epoch.messageSerializer.deserialize(in, version); + ConsensusMigrationTarget target = ConsensusMigrationTarget.fromValue(in.readByte()); + return new ConsensusMigratedAt(migratedAtEpoch, target); + } + + @Override + public long serializedSize(ConsensusMigratedAt t, int version) + { + return TypeSizes.sizeof(ConsensusMigrationTarget.accord.value) + + Epoch.messageSerializer.serializedSize(t.migratedAtEpoch, version); + } + }); + + // Fields are not nullable when used for messaging + @Nullable + public final Epoch migratedAtEpoch; + + @Nullable + public final ConsensusMigrationTarget migratedAtTarget; + + public ConsensusMigratedAt(Epoch migratedAtEpoch, ConsensusMigrationTarget migratedAtTarget) + { + this.migratedAtEpoch = migratedAtEpoch; + this.migratedAtTarget = migratedAtTarget; + } + } + + // TODO (desired): Move this into the schema for the table once this is based off of TrM + public static class TableMigrationState + { + @Nonnull + public final String keyspaceName; + + @Nonnull + public final String tableName; + + @Nonnull + public final TableId tableId; + + @Nonnull + public final ConsensusMigrationTarget targetProtocol; + + @Nonnull + public final List> migratedRanges; + + /* + * Necessary to track which ranges started migrating at which epoch + * in order to know whether a repair qualifies in terms of finishing + * migration of the range. + */ + @Nonnull + public final NavigableMap>> migratingRangesByEpoch; + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + @Override + public void serialize(TableMigrationState t, DataOutputPlus out, Version version) throws IOException + { + out.write(t.targetProtocol.value); + out.writeUTF(t.keyspaceName); + out.writeUTF(t.tableName); + t.tableId.serialize(out); + serializeCollection(t.migratedRanges, out, version, Range.serializer); + serializeMap(t.migratingRangesByEpoch, out, version, Epoch.serializer, rangesSerializer); + } + + @Override + public TableMigrationState deserialize(DataInputPlus in, Version version) throws IOException + { + ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromValue(in.readByte()); + String keyspaceName = in.readUTF(); + String tableName = in.readUTF(); + TableId tableId = TableId.deserialize(in); + Set> migratedRanges = deserializeSet(in, version, Range.serializer); + Map>> migratingRangesByEpoch = deserializeMap(in, version, Epoch.serializer, rangesSerializer, newHashMap()); + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, migratingRangesByEpoch); + } + + @Override + public long serializedSize(TableMigrationState t, Version version) + { + return sizeof(t.targetProtocol.value) + + sizeof(t.keyspaceName) + + sizeof(t.tableName) + + t.tableId.serializedSize() + + serializedCollectionSize(t.migratedRanges, version, Range.serializer) + + serializedMapSize(t.migratingRangesByEpoch, version, Epoch.serializer, rangesSerializer); + } + }; + + @Nonnull + public final List> migratingRanges; + + @Nonnull + public final List> migratingAndMigratedRanges; + + public TableMigrationState(@Nonnull String keyspaceName, + @Nonnull String tableName, + @Nonnull TableId tableId, + @Nonnull ConsensusMigrationTarget targetProtocol, + @Nonnull Collection> migratedRanges, + @Nonnull Map>> migratingRangesByEpoch) + { + this.keyspaceName = keyspaceName; + this.tableName = tableName; + this.tableId = tableId; + this.targetProtocol = targetProtocol; + this.migratedRanges = ImmutableList.copyOf(normalize(migratedRanges)); + this.migratingRangesByEpoch = ImmutableSortedMap.copyOf( + migratingRangesByEpoch.entrySet() + .stream() + .map( entry -> new SimpleEntry<>(entry.getKey(), ImmutableList.copyOf(normalize(entry.getValue())))) + .collect(Collectors.toList())); + this.migratingRanges = ImmutableList.copyOf(normalize(migratingRangesByEpoch.values().stream().flatMap(Collection::stream).collect(Collectors.toList()))); + this.migratingAndMigratedRanges = ImmutableList.copyOf(normalize(ImmutableList.>builder().addAll(migratedRanges).addAll(migratingRanges).build())); + } + + public TableMigrationState withRangesMigrating(@Nonnull Collection> ranges, + @Nonnull ConsensusMigrationTarget target) + { + checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't already have an entry for the empty epoch"); + // Doesn't matter which epoch the range started migrating in for this context so merge them all + Collection> migratingRanges = normalize(migratingRangesByEpoch.values().stream().flatMap(Collection::stream).collect(Collectors.toList())); + checkArgument(target == targetProtocol, "Requested migration to target protocol " + target + " conflicts with in progress migration to protocol " + targetProtocol); + List> normalizedRanges = normalize(ranges); + if (subtract(normalizedRanges, migratingRanges).isEmpty()) + logger.warn("Range " + ranges + " is already being migrated"); + Set> withoutAlreadyMigrated = subtract(normalizedRanges, migratedRanges); + if (withoutAlreadyMigrated.isEmpty()) + logger.warn("Range " + ranges + " is already migrated"); + Set> withoutBoth = subtract(withoutAlreadyMigrated, migratingRanges); + if (withoutBoth.isEmpty()) + logger.warn("Range " + ranges + " is already migrating/migrated"); + + if (!Range.equals(normalizedRanges, withoutBoth)) + logger.warn("Ranges " + normalizedRanges + " to start migrating is already partially migrating/migrated " + withoutBoth); + + Map>> newMigratingRanges = new HashMap<>(migratingRangesByEpoch.size() + 1); + newMigratingRanges.putAll(migratingRangesByEpoch); + newMigratingRanges.put(Epoch.EMPTY, normalizedRanges); + + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, newMigratingRanges); + } + + public TableMigrationState withReplacementForEmptyEpoch(@Nonnull Epoch replacementEpoch) + { + if (!migratingRangesByEpoch.containsKey(Epoch.EMPTY)) + return this; + Map>> newMigratingRangesByEpoch = new HashMap<>(migratingRangesByEpoch.size()); + migratingRangesByEpoch.forEach((epoch, ranges) -> { + if (epoch.equals(Epoch.EMPTY)) + newMigratingRangesByEpoch.put(replacementEpoch, ranges); + else + newMigratingRangesByEpoch.put(epoch, ranges); + }); + + if (newMigratingRangesByEpoch != null) + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, newMigratingRangesByEpoch); + else + return this; + } + + public TableMigrationState withRangesRepairedAtEpoch(@Nonnull Collection> ranges, + @Nonnull Epoch epoch) + { + checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't have an entry for the empty epoch"); + checkArgument(epoch.isAfter(Epoch.EMPTY), "Epoch shouldn't be empty"); + + List> normalizedRepairedRanges = normalize(ranges); + // This should be inclusive because the epoch we store in the map is the epoch in which the range has been marked migrating + // in startMigrationToConsensusProtocol + NavigableMap>> coveredEpochs = migratingRangesByEpoch.headMap(epoch, true); + List> normalizedMigratingRanges = normalize(coveredEpochs.values().stream().flatMap(Collection::stream).collect(Collectors.toList())); + List> normalizedRepairedIntersection = intersectionOfNormalizedRanges(normalizedRepairedRanges, normalizedMigratingRanges); + checkState(!normalizedRepairedIntersection.isEmpty(), "None of Ranges " + ranges + " were being migrated"); + + Map>> newMigratingRangesByEpoch = new HashMap<>(); + + // Everything in this epoch or later can't have been migrated so re-add all of them + newMigratingRangesByEpoch.putAll(migratingRangesByEpoch.tailMap(epoch, false)); + + // Include anything still remaining to be migrated after subtracting what was repaired + for (Map.Entry>> e : coveredEpochs.entrySet()) + { + // Epoch when these ranges started migrating + Epoch rangesEpoch = e.getKey(); + List> epochMigratingRanges = e.getValue(); + List> remainingRanges = subtractNormalizedRanges(epochMigratingRanges, normalizedRepairedIntersection); + if (!remainingRanges.isEmpty()) + newMigratingRangesByEpoch.put(rangesEpoch, remainingRanges); + } + + List> newMigratedRanges = new ArrayList<>(normalizedMigratingRanges.size() + ranges.size()); + newMigratedRanges.addAll(migratedRanges); + newMigratedRanges.addAll(normalizedRepairedIntersection); + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, newMigratedRanges, newMigratingRangesByEpoch); + } + + public boolean paxosReadSatisfiedByKeyMigrationAtEpoch(DecoratedKey key, ConsensusMigratedAt consensusMigratedAt) + { + // This check is being done from a Paxos read attempt which needs to + // check if Accord needs to resolve any in flight accord transactions + // if the migration target is Accord then nothing needs to be done + if (targetProtocol != ConsensusMigrationTarget.paxos) + return true; + + return satisfiedByKeyMigrationAtEpoch(key, consensusMigratedAt); + } + + public boolean satisfiedByKeyMigrationAtEpoch(@Nonnull DecoratedKey key, @Nullable ConsensusMigratedAt consensusMigratedAt) + { + if (consensusMigratedAt == null) + { + // It hasn't been migrated and needs migration if it is in a migrating range + return Range.isInNormalizedRanges(key.getToken(), migratingRanges); + } + else + { + // It has been migrated and might be from a late enough epoch to satisfy this migration + return consensusMigratedAt.migratedAtTarget == targetProtocol + && migratingRangesByEpoch.headMap(consensusMigratedAt.migratedAtEpoch, true).values() + .stream() + .flatMap(List::stream) + .anyMatch(range -> range.contains(key.getToken())); + } + } + + public Epoch minMigrationEpoch(Token token) + { + for (Map.Entry>> e : migratingRangesByEpoch.entrySet()) + { + if (Range.isInNormalizedRanges(token, e.getValue())) + return e.getKey(); + } + return Epoch.EMPTY; + } + + + public @Nonnull TableId getTableId() + { + return tableId; + } + + public TableMigrationState withMigrationTarget(ConsensusMigrationTarget newTargetProtocol) + { + checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't have an entry for the empty epoch"); + if (this.targetProtocol == newTargetProtocol) + return this; + + // Migrating ranges remain migrating because individual keys may have already been migrated + // So for correctness we need to perform key migration + // We do need to update the epoch so that a new repair is required to drive the migration + Map>> migratingRangesByEpoch = ImmutableMap.of(Epoch.EMPTY, migratingRanges); + + Token minToken = ColumnFamilyStore.getIfExists(tableId).getPartitioner().getMinimumToken(); + Range fullRange = new Range(minToken, minToken); + // What is migrated already is anything that was never migrated/migrating before (untouched) + List> migratedRanges = ImmutableList.copyOf(normalize(fullRange.subtractAll(migratingAndMigratedRanges))); + + return new TableMigrationState(keyspaceName, tableName, tableId, newTargetProtocol, migratedRanges, migratingRangesByEpoch); + } + + public Map toMap() + { + Builder builder = ImmutableMap.builder(); + builder.put("keyspace", keyspaceName); + builder.put("table", tableName); + builder.put("tableId", tableId.toString()); + builder.put("targetProtocol", targetProtocol.toString()); + builder.put("migratedRanges", migratedRanges.stream().map(Objects::toString).collect(toImmutableList())); + Map> rangesByEpoch = new LinkedHashMap<>(); + for (Map.Entry>> entry : migratingRangesByEpoch.entrySet()) + { + rangesByEpoch.put(entry.getKey().getEpoch(), entry.getValue().stream().map(Objects::toString).collect(toImmutableList())); + } + builder.put("migratingRangesByEpoch", rangesByEpoch); + return builder.build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TableMigrationState that = (TableMigrationState) o; + return keyspaceName.equals(that.keyspaceName) && tableName.equals(that.tableName) && tableId.equals(that.tableId) && targetProtocol == that.targetProtocol && migratedRanges.equals(that.migratedRanges) && migratingRangesByEpoch.equals(that.migratingRangesByEpoch) && migratingRanges.equals(that.migratingRanges) && migratingAndMigratedRanges.equals(that.migratingAndMigratedRanges); + } + + @Override + public int hashCode() + { + return Objects.hash(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, migratingRangesByEpoch, migratingRanges, migratingAndMigratedRanges); + } + + public List> migratingRanges() + { + return migratingRanges; + } + } + + public static class ConsensusMigrationState implements MetadataValue + { + public static ConsensusMigrationState EMPTY = new ConsensusMigrationState(Epoch.EMPTY, ImmutableMap.of()); + @Nonnull + public final Map tableStates; + + public final Epoch lastModified; + + + public ConsensusMigrationState(@Nonnull Epoch lastModified, @Nonnull Map tableStates) + { + checkNotNull(tableStates, "tableStates is null"); + checkNotNull(lastModified, "lastModified is null"); + this.lastModified = lastModified; + this.tableStates = ImmutableMap.copyOf(tableStates); + } + + public Map toMap(@Nullable Set keyspaceNames, @Nullable Set tableNames) + { + return ImmutableMap.of("lastModifiedEpoch", lastModified.getEpoch(), + "tableStates", tableStatesAsMaps(keyspaceNames, tableNames), + "version", PojoToString.CURRENT_VERSION); + } + + private List> tableStatesAsMaps(@Nullable Set keyspaceNames, + @Nullable Set tableNames) + { + ImmutableList.Builder> builder = ImmutableList.builder(); + for (TableMigrationState tms : tableStates.values()) + { + if (keyspaceNames != null && !keyspaceNames.contains(tms.keyspaceName)) + continue; + if (tableNames != null && !tableNames.contains(tms.tableName)) + continue; + builder.add(tms.toMap()); + } + return builder.build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ConsensusMigrationState that = (ConsensusMigrationState) o; + return tableStates.equals(that.tableStates); + } + + @Override + public int hashCode() + { + return Objects.hash(tableStates); + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + @Override + public void serialize(ConsensusMigrationState consensusMigrationState, DataOutputPlus out, Version version) throws IOException + { + Epoch.serializer.serialize(consensusMigrationState.lastModified, out, version); + serializeMap(consensusMigrationState.tableStates, out, version, TableId.metadataSerializer, TableMigrationState.serializer); + } + + @Override + public ConsensusMigrationState deserialize(DataInputPlus in, Version version) throws IOException + { + Epoch lastModified = Epoch.serializer.deserialize(in, version); + Map tableMigrationStates = deserializeMap(in, version, TableId.metadataSerializer, TableMigrationState.serializer, newHashMap()); + return new ConsensusMigrationState(lastModified, tableMigrationStates); + } + + @Override + public long serializedSize(ConsensusMigrationState t, Version version) + { + return Epoch.serializer.serializedSize(t.lastModified, version) + + serializedMapSize(t.tableStates, version, TableId.metadataSerializer, TableMigrationState.serializer); + } + }; + + @Override + public ConsensusMigrationState withLastModified(Epoch epoch) + { + ImmutableMap.Builder newMap = ImmutableMap.builderWithExpectedSize(tableStates.size()); + tableStates.forEach((tableId, tableState) -> { + newMap.put(tableId, tableState.withReplacementForEmptyEpoch(epoch)); + }); + return new ConsensusMigrationState(epoch, newMap.build()); + } + + @Override + public Epoch lastModified() + { + return lastModified; + } + } + + private ConsensusTableMigrationState() {} + + // Used by callers to avoid looking up the TMS multiple times + public static @Nullable TableMigrationState getTableMigrationState(TableId tableId) + { + TableMigrationState tms = ClusterMetadata.current().consensusMigrationState.tableStates.get(tableId); + return tms; + } + + /* + * Set or change the migration target for the keyspaces and tables. Can be used to reverse the direction of a migration + * or instantly migrate a table to a new protocol. + */ + public static void setConsensusMigrationTargetProtocol(@Nonnull String targetProtocolName, + @Nullable List keyspaceNames, + @Nonnull Optional> maybeTables) + { + checkArgument(!maybeTables.isPresent() || (keyspaceNames != null && keyspaceNames.size() == 1), "Must specify one keyspace along with tables"); + checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); + keyspaceNames = maybeDefaultKeyspaceNames(keyspaceNames); + ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(targetProtocolName); + + if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) + throw new IllegalStateException("Mixing a hard coded strategy with migration is unsupported"); + + if (!Paxos.useV2()) + throw new IllegalStateException("Can't do any consensus migrations from/to PaxosV1, switch to V2 first"); + + List tableIds = keyspacesAndTablesToTableIds(keyspaceNames, maybeTables); + ClusterMetadataService.instance().commit(new SetConsensusMigrationTargetProtocol(targetProtocol, tableIds)); + } + + public static void startMigrationToConsensusProtocol(@Nonnull String targetProtocolName, + @Nullable List keyspaceNames, + @Nonnull Optional> maybeTables, + @Nonnull Optional maybeRangesStr) + { + checkState(keyspaceNames.size() == 1 || !maybeTables.isPresent(), "Must specify one keyspace along with tables"); + checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); + ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(targetProtocolName); + checkArgument(targetProtocol != reset, "Can't start migration to reset"); + + + if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) + throw new IllegalStateException("Mixing a hard coded strategy with migration is unsupported"); + + NonSerialWriteStrategy nonSerialWriteStrategy = DatabaseDescriptor.getNonSerialWriteStrategy(); + if (!nonSerialWriteStrategy.writesThroughAccord && nonSerialWriteStrategy != NonSerialWriteStrategy.mixed) + throw new IllegalStateException("non-SERIAL writes need to be routed through Accord before attempting migration, or enable mixed mode"); + + if (!Paxos.useV2()) + throw new IllegalStateException("Can't do any consensus migrations to/from PaxosV1, switch to V2 first"); + + keyspaceNames = maybeDefaultKeyspaceNames(keyspaceNames); + List> ranges = maybeRangesToRanges(maybeRangesStr); + List tableIds = keyspacesAndTablesToTableIds(keyspaceNames, maybeTables); + + ClusterMetadataService.instance().commit(new BeginConsensusMigrationForTableAndRange(targetProtocol, ranges, tableIds)); + } + + public static List finishMigrationToConsensusProtocol(@Nonnull String keyspace, + @Nonnull Optional> maybeTables, + @Nonnull Optional maybeRangesStr) + { + checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); + + Optional>> localKeyspaceRanges = Optional.of(ImmutableList.copyOf(StorageService.instance.getLocalReplicas(keyspace).onlyFull().ranges())); + List> ranges = maybeRangesToRanges(maybeRangesStr, localKeyspaceRanges); + Map allTableMigrationStates = ClusterMetadata.current().consensusMigrationState.tableStates; + List tableIds = keyspacesAndTablesToTableIds(ImmutableList.of(keyspace), maybeTables, Optional.of(allTableMigrationStates::containsKey)); + + checkState(tableIds.stream().allMatch(allTableMigrationStates::containsKey), "All tables need to be migrating"); + List tableMigrationStates = new ArrayList<>(); + tableIds.forEach(table -> { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(table); + if (cfs == null) + { + logger.warn("Table {} does not exist or was dropped", cfs); + return; + } + TableMigrationState tms = allTableMigrationStates.get(table); + if (tms == null) + { + logger.warn("Table {} does not have any migration state", cfs.name); + return; + } + if(!Range.intersects(ranges, tms.migratingRanges)) + { + logger.warn("Table {} with migrating ranges {} does not intersect with any requested ranges {}", cfs.name, tms.migratingRanges, ranges); + return; + } + tableMigrationStates.add(tms); + }); + + List migratingToAccord = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.accord).collect(toImmutableList()); + List migratingToPaxos = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.paxos).collect(toImmutableList());; + + Integer accordRepairCmd = finishMigrationToAccord(keyspace, migratingToAccord, ranges); + Integer paxosRepairCmd = finishMigrationToPaxos(keyspace, migratingToPaxos, ranges); + List result = new ArrayList<>(); + if (accordRepairCmd != null) + result.add(accordRepairCmd); + if (paxosRepairCmd != null) + result.add(paxosRepairCmd); + return result; + } + + private interface MigrationFinisher + { + Integer finish(Collection tables, List> ranges); + } + + private static Integer finishMigrationTo(String name, List tableMigrationStates, List> requestedRanges, MigrationFinisher migrationFinisher) + { + logger.info("Begin finish migration to {} for ranges {} and tables {}", name, requestedRanges, tableMigrationStates); + List> intersectingRanges = new ArrayList<>(); + tableMigrationStates.stream().map(TableMigrationState::migratingRanges).forEach(intersectingRanges::addAll); + intersectingRanges = Range.normalize(intersectingRanges); + intersectingRanges = Range.intersectionOfNormalizedRanges(intersectingRanges, requestedRanges); + if (intersectingRanges.isEmpty()) + { + logger.warn("No requested ranges {} intersect any migrating ranges in any table in keyspace {}"); + return null; + } + + // Repair requires that the ranges once again be grouped by the ranges provided originally which all + // fall within local range boundaries. This was already checked in maybeRangesToRanges. + List> intersectingRangesGrouped = new ArrayList<>(); + for (Range r : requestedRanges) + { + List> intersectionsForGroup = new ArrayList<>(); + for (Range intersectedRange : intersectingRanges) + intersectionsForGroup.addAll(r.intersectionWith(intersectedRange)); + intersectingRangesGrouped.addAll(normalize(intersectionsForGroup)); + } + return migrationFinisher.finish(tableMigrationStates, intersectingRangesGrouped); + } + + /* + * This is basically just invoking classic Cassandra repair and is pretty redundant with invoking repair + * directly which would also work without issue. It's include so the same interface works for both migrating to/from + * Accord, but it's not great in that repair has a lot of options that might need to be forwarded. + * + * Still maybe more valuable to put this layer of abstraction in so we can change how it works later and it's less + * tightly coupled with the Repair interface which is pretty orthogonal to consensus migration. + */ + private static Integer finishMigrationToAccord(String keyspace, List migratingToAccord, List> requestedRanges) + { + return finishMigrationTo("Accord", migratingToAccord, requestedRanges, (tables, intersectingRanges) -> { + RepairOption repairOption = getRepairOption(tables, intersectingRanges, false); + return StorageService.instance.repair(keyspace, repairOption, emptyList()).left; + }); + } + + private static Integer finishMigrationToPaxos(String keyspace, List migratingToPaxos, List> requestedRanges) + { + return finishMigrationTo("Paxos", migratingToPaxos, requestedRanges, (tables, intersectingRanges) -> { + RepairOption repairOption = getRepairOption(tables, intersectingRanges, true); + return StorageService.instance.repair(keyspace, repairOption, emptyList()).left; + }); + } + + @Nonnull + private static RepairOption getRepairOption(Collection tables, List> intersectingRanges, boolean accordRepair) + { + boolean primaryRange = false; + // TODO (review): Should disabling incremental repair be exposed for the Paxos repair in case someone explicitly does not do incremental repair? + boolean incremental = !accordRepair; + boolean trace = false; + int numJobThreads = 1; + boolean pullRepair = false; + boolean forceRepair = false; + boolean optimiseStreams = false; + boolean ignoreUnreplicatedKeyspaces = true; + boolean repairPaxos = !accordRepair; + boolean paxosOnly = false; + boolean dontPurgeTombstones = false; + RepairOption repairOption = new RepairOption(RepairParallelism.PARALLEL, primaryRange, incremental, trace, numJobThreads, intersectingRanges, pullRepair, forceRepair, PreviewKind.NONE, optimiseStreams, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); + tables.forEach(table -> repairOption.getColumnFamilies().add(table.tableName)); + return repairOption; + } + + + // Repair is restricted to local ranges, but manipulating CMS migration state doesn't need to be restricted + private static @Nonnull List> maybeRangesToRanges(@Nonnull Optional maybeRangesStr) + { + return maybeRangesToRanges(maybeRangesStr, Optional.empty()); + } + + private static @Nonnull List> maybeRangesToRanges(@Nonnull Optional maybeRangesStr, Optional>> restrictToRanges) + { + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + Optional>> maybeParsedRanges = maybeRangesStr.map(rangesStr -> ImmutableList.copyOf(RepairOption.parseRanges(rangesStr, partitioner))); + Token minToken = partitioner.getMinimumToken(); + List> defaultRanges = restrictToRanges.orElse(ImmutableList.of(new Range(minToken, minToken))); + List> ranges = maybeParsedRanges.orElse(defaultRanges); + checkArgument(ranges.stream().allMatch(range -> defaultRanges.stream().anyMatch(defaultRange -> defaultRange.contains(range))), + "If ranges are specified each range must be contained within a local range (" + defaultRanges + ") for this node to allow for precise repairs. Specified " + ranges); + return ranges; + } + + private static List maybeDefaultKeyspaceNames(@Nullable List keyspaceNames) + { + if (keyspaceNames == null || keyspaceNames.isEmpty()) + { + keyspaceNames = ImmutableList.copyOf(StorageService.instance.getNonSystemKeyspaces()); + } + checkState(keyspaceNames.stream().noneMatch(SchemaConstants::isSystemKeyspace), "Migrating system keyspaces is not supported"); + return keyspaceNames; + } + + private static List keyspacesAndTablesToTableIds(@Nonnull List keyspaceNames, @Nonnull Optional> maybeTables) + { + return keyspacesAndTablesToTableIds(keyspaceNames, maybeTables, Optional.empty()); + } + + private static List keyspacesAndTablesToTableIds(@Nonnull List keyspaceNames, @Nonnull Optional> maybeTables, @Nonnull Optional> includeTable) + { + List tableIds = new ArrayList<>(); + for (String keyspaceName : keyspaceNames) + { + Optional> maybeTableIds = maybeTables.map(tableNames -> + tableNames + .stream() + .map(tableName -> { + TableMetadata tm = Schema.instance.getTableMetadata(keyspaceName, tableName); + if (tm == null) + throw new IllegalArgumentException("Unknown table %s.%s".format(keyspaceName, tableName)); + return tm.id; + }) + .collect(toImmutableList())); + tableIds.addAll( + maybeTableIds.orElseGet(() -> + Schema.instance.getKeyspaceInstance(keyspaceName).getColumnFamilyStores() + .stream() + .map(ColumnFamilyStore::getTableId) + .filter(includeTable.orElse(Predicates.alwaysTrue())) // Filter out non-migrating so they don't generate an error + .collect(toImmutableList()))); + } + return tableIds; + } +} diff --git a/src/java/org/apache/cassandra/service/paxos/AbstractPaxosRepair.java b/src/java/org/apache/cassandra/service/paxos/AbstractPaxosRepair.java index 05c67da3f1f1..7640d75e8981 100644 --- a/src/java/org/apache/cassandra/service/paxos/AbstractPaxosRepair.java +++ b/src/java/org/apache/cassandra/service/paxos/AbstractPaxosRepair.java @@ -24,9 +24,9 @@ import java.util.List; import java.util.Objects; import java.util.function.Consumer; +import javax.annotation.Nullable; import com.google.common.base.Preconditions; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,9 +64,9 @@ public void accept(T input) public static class Result extends State { - enum Outcome { DONE, CANCELLED, FAILURE } + public enum Outcome { DONE, CANCELLED, FAILURE } - final Outcome outcome; + public final Outcome outcome; public Result(Outcome outcome) { @@ -127,15 +127,20 @@ public int hashCode() } private final DecoratedKey partitionKey; + @Nullable private final Ballot incompleteBallot; + + protected final long retryTimeoutNanos; + private List listeners = null; private volatile State state; private volatile long startedNanos = Long.MIN_VALUE; - public AbstractPaxosRepair(DecoratedKey partitionKey, Ballot incompleteBallot) + public AbstractPaxosRepair(DecoratedKey partitionKey, Ballot incompleteBallot, long retryTimeoutNanos) { this.partitionKey = partitionKey; this.incompleteBallot = incompleteBallot; + this.retryTimeoutNanos = retryTimeoutNanos; } public State state() @@ -158,7 +163,8 @@ public boolean isComplete() return isResult(state); } - public Ballot incompleteBallot() + // Shouldn't be null when used by PaxosRepairs, but will be null when used by ConsensusRequestRouter + public @Nullable Ballot incompleteBallot() { return incompleteBallot; } @@ -203,11 +209,19 @@ public final DecoratedKey partitionKey() public State restart(State state) { return restart(state, Long.MIN_VALUE); } public abstract State restart(State state, long waitUntil); + // Used to start repairs from PaxosTableRepairs public final synchronized AbstractPaxosRepair start() + { + long startedNanos = Math.max(Long.MIN_VALUE + 1, nanoTime()); + return start(startedNanos); + } + + // Used to start repairs from ConsensusRequestRouter + public final synchronized AbstractPaxosRepair start(long queryStartNanos) { updateState(null, null, (state, i2) -> { Preconditions.checkState(!isStarted()); - startedNanos = Math.max(Long.MIN_VALUE + 1, nanoTime()); + startedNanos = queryStartNanos; return restart(state); }); return this; diff --git a/src/java/org/apache/cassandra/service/paxos/Paxos.java b/src/java/org/apache/cassandra/service/paxos/Paxos.java index 75392640a06c..4bfce8953b0a 100644 --- a/src/java/org/apache/cassandra/service/paxos/Paxos.java +++ b/src/java/org/apache/cassandra/service/paxos/Paxos.java @@ -70,8 +70,8 @@ import org.apache.cassandra.exceptions.ReadFailureException; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.RequestExecutionException; -import org.apache.cassandra.exceptions.RequestFailureException; import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.exceptions.RequestFailureException; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.UnavailableException; @@ -87,12 +87,15 @@ import org.apache.cassandra.service.CASRequest; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.FailureRecordingCallback.AsMap; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; import org.apache.cassandra.service.paxos.Commit.Proposal; import org.apache.cassandra.service.paxos.cleanup.PaxosRepairState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteAccepted; import org.apache.cassandra.service.paxos.PaxosPrepare.FoundIncompleteCommitted; +import org.apache.cassandra.service.paxos.PaxosPropose.Superseded; import org.apache.cassandra.service.reads.DataResolver; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.service.reads.repair.NoopReadRepair; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; @@ -104,6 +107,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.NoSpamLogger; +import static com.google.common.base.Preconditions.checkState; import static java.util.Collections.emptyMap; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; @@ -125,6 +129,10 @@ import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.readMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.readMetricsMap; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.writeMetricsMap; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.RETRY_NEW_PROTOCOL; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.casResult; +import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.serialReadResult; import static org.apache.cassandra.service.paxos.Ballot.Flag.GLOBAL; import static org.apache.cassandra.service.paxos.Ballot.Flag.LOCAL; import static org.apache.cassandra.service.paxos.BallotGenerator.Global.nextBallot; @@ -687,7 +695,7 @@ public interface Async * Any successful prepare phase yielding a read that rejects the condition must be followed by the proposal of * an empty update, to ensure the evaluation of the condition is linearized with respect to other reads and writes. * - * @param key the row key for the row to CAS + * @param partitionKey the row key for the row to CAS * @param request the conditions for the CAS to apply as well as the update to perform if the conditions hold. * @param consistencyForConsensus the consistency for the paxos prepare and propose round. This can only be either SERIAL or LOCAL_SERIAL. * @param consistencyForCommit the consistency for write done during the commit phase. This can be anything, except SERIAL or LOCAL_SERIAL. @@ -695,48 +703,22 @@ public interface Async * @return null if the operation succeeds in updating the row, or the current values corresponding to conditions. * (since, if the CAS doesn't succeed, it means the current value do not match the conditions). */ - public static RowIterator cas(DecoratedKey key, - CASRequest request, - ConsistencyLevel consistencyForConsensus, - ConsistencyLevel consistencyForCommit, - ClientState clientState) - throws UnavailableException, IsBootstrappingException, RequestFailureException, RequestTimeoutException, InvalidRequestException - { - final long start = nanoTime(); - final long proposeDeadline = start + getCasContentionTimeout(NANOSECONDS); - final long commitDeadline = Math.max(proposeDeadline, start + getWriteRpcTimeout(NANOSECONDS)); - return cas(key, request, consistencyForConsensus, consistencyForCommit, clientState, start, proposeDeadline, commitDeadline); - } - public static RowIterator cas(DecoratedKey key, - CASRequest request, - ConsistencyLevel consistencyForConsensus, - ConsistencyLevel consistencyForCommit, - ClientState clientState, - long proposeDeadline, - long commitDeadline - ) - throws UnavailableException, IsBootstrappingException, RequestFailureException, RequestTimeoutException, InvalidRequestException - { - return cas(key, request, consistencyForConsensus, consistencyForCommit, clientState, nanoTime(), proposeDeadline, commitDeadline); - } - private static RowIterator cas(DecoratedKey partitionKey, - CASRequest request, - ConsistencyLevel consistencyForConsensus, - ConsistencyLevel consistencyForCommit, - ClientState clientState, - long start, - long proposeDeadline, - long commitDeadline - ) + public static ConsensusAttemptResult cas(DecoratedKey partitionKey, + CASRequest request, + ConsistencyLevel consistencyForConsensus, + ConsistencyLevel consistencyForCommit, + ClientState clientState, + Dispatcher.RequestTime requestTime) throws UnavailableException, IsBootstrappingException, RequestFailureException, RequestTimeoutException, InvalidRequestException { + final long proposeDeadline = requestTime.startedAtNanos() + getCasContentionTimeout(NANOSECONDS); + final long commitDeadline = Math.max(proposeDeadline, requestTime.startedAtNanos() + getWriteRpcTimeout(NANOSECONDS)); SinglePartitionReadCommand readCommand = request.readCommand(FBUtilities.nowInSeconds()); TableMetadata metadata = readCommand.metadata(); consistencyForConsensus.validateForCas(); consistencyForCommit.validateForCasCommit(Keyspace.open(metadata.keyspace).getReplicationStrategy()); - Ballot minimumBallot = null; int failedAttemptsDueToContention = 0; try (PaxosOperationLock lock = PaxosState.lock(partitionKey, metadata, proposeDeadline, consistencyForConsensus, true)) { @@ -747,7 +729,14 @@ private static RowIterator cas(DecoratedKey partitionKey, Tracing.trace("Reading existing values for CAS precondition"); BeginResult begin = begin(proposeDeadline, readCommand, consistencyForConsensus, - true, minimumBallot, failedAttemptsDueToContention, request.requestTime()); + true, null, failedAttemptsDueToContention, request.requestTime()); + + if (begin.retryWithNewConsenusProtocol) + { + casWriteMetrics.beginMigrationRejects.mark(); + return RETRY_NEW_PROTOCOL; + } + Ballot ballot = begin.ballot; Participants participants = begin.participants; failedAttemptsDueToContention = begin.failedAttemptsDueToContention; @@ -766,7 +755,7 @@ private static RowIterator cas(DecoratedKey partitionKey, { Tracing.trace("CAS precondition rejected", current); casWriteMetrics.conditionNotMet.inc(); - return current.rowIterator(); + return casResult(current.rowIterator(false)); } // If we failed to meet our condition, it does not mean we can do nothing: if we do not propose @@ -782,7 +771,7 @@ private static RowIterator cas(DecoratedKey partitionKey, if (begin.isLinearizableRead) { Tracing.trace("CAS precondition does not match current values {}; read is already linearizable; aborting", current); - return conditionNotMet(current); + return casResult(conditionNotMet(current)); } Tracing.trace("CAS precondition does not match current values {}; proposing empty update", current); @@ -816,7 +805,7 @@ else if (begin.isPromised) continue; } - PaxosPropose.Status propose = propose(proposal, participants, conditionMet).awaitUntil(proposeDeadline); + PaxosPropose.Status propose = propose(proposal, participants, conditionMet, false).awaitUntil(proposeDeadline); switch (propose.outcome) { default: throw new IllegalStateException(); @@ -827,7 +816,7 @@ else if (begin.isPromised) case SUCCESS: { if (!conditionMet) - return conditionNotMet(current); + return casResult(conditionNotMet(current)); // no need to commit a no-op; either it // 1) reached a majority, in which case it was agreed, had no effect and we can do nothing; or @@ -840,7 +829,8 @@ else if (begin.isPromised) case SUPERSEDED: { - switch (propose.superseded().hadSideEffects) + Superseded superseded = propose.superseded(); + switch (superseded.hadSideEffects) { default: throw new IllegalStateException(); @@ -852,7 +842,12 @@ else if (begin.isPromised) .markAndThrowAsTimeoutOrFailure(true, consistencyForConsensus, failedAttemptsDueToContention); case NO: - minimumBallot = propose.superseded().by; + // Shouldn't retry on this protocol + if (superseded.needsConsensusMigration) + { + casWriteMetrics.acceptMigrationRejects.mark(); + return RETRY_NEW_PROTOCOL; + } // We have been superseded without our proposal being accepted by anyone, so we can safely retry Tracing.trace("Paxos proposal not accepted (pre-empted by a higher ballot)"); if (!waitForContention(proposeDeadline, ++failedAttemptsDueToContention, metadata, partitionKey, consistencyForConsensus, WRITE)) @@ -870,12 +865,12 @@ else if (begin.isPromised) throw result.maybeFailure().markAndThrowAsTimeoutOrFailure(true, consistencyForCommit, failedAttemptsDueToContention); } Tracing.trace("CAS successful"); - return null; + return casResult((RowIterator)null); } finally { - final long latency = nanoTime() - start; + final long latency = nanoTime() - requestTime.startedAtNanos(); if (failedAttemptsDueToContention > 0) { @@ -893,28 +888,16 @@ private static RowIterator conditionNotMet(FilteredPartition read) { Tracing.trace("CAS precondition rejected", read); casWriteMetrics.conditionNotMet.inc(); - return read.rowIterator(); - } - - public static PartitionIterator read(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyForConsensus, Dispatcher.RequestTime requestTime) - throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException - { - long deadline = requestTime.computeDeadline(DatabaseDescriptor.getReadRpcTimeout(NANOSECONDS)); - return read(group, consistencyForConsensus, requestTime, deadline); + return read.rowIterator(false); } - public static PartitionIterator read(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyForConsensus, long deadline) - throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException - { - return read(group, consistencyForConsensus, Dispatcher.RequestTime.forImmediateExecution(), deadline); - } - - private static PartitionIterator read(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyForConsensus, Dispatcher.RequestTime requestTime, long deadline) + public static ConsensusAttemptResult read(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyForConsensus, Dispatcher.RequestTime requestTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException { long start = nanoTime(); if (group.queries.size() > 1) throw new InvalidRequestException("SERIAL/LOCAL_SERIAL consistency may only be requested for one partition at a time"); + long deadline = requestTime.computeDeadline(DatabaseDescriptor.getReadRpcTimeout(NANOSECONDS)); int failedAttemptsDueToContention = 0; Ballot minimumBallot = null; @@ -927,6 +910,12 @@ private static PartitionIterator read(SinglePartitionReadCommand.Group group, Co final BeginResult begin = begin(deadline, read, consistencyForConsensus, false, minimumBallot, failedAttemptsDueToContention, requestTime); failedAttemptsDueToContention = begin.failedAttemptsDueToContention; + if (begin.retryWithNewConsenusProtocol) + { + casReadMetrics.beginMigrationRejects.mark(); + return RETRY_NEW_PROTOCOL; + } + switch (PAXOS_VARIANT) { default: @@ -935,16 +924,16 @@ private static PartitionIterator read(SinglePartitionReadCommand.Group group, Co case v2_without_linearizable_reads_or_rejected_writes: case v2_without_linearizable_reads: - return begin.readResponse; + return serialReadResult(begin.readResponse); case v2: // no need to submit an empty proposal, as the promise will be treated as complete for future optimistic reads if (begin.isLinearizableRead) - return begin.readResponse; + return serialReadResult(begin.readResponse); } Proposal proposal = Proposal.empty(begin.ballot, read.partitionKey(), read.metadata()); - PaxosPropose.Status propose = propose(proposal, begin.participants, false).awaitUntil(deadline); + PaxosPropose.Status propose = propose(proposal, begin.participants, true, false).awaitUntil(deadline); switch (propose.outcome) { default: throw new IllegalStateException(); @@ -953,10 +942,21 @@ private static PartitionIterator read(SinglePartitionReadCommand.Group group, Co throw propose.maybeFailure().markAndThrowAsTimeoutOrFailure(false, consistencyForConsensus, failedAttemptsDueToContention); case SUCCESS: - return begin.readResponse; + return serialReadResult(begin.readResponse); case SUPERSEDED: - switch (propose.superseded().hadSideEffects) + Superseded superseded = propose.superseded(); + // For consensus migration we are going to bail out earlier if migration is needed + // otherwise it it will fail every single query that races with migration being started + // during the propose step. Necessary because of CASSANDRA-18276 + // Shouldn't retry again on this protocol + if (superseded.needsConsensusMigration) + { + casReadMetrics.acceptMigrationRejects.mark(); + return RETRY_NEW_PROTOCOL; + } + // TODO https://issues.apache.org/jira/browse/CASSANDRA-18276 side effects shouldn't matter for reads + switch (superseded.hadSideEffects) { default: throw new IllegalStateException(); @@ -974,6 +974,7 @@ private static PartitionIterator read(SinglePartitionReadCommand.Group group, Co if (!waitForContention(deadline, ++failedAttemptsDueToContention, group.metadata(), group.queries.get(0).partitionKey(), consistencyForConsensus, READ)) throw MaybeFailure.noResponses(begin.participants).markAndThrowAsTimeoutOrFailure(true, consistencyForConsensus, failedAttemptsDueToContention); } + break; } } } @@ -1004,9 +1005,11 @@ static class BeginResult final boolean isPromised; final Ballot retryWithAtLeast; - public BeginResult(Ballot ballot, Participants participants, int failedAttemptsDueToContention, PartitionIterator readResponse, boolean isLinearizableRead, boolean isPromised, Ballot retryWithAtLeast) + final boolean retryWithNewConsenusProtocol; + + public BeginResult(Ballot ballot, Participants participants, int failedAttemptsDueToContention, PartitionIterator readResponse, boolean isLinearizableRead, boolean isPromised, Ballot retryWithAtLeast, boolean retryWithNewConsenusProtocol) { - assert isPromised || isLinearizableRead; + assert isPromised || isLinearizableRead || retryWithNewConsenusProtocol; this.ballot = ballot; this.participants = participants; this.failedAttemptsDueToContention = failedAttemptsDueToContention; @@ -1014,6 +1017,12 @@ public BeginResult(Ballot ballot, Participants participants, int failedAttemptsD this.isLinearizableRead = isLinearizableRead; this.isPromised = isPromised; this.retryWithAtLeast = retryWithAtLeast; + this.retryWithNewConsenusProtocol = retryWithNewConsenusProtocol; + } + + static BeginResult retryOnNewProtocol() + { + return new BeginResult(null, null, -1, null, false, false, null, true); } } @@ -1057,6 +1066,14 @@ private static BeginResult begin(long deadline, // prepare PaxosPrepare retry = null; PaxosPrepare.Status prepare = preparing.awaitUntil(deadline); + + // After performing the prepare phase we may discover that we can't propose + // our own transaction on this protocol by discovering a new CM Epoch + if (ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeDuringPaxosBegin(query.metadata().id, query.partitionKey())) + { + return BeginResult.retryOnNewProtocol(); + } + boolean isPromised = false; retry: switch (prepare.outcome) { @@ -1085,7 +1102,7 @@ private static BeginResult begin(long deadline, // and in fact it's possible for a CAS to sometimes determine if side effects occurred by reading // the underlying data and not witnessing the timestamp of its ballot (or any newer for the relevant data). Proposal repropose = new Proposal(inProgress.ballot, inProgress.accepted.update); - PaxosPropose.Status proposeResult = propose(repropose, inProgress.participants, false).awaitUntil(deadline); + PaxosPropose.Status proposeResult = propose(repropose, inProgress.participants, false, true).awaitUntil(deadline); switch (proposeResult.outcome) { default: throw new IllegalStateException(); @@ -1098,6 +1115,7 @@ private static BeginResult begin(long deadline, break retry; case SUPERSEDED: + checkState(!proposeResult.superseded().needsConsensusMigration, "Should not receive needsConsensusMigration rejects from begin"); // since we are proposing a previous value that was maybe superseded by us before completion // we don't need to test the side effects, as we just want to start again, and fall through // to the superseded section below @@ -1122,7 +1140,7 @@ private static BeginResult begin(long deadline, PaxosPrepare.Success success = prepare.success(); Supplier plan = () -> success.participants; - DataResolver resolver = new DataResolver<>(query, plan, NoopReadRepair.instance, requestTime); + DataResolver resolver = new DataResolver<>(ReadCoordinator.DEFAULT, query, plan, NoopReadRepair.instance, requestTime); for (int i = 0 ; i < success.responses.size() ; ++i) resolver.preprocess(success.responses.get(i)); @@ -1140,7 +1158,7 @@ class WasRun implements Runnable { boolean v; public void run() { v = true; } } break; } - return new BeginResult(success.ballot, success.participants, failedAttemptsDueToContention, result, !hadShortRead.v && success.isReadSafe, isPromised, success.supersededBy); + return new BeginResult(success.ballot, success.participants, failedAttemptsDueToContention, result, !hadShortRead.v && success.isReadSafe, isPromised, success.supersededBy, false); } case MAYBE_FAILURE: diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosCommitAndPrepare.java b/src/java/org/apache/cassandra/service/paxos/PaxosCommitAndPrepare.java index 7046dfbb3753..b81f6b720b0e 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosCommitAndPrepare.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosCommitAndPrepare.java @@ -45,7 +45,7 @@ public class PaxosCommitAndPrepare static PaxosPrepare commitAndPrepare(Agreed commit, Paxos.Participants participants, SinglePartitionReadCommand readCommand, boolean isWrite, boolean acceptEarlyReadSuccess) { Ballot ballot = newBallot(commit.ballot, participants.consistencyForConsensus); - Request request = new Request(commit, ballot, participants.electorate, readCommand, isWrite); + Request request = new Request(commit, ballot, participants.electorate, readCommand, isWrite, true); PaxosPrepare prepare = new PaxosPrepare(participants, request, acceptEarlyReadSuccess, null); Tracing.trace("Committing {}; Preparing {}", commit.ballot, ballot); @@ -59,21 +59,21 @@ private static class Request extends PaxosPrepare.AbstractRequest { final Agreed commit; - Request(Agreed commit, Ballot ballot, Paxos.Electorate electorate, SinglePartitionReadCommand read, boolean isWrite) + Request(Agreed commit, Ballot ballot, Paxos.Electorate electorate, SinglePartitionReadCommand read, boolean isWrite, boolean isForRecovery) { - super(ballot, electorate, read, isWrite); + super(ballot, electorate, read, isWrite, isForRecovery); this.commit = commit; } - private Request(Agreed commit, Ballot ballot, Paxos.Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite) + private Request(Agreed commit, Ballot ballot, Paxos.Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite, boolean isForRecovery) { - super(ballot, electorate, partitionKey, table, isWrite); + super(ballot, electorate, partitionKey, table, isWrite, isForRecovery); this.commit = commit; } Request withoutRead() { - return new Request(commit, ballot, electorate, partitionKey, table, isForWrite); + return new Request(commit, ballot, electorate, partitionKey, table, isForWrite, isForRecovery); } public String toString() @@ -84,14 +84,14 @@ public String toString() public static class RequestSerializer extends PaxosPrepare.AbstractRequestSerializer { - Request construct(Agreed param, Ballot ballot, Paxos.Electorate electorate, SinglePartitionReadCommand read, boolean isWrite) + Request construct(Agreed param, Ballot ballot, Paxos.Electorate electorate, SinglePartitionReadCommand read, boolean isWrite, boolean isForRecovery) { - return new Request(param, ballot, electorate, read, isWrite); + return new Request(param, ballot, electorate, read, isWrite, isForRecovery); } - Request construct(Agreed param, Ballot ballot, Paxos.Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite) + Request construct(Agreed param, Ballot ballot, Paxos.Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite, boolean isForRecovery) { - return new Request(param, ballot, electorate, partitionKey, table, isWrite); + return new Request(param, ballot, electorate, partitionKey, table, isWrite, isForRecovery); } @Override diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java index 4a3d69eee10b..eb11bf84acb5 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java @@ -41,6 +41,7 @@ import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.ReadResponse; import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.UnavailableException; @@ -57,6 +58,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.KeyMigrationState; import org.apache.cassandra.service.paxos.PaxosPrepare.Status.Outcome; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -69,6 +71,8 @@ import static org.apache.cassandra.locator.InetAddressAndPort.Serializer.inetAddressAndPortSerializer; import static org.apache.cassandra.net.Verb.PAXOS2_PREPARE_REQ; import static org.apache.cassandra.net.Verb.PAXOS2_PREPARE_RSP; +import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.getKeyMigrationState; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; import static org.apache.cassandra.service.paxos.Ballot.Flag.NONE; import static org.apache.cassandra.service.paxos.Commit.Accepted; import static org.apache.cassandra.service.paxos.Commit.Committed; @@ -369,7 +373,7 @@ static PaxosPrepare prepare(Ballot minimumBallot, Participants participants, Sin static PaxosPrepare prepareWithBallot(Ballot ballot, Participants participants, SinglePartitionReadCommand readCommand, boolean isWrite, boolean acceptEarlyReadPermission) { Tracing.trace("Preparing {} with read", ballot); - Request request = new Request(ballot, participants.electorate, readCommand, isWrite); + Request request = new Request(ballot, participants.electorate, readCommand, isWrite, true); return prepareWithBallotInternal(participants, request, acceptEarlyReadPermission, null); } @@ -377,7 +381,7 @@ static PaxosPrepare prepareWithBallot(Ballot ballot, Participants participants, static > T prepareWithBallot(Ballot ballot, Participants participants, DecoratedKey partitionKey, TableMetadata table, boolean isWrite, boolean acceptEarlyReadPermission, T onDone) { Tracing.trace("Preparing {}", ballot); - prepareWithBallotInternal(participants, new Request(ballot, participants.electorate, partitionKey, table, isWrite), acceptEarlyReadPermission, onDone); + prepareWithBallotInternal(participants, new Request(ballot, participants.electorate, partitionKey, table, isWrite, true), acceptEarlyReadPermission, onDone); return onDone; } @@ -936,8 +940,9 @@ static abstract class AbstractRequest> final boolean isForWrite; final DecoratedKey partitionKey; final TableMetadata table; + final boolean isForRecovery; - AbstractRequest(Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isForWrite) + AbstractRequest(Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isForWrite, boolean isForRecovery) { this.ballot = ballot; this.electorate = electorate; @@ -945,9 +950,10 @@ static abstract class AbstractRequest> this.isForWrite = isForWrite; this.partitionKey = read.partitionKey(); this.table = read.metadata(); + this.isForRecovery = isForRecovery; } - AbstractRequest(Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isForWrite) + AbstractRequest(Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isForWrite, boolean isForRecovery) { this.ballot = ballot; this.electorate = electorate; @@ -955,6 +961,7 @@ static abstract class AbstractRequest> this.table = table; this.read = null; this.isForWrite = isForWrite; + this.isForRecovery = isForRecovery; } abstract R withoutRead(); @@ -967,19 +974,19 @@ public String toString() static class Request extends AbstractRequest { - Request(Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isWrite) + Request(Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isWrite, boolean isForRecovery) { - super(ballot, electorate, read, isWrite); + super(ballot, electorate, read, isWrite, isForRecovery); } - private Request(Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite) + private Request(Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite, boolean isForRecovery) { - super(ballot, electorate, partitionKey, table, isWrite); + super(ballot, electorate, partitionKey, table, isWrite, isForRecovery); } Request withoutRead() { - return read == null ? this : new Request(ballot, electorate, partitionKey, table, isForWrite); + return read == null ? this : new Request(ballot, electorate, partitionKey, table, isForWrite, isForRecovery); } public String toString() @@ -990,11 +997,16 @@ public String toString() static class Response { + @Nonnull final MaybePromise.Outcome outcome; - Response(MaybePromise.Outcome outcome) + @Nullable + final ConsensusMigratedAt maybeConsenusMigratedAt; + + Response(@Nonnull MaybePromise.Outcome outcome, @Nullable ConsensusMigratedAt maybeConsenusMigratedAt) { this.outcome = outcome; + this.maybeConsenusMigratedAt = maybeConsenusMigratedAt; } Permitted permitted() { return (Permitted) this; } Rejected rejected() { return (Rejected) this; } @@ -1024,9 +1036,9 @@ static class Permitted extends Response @Nullable final Ballot supersededBy; final Epoch electorateEpoch; - Permitted(MaybePromise.Outcome outcome, long lowBound, @Nullable Accepted latestAcceptedButNotCommitted, Committed latestCommitted, @Nullable ReadResponse readResponse, boolean hadProposalStability, Map gossipInfo, Epoch electorateEpoch, @Nullable Ballot supersededBy) + Permitted(MaybePromise.Outcome outcome, @Nullable ConsensusMigratedAt maybeConsensusMigratedAt, long lowBound, @Nullable Accepted latestAcceptedButNotCommitted, Committed latestCommitted, @Nullable ReadResponse readResponse, boolean hadProposalStability, Map gossipInfo, Epoch electorateEpoch, @Nullable Ballot supersededBy) { - super(outcome); + super(outcome, maybeConsensusMigratedAt); this.lowBound = lowBound; this.latestAcceptedButNotCommitted = latestAcceptedButNotCommitted; this.latestCommitted = latestCommitted; @@ -1048,9 +1060,9 @@ static class Rejected extends Response { final Ballot supersededBy; - Rejected(Ballot supersededBy) + Rejected(Ballot supersededBy, @Nullable ConsensusMigratedAt maybeConsensusMigratedAt) { - super(REJECT); + super(REJECT, maybeConsensusMigratedAt); this.supersededBy = supersededBy; } @@ -1094,6 +1106,7 @@ static Response execute(AbstractRequest request, InetAddressAndPort from) static Response execute(AbstractRequest request, PaxosState state) { MaybePromise result = state.promiseIfNewer(request.ballot, request.isForWrite); + KeyMigrationState keyMigrationState = getKeyMigrationState(request.table.id, request.partitionKey); switch (result.outcome) { case PROMISE: @@ -1129,6 +1142,8 @@ static Response execute(AbstractRequest request, PaxosState state) if (request.read != null) { + // Make sure the read is safe and there is no Accord state that needs application + keyMigrationState.maybePerformAccordToPaxosKeyMigration(request.isForWrite); try (ReadExecutionController executionController = request.read.executionController(); UnfilteredPartitionIterator iterator = request.read.executeLocally(executionController)) { @@ -1150,10 +1165,10 @@ static Response execute(AbstractRequest request, PaxosState state) ColumnFamilyStore cfs = Schema.instance.getColumnFamilyStoreInstance(request.table.id); long lowBound = cfs.getPaxosRepairLowBound(request.partitionKey).uuidTimestamp(); - return new Permitted(result.outcome, lowBound, acceptedButNotCommitted, committed, readResponse, hasProposalStability, gossipInfo, electorateEpoch, supersededBy); + return new Permitted(result.outcome, keyMigrationState.consensusMigratedAt, lowBound, acceptedButNotCommitted, committed, readResponse, hasProposalStability, gossipInfo, electorateEpoch, supersededBy); case REJECT: - return new Rejected(result.supersededBy()); + return new Rejected(result.supersededBy(), keyMigrationState.consensusMigratedAt); default: throw new IllegalStateException(); @@ -1163,8 +1178,8 @@ static Response execute(AbstractRequest request, PaxosState state) static abstract class AbstractRequestSerializer, T> implements IVersionedSerializer { - abstract R construct(T param, Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isWrite); - abstract R construct(T param, Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite); + abstract R construct(T param, Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isWrite, boolean isForRecovery); + abstract R construct(T param, Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite, boolean isForRecovery); @Override public void serialize(R request, DataOutputPlus out, int version) throws IOException @@ -1182,6 +1197,8 @@ public void serialize(R request, DataOutputPlus out, int version) throws IOExcep request.table.id.serialize(out); DecoratedKey.serializer.serialize(request.partitionKey, out, version); } + if (version >= MessagingService.VERSION_51) + out.writeBoolean(request.isForRecovery); } public R deserialize(T param, DataInputPlus in, int version) throws IOException @@ -1192,38 +1209,47 @@ public R deserialize(T param, DataInputPlus in, int version) throws IOException if ((flag & 1) != 0) { SinglePartitionReadCommand readCommand = (SinglePartitionReadCommand) ReadCommand.serializer.deserialize(in, version); - return construct(param, ballot, electorate, readCommand, (flag & 2) == 0); + boolean isForRecovery = false; + if (version >= MessagingService.VERSION_51) + isForRecovery = in.readBoolean(); + return construct(param, ballot, electorate, readCommand, (flag & 2) == 0, isForRecovery); } else { TableMetadata table = Schema.instance.getExistingTableMetadata(TableId.deserialize(in)); DecoratedKey partitionKey = (DecoratedKey) DecoratedKey.serializer.deserialize(in, table.partitioner, version); - return construct(param, ballot, electorate, partitionKey, table, (flag & 2) != 0); + boolean isForRecovery = false; + if (version >= MessagingService.VERSION_51) + isForRecovery = in.readBoolean(); + return construct(param, ballot, electorate, partitionKey, table, (flag & 2) != 0, isForRecovery); } } @Override public long serializedSize(R request, int version) { - return Ballot.sizeInBytes() + long size = Ballot.sizeInBytes() + Electorate.serializer.serializedSize(request.electorate, version) + 1 + (request.read != null ? ReadCommand.serializer.serializedSize(request.read, version) : request.table.id.serializedSize() + DecoratedKey.serializer.serializedSize(request.partitionKey, version)); + if (version >= MessagingService.VERSION_51) + size += TypeSizes.sizeof(request.isForRecovery); + return size; } } public static class RequestSerializer extends AbstractRequestSerializer { - Request construct(Object ignore, Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isWrite) + Request construct(Object ignore, Ballot ballot, Electorate electorate, SinglePartitionReadCommand read, boolean isWrite, boolean isForRecovery) { - return new Request(ballot, electorate, read, isWrite); + return new Request(ballot, electorate, read, isWrite, isForRecovery); } - Request construct(Object ignore, Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite) + Request construct(Object ignore, Ballot ballot, Electorate electorate, DecoratedKey partitionKey, TableMetadata table, boolean isWrite, boolean isForRecovery) { - return new Request(ballot, electorate, partitionKey, table, isWrite); + return new Request(ballot, electorate, partitionKey, table, isWrite, isForRecovery); } public Request deserialize(DataInputPlus in, int version) throws IOException @@ -1232,6 +1258,14 @@ public Request deserialize(DataInputPlus in, int version) throws IOException } } + private static void serializeRejection(DataOutputPlus out, Ballot supersededBy, ConsensusMigratedAt maybeConsenusMigratedAt, int version) throws IOException + { + out.writeByte(0); + supersededBy.serialize(out); + if (version >= MessagingService.VERSION_51) + ConsensusMigratedAt.serializer.serialize(maybeConsenusMigratedAt, out, version); + } + public static class ResponseSerializer implements IVersionedSerializer { public void serialize(Response response, DataOutputPlus out, int version) throws IOException @@ -1240,7 +1274,7 @@ public void serialize(Response response, DataOutputPlus out, int version) throws { out.writeByte(0); Rejected rejected = (Rejected) response; - rejected.supersededBy.serialize(out); + serializeRejection(out, rejected.supersededBy, rejected.maybeConsenusMigratedAt, version); } else { @@ -1262,6 +1296,8 @@ public void serialize(Response response, DataOutputPlus out, int version) throws Epoch.messageSerializer.serialize(promised.electorateEpoch, out, version); if (promised.outcome == PERMIT_READ) promised.supersededBy.serialize(out); + if (version >= MessagingService.VERSION_51) + ConsensusMigratedAt.serializer.serialize(response.maybeConsenusMigratedAt, out, version); } } @@ -1271,7 +1307,10 @@ public Response deserialize(DataInputPlus in, int version) throws IOException if (flags == 0) { Ballot supersededBy = Ballot.deserialize(in); - return new Rejected(supersededBy); + ConsensusMigratedAt consensusMigratedAt = null; + if (version >= MessagingService.VERSION_51) + consensusMigratedAt = ConsensusMigratedAt.serializer.deserialize(in, version); + return new Rejected(supersededBy, consensusMigratedAt); } else { @@ -1286,15 +1325,20 @@ public Response deserialize(DataInputPlus in, int version) throws IOException Ballot supersededBy = null; if (outcome == PERMIT_READ) supersededBy = Ballot.deserialize(in); - return new Permitted(outcome, lowBound, acceptedNotCommitted, committed, readResponse, hasProposalStability, gossipInfo, electorateEpoch, supersededBy); + ConsensusMigratedAt consensusMigratedAt = null; + if (version >= MessagingService.VERSION_51) + consensusMigratedAt = ConsensusMigratedAt.serializer.deserialize(in, version); + return new Permitted(outcome, consensusMigratedAt, lowBound, acceptedNotCommitted, committed, readResponse, hasProposalStability, gossipInfo, electorateEpoch, supersededBy); } } public long serializedSize(Response response, int version) { + long size; if (response.isRejected()) { - return 1 + Ballot.sizeInBytes(); + size = 1 + Ballot.sizeInBytes(); + } else { @@ -1308,6 +1352,10 @@ public long serializedSize(Response response, int version) + (version >= MessagingService.VERSION_51 ? Epoch.messageSerializer.serializedSize(permitted.electorateEpoch, version) : 0) + (permitted.outcome == PERMIT_READ ? Ballot.sizeInBytes() : 0); } + if (version >= MessagingService.VERSION_51) + size += ConsensusMigratedAt.serializer.serializedSize(response.maybeConsenusMigratedAt, version); + + return size; } } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java b/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java index 77a6fb4971a7..650bd8818c82 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPropose.java @@ -22,6 +22,7 @@ import java.util.concurrent.atomic.AtomicLongFieldUpdater; import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; import java.util.function.Consumer; +import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; @@ -38,13 +39,16 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.paxos.Commit.Proposal; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.utils.concurrent.ConditionAsConsumer; +import static com.google.common.base.Preconditions.checkArgument; import static java.util.Collections.emptyMap; import static org.apache.cassandra.exceptions.RequestFailureReason.UNKNOWN; import static org.apache.cassandra.net.Verb.PAXOS2_PROPOSE_REQ; -import static org.apache.cassandra.service.paxos.PaxosPropose.Superseded.SideEffects.MAYBE; -import static org.apache.cassandra.service.paxos.PaxosPropose.Superseded.SideEffects.NO; +import static org.apache.cassandra.service.paxos.PaxosPropose.Status.SideEffects.MAYBE; +import static org.apache.cassandra.service.paxos.PaxosPropose.Status.SideEffects.NO; +import static org.apache.cassandra.service.paxos.PaxosState.AcceptResult; import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.concurrent.ConditionAsConsumer.newConditionAsConsumer; @@ -54,13 +58,13 @@ * indicating (respectively) that we have had no side effect, or that we cannot * know if we our proposal produced a side effect. */ -public class PaxosPropose> extends PaxosRequestCallback +public class PaxosPropose> extends PaxosRequestCallback { private static final Logger logger = LoggerFactory.getLogger(PaxosPropose.class); public static final RequestHandler requestHandler = new RequestHandler(); public static final RequestSerializer requestSerializer = new RequestSerializer(); - public static final ResponseSerializer responseSerializer = new ResponseSerializer(); + public static final AcceptResultSerializer ACCEPT_RESULT_SERIALIZER = new AcceptResultSerializer(); /** * Represents the current status of a propose action: it is a status rather than a result, @@ -77,22 +81,30 @@ enum Outcome { SUCCESS, SUPERSEDED, MAYBE_FAILURE } } Superseded superseded() { return (Superseded) this; } Paxos.MaybeFailure maybeFailure() { return ((MaybeFailure) this).info; } - public String toString() { return "Success"; } + public String toString() { return outcome.toString(); } + + enum SideEffects { NO, MAYBE } } static class Superseded extends Status { - enum SideEffects { NO, MAYBE } + @Nullable final Ballot by; final SideEffects hadSideEffects; - Superseded(Ballot by, SideEffects hadSideEffects) + // Consensus migration can occur at the same time that we are superseded + // and it's important to preserve returning the uncertainty of the superseded + // at the same time as enforcing the need for consensus migration + final boolean needsConsensusMigration; + Superseded(@Nullable Ballot by, SideEffects hadSideEffects, boolean needsConsensusMigration) { super(Outcome.SUPERSEDED); + checkArgument(needsConsensusMigration == true || by != null, "Must be superseded by ballot if not due to consensus migration"); this.by = by; this.hadSideEffects = hadSideEffects; + this.needsConsensusMigration = needsConsensusMigration; } - public String toString() { return "Superseded(" + by + ',' + hadSideEffects + ')'; } + public String toString() { return "Superseded(" + by + ',' + hadSideEffects + ',' + needsConsensusMigration + ')'; } } private static class MaybeFailure extends Status @@ -107,7 +119,7 @@ private static class MaybeFailure extends Status public String toString() { return info.toString(); } } - private static final Status success = new Status(Status.Outcome.SUCCESS); + private static final Status STATUS_SUCCESS = new Status(Status.Outcome.SUCCESS); private static final AtomicLongFieldUpdater responsesUpdater = AtomicLongFieldUpdater.newUpdater(PaxosPropose.class, "responses"); private static final AtomicReferenceFieldUpdater supersededByUpdater = AtomicReferenceFieldUpdater.newUpdater(PaxosPropose.class, Ballot.class, "supersededBy"); @@ -126,6 +138,10 @@ private static class MaybeFailure extends Status final int participants; /** Number of accepts required */ final int required; + + /** Repairing an in flight txn not proposing a new one **/ + final boolean isForRecovery; + /** Invoke on reaching a terminal status */ final OnDone onDone; @@ -145,7 +161,9 @@ private static class MaybeFailure extends Status /** The newest superseding ballot from a refusal; only returned to the caller if we fail to reach a quorum */ private volatile Ballot supersededBy; - private PaxosPropose(Proposal proposal, int participants, int required, boolean waitForNoSideEffect, OnDone onDone) + private volatile boolean needsConsensusMigration = false; + + private PaxosPropose(Proposal proposal, int participants, int required, boolean waitForNoSideEffect, boolean isForRecovery, OnDone onDone) { this.proposal = proposal; assert required > 0; @@ -153,6 +171,7 @@ private PaxosPropose(Proposal proposal, int participants, int required, boolean this.participants = participants; this.required = required; this.onDone = onDone; + this.isForRecovery = isForRecovery; } /** @@ -160,8 +179,10 @@ private PaxosPropose(Proposal proposal, int participants, int required, boolean * or for the present status if the time elapses without a final result being reached. * @param waitForNoSideEffect if true, on failure we will wait until we can say with certainty there are no side effects * or until we know we will never be able to determine this with certainty + * @param isForRecovery if true the value being proposed is not a new value it is a value from an existing in flight proposal + * and will be allowed to proceed even if the key is migrating to a different consensus protocol */ - static Paxos.Async propose(Proposal proposal, Paxos.Participants participants, boolean waitForNoSideEffect) + static Paxos.Async propose(Proposal proposal, Paxos.Participants participants, boolean waitForNoSideEffect, boolean isForRecovery) { if (waitForNoSideEffect && proposal.update.isEmpty()) waitForNoSideEffect = false; // by definition this has no "side effects" (besides linearizing the operation) @@ -169,9 +190,9 @@ static Paxos.Async propose(Proposal proposal, Paxos.Participants partici // to avoid unnecessary object allocations we extend PaxosPropose to implements Paxos.Async class Async extends PaxosPropose> implements Paxos.Async { - private Async(Proposal proposal, int participants, int required, boolean waitForNoSideEffect) + private Async(Proposal proposal, int participants, int required, boolean waitForNoSideEffect, boolean isForRecovery) { - super(proposal, participants, required, waitForNoSideEffect, newConditionAsConsumer()); + super(proposal, participants, required, waitForNoSideEffect, isForRecovery, newConditionAsConsumer()); } public Status awaitUntil(long deadline) @@ -190,24 +211,24 @@ public Status awaitUntil(long deadline) } } - Async propose = new Async(proposal, participants.sizeOfPoll(), participants.sizeOfConsensusQuorum, waitForNoSideEffect); + Async propose = new Async(proposal, participants.sizeOfPoll(), participants.sizeOfConsensusQuorum, waitForNoSideEffect, isForRecovery); propose.start(participants); return propose; } - static > T propose(Proposal proposal, Paxos.Participants participants, boolean waitForNoSideEffect, T onDone) + static > T propose(Proposal proposal, Paxos.Participants participants, boolean waitForNoSideEffect, boolean isForRecovery, T onDone) { if (waitForNoSideEffect && proposal.update.isEmpty()) waitForNoSideEffect = false; // by definition this has no "side effects" (besides linearizing the operation) - PaxosPropose propose = new PaxosPropose<>(proposal, participants.sizeOfPoll(), participants.sizeOfConsensusQuorum, waitForNoSideEffect, onDone); + PaxosPropose propose = new PaxosPropose<>(proposal, participants.sizeOfPoll(), participants.sizeOfConsensusQuorum, waitForNoSideEffect, isForRecovery, onDone); propose.start(participants); return onDone; } void start(Paxos.Participants participants) { - Message message = Message.out(PAXOS2_PROPOSE_REQ, new Request(proposal), participants.isUrgent()); + Message message = Message.out(PAXOS2_PROPOSE_REQ, new Request(proposal, isForRecovery), participants.isUrgent()); boolean executeOnSelf = false; for (int i = 0, size = participants.sizeOfPoll(); i < size ; ++i) @@ -219,7 +240,7 @@ void start(Paxos.Participants participants) } if (executeOnSelf) - PAXOS2_PROPOSE_REQ.stage.execute(() -> executeOnSelf(proposal)); + PAXOS2_PROPOSE_REQ.stage.execute(() -> executeOnSelf(proposal, isForRecovery)); } /** @@ -230,35 +251,39 @@ Status status() long responses = this.responses; if (isSuccessful(responses)) - return success; + return STATUS_SUCCESS; - if (!canSucceed(responses) && supersededBy != null) + if (!canSucceed(responses) && (supersededBy != null || needsConsensusMigration)) { Superseded.SideEffects sideEffects = hasNoSideEffects(responses) ? NO : MAYBE; - return new Superseded(supersededBy, sideEffects); + return new Superseded(supersededBy, sideEffects, needsConsensusMigration); } return new MaybeFailure(new Paxos.MaybeFailure(participants, required, accepts(responses), failureReasonsAsMap())); } - private void executeOnSelf(Proposal proposal) + private void executeOnSelf(Proposal proposal, boolean isForRecovery) { - executeOnSelf(proposal, RequestHandler::execute); + executeOnSelf(proposal, isForRecovery, RequestHandler::execute); } - public void onResponse(Response response, InetAddressAndPort from) + public void onResponse(AcceptResult acceptResult, InetAddressAndPort from) { + checkArgument(!isForRecovery || acceptResult.rejectedDueToConsensusMigration == false, "Repair should never be rejected due to consensus migration"); if (logger.isTraceEnabled()) - logger.trace("{} for {} from {}", response, proposal, from); + logger.trace("{} for {} from {}", acceptResult, proposal, from); - Ballot supersededBy = response.supersededBy; + Ballot supersededBy = acceptResult.supersededBy; if (supersededBy != null) supersededByUpdater.accumulateAndGet(this, supersededBy, (a, b) -> a == null ? b : b.uuidTimestamp() > a.uuidTimestamp() ? b : a); - long increment = supersededBy == null + long increment = supersededBy == null && !acceptResult.rejectedDueToConsensusMigration ? ACCEPT_INCREMENT : REFUSAL_INCREMENT; + if (acceptResult.rejectedDueToConsensusMigration) + needsConsensusMigration = true; + update(increment); } @@ -375,29 +400,21 @@ private static int failures(long responses) static class Request { final Proposal proposal; - Request(Proposal proposal) + final boolean isForRecovery; + Request(Proposal proposal, boolean isForRecovery) { this.proposal = proposal; + this.isForRecovery = isForRecovery; } + @Override public String toString() { - return proposal.toString("Propose"); - } - } - - /** - * The response to a proposal, indicating success (if {@code supersededBy == null}, - * or failure, alongside the ballot that beat us - */ - static class Response - { - final Ballot supersededBy; - Response(Ballot supersededBy) - { - this.supersededBy = supersededBy; + return "Request{" + + "proposal=" + proposal.toString("Propose") + + ", isForRecovery=" + isForRecovery + + '}'; } - public String toString() { return supersededBy == null ? "Accept" : "RejectProposal(supersededBy=" + supersededBy + ')'; } } /** @@ -408,14 +425,15 @@ public static class RequestHandler implements IVerbHandler @Override public void doVerb(Message message) { - Response response = execute(message.payload.proposal, message.from()); - if (response == null) + ClusterMetadataService.instance().fetchLogFromCMS(message.epoch()); + AcceptResult acceptResult = execute(message.payload.proposal, message.payload.isForRecovery, message.from()); + if (acceptResult == null) MessagingService.instance().respondWithFailure(UNKNOWN, message); else - MessagingService.instance().respond(response, message); + MessagingService.instance().respond(acceptResult, message); } - public static Response execute(Proposal proposal, InetAddressAndPort from) + public static AcceptResult execute(Proposal proposal, boolean isForRecovery, InetAddressAndPort from) { if (!Paxos.isInRangeAndShouldProcess(from, proposal.update.partitionKey(), proposal.update.metadata(), false)) return null; @@ -423,7 +441,7 @@ public static Response execute(Proposal proposal, InetAddressAndPort from) long start = nanoTime(); try (PaxosState state = PaxosState.get(proposal)) { - return new Response(state.acceptIfLatest(proposal)); + return state.acceptIfLatest(proposal, isForRecovery); } finally { @@ -438,42 +456,61 @@ public static class RequestSerializer implements IVersionedSerializer public void serialize(Request request, DataOutputPlus out, int version) throws IOException { Proposal.serializer.serialize(request.proposal, out, version); + if (version >= MessagingService.VERSION_51) + out.writeBoolean(request.isForRecovery); } @Override public Request deserialize(DataInputPlus in, int version) throws IOException { Proposal propose = Proposal.serializer.deserialize(in, version); - return new Request(propose); + boolean isForRecovery = false; + if (version >= MessagingService.VERSION_51) + isForRecovery = in.readBoolean(); + return new Request(propose, isForRecovery); } @Override public long serializedSize(Request request, int version) { - return Proposal.serializer.serializedSize(request.proposal, version); + long size = Proposal.serializer.serializedSize(request.proposal, version); + if (version >= MessagingService.VERSION_51) + size += TypeSizes.sizeof(request.isForRecovery); + return size; } } - public static class ResponseSerializer implements IVersionedSerializer + public static class AcceptResultSerializer implements IVersionedSerializer { - public void serialize(Response response, DataOutputPlus out, int version) throws IOException + public void serialize(PaxosState.AcceptResult acceptResult, DataOutputPlus out, int version) throws IOException { - out.writeBoolean(response.supersededBy != null); - if (response.supersededBy != null) - response.supersededBy.serialize(out); + out.writeBoolean(acceptResult.supersededBy != null); + if (acceptResult.supersededBy != null) + acceptResult.supersededBy.serialize(out); + if (version >= MessagingService.VERSION_51) + out.writeBoolean(acceptResult.rejectedDueToConsensusMigration); } - public Response deserialize(DataInputPlus in, int version) throws IOException + public AcceptResult deserialize(DataInputPlus in, int version) throws IOException { boolean isSuperseded = in.readBoolean(); - return isSuperseded ? new Response(Ballot.deserialize(in)) : new Response(null); + Ballot supersededBy = null; + if (isSuperseded) + supersededBy = Ballot.deserialize(in); + boolean rejectedDueToConsensusMigration = false; + if (version >= MessagingService.VERSION_51) + rejectedDueToConsensusMigration = in.readBoolean(); + return new AcceptResult(supersededBy, rejectedDueToConsensusMigration); } - public long serializedSize(Response response, int version) + public long serializedSize(AcceptResult acceptResult, int version) { - return response.supersededBy != null + long size = acceptResult.supersededBy != null ? TypeSizes.sizeof(true) + Ballot.sizeInBytes() : TypeSizes.sizeof(false); + if (version >= MessagingService.VERSION_51) + size += TypeSizes.sizeof(acceptResult.rejectedDueToConsensusMigration); + return size; } } } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java b/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java index ed369539ba65..8c9f219a3b85 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosRepair.java @@ -61,6 +61,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.paxos.PaxosPropose.Superseded; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.utils.CassandraVersion; @@ -68,6 +69,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MonotonicClock; +import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.config.CassandraRelevantProperties.PAXOS_REPAIR_RETRY_TIMEOUT_IN_MS; @@ -141,10 +143,11 @@ public class PaxosRepair extends AbstractPaxosRepair public static final RequestSerializer requestSerializer = new RequestSerializer(); public static final ResponseSerializer responseSerializer = new ResponseSerializer(); public static final RequestHandler requestHandler = new RequestHandler(); - private static final long RETRY_TIMEOUT_NANOS = getRetryTimeoutNanos(); private static final ScheduledExecutorPlus RETRIES = executorFactory().scheduled("PaxosRepairRetries"); + private static final long RETRY_TIMEOUT_NANOS = getRetryTimeoutNanos(); + private static long getRetryTimeoutNanos() { long retryMillis = PAXOS_REPAIR_RETRY_TIMEOUT_IN_MS.getLong(); @@ -269,6 +272,7 @@ else if (isAcceptedButNotCommitted && !isPromisedButNotAccepted && !reproposalMa { if (logger.isTraceEnabled()) logger.trace("PaxosRepair of {} completing {}", partitionKey(), latestAccepted); + // We need to complete this in-progress accepted proposal, which may not have been seen by a majority // However, since we have not sought any promises, we can simply complete the existing proposal // since this is an idempotent operation - both us and the original proposer (and others) can @@ -277,8 +281,7 @@ else if (isAcceptedButNotCommitted && !isPromisedButNotAccepted && !reproposalMa // If ballots with same timestamp have been both accepted and rejected by different nodes, // to avoid a livelock we simply try to poison, knowing we will fail but use a new ballot // (note there are alternative approaches but this is conservative) - - return PaxosPropose.propose(latestAccepted, participants, false, + return PaxosPropose.propose(latestAccepted, participants, false, true, new ProposingRepair(latestAccepted)); } else if (isAcceptedButNotCommitted || isPromisedButNotAccepted || latestWitnessed.compareTo(latestPreviouslyWitnessed) < 0) @@ -336,9 +339,10 @@ public State execute(Status input) throws Throwable // (else an "earlier" operation can sneak in and invalidate us while we're proposing // with a newer ballot) FoundIncompleteAccepted incomplete = input.incompleteAccepted(); + Proposal propose = new Proposal(incomplete.ballot, incomplete.accepted.update); logger.trace("PaxosRepair of {} found incomplete {}", partitionKey(), incomplete.accepted); - return PaxosPropose.propose(propose, participants, false, + return PaxosPropose.propose(propose, participants, false, true, new ProposingRepair(propose)); // we don't know if we're done, so we must restart } @@ -356,7 +360,7 @@ public State execute(Status input) throws Throwable // propose the empty ballot logger.trace("PaxosRepair of {} submitting empty proposal", partitionKey()); Proposal proposal = Proposal.empty(input.success().ballot, partitionKey(), table); - return PaxosPropose.propose(proposal, participants, false, + return PaxosPropose.propose(proposal, participants, false, true, new ProposingRepair(proposal)); } @@ -383,7 +387,9 @@ public State execute(PaxosPropose.Status input) return retry(this); case SUPERSEDED: - if (isAfter(input.superseded().by, prevSupersededBy)) + Superseded superseded = input.superseded(); + checkState(!superseded.needsConsensusMigration, "Repair should not encounter consensus migration rejection"); + if (isAfter(superseded.by, prevSupersededBy)) prevSupersededBy = input.superseded().by; return retry(this); @@ -423,9 +429,9 @@ public State execute(PaxosCommit.Status input) } } - private PaxosRepair(DecoratedKey partitionKey, Ballot incompleteBallot, TableMetadata table, ConsistencyLevel paxosConsistency) + private PaxosRepair(DecoratedKey partitionKey, @Nullable Ballot incompleteBallot, TableMetadata table, ConsistencyLevel paxosConsistency, long retryTimeoutNanos) { - super(partitionKey, incompleteBallot); + super(partitionKey, incompleteBallot, retryTimeoutNanos); // TODO: move precondition into super ctor Preconditions.checkArgument(paxosConsistency.isSerialConsistency()); this.table = table; @@ -435,12 +441,17 @@ private PaxosRepair(DecoratedKey partitionKey, Ballot incompleteBallot, TableMet public static PaxosRepair create(ConsistencyLevel consistency, DecoratedKey partitionKey, Ballot incompleteBallot, TableMetadata table) { - return new PaxosRepair(partitionKey, incompleteBallot, table, consistency); + return new PaxosRepair(partitionKey, incompleteBallot, table, consistency, RETRY_TIMEOUT_NANOS); + } + + public static PaxosRepair create(ConsistencyLevel consistency, DecoratedKey partitionKey, TableMetadata table, long retryTimeoutNanos) + { + return new PaxosRepair(partitionKey, null, table, consistency, retryTimeoutNanos); } private State retry(State state) { - Preconditions.checkState(isStarted()); + checkState(isStarted()); if (isResult(state)) return state; @@ -455,7 +466,7 @@ public State restart(State state, long waitUntil) participants = Participants.get(table, partitionKey(), paxosConsistency); - if (waitUntil > Long.MIN_VALUE && waitUntil - startedNanos() > RETRY_TIMEOUT_NANOS) + if (waitUntil > Long.MIN_VALUE && waitUntil - startedNanos() > retryTimeoutNanos) return new Failure(null); try @@ -477,7 +488,7 @@ public State restart(State state, long waitUntil) private ConsistencyLevel commitConsistency() { - Preconditions.checkState(paxosConsistency.isSerialConsistency()); + checkState(paxosConsistency.isSerialConsistency()); return paxosConsistency.isDatacenterLocal() ? ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.QUORUM; } diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java b/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java index ff5f2d406839..fce825fa3b84 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java @@ -29,6 +29,7 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.service.FailureRecordingCallback; +import org.apache.cassandra.tcm.ClusterMetadataService; import static org.apache.cassandra.exceptions.RequestFailure.TIMEOUT; import static org.apache.cassandra.exceptions.RequestFailure.UNKNOWN; @@ -44,6 +45,7 @@ public abstract class PaxosRequestCallback extends FailureRecordingCallback message) { + ClusterMetadataService.instance().fetchLogFromCMS(message.epoch()); onResponse(message.payload, message.from()); } @@ -69,6 +71,32 @@ protected void executeOnSelf(I parameter, BiFunction { + D apply(A var1, B var2, C var3); + } + + protected void executeOnSelf(I parameter1, J parameter2, TriFunction execute) + { + T response; + try + { + response = execute.apply(parameter1, parameter2, getBroadcastAddressAndPort()); + if (response == null) + return; + } + catch (Exception ex) + { + RequestFailure reason = UNKNOWN; + if (ex instanceof WriteTimeoutException) reason = TIMEOUT; + else logger.error("Failed to apply {}, {} locally", parameter1, parameter2, ex); + + onFailure(getBroadcastAddressAndPort(), reason); + return; + } + + onResponse(response, getBroadcastAddressAndPort()); + } + static boolean shouldExecuteOnSelf(InetAddressAndPort replica) { return USE_SELF_EXECUTION && replica.equals(getBroadcastAddressAndPort()); diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosState.java b/src/java/org/apache/cassandra/service/paxos/PaxosState.java index 4fe03d6ff511..44ab0b6e4a63 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosState.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosState.java @@ -30,8 +30,9 @@ import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.primitives.Ints; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.cassandra.concurrent.ImmediateExecutor; @@ -43,17 +44,21 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.metrics.PaxosMetrics; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; import org.apache.cassandra.service.paxos.uncommitted.PaxosBallotTracker; import org.apache.cassandra.service.paxos.uncommitted.PaxosStateTracker; import org.apache.cassandra.service.paxos.uncommitted.PaxosUncommittedTracker; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.Nemesis; +import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.CassandraRelevantProperties.PAXOS_DISABLE_COORDINATOR_LOCKING; import static org.apache.cassandra.config.Config.PaxosStatePurging.gc_grace; @@ -69,6 +74,8 @@ import static org.apache.cassandra.service.paxos.Commit.Proposal; import static org.apache.cassandra.service.paxos.Commit.isAfter; import static org.apache.cassandra.service.paxos.Commit.latest; +import static org.apache.cassandra.service.paxos.PaxosState.AcceptResult.RETRY_NEW_PROTOCOL; +import static org.apache.cassandra.service.paxos.PaxosState.AcceptResult.SUCCESS; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PERMIT_READ; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PROMISE; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.REJECT; @@ -80,6 +87,9 @@ */ public class PaxosState implements PaxosOperationLock { + @SuppressWarnings("unused") + private static final Logger logger = LoggerFactory.getLogger(PaxosState.class.getName()); + private static volatile boolean DISABLE_COORDINATOR_LOCKING = PAXOS_DISABLE_COORDINATOR_LOCKING.getBoolean(); public static final ConcurrentHashMap ACTIVE = new ConcurrentHashMap<>(); public static final Map RECENT = Caffeine.newBuilder() @@ -127,7 +137,7 @@ public static PaxosBallotTracker ballotTracker() public static void initializeTrackers() { - Preconditions.checkState(TrackerHandle.tracker != null); + checkState(TrackerHandle.tracker != null); PaxosMetrics.initialize(); } @@ -634,7 +644,7 @@ else if (isAfter(ballot, latestWriteOrLowBound)) /** * Record an acceptance of the proposal if there is no newer promise; otherwise inform the caller of the newer ballot */ - public Ballot acceptIfLatest(Proposal proposal) + public AcceptResult acceptIfLatest(Proposal proposal, boolean isForRecovery) { if (paxosStatePurging() == legacy && !(proposal instanceof AcceptedWithTTL)) proposal = AcceptedWithTTL.withDefaultTTL(proposal); @@ -642,20 +652,31 @@ public Ballot acceptIfLatest(Proposal proposal) // state.promised can be null, because it is invalidated by committed; // we may also have accepted a newer proposal than we promised, so we confirm that we are the absolute newest // (or that we have the exact same ballot as our promise, which is the typical case) + boolean shouldRejectDueToConsensusMigration; Snapshot before, after; while (true) { Snapshot realBefore = current; before = realBefore.removeExpired((int)proposal.ballot.unix(SECONDS)); Ballot latest = before.latestWitnessedOrLowBound(); + if (isForRecovery) + shouldRejectDueToConsensusMigration = false; + else + shouldRejectDueToConsensusMigration = ConsensusRequestRouter.instance + .isKeyInMigratingOrMigratedRangeDuringPaxosAccept(proposal.update.metadata().id, + proposal.update.partitionKey()); if (!proposal.isSameOrAfter(latest)) { Tracing.trace("Rejecting proposal {}; latest is now {}", proposal.ballot, latest); - return latest; + return new AcceptResult(latest, shouldRejectDueToConsensusMigration); } - if (proposal.hasSameBallot(before.committed)) // TODO: consider not answering - return null; // no need to save anything, or indeed answer at all + if (shouldRejectDueToConsensusMigration) + return RETRY_NEW_PROTOCOL; + + // TODO: Consider not answering in the committed ballot case where there is no need to save anything or answer at all + if (proposal.hasSameBallot(before.committed)) + return null; after = new Snapshot(realBefore.promised, realBefore.promisedWrite, proposal.accepted(), realBefore.committed); if (currentUpdater.compareAndSet(this, realBefore, after)) @@ -672,7 +693,8 @@ public Ballot acceptIfLatest(Proposal proposal) // though this Tracing.trace("Accepting proposal {}", proposal); SystemKeyspace.savePaxosProposal(proposal); - return null; + checkState(!shouldRejectDueToConsensusMigration); + return SUCCESS; } public void commit(Agreed commit) @@ -794,6 +816,8 @@ public static Boolean legacyPropose(Commit proposal) boolean accept = proposal.isSameOrAfter(before.latestWitnessedOrLowBound()); if (accept) { + PartitionUpdate partitionUpdate = proposal.update; + checkState(ConsensusKeyMigrationState.getKeyMigrationState(partitionUpdate.metadata().id, partitionUpdate.partitionKey()).tableMigrationState == null, "Using PaxosV1 while consensus migration is in progress is not supported"); if (proposal.hasSameBallot(before.committed) || currentUpdater.compareAndSet(unsafeState, realBefore, new Snapshot(realBefore.promised, realBefore.promisedWrite, @@ -832,4 +856,35 @@ public static Snapshot unsafeGetIfPresent(DecoratedKey partitionKey, TableMetada if (cur != null) return cur.current; return RECENT.get(key); } + + /** + * The response to a proposal, indicating success (if {@code supersededBy == null}, + * or failure, alongside the ballot that beat us + */ + public static class AcceptResult + { + static final AcceptResult SUCCESS = new AcceptResult(false); + + static final AcceptResult RETRY_NEW_PROTOCOL = new AcceptResult(true); + + @Nullable + public final Ballot supersededBy; + + public final boolean rejectedDueToConsensusMigration; + + public AcceptResult(@Nullable Ballot supersededBy, boolean rejectedDueToConsensusMigration) + { + this.supersededBy = supersededBy; + this.rejectedDueToConsensusMigration = rejectedDueToConsensusMigration; + } + + // Success result + private AcceptResult(boolean rejectedDueToConsensusMigration) + { + supersededBy = null; + this.rejectedDueToConsensusMigration = rejectedDueToConsensusMigration; + } + + public String toString() { return supersededBy == null && !rejectedDueToConsensusMigration ? "Accept" : "RejectProposal(supersededBy=" + supersededBy + ", rejectedDueToConsensusMigration=" + rejectedDueToConsensusMigration + ')'; } + } } diff --git a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java index 08cbd9aa0900..21fcb90da49b 100644 --- a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java +++ b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java @@ -53,6 +53,7 @@ import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.PaxosRepairHistory; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.utils.concurrent.AsyncFuture; import static org.apache.cassandra.net.Verb.PAXOS2_CLEANUP_START_PREPARE_REQ; @@ -184,6 +185,7 @@ public long serializedSize(Request request, int version) public static IVerbHandler createVerbHandler(SharedContext ctx) { return in -> { + ClusterMetadataService.instance().fetchLogFromCMS(in.epoch()); ColumnFamilyStore table = Schema.instance.getColumnFamilyStoreInstance(in.payload.tableId); // Note: pre-5.1 we would use gossip state included in the request payload to update topology // prior to cleanup. Topology is no longer derived from gossip state, so this has been removed. diff --git a/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java b/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java index 88b3ba49fab1..286a5b85d780 100644 --- a/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java +++ b/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java @@ -66,6 +66,7 @@ public abstract class AbstractReadExecutor { private static final Logger logger = LoggerFactory.getLogger(AbstractReadExecutor.class); + protected final ReadCoordinator coordinator; protected final ReadCommand command; private final ReplicaPlan.SharedForTokenRead replicaPlan; protected final ReadRepair readRepair; @@ -78,14 +79,15 @@ public abstract class AbstractReadExecutor private final int initialDataRequestCount; protected volatile PartitionIterator result = null; - AbstractReadExecutor(ColumnFamilyStore cfs, ReadCommand command, ReplicaPlan.ForTokenRead replicaPlan, int initialDataRequestCount, Dispatcher.RequestTime requestTime) + AbstractReadExecutor(ReadCoordinator coordinator, ColumnFamilyStore cfs, ReadCommand command, ReplicaPlan.ForTokenRead replicaPlan, int initialDataRequestCount, Dispatcher.RequestTime requestTime) { + this.coordinator = coordinator; this.command = command; this.replicaPlan = ReplicaPlan.shared(replicaPlan); this.initialDataRequestCount = initialDataRequestCount; // the ReadRepair and DigestResolver both need to see our updated - this.readRepair = ReadRepair.create(command, this.replicaPlan, requestTime); - this.digestResolver = new DigestResolver<>(command, this.replicaPlan, requestTime); + this.readRepair = ReadRepair.create(coordinator, command, this.replicaPlan, requestTime); + this.digestResolver = new DigestResolver<>(coordinator, command, this.replicaPlan, requestTime); this.handler = new ReadCallback<>(digestResolver, command, this.replicaPlan, requestTime); this.cfs = cfs; this.traceState = Tracing.instance.get(); @@ -136,13 +138,14 @@ private void makeRequests(ReadCommand readCommand, Iterable replicas) { boolean hasLocalEndpoint = false; Message message = null; + readCommand = coordinator.maybeAllowOutOfRangeReads(readCommand); for (Replica replica: replicas) { assert replica.isFull() || readCommand.acceptsTransient(); InetAddressAndPort endpoint = replica.endpoint(); - if (replica.isSelf()) + if (replica.isSelf() && coordinator.localReadSupported()) { hasLocalEndpoint = true; continue; @@ -154,7 +157,7 @@ private void makeRequests(ReadCommand readCommand, Iterable replicas) if (null == message) message = readCommand.createMessage(false, requestTime).withEpoch(ClusterMetadata.current().epoch); - MessagingService.instance().sendWithCallback(message, endpoint, handler); + coordinator.sendReadCommand(message, endpoint, handler); } // We delay the local (potentially blocking) read till the end to avoid stalling remote requests. @@ -179,8 +182,11 @@ public void executeAsync() EndpointsForToken selected = replicaPlan().contacts(); EndpointsForToken fullDataRequests = selected.filter(Replica::isFull, initialDataRequestCount); makeFullDataRequests(fullDataRequests); - makeTransientDataRequests(selected.filterLazily(Replica::isTransient)); - makeDigestRequests(selected.filterLazily(r -> r.isFull() && !fullDataRequests.contains(r))); + EndpointsForToken transientRequests = selected.filter(Replica::isTransient); + makeTransientDataRequests(transientRequests); + EndpointsForToken digestRequests = selected.filter(r -> r.isFull() && !fullDataRequests.contains(r)); + makeDigestRequests(digestRequests); + coordinator.notifyOfInitialContacts(fullDataRequests, transientRequests, digestRequests); } /** @@ -189,6 +195,7 @@ public void executeAsync() public static AbstractReadExecutor getReadExecutor(ClusterMetadata metadata, SinglePartitionReadCommand command, ConsistencyLevel consistencyLevel, + ReadCoordinator coordinator, Dispatcher.RequestTime requestTime) throws UnavailableException { Keyspace keyspace = Keyspace.open(command.metadata().keyspace); @@ -200,25 +207,26 @@ public static AbstractReadExecutor getReadExecutor(ClusterMetadata metadata, command.partitionKey().getToken(), command.indexQueryPlan(), consistencyLevel, - retry); + retry, + coordinator); // Speculative retry is disabled *OR* // 11980: Disable speculative retry if using EACH_QUORUM in order to prevent miscounting DC responses if (retry.equals(NeverSpeculativeRetryPolicy.INSTANCE) || consistencyLevel == ConsistencyLevel.EACH_QUORUM) - return new NeverSpeculatingReadExecutor(cfs, command, replicaPlan, requestTime, false); + return new NeverSpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime, false); // There are simply no extra replicas to speculate. // Handle this separately so it can record failed attempts to speculate due to lack of replicas if (replicaPlan.contacts().size() == replicaPlan.readCandidates().size()) { boolean recordFailedSpeculation = consistencyLevel != ConsistencyLevel.ALL; - return new NeverSpeculatingReadExecutor(cfs, command, replicaPlan, requestTime, recordFailedSpeculation); + return new NeverSpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime, recordFailedSpeculation); } if (retry.equals(AlwaysSpeculativeRetryPolicy.INSTANCE)) - return new AlwaysSpeculatingReadExecutor(cfs, command, replicaPlan, requestTime); + return new AlwaysSpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime); else // PERCENTILE or CUSTOM. - return new SpeculatingReadExecutor(cfs, command, replicaPlan, requestTime); + return new SpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime); } public boolean hasLocalRead() @@ -272,13 +280,14 @@ public static class NeverSpeculatingReadExecutor extends AbstractReadExecutor */ private final boolean logFailedSpeculation; - public NeverSpeculatingReadExecutor(ColumnFamilyStore cfs, + public NeverSpeculatingReadExecutor(ReadCoordinator coordinator, + ColumnFamilyStore cfs, ReadCommand command, ReplicaPlan.ForTokenRead replicaPlan, Dispatcher.RequestTime requestTime, boolean logFailedSpeculation) { - super(cfs, command, replicaPlan, 1, requestTime); + super(coordinator, cfs, command, replicaPlan, 1, requestTime); this.logFailedSpeculation = logFailedSpeculation; } @@ -295,7 +304,8 @@ static class SpeculatingReadExecutor extends AbstractReadExecutor { private volatile boolean speculated = false; - public SpeculatingReadExecutor(ColumnFamilyStore cfs, + public SpeculatingReadExecutor(ReadCoordinator coordinator, + ColumnFamilyStore cfs, ReadCommand command, ReplicaPlan.ForTokenRead replicaPlan, Dispatcher.RequestTime requestTime) @@ -303,7 +313,7 @@ public SpeculatingReadExecutor(ColumnFamilyStore cfs, // We're hitting additional targets for read repair (??). Since our "extra" replica is the least- // preferred by the snitch, we do an extra data read to start with against a replica more // likely to respond; better to let RR fail than the entire query. - super(cfs, command, replicaPlan, replicaPlan.readQuorum() < replicaPlan.contacts().size() ? 2 : 1, requestTime); + super(coordinator, cfs, command, replicaPlan, replicaPlan.readQuorum() < replicaPlan.contacts().size() ? 2 : 1, requestTime); } public void maybeTryAdditionalReplicas() @@ -366,14 +376,15 @@ void onReadTimeout() private static class AlwaysSpeculatingReadExecutor extends AbstractReadExecutor { - public AlwaysSpeculatingReadExecutor(ColumnFamilyStore cfs, + public AlwaysSpeculatingReadExecutor(ReadCoordinator coordinator, + ColumnFamilyStore cfs, ReadCommand command, ReplicaPlan.ForTokenRead replicaPlan, Dispatcher.RequestTime requestTime) { // presumably, we speculate an extra data request here in case it is our data request that fails to respond, // and there are no more nodes to consult - super(cfs, command, replicaPlan, replicaPlan.contacts().size() > 1 ? 2 : 1, requestTime); + super(coordinator, cfs, command, replicaPlan, replicaPlan.contacts().size() > 1 ? 2 : 1, requestTime); } public void maybeTryAdditionalReplicas() diff --git a/src/java/org/apache/cassandra/service/reads/DataResolver.java b/src/java/org/apache/cassandra/service/reads/DataResolver.java index 332a78570851..03eee1c2c522 100644 --- a/src/java/org/apache/cassandra/service/reads/DataResolver.java +++ b/src/java/org/apache/cassandra/service/reads/DataResolver.java @@ -23,7 +23,6 @@ import java.util.List; import java.util.function.Supplier; import java.util.function.UnaryOperator; - import javax.annotation.Nullable; import com.google.common.base.Joiner; @@ -57,22 +56,23 @@ import org.apache.cassandra.service.reads.repair.RepairedDataVerifier; import org.apache.cassandra.transport.Dispatcher; -import static com.google.common.collect.Iterables.*; +import static com.google.common.collect.Iterables.any; +import static com.google.common.collect.Iterables.transform; public class DataResolver, P extends ReplicaPlan.ForRead> extends ResponseResolver { private final boolean enforceStrictLiveness; - private final ReadRepair readRepair; + public final ReadRepair readRepair; private final boolean trackRepairedStatus; - public DataResolver(ReadCommand command, Supplier replicaPlan, ReadRepair readRepair, Dispatcher.RequestTime requestTime) + public DataResolver(ReadCoordinator coordinator, ReadCommand command, Supplier replicaPlan, ReadRepair readRepair, Dispatcher.RequestTime requestTime) { - this(command, replicaPlan, readRepair, requestTime, false); + this(coordinator, command, replicaPlan, readRepair, requestTime, false); } - public DataResolver(ReadCommand command, Supplier replicaPlan, ReadRepair readRepair, Dispatcher.RequestTime requestTime, boolean trackRepairedStatus) + public DataResolver(ReadCoordinator coordinator, ReadCommand command, Supplier replicaPlan, ReadRepair readRepair, Dispatcher.RequestTime requestTime, boolean trackRepairedStatus) { - super(command, replicaPlan, requestTime); + super(coordinator, command, replicaPlan, requestTime); this.enforceStrictLiveness = command.metadata().enforceStrictLiveness(); this.readRepair = readRepair; this.trackRepairedStatus = trackRepairedStatus; @@ -209,6 +209,7 @@ private UnfilteredPartitionIterator shortReadProtectedResponse(int i, ResolveCon originalResponse, command, context.mergedResultCounter, + coordinator, requestTime, enforceStrictLiveness) : originalResponse; @@ -249,7 +250,7 @@ private PartitionIterator resolveWithReplicaFilteringProtection(E replicas, Repa // before it counts against the limit. If this "pre-count" filter causes a short read, additional rows // will be fetched from the first-phase iterator. - ReplicaFilteringProtection rfp = new ReplicaFilteringProtection<>(replicaPlan().keyspace(), + ReplicaFilteringProtection rfp = new ReplicaFilteringProtection<>(coordinator, replicaPlan().keyspace(), command, replicaPlan().consistencyLevel(), requestTime, @@ -257,6 +258,7 @@ private PartitionIterator resolveWithReplicaFilteringProtection(E replicas, Repa DatabaseDescriptor.getCachedReplicaRowsWarnThreshold(), DatabaseDescriptor.getCachedReplicaRowsFailThreshold()); + // We need separate contexts, as each context has his own counter ResolveContext firstPhaseContext = new ResolveContext(replicas, false); PartitionIterator firstPhasePartitions = resolveInternal(firstPhaseContext, rfp.mergeController(), diff --git a/src/java/org/apache/cassandra/service/reads/DigestResolver.java b/src/java/org/apache/cassandra/service/reads/DigestResolver.java index cc248422c06c..59c3df383d2e 100644 --- a/src/java/org/apache/cassandra/service/reads/DigestResolver.java +++ b/src/java/org/apache/cassandra/service/reads/DigestResolver.java @@ -30,8 +30,8 @@ import org.apache.cassandra.db.partitions.PartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; import org.apache.cassandra.locator.Endpoints; -import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.net.Message; import org.apache.cassandra.service.reads.repair.NoopReadRepair; @@ -45,9 +45,9 @@ public class DigestResolver, P extends ReplicaPlan.ForRea { private volatile Message dataResponse; - public DigestResolver(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) + public DigestResolver(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - super(command, replicaPlan, requestTime); + super(coordinator, command, replicaPlan, requestTime); Preconditions.checkArgument(command instanceof SinglePartitionReadCommand, "DigestResolver can only be used with SinglePartitionReadCommand commands"); } @@ -87,7 +87,7 @@ public PartitionIterator getData() // This path can be triggered only if we've got responses from full replicas and they match, but // transient replica response still contains data, which needs to be reconciled. DataResolver dataResolver - = new DataResolver<>(command, replicaPlan, NoopReadRepair.instance, requestTime); + = new DataResolver<>(coordinator, command, replicaPlan, NoopReadRepair.instance, requestTime); dataResolver.preprocess(dataResponse); // Reconcile with transient replicas diff --git a/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java b/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java new file mode 100644 index 000000000000..8f41464779b1 --- /dev/null +++ b/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.reads; + +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.ReadCommand; +import org.apache.cassandra.db.ReadResponse; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.ReplicaLayout; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.tcm.ClusterMetadata; + +public interface ReadCoordinator +{ + ReadCoordinator DEFAULT = new ReadCoordinator() + { + public boolean localReadSupported() + { + return true; + } + + public EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token) + { + return ReplicaLayout.forNonLocalStrategyTokenRead(metadata, keyspace, token); + } + + public void sendReadCommand(Message message, InetAddressAndPort to, RequestCallback callback) + { + MessagingService.instance().sendWithCallback(message, to, callback); + } + + public void sendReadRepairMutation(Message message, InetAddressAndPort to, RequestCallback callback) + { + MessagingService.instance().sendWithCallback(message, to, callback); + } + + public boolean isEventuallyConsistent() + { + return true; + } + }; + + boolean localReadSupported(); + EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token); + default ReadCommand maybeAllowOutOfRangeReads(ReadCommand command) + { + return command; + } + void sendReadCommand(Message message, InetAddressAndPort to, RequestCallback callback); + default void notifyOfInitialContacts(EndpointsForToken fullDataRequests, EndpointsForToken transientRequests, EndpointsForToken digestRequests) {} + void sendReadRepairMutation(Message message, InetAddressAndPort to, RequestCallback callback); + default Mutation maybeAllowOutOfRangeMutations(Mutation m) + { + return m; + } + boolean isEventuallyConsistent(); +} diff --git a/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java b/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java index 056f3b55df32..2744e8d4be08 100644 --- a/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java +++ b/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java @@ -23,8 +23,8 @@ import java.util.Arrays; import java.util.List; import java.util.NavigableSet; -import java.util.concurrent.TimeUnit; import java.util.Queue; +import java.util.concurrent.TimeUnit; import java.util.function.Function; import org.slf4j.Logger; @@ -98,6 +98,7 @@ public class ReplicaFilteringProtection> private static final Function NULL_TO_NO_STATS = rowIterator -> rowIterator == null ? EncodingStats.NO_STATS : rowIterator.stats(); + private final ReadCoordinator coordinator; private final Keyspace keyspace; private final ReadCommand command; private final ConsistencyLevel consistency; @@ -119,7 +120,8 @@ public class ReplicaFilteringProtection> */ private final List> originalPartitions; - ReplicaFilteringProtection(Keyspace keyspace, + ReplicaFilteringProtection(ReadCoordinator coordinator, + Keyspace keyspace, ReadCommand command, ConsistencyLevel consistency, Dispatcher.RequestTime requestTime, @@ -127,6 +129,7 @@ public class ReplicaFilteringProtection> int cachedRowsWarnThreshold, int cachedRowsFailThreshold) { + this.coordinator = coordinator; this.keyspace = keyspace; this.command = command; this.consistency = consistency; @@ -149,7 +152,7 @@ private UnfilteredPartitionIterator executeReadCommand(ReadCommand cmd, Replica { @SuppressWarnings("unchecked") DataResolver resolver = - new DataResolver<>(cmd, replicaPlan, (NoopReadRepair) NoopReadRepair.instance, requestTime); + new DataResolver<>(coordinator, cmd, replicaPlan, (NoopReadRepair) NoopReadRepair.instance, requestTime); ReadCallback handler = new ReadCallback<>(resolver, cmd, replicaPlan, requestTime); diff --git a/src/java/org/apache/cassandra/service/reads/ResponseResolver.java b/src/java/org/apache/cassandra/service/reads/ResponseResolver.java index 5dd81eb7bcc1..61956322d884 100644 --- a/src/java/org/apache/cassandra/service/reads/ResponseResolver.java +++ b/src/java/org/apache/cassandra/service/reads/ResponseResolver.java @@ -34,6 +34,7 @@ public abstract class ResponseResolver, P extends Replica { protected static final Logger logger = LoggerFactory.getLogger(ResponseResolver.class); + protected final ReadCoordinator coordinator; protected final ReadCommand command; protected final Supplier replicaPlan; @@ -41,8 +42,9 @@ public abstract class ResponseResolver, P extends Replica protected final Accumulator> responses; protected final Dispatcher.RequestTime requestTime; - public ResponseResolver(ReadCommand command, Supplier replicaPlan, Dispatcher.RequestTime requestTime) + public ResponseResolver(ReadCoordinator coordinator, ReadCommand command, Supplier replicaPlan, Dispatcher.RequestTime requestTime) { + this.coordinator = coordinator; this.command = command; this.replicaPlan = replicaPlan; this.responses = new Accumulator<>(replicaPlan.get().readCandidates().size()); diff --git a/src/java/org/apache/cassandra/service/reads/ShortReadPartitionsProtection.java b/src/java/org/apache/cassandra/service/reads/ShortReadPartitionsProtection.java index e9870f1f1d7b..102042758e76 100644 --- a/src/java/org/apache/cassandra/service/reads/ShortReadPartitionsProtection.java +++ b/src/java/org/apache/cassandra/service/reads/ShortReadPartitionsProtection.java @@ -18,9 +18,6 @@ package org.apache.cassandra.service.reads; -import org.apache.cassandra.locator.Endpoints; -import org.apache.cassandra.locator.ReplicaPlan; -import org.apache.cassandra.locator.ReplicaPlans; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,16 +38,20 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.ExcludingBounds; import org.apache.cassandra.dht.Range; +import org.apache.cassandra.locator.Endpoints; import org.apache.cassandra.locator.Replica; -import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.service.reads.repair.NoopReadRepair; +import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.locator.ReplicaPlans; import org.apache.cassandra.service.StorageProxy; +import org.apache.cassandra.service.reads.repair.NoopReadRepair; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; public class ShortReadPartitionsProtection extends Transformation implements MorePartitions { private static final Logger logger = LoggerFactory.getLogger(ShortReadPartitionsProtection.class); + + private final ReadCoordinator coordinator; private final ReadCommand command; private final Replica source; @@ -65,13 +66,16 @@ public class ShortReadPartitionsProtection extends Transformation, P extends ReplicaPlan.ForRead> UnfilteredPartitionIterator executeReadCommand(ReadCommand cmd, ReplicaPlan.Shared replicaPlan) { - DataResolver resolver = new DataResolver<>(cmd, replicaPlan, (NoopReadRepair)NoopReadRepair.instance, requestTime); + cmd = coordinator.maybeAllowOutOfRangeReads(cmd); + DataResolver resolver = new DataResolver<>(coordinator, cmd, replicaPlan, (NoopReadRepair)NoopReadRepair.instance, requestTime); ReadCallback handler = new ReadCallback<>(resolver, cmd, replicaPlan, requestTime); - if (source.isSelf()) + if (source.isSelf() && coordinator.localReadSupported()) { Stage.READ.maybeExecuteImmediately(new StorageProxy.LocalReadRunnable(cmd, handler, requestTime)); } @@ -189,7 +194,7 @@ UnfilteredPartitionIterator executeReadCommand(ReadCommand cmd, ReplicaPlan.Shar { if (source.isTransient()) cmd = cmd.copyAsTransientQuery(source); - MessagingService.instance().sendWithCallback(cmd.createMessage(false, requestTime), source.endpoint(), handler); + coordinator.sendReadCommand(cmd.createMessage(false, requestTime), source.endpoint(), handler); } // We don't call handler.get() because we want to preserve tombstones since we're still in the middle of merging node results. diff --git a/src/java/org/apache/cassandra/service/reads/ShortReadProtection.java b/src/java/org/apache/cassandra/service/reads/ShortReadProtection.java index 1eca190a7343..e289d276b3e7 100644 --- a/src/java/org/apache/cassandra/service/reads/ShortReadProtection.java +++ b/src/java/org/apache/cassandra/service/reads/ShortReadProtection.java @@ -44,6 +44,7 @@ public static UnfilteredPartitionIterator extend(Replica source, UnfilteredPartitionIterator partitions, ReadCommand command, DataLimits.Counter mergedResultCounter, + ReadCoordinator coordinator, Dispatcher.RequestTime requestTime, boolean enforceStrictLiveness) { @@ -52,7 +53,7 @@ public static UnfilteredPartitionIterator extend(Replica source, command.selectsFullPartition(), enforceStrictLiveness).onlyCount(); - ShortReadPartitionsProtection protection = new ShortReadPartitionsProtection(command, + ShortReadPartitionsProtection protection = new ShortReadPartitionsProtection(coordinator, command, source, preFetchCallback, singleResultCounter, diff --git a/src/java/org/apache/cassandra/service/reads/range/RangeCommandIterator.java b/src/java/org/apache/cassandra/service/reads/range/RangeCommandIterator.java index eb55a280c920..98f0399d3c3c 100644 --- a/src/java/org/apache/cassandra/service/reads/range/RangeCommandIterator.java +++ b/src/java/org/apache/cassandra/service/reads/range/RangeCommandIterator.java @@ -48,6 +48,7 @@ import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.reads.DataResolver; import org.apache.cassandra.service.reads.ReadCallback; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.service.reads.repair.ReadRepair; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; @@ -197,9 +198,9 @@ private SingleRangeResponse query(ReplicaPlan.ForRangeRead replicaPlan, boolean ReplicaPlan.SharedForRangeRead sharedReplicaPlan = ReplicaPlan.shared(replicaPlan); ReadRepair readRepair = - ReadRepair.create(command, sharedReplicaPlan, requestTime); + ReadRepair.create(ReadCoordinator.DEFAULT, command, sharedReplicaPlan, requestTime); DataResolver resolver = - new DataResolver<>(rangeCommand, sharedReplicaPlan, readRepair, requestTime, trackRepairedStatus); + new DataResolver<>(ReadCoordinator.DEFAULT, rangeCommand, sharedReplicaPlan, readRepair, requestTime, trackRepairedStatus); ReadCallback handler = new ReadCallback<>(resolver, rangeCommand, sharedReplicaPlan, requestTime); diff --git a/src/java/org/apache/cassandra/service/reads/range/ScanAllRangesCommandIterator.java b/src/java/org/apache/cassandra/service/reads/range/ScanAllRangesCommandIterator.java index 53f55f8938ae..c866911e1a50 100644 --- a/src/java/org/apache/cassandra/service/reads/range/ScanAllRangesCommandIterator.java +++ b/src/java/org/apache/cassandra/service/reads/range/ScanAllRangesCommandIterator.java @@ -38,6 +38,7 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.reads.DataResolver; import org.apache.cassandra.service.reads.ReadCallback; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.service.reads.repair.NoopReadRepair; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; @@ -92,7 +93,7 @@ protected PartitionIterator sendNextRequests() ReplicaPlan.ForRangeRead plan = ReplicaPlans.forFullRangeRead(keyspace, consistencyLevel, command.dataRange().keyRange(), replicasToQuery, totalRangeCount); ReplicaPlan.SharedForRangeRead sharedReplicaPlan = ReplicaPlan.shared(plan); - DataResolver resolver = new DataResolver<>(command, sharedReplicaPlan, NoopReadRepair.instance, requestTime, false); + DataResolver resolver = new DataResolver<>(ReadCoordinator.DEFAULT, command, sharedReplicaPlan, NoopReadRepair.instance, requestTime, false); ReadCallback handler = new ReadCallback<>(resolver, command, sharedReplicaPlan, requestTime); int nodes = 0; diff --git a/src/java/org/apache/cassandra/service/reads/repair/AbstractReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/AbstractReadRepair.java index 8343b83b071e..7ef6d2fe047a 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/AbstractReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/AbstractReadRepair.java @@ -39,11 +39,11 @@ import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.metrics.ReadRepairMetrics; import org.apache.cassandra.net.Message; -import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.reads.DataResolver; import org.apache.cassandra.service.reads.DigestResolver; import org.apache.cassandra.service.reads.ReadCallback; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; @@ -54,6 +54,7 @@ public abstract class AbstractReadRepair, P extends Repli { protected static final Logger logger = LoggerFactory.getLogger(AbstractReadRepair.class); + protected final ReadCoordinator coordinator; protected final ReadCommand command; protected final Dispatcher.RequestTime requestTime; protected final ReplicaPlan.Shared replicaPlan; @@ -75,10 +76,11 @@ public DigestRepair(DataResolver dataResolver, ReadCallback readCall } } - public AbstractReadRepair(ReadCommand command, + public AbstractReadRepair(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { + this.coordinator = coordinator; this.command = command; this.requestTime = requestTime; this.replicaPlan = replicaPlan; @@ -92,9 +94,9 @@ protected P replicaPlan() void sendReadCommand(Replica to, ReadCallback readCallback, boolean speculative, boolean trackRepairedStatus) { - ReadCommand command = this.command; - - if (to.isSelf()) + ReadCommand command = coordinator.maybeAllowOutOfRangeReads(this.command); + + if (to.isSelf() && coordinator.localReadSupported()) { Stage.READ.maybeExecuteImmediately(new StorageProxy.LocalReadRunnable(command, readCallback, requestTime, trackRepairedStatus)); return; @@ -117,7 +119,7 @@ void sendReadCommand(Replica to, ReadCallback readCallback, boolean specul } Message message = command.createMessage(trackRepairedStatus && to.isFull(), requestTime); - MessagingService.instance().sendWithCallback(message, to.endpoint(), readCallback); + coordinator.sendReadCommand(message, to.endpoint(), readCallback); } abstract Meter getRepairMeter(); @@ -139,7 +141,7 @@ public void startRepair(DigestResolver digestResolver, Consumer resolver = new DataResolver<>(command, replicaPlan, this, requestTime, trackRepairedStatus); + DataResolver resolver = new DataResolver<>(coordinator, command, replicaPlan, this, requestTime, trackRepairedStatus); ReadCallback readCallback = new ReadCallback<>(resolver, command, replicaPlan, requestTime); digestRepair = new DigestRepair<>(resolver, readCallback, resultConsumer); diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java index 61b529ca003b..973fbcb45dfa 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java @@ -21,9 +21,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; - -import org.apache.cassandra.utils.concurrent.AsyncFuture; -import org.apache.cassandra.utils.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import com.google.common.annotations.VisibleForTesting; @@ -38,27 +35,32 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.locator.InOurDc; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.locator.Replicas; -import org.apache.cassandra.locator.InOurDc; import org.apache.cassandra.metrics.ReadRepairMetrics; -import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.reads.ReadCoordinator; +import org.apache.cassandra.service.reads.repair.BlockingReadRepair.PendingPartitionRepair; import org.apache.cassandra.tracing.Tracing; +import org.apache.cassandra.utils.concurrent.AsyncFuture; +import org.apache.cassandra.utils.concurrent.CountDownLatch; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; -import static org.apache.cassandra.net.Verb.*; +import static org.apache.cassandra.net.Verb.READ_REPAIR_REQ; import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.concurrent.CountDownLatch.newCountDownLatch; import static com.google.common.collect.Iterables.all; public class BlockingPartitionRepair - extends AsyncFuture implements RequestCallback + extends AsyncFuture implements RequestCallback, PendingPartitionRepair { + private final ReadCoordinator coordinator; private final DecoratedKey key; private final ReplicaPlan.ForWrite repairPlan; private final Map pendingRepairs; @@ -66,8 +68,10 @@ public class BlockingPartitionRepair private final int blockFor; private volatile long mutationsSentTime; - public BlockingPartitionRepair(DecoratedKey key, Map repairs, ReplicaPlan.ForWrite repairPlan) + @VisibleForTesting + public BlockingPartitionRepair(ReadCoordinator coordinator, DecoratedKey key, Map repairs, ReplicaPlan.ForWrite repairPlan) { + this.coordinator = coordinator; this.key = key; this.pendingRepairs = new ConcurrentHashMap<>(repairs); this.repairPlan = repairPlan; @@ -99,18 +103,21 @@ public BlockingPartitionRepair(DecoratedKey key, Map repairs, latch = newCountDownLatch(Math.max(blockFor, 0)); } + @Override public ReplicaPlan.ForWrite repairPlan() { return repairPlan; } - int blockFor() + @Override + public int blockFor() { return blockFor; } @VisibleForTesting - int waitingOn() + @Override + public int waitingOn() { return latch.count(); } @@ -147,7 +154,7 @@ private PartitionUpdate mergeUnackedUpdates() @VisibleForTesting protected void sendRR(Message message, InetAddressAndPort endpoint) { - MessagingService.instance().sendWithCallback(message, endpoint, this); + coordinator.sendReadRepairMutation(message, endpoint, this); } public void sendInitialRepairs() @@ -159,7 +166,7 @@ public void sendInitialRepairs() { Replica destination = entry.getKey(); Preconditions.checkArgument(destination.isFull(), "Can't send repairs to transient replicas: %s", destination); - Mutation mutation = entry.getValue(); + Mutation mutation = coordinator.maybeAllowOutOfRangeMutations(entry.getValue()); TableId tableId = extractUpdate(mutation).metadata().id; Tracing.trace("Sending read-repair-mutation to {}", destination); @@ -177,6 +184,7 @@ public void sendInitialRepairs() * @param timeUnit the time unit of the future time * @return true if repair is done; otherwise, false. */ + @Override public boolean awaitRepairsUntil(long timeoutAt, TimeUnit timeUnit) { long timeoutAtNanos = timeUnit.toNanos(timeoutAt); @@ -191,18 +199,18 @@ public boolean awaitRepairsUntil(long timeoutAt, TimeUnit timeUnit) } } + @Override + public boolean awaitRepairs(long remaining, TimeUnit timeUnit) throws InterruptedException + { + return latch.await(remaining, timeUnit); + } + private static int msgVersionIdx(int version) { return version - MessagingService.minimum_version; } - /** - * If it looks like we might not receive acks for all the repair mutations we sent out, combine all - * the unacked mutations and send them to the minority of nodes not involved in the read repair data - * read / write cycle. We will accept acks from them in lieu of acks from the initial mutations sent - * out, so long as we receive the same number of acks as repair mutations transmitted. This prevents - * misbehaving nodes from killing a quorum read, while continuing to guarantee monotonic quorum reads - */ + @Override public void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) { if (awaitRepairsUntil(timeout + timeoutUnit.convert(mutationsSentTime, TimeUnit.NANOSECONDS), timeoutUnit)) @@ -242,6 +250,7 @@ public void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) continue; } + mutation = coordinator.maybeAllowOutOfRangeMutations(mutation); Tracing.trace("Sending speculative read-repair-mutation to {}", replica); sendRR(Message.out(READ_REPAIR_REQ, mutation), replica.endpoint()); ReadRepairDiagnostics.speculatedWrite(this, replica.endpoint(), mutation); diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java index 4a56e6fe18bd..a1e90fd96551 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java @@ -18,30 +18,55 @@ package org.apache.cassandra.service.reads.repair; +import java.util.Collection; import java.util.Map; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; -import org.apache.cassandra.db.DecoratedKey; +import com.google.common.util.concurrent.UncheckedExecutionException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.primitives.Keys; +import accord.primitives.Txn; import com.codahale.metrics.Meter; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.config.Config.LWTStrategy; +import org.apache.cassandra.config.Config.NonSerialWriteStrategy; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.ReadCommand; +import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.locator.Endpoints; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.locator.ReplicaPlan.ForWrite; import org.apache.cassandra.metrics.ReadRepairMetrics; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.TxnResult; +import org.apache.cassandra.service.accord.txn.UnrecoverableRepairUpdate; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; /** * 'Classic' read repair. Doesn't allow the client read to return until @@ -53,13 +78,66 @@ public class BlockingReadRepair, P extends ReplicaPlan.Fo { private static final Logger logger = LoggerFactory.getLogger(BlockingReadRepair.class); - protected final Queue repairs = new ConcurrentLinkedQueue<>(); + protected final Queue repairs = new ConcurrentLinkedQueue<>(); - BlockingReadRepair(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) + interface PendingPartitionRepair { - super(command, replicaPlan, requestTime); + + /** + * Wait for the repair to complete util a future time + * If the {@param timeoutAt} is a past time, the method returns immediately with the repair result. + * @param timeoutAt future time + * @param timeUnit the time unit of the future time + * @return true if repair is done; otherwise, false. + */ + default boolean awaitRepairsUntil(long timeoutAt, TimeUnit timeUnit) + { + long timeoutAtNanos = timeUnit.toNanos(timeoutAt); + long remaining = timeoutAtNanos - nanoTime(); + try + { + return awaitRepairs(remaining, timeUnit); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new UncheckedExecutionException(e); + } + } + + boolean awaitRepairs(long remaining, TimeUnit timeUnit) throws InterruptedException, ExecutionException; + + /** + * If it looks like we might not receive acks for all the repair mutations we sent out, combine all + * the unacked mutations and send them to the minority of nodes not involved in the read repair data + * read / write cycle. We will accept acks from them in lieu of acks from the initial mutations sent + * out, so long as we receive the same number of acks as repair mutations transmitted. This prevents + * misbehaving nodes from killing a quorum read, while continuing to guarantee monotonic quorum reads + */ + default void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) {} + + default int blockFor() + { + return -1; + } + + default int waitingOn() + { + return -1; + } + + ForWrite repairPlan(); } + BlockingReadRepair(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) + { + super(coordinator, command, replicaPlan, requestTime); + } + + @Override public UnfilteredPartitionIterators.MergeListener getMergeListener(P replicaPlan) { return new PartitionIteratorMergeListener<>(replicaPlan, command, this); @@ -74,7 +152,7 @@ Meter getRepairMeter() @Override public void maybeSendAdditionalWrites() { - for (BlockingPartitionRepair repair: repairs) + for (PendingPartitionRepair repair: repairs) { repair.maybeSendAdditionalWrites(cfs.additionalWriteLatencyMicros, MICROSECONDS); } @@ -83,10 +161,10 @@ public void maybeSendAdditionalWrites() @Override public void awaitWrites() { - BlockingPartitionRepair timedOut = null; + PendingPartitionRepair timedOut = null; ReplicaPlan.ForWrite repairPlan = null; - for (BlockingPartitionRepair repair : repairs) + for (PendingPartitionRepair repair : repairs) { long deadline = requestTime.computeDeadline(DatabaseDescriptor.getReadRpcTimeout(NANOSECONDS)); @@ -116,10 +194,80 @@ public void awaitWrites() } @Override - public void repairPartition(DecoratedKey partitionKey, Map mutations, ReplicaPlan.ForWrite writePlan) + public void repairPartition(DecoratedKey dk, Map mutations, ReplicaPlan.ForWrite writePlan) + { + NonSerialWriteStrategy nonSerialWriteStrategy = DatabaseDescriptor.getNonSerialWriteStrategy(); + if (coordinator.isEventuallyConsistent() && (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord + || nonSerialWriteStrategy.blockingReadRepairThroughAccord)) + { + Collection partitionUpdates = Mutation.merge(mutations.values()).getPartitionUpdates(); + checkState(partitionUpdates.size() == 1, "Expect only one PartitionUpdate"); + PartitionUpdate update = partitionUpdates.iterator().next(); + PartitionKey partitionKey = PartitionKey.of(update); + Keys key = Keys.of(partitionKey); + // This is going create a new BlockingReadRepair inside an Accord transaction which will go down + // the !isEventuallyConsistent path and apply the repairs through Accord command stores using AccordInteropExecution + UnrecoverableRepairUpdate repairUpdate = UnrecoverableRepairUpdate.create(AccordService.instance().nodeId(), this, key, dk, mutations, writePlan); + Future repairFuture; + try + { + Txn txn = new Txn.InMemory(key, TxnRead.createNoOpRead(key), TxnQuery.NONE, repairUpdate); + repairFuture = Stage.ACCORD_MIGRATION.submit(() -> { + try + { + return AccordService.instance().coordinate(txn, ConsistencyLevel.ANY, requestTime); + } + finally + { + // If we successfully ran the repair txn then the update should definitely + // be there for us to clear which means we are sure it was there to be sent + checkNotNull(UnrecoverableRepairUpdate.removeInflightUpdate(repairUpdate.updateKey)); + } + }); + } + catch (Throwable t) + { + UnrecoverableRepairUpdate.removeInflightUpdate(repairUpdate.updateKey); + throw t; + } + + repairs.add(new PendingPartitionRepair() + { + @Override + public boolean awaitRepairs(long remaining, TimeUnit timeUnit) throws InterruptedException, ExecutionException + { + try + { + repairFuture.get(remaining, timeUnit); + return true; + } + catch (TimeoutException e) + { + + return false; + } + } + + @Override + public ForWrite repairPlan() + { + return writePlan; + } + }); + } + else + { + BlockingPartitionRepair blockingRepair = new BlockingPartitionRepair(coordinator, dk, mutations, writePlan); + blockingRepair.sendInitialRepairs(); + repairs.add(blockingRepair); + } + } + + public void repairPartitionDirectly(ReadCoordinator readCoordinator, DecoratedKey dk, Map mutations, ForWrite writePlan) { - BlockingPartitionRepair blockingRepair = new BlockingPartitionRepair(partitionKey, mutations, writePlan); - blockingRepair.sendInitialRepairs(); - repairs.add(blockingRepair); + ReadRepair delegateRR = ReadRepairStrategy.BLOCKING.create(readCoordinator, command, replicaPlan, requestTime); + delegateRR.repairPartition(dk, mutations, writePlan); + delegateRR.maybeSendAdditionalWrites(); + delegateRR.awaitWrites(); } } diff --git a/src/java/org/apache/cassandra/service/reads/repair/NoopReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/NoopReadRepair.java index 5cf72b33cf85..2aad00bc9d85 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/NoopReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/NoopReadRepair.java @@ -30,6 +30,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.service.reads.DigestResolver; +import org.apache.cassandra.service.reads.ReadCoordinator; /** * Bypasses the read repair path for short read protection and testing @@ -79,4 +80,11 @@ public void repairPartition(DecoratedKey partitionKey, Map mu { } + + @Override + public void repairPartitionDirectly(ReadCoordinator coordinator, DecoratedKey partitionKey, Map mutations, ReplicaPlan.ForWrite writePlan) + { + // Shouldn't be possible to invoke this since repairPartition is a no op + throw new UnsupportedOperationException(); + } } diff --git a/src/java/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepair.java index 46b30a927935..3a91cf67fcb0 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepair.java @@ -28,8 +28,10 @@ import org.apache.cassandra.locator.Endpoints; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.locator.ReplicaPlan.ForWrite; import org.apache.cassandra.metrics.ReadRepairMetrics; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.service.reads.ReadCoordinator; /** * Only performs the collection of data responses and reconciliation of them, doesn't send repair mutations @@ -38,9 +40,9 @@ public class ReadOnlyReadRepair, P extends ReplicaPlan.ForRead> extends AbstractReadRepair { - ReadOnlyReadRepair(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) + ReadOnlyReadRepair(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - super(command, replicaPlan, requestTime); + super(coordinator, command, replicaPlan, requestTime); } @Override @@ -67,6 +69,12 @@ public void repairPartition(DecoratedKey partitionKey, Map mu throw new UnsupportedOperationException("ReadOnlyReadRepair shouldn't be trying to repair partitions"); } + @Override + public void repairPartitionDirectly(ReadCoordinator coordinator, DecoratedKey partitionKey, Map mutations, ForWrite writePlan) + { + throw new UnsupportedOperationException("ReadOnlyReadRepair shouldn't be trying to repair partitions"); + } + @Override public void awaitWrites() { diff --git a/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java index a63cc7f6bfca..bff068a6d77e 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java @@ -19,32 +19,33 @@ import java.util.Map; import java.util.function.Consumer; +import javax.annotation.Nullable; import org.apache.cassandra.db.DecoratedKey; -import org.apache.cassandra.locator.Endpoints; - import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.partitions.PartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.locator.Endpoints; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.service.reads.DigestResolver; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.service.reads.ReadCoordinator; public interface ReadRepair, P extends ReplicaPlan.ForRead> { public interface Factory { , P extends ReplicaPlan.ForRead> - ReadRepair create(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime); + ReadRepair create(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime); } static , P extends ReplicaPlan.ForRead> - ReadRepair create(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) + ReadRepair create(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - return command.metadata().params.readRepair.create(command, replicaPlan, requestTime); + return command.metadata().params.readRepair.create(coordinator, command, replicaPlan, requestTime); } /** @@ -58,7 +59,7 @@ ReadRepair create(ReadCommand command, ReplicaPlan.Shared replicaPla * @param digestResolver supplied so we can get the original data response * @param resultConsumer hook for the repair to set it's result on completion */ - public void startRepair(DigestResolver digestResolver, Consumer resultConsumer); + public void startRepair(DigestResolver digestResolver, @Nullable Consumer resultConsumer); /** * Block on the reads (or timeout) sent out in {@link ReadRepair#startRepair} @@ -94,4 +95,9 @@ ReadRepair create(ReadCommand command, ReplicaPlan.Shared replicaPla * we will block repair only on the replicas that have responded. */ void repairPartition(DecoratedKey partitionKey, Map mutations, ReplicaPlan.ForWrite writePlan); + + /** + * Repairs a partition using the provided read coordinator + */ + void repairPartitionDirectly(ReadCoordinator coordinator, DecoratedKey partitionKey, Map mutations, ReplicaPlan.ForWrite writePlan); } diff --git a/src/java/org/apache/cassandra/service/reads/repair/ReadRepairStrategy.java b/src/java/org/apache/cassandra/service/reads/repair/ReadRepairStrategy.java index 22615494a748..7f8d861888a5 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/ReadRepairStrategy.java +++ b/src/java/org/apache/cassandra/service/reads/repair/ReadRepairStrategy.java @@ -22,6 +22,7 @@ import org.apache.cassandra.locator.Endpoints; import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.service.reads.ReadCoordinator; import static org.apache.cassandra.utils.LocalizeString.toUpperCaseLocalized; @@ -30,18 +31,18 @@ public enum ReadRepairStrategy implements ReadRepair.Factory NONE { public , P extends ReplicaPlan.ForRead> - ReadRepair create(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) + ReadRepair create(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - return new ReadOnlyReadRepair<>(command, replicaPlan, requestTime); + return new ReadOnlyReadRepair<>(coordinator, command, replicaPlan, requestTime); } }, BLOCKING { public , P extends ReplicaPlan.ForRead> - ReadRepair create(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) + ReadRepair create(ReadCoordinator coordinator, ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - return new BlockingReadRepair<>(command, replicaPlan, requestTime); + return new BlockingReadRepair<>(coordinator, command, replicaPlan, requestTime); } }; diff --git a/src/java/org/apache/cassandra/streaming/OutgoingStream.java b/src/java/org/apache/cassandra/streaming/OutgoingStream.java index cc42ab6b8235..77386a622552 100644 --- a/src/java/org/apache/cassandra/streaming/OutgoingStream.java +++ b/src/java/org/apache/cassandra/streaming/OutgoingStream.java @@ -19,7 +19,10 @@ package org.apache.cassandra.streaming; import java.io.IOException; +import java.util.List; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.utils.TimeUUID; @@ -54,4 +57,5 @@ public interface OutgoingStream long getEstimatedSize(); TableId getTableId(); int getNumFiles(); + List> ranges(); } diff --git a/src/java/org/apache/cassandra/streaming/SessionSummary.java b/src/java/org/apache/cassandra/streaming/SessionSummary.java index 9588e4918ffd..f5bcfa31be40 100644 --- a/src/java/org/apache/cassandra/streaming/SessionSummary.java +++ b/src/java/org/apache/cassandra/streaming/SessionSummary.java @@ -25,7 +25,8 @@ import java.util.List; import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.IPartitionerDependentSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.InetAddressAndPort; @@ -78,7 +79,7 @@ public int hashCode() return result; } - public static IVersionedSerializer serializer = new IVersionedSerializer() + public static IPartitionerDependentSerializer serializer = new IPartitionerDependentSerializer() { public void serialize(SessionSummary summary, DataOutputPlus out, int version) throws IOException { @@ -98,7 +99,7 @@ public void serialize(SessionSummary summary, DataOutputPlus out, int version) t } } - public SessionSummary deserialize(DataInputPlus in, int version) throws IOException + public SessionSummary deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException { InetAddressAndPort coordinator = inetAddressAndPortSerializer.deserialize(in, version); InetAddressAndPort peer = inetAddressAndPortSerializer.deserialize(in, version); @@ -107,14 +108,14 @@ public SessionSummary deserialize(DataInputPlus in, int version) throws IOExcept List receivingSummaries = new ArrayList<>(numRcvd); for (int i=0; i sendingSummaries = new ArrayList<>(numRcvd); for (int i=0; i> ranges; + + public StreamReceiveTask(StreamSession session, TableId tableId, List> ranges, int totalStreams, long totalSize) { super(session, tableId); - this.receiver = ColumnFamilyStore.getIfExists(tableId).getStreamManager().createStreamReceiver(session, totalStreams); + Range.assertNormalized(ranges); + this.receiver = ColumnFamilyStore.getIfExists(tableId).getStreamManager().createStreamReceiver(session, ranges, totalStreams); this.totalStreams = totalStreams; this.totalSize = totalSize; + this.ranges = ranges; } /** @@ -164,6 +172,12 @@ public synchronized void abort() receiver.abort(); } + @Override + protected List> ranges() + { + return ranges; + } + @VisibleForTesting public static void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { diff --git a/src/java/org/apache/cassandra/streaming/StreamSession.java b/src/java/org/apache/cassandra/streaming/StreamSession.java index 217c70f0586f..050e37c749c2 100644 --- a/src/java/org/apache/cassandra/streaming/StreamSession.java +++ b/src/java/org/apache/cassandra/streaming/StreamSession.java @@ -36,7 +36,6 @@ import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; @@ -45,13 +44,12 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import io.netty.channel.Channel; import io.netty.util.concurrent.Future; //checkstyle: permit this import -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; @@ -72,7 +70,17 @@ import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.async.StreamingMultiplexedChannel; -import org.apache.cassandra.streaming.messages.*; +import org.apache.cassandra.streaming.messages.CompleteMessage; +import org.apache.cassandra.streaming.messages.IncomingStreamMessage; +import org.apache.cassandra.streaming.messages.OutgoingStreamMessage; +import org.apache.cassandra.streaming.messages.PrepareAckMessage; +import org.apache.cassandra.streaming.messages.PrepareSynAckMessage; +import org.apache.cassandra.streaming.messages.PrepareSynMessage; +import org.apache.cassandra.streaming.messages.ReceivedMessage; +import org.apache.cassandra.streaming.messages.SessionFailedMessage; +import org.apache.cassandra.streaming.messages.StreamInitMessage; +import org.apache.cassandra.streaming.messages.StreamMessage; +import org.apache.cassandra.streaming.messages.StreamMessageHeader; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.NoSpamLogger; @@ -454,6 +462,7 @@ synchronized void addTransferRanges(String keyspace, RangesAtEndpoint replicas, //Range and if it's transient RangesAtEndpoint unwrappedRanges = replicas.unwrap(); List streams = getOutgoingStreamsForRanges(unwrappedRanges, stores, pendingRepair, previewKind); + addTransferStreams(streams); Set> toBeUpdated = transferredRangesPerKeyspace.get(keyspace); if (toBeUpdated == null) @@ -735,7 +744,7 @@ else if (e instanceof TransactionAlreadyCompletedException && isFailedOrAborted( if (channel.connected()) { - state(State.FAILED); // make sure subsequent error handling sees the session in a final state + state(State.FAILED); // make sure subsequent error handling sees the session in a final state sendControlMessage(new SessionFailedMessage()).awaitUninterruptibly(); } StringBuilder failureReason = new StringBuilder("Failed because of an unknown exception\n"); @@ -1260,7 +1269,7 @@ public synchronized void prepareReceiving(StreamSummary summary) { failIfFinished(); if (summary.files > 0) - receivers.put(summary.tableId, new StreamReceiveTask(this, summary.tableId, summary.files, summary.totalSize)); + receivers.put(summary.tableId, new StreamReceiveTask(this, summary.tableId, summary.ranges, summary.files, summary.totalSize)); } private void startStreamingFiles(@Nullable PrepareDirection prepareDirection) diff --git a/src/java/org/apache/cassandra/streaming/StreamSummary.java b/src/java/org/apache/cassandra/streaming/StreamSummary.java index 3f957c69a78c..34aa56c66d02 100644 --- a/src/java/org/apache/cassandra/streaming/StreamSummary.java +++ b/src/java/org/apache/cassandra/streaming/StreamSummary.java @@ -19,23 +19,31 @@ import java.io.IOException; import java.io.Serializable; +import java.util.List; import com.google.common.base.Objects; +import com.google.common.collect.ImmutableList; import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.IPartitionerDependentSerializer; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.utils.CollectionSerializers; /** * Summary of streaming. */ public class StreamSummary implements Serializable { - public static final IVersionedSerializer serializer = new StreamSummarySerializer(); + public static final IPartitionerDependentSerializer serializer = new StreamSummarySerializer(); public final TableId tableId; + public final List> ranges; /** * Number of files to transfer. Can be 0 if nothing to transfer for some streaming request. @@ -43,9 +51,10 @@ public class StreamSummary implements Serializable public final int files; public final long totalSize; - public StreamSummary(TableId tableId, int files, long totalSize) + public StreamSummary(TableId tableId, List> ranges, int files, long totalSize) { this.tableId = tableId; + this.ranges = ranges; this.files = files; this.totalSize = totalSize; } @@ -56,13 +65,13 @@ public boolean equals(Object o) if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; StreamSummary summary = (StreamSummary) o; - return files == summary.files && totalSize == summary.totalSize && tableId.equals(summary.tableId); + return files == summary.files && totalSize == summary.totalSize && tableId.equals(summary.tableId) && ranges.equals(summary.ranges); } @Override public int hashCode() { - return Objects.hashCode(tableId, files, totalSize); + return Objects.hashCode(tableId, ranges, files, totalSize); } @Override @@ -70,27 +79,33 @@ public String toString() { final StringBuilder sb = new StringBuilder("StreamSummary{"); sb.append("path=").append(tableId); + sb.append(", ranges=").append(ranges); sb.append(", files=").append(files); sb.append(", totalSize=").append(totalSize); sb.append('}'); return sb.toString(); } - public static class StreamSummarySerializer implements IVersionedSerializer + public static class StreamSummarySerializer implements IPartitionerDependentSerializer { public void serialize(StreamSummary summary, DataOutputPlus out, int version) throws IOException { summary.tableId.serialize(out); out.writeInt(summary.files); out.writeLong(summary.totalSize); + if (version >= MessagingService.VERSION_51) + CollectionSerializers.serializeCollection(summary.ranges, out, version, Range.rangeSerializer); } - public StreamSummary deserialize(DataInputPlus in, int version) throws IOException + public StreamSummary deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException { TableId tableId = TableId.deserialize(in); int files = in.readInt(); long totalSize = in.readLong(); - return new StreamSummary(tableId, files, totalSize); + List> ranges = ImmutableList.of(); + if (version >= MessagingService.VERSION_51) + ranges = CollectionSerializers.deserializeList(in, p, version, Range.rangeSerializer); + return new StreamSummary(tableId, ranges, files, totalSize); } public long serializedSize(StreamSummary summary, int version) @@ -98,6 +113,8 @@ public long serializedSize(StreamSummary summary, int version) long size = summary.tableId.serializedSize(); size += TypeSizes.sizeof(summary.files); size += TypeSizes.sizeof(summary.totalSize); + if (version >= MessagingService.VERSION_51) + size += CollectionSerializers.serializedCollectionSize(summary.ranges, version, Range.rangeSerializer); return size; } } diff --git a/src/java/org/apache/cassandra/streaming/StreamTask.java b/src/java/org/apache/cassandra/streaming/StreamTask.java index 1e22c34ce9c9..886257fda7b8 100644 --- a/src/java/org/apache/cassandra/streaming/StreamTask.java +++ b/src/java/org/apache/cassandra/streaming/StreamTask.java @@ -17,6 +17,10 @@ */ package org.apache.cassandra.streaming; +import java.util.List; + +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableId; /** @@ -51,11 +55,13 @@ protected StreamTask(StreamSession session, TableId tableId) */ public abstract void abort(); + protected abstract List> ranges(); + /** * @return StreamSummary that describes this task */ public StreamSummary getSummary() { - return new StreamSummary(tableId, getTotalNumberOfFiles(), getTotalSize()); + return new StreamSummary(tableId, ranges(), getTotalNumberOfFiles(), getTotalSize()); } } diff --git a/src/java/org/apache/cassandra/streaming/StreamTransferTask.java b/src/java/org/apache/cassandra/streaming/StreamTransferTask.java index 0721316ccbde..8b4fe1f1bd62 100644 --- a/src/java/org/apache/cassandra/streaming/StreamTransferTask.java +++ b/src/java/org/apache/cassandra/streaming/StreamTransferTask.java @@ -20,7 +20,10 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -33,6 +36,8 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.streaming.messages.OutgoingStreamMessage; import org.apache.cassandra.utils.ExecutorUtils; @@ -57,6 +62,8 @@ public class StreamTransferTask extends StreamTask private long totalSize = 0; private int totalFiles = 0; + private final Set> ranges = new HashSet<>(); + public StreamTransferTask(StreamSession session, TableId tableId) { super(session, tableId); @@ -70,6 +77,7 @@ public synchronized void addTransferStream(OutgoingStream stream) streams.put(message.header.sequenceNumber, message); totalSize += message.stream.getEstimatedSize(); totalFiles += message.stream.getNumFiles(); + ranges.addAll(stream.ranges()); } /** @@ -149,6 +157,12 @@ public synchronized void abort() } } + @Override + protected List> ranges() + { + return Range.normalize(ranges); + } + public synchronized int getTotalNumberOfFiles() { return totalFiles; diff --git a/src/java/org/apache/cassandra/streaming/TableStreamManager.java b/src/java/org/apache/cassandra/streaming/TableStreamManager.java index 208dc344a926..d19064c9577e 100644 --- a/src/java/org/apache/cassandra/streaming/TableStreamManager.java +++ b/src/java/org/apache/cassandra/streaming/TableStreamManager.java @@ -19,7 +19,10 @@ package org.apache.cassandra.streaming; import java.util.Collection; +import java.util.List; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.locator.RangesAtEndpoint; import org.apache.cassandra.streaming.messages.StreamMessageHeader; import org.apache.cassandra.utils.TimeUUID; @@ -36,7 +39,7 @@ public interface TableStreamManager /** * Creates a {@link StreamReceiver} for the given session, expecting the given number of streams */ - StreamReceiver createStreamReceiver(StreamSession session, int totalStreams); + StreamReceiver createStreamReceiver(StreamSession session, List> ranges, int totalStreams); /** * Creates an {@link IncomingStream} for the given header diff --git a/src/java/org/apache/cassandra/streaming/management/StreamSummaryCompositeData.java b/src/java/org/apache/cassandra/streaming/management/StreamSummaryCompositeData.java index 05a0afcfd4a1..c79c1a9a1f0e 100644 --- a/src/java/org/apache/cassandra/streaming/management/StreamSummaryCompositeData.java +++ b/src/java/org/apache/cassandra/streaming/management/StreamSummaryCompositeData.java @@ -19,7 +19,14 @@ import java.util.HashMap; import java.util.Map; -import javax.management.openmbean.*; +import javax.management.openmbean.CompositeData; +import javax.management.openmbean.CompositeDataSupport; +import javax.management.openmbean.CompositeType; +import javax.management.openmbean.OpenDataException; +import javax.management.openmbean.OpenType; +import javax.management.openmbean.SimpleType; + +import com.google.common.collect.ImmutableList; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.streaming.StreamSummary; @@ -75,6 +82,7 @@ public static StreamSummary fromCompositeData(CompositeData cd) { Object[] values = cd.getAll(ITEM_NAMES); return new StreamSummary(TableId.fromString((String) values[0]), + ImmutableList.of(), (int) values[1], (long) values[2]); } diff --git a/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java b/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java index bf3526663c2b..86620c38594d 100644 --- a/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java @@ -17,15 +17,16 @@ */ package org.apache.cassandra.streaming.messages; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class CompleteMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public CompleteMessage deserialize(DataInputPlus in, int version) + public CompleteMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) { return new CompleteMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java b/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java index ff1e61fd598b..4ee726ee83b1 100644 --- a/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java @@ -21,20 +21,20 @@ import java.util.Objects; import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; - import org.apache.cassandra.streaming.IncomingStream; -import org.apache.cassandra.streaming.StreamingChannel; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamManager; import org.apache.cassandra.streaming.StreamReceiveException; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingChannel; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class IncomingStreamMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public IncomingStreamMessage deserialize(DataInputPlus input, int version) throws IOException + public IncomingStreamMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException { StreamMessageHeader header = StreamMessageHeader.serializer.deserialize(input, version); StreamSession session = StreamManager.instance.findSession(header.sender, header.planId, header.sessionIndex, header.sendByFollower); diff --git a/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java b/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java index a09cfcae8200..928783f4014a 100644 --- a/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java @@ -17,9 +17,10 @@ */ package org.apache.cassandra.streaming.messages; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class KeepAliveMessage extends StreamMessage { @@ -37,7 +38,7 @@ public String toString() public static Serializer serializer = new Serializer() { - public KeepAliveMessage deserialize(DataInputPlus in, int version) + public KeepAliveMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) { return new KeepAliveMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java b/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java index 4128ddb4b0fe..dcd3b755e8ab 100644 --- a/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java @@ -21,18 +21,19 @@ import com.google.common.annotations.VisibleForTesting; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.streaming.OutgoingStream; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.utils.FBUtilities; public class OutgoingStreamMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public OutgoingStreamMessage deserialize(DataInputPlus in, int version) + public OutgoingStreamMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) { throw new UnsupportedOperationException("Not allowed to call deserialize on an outgoing stream"); } diff --git a/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java b/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java index 479ef3424db0..72d61d29cb2b 100644 --- a/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java @@ -20,9 +20,10 @@ import java.io.IOException; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class PrepareAckMessage extends StreamMessage { @@ -33,7 +34,7 @@ public void serialize(PrepareAckMessage message, StreamingDataOutputPlus out, in //nop } - public PrepareAckMessage deserialize(DataInputPlus in, int version) throws IOException + public PrepareAckMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException { return new PrepareAckMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java b/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java index 9d97de69fac7..e29e651824b4 100644 --- a/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java @@ -22,10 +22,11 @@ import java.util.ArrayList; import java.util.Collection; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamSummary; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class PrepareSynAckMessage extends StreamMessage { @@ -38,12 +39,12 @@ public void serialize(PrepareSynAckMessage message, StreamingDataOutputPlus out, StreamSummary.serializer.serialize(summary, out, version); } - public PrepareSynAckMessage deserialize(DataInputPlus input, int version) throws IOException + public PrepareSynAckMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException { PrepareSynAckMessage message = new PrepareSynAckMessage(); int numSummaries = input.readInt(); for (int i = 0; i < numSummaries; i++) - message.summaries.add(StreamSummary.serializer.deserialize(input, version)); + message.summaries.add(StreamSummary.serializer.deserialize(input, partitioner, version)); return message; } diff --git a/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java b/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java index 1160033bd3ae..e901365e5ec3 100644 --- a/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java @@ -17,21 +17,22 @@ */ package org.apache.cassandra.streaming.messages; -import java.io.*; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamRequest; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamSummary; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class PrepareSynMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public PrepareSynMessage deserialize(DataInputPlus input, int version) throws IOException + public PrepareSynMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException { PrepareSynMessage message = new PrepareSynMessage(); // requests @@ -41,7 +42,7 @@ public PrepareSynMessage deserialize(DataInputPlus input, int version) throws IO // summaries int numSummaries = input.readInt(); for (int i = 0; i < numSummaries; i++) - message.summaries.add(StreamSummary.serializer.deserialize(input, version)); + message.summaries.add(StreamSummary.serializer.deserialize(input, partitioner, version)); return message; } diff --git a/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java b/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java index 378f72f896da..c6b7a0f638aa 100644 --- a/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java @@ -17,18 +17,19 @@ */ package org.apache.cassandra.streaming.messages; -import java.io.*; +import java.io.IOException; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class ReceivedMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public ReceivedMessage deserialize(DataInputPlus input, int version) throws IOException + public ReceivedMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException { return new ReceivedMessage(TableId.deserialize(input), input.readInt()); } diff --git a/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java b/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java index f09b64327e05..f05be58aa684 100644 --- a/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java @@ -17,15 +17,16 @@ */ package org.apache.cassandra.streaming.messages; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; public class SessionFailedMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public SessionFailedMessage deserialize(DataInputPlus in, int version) + public SessionFailedMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) { return new SessionFailedMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java b/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java index 889c732f0fc1..2fd65d7dff0d 100644 --- a/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java @@ -20,14 +20,15 @@ import java.io.IOException; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.streaming.StreamingChannel; -import org.apache.cassandra.streaming.StreamingDataOutputPlus; -import org.apache.cassandra.streaming.StreamOperation; import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.streaming.StreamOperation; import org.apache.cassandra.streaming.StreamResultFuture; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.StreamingChannel; +import org.apache.cassandra.streaming.StreamingDataOutputPlus; import org.apache.cassandra.utils.TimeUUID; import static org.apache.cassandra.locator.InetAddressAndPort.Serializer.inetAddressAndPortSerializer; @@ -93,7 +94,7 @@ public void serialize(StreamInitMessage message, StreamingDataOutputPlus out, in out.writeInt(message.previewKind.getSerializationVal()); } - public StreamInitMessage deserialize(DataInputPlus in, int version) throws IOException + public StreamInitMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException { InetAddressAndPort from = inetAddressAndPortSerializer.deserialize(in, version); int sessionIndex = in.readInt(); diff --git a/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java b/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java index db393a54347f..6e5dc08f8815 100644 --- a/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java @@ -21,10 +21,11 @@ import java.util.HashMap; import java.util.Map; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamingChannel; import org.apache.cassandra.streaming.StreamingDataOutputPlus; -import org.apache.cassandra.streaming.StreamSession; /** * StreamMessage is an abstract base class that every messages in streaming protocol inherit. @@ -44,16 +45,16 @@ public static long serializedSize(StreamMessage message, int version) throws IOE return 1 + message.type.outSerializer.serializedSize(message, version); } - public static StreamMessage deserialize(DataInputPlus in, int version) throws IOException + public static StreamMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException { Type type = Type.lookupById(in.readByte()); - return type.inSerializer.deserialize(in, version); + return type.inSerializer.deserialize(in, partitioner, version); } /** StreamMessage serializer */ public static interface Serializer { - V deserialize(DataInputPlus in, int version) throws IOException; + V deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException; void serialize(V message, StreamingDataOutputPlus out, int version, StreamSession session) throws IOException; long serializedSize(V message, int version) throws IOException; } diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 5d51cfd3e338..2b9dd0a04d9a 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -54,6 +54,9 @@ import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; import org.apache.cassandra.tcm.extensions.ExtensionKey; import org.apache.cassandra.tcm.extensions.ExtensionValue; import org.apache.cassandra.tcm.membership.Directory; @@ -97,6 +100,7 @@ public class ClusterMetadata public final AccordKeyspaces accordKeyspaces; public final LockedRanges lockedRanges; public final InProgressSequences inProgressSequences; + public final ConsensusMigrationState consensusMigrationState; public final ImmutableMap, ExtensionValue> extensions; // This isn't serialized as part of ClusterMetadata it's really just a view over the Directory. @@ -132,6 +136,7 @@ public ClusterMetadata(IPartitioner partitioner, Directory directory, Distribute AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, + ConsensusMigrationState.EMPTY, ImmutableMap.of()); } @@ -144,6 +149,7 @@ public ClusterMetadata(Epoch epoch, AccordKeyspaces accordKeyspaces, LockedRanges lockedRanges, InProgressSequences inProgressSequences, + ConsensusMigrationState consensusMigrationState, Map, ExtensionValue> extensions) { this(EMPTY_METADATA_IDENTIFIER, @@ -156,6 +162,7 @@ public ClusterMetadata(Epoch epoch, accordKeyspaces, lockedRanges, inProgressSequences, + consensusMigrationState, extensions); } @@ -169,6 +176,7 @@ private ClusterMetadata(int metadataIdentifier, AccordKeyspaces accordKeyspaces, LockedRanges lockedRanges, InProgressSequences inProgressSequences, + ConsensusMigrationState consensusMigrationState, Map, ExtensionValue> extensions) { // TODO: token map is a feature of the specific placement strategy, and so may not be a relevant component of @@ -185,6 +193,7 @@ private ClusterMetadata(int metadataIdentifier, this.accordKeyspaces = accordKeyspaces; this.lockedRanges = lockedRanges; this.inProgressSequences = inProgressSequences; + this.consensusMigrationState = consensusMigrationState; this.extensions = ImmutableMap.copyOf(extensions); this.locator = Locator.usingDirectory(directory); } @@ -241,6 +250,7 @@ public ClusterMetadata forceEpoch(Epoch epoch) capLastModified(accordKeyspaces, epoch), capLastModified(lockedRanges, epoch), capLastModified(inProgressSequences, epoch), + capLastModified(consensusMigrationState, epoch), capLastModified(extensions, epoch)); } @@ -262,6 +272,7 @@ public ClusterMetadata initializeClusterIdentifier(int clusterIdentifier) accordKeyspaces, lockedRanges, inProgressSequences, + consensusMigrationState, extensions); } @@ -388,6 +399,7 @@ public static class Transformer private AccordKeyspaces accordKeyspaces; private LockedRanges lockedRanges; private InProgressSequences inProgressSequences; + private ConsensusMigrationState consensusMigrationState; private final Map, ExtensionValue> extensions; private final Set modifiedKeys; @@ -403,6 +415,7 @@ private Transformer(ClusterMetadata metadata, Epoch epoch) this.accordKeyspaces = metadata.accordKeyspaces; this.lockedRanges = metadata.lockedRanges; this.inProgressSequences = metadata.inProgressSequences; + this.consensusMigrationState = metadata.consensusMigrationState; extensions = new HashMap<>(metadata.extensions); modifiedKeys = new HashSet<>(); } @@ -538,6 +551,31 @@ public Transformer with(InProgressSequences sequences) return this; } + public Transformer with(Map newTableMigrationStates) + { + return with(newTableMigrationStates, true); + } + + public Transformer with(Map newTableMigrationStates, + boolean addRemaining) + { + if (addRemaining) + { + ImmutableMap.Builder tableMigrationStatesBuilder = ImmutableMap.builder(); + consensusMigrationState.tableStates.entrySet() + .stream() + .filter(existingTMS -> !newTableMigrationStates.containsKey(existingTMS.getKey())) + .forEach(tableMigrationStatesBuilder::put); + tableMigrationStatesBuilder.putAll(newTableMigrationStates.entrySet()); + consensusMigrationState = new ConsensusMigrationState(Epoch.EMPTY, tableMigrationStatesBuilder.build()); + } + else + { + consensusMigrationState = new ConsensusMigrationState(Epoch.EMPTY, newTableMigrationStates); + } + return this; + } + public Transformer with(ExtensionKey key, ExtensionValue obj) { if (MetadataKeys.CORE_METADATA.contains(key)) @@ -630,6 +668,12 @@ public Transformed build() inProgressSequences = inProgressSequences.withLastModified(epoch); } + if (consensusMigrationState != base.consensusMigrationState) + { + modifiedKeys.add(MetadataKeys.CONSENSUS_MIGRATION_STATE); + consensusMigrationState = consensusMigrationState.withLastModified(epoch); + } + return new Transformed(new ClusterMetadata(base.metadataIdentifier, epoch, partitioner, @@ -640,6 +684,7 @@ public Transformed build() accordKeyspaces, lockedRanges, inProgressSequences, + consensusMigrationState, extensions), ImmutableSet.copyOf(modifiedKeys)); } @@ -656,6 +701,7 @@ public ClusterMetadata buildForGossipMode() accordKeyspaces, lockedRanges, inProgressSequences, + consensusMigrationState, extensions); } @@ -672,6 +718,7 @@ public String toString() ", placement=" + placements + ", lockedRanges=" + lockedRanges + ", inProgressSequences=" + inProgressSequences + + ", consensusMigrationState=" + consensusMigrationState + ", extensions=" + extensions + ", modifiedKeys=" + modifiedKeys + '}'; @@ -759,6 +806,7 @@ public String legacyToString() @Override public String toString() { + // TODO is this supposed to be missing fields? return "ClusterMetadata{" + "epoch=" + epoch + ", schema=" + schema + @@ -766,6 +814,7 @@ public String toString() ", tokenMap=" + tokenMap + ", placements=" + placements + ", lockedRanges=" + lockedRanges + + ", consensusMigrationState=" + lockedRanges + '}'; } @@ -780,8 +829,10 @@ public boolean equals(Object o) directory.equals(that.directory) && tokenMap.equals(that.tokenMap) && placements.equals(that.placements) && + accordKeyspaces.equals(that.accordKeyspaces) && lockedRanges.equals(that.lockedRanges) && inProgressSequences.equals(that.inProgressSequences) && + consensusMigrationState.equals(that.consensusMigrationState) && extensions.equals(that.extensions); } @@ -830,7 +881,7 @@ public void dumpDiff(ClusterMetadata other) @Override public int hashCode() { - return Objects.hash(epoch, schema, directory, tokenMap, placements, lockedRanges, inProgressSequences, extensions); + return Objects.hash(epoch, schema, directory, tokenMap, placements, accordKeyspaces, lockedRanges, inProgressSequences, consensusMigrationState, extensions); } public static ClusterMetadata current() @@ -910,6 +961,7 @@ public void serialize(ClusterMetadata metadata, DataOutputPlus out, Version vers AccordKeyspaces.serializer.serialize(metadata.accordKeyspaces, out, version); LockedRanges.serializer.serialize(metadata.lockedRanges, out, version); InProgressSequences.serializer.serialize(metadata.inProgressSequences, out, version); + ConsensusMigrationState.serializer.serialize(metadata.consensusMigrationState, out, version); out.writeInt(metadata.extensions.size()); for (Map.Entry, ExtensionValue> entry : metadata.extensions.entrySet()) { @@ -947,6 +999,7 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE AccordKeyspaces accordKeyspaces = AccordKeyspaces.serializer.deserialize(in, version); LockedRanges lockedRanges = LockedRanges.serializer.deserialize(in, version); InProgressSequences ips = InProgressSequences.serializer.deserialize(in, version); + ConsensusMigrationState consensusMigrationState = ConsensusMigrationState.serializer.deserialize(in, version); int items = in.readInt(); Map, ExtensionValue> extensions = new HashMap<>(items); for (int i = 0; i < items; i++) @@ -966,6 +1019,7 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE accordKeyspaces, lockedRanges, ips, + consensusMigrationState, extensions); } @@ -988,7 +1042,8 @@ public long serializedSize(ClusterMetadata metadata, Version version) DataPlacements.serializer.serializedSize(metadata.placements, version) + AccordKeyspaces.serializer.serializedSize(metadata.accordKeyspaces, version) + LockedRanges.serializer.serializedSize(metadata.lockedRanges, version) + - InProgressSequences.serializer.serializedSize(metadata.inProgressSequences, version); + InProgressSequences.serializer.serializedSize(metadata.inProgressSequences, version) + + ConsensusMigrationState.serializer.serializedSize(metadata.consensusMigrationState, version); return size; } diff --git a/src/java/org/apache/cassandra/tcm/Epoch.java b/src/java/org/apache/cassandra/tcm/Epoch.java index 0d070b4a5ba2..d15030e3ec9a 100644 --- a/src/java/org/apache/cassandra/tcm/Epoch.java +++ b/src/java/org/apache/cassandra/tcm/Epoch.java @@ -87,6 +87,11 @@ public static Epoch max(Epoch l, Epoch r) return l.compareTo(r) > 0 ? l : r; } + public static Epoch min(Epoch l, Epoch r) + { + return l.compareTo(r) < 0 ? l : r; + } + public boolean isDirectlyBefore(Epoch epoch) { if (epoch.equals(Epoch.FIRST)) diff --git a/src/java/org/apache/cassandra/tcm/MetadataKeys.java b/src/java/org/apache/cassandra/tcm/MetadataKeys.java index 18a9e6d0231d..df65474a536f 100644 --- a/src/java/org/apache/cassandra/tcm/MetadataKeys.java +++ b/src/java/org/apache/cassandra/tcm/MetadataKeys.java @@ -42,6 +42,7 @@ public class MetadataKeys public static final MetadataKey ACCORD_KEYSPACES = make(CORE_NS, "ownership", "accord_keyspaces"); public static final MetadataKey LOCKED_RANGES = make(CORE_NS, "sequences", "locked_ranges"); public static final MetadataKey IN_PROGRESS_SEQUENCES = make(CORE_NS, "sequences", "in_progress"); + public static final MetadataKey CONSENSUS_MIGRATION_STATE = make(CORE_NS, "consensus", "migration_state"); public static final ImmutableSet CORE_METADATA = ImmutableSet.of(SCHEMA, NODE_DIRECTORY, @@ -49,7 +50,8 @@ public class MetadataKeys DATA_PLACEMENTS, ACCORD_KEYSPACES, LOCKED_RANGES, - IN_PROGRESS_SEQUENCES); + IN_PROGRESS_SEQUENCES, + CONSENSUS_MIGRATION_STATE); public static MetadataKey make(String...parts) { diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index 1e115a537992..34799e9b56c4 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -28,6 +28,7 @@ import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; import org.apache.cassandra.tcm.Commit.Replicator; import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LocalLog; @@ -176,6 +177,7 @@ public StubClusterMetadataService build() AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, + ConsensusTableMigrationState.ConsensusMigrationState.EMPTY, ImmutableMap.of()); } return new StubClusterMetadataService(new UniformRangePlacement(), diff --git a/src/java/org/apache/cassandra/tcm/Transformation.java b/src/java/org/apache/cassandra/tcm/Transformation.java index f90a0da63490..cdbf44fcb619 100644 --- a/src/java/org/apache/cassandra/tcm/Transformation.java +++ b/src/java/org/apache/cassandra/tcm/Transformation.java @@ -38,8 +38,25 @@ import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; import org.apache.cassandra.tcm.serialization.VerboseMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.tcm.transformations.*; +import org.apache.cassandra.tcm.transformations.AddAccordKeyspace; +import org.apache.cassandra.tcm.transformations.AlterSchema; +import org.apache.cassandra.tcm.transformations.AlterTopology; +import org.apache.cassandra.tcm.transformations.Assassinate; +import org.apache.cassandra.tcm.transformations.BeginConsensusMigrationForTableAndRange; +import org.apache.cassandra.tcm.transformations.CancelInProgressSequence; +import org.apache.cassandra.tcm.transformations.CustomTransformation; +import org.apache.cassandra.tcm.transformations.ForceSnapshot; +import org.apache.cassandra.tcm.transformations.MaybeFinishConsensusMigrationForTableAndRange; +import org.apache.cassandra.tcm.transformations.PrepareJoin; +import org.apache.cassandra.tcm.transformations.PrepareLeave; +import org.apache.cassandra.tcm.transformations.PrepareMove; +import org.apache.cassandra.tcm.transformations.PrepareReplace; +import org.apache.cassandra.tcm.transformations.Register; +import org.apache.cassandra.tcm.transformations.SetConsensusMigrationTargetProtocol; import org.apache.cassandra.tcm.transformations.Startup; +import org.apache.cassandra.tcm.transformations.TriggerSnapshot; +import org.apache.cassandra.tcm.transformations.Unregister; +import org.apache.cassandra.tcm.transformations.UnsafeJoin; import org.apache.cassandra.tcm.transformations.cms.AdvanceCMSReconfiguration; import org.apache.cassandra.tcm.transformations.cms.FinishAddToCMS; import org.apache.cassandra.tcm.transformations.cms.Initialize; @@ -221,7 +238,11 @@ enum Kind ADVANCE_CMS_RECONFIGURATION(33, () -> AdvanceCMSReconfiguration.serializer), CANCEL_CMS_RECONFIGURATION(34, () -> CancelCMSReconfiguration.serializer), ALTER_TOPOLOGY(35, () -> AlterTopology.serializer), - ADD_ACCORD_KEYSPACE(36, () -> AddAccordKeyspace.serializer) + + ADD_ACCORD_KEYSPACE(36, () -> AddAccordKeyspace.serializer), + BEGIN_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(37, () -> BeginConsensusMigrationForTableAndRange.serializer), + MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(38, () -> MaybeFinishConsensusMigrationForTableAndRange.serializer), + SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL(39, () -> SetConsensusMigrationTargetProtocol.serializer) ; private final Supplier> serializer; diff --git a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java index a5530e6faa8b..f87278d4b380 100644 --- a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java +++ b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java @@ -34,7 +34,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,6 +54,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SchemaKeyspace; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.MultiStepOperation; @@ -299,6 +299,7 @@ public static ClusterMetadata emptyWithSchemaFromSystemTables(Set allKno AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, + ConsensusMigrationState.EMPTY, Collections.emptyMap()); } @@ -387,6 +388,7 @@ public static ClusterMetadata fromEndpointStates(Map tokens) return new TokenMap(lastModified, partitioner, finalisedCopy); } - public BiMultiValMap asMap() + public SortedBiMultiValMap asMap() { return SortedBiMultiValMap.create(map); } @@ -164,7 +164,14 @@ private static void maybeAdd(List> ranges, Range r) ranges.add(r); } - public Token nextToken(List tokens, Token token) + public Token getPredecessor(Token token) + { + int index = Collections.binarySearch(tokens, token); + assert index >= 0 : token + " not found in " + StringUtils.join(map.keySet(), ", "); + return index == 0 ? tokens.get(tokens.size() - 1) : tokens.get(index - 1); + } + + public static Token nextToken(List tokens, Token token) { return tokens.get(nextTokenIndex(tokens, token)); } diff --git a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java index faa86e357b6d..5cdd1b3016b1 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java +++ b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java @@ -26,6 +26,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Streams; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,14 +41,19 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceMetadata.KeyspaceDiff; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.SchemaTransformation; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.schema.Views; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadata.Transformer; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.Transformation; @@ -58,11 +66,13 @@ import org.apache.cassandra.utils.vint.VIntCoding; import static org.apache.cassandra.cql3.statements.schema.AlterSchemaStatement.NO_EXECUTION_TIMESTAMP; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static org.apache.cassandra.exceptions.ExceptionCode.ALREADY_EXISTS; import static org.apache.cassandra.exceptions.ExceptionCode.CONFIG_ERROR; import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; import static org.apache.cassandra.exceptions.ExceptionCode.SERVER_ERROR; import static org.apache.cassandra.exceptions.ExceptionCode.SYNTAX_ERROR; +import static org.apache.cassandra.utils.Collectors3.toImmutableMap; public class AlterSchema implements Transformation { @@ -231,7 +241,7 @@ public final Result execute(ClusterMetadata prev) }); next = next.with(newPlacementsBuilder.build()); } - + next = maybeUpdateConsensusTableMigrationStateForDroppedTables(prev.consensusMigrationState, next, diff.altered, diff.dropped); return Transformation.success(next, LockedRanges.AffectedRanges.EMPTY); } @@ -247,6 +257,24 @@ private static Map> groupByReplication( return byReplication; } + private Transformer maybeUpdateConsensusTableMigrationStateForDroppedTables(ConsensusMigrationState prev, Transformer next, ImmutableList altered, Keyspaces dropped) + { + Set tableIds = Streams.concat( + altered.stream().flatMap(diff -> diff.tables.dropped.stream().map(TableMetadata::id)), + dropped.stream().flatMap(ks -> ks.tables.stream().map(TableMetadata::id))) + .collect(toImmutableSet()); + if (tableIds.stream().anyMatch(prev.tableStates.keySet()::contains)) + { + ImmutableMap newTableStates = + prev.tableStates.entrySet().stream().filter(e -> !tableIds.contains(e.getKey())).collect(toImmutableMap()); + return next.with(newTableStates); + } + else + { + return next; + } + } + private static Iterable normaliseTableEpochs(Epoch nextEpoch, Stream tables) { return tables.map(tm -> tm.epoch.is(nextEpoch) diff --git a/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java new file mode 100644 index 000000000000..a00db104af5e --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import javax.annotation.Nonnull; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import static org.apache.cassandra.tcm.ClusterMetadata.Transformer; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; +import static org.apache.cassandra.utils.Collectors3.toImmutableMap; + +public class BeginConsensusMigrationForTableAndRange implements Transformation +{ + public static Serializer serializer = new Serializer(); + + @Nonnull + public final ConsensusMigrationTarget targetProtocol; + + @Nonnull + public final List> ranges; + + @Nonnull + public final List tables; + + public BeginConsensusMigrationForTableAndRange(@Nonnull ConsensusMigrationTarget targetProtocol, + @Nonnull List> ranges, + @Nonnull List tables) + { + checkNotNull(targetProtocol, "targetProtocol should not be null"); + checkNotNull(ranges, "ranges should not be null"); + checkArgument(!ranges.isEmpty(), "ranges should not be empty"); + checkNotNull(tables, "tables should not be null"); + checkArgument(!tables.isEmpty(), "tables should not be empty"); + this.targetProtocol = targetProtocol; + this.ranges = ranges; + this.tables = tables; + } + + public Kind kind() + { + return Kind.BEGIN_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE; + } + + public Result execute(ClusterMetadata prev) + { + Map tableStates = prev.consensusMigrationState.tableStates; + List columnFamilyStores = tables.stream().map(Schema.instance::getColumnFamilyStoreInstance).collect(toImmutableList()); + + Transformer transformer = prev.transformer(); + + Map newStates = columnFamilyStores + .stream() + .map(cfs -> + tableStates.containsKey(cfs.getTableId()) ? + tableStates.get(cfs.getTableId()).withRangesMigrating(ranges, targetProtocol) : + new TableMigrationState(cfs.keyspace.getName(), cfs.name, cfs.getTableId(), targetProtocol, ImmutableSet.of(), ImmutableMap.of(Epoch.EMPTY, ranges))) + .collect(toImmutableMap(TableMigrationState::getTableId, Function.identity())); + + return Transformation.success(transformer.with(newStates), LockedRanges.AffectedRanges.EMPTY); + } + + static class Serializer implements AsymmetricMetadataSerializer + { + + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + BeginConsensusMigrationForTableAndRange v = (BeginConsensusMigrationForTableAndRange)t; + out.writeUTF(v.targetProtocol.toString()); + ConsensusTableMigrationState.rangesSerializer.serialize(v.ranges, out, version); + serializeCollection(v.tables, out, version, TableId.metadataSerializer); + } + + public BeginConsensusMigrationForTableAndRange deserialize(DataInputPlus in, Version version) throws IOException + { + ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(in.readUTF()); + List> ranges = ConsensusTableMigrationState.rangesSerializer.deserialize(in, version); + List tables = deserializeList(in, version, TableId.metadataSerializer); + return new BeginConsensusMigrationForTableAndRange(targetProtocol, ranges, tables); + } + + public long serializedSize(Transformation t, Version version) + { + BeginConsensusMigrationForTableAndRange v = (BeginConsensusMigrationForTableAndRange) t; + return TypeSizes.sizeof(v.targetProtocol.toString()) + + ConsensusTableMigrationState.rangesSerializer.serializedSize(v.ranges, version) + + serializedCollectionSize(v.tables, version, TableId.metadataSerializer); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java new file mode 100644 index 000000000000..64e6248c8132 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.List; +import javax.annotation.Nonnull; + +import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairType; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; +import static java.lang.String.format; +import static org.apache.cassandra.dht.Range.intersects; +import static org.apache.cassandra.dht.Range.normalize; +import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; + + +public class MaybeFinishConsensusMigrationForTableAndRange implements Transformation +{ + public static Serializer serializer = new Serializer(); + + @Nonnull + public final String keyspace; + + @Nonnull + public final String cf; + + @Nonnull + public final List> repairedRanges; + + @Nonnull + public final Epoch minEpoch; + + @Nonnull + public final ConsensusMigrationRepairType repairType; + + public MaybeFinishConsensusMigrationForTableAndRange(@Nonnull String keyspace, + @Nonnull String cf, + @Nonnull List> repairedRanges, + @Nonnull Epoch minEpoch, + @Nonnull ConsensusMigrationRepairType repairType) + { + checkNotNull(keyspace, "keyspace should not be null"); + checkNotNull(cf, "cf should not be null"); + checkNotNull(repairedRanges, "repairedRanges should not be null"); + checkArgument(!repairedRanges.isEmpty(), "repairedRanges should not be empty"); + checkNotNull(minEpoch, "minEpoch should not be null"); + checkArgument(minEpoch.isAfter(Epoch.EMPTY), "minEpoch should not be empty"); + checkNotNull(repairType, "repairType is null"); + checkArgument(repairType != ConsensusMigrationRepairType.ineligible, "Shouldn't attempt to finish migration with ineligible repair"); + this.keyspace = keyspace; + this.cf = cf; + this.repairedRanges = repairedRanges; + this.minEpoch = minEpoch; + this.repairType = repairType; + } + + public Kind kind() + { + return Kind.MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE; + } + + public Result execute(@Nonnull ClusterMetadata metadata) + { + System.out.println("Completed repair " + repairType + " ranges " + repairedRanges); + checkNotNull(metadata, "clusterMetadata should not be null"); + String ksAndCF = keyspace + "." + cf; + TableMetadata tbm = Schema.instance.getTableMetadata(keyspace, cf); + if (tbm == null) + return new Rejected(INVALID, format("Table %s is not currently performing consensus migration", ksAndCF)); + + ConsensusMigrationState consensusMigrationState = metadata.consensusMigrationState; + ConsensusTableMigrationState.TableMigrationState tms = consensusMigrationState.tableStates.get(tbm.id); + if (tms == null) + return new Rejected(INVALID, format("Table %s is not currently performing consensus migration", ksAndCF)); + + if (tms.targetProtocol == ConsensusMigrationTarget.accord && repairType != ConsensusMigrationRepairType.paxos) + return new Rejected(INVALID, format("Table %s is not currently performing consensus migration to Accord and the repair was a Paxos repair", ksAndCF)); + + if (tms.targetProtocol == ConsensusMigrationTarget.paxos && repairType != ConsensusMigrationRepairType.accord) + return new Rejected(INVALID, format("Table %s is not currently performing consensus migration to Paxos and the repair was an Accord repair", ksAndCF)); + + List> normalizedRepairedRanges = normalize(repairedRanges); + + // Bail out if repair doesn't actually intersect with any migrating ranges + if (!intersects(tms.migratingRanges, normalizedRepairedRanges)) + return new Rejected(INVALID, format("Table %s is migrating ranges %s, which doesn't include repaired ranges %s", ksAndCF, tms.migratingRanges, normalizedRepairedRanges)); + + TableMigrationState newTableMigrationState = tms.withRangesRepairedAtEpoch(normalizedRepairedRanges, minEpoch); + + return Transformation.success(metadata.transformer().with(ImmutableMap.of(newTableMigrationState.tableId, newTableMigrationState)), LockedRanges.AffectedRanges.EMPTY); + } + + static class Serializer implements AsymmetricMetadataSerializer + { + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + MaybeFinishConsensusMigrationForTableAndRange v = (MaybeFinishConsensusMigrationForTableAndRange)t; + out.writeUTF(v.keyspace); + out.writeUTF(v.cf); + ConsensusTableMigrationState.rangesSerializer.serialize(v.repairedRanges, out, version); + Epoch.serializer.serialize(v.minEpoch, out, version); + out.write(v.repairType.value); + } + + public MaybeFinishConsensusMigrationForTableAndRange deserialize(DataInputPlus in, Version version) throws IOException + { + String keyspace = in.readUTF(); + String cf = in.readUTF(); + List> repairedRanges = ConsensusTableMigrationState.rangesSerializer.deserialize(in, version); + Epoch minEpoch = Epoch.serializer.deserialize(in, version); + ConsensusMigrationRepairType repairType = ConsensusMigrationRepairType.fromValue(in.readByte()); + return new MaybeFinishConsensusMigrationForTableAndRange(keyspace, cf, repairedRanges, minEpoch, repairType); + } + + public long serializedSize(Transformation t, Version version) + { + MaybeFinishConsensusMigrationForTableAndRange v = (MaybeFinishConsensusMigrationForTableAndRange)t; + return TypeSizes.sizeof(v.keyspace) + + TypeSizes.sizeof(v.cf) + + ConsensusTableMigrationState.rangesSerializer.serializedSize(v.repairedRanges, version) + + Epoch.serializer.serializedSize(v.minEpoch) + + TypeSizes.sizeof(v.repairType.value); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java b/src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java new file mode 100644 index 000000000000..c0fd662d6766 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import javax.annotation.Nonnull; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadata.Transformer; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.reset; +import static org.apache.cassandra.tcm.Transformation.Kind.SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; +import static org.apache.cassandra.utils.Collectors3.toImmutableMap; + +/* + * Narrowly focused on setting or changing the consensus migration protocol. The real use case + * is when a migration is already in progress or done and you want to change the target. + */ +public class SetConsensusMigrationTargetProtocol implements Transformation +{ + public static Serializer serializer = new Serializer(); + + @Nonnull + public final ConsensusMigrationTarget targetProtocol; + + @Nonnull + public final List tables; + + public SetConsensusMigrationTargetProtocol(@Nonnull ConsensusMigrationTarget targetProtocol, + @Nonnull List tables) + { + this.targetProtocol = targetProtocol; + this.tables = tables; + } + + @Override + public Kind kind() + { + return SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL; + } + + @Override + public Result execute(ClusterMetadata metadata) + { + Map tableStates = metadata.consensusMigrationState.tableStates; + List columnFamilyStores = tables.stream().map(Schema.instance::getColumnFamilyStoreInstance).collect(toImmutableList()); + + Transformer transformer = metadata.transformer(); + + Map newStates; + + if (targetProtocol == reset) + { + newStates = tableStates.entrySet().stream().filter(entry -> !tables.contains(entry.getKey())).collect(toImmutableMap()); + } + else + { + newStates = columnFamilyStores + .stream() + .map(cfs -> + tableStates.containsKey(cfs.getTableId()) ? + tableStates.get(cfs.getTableId()).withMigrationTarget(targetProtocol) : + new TableMigrationState(cfs.keyspace.getName(), cfs.name, cfs.getTableId(), targetProtocol, ImmutableSet.of(), ImmutableMap.of())) + .collect(toImmutableMap(TableMigrationState::getTableId, Function.identity())); + } + + return Transformation.success(transformer.with(newStates, targetProtocol == reset ? false : true), LockedRanges.AffectedRanges.EMPTY); + } + + static class Serializer implements AsymmetricMetadataSerializer + { + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + SetConsensusMigrationTargetProtocol v = (SetConsensusMigrationTargetProtocol)t; + out.writeUTF(v.targetProtocol.toString()); + serializeCollection(v.tables, out, version, TableId.metadataSerializer); + } + + public SetConsensusMigrationTargetProtocol deserialize(DataInputPlus in, Version version) throws IOException + { + ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(in.readUTF()); + List tables = deserializeList(in, version, TableId.metadataSerializer); + return new SetConsensusMigrationTargetProtocol(targetProtocol, tables); + } + + public long serializedSize(Transformation t, Version version) + { + SetConsensusMigrationTargetProtocol v = (SetConsensusMigrationTargetProtocol) t; + return TypeSizes.sizeof(v.targetProtocol.toString()) + + serializedCollectionSize(v.tables, version, TableId.metadataSerializer); + } + } +} diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 747da8348564..b90be2cf1bf1 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -41,7 +41,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; - import javax.annotation.Nullable; import javax.management.JMX; import javax.management.MBeanServerConnection; @@ -55,12 +54,20 @@ import javax.management.remote.JMXServiceURL; import javax.rmi.ssl.SslRMIClientSocketFactory; +import com.google.common.base.Function; +import com.google.common.base.Strings; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; +import com.google.common.util.concurrent.Uninterruptibles; + import org.apache.cassandra.audit.AuditLogManager; import org.apache.cassandra.audit.AuditLogManagerMBean; import org.apache.cassandra.audit.AuditLogOptions; import org.apache.cassandra.audit.AuditLogOptionsCompositeData; - -import com.google.common.collect.ImmutableMap; import org.apache.cassandra.auth.AuthCache; import org.apache.cassandra.auth.AuthCacheMBean; import org.apache.cassandra.auth.CIDRGroupsMappingManager; @@ -116,19 +123,9 @@ import org.apache.cassandra.streaming.StreamManagerMBean; import org.apache.cassandra.streaming.StreamState; import org.apache.cassandra.streaming.management.StreamStateCompositeData; -import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; - -import com.google.common.base.Function; -import com.google.common.base.Strings; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Iterables; -import com.google.common.collect.Maps; -import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.Uninterruptibles; - import org.apache.cassandra.tcm.CMSOperations; import org.apache.cassandra.tools.nodetool.GetTimeout; +import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; import org.apache.cassandra.utils.NativeLibrary; import static org.apache.cassandra.config.CassandraRelevantProperties.NODETOOL_JMX_NOTIFICATION_POLL_INTERVAL_SECONDS; @@ -523,7 +520,12 @@ public String getKeyspaceReplicationInfo(String keyspaceName) public void repairAsync(final PrintStream out, final String keyspace, Map options) throws IOException { - RepairRunner runner = new RepairRunner(out, ssProxy, keyspace, options); + blockOnAsyncRepair(out, keyspace, ssProxy.repairAsync(keyspace, options)); + } + + public void blockOnAsyncRepair(final PrintStream out, final String keyspace, Integer cmd) throws IOException + { + RepairRunner runner = new RepairRunner(out, ssProxy, keyspace, cmd); try { if (jmxc != null) @@ -1330,6 +1332,12 @@ public List getNonLocalStrategyKeyspaces() return ssProxy.getNonLocalStrategyKeyspaces(); } + + public List getAccordManagedKeyspace() + { + return ssProxy.getAccordManagedKeyspaces(); + } + public String getClusterName() { return ssProxy.getClusterName(); diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 5b149acd9a0b..9f72c59eabf2 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -17,20 +17,7 @@ */ package org.apache.cassandra.tools; -import static com.google.common.base.Throwables.getStackTraceAsString; -import static com.google.common.collect.Iterables.toArray; -import static com.google.common.collect.Lists.newArrayList; -import static java.lang.Integer.parseInt; -import static java.lang.String.format; -import static org.apache.cassandra.io.util.File.WriteMode.APPEND; -import static org.apache.commons.lang3.ArrayUtils.EMPTY_STRING_ARRAY; -import static org.apache.commons.lang3.StringUtils.EMPTY; -import static org.apache.commons.lang3.StringUtils.isEmpty; -import static org.apache.commons.lang3.StringUtils.isNotEmpty; - import java.io.Console; -import org.apache.cassandra.io.util.File; -import org.apache.cassandra.io.util.FileWriter; import java.io.FileNotFoundException; import java.io.IOError; import java.io.IOException; @@ -44,16 +31,10 @@ import java.util.Map.Entry; import java.util.Scanner; import java.util.SortedMap; - import javax.management.InstanceNotFoundException; import com.google.common.base.Joiner; import com.google.common.base.Throwables; - -import org.apache.cassandra.locator.EndpointSnitchInfoMBean; -import org.apache.cassandra.tools.nodetool.*; -import org.apache.cassandra.utils.FBUtilities; - import com.google.common.collect.Maps; import io.airlift.airline.Cli; @@ -67,6 +48,22 @@ import io.airlift.airline.ParseOptionConversionException; import io.airlift.airline.ParseOptionMissingException; import io.airlift.airline.ParseOptionMissingValueException; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.FileWriter; +import org.apache.cassandra.locator.EndpointSnitchInfoMBean; +import org.apache.cassandra.tools.nodetool.*; +import org.apache.cassandra.utils.FBUtilities; + +import static com.google.common.base.Throwables.getStackTraceAsString; +import static com.google.common.collect.Iterables.toArray; +import static com.google.common.collect.Lists.newArrayList; +import static java.lang.Integer.parseInt; +import static java.lang.String.format; +import static org.apache.cassandra.io.util.File.WriteMode.APPEND; +import static org.apache.commons.lang3.ArrayUtils.EMPTY_STRING_ARRAY; +import static org.apache.commons.lang3.StringUtils.EMPTY; +import static org.apache.commons.lang3.StringUtils.isEmpty; +import static org.apache.commons.lang3.StringUtils.isNotEmpty; public class NodeTool { @@ -274,6 +271,14 @@ public int execute(String... args) .withCommand(CMSAdmin.DumpDirectory.class) .withCommand(CMSAdmin.DumpLog.class); + builder.withGroup("consensus_admin") + .withDescription("List and mark ranges as migrating between consensus protocols") + .withDefaultCommand(CassHelp.class) + .withCommand(ConsensusMigrationAdmin.BeginMigration.class) + .withCommands(ConsensusMigrationAdmin.SetTargetProtocol.class) + .withCommands(ConsensusMigrationAdmin.ListCmd.class) + .withCommands(ConsensusMigrationAdmin.FinishMigration.class); + Cli parser = builder.build(); int status = 0; @@ -473,7 +478,7 @@ private NodeProbe connect() protected enum KeyspaceSet { - ALL, NON_SYSTEM, NON_LOCAL_STRATEGY + ALL, NON_SYSTEM, NON_LOCAL_STRATEGY, ACCORD_MANAGED } protected List parseOptionalKeyspace(List cmdArgs, NodeProbe nodeProbe) @@ -492,6 +497,8 @@ protected List parseOptionalKeyspace(List cmdArgs, NodeProbe nod keyspaces.addAll(keyspaces = nodeProbe.getNonLocalStrategyKeyspaces()); else if (defaultKeyspaceSet == KeyspaceSet.NON_SYSTEM) keyspaces.addAll(keyspaces = nodeProbe.getNonSystemKeyspaces()); + else if (defaultKeyspaceSet == KeyspaceSet.ACCORD_MANAGED) + keyspaces.addAll(nodeProbe.getAccordManagedKeyspace()); else keyspaces.addAll(nodeProbe.getKeyspaces()); } diff --git a/src/java/org/apache/cassandra/tools/RepairRunner.java b/src/java/org/apache/cassandra/tools/RepairRunner.java index 01aa5201852b..3a4f77ccd2d7 100644 --- a/src/java/org/apache/cassandra/tools/RepairRunner.java +++ b/src/java/org/apache/cassandra/tools/RepairRunner.java @@ -21,24 +21,24 @@ import java.io.PrintStream; import java.text.SimpleDateFormat; import java.util.List; -import java.util.Map; import org.apache.cassandra.service.ActiveRepairService.ParentRepairStatus; import org.apache.cassandra.service.StorageServiceMBean; import org.apache.cassandra.utils.concurrent.Condition; - import org.apache.cassandra.utils.progress.ProgressEvent; import org.apache.cassandra.utils.progress.ProgressEventType; import org.apache.cassandra.utils.progress.jmx.JMXNotificationProgressListener; -import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.service.ActiveRepairService.ParentRepairStatus.FAILED; import static org.apache.cassandra.service.ActiveRepairService.ParentRepairStatus.valueOf; import static org.apache.cassandra.tools.NodeProbe.JMX_NOTIFICATION_POLL_INTERVAL_SECONDS; +import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; -import static org.apache.cassandra.utils.progress.ProgressEventType.*; +import static org.apache.cassandra.utils.progress.ProgressEventType.COMPLETE; +import static org.apache.cassandra.utils.progress.ProgressEventType.ERROR; +import static org.apache.cassandra.utils.progress.ProgressEventType.PROGRESS; public class RepairRunner extends JMXNotificationProgressListener { @@ -47,23 +47,21 @@ public class RepairRunner extends JMXNotificationProgressListener private final PrintStream out; private final StorageServiceMBean ssProxy; private final String keyspace; - private final Map options; private final Condition condition = newOneTimeCondition(); - private int cmd; + private Integer cmd; private volatile Exception error; - public RepairRunner(PrintStream out, StorageServiceMBean ssProxy, String keyspace, Map options) + public RepairRunner(PrintStream out, StorageServiceMBean ssProxy, String keyspace, Integer cmd) { this.out = out; this.ssProxy = ssProxy; this.keyspace = keyspace; - this.options = options; + this.cmd = cmd; } public void run() throws Exception { - cmd = ssProxy.repairAsync(keyspace, options); if (cmd <= 0) { // repairAsync can only return 0 for replication factor 1. diff --git a/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java new file mode 100644 index 000000000000..cd24bf91fa4f --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import io.airlift.airline.Option; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Collections.emptyList; +import static java.util.Collections.singleton; +import static java.util.Collections.singletonList; + +/** + * For managing migration from one consensus protocol to another. + * + * Mark ranges as migrating, and list the migrating ranges. + */ +public abstract class ConsensusMigrationAdmin extends NodeTool.NodeToolCmd +{ + @Command(name = "list", description = "List migrating tables and ranges") + public static class ListCmd extends ConsensusMigrationAdmin + { + @Arguments(usage = "[ ...]", description = "The keyspace followed by one or many tables") + private List schemaArgs = new ArrayList<>(); + + @Option(title = "format", name = {"-f", "--format"}, description = "Output format, YAML and JSON are the only supported formats, default YAML, prefix with `minified-` to turn off pretty printing") + private String format = "yaml"; + + protected void execute(NodeProbe probe) + { + Set keyspaceNames = schemaArgs.size() > 0 ? singleton(schemaArgs.get(0)) : null; + Set tableNames = schemaArgs.size() > 1 ? new HashSet<>(schemaArgs.subList(1, schemaArgs.size())) : null; + String output = probe.getStorageService().listConsensusMigrations(keyspaceNames, tableNames, format); + probe.output().out.println(output); + } + } + + @Command(name = "begin-migration", description = "Mark the range as migrating for the specified token range and tables") + public static class BeginMigration extends ConsensusMigrationAdmin + { + @Option(title = "start_token", name = {"-st", "--start-token"}, description = "Use -st to specify a token at which the repair range starts") + private String startToken = null; + + @Option(title = "end_token", name = {"-et", "--end-token"}, description = "Use -et to specify a token at which repair range ends") + private String endToken = null; + + @Option(title = "target_protocol", name = {"-tp", "--target-protocol"}, description = "Use -tp to specify what consensus protocol should be migrated to", required=true) + private String targetProtocol = null; + + @Arguments(usage = "[ ...]", description = "The keyspace followed by one or many tables") + private List schemaArgs = new ArrayList<>(); + + protected void execute(NodeProbe probe) + { + checkArgument((endToken != null && startToken != null) || (endToken == null && startToken == null), "Must specify start and end token together"); + String maybeRangesStr = startToken != null ? startToken + ":" + endToken : null; + List keyspaceNames = parseOptionalKeyspace(schemaArgs, probe, KeyspaceSet.ACCORD_MANAGED); + List maybeTableNames = schemaArgs.size() > 1 ? schemaArgs.subList(1, schemaArgs.size()) : null; + probe.getStorageService().migrateConsensusProtocol(targetProtocol, keyspaceNames, maybeTableNames, maybeRangesStr); + probe.output().out.println("Marked requested ranges as migrating. Repair needs to be run in order to complete the migration"); + } + } + + @Command(name = "finish-migration", description = "Complete the migration for a range that has already begun migration") + public static class FinishMigration extends ConsensusMigrationAdmin + { + @Option(title = "start_token", name = {"-st", "--start-token"}, description = "Use -st to specify a token at which the repair range starts (exclusive)") + private String startToken = null; + + @Option(title = "end_token", name = {"-et", "--end-token"}, description = "Use -et to specify a token at which repair range ends (inclusive)") + private String endToken = null; + + @Arguments(usage = "[ ...]", description = "The keyspace followed by one or many tables") + private List schemaArgs = new ArrayList<>(); + + protected void execute(NodeProbe probe) + { + checkArgument((endToken != null) == (startToken != null), "Start and end token must be specified together"); + String maybeRangesStr = startToken != null ? startToken + ":" + endToken : null; + List keyspaceNames = parseOptionalKeyspace(schemaArgs, probe, KeyspaceSet.ACCORD_MANAGED); + List maybeTableNames = schemaArgs.size() > 1 ? schemaArgs.subList(1, schemaArgs.size()) : null; + for (String keyspace : keyspaceNames) + { + List commands = probe.getStorageService().finishConsensusMigration(keyspace, maybeTableNames, maybeRangesStr); + for (Integer command : commands) + { + try + { + probe.blockOnAsyncRepair(probe.output().out, keyspace, command); + } + catch (IOException e) + { + throw new RuntimeException("Error occurred attempting to finish migration for keyspace " + keyspace + " tables " + maybeTableNames + " and ranges " + maybeRangesStr, e); + } + } + } + probe.output().out.printf("Finished consensus migration range (%s) of keyspaces %s and tables %s%n", maybeRangesStr, keyspaceNames, maybeTableNames); + } + } + + @Command(name = "set-target-protocol", description = "Set or change the target consensus protocol of the specified tables. If a migration is in progress then the migration will be reversed with migrating ranges still migrating, unmigrated ranges marked as migrated, and migrating ranges will need migration. Be aware that if no migration was in progress for a table it will immediately cause the table to run on the target protocol because the ranges requiring migration are derived from the migrated ranges that don't exist.") + public static class SetTargetProtocol extends ConsensusMigrationAdmin + { + @Arguments(usage = "[ ...]", description = "The keyspace followed by one or many tables") + private List schemaArgs = new ArrayList<>(); + + @Option(title = "target_protocol", name = {"-tp", "--target-protocol"}, description = "Use -tp to specify what consensus protocol should be migrated to", required=true) + private String targetProtocol = null; + + @Option(title = "force_completion", name = {"-f", "--force-completion"}, description = "Forces migration state for all ranges of the specified table regardless of whether migration completed successfully or not. Should only be used if table is empty or has had no writes since last repair.") + private boolean forceCompletion = false; + + protected void execute(NodeProbe probe) + { + checkArgument(schemaArgs.size() >= 2, "Must specify a keyspace and at least one table"); + List keyspaceNames = schemaArgs.size() > 0 ? singletonList(schemaArgs.get(0)) : emptyList(); + List maybeTableNames = schemaArgs.size() > 1 ? schemaArgs.subList(1, schemaArgs.size()) : null; + probe.getStorageService().setConsensusMigrationTargetProtocol(targetProtocol, keyspaceNames, maybeTableNames); + } + } +} diff --git a/src/java/org/apache/cassandra/tools/nodetool/Repair.java b/src/java/org/apache/cassandra/tools/nodetool/Repair.java index c66992acc9a8..8d5b0607d4e9 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/Repair.java +++ b/src/java/org/apache/cassandra/tools/nodetool/Repair.java @@ -32,14 +32,14 @@ import java.util.function.Supplier; import com.google.common.collect.Sets; +import org.apache.commons.lang3.StringUtils; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.repair.RepairParallelism; import org.apache.cassandra.repair.messages.RepairOption; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool.NodeToolCmd; -import org.apache.commons.lang3.StringUtils; @Command(name = "repair", description = "Repair one or more tables") public class Repair extends NodeToolCmd @@ -134,7 +134,8 @@ else if (preview) @Override public void execute(NodeProbe probe) { - List keyspaces = parseOptionalKeyspace(args, probe, KeyspaceSet.NON_LOCAL_STRATEGY); + KeyspaceSet keyspaceSet = KeyspaceSet.NON_LOCAL_STRATEGY; + List keyspaces = parseOptionalKeyspace(args, probe, keyspaceSet); String[] cfnames = parseOptionalTables(args); if (primaryRange && (!specificDataCenters.isEmpty() || !specificHosts.isEmpty())) diff --git a/src/java/org/apache/cassandra/utils/AbstractBiMultiValMap.java b/src/java/org/apache/cassandra/utils/AbstractBiMultiValMap.java new file mode 100644 index 000000000000..e29427d386a5 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/AbstractBiMultiValMap.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.Collection; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; + +public abstract class AbstractBiMultiValMap implements Map +{ + protected abstract Map forwardDelegate(); + protected abstract Multimap reverseDelegate(); + + public Multimap inverse() + { + return Multimaps.unmodifiableMultimap(reverseDelegate()); + } + + public void clear() + { + forwardDelegate().clear(); + reverseDelegate().clear(); + } + + public boolean containsKey(Object key) + { + return forwardDelegate().containsKey(key); + } + + public boolean containsValue(Object value) + { + return reverseDelegate().containsKey(value); + } + + public Set> entrySet() + { + return forwardDelegate().entrySet(); + } + + public V get(Object key) + { + return forwardDelegate().get(key); + } + + public boolean isEmpty() + { + return forwardDelegate().isEmpty(); + } + + public Set keySet() + { + return forwardDelegate().keySet(); + } + + public V put(K key, V value) + { + V oldVal = forwardDelegate().put(key, value); + if (oldVal != null) + reverseDelegate().remove(oldVal, key); + reverseDelegate().put(value, key); + return oldVal; + } + + public void putAll(Map m) + { + for (Map.Entry entry : m.entrySet()) + put(entry.getKey(), entry.getValue()); + } + + public V remove(Object key) + { + V oldVal = forwardDelegate().remove(key); + reverseDelegate().remove(oldVal, key); + return oldVal; + } + + public Collection removeValue(V value) + { + Collection keys = reverseDelegate().removeAll(value); + for (K key : keys) + forwardDelegate().remove(key); + return keys; + } + + public int size() + { + return forwardDelegate().size(); + } + + public Collection values() + { + return reverseDelegate().keys(); + } + + public Collection valueSet() + { + return reverseDelegate().keySet(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (!(o instanceof AbstractBiMultiValMap)) return false; + AbstractBiMultiValMap that = (AbstractBiMultiValMap) o; + return forwardDelegate().equals(that.forwardDelegate()) && reverseDelegate().equals(that.reverseDelegate()); + } + + @Override + public int hashCode() + { + return Objects.hash(forwardDelegate(), reverseDelegate()); + } +} diff --git a/src/java/org/apache/cassandra/utils/BiMultiValMap.java b/src/java/org/apache/cassandra/utils/BiMultiValMap.java index f439c5c496fd..2859e6964bc2 100644 --- a/src/java/org/apache/cassandra/utils/BiMultiValMap.java +++ b/src/java/org/apache/cassandra/utils/BiMultiValMap.java @@ -17,15 +17,11 @@ */ package org.apache.cassandra.utils; -import java.util.Collection; import java.util.HashMap; import java.util.Map; -import java.util.Objects; -import java.util.Set; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; -import com.google.common.collect.Multimaps; /** * @@ -35,7 +31,7 @@ * @param * @param */ -public class BiMultiValMap implements Map +public class BiMultiValMap extends AbstractBiMultiValMap { protected final Map forwardMap; protected final Multimap reverseMap; @@ -59,104 +55,15 @@ public BiMultiValMap(BiMultiValMap map) reverseMap.putAll(map.inverse()); } - public Multimap inverse() - { - return Multimaps.unmodifiableMultimap(reverseMap); - } - - public void clear() - { - forwardMap.clear(); - reverseMap.clear(); - } - - public boolean containsKey(Object key) - { - return forwardMap.containsKey(key); - } - - public boolean containsValue(Object value) - { - return reverseMap.containsKey(value); - } - - public Set> entrySet() - { - return forwardMap.entrySet(); - } - - public V get(Object key) - { - return forwardMap.get(key); - } - - public boolean isEmpty() - { - return forwardMap.isEmpty(); - } - - public Set keySet() - { - return forwardMap.keySet(); - } - - public V put(K key, V value) - { - V oldVal = forwardMap.put(key, value); - if (oldVal != null) - reverseMap.remove(oldVal, key); - reverseMap.put(value, key); - return oldVal; - } - - public void putAll(Map m) - { - for (Map.Entry entry : m.entrySet()) - put(entry.getKey(), entry.getValue()); - } - - public V remove(Object key) - { - V oldVal = forwardMap.remove(key); - reverseMap.remove(oldVal, key); - return oldVal; - } - - public Collection removeValue(V value) - { - Collection keys = reverseMap.removeAll(value); - for (K key : keys) - forwardMap.remove(key); - return keys; - } - - public int size() - { - return forwardMap.size(); - } - - public Collection values() - { - return reverseMap.keys(); - } - - public Collection valueSet() - { - return reverseMap.keySet(); - } - @Override - public boolean equals(Object o) + protected Map forwardDelegate() { - if (this == o) return true; - if (!(o instanceof BiMultiValMap)) return false; - BiMultiValMap that = (BiMultiValMap) o; - return forwardMap.equals(that.forwardMap) && reverseMap.equals(that.reverseMap); + return forwardMap; } @Override - public int hashCode() + protected Multimap reverseDelegate() { - return Objects.hash(forwardMap, reverseMap); + return reverseMap; } } diff --git a/src/java/org/apache/cassandra/utils/CollectionSerializers.java b/src/java/org/apache/cassandra/utils/CollectionSerializers.java index 0cdd5685abe5..1fcb7cc2f3e3 100644 --- a/src/java/org/apache/cassandra/utils/CollectionSerializers.java +++ b/src/java/org/apache/cassandra/utils/CollectionSerializers.java @@ -26,15 +26,19 @@ import java.util.Map; import java.util.Set; import java.util.function.IntFunction; +import javax.annotation.Nonnull; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.IPartitionerDependentSerializer; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; -import static com.google.common.primitives.Ints.checkedCast; import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; public class CollectionSerializers @@ -46,6 +50,20 @@ public static void serializeCollection(Collection values, DataOutputPlus valueSerializer.serialize(value, out, version); } + public static void serializeCollection(Collection values, DataOutputPlus out, Version version, MetadataSerializer valueSerializer) throws IOException + { + out.writeUnsignedVInt32(values.size()); + for (V value : values) + valueSerializer.serialize(value, out, version); + } + + public static void serializeCollection(Collection values, DataOutputPlus out, int version, IPartitionerDependentSerializer valueSerializer) throws IOException + { + out.writeUnsignedVInt32(values.size()); + for (V value : values) + valueSerializer.serialize(value, out, version); + } + public static > void serializeList(L values, DataOutputPlus out, int version, IVersionedSerializer valueSerializer) throws IOException { int size = values.size(); @@ -64,19 +82,59 @@ public static void serializeMap(Map map, DataOutputPlus out, int ve } } + public static void serializeMap(Map map, DataOutputPlus out, Version version, MetadataSerializer keySerializer, MetadataSerializer valueSerializer) throws IOException + { + out.writeUnsignedVInt32(map.size()); + for (Map.Entry e : map.entrySet()) + { + keySerializer.serialize(e.getKey(), out, version); + valueSerializer.serialize(e.getValue(), out, version); + } + } + + public static void serializeMap(Map map, DataOutputPlus out, int version, IVersionedSerializer keySerializer, IPartitionerDependentSerializer valueSerializer) throws IOException + { + out.writeUnsignedVInt32(map.size()); + for (Map.Entry e : map.entrySet()) + { + keySerializer.serialize(e.getKey(), out, version); + valueSerializer.serialize(e.getValue(), out, version); + } + } + public static List deserializeList(DataInputPlus in, int version, IVersionedSerializer serializer) throws IOException { return deserializeCollection(in, version, serializer, newArrayList()); } + public static List deserializeList(DataInputPlus in, Version version, MetadataSerializer serializer) throws IOException + { + return deserializeCollection(in, version, serializer, newArrayList()); + } + + public static List deserializeList(DataInputPlus in, IPartitioner partitioner, int version, IPartitionerDependentSerializer serializer) throws IOException + { + return deserializeCollection(in, partitioner, version, serializer, newArrayList()); + } + public static Set deserializeSet(DataInputPlus in, int version, IVersionedSerializer serializer) throws IOException { return deserializeCollection(in, version, serializer, newHashSet()); } + public static Set deserializeSet(DataInputPlus in, IPartitioner partitioner, int version, IPartitionerDependentSerializer serializer) throws IOException + { + return deserializeCollection(in, partitioner, version, serializer, newHashSet()); + } + + public static Set deserializeSet(DataInputPlus in, Version version, MetadataSerializer serializer) throws IOException + { + return deserializeCollection(in, version, serializer, newHashSet()); + } + public static > M deserializeMap(DataInputPlus in, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer, IntFunction factory) throws IOException { - int size = checkedCast(in.readUnsignedVInt32()); + int size = in.readUnsignedVInt32(); M result = factory.apply(size); while (size-- > 0) { @@ -87,6 +145,32 @@ public static > M deserializeMap(DataInputPlus in, int return result; } + public static Map deserializeMap(DataInputPlus in, Version version, MetadataSerializer keySerializer, MetadataSerializer valueSerializer, IntFunction> factory) throws IOException + { + int size = in.readUnsignedVInt32(); + Map result = factory.apply(size); + while (size-- > 0) + { + K key = keySerializer.deserialize(in, version); + V value = valueSerializer.deserialize(in, version); + result.put(key, value); + } + return result; + } + + public static Map deserializeMap(DataInputPlus in, IPartitioner partitioner, int version, IVersionedSerializer keySerializer, IPartitionerDependentSerializer valueSerializer, IntFunction> factory) throws IOException + { + int size = in.readUnsignedVInt32(); + Map result = factory.apply(size); + while (size-- > 0) + { + K key = keySerializer.deserialize(in, version); + V value = valueSerializer.deserialize(in, partitioner, version); + result.put(key, value); + } + return result; + } + public static Map deserializeMap(DataInputPlus in, int version, IVersionedSerializer keySerializer, IVersionedSerializer valueSerializer) throws IOException { return deserializeMap(in, version, keySerializer, valueSerializer, newHashMap()); @@ -100,6 +184,22 @@ public static long serializedCollectionSize(Collection values, int versio return size; } + public static long serializedCollectionSize(Collection values, Version version, MetadataSerializer valueSerializer) + { + long size = sizeofUnsignedVInt(values.size()); + for (V value : values) + size += valueSerializer.serializedSize(value, version); + return size; + } + + public static long serializedCollectionSize(Collection values, int version, IPartitionerDependentSerializer valueSerializer) + { + long size = sizeofUnsignedVInt(values.size()); + for (V value : values) + size += valueSerializer.serializedSize(value, version); + return size; + } + public static > long serializedListSize(L values, int version, IVersionedSerializer valueSerializer) { int items = values.size(); @@ -118,6 +218,24 @@ public static long serializedMapSize(Map map, int version, IVersion return size; } + public static long serializedMapSize(Map map, Version version, MetadataSerializer keySerializer, MetadataSerializer valueSerializer) + { + long size = sizeofUnsignedVInt(map.size()); + for (Map.Entry e : map.entrySet()) + size += keySerializer.serializedSize(e.getKey(), version) + + valueSerializer.serializedSize(e.getValue(), version); + return size; + } + + public static long serializedMapSize(Map map, int version, IVersionedSerializer keySerializer, IPartitionerDependentSerializer valueSerializer) + { + long size = sizeofUnsignedVInt(map.size()); + for (Map.Entry e : map.entrySet()) + size += keySerializer.serializedSize(e.getKey(), version) + + valueSerializer.serializedSize(e.getValue(), version); + return size; + } + public static IntFunction> newHashSet() { return i -> i == 0 ? Collections.emptySet() : Sets.newHashSetWithExpectedSize(i); @@ -135,7 +253,7 @@ public static IntFunction> newArrayList() public static int readCollectionSize(DataInputPlus in, int version) throws IOException { - return checkedCast(in.readUnsignedVInt()); + return in.readUnsignedVInt32(); } /* @@ -144,7 +262,7 @@ public static int readCollectionSize(DataInputPlus in, int version) throws IOExc */ private static > C deserializeCollection(DataInputPlus in, int version, IVersionedSerializer serializer, IntFunction factory) throws IOException { - int size = checkedCast(in.readUnsignedVInt32()); + int size = in.readUnsignedVInt32(); C result = factory.apply(size); while (size-- > 0) result.add(serializer.deserialize(in, version)); @@ -174,4 +292,70 @@ public long serializedSize(List t, int version) } }; } + + private static > C deserializeCollection(DataInputPlus in, IPartitioner partitioner, int version, IPartitionerDependentSerializer serializer, IntFunction factory) throws IOException + { + int size = in.readUnsignedVInt32(); + C result = factory.apply(size); + while (size-- > 0) + result.add(serializer.deserialize(in, partitioner, version)); + return result; + } + + private static > C deserializeCollection(DataInputPlus in, Version version, MetadataSerializer serializer, IntFunction factory) throws IOException + { + int size = in.readUnsignedVInt32(); + C result = factory.apply(size); + while (size-- > 0) + result.add(serializer.deserialize(in, version)); + return result; + } + + public static IPartitionerDependentSerializer> newCollectionSerializer(@Nonnull final IPartitionerDependentSerializer serializer) + { + return new IPartitionerDependentSerializer>() + { + @Override + public void serialize(Collection t, DataOutputPlus out, int version) throws IOException + { + serializeCollection(t, out, version, serializer); + } + + @Override + public Collection deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException + { + return deserializeCollection(in, p, version, serializer, newArrayList()); + } + + @Override + public long serializedSize(Collection t, int version) + { + return serializedCollectionSize(t, version, serializer); + } + }; + } + + public static MetadataSerializer> newListSerializer(@Nonnull final MetadataSerializer serializer) + { + return new MetadataSerializer>() + { + @Override + public void serialize(List t, DataOutputPlus out, Version version) throws IOException + { + serializeCollection(t, out, version, serializer); + } + + @Override + public List deserialize(DataInputPlus in, Version version) throws IOException + { + return deserializeList(in, version, serializer); + } + + @Override + public long serializedSize(List t, Version version) + { + return serializedCollectionSize(t, version, serializer); + } + }; + } } diff --git a/src/java/org/apache/cassandra/utils/PojoToString.java b/src/java/org/apache/cassandra/utils/PojoToString.java new file mode 100644 index 000000000000..4e6c8a95ab77 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/PojoToString.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import com.fasterxml.jackson.core.JsonProcessingException; +import org.yaml.snakeyaml.DumperOptions; +import org.yaml.snakeyaml.DumperOptions.FlowStyle; +import org.yaml.snakeyaml.Yaml; + +import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.cassandra.utils.JsonUtils.JSON_OBJECT_MAPPER; + +/** + * Helper to format POJOs that are easy to convert (primitives, nonnull, and built in collections) + * in various human + machine readable formats. Useful for JMX and nodetool. + */ +public class PojoToString +{ + public static final Integer VERSION_50 = 0; + + public static final Integer CURRENT_VERSION = VERSION_50; + + enum Format + { + YAML, + MINIFIED_YAML(true), + JSON, + MINIFIED_JSON(true); + + boolean minified; + + Format() + { + this(false); + } + + Format(boolean minified) + { + this.minified = minified; + } + + public boolean isYaml() + { + return this == YAML || this == MINIFIED_YAML; + } + + public static Format fromString(String formatString) + { + formatString = LocalizeString.toUpperCaseLocalized(formatString); + switch (formatString) + { + case "YAML": + return YAML; + case "MINIFIED-YAML": + return MINIFIED_YAML; + case "JSON": + return JSON; + case "MINIFIED-JSON": + return MINIFIED_JSON; + default: throw new IllegalArgumentException("Unsupported format " + formatString + + " supported formats are YAML, MINIFIED-YAML, JSON, MINIFIED-JSON"); + } + } + } + private static final Set> ALLOWED_PRIMITIVES = ImmutableSet.of( + String.class, + Double.class, + Float.class, + Long.class, + Integer.class, + Short.class, + Byte.class + ); + + private static final List> ALLOWED_COLLECTIONS = ImmutableList.of( + List.class, + Set.class + ); + + /** + * Helper to convert POJOs from a restricted set (primitive Java types and collections) to a human/machine readable + * format that is specified by the format parameter. + * + * This doesn't enforce what objects are serialized so you can get error or messy output if you try and serialize + * things that aren't primitive or collections. + * + * The map must contain a 'version' key set to CURRENT_VERSION + * @param map Map POJO that must be restricted to easily representable types (map, set , list, primitives), and contains the 'version' key set to CURRENT_VERSION + * @param formatString Human/machine readable format name, can be YAML or JSON, prefix with MINIFIED- to get a minified version + * @return The map formatted in the requested format + * @throws IllegalArgumentException If the 'version' key is not present and set to CURRENT_VERSION + */ + public static String pojoMapToString(Map map, String formatString) + { + checkArgument(CURRENT_VERSION.equals(map.get("version"))); + return pojoToString(map, formatString); + } + + private static String pojoToString(Object obj, String formatString) + { + validateAllowedTypes(obj); + Format format = Format.fromString(formatString); + if (format.isYaml()) + { + DumperOptions dumperOptions = new DumperOptions(); + if (format.minified) + { + dumperOptions.setDefaultFlowStyle(FlowStyle.FLOW); + dumperOptions.setIndent(1); + dumperOptions.setWidth(Integer.MAX_VALUE); + dumperOptions.setSplitLines(false); + } + // TODO How do you get snake yaml to produce minified output? + return new Yaml(dumperOptions).dump(obj); + } + else + { + try + { + if (format.minified) + return JSON_OBJECT_MAPPER.writeValueAsString(obj); + else + return JSON_OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(obj); + } + catch (JsonProcessingException e) + { + throw new RuntimeException(e); + } + } + } + + private static void validateAllowedTypes(Object o) + { + if (o == null) + throw new NullPointerException("Null objects are unsupported"); + if (o instanceof Map) + { + for (Map.Entry entry : ((Map)o).entrySet()) + { + Object key = entry.getKey(); + if (!(key instanceof String | key instanceof Long | key instanceof Integer)) + throw new IllegalArgumentException("Map has entry with key " + entry.getKey() + " of " + + key.getClass() + " which is unsupported, only String is supported for map keys"); + validateAllowedTypes(entry.getValue()); + } + } + else if (o instanceof Collection) + { + if (!(o instanceof Set | o instanceof List)) + throw new IllegalArgumentException("Collection " + o + " with " + o.getClass() + " is not in allow list " + ALLOWED_COLLECTIONS); + for (Object element : ((Collection)o)) + validateAllowedTypes(element); + } + else if (!ALLOWED_PRIMITIVES.contains(o.getClass())) + throw new IllegalArgumentException("Scalar " + o + " with " + o.getClass() + " is not in allow list " + ALLOWED_PRIMITIVES); + + } +} diff --git a/src/java/org/apache/cassandra/utils/SortedBiMultiValMap.java b/src/java/org/apache/cassandra/utils/SortedBiMultiValMap.java index 44ac0a01c0b4..1b0d8541ee43 100644 --- a/src/java/org/apache/cassandra/utils/SortedBiMultiValMap.java +++ b/src/java/org/apache/cassandra/utils/SortedBiMultiValMap.java @@ -18,17 +18,21 @@ package org.apache.cassandra.utils; import java.util.Collection; -import java.util.SortedMap; +import java.util.NavigableMap; import java.util.TreeMap; import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; -public class SortedBiMultiValMap extends BiMultiValMap +public class SortedBiMultiValMap extends AbstractBiMultiValMap { - protected SortedBiMultiValMap(SortedMap forwardMap, SortedSetMultimap reverseMap) + protected final NavigableMap forwardMap; + protected final SortedSetMultimap reverseMap; + + protected SortedBiMultiValMap(NavigableMap forwardMap, SortedSetMultimap reverseMap) { - super(forwardMap, reverseMap); + this.forwardMap = forwardMap; + this.reverseMap = reverseMap; } public static , V extends Comparable> SortedBiMultiValMap create() @@ -36,10 +40,10 @@ public static , V extends Comparable> SortedBiMultiVa return new SortedBiMultiValMap(new TreeMap(), TreeMultimap.create()); } - public static , V extends Comparable> SortedBiMultiValMap create(BiMultiValMap map) + public static , V extends Comparable, M extends AbstractBiMultiValMap> SortedBiMultiValMap create(M map) { SortedBiMultiValMap newMap = SortedBiMultiValMap.create(); - newMap.forwardMap.putAll(map.forwardMap); + newMap.forwardMap.putAll(map.forwardDelegate()); // Put each individual TreeSet instead of Multimap#putAll(Multimap) to get linear complexity // See CASSANDRA-14660 for (Entry> entry : map.inverse().asMap().entrySet()) @@ -47,4 +51,15 @@ public static , V extends Comparable> SortedBiMultiVa return newMap; } + @Override + protected NavigableMap forwardDelegate() + { + return forwardMap; + } + + @Override + protected SortedSetMultimap reverseDelegate() + { + return reverseMap; + } } diff --git a/src/java/org/apache/cassandra/utils/TimeUUID.java b/src/java/org/apache/cassandra/utils/TimeUUID.java index b7930ed5828a..49c31478e18c 100644 --- a/src/java/org/apache/cassandra/utils/TimeUUID.java +++ b/src/java/org/apache/cassandra/utils/TimeUUID.java @@ -242,6 +242,11 @@ public static long unixMicrosToRawTimestamp(long unixMicros) return unixMicros * 10 - (UUID_EPOCH_UNIX_MILLIS * 10000); } + public static long unixMicrosToMsb(long unixMicros) + { + return TimeUUID.rawTimestampToMsb(TimeUUID.unixMicrosToRawTimestamp(unixMicros)); + } + public static long msbToRawTimestamp(long msb) { assert (UUID_VERSION_BITS_IN_MSB & msb) == TIMESTAMP_UUID_VERSION_IN_MSB; diff --git a/test/data/serialization/5.0/service.SyncComplete.bin b/test/data/serialization/5.0/service.SyncComplete.bin index 7c775cef6601900bc18ae7203966f3065b89c7a3..d8465dbf61e810df54d74bc4c93261440ebb7ef9 100644 GIT binary patch delta 77 zcmZo*YGRs@ZMWqV-_O^o9&!6JjZ_}~aJOM#U|bh?EQqz{? TgCanoFCYmZ;9{5%0#OVAxf>LC delta 75 zcmZo-YG9g>ZPVjY?W`*2zI^vw9i5#aGfyxuFt7qK2&4cpW6A8uN4zcWKUSA1Zk-cw SU tokens() { try { - IPartitioner partitioner = ((IPartitioner)Class.forName(i.config().getString("partitioner")).newInstance()); + IPartitioner partitioner = FBUtilities.newPartitioner(i.config().getString("partitioner")); return Stream.of(i.config().getString("initial_token").split(",")).map(partitioner.getTokenFactory()::fromString); } catch (Throwable t) diff --git a/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java index 63a2f95f166d..edb749fb2199 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java @@ -19,7 +19,6 @@ package org.apache.cassandra.distributed.test; import java.io.IOException; - import java.net.UnknownHostException; import java.util.ArrayList; import java.util.HashMap; @@ -29,8 +28,9 @@ import java.util.Random; import java.util.Set; import java.util.concurrent.Callable; - import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + import org.junit.Test; import net.bytebuddy.ByteBuddy; @@ -44,8 +44,8 @@ import org.apache.cassandra.distributed.api.NodeToolResult; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.AsymmetricRemoteSyncTask; +import org.apache.cassandra.repair.CassandraRepairJob; import org.apache.cassandra.repair.LocalSyncTask; -import org.apache.cassandra.repair.RepairJob; import org.apache.cassandra.repair.SyncTask; import org.apache.cassandra.repair.TreeResponse; @@ -59,6 +59,8 @@ public class OptimiseStreamsRepairTest extends TestBaseImpl { + static final AtomicInteger createOptimizedSyncCount = new AtomicInteger(); + @Test public void testBasic() throws Exception { @@ -97,6 +99,7 @@ public void testBasic() throws Exception res = cluster.get(1).nodetoolResult("repair", KEYSPACE, "--preview", "--full"); res.asserts().success(); res.asserts().notificationContains("Previewed data was in sync"); + assertTrue(cluster.get(1).callOnInstance(() -> createOptimizedSyncCount.get()) > 0); } } @@ -104,7 +107,7 @@ public static class BBHelper { public static void install(ClassLoader cl, int id) { - new ByteBuddy().rebase(RepairJob.class) + new ByteBuddy().rebase(CassandraRepairJob.class) .method(named("createOptimisedSyncingSyncTasks").and(takesArguments(1))) .intercept(MethodDelegation.to(BBHelper.class)) .make() @@ -114,6 +117,7 @@ public static void install(ClassLoader cl, int id) public static List createOptimisedSyncingSyncTasks(List trees, @SuperCall Callable> zuperCall) { + createOptimizedSyncCount.incrementAndGet(); List tasks = null; try { diff --git a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java index 6470a4d0c5d1..6364401de50f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java @@ -23,6 +23,8 @@ import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import com.google.common.util.concurrent.FutureCallback; import org.junit.Assert; @@ -47,10 +49,12 @@ import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IInstanceConfig; +import org.apache.cassandra.distributed.api.IMessageFilters.Filter; import org.apache.cassandra.distributed.api.TokenSupplier; import org.apache.cassandra.distributed.shared.NetworkTopology; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.reads.repair.BlockingReadRepair; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; import org.apache.cassandra.utils.concurrent.Condition; @@ -71,6 +75,7 @@ import static org.apache.cassandra.net.Verb.READ_REQ; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class ReadRepairTest extends TestBaseImpl @@ -81,7 +86,8 @@ public class ReadRepairTest extends TestBaseImpl @Test public void testBlockingReadRepair() throws Throwable { - testReadRepair(ReadRepairStrategy.BLOCKING); + testReadRepair(ReadRepairStrategy.BLOCKING, false); + testReadRepair(ReadRepairStrategy.BLOCKING, true); } /** * @@ -95,8 +101,14 @@ public void testNoneReadRepair() throws Throwable private void testReadRepair(ReadRepairStrategy strategy) throws Throwable { - try (Cluster cluster = init(Cluster.create(3))) + testReadRepair(strategy, false); + } + + private void testReadRepair(ReadRepairStrategy strategy, boolean brrThroughAccord) throws Throwable + { + try (Cluster cluster = init(Cluster.create(3, config -> config.set("non_serial_write_strategy", brrThroughAccord ? "migration" : "normal")))) { + cluster.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); cluster.schemaChange(withKeyspace("CREATE TABLE %s.t (k int, c int, v int, PRIMARY KEY (k, c)) " + String.format("WITH read_repair='%s'", strategy))); @@ -111,8 +123,11 @@ private void testReadRepair(ReadRepairStrategy strategy) throws Throwable // verify that the third node doesn't have the row assertRows(cluster.get(3).executeInternal(selectQuery)); - // read with CL=QUORUM to trigger read repair + // read with CL=QUORUM to trigger read repair, force 3 to be involved in the read so that read repair + // will occur + Filter blockReadFromOne = cluster.filters().inbound().from(3).to(1).verbs(READ_REQ.id).drop(); assertRows(cluster.coordinator(3).execute(selectQuery, QUORUM), row); + blockReadFromOne.off(); // verify whether the coordinator has the repaired row depending on the read repair strategy if (strategy == ReadRepairStrategy.NONE) @@ -143,13 +158,13 @@ public void readRepairTimeoutTest() throws Throwable catch (Exception ex) { // the containing exception class was loaded by another class loader. Comparing the message as a workaround to assert the exception - Assert.assertTrue(ex.getClass().toString().contains("ReadTimeoutException")); + assertTrue(ex.getClass().toString().contains("ReadTimeoutException")); long actualTimeTaken = currentTimeMillis() - start; long magicDelayAmount = 100L; // it might not be the best way to check if the time taken is around the timeout value. // Due to the delays, the actual time taken from client perspective is slighly more than the timeout value - Assert.assertTrue(actualTimeTaken > reducedReadTimeout); + assertTrue(actualTimeTaken > reducedReadTimeout); // But it should not exceed too much - Assert.assertTrue(actualTimeTaken < reducedReadTimeout + magicDelayAmount); + assertTrue(actualTimeTaken < reducedReadTimeout + magicDelayAmount); assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1"), row(1, 1, 1)); // the partition happened when the repaired node sending back ack. The mutation should be in fact applied. } @@ -423,8 +438,11 @@ public void onFailure(Throwable t) {} catch (ExecutionException e) { Throwable cause = e.getCause(); - Assert.assertTrue("Expected a different error message, but got " + cause.getMessage(), - cause.getMessage().contains("INVALID_ROUTING from /127.0.0.2:7012")); + Matcher matcher = Pattern.compile("Operation failed - received (\\d+) responses and 1 failures: INVALID_ROUTING from /127.0.0.2:7012").matcher(cause.getMessage()); + assertTrue("Expected a different error message, but got " + cause.getMessage(), + matcher.matches()); + int responses = Integer.valueOf(matcher.group(1)); + assertTrue(responses >= 1 && responses <= 3); } catch (InterruptedException e) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java b/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java index 445315f34388..f6f5e2a812fb 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java @@ -25,6 +25,7 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.junit.Assert; import org.junit.Test; @@ -86,7 +87,8 @@ public void speculateTest() throws Throwable ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(TABLE); DecoratedKey dk = cfs.decorateKey(bytes(PK_VALUE)); ReplicaPlan.ForTokenRead plan = ReplicaPlans.forRead(keyspace, dk.getToken(), null, - QUORUM, cfs.metadata().params.speculativeRetry); + QUORUM, cfs.metadata().params.speculativeRetry, + ReadCoordinator.DEFAULT); return plan.contacts().endpointList().stream().map(InetSocketAddress::getAddress).collect(Collectors.toList()); }, null); logger.info("Replicas provided in a read plan contacts: {}", readPlanEndpoints); diff --git a/test/distributed/org/apache/cassandra/distributed/test/SSTableIdGenerationTest.java b/test/distributed/org/apache/cassandra/distributed/test/SSTableIdGenerationTest.java index c45108eea3a8..75b0806899d9 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/SSTableIdGenerationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/SSTableIdGenerationTest.java @@ -36,6 +36,7 @@ import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.compaction.AbstractCompactionStrategy; import org.apache.cassandra.db.compaction.LeveledCompactionStrategy; @@ -144,8 +145,14 @@ public void testRestartWithUUIDDisabled() throws IOException .withConfig(config -> config.set(ENABLE_UUID_FIELD_NAME, true)) .start())) { - cluster.disableAutoCompaction(KEYSPACE); cluster.schemaChange(createTableStmt(KEYSPACE, "tbl", null)); + for (IInvokableInstance instance : cluster) + { + instance.runOnInstance(() -> { + for (ColumnFamilyStore cs : Keyspace.open(KEYSPACE).getColumnFamilyStores()) + cs.disableAutoCompaction(); + }); + } createSSTables(cluster.get(1), KEYSPACE, "tbl", 1, 2); assertSSTablesCount(cluster.get(1), 0, 2, KEYSPACE, "tbl"); verfiySSTableActivity(cluster, false); diff --git a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java index 2e26659243a6..c6c941328553 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java @@ -36,17 +36,23 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.config.Config.LWTStrategy; +import org.apache.cassandra.config.Config.NonSerialWriteStrategy; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.shared.AssertUtils; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.Pair; import static com.google.common.collect.Iterators.toArray; import static java.lang.String.format; import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL; import static org.apache.cassandra.distributed.api.ConsistencyLevel.QUORUM; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.SERIAL; import static org.apache.cassandra.distributed.shared.AssertUtils.row; /** @@ -81,24 +87,31 @@ public class ShortReadProtectionTest extends TestBaseImpl @Parameterized.Parameter(2) public boolean paging; - @Parameterized.Parameters(name = "{index}: read_cl={0} flush={1} paging={2}") + @Parameterized.Parameter(3) + public Pair transactionStrategies; + + @Parameterized.Parameters(name = "{index}: read_cl={0} flush={1} paging={2}, transactionStrategies={3}") public static Collection data() { List result = new ArrayList<>(); - for (ConsistencyLevel readConsistencyLevel : Arrays.asList(ALL, QUORUM)) - for (boolean flush : BOOLEANS) - for (boolean paging : BOOLEANS) - result.add(new Object[]{ readConsistencyLevel, flush, paging }); + for (Pair transactionStrategies : Arrays.asList(Pair.create(LWTStrategy.accord, NonSerialWriteStrategy.migration), Pair.create(LWTStrategy.migration, NonSerialWriteStrategy.normal))) + for (ConsistencyLevel readConsistencyLevel : Arrays.asList(ALL, QUORUM, SERIAL)) + for (boolean flush : BOOLEANS) + for (boolean paging : BOOLEANS) + result.add(new Object[]{ readConsistencyLevel, flush, paging, transactionStrategies}); return result; } @BeforeClass public static void setupCluster() throws IOException { + // TODO this blocks some of the original testing of SRP invoking BRR since it is BRRing through Accord + // but maybe that is out of scope and is covered by the dedicated BRR tests? cluster = init(Cluster.build() .withNodes(NUM_NODES) .withConfig(config -> config.set("hinted_handoff_enabled", false)) .start()); + cluster.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); } @AfterClass @@ -111,6 +124,14 @@ public static void teardownCluster() @Before public void setupTester() { + String lwtStrategy = transactionStrategies.left.toString(); + String nonSerialWriteStrategy = transactionStrategies.right.toString(); + cluster.forEach(node -> { + node.runOnInstance(() -> { + DatabaseDescriptor.setLWTStrategy(LWTStrategy.valueOf(lwtStrategy)); + DatabaseDescriptor.setNonSerialWriteStrategy(NonSerialWriteStrategy.valueOf(nonSerialWriteStrategy)); + }); + }); tester = new Tester(readConsistencyLevel, flush, paging); } @@ -427,7 +448,7 @@ private Tester(ConsistencyLevel readConsistencyLevel, boolean flush, boolean pag this.paging = paging; qualifiedTableName = KEYSPACE + ".t_" + seqNumber.getAndIncrement(); - assert readConsistencyLevel == ALL || readConsistencyLevel == QUORUM + assert readConsistencyLevel == ALL || readConsistencyLevel == QUORUM || readConsistencyLevel == SERIAL : "Only ALL and QUORUM consistency levels are supported"; } @@ -485,12 +506,12 @@ private Tester toNode3(String... queries) /** * Internally runs the specified write queries in the specified node. If the {@link #readConsistencyLevel} is - * QUORUM the write will also be internally done in the next replica in the ring, to simulate a QUORUM write. + * QUORUM/SERIAL the write will also be internally done in the next replica in the ring, to simulate a QUORUM/SERIAL write. */ private Tester toNode(int node, String... queries) { IInvokableInstance replica = cluster.get(node); - IInvokableInstance nextReplica = readConsistencyLevel == QUORUM + IInvokableInstance nextReplica = (readConsistencyLevel == QUORUM || readConsistencyLevel == SERIAL) ? cluster.get(node == NUM_NODES ? 1 : node + 1) : null; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index 8a515bb614d4..db8f1c21d9a2 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -20,28 +20,34 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.stream.Collectors; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; - -import org.apache.cassandra.distributed.Cluster; -import org.assertj.core.api.Assertions; - +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.primitives.Unseekables; import accord.topology.Topologies; +import org.apache.cassandra.config.Config.NonSerialWriteStrategy; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.functions.types.utils.Bytes; import org.apache.cassandra.db.marshal.Int32Type; @@ -49,19 +55,25 @@ import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.QueryResults; import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.shared.AssertUtils; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.utils.ByteBufferUtil; +import org.assertj.core.api.Assertions; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static java.util.Collections.singletonList; import static org.apache.cassandra.cql3.CQLTester.row; import static org.apache.cassandra.distributed.util.QueryResultUtil.assertThat; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +@RunWith(Parameterized.class) public class AccordCQLTest extends AccordTestBase { private static final Logger logger = LoggerFactory.getLogger(AccordCQLTest.class); @@ -72,11 +84,36 @@ protected Logger logger() return logger; } + @Parameterized.Parameter + public String nonSerialWriteStrategyName; + + NonSerialWriteStrategy nonSerialWriteStrategy; + + @Parameterized.Parameters(name = "nonSerialWriteStrategy={0}") + public static Collection data() + { + return ImmutableList.of(new Object[] {NonSerialWriteStrategy.accord.toString()}, new Object[] {NonSerialWriteStrategy.migration.toString()}); + } + + @Before + public void setNonSerialWriteStrategy() + { + nonSerialWriteStrategy = NonSerialWriteStrategy.valueOf(nonSerialWriteStrategyName); + String nonSerialWriteStrategyName = this.nonSerialWriteStrategyName; + SHARED_CLUSTER.forEach(node -> { + node.runOnInstance(() -> { + DatabaseDescriptor.setNonSerialWriteStrategy(NonSerialWriteStrategy.valueOf(nonSerialWriteStrategyName)); + }); + }); + } + @BeforeClass public static void setupClass() throws IOException { - AccordTestBase.setupClass(); + AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord") + .set("non_serial_write_strategy", "migration")), 2); SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); + SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); } @Test @@ -138,7 +175,8 @@ public void testMultipleShards() throws Exception { String keyspace = "multipleShards"; String currentTable = keyspace + ".tbl"; - List ddls = Arrays.asList("CREATE KEYSPACE " + keyspace + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}", + List ddls = Arrays.asList("DROP KEYSPACE IF EXISTS " + keyspace + ";", + "CREATE KEYSPACE " + keyspace + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}", "CREATE TABLE " + currentTable + " (k blob, c int, v int, primary key (k, c))"); List tokens = tokens(); List keys = tokensToKeys(tokens); @@ -354,7 +392,12 @@ private void checkUpdateStatic(Cluster cluster, String update, int key, String e private void assertResultsFromAccordMatches(Cluster cluster, String accordRead, String simpleRead, int key) { - Object[][] simpleReadResult = cluster.get(1).executeInternal(simpleRead, key); + Object[][] simpleReadResult; + if (nonSerialWriteStrategy.ignoresSuppliedConsistencyLevel) + // With accord non-SERIAL write strategy the commit CL is effectively ANY so we need to read at SERIAL + simpleReadResult = cluster.coordinator(1).execute(simpleRead, ConsistencyLevel.SERIAL, key); + else + simpleReadResult = cluster.get(1).executeInternal(simpleRead, key); Object[][] accordReadResult = executeWithRetry(cluster, accordRead, key).toObjectArrays(); Assertions.assertThat(withRemovedNullOnlyRows(accordReadResult)).isEqualTo(withRemovedNullOnlyRows(simpleReadResult)); @@ -587,10 +630,56 @@ private void testScalarShorthandOperation(int startingValue, String operation, i String check = "BEGIN TRANSACTION\n" + " SELECT v FROM " + currentTable + " WHERE k = 1;\n" + "COMMIT TRANSACTION"; - assertRowEqualsWithPreemptedRetry(cluster, new Object[] { endingvalue }, check); + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2 }, check); + }); + } + + @Test + public void testConstantNonStaticRowReadBeforeUpdate() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 2);\n" + + " SELECT row1.v;\n" + + " UPDATE " + currentTable + " SET v += 1 WHERE k = 1 AND c = 2;\n" + + "COMMIT TRANSACTION"; + assertRowEquals(cluster, new Object[] { 3 }, update); + + String check = "BEGIN TRANSACTION\n" + + " SELECT v FROM " + currentTable + " WHERE k = 1 AND c = 2;\n" + + "COMMIT TRANSACTION"; + assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); + }); + } + + @Test + public void testRangeDeletion() throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))", + cluster -> + { + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 3, ?)", ConsistencyLevel.ALL, 4); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 4, ?)", ConsistencyLevel.ALL, 5); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 2);\n" + + " SELECT row1.v;\n" + + " DELETE FROM " + currentTable + " WHERE k = 1 AND c >=3 AND c <= 4;\n" + + "COMMIT TRANSACTION"; + assertRowEquals(cluster, new Object[] { 3 }, update); + + Object[][] check = cluster.coordinator(1).execute("SELECT * FROM " + currentTable + " WHERE k = 1;", ConsistencyLevel.SERIAL); + assertArrayEquals(new Object[] { 1, 2, 3 }, check[0]); + assertEquals(1, check.length); }); } + @Test public void testPartitionKeyReferenceCondition() throws Exception { @@ -2434,7 +2523,9 @@ public void testMultiKeyQueryAndInsert() throws Throwable @Test public void demoTest() throws Throwable { + SHARED_CLUSTER.schemaChange("DROP KEYSPACE IF EXISTS demo_ks;"); SHARED_CLUSTER.schemaChange("CREATE KEYSPACE demo_ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2};"); + SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged("demo_ks")); SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_docs ( org_name text, doc_id int, contents_version int static, title text, permissions int, PRIMARY KEY (org_name, doc_id) );"); SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_users ( org_name text, user text, members_version int static, permissions int, PRIMARY KEY (org_name, user) );"); SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.user_docs ( user text, doc_id int, title text, org_name text, permissions int, PRIMARY KEY (user, doc_id) );"); @@ -2519,6 +2610,8 @@ public void testCASAndSerialRead() throws Exception cluster -> { ICoordinator coordinator = cluster.coordinator(1); int startingAccordCoordinateCount = getAccordCoordinateCount(); + assertRowEquals(cluster, new Object[]{false}, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF EXISTS"); + assertRowEquals(cluster, new Object[]{false}, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); coordinator.execute("INSERT INTO " + currentTable + " (id, c, v, s) VALUES (1, 2, 3, 5);", ConsistencyLevel.ALL); assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 3, 5); assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); @@ -2533,8 +2626,118 @@ public void testCASAndSerialRead() throws Exception assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 5, 5); assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET s = 6 WHERE id = 1 IF s = 5"); assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 5, 6); + + // Test that read before write works with CAS + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 6, 7); + + // Check range deletion works + coordinator.execute("INSERT INTO " + currentTable + " (id, c, v, s) VALUES (1, 2, 6, 7);", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + currentTable + " (id, c, v) VALUES (1, 3, 3);", ConsistencyLevel.ALL); + assertRowEquals(cluster, new Object[]{true}, "BEGIN BATCH \n" + + "UPDATE " + currentTable + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS; \n" + + "DELETE FROM " + currentTable + " WHERE id = 1 AND c > 0 AND c < 10; \n" + + "APPLY BATCH;"); + Object[][] rangeDeletionCheck = coordinator.execute("SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1", ConsistencyLevel.SERIAL); + assertArrayEquals(new Object[] { 1, 2, 7, 8 }, rangeDeletionCheck[0]); + assertEquals(1, rangeDeletionCheck.length); + // Make sure all the consensus using queries actually were run on Accord - assertEquals( 11, getAccordCoordinateCount() - startingAccordCoordinateCount); - }); + if (nonSerialWriteStrategy.writesThroughAccord) + assertEquals( 20, getAccordCoordinateCount() - startingAccordCoordinateCount); + else + // Non-serial writes don't go through Accord in these modes + assertEquals( 17, getAccordCoordinateCount() - startingAccordCoordinateCount); + }); + } + + // Reproduces some bugs that simulator finds + @Test + public void testCASSimulatorLite() throws Exception + { + test("CREATE TABLE " + currentTable + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", + cluster -> { + ICoordinator coordinator = cluster.coordinator(1); + coordinator.execute("INSERT INTO " + currentTable + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); + + ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); + ExecutorService es = Executors.newCachedThreadPool(); + List> futures = new ArrayList<>(); + for (int ii = 0; ii < 10; ii++) + { + int id = ii; + futures.add(es.submit(() -> coordinator.execute("UPDATE " + currentTable + " SET count = count + 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk = ? IF EXISTS", ConsistencyLevel.ALL, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id))), 1))); + } + for (Future f : futures) + f.get(); + + Object[][] result = coordinator.execute("SELECT pk, count, seq1, seq2 FROM " + currentTable + " WHERE pk = 1", ConsistencyLevel.SERIAL); + + int[] seq1 = Arrays.stream(((String) result[0][2]).split(",")) + .filter(s -> !s.isEmpty()) + .mapToInt(Integer::parseInt) + .toArray(); + int[] seq2 = ((ArrayList) result[0][3]).stream().mapToInt(x -> x).toArray(); + logger.info("String append of ids executed {}", Arrays.toString(seq1)); + logger.info("List append of ids executed {}", Arrays.toString(seq2)); + assertArrayEquals("History doesn't match between the two columns", seq1, seq2); + }); + } + + @Test + public void testTransactionCasSimulatorLite() throws Exception + { + test("CREATE TABLE " + currentTable + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", + cluster -> + { + ICoordinator coordinator = cluster.coordinator(1); + coordinator.execute("INSERT INTO " + currentTable + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); + + ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); + ExecutorService es = Executors.newCachedThreadPool(); + List> futures = new ArrayList<>(); + for (int ii = 0; ii < 10; ii++) + { + int id = ii; + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + currentTable + " WHERE pk = 1);\n" + + " UPDATE " + currentTable + " SET count += 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk=1;\n" + + "COMMIT TRANSACTION"; + futures.add(es.submit(() -> coordinator.executeWithResult(update, ConsistencyLevel.ANY, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id)))))); + } + for (Future f : futures) + f.get(); + + String check = "BEGIN TRANSACTION\n" + + " SELECT * FROM " + currentTable + " WHERE pk = 1;\n" + + "COMMIT TRANSACTION"; + Object[][] result = coordinator.execute(check, ConsistencyLevel.ALL); + + int[] seq1 = Arrays.stream(((String) result[0][2]).split(",")) + .filter(s -> !s.isEmpty()) + .mapToInt(Integer::parseInt) + .toArray(); + int[] seq2 = ((ArrayList) result[0][3]).stream().mapToInt(x -> x).toArray(); + logger.info("String append of ids executed {}", Arrays.toString(seq1)); + logger.info("List append of ids executed {}", Arrays.toString(seq2)); + assertArrayEquals("History doesn't match between the two columns", seq1, seq2); + } + ); + } + + @Test + public void testSerialReadDescending() throws Throwable + { + test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY(k, c))", + cluster -> { + ICoordinator coordinator = cluster.coordinator(1); + for (int i = 1; i <= 10; i++) + coordinator.execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); + } + ); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java index 06b44805b365..827eebf6f9ac 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java @@ -39,6 +39,7 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.AssertionUtils; import org.assertj.core.api.Assertions; @@ -78,7 +79,7 @@ public void shouldHideAccordTransactions() throws IOException assertEquals("No Accord virtual tables should exist", Collections.emptyList(), tables); // Make sure we throw if someone tries to coordinate a transaction against the no-op service: - Assertions.assertThatThrownBy(() -> cluster.get(1).callOnInstance(() -> AccordService.instance().coordinate(null, null))) + Assertions.assertThatThrownBy(() -> cluster.get(1).callOnInstance(() -> AccordService.instance().coordinate(null, null, Dispatcher.RequestTime.forImmediateExecution()))) .isInstanceOf(UnsupportedOperationException.class); } } @@ -91,7 +92,7 @@ public void shouldFailOnAccordMigrationWithAccordDisabled() throws IOException .withoutVNodes() .withConfig(c -> c.with(Feature.NETWORK) .set("accord.enabled", "false") - .set("legacy_paxos_strategy", "accord")).createWithoutStarting()) + .set("lwt_strategy", "accord")).createWithoutStarting()) { Assertions.assertThatThrownBy(() -> cluster.startup()) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java index ba9e1b801ce2..7315df858a49 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java @@ -18,11 +18,17 @@ package org.apache.cassandra.distributed.test.accord; +import java.io.IOException; +import java.util.function.Function; + +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.impl.SimpleProgressLog; import accord.messages.Commit; +import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IMessageFilters; import org.apache.cassandra.distributed.impl.Instance; import org.apache.cassandra.net.Message; @@ -38,9 +44,16 @@ protected Logger logger() return logger; } + @BeforeClass + public static void setUp() throws IOException + { + AccordTestBase.setupCluster(Function.identity(), 2); + } + @Test public void testRecovery() throws Exception { + pauseSimpleProgressLog(); test(cluster -> { IMessageFilters.Filter lostApply = cluster.filters().verbs(Verb.ACCORD_APPLY_REQ.id).drop(); IMessageFilters.Filter lostCommit = cluster.filters().verbs(Verb.ACCORD_COMMIT_REQ.id).to(2).drop(); @@ -89,12 +102,13 @@ public void testRecovery() throws Exception @Test public void testLostCommitReadTriggersFallbackRead() throws Exception { + pauseSimpleProgressLog(); test(cluster -> { // It's expected that the required Read will happen regardless of whether this fails to return a read cluster.filters().verbs(Verb.ACCORD_COMMIT_REQ.id).messagesMatching((from, to, iMessage) -> cluster.get(from).callOnInstance(() -> { Message msg = Instance.deserializeMessage(iMessage); if (msg.payload instanceof Commit) - return ((Commit) msg.payload).read != null; + return ((Commit) msg.payload).readData != null; return false; })).drop(); @@ -113,4 +127,10 @@ public void testLostCommitReadTriggersFallbackRead() throws Exception assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check, 0, 0); }); } + + private void pauseSimpleProgressLog() + { + for (IInvokableInstance instance : SHARED_CLUSTER) + instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = true); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java new file mode 100644 index 000000000000..6022fdda5595 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import org.junit.Test; + +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.statements.ModificationStatement; +import org.apache.cassandra.cql3.statements.SelectStatement; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.impl.RowUtil; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.distributed.util.QueryResultUtil; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.service.accord.AccordService; + +import static org.apache.cassandra.distributed.api.Feature.GOSSIP; +import static org.apache.cassandra.distributed.api.Feature.NETWORK; + +public class AccordInteropReadTest extends TestBaseImpl +{ + + private static void localWrite(String s) + { + ModificationStatement stmt = (ModificationStatement) QueryProcessor.parseStatement(s).prepare(ClientState.forInternalCalls()); + stmt.executeLocally(QueryState.forInternalCalls(), QueryOptions.DEFAULT); + } + + private static SimpleQueryResult localRead(String s) + { + SelectStatement stmt = (SelectStatement) QueryProcessor.parseStatement(s).prepare(ClientState.forInternalCalls()); + return RowUtil.toQueryResult(stmt.executeLocally(QueryState.forInternalCalls(), QueryOptions.DEFAULT)); + } + + private static Object[] obj(Object... values) + { + return values; + } + + @Test + public void serialReadTest() throws Throwable + { + try (Cluster cluster = builder().withNodes(3) + .withConfig(config -> config.with(GOSSIP).with(NETWORK) + .set("non_serial_write_strategy", "mixed") + .set("lwt_strategy", "accord")) + .start()) + { + cluster.schemaChange("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':3}"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, PRIMARY KEY (k, c))"); + cluster.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged("ks")); + + cluster.get(1).runOnInstance(() -> localWrite("INSERT INTO ks.tbl (k, c, v) VALUES (1, 1, 1)")); + cluster.get(2).runOnInstance(() -> localWrite("INSERT INTO ks.tbl (k, c, v) VALUES (1, 1, 2)")); + cluster.get(3).shutdown(); + cluster.get(1).runOnInstance(() -> QueryResultUtil.assertThat(localRead("SELECT * FROM ks.tbl WHERE k=1")).isEqualTo(obj(obj(1, 1, 1)))); + cluster.get(2).runOnInstance(() -> QueryResultUtil.assertThat(localRead("SELECT * FROM ks.tbl WHERE k=1")).isEqualTo(obj(obj(1, 1, 2)))); + + + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT v FROM ks.tbl WHERE k=0 AND c=0);\n" + + " SELECT row1.v;\n" + + " IF row1 IS NULL THEN\n" + + " INSERT INTO ks.tbl (k, c, v) VALUES (0, 0, 1);\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + SimpleQueryResult result = cluster.coordinator(1).executeWithResult("SELECT * FROM ks.tbl WHERE k=1", ConsistencyLevel.SERIAL); + QueryResultUtil.assertThat(result).isEqualTo(obj(obj(1, 1, 2))); + cluster.get(1).runOnInstance(() -> QueryResultUtil.assertThat(localRead("SELECT * FROM ks.tbl WHERE k=1")).isEqualTo(obj(obj(1, 1, 2)))); + cluster.get(2).runOnInstance(() -> QueryResultUtil.assertThat(localRead("SELECT * FROM ks.tbl WHERE k=1")).isEqualTo(obj(obj(1, 1, 2)))); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java new file mode 100644 index 000000000000..74c64c3b138e --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.shared.AssertUtils; +import org.apache.cassandra.service.accord.AccordService; + +public class AccordInteroperabilityTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordInteroperabilityTest.class); + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws IOException + { + AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord") + .set("non_serial_write_strategy", "accord")), 3); + SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); + } + + @Test + public void testSerialReadDescending() throws Throwable + { + test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY(k, c))", + cluster -> { + ICoordinator coordinator = cluster.coordinator(1); + for (int i = 1; i <= 10; i++) + coordinator.execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); + assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); + } + ); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java index 4db878da47c2..496e8d08e4ca 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -60,7 +60,7 @@ protected Logger logger() @BeforeClass public static void setupClass() throws IOException { - AccordTestBase.setupClass(); + AccordTestBase.setupCluster(Function.identity(), 2); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); for (int i = 0; i < SHARED_CLUSTER.size(); i++) // initialize metrics logger.trace(SHARED_CLUSTER.get(i + 1).callOnInstance(() -> AccordMetrics.readMetrics.toString() + AccordMetrics.writeMetrics.toString())); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java new file mode 100644 index 000000000000..69c550f633d1 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -0,0 +1,655 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Function; +import javax.annotation.Nonnull; + +import com.google.common.collect.ImmutableList; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.core.type.TypeReference; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.Config.PaxosVariant; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.BufferDecoratedKey; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.SimpleBuilders.PartitionUpdateBuilder; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.NodeToolResult; +import org.apache.cassandra.gms.EndpointState; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import org.apache.cassandra.service.paxos.Ballot; +import org.apache.cassandra.service.paxos.Ballot.Flag; +import org.apache.cassandra.service.paxos.BallotGenerator; +import org.apache.cassandra.service.paxos.Commit.Agreed; +import org.apache.cassandra.service.paxos.Commit.Proposal; +import org.apache.cassandra.service.paxos.PaxosState; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.JsonUtils; +import org.apache.cassandra.utils.PojoToString; +import org.yaml.snakeyaml.Yaml; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.lang.String.format; +import static java.util.Collections.emptyList; +import static org.apache.cassandra.Util.spinUntilSuccess; +import static org.apache.cassandra.db.SystemKeyspace.CONSENSUS_MIGRATION_STATE; +import static org.apache.cassandra.db.SystemKeyspace.PAXOS; +import static org.apache.cassandra.dht.Range.normalize; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.ANY; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.SERIAL; +import static org.apache.cassandra.schema.SchemaConstants.SYSTEM_KEYSPACE_NAME; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV2; +import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PROMISE; +import static org.assertj.core.api.Fail.fail; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/* + * This test suite is intended to serve as an integration test with some pretty good visibility into actual execution + * that can run quickly, and make sure all the right steps are running during migration. + * + * For correctness related to wrong/right answers we rely on simulator to validate. + */ +public class AccordMigrationTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordMigrationTest.class); + + private static final int CLUSTERING_VALUE = 2; + + private static final String TABLE_FMT = "CREATE TABLE %s (id int, c int, v int, s int static, PRIMARY KEY ((id), c));"; + + private static final String CAS_FMT = "UPDATE %s SET v = 4 WHERE id = ? AND c = %d IF v = 42"; + + private static IPartitioner partitioner; + + private static Token minToken; + + private static Token maxToken; + + private static Token midToken; + + private static Token upperMidToken; + + private static Token lowerMidToken; + + private static ICoordinator coordinator; + + // To create a precise repair where the repaired range is fully contained in a locally replicated range + // we need to align with this token. The local ranges are (9223372036854775805,-1] and (-1,9223372036854775805] + // No idea why the partitioner creates such an + private Token maxAlignedWithLocalRanges = new LongToken(9223372036854775805L); + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws IOException + { + ServerTestUtils.daemonInitialization(); + // Otherwise repair complains if you don't specify a keyspace + CassandraRelevantProperties.SYSTEM_TRACES_DEFAULT_RF.setInt(3); + AccordTestBase.setupCluster(builder -> + builder.appendConfig(config -> + config.set("paxos_variant", PaxosVariant.v2.name()) + .set("non_serial_write_strategy", "migration")), + 3); + partitioner = FBUtilities.newPartitioner(SHARED_CLUSTER.get(1).callsOnInstance(() -> DatabaseDescriptor.getPartitioner().getClass().getSimpleName()).call()); + StorageService.instance.setPartitionerUnsafe(partitioner); + ServerTestUtils.prepareServerNoRegister(); + minToken = partitioner.getMinimumToken(); + maxToken = partitioner.getMaximumToken(); + midToken = partitioner.midpoint(minToken, maxToken); + upperMidToken = partitioner.midpoint(midToken, maxToken); + lowerMidToken = partitioner.midpoint(minToken, midToken); + coordinator = SHARED_CLUSTER.coordinator(1); + SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); + } + + @AfterClass + public static void tearDownClass() + { + StorageService.instance.resetPartitionerUnsafe(); + } + + @After + public void tearDown() throws Exception + { + super.tearDown(); + // Reset migration state + forEach(() -> { + ConsensusRequestRouter.resetInstance(); + ConsensusKeyMigrationState.reset(); + }); + SHARED_CLUSTER.get(1).runOnInstance(() -> { + ConsensusTableMigrationState.reset(); + }); + SHARED_CLUSTER.coordinators().forEach(coordinator -> coordinator.execute(format("TRUNCATE TABLE %s.%s", SYSTEM_KEYSPACE_NAME, CONSENSUS_MIGRATION_STATE), ALL)); + SHARED_CLUSTER.coordinators().forEach(coordinator -> coordinator.execute(format("TRUNCATE TABLE %s.%s", SYSTEM_KEYSPACE_NAME, PAXOS), ALL)); + } + + private static String nodetool(ICoordinator coordinator, String... commandAndArgs) + { + NodeToolResult nodetoolResult = coordinator.instance().nodetoolResult(commandAndArgs); + if (!nodetoolResult.getStdout().isEmpty()) + System.out.println(nodetoolResult.getStdout()); + if (!nodetoolResult.getStderr().isEmpty()) + System.err.println(nodetoolResult.getStderr()); + if (nodetoolResult.getError() != null) + fail("Failed nodetool " + Arrays.asList(commandAndArgs), nodetoolResult.getError()); + // TODO why does standard out end up in stderr in nodetool? + return nodetoolResult.getStdout(); + } + + private static int getKeyBetweenTokens(Token left, Token right) + { + return getKeysBetweenTokens(left, right).next(); + } + + private static Iterator getKeysBetweenTokens(Token left, Token right) + { + return new Iterator() + { + int candidate = 0; + @Override + public boolean hasNext() + { + return true; + } + + @Override + public Integer next() + { + for (int i = 0; i < 1_000_000; i++) + { + int value = candidate; + candidate++; + if (partitioner.getToken(ByteBufferUtil.bytes(value)).compareTo(right) < 0 && partitioner.getToken(ByteBufferUtil.bytes(value)).compareTo(left) > 0) + return value; + } + throw new IllegalStateException("Gave up after 1 million attempts"); + } + }; + } + + /* + * Force routing a request to Paxos even after a range has been marked migrating to simulate + * a race between updating cluster metadata and making a routing decision to a specific consensus + * protocol. Paxos should still detect the routing change at two points. After running the promise phase + * (round of messaging might discover a new epoch) and during the accept phase (might not get a majority due + * to rejects caused by acceptors refusing due to migration). + * + * This is used directly to test that begin rejects after discovering a migration, and indirectly in + * PaxosToAccordMigrationNotHappeningUpToAccept. + */ + public static class RoutesToPaxosOnce extends ConsensusRequestRouter + { + boolean routed; + + @Override + protected ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + if (routed) + return super.routeAndMaybeMigrate(key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + routed = true; + return paxosV2; + } + } + + /* + * To allow for testing of Paxos we want to force begin to succeed, but accept to fail + * with a retry on new protocol reject. + */ + public static class PaxosToAccordMigrationNotHappeningUpToBegin extends RoutesToPaxosOnce + { + @Override + public boolean isKeyInMigratingOrMigratedRangeDuringPaxosBegin(TableId tableId, DecoratedKey key) + { + return false; + } + } + + public static class PaxosToAccordMigrationNotHappeningUpToAccept extends PaxosToAccordMigrationNotHappeningUpToBegin + { + @Override + public boolean isKeyInMigratingOrMigratedRangeDuringPaxosAccept(TableId tableId, DecoratedKey key) + { + return false; + } + } + + public static class RoutesToAccordOnce extends ConsensusRequestRouter + { + boolean routed; + + @Override + protected ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + if (routed) + return super.routeAndMaybeMigrate(key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + routed = true; + return ConsensusRoutingDecision.accord; + } + } + + /* + * Helper to invoke a query and assert that the right metrics change indicating the correct + * paths were taken to execute the query during migration + */ + private static void assertTargetAccordWrite(Consumer query, int coordinatorIndex, int key, int expectedAccordWriteCount, int expectedCasWriteCount, int expectedKeyMigrationCount, int expectedCasBeginRejects, int expectedCasAcceptRejects) + { + int startingWriteCount = getAccordWriteCount(coordinatorIndex); + int startingCasWriteCount = getCasWriteCount(coordinatorIndex); + int startingKeyMigrationCount = getKeyMigrationCount(coordinatorIndex); + int startingCasWriteBeginRejects = getCasWriteBeginRejects(coordinatorIndex); + int startingCasWriteAcceptRejects = getCasWriteAcceptRejects(coordinatorIndex); + query.accept(key); + assertEquals("Accord writes", expectedAccordWriteCount, getAccordWriteCount(coordinatorIndex) - startingWriteCount); + assertEquals("CAS writes", expectedCasWriteCount, getCasWriteCount(coordinatorIndex) - startingCasWriteCount); + assertEquals("Key Migrations", expectedKeyMigrationCount, getKeyMigrationCount(coordinatorIndex) - startingKeyMigrationCount); + assertEquals("CAS Begin rejects", expectedCasBeginRejects, getCasWriteBeginRejects(coordinatorIndex) - startingCasWriteBeginRejects); + assertEquals("CAS Accept rejects", expectedCasAcceptRejects, getCasWriteAcceptRejects(coordinatorIndex) - startingCasWriteAcceptRejects); + } + + private static Object[][] assertTargetAccordRead(Function query, int coordinatorIndex, int key, int expectedAccordReadCount, int expectedCasPrepareCount, int expectedKeyMigrationCount, int expectedCasReadBeginRejects, int expectedCasReadAcceptRejects) + { + int startingReadCount = getAccordReadCount(coordinatorIndex); + int startingCasPrepareCount = getCasPrepareCount(coordinatorIndex); + int startingKeyMigrationCount = getKeyMigrationCount(coordinatorIndex); + int startingCasReadBeginRejects = getCasReadBeginRejects(coordinatorIndex); + int startingCasReadAcceptRejects = getCasReadAcceptRejects(coordinatorIndex); + Object[][] result = query.apply(key); + assertEquals("Accord reads", expectedAccordReadCount, getAccordReadCount(coordinatorIndex) - startingReadCount); + assertEquals("CAS prepares", expectedCasPrepareCount, getCasPrepareCount(coordinatorIndex) - startingCasPrepareCount); + assertEquals("Key Migrations", expectedKeyMigrationCount, getKeyMigrationCount(coordinatorIndex) - startingKeyMigrationCount); + assertEquals("CAS Begin rejects", expectedCasReadBeginRejects, getCasReadBeginRejects(coordinatorIndex) - startingCasReadBeginRejects); + assertEquals("CAS Accept rejects", expectedCasReadAcceptRejects, getCasReadAcceptRejects(coordinatorIndex) - startingCasReadAcceptRejects); + return result; + } + + private static void assertTargetPaxosWrite(Consumer query, int coordinatorIndex, int key, int expectedAccordWriteCount, int expectedCasWriteCount, int expectedKeyMigrationCount, int expectedMigrationRejects, int expectedSkippedReads) + { + int startingWriteCount = getAccordWriteCount(coordinatorIndex); + int startingCasWriteCount = getCasWriteCount(coordinatorIndex); + int startingKeyMigrationCount = getKeyMigrationCount(coordinatorIndex); + int startingMigrationRejectsCount = getAccordMigrationRejects(coordinatorIndex); + int startingSkippedReadsCount = getAccordMigrationSkippedReads(); + query.accept(key); + assertEquals("Accord writes", expectedAccordWriteCount, getAccordWriteCount(coordinatorIndex) - startingWriteCount); + assertEquals("CAS writes", expectedCasWriteCount, getCasWriteCount(coordinatorIndex) - startingCasWriteCount); + assertEquals("Key Migrations", expectedKeyMigrationCount, getKeyMigrationCount(coordinatorIndex) - startingKeyMigrationCount); + assertEquals("Accord migration rejects", expectedMigrationRejects, getAccordMigrationRejects(coordinatorIndex) - startingMigrationRejectsCount); + assertEquals("Accord skipped reads", expectedSkippedReads, getAccordMigrationSkippedReads() - startingSkippedReadsCount); + } + + @Test + public void testPaxosToAccordCAS() throws Exception + { + test(format(TABLE_FMT, currentTable), + cluster -> { + String casCQL = format(CAS_FMT, currentTable, CLUSTERING_VALUE); + Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); + Consumer runCasApplies = key -> assertRowEquals(cluster, new Object[]{true}, casCQL, key); + Consumer runCasOnSecondNode = key -> assertEquals( "[applied]", cluster.coordinator(2).executeWithResult(casCQL, ANY, key).names().get(0)); + String tableName = currentTable.split("\\.")[1]; + int migratingKey = getKeyBetweenTokens(midToken, maxToken); + int notMigratingKey = getKeyBetweenTokens(minToken, midToken); + Range migratingRange = new Range(midToken, maxToken); + List> migratingRanges = ImmutableList.of(migratingRange); + + // Not actually migrating yet so should do nothing special + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 0, 1, 0, 0, 0); + + // Mark ranges migrating and check migration state is correct + nodetool(coordinator, "consensus_admin", "begin-migration", "-st", midToken.toString(), "-et", maxToken.toString(), "-tp", "accord", KEYSPACE, tableName); + assertMigrationState(tableName, ConsensusMigrationTarget.accord, emptyList(), migratingRanges, 1); + + // Should be routed directly to Accord, and perform key migration, as well as key migration read in Accord + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 1, 0, 1, 0, 0); + + // Should not repeat key migration, and should still do a migration read in Accord + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 1, 0, 0, 0, 0); + + // Should run on Paxos since it is not in the migrating range + assertTargetAccordWrite(runCasNoApply, 1, notMigratingKey, 0, 1, 0, 0, 0); + + // Check that the coordinator on the other node also has saved that the key migration was performed + // and runs the query on Accord immediately without key migration + assertTargetAccordWrite(runCasOnSecondNode, 2, migratingKey, 1, 0, 0, 0, 0); + + // Forced repair while a node is down shouldn't work, use repair instead of finish-migration because repair exposes --force + // and regular Cassandra repairs are eligible to drive migration so it's important they check --force and down nodes + InetAddressAndPort secondNodeBroadcastAddress = InetAddressAndPort.getByAddress(cluster.get(2).broadcastAddress()); + cluster.get(1).runOnInstance(() -> { + EndpointState endpointState = Gossiper.instance.getEndpointStateForEndpoint(secondNodeBroadcastAddress); + Gossiper.runInGossipStageBlocking(() -> Gossiper.instance.markDead(secondNodeBroadcastAddress, endpointState)); + }); + nodetool(coordinator, "repair", "--force"); + assertMigrationState(tableName, ConsensusMigrationTarget.accord, emptyList(), migratingRanges, 1); + cluster.get(1).runOnInstance(() -> { + EndpointState endpointState = Gossiper.instance.getEndpointStateForEndpoint(secondNodeBroadcastAddress); + Gossiper.runInGossipStageBlocking(() -> Gossiper.instance.realMarkAlive(secondNodeBroadcastAddress, endpointState)); + }); + + // Full repair should complete the migration and update the metadata, adding --force when nodes are up should be fine + nodetool(coordinator, "repair", "--force"); + assertMigrationState(tableName, ConsensusMigrationTarget.accord, migratingRanges, emptyList(), 0); + + // Should run on Accord, and not perform key migration nor should it need to perform a migration read in Accord now that it is repaired + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 1, 0, 0, 0, 0); + + // Should run on Paxos, and not perform key migration + assertTargetAccordWrite(runCasNoApply, 1, notMigratingKey, 0, 1, 0, 0, 0); + + // Pivot to testing repair with a subrange of the migrating range as well as key migration + // Will use the unmigrated range between lowerMidToken and midToken + nodetool(coordinator, "consensus_admin", "begin-migration", "-st", lowerMidToken.toString(), "-et", midToken.toString(), "-tp", "accord", KEYSPACE, tableName); + + // Generate several keys to test with instead of resetting key state + Iterator testingKeys = getKeysBetweenTokens(lowerMidToken, midToken); + migratingKey = testingKeys.next(); + + // Check that Paxos repair is run and actually repairs a transaction that was accepted, but not committed + String ballotString = BallotGenerator.Global.nextBallot(Flag.GLOBAL).toString(); + saveAcceptedPaxosProposal(tableName, ballotString, migratingKey); + // PaxosRepair will have inserted a condition matching row, so it can apply, demonstrating repair and + // key migration occurred + assertTargetAccordWrite(runCasApplies, 1, migratingKey, 1, 0, 1, 0, 0); + + // This will force the request to run on Paxos up to Accept + // and the accept will be rejected at both nodes and we are certain we need to retry the transaction + cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); + // Update inserted row so the condition can apply, if the condition check doesn't apply + // then it won't get to propose/accept + migratingKey = testingKeys.next(); + Consumer makeCASApply = key -> cluster.coordinator(1).execute("UPDATE " + currentTable + " SET v = 42 WHERE id = ? AND c = ?", ALL, key, CLUSTERING_VALUE); + makeCASApply.accept(migratingKey); + assertTargetAccordWrite(runCasApplies, 1, migratingKey, 1, 1, 1, 0, 1); + + // One node will now accept the other will reject and we are uncertain if we should retry the transaction + // and should surface that as a timeout exception + migratingKey = testingKeys.next(); + makeCASApply.accept(migratingKey); + cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToAccept())); + try + { + runCasNoApply.accept(migratingKey); + fail("Should have thrown timeout exception"); + } + catch (Throwable t) + { + if (!t.getClass().getName().equals("org.apache.cassandra.exceptions.CasWriteTimeoutException")) + throw new RuntimeException(t); + } + + // Test that if we find out about a migration from the prepare phase Paxos.begin we + // retry it on Accord + cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new RoutesToPaxosOnce())); + // Should exit Paxos from begin, key migration should occur because it's a new key, and Accord will need to do a migration read + assertTargetAccordWrite(runCasNoApply, 1, testingKeys.next(), 1, 1, 1, 1, 0); + + // Now do two repairs to complete the migration repair, and we are done with black box integration testing + // First repair is a range smack dab in the middle + Token startTokenForRepair = partitioner.midpoint(lowerMidToken, midToken); + Token endTokenForRepair = partitioner.midpoint(startTokenForRepair, midToken); + nodetool(coordinator, "consensus_admin", "finish-migration", "-st", startTokenForRepair.toString(), "-et", endTokenForRepair.toString()); + List> migratedRanges = ImmutableList.of(new Range<>(startTokenForRepair, endTokenForRepair), migratingRange); + List> midMigratingRanges = ImmutableList.of(new Range<>(lowerMidToken, startTokenForRepair), new Range<>(endTokenForRepair, midToken)); + List> migratingAndMigratedRanges = ImmutableList.of(new Range<>(lowerMidToken, maxToken)); + assertMigrationState(tableName, ConsensusMigrationTarget.accord, migratedRanges, midMigratingRanges, 1); + + nodetool(coordinator, "consensus_admin", "finish-migration"); + assertMigrationState(tableName, ConsensusMigrationTarget.accord, migratingAndMigratedRanges, emptyList(), 0); + }); + } + + /* + * Read has a few code paths that are separate from CAS that need to be tested + * such as switching consensus protocol, rejecting read during accept, and throwing + * timeout exception if uncertain about side effects + */ + @Test + public void testPaxosToAccordSerialRead() throws Exception + { + test(format(TABLE_FMT, currentTable), + cluster -> { + String tableName = currentTable.split("\\.")[1]; + String readCQL = format("SELECT * FROM %s WHERE id = ? and c = %s", currentTable, CLUSTERING_VALUE); + Function runRead = key -> cluster.coordinator(1).execute(readCQL, SERIAL, key); + Range migratingRange = new Range<>(new LongToken(Long.MIN_VALUE + 1), new LongToken(Long.MIN_VALUE)); + List> migratingRanges = ImmutableList.of(migratingRange); + int key = 0; + + assertTargetAccordRead(runRead, 1, 0, 0, 1, 0, 0, 0); + // Mark wrap around range as migrating + nodetool(coordinator, "consensus_admin", "begin-migration", "-st", String.valueOf(Long.MIN_VALUE + 1), "-et", String.valueOf(Long.MIN_VALUE), "-tp", "accord", KEYSPACE, tableName); + assertMigrationState(tableName, ConsensusMigrationTarget.accord, emptyList(), migratingRanges, 1); + // Should run directly on accord, migrate the key, and perform a quorum read from Accord, Paxos repair will run prepare once + assertTargetAccordRead(runRead, 1, key++, 1, 1, 1, 0, 0); + + // Should run up to accept with both nodes refusing to accept + savePromisedAndCommittedPaxosProposal(tableName, key); + cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); + assertTargetAccordRead(runRead, 1, key++, 1, 2, 1, 0, 1); + }); + } + + @Test + public void testAccordToPaxos() throws Exception + { + test(format(TABLE_FMT, currentTable), + cluster -> { + String casCQL = format(CAS_FMT, currentTable, CLUSTERING_VALUE); + Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); + String tableName = currentTable.split("\\.")[1]; + + // Mark a subrange as migrating and finish migrating half of it + nodetool(coordinator, "consensus_admin", "begin-migration", "-st", midToken.toString(), "-et", maxToken.toString(), "-tp", "accord", KEYSPACE, tableName); + nodetool(coordinator, "consensus_admin", "finish-migration", "-st", midToken.toString(), "-et", "3074457345618258601"); + nodetool(coordinator, "consensus_admin", "finish-migration", "-st", "3074457345618258601", "-et", upperMidToken.toString()); + Range accordMigratedRange = new Range(midToken, upperMidToken); + Range accordMigratingRange = new Range(upperMidToken, maxToken); + assertMigrationState(tableName, ConsensusMigrationTarget.accord, ImmutableList.of(accordMigratedRange), ImmutableList.of(accordMigratingRange), 1); + + // Test that we can reverse the migration and go back to Paxos + nodetool(coordinator, "consensus_admin", "set-target-protocol", "-tp", "paxos", KEYSPACE, tableName); + assertMigrationState(tableName, ConsensusMigrationTarget.paxos, ImmutableList.of(new Range(minToken, midToken), new Range(maxToken, minToken)), ImmutableList.of(accordMigratingRange), 1); + Iterator paxosNonMigratingKeys = getKeysBetweenTokens(minToken, midToken); + Iterator paxosMigratingKeys = getKeysBetweenTokens(upperMidToken, maxToken); + Iterator accordKeys = getKeysBetweenTokens(midToken, upperMidToken); + + // Paxos non-migrating keys should run on Paxos as per normal + assertTargetPaxosWrite(runCasNoApply, 1, paxosNonMigratingKeys.next(), 0, 1, 0, 0, 0); + + // Paxos migrating keys should be key migrated which means a local barrier is run by Paxos during read at each replica, the key migration barrier is also counted as a write + assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 1, 1, 1, 0, 0); + + // A key from a range migrated to Accord is now not migrating/migrated and should be accessed through Accord + assertTargetPaxosWrite(runCasNoApply, 1, accordKeys.next(), 1, 0, 0, 0, 0); + + // If an Accord transaction races with cluster metadata updates it should be rejected if the epoch it runs in contains the migration + cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new RoutesToAccordOnce())); + assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 2, 1, 1, 1, 1); + + // Repair the currently migrating range from when targets were switched, but it's not an Accord repair, this is to make sure the wrong repair type doesn't trigger progress + nodetool(coordinator, "repair", "-st", upperMidToken.toString(), "-et", maxAlignedWithLocalRanges.toString()); + assertMigrationState(tableName, ConsensusMigrationTarget.paxos, ImmutableList.of(new Range(minToken, midToken), new Range(maxToken, minToken)), ImmutableList.of(accordMigratingRange), 1); + + // Paxos migrating keys should still need key migration after non-Accord repair + assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 1, 1, 1, 0, 0); + + // Now do it with an Accord repair so key migration shouldn't be necessary + nodetool(coordinator, "consensus_admin", "finish-migration", "-st", upperMidToken.toString(), "-et", maxAlignedWithLocalRanges.toString()); + Range repairedRange = new Range(upperMidToken, maxAlignedWithLocalRanges); + // Sliver remaining because of precise repairs + // TODO This precision isn't needed for Accord repair? Worth lifting that restriction or keep it consistent? + Range remainingRange = new Range(maxAlignedWithLocalRanges, maxToken); + assertMigrationState(tableName, ConsensusMigrationTarget.paxos, ImmutableList.of(new Range(minToken, midToken), repairedRange, new Range(maxToken, minToken)), ImmutableList.of(remainingRange), 1); + + // Paxos migrating keys shouldn't need key migration after Accord repair + assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 0, 1, 0, 0, 0); + }); + } + + private static void assertMigrationState(String tableName, ConsensusMigrationTarget target, List> migratedRanges, List> migratingRanges, int numMigratingEpochs) throws Throwable + { + // Validate nodetool consensus admin list output + String yamlResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list"); + Map yamlStateMap = new Yaml().load(yamlResultString); + String minifiedYamlResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "minified-yaml"); + Map minifiedYamlStateMap = new Yaml().load(minifiedYamlResultString); + String jsonResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "json"); + Map jsonStateMap = JsonUtils.JSON_OBJECT_MAPPER.readValue(jsonResultString, new TypeReference>(){}); + String minifiedJsonResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "minified-json"); + Map minifiedJsonStateMap = JsonUtils.JSON_OBJECT_MAPPER.readValue(minifiedJsonResultString, new TypeReference>(){}); + + List tableIds = new ArrayList<>(); + for (Map migrationStateMap : ImmutableList.of(yamlStateMap, jsonStateMap, minifiedYamlStateMap, minifiedJsonStateMap)) + { + assertEquals(PojoToString.CURRENT_VERSION, migrationStateMap.get("version")); + assertTrue(Epoch.EMPTY.getEpoch() < ((Number) migrationStateMap.get("lastModifiedEpoch")).longValue()); + List> tableStates = (List>) migrationStateMap.get("tableStates"); + assertEquals(tableStates.size(), 1); + Map tableStateMap = tableStates.get(0); + assertEquals(tableName, tableStateMap.get("table")); + assertEquals(KEYSPACE, tableStateMap.get("keyspace")); + tableIds.add((String) tableStateMap.get("tableId")); + List> migratedRangesFromStateMap = ((List) tableStateMap.get("migratedRanges")).stream().map(Range::fromString).collect(toImmutableList()); + assertEquals(migratedRanges, migratedRangesFromStateMap); + Map>> migratingRangesByEpochFromStateMap = new LinkedHashMap<>(); + for (Map.Entry> entry : ((Map>) tableStateMap.get("migratingRangesByEpoch")).entrySet()) + { + long epoch = entry.getKey() instanceof Number ? ((Number)entry.getKey()).longValue() : Long.valueOf((String)entry.getKey()); + migratingRangesByEpochFromStateMap.put(epoch, entry.getValue().stream().map(Range::fromString).collect(toImmutableList())); + } + if (migratingRanges.isEmpty()) + assertEquals(0, migratingRangesByEpochFromStateMap.size()); + else + assertEquals(migratingRanges, migratingRangesByEpochFromStateMap.values().iterator().next()); + } + + // Also check JSON format at least loads without error + // Validate in memory state at each node + List> migratingAndMigratedRanges = normalize(ImmutableList.>builder().addAll(migratedRanges).addAll(migratingRanges).build()); + spinUntilSuccess(() -> { + for (IInvokableInstance instance : SHARED_CLUSTER) + { + ConsensusMigrationState snapshot = getMigrationStateSnapshot(instance); + assertEquals(1, snapshot.tableStates.size()); + TableMigrationState state = snapshot.tableStates.values().iterator().next(); + assertEquals(KEYSPACE, state.keyspaceName); + assertEquals(tableName, state.tableName); + for (String tableId : tableIds) + assertEquals(tableId, state.tableId.toString()); + assertEquals(target, state.targetProtocol); + assertEquals("Migrated ranges:", migratedRanges, state.migratedRanges); + assertEquals("Migrating ranges:", migratingRanges, state.migratingRanges); + assertEquals("Migrating and migrated ranges:", migratingAndMigratedRanges, state.migratingAndMigratedRanges); + assertEquals(numMigratingEpochs, state.migratingRangesByEpoch.size()); + if (migratingRanges.isEmpty()) + assertEquals(0, state.migratingRangesByEpoch.size()); + else + assertEquals(migratingRanges, state.migratingRangesByEpoch.values().iterator().next()); + } + }); + } + + /** + * Save a promise that is after the committed one to make a subsequent read not linearizable + */ + private static void savePromisedAndCommittedPaxosProposal(String tableName, int key) + { + String committedBallotString = BallotGenerator.Global.nextBallot(Flag.GLOBAL).toString(); + String promisedBallotString = BallotGenerator.Global.nextBallot(Flag.GLOBAL).toString(); + forEach(() -> { + TableMetadata metadata = ColumnFamilyStore.getIfExists(KEYSPACE, tableName).metadata(); + ByteBuffer lowMidMigratingKeyBuffer = ByteBuffer.wrap(ByteArrayUtil.bytes(key)); + DecoratedKey dk = new BufferDecoratedKey(DatabaseDescriptor.getPartitioner().getToken(lowMidMigratingKeyBuffer), lowMidMigratingKeyBuffer); + try (PaxosState state = PaxosState.get(dk, metadata)) + { + Ballot ballot = Ballot.fromString(committedBallotString); + PartitionUpdateBuilder updateBuilder = new PartitionUpdateBuilder(metadata, key); + updateBuilder.row(CLUSTERING_VALUE).add("v", 42); + + state.commit(new Agreed(ballot, updateBuilder.build())); + state.promiseIfNewer(Ballot.fromString(promisedBallotString), true); + } + }); + } + + private static void saveAcceptedPaxosProposal(String tableName, String ballotString, int key) + { + forEach(() -> { + TableMetadata metadata = ColumnFamilyStore.getIfExists(KEYSPACE, tableName).metadata(); + ByteBuffer lowMidMigratingKeyBuffer = ByteBuffer.wrap(ByteArrayUtil.bytes(key)); + DecoratedKey dk = new BufferDecoratedKey(DatabaseDescriptor.getPartitioner().getToken(lowMidMigratingKeyBuffer), lowMidMigratingKeyBuffer); + try (PaxosState state = PaxosState.get(dk, metadata)) + { + Ballot ballot = Ballot.fromString(ballotString); + assertEquals( PROMISE, state.promiseIfNewer(ballot, true).outcome()); + PartitionUpdateBuilder updateBuilder = new PartitionUpdateBuilder(metadata, key); + updateBuilder.row(CLUSTERING_VALUE).add("v", 42); + // Set isForRepair to true to force accepting the proposal for testing purposes + assertEquals( null, state.acceptIfLatest(new Proposal(ballot, updateBuilder.build()), true).supersededBy); + } + }); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 9146192ba25a..8010a7e58166 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -26,17 +26,19 @@ import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import com.google.common.base.Splitter; +import com.google.common.primitives.Ints; +import org.junit.After; import org.junit.AfterClass; import org.junit.Before; -import org.junit.BeforeClass; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.primitives.Txn; +import accord.impl.SimpleProgressLog; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; @@ -47,23 +49,31 @@ import org.apache.cassandra.cql3.transactions.ReferenceValue; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.Cluster.Builder; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.IIsolatedExecutor.SerializableRunnable; import org.apache.cassandra.distributed.api.QueryResults; import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.shared.AssertUtils; +import org.apache.cassandra.distributed.shared.Metrics; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.distributed.util.QueryResultUtil; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; -import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FailingConsumer; -import org.apache.cassandra.utils.Shared; import static net.bytebuddy.matcher.ElementMatchers.named; -import static net.bytebuddy.matcher.ElementMatchers.takesArguments; import static org.junit.Assert.assertArrayEquals; public abstract class AccordTestBase extends TestBaseImpl @@ -71,22 +81,15 @@ public abstract class AccordTestBase extends TestBaseImpl private static final Logger logger = LoggerFactory.getLogger(AccordTestBase.class); private static final int MAX_RETRIES = 10; - @Shared - public static class State - { - public static AtomicInteger coordinateCounts = new AtomicInteger(); - } - protected static final AtomicInteger COUNTER = new AtomicInteger(0); protected static Cluster SHARED_CLUSTER; protected String currentTable; - @BeforeClass - public static void setupClass() throws IOException + public static void setupCluster(Function options, int nodes) throws IOException { - SHARED_CLUSTER = createCluster(); + SHARED_CLUSTER = createCluster(nodes, options); } @AfterClass @@ -102,12 +105,25 @@ public void setup() currentTable = KEYSPACE + ".tbl" + COUNTER.getAndIncrement(); } + @After + public void tearDown() throws Exception + { + for (IInvokableInstance instance : SHARED_CLUSTER) + instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = false); + } + protected static void assertRowSerial(Cluster cluster, String query, int k, int c, int v, int s) { Object[][] result = cluster.coordinator(1).execute(query, ConsistencyLevel.SERIAL); assertArrayEquals(new Object[]{new Object[] {k, c, v, s}}, result); } + protected static void assertRowSerial(Cluster cluster, String query, Object[]... expected) + { + Object[][] result = cluster.coordinator(1).execute(query, ConsistencyLevel.SERIAL); + AssertUtils.assertRows(result, expected); + } + protected void test(String tableDDL, FailingConsumer fn) throws Exception { test(Collections.singletonList(tableDDL), fn); @@ -136,23 +152,123 @@ protected void test(FailingConsumer fn) throws Exception test("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))", fn); } - protected int getAccordCoordinateCount() + protected static ConsensusMigrationState getMigrationStateSnapshot(IInvokableInstance instance) throws IOException + { + byte[] serializedBytes = instance.callOnInstance(() -> { + DataOutputBuffer output = new DataOutputBuffer(); + try + { + ConsensusMigrationState.serializer.serialize( + ClusterMetadata.current().consensusMigrationState, + output, Version.V0); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + return output.toByteArray(); + }); + DataInputPlus input = new DataInputBuffer(serializedBytes); + return ConsensusMigrationState.serializer.deserialize(input, Version.V0); + } + + protected static int getAccordCoordinateCount() + { + return getAccordWriteCount() + getAccordReadCount(); + } + + protected static int getCasWriteCount(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.Latency.CASWrite")); + } + + protected static int getCasPrepareCount(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.keyspace.CasPrepareLatency.distributed_test_keyspace")); + } + + protected static int getAccordWriteCount() + { + return getAccordWriteCount(1); + } + + protected static int getAccordWriteCount(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.Latency.AccordWrite")); + } + + protected static int getAccordReadCount() + { + return getAccordReadCount(1); + } + + protected static int getAccordReadCount(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.Latency.AccordRead")); + } + + protected static int getAccordMigrationRejects(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.AccordMigrationRejects.AccordWrite")); + } + + protected static int getAccordMigrationSkippedReads() { - return State.coordinateCounts.get(); + // Skipped reads can occur at any node so sum them + long sum = 0; + for (IInvokableInstance instance : SHARED_CLUSTER) + sum += instance.metrics().getCounter("org.apache.cassandra.metrics.ClientRequest.MigrationSkippedReads.AccordWrite"); + return Ints.checkedCast(sum); } - private static Cluster createCluster() throws IOException + protected static int getKeyMigrationCount(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.Table.KeyMigrationLatency.all")); + } + + protected static int getCasWriteBeginRejects(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.PaxosBeginMigrationRejects.CASWrite")); + } + + protected static int getCasReadBeginRejects(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.PaxosBeginMigrationRejects.CASRead")); + } + + protected static int getCasWriteAcceptRejects(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.PaxosAcceptMigrationRejects.CASWrite")); + } + + protected static int getCasReadAcceptRejects(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.PaxosAcceptMigrationRejects.CASRead")); + } + + protected static Metrics getMetrics(int coordinatorIndex) + { + return SHARED_CLUSTER.get(coordinatorIndex).metrics(); + } + + protected static void forEach(SerializableRunnable runnable) + { + for (IInvokableInstance instance : SHARED_CLUSTER) + instance.runOnInstance(runnable); + } + + private static Cluster createCluster(int nodes, Function options) throws IOException { // need to up the timeout else tests get flaky // disable vnode for now, but should enable before trunk - return init(Cluster.build(2) + Cluster.Builder builder = Cluster.build(nodes) .withoutVNodes() .withConfig(c -> c.with(Feature.NETWORK, Feature.GOSSIP).set("write_request_timeout", "10s") .set("transaction_timeout", "15s") - .set("legacy_paxos_strategy", "migration")) // TODO: switch back to "accord" when TrM integration works - .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install) - .withInstanceInitializer(BBAccordCoordinateCountHelper::install) - .start()); + .set("transaction_timeout", "15s")) + .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install); + builder = options.apply(builder); + return init(builder.start()); } protected static SimpleQueryResult executeAsTxn(Cluster cluster, String check, Object... boundValues) @@ -168,6 +284,18 @@ protected static SimpleQueryResult execute(Cluster cluster, String check, Object return cluster.coordinator(1).executeWithResult(check, ConsistencyLevel.ANY, boundValues); } + private static SimpleQueryResult execute(Cluster cluster, String check, ConsistencyLevel cl, Object... boundValues) + { + return cluster.coordinator(1).executeWithResult(check, cl, boundValues); + } + + protected static SimpleQueryResult assertRowEquals(Cluster cluster, SimpleQueryResult expected, String check, ConsistencyLevel cl, Object... boundValues) + { + SimpleQueryResult result = execute(cluster, check, cl, boundValues); + QueryResultUtil.assertThat(result).isEqualTo(expected); + return result; + } + protected static SimpleQueryResult assertRowEquals(Cluster cluster, SimpleQueryResult expected, String check, Object... boundValues) { SimpleQueryResult result = execute(cluster, check, boundValues); @@ -175,6 +303,11 @@ protected static SimpleQueryResult assertRowEquals(Cluster cluster, SimpleQueryR return result; } + protected static SimpleQueryResult assertRowEquals(Cluster cluster, Object[] row, String check, ConsistencyLevel cl, Object... boundValues) + { + return assertRowEquals(cluster, QueryResults.builder().row(row).build(), check, cl, boundValues); + } + protected static SimpleQueryResult assertRowEquals(Cluster cluster, Object[] row, String check, Object... boundValues) { return assertRowEquals(cluster, QueryResults.builder().row(row).build(), check, boundValues); @@ -308,25 +441,5 @@ public static void install(ClassLoader classLoader, Integer num) } } - public static class BBAccordCoordinateCountHelper - { - static void install(ClassLoader cl, int nodeNumber) - { - if (nodeNumber != 1) - return; - new ByteBuddy().rebase(AccordService.class) - .method(named("coordinate").and(takesArguments(2))) - .intercept(MethodDelegation.to(BBAccordCoordinateCountHelper.class)) - .make() - .load(cl, ClassLoadingStrategy.Default.INJECTION); - } - - public static TxnData coordinate(Txn txn, @SuperCall Callable actual) throws Exception - { - State.coordinateCounts.incrementAndGet(); - return actual.call(); - } - } - protected abstract Logger logger(); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java index 0b1cee3e0a23..084709e4964a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java @@ -18,9 +18,11 @@ package org.apache.cassandra.distributed.test.accord; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.List; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,6 +30,8 @@ import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.service.accord.AccordService; +import static java.util.function.UnaryOperator.identity; + public class NewSchemaTest extends AccordTestBase { private static final Logger logger = LoggerFactory.getLogger(NewSchemaTest.class); @@ -38,6 +42,12 @@ protected Logger logger() return logger; } + @BeforeClass + public static void setupClass() throws IOException + { + AccordTestBase.setupCluster(identity(), 2); + } + @Test public void test() { diff --git a/test/distributed/org/apache/cassandra/distributed/test/guardrails/GuardrailCollectionSizeOnSSTableWriteTest.java b/test/distributed/org/apache/cassandra/distributed/test/guardrails/GuardrailCollectionSizeOnSSTableWriteTest.java index 6b7daef26feb..3405185d451d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/guardrails/GuardrailCollectionSizeOnSSTableWriteTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/guardrails/GuardrailCollectionSizeOnSSTableWriteTest.java @@ -26,9 +26,12 @@ import com.datastax.driver.core.Session; import com.datastax.driver.core.SimpleStatement; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.guardrails.Guardrails; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IInvokableInstance; import static java.nio.ByteBuffer.allocate; @@ -57,7 +60,6 @@ public static void setupCluster() throws IOException .set("collection_size_warn_threshold", WARN_THRESHOLD + "B") .set("collection_size_fail_threshold", FAIL_THRESHOLD + "B")) .start()); - cluster.disableAutoCompaction(KEYSPACE); driverCluster = com.datastax.driver.core.Cluster.builder().addContactPoint("127.0.0.1").build(); driverSession = driverCluster.connect(); } @@ -84,7 +86,7 @@ protected Cluster getCluster() @Test public void testSetSize() throws Throwable { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v set)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v set)"); execute("INSERT INTO %s (k, v) VALUES (0, null)"); execute("INSERT INTO %s (k, v) VALUES (1, ?)", set()); @@ -108,7 +110,7 @@ public void testSetSize() throws Throwable @Test public void testSetSizeFrozen() { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v frozen>)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v frozen>)"); execute("INSERT INTO %s (k, v) VALUES (0, null)"); execute("INSERT INTO %s (k, v) VALUES (1, ?)", set()); @@ -123,7 +125,7 @@ public void testSetSizeFrozen() @Test public void testSetSizeWithUpdates() { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v set)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v set)"); execute("INSERT INTO %s (k, v) VALUES (0, ?)", set(allocate(1))); execute("UPDATE %s SET v = v + ? WHERE k = 0", set(allocate(1))); @@ -145,7 +147,7 @@ public void testSetSizeWithUpdates() @Test public void testSetSizeAfterCompaction() throws Throwable { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v set)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v set)"); execute("INSERT INTO %s (k, v) VALUES (0, ?)", set(allocate(1))); assertNotWarnedOnFlush(); @@ -175,7 +177,7 @@ public void testSetSizeAfterCompaction() throws Throwable @Test public void testListSize() throws Throwable { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v list)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v list)"); execute("INSERT INTO %s (k, v) VALUES (0, null)"); execute("INSERT INTO %s (k, v) VALUES (1, ?)", list()); @@ -199,7 +201,7 @@ public void testListSize() throws Throwable @Test public void testListSizeFrozen() { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v frozen>)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v frozen>)"); execute("INSERT INTO %s (k, v) VALUES (0, null)"); execute("INSERT INTO %s (k, v) VALUES (1, ?)", list()); @@ -214,7 +216,7 @@ public void testListSizeFrozen() @Test public void testListSizeWithUpdates() { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v list)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v list)"); execute("INSERT INTO %s (k, v) VALUES (0, ?)", list(allocate(1))); execute("UPDATE %s SET v = v + ? WHERE k = 0", list(allocate(1))); @@ -236,7 +238,7 @@ public void testListSizeWithUpdates() @Test public void testListSizeAfterCompaction() throws Throwable { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v list)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v list)"); execute("INSERT INTO %s (k, v) VALUES (0, ?)", list(allocate(1))); assertNotWarnedOnFlush(); @@ -266,7 +268,7 @@ public void testListSizeAfterCompaction() throws Throwable @Test public void testMapSize() throws Throwable { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v map)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v map)"); execute("INSERT INTO %s (k, v) VALUES (0, null)"); execute("INSERT INTO %s (k, v) VALUES (1, ?)", map()); @@ -297,7 +299,7 @@ public void testMapSize() throws Throwable @Test public void testMapSizeFrozen() { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v frozen>)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v frozen>)"); execute("INSERT INTO %s (k, v) VALUES (0, null)"); execute("INSERT INTO %s (k, v) VALUES (1, ?)", map()); @@ -316,7 +318,7 @@ public void testMapSizeFrozen() @Test public void testMapSizeWithUpdates() { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v map)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v map)"); execute("INSERT INTO %s (k, v) VALUES (0, ?)", map(allocate(1), allocate(1))); execute("UPDATE %s SET v = v + ? WHERE k = 0", map(allocate(1), allocate(1))); @@ -350,7 +352,7 @@ public void testMapSizeWithUpdates() @Test public void testMapSizeAfterCompaction() { - schemaChange("CREATE TABLE %s (k int PRIMARY KEY, v map)"); + createTable("CREATE TABLE %s (k int PRIMARY KEY, v map)"); execute("INSERT INTO %s (k, v) VALUES (0, ?)", map(allocate(1), allocate(1))); execute("UPDATE %s SET v = v + ? WHERE k = 0", map(allocate(1), allocate(1))); @@ -397,7 +399,7 @@ public void testMapSizeAfterCompaction() @Test public void testCompositePartitionKey() { - schemaChange("CREATE TABLE %s (k1 int, k2 text, v set, PRIMARY KEY((k1, k2)))"); + createTable("CREATE TABLE %s (k1 int, k2 text, v set, PRIMARY KEY((k1, k2)))"); execute("INSERT INTO %s (k1, k2, v) VALUES (0, 'a', ?)", set(allocate(WARN_THRESHOLD))); assertWarnedOnFlush(warnMessage("(0, 'a')")); @@ -409,7 +411,7 @@ public void testCompositePartitionKey() @Test public void testCompositeClusteringKey() { - schemaChange("CREATE TABLE %s (k int, c1 int, c2 text, v set, PRIMARY KEY(k, c1, c2))"); + createTable("CREATE TABLE %s (k int, c1 int, c2 text, v set, PRIMARY KEY(k, c1, c2))"); execute("INSERT INTO %s (k, c1, c2, v) VALUES (1, 10, 'a', ?)", set(allocate(WARN_THRESHOLD))); assertWarnedOnFlush(warnMessage("(1, 10, 'a')")); @@ -434,4 +436,16 @@ private String failMessage(String key) { return String.format("Detected collection v in row %s in table %s of size", key, qualifiedTableName); } + + private void createTable(String cql) + { + schemaChange(cql); + for (IInvokableInstance instance : cluster) + { + instance.runOnInstance(() -> { + for (ColumnFamilyStore cs : Keyspace.open(KEYSPACE).getColumnFamilyStores()) + cs.disableAutoCompaction(); + }); + } + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java index 33402af732cb..de4b6541ed94 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java @@ -152,6 +152,7 @@ public static ClusterMetadata minimalForTesting(Epoch epoch, IPartitioner partit AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, + null, ImmutableMap.of()); } @@ -166,6 +167,7 @@ public static ClusterMetadata minimalForTesting(IPartitioner partitioner) AccordKeyspaces.EMPTY, null, null, + null, ImmutableMap.of()); } @@ -180,6 +182,7 @@ public static ClusterMetadata minimalForTesting(Keyspaces keyspaces) AccordKeyspaces.EMPTY, null, null, + null, ImmutableMap.of()); } diff --git a/test/long/org/apache/cassandra/dht/tokenallocator/Murmur3ReplicationAwareTokenAllocatorTest.java b/test/long/org/apache/cassandra/dht/tokenallocator/Murmur3ReplicationAwareTokenAllocatorTest.java index e28ecfa45347..83665f1c9b22 100644 --- a/test/long/org/apache/cassandra/dht/tokenallocator/Murmur3ReplicationAwareTokenAllocatorTest.java +++ b/test/long/org/apache/cassandra/dht/tokenallocator/Murmur3ReplicationAwareTokenAllocatorTest.java @@ -30,7 +30,7 @@ public class Murmur3ReplicationAwareTokenAllocatorTest extends AbstractReplicati @Test public void testExistingCluster() { - super.testExistingCluster(new Murmur3Partitioner(), MAX_VNODE_COUNT); + super.testExistingCluster(Murmur3Partitioner.instance, MAX_VNODE_COUNT); } @Test @@ -43,6 +43,6 @@ public void testNewCluster() private void flakyTestNewCluster() { - testNewCluster(new Murmur3Partitioner(), MAX_VNODE_COUNT); + testNewCluster(Murmur3Partitioner.instance, MAX_VNODE_COUNT); } } diff --git a/test/long/org/apache/cassandra/dht/tokenallocator/NoReplicationTokenAllocatorTest.java b/test/long/org/apache/cassandra/dht/tokenallocator/NoReplicationTokenAllocatorTest.java index 5e13519fcd42..6835f2c5b013 100644 --- a/test/long/org/apache/cassandra/dht/tokenallocator/NoReplicationTokenAllocatorTest.java +++ b/test/long/org/apache/cassandra/dht/tokenallocator/NoReplicationTokenAllocatorTest.java @@ -26,9 +26,9 @@ import com.google.common.collect.Maps; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; +import org.junit.Assert; import org.junit.Test; -import org.junit.Assert; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.RandomPartitioner; @@ -42,13 +42,13 @@ public class NoReplicationTokenAllocatorTest extends TokenAllocatorTestBase @Test public void testNewClusterWithMurmur3Partitioner() { - testNewCluster(new Murmur3Partitioner()); + testNewCluster(Murmur3Partitioner.instance); } @Test public void testNewClusterWithRandomPartitioner() { - testNewCluster(new RandomPartitioner()); + testNewCluster(RandomPartitioner.instance); } private void testNewCluster(IPartitioner partitioner) @@ -75,13 +75,13 @@ public void testNewCluster(int perUnitCount, TokenCount tc, NoReplicationStrateg @Test public void testExistingClusterWithMurmur3Partitioner() { - testExistingCluster(new Murmur3Partitioner()); + testExistingCluster(Murmur3Partitioner.instance); } @Test public void testExistingClusterWithRandomPartitioner() { - testExistingCluster(new RandomPartitioner()); + testExistingCluster(RandomPartitioner.instance); } private void testExistingCluster(IPartitioner partitioner) diff --git a/test/long/org/apache/cassandra/dht/tokenallocator/RandomReplicationAwareTokenAllocatorTest.java b/test/long/org/apache/cassandra/dht/tokenallocator/RandomReplicationAwareTokenAllocatorTest.java index bb1a2c8f3c57..4e7982e0c7a5 100644 --- a/test/long/org/apache/cassandra/dht/tokenallocator/RandomReplicationAwareTokenAllocatorTest.java +++ b/test/long/org/apache/cassandra/dht/tokenallocator/RandomReplicationAwareTokenAllocatorTest.java @@ -34,13 +34,13 @@ public class RandomReplicationAwareTokenAllocatorTest extends AbstractReplicatio @Test public void testExistingCluster() { - testExistingCluster(new RandomPartitioner(), MAX_VNODE_COUNT); + testExistingCluster(RandomPartitioner.instance, MAX_VNODE_COUNT); } @Test public void testNewClusterr() { - testNewCluster(new RandomPartitioner(), MAX_VNODE_COUNT); + testNewCluster(RandomPartitioner.instance, MAX_VNODE_COUNT); } } diff --git a/test/microbench/org/apache/cassandra/test/microbench/ZeroCopyStreamingBench.java b/test/microbench/org/apache/cassandra/test/microbench/ZeroCopyStreamingBench.java index e10b91d9b2e5..07ff3edce90b 100644 --- a/test/microbench/org/apache/cassandra/test/microbench/ZeroCopyStreamingBench.java +++ b/test/microbench/org/apache/cassandra/test/microbench/ZeroCopyStreamingBench.java @@ -78,6 +78,7 @@ import org.openjdk.jmh.annotations.Threads; import org.openjdk.jmh.annotations.Warmup; +import static java.util.Collections.emptyList; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; /** @@ -130,14 +131,14 @@ public void setupBenchmark() throws IOException serializedBlockStream = blockStreamCaptureChannel.getSerializedStream(); out.close(); - session.prepareReceiving(new StreamSummary(sstable.metadata().id, 1, serializedBlockStream.readableBytes())); + session.prepareReceiving(new StreamSummary(sstable.metadata().id, emptyList(), 1, serializedBlockStream.readableBytes())); CassandraStreamHeader entireSSTableStreamHeader = CassandraStreamHeader.builder() .withSSTableVersion(sstable.descriptor.version) .withSSTableLevel(0) .withEstimatedKeys(sstable.estimatedKeys()) - .withSections(Collections.emptyList()) + .withSections(emptyList()) .withSerializationHeader(sstable.header.toComponent()) .withComponentManifest(context.manifest()) .isEntireSSTable(true) @@ -219,7 +220,7 @@ private StreamSession setupStreamingSessionForTest() StreamResultFuture future = StreamResultFuture.createInitiator(nextTimeUUID(), StreamOperation.BOOTSTRAP, Collections.emptyList(), streamCoordinator); InetAddressAndPort peer = FBUtilities.getBroadcastAddressAndPort(); - streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, Collections.emptyList(), Collections.emptyList(), StreamSession.State.INITIALIZED, null)); + streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, emptyList(), emptyList(), StreamSession.State.INITIALIZED, null)); StreamSession session = streamCoordinator.getOrCreateOutboundSession(peer); session.init(future); diff --git a/test/microbench/org/apache/cassandra/test/microbench/btree/AtomicBTreePartitionUpdateBench.java b/test/microbench/org/apache/cassandra/test/microbench/btree/AtomicBTreePartitionUpdateBench.java index 7083832c012a..a2a86a9f65c8 100644 --- a/test/microbench/org/apache/cassandra/test/microbench/btree/AtomicBTreePartitionUpdateBench.java +++ b/test/microbench/org/apache/cassandra/test/microbench/btree/AtomicBTreePartitionUpdateBench.java @@ -68,11 +68,11 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableMetadataRef; +import org.apache.cassandra.utils.BulkIterator; import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.btree.UpdateFunction; import org.apache.cassandra.utils.concurrent.ImmediateFuture; import org.apache.cassandra.utils.concurrent.OpOrder; -import org.apache.cassandra.utils.BulkIterator; import org.apache.cassandra.utils.memory.ByteBufferCloner; import org.apache.cassandra.utils.memory.Cloner; import org.apache.cassandra.utils.memory.HeapPool; @@ -107,7 +107,7 @@ public class AtomicBTreePartitionUpdateBench private static final MutableDeletionInfo NO_DELETION_INFO = new MutableDeletionInfo(DeletionTime.LIVE); private static final HeapPool POOL = new HeapPool(Long.MAX_VALUE, 1.0f, () -> ImmediateFuture.success(Boolean.TRUE)); private static final ByteBuffer zero = Int32Type.instance.decompose(0); - private static final DecoratedKey decoratedKey = new BufferDecoratedKey(new ByteOrderedPartitioner().getToken(zero), zero); + private static final DecoratedKey decoratedKey = new BufferDecoratedKey(ByteOrderedPartitioner.instance.getToken(zero), zero); static { diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index fac497707602..4b71603565da 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -67,6 +67,7 @@ import org.apache.cassandra.simulator.asm.InterceptAsClassTransformer; import org.apache.cassandra.simulator.asm.NemesisFieldSelectors; import org.apache.cassandra.simulator.cluster.ClusterActions; +import org.apache.cassandra.simulator.cluster.ClusterActions.ConsensusChange; import org.apache.cassandra.simulator.cluster.ClusterActions.TopologyChange; import org.apache.cassandra.simulator.systems.Failures; import org.apache.cassandra.simulator.systems.InterceptedWait.CaptureSites.Capture; @@ -151,6 +152,9 @@ public static abstract class Builder protected TopologyChange[] topologyChanges = TopologyChange.values(); protected int topologyChangeLimit = -1; + protected ConsensusChange[] consensusChanges = ConsensusChange.values(); + protected int consensusChangeLimit = -1; + protected int primaryKeyCount; protected int secondsToSimulate; @@ -176,7 +180,8 @@ public static abstract class Builder schedulerLongDelayNanos = new LongRange(50, 5000, MICROSECONDS, NANOSECONDS), clockDriftNanos = new LongRange(1, 5000, MILLISECONDS, NANOSECONDS), clockDiscontinuitIntervalNanos = new LongRange(10, 60, SECONDS, NANOSECONDS), - topologyChangeIntervalNanos = new LongRange(5, 15, SECONDS, NANOSECONDS); + topologyChangeIntervalNanos = new LongRange(5, 15, SECONDS, NANOSECONDS), + consensusChangeIntervalNanos = new LongRange(1, 5, SECONDS, NANOSECONDS); @@ -193,6 +198,7 @@ public static abstract class Builder protected HeapPool.Logged.Listener memoryListener; protected SimulatedTime.Listener timeListener = (i1, i2) -> {}; protected LongConsumer onThreadLocalRandomCheck; + protected String lwtStrategy = "migration"; public Builder failures(Failures failures) { @@ -312,6 +318,24 @@ public Builder topologyChangeLimit(int topologyChangeLimit) return this; } + public Builder consensusChanges(ConsensusChange[] consensusChanges) + { + this.consensusChanges = consensusChanges; + return this; + } + + public Builder consensusChangeIntervalNanos(LongRange consensusChangeIntervalNanos) + { + this.consensusChangeIntervalNanos = consensusChangeIntervalNanos; + return this; + } + + public Builder consensusChangeLimit(int consensusChangeLimit) + { + this.consensusChangeLimit = consensusChangeLimit; + return this; + } + public int primaryKeyCount() { return primaryKeyCount; @@ -551,6 +575,12 @@ public Builder onThreadLocalRandomCheck(LongConsumer runnable) return this; } + public Builder lwtStrategy(String strategy) + { + this.lwtStrategy = strategy; + return this; + } + public abstract ClusterSimulation create(long seed) throws IOException; } @@ -840,6 +870,8 @@ public void afterStartup(IInstance i) scheduler = builder.schedulerFactory.create(random); options = new ClusterActions.Options(builder.topologyChangeLimit, Choices.uniform(KindOfSequence.values()).choose(random).period(builder.topologyChangeIntervalNanos, random), Choices.random(random, builder.topologyChanges), + builder.consensusChangeLimit, Choices.uniform(KindOfSequence.values()).choose(random).period(builder.consensusChangeIntervalNanos, random), + Choices.random(random, builder.consensusChanges), minRf, initialRf, maxRf, null); this.factory = factory; } diff --git a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java index 61b15dd40ed6..511f443ceb14 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java @@ -41,6 +41,7 @@ import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.simulator.Debug.Info; import org.apache.cassandra.simulator.Debug.Levels; +import org.apache.cassandra.simulator.cluster.ClusterActions.ConsensusChange; import org.apache.cassandra.simulator.cluster.ClusterActions.TopologyChange; import org.apache.cassandra.simulator.debug.SelfReconcile; import org.apache.cassandra.simulator.logging.SeedDefiner; @@ -61,9 +62,10 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.CLOCK_MONOTONIC_APPROX; import static org.apache.cassandra.config.CassandraRelevantProperties.CLOCK_MONOTONIC_PRECISE; import static org.apache.cassandra.config.CassandraRelevantProperties.CONSISTENT_DIRECTORY_LISTINGS; +import static org.apache.cassandra.config.CassandraRelevantProperties.DETERMINISM_SSTABLE_COMPRESSION_DEFAULT; import static org.apache.cassandra.config.CassandraRelevantProperties.DETERMINISM_UNSAFE_UUID_NODE; +import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_GOSSIP_ENDPOINT_REMOVAL; import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_SSTABLE_ACTIVITY_TRACKING; -import static org.apache.cassandra.config.CassandraRelevantProperties.DETERMINISM_SSTABLE_COMPRESSION_DEFAULT; import static org.apache.cassandra.config.CassandraRelevantProperties.DTEST_API_LOG_TOPOLOGY; import static org.apache.cassandra.config.CassandraRelevantProperties.GOSSIPER_SKIP_WAITING_TO_SETTLE; import static org.apache.cassandra.config.CassandraRelevantProperties.IGNORE_MISSING_NATIVE_FILE_HINTS; @@ -74,7 +76,6 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.RING_DELAY; import static org.apache.cassandra.config.CassandraRelevantProperties.SHUTDOWN_ANNOUNCE_DELAY_IN_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_AUTH_DEFAULT_RF; -import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_GOSSIP_ENDPOINT_REMOVAL; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_JVM_DTEST_DISABLE_SSL; import static org.apache.cassandra.simulator.debug.Reconcile.reconcileWith; import static org.apache.cassandra.simulator.debug.Record.record; @@ -178,9 +179,16 @@ protected abstract static class BasicCommand { - builder.scheduler(stream(kinds.split(",")) - .filter(v -> !v.isEmpty()) - .map(v -> RunnableActionScheduler.Kind.valueOf(toUpperCaseLocalized(v))) - .toArray(RunnableActionScheduler.Kind[]::new)); + Optional.ofNullable(consensusChanges).ifPresent(consensusChanges -> { + builder.consensusChanges(stream(consensusChanges.split(",")) + .filter(v -> !v.isEmpty()) + .map(v -> ConsensusChange.valueOf(toUpperCaseLocalized(v))) + .toArray(ConsensusChange[]::new)); }); - + parseNanosRange(Optional.ofNullable(consensusChangeInterval)).ifPresent(builder::consensusChangeIntervalNanos); + builder.consensusChangeLimit(Integer.parseInt(consensusChangeLimit)); Optional.ofNullable(this.capture) .map(s -> s.split(",")) .map(s -> new Capture( @@ -324,6 +336,8 @@ protected void propagate(B builder) .orElse(new int[0]); builder.debug(debugLevels, debugPrimaryKeys); } + + Optional.ofNullable(lwtStrategy).ifPresent(builder::lwtStrategy); } public void run(B builder) throws IOException diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/ClusterActions.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/ClusterActions.java index 60b6a62e5726..ea7256247fdc 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/cluster/ClusterActions.java +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/ClusterActions.java @@ -85,6 +85,11 @@ public enum TopologyChange JOIN, LEAVE, REPLACE, CHANGE_RF } + public enum ConsensusChange + { + ACCORD_MIGRATE + } + public static class Options { public final int topologyChangeLimit; @@ -92,6 +97,9 @@ public static class Options public final Choices allChoices; public final Choices choicesNoLeave; public final Choices choicesNoJoin; + public final int consensusChangeLimit; + public final KindOfSequence.Period consensusChangeInterval; + public final Choices consensusChoices; public final int[] minRf, initialRf, maxRf; public final PaxosVariant changePaxosVariantTo; @@ -108,32 +116,45 @@ public Options(Options copy, PaxosVariant changePaxosVariantTo) this.allChoices = copy.allChoices; this.choicesNoLeave = copy.choicesNoLeave; this.choicesNoJoin = copy.choicesNoJoin; + this.consensusChangeLimit = copy.consensusChangeLimit; + this.consensusChangeInterval = copy.consensusChangeInterval; + this.consensusChoices = copy.consensusChoices; this.minRf = copy.minRf; this.initialRf = copy.initialRf; this.maxRf = copy.maxRf; this.changePaxosVariantTo = changePaxosVariantTo; } - public Options(int topologyChangeLimit, KindOfSequence.Period topologyChangeInterval, Choices choices, int[] minRf, int[] initialRf, int[] maxRf, PaxosVariant changePaxosVariantTo) + public Options(int topologyChangeLimit, + KindOfSequence.Period topologyChangeInterval, + Choices topologyChangeChoices, + int consensusChangeLimit, + KindOfSequence.Period consensusChangeInterval, + Choices consensusChangeChoices, + int[] minRf, int[] initialRf, int[] maxRf, + PaxosVariant changePaxosVariantTo) { if (Arrays.equals(minRf, maxRf)) - choices = choices.without(TopologyChange.CHANGE_RF); + topologyChangeChoices = topologyChangeChoices.without(TopologyChange.CHANGE_RF); this.topologyChangeInterval = topologyChangeInterval; this.topologyChangeLimit = topologyChangeLimit; + this.consensusChangeInterval = consensusChangeInterval; + this.consensusChangeLimit = consensusChangeLimit; this.minRf = minRf; this.initialRf = initialRf; this.maxRf = maxRf; - this.allChoices = choices; + this.allChoices = topologyChangeChoices; this.choicesNoJoin = allChoices.without(JOIN).without(REPLACE); this.choicesNoLeave = allChoices.without(LEAVE); + this.consensusChoices = consensusChangeChoices; this.changePaxosVariantTo = changePaxosVariantTo; } public static Options noActions(int clusterSize) { int[] rf = new int[]{clusterSize}; - return new Options(0, UNIFORM.period(null, null), Choices.uniform(), rf, rf, rf, null); + return new Options(0, UNIFORM.period(null, null), Choices.uniform(), 0, UNIFORM.period(null, null), Choices.uniform(), rf, rf, rf, null); } public Options changePaxosVariantTo(PaxosVariant newVariant) diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java index 1e8656dd2aca..6ddf61109d34 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/KeyspaceActions.java @@ -44,6 +44,7 @@ import org.apache.cassandra.simulator.systems.InterceptedExecution; import org.apache.cassandra.simulator.systems.InterceptingExecutor; import org.apache.cassandra.simulator.systems.SimulatedSystems; +import org.apache.cassandra.simulator.utils.KindOfSequence; import org.apache.cassandra.tcm.ClusterMetadataService; import static java.util.Collections.singletonList; @@ -62,7 +63,8 @@ public class KeyspaceActions extends ClusterActions final ConsistencyLevel serialConsistency; final int[] primaryKeys; - final EnumSet ops = EnumSet.noneOf(TopologyChange.class); + final EnumSet topologyOps = EnumSet.noneOf(TopologyChange.class); + final EnumSet consensusOps = EnumSet.noneOf(ConsensusChange.class); final NodeLookup nodeLookup; final TokenPlacementModel.NodeFactory factory; final int[] minRf, initialRf, maxRf; @@ -77,7 +79,9 @@ public class KeyspaceActions extends ClusterActions final int[] currentRf; Topology topology; boolean haveChangedVariant; + boolean haveConsensusMigrated; int topologyChangeCount = 0; + int consensusChangeCount = 0; public KeyspaceActions(SimulatedSystems simulated, String keyspace, String table, String createTableCql, @@ -118,7 +122,8 @@ public KeyspaceActions(SimulatedSystems simulated, maxRf = options.maxRf; currentRf = initialRf.clone(); membersOfQuorumDcs = serialConsistency == LOCAL_SERIAL ? all.dcs[0] : all.toArray(); - ops.addAll(Arrays.asList(options.allChoices.options)); + topologyOps.addAll(Arrays.asList(options.allChoices.options)); + consensusOps.addAll(Arrays.asList(options.consensusChoices.options)); } public ActionPlan plan(boolean joinAll) @@ -214,7 +219,7 @@ private Topology recomputeTopology(TokenPlacementModel.ReplicatedRanges readPlac for (int i = 0 ; i < primaryKeys.length ; ++i) { int primaryKey = primaryKeys[i]; - LongToken token = new Murmur3Partitioner().getToken(Int32Type.instance.decompose(primaryKey)); + LongToken token = Murmur3Partitioner.instance.getToken(Int32Type.instance.decompose(primaryKey)); List readReplicas = readPlacements.replicasFor(token.token); List writeReplicas = writePlacements.replicasFor(token.token); @@ -234,15 +239,53 @@ private Topology recomputeTopology(TokenPlacementModel.ReplicatedRanges readPlac private Action next() { - if (options.topologyChangeLimit >= 0 && topologyChangeCount++ > options.topologyChangeLimit) + Action nextTopologyChangeAction = nextTopologyChangeAction(); + if (nextTopologyChangeAction != null) + return nextTopologyChangeAction; + + Action nextConsensusChangeAction = nextConsensusChangeAction(); + if (nextConsensusChangeAction != null) + return nextConsensusChangeAction; + + if (options.changePaxosVariantTo != null && !haveChangedVariant) + { + haveChangedVariant = true; + return schedule(new OnClusterSetPaxosVariant(KeyspaceActions.this, options.changePaxosVariantTo), options.topologyChangeInterval); + } + + return null; + } + + private Action nextConsensusChangeAction() + { + if (options.consensusChangeLimit >= 0 && ++consensusChangeCount > options.consensusChangeLimit) return null; - while (!ops.isEmpty() && (!registered.isEmpty() || joined.size() > sum(minRf))) + while (!consensusOps.isEmpty() && !haveConsensusMigrated) + { + ConsensusChange nextChange = options.consensusChoices.choose(random); + switch (nextChange) + { + case ACCORD_MIGRATE: + haveConsensusMigrated = true; + return schedule(new OnClusterMigrateConsensus(this), options.topologyChangeInterval); + } + } + + return null; + } + + private Action nextTopologyChangeAction() + { + if (options.topologyChangeLimit >= 0 && ++topologyChangeCount > options.topologyChangeLimit) + return null; + + while (!topologyOps.isEmpty() && (!registered.isEmpty() || joined.size() > sum(minRf))) { if (options.changePaxosVariantTo != null && !haveChangedVariant && random.decide(1f / (1 + registered.size()))) { haveChangedVariant = true; - return schedule(new OnClusterSetPaxosVariant(KeyspaceActions.this, options.changePaxosVariantTo)); + return schedule(new OnClusterSetPaxosVariant(KeyspaceActions.this, options.changePaxosVariantTo), options.topologyChangeInterval); } // pick a dc @@ -251,8 +294,8 @@ private Action next() // try to pick an action (and simply loop again if we cannot for this dc) TopologyChange next; if (registered.size(dc) > 0 && joined.size(dc) > currentRf[dc]) next = options.allChoices.choose(random); - else if (registered.size(dc) > 0 && ops.contains(JOIN)) next = options.choicesNoLeave.choose(random); - else if (joined.size(dc) > currentRf[dc] && ops.contains(LEAVE)) next = options.choicesNoJoin.choose(random); + else if (registered.size(dc) > 0 && topologyOps.contains(JOIN)) next = options.choicesNoLeave.choose(random); + else if (joined.size(dc) > currentRf[dc] && topologyOps.contains(LEAVE)) next = options.choicesNoJoin.choose(random); else if (joined.size(dc) > minRf[dc]) next = CHANGE_RF; else continue; @@ -330,19 +373,12 @@ else if (random.decide(0.5f)) // can do either } } } - - if (options.changePaxosVariantTo != null && !haveChangedVariant) - { - haveChangedVariant = true; - return schedule(new OnClusterSetPaxosVariant(KeyspaceActions.this, options.changePaxosVariantTo)); - } - return null; } - private Action schedule(Action action) + private Action schedule(Action action, KindOfSequence.Period period) { - action.setDeadline(time, time.nanoTime() + options.topologyChangeInterval.get(random)); + action.setDeadline(time, time.nanoTime() + period.get(random)); return action; } @@ -364,7 +400,7 @@ public void transitivelyAfter(Action finished) time.permitDiscontinuities(); } }); - return schedule(action); + return schedule(action, options.topologyChangeInterval); } void updateTopology(Topology newTopology) diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensus.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensus.java new file mode 100644 index 000000000000..d4c4cb87d570 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensus.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.cluster; + +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.harry.model.TokenPlacementModel; +import org.apache.cassandra.simulator.Action; +import org.apache.cassandra.simulator.ActionList; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.TokenMap; +import org.apache.cassandra.utils.Pair; + +import static com.google.common.base.Preconditions.checkState; +import static org.apache.cassandra.simulator.Action.Modifiers.NONE; + +class OnClusterMigrateConsensus extends Action +{ + private final KeyspaceActions actions; + + OnClusterMigrateConsensus(KeyspaceActions actions) + { + super("Performing consensus migration", NONE, NONE); + this.actions = actions; + } + + public ActionList performSimple() + { + List result = new ArrayList<>(); + List>> ranges = new ArrayList<>(); + ClusterMetadata cm = ClusterMetadata.current(); + TokenMap tm = cm.tokenMap; + IPartitioner partitioner = tm.partitioner(); + TokenPlacementModel.Lookup lookup = actions.factory.lookup(); + Map idToNodeId = new HashMap<>(); + for (int id : actions.all.toArray()) + idToNodeId.put(id, lookup.nodeId(id)); + + for (int ii = 0; ii < actions.all.size(); ii++) + { + int nodeIdx = ii + 1; + List tokens = tm.tokens(idToNodeId.get(nodeIdx)); + checkState(tokens.size() == 1, "Expect only 1, not handling vnodes tokenRanges " + tokens); + Token token = tokens.get(0); + Range tokenRange = new Range(tm.getPredecessor(token), token); + Range firstRange = new Range<>(tokenRange.left, partitioner.split(tokenRange.left, tokenRange.right, 0.33)); + Range secondRange = new Range<>(firstRange.right, partitioner.split(tokenRange.left, tokenRange.right, 0.66)); + Range thirdRange = new Range<>(secondRange.right, tokenRange.right); + ranges.add(Pair.create(nodeIdx, new SimpleEntry<>(firstRange.left.toString(), firstRange.right.toString()))); + ranges.add(Pair.create(nodeIdx, new SimpleEntry<>(secondRange.left.toString(), secondRange.right.toString()))); + ranges.add(Pair.create(nodeIdx, new SimpleEntry<>(thirdRange.left.toString(), thirdRange.right.toString()))); + } + + Collections.shuffle(ranges); + + System.out.println("Ranges to migrate " + ranges); + + ranges.stream().forEach(p -> result.add(new OnClusterMigrateConsensusOneRange(actions, p.left(), p.right()))); + return ActionList.of(result); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensusOneRange.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensusOneRange.java new file mode 100644 index 000000000000..84a1d43c5091 --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnClusterMigrateConsensusOneRange.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.cluster; + +import java.util.Map; + +import com.google.common.collect.ImmutableList; + +import org.apache.cassandra.simulator.Action; +import org.apache.cassandra.simulator.ActionList; + +import static org.apache.cassandra.simulator.Action.Modifiers.NONE; +import static org.apache.cassandra.simulator.Action.Modifiers.STRICT; + +class OnClusterMigrateConsensusOneRange extends Action +{ + private final KeyspaceActions actions; + private final int repairOn; + Map.Entry startMigrationRange; + + OnClusterMigrateConsensusOneRange(KeyspaceActions actions, int repairOn, Map.Entry startMigrationRange) + { + super("Performing consensus migration one range " + startMigrationRange, STRICT, NONE); + this.actions = actions; + this.repairOn = repairOn; + this.startMigrationRange = startMigrationRange; + } + + public ActionList performSimple() + { + return ActionList.of(new OnInstanceStartConsensusMigration(actions, 1, startMigrationRange ), + new OnClusterRepairRanges(actions, new int[] { repairOn }, true, false, ImmutableList.of(startMigrationRange))); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java index 46edfb392649..bf75f920ed52 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java @@ -97,7 +97,7 @@ private static void invokeRepair(String keyspaceName, boolean repairPaxos, boole { Collection> ranges = rangesSupplier.call(); // no need to wait for completion, as we track all task submissions and message exchanges, and ensure they finish before continuing to next action - StorageService.instance.repair(keyspaceName, new RepairOption(RepairParallelism.SEQUENTIAL, isPrimaryRangeOnly, false, false, 1, ranges, false, false, force, PreviewKind.NONE, false, true, repairPaxos, repairOnlyPaxos, false), singletonList((tag, event) -> { + StorageService.instance.repair(keyspaceName, new RepairOption(RepairParallelism.SEQUENTIAL, isPrimaryRangeOnly, false, false, 1, ranges, false, force, PreviewKind.NONE, false, true, repairPaxos, repairOnlyPaxos, false, false), singletonList((tag, event) -> { if (event.getType() == ProgressEventType.COMPLETE) listener.run(); })); diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceStartConsensusMigration.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceStartConsensusMigration.java new file mode 100644 index 000000000000..ba23f985872d --- /dev/null +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceStartConsensusMigration.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.cluster; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.service.StorageService; + +import static org.apache.cassandra.simulator.Action.Modifiers.RELIABLE_NO_TIMEOUTS; + +class OnInstanceStartConsensusMigration extends ClusterAction +{ + + public OnInstanceStartConsensusMigration(KeyspaceActions actions, int on, Map.Entry startMigrationRange) + { + this(actions, on, RELIABLE_NO_TIMEOUTS, RELIABLE_NO_TIMEOUTS, startMigrationRange); + } + + public OnInstanceStartConsensusMigration(KeyspaceActions actions, int on, Modifiers self, Modifiers transitive, Map.Entry startMigrationRange) + { + super("Start consensus migration on " + on, self, transitive, actions, on, invokableBlockingStartConsensusMigration(actions.keyspace, actions.table, startMigrationRange)); + } + + private static IIsolatedExecutor.SerializableRunnable invokableBlockingStartConsensusMigration(String keyspaceName, String cfName, Map.Entry range) + { + return () -> { + List keyspaces = new ArrayList<>(); + keyspaces.add(keyspaceName); + List tables = new ArrayList<>(); + tables.add(cfName); + StorageService.instance.migrateConsensusProtocol("accord", keyspaces, tables, range.getKey() + ":" + range.getValue()); + }; + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/debug/Reconcile.java b/test/simulator/main/org/apache/cassandra/simulator/debug/Reconcile.java index 5face389acd8..cbbf85fc32a0 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/debug/Reconcile.java +++ b/test/simulator/main/org/apache/cassandra/simulator/debug/Reconcile.java @@ -61,7 +61,7 @@ public class Reconcile private static final Pattern STRIP_TRACES = Pattern.compile("(Wakeup|Continue|Timeout|Waiting)\\[(((([a-zA-Z]\\.)*[a-zA-Z0-9_$]+\\.[a-zA-Z0-9_<>$]+:[\\-0-9]+; )*(([a-zA-Z]\\.)*[a-zA-Z0-9_$]+\\.[a-zA-Z0-9_<>$]+:[\\-0-9]+))( #\\[.*?]#)?) ?(by\\[.*?])?]"); private static final Pattern STRIP_NOW_TRACES = Pattern.compile("( #\\[.*?]#)"); private static final Pattern NORMALISE_THREAD_RECORDING_IN = Pattern.compile("(Thread\\[[^]]+:[0-9]+),?[0-9]+(,node[0-9]+)]"); - static final Pattern NORMALISE_LAMBDA = Pattern.compile("((\\$\\$Lambda\\$[0-9]+/[0-9]+)?(@[0-9a-f]+)?)"); + static final Pattern NORMALISE_LAMBDA = Pattern.compile("((\\$\\$Lambda\\$[0-9]+/(0x)?[a-f0-9]+)?(@[0-9a-f]+)?)"); static final Pattern NORMALISE_THREAD = Pattern.compile("(Thread\\[[^]]+:[0-9]+),[0-9](,node[0-9]+)(_[0-9]+)?]"); public static class AbstractReconciler diff --git a/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java b/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java index 78ee783bd7c9..72bc99fba76c 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java +++ b/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java @@ -320,5 +320,4 @@ private static String normalise(String input) ).replaceAll("$1$2]") ).replaceAll("$1]"); } - } diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java index 8ed4556194f5..5bfb218c7edf 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java @@ -42,6 +42,7 @@ import org.apache.cassandra.distributed.api.LogResult; import org.apache.cassandra.distributed.impl.FileLogAction; import org.apache.cassandra.distributed.impl.Instance; +import org.apache.cassandra.distributed.shared.Metrics; import org.apache.cassandra.io.util.File; import org.apache.cassandra.simulator.Action; import org.apache.cassandra.simulator.ActionList; @@ -99,7 +100,7 @@ public AbstractPairOfSequencesPaxosSimulation(SimulatedSystems simulated, long seed, int[] primaryKeys, long runForNanos, LongSupplier jitter) { - super(runForNanos < 0 ? STREAM_LIMITED : clusterOptions.topologyChangeLimit < 0 ? TIME_LIMITED : TIME_AND_STREAM_LIMITED, + super(runForNanos < 0 ? STREAM_LIMITED : (clusterOptions.topologyChangeLimit <= 0 && clusterOptions.consensusChangeLimit <= 0) ? TIME_LIMITED : TIME_AND_STREAM_LIMITED, simulated, cluster, scheduler, runForNanos, jitter); this.readRatio = readRatio; this.concurrency = concurrency; @@ -183,6 +184,11 @@ protected ActionList performSimple() }; } + protected Metrics getMetrics(int coordinatorIndex) + { + return cluster.get(coordinatorIndex).metrics(); + } + public ActionPlan plan() { ActionPlan plan = new KeyspaceActions(simulated, KEYSPACE, TABLE, createTableStmt(), cluster, diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java index b07b4a86cd89..de3e5f15b765 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesPaxosSimulation.java @@ -286,6 +286,10 @@ void log(@Nullable Integer primaryKey) @Override boolean joinAll() { + // Consensus migration means Accord is running and Accord doesn't yet support joining nodes + if ((clusterOptions.consensusChangeLimit == -1 || clusterOptions.consensusChangeLimit > 0) + && clusterOptions.consensusChoices.options.length > 0) + return true; return false; } } diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java index a0c66822116d..6e7d058beadb 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java @@ -22,8 +22,8 @@ import org.apache.cassandra.config.Config.PaxosVariant; import org.apache.cassandra.distributed.api.ConsistencyLevel; -import org.apache.cassandra.simulator.RandomSource; import org.apache.cassandra.simulator.ClusterSimulation; +import org.apache.cassandra.simulator.RandomSource; import org.apache.cassandra.simulator.utils.KindOfSequence; import static java.util.concurrent.TimeUnit.SECONDS; @@ -69,6 +69,11 @@ public PaxosClusterSimulation create(long seed) throws IOException random.reset(seed); return new PaxosClusterSimulation(random, seed, uniqueNum, this); } + + public String lwtStrategy() + { + return lwtStrategy; + } } PaxosClusterSimulation(RandomSource random, long seed, int uniqueNum, Builder builder) throws IOException diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java index 373a3767145b..6c9f683c6186 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java @@ -19,6 +19,7 @@ package org.apache.cassandra.simulator.paxos; import java.io.IOException; +import java.util.Objects; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; @@ -31,6 +32,7 @@ import org.apache.cassandra.config.Config; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.simulator.SimulationRunner; +import org.apache.cassandra.simulator.utils.IntRange; import org.apache.cassandra.simulator.SimulatorUtils; public class PaxosSimulationRunner extends SimulationRunner @@ -63,6 +65,18 @@ protected void propagate(PaxosClusterSimulation.Builder builder) super.propagate(builder); propagateTo(consistency, withStateCache, withoutStateCache, variant, toVariant, builder); } + + @Override + protected void run( long seed, PaxosClusterSimulation.Builder builder) throws IOException + { + if (Objects.equals(builder.lwtStrategy(), "accord")) + { + // Apply handicaps + builder.dcs(new IntRange(1, 1)); + builder.nodes(new IntRange(3, 3)); + } + super.run(seed, builder); + } } @Command(name = "record") @@ -94,7 +108,8 @@ protected void propagate(PaxosClusterSimulation.Builder builder) } @Command(name = "reconcile") - public static class Reconcile extends SimulationRunner.Reconcile + public static class + Reconcile extends SimulationRunner.Reconcile { @Option(name = "--consistency") String consistency; diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 5cb392d88a82..c73eead7446f 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -210,7 +210,7 @@ private static TxnRequest toRequest(int event) Ranges ranges = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min("system"), AccordRoutingKey.SentinelKey.max("system"))); Topologies topologies = Utils.topologies(TopologyUtils.initialTopology(new Node.Id[] {node}, ranges, 3)); Keys keys = Keys.of(toKey(0)); - Txn txn = new Txn.InMemory(keys, new TxnRead(new TxnNamedRead[0], keys), TxnQuery.ALL, new NoopUpdate()); + Txn txn = new Txn.InMemory(keys, new TxnRead(new TxnNamedRead[0], keys, null), TxnQuery.ALL, new NoopUpdate()); FullRoute route = route(); return new PreAccept(node, topologies, id, txn, route); } diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java index 474f16861579..431eb7e454c8 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java @@ -101,6 +101,20 @@ public void simulationTest() throws IOException PaxosSimulationRunner.main(new String[] { "run", "--variant", "v2", "-n", "3..6", "-t", "1000", "-c", "2", "--cluster-action-limit", "2", "-s", "30" }); } + @Test + public void casOnAccordSimulationTest() throws IOException + { + PaxosSimulationRunner.main(new String[] { "run", + "--lwt-strategy", "migration", + "-n", "3...6", + "-t", "1000", + "--cluster-action-limit", "0", + "--consensus-action-limit", "-1", + "--consensus-actions", "ACCORD_MIGRATE", + "-c", "10", + "-s", "30"}); + } + @Test @Ignore("fails due to OOM DirectMemory - unclear why") public void selfReconcileTest() throws IOException diff --git a/test/unit/org/apache/cassandra/CassandraTestBase.java b/test/unit/org/apache/cassandra/CassandraTestBase.java new file mode 100644 index 000000000000..bfb7bc8c4123 --- /dev/null +++ b/test/unit/org/apache/cassandra/CassandraTestBase.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra; + +import java.lang.annotation.Annotation; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.reflect.Method; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.rules.TestName; +import org.junit.rules.TestWatcher; +import org.junit.runner.Description; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.ByteOrderedPartitioner; +import org.apache.cassandra.dht.LengthPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.OrderPreservingPartitioner; +import org.apache.cassandra.dht.RandomPartitioner; +import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadataService; + +import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; +import static org.junit.Assert.assertTrue; + +/* + * Many tests declare their own test base and duplicate functionality + * Hopefully this can serve as a place to put common initialization patterns and annotations + * So people have fewer problems to solve when authoring tests. + */ +public class CassandraTestBase +{ + @Retention(RetentionPolicy.RUNTIME) + public @interface UseMurmur3Partitioner {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface UseRandomPartitioner {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface UseOrderPreservingPartitioner {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface UseLengthPartitioner {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface UseByteOrderedPartitioner {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface DDDaemonInitialization {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface SchemaLoaderPrepareServer {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface SchemaLoaderLoadSchema {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface PrepareServerNoRegister {} + + @Retention(RetentionPolicy.RUNTIME) + public @interface DisableMBeanRegistration {} + + private static boolean classResetStorageServicePartitioner; + + private static Boolean oldMBeanRegistrationValue; + + @BeforeClass + public static void cassandraTestBaseBeforeClass() + { + if (hasClassAnnotation(DisableMBeanRegistration.class)) + { + oldMBeanRegistrationValue = ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION.getBoolean(); + ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION.setBoolean(true); + } + + if (hasClassAnnotation(DDDaemonInitialization.class)) + DatabaseDescriptor.daemonInitialization(); + else if (hasClassAnnotation(SchemaLoaderPrepareServer.class)) + SchemaLoader.prepareServer(); + else if (hasClassAnnotation(SchemaLoaderLoadSchema.class)) + SchemaLoader.loadSchema(); + else if (hasClassAnnotation(PrepareServerNoRegister.class)) + ServerTestUtils.daemonInitialization(); + + int partitionerAnnotationCount = 0; + if (hasClassAnnotation(UseMurmur3Partitioner.class)) + { + partitionerAnnotationCount++; + classResetStorageServicePartitioner = true; + StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + if (hasClassAnnotation(UseRandomPartitioner.class)) + { + partitionerAnnotationCount++; + classResetStorageServicePartitioner = true; + StorageService.instance.setPartitionerUnsafe(RandomPartitioner.instance); + } + if (hasClassAnnotation(UseOrderPreservingPartitioner.class)) + { + partitionerAnnotationCount++; + classResetStorageServicePartitioner = true; + StorageService.instance.setPartitionerUnsafe(OrderPreservingPartitioner.instance); + } + if (hasClassAnnotation(UseLengthPartitioner.class)) + { + partitionerAnnotationCount++; + classResetStorageServicePartitioner = true; + StorageService.instance.setPartitionerUnsafe(LengthPartitioner.instance); + } + if (hasClassAnnotation(UseByteOrderedPartitioner.class)) + { + partitionerAnnotationCount++; + classResetStorageServicePartitioner = true; + StorageService.instance.setPartitionerUnsafe(ByteOrderedPartitioner.instance); + } + assertTrue("At most one partitioner should be annotated", partitionerAnnotationCount <= 1); + + if (hasClassAnnotation(PrepareServerNoRegister.class)) + ServerTestUtils.prepareServerNoRegister(); + } + + @AfterClass + public static void cassandraTestBaseAfterClass() + { + if (oldMBeanRegistrationValue != null) + { + ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION.setBoolean(oldMBeanRegistrationValue); + oldMBeanRegistrationValue = null; + } + + if (classResetStorageServicePartitioner) + { + StorageService.instance.resetPartitionerUnsafe(); + classResetStorageServicePartitioner = false; + } + } + + public static boolean hasClassAnnotation(Class annotation) + { + return hasClassAnnotation(testClass, annotation); + } + + public static boolean hasClassAnnotation(Class clazz, Class annotation) + { + if (clazz == null) + return false; + if (clazz.getAnnotation(annotation) != null) + return true; + return hasClassAnnotation(clazz.getSuperclass(), annotation); + } + + private static Class testClass; + + @ClassRule + public static TestWatcher classWatcher = new TestWatcher() + { + @Override + public void starting(Description description) + { + testClass = description.getTestClass(); + } + }; + + @Rule + public TestName testMethodName = new TestName(); + public Method testMethod; + + private boolean testResetPartitioner; + + ClusterMetadataService toRestore; + + @Before + public void cassandraTestBaseSetUp() throws Exception + { + testMethod = testClass.getMethod(testMethodName.getMethodName()); + int partitionerAnnotationCount = 0; + if (hasMethodAnnotation(UseMurmur3Partitioner.class)) + { + partitionerAnnotationCount++; + testResetPartitioner = true; + StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + if (hasMethodAnnotation(UseRandomPartitioner.class)) + { + partitionerAnnotationCount++; + testResetPartitioner = true; + StorageService.instance.setPartitionerUnsafe(RandomPartitioner.instance); + } + if (hasMethodAnnotation(UseOrderPreservingPartitioner.class)) + { + partitionerAnnotationCount++; + testResetPartitioner = true; + StorageService.instance.setPartitionerUnsafe(OrderPreservingPartitioner.instance); + } + if (hasMethodAnnotation(UseLengthPartitioner.class)) + { + partitionerAnnotationCount++; + testResetPartitioner = true; + StorageService.instance.setPartitionerUnsafe(LengthPartitioner.instance); + } + if (hasMethodAnnotation(UseByteOrderedPartitioner.class)) + { + partitionerAnnotationCount++; + testResetPartitioner = true; + StorageService.instance.setPartitionerUnsafe(ByteOrderedPartitioner.instance); + } + + if (testResetPartitioner) + { + toRestore = ClusterMetadataService.unsetInstance(); + ClusterMetadataService withNewPartitioner = ClusterMetadataTestHelper.instanceForTest(); + ClusterMetadataService.setInstance(withNewPartitioner); + } + assertTrue("At most one partitioner should be annotated", partitionerAnnotationCount <= 1); + } + + private boolean hasMethodAnnotation(Class annotation) + { + return testMethod.getAnnotation(annotation) != null; + } + + @After + public void cassandraTestBaseTearDown() + { + if (testResetPartitioner) + { + StorageService.instance.resetPartitionerUnsafe(); + testResetPartitioner = false; + ClusterMetadataService.unsetInstance(); + + if (toRestore != null) + { + ClusterMetadataService.setInstance(toRestore); + toRestore = null; + } + } + } +} diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 22b0eb9999be..74cf51de9210 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -155,7 +155,9 @@ import org.hamcrest.Matcher; import org.mockito.Mockito; import org.mockito.internal.stubbing.defaultanswers.ForwardsInvocations; +import org.awaitility.core.ThrowingRunnable; +import static com.google.common.base.Preconditions.checkState; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertEquals; @@ -230,7 +232,7 @@ public static Iterable once(final Iterator source) private AtomicBoolean exhausted = new AtomicBoolean(); public Iterator iterator() { - Preconditions.checkState(!exhausted.getAndSet(true)); + checkState(!exhausted.getAndSet(true)); return source; } }; @@ -699,19 +701,21 @@ public static void assumeLegacySecondaryIndex() public static class PartitionerSwitcher implements AutoCloseable { - final IPartitioner oldP; final IPartitioner newP; + boolean closed; + public PartitionerSwitcher(IPartitioner partitioner) { newP = partitioner; - oldP = StorageService.instance.setPartitionerUnsafe(partitioner); + StorageService.instance.setPartitionerUnsafe(partitioner); } public void close() { - IPartitioner p = StorageService.instance.setPartitionerUnsafe(oldP); - assert p == newP; + checkState(!closed, "Already reset"); + closed = true; + StorageService.instance.resetPartitionerUnsafe(); } } @@ -748,6 +752,21 @@ public static void spinAssertEquals(String message, T expected, long timeout .untilAsserted(() -> assertThat(message, call.call(), equalTo(expected))); } + public static void spinUntilSuccess(ThrowingRunnable runnable) + { + spinUntilSuccess(runnable, 10); + } + + public static void spinUntilSuccess(ThrowingRunnable runnable, int timeoutInSeconds) + { + Awaitility.await() + .pollInterval(Duration.ofMillis(100)) + .pollDelay(0, TimeUnit.MILLISECONDS) + .atMost(timeoutInSeconds, TimeUnit.SECONDS) + .ignoreExceptions() + .untilAsserted(runnable); + } + public static void joinThread(Thread thread) throws InterruptedException { thread.join(10000); diff --git a/test/unit/org/apache/cassandra/auth/CassandraAuthorizerTest.java b/test/unit/org/apache/cassandra/auth/CassandraAuthorizerTest.java index 7ead4e4118a6..97e8fb4b08cc 100644 --- a/test/unit/org/apache/cassandra/auth/CassandraAuthorizerTest.java +++ b/test/unit/org/apache/cassandra/auth/CassandraAuthorizerTest.java @@ -36,6 +36,7 @@ public class CassandraAuthorizerTest extends CQLTester @BeforeClass public static void setupAuth() { + // This runs after the base class sets up Cassandra and might not even work CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION.setBoolean(true); requireAuthentication(); requireNetwork(); diff --git a/test/unit/org/apache/cassandra/batchlog/BatchlogManagerTest.java b/test/unit/org/apache/cassandra/batchlog/BatchlogManagerTest.java index d267a4b96aba..f8599f4c6ed7 100644 --- a/test/unit/org/apache/cassandra/batchlog/BatchlogManagerTest.java +++ b/test/unit/org/apache/cassandra/batchlog/BatchlogManagerTest.java @@ -23,15 +23,16 @@ import java.util.concurrent.ExecutionException; import com.google.common.collect.Lists; -import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.SchemaLoaderPrepareServer; +import org.apache.cassandra.CassandraTestBase.UseByteOrderedPartitioner; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.Util; -import org.apache.cassandra.Util.PartitionerSwitcher; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.ColumnFamilyStore; @@ -45,7 +46,6 @@ import org.apache.cassandra.db.partitions.ImmutableBTreePartition; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.Row; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.schema.KeyspaceParams; @@ -66,7 +66,9 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -public class BatchlogManagerTest +@UseByteOrderedPartitioner +@SchemaLoaderPrepareServer +public class BatchlogManagerTest extends CassandraTestBase { private static final String KEYSPACE1 = "BatchlogManagerTest1"; private static final String CF_STANDARD1 = "Standard1"; @@ -75,14 +77,9 @@ public class BatchlogManagerTest private static final String CF_STANDARD4 = "Standard4"; private static final String CF_STANDARD5 = "Standard5"; - static PartitionerSwitcher sw; - @BeforeClass public static void defineSchema() throws ConfigurationException { - DatabaseDescriptor.daemonInitialization(); - sw = Util.switchPartitioner(Murmur3Partitioner.instance); - SchemaLoader.prepareServer(); SchemaLoader.createKeyspace(KEYSPACE1, KeyspaceParams.simple(1), SchemaLoader.standardCFMD(KEYSPACE1, CF_STANDARD1, 1, BytesType.instance), @@ -92,12 +89,6 @@ public static void defineSchema() throws ConfigurationException SchemaLoader.standardCFMD(KEYSPACE1, CF_STANDARD5, 1, BytesType.instance)); } - @AfterClass - public static void cleanup() - { - sw.close(); - } - @Before public void setUp() throws Exception { diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 9ab3dab6bf8a..eb10476ace2d 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -91,7 +91,8 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$DiskOptimizationStrategy", "org.apache.cassandra.config.Config$FlushCompression", "org.apache.cassandra.config.Config$InternodeCompression", - "org.apache.cassandra.config.Config$LegacyPaxosStrategy", + "org.apache.cassandra.config.Config$LWTStrategy", + "org.apache.cassandra.config.Config$NonSerialWriteStrategy", "org.apache.cassandra.config.Config$MemtableAllocationType", "org.apache.cassandra.config.Config$PaxosOnLinearizabilityViolation", "org.apache.cassandra.config.Config$PaxosStatePurging", diff --git a/test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java b/test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java index 8c1214a26bde..d084082a0885 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/TxnDataNameTest.java @@ -18,11 +18,12 @@ package org.apache.cassandra.cql3.statements; -import org.apache.cassandra.service.accord.txn.TxnDataName; import org.junit.Test; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.service.accord.txn.TxnDataName; +import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.Generators; import org.assertj.core.api.Assertions; import org.quicktheories.core.Gen; @@ -62,6 +63,7 @@ public static Gen gen() case USER: return TxnDataName.user(symbolGen.generate(rnd)); case RETURNING: return TxnDataName.returning(); case AUTO_READ: return new TxnDataName(kind, symbolGen.generate(rnd), symbolGen.generate(rnd), symbolGen.generate(rnd)); + case CAS_READ: return TxnRead.CAS_READ; default: throw new IllegalArgumentException("Unknown kind: " + kind); } }; diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/CQLVectorTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/CQLVectorTest.java index 2c6ce589dae4..6b034af523c2 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/CQLVectorTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/CQLVectorTest.java @@ -203,7 +203,7 @@ public void sandwichBetweenUDTs() execute("INSERT INTO %s (pk, value) VALUES (0, {z: [{y:1}, {y:2}]})"); assertRows(execute("SELECT * FROM %s"), - row(0, userType("z", vector(userType("y", 1), userType("y", 2))))); + row(0, userType("z", vector((Object)userType("y", 1), (Object)userType("y", 2))))); } @Test diff --git a/test/unit/org/apache/cassandra/db/CleanupTransientTest.java b/test/unit/org/apache/cassandra/db/CleanupTransientTest.java index d1a275c65722..33877c9b8e0d 100644 --- a/test/unit/org/apache/cassandra/db/CleanupTransientTest.java +++ b/test/unit/org/apache/cassandra/db/CleanupTransientTest.java @@ -26,8 +26,10 @@ import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.PrepareServerNoRegister; +import org.apache.cassandra.CassandraTestBase.UseRandomPartitioner; import org.apache.cassandra.SchemaLoader; -import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.Util; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.compaction.CompactionManager; @@ -45,10 +47,11 @@ import static org.junit.Assert.assertEquals; -public class CleanupTransientTest +@PrepareServerNoRegister +@UseRandomPartitioner +public class CleanupTransientTest extends CassandraTestBase { private static final IPartitioner partitioner = RandomPartitioner.instance; - private static IPartitioner oldPartitioner; public static final int LOOPS = 200; public static final String KEYSPACE1 = "CleanupTest1"; @@ -70,10 +73,7 @@ public class CleanupTransientTest @BeforeClass public static void setup() throws Exception { - DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setTransientReplicationEnabledUnsafe(true); - oldPartitioner = StorageService.instance.setPartitionerUnsafe(partitioner); - ServerTestUtils.prepareServerNoRegister(); SchemaLoader.createKeyspace(KEYSPACE1, KeyspaceParams.simple("2/1"), SchemaLoader.standardCFMD(KEYSPACE1, CF_STANDARD1), diff --git a/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java b/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java index 419772d3044f..0462c6d61bc6 100644 --- a/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java +++ b/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerOutOfRangeTest.java @@ -219,6 +219,7 @@ private static class StubReadCommand extends SinglePartitionReadCommand false, 0, false, + false, tmd, FBUtilities.nowInSeconds(), ColumnFilter.all(tmd), @@ -254,6 +255,7 @@ private static class StubRangeReadCommand extends PartitionRangeReadCommand false, 0, false, + false, tmd, FBUtilities.nowInSeconds(), ColumnFilter.all(tmd), diff --git a/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerTest.java b/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerTest.java index f71b1ff7ca6b..bc2b285d980a 100644 --- a/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerTest.java +++ b/test/unit/org/apache/cassandra/db/ReadCommandVerbHandlerTest.java @@ -173,6 +173,7 @@ private static class TrackingSinglePartitionReadCommand extends SinglePartitionR false, 0, false, + false, metadata, FBUtilities.nowInSeconds(), ColumnFilter.all(metadata), diff --git a/test/unit/org/apache/cassandra/db/ReadResponseTest.java b/test/unit/org/apache/cassandra/db/ReadResponseTest.java index 988677a83b64..e59436903764 100644 --- a/test/unit/org/apache/cassandra/db/ReadResponseTest.java +++ b/test/unit/org/apache/cassandra/db/ReadResponseTest.java @@ -255,6 +255,7 @@ private static class StubReadCommand extends SinglePartitionReadCommand isDigest, 0, false, + false, metadata, FBUtilities.nowInSeconds(), ColumnFilter.all(metadata), diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionStrategyManagerTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionStrategyManagerTest.java index 19b758f73eac..20420aeef3f5 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionStrategyManagerTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionStrategyManagerTest.java @@ -42,6 +42,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.SchemaLoaderPrepareServer; +import org.apache.cassandra.CassandraTestBase.UseByteOrderedPartitioner; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.Util; import org.apache.cassandra.config.DatabaseDescriptor; @@ -53,15 +56,12 @@ import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.db.RowUpdateBuilder; import org.apache.cassandra.db.compaction.AbstractStrategyHolder.GroupedSSTableContainer; -import org.apache.cassandra.dht.ByteOrderedPartitioner; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.util.File; import org.apache.cassandra.notifications.SSTableAddedNotification; import org.apache.cassandra.notifications.SSTableDeletingNotification; import org.apache.cassandra.schema.CompactionParams; import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.ByteBufferUtil; @@ -73,27 +73,27 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -public class CompactionStrategyManagerTest +/** + * We use byte ordered partitioner in this test to be able to easily infer an SSTable + * disk assignment based on its generation - See {@link this#getSSTableIndex(Integer[], SSTableReader)} + */ +@SchemaLoaderPrepareServer +@UseByteOrderedPartitioner +public class CompactionStrategyManagerTest extends CassandraTestBase { private static final Logger logger = LoggerFactory.getLogger(CompactionStrategyManagerTest.class); private static final String KS_PREFIX = "Keyspace1"; private static final String TABLE_PREFIX = "CF_STANDARD"; - private static IPartitioner originalPartitioner; private static boolean backups; @BeforeClass public static void beforeClass() { - SchemaLoader.prepareServer(); backups = DatabaseDescriptor.isIncrementalBackupsEnabled(); DatabaseDescriptor.setIncrementalBackupsEnabled(false); - /** - * We use byte ordered partitioner in this test to be able to easily infer an SSTable - * disk assignment based on its generation - See {@link this#getSSTableIndex(Integer[], SSTableReader)} - */ - originalPartitioner = StorageService.instance.setPartitionerUnsafe(ByteOrderedPartitioner.instance); + SchemaLoader.createKeyspace(KS_PREFIX, KeyspaceParams.simple(1), SchemaLoader.standardCFMD(KS_PREFIX, TABLE_PREFIX) @@ -110,7 +110,6 @@ public void setUp() throws Exception @AfterClass public static void afterClass() { - DatabaseDescriptor.setPartitionerUnsafe(originalPartitioner); DatabaseDescriptor.setIncrementalBackupsEnabled(backups); } diff --git a/test/unit/org/apache/cassandra/db/compaction/PartialCompactionsTest.java b/test/unit/org/apache/cassandra/db/compaction/PartialCompactionsTest.java index 1d877904a25e..92c479ffb5c4 100644 --- a/test/unit/org/apache/cassandra/db/compaction/PartialCompactionsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/PartialCompactionsTest.java @@ -41,8 +41,8 @@ import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.FBUtilities; -import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertEquals; public class PartialCompactionsTest extends SchemaLoader @@ -120,7 +120,7 @@ private static long enoughSpaceForAllButTheLargestSSTable(ColumnFamilyStore cfs) private static int liveRows(ColumnFamilyStore cfs) { return Util.getAll(Util.cmd(cfs, "key1").build()).stream() - .map(partition -> count(partition.rowIterator())) + .map(partition -> count(partition.rowIterator(false))) .reduce(Integer::sum) .orElse(0); } diff --git a/test/unit/org/apache/cassandra/db/filter/ColumnFilterTest.java b/test/unit/org/apache/cassandra/db/filter/ColumnFilterTest.java index f2b47cbf6eab..cbd38ba8e7e5 100644 --- a/test/unit/org/apache/cassandra/db/filter/ColumnFilterTest.java +++ b/test/unit/org/apache/cassandra/db/filter/ColumnFilterTest.java @@ -90,7 +90,7 @@ public static void beforeClass() SchemaLoader.prepareServer(); DatabaseDescriptor.setSeedProvider(Arrays::asList); DatabaseDescriptor.setDefaultFailureDetector(); - DatabaseDescriptor.setPartitionerUnsafe(new Murmur3Partitioner()); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); } // Select all diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraEntireSSTableStreamWriterTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraEntireSSTableStreamWriterTest.java index c29f059000c7..abb6c2dc1b1e 100644 --- a/test/unit/org/apache/cassandra/db/streaming/CassandraEntireSSTableStreamWriterTest.java +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraEntireSSTableStreamWriterTest.java @@ -62,6 +62,7 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; +import static java.util.Collections.emptyList; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -145,14 +146,14 @@ public void testBlockReadingAndWritingOverWire() throws Throwable CassandraEntireSSTableStreamWriter writer = new CassandraEntireSSTableStreamWriter(sstable, session, context); writer.write(out); - session.prepareReceiving(new StreamSummary(sstable.metadata().id, 1, 5104)); + session.prepareReceiving(new StreamSummary(sstable.metadata().id, emptyList(), 1, 5104)); CassandraStreamHeader header = CassandraStreamHeader.builder() .withSSTableVersion(sstable.descriptor.version) .withSSTableLevel(0) .withEstimatedKeys(sstable.estimatedKeys()) - .withSections(Collections.emptyList()) + .withSections(emptyList()) .withSerializationHeader(sstable.header.toComponent()) .withComponentManifest(context.manifest()) .isEntireSSTable(true) @@ -208,7 +209,7 @@ private StreamSession setupStreamingSessionForTest() StreamResultFuture future = StreamResultFuture.createInitiator(nextTimeUUID(), StreamOperation.BOOTSTRAP, Collections.emptyList(), streamCoordinator); InetAddressAndPort peer = FBUtilities.getBroadcastAddressAndPort(); - streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, Collections.emptyList(), Collections.emptyList(), StreamSession.State.INITIALIZED, null)); + streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, emptyList(), emptyList(), StreamSession.State.INITIALIZED, null)); StreamSession session = streamCoordinator.getOrCreateOutboundSession(peer); session.init(future); diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraStreamManagerTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamManagerTest.java index 41dcdcb66b88..7085ddf18a47 100644 --- a/test/unit/org/apache/cassandra/db/streaming/CassandraStreamManagerTest.java +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamManagerTest.java @@ -33,15 +33,13 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.util.concurrent.Uninterruptibles; - -import org.apache.cassandra.Util; -import org.apache.cassandra.locator.RangesAtEndpoint; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.Util; import org.apache.cassandra.concurrent.NamedThreadFactory; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; @@ -53,6 +51,7 @@ import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.RangesAtEndpoint; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.schema.CompactionParams; import org.apache.cassandra.schema.KeyspaceParams; @@ -69,6 +68,7 @@ import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.Ref; +import static java.util.Collections.emptyList; import static org.apache.cassandra.service.ActiveRepairService.NO_PENDING_REPAIR; import static org.apache.cassandra.service.ActiveRepairService.UNREPAIRED_SSTABLE; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; @@ -272,7 +272,7 @@ private Collection createSummaries() Collection summaries = new ArrayList<>(); for (int i = 0; i < 10; i++) { - StreamSummary summary = new StreamSummary(tbm.id, i, (i + 1) * 10); + StreamSummary summary = new StreamSummary(tbm.id, emptyList(), i, (i + 1) * 10); summaries.add(summary); } return summaries; diff --git a/test/unit/org/apache/cassandra/db/streaming/EntireSSTableStreamConcurrentComponentMutationTest.java b/test/unit/org/apache/cassandra/db/streaming/EntireSSTableStreamConcurrentComponentMutationTest.java index 7f0fe2c9c6e4..55793c9cacbc 100644 --- a/test/unit/org/apache/cassandra/db/streaming/EntireSSTableStreamConcurrentComponentMutationTest.java +++ b/test/unit/org/apache/cassandra/db/streaming/EntireSSTableStreamConcurrentComponentMutationTest.java @@ -87,6 +87,7 @@ import org.jboss.byteman.contrib.bmunit.BMRule; import org.jboss.byteman.contrib.bmunit.BMUnitRunner; +import static java.util.Collections.emptyList; import static org.apache.cassandra.service.ActiveRepairService.NO_PENDING_REPAIR; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; import static org.junit.Assert.assertTrue; @@ -228,7 +229,7 @@ private void testStreamWithConcurrentComponentMutation(Callable runBeforeStre streaming.get(3, TimeUnit.MINUTES); concurrentMutations.get(3, TimeUnit.MINUTES); - session.prepareReceiving(new StreamSummary(sstable.metadata().id, 1, 5104)); + session.prepareReceiving(new StreamSummary(sstable.metadata().id, emptyList(), 1, 5104)); StreamMessageHeader messageHeader = new StreamMessageHeader(sstable.metadata().id, peer, session.planId(), false, 0, 0, 0, null); try (DataInputBuffer in = new DataInputBuffer(serializedFile.nioBuffer(), false)) @@ -321,10 +322,10 @@ public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) private StreamSession setupStreamingSessionForTest() { StreamCoordinator streamCoordinator = new StreamCoordinator(StreamOperation.BOOTSTRAP, 1, new NettyStreamingConnectionFactory(), false, false, null, PreviewKind.NONE); - StreamResultFuture future = StreamResultFuture.createInitiator(nextTimeUUID(), StreamOperation.BOOTSTRAP, Collections.emptyList(), streamCoordinator); + StreamResultFuture future = StreamResultFuture.createInitiator(nextTimeUUID(), StreamOperation.BOOTSTRAP, emptyList(), streamCoordinator); InetAddressAndPort peer = FBUtilities.getBroadcastAddressAndPort(); - streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, Collections.emptyList(), Collections.emptyList(), StreamSession.State.INITIALIZED, null)); + streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, emptyList(), emptyList(), StreamSession.State.INITIALIZED, null)); StreamSession session = streamCoordinator.getOrCreateOutboundSession(peer); session.init(future); diff --git a/test/unit/org/apache/cassandra/db/view/ViewUtilsTest.java b/test/unit/org/apache/cassandra/db/view/ViewUtilsTest.java index 2e6870e6a638..5a292bc1955e 100644 --- a/test/unit/org/apache/cassandra/db/view/ViewUtilsTest.java +++ b/test/unit/org/apache/cassandra/db/view/ViewUtilsTest.java @@ -23,35 +23,39 @@ import java.util.Map; import java.util.Optional; -import org.junit.*; - +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; +import org.apache.cassandra.CassandraTestBase.UseOrderPreservingPartitioner; import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.dht.OrderPreservingPartitioner; import org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.NetworkTopologyStrategy; import org.apache.cassandra.locator.Replica; -import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.SchemaTestUtil; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.StubClusterMetadataService; -public class ViewUtilsTest +@DDDaemonInitialization +@UseOrderPreservingPartitioner +public class ViewUtilsTest extends CassandraTestBase { private final String KS = "Keyspace1"; @BeforeClass public static void setUp() throws ConfigurationException, IOException { - DatabaseDescriptor.daemonInitialization(); - DatabaseDescriptor.setPartitionerUnsafe(OrderPreservingPartitioner.instance); ServerTestUtils.cleanupAndLeaveDirs(); Keyspace.setInitialized(); } diff --git a/test/unit/org/apache/cassandra/db/virtual/StreamingVirtualTableTest.java b/test/unit/org/apache/cassandra/db/virtual/StreamingVirtualTableTest.java index 30a70338f94b..b00815197842 100644 --- a/test/unit/org/apache/cassandra/db/virtual/StreamingVirtualTableTest.java +++ b/test/unit/org/apache/cassandra/db/virtual/StreamingVirtualTableTest.java @@ -54,6 +54,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.assertj.core.util.Throwables; +import static java.util.Collections.emptyList; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; @@ -90,15 +91,15 @@ public void single() throws Throwable { StreamingState state = stream(true); assertRows(execute(t("select id, follower, operation, peers, status, progress_percentage, last_updated_at, failure_cause, success_message from %s")), - new Object[] { state.id(), true, "Repair", Collections.emptyList(), "init", 0F, new Date(state.lastUpdatedAtMillis()), null, null }); + new Object[] { state.id(), true, "Repair", emptyList(), "init", 0F, new Date(state.lastUpdatedAtMillis()), null, null }); state.phase.start(); assertRows(execute(t("select id, follower, operation, peers, status, progress_percentage, last_updated_at, failure_cause, success_message from %s")), - new Object[] { state.id(), true, "Repair", Collections.emptyList(), "start", 0F, new Date(state.lastUpdatedAtMillis()), null, null }); + new Object[] { state.id(), true, "Repair", emptyList(), "start", 0F, new Date(state.lastUpdatedAtMillis()), null, null }); - state.handleStreamEvent(new StreamEvent.SessionPreparedEvent(state.id(), new SessionInfo(PEER2, 1, PEER1, Collections.emptyList(), Collections.emptyList(), StreamSession.State.PREPARING, null), StreamSession.PrepareDirection.ACK)); + state.handleStreamEvent(new StreamEvent.SessionPreparedEvent(state.id(), new SessionInfo(PEER2, 1, PEER1, emptyList(), emptyList(), StreamSession.State.PREPARING, null), StreamSession.PrepareDirection.ACK)); - state.onSuccess(new StreamState(state.id(), StreamOperation.REPAIR, ImmutableSet.of(new SessionInfo(PEER2, 1, PEER1, Collections.emptyList(), Collections.emptyList(), StreamSession.State.COMPLETE, null)))); + state.onSuccess(new StreamState(state.id(), StreamOperation.REPAIR, ImmutableSet.of(new SessionInfo(PEER2, 1, PEER1, emptyList(), emptyList(), StreamSession.State.COMPLETE, null)))); assertRows(execute(t("select id, follower, operation, peers, status, progress_percentage, last_updated_at, failure_cause, success_message from %s")), new Object[] { state.id(), true, "Repair", Arrays.asList(address(127, 0, 0, 2).toString()), "success", 100F, new Date(state.lastUpdatedAtMillis()), null, null }); } @@ -222,7 +223,7 @@ private List deterministic(Collection summaries) private static StreamSummary streamSummary() { int files = ThreadLocalRandom.current().nextInt(2, 10); - return new StreamSummary(TableId.fromUUID(UUID.randomUUID()), files, files * 1024); + return new StreamSummary(TableId.fromUUID(UUID.randomUUID()), emptyList(), files, files * 1024); } @Test @@ -232,7 +233,7 @@ public void failed() throws Throwable RuntimeException t = new RuntimeException("You failed!"); state.onFailure(t); assertRows(execute(t("select id, follower, peers, status, progress_percentage, last_updated_at, failure_cause, success_message from %s")), - new Object[] { state.id(), true, Collections.emptyList(), "failure", 100F, new Date(state.lastUpdatedAtMillis()), Throwables.getStackTrace(t), null }); + new Object[] { state.id(), true, emptyList(), "failure", 100F, new Date(state.lastUpdatedAtMillis()), Throwables.getStackTrace(t), null }); } private static String t(String query) diff --git a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java index ad5829c7c0a1..00885d8c1f4f 100644 --- a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java +++ b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java @@ -33,8 +33,10 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import org.junit.runner.RunWith; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.PrepareServerNoRegister; +import org.apache.cassandra.CassandraTestBase.UseMurmur3Partitioner; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -48,7 +50,6 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.StreamOperation; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.NodeId; @@ -57,16 +58,15 @@ import org.apache.cassandra.utils.Pair; import org.jboss.byteman.contrib.bmunit.BMRule; import org.jboss.byteman.contrib.bmunit.BMRules; -import org.jboss.byteman.contrib.bmunit.BMUnitRunner; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -@RunWith(BMUnitRunner.class) -public class BootStrapperTest +@UseMurmur3Partitioner +@PrepareServerNoRegister +public class BootStrapperTest extends CassandraTestBase { - static IPartitioner oldPartitioner; static Predicate originalAlivePredicate = RangeStreamer.ALIVE_PREDICATE; public static AtomicBoolean nonOptimizationHit = new AtomicBoolean(false); public static AtomicBoolean optimizationHit = new AtomicBoolean(false); @@ -88,9 +88,6 @@ public boolean isAlive(InetAddressAndPort ep) @BeforeClass public static void setup() throws ConfigurationException { - DatabaseDescriptor.daemonInitialization(); - oldPartitioner = StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); - ServerTestUtils.prepareServerNoRegister(); SchemaLoader.startGossiper(); SchemaLoader.schemaDefinition("BootStrapperTest"); RangeStreamer.ALIVE_PREDICATE = Predicates.alwaysTrue(); @@ -100,7 +97,6 @@ public static void setup() throws ConfigurationException @AfterClass public static void tearDown() { - DatabaseDescriptor.setPartitionerUnsafe(oldPartitioner); RangeStreamer.ALIVE_PREDICATE = originalAlivePredicate; } diff --git a/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java b/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java index 6cd4a1331dd9..abc6f023c16a 100644 --- a/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java +++ b/test/unit/org/apache/cassandra/dht/KeyCollisionTest.java @@ -20,26 +20,26 @@ import java.math.BigInteger; import java.util.List; -import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.SchemaLoaderPrepareServer; +import org.apache.cassandra.CassandraTestBase.UseLengthPartitioner; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.Util; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.marshal.IntegerType; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.RowUpdateBuilder; -import org.apache.cassandra.db.partitions.*; +import org.apache.cassandra.db.marshal.IntegerType; +import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import org.apache.cassandra.utils.bytecomparable.ByteSource; -import org.apache.cassandra.utils.FBUtilities; /** * Test cases where multiple keys collides, ie have the same token. @@ -48,29 +48,21 @@ * length partitioner that takes the length of the key as token, making * collision easy and predictable. */ -public class KeyCollisionTest +@UseLengthPartitioner +@SchemaLoaderPrepareServer +public class KeyCollisionTest extends CassandraTestBase { - static IPartitioner oldPartitioner; private static final String KEYSPACE1 = "KeyCollisionTest1"; private static final String CF = "Standard1"; @BeforeClass public static void defineSchema() throws ConfigurationException { - DatabaseDescriptor.daemonInitialization(); - oldPartitioner = StorageService.instance.setPartitionerUnsafe(LengthPartitioner.instance); - SchemaLoader.prepareServer(); SchemaLoader.createKeyspace(KEYSPACE1, KeyspaceParams.simple(1), SchemaLoader.standardCFMD(KEYSPACE1, CF)); } - @AfterClass - public static void tearDown() - { - DatabaseDescriptor.setPartitionerUnsafe(oldPartitioner); - } - @Test public void testGetSliceWithCollision() throws Exception { diff --git a/test/unit/org/apache/cassandra/dht/LengthPartitioner.java b/test/unit/org/apache/cassandra/dht/LengthPartitioner.java index 01b41d4b3b4c..e57a714e7bfa 100644 --- a/test/unit/org/apache/cassandra/dht/LengthPartitioner.java +++ b/test/unit/org/apache/cassandra/dht/LengthPartitioner.java @@ -122,6 +122,8 @@ public Token fromString(String string) public void validate(String token) {} }; + private LengthPartitioner() {} + public Token.TokenFactory getTokenFactory() { return tokenFactory; diff --git a/test/unit/org/apache/cassandra/dht/RangeTest.java b/test/unit/org/apache/cassandra/dht/RangeTest.java index 84ca1246a3d4..ff23a5910f58 100644 --- a/test/unit/org/apache/cassandra/dht/RangeTest.java +++ b/test/unit/org/apache/cassandra/dht/RangeTest.java @@ -18,8 +18,8 @@ package org.apache.cassandra.dht; import java.nio.ByteBuffer; -import java.util.Collection; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -27,27 +27,48 @@ import java.util.Set; import com.google.common.base.Joiner; +import com.google.common.base.Stopwatch; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; import org.junit.BeforeClass; import org.junit.Test; -import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.dht.ByteOrderedPartitioner.BytesToken; +import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.dht.RandomPartitioner.BigIntegerToken; import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.Util.range; -import static org.junit.Assert.*; - - -public class RangeTest +import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_RANGE_EXPENSIVE_CHECKS; +import static org.apache.cassandra.dht.Range.fromString; +import static org.apache.cassandra.dht.Range.intersectionOfNormalizedRanges; +import static org.apache.cassandra.dht.Range.invertNormalizedRanges; +import static org.apache.cassandra.dht.Range.isInNormalizedRanges; +import static org.apache.cassandra.dht.Range.normalize; +import static org.apache.cassandra.dht.Range.subtractNormalizedRanges; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + + +@DDDaemonInitialization +public class RangeTest extends CassandraTestBase { @BeforeClass - public static void setupDD() + public static void enableExpensiveRangeChecks() { - DatabaseDescriptor.daemonInitialization(); + assertFalse(TEST_RANGE_EXPENSIVE_CHECKS.getBoolean()); // Expect off by default + CassandraRelevantProperties.TEST_RANGE_EXPENSIVE_CHECKS.setBoolean(true); + assertTrue(TEST_RANGE_EXPENSIVE_CHECKS.getBoolean()); } @Test @@ -578,7 +599,7 @@ public void testDifferenceToFetchNewWraps() private > void assertNormalize(List> input, List> expected) { - List> result = Range.normalize(input); + List> result = normalize(input); assert result.equals(expected) : "Expecting " + expected + " but got " + result; } @@ -736,4 +757,132 @@ public void testGroupSubtract() assertEquals(ranges, Range.subtract(ranges, asList(r(6, 7), r(20, 25)))); assertEquals(Sets.newHashSet(r(1, 4), r(11, 15)), Range.subtract(ranges, asList(r(4, 7), r(8, 11)))); } + + @Test + public void testIntersectsBounds() + { + Range r = r(0, 100); + assertTrue(r.intersects(bounds(5, 10))); + assertTrue(r.intersects(bounds(100, 110))); + assertTrue(r.intersects(bounds(-100, 200))); + assertTrue(r.intersects(bounds(10, 15))); + assertTrue(r.intersects(bounds(20,20))); + + assertFalse(r.intersects(bounds(-5, 0))); + assertFalse(r.intersects(bounds(-5, -1))); + assertFalse(r.intersects(bounds(110, 114))); + } + + private static Bounds bounds(long left, long right) + { + return new Bounds<>(t(left), t(right)); + } + + @Test + @UseMurmur3Partitioner + public void testIsInNormalizedRanges() + { + List> ranges = ImmutableList.of(fromString("(1,10]"), fromString("(10,20]"), fromString("(30,40]"), fromString("(50,60]"), fromString("(60,70]"), fromString("(80,90]"), fromString("(" + Long.MAX_VALUE + ",-9223372036854775808]")); + for (int ii = 0; ii < 100; ii++) + { + boolean isIn = isInNormalizedRanges(new LongToken(ii), ranges); + if (ii > 1 && ii <= 20) + assertTrue("Index " + ii, isIn); + else if (ii > 30 && ii <= 40) + assertTrue("Index " + ii, isIn); + else if (ii > 50 && ii <= 70) + assertTrue("Index " + ii, isIn); + else if (ii > 80 && ii <= 90) + assertTrue("Index " + ii, isIn); + else + assertFalse("Index " + ii, isIn); + } + assertFalse(isInNormalizedRanges(new LongToken(Long.MAX_VALUE), ranges)); + assertTrue(isInNormalizedRanges(new LongToken(Long.MIN_VALUE), ranges)); + ranges = ImmutableList.of(fromString("(-9223372036854775808,-9223372036854775807]")); + assertFalse(isInNormalizedRanges(new LongToken(Long.MIN_VALUE), ranges)); + assertTrue(isInNormalizedRanges(new LongToken(Long.MIN_VALUE + 1), ranges)); + ranges = ImmutableList.of(fromString("(" + (Long.MAX_VALUE - 1) + ",-9223372036854775808]")); + assertFalse(isInNormalizedRanges(new LongToken(Long.MAX_VALUE - 1), ranges)); + assertTrue(isInNormalizedRanges(new LongToken(Long.MAX_VALUE), ranges)); + assertTrue(isInNormalizedRanges(new LongToken(Long.MIN_VALUE), ranges)); + assertFalse(isInNormalizedRanges(new LongToken(Long.MAX_VALUE - 1), normalize(ranges))); + assertTrue(isInNormalizedRanges(new LongToken(Long.MAX_VALUE), normalize(ranges))); + assertTrue(isInNormalizedRanges(new LongToken(Long.MIN_VALUE), normalize(ranges))); + } + + @Test + @UseMurmur3Partitioner + public void testSubtractNormalizedRanges() + { + List> ranges = ImmutableList.of(fromString("(1,10]"), fromString("(10,20]"), fromString("(30,40]"), fromString("(50,60]"), fromString("(60,70]"), fromString("(80,90]"), fromString("(" + Long.MAX_VALUE + ",-9223372036854775808]")); + for (int ii = 0; ii < 100; ii++) + { + boolean isIn = isInNormalizedRanges(new LongToken(ii), ranges); + if (ii > 1 && ii <= 20) + assertTrue("Index " + ii, isIn); + else if (ii > 30 && ii <= 40) + assertTrue("Index " + ii, isIn); + else if (ii > 50 && ii <= 70) + assertTrue("Index " + ii, isIn); + else if (ii > 80 && ii <= 90) + assertTrue("Index " + ii, isIn); + else + assertFalse("Index " + ii, isIn); + } + List> rightMostRange = ImmutableList.of(r(Long.MAX_VALUE, Long.MIN_VALUE)); + List> maxLongRange = ImmutableList.of(r(Long.MAX_VALUE - 1, Long.MAX_VALUE)); + + assertEquals(emptyList(), subtractNormalizedRanges(ranges, ranges)); + assertEquals(emptyList(), subtractNormalizedRanges(rightMostRange, ranges)); + assertEquals(maxLongRange, subtractNormalizedRanges(maxLongRange, ranges)); + ranges = maxLongRange; + assertEquals(emptyList(), subtractNormalizedRanges(ranges, ranges)); + assertEquals(rightMostRange, subtractNormalizedRanges(rightMostRange, ranges)); + assertEquals(emptyList(), subtractNormalizedRanges(maxLongRange, ranges)); + ranges = ImmutableList.of(fromString("(" + (Long.MAX_VALUE - 1) + ",-9223372036854775808]")); + assertEquals(emptyList(), subtractNormalizedRanges(ranges, ranges)); + assertEquals(emptyList(), subtractNormalizedRanges(rightMostRange, ranges)); + assertEquals(emptyList(), subtractNormalizedRanges(maxLongRange, ranges)); + } + + @Test + public void testExpensiveChecksBurn() + { + long seed = System.nanoTime(); + System.out.println(seed); + Random r = new java.util.Random(seed); + + Stopwatch elapsed = Stopwatch.createStarted(); + while (elapsed.elapsed(SECONDS) != 10) + { + int numRanges = 3; + List> a = new ArrayList(); + for (int ii = 0; ii < numRanges; ii++) + { + a.add(new Range<>(new LongToken(r.nextLong()), new LongToken(r.nextLong()))); + } + a = ImmutableList.copyOf(normalize(a)); + List> b = new ArrayList(); + for (int ii = 0; ii < numRanges; ii++) + { + b.add(new Range<>(new LongToken(r.nextLong()), new LongToken(r.nextLong()))); + } + b = ImmutableList.copyOf(normalize(b)); + + for (int ii = 0; ii < 1000; ii++) + { + Token t = new LongToken(r.nextLong()); + isInNormalizedRanges(t, a); + isInNormalizedRanges(t, b); + } + + intersectionOfNormalizedRanges(a, b); + intersectionOfNormalizedRanges(b, a); + subtractNormalizedRanges(a, b); + subtractNormalizedRanges(b, a); + invertNormalizedRanges(a); + invertNormalizedRanges(b); + } + } } diff --git a/test/unit/org/apache/cassandra/dht/SplitterTest.java b/test/unit/org/apache/cassandra/dht/SplitterTest.java index 1de22ff8fc69..707d294a8087 100644 --- a/test/unit/org/apache/cassandra/dht/SplitterTest.java +++ b/test/unit/org/apache/cassandra/dht/SplitterTest.java @@ -46,25 +46,25 @@ public class SplitterTest @Test public void randomSplitTestNoVNodesRandomPartitioner() { - randomSplitTestNoVNodes(new RandomPartitioner()); + randomSplitTestNoVNodes(RandomPartitioner.instance); } @Test public void randomSplitTestNoVNodesMurmur3Partitioner() { - randomSplitTestNoVNodes(new Murmur3Partitioner()); + randomSplitTestNoVNodes(Murmur3Partitioner.instance); } @Test public void randomSplitTestVNodesRandomPartitioner() { - randomSplitTestVNodes(new RandomPartitioner()); + randomSplitTestVNodes(RandomPartitioner.instance); } @Test public void randomSplitTestVNodesMurmur3Partitioner() { - randomSplitTestVNodes(new Murmur3Partitioner()); + randomSplitTestVNodes(Murmur3Partitioner.instance); } // CASSANDRA-18013 @@ -235,13 +235,13 @@ private static List generateLocalRanges(int numTokens, i @Test public void testSplitMurmur3Partitioner() { - testSplit(new Murmur3Partitioner()); + testSplit(Murmur3Partitioner.instance); } @Test public void testSplitRandomPartitioner() { - testSplit(new RandomPartitioner()); + testSplit(RandomPartitioner.instance); } @SuppressWarnings("unchecked") @@ -359,13 +359,13 @@ private static Token token(IPartitioner partitioner, Object n) @Test public void testTokensInRangeRandomPartitioner() { - testTokensInRange(new RandomPartitioner()); + testTokensInRange(RandomPartitioner.instance); } @Test public void testTokensInRangeMurmur3Partitioner() { - testTokensInRange(new Murmur3Partitioner()); + testTokensInRange(Murmur3Partitioner.instance); } private static void testTokensInRange(IPartitioner partitioner) @@ -391,13 +391,13 @@ private static void testTokensInRange(IPartitioner partitioner) @Test public void testElapsedTokensRandomPartitioner() { - testElapsedMultiRange(new RandomPartitioner()); + testElapsedMultiRange(RandomPartitioner.instance); } @Test public void testElapsedTokensMurmur3Partitioner() { - testElapsedMultiRange(new Murmur3Partitioner()); + testElapsedMultiRange(Murmur3Partitioner.instance); } private static void testElapsedMultiRange(IPartitioner partitioner) @@ -457,13 +457,13 @@ private static void testElapsedTokens(IPartitioner partitioner, Range ran @Test public void testPositionInRangeRandomPartitioner() { - testPositionInRangeMultiRange(new RandomPartitioner()); + testPositionInRangeMultiRange(RandomPartitioner.instance); } @Test public void testPositionInRangeMurmur3Partitioner() { - testPositionInRangeMultiRange(new Murmur3Partitioner()); + testPositionInRangeMultiRange(Murmur3Partitioner.instance); } private static void testPositionInRangeMultiRange(IPartitioner partitioner) diff --git a/test/unit/org/apache/cassandra/dht/StreamStateStoreTest.java b/test/unit/org/apache/cassandra/dht/StreamStateStoreTest.java index d731385fd318..816366e26fd8 100644 --- a/test/unit/org/apache/cassandra/dht/StreamStateStoreTest.java +++ b/test/unit/org/apache/cassandra/dht/StreamStateStoreTest.java @@ -19,18 +19,18 @@ import java.util.Collections; -import org.apache.cassandra.db.commitlog.CommitLog; -import org.apache.cassandra.locator.RangesAtEndpoint; import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.streaming.async.NettyStreamingConnectionFactory; +import org.apache.cassandra.locator.RangesAtEndpoint; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.streaming.StreamEvent; import org.apache.cassandra.streaming.StreamOperation; import org.apache.cassandra.streaming.StreamSession; +import org.apache.cassandra.streaming.async.NettyStreamingConnectionFactory; import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.net.MessagingService.current_version; @@ -51,7 +51,7 @@ public static void initDD() public void testUpdateAndQueryAvailableRanges() { // let range (0, 100] of keyspace1 be bootstrapped. - IPartitioner p = new Murmur3Partitioner(); + IPartitioner p = Murmur3Partitioner.instance; Token.TokenFactory factory = p.getTokenFactory(); Range range = new Range<>(factory.fromString("0"), factory.fromString("100")); diff --git a/test/unit/org/apache/cassandra/dht/tokenallocator/TokenAllocationTest.java b/test/unit/org/apache/cassandra/dht/tokenallocator/TokenAllocationTest.java index 6b4ef404656d..7099565e3eb0 100644 --- a/test/unit/org/apache/cassandra/dht/tokenallocator/TokenAllocationTest.java +++ b/test/unit/org/apache/cassandra/dht/tokenallocator/TokenAllocationTest.java @@ -28,16 +28,15 @@ import java.util.Set; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; -import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; +import org.apache.cassandra.CassandraTestBase.UseMurmur3Partitioner; import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; @@ -56,18 +55,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; -public class TokenAllocationTest +@DDDaemonInitialization +@UseMurmur3Partitioner +public class TokenAllocationTest extends CassandraTestBase { - static IPartitioner oldPartitioner; static Random rand = new Random(1); - @BeforeClass - public static void beforeClass() throws ConfigurationException - { - DatabaseDescriptor.daemonInitialization(); - oldPartitioner = StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); - } - @Before public void before() throws ConfigurationException { @@ -81,12 +74,6 @@ public void after() throws ConfigurationException ClusterMetadataService.unsetInstance(); } - @AfterClass - public static void afterClass() - { - DatabaseDescriptor.setPartitionerUnsafe(oldPartitioner); - } - private static TokenAllocation createForTest(ClusterMetadata metadata, int replicas, int numTokens) { return TokenAllocation.create(metadata.locator.local().datacenter, metadata, replicas, numTokens); diff --git a/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java b/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java index b7b67ba9bde7..c61a1b50a2b2 100644 --- a/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java @@ -49,7 +49,6 @@ import org.junit.rules.TemporaryFolder; import com.datastax.driver.core.utils.UUIDs; -import org.apache.cassandra.Util; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.cql3.functions.types.DataType; @@ -59,7 +58,6 @@ import org.apache.cassandra.cql3.functions.types.UserType; import org.apache.cassandra.db.marshal.FloatType; import org.apache.cassandra.db.marshal.UTF8Type; -import org.apache.cassandra.dht.ByteOrderedPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -118,57 +116,54 @@ public void perTestSetup() throws IOException @Test public void testUnsortedWriter() throws Exception { - try (AutoCloseable ignored = Util.switchPartitioner(ByteOrderedPartitioner.instance)) - { - String schema = "CREATE TABLE " + qualifiedTable + " (" - + " k int PRIMARY KEY," - + " v1 text," - + " v2 int" - + ")"; - String insert = "INSERT INTO " + qualifiedTable + " (k, v1, v2) VALUES (?, ?, ?)"; - CQLSSTableWriter writer = CQLSSTableWriter.builder() - .inDirectory(dataDir) - .forTable(schema) - .using(insert).build(); + String schema = "CREATE TABLE " + qualifiedTable + " (" + + " k int PRIMARY KEY," + + " v1 text," + + " v2 int" + + ")"; + String insert = "INSERT INTO " + qualifiedTable + " (k, v1, v2) VALUES (?, ?, ?)"; + CQLSSTableWriter writer = CQLSSTableWriter.builder() + .inDirectory(dataDir) + .forTable(schema) + .using(insert).build(); - writer.addRow(0, "test1", 24); - writer.addRow(1, "test2", 44); - writer.addRow(2, "test3", 42); - writer.addRow(ImmutableMap.of("k", 3, "v2", 12)); + writer.addRow(0, "test1", 24); + writer.addRow(1, "test2", 44); + writer.addRow(2, "test3", 42); + writer.addRow(ImmutableMap.of("k", 3, "v2", 12)); - writer.close(); + writer.close(); - loadSSTables(dataDir, keyspace, table); + loadSSTables(dataDir, keyspace, table); - if (verifyDataAfterLoading) - { - UntypedResultSet rs = QueryProcessor.executeInternal("SELECT * FROM " + qualifiedTable); - assertEquals(4, rs.size()); - - Iterator iter = rs.iterator(); - UntypedResultSet.Row row; - - row = iter.next(); - assertEquals(0, row.getInt("k")); - assertEquals("test1", row.getString("v1")); - assertEquals(24, row.getInt("v2")); - - row = iter.next(); - assertEquals(1, row.getInt("k")); - assertEquals("test2", row.getString("v1")); - //assertFalse(row.has("v2")); - assertEquals(44, row.getInt("v2")); - - row = iter.next(); - assertEquals(2, row.getInt("k")); - assertEquals("test3", row.getString("v1")); - assertEquals(42, row.getInt("v2")); - - row = iter.next(); - assertEquals(3, row.getInt("k")); - assertEquals(null, row.getBytes("v1")); // Using getBytes because we know it won't NPE - assertEquals(12, row.getInt("v2")); - } + if (verifyDataAfterLoading) + { + UntypedResultSet rs = QueryProcessor.executeInternal("SELECT * FROM " + qualifiedTable); + assertEquals(4, rs.size()); + + Iterator iter = rs.iterator(); + UntypedResultSet.Row row; + + row = iter.next(); + assertEquals(0, row.getInt("k")); + assertEquals("test1", row.getString("v1")); + assertEquals(24, row.getInt("v2")); + + row = iter.next(); + assertEquals(1, row.getInt("k")); + assertEquals("test2", row.getString("v1")); + //assertFalse(row.has("v2")); + assertEquals(44, row.getInt("v2")); + + row = iter.next(); + assertEquals(2, row.getInt("k")); + assertEquals("test3", row.getString("v1")); + assertEquals(42, row.getInt("v2")); + + row = iter.next(); + assertEquals(3, row.getInt("k")); + assertEquals(null, row.getBytes("v1")); // Using getBytes because we know it won't NPE + assertEquals(12, row.getInt("v2")); } } diff --git a/test/unit/org/apache/cassandra/io/sstable/ScrubTest.java b/test/unit/org/apache/cassandra/io/sstable/ScrubTest.java index e7952549035a..323cd01fbb6b 100644 --- a/test/unit/org/apache/cassandra/io/sstable/ScrubTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/ScrubTest.java @@ -478,7 +478,7 @@ public void testScrubOutOfOrder() // This test assumes ByteOrderPartitioner to create out-of-order SSTable IPartitioner oldPartitioner = DatabaseDescriptor.getPartitioner(); - DatabaseDescriptor.setPartitionerUnsafe(new ByteOrderedPartitioner()); + DatabaseDescriptor.setPartitionerUnsafe(ByteOrderedPartitioner.instance); // Create out-of-order SSTable File tempDir = FileUtils.createTempFile("ScrubTest.testScrubOutOfOrder", "").parent(); diff --git a/test/unit/org/apache/cassandra/io/sstable/format/bti/PartitionIndexTest.java b/test/unit/org/apache/cassandra/io/sstable/format/bti/PartitionIndexTest.java index 6ceee331f6e8..36e9f8fb068c 100644 --- a/test/unit/org/apache/cassandra/io/sstable/format/bti/PartitionIndexTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/format/bti/PartitionIndexTest.java @@ -291,7 +291,7 @@ private long eq(List keys, DecoratedKey key) @Test public void testAddEmptyKey() throws Exception { - IPartitioner p = new RandomPartitioner(); + IPartitioner p = RandomPartitioner.instance; File file = FileUtils.createTempFile("ColumnTrieReaderTest", ""); FileHandle.Builder fhBuilder = makeHandle(file); diff --git a/test/unit/org/apache/cassandra/io/sstable/indexsummary/IndexSummaryTest.java b/test/unit/org/apache/cassandra/io/sstable/indexsummary/IndexSummaryTest.java index aea166aa3ea1..079437c6ff15 100644 --- a/test/unit/org/apache/cassandra/io/sstable/indexsummary/IndexSummaryTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/indexsummary/IndexSummaryTest.java @@ -248,7 +248,7 @@ public void testSerialization() throws IOException @Test public void testAddEmptyKey() throws Exception { - IPartitioner p = new RandomPartitioner(); + IPartitioner p = RandomPartitioner.instance; try (IndexSummaryBuilder builder = new IndexSummaryBuilder(1, 1, BASE_SAMPLING_LEVEL)) { builder.maybeAddEntry(p.decorateKey(ByteBufferUtil.EMPTY_BYTE_BUFFER), 0); diff --git a/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java b/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java index 9e9ff605df99..20da42dd120b 100644 --- a/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java +++ b/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java @@ -36,9 +36,10 @@ import org.junit.Test; import org.junit.runner.RunWith; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.PrepareServerNoRegister; +import org.apache.cassandra.CassandraTestBase.UseMurmur3Partitioner; import org.apache.cassandra.SchemaLoader; -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Token; @@ -49,6 +50,7 @@ import org.apache.cassandra.schema.SchemaTestUtil; import org.apache.cassandra.schema.Tables; import org.apache.cassandra.service.reads.NeverSpeculativeRetryPolicy; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.utils.FBUtilities; import org.jboss.byteman.contrib.bmunit.BMRule; import org.jboss.byteman.contrib.bmunit.BMUnitRunner; @@ -69,7 +71,9 @@ targetClass = "FailureDetector", targetMethod = "isAlive", action = "return true;") -public class AssureSufficientLiveNodesTest +@PrepareServerNoRegister +@UseMurmur3Partitioner +public class AssureSufficientLiveNodesTest extends CassandraTestBase { private static final AtomicInteger testIdGen = new AtomicInteger(0); private static final Supplier keyspaceNameGen = () -> "race_" + testIdGen.getAndIncrement(); @@ -82,9 +86,6 @@ public class AssureSufficientLiveNodesTest @BeforeClass public static void setUpClass() throws Throwable { - ServerTestUtils.daemonInitialization(); - DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - ServerTestUtils.prepareServerNoRegister(); // Register peers with expected DC for NetworkTopologyStrategy. List instances = ImmutableList.of( // datacenter 1 @@ -139,7 +140,7 @@ public void addDatacenterShouldNotCausesUnavailableWithEachQuorumTest() throws T // alter to KeyspaceParams.nts(DC1, 3, DC2, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE) + keyspace -> ReplicaPlans.forRead(keyspace, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } @@ -172,7 +173,7 @@ public void addDatacenterShouldNotCausesUnavailableWithQuorumTest() throws Throw // alter to KeyspaceParams.nts(DC1, 3, DC2, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE) + keyspace -> ReplicaPlans.forRead(keyspace, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); raceOfReplicationStrategyTest( // init. The # of live endpoints is 3 = 2 + 1 @@ -180,7 +181,7 @@ public void addDatacenterShouldNotCausesUnavailableWithQuorumTest() throws Throw // alter to. (3 + 3) / 2 + 1 > 3 KeyspaceParams.nts(DC1, 2, DC2, 1, DC3, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE) + keyspace -> ReplicaPlans.forRead(keyspace, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } @@ -204,7 +205,7 @@ public void raceOnRemoveDatacenterNotCausesUnavailable() throws Throwable // alter to KeyspaceParams.nts(DC1, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE) + keyspace -> ReplicaPlans.forRead(keyspace, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } @@ -228,7 +229,7 @@ public void increaseReplicationFactorShouldNotCausesUnavailableTest() throws Thr // alter to KeyspaceParams.nts(DC1, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, LOCAL_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE) + keyspace -> ReplicaPlans.forRead(keyspace, tk, null, LOCAL_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } diff --git a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java index 8fc3498bfb51..babb7d659be2 100644 --- a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java +++ b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java @@ -32,6 +32,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.Directory; @@ -91,6 +92,7 @@ public static ClusterMetadata metadata(NodeConfiguration... configurations) AccordKeyspaces.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, + ConsensusMigrationState.EMPTY, ImmutableMap.of()); } diff --git a/test/unit/org/apache/cassandra/locator/NetworkTopologyStrategyTest.java b/test/unit/org/apache/cassandra/locator/NetworkTopologyStrategyTest.java index fc235526fcef..e59c847f7746 100644 --- a/test/unit/org/apache/cassandra/locator/NetworkTopologyStrategyTest.java +++ b/test/unit/org/apache/cassandra/locator/NetworkTopologyStrategyTest.java @@ -20,29 +20,42 @@ import java.io.IOException; import java.net.UnknownHostException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; import java.util.stream.Collectors; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; - -import org.junit.*; +import org.junit.After; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; import org.junit.rules.ExpectedException; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DisableMBeanRegistration; +import org.apache.cassandra.CassandraTestBase.PrepareServerNoRegister; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.Util; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.dht.*; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.service.ClientWarn; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.RegistrationStatus; @@ -59,20 +72,13 @@ import static org.apache.cassandra.locator.SimpleLocationProvider.LOCATION; import static org.junit.Assert.assertTrue; -public class NetworkTopologyStrategyTest +@PrepareServerNoRegister +@DisableMBeanRegistration +public class NetworkTopologyStrategyTest extends CassandraTestBase { private static final String KEYSPACE = "ks1"; private static final Logger logger = LoggerFactory.getLogger(NetworkTopologyStrategyTest.class); - @BeforeClass - public static void setupDD() - { - DatabaseDescriptor.daemonInitialization(); - DatabaseDescriptor.setPartitionerUnsafe(OrderPreservingPartitioner.instance); - DatabaseDescriptor.setTransientReplicationEnabledUnsafe(true); - ClusterMetadataService.setInstance(ClusterMetadataTestHelper.instanceForTest()); - } - @After public void teardown() { @@ -80,6 +86,7 @@ public void teardown() } @Test + @UseOrderPreservingPartitioner public void testProperties() throws IOException, ConfigurationException { createDummyTokens(true); @@ -104,6 +111,7 @@ public void testProperties() throws IOException, ConfigurationException } @Test + @UseOrderPreservingPartitioner public void testPropertiesWithEmptyDC() throws IOException, ConfigurationException { createDummyTokens(false); @@ -126,6 +134,7 @@ public void testPropertiesWithEmptyDC() throws IOException, ConfigurationExcepti } @Test + @UseOrderPreservingPartitioner public void testLargeCluster() throws UnknownHostException, ConfigurationException { int[] dcRacks = new int[]{2, 4, 8}; @@ -201,47 +210,45 @@ public void tokenFactory(String token, byte[] bytes, Location location) throws U } @Test + @UseMurmur3Partitioner public void testCalculateEndpoints() throws UnknownHostException { final int NODES = 100; final int VNODES = 64; final int RUNS = 10; - try (WithPartitioner m3p = new WithPartitioner(Murmur3Partitioner.instance)) + Map datacenters = ImmutableMap.of("rf1", 1, "rf3", 3, "rf5_1", 5, "rf5_2", 5, "rf5_3", 5); + List nodes = new ArrayList<>(NODES); + for (byte i = 0; i < NODES; ++i) + nodes.add(InetAddressAndPort.getByAddress(new byte[]{ 127, 0, 0, i })); + for (int run = 0; run < RUNS; ++run) { - Map datacenters = ImmutableMap.of("rf1", 1, "rf3", 3, "rf5_1", 5, "rf5_2", 5, "rf5_3", 5); - List nodes = new ArrayList<>(NODES); - for (byte i = 0; i < NODES; ++i) - nodes.add(InetAddressAndPort.getByAddress(new byte[]{ 127, 0, 0, i })); - for (int run = 0; run < RUNS; ++run) - { - ServerTestUtils.resetCMS(); - Random rand = new Random(run); - Locator locator = generateLocator(datacenters, nodes, rand); + ServerTestUtils.resetCMS(); + Random rand = new Random(run); + Locator locator = generateLocator(datacenters, nodes, rand); - for (int i = 0; i < NODES; ++i) // Nodes + for (int i = 0; i < NODES; ++i) // Nodes + { + Set tokens = new HashSet<>(); + while (tokens.size() < VNODES) // tokens/vnodes per node { - Set tokens = new HashSet<>(); - while (tokens.size() < VNODES) // tokens/vnodes per node - { - tokens.add(Murmur3Partitioner.instance.getRandomToken(rand)); - } - // Here we fake the registration status because we want all the nodes to be registered in cluster - // metadata using the locations we setup in generateLocator. This registration occurs as a part of - // the addEndpoint call here and behaves as expected for all nodes _except_ the one with the address - // which matches the local broadcast address (i.e. 127.0.0.1, which is #2 in the list of nodes). - // The location we want this to be registered with is {DC: rf5_1, rack: 3}, but while - // RegistrationStatus.instance indicates that the node is yet to be registered, the Locator will - // correctly return the initialization location obtained from - // DatabaseDescriptor::getInitialLocationProvider, which ultimately resolves to - // SimpleLocationProvider (because test/conf/cassandra.yaml specifies use of SimpleSnitch) and so - // we register that one node with the location {DC: datacenter1, rack: rack1}. - // This is purely an artefact of the contrived testing setup and in more realistic scenarios, - // including the majority of tests, isn't an issue. - RegistrationStatus.instance.onRegistration(); - ClusterMetadataTestHelper.addEndpoint(nodes.get(i), tokens, locator.location(nodes.get(i))); + tokens.add(Murmur3Partitioner.instance.getRandomToken(rand)); } - testEquivalence(ClusterMetadata.current(), locator, datacenters, rand); + // Here we fake the registration status because we want all the nodes to be registered in cluster + // metadata using the locations we setup in generateLocator. This registration occurs as a part of + // the addEndpoint call here and behaves as expected for all nodes _except_ the one with the address + // which matches the local broadcast address (i.e. 127.0.0.1, which is #2 in the list of nodes). + // The location we want this to be registered with is {DC: rf5_1, rack: 3}, but while + // RegistrationStatus.instance indicates that the node is yet to be registered, the Locator will + // correctly return the initialization location obtained from + // DatabaseDescriptor::getInitialLocationProvider, which ultimately resolves to + // SimpleLocationProvider (because test/conf/cassandra.yaml specifies use of SimpleSnitch) and so + // we register that one node with the location {DC: datacenter1, rack: rack1}. + // This is purely an artefact of the contrived testing setup and in more realistic scenarios, + // including the majority of tests, isn't an issue. + RegistrationStatus.instance.onRegistration(); + ClusterMetadataTestHelper.addEndpoint(nodes.get(i), tokens, locator.location(nodes.get(i))); } + testEquivalence(ClusterMetadata.current(), locator, datacenters, rand); } } @@ -438,35 +445,32 @@ private static Range range(long l, long r) } @Test + @UseMurmur3Partitioner public void testTransientReplica() throws Exception { - try (WithPartitioner m3p = new WithPartitioner(Murmur3Partitioner.instance)) - { - List endpoints = Lists.newArrayList(InetAddressAndPort.getByName("127.0.0.1"), - InetAddressAndPort.getByName("127.0.0.2"), - InetAddressAndPort.getByName("127.0.0.3"), - InetAddressAndPort.getByName("127.0.0.4")); - - ClusterMetadataTestHelper.addEndpoint(endpoints.get(0), tk(100), LOCATION); - ClusterMetadataTestHelper.addEndpoint(endpoints.get(1), tk(200), LOCATION); - ClusterMetadataTestHelper.addEndpoint(endpoints.get(2), tk(300), LOCATION); - ClusterMetadataTestHelper.addEndpoint(endpoints.get(3), tk(400), LOCATION); - - Map configOptions = new HashMap<>(); - configOptions.put(LOCATION.datacenter, "3/1"); - NetworkTopologyStrategy strategy = new NetworkTopologyStrategy(KEYSPACE, configOptions); - - Util.assertRCEquals(EndpointsForRange.of(fullReplica(endpoints.get(0), range(400, 100)), - fullReplica(endpoints.get(1), range(400, 100)), - transientReplica(endpoints.get(2), range(400, 100))), - strategy.calculateNaturalReplicas(tk(99), ClusterMetadata.current())); - - - Util.assertRCEquals(EndpointsForRange.of(fullReplica(endpoints.get(1), range(100, 200)), - fullReplica(endpoints.get(2), range(100, 200)), - transientReplica(endpoints.get(3), range(100, 200))), - strategy.calculateNaturalReplicas(tk(101), ClusterMetadata.current())); - } + List endpoints = Lists.newArrayList(InetAddressAndPort.getByName("127.0.0.1"), + InetAddressAndPort.getByName("127.0.0.2"), + InetAddressAndPort.getByName("127.0.0.3"), + InetAddressAndPort.getByName("127.0.0.4")); + + ClusterMetadataTestHelper.addEndpoint(endpoints.get(0), tk(100), LOCATION); + ClusterMetadataTestHelper.addEndpoint(endpoints.get(1), tk(200), LOCATION); + ClusterMetadataTestHelper.addEndpoint(endpoints.get(2), tk(300), LOCATION); + ClusterMetadataTestHelper.addEndpoint(endpoints.get(3), tk(400), LOCATION); + + Map configOptions = new HashMap<>(); + configOptions.put(LOCATION.datacenter, "3/1"); + NetworkTopologyStrategy strategy = new NetworkTopologyStrategy(KEYSPACE, configOptions); + Util.assertRCEquals(EndpointsForRange.of(fullReplica(endpoints.get(0), range(400, 100)), + fullReplica(endpoints.get(1), range(400, 100)), + transientReplica(endpoints.get(2), range(400, 100))), + strategy.calculateNaturalReplicas(tk(99), ClusterMetadata.current())); + + + Util.assertRCEquals(EndpointsForRange.of(fullReplica(endpoints.get(1), range(100, 200)), + fullReplica(endpoints.get(2), range(100, 200)), + transientReplica(endpoints.get(3), range(100, 200))), + strategy.calculateNaturalReplicas(tk(101), ClusterMetadata.current())); } @Rule @@ -486,6 +490,7 @@ public void shouldRejectReplicationFactorOption() throws ConfigurationException } @Test + @UseOrderPreservingPartitioner public void shouldWarnOnHigherReplicationFactorThanNodesInDC() { HashMap configOptions = new HashMap<>(); diff --git a/test/unit/org/apache/cassandra/locator/PropertyFileSnitchTest.java b/test/unit/org/apache/cassandra/locator/PropertyFileSnitchTest.java index 35bdb98d8d0d..d2bbd3a0805d 100644 --- a/test/unit/org/apache/cassandra/locator/PropertyFileSnitchTest.java +++ b/test/unit/org/apache/cassandra/locator/PropertyFileSnitchTest.java @@ -31,10 +31,11 @@ import org.junit.After; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Test; -import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; +import org.apache.cassandra.CassandraTestBase.UseRandomPartitioner; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.StubClusterMetadataService; @@ -48,18 +49,14 @@ /** * Unit tests for {@link PropertyFileSnitch}. */ -public class PropertyFileSnitchTest +@DDDaemonInitialization +@UseRandomPartitioner +public class PropertyFileSnitchTest extends CassandraTestBase { private Path effectiveFile; private Path backupFile; private InetAddressAndPort localAddress; - @BeforeClass - public static void setupDD() - { - DatabaseDescriptor.daemonInitialization(); - } - @Before public void setup() throws ConfigurationException, IOException { diff --git a/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java b/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java index 874e50d17a56..625e2e65ef85 100644 --- a/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java +++ b/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java @@ -28,38 +28,47 @@ import com.google.common.collect.HashMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; -import org.junit.*; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; import org.junit.rules.ExpectedException; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DisableMBeanRegistration; +import org.apache.cassandra.CassandraTestBase.PrepareServerNoRegister; import org.apache.cassandra.SchemaLoader; -import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.Util; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.dht.*; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.OrderPreservingPartitioner; import org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken; import org.apache.cassandra.dht.RandomPartitioner.BigIntegerToken; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.schema.ReplicationParams; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.tcm.transformations.Register; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.ClientWarn; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.transformations.Register; import org.apache.cassandra.utils.ByteBufferUtil; -import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; import static org.apache.cassandra.ServerTestUtils.recreateCMS; +import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -public class SimpleStrategyTest +@PrepareServerNoRegister +@DisableMBeanRegistration +public class SimpleStrategyTest extends CassandraTestBase { public static final String KEYSPACE1 = "SimpleStrategyTest"; public static final String MULTIDC = "MultiDCSimpleStrategyTest"; @@ -69,16 +78,9 @@ public class SimpleStrategyTest ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION.setBoolean(true); } - @BeforeClass - public static void defineSchema() - { - DatabaseDescriptor.daemonInitialization(); - } - - public static void withPartitioner(IPartitioner partitioner) + @Before + public void defineSchema() { - DatabaseDescriptor.setPartitionerUnsafe(partitioner); - ServerTestUtils.prepareServerNoRegister(); recreateCMS(); SchemaLoader.createKeyspace(KEYSPACE1, KeyspaceParams.simple(1)); SchemaLoader.createKeyspace(MULTIDC, KeyspaceParams.simple(3)); @@ -92,9 +94,10 @@ public void tryValidKeyspace() } @Test + @UseRandomPartitioner public void testBigIntegerEndpoints() throws UnknownHostException { - withPartitioner(RandomPartitioner.instance); + defineSchema(); List endpointTokens = new ArrayList<>(); List keyTokens = new ArrayList<>(); for (int i = 0; i < 5; i++) { @@ -105,24 +108,23 @@ public void testBigIntegerEndpoints() throws UnknownHostException } @Test + @UseOrderPreservingPartitioner public void testStringEndpoints() throws UnknownHostException { - IPartitioner partitioner = OrderPreservingPartitioner.instance; - withPartitioner(partitioner); + defineSchema(); List endpointTokens = new ArrayList(); List keyTokens = new ArrayList(); for (int i = 0; i < 5; i++) { endpointTokens.add(new StringToken(String.valueOf((char)('a' + i * 2)))); - keyTokens.add(partitioner.getToken(ByteBufferUtil.bytes(String.valueOf((char) ('a' + i * 2 + 1))))); + keyTokens.add(OrderPreservingPartitioner.instance.getToken(ByteBufferUtil.bytes(String.valueOf((char) ('a' + i * 2 + 1))))); } verifyGetNaturalEndpoints(endpointTokens.toArray(new Token[0]), keyTokens.toArray(new Token[0])); } @Test + @UseMurmur3Partitioner public void testMultiDCSimpleStrategyEndpoints() throws UnknownHostException { - withPartitioner(Murmur3Partitioner.instance); - // Topology taken directly from the topology_test.test_size_estimates_multidc dtest that regressed Multimap dc1 = HashMultimap.create(); dc1.put(InetAddressAndPort.getByName("127.0.0.1"), new Murmur3Partitioner.LongToken(-6639341390736545756L)); @@ -186,9 +188,10 @@ private void verifyGetNaturalEndpoints(Token[] endpointTokens, Token[] keyTokens } @Test + @UseRandomPartitioner public void testGetEndpointsDuringBootstrap() throws UnknownHostException, ExecutionException, InterruptedException { - withPartitioner(RandomPartitioner.instance); + defineSchema(); // the token difference will be RING_SIZE * 2. final int RING_SIZE = 10; @@ -264,10 +267,9 @@ private static Range range(long l, long r) } @Test + @UseMurmur3Partitioner public void transientReplica() throws Exception { - withPartitioner(Murmur3Partitioner.instance); - List endpoints = Lists.newArrayList(InetAddressAndPort.getByName("127.0.0.1"), InetAddressAndPort.getByName("127.0.0.2"), InetAddressAndPort.getByName("127.0.0.3"), @@ -305,9 +307,10 @@ public void transientReplica() throws Exception public ExpectedException expectedEx = ExpectedException.none(); @Test + @UseMurmur3Partitioner public void testSimpleStrategyThrowsConfigurationException() throws ConfigurationException, UnknownHostException { - withPartitioner(Murmur3Partitioner.instance); + defineSchema(); expectedEx.expect(ConfigurationException.class); expectedEx.expectMessage("SimpleStrategy requires a replication_factor strategy option."); @@ -327,9 +330,10 @@ public void testSimpleStrategyThrowsConfigurationException() throws Configuratio } @Test + @UseMurmur3Partitioner public void shouldReturnNoEndpointsForEmptyRing() { - withPartitioner(Murmur3Partitioner.instance); + defineSchema(); HashMap configOptions = new HashMap<>(); configOptions.put("replication_factor", "1"); @@ -341,9 +345,10 @@ public void shouldReturnNoEndpointsForEmptyRing() } @Test + @UseMurmur3Partitioner public void shouldWarnOnHigherReplicationFactorThanNodes() { - withPartitioner(Murmur3Partitioner.instance); + defineSchema(); HashMap configOptions = new HashMap<>(); configOptions.put("replication_factor", "2"); diff --git a/test/unit/org/apache/cassandra/net/MessageTest.java b/test/unit/org/apache/cassandra/net/MessageTest.java index ddc5f6b9c6b2..15d062e1a70c 100644 --- a/test/unit/org/apache/cassandra/net/MessageTest.java +++ b/test/unit/org/apache/cassandra/net/MessageTest.java @@ -50,7 +50,7 @@ import static org.apache.cassandra.exceptions.RemoteExceptionTest.normalizeThrowable; import static org.apache.cassandra.net.Message.serializer; import static org.apache.cassandra.net.MessagingService.VERSION_40; -import static org.apache.cassandra.net.MessagingService.VERSION_50; +import static org.apache.cassandra.net.MessagingService.VERSION_51; import static org.apache.cassandra.net.NoPayload.noPayload; import static org.apache.cassandra.net.ParamType.RESPOND_TO; import static org.apache.cassandra.net.ParamType.TRACE_SESSION; @@ -342,7 +342,7 @@ else if (msg1.verb() == Verb.FAILURE_RSP) RequestFailure reason1 = (RequestFailure)msg1.payload; RequestFailure reason2 = (RequestFailure)msg2.payload; assertEquals(reason1.reason, reason2.reason); - if (version >= VERSION_50) + if (version >= VERSION_51) { if (reason1.failure == null) assertNull(reason2.failure); diff --git a/test/unit/org/apache/cassandra/repair/RepairJobTest.java b/test/unit/org/apache/cassandra/repair/RepairJobTest.java index ea32bd750b88..2a589c028a8f 100644 --- a/test/unit/org/apache/cassandra/repair/RepairJobTest.java +++ b/test/unit/org/apache/cassandra/repair/RepairJobTest.java @@ -37,12 +37,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.ListenableFuture; - -import org.apache.cassandra.repair.messages.SyncResponse; -import org.apache.cassandra.repair.messages.ValidationResponse; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.service.paxos.cleanup.PaxosRepairState; -import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; @@ -50,6 +44,7 @@ import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.ByteOrderedPartitioner; @@ -64,11 +59,14 @@ import org.apache.cassandra.net.Verb; import org.apache.cassandra.repair.messages.RepairMessage; import org.apache.cassandra.repair.messages.SyncRequest; +import org.apache.cassandra.repair.messages.SyncResponse; +import org.apache.cassandra.repair.messages.ValidationResponse; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.paxos.Paxos; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupRequest; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupResponse; +import org.apache.cassandra.service.paxos.cleanup.PaxosRepairState; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; @@ -78,8 +76,11 @@ import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.asserts.SyncTaskListAssert; +import org.assertj.core.api.Assertions; import static java.util.Collections.emptySet; +import static org.apache.cassandra.net.Verb.PAXOS2_CLEANUP_REQ; +import static org.apache.cassandra.net.Verb.PAXOS2_CLEANUP_START_PREPARE_REQ; import static org.apache.cassandra.repair.RepairParallelism.SEQUENTIAL; import static org.apache.cassandra.streaming.PreviewKind.NONE; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; @@ -87,8 +88,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.apache.cassandra.net.Verb.PAXOS2_CLEANUP_START_PREPARE_REQ; -import static org.apache.cassandra.net.Verb.PAXOS2_CLEANUP_REQ; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -114,7 +113,7 @@ public class RepairJobTest private static InetAddressAndPort addr4; private static InetAddressAndPort addr5; private MeasureableRepairSession session; - private RepairJob job; + private CassandraRepairJob job; private RepairJobDesc sessionJobDesc; // So that threads actually get recycled and we can have accurate memory accounting while testing @@ -123,14 +122,14 @@ private static class MeasureableRepairSession extends RepairSession { private final List> syncCompleteCallbacks = new ArrayList<>(); - public MeasureableRepairSession(TimeUUID parentRepairSession, CommonRange commonRange, String keyspace, + public MeasureableRepairSession(TimeUUID parentRepairSession, CommonRange commonRange, boolean excludedDeadNodes, String keyspace, RepairParallelism parallelismDegree, boolean isIncremental, boolean pullRepair, PreviewKind previewKind, boolean optimiseStreams, boolean repairPaxos, boolean paxosOnly, - boolean dontPurgeTombstones, String... cfnames) + boolean dontPurgeTombstones, boolean accordRepair, String... cfnames) { super(SharedContext.Global.instance, new Scheduler.NoopScheduler(), - parentRepairSession, commonRange, keyspace, parallelismDegree, isIncremental, pullRepair, - previewKind, optimiseStreams, repairPaxos, paxosOnly, dontPurgeTombstones, cfnames); + parentRepairSession, commonRange, excludedDeadNodes, keyspace, parallelismDegree, isIncremental, pullRepair, + previewKind, optimiseStreams, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair, cfnames); } @Override @@ -194,11 +193,11 @@ public void setup() ActiveRepairService.UNREPAIRED_SSTABLE, false, PreviewKind.NONE); this.session = new MeasureableRepairSession(parentRepairSession, - new CommonRange(neighbors, emptySet(), FULL_RANGE), + new CommonRange(neighbors, emptySet(), FULL_RANGE), false, KEYSPACE, SEQUENTIAL, false, false, - NONE, false, true, false, false, CF); + NONE, false, true, false, false, false, CF); - this.job = new RepairJob(session, CF); + this.job = new CassandraRepairJob(session, CF); this.sessionJobDesc = new RepairJobDesc(session.state.parentRepairSession, session.getId(), session.state.keyspace, CF, session.ranges()); @@ -268,7 +267,7 @@ public void testNoTreesRetainedAfterDifference() throws Throwable // Use addr4 instead of one of the provided trees to force everything to be remote sync tasks as // LocalSyncTasks try to reach over the network. - List syncTasks = RepairJob.createStandardSyncTasks(SharedContext.Global.instance, sessionJobDesc, mockTreeResponses, + List syncTasks = CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, sessionJobDesc, mockTreeResponses, addr4, // local noTransient(), session.isIncremental, @@ -330,7 +329,7 @@ public void testValidationFailure() throws InterruptedException, TimeoutExceptio interceptRepairMessages(mockTrees, new ArrayList<>()); - try + try { job.run(); job.get(TEST_TIMEOUT_S, TimeUnit.SECONDS); @@ -368,7 +367,7 @@ public static void testCreateStandardSyncTasks(boolean pullRepair) treeResponse(addr2, RANGE_1, "different", RANGE_2, "same", RANGE_3, "different"), treeResponse(addr3, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same")); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local noTransient(), // transient @@ -404,7 +403,7 @@ public void testStandardSyncTransient(boolean pullRepair) List treeResponses = Arrays.asList(treeResponse(addr1, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same"), treeResponse(addr2, RANGE_1, "different", RANGE_2, "same", RANGE_3, "different")); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local transientPredicate(addr2), @@ -434,7 +433,7 @@ public void testStandardSyncLocalTransient(boolean pullRepair) List treeResponses = Arrays.asList(treeResponse(addr1, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same"), treeResponse(addr2, RANGE_1, "different", RANGE_2, "same", RANGE_3, "different")); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local transientPredicate(addr1), @@ -494,7 +493,7 @@ public void testEmptyDifference(InetAddressAndPort local, Predicate treeResponses = Arrays.asList(treeResponse(addr1, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same"), treeResponse(addr2, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same")); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, local, // local isTransient, @@ -512,13 +511,13 @@ public void testCreateStandardSyncTasksAllDifferent() treeResponse(addr2, RANGE_1, "two", RANGE_2, "two", RANGE_3, "two"), treeResponse(addr3, RANGE_1, "three", RANGE_2, "three", RANGE_3, "three")); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local ep -> ep.equals(addr3), // transient - false, - true, - PreviewKind.ALL)); + false, + true, + PreviewKind.ALL)); assertThat(tasks).hasSize(3); @@ -543,7 +542,7 @@ public void testCreate5NodeStandardSyncTasksWithTransient() treeResponse(addr5, RANGE_1, "five", RANGE_2, "five", RANGE_3, "five")); Predicate isTransient = ep -> ep.equals(addr4) || ep.equals(addr5); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local isTransient, // transient @@ -610,7 +609,7 @@ public static void testLocalSyncWithTransient(InetAddressAndPort local, boolean treeResponse(addr5, RANGE_1, "five", RANGE_2, "five", RANGE_3, "five")); Predicate isTransient = ep -> ep.equals(addr4) || ep.equals(addr5); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, local, // local isTransient, // transient @@ -659,13 +658,13 @@ private static void testLocalAndRemoteTransient(boolean pullRepair) treeResponse(addr4, RANGE_1, "four", RANGE_2, "four", RANGE_3, "four"), treeResponse(addr5, RANGE_1, "five", RANGE_2, "five", RANGE_3, "five")); - Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr4, // local ep -> ep.equals(addr4) || ep.equals(addr5), // transient - false, - pullRepair, - PreviewKind.ALL)); + false, + pullRepair, + PreviewKind.ALL)); assertThat(tasks.get(pair(addr4, addr5))).isNull(); } @@ -677,13 +676,13 @@ public void testOptimisedCreateStandardSyncTasksAllDifferent() treeResponse(addr2, RANGE_1, "two", RANGE_2, "two", RANGE_3, "two"), treeResponse(addr3, RANGE_1, "three", RANGE_2, "three", RANGE_3, "three")); - Map tasks = toMap(RepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local noTransient(), addr -> "DC1", - false, - PreviewKind.ALL)); + false, + PreviewKind.ALL)); for (SyncNodePair pair : new SyncNodePair[]{ pair(addr1, addr2), pair(addr1, addr3), @@ -712,13 +711,13 @@ public void testOptimisedCreateStandardSyncTasks() treeResponse(addr2, RANGE_1, "one", RANGE_2, "two"), treeResponse(addr3, RANGE_1, "three", RANGE_2, "two")); - Map tasks = toMap(RepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(CassandraRepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr4, // local noTransient(), addr -> "DC1", - false, - PreviewKind.ALL)); + false, + PreviewKind.ALL)); SyncTaskListAssert.assertThat(tasks.values()).areAllInstanceOf(AsymmetricRemoteSyncTask.class); @@ -745,13 +744,13 @@ public void testOptimisedCreateStandardSyncTasksWithTransient() treeResponse(addr3, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same")); RepairJobDesc desc = new RepairJobDesc(nextTimeUUID(), nextTimeUUID(), "ks", "cf", Collections.emptyList()); - Map tasks = toMap(RepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, desc, + Map tasks = toMap(CassandraRepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, desc, treeResponses, addr1, // local ep -> ep.equals(addr3), addr -> "DC1", - false, - PreviewKind.ALL)); + false, + PreviewKind.ALL)); SyncTask task = tasks.get(pair(addr1, addr2)); diff --git a/test/unit/org/apache/cassandra/repair/RepairSessionTest.java b/test/unit/org/apache/cassandra/repair/RepairSessionTest.java index 470a2efc538e..a5db87036969 100644 --- a/test/unit/org/apache/cassandra/repair/RepairSessionTest.java +++ b/test/unit/org/apache/cassandra/repair/RepairSessionTest.java @@ -65,10 +65,9 @@ public void testConviction() throws Exception Set endpoints = Sets.newHashSet(remote); RepairSession session = new RepairSession(SharedContext.Global.instance, new Scheduler.NoopScheduler(), parentSessionId, new CommonRange(endpoints, Collections.emptySet(), Arrays.asList(repairRange)), - "Keyspace1", RepairParallelism.SEQUENTIAL, + false, "Keyspace1", RepairParallelism.SEQUENTIAL, false, false, - PreviewKind.NONE, false, false, false, false, - "Standard1"); + PreviewKind.NONE, false, false, false, false, false, "Standard1"); // perform convict session.convict(remote, Double.MAX_VALUE); diff --git a/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java b/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java index 1657ceff4870..9e8080f903cc 100644 --- a/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java +++ b/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java @@ -25,13 +25,13 @@ import java.util.UUID; import com.google.common.collect.Lists; -import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; +import org.apache.cassandra.CassandraTestBase.UseMurmur3Partitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.dht.Range; @@ -52,37 +52,30 @@ import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.repair.RepairJobDesc; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.SessionSummary; import org.apache.cassandra.streaming.StreamSummary; import org.apache.cassandra.utils.MerkleTrees; +import static java.util.Collections.emptyList; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; -public class RepairMessageSerializationsTest +@UseMurmur3Partitioner +@DDDaemonInitialization +public class RepairMessageSerializationsTest extends CassandraTestBase { private static final int PROTOCOL_VERSION = MessagingService.current_version; private static final int GC_BEFORE = 1000000; - private static IPartitioner originalPartitioner; @BeforeClass public static void before() { - DatabaseDescriptor.daemonInitialization(); - originalPartitioner = StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); ClusterMetadataTestHelper.setInstanceForTest(); SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create("serializationsTestKeyspace", KeyspaceParams.simple(3))); SchemaTestUtil.announceNewTable(TableMetadata.minimal("serializationsTestKeyspace", "repairMessages")); } - @AfterClass - public static void after() - { - DatabaseDescriptor.setPartitionerUnsafe(originalPartitioner); - } - @Test public void validationRequestMessage() throws IOException { @@ -175,8 +168,8 @@ public void syncCompleteMessage() throws IOException InetAddressAndPort dst = InetAddressAndPort.getByName("127.0.0.3"); List summaries = new ArrayList<>(); summaries.add(new SessionSummary(src, dst, - Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), 5, 100)), - Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), 500, 10)) + Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), emptyList(), 5, 100)), + Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), emptyList(), 500, 10)) )); SyncResponse msg = new SyncResponse(buildRepairJobDesc(), new SyncNodePair(src, dst), true, summaries); serializeRoundTrip(msg, SyncResponse.serializer); diff --git a/test/unit/org/apache/cassandra/schema/ValidationTest.java b/test/unit/org/apache/cassandra/schema/ValidationTest.java index 75727f218f24..e9edbf729dbd 100644 --- a/test/unit/org/apache/cassandra/schema/ValidationTest.java +++ b/test/unit/org/apache/cassandra/schema/ValidationTest.java @@ -27,14 +27,19 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.junit.BeforeClass; + import org.junit.Test; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; + import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -public class ValidationTest +@DDDaemonInitialization +public class ValidationTest extends CassandraTestBase { @BeforeClass public static void beforeClass() diff --git a/test/unit/org/apache/cassandra/service/BootstrapTransientTest.java b/test/unit/org/apache/cassandra/service/BootstrapTransientTest.java index d6f4aa9a09d5..b0207565c074 100644 --- a/test/unit/org/apache/cassandra/service/BootstrapTransientTest.java +++ b/test/unit/org/apache/cassandra/service/BootstrapTransientTest.java @@ -30,6 +30,9 @@ import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; +import org.apache.cassandra.CassandraTestBase.UseOrderPreservingPartitioner; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.OrderPreservingPartitioner; import org.apache.cassandra.dht.Range; @@ -63,7 +66,9 @@ * is used to calculate the endpoints to fetch from and check they are alive for both RangeRelocator (move) and * bootstrap (RangeRelocator). */ -public class BootstrapTransientTest +@DDDaemonInitialization +@UseOrderPreservingPartitioner +public class BootstrapTransientTest extends CassandraTestBase { static final String KEYSPACE = "TestKeyspace"; static InetAddressAndPort address02; diff --git a/test/unit/org/apache/cassandra/service/RemoveTest.java b/test/unit/org/apache/cassandra/service/RemoveTest.java index b2c664ebf204..9e85c6a65619 100644 --- a/test/unit/org/apache/cassandra/service/RemoveTest.java +++ b/test/unit/org/apache/cassandra/service/RemoveTest.java @@ -24,15 +24,16 @@ import java.util.List; import java.util.UUID; -import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.PrepareServerNoRegister; +import org.apache.cassandra.CassandraTestBase.UseRandomPartitioner; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.Util; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.RandomPartitioner; import org.apache.cassandra.dht.Token; @@ -43,19 +44,14 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.tcm.membership.NodeId; -import static org.apache.cassandra.tcm.membership.MembershipUtils.*; +import static org.apache.cassandra.tcm.membership.MembershipUtils.endpoint; -public class RemoveTest +@UseRandomPartitioner +@PrepareServerNoRegister +public class RemoveTest extends CassandraTestBase { - static - { - DatabaseDescriptor.daemonInitialization(); - CommitLog.instance.start(); - } - static final IPartitioner partitioner = RandomPartitioner.instance; StorageService ss = StorageService.instance; - static IPartitioner oldPartitioner; ArrayList endpointTokens = new ArrayList(); ArrayList keyTokens = new ArrayList(); List hosts = new ArrayList<>(); @@ -66,17 +62,9 @@ public class RemoveTest @BeforeClass public static void setupClass() throws ConfigurationException { - oldPartitioner = StorageService.instance.setPartitionerUnsafe(partitioner); - ServerTestUtils.prepareServerNoRegister(); MessagingService.instance().listen(); } - @AfterClass - public static void tearDownClass() - { - StorageService.instance.setPartitionerUnsafe(oldPartitioner); - } - @Before public void setup() throws IOException, ConfigurationException { diff --git a/test/unit/org/apache/cassandra/service/SerializationsTest.java b/test/unit/org/apache/cassandra/service/SerializationsTest.java index 20431fc335c6..9251b590bb6b 100644 --- a/test/unit/org/apache/cassandra/service/SerializationsTest.java +++ b/test/unit/org/apache/cassandra/service/SerializationsTest.java @@ -26,9 +26,6 @@ import java.util.UUID; import com.google.common.collect.Lists; - -import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.io.util.FileInputStreamPlus; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -41,13 +38,19 @@ import org.apache.cassandra.dht.RandomPartitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataOutputStreamPlus; +import org.apache.cassandra.io.util.FileInputStreamPlus; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.repair.SyncNodePair; import org.apache.cassandra.repair.RepairJobDesc; +import org.apache.cassandra.repair.SyncNodePair; import org.apache.cassandra.repair.Validator; -import org.apache.cassandra.repair.messages.*; +import org.apache.cassandra.repair.messages.RepairMessage; +import org.apache.cassandra.repair.messages.SyncRequest; +import org.apache.cassandra.repair.messages.SyncResponse; +import org.apache.cassandra.repair.messages.ValidationRequest; +import org.apache.cassandra.repair.messages.ValidationResponse; import org.apache.cassandra.repair.state.ValidationState; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; @@ -62,6 +65,8 @@ import org.apache.cassandra.utils.MerkleTrees; import org.apache.cassandra.utils.TimeUUID; +import static java.util.Collections.emptyList; + public class SerializationsTest extends AbstractSerializationsTester { private static PartitionerSwitcher partitionerSwitcher; @@ -218,12 +223,12 @@ private void testSyncCompleteWrite() throws IOException // sync success List summaries = new ArrayList<>(); summaries.add(new SessionSummary(src, dest, - Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), 5, 100)), - Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), 500, 10)) + Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), emptyList(), 5, 100)), + Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), emptyList(), 500, 10)) )); SyncResponse success = new SyncResponse(DESC, src, dest, true, summaries); // sync fail - SyncResponse fail = new SyncResponse(DESC, src, dest, false, Collections.emptyList()); + SyncResponse fail = new SyncResponse(DESC, src, dest, false, emptyList()); testRepairMessageWrite("service.SyncComplete.bin", SyncResponse.serializer, success, fail); } diff --git a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java index 7814b82ba08a..3405dc9bbf13 100644 --- a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java +++ b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java @@ -36,11 +36,11 @@ import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.CassandraTestBase; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.audit.AuditLogManager; import org.apache.cassandra.audit.AuditLogOptions; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.dht.OrderPreservingPartitioner; import org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken; @@ -49,7 +49,6 @@ import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.WithPartitioner; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.ReplicationParams; @@ -69,7 +68,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class StorageServiceServerTest +public class StorageServiceServerTest extends CassandraTestBase { static final String DC1 = "DC1"; static final String DC2 = "DC2"; @@ -94,7 +93,6 @@ public static void setUp() throws ConfigurationException, UnknownHostException id4 = InetAddressAndPort.getByName("127.0.0.4"); id5 = InetAddressAndPort.getByName("127.0.0.5"); registerNodes(); - ServerTestUtils.markCMS(); } private static void registerNodes() @@ -159,6 +157,7 @@ public void testSnapshot() throws IOException } @Test + @UseOrderPreservingPartitioner public void testLocalPrimaryRangeForEndpointWithNetworkTopologyStrategy() throws Exception { setupDefaultPlacements(); @@ -190,6 +189,7 @@ public void testLocalPrimaryRangeForEndpointWithNetworkTopologyStrategy() throws } @Test + @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategy() throws Exception { setupDefaultPlacements(); @@ -225,6 +225,7 @@ public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategy() thr } @Test + @UseOrderPreservingPartitioner public void testPrimaryRangesWithNetworkTopologyStrategy() throws Exception { setupDefaultPlacements(); @@ -255,6 +256,7 @@ public void testPrimaryRangesWithNetworkTopologyStrategy() throws Exception } @Test + @UseOrderPreservingPartitioner public void testPrimaryRangesWithNetworkTopologyStrategyOneDCOnly() throws Exception { setupDefaultPlacements(); @@ -286,6 +288,7 @@ public void testPrimaryRangesWithNetworkTopologyStrategyOneDCOnly() throws Excep } @Test + @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategyOneDCOnly() throws Exception { setupDefaultPlacements(); @@ -317,6 +320,7 @@ public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategyOneDCO } @Test + @UseOrderPreservingPartitioner public void testPrimaryRangesWithVnodes() throws Exception { // DC1 @@ -367,6 +371,7 @@ public void testPrimaryRangesWithVnodes() throws Exception } @Test + @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithVnodes() throws Exception { // DC1 @@ -431,6 +436,7 @@ public void testPrimaryRangeForEndpointWithinDCWithVnodes() throws Exception } @Test + @UseOrderPreservingPartitioner public void testPrimaryRangesWithSimpleStrategy() throws Exception { ClusterMetadataTestHelper.join(id1, new StringToken("A")); @@ -456,6 +462,7 @@ public void testPrimaryRangesWithSimpleStrategy() throws Exception /* Does not make much sense to use -local and -pr with simplestrategy, but just to prevent human errors */ @Test + @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithSimpleStrategy() throws Exception { ClusterMetadataTestHelper.join(id1, new StringToken("A")); @@ -483,46 +490,43 @@ public void testPrimaryRangeForEndpointWithinDCWithSimpleStrategy() throws Excep } @Test + @UseMurmur3Partitioner public void testCreateRepairRangeFrom() throws Exception { - try (WithPartitioner m3p = new WithPartitioner(Murmur3Partitioner.instance)) - { - registerNodes(); - ClusterMetadataTestHelper.join(id1, new LongToken(1000L)); - ClusterMetadataTestHelper.join(id2, new LongToken(2000L)); - ClusterMetadataTestHelper.join(id3, new LongToken(3000L)); - ClusterMetadataTestHelper.join(id4, new LongToken(4000L)); - - Collection> repairRangeFrom = StorageService.instance.createRepairRangeFrom("1500", "3700"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(3); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1500L), new LongToken(2000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(3700L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "700"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(700L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "1700"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(2); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(1000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(1700L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("2500", "2300"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(5); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2500L), new LongToken(3000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(4000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(4000L), new LongToken(1000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(2000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(2300L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "3000"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "2000"); - Assertions.assertThat(repairRangeFrom).isEmpty(); - } + ClusterMetadataTestHelper.join(id1, new LongToken(1000L)); + ClusterMetadataTestHelper.join(id2, new LongToken(2000L)); + ClusterMetadataTestHelper.join(id3, new LongToken(3000L)); + ClusterMetadataTestHelper.join(id4, new LongToken(4000L)); + + Collection> repairRangeFrom = StorageService.instance.createRepairRangeFrom("1500", "3700"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(3); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1500L), new LongToken(2000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(3700L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "700"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(700L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "1700"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(2); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(1000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(1700L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("2500", "2300"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(5); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2500L), new LongToken(3000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(4000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(4000L), new LongToken(1000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(2000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(2300L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "3000"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "2000"); + Assertions.assertThat(repairRangeFrom).isEmpty(); } /** diff --git a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java new file mode 100644 index 000000000000..8d8fb0fc46b7 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IMessageFilters.Filter; +import org.apache.cassandra.distributed.test.accord.AccordTestBase; +import org.apache.cassandra.net.Verb; + +import static org.apache.cassandra.distributed.shared.AssertUtils.assertRows; +import static org.apache.cassandra.distributed.util.QueryResultUtil.assertThat; + +public class AccordReadRepairTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(org.apache.cassandra.distributed.test.accord.AccordCQLTest.class); + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws IOException + { + AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord").set("non_serial_write_strategy", "mixed")), 2); + SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); + SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); + } + + /* + * SERIAL read and CAS create Accord transactions which will then invoke Cassandra coordination to perform the read + * and proxy any read repairs that are generated. + */ + @Test + public void testSerialReadRepair() throws Exception + { + testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;", ConsistencyLevel.SERIAL), + new Object[][] {{1, 1, 1, 1}}); + } + + @Test + public void testCASFailedConditionReadRepair() throws Exception + { + // Even if the condition fails to apply the data checked when applying the condition should be repaired + testReadRepair(cluster -> cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v1) VALUES (1, 1, 99) IF NOT EXISTS;", ConsistencyLevel.SERIAL), + new Object[][] {{false, 1, 1, 1, 1}}); + } + + @Test + public void testCASReadRepair() throws Exception + { + // If the condition applies the read repair should preserve the existing timestamp + testReadRepair(cluster -> cluster.coordinator(1).execute("UPDATE " + currentTable + " SET v2 = 99 WHERE k = 1 and c = 1 IF EXISTS;", ConsistencyLevel.SERIAL), + new Object[][] {{Boolean.TRUE}}); + } + + /* + * non-SERIAL consistency levels are coordinated by C* and then if a partition needs to be repaired an Accord transaction + * is created for each partition repair to proxy the repair mutations safely. + */ + @Test + public void testNonSerialReadRepair() throws Exception + { + for (ConsistencyLevel cl : ImmutableList.of(ConsistencyLevel.QUORUM)) + testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;", cl), + new Object[][] {{1, 1, 1, 1}}); + } + + void testReadRepair(Function accordTxn, Object[][] expected) throws Exception + { + test("CREATE TABLE " + currentTable + " (k int, c int, v1 int, v2 int, PRIMARY KEY ((k), c));", + cluster -> { + Filter mutationFilter = cluster.filters().verbs(Verb.MUTATION_REQ.id).drop().on(); + cluster.filters().verbs(Verb.HINT_REQ.id, Verb.HINT_RSP.id).drop().on(); + cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v1, v2) VALUES (1, 1, 1, 1) USING TIMESTAMP 42;", ConsistencyLevel.ONE); + mutationFilter.off(); + Filter blockNodeOneReads = cluster.filters().verbs(Verb.READ_REQ.id).to(1).drop().on(); + assertThat(cluster.coordinator(2).executeWithResult("SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) + .isEmpty(); + blockNodeOneReads.off(); + // Should perform read repair + Object[][] result = accordTxn.apply(cluster); + assertRows(result, expected); + blockNodeOneReads.on(); + // Side effect of the read repair should be visible now + assertThat(cluster.coordinator(2).executeWithResult("SELECT k, c, v1, WRITETIME(v1) FROM " + currentTable + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) + .isEqualTo(1, 1, 1, 42L); + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 4e21b5096c79..8a64409ccac6 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -214,7 +214,7 @@ public static Pair processTxnResultDirect(SafeCommandStore safeS Data readData = read.keys().stream().map(key -> { try { - return AsyncChains.getBlocking(read.read(key, txn.kind(), safeStore, executeAt, null)); + return AsyncChains.getBlocking(read.read(key, safeStore, executeAt, null)); } catch (InterruptedException e) { @@ -227,7 +227,7 @@ public static Pair processTxnResultDirect(SafeCommandStore safeS }) .reduce(null, TxnData::merge); return Pair.create(txn.execute(txnId, executeAt, readData), - txn.query().compute(txnId, executeAt, readData, txn.read(), txn.update())); + txn.query().compute(txnId, executeAt, txn.keys(), readData, txn.read(), txn.update())); } diff --git a/test/unit/org/apache/cassandra/service/accord/txn/TxnUpdateTest.java b/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java similarity index 91% rename from test/unit/org/apache/cassandra/service/accord/txn/TxnUpdateTest.java rename to test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java index 203e60ab16d2..fbfd1190cc4b 100644 --- a/test/unit/org/apache/cassandra/service/accord/txn/TxnUpdateTest.java +++ b/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java @@ -18,18 +18,18 @@ package org.apache.cassandra.service.accord.txn; -import org.apache.cassandra.service.accord.AccordTestUtils; import org.junit.BeforeClass; import org.junit.Test; import accord.primitives.Txn; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.accord.AccordTestUtils; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.utils.SerializerTestUtils.assertSerializerIOEquality; -public class TxnUpdateTest +public class AccordUpdateTest { @BeforeClass public static void setupClass() @@ -44,7 +44,7 @@ public static void setupClass() public void predicateSerializer() { Txn txn = AccordTestUtils.createTxn(0, 0); - TxnUpdate update = (TxnUpdate) txn.update(); - assertSerializerIOEquality(update, TxnUpdate.serializer); + AccordUpdate update = (AccordUpdate) txn.update(); + assertSerializerIOEquality(update, AccordUpdate.serializer); } -} +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/paxos/AbstractPaxosRepairTest.java b/test/unit/org/apache/cassandra/service/paxos/AbstractPaxosRepairTest.java index 9721879d56b0..b91604792d9d 100644 --- a/test/unit/org/apache/cassandra/service/paxos/AbstractPaxosRepairTest.java +++ b/test/unit/org/apache/cassandra/service/paxos/AbstractPaxosRepairTest.java @@ -51,7 +51,7 @@ private static class PaxosTestRepair extends AbstractPaxosRepair { public PaxosTestRepair() { - super(Murmur3Partitioner.instance.decorateKey(ByteBufferUtil.bytes(1)), null); + super(Murmur3Partitioner.instance.decorateKey(ByteBufferUtil.bytes(1)), null, -1); } public State restart(State state, long waitUntil) diff --git a/test/unit/org/apache/cassandra/service/paxos/cleanup/PaxosTableRepairsTest.java b/test/unit/org/apache/cassandra/service/paxos/cleanup/PaxosTableRepairsTest.java index 22441fec4253..fe21b820c5f9 100644 --- a/test/unit/org/apache/cassandra/service/paxos/cleanup/PaxosTableRepairsTest.java +++ b/test/unit/org/apache/cassandra/service/paxos/cleanup/PaxosTableRepairsTest.java @@ -51,7 +51,7 @@ private static class MockRepair extends AbstractPaxosRepair public MockRepair(DecoratedKey key) { - super(key, null); + super(key, null, -1); } public State restart(State state, long waitUntil) diff --git a/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTrackerTest.java b/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTrackerTest.java index 1f9db0851f0f..c9ec812f10a0 100644 --- a/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTrackerTest.java +++ b/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosBallotTrackerTest.java @@ -20,23 +20,26 @@ import java.io.IOException; -import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.paxos.Ballot; -import org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome; -import org.junit.*; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.Paxos; import org.apache.cassandra.service.paxos.PaxosState; +import org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome; import org.apache.cassandra.utils.ByteBufferUtil; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.REJECT; @@ -126,7 +129,7 @@ private static void testHighBound(Stage stage, Order order) case PROPOSE: try (PaxosState state = PaxosState.get(commit)) { - state.acceptIfLatest(commit); + state.acceptIfLatest(commit, false); } break; case COMMIT: @@ -220,7 +223,7 @@ public void lowBoundAccept() throws IOException DecoratedKey key = dk(1); try (PaxosState state = PaxosState.get(key, cfm)) { - Ballot result = state.acceptIfLatest(new Commit.Proposal(ballot2, PartitionUpdate.emptyUpdate(cfm, key))); + Ballot result = state.acceptIfLatest(new Commit.Proposal(ballot2, PartitionUpdate.emptyUpdate(cfm, key)), false).supersededBy; Assert.assertNull(result); } @@ -228,7 +231,7 @@ public void lowBoundAccept() throws IOException ballotTracker.updateLowBound(ballot4); try (PaxosState state = PaxosState.get(key, cfm)) { - Ballot result = state.acceptIfLatest(new Commit.Proposal(ballot3, PartitionUpdate.emptyUpdate(cfm, key))); + Ballot result = state.acceptIfLatest(new Commit.Proposal(ballot3, PartitionUpdate.emptyUpdate(cfm, key)), false).supersededBy; Assert.assertEquals(ballot4, result); } } diff --git a/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTests.java b/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTests.java index 2804508e64a0..654c10b6d0d9 100644 --- a/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTests.java +++ b/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTests.java @@ -18,14 +18,23 @@ package org.apache.cassandra.service.paxos.uncommitted; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; import com.google.common.collect.Lists; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.commitlog.CommitLog; -import org.apache.cassandra.dht.*; +import org.apache.cassandra.dht.ByteOrderedPartitioner; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.utils.ByteBufferUtil; @@ -42,7 +51,7 @@ class PaxosUncommittedTests CommitLog.instance.start(); } - static final IPartitioner PARTITIONER = new ByteOrderedPartitioner(); + static final IPartitioner PARTITIONER = ByteOrderedPartitioner.instance; static final Token MIN_TOKEN = PARTITIONER.getMinimumToken(); static final Range FULL_RANGE = new Range<>(MIN_TOKEN, MIN_TOKEN); static final Collection> ALL_RANGES = Collections.singleton(FULL_RANGE); diff --git a/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTrackerIntegrationTest.java b/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTrackerIntegrationTest.java index 8c3dd250f5b3..dc8fafa96640 100644 --- a/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTrackerIntegrationTest.java +++ b/test/unit/org/apache/cassandra/service/paxos/uncommitted/PaxosUncommittedTrackerIntegrationTest.java @@ -20,7 +20,10 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import org.junit.*; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.Util; @@ -36,7 +39,7 @@ import static org.apache.cassandra.service.paxos.Ballot.Flag.NONE; import static org.apache.cassandra.service.paxos.BallotGenerator.Global.nextBallot; -import static org.apache.cassandra.service.paxos.Commit.*; +import static org.apache.cassandra.service.paxos.Commit.Proposal; import static org.apache.cassandra.service.paxos.uncommitted.PaxosUncommittedTests.ALL_RANGES; import static org.apache.cassandra.service.paxos.uncommitted.PaxosUncommittedTests.PAXOS_CFS; @@ -97,7 +100,7 @@ public void commitCycle() try (PaxosState state = PaxosState.get(key, cfm)) { - state.acceptIfLatest(proposal); + state.acceptIfLatest(proposal, false); } try (CloseableIterator iterator = tracker.uncommittedKeyIterator(cfm.id, ALL_RANGES)) @@ -124,7 +127,7 @@ public void inMemoryCommit() try (PaxosState state = PaxosState.get(key, cfm)) { state.promiseIfNewer(proposal.ballot, true); - state.acceptIfLatest(proposal); + state.acceptIfLatest(proposal, false); } try (CloseableIterator iterator = tracker.uncommittedKeyIterator(cfm.id, ALL_RANGES)) { diff --git a/test/unit/org/apache/cassandra/service/reads/AbstractReadResponseTest.java b/test/unit/org/apache/cassandra/service/reads/AbstractReadResponseTest.java index 7d22439d8d5c..3c0450d2e817 100644 --- a/test/unit/org/apache/cassandra/service/reads/AbstractReadResponseTest.java +++ b/test/unit/org/apache/cassandra/service/reads/AbstractReadResponseTest.java @@ -30,10 +30,12 @@ import org.junit.BeforeClass; import org.junit.Ignore; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; +import org.apache.cassandra.CassandraTestBase.UseMurmur3Partitioner; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.Util; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.db.BufferClusteringBound; import org.apache.cassandra.db.BufferClusteringBoundary; @@ -66,7 +68,6 @@ import org.apache.cassandra.db.rows.Rows; import org.apache.cassandra.db.rows.Unfiltered; import org.apache.cassandra.db.rows.UnfilteredRowIterator; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; @@ -83,7 +84,9 @@ * Base class for testing various components which deal with read responses */ @Ignore -public abstract class AbstractReadResponseTest +@DDDaemonInitialization +@UseMurmur3Partitioner +public abstract class AbstractReadResponseTest extends CassandraTestBase { public static final String KEYSPACE1 = "DataResolverTest"; public static final String KEYSPACE3 = "DataResolverTest3"; @@ -124,9 +127,6 @@ public abstract class AbstractReadResponseTest @BeforeClass public static void setupClass() throws Throwable { - DatabaseDescriptor.daemonInitialization(); - DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - TableMetadata.Builder builder1 = TableMetadata.builder(KEYSPACE1, CF_STANDARD) .addPartitionKeyColumn("key", BytesType.instance) diff --git a/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java b/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java index d281025666af..19e7724807b9 100644 --- a/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java +++ b/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java @@ -25,17 +25,18 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Sets; - import org.junit.Assert; import org.junit.Test; import org.apache.cassandra.Util; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ClusteringBound; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DeletionInfo; import org.apache.cassandra.db.DeletionTime; import org.apache.cassandra.db.EmptyIterators; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.MutableDeletionInfo; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.RangeTombstone; @@ -54,8 +55,6 @@ import org.apache.cassandra.db.rows.RangeTombstoneBoundaryMarker; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.RowIterator; -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.locator.EndpointsForRange; @@ -66,7 +65,7 @@ import org.apache.cassandra.locator.ReplicaUtils; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; -import org.apache.cassandra.net.*; +import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.reads.repair.ReadRepair; import org.apache.cassandra.service.reads.repair.RepairedDataTracker; import org.apache.cassandra.service.reads.repair.RepairedDataVerifier; @@ -137,7 +136,7 @@ private EndpointsForRange makeReplicas(int num) public void testResolveNewerSingleRow() { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); resolver.preprocess(response(command, peer1, iter(new RowUpdateBuilder(cfm, nowInSec, 0L, dk).clustering("1") .add("c1", "v1") @@ -169,7 +168,7 @@ public void testResolveNewerSingleRow() public void testResolveDisjointSingleRow() { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); resolver.preprocess(response(command, peer1, iter(new RowUpdateBuilder(cfm, nowInSec, 0L, dk).clustering("1") .add("c1", "v1") @@ -206,7 +205,7 @@ public void testResolveDisjointSingleRow() public void testResolveDisjointMultipleRows() throws UnknownHostException { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); resolver.preprocess(response(command, peer1, iter(new RowUpdateBuilder(cfm, nowInSec, 0L, dk).clustering("1") .add("c1", "v1") @@ -253,7 +252,7 @@ public void testResolveDisjointMultipleRows() throws UnknownHostException public void testResolveDisjointMultipleRowsWithRangeTombstones() { EndpointsForRange replicas = makeReplicas(4); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); RangeTombstone tombstone1 = tombstone("1", "11", 1, nowInSec); RangeTombstone tombstone2 = tombstone("3", "31", 1, nowInSec); @@ -334,7 +333,7 @@ public void testResolveDisjointMultipleRowsWithRangeTombstones() public void testResolveWithOneEmpty() { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); resolver.preprocess(response(command, peer1, iter(new RowUpdateBuilder(cfm, nowInSec, 1L, dk).clustering("1") .add("c2", "v2") @@ -365,7 +364,7 @@ public void testResolveWithBothEmpty() { EndpointsForRange replicas = makeReplicas(2); TestableReadRepair readRepair = new TestableReadRepair(command); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); resolver.preprocess(response(command, replicas.get(0).endpoint(), EmptyIterators.unfilteredPartition(cfm))); resolver.preprocess(response(command, replicas.get(1).endpoint(), EmptyIterators.unfilteredPartition(cfm))); @@ -381,7 +380,7 @@ public void testResolveWithBothEmpty() public void testResolveDeleted() { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); // one response with columns timestamped before a delete in another response InetAddressAndPort peer1 = replicas.get(0).endpoint(); resolver.preprocess(response(command, peer1, iter(new RowUpdateBuilder(cfm, nowInSec, 0L, dk).clustering("1") @@ -407,7 +406,7 @@ public void testResolveDeleted() public void testResolveMultipleDeleted() { EndpointsForRange replicas = makeReplicas(4); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); // deletes and columns with interleaved timestamp, with out of order return sequence InetAddressAndPort peer1 = replicas.get(0).endpoint(); resolver.preprocess(response(command, peer1, fullPartitionDelete(cfm, dk, 0, nowInSec))); @@ -492,7 +491,7 @@ public void testResolveRangeTombstonesOnBoundarySameTimestamp() throws UnknownHo private void resolveRangeTombstonesOnBoundary(long timestamp1, long timestamp2) { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); InetAddressAndPort peer2 = replicas.get(1).endpoint(); @@ -566,7 +565,7 @@ public void testRepairRangeTombstoneBoundary() throws UnknownHostException */ private void testRepairRangeTombstoneBoundary(EndpointsForRange replicas, int timestamp1, int timestamp2, int timestamp3) throws UnknownHostException { - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); InetAddressAndPort peer2 = replicas.get(1).endpoint(); @@ -619,7 +618,7 @@ public void testRepairRangeTombstoneWithPartitionDeletion() { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); InetAddressAndPort peer2 = replicas.get(1).endpoint(); @@ -658,7 +657,7 @@ public void testRepairRangeTombstoneWithPartitionDeletion() public void testRepairRangeTombstoneWithPartitionDeletion2() { EndpointsForRange replicas = makeReplicas(2); - DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, command, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); InetAddressAndPort peer1 = replicas.get(0).endpoint(); InetAddressAndPort peer2 = replicas.get(1).endpoint(); @@ -742,7 +741,7 @@ public void testResolveComplexDelete() EndpointsForRange replicas = makeReplicas(2); ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build(); TestableReadRepair readRepair = new TestableReadRepair(cmd); - DataResolver resolver = new DataResolver(cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); long[] ts = {100, 200}; @@ -794,7 +793,7 @@ public void testResolveDeletedCollection() EndpointsForRange replicas = makeReplicas(2); ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build(); TestableReadRepair readRepair = new TestableReadRepair(cmd); - DataResolver resolver = new DataResolver(cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); long[] ts = {100, 200}; @@ -838,7 +837,7 @@ public void testResolveNewCollection() EndpointsForRange replicas = makeReplicas(2); ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build(); TestableReadRepair readRepair = new TestableReadRepair(cmd); - DataResolver resolver = new DataResolver(cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); long[] ts = {100, 200}; @@ -888,7 +887,7 @@ public void testResolveNewCollectionOverwritingDeleted() EndpointsForRange replicas = makeReplicas(2); ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build(); TestableReadRepair readRepair = new TestableReadRepair(cmd); - DataResolver resolver = new DataResolver(cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); + DataResolver resolver = new DataResolver(ReadCoordinator.DEFAULT, cmd, plan(replicas, ALL), readRepair, Dispatcher.RequestTime.forImmediateExecution()); long[] ts = {100, 200}; @@ -1260,7 +1259,7 @@ class TestableDataResolver extends DataResolver public TestableDataResolver(ReadCommand command, ReplicaPlan.SharedForRangeRead plan, ReadRepair readRepair, Dispatcher.RequestTime requestTime) { - super(command, plan, readRepair, requestTime, true); + super(ReadCoordinator.DEFAULT, command, plan, readRepair, requestTime, true); } protected RepairedDataVerifier getRepairedDataVerifier(ReadCommand command) @@ -1326,7 +1325,7 @@ private void assertRepairMetadata(Mutation mutation) private ReplicaPlan.SharedForRangeRead plan(EndpointsForRange replicas, ConsistencyLevel consistencyLevel) { - BiFunction, Token, ReplicaPlan.ForWrite> repairPlan = (self, t) -> ReplicaPlans.forReadRepair(self, ClusterMetadata.current(), ks, consistencyLevel, t, (i) -> true); + BiFunction, Token, ReplicaPlan.ForWrite> repairPlan = (self, t) -> ReplicaPlans.forReadRepair(self, ClusterMetadata.current(), ks, consistencyLevel, t, (i) -> true, ReadCoordinator.DEFAULT); return ReplicaPlan.shared(new ReplicaPlan.ForRangeRead(ks, ks.getReplicationStrategy(), consistencyLevel, diff --git a/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java b/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java index 17baa4fa55f0..bde9763baf78 100644 --- a/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java +++ b/test/unit/org/apache/cassandra/service/reads/DigestResolverTest.java @@ -69,7 +69,7 @@ public void noRepairNeeded() { SinglePartitionReadCommand command = SinglePartitionReadCommand.fullPartitionRead(cfm, nowInSec, dk); EndpointsForToken targetReplicas = EndpointsForToken.of(dk.getToken(), full(EP1), full(EP2)); - DigestResolver resolver = new DigestResolver(command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); + DigestResolver resolver = new DigestResolver(ReadCoordinator.DEFAULT, command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); PartitionUpdate response = update(row(1000, 4, 4), row(1000, 5, 5)).build(); @@ -102,7 +102,7 @@ public void multiThreadedNoRepairNeededReadCallback() { final long startNanos = System.nanoTime(); final Dispatcher.RequestTime requestTime = new Dispatcher.RequestTime(startNanos, startNanos); - final DigestResolver resolver = new DigestResolver<>(command, plan, requestTime); + final DigestResolver resolver = new DigestResolver<>(ReadCoordinator.DEFAULT, command, plan, requestTime); final ReadCallback callback = new ReadCallback<>(resolver, command, plan, requestTime); final CountDownLatch startlatch = new CountDownLatch(2); @@ -137,7 +137,7 @@ public void digestMismatch() { SinglePartitionReadCommand command = SinglePartitionReadCommand.fullPartitionRead(cfm, nowInSec, dk); EndpointsForToken targetReplicas = EndpointsForToken.of(dk.getToken(), full(EP1), full(EP2)); - DigestResolver resolver = new DigestResolver(command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); + DigestResolver resolver = new DigestResolver(ReadCoordinator.DEFAULT, command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); PartitionUpdate response1 = update(row(1000, 4, 4), row(1000, 5, 5)).build(); PartitionUpdate response2 = update(row(2000, 4, 5)).build(); @@ -158,7 +158,7 @@ public void agreeingTransient() { SinglePartitionReadCommand command = SinglePartitionReadCommand.fullPartitionRead(cfm, nowInSec, dk); EndpointsForToken targetReplicas = EndpointsForToken.of(dk.getToken(), full(EP1), trans(EP2)); - DigestResolver resolver = new DigestResolver<>(command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); + DigestResolver resolver = new DigestResolver<>(ReadCoordinator.DEFAULT, command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); PartitionUpdate response1 = update(row(1000, 4, 4), row(1000, 5, 5)).build(); PartitionUpdate response2 = update(row(1000, 5, 5)).build(); @@ -179,7 +179,7 @@ public void transientResponse() { SinglePartitionReadCommand command = SinglePartitionReadCommand.fullPartitionRead(cfm, nowInSec, dk); EndpointsForToken targetReplicas = EndpointsForToken.of(dk.getToken(), full(EP1), trans(EP2)); - DigestResolver resolver = new DigestResolver<>(command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); + DigestResolver resolver = new DigestResolver<>(ReadCoordinator.DEFAULT, command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); PartitionUpdate response2 = update(row(1000, 5, 5)).build(); Assert.assertFalse(resolver.isDataPresent()); @@ -194,7 +194,7 @@ public void transientResponseData() { SinglePartitionReadCommand command = SinglePartitionReadCommand.fullPartitionRead(cfm, nowInSec, dk); EndpointsForToken targetReplicas = EndpointsForToken.of(dk.getToken(), full(EP1), full(EP2), trans(EP3)); - DigestResolver resolver = new DigestResolver<>(command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); + DigestResolver resolver = new DigestResolver<>(ReadCoordinator.DEFAULT, command, plan(ConsistencyLevel.QUORUM, targetReplicas), new Dispatcher.RequestTime(0L, 0L)); PartitionUpdate fullResponse = update(row(1000, 1, 1)).build(); PartitionUpdate digestResponse = update(row(1000, 1, 1)).build(); diff --git a/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java b/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java index 989130adb4f2..832fbcbe2371 100644 --- a/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java +++ b/test/unit/org/apache/cassandra/service/reads/ReadExecutorTest.java @@ -101,7 +101,7 @@ public void testUnableToSpeculate() throws Throwable { assertEquals(0, cfs.metric.speculativeInsufficientReplicas.getCount()); assertEquals(0, ks.metric.speculativeInsufficientReplicas.getCount()); - AbstractReadExecutor executor = new AbstractReadExecutor.NeverSpeculatingReadExecutor(cfs, new MockSinglePartitionReadCommand(), plan(targets, LOCAL_QUORUM), Dispatcher.RequestTime.forImmediateExecution(), true); + AbstractReadExecutor executor = new AbstractReadExecutor.NeverSpeculatingReadExecutor(ReadCoordinator.DEFAULT, cfs, new MockSinglePartitionReadCommand(), plan(targets, LOCAL_QUORUM), Dispatcher.RequestTime.forImmediateExecution(), true); executor.maybeTryAdditionalReplicas(); try { @@ -116,7 +116,7 @@ public void testUnableToSpeculate() throws Throwable assertEquals(1, ks.metric.speculativeInsufficientReplicas.getCount()); //Shouldn't increment - executor = new AbstractReadExecutor.NeverSpeculatingReadExecutor(cfs, new MockSinglePartitionReadCommand(), plan(targets, LOCAL_QUORUM), Dispatcher.RequestTime.forImmediateExecution(), false); + executor = new AbstractReadExecutor.NeverSpeculatingReadExecutor(ReadCoordinator.DEFAULT, cfs, new MockSinglePartitionReadCommand(), plan(targets, LOCAL_QUORUM), Dispatcher.RequestTime.forImmediateExecution(), false); executor.maybeTryAdditionalReplicas(); try { @@ -142,7 +142,7 @@ public void testSpeculateSucceeded() throws Throwable assertEquals(0, cfs.metric.speculativeFailedRetries.getCount()); assertEquals(0, ks.metric.speculativeRetries.getCount()); assertEquals(0, ks.metric.speculativeFailedRetries.getCount()); - AbstractReadExecutor executor = new AbstractReadExecutor.SpeculatingReadExecutor(cfs, new MockSinglePartitionReadCommand(DAYS.toMillis(365)), plan(LOCAL_QUORUM, targets, targets.subList(0, 2)), Dispatcher.RequestTime.forImmediateExecution()); + AbstractReadExecutor executor = new AbstractReadExecutor.SpeculatingReadExecutor(ReadCoordinator.DEFAULT, cfs, new MockSinglePartitionReadCommand(DAYS.toMillis(365)), plan(LOCAL_QUORUM, targets, targets.subList(0, 2)), Dispatcher.RequestTime.forImmediateExecution()); executor.maybeTryAdditionalReplicas(); new Thread() { @@ -183,7 +183,7 @@ public void testSpeculateFailed() throws Throwable assertEquals(0, cfs.metric.speculativeFailedRetries.getCount()); assertEquals(0, ks.metric.speculativeRetries.getCount()); assertEquals(0, ks.metric.speculativeFailedRetries.getCount()); - AbstractReadExecutor executor = new AbstractReadExecutor.SpeculatingReadExecutor(cfs, new MockSinglePartitionReadCommand(), plan(LOCAL_QUORUM, targets, targets.subList(0, 2)), Dispatcher.RequestTime.forImmediateExecution()); + AbstractReadExecutor executor = new AbstractReadExecutor.SpeculatingReadExecutor(ReadCoordinator.DEFAULT, cfs, new MockSinglePartitionReadCommand(), plan(LOCAL_QUORUM, targets, targets.subList(0, 2)), Dispatcher.RequestTime.forImmediateExecution()); executor.maybeTryAdditionalReplicas(); try { @@ -209,7 +209,7 @@ public void testRaceWithNonSpeculativeFailure() { MockSinglePartitionReadCommand command = new MockSinglePartitionReadCommand(TimeUnit.DAYS.toMillis(365)); ReplicaPlan.ForTokenRead plan = plan(ConsistencyLevel.LOCAL_ONE, targets, targets.subList(0, 1)); - AbstractReadExecutor executor = new AbstractReadExecutor.SpeculatingReadExecutor(cfs, command, plan, Dispatcher.RequestTime.forImmediateExecution()); + AbstractReadExecutor executor = new AbstractReadExecutor.SpeculatingReadExecutor(ReadCoordinator.DEFAULT, cfs, command, plan, Dispatcher.RequestTime.forImmediateExecution()); // Issue an initial request against the first endpoint... executor.executeAsync(); @@ -255,7 +255,7 @@ public static class MockSinglePartitionReadCommand extends SinglePartitionReadCo MockSinglePartitionReadCommand(long timeout) { - super(cfs.metadata().epoch, false, 0, false, cfs.metadata(), 0, null, null, null, Util.dk("ry@n_luvs_teh_y@nk33z"), null, null, false, null); + super(cfs.metadata().epoch, false, 0, false, false, cfs.metadata(), 0, null, null, null, Util.dk("ry@n_luvs_teh_y@nk33z"), null, null, false, null); this.timeout = timeout; } diff --git a/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java b/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java index 1689069cf97a..d4a5b7a05fd0 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java @@ -66,14 +66,15 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.locator.ReplicaUtils; +import org.apache.cassandra.service.reads.ReadCoordinator; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.net.Message; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.SchemaTestUtil; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.ByteBufferUtil; @@ -360,7 +361,7 @@ static ReplicaPlan.ForRangeRead replicaPlan(Keyspace keyspace, ConsistencyLevel replicas, 1, null, - (self, token) -> forReadRepair(self, ClusterMetadata.current(), keyspace, consistencyLevel, token, (r) -> true), + (self, token) -> forReadRepair(self, ClusterMetadata.current(), keyspace, consistencyLevel, token, (r) -> true, ReadCoordinator.DEFAULT), Epoch.EMPTY); } diff --git a/test/unit/org/apache/cassandra/service/reads/repair/BlockingReadRepairTest.java b/test/unit/org/apache/cassandra/service/reads/repair/BlockingReadRepairTest.java index 6806172402f1..a0320c92c789 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/BlockingReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/BlockingReadRepairTest.java @@ -42,6 +42,7 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.service.reads.ReadCallback; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.service.reads.ReadCoordinator; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.utils.Clock.Global.nanoTime; @@ -53,7 +54,7 @@ private static class InstrumentedReadRepairHandler { public InstrumentedReadRepairHandler(Map repairs, ReplicaPlan.ForWrite writePlan) { - super(Util.dk("not a real usable value"), repairs, writePlan); + super(ReadCoordinator.DEFAULT, Util.dk("not a real usable value"), repairs, writePlan); } Map mutationsSent = new HashMap<>(); @@ -86,7 +87,7 @@ private static class InstrumentedBlockingReadRepair, P ex { public InstrumentedBlockingReadRepair(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - super(command, replicaPlan, requestTime); + super(ReadCoordinator.DEFAULT, command, replicaPlan, requestTime); } Set readCommandRecipients = new HashSet<>(); diff --git a/test/unit/org/apache/cassandra/service/reads/repair/DiagEventsBlockingReadRepairTest.java b/test/unit/org/apache/cassandra/service/reads/repair/DiagEventsBlockingReadRepairTest.java index 9258922ff88d..1a330402d834 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/DiagEventsBlockingReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/DiagEventsBlockingReadRepairTest.java @@ -47,6 +47,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.net.Message; import org.apache.cassandra.service.reads.ReadCallback; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.service.reads.repair.ReadRepairEvent.ReadRepairEventType; import org.apache.cassandra.transport.Dispatcher; @@ -135,7 +136,7 @@ private static class DiagnosticBlockingRepairHandler extends BlockingReadRepair DiagnosticBlockingRepairHandler(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - super(command, replicaPlan, requestTime); + super(ReadCoordinator.DEFAULT, command, replicaPlan, requestTime); DiagnosticEventService.instance().subscribe(ReadRepairEvent.class, this::onRepairEvent); } @@ -183,7 +184,7 @@ private static Predicate isLocal() DiagnosticPartitionReadRepairHandler(DecoratedKey key, Map repairs, ReplicaPlan.ForWrite forReadRepair) { - super(key, repairs, forReadRepair); + super(ReadCoordinator.DEFAULT, key, repairs, forReadRepair); DiagnosticEventService.instance().subscribe(PartitionRepairEvent.class, this::onRepairEvent); } diff --git a/test/unit/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepairTest.java b/test/unit/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepairTest.java index 749e444425f7..a094ebfa6bc8 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/ReadOnlyReadRepairTest.java @@ -34,6 +34,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.service.reads.ReadCallback; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.service.reads.ReadCoordinator; public class ReadOnlyReadRepairTest extends AbstractReadRepairTest { @@ -42,7 +43,7 @@ private static class InstrumentedReadOnlyReadRepair, P ex { public InstrumentedReadOnlyReadRepair(ReadCommand command, ReplicaPlan.Shared replicaPlan, Dispatcher.RequestTime requestTime) { - super(command, replicaPlan, requestTime); + super(ReadCoordinator.DEFAULT, command, replicaPlan, requestTime); } Set readCommandRecipients = new HashSet<>(); diff --git a/test/unit/org/apache/cassandra/service/reads/repair/ReadRepairTest.java b/test/unit/org/apache/cassandra/service/reads/repair/ReadRepairTest.java index 5138de03000b..d0f0682fbf1b 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/ReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/ReadRepairTest.java @@ -57,6 +57,7 @@ import org.apache.cassandra.schema.SchemaTestUtil; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.utils.ByteBufferUtil; @@ -79,7 +80,7 @@ private static class InstrumentedReadRepairHandler, P ext { public InstrumentedReadRepairHandler(Map repairs, ReplicaPlan.ForWrite writePlan) { - super(Util.dk("not a valid key"), repairs, writePlan); + super(ReadCoordinator.DEFAULT, Util.dk("not a valid key"), repairs, writePlan); } Map mutationsSent = new HashMap<>(); diff --git a/test/unit/org/apache/cassandra/service/reads/repair/RepairedDataVerifierTest.java b/test/unit/org/apache/cassandra/service/reads/repair/RepairedDataVerifierTest.java index 682dc740ff08..7bdd08349a07 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/RepairedDataVerifierTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/RepairedDataVerifierTest.java @@ -281,6 +281,7 @@ private static class StubReadCommand extends SinglePartitionReadCommand isDigest, 0, false, + false, metadata, FBUtilities.nowInSeconds(), ColumnFilter.all(metadata), diff --git a/test/unit/org/apache/cassandra/service/reads/repair/TestableReadRepair.java b/test/unit/org/apache/cassandra/service/reads/repair/TestableReadRepair.java index eecd106e06ac..650fa73eb2b3 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/TestableReadRepair.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/TestableReadRepair.java @@ -35,7 +35,9 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.locator.ReplicaPlan.ForWrite; import org.apache.cassandra.service.reads.DigestResolver; +import org.apache.cassandra.service.reads.ReadCoordinator; public class TestableReadRepair, P extends ReplicaPlan.ForRead> implements ReadRepair @@ -89,7 +91,6 @@ public void startRepair(DigestResolver digestResolver, Consumer mu sent.put(entry.getKey().endpoint(), entry.getValue()); } + @Override + public void repairPartitionDirectly(ReadCoordinator coordinator, DecoratedKey partitionKey, Map mutations, ForWrite writePlan) + { + throw new UnsupportedOperationException(); + } + public Mutation getForEndpoint(InetAddressAndPort endpoint) { return sent.get(endpoint); diff --git a/test/unit/org/apache/cassandra/streaming/SessionInfoTest.java b/test/unit/org/apache/cassandra/streaming/SessionInfoTest.java index 778b6b2d7115..b44d6b42c930 100644 --- a/test/unit/org/apache/cassandra/streaming/SessionInfoTest.java +++ b/test/unit/org/apache/cassandra/streaming/SessionInfoTest.java @@ -27,6 +27,8 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.utils.FBUtilities; +import static java.util.Collections.emptyList; + public class SessionInfoTest { /** @@ -41,11 +43,11 @@ public void testTotals() Collection summaries = new ArrayList<>(); for (int i = 0; i < 10; i++) { - StreamSummary summary = new StreamSummary(tableId, i, (i + 1) * 10); + StreamSummary summary = new StreamSummary(tableId, emptyList(), i, (i + 1) * 10); summaries.add(summary); } - StreamSummary sending = new StreamSummary(tableId, 10, 100); + StreamSummary sending = new StreamSummary(tableId, emptyList(), 10, 100); SessionInfo info = new SessionInfo(local, 0, local, summaries, Collections.singleton(sending), StreamSession.State.PREPARING, null); assert info.getTotalFilesToReceive() == 45; diff --git a/test/unit/org/apache/cassandra/streaming/StreamReaderTest.java b/test/unit/org/apache/cassandra/streaming/StreamReaderTest.java index 79856ee1539b..0a854c074430 100644 --- a/test/unit/org/apache/cassandra/streaming/StreamReaderTest.java +++ b/test/unit/org/apache/cassandra/streaming/StreamReaderTest.java @@ -70,6 +70,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.TimeUUID; +import static java.util.Collections.emptyList; import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.*; import static org.apache.cassandra.tcm.ownership.OwnershipUtils.beginJoin; import static org.apache.cassandra.tcm.ownership.OwnershipUtils.beginMove; @@ -366,7 +367,7 @@ public static StreamSession setupStreamingSessionForTest() StreamResultFuture future = StreamResultFuture.createInitiator(nextTimeUUID(), StreamOperation.REPAIR, Collections.emptyList(), streamCoordinator); InetAddressAndPort peer = FBUtilities.getBroadcastAddressAndPort(); - streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, Collections.emptyList(), Collections.emptyList(), StreamSession.State.INITIALIZED, "")); + streamCoordinator.addSessionInfo(new SessionInfo(peer, 0, peer, emptyList(), emptyList(), StreamSession.State.INITIALIZED, "")); StreamSession session = streamCoordinator.getOrCreateOutboundSession(peer); session.init(future); @@ -380,7 +381,7 @@ private static void tryReceiveExpectingSuccess(int[] tokens) throws Throwable CassandraStreamHeader streamHeader = streamMessageHeader(tokens); long startMetricCount = StorageMetrics.totalOpsForInvalidToken.getCount(); IStreamReader reader = streamReader(header, streamHeader, session); - StreamSummary streamSummary = new StreamSummary(streamHeader.tableId, 1, 0); + StreamSummary streamSummary = new StreamSummary(streamHeader.tableId, emptyList(), 1, 0); session.prepareReceiving(streamSummary); reader.read(incomingStream(tokens)); assertEquals(StorageMetrics.totalOpsForInvalidToken.getCount(), startMetricCount); @@ -392,7 +393,7 @@ private static void tryReceiveExpectingFailure(int[] tokens) throws Throwable StreamMessageHeader header = streamHeader(); CassandraStreamHeader streamHeader = streamMessageHeader(tokens); long startMetricCount = StorageMetrics.totalOpsForInvalidToken.getCount(); - StreamSummary streamSummary = new StreamSummary(streamHeader.tableId, 1, 0); + StreamSummary streamSummary = new StreamSummary(streamHeader.tableId, emptyList(), 1, 0); session.prepareReceiving(streamSummary); try { diff --git a/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java b/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java index 03d6aa7fd321..069d0fb58d0f 100644 --- a/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java +++ b/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java @@ -30,6 +30,7 @@ import io.netty.buffer.ByteBuf; import io.netty.channel.embedded.EmbeddedChannel; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputBuffer; @@ -50,9 +51,8 @@ import org.apache.cassandra.streaming.messages.StreamMessageHeader; import org.apache.cassandra.utils.TimeUUID; -import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; - import static org.apache.cassandra.net.TestChannel.REMOTE_ADDR; +import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; public class StreamingInboundHandlerTest { @@ -125,7 +125,7 @@ public void StreamDeserializingTask_deserialize_ISM_NoSession() throws IOExcepti temp.flip(); DataInputPlus in = new DataInputBuffer(temp, false); // session not found - IncomingStreamMessage.serializer.deserialize(in, MessagingService.current_version); + IncomingStreamMessage.serializer.deserialize(in, IPartitioner.global(), MessagingService.current_version); } @Test diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java index 318c0c6e3e22..b9df8471a2aa 100644 --- a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java @@ -51,6 +51,7 @@ import org.mockito.Mockito; import static org.apache.cassandra.tcm.MetadataKeys.ACCORD_KEYSPACES; +import static org.apache.cassandra.tcm.MetadataKeys.CONSENSUS_MIGRATION_STATE; import static org.apache.cassandra.tcm.MetadataKeys.DATA_PLACEMENTS; import static org.apache.cassandra.tcm.MetadataKeys.IN_PROGRESS_SEQUENCES; import static org.apache.cassandra.tcm.MetadataKeys.LOCKED_RANGES; @@ -305,6 +306,8 @@ else if (key == IN_PROGRESS_SEQUENCES) return metadata.inProgressSequences; else if (key == ACCORD_KEYSPACES) return metadata.accordKeyspaces; + else if (key == CONSENSUS_MIGRATION_STATE) + return metadata.consensusMigrationState; throw new IllegalArgumentException("Unknown metadata key " + key); } diff --git a/test/unit/org/apache/cassandra/tools/nodetool/NetStatsTest.java b/test/unit/org/apache/cassandra/tools/nodetool/NetStatsTest.java index 7bddc9b23ae7..9664c9071461 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/NetStatsTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/NetStatsTest.java @@ -40,6 +40,7 @@ import org.apache.cassandra.tools.ToolRunner; import org.apache.cassandra.utils.FBUtilities; +import static java.util.Collections.emptyList; import static org.apache.cassandra.net.Verb.ECHO_REQ; import static org.assertj.core.api.Assertions.assertThat; @@ -111,7 +112,7 @@ public void testNetStats() @Test public void testHumanReadable() throws IOException { - List streamSummaries = Collections.singletonList(new StreamSummary(TableId.generate(), 1, 1024)); + List streamSummaries = Collections.singletonList(new StreamSummary(TableId.generate(), emptyList(), 1, 1024)); SessionInfo info = new SessionInfo(InetAddressAndPort.getLocalHost(), 1, InetAddressAndPort.getLocalHost(), diff --git a/test/unit/org/apache/cassandra/utils/BloomFilterTest.java b/test/unit/org/apache/cassandra/utils/BloomFilterTest.java index 7f08d6ccf29a..51e6c9fb4c71 100644 --- a/test/unit/org/apache/cassandra/utils/BloomFilterTest.java +++ b/test/unit/org/apache/cassandra/utils/BloomFilterTest.java @@ -233,7 +233,7 @@ public void testHugeBFSerialization() throws IOException @Test public void testMurmur3FilterHash() { - IPartitioner partitioner = new Murmur3Partitioner(); + IPartitioner partitioner = Murmur3Partitioner.instance; Iterator gen = new KeyGenerator.RandomStringGenerator(new Random().nextInt(), FilterTestHelper.ELEMENTS); long[] expected = new long[2]; long[] actual = new long[2]; diff --git a/test/unit/org/apache/cassandra/utils/SerializationsTest.java b/test/unit/org/apache/cassandra/utils/SerializationsTest.java index e23bb3883247..0fc38209e598 100644 --- a/test/unit/org/apache/cassandra/utils/SerializationsTest.java +++ b/test/unit/org/apache/cassandra/utils/SerializationsTest.java @@ -118,7 +118,7 @@ public void testBloomFilterTable() throws Exception private void testBloomFilterTable(String file, boolean oldBfFormat) throws Exception { - Murmur3Partitioner partitioner = new Murmur3Partitioner(); + Murmur3Partitioner partitioner = Murmur3Partitioner.instance; try (FileInputStreamPlus in = new File(file).newInputStream(); IFilter filter = BloomFilterSerializer.forVersion(oldBfFormat).deserialize(in)) From 4c575a7e4847de174723902fa2eb2e23dfcd192a Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Mon, 13 Nov 2023 17:41:38 -0500 Subject: [PATCH 080/340] Fix Paxos V2 prepare response serialization Patch by Ariel Weisberg; Reviewed by David Capwell for CASSANDRA-19023 --- .../cassandra/repair/CassandraRepairJob.java | 3 --- .../cassandra/service/paxos/PaxosPrepare.java | 24 ++++++------------- .../disk/v1/InvertedIndexSearcherTest.java | 13 ++-------- 3 files changed, 9 insertions(+), 31 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/CassandraRepairJob.java b/src/java/org/apache/cassandra/repair/CassandraRepairJob.java index 95a55cd4d5d4..e2d373c58739 100644 --- a/src/java/org/apache/cassandra/repair/CassandraRepairJob.java +++ b/src/java/org/apache/cassandra/repair/CassandraRepairJob.java @@ -47,7 +47,6 @@ import org.apache.cassandra.repair.asymmetric.HostDifferences; import org.apache.cassandra.repair.asymmetric.PreferedNodeFilter; import org.apache.cassandra.repair.asymmetric.ReduceHelper; -import org.apache.cassandra.repair.state.JobState; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableMetadata; @@ -76,7 +75,6 @@ public class CassandraRepairJob extends AbstractRepairJob private static final Logger logger = LoggerFactory.getLogger(CassandraRepairJob.class); private final SharedContext ctx; - public final JobState state; private final RepairJobDesc desc; private final RepairSession session; private final RepairParallelism parallelismDegree; @@ -101,7 +99,6 @@ public CassandraRepairJob(RepairSession session, String columnFamily) this.taskExecutor = session.taskExecutor; this.parallelismDegree = session.parallelismDegree; this.desc = new RepairJobDesc(session.state.parentRepairSession, session.getId(), session.state.keyspace, columnFamily, session.state.commonRange.ranges); - this.state = new JobState(ctx.clock(), desc, session.state.commonRange.endpoints); } public long getNowInSeconds() diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java index eb11bf84acb5..1cd7da413c71 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java @@ -1258,23 +1258,15 @@ public Request deserialize(DataInputPlus in, int version) throws IOException } } - private static void serializeRejection(DataOutputPlus out, Ballot supersededBy, ConsensusMigratedAt maybeConsenusMigratedAt, int version) throws IOException - { - out.writeByte(0); - supersededBy.serialize(out); - if (version >= MessagingService.VERSION_51) - ConsensusMigratedAt.serializer.serialize(maybeConsenusMigratedAt, out, version); - } - public static class ResponseSerializer implements IVersionedSerializer { public void serialize(Response response, DataOutputPlus out, int version) throws IOException { if (response.isRejected()) { - out.writeByte(0); Rejected rejected = (Rejected) response; - serializeRejection(out, rejected.supersededBy, rejected.maybeConsenusMigratedAt, version); + out.writeByte(0); + rejected.supersededBy.serialize(out); } else { @@ -1296,9 +1288,9 @@ public void serialize(Response response, DataOutputPlus out, int version) throws Epoch.messageSerializer.serialize(promised.electorateEpoch, out, version); if (promised.outcome == PERMIT_READ) promised.supersededBy.serialize(out); - if (version >= MessagingService.VERSION_51) - ConsensusMigratedAt.serializer.serialize(response.maybeConsenusMigratedAt, out, version); } + if (version >= MessagingService.VERSION_51) + ConsensusMigratedAt.serializer.serialize(response.maybeConsenusMigratedAt, out, version); } public Response deserialize(DataInputPlus in, int version) throws IOException @@ -1334,17 +1326,15 @@ public Response deserialize(DataInputPlus in, int version) throws IOException public long serializedSize(Response response, int version) { - long size; + long size = 1; //flags if (response.isRejected()) { - size = 1 + Ballot.sizeInBytes(); - + size += Ballot.sizeInBytes(); } else { Permitted permitted = (Permitted) response; - return 1 - + VIntCoding.computeUnsignedVIntSize(permitted.lowBound) + size += VIntCoding.computeUnsignedVIntSize(permitted.lowBound) + (permitted.latestAcceptedButNotCommitted == null ? 0 : Accepted.serializer.serializedSize(permitted.latestAcceptedButNotCommitted, version)) + Committed.serializer.serializedSize(permitted.latestCommitted, version) + (permitted.readResponse == null ? 0 : ReadResponse.serializer.serializedSize(permitted.readResponse, version)) diff --git a/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java b/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java index 93045c80d6d1..e0385a157d5a 100644 --- a/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java +++ b/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java @@ -21,7 +21,6 @@ import java.nio.ByteBuffer; import java.util.List; -import org.junit.BeforeClass; import org.junit.Test; import com.carrotsearch.hppc.LongArrayList; @@ -34,18 +33,17 @@ import org.apache.cassandra.index.sai.QueryContext; import org.apache.cassandra.index.sai.SAITester; import org.apache.cassandra.index.sai.StorageAttachedIndex; -import org.apache.cassandra.index.sai.iterators.KeyRangeIterator; -import org.apache.cassandra.index.sai.memory.MemtableTermsIterator; import org.apache.cassandra.index.sai.disk.PrimaryKeyMap; import org.apache.cassandra.index.sai.disk.format.IndexDescriptor; import org.apache.cassandra.index.sai.disk.v1.segment.IndexSegmentSearcher; import org.apache.cassandra.index.sai.disk.v1.segment.LiteralIndexSegmentSearcher; import org.apache.cassandra.index.sai.disk.v1.segment.SegmentMetadata; import org.apache.cassandra.index.sai.disk.v1.trie.LiteralIndexWriter; +import org.apache.cassandra.index.sai.iterators.KeyRangeIterator; +import org.apache.cassandra.index.sai.memory.MemtableTermsIterator; import org.apache.cassandra.index.sai.plan.Expression; import org.apache.cassandra.index.sai.utils.PrimaryKey; import org.apache.cassandra.index.sai.utils.SAIRandomizedTester; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import org.apache.cassandra.utils.bytecomparable.ByteSource; @@ -92,13 +90,6 @@ public long floor(Token token) }; public static final PrimaryKeyMap.Factory TEST_PRIMARY_KEY_MAP_FACTORY = () -> TEST_PRIMARY_KEY_MAP; - @BeforeClass - public static void setupCQLTester() - { - DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); - } - @Test public void testEqQueriesAgainstStringIndex() throws Exception { From 0e899532dcb00e63e4b9043ff7772f7456f13901 Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Fri, 17 Nov 2023 16:01:47 +0000 Subject: [PATCH 081/340] Quick fix for AccordCommandStoreTest#commandLoadSave() --- .../cassandra/service/accord/AccordCommandStoreTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 9ee1efe9c04d..e3cb042cb109 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -111,6 +111,7 @@ public void commandLoadSave() throws Throwable CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); PartialTxn txn = createPartialTxn(0); Route route = RoutingKeys.of(key.toUnseekable()).toRoute(key.toUnseekable()); + attrs.partialTxn(txn); attrs.route(route); attrs.durability(Majority); Ballot promised = ballot(1, clock.incrementAndGet(), 1); @@ -139,7 +140,7 @@ public void commandLoadSave() throws Throwable depTxn.keys(), executeAt, dependencies, - null, + txn, result.left, Result.APPLIED); commandStore.appendToJournal(apply); From 050688228fd046556ea2bd4c526b42b84516b96a Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Mon, 9 Oct 2023 15:13:54 +0100 Subject: [PATCH 082/340] Improve validation and address various discovered faults patch by Benedict; reviewed by Ariel Weisberg for CASSANDRA-19045 --- .gitmodules | 2 +- modules/accord | 2 +- .../db/compaction/CompactionIterator.java | 4 +- .../service/accord/AccordCommandStore.java | 5 +- .../service/accord/AccordMessageSink.java | 2 +- .../accord/AccordSafeCommandStore.java | 16 +-- .../service/accord/api/AccordAgent.java | 6 + .../accord/interop/AccordInteropRead.java | 3 +- .../interop/AccordInteropReadRepair.java | 3 +- .../accord/serializers/ApplySerializers.java | 2 +- .../serializers/CheckStatusSerializers.java | 104 ++++++++++++++---- .../serializers/CommandSerializers.java | 25 ++++- .../serializers/CommandStoreSerializers.java | 14 ++- .../accord/serializers/FetchSerializers.java | 26 +++-- .../serializers/RecoverySerializers.java | 4 +- .../service/accord/txn/TxnWrite.java | 2 +- .../test/accord/AccordBootstrapTest.java | 8 +- .../cassandra/simulator/RandomSource.java | 50 ++++++++- .../CompactionAccordIteratorsTest.java | 2 +- .../apache/cassandra/repair/FuzzTestBase.java | 11 +- .../accord/AccordCommandStoreTest.java | 10 +- .../accord/AccordSyncPropagatorTest.java | 3 +- .../service/accord/AccordTestUtils.java | 14 ++- .../service/accord/CommandsForRangesTest.java | 3 +- .../cassandra/utils/AccordGenerators.java | 3 + 25 files changed, 240 insertions(+), 84 deletions(-) diff --git a/.gitmodules b/.gitmodules index 6e0094316221..616dacf610a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "modules/accord"] path = modules/accord - url = https://github.com/apache/cassandra-accord + url = https://github.com/apache/cassandra-accord.git branch = trunk diff --git a/modules/accord b/modules/accord index 6c6872270e16..746dabe0b43b 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 6c6872270e16d2e777f1fa2c510b8f15396be3f3 +Subproject commit 746dabe0b43bf719badbd605e68a76037d01256d diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index ccefefbb8b95..dbf0d0e2427f 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -880,7 +880,7 @@ protected Row applyToStatic(Row row) if (redundantBeforeEntry == null) return row; - TxnId redundantBeforeTxnId = redundantBeforeEntry.redundantBefore; + TxnId redundantBeforeTxnId = redundantBeforeEntry.shardRedundantBefore(); Cell lastExecuteMicrosCell = row.getCell(last_executed_micros); Long last_execute_micros = null; @@ -937,7 +937,7 @@ protected Row applyToRow(Row row) if (redundantBeforeEntry == null) return row; - TxnId redundantBeforeTxnId = redundantBeforeEntry.redundantBefore; + TxnId redundantBeforeTxnId = redundantBeforeEntry.shardRedundantBefore(); Timestamp timestamp = CommandsForKeyRows.getTimestamp(row); if (timestamp != null && timestamp.compareTo(redundantBeforeTxnId) < 0) return null; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 696cfe227c10..fe9c397358dc 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -452,7 +452,7 @@ public void completeOperation(AccordSafeCommandStore store) current = null; } - O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) + O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) { keysOrRanges = keysOrRanges.slice(slice, Routables.Slice.Minimal); switch (keysOrRanges.domain()) @@ -552,6 +552,9 @@ public void markShardDurable(SafeCommandStore safeStore, TxnId globalSyncId, Ran commandsForRanges.prune(globalSyncId, ranges); } + public NavigableMap bootstrapBeganAt() { return super.bootstrapBeganAt(); } + public NavigableMap safeToRead() { return super.safeToRead(); } + MessageProvider makeMessageProvider(TxnId txnId) { return journal.makeMessageProvider(txnId); diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 6c9622cffd0b..fd9880d3e273 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -139,7 +139,7 @@ private VerbMapping() builder.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); builder.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); builder.put(MessageType.WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_WAIT_UNTIL_APPLIED_REQ); - builder.put(MessageType.APPLY_AND_WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_APPLY_AND_WAIT_UNTIL_APPLIED_REQ); + builder.put(MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_APPLY_AND_WAIT_UNTIL_APPLIED_REQ); builder.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); builder.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); builder.put(MessageType.INFORM_HOME_DURABLE_REQ, Verb.ACCORD_INFORM_HOME_DURABLE_REQ); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 4fb9bc203cfd..122ae52a13ba 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -51,6 +51,7 @@ import accord.primitives.Seekable; import accord.primitives.Seekables; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; @@ -201,7 +202,7 @@ public void erase(SafeCommand safeCommand) { } - private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) + private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) { accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate, terminate); if (terminate.test(accumulate)) @@ -209,7 +210,7 @@ private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) { switch (keysOrRanges.domain()) { @@ -252,14 +253,7 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio } @Override - public T mapReduce(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, T terminalValue) - { - Predicate terminate = Predicates.equalTo(terminalValue); - return mapReduceWithTerminate(keysOrRanges, slice, testKind, testTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, accumulate, terminate); - } - - @Override - public T mapReduceWithTerminate(Seekables keysOrRanges, Ranges slice, TestKind testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, T accumulate, Predicate terminate) { + public T mapReduce(Seekables keysOrRanges, Ranges slice, Txn.Kind.Kinds testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, P1 p1, T accumulate, Predicate terminate) { accumulate = mapReduce(keysOrRanges, slice, (forKey, prev) -> { CommandTimeseries timeseries; switch (testTimestamp) @@ -285,7 +279,7 @@ public T mapReduceWithTerminate(Seekables keysOrRanges, Ranges slice, case MAY_EXECUTE_BEFORE: remapTestTimestamp = CommandTimeseries.TestTimestamp.BEFORE; } - return timeseries.mapReduceWithTerminate(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, prev, terminate); + return timeseries.mapReduce(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, p1, prev, terminate); }, accumulate, terminate); return accumulate; diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index f6a57247670e..f24b30fa994f 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -94,6 +94,12 @@ public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull Times } } + @Override + public void onStale(Timestamp staleSince, Ranges ranges) + { + // TODO (required): decide how to handle this - maybe do nothing besides log? Maybe configurably try some number of repair attempts to catch up. + } + @Override public void onUncaughtException(Throwable t) { diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java index 97ace2556605..0caceb6fb3e9 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java @@ -161,8 +161,9 @@ public AccordInteropRead(TxnId txnId, Participants readScope, long executeAtE } @Override - protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn) + protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn, Ranges unavailable) { + // TODO (required): subtract unavailable ranges, either from read or from response (or on coordinator) return AsyncChains.ofCallable(Stage.READ.executor(), () -> new LocalReadData(ReadCommandVerbHandler.instance.doRead(command, false))); } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java index c16c99e33ed5..00aeb0f24454 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java @@ -134,8 +134,9 @@ public AccordInteropReadRepair(TxnId txnId, Participants readScope, long exec } @Override - protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn) + protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn, Ranges unavailable) { + // TODO (required): subtract unavailable ranges, either from read or from response (or on coordinator) return AsyncChains.ofCallable(Verb.READ_REPAIR_REQ.stage.executor(), () -> { ReadRepairVerbHandler.instance.applyMutation(mutation); return Data.NOOP_DATA; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index 4decbdf1f8fc..c273e76be527 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -63,7 +63,7 @@ public A deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRout DepsSerializer.partialDeps.deserialize(in, version), CommandSerializers.nullablePartialTxn.deserialize(in, version), CommandSerializers.writes.deserialize(in, version), - Result.APPLIED); + CommandSerializers.APPLIED); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 734186ea0be8..0d15b74f5254 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -22,18 +22,20 @@ import accord.api.Result; import accord.api.RoutingKey; +import accord.coordinate.Infer; import accord.local.SaveStatus; -import accord.local.Status; import accord.local.Status.Durability; +import accord.local.Status.Known; import accord.messages.CheckStatus; import accord.messages.CheckStatus.CheckStatusNack; import accord.messages.CheckStatus.CheckStatusOk; import accord.messages.CheckStatus.CheckStatusOkFull; import accord.messages.CheckStatus.CheckStatusReply; +import accord.messages.CheckStatus.FoundKnown; +import accord.messages.CheckStatus.FoundKnownMap; import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; -import accord.primitives.Ranges; import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -48,7 +50,74 @@ public class CheckStatusSerializers { - public static final IVersionedSerializer request = new IVersionedSerializer() + public static final IVersionedSerializer foundKnown = new IVersionedSerializer<>() + { + @Override + public void serialize(FoundKnown known, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.known.serialize(known, out, version); + CommandSerializers.invalidIfNot.serialize(known.invalidIfNot, out, version); + CommandSerializers.isPreempted.serialize(known.isPreempted, out, version); + } + + @Override + public FoundKnown deserialize(DataInputPlus in, int version) throws IOException + { + Known known = CommandSerializers.known.deserialize(in, version); + Infer.InvalidIfNot invalidIfNot = CommandSerializers.invalidIfNot.deserialize(in, version); + Infer.IsPreempted isPreempted = CommandSerializers.isPreempted.deserialize(in, version); + return new FoundKnown(known, invalidIfNot, isPreempted); + } + + @Override + public long serializedSize(FoundKnown known, int version) + { + return CommandSerializers.known.serializedSize(known, version) + + CommandSerializers.invalidIfNot.serializedSize(known.invalidIfNot, version) + + CommandSerializers.isPreempted.serializedSize(known.isPreempted, version); + } + }; + + public static final IVersionedSerializer foundKnownMap = new IVersionedSerializer<>() + { + @Override + public void serialize(FoundKnownMap knownMap, DataOutputPlus out, int version) throws IOException + { + int size = knownMap.size(); + out.writeUnsignedVInt32(size); + for (int i = 0 ; i <= size ; ++i) + KeySerializers.routingKey.serialize(knownMap.startAt(i), out, version); + for (int i = 0 ; i < size ; ++i) + foundKnown.serialize(knownMap.valueAt(i), out, version); + } + + @Override + public FoundKnownMap deserialize(DataInputPlus in, int version) throws IOException + { + int size = in.readUnsignedVInt32(); + RoutingKey[] starts = new RoutingKey[size + 1]; + for (int i = 0 ; i <= size ; ++i) + starts[i] = KeySerializers.routingKey.deserialize(in, version); + FoundKnown[] values = new FoundKnown[size]; + for (int i = 0 ; i < size ; ++i) + values[i] = foundKnown.deserialize(in, version); + return FoundKnownMap.SerializerSupport.create(true, starts, values); + } + + @Override + public long serializedSize(FoundKnownMap knownMap, int version) + { + int size = knownMap.size(); + long result = TypeSizes.sizeofUnsignedVInt(size); + for (int i = 0 ; i <= size ; ++i) + result += KeySerializers.routingKey.serializedSize(knownMap.startAt(i), version); + for (int i = 0 ; i < size ; ++i) + result += foundKnown.serializedSize(knownMap.valueAt(i), version); + return result; + } + }; + + public static final IVersionedSerializer request = new IVersionedSerializer<>() { final CheckStatus.IncludeInfo[] infos = CheckStatus.IncludeInfo.values(); @@ -81,7 +150,7 @@ public long serializedSize(CheckStatus check, int version) } }; - public static final IVersionedSerializer reply = new IVersionedSerializer() + public static final IVersionedSerializer reply = new IVersionedSerializer<>() { private static final byte OK = 0x00; private static final byte FULL = 0x01; @@ -98,9 +167,8 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t CheckStatusOk ok = (CheckStatusOk) reply; out.write(reply instanceof CheckStatusOkFull ? FULL : OK); - KeySerializers.ranges.serialize(ok.truncated, out, version); - CommandSerializers.status.serialize(ok.invalidIfNotAtLeast, out, version); - CommandSerializers.saveStatus.serialize(ok.saveStatus, out, version); + foundKnownMap.serialize(ok.map, out, version); + CommandSerializers.saveStatus.serialize(ok.maxKnowledgeSaveStatus, out, version); CommandSerializers.saveStatus.serialize(ok.maxSaveStatus, out, version); CommandSerializers.ballot.serialize(ok.promised, out, version); CommandSerializers.ballot.serialize(ok.accepted, out, version); @@ -130,9 +198,8 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce return CheckStatusNack.NotOwned; case OK: case FULL: - Ranges truncated = KeySerializers.ranges.deserialize(in, version); - Status invalidIfNotAtLeast = CommandSerializers.status.deserialize(in, version); - SaveStatus status = CommandSerializers.saveStatus.deserialize(in, version); + FoundKnownMap map = foundKnownMap.deserialize(in, version); + SaveStatus maxKnowledgeStatus = CommandSerializers.saveStatus.deserialize(in, version); SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in, version); Ballot promised = CommandSerializers.ballot.deserialize(in, version); Ballot accepted = CommandSerializers.ballot.deserialize(in, version); @@ -143,7 +210,7 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); if (kind == OK) - return createOk(truncated, invalidIfNotAtLeast, status, maxStatus, promised, accepted, executeAt, + return createOk(map, maxKnowledgeStatus, maxStatus, promised, accepted, executeAt, isCoordinating, durability, route, homeKey); PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); @@ -151,13 +218,11 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce Writes writes = CommandSerializers.nullableWrites.deserialize(in, version); Result result = null; - if (status == SaveStatus.PreApplied || status == SaveStatus.Applied - || status == SaveStatus.TruncatedApply || status == SaveStatus.TruncatedApplyWithOutcome || status == SaveStatus.TruncatedApplyWithDeps) - result = Result.APPLIED; - else if (status == SaveStatus.Invalidated) - result = Result.INVALIDATED; + if (maxKnowledgeStatus == SaveStatus.PreApplied || maxKnowledgeStatus == SaveStatus.Applied + || maxKnowledgeStatus == SaveStatus.TruncatedApply || maxKnowledgeStatus == SaveStatus.TruncatedApplyWithOutcome || maxKnowledgeStatus == SaveStatus.TruncatedApplyWithDeps) + result = CommandSerializers.APPLIED; - return createOk(truncated, invalidIfNotAtLeast, status, maxStatus, promised, accepted, executeAt, + return createOk(map, maxKnowledgeStatus, maxStatus, promised, accepted, executeAt, isCoordinating, durability, route, homeKey, partialTxn, committedDeps, writes, result); } } @@ -170,9 +235,8 @@ public long serializedSize(CheckStatusReply reply, int version) return size; CheckStatusOk ok = (CheckStatusOk) reply; - size += KeySerializers.ranges.serializedSize(ok.truncated, version); - size += CommandSerializers.status.serializedSize(ok.invalidIfNotAtLeast, version); - size += CommandSerializers.saveStatus.serializedSize(ok.saveStatus, version); + size += foundKnownMap.serializedSize(ok.map, version); + size += CommandSerializers.saveStatus.serializedSize(ok.maxKnowledgeSaveStatus, version); size += CommandSerializers.saveStatus.serializedSize(ok.maxSaveStatus, version); size += CommandSerializers.ballot.serializedSize(ok.promised, version); size += CommandSerializers.ballot.serializedSize(ok.accepted, version); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index 0c15515206a0..46232a811e90 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -24,7 +24,9 @@ import accord.api.Query; import accord.api.Read; +import accord.api.Result; import accord.api.Update; +import accord.coordinate.Infer; import accord.local.Node; import accord.local.SaveStatus; import accord.local.Status; @@ -32,6 +34,7 @@ import accord.local.Status.Known; import accord.primitives.Ballot; import accord.primitives.PartialTxn; +import accord.primitives.ProgressToken; import accord.primitives.Ranges; import accord.primitives.Seekables; import accord.primitives.Timestamp; @@ -54,6 +57,16 @@ public class CommandSerializers { private CommandSerializers() {} + // TODO (expected): this is meant to encode e.g. whether the transaction's condition met or not + public static final Result APPLIED = new Result() + { + @Override + public ProgressToken asProgressToken() + { + return ProgressToken.APPLIED; + } + }; + public static final TimestampSerializer txnId = new TimestampSerializer<>(TxnId::fromBits); public static final TimestampSerializer timestamp = new TimestampSerializer<>(Timestamp::fromBits); public static final IVersionedSerializer nullableTimestamp = NullableSerializer.wrap(timestamp); @@ -228,16 +241,20 @@ public long serializedSize(Writes writes, int version) public static final IVersionedSerializer nullableWrites = NullableSerializer.wrap(writes); + public static final EnumSerializer route = new EnumSerializer<>(Status.KnownRoute.class); public static final EnumSerializer definition = new EnumSerializer<>(Status.Definition.class); public static final EnumSerializer knownExecuteAt = new EnumSerializer<>(Status.KnownExecuteAt.class); public static final EnumSerializer knownDeps = new EnumSerializer<>(Status.KnownDeps.class); public static final EnumSerializer outcome = new EnumSerializer<>(Status.Outcome.class); + public static final EnumSerializer invalidIfNot = new EnumSerializer<>(Infer.InvalidIfNot.class); + public static final EnumSerializer isPreempted = new EnumSerializer<>(Infer.IsPreempted.class); - public static final IVersionedSerializer known = new IVersionedSerializer() + public static final IVersionedSerializer known = new IVersionedSerializer<>() { @Override public void serialize(Known known, DataOutputPlus out, int version) throws IOException { + route.serialize(known.route, out, version); definition.serialize(known.definition, out, version); knownExecuteAt.serialize(known.executeAt, out, version); knownDeps.serialize(known.deps, out, version); @@ -247,7 +264,8 @@ public void serialize(Known known, DataOutputPlus out, int version) throws IOExc @Override public Known deserialize(DataInputPlus in, int version) throws IOException { - return new Known(definition.deserialize(in, version), + return new Known(route.deserialize(in, version), + definition.deserialize(in, version), knownExecuteAt.deserialize(in, version), knownDeps.deserialize(in, version), outcome.deserialize(in, version)); @@ -256,7 +274,8 @@ public Known deserialize(DataInputPlus in, int version) throws IOException @Override public long serializedSize(Known known, int version) { - return definition.serializedSize(known.definition, version) + return route.serializedSize(known.route, version) + + definition.serializedSize(known.definition, version) + knownExecuteAt.serializedSize(known.executeAt, version) + knownDeps.serializedSize(known.deps, version) + outcome.serializedSize(known.outcome, version); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java index 1dd60db79677..34dfced93df6 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java @@ -139,8 +139,10 @@ public void serialize(RedundantBefore.Entry t, DataOutputPlus out, int version) out.writeUnsignedVInt(t.startEpoch); if (t.endEpoch == Long.MAX_VALUE) out.writeUnsignedVInt(0L); else out.writeUnsignedVInt(1 + t.endEpoch - t.startEpoch); - CommandSerializers.txnId.serialize(t.redundantBefore, out, version); + CommandSerializers.txnId.serialize(t.locallyAppliedOrInvalidatedBefore, out, version); + CommandSerializers.txnId.serialize(t.shardAppliedOrInvalidatedBefore, out, version); CommandSerializers.txnId.serialize(t.bootstrappedAt, out, version); + CommandSerializers.nullableTimestamp.serialize(t.staleUntilAtLeast, out, version); } @Override @@ -151,9 +153,11 @@ public RedundantBefore.Entry deserialize(DataInputPlus in, int version) throws I long endEpoch = in.readUnsignedVInt(); if (endEpoch == 0) endEpoch = Long.MAX_VALUE; else endEpoch = startEpoch + 1 + endEpoch; - TxnId redundantBefore = CommandSerializers.txnId.deserialize(in, version); TxnId bootstrappedAt = CommandSerializers.txnId.deserialize(in, version); - return new RedundantBefore.Entry(range, startEpoch, endEpoch, redundantBefore, bootstrappedAt); + TxnId locallyAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); + TxnId shardAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); + Timestamp staleUntilAtLeast = CommandSerializers.nullableTimestamp.deserialize(in, version); + return new RedundantBefore.Entry(range, startEpoch, endEpoch, locallyAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, bootstrappedAt, staleUntilAtLeast); } @Override @@ -162,8 +166,10 @@ public long serializedSize(RedundantBefore.Entry t, int version) long size = TokenRange.serializer.serializedSize((TokenRange) t.range, version); size += TypeSizes.sizeofUnsignedVInt(t.startEpoch); size += TypeSizes.sizeofUnsignedVInt(t.endEpoch == Long.MAX_VALUE ? 0 : 1 + t.endEpoch - t.startEpoch); - size += CommandSerializers.txnId.serializedSize(t.redundantBefore, version); + size += CommandSerializers.txnId.serializedSize(t.locallyAppliedOrInvalidatedBefore, version); + size += CommandSerializers.txnId.serializedSize(t.shardAppliedOrInvalidatedBefore, version); size += CommandSerializers.txnId.serializedSize(t.bootstrappedAt, version); + size += CommandSerializers.nullableTimestamp.serializedSize(t.staleUntilAtLeast, version); return size; } }), RedundantBefore.Entry[]::new, RedundantBefore.SerializerSupport::create); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index 4b184b49fc58..0bdae00bb5a5 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -28,6 +28,7 @@ import accord.local.SaveStatus; import accord.local.Status.Durability; import accord.local.Status.Known; +import accord.messages.CheckStatus; import accord.messages.Propagate; import accord.messages.ReadData; import accord.messages.ReadData.ReadReply; @@ -145,16 +146,18 @@ public void serialize(Propagate p, DataOutputPlus out, int version) throws IOExc { CommandSerializers.txnId.serialize(p.txnId, out, version); KeySerializers.route.serialize(p.route, out, version); - CommandSerializers.saveStatus.serialize(p.saveStatus, out, version); + CommandSerializers.saveStatus.serialize(p.maxKnowledgeSaveStatus, out, version); CommandSerializers.saveStatus.serialize(p.maxSaveStatus, out, version); CommandSerializers.durability.serialize(p.durability, out, version); KeySerializers.nullableRoutingKey.serialize(p.homeKey, out, version); KeySerializers.nullableRoutingKey.serialize(p.progressKey, out, version); CommandSerializers.known.serialize(p.achieved, out, version); + CheckStatusSerializers.foundKnownMap.serialize(p.known, out, version); + out.writeBoolean(p.isTruncated); CommandSerializers.nullablePartialTxn.serialize(p.partialTxn, out, version); - DepsSerializer.nullablePartialDeps.serialize(p.partialDeps, out, version); + DepsSerializer.nullablePartialDeps.serialize(p.committedDeps, out, version); out.writeLong(p.toEpoch); - CommandSerializers.nullableTimestamp.serialize(p.executeAt, out, version); + CommandSerializers.nullableTimestamp.serialize(p.committedExecuteAt, out, version); CommandSerializers.nullableWrites.serialize(p.writes, out, version); } @@ -169,6 +172,8 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); RoutingKey progressKey = KeySerializers.nullableRoutingKey.deserialize(in, version); Known achieved = CommandSerializers.known.deserialize(in, version); + CheckStatus.FoundKnownMap known = CheckStatusSerializers.foundKnownMap.deserialize(in, version); + boolean isTruncated = in.readBoolean(); PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); PartialDeps partialDeps = DepsSerializer.nullablePartialDeps.deserialize(in, version); long toEpoch = in.readLong(); @@ -184,14 +189,11 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException case TruncatedApply: case TruncatedApplyWithOutcome: case TruncatedApplyWithDeps: - result = Result.APPLIED; - break; - case Invalidated: - result = Result.INVALIDATED; + result = CommandSerializers.APPLIED; break; } - return Propagate.SerializerSupport.create(txnId, route, saveStatus, maxSaveStatus, durability, homeKey, progressKey, achieved, partialTxn, partialDeps, toEpoch, executeAt, writes, result); + return Propagate.SerializerSupport.create(txnId, route, saveStatus, maxSaveStatus, durability, homeKey, progressKey, achieved, known, isTruncated, partialTxn, partialDeps, toEpoch, executeAt, writes, result); } @Override @@ -199,16 +201,18 @@ public long serializedSize(Propagate p, int version) { return CommandSerializers.txnId.serializedSize(p.txnId, version) + KeySerializers.route.serializedSize(p.route, version) - + CommandSerializers.saveStatus.serializedSize(p.saveStatus, version) + + CommandSerializers.saveStatus.serializedSize(p.maxKnowledgeSaveStatus, version) + CommandSerializers.saveStatus.serializedSize(p.maxSaveStatus, version) + CommandSerializers.durability.serializedSize(p.durability, version) + KeySerializers.nullableRoutingKey.serializedSize(p.homeKey, version) + KeySerializers.nullableRoutingKey.serializedSize(p.progressKey, version) + CommandSerializers.known.serializedSize(p.achieved, version) + + CheckStatusSerializers.foundKnownMap.serializedSize(p.known, version) + + TypeSizes.BOOL_SIZE + CommandSerializers.nullablePartialTxn.serializedSize(p.partialTxn, version) - + DepsSerializer.nullablePartialDeps.serializedSize(p.partialDeps, version) + + DepsSerializer.nullablePartialDeps.serializedSize(p.committedDeps, version) + TypeSizes.sizeof(p.toEpoch) - + CommandSerializers.nullableTimestamp.serializedSize(p.executeAt, version) + + CommandSerializers.nullableTimestamp.serializedSize(p.committedExecuteAt, version) + CommandSerializers.nullableWrites.serializedSize(p.writes, version) ; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index 99e54b5b4552..adf60212cce6 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -129,9 +129,7 @@ public RecoverReply deserialize(DataInputPlus in, int version) throws IOExceptio Result result = null; if (status == Status.PreApplied || status == Status.Applied || status == Status.Truncated) - result = Result.APPLIED; - else if (status == Status.Invalidated) - result = Result.INVALIDATED; + result = CommandSerializers.APPLIED; return deserializeOk(id, status, diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 27406036d93d..3848908216e5 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -374,7 +374,7 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; // any that aren't can just use executeAt.hlc AccordSafeCommandsForKey cfk = ((AccordSafeCommandStore) safeStore).commandsForKey((RoutableKey) key); - cfk.updateLastExecutionTimestamps(executeAt, true); + cfk.updateLastExecutionTimestamps(safeStore, executeAt, true); long timestamp = cfk.timestampMicrosFor(executeAt, true); // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) int nowInSeconds = cfk.nowInSecondsFor(executeAt, true); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index 377a9199403a..8dff79cd8698 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -28,7 +28,6 @@ import org.junit.Assert; import org.junit.Test; -import accord.local.CommandStore; import accord.local.PreLoadContext; import accord.primitives.Timestamp; import accord.topology.TopologyManager; @@ -48,6 +47,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordConfigurationService; import org.apache.cassandra.service.accord.AccordConfigurationService.EpochSnapshot; import org.apache.cassandra.service.accord.AccordService; @@ -271,7 +271,7 @@ public void bootstrapTest() throws Throwable }); awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { - CommandStore commandStore = safeStore.commandStore(); + AccordCommandStore commandStore = (AccordCommandStore) safeStore.commandStore(); Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.bootstrapBeganAt().keySet())); Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.safeToRead().keySet())); // @@ -316,7 +316,7 @@ public void bootstrapTest() throws Throwable awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { if (safeStore.ranges().currentRanges().contains(partitionKey)) { - CommandStore commandStore = safeStore.commandStore(); + AccordCommandStore commandStore = (AccordCommandStore) safeStore.commandStore(); Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); Assert.assertFalse(commandStore.safeToRead().isEmpty()); @@ -458,7 +458,7 @@ public void moveTest() throws Throwable safeStore -> { if (!safeStore.ranges().allAt(preMove).contains(partitionKey)) { - CommandStore commandStore = safeStore.commandStore(); + AccordCommandStore commandStore = (AccordCommandStore) safeStore.commandStore(); Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); Assert.assertFalse(commandStore.safeToRead().isEmpty()); diff --git a/test/simulator/main/org/apache/cassandra/simulator/RandomSource.java b/test/simulator/main/org/apache/cassandra/simulator/RandomSource.java index 14d7ad9b1d90..4e429e418fc6 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/RandomSource.java +++ b/test/simulator/main/org/apache/cassandra/simulator/RandomSource.java @@ -20,13 +20,17 @@ import java.lang.reflect.Array; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.function.IntSupplier; import java.util.function.LongSupplier; import java.util.stream.IntStream; import java.util.stream.LongStream; +import com.google.common.collect.Iterators; + import org.apache.cassandra.utils.Shared; import static org.apache.cassandra.utils.Shared.Scope.SIMULATION; @@ -46,11 +50,20 @@ private Choices(float[] cumulativeProbabilities, T[] options) } public T choose(RandomSource random) + { + return choose(random.uniformFloat()); + } + + public T choose(accord.utils.RandomSource random) + { + return choose(random.nextFloat()); + } + + private T choose(float choose) { if (options.length == 0) return null; - float choose = random.uniformFloat(); int i = Arrays.binarySearch(cumulativeProbabilities, choose); if (i < 0) i = -1 - i; @@ -131,6 +144,41 @@ public static Choices uniform(T ... options) Arrays.fill(nonCumulativeProbabilities, 1f / options.length); return new Choices<>(cumulativeProbabilities(nonCumulativeProbabilities), options); } + + public static T choose(RandomSource rs, Set set) + { + return choose(rs.uniform(0, set.size()), set); + } + + public static T choose(accord.utils.RandomSource rs, Set set) + { + return choose(rs.nextInt(set.size()), set); + } + + private static T choose(int i, Set set) + { + return Iterators.get(set.iterator(), i); + } + + public static T choose(RandomSource rs, List list) + { + return list.get(rs.uniform(0, list.size())); + } + + public static T choose(accord.utils.RandomSource rs, List list) + { + return list.get(rs.nextInt(list.size())); + } + + public static T choose(RandomSource rs, T ... array) + { + return array[rs.uniform(0, array.length)]; + } + + public static T choose(accord.utils.RandomSource rs, T ... array) + { + return array[rs.nextInt(array.length)]; + } } public static abstract class Abstract implements RandomSource diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 33472cf09f26..6d180f82ec70 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -350,7 +350,7 @@ Consumer> expectAccordCommandsNoChange() private static RedundantBefore redundantBefore(TxnId txnId) { Ranges ranges = AccordTestUtils.fullRange(AccordTestUtils.keys(table, 42)); - return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, LT_TXN_ID); + return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, txnId, LT_TXN_ID); } enum DurableBeforeType diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 50716c80b92e..9eafb67d9b59 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -464,13 +464,12 @@ InetAddressAndPort pickParticipant(RandomSource rs, Cluster.Node coordinator, Re { if (repair.state.isComplete()) throw new IllegalStateException("Repair is completed! " + repair.state.getResult()); - List participaents = new ArrayList<>(repair.state.getNeighborsAndRanges().participants.size() + 1); - if (rs.nextBoolean()) participaents.add(coordinator.broadcastAddressAndPort()); - participaents.addAll(repair.state.getNeighborsAndRanges().participants); - participaents.sort(Comparator.naturalOrder()); + List participants = new ArrayList<>(repair.state.getNeighborsAndRanges().participants.size() + 1); + if (rs.nextBoolean()) participants.add(coordinator.broadcastAddressAndPort()); + participants.addAll(repair.state.getNeighborsAndRanges().participants); + participants.sort(Comparator.naturalOrder()); - InetAddressAndPort selected = rs.pick(participaents); - return selected; + return participants.get(rs.nextInt(participants.size())); } static void addMismatch(RandomSource rs, ColumnFamilyStore cfs, Validator validator) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index e3cb042cb109..928daf66960b 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -54,6 +54,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.Pair; @@ -126,9 +127,8 @@ public void commandLoadSave() throws Throwable attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); - Command command = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, - waitingOn, result.left, Result.APPLIED); + waitingOn, result.left, CommandSerializers.APPLIED); AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); safeCommand.set(command); @@ -142,7 +142,7 @@ public void commandLoadSave() throws Throwable dependencies, txn, result.left, - Result.APPLIED); + CommandSerializers.APPLIED); commandStore.appendToJournal(apply); AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); @@ -172,10 +172,10 @@ public void commandsForKeyLoadSave() cfk.initialize(CommandsForKeySerializer.loader); cfk.updateMax(maxTimestamp); - cfk.updateLastExecutionTimestamps(txnId1, true); + cfk.updateLastExecutionTimestamps(null, txnId1, true); Assert.assertEquals(txnId1.hlc(), cfk.timestampMicrosFor(txnId1, true)); - cfk.updateLastExecutionTimestamps(txnId2, true); + cfk.updateLastExecutionTimestamps(null, txnId2, true); Assert.assertEquals(txnId2.hlc(), cfk.timestampMicrosFor(txnId2, true)); Assert.assertEquals(txnId2, cfk.current().lastExecutedTimestamp()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index f3bb71707d49..bc694eee644a 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -75,6 +75,7 @@ import org.assertj.core.api.Assertions; import static accord.utils.Property.qt; +import static org.apache.cassandra.simulator.RandomSource.Choices.choose; public class AccordSyncPropagatorTest { @@ -121,7 +122,7 @@ public void burnTest() { for (Range range : ranges) { - Cluster.Instace inst = cluster.node(rs.pick(nodes)); + Cluster.Instace inst = cluster.node(choose(rs, nodes)); scheduler.schedule(() -> { Ranges subrange = Ranges.of(range); inst.propagator.reportClosed(epoch, nodes, subrange); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 8a64409ccac6..64907c0209f6 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -53,6 +53,7 @@ import accord.local.SaveStatus.LocalExecution; import accord.primitives.Ballot; import accord.primitives.FullKeyRoute; +import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; @@ -108,6 +109,7 @@ public static Command preaccepted(TxnId txnId, PartialTxn txn, Timestamp execute { CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); attrs.partialTxn(txn); + attrs.route(route(txn)); return Command.SerializerSupport.preaccepted(attrs, executeAt, Ballot.ZERO); } @@ -115,9 +117,7 @@ public static Command committed(TxnId txnId, PartialTxn txn, Timestamp executeAt { CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId).partialDeps(PartialDeps.NONE); attrs.partialTxn(txn); - Seekable key = txn.keys().get(0); - RoutingKey routingKey = key.asKey().toUnseekable(); - attrs.route(new FullKeyRoute(routingKey, true, new RoutingKey[]{ routingKey})); + attrs.route(route(txn)); return Command.SerializerSupport.committed(attrs, SaveStatus.Committed, executeAt, @@ -125,6 +125,13 @@ public static Command committed(TxnId txnId, PartialTxn txn, Timestamp executeAt Ballot.ZERO, Command.WaitingOn.EMPTY); } + + private static FullRoute route(PartialTxn txn) + { + Seekable key = txn.keys().get(0); + RoutingKey routingKey = key.asKey().toUnseekable(); + return new FullKeyRoute(routingKey, true, new RoutingKey[]{ routingKey }); + } } public static CommandsForKey commandsForKey(Key key) @@ -171,6 +178,7 @@ public static void testLoad(ManualExecutor executor, AccordSafeState cfr() { // TODO (coverage): once all partitioners work with regard to splitting, then should test all - Gen partitionerGen = rs -> rs.pick(Murmur3Partitioner.instance, RandomPartitioner.instance); + Gen partitionerGen = rs -> choose(rs, Murmur3Partitioner.instance, RandomPartitioner.instance); Gen statusGen = Gens.enums().all(SaveStatus.class); return rs -> { IPartitioner partitioner = partitionerGen.next(rs); diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index f42dcd230090..7017e986647a 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -26,6 +26,7 @@ import accord.local.Command; import accord.primitives.Deps; +import accord.primitives.FullRoute; import accord.primitives.KeyDeps; import accord.primitives.PartialTxn; import accord.primitives.Range; @@ -74,6 +75,8 @@ public static Gen commands() //TODO goes against fuzz testing, and also limits to a very specific table existing... // There is a branch that can generate random transactions, so maybe look into that? PartialTxn txn = createPartialTxn(0); + FullRoute route = txn.keys().toRoute(txn.keys().get(0).someIntersectingRoutingKey(null)); + return rs -> { TxnId id = ids.next(rs); Timestamp executeAt = id; From d07a36106db1376f98eb76aaba311f6021597052 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Fri, 27 Oct 2023 12:18:49 -0700 Subject: [PATCH 083/340] Reduce command deps Patch by Blake Eggleston; Reviewed by Benedict Elliott Smith for CASSANDRA-18784 --- modules/accord | 2 +- .../db/compaction/CompactionIterator.java | 93 ++- .../service/accord/AccordCachingState.java | 54 +- .../service/accord/AccordCommandStore.java | 147 ++++- .../service/accord/AccordCommandsForKeys.java | 252 ++++++++ .../service/accord/AccordKeyspace.java | 553 +++++++++++------- .../service/accord/AccordMessageSink.java | 2 +- .../service/accord/AccordObjectSizes.java | 56 +- .../accord/AccordSafeCommandStore.java | 180 ++++-- .../accord/AccordSafeCommandsForKey.java | 24 +- .../AccordSafeCommandsForKeyUpdate.java | 122 ++++ .../accord/AccordSafeTimestampsForKey.java | 146 +++++ .../service/accord/AccordStateCache.java | 157 +++-- .../service/accord/CommandsForKeyUpdate.java | 101 ++++ .../service/accord/CommandsForRanges.java | 35 +- .../service/accord/async/AsyncLoader.java | 74 ++- .../service/accord/async/AsyncOperation.java | 23 +- .../accord/serializers/ApplySerializers.java | 27 +- .../serializers/CheckStatusSerializers.java | 1 + .../accord/serializers/CommitSerializers.java | 27 +- .../accord/serializers/FetchSerializers.java | 25 +- .../service/accord/txn/TxnWrite.java | 13 +- .../CompactionAccordIteratorsTest.java | 46 +- .../accord/AccordCachingStateTest.java | 7 +- .../accord/AccordCommandStoreTest.java | 194 +++++- .../service/accord/AccordCommandTest.java | 27 +- .../service/accord/AccordStateCacheTest.java | 34 +- .../service/accord/AccordTestUtils.java | 9 +- .../service/accord/async/AsyncLoaderTest.java | 185 ++++-- .../accord/async/AsyncOperationTest.java | 17 +- 30 files changed, 2054 insertions(+), 579 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommandsForKeys.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java diff --git a/modules/accord b/modules/accord index 746dabe0b43b..d9ef555302f8 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 746dabe0b43bf719badbd605e68a76037d01256d +Subproject commit d9ef555302f8774ed03325ba22d38ee0b80130a8 diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index dbf0d0e2427f..ae6f68294a49 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -80,7 +80,8 @@ import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; import org.apache.cassandra.service.accord.AccordKeyspace.CommandsColumns; -import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows; +import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyAccessor; +import org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyRows; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.service.accord.api.PartitionKey; @@ -97,11 +98,11 @@ import static org.apache.cassandra.config.DatabaseDescriptor.paxosStatePurging; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.maybeDropTruncatedCommandColumns; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.truncatedApply; -import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.last_executed_micros; -import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.last_executed_timestamp; -import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.last_write_timestamp; -import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyColumns.max_timestamp; -import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows.truncateStaticRow; +import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_executed_micros; +import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_executed_timestamp; +import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_write_timestamp; +import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.max_timestamp; +import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyRows.truncateTimestampsForKeyRow; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeDurabilityOrNull; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeRouteOrNull; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeSaveStatusOrNull; @@ -204,8 +205,11 @@ public CompactionIterator(OperationType type, ? new PaxosPurger() : isAccordCommands(controller.cfs) ? new AccordCommandsPurger(accordService) - : isAccordCommandsForKey(controller.cfs) ? new AccordCommandsForKeyPurger(accordService) - : new Purger(controller, nowInSec); + : isAccordDepsCommandsForKey(controller.cfs) + ? new AccordCommandsForKeyPurger(AccordKeyspace.DepsCommandsForKeysAccessor, accordService) + : isAccordAllCommandsForKey(controller.cfs) + ? new AccordCommandsForKeyPurger(AccordKeyspace.AllCommandsForKeysAccessor, accordService) + : new Purger(controller, nowInSec); merged = Transformation.apply(merged, purger); merged = DuplicateRowChecker.duringCompaction(merged, type); compacted = Transformation.apply(merged, new AbortableUnfilteredPartitionTransformation(this)); @@ -817,7 +821,9 @@ protected Row applyToRow(Row row) if (executeAt == null || durability == null || saveStatus == null || route == null) return row; - Commands.Cleanup cleanup = Commands.shouldCleanup(txnId, saveStatus.status, durability, executeAt, route, redundantBefore, durableBefore, false); + Commands.Cleanup cleanup = Commands.shouldCleanup(txnId, saveStatus.status, + durability, executeAt, route, + redundantBefore, durableBefore); switch (cleanup) { default: throw new AssertionError(String.format("Unexpected cleanup task: %s", cleanup)); @@ -840,6 +846,9 @@ protected Row applyToRow(Row row) } } + + + @Override protected Row applyToStatic(Row row) { @@ -848,26 +857,25 @@ protected Row applyToStatic(Row row) } } - class AccordCommandsForKeyPurger extends AbstractPurger + class AccordTimestampsForKeyPurger extends AbstractPurger { final Int2ObjectHashMap redundantBefores; int storeId; PartitionKey partitionKey; - AccordCommandsForKeyPurger(Supplier accordService) + AccordTimestampsForKeyPurger(Supplier accordService) { this.redundantBefores = accordService.get().getRedundantBeforesAndDurableBefore().left; } protected void beginPartition(UnfilteredRowIterator partition) { - ByteBuffer[] partitionKeyComponents = CommandsForKeyRows.splitPartitionKey(partition.partitionKey()); - storeId = CommandsForKeyRows.getStoreId(partitionKeyComponents); - partitionKey = CommandsForKeyRows.getKey(partitionKeyComponents); + ByteBuffer[] partitionKeyComponents = TimestampsForKeyRows.splitPartitionKey(partition.partitionKey()); + storeId = TimestampsForKeyRows.getStoreId(partitionKeyComponents); + partitionKey = TimestampsForKeyRows.getKey(partitionKeyComponents); } - @Override - protected Row applyToStatic(Row row) + protected Row applyToRow(Row row) { updateProgress(); @@ -920,7 +928,35 @@ protected Row applyToStatic(Row row) maxTimestampCell == null) return null; - return truncateStaticRow(nowInSec, row, lastExecuteMicrosCell, lastExecuteCell, lastWriteCell, maxTimestampCell); + return truncateTimestampsForKeyRow(nowInSec, row, lastExecuteMicrosCell, lastExecuteCell, lastWriteCell, maxTimestampCell); + } + + @Override + protected Row applyToStatic(Row row) + { + checkState(row.isStatic() && row.isEmpty()); + return row; + } + } + + class AccordCommandsForKeyPurger extends AbstractPurger + { + final CommandsForKeyAccessor accessor; + final Int2ObjectHashMap redundantBefores; + int storeId; + PartitionKey partitionKey; + + AccordCommandsForKeyPurger(CommandsForKeyAccessor accessor, Supplier accordService) + { + this.accessor = accessor; + this.redundantBefores = accordService.get().getRedundantBeforesAndDurableBefore().left; + } + + protected void beginPartition(UnfilteredRowIterator partition) + { + ByteBuffer[] partitionKeyComponents = accessor.splitPartitionKey(partition.partitionKey()); + storeId = accessor.getStoreId(partitionKeyComponents); + partitionKey = accessor.getKey(partitionKeyComponents); } @Override @@ -938,12 +974,19 @@ protected Row applyToRow(Row row) return row; TxnId redundantBeforeTxnId = redundantBeforeEntry.shardRedundantBefore(); - Timestamp timestamp = CommandsForKeyRows.getTimestamp(row); + Timestamp timestamp = accessor.getTimestamp(row); if (timestamp != null && timestamp.compareTo(redundantBeforeTxnId) < 0) return null; return row; } + + @Override + protected Row applyToStatic(Row row) + { + checkState(row.isStatic() && row.isEmpty()); + return row; + } } private static class AbortableUnfilteredPartitionTransformation extends Transformation @@ -991,8 +1034,18 @@ private static boolean isAccordCommands(ColumnFamilyStore cfs) return cfs.name.equals(AccordKeyspace.COMMANDS) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); } - private static boolean isAccordCommandsForKey(ColumnFamilyStore cfs) + private static boolean isAccordCommandsForKey(ColumnFamilyStore cfs, String name) + { + return cfs.name.equals(name) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); + } + + private static boolean isAccordDepsCommandsForKey(ColumnFamilyStore cfs) + { + return isAccordCommandsForKey(cfs, AccordKeyspace.DEPS_COMMANDS_FOR_KEY); + } + + private static boolean isAccordAllCommandsForKey(ColumnFamilyStore cfs) { - return cfs.name.equals(AccordKeyspace.COMMANDS_FOR_KEY) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); + return isAccordCommandsForKey(cfs, AccordKeyspace.ALL_COMMANDS_FOR_KEY); } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index d7bce189d2fe..43e0a50c7aa8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -22,11 +22,13 @@ import java.util.function.Function; import java.util.function.ToLongFunction; +import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; import accord.local.Command.TransientListener; import accord.local.Listeners; import accord.utils.IntrusiveLinkedListNode; +import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncResults.RunnableResult; import org.apache.cassandra.concurrent.ExecutorPlus; @@ -47,13 +49,24 @@ */ public class AccordCachingState extends IntrusiveLinkedListNode { - static final long EMPTY_SIZE = ObjectSizes.measure(new AccordCachingState<>(null, null)); + static final long EMPTY_SIZE = ObjectSizes.measure(new AccordCachingState<>(null, 0, null)); + + public interface Factory + { + AccordCachingState create(K key, int index); + } + + static Factory defaultFactory() + { + return AccordCachingState::new; + } private final K key; private State state; int references = 0; int lastQueriedEstimatedSizeOnHeap = 0; + final byte index; private boolean shouldUpdateSize; /** @@ -61,16 +74,20 @@ public class AccordCachingState extends IntrusiveLinkedListNode */ private Listeners transientListeners; - public AccordCachingState(K key) + AccordCachingState(K key, int index) { this.key = key; + Invariants.checkArgument(index >= 0 && index <= Byte.MAX_VALUE); + this.index = (byte) index; //noinspection unchecked this.state = (State) Uninitialized.instance; } - AccordCachingState(K key, State state) + private AccordCachingState(K key, int index, State state) { this.key = key; + Invariants.checkArgument(index >= 0 && index <= Byte.MAX_VALUE); + this.index = (byte) index; this.state = state; } @@ -104,6 +121,11 @@ public boolean isComplete() return status().isComplete(); } + public boolean canEvict() + { + return true; + } + int estimatedSizeOnHeap(ToLongFunction estimator) { shouldUpdateSize = false; @@ -179,7 +201,12 @@ public AsyncChain load(ExecutorPlus executor, Function loadFunction) return loading; } - private State state(State next) + public void initialize(V value) + { + state(state.initialize(value)); + } + + protected State state(State next) { State prev = state; if (prev != next) @@ -187,6 +214,12 @@ private State state(State next) return state = next; } + @VisibleForTesting + protected State state() + { + return state; + } + public AsyncChain loading() { // do *not* attempt to complete, to prevent races where the caller found a pending load, attempts @@ -209,7 +242,8 @@ public void set(V value) * Submits a save runnable to the specified executor. When the runnable * has completed, the state save will have either completed or failed. */ - void save(ExecutorPlus executor, BiFunction saveFunction) + @VisibleForTesting + public void save(ExecutorPlus executor, BiFunction saveFunction) { @SuppressWarnings("unchecked") State savingOrLoaded = state.save((BiFunction) saveFunction); @@ -300,6 +334,11 @@ default Loading load(K key, Function loadFunction) throw illegalState(this, "load(key, loadFunction)"); } + default Loaded initialize(V value) + { + throw illegalState(this, "initialize(value)"); + } + default RunnableResult loading() { throw illegalState(this, "loading()"); @@ -373,6 +412,11 @@ public Loading load(K key, Function loadFunction) return new Loading<>(() -> loadFunction.apply(key)); } + public Loaded initialize(V value) + { + return new Loaded<>(value); + } + @Override public Evicted evict() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index fe9c397358dc..e5f83ab6ff1f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -27,7 +27,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; -import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -43,8 +42,10 @@ import accord.api.DataStore; import accord.api.Key; import accord.api.ProgressLog; -import accord.impl.CommandTimeseriesHolder; import accord.impl.CommandsForKey; +import accord.impl.DomainCommands; +import accord.impl.DomainTimestamps; +import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; import accord.local.DurableBefore; @@ -69,6 +70,7 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.ReducingRangeMap; +import accord.utils.TriFunction; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.Observable; @@ -112,9 +114,13 @@ private static long getThreadId(ExecutorService executor) private final AccordJournal journal; private final ExecutorService executor; private final ExecutionOrder executionOrder; + private final AccordCommandsForKeys keyCoordinator; private final AccordStateCache stateCache; private final AccordStateCache.Instance commandCache; - private final AccordStateCache.Instance commandsForKeyCache; + private final AccordStateCache.Instance timestampsForKeyCache; + private final AccordStateCache.Instance depsCommandsForKeyCache; + private final AccordStateCache.Instance allCommandsForKeyCache; + private final AccordStateCache.Instance updatesForKeyCache; private AsyncOperation currentOperation = null; private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; @@ -147,26 +153,56 @@ public AccordCommandStore(int id, super(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder); this.journal = journal; loggingId = String.format("[%s]", id); + keyCoordinator = new AccordCommandsForKeys(this); executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); executionOrder = new ExecutionOrder(); threadId = getThreadId(executor); stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20, cacheMetrics); commandCache = stateCache.instance(TxnId.class, - TxnId.class, + AccordSafeCommand.class, AccordSafeCommand::new, this::loadCommand, this::saveCommand, this::validateCommand, AccordObjectSizes::command); - commandsForKeyCache = + timestampsForKeyCache = stateCache.instance(RoutableKey.class, - PartitionKey.class, + AccordSafeTimestampsForKey.class, + AccordSafeTimestampsForKey::new, + this::loadTimestampsForKey, + this::saveTimestampsForKey, + this::validateTimestampsForKey, + AccordObjectSizes::timestampsForKey); + depsCommandsForKeyCache = + stateCache.instance(RoutableKey.class, + AccordSafeCommandsForKey.class, + AccordSafeCommandsForKey::new, + this::loadDepsCommandsForKey, + this::saveCommandsForKey, + this::validateDepsCommandsForKey, + AccordObjectSizes::commandsForKey, + keyCoordinator::createDepsCommandsNode); + allCommandsForKeyCache = + stateCache.instance(RoutableKey.class, + AccordSafeCommandsForKey.class, AccordSafeCommandsForKey::new, - this::loadCommandsForKey, + this::loadAllCommandsForKey, this::saveCommandsForKey, - this::validateCommandsForKey, - AccordObjectSizes::commandsForKey); + this::validateAllCommandsForKey, + AccordObjectSizes::commandsForKey, + keyCoordinator::createDepsCommandsNode); + updatesForKeyCache = + stateCache.instance(RoutableKey.class, + AccordSafeCommandsForKeyUpdate.class, + AccordSafeCommandsForKeyUpdate::new, + this::loadCommandsForKeyUpdate, + this::saveCommandsForKeyUpdate, + (key, evicting) -> true, + CommandsForKeyUpdate::estimatedSizeOnHeap, + keyCoordinator::createUpdatesNode); + +//>>>>>>> 701eeff2b4 (deps pruning integration) AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead) -> { executor.submit(() -> { if (rejectBefore != null) @@ -181,6 +217,7 @@ public AccordCommandStore(int id, super.setSafeToRead(safeToRead); }); })); + executor.execute(() -> CommandStore.register(this)); executor.execute(this::loadRangesToCommands); } @@ -299,9 +336,24 @@ public AccordStateCache.Instance commandCache return commandCache; } - public AccordStateCache.Instance commandsForKeyCache() + public AccordStateCache.Instance timestampsForKeyCache() + { + return timestampsForKeyCache; + } + + public AccordStateCache.Instance depsCommandsForKeyCache() + { + return depsCommandsForKeyCache; + } + + public AccordStateCache.Instance allCommandsForKeyCache() + { + return allCommandsForKeyCache; + } + + public AccordStateCache.Instance updatesForKeyCache() { - return commandsForKeyCache; + return updatesForKeyCache; } Command loadCommand(TxnId txnId) @@ -322,22 +374,63 @@ boolean validateCommand(TxnId txnId, Command evicting) return (evicting == null && reloaded == null) || (evicting != null && reloaded != null && reloaded.isEqualOrFuller(evicting)); } - CommandsForKey loadCommandsForKey(RoutableKey key) + boolean validateTimestampsForKey(RoutableKey key, TimestampsForKey evicting) + { + TimestampsForKey reloaded = AccordKeyspace.unsafeLoadTimestampsForKey(this, (PartitionKey) key); + return Objects.equals(evicting, reloaded); + + } + + TimestampsForKey loadTimestampsForKey(RoutableKey key) + { + return AccordKeyspace.loadTimestampsForKey(this, (PartitionKey) key); + } + + CommandsForKey loadDepsCommandsForKey(RoutableKey key) + { + return AccordKeyspace.loadDepsCommandsForKey(this, (PartitionKey) key); + } + + CommandsForKey loadAllCommandsForKey(RoutableKey key) + { + return AccordKeyspace.loadAllCommandsForKey(this, (PartitionKey) key); + } + + CommandsForKeyUpdate loadCommandsForKeyUpdate(RoutableKey key) + { + throw new IllegalStateException(); + } + + boolean validateDepsCommandsForKey(RoutableKey key, CommandsForKey evicting) + { + CommandsForKey reloaded = AccordKeyspace.loadDepsCommandsForKey(this, (PartitionKey) key); + return Objects.equals(evicting, reloaded); + } + + boolean validateAllCommandsForKey(RoutableKey key, CommandsForKey evicting) { - return AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); + CommandsForKey reloaded = AccordKeyspace.loadAllCommandsForKey(this, (PartitionKey) key); + return Objects.equals(evicting, reloaded); } @Nullable private Runnable saveCommandsForKey(CommandsForKey before, CommandsForKey after) { - Mutation mutation = AccordKeyspace.getCommandsForKeyMutation(id, before, after, nextSystemTimestampMicros()); + throw new IllegalStateException(); + } + + @Nullable + private Runnable saveTimestampsForKey(TimestampsForKey before, TimestampsForKey after) + { + Mutation mutation = AccordKeyspace.getTimestampsForKeyMutation(id, before, after, nextSystemTimestampMicros()); return null != mutation ? mutation::apply : null; } - boolean validateCommandsForKey(RoutableKey key, CommandsForKey evicting) + @Nullable + private Runnable saveCommandsForKeyUpdate(CommandsForKeyUpdate before, CommandsForKeyUpdate after) { - CommandsForKey reloaded = AccordKeyspace.unsafeLoadCommandsForKey(this, (PartitionKey) key); - return Objects.equals(evicting, reloaded); + Mutation mutation = AccordKeyspace.getCommandsForKeyMutation(id, after, nextSystemTimestampMicros()); + return null != mutation ? mutation::apply : null; } @VisibleForTesting @@ -431,12 +524,16 @@ public void executeBlocking(Runnable runnable) public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, Map commands, - NavigableMap commandsForKeys) + NavigableMap timestampsForKeys, + NavigableMap depsCommandsForKeys, + NavigableMap allCommandsForKeys, + NavigableMap updatesForKeys) { Invariants.checkState(current == null); commands.values().forEach(AccordSafeState::preExecute); - commandsForKeys.values().forEach(AccordSafeState::preExecute); - current = new AccordSafeCommandStore(preLoadContext, commands, commandsForKeys, this); + depsCommandsForKeys.values().forEach(AccordSafeState::preExecute); + timestampsForKeys.values().forEach(AccordSafeState::preExecute); + current = new AccordSafeCommandStore(preLoadContext, commands, timestampsForKeys, depsCommandsForKeys, allCommandsForKeys, updatesForKeys, this); return current; } @@ -452,7 +549,7 @@ public void completeOperation(AccordSafeCommandStore store) current = null; } - O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) + O mapReduceForRange(Routables keysOrRanges, Ranges slice, TriFunction map, O accumulate, Predicate terminate) { keysOrRanges = keysOrRanges.slice(slice, Routables.Slice.Minimal); switch (keysOrRanges.domain()) @@ -460,9 +557,9 @@ O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction keys = (AbstractKeys) keysOrRanges; - for (CommandTimeseriesHolder summary : commandsForRanges.search(keys)) + for (CommandsForRanges.DomainInfo summary : commandsForRanges.search(keys)) { - accumulate = map.apply(summary, accumulate); + accumulate = map.apply(summary, summary, accumulate); if (terminate.test(accumulate)) return accumulate; } @@ -473,10 +570,10 @@ O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction createDepsCommandsNode(RoutableKey key, int index) + { + return new DepsCommandsCachingState(key, index); + } + + AccordCachingState createAllCommandsNode(RoutableKey key, int index) + { + return new AllCommandsCachingState(key, index); + } + + AccordCachingState createUpdatesNode(RoutableKey key, int index) + { + return new UpdateCachingState(key, index); + } + + protected static boolean hasEvictableStatus(AccordCachingState state) + { + if (state == null) + return true; + + switch (state.status()) + { + case LOADING: + case SAVING: + return false; + } + + return true; + } + + boolean canEvictKey(RoutableKey key) + { + return hasEvictableStatus(commandStore.depsCommandsForKeyCache().getUnsafe(key)) + && hasEvictableStatus(commandStore.allCommandsForKeyCache().getUnsafe(key)) + && hasEvictableStatus(commandStore.updatesForKeyCache().getUnsafe(key)); + } + + public abstract class CommandsCachingState extends AccordCachingState + { + protected CommandsCachingState(RoutableKey key, int index) + { + super(key, index); + } + + private CommandsForKey initializeIfNull(CommandsForKey commands) + { + if (commands != null) + return commands; + return new CommandsForKey((Key) key(), CommandsForKeySerializer.loader); + } + + private State maybeApplyUpdates(State state) + { + if (!(state instanceof Loaded)) + return state; + + Loaded loaded = (Loaded) state; + CommandsForKey commands = loaded.get(); + UpdateCachingState updates = (UpdateCachingState) commandStore.updatesForKeyCache().getUnsafe(key()); + if (updates == null) + return loaded; + + CommandsForKeyUpdate update = updates.getUpdateIfAvailable(); + if (update == null) + return loaded; + CommandsForKey updated = apply(initializeIfNull(commands), update); + if (updated == commands) + return loaded; + + return new Loaded<>(updated); + } + + protected abstract CommandsForKey apply(CommandsForKey current, CommandsForKeyUpdate update); + + private void maybeApplyUpdates(CommandsForKeyUpdate update) + { + if (status() != Status.LOADED) + return; + + CommandsForKey commands = get(); + CommandsForKey updated = apply(initializeIfNull(commands), update); + if (commands != updated) + super.state(new Loaded<>(updated)); + } + + protected State state(State next) + { + Status nextStatus = next.status(); + Invariants.checkState(nextStatus != Status.MODIFIED && nextStatus != Status.SAVING, + "CommandsForKey cannot have state %s", nextStatus); + + return super.state(maybeApplyUpdates(next)); + } + + @Override + public boolean canEvict() + { + return canEvictKey(key()); + } + } + + public class DepsCommandsCachingState extends CommandsCachingState + { + public DepsCommandsCachingState(RoutableKey key, int index) + { + super(key, index); + } + + protected CommandsForKey apply(CommandsForKey current, CommandsForKeyUpdate update) + { + return update.applyToDeps(current); + } + } + + public class AllCommandsCachingState extends CommandsCachingState + { + public AllCommandsCachingState(RoutableKey key, int index) + { + super(key, index); + } + + protected CommandsForKey apply(CommandsForKey current, CommandsForKeyUpdate update) + { + return update.applyToAll(current); + } + } + + public class UpdateCachingState extends AccordCachingState implements CommandsForKeyGroupUpdater.Immutable.Factory + { + public UpdateCachingState(RoutableKey key, int index) + { + super(key, index); + } + + public AsyncChain load(ExecutorPlus executor, Function loadFunction) + { + if (status() == Status.UNINITIALIZED) + { + CommandsForKeyUpdate initialized = CommandsForKeyUpdate.empty(key()); + state(state().initialize(initialized)); + return null; + } + + return super.load(executor, loadFunction); + } + + // update in memory cfk data with the update results + protected void maybeUpdateCommands(CommandsForKeyUpdate update) + { + CommandsCachingState commands = (CommandsCachingState) commandStore.depsCommandsForKeyCache().getUnsafe(key()); + if (commands == null) + return; + + commands.maybeApplyUpdates(update); + } + + public CommandsForKeyUpdate create(CommandsForKeyUpdater.Immutable deps, CommandsForKeyUpdater.Immutable all, CommandsForKeyUpdater.Immutable common) + { + return new CommandsForKeyUpdate((PartitionKey) key(), deps, all, common); + } + + protected State maybeProcessModification(State next) + { + if (!(next instanceof Modified)) + return next; + + Modified modified = (Modified) next; + + CommandsForKeyUpdate current = modified.current; + maybeUpdateCommands(current); + + // combine in memory updates + current = CommandsForKeyGroupUpdater.Immutable.merge(modified.original, current, this); + + return new Modified<>(null, current); + } + + protected State state(State next) + { + Status nextStatus = next.status(); + Invariants.checkState(nextStatus != Status.LOADING, + "CommandsForKeyUpdate cannot have state %s", nextStatus); + + return super.state(maybeProcessModification(next)); + } + + CommandsForKeyUpdate getUpdateIfAvailable() + { + switch (status()) + { + case LOADED: + case MODIFIED: + return get(); + } + + return null; + } + + @Override + public boolean canEvict() + { + return canEvictKey(key()); + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index fc63c620495d..836baed379f6 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -25,7 +25,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.Comparator; -import java.util.EnumMap; import java.util.List; import java.util.Map; import java.util.NavigableMap; @@ -35,10 +34,12 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; +import java.util.function.Predicate; import java.util.stream.Collectors; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.Iterables; @@ -47,8 +48,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.Key; import accord.impl.CommandTimeseries; import accord.impl.CommandsForKey; +import accord.impl.CommandsForKeyUpdater; +import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.Command.WaitingOn; import accord.local.CommandStore; @@ -150,8 +154,9 @@ import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.Throwables; +import org.apache.cassandra.utils.MonotonicClock; import org.apache.cassandra.utils.btree.BTree; +import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static accord.utils.Invariants.checkArgument; @@ -171,7 +176,9 @@ public class AccordKeyspace private static final Logger logger = LoggerFactory.getLogger(AccordKeyspace.class); public static final String COMMANDS = "commands"; - public static final String COMMANDS_FOR_KEY = "commands_for_key"; + public static final String TIMESTAMPS_FOR_KEY = "timestamps_for_key"; + public static final String DEPS_COMMANDS_FOR_KEY = "deps_commands_for_key"; + public static final String ALL_COMMANDS_FOR_KEY = "all_commands_for_key"; public static final String TOPOLOGIES = "topologies"; public static final String EPOCH_METADATA = "epoch_metadata"; public static final String COMMAND_STORE_METADATA = "command_store_metadata"; @@ -183,28 +190,6 @@ public class AccordKeyspace private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexSliceFilter(Slices.ALL, false); - public enum SeriesKind - { - BY_ID(CommandsForKey::byId), - BY_EXECUTE_AT(CommandsForKey::byExecuteAt); - - private final Function> getSeries; - - SeriesKind(Function> getSeries) - { - this.getSeries = getSeries; - } - - ImmutableSortedMap getValues(CommandsForKey cfk) - { - if (cfk == null) - return ImmutableSortedMap.of(); - - CommandTimeseries series = getSeries.apply(cfk); - return (ImmutableSortedMap) series.commands; - } - } - private enum TokenType { Murmur3((byte) 1), @@ -421,81 +406,50 @@ public static Row maybeDropTruncatedCommandColumns(Row row, Cell durabilityCe } } - private static final TableMetadata CommandsForKeys = - parse(COMMANDS_FOR_KEY, - "accord commands per key", + private static final TableMetadata TimestampsForKeys = + parse(TIMESTAMPS_FOR_KEY, + "accord timestamps per key", "CREATE TABLE %s (" + "store_id int, " + "key_token blob, " // can't use "token" as this is restricted word in CQL + format("key %s, ", KEY_TUPLE) - + format("max_timestamp %s static, ", TIMESTAMP_TUPLE) - + format("last_executed_timestamp %s static, ", TIMESTAMP_TUPLE) - + "last_executed_micros bigint static, " - + format("last_write_timestamp %s static, ", TIMESTAMP_TUPLE) - + "series int, " - + format("timestamp %s, ", TIMESTAMP_TUPLE) - + "data blob, " - + "PRIMARY KEY((store_id, key_token, key), series, timestamp)" + + format("max_timestamp %s, ", TIMESTAMP_TUPLE) + + format("last_executed_timestamp %s, ", TIMESTAMP_TUPLE) + + "last_executed_micros bigint, " + + format("last_write_timestamp %s, ", TIMESTAMP_TUPLE) + + "PRIMARY KEY((store_id, key_token, key))" + ')') .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) .build(); - public static class CommandsForKeyColumns + public static class TimestampsForKeyColumns { - static final ClusteringComparator keyComparator = CommandsForKeys.partitionKeyAsClusteringComparator(); - static final CompositeType partitionKeyType = (CompositeType) CommandsForKeys.partitionKeyType; - static final ColumnFilter allColumns = ColumnFilter.all(CommandsForKeys); - static final ColumnMetadata store_id = getColumn(CommandsForKeys, "store_id"); - static final ColumnMetadata key_token = getColumn(CommandsForKeys, "key_token"); - static final ColumnMetadata key = getColumn(CommandsForKeys, "key"); - static final ColumnMetadata timestamp = getColumn(CommandsForKeys, "timestamp"); - public static final ColumnMetadata max_timestamp = getColumn(CommandsForKeys, "max_timestamp"); - public static final ColumnMetadata last_executed_timestamp = getColumn(CommandsForKeys, "last_executed_timestamp"); - public static final ColumnMetadata last_executed_micros = getColumn(CommandsForKeys, "last_executed_micros"); - public static final ColumnMetadata last_write_timestamp = getColumn(CommandsForKeys, "last_write_timestamp"); - - static final ColumnMetadata data = getColumn(CommandsForKeys, "data"); - - // Ordered by columnn name because it will be used to construct btree leaf arrays - static final ColumnMetadata[] static_columns_metadata = new ColumnMetadata[] { last_executed_micros, last_executed_timestamp, last_write_timestamp, max_timestamp }; - - static final Columns statics = Columns.from(Lists.newArrayList(max_timestamp, last_executed_timestamp, last_executed_micros, last_write_timestamp)); - static final Columns regulars = Columns.from(Lists.newArrayList(data)); - private static final RegularAndStaticColumns all = new RegularAndStaticColumns(statics, regulars); - private static final RegularAndStaticColumns justStatic = new RegularAndStaticColumns(statics, Columns.NONE); - private static final RegularAndStaticColumns justRegular = new RegularAndStaticColumns(Columns.NONE, regulars); + static final ClusteringComparator keyComparator = TimestampsForKeys.partitionKeyAsClusteringComparator(); + static final CompositeType partitionKeyType = (CompositeType) TimestampsForKeys.partitionKeyType; + static final ColumnFilter allColumns = ColumnFilter.all(TimestampsForKeys); + static final ColumnMetadata store_id = getColumn(TimestampsForKeys, "store_id"); + static final ColumnMetadata key_token = getColumn(TimestampsForKeys, "key_token"); + static final ColumnMetadata key = getColumn(TimestampsForKeys, "key"); + public static final ColumnMetadata max_timestamp = getColumn(TimestampsForKeys, "max_timestamp"); + public static final ColumnMetadata last_executed_timestamp = getColumn(TimestampsForKeys, "last_executed_timestamp"); + public static final ColumnMetadata last_executed_micros = getColumn(TimestampsForKeys, "last_executed_micros"); + public static final ColumnMetadata last_write_timestamp = getColumn(TimestampsForKeys, "last_write_timestamp"); - static boolean hasStaticChanges(CommandsForKey original, CommandsForKey current) - { - return valueModified(CommandsForKey::max, original, current) - || valueModified(CommandsForKey::lastExecutedTimestamp, original, current) - || valueModified(CommandsForKey::lastWriteTimestamp, original, current) - || valueModified(CommandsForKey::rawLastExecutedHlc, original, current); - } + static final Columns columns = Columns.from(Lists.newArrayList(max_timestamp, last_executed_timestamp, last_executed_micros, last_write_timestamp)); - private static boolean hasRegularChanges(CommandsForKey original, CommandsForKey current) + static ByteBuffer makePartitionKey(int storeId, Key key) { - return valueModified(CommandsForKey::byId, original, current) - || valueModified(CommandsForKey::byExecuteAt, original, current); + PartitionKey pk = (PartitionKey) key; + return keyComparator.make(storeId, serializeToken(pk.token()), serializeKey(pk)).serializeAsPartitionKey(); } - static RegularAndStaticColumns columnsFor(CommandsForKey original, CommandsForKey current) + static ByteBuffer makePartitionKey(int storeId, TimestampsForKey timestamps) { - boolean hasStaticChanges = hasStaticChanges(original, current); - boolean hasRegularChanges = hasRegularChanges(original, current); - - if (hasStaticChanges && hasRegularChanges) - return all; - else if (hasStaticChanges) - return justStatic; - else if (hasRegularChanges) - return justRegular; - else - throw new IllegalArgumentException("No Static or Regular columns changed for CFK " + current.key()); + return makePartitionKey(storeId, timestamps.key()); } } - public static class CommandsForKeyRows extends CommandsForKeyColumns + public static class TimestampsForKeyRows extends TimestampsForKeyColumns { public static ByteBuffer[] splitPartitionKey(DecoratedKey key) { @@ -507,6 +461,11 @@ public static int getStoreId(ByteBuffer[] partitionKeyComponents) return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); } + public static PartitionKey getKey(ByteBuffer[] partitionKeyComponents) + { + return deserializeKey(partitionKeyComponents[key.position()]); + } + @Nullable public static Timestamp getMaxTimestamp(Row row) { @@ -533,17 +492,6 @@ public static long getLastExecutedMicros(Row row) return cell.accessor().getLong(cell.value(), 0); } - public static PartitionKey getKey(ByteBuffer[] partitionKeyComponents) - { - return deserializeKey(partitionKeyComponents[key.position()]); - } - - @Nullable - public static Timestamp getTimestamp(Row row) - { - return deserializeTimestampOrNull(row.clustering().bufferAt(CommandsForKeyColumns.timestamp.position()), Timestamp::fromBits); - } - @Nullable public static Timestamp getLastWriteTimestamp(Row row) { @@ -553,12 +501,12 @@ public static Timestamp getLastWriteTimestamp(Row row) return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); } - public static Row truncateStaticRow(long nowInSec, Row row, Cell lastExecuteMicrosCell, Cell lastExecuteCell, Cell lastWriteCell, Cell maxTimestampCell) + public static Row truncateTimestampsForKeyRow(long nowInSec, Row row, Cell lastExecuteMicrosCell, Cell lastExecuteCell, Cell lastWriteCell, Cell maxTimestampCell) { - checkArgument(lastExecuteMicrosCell == null || lastExecuteMicrosCell.column() == CommandsForKeyColumns.last_executed_micros); - checkArgument(lastExecuteCell == null || lastExecuteCell.column() == CommandsForKeyColumns.last_executed_timestamp); - checkArgument(lastWriteCell == null || lastWriteCell.column() == CommandsForKeyColumns.last_write_timestamp); - checkArgument(maxTimestampCell == null || maxTimestampCell.column() == CommandsForKeyColumns.max_timestamp); + checkArgument(lastExecuteMicrosCell == null || lastExecuteMicrosCell.column() == last_executed_micros); + checkArgument(lastExecuteCell == null || lastExecuteCell.column() == last_executed_timestamp); + checkArgument(lastWriteCell == null || lastWriteCell.column() == last_write_timestamp); + checkArgument(maxTimestampCell == null || maxTimestampCell.column() == max_timestamp); long timestamp = row.primaryKeyLivenessInfo().timestamp(); @@ -572,8 +520,7 @@ public static Row truncateStaticRow(long nowInSec, Row row, Cell lastExecuteMicr if (maxTimestampCell != null) colCount++; - ColumnMetadata[] fields = CommandsForKeyColumns.static_columns_metadata; - checkState(fields.length >= colCount, "CommandsForKeyColumns.static_columns_metadata should include all the columns"); + checkState(columns.size() >= colCount, "CommandsForKeyColumns.static_columns_metadata should include all the columns"); Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(colCount); int colIndex = 0; @@ -591,6 +538,79 @@ public static Row truncateStaticRow(long nowInSec, Row row, Cell lastExecuteMicr } } + private static final TableMetadata DepsCommandsForKeys = commandsForKeysTable(DEPS_COMMANDS_FOR_KEY); + private static final TableMetadata AllCommandsForKeys = commandsForKeysTable(ALL_COMMANDS_FOR_KEY); + + private static TableMetadata commandsForKeysTable(String tableName) + { + return parse(tableName, + "accord commands per key", + "CREATE TABLE %s (" + + "store_id int, " + + "key_token blob, " // can't use "token" as this is restricted word in CQL + + format("key %s, ", KEY_TUPLE) + + format("timestamp %s, ", TIMESTAMP_TUPLE) + + "data blob, " + + "PRIMARY KEY((store_id, key_token, key), timestamp)" + + ')') + .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) + .build(); + } + + public static class CommandsForKeyAccessor + { + final TableMetadata table; + final ClusteringComparator keyComparator; + final CompositeType partitionKeyType; + final ColumnFilter allColumns; + final ColumnMetadata store_id; + final ColumnMetadata key_token; + final ColumnMetadata key; + final ColumnMetadata timestamp; + + final ColumnMetadata data; + + final RegularAndStaticColumns columns; + + public CommandsForKeyAccessor(TableMetadata table) + { + this.table = table; + this.keyComparator = table.partitionKeyAsClusteringComparator(); + this.partitionKeyType = (CompositeType) table.partitionKeyType; + this.allColumns = ColumnFilter.all(table); + this.store_id = getColumn(table, "store_id"); + this.key_token = getColumn(table, "key_token"); + this.key = getColumn(table, "key"); + this.timestamp = getColumn(table, "timestamp"); + this.data = getColumn(table, "data"); + this.columns = new RegularAndStaticColumns(Columns.NONE, Columns.from(Lists.newArrayList(data))); + } + + public ByteBuffer[] splitPartitionKey(DecoratedKey key) + { + return partitionKeyType.split(key.getKey()); + } + + public int getStoreId(ByteBuffer[] partitionKeyComponents) + { + return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); + } + + public PartitionKey getKey(ByteBuffer[] partitionKeyComponents) + { + return deserializeKey(partitionKeyComponents[key.position()]); + } + + @Nullable + public Timestamp getTimestamp(Row row) + { + return deserializeTimestampOrNull(row.clustering().bufferAt(timestamp.position()), Timestamp::fromBits); + } + } + + public static final CommandsForKeyAccessor DepsCommandsForKeysAccessor = new CommandsForKeyAccessor(DepsCommandsForKeys); + public static final CommandsForKeyAccessor AllCommandsForKeysAccessor = new CommandsForKeyAccessor(AllCommandsForKeys); + private static final TableMetadata Topologies = parse(TOPOLOGIES, "accord topologies", @@ -648,7 +668,7 @@ public static KeyspaceMetadata metadata() private static Tables tables() { - return Tables.of(Commands, CommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata); + return Tables.of(Commands, TimestampsForKeys, DepsCommandsForKeys, AllCommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata); } private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException @@ -871,6 +891,14 @@ public static Timestamp deserializeTimestampOrNull(Cell cell) return Timestamp.fromBits(accessor.getLong(split.get(0), 0), accessor.getLong(split.get(1), 0), new Node.Id(accessor.getInt(split.get(2), 0))); } + public static T deserializeTimestampOrDefault(V value, ValueAccessor accessor, TimestampFactory factory, T defaultVal) + { + if (value == null || accessor.isEmpty(value)) + return defaultVal; + List split = TIMESTAMP_TYPE.unpack(value, accessor); + return factory.create(accessor.getLong(split.get(0), 0), accessor.getLong(split.get(1), 0), new Node.Id(accessor.getInt(split.get(2), 0))); + } + public static T deserializeTimestampOrNull(V value, ValueAccessor accessor, TimestampFactory factory) { if (value == null || accessor.isEmpty(value)) @@ -884,6 +912,11 @@ private static T deserializeTimestampOrNull(UntypedResultS return deserializeTimestampOrNull(row.getBlob(name), factory); } + private static T deserializeTimestampOrDefault(UntypedResultSet.Row row, String name, TimestampFactory factory, T defaultVal) + { + return deserializeTimestampOrDefault(row.getBlob(name), ByteBufferAccessor.instance, factory, defaultVal); + } + private static ByteBuffer bytesOrNull(Row row, ColumnMetadata column) { Cell cell = row.getCell(column); @@ -1093,14 +1126,14 @@ private KeysBetween(int storeId, this.start = start; this.end = end; - String selection = selection(CommandsForKeys, requiredColumns, COLUMNS_FOR_ITERATION); + String selection = selection(TimestampsForKeys, requiredColumns, COLUMNS_FOR_ITERATION); this.cqlFirst = format("SELECT DISTINCT %s\n" + "FROM %s\n" + "WHERE store_id = ?\n" + (startInclusive ? " AND key_token >= ?\n" : " AND key_token > ?\n") + (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + "ALLOW FILTERING", - selection, CommandsForKeys); + selection, TimestampsForKeys); this.cqlContinue = format("SELECT DISTINCT %s\n" + "FROM %s\n" + "WHERE store_id = ?\n" + @@ -1108,7 +1141,7 @@ private KeysBetween(int storeId, " AND key > ?\n" + (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + "ALLOW FILTERING", - selection, CommandsForKeys); + selection, TimestampsForKeys); } @Override @@ -1273,117 +1306,188 @@ public static PartitionKey deserializeKey(UntypedResultSet.Row row) return deserializeKey(row.getBytes("key")); } - private static void addSeriesMutations(ImmutableSortedMap prev, - ImmutableSortedMap value, - SeriesKind kind, - PartitionUpdate.Builder partitionBuilder, - Row.Builder rowBuilder, - LivenessInfo livenessInfo, - int nowInSeconds) + public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey original, TimestampsForKey current, long timestampMicros) { - if (prev == value) - return; + try + { + Invariants.checkArgument(original != current); + // TODO: convert to byte arrays + ValueAccessor accessor = ByteBufferAccessor.instance; - long timestampMicros = livenessInfo.timestamp(); - Set deletions = Sets.difference(prev.keySet(), value.keySet()); + Row.Builder builder = BTreeRow.unsortedBuilder(); + builder.newRow(Clustering.EMPTY); + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + LivenessInfo livenessInfo = LivenessInfo.create(timestampMicros, nowInSeconds); + builder.addPrimaryKeyLivenessInfo(livenessInfo); + addCellIfModified(TimestampsForKeyColumns.max_timestamp, TimestampsForKey::max, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); + addCellIfModified(TimestampsForKeyColumns.last_executed_timestamp, TimestampsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); + addCellIfModified(TimestampsForKeyColumns.last_executed_micros, TimestampsForKey::rawLastExecutedHlc, accessor::valueOf, builder, timestampMicros, nowInSeconds, original, current); + addCellIfModified(TimestampsForKeyColumns.last_write_timestamp, TimestampsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); - Row.Deletion deletion = !deletions.isEmpty() ? - Row.Deletion.regular(DeletionTime.build(timestampMicros, nowInSeconds)) : - null; - ByteBuffer ordinalBytes = bytes(kind.ordinal()); - value.forEach((timestamp, bytes) -> { - if (bytes.equals(prev.get(timestamp))) + Row row = builder.build(); + if (row.isEmpty()) + return null; + + ByteBuffer key = TimestampsForKeyColumns.makePartitionKey(storeId, current.key()); + PartitionUpdate update = PartitionUpdate.singleRowUpdate(TimestampsForKeys, key, row); + return new Mutation(update); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + public static Mutation getTimestampsForKeyMutation(AccordCommandStore commandStore, AccordSafeTimestampsForKey liveTimestamps, long timestampMicros) + { + return getTimestampsForKeyMutation(commandStore.id(), liveTimestamps.original(), liveTimestamps.current(), timestampMicros); + } + + public static UntypedResultSet loadTimestampsForKeyRow(CommandStore commandStore, PartitionKey key) + { + String cql = "SELECT * FROM %s.%s " + + "WHERE store_id = ? " + + "AND key_token = ? " + + "AND key=(?, ?)"; + + return executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TIMESTAMPS_FOR_KEY), + commandStore.id(), + serializeToken(key.token()), + key.tableId().asUUID(), key.partitionKey().getKey()); + } + + public static TimestampsForKey loadTimestampsForKey(AccordCommandStore commandStore, PartitionKey key) + { + commandStore.checkNotInStoreThread(); + return unsafeLoadTimestampsForKey(commandStore, key); + } + + public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore commandStore, PartitionKey key) + { + + UntypedResultSet rows = loadTimestampsForKeyRow(commandStore, key); + + if (rows.isEmpty()) + { + return null; + } + + UntypedResultSet.Row row = rows.one(); + checkState(deserializeKey(row).equals(key)); + + Timestamp max = deserializeTimestampOrDefault(row, "max_timestamp", Timestamp::fromBits, Timestamp.NONE); + Timestamp lastExecutedTimestamp = deserializeTimestampOrDefault(row, "last_executed_timestamp", Timestamp::fromBits, Timestamp.NONE); + long lastExecutedMicros = row.has("last_executed_micros") ? row.getLong("last_executed_micros") : 0; + Timestamp lastWriteTimestamp = deserializeTimestampOrDefault(row, "last_write_timestamp", Timestamp::fromBits, Timestamp.NONE); + + return TimestampsForKey.SerializerSupport.create(key, max, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp); + } + + private static void addSeriesMutations(CommandsForKeyAccessor accessor, + CommandTimeseries.Update update, + PartitionUpdate.Builder partitionBuilder, + Row.Builder rowBuilder, + LivenessInfo livenessInfo, + long timestampMicros, + Row.Deletion deletion, + Predicate predicate) + { + if (update.isEmpty()) + return; + + update.forEachWrite((timestamp, bytes) -> { + if (!predicate.test(timestamp)) return; - rowBuilder.newRow(Clustering.make(ordinalBytes, serializeTimestamp(timestamp))); - rowBuilder.addCell(live(CommandsForKeyColumns.data, timestampMicros, bytes)); + rowBuilder.newRow(Clustering.make(serializeTimestamp(timestamp))); + rowBuilder.addCell(live(accessor.data, timestampMicros, bytes)); rowBuilder.addPrimaryKeyLivenessInfo(livenessInfo); partitionBuilder.add(rowBuilder.build()); }); - deletions.forEach(timestamp -> { - rowBuilder.newRow(Clustering.make(ordinalBytes, serializeTimestamp(timestamp))); + update.forEachDelete(timestamp -> { + if (!predicate.test(timestamp)) + return; + rowBuilder.newRow(Clustering.make(serializeTimestamp(timestamp))); rowBuilder.addRowDeletion(deletion); partitionBuilder.add(rowBuilder.build()); }); } - private static void addSeriesMutations(CommandsForKey original, - CommandsForKey cfk, - SeriesKind kind, - PartitionUpdate.Builder partitionBuilder, - Row.Builder rowBuilder, - LivenessInfo livenessInfo, - int nowInSeconds) + private static void addSeriesMutations(CommandsForKeyAccessor accessor, + CommandTimeseries.Update common, + CommandTimeseries.Update update, + PartitionUpdate.Builder partitionBuilder, + Row.Builder rowBuilder, + LivenessInfo livenessInfo, + int nowInSeconds) { - addSeriesMutations(kind.getValues(original), kind.getValues(cfk), kind, partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); + + long timestampMicros = livenessInfo.timestamp(); + + Row.Deletion deletion = common.numDeletes() + update.numDeletes() > 0 ? + Row.Deletion.regular(DeletionTime.build(timestampMicros, nowInSeconds)) : + null; + + addSeriesMutations(accessor, common, partitionBuilder, rowBuilder, livenessInfo, timestampMicros, deletion, ts -> !update.contains(ts)); + addSeriesMutations(accessor, update, partitionBuilder, rowBuilder, livenessInfo, timestampMicros, deletion, ts -> true); } - private static DecoratedKey makeKey(int storeId, PartitionKey key) + private static DecoratedKey makeKey(CommandsForKeyAccessor accessor, int storeId, PartitionKey key) { Token token = key.token(); - ByteBuffer pk = CommandsForKeyColumns.keyComparator.make(storeId, - serializeToken(token), - serializeKey(key)).serializeAsPartitionKey(); - return CommandsForKeys.partitioner.decorateKey(pk); + ByteBuffer pk = accessor.keyComparator.make(storeId, + serializeToken(token), + serializeKey(key)).serializeAsPartitionKey(); + return accessor.table.partitioner.decorateKey(pk); } - private static DecoratedKey makeKey(int storeId, CommandsForKey cfk) + private static PartitionUpdate getCommandsForKeyPartitionUpdate(CommandsForKeyAccessor accessor, int storeId, PartitionKey key, CommandsForKeyUpdater common, CommandsForKeyUpdater update, long timestampMicros) { - return makeKey(storeId, (PartitionKey) cfk.key()); - } - public static Mutation getCommandsForKeyMutation(AccordCommandStore commandStore, AccordSafeCommandsForKey liveCfk, long timestampMicros) - { - return getCommandsForKeyMutation(commandStore.id(), liveCfk.original(), liveCfk.current(), timestampMicros); - } + int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); + LivenessInfo livenessInfo = LivenessInfo.create(timestampMicros, nowInSeconds); - public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKey original, CommandsForKey cfk, long timestampMicros) - { - try - { - Invariants.checkArgument(original != cfk); - // TODO: convert to byte arrays - ValueAccessor accessor = ByteBufferAccessor.instance; + int expectedRows = common.totalChanges() + update.totalChanges(); - int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - LivenessInfo livenessInfo = LivenessInfo.create(timestampMicros, nowInSeconds); + PartitionUpdate.Builder partitionBuilder = new PartitionUpdate.Builder(accessor.table, + makeKey(accessor, storeId, key), + accessor.columns, + expectedRows); - boolean hasStaticChanges = CommandsForKeyColumns.hasStaticChanges(original, cfk); - int expectedRows = (hasStaticChanges ? 1 : 0) - + estimateMapChanges(c -> c.byId().commands, original, cfk) - + estimateMapChanges(c -> c.byExecuteAt().commands, original, cfk); + Row.Builder rowBuilder = BTreeRow.unsortedBuilder(); - PartitionUpdate.Builder partitionBuilder = new PartitionUpdate.Builder(CommandsForKeys, - makeKey(storeId, cfk), - CommandsForKeyColumns.columnsFor(original, cfk), - expectedRows); + addSeriesMutations(accessor, common.commands(), update.commands(), partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); - Row.Builder rowBuilder = BTreeRow.unsortedBuilder(); + PartitionUpdate partitionUpdate = partitionBuilder.build(); + if (partitionUpdate.isEmpty()) + return null; + return partitionUpdate; + } - if (hasStaticChanges) - { - rowBuilder.newRow(Clustering.STATIC_CLUSTERING); - addCellIfModified(CommandsForKeyColumns.max_timestamp, CommandsForKey::max, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); - addCellIfModified(CommandsForKeyColumns.last_executed_timestamp, CommandsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); - addCellIfModified(CommandsForKeyColumns.last_executed_micros, CommandsForKey::rawLastExecutedHlc, accessor::valueOf, rowBuilder, timestampMicros, nowInSeconds, original, cfk); - addCellIfModified(CommandsForKeyColumns.last_write_timestamp, CommandsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, rowBuilder, timestampMicros, nowInSeconds, original, cfk); - rowBuilder.addPrimaryKeyLivenessInfo(livenessInfo); - Row row = rowBuilder.build(); - if (!row.isEmpty()) - partitionBuilder.add(row); - } + public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKeyUpdate update, long timestampMicros) + { + PartitionUpdate depsUpdate = getCommandsForKeyPartitionUpdate(DepsCommandsForKeysAccessor, + storeId, + update.key(), + update.common(), + update.deps(), + timestampMicros); + PartitionUpdate allUpdate = getCommandsForKeyPartitionUpdate(AllCommandsForKeysAccessor, + storeId, + update.key(), + update.common(), + update.all(), + timestampMicros); - addSeriesMutations(original, cfk, SeriesKind.BY_ID, partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); - addSeriesMutations(original, cfk, SeriesKind.BY_EXECUTE_AT, partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); + if (depsUpdate == null && allUpdate == null) + return null; + if (depsUpdate == null) + return new Mutation(allUpdate); + else if (allUpdate == null) + return new Mutation(depsUpdate); - PartitionUpdate update = partitionBuilder.build(); - if (update.isEmpty()) - return null; - return new Mutation(update); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + return new Mutation(ACCORD_KEYSPACE_NAME, depsUpdate.partitionKey(), + ImmutableMap.of(depsUpdate.metadata().id, depsUpdate, allUpdate.metadata().id, allUpdate), + MonotonicClock.Global.approxTime.now(), false); } private static ByteBuffer cellValue(Cell cell) @@ -1403,32 +1507,35 @@ private static ByteBuffer clusteringValue(Clustering clustering, int idx) return clustering.accessor().toBuffer(clustering.get(idx)); } - public static SinglePartitionReadCommand getCommandsForKeyRead(int storeId, PartitionKey key, long nowInSeconds) + private static SinglePartitionReadCommand getCommandsForKeyRead(CommandsForKeyAccessor accessor, int storeId, PartitionKey key, long nowInSeconds) { - return SinglePartitionReadCommand.create(CommandsForKeys, nowInSeconds, - CommandsForKeyColumns.allColumns, + return SinglePartitionReadCommand.create(accessor.table, nowInSeconds, + accessor.allColumns, RowFilter.none(), DataLimits.NONE, - makeKey(storeId, key), + makeKey(accessor, storeId, key), FULL_PARTITION); } - public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + public static SinglePartitionReadCommand getDepsCommandsForKeyRead(int storeId, PartitionKey key, int nowInSeconds) { - commandStore.checkNotInStoreThread(); - return unsafeLoadCommandsForKey(commandStore, key); + return getCommandsForKeyRead(DepsCommandsForKeysAccessor, storeId, key, nowInSeconds); + } + + public static SinglePartitionReadCommand getAllCommandsForKeyRead(int storeId, PartitionKey key, int nowInSeconds) + { + return getCommandsForKeyRead(AllCommandsForKeysAccessor, storeId, key, nowInSeconds); } - static CommandsForKey unsafeLoadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + static CommandsForKey unsafeLoadCommandsForKey(CommandsForKeyAccessor accessor, AccordCommandStore commandStore, PartitionKey key) { long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - SinglePartitionReadCommand command = getCommandsForKeyRead(commandStore.id(), key, nowInSeconds); + SinglePartitionReadCommand command = getCommandsForKeyRead(accessor, commandStore.id(), key, nowInSeconds); + - EnumMap> seriesMaps = new EnumMap<>(SeriesKind.class); - for (SeriesKind kind : SeriesKind.values()) - seriesMaps.put(kind, new ImmutableSortedMap.Builder<>(Comparator.naturalOrder())); + ImmutableSortedMap.Builder commands = new ImmutableSortedMap.Builder<>(Comparator.naturalOrder()); try (ReadExecutionController controller = command.executionController(); FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) @@ -1438,42 +1545,22 @@ static CommandsForKey unsafeLoadCommandsForKey(AccordCommandStore commandStore, return null; } - Timestamp max = Timestamp.NONE; - Timestamp lastExecutedTimestamp = Timestamp.NONE; - long lastExecutedMicros = 0; - Timestamp lastWriteTimestamp = Timestamp.NONE; - try (RowIterator partition = partitions.next()) { - // empty static row will be interpreted as all null cells which will cause everything to be initialized - Row staticRow = partition.staticRow(); - max = deserializeTimestampOrDefault(staticRow, CommandsForKeyColumns.max_timestamp, Timestamp::fromBits, max); - lastExecutedTimestamp = deserializeTimestampOrDefault(staticRow, CommandsForKeyColumns.last_executed_timestamp, Timestamp::fromBits, lastExecutedTimestamp); - - ByteBuffer microsBytes = bytesOrNull(staticRow, CommandsForKeyColumns.last_executed_micros); - if (microsBytes != null) - lastExecutedMicros = microsBytes.getLong(microsBytes.position()); - - lastWriteTimestamp = deserializeTimestampOrDefault(staticRow, CommandsForKeyColumns.last_write_timestamp, Timestamp::fromBits, lastWriteTimestamp); - while (partition.hasNext()) { Row row = partition.next(); Clustering clustering = row.clustering(); - int ordinal = Int32Type.instance.compose(clusteringValue(clustering, 0)); - Timestamp timestamp = deserializeTimestampOrNull(clusteringValue(clustering, 1), Timestamp::fromBits); - ByteBuffer data = cellValue(row, CommandsForKeyColumns.data); + Timestamp timestamp = deserializeTimestampOrNull(clusteringValue(clustering, 0), Timestamp::fromBits); + ByteBuffer data = cellValue(row, accessor.data); if (data == null) continue; - seriesMaps.get(SeriesKind.values()[ordinal]).put(timestamp, data); + commands.put(timestamp, data); } } checkState(!partitions.hasNext()); - return CommandsForKey.SerializerSupport.create(key, max, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp, - CommandsForKeySerializer.loader, - seriesMaps.get(SeriesKind.BY_ID).build(), - seriesMaps.get(SeriesKind.BY_EXECUTE_AT).build()); + return CommandsForKey.SerializerSupport.create(key, CommandsForKeySerializer.loader, commands.build()); } catch (Throwable t) { @@ -1482,6 +1569,28 @@ static CommandsForKey unsafeLoadCommandsForKey(AccordCommandStore commandStore, } } + public static CommandsForKey unsafeLoadDepsCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + { + return unsafeLoadCommandsForKey(DepsCommandsForKeysAccessor, commandStore, key); + } + + public static CommandsForKey unsafeLoadAllCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + { + return unsafeLoadCommandsForKey(AllCommandsForKeysAccessor, commandStore, key); + } + + public static CommandsForKey loadDepsCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + { + commandStore.checkNotInStoreThread(); + return unsafeLoadCommandsForKey(DepsCommandsForKeysAccessor, commandStore, key); + } + + public static CommandsForKey loadAllCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + { + commandStore.checkNotInStoreThread(); + return unsafeLoadCommandsForKey(AllCommandsForKeysAccessor, commandStore, key); + } + public static class EpochDiskState { public static final EpochDiskState EMPTY = new EpochDiskState(0, 0); diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index fd9880d3e273..336bc5ee5a0f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -164,7 +164,7 @@ private VerbMapping() for (MessageType type : Iterables.concat(AccordMessageType.values, MessageType.values)) { // Any request can receive a generic failure response - if (type == MessageType.FAILURE_RSP) + if (type == MessageType.FAILURE_RSP || type.isLocal()) continue; if (mapping.containsKey(type)) diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 8630377a5275..dc7a3c9e3bed 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -28,11 +28,13 @@ import accord.api.Result; import accord.api.RoutingKey; import accord.impl.CommandsForKey; +import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.Command.WaitingOn; import accord.local.CommonAttributes; import accord.local.Node; import accord.local.SaveStatus; +import accord.local.Status; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; import accord.primitives.Ballot; @@ -271,16 +273,29 @@ private static class CommandEmptySizes { private final static TokenKey EMPTY_KEY = new TokenKey("doesnotexist", null); private final static TxnId EMPTY_TXNID = new TxnId(42, 42, Kind.Read, Domain.Key, new Node.Id(42)); - private final static CommonAttributes.Mutable EMPTY_ATTRS = new CommonAttributes.Mutable(EMPTY_TXNID) - .partialDeps(PartialDeps.NONE) - .route(new FullKeyRoute(EMPTY_KEY, true, new RoutingKey[] {EMPTY_KEY} )); - - final static long NOT_DEFINED = measure(Command.SerializerSupport.notDefined(EMPTY_ATTRS, Ballot.ZERO)); - final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(EMPTY_ATTRS, EMPTY_TXNID, null));; - final static long ACCEPTED = measure(Command.SerializerSupport.accepted(EMPTY_ATTRS, SaveStatus.Accepted, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO)); - final static long COMMITTED = measure(Command.SerializerSupport.committed(EMPTY_ATTRS, SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY)); - final static long EXECUTED = measure(Command.SerializerSupport.executed(EMPTY_ATTRS, SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY, null, null)); - final static long TRUNCATED = measure(Command.SerializerSupport.truncatedApply(EMPTY_ATTRS, SaveStatus.TruncatedApply, EMPTY_TXNID, null, null)); + + private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) + { + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(EMPTY_TXNID).route(new FullKeyRoute(EMPTY_KEY, true, new RoutingKey[]{ EMPTY_KEY })); + attrs.durability(Status.Durability.NotDurable); + if (hasDeps) + attrs.partialDeps(PartialDeps.NONE); + + if (hasTxn) + attrs.partialTxn(new PartialTxn.InMemory(null, null, null, null, null, null)); + + return attrs; + } + + private static final Writes EMPTY_WRITES = new Writes(EMPTY_TXNID, EMPTY_TXNID, Keys.EMPTY, (key, safeStore, executeAt, store, txn) -> null); + private static final Result EMPTY_RESULT = new Result() {}; + + final static long NOT_DEFINED = measure(Command.SerializerSupport.notDefined(attrs(false, false), Ballot.ZERO)); + final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(attrs(false, true), EMPTY_TXNID, null));; + final static long ACCEPTED = measure(Command.SerializerSupport.accepted(attrs(true, false), SaveStatus.Accepted, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO)); + final static long COMMITTED = measure(Command.SerializerSupport.committed(attrs(true, true), SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY)); + final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY, EMPTY_WRITES, EMPTY_RESULT)); + final static long TRUNCATED = measure(Command.SerializerSupport.truncatedApply(attrs(false, false), SaveStatus.TruncatedApply, EMPTY_TXNID, null, null)); final static long INVALIDATED = measure(Command.SerializerSupport.invalidated(EMPTY_TXNID, null)); private static long emptySize(Command command) @@ -354,18 +369,23 @@ private static long cfkSeriesSize(ImmutableSortedMap seri return size; } - private static final long EMPTY_CFK_SIZE = measure(CommandsForKey.SerializerSupport.create(null, null, null, 0, null, null, - ImmutableSortedMap.of(), - ImmutableSortedMap.of())); + private static long EMPTY_TFK_SIZE = measure(TimestampsForKey.SerializerSupport.create(null, null, null, 0, null)); + + public static long timestampsForKey(TimestampsForKey timestamps) + { + long size = EMPTY_TFK_SIZE; + size += timestamp(timestamps.max()); + size += timestamp(timestamps.lastExecutedTimestamp()); + size += timestamp(timestamps.lastWriteTimestamp()); + return size; + } + + private static long EMPTY_CFK_SIZE = measure(CommandsForKey.SerializerSupport.create(null, null, ImmutableSortedMap.of())); public static long commandsForKey(CommandsForKey cfk) { long size = EMPTY_CFK_SIZE; size += key(cfk.key()); - size += timestamp(cfk.max()); - size += timestamp(cfk.lastExecutedTimestamp()); - size += timestamp(cfk.lastWriteTimestamp()); - size += cfkSeriesSize((ImmutableSortedMap) cfk.byId().commands); - size += cfkSeriesSize((ImmutableSortedMap) cfk.byExecuteAt().commands); + size += cfkSeriesSize((ImmutableSortedMap) cfk.commands().commands); return size; } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 122ae52a13ba..ea364d04f612 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -20,7 +20,6 @@ import java.util.Map; import java.util.NavigableMap; -import java.util.function.BiFunction; import java.util.function.Predicate; import javax.annotation.Nullable; @@ -33,11 +32,14 @@ import accord.impl.AbstractSafeCommandStore; import accord.impl.CommandTimeseries; import accord.impl.CommandTimeseries.CommandLoader; -import accord.impl.CommandTimeseriesHolder; import accord.impl.CommandsForKey; -import accord.impl.SafeCommandsForKey; +import accord.impl.CommandsForKeys; +import accord.impl.DomainCommands; +import accord.impl.DomainTimestamps; +import accord.impl.SafeTimestampsForKey; import accord.local.CommandStores.RangesForEpoch; import accord.local.CommonAttributes; +import accord.local.KeyHistory; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.SafeCommand; @@ -53,24 +55,34 @@ import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.utils.TriFunction; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; -public class AccordSafeCommandStore extends AbstractSafeCommandStore +public class AccordSafeCommandStore extends AbstractSafeCommandStore { private final Map commands; - private final NavigableMap commandsForKeys; + private final NavigableMap depsCommandsForKeys; + private final NavigableMap allCommandsForKeys; + private final NavigableMap timestampsForKeys; + private final NavigableMap updatesForKeys; private final AccordCommandStore commandStore; private final RangesForEpoch ranges; CommandsForRanges.Updater rangeUpdates = null; public AccordSafeCommandStore(PreLoadContext context, Map commands, - NavigableMap commandsForKey, + NavigableMap timestampsForKey, + NavigableMap depsCommandsForKey, + NavigableMap allCommandsForKeys, + NavigableMap updatesForKeys, AccordCommandStore commandStore) { super(context); this.commands = commands; - this.commandsForKeys = commandsForKey; + this.timestampsForKeys = timestampsForKey; + this.depsCommandsForKeys = depsCommandsForKey; + this.allCommandsForKeys = allCommandsForKeys; + this.updatesForKeys = updatesForKeys; this.commandStore = commandStore; this.ranges = commandStore.updateRangesForEpoch(); } @@ -88,33 +100,107 @@ protected void addCommandInternal(AccordSafeCommand command) } @Override - protected AccordSafeCommandsForKey getCommandsForKeyInternal(RoutableKey key) + protected AccordSafeCommand getIfLoaded(TxnId txnId) + { + AccordSafeCommand command = commandStore.commandCache().acquireIfLoaded(txnId); + if (command != null) command.preExecute(); + return command; + } + + private NavigableMap commandsForKeyMap(KeyHistory history) { - return commandsForKeys.get(key); + switch (history) + { + case DEPS: + return depsCommandsForKeys; + case ALL: + return allCommandsForKeys; + default: + throw new IllegalArgumentException(); + } } @Override - protected void addCommandsForKeyInternal(AccordSafeCommandsForKey cfk) + protected AccordSafeCommandsForKey getDepsCommandsForKeyInternal(RoutableKey key) { - commandsForKeys.put(cfk.key(), cfk); + return depsCommandsForKeys.get(key); } @Override - protected AccordSafeCommand getIfLoaded(TxnId txnId) + protected void addDepsCommandsForKeyInternal(AccordSafeCommandsForKey cfk) { - AccordSafeCommand command = commandStore.commandCache().acquireIfLoaded(txnId); - if (command != null) command.preExecute(); - return command; + depsCommandsForKeys.put(cfk.key(), cfk); + } + + @Override + protected AccordSafeCommandsForKey getDepsCommandsForKeyIfLoaded(RoutableKey key) + { + AccordSafeCommandsForKey cfk = commandStore.depsCommandsForKeyCache().acquireIfLoaded(key); + if (cfk != null) cfk.preExecute(); + return cfk; + } + + @Override + protected AccordSafeCommandsForKey getAllCommandsForKeyInternal(RoutableKey key) + { + return allCommandsForKeys.get(key); + } + + @Override + protected void addAllCommandsForKeyInternal(AccordSafeCommandsForKey cfk) + { + allCommandsForKeys.put(cfk.key(), cfk); } @Override - protected AccordSafeCommandsForKey getIfLoaded(RoutableKey key) + protected AccordSafeCommandsForKey getAllCommandsForKeyIfLoaded(RoutableKey key) { - AccordSafeCommandsForKey cfk = commandStore.commandsForKeyCache().acquireIfLoaded(key); + AccordSafeCommandsForKey cfk = commandStore.allCommandsForKeyCache().acquireIfLoaded(key); if (cfk != null) cfk.preExecute(); return cfk; } + @Override + protected AccordSafeTimestampsForKey getTimestampsForKeyInternal(RoutableKey key) + { + return timestampsForKeys.get(key); + } + + @Override + protected void addTimestampsForKeyInternal(AccordSafeTimestampsForKey cfk) + { + timestampsForKeys.put(cfk.key(), cfk); + } + + @Override + protected AccordSafeTimestampsForKey getTimestampsForKeyIfLoaded(RoutableKey key) + { + AccordSafeTimestampsForKey cfk = commandStore.timestampsForKeyCache().acquireIfLoaded(key); + if (cfk != null) cfk.preExecute(); + return cfk; + } + + protected AccordSafeCommandsForKeyUpdate getCommandsForKeyUpdateInternal(RoutableKey key) + { + return updatesForKeys.get(key); + } + + protected AccordSafeCommandsForKeyUpdate createCommandsForKeyUpdateInternal(RoutableKey key) + { + throw new IllegalStateException("CFK updates should be initialized for operation"); + } + + protected void addCommandsForKeyUpdateInternal(AccordSafeCommandsForKeyUpdate update) + { + updatesForKeys.put(update.key(), update); + } + + protected void applyCommandForKeyUpdates() + { + // TODO (now): should this happen as part of invalidate? Less obvious it's happening, but eliminates possibility of post update changes + updatesForKeys.values().forEach(AccordSafeCommandsForKeyUpdate::setUpdates); + } + @Override public AccordCommandStore commandStore() { @@ -160,7 +246,7 @@ public long latestEpoch() @Override public Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) { - Timestamp maxConflict = mapReduce(keysOrRanges, slice, (ts, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, Predicates.isNull()); + Timestamp maxConflict = mapReduce(keysOrRanges, slice, KeyHistory.NONE, (ts, commands, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, Predicates.isNull()); return Timestamp.nonNullOrMax(maxConflict, commandStore.commandsForRanges().maxRedundant()); } @@ -171,7 +257,6 @@ public void registerHistoricalTransactions(Deps deps) // We find a set of dependencies for a range then update CommandsFor to know about them Ranges allRanges = ranges.all(); deps.keyDeps.keys().forEach(allRanges, key -> { - SafeCommandsForKey cfk = commandsForKey(key); deps.keyDeps.forEach(key, txnId -> { // TODO (desired, efficiency): this can be made more efficient by batching by epoch if (ranges.coordinates(txnId).contains(key)) @@ -179,7 +264,7 @@ public void registerHistoricalTransactions(Deps deps) if (!ranges.allBefore(txnId.epoch()).contains(key)) return; - cfk.registerNotWitnessed(txnId); + CommandsForKeys.registerNotWitnessed(this, key, txnId); }); }); CommandsForRanges commandsForRanges = commandStore.commandsForRanges(); @@ -202,15 +287,15 @@ public void erase(SafeCommand safeCommand) { } - private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) + private O mapReduce(Routables keysOrRanges, Ranges slice, KeyHistory keyHistory, TriFunction map, O accumulate, Predicate terminate) { accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate, terminate); if (terminate.test(accumulate)) return accumulate; - return mapReduceForKey(keysOrRanges, slice, map, accumulate, terminate); + return mapReduceForKey(keysOrRanges, slice, keyHistory, map, accumulate, terminate); } - private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate, Predicate terminate) + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, KeyHistory keyHistory, TriFunction map, O accumulate, Predicate terminate) { switch (keysOrRanges.domain()) { @@ -223,8 +308,9 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio for (Key key : keys) { if (!slice.contains(key)) continue; - SafeCommandsForKey forKey = commandsForKey(key); - accumulate = map.apply(forKey.current(), accumulate); + SafeTimestampsForKey timestamps = timestampsForKey(key); + CommandsForKey commands = !keyHistory.isNone() ? commandsForKey(key, keyHistory).current() : null; + accumulate = map.apply(timestamps.current(), commands, accumulate); if (terminate.test((accumulate))) return accumulate; } @@ -237,12 +323,13 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio Routables sliced = keysOrRanges.slice(slice, Routables.Slice.Minimal); if (!context.keys().slice(slice, Routables.Slice.Minimal).containsAll(sliced)) throw new AssertionError("Range(s) detected not present in the PreLoadContext: expected " + context.keys() + " but given " + keysOrRanges); - for (RoutableKey key : commandsForKeys.keySet()) + for (RoutableKey key : timestampsForKeys.keySet()) { //TODO (duplicate code): this is a repeat of Key... only change is checking contains in range if (!sliced.contains(key)) continue; - SafeCommandsForKey forKey = commandsForKey(key); - accumulate = map.apply(forKey.current(), accumulate); + SafeTimestampsForKey timestamps = timestampsForKey(key); + CommandsForKey commands = !keyHistory.isNone() ? commandsForKey(key, keyHistory).current() : null; + accumulate = map.apply(timestamps.current(), commands, accumulate); if (terminate.test(accumulate)) return accumulate; } @@ -253,19 +340,25 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio } @Override - public T mapReduce(Seekables keysOrRanges, Ranges slice, Txn.Kind.Kinds testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, P1 p1, T accumulate, Predicate terminate) { - accumulate = mapReduce(keysOrRanges, slice, (forKey, prev) -> { - CommandTimeseries timeseries; +//<<<<<<< HEAD +// public T mapReduce(Seekables keysOrRanges, Ranges slice, Txn.Kind.Kinds testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, P1 p1, T accumulate, Predicate terminate) { +// accumulate = mapReduce(keysOrRanges, slice, (forKey, prev) -> { +// CommandTimeseries timeseries; +//======= + public T mapReduce(Seekables keysOrRanges, Ranges slice, KeyHistory keyHistory, Txn.Kind.Kinds testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, P1 p1, T accumulate, Predicate terminate) + { + accumulate = mapReduce(keysOrRanges, slice, keyHistory, (timestamps, commands, prev) -> { + CommandTimeseries.TimestampType timestampType; switch (testTimestamp) { default: throw new AssertionError(); case STARTED_AFTER: case STARTED_BEFORE: - timeseries = forKey.byId(); + timestampType = CommandTimeseries.TimestampType.TXN_ID; break; case EXECUTES_AFTER: case MAY_EXECUTE_BEFORE: - timeseries = forKey.byExecuteAt(); + timestampType = CommandTimeseries.TimestampType.EXECUTE_AT; } CommandTimeseries.TestTimestamp remapTestTimestamp; switch (testTimestamp) @@ -279,7 +372,7 @@ public T mapReduce(Seekables keysOrRanges, Ranges slice, Txn.Kind. case MAY_EXECUTE_BEFORE: remapTestTimestamp = CommandTimeseries.TestTimestamp.BEFORE; } - return timeseries.mapReduce(testKind, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, p1, prev, terminate); + return commands.commands().mapReduce(testKind, timestampType, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, p1, prev, terminate); }, accumulate, terminate); return accumulate; @@ -303,8 +396,7 @@ public CommonAttributes completeRegistration(Seekable seekable, Ranges ranges, A Key key = seekable.asKey(); if (ranges.contains(key)) { - AccordSafeCommandsForKey cfk = commandsForKey(key); - cfk.register(liveCommand.current()); + CommandsForKeys.registerCommand(this, key, liveCommand.current()); attrs = attrs.mutable().addListener(new CommandsForKey.Listener(key)); } } @@ -340,7 +432,10 @@ protected CommandsForRanges.Updater updateRanges() protected void invalidateSafeState() { commands.values().forEach(AccordSafeCommand::invalidate); - commandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); + timestampsForKeys.values().forEach(AccordSafeTimestampsForKey::invalidate); + depsCommandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); + allCommandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); + updatesForKeys.values().forEach(AccordSafeCommandsForKeyUpdate::invalidate); } @Override @@ -350,11 +445,18 @@ public CommandLoader cfkLoader(RoutableKey key) } public void postExecute(Map commands, - Map commandsForKeys) + Map timestampsForKey, + Map depsCommandsForKeys, + Map allCommandsForKeys, + Map updatesForKeys + ) { postExecute(); commands.values().forEach(AccordSafeState::postExecute); - commandsForKeys.values().forEach(AccordSafeState::postExecute); + timestampsForKey.values().forEach(AccordSafeState::postExecute); + depsCommandsForKeys.values().forEach(AccordSafeState::postExecute); + allCommandsForKeys.values().forEach(AccordSafeState::postExecute); + updatesForKeys.values().forEach(AccordSafeState::postExecute); if (rangeUpdates != null) rangeUpdates.apply(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index 33b02e95167d..e57a94f863e4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -19,7 +19,6 @@ package org.apache.cassandra.service.accord; import java.util.Objects; -import java.util.concurrent.TimeUnit; import com.google.common.annotations.VisibleForTesting; @@ -27,7 +26,6 @@ import accord.impl.CommandsForKey; import accord.impl.SafeCommandsForKey; import accord.primitives.RoutableKey; -import accord.primitives.Timestamp; public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState { @@ -110,27 +108,7 @@ public void preExecute() public void postExecute() { checkNotInvalidated(); - global.set(current); - } - - public long lastExecutedMicros() - { - return current().lastExecutedHlc(); - } - - public long timestampMicrosFor(Timestamp executeAt, boolean isForWriteTxn) - { - return current().hlcFor(executeAt, isForWriteTxn); - } - - public int nowInSecondsFor(Timestamp executeAt, boolean isForWriteTxn) - { - CommandsForKey current = current(); - current.validateExecuteAtTime(executeAt, isForWriteTxn); - // we use the executeAt time instead of the monotonic database timestamp to prevent uneven - // ttl expiration in extreme cases, ie 1M+ writes/second to a key causing timestamps to overflow - // into the next second on some keys and not others. - return Math.toIntExact(TimeUnit.MICROSECONDS.toSeconds(current.lastExecutedTimestamp().hlc())); + // updates are applied directly by CommandsForKeyUpdate } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java new file mode 100644 index 000000000000..b881a4524126 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.nio.ByteBuffer; + +import com.google.common.annotations.VisibleForTesting; + +import accord.api.Key; +import accord.impl.SafeCommandsForKey; +import accord.primitives.RoutableKey; +import accord.utils.async.AsyncChain; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; + +public class AccordSafeCommandsForKeyUpdate extends SafeCommandsForKey.Update implements AccordSafeState +{ + private boolean invalidated; + private final AccordCachingState global; + private CommandsForKeyUpdate original; + private CommandsForKeyUpdate current; + + public AccordSafeCommandsForKeyUpdate(AccordCachingState global) + { + super((Key) global.key(), CommandsForKeySerializer.loader); + this.global = global; + this.original = null; + this.current = null; + } + + @Override + public void initialize() + { + set(CommandsForKeyUpdate.empty((PartitionKey) key())); + } + + @Override + public AccordCachingState global() + { + checkNotInvalidated(); + return global; + } + + @Override + public CommandsForKeyUpdate current() + { + checkNotInvalidated(); + return current; + } + + public AsyncChain loading() + { + throw new IllegalStateException("Updates aren't loaded"); + } + + @Override + @VisibleForTesting + public void set(CommandsForKeyUpdate cfk) + { + checkNotInvalidated(); + this.current = cfk; + } + + public CommandsForKeyUpdate original() + { + checkNotInvalidated(); + return original; + } + + public CommandsForKeyUpdate setUpdates() + { + CommandsForKeyUpdate next = new CommandsForKeyUpdate((PartitionKey) key(), + deps().toImmutable(), + all().toImmutable(), + common().toImmutable()); + set(next); + return next; + } + + @Override + public void preExecute() + { + checkNotInvalidated(); + original = global.get(); + current = original; + } + + @Override + public void postExecute() + { + checkNotInvalidated(); + global.set(current); + } + + @Override + public void invalidate() + { + invalidated = true; + } + + @Override + public boolean invalidated() + { + return invalidated; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java new file mode 100644 index 000000000000..b5b44a770380 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +import com.google.common.annotations.VisibleForTesting; + +import accord.api.Key; +import accord.impl.SafeTimestampsForKey; +import accord.impl.TimestampsForKey; +import accord.primitives.RoutableKey; +import accord.primitives.Timestamp; + +public class AccordSafeTimestampsForKey extends SafeTimestampsForKey implements AccordSafeState +{ + private boolean invalidated; + private final AccordCachingState global; + private TimestampsForKey original; + private TimestampsForKey current; + + public AccordSafeTimestampsForKey(AccordCachingState global) + { + super((Key) global.key()); + this.global = global; + this.original = null; + this.current = null; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordSafeTimestampsForKey that = (AccordSafeTimestampsForKey) o; + return Objects.equals(original, that.original) && Objects.equals(current, that.current); + } + + @Override + public int hashCode() + { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() + { + return "AccordSafeTimestampsForKey{" + + "invalidated=" + invalidated + + ", global=" + global + + ", original=" + original + + ", current=" + current + + '}'; + } + + @Override + public AccordCachingState global() + { + checkNotInvalidated(); + return global; + } + + @Override + public TimestampsForKey current() + { + checkNotInvalidated(); + return current; + } + + @Override + @VisibleForTesting + public void set(TimestampsForKey cfk) + { + checkNotInvalidated(); + this.current = cfk; + } + + public TimestampsForKey original() + { + checkNotInvalidated(); + return original; + } + + @Override + public void preExecute() + { + checkNotInvalidated(); + original = global.get(); + current = original; + } + + @Override + public void postExecute() + { + checkNotInvalidated(); + global.set(current); + } + + @Override + public void invalidate() + { + invalidated = true; + } + + @Override + public boolean invalidated() + { + return invalidated; + } + + public long lastExecutedMicros() + { + return current().lastExecutedHlc(); + } + + public static long timestampMicrosFor(TimestampsForKey timestamps, Timestamp executeAt, boolean isForWriteTxn) + { + return timestamps.hlcFor(executeAt, isForWriteTxn); + } + + public static int nowInSecondsFor(TimestampsForKey timestamps, Timestamp executeAt, boolean isForWriteTxn) + { + timestamps.validateExecuteAtTime(executeAt, isForWriteTxn); + // we use the executeAt time instead of the monotonic database timestamp to prevent uneven + // ttl expiration in extreme cases, ie 1M+ writes/second to a key causing timestamps to overflow + // into the next second on some keys and not others. + return Math.toIntExact(TimeUnit.MICROSECONDS.toSeconds(timestamps.lastExecutedTimestamp().hlc())); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 97bf5b1586e4..d08bd86c4215 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -26,10 +26,12 @@ import java.util.stream.Stream; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.utils.IntrusiveLinkedList; +import accord.utils.Invariants; import accord.utils.async.AsyncChains; import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.ExecutorPlus; @@ -38,7 +40,6 @@ import org.apache.cassandra.service.accord.AccordCachingState.Status; import static accord.utils.Invariants.checkState; -import static java.lang.String.format; import static org.apache.cassandra.service.accord.AccordCachingState.Status.EVICTED; import static org.apache.cassandra.service.accord.AccordCachingState.Status.FAILED_TO_LOAD; import static org.apache.cassandra.service.accord.AccordCachingState.Status.LOADED; @@ -65,8 +66,14 @@ public static void validateLoadOnEvict(boolean value) VALIDATE_LOAD_ON_EVICT = value; } - private final Map> cache = new HashMap<>(); - private final HashMap, Instance> instances = new HashMap<>(); + static class Stats + { + private long queries; + private long hits; + private long misses; + } + + private ImmutableList> instances = ImmutableList.of(); private final ExecutorPlus loadExecutor, saveExecutor; @@ -136,6 +143,8 @@ private void maybeEvictSomeNodes() AccordCachingState node = iter.next(); checkState(node.references == 0); + if (!node.canEvict()) + continue; /* * TODO (expected, efficiency): * can this be reworked so we're not skipping unevictable nodes everytime we try to evict? @@ -186,9 +195,7 @@ private void evict(AccordCachingState node) if (!node.hasListeners()) { - AccordCachingState self = cache.remove(node.key()); - if (self != null) - instance.itemsCached--; + AccordCachingState self = instances.get(node.index).cache.remove(node.key()); checkState(self == node, "Leaked node detected; was attempting to remove %s but cache had %s", node, self); } else @@ -199,29 +206,45 @@ private void evict(AccordCachingState node) private Instance instanceForNode(AccordCachingState node) { - return instances.get(node.key().getClass()); + return instances.get(node.index); } public > Instance instance( Class keyClass, - Class realKeyClass, + Class valClass, Function, S> safeRefFactory, Function loadFunction, BiFunction saveFunction, BiFunction validateFunction, - ToLongFunction heapEstimator) + ToLongFunction heapEstimator, + AccordCachingState.Factory nodeFactory) { + int index = instances.size(); + + Instance instance = - new Instance<>(keyClass, safeRefFactory, loadFunction, saveFunction, validateFunction, heapEstimator); + new Instance<>(index, keyClass, safeRefFactory, loadFunction, saveFunction, validateFunction, heapEstimator, nodeFactory); - if (instances.put(realKeyClass, instance) != null) - throw new IllegalArgumentException(format("Cache instances for key type %s already exists", realKeyClass.getName())); + instances = ImmutableList.>builder().addAll(instances).add(instance).build(); return instance; } + public > Instance instance( + Class keyClass, + Class valClass, + Function, S> safeRefFactory, + Function loadFunction, + BiFunction saveFunction, + BiFunction validateFunction, + ToLongFunction heapEstimator) + { + return instance(keyClass, valClass, safeRefFactory, loadFunction, saveFunction, validateFunction, heapEstimator, AccordCachingState.defaultFactory()); + } + public class Instance> implements CacheSize { + private final int index; private final Class keyClass; private final Function, S> safeRefFactory; private Function loadFunction; @@ -229,19 +252,24 @@ public class Instance> implements CacheSiz private final BiFunction validateFunction; private final ToLongFunction heapEstimator; private long bytesCached; - private int itemsCached; +// private int itemsCached; @VisibleForTesting final CacheAccessMetrics instanceMetrics; + private final Stats stats = new Stats(); + private final Map> cache = new HashMap<>(); + private final AccordCachingState.Factory nodeFactory; public Instance( - Class keyClass, + int index, Class keyClass, Function, S> safeRefFactory, Function loadFunction, BiFunction saveFunction, BiFunction validateFunction, - ToLongFunction heapEstimator) + ToLongFunction heapEstimator, + AccordCachingState.Factory nodeFactory) { + this.index = index; this.keyClass = keyClass; this.safeRefFactory = safeRefFactory; this.loadFunction = loadFunction; @@ -249,6 +277,7 @@ public Instance( this.validateFunction = validateFunction; this.heapEstimator = heapEstimator; this.instanceMetrics = metrics.forInstance(keyClass); + this.nodeFactory = nodeFactory; } public Stream> stream() @@ -258,6 +287,34 @@ public Stream> stream() .map(e -> (AccordCachingState) e.getValue()); } + public S acquireOrInitialize(K key, Function valueFactory) + { + incrementCacheQueries(); + @SuppressWarnings("unchecked") + AccordCachingState node = (AccordCachingState) cache.get(key); + if (node == null) + { + node = nodeFactory.create(key, index); + node.initialize(valueFactory.apply(key)); + cache.put(key, node); + } + AccordCachingState acquired = acquireExisting(node, true); + Invariants.checkState(acquired != null, "%s could not be acquired", node); + return safeRefFactory.apply(acquired); + } + + public S acquireIfExists(K key) + { + incrementCacheQueries(); + @SuppressWarnings("unchecked") + AccordCachingState node = (AccordCachingState) cache.get(key); + if (node == null) + { + return null; + } + return safeRefFactory.apply(acquireExisting(node, false)); + } + public S acquire(K key) { AccordCachingState node = acquire(key, false); @@ -290,12 +347,11 @@ private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded) incrementCacheMisses(); if (onlyIfLoaded) return null; - AccordCachingState node = new AccordCachingState<>(key); + AccordCachingState node = nodeFactory.create(key, index); node.load(loadExecutor, loadFunction); node.references++; - if (cache.put(key, node) == null) - itemsCached++; + cache.put(key, node); maybeUpdateSize(node, heapEstimator); metrics.objectSize.update(node.lastQueriedEstimatedSizeOnHeap); maybeEvictSomeNodes(); @@ -429,6 +485,27 @@ public void complete(K key) node.complete(); } + @VisibleForTesting + boolean keyIsReferenced(Object key, Class> valClass) + { + AccordCachingState node = cache.get(key); + return node != null && node.references > 0; + } + + @VisibleForTesting + boolean keyIsCached(Object key, Class> valClass) + { + AccordCachingState node = cache.get(key); + return node != null && node.status() != EVICTED; + } + + @VisibleForTesting + int references(Object key, Class> valClass) + { + AccordCachingState node = cache.get(key); + return node != null ? node.references : 0; + } + private void incrementCacheQueries() { instanceMetrics.requests.mark(); @@ -474,7 +551,7 @@ public void setCapacity(long capacity) @Override public int size() { - return itemsCached; + return cache.size(); } @Override @@ -487,13 +564,12 @@ public long weightedSize() @VisibleForTesting void unsafeClear() { - cache.clear(); bytesCached = 0; metrics.reset();; - instances.values().forEach(i -> { - i.itemsCached = 0; - i.bytesCached = 0; - i.instanceMetrics.reset(); + instances.forEach(instance -> { + instance.cache.clear(); + instance.bytesCached = 0; + instance.instanceMetrics.reset(); }); //noinspection StatementWithEmptyBody while (null != poll()); @@ -524,10 +600,18 @@ public void awaitSaveResults() AsyncChains.awaitUninterruptibly(node.saving()); } + private int cacheSize() + { + int size = 0; + for (Instance instance : instances) + size += instance.cache.size(); + return size; + } + @VisibleForTesting int numReferencedEntries() { - return cache.size() - unreferenced; + return cacheSize() - unreferenced; } @VisibleForTesting @@ -539,7 +623,7 @@ int numUnreferencedEntries() @Override public int size() { - return cache.size(); + return cacheSize(); } @Override @@ -547,25 +631,4 @@ public long weightedSize() { return bytesCached; } - - @VisibleForTesting - boolean keyIsReferenced(Object key) - { - AccordCachingState node = cache.get(key); - return node != null && node.references > 0; - } - - @VisibleForTesting - boolean keyIsCached(Object key) - { - AccordCachingState node = cache.get(key); - return node != null && node.status() != EVICTED; - } - - @VisibleForTesting - int references(Object key) - { - AccordCachingState node = cache.get(key); - return node != null ? node.references : 0; - } } diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java b/src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java new file mode 100644 index 000000000000..4ab08b52735e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.nio.ByteBuffer; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import accord.impl.CommandTimeseries; +import accord.impl.CommandsForKey; +import accord.impl.CommandsForKeyGroupUpdater; +import accord.impl.CommandsForKeyUpdater; +import accord.primitives.RoutableKey; +import accord.primitives.Timestamp; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.service.accord.api.PartitionKey; + +import static org.apache.cassandra.utils.ObjectSizes.measure; + +public class CommandsForKeyUpdate extends CommandsForKeyGroupUpdater.Immutable implements CommandsForKey.Update +{ + private static final CommandsForKeyUpdate EMPTY = new CommandsForKeyUpdate(null, null, null, null); + + static long EMPTY_SIZE = measure(EMPTY); + + private static long EMPTY_TIMESERIES_UPDATE_SIZE = measure(new CommandTimeseries.ImmutableUpdate(ImmutableMap.of(), ImmutableSet.of())); + + private static long immutableTimeseriesUpdate(CommandTimeseries.ImmutableUpdate update) + { + long size = EMPTY_TIMESERIES_UPDATE_SIZE; + for (Map.Entry write : update.writes.entrySet()) + { + size += AccordObjectSizes.timestamp(write.getKey()); + size += ByteBufferAccessor.instance.size(write.getValue()); + } + + for (T delete : update.deletes) + size += AccordObjectSizes.timestamp(delete); + + return size; + } + + private static long EMPTY_UPDATER_SIZE = measure(new CommandsForKeyUpdater.Immutable<>(null)); + + private static long updaterSize(CommandsForKeyUpdater.Immutable updater) + { + long size = EMPTY_UPDATER_SIZE; + size += immutableTimeseriesUpdate(updater.commands()); + return size; + } + + private final PartitionKey key; + + public CommandsForKeyUpdate(PartitionKey key, CommandsForKeyUpdater.Immutable deps, CommandsForKeyUpdater.Immutable all, CommandsForKeyUpdater.Immutable common) + { + super(deps, all, common); + this.key = key; + } + + public static CommandsForKeyUpdate empty(RoutableKey key) + { + return new CommandsForKeyUpdate((PartitionKey) key, + CommandsForKeyUpdater.Immutable.empty(), + CommandsForKeyUpdater.Immutable.empty(), + CommandsForKeyUpdater.Immutable.empty()); + + } + + public PartitionKey key() + { + return key; + } + + public long estimatedSizeOnHeap() + { + long size = EMPTY_SIZE; + size += AccordObjectSizes.key(key.asKey()); + size += updaterSize(deps()); + size += updaterSize(all()); + size += updaterSize(common()); + return size; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index 64ac67fb4a4f..c1da9da190f3 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -41,7 +41,8 @@ import accord.api.Key; import accord.api.RoutingKey; import accord.impl.CommandTimeseries; -import accord.impl.CommandTimeseriesHolder; +import accord.impl.DomainCommands; +import accord.impl.DomainTimestamps; import accord.local.Command; import accord.local.PreLoadContext; import accord.local.SafeCommand; @@ -66,6 +67,8 @@ public class CommandsForRanges { + public interface DomainInfo extends DomainCommands, DomainTimestamps {} + public enum TxnType { UNKNOWN, LOCAL, REMOTE; @@ -402,19 +405,19 @@ public boolean containsLocally(TxnId txnId) return localCommands.contains(txnId); } - public Iterable search(AbstractKeys keys) + public Iterable search(AbstractKeys keys) { // group by the keyspace, as ranges are based off TokenKey, which is scoped to a range Map> groupByKeyspace = new TreeMap<>(); for (Key key : keys) groupByKeyspace.computeIfAbsent(((PartitionKey) key).keyspace(), ignore -> new ArrayList<>()).add(key); - return () -> new AbstractIterator() + return () -> new AbstractIterator() { Iterator ksIt = groupByKeyspace.keySet().iterator(); Iterator>> rangeIt; @Override - protected CommandTimeseriesHolder computeNext() + protected DomainInfo computeNext() { while (true) { @@ -456,14 +459,14 @@ private static Range toRange(Interval interval } @Nullable - public CommandTimeseriesHolder search(Range range) + public DomainInfo search(Range range) { List matches = rangesToCommands.search(Interval.create(normalize(range.start(), range.startInclusive(), true), normalize(range.end(), range.endInclusive(), false))); return result(range, matches); } - private CommandTimeseriesHolder result(Seekable seekable, Collection matches) + private DomainInfo result(Seekable seekable, Collection matches) { if (matches.isEmpty()) return null; @@ -506,7 +509,7 @@ private static RoutingKey normalize(RoutingKey key, boolean inclusive, boolean u } } - private static class Holder implements CommandTimeseriesHolder + private static class Holder implements DomainInfo { private final Seekable keyOrRange; private final Collection matches; @@ -518,31 +521,25 @@ private Holder(Seekable keyOrRange, Collection matches) } @Override - public CommandTimeseries byId() - { - return build(m -> m.txnId); - } - - @Override - public CommandTimeseries byExecuteAt() + public CommandTimeseries commands() { - return build(m -> m.executeAt != null ? m.executeAt : m.txnId); + return build(); } @Override public Timestamp max() { - return byExecuteAt().maxTimestamp(); + return commands().maxTimestamp(); } - private CommandTimeseries build(Function fn) + private CommandTimeseries build() { - CommandTimeseries.Update builder = new CommandTimeseries.Update<>(keyOrRange, RangeCommandSummaryLoader.INSTANCE); + CommandTimeseries.Builder builder = new CommandTimeseries.Builder<>(keyOrRange, RangeCommandSummaryLoader.INSTANCE); for (RangeCommandSummary m : matches) { if (m.status == SaveStatus.Invalidated) continue; - builder.add(fn.apply(m), m); + builder.add(m.txnId, m); } return builder.build(); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index c5715e745fe0..16b2e8673067 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -34,6 +34,7 @@ import org.slf4j.LoggerFactory; import accord.api.RoutingKey; +import accord.local.KeyHistory; import accord.local.PreLoadContext; import accord.primitives.Range; import accord.primitives.Ranges; @@ -71,14 +72,16 @@ enum State private final Iterable txnIds; private final Seekables keysOrRanges; + private final KeyHistory keyHistory; protected AsyncResult readResult; - public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Seekables keysOrRanges) + public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Seekables keysOrRanges, KeyHistory keyHistory) { this.commandStore = commandStore; this.txnIds = txnIds; this.keysOrRanges = keysOrRanges; + this.keyHistory = keyHistory; } protected static Iterable txnIds(PreLoadContext context) @@ -90,34 +93,51 @@ protected static Iterable txnIds(PreLoadContext context) return Iterables.concat(Collections.singleton(primaryid), additionalIds); } + private static > void referenceAndAssembleReadsForKey(K key, + Map context, + AccordStateCache.Instance cache, + List> listenChains) + { + S safeRef = cache.acquire(key); + context.put(key, safeRef); + AccordCachingState.Status status = safeRef.globalStatus(); // globalStatus() completes + switch (status) + { + default: throw new IllegalStateException("Unhandled global state: " + status); + case LOADING: + listenChains.add(safeRef.loading()); + break; + case SAVING: + // make sure we work with a completed state that supports get() and set() + listenChains.add(safeRef.saving()); + break; + case LOADED: + case MODIFIED: + case FAILED_TO_SAVE: + break; + case FAILED_TO_LOAD: + throw new RuntimeException(safeRef.failure()); + } + } + + private void referenceAndAssembleReadsForKey(RoutableKey key, + AsyncOperation.Context context, + List> listenChains) + { + referenceAndAssembleReadsForKey(key, context.timestampsForKey, commandStore.timestampsForKeyCache(), listenChains); + if (keyHistory == KeyHistory.DEPS) + referenceAndAssembleReadsForKey(key, context.depsCommandsForKeys, commandStore.depsCommandsForKeyCache(), listenChains); + if (keyHistory == KeyHistory.ALL) + referenceAndAssembleReadsForKey(key, context.allCommandsForKeys, commandStore.allCommandsForKeyCache(), listenChains); + referenceAndAssembleReadsForKey(key, context.updatesForKeys, commandStore.updatesForKeyCache(), listenChains); + } + private > void referenceAndAssembleReads(Iterable keys, Map context, AccordStateCache.Instance cache, List> listenChains) { - for (K key : keys) - { - S safeRef = cache.acquire(key); - context.put(key, safeRef); - AccordCachingState.Status status = safeRef.globalStatus(); // globalStatus() completes - switch (status) - { - default: throw new IllegalStateException("Unhandled global state: " + status); - case LOADING: - listenChains.add(safeRef.loading()); - break; - case SAVING: - // make sure we work with a completed state that supports get() and set() - listenChains.add(safeRef.saving()); - break; - case LOADED: - case MODIFIED: - case FAILED_TO_SAVE: - break; - case FAILED_TO_LOAD: - throw new RuntimeException(safeRef.failure()); - } - } + keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, cache, listenChains)); } private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) @@ -131,7 +151,7 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) case Key: // cast to Keys fails... Iterable keys = (Iterable) keysOrRanges; - referenceAndAssembleReads(keys, context.commandsForKeys, commandStore.commandsForKeyCache(), chains); + keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); break; case Range: chains.add(referenceAndDispatchReadsForRange(context)); @@ -151,7 +171,7 @@ private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context c if (keys.isEmpty()) return AsyncChains.success(null); List> chains = new ArrayList<>(); - referenceAndAssembleReads(keys, context.commandsForKeys, commandStore.commandsForKeyCache(), chains); + keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); return chains.isEmpty() ? AsyncChains.success(null) : AsyncChains.reduce(chains, (a, b) -> null); }, commandStore); } @@ -168,7 +188,7 @@ private AsyncChain> findOverlappingKeys(Ranges ranges private AsyncChain> findOverlappingKeys(Range range) { - Set cached = commandStore.commandsForKeyCache().stream() + Set cached = commandStore.depsCommandsForKeyCache().stream() .map(n -> (PartitionKey) n.key()) .filter(range::contains) .collect(Collectors.toSet()); diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index a9413cd8e2ba..e93c1dd20b35 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -42,7 +42,9 @@ import org.apache.cassandra.service.accord.AccordSafeCommand; import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordSafeCommandStore; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKeyUpdate; import org.apache.cassandra.service.accord.AccordSafeState; +import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import static org.apache.cassandra.service.accord.async.AsyncLoader.txnIds; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.INITIALIZED; @@ -66,18 +68,27 @@ private static class LoggingProps static class Context { final HashMap commands = new HashMap<>(); - final TreeMap commandsForKeys = new TreeMap<>(); + final TreeMap timestampsForKey = new TreeMap<>(); + final TreeMap depsCommandsForKeys = new TreeMap<>(); + final TreeMap allCommandsForKeys = new TreeMap<>(); + final TreeMap updatesForKeys = new TreeMap<>(); void releaseResources(AccordCommandStore commandStore) { commands.values().forEach(commandStore.commandCache()::release); - commandsForKeys.values().forEach(commandStore.commandsForKeyCache()::release); + timestampsForKey.values().forEach(commandStore.timestampsForKeyCache()::release); + depsCommandsForKeys.values().forEach(commandStore.depsCommandsForKeyCache()::release); + allCommandsForKeys.values().forEach(commandStore.allCommandsForKeyCache()::release); + updatesForKeys.values().forEach(commandStore.updatesForKeyCache()::release); } void revertChanges() { commands.values().forEach(AccordSafeState::revert); - commandsForKeys.values().forEach(AccordSafeState::revert); + timestampsForKey.values().forEach(AccordSafeState::revert); + depsCommandsForKeys.values().forEach(AccordSafeState::revert); + allCommandsForKeys.values().forEach(AccordSafeState::revert); + updatesForKeys.values().forEach(AccordSafeState::revert); } } @@ -136,7 +147,7 @@ public String toString() AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()); + return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys(), preLoadContext.keyHistory()); } private void onLoaded(Object o, Throwable throwable) @@ -249,11 +260,11 @@ protected void runInternal() return; state(PREPARING); case PREPARING: - safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.commandsForKeys); + safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.timestampsForKey, context.depsCommandsForKeys, context.allCommandsForKeys, context.updatesForKeys); state(RUNNING); case RUNNING: result = apply(safeStore); - safeStore.postExecute(context.commands, context.commandsForKeys); + safeStore.postExecute(context.commands, context.timestampsForKey, context.depsCommandsForKeys, context.allCommandsForKeys, context.updatesForKeys); context.releaseResources(commandStore); commandStore.completeOperation(safeStore); commandStore.executionOrder().unregister(this); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index c273e76be527..d75d924ca5e1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -29,6 +29,7 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; +import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -37,12 +38,32 @@ public class ApplySerializers { + private static final IVersionedSerializer kind = new IVersionedSerializer() + { + public void serialize(Apply.Kind kind, DataOutputPlus out, int version) throws IOException + { + Invariants.checkArgument(kind == Apply.Kind.Maximal || kind == Apply.Kind.Minimal); + out.writeBoolean(kind == Apply.Kind.Maximal); + } + + public Apply.Kind deserialize(DataInputPlus in, int version) throws IOException + { + return in.readBoolean() ? Apply.Kind.Maximal : Apply.Kind.Minimal; + } + + public long serializedSize(Apply.Kind t, int version) + { + return TypeSizes.BOOL_SIZE; + } + }; + +// public static final IVersionedSerializer request = new TxnRequestSerializer() public abstract static class ApplySerializer extends TxnRequestSerializer { @Override public void serializeBody(A apply, DataOutputPlus out, int version) throws IOException { - out.writeBoolean(apply.kind == Apply.Kind.Maximal); + kind.serialize(apply.kind, out, version); KeySerializers.seekables.serialize(apply.keys(), out, version); CommandSerializers.timestamp.serialize(apply.executeAt, out, version); DepsSerializer.partialDeps.serialize(apply.deps, out, version); @@ -57,7 +78,7 @@ protected abstract A deserializeApply(TxnId txnId, PartialRoute scope, long w public A deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { return deserializeApply(txnId, scope, waitForEpoch, - in.readBoolean() ? Apply.Kind.Maximal : Apply.Kind.Minimal, + kind.deserialize(in, version), KeySerializers.seekables.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), @@ -69,7 +90,7 @@ public A deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRout @Override public long serializedBodySize(A apply, int version) { - return TypeSizes.BOOL_SIZE + return kind.serializedSize(apply.kind, version) + KeySerializers.seekables.serializedSize(apply.keys(), version) + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + DepsSerializer.partialDeps.serializedSize(apply.deps, version) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 0d15b74f5254..1d903025afa2 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -224,6 +224,7 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce return createOk(map, maxKnowledgeStatus, maxStatus, promised, accepted, executeAt, isCoordinating, durability, route, homeKey, partialTxn, committedDeps, writes, result); + } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index e67368d00d62..23ea20b19710 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -30,6 +30,7 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Unseekables; +import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -42,6 +43,26 @@ public class CommitSerializers { + private static final IVersionedSerializer kind = new IVersionedSerializer() + { + public void serialize(Commit.Kind kind, DataOutputPlus out, int version) throws IOException + { + Invariants.checkArgument(kind == Commit.Kind.Minimal || kind == Commit.Kind.Maximal); + out.writeBoolean(kind == Commit.Kind.Maximal); + + } + + public Commit.Kind deserialize(DataInputPlus in, int version) throws IOException + { + return in.readBoolean() ? Commit.Kind.Maximal : Commit.Kind.Minimal; + } + + public long serializedSize(Commit.Kind kind, int version) + { + return TypeSizes.BOOL_SIZE; + } + }; + public abstract static class CommitSerializer extends TxnRequestSerializer { private final IVersionedSerializer read; @@ -54,7 +75,7 @@ public CommitSerializer(Class klass, IVersionedSerializer read) @Override public void serializeBody(C msg, DataOutputPlus out, int version) throws IOException { - out.writeBoolean(msg.kind == Commit.Kind.Maximal); + kind.serialize(msg.kind, out, version); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); CommandSerializers.nullablePartialTxn.serialize(msg.partialTxn, out, version); DepsSerializer.partialDeps.serialize(msg.partialDeps, out, version); @@ -70,7 +91,7 @@ protected abstract C deserializeCommit(TxnId txnId, PartialRoute scope, long public C deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { return deserializeCommit(txnId, scope, waitForEpoch, - in.readBoolean() ? Commit.Kind.Maximal : Commit.Kind.Minimal, + kind.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), CommandSerializers.nullablePartialTxn.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), @@ -82,7 +103,7 @@ public C deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRout @Override public long serializedBodySize(C msg, int version) { - return TypeSizes.BOOL_SIZE + return kind.serializedSize(msg.kind, version) + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + CommandSerializers.nullablePartialTxn.serializedSize(msg.partialTxn, version) + DepsSerializer.partialDeps.serializedSize(msg.partialDeps, version) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index 0bdae00bb5a5..61d715b79802 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -166,7 +166,7 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); Route route = KeySerializers.route.deserialize(in, version); - SaveStatus saveStatus = CommandSerializers.saveStatus.deserialize(in, version); + SaveStatus maxKnowledgeSaveStatus = CommandSerializers.saveStatus.deserialize(in, version); SaveStatus maxSaveStatus = CommandSerializers.saveStatus.deserialize(in, version); Durability durability = CommandSerializers.durability.deserialize(in, version); RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); @@ -175,13 +175,13 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException CheckStatus.FoundKnownMap known = CheckStatusSerializers.foundKnownMap.deserialize(in, version); boolean isTruncated = in.readBoolean(); PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); - PartialDeps partialDeps = DepsSerializer.nullablePartialDeps.deserialize(in, version); + PartialDeps committedDeps = DepsSerializer.nullablePartialDeps.deserialize(in, version); long toEpoch = in.readLong(); - Timestamp executeAt = CommandSerializers.nullableTimestamp.deserialize(in, version); + Timestamp committedExecuteAt = CommandSerializers.nullableTimestamp.deserialize(in, version); Writes writes = CommandSerializers.nullableWrites.deserialize(in, version); Result result = null; - switch (saveStatus) + switch (maxSaveStatus) { case PreApplied: case Applying: @@ -193,7 +193,22 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException break; } - return Propagate.SerializerSupport.create(txnId, route, saveStatus, maxSaveStatus, durability, homeKey, progressKey, achieved, known, isTruncated, partialTxn, partialDeps, toEpoch, executeAt, writes, result); + return Propagate.SerializerSupport.create(txnId, + route, + maxKnowledgeSaveStatus, + maxSaveStatus, + durability, + homeKey, + progressKey, + achieved, + known, + isTruncated, + partialTxn, + committedDeps, + toEpoch, + committedExecuteAt, + writes, + result); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 3848908216e5..014a6b56c6a3 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -36,6 +36,9 @@ import accord.api.DataStore; import accord.api.Write; +import accord.impl.AbstractSafeCommandStore; +import accord.impl.CommandsForKeys; +import accord.impl.TimestampsForKey; import accord.local.SafeCommandStore; import accord.primitives.PartialTxn; import accord.primitives.RoutableKey; @@ -60,8 +63,7 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.service.accord.AccordSafeCommandStore; -import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; +import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.BooleanSerializer; import org.apache.cassandra.utils.ByteBufferUtil; @@ -373,11 +375,10 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; // any that aren't can just use executeAt.hlc - AccordSafeCommandsForKey cfk = ((AccordSafeCommandStore) safeStore).commandsForKey((RoutableKey) key); - cfk.updateLastExecutionTimestamps(safeStore, executeAt, true); - long timestamp = cfk.timestampMicrosFor(executeAt, true); + TimestampsForKey cfk = CommandsForKeys.updateLastExecutionTimestamps((AbstractSafeCommandStore) safeStore, (RoutableKey) key, executeAt, true); + long timestamp = AccordSafeTimestampsForKey.timestampMicrosFor(cfk, executeAt, true); // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) - int nowInSeconds = cfk.nowInSecondsFor(executeAt, true); + int nowInSeconds = AccordSafeTimestampsForKey.nowInSecondsFor(cfk, executeAt, true); List> results = new ArrayList<>(); diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 6d180f82ec70..728ecbb2d943 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -85,13 +85,11 @@ import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; import org.apache.cassandra.service.accord.AccordKeyspace.CommandsColumns; -import org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeyRows; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; -import static accord.impl.CommandsForKey.NO_LAST_EXECUTED_HLC; import static accord.local.PreLoadContext.contextFor; import static accord.utils.async.AsyncChains.getUninterruptibly; import static org.apache.cassandra.Util.spinAssertEquals; @@ -101,10 +99,10 @@ import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.UNIVERSAL; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.service.accord.AccordKeyspace.COMMANDS; -import static org.apache.cassandra.service.accord.AccordKeyspace.COMMANDS_FOR_KEY; +import static org.apache.cassandra.service.accord.AccordKeyspace.DEPS_COMMANDS_FOR_KEY; +import static org.apache.cassandra.service.accord.AccordKeyspace.DepsCommandsForKeysAccessor; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -129,7 +127,7 @@ public class CompactionAccordIteratorsTest private static final TxnId GT_SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, SECOND_TXN_ID.hlc() + 1, NODE); static ColumnFamilyStore commands; - static ColumnFamilyStore commandsForKey; + static ColumnFamilyStore depsCommandsForKey; static TableMetadata table; static FullRoute route; Random random; @@ -150,8 +148,8 @@ public static void beforeClass() throws Throwable StorageService.instance.initServer(); commands = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, COMMANDS); commands.disableAutoCompaction(); - commandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, COMMANDS_FOR_KEY); - commandsForKey.disableAutoCompaction(); + depsCommandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, DEPS_COMMANDS_FOR_KEY); + depsCommandsForKey.disableAutoCompaction(); table = ColumnFamilyStore.getIfExists("ks", "tbl").metadata(); route = AccordTestUtils.keys(table, 42).toRoute(AccordTestUtils.key(table, 42).toUnseekable()); } @@ -211,6 +209,12 @@ private void testAccordCommandsPurger(RedundantBefore redundantBefore, DurableBe }, false); } + @Test + public void testAccordTimestampsForKeyPurger() + { + throw new AssertionError("TODO -> see commented out parts of CFK tests"); + } + @Test public void testAccordCommandsForKeyPurgerSingleCompaction() throws Throwable { @@ -240,16 +244,16 @@ private static Consumer> expectedAccordCommandsForKeyNoChange() Partition partition = partitions.get(0); Row staticRow = partition.getRow(Clustering.STATIC_CLUSTERING); assertEquals(4, Iterables.size(staticRow)); - assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); - assertEquals(TXN_ID, CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); - assertEquals(TXN_ID, CommandsForKeyRows.getLastWriteTimestamp(staticRow)); - assertEquals(TXN_ID.hlc(), CommandsForKeyRows.getLastExecutedMicros(staticRow)); +// assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); +// assertEquals(TXN_ID, CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); +// assertEquals(TXN_ID, CommandsForKeyRows.getLastWriteTimestamp(staticRow)); +// assertEquals(TXN_ID.hlc(), CommandsForKeyRows.getLastExecutedMicros(staticRow)); assertEquals(4, Iterators.size(partition.unfilteredIterator())); UnfilteredRowIterator rows = partition.unfilteredIterator(); // One row per txn per series for (int i = 0; i < 2; i++) for (TxnId txnId : TXN_IDS) - assertEquals(txnId, CommandsForKeyRows.getTimestamp((Row)rows.next())); + assertEquals(txnId, DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); }; } @@ -261,14 +265,14 @@ private static Consumer> expectedAccordCommandsForKeyEraseOne() Row staticRow = partition.getRow(Clustering.STATIC_CLUSTERING); // Only expect one column to remain because the second transaction is a read assertEquals(1, Iterables.size(staticRow)); - assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); - assertNull(CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); - assertNull(CommandsForKeyRows.getLastWriteTimestamp(staticRow)); - assertEquals(NO_LAST_EXECUTED_HLC, CommandsForKeyRows.getLastExecutedMicros(staticRow)); +// assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); +// assertNull(CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); +// assertNull(CommandsForKeyRows.getLastWriteTimestamp(staticRow)); +// assertEquals(NO_LAST_EXECUTED_HLC, CommandsForKeyRows.getLastExecutedMicros(staticRow)); assertEquals(2, Iterators.size(partition.unfilteredIterator())); UnfilteredRowIterator rows = partition.unfilteredIterator(); - assertEquals(TXN_IDS[1], CommandsForKeyRows.getTimestamp((Row)rows.next())); - assertEquals(TXN_IDS[1], CommandsForKeyRows.getTimestamp((Row)rows.next())); + assertEquals(TXN_IDS[1], DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); + assertEquals(TXN_IDS[1], DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); }; } @@ -281,7 +285,7 @@ private void testAccordCommandsForKeyPurger(RedundantBefore redundantBefore, Con { testWithCommandStore((commandStore) -> { IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, DurableBefore.EMPTY); - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, COMMANDS_FOR_KEY); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, DEPS_COMMANDS_FOR_KEY); List result = compactCFS(mockAccordService, cfs); expectedResult.accept(result); }, true); @@ -405,7 +409,7 @@ private static void flush(AccordCommandStore commandStore) commandStore.cache().awaitSaveResults(); }); commands.forceBlockingFlush(FlushReason.UNIT_TESTS); - commandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); + depsCommandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); } private void testWithCommandStore(TestWithCommandStore test, boolean additionalCommand) throws Throwable @@ -464,7 +468,7 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC Iterator commandsTableIterator = commandsTable.iterator(); for (TxnId txnId : txnIds) assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); - UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS_FOR_KEY + ";"); + UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + DEPS_COMMANDS_FOR_KEY + ";"); logger.info(commandsForKeyTable.toStringUnsafe()); assertEquals(txnIds.length * 2, commandsForKeyTable.size()); Iterator commandsForKeyTableIterator = commandsTable.iterator(); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java index b270992cd099..cf5ab6e8af68 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java @@ -29,9 +29,14 @@ public class AccordCachingStateTest { static class CachingState extends AccordCachingState { + public CachingState(String key, int index) + { + super(key, index); + } + public CachingState(String key) { - super(key); + this(key, 0); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 928daf66960b..5131689845d3 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -18,9 +18,12 @@ package org.apache.cassandra.service.accord; +import java.nio.ByteBuffer; +import java.util.NavigableMap; +import java.util.TreeMap; import java.util.concurrent.atomic.AtomicLong; -import com.google.common.collect.Iterables; +import com.google.common.collect.ImmutableSortedMap; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -30,16 +33,23 @@ import accord.api.Key; import accord.api.Result; +import accord.impl.CommandTimeseries; import accord.impl.CommandsForKey; +import accord.impl.CommandsForKeys; +import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommonAttributes; +import accord.local.KeyHistory; +import accord.local.PreLoadContext; import accord.local.SaveStatus; import accord.messages.Apply; import accord.primitives.Ballot; +import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.Route; +import accord.primitives.RoutableKey; import accord.primitives.RoutingKeys; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -53,12 +63,14 @@ import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordCachingState.Modified; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.Pair; import static accord.local.Status.Durability.Majority; +import static com.google.common.collect.Iterables.getOnlyElement; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.preaccepted; import static org.apache.cassandra.service.accord.AccordTestUtils.ballot; @@ -115,6 +127,7 @@ public void commandLoadSave() throws Throwable attrs.partialTxn(txn); attrs.route(route); attrs.durability(Majority); + attrs.partialTxn(txn); Ballot promised = ballot(1, clock.incrementAndGet(), 1); Ballot accepted = ballot(1, clock.incrementAndGet(), 1); Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); @@ -154,42 +167,193 @@ public void commandLoadSave() throws Throwable } @Test - public void commandsForKeyLoadSave() + public void timestampsForKeyLoadSave() { AtomicLong clock = new AtomicLong(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); Timestamp maxTimestamp = timestamp(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(1); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); Command command1 = preaccepted(txnId1, txn, timestamp(1, clock.incrementAndGet(), 1)); Command command2 = preaccepted(txnId2, txn, timestamp(1, clock.incrementAndGet(), 1)); + AccordSafeTimestampsForKey tfk = new AccordSafeTimestampsForKey(loaded(key, null)); + tfk.initialize(); + tfk.updateMax(maxTimestamp); + + CommandsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId1, true); + Assert.assertEquals(txnId1.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId1, true)); + + CommandsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId2, true); + Assert.assertEquals(txnId2.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId2, true)); + + Assert.assertEquals(txnId2, tfk.current().lastExecutedTimestamp()); + Assert.assertEquals(txnId2.hlc(), tfk.lastExecutedMicros()); + AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); cfk.initialize(CommandsForKeySerializer.loader); - cfk.updateMax(maxTimestamp); - cfk.updateLastExecutionTimestamps(null, txnId1, true); - Assert.assertEquals(txnId1.hlc(), cfk.timestampMicrosFor(txnId1, true)); + AccordSafeCommandsForKeyUpdate ufk = new AccordSafeCommandsForKeyUpdate(loaded(key, null)); + ufk.initialize(); + + CommandsForKeys.registerCommand(tfk, ufk, command1); + CommandsForKeys.registerCommand(tfk, ufk, command2); + + AccordKeyspace.getTimestampsForKeyMutation(commandStore, tfk, commandStore.nextSystemTimestampMicros()).apply(); + logger.info("E: {}", tfk); + TimestampsForKey actual = AccordKeyspace.loadTimestampsForKey(commandStore, key); + logger.info("A: {}", actual); + + Assert.assertEquals(tfk.current(), actual); + } + + @Test + public void commandsForKeyLoadSave() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + PartialTxn txn = createPartialTxn(1); + PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); + TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); + TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); - cfk.updateLastExecutionTimestamps(null, txnId2, true); - Assert.assertEquals(txnId2.hlc(), cfk.timestampMicrosFor(txnId2, true)); + Command command1 = preaccepted(txnId1, txn, timestamp(1, clock.incrementAndGet(), 1)); + Command command2 = preaccepted(txnId2, txn, timestamp(1, clock.incrementAndGet(), 1)); + + AccordSafeTimestampsForKey tfk = new AccordSafeTimestampsForKey(loaded(key, null)); + tfk.initialize(); - Assert.assertEquals(txnId2, cfk.current().lastExecutedTimestamp()); - Assert.assertEquals(txnId2.hlc(), cfk.lastExecutedMicros()); + AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); + cfk.initialize(CommandsForKeySerializer.loader); + AccordSafeCommandsForKeyUpdate ufk = new AccordSafeCommandsForKeyUpdate(loaded(key, null)); + ufk.initialize(); - cfk.register(command1); - cfk.register(command2); + CommandsForKeys.registerCommand(tfk, ufk, command1); + CommandsForKeys.registerCommand(tfk, ufk, command2); - AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); + AccordKeyspace.getCommandsForKeyMutation(commandStore.id(), ufk.setUpdates(), commandStore.nextSystemTimestampMicros()).apply(); logger.info("E: {}", cfk); - CommandsForKey actual = AccordKeyspace.loadCommandsForKey(commandStore, key); + CommandsForKey actual = AccordKeyspace.loadDepsCommandsForKey(commandStore, key); logger.info("A: {}", actual); - Assert.assertEquals(cfk.current(), actual); + Assert.assertEquals(ufk.applyToDeps(cfk.current()), actual); + } + + private static > NavigableMap toNavigableMap(V safeState) + { + TreeMap map = new TreeMap<>(); + map.put(safeState.key(), safeState); + return map; + } + + @Test + public void commandsForKeyUpdateTest() + { + // check that updates are reflected in CFKs without marking them modified + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + PartialTxn txn = createPartialTxn(1); + PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + + AccordSafeCommand safeCommand = commandStore.commandCache().acquireOrInitialize(txnId, t -> preaccepted(txnId, txn, timestamp(1, clock.incrementAndGet(), 1))); + AccordSafeTimestampsForKey timestamps = commandStore.timestampsForKeyCache().acquireOrInitialize(key, k -> new TimestampsForKey((Key) k)); + AccordSafeCommandsForKey commands = commandStore.depsCommandsForKeyCache().acquireOrInitialize(key, k -> new CommandsForKey((Key) k, CommandsForKeySerializer.loader)); + AccordSafeCommandsForKeyUpdate update = commandStore.updatesForKeyCache().acquireOrInitialize(key, CommandsForKeyUpdate::empty); + + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.commandCache().getUnsafe(txnId).status()); + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.timestampsForKeyCache().getUnsafe(key).status()); + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.depsCommandsForKeyCache().getUnsafe(key).status()); + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.updatesForKeyCache().getUnsafe(key).status()); + + AccordSafeCommandStore safeStore = commandStore.beginOperation(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.DEPS), + toNavigableMap(safeCommand), + toNavigableMap(timestamps), + toNavigableMap(commands), + new TreeMap<>(), + toNavigableMap(update)); + + AccordSafeCommandsForKeyUpdate updates = safeStore.getOrCreateCommandsForKeyUpdate(key); + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.updatesForKeyCache().getUnsafe(key).status()); + + Command initialCommand = safeCommand.current(); + CommandsForKey initialCFK = commands.current(); + CommandsForKeyUpdate initialUpdate = updates.current(); + + updates.common().commands().add(txnId, initialCommand); + + CommandsForKeyUpdate expected = new CommandsForKeyUpdate(key, updates.deps().toImmutable(), updates.all().toImmutable(), updates.common().toImmutable()); + Assert.assertEquals(1, expected.common().commands().numChanges()); + Assert.assertTrue(expected.deps().isEmpty()); + Assert.assertTrue(expected.all().isEmpty()); + + Assert.assertSame(initialCFK, commands.current()); + Assert.assertSame(initialUpdate, updates.current()); + + safeStore.postExecute(toNavigableMap(safeCommand), + toNavigableMap(timestamps), + toNavigableMap(commands), + new TreeMap<>(), + toNavigableMap(updates)); + safeStore.complete(); + + + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.commandCache().getUnsafe(txnId).status()); + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.timestampsForKeyCache().getUnsafe(key).status()); + Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.depsCommandsForKeyCache().getUnsafe(key).status()); + Assert.assertEquals(AccordCachingState.Status.MODIFIED, commandStore.updatesForKeyCache().getUnsafe(key).status()); + + CommandsForKey finalCFK = commandStore.depsCommandsForKeyCache().getUnsafe(key).get(); + Assert.assertEquals(txnId, getOnlyElement(finalCFK.commands().commands.keySet())); + + Modified loadedUpdate = (Modified) commandStore.updatesForKeyCache().getUnsafe(key).state(); + Assert.assertNull(loadedUpdate.original); + Assert.assertEquals(expected, loadedUpdate.get()); + } + + /** + * Test that in memory cfk updates are applied to + */ + @Test + public void commandsForKeyUpdateOnLoadTest() + { + AtomicLong clock = new AtomicLong(0); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + + PartialTxn txn = createPartialTxn(1); + PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + + Command command = preaccepted(txnId, txn, timestamp(1, clock.incrementAndGet(), 1)); + + // make a cached update + AccordSafeCommandsForKeyUpdate updates = commandStore.updatesForKeyCache().acquireOrInitialize(key, k -> null); + updates.preExecute(); + updates.common().commands().remove(command.txnId()); + updates.setUpdates(); // apply the updates applied to the safe state to the cached value + updates.postExecute(); // apply the cached value to the global state + commandStore.updatesForKeyCache().release(updates); + + // make an out of date CFK + CommandTimeseries.CommandLoader loader = CommandsForKeySerializer.loader; + CommandsForKey staleCFK = CommandsForKey.SerializerSupport.create(key, loader, + ImmutableSortedMap.of(command.txnId(), loader.saveForCFK(command))); + + Assert.assertEquals(txnId, getOnlyElement(staleCFK.commands().commands.keySet())); + + // on loading the cfk into the cache, the in memory update should be applied + AccordSafeCommandsForKey commands = commandStore.depsCommandsForKeyCache().acquireOrInitialize(key, k -> new CommandsForKey((Key) k, CommandsForKeySerializer.loader)); + commands.preExecute(); + + Assert.assertEquals(txnId, getOnlyElement(staleCFK.commands().commands.keySet())); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index a13f17d49e99..516a89345af5 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -27,7 +27,9 @@ import accord.api.Key; import accord.api.RoutingKey; import accord.impl.CommandsForKey; +import accord.impl.TimestampsForKey; import accord.local.Command; +import accord.local.KeyHistory; import accord.local.Node; import accord.local.PreLoadContext; import accord.local.Status; @@ -116,10 +118,11 @@ public void basicCycleTest() throws Throwable Assert.assertEquals(Status.PreAccepted, command.status()); Assert.assertTrue(command.partialDeps() == null || command.partialDeps().isEmpty()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); - Assert.assertEquals(txnId, cfk.max()); - Assert.assertNotNull((cfk.byId()).get(txnId)); - Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); + TimestampsForKey tfk = ((AccordSafeCommandStore) instance).timestampsForKey(key(1)).current(); + Assert.assertEquals(txnId, tfk.max()); + + CommandsForKey cfk = ((AccordSafeCommandStore) instance).depsCommandsForKey(key(1)).current(); + Assert.assertNotNull((cfk.commands()).get(txnId)); })); // check accept @@ -146,10 +149,11 @@ public void basicCycleTest() throws Throwable Assert.assertEquals(Status.Accepted, command.status()); Assert.assertEquals(deps, command.partialDeps()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); - Assert.assertEquals(executeAt, cfk.max()); - Assert.assertNotNull((cfk.byId()).get(txnId)); - Assert.assertNotNull((cfk.byExecuteAt()).get(txnId)); + TimestampsForKey tfk = ((AccordSafeCommandStore) instance).timestampsForKey(key(1)).current(); + Assert.assertEquals(executeAt, tfk.max()); + + CommandsForKey cfk = ((AccordSafeCommandStore) instance).depsCommandsForKey(key(1)).current(); + Assert.assertNotNull((cfk.commands()).get(txnId)); })); // check commit @@ -157,15 +161,14 @@ public void basicCycleTest() throws Throwable commandStore.appendToJournal(commit); getUninterruptibly(commandStore.execute(commit, commit::apply)); - getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key)), instance -> { + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.DEPS), instance -> { Command command = instance.ifInitialised(txnId).current(); Assert.assertEquals(commit.executeAt, command.executeAt()); Assert.assertTrue(command.hasBeen(Status.Committed)); Assert.assertEquals(commit.partialDeps, command.partialDeps()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); - Assert.assertNotNull((cfk.byId()).get(txnId)); - Assert.assertNotNull((cfk.byExecuteAt()).get(commit.executeAt)); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).depsCommandsForKey(key(1)).current(); + Assert.assertNotNull((cfk.commands()).get(txnId)); })); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index 5fc6f2305f48..5d632c0fb7a2 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -188,7 +188,7 @@ public void testAcquisitionAndRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = instance.acquire("1"); @@ -220,9 +220,9 @@ public void testCachingMetricsWithTwoInstances() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance stringInstance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true,String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true,String::length); AccordStateCache.Instance intInstance = - cache.instance(Integer.class, Integer.class, SafeInt::new, key -> key, (original, current) -> null, (k, v) -> true,ignored -> Integer.BYTES); + cache.instance(Integer.class, SafeInt.class, SafeInt::new, key -> key, (original, current) -> null, (k, v) -> true,ignored -> Integer.BYTES); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = stringInstance.acquire("1"); @@ -260,7 +260,7 @@ public void testRotation() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 5, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[3]; @@ -300,7 +300,7 @@ public void testEvictionOnAcquire() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 5, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; @@ -326,8 +326,8 @@ public void testEvictionOnAcquire() assertCacheState(cache, 1, 5, nodeSize(1) * 4 + nodeSize(0)); Assert.assertSame(items[1].global, cache.head()); Assert.assertSame(items[4].global, cache.tail()); - Assert.assertFalse(cache.keyIsCached("0")); - Assert.assertFalse(cache.keyIsReferenced("0")); + Assert.assertFalse(instance.keyIsCached("0", SafeString.class)); + Assert.assertFalse(instance.keyIsReferenced("0", SafeString.class)); assertCacheMetrics(cache.metrics, 0, 6, 6); assertCacheMetrics(instance.instanceMetrics, 0, 6, 6); @@ -346,7 +346,7 @@ public void testEvictionOnRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 4, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; @@ -385,7 +385,7 @@ public void testMultiAcquireRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 4, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = instance.acquire("0"); @@ -394,12 +394,12 @@ public void testMultiAcquireRelease() assertCacheMetrics(cache.metrics, 0, 1, 1); assertCacheMetrics(instance.instanceMetrics, 0, 1, 1); - Assert.assertEquals(1, cache.references("0")); + Assert.assertEquals(1, instance.references("0", SafeString.class)); assertCacheState(cache, 1, 1, nodeSize(0)); SafeString safeString2 = instance.acquire("0"); Assert.assertEquals(Status.LOADED, safeString1.globalStatus()); - Assert.assertEquals(2, cache.references("0")); + Assert.assertEquals(2, instance.references("0", SafeString.class)); assertCacheState(cache, 1, 1, nodeSize(0)); assertCacheMetrics(cache.metrics, 1, 1, 2); assertCacheMetrics(instance.instanceMetrics, 1, 1, 2); @@ -416,7 +416,7 @@ public void evictionBlockedOnSaving() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 3 + nodeSize(3), cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString item = instance.acquire(Integer.toString(0)); @@ -443,10 +443,10 @@ public void evictionBlockedOnSaving() // all should have been evicted except 0 assertCacheState(cache, 0, 1, nodeSize(2)); - Assert.assertTrue(cache.keyIsCached("0")); - Assert.assertFalse(cache.keyIsCached("1")); - Assert.assertFalse(cache.keyIsCached("2")); - Assert.assertFalse(cache.keyIsCached("3")); + Assert.assertTrue(instance.keyIsCached("0", SafeString.class)); + Assert.assertFalse(instance.keyIsCached("1", SafeString.class)); + Assert.assertFalse(instance.keyIsCached("2", SafeString.class)); + Assert.assertFalse(instance.keyIsCached("3", SafeString.class)); } @Test @@ -455,7 +455,7 @@ public void testUpdates() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, String.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString = instance.acquire("1"); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 64907c0209f6..33255a0f0f57 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -139,9 +139,9 @@ public static CommandsForKey commandsForKey(Key key) return new CommandsForKey(key, CommandsForKeySerializer.loader); } - public static AccordCachingState loaded(K key, V value) + public static AccordCachingState loaded(K key, V value, int index) { - AccordCachingState global = new AccordCachingState<>(key); + AccordCachingState global = new AccordCachingState<>(key, index); global.load(ImmediateExecutor.INSTANCE, k -> { Assert.assertEquals(key, k); return value; @@ -150,6 +150,11 @@ public static AccordCachingState loaded(K key, V value) return global; } + public static AccordCachingState loaded(K key, V value) + { + return loaded(key, value, 0); + } + public static AccordSafeCommand safeCommand(Command command) { AccordCachingState global = loaded(command.txnId(), command); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index e1f5e317f031..569de8239296 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -28,8 +28,10 @@ import org.junit.BeforeClass; import org.junit.Test; -import accord.impl.CommandsForKey; +import accord.api.Key; +import accord.impl.TimestampsForKey; import accord.local.Command; +import accord.local.KeyHistory; import accord.primitives.Keys; import accord.primitives.PartialTxn; import accord.primitives.RoutableKey; @@ -45,16 +47,19 @@ import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordCachingState; import org.apache.cassandra.service.accord.AccordSafeCommand; -import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKeyUpdate; +import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.CommandsForKeyUpdate; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation.Context; import org.apache.cassandra.utils.concurrent.AsyncPromise; +import static java.util.Collections.emptyList; import static java.util.Collections.singleton; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.notDefined; -import static org.apache.cassandra.service.accord.AccordTestUtils.commandsForKey; +import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.preaccepted; import static org.apache.cassandra.service.accord.AccordTestUtils.createAccordCommandStore; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; import static org.apache.cassandra.service.accord.AccordTestUtils.execute; @@ -86,7 +91,7 @@ public void cachedTest() AccordStateCache.Instance commandCache = commandStore.commandCache(); commandStore.executeBlocking(() -> commandStore.setCapacity(1024)); - AccordStateCache.Instance cfkCache = commandStore.commandsForKeyCache(); + AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); @@ -98,24 +103,24 @@ public void cachedTest() testLoad(executor, safeCommand, notDefined(txnId, txn)); commandCache.release(safeCommand); - cfkCache.unsafeSetLoadFunction(k -> commandsForKey((PartitionKey) k)); - AccordSafeCommandsForKey safeCfk = cfkCache.acquire(key); - testLoad(executor, safeCfk, commandsForKey(key)); - cfkCache.release(safeCfk); + timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((PartitionKey) k)); + AccordSafeTimestampsForKey safeTimestamps = timestampsCache.acquire(key); + testLoad(executor, safeTimestamps, new TimestampsForKey(key)); + timestampsCache.release(safeTimestamps); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.NONE); // everything is cached, so the loader should return immediately commandStore.executeBlocking(() -> { Context context = new Context(); boolean result = loader.load(context, (o, t) -> Assert.fail()); Assert.assertEquals(safeCommand.global(), context.commands.get(txnId).global()); - Assert.assertEquals(safeCfk.global(), context.commandsForKeys.get(key).global()); + Assert.assertEquals(safeTimestamps.global(), context.timestampsForKey.get(key).global()); Assert.assertTrue(result); }); Assert.assertSame(safeCommand.global(), commandCache.getUnsafe(txnId)); - Assert.assertSame(safeCfk.global(), cfkCache.getUnsafe(key)); + Assert.assertSame(safeTimestamps.global(), timestampsCache.getUnsafe(key)); } /** @@ -136,20 +141,21 @@ public void loadTest() safeCommand.set(notDefined(txnId, txn)); AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); - AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); - safeCommand.preExecute(); - cfk.set(commandsForKey(key)); - AccordKeyspace.getCommandsForKeyMutation(commandStore, cfk, commandStore.nextSystemTimestampMicros()).apply(); + AccordSafeTimestampsForKey timestamps = new AccordSafeTimestampsForKey(loaded(key, null)); + timestamps.preExecute(); + timestamps.initialize(); + + AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, timestamps.current(), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.DEPS); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.commandsForKeys.containsKey(key)); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); cbFired.setSuccess(null); }); Assert.assertFalse(result); @@ -161,7 +167,7 @@ public void loadTest() commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.commandsForKeys.containsKey(key)); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); Assert.assertTrue(result); }); } @@ -187,20 +193,17 @@ public void partialLoadTest() testLoad(executor, safeCommand, notDefined(txnId, txn)); commandCache.release(safeCommand); - - AccordSafeCommandsForKey safeCfk = new AccordSafeCommandsForKey(loaded(key, null)); - safeCfk.set(commandsForKey(key)); - AccordKeyspace.getCommandsForKeyMutation(commandStore, safeCfk, commandStore.nextSystemTimestampMicros()).apply(); + AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, new TimestampsForKey(key), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.NONE); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.commandsForKeys.containsKey(key)); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); cbFired.setSuccess(null); }); Assert.assertFalse(result); @@ -214,7 +217,7 @@ public void partialLoadTest() boolean result = loader.load(context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.commandsForKeys.containsKey(key)); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); Assert.assertTrue(result); }); } @@ -231,16 +234,16 @@ public void inProgressLoadTest() throws Throwable createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); commandStore.executor().submit(() -> commandStore.setCapacity(1024)).get(); AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance cfkCache = commandStore.commandsForKeyCache(); + AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); // acquire / release - cfkCache.unsafeSetLoadFunction(k -> commandsForKey((PartitionKey) k)); - AccordSafeCommandsForKey safeCfk = cfkCache.acquire(key); - testLoad(executor, safeCfk, commandsForKey(key)); - cfkCache.release(safeCfk); + timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((PartitionKey) k)); + AccordSafeTimestampsForKey safeTimestamps = timestampsCache.acquire(key); + testLoad(executor, safeTimestamps, new TimestampsForKey(key)); + timestampsCache.release(safeTimestamps); commandCache.unsafeSetLoadFunction(id -> { Assert.assertEquals(txnId, id); return notDefined(id, txn); }); AccordSafeCommand safeCommand = commandCache.acquire(txnId); @@ -248,7 +251,7 @@ public void inProgressLoadTest() throws Throwable Assert.assertTrue(commandCache.isReferenced(txnId)); Assert.assertFalse(commandCache.isLoaded(txnId)); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key)); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.NONE); // since there's a read future associated with the txnId, we'll wait for it to load AsyncPromise cbFired = new AsyncPromise<>(); @@ -257,7 +260,7 @@ public void inProgressLoadTest() throws Throwable boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.commandsForKeys.containsKey(key)); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); cbFired.setSuccess(null); }); Assert.assertFalse(result); @@ -273,7 +276,7 @@ public void inProgressLoadTest() throws Throwable commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.commandsForKeys.containsKey(key)); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); Assert.assertTrue(result); }); } @@ -303,7 +306,7 @@ else if (txnId.equals(txnId2)) throw new AssertionError("Unknown txnId: " + txnId); }); - AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY); + AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY, KeyHistory.DEPS); boolean result = loader.load(new Context(), (u, t) -> { Assert.assertFalse(callback.isDone()); @@ -318,4 +321,118 @@ else if (txnId.equals(txnId2)) promise.tryFailure(failure); AsyncChains.getUninterruptibly(callback); } + + @Test + public void inProgressCommandSaveTest() + { + AtomicLong clock = new AtomicLong(0); + ManualExecutor executor = new ManualExecutor(); + AccordCommandStore commandStore = + createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); + AccordStateCache.Instance commandCache = commandStore.commandCache(); + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + PartialTxn txn = createPartialTxn(0); + + // acquire / release + + commandCache.unsafeSetLoadFunction(id -> notDefined(id, txn)); + commandCache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); + + AccordSafeCommand safeCommand = commandCache.acquire(txnId); + testLoad(executor, safeCommand, notDefined(txnId, txn)); + safeCommand.set(preaccepted(txnId, txn, safeCommand.txnId())); + commandCache.release(safeCommand); + + Assert.assertEquals(AccordCachingState.Status.MODIFIED, commandCache.getUnsafe(txnId).status()); + commandCache.getUnsafe(txnId).save(executor, (before, after) -> () -> {}); + Assert.assertEquals(AccordCachingState.Status.SAVING, commandCache.getUnsafe(txnId).status()); + + // since the command is still saving, the loader shouldn't be able to acquire a reference + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(), KeyHistory.NONE); + AsyncPromise cbFired = new AsyncPromise<>(); + Context context = new Context(); + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> { + Assert.assertNull(t); + Assert.assertTrue(context.commands.containsKey(txnId)); + cbFired.setSuccess(null); + }); + Assert.assertFalse(result); + }); + + Assert.assertEquals(AccordCachingState.Status.SAVING, commandCache.getUnsafe(txnId).status()); + executor.runOne(); + cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); + Assert.assertEquals(AccordCachingState.Status.LOADED, commandCache.getUnsafe(txnId).status()); + + // then return immediately after the callback has fired + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(context.commands.containsKey(txnId)); + Assert.assertTrue(result); + }); + } + + @Test + public void inProgressCFKSaveTest() + { + AtomicLong clock = new AtomicLong(0); + ManualExecutor executor = new ManualExecutor(); + AccordCommandStore commandStore = + createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); + + AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); + timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((PartitionKey) k)); + timestampsCache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); + + AccordStateCache.Instance updateCache = commandStore.updatesForKeyCache(); + updateCache.unsafeSetLoadFunction(k -> { throw new AssertionError("updates shouldn't be loaded"); }); + updateCache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); + + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + PartialTxn txn = createPartialTxn(0); + PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + Command preaccepted = preaccepted(txnId, txn, txnId); + + // acquire / release + + AccordSafeTimestampsForKey safeTimestamps = timestampsCache.acquireOrInitialize(key, k -> new TimestampsForKey((Key) k)); + timestampsCache.release(safeTimestamps); + Assert.assertEquals(AccordCachingState.Status.LOADED, timestampsCache.getUnsafe(key).status()); + + AccordSafeCommandsForKeyUpdate safeUpdate = updateCache.acquireOrInitialize(key, CommandsForKeyUpdate::empty); + safeUpdate.common().commands().add(txnId, preaccepted); + safeUpdate.setUpdates(); + updateCache.release(safeUpdate); + + Assert.assertEquals(AccordCachingState.Status.MODIFIED, updateCache.getUnsafe(key).status()); + updateCache.getUnsafe(key).save(executor, (before, after) -> () -> {}); + Assert.assertEquals(AccordCachingState.Status.SAVING, updateCache.getUnsafe(key).status()); + + // since the command is still saving, the loader shouldn't be able to acquire a reference + AsyncLoader loader = new AsyncLoader(commandStore, emptyList(), Keys.of(key), KeyHistory.NONE); + AsyncPromise cbFired = new AsyncPromise<>(); + Context context = new Context(); + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> { + Assert.assertNull(t); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); + cbFired.setSuccess(null); + }); + Assert.assertFalse(result); + }); + + Assert.assertEquals(AccordCachingState.Status.SAVING, updateCache.getUnsafe(key).status()); + executor.runOne(); + cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); + Assert.assertEquals(AccordCachingState.Status.LOADED, updateCache.getUnsafe(key).status()); + + // then return immediately after the callback has fired + commandStore.executeBlocking(() -> { + boolean result = loader.load(context, (o, t) -> Assert.fail()); + Assert.assertTrue(context.timestampsForKey.containsKey(key)); + Assert.assertTrue(result); + }); + } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 086c9537fea8..66f796e4a7a5 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -68,6 +68,7 @@ import org.apache.cassandra.db.transform.FilteredPartitions; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordCachingState; import org.apache.cassandra.service.accord.AccordCommandStore; @@ -111,8 +112,10 @@ public static void beforeClass() throws Throwable @Before public void before() { - QueryProcessor.executeInternal("TRUNCATE system_accord.commands"); - QueryProcessor.executeInternal("TRUNCATE system_accord.commands_for_key"); + QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.COMMANDS)); + QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.TIMESTAMPS_FOR_KEY)); + QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.DEPS_COMMANDS_FOR_KEY)); + QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.ALL_COMMANDS_FOR_KEY)); } /** @@ -146,12 +149,12 @@ public void optionalCommandsForKeyTest() throws Throwable PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); getUninterruptibly(commandStore.execute(contextFor(key), instance -> { - SafeCommandsForKey cfk = ((AccordSafeCommandStore) instance).maybeCommandsForKey(key); + SafeCommandsForKey cfk = ((AccordSafeCommandStore) instance).maybeDepsCommandsForKey(key); Assert.assertNull(cfk); })); long nowInSeconds = FBUtilities.nowInSeconds(); - SinglePartitionReadCommand command = AccordKeyspace.getCommandsForKeyRead(commandStore.id(), key, nowInSeconds); + SinglePartitionReadCommand command = AccordKeyspace.getDepsCommandsForKeyRead(commandStore.id(), key, (int) nowInSeconds); try(ReadExecutionController controller = command.executionController(); FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) { @@ -277,7 +280,7 @@ private AccordStateCache.Instance cache() @Override AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) { - return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys()) + return new AsyncLoader(commandStore, txnIds(preLoadContext), preLoadContext.keys(), preLoadContext.keyHistory()) { @Override void state(State state) @@ -422,7 +425,7 @@ private static void assertNoReferences(AccordCommandStore commandStore, List) (Iterable) keys); + assertNoReferences(commandStore.depsCommandsForKeyCache(), (Iterable) (Iterable) keys); } catch (AssertionError e) { @@ -464,7 +467,7 @@ private static void awaitDone(AccordCommandStore commandStore, List ids, { awaitDone(commandStore.commandCache(), ids); //TODO this is due to bad typing for Instance, it doesn't use ? extends RoutableKey - awaitDone(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); + awaitDone(commandStore.depsCommandsForKeyCache(), (Iterable) (Iterable) keys); } private static void awaitDone(AccordStateCache.Instance cache, Iterable keys) From a9cb71833a0009e09a40dd869673d3205fc50d89 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Wed, 29 Nov 2023 17:12:46 -0500 Subject: [PATCH 084/340] Fix additional live migration/interop merge issues s fc83325fa9 Fix AccordObjectSizes empty sizes s 44bce6a08c Fix error handling when there are no column families to repair s f1459540e5 Fix broken nowInSec deserialization s 91c1befd1c Fix Mutation serializedSize s b11467c200 Using simulated clock instead of raw nanoTime s 6412b35924 Accord repair needs to use sentinel tokens s d19c01ff01 Enable Accord repair with HappyPathFuzzTest s 2da87f91b8 Implement abort in AccordRepairJob s e12877c4fa move barrier to accord spec Patch by Ariel Weisberg; Reviewed by David Capwell for CASSANDRA-19023 --- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 15 ++ .../config/CassandraRelevantProperties.java | 1 + .../org/apache/cassandra/config/Config.java | 15 -- .../cassandra/config/DatabaseDescriptor.java | 8 +- .../db/virtual/LocalRepairTables.java | 26 +--- .../cassandra/repair/AbstractRepairJob.java | 2 +- .../cassandra/repair/AccordRepairJob.java | 43 +++--- .../cassandra/repair/RepairCoordinator.java | 1 + .../repair/messages/RepairOption.java | 10 ++ .../repair/state/CoordinatorState.java | 24 ++++ .../service/accord/AccordService.java | 34 +++-- .../service/accord/AccordTopologyUtils.java | 13 +- .../apache/cassandra/tcm/ClusterMetadata.java | 10 ++ .../org/apache/cassandra/ServerTestUtils.java | 37 ++++- .../repair/FailingRepairFuzzTest.java | 10 +- .../apache/cassandra/repair/FuzzTestBase.java | 135 ++++++++++++++++-- .../cassandra/repair/HappyPathFuzzTest.java | 29 ++++ 18 files changed, 329 insertions(+), 86 deletions(-) diff --git a/modules/accord b/modules/accord index d9ef555302f8..3ca9e5502419 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit d9ef555302f8774ed03325ba22d38ee0b80130a8 +Subproject commit 3ca9e55024192e9b7c38ad5330229830343b74f2 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index b025d561b66d..2143342bd44b 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -27,4 +27,19 @@ public class AccordSpec public volatile OptionaldPositiveInt shard_count = OptionaldPositiveInt.UNDEFINED; public volatile DurationSpec.IntSecondsBound progress_log_schedule_delay = new DurationSpec.IntSecondsBound(1); + + /** + * When a barrier transaction is requested how many times to repeat attempting the barrier before giving up + */ + public int barrier_retry_attempts = 5; + + /** + * When a barrier transaction fails how long the initial backoff should be before being increased + * as part of exponential backoff on each attempt + */ + public DurationSpec.IntMillisecondsBound barrier_retry_inital_backoff_millis = new DurationSpec.IntMillisecondsBound("1s"); + + public DurationSpec.IntMillisecondsBound barrier_max_backoff = new DurationSpec.IntMillisecondsBound("10m"); + + public DurationSpec.IntMillisecondsBound range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); } diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index c79e7e27469e..f1f50e589f8c 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -37,6 +37,7 @@ /** A class that extracts system properties for the cassandra node it runs within. */ public enum CassandraRelevantProperties { + ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL("cassandra.accord.repair.range_step_update_interval", "100"), ACQUIRE_RETRY_SECONDS("cassandra.acquire_retry_seconds", "60"), ACQUIRE_SLEEP_MS("cassandra.acquire_sleep_ms", "1000"), ALLOCATE_TOKENS_FOR_KEYSPACE("cassandra.allocate_tokens_for_keyspace"), diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 2dd95248feaa..3352ce9fed7c 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -1170,21 +1170,6 @@ public enum PaxosOnLinearizabilityViolation public LWTStrategy lwt_strategy = LWTStrategy.migration; public NonSerialWriteStrategy non_serial_write_strategy = NonSerialWriteStrategy.normal; - /** - * When a barrier transaction is requested how many times to repeat attempting the barrier before giving up - */ - public int accord_barrier_retry_attempts = 5; - - /** - * When a barrier transaction fails how long the initial backoff should be before being increased - * as part of exponential backoff on each attempt - */ - public DurationSpec.IntMillisecondsBound accord_barrier_retry_inital_backoff_millis = new DurationSpec.IntMillisecondsBound("1s"); - - public DurationSpec.IntMillisecondsBound accord_barrier_max_backoff = new DurationSpec.IntMillisecondsBound("10m"); - - public DurationSpec.IntMillisecondsBound accord_range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); - public volatile int max_top_size_partition_count = 10; public volatile int max_top_tombstone_partition_count = 10; public volatile DataStorageSpec.LongBytesBound min_tracked_partition_size = new DataStorageSpec.LongBytesBound("1MiB"); diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 757407ad7bac..12da2dae429b 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -3676,22 +3676,22 @@ public static void setNonSerialWriteStrategy(NonSerialWriteStrategy nonSerialWri public static int getAccordBarrierRetryAttempts() { - return conf.accord_barrier_retry_attempts; + return conf.accord.barrier_retry_attempts; } public static long getAccordBarrierRetryInitialBackoffMillis() { - return conf.accord_barrier_retry_inital_backoff_millis.toMilliseconds(); + return conf.accord.barrier_retry_inital_backoff_millis.toMilliseconds(); } public static long getAccordBarrierRetryMaxBackoffMillis() { - return conf.accord_barrier_max_backoff.toMilliseconds(); + return conf.accord.barrier_max_backoff.toMilliseconds(); } public static long getAccordRangeBarrierTimeoutNanos() { - return conf.accord_range_barrier_timeout.to(TimeUnit.NANOSECONDS); + return conf.accord.range_barrier_timeout.to(TimeUnit.NANOSECONDS); } public static void setNativeTransportMaxRequestDataInFlightPerIpInBytes(long maxRequestDataInFlightInBytes) diff --git a/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java b/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java index 0012382bd9cc..34aeccf128c2 100644 --- a/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java +++ b/src/java/org/apache/cassandra/db/virtual/LocalRepairTables.java @@ -136,7 +136,7 @@ private void updateDataset(SimpleDataSet result, CoordinatorState state) { result.row(state.id); addState(result, state); - result.column("type", getType(state)); + result.column("type", state.getType()); result.column("keyspace_name", state.keyspace); result.column("command_id", state.cmd); @@ -169,30 +169,6 @@ private void updateDataset(SimpleDataSet result, CoordinatorState state) ranges = state.getCommonRanges(); result.column("unfiltered_ranges", ranges == null ? null : ranges.stream().map(c -> c.ranges).map(LocalRepairTables::toStringList).collect(Collectors.toList())); } - - private String getType(CoordinatorState state) - { - if (state.options.isPreview()) - { - switch (state.options.getPreviewKind()) - { - case ALL: return "preview full"; - case REPAIRED: return "preview repaired"; - case UNREPAIRED: return "preview unrepaired"; - case NONE: throw new AssertionError("NONE preview kind not expected when preview repair is set"); - default: throw new AssertionError("Unknown preview kind: " + state.options.getPreviewKind()); - } - } - else if (state.options.accordRepair()) - { - return "accord repair"; - } - else if (state.options.isIncremental()) - { - return "incremental"; - } - return "full"; - } } private static final class SessionTable extends AbstractVirtualTable diff --git a/src/java/org/apache/cassandra/repair/AbstractRepairJob.java b/src/java/org/apache/cassandra/repair/AbstractRepairJob.java index 7c4346e3bf5e..df3a67dbc99d 100644 --- a/src/java/org/apache/cassandra/repair/AbstractRepairJob.java +++ b/src/java/org/apache/cassandra/repair/AbstractRepairJob.java @@ -28,7 +28,7 @@ public abstract class AbstractRepairJob extends AsyncFuture implements Runnable { - private final SharedContext ctx; + protected final SharedContext ctx; public final JobState state; protected final RepairJobDesc desc; protected final RepairSession session; diff --git a/src/java/org/apache/cassandra/repair/AccordRepairJob.java b/src/java/org/apache/cassandra/repair/AccordRepairJob.java index 1736d799fa7b..c2e9eb792ec4 100644 --- a/src/java/org/apache/cassandra/repair/AccordRepairJob.java +++ b/src/java/org/apache/cassandra/repair/AccordRepairJob.java @@ -19,27 +19,27 @@ package org.apache.cassandra.repair; import java.math.BigInteger; -import java.util.List; import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.api.BarrierType; import accord.api.RoutingKey; import accord.primitives.Ranges; import accord.primitives.Seekables; import org.apache.cassandra.dht.AccordSplitter; import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.AccordTopologyUtils; import org.apache.cassandra.service.accord.TokenRange; -import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import static com.google.common.base.Preconditions.checkState; import static java.util.Collections.emptyList; -import static org.apache.cassandra.utils.Clock.Global.nanoTime; +import static org.apache.cassandra.config.CassandraRelevantProperties.ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL; /* * Accord repair consists of creating a barrier transaction for all the ranges which ensure that all Accord transactions @@ -47,6 +47,8 @@ */ public class AccordRepairJob extends AbstractRepairJob { + private static final Logger logger = LoggerFactory.getLogger(AccordRepairJob.class); + public static final BigInteger TWO = BigInteger.valueOf(2); private final Ranges ranges; @@ -57,16 +59,14 @@ public class AccordRepairJob extends AbstractRepairJob private Epoch minEpoch = ClusterMetadata.current().epoch; + private volatile Throwable shouldAbort = null; + public AccordRepairJob(RepairSession repairSession, String cfname) { super(repairSession, cfname); - List> normalizedRanges = Range.normalize(desc.ranges); - IPartitioner partitioner = normalizedRanges.get(0).left.getPartitioner(); - TokenRange[] tokenRanges = new TokenRange[normalizedRanges.size()]; - for (int i = 0; i < normalizedRanges.size(); i++) - tokenRanges[i] = new TokenRange(new TokenKey(ks.getName(), normalizedRanges.get(i).left), new TokenKey(ks.getName(), normalizedRanges.get(i).right)); - this.ranges = Ranges.of(tokenRanges); - this.splitter = partitioner.accordSplitter().apply(Ranges.of(tokenRanges)); + IPartitioner partitioner = desc.ranges.iterator().next().left.getPartitioner(); + this.ranges = AccordTopologyUtils.toAccordRanges(desc.keyspace, desc.ranges); + this.splitter = partitioner.accordSplitter().apply(ranges); } @Override @@ -91,15 +91,19 @@ protected void runRepair() @Override void abort(@Nullable Throwable reason) { - throw new UnsupportedOperationException("Have not implemented this yet, and the job runs synchronously so it isn't abortable"); + shouldAbort = reason == null ? new RuntimeException("Abort") : reason; } - private void repairRange(TokenRange range) + private void repairRange(TokenRange range) throws Throwable { + int rangeStepUpdateInterval = ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL.getInt(); RoutingKey remainingStart = range.start(); BigInteger rangeSize = splitter.sizeOf(range); if (rangeStep == null) - rangeStep = BigInteger.ONE.max(splitter.divide(rangeSize, 1000)); + { + BigInteger divide = splitter.divide(rangeSize, 1000); + rangeStep = divide.equals(BigInteger.ZERO) ? rangeSize : BigInteger.ONE.max(divide); + } BigInteger offset = BigInteger.ZERO; @@ -107,14 +111,16 @@ private void repairRange(TokenRange range) int iteration = 0; while (true) { + if (shouldAbort != null) + throw shouldAbort; iteration++; - if (iteration % 100 == 0) + if (iteration % rangeStepUpdateInterval == 0) rangeStep = rangeStep.multiply(TWO); BigInteger remaining = rangeSize.subtract(offset); BigInteger length = remaining.min(rangeStep); - long start = nanoTime(); + long start = ctx.clock().nanoTime(); boolean dependencyOverflow = false; try { @@ -123,7 +129,10 @@ private void repairRange(TokenRange range) if (splitter.compare(offset, rangeSize) >= 0) { if (remainingStart.equals(range.end())) + { + logger.info("Completed barriers for {} in {} iterations", range, iteration - 1); return; + } // Final repair is whatever remains toRepair = range.newRange(remainingStart, range.end()); diff --git a/src/java/org/apache/cassandra/repair/RepairCoordinator.java b/src/java/org/apache/cassandra/repair/RepairCoordinator.java index 79159c9fe10e..c2d2415a7c67 100644 --- a/src/java/org/apache/cassandra/repair/RepairCoordinator.java +++ b/src/java/org/apache/cassandra/repair/RepairCoordinator.java @@ -301,6 +301,7 @@ private void runMayThrow() throws Throwable this.traceState = maybeCreateTraceState(columnFamilies); notifyStarting(); NeighborsAndRanges neighborsAndRanges = getNeighborsAndRanges(); + // We test to validate the start JMX notification is seen before we compute neighbors and ranges // but in state (vtable) tracking, we rely on getNeighborsAndRanges to know where we are running repair... // JMX start != state start, its possible we fail in getNeighborsAndRanges and state start is never reached diff --git a/src/java/org/apache/cassandra/repair/messages/RepairOption.java b/src/java/org/apache/cassandra/repair/messages/RepairOption.java index bef7acfe16d7..626c2d18b56d 100644 --- a/src/java/org/apache/cassandra/repair/messages/RepairOption.java +++ b/src/java/org/apache/cassandra/repair/messages/RepairOption.java @@ -350,6 +350,16 @@ public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean this.accordRepair = accordRepair; } + public RepairOption withAccordRepair(boolean accordRepair) + { + RepairOption repairOption = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, pullRepair, forceRepair, previewKind, optimiseStreams, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); + repairOption.columnFamilies.addAll(columnFamilies); + repairOption.dataCenters.addAll(dataCenters); + repairOption.hosts.addAll(hosts); + repairOption.ranges.addAll(ranges); + return repairOption; + } + public RepairParallelism getParallelism() { return parallelism; diff --git a/src/java/org/apache/cassandra/repair/state/CoordinatorState.java b/src/java/org/apache/cassandra/repair/state/CoordinatorState.java index 5bc8a9e5d86a..43d17acca6e6 100644 --- a/src/java/org/apache/cassandra/repair/state/CoordinatorState.java +++ b/src/java/org/apache/cassandra/repair/state/CoordinatorState.java @@ -64,6 +64,30 @@ public CoordinatorState(Clock clock, int cmd, String keyspace, RepairOption opti this.options = Objects.requireNonNull(options); } + public String getType() + { + if (options.isPreview()) + { + switch (options.getPreviewKind()) + { + case ALL: return "preview full"; + case REPAIRED: return "preview repaired"; + case UNREPAIRED: return "preview unrepaired"; + case NONE: throw new AssertionError("NONE preview kind not expected when preview repair is set"); + default: throw new AssertionError("Unknown preview kind: " + options.getPreviewKind()); + } + } + else if (options.accordRepair()) + { + return "accord repair"; + } + else if (options.isIncremental()) + { + return "incremental"; + } + return "full"; + } + public Collection getSessions() { return sessions.values(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index fd115f8943bb..ce0f6f6f1e06 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -210,16 +210,17 @@ public Pair, DurableBefore> getRedundantBefor public void ensureKeyspaceIsAccordManaged(String keyspace) {} }; - private static volatile Node.Id localId = null; + private static volatile IAccordService instance = null; - private static class Handle + @VisibleForTesting + public static void unsafeSetNewAccordService() { - public static final AccordService instance = new AccordService(); + instance = null; } public static boolean isSetup() { - return localId != null; + return instance != null; } public static IVerbHandler verbHandlerOrNoop() @@ -228,22 +229,33 @@ public static IVerbHandler verbHandlerOrNoop() return instance().verbHandler(); } - public static void startup(NodeId tcmId) + public synchronized static void startup(NodeId tcmId) { - localId = AccordTopologyUtils.tcmIdToAccord(tcmId); - instance().startup(); + if (!DatabaseDescriptor.getAccordTransactionsEnabled()) + { + instance = NOOP_SERVICE; + return; + } + AccordService as = new AccordService(AccordTopologyUtils.tcmIdToAccord(tcmId)); + as.startup(); + instance = as; } public static void shutdownServiceAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { - if (localId == null) + IAccordService i = instance; + if (i == null) return; - instance().shutdownAndWait(timeout, unit); + i.shutdownAndWait(timeout, unit); } public static IAccordService instance() { - return DatabaseDescriptor.getAccordTransactionsEnabled() ? Handle.instance : NOOP_SERVICE; + if (!DatabaseDescriptor.getAccordTransactionsEnabled()) + return NOOP_SERVICE; + IAccordService i = instance; + Invariants.checkState(i != null, "AccordService was not started"); + return i; } public static long uniqueNow() @@ -258,7 +270,7 @@ public static long unix(TimeUnit timeUnit) return timeUnit.convert(Clock.Global.currentTimeMillis(), TimeUnit.MILLISECONDS); } - private AccordService() + private AccordService(Id localId) { Invariants.checkState(localId != null, "static localId must be set before instantiating AccordService"); logger.info("Starting accord with nodeId {}", localId); diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java index d8b757941ad9..e88587411b98 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -30,6 +31,7 @@ import com.google.common.collect.Sets; import accord.local.Node; +import accord.primitives.Ranges; import accord.topology.Shard; import accord.topology.Topology; import accord.utils.Invariants; @@ -49,7 +51,7 @@ public class AccordTopologyUtils { - static Node.Id tcmIdToAccord(NodeId nodeId) + public static Node.Id tcmIdToAccord(NodeId nodeId) { return new Node.Id(nodeId.id()); } @@ -95,6 +97,15 @@ static TokenRange range(String keyspace, Range range) range.right.equals(minToken) ? SentinelKey.max(keyspace) : new TokenKey(keyspace, range.right)); } + public static accord.primitives.Ranges toAccordRanges(String keyspace, Collection> ranges) + { + List> normalizedRanges = Range.normalize(ranges); + TokenRange[] tokenRanges = new TokenRange[normalizedRanges.size()]; + for (int i = 0; i < normalizedRanges.size(); i++) + tokenRanges[i] = range(keyspace, normalizedRanges.get(i)); + return Ranges.of(tokenRanges); + } + public static List createShards(KeyspaceMetadata keyspace, DataPlacements placements, Directory directory) { ReplicationParams replication = keyspace.params.replication; diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 2b9dd0a04d9a..8dfe9a6790ad 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -198,6 +198,16 @@ private ClusterMetadata(int metadataIdentifier, this.locator = Locator.usingDirectory(directory); } + public ClusterMetadata withDirectory(Directory directory) + { + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordKeyspaces, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + } + + public ClusterMetadata withPlacements(DataPlacements placements) + { + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordKeyspaces, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + } + public Set fullCMSMembers() { if (fullCMSEndpoints == null) diff --git a/test/unit/org/apache/cassandra/ServerTestUtils.java b/test/unit/org/apache/cassandra/ServerTestUtils.java index 130053658e01..337974345253 100644 --- a/test/unit/org/apache/cassandra/ServerTestUtils.java +++ b/test/unit/org/apache/cassandra/ServerTestUtils.java @@ -25,8 +25,10 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.function.Function; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; +import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,6 +54,7 @@ import org.apache.cassandra.security.ThreadAwareSecurityManager; import org.apache.cassandra.service.DiskErrorsHandlerService; import org.apache.cassandra.service.EmbeddedCassandraService; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.tcm.AtomicLongBackedProcessor; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -73,8 +76,10 @@ import org.apache.cassandra.tcm.transformations.cms.Initialize; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Sortable; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; /** * Utility methodes used by SchemaLoader and CQLTester to manage the server and its state. @@ -340,6 +345,36 @@ public static void recreateCMS() cms.mark(); } + public static void recreateAccord(NodeId tcmid) + { + if (!DatabaseDescriptor.getAccordTransactionsEnabled()) + return; + if (AccordService.isSetup()) + { + try + { + AccordService.instance().shutdownAndWait(1, TimeUnit.MINUTES); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (TimeoutException e) + { + throw new RuntimeException(e); + } + + Keyspace ks = Keyspace.open(ACCORD_KEYSPACE_NAME); + FBUtilities.waitOnFutures(ks.flush(ColumnFamilyStore.FlushReason.UNIT_TESTS)); + cleanupDirectory(DatabaseDescriptor.getAccordJournalDirectory()); + for (ColumnFamilyStore t : ks.getColumnFamilyStores()) + t.truncateBlockingWithoutSnapshot(); + + AccordService.unsafeSetNewAccordService(); + } + AccordService.startup(tcmid); + } + public static void markCMS() { ClusterMetadataService cms = ClusterMetadataService.instance(); diff --git a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java index cc0d781898f4..6ee81d154581 100644 --- a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java @@ -34,6 +34,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.repair.state.Completable; import org.apache.cassandra.streaming.StreamEventHandler; import org.apache.cassandra.streaming.StreamState; @@ -64,7 +65,14 @@ public void failingRepair() { Cluster.Node coordinator = coordinatorGen.next(rs); - RepairCoordinator repair = coordinator.repair(KEYSPACE, repairOption(rs, coordinator, KEYSPACE, TABLES), false); + // exclude accord repair as this test breaks validation/sync; which accord doesn't have + RepairOption options; + do + { + options = repairOption(rs, coordinator, KEYSPACE, TABLES); + } + while (options.accordRepair()); + RepairCoordinator repair = coordinator.repair(KEYSPACE, options, false); repair.run(); InetAddressAndPort failingAddress = pickParticipant(rs, coordinator, repair); Cluster.Node failingNode = cluster.nodes.get(failingAddress); diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 9eafb67d9b59..7dcf4c640418 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -52,7 +52,6 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import org.apache.cassandra.config.UnitConfigOverride; import org.junit.BeforeClass; import accord.utilsfork.DefaultRandom; @@ -73,6 +72,7 @@ import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.UnitConfigOverride; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Digest; @@ -99,6 +99,7 @@ import org.apache.cassandra.locator.LocalStrategy; import org.apache.cassandra.locator.Locator; import org.apache.cassandra.locator.RangesAtEndpoint; +import org.apache.cassandra.locator.Replica; import org.apache.cassandra.net.ConnectionType; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -116,6 +117,7 @@ import org.apache.cassandra.repair.state.ValidationState; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableId; @@ -123,6 +125,8 @@ import org.apache.cassandra.schema.Tables; import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordConfigurationService; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupComplete; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupHistory; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupRequest; @@ -137,12 +141,21 @@ import org.apache.cassandra.streaming.StreamingChannel; import org.apache.cassandra.streaming.StreamingDataInputPlus; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.listeners.ChangeListener; +import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tools.nodetool.Repair; import org.apache.cassandra.utils.AbstractTypeGenerators; import org.apache.cassandra.utils.CassandraGenerators; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.Closeable; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.FailingBiConsumer; import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.MBeanWrapper; @@ -158,6 +171,7 @@ import org.mockito.Mockito; import org.quicktheories.impl.JavaRandom; +import static org.apache.cassandra.config.CassandraRelevantProperties.ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL; import static org.apache.cassandra.config.CassandraRelevantProperties.CLOCK_GLOBAL; import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; @@ -177,6 +191,7 @@ public abstract class FuzzTestBase extends CQLTester.InMemory public static void setUpClass() { ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION.setBoolean(true); + ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL.setInt(1); CLOCK_GLOBAL.setString(ClockAccess.class.getName()); // when running in CI an external actor will replace the test configs based off the test type (such as trie, cdc, etc.), this could then have failing tests // that do not repo with the same seed! To fix that, go to UnitConfigOverride and update the config type to match the one that failed in CI, this should then @@ -288,8 +303,12 @@ public ExecutorBuilder configurePooled(String name, int // so don't want to deal with unlucky histories... DatabaseDescriptor.setRepairRpcTimeout(TimeUnit.DAYS.toMillis(1)); + // make sure accord is enabled as accord has custom repair steps + DatabaseDescriptor.setAccordTransactionsEnabled(true); InMemory.setUpClass(); + + MessagingService.instance().listen(); } public static void setupSchema() @@ -402,14 +421,26 @@ static void assertSuccess(Cluster cluster, int example, boolean shouldSync, Repa for (JobState job : session.getJobs()) { EnumSet expected = EnumSet.allOf(JobState.State.class); - if (!shouldSnapshot) + if (repair.state.options.accordRepair()) { + // accord doesn't do snapshot, validation, or streaming expected.remove(JobState.State.SNAPSHOT_START); expected.remove(JobState.State.SNAPSHOT_COMPLETE); + expected.remove(JobState.State.VALIDATION_START); + expected.remove(JobState.State.VALIDATION_COMPLETE); + expected.remove(JobState.State.STREAM_START); } - if (!shouldSync) + else { - expected.remove(JobState.State.STREAM_START); + if (!shouldSnapshot) + { + expected.remove(JobState.State.SNAPSHOT_START); + expected.remove(JobState.State.SNAPSHOT_COMPLETE); + } + if (!shouldSync) + { + expected.remove(JobState.State.STREAM_START); + } } Set actual = job.getStateTimesMillis().keySet(); Assertions.assertThat(actual).isEqualTo(expected); @@ -554,11 +585,29 @@ static RepairOption previewOption(RandomSource rs, Cluster.Node coordinator, Str private static RepairOption repairOption(RandomSource rs, Cluster.Node coordinator, String ks, Gen> tablesGen, Gen repairTypeGen, Gen previewTypeGen, Gen repairParallelismGen) { + RepairType type = repairTypeGen.next(rs); + PreviewType previewType = previewTypeGen.next(rs); + boolean accordRepair = type == RepairType.FULL && previewType == PreviewType.NONE ? rs.nextBoolean() : false; List args = new ArrayList<>(); args.add(ks); - args.addAll(tablesGen.next(rs)); - args.add("-pr"); - RepairType type = repairTypeGen.next(rs); + List tables = tablesGen.next(rs); + args.addAll(tables); + if (accordRepair) + { + List> ranges = new ArrayList<>(StorageService.instance.getReplicas(ks, coordinator.broadcastAddressAndPort()).ranges()); + ranges.sort(Comparator.naturalOrder()); + Range range = ranges.get(rs.nextInt(0, ranges.size())); + args.add("--start-token"); + args.add(range.left.toString()); + args.add("--end-token"); + Murmur3Partitioner.LongToken left = (Murmur3Partitioner.LongToken) range.left; + Token right = rs.nextBoolean() ? new Murmur3Partitioner.LongToken(left.token + 100) : range.right; + args.add(right.toString()); + } + else + { + args.add("-pr"); + } switch (type) { case IR: @@ -570,7 +619,6 @@ private static RepairOption repairOption(RandomSource rs, Cluster.Node coordinat default: throw new AssertionError("Unsupported repair type: " + type); } - PreviewType previewType = previewTypeGen.next(rs); switch (previewType) { case NONE: @@ -601,6 +649,8 @@ private static RepairOption repairOption(RandomSource rs, Cluster.Node coordinat } if (rs.nextBoolean()) args.add("--optimise-streams"); RepairOption options = RepairOption.parse(Repair.parseOptionMap(() -> "test", args), DatabaseDescriptor.getPartitioner()); + if (accordRepair) + options = options.withAccordRepair(true); if (options.getRanges().isEmpty()) { if (options.isPrimaryRange()) @@ -757,9 +807,71 @@ static class Cluster ClusterMetadataTestHelper.register(inst.broadcastAddressAndPort()); ClusterMetadataTestHelper.join(inst.broadcastAddressAndPort(), inst.tokens()); } + List addresses = new ArrayList<>(nodes.keySet()); + addresses.sort(Comparator.naturalOrder()); + NodeId tcmid = ClusterMetadata.current().directory.peerId(addresses.get(rs.nextInt(0, addresses.size()))); + ServerTestUtils.recreateAccord(tcmid); + interceptTCMNotifications(tcmid); + setupSchema(); } + private void interceptTCMNotifications(NodeId tcmid) + { + AccordService as = (AccordService) AccordService.instance(); + AccordConfigurationService config = as.configurationService(); + ClusterMetadataService.instance().log().removeListener(config); + ClusterMetadataService.instance().log().addListener(new ChangeListener() + { + @Override + public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) + { + config.notifyPostCommit(sanitize(prev, tcmid), sanitize(next, tcmid), fromSnapshot); + } + }); + } + + private ClusterMetadata sanitize(ClusterMetadata metadata, NodeId tcmid) + { + if (metadata.directory.isEmpty()) + return metadata; + ClusterMetadata sanitized = metadata.withDirectory(sanitize(metadata.directory, tcmid)) + .withPlacements(sanitize(metadata.placements, FBUtilities.getBroadcastAddressAndPort())); + return sanitized; + } + + private Directory sanitize(Directory directory, NodeId tcmid) + { + if (directory.getNodeAddresses(tcmid) == null) + throw new AssertionError("Expected node " + tcmid + " but not found in " + directory); + for (NodeId peer : directory.peerIds()) + { + if (peer.equals(tcmid)) + continue; + directory = directory.without(peer); + } + directory = directory.withNodeAddresses(tcmid, NodeAddresses.current()); + return directory; + } + + private DataPlacements sanitize(DataPlacements placements, InetAddressAndPort endpoint) + { + DataPlacements.Builder builder = DataPlacements.builder(placements.size()); + for (Map.Entry e : placements) + builder.with(e.getKey(), sanitize(placements.lastModified(), e.getValue(), endpoint)); + return builder.build(); + } + + private DataPlacement sanitize(Epoch epoch, DataPlacement value, InetAddressAndPort endpoint) + { + DataPlacement.Builder builder = DataPlacement.builder(); + for (Range e : value.writes.ranges()) + builder.withWriteReplica(epoch, new Replica(endpoint, e, true)); + for (Range e : value.reads.ranges()) + builder.withReadReplica(epoch, new Replica(endpoint, e, true)); + return builder.build(); + } + public Closeable addListener(MessageListener listener) { listeners.add(listener); @@ -1295,6 +1407,8 @@ public RepairCoordinator repair(String ks, RepairOption options, boolean addFail failures.add(new AssertionError(event.getMessage())); }); } + if (repair.state.options.accordRepair()) + AccordService.instance().ensureKeyspaceIsAccordManaged(repair.state.keyspace); return repair; } @@ -1449,7 +1563,10 @@ private void checkAccess() if (("org.apache.cassandra.service.paxos.Paxos".equals(next.getClassName()) && "newBallot".equals(next.getMethodName())) || ("org.apache.cassandra.service.paxos.uncommitted.PaxosBallotTracker".equals(next.getClassName()) && "updateLowBound".equals(next.getMethodName()))) return Access.MAIN_THREAD_ONLY; - if (next.getClassName().startsWith("org.apache.cassandra.db.") + if (next.getClassName().startsWith("org.apache.cassandra.accord.") + || next.getClassName().startsWith("org.apache.cassandra.journal.") + || next.getClassName().startsWith("org.apache.cassandra.service.accord.") + || next.getClassName().startsWith("org.apache.cassandra.db.") || next.getClassName().startsWith("org.apache.cassandra.gms.") || next.getClassName().startsWith("org.apache.cassandra.cql3.") || next.getClassName().startsWith("org.apache.cassandra.metrics.") diff --git a/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java b/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java index b145e28fb943..980942193114 100644 --- a/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java @@ -19,12 +19,18 @@ package org.apache.cassandra.repair; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.LongStream; import org.junit.Test; import accord.utilsfork.Gen; import accord.utilsfork.Gens; +import org.agrona.collections.LongArrayList; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.utils.Closeable; @@ -38,6 +44,8 @@ public void happyPath() { // disable all retries, no delays/drops are possible DatabaseDescriptor.getRepairRetrySpec().maxAttempts = RetrySpec.MaxAttempt.DISABLED; + Map repairTypeRuntimes = new HashMap<>(); + long realStartNanos = System.nanoTime(); qt().withPure(false).withExamples(10).check(rs -> { Cluster cluster = new Cluster(rs); Gen coordinatorGen = Gens.pick(cluster.nodes.keySet()).map(cluster.nodes::get); @@ -47,6 +55,7 @@ public void happyPath() { Cluster.Node coordinator = coordinatorGen.next(rs); + long nowNanos = System.nanoTime(); RepairCoordinator repair = coordinator.repair(KEYSPACE, repairOption(rs, coordinator, KEYSPACE, TABLES)); repair.run(); boolean shouldSync = rs.nextBoolean(); @@ -54,9 +63,29 @@ public void happyPath() closeables.add(cluster.nodes.get(pickParticipant(rs, coordinator, repair)).doValidation((cfs, validator) -> addMismatch(rs, cfs, validator))); runAndAssertSuccess(cluster, example, shouldSync, repair); + repairTypeRuntimes.computeIfAbsent(repair.state.getType(), ignore -> new LongArrayList()).addLong(System.nanoTime() - nowNanos); closeables.forEach(Closeable::close); closeables.clear(); } }); + long realDurationNanos = System.nanoTime() - realStartNanos; + long repairDurationsNanos = 0; + StringBuilder sb = new StringBuilder(); + for (Map.Entry e : repairTypeRuntimes.entrySet()) + { + sb.append(e.getKey()); + long[] times = e.getValue().toLongArray(); + repairDurationsNanos += LongStream.of(times).sum(); + Arrays.sort(times); + long min = times[0]; + long median = times[times.length / 2]; + long max = times[times.length - 1]; + sb.append(": min=").append(TimeUnit.NANOSECONDS.toMillis(min)) + .append(", median=").append(TimeUnit.NANOSECONDS.toMillis(median)) + .append(", max=").append(TimeUnit.NANOSECONDS.toMillis(max)) + .append(", count=").append(times.length) + .append('\n'); + } + logger.info("Repair runtimes (in millis):\nTest Duration {}\nRepair Duration {}\n{}", TimeUnit.NANOSECONDS.toMillis(realDurationNanos), TimeUnit.NANOSECONDS.toMillis(repairDurationsNanos), sb); } } From 021b06639e04f55bcfe162b265cf13e4e92619b9 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Tue, 5 Dec 2023 17:31:50 -0500 Subject: [PATCH 085/340] Fix Mutation serializer Fix AccordJournal.Type interop entries --- .../org/apache/cassandra/db/Mutation.java | 4 +- .../service/accord/AccordJournal.java | 6 +- .../service/accord/AccordKeyspace.java | 4 +- .../service/StorageServiceServerTest.java | 88 +++++++++---------- .../accord/AccordSyncPropagatorTest.java | 2 +- .../tools/nodetool/TableHistogramsTest.java | 15 ++-- 6 files changed, 58 insertions(+), 61 deletions(-) diff --git a/src/java/org/apache/cassandra/db/Mutation.java b/src/java/org/apache/cassandra/db/Mutation.java index 1f9d2c86be86..9c51cd13764b 100644 --- a/src/java/org/apache/cassandra/db/Mutation.java +++ b/src/java/org/apache/cassandra/db/Mutation.java @@ -541,7 +541,7 @@ public Mutation deserialize(DataInputPlus in, int version, DeserializationHelper boolean allowsOutOfRangeMutations = false; if (version >= VERSION_51) { - int flags = in.readByte(); + int flags = teeIn.readByte(); allowsOutOfRangeMutations = allowsOutOfRangeMutations(flags); } int size = teeIn.readUnsignedVInt32(); @@ -642,7 +642,7 @@ long serializedSize(PartitionUpdate.PartitionUpdateSerializer serializer, Mutati if (size == 0L) { if (version >= VERSION_51) - size += ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG; // flags + size += TypeSizes.sizeof((byte)ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG); // flags size += TypeSizes.sizeofUnsignedVInt(mutation.modifications.size()); for (PartitionUpdate partitionUpdate : mutation.modifications.values()) size += serializer.serializedSize(partitionUpdate, version); diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index e3e1dd03f9d9..609b3dbc54a9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -524,9 +524,9 @@ public enum Type implements ValueSerializer APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), INTEROP_COMMIT_MINIMAL (90, INTEROP_COMMIT_MINIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_COMMIT_MAXIMAL (91, INTEROP_COMMIT_MAXIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_APPLY_MINIMAL (92, INTEROP_APPLY_MINIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropApply.serializer, TXN), - INTEROP_APPLY_MAXIMAL (93, INTEROP_APPLY_MAXIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropApply.serializer, TXN), + INTEROP_COMMIT_MAXIMAL (91, INTEROP_COMMIT_MAXIMAL_REQ, COMMIT_MAXIMAL_REQ, AccordInteropCommit.serializer, TXN), + INTEROP_APPLY_MINIMAL (92, INTEROP_APPLY_MINIMAL_REQ, APPLY_MINIMAL_REQ, AccordInteropApply.serializer, TXN), + INTEROP_APPLY_MAXIMAL (93, INTEROP_APPLY_MAXIMAL_REQ, APPLY_MAXIMAL_REQ, AccordInteropApply.serializer, TXN), BEGIN_RECOVER (72, BEGIN_RECOVER_REQ, RecoverySerializers.request, TXN ), BEGIN_INVALIDATE (73, BEGIN_INVALIDATE_REQ, BeginInvalidationSerializers.request, EPOCH), diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 836baed379f6..ab36c28ffd42 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -155,8 +155,8 @@ import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.MonotonicClock; -import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.Throwables; +import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static accord.utils.Invariants.checkArgument; @@ -666,7 +666,7 @@ public static KeyspaceMetadata metadata() return KeyspaceMetadata.create(ACCORD_KEYSPACE_NAME, KeyspaceParams.local(), tables(), Views.none(), Types.none(), UserFunctions.none()); } - private static Tables tables() + public static Tables tables() { return Tables.of(Commands, TimestampsForKeys, DepsCommandsForKeys, AllCommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata); } diff --git a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java index 3405dc9bbf13..7814b82ba08a 100644 --- a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java +++ b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java @@ -36,11 +36,11 @@ import org.junit.BeforeClass; import org.junit.Test; -import org.apache.cassandra.CassandraTestBase; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.audit.AuditLogManager; import org.apache.cassandra.audit.AuditLogOptions; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.dht.OrderPreservingPartitioner; import org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken; @@ -49,6 +49,7 @@ import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.WithPartitioner; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.ReplicationParams; @@ -68,7 +69,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class StorageServiceServerTest extends CassandraTestBase +public class StorageServiceServerTest { static final String DC1 = "DC1"; static final String DC2 = "DC2"; @@ -93,6 +94,7 @@ public static void setUp() throws ConfigurationException, UnknownHostException id4 = InetAddressAndPort.getByName("127.0.0.4"); id5 = InetAddressAndPort.getByName("127.0.0.5"); registerNodes(); + ServerTestUtils.markCMS(); } private static void registerNodes() @@ -157,7 +159,6 @@ public void testSnapshot() throws IOException } @Test - @UseOrderPreservingPartitioner public void testLocalPrimaryRangeForEndpointWithNetworkTopologyStrategy() throws Exception { setupDefaultPlacements(); @@ -189,7 +190,6 @@ public void testLocalPrimaryRangeForEndpointWithNetworkTopologyStrategy() throws } @Test - @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategy() throws Exception { setupDefaultPlacements(); @@ -225,7 +225,6 @@ public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategy() thr } @Test - @UseOrderPreservingPartitioner public void testPrimaryRangesWithNetworkTopologyStrategy() throws Exception { setupDefaultPlacements(); @@ -256,7 +255,6 @@ public void testPrimaryRangesWithNetworkTopologyStrategy() throws Exception } @Test - @UseOrderPreservingPartitioner public void testPrimaryRangesWithNetworkTopologyStrategyOneDCOnly() throws Exception { setupDefaultPlacements(); @@ -288,7 +286,6 @@ public void testPrimaryRangesWithNetworkTopologyStrategyOneDCOnly() throws Excep } @Test - @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategyOneDCOnly() throws Exception { setupDefaultPlacements(); @@ -320,7 +317,6 @@ public void testPrimaryRangeForEndpointWithinDCWithNetworkTopologyStrategyOneDCO } @Test - @UseOrderPreservingPartitioner public void testPrimaryRangesWithVnodes() throws Exception { // DC1 @@ -371,7 +367,6 @@ public void testPrimaryRangesWithVnodes() throws Exception } @Test - @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithVnodes() throws Exception { // DC1 @@ -436,7 +431,6 @@ public void testPrimaryRangeForEndpointWithinDCWithVnodes() throws Exception } @Test - @UseOrderPreservingPartitioner public void testPrimaryRangesWithSimpleStrategy() throws Exception { ClusterMetadataTestHelper.join(id1, new StringToken("A")); @@ -462,7 +456,6 @@ public void testPrimaryRangesWithSimpleStrategy() throws Exception /* Does not make much sense to use -local and -pr with simplestrategy, but just to prevent human errors */ @Test - @UseOrderPreservingPartitioner public void testPrimaryRangeForEndpointWithinDCWithSimpleStrategy() throws Exception { ClusterMetadataTestHelper.join(id1, new StringToken("A")); @@ -490,43 +483,46 @@ public void testPrimaryRangeForEndpointWithinDCWithSimpleStrategy() throws Excep } @Test - @UseMurmur3Partitioner public void testCreateRepairRangeFrom() throws Exception { - ClusterMetadataTestHelper.join(id1, new LongToken(1000L)); - ClusterMetadataTestHelper.join(id2, new LongToken(2000L)); - ClusterMetadataTestHelper.join(id3, new LongToken(3000L)); - ClusterMetadataTestHelper.join(id4, new LongToken(4000L)); - - Collection> repairRangeFrom = StorageService.instance.createRepairRangeFrom("1500", "3700"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(3); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1500L), new LongToken(2000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(3700L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "700"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(700L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "1700"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(2); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(1000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(1700L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("2500", "2300"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(5); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2500L), new LongToken(3000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(4000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(4000L), new LongToken(1000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(2000L))); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(2300L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "3000"); - Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); - Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); - - repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "2000"); - Assertions.assertThat(repairRangeFrom).isEmpty(); + try (WithPartitioner m3p = new WithPartitioner(Murmur3Partitioner.instance)) + { + registerNodes(); + ClusterMetadataTestHelper.join(id1, new LongToken(1000L)); + ClusterMetadataTestHelper.join(id2, new LongToken(2000L)); + ClusterMetadataTestHelper.join(id3, new LongToken(3000L)); + ClusterMetadataTestHelper.join(id4, new LongToken(4000L)); + + Collection> repairRangeFrom = StorageService.instance.createRepairRangeFrom("1500", "3700"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(3); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1500L), new LongToken(2000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(3700L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "700"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(700L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("500", "1700"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(2); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(500L), new LongToken(1000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(1700L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("2500", "2300"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(5); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2500L), new LongToken(3000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(3000L), new LongToken(4000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(4000L), new LongToken(1000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(1000L), new LongToken(2000L))); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(2300L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "3000"); + Assertions.assertThat(repairRangeFrom.size()).as(repairRangeFrom.toString()).isEqualTo(1); + Assertions.assertThat(repairRangeFrom).contains(new Range<>(new LongToken(2000L), new LongToken(3000L))); + + repairRangeFrom = StorageService.instance.createRepairRangeFrom("2000", "2000"); + Assertions.assertThat(repairRangeFrom).isEmpty(); + } } /** diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index bc694eee644a..919600e7796c 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -82,7 +82,7 @@ public class AccordSyncPropagatorTest @BeforeClass public static void setup() throws NoSuchFieldException, IllegalAccessException { - DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); ClusterMetadataService.unsetInstance(); ClusterMetadataService.setInstance(StubClusterMetadataService.forTesting()); diff --git a/test/unit/org/apache/cassandra/tools/nodetool/TableHistogramsTest.java b/test/unit/org/apache/cassandra/tools/nodetool/TableHistogramsTest.java index 4a233b22faf8..b3c51d196bf6 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/TableHistogramsTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/TableHistogramsTest.java @@ -17,19 +17,19 @@ */ package org.apache.cassandra.tools.nodetool; -import org.apache.cassandra.auth.AuthKeyspace; -import org.apache.cassandra.db.SystemKeyspace; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.SchemaKeyspace; -import org.apache.cassandra.schema.SystemDistributedKeyspace; -import org.apache.cassandra.tracing.TraceKeyspace; - import org.apache.commons.lang3.StringUtils; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.auth.AuthKeyspace; import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.SystemKeyspace; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SchemaKeyspace; +import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.tools.ToolRunner; +import org.apache.cassandra.tracing.TraceKeyspace; import static org.apache.cassandra.tools.ToolRunner.invokeNodetool; import static org.assertj.core.api.Assertions.assertThat; @@ -44,6 +44,7 @@ public class TableHistogramsTest extends CQLTester TraceKeyspace.TABLE_NAMES.size() + AuthKeyspace.TABLE_NAMES.size() + SystemDistributedKeyspace.TABLE_NAMES.size() + + AccordKeyspace.tables().size() + 1; // DistributedMetadataLogKeyspace contains a single table @BeforeClass From 1f4675a902618beac98567ec76af4a9b8cb5e7c2 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Mon, 4 Dec 2023 16:21:07 -0500 Subject: [PATCH 086/340] Fix AccordCommandsPurger universal durability check --- modules/accord | 2 +- .../db/compaction/CompactionIterator.java | 15 ++++++++------- .../cassandra/service/accord/AccordKeyspace.java | 1 + 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/modules/accord b/modules/accord index 3ca9e5502419..9f21a24660fe 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3ca9e55024192e9b7c38ad5330229830343b74f2 +Subproject commit 9f21a24660fe49881e0131813f9eff850e25b3dc diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index ae6f68294a49..7586bc651e21 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -31,7 +31,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Ordering; -import accord.local.Commands; +import accord.local.Cleanup; import accord.local.DurableBefore; import accord.local.RedundantBefore; import accord.local.SaveStatus; @@ -90,8 +90,8 @@ import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; -import static accord.local.Commands.Cleanup.TRUNCATE_WITH_OUTCOME; -import static accord.local.Status.Durability.Universal; +import static accord.local.Cleanup.TRUNCATE_WITH_OUTCOME; +import static accord.local.Cleanup.shouldCleanup; import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.cassandra.config.Config.PaxosStatePurging.legacy; @@ -803,7 +803,7 @@ protected Row applyToRow(Row row) // When commands end up being sliced by compaction we need this to discard tombstones and slices // without enough information to run the rest of the cleanup logic - if (durableBefore.min(txnId) == Universal) + if (Cleanup.isSafeToCleanup(durableBefore, txnId)) return null; Cell durabilityCell = row.getCell(CommandsColumns.durability); @@ -821,9 +821,10 @@ protected Row applyToRow(Row row) if (executeAt == null || durability == null || saveStatus == null || route == null) return row; - Commands.Cleanup cleanup = Commands.shouldCleanup(txnId, saveStatus.status, - durability, executeAt, route, - redundantBefore, durableBefore); + Cleanup cleanup = shouldCleanup(txnId, saveStatus.status, + durability, executeAt, route, + redundantBefore, durableBefore, + false); switch (cleanup) { default: throw new AssertionError(String.format("Unexpected cleanup task: %s", cleanup)); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index ab36c28ffd42..752c72338d9e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -364,6 +364,7 @@ public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSe // If durability is not universal we don't want to delete older versions of the row that might have recorded // a higher durability value. maybeDropTruncatedCommandColumns will take care of dropping things even if we don't drop via tombstones. // durability should be the only column that could have an older value that is insufficient for propagating forward + // TODO (now): with UniversalOrInvalidated should this change? boolean doDeletion = durability == Durability.Universal; // We may not have what we need to generate a deletion and include the outcome in the truncated row From e812127a8be210b0dc8c91d189b10a355d05bd3b Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Wed, 6 Dec 2023 11:11:43 -0800 Subject: [PATCH 087/340] Fix TombstoneCountWarningTest Don't use ImmediateExectuor in JVM dtests to process messages Fix GuardrailTablesTest Instance.receiveMessage should use sync Fix StorageAttachedIndexDDLTest failing due to background Accord compactions Add back enforceInvariants=false to shouldCleanup in AccordCommandsPurger Fix CompactionAccordIteratorsTest Fix empty row check in command/tfk mutation methods --- .../db/compaction/CompactionIterator.java | 58 +++++++--- .../cassandra/schema/SchemaConstants.java | 2 + .../service/accord/AccordKeyspace.java | 8 +- .../cassandra/distributed/impl/Instance.java | 7 +- .../thresholds/TombstoneCountWarningTest.java | 3 +- .../CompactionAccordIteratorsTest.java | 105 ++++++++++++------ .../apache/cassandra/index/sai/SAITester.java | 5 +- 7 files changed, 137 insertions(+), 51 deletions(-) diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 7586bc651e21..74836f845656 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -201,20 +201,35 @@ public CompactionIterator(OperationType type, if (topPartitionCollector != null) // need to count tombstones before they are purged merged = Transformation.apply(merged, new TopPartitionTracker.TombstoneCounter(topPartitionCollector, nowInSec)); merged = Transformation.apply(merged, new GarbageSkipper(controller)); - Transformation purger = isPaxos(controller.cfs) && paxosStatePurging() != legacy - ? new PaxosPurger() - : isAccordCommands(controller.cfs) - ? new AccordCommandsPurger(accordService) - : isAccordDepsCommandsForKey(controller.cfs) - ? new AccordCommandsForKeyPurger(AccordKeyspace.DepsCommandsForKeysAccessor, accordService) - : isAccordAllCommandsForKey(controller.cfs) - ? new AccordCommandsForKeyPurger(AccordKeyspace.AllCommandsForKeysAccessor, accordService) - : new Purger(controller, nowInSec); + Transformation purger = purger(controller.cfs, accordService); merged = Transformation.apply(merged, purger); merged = DuplicateRowChecker.duringCompaction(merged, type); compacted = Transformation.apply(merged, new AbortableUnfilteredPartitionTransformation(this)); } + private Transformation purger(ColumnFamilyStore cfs, Supplier accordService) + { + if (isPaxos(cfs) && paxosStatePurging() != legacy) + return new PaxosPurger(); + + // Topologies uses regular deletion so it can use a regular Purger + if (!requiresAccordSpecificPurger(cfs)) + return new Purger(controller, nowInSec); + + if (isAccordCommands(cfs)) + return new AccordCommandsPurger(accordService); + if (isAccordTimestampsForKey(cfs)) + return new AccordTimestampsForKeyPurger(accordService); + + if (isAccordDepsCommandsForKey(cfs)) + return new AccordCommandsForKeyPurger(AccordKeyspace.DepsCommandsForKeysAccessor, accordService); + + if (isAccordAllCommandsForKey(cfs)) + return new AccordCommandsForKeyPurger(AccordKeyspace.AllCommandsForKeysAccessor, accordService); + + throw new IllegalArgumentException("Unhandled accord table: " + cfs.keyspace.getName() + '.' + cfs.name); + } + public TableMetadata metadata() { return controller.cfs.metadata(); @@ -1030,23 +1045,38 @@ private static boolean isPaxos(ColumnFamilyStore cfs) return cfs.name.equals(SystemKeyspace.PAXOS) && cfs.getKeyspaceName().equals(SchemaConstants.SYSTEM_KEYSPACE_NAME); } + private static boolean requiresAccordSpecificPurger(ColumnFamilyStore cfs) + { + return cfs.getKeyspaceName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME) && + ImmutableSet.of(AccordKeyspace.COMMANDS, + AccordKeyspace.TIMESTAMPS_FOR_KEY, + AccordKeyspace.DEPS_COMMANDS_FOR_KEY, + AccordKeyspace.ALL_COMMANDS_FOR_KEY) + .contains(cfs.getTableName()); + } + + private static boolean isAccordTable(ColumnFamilyStore cfs, String name) + { + return cfs.name.equals(name) && cfs.getKeyspaceName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); + } + private static boolean isAccordCommands(ColumnFamilyStore cfs) { - return cfs.name.equals(AccordKeyspace.COMMANDS) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); + return isAccordTable(cfs, AccordKeyspace.COMMANDS); } - private static boolean isAccordCommandsForKey(ColumnFamilyStore cfs, String name) + private static boolean isAccordTimestampsForKey(ColumnFamilyStore cfs) { - return cfs.name.equals(name) && cfs.keyspace.getName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); + return isAccordTable(cfs, AccordKeyspace.TIMESTAMPS_FOR_KEY); } private static boolean isAccordDepsCommandsForKey(ColumnFamilyStore cfs) { - return isAccordCommandsForKey(cfs, AccordKeyspace.DEPS_COMMANDS_FOR_KEY); + return isAccordTable(cfs, AccordKeyspace.DEPS_COMMANDS_FOR_KEY); } private static boolean isAccordAllCommandsForKey(ColumnFamilyStore cfs) { - return isAccordCommandsForKey(cfs, AccordKeyspace.ALL_COMMANDS_FOR_KEY); + return isAccordTable(cfs, AccordKeyspace.ALL_COMMANDS_FOR_KEY); } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/schema/SchemaConstants.java b/src/java/org/apache/cassandra/schema/SchemaConstants.java index f264c7442779..11b43ec3d754 100644 --- a/src/java/org/apache/cassandra/schema/SchemaConstants.java +++ b/src/java/org/apache/cassandra/schema/SchemaConstants.java @@ -30,6 +30,7 @@ import org.apache.cassandra.auth.AuthKeyspace; import org.apache.cassandra.db.Digest; import org.apache.cassandra.db.SystemKeyspace; +import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.tracing.TraceKeyspace; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; @@ -166,6 +167,7 @@ public static Set getLocalAndReplicatedSystemTableNames() .addAll(TraceKeyspace.TABLE_NAMES) .addAll(AuthKeyspace.TABLE_NAMES) .addAll(SystemDistributedKeyspace.TABLE_NAMES) + .addAll(AccordKeyspace.TABLE_NAMES) .build(); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 752c72338d9e..0bf3f686c54c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -183,6 +183,10 @@ public class AccordKeyspace public static final String EPOCH_METADATA = "epoch_metadata"; public static final String COMMAND_STORE_METADATA = "command_store_metadata"; + public static final Set TABLE_NAMES = ImmutableSet.of(COMMANDS, TIMESTAMPS_FOR_KEY, DEPS_COMMANDS_FOR_KEY, + ALL_COMMANDS_FOR_KEY, TOPOLOGIES, EPOCH_METADATA, + COMMAND_STORE_METADATA); + private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); private static final String TIMESTAMP_TUPLE = TIMESTAMP_TYPE.asCQL3Type().toString(); private static final TupleType KEY_TYPE = new TupleType(Arrays.asList(UUIDType.instance, BytesType.instance)); @@ -826,7 +830,7 @@ public static Mutation getCommandMutation(int storeId, Command original, Command } Row row = builder.build(); - if (row.isEmpty()) + if (row.columnCount() == 0) return null; ByteBuffer key = CommandsColumns.keyComparator.make(storeId, @@ -1326,7 +1330,7 @@ public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey addCellIfModified(TimestampsForKeyColumns.last_write_timestamp, TimestampsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); Row row = builder.build(); - if (row.isEmpty()) + if (row.columnCount() == 0) return null; ByteBuffer key = TimestampsForKeyColumns.makePartitionKey(storeId, current.key()); diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index a2d62eb11379..cac40690afbb 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -59,6 +59,7 @@ import org.apache.cassandra.concurrent.ExecutorFactory; import org.apache.cassandra.concurrent.ExecutorLocals; import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.concurrent.NamedThreadFactory; import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.concurrent.SharedExecutorPool; @@ -504,7 +505,7 @@ public static Message.Header deserializeHeader(IMessage message) @Override public void receiveMessage(IMessage message) { - async(receiveMessageRunnable(message)).apply(false); + sync(receiveMessageRunnable(message)).accept(false); } @Override @@ -554,6 +555,10 @@ private SerializableConsumer receiveMessageRunnable(IMessage message) inInstancelogger.warn("Dropping message {} due to stage {} being shutdown", messageIn, header.verb.stage); return; } + // This can cause deadlocks when sending messages to self so use Stage.MISC.executor() just to have a + // place for it to run + if ( executor == ImmediateExecutor.INSTANCE) + executor = Stage.MISC.executor(); executor.execute(ExecutorLocals.create(state), () -> MessagingService.instance().inboundSink.accept(messageIn)); } }; diff --git a/test/distributed/org/apache/cassandra/distributed/test/thresholds/TombstoneCountWarningTest.java b/test/distributed/org/apache/cassandra/distributed/test/thresholds/TombstoneCountWarningTest.java index 5e409cbc31e0..054256a08f1e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/thresholds/TombstoneCountWarningTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/thresholds/TombstoneCountWarningTest.java @@ -57,6 +57,7 @@ import org.apache.cassandra.distributed.test.JavaDriverUtils; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.exceptions.ReadFailureException; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.exceptions.TombstoneAbortException; import org.apache.cassandra.locator.InetAddressAndPort; @@ -425,7 +426,7 @@ public static void awaitResults(@SuperCall Runnable zuper) } @SuppressWarnings("unused") - public static void onFailure(InetAddressAndPort from, RequestFailureReason failureReason, @SuperCall Runnable zuper) throws Exception + public static void onFailure(InetAddressAndPort from, RequestFailure failure, @SuperCall Runnable zuper) throws Exception { State.onFailure(new InetSocketAddress(from.getAddress(), from.getPort())); zuper.run(); diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 728ecbb2d943..85ef473ac4b6 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -90,6 +90,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; +import static accord.impl.TimestampsForKey.NO_LAST_EXECUTED_HLC; import static accord.local.PreLoadContext.contextFor; import static accord.utils.async.AsyncChains.getUninterruptibly; import static org.apache.cassandra.Util.spinAssertEquals; @@ -98,12 +99,8 @@ import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.NOT_DURABLE; import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.UNIVERSAL; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; -import static org.apache.cassandra.service.accord.AccordKeyspace.COMMANDS; -import static org.apache.cassandra.service.accord.AccordKeyspace.DEPS_COMMANDS_FOR_KEY; -import static org.apache.cassandra.service.accord.AccordKeyspace.DepsCommandsForKeysAccessor; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.apache.cassandra.service.accord.AccordKeyspace.*; +import static org.junit.Assert.*; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -127,7 +124,9 @@ public class CompactionAccordIteratorsTest private static final TxnId GT_SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, SECOND_TXN_ID.hlc() + 1, NODE); static ColumnFamilyStore commands; + static ColumnFamilyStore timestampsForKey; static ColumnFamilyStore depsCommandsForKey; + static ColumnFamilyStore allCommandsForKey; static TableMetadata table; static FullRoute route; Random random; @@ -146,10 +145,19 @@ public static void beforeClass() throws Throwable SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); StorageService.instance.initServer(); + commands = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, COMMANDS); commands.disableAutoCompaction(); + + timestampsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, TIMESTAMPS_FOR_KEY); + timestampsForKey.disableAutoCompaction(); + depsCommandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, DEPS_COMMANDS_FOR_KEY); depsCommandsForKey.disableAutoCompaction(); + + allCommandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, ALL_COMMANDS_FOR_KEY); + allCommandsForKey.disableAutoCompaction(); + table = ColumnFamilyStore.getIfExists("ks", "tbl").metadata(); route = AccordTestUtils.keys(table, 42).toRoute(AccordTestUtils.key(table, 42).toUnseekable()); } @@ -209,12 +217,6 @@ private void testAccordCommandsPurger(RedundantBefore redundantBefore, DurableBe }, false); } - @Test - public void testAccordTimestampsForKeyPurger() - { - throw new AssertionError("TODO -> see commented out parts of CFK tests"); - } - @Test public void testAccordCommandsForKeyPurgerSingleCompaction() throws Throwable { @@ -230,57 +232,94 @@ public void testAccordCommandsForKeyPurgerMultipleCompactions() throws Throwable private void testAccordCommandsForKeyPurger(boolean singleCompaction) throws Throwable { this.singleCompaction = singleCompaction; + testAccordTimestampsForKeyPurger(null, expectedAccordTimestampsForKeyNoChange()); testAccordCommandsForKeyPurger(null, expectedAccordCommandsForKeyNoChange()); + testAccordTimestampsForKeyPurger(redundantBefore(LT_TXN_ID), expectedAccordTimestampsForKeyNoChange()); testAccordCommandsForKeyPurger(redundantBefore(LT_TXN_ID), expectedAccordCommandsForKeyNoChange()); + testAccordTimestampsForKeyPurger(redundantBefore(TXN_ID), expectedAccordTimestampsForKeyNoChange()); testAccordCommandsForKeyPurger(redundantBefore(TXN_ID), expectedAccordCommandsForKeyNoChange()); + testAccordTimestampsForKeyPurger(redundantBefore(GT_TXN_ID), expectedAccordTimestampsForKeyEraseOne()); testAccordCommandsForKeyPurger(redundantBefore(GT_TXN_ID), expectedAccordCommandsForKeyEraseOne()); + testAccordTimestampsForKeyPurger(redundantBefore(GT_SECOND_TXN_ID), expectedAccordTimestampsForKeyEraseAll()); testAccordCommandsForKeyPurger(redundantBefore(GT_SECOND_TXN_ID), expectedAccordCommandsForKeyEraseAll()); } + private static Consumer> expectedAccordTimestampsForKeyNoChange() + { + return partitions -> { + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + Row row = partition.getRow(Clustering.EMPTY); + + assertEquals(SECOND_TXN_ID, TimestampsForKeyRows.getMaxTimestamp(row)); + assertEquals(TXN_ID, TimestampsForKeyRows.getLastExecutedTimestamp(row)); + assertEquals(TXN_ID, TimestampsForKeyRows.getLastWriteTimestamp(row)); + + // last_executed_micros is only persisted if it doesn't match txnId.hlc, which only happens in the + // case of an hlc collision. Each txnId in this test has a unique hlc + assertEquals(NO_LAST_EXECUTED_HLC, TimestampsForKeyRows.getLastExecutedMicros(row)); + }; + } + private static Consumer> expectedAccordCommandsForKeyNoChange() { return partitions -> { assertEquals(1, partitions.size()); Partition partition = partitions.get(0); - Row staticRow = partition.getRow(Clustering.STATIC_CLUSTERING); - assertEquals(4, Iterables.size(staticRow)); -// assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); -// assertEquals(TXN_ID, CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); -// assertEquals(TXN_ID, CommandsForKeyRows.getLastWriteTimestamp(staticRow)); -// assertEquals(TXN_ID.hlc(), CommandsForKeyRows.getLastExecutedMicros(staticRow)); - assertEquals(4, Iterators.size(partition.unfilteredIterator())); + assertEquals(2, Iterators.size(partition.unfilteredIterator())); UnfilteredRowIterator rows = partition.unfilteredIterator(); // One row per txn per series - for (int i = 0; i < 2; i++) - for (TxnId txnId : TXN_IDS) - assertEquals(txnId, DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); + for (TxnId txnId : TXN_IDS) + assertEquals(txnId, DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); }; } - private static Consumer> expectedAccordCommandsForKeyEraseOne() + private static Consumer> expectedAccordTimestampsForKeyEraseOne() { return partitions -> { assertEquals(1, partitions.size()); Partition partition = partitions.get(0); - Row staticRow = partition.getRow(Clustering.STATIC_CLUSTERING); + Row row = partition.getRow(Clustering.EMPTY); // Only expect one column to remain because the second transaction is a read - assertEquals(1, Iterables.size(staticRow)); -// assertEquals(SECOND_TXN_ID, CommandsForKeyRows.getMaxTimestamp(staticRow)); -// assertNull(CommandsForKeyRows.getLastExecutedTimestamp(staticRow)); -// assertNull(CommandsForKeyRows.getLastWriteTimestamp(staticRow)); -// assertEquals(NO_LAST_EXECUTED_HLC, CommandsForKeyRows.getLastExecutedMicros(staticRow)); - assertEquals(2, Iterators.size(partition.unfilteredIterator())); + assertEquals(1, Iterables.size(row)); + assertEquals(SECOND_TXN_ID, AccordKeyspace.TimestampsForKeyRows.getMaxTimestamp(row)); + assertNull(AccordKeyspace.TimestampsForKeyRows.getLastExecutedTimestamp(row)); + assertNull(AccordKeyspace.TimestampsForKeyRows.getLastWriteTimestamp(row)); + assertEquals(NO_LAST_EXECUTED_HLC, AccordKeyspace.TimestampsForKeyRows.getLastExecutedMicros(row)); + }; + } + + private static Consumer> expectedAccordCommandsForKeyEraseOne() + { + return partitions -> { + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + assertEquals(1, Iterators.size(partition.unfilteredIterator())); UnfilteredRowIterator rows = partition.unfilteredIterator(); assertEquals(TXN_IDS[1], DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); - assertEquals(TXN_IDS[1], DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); }; } + private static Consumer> expectedAccordTimestampsForKeyEraseAll() + { + return partitions -> assertEquals(0, partitions.size()); + } + private static Consumer> expectedAccordCommandsForKeyEraseAll() { return partitions -> assertEquals(0, partitions.size()); } + private void testAccordTimestampsForKeyPurger(RedundantBefore redundantBefore, Consumer> expectedResult) throws Throwable + { + testWithCommandStore((commandStore) -> { + IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, DurableBefore.EMPTY); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, TIMESTAMPS_FOR_KEY); + List result = compactCFS(mockAccordService, cfs); + expectedResult.accept(result); + }, true); + } + private void testAccordCommandsForKeyPurger(RedundantBefore redundantBefore, Consumer> expectedResult) throws Throwable { testWithCommandStore((commandStore) -> { @@ -409,7 +448,9 @@ private static void flush(AccordCommandStore commandStore) commandStore.cache().awaitSaveResults(); }); commands.forceBlockingFlush(FlushReason.UNIT_TESTS); + timestampsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); depsCommandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); + allCommandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); } private void testWithCommandStore(TestWithCommandStore test, boolean additionalCommand) throws Throwable @@ -470,7 +511,7 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + DEPS_COMMANDS_FOR_KEY + ";"); logger.info(commandsForKeyTable.toStringUnsafe()); - assertEquals(txnIds.length * 2, commandsForKeyTable.size()); + assertEquals(txnIds.length, commandsForKeyTable.size()); Iterator commandsForKeyTableIterator = commandsTable.iterator(); for (TxnId txnId : txnIds) assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsForKeyTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); diff --git a/test/unit/org/apache/cassandra/index/sai/SAITester.java b/test/unit/org/apache/cassandra/index/sai/SAITester.java index e004f0c0ee6c..1869bb6efd59 100644 --- a/test/unit/org/apache/cassandra/index/sai/SAITester.java +++ b/test/unit/org/apache/cassandra/index/sai/SAITester.java @@ -46,6 +46,7 @@ import javax.management.ObjectName; import com.google.common.collect.Sets; +import com.google.common.primitives.Ints; import org.junit.After; import org.junit.Assert; import org.junit.BeforeClass; @@ -678,7 +679,9 @@ protected void runInitializationTask() throws Exception protected int getCompactionTasks() { - return CompactionManager.instance.getActiveCompactions() + CompactionManager.instance.getPendingTasks(); + long activeCount = CompactionManager.instance.active.getCompactions().stream().filter(compaction -> compaction.getCompactionInfo().getTableMetadata().keyspace.equals(KEYSPACE)).count(); + int pendingCount = Keyspace.open(KEYSPACE).getColumnFamilyStores().stream().map(columnFamilyStore -> columnFamilyStore.getCompactionStrategyManager().getEstimatedRemainingTasks()).reduce(0, Integer::sum); + return Ints.checkedCast(activeCount + pendingCount); } protected int snapshot(String snapshotName) From cb1a05c5d44abe476174abff32faedb77a2a6fc5 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Tue, 19 Dec 2023 09:30:06 -0800 Subject: [PATCH 088/340] Schema based accord fast path configuration Patch by Blake Eggleston; Reviewed by David Capwell and Alex Petrov for CASSANDRA-19009 --- conf/cassandra.yaml | 3 + modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 2 + .../cassandra/config/DatabaseDescriptor.java | 10 + .../cql3/statements/TransactionStatement.java | 2 +- .../statements/schema/KeyspaceAttributes.java | 27 +- .../statements/schema/TableAttributes.java | 14 + .../db/streaming/CassandraStreamReceiver.java | 2 +- .../apache/cassandra/dht/AccordSplitter.java | 7 +- .../apache/cassandra/dht/BootStrapper.java | 6 +- .../apache/cassandra/dht/RangeStreamer.java | 25 +- .../cassandra/locator/ReplicaLayout.java | 5 +- .../cassandra/locator/ReplicaPlans.java | 30 +- .../cassandra/repair/AccordRepairJob.java | 4 +- .../DistributedMetadataLogKeyspace.java | 7 +- .../cassandra/schema/KeyspaceMetadata.java | 14 +- .../cassandra/schema/KeyspaceParams.java | 46 +- .../cassandra/schema/SchemaKeyspace.java | 31 +- .../cassandra/schema/TableMetadata.java | 7 + .../apache/cassandra/schema/TableParams.java | 33 +- .../org/apache/cassandra/service/Rebuild.java | 13 +- .../cassandra/service/StorageProxy.java | 6 +- .../cassandra/service/StorageService.java | 19 +- .../service/StorageServiceMBean.java | 1 + .../service/accord/AccordCommandStores.java | 9 +- .../service/accord/AccordCommandsForKeys.java | 7 +- .../accord/AccordConfigurationService.java | 12 +- .../service/accord/AccordEndpointMapper.java | 15 +- .../service/accord/AccordFastPath.java | 293 ++++++ .../accord/AccordFastPathCoordinator.java | 345 +++++++ .../accord/AccordFetchCoordinator.java | 11 +- .../service/accord/AccordKeyspace.java | 6 +- .../service/accord/AccordObjectSizes.java | 7 +- .../accord/AccordSafeCommandsForKey.java | 14 + .../service/accord/AccordService.java | 26 +- .../service/accord/AccordTopology.java | 277 ++++++ .../service/accord/AccordTopologyUtils.java | 162 ---- .../service/accord/CommandsForRanges.java | 17 +- .../service/accord/EndpointMapping.java | 4 +- .../service/accord/IAccordService.java | 57 +- .../cassandra/service/accord/TokenRange.java | 15 +- .../service/accord/api/AccordAgent.java | 2 +- .../service/accord/api/AccordRoutableKey.java | 21 +- .../service/accord/api/AccordRoutingKey.java | 65 +- .../service/accord/api/PartitionKey.java | 28 +- .../service/accord/async/AsyncLoader.java | 15 +- .../service/accord/async/AsyncOperation.java | 1 + .../accord/fastpath/FastPathStrategy.java | 184 ++++ .../InheritKeyspaceFastPathStrategy.java | 65 ++ .../ParameterizedFastPathStrategy.java | 375 ++++++++ .../fastpath/SimpleFastPathStrategy.java | 87 ++ .../interop/AccordInteropExecution.java | 5 +- .../serializers/CommandStoreSerializers.java | 2 +- .../serializers/TopologySerializers.java | 40 +- .../service/accord/txn/TxnNamedRead.java | 2 +- .../service/accord/txn/TxnQuery.java | 2 +- .../migration/ConsensusKeyMigrationState.java | 4 +- .../service/reads/AbstractReadExecutor.java | 1 + .../service/reads/ReadCoordinator.java | 5 +- .../service/reads/range/RangeCommands.java | 6 +- .../reads/range/ReplicaPlanIterator.java | 6 +- .../reads/range/ReplicaPlanMerger.java | 7 +- .../cassandra/streaming/StreamPlan.java | 29 + .../apache/cassandra/tcm/ClusterMetadata.java | 118 ++- .../apache/cassandra/tcm/MetadataKeys.java | 6 +- .../tcm/StubClusterMetadataService.java | 6 +- .../apache/cassandra/tcm/Transformation.java | 12 +- .../tcm/compatibility/GossipHelper.java | 12 +- .../tcm/ownership/AccordKeyspaces.java | 108 --- .../cassandra/tcm/ownership/AccordTables.java | 109 +++ .../sequences/CancelCMSReconfiguration.java | 3 +- .../apache/cassandra/tcm/sequences/Move.java | 13 +- .../cassandra/tcm/serialization/Version.java | 1 + ...ccordKeyspace.java => AddAccordTable.java} | 49 +- .../ReconfigureAccordFastPath.java | 97 ++ .../cms/PrepareCMSReconfiguration.java | 3 +- .../org/apache/cassandra/tools/NodeProbe.java | 8 +- .../org/apache/cassandra/tools/NodeTool.java | 2 +- .../distributed/test/ReadRepairTest.java | 4 +- .../distributed/test/ReadSpeculationTest.java | 4 +- .../test/ShortReadProtectionTest.java | 8 +- .../test/accord/AccordBootstrapTest.java | 4 +- .../test/accord/AccordCQLTest.java | 899 +++++++++--------- .../test/accord/AccordIntegrationTest.java | 20 +- .../accord/AccordInteroperabilityTest.java | 14 +- .../test/accord/AccordMetricsTest.java | 10 +- .../test/accord/AccordMigrationTest.java | 22 +- .../test/accord/AccordSimpleFastPathTest.java | 153 +++ .../test/accord/AccordTestBase.java | 23 +- .../test/log/ClusterMetadataTestHelper.java | 12 +- .../test/AccordJournalSimulationTest.java | 6 +- .../statements/DescribeStatementTest.java | 13 +- .../cassandra/db/SchemaCQLHelperTest.java | 3 + .../cassandra/dht/BootStrapperTest.java | 3 +- .../cassandra/dht/PartitionerTestCase.java | 11 +- .../AssureSufficientLiveNodesTest.java | 16 +- .../cassandra/locator/MetaStrategyTest.java | 6 +- .../cassandra/schema/FastPathSchemaTest.java | 121 +++ .../accord/AccordCommandStoreTest.java | 5 +- .../service/accord/AccordCommandTest.java | 2 +- .../AccordConfigurationServiceTest.java | 14 +- .../accord/AccordFastPathCoordinatorTest.java | 253 +++++ .../service/accord/AccordKeyspaceTest.java | 16 +- .../service/accord/AccordReadRepairTest.java | 17 +- .../accord/AccordSyncPropagatorTest.java | 9 +- .../service/accord/AccordTestUtils.java | 28 +- .../service/accord/AccordTopologyTest.java | 103 +- .../service/accord/CommandsForRangesTest.java | 5 +- .../accord/SimpleAccordEndpointMapper.java | 4 +- .../service/accord/api/AccordKeyTest.java | 36 +- .../accord/fastpath/FastPathParsingTest.java | 109 +++ .../ParameterizedFastPathStrategyTest.java | 153 +++ .../fastpath/SimpleFastPathStrategyTest.java | 43 + .../serializers/CommandSerializersTest.java | 2 +- .../accord/txn/AbstractKeySortedTest.java | 2 +- .../service/reads/DataResolverTest.java | 2 +- .../reads/range/RangeCommandIteratorTest.java | 10 +- .../reads/range/RangeCommandsTest.java | 6 +- .../reads/range/ReplicaPlanIteratorTest.java | 4 +- .../reads/range/ReplicaPlanMergerTest.java | 4 +- .../reads/repair/AbstractReadRepairTest.java | 2 +- .../ClusterMetadataTransformationTest.java | 15 +- .../cassandra/utils/AccordGenerators.java | 42 +- .../cassandra/utils/CassandraGenerators.java | 3 +- 124 files changed, 4109 insertions(+), 1224 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordFastPath.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordTopology.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java create mode 100644 src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java create mode 100644 src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java create mode 100644 src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java create mode 100644 src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java delete mode 100644 src/java/org/apache/cassandra/tcm/ownership/AccordKeyspaces.java create mode 100644 src/java/org/apache/cassandra/tcm/ownership/AccordTables.java rename src/java/org/apache/cassandra/tcm/transformations/{AddAccordKeyspace.java => AddAccordTable.java} (53%) create mode 100644 src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java create mode 100644 test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/fastpath/FastPathParsingTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategyTest.java diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index a507dcd94312..4341422dbde7 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2661,3 +2661,6 @@ storage_compatibility_mode: NONE # # # Progress log scheduling delay # progress_log_schedule_delay: 1s +# +# # how quickly the fast path is reconfigured when nodes go up/down +# fast_path_update_delay: 5s diff --git a/modules/accord b/modules/accord index 9f21a24660fe..5523cfefef16 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 9f21a24660fe49881e0131813f9eff850e25b3dc +Subproject commit 5523cfefef163efee53c8cc57595f5b50ea4f363 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 2143342bd44b..d821c31e87ca 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -42,4 +42,6 @@ public class AccordSpec public DurationSpec.IntMillisecondsBound barrier_max_backoff = new DurationSpec.IntMillisecondsBound("10m"); public DurationSpec.IntMillisecondsBound range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); + + public volatile DurationSpec fast_path_update_delay = new DurationSpec.IntSecondsBound(5); } diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 12da2dae429b..f4eccb000ac0 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5313,6 +5313,16 @@ public static int getAccordShardCount() return conf.accord.shard_count.or(DatabaseDescriptor::getAvailableProcessors); } + public static long getAccordFastPathUpdateDelayMillis() + { + return conf.accord.fast_path_update_delay.to(TimeUnit.MILLISECONDS); + } + + public static void setAccordFastPathUpdateDelayMillis(long millis) + { + conf.accord.fast_path_update_delay = new DurationSpec.IntMillisecondsBound(millis); + } + public static boolean getForceNewPreparedStatementBehaviour() { return conf.force_new_prepared_statement_behaviour; diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index cff94f952648..bb9180130bf5 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -371,7 +371,7 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. Txn txn = createTxn(state.getClientState(), options); - AccordService.instance().maybeConvertKeyspacesToAccord(txn); + AccordService.instance().maybeConvertTablesToAccord(txn); TxnResult txnResult = AccordService.instance().coordinate(txn, options.getConsistency(), requestTime); if (txnResult.kind() == retry_new_protocol) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/KeyspaceAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/KeyspaceAttributes.java index d4d5b984b3c3..0be8b882dd79 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/KeyspaceAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/KeyspaceAttributes.java @@ -23,9 +23,11 @@ import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.exceptions.SyntaxException; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.KeyspaceParams.Option; import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; public final class KeyspaceAttributes extends PropertyDefinitions { @@ -48,6 +50,10 @@ public void validate() Map replicationOptions = getAllReplicationOptions(); if (!replicationOptions.isEmpty() && !replicationOptions.containsKey(ReplicationParams.CLASS)) throw new ConfigurationException("Missing replication strategy class"); + + FastPathStrategy strategy = getFastPathStrategy(); + if (strategy != null && strategy.kind() == FastPathStrategy.Kind.INHERIT_KEYSPACE) + throw new ConfigurationException("Cannot use keyspace inheriting fast path strategy with keyspaces"); } public String getReplicationStrategyClass() @@ -63,10 +69,26 @@ private Map getAllReplicationOptions() : replication; } + private FastPathStrategy getFastPathStrategy() + { + if (!hasOption(Option.FAST_PATH)) + return null; + + try + { + return FastPathStrategy.fromMap(getMap(Option.FAST_PATH.toString())); + } + catch (SyntaxException e) + { + return FastPathStrategy.keyspaceStrategyFromString(getString(Option.FAST_PATH.toString())); + } + } + KeyspaceParams asNewKeyspaceParams() { boolean durableWrites = getBoolean(Option.DURABLE_WRITES.toString(), KeyspaceParams.DEFAULT_DURABLE_WRITES); - return KeyspaceParams.create(durableWrites, getAllReplicationOptions()); + FastPathStrategy fastPath = getFastPathStrategy(); + return KeyspaceParams.create(durableWrites, getAllReplicationOptions(), fastPath != null ? fastPath : FastPathStrategy.simple()); } KeyspaceParams asAlteredKeyspaceParams(KeyspaceParams previous) @@ -76,7 +98,8 @@ KeyspaceParams asAlteredKeyspaceParams(KeyspaceParams previous) ReplicationParams replication = getReplicationStrategyClass() == null ? previous.replication : ReplicationParams.fromMapWithDefaults(getAllReplicationOptions(), previousOptions); - return new KeyspaceParams(durableWrites, replication); + FastPathStrategy fastPath = getFastPathStrategy(); + return new KeyspaceParams(durableWrites, replication, fastPath != null ? fastPath : previous.fastPath); } public boolean hasOption(Option option) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index 87af6b840b00..eb1891862858 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -25,6 +25,7 @@ import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.exceptions.SyntaxException; import org.apache.cassandra.schema.CachingParams; import org.apache.cassandra.schema.CompactionParams; import org.apache.cassandra.schema.CompressionParams; @@ -32,6 +33,7 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.schema.TableParams.Option; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -151,6 +153,18 @@ private TableParams build(TableParams.Builder builder) if (hasOption(READ_REPAIR)) builder.readRepair(ReadRepairStrategy.fromString(getString(READ_REPAIR))); + if (hasOption(Option.FAST_PATH)) + { + try + { + builder.fastPath(FastPathStrategy.fromMap(getMap(Option.FAST_PATH))); + } + catch (SyntaxException e) + { + builder.fastPath(FastPathStrategy.tableStrategyFromString(getString(Option.FAST_PATH))); + } + } + return builder.build(); } diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 62af76127741..99ef8e96b3f7 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -246,7 +246,7 @@ public void finished() checkNotNull(minVersion, "Unable to determine minimum cluster version"); IAccordService accordService = AccordService.instance(); if (session.streamOperation().requiresBarrierTransaction() - && accordService.isAccordManagedKeyspace(cfs.keyspace.getName()) + && accordService.isAccordManagedTable(cfs.getTableId()) && CassandraVersion.CASSANDRA_5_0.compareTo(minVersion) >= 0) accordService.postStreamReceivingBarrier(cfs, ranges); diff --git a/src/java/org/apache/cassandra/dht/AccordSplitter.java b/src/java/org/apache/cassandra/dht/AccordSplitter.java index b0868b47e810..467ac2a1055a 100644 --- a/src/java/org/apache/cassandra/dht/AccordSplitter.java +++ b/src/java/org/apache/cassandra/dht/AccordSplitter.java @@ -21,6 +21,7 @@ import java.math.BigInteger; import accord.local.ShardDistributor; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; @@ -54,9 +55,9 @@ public TokenRange subRange(accord.primitives.Range range, BigInteger startOffset BigInteger end = endBound instanceof SentinelKey ? maximumValue() : valueForToken(endBound.token()); BigInteger sizeOfRange = end.subtract(start); - String keyspace = startBound.keyspace(); - return new TokenRange(startOffset.equals(ZERO) ? startBound : new TokenKey(keyspace, tokenForValue(start.add(startOffset))), - endOffset.compareTo(sizeOfRange) >= 0 ? endBound : new TokenKey(keyspace, tokenForValue(start.add(endOffset)))); + TableId tableId = startBound.table(); + return new TokenRange(startOffset.equals(ZERO) ? startBound : new TokenKey(tableId, tokenForValue(start.add(startOffset))), + endOffset.compareTo(sizeOfRange) >= 0 ? endBound : new TokenKey(tableId, tokenForValue(start.add(endOffset)))); } @Override diff --git a/src/java/org/apache/cassandra/dht/BootStrapper.java b/src/java/org/apache/cassandra/dht/BootStrapper.java index 4ec9e4834f95..609b4b89e783 100644 --- a/src/java/org/apache/cassandra/dht/BootStrapper.java +++ b/src/java/org/apache/cassandra/dht/BootStrapper.java @@ -40,7 +40,6 @@ import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.streaming.StreamEvent; import org.apache.cassandra.streaming.StreamEventHandler; import org.apache.cassandra.streaming.StreamOperation; @@ -127,7 +126,8 @@ public Future bootstrap(StreamStateStore stateStore, boolean useStr true, DatabaseDescriptor.getStreamingConnectionsPerHost(), movements, - strictMovements); + strictMovements, + true); if (beingReplaced != null) streamer.addSourceFilter(new RangeStreamer.ExcludedSourcesFilter(Collections.singleton(beingReplaced))); @@ -137,8 +137,6 @@ public Future bootstrap(StreamStateStore stateStore, boolean useStr logger.debug("Schema does not contain any non-local keyspaces to stream on bootstrap"); for (String keyspaceName : nonLocalStrategyKeyspaces) { - if (AccordService.instance().isAccordManagedKeyspace(keyspaceName)) - continue; KeyspaceMetadata ksm = metadata.schema.getKeyspaces().get(keyspaceName).get(); if (ksm.params.replication.isMeta()) continue; diff --git a/src/java/org/apache/cassandra/dht/RangeStreamer.java b/src/java/org/apache/cassandra/dht/RangeStreamer.java index de66fab4f9ec..f21dd3186d40 100644 --- a/src/java/org/apache/cassandra/dht/RangeStreamer.java +++ b/src/java/org/apache/cassandra/dht/RangeStreamer.java @@ -59,7 +59,9 @@ import org.apache.cassandra.locator.ReplicaCollection.Builder.Conflict; import org.apache.cassandra.locator.Replicas; import org.apache.cassandra.locator.NodeProximity; +import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.streaming.StreamOperation; import org.apache.cassandra.streaming.StreamPlan; @@ -99,6 +101,7 @@ public class RangeStreamer private final StreamStateStore stateStore; private final MovementMap movements; private final MovementMap strictMovements; + private final boolean excludeAccordTables; public static class FetchReplica { @@ -299,10 +302,11 @@ public RangeStreamer(ClusterMetadata metadata, boolean connectSequentially, int connectionsPerHost, MovementMap movements, - MovementMap strictMovements) + MovementMap strictMovements, + boolean excludeAccordTables) { this(metadata, streamOperation, useStrictConsistency, proximity, stateStore, - FailureDetector.instance, connectSequentially, connectionsPerHost, movements, strictMovements); + FailureDetector.instance, connectSequentially, connectionsPerHost, movements, strictMovements, excludeAccordTables); } RangeStreamer(ClusterMetadata metadata, @@ -314,8 +318,10 @@ public RangeStreamer(ClusterMetadata metadata, boolean connectSequentially, int connectionsPerHost, MovementMap movements, - MovementMap strictMovements) + MovementMap strictMovements, + boolean excludeAccordTables) { + this.excludeAccordTables = excludeAccordTables; Preconditions.checkArgument(streamOperation == StreamOperation.BOOTSTRAP || streamOperation == StreamOperation.REBUILD, streamOperation); this.metadata = metadata; this.description = streamOperation.getDescription(); @@ -760,8 +766,17 @@ public StreamResultFuture fetchAsync() logger.debug("Source and our replicas {}", fetchReplicas); logger.debug("Source {} Keyspace {} streaming full {} transient {}", source, keyspace, full, transientReplicas); - /* Send messages to respective folks to stream data over to me */ - streamPlan.requestRanges(source, keyspace, full, transientReplicas); + KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(keyspace); + if (excludeAccordTables && StreamPlan.hasAccordTables(ksm)) + { + String[] cfNames = StreamPlan.nonAccordTablesForKeyspace(ksm); + if (cfNames != null) + streamPlan.requestRanges(source, keyspace, full, transientReplicas, cfNames); + } + else + { + streamPlan.requestRanges(source, keyspace, full, transientReplicas); + } }); }); diff --git a/src/java/org/apache/cassandra/locator/ReplicaLayout.java b/src/java/org/apache/cassandra/locator/ReplicaLayout.java index 6b464237d04e..2e111dc9aa37 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaLayout.java +++ b/src/java/org/apache/cassandra/locator/ReplicaLayout.java @@ -26,6 +26,7 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.FBUtilities; @@ -357,11 +358,11 @@ static EndpointsForToken resolveWriteConflictsInPending(EndpointsForToken natura * @return the read layout for a token - this includes natural replicas, i.e. those that are not pending. * They are reverse sorted by the badness score of the configured snitch */ - static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token, ReadCoordinator coordinator) + static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, TableId tableId, Token token, ReadCoordinator coordinator) { EndpointsForToken replicas = keyspace.getMetadata().params.replication.isLocal() ? forLocalStrategyToken(metadata, replicationStrategy, token) - : coordinator.forNonLocalStrategyTokenRead(metadata, keyspace.getMetadata(), token); + : coordinator.forNonLocalStrategyTokenRead(metadata, keyspace.getMetadata(), tableId, token); replicas = DatabaseDescriptor.getNodeProximity().sortedByProximity(FBUtilities.getBroadcastAddressAndPort(), replicas); diff --git a/src/java/org/apache/cassandra/locator/ReplicaPlans.java b/src/java/org/apache/cassandra/locator/ReplicaPlans.java index 5aef13a6d3cd..d009ed33db8e 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaPlans.java +++ b/src/java/org/apache/cassandra/locator/ReplicaPlans.java @@ -62,6 +62,7 @@ import org.apache.cassandra.index.Index; import org.apache.cassandra.index.IndexStatusManager; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; @@ -534,7 +535,7 @@ public static List sortByProximity(Collection forRead, ClusterMetadata metadata, Keyspace keyspace, ConsistencyLevel consistencyLevel, Token token, Predicate isAlive, ReadCoordinator coordinator) throws UnavailableException + public static ReplicaPlan.ForWrite forReadRepair(ReplicaPlan forRead, ClusterMetadata metadata, Keyspace keyspace, TableId tableId, ConsistencyLevel consistencyLevel, Token token, Predicate isAlive, ReadCoordinator coordinator) throws UnavailableException { AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy(); Selector selector = writeReadRepair(forRead); @@ -551,7 +552,7 @@ public static ReplicaPlan.ForWrite forReadRepair(ReplicaPlan forRead, Clus liveAndDown.all(), live.all(), contacts, - (newClusterMetadata) -> forReadRepair(forRead, newClusterMetadata, keyspace, consistencyLevel, token, isAlive, coordinator), + (newClusterMetadata) -> forReadRepair(forRead, newClusterMetadata, keyspace, tableId, consistencyLevel, token, isAlive, coordinator), metadata.epoch); } @@ -880,28 +881,31 @@ private static ReplicaPlan.ForRangeRead forSingleReplicaRead(ClusterMetadata met * it would break EACH_QUORUM to do so without further filtering */ public static ReplicaPlan.ForTokenRead forRead(Keyspace keyspace, + TableId tableId, Token token, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, SpeculativeRetryPolicy retry, ReadCoordinator coordinator) { - return forRead(ClusterMetadata.current(), keyspace, token, indexQueryPlan, consistencyLevel, retry, coordinator, false); + return forRead(ClusterMetadata.current(), keyspace, tableId, token, indexQueryPlan, consistencyLevel, retry, coordinator, false); } public static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, Keyspace keyspace, + TableId tableId, Token token, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, SpeculativeRetryPolicy retry, ReadCoordinator coordinator) { - return forRead(metadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, coordinator, true); + return forRead(metadata, keyspace, tableId, token, indexQueryPlan, consistencyLevel, retry, coordinator, true); } private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, Keyspace keyspace, + TableId tableId, Token token, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, @@ -910,7 +914,7 @@ private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, boolean throwOnInsufficientLiveReplicas) { AbstractReplicationStrategy replicationStrategy = keyspace.getReplicationStrategy(); - ReplicaLayout.ForTokenRead forTokenReadLiveAndDown = ReplicaLayout.forTokenReadSorted(metadata, keyspace, replicationStrategy, token, coordinator); + ReplicaLayout.ForTokenRead forTokenReadLiveAndDown = ReplicaLayout.forTokenReadSorted(metadata, keyspace, replicationStrategy, tableId, token, coordinator); ReplicaLayout.ForTokenRead forTokenReadLive = forTokenReadLiveAndDown.filter(FailureDetector.isReplicaAlive); EndpointsForToken candidates = candidatesForRead(keyspace, indexQueryPlan, consistencyLevel, forTokenReadLive.all()); EndpointsForToken contacts = contactForRead(metadata.locator, replicationStrategy, consistencyLevel, retry.equals(AlwaysSpeculativeRetryPolicy.INSTANCE), candidates); @@ -919,8 +923,8 @@ private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, assureSufficientLiveReplicasForRead(metadata.locator, replicationStrategy, consistencyLevel, contacts); return new ReplicaPlan.ForTokenRead(keyspace, replicationStrategy, consistencyLevel, candidates, contacts, forTokenReadLiveAndDown.all(), - (newClusterMetadata) -> forRead(newClusterMetadata, keyspace, token, indexQueryPlan, consistencyLevel, retry, coordinator, false), - (self) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive, coordinator), + (newClusterMetadata) -> forRead(newClusterMetadata, keyspace, tableId, token, indexQueryPlan, consistencyLevel, retry, coordinator, false), + (self) -> forReadRepair(self, metadata, keyspace, tableId, consistencyLevel, token, FailureDetector.isReplicaAlive, coordinator), metadata.epoch); } @@ -932,16 +936,18 @@ private static ReplicaPlan.ForTokenRead forRead(ClusterMetadata metadata, * There is no speculation for range read queries at present, so we never 'always speculate' here, and a failed response fails the query. */ public static ReplicaPlan.ForRangeRead forRangeRead(Keyspace keyspace, + TableId tableId, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, AbstractBounds range, int vnodeCount) { - return forRangeRead(ClusterMetadata.current(), keyspace, indexQueryPlan, consistencyLevel, range, vnodeCount, true); + return forRangeRead(ClusterMetadata.current(), keyspace, tableId, indexQueryPlan, consistencyLevel, range, vnodeCount, true); } public static ReplicaPlan.ForRangeRead forRangeRead(ClusterMetadata metadata, Keyspace keyspace, + TableId tableId, @Nullable Index.QueryPlan indexQueryPlan, ConsistencyLevel consistencyLevel, AbstractBounds range, @@ -965,8 +971,8 @@ public static ReplicaPlan.ForRangeRead forRangeRead(ClusterMetadata metadata, contacts, forRangeReadLiveAndDown.all(), vnodeCount, - (newClusterMetadata) -> forRangeRead(newClusterMetadata, keyspace, indexQueryPlan, consistencyLevel, range, vnodeCount, false), - (self, token) -> forReadRepair(self, metadata, keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive, ReadCoordinator.DEFAULT), + (newClusterMetadata) -> forRangeRead(newClusterMetadata, keyspace, tableId, indexQueryPlan, consistencyLevel, range, vnodeCount, false), + (self, token) -> forReadRepair(self, metadata, keyspace, tableId, consistencyLevel, token, FailureDetector.isReplicaAlive, ReadCoordinator.DEFAULT), metadata.epoch); } @@ -1000,6 +1006,7 @@ public static ReplicaPlan.ForRangeRead forFullRangeRead(Keyspace keyspace, */ public static ReplicaPlan.ForRangeRead maybeMerge(ClusterMetadata metadata, Keyspace keyspace, + TableId tableId, ConsistencyLevel consistencyLevel, ReplicaPlan.ForRangeRead left, ReplicaPlan.ForRangeRead right) @@ -1037,6 +1044,7 @@ public static ReplicaPlan.ForRangeRead maybeMerge(ClusterMetadata metadata, newVnodeCount, (newClusterMetadata) -> forRangeRead(newClusterMetadata, keyspace, + tableId, null, // TODO (TCM) - we only use the recomputed ForRangeRead to check stillAppliesTo - make sure passing null here is ok consistencyLevel, newRange, @@ -1045,7 +1053,7 @@ public static ReplicaPlan.ForRangeRead maybeMerge(ClusterMetadata metadata, (self, token) -> { // It might happen that the ring has moved forward since the operation has started, but because we'll be recomputing a quorum // after the operation is complete, we will catch inconsistencies either way. - return forReadRepair(self, ClusterMetadata.current(), keyspace, consistencyLevel, token, FailureDetector.isReplicaAlive, ReadCoordinator.DEFAULT); + return forReadRepair(self, ClusterMetadata.current(), keyspace, tableId, consistencyLevel, token, FailureDetector.isReplicaAlive, ReadCoordinator.DEFAULT); }, left.epoch); } diff --git a/src/java/org/apache/cassandra/repair/AccordRepairJob.java b/src/java/org/apache/cassandra/repair/AccordRepairJob.java index c2e9eb792ec4..8db43b46b33c 100644 --- a/src/java/org/apache/cassandra/repair/AccordRepairJob.java +++ b/src/java/org/apache/cassandra/repair/AccordRepairJob.java @@ -21,6 +21,7 @@ import java.math.BigInteger; import javax.annotation.Nullable; +import org.apache.cassandra.service.accord.AccordTopology; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,7 +32,6 @@ import org.apache.cassandra.dht.AccordSplitter; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.accord.AccordTopologyUtils; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; import org.apache.cassandra.tcm.ClusterMetadata; @@ -65,7 +65,7 @@ public AccordRepairJob(RepairSession repairSession, String cfname) { super(repairSession, cfname); IPartitioner partitioner = desc.ranges.iterator().next().left.getPartitioner(); - this.ranges = AccordTopologyUtils.toAccordRanges(desc.keyspace, desc.ranges); + this.ranges = AccordTopology.toAccordRanges(desc.keyspace, desc.ranges); this.splitter = partitioner.accordSplitter().apply(ranges); } diff --git a/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java b/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java index 37af87f3e400..1e4a41c9ff1d 100644 --- a/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java +++ b/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java @@ -26,8 +26,6 @@ import com.google.common.collect.ImmutableMap; -import org.apache.cassandra.locator.MetaStrategy; -import org.apache.cassandra.utils.JVMStabilityInspector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,6 +34,8 @@ import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.exceptions.CasWriteTimeoutException; +import org.apache.cassandra.locator.MetaStrategy; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.MetadataSnapshots; @@ -44,6 +44,7 @@ import org.apache.cassandra.tcm.log.LogReader; import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.tcm.transformations.cms.PreInitialize; +import org.apache.cassandra.utils.JVMStabilityInspector; import static org.apache.cassandra.tcm.Epoch.FIRST; @@ -223,7 +224,7 @@ private static TableMetadata.Builder parse(String cql, String table, String desc public static KeyspaceMetadata initialMetadata(Set knownDatacenters) { - return KeyspaceMetadata.create(SchemaConstants.METADATA_KEYSPACE_NAME, new KeyspaceParams(true, ReplicationParams.simpleMeta(1, knownDatacenters)), Tables.of(Log)); + return KeyspaceMetadata.create(SchemaConstants.METADATA_KEYSPACE_NAME, new KeyspaceParams(true, ReplicationParams.simpleMeta(1, knownDatacenters), FastPathStrategy.simple()), Tables.of(Log)); } public static KeyspaceMetadata initialMetadata(String datacenter) diff --git a/src/java/org/apache/cassandra/schema/KeyspaceMetadata.java b/src/java/org/apache/cassandra/schema/KeyspaceMetadata.java index 8065c5929007..0700b3321277 100644 --- a/src/java/org/apache/cassandra/schema/KeyspaceMetadata.java +++ b/src/java/org/apache/cassandra/schema/KeyspaceMetadata.java @@ -49,6 +49,7 @@ import org.apache.cassandra.schema.Tables.TablesDiff; import org.apache.cassandra.schema.Types.TypesDiff; import org.apache.cassandra.schema.Views.ViewsDiff; +import org.apache.cassandra.utils.LocalizeString; import static com.google.common.collect.Iterables.any; import static java.lang.String.format; @@ -364,9 +365,16 @@ public String toCqlString(boolean withWarnings, boolean withInternals, boolean i params.replication.appendCqlTo(builder); builder.append(" AND durable_writes = ") - .append(params.durableWrites) - .append(';') - .toString(); + .append(params.durableWrites); + + if (params.fastPath != null) + { + builder.append(" AND fast_path = '") + .append(LocalizeString.toLowerCaseLocalized(params.fastPath.toString())) + .append("'"); + } + + builder.append(';'); } return builder.toString(); } diff --git a/src/java/org/apache/cassandra/schema/KeyspaceParams.java b/src/java/org/apache/cassandra/schema/KeyspaceParams.java index 76516334b8d5..fe05b10b5d55 100644 --- a/src/java/org/apache/cassandra/schema/KeyspaceParams.java +++ b/src/java/org/apache/cassandra/schema/KeyspaceParams.java @@ -28,10 +28,12 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; +import static org.apache.cassandra.tcm.serialization.Version.V2; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** @@ -54,7 +56,8 @@ public final class KeyspaceParams public enum Option { DURABLE_WRITES, - REPLICATION; + REPLICATION, + FAST_PATH; @Override public String toString() @@ -65,41 +68,53 @@ public String toString() public final boolean durableWrites; public final ReplicationParams replication; + public final FastPathStrategy fastPath; - public KeyspaceParams(boolean durableWrites, ReplicationParams replication) + public KeyspaceParams(boolean durableWrites, ReplicationParams replication, FastPathStrategy fastPath) { this.durableWrites = durableWrites; this.replication = replication; + this.fastPath = fastPath; + } + + public static KeyspaceParams create(boolean durableWrites, Map replication, FastPathStrategy fastPath) + { + return new KeyspaceParams(durableWrites, ReplicationParams.fromMap(replication), fastPath); + } + + public static KeyspaceParams create(boolean durableWrites, Map replication, Map fastPath) + { + return create(durableWrites, replication, FastPathStrategy.fromMap(fastPath)); } public static KeyspaceParams create(boolean durableWrites, Map replication) { - return new KeyspaceParams(durableWrites, ReplicationParams.fromMap(replication)); + return create(durableWrites, replication, FastPathStrategy.simple()); } public static KeyspaceParams local() { - return new KeyspaceParams(DEFAULT_LOCAL_DURABLE_WRITES, ReplicationParams.local()); + return new KeyspaceParams(DEFAULT_LOCAL_DURABLE_WRITES, ReplicationParams.local(), FastPathStrategy.simple()); } public static KeyspaceParams simple(int replicationFactor) { - return new KeyspaceParams(true, ReplicationParams.simple(replicationFactor)); + return new KeyspaceParams(true, ReplicationParams.simple(replicationFactor), FastPathStrategy.simple()); } public static KeyspaceParams simple(String replicationFactor) { - return new KeyspaceParams(true, ReplicationParams.simple(replicationFactor)); + return new KeyspaceParams(true, ReplicationParams.simple(replicationFactor), FastPathStrategy.simple()); } public static KeyspaceParams simpleTransient(int replicationFactor) { - return new KeyspaceParams(false, ReplicationParams.simple(replicationFactor)); + return new KeyspaceParams(false, ReplicationParams.simple(replicationFactor), FastPathStrategy.simple()); } public static KeyspaceParams nts(Object... args) { - return new KeyspaceParams(true, ReplicationParams.nts(args)); + return new KeyspaceParams(true, ReplicationParams.nts(args), FastPathStrategy.simple()); } public void validate(String name, ClientState state, ClusterMetadata metadata) @@ -118,13 +133,13 @@ public boolean equals(Object o) KeyspaceParams p = (KeyspaceParams) o; - return durableWrites == p.durableWrites && replication.equals(p.replication); + return durableWrites == p.durableWrites && replication.equals(p.replication) && fastPath.equals(p.fastPath); } @Override public int hashCode() { - return Objects.hashCode(durableWrites, replication); + return Objects.hashCode(durableWrites, replication, fastPath); } @Override @@ -133,6 +148,7 @@ public String toString() return MoreObjects.toStringHelper(this) .add(Option.DURABLE_WRITES.toString(), durableWrites) .add(Option.REPLICATION.toString(), replication) + .add(Option.FAST_PATH.toString(), fastPath.toString()) .toString(); } @@ -142,19 +158,25 @@ public void serialize(KeyspaceParams t, DataOutputPlus out, Version version) thr { ReplicationParams.serializer.serialize(t.replication, out, version); out.writeBoolean(t.durableWrites); + if (version.isAtLeast(V2)) + FastPathStrategy.serializer.serialize(t.fastPath, out, version); } public KeyspaceParams deserialize(DataInputPlus in, Version version) throws IOException { ReplicationParams params = ReplicationParams.serializer.deserialize(in, version); boolean durableWrites = in.readBoolean(); - return new KeyspaceParams(durableWrites, params); + FastPathStrategy fastPath = version.isAtLeast(V2) + ? FastPathStrategy.serializer.deserialize(in, version) + : FastPathStrategy.simple(); + return new KeyspaceParams(durableWrites, params, fastPath); } public long serializedSize(KeyspaceParams t, Version version) { return ReplicationParams.serializer.serializedSize(t.replication, version) + - TypeSizes.sizeof(t.durableWrites); + TypeSizes.sizeof(t.durableWrites) + + (version.isAtLeast(V2) ? FastPathStrategy.serializer.serializedSize(t.fastPath, version) : 0); } } } diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index ce8fa750b38c..cd37fa4dac48 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -44,6 +44,7 @@ import org.apache.cassandra.db.partitions.*; import org.apache.cassandra.db.rows.*; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.schema.ColumnMetadata.ClusteringOrder; import org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; @@ -97,6 +98,7 @@ private SchemaKeyspace() + "keyspace_name text," + "durable_writes boolean," + "replication frozen>," + + "fast_path frozen>," + "PRIMARY KEY ((keyspace_name)))"); private static final TableMetadata Tables = @@ -128,6 +130,7 @@ private SchemaKeyspace() + "additional_write_policy text," + "cdc boolean," + "read_repair text," + + "fast_path frozen>," + "PRIMARY KEY ((keyspace_name), table_name))"); private static final TableMetadata Columns = @@ -491,7 +494,8 @@ private static Mutation.SimpleBuilder makeCreateKeyspaceMutation(String name, Ke .row() .add(KeyspaceParams.Option.DURABLE_WRITES.toString(), params.durableWrites) .add(KeyspaceParams.Option.REPLICATION.toString(), - (params.replication.isMeta() ? params.replication.asNonMeta() : params.replication).asMap()); + (params.replication.isMeta() ? params.replication.asNonMeta() : params.replication).asMap()) + .add(KeyspaceParams.Option.FAST_PATH.toString(), params.fastPath.asMap()); return builder; } @@ -551,7 +555,7 @@ private static void addTableToSchemaMutation(TableMetadata table, boolean withCo .add("id", table.id.asUUID()) .add("flags", TableMetadata.Flag.toStringSet(table.flags)); - addTableParamsToRowBuilder(table.params, rowBuilder); + addTableParamsToRowBuilder(table.params, rowBuilder, false); if (withColumnsAndTriggers) { @@ -569,7 +573,7 @@ private static void addTableToSchemaMutation(TableMetadata table, boolean withCo } } - private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBuilder builder) + private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBuilder builder, boolean forView) { builder.add("bloom_filter_fp_chance", params.bloomFilterFpChance) .add("comment", params.comment) @@ -608,6 +612,9 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui // incremental_backups is enabled, to avoid RTE in pre-4.2 versioned node during upgrades if (!params.incrementalBackups) builder.add("incremental_backups", false); + + if (DatabaseDescriptor.getAccordTransactionsEnabled() && !forView) + builder.add("fast_path", params.fastPath.asMap()); } private static void addAlterTableToSchemaMutation(TableMetadata oldTable, TableMetadata newTable, Mutation.SimpleBuilder builder) @@ -820,7 +827,7 @@ private static void addViewToSchemaMutation(ViewMetadata view, boolean includeCo .add("where_clause", view.whereClause.toCQLString()) .add("id", table.id.asUUID()); - addTableParamsToRowBuilder(table.params, rowBuilder); + addTableParamsToRowBuilder(table.params, rowBuilder, true); if (includeColumns) { @@ -966,9 +973,11 @@ private static KeyspaceParams fetchKeyspaceParams(String keyspaceName) UntypedResultSet.Row row = query(query, keyspaceName).one(); boolean durableWrites = row.getBoolean(KeyspaceParams.Option.DURABLE_WRITES.toString()); Map replication = row.getFrozenTextMap(KeyspaceParams.Option.REPLICATION.toString()); - KeyspaceParams params = KeyspaceParams.create(durableWrites, replication); + Map fastPath = row.getFrozenTextMap(KeyspaceParams.Option.FAST_PATH.toString()); + KeyspaceParams params = KeyspaceParams.create(durableWrites, replication, fastPath); + if (keyspaceName.equals(SchemaConstants.METADATA_KEYSPACE_NAME)) - params = new KeyspaceParams(params.durableWrites, params.replication.asMeta()); + params = new KeyspaceParams(params.durableWrites, params.replication.asMeta(), FastPathStrategy.simple()); return params; } @@ -1070,7 +1079,8 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) SpeculativeRetryPolicy.fromString(row.getString("additional_write_policy")) : SpeculativeRetryPolicy.fromString("99PERCENTILE")) .cdc(row.has("cdc") && row.getBoolean("cdc")) - .readRepair(getReadRepairStrategy(row)); + .readRepair(getReadRepairStrategy(row)) + .fastPath(getFastPathStrategy(row)); // allow_auto_snapshot column was introduced in 4.2 if (row.has("allow_auto_snapshot")) @@ -1448,4 +1458,11 @@ private static ReadRepairStrategy getReadRepairStrategy(UntypedResultSet.Row row ? ReadRepairStrategy.fromString(row.getString("read_repair")) : ReadRepairStrategy.BLOCKING; } + + private static FastPathStrategy getFastPathStrategy(UntypedResultSet.Row row) + { + return row.has("fast_path") + ? FastPathStrategy.fromMap(row.getFrozenTextMap("fast_path")) + : FastPathStrategy.inheritKeyspace(); + } } diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index 90c56e47bba7..88a3f80d0b8a 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -70,6 +70,7 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.serialization.UDTAndFunctionsAwareMetadataSerializer; @@ -973,6 +974,12 @@ public Builder compression(CompressionParams val) return this; } + public Builder fastPath(FastPathStrategy val) + { + params.fastPath(val); + return this; + } + public Builder defaultTimeToLive(int val) { params.defaultTimeToLive(val); diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 6903179525b7..40614f65ea52 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -32,6 +32,7 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.service.reads.PercentileSpeculativeRetryPolicy; @@ -69,7 +70,8 @@ public enum Option ADDITIONAL_WRITE_POLICY, CRC_CHECK_CHANCE, CDC, - READ_REPAIR; + READ_REPAIR, + FAST_PATH; @Override public String toString() @@ -97,6 +99,7 @@ public String toString() public final ImmutableMap extensions; public final boolean cdc; public final ReadRepairStrategy readRepair; + public final FastPathStrategy fastPath; private TableParams(Builder builder) { @@ -121,6 +124,7 @@ private TableParams(Builder builder) extensions = builder.extensions; cdc = builder.cdc; readRepair = builder.readRepair; + fastPath = builder.fastPath; } public static Builder builder() @@ -148,7 +152,8 @@ public static Builder builder(TableParams params) .additionalWritePolicy(params.additionalWritePolicy) .extensions(params.extensions) .cdc(params.cdc) - .readRepair(params.readRepair); + .readRepair(params.readRepair) + .fastPath(params.fastPath); } public Builder unbuild() @@ -239,7 +244,8 @@ public boolean equals(Object o) && memtable.equals(p.memtable) && extensions.equals(p.extensions) && cdc == p.cdc - && readRepair == p.readRepair; + && readRepair == p.readRepair + && fastPath.equals(fastPath); } @Override @@ -263,7 +269,8 @@ public int hashCode() memtable, extensions, cdc, - readRepair); + readRepair, + fastPath); } @Override @@ -275,6 +282,7 @@ public String toString() .add(ALLOW_AUTO_SNAPSHOT.toString(), allowAutoSnapshot) .add(BLOOM_FILTER_FP_CHANCE.toString(), bloomFilterFpChance) .add(CRC_CHECK_CHANCE.toString(), crcCheckChance) + .add(FAST_PATH.toString(), fastPath) .add(GC_GRACE_SECONDS.toString(), gcGraceSeconds) .add(DEFAULT_TIME_TO_LIVE.toString(), defaultTimeToLive) .add(INCREMENTAL_BACKUPS.toString(), incrementalBackups) @@ -289,6 +297,7 @@ public String toString() .add(EXTENSIONS.toString(), extensions) .add(CDC.toString(), cdc) .add(READ_REPAIR.toString(), readRepair) + .add(Option.FAST_PATH.toString(), fastPath) .toString(); } @@ -318,8 +327,8 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) if (!isView) { - builder.append("AND default_time_to_live = ").append(defaultTimeToLive) - .newLine(); + builder.append("AND fast_path = ").append(fastPath.asCQL()).newLine(); + builder.append("AND default_time_to_live = ").append(defaultTimeToLive).newLine(); } builder.append("AND extensions = ").append(extensions.entrySet() @@ -364,6 +373,7 @@ public static final class Builder private ImmutableMap extensions = ImmutableMap.of(); private boolean cdc; private ReadRepairStrategy readRepair = ReadRepairStrategy.BLOCKING; + private FastPathStrategy fastPath = FastPathStrategy.inheritKeyspace(); public Builder() { @@ -482,6 +492,12 @@ public Builder readRepair(ReadRepairStrategy val) return this; } + public Builder fastPath(FastPathStrategy val) + { + fastPath = val; + return this; + } + public Builder extensions(Map val) { extensions = ImmutableMap.copyOf(val); @@ -504,7 +520,10 @@ public void serialize(TableParams t, DataOutputPlus out, Version version) throws out.writeUTF(t.speculativeRetry.toString()); out.writeUTF(t.additionalWritePolicy.toString()); if (version.isAtLeast(Version.V2)) + { out.writeUTF(t.memtable.configurationKey()); + FastPathStrategy.serializer.serialize(t.fastPath, out, version); + } serializeMap(t.caching.asMap(), out); serializeMap(t.compaction.asMap(), out); serializeMap(t.compression.asMap(), out); @@ -532,6 +551,7 @@ public TableParams deserialize(DataInputPlus in, Version version) throws IOExcep .speculativeRetry(SpeculativeRetryPolicy.fromString(in.readUTF())) .additionalWritePolicy(SpeculativeRetryPolicy.fromString(in.readUTF())) .memtable(version.isAtLeast(Version.V2) ? MemtableParams.get(in.readUTF()) : MemtableParams.DEFAULT) + .fastPath(version.isAtLeast(Version.V2) ? FastPathStrategy.serializer.deserialize(in, version) : FastPathStrategy.simple()) .caching(CachingParams.fromMap(deserializeMap(in))) .compaction(CompactionParams.fromMap(deserializeMap(in))) .compression(CompressionParams.fromMap(deserializeMap(in))) @@ -556,6 +576,7 @@ public long serializedSize(TableParams t, Version version) sizeof(t.speculativeRetry.toString()) + sizeof(t.additionalWritePolicy.toString()) + (version.isAtLeast(Version.V2) ? sizeof(t.memtable.configurationKey()) : 0) + + (version.isAtLeast(Version.V2) ? FastPathStrategy.serializer.serializedSize(t.fastPath, version) : 0) + serializedSizeMap(t.caching.asMap()) + serializedSizeMap(t.compaction.asMap()) + serializedSizeMap(t.compression.asMap()) + diff --git a/src/java/org/apache/cassandra/service/Rebuild.java b/src/java/org/apache/cassandra/service/Rebuild.java index 673a6ca486a3..b6e4d61d1bf4 100644 --- a/src/java/org/apache/cassandra/service/Rebuild.java +++ b/src/java/org/apache/cassandra/service/Rebuild.java @@ -113,7 +113,8 @@ public static void rebuild(String sourceDc, String keyspace, String tokens, Stri false, DatabaseDescriptor.getStreamingConnectionsPerHost(), rebuildMovements, - null); + null, + true); if (sourceDc != null) streamer.addSourceFilter(new RangeStreamer.SingleDatacenterFilter(metadata.locator, sourceDc)); @@ -123,16 +124,11 @@ public static void rebuild(String sourceDc, String keyspace, String tokens, Stri if (keyspace == null) { for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces().names()) - { - if (AccordService.instance().isAccordManagedKeyspace(keyspaceName)) - continue; streamer.addKeyspaceToFetch(keyspaceName); - } } else if (tokens == null) { - if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) - streamer.addKeyspaceToFetch(keyspace); + streamer.addKeyspaceToFetch(keyspace); } else { @@ -159,8 +155,7 @@ else if (tokens == null) streamer.addSourceFilter(new RangeStreamer.AllowedSourcesFilter(sources)); } - if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) - streamer.addKeyspaceToFetch(keyspace); + streamer.addKeyspaceToFetch(keyspace); } StreamResultFuture resultFuture = streamer.fetchAsync(); diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index a40426c00130..52cc03ef5ac5 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -378,7 +378,7 @@ public static RowIterator cas(String keyspaceName, clientState, nowInSeconds); IAccordService accordService = AccordService.instance(); - accordService.maybeConvertKeyspacesToAccord(txn); + accordService.maybeConvertTablesToAccord(txn); TxnResult txnResult = accordService.coordinate(txn, consistencyForPaxos, requestTime); @@ -1251,7 +1251,7 @@ private static void mutateWithAccord(Collection iMutations, AccordUpdate update = new TxnUpdate(fragments, TxnCondition.none(), clForCommit); Txn.InMemory txn = new Txn.InMemory(Keys.of(partitionKeys), TxnRead.EMPTY, TxnQuery.EMPTY, update); IAccordService accordService = AccordService.instance(); - accordService.maybeConvertKeyspacesToAccord(txn); + accordService.maybeConvertTablesToAccord(txn); accordService.coordinate(txn, consistencyLevel, requestTime); } @@ -2001,7 +2001,7 @@ private static ConsensusAttemptResult readWithAccord(SinglePartitionReadCommand. TxnRead read = TxnRead.createSerialRead(readCommand, consistencyLevel); Txn txn = new Txn.InMemory(read.keys(), read, TxnQuery.ALL); IAccordService accordService = AccordService.instance(); - accordService.maybeConvertKeyspacesToAccord(txn); + accordService.maybeConvertTablesToAccord(txn); TxnResult txnResult = accordService.coordinate(txn, consistencyLevel, requestTime); if (txnResult.kind() == retry_new_protocol) return RETRY_NEW_PROTOCOL; diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index e9dc473577a9..d473ae5f099e 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -4232,11 +4232,24 @@ public List getNonLocalStrategyKeyspaces() @Override public List getAccordManagedKeyspaces() + { + Keyspaces keyspaces = Schema.instance.getNonLocalStrategyKeyspaces(); + return keyspaces.stream().flatMap(ks -> ks.tables.stream()) + .filter(tbm -> AccordService.instance().isAccordManagedTable(tbm.id)) + .map(tbm -> tbm.keyspace) + .distinct() + .sorted() + .collect(toList()); + } + + @Override + public List getAccordManagedTables() { // TODO (review) These are really just the ones Accord is aware of not necessarily managed - Set keyspaces = Schema.instance.getNonLocalStrategyKeyspaces().names(); - return keyspaces.stream() - .filter(AccordService.instance()::isAccordManagedKeyspace) + Keyspaces keyspaces = Schema.instance.getNonLocalStrategyKeyspaces(); + return keyspaces.stream().flatMap(ks -> ks.tables.stream()) + .filter(tbm -> AccordService.instance().isAccordManagedTable(tbm.id)) + .map(tbm -> tbm.keyspace + '.' + tbm.name) .collect(toList()); } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index d11ca1bfb4f6..b4e28c3e9b68 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -1158,6 +1158,7 @@ void setConsensusMigrationTargetProtocol(@Nonnull String targetProtocol, String listConsensusMigrations(@Nullable Set keyspaceNames, @Nullable Set tableNames, @Nonnull String format); List getAccordManagedKeyspaces(); + List getAccordManagedTables(); /** Gets the concurrency settings for processing stages*/ static class StageConcurrency implements Serializable diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index d5f1ed3b3534..cf4cda992be3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -33,6 +33,7 @@ import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.metrics.CacheSizeMetrics; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey; public class AccordCommandStores extends CommandStores implements CacheSize @@ -62,15 +63,15 @@ protected boolean shouldBootstrap(Node node, Topology previous, Topology updated if (!super.shouldBootstrap(node, previous, updated, range)) return false; // we see new ranges when a new keyspace is added, so avoid bootstrap in these cases - return contains(previous, ((AccordRoutingKey) range.start()).keyspace()); + return contains(previous, ((AccordRoutingKey) range.start()).table()); } - private static boolean contains(Topology previous, String searchKeyspace) + private static boolean contains(Topology previous, TableId searchTable) { for (Range range : previous.ranges()) { - String keyspace = ((AccordRoutingKey) range.start()).keyspace(); - if (keyspace.equals(searchKeyspace)) + TableId table = ((AccordRoutingKey) range.start()).table(); + if (table.equals(searchTable)) return true; } return false; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandsForKeys.java b/src/java/org/apache/cassandra/service/accord/AccordCommandsForKeys.java index e8a35f417454..340b8396d357 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandsForKeys.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandsForKeys.java @@ -192,9 +192,9 @@ public AsyncChain load(ExecutorPlus executor, Function cache) { - CommandsCachingState commands = (CommandsCachingState) commandStore.depsCommandsForKeyCache().getUnsafe(key()); + CommandsCachingState commands = (CommandsCachingState) cache.getUnsafe(key()); if (commands == null) return; @@ -214,7 +214,8 @@ protected State maybeProcessModification(Stat Modified modified = (Modified) next; CommandsForKeyUpdate current = modified.current; - maybeUpdateCommands(current); + maybeUpdateCommands(current, commandStore.depsCommandsForKeyCache()); + maybeUpdateCommands(current, commandStore.allCommandsForKeyCache()); // combine in memory updates current = CommandsForKeyGroupUpdater.Immutable.merge(modified.original, current, this); diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 5b9ea24ab8c0..ad20fea04329 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -153,15 +153,15 @@ public synchronized void start() } @Override - public Node.Id mappedId(InetAddressAndPort endpoint) + public Node.Id mappedIdOrNull(InetAddressAndPort endpoint) { - return Invariants.nonNull(mapping.mappedId(endpoint), "Unable to map address %s to a Node.Id", endpoint); + return mapping.mappedIdOrNull(endpoint); } @Override - public InetAddressAndPort mappedEndpoint(Node.Id id) + public InetAddressAndPort mappedEndpointOrNull(Node.Id id) { - return Invariants.nonNull(mapping.mappedEndpoint(id), "Unable to map node id %s to a InetAddressAndPort", id); + return mapping.mappedEndpointOrNull(id); } @VisibleForTesting @@ -179,7 +179,7 @@ synchronized void updateMapping(EndpointMapping mapping) synchronized void updateMapping(ClusterMetadata metadata) { - updateMapping(AccordTopologyUtils.directoryToMapping(mapping, metadata.epoch.getEpoch(), metadata.directory)); + updateMapping(AccordTopology.directoryToMapping(mapping, metadata.epoch.getEpoch(), metadata.directory)); } private void reportMetadata(ClusterMetadata metadata) @@ -188,7 +188,7 @@ private void reportMetadata(ClusterMetadata metadata) synchronized (AccordConfigurationService.this) { updateMapping(metadata); - reportTopology(AccordTopologyUtils.createAccordTopology(metadata)); + reportTopology(AccordTopology.createAccordTopology(metadata)); } }); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordEndpointMapper.java b/src/java/org/apache/cassandra/service/accord/AccordEndpointMapper.java index 16ff04437e30..fd0e8cf73618 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordEndpointMapper.java +++ b/src/java/org/apache/cassandra/service/accord/AccordEndpointMapper.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import accord.local.Node; +import accord.utils.Invariants; import org.apache.cassandra.locator.InetAddressAndPort; /** @@ -26,6 +27,16 @@ */ public interface AccordEndpointMapper { - Node.Id mappedId(InetAddressAndPort endpoint); - InetAddressAndPort mappedEndpoint(Node.Id id); + Node.Id mappedIdOrNull(InetAddressAndPort endpoint); + InetAddressAndPort mappedEndpointOrNull(Node.Id id); + + default Node.Id mappedId(InetAddressAndPort endpoint) + { + return Invariants.nonNull(mappedIdOrNull(endpoint), "Unable to map address %s to a Node.Id", endpoint); + } + + default InetAddressAndPort mappedEndpoint(Node.Id id) + { + return Invariants.nonNull(mappedEndpointOrNull(id), "Unable to map node id %s to a InetAddressAndPort", id); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordFastPath.java b/src/java/org/apache/cassandra/service/accord/AccordFastPath.java new file mode 100644 index 000000000000..3c45241c2f12 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordFastPath.java @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +import accord.local.Node; +import com.google.common.collect.ImmutableMap; + +import com.google.common.collect.ImmutableSet; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.serializers.TopologySerializers; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.MetadataValue; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +/** + * Cluster availability info for services that need a consistent view of availability for a given epoch, such + * as accord topology calculation + */ +public class AccordFastPath implements MetadataValue +{ + public static final AccordFastPath EMPTY = new AccordFastPath(ImmutableMap.of(), Epoch.EMPTY); + + public enum Status + { + NORMAL, SHUTDOWN, UNAVAILABLE; + + public boolean isUnavailable() + { + switch (this) + { + case UNAVAILABLE: + case SHUTDOWN: + return true; + default: + return false; + } + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + @Override + public void serialize(Status status, DataOutputPlus out, Version version) throws IOException + { + switch (status) + { + case NORMAL: out.write(0); break; + case SHUTDOWN: out.write(1); break; + case UNAVAILABLE: out.write(2); break; + default: throw new IllegalStateException("Unhandled status: " + this); + } + } + + @Override + public Status deserialize(DataInputPlus in, Version version) throws IOException + { + byte b = in.readByte(); + switch (b) + { + case 0: return NORMAL; + case 1: return SHUTDOWN; + case 2: return UNAVAILABLE; + default: throw new IllegalArgumentException("Unhandled status byte: " + b); + } + } + + @Override + public long serializedSize(Status status, Version version) + { + return TypeSizes.BYTE_SIZE; + } + }; + }; + + public static class NodeInfo + { + public final Status status; + public final long updated; + + public NodeInfo(Status status, long updated) + { + this.status = status; + this.updated = updated; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NodeInfo nodeInfo = (NodeInfo) o; + return updated == nodeInfo.updated && status == nodeInfo.status; + } + + @Override + public int hashCode() + { + return Objects.hash(status, updated); + } + + private static final MetadataSerializer serializer = new MetadataSerializer() + { + @Override + public void serialize(NodeInfo info, DataOutputPlus out, Version version) throws IOException + { + Status.serializer.serialize(info.status, out, version); + out.writeUnsignedVInt(info.updated); + } + + @Override + public NodeInfo deserialize(DataInputPlus in, Version version) throws IOException + { + return new NodeInfo(Status.serializer.deserialize(in, version), in.readUnsignedVInt()); + } + + @Override + public long serializedSize(NodeInfo info, Version version) + { + return Status.serializer.serializedSize(info.status, version) + TypeSizes.sizeofUnsignedVInt(info.updated); + } + }; + } + + public final ImmutableMap info; + + private final Epoch lastModified; + + AccordFastPath(ImmutableMap info, Epoch lastModified) + { + this.info = info; + this.lastModified = lastModified; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordFastPath that = (AccordFastPath) o; + return info.equals(that.info) && lastModified.equals(that.lastModified); + } + + @Override + public int hashCode() + { + return Objects.hash(info, lastModified); + } + + public AccordFastPath withoutNode(NodeId tcmId) + { + Node.Id node = AccordTopology.tcmIdToAccord(tcmId); + if (!info.containsKey(node)) + return this; + + ImmutableMap.Builder builder = ImmutableMap.builder(); + info.forEach((n, info) -> { + if (!n.equals(node)) + builder.put(n, info); + }); + return new AccordFastPath(builder.build(), lastModified); + } + + public AccordFastPath withNodeStatusSince(Node.Id node, Status status, long updateTimeMillis, long updateDelayMillis) + { + NodeInfo current = info.get(node); + if (status == Status.SHUTDOWN && current != null) + { + // nodes report when they're being shutdown and aren't superseded + updateTimeMillis = Math.max(updateTimeMillis, current.updated + 1); + } + + if (!canUpdateNodeTo(current, status, updateTimeMillis, updateDelayMillis)) + throw new InvalidRequestException(String.format("cannot transition %s to %s at %s", node, status, updateTimeMillis)); + + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.put(node, new NodeInfo(status, updateTimeMillis)); + info.forEach((n, info) -> { + if (!n.equals(node)) + builder.put(n, info); + }); + return new AccordFastPath(builder.build(), lastModified); + } + + public boolean canUpdateNodeTo(NodeInfo current, Status status, long updateTimeMillis, long updateDelayMillis) + { + if (current == null) + return status != Status.NORMAL; + + if (current.status == status) + return false; + + return updateTimeMillis > current.updated + (status == Status.SHUTDOWN ? 0 : updateDelayMillis); + } + + public AccordFastPath withLastModified(Epoch epoch) + { + return new AccordFastPath(info, epoch); + } + + public Epoch lastModified() + { + return lastModified; + } + + public ImmutableSet unavailableIds() + { + ImmutableSet.Builder builder = ImmutableSet.builder(); + info.entrySet().stream() + .filter(entry -> entry.getValue().status.isUnavailable()) + .map(Map.Entry::getKey) + .forEach(builder::add); + return builder.build(); + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + private void serializeMap(Map map, DataOutputPlus out, Version version) throws IOException + { + out.writeInt(map.size()); + for (Map.Entry entry : map.entrySet()) + { + TopologySerializers.nodeId.serialize(entry.getKey(), out, version); + NodeInfo.serializer.serialize(entry.getValue(), out, version); + } + } + + public void serialize(AccordFastPath accordFastPath, DataOutputPlus out, Version version) throws IOException + { + serializeMap(accordFastPath.info, out, version); + Epoch.serializer.serialize(accordFastPath.lastModified, out, version); + } + + private ImmutableMap deserializeMap(DataInputPlus in, Version version) throws IOException + { + int size = in.readInt(); + if (size == 0) + return ImmutableMap.of(); + + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (int i=0; i map, Version version) + { + long size = TypeSizes.INT_SIZE; + for (Map.Entry entry : map.entrySet()) + { + size += TopologySerializers.nodeId.serializedSize(entry.getKey(), version); + size += NodeInfo.serializer.serializedSize(entry.getValue(), version); + } + return size; + } + + public long serializedSize(AccordFastPath accordFastPath, Version version) + { + return serializedMapSize(accordFastPath.info, version) + + Epoch.serializer.serializedSize(accordFastPath.lastModified, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java new file mode 100644 index 000000000000..c1fc73d80fb2 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.api.ConfigurationService; +import accord.local.Node; +import accord.primitives.Ranges; +import accord.topology.Topology; +import accord.utils.Invariants; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import org.apache.cassandra.concurrent.ScheduledExecutors; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.gms.EndpointState; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.gms.IEndpointStateChangeSubscriber; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordFastPath.NodeInfo; +import org.apache.cassandra.service.accord.AccordFastPath.Status; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.listeners.ChangeListener; +import org.apache.cassandra.tcm.transformations.ReconfigureAccordFastPath; +import org.apache.cassandra.utils.Clock; + +import java.util.*; +import java.util.concurrent.TimeUnit; + +/** + * Listens to availability status of peers and updates tcm fast path data accordingly + */ +public abstract class AccordFastPathCoordinator implements ChangeListener, ConfigurationService.Listener +{ + private static final AsyncResult SUCCESS = AsyncResults.success(null); + + private static class PeerStatus + { + final Node.Id peer; + final Status status; + + public PeerStatus(Node.Id peer, Status status) + { + this.peer = peer; + this.status = status; + } + + boolean shouldUpdateFastPath(AccordFastPath fastPath, long nowMillis, long delayMillis) + { + NodeInfo info = fastPath.info.get(peer); + + if (info == null) + return status != Status.NORMAL; + + if (info.status == status || info.status == Status.SHUTDOWN) + return false; + + return nowMillis - info.updated > delayMillis; + } + } + + private static class Peers + { + static final Peers EMPTY = new Peers(0, ImmutableSet.of(), Collections.emptyMap()); + final long epoch; + final ImmutableSet peers; + final Map statusMap; + + public Peers(long epoch, ImmutableSet peers, Map statusMap) + { + this.epoch = epoch; + this.peers = peers; + this.statusMap = statusMap; + } + + public boolean contains(Node.Id node) + { + return peers.contains(node); + } + + public static Peers from(Node.Id localId, Topology topology, Peers prev) + { + Set peers = new HashSet<>(); + topology.forEachOn(localId, (shard, index) -> peers.addAll(shard.nodes)); + peers.remove(localId); + + Map statusMap = new HashMap<>(); + for (Node.Id peer : peers) + { + PeerStatus status = prev.statusMap.get(peer); + if (status != null) + statusMap.put(peer, status); + } + + return new Peers(topology.epoch(), ImmutableSet.copyOf(peers), statusMap); + } + + public PeerStatus onUpdate(Node.Id node, Status status) + { + Invariants.checkArgument(contains(node)); + PeerStatus peerStatus = new PeerStatus(node, status); + statusMap.put(node, peerStatus); + return peerStatus; + } + + public Iterable statusIterable() + { + return statusMap.values(); + } + } + + private boolean receivedShutdownSignal = false; + private volatile Epoch startupEpoch = null; + private volatile boolean issuedStartupUpdate = false; + private boolean hasRegistered = false; + private Peers peers = Peers.EMPTY; + private final Node.Id localId; + + public AccordFastPathCoordinator(Node.Id localId) + { + this.localId = localId; + } + + private boolean isShutdown(AccordFastPath fastPath) + { + NodeInfo info = fastPath.info.get(localId); + return info != null && info.status == Status.SHUTDOWN; + } + + public synchronized void start() + { + if (hasRegistered) + return; + + ClusterMetadata cm = currentMetadata(); + startupEpoch = cm.epoch; + registerAsListener(); + + // TODO: start check routine + + hasRegistered = true; + + AccordFastPath fastPath = cm.accordFastPath; + + long updateDelayMillis = getAccordFastPathUpdateDelayMillis(); + if (isShutdown(fastPath)) + { + updateFastPath(localId, Status.NORMAL, Clock.Global.currentTimeMillis(), updateDelayMillis); + issuedStartupUpdate = true; + } + + scheduleMaintenanceTask(updateDelayMillis); + } + + abstract ClusterMetadata currentMetadata(); + abstract void registerAsListener(); + abstract void updateFastPath(Node.Id node, Status status, long updateTimeMillis, long updateDelayMillis); + abstract long getAccordFastPathUpdateDelayMillis(); + + private static class Impl extends AccordFastPathCoordinator implements IEndpointStateChangeSubscriber + { + private final AccordConfigurationService configService; + + public Impl(Node.Id localId, AccordConfigurationService configService) + { + super(localId); + this.configService = configService; + } + + @Override + ClusterMetadata currentMetadata() + { + return ClusterMetadata.current(); + } + + @Override + void registerAsListener() + { + Gossiper.instance.register(this); + StorageService.instance.addPreShutdownHook(this::onShutdown); + configService.registerListener(this); + } + + @Override + void updateFastPath(Node.Id node, Status status, long updateTimeMillis, long updateDelayMillis) + { + Stage.MISC.submit(() -> { + ClusterMetadataService.instance().commit(new ReconfigureAccordFastPath(node, status, updateTimeMillis, updateDelayMillis), + metadata -> metadata, ((code, message) -> null)); + }); + } + + @Override + long getAccordFastPathUpdateDelayMillis() + { + return DatabaseDescriptor.getAccordFastPathUpdateDelayMillis(); + } + + @Override + public void onAlive(InetAddressAndPort endpoint, EndpointState state) + { + Node.Id node = configService.mappedIdOrNull(endpoint); + if (node != null) onAlive(node); + } + + @Override + public void onDead(InetAddressAndPort endpoint, EndpointState state) + { + Node.Id node = configService.mappedIdOrNull(endpoint); + if (node != null) onDead(node); + } + } + + public static AccordFastPathCoordinator create(Node.Id localId, AccordConfigurationService configService) + { + return new Impl(localId, configService); + } + + synchronized void maybeUpdateFastPath(Node.Id node, Status status) + { + long nowMillis = Clock.Global.currentTimeMillis(); + long delayMillis = getAccordFastPathUpdateDelayMillis(); + + // don't schedule updates for nodes we don't share shards with + if (!peers.contains(node)) + return; + + PeerStatus peerStatus = peers.onUpdate(node, status); + ClusterMetadata metadata = currentMetadata(); + if (peerStatus.shouldUpdateFastPath(metadata.accordFastPath, nowMillis, delayMillis)) + updateFastPath(node, status, nowMillis, delayMillis); + } + + private void scheduleMaintenanceTask(long delayMillis) + { + ScheduledExecutors.scheduledTasks.schedule(this::maintenance, delayMillis, TimeUnit.MILLISECONDS); + } + + synchronized void maintenance() + { + long nowMillis = Clock.Global.currentTimeMillis(); + long delayMillis = getAccordFastPathUpdateDelayMillis(); + try + { + ClusterMetadata metadata = currentMetadata(); + for (PeerStatus status : peers.statusIterable()) + { + if (status.shouldUpdateFastPath(metadata.accordFastPath, nowMillis, delayMillis)) + updateFastPath(status.peer, status.status, nowMillis, delayMillis); + } + } + finally + { + scheduleMaintenanceTask(delayMillis); + } + } + + void onAlive(Node.Id node) + { + maybeUpdateFastPath(node, Status.NORMAL); + } + + public void onDead(Node.Id node) + { + maybeUpdateFastPath(node, Status.UNAVAILABLE); + } + + public void onShutdown() + { + synchronized (this) + { + receivedShutdownSignal = true; + } + + updateFastPath(localId, Status.SHUTDOWN, Clock.Global.currentTimeMillis(), getAccordFastPathUpdateDelayMillis()); + } + + /** + * In case we somehow missed that we've marked ourselves shutdown on startup + */ + @Override + public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) + { + if (next.epoch.compareTo(startupEpoch) <= 0) + return; + + if (!isShutdown(next.accordFastPath)) + return; + + synchronized (this) + { + if (receivedShutdownSignal || issuedStartupUpdate) + return; + issuedStartupUpdate = true; + } + + updateFastPath(localId, Status.NORMAL, Clock.Global.currentTimeMillis(), getAccordFastPathUpdateDelayMillis()); + } + + synchronized void updatePeers(Topology topology) + { + if (topology.epoch() <= peers.epoch) + return; + + peers = Peers.from(localId, topology, peers); + } + + @VisibleForTesting + synchronized boolean isPeer(Node.Id node) + { + return peers.contains(node); + } + + @Override + public AsyncResult onTopologyUpdate(Topology topology, boolean startSync) + { + updatePeers(topology); + return SUCCESS; + } + + @Override public void onRemoteSyncComplete(Node.Id node, long epoch) {} + @Override public void truncateTopologyUntil(long epoch) {} + @Override public void onEpochClosed(Ranges ranges, long epoch) {} + @Override public void onEpochRedundant(Ranges ranges, long epoch) {} +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java index 8b6162f22781..088c6c1331c7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java @@ -52,7 +52,8 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.RangesAtEndpoint; -import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.streaming.PreviewKind; @@ -267,16 +268,16 @@ public AsyncChain read(Seekable key, SafeCommandStore commandStore, Timest // TODO (correctness): check epoch // TODO (correctness): handle dropped tables - KeyspaceMetadata ksm = ClusterMetadata.current().schema.getKeyspaceMetadata(range.keyspace()); - Invariants.checkState(ksm != null, "Keyspace %s not found", range.keyspace()); - Invariants.checkState(ksm.tables.size() > 0, "Keyspace '%s' has no tables", range.keyspace()); + TableId tableId = range.table(); + TableMetadata table = ClusterMetadata.current().schema.getKeyspaces().getTableOrViewNullable(tableId); + Invariants.checkState(table != null, "Table with id %s not found", tableId); // FIXME: may also be relocation StreamPlan plan = new StreamPlan(StreamOperation.BOOTSTRAP, 1, false, null, PreviewKind.NONE).flushBeforeTransfer(true); RangesAtEndpoint ranges = RangesAtEndpoint.toDummyList(Collections.singleton(range.toKeyspaceRange())); - ksm.tables.forEach(table -> plan.transferRanges(to, table.keyspace, ranges, table.name)); + plan.transferRanges(to, table.keyspace, ranges, table.name); StreamResultFuture future = plan.execute(); return AsyncChains.success(StreamData.of(range, future.planId, hasDataToStream(future.getCoordinator(), to))); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 0bf3f686c54c..963a20adb113 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -862,7 +862,7 @@ private static V serializeToken(Token token, ValueAccessor accessor) private static ByteBuffer serializeKey(PartitionKey key) { - return TupleType.pack(ByteBufferAccessor.instance, Arrays.asList(UUIDSerializer.instance.serialize(key.tableId().asUUID()), key.partitionKey().getKey())); + return TupleType.pack(ByteBufferAccessor.instance, Arrays.asList(UUIDSerializer.instance.serialize(key.table().asUUID()), key.partitionKey().getKey())); } private static ByteBuffer serializeTimestamp(Timestamp timestamp) @@ -1303,7 +1303,7 @@ public static PartitionKey deserializeKey(ByteBuffer buffer) TableMetadata metadata = Schema.instance.getTableMetadata(tableId); if (metadata == null) throw new IllegalStateException("Table with id " + tableId + " could not be found; was it deleted?"); - return new PartitionKey(metadata.keyspace, tableId, metadata.partitioner.decorateKey(key)); + return new PartitionKey(tableId, metadata.partitioner.decorateKey(key)); } public static PartitionKey deserializeKey(UntypedResultSet.Row row) @@ -1358,7 +1358,7 @@ public static UntypedResultSet loadTimestampsForKeyRow(CommandStore commandStore return executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TIMESTAMPS_FOR_KEY), commandStore.id(), serializeToken(key.token()), - key.tableId().asUUID(), key.partitionKey().getKey()); + key.table().asUUID(), key.partitionKey().getKey()); } public static TimestampsForKey loadTimestampsForKey(AccordCommandStore commandStore, PartitionKey key) diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index dc7a3c9e3bed..095781f46a0b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.util.Map; +import java.util.UUID; import java.util.function.ToLongFunction; import com.google.common.collect.ImmutableSortedMap; @@ -58,6 +59,7 @@ import accord.primitives.TxnId; import accord.primitives.Unseekables; import accord.primitives.Writes; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; @@ -84,7 +86,8 @@ public static long key(RoutingKey key) return ((AccordRoutingKey) key).estimatedSizeOnHeap(); } - private static final long EMPTY_RANGE_SIZE = measure(TokenRange.fullRange("")); + private static final TableId EMPTY_ID = TableId.fromUUID(new UUID(0, 0)); + private static final long EMPTY_RANGE_SIZE = measure(TokenRange.fullRange(EMPTY_ID)); public static long range(Range range) { return EMPTY_RANGE_SIZE + key(range.start()) + key(range.end()); @@ -271,7 +274,7 @@ public static long listener(Command.DurableAndIdempotentListener listener) private static class CommandEmptySizes { - private final static TokenKey EMPTY_KEY = new TokenKey("doesnotexist", null); + private final static TokenKey EMPTY_KEY = new TokenKey(EMPTY_ID, null); private final static TxnId EMPTY_TXNID = new TxnId(42, 42, Kind.Read, Domain.Key, new Node.Id(42)); private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index e57a94f863e4..97fc60464957 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -68,6 +68,20 @@ public String toString() '}'; } + @Override + public boolean hasUpdate() + { + boolean hasUpdate = AccordSafeState.super.hasUpdate(); + + // cfk initialization is legal, but doesn't need to be propagated to the cache (and would + // cause an exception to be thrown if it were). Making an exception on the cache side could + // throw away applied cfk updates as well, so it's special cased here + if (hasUpdate && original == null && current != null && current.commands().isEmpty()) + return false; + + return hasUpdate; + } + @Override public AccordCachingState global() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index ce0f6f6f1e06..93f5422f8277 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -28,6 +28,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.apache.cassandra.tcm.transformations.AddAccordTable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,6 +77,7 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; @@ -92,7 +94,6 @@ import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.tcm.transformations.AddAccordKeyspace; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; @@ -122,6 +123,7 @@ public class AccordService implements IAccordService, Shutdownable private final Shutdownable nodeShutdown; private final AccordMessageSink messageSink; private final AccordConfigurationService configService; + private final AccordFastPathCoordinator fastPathCoordinator; private final AccordScheduler scheduler; private final AccordDataStore dataStore; private final AccordJournal journal; @@ -139,7 +141,7 @@ public IVerbHandler verbHandler() @Override public long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) { - throw new UnsupportedOperationException("No accord barriers should be executed when accord_transactions_enabled = false in cassandra.yaml"); + throw new UnsupportedOperationException("No accord barriers should be executed when accord.enabled = false in cassandra.yaml"); } @Override @@ -195,7 +197,7 @@ public Future epochReady(Epoch epoch) public void receive(Message> message) {} @Override - public boolean isAccordManagedKeyspace(String keyspace) + public boolean isAccordManagedTable(TableId keyspace) { return false; } @@ -207,7 +209,7 @@ public Pair, DurableBefore> getRedundantBefor } @Override - public void ensureKeyspaceIsAccordManaged(String keyspace) {} + public void ensureTableIsAccordManaged(TableId tableId) {} }; private static volatile IAccordService instance = null; @@ -236,7 +238,7 @@ public synchronized static void startup(NodeId tcmId) instance = NOOP_SERVICE; return; } - AccordService as = new AccordService(AccordTopologyUtils.tcmIdToAccord(tcmId)); + AccordService as = new AccordService(AccordTopology.tcmIdToAccord(tcmId)); as.startup(); instance = as; } @@ -276,6 +278,7 @@ private AccordService(Id localId) logger.info("Starting accord with nodeId {}", localId); AccordAgent agent = new AccordAgent(); this.configService = new AccordConfigurationService(localId); + this.fastPathCoordinator = AccordFastPathCoordinator.create(localId, configService); this.messageSink = new AccordMessageSink(agent, configService); this.scheduler = new AccordScheduler(); this.dataStore = new AccordDataStore(); @@ -310,6 +313,8 @@ public void startup() journal.start(); configService.start(); ClusterMetadataService.instance().log().addListener(configService); + fastPathCoordinator.start(); + ClusterMetadataService.instance().log().addListener(fastPathCoordinator); } @Override @@ -666,17 +671,18 @@ public AccordConfigurationService configurationService() return configService; } - public boolean isAccordManagedKeyspace(String keyspace) + @Override + public boolean isAccordManagedTable(TableId tableId) { - return ClusterMetadata.current().accordKeyspaces.contains(keyspace); + return ClusterMetadata.current().accordTables.contains(tableId); } @Override - public void ensureKeyspaceIsAccordManaged(String keyspace) + public void ensureTableIsAccordManaged(TableId tableId) { - if (isAccordManagedKeyspace(keyspace)) + if (isAccordManagedTable(tableId)) return; - ClusterMetadataService.instance().commit(new AddAccordKeyspace(keyspace), + ClusterMetadataService.instance().commit(new AddAccordTable(tableId), metadata -> null, (code, message) -> { Invariants.checkState(code == ExceptionCode.ALREADY_EXISTS, diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopology.java b/src/java/org/apache/cassandra/service/accord/AccordTopology.java new file mode 100644 index 000000000000..46b4d026bf0e --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordTopology.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import accord.primitives.Ranges; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; + +import accord.local.Node; +import accord.topology.Shard; +import accord.topology.Topology; +import accord.utils.Invariants; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.*; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.ownership.DataPlacements; +import org.apache.cassandra.tcm.ownership.VersionedEndpoints; + +/** + * Deterministically computes accord topology from a ClusterMetadata instance + */ +public class AccordTopology +{ + public static Node.Id tcmIdToAccord(NodeId nodeId) + { + return new Node.Id(nodeId.id()); + } + + private static class ShardLookup extends HashMap + { + private Shard createOrReuse(accord.primitives.Range range, List nodes, Set fastPathElectorate, Set joining) + { + Shard prev = get(range); + if (prev != null + && Objects.equals(prev.nodes, nodes) + && Objects.equals(prev.fastPathElectorate, fastPathElectorate) + && Objects.equals(prev.joining, joining)) + return prev; + + return new Shard(range, nodes, fastPathElectorate, joining); + } + } + + static class KeyspaceShard + { + private final KeyspaceMetadata keyspace; + private final Range range; + private final List nodes; + private final Set pending; + + private KeyspaceShard(KeyspaceMetadata keyspace, Range range, List nodes, Set pending) + { + this.keyspace = keyspace; + this.range = range; + this.nodes = nodes; + this.pending = pending; + } + + // return the keyspace fast path strategy if the inherit keyspace strategy is used + private FastPathStrategy strategyFor(TableMetadata metadata) + { + FastPathStrategy tableStrategy = metadata.params.fastPath; + FastPathStrategy strategy = tableStrategy.kind() != FastPathStrategy.Kind.INHERIT_KEYSPACE + ? tableStrategy : keyspace.params.fastPath; + Invariants.checkState(strategy.kind() != FastPathStrategy.Kind.INHERIT_KEYSPACE);; + return strategy; + } + + Shard createForTable(TableMetadata metadata, Set unavailable, Map dcMap, ShardLookup lookup) + { + TokenRange tokenRange = AccordTopology.range(metadata.id, range); + + Set fastPath = strategyFor(metadata).calculateFastPath(nodes, unavailable, dcMap); + + return lookup.createOrReuse(tokenRange, nodes, fastPath, pending); + } + + private static KeyspaceShard forRange(KeyspaceMetadata keyspace, Range range, Directory directory, VersionedEndpoints.ForRange reads, VersionedEndpoints.ForRange writes) + { + // TCM doesn't create wrap around ranges + Invariants.checkArgument(!range.isWrapAround() || range.right.equals(range.right.minValue()), + "wrap around range %s found", range); + + Set readEndpoints = reads.endpoints(); + Set writeEndpoints = writes.endpoints(); + Sets.SetView readOnly = Sets.difference(readEndpoints, writeEndpoints); + Invariants.checkState(readOnly.isEmpty(), "Read only replicas detected: %s", readOnly); + + List nodes = writes.endpoints().stream() + .map(directory::peerId) + .map(AccordTopology::tcmIdToAccord) + .sorted().collect(Collectors.toList()); + + Set pending = readEndpoints.equals(writeEndpoints) ? + Collections.emptySet() : + writeEndpoints.stream() + .filter(e -> !readEndpoints.contains(e)) + .map(directory::peerId) + .map(AccordTopology::tcmIdToAccord) + .collect(Collectors.toSet()); + + return new KeyspaceShard(keyspace, range, nodes, pending); + } + + public static List forKeyspace(KeyspaceMetadata keyspace, DataPlacements placements, Directory directory, ShardLookup lookup) + { + ReplicationParams replication = keyspace.params.replication; + DataPlacement placement = placements.get(replication); + + List> ranges = placement.reads.ranges(); + List shards = new ArrayList<>(ranges.size()); + for (Range range : ranges) + { + VersionedEndpoints.ForRange reads = placement.reads.forRange(range); + VersionedEndpoints.ForRange writes = placement.writes.forRange(range); + shards.add(forRange(keyspace, range, directory, reads, writes)); + } + return shards; + } + } + + static TokenRange minRange(TableId table, Token token) + { + return new TokenRange(SentinelKey.min(table), new TokenKey(table, token)); + } + + static TokenRange maxRange(TableId table, Token token) + { + return new TokenRange(new TokenKey(table, token), SentinelKey.max(table)); + } + + static TokenRange fullRange(TableId table) + { + return new TokenRange(SentinelKey.min(table), SentinelKey.max(table)); + } + + static TokenRange range(TableId table, Range range) + { + Token minToken = range.left.minValue(); + return new TokenRange(range.left.equals(minToken) ? SentinelKey.min(table) : new TokenKey(table, range.left), + range.right.equals(minToken) ? SentinelKey.max(table) : new TokenKey(table, range.right)); + } + + public static accord.primitives.Ranges toAccordRanges(TableId tableId, Collection> ranges) + { + List> normalizedRanges = Range.normalize(ranges); + TokenRange[] tokenRanges = new TokenRange[normalizedRanges.size()]; + for (int i = 0; i < normalizedRanges.size(); i++) + tokenRanges[i] = range(tableId, normalizedRanges.get(i)); + return Ranges.of(tokenRanges); + } + + public static accord.primitives.Ranges toAccordRanges(String keyspace, Collection> ranges) + { + Keyspace ks = Keyspace.open(keyspace); + Ranges accordRanges = Ranges.EMPTY; + if (ks == null) + return accordRanges; + + for (TableMetadata tbm : ks.getMetadata().tables) + { + accordRanges = accordRanges.with(toAccordRanges(tbm.id, ranges)); + } + + return accordRanges; + } + + private static Map createDCMap(Directory directory) + { + ImmutableMap.Builder builder = ImmutableMap.builder(); + directory.knownDatacenters().forEach(dc -> { + Set dcEndpoints = directory.datacenterEndpoints(dc); + // nodes aren't added to the endpointsToDCMap until they've joined + if (dcEndpoints == null) + return; + dcEndpoints.forEach(ep -> { + NodeId tid = directory.peerId(ep); + Node.Id aid = tcmIdToAccord(tid); + builder.put(aid, dc); + }); + }); + return builder.build(); + } + + public static Topology createAccordTopology(Epoch epoch, DistributedSchema schema, DataPlacements placements, Directory directory, AccordFastPath accordFastPath, Predicate tablePredicate, ShardLookup lookup) + { + List shards = new ArrayList<>(); + Set unavailable = accordFastPath.unavailableIds(); + Map dcMap = createDCMap(directory); + + for (KeyspaceMetadata keyspace : schema.getKeyspaces()) + { + List tables = keyspace.tables.stream().filter(tbl -> tablePredicate.test(tbl.id)).collect(Collectors.toList()); + if (tables.isEmpty()) + continue; + List ksShards = KeyspaceShard.forKeyspace(keyspace, placements, directory, lookup); + tables.forEach(table -> ksShards.forEach(shard -> shards.add(shard.createForTable(table, unavailable, dcMap, lookup)))); + } + + shards.sort((a, b) -> a.range.compare(b.range)); + return new Topology(epoch.getEpoch(), shards.toArray(new Shard[0])); + } + + public static Topology createAccordTopology(ClusterMetadata metadata, Predicate tablePredicate, ShardLookup lookup) + { + return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, metadata.accordFastPath, tablePredicate, lookup); + } + + public static Topology createAccordTopology(ClusterMetadata metadata, Predicate tablePredicate) + { + return createAccordTopology(metadata, tablePredicate, new ShardLookup()); + } + + public static Topology createAccordTopology(ClusterMetadata metadata, Topology current) + { + return createAccordTopology(metadata, metadata.accordTables::contains, createShardLookup(current)); + } + + public static Topology createAccordTopology(ClusterMetadata metadata) + { + return createAccordTopology(metadata, (Topology) null); + } + + public static EndpointMapping directoryToMapping(EndpointMapping mapping, long epoch, Directory directory) + { + EndpointMapping.Builder builder = EndpointMapping.builder(epoch); + for (NodeId id : directory.peerIds()) + builder.add(directory.endpoint(id), tcmIdToAccord(id)); + + // There are cases where nodes are removed from the cluster (host replacement, decom, etc.), but inflight events may still be happening; + // keep the ids around so pending events do not fail with a mapping error + for (Node.Id id : mapping.differenceIds(builder)) + builder.add(mapping.mappedEndpoint(id), id); + return builder.build(); + } + + private static ShardLookup createShardLookup(Topology topology) + { + ShardLookup map = new ShardLookup(); + + if (topology == null) + return map; + + topology.forEach(shard -> map.put(shard.range, shard)); + return map; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java deleted file mode 100644 index e88587411b98..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.stream.Collectors; - -import com.google.common.collect.Sets; - -import accord.local.Node; -import accord.primitives.Ranges; -import accord.topology.Shard; -import accord.topology.Topology; -import accord.utils.Invariants; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.locator.EndpointsForRange; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.schema.ReplicationParams; -import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; -import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.membership.Directory; -import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.tcm.ownership.DataPlacement; -import org.apache.cassandra.tcm.ownership.DataPlacements; - -public class AccordTopologyUtils -{ - public static Node.Id tcmIdToAccord(NodeId nodeId) - { - return new Node.Id(nodeId.id()); - } - - private static Shard createShard(TokenRange range, Directory directory, EndpointsForRange reads, EndpointsForRange writes) - { - Function endpointMapper = e -> { - NodeId tcmId = directory.peerId(e); - return tcmIdToAccord(tcmId); - }; - Set endpoints = reads.endpoints(); - Set writeEndpoints = writes.endpoints(); - List nodes = endpoints.stream().map(endpointMapper).sorted().collect(Collectors.toList()); - Set fastPath = new HashSet<>(nodes); // TODO: support fast path updates - Set pending = endpoints.equals(writeEndpoints) ? - Collections.emptySet() : - writeEndpoints.stream().filter(e -> !endpoints.contains(e)).map(endpointMapper).collect(Collectors.toSet()); - - Sets.SetView readOnly = Sets.difference(endpoints, writeEndpoints); - Invariants.checkState(readOnly.isEmpty(), "Read only replicas detected: %s", readOnly); - return new Shard(range, nodes, fastPath, pending); - } - - static TokenRange minRange(String keyspace, Token token) - { - return new TokenRange(SentinelKey.min(keyspace), new TokenKey(keyspace, token)); - } - - static TokenRange maxRange(String keyspace, Token token) - { - return new TokenRange(new TokenKey(keyspace, token), SentinelKey.max(keyspace)); - } - - static TokenRange fullRange(String keyspace) - { - return new TokenRange(SentinelKey.min(keyspace), SentinelKey.max(keyspace)); - } - - static TokenRange range(String keyspace, Range range) - { - Token minToken = range.left.minValue(); - return new TokenRange(range.left.equals(minToken) ? SentinelKey.min(keyspace) : new TokenKey(keyspace, range.left), - range.right.equals(minToken) ? SentinelKey.max(keyspace) : new TokenKey(keyspace, range.right)); - } - - public static accord.primitives.Ranges toAccordRanges(String keyspace, Collection> ranges) - { - List> normalizedRanges = Range.normalize(ranges); - TokenRange[] tokenRanges = new TokenRange[normalizedRanges.size()]; - for (int i = 0; i < normalizedRanges.size(); i++) - tokenRanges[i] = range(keyspace, normalizedRanges.get(i)); - return Ranges.of(tokenRanges); - } - - public static List createShards(KeyspaceMetadata keyspace, DataPlacements placements, Directory directory) - { - ReplicationParams replication = keyspace.params.replication; - DataPlacement placement = placements.get(replication); - - List> ranges = placement.reads.ranges(); - List shards = new ArrayList<>(ranges.size()); - for (Range range : ranges) - { - // TODO (consider, low priority): flesh out how Accord and Transient Replicas work together - // Accord needs to be able to read the full data from a single replica, but with transient ones they may only have a hash. - EndpointsForRange reads = placement.reads.forRange(range).get().filter(r -> r.isFull()); - EndpointsForRange writes = placement.writes.forRange(range).get().filter(r -> r.isFull()); - - // TCM doesn't create wrap around ranges - Invariants.checkArgument(!range.isWrapAround() || range.right.equals(range.right.minValue()), - "wrap around range %s found", range); - shards.add(createShard(range(keyspace.name, range), directory, reads, writes)); - } - - return shards; - } - - public static Topology createAccordTopology(ClusterMetadata cm, Predicate keyspacePredicate) - { - List shards = new ArrayList<>(); - for (KeyspaceMetadata keyspace : cm.schema.getKeyspaces()) - { - if (!keyspacePredicate.test(keyspace.name)) - continue; - shards.addAll(createShards(keyspace, cm.placements, cm.directory)); - } - shards.sort((a, b) -> a.range.compare(b.range)); - return new Topology(cm.epoch.getEpoch(), shards.toArray(new Shard[0])); - } - - public static EndpointMapping directoryToMapping(EndpointMapping mapping, long epoch, Directory directory) - { - EndpointMapping.Builder builder = EndpointMapping.builder(epoch); - for (NodeId id : directory.peerIds()) - builder.add(directory.endpoint(id), tcmIdToAccord(id)); - - // There are cases where nodes are removed from the cluster (host replacement, decom, etc.), but inflight events may still be happening; - // keep the ids around so pending events do not fail with a mapping error - for (Node.Id id : mapping.differenceIds(builder)) - builder.add(mapping.mappedEndpoint(id), id); - return builder.build(); - } - - public static Topology createAccordTopology(ClusterMetadata metadata) - { - return createAccordTopology(metadata, metadata.accordKeyspaces::contains); - } -} diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index c1da9da190f3..8cfd3581ee89 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -59,6 +59,7 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; @@ -407,13 +408,13 @@ public boolean containsLocally(TxnId txnId) public Iterable search(AbstractKeys keys) { - // group by the keyspace, as ranges are based off TokenKey, which is scoped to a range - Map> groupByKeyspace = new TreeMap<>(); + // group by the table, as ranges are based off TokenKey, which is scoped to a range + Map> groupByTable = new TreeMap<>(); for (Key key : keys) - groupByKeyspace.computeIfAbsent(((PartitionKey) key).keyspace(), ignore -> new ArrayList<>()).add(key); + groupByTable.computeIfAbsent(((PartitionKey) key).table(), ignore -> new ArrayList<>()).add(key); return () -> new AbstractIterator() { - Iterator ksIt = groupByKeyspace.keySet().iterator(); + Iterator tblIt = groupByTable.keySet().iterator(); Iterator>> rangeIt; @Override @@ -427,13 +428,13 @@ protected DomainInfo computeNext() return result(next.getKey(), next.getValue()); } rangeIt = null; - if (!ksIt.hasNext()) + if (!tblIt.hasNext()) { - ksIt = null; + tblIt = null; return endOfData(); } - String ks = ksIt.next(); - List keys = groupByKeyspace.get(ks); + TableId tbl = tblIt.next(); + List keys = groupByTable.get(tbl); Map> groupByRange = new TreeMap<>(Range::compare); for (Key key : keys) { diff --git a/src/java/org/apache/cassandra/service/accord/EndpointMapping.java b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java index 0c964d3204a0..916cea43bf82 100644 --- a/src/java/org/apache/cassandra/service/accord/EndpointMapping.java +++ b/src/java/org/apache/cassandra/service/accord/EndpointMapping.java @@ -58,13 +58,13 @@ public Set differenceIds(Builder builder) } @Override - public Node.Id mappedId(InetAddressAndPort endpoint) + public Node.Id mappedIdOrNull(InetAddressAndPort endpoint) { return mapping.inverse().get(endpoint); } @Override - public InetAddressAndPort mappedEndpoint(Node.Id id) + public InetAddressAndPort mappedEndpointOrNull(Node.Id id) { return mapping.get(id); } diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 5610ebd9c2c8..e0025b46a359 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -40,16 +40,23 @@ import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.api.AccordRoutableKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordScheduler; import org.apache.cassandra.service.accord.txn.TxnResult; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.tcm.transformations.AddAccordTable; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.Future; @@ -72,7 +79,7 @@ default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List String ks = cfs.keyspace.getName(); Ranges accordRanges = Ranges.of(ranges .stream() - .map(r -> new TokenRange(new TokenKey(ks, r.left), new TokenKey(ks, r.right))) + .map(r -> new TokenRange(new TokenKey(cfs.getTableId(), r.left), new TokenKey(cfs.getTableId(), r.right))) .collect(Collectors.toList()) .toArray(new accord.primitives.Range[0])); try @@ -109,10 +116,10 @@ default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List /** * Temporary method to avoid double-streaming keyspaces - * @param keyspace + * @param tableId * @return */ - boolean isAccordManagedKeyspace(String keyspace); + boolean isAccordManagedTable(TableId tableId); /** * Fetch the redundnant befores for every command store @@ -121,23 +128,45 @@ default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List default Id nodeId() { throw new UnsupportedOperationException(); } - default void maybeConvertKeyspacesToAccord(Txn txn) + default void maybeConvertTablesToAccord(Txn txn) { - Set allKeyspaces = new HashSet<>(); - txn.keys().forEach(key -> allKeyspaces.add(((AccordRoutableKey) key).keyspace())); + Set allTables = new HashSet<>(); + Set newTables = new HashSet<>(); + txn.keys().forEach(key -> { + TableId table = ((AccordRoutableKey) key).table(); + if (allTables.add(table) && !isAccordManagedTable(table)) + newTables.add(table); + }); - for (String keyspace : allKeyspaces) - { + if (newTables.isEmpty()) + return; - ensureKeyspaceIsAccordManaged(keyspace); - } + for (TableId table : newTables) + AddAccordTable.addTable(table); + + // we need to avoid creating a txnId in an epoch when no one has any ranges + FBUtilities.waitOnFuture(epochReady(ClusterMetadata.current().epoch)); - for (String keyspace : allKeyspaces) + for (TableId table : allTables) { - if (!AccordService.instance().isAccordManagedKeyspace(keyspace)) - throw new IllegalStateException(keyspace + " is not an accord managed keyspace"); + if (!isAccordManagedTable(table)) + throw new IllegalStateException(table + " is not an accord managed table"); } } - void ensureKeyspaceIsAccordManaged(String keyspace); + void ensureTableIsAccordManaged(TableId tableId); + + default void ensureTableIsAccordManaged(String keyspace, String table) + { + // TODO: remove when accord enabled is handled via schema + TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); + ensureTableIsAccordManaged(metadata.id); + } + + default void ensureKeyspaceIsAccordManaged(String keyspace) + { + // TODO: remove when accord enabled is handled via schema + Keyspace ks = Keyspace.open(keyspace); + ks.getMetadata().tables.forEach(metadata -> ensureTableIsAccordManaged(metadata.id)); + } } diff --git a/src/java/org/apache/cassandra/service/accord/TokenRange.java b/src/java/org/apache/cassandra/service/accord/TokenRange.java index 613c30f97f13..b03eaf39d9a7 100644 --- a/src/java/org/apache/cassandra/service/accord/TokenRange.java +++ b/src/java/org/apache/cassandra/service/accord/TokenRange.java @@ -32,6 +32,7 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; @@ -40,25 +41,25 @@ public class TokenRange extends Range.EndInclusive public TokenRange(AccordRoutingKey start, AccordRoutingKey end) { super(start, end); - Invariants.checkArgument(start.keyspace().equals(end.keyspace()), + Invariants.checkArgument(start.table().equals(end.table()), "Token ranges cannot cover more than one keyspace start:%s, end:%s", start, end); } - public String keyspace() + public TableId table() { - return ((AccordRoutingKey) start()).keyspace(); + return ((AccordRoutingKey) start()).table(); } @VisibleForTesting - public Range withKeyspace(String ks) + public Range withTable(TableId table) { - return new TokenRange(((AccordRoutingKey) start()).withKeyspace(ks), ((AccordRoutingKey) end()).withKeyspace(ks)); + return new TokenRange(((AccordRoutingKey) start()).withTable(table), ((AccordRoutingKey) end()).withTable(table)); } - public static TokenRange fullRange(String keyspace) + public static TokenRange fullRange(TableId table) { - return new TokenRange(SentinelKey.min(keyspace), SentinelKey.max(keyspace)); + return new TokenRange(SentinelKey.min(table), SentinelKey.max(table)); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index f24b30fa994f..f9ab58387d75 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -103,7 +103,7 @@ public void onStale(Timestamp staleSince, Ranges ranges) @Override public void onUncaughtException(Throwable t) { - // TODO: this + logger.error("Uncaught accord exception", t); JVMStabilityInspector.uncaughtException(Thread.currentThread(), t); } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java index 6ce68db83820..1869310f5b9b 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java @@ -22,25 +22,30 @@ import accord.primitives.RoutableKey; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; public abstract class AccordRoutableKey implements RoutableKey { - final String keyspace; // TODO (desired): use an id (TrM) + final TableId table; // TODO (desired): use an id (TrM) - protected AccordRoutableKey(String keyspace) + protected AccordRoutableKey(TableId table) { - this.keyspace = keyspace; + this.table = table; + } + + public TableId table() + { + return table; } - public final String keyspace() { return keyspace; } public abstract Token token(); @Override public Object prefix() { - return keyspace; + return table; } @Override @@ -52,7 +57,7 @@ public String toString() @Override public int hashCode() { - return Objects.hash(keyspace, token().tokenHash()); + return Objects.hash(table, token().tokenHash()); } @Override @@ -63,7 +68,7 @@ public final int compareTo(RoutableKey that) public final int compareTo(AccordRoutableKey that) { - int cmp = this.keyspace().compareTo(that.keyspace()); + int cmp = this.table().compareTo(that.table()); if (cmp != 0) return cmp; @@ -80,7 +85,7 @@ public final int compareTo(AccordRoutableKey that) if (this.getClass() == TokenKey.class) return that.getClass() == TokenKey.class ? 0 : 1; - return that.getClass() == TokenKey.class ? -1 : ((PartitionKey)this).tableId.compareTo(((PartitionKey)that).tableId); + return that.getClass() == TokenKey.class ? -1 : 0; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index b18dcafecae3..5b089267a105 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -40,6 +40,7 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; @@ -53,14 +54,14 @@ public enum RoutingKeyKind TOKEN, SENTINEL } - protected AccordRoutingKey(String keyspace) + protected AccordRoutingKey(TableId table) { - super(keyspace); + super(table); } public abstract RoutingKeyKind kindOfRoutingKey(); public abstract long estimatedSizeOnHeap(); - public abstract AccordRoutingKey withKeyspace(String ks); + public abstract AccordRoutingKey withTable(TableId table); public SentinelKey asSentinelKey() { @@ -84,16 +85,16 @@ public static final class SentinelKey extends AccordRoutingKey private final boolean isMin; - private SentinelKey(String keyspace, boolean isMin) + private SentinelKey(TableId table, boolean isMin) { - super(keyspace); + super(table); this.isMin = isMin; } @Override public int hashCode() { - return Objects.hash(keyspace, isMin); + return Objects.hash(table, isMin); } @Override @@ -108,26 +109,25 @@ public long estimatedSizeOnHeap() return EMPTY_SIZE; } - @Override - public AccordRoutingKey withKeyspace(String ks) + public AccordRoutingKey withTable(TableId table) { - return new SentinelKey(ks, isMin); + return new SentinelKey(table, isMin); } - public static SentinelKey min(String keyspace) + public static SentinelKey min(TableId table) { - return new SentinelKey(keyspace, true); + return new SentinelKey(table, true); } - public static SentinelKey max(String keyspace) + public static SentinelKey max(TableId table) { - return new SentinelKey(keyspace, false); + return new SentinelKey(table, false); } public TokenKey toTokenKey() { IPartitioner partitioner = getPartitioner(); - return new TokenKey(keyspace, isMin ? + return new TokenKey(table, isMin ? partitioner.getMinimumToken().nextValidToken() : partitioner.getMaximumToken().decreaseSlightly()); } @@ -154,22 +154,22 @@ public String suffix() @Override public void serialize(SentinelKey key, DataOutputPlus out, int version) throws IOException { + key.table.serialize(out); out.writeBoolean(key.isMin); - out.writeUTF(key.keyspace); } @Override public SentinelKey deserialize(DataInputPlus in, int version) throws IOException { + TableId table = TableId.deserialize(in); boolean isMin = in.readBoolean(); - String keyspace = in.readUTF(); - return new SentinelKey(keyspace, isMin); + return new SentinelKey(table, isMin); } @Override public long serializedSize(SentinelKey key, int version) { - return TypeSizes.BOOL_SIZE + TypeSizes.sizeof(key.keyspace); + return key.table().serializedSize() + TypeSizes.BOOL_SIZE; } }; @@ -189,8 +189,8 @@ public static final class TokenKey extends AccordRoutingKey public Range asRange() { AccordRoutingKey before = token.isMinimum() - ? new SentinelKey(keyspace, true) - : new TokenKey(keyspace, token.decreaseSlightly()); + ? new SentinelKey(table, true) + : new TokenKey(table, token.decreaseSlightly()); return new TokenRange(before, this); } @@ -202,15 +202,15 @@ public Range asRange() } final Token token; - public TokenKey(String keyspace, Token token) + public TokenKey(TableId tableId, Token token) { - super(keyspace); + super(tableId); this.token = token; } public TokenKey withToken(Token token) { - return new TokenKey(keyspace, token); + return new TokenKey(table, token); } @Override @@ -236,10 +236,9 @@ public long estimatedSizeOnHeap() return EMPTY_SIZE + token().getHeapSize(); } - @Override - public AccordRoutingKey withKeyspace(String ks) + public AccordRoutingKey withTable(TableId table) { - return new TokenKey(ks, token); + return new TokenKey(table, token); } public static final Serializer serializer = new Serializer(); @@ -250,22 +249,22 @@ private Serializer() {} @Override public void serialize(TokenKey key, DataOutputPlus out, int version) throws IOException { - out.writeUTF(key.keyspace); + key.table.serialize(out); Token.compactSerializer.serialize(key.token, out, version); } @Override public TokenKey deserialize(DataInputPlus in, int version) throws IOException { - String keyspace = in.readUTF(); + TableId table = TableId.deserialize(in); Token token = Token.compactSerializer.deserialize(in, getPartitioner(), version); - return new TokenKey(keyspace, token); + return new TokenKey(table, token); } @Override public long serializedSize(TokenKey key, int version) { - return TypeSizes.sizeof(key.keyspace) + Token.compactSerializer.serializedSize(key.token(), version); + return key.table.serializedSize() + Token.compactSerializer.serializedSize(key.token(), version); } } } @@ -370,15 +369,15 @@ public KeyspaceSplitter(EvenSplit subSplitter) @Override public List split(Ranges ranges) { - Map> byKeyspace = new TreeMap<>(); + Map> byTable = new TreeMap<>(); for (Range range : ranges) { - byKeyspace.computeIfAbsent(((AccordRoutableKey)range.start()).keyspace, ignore -> new ArrayList<>()) + byTable.computeIfAbsent(((AccordRoutableKey)range.start()).table, ignore -> new ArrayList<>()) .add(range); } List results = new ArrayList<>(); - for (List keyspaceRanges : byKeyspace.values()) + for (List keyspaceRanges : byTable.values()) { List splits = subSplitter.split(Ranges.ofSortedAndDeoverlapped(keyspaceRanges.toArray(new Range[0]))); diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index a8dac58234dd..d54afba17128 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -53,16 +53,14 @@ public final class PartitionKey extends AccordRoutableKey implements Key static { DecoratedKey key = DatabaseDescriptor.getPartitioner().decorateKey(ByteBufferUtil.EMPTY_BYTE_BUFFER); - EMPTY_SIZE = ObjectSizes.measureDeep(new PartitionKey(null, null, key)); + EMPTY_SIZE = ObjectSizes.measureDeep(new PartitionKey(null, key)); } - final TableId tableId; // TODO (expected): move to PartitionKey final DecoratedKey key; - public PartitionKey(String keyspace, TableId tableId, DecoratedKey key) + public PartitionKey(TableId tableId, DecoratedKey key) { - super(keyspace); - this.tableId = tableId; + super(tableId); this.key = key; } @@ -73,21 +71,19 @@ public static PartitionKey of(Key key) public static PartitionKey of(PartitionUpdate update) { - return new PartitionKey(update.metadata().keyspace, update.metadata().id, update.partitionKey()); + return new PartitionKey(update.metadata().id, update.partitionKey()); } public static PartitionKey of(Partition partition) { - return new PartitionKey(partition.metadata().keyspace, partition.metadata().id, partition.partitionKey()); + return new PartitionKey(partition.metadata().id, partition.partitionKey()); } public static PartitionKey of(SinglePartitionReadCommand command) { - return new PartitionKey(command.metadata().keyspace, command.metadata().id, command.partitionKey()); + return new PartitionKey(command.metadata().id, command.partitionKey()); } - public final TableId tableId() { return tableId; } - @Override public Token token() { @@ -102,7 +98,7 @@ public DecoratedKey partitionKey() @Override public RoutingKey toUnseekable() { - return new TokenKey(keyspace, token()); + return new TokenKey(table, token()); } public long estimatedSizeOnHeap() @@ -131,14 +127,14 @@ private Serializer() {} @Override public void serialize(PartitionKey key, DataOutputPlus out, int version) throws IOException { - key.tableId().serialize(out); + key.table().serialize(out); ByteBufferUtil.writeWithShortLength(key.partitionKey().getKey(), out); } public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int offset) { int position = offset; - position += key.tableId().serialize(dst, accessor, position); + position += key.table().serialize(dst, accessor, position); ByteBuffer bytes = key.partitionKey().getKey(); int numBytes = ByteBufferAccessor.instance.size(bytes); Preconditions.checkState(numBytes <= Short.MAX_VALUE); @@ -154,7 +150,7 @@ public PartitionKey deserialize(DataInputPlus in, int version) throws IOExceptio TableId tableId = TableId.deserialize(in); TableMetadata metadata = Schema.instance.getExistingTableMetadata(tableId); DecoratedKey key = metadata.partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in)); - return new PartitionKey(metadata.keyspace, tableId, key); + return new PartitionKey(tableId, key); } public PartitionKey deserialize(V src, ValueAccessor accessor, int offset) throws IOException @@ -167,7 +163,7 @@ public PartitionKey deserialize(V src, ValueAccessor accessor, int offset ByteBuffer bytes = ByteBuffer.allocate(numBytes); accessor.copyTo(src, offset, bytes, ByteBufferAccessor.instance, 0, numBytes); DecoratedKey key = metadata.partitioner.decorateKey(bytes); - return new PartitionKey(metadata.keyspace, tableId, key); + return new PartitionKey(tableId, key); } @Override @@ -178,7 +174,7 @@ public long serializedSize(PartitionKey key, int version) public long serializedSize(PartitionKey key) { - return key.tableId().serializedSize() + ByteBufferUtil.serializedSizeWithShortLength(key.partitionKey().getKey()); + return key.table().serializedSize() + ByteBufferUtil.serializedSizeWithShortLength(key.partitionKey().getKey()); } } } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 16b2e8673067..52f632c6c5ea 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -125,10 +125,17 @@ private void referenceAndAssembleReadsForKey(RoutableKey key, List> listenChains) { referenceAndAssembleReadsForKey(key, context.timestampsForKey, commandStore.timestampsForKeyCache(), listenChains); - if (keyHistory == KeyHistory.DEPS) - referenceAndAssembleReadsForKey(key, context.depsCommandsForKeys, commandStore.depsCommandsForKeyCache(), listenChains); - if (keyHistory == KeyHistory.ALL) - referenceAndAssembleReadsForKey(key, context.allCommandsForKeys, commandStore.allCommandsForKeyCache(), listenChains); + // recovery operations also need the deps data for their preaccept logic + switch (keyHistory) + { + case ALL: + referenceAndAssembleReadsForKey(key, context.allCommandsForKeys, commandStore.allCommandsForKeyCache(), listenChains); + case DEPS: + referenceAndAssembleReadsForKey(key, context.depsCommandsForKeys, commandStore.depsCommandsForKeyCache(), listenChains); + case NONE: + break; + default: throw new IllegalArgumentException("Unhandled keyhistory: " + keyHistory); + } referenceAndAssembleReadsForKey(key, context.updatesForKeys, commandStore.updatesForKeyCache(), listenChains); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index e93c1dd20b35..69002b1fefe6 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -210,6 +210,7 @@ Iterable keys() private void fail(Throwable throwable) { + commandStore.agent().onUncaughtException(throwable); commandStore.checkInStoreThread(); Invariants.nonNull(throwable); diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java new file mode 100644 index 000000000000..e8d47f917875 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.fastpath; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableMap; + +import accord.local.Node; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.utils.LocalizeString; + +public interface FastPathStrategy +{ + enum Kind + { + SIMPLE, PARAMETERIZED, INHERIT_KEYSPACE; + + static final String KEY = "kind"; + private static final Map LOOKUP; + static + { + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.put(SIMPLE.name(), SIMPLE); + builder.put(PARAMETERIZED.name(), PARAMETERIZED); + builder.put(INHERIT_KEYSPACE.name(), INHERIT_KEYSPACE); + LOOKUP = builder.build(); + } + + public byte asByte() + { + return (byte) ordinal(); + } + + public static Kind fromByte(byte i) + { + return values()[i]; + } + + @Nullable + public static Kind fromString(String s) + { + return LOOKUP.get(LocalizeString.toUpperCaseLocalized(s)); + } + + @Nullable + private static Kind fromMap(Map map) + { + String name = map.remove(KEY); + return name != null ? fromString(name) : null; + } + } + + /** + * @param nodes expected to be sorted deterministically + * @param unavailable + * @param dcMap + * @return + */ + Set calculateFastPath(List nodes, Set unavailable, Map dcMap); + + Kind kind(); + + Map asMap(); + + String asCQL(); + + static FastPathStrategy fromMap(Map map) + { + if (map == null || map.isEmpty()) + return SimpleFastPathStrategy.instance; + + map = new HashMap<>(map); + Kind kind = Kind.fromMap(map); + if (kind == null) + return map.isEmpty() + ? simple() + : ParameterizedFastPathStrategy.fromMap(map); + + switch (kind) + { + case SIMPLE: + return simple(); + case PARAMETERIZED: + return ParameterizedFastPathStrategy.fromMap(map); + case INHERIT_KEYSPACE: + return inheritKeyspace(); + default: + throw new IllegalArgumentException("Unhandled strategy kind: " + kind); + } + } + + static FastPathStrategy tableStrategyFromString(String s) + { + s = LocalizeString.toLowerCaseLocalized(s).trim(); + if (s.equals("keyspace")) + return InheritKeyspaceFastPathStrategy.instance; + if (s.equals("simple")) + return SimpleFastPathStrategy.instance; + + throw new ConfigurationException("Fast path strategy must either be 'keyspace', `default` or a map size and optional dcs {'size':n, 'dcs': dc0,dc1..."); + } + + static FastPathStrategy keyspaceStrategyFromString(String s) + { + s = LocalizeString.toLowerCaseLocalized(s).trim(); + if (s.equals("simple")) + return SimpleFastPathStrategy.instance; + + throw new ConfigurationException("Fast path strategy must either be `default` or a map size and optional dcs {'size':n, 'dcs': dc0,dc1..."); + } + + static FastPathStrategy simple() + { + return SimpleFastPathStrategy.instance; + } + + static FastPathStrategy inheritKeyspace() + { + return InheritKeyspaceFastPathStrategy.instance; + } + + MetadataSerializer serializer = new MetadataSerializer() + { + public void serialize(FastPathStrategy strategy, DataOutputPlus out, Version version) throws IOException + { + Kind type = strategy.kind(); + out.write(type.asByte()); + if (type == Kind.PARAMETERIZED) + ParameterizedFastPathStrategy.serializer.serialize((ParameterizedFastPathStrategy) strategy, out, version); + } + + public FastPathStrategy deserialize(DataInputPlus in, Version version) throws IOException + { + Kind type = Kind.fromByte(in.readByte()); + switch (type) + { + case SIMPLE: + return simple(); + case PARAMETERIZED: + return ParameterizedFastPathStrategy.serializer.deserialize(in, version); + case INHERIT_KEYSPACE: + return inheritKeyspace(); + default: + throw new IllegalArgumentException("Unhandled type: " + type); + } + } + + public long serializedSize(FastPathStrategy strategy, Version version) + { + long size = TypeSizes.BYTE_SIZE; + if (strategy.kind() == Kind.PARAMETERIZED) + size += ParameterizedFastPathStrategy.serializer.serializedSize((ParameterizedFastPathStrategy) strategy, version); + return size; + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java new file mode 100644 index 000000000000..08b7763a930f --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.fastpath; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableMap; + +import accord.local.Node; + +public class InheritKeyspaceFastPathStrategy implements FastPathStrategy +{ + static final FastPathStrategy instance = new InheritKeyspaceFastPathStrategy(); + + private static final Map SCHEMA_PARAMS = ImmutableMap.of(Kind.KEY, Kind.INHERIT_KEYSPACE.name()); + + private InheritKeyspaceFastPathStrategy() {} + + @Override + public Set calculateFastPath(List nodes, Set unavailable, Map dcMap) + { + throw new IllegalStateException("InheritKeyspaceFastPathStrategy should be replaced before calculateFastPath is called"); + } + + @Override + public Kind kind() + { + return Kind.INHERIT_KEYSPACE; + } + + @Override + public String toString() + { + return "keyspace"; + } + + public Map asMap() + { + return SCHEMA_PARAMS; + } + + @Override + public String asCQL() + { + return "'keyspace'"; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java new file mode 100644 index 000000000000..8e9d00bbe6de --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java @@ -0,0 +1,375 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.fastpath; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import accord.api.VisibleForImplementation; +import accord.local.Node; +import accord.topology.Shard; +import accord.utils.Invariants; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import javax.annotation.Nonnull; + +public class ParameterizedFastPathStrategy implements FastPathStrategy +{ + static final String SIZE = "size"; + static final String DCS = "dcs"; + private static final Joiner DC_JOINER = Joiner.on(','); + private static final Pattern COMMA_SEPARATOR = Pattern.compile(","); + private static final Pattern COLON_SEPARATOR = Pattern.compile(":"); + + static class WeightedDc implements Comparable + { + private static final WeightedDc UNSPECIFIED = new WeightedDc("", Integer.MAX_VALUE, true); + private static final MetadataSerializer serializer = new MetadataSerializer() + { + public void serialize(WeightedDc dc, DataOutputPlus out, Version version) throws IOException + { + out.writeUTF(dc.name); + out.writeUnsignedVInt(dc.weight); + out.writeBoolean(dc.autoWeight); + } + + public WeightedDc deserialize(DataInputPlus in, Version version) throws IOException + { + return new WeightedDc(in.readUTF(), + in.readUnsignedVInt32(), + in.readBoolean()); + } + + public long serializedSize(WeightedDc dc, Version version) + { + return TypeSizes.sizeof(dc.name) + TypeSizes.sizeofUnsignedVInt(dc.weight) + TypeSizes.BOOL_SIZE; + } + }; + + final String name; + final int weight; + final boolean autoWeight; + + public WeightedDc(String name, int weight, boolean autoWeight) + { + this.name = name; + this.weight = weight; + this.autoWeight = autoWeight; + } + + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + WeightedDc that = (WeightedDc) o; + return weight == that.weight && autoWeight == that.autoWeight && Objects.equals(name, that.name); + } + + public int hashCode() + { + return Objects.hash(name, weight, autoWeight); + } + + @Override + public int compareTo(@Nonnull WeightedDc that) + { + int cmp = Integer.compare(this.weight, that.weight); + if (cmp != 0) return cmp; + return this.name.compareTo(that.name); + } + + public String toString() + { + return autoWeight ? name : name + ':' + weight; + } + + static String validateDC(String dc) + { + dc = dc.trim(); + if (dc.isEmpty()) + throw cfe("dc name must not be empty", DCS); + return dc; + } + + static int validateWeight(String w) + { + int weight = Integer.parseInt(w); + if (weight < 0) + throw cfe("DC weights must be zero or positive"); + return weight; + } + + static WeightedDc fromString(String s, int idx) + { + s = s.trim(); + if (s.isEmpty()) + throw cfe("%s entries must not be empty", DCS); + + String[] parts = COLON_SEPARATOR.split(s); + if (parts.length == 1) + return new WeightedDc(validateDC(parts[0]), idx, true); + else if (parts.length == 2) + return new WeightedDc(validateDC(parts[0]), validateWeight(parts[1]), false); + else + throw cfe("Invalid dc weighting syntax %s, use :"); + } + } + + public final int size; + private final ImmutableMap dcs; + + ParameterizedFastPathStrategy(int size, ImmutableMap dcs) + { + this.size = size; + this.dcs = dcs; + } + + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ParameterizedFastPathStrategy that = (ParameterizedFastPathStrategy) o; + return size == that.size && Objects.equals(dcs, that.dcs); + } + + public int hashCode() + { + return Objects.hash(size, dcs); + } + + private static class NodeSorter implements Comparable + { + private final Node.Id id; + private final int sortPos; + private final int dcIndex; + private final int health; + + public NodeSorter(Node.Id id, int sortPos, int dcIndex, int health) + { + this.id = id; + this.sortPos = sortPos; + this.dcIndex = dcIndex; + this.health = health; + } + + @Override + public int compareTo(@Nonnull NodeSorter that) + { + int cmp = this.health - that.health; + if (cmp != 0) return cmp; + + cmp = this.dcIndex - that.dcIndex; + if (cmp != 0) return cmp; + + cmp = this.sortPos - that.sortPos; + if (cmp != 0) return cmp; + + Invariants.checkState(this.id.equals(that.id)); + return 0; + } + } + + @Override + public Set calculateFastPath(List nodes, Set unavailable, Map dcMap) + { + List sorters = new ArrayList<>(nodes.size()); + + for (int i = 0, mi = nodes.size(); i < mi; i++) + { + Node.Id node = nodes.get(i); + String dc = dcMap.get(node); + int dcScore = dcs.getOrDefault(dc, WeightedDc.UNSPECIFIED).weight; + NodeSorter sorter = new NodeSorter(node, i, dcScore, unavailable.contains(node) ? 1 : 0); + sorters.add(sorter); + } + + sorters.sort(Comparator.naturalOrder()); + + int slowQuorum = Shard.slowPathQuorumSize(nodes.size()); + int fpSize = Math.max(size, slowQuorum); + ImmutableSet.Builder builder = ImmutableSet.builder(); + + for (int i=0; i fastPath = builder.build(); + Invariants.checkState(fastPath.size() >= slowQuorum); + return fastPath; + } + + private static ConfigurationException cfe(String fmt, Object... args) + { + return new ConfigurationException(String.format(fmt, args)); + } + + static ParameterizedFastPathStrategy fromMap(Map map) + { + if (!map.containsKey(SIZE)) + throw cfe("fast_path must be set to 'keyspace' or 'default' or a map defining '%s' and optionally '%s'", SIZE, DCS); + + int size; + try + { + size = Integer.parseInt(map.get(SIZE)); + } + catch (NumberFormatException e) + { + throw cfe("%s must be a positive number, got %s", SIZE, map.get(SIZE)); + } + + if (size < 1) + throw cfe("%s must be greater than zero", SIZE); + + ImmutableMap dcMap; + if (map.containsKey(DCS)) + { + + Map mutableDcs = new HashMap<>(); + String dcsString = map.get(DCS); + if (dcsString.trim().isEmpty()) + throw cfe("%s must specify at least one DC", DCS); + + int autoIdx = 0; + boolean hasAuto = false; + boolean hasManual = false; + for (String dcString : COMMA_SEPARATOR.split(dcsString)) + { + WeightedDc dc = WeightedDc.fromString(dcString, autoIdx++); + if (mutableDcs.containsKey(dc.name)) + throw cfe("Multiple entries for DC %s", dc.name); + + if (dc.autoWeight) + { + if (hasManual) throw cfe("Cannot mix auto and manual DC weights"); + hasAuto = true; + } + else + { + if (hasAuto) throw cfe("Cannot mix auto and manual DC weights"); + hasManual = true; + } + + mutableDcs.put(dc.name, dc); + } + dcMap = ImmutableMap.copyOf(mutableDcs); + } + else + { + dcMap = ImmutableMap.of(); + } + + Set keys = new HashSet<>(map.keySet()); + keys.remove(SIZE); + keys.remove(DCS); + if (!keys.isEmpty()) + throw cfe("Unrecognized fast path options provided: ", keys); + + return new ParameterizedFastPathStrategy(size, dcMap); + } + + @Override + public Kind kind() + { + return Kind.PARAMETERIZED; + } + + @VisibleForImplementation + public Iterable dcStrings() + { + return dcs.values().stream().sorted().map(Object::toString).collect(Collectors.toList()); + } + + @Override + public String toString() + { + StringBuilder sb = new StringBuilder("{"); + sb.append('\'').append(SIZE).append("':").append(size); + if (!dcs.isEmpty()) + sb.append(", ").append(DCS).append(':').append('\'').append(DC_JOINER.join(dcStrings())).append('\''); + + return sb.append('}').toString(); + } + + public Map asMap() + { + Map params = new HashMap<>(); + params.put(Kind.KEY, kind().name()); + params.put(SIZE, Integer.toString(size)); + params.put(DCS, DC_JOINER.join(dcStrings())); + return params; + } + + @Override + public String asCQL() + { + return toString(); + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + public void serialize(ParameterizedFastPathStrategy strategy, DataOutputPlus out, Version version) throws IOException + { + out.writeUnsignedVInt32(strategy.size); + out.writeUnsignedVInt32(strategy.dcs.size()); + for (WeightedDc dc : strategy.dcs.values()) + WeightedDc.serializer.serialize(dc, out, version); + } + + public ParameterizedFastPathStrategy deserialize(DataInputPlus in, Version version) throws IOException + { + int size = in.readUnsignedVInt32(); + int numDCs = in.readUnsignedVInt32(); + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (int i=0; i SCHEMA_PARAMS = ImmutableMap.of(Kind.KEY, Kind.SIMPLE.name()); + + private SimpleFastPathStrategy() {} + + @Override + public Set calculateFastPath(List nodes, Set unavailable, Map dcMap) + { + int maxFailures = Shard.maxToleratedFailures(nodes.size()); + int discarded = 0; + + ImmutableSet.Builder builder = ImmutableSet.builder(); + + for (int i=0,mi=nodes.size(); i fastPath = builder.build(); + Invariants.checkState(fastPath.size() >= Shard.slowPathQuorumSize(nodes.size())); + return fastPath; + } + + @Override + public Kind kind() + { + return Kind.SIMPLE; + } + + @Override + public String toString() + { + return "simple"; + } + + public Map asMap() + { + return SCHEMA_PARAMS; + } + + @Override + public String asCQL() + { + return "'simple'"; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index 71757ed106fa..9aa9179b38ae 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -28,6 +28,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; +import org.apache.cassandra.schema.TableId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -229,9 +230,9 @@ public boolean localReadSupported() } @Override - public EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata doNotUse, KeyspaceMetadata keyspace, Token token) + public EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata doNotUse, KeyspaceMetadata keyspace, TableId tableId, Token token) { - AccordRoutingKey.TokenKey key = new AccordRoutingKey.TokenKey(keyspace.name, token); + AccordRoutingKey.TokenKey key = new AccordRoutingKey.TokenKey(tableId, token); Shard shard = executeTopology.forKey(key); Range range = ((TokenRange) shard.range).toKeyspaceRange(); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java index 34dfced93df6..1770ae75a5e1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java @@ -90,8 +90,8 @@ public R deserialize(DataInputPlus in, int version) throws IOException public long serializedSize(R map, int version) { long size = TypeSizes.BOOL_SIZE; - size += TypeSizes.sizeofUnsignedVInt(size); int mapSize = map.size(); + size += TypeSizes.sizeofUnsignedVInt(mapSize); for (int i=0; i + public static class NodeIdSerializer implements IVersionedSerializer, MetadataSerializer { private NodeIdSerializer() {} + private static void serialize(Node.Id id, DataOutputPlus out) throws IOException + { + out.writeInt(id.id); + } + @Override public void serialize(Node.Id id, DataOutputPlus out, int version) throws IOException { - out.writeInt(id.id); + serialize(id, out); + } + + @Override + public void serialize(Node.Id id, DataOutputPlus out, Version version) throws IOException + { + serialize(id, out); } public int serialize(Node.Id id, V dst, ValueAccessor accessor, int offset) @@ -55,10 +68,21 @@ public int serialize(Node.Id id, V dst, ValueAccessor accessor, int offse return accessor.putInt(dst, offset, id.id); } + private static Node.Id deserialize(DataInputPlus in) throws IOException + { + return new Node.Id(in.readInt()); + } + @Override public Node.Id deserialize(DataInputPlus in, int version) throws IOException { - return new Node.Id(in.readInt()); + return deserialize(in); + } + + @Override + public Node.Id deserialize(DataInputPlus in, Version version) throws IOException + { + return deserialize(in); } public Node.Id deserialize(V src, ValueAccessor accessor, int offset) @@ -66,15 +90,21 @@ public Node.Id deserialize(V src, ValueAccessor accessor, int offset) return new Node.Id(accessor.getInt(src, offset)); } + public int serializedSize() + { + return TypeSizes.INT_SIZE; // id.id + } + @Override public long serializedSize(Node.Id id, int version) { return serializedSize(); } - public int serializedSize() + @Override + public long serializedSize(Node.Id t, Version version) { - return TypeSizes.INT_SIZE; // id.id + return serializedSize(); } }; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java index df41e78b53e8..bbb1076b230e 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -63,7 +63,7 @@ public TxnNamedRead(TxnDataName name, SinglePartitionReadCommand value) { super(value); this.name = name; - this.key = new PartitionKey(value.metadata().keyspace, value.metadata().id, value.partitionKey()); + this.key = new PartitionKey(value.metadata().id, value.partitionKey()); } private TxnNamedRead(TxnDataName name, PartitionKey key, ByteBuffer bytes) diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java index 6005673fa6d8..0742071b109d 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -216,6 +216,6 @@ private static boolean transactionIsInMigratingOrMigratedRange(Epoch epoch, Seek // and transaction statement will generate an error when it sees // the RetryOnNewProtocolResult PartitionKey partitionKey = (PartitionKey)keys.get(0); - return ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(epoch, partitionKey.tableId(), partitionKey.partitionKey()); + return ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(epoch, partitionKey.table(), partitionKey.partitionKey()); } } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java index 512943e4633c..d3651015c226 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java @@ -211,7 +211,7 @@ public static void reset() public static void maybeSaveAccordKeyMigrationLocally(PartitionKey partitionKey, Epoch epoch) { - TableId tableId = partitionKey.tableId(); + TableId tableId = partitionKey.table(); UUID tableUUID = tableId.asUUID(); DecoratedKey dk = partitionKey.partitionKey(); ByteBuffer key = dk.getKey(); @@ -280,7 +280,7 @@ static void repairKeyAccord(DecoratedKey key, // will soon be ready to execute, but only waits for the local replica to be ready // Local will only create a transaction if it can't find an existing one to wait on BarrierType barrierType = global ? BarrierType.global_async : BarrierType.local; - AccordService.instance().barrier(Seekables.of(new PartitionKey(keyspace, tableId, key)), minEpoch, requestTime, DatabaseDescriptor.getTransactionTimeout(TimeUnit.NANOSECONDS), barrierType, isForWrite); + AccordService.instance().barrier(Seekables.of(new PartitionKey(tableId, key)), minEpoch, requestTime, DatabaseDescriptor.getTransactionTimeout(TimeUnit.NANOSECONDS), barrierType, isForWrite); // We don't save the state to the cache here. Accord will notify the agent every time a barrier happens. } finally diff --git a/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java b/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java index 286a5b85d780..1e2d06cfb0b9 100644 --- a/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java +++ b/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java @@ -204,6 +204,7 @@ public static AbstractReadExecutor getReadExecutor(ClusterMetadata metadata, ReplicaPlan.ForTokenRead replicaPlan = ReplicaPlans.forRead(metadata, keyspace, + command.metadata().id, command.partitionKey().getToken(), command.indexQueryPlan(), consistencyLevel, diff --git a/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java b/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java index 8f41464779b1..d9d8a7b562ab 100644 --- a/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java +++ b/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java @@ -29,6 +29,7 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.tcm.ClusterMetadata; public interface ReadCoordinator @@ -40,7 +41,7 @@ public boolean localReadSupported() return true; } - public EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token) + public EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, TableId tableId, Token token) { return ReplicaLayout.forNonLocalStrategyTokenRead(metadata, keyspace, token); } @@ -62,7 +63,7 @@ public boolean isEventuallyConsistent() }; boolean localReadSupported(); - EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token); + EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, TableId tableId, Token token); default ReadCommand maybeAllowOutOfRangeReads(ReadCommand command) { return command; diff --git a/src/java/org/apache/cassandra/service/reads/range/RangeCommands.java b/src/java/org/apache/cassandra/service/reads/range/RangeCommands.java index cebd3bdf68ee..bd2acfbd6f14 100644 --- a/src/java/org/apache/cassandra/service/reads/range/RangeCommands.java +++ b/src/java/org/apache/cassandra/service/reads/range/RangeCommands.java @@ -76,6 +76,7 @@ static RangeCommandIterator rangeCommandIterator(PartitionRangeReadCommand comma ReplicaPlanIterator replicaPlans = new ReplicaPlanIterator(command.dataRange().keyRange(), command.indexQueryPlan(), keyspace, + command.metadata().id(), consistencyLevel); if (command.isTopK()) @@ -107,7 +108,7 @@ static RangeCommandIterator rangeCommandIterator(PartitionRangeReadCommand comma Tracing.trace("Submitting range requests on {} ranges with a concurrency of {}", replicaPlans.size(), concurrencyFactor); } - ReplicaPlanMerger mergedReplicaPlans = new ReplicaPlanMerger(replicaPlans, keyspace, consistencyLevel); + ReplicaPlanMerger mergedReplicaPlans = new ReplicaPlanMerger(replicaPlans, keyspace, command.metadata().id(), consistencyLevel); return new RangeCommandIterator(mergedReplicaPlans, command, concurrencyFactor, @@ -147,11 +148,12 @@ public static boolean sufficientLiveNodesForSelectStar(TableMetadata metadata, C ReplicaPlanIterator rangeIterator = new ReplicaPlanIterator(DataRange.allData(metadata.partitioner).keyRange(), null, keyspace, + metadata.id, consistency); // Called for the side effect of running assureSufficientLiveReplicasForRead. // Deliberately called with an invalid vnode count in case it is used elsewhere in the future.. - rangeIterator.forEachRemaining(r -> ReplicaPlans.forRangeRead(keyspace, null, consistency, r.range(), -1)); + rangeIterator.forEachRemaining(r -> ReplicaPlans.forRangeRead(keyspace, metadata.id, null, consistency, r.range(), -1)); return true; } catch (UnavailableException e) diff --git a/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanIterator.java b/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanIterator.java index 969247b7227a..e138fab4f122 100644 --- a/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanIterator.java +++ b/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanIterator.java @@ -37,6 +37,7 @@ import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.locator.ReplicaPlans; import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.compatibility.TokenRingUtils; import org.apache.cassandra.utils.AbstractIterator; @@ -46,6 +47,7 @@ class ReplicaPlanIterator extends AbstractIterator { private final Keyspace keyspace; private final ConsistencyLevel consistency; + private final TableId tableId; private final Index.QueryPlan indexQueryPlan; @VisibleForTesting final Iterator> ranges; @@ -54,10 +56,12 @@ class ReplicaPlanIterator extends AbstractIterator ReplicaPlanIterator(AbstractBounds keyRange, @Nullable Index.QueryPlan indexQueryPlan, Keyspace keyspace, + TableId tableId, ConsistencyLevel consistency) { this.indexQueryPlan = indexQueryPlan; this.keyspace = keyspace; + this.tableId = tableId; this.consistency = consistency; ReplicationParams replication = keyspace.getMetadata().params.replication; @@ -82,7 +86,7 @@ protected ReplicaPlan.ForRangeRead computeNext() if (!ranges.hasNext()) return endOfData(); - return ReplicaPlans.forRangeRead(keyspace, indexQueryPlan, consistency, ranges.next(), 1); + return ReplicaPlans.forRangeRead(keyspace, tableId, indexQueryPlan, consistency, ranges.next(), 1); } /** diff --git a/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanMerger.java b/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanMerger.java index 20e9562f9311..743ac8d8e6e9 100644 --- a/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanMerger.java +++ b/src/java/org/apache/cassandra/service/reads/range/ReplicaPlanMerger.java @@ -27,6 +27,7 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.locator.ReplicaPlans; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.AbstractIterator; @@ -34,11 +35,13 @@ class ReplicaPlanMerger extends AbstractIterator { private final Keyspace keyspace; private final ConsistencyLevel consistency; + private final TableId tableId; private final PeekingIterator ranges; - ReplicaPlanMerger(Iterator iterator, Keyspace keyspace, ConsistencyLevel consistency) + ReplicaPlanMerger(Iterator iterator, Keyspace keyspace, TableId tableId, ConsistencyLevel consistency) { this.keyspace = keyspace; + this.tableId = tableId; this.consistency = consistency; this.ranges = Iterators.peekingIterator(iterator); } @@ -66,7 +69,7 @@ protected ReplicaPlan.ForRangeRead computeNext() break; ReplicaPlan.ForRangeRead next = ranges.peek(); - ReplicaPlan.ForRangeRead merged = ReplicaPlans.maybeMerge(metadata, keyspace, consistency, current, next); + ReplicaPlan.ForRangeRead merged = ReplicaPlans.maybeMerge(metadata, keyspace, tableId, consistency, current, next); if (merged == null) break; diff --git a/src/java/org/apache/cassandra/streaming/StreamPlan.java b/src/java/org/apache/cassandra/streaming/StreamPlan.java index 47fa9e1463bf..88f77e886716 100644 --- a/src/java/org/apache/cassandra/streaming/StreamPlan.java +++ b/src/java/org/apache/cassandra/streaming/StreamPlan.java @@ -24,6 +24,8 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.RangesAtEndpoint; import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.TimeUUID; import static com.google.common.collect.Iterables.all; @@ -225,4 +227,31 @@ public StreamCoordinator getCoordinator() { return coordinator; } + + /** + * Returns an array containing the non-accord tables for the given keyspace. Since the relevant StreamPlan methods + * interpret an empty array to mean all tables, null is returned if there are no non-accord tables in + * the given keyspace + * @param ksm + * @return + */ + public static String[] nonAccordTablesForKeyspace(KeyspaceMetadata ksm) + { + String[] result = ksm.tables.stream() + .filter(tbl -> !AccordService.instance().isAccordManagedTable(tbl.id)) + .map(tbl -> tbl.name) + .toArray(String[]::new); + + return result.length > 0 ? result : null; + } + + public static boolean hasNonAccordTables(KeyspaceMetadata ksm) + { + return ksm.tables.stream().anyMatch(tbl -> !AccordService.instance().isAccordManagedTable(tbl.id)); + } + + public static boolean hasAccordTables(KeyspaceMetadata ksm) + { + return ksm.tables.stream().anyMatch(tbl -> AccordService.instance().isAccordManagedTable(tbl.id)); + } } diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 8dfe9a6790ad..2c2e3b7d300f 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -32,10 +32,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.local.Node; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.dht.IPartitioner; @@ -59,13 +61,9 @@ import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; import org.apache.cassandra.tcm.extensions.ExtensionKey; import org.apache.cassandra.tcm.extensions.ExtensionValue; -import org.apache.cassandra.tcm.membership.Directory; -import org.apache.cassandra.tcm.membership.Location; -import org.apache.cassandra.tcm.membership.NodeAddresses; -import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.tcm.membership.NodeState; -import org.apache.cassandra.tcm.membership.NodeVersion; -import org.apache.cassandra.tcm.ownership.AccordKeyspaces; +import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.tcm.membership.*; +import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PrimaryRangeComparator; @@ -82,6 +80,7 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static org.apache.cassandra.config.CassandraRelevantProperties.LINE_SEPARATOR; import static org.apache.cassandra.db.TypeSizes.sizeof; +import static org.apache.cassandra.tcm.serialization.Version.V2; public class ClusterMetadata { @@ -97,7 +96,8 @@ public class ClusterMetadata public final Directory directory; public final TokenMap tokenMap; public final DataPlacements placements; - public final AccordKeyspaces accordKeyspaces; + public final AccordTables accordTables; + public final AccordFastPath accordFastPath; public final LockedRanges lockedRanges; public final InProgressSequences inProgressSequences; public final ConsensusMigrationState consensusMigrationState; @@ -133,7 +133,8 @@ public ClusterMetadata(IPartitioner partitioner, Directory directory, Distribute directory, new TokenMap(partitioner), DataPlacements.EMPTY, - AccordKeyspaces.EMPTY, + AccordTables.EMPTY, + AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusMigrationState.EMPTY, @@ -146,7 +147,8 @@ public ClusterMetadata(Epoch epoch, Directory directory, TokenMap tokenMap, DataPlacements placements, - AccordKeyspaces accordKeyspaces, + AccordTables accordTables, + AccordFastPath accordFastPath, LockedRanges lockedRanges, InProgressSequences inProgressSequences, ConsensusMigrationState consensusMigrationState, @@ -159,7 +161,8 @@ public ClusterMetadata(Epoch epoch, directory, tokenMap, placements, - accordKeyspaces, + accordTables, + accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, @@ -173,7 +176,8 @@ private ClusterMetadata(int metadataIdentifier, Directory directory, TokenMap tokenMap, DataPlacements placements, - AccordKeyspaces accordKeyspaces, + AccordTables accordTables, + AccordFastPath accordFastPath, LockedRanges lockedRanges, InProgressSequences inProgressSequences, ConsensusMigrationState consensusMigrationState, @@ -190,7 +194,8 @@ private ClusterMetadata(int metadataIdentifier, this.directory = directory; this.tokenMap = tokenMap; this.placements = placements; - this.accordKeyspaces = accordKeyspaces; + this.accordTables = accordTables; + this.accordFastPath = accordFastPath; this.lockedRanges = lockedRanges; this.inProgressSequences = inProgressSequences; this.consensusMigrationState = consensusMigrationState; @@ -200,12 +205,12 @@ private ClusterMetadata(int metadataIdentifier, public ClusterMetadata withDirectory(Directory directory) { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordKeyspaces, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordTables, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); } public ClusterMetadata withPlacements(DataPlacements placements) { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordKeyspaces, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordTables, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); } public Set fullCMSMembers() @@ -257,7 +262,8 @@ public ClusterMetadata forceEpoch(Epoch epoch) capLastModified(directory, epoch), capLastModified(tokenMap, epoch), capLastModified(placements, epoch), - capLastModified(accordKeyspaces, epoch), + capLastModified(accordTables, epoch), + capLastModified(accordFastPath, epoch), capLastModified(lockedRanges, epoch), capLastModified(inProgressSequences, epoch), capLastModified(consensusMigrationState, epoch), @@ -279,7 +285,8 @@ public ClusterMetadata initializeClusterIdentifier(int clusterIdentifier) directory, tokenMap, placements, - accordKeyspaces, + accordTables, + accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, @@ -406,7 +413,8 @@ public static class Transformer private Directory directory; private TokenMap tokenMap; private DataPlacements placements; - private AccordKeyspaces accordKeyspaces; + private AccordTables accordTables; + private AccordFastPath accordFastPath; private LockedRanges lockedRanges; private InProgressSequences inProgressSequences; private ConsensusMigrationState consensusMigrationState; @@ -422,7 +430,8 @@ private Transformer(ClusterMetadata metadata, Epoch epoch) this.directory = metadata.directory; this.tokenMap = metadata.tokenMap; this.placements = metadata.placements; - this.accordKeyspaces = metadata.accordKeyspaces; + this.accordTables = metadata.accordTables; + this.accordFastPath = metadata.accordFastPath; this.lockedRanges = metadata.lockedRanges; this.inProgressSequences = metadata.inProgressSequences; this.consensusMigrationState = metadata.consensusMigrationState; @@ -543,9 +552,15 @@ public Transformer with(DataPlacements placements) return this; } - public Transformer withAccordKeyspace(String keyspace) + public Transformer withAccordTable(TableId table) { - accordKeyspaces = accordKeyspaces.with(keyspace); + accordTables = accordTables.with(table); + return this; + } + + public Transformer withFastPathStatusSince(Node.Id node, AccordFastPath.Status status, long updateTimeMillis, long updateDelayMillis) + { + accordFastPath = accordFastPath.withNodeStatusSince(node, status, updateTimeMillis, updateDelayMillis); return this; } @@ -640,6 +655,9 @@ public Transformed build() { modifiedKeys.add(MetadataKeys.NODE_DIRECTORY); directory = directory.withLastModified(epoch); + + for (NodeId peer : Sets.difference(base.directory.peerIds(), directory.peerIds())) + accordFastPath = accordFastPath.withoutNode(peer); } if (tokenMap != base.tokenMap) @@ -660,10 +678,16 @@ public Transformed build() placements = placements.withLastModified(epoch); } - if (accordKeyspaces != base.accordKeyspaces) + if (accordTables != base.accordTables) { - modifiedKeys.add(MetadataKeys.ACCORD_KEYSPACES); - accordKeyspaces = accordKeyspaces.withLastModified(epoch); + modifiedKeys.add(MetadataKeys.ACCORD_TABLES); + accordTables = accordTables.withLastModified(epoch); + } + + if (accordFastPath != base.accordFastPath) + { + modifiedKeys.add(MetadataKeys.ACCORD_FAST_PATH); + accordFastPath = accordFastPath.withLastModified(epoch); } if (lockedRanges != base.lockedRanges) @@ -691,7 +715,8 @@ public Transformed build() directory, tokenMap, placements, - accordKeyspaces, + accordTables, + accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, @@ -708,7 +733,8 @@ public ClusterMetadata buildForGossipMode() directory, tokenMap, placements, - accordKeyspaces, + accordTables, + accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, @@ -726,6 +752,7 @@ public String toString() ", directory=" + schema + ", tokenMap=" + tokenMap + ", placement=" + placements + + ", availability=" + accordFastPath + ", lockedRanges=" + lockedRanges + ", inProgressSequences=" + inProgressSequences + ", consensusMigrationState=" + consensusMigrationState + @@ -839,7 +866,7 @@ public boolean equals(Object o) directory.equals(that.directory) && tokenMap.equals(that.tokenMap) && placements.equals(that.placements) && - accordKeyspaces.equals(that.accordKeyspaces) && + accordTables.equals(that.accordTables) && lockedRanges.equals(that.lockedRanges) && inProgressSequences.equals(that.inProgressSequences) && consensusMigrationState.equals(that.consensusMigrationState) && @@ -891,7 +918,7 @@ public void dumpDiff(ClusterMetadata other) @Override public int hashCode() { - return Objects.hash(epoch, schema, directory, tokenMap, placements, accordKeyspaces, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return Objects.hash(epoch, schema, directory, tokenMap, placements, accordTables, lockedRanges, inProgressSequences, consensusMigrationState, extensions); } public static ClusterMetadata current() @@ -968,7 +995,11 @@ public void serialize(ClusterMetadata metadata, DataOutputPlus out, Version vers Directory.serializer.serialize(metadata.directory, out, version); TokenMap.serializer.serialize(metadata.tokenMap, out, version); DataPlacements.serializer.serialize(metadata.placements, out, version); - AccordKeyspaces.serializer.serialize(metadata.accordKeyspaces, out, version); + if (version.isAtLeast(V2)) + { + AccordTables.serializer.serialize(metadata.accordTables, out, version); + AccordFastPath.serializer.serialize(metadata.accordFastPath, out, version); + } LockedRanges.serializer.serialize(metadata.lockedRanges, out, version); InProgressSequences.serializer.serialize(metadata.inProgressSequences, out, version); ConsensusMigrationState.serializer.serialize(metadata.consensusMigrationState, out, version); @@ -1006,7 +1037,18 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE Directory dir = Directory.serializer.deserialize(in, version); TokenMap tokenMap = TokenMap.serializer.deserialize(in, version); DataPlacements placements = DataPlacements.serializer.deserialize(in, version); - AccordKeyspaces accordKeyspaces = AccordKeyspaces.serializer.deserialize(in, version); + AccordTables accordTables; + AccordFastPath accordFastPath; + if (version.isAtLeast(V2)) + { + accordTables = AccordTables.serializer.deserialize(in, version); + accordFastPath = AccordFastPath.serializer.deserialize(in, version); + } + else + { + accordTables = AccordTables.EMPTY; + accordFastPath = AccordFastPath.EMPTY; + } LockedRanges lockedRanges = LockedRanges.serializer.deserialize(in, version); InProgressSequences ips = InProgressSequences.serializer.deserialize(in, version); ConsensusMigrationState consensusMigrationState = ConsensusMigrationState.serializer.deserialize(in, version); @@ -1026,7 +1068,8 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE dir, tokenMap, placements, - accordKeyspaces, + accordTables, + accordFastPath, lockedRanges, ips, consensusMigrationState, @@ -1050,10 +1093,17 @@ public long serializedSize(ClusterMetadata metadata, Version version) Directory.serializer.serializedSize(metadata.directory, version) + TokenMap.serializer.serializedSize(metadata.tokenMap, version) + DataPlacements.serializer.serializedSize(metadata.placements, version) + - AccordKeyspaces.serializer.serializedSize(metadata.accordKeyspaces, version) + - LockedRanges.serializer.serializedSize(metadata.lockedRanges, version) + - InProgressSequences.serializer.serializedSize(metadata.inProgressSequences, version) + ConsensusMigrationState.serializer.serializedSize(metadata.consensusMigrationState, version); + DataPlacements.serializer.serializedSize(metadata.placements, version); + + if (version.isAtLeast(V2)) + { + size += AccordTables.serializer.serializedSize(metadata.accordTables, version) + + AccordFastPath.serializer.serializedSize(metadata.accordFastPath, version); + } + + size += LockedRanges.serializer.serializedSize(metadata.lockedRanges, version) + + InProgressSequences.serializer.serializedSize(metadata.inProgressSequences, version); return size; } diff --git a/src/java/org/apache/cassandra/tcm/MetadataKeys.java b/src/java/org/apache/cassandra/tcm/MetadataKeys.java index df65474a536f..1794a63889a5 100644 --- a/src/java/org/apache/cassandra/tcm/MetadataKeys.java +++ b/src/java/org/apache/cassandra/tcm/MetadataKeys.java @@ -39,7 +39,8 @@ public class MetadataKeys public static final MetadataKey NODE_DIRECTORY = make(CORE_NS, "membership", "node_directory"); public static final MetadataKey TOKEN_MAP = make(CORE_NS, "ownership", "token_map"); public static final MetadataKey DATA_PLACEMENTS = make(CORE_NS, "ownership", "data_placements"); - public static final MetadataKey ACCORD_KEYSPACES = make(CORE_NS, "ownership", "accord_keyspaces"); + public static final MetadataKey ACCORD_TABLES = make(CORE_NS, "ownership", "accord_tables"); + public static final MetadataKey ACCORD_FAST_PATH = make(CORE_NS, "ownership", "accord_fast_path"); public static final MetadataKey LOCKED_RANGES = make(CORE_NS, "sequences", "locked_ranges"); public static final MetadataKey IN_PROGRESS_SEQUENCES = make(CORE_NS, "sequences", "in_progress"); public static final MetadataKey CONSENSUS_MIGRATION_STATE = make(CORE_NS, "consensus", "migration_state"); @@ -48,7 +49,8 @@ public class MetadataKeys NODE_DIRECTORY, TOKEN_MAP, DATA_PLACEMENTS, - ACCORD_KEYSPACES, + ACCORD_TABLES, + ACCORD_FAST_PATH, LOCKED_RANGES, IN_PROGRESS_SEQUENCES, CONSENSUS_MIGRATION_STATE); diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index 34799e9b56c4..150b3934bbba 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -28,12 +28,13 @@ import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.service.accord.AccordFastPath; import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; import org.apache.cassandra.tcm.Commit.Replicator; import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LocalLog; import org.apache.cassandra.tcm.membership.Directory; -import org.apache.cassandra.tcm.ownership.AccordKeyspaces; +import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PlacementProvider; import org.apache.cassandra.tcm.ownership.TokenMap; @@ -174,7 +175,8 @@ public StubClusterMetadataService build() Directory.EMPTY, new TokenMap(partitioner), DataPlacements.EMPTY, - AccordKeyspaces.EMPTY, + AccordTables.EMPTY, + AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusTableMigrationState.ConsensusMigrationState.EMPTY, diff --git a/src/java/org/apache/cassandra/tcm/Transformation.java b/src/java/org/apache/cassandra/tcm/Transformation.java index cdbf44fcb619..f6f0aa226dda 100644 --- a/src/java/org/apache/cassandra/tcm/Transformation.java +++ b/src/java/org/apache/cassandra/tcm/Transformation.java @@ -38,7 +38,7 @@ import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; import org.apache.cassandra.tcm.serialization.VerboseMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.tcm.transformations.AddAccordKeyspace; +import org.apache.cassandra.tcm.transformations.AddAccordTable; import org.apache.cassandra.tcm.transformations.AlterSchema; import org.apache.cassandra.tcm.transformations.AlterTopology; import org.apache.cassandra.tcm.transformations.Assassinate; @@ -51,6 +51,7 @@ import org.apache.cassandra.tcm.transformations.PrepareLeave; import org.apache.cassandra.tcm.transformations.PrepareMove; import org.apache.cassandra.tcm.transformations.PrepareReplace; +import org.apache.cassandra.tcm.transformations.ReconfigureAccordFastPath; import org.apache.cassandra.tcm.transformations.Register; import org.apache.cassandra.tcm.transformations.SetConsensusMigrationTargetProtocol; import org.apache.cassandra.tcm.transformations.Startup; @@ -239,10 +240,11 @@ enum Kind CANCEL_CMS_RECONFIGURATION(34, () -> CancelCMSReconfiguration.serializer), ALTER_TOPOLOGY(35, () -> AlterTopology.serializer), - ADD_ACCORD_KEYSPACE(36, () -> AddAccordKeyspace.serializer), - BEGIN_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(37, () -> BeginConsensusMigrationForTableAndRange.serializer), - MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(38, () -> MaybeFinishConsensusMigrationForTableAndRange.serializer), - SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL(39, () -> SetConsensusMigrationTargetProtocol.serializer) + ADD_ACCORD_TABLE(36, () -> AddAccordTable.serializer), + UPDATE_AVAILABILITY(37, () -> ReconfigureAccordFastPath.serializer), + BEGIN_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(38, () -> BeginConsensusMigrationForTableAndRange.serializer), + MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(39, () -> MaybeFinishConsensusMigrationForTableAndRange.serializer), + SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL(40, () -> SetConsensusMigrationTargetProtocol.serializer) ; private final Supplier> serializer; diff --git a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java index f87278d4b380..390ead6f4d9d 100644 --- a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java +++ b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java @@ -60,13 +60,14 @@ import org.apache.cassandra.tcm.MultiStepOperation; import org.apache.cassandra.tcm.extensions.ExtensionKey; import org.apache.cassandra.tcm.extensions.ExtensionValue; +import org.apache.cassandra.service.accord.AccordFastPath; import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; -import org.apache.cassandra.tcm.ownership.AccordKeyspaces; +import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.ownership.UniformRangePlacement; @@ -296,7 +297,8 @@ public static ClusterMetadata emptyWithSchemaFromSystemTables(Set allKno Directory.EMPTY, new TokenMap(DatabaseDescriptor.getPartitioner()), DataPlacements.empty(), - AccordKeyspaces.EMPTY, + AccordTables.EMPTY, + AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusMigrationState.EMPTY, @@ -385,7 +387,8 @@ public static ClusterMetadata fromEndpointStates(Map -{ - public static final AccordKeyspaces EMPTY = new AccordKeyspaces(Epoch.EMPTY, ImmutableSet.of()); - private final Epoch lastModified; - private final ImmutableSet keyspaces; - - public AccordKeyspaces(Epoch lastModified, ImmutableSet keyspaces) - { - this.lastModified = lastModified; - this.keyspaces = keyspaces; - } - - public String toString() - { - return "AccordKeyspaces{" + lastModified + keyspaces + '}'; - } - - public AccordKeyspaces withLastModified(Epoch epoch) - { - return new AccordKeyspaces(epoch, keyspaces); - } - - public Epoch lastModified() - { - return lastModified; - } - - public boolean contains(String keyspace) - { - return keyspaces.contains(keyspace); - } - - public AccordKeyspaces with(String keyspace) - { - if (keyspaces.contains(keyspace)) - return this; - - return new AccordKeyspaces(lastModified, ImmutableSet.builder().addAll(keyspaces).add(keyspace).build()); - } - - public static final MetadataSerializer serializer = new MetadataSerializer() - { - public void serialize(AccordKeyspaces accordKeyspaces, DataOutputPlus out, Version version) throws IOException - { - int size = accordKeyspaces.keyspaces.size(); - out.writeInt(size); - String[] keyspaces = new String[size]; - accordKeyspaces.keyspaces.toArray(keyspaces); - Arrays.sort(keyspaces); - for (String keyspace : keyspaces) - out.writeUTF(keyspace); - Epoch.serializer.serialize(accordKeyspaces.lastModified, out, version); - } - - public AccordKeyspaces deserialize(DataInputPlus in, Version version) throws IOException - { - int size = in.readInt(); - ImmutableSet.Builder builder = ImmutableSet.builder(); - for (int i=0; i +{ + public static final AccordTables EMPTY = new AccordTables(Epoch.EMPTY, ImmutableSet.of()); + private final Epoch lastModified; + private final ImmutableSet tables; + + public AccordTables(Epoch lastModified, ImmutableSet tables) + { + this.lastModified = lastModified; + this.tables = tables; + } + + public String toString() + { + return "AccordTables{" + lastModified + ", " + tables + '}'; + } + + public AccordTables withLastModified(Epoch epoch) + { + return new AccordTables(epoch, tables); + } + + public Epoch lastModified() + { + return lastModified; + } + + public boolean contains(TableId table) + { + return tables.contains(table); + } + + public AccordTables with(TableId table) + { + if (tables.contains(table)) + return this; + + return new AccordTables(lastModified, ImmutableSet.builder().addAll(tables).add(table).build()); + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + public void serialize(AccordTables accordTables, DataOutputPlus out, Version version) throws IOException + { + int size = accordTables.tables.size(); + out.writeUnsignedVInt32(size); + TableId[] tables = new TableId[size]; + accordTables.tables.toArray(tables); + Arrays.sort(tables); + for (TableId table : tables) + table.serialize(out); + Epoch.serializer.serialize(accordTables.lastModified, out, version); + } + + public AccordTables deserialize(DataInputPlus in, Version version) throws IOException + { + int size = in.readUnsignedVInt32(); + ImmutableSet.Builder builder = ImmutableSet.builder(); + for (int i=0; i e : endpoints.flattenEntries()) { Replica destination = e.getKey(); @@ -242,13 +243,13 @@ public SequenceState executeNext() logger.info("Stream source: {} destination: {}", source, destination); assert !source.endpoint().equals(destination.endpoint()) : String.format("Source %s should not be the same as destionation %s", source, destination); if (source.isSelf()) - streamPlan.transferRanges(destination.endpoint(), ks.name, RangesAtEndpoint.of(destination)); + streamPlan.transferRanges(destination.endpoint(), ks.name, RangesAtEndpoint.of(destination), cfNames); else if (destination.isSelf()) { if (destination.isFull()) - streamPlan.requestRanges(source.endpoint(), ks.name, RangesAtEndpoint.of(destination), RangesAtEndpoint.empty(destination.endpoint())); + streamPlan.requestRanges(source.endpoint(), ks.name, RangesAtEndpoint.of(destination), RangesAtEndpoint.empty(destination.endpoint()), cfNames); else - streamPlan.requestRanges(source.endpoint(), ks.name, RangesAtEndpoint.empty(destination.endpoint()), RangesAtEndpoint.of(destination)); + streamPlan.requestRanges(source.endpoint(), ks.name, RangesAtEndpoint.empty(destination.endpoint()), RangesAtEndpoint.of(destination), cfNames); } else throw new IllegalStateException("Node should be either source or destination in the movement map " + endpoints); diff --git a/src/java/org/apache/cassandra/tcm/serialization/Version.java b/src/java/org/apache/cassandra/tcm/serialization/Version.java index 50e1792e2374..da99e726a0fb 100644 --- a/src/java/org/apache/cassandra/tcm/serialization/Version.java +++ b/src/java/org/apache/cassandra/tcm/serialization/Version.java @@ -36,6 +36,7 @@ public enum Version /** * - Added version to PlacementForRange serializer * - Serialize MemtableParams when serializing TableParams + * - Added AccordFastPath */ V2(2), /** diff --git a/src/java/org/apache/cassandra/tcm/transformations/AddAccordKeyspace.java b/src/java/org/apache/cassandra/tcm/transformations/AddAccordTable.java similarity index 53% rename from src/java/org/apache/cassandra/tcm/transformations/AddAccordKeyspace.java rename to src/java/org/apache/cassandra/tcm/transformations/AddAccordTable.java index 4057d7eccfa9..91eacd87e84a 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/AddAccordKeyspace.java +++ b/src/java/org/apache/cassandra/tcm/transformations/AddAccordTable.java @@ -20,59 +20,72 @@ import java.io.IOException; -import org.apache.cassandra.db.TypeSizes; +import accord.utils.Invariants; import org.apache.cassandra.exceptions.ExceptionCode; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.sequences.LockedRanges; import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; // TODO (expected, interop): improve mechanism for adding tables (probably want table level granularity, option to not-auto-add, and option to remove) -public class AddAccordKeyspace implements Transformation +public class AddAccordTable implements Transformation { - private final String keyspace; + private final TableId table; - public AddAccordKeyspace(String keyspace) + public AddAccordTable(TableId table) { - this.keyspace = keyspace; + this.table = table; } public Kind kind() { - return Kind.ADD_ACCORD_KEYSPACE; + return Kind.ADD_ACCORD_TABLE; } @Override public Result execute(ClusterMetadata metadata) { - if (metadata.accordKeyspaces.contains(keyspace)) - return new Rejected(ExceptionCode.ALREADY_EXISTS, keyspace + " is already an accord keyspaces"); + if (metadata.accordTables.contains(table)) + return new Rejected(ExceptionCode.ALREADY_EXISTS, table + " is already an accord table"); - return Transformation.success(metadata.transformer().withAccordKeyspace(keyspace), LockedRanges.AffectedRanges.EMPTY); + return Transformation.success(metadata.transformer().withAccordTable(table), LockedRanges.AffectedRanges.EMPTY); } - public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer() + public static void addTable(TableId table) + { + ClusterMetadataService.instance().commit(new AddAccordTable(table), + metadata -> null, + (code, message) -> { + Invariants.checkState(code == ExceptionCode.ALREADY_EXISTS, + "Expected %s, got %s", ExceptionCode.ALREADY_EXISTS, code); + return null; + }); + } + + public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer() { public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException { - assert t instanceof AddAccordKeyspace; - AddAccordKeyspace addKeyspace = (AddAccordKeyspace) t; - out.writeUTF(addKeyspace.keyspace); + assert t instanceof AddAccordTable; + AddAccordTable addTable = (AddAccordTable) t; + addTable.table.serialize(out); } - public AddAccordKeyspace deserialize(DataInputPlus in, Version version) throws IOException + public AddAccordTable deserialize(DataInputPlus in, Version version) throws IOException { - return new AddAccordKeyspace(in.readUTF()); + return new AddAccordTable(TableId.deserialize(in)); } public long serializedSize(Transformation t, Version version) { - assert t instanceof AddAccordKeyspace; - AddAccordKeyspace addKeyspace = (AddAccordKeyspace) t; - return TypeSizes.sizeof(addKeyspace.keyspace); + assert t instanceof AddAccordTable; + AddAccordTable addTable = (AddAccordTable) t; + return addTable.table.serializedSize(); } }; } diff --git a/src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java b/src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java new file mode 100644 index 000000000000..628f4482f1ab --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; + +import accord.local.Node; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.exceptions.ExceptionCode; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.service.accord.serializers.TopologySerializers; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +public class ReconfigureAccordFastPath implements Transformation +{ + private final Node.Id node; + private final AccordFastPath.Status status; + private final long updateTimeMillis; + private final long updateDelayMillis; + + public ReconfigureAccordFastPath(Node.Id node, AccordFastPath.Status status, long updateTimeMillis, long updateDelayMillis) + { + this.node = node; + this.status = status; + this.updateTimeMillis = updateTimeMillis; + this.updateDelayMillis = updateDelayMillis; + } + + public Kind kind() + { + return Kind.UPDATE_AVAILABILITY; + } + + public Result execute(ClusterMetadata metadata) + { + try + { + return Transformation.success(metadata.transformer().withFastPathStatusSince(node, status, updateTimeMillis, updateDelayMillis), LockedRanges.AffectedRanges.EMPTY); + } + catch (InvalidRequestException e) + { + return new Rejected(ExceptionCode.INVALID, e.getMessage()); + } + } + + public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer() + { + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + ReconfigureAccordFastPath update = (ReconfigureAccordFastPath) t; + TopologySerializers.nodeId.serialize(update.node, out, version); + AccordFastPath.Status.serializer.serialize(update.status, out, version); + out.writeUnsignedVInt(update.updateTimeMillis); + out.writeUnsignedVInt(update.updateDelayMillis); + + } + + public ReconfigureAccordFastPath deserialize(DataInputPlus in, Version version) throws IOException + { + return new ReconfigureAccordFastPath(TopologySerializers.nodeId.deserialize(in, version), + AccordFastPath.Status.serializer.deserialize(in, version), + in.readUnsignedVInt(), in.readUnsignedVInt()); + } + + public long serializedSize(Transformation t, Version version) + { + ReconfigureAccordFastPath update = (ReconfigureAccordFastPath) t; + return TopologySerializers.nodeId.serializedSize(update.node, version) + + AccordFastPath.Status.serializer.serializedSize(update.status, version) + + TypeSizes.sizeofUnsignedVInt(update.updateTimeMillis) + + TypeSizes.sizeofUnsignedVInt(update.updateDelayMillis); + } + }; +} diff --git a/src/java/org/apache/cassandra/tcm/transformations/cms/PrepareCMSReconfiguration.java b/src/java/org/apache/cassandra/tcm/transformations/cms/PrepareCMSReconfiguration.java index c8b15c4fa595..e94292831e4b 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/cms/PrepareCMSReconfiguration.java +++ b/src/java/org/apache/cassandra/tcm/transformations/cms/PrepareCMSReconfiguration.java @@ -45,6 +45,7 @@ import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.membership.NodeId; @@ -263,7 +264,7 @@ public Result execute(ClusterMetadata prev) // In a complex reconfiguration, in addition to initiating the sequence of membership changes, // we're modifying the replication params of the metadata keyspace so we supply a function to do that KeyspaceMetadata keyspace = prev.schema.getKeyspaceMetadata(SchemaConstants.METADATA_KEYSPACE_NAME); - KeyspaceMetadata newKeyspace = keyspace.withSwapped(new KeyspaceParams(keyspace.params.durableWrites, replicationParams)); + KeyspaceMetadata newKeyspace = keyspace.withSwapped(new KeyspaceParams(keyspace.params.durableWrites, replicationParams, FastPathStrategy.simple())); return executeInternal(prev, transformer -> transformer.with(prev.placements.replaceParams(prev.nextEpoch(), ReplicationParams.meta(prev), replicationParams)) diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index b90be2cf1bf1..9c8fd5039133 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -1332,12 +1332,16 @@ public List getNonLocalStrategyKeyspaces() return ssProxy.getNonLocalStrategyKeyspaces(); } - - public List getAccordManagedKeyspace() + public List getAccordManagedKeyspaces() { return ssProxy.getAccordManagedKeyspaces(); } + public List getAccordManagedTables() + { + return ssProxy.getAccordManagedTables(); + } + public String getClusterName() { return ssProxy.getClusterName(); diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 9f72c59eabf2..0c933fdf7d06 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -498,7 +498,7 @@ protected List parseOptionalKeyspace(List cmdArgs, NodeProbe nod else if (defaultKeyspaceSet == KeyspaceSet.NON_SYSTEM) keyspaces.addAll(keyspaces = nodeProbe.getNonSystemKeyspaces()); else if (defaultKeyspaceSet == KeyspaceSet.ACCORD_MANAGED) - keyspaces.addAll(nodeProbe.getAccordManagedKeyspace()); + keyspaces.addAll(nodeProbe.getAccordManagedKeyspaces()); else keyspaces.addAll(nodeProbe.getKeyspaces()); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java index 6364401de50f..5c6e276db3c9 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java @@ -27,6 +27,7 @@ import java.util.regex.Pattern; import com.google.common.util.concurrent.FutureCallback; +import org.apache.cassandra.distributed.test.accord.AccordTestBase; import org.junit.Assert; import org.junit.Ignore; import org.junit.Test; @@ -54,7 +55,6 @@ import org.apache.cassandra.distributed.shared.NetworkTopology; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; -import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.reads.repair.BlockingReadRepair; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; import org.apache.cassandra.utils.concurrent.Condition; @@ -108,9 +108,9 @@ private void testReadRepair(ReadRepairStrategy strategy, boolean brrThroughAccor { try (Cluster cluster = init(Cluster.create(3, config -> config.set("non_serial_write_strategy", brrThroughAccord ? "migration" : "normal")))) { - cluster.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); cluster.schemaChange(withKeyspace("CREATE TABLE %s.t (k int, c int, v int, PRIMARY KEY (k, c)) " + String.format("WITH read_repair='%s'", strategy))); + AccordTestBase.ensureTableIsAccordManaged(cluster, KEYSPACE, "t"); Object[] row = row(1, 1, 1); String insertQuery = withKeyspace("INSERT INTO %s.t (k, c, v) VALUES (?, ?, ?)"); diff --git a/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java b/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java index f6f5e2a812fb..98b98aa1e5af 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ReadSpeculationTest.java @@ -25,7 +25,6 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import org.apache.cassandra.service.reads.ReadCoordinator; import org.junit.Assert; import org.junit.Test; @@ -54,6 +53,7 @@ import org.apache.cassandra.locator.ReplicaPlan; import org.apache.cassandra.locator.ReplicaPlans; import org.apache.cassandra.service.CassandraDaemon; +import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.transport.Dispatcher; import static net.bytebuddy.matcher.ElementMatchers.named; @@ -86,7 +86,7 @@ public void speculateTest() throws Throwable Keyspace keyspace = Keyspace.openIfExists(KEYSPACE); ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(TABLE); DecoratedKey dk = cfs.decorateKey(bytes(PK_VALUE)); - ReplicaPlan.ForTokenRead plan = ReplicaPlans.forRead(keyspace, dk.getToken(), null, + ReplicaPlan.ForTokenRead plan = ReplicaPlans.forRead(keyspace, cfs.getTableId(), dk.getToken(), null, QUORUM, cfs.metadata().params.speculativeRetry, ReadCoordinator.DEFAULT); return plan.contacts().endpointList().stream().map(InetSocketAddress::getAddress).collect(Collectors.toList()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java index c6c941328553..6d192db1124b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java @@ -28,6 +28,7 @@ import java.util.function.Function; import java.util.stream.IntStream; +import org.apache.cassandra.distributed.test.accord.AccordTestBase; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -44,7 +45,6 @@ import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.shared.AssertUtils; -import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.Pair; @@ -111,7 +111,6 @@ public static void setupCluster() throws IOException .withNodes(NUM_NODES) .withConfig(config -> config.set("hinted_handoff_enabled", false)) .start()); - cluster.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); } @AfterClass @@ -437,6 +436,7 @@ private static class Tester private final ConsistencyLevel readConsistencyLevel; private final boolean flush, paging; + private final String table; private final String qualifiedTableName; private boolean flushed = false; @@ -446,7 +446,8 @@ private Tester(ConsistencyLevel readConsistencyLevel, boolean flush, boolean pag this.readConsistencyLevel = readConsistencyLevel; this.flush = flush; this.paging = paging; - qualifiedTableName = KEYSPACE + ".t_" + seqNumber.getAndIncrement(); + this.table = "t_" + seqNumber.getAndIncrement(); + qualifiedTableName = KEYSPACE + '.' + table; assert readConsistencyLevel == ALL || readConsistencyLevel == QUORUM || readConsistencyLevel == SERIAL : "Only ALL and QUORUM consistency levels are supported"; @@ -455,6 +456,7 @@ private Tester(ConsistencyLevel readConsistencyLevel, boolean flush, boolean pag private Tester createTable(String query) { cluster.schemaChange(format(query) + " WITH read_repair='NONE'"); + AccordTestBase.ensureTableIsAccordManaged(cluster, KEYSPACE, table); return this; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index 8dff79cd8698..7d246a2b6c9f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -78,7 +78,7 @@ private static DecoratedKey dk(int key) private static PartitionKey pk(int key, String keyspace, String table) { TableId tid = Schema.instance.getTableMetadata(keyspace, table).id; - return new PartitionKey(keyspace, tid, dk(key)); + return new PartitionKey(tid, dk(key)); } protected void bootstrapAndJoinNode(Cluster cluster) @@ -451,7 +451,7 @@ public void moveTest() throws Throwable Assert.assertEquals(key, row.getInt("c")); Assert.assertEquals(key, row.getInt("v")); - PartitionKey partitionKey = new PartitionKey("ks", tableId, dk); + PartitionKey partitionKey = new PartitionKey(tableId, dk); awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(PreLoadContext.contextFor(partitionKey), partitionKey.toUnseekable(), moveMax, moveMax, diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index db8f1c21d9a2..5eeee8108a25 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -123,11 +123,11 @@ public void testMultiPartitionReturn() throws Exception for (int i = 0; i < 10; i++) { for (int j = 0; j < 10; j++) - cluster.coordinator(1).execute("INSERT INTO " + currentTable + "(k, c, v) VALUES (?, ?, ?);", ConsistencyLevel.ALL, i, j, i + j); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "(k, c, v) VALUES (?, ?, ?);", ConsistencyLevel.ALL, i, j, i + j); } // multi row String cql = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k=? AND c IN (?, ?);\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k=? AND c IN (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(cql, ConsistencyLevel.ANY, 0, 0, 1); assertThat(result).isEqualTo(QueryResults.builder() @@ -138,7 +138,7 @@ public void testMultiPartitionReturn() throws Exception // Results should be in Partiton/Clustering order, so make sure // multi partition cql = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k IN (?, ?) AND c = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k IN (?, ?) AND c = ?;\n" + "COMMIT TRANSACTION"; for (boolean asc : Arrays.asList(true, false)) { @@ -153,7 +153,7 @@ public void testMultiPartitionReturn() throws Exception // multi-partition, multi-clustering cql = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k IN (?, ?) AND c IN (?, ?);\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k IN (?, ?) AND c IN (?, ?);\n" + "COMMIT TRANSACTION"; for (boolean asc : Arrays.asList(true, false)) { @@ -232,14 +232,14 @@ public void testScalarBindVariables() throws Throwable { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + - " LET row2 = (SELECT v FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + - " SELECT v FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + + " LET row2 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + " IF row1 IS NULL AND row2.v = ? THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -253,7 +253,7 @@ public void testScalarBindVariables() throws Throwable assertEquals(3, result[0][0]); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check); }); @@ -262,13 +262,13 @@ public void testScalarBindVariables() throws Throwable @Test public void testRegularScalarIsNull() throws Throwable { - testScalarIsNull("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))"); + testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))"); } @Test public void testStaticScalarIsNull() throws Throwable { - testScalarIsNull("CREATE TABLE " + currentTable + " (k int, c int, v int static, primary key (k, c))"); + testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c))"); } private void testScalarIsNull(String tableDDL) throws Exception { @@ -276,25 +276,25 @@ private void testScalarIsNull(String tableDDL) throws Exception { cluster -> { String insertNull = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT 1);\n" + " SELECT row0.k, row0.v;\n" + " IF row0.v IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, null);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, null);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null, null }, insertNull, 0, 0); String insert = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT 1);\n" + " SELECT row0.k, row0.v;\n" + " IF row0.v IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, null }, insert, 0, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT k, c, v FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + " SELECT k, c, v FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check); }); @@ -303,36 +303,36 @@ private void testScalarIsNull(String tableDDL) throws Exception { @Test public void testQueryStaticColumn() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, s int static, v int, primary key (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c))", cluster -> { // select partition key, clustering key and static column, restrict on partition and clustering testQueryStaticColumn(cluster, - "LET row0 = (SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? AND c = 0);\n" + + "LET row0 = (SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0);\n" + "SELECT row0.k, row0.c, row0.s, row0.v;\n", - "SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? AND c = 0"); + "SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0"); // select partition key, clustering key and static column, restrict on partition and limit to 1 row testQueryStaticColumn(cluster, - "LET row0 = (SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + "LET row0 = (SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + "SELECT row0.k, row0.c, row0.s, row0.v;\n", - "SELECT k, c, s, v FROM " + currentTable + " WHERE k = ? LIMIT 1"); + "SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1"); // select static column and regular column, restrict on partition and clustering testQueryStaticColumn(cluster, - "LET row0 = (SELECT s, v FROM " + currentTable + " WHERE k = ? AND c = 0);\n" + + "LET row0 = (SELECT s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0);\n" + "SELECT row0.s, row0.v;\n", - "SELECT s, v FROM " + currentTable + " WHERE k = ? AND c = 0"); + "SELECT s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0"); // select just static column, restrict on partition and limit to 1 row testQueryStaticColumn(cluster, - "LET row0 = (SELECT s FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + "LET row0 = (SELECT s FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + "SELECT row0.s;\n", - "SELECT s FROM " + currentTable + " WHERE k = ? LIMIT 1"); + "SELECT s FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1"); }); } @@ -342,22 +342,22 @@ private void testQueryStaticColumn(Cluster cluster, String accordReadQuery, Stri int key = 10; assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); - cluster.get(1).coordinator().execute("INSERT INTO " + currentTable + " (k, s) VALUES (?, null);", ConsistencyLevel.ALL, key); + cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedTableName + " (k, s) VALUES (?, null);", ConsistencyLevel.ALL, key); logger().info("null -> static column"); assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); - cluster.get(1).coordinator().execute("INSERT INTO " + currentTable + " (k, s) VALUES (?, 1);", ConsistencyLevel.ALL, key); + cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedTableName + " (k, s) VALUES (?, 1);", ConsistencyLevel.ALL, key); logger().info("Inserted 1 -> static column"); assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); - cluster.get(1).coordinator().execute("INSERT INTO " + currentTable + " (k, c) VALUES (?, 0);", ConsistencyLevel.ALL, key); + cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedTableName + " (k, c) VALUES (?, 0);", ConsistencyLevel.ALL, key); logger().info("Inserted 0 -> clustering"); assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key); } @Test public void testUpdateStaticColumn() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, s int static, v int, primary key (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c))", cluster -> { checkUpdateStatic(cluster, "SET s=1 WHERE k=?", 101, "[[101, null, 1, null]]", "[]"); @@ -373,16 +373,16 @@ public void testUpdateStaticColumn() throws Exception { private void checkUpdateStatic(Cluster cluster, String update, int key, String expPart, String expClust) { Object[][] r1, r2, r3, r4, r; - r = cluster.get(1).coordinator().execute("UPDATE " + currentTable + " " + update + " IF s = NULL;", ConsistencyLevel.QUORUM, key); + r = cluster.get(1).coordinator().execute("UPDATE " + qualifiedTableName + " " + update + " IF s = NULL;", ConsistencyLevel.QUORUM, key); Assertions.assertThat(Arrays.deepToString(r)).isEqualTo("[[true]]"); - r1 = cluster.get(1).coordinator().execute("SELECT * FROM " + currentTable + " WHERE k = ? LIMIT 1;", ConsistencyLevel.SERIAL, key); - r2 = cluster.get(1).coordinator().execute("SELECT * FROM " + currentTable + " WHERE k = ? AND c = 0;", ConsistencyLevel.SERIAL, key); - cluster.get(1).coordinator().execute("TRUNCATE " + currentTable, ConsistencyLevel.ALL); + r1 = cluster.get(1).coordinator().execute("SELECT * FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1;", ConsistencyLevel.SERIAL, key); + r2 = cluster.get(1).coordinator().execute("SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = 0;", ConsistencyLevel.SERIAL, key); + cluster.get(1).coordinator().execute("TRUNCATE " + qualifiedTableName, ConsistencyLevel.ALL); - executeAsTxn(cluster, "UPDATE " + currentTable + " " + update + ";", key); - r3 = executeAsTxn(cluster, "SELECT * FROM " + currentTable + " WHERE k = ? LIMIT 1;", key).toObjectArrays(); - r4 = executeAsTxn(cluster, "SELECT * FROM " + currentTable + " WHERE k = ? AND c = 0;", key).toObjectArrays(); - cluster.get(1).coordinator().execute("TRUNCATE " + currentTable, ConsistencyLevel.ALL); + executeAsTxn(cluster, "UPDATE " + qualifiedTableName + " " + update + ";", key); + r3 = executeAsTxn(cluster, "SELECT * FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1;", key).toObjectArrays(); + r4 = executeAsTxn(cluster, "SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = 0;", key).toObjectArrays(); + cluster.get(1).coordinator().execute("TRUNCATE " + qualifiedTableName, ConsistencyLevel.ALL); Assertions.assertThat(Arrays.deepToString(r1)).isEqualTo(expPart); Assertions.assertThat(Arrays.deepToString(r2)).isEqualTo(expClust); @@ -453,12 +453,12 @@ public void testScalarGte() throws Throwable @Test public void testStaticScalarEQ() throws Throwable { - testScalarCondition("CREATE TABLE " + currentTable + " (k int, c int, v int static, primary key (k, c))", 3, "=", 3, "="); + testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c))", 3, "=", 3, "="); } private void testScalarCondition(int lhs, String operator, int rhs, String reversedOperator) throws Exception { - testScalarCondition("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))", lhs, operator, rhs, reversedOperator); + testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))", lhs, operator, rhs, reversedOperator); } private void testScalarCondition(String tableDDL, int lhs, String operator, int rhs, String reversedOperator) throws Exception @@ -466,27 +466,27 @@ private void testScalarCondition(String tableDDL, int lhs, String operator, int test(tableDDL, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, " + lhs + ");", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, " + lhs + ");", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + " SELECT row1.v;\n" + " IF row1.v " + operator + " ? THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { lhs }, query, 0, rhs, 1, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, 0, 1 }, check, 1, 0); String queryWithReversed = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + currentTable + " WHERE k = ? LIMIT 1);\n" + + " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + " SELECT row1.v;\n" + " IF ? " + reversedOperator + " row1.v THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { lhs }, queryWithReversed, 0, rhs, 2, 0, 1); @@ -500,7 +500,7 @@ public void testReadOnlyTx() throws Exception test(cluster -> { String query = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY); assertFalse(result.hasNext()); @@ -513,13 +513,13 @@ public void testWriteOnlyTx() throws Exception test(cluster -> { String query = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1); assertFalse(result.hasNext()); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k=? AND c=?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k=? AND c=?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 1}, check, 0, 0); }); @@ -530,14 +530,14 @@ public void testReturningLetReferences() throws Throwable { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + - " LET row2 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + + " LET row2 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + " SELECT row1.v, row2.k, row2.c, row2.v;\n" + " IF row1 IS NULL AND row2.v = ? THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1, 0, 3, 0, 0, 1); @@ -545,7 +545,7 @@ public void testReturningLetReferences() throws Throwable assertThat(result).hasSize(1).contains(null, 1, 0, 3); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 1}, check); }); @@ -556,14 +556,14 @@ public void testFailedConditionWithCompleteInsert() throws Throwable { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + " SELECT row1.v;\n" + " IF row0 IS NULL AND row1.v = ? THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1, 0, 2, 0, 0, 1); @@ -571,7 +571,7 @@ public void testFailedConditionWithCompleteInsert() throws Throwable assertThat(result).hasSize(1).contains(3); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, check); }); @@ -580,22 +580,22 @@ public void testFailedConditionWithCompleteInsert() throws Throwable @Test public void testReversedClusteringReference() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1);\n" + " SELECT row1.k, row1.c, row1.v;\n" + " IF row1.c = 1 THEN\n" + - " UPDATE " + currentTable + " SET v += row1.c WHERE k=1 AND c=1;\n" + + " UPDATE " + qualifiedTableName + " SET v += row1.c WHERE k=1 AND c=1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 2}, check); }); @@ -615,20 +615,20 @@ public void testScalarShorthandSubtraction() throws Exception private void testScalarShorthandOperation(int startingValue, String operation, int endingvalue) throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v) VALUES (1, ?)", ConsistencyLevel.ALL, startingValue); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v) VALUES (1, ?)", ConsistencyLevel.ALL, startingValue); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.v;\n" + - " UPDATE " + currentTable + " SET v " + operation + " 1 WHERE k = 1;\n" + + " UPDATE " + qualifiedTableName + " SET v " + operation + " 1 WHERE k = 1;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { startingValue }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + currentTable + " WHERE k = 1;\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2 }, check); }); @@ -637,20 +637,20 @@ private void testScalarShorthandOperation(int startingValue, String operation, i @Test public void testConstantNonStaticRowReadBeforeUpdate() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 2);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 2);\n" + " SELECT row1.v;\n" + - " UPDATE " + currentTable + " SET v += 1 WHERE k = 1 AND c = 2;\n" + + " UPDATE " + qualifiedTableName + " SET v += 1 WHERE k = 1 AND c = 2;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 3 }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + currentTable + " WHERE k = 1 AND c = 2;\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k = 1 AND c = 2;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); }); @@ -659,21 +659,21 @@ public void testConstantNonStaticRowReadBeforeUpdate() throws Exception @Test public void testRangeDeletion() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 3, ?)", ConsistencyLevel.ALL, 4); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 4, ?)", ConsistencyLevel.ALL, 5); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 3, ?)", ConsistencyLevel.ALL, 4); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 4, ?)", ConsistencyLevel.ALL, 5); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 2);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 2);\n" + " SELECT row1.v;\n" + - " DELETE FROM " + currentTable + " WHERE k = 1 AND c >=3 AND c <= 4;\n" + + " DELETE FROM " + qualifiedTableName + " WHERE k = 1 AND c >=3 AND c <= 4;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 3 }, update); - Object[][] check = cluster.coordinator(1).execute("SELECT * FROM " + currentTable + " WHERE k = 1;", ConsistencyLevel.SERIAL); + Object[][] check = cluster.coordinator(1).execute("SELECT * FROM " + qualifiedTableName + " WHERE k = 1;", ConsistencyLevel.SERIAL); assertArrayEquals(new Object[] { 1, 2, 3 }, check[0]); assertEquals(1, check.length); }); @@ -683,22 +683,22 @@ public void testRangeDeletion() throws Exception @Test public void testPartitionKeyReferenceCondition() throws Exception { - test("CREATE TABLE " + currentTable + " (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", + test("CREATE TABLE " + qualifiedTableName + " (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1);\n" + " SELECT row1.k, row1.c, row1.v;\n" + " IF row1.k = 1 THEN\n" + - " UPDATE " + currentTable + " SET v += row1.k WHERE k=1 AND c=1;\n" + + " UPDATE " + qualifiedTableName + " SET v += row1.k WHERE k=1 AND c=1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 2}, check); }); @@ -707,22 +707,22 @@ public void testPartitionKeyReferenceCondition() throws Exception @Test public void testMultiPartitionKeyReferenceCondition() throws Exception { - test("CREATE TABLE " + currentTable + " (pk1 INT, pk2 INT, c INT, v INT, PRIMARY KEY ((pk1, pk2), c)) WITH CLUSTERING ORDER BY (c DESC)", + test("CREATE TABLE " + qualifiedTableName + " (pk1 INT, pk2 INT, c INT, v INT, PRIMARY KEY ((pk1, pk2), c)) WITH CLUSTERING ORDER BY (c DESC)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (pk1, pk2, c, v) VALUES (1, 1, 1, 1)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (pk1, pk2, c, v) VALUES (1, 1, 1, 1)", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1);\n" + " SELECT row1.pk1, row1.pk2, row1.c, row1.v;\n" + " IF row1.pk1 = 1 THEN\n" + - " UPDATE " + currentTable + " SET v += row1.pk2 WHERE pk1 = 1 AND pk2 = 1 AND c=1;\n" + + " UPDATE " + qualifiedTableName + " SET v += row1.pk2 WHERE pk1 = 1 AND pk2 = 1 AND c=1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1, 1}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1, 2}, check); }); @@ -731,13 +731,13 @@ public void testMultiPartitionKeyReferenceCondition() throws Exception @Test public void testMultiCellListEqCondition() throws Exception { - testListEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); } @Test public void testFrozenListEqCondition() throws Exception { - testListEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); } private void testListEqCondition(String ddl) throws Exception @@ -750,7 +750,7 @@ private void testListEqCondition(String ddl) throws Exception ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, int_list) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialListBytes); assertFalse(result.hasNext()); @@ -759,16 +759,16 @@ private void testListEqCondition(String ddl) throws Exception ByteBuffer updatedListBytes = listType.getSerializer().serialize(updatedList); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = ? THEN\n" + - " UPDATE " + currentTable + " SET int_list = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_list = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialList}, update, 0, initialListBytes, updatedListBytes, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, updatedList}, check, 0); } @@ -778,13 +778,13 @@ private void testListEqCondition(String ddl) throws Exception @Test public void testMultiCellSetEqCondition() throws Exception { - testSetEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); } @Test public void testFrozenSetEqCondition() throws Exception { - testSetEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); } private void testSetEqCondition(String ddl) throws Exception @@ -797,7 +797,7 @@ private void testSetEqCondition(String ddl) throws Exception ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialSetBytes); assertFalse(result.hasNext()); @@ -806,16 +806,16 @@ private void testSetEqCondition(String ddl) throws Exception ByteBuffer updatedSetBytes = setType.getSerializer().serialize(updatedSet); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = ? THEN\n" + - " UPDATE " + currentTable + " SET int_set = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_set = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialSet}, update, 0, initialSetBytes, updatedSetBytes, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, updatedSet}, check, 0); } @@ -825,13 +825,13 @@ private void testSetEqCondition(String ddl) throws Exception @Test public void testMultiCellMapEqCondition() throws Exception { - testMapEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); } @Test public void testFrozenMapEqCondition() throws Exception { - testMapEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); } private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exception @@ -844,7 +844,7 @@ private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exceptio ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialMapBytes); assertFalse(result.hasNext()); @@ -853,16 +853,16 @@ private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exceptio ByteBuffer updatedMapBytes = mapType.getSerializer().serialize(updatedMap); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map = ? THEN\n" + - " UPDATE " + currentTable + " SET int_map = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_map = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, initialMapBytes, updatedMapBytes, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, check, 0); } @@ -872,13 +872,13 @@ private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exceptio @Test public void testMultiCellUDTEqCondition() throws Exception { - testUDTEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); } @Test public void testFrozenUDTEqCondition() throws Exception { - testUDTEqCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); } private void testUDTEqCondition(String tableDDL) throws Exception @@ -890,7 +890,7 @@ private void testUDTEqCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialPersonBuffer); assertFalse(result.hasNext()); @@ -899,16 +899,16 @@ private void testUDTEqCondition(String tableDDL) throws Exception ByteBuffer updatedPersonBuffer = CQLTester.makeByteBuffer(updatedPersonValue, null); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer = ? THEN\n" + - " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, initialPersonBuffer, updatedPersonBuffer, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, check, 0); } @@ -918,14 +918,14 @@ private void testUDTEqCondition(String tableDDL) throws Exception @Test public void testTupleEqCondition() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, pair tuple)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple)", cluster -> { Object initialTupleValue = CQLTester.tuple("age", 37); ByteBuffer initialTupleBuffer = CQLTester.makeByteBuffer(initialTupleValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, pair) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, pair) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialTupleBuffer); assertFalse(result.hasNext()); @@ -934,16 +934,16 @@ public void testTupleEqCondition() throws Exception ByteBuffer updatedTupleBuffer = CQLTester.makeByteBuffer(updatedTupleValue, null); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.pair;\n" + " IF row1.pair = ? THEN\n" + - " UPDATE " + currentTable + " SET pair = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET pair = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialTupleBuffer }, update, 0, initialTupleBuffer, updatedTupleBuffer, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedTupleBuffer }, check, 0); } @@ -953,31 +953,31 @@ public void testTupleEqCondition() throws Exception @Test public void testIsNullWithComplexDeletion() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, int_list list, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c))", cluster -> { ListType listType = ListType.getInstance(Int32Type.instance, true); List initialList = Arrays.asList(1, 2); ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, int_list) VALUES (0, 0, ?);", ConsistencyLevel.ALL, initialListBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (0, 0, ?);", ConsistencyLevel.ALL, initialListBytes); cluster.forEach(i -> i.flush(KEYSPACE)); - cluster.coordinator(1).execute("DELETE int_list FROM " + currentTable + " WHERE k = 0 AND c = 0;", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("DELETE int_list FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0;", ConsistencyLevel.ALL); List updatedList = Arrays.asList(1, 2, 3); ByteBuffer updatedListBytes = listType.getSerializer().serialize(updatedList); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, int_list) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, 0, 0, updatedListBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, updatedList }, check, 0, 0); } @@ -987,13 +987,13 @@ public void testIsNullWithComplexDeletion() throws Exception @Test public void testNullMultiCellListConditions() throws Exception { - testNullListConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); } @Test public void testNullFrozenListConditions() throws Exception { - testNullListConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); } private void testNullListConditions(String ddl) throws Exception @@ -1001,31 +1001,31 @@ private void testNullListConditions(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, null);", ConsistencyLevel.ALL); ListType listType = ListType.getInstance(Int32Type.instance, true); List initialList = Arrays.asList(1, 2); ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_list) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialListBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialList}, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET int_list = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_list = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1039,13 +1039,13 @@ private void testNullListConditions(String ddl) throws Exception @Test public void testNullMultiCellSetConditions() throws Exception { - testNullSetConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); } @Test public void testNullFrozenSetConditions() throws Exception { - testNullSetConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); } private void testNullSetConditions(String ddl) throws Exception @@ -1053,31 +1053,31 @@ private void testNullSetConditions(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, null);", ConsistencyLevel.ALL); SetType setType = SetType.getInstance(Int32Type.instance, true); Set initialSet = ImmutableSet.of(1, 2); ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialSetBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialSet}, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET int_set = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_set = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1091,13 +1091,13 @@ private void testNullSetConditions(String ddl) throws Exception @Test public void testNullMultiCellMapConditions() throws Exception { - testNullMapConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); } @Test public void testNullFrozenMapConditions() throws Exception { - testNullMapConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); } private void testNullMapConditions(String ddl, boolean isMultiCell) throws Exception @@ -1105,31 +1105,31 @@ private void testNullMapConditions(String ddl, boolean isMultiCell) throws Excep test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); Map initialMap = ImmutableMap.of("one", 1, "two", 2); ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialMapBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialMap }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET int_map = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_map = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1138,7 +1138,7 @@ private void testNullMapConditions(String ddl, boolean isMultiCell) throws Excep assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, updatedMapBytes, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, checkUpdate, 0); } @@ -1148,13 +1148,13 @@ private void testNullMapConditions(String ddl, boolean isMultiCell) throws Excep @Test public void testNullMultiCellUDTCondition() throws Exception { - testNullUDTCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); } @Test public void testNullFrozenUDTCondition() throws Exception { - testNullUDTCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); } private void testNullUDTCondition(String tableDDL) throws Exception @@ -1166,24 +1166,24 @@ private void testNullUDTCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialPersonBuffer); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1192,7 +1192,7 @@ private void testNullUDTCondition(String tableDDL) throws Exception assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); } @@ -1202,13 +1202,13 @@ private void testNullUDTCondition(String tableDDL) throws Exception @Test public void testNullMultiCellSetElementConditions() throws Exception { - testNullSetElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); } @Test public void testNullFrozenSetElementConditions() throws Exception { - testNullSetElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); } private void testNullSetElementConditions(String ddl) throws Exception @@ -1216,31 +1216,31 @@ private void testNullSetElementConditions(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1});", ConsistencyLevel.ALL); SetType setType = SetType.getInstance(Int32Type.instance, true); Set initialSet = ImmutableSet.of(1, 2); ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_set[2];\n" + " IF row1.int_set[2] IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialSetBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialSet}, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set[2] IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET int_set = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_set = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1254,13 +1254,13 @@ private void testNullSetElementConditions(String ddl) throws Exception @Test public void testNullMultiCellMapElementConditions() throws Exception { - testNullMapElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); } @Test public void testNullFrozenMapElementConditions() throws Exception { - testNullMapElementConditions("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); } private void testNullMapElementConditions(String ddl, boolean isMultiCell) throws Exception @@ -1268,31 +1268,31 @@ private void testNullMapElementConditions(String ddl, boolean isMultiCell) throw test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); Map initialMap = ImmutableMap.of("one", 1, "two", 2); ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map[?] IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, "one", 0, initialMapBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialMap }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map[?] IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET int_map = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET int_map = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1301,7 +1301,7 @@ private void testNullMapElementConditions(String ddl, boolean isMultiCell) throw assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, "two", updatedMapBytes, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, checkUpdate, 0); } @@ -1311,13 +1311,13 @@ private void testNullMapElementConditions(String ddl, boolean isMultiCell) throw @Test public void testNullMultiCellUDTFieldCondition() throws Exception { - testNullUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); } @Test public void testNullFrozenUDTFieldCondition() throws Exception { - testNullUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); } private void testNullUDTFieldCondition(String tableDDL) throws Exception @@ -1329,24 +1329,24 @@ private void testNullUDTFieldCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer.age IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialPersonBuffer); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer.age IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1355,7 +1355,7 @@ private void testNullUDTFieldCondition(String tableDDL) throws Exception assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); } @@ -1365,13 +1365,13 @@ private void testNullUDTFieldCondition(String tableDDL) throws Exception @Test public void testMultiCellListSubstitution() throws Exception { - testListSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", true); + testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", true); } @Test public void testFrozenListSubstitution() throws Exception { - testListSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)", false); + testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)", false); } private void testListSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1383,19 +1383,19 @@ private void testListSubstitution(String ddl, boolean isMultiCell) throws Except List initialList = Arrays.asList(1, 2); ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, ?);", ConsistencyLevel.ALL, initialListBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, ?);", ConsistencyLevel.ALL, initialListBytes); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NOT NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_list) VALUES (?, row1.int_list);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (?, row1.int_list);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialList }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialList }, check, 1); } @@ -1405,13 +1405,13 @@ private void testListSubstitution(String ddl, boolean isMultiCell) throws Except @Test public void testMultiCellSetSubstitution() throws Exception { - testSetSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)", true); + testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)", true); } @Test public void testFrozenSetSubstitution() throws Exception { - testSetSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)", false); + testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)", false); } private void testSetSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1423,19 +1423,19 @@ private void testSetSubstitution(String ddl, boolean isMultiCell) throws Excepti Set initialSet = ImmutableSet.of(1, 2); ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, ?);", ConsistencyLevel.ALL, initialSetBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, ?);", ConsistencyLevel.ALL, initialSetBytes); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set IS NOT NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_set) VALUES (?, row1.int_set);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, row1.int_set);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialSet }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialSet }, check, 1); } @@ -1445,13 +1445,13 @@ private void testSetSubstitution(String ddl, boolean isMultiCell) throws Excepti @Test public void testMultiCellMapSubstitution() throws Exception { - testMapSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", true); + testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); } @Test public void testFrozenMapSubstitution() throws Exception { - testMapSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)", false); + testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); } private void testMapSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1463,19 +1463,19 @@ private void testMapSubstitution(String ddl, boolean isMultiCell) throws Excepti Map initialMap = ImmutableMap.of("one", 1, "two", 2); ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, ?);", ConsistencyLevel.ALL, initialMapBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, ?);", ConsistencyLevel.ALL, initialMapBytes); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map IS NOT NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, int_map) VALUES (?, row1.int_map);\n" + + " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, row1.int_map);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ initialMap }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialMap }, check, 1); } @@ -1485,13 +1485,13 @@ private void testMapSubstitution(String ddl, boolean isMultiCell) throws Excepti @Test public void testMultiCellUDTSubstitution() throws Exception { - testUDTSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); } @Test public void testFrozenUDTSubstitution() throws Exception { - testUDTSubstitution("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); } private void testUDTSubstitution(String tableDDL) throws Exception @@ -1501,19 +1501,19 @@ private void testUDTSubstitution(String tableDDL) throws Exception { Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer IS NOT NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, customer) VALUES (?, row1.customer);\n" + + " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, row1.customer);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ initialPersonBuffer }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialPersonBuffer }, check, 1); } @@ -1523,24 +1523,24 @@ private void testUDTSubstitution(String tableDDL) throws Exception @Test public void testTupleSubstitution() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, pair tuple)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple)", cluster -> { Object initialTupleValue = CQLTester.tuple("age", 37); ByteBuffer initialTupleBuffer = CQLTester.makeByteBuffer(initialTupleValue, null); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, pair) VALUES (0, ?);", ConsistencyLevel.ALL, initialTupleBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, pair) VALUES (0, ?);", ConsistencyLevel.ALL, initialTupleBuffer); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.pair;\n" + " IF row1.pair IS NOT NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, pair) VALUES (?, row1.pair);\n" + + " INSERT INTO " + qualifiedTableName + " (k, pair) VALUES (?, row1.pair);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialTupleBuffer }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialTupleBuffer }, check, 1); } @@ -1550,13 +1550,13 @@ public void testTupleSubstitution() throws Exception @Test public void testMultiCellListReplacement() throws Exception { - testListReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); } @Test public void testFrozenListReplacement() throws Exception { - testListReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); } private void testListReplacement(String ddl) throws Exception @@ -1564,20 +1564,20 @@ private void testListReplacement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = [3, 4] THEN\n" + - " UPDATE " + currentTable + " SET int_list = row1.int_list WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_list = row1.int_list WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(3, 4)}, check); } @@ -1587,13 +1587,13 @@ private void testListReplacement(String ddl) throws Exception @Test public void testMultiCellSetReplacement() throws Exception { - testSetReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); } @Test public void testFrozenSetReplacement() throws Exception { - testSetReplacement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); } private void testSetReplacement(String ddl) throws Exception @@ -1601,20 +1601,20 @@ private void testSetReplacement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = {3, 4} THEN\n" + - " UPDATE " + currentTable + " SET int_set = row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_set = row1.int_set WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(3, 4) }, check); } @@ -1624,23 +1624,23 @@ private void testSetReplacement(String ddl) throws Exception @Test public void testListAppendFromReference() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = [3, 4] THEN\n" + - " UPDATE " + currentTable + " SET int_list += row1.int_list WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_list += row1.int_list WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(1, 2, 3, 4)}, check); } @@ -1650,13 +1650,13 @@ public void testListAppendFromReference() throws Exception @Test public void testSetByIndexFromMultiCellListElement() throws Exception { - testListSetByIndexFromListElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, src_int_list list, dest_int_list list)"); + testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list list, dest_int_list list)"); } @Test public void testSetByIndexFromFrozenListElement() throws Exception { - testListSetByIndexFromListElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, src_int_list frozen>, dest_int_list list)"); + testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list frozen>, dest_int_list list)"); } private void testListSetByIndexFromListElement(String ddl) throws Exception @@ -1664,18 +1664,18 @@ private void testListSetByIndexFromListElement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, dest_int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, src_int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, dest_int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, src_int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.src_int_list;\n" + - " UPDATE " + currentTable + " SET dest_int_list[0] = row1.src_int_list[0] WHERE k = 0;\n" + + " UPDATE " + qualifiedTableName + " SET dest_int_list[0] = row1.src_int_list[0] WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT dest_int_list FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT dest_int_list FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 2)}, check); } @@ -1685,20 +1685,20 @@ private void testListSetByIndexFromListElement(String ddl) throws Exception @Test public void testListSetByIndexFromScalar() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0);\n" + " SELECT row0.int_list;\n" + - " UPDATE " + currentTable + " SET int_list[0] = 2 WHERE k = 0;\n" + + " UPDATE " + qualifiedTableName + " SET int_list[0] = 2 WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(1, 2)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT int_list FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT int_list FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(2, 2)}, check); } @@ -1708,21 +1708,21 @@ public void testListSetByIndexFromScalar() throws Exception @Test public void testAutoReadSelectionConstruction() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, counter int, other_counter int, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, other_counter int, PRIMARY KEY (k, c))", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, counter, other_counter) VALUES (0, 0, 1, 1);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, counter, other_counter) VALUES (0, 1, 1, 1);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, other_counter) VALUES (0, 0, 1, 1);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, other_counter) VALUES (0, 1, 1, 1);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + " SELECT row0.counter, row0.other_counter;\n" + - " UPDATE " + currentTable + " SET other_counter += 1, counter += row0.counter WHERE k = 0 AND c = 1;\n" + + " UPDATE " + qualifiedTableName + " SET other_counter += 1, counter += row0.counter WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 1, 1 }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT counter, other_counter FROM " + currentTable + " WHERE k = 0 AND c = 1;\n" + + " SELECT counter, other_counter FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2, 2 }, check); } @@ -1732,21 +1732,21 @@ public void testAutoReadSelectionConstruction() throws Exception @Test public void testMultiMutationsSameKey() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, counter int, int_list list, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, int_list list, PRIMARY KEY (k, c))", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, counter, int_list) VALUES (0, 0, 0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, int_list) VALUES (0, 0, 0, [1, 2]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + " SELECT row0.counter, row0.int_list;\n" + - " UPDATE " + currentTable + " SET int_list[0] = 42 WHERE k = 0 AND c = 0;\n" + - " UPDATE " + currentTable + " SET counter += 1 WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedTableName + " SET int_list[0] = 42 WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedTableName + " SET counter += 1 WHERE k = 0 AND c = 0;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 0, Arrays.asList(1, 2) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT counter, int_list FROM " + currentTable + " WHERE k = 0 AND c = 0;\n" + + " SELECT counter, int_list FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {1, Arrays.asList(42, 2)}, check); } @@ -1757,10 +1757,10 @@ public void testMultiMutationsSameKey() throws Exception public void testLetLargerThanOneWithPK() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0 LIMIT 2);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0 LIMIT 2);\n" + " SELECT row1.v;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ 0 }, cql, 1); @@ -1771,10 +1771,10 @@ public void testLetLargerThanOneWithPK() throws Exception public void testLetLimitUsingBind() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT ?);\n" + " SELECT row1.v;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0 }, cql, 1); @@ -1784,24 +1784,24 @@ public void testLetLimitUsingBind() throws Exception @Test public void testListSetByIndexMultiRow() throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, int_list list, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c))", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, int_list) VALUES (0, 0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, int_list) VALUES (0, 1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (0, 0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (0, 1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1);\n" + " SELECT row0.int_list;\n" + - " UPDATE " + currentTable + " SET int_list[0] = row1.int_list[0] WHERE k = 0 AND c = 0;\n" + - " UPDATE " + currentTable + " SET int_list[0] = row0.int_list[0] WHERE k = 0 AND c = 1;\n" + + " UPDATE " + qualifiedTableName + " SET int_list[0] = row1.int_list[0] WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedTableName + " SET int_list[0] = row0.int_list[0] WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { Arrays.asList(1, 2) }, update); String check = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1);\n" + " SELECT row0.int_list, row1.int_list;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 2), Arrays.asList(1, 4)}, check); @@ -1812,21 +1812,21 @@ public void testListSetByIndexMultiRow() throws Exception @Test public void testSetAppend() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + - " UPDATE " + currentTable + " SET int_set += row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_set += row1.int_set WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(1, 2, 3, 4) }, check); } @@ -1836,13 +1836,13 @@ public void testSetAppend() throws Exception @Test public void testAssignmentFromMultiCellSetElement() throws Exception { - testAssignmentFromSetElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_set set)"); + testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set set)"); } @Test public void testAssignmentFromFrozenSetElement() throws Exception { - testAssignmentFromSetElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_set frozen>)"); + testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set frozen>)"); } private void testAssignmentFromSetElement(String ddl) throws Exception @@ -1850,18 +1850,18 @@ private void testAssignmentFromSetElement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_set) VALUES (0, 0, {1, 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_set) VALUES (1, 0, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_set) VALUES (0, 0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_set) VALUES (1, 0, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + - " UPDATE " + currentTable + " SET v = row1.int_set[4] WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET v = row1.int_set[4] WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); } @@ -1871,21 +1871,21 @@ private void testAssignmentFromSetElement(String ddl) throws Exception @Test public void testMapAppend() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + - " UPDATE " + currentTable + " SET int_map += row1.int_map WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_map += row1.int_map WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("one", 2, "three", 4) }, check); } @@ -1895,13 +1895,13 @@ public void testMapAppend() throws Exception @Test public void testAssignmentFromMultiCellMapElement() throws Exception { - testAssignmentFromMapElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_map map)"); + testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map map)"); } @Test public void testAssignmentFromFrozenMapElement() throws Exception { - testAssignmentFromMapElement("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, int_map frozen>)"); + testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map frozen>)"); } private void testAssignmentFromMapElement(String ddl) throws Exception @@ -1909,18 +1909,18 @@ private void testAssignmentFromMapElement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_map) VALUES (0, 0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, int_map) VALUES (1, 0, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_map) VALUES (0, 0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_map) VALUES (1, 0, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + - " UPDATE " + currentTable + " SET v = row1.int_map[?] WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET v = row1.int_map[?] WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "three"); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); } @@ -1930,13 +1930,13 @@ private void testAssignmentFromMapElement(String ddl) throws Exception @Test public void testAssignmentFromMultiCellUDTField() throws Exception { - testAssignmentFromUDTField("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, customer person)"); + testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer person)"); } @Test public void testAssignmentFromFrozenUDTField() throws Exception { - testAssignmentFromUDTField("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, v int, customer frozen)"); + testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer frozen)"); } private void testAssignmentFromUDTField(String tableDDL) throws Exception @@ -1946,18 +1946,18 @@ private void testAssignmentFromUDTField(String tableDDL) throws Exception { Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, customer) VALUES (0, 0, null);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, v, customer) VALUES (1, 0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, customer) VALUES (0, 0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, customer) VALUES (1, 0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.customer;\n" + - " UPDATE " + currentTable + " SET v = row1.customer.age WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET v = row1.customer.age WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 37 }, check); } @@ -1967,21 +1967,21 @@ private void testAssignmentFromUDTField(String tableDDL) throws Exception @Test public void testSetMapElementFromMapElementReference() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + - " UPDATE " + currentTable + " SET int_map[?] = row1.int_map[?] WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_map[?] = row1.int_map[?] WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "one", "three"); String check = "BEGIN TRANSACTION\n" + - " SELECT int_map[?] FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT int_map[?] FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check, "one"); } @@ -1991,7 +1991,7 @@ public void testSetMapElementFromMapElementReference() throws Exception @Test public void testSetUDTFieldFromUDTFieldReference() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)", cluster -> { Object youngPerson = CQLTester.userType("height", 58, "age", 9); @@ -1999,18 +1999,18 @@ public void testSetUDTFieldFromUDTFieldReference() throws Exception Object adultPerson = CQLTester.userType("height", 74, "age", 37); ByteBuffer adultPersonBuffer = CQLTester.makeByteBuffer(adultPerson, null); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, youngPersonBuffer); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, customer) VALUES (1, ?);", ConsistencyLevel.ALL, adultPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, youngPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (1, ?);", ConsistencyLevel.ALL, adultPersonBuffer); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.customer;\n" + - " UPDATE " + currentTable + " SET customer.age = row1.customer.age WHERE k = 0;\n" + + " UPDATE " + qualifiedTableName + " SET customer.age = row1.customer.age WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { adultPersonBuffer }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT customer.height, customer.age FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT customer.height, customer.age FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 58, 37 }, check); } @@ -2020,13 +2020,13 @@ public void testSetUDTFieldFromUDTFieldReference() throws Exception @Test public void testMultiCellListElementCondition() throws Exception { - testListElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); } @Test public void testFrozenListElementCondition() throws Exception { - testListElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); } private void testListElementCondition(String ddl) throws Exception @@ -2034,20 +2034,20 @@ private void testListElementCondition(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list[1] = 4 THEN\n" + - " UPDATE " + currentTable + " SET int_list = [3, 4] WHERE k = 0;\n" + + " UPDATE " + qualifiedTableName + " SET int_list = [3, 4] WHERE k = 0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableList.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableList.of(3, 4) }, check); } @@ -2057,13 +2057,13 @@ private void testListElementCondition(String ddl) throws Exception @Test public void testMultiCellMapElementCondition() throws Exception { - testMapElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)"); + testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)"); } @Test public void testFrozenMapElementCondition() throws Exception { - testMapElementCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)"); + testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)"); } private void testMapElementCondition(String ddl) throws Exception @@ -2071,20 +2071,20 @@ private void testMapElementCondition(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map[?] = 4 THEN\n" + - " UPDATE " + currentTable + " SET int_map = {'three': 4} WHERE k = 0;\n" + + " UPDATE " + qualifiedTableName + " SET int_map = {'three': 4} WHERE k = 0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "three"); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("three", 4) }, check); } @@ -2094,13 +2094,13 @@ private void testMapElementCondition(String ddl) throws Exception @Test public void testMultiCellUDTFieldCondition() throws Exception { - testUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); } @Test public void testFrozenUDTFieldCondition() throws Exception { - testUDTFieldCondition("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); } private void testUDTFieldCondition(String tableDDL) throws Exception @@ -2112,21 +2112,21 @@ private void testUDTFieldCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialPersonBuffer); assertFalse(result.hasNext()); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer.age = 37 THEN\n" + - " UPDATE " + currentTable + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -2135,7 +2135,7 @@ private void testUDTFieldCondition(String tableDDL) throws Exception assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); } @@ -2145,23 +2145,23 @@ private void testUDTFieldCondition(String tableDDL) throws Exception @Test public void testListSubtraction() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (0, [1, 2, 3, 4]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2, 3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = [3, 4] THEN\n" + - " UPDATE " + currentTable + " SET int_list -= row1.int_list WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_list -= row1.int_list WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(1, 2)}, check); } @@ -2171,23 +2171,23 @@ public void testListSubtraction() throws Exception @Test public void testSetSubtraction() throws Exception { - test("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (0, {1, 2, 3, 4});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2, 3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = {3, 4} THEN\n" + - " UPDATE " + currentTable + " SET int_set -= row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_set -= row1.int_set WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(1, 2) }, check); } @@ -2197,13 +2197,13 @@ public void testSetSubtraction() throws Exception @Test public void testMultiCellMapSubtraction() throws Exception { - testMapSubtraction("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map, int_set set)"); + testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set set)"); } @Test public void testFrozenMapSubtraction() throws Exception { - testMapSubtraction("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map, int_set frozen>)"); + testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set frozen>)"); } private void testMapSubtraction(String ddl) throws Exception @@ -2211,20 +2211,20 @@ private void testMapSubtraction(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (0, { 'one': 2, 'three': 4 });", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, { 'three' });", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, { 'one': 2, 'three': 4 });", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, { 'three' });", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = { 'three' } THEN\n" + - " UPDATE " + currentTable + " SET int_map -= row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedTableName + " SET int_map -= row1.int_set WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of("three") }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("one", 2), null}, check); } @@ -2234,13 +2234,13 @@ private void testMapSubtraction(String ddl) throws Exception @Test public void testMultiCellListSelection() throws Exception { - testListSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list list)"); + testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); } @Test public void testFrozenListSelection() throws Exception { - testListSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_list frozen>)"); + testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); } private void testListSelection(String ddl) throws Exception @@ -2248,16 +2248,16 @@ private void testListSelection(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_list) VALUES (1, [10, 20, 30, 40]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [10, 20, 30, 40]);", ConsistencyLevel.ALL); String selectEntireSet = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableList.of(10, 20, 30, 40) }, selectEntireSet); String selectSingleElement = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_list[0];\n" + "COMMIT TRANSACTION"; @@ -2272,13 +2272,13 @@ private void testListSelection(String ddl) throws Exception @Test public void testMultiCellSetSelection() throws Exception { - testSetSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set set)"); + testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); } @Test public void testFrozenSetSelection() throws Exception { - testSetSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_set frozen>)"); + testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); } private void testSetSelection(String ddl) throws Exception @@ -2286,16 +2286,16 @@ private void testSetSelection(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_set) VALUES (1, {10, 20, 30, 40});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {10, 20, 30, 40});", ConsistencyLevel.ALL); String selectEntireSet = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(10, 20, 30, 40) }, selectEntireSet); String selectSingleElement = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_set[10];\n" + "COMMIT TRANSACTION"; @@ -2310,13 +2310,13 @@ private void testSetSelection(String ddl) throws Exception @Test public void testMultiCellMapSelection() throws Exception { - testMapSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map map)"); + testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)"); } @Test public void testFrozenMapSelection() throws Exception { - testMapSelection("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, int_map frozen>)"); + testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)"); } private void testMapSelection(String ddl) throws Exception @@ -2324,16 +2324,16 @@ private void testMapSelection(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, int_map) VALUES (1, { 'ten': 20, 'thirty': 40 });", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, { 'ten': 20, 'thirty': 40 });", ConsistencyLevel.ALL); String selectEntireMap = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("ten", 20, "thirty", 40) }, selectEntireMap); String selectSingleElement = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + " SELECT row1.int_map['ten'];\n" + "COMMIT TRANSACTION"; @@ -2349,25 +2349,25 @@ public void testScalarUpdateSubstitution() { String KEYSPACE = "ks" + System.currentTimeMillis(); SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 2}"); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + "1 (k int, c int, v int, primary key (k, c))"); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + "2 (k int, c int, v int, primary key (k, c))"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "1 (k int, c int, v int, primary key (k, c))"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "2 (k int, c int, v int, primary key (k, c))"); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); - SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + "1 (k, c, v) VALUES (1, 2, 3);", ConsistencyLevel.ALL); - SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + "2 (k, c, v) VALUES (2, 2, 4);", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "1 (k, c, v) VALUES (1, 2, 3);", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "2 (k, c, v) VALUES (2, 2, 4);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + "1 WHERE k=1 AND c=2);\n" + - " LET row2 = (SELECT * FROM " + currentTable + "2 WHERE k=2 AND c=2);\n" + - " SELECT v FROM " + currentTable + "1 WHERE k=1 AND c=2;\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + "1 WHERE k=1 AND c=2);\n" + + " LET row2 = (SELECT * FROM " + qualifiedTableName + "2 WHERE k=2 AND c=2);\n" + + " SELECT v FROM " + qualifiedTableName + "1 WHERE k=1 AND c=2;\n" + " IF row1.v = 3 AND row2.v = 4 THEN\n" + - " UPDATE " + currentTable + "1 SET v = row2.v WHERE k=1 AND c=2;\n" + + " UPDATE " + qualifiedTableName + "1 SET v = row2.v WHERE k=1 AND c=2;\n" + " END IF\n" + "COMMIT TRANSACTION"; Object[][] result = SHARED_CLUSTER.coordinator(1).execute(query, ConsistencyLevel.ANY); assertEquals(3, result[0][0]); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + "1 WHERE k=1 AND c=2;\n" + + " SELECT * FROM " + qualifiedTableName + "1 WHERE k=1 AND c=2;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(SHARED_CLUSTER, new Object[]{1, 2, 4}, check); } @@ -2375,13 +2375,13 @@ public void testScalarUpdateSubstitution() @Test public void testRegularScalarInsertSubstitution() throws Exception { - testScalarInsertSubstitution("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))"); + testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))"); } @Test public void testStaticScalarInsertSubstitution() throws Exception { - testScalarInsertSubstitution("CREATE TABLE " + currentTable + " (k int, c int, v int static, PRIMARY KEY (k, c))"); + testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, PRIMARY KEY (k, c))"); } private void testScalarInsertSubstitution(String tableDDL) throws Exception @@ -2389,19 +2389,19 @@ private void testScalarInsertSubstitution(String tableDDL) throws Exception test(tableDDL, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 1);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 1);", ConsistencyLevel.ALL); String insert = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = 0 LIMIT 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT 1);\n" + " SELECT row0.v;\n" + " IF row0.v IS NOT NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 1, row0.v);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 1, row0.v);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1 }, insert); String check = "BEGIN TRANSACTION\n" + - " SELECT k, c, v FROM " + currentTable + " WHERE k = 0 AND c = 1;\n" + + " SELECT k, c, v FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 1, 1 }, check); } @@ -2411,13 +2411,13 @@ private void testScalarInsertSubstitution(String tableDDL) throws Exception @Test public void testSelectMultiCellUDTReference() throws Exception { - testSelectUDTReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); } @Test public void testSelectFrozenUDTReference() throws Exception { - testSelectUDTReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); } private void testSelectUDTReference(String tableDDL) throws Exception @@ -2429,13 +2429,13 @@ private void testSelectUDTReference(String tableDDL) throws Exception ByteBuffer personBuffer = CQLTester.makeByteBuffer(personValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, personBuffer); assertFalse(result.hasNext()); String read = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row0.customer;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { personBuffer }, read, 0); @@ -2446,13 +2446,13 @@ private void testSelectUDTReference(String tableDDL) throws Exception @Test public void testSelectMultiCellUDTFieldReference() throws Exception { - testSelectUDTFieldReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer person)"); + testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); } @Test public void testSelectFrozenUDTFieldReference() throws Exception { - testSelectUDTFieldReference("CREATE TABLE " + currentTable + " (k int PRIMARY KEY, customer frozen)"); + testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); } private void testSelectUDTFieldReference(String tableDDL) throws Exception @@ -2464,13 +2464,13 @@ private void testSelectUDTFieldReference(String tableDDL) throws Exception ByteBuffer personBuffer = CQLTester.makeByteBuffer(personValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + currentTable + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, personBuffer); assertFalse(result.hasNext()); String read = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + currentTable + " WHERE k = ?);\n" + + " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + " SELECT row0.customer.age;\n" + "COMMIT TRANSACTION"; result = assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 37 }, read, 0); @@ -2483,33 +2483,33 @@ private void testSelectUDTFieldReference(String tableDDL) throws Exception @Test public void testMultiKeyQueryAndInsert() throws Throwable { - test("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))", cluster -> { String query1 = "BEGIN TRANSACTION\n" + - " LET select1 = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0);\n" + - " LET select2 = (SELECT * FROM " + currentTable + " WHERE k=1 AND c=0);\n" + - " SELECT v FROM " + currentTable + " WHERE k=0 AND c=0;\n" + + " LET select1 = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + + " LET select2 = (SELECT * FROM " + qualifiedTableName + " WHERE k=1 AND c=0);\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + " IF select1 IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0);\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 0);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 0);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, query1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 0}, check, 0, 0); assertRowEqualsWithPreemptedRetry(cluster, new Object[] {1, 0, 0}, check, 1, 0); String query2 = "BEGIN TRANSACTION\n" + - " LET select1 = (SELECT * FROM " + currentTable + " WHERE k=1 AND c=0);\n" + - " LET select2 = (SELECT * FROM " + currentTable + " WHERE k=2 AND c=0);\n" + - " SELECT v FROM " + currentTable + " WHERE k=1 AND c=0;\n" + + " LET select1 = (SELECT * FROM " + qualifiedTableName + " WHERE k=1 AND c=0);\n" + + " LET select2 = (SELECT * FROM " + qualifiedTableName + " WHERE k=2 AND c=0);\n" + + " SELECT v FROM " + qualifiedTableName + " WHERE k=1 AND c=0;\n" + " IF select1.v = ? THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (1, 0, 1);\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (2, 0, 1);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 1);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (2, 0, 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0 }, query2, 0); @@ -2525,11 +2525,12 @@ public void demoTest() throws Throwable { SHARED_CLUSTER.schemaChange("DROP KEYSPACE IF EXISTS demo_ks;"); SHARED_CLUSTER.schemaChange("CREATE KEYSPACE demo_ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2};"); - SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged("demo_ks")); SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_docs ( org_name text, doc_id int, contents_version int static, title text, permissions int, PRIMARY KEY (org_name, doc_id) );"); SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_users ( org_name text, user text, members_version int static, permissions int, PRIMARY KEY (org_name, user) );"); SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.user_docs ( user text, doc_id int, title text, org_name text, permissions int, PRIMARY KEY (user, doc_id) );"); + SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged("demo_ks")); + SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); SHARED_CLUSTER.coordinator(1).execute("INSERT INTO demo_ks.org_users (org_name, user, members_version, permissions) VALUES ('demo', 'blake', 5, 777);\n", ConsistencyLevel.ALL); @@ -2571,12 +2572,12 @@ public void demoTest() throws Throwable public void testReferenceArithmeticInInsert() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET a = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " LET a = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + " IF a IS NOT NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 1, a.v + 1);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 1, a.v + 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, cql); @@ -2589,12 +2590,12 @@ public void testReferenceArithmeticInInsert() throws Exception public void testReferenceArithmeticInUpdate() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET a = (SELECT * FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " LET a = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + " IF a IS NOT NULL THEN\n" + - " UPDATE " + currentTable + " SET v = a.v + 1 WHERE k = 0 and c = 1;\n" + + " UPDATE " + qualifiedTableName + " SET v = a.v + 1 WHERE k = 0 and c = 1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, cql); @@ -2606,39 +2607,39 @@ public void testReferenceArithmeticInUpdate() throws Exception @Test public void testCASAndSerialRead() throws Exception { - test("CREATE TABLE " + currentTable + " (id int, c int, v int, s int static, PRIMARY KEY ((id), c));", + test("CREATE TABLE " + qualifiedTableName + " (id int, c int, v int, s int static, PRIMARY KEY ((id), c));", cluster -> { ICoordinator coordinator = cluster.coordinator(1); int startingAccordCoordinateCount = getAccordCoordinateCount(); - assertRowEquals(cluster, new Object[]{false}, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF EXISTS"); - assertRowEquals(cluster, new Object[]{false}, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); - coordinator.execute("INSERT INTO " + currentTable + " (id, c, v, s) VALUES (1, 2, 3, 5);", ConsistencyLevel.ALL); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 3, 5); - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); - assertRowEquals(cluster, new Object[]{ false, 4 }, "UPDATE " + currentTable + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{false}, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF EXISTS"); + assertRowEquals(cluster, new Object[]{false}, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + coordinator.execute("INSERT INTO " + qualifiedTableName + " (id, c, v, s) VALUES (1, 2, 3, 5);", ConsistencyLevel.ALL); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 3, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{ false, 4 }, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); // Test working with a static column - assertRowEquals(cluster, new Object[]{ false, 5 }, "UPDATE " + currentTable + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 4"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 5"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 5, 5); - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET s = 6 WHERE id = 1 IF s = 5"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 5, 6); + assertRowEquals(cluster, new Object[]{ false, 5 }, "UPDATE " + qualifiedTableName + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 4"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 5"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 5, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET s = 6 WHERE id = 1 IF s = 5"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 5, 6); // Test that read before write works with CAS - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + currentTable + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1 AND c = 2", 1, 2, 6, 7); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 6, 7); // Check range deletion works - coordinator.execute("INSERT INTO " + currentTable + " (id, c, v, s) VALUES (1, 2, 6, 7);", ConsistencyLevel.ALL); - coordinator.execute("INSERT INTO " + currentTable + " (id, c, v) VALUES (1, 3, 3);", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedTableName + " (id, c, v, s) VALUES (1, 2, 6, 7);", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedTableName + " (id, c, v) VALUES (1, 3, 3);", ConsistencyLevel.ALL); assertRowEquals(cluster, new Object[]{true}, "BEGIN BATCH \n" + - "UPDATE " + currentTable + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS; \n" + - "DELETE FROM " + currentTable + " WHERE id = 1 AND c > 0 AND c < 10; \n" + + "UPDATE " + qualifiedTableName + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS; \n" + + "DELETE FROM " + qualifiedTableName + " WHERE id = 1 AND c > 0 AND c < 10; \n" + "APPLY BATCH;"); - Object[][] rangeDeletionCheck = coordinator.execute("SELECT id, c, v, s FROM " + currentTable + " WHERE id = 1", ConsistencyLevel.SERIAL); + Object[][] rangeDeletionCheck = coordinator.execute("SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1", ConsistencyLevel.SERIAL); assertArrayEquals(new Object[] { 1, 2, 7, 8 }, rangeDeletionCheck[0]); assertEquals(1, rangeDeletionCheck.length); @@ -2655,10 +2656,10 @@ public void testCASAndSerialRead() throws Exception @Test public void testCASSimulatorLite() throws Exception { - test("CREATE TABLE " + currentTable + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", + test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", cluster -> { ICoordinator coordinator = cluster.coordinator(1); - coordinator.execute("INSERT INTO " + currentTable + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedTableName + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); ExecutorService es = Executors.newCachedThreadPool(); @@ -2666,12 +2667,12 @@ public void testCASSimulatorLite() throws Exception for (int ii = 0; ii < 10; ii++) { int id = ii; - futures.add(es.submit(() -> coordinator.execute("UPDATE " + currentTable + " SET count = count + 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk = ? IF EXISTS", ConsistencyLevel.ALL, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id))), 1))); + futures.add(es.submit(() -> coordinator.execute("UPDATE " + qualifiedTableName + " SET count = count + 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk = ? IF EXISTS", ConsistencyLevel.ALL, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id))), 1))); } for (Future f : futures) f.get(); - Object[][] result = coordinator.execute("SELECT pk, count, seq1, seq2 FROM " + currentTable + " WHERE pk = 1", ConsistencyLevel.SERIAL); + Object[][] result = coordinator.execute("SELECT pk, count, seq1, seq2 FROM " + qualifiedTableName + " WHERE pk = 1", ConsistencyLevel.SERIAL); int[] seq1 = Arrays.stream(((String) result[0][2]).split(",")) .filter(s -> !s.isEmpty()) @@ -2687,11 +2688,11 @@ public void testCASSimulatorLite() throws Exception @Test public void testTransactionCasSimulatorLite() throws Exception { - test("CREATE TABLE " + currentTable + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", + test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", cluster -> { ICoordinator coordinator = cluster.coordinator(1); - coordinator.execute("INSERT INTO " + currentTable + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedTableName + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); ExecutorService es = Executors.newCachedThreadPool(); @@ -2700,8 +2701,8 @@ public void testTransactionCasSimulatorLite() throws Exception { int id = ii; String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE pk = 1);\n" + - " UPDATE " + currentTable + " SET count += 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk=1;\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE pk = 1);\n" + + " UPDATE " + qualifiedTableName + " SET count += 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk=1;\n" + "COMMIT TRANSACTION"; futures.add(es.submit(() -> coordinator.executeWithResult(update, ConsistencyLevel.ANY, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id)))))); } @@ -2709,7 +2710,7 @@ public void testTransactionCasSimulatorLite() throws Exception f.get(); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE pk = 1;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE pk = 1;\n" + "COMMIT TRANSACTION"; Object[][] result = coordinator.execute(check, ConsistencyLevel.ALL); @@ -2728,15 +2729,15 @@ public void testTransactionCasSimulatorLite() throws Exception @Test public void testSerialReadDescending() throws Throwable { - test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY(k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c))", cluster -> { ICoordinator coordinator = cluster.coordinator(1); for (int i = 1; i <= 10; i++) - coordinator.execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); + coordinator.execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); } ); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java index 7315df858a49..e269e4e27a21 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java @@ -59,10 +59,10 @@ public void testRecovery() throws Exception IMessageFilters.Filter lostCommit = cluster.filters().verbs(Verb.ACCORD_COMMIT_REQ.id).to(2).drop(); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + " SELECT row1.v;\n" + " IF row1 IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 1);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; // row1.v shouldn't have existed when the txn's SELECT was executed @@ -73,24 +73,24 @@ public void testRecovery() throws Exception // Querying again should trigger recovery... query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + " SELECT row1.v;\n" + " IF row1.v = 1 THEN\n" + - " UPDATE " + currentTable + " SET v=2 WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedTableName + " SET v=2 WHERE k = 0 AND c = 0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1 }, query); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 2}, check, 0, 0); query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + currentTable + " WHERE k=0 AND c=0);\n" + + " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + " SELECT row1.v;\n" + " IF row1 IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 3);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 3);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2 }, query); @@ -113,16 +113,16 @@ public void testLostCommitReadTriggersFallbackRead() throws Exception })).drop(); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + currentTable + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + " SELECT row1.v;\n" + " IF row1 IS NULL THEN\n" + - " INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 1);\n" + + " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, query); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + currentTable + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check, 0, 0); }); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java index 74c64c3b138e..320a9f4e09ba 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java @@ -28,7 +28,6 @@ import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.shared.AssertUtils; -import org.apache.cassandra.service.accord.AccordService; public class AccordInteroperabilityTest extends AccordTestBase { @@ -45,21 +44,20 @@ public static void setupClass() throws IOException { AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord") .set("non_serial_write_strategy", "accord")), 3); - SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); } @Test public void testSerialReadDescending() throws Throwable { - test("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY(k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c))", cluster -> { ICoordinator coordinator = cluster.coordinator(1); for (int i = 1; i <= 10; i++) - coordinator.execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); - assertRowSerial(cluster, "SELECT c, v FROM " + currentTable + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); + coordinator.execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); } ); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java index 496e8d08e4ca..028bcfbf32f7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -69,16 +69,16 @@ public static void setupClass() throws IOException String writeCql() { return "BEGIN TRANSACTION\n" + - " LET val = (SELECT v FROM " + currentTable + " WHERE k=? AND c=?);\n" + + " LET val = (SELECT v FROM " + qualifiedTableName + " WHERE k=? AND c=?);\n" + " SELECT val.v;\n" + - " UPDATE " + currentTable + " SET v = v + 1 WHERE k=? AND c=?;\n" + + " UPDATE " + qualifiedTableName + " SET v = v + 1 WHERE k=? AND c=?;\n" + "COMMIT TRANSACTION"; } String readCql() { return "BEGIN TRANSACTION\n" + - " LET val = (SELECT v FROM " + currentTable + " WHERE k=? AND c=?);\n" + + " LET val = (SELECT v FROM " + qualifiedTableName + " WHERE k=? AND c=?);\n" + " SELECT val.v;\n" + "COMMIT TRANSACTION"; } @@ -89,8 +89,8 @@ String readCql() public void beforeTest() { SHARED_CLUSTER.filters().reset(); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + currentTable + " (k int, c int, v int, PRIMARY KEY (k, c))"); - SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))"); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); } @Test diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index 69c550f633d1..d11d364181ce 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -61,7 +61,6 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; @@ -161,7 +160,6 @@ public static void setupClass() throws IOException upperMidToken = partitioner.midpoint(midToken, maxToken); lowerMidToken = partitioner.midpoint(minToken, midToken); coordinator = SHARED_CLUSTER.coordinator(1); - SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); } @AfterClass @@ -343,13 +341,13 @@ private static void assertTargetPaxosWrite(Consumer query, int coordina @Test public void testPaxosToAccordCAS() throws Exception { - test(format(TABLE_FMT, currentTable), + test(format(TABLE_FMT, qualifiedTableName), cluster -> { - String casCQL = format(CAS_FMT, currentTable, CLUSTERING_VALUE); + String casCQL = format(CAS_FMT, qualifiedTableName, CLUSTERING_VALUE); Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); Consumer runCasApplies = key -> assertRowEquals(cluster, new Object[]{true}, casCQL, key); Consumer runCasOnSecondNode = key -> assertEquals( "[applied]", cluster.coordinator(2).executeWithResult(casCQL, ANY, key).names().get(0)); - String tableName = currentTable.split("\\.")[1]; + String tableName = qualifiedTableName.split("\\.")[1]; int migratingKey = getKeyBetweenTokens(midToken, maxToken); int notMigratingKey = getKeyBetweenTokens(minToken, midToken); Range migratingRange = new Range(midToken, maxToken); @@ -420,7 +418,7 @@ public void testPaxosToAccordCAS() throws Exception // Update inserted row so the condition can apply, if the condition check doesn't apply // then it won't get to propose/accept migratingKey = testingKeys.next(); - Consumer makeCASApply = key -> cluster.coordinator(1).execute("UPDATE " + currentTable + " SET v = 42 WHERE id = ? AND c = ?", ALL, key, CLUSTERING_VALUE); + Consumer makeCASApply = key -> cluster.coordinator(1).execute("UPDATE " + qualifiedTableName + " SET v = 42 WHERE id = ? AND c = ?", ALL, key, CLUSTERING_VALUE); makeCASApply.accept(migratingKey); assertTargetAccordWrite(runCasApplies, 1, migratingKey, 1, 1, 1, 0, 1); @@ -469,10 +467,10 @@ public void testPaxosToAccordCAS() throws Exception @Test public void testPaxosToAccordSerialRead() throws Exception { - test(format(TABLE_FMT, currentTable), + test(format(TABLE_FMT, qualifiedTableName), cluster -> { - String tableName = currentTable.split("\\.")[1]; - String readCQL = format("SELECT * FROM %s WHERE id = ? and c = %s", currentTable, CLUSTERING_VALUE); + String tableName = qualifiedTableName.split("\\.")[1]; + String readCQL = format("SELECT * FROM %s WHERE id = ? and c = %s", qualifiedTableName, CLUSTERING_VALUE); Function runRead = key -> cluster.coordinator(1).execute(readCQL, SERIAL, key); Range migratingRange = new Range<>(new LongToken(Long.MIN_VALUE + 1), new LongToken(Long.MIN_VALUE)); List> migratingRanges = ImmutableList.of(migratingRange); @@ -495,11 +493,11 @@ public void testPaxosToAccordSerialRead() throws Exception @Test public void testAccordToPaxos() throws Exception { - test(format(TABLE_FMT, currentTable), + test(format(TABLE_FMT, qualifiedTableName), cluster -> { - String casCQL = format(CAS_FMT, currentTable, CLUSTERING_VALUE); + String casCQL = format(CAS_FMT, qualifiedTableName, CLUSTERING_VALUE); Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); - String tableName = currentTable.split("\\.")[1]; + String tableName = qualifiedTableName.split("\\.")[1]; // Mark a subrange as migrating and finish migrating half of it nodetool(coordinator, "consensus_admin", "begin-migration", "-st", midToken.toString(), "-et", maxToken.toString(), "-tp", "accord", KEYSPACE, tableName); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java new file mode 100644 index 000000000000..19d562a21cdb --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.net.UnknownHostException; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Set; + +import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.FBUtilities; +import org.junit.Assert; +import org.junit.Test; + +import accord.local.Node; +import accord.topology.Topology; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.gms.FailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.service.accord.AccordConfigurationService; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AccordSimpleFastPathTest extends TestBaseImpl +{ + private static final Logger logger = LoggerFactory.getLogger(AccordSimpleFastPathTest.class); + + private static Node.Id id(int i) + { + return new Node.Id(i); + } + + private static Set idSet(int... ids) + { + Set result = new HashSet<>(); + for (int id: ids) + result.add(id(id)); + return result; + } + + private static InetAddressAndPort ep(int i) + { + try + { + return InetAddressAndPort.getByName(String.format("127.0.0.%s:7012", i)); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + + private static Set epSet(int... eps) + { + Set result = new HashSet<>(); + for (int ep: eps) + result.add(ep(ep)); + return result; + } + + @Test + public void downNodesRemovedFromFastPath() throws Throwable + { + try (Cluster cluster = init(Cluster.build(3) + .withoutVNodes() + .withConfig(c -> c.with(Feature.NETWORK).set("accord.enabled", "true")) + .start())) + { + cluster.schemaChange("CREATE KEYSPACE ks WITH replication={'class':'SimpleStrategy', 'replication_factor': 3}"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key (k, c))"); + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM ks.tbl WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY); + + InetAddressAndPort node1Addr = InetAddressAndPort.getByAddress(cluster.get(1).broadcastAddress()); + InetAddressAndPort node2Addr = InetAddressAndPort.getByAddress(cluster.get(2).broadcastAddress()); + InetAddressAndPort node3Addr = InetAddressAndPort.getByAddress(cluster.get(3).broadcastAddress()); + int node3Id = cluster.get(3).callOnInstance(() -> ClusterMetadata.current().directory.peerId(FBUtilities.getBroadcastAddressAndPort()).id()); + long preShutDownEpoch = cluster.stream().map(ii -> ii.callOnInstance(() -> { + ClusterMetadata cm = ClusterMetadata.current(); + AccordFastPath accordFastPath = cm.accordFastPath; + Assert.assertEquals(idSet(), accordFastPath.unavailableIds()); + + long epoch = cm.epoch.getEpoch(); + AccordConfigurationService configService = ((AccordService) AccordService.instance()).configurationService(); + Topology topology = configService.getTopologyForEpoch(epoch); + Assert.assertFalse(topology.shards().isEmpty()); + topology.shards().forEach(shard -> Assert.assertEquals(idSet(1, 2, 3), shard.fastPathElectorate)); + return cm.epoch.getEpoch(); + })).max(Comparator.naturalOrder()).get(); + + cluster.get(1).runOnInstance(() -> { + FailureDetector.instance.forceConviction(InetAddressAndPort.getByAddress(node3Addr)); + // update is performed in another thread, wait for it to be applied locally before returning + for (int i=0; i<10; i++) + { + if (ClusterMetadata.current().epoch.getEpoch() == preShutDownEpoch) + FBUtilities.sleepQuietly(100); + else + break; + } + assert ClusterMetadata.current().epoch.getEpoch() > preShutDownEpoch; + }); + + cluster.get(1, 2).forEach(ii -> { + logger.info("Checking instance {} -> {}", ii, ii.broadcastAddress()); + ii.runOnInstance(() -> { + ClusterMetadataService.instance().fetchLogFromCMS(Epoch.create(preShutDownEpoch + 1)); + ClusterMetadata cm = ClusterMetadata.current(); + AccordFastPath accordFastPath = cm.accordFastPath; + Assert.assertEquals(preShutDownEpoch + 1, cm.epoch.getEpoch()); + Assert.assertEquals(idSet(node3Id), accordFastPath.unavailableIds()); + }); + + } + ); + + // confirm a duplicate conviction doesn't create a new epoch + cluster.get(2).runOnInstance(() -> { + FailureDetector.instance.forceConviction(InetAddressAndPort.getByAddress(node3Addr)); + }); + + cluster.get(1, 2).forEach(ii -> ii.runOnInstance(() -> { + ClusterMetadata cm = ClusterMetadata.current(); + Assert.assertEquals(preShutDownEpoch + 1, cm.epoch.getEpoch()); + })); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 8010a7e58166..9cb218dab30a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -30,6 +30,7 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; +import accord.coordinate.Invalidated; import com.google.common.base.Splitter; import com.google.common.primitives.Ints; import org.junit.After; @@ -85,7 +86,8 @@ public abstract class AccordTestBase extends TestBaseImpl protected static Cluster SHARED_CLUSTER; - protected String currentTable; + protected String tableName; + protected String qualifiedTableName; public static void setupCluster(Function options, int nodes) throws IOException { @@ -102,7 +104,8 @@ public static void teardown() @Before public void setup() { - currentTable = KEYSPACE + ".tbl" + COUNTER.getAndIncrement(); + tableName = "tbl" + COUNTER.getAndIncrement(); + qualifiedTableName = KEYSPACE + '.' + tableName; } @After @@ -129,11 +132,17 @@ protected void test(String tableDDL, FailingConsumer fn) throws Excepti test(Collections.singletonList(tableDDL), fn); } + public static void ensureTableIsAccordManaged(Cluster cluster, String ksname, String tableName) + { + cluster.get(1).runOnInstance(() -> AccordService.instance().ensureTableIsAccordManaged(ksname, tableName)); + } + protected void test(List ddls, FailingConsumer fn) throws Exception { for (String ddl : ddls) SHARED_CLUSTER.schemaChange(ddl); + ensureTableIsAccordManaged(SHARED_CLUSTER, KEYSPACE, tableName); // Evict commands from the cache immediately to expose problems loading from disk. SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); @@ -149,7 +158,7 @@ protected void test(List ddls, FailingConsumer fn) throws Excep protected void test(FailingConsumer fn) throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, v int, primary key (k, c))", fn); + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))", fn); } protected static ConsensusMigrationState getMigrationStateSnapshot(IInvokableInstance instance) throws IOException @@ -331,6 +340,12 @@ private static SimpleQueryResult assertRowWithPreemptedRetry(Cluster cluster, Si return result; } + private static boolean hasRootCause(RuntimeException ex, Class klass) + { + return AssertionUtils.rootCauseIs(klass).matches(ex); + + } + private static SimpleQueryResult executeWithRetry0(int count, Cluster cluster, String check, Object... boundValues) { try @@ -339,7 +354,7 @@ private static SimpleQueryResult executeWithRetry0(int count, Cluster cluster, S } catch (RuntimeException ex) { - if (count <= MAX_RETRIES && (AssertionUtils.rootCauseIs(ReadPreemptedException.class).matches(ex) || AssertionUtils.rootCauseIs(WritePreemptedException.class).matches(ex))) + if (count <= MAX_RETRIES && (hasRootCause(ex, ReadPreemptedException.class) || hasRootCause(ex, WritePreemptedException.class) || hasRootCause(ex, Invalidated.class))) { logger.warn("[Retry attempt={}] Preempted failure for\n{}", count, check); return executeWithRetry0(count + 1, cluster, check, boundValues); diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java index de4b6541ed94..381ea8c6be65 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java @@ -63,12 +63,13 @@ import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.log.LocalLog; import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.service.accord.AccordFastPath; import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; -import org.apache.cassandra.tcm.ownership.AccordKeyspaces; +import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.ownership.UniformRangePlacement; @@ -149,7 +150,8 @@ public static ClusterMetadata minimalForTesting(Epoch epoch, IPartitioner partit Directory.EMPTY, new TokenMap(partitioner), DataPlacements.empty(), - AccordKeyspaces.EMPTY, + AccordTables.EMPTY, + AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, null, @@ -164,7 +166,8 @@ public static ClusterMetadata minimalForTesting(IPartitioner partitioner) null, null, DataPlacements.empty(), - AccordKeyspaces.EMPTY, + AccordTables.EMPTY, + AccordFastPath.EMPTY, null, null, null, @@ -179,7 +182,8 @@ public static ClusterMetadata minimalForTesting(Keyspaces keyspaces) null, null, DataPlacements.empty(), - AccordKeyspaces.EMPTY, + AccordTables.EMPTY, + AccordFastPath.EMPTY, null, null, null, diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index c73eead7446f..f4739bca6def 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -207,7 +207,7 @@ private static void durable(int event) private static TxnRequest toRequest(int event) { TxnId id = toTxnId(event); - Ranges ranges = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min("system"), AccordRoutingKey.SentinelKey.max("system"))); + Ranges ranges = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(tableId), AccordRoutingKey.SentinelKey.max(tableId))); Topologies topologies = Utils.topologies(TopologyUtils.initialTopology(new Node.Id[] {node}, ranges, 3)); Keys keys = Keys.of(toKey(0)); Txn txn = new Txn.InMemory(keys, new TxnRead(new TxnNamedRead[0], keys, null), TxnQuery.ALL, new NoopUpdate()); @@ -222,7 +222,7 @@ private static TxnId toTxnId(int event) private static PartitionKey toKey(int a) { - return new PartitionKey(KEYSPACE, tableId, Murmur3Partitioner.instance.decorateKey(ByteBufferUtil.bytes(a))); + return new PartitionKey(tableId, Murmur3Partitioner.instance.decorateKey(ByteBufferUtil.bytes(a))); } private static final TableId tableId = TableId.fromUUID(new UUID(0, 0)); @@ -233,7 +233,7 @@ private static FullRoute route() return new FullKeyRoute(key, true, new RoutingKey[]{ key }); } - private static final RoutingKey key = new AccordRoutingKey.TokenKey("system", new Murmur3Partitioner.LongToken(42)); + private static final RoutingKey key = new AccordRoutingKey.TokenKey(tableId, new Murmur3Partitioner.LongToken(42)); } public static class NoopUpdate implements Update diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index 2d5d1aadb2b9..ccf3d9a2889a 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -291,11 +291,11 @@ public void testDescribe() throws Throwable row(KEYSPACE, "keyspace", KEYSPACE, "CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}" + - " AND durable_writes = true;"), + " AND durable_writes = true AND fast_path = 'simple';"), row(KEYSPACE_PER_TEST, "keyspace", KEYSPACE_PER_TEST, "CREATE KEYSPACE " + KEYSPACE_PER_TEST + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}" + - " AND durable_writes = true;"), + " AND durable_writes = true AND fast_path = 'simple';"), row("test", "keyspace", "test", keyspaceOutput()), row("test", "table", "has_all_types", allTypesTable()), row("test", "table", "\"Test\"", testTableOutput()), @@ -697,7 +697,8 @@ public void testDescribeTypes() throws Throwable assertRowsNet(executeDescribeNet(KEYSPACE_PER_TEST, "DESCRIBE KEYSPACE " + KEYSPACE_PER_TEST), row(KEYSPACE_PER_TEST, "keyspace", KEYSPACE_PER_TEST, "CREATE KEYSPACE " + KEYSPACE_PER_TEST + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}" + - " AND durable_writes = true;"), + " AND durable_writes = true" + + " AND fast_path = 'simple';"), row(KEYSPACE_PER_TEST, "type", type2, "CREATE TYPE " + KEYSPACE_PER_TEST + "." + type2 + " (\n" + " x text,\n" + " y text\n" + @@ -802,7 +803,8 @@ public void testDescribeWithCustomIndex() throws Throwable String expectedKeyspaceStmt = "CREATE KEYSPACE " + KEYSPACE_PER_TEST + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}" + - " AND durable_writes = true;"; + " AND durable_writes = true" + + " AND fast_path = 'simple';"; String expectedTableStmt = "CREATE TABLE " + KEYSPACE_PER_TEST + "." + table + " (\n" + " id int PRIMARY KEY,\n" + @@ -1130,6 +1132,7 @@ private static String tableParametersCql() " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + " AND memtable = 'default'\n" + " AND crc_check_chance = 1.0\n" + + " AND fast_path = 'keyspace'\n" + " AND default_time_to_live = 0\n" + " AND extensions = {}\n" + " AND gc_grace_seconds = 864000\n" + @@ -1170,7 +1173,7 @@ private static String mvParametersCql() private static String keyspaceOutput() { - return "CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true;"; + return "CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true AND fast_path = 'simple';"; } private void describeError(String cql, String msg) throws Throwable diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index d045584412c6..d0d2295e424f 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.io.Files; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.junit.Assert; import org.junit.Test; @@ -309,6 +310,7 @@ public void testCfmOptionsCQL() .compaction(CompactionParams.lcs(Collections.singletonMap("sstable_size_in_mb", "1"))) .compression(CompressionParams.lz4(1 << 16, 1 << 15)) .crcCheckChance(0.3) + .fastPath(FastPathStrategy.simple()) .defaultTimeToLive(4) .gcGraceSeconds(5) .minIndexInterval(6) @@ -336,6 +338,7 @@ public void testCfmOptionsCQL() " AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor', 'min_compress_ratio': '2.0'}\n" + " AND memtable = 'default'\n" + " AND crc_check_chance = 0.3\n" + + " AND fast_path = 'simple'\n" + " AND default_time_to_live = 4\n" + " AND extensions = {'ext1': 0x76616c31}\n" + " AND gc_grace_seconds = 5\n" + diff --git a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java index 00885d8c1f4f..9739d3ed5970 100644 --- a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java +++ b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java @@ -212,7 +212,8 @@ private RangeStreamer getRangeStreamer() throws UnknownHostException false, 1, movements.left, - movements.right); + movements.right, + true); } private boolean includesWraparound(Collection> toFetch) diff --git a/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java b/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java index ace18db5498c..1d68475cd957 100644 --- a/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java +++ b/test/unit/org/apache/cassandra/dht/PartitionerTestCase.java @@ -37,6 +37,7 @@ import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import static org.apache.cassandra.service.accord.AccordTestUtils.TABLE_ID1; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -288,9 +289,9 @@ void testCompareSplitter(Token less, Token more) if (less.equals(more) && less.isMinimum()) ranges = Ranges.EMPTY; else if (less.equals(more)) - ranges = Ranges.of(new TokenRange(new TokenKey("", partitioner.getMinimumToken()), new TokenKey("", less))); + ranges = Ranges.of(new TokenRange(new TokenKey(TABLE_ID1, partitioner.getMinimumToken()), new TokenKey(TABLE_ID1, less))); else - ranges = Ranges.of(new TokenRange(new TokenKey("", less), new TokenKey("", more))); + ranges = Ranges.of(new TokenRange(new TokenKey(TABLE_ID1, less), new TokenKey(TABLE_ID1, more))); AccordSplitter splitter = partitioner.accordSplitter().apply(ranges); BigInteger lv = splitter.valueForToken(less); @@ -303,11 +304,11 @@ else if (less.equals(more)) void testSplitter(Token start, Token end) { - accord.primitives.Range range = new TokenRange(new TokenKey("", start), new TokenKey("", end)); + accord.primitives.Range range = new TokenRange(new TokenKey(TABLE_ID1, start), new TokenKey(TABLE_ID1, end)); AccordSplitter splitter = partitioner.accordSplitter().apply(Ranges.of(range)); if (!start.isMinimum()) - testSplitter(new TokenRange(new TokenKey("", partitioner.getMinimumToken()), new TokenKey("", start))); - testSplitter(new TokenRange(new TokenKey("", start), new TokenKey("", splitter.tokenForValue(splitter.maximumValue())))); + testSplitter(new TokenRange(new TokenKey(TABLE_ID1, partitioner.getMinimumToken()), new TokenKey(TABLE_ID1, start))); + testSplitter(new TokenRange(new TokenKey(TABLE_ID1, start), new TokenKey(TABLE_ID1, splitter.tokenForValue(splitter.maximumValue())))); checkRoundTrip(start, splitter.tokenForValue(splitter.valueForToken(start))); checkRoundTrip(end, splitter.tokenForValue(splitter.valueForToken(end))); } diff --git a/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java b/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java index 20da42dd120b..008c3854e268 100644 --- a/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java +++ b/test/unit/org/apache/cassandra/locator/AssureSufficientLiveNodesTest.java @@ -32,6 +32,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.cassandra.schema.*; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; @@ -45,10 +46,6 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.UnavailableException; -import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.schema.SchemaTestUtil; -import org.apache.cassandra.schema.Tables; import org.apache.cassandra.service.reads.NeverSpeculativeRetryPolicy; import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.utils.FBUtilities; @@ -82,6 +79,7 @@ public class AssureSufficientLiveNodesTest extends CassandraTestBase private static final String DC3 = "datacenter3"; private static final int RACE_TEST_LOOPS = 100; private static final Token tk = new Murmur3Partitioner.LongToken(0); + private static final TableId TABLE_ID = TableId.generate(); @BeforeClass public static void setUpClass() throws Throwable @@ -140,7 +138,7 @@ public void addDatacenterShouldNotCausesUnavailableWithEachQuorumTest() throws T // alter to KeyspaceParams.nts(DC1, 3, DC2, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) + keyspace -> ReplicaPlans.forRead(keyspace, TABLE_ID, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } @@ -173,7 +171,7 @@ public void addDatacenterShouldNotCausesUnavailableWithQuorumTest() throws Throw // alter to KeyspaceParams.nts(DC1, 3, DC2, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) + keyspace -> ReplicaPlans.forRead(keyspace, TABLE_ID, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); raceOfReplicationStrategyTest( // init. The # of live endpoints is 3 = 2 + 1 @@ -181,7 +179,7 @@ public void addDatacenterShouldNotCausesUnavailableWithQuorumTest() throws Throw // alter to. (3 + 3) / 2 + 1 > 3 KeyspaceParams.nts(DC1, 2, DC2, 1, DC3, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) + keyspace -> ReplicaPlans.forRead(keyspace, TABLE_ID, tk, null, QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } @@ -205,7 +203,7 @@ public void raceOnRemoveDatacenterNotCausesUnavailable() throws Throwable // alter to KeyspaceParams.nts(DC1, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) + keyspace -> ReplicaPlans.forRead(keyspace, TABLE_ID, tk, null, EACH_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } @@ -229,7 +227,7 @@ public void increaseReplicationFactorShouldNotCausesUnavailableTest() throws Thr // alter to KeyspaceParams.nts(DC1, 3), // test - keyspace -> ReplicaPlans.forRead(keyspace, tk, null, LOCAL_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) + keyspace -> ReplicaPlans.forRead(keyspace, TABLE_ID, tk, null, LOCAL_QUORUM, NeverSpeculativeRetryPolicy.INSTANCE, ReadCoordinator.DEFAULT) ); } diff --git a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java index babb7d659be2..63f91b672980 100644 --- a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java +++ b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java @@ -26,6 +26,8 @@ import java.util.Set; import com.google.common.collect.ImmutableMap; +import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.tcm.ownership.AccordTables; import org.junit.Assert; import org.junit.Test; @@ -39,7 +41,6 @@ import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.tcm.ownership.AccordKeyspaces; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.sequences.InProgressSequences; @@ -89,7 +90,8 @@ public static ClusterMetadata metadata(NodeConfiguration... configurations) directory, tokenMap, DataPlacements.EMPTY, - AccordKeyspaces.EMPTY, + AccordTables.EMPTY, + AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusMigrationState.EMPTY, diff --git a/test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java b/test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java new file mode 100644 index 000000000000..1a2dc1132dad --- /dev/null +++ b/test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.schema; + +import java.util.Arrays; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; +import org.apache.cassandra.service.accord.fastpath.ParameterizedFastPathStrategy; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.transformations.AddAccordTable; + +import static java.lang.String.format; + +public class FastPathSchemaTest +{ + private static String KEYSPACE = "ks"; + private static int ksCount = 0; + + @BeforeClass + public static void setupClass() + { + DatabaseDescriptor.daemonInitialization(); + ServerTestUtils.prepareServer(); + SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create(KEYSPACE, KeyspaceParams.simple(1), Tables.of())); + } + + @Before + public void setup() + { + KEYSPACE = format("ks_%s", ksCount++); + } + + + private static void process(String fmt, Object... objects) + { + QueryProcessor.process(format(fmt, objects), ConsistencyLevel.ANY); + } + + @Test + public void keyspaceInheriting() + { + process("CREATE KEYSPACE %s with replication={'class':'SimpleStrategy', 'replication_factor':1} AND fast_path='simple'", KEYSPACE); + KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(KEYSPACE); + Assert.assertSame(FastPathStrategy.simple(), ksm.params.fastPath); + + process("CREATE TABLE %s.tbl (k int primary key, v int)", KEYSPACE); + TableMetadata tbm = Schema.instance.getTableMetadata(KEYSPACE, "tbl"); + Assert.assertSame(FastPathStrategy.inheritKeyspace(), tbm.params.fastPath); + + Epoch epoch = ClusterMetadata.current().epoch; + AddAccordTable.addTable(tbm.id); + + Assert.assertEquals(epoch.getEpoch() + 1, ClusterMetadata.current().epoch.getEpoch()); + } + + @Test + public void keyspaceModification() + { + process("CREATE KEYSPACE %s with replication={'class':'SimpleStrategy', 'replication_factor':1} AND fast_path='simple'", KEYSPACE); + KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(KEYSPACE); + Assert.assertSame(FastPathStrategy.simple(), ksm.params.fastPath); + process("ALTER KEYSPACE %s with fast_path={'size':2, 'dcs':'dc1,dc2'}", KEYSPACE); + + ksm = Schema.instance.getKeyspaceMetadata(KEYSPACE); + Assert.assertSame(FastPathStrategy.Kind.PARAMETERIZED, ksm.params.fastPath.kind()); + ParameterizedFastPathStrategy strategy = (ParameterizedFastPathStrategy) ksm.params.fastPath; + Assert.assertEquals(2, strategy.size); + Assert.assertEquals(Arrays.asList("dc1", "dc2"), strategy.dcStrings()); + } + + @Test(expected = ConfigurationException.class) + public void keyspaceInheritingFailure() + { + process("CREATE KEYSPACE %s with replication={'class':'SimpleStrategy', 'replication_factor':1} AND fast_path='keyspace'", KEYSPACE); + } + + @Test + public void tableModification() + { + process("CREATE KEYSPACE %s with replication={'class':'SimpleStrategy', 'replication_factor':1} AND fast_path='simple'", KEYSPACE); + KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(KEYSPACE); + Assert.assertSame(FastPathStrategy.simple(), ksm.params.fastPath); + + process("CREATE TABLE %s.tbl (k int primary key, v int)", KEYSPACE); + TableMetadata tbm = Schema.instance.getTableMetadata(KEYSPACE, "tbl"); + Assert.assertSame(FastPathStrategy.inheritKeyspace(), tbm.params.fastPath); + + AddAccordTable.addTable(tbm.id); + + process("ALTER TABLE %s.tbl WITH fast_path='simple'", KEYSPACE); + tbm = Schema.instance.getTableMetadata(KEYSPACE, "tbl"); + Assert.assertSame(FastPathStrategy.simple(), tbm.params.fastPath); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 5131689845d3..c47f61987d36 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -61,7 +61,9 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordCachingState.Modified; import org.apache.cassandra.service.accord.api.PartitionKey; @@ -108,6 +110,7 @@ public void commandLoadSave() throws Throwable AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, c, v) VALUES (0, 0, 1)"); + TableId tableId = Schema.instance.getTableMetadata("ks", "tbl").id; TxnId oldTxnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId oldTxnId2 = txnId(1, clock.incrementAndGet(), 1); TxnId oldTimestamp = txnId(1, clock.incrementAndGet(), 1); @@ -147,7 +150,7 @@ public void commandLoadSave() throws Throwable Apply apply = Apply.SerializationSupport.create(txnId, - route.slice(Ranges.of(TokenRange.fullRange("ks"))), + route.slice(Ranges.of(TokenRange.fullRange(tableId))), 1L, Apply.Kind.Minimal, depTxn.keys(), diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 516a89345af5..0bd628903ab5 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -81,7 +81,7 @@ public static void beforeClass() throws Throwable private static PartitionKey key(int k) { TableMetadata metadata = Schema.instance.getTableMetadata("ks", "tbl"); - return new PartitionKey(metadata.keyspace, metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(k))); + return new PartitionKey(metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(k))); } /** diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index 3cf9da57c179..3a0e0c7cb78e 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -182,7 +182,7 @@ public void initialEpochTest() throws Throwable Assert.assertEquals(null, AccordKeyspace.loadEpochDiskState()); Assert.assertTrue(executeInternal(format("SELECT * FROM %s.%s WHERE epoch=1", ACCORD_KEYSPACE_NAME, TOPOLOGIES)).isEmpty()); - Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); + Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); service.reportTopology(topology1); loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { Assert.assertEquals(topology1, topology); @@ -204,7 +204,7 @@ public void loadTest() throws Throwable AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); service.start(); - Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); + Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); service.reportTopology(topology1); service.acknowledgeEpoch(EpochReady.done(1), true); @@ -212,12 +212,12 @@ public void loadTest() throws Throwable service.receiveRemoteSyncComplete(ID2, 1); service.receiveRemoteSyncComplete(ID3, 1); - Topology topology2 = new Topology(2, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + Topology topology2 = new Topology(2, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); service.reportTopology(topology2); service.acknowledgeEpoch(EpochReady.done(2), true); service.receiveRemoteSyncComplete(ID1, 2); - Topology topology3 = new Topology(3, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + Topology topology3 = new Topology(3, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); service.reportTopology(topology3); service.acknowledgeEpoch(EpochReady.done(3), true); @@ -245,14 +245,14 @@ public void truncateTest() service.registerListener(serviceListener); service.start(); - Topology topology1 = new Topology(1, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, ID_SET)); + Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); service.reportTopology(topology1); - Topology topology2 = new Topology(2, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + Topology topology2 = new Topology(2, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); service.reportTopology(topology2); - Topology topology3 = new Topology(3, new Shard(AccordTopologyUtils.fullRange("ks"), ID_LIST, of(ID1, ID2))); + Topology topology3 = new Topology(3, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); service.reportTopology(topology3); service.truncateTopologiesUntil(3); Assert.assertEquals(EpochDiskState.create(3), service.diskState()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java new file mode 100644 index 000000000000..1630b3c7c35d --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.local.Node; +import accord.topology.Shard; +import accord.topology.Topology; +import com.google.common.collect.Iterables; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.schema.*; +import org.apache.cassandra.service.accord.AccordFastPath.Status; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +import static org.apache.cassandra.service.accord.AccordTestUtils.*; +import static org.apache.cassandra.service.accord.AccordTopologyTest.token; + +public class AccordFastPathCoordinatorTest +{ + private static final IPartitioner partitioner = Murmur3Partitioner.instance; + private static ClusterMetadata EMPTY; + + + public static final TableId TABLE_1 = TableId.fromString("00000000-0000-0000-0000-000000000001"); + + @BeforeClass + public static void beforeClass() throws Exception + { + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + EMPTY = new ClusterMetadata(partitioner); + } + + private static class CapturedUpdate + { + final Node.Id node; + final Status status; + + public CapturedUpdate(Node.Id node, Status status) + { + this.node = node; + this.status = status; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CapturedUpdate that = (CapturedUpdate) o; + return Objects.equals(node, that.node) && status == that.status; + } + + @Override + public int hashCode() + { + return Objects.hash(node, status); + } + + @Override + public String toString() + { + return "CapturedUpdate{" + + "node=" + node + + ", status=" + status + + '}'; + } + } + + private static CapturedUpdate update(Node.Id node, Status status) + { + return new CapturedUpdate(node, status); + } + + private static class InstrumentedFastPathCoordinator extends AccordFastPathCoordinator + { + private ClusterMetadata currentMetadata = EMPTY; + private List capturedUpdates = new ArrayList<>(); + + public InstrumentedFastPathCoordinator(Node.Id localId) + { + super(localId); + } + + public InstrumentedFastPathCoordinator currentMetadata(ClusterMetadata currentMetadata) + { + this.currentMetadata = currentMetadata; + return this; + } + + @Override + ClusterMetadata currentMetadata() + { + return currentMetadata; + } + + @Override + void registerAsListener() + { + + } + + @Override + void updateFastPath(Node.Id node, Status status, long updateTimeMillis, long updateDelayMillis) + { + capturedUpdates.add(new CapturedUpdate(node, status)); + + } + + @Override + long getAccordFastPathUpdateDelayMillis() + { + return TimeUnit.SECONDS.toMillis(5); + } + } + + @Test + public void simpleAlive() + { + Topology topology = new Topology(1, + new Shard(AccordTopology.minRange(TABLE_1, token(0)), idList(0, 1, 2), idSet(0, 1, 2)), + new Shard(AccordTopology.maxRange(TABLE_1, token(0)), idList(3, 4, 5), idSet(3, 4, 5))); + + InstrumentedFastPathCoordinator coordinator = new InstrumentedFastPathCoordinator(id(0)); + coordinator.updatePeers(topology); + + // setup existing fast path state + coordinator.currentMetadata(EMPTY.transformer() + .withFastPathStatusSince(id(1), Status.UNAVAILABLE, 1, 1) + .withFastPathStatusSince(id(3), Status.UNAVAILABLE, 1, 1).build().metadata); + + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + + + // peer isn't marked unavailable, shouldn't update + coordinator.onAlive(id(2)); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + + // node isn't a peer, shouldn't update + coordinator.onAlive(id(3)); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + + // node is a peer, should issue update + coordinator.onAlive(id(1)); + Assert.assertEquals(update(id(1), Status.NORMAL), Iterables.getOnlyElement(coordinator.capturedUpdates)); + } + + @Test + public void simpleDead() + { + Topology topology = new Topology(1, + new Shard(AccordTopology.minRange(TABLE_1, token(0)), idList(0, 1, 2), idSet(0, 1, 2)), + new Shard(AccordTopology.maxRange(TABLE_1, token(0)), idList(3, 4, 5), idSet(3, 4, 5))); + InstrumentedFastPathCoordinator coordinator = new InstrumentedFastPathCoordinator(id(0)); + coordinator.updatePeers(topology); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + + // not a peer, shouldn't update + coordinator.onDead(id(3)); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + + // is a peer, should update + coordinator.onDead(id(1)); + Assert.assertEquals(update(id(1), Status.UNAVAILABLE), Iterables.getOnlyElement(coordinator.capturedUpdates)); + } + + /** + * We shouldn't be scheduling updates if there aren't any accord tables + */ + @Test + public void noTableTest() + { + InstrumentedFastPathCoordinator coordinator = new InstrumentedFastPathCoordinator(id(0)); + coordinator.start(); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + + coordinator.onDead(id(1)); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + } + + /** + * node should mark itself as shutdown on shutdown + */ + @Test + public void selfShutdownTest() + { + InstrumentedFastPathCoordinator coordinator = new InstrumentedFastPathCoordinator(id(0)); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + + coordinator.onShutdown(); + Assert.assertEquals(update(id(0), Status.SHUTDOWN), Iterables.getOnlyElement(coordinator.capturedUpdates)); + } + + /** + * If a node finds itself marked shutdown on startup, it should mark itself normal + */ + @Test + public void startupTest() + { + InstrumentedFastPathCoordinator coordinator = new InstrumentedFastPathCoordinator(id(0)); + coordinator.currentMetadata(EMPTY.transformer().withFastPathStatusSince(id(0), Status.SHUTDOWN, 1, 1).build().metadata); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + coordinator.start(); + Assert.assertEquals(update(id(0), Status.NORMAL), Iterables.getOnlyElement(coordinator.capturedUpdates)); + } + + /** + * if a peer is marked as shutdown, other nodes should ignore FD signals until it marks itself alive again + */ + @Test + public void peerShutdownTest() + { + Topology topology = new Topology(1, + new Shard(AccordTopology.minRange(TABLE_1, token(0)), idList(0, 1, 2), idSet(0, 1, 2)), + new Shard(AccordTopology.maxRange(TABLE_1, token(0)), idList(3, 4, 5), idSet(3, 4, 5))); + InstrumentedFastPathCoordinator coordinator = new InstrumentedFastPathCoordinator(id(0)); + coordinator.currentMetadata(EMPTY.transformer().withFastPathStatusSince(id(1), Status.SHUTDOWN, 1, 1).build().metadata); + coordinator.updatePeers(topology); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + coordinator.start(); + + Assert.assertTrue(coordinator.isPeer(id(1))); + coordinator.onAlive(id(1)); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + coordinator.onDead(id(1)); + Assert.assertTrue(coordinator.capturedUpdates.isEmpty()); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 1cf6a6e68029..52aff302e389 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -45,6 +45,8 @@ import accord.primitives.TxnId; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.assertj.core.api.Assertions; @@ -53,14 +55,14 @@ public class AccordKeyspaceTest extends CQLTester.InMemory { - private static final Ranges GLOBAL_SCOPE = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(KEYSPACE), AccordRoutingKey.SentinelKey.max(KEYSPACE))); - @Test public void serde() { AtomicLong now = new AtomicLong(); String tableName = createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))"); + TableId tableId = Schema.instance.getTableMetadata(KEYSPACE, tableName).id; + Ranges scope = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(tableId), AccordRoutingKey.SentinelKey.max(tableId))); AccordCommandStore store = AccordTestUtils.createAccordCommandStore(now::incrementAndGet, KEYSPACE, tableName); @@ -68,25 +70,25 @@ public void serde() Txn txn = createTxn(wrapInTxn(String.format("SELECT * FROM %s.%s WHERE k=? LIMIT 1", KEYSPACE, tableName)), Collections.singletonList(42)); - PartialTxn partialTxn = txn.slice(GLOBAL_SCOPE, true); + PartialTxn partialTxn = txn.slice(scope, true); RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); FullRoute route = partialTxn.keys().toRoute(routingKey); Deps deps = new Deps(KeyDeps.none((Keys) txn.keys()), RangeDeps.NONE); - PartialDeps partialDeps = deps.slice(GLOBAL_SCOPE); + PartialDeps partialDeps = deps.slice(scope); CommonAttributes.Mutable common = new CommonAttributes.Mutable(id); common.partialTxn(partialTxn); common.route(route); - common.partialDeps(partialDeps); + common.partialDeps(deps.slice(scope)); common.durability(Status.Durability.NotDurable); - Command.WaitingOn waitingOn = Command.WaitingOn.none(partialDeps); + Command.WaitingOn waitingOn = Command.WaitingOn.none(deps.slice(scope)); Command.Committed committed = Command.SerializerSupport.committed(common, SaveStatus.Committed, id, Ballot.ZERO, Ballot.ZERO, waitingOn); AccordSafeCommand safeCommand = new AccordSafeCommand(AccordTestUtils.loaded(id, null)); safeCommand.set(committed); - Commit commit = Commit.SerializerSupport.create(id, route.slice(GLOBAL_SCOPE), 1, Commit.Kind.Maximal, id, partialTxn, partialDeps, route, null); + Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.Maximal, id, partialTxn, partialDeps, route, null); store.appendToJournal(commit); Mutation mutation = AccordKeyspace.getCommandMutation(store, safeCommand, 42); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java index 8d8fb0fc46b7..d62ab52946ed 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java @@ -51,7 +51,6 @@ public static void setupClass() throws IOException { AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord").set("non_serial_write_strategy", "mixed")), 2); SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); - SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); } /* @@ -61,7 +60,7 @@ public static void setupClass() throws IOException @Test public void testSerialReadRepair() throws Exception { - testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;", ConsistencyLevel.SERIAL), + testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.SERIAL), new Object[][] {{1, 1, 1, 1}}); } @@ -69,7 +68,7 @@ public void testSerialReadRepair() throws Exception public void testCASFailedConditionReadRepair() throws Exception { // Even if the condition fails to apply the data checked when applying the condition should be repaired - testReadRepair(cluster -> cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v1) VALUES (1, 1, 99) IF NOT EXISTS;", ConsistencyLevel.SERIAL), + testReadRepair(cluster -> cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v1) VALUES (1, 1, 99) IF NOT EXISTS;", ConsistencyLevel.SERIAL), new Object[][] {{false, 1, 1, 1, 1}}); } @@ -77,7 +76,7 @@ public void testCASFailedConditionReadRepair() throws Exception public void testCASReadRepair() throws Exception { // If the condition applies the read repair should preserve the existing timestamp - testReadRepair(cluster -> cluster.coordinator(1).execute("UPDATE " + currentTable + " SET v2 = 99 WHERE k = 1 and c = 1 IF EXISTS;", ConsistencyLevel.SERIAL), + testReadRepair(cluster -> cluster.coordinator(1).execute("UPDATE " + qualifiedTableName + " SET v2 = 99 WHERE k = 1 and c = 1 IF EXISTS;", ConsistencyLevel.SERIAL), new Object[][] {{Boolean.TRUE}}); } @@ -89,20 +88,20 @@ public void testCASReadRepair() throws Exception public void testNonSerialReadRepair() throws Exception { for (ConsistencyLevel cl : ImmutableList.of(ConsistencyLevel.QUORUM)) - testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;", cl), + testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", cl), new Object[][] {{1, 1, 1, 1}}); } void testReadRepair(Function accordTxn, Object[][] expected) throws Exception { - test("CREATE TABLE " + currentTable + " (k int, c int, v1 int, v2 int, PRIMARY KEY ((k), c));", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v1 int, v2 int, PRIMARY KEY ((k), c));", cluster -> { Filter mutationFilter = cluster.filters().verbs(Verb.MUTATION_REQ.id).drop().on(); cluster.filters().verbs(Verb.HINT_REQ.id, Verb.HINT_RSP.id).drop().on(); - cluster.coordinator(1).execute("INSERT INTO " + currentTable + " (k, c, v1, v2) VALUES (1, 1, 1, 1) USING TIMESTAMP 42;", ConsistencyLevel.ONE); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v1, v2) VALUES (1, 1, 1, 1) USING TIMESTAMP 42;", ConsistencyLevel.ONE); mutationFilter.off(); Filter blockNodeOneReads = cluster.filters().verbs(Verb.READ_REQ.id).to(1).drop().on(); - assertThat(cluster.coordinator(2).executeWithResult("SELECT * FROM " + currentTable + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) + assertThat(cluster.coordinator(2).executeWithResult("SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) .isEmpty(); blockNodeOneReads.off(); // Should perform read repair @@ -110,7 +109,7 @@ void testReadRepair(Function accordTxn, Object[][] expected assertRows(result, expected); blockNodeOneReads.on(); // Side effect of the read repair should be visible now - assertThat(cluster.coordinator(2).executeWithResult("SELECT k, c, v1, WRITETIME(v1) FROM " + currentTable + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) + assertThat(cluster.coordinator(2).executeWithResult("SELECT k, c, v1, WRITETIME(v1) FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) .isEqualTo(1, 1, 1, 42L); }); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index 919600e7796c..f57a3c12387f 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -247,16 +247,13 @@ public Cluster.Instace node(InetAddressAndPort address) } @Override - public Node.Id mappedId(InetAddressAndPort endpoint) + public Node.Id mappedIdOrNull(InetAddressAndPort endpoint) { - Node.Id id = nodeToAddress.inverse().get(endpoint); - if (id == null) - throw new NullPointerException("Unable to map endpoint: " + endpoint); - return id; + return nodeToAddress.inverse().get(endpoint); } @Override - public InetAddressAndPort mappedEndpoint(Node.Id id) + public InetAddressAndPort mappedEndpointOrNull(Node.Id id) { return nodeToAddress.get(id); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 33255a0f0f57..e30ddbb55803 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -18,8 +18,10 @@ package org.apache.cassandra.service.accord; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; @@ -80,6 +82,7 @@ import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.api.AccordAgent; @@ -96,6 +99,8 @@ public class AccordTestUtils { + public static final TableId TABLE_ID1 = TableId.fromString("00000000-0000-0000-0000-000000000001"); + public static class Commands { public static Command notDefined(TxnId txnId, PartialTxn txn) @@ -308,7 +313,7 @@ public static Ranges fullRange(Txn txn) public static Ranges fullRange(Seekables keys) { PartitionKey key = (PartitionKey) keys.get(0); - return Ranges.of(TokenRange.fullRange(key.keyspace())); + return Ranges.of(TokenRange.fullRange(key.table())); } public static PartialTxn createPartialTxn(int key) @@ -336,7 +341,7 @@ private void set(CommandStore store) public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongSupplier now, String keyspace, String table) { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); - TokenRange range = TokenRange.fullRange(metadata.keyspace); + TokenRange range = TokenRange.fullRange(metadata.id); Node.Id node = new Id(1); Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); NodeTimeService time = new NodeTimeService() @@ -400,7 +405,7 @@ public static AccordCommandStore createAccordCommandStore( LongSupplier now, String keyspace, String table, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor) { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); - TokenRange range = TokenRange.fullRange(metadata.keyspace); + TokenRange range = TokenRange.fullRange(metadata.id); Node.Id node = new Id(1); Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); AccordCommandStore store = createAccordCommandStore(node, now, topology, loadExecutor, saveExecutor); @@ -432,11 +437,26 @@ public static void execute(AccordCommandStore commandStore, Runnable runnable) public static PartitionKey key(TableMetadata table, int key) { DecoratedKey dk = table.partitioner.decorateKey(Int32Type.instance.decompose(key)); - return new PartitionKey(table.keyspace, table.id, dk); + return new PartitionKey(table.id, dk); } public static Keys keys(TableMetadata table, int... keys) { return Keys.of(IntStream.of(keys).mapToObj(key -> key(table, key)).collect(Collectors.toList())); } + + public static Node.Id id(int id) + { + return new Node.Id(id); + } + + public static List idList(int... ids) + { + return Arrays.stream(ids).mapToObj(AccordTestUtils::id).collect(Collectors.toList()); + } + + public static Set idSet(int... ids) + { + return Arrays.stream(ids).mapToObj(AccordTestUtils::id).collect(Collectors.toSet()); + } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java index 018595713975..56675e6cf256 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java @@ -21,6 +21,7 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Set; @@ -30,6 +31,7 @@ import org.junit.BeforeClass; import org.junit.Test; +import accord.local.Node; import accord.local.Node.Id; import accord.topology.Shard; import accord.topology.Topology; @@ -45,6 +47,8 @@ import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.Location; @@ -70,7 +74,7 @@ public class AccordTopologyTest private static final InetAddressAndPort EP3 = ep(3); private static final IPartitioner partitioner = Murmur3Partitioner.instance; - private static Tables tables = null; + private static TableId tableId = null; private static KeyspaceMetadata keyspace = null; private static final Location LOCATION = new Location("DC1", "RACK1"); @@ -79,8 +83,9 @@ public static void beforeClass() throws Throwable { DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - tables = Tables.of(parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks").build()); - keyspace = KeyspaceMetadata.create("ks", KeyspaceParams.simple(3), tables); + TableMetadata table = parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks").build(); + tableId = table.id; + keyspace = KeyspaceMetadata.create("ks", KeyspaceParams.simple(3), Tables.of(table)); } private static InetAddressAndPort ep(int i) @@ -100,7 +105,7 @@ private static NodeId nodeId(int id) return new NodeId(id); } - private static void addNode(ClusterMetadata.Transformer transformer, int node, Token token) + static void addNode(ClusterMetadata.Transformer transformer, int node, Token token) { NodeId nodeId = nodeId(node); InetAddressAndPort ep = ep(node); @@ -108,6 +113,7 @@ private static void addNode(ClusterMetadata.Transformer transformer, int node, T transformer.register(nodeId, addresses, LOCATION, NodeVersion.CURRENT); transformer.withNodeState(nodeId, NodeState.JOINED); transformer.proposeToken(nodeId, Collections.singleton(token)); + transformer.addToRackAndDC(nodeId); } private static ClusterMetadata configureCluster(List> ranges, Keyspaces keyspaces) @@ -136,17 +142,17 @@ private static ClusterMetadata configureCluster(List> ranges, Keysp return metadata; } - private static Token token(long t) + static Token token(long t) { return new Murmur3Partitioner.LongToken(t); } - private static Range range(Token left, Token right) + static Range range(Token left, Token right) { return new Range<>(left, right); } - private static Range range(long left, long right) + static Range range(long left, long right) { return range(token(left), token(right)); } @@ -164,12 +170,12 @@ public void minMaxTokens() Assert.assertEquals(partitioner.getMaximumToken(), ranges.get(2).right); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); - Topology topology = AccordTopologyUtils.createAccordTopology(metadata, ks -> true); + Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); Topology expected = new Topology(1, - new Shard(AccordTopologyUtils.minRange("ks", ranges.get(0).right), NODE_LIST, NODE_SET), - new Shard(AccordTopologyUtils.range("ks", ranges.get(1)), NODE_LIST, NODE_SET), - new Shard(AccordTopologyUtils.range("ks", ranges.get(2)), NODE_LIST, NODE_SET), - new Shard(AccordTopologyUtils.maxRange("ks", ranges.get(2).right), NODE_LIST, NODE_SET)); + new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(2)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.maxRange(tableId, ranges.get(2).right), NODE_LIST, NODE_SET)); Assert.assertEquals(expected, topology); } @@ -182,13 +188,76 @@ public void wrapAroundRanges() range(100, -100)); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); - Topology topology = AccordTopologyUtils.createAccordTopology(metadata, ks -> true); + Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); Topology expected = new Topology(1, - new Shard(AccordTopologyUtils.minRange("ks", ranges.get(0).left), NODE_LIST, NODE_SET), - new Shard(AccordTopologyUtils.range("ks", ranges.get(0)), NODE_LIST, NODE_SET), - new Shard(AccordTopologyUtils.range("ks", ranges.get(1)), NODE_LIST, NODE_SET), - new Shard(AccordTopologyUtils.maxRange("ks", ranges.get(2).left), NODE_LIST, NODE_SET)); + new Shard(AccordTopology.minRange(tableId, ranges.get(0).left), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(0)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.maxRange(tableId, ranges.get(2).left), NODE_LIST, NODE_SET)); Assert.assertEquals(expected, topology); } + + @Test + public void fastPath() + { + List> ranges = ImmutableList.of(range(partitioner.getMinimumToken(), token(-100)), + range(-100, 100), + range(token(100), partitioner.getMaximumToken())); + ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); + Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); + Topology expected = new Topology(1, + new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(2)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.maxRange(tableId, ranges.get(2).right), NODE_LIST, NODE_SET)); + Assert.assertEquals(expected, topology); + + topology = AccordTopology.createAccordTopology(metadata.transformer().withFastPathStatusSince(new Id(1), AccordFastPath.Status.UNAVAILABLE, 1, 1).build().metadata, ks -> true); + + Set fastPath = new HashSet<>(NODE_SET); + fastPath.remove(new Node.Id(1)); + + expected = new Topology(2, + new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, fastPath), + new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, fastPath), + new Shard(AccordTopology.range(tableId, ranges.get(2)), NODE_LIST, fastPath), + new Shard(AccordTopology.maxRange(tableId, ranges.get(2).right), NODE_LIST, fastPath)); + Assert.assertEquals(expected, topology); + } + + /** + * Even if there are too many failures to reach quorum, fast path size shouldn't go below quorum size + */ + @Test + public void fastPathWithMoreThanMinimumFailedNodes() + { + List> ranges = ImmutableList.of(range(partitioner.getMinimumToken(), token(-100)), + range(-100, 100), + range(token(100), partitioner.getMaximumToken())); + ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); + Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); + Topology expected = new Topology(1, + new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.range(tableId, ranges.get(2)), NODE_LIST, NODE_SET), + new Shard(AccordTopology.maxRange(tableId, ranges.get(2).right), NODE_LIST, NODE_SET)); + Assert.assertEquals(expected, topology); + + metadata = metadata.transformer() + .withFastPathStatusSince(new Id(1), AccordFastPath.Status.UNAVAILABLE, 1, 1) + .withFastPathStatusSince(new Id(2), AccordFastPath.Status.UNAVAILABLE, 1, 1) + .build().metadata; + topology = AccordTopology.createAccordTopology(metadata, ks -> true); + + Set fastPath = new HashSet<>(NODE_SET); + fastPath.remove(new Node.Id(1)); + + expected = new Topology(2, + new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, fastPath), + new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, fastPath), + new Shard(AccordTopology.range(tableId, ranges.get(2)), NODE_LIST, fastPath), + new Shard(AccordTopology.maxRange(tableId, ranges.get(2).right), NODE_LIST, fastPath)); + Assert.assertEquals(expected, topology); + } } diff --git a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java index c86e0a769f7e..e3c3ba434693 100644 --- a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java +++ b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java @@ -46,11 +46,12 @@ import static accord.utils.Property.qt; import static org.apache.cassandra.simulator.RandomSource.Choices.choose; +import static org.apache.cassandra.service.accord.AccordTestUtils.TABLE_ID1; import static org.assertj.core.api.Assertions.assertThat; public class CommandsForRangesTest { - private static Ranges FULL_RANGE = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min("test"), AccordRoutingKey.SentinelKey.max("test"))); + private static Ranges FULL_RANGE = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(TABLE_ID1), AccordRoutingKey.SentinelKey.max(TABLE_ID1))); @BeforeClass public static void setup() throws NoSuchFieldException, IllegalAccessException @@ -98,7 +99,7 @@ private static Gen cfr() IPartitioner partitioner = partitionerGen.next(rs); // some code reaches to the DD for partitioner... DatabaseDescriptor.setPartitionerUnsafe(partitioner); - Gen rangesGen = AccordGenerators.ranges(ignore -> Collections.singleton("test"), ignore -> partitioner); + Gen rangesGen = AccordGenerators.ranges(ignore -> Collections.singleton(TABLE_ID1), ignore -> partitioner); CommandsForRanges.Builder builder = new CommandsForRanges.Builder(); int numTxn = rs.nextInt(1, 10); Set uniq = new HashSet<>(); diff --git a/test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java b/test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java index 425169c34e75..acca53d5fccf 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java +++ b/test/unit/org/apache/cassandra/service/accord/SimpleAccordEndpointMapper.java @@ -31,7 +31,7 @@ public enum SimpleAccordEndpointMapper implements AccordEndpointMapper INSTANCE; @Override - public Node.Id mappedId(InetAddressAndPort endpoint) + public Node.Id mappedIdOrNull(InetAddressAndPort endpoint) { if (endpoint.addressBytes.length != 4) throw new IllegalArgumentException("Only IPV4 is allowed: given " + endpoint.toString(true)); @@ -39,7 +39,7 @@ public Node.Id mappedId(InetAddressAndPort endpoint) } @Override - public InetAddressAndPort mappedEndpoint(Node.Id id) + public InetAddressAndPort mappedEndpointOrNull(Node.Id id) { byte[] array = ByteBufferUtil.bytes(id.id).array(); try diff --git a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java index 95b4ed0078d2..0c74fac7689f 100644 --- a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java @@ -59,7 +59,7 @@ public static IPartitioner partitioner(TableId tableId) public void partitionKeyTest() { DecoratedKey dk = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk = new PartitionKey("ks", TABLE1, dk); + PartitionKey pk = new PartitionKey(TABLE1, dk); SerializerTestUtils.assertSerializerIOEquality(pk, PartitionKey.serializer); } @@ -67,7 +67,7 @@ public void partitionKeyTest() public void tokenKeyTest() { DecoratedKey dk = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); - TokenKey pk = new TokenKey("", dk.getToken()); + TokenKey pk = new TokenKey(TABLE1, dk.getToken()); SerializerTestUtils.assertSerializerIOEquality(pk, TokenKey.serializer); } @@ -75,10 +75,10 @@ public void tokenKeyTest() public void comparisonTest() { DecoratedKey dk = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk = new PartitionKey("", TABLE1, dk); - TokenKey tk = new TokenKey("", dk.getToken()); - TokenKey tkLow = new TokenKey("", dk.getToken().decreaseSlightly()); - TokenKey tkHigh = new TokenKey("", dk.getToken().nextValidToken()); + PartitionKey pk = new PartitionKey(TABLE1, dk); + TokenKey tk = new TokenKey(TABLE1, dk.getToken()); + TokenKey tkLow = new TokenKey(TABLE1, dk.getToken().decreaseSlightly()); + TokenKey tkHigh = new TokenKey(TABLE1, dk.getToken().nextValidToken()); Assert.assertTrue(tk.compareTo(pk) > 0); Assert.assertTrue(tkLow.compareTo(pk) < 0); @@ -91,22 +91,10 @@ public void tableComparisonTest() Assert.assertTrue(TABLE1.compareTo(TABLE2) < 0); DecoratedKey dk1 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk1 = new PartitionKey("", TABLE1, dk1); + PartitionKey pk1 = new PartitionKey(TABLE1, dk1); DecoratedKey dk2 = partitioner(TABLE2).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk2 = new PartitionKey("", TABLE2, dk2); - - Assert.assertTrue(pk1.compareTo(pk2) < 0); - } - - @Test - public void keyspaceComparisonTest() - { - DecoratedKey dk1 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk1 = new PartitionKey("a", TABLE1, dk1); - - DecoratedKey dk2 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk2 = new PartitionKey("b", TABLE1, dk2); + PartitionKey pk2 = new PartitionKey(TABLE2, dk2); Assert.assertTrue(pk1.compareTo(pk2) < 0); } @@ -116,13 +104,13 @@ public void sentinelComparisonTest() { Assert.assertTrue(TABLE1.compareTo(TABLE2) < 0); DecoratedKey dk1 = partitioner(TABLE1).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk1 = new PartitionKey("a", TABLE1, dk1); + PartitionKey pk1 = new PartitionKey(TABLE1, dk1); DecoratedKey dk2 = partitioner(TABLE2).decorateKey(ByteBufferUtil.bytes(5)); - PartitionKey pk2 = new PartitionKey("b", TABLE2, dk2); + PartitionKey pk2 = new PartitionKey(TABLE2, dk2); - SentinelKey loSentinel = SentinelKey.min("a"); - SentinelKey hiSentinel = SentinelKey.max("a"); + SentinelKey loSentinel = SentinelKey.min(TABLE1); + SentinelKey hiSentinel = SentinelKey.max(TABLE1); Assert.assertTrue(loSentinel.compareTo(hiSentinel) < 0); Assert.assertTrue(pk1.compareTo(loSentinel) > 0); Assert.assertTrue(loSentinel.compareTo(pk1) < 0); diff --git a/test/unit/org/apache/cassandra/service/accord/fastpath/FastPathParsingTest.java b/test/unit/org/apache/cassandra/service/accord/fastpath/FastPathParsingTest.java new file mode 100644 index 000000000000..96b9e87435c0 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/fastpath/FastPathParsingTest.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.fastpath; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.StringJoiner; + +import com.google.common.collect.ImmutableMap; +import org.junit.Assert; +import org.junit.Test; + +import org.apache.cassandra.exceptions.ConfigurationException; + +import static java.lang.String.format; + +public class FastPathParsingTest +{ + private static void assertThrows(Runnable runnable, Class exception) + { + try + { + runnable.run(); + } + catch (Throwable e) + { + if (!exception.isAssignableFrom(e.getClass())) + { + throw new AssertionError(format("Expected %s to be thrown, got %s: %s", exception.getName(), e.getClass().getName(), e.getMessage())); + } + return; + } + Assert.fail(format("Expected %s to be thrown", exception.getName())); + } + + private static Map options(String... opts) + { + Assert.assertTrue("Need even numbered array for key value pairs, got " + Arrays.toString(opts), opts.length % 2 == 0); + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (int i=0; i options = new HashMap<>(); + options.put(ParameterizedFastPathStrategy.SIZE, Integer.toString(size)); + if (dcs.length > 0) + { + StringJoiner joiner = new StringJoiner(","); + for (String dc : dcs) + joiner.add(dc); + options.put(ParameterizedFastPathStrategy.DCS, joiner.toString()); + } + + return ParameterizedFastPathStrategy.fromMap(options); + } + + @Test + public void fromString() + { + Assert.assertSame(SimpleFastPathStrategy.instance, FastPathStrategy.tableStrategyFromString("simple")); + } + + @Test + public void fromStringFailures() + { + assertThrows(() -> FastPathStrategy.tableStrategyFromString("something"), ConfigurationException.class); + } + + @Test + public void fromMap() + { + Assert.assertEquals(pfs(3), FastPathStrategy.fromMap(options("size", "3"))); + Assert.assertEquals(SimpleFastPathStrategy.instance, FastPathStrategy.fromMap(options())); + Assert.assertEquals(pfs(1, "dc1"), FastPathStrategy.fromMap(options("size", "1", "dcs", "dc1"))); + Assert.assertEquals(pfs(3, "dc1", "dc2"), FastPathStrategy.fromMap(options("size", "3", "dcs", "dc1,dc2"))); + Assert.assertEquals(pfs(5, "dc2", "dc1"), FastPathStrategy.fromMap(options("size", "5", "dcs", "dc2,dc1"))); + } + + @Test + public void fromMapFailures() + { + assertThrows(() -> FastPathStrategy.fromMap(options("dcs", "dc1")), ConfigurationException.class); + assertThrows(() -> FastPathStrategy.fromMap(options("size", "abc")), ConfigurationException.class); + assertThrows(() -> FastPathStrategy.fromMap(options("size", "0")), ConfigurationException.class); + assertThrows(() -> FastPathStrategy.fromMap(options("size", "-1")), ConfigurationException.class); + assertThrows(() -> FastPathStrategy.fromMap(options("size", "2", "dcs", " ")), ConfigurationException.class); + assertThrows(() -> FastPathStrategy.fromMap(options("size", "5", "dcs", "dc2,dc1", "happypath", "5")), ConfigurationException.class); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java b/test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java new file mode 100644 index 000000000000..65650274fa57 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.fastpath; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; +import org.junit.Assert; +import org.junit.Test; + +import accord.local.Node; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.service.accord.fastpath.ParameterizedFastPathStrategy.WeightedDc; + +import static java.util.Collections.emptySet; +import static org.apache.cassandra.service.accord.AccordTestUtils.id; +import static org.apache.cassandra.service.accord.AccordTestUtils.idList; +import static org.apache.cassandra.service.accord.AccordTestUtils.idSet; +import static org.apache.cassandra.service.accord.fastpath.FastPathParsingTest.pfs; +import static org.junit.Assert.assertEquals; + +public class ParameterizedFastPathStrategyTest +{ + private static final List NODES = idList(1, 2, 3, 4, 5, 6); + private static final Map DCS_2; + private static final Map DCS_3; + + static + { + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.put(id(1), "DC1"); + builder.put(id(2), "DC1"); + builder.put(id(3), "DC1"); + builder.put(id(4), "DC2"); + builder.put(id(5), "DC2"); + builder.put(id(6), "DC2"); + DCS_2 = builder.build(); + + builder = ImmutableMap.builder(); + builder.put(id(1), "DC1"); + builder.put(id(2), "DC1"); + builder.put(id(3), "DC2"); + builder.put(id(4), "DC2"); + builder.put(id(5), "DC3"); + builder.put(id(6), "DC3"); + DCS_3 = builder.build(); + } + + @Test + public void noDCPreference() + { + assertEquals(idSet(1, 2, 3, 4, 5, 6), pfs(6).calculateFastPath(NODES, emptySet(), DCS_2)); + assertEquals(idSet(1, 2, 3, 4, 5), pfs(5).calculateFastPath(NODES, emptySet(), DCS_2)); + assertEquals(idSet(1, 2, 3, 4), pfs(4).calculateFastPath(NODES, emptySet(), DCS_2)); + assertEquals(idSet(1, 2, 3, 4), pfs(3).calculateFastPath(NODES, emptySet(), DCS_2)); + } + + @Test + public void noDCPreferenceUnavailables() + { + assertEquals(idSet(1, 2, 3, 4, 5, 6), pfs(6).calculateFastPath(NODES, idSet(4), DCS_2)); + assertEquals(idSet(1, 2, 3, 4, 5), pfs(5).calculateFastPath(NODES, idSet(1, 6), DCS_2)); + assertEquals(idSet(2, 3, 4, 5), pfs(4).calculateFastPath(NODES, idSet(1, 6), DCS_2)); + } + + + @Test + public void dcPreference() + { + assertEquals(idSet(1, 2, 3, 4, 5, 6), pfs(6, "DC1", "DC2").calculateFastPath(NODES, idSet(), DCS_3)); + assertEquals(idSet(1, 2, 3, 4), pfs(4, "DC1", "DC2").calculateFastPath(NODES, idSet(), DCS_3)); + assertEquals(idSet(1, 2, 5, 6), pfs(4, "DC1", "DC3").calculateFastPath(NODES, idSet(), DCS_3)); + } + + @Test + public void dcPreferenceUnavailables() + { + assertEquals(idSet(1, 2, 3, 4, 5), pfs(5, "DC1", "DC2").calculateFastPath(NODES, idSet(2, 4, 6), DCS_3)); + assertEquals(idSet(1, 2, 3, 5, 6), pfs(5, "DC1", "DC3").calculateFastPath(NODES, idSet(2, 4, 6), DCS_3)); + assertEquals(idSet(1, 3, 4, 5, 6), pfs(5, "DC2", "DC3").calculateFastPath(NODES, idSet(2, 4, 6), DCS_3)); + } + + private static WeightedDc wdc(String dc, int weight, boolean auto) + { + return new WeightedDc(dc, weight, auto); + } + + private static void assertCFE(int size, String... dcs) + { + try + { + pfs(size, dcs); + Assert.fail("expected ConfigurationException"); + } + catch (ConfigurationException ex) + { + // expected + } + } + + private static void assertPFS(ParameterizedFastPathStrategy actual, int size, WeightedDc... dcs) + { + Map dcMap = new HashMap<>(); + for (WeightedDc dc : dcs) + { + Assert.assertFalse(dcMap.containsKey(dc.name)); + dcMap.put(dc.name, dc); + } + ParameterizedFastPathStrategy expected = new ParameterizedFastPathStrategy(size, ImmutableMap.copyOf(dcMap)); + Assert.assertEquals(expected, actual); + } + + + @Test + public void dcParsingTest() + { + assertCFE(5, "DC1", "DC2:1"); + assertCFE(5, "DC1:-1", "DC2:1"); + assertCFE(5, "DC1", "DC1"); + } + + @Test + public void listParsingTest() + { + assertPFS(pfs(4, "DC1", "DC2", "DC3"), 4, wdc("DC1", 0, true), wdc("DC2", 1, true), wdc("DC3", 2, true)); + assertPFS(pfs(4, "DC2", "DC3", "DC1"), 4, wdc("DC2", 0, true), wdc("DC3", 1, true), wdc("DC1", 2, true)); + } + + @Test + public void weightParsingTest() + { + assertPFS(pfs(4, "DC1:0", "DC2:0", "DC3:1"), 4, wdc("DC1", 0, false), wdc("DC2", 0, false), wdc("DC3", 1, false)); + assertPFS(pfs(4, "DC2:100", "DC3:200", "DC1:300"), 4, wdc("DC2", 100, false), wdc("DC3", 200, false), wdc("DC1", 300, false)); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategyTest.java b/test/unit/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategyTest.java new file mode 100644 index 000000000000..f19cac90ee45 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategyTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.fastpath; + +import java.util.Collections; +import java.util.Map; + +import org.junit.Assert; +import org.junit.Test; + +import accord.local.Node; + +import static org.apache.cassandra.service.accord.AccordTestUtils.idList; +import static org.apache.cassandra.service.accord.AccordTestUtils.idSet; + +public class SimpleFastPathStrategyTest +{ + private static final Map DCMAP = Collections.emptyMap(); + + @Test + public void testCalculation() + { + FastPathStrategy strategy = SimpleFastPathStrategy.instance; + Assert.assertEquals(idSet(1, 2, 3, 4, 5), strategy.calculateFastPath(idList(1, 2, 3, 4, 5), idSet(), DCMAP)); + Assert.assertEquals(idSet(3, 4, 5), strategy.calculateFastPath(idList(1, 2, 3, 4, 5), idSet(1, 2, 3), DCMAP)); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java index 34fda94017a1..9d04137587a9 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java @@ -55,7 +55,7 @@ public void txnSerializer() " END IF\n" + "COMMIT TRANSACTION"); PartitionKey key = (PartitionKey) txn.keys().get(0); - PartialTxn expected = txn.slice(Ranges.of(TokenRange.fullRange(key.keyspace())), true); + PartialTxn expected = txn.slice(Ranges.of(TokenRange.fullRange(key.table())), true); SerializerTestUtils.assertSerializerIOEquality(expected, CommandSerializers.partialTxn); } } diff --git a/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java b/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java index 890c760b0dee..001206d858d4 100644 --- a/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java +++ b/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java @@ -114,7 +114,7 @@ Item[] newArray(int size) private static PartitionKey key(int k) { DecoratedKey dk = ByteOrderedPartitioner.instance.decorateKey(ByteBufferUtil.bytes(k)); - return new PartitionKey("", TABLE1, dk); + return new PartitionKey(TABLE1, dk); } private static Item item(int k, int v) diff --git a/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java b/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java index 19e7724807b9..062cda0b9661 100644 --- a/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java +++ b/test/unit/org/apache/cassandra/service/reads/DataResolverTest.java @@ -1325,7 +1325,7 @@ private void assertRepairMetadata(Mutation mutation) private ReplicaPlan.SharedForRangeRead plan(EndpointsForRange replicas, ConsistencyLevel consistencyLevel) { - BiFunction, Token, ReplicaPlan.ForWrite> repairPlan = (self, t) -> ReplicaPlans.forReadRepair(self, ClusterMetadata.current(), ks, consistencyLevel, t, (i) -> true, ReadCoordinator.DEFAULT); + BiFunction, Token, ReplicaPlan.ForWrite> repairPlan = (self, t) -> ReplicaPlans.forReadRepair(self, ClusterMetadata.current(), ks, null, consistencyLevel, t, (i) -> true, ReadCoordinator.DEFAULT); return ReplicaPlan.shared(new ReplicaPlan.ForRangeRead(ks, ks.getReplicationStrategy(), consistencyLevel, diff --git a/test/unit/org/apache/cassandra/service/reads/range/RangeCommandIteratorTest.java b/test/unit/org/apache/cassandra/service/reads/range/RangeCommandIteratorTest.java index dfd1f7f88d15..b13c45bfbcd5 100644 --- a/test/unit/org/apache/cassandra/service/reads/range/RangeCommandIteratorTest.java +++ b/test/unit/org/apache/cassandra/service/reads/range/RangeCommandIteratorTest.java @@ -22,6 +22,7 @@ import java.util.List; import com.google.common.collect.Iterators; +import org.apache.cassandra.schema.TableId; import org.junit.BeforeClass; import org.junit.Test; @@ -49,6 +50,7 @@ public class RangeCommandIteratorTest { private static final String KEYSPACE1 = "RangeCommandIteratorTest"; private static final String CF_STANDARD1 = "Standard1"; + private static final TableId TABLE_ID = TableId.generate(); @BeforeClass public static void defineSchema() throws ConfigurationException @@ -70,11 +72,11 @@ public void testRangeCountWithRangeMerge() for (int i = 0; i + 1 < tokens.size(); i++) { Range range = Range.makeRowRange(tokens.get(i), tokens.get(i + 1)); - ranges.add(ReplicaPlans.forRangeRead(keyspace, null, ConsistencyLevel.ONE, range, 1)); + ranges.add(ReplicaPlans.forRangeRead(keyspace, TABLE_ID, null, ConsistencyLevel.ONE, range, 1)); vnodeCount++; } - ReplicaPlanMerger merge = new ReplicaPlanMerger(ranges.iterator(), keyspace, ConsistencyLevel.ONE); + ReplicaPlanMerger merge = new ReplicaPlanMerger(ranges.iterator(), keyspace, TABLE_ID, ConsistencyLevel.ONE); ReplicaPlan.ForRangeRead mergedRange = Iterators.getOnlyElement(merge); // all ranges are merged as test has only one node. assertEquals(vnodeCount, mergedRange.vnodeCount()); @@ -164,9 +166,9 @@ private static CloseableIterator replicaPlanIterator(A Keyspace keyspace, boolean withRangeMerger) { - CloseableIterator replicaPlans = new ReplicaPlanIterator(keyRange, null, keyspace, ConsistencyLevel.ONE); + CloseableIterator replicaPlans = new ReplicaPlanIterator(keyRange, null, keyspace, null, ConsistencyLevel.ONE); if (withRangeMerger) - replicaPlans = new ReplicaPlanMerger(replicaPlans, keyspace, ConsistencyLevel.ONE); + replicaPlans = new ReplicaPlanMerger(replicaPlans, keyspace, null, ConsistencyLevel.ONE); return replicaPlans; } diff --git a/test/unit/org/apache/cassandra/service/reads/range/RangeCommandsTest.java b/test/unit/org/apache/cassandra/service/reads/range/RangeCommandsTest.java index ce04d5deab54..466309fcb240 100644 --- a/test/unit/org/apache/cassandra/service/reads/range/RangeCommandsTest.java +++ b/test/unit/org/apache/cassandra/service/reads/range/RangeCommandsTest.java @@ -79,7 +79,7 @@ public void tesConcurrencyFactor() // verify that a low concurrency factor is not capped by the max concurrency factor PartitionRangeReadCommand command = command(cfs, 50, 50); try (RangeCommandIterator partitions = RangeCommands.rangeCommandIterator(command, ONE, Dispatcher.RequestTime.forImmediateExecution()); - ReplicaPlanIterator ranges = new ReplicaPlanIterator(command.dataRange().keyRange(), command.indexQueryPlan(), keyspace, ONE)) + ReplicaPlanIterator ranges = new ReplicaPlanIterator(command.dataRange().keyRange(), command.indexQueryPlan(), keyspace, command.metadata().id, ONE)) { assertEquals(2, partitions.concurrencyFactor()); assertEquals(MAX_CONCURRENCY_FACTOR, partitions.maxConcurrencyFactor()); @@ -89,7 +89,7 @@ public void tesConcurrencyFactor() // verify that a high concurrency factor is capped by the max concurrency factor command = command(cfs, 1000, 50); try (RangeCommandIterator partitions = RangeCommands.rangeCommandIterator(command, ONE, Dispatcher.RequestTime.forImmediateExecution()); - ReplicaPlanIterator ranges = new ReplicaPlanIterator(command.dataRange().keyRange(), command.indexQueryPlan(), keyspace, ONE)) + ReplicaPlanIterator ranges = new ReplicaPlanIterator(command.dataRange().keyRange(), command.indexQueryPlan(), keyspace, command.metadata().id, ONE)) { assertEquals(MAX_CONCURRENCY_FACTOR, partitions.concurrencyFactor()); assertEquals(MAX_CONCURRENCY_FACTOR, partitions.maxConcurrencyFactor()); @@ -99,7 +99,7 @@ public void tesConcurrencyFactor() // with 0 estimated results per range the concurrency factor should be 1 command = command(cfs, 1000, 0); try (RangeCommandIterator partitions = RangeCommands.rangeCommandIterator(command, ONE, Dispatcher.RequestTime.forImmediateExecution()); - ReplicaPlanIterator ranges = new ReplicaPlanIterator(command.dataRange().keyRange(), command.indexQueryPlan(), keyspace, ONE)) + ReplicaPlanIterator ranges = new ReplicaPlanIterator(command.dataRange().keyRange(), command.indexQueryPlan(), keyspace, command.metadata().id, ONE)) { assertEquals(1, partitions.concurrencyFactor()); assertEquals(MAX_CONCURRENCY_FACTOR, partitions.maxConcurrencyFactor()); diff --git a/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanIteratorTest.java b/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanIteratorTest.java index 84f3a5e2e750..829211f8e86f 100644 --- a/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanIteratorTest.java +++ b/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanIteratorTest.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.cassandra.schema.TableId; import org.junit.BeforeClass; import org.junit.Test; @@ -44,6 +45,7 @@ public class ReplicaPlanIteratorTest { private static final String KEYSPACE = "ReplicaPlanIteratorTest"; + private static final TableId TABLE_ID = TableId.generate(); private static Keyspace keyspace; @BeforeClass @@ -163,7 +165,7 @@ private final void testRanges(AbstractBounds queryRange, Abst @SafeVarargs private final void testRanges(Keyspace keyspace, AbstractBounds queryRange, AbstractBounds... expected) { - try (ReplicaPlanIterator iterator = new ReplicaPlanIterator(queryRange, null, keyspace, ConsistencyLevel.ANY)) + try (ReplicaPlanIterator iterator = new ReplicaPlanIterator(queryRange, null, keyspace, TABLE_ID, ConsistencyLevel.ANY)) { List> restrictedRanges = new ArrayList<>(expected.length); while (iterator.hasNext()) diff --git a/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanMergerTest.java b/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanMergerTest.java index aaa88c938dd3..46b71d532e76 100644 --- a/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanMergerTest.java +++ b/test/unit/org/apache/cassandra/service/reads/range/ReplicaPlanMergerTest.java @@ -416,8 +416,8 @@ private final void testRanges(ConsistencyLevel consistencyLevel, AbstractBounds queryRange, AbstractBounds... expected) { - try (ReplicaPlanIterator originals = new ReplicaPlanIterator(queryRange, null, keyspace, ANY); // ANY avoids endpoint erros - ReplicaPlanMerger merger = new ReplicaPlanMerger(originals, keyspace, consistencyLevel)) + try (ReplicaPlanIterator originals = new ReplicaPlanIterator(queryRange, null, keyspace, null, ANY); // ANY avoids endpoint erros + ReplicaPlanMerger merger = new ReplicaPlanMerger(originals, keyspace, null, consistencyLevel)) { // collect the merged ranges List> mergedRanges = new ArrayList<>(expected.length); diff --git a/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java b/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java index d4a5b7a05fd0..4a6d31447c4b 100644 --- a/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/reads/repair/AbstractReadRepairTest.java @@ -361,7 +361,7 @@ static ReplicaPlan.ForRangeRead replicaPlan(Keyspace keyspace, ConsistencyLevel replicas, 1, null, - (self, token) -> forReadRepair(self, ClusterMetadata.current(), keyspace, consistencyLevel, token, (r) -> true, ReadCoordinator.DEFAULT), + (self, token) -> forReadRepair(self, ClusterMetadata.current(), keyspace, null, consistencyLevel, token, (r) -> true, ReadCoordinator.DEFAULT), Epoch.EMPTY); } diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java index b9df8471a2aa..37b3815c1c1c 100644 --- a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java @@ -50,14 +50,7 @@ import org.apache.cassandra.tcm.sequences.LockedRanges; import org.mockito.Mockito; -import static org.apache.cassandra.tcm.MetadataKeys.ACCORD_KEYSPACES; -import static org.apache.cassandra.tcm.MetadataKeys.CONSENSUS_MIGRATION_STATE; -import static org.apache.cassandra.tcm.MetadataKeys.DATA_PLACEMENTS; -import static org.apache.cassandra.tcm.MetadataKeys.IN_PROGRESS_SEQUENCES; -import static org.apache.cassandra.tcm.MetadataKeys.LOCKED_RANGES; -import static org.apache.cassandra.tcm.MetadataKeys.NODE_DIRECTORY; -import static org.apache.cassandra.tcm.MetadataKeys.SCHEMA; -import static org.apache.cassandra.tcm.MetadataKeys.TOKEN_MAP; +import static org.apache.cassandra.tcm.MetadataKeys.*; import static org.apache.cassandra.tcm.ownership.OwnershipUtils.randomPlacements; import static org.apache.cassandra.tcm.ownership.OwnershipUtils.token; import static org.apache.cassandra.tcm.sequences.SequencesUtils.affectedRanges; @@ -304,8 +297,10 @@ else if (key == LOCKED_RANGES) return metadata.lockedRanges; else if (key == IN_PROGRESS_SEQUENCES) return metadata.inProgressSequences; - else if (key == ACCORD_KEYSPACES) - return metadata.accordKeyspaces; + else if (key == ACCORD_TABLES) + return metadata.accordTables; + else if (key == ACCORD_FAST_PATH) + return metadata.accordFastPath; else if (key == CONSENSUS_MIGRATION_STATE) return metadata.consensusMigrationState; diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index 7017e986647a..fdd20b073933 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -49,6 +49,7 @@ import org.quicktheories.impl.JavaRandom; import static accord.utils.AccordGens.txnIds; +import static org.apache.cassandra.service.accord.AccordTestUtils.TABLE_ID1; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; public class AccordGenerators @@ -99,47 +100,44 @@ public static Gen commands() public static Gen keys() { - return keys(fromQT(Generators.IDENTIFIER_GEN), - fromQT(CassandraGenerators.TABLE_ID_GEN), + return keys(fromQT(CassandraGenerators.TABLE_ID_GEN), fromQT(CassandraGenerators.decoratedKeys())); } public static Gen keys(IPartitioner partitioner) { - return keys(fromQT(Generators.IDENTIFIER_GEN), - fromQT(CassandraGenerators.TABLE_ID_GEN), + return keys(fromQT(CassandraGenerators.TABLE_ID_GEN), fromQT(CassandraGenerators.decoratedKeys(ignore -> partitioner))); } - public static Gen keys(Gen keyspace, Gen tableId, Gen key) + public static Gen keys(Gen tableIdGen, Gen key) { - return rs -> new PartitionKey(keyspace.next(rs), tableId.next(rs), key.next(rs)); + return rs -> new PartitionKey(tableIdGen.next(rs), key.next(rs)); } - public static Gen routingKeyGen(Gen keyspace, Gen tokenGen) + public static Gen routingKeyGen(Gen tableIdGen, Gen tokenGen) { return rs -> { - String ks = keyspace.next(rs); - if (rs.nextBoolean()) return new AccordRoutingKey.TokenKey(ks, tokenGen.next(rs)); - else return rs.nextBoolean() ? AccordRoutingKey.SentinelKey.min(ks) : AccordRoutingKey.SentinelKey.max(ks); + TableId tableId = tableIdGen.next(rs); + if (rs.nextBoolean()) return new AccordRoutingKey.TokenKey(tableId, tokenGen.next(rs)); + else return rs.nextBoolean() ? AccordRoutingKey.SentinelKey.min(tableId) : AccordRoutingKey.SentinelKey.max(tableId); }; } public static Gen range() { - return PARTITIONER_GEN.flatMap(partitioner -> range(fromQT(Generators.IDENTIFIER_GEN), fromQT(CassandraGenerators.token(partitioner)))); + return PARTITIONER_GEN.flatMap(partitioner -> range(fromQT(CassandraGenerators.TABLE_ID_GEN), fromQT(CassandraGenerators.token(partitioner)))); } public static Gen range(IPartitioner partitioner) { - return range(fromQT(Generators.IDENTIFIER_GEN), fromQT(CassandraGenerators.token(partitioner))); + return range(fromQT(CassandraGenerators.TABLE_ID_GEN), fromQT(CassandraGenerators.token(partitioner))); } - public static Gen range(Gen keyspace, Gen tokenGen) + public static Gen range(Gen tables, Gen tokenGen) { return rs -> { - String ks = keyspace.next(rs); - Gen gen = routingKeyGen(Gens.constant(ks), tokenGen); + Gen gen = routingKeyGen(Gens.constant(tables.next(rs)), tokenGen); AccordRoutingKey a = gen.next(rs); AccordRoutingKey b = gen.next(rs); while (a.equals(b)) @@ -152,17 +150,17 @@ public static Gen range(Gen keyspace, Gen tokenGen) public static Gen ranges() { // javac couldn't pick the right constructor with HashSet::new, so had to create new lambda... - return ranges(Gens.lists(fromQT(Generators.IDENTIFIER_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), PARTITIONER_GEN); + return ranges(Gens.lists(fromQT(CassandraGenerators.TABLE_ID_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), PARTITIONER_GEN); } - public static Gen ranges(Gen> keyspaceGen, Gen partitionerGen) + public static Gen ranges(Gen> tableIdGen, Gen partitionerGen) { return rs -> { - Set keyspaces = keyspaceGen.next(rs); + Set tables = tableIdGen.next(rs); IPartitioner partitioner = partitionerGen.next(rs); List ranges = new ArrayList<>(); int numSplits = rs.nextInt(10, 100); - TokenRange range = new TokenRange(AccordRoutingKey.SentinelKey.min(""), AccordRoutingKey.SentinelKey.max("")); + TokenRange range = new TokenRange(AccordRoutingKey.SentinelKey.min(TABLE_ID1), AccordRoutingKey.SentinelKey.max(TABLE_ID1)); AccordSplitter splitter = partitioner.accordSplitter().apply(Ranges.of(range)); BigInteger size = splitter.sizeOf(range); BigInteger update = splitter.divide(size, numSplits); @@ -171,9 +169,9 @@ public static Gen ranges(Gen> keyspaceGen, Gen { BigInteger end = offset.add(update); TokenRange r = (TokenRange) splitter.subRange(range, offset, end); - for (String ks : keyspaces) + for (TableId id : tables) { - ranges.add(r.withKeyspace(ks)); + ranges.add(r.withTable(id)); } offset = end; } @@ -183,7 +181,7 @@ public static Gen ranges(Gen> keyspaceGen, Gen public static Gen ranges(IPartitioner partitioner) { - return ranges(Gens.lists(fromQT(Generators.IDENTIFIER_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), ignore -> partitioner); + return ranges(Gens.lists(fromQT(CassandraGenerators.TABLE_ID_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), ignore -> partitioner); } public static Gen keyDepsGen() diff --git a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java index 8867d6141e1e..1fc46ff2096d 100644 --- a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java +++ b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java @@ -119,6 +119,7 @@ import org.apache.cassandra.schema.Types; import org.apache.cassandra.schema.UserFunctions; import org.apache.cassandra.schema.Views; +import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.AbstractTypeGenerators.TypeGenBuilder; @@ -481,7 +482,7 @@ public Gen build() AbstractReplicationStrategy replication = replicationGen.generate(rs).withKeyspace(nameGen).build().generate(rs); ReplicationParams replicationParams = ReplicationParams.fromStrategy(replication); boolean durableWrites = durableWritesGen.generate(rs); - KeyspaceParams params = new KeyspaceParams(durableWrites, replicationParams); + KeyspaceParams params = new KeyspaceParams(durableWrites, replicationParams, FastPathStrategy.simple()); Tables tables = Tables.none(); Views views = Views.none(); Types types = Types.none(); From a95f072e3527dbb1c8e2d827560b8bdd7939d8fb Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 8 Jan 2024 12:05:54 -0800 Subject: [PATCH 089/340] (Accord) NPE while trying to serialize FoundKnownMap as value is null half the time but unexpected while serializing patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-19253 --- modules/accord | 2 +- .../org/apache/cassandra/db/Mutation.java | 8 +- .../service/accord/IAccordService.java | 2 +- .../serializers/CheckStatusSerializers.java | 9 +- .../apache/cassandra/tcm/RemoteProcessor.java | 1 - .../test/tcm/AccordAddTableTest.java | 80 +++++++++++++ .../CheckStatusSerializersTest.java | 107 ++++++++++++++++++ .../cassandra/utils/AccordGenerators.java | 17 ++- 8 files changed, 216 insertions(+), 10 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java diff --git a/modules/accord b/modules/accord index 5523cfefef16..0d8f60f742d4 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 5523cfefef163efee53c8cc57595f5b50ea4f363 +Subproject commit 0d8f60f742d443365a50115397ff1f0ab10fc694 diff --git a/src/java/org/apache/cassandra/db/Mutation.java b/src/java/org/apache/cassandra/db/Mutation.java index 9c51cd13764b..d8ded1eeb141 100644 --- a/src/java/org/apache/cassandra/db/Mutation.java +++ b/src/java/org/apache/cassandra/db/Mutation.java @@ -671,8 +671,12 @@ public PartitionUpdateCollector(String keyspaceName, DecoratedKey key) public PartitionUpdateCollector add(PartitionUpdate partitionUpdate) { - assert partitionUpdate != null; - assert partitionUpdate.partitionKey().getPartitioner() == key.getPartitioner(); + assert partitionUpdate != null : "Null updates are not allowed"; + assert partitionUpdate.partitionKey().getPartitioner() == key.getPartitioner(): String.format("Update to key %s with partitioner %s (%s) had an update (%s) with a different partitioner! %s (%s)", + key, + key.getPartitioner(), key.getPartitioner().getClass(), + partitionUpdate, + partitionUpdate.partitionKey().getPartitioner(), partitionUpdate.partitionKey().getPartitioner().getClass()); // note that ImmutableMap.Builder only allows put:ing the same key once, it will fail during build() below otherwise modifications.put(partitionUpdate.metadata().id, partitionUpdate); empty = false; diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index e0025b46a359..2f0d7afc71ac 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -133,7 +133,7 @@ default void maybeConvertTablesToAccord(Txn txn) Set allTables = new HashSet<>(); Set newTables = new HashSet<>(); txn.keys().forEach(key -> { - TableId table = ((AccordRoutableKey) key).table(); + TableId table = key instanceof AccordRoutableKey ? ((AccordRoutableKey) key).table() : ((TokenRange) key).table(); if (allTables.add(table) && !isAccordManagedTable(table)) newTables.add(table); }); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 1d903025afa2..d4cf093d1242 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -45,6 +45,7 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.NullableSerializer; import static accord.messages.CheckStatus.SerializationSupport.createOk; @@ -78,6 +79,8 @@ public long serializedSize(FoundKnown known, int version) } }; + public static final IVersionedSerializer foundKnownNullable = NullableSerializer.wrap(foundKnown); + public static final IVersionedSerializer foundKnownMap = new IVersionedSerializer<>() { @Override @@ -88,7 +91,7 @@ public void serialize(FoundKnownMap knownMap, DataOutputPlus out, int version) t for (int i = 0 ; i <= size ; ++i) KeySerializers.routingKey.serialize(knownMap.startAt(i), out, version); for (int i = 0 ; i < size ; ++i) - foundKnown.serialize(knownMap.valueAt(i), out, version); + foundKnownNullable.serialize(knownMap.valueAt(i), out, version); } @Override @@ -100,7 +103,7 @@ public FoundKnownMap deserialize(DataInputPlus in, int version) throws IOExcepti starts[i] = KeySerializers.routingKey.deserialize(in, version); FoundKnown[] values = new FoundKnown[size]; for (int i = 0 ; i < size ; ++i) - values[i] = foundKnown.deserialize(in, version); + values[i] = foundKnownNullable.deserialize(in, version); return FoundKnownMap.SerializerSupport.create(true, starts, values); } @@ -112,7 +115,7 @@ public long serializedSize(FoundKnownMap knownMap, int version) for (int i = 0 ; i <= size ; ++i) result += KeySerializers.routingKey.serializedSize(knownMap.startAt(i), version); for (int i = 0 ; i < size ; ++i) - result += foundKnown.serializedSize(knownMap.valueAt(i), version); + result += foundKnownNullable.serializedSize(knownMap.valueAt(i), version); return result; } }; diff --git a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java index e5cb0568fed4..54adbafba663 100644 --- a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java +++ b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java @@ -93,7 +93,6 @@ public Commit.Result commit(Entry.Id entryId, Transformation transform, Epoch la { log.waitForHighestConsecutive(); } - return result; } catch (Exception e) diff --git a/test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java b/test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java new file mode 100644 index 000000000000..c486da8f5430 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.tcm; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Future; + +import org.junit.Test; + +import accord.primitives.Ranges; +import accord.primitives.Txn; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.FBUtilities; + +public class AccordAddTableTest extends TestBaseImpl +{ + @Test + public void test() throws IOException + { + try (Cluster cluster = builder().withNodes(6) + .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK)) + .start()) + { + List> results = new ArrayList<>(cluster.size()); + for (IInvokableInstance inst : cluster) + { + Future result = inst.asyncRunsOnInstance(() -> { + for (int i = 0; i < 100; i++) + { + AccordService.instance().maybeConvertTablesToAccord(fakeTxn(i)); + if (!ClusterMetadata.current().accordTables.contains(fromNum(i))) + throw new AssertionError("Table not found in TCM!"); + } + }).call(); + results.add(result); + } + FBUtilities.waitOnFutures(results); + } + } + + private static Txn fakeTxn(int i) + { + TableId id = fromNum(i); + + Ranges of = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(id), AccordRoutingKey.SentinelKey.max(id))); + return new Txn.InMemory(of, null, null); + } + + private static TableId fromNum(int i) + { + return TableId.fromUUID(new UUID(i, 0)); // not valid... but do we care? + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java new file mode 100644 index 000000000000..d5da34c8fb4f --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.Comparator; +import java.util.List; + +import org.junit.Test; + +import accord.api.RoutingKey; +import accord.coordinate.Infer; +import accord.local.SaveStatus; +import accord.messages.CheckStatus.FoundKnownMap; +import accord.primitives.Ballot; +import accord.primitives.FullKeyRoute; +import accord.primitives.Routable; +import accord.primitives.Unseekables; +import accord.utils.AccordGens; +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.utils.AccordGenerators.fromQT; + +public class CheckStatusSerializersTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + @Test + public void serde() + { + DataOutputBuffer buffer = new DataOutputBuffer(); + qt().forAll(foundKnownMap()).check(map -> Assertions.assertThat(serde(CheckStatusSerializers.foundKnownMap, MessagingService.Version.CURRENT.value, buffer, map)).isEqualTo(map)); + } + + private static T serde(IVersionedSerializer serializer, int version, DataOutputBuffer buffer, T value) throws IOException + { + buffer.clear(); + long expectedSize = serializer.serializedSize(value, version); + serializer.serialize(value, buffer, version); + Assertions.assertThat(buffer.getLength()).isEqualTo(expectedSize); + try (DataInputBuffer in = new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)) + { + return serializer.deserialize(in, version); + } + } + + private static Gen foundKnownMap() + { + return rs -> { + SaveStatus saveStatus = Gens.pick(SaveStatus.values()).next(rs); + Infer.InvalidIfNot invalidIfNot = Gens.pick(Infer.InvalidIfNot.values()).next(rs); + Ballot promised = AccordGens.ballot().next(rs); + Routable.Domain domain = Gens.pick(Routable.Domain.values()).next(rs); + Unseekables keysOrRanges; + switch (domain) + { + case Key: + // TODO (coverage): don't hard code murmur + Gen keyGen = AccordGenerators.routingKeyGen(fromQT(CassandraGenerators.TABLE_ID_GEN), Gens.constant(AccordRoutingKey.RoutingKeyKind.TOKEN), fromQT(CassandraGenerators.murmurToken())); + AccordRoutingKey homeKey = keyGen.next(rs); + List forOrdering = Gens.lists(keyGen).unique().ofSizeBetween(1, 10).next(rs); + forOrdering.sort(Comparator.naturalOrder()); + // TODO (coverage): don't hard code keys type + keysOrRanges = new FullKeyRoute(homeKey, forOrdering.contains(homeKey), forOrdering.toArray(RoutingKey[]::new)); + break; + case Range: + keysOrRanges = AccordGenerators.ranges(Murmur3Partitioner.instance).next(rs); + break; + default: + throw new AssertionError("Unknown domain"); + } + return FoundKnownMap.create(keysOrRanges, saveStatus, invalidIfNot, promised); + }; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index fdd20b073933..4740ee6dfddb 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -116,11 +116,24 @@ public static Gen keys(Gen tableIdGen, Gen } public static Gen routingKeyGen(Gen tableIdGen, Gen tokenGen) + { + return routingKeyGen(tableIdGen, Gens.enums().all(AccordRoutingKey.RoutingKeyKind.class), tokenGen); + } + + public static Gen routingKeyGen(Gen tableIdGen, Gen kindGen, Gen tokenGen) { return rs -> { TableId tableId = tableIdGen.next(rs); - if (rs.nextBoolean()) return new AccordRoutingKey.TokenKey(tableId, tokenGen.next(rs)); - else return rs.nextBoolean() ? AccordRoutingKey.SentinelKey.min(tableId) : AccordRoutingKey.SentinelKey.max(tableId); + AccordRoutingKey.RoutingKeyKind kind = kindGen.next(rs); + switch (kind) + { + case TOKEN: + return new AccordRoutingKey.TokenKey(tableId, tokenGen.next(rs)); + case SENTINEL: + return rs.nextBoolean() ? AccordRoutingKey.SentinelKey.min(tableId) : AccordRoutingKey.SentinelKey.max(tableId); + default: + throw new AssertionError("Unknown kind: " + kind); + } }; } From 633bbee1a698b5666b459f7499fd100bd2ea3657 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 4 Jan 2024 14:45:27 -0800 Subject: [PATCH 090/340] (Accord): Bug fixes from CASSANDRA-18675 to better support adding keyspaces patch by David Capwell; reviewed by Benedict Elliott Smith, Blake Eggleston for CASSANDRA-18804 --- modules/accord | 2 +- .../apache/cassandra/service/accord/AccordService.java | 7 +++++++ .../cassandra/service/accord/IAccordService.java | 9 --------- .../distributed/test/accord/AccordTestBase.java | 10 +++++++++- .../simulator/test/AccordJournalSimulationTest.java | 5 +++-- .../service/accord/AccordMessageSinkTest.java | 5 +++-- 6 files changed, 23 insertions(+), 15 deletions(-) diff --git a/modules/accord b/modules/accord index 0d8f60f742d4..901a0868cdaf 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 0d8f60f742d443365a50115397ff1f0ab10fc694 +Subproject commit 901a0868cdaf6426226e6bafb0675773e04668bd diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 93f5422f8277..6cd1b68642d5 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -28,6 +28,9 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; + +import accord.coordinate.TopologyMismatch; +import org.apache.cassandra.cql3.statements.RequestValidations; import org.apache.cassandra.tcm.transformations.AddAccordTable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -469,6 +472,10 @@ public TopologyManager topology() // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match throw newPreempted(txnId, txn, consistencyLevel); } + if (cause instanceof TopologyMismatch) + { + throw RequestValidations.invalidRequest(cause.getMessage()); + } metrics.failures.mark(); throw new RuntimeException(cause); } diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 2f0d7afc71ac..9422df491638 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -45,8 +45,6 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.api.AccordRoutableKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.schema.TableId; @@ -156,13 +154,6 @@ default void maybeConvertTablesToAccord(Txn txn) void ensureTableIsAccordManaged(TableId tableId); - default void ensureTableIsAccordManaged(String keyspace, String table) - { - // TODO: remove when accord enabled is handled via schema - TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); - ensureTableIsAccordManaged(metadata.id); - } - default void ensureKeyspaceIsAccordManaged(String keyspace) { // TODO: remove when accord enabled is handled via schema diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 9cb218dab30a..f5937f643bd7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -33,6 +33,8 @@ import accord.coordinate.Invalidated; import com.google.common.base.Splitter; import com.google.common.primitives.Ints; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -134,7 +136,13 @@ protected void test(String tableDDL, FailingConsumer fn) throws Excepti public static void ensureTableIsAccordManaged(Cluster cluster, String ksname, String tableName) { - cluster.get(1).runOnInstance(() -> AccordService.instance().ensureTableIsAccordManaged(ksname, tableName)); + cluster.get(1).runOnInstance(() -> { + // TODO: remove when accord enabled is handled via schema + TableMetadata metadata = Schema.instance.getTableMetadata(ksname, tableName); + if (metadata == null) + return; // bad plumbing from shared utils.... + AccordService.instance().ensureTableIsAccordManaged(metadata.id); + }); } protected void test(List ddls, FailingConsumer fn) throws Exception diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index f4739bca6def..4201a13f7d20 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -24,6 +24,8 @@ import javax.annotation.Nullable; import com.google.common.collect.ImmutableMap; + +import accord.topology.TopologyUtils; import org.apache.cassandra.schema.*; import org.junit.Ignore; import org.junit.Test; @@ -35,7 +37,6 @@ import accord.api.RoutingKey; import accord.api.Update; import accord.api.Write; -import accord.impl.TopologyUtils; import accord.local.Node; import accord.messages.PreAccept; import accord.messages.TxnRequest; @@ -208,7 +209,7 @@ private static TxnRequest toRequest(int event) { TxnId id = toTxnId(event); Ranges ranges = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(tableId), AccordRoutingKey.SentinelKey.max(tableId))); - Topologies topologies = Utils.topologies(TopologyUtils.initialTopology(new Node.Id[] {node}, ranges, 3)); + Topologies topologies = Utils.topologies(TopologyUtils.initialTopology(new Node.Id[] { node}, ranges, 3)); Keys keys = Keys.of(toKey(0)); Txn txn = new Txn.InMemory(keys, new TxnRead(new TxnNamedRead[0], keys, null), TxnQuery.ALL, new NoopUpdate()); FullRoute route = route(); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index a35050b017fd..93150d105489 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -20,6 +20,8 @@ import org.junit.BeforeClass; import org.junit.Test; + +import accord.topology.TopologyUtils; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -27,7 +29,6 @@ import accord.api.Agent; import accord.impl.AbstractFetchCoordinator; import accord.impl.IntKey; -import accord.impl.TopologyUtils; import accord.local.Node; import accord.messages.InformOfTxnId; import accord.messages.MessageType; @@ -56,7 +57,7 @@ public class AccordMessageSinkTest { private static final Node.Id node = new Node.Id(1); private static final AccordEndpointMapper mapping = SimpleAccordEndpointMapper.INSTANCE; - private static final Topology topology = TopologyUtils.initialTopology(new Node.Id[] {node}, Ranges.of(IntKey.range(0, 100)), 1); + private static final Topology topology = TopologyUtils.initialTopology(new Node.Id[] { node}, Ranges.of(IntKey.range(0, 100)), 1); private static final Topologies topologies = new Topologies.Single((a, b, ignore) -> 0, topology); private static final MessageDelivery messaging = Mockito.mock(MessageDelivery.class); From 20af77031ad594434aff97bf6b0798ebfd5fc59b Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Fri, 17 Nov 2023 15:38:25 +0000 Subject: [PATCH 091/340] Pre-requisite changes for CASSANDRA-18888 patch by Aleksey Yeschenko; reviewed by Ariel Weisberg for CASSANDRA-18888 --- modules/accord | 2 +- .../cassandra/journal/ActiveSegment.java | 132 +--- .../cassandra/journal/AsyncCallbacks.java | 45 ++ .../apache/cassandra/journal/Descriptor.java | 2 +- .../cassandra/journal/EntrySerializer.java | 6 +- .../org/apache/cassandra/journal/Flusher.java | 77 +- .../org/apache/cassandra/journal/Journal.java | 153 ++-- .../org/apache/cassandra/journal/Segment.java | 8 +- .../apache/cassandra/journal/Segments.java | 210 ++--- .../cassandra/journal/StaticSegment.java | 36 +- .../org/apache/cassandra/net/Message.java | 21 +- .../ResponseContext.java} | 18 +- .../service/accord/AccordCommandStore.java | 8 +- .../service/accord/AccordJournal.java | 732 ++++++++++++++++-- .../service/accord/AccordMessageSink.java | 52 +- .../service/accord/AccordService.java | 43 +- .../service/accord/AccordVerbHandler.java | 50 +- .../serializers/ReadDataSerializers.java | 4 +- .../test/AccordJournalSimulationTest.java | 40 +- .../apache/cassandra/journal/JournalTest.java | 12 +- .../apache/cassandra/journal/SegmentTest.java | 8 +- .../service/accord/AccordTestUtils.java | 4 +- 22 files changed, 1123 insertions(+), 540 deletions(-) create mode 100644 src/java/org/apache/cassandra/journal/AsyncCallbacks.java rename src/java/org/apache/cassandra/{journal/AsyncWriteCallback.java => net/ResponseContext.java} (73%) diff --git a/modules/accord b/modules/accord index 901a0868cdaf..c524b6d3de39 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 901a0868cdaf6426226e6bafb0675773e04668bd +Subproject commit c524b6d3de3923ccb6314715bd987f3b891348ab diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index 0f4d0dc09c66..22a3aba766bd 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -23,20 +23,17 @@ import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; import java.util.*; -import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.LockSupport; import com.codahale.metrics.Timer; -import org.apache.cassandra.concurrent.ExecutionFailure; -import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.io.util.*; import org.apache.cassandra.utils.*; import org.apache.cassandra.utils.concurrent.OpOrder; import org.apache.cassandra.utils.concurrent.Ref; import org.apache.cassandra.utils.concurrent.WaitQueue; -final class ActiveSegment extends Segment +final class ActiveSegment extends Segment { final FileChannel channel; @@ -61,7 +58,7 @@ final class ActiveSegment extends Segment // a signal that writers can wait on to be notified of a completed flush in BATCH and GROUP FlushMode private final WaitQueue flushComplete = WaitQueue.newWaitQueue(); - private final Ref> selfRef; + private final Ref> selfRef; final InMemoryIndex index; @@ -84,7 +81,7 @@ private ActiveSegment( } @SuppressWarnings("resource") - static ActiveSegment create(Descriptor descriptor, Params params, KeySupport keySupport) + static ActiveSegment create(Descriptor descriptor, Params params, KeySupport keySupport) { SyncedOffsets syncedOffsets = SyncedOffsets.active(descriptor, true); InMemoryIndex index = InMemoryIndex.create(keySupport); @@ -98,6 +95,24 @@ InMemoryIndex index() return index; } + @Override + boolean isActive() + { + return true; + } + + @Override + ActiveSegment asActive() + { + return this; + } + + @Override + StaticSegment asStatic() + { + throw new UnsupportedOperationException(); + } + /** * Read the entry and specified offset into the entry holder. * Expects the caller to acquire the ref to the segment and the record to exist. @@ -105,7 +120,7 @@ InMemoryIndex index() @Override boolean read(int offset, EntrySerializer.EntryHolder into) { - ByteBuffer duplicate = (ByteBuffer) buffer.duplicate().position(offset).limit(buffer.capacity()); + ByteBuffer duplicate = buffer.duplicate().position(offset).limit(buffer.capacity()); try { EntrySerializer.read(into, keySupport, duplicate, descriptor.userVersion); @@ -180,13 +195,13 @@ void release() } @Override - public Ref> tryRef() + public Ref> tryRef() { return selfRef.tryRef(); } @Override - public Ref> ref() + public Ref> ref() { return selfRef.ref(); } @@ -285,7 +300,7 @@ private void flushInternal() } } - boolean isFullyFlushed(int syncedOffset) + boolean isCompletedAndFullyFlushed(int syncedOffset) { return syncedOffset >= endOfBuffer; } @@ -340,7 +355,7 @@ Allocation allocate(int entrySize, Set hosts) opGroup.close(); return null; } - return new Allocation(opGroup, (ByteBuffer) buffer.duplicate().position(position).limit(position + totalSize)); + return new Allocation(opGroup, buffer.duplicate().position(position).limit(position + totalSize)); } catch (Throwable t) { @@ -378,12 +393,14 @@ final class Allocation private final OpOrder.Group appendOp; private final ByteBuffer buffer; private final int position; + private final int size; Allocation(OpOrder.Group appendOp, ByteBuffer buffer) { this.appendOp = appendOp; this.buffer = buffer; this.position = buffer.position(); + this.size = buffer.remaining(); } void write(K id, ByteBuffer record, Set hosts) @@ -404,19 +421,14 @@ void write(K id, ByteBuffer record, Set hosts) } } - void asyncWrite(K id, ByteBuffer record, Set hosts, Executor executor, AsyncWriteCallback callback) + void asyncWrite(K id, V record, ByteBuffer bytes, Set hosts, Object writeContext, AsyncCallbacks callbacks) throws IOException { try (BufferedDataOutputStreamPlus out = new DataOutputBufferFixed(buffer)) { - int entrySize = totalEntrySize(hosts, record.remaining()); - EntrySerializer.write(id, record, hosts, keySupport, out, descriptor.userVersion); + EntrySerializer.write(id, bytes, hosts, keySupport, out, descriptor.userVersion); index.update(id, position); metadata.update(hosts); - writeCallbacksExternal.offer(new QueuedWriteCallback(position + entrySize, executor, callback)); - } - catch (Throwable t) - { - executor.execute(() -> callback.onFailure(t)); + callbacks.onWrite(descriptor.timestamp, position, size, id, record, writeContext); } finally { @@ -432,86 +444,4 @@ void awaitFlush(Timer waitingOnFlush) } } } - - // (external) MPSC queue for async write (flush) callbacks, to be executed in *write position order* - private final ManyToOneConcurrentLinkedQueue writeCallbacksExternal = - new ManyToOneConcurrentLinkedQueue<>(); - // (internal) single writer / single reader list of callbacks used to drain the callbacks into for sorting - private final ArrayList writeCallbacksInternal = - new ArrayList<>(); - - static final class QueuedWriteCallback implements Comparable - { - final long recordLimit; - final Executor executor; - final AsyncWriteCallback callback; - - QueuedWriteCallback(long recordLimit, Executor executor, AsyncWriteCallback callback) - { - this.recordLimit = recordLimit; - this.executor = executor; - this.callback = callback; - } - - @Override - public int compareTo(QueuedWriteCallback other) - { - // sort more recent callbacks first to simplify callback execution order later - return -Long.compare(this.recordLimit, other.recordLimit); - } - - void scheduleOnSuccess() - { - try - { - executor.execute(callback); - } - catch (Throwable t) - { - ExecutionFailure.handle(t); - } - } - - void scheduleOnFailure(Throwable error) - { - try - { - executor.execute(() -> callback.onFailure(error)); - } - catch (Throwable t) - { - ExecutionFailure.handle(t); - } - } - } - - void scheduleOnSuccessCallbacks(long syncedOffset) - { - // sort and execute callbacks in write position order, up until the furtherst synced offset - writeCallbacksExternal.drain(writeCallbacksInternal::add); - writeCallbacksInternal.sort(null); - - for (int i = writeCallbacksInternal.size() - 1; i >= 0; i--) - { - QueuedWriteCallback callback = writeCallbacksInternal.get(i); - if (callback.recordLimit > syncedOffset) - break; - callback.scheduleOnSuccess(); - writeCallbacksInternal.remove(i); - } - } - - void scheduleOnFailureCallbacks(Throwable t) - { - writeCallbacksExternal.drain(writeCallbacksInternal::add); - writeCallbacksInternal.sort(null); - - for (int i = writeCallbacksInternal.size() - 1; i >= 0; i--) - { - QueuedWriteCallback callback = writeCallbacksInternal.get(i); - callback.scheduleOnFailure(t); - } - - writeCallbacksInternal.clear(); - } } diff --git a/src/java/org/apache/cassandra/journal/AsyncCallbacks.java b/src/java/org/apache/cassandra/journal/AsyncCallbacks.java new file mode 100644 index 000000000000..0fb1af39c604 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/AsyncCallbacks.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +public interface AsyncCallbacks +{ + /** + * Invoked once an entry has been written to the file, and indexes have been updated, but before it + * has been flushed to disk. Invoked from the writer thread. Execution order of onWrite() callbacks + * with regard to each other is undefined. + */ + void onWrite(long segment, int position, int size, K key, V value, Object writeContext); + + /** + * Invoked when anything goes wrong with writing the entry - anywhere from serialization to writing to the file, + * to requesting the flush. + */ + void onWriteFailed(K key, V value, Object writeContext, Throwable cause); + + /** + * Invoked after {@link Flusher} successfully flushes a segment or multiple segments to disk. + * Invocation of this callback implies that any segments older than {@code segment} have been + * completed and also flushed. + * Invocation of this callback also implies that all {@link #onWrite(long, int, int, Object, Object, Object)} + * callbacks for all entries earlier than (segment, position) have finished execution. + */ + void onFlush(long segment, int position); + + void onFlushFailed(Throwable cause); +} diff --git a/src/java/org/apache/cassandra/journal/Descriptor.java b/src/java/org/apache/cassandra/journal/Descriptor.java index ea7ce7a1c2af..176a12e10917 100644 --- a/src/java/org/apache/cassandra/journal/Descriptor.java +++ b/src/java/org/apache/cassandra/journal/Descriptor.java @@ -35,7 +35,7 @@ * log-1637159888484-2-1-1.meta * log-1637159888484-2-1-1.sync */ -final class Descriptor implements Comparable +public final class Descriptor implements Comparable { private static final String SEPARATOR = "-"; private static final String PREFIX = "log" + SEPARATOR; diff --git a/src/java/org/apache/cassandra/journal/EntrySerializer.java b/src/java/org/apache/cassandra/journal/EntrySerializer.java index ab1d02649967..a2a61cfce371 100644 --- a/src/java/org/apache/cassandra/journal/EntrySerializer.java +++ b/src/java/org/apache/cassandra/journal/EntrySerializer.java @@ -179,9 +179,9 @@ static boolean tryRead(EntryHolder into, throw new AssertionError(); // can't happen } - into.value = (ByteBuffer) buffer.duplicate() - .position(buffer.position() - recordSize) - .limit(buffer.position()); + into.value = buffer.duplicate() + .position(buffer.position() - recordSize) + .limit(buffer.position()); in.skipBytesFully(TypeSizes.INT_SIZE); return true; diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index ebad946f85ac..436abc5d0e97 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -32,6 +32,7 @@ import org.apache.cassandra.utils.concurrent.WaitQueue; import static java.lang.String.format; +import static java.util.Comparator.comparing; import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; @@ -53,6 +54,7 @@ final class Flusher private final Journal journal; private final Params params; + private final AsyncCallbacks callbacks; private volatile Interruptible flushExecutor; @@ -71,13 +73,14 @@ final class Flusher private final Semaphore haveWork = newSemaphore(1); private volatile boolean flushRequested; - private final FlushMethod syncFlushMethod; - private final FlushMethod asyncFlushMethod; + private final FlushMethod syncFlushMethod; + private final FlushMethod asyncFlushMethod; Flusher(Journal journal) { this.journal = journal; this.params = journal.params; + this.callbacks = journal.callbacks; this.syncFlushMethod = syncFlushMethod(params); this.asyncFlushMethod = asyncFlushMethod(params); } @@ -98,7 +101,7 @@ private class FlushRunnable implements Interruptible.Task private final MonotonicClock clock; private final NoSpamLogger noSpamLogger; - private final ArrayList> segmentsToFlush = new ArrayList<>(); + private final ArrayList> segmentsToFlush = new ArrayList<>(); FlushRunnable(MonotonicClock clock) { @@ -156,24 +159,31 @@ public void doRun(Interruptible.State state) throws InterruptedException private void doFlush() { journal.selectSegmentToFlush(segmentsToFlush); - // only schedule onSuccess callbacks for a segment if the preceding segments - // have been fully flushed, to preserve 1:1 mapping between record's position - // in the journal and onSuccess callback scheduling order - boolean scheduleOnSuccessCallbacks = true; + segmentsToFlush.sort(comparing(s -> s.descriptor)); + try { - for (ActiveSegment segment : segmentsToFlush) + long syncedSegment = -1; + int syncedOffset = -1; + + for (ActiveSegment segment : segmentsToFlush) { - try - { - scheduleOnSuccessCallbacks = doFlush(segment, scheduleOnSuccessCallbacks) && scheduleOnSuccessCallbacks; - } - catch (Throwable t) - { - segmentsToFlush.forEach(s -> s.scheduleOnFailureCallbacks(t)); - throw t; - } + syncedSegment = segment.descriptor.timestamp; + syncedOffset = segment.flush(); + + // if an older segment isn't fully complete + flushed yet, don't attempt to flush any younger ones + if (!segment.isCompletedAndFullyFlushed(syncedOffset)) + break; } + + // invoke the onFlush() callback once, covering entire flushed range across all flushed segments + if (syncedSegment != -1 && syncedOffset != -1) + callbacks.onFlush(syncedSegment, syncedOffset); + } + catch (Throwable t) + { + callbacks.onFlushFailed(t); + throw t; } finally { @@ -181,15 +191,6 @@ private void doFlush() } } - // flush the segment, schedule write callbacks if requested, return whether the segment has been flushed fully - private boolean doFlush(ActiveSegment segment, boolean scheduleCallbacks) - { - int syncedOffset = segment.flush(); - if (scheduleCallbacks) - segment.scheduleOnSuccessCallbacks(syncedOffset); - return segment.isFullyFlushed(syncedOffset); - } - private long firstLaggedAt = Long.MIN_VALUE; // first lag ever or since last logged warning private int flushCount = 0; // flush count since firstLaggedAt private int lagCount = 0; // lag count since firstLaggedAt @@ -232,12 +233,12 @@ private void processFlushDuration(long startedFlushAt, long finishedFlushAt) } @FunctionalInterface - private interface FlushMethod + private interface FlushMethod { - void flush(ActiveSegment.Allocation allocation); + void flush(ActiveSegment.Allocation allocation); } - private FlushMethod syncFlushMethod(Params params) + private FlushMethod syncFlushMethod(Params params) { switch (params.flushMode()) { @@ -248,7 +249,7 @@ private FlushMethod syncFlushMethod(Params params) } } - private FlushMethod asyncFlushMethod(Params params) + private FlushMethod asyncFlushMethod(Params params) { switch (params.flushMode()) { @@ -259,17 +260,17 @@ private FlushMethod asyncFlushMethod(Params params) } } - void waitForFlush(ActiveSegment.Allocation alloc) + void waitForFlush(ActiveSegment.Allocation alloc) { syncFlushMethod.flush(alloc); } - void asyncFlush(ActiveSegment.Allocation alloc) + void asyncFlush(ActiveSegment.Allocation alloc) { asyncFlushMethod.flush(alloc); } - private void waitForFlushBatch(ActiveSegment.Allocation alloc) + private void waitForFlushBatch(ActiveSegment.Allocation alloc) { pending.incrementAndGet(); requestExtraFlush(); @@ -278,7 +279,7 @@ private void waitForFlushBatch(ActiveSegment.Allocation alloc) written.incrementAndGet(); } - private void asyncFlushBatch(ActiveSegment.Allocation alloc) + private void asyncFlushBatch(ActiveSegment.Allocation alloc) { pending.incrementAndGet(); requestExtraFlush(); @@ -287,7 +288,7 @@ private void asyncFlushBatch(ActiveSegment.Allocation alloc) written.incrementAndGet(); } - private void waitForFlushGroup(ActiveSegment.Allocation alloc) + private void waitForFlushGroup(ActiveSegment.Allocation alloc) { pending.incrementAndGet(); alloc.awaitFlush(journal.metrics.waitingOnFlush); @@ -295,7 +296,7 @@ private void waitForFlushGroup(ActiveSegment.Allocation alloc) written.incrementAndGet(); } - private void asyncFlushGroup(ActiveSegment.Allocation alloc) + private void asyncFlushGroup(ActiveSegment.Allocation alloc) { pending.incrementAndGet(); // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO (expected): collect async flush metrics @@ -303,7 +304,7 @@ private void asyncFlushGroup(ActiveSegment.Allocation alloc) written.incrementAndGet(); } - private void waitForFlushPeriodic(ActiveSegment.Allocation alloc) + private void waitForFlushPeriodic(ActiveSegment.Allocation alloc) { long expectedFlushTime = nanoTime() - periodicFlushLagBlockNanos(); if (lastFlushedAt < expectedFlushTime) @@ -315,7 +316,7 @@ private void waitForFlushPeriodic(ActiveSegment.Allocation alloc) written.incrementAndGet(); } - private void asyncFlushPeriodic(ActiveSegment.Allocation ignore) + private void asyncFlushPeriodic(ActiveSegment.Allocation ignore) { pending.incrementAndGet(); // awaitFlushAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); // TODO (expected): collect async flush metrics diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 2deb10b9336e..bb1ada27f7d8 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.List; import java.util.Set; -import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -33,8 +32,6 @@ import java.util.function.Predicate; import java.util.zip.CRC32; -import javax.annotation.Nonnull; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,6 +45,7 @@ import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.PathUtils; +import org.apache.cassandra.journal.Segments.ReferencedSegment; import org.apache.cassandra.journal.Segments.ReferencedSegments; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Crc; @@ -86,6 +84,7 @@ public class Journal implements Shutdownable final String name; final File directory; final Params params; + final AsyncCallbacks callbacks; final KeySupport keySupport; final ValueSerializer valueSerializer; @@ -98,12 +97,12 @@ public class Journal implements Shutdownable volatile long replayLimit; final AtomicLong nextSegmentId = new AtomicLong(); - private volatile ActiveSegment currentSegment = null; + private volatile ActiveSegment currentSegment = null; // segment that is ready to be used; allocator thread fills this and blocks until consumed - private volatile ActiveSegment availableSegment = null; + private volatile ActiveSegment availableSegment = null; - private final AtomicReference> segments = new AtomicReference<>(); + private final AtomicReference> segments = new AtomicReference<>(); Interruptible allocator; private final WaitQueue segmentPrepared = newWaitQueue(); @@ -116,12 +115,14 @@ public class Journal implements Shutdownable public Journal(String name, File directory, Params params, + AsyncCallbacks callbacks, KeySupport keySupport, ValueSerializer valueSerializer) { this.name = name; this.directory = directory; this.params = params; + this.callbacks = callbacks; this.keySupport = keySupport; this.valueSerializer = valueSerializer; @@ -146,7 +147,7 @@ public void start() : descriptors.get(descriptors.size() - 1).timestamp; nextSegmentId.set(replayLimit = Math.max(currentTimeMillis(), maxTimestamp + 1)); - segments.set(Segments.ofStatic(StaticSegment.open(descriptors, keySupport))); + segments.set(Segments.of(StaticSegment.open(descriptors, keySupport))); closer = executorFactory().sequential(name + "-closer"); allocator = executorFactory().infiniteLoop(name + "-allocator", new AllocateRunnable(), SAFE, NON_DAEMON, SYNCHRONIZED); advanceSegment(null); @@ -194,6 +195,34 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted return false; } + /** + * Read an entry by its address (segment timestamp + offest) + * + * @return deserialized record if present, null otherwise + */ + public V read(long segmentTimestamp, int offset) + { + try (ReferencedSegment referenced = selectAndReference(segmentTimestamp)) + { + Segment segment = referenced.segment(); + if (null == segment) + return null; + + EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + segment.read(offset, holder); + + try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) + { + return valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + } + } + /** * Looks up a record by the provided id. *

    @@ -210,9 +239,9 @@ public V readFirst(K id) { EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - try (ReferencedSegments segments = selectAndReference(id)) + try (ReferencedSegments segments = selectAndReference(id)) { - for (Segment segment : segments.all()) + for (Segment segment : segments.all()) { if (segment.readFirst(id, holder)) { @@ -248,9 +277,9 @@ public V readFirstMatching(K id, Predicate condition) { EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - try (ReferencedSegments segments = selectAndReference(id)) + try (ReferencedSegments segments = selectAndReference(id)) { - for (Segment segment : segments.all()) + for (Segment segment : segments.all()) { int[] offsets = segment.index().lookUp(id); for (int offset : offsets) @@ -291,9 +320,9 @@ public V readFirstMatching(K id, Predicate condition) */ public boolean readFirst(K id, RecordConsumer consumer) { - try (ReferencedSegments segments = selectAndReference(id)) + try (ReferencedSegments segments = selectAndReference(id)) { - for (Segment segment : segments.all()) + for (Segment segment : segments.all()) if (segment.readFirst(id, consumer)) return true; } @@ -308,9 +337,9 @@ public boolean readFirst(K id, RecordConsumer consumer) public Set test(Set test) { Set present = new ObjectHashSet<>(test.size() + 1, 0.9f); - try (ReferencedSegments segments = selectAndReference(test)) + try (ReferencedSegments segments = selectAndReference(test)) { - for (Segment segment : segments.all()) + for (Segment segment : segments.all()) { for (K id : test) { @@ -340,7 +369,7 @@ public void write(K id, V record, Set hosts) try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) { valueSerializer.serialize(id, record, dob, params.userVersion()); - ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); + ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); alloc.write(id, dob.unsafeGetBufferAndFlip(), hosts); flusher.waitForFlush(alloc); } @@ -360,30 +389,27 @@ public void write(K id, V record, Set hosts) * @param id user-provided record id, expected to roughly correlate with time and go up * @param record the record to store * @param hosts hosts expected to invalidate the record - * @param executor executor to run the callback on - * @param callback the callback to run on */ - public void asyncWrite(K id, V record, Set hosts, @Nonnull Executor executor, @Nonnull AsyncWriteCallback callback) + public void asyncWrite(K id, V record, Set hosts, Object writeContext) { try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) { valueSerializer.serialize(id, record, dob, params.userVersion()); - ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); - alloc.asyncWrite(id, dob.unsafeGetBufferAndFlip(), hosts, executor, callback); + ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); + alloc.asyncWrite(id, record, dob.unsafeGetBufferAndFlip(), hosts, writeContext, callbacks); flusher.asyncFlush(alloc); } - catch (IOException e) + catch (Throwable e) { - // exception during record serialization into the scratch buffer - executor.execute(() -> callback.onFailure(e)); + callbacks.onWriteFailed(id, record, writeContext, e); } } - private ActiveSegment.Allocation allocate(int entrySize, Set hosts) + private ActiveSegment.Allocation allocate(int entrySize, Set hosts) { - ActiveSegment segment = currentSegment; + ActiveSegment segment = currentSegment; - ActiveSegment.Allocation alloc; + ActiveSegment.Allocation alloc; while (null == (alloc = segment.allocate(entrySize, hosts))) { // failed to allocate; move to a new segment with enough room @@ -397,7 +423,7 @@ private ActiveSegment.Allocation allocate(int entrySize, Set hosts) * Segment allocation logic. */ - private void advanceSegment(ActiveSegment oldSegment) + private void advanceSegment(ActiveSegment oldSegment) { while (true) { @@ -431,7 +457,7 @@ private void advanceSegment(ActiveSegment oldSegment) flusher.requestExtraFlush(); } - private void awaitAvailableSegment(ActiveSegment currentActiveSegment) + private void awaitAvailableSegment(ActiveSegment currentActiveSegment) { do { @@ -451,7 +477,7 @@ private void wakeAllocator() private void discardAvailableSegment() { - ActiveSegment next; + ActiveSegment next; synchronized (this) { next = availableSegment; @@ -540,7 +566,7 @@ private void shutDown() throws InterruptedException } } - private ActiveSegment createSegment() + private ActiveSegment createSegment() { Descriptor descriptor = Descriptor.create(directory, nextSegmentId.getAndIncrement(), params.userVersion()); return ActiveSegment.create(descriptor, params, keySupport); @@ -548,12 +574,15 @@ private ActiveSegment createSegment() private void closeAllSegments() { - Segments segments = swapSegments(ignore -> Segments.none()); + Segments segments = swapSegments(ignore -> Segments.none()); - for (ActiveSegment segment : segments.onlyActive()) - segment.closeAndIfEmptyDiscard(); - for (StaticSegment segment : segments.onlyStatic()) - segment.close(); + for (Segment segment : segments.all()) + { + if (segment.isActive()) + ((ActiveSegment) segment).closeAndIfEmptyDiscard(); + else + segment.close(); + } } /** @@ -562,29 +591,39 @@ private void closeAllSegments() * * @return a subset of segments with references to them */ - ReferencedSegments selectAndReference(Iterable ids) + ReferencedSegments selectAndReference(Iterable ids) { while (true) { - ReferencedSegments referenced = segments().selectAndReference(ids); + ReferencedSegments referenced = segments().selectAndReference(ids); if (null != referenced) return referenced; } } - ReferencedSegments selectAndReference(K id) + ReferencedSegments selectAndReference(K id) { return selectAndReference(Collections.singleton(id)); } - private Segments segments() + ReferencedSegment selectAndReference(long segmentTimestamp) + { + while (true) + { + ReferencedSegment referenced = segments().selectAndReference(segmentTimestamp); + if (null != referenced) + return referenced; + } + } + + private Segments segments() { return segments.get(); } - private Segments swapSegments(Function, Segments> transformation) + private Segments swapSegments(Function, Segments> transformation) { - Segments currentSegments, newSegments; + Segments currentSegments, newSegments; do { currentSegments = segments(); @@ -594,30 +633,24 @@ private Segments swapSegments(Function, Segments> transformati return currentSegments; } - private void addNewActiveSegment(ActiveSegment activeSegment) + private void addNewActiveSegment(ActiveSegment activeSegment) { swapSegments(current -> current.withNewActiveSegment(activeSegment)); } - private void replaceCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) + private void replaceCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) { swapSegments(current -> current.withCompletedSegment(activeSegment, staticSegment)); } - private void replaceCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) + private void replaceCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) { swapSegments(current -> current.withCompactedSegment(oldSegment, newSegment)); } - void selectSegmentToFlush(Collection> into) + void selectSegmentToFlush(Collection> into) { - ActiveSegment current = currentSegment; - for (ActiveSegment segment : segments().onlyActive()) - { - // do not sync segments that became active after flush started - if (segment.descriptor.timestamp <= current.descriptor.timestamp) - into.add(segment); - } + segments().selectActive(currentSegment.descriptor.timestamp, into); } /** @@ -631,9 +664,9 @@ void selectSegmentToFlush(Collection> into) */ private class CloseActiveSegmentRunnable implements Runnable { - private final ActiveSegment activeSegment; + private final ActiveSegment activeSegment; - CloseActiveSegmentRunnable(ActiveSegment activeSegment) + CloseActiveSegmentRunnable(ActiveSegment activeSegment) { this.activeSegment = activeSegment; } @@ -649,7 +682,7 @@ public void run() } } - void closeActiveSegmentAndOpenAsStatic(ActiveSegment activeSegment) + void closeActiveSegmentAndOpenAsStatic(ActiveSegment activeSegment) { closer.execute(new CloseActiveSegmentRunnable(activeSegment)); } @@ -665,10 +698,10 @@ void closeActiveSegmentAndOpenAsStatic(ActiveSegment activeSegment) */ public void replayStaticSegments(RecordConsumer consumer) { - List> staticSegments = new ArrayList<>(segments().onlyStatic()); - staticSegments.sort(comparing(segment -> segment.descriptor)); - - for (StaticSegment segment : staticSegments) + List> staticSegments = new ArrayList<>(); + segments().selectStatic(staticSegments); + staticSegments.sort(comparing(s -> s.descriptor)); + for (StaticSegment segment : staticSegments) segment.forEachRecord(consumer); } diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java index 6a5604b0d973..b9c060d9153e 100644 --- a/src/java/org/apache/cassandra/journal/Segment.java +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -24,7 +24,7 @@ import org.apache.cassandra.utils.*; import org.apache.cassandra.utils.concurrent.RefCounted; -abstract class Segment implements Closeable, RefCounted> +abstract class Segment implements Closeable, RefCounted> { final File file; final Descriptor descriptor; @@ -45,6 +45,12 @@ abstract class Segment implements Closeable, RefCounted> abstract Index index(); + abstract boolean isActive(); + boolean isStatic() { return !isActive(); } + + abstract ActiveSegment asActive(); + abstract StaticSegment asStatic(); + /* * Reading entries (by id, by offset, iterate) */ diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java index 96256e623806..0693997ef34d 100644 --- a/src/java/org/apache/cassandra/journal/Segments.java +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -17,123 +17,92 @@ */ package org.apache.cassandra.journal; -import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Maps; import accord.utils.Invariants; +import org.agrona.collections.Long2ObjectHashMap; +import org.apache.cassandra.utils.concurrent.Ref; import org.apache.cassandra.utils.concurrent.Refs; -import static java.util.Collections.emptyList; -import static java.util.Collections.emptyMap; - /** * Consistent, immutable view of active + static segments *

    - * TODO: an interval/range structure for StaticSegment lookup based on min/max key bounds + * TODO (performance, expected): an interval/range structure for StaticSegment lookup based on min/max key bounds */ -class Segments +class Segments { - // active segments, containing unflushed data; the tail of this queue is the one we allocate writes from - private final List> activeSegments; - - // finalised segments, no longer written to - private final Map> staticSegments; + private final Long2ObjectHashMap> segments; - // cached Iterable of concatenated active and static segments - private final Iterable> allSegments; - - Segments(List> activeSegments, Map> staticSegments) + Segments(Long2ObjectHashMap> segments) { - this.activeSegments = activeSegments; - this.staticSegments = staticSegments; - this.allSegments = Iterables.concat(onlyActive(), onlyStatic()); + this.segments = segments; } - static Segments ofStatic(Collection> segments) + static Segments of(Collection> segments) { - HashMap> staticSegments = - Maps.newHashMapWithExpectedSize(segments.size()); - for (StaticSegment segment : segments) - staticSegments.put(segment.descriptor, segment); - return new Segments<>(new ArrayList<>(), staticSegments); + Long2ObjectHashMap> newSegments = newMap(segments.size()); + for (Segment segment : segments) + newSegments.put(segment.descriptor.timestamp, segment); + return new Segments<>(newSegments); } - static Segments none() + static Segments none() { - return new Segments<>(Collections.emptyList(), Collections.emptyMap()); + return new Segments<>(emptyMap()); } - Segments withNewActiveSegment(ActiveSegment activeSegment) + Segments withNewActiveSegment(ActiveSegment activeSegment) { - ArrayList> newActiveSegments = - new ArrayList<>(activeSegments.size() + 1); - newActiveSegments.addAll(activeSegments); - newActiveSegments.add(activeSegment); - return new Segments<>(newActiveSegments, staticSegments); + Long2ObjectHashMap> newSegments = new Long2ObjectHashMap<>(segments); + Segment oldValue = newSegments.put(activeSegment.descriptor.timestamp, activeSegment); + Invariants.checkState(oldValue == null); + return new Segments<>(newSegments); } - Segments withCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) + Segments withCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) { Invariants.checkArgument(activeSegment.descriptor.equals(staticSegment.descriptor)); - - ArrayList> newActiveSegments = - new ArrayList<>(activeSegments.size() - 1); - for (ActiveSegment segment : activeSegments) - if (segment != activeSegment) - newActiveSegments.add(segment); - Invariants.checkState(newActiveSegments.size() == activeSegments.size() - 1); - - HashMap> newStaticSegments = - Maps.newHashMapWithExpectedSize(staticSegments.size() + 1); - newStaticSegments.putAll(staticSegments); - if (newStaticSegments.put(staticSegment.descriptor, staticSegment) != null) - throw new IllegalStateException(); - - return new Segments<>(newActiveSegments, newStaticSegments); + Long2ObjectHashMap> newSegments = new Long2ObjectHashMap<>(segments); + Segment oldValue = newSegments.put(staticSegment.descriptor.timestamp, staticSegment); + Invariants.checkState(oldValue == activeSegment); + return new Segments<>(newSegments); } - Segments withCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) + Segments withCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) { Invariants.checkArgument(oldSegment.descriptor.timestamp == newSegment.descriptor.timestamp); Invariants.checkArgument(oldSegment.descriptor.generation < newSegment.descriptor.generation); - - HashMap> newStaticSegments = new HashMap<>(staticSegments); - if (!newStaticSegments.remove(oldSegment.descriptor, oldSegment)) - throw new IllegalStateException(); - if (null != newStaticSegments.put(newSegment.descriptor, newSegment)) - throw new IllegalStateException(); - - return new Segments<>(activeSegments, newStaticSegments); + Long2ObjectHashMap> newSegments = new Long2ObjectHashMap<>(segments); + Segment oldValue = newSegments.put(newSegment.descriptor.timestamp, newSegment); + Invariants.checkState(oldValue == oldSegment); + return new Segments<>(newSegments); } - Segments withoutInvalidatedSegment(StaticSegment staticSegment) + Segments withoutInvalidatedSegment(StaticSegment staticSegment) { - HashMap> newStaticSegments = new HashMap<>(staticSegments); - if (!newStaticSegments.remove(staticSegment.descriptor, staticSegment)) + Long2ObjectHashMap> newSegments = new Long2ObjectHashMap<>(segments); + if (!newSegments.remove(staticSegment.descriptor.timestamp, staticSegment)) throw new IllegalStateException(); - return new Segments<>(activeSegments, newStaticSegments); + return new Segments<>(newSegments); } - Iterable> all() + Iterable> all() { - return allSegments; + return segments.values(); } - Collection> onlyActive() + void selectActive(long maxTimestamp, Collection> into) { - return activeSegments; + for (Segment segment : segments.values()) + if (segment.isActive() && segment.descriptor.timestamp <= maxTimestamp) + into.add(segment.asActive()); } - Collection> onlyStatic() + void selectStatic(Collection> into) { - return staticSegments.values(); + for (Segment segment : segments.values()) + if (segment.isStatic()) + into.add(segment.asStatic()); } /** @@ -143,50 +112,39 @@ Collection> onlyStatic() * @return a subset of segments with references to them, or {@code null} if failed to grab the refs */ @SuppressWarnings("resource") - ReferencedSegments selectAndReference(Iterable ids) + ReferencedSegments selectAndReference(Iterable ids) { - List> selectedActive = null; - for (ActiveSegment segment : onlyActive()) - { - if (segment.index.mayContainIds(ids)) - { - if (null == selectedActive) - selectedActive = new ArrayList<>(); - selectedActive.add(segment); - } - } - if (null == selectedActive) selectedActive = emptyList(); - - Map> selectedStatic = null; - for (StaticSegment segment : onlyStatic()) + Long2ObjectHashMap> selectedSegments = null; + for (Segment segment : segments.values()) { if (segment.index().mayContainIds(ids)) { - if (null == selectedStatic) - selectedStatic = new HashMap<>(); - selectedStatic.put(segment.descriptor, segment); + if (null == selectedSegments) + selectedSegments = newMap(10); + selectedSegments.put(segment.descriptor.timestamp, segment); } } - if (null == selectedStatic) selectedStatic = emptyMap(); - Refs> refs = null; - if (!selectedActive.isEmpty() || !selectedStatic.isEmpty()) + if (null == selectedSegments) + selectedSegments = emptyMap(); + + Refs> refs = null; + if (!selectedSegments.isEmpty()) { - refs = Refs.tryRef(Iterables.concat(selectedActive, selectedStatic.values())); + refs = Refs.tryRef(selectedSegments.values()); if (null == refs) return null; } - return new ReferencedSegments<>(selectedActive, selectedStatic, refs); + return new ReferencedSegments<>(selectedSegments, refs); } - static class ReferencedSegments extends Segments implements AutoCloseable + static class ReferencedSegments extends Segments implements AutoCloseable { - public final Refs> refs; + private final Refs> refs; - ReferencedSegments( - List> activeSegments, Map> staticSegments, Refs> refs) + ReferencedSegments(Long2ObjectHashMap> segments, Refs> refs) { - super(activeSegments, staticSegments); + super(segments); this.refs = refs; } @@ -197,4 +155,52 @@ public void close() refs.release(); } } + + ReferencedSegment selectAndReference(long segmentTimestamp) + { + Segment segment = segments.get(segmentTimestamp); + if (null == segment) + return new ReferencedSegment<>(null, null); + Ref> ref = segment.tryRef(); + if (null == ref) + return null; + return new ReferencedSegment<>(segment, ref); + } + + static class ReferencedSegment implements AutoCloseable + { + private final Segment segment; + private final Ref> ref; + + ReferencedSegment(Segment segment, Ref> ref) + { + this.segment = segment; + this.ref = ref; + } + + Segment segment() + { + return segment; + } + + @Override + public void close() + { + if (null != ref) + ref.release(); + } + } + + private static final Long2ObjectHashMap EMPTY_MAP = new Long2ObjectHashMap<>(); + + @SuppressWarnings("unchecked") + private static Long2ObjectHashMap emptyMap() + { + return (Long2ObjectHashMap) EMPTY_MAP; + } + + private static Long2ObjectHashMap newMap(int expectedSize) + { + return new Long2ObjectHashMap<>(0, 0.65f, false); + } } diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index 52b8d954e155..f3feaefa627b 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -38,11 +38,11 @@ * Can be compacted with input from {@code PersistedInvalidations} into a new smaller segment, * with invalidated entries removed. */ -final class StaticSegment extends Segment +final class StaticSegment extends Segment { final FileChannel channel; - private final Ref> selfRef; + private final Ref> selfRef; private final OnDiskIndex index; @@ -69,9 +69,9 @@ private StaticSegment(Descriptor descriptor, * @param descriptors descriptors of the segments to load * @return list of the loaded segments */ - static List> open(Collection descriptors, KeySupport keySupport) + static List> open(Collection descriptors, KeySupport keySupport) { - List> segments = new ArrayList<>(descriptors.size()); + List> segments = new ArrayList<>(descriptors.size()); for (Descriptor descriptor : descriptors) segments.add(open(descriptor, keySupport)); return segments; @@ -84,7 +84,7 @@ static List> open(Collection descriptors, KeySu * @return the loaded segment */ @SuppressWarnings({ "resource", "RedundantSuppression" }) - static StaticSegment open(Descriptor descriptor, KeySupport keySupport) + static StaticSegment open(Descriptor descriptor, KeySupport keySupport) { if (!Component.DATA.existsFor(descriptor)) throw new IllegalArgumentException("Data file for segment " + descriptor + " doesn't exist"); @@ -112,7 +112,7 @@ static StaticSegment open(Descriptor descriptor, KeySupport keySupport } @SuppressWarnings("resource") - private static StaticSegment internalOpen( + private static StaticSegment internalOpen( Descriptor descriptor, SyncedOffsets syncedOffsets, OnDiskIndex index, Metadata metadata, KeySupport keySupport) throws IOException { @@ -129,13 +129,13 @@ public void close() } @Override - public Ref> tryRef() + public Ref> tryRef() { return selfRef.tryRef(); } @Override - public Ref> ref() + public Ref> ref() { return selfRef.ref(); } @@ -176,6 +176,24 @@ OnDiskIndex index() return index; } + @Override + boolean isActive() + { + return false; + } + + @Override + ActiveSegment asActive() + { + throw new UnsupportedOperationException(); + } + + @Override + StaticSegment asStatic() + { + return this; + } + /** * Read the entry and specified offset into the entry holder. * Expects the record to have been written at this offset, but potentially not flushed and lost. @@ -183,7 +201,7 @@ OnDiskIndex index() @Override boolean read(int offset, EntrySerializer.EntryHolder into) { - ByteBuffer duplicate = (ByteBuffer) buffer.duplicate().position(offset); + ByteBuffer duplicate = buffer.duplicate().position(offset); try (DataInputBuffer in = new DataInputBuffer(duplicate, false)) { return EntrySerializer.tryRead(into, keySupport, duplicate, in, syncedOffsets.syncedOffset(), descriptor.userVersion); diff --git a/src/java/org/apache/cassandra/net/Message.java b/src/java/org/apache/cassandra/net/Message.java index 57359cc039e0..77edd1d0a247 100644 --- a/src/java/org/apache/cassandra/net/Message.java +++ b/src/java/org/apache/cassandra/net/Message.java @@ -34,7 +34,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.messages.ReplyContext; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; @@ -73,7 +72,7 @@ * * @param The type of the message payload. */ -public class Message implements ReplyContext +public class Message implements ResponseContext { private static final Logger logger = LoggerFactory.getLogger(Message.class); private static final NoSpamLogger noSpam1m = NoSpamLogger.getLogger(logger, 1, TimeUnit.MINUTES); @@ -93,6 +92,7 @@ public class Message implements ReplyContext } /** Sender of the message. */ + @Override public InetAddressAndPort from() { return header.from; @@ -108,6 +108,7 @@ public boolean isCrossNode() * id of the request/message. In 4.0+ can be shared between multiple messages of the same logical request, * whilst in versions above a new id would be allocated for each message sent. */ + @Override public long id() { return header.id; @@ -118,6 +119,7 @@ public Epoch epoch() return header.epoch; } + @Override public Verb verb() { return header.verb; @@ -137,6 +139,7 @@ public long createdAtNanos() return header.createdAtNanos; } + @Override public long expiresAtNanos() { return header.expiresAtNanos; @@ -342,12 +345,17 @@ public static Message forgeIdentityForTests(Message msg, InetAddressAn /** Builds a response Message with provided payload, and all the right fields inferred from request Message */ public Message responseWith(T payload) { - Message msg = outWithParam(id(), verb().responseVerb, expiresAtNanos(), payload, null, null); + Message msg = responseWith(payload, this); if (header.hasFlag(MessageFlag.URGENT)) msg = msg.withFlag(MessageFlag.URGENT); return msg; } + public static Message responseWith(T payload, ResponseContext respondTo) + { + return outWithParam(respondTo.id(), respondTo.verb().responseVerb, respondTo.expiresAtNanos(), payload, null, null); + } + /** Builds a response Message with no payload, and all the right fields inferred from request Message */ public Message emptyResponse() { @@ -362,7 +370,12 @@ public Message failureResponse(RequestFailureReason reason) public Message failureResponse(RequestFailureReason reason, @Nullable Throwable failure) { - return failureResponse(id(), expiresAtNanos(), new RequestFailure(reason, failure)); + return failureResponse(reason, failure, this); + } + + public static Message failureResponse(RequestFailureReason reason, @Nullable Throwable failure, ResponseContext respondTo) + { + return failureResponse(respondTo.id(), respondTo.expiresAtNanos(), new RequestFailure(reason, failure)); } static Message failureResponse(long id, long expiresAtNanos, RequestFailure reason) diff --git a/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java b/src/java/org/apache/cassandra/net/ResponseContext.java similarity index 73% rename from src/java/org/apache/cassandra/journal/AsyncWriteCallback.java rename to src/java/org/apache/cassandra/net/ResponseContext.java index 53932ec4ef25..4f254e3e6a45 100644 --- a/src/java/org/apache/cassandra/journal/AsyncWriteCallback.java +++ b/src/java/org/apache/cassandra/net/ResponseContext.java @@ -15,15 +15,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.cassandra.journal; +package org.apache.cassandra.net; -public interface AsyncWriteCallback extends Runnable -{ - AsyncWriteCallback NOOP = new AsyncWriteCallback() - { - @Override public void onFailure(Throwable error) {} - @Override public void run() {} - }; +import accord.messages.ReplyContext; +import org.apache.cassandra.locator.InetAddressAndPort; - void onFailure(Throwable error); +public interface ResponseContext extends ReplyContext +{ + long id(); + InetAddressAndPort from(); + Verb verb(); + long expiresAtNanos(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index e5f83ab6ff1f..c823a6d9258c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -202,7 +202,6 @@ public AccordCommandStore(int id, CommandsForKeyUpdate::estimatedSizeOnHeap, keyCoordinator::createUpdatesNode); -//>>>>>>> 701eeff2b4 (deps pruning integration) AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead) -> { executor.submit(() -> { if (rejectBefore != null) @@ -365,7 +364,7 @@ Command loadCommand(TxnId txnId) Runnable saveCommand(Command before, Command after) { Mutation mutation = AccordKeyspace.getCommandMutation(id, before, after, nextSystemTimestampMicros()); - return null != mutation ? mutation::apply : null; + return null != mutation ? mutation::applyUnsafe : null; } boolean validateCommand(TxnId txnId, Command evicting) @@ -378,7 +377,6 @@ boolean validateTimestampsForKey(RoutableKey key, TimestampsForKey evicting) { TimestampsForKey reloaded = AccordKeyspace.unsafeLoadTimestampsForKey(this, (PartitionKey) key); return Objects.equals(evicting, reloaded); - } TimestampsForKey loadTimestampsForKey(RoutableKey key) @@ -423,14 +421,14 @@ private Runnable saveCommandsForKey(CommandsForKey before, CommandsForKey after) private Runnable saveTimestampsForKey(TimestampsForKey before, TimestampsForKey after) { Mutation mutation = AccordKeyspace.getTimestampsForKeyMutation(id, before, after, nextSystemTimestampMicros()); - return null != mutation ? mutation::apply : null; + return null != mutation ? mutation::applyUnsafe : null; } @Nullable private Runnable saveCommandsForKeyUpdate(CommandsForKeyUpdate before, CommandsForKeyUpdate after) { Mutation mutation = AccordKeyspace.getCommandsForKeyMutation(id, after, nextSystemTimestampMicros()); - return null != mutation ? mutation::apply : null; + return null != mutation ? mutation::applyUnsafe : null; } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 609b3dbc54a9..ee1f79625aa9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -19,13 +19,14 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import java.util.function.Predicate; import java.util.zip.Checksum; @@ -36,41 +37,55 @@ import com.google.common.collect.ListMultimap; import com.google.common.collect.Multimap; import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; + +import org.agrona.collections.Long2ObjectHashMap; +import org.agrona.collections.LongArrayList; +import org.agrona.collections.ObjectHashSet; +import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.local.Node.Id; +import accord.local.Node; import accord.local.SerializerSupport; import accord.messages.AbstractEpochRequest; import accord.messages.Accept; import accord.messages.Apply; import accord.messages.BeginRecovery; import accord.messages.Commit; -import accord.messages.LocalMessage; +import accord.messages.LocalRequest; import accord.messages.Message; import accord.messages.MessageType; import accord.messages.PreAccept; import accord.messages.Propagate; +import accord.messages.Request; import accord.messages.TxnRequest; import accord.primitives.Ballot; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; -import org.agrona.collections.ObjectHashSet; +import accord.utils.MapReduceConsume; +import org.apache.cassandra.concurrent.Interruptible; +import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; +import org.apache.cassandra.concurrent.SequentialExecutorPlus; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.journal.AsyncWriteCallback; +import org.apache.cassandra.journal.AsyncCallbacks; import org.apache.cassandra.journal.Journal; import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.journal.Params; import org.apache.cassandra.journal.ValueSerializer; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.interop.AccordInteropApply; import org.apache.cassandra.service.accord.interop.AccordInteropCommit; +import org.apache.cassandra.net.ResponseContext; +import org.apache.cassandra.net.Verb; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; @@ -83,11 +98,14 @@ import org.apache.cassandra.service.accord.serializers.RecoverySerializers; import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.concurrent.Semaphore; +import org.jctools.queues.SpscLinkedQueue; import static accord.messages.MessageType.ACCEPT_INVALIDATE_REQ; import static accord.messages.MessageType.ACCEPT_REQ; import static accord.messages.MessageType.APPLY_MAXIMAL_REQ; import static accord.messages.MessageType.APPLY_MINIMAL_REQ; +import static accord.messages.MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ; import static accord.messages.MessageType.BEGIN_INVALIDATE_REQ; import static accord.messages.MessageType.BEGIN_RECOVER_REQ; import static accord.messages.MessageType.COMMIT_INVALIDATE_REQ; @@ -102,6 +120,10 @@ import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; import static accord.messages.MessageType.SET_GLOBALLY_DURABLE_REQ; import static accord.messages.MessageType.SET_SHARD_DURABLE_REQ; +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; @@ -109,6 +131,12 @@ import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MINIMAL_REQ; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ; +import static org.apache.cassandra.service.accord.serializers.ReadDataSerializers.applyThenWaitUntilApplied; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializeList; +import static org.apache.cassandra.utils.CollectionSerializers.serializedListSize; +import static org.apache.cassandra.utils.concurrent.Semaphore.newSemaphore; +import static org.apache.cassandra.utils.vint.VIntCoding.computeUnsignedVIntSize; public class AccordJournal implements Shutdownable { @@ -164,23 +192,39 @@ public int userVersion() } }; - final File directory; - final Journal journal; + private final File directory; + private final Journal journal; + private final AccordEndpointMapper endpointMapper; + + /** + * A cache of deserialized journal records we keep to avoid fetching them from log when free memory allows it. + * TODO (expected, performance): cap memory used for cached records + */ + private final NonBlockingHashMap cachedRecords = new NonBlockingHashMap<>(); + + Node node; enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } private volatile Status status = Status.INITIALIZED; + private final FrameAggregator frameAggregator = new FrameAggregator(); + private final FrameApplicator frameApplicator = new FrameApplicator(); + @VisibleForTesting - public AccordJournal() + public AccordJournal(AccordEndpointMapper endpointMapper) { - directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); - journal = new Journal<>("AccordJournal", directory, PARAMS, Key.SUPPORT, RECORD_SERIALIZER); + this.directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); + this.journal = new Journal<>("AccordJournal", directory, PARAMS, new JournalCallbacks(), Key.SUPPORT, RECORD_SERIALIZER); + this.endpointMapper = endpointMapper; } - public AccordJournal start() + public AccordJournal start(Node node) { Invariants.checkState(status == Status.INITIALIZED); + this.node = node; status = Status.STARTING; + frameApplicator.start(); + frameAggregator.start(); journal.start(); status = Status.STARTED; return this; @@ -198,6 +242,8 @@ public void shutdown() Invariants.checkState(status == Status.STARTED); status = Status.TERMINATING; journal.shutdown(); + frameAggregator.shutdown(); + frameApplicator.shutdown(); status = Status.TERMINATED; } @@ -211,21 +257,37 @@ public Object shutdownNow() @Override public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { - // TODO (expected) + // TODO (expected, other) return true; } - void appendAuxiliaryRecord(AuxiliaryRecord record) + /** + * Auxiliary records are journal entries that aren't Accord protocol requests - such as {@link FrameRecord}. + */ + void appendAuxiliaryRecord(AuxiliaryRecord record, Object context) { Key key = new Key(record.timestamp, record.type()); - journal.write(key, record, SENTINEL_HOSTS); + journal.asyncWrite(key, record, SENTINEL_HOSTS, context); } - public void appendMessage(Message message, Executor executor, AsyncWriteCallback callback) + /** + * Accord protocol messages originating from remote nodes. + */ + public void appendRemoteRequest(Request request, ResponseContext context) { - Type type = Type.fromMessageType(message.type()); - Key key = new Key(type.txnId(message), type); - journal.asyncWrite(key, message, SENTINEL_HOSTS, executor, callback); + Type type = Type.fromMessageType(request.type()); + Key key = new Key(type.txnId(request), type); + journal.asyncWrite(key, request, SENTINEL_HOSTS, context); + } + + /** + * Accord protocol messages originating from local node, e.g. Propagate. + */ + public void appendLocalRequest(LocalRequest request) + { + Type type = Type.fromMessageType(request.type()); + Key key = new Key(type.txnId(request), type); + journal.asyncWrite(key, request, SENTINEL_HOSTS, null); } @VisibleForTesting @@ -237,17 +299,267 @@ public void appendMessageBlocking(Message message) } @VisibleForTesting - public M readMessage(TxnId txnId, Type type, Class clazz) + public M readMessage(TxnId txnId, MessageType messageType, Class clazz) { - return clazz.cast(journal.readFirst(new Key(txnId, type))); + for (Type type : Type.synonymousTypesFromMessageType(messageType)) + { + M message = clazz.cast(journal.readFirst(new Key(txnId, type))); + if (null != message) return message; + } + return null; } - private M readMessage(TxnId txnId, Type type, Class clazz, Predicate condition) + private M readMessage(TxnId txnId, MessageType messageType, Class clazz, Predicate condition) { - return clazz.cast(journal.readFirstMatching(new Key(txnId, type), condition)); + for (Type type : Type.synonymousTypesFromMessageType(messageType)) + { + M message = clazz.cast(journal.readFirstMatching(new Key(txnId, type), condition)); + if (null != message) return message; + } + return null; } - static class Key + private static class Pointer implements Comparable + { + final long segment; // unique segment id + final int position; // record start position within the segment + + Pointer(long segment, int position) + { + this.segment = segment; + this.position = position; + } + + @Override + public boolean equals(Object other) + { + if (this == other) + return true; + if (!(other instanceof Pointer)) + return false; + Pointer that = (Pointer) other; + return this.segment == that.segment + && this.position == that.position; + } + + @Override + public int hashCode() + { + return Long.hashCode(segment) + position * 31; + } + + @Override + public String toString() + { + return "(" + segment + ", " + position + ')'; + } + + @Override + public int compareTo(Pointer that) + { + int cmp = Longs.compare(this.segment, that.segment); + return cmp != 0 ? cmp : Ints.compare(this.position, that.position); + } + + int serializedSize() + { + return computeUnsignedVIntSize(segment) + computeUnsignedVIntSize(position); + } + + void serialize(DataOutputPlus out) throws IOException + { + out.writeUnsignedVInt(segment); + out.writeUnsignedVInt32(position); + } + + static Pointer deserialize(DataInputPlus in) throws IOException + { + long segment = in.readUnsignedVInt(); + int position = in.readUnsignedVInt32(); + return new Pointer(segment, position); + } + + static final IVersionedSerializer SERIALIZER = new IVersionedSerializer<>() + { + @Override + public void serialize(Pointer p, DataOutputPlus out, int version) throws IOException + { + p.serialize(out); + } + + @Override + public Pointer deserialize(DataInputPlus in, int version) throws IOException + { + return Pointer.deserialize(in); + } + + @Override + public long serializedSize(Pointer p, int version) + { + return Ints.checkedCast(p.serializedSize()); + } + }; + } + + private class JournalCallbacks implements AsyncCallbacks + { + /** + * Queue up the record for either frame aggregation (if a protocol message) or frame application (if a frame). + */ + @Override + public void onWrite(long segment, int position, int size, Key key, Object value, Object writeContext) + { + Pointer pointer = new Pointer(segment, position); + cachedRecords.put(pointer, value); + + /* + * if remote request, extract response context + * if local request, extract callback + * if frame, register for application on flush + */ + if (key.type.isRemoteRequest()) + frameAggregator.onWrite(RemoteRequestContext.create(((Request) value).waitForEpoch(), (ResponseContext) writeContext, pointer)); + else if (key.type.isLocalRequest()) + frameAggregator.onWrite(LocalRequestContext.create((LocalRequest) value, pointer)); + else + frameApplicator.onWrite(pointer, size, (FrameContext) writeContext); + } + + @Override + public void onWriteFailed(Key key, Object value, Object writeContext, Throwable cause) + { + if (key.type.isRemoteRequest()) + onRemoteRequestWriteFailed((Request) value, (RemoteRequestContext) writeContext, cause); + else if (key.type.isLocalRequest()) + onLocalRequestWriteFailed((LocalRequestContext) writeContext, cause); + else + onFrameWriteFailed((FrameRecord) value, (FrameContext) writeContext, cause); + } + + private void onRemoteRequestWriteFailed(Request request, RemoteRequestContext context, Throwable cause) + { + request.preProcess(node, endpointMapper.mappedId(context.from()), context); + + /* + * Except for Commit.Invalidate, which doesn't return a reply on success or failure, + * all requests here implement MapReduceLocal, with accept() handling both the success and the failure + * response returns. + */ + if (request instanceof MapReduceConsume) + ((MapReduceConsume) request).accept(null, cause); + else + node.agent().onUncaughtException(cause); + } + + private void onLocalRequestWriteFailed(LocalRequestContext context, Throwable cause) + { + context.callback.accept(null, cause); + } + + private void onFrameWriteFailed(FrameRecord frame, FrameContext context, Throwable cause) + { + // TODO: panic + } + + @Override + public void onFlush(long segment, int position) + { + frameApplicator.onFlush(segment, position); // will apply flushed frames in correct order in an executor + } + + @Override + public void onFlushFailed(Throwable cause) + { + // TODO: panic + } + } + + /* + * Context necessary to process log records + */ + + private static class RequestContext + { + final long waitForEpoch; + final Pointer pointer; + + RequestContext(long waitForEpoch, Pointer pointer) + { + this.waitForEpoch = waitForEpoch; + this.pointer = pointer; + } + } + + private static class LocalRequestContext extends RequestContext + { + private final BiConsumer callback; + + LocalRequestContext(long waitForEpoch, BiConsumer callback, Pointer pointer) + { + super(waitForEpoch, pointer); + this.callback = callback; + } + + static LocalRequestContext create(LocalRequest request, Pointer pointer) + { + return new LocalRequestContext(request.waitForEpoch(), request.callback(), pointer); + } + } + + /** + * Barebones response context not holding a reference to the entire message + */ + private static class RemoteRequestContext extends RequestContext implements ResponseContext + { + private final long id; + private final InetAddressAndPort from; + private final Verb verb; + private final long expiresAtNanos; + + RemoteRequestContext(long waitForEpoch, long id, InetAddressAndPort from, Verb verb, long expiresAtNanos, Pointer pointer) + { + super(waitForEpoch, pointer); + this.id = id; + this.from = from; + this.verb = verb; + this.expiresAtNanos = expiresAtNanos; + } + + static RemoteRequestContext create(long waitForEpoch, ResponseContext context, Pointer pointer) + { + return new RemoteRequestContext(waitForEpoch, context.id(), context.from(), context.verb(), context.expiresAtNanos(), pointer); + } + + @Override + public long id() + { + return id; + } + + @Override + public InetAddressAndPort from() + { + return from; + } + + @Override + public Verb verb() + { + return verb; + } + + @Override + public long expiresAtNanos() + { + return expiresAtNanos; + } + } + + /* + * Records ser/de in the Journal + */ + + public static class Key { final Timestamp timestamp; final Type type; @@ -266,7 +578,7 @@ static class Key * when ordering timestamps. This is done for more precise elimination of candidate * segments by min/max record key in segment. */ - static final KeySupport SUPPORT = new KeySupport() + static final KeySupport SUPPORT = new KeySupport<>() { private static final int HLC_OFFSET = 0; private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; @@ -434,7 +746,7 @@ public String toString() } } - static final ValueSerializer RECORD_SERIALIZER = new ValueSerializer() + private static final ValueSerializer RECORD_SERIALIZER = new ValueSerializer<>() { @Override public int serializedSize(Key key, Object record, int userVersion) @@ -497,7 +809,7 @@ interface TxnIdProvider private static final TxnIdProvider EPOCH = msg -> ((AbstractEpochRequest) msg).txnId; private static final TxnIdProvider TXN = msg -> ((TxnRequest) msg).txnId; - private static final TxnIdProvider LOCAL = msg -> ((LocalMessage) msg).primaryTxnId(); + private static final TxnIdProvider LOCAL = msg -> ((LocalRequest) msg).primaryTxnId(); private static final TxnIdProvider INVL = msg -> ((Commit.Invalidate) msg).primaryTxnId(); /** @@ -511,50 +823,55 @@ interface TxnIdProvider public enum Type implements ValueSerializer { /* Auxiliary journal records */ - REPLAY (0, ReplayRecord.SERIALIZER), + FRAME (0, FrameRecord.SERIALIZER), /* Accord protocol requests */ - PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), - ACCEPT (65, ACCEPT_REQ, AcceptSerializers.request, TXN ), - ACCEPT_INVALIDATE (66, ACCEPT_INVALIDATE_REQ, AcceptSerializers.invalidate, EPOCH), - COMMIT_MINIMAL (67, COMMIT_MINIMAL_REQ, CommitSerializers.request, TXN ), - COMMIT_MAXIMAL (68, COMMIT_MAXIMAL_REQ, CommitSerializers.request, TXN ), - COMMIT_INVALIDATE (69, COMMIT_INVALIDATE_REQ, CommitSerializers.invalidate, INVL ), - APPLY_MINIMAL (70, APPLY_MINIMAL_REQ, ApplySerializers.request, TXN ), - APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), - - INTEROP_COMMIT_MINIMAL (90, INTEROP_COMMIT_MINIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_COMMIT_MAXIMAL (91, INTEROP_COMMIT_MAXIMAL_REQ, COMMIT_MAXIMAL_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_APPLY_MINIMAL (92, INTEROP_APPLY_MINIMAL_REQ, APPLY_MINIMAL_REQ, AccordInteropApply.serializer, TXN), - INTEROP_APPLY_MAXIMAL (93, INTEROP_APPLY_MAXIMAL_REQ, APPLY_MAXIMAL_REQ, AccordInteropApply.serializer, TXN), - - BEGIN_RECOVER (72, BEGIN_RECOVER_REQ, RecoverySerializers.request, TXN ), - BEGIN_INVALIDATE (73, BEGIN_INVALIDATE_REQ, BeginInvalidationSerializers.request, EPOCH), - INFORM_OF_TXN (74, INFORM_OF_TXN_REQ, InformOfTxnIdSerializers.request, EPOCH), - INFORM_DURABLE (75, INFORM_DURABLE_REQ, InformDurableSerializers.request, TXN ), - SET_SHARD_DURABLE (76, SET_SHARD_DURABLE_REQ, SetDurableSerializers.shardDurable, EPOCH), - SET_GLOBALLY_DURABLE (77, SET_GLOBALLY_DURABLE_REQ, SetDurableSerializers.globallyDurable, EPOCH), + PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), + ACCEPT (65, ACCEPT_REQ, AcceptSerializers.request, TXN ), + ACCEPT_INVALIDATE (66, ACCEPT_INVALIDATE_REQ, AcceptSerializers.invalidate, EPOCH), + COMMIT_MINIMAL (67, COMMIT_MINIMAL_REQ, CommitSerializers.request, TXN ), + COMMIT_MAXIMAL (68, COMMIT_MAXIMAL_REQ, CommitSerializers.request, TXN ), + COMMIT_INVALIDATE (69, COMMIT_INVALIDATE_REQ, CommitSerializers.invalidate, INVL ), + APPLY_MINIMAL (70, APPLY_MINIMAL_REQ, ApplySerializers.request, TXN ), + APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), + APPLY_THEN_WAIT_UNTIL_APPLIED (72, APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, applyThenWaitUntilApplied, EPOCH), + + BEGIN_RECOVER (73, BEGIN_RECOVER_REQ, RecoverySerializers.request, TXN ), + BEGIN_INVALIDATE (74, BEGIN_INVALIDATE_REQ, BeginInvalidationSerializers.request, EPOCH), + INFORM_OF_TXN (75, INFORM_OF_TXN_REQ, InformOfTxnIdSerializers.request, EPOCH), + INFORM_DURABLE (76, INFORM_DURABLE_REQ, InformDurableSerializers.request, TXN ), + SET_SHARD_DURABLE (77, SET_SHARD_DURABLE_REQ, SetDurableSerializers.shardDurable, EPOCH), + SET_GLOBALLY_DURABLE (78, SET_GLOBALLY_DURABLE_REQ, SetDurableSerializers.globallyDurable, EPOCH), /* Accord local messages */ - PROPAGATE_PRE_ACCEPT (78, PROPAGATE_PRE_ACCEPT_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_COMMIT (79, PROPAGATE_COMMIT_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_APPLY (80, PROPAGATE_APPLY_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_OTHER (81, PROPAGATE_OTHER_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_PRE_ACCEPT (79, PROPAGATE_PRE_ACCEPT_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_COMMIT (80, PROPAGATE_COMMIT_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_APPLY (81, PROPAGATE_APPLY_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_OTHER (82, PROPAGATE_OTHER_MSG, FetchSerializers.propagate, LOCAL), + + /* C* interop messages */ + INTEROP_COMMIT_MINIMAL (83, INTEROP_COMMIT_MINIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropCommit.serializer, TXN), + INTEROP_COMMIT_MAXIMAL (84, INTEROP_COMMIT_MAXIMAL_REQ, COMMIT_MAXIMAL_REQ, AccordInteropCommit.serializer, TXN), + INTEROP_APPLY_MINIMAL (85, INTEROP_APPLY_MINIMAL_REQ, APPLY_MINIMAL_REQ, AccordInteropApply.serializer, TXN), + INTEROP_APPLY_MAXIMAL (86, INTEROP_APPLY_MAXIMAL_REQ, APPLY_MAXIMAL_REQ, AccordInteropApply.serializer, TXN), ; final int id; + /** * An incoming message of a given type from Accord's perspective might have multiple * concrete implementations some of which are supplied by the Cassandra integration. * The incoming type specifies the handling for writing out a message to the journal. */ final MessageType incomingType; + /** - * The outgoing type is the type that will be returned to Accord and it must be a subclass of the incoming type. - * + * The outgoing type is the type that will be returned to Accord; must be a subclass of the incoming type. + *

    * This type will always be from accord.messages.MessageType and never from the extended types in the integration. */ final MessageType outgoingType; + final TxnIdProvider txnIdProvider; final ValueSerializer serializer; @@ -647,7 +964,7 @@ static Type fromId(int id) static List synonymousTypesFromMessageType(MessageType msgType) { List synonymousTypes = msgTypeToSynonymousTypesMap.get(msgType); - if (null == synonymousTypes) + if (synonymousTypes.isEmpty()) throw new IllegalArgumentException("Unsupported MessageType " + msgType); return synonymousTypes; } @@ -660,6 +977,31 @@ static Type fromMessageType(MessageType msgType) return type; } + boolean isAuxiliary() + { + return outgoingType == null; + } + + boolean isFrame() + { + return this == FRAME; + } + + boolean isRequest() + { + return outgoingType != null; + } + + boolean isRemoteRequest() + { + return isRequest() && outgoingType.isRemote(); + } + + boolean isLocalRequest() + { + return isRequest() && outgoingType.isLocal(); + } + @Override public int serializedSize(Key key, Object record, int userVersion) { @@ -699,7 +1041,236 @@ private static int msVersion(int version) } } - static abstract class AuxiliaryRecord + /* + * Record framing logic + */ + + /** + * In order to enable the reorder buffer and delayed execution of requests of yet unknown epoch, we explicitly + * group requests for execution in {@link FrameRecord} records. Journal's onWrite() callback submits written + * protocol messages to {@link FrameAggregator}, which creates and writes the frame record to the journal. + * Once written, the frame record is submitted to {@link FrameApplicator}, which will process all the framed + * requests once the frame has been flushed to disk. + */ + private final class FrameAggregator implements Interruptible.Task + { + /* external MPSC pending request queue */ + private final ManyToOneConcurrentLinkedQueue unframedRequests = new ManyToOneConcurrentLinkedQueue<>(); + + private final LongArrayList waitForEpochs = new LongArrayList(); + private final Long2ObjectHashMap> delayedRequests = new Long2ObjectHashMap<>(); + + private volatile Interruptible executor; + + // a signal and flag that callers outside the aggregator thread can use + // to signal they want the aggregator to run again + private final Semaphore haveWork = newSemaphore(1); + + void onWrite(RequestContext context) + { + unframedRequests.add(context); + haveWork.release(1); + } + + void notifyOfEpoch() + { + haveWork.release(1); + } + + void start() + { + executor = executorFactory().infiniteLoop("AccordJournal#FrameAggregator", this, SAFE, NON_DAEMON, SYNCHRONIZED); + } + + void shutdown() + { + executor.shutdown(); + } + + @Override + public void run(Interruptible.State state) throws InterruptedException + { + if (!unframedRequests.isEmpty() || !delayedRequests.isEmpty()) + doRun(); + haveWork.acquire(1); + } + + private void doRun() + { + ArrayList requests = null; + + /* + * Deal with delayed requests + */ + + waitForEpochs.sort(null); + + for (int i = 0; i < waitForEpochs.size(); i++) + { + long waitForEpoch = waitForEpochs.getLong(i); + if (!node.topology().hasEpoch(waitForEpoch)) + break; + List delayed = delayedRequests.remove(waitForEpoch); + if (null == requests) requests = new ArrayList<>(delayed.size()); + requests.addAll(delayed); + } + + waitForEpochs.removeIfLong(epoch -> !delayedRequests.containsKey(epoch)); + + /* + * Deal with regular pending requests + */ + + RequestContext request; + while (null != (request = unframedRequests.poll())) + { + long waitForEpoch = request.waitForEpoch; + if (!node.topology().hasEpoch(waitForEpoch)) + { + delayedRequests.computeIfAbsent(waitForEpoch, ignore -> new ArrayList<>()).add(request); + if (!waitForEpochs.containsLong(waitForEpoch)) + { + waitForEpochs.addLong(waitForEpoch); + node.withEpoch(waitForEpoch, this::notifyOfEpoch); + } + } + else + { + if (null == requests) requests = new ArrayList<>(); + requests.add(request); + } + } + + if (requests != null) + { + ArrayList pointers = new ArrayList<>(requests.size()); + for (RequestContext req : requests) pointers.add(req.pointer); + FrameRecord frame = new FrameRecord(node.uniqueNow(), pointers); + FrameContext context = new FrameContext(requests); + appendAuxiliaryRecord(frame, context); + } + } + } + + /** + * Processes the requests that have been grouped by {@link FrameAggregator}. + * Gets the aggregated frames containing previously written requests/messages, + * and sorts and "applies" them once part of the journal that fully contains them is flushed. + */ + private final class FrameApplicator implements Runnable + { + /** external SPSC written frame queue */ + private final SpscLinkedQueue newFrames = new SpscLinkedQueue<>(); + + /* single-thread accessed internal frame buffer */ + private final ArrayList pendingFrames = new ArrayList<>(); + + /* furthest flushed journal segment + position */ + private volatile Pointer flushedUntil = null; + + private volatile SequentialExecutorPlus executor; + + /* invoked from FrameGenerator thread via appendAuxiliaryRecord() call */ + void onWrite(Pointer start, int size, FrameContext context) + { + newFrames.add(new PendingFrame(start, new Pointer(start.segment, start.position + size), context)); + } + + /* invoked only from Journal Flusher thread (single) */ + void onFlush(long segment, int position) + { + flushedUntil = new Pointer(segment, position); + executor.submit(this); + } + + void start() + { + executor = executorFactory().sequential("AccordJournal#FrameApplicator"); + } + + void shutdown() + { + executor.shutdown(); + } + + @Override + public void run() + { + if (newFrames.drain(pendingFrames::add) > 0) + { + /* order by position in the journal, DESC */ + pendingFrames.sort((f1, f2) -> f2.start.compareTo(f1.start)); + } + + Pointer flushedUntil = this.flushedUntil; + for (int i = pendingFrames.size() - 1; i >= 0; i--) + { + PendingFrame frame = pendingFrames.get(i); + if (frame.end.compareTo(flushedUntil) > 0) + break; + applyFrame((FrameRecord) cachedRecords.remove(frame.start), frame.context); + pendingFrames.remove(i); + } + } + + private void applyFrame(FrameRecord frame, FrameContext context) + { + Invariants.checkState(frame.pointers.size() == context.requestContexts.size()); + for (int i = 0; i < frame.pointers.size(); i++) + applyRequest(frame.pointers.get(i), context.requestContexts.get(i)); + } + + private void applyRequest(Pointer pointer, RequestContext context) + { + Request request = (Request) cachedRecords.remove(pointer); + Type type = Type.fromMessageType(request.type()); + + if (type.isRemoteRequest()) + { + RemoteRequestContext ctx = (RemoteRequestContext) context; + Id from = endpointMapper.mappedId(ctx.from()); + request.process(node, from, ctx); + } + else + { + Invariants.checkState(type.isLocalRequest()); + LocalRequestContext ctx = (LocalRequestContext) context; + //noinspection unchecked,rawtypes + ((LocalRequest) request).process(node, ctx.callback); + } + } + + /** + * Frame that has been written to the journal (implying all the requests referenced by it also have been written), + * but have not been process by the frame applicaticator yet. + * Will be processed by the frame applicator once the journal has flushed the frame record. + */ + private final class PendingFrame + { + final Pointer start; + final Pointer end; + final FrameContext context; + + PendingFrame(Pointer start, Pointer end, FrameContext context) + { + this.start = start; + this.end = end; + this.context = context; + } + } + } + + private static final class FrameContext + { + final List requestContexts; + + FrameContext(List requestContexts) + { + this.requestContexts = requestContexts; + } + } + + private static abstract class AuxiliaryRecord { final Timestamp timestamp; @@ -711,49 +1282,54 @@ static abstract class AuxiliaryRecord abstract Type type(); } - /* - * Placeholder for future record. - */ - static final class ReplayRecord extends AuxiliaryRecord + private static final class FrameRecord extends AuxiliaryRecord { - ReplayRecord(Timestamp timestamp) + final List pointers; + + FrameRecord(Timestamp timestamp, List pointers) { super(timestamp); + this.pointers = pointers; } @Override Type type() { - return Type.REPLAY; + return Type.FRAME; } - static final ValueSerializer SERIALIZER = new ValueSerializer() + static final ValueSerializer SERIALIZER = new ValueSerializer<>() { @Override - public int serializedSize(Key key, ReplayRecord record, int userVersion) + public int serializedSize(Key key, FrameRecord frame, int userVersion) { - return 0; + return Ints.checkedCast(serializedListSize(frame.pointers, userVersion, Pointer.SERIALIZER)); } @Override - public void serialize(Key key, ReplayRecord record, DataOutputPlus out, int userVersion) + public void serialize(Key key, FrameRecord frame, DataOutputPlus out, int userVersion) throws IOException { + serializeList(frame.pointers, out, userVersion, Pointer.SERIALIZER); } @Override - public ReplayRecord deserialize(Key key, DataInputPlus in, int userVersion) + public FrameRecord deserialize(Key key, DataInputPlus in, int userVersion) throws IOException { - return new ReplayRecord(key.timestamp); + return new FrameRecord(key.timestamp, deserializeList(in, userVersion, Pointer.SERIALIZER)); } }; } + /* + * Message provider implementation + */ + SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) { return LOG_MESSAGE_PROVIDER ? new LoggingMessageProvider(txnId, new MessageProvider(txnId)) : new MessageProvider(txnId); } - final class MessageProvider implements SerializerSupport.MessageProvider + private final class MessageProvider implements SerializerSupport.MessageProvider { final TxnId txnId; @@ -779,65 +1355,65 @@ public Set test(Set messages) @Override public PreAccept preAccept() { - return readMessage(txnId, Type.PRE_ACCEPT, PreAccept.class); + return readMessage(txnId, PRE_ACCEPT_REQ, PreAccept.class); } @Override public BeginRecovery beginRecover() { - return readMessage(txnId, Type.BEGIN_RECOVER, BeginRecovery.class); + return readMessage(txnId, BEGIN_RECOVER_REQ, BeginRecovery.class); } @Override public Propagate propagatePreAccept() { - return readMessage(txnId, Type.PROPAGATE_PRE_ACCEPT, Propagate.class); + return readMessage(txnId, PROPAGATE_PRE_ACCEPT_MSG, Propagate.class); } @Override public Accept accept(Ballot ballot) { - return readMessage(txnId, Type.ACCEPT, Accept.class, (accept) -> ((Accept) accept).ballot.equals(ballot)); + return readMessage(txnId, ACCEPT_REQ, Accept.class, (accept) -> ((Accept) accept).ballot.equals(ballot)); } @Override public Commit commitMinimal() { - return readMessage(txnId, Type.COMMIT_MINIMAL, Commit.class); + return readMessage(txnId, COMMIT_MINIMAL_REQ, Commit.class); } @Override public Commit commitMaximal() { - return readMessage(txnId, Type.COMMIT_MAXIMAL, Commit.class); + return readMessage(txnId, COMMIT_MAXIMAL_REQ, Commit.class); } @Override public Propagate propagateCommit() { - return readMessage(txnId, Type.PROPAGATE_COMMIT, Propagate.class); + return readMessage(txnId, PROPAGATE_COMMIT_MSG, Propagate.class); } @Override public Apply applyMinimal() { - return readMessage(txnId, Type.APPLY_MINIMAL, Apply.class); + return readMessage(txnId, APPLY_MINIMAL_REQ, Apply.class); } @Override public Apply applyMaximal() { - return readMessage(txnId, Type.APPLY_MAXIMAL, Apply.class); + return readMessage(txnId, APPLY_MAXIMAL_REQ, Apply.class); } @Override public Propagate propagateApply() { - return readMessage(txnId, Type.PROPAGATE_APPLY, Propagate.class); + return readMessage(txnId, PROPAGATE_APPLY_MSG, Propagate.class); } } - final class LoggingMessageProvider implements SerializerSupport.MessageProvider + private final class LoggingMessageProvider implements SerializerSupport.MessageProvider { private final TxnId txnId; private final MessageProvider provider; diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 336bc5ee5a0f..64f4ced5da76 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -48,6 +48,7 @@ import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessageFlag; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.ResponseContext; import org.apache.cassandra.net.Verb; import static accord.messages.MessageType.Kind.REMOTE; @@ -58,15 +59,14 @@ public class AccordMessageSink implements MessageSink public static final class AccordMessageType extends MessageType { - public static final MessageType INTEROP_READ_REQ = amt(REMOTE, false); - public static final MessageType INTEROP_READ_RSP = amt(REMOTE, false); - public static final MessageType INTEROP_READ_REPAIR_REQ = amt(REMOTE, false); - public static final MessageType INTEROP_READ_REPAIR_RSP = amt(REMOTE, false); - public static final MessageType INTEROP_COMMIT_MINIMAL_REQ = amt(REMOTE, true ); - public static final MessageType INTEROP_COMMIT_MAXIMAL_REQ = amt(REMOTE, true ); - public static final MessageType INTEROP_APPLY_MINIMAL_REQ = amt(REMOTE, true ); - public static final MessageType INTEROP_APPLY_MAXIMAL_REQ = amt(REMOTE, true ); - + public static final AccordMessageType INTEROP_READ_REQ = remote("INTEROP_READ_REQ", false); + public static final AccordMessageType INTEROP_READ_RSP = remote("INTEROP_READ_RSP", false); + public static final AccordMessageType INTEROP_READ_REPAIR_REQ = remote("INTEROP_READ_REPAIR_REQ", false); + public static final AccordMessageType INTEROP_READ_REPAIR_RSP = remote("INTEROP_READ_REPAIR_RSP", false); + public static final AccordMessageType INTEROP_COMMIT_MINIMAL_REQ = remote("INTEROP_COMMIT_MINIMAL_REQ", true ); + public static final AccordMessageType INTEROP_COMMIT_MAXIMAL_REQ = remote("INTEROP_COMMIT_MAXIMAL_REQ", true ); + public static final AccordMessageType INTEROP_APPLY_MINIMAL_REQ = remote("INTEROP_APPLY_MINIMAL_REQ", true ); + public static final AccordMessageType INTEROP_APPLY_MAXIMAL_REQ = remote("INTEROP_APPLY_MAXIMAL_REQ", true ); public static final List values; @@ -90,14 +90,14 @@ public static final class AccordMessageType extends MessageType values = builder.build(); } - private static MessageType amt(MessageType.Kind kind, boolean hasSideEffects) + protected static AccordMessageType remote(String name, boolean hasSideEffects) { - return new AccordMessageType(kind, hasSideEffects); + return new AccordMessageType(name, REMOTE, hasSideEffects); } - private AccordMessageType(MessageType.Kind kind, boolean hasSideEffects) + private AccordMessageType(String name, MessageType.Kind kind, boolean hasSideEffects) { - super(kind, hasSideEffects); + super(name, kind, hasSideEffects); } } @@ -234,32 +234,32 @@ public void send(Node.Id to, Request request, AgentExecutor executor, Callback c @Override public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply) { - Message replyTo = (Message) replyContext; - Message replyMsg = replyTo.responseWith(reply); + ResponseContext respondTo = (ResponseContext) replyContext; + Message responseMsg = Message.responseWith(reply, respondTo); if (!reply.isFinal()) - replyMsg = replyMsg.withFlag(MessageFlag.NOT_FINAL); - checkReplyType(reply, replyTo); + responseMsg = responseMsg.withFlag(MessageFlag.NOT_FINAL); + checkReplyType(reply, respondTo); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); - logger.debug("Replying {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); - messaging.send(replyMsg, endpoint); + logger.debug("Replying {} {} to {}", responseMsg.verb(), responseMsg.payload, endpoint); + messaging.send(responseMsg, endpoint); } @Override public void replyWithUnknownFailure(Node.Id replyingToNode, ReplyContext replyContext, Throwable failure) { - Message replyTo = (Message) replyContext; - Message replyMsg = replyTo.failureResponse(RequestFailureReason.UNKNOWN, failure); + ResponseContext respondTo = (ResponseContext) replyContext; + Message responseMsg = Message.failureResponse(RequestFailureReason.UNKNOWN, failure, respondTo); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); - logger.debug("Replying with failure {} {} to {}", replyMsg.verb(), replyMsg.payload, endpoint); - messaging.send(replyMsg, endpoint); + logger.debug("Replying with failure {} {} to {}", responseMsg.verb(), responseMsg.payload, endpoint); + messaging.send(responseMsg, endpoint); } - private static void checkReplyType(Reply reply, Message replyTo) + private static void checkReplyType(Reply reply, ResponseContext respondTo) { Verb verb = getVerb(reply.type()); Preconditions.checkNotNull(verb, "Verb is null for type %s", reply.type()); - Set allowedVerbs = expectedReplyTypes(replyTo.verb()); - Preconditions.checkArgument(allowedVerbs.contains(verb), "Expected reply message with verbs %s but got %s; reply type was %s, request verb was %s", allowedVerbs, verb, reply.type(), replyTo.verb()); + Set allowedVerbs = expectedReplyTypes(respondTo.verb()); + Preconditions.checkArgument(allowedVerbs.contains(verb), "Expected reply message with verbs %s but got %s; reply type was %s, request verb was %s", allowedVerbs, verb, reply.type(), respondTo.verb()); } private static Set expectedReplyTypes(Verb verb) diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 6cd1b68642d5..24f32c94f650 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -50,7 +50,7 @@ import accord.local.NodeTimeService; import accord.local.RedundantBefore; import accord.local.ShardDistributor.EvenSplit; -import accord.messages.LocalMessage; +import accord.messages.LocalRequest; import accord.messages.Request; import accord.primitives.Seekables; import accord.primitives.Timestamp; @@ -60,12 +60,10 @@ import accord.topology.TopologyManager; import accord.utils.DefaultRandom; import accord.utils.Invariants; -import accord.utils.MapReduceConsume; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import org.agrona.collections.Int2ObjectHashMap; -import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; @@ -74,7 +72,6 @@ import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; -import org.apache.cassandra.journal.AsyncWriteCallback; import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -285,11 +282,11 @@ private AccordService(Id localId) this.messageSink = new AccordMessageSink(agent, configService); this.scheduler = new AccordScheduler(); this.dataStore = new AccordDataStore(); - this.journal = new AccordJournal(); this.configuration = new AccordConfiguration(DatabaseDescriptor.getRawConfig()); + this.journal = new AccordJournal(configService); this.node = new Node(localId, messageSink, - this::handleLocalMessage, + this::handleLocalRequest, configService, AccordService::uniqueNow, NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, AccordService::uniqueNow), @@ -313,7 +310,7 @@ private AccordService(Id localId) @Override public void startup() { - journal.start(); + journal.start(node); configService.start(); ClusterMetadataService.instance().log().addListener(configService); fastPathCoordinator.start(); @@ -495,35 +492,11 @@ public TopologyManager topology() } } - private void handleLocalMessage(LocalMessage message, Node node) + private void handleLocalRequest(LocalRequest request, Node node) { - if (!message.type().hasSideEffects()) - { - message.process(node); - return; - } - - journal.appendMessage(message, ImmediateExecutor.INSTANCE, new AsyncWriteCallback() - { - @Override - public void run() - { - // TODO (performance, expected): do not retain references to messages beyond a certain total - // cache threshold; in case of flush lagging behind, read the messages from journal and - // deserialize instead before processing, to prevent memory pressure buildup from messages - // pending flush to disk. - message.process(node); - } - - @Override - public void onFailure(Throwable error) - { - if (message instanceof MapReduceConsume) - ((MapReduceConsume) message).accept(null, error); - else - node.agent().onUncaughtException(error); - } - }); + // currently, we only create LocalRequests that have side effects and need to be persisted + Invariants.checkState(request.type().hasSideEffects()); + journal.appendLocalRequest(request); } private static RequestTimeoutException newTimeout(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 27302b73b5af..8738a5d7caa0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.cassandra.service.accord; import java.io.IOException; @@ -25,9 +24,6 @@ import accord.local.Node; import accord.messages.Request; -import accord.utils.MapReduceConsume; -import org.apache.cassandra.concurrent.ImmediateExecutor; -import org.apache.cassandra.journal.AsyncWriteCallback; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -53,44 +49,24 @@ public void doVerb(Message message) throws IOException // ClusterMetadataService.instance().maybeCatchup(message.epoch()); logger.debug("Receiving {} from {}", message.payload, message.from()); T request = message.payload; - long knownEpoch = request.knownEpoch(); - if (!node.topology().hasEpoch(knownEpoch)) - { - node.configService().fetchTopologyForEpoch(knownEpoch); - long waitForEpoch = request.waitForEpoch(); - if (!node.topology().hasEpoch(waitForEpoch)) - { - node.withEpoch(waitForEpoch, () -> request.process(node, endpointMapper.mappedId(message.from()), message)); - return; - } - } - if (!request.type().hasSideEffects()) + if (request.type().hasSideEffects()) { - request.process(node, endpointMapper.mappedId(message.from()), message); + journal.appendRemoteRequest(request, message); return; } - journal.appendMessage(request, ImmediateExecutor.INSTANCE, new AsyncWriteCallback() - { - @Override - public void run() - { - // TODO (performance, expected): do not retain references to messages beyond a certain total - // cache threshold; in case of flush lagging behind, read the messages from journal and - // deserialize instead before processing, to prevent memory pressure buildup from messages - // pending flush to disk. - request.process(node, endpointMapper.mappedId(message.from()), message); - } + /* + * TODO (desired): messages without side-effects don't go through the journal, + * and as such are retained on heap until the node catches up to waitForEpoch, + * which can be problematic in absense of proper Accord<->Messaging backpressure + */ + Node.Id fromNodeId = endpointMapper.mappedId(message.from()); + long waitForEpoch = request.waitForEpoch(); - @Override - public void onFailure(Throwable error) - { - if (request instanceof MapReduceConsume) - ((MapReduceConsume) request).accept(null, error); - else - node.agent().onUncaughtException(error); - } - }); + if (node.topology().hasEpoch(waitForEpoch)) + request.process(node, fromNodeId, message); + else + node.withEpoch(waitForEpoch, () -> request.process(node, fromNodeId, message)); } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index db7a4f7bf5a9..5afc451bb86d 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -69,9 +69,9 @@ public long serializedSize(ReadData t, int version) } }; - private static final ApplyThenWaitUntilAppliedSerializer applyThenWaitUntilApplied = new ApplyThenWaitUntilAppliedSerializer(); + public static final ApplyThenWaitUntilAppliedSerializer applyThenWaitUntilApplied = new ApplyThenWaitUntilAppliedSerializer(); - private static class ApplyThenWaitUntilAppliedSerializer implements ReadDataSerializer + public static class ApplyThenWaitUntilAppliedSerializer implements ReadDataSerializer { @Override public void serialize(ApplyThenWaitUntilApplied applyThenWaitUntilApplied, DataOutputPlus out, int version) throws IOException diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 4201a13f7d20..cd98c71f1890 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -38,6 +38,7 @@ import accord.api.Update; import accord.api.Write; import accord.local.Node; +import accord.messages.MessageType; import accord.messages.PreAccept; import accord.messages.TxnRequest; import accord.primitives.FullKeyRoute; @@ -60,7 +61,6 @@ import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.Files; -import org.apache.cassandra.journal.AsyncWriteCallback; import org.apache.cassandra.service.accord.AccordJournal; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordRoutingKey; @@ -114,12 +114,12 @@ private static void run() private static void check() { State.logger.info("Check starting"); - State.journal.start(); // to avoid a while true deadlock + State.journal.start(null); // to avoid a while true deadlock try { for (int i = 0; i < State.events; i++) { - TxnRequest event = State.journal.readMessage(State.toTxnId(i), AccordJournal.Type.PRE_ACCEPT, PreAccept.class); + TxnRequest event = State.journal.readMessage(State.toTxnId(i), MessageType.PRE_ACCEPT_REQ, PreAccept.class); State.logger.info("Event {} -> {}", i, event); if (event == null) throw new AssertionError(String.format("Unable to read event %d", i)); @@ -166,7 +166,7 @@ public static class State } } private static final ExecutorPlus executor = ExecutorFactory.Global.executorFactory().pooled("name", 10); - private static final AccordJournal journal = new AccordJournal(); + private static final AccordJournal journal = new AccordJournal(null); private static final int events = 100; private static final CountDownLatch eventsWritten = CountDownLatch.newCountDownLatch(events); private static final CountDownLatch eventsDurable = CountDownLatch.newCountDownLatch(events); @@ -174,27 +174,27 @@ public static class State static { - journal.start(); + journal.start(null); } public static void append(int event) { TxnRequest request = toRequest(event); - journal.appendMessage(request, executor, new AsyncWriteCallback() - { - @Override - public void run() - { - durable(event); - } - - @Override - public void onFailure(Throwable error) - { - eventsDurable.decrement(); // to make sure we don't block forever - exceptions.add(error); - } - }); +// journal.appendMessageTest(request, executor, new AsyncWriteCallback() +// { +// @Override +// public void run() +// { +// durable(event); +// } +// +// @Override +// public void onFailure(Throwable error) +// { +// eventsDurable.decrement(); // to make sure we don't block forever +// exceptions.add(error); +// } +// }); eventsWritten.decrement(); logger.info("append({}); remaining {}", event, eventsWritten.count()); } diff --git a/test/unit/org/apache/cassandra/journal/JournalTest.java b/test/unit/org/apache/cassandra/journal/JournalTest.java index f6a059051700..b9a309d75266 100644 --- a/test/unit/org/apache/cassandra/journal/JournalTest.java +++ b/test/unit/org/apache/cassandra/journal/JournalTest.java @@ -47,8 +47,16 @@ public void testSimpleReadWrite() throws IOException File directory = new File(Files.createTempDirectory("JournalTest")); directory.deleteRecursiveOnExit(); + AsyncCallbacks callbacks = new AsyncCallbacks<>() + { + @Override public void onWrite(long segment, int position, int size, TimeUUID key, Long value, Object writeContext) {} + @Override public void onWriteFailed(TimeUUID key, Long value, Object writeContext, Throwable cause) {} + @Override public void onFlush(long segment, int position) {} + @Override public void onFlushFailed(Throwable cause) {} + }; + Journal journal = - new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + new Journal<>("TestJournal", directory, TestParams.INSTANCE, callbacks, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); journal.start(); @@ -69,7 +77,7 @@ public void testSimpleReadWrite() throws IOException journal.shutdown(); - journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, callbacks, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); journal.start(); assertEquals(1L, (long) journal.readFirst(id1)); diff --git a/test/unit/org/apache/cassandra/journal/SegmentTest.java b/test/unit/org/apache/cassandra/journal/SegmentTest.java index c5e1dff04de7..2e59d701cb75 100644 --- a/test/unit/org/apache/cassandra/journal/SegmentTest.java +++ b/test/unit/org/apache/cassandra/journal/SegmentTest.java @@ -66,7 +66,7 @@ public void testWriteReadActiveSegment() throws IOException Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); - ActiveSegment segment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); + ActiveSegment segment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); segment.allocate(record1.remaining(), hosts1).write(id1, record1, hosts1); segment.allocate(record2.remaining(), hosts2).write(id2, record2, hosts2); @@ -129,7 +129,7 @@ public void testReadClosedSegmentByID() throws IOException Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); - ActiveSegment activeSegment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); + ActiveSegment activeSegment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); activeSegment.allocate(record1.remaining(), hosts1).write(id1, record1, hosts1); activeSegment.allocate(record2.remaining(), hosts2).write(id2, record2, hosts2); @@ -138,7 +138,7 @@ public void testReadClosedSegmentByID() throws IOException activeSegment.close(); - StaticSegment staticSegment = StaticSegment.open(descriptor, TimeUUIDKeySupport.INSTANCE); + StaticSegment staticSegment = StaticSegment.open(descriptor, TimeUUIDKeySupport.INSTANCE); // read all 4 entries by id and compare with originals EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); @@ -194,7 +194,7 @@ public void testReadClosedSegmentSequentially() throws IOException Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); - ActiveSegment activeSegment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); + ActiveSegment activeSegment = ActiveSegment.create(descriptor, params(), TimeUUIDKeySupport.INSTANCE); activeSegment.allocate(record1.remaining(), hosts1).write(id1, record1, hosts1); activeSegment.allocate(record2.remaining(), hosts2).write(id2, record2, hosts2); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index e30ddbb55803..c27878f401a7 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -377,8 +377,8 @@ public static AccordCommandStore createAccordCommandStore( public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } }; - AccordJournal journal = new AccordJournal(); - journal.start(); + AccordJournal journal = new AccordJournal(null); + journal.start(null); SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); AccordCommandStore result = new AccordCommandStore(0, From 5385e9c60a30ec451f66bf442f24c23060edb763 Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Thu, 18 Jan 2024 15:45:05 +0000 Subject: [PATCH 092/340] Fix LocalPartitioner duplication in *_for_key Accord system tables patch by Aleksey Yeschenko; reviewed by Marcus Eriksson for CASSANDRA-19265 --- .../apache/cassandra/service/accord/AccordKeyspace.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 963a20adb113..fc695f926350 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -192,6 +192,10 @@ public class AccordKeyspace private static final TupleType KEY_TYPE = new TupleType(Arrays.asList(UUIDType.instance, BytesType.instance)); private static final String KEY_TUPLE = KEY_TYPE.asCQL3Type().toString(); + // shared LocalPartitioner for all *_for_key Accord tables with (store_id, key_token, key) partition key + private static final LocalPartitioner FOR_KEYS_LOCAL_PARTITIONER = + new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE)); + private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexSliceFilter(Slices.ALL, false); private enum TokenType @@ -424,7 +428,7 @@ public static Row maybeDropTruncatedCommandColumns(Row row, Cell durabilityCe + format("last_write_timestamp %s, ", TIMESTAMP_TUPLE) + "PRIMARY KEY((store_id, key_token, key))" + ')') - .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) + .partitioner(FOR_KEYS_LOCAL_PARTITIONER) .build(); public static class TimestampsForKeyColumns @@ -558,7 +562,7 @@ private static TableMetadata commandsForKeysTable(String tableName) + "data blob, " + "PRIMARY KEY((store_id, key_token, key), timestamp)" + ')') - .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) + .partitioner(FOR_KEYS_LOCAL_PARTITIONER) .build(); } From 8de3dfc711113a694f9ffe1bebc2438585ada2f2 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Wed, 17 Jan 2024 17:08:21 +0000 Subject: [PATCH 093/340] CASSANDRA-18365: Protocol fixes --- modules/accord | 2 +- .../locator/AbstractReplicationStrategy.java | 3 - .../cassandra/metrics/AccordMetrics.java | 10 +- .../service/accord/AccordJournal.java | 76 ++++++++--- .../service/accord/AccordKeyspace.java | 11 +- .../service/accord/AccordMessageSink.java | 5 +- .../service/accord/AccordObjectSizes.java | 7 +- .../service/accord/api/AccordRoutingKey.java | 7 ++ .../accord/interop/AccordInteropCommit.java | 15 +-- .../interop/AccordInteropExecution.java | 25 ++-- .../accord/interop/AccordInteropPersist.java | 4 +- .../interop/AccordInteropReadCallback.java | 4 +- .../serializers/CheckStatusSerializers.java | 23 ++-- .../serializers/CommandSerializers.java | 30 ++--- .../accord/serializers/CommitSerializers.java | 32 ++--- .../accord/serializers/DepsSerializer.java | 1 + .../accord/serializers/FetchSerializers.java | 15 ++- .../serializers/ReadDataSerializers.java | 6 +- .../serializers/RecoverySerializers.java | 92 ++++++++++++-- .../serializers/SmallEnumSerializer.java | 118 ++++++++++++++++++ .../test/accord/AccordMetricsTest.java | 4 +- .../selection/SelectionColumnMappingTest.java | 1 - .../entities/FrozenCollectionsTest.java | 1 - .../cql3/validation/entities/JsonTest.java | 1 - .../validation/entities/UserTypesTest.java | 2 - .../operations/SelectLimitTest.java | 2 - .../CompactionAccordIteratorsTest.java | 8 +- .../service/accord/AccordCommandTest.java | 2 +- .../service/accord/AccordKeyspaceTest.java | 7 +- .../service/accord/AccordMessageSinkTest.java | 4 +- .../service/accord/AccordTestUtils.java | 15 ++- .../service/accord/CommandsForRangesTest.java | 2 +- .../accord/async/AsyncOperationTest.java | 83 ++++++++++-- .../predefined/PredefinedOperation.java | 1 - 34 files changed, 460 insertions(+), 159 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/SmallEnumSerializer.java diff --git a/modules/accord b/modules/accord index c524b6d3de39..3789c5bfec50 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit c524b6d3de3923ccb6314715bd987f3b891348ab +Subproject commit 3789c5bfec50eb96157c0a55af77f78ee0cac804 diff --git a/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java b/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java index 0870902681c5..04cfac933c7c 100644 --- a/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java +++ b/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java @@ -20,9 +20,6 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; import java.util.function.Supplier; import java.util.*; diff --git a/src/java/org/apache/cassandra/metrics/AccordMetrics.java b/src/java/org/apache/cassandra/metrics/AccordMetrics.java index 5601b9fa406b..4dc053ccee45 100644 --- a/src/java/org/apache/cassandra/metrics/AccordMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordMetrics.java @@ -41,7 +41,7 @@ public class AccordMetrics public final static AccordMetrics readMetrics = new AccordMetrics("ro"); public final static AccordMetrics writeMetrics = new AccordMetrics("rw"); - public static final String COMMIT_LATENCY = "CommitLatency"; + public static final String STABLE_LATENCY = "StableLatency"; public static final String EXECUTE_LATENCY = "ExecuteLatency"; public static final String APPLY_LATENCY = "ApplyLatency"; public static final String APPLY_DURATION = "ApplyDuration"; @@ -63,7 +63,7 @@ public class AccordMetrics /** * The time between start on the coordinator and commit on this replica. */ - public final Timer commitLatency; + public final Timer stableLatency; /** * The time between start on the coordinator and execution on this replica. @@ -135,7 +135,7 @@ public class AccordMetrics private AccordMetrics(String scope) { DefaultNameFactory replica = new DefaultNameFactory(ACCORD_REPLICA, scope); - commitLatency = Metrics.timer(replica.createMetricName(COMMIT_LATENCY)); + stableLatency = Metrics.timer(replica.createMetricName(STABLE_LATENCY)); executeLatency = Metrics.timer(replica.createMetricName(EXECUTE_LATENCY)); applyLatency = Metrics.timer(replica.createMetricName(APPLY_LATENCY)); applyDuration = Metrics.timer(replica.createMetricName(APPLY_DURATION)); @@ -204,14 +204,14 @@ else if (txnId.isRead()) } @Override - public void onCommitted(Command cmd) + public void onStable(Command cmd) { long now = AccordService.uniqueNow(); AccordMetrics metrics = forTransaction(cmd.txnId()); if (metrics != null) { long trxTimestamp = cmd.txnId().hlc(); - metrics.commitLatency.update(now - trxTimestamp, TimeUnit.MICROSECONDS); + metrics.stableLatency.update(now - trxTimestamp, TimeUnit.MICROSECONDS); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index ee1f79625aa9..28bf2c2cc55a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -110,12 +110,12 @@ import static accord.messages.MessageType.BEGIN_RECOVER_REQ; import static accord.messages.MessageType.COMMIT_INVALIDATE_REQ; import static accord.messages.MessageType.COMMIT_MAXIMAL_REQ; -import static accord.messages.MessageType.COMMIT_MINIMAL_REQ; +import static accord.messages.MessageType.COMMIT_SLOW_PATH_REQ; import static accord.messages.MessageType.INFORM_DURABLE_REQ; import static accord.messages.MessageType.INFORM_OF_TXN_REQ; import static accord.messages.MessageType.PRE_ACCEPT_REQ; import static accord.messages.MessageType.PROPAGATE_APPLY_MSG; -import static accord.messages.MessageType.PROPAGATE_COMMIT_MSG; +import static accord.messages.MessageType.PROPAGATE_STABLE_MSG; import static accord.messages.MessageType.PROPAGATE_OTHER_MSG; import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; import static accord.messages.MessageType.SET_GLOBALLY_DURABLE_REQ; @@ -124,6 +124,9 @@ import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; +import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; +import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; +import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; @@ -829,8 +832,11 @@ public enum Type implements ValueSerializer PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), ACCEPT (65, ACCEPT_REQ, AcceptSerializers.request, TXN ), ACCEPT_INVALIDATE (66, ACCEPT_INVALIDATE_REQ, AcceptSerializers.invalidate, EPOCH), - COMMIT_MINIMAL (67, COMMIT_MINIMAL_REQ, CommitSerializers.request, TXN ), + COMMIT_SLOW_PATH (67, COMMIT_SLOW_PATH_REQ, CommitSerializers.request, TXN ), COMMIT_MAXIMAL (68, COMMIT_MAXIMAL_REQ, CommitSerializers.request, TXN ), + STABLE_FAST_PATH (87, STABLE_FAST_PATH_REQ, CommitSerializers.request, TXN ), + STABLE_SLOW_PATH (88, STABLE_SLOW_PATH_REQ, CommitSerializers.request, TXN ), + STABLE_MAXIMAL (89, STABLE_MAXIMAL_REQ, CommitSerializers.request, TXN ), COMMIT_INVALIDATE (69, COMMIT_INVALIDATE_REQ, CommitSerializers.invalidate, INVL ), APPLY_MINIMAL (70, APPLY_MINIMAL_REQ, ApplySerializers.request, TXN ), APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), @@ -845,15 +851,15 @@ public enum Type implements ValueSerializer /* Accord local messages */ PROPAGATE_PRE_ACCEPT (79, PROPAGATE_PRE_ACCEPT_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_COMMIT (80, PROPAGATE_COMMIT_MSG, FetchSerializers.propagate, LOCAL), + PROPAGATE_STABLE (80, PROPAGATE_STABLE_MSG, FetchSerializers.propagate, LOCAL), PROPAGATE_APPLY (81, PROPAGATE_APPLY_MSG, FetchSerializers.propagate, LOCAL), PROPAGATE_OTHER (82, PROPAGATE_OTHER_MSG, FetchSerializers.propagate, LOCAL), /* C* interop messages */ - INTEROP_COMMIT_MINIMAL (83, INTEROP_COMMIT_MINIMAL_REQ, COMMIT_MINIMAL_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_COMMIT_MAXIMAL (84, INTEROP_COMMIT_MAXIMAL_REQ, COMMIT_MAXIMAL_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_APPLY_MINIMAL (85, INTEROP_APPLY_MINIMAL_REQ, APPLY_MINIMAL_REQ, AccordInteropApply.serializer, TXN), - INTEROP_APPLY_MAXIMAL (86, INTEROP_APPLY_MAXIMAL_REQ, APPLY_MAXIMAL_REQ, AccordInteropApply.serializer, TXN), + INTEROP_COMMIT (83, INTEROP_COMMIT_MINIMAL_REQ, STABLE_FAST_PATH_REQ, AccordInteropCommit.serializer, TXN), + INTEROP_COMMIT_MAXIMAL (84, INTEROP_COMMIT_MAXIMAL_REQ, STABLE_MAXIMAL_REQ, AccordInteropCommit.serializer, TXN), + INTEROP_APPLY_MINIMAL (85, INTEROP_APPLY_MINIMAL_REQ, APPLY_MINIMAL_REQ, AccordInteropApply.serializer, TXN), + INTEROP_APPLY_MAXIMAL (86, INTEROP_APPLY_MAXIMAL_REQ, APPLY_MAXIMAL_REQ, AccordInteropApply.serializer, TXN), ; final int id; @@ -1377,9 +1383,9 @@ public Accept accept(Ballot ballot) } @Override - public Commit commitMinimal() + public Commit commitSlowPath() { - return readMessage(txnId, COMMIT_MINIMAL_REQ, Commit.class); + return readMessage(txnId, COMMIT_SLOW_PATH_REQ, Commit.class); } @Override @@ -1389,9 +1395,21 @@ public Commit commitMaximal() } @Override - public Propagate propagateCommit() + public Commit stableFastPath() { - return readMessage(txnId, PROPAGATE_COMMIT_MSG, Propagate.class); + return readMessage(txnId, STABLE_FAST_PATH_REQ, Commit.class); + } + + @Override + public Commit stableMaximal() + { + return readMessage(txnId, STABLE_MAXIMAL_REQ, Commit.class); + } + + @Override + public Propagate propagateStable() + { + return readMessage(txnId, PROPAGATE_STABLE_MSG, Propagate.class); } @Override @@ -1470,11 +1488,11 @@ public Accept accept(Ballot ballot) } @Override - public Commit commitMinimal() + public Commit commitSlowPath() { - logger.debug("Fetching {} message for {}", COMMIT_MINIMAL_REQ, txnId); - Commit commit = provider.commitMinimal(); - logger.debug("Fetched {} message for {}: {}", COMMIT_MINIMAL_REQ, txnId, commit); + logger.debug("Fetching {} message for {}", COMMIT_SLOW_PATH_REQ, txnId); + Commit commit = provider.commitSlowPath(); + logger.debug("Fetched {} message for {}: {}", COMMIT_SLOW_PATH_REQ, txnId, commit); return commit; } @@ -1488,11 +1506,29 @@ public Commit commitMaximal() } @Override - public Propagate propagateCommit() + public Commit stableFastPath() + { + logger.debug("Fetching {} message for {}", STABLE_FAST_PATH_REQ, txnId); + Commit commit = provider.stableFastPath(); + logger.debug("Fetched {} message for {}: {}", STABLE_FAST_PATH_REQ, txnId, commit); + return commit; + } + + @Override + public Commit stableMaximal() + { + logger.debug("Fetching {} message for {}", STABLE_MAXIMAL_REQ, txnId); + Commit commit = provider.stableMaximal(); + logger.debug("Fetched {} message for {}: {}", STABLE_MAXIMAL_REQ, txnId, commit); + return commit; + } + + @Override + public Propagate propagateStable() { - logger.debug("Fetching {} message for {}", PROPAGATE_COMMIT_MSG, txnId); - Propagate propagate = provider.propagateCommit(); - logger.debug("Fetched {} message for {}: {}", PROPAGATE_COMMIT_MSG, txnId, propagate); + logger.debug("Fetching {} message for {}", PROPAGATE_STABLE_MSG, txnId); + Propagate propagate = provider.propagateStable(); + logger.debug("Fetched {} message for {}: {}", PROPAGATE_STABLE_MSG, txnId, propagate); return propagate; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index fc695f926350..ddcaed5a0d87 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -821,11 +821,11 @@ public static Mutation getCommandMutation(int storeId, Command original, Command addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.accepted_ballot, Command::accepted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); + addCellIfModified(CommandsColumns.accepted_ballot, Command::acceptedOrCommitted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); // TODO review this is just to work around Truncated not being committed but having a status after committed // so status claims it is committed. - if (!command.isTruncated() && command.isCommitted()) + if (command.isStable() && !command.isTruncated()) { Command.Committed committed = command.asCommitted(); Command.Committed originalCommitted = original != null && original.isCommitted() ? original.asCommitted() : null; @@ -1284,8 +1284,11 @@ private static WaitingOnProvider deserializeWaitingOn(UntypedResultSet.Row row) return (deps) -> { - if (bytes == null || !bytes.hasRemaining()) - return deps == null ? WaitingOn.EMPTY : WaitingOn.none(deps); + if (bytes == null) + return null; + + if (!bytes.hasRemaining()) + return WaitingOn.none(deps); try { diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 64f4ced5da76..5efdb7c0f450 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -124,8 +124,11 @@ private VerbMapping() builder.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); builder.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); builder.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); - builder.put(MessageType.COMMIT_MINIMAL_REQ, Verb.ACCORD_COMMIT_REQ); + builder.put(MessageType.COMMIT_SLOW_PATH_REQ, Verb.ACCORD_COMMIT_REQ); builder.put(MessageType.COMMIT_MAXIMAL_REQ, Verb.ACCORD_COMMIT_REQ); + builder.put(MessageType.STABLE_FAST_PATH_REQ, Verb.ACCORD_COMMIT_REQ); + builder.put(MessageType.STABLE_SLOW_PATH_REQ, Verb.ACCORD_COMMIT_REQ); + builder.put(MessageType.STABLE_MAXIMAL_REQ, Verb.ACCORD_COMMIT_REQ); builder.put(MessageType.COMMIT_INVALIDATE_REQ, Verb.ACCORD_COMMIT_INVALIDATE_REQ); builder.put(MessageType.APPLY_MINIMAL_REQ, Verb.ACCORD_APPLY_REQ); builder.put(MessageType.APPLY_MAXIMAL_REQ, Verb.ACCORD_APPLY_REQ); diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 095781f46a0b..3f506302db62 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -296,7 +296,7 @@ private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) final static long NOT_DEFINED = measure(Command.SerializerSupport.notDefined(attrs(false, false), Ballot.ZERO)); final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(attrs(false, true), EMPTY_TXNID, null));; final static long ACCEPTED = measure(Command.SerializerSupport.accepted(attrs(true, false), SaveStatus.Accepted, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO)); - final static long COMMITTED = measure(Command.SerializerSupport.committed(attrs(true, true), SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY)); + final static long COMMITTED = measure(Command.SerializerSupport.committed(attrs(true, true), SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, null)); final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY, EMPTY_WRITES, EMPTY_RESULT)); final static long TRUNCATED = measure(Command.SerializerSupport.truncatedApply(attrs(false, false), SaveStatus.TruncatedApply, EMPTY_TXNID, null, null)); final static long INVALIDATED = measure(Command.SerializerSupport.invalidated(EMPTY_TXNID, null)); @@ -314,6 +314,7 @@ private static long emptySize(Command command) case PreCommitted: return ACCEPTED; case Committed: + case Stable: case ReadyToExecute: return COMMITTED; case PreApplied: @@ -346,13 +347,13 @@ public static long command(Command command) size += sizeNullable(command.executeAt(), AccordObjectSizes::timestamp); size += sizeNullable(command.partialTxn(), AccordObjectSizes::txn); size += sizeNullable(command.partialDeps(), AccordObjectSizes::dependencies); - size += sizeNullable(command.accepted(), AccordObjectSizes::timestamp); + size += sizeNullable(command.acceptedOrCommitted(), AccordObjectSizes::timestamp); size += sizeNullable(command.writes(), AccordObjectSizes::writes); if (command.result() instanceof TxnResult) size += sizeNullable(command.result(), AccordObjectSizes::results); - if (!(command instanceof Command.Committed)) + if (!(command instanceof Command.Committed && command.saveStatus().hasBeen(Status.Stable))) return size; Command.Committed committed = command.asCommitted(); diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index 5b089267a105..acf4da192345 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -31,6 +31,7 @@ import accord.api.RoutingKey; import accord.local.ShardDistributor; import accord.primitives.Range; +import accord.primitives.RangeFactory; import accord.primitives.Ranges; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.dht.IPartitioner; @@ -63,6 +64,12 @@ protected AccordRoutingKey(TableId table) public abstract long estimatedSizeOnHeap(); public abstract AccordRoutingKey withTable(TableId table); + @Override + public RangeFactory rangeFactory() + { + return (s, e) -> new TokenRange((AccordRoutingKey) s, (AccordRoutingKey) e); + } + public SentinelKey asSentinelKey() { return (SentinelKey) this; diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java index e3051bf64455..e92edb1ec980 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java @@ -25,6 +25,7 @@ import accord.messages.Commit; import accord.messages.MessageType; import accord.messages.ReadData; +import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; @@ -44,20 +45,20 @@ public class AccordInteropCommit extends Commit public static final IVersionedSerializer serializer = new CommitSerializer(AccordInteropRead.class, AccordInteropRead.requestSerializer) { @Override - protected AccordInteropCommit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Kind kind, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected AccordInteropCommit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Kind kind, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { - return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, executeAt, partialTxn, partialDeps, fullRoute, read); + return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, ballot, executeAt, partialTxn, partialDeps, fullRoute, read); } }; - public AccordInteropCommit(Kind kind, TxnId txnId, PartialRoute scope, long waitForEpoch, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) + public AccordInteropCommit(Kind kind, TxnId txnId, PartialRoute scope, long waitForEpoch, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) { - super(kind, txnId, scope, waitForEpoch, executeAt, partialTxn, partialDeps, fullRoute, readData); + super(kind, txnId, scope, waitForEpoch, ballot, executeAt, partialTxn, partialDeps, fullRoute, readData); } public AccordInteropCommit(Kind kind, Node.Id to, Topology coordinateTopology, Topologies topologies, TxnId txnId, Txn txn, FullRoute route, Timestamp executeAt, Deps deps, AccordInteropRead read) { - super(kind, to, coordinateTopology, topologies, txnId, txn, route, executeAt, deps, (t, u, p) -> read); + super(kind, to, coordinateTopology, topologies, txnId, txn, route, Ballot.ZERO, executeAt, deps, read); } @Override @@ -65,8 +66,8 @@ public MessageType type() { switch (kind) { - case Minimal: return AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ; - case Maximal: return AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; + case StableFastPath: return AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ; + case StableWithTxnAndDeps: return AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; default: throw new IllegalStateException(); } } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index 9aa9179b38ae..489fdfba27f2 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -28,6 +28,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; +import accord.messages.ReadTxnData; +import accord.primitives.Ballot; import org.apache.cassandra.schema.TableId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,7 +39,7 @@ import accord.api.Result; import accord.coordinate.Execute; import accord.coordinate.Persist; -import accord.coordinate.TxnExecute; +import accord.coordinate.ExecuteTxn; import accord.local.AgentExecutor; import accord.local.CommandStore; import accord.local.Node; @@ -153,13 +155,13 @@ public Factory(AccordAgent agent, AccordEndpointMapper endpointMapper) } @Override - public Execute create(Node node, TxnId txnId, Txn txn, FullRoute route, Participants readScope, Timestamp executeAt, Deps deps, BiConsumer callback) + public Execute create(Node node, Topologies topologies, Path path, TxnId txnId, Txn txn, FullRoute route, Participants readScope, Timestamp executeAt, Deps deps, BiConsumer callback) { // Unrecoverable repair always needs to be run by AccordInteropExecution AccordUpdate.Kind updateKind = AccordUpdate.kind(txn.update()); ConsistencyLevel consistencyLevel = txn.read() instanceof TxnRead ? ((TxnRead) txn.read()).cassandraConsistencyLevel() : null; if (updateKind != AccordUpdate.Kind.UNRECOVERABLE_REPAIR && (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ONE || txn.read().keys().isEmpty())) - return TxnExecute.FACTORY.create(node, txnId, txn, route, readScope, executeAt, deps, callback); + return ExecuteTxn.FACTORY.create(node, topologies, path, txnId, txn, route, readScope, executeAt, deps, callback); return new AccordInteropExecution(node, txnId, txn, updateKind, route, readScope, executeAt, deps, callback, executor, consistencyLevel, endpointMapper); } } @@ -252,7 +254,8 @@ public void sendReadCommand(Message message, InetAddressAndPort to, Node.Id id = endpointMapper.mappedId(to); SinglePartitionReadCommand command = (SinglePartitionReadCommand) message.payload; AccordInteropRead read = new AccordInteropRead(id, executes, txnId, readScope, executeAt, command); - AccordInteropCommit commit = new AccordInteropCommit(Commit.Kind.Minimal, id, coordinateTopology, allTopologies, + // TODO (required): understand interop and whether StableFastPath is appropriate + AccordInteropCommit commit = new AccordInteropCommit(Kind.StableFastPath, id, coordinateTopology, allTopologies, txnId, txn, route, executeAt, deps, read); node.send(id, commit, executor, new AccordInteropRead.ReadCallback(id, to, message, callback, this)); } @@ -337,14 +340,14 @@ public void notifyOfInitialContacts(EndpointsForToken fullDataRequests, Endpoint for (int i = 0; i < digestRequests.size(); i++) contacted.add(digestRequests.endpoint(i)); if (readsCurrentlyUnderConstruction.decrementAndGet() == 0) - sendCommitsToUncontacted(); + sendStableToUncontacted(); } - private void sendCommitsToUncontacted() + private void sendStableToUncontacted() { for (Node.Id to : executeTopology.nodes()) if (!contacted.contains(endpointMapper.mappedEndpoint(to))) - node.send(to, new Commit(Kind.Minimal, to, coordinateTopology, allTopologies, txnId, txn, route, readScope, executeAt, deps, false)); + node.send(to, new Commit(Kind.StableFastPath, to, coordinateTopology, allTopologies, txnId, txn, route, Ballot.ZERO, executeAt, deps, (ReadTxnData) null)); } @Override @@ -355,7 +358,7 @@ public void start() for (Node.Id to : allTopologies.nodes()) { if (!executeTopology.contains(to)) - node.send(to, new Commit(Commit.Kind.Minimal, to, coordinateTopology, allTopologies, txnId, txn, route, readScope, executeAt, deps, false)); + node.send(to, new Commit(Kind.StableFastPath, to, coordinateTopology, allTopologies, txnId, txn, route, Ballot.ZERO, executeAt, deps, (ReadTxnData) null)); } } AsyncChain result; @@ -367,7 +370,7 @@ public void start() CommandStore cs = node.commandStores().select(route.homeKey()); result.beginAsResult().withExecutor(cs).begin((data, failure) -> { if (failure == null) - Persist.persist(node, executes, txnId, route, txn, executeAt, deps, txn.execute(txnId, executeAt, data), txn.result(txnId, executeAt, data), callback); + Persist.persist(node, executes, route, txnId, txn, executeAt, deps, txn.execute(txnId, executeAt, data), txn.result(txnId, executeAt, data), callback); else callback.accept(null, failure); }); @@ -380,7 +383,7 @@ private AsyncChain executeUnrecoverableRepairUpdate() // TODO (expected): We should send the read in the same message as the commit. This requires refactor ReadData.Kind so that it doesn't specify the ordinal encoding // and can be extended similar to MessageType which allows additional types not from Accord to be added for (Node.Id to : executeTopology.nodes()) - node.send(to, new Commit(Kind.Minimal, to, coordinateTopology, allTopologies, txnId, txn, route, readScope, executeAt, deps, false)); + node.send(to, new Commit(Kind.StableFastPath, to, coordinateTopology, allTopologies, txnId, txn, route, Ballot.ZERO, executeAt, deps, (ReadTxnData) null)); repairUpdate.runBRR(AccordInteropExecution.this); return new TxnData(); }); @@ -408,6 +411,6 @@ public Mutation maybeAllowOutOfRangeMutations(Mutation m) @Override public void sendMaximalCommit(Id to) { - Commit.commitMaximal(node, to, txn, txnId, executeAt, route, deps, readScope); + Commit.stableMaximal(node, to, txn, txnId, executeAt, route, deps); } } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java index 7ef158153661..51445469599d 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java @@ -23,7 +23,7 @@ import accord.api.Result; import accord.api.Update; import accord.coordinate.Persist; -import accord.coordinate.TxnPersist; +import accord.coordinate.PersistTxn; import accord.coordinate.tracking.AppliedTracker; import accord.coordinate.tracking.QuorumTracker; import accord.coordinate.tracking.RequestStatus; @@ -57,7 +57,7 @@ public Persist create(Node node, Topologies topologies, TxnId txnId, FullRoute implements Callback { @@ -63,7 +63,7 @@ public void onSuccess(Node.Id from, ReadReply reply) { wrapped.onResponse(message.responseWith(convertResponse((ReadOk) reply)).withFrom(endpoint)); } - else if (reply == NotCommitted) + else if (reply == Insufficient) { // Might still send a response if we send a maximal commit. Accord would tryAlternative and send // both the commit and an additional repair, but Cassandra doesn't have tryAlternative unless we add diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index d4cf093d1242..070fcfa0e6f8 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -173,8 +173,9 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t foundKnownMap.serialize(ok.map, out, version); CommandSerializers.saveStatus.serialize(ok.maxKnowledgeSaveStatus, out, version); CommandSerializers.saveStatus.serialize(ok.maxSaveStatus, out, version); - CommandSerializers.ballot.serialize(ok.promised, out, version); - CommandSerializers.ballot.serialize(ok.accepted, out, version); + CommandSerializers.ballot.serialize(ok.maxPromised, out, version); + CommandSerializers.ballot.serialize(ok.maxAcceptedOrCommitted, out, version); + CommandSerializers.ballot.serialize(ok.acceptedOrCommitted, out, version); CommandSerializers.nullableTimestamp.serialize(ok.executeAt, out, version); out.writeBoolean(ok.isCoordinating); CommandSerializers.durability.serialize(ok.durability, out, version); @@ -186,7 +187,7 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t CheckStatusOkFull okFull = (CheckStatusOkFull) ok; CommandSerializers.nullablePartialTxn.serialize(okFull.partialTxn, out, version); - DepsSerializer.nullablePartialDeps.serialize(okFull.committedDeps, out, version); + DepsSerializer.nullablePartialDeps.serialize(okFull.stableDeps, out, version); CommandSerializers.nullableWrites.serialize(okFull.writes, out, version); } @@ -204,8 +205,9 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce FoundKnownMap map = foundKnownMap.deserialize(in, version); SaveStatus maxKnowledgeStatus = CommandSerializers.saveStatus.deserialize(in, version); SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in, version); - Ballot promised = CommandSerializers.ballot.deserialize(in, version); - Ballot accepted = CommandSerializers.ballot.deserialize(in, version); + Ballot maxPromised = CommandSerializers.ballot.deserialize(in, version); + Ballot maxAcceptedOrCommitted = CommandSerializers.ballot.deserialize(in, version); + Ballot acceptedOrCommitted = CommandSerializers.ballot.deserialize(in, version); Timestamp executeAt = CommandSerializers.nullableTimestamp.deserialize(in, version); boolean isCoordinating = in.readBoolean(); Durability durability = CommandSerializers.durability.deserialize(in, version); @@ -213,7 +215,7 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); if (kind == OK) - return createOk(map, maxKnowledgeStatus, maxStatus, promised, accepted, executeAt, + return createOk(map, maxKnowledgeStatus, maxStatus, maxPromised, maxAcceptedOrCommitted, acceptedOrCommitted, executeAt, isCoordinating, durability, route, homeKey); PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); @@ -225,7 +227,7 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce || maxKnowledgeStatus == SaveStatus.TruncatedApply || maxKnowledgeStatus == SaveStatus.TruncatedApplyWithOutcome || maxKnowledgeStatus == SaveStatus.TruncatedApplyWithDeps) result = CommandSerializers.APPLIED; - return createOk(map, maxKnowledgeStatus, maxStatus, promised, accepted, executeAt, + return createOk(map, maxKnowledgeStatus, maxStatus, maxPromised, maxAcceptedOrCommitted, acceptedOrCommitted, executeAt, isCoordinating, durability, route, homeKey, partialTxn, committedDeps, writes, result); } @@ -242,8 +244,9 @@ public long serializedSize(CheckStatusReply reply, int version) size += foundKnownMap.serializedSize(ok.map, version); size += CommandSerializers.saveStatus.serializedSize(ok.maxKnowledgeSaveStatus, version); size += CommandSerializers.saveStatus.serializedSize(ok.maxSaveStatus, version); - size += CommandSerializers.ballot.serializedSize(ok.promised, version); - size += CommandSerializers.ballot.serializedSize(ok.accepted, version); + size += CommandSerializers.ballot.serializedSize(ok.maxPromised, version); + size += CommandSerializers.ballot.serializedSize(ok.maxAcceptedOrCommitted, version); + size += CommandSerializers.ballot.serializedSize(ok.acceptedOrCommitted, version); size += CommandSerializers.nullableTimestamp.serializedSize(ok.executeAt, version); size += TypeSizes.BOOL_SIZE; size += CommandSerializers.durability.serializedSize(ok.durability, version); @@ -255,7 +258,7 @@ public long serializedSize(CheckStatusReply reply, int version) CheckStatusOkFull okFull = (CheckStatusOkFull) ok; size += CommandSerializers.nullablePartialTxn.serializedSize(okFull.partialTxn, version); - size += DepsSerializer.nullablePartialDeps.serializedSize(okFull.committedDeps, version); + size += DepsSerializer.nullablePartialDeps.serializedSize(okFull.stableDeps, version); size += CommandSerializers.nullableWrites.serializedSize(okFull.writes, version); return size; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index 46232a811e90..caefcec6f200 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -46,6 +46,7 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.serializers.SmallEnumSerializer.NullableSmallEnumSerializer; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; @@ -241,20 +242,21 @@ public long serializedSize(Writes writes, int version) public static final IVersionedSerializer nullableWrites = NullableSerializer.wrap(writes); - public static final EnumSerializer route = new EnumSerializer<>(Status.KnownRoute.class); - public static final EnumSerializer definition = new EnumSerializer<>(Status.Definition.class); - public static final EnumSerializer knownExecuteAt = new EnumSerializer<>(Status.KnownExecuteAt.class); - public static final EnumSerializer knownDeps = new EnumSerializer<>(Status.KnownDeps.class); - public static final EnumSerializer outcome = new EnumSerializer<>(Status.Outcome.class); - public static final EnumSerializer invalidIfNot = new EnumSerializer<>(Infer.InvalidIfNot.class); - public static final EnumSerializer isPreempted = new EnumSerializer<>(Infer.IsPreempted.class); + public static final SmallEnumSerializer knownRoute = new SmallEnumSerializer<>(Status.KnownRoute.class); + public static final SmallEnumSerializer definition = new SmallEnumSerializer<>(Status.Definition.class); + public static final SmallEnumSerializer knownExecuteAt = new SmallEnumSerializer<>(Status.KnownExecuteAt.class); + public static final SmallEnumSerializer knownDeps = new SmallEnumSerializer<>(Status.KnownDeps.class); + public static final NullableSmallEnumSerializer nullableKnownDeps = new NullableSmallEnumSerializer<>(knownDeps); + public static final SmallEnumSerializer outcome = new SmallEnumSerializer<>(Status.Outcome.class); + public static final SmallEnumSerializer invalidIfNot = new SmallEnumSerializer<>(Infer.InvalidIfNot.class); + public static final SmallEnumSerializer isPreempted = new SmallEnumSerializer<>(Infer.IsPreempted.class); public static final IVersionedSerializer known = new IVersionedSerializer<>() { @Override public void serialize(Known known, DataOutputPlus out, int version) throws IOException { - route.serialize(known.route, out, version); + knownRoute.serialize(known.route, out, version); definition.serialize(known.definition, out, version); knownExecuteAt.serialize(known.executeAt, out, version); knownDeps.serialize(known.deps, out, version); @@ -264,7 +266,7 @@ public void serialize(Known known, DataOutputPlus out, int version) throws IOExc @Override public Known deserialize(DataInputPlus in, int version) throws IOException { - return new Known(route.deserialize(in, version), + return new Known(knownRoute.deserialize(in, version), definition.deserialize(in, version), knownExecuteAt.deserialize(in, version), knownDeps.deserialize(in, version), @@ -274,11 +276,11 @@ public Known deserialize(DataInputPlus in, int version) throws IOException @Override public long serializedSize(Known known, int version) { - return route.serializedSize(known.route, version) - + definition.serializedSize(known.definition, version) - + knownExecuteAt.serializedSize(known.executeAt, version) - + knownDeps.serializedSize(known.deps, version) - + outcome.serializedSize(known.outcome, version); + return knownRoute.serializedSize(known.route, version) + + definition.serializedSize(known.definition, version) + + knownExecuteAt.serializedSize(known.executeAt, version) + + knownDeps.serializedSize(known.deps, version) + + outcome.serializedSize(known.outcome, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index 23ea20b19710..cd704d3db116 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -23,6 +23,7 @@ import accord.messages.Commit; import accord.messages.ReadData; +import accord.primitives.Ballot; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; @@ -30,7 +31,6 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Unseekables; -import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -43,25 +43,7 @@ public class CommitSerializers { - private static final IVersionedSerializer kind = new IVersionedSerializer() - { - public void serialize(Commit.Kind kind, DataOutputPlus out, int version) throws IOException - { - Invariants.checkArgument(kind == Commit.Kind.Minimal || kind == Commit.Kind.Maximal); - out.writeBoolean(kind == Commit.Kind.Maximal); - - } - - public Commit.Kind deserialize(DataInputPlus in, int version) throws IOException - { - return in.readBoolean() ? Commit.Kind.Maximal : Commit.Kind.Minimal; - } - - public long serializedSize(Commit.Kind kind, int version) - { - return TypeSizes.BOOL_SIZE; - } - }; + private static final IVersionedSerializer kind = new EnumSerializer<>(Commit.Kind.class); public abstract static class CommitSerializer extends TxnRequestSerializer { @@ -76,6 +58,7 @@ public CommitSerializer(Class klass, IVersionedSerializer read) public void serializeBody(C msg, DataOutputPlus out, int version) throws IOException { kind.serialize(msg.kind, out, version); + CommandSerializers.ballot.serialize(msg.ballot, out, version); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); CommandSerializers.nullablePartialTxn.serialize(msg.partialTxn, out, version); DepsSerializer.partialDeps.serialize(msg.partialDeps, out, version); @@ -83,7 +66,8 @@ public void serializeBody(C msg, DataOutputPlus out, int version) throws IOExcep serializeNullable(msg.readData, out, version, read); } - protected abstract C deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Timestamp executeAt, + protected abstract C deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, + Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read); @@ -92,6 +76,7 @@ public C deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRout { return deserializeCommit(txnId, scope, waitForEpoch, kind.deserialize(in, version), + CommandSerializers.ballot.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), CommandSerializers.nullablePartialTxn.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), @@ -104,6 +89,7 @@ public C deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRout public long serializedBodySize(C msg, int version) { return kind.serializedSize(msg.kind, version) + + CommandSerializers.ballot.serializedSize(msg.ballot, version) + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + CommandSerializers.nullablePartialTxn.serializedSize(msg.partialTxn, version) + DepsSerializer.partialDeps.serializedSize(msg.partialDeps, version) @@ -115,9 +101,9 @@ public long serializedBodySize(C msg, int version) public static final IVersionedSerializer request = new CommitSerializer(ReadData.class, ReadDataSerializers.readData) { @Override - protected Commit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected Commit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { - return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, kind, executeAt, partialTxn, partialDeps, fullRoute, read); + return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, kind, ballot, executeAt, partialTxn, partialDeps, fullRoute, read); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java index 3530e06936c9..9498bef0f2d4 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java @@ -51,6 +51,7 @@ Deps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int ver return new Deps(keyDeps, rangeDeps); } }; + public static final IVersionedSerializer nullableDeps = NullableSerializer.wrap(deps); public static final DepsSerializer partialDeps = new DepsSerializer() { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index 61d715b79802..61e60f3dbd2a 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -32,6 +32,7 @@ import accord.messages.Propagate; import accord.messages.ReadData; import accord.messages.ReadData.ReadReply; +import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Ranges; @@ -94,7 +95,7 @@ public long serializedSize(FetchRequest request, int version) public static final IVersionedSerializer reply = new IVersionedSerializer() { - final ReadData.ReadNack[] nacks = ReadData.ReadNack.values(); + final ReadData.CommitOrReadNack[] nacks = ReadData.CommitOrReadNack.values(); final IVersionedSerializer streamDataSerializer = new CastingSerializer<>(StreamData.class, StreamData.serializer); @Override @@ -102,7 +103,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I { if (!reply.isOk()) { - out.writeByte(1 + ((ReadData.ReadNack) reply).ordinal()); + out.writeByte(1 + ((ReadData.CommitOrReadNack) reply).ordinal()); return; } @@ -148,14 +149,15 @@ public void serialize(Propagate p, DataOutputPlus out, int version) throws IOExc KeySerializers.route.serialize(p.route, out, version); CommandSerializers.saveStatus.serialize(p.maxKnowledgeSaveStatus, out, version); CommandSerializers.saveStatus.serialize(p.maxSaveStatus, out, version); + CommandSerializers.ballot.serialize(p.ballot, out, version); CommandSerializers.durability.serialize(p.durability, out, version); KeySerializers.nullableRoutingKey.serialize(p.homeKey, out, version); KeySerializers.nullableRoutingKey.serialize(p.progressKey, out, version); CommandSerializers.known.serialize(p.achieved, out, version); CheckStatusSerializers.foundKnownMap.serialize(p.known, out, version); - out.writeBoolean(p.isTruncated); + out.writeBoolean(p.isShardTruncated); CommandSerializers.nullablePartialTxn.serialize(p.partialTxn, out, version); - DepsSerializer.nullablePartialDeps.serialize(p.committedDeps, out, version); + DepsSerializer.nullablePartialDeps.serialize(p.stableDeps, out, version); out.writeLong(p.toEpoch); CommandSerializers.nullableTimestamp.serialize(p.committedExecuteAt, out, version); CommandSerializers.nullableWrites.serialize(p.writes, out, version); @@ -168,6 +170,7 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException Route route = KeySerializers.route.deserialize(in, version); SaveStatus maxKnowledgeSaveStatus = CommandSerializers.saveStatus.deserialize(in, version); SaveStatus maxSaveStatus = CommandSerializers.saveStatus.deserialize(in, version); + Ballot ballot = CommandSerializers.ballot.deserialize(in, version); Durability durability = CommandSerializers.durability.deserialize(in, version); RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); RoutingKey progressKey = KeySerializers.nullableRoutingKey.deserialize(in, version); @@ -197,6 +200,7 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException route, maxKnowledgeSaveStatus, maxSaveStatus, + ballot, durability, homeKey, progressKey, @@ -218,6 +222,7 @@ public long serializedSize(Propagate p, int version) + KeySerializers.route.serializedSize(p.route, version) + CommandSerializers.saveStatus.serializedSize(p.maxKnowledgeSaveStatus, version) + CommandSerializers.saveStatus.serializedSize(p.maxSaveStatus, version) + + CommandSerializers.ballot.serializedSize(p.ballot, version) + CommandSerializers.durability.serializedSize(p.durability, version) + KeySerializers.nullableRoutingKey.serializedSize(p.homeKey, version) + KeySerializers.nullableRoutingKey.serializedSize(p.progressKey, version) @@ -225,7 +230,7 @@ public long serializedSize(Propagate p, int version) + CheckStatusSerializers.foundKnownMap.serializedSize(p.known, version) + TypeSizes.BOOL_SIZE + CommandSerializers.nullablePartialTxn.serializedSize(p.partialTxn, version) - + DepsSerializer.nullablePartialDeps.serializedSize(p.committedDeps, version) + + DepsSerializer.nullablePartialDeps.serializedSize(p.stableDeps, version) + TypeSizes.sizeof(p.toEpoch) + CommandSerializers.nullableTimestamp.serializedSize(p.committedExecuteAt, version) + CommandSerializers.nullableWrites.serializedSize(p.writes, version) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 5afc451bb86d..cfae34db4f6b 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -23,7 +23,7 @@ import accord.api.Data; import accord.messages.ApplyThenWaitUntilApplied; import accord.messages.ReadData; -import accord.messages.ReadData.ReadNack; +import accord.messages.ReadData.CommitOrReadNack; import accord.messages.ReadData.ReadOk; import accord.messages.ReadData.ReadReply; import accord.messages.ReadData.ReadType; @@ -172,7 +172,7 @@ private static ReadDataSerializer serializerFor(ReadType type) public static final class ReplySerializer implements IVersionedSerializer { // TODO (now): use something other than ordinal - final ReadNack[] nacks = ReadNack.values(); + final CommitOrReadNack[] nacks = CommitOrReadNack.values(); private final IVersionedSerializer dataSerializer; public ReplySerializer(IVersionedSerializer dataSerializer) @@ -185,7 +185,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I { if (!reply.isOk()) { - out.writeByte(1 + ((ReadNack) reply).ordinal()); + out.writeByte(1 + ((CommitOrReadNack) reply).ordinal()); return; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index adf60212cce6..346d1c8bdf48 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -23,6 +23,7 @@ import javax.annotation.Nullable; import accord.api.Result; +import accord.api.RoutingKey; import accord.local.Status; import accord.messages.BeginRecovery; import accord.messages.BeginRecovery.RecoverNack; @@ -31,7 +32,7 @@ import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; -import accord.primitives.PartialDeps; +import accord.primitives.LatestDeps; import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; import accord.primitives.Timestamp; @@ -89,8 +90,7 @@ void serializeOk(RecoverOk recoverOk, DataOutputPlus out, int version) throws IO CommandSerializers.status.serialize(recoverOk.status, out, version); CommandSerializers.ballot.serialize(recoverOk.accepted, out, version); CommandSerializers.nullableTimestamp.serialize(recoverOk.executeAt, out, version); - DepsSerializer.partialDeps.serialize(recoverOk.deps, out, version); - DepsSerializer.nullablePartialDeps.serialize(recoverOk.acceptedDeps, out, version); + latestDeps.serialize(recoverOk.deps, out, version); DepsSerializer.deps.serialize(recoverOk.earlierCommittedWitness, out, version); DepsSerializer.deps.serialize(recoverOk.earlierAcceptedNoWitness, out, version); out.writeBoolean(recoverOk.rejectsFastPath); @@ -112,9 +112,9 @@ RecoverNack deserializeNack(Ballot supersededBy, DataInputPlus in, int version) return new RecoverNack(supersededBy); } - RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp executeAt, @Nonnull PartialDeps deps, PartialDeps acceptedDeps, Deps earlierCommittedWitness, Deps earlierAcceptedNoWitness, boolean rejectsFastPath, Writes writes, Result result, DataInputPlus in, int version) + RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp executeAt, @Nonnull LatestDeps deps, Deps earlierCommittedWitness, Deps earlierAcceptedNoWitness, boolean rejectsFastPath, Writes writes, Result result, DataInputPlus in, int version) { - return new RecoverOk(txnId, status, accepted, executeAt, deps, acceptedDeps, earlierCommittedWitness, earlierAcceptedNoWitness, rejectsFastPath, writes, result); + return new RecoverOk(txnId, status, accepted, executeAt, deps, earlierCommittedWitness, earlierAcceptedNoWitness, rejectsFastPath, writes, result); } @Override @@ -135,8 +135,7 @@ public RecoverReply deserialize(DataInputPlus in, int version) throws IOExceptio status, CommandSerializers.ballot.deserialize(in, version), CommandSerializers.nullableTimestamp.deserialize(in, version), - DepsSerializer.partialDeps.deserialize(in, version), - DepsSerializer.nullablePartialDeps.deserialize(in, version), + latestDeps.deserialize(in, version), DepsSerializer.deps.deserialize(in, version), DepsSerializer.deps.deserialize(in, version), in.readBoolean(), @@ -157,8 +156,7 @@ long serializedOkSize(RecoverOk recoverOk, int version) size += CommandSerializers.status.serializedSize(recoverOk.status, version); size += CommandSerializers.ballot.serializedSize(recoverOk.accepted, version); size += CommandSerializers.nullableTimestamp.serializedSize(recoverOk.executeAt, version); - size += DepsSerializer.partialDeps.serializedSize(recoverOk.deps, version); - size += DepsSerializer.nullablePartialDeps.serializedSize(recoverOk.acceptedDeps, version); + size += latestDeps.serializedSize(recoverOk.deps, version); size += DepsSerializer.deps.serializedSize(recoverOk.earlierCommittedWitness, version); size += DepsSerializer.deps.serializedSize(recoverOk.earlierAcceptedNoWitness, version); size += TypeSizes.sizeof(recoverOk.rejectsFastPath); @@ -173,4 +171,80 @@ public long serializedSize(RecoverReply reply, int version) + (reply.isOk() ? serializedOkSize((RecoverOk) reply, version) : serializedNackSize((RecoverNack) reply, version)); } }; + + public static final IVersionedSerializer latestDeps = new IVersionedSerializer() + { + @Override + public void serialize(LatestDeps t, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(t.size()); + for (int i = 0 ; i < t.size() ; ++i) + { + RoutingKey start = t.startAt(i); + KeySerializers.routingKey.serialize(start, out, version); + LatestDeps.LatestEntry e = t.valueAt(i); + if (e == null) + { + CommandSerializers.nullableKnownDeps.serialize(null, out, version); + } + else + { + CommandSerializers.nullableKnownDeps.serialize(e.known, out, version); + CommandSerializers.ballot.serialize(e.ballot, out, version); + DepsSerializer.nullableDeps.serialize(e.coordinatedDeps, out, version); + DepsSerializer.nullableDeps.serialize(e.localDeps, out, version); + } + } + KeySerializers.routingKey.serialize(t.startAt(t.size()), out, version); + } + + @Override + public LatestDeps deserialize(DataInputPlus in, int version) throws IOException + { + int size = in.readUnsignedVInt32(); + RoutingKey[] starts = new RoutingKey[size + 1]; + LatestDeps.LatestEntry[] values = new LatestDeps.LatestEntry[size]; + for (int i = 0 ; i < size ; ++i) + { + starts[i] = KeySerializers.routingKey.deserialize(in, version); + Status.KnownDeps knownDeps = CommandSerializers.nullableKnownDeps.deserialize(in, version); + if (knownDeps == null) + continue; + + Ballot ballot = CommandSerializers.ballot.deserialize(in, version); + Deps coordinatedDeps = DepsSerializer.nullableDeps.deserialize(in, version); + Deps localDeps = DepsSerializer.nullableDeps.deserialize(in, version); + values[i] = new LatestDeps.LatestEntry(knownDeps, ballot, coordinatedDeps, localDeps); + } + starts[size] = KeySerializers.routingKey.deserialize(in, version); + + return LatestDeps.SerializerSupport.create(true, starts, values); + } + + @Override + public long serializedSize(LatestDeps t, int version) + { + long size = 0; + size += TypeSizes.sizeofUnsignedVInt(t.size()); + for (int i = 0 ; i < t.size() ; ++i) + { + RoutingKey start = t.startAt(i); + size += KeySerializers.routingKey.serializedSize(start, version); + LatestDeps.LatestEntry e = t.valueAt(i); + if (e == null) + { + size += CommandSerializers.nullableKnownDeps.serializedSize(null, version); + } + else + { + size += CommandSerializers.nullableKnownDeps.serializedSize(e.known, version); + size += CommandSerializers.ballot.serializedSize(e.ballot, version); + size += DepsSerializer.nullableDeps.serializedSize(e.coordinatedDeps, version); + size += DepsSerializer.nullableDeps.serializedSize(e.localDeps, version); + } + } + size += KeySerializers.routingKey.serializedSize(t.startAt(t.size()), version); + return size; + } + }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/SmallEnumSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/SmallEnumSerializer.java new file mode 100644 index 000000000000..2182d359a2d0 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/SmallEnumSerializer.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import javax.annotation.Nullable; + +import accord.messages.SimpleReply; +import accord.utils.Invariants; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class SmallEnumSerializer> implements IVersionedSerializer +{ + public static final SmallEnumSerializer simpleReply = new SmallEnumSerializer<>(SimpleReply.class); + + // TODO: should use something other than ordinal for ser/deser + final E[] values; + + public SmallEnumSerializer(Class clazz) + { + this.values = clazz.getEnumConstants(); + Invariants.checkArgument(values.length < 255); // allow an extra 1 for nullable variant to ensure consistency + } + + public E forOrdinal(int ordinal) + { + return values[ordinal]; + } + + @Override + public void serialize(E t, DataOutputPlus out, int version) throws IOException + { + out.write(t.ordinal()); + } + + @Override + public E deserialize(DataInputPlus in, int version) throws IOException + { + return values[in.readByte()]; + } + + public ByteBuffer serialize(E e) + { + ByteBuffer out = ByteBuffer.allocate(1); + out.put((byte)e.ordinal()); + out.flip(); + return out; + } + + @Override + public long serializedSize(E t, int version) + { + return 1; + } + + public static class NullableSmallEnumSerializer> implements IVersionedSerializer + { + // TODO: should use something other than ordinal for ser/deser + final E[] values; + + public NullableSmallEnumSerializer(SmallEnumSerializer wrap) + { + this.values = wrap.values; + } + + public E forOrdinal(int ordinal) + { + return values[ordinal]; + } + + @Override + public void serialize(@Nullable E t, DataOutputPlus out, int version) throws IOException + { + out.write(t == null ? 0 : 1 + t.ordinal()); + } + + @Override + public E deserialize(DataInputPlus in, int version) throws IOException + { + int ordinal = in.readByte(); + return ordinal == 0 ? null : values[ordinal - 1]; + } + + public ByteBuffer serialize(E e) + { + ByteBuffer out = ByteBuffer.allocate(1); + out.put((byte)(e == null ? 0 : (1 + e.ordinal()))); + out.flip(); + return out; + } + + @Override + public long serializedSize(E t, int version) + { + return 1; + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java index 028bcfbf32f7..487fbf5f1d0d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -231,12 +231,12 @@ private void assertCoordinatorMetrics(int node, String scope, long fastPaths, lo } } - private void assertReplicaMetrics(int node, String scope, long commits, long executions, long applications) + private void assertReplicaMetrics(int node, String scope, long stable, long executions, long applications) { DefaultNameFactory nameFactory = new DefaultNameFactory(AccordMetrics.ACCORD_REPLICA, scope); Map metrics = diff(countingMetrics0).get(node); Function metric = n -> metrics.get(nameFactory.createMetricName(n).getMetricName()); - assertThat(metric.apply(AccordMetrics.COMMIT_LATENCY)).isEqualTo(commits); + assertThat(metric.apply(AccordMetrics.STABLE_LATENCY)).isEqualTo(stable); assertThat(metric.apply(AccordMetrics.EXECUTE_LATENCY)).isEqualTo(executions); assertThat(metric.apply(AccordMetrics.APPLY_LATENCY)).isEqualTo(applications); assertThat(metric.apply(AccordMetrics.APPLY_DURATION)).isEqualTo(applications); diff --git a/test/unit/org/apache/cassandra/cql3/selection/SelectionColumnMappingTest.java b/test/unit/org/apache/cassandra/cql3/selection/SelectionColumnMappingTest.java index 9f20aea4c0fa..58f4a3364e44 100644 --- a/test/unit/org/apache/cassandra/cql3/selection/SelectionColumnMappingTest.java +++ b/test/unit/org/apache/cassandra/cql3/selection/SelectionColumnMappingTest.java @@ -40,7 +40,6 @@ import org.apache.cassandra.utils.ByteBufferUtil; import static java.util.Arrays.asList; -import static org.apache.cassandra.ServerTestUtils.daemonInitialization; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java index 1bdd1ca19b77..2d935eb978ad 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java @@ -42,7 +42,6 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.FBUtilities; -import static org.apache.cassandra.ServerTestUtils.daemonInitialization; import static org.junit.Assert.assertEquals; public class FrozenCollectionsTest extends CQLTester diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/JsonTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/JsonTest.java index d05391afccd0..2976c014bf67 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/JsonTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/JsonTest.java @@ -39,7 +39,6 @@ import java.util.*; import java.util.concurrent.*; -import static org.apache.cassandra.ServerTestUtils.daemonInitialization; import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/UserTypesTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/UserTypesTest.java index 530d55ba11cd..2d30f1946ca6 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/UserTypesTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/UserTypesTest.java @@ -27,8 +27,6 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.StorageService; -import static org.apache.cassandra.ServerTestUtils.daemonInitialization; - public class UserTypesTest extends CQLTester { @BeforeClass diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java index 3cbc9d79c083..31ef09ec86e4 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java @@ -28,8 +28,6 @@ import org.apache.cassandra.dht.ByteOrderedPartitioner; import org.apache.cassandra.service.StorageService; -import static org.apache.cassandra.ServerTestUtils.daemonInitialization; - public class SelectLimitTest extends CQLTester { // This method will be ran instead of the CQLTester#setUpClass diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 85ef473ac4b6..7765b9c1ad6d 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -83,8 +83,6 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; -import org.apache.cassandra.service.accord.AccordKeyspace.CommandsColumns; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.utils.FBUtilities; @@ -461,7 +459,7 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC TxnId[] txnIds = additionalCommand ? TXN_IDS : new TxnId[] {TXN_ID}; for (TxnId txnId : txnIds) { - Txn txn = txnId.rw().isWrite() ? AccordTestUtils.createWriteTxn(42) : AccordTestUtils.createTxn(42); + Txn txn = txnId.kind().isWrite() ? AccordTestUtils.createWriteTxn(42) : AccordTestUtils.createTxn(42); Seekable key = txn.keys().get(0); PartialDeps partialDeps = Deps.NONE.slice(AccordTestUtils.fullRange(txn)); PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); @@ -482,9 +480,9 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { Commit commit = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Minimal, txnId, partialTxn, partialDeps, route, null); + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, txnId, partialTxn, partialDeps, route, null); commandStore.appendToJournal(commit); - CheckedCommands.commit(safe, txnId, route, null, partialTxn, txnId, partialDeps); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, txnId, partialDeps); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 0bd628903ab5..06a55d24cb28 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -157,7 +157,7 @@ public void basicCycleTest() throws Throwable })); // check commit - Commit commit = Commit.SerializerSupport.create(txnId, route, 1, Commit.Kind.Maximal, executeAt, partialTxn, deps, fullRoute, null); + Commit commit = Commit.SerializerSupport.create(txnId, route, 1, Commit.Kind.StableWithTxnAndDeps, Ballot.ZERO, executeAt, partialTxn, deps, fullRoute, null); commandStore.appendToJournal(commit); getUninterruptibly(commandStore.execute(commit, commit::apply)); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 52aff302e389..0be7d692545a 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -82,18 +82,19 @@ public void serde() common.route(route); common.partialDeps(deps.slice(scope)); common.durability(Status.Durability.NotDurable); - Command.WaitingOn waitingOn = Command.WaitingOn.none(deps.slice(scope)); + Command.WaitingOn waitingOn = null; Command.Committed committed = Command.SerializerSupport.committed(common, SaveStatus.Committed, id, Ballot.ZERO, Ballot.ZERO, waitingOn); AccordSafeCommand safeCommand = new AccordSafeCommand(AccordTestUtils.loaded(id, null)); safeCommand.set(committed); - Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.Maximal, id, partialTxn, partialDeps, route, null); + Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.StableFastPath, Ballot.ZERO, id, partialTxn, partialDeps, route, null); store.appendToJournal(commit); Mutation mutation = AccordKeyspace.getCommandMutation(store, safeCommand, 42); mutation.apply(); - Assertions.assertThat(AccordKeyspace.loadCommand(store, id)).isEqualTo(committed); + Command loaded = AccordKeyspace.loadCommand(store, id); + Assertions.assertThat(loaded).isEqualTo(committed); } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index 93150d105489..82f56f869095 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -92,7 +92,7 @@ public void bootstrapRead() checkRequestReplies(request, new AbstractFetchCoordinator.FetchResponse(null, null, id), - ReadData.ReadNack.NotCommitted); + ReadData.CommitOrReadNack.Insufficient); } @@ -103,7 +103,7 @@ public void txnRead() Request request = new ReadTxnData(node, topologies, txnId, topology.ranges(), txnId); checkRequestReplies(request, new ReadData.ReadOk(null, null), - ReadData.ReadNack.NotCommitted); + ReadData.CommitOrReadNack.Insufficient); } private static void checkRequestReplies(Request request, Reply... replies) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index c27878f401a7..77911183de3c 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -128,6 +128,19 @@ public static Command committed(TxnId txnId, PartialTxn txn, Timestamp executeAt executeAt, Ballot.ZERO, Ballot.ZERO, + null); + } + + public static Command stable(TxnId txnId, PartialTxn txn, Timestamp executeAt) + { + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId).partialDeps(PartialDeps.NONE); + attrs.partialTxn(txn); + attrs.route(route(txn)); + return Command.SerializerSupport.committed(attrs, + SaveStatus.Stable, + executeAt, + Ballot.ZERO, + Ballot.ZERO, Command.WaitingOn.EMPTY); } @@ -189,7 +202,7 @@ public static void testLoad(ManualExecutor executor, AccordSafeState route = partialTxn.keys().toRoute(routingKey); + Ranges ranges = AccordTestUtils.fullRange(partialTxn.keys()); + PartialRoute partialRoute = route.slice(ranges); + PartialDeps deps = PartialDeps.builder(ranges).build(); + + // create and write messages to the journal for loading to succeed + PreAccept preAccept = + PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); + Commit stable = + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, executeAt, partialTxn, deps, route, null); + + commandStore.appendToJournal(preAccept); + commandStore.appendToJournal(stable); + + try + { + Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys()), safe -> { + CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); + return safe.ifInitialised(txnId).current(); + }).beginAsResult()); + + // clear cache + commandStore.executeBlocking(() -> { + long cacheSize = commandStore.capacity(); + commandStore.setCapacity(0); + commandStore.setCapacity(cacheSize); + commandStore.cache().awaitSaveResults(); + }); + + return command; + } + catch (ExecutionException e) + { + throw new AssertionError(e); + } + } + + private static Command createStableUsingSlowLifeCycle(AccordCommandStore commandStore, TxnId txnId) + { + return createStableUsingSlowLifeCycle(commandStore, txnId, txnId); + } + + private static Command createStableUsingSlowLifeCycle(AccordCommandStore commandStore, TxnId txnId, Timestamp executeAt) { PartialTxn partialTxn = createPartialTxn(0); RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); @@ -209,18 +258,22 @@ private static Command createCommittedUsingLifeCycle(AccordCommandStore commandS Accept accept = Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); Commit commit = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Minimal, executeAt, partialTxn, deps, route, null); + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Commit, Ballot.ZERO, executeAt, partialTxn, deps, route, null); + Commit stable = + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableSlowPath, Ballot.ZERO, executeAt, partialTxn, deps, route, null); commandStore.appendToJournal(preAccept); commandStore.appendToJournal(accept); commandStore.appendToJournal(commit); + commandStore.appendToJournal(stable); try { Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys()), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); - CheckedCommands.commit(safe, txnId, route, null, partialTxn, executeAt, deps); + CheckedCommands.commit(safe, SaveStatus.Committed, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); return safe.ifInitialised(txnId).current(); }).beginAsResult()); @@ -265,7 +318,7 @@ public void testFutureCleanup() throws Throwable TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - createCommittedAndPersist(commandStore, txnId); + createStableAndPersist(commandStore, txnId); Consumer consumer = safeStore -> safeStore.ifInitialised(txnId).readyToExecute(); PreLoadContext ctx = contextFor(txnId); @@ -396,8 +449,12 @@ public void consumerFails() private static void createCommand(AccordCommandStore commandStore, RandomSource rs, List ids) { // to simulate CommandsForKey not being found, use createCommittedAndPersist periodically as it does not update - if (rs.nextBoolean()) ids.forEach(id -> createCommittedAndPersist(commandStore, id)); - else ids.forEach(id -> createCommittedUsingLifeCycle(commandStore, id)); + switch (rs.nextInt(3)) + { + case 0: ids.forEach(id -> createStableAndPersist(commandStore, id)); break; + case 1: ids.forEach(id -> createStableUsingFastLifeCycle(commandStore, id)); break; + case 2: ids.forEach(id -> createStableUsingSlowLifeCycle(commandStore, id)); + } commandStore.unsafeClearCache(); } diff --git a/tools/stress/src/org/apache/cassandra/stress/operations/predefined/PredefinedOperation.java b/tools/stress/src/org/apache/cassandra/stress/operations/predefined/PredefinedOperation.java index bf969ad46bcc..630ee3aca5f3 100644 --- a/tools/stress/src/org/apache/cassandra/stress/operations/predefined/PredefinedOperation.java +++ b/tools/stress/src/org/apache/cassandra/stress/operations/predefined/PredefinedOperation.java @@ -28,7 +28,6 @@ import org.apache.cassandra.stress.operations.PartitionOperation; import org.apache.cassandra.stress.report.Timer; import org.apache.cassandra.stress.settings.Command; -import org.apache.cassandra.stress.settings.CqlVersion; import org.apache.cassandra.stress.settings.StressSettings; public abstract class PredefinedOperation extends PartitionOperation From a324003c5959866641554746a280342582f18603 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 4 Oct 2023 15:13:37 -0700 Subject: [PATCH 094/340] Get simulator working (again) Co-authored-by: Ariel Weisberg Co-authored-by: Benedict Elliott Smith --- .build/checkstyle_suppressions.xml | 1 - build.xml | 18 +++ .../concurrent/InfiniteLoopExecutor.java | 5 + .../config/CassandraRelevantProperties.java | 2 + .../memtable/AbstractAllocatorMemtable.java | 6 + .../org/apache/cassandra/gms/Gossiper.java | 5 + .../cassandra/index/IndexStatusManager.java | 10 +- .../cassandra/journal/ActiveSegment.java | 9 ++ .../org/apache/cassandra/journal/Flusher.java | 29 ++++- .../org/apache/cassandra/journal/Journal.java | 3 + .../org/apache/cassandra/journal/Params.java | 2 +- .../metrics/AccordStateCacheMetrics.java | 6 +- .../accord/AccordConfigurationService.java | 61 ++++++++-- .../accord/AccordFastPathCoordinator.java | 2 +- .../service/accord/AccordJournal.java | 9 +- .../service/accord/AccordService.java | 33 ++++-- .../cassandra/utils/concurrent/Semaphore.java | 3 +- test/conf/logback-simulator.xml | 5 +- .../cassandra/distributed/impl/Instance.java | 10 ++ .../distributed/impl/IsolatedExecutor.java | 2 +- .../simulator/asm/ClassTransformer.java | 4 + .../simulator/asm/InterceptAgent.java | 104 +++++++++++++++++- .../simulator/asm/InterceptClasses.java | 2 + .../asm/MonitorMethodTransformer.java | 3 +- .../simulator/asm/StringHashcode.java | 43 ++++++++ .../cassandra/simulator/ActionSchedule.java | 17 ++- .../simulator/ClusterSimulation.java | 9 +- .../cassandra/simulator/SimulationRunner.java | 22 +++- .../cassandra/simulator/debug/Record.java | 4 +- .../simulator/debug/SelfReconcile.java | 2 + .../simulator/logging/RunStartDefiner.java | 6 +- ...bstractPairOfSequencesPaxosSimulation.java | 11 +- .../paxos/AccordClusterSimulation.java | 5 +- .../paxos/AccordSimulationRunner.java | 27 +++++ .../PairOfSequencesAccordSimulation.java | 6 +- .../paxos/PaxosSimulationRunner.java | 2 + .../systems/InterceptingMonitors.java | 3 - .../apache/cassandra/journal/TestParams.java | 2 +- 38 files changed, 421 insertions(+), 72 deletions(-) create mode 100644 test/simulator/asm/org/apache/cassandra/simulator/asm/StringHashcode.java diff --git a/.build/checkstyle_suppressions.xml b/.build/checkstyle_suppressions.xml index ed4d1443f7fc..230c808c1435 100644 --- a/.build/checkstyle_suppressions.xml +++ b/.build/checkstyle_suppressions.xml @@ -21,5 +21,4 @@ "https://checkstyle.org/dtds/suppressions_1_1.dtd"> - diff --git a/build.xml b/build.xml index 7544c664c1aa..55614a36a6ff 100644 --- a/build.xml +++ b/build.xml @@ -226,6 +226,24 @@ + + + + + + + + + + + + + + + + diff --git a/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java b/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java index ac10a70c3066..b576551ac07a 100644 --- a/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java +++ b/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java @@ -52,6 +52,11 @@ public enum InternalState { SHUTTING_DOWN_NOW, TERMINATED } @Shared(scope = Shared.Scope.SIMULATION) public enum SimulatorSafe { SAFE, UNSAFE } + /** + * Does this loop always block on some external work provision that is going to be simulator-controlled, or does + * it loop periodically? If the latter, it may prevent simulation making progress between phases, and should be + * marked as a DAEMON process. + */ @Shared(scope = Shared.Scope.SIMULATION) public enum Daemon { DAEMON, NON_DAEMON } diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index f1f50e589f8c..c0c739ec827e 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -596,6 +596,8 @@ public enum CassandraRelevantProperties * can be also done manually for that particular case: {@code flush(SchemaConstants.SCHEMA_KEYSPACE_NAME);}. */ TEST_FLUSH_LOCAL_SCHEMA_CHANGES("cassandra.test.flush_local_schema_changes", "true"), TEST_HARRY_SWITCH_AFTER("cassandra.test.harry.progression.switch-after", "1"), + TEST_HISTORY_VALIDATOR_LOGGING_ENABLED("cassandra.test.history_validator.logging.enabled", "false"), + TEST_IGNORE_SIGAR("cassandra.test.ignore_sigar"), TEST_INTERVAL_TREE_EXPENSIVE_CHECKS("cassandra.test.interval_tree_expensive_checks"), TEST_INVALID_LEGACY_SSTABLE_ROOT("invalid-legacy-sstable-root"), TEST_JVM_DTEST_DISABLE_SSL("cassandra.test.disable_ssl"), diff --git a/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java b/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java index b431d360ed10..2dbe41374f09 100644 --- a/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java +++ b/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java @@ -220,6 +220,12 @@ protected void runMayThrow() if (current instanceof AbstractAllocatorMemtable) ((AbstractAllocatorMemtable) current).flushIfPeriodExpired(); } + + @Override + public String toString() + { + return "Scheduled Flush of " + owner; + } }; ScheduledExecutors.scheduledTasks.scheduleSelfRecurring(runnable, period, TimeUnit.MILLISECONDS); } diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java b/src/java/org/apache/cassandra/gms/Gossiper.java index 14cc5f5adaaf..b84b1f25cb42 100644 --- a/src/java/org/apache/cassandra/gms/Gossiper.java +++ b/src/java/org/apache/cassandra/gms/Gossiper.java @@ -2046,6 +2046,11 @@ public void stopShutdownAndWait(long timeout, TimeUnit unit) throws InterruptedE ExecutorUtils.shutdownAndWait(timeout, unit, executor); } + public void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException + { + ExecutorUtils.shutdownAndWait(timeout, unit, executor); + } + @Nullable private String getReleaseVersionString(InetAddressAndPort ep) { diff --git a/src/java/org/apache/cassandra/index/IndexStatusManager.java b/src/java/org/apache/cassandra/index/IndexStatusManager.java index b11ecd1094bb..0f50a26276b4 100644 --- a/src/java/org/apache/cassandra/index/IndexStatusManager.java +++ b/src/java/org/apache/cassandra/index/IndexStatusManager.java @@ -24,12 +24,13 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import javax.annotation.Nonnull; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.tcm.ClusterMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,7 +47,9 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.serializers.MarshalException; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.CassandraVersion; +import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JsonUtils; @@ -335,4 +338,9 @@ private String identifier(String keyspace, String index) { return keyspace + '.' + index; } + + public void shutdownAndWait(long interval, TimeUnit unit) throws InterruptedException, TimeoutException + { + ExecutorUtils.shutdownAndWait(interval, unit, statusPropagationExecutor); + } } diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index 22a3aba766bd..f16126c157a5 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -33,6 +33,9 @@ import org.apache.cassandra.utils.concurrent.Ref; import org.apache.cassandra.utils.concurrent.WaitQueue; +import static org.apache.cassandra.utils.Simulate.With.MONITORS; + +@Simulate(with=MONITORS) final class ActiveSegment extends Segment { final FileChannel channel; @@ -247,6 +250,12 @@ public String name() * Flush logic; closing and component flushing */ + boolean shouldFlush() + { + int allocatePosition = this.allocatePosition.get(); + return lastFlushedOffset < allocatePosition; + } + /** * Possibly force a disk flush for this segment file. * TODO FIXME: calls from outside Flusher + callbacks diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index 436abc5d0e97..04411f74c851 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -28,6 +28,7 @@ import org.apache.cassandra.concurrent.Interruptible.TerminateException; import org.apache.cassandra.utils.MonotonicClock; import org.apache.cassandra.utils.NoSpamLogger; +import org.apache.cassandra.utils.Simulate; import org.apache.cassandra.utils.concurrent.Semaphore; import org.apache.cassandra.utils.concurrent.WaitQueue; @@ -45,6 +46,9 @@ import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; import static org.apache.cassandra.utils.MonotonicClock.Global.preciseTime; +import static org.apache.cassandra.utils.Simulate.With.GLOBAL_CLOCK; +import static org.apache.cassandra.utils.Simulate.With.LOCK_SUPPORT; +import static org.apache.cassandra.utils.Simulate.With.MONITORS; import static org.apache.cassandra.utils.concurrent.Semaphore.newSemaphore; import static org.apache.cassandra.utils.concurrent.WaitQueue.newWaitQueue; @@ -96,6 +100,7 @@ void shutdown() flushExecutor.shutdown(); } + @Simulate(with={MONITORS,GLOBAL_CLOCK,LOCK_SUPPORT}) private class FlushRunnable implements Interruptible.Task { private final MonotonicClock clock; @@ -151,9 +156,17 @@ public void doRun(Interruptible.State state) throws InterruptedException if (state == SHUTTING_DOWN) return; - long wakeUpAt = startedRunAt + flushPeriodNanos(); - if (wakeUpAt > now) - haveWork.tryAcquireUntil(1, wakeUpAt); + long flushPeriodNanos = flushPeriodNanos(); + if (flushPeriodNanos <= 0) + { + haveWork.acquire(1); + } + else + { + long wakeUpAt = startedRunAt + flushPeriodNanos; + if (wakeUpAt > now) + haveWork.tryAcquireUntil(1, wakeUpAt); + } } private void doFlush() @@ -168,6 +181,9 @@ private void doFlush() for (ActiveSegment segment : segmentsToFlush) { + if (!segment.shouldFlush()) + break; + syncedSegment = segment.descriptor.timestamp; syncedOffset = segment.flush(); @@ -202,8 +218,9 @@ private void processFlushDuration(long startedFlushAt, long finishedFlushAt) flushCount++; flushDuration += (finishedFlushAt - startedFlushAt); - long lag = finishedFlushAt - (startedFlushAt + flushPeriodNanos()); - if (lag <= 0) + long flushPeriodNanos = flushPeriodNanos(); + long lag = finishedFlushAt - (startedFlushAt + flushPeriodNanos); + if (flushPeriodNanos <= 0 || lag <= 0) return; lagCount++; @@ -349,7 +366,7 @@ private void awaitFlushAt(long flushTime, Timer.Context context) private long flushPeriodNanos() { - return 1_000_000L * params.flushPeriod(); + return 1_000_000L * params.flushPeriodMillis(); } private long periodicFlushLagBlockNanos() diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index bb1ada27f7d8..844f6607966e 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -50,6 +50,7 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Crc; import org.apache.cassandra.utils.JVMStabilityInspector; +import org.apache.cassandra.utils.Simulate; import org.apache.cassandra.utils.concurrent.WaitQueue; import static java.lang.String.format; @@ -61,6 +62,7 @@ import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; import static org.apache.cassandra.concurrent.Interruptible.State.SHUTTING_DOWN; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; +import static org.apache.cassandra.utils.Simulate.With.MONITORS; import static org.apache.cassandra.utils.concurrent.WaitQueue.newWaitQueue; /** @@ -77,6 +79,7 @@ * @param the type of keys used to address the records; must be fixed-size and byte-order comparable */ +@Simulate(with=MONITORS) public class Journal implements Shutdownable { private static final Logger logger = LoggerFactory.getLogger(Journal.class); diff --git a/src/java/org/apache/cassandra/journal/Params.java b/src/java/org/apache/cassandra/journal/Params.java index f462f450ac21..46b382ea278c 100644 --- a/src/java/org/apache/cassandra/journal/Params.java +++ b/src/java/org/apache/cassandra/journal/Params.java @@ -41,7 +41,7 @@ enum FailurePolicy { STOP, STOP_JOURNAL, IGNORE, DIE } /** * @return milliseconds between journal flushes */ - int flushPeriod(); + int flushPeriodMillis(); /** * @return milliseconds to block writes for while waiting for a slow disk flush to complete diff --git a/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java b/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java index fd4308a356d2..f63fedf282d9 100644 --- a/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java @@ -32,7 +32,7 @@ public class AccordStateCacheMetrics extends CacheAccessMetrics public final Histogram objectSize; - private final Map, CacheAccessMetrics> instanceMetrics = new ConcurrentHashMap<>(2); + private final Map instanceMetrics = new ConcurrentHashMap<>(2); private final String type; @@ -45,6 +45,8 @@ public AccordStateCacheMetrics(String type) public CacheAccessMetrics forInstance(Class klass) { - return instanceMetrics.computeIfAbsent(klass, k -> new CacheAccessMetrics(new DefaultNameFactory(TYPE_NAME, String.format("%s-%s", type, k.getSimpleName())))); + // cannot make Class hashCode deterministic, as cannot rewrite - so cannot safely use as Map key if want deterministic simulation + // (or we need to create extra hoops to catch this specific case in method rewriting) + return instanceMetrics.computeIfAbsent(klass.getSimpleName(), k -> new CacheAccessMetrics(new DefaultNameFactory(TYPE_NAME, String.format("%s-%s", type, k)))); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index ad20fea04329..31565f842370 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -20,13 +20,12 @@ import java.util.Objects; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Sets; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import accord.impl.AbstractConfigurationService; import accord.local.Node; @@ -36,6 +35,7 @@ import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.concurrent.ScheduledExecutors; +import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.gms.FailureDetector; import org.apache.cassandra.gms.IFailureDetector; @@ -44,19 +44,23 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.AccordKeyspace.EpochDiskState; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.listeners.ChangeListener; +import org.apache.cassandra.utils.Simulate; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; +import static org.apache.cassandra.utils.Simulate.With.MONITORS; + // TODO: listen to FailureDetector and rearrange fast path accordingly -public class AccordConfigurationService extends AbstractConfigurationService implements ChangeListener, AccordEndpointMapper, AccordSyncPropagator.Listener +@Simulate(with=MONITORS) +public class AccordConfigurationService extends AbstractConfigurationService implements ChangeListener, AccordEndpointMapper, AccordSyncPropagator.Listener, Shutdownable { - private static final Logger logger = LoggerFactory.getLogger(AccordConfigurationService.class); private final AccordSyncPropagator syncPropagator; private EpochDiskState diskState = EpochDiskState.EMPTY; - private enum State { INITIALIZED, LOADING, STARTED } + private enum State { INITIALIZED, LOADING, STARTED, SHUTDOWN } private State state = State.INITIALIZED; private volatile EndpointMapping mapping = EndpointMapping.EMPTY; @@ -150,6 +154,35 @@ public synchronized void start() receiveRedundant(redundant, epoch); })); state = State.STARTED; + ClusterMetadataService.instance().log().addListener(this); + } + + @Override + public synchronized boolean isTerminated() + { + return state == State.SHUTDOWN; + } + + @Override + public synchronized void shutdown() + { + if (isTerminated()) + return; + ClusterMetadataService.instance().log().removeListener(this); + state = State.SHUTDOWN; + } + + @Override + public Object shutdownNow() + { + shutdown(); + return null; + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException + { + return isTerminated(); } @Override @@ -262,7 +295,7 @@ protected synchronized void topologyUpdatePreListenerNotify(Topology topology) } @Override - protected void receiveRemoteSyncCompletePreListenerNotify(Node.Id node, long epoch) + protected synchronized void receiveRemoteSyncCompletePreListenerNotify(Node.Id node, long epoch) { if (state == State.STARTED) diskState = AccordKeyspace.markRemoteTopologySync(node, epoch, diskState); @@ -271,7 +304,7 @@ protected void receiveRemoteSyncCompletePreListenerNotify(Node.Id node, long epo @Override public synchronized void reportEpochClosed(Ranges ranges, long epoch) { - Invariants.checkState(state == State.STARTED); + checkStarted(); Topology topology = getTopologyForEpoch(epoch); syncPropagator.reportClosed(epoch, topology.nodes(), ranges); } @@ -279,7 +312,7 @@ public synchronized void reportEpochClosed(Ranges ranges, long epoch) @Override public synchronized void reportEpochRedundant(Ranges ranges, long epoch) { - Invariants.checkState(state == State.STARTED); + checkStarted(); // TODO (expected): ensure we aren't fetching a truncated epoch; otherwise this should be non-null Topology topology = getTopologyForEpoch(epoch); syncPropagator.reportRedundant(epoch, topology.nodes(), ranges); @@ -300,18 +333,24 @@ public synchronized void receiveRedundant(Ranges ranges, long epoch) } @Override - protected void truncateTopologiesPreListenerNotify(long epoch) + protected synchronized void truncateTopologiesPreListenerNotify(long epoch) { - Invariants.checkState(state == State.STARTED); + checkStarted(); } @Override - protected void truncateTopologiesPostListenerNotify(long epoch) + protected synchronized void truncateTopologiesPostListenerNotify(long epoch) { if (state == State.STARTED) diskState = AccordKeyspace.truncateTopologyUntil(epoch, diskState); } + private void checkStarted() + { + State state = this.state; + Invariants.checkState(state == State.STARTED, "Expected state to be STARTED but was %s", state); + } + @VisibleForTesting public static class EpochSnapshot { diff --git a/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java index c1fc73d80fb2..74a9603a39df 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java @@ -253,7 +253,7 @@ synchronized void maybeUpdateFastPath(Node.Id node, Status status) private void scheduleMaintenanceTask(long delayMillis) { - ScheduledExecutors.scheduledTasks.schedule(this::maintenance, delayMillis, TimeUnit.MILLISECONDS); + ScheduledExecutors.scheduledTasks.scheduleSelfRecurring(this::maintenance, delayMillis, TimeUnit.MILLISECONDS); } synchronized void maintenance() diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 28bf2c2cc55a..ae202d2f639b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -127,6 +127,7 @@ import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; +import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; @@ -172,9 +173,9 @@ public FlushMode flushMode() } @Override - public int flushPeriod() + public int flushPeriodMillis() { - return 1000; + return DatabaseDescriptor.getCommitLogSyncPeriod(); } @Override @@ -1098,7 +1099,9 @@ public void run(Interruptible.State state) throws InterruptedException { if (!unframedRequests.isEmpty() || !delayedRequests.isEmpty()) doRun(); - haveWork.acquire(1); + + if (state == NORMAL) + haveWork.acquire(1); } private void doRun() diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 24f32c94f650..4ecc3b35bbf8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -26,11 +26,14 @@ import java.util.concurrent.atomic.AtomicReference; import javax.annotation.Nonnull; +import javax.annotation.concurrent.GuardedBy; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import accord.coordinate.TopologyMismatch; import org.apache.cassandra.cql3.statements.RequestValidations; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.transformations.AddAccordTable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +94,6 @@ import org.apache.cassandra.service.accord.interop.AccordInteropPersist; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.transport.Dispatcher; @@ -117,6 +119,10 @@ public class AccordService implements IAccordService, Shutdownable { private static final Logger logger = LoggerFactory.getLogger(AccordService.class); + private enum State { INIT, STARTED, SHUTDOWN} + + public static final AccordClientRequestMetrics readMetrics = new AccordClientRequestMetrics("AccordRead"); + public static final AccordClientRequestMetrics writeMetrics = new AccordClientRequestMetrics("AccordWrite"); private static final Future BOOTSTRAP_SUCCESS = ImmediateFuture.success(null); private final Node node; @@ -129,6 +135,8 @@ public class AccordService implements IAccordService, Shutdownable private final AccordJournal journal; private final AccordVerbHandler verbHandler; private final LocalConfig configuration; + @GuardedBy("this") + private State state = State.INIT; private static final IAccordService NOOP_SERVICE = new IAccordService() { @@ -308,13 +316,16 @@ private AccordService(Id localId) } @Override - public void startup() + public synchronized void startup() { + if (state != State.INIT) + return; journal.start(node); configService.start(); ClusterMetadataService.instance().log().addListener(configService); fastPathCoordinator.start(); ClusterMetadataService.instance().log().addListener(fastPathCoordinator); + state = State.STARTED; } @Override @@ -526,15 +537,18 @@ public boolean isTerminated() } @Override - public void shutdown() + public synchronized void shutdown() { - ExecutorUtils.shutdown(Arrays.asList(scheduler, nodeShutdown, journal)); + if (state != State.STARTED) + return; + ExecutorUtils.shutdown(shutdownableSubsystems()); + state = State.SHUTDOWN; } @Override public Object shutdownNow() { - ExecutorUtils.shutdownNow(Arrays.asList(scheduler, nodeShutdown, journal)); + shutdown(); return null; } @@ -543,7 +557,7 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted { try { - ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(scheduler, nodeShutdown, journal)); + ExecutorUtils.awaitTermination(timeout, units, shutdownableSubsystems()); return true; } catch (TimeoutException e) @@ -552,11 +566,16 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted } } + private List shutdownableSubsystems() + { + return Arrays.asList(scheduler, nodeShutdown, journal, configService); + } + @VisibleForTesting @Override public void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { - scheduler.shutdownNow(); + shutdown(); ExecutorUtils.shutdownAndWait(timeout, unit, this); } diff --git a/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java b/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java index c9c253f1d57a..a0ac316f2906 100644 --- a/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java +++ b/src/java/org/apache/cassandra/utils/concurrent/Semaphore.java @@ -23,6 +23,7 @@ import org.apache.cassandra.utils.Intercept; import org.apache.cassandra.utils.Shared; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.Shared.Scope.SIMULATION; @Shared(scope = SIMULATION) @@ -139,7 +140,7 @@ public int waiting() */ public boolean tryAcquireUntil(int acquire, long nanoTimeDeadline) throws InterruptedException { - long wait = nanoTimeDeadline - System.nanoTime(); + long wait = nanoTimeDeadline - nanoTime(); return tryAcquire(acquire, Math.max(0, wait), TimeUnit.NANOSECONDS); } diff --git a/test/conf/logback-simulator.xml b/test/conf/logback-simulator.xml index ffa1ffa088c7..a4c24aab8dae 100644 --- a/test/conf/logback-simulator.xml +++ b/test/conf/logback-simulator.xml @@ -19,7 +19,8 @@ - + + @@ -38,7 +39,7 @@ - ./build/test/logs/simulator/${run_start}-${run_seed}/${instance_id}/system.log + ./build/test/logs/simulator/${run_start}-${run_seed}/cluster-${cluster_id}/${instance_id}/system.log %-5level [%thread] ${instance_id} %replace(CS:%X{command_store} ){'CS\:\s+', ''}%replace(OP:%X{async_op} ){'OP\:\s+', ''}%date{ISO8601} %msg%n diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index cac40690afbb..2f81441c198c 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -102,6 +102,7 @@ import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.hints.DTestSerializer; import org.apache.cassandra.hints.HintsService; +import org.apache.cassandra.index.IndexStatusManager; import org.apache.cassandra.index.SecondaryIndexManager; import org.apache.cassandra.io.IVersionedAsymmetricSerializer; import org.apache.cassandra.io.sstable.format.SSTableReader; @@ -628,6 +629,9 @@ public void startup(ICluster cluster) { assert config.networkTopology().contains(config.broadcastAddress()) : String.format("Network topology %s doesn't contain the address %s", config.networkTopology(), config.broadcastAddress()); + // org.apache.cassandra.distributed.impl.AbstractCluster.startup sets the exception handler for the thread + // so extract it to populate ExecutorFactory.Global + ExecutorFactory.Global.tryUnsafeSet(new ExecutorFactory.Default(Thread.currentThread().getContextClassLoader(), null, Thread.getDefaultUncaughtExceptionHandler())); DistributedTestInitialLocationProvider.assign(config.networkTopology()); CassandraDaemon.getInstanceForTesting().activate(false); // TODO: filters won't work for the messages dispatched during startup @@ -927,6 +931,11 @@ public Future shutdown(boolean runOnExitThreads, boolean shutdownMessaging error = parallelRun(error, executor, () -> Gossiper.instance.stopShutdownAndWait(1L, MINUTES)); } + else + { + error = parallelRun(error, executor, + () -> Gossiper.instance.shutdownAndWait(1L, MINUTES)); + } error = parallelRun(error, executor, StorageService.instance::disableAutoCompaction); @@ -970,6 +979,7 @@ public Future shutdown(boolean runOnExitThreads, boolean shutdownMessaging () -> ActiveRepairService.instance().shutdownNowAndWait(1L, MINUTES), () -> EpochAwareDebounce.instance.close(), SnapshotManager.instance::close, + () -> IndexStatusManager.instance.shutdownAndWait(1L, MINUTES), DiskErrorsHandlerService::close ); diff --git a/test/distributed/org/apache/cassandra/distributed/impl/IsolatedExecutor.java b/test/distributed/org/apache/cassandra/distributed/impl/IsolatedExecutor.java index 68ff1e71c65b..9e84d32df7cf 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/IsolatedExecutor.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/IsolatedExecutor.java @@ -126,7 +126,7 @@ public IIsolatedExecutor with(ExecutorService executor) public Future shutdown() { - isolatedExecutor.shutdownNow(); + isolatedExecutor.shutdown(); return shutdownExecutor.shutdown(name, classLoader, isolatedExecutor, () -> { // Shutdown logging last - this is not ideal as the logging subsystem is initialized diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java index f9bab8eaed04..70fa3a6f0493 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/ClassTransformer.java @@ -189,6 +189,10 @@ public FieldVisitor visitField(int access, String name, String descriptor, Strin { if (dependentTypes != null) Utils.visitIfRefType(descriptor, dependentTypes); + // org.apache.cassandra.simulator.systems.SimulatedTime.InstanceTime.nanoTime does not change between invokes which causes AbstractQueuedSynchronizer to loop forever, + // so need to make the threshold negative to avoid the spin loop. + if (className.equals("java/util/concurrent/locks/AbstractQueuedSynchronizer") && name.equals("SPIN_FOR_TIMEOUT_THRESHOLD")) + return super.visitField(makePublic(access), name, descriptor, signature, Long.MIN_VALUE); return super.visitField(makePublic(access), name, descriptor, signature, value); } diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptAgent.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptAgent.java index 4cf1546ca826..8774d867d169 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptAgent.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptAgent.java @@ -30,6 +30,7 @@ import java.util.Arrays; import java.util.EnumSet; import java.util.List; +import java.util.Objects; import java.util.function.BiFunction; import java.util.regex.Pattern; @@ -93,6 +94,9 @@ public byte[] transform(ClassLoader loader, String className, Class classBein if (className.equals("java/lang/Object")) return transformObject(bytecode); + if (className.equals("java/lang/Class")) + return transformClass(bytecode); + if (className.equals("java/lang/Enum")) return transformEnum(bytecode); @@ -103,10 +107,14 @@ public byte[] transform(ClassLoader loader, String className, Class classBein return transformThreadLocalRandom(bytecode); if (className.startsWith("java/util/concurrent/ConcurrentHashMap")) - return transformConcurrent(className, bytecode, DETERMINISTIC, NO_PROXY_METHODS); + return InterceptAgent.transform(className, bytecode, DETERMINISTIC, NO_PROXY_METHODS); if (className.startsWith("java/util/concurrent/locks")) - return transformConcurrent(className, bytecode, SYSTEM_CLOCK, LOCK_SUPPORT, NO_PROXY_METHODS); + { + if (className.equals("java/util/concurrent/locks/AbstractQueuedSynchronizer")) + return InterceptAgent.transformAbstractQueuedSynchronizer(className, bytecode, SYSTEM_CLOCK, LOCK_SUPPORT, NO_PROXY_METHODS); + return InterceptAgent.transform(className, bytecode, SYSTEM_CLOCK, LOCK_SUPPORT, NO_PROXY_METHODS); + } return null; } @@ -172,6 +180,29 @@ public MethodVisitor visitMethod(int access, String name, String descriptor, Str return transform(bytes, ObjectVisitor::new); } + /** + * We don't want Object.toString() to invoke our overridden identityHashCode by virtue of invoking some overridden hashCode() + * So we overwrite Object.toString() to replace calls to Object.hashCode() with direct calls to System.identityHashCode() + */ + private static byte[] transformClass(byte[] bytes) + { + class ClazzVisitor extends ClassVisitor + { + public ClazzVisitor(int api, ClassVisitor classVisitor) + { + super(api, classVisitor); + } + + @Override + public void visitEnd() + { + new StringHashcode(api).accept(this); + super.visitEnd(); + } + } + return transform(bytes, ClazzVisitor::new); + } + /** * We want Enum to have a deterministic hashCode() so we simply forward calls to ordinal() */ @@ -314,7 +345,7 @@ public MethodVisitor visitMethod(int access, String name, String descriptor, Str else { MethodVisitor mv = super.visitMethod(access, name, descriptor, signature, exceptions); - if (determinismCheck && (name.equals("nextSeed") || name.equals("nextSecondarySeed"))) + if (determinismCheck && (name.equals("nextSeed") || name.equals("nextSecondarySeed") || name.equals("advanceProbe"))) mv = new ThreadLocalRandomCheckTransformer(api, mv); return mv; } @@ -323,7 +354,61 @@ public MethodVisitor visitMethod(int access, String name, String descriptor, Str return transform(bytes, ThreadLocalRandomVisitor::new); } - private static byte[] transform(byte[] bytes, BiFunction constructor) + /** + * We require ThreadLocalRandom to be deterministic, so we modify its initialisation method to invoke a + * global deterministic random value generator + */ + private static byte[] transformAbstractQueuedSynchronizer(String className, byte[] bytes, Flag flag, Flag ... flags) + { + class AbstractQueuedSynchronizerVisitor extends ClassVisitor + { + private long defaultSpinForTimeoutThreshold = 1000L; + + public AbstractQueuedSynchronizerVisitor(int api, ClassVisitor classVisitor) + { + super(api, classVisitor); + } + + @Override + public FieldVisitor visitField(int access, String name, String descriptor, String signature, Object value) + { + if (name.equals("SPIN_FOR_TIMEOUT_THRESHOLD")) + { + defaultSpinForTimeoutThreshold = (Long)value; + return super.visitField(access, name, descriptor, signature, 0L); + } + + return super.visitField(access, name, descriptor, signature, value); + } + + @Override + public MethodVisitor visitMethod(int access, String name, String descriptor, String signature, String[] exceptions) + { + /// !!!!! WARNING !!!!! + /// THIS IS SUPER BRITTLE BECAUSE rt.jar INLINES GETSTATIC AS LDC + // TODO (desired): visit constructor to fetch actual value of constant in case changes in future release - + // but this is brittle enough changes upstream will likely need revisiting anyway + MethodVisitor mv = super.visitMethod(access, name, descriptor, signature, exceptions); + if (!name.equals("doAcquireNanos") && !name.equals("doAcquireSharedNanos")) + return mv; + + return new MethodVisitor(api, mv) + { + @Override + public void visitLdcInsn(Object value) + { + if (Objects.equals(defaultSpinForTimeoutThreshold, value)) + super.visitLdcInsn(0L); + else + super.visitLdcInsn(value); + } + }; + } + } + return transform(className, bytes, AbstractQueuedSynchronizerVisitor::new, flag, flags); + } + + private static byte[] transform(byte[] bytes, BiFunction constructor) { ClassWriter out = new ClassWriter(0); ClassReader in = new ClassReader(bytes); @@ -332,7 +417,7 @@ private static byte[] transform(byte[] bytes, BiFunction constructor, Flag flag, Flag ... flags) + { + ClassReader in = new ClassReader(bytes); + ClassTransformer transformer = new ClassTransformer(BYTECODE_VERSION, className, EnumSet.of(flag, flags), null); + ClassVisitor extraTransformer = constructor.apply(BYTECODE_VERSION, transformer); + in.accept(extraTransformer, 0); + return transformer.toBytes(); + } } diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java index dd53ce067fbe..504301247285 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/InterceptClasses.java @@ -62,6 +62,8 @@ public class InterceptClasses implements BiFunction "|org[/.]apache[/.]cassandra[/.]distributed[/.]impl[/.]DirectStreamingConnectionFactory.*" + "|org[/.]apache[/.]cassandra[/.]db[/.]commitlog[/.].*" + "|org[/.]apache[/.]cassandra[/.]service[/.]paxos[/.].*" + + "|org[/.]apache[/.]cassandra[/.]service[/.]accord[/.].*" + + "|org[/.]apache[/.]cassandra[/.]journal[/.].*" + "|accord[/.].*" ); diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/MonitorMethodTransformer.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/MonitorMethodTransformer.java index d9c9c7ad9492..a7c21bbba744 100644 --- a/test/simulator/asm/org/apache/cassandra/simulator/asm/MonitorMethodTransformer.java +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/MonitorMethodTransformer.java @@ -122,8 +122,7 @@ int loadParamsAndReturnInvokeCode() } int invokeCode; - if (isInstanceMethod && (access & Opcodes.ACC_PRIVATE) != 0) invokeCode = Opcodes.INVOKESPECIAL; - else if (isInstanceMethod) invokeCode = Opcodes.INVOKEVIRTUAL; + if (isInstanceMethod) invokeCode = Opcodes.INVOKESPECIAL; else invokeCode = Opcodes.INVOKESTATIC; return invokeCode; } diff --git a/test/simulator/asm/org/apache/cassandra/simulator/asm/StringHashcode.java b/test/simulator/asm/org/apache/cassandra/simulator/asm/StringHashcode.java new file mode 100644 index 000000000000..fc3c57f8b524 --- /dev/null +++ b/test/simulator/asm/org/apache/cassandra/simulator/asm/StringHashcode.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.simulator.asm; + +import org.objectweb.asm.Opcodes; +import org.objectweb.asm.tree.InsnNode; +import org.objectweb.asm.tree.LabelNode; +import org.objectweb.asm.tree.MethodInsnNode; +import org.objectweb.asm.tree.MethodNode; + +/** + * Generate a new hashCode method in the class that invokes a deterministic hashCode generator + */ +class StringHashcode extends MethodNode +{ + StringHashcode(int api) + { + super(api, Opcodes.ACC_PUBLIC, "hashCode", "()I", null, null); + maxLocals = 1; + maxStack = 1; + instructions.add(new LabelNode()); + instructions.add(new MethodInsnNode(Opcodes.INVOKEVIRTUAL, "java/lang/Object", "toString", "()Ljava/lang/String;", false)); + instructions.add(new LabelNode()); + instructions.add(new MethodInsnNode(Opcodes.INVOKEVIRTUAL, "java/lang/Object", "hashCode", "(Ljava/lang/Object;)I", false)); + instructions.add(new InsnNode(Opcodes.IRETURN)); + } +} diff --git a/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java b/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java index 427a777abee8..39666077ea78 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ActionSchedule.java @@ -281,6 +281,12 @@ public boolean hasNext() if (!runnable.isEmpty() || !scheduled.isEmpty()) return true; + while (moreWork()) + { + if (!runnable.isEmpty() || !scheduled.isEmpty()) + return true; + } + if (!sequences.isEmpty()) { // TODO (feature): detection of which action is blocking progress, and logging of its stack trace only @@ -313,15 +319,12 @@ public boolean hasNext() throw failWithOOM(); } - while (moreWork()) - { - if (!runnable.isEmpty() || !scheduled.isEmpty()) - return true; - } - return false; } + // NOTE: this is only here for debugging, its a quick way to see if pre (0), interleave (1), or post (2) is active + private int step = -1; + private boolean moreWork() { if (!moreWork.hasNext()) @@ -347,6 +350,8 @@ else if (oldMode == UNLIMITED) work.actors.forEach(runnableScheduler::attachTo); work.actors.forEach(a -> a.forEach(Action::setConsequence)); work.actors.forEach(this::add); + + step++; return true; } diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index 4b71603565da..5e190f23b550 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -52,6 +52,7 @@ import org.apache.cassandra.distributed.api.IIsolatedExecutor.SerializableBiConsumer; import org.apache.cassandra.distributed.api.IIsolatedExecutor.SerializableConsumer; import org.apache.cassandra.distributed.api.IIsolatedExecutor.SerializableRunnable; +import org.apache.cassandra.distributed.impl.ClusterIDDefiner; import org.apache.cassandra.distributed.impl.DirectStreamingConnectionFactory; import org.apache.cassandra.distributed.impl.InstanceConfig; import org.apache.cassandra.distributed.impl.InstanceIDDefiner; @@ -775,7 +776,8 @@ public ClusterSimulation(RandomSource random, long seed, int uniqueNum, .set("disk_access_mode", "standard") .set("failure_detector", SimulatedFailureDetector.Instance.class.getName()) .set("commitlog_compression", new ParameterizedClass(LZ4Compressor.class.getName(), emptyMap())) - .set("commitlog_sync", "batch"); + .set("commitlog_sync", "batch") + .set("lwt_strategy", builder.lwtStrategy); // TODO: Add remove() to IInstanceConfig if (config instanceof InstanceConfig) { @@ -791,6 +793,11 @@ public ClusterSimulation(RandomSource random, long seed, int uniqueNum, @Override public void initialise(ClassLoader classLoader, ThreadGroup threadGroup, int num, int generation) { + IsolatedExecutor.transferAdhoc((IIsolatedExecutor.SerializableConsumer) ClusterIDDefiner::setId, classLoader) + .accept(threadGroup.getParent().getName()); + IsolatedExecutor.transferAdhoc((IIsolatedExecutor.SerializableConsumer) InstanceIDDefiner::setInstanceId, classLoader) + .accept(num); + List onShutdown = new ArrayList<>(); IsolatedExecutor.transferAdhoc((SerializableConsumer) InstanceIDDefiner::setInstanceId, classLoader) .accept(num); diff --git a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java index 511f443ceb14..798c4d45ae6c 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java @@ -25,6 +25,7 @@ import java.util.Optional; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; import java.util.function.ToDoubleFunction; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -51,6 +52,7 @@ import org.apache.cassandra.simulator.systems.InterceptorOfGlobalMethods; import org.apache.cassandra.simulator.utils.ChanceRange; import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Hex; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -75,7 +77,10 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.PAXOS_REPAIR_RETRY_TIMEOUT_IN_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.RING_DELAY; import static org.apache.cassandra.config.CassandraRelevantProperties.SHUTDOWN_ANNOUNCE_DELAY_IN_MS; +import static org.apache.cassandra.config.CassandraRelevantProperties.SIMULATOR_STARTED; import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_AUTH_DEFAULT_RF; +import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_CASSANDRA_SUITENAME; +import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_CASSANDRA_TESTTAG; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_JVM_DTEST_DISABLE_SSL; import static org.apache.cassandra.simulator.debug.Reconcile.reconcileWith; import static org.apache.cassandra.simulator.debug.Record.record; @@ -135,6 +140,7 @@ public static void beforeAll() IGNORE_MISSING_NATIVE_FILE_HINTS.setBoolean(true); ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION.setBoolean(true); TEST_JVM_DTEST_DISABLE_SSL.setBoolean(true); // to support easily running without netty from dtest-jar + SIMULATOR_STARTED.setString(Long.toString(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); if (Thread.currentThread() instanceof InterceptibleThread); // load InterceptibleThread class to avoid infinite loop in InterceptorOfGlobalMethods new InterceptedWait.CaptureSites(Thread.currentThread()) @@ -344,8 +350,11 @@ public void run(B builder) throws IOException { long seed = parseHex(Optional.ofNullable(this.seed)).orElse(new Random(System.nanoTime()).nextLong()); SeedDefiner.setSeed(seed); - logger(); beforeAll(); + // TODO (expected): this doesn't work properly for multiple seeds in a single JVM + TEST_CASSANDRA_TESTTAG.setString("simulator"); + TEST_CASSANDRA_SUITENAME.setString(SIMULATOR_STARTED.getString() + '-' + CassandraRelevantProperties.SIMULATOR_SEED.getString()); + logger(); Thread.setDefaultUncaughtExceptionHandler((th, e) -> { boolean isInterrupt = false; Throwable t = e; @@ -378,6 +387,7 @@ protected static class Run> extends Basic protected void run(long seed, B builder) throws IOException { logger().error("Seed 0x{}", Long.toHexString(seed)); + logger().info("Cassandra {} / {}", FBUtilities.getReleaseVersionString(), FBUtilities.getGitSHA()); try (ClusterSimulation cluster = builder.create(seed)) { @@ -456,6 +466,16 @@ public void run(B builder) throws IOException } } + @Command(name = "version", description = "Display version information") + protected static class VersionCommand> implements ICommand + { + @Override + public void run(B builder) throws IOException + { + System.out.println(FBUtilities.getReleaseVersionString()); + System.out.println(FBUtilities.getGitSHA()); + } + } public static Optional parseHex(Optional value) { diff --git a/test/simulator/main/org/apache/cassandra/simulator/debug/Record.java b/test/simulator/main/org/apache/cassandra/simulator/debug/Record.java index 8f449f18caff..54b335175359 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/debug/Record.java +++ b/test/simulator/main/org/apache/cassandra/simulator/debug/Record.java @@ -46,6 +46,7 @@ import org.apache.cassandra.simulator.systems.SimulatedTime; import org.apache.cassandra.utils.Closeable; import org.apache.cassandra.utils.CloseableIterator; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.concurrent.Threads; import static org.apache.cassandra.io.util.File.WriteMode.OVERWRITE; @@ -58,7 +59,7 @@ public class Record { private static final Logger logger = LoggerFactory.getLogger(Record.class); private static final Pattern NORMALISE_THREAD_RECORDING_OUT = Pattern.compile("(Thread\\[[^]]+:[0-9]+),[0-9](,node[0-9]+)_[0-9]+]"); - private static final Pattern NORMALISE_LAMBDA = Pattern.compile("((\\$\\$Lambda\\$[0-9]+/[0-9]+)?(@[0-9a-f]+)?)"); + private static final Pattern NORMALISE_LAMBDA = Pattern.compile("((\\$\\$Lambda\\$[0-9]+/(0x)?[a-f0-9]+)?(@[0-9a-f]+)?)"); public static void record(String saveToDir, long seed, RecordOption withRng, RecordOption withTime, ClusterSimulation.Builder builder) { @@ -81,6 +82,7 @@ else if (withTime == VALUE) if (builder.capture().wakeSites) modifiers.add("WakeSites"); logger.error("Seed 0x{} ({}) (With: {})", Long.toHexString(seed), eventFile, modifiers); + logger.info("Cassandra {} / {}", FBUtilities.getReleaseVersionString(), FBUtilities.getGitSHA()); } try (PrintWriter eventOut = new PrintWriter(new GZIPOutputStream(eventFile.newOutputStream(OVERWRITE), 1 << 16)); diff --git a/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java b/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java index 72bc99fba76c..e00924cd2783 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java +++ b/test/simulator/main/org/apache/cassandra/simulator/debug/SelfReconcile.java @@ -43,6 +43,7 @@ import org.apache.cassandra.simulator.systems.InterceptorOfConsequences; import org.apache.cassandra.simulator.systems.SimulatedTime; import org.apache.cassandra.utils.CloseableIterator; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import org.apache.cassandra.utils.memory.HeapPool; @@ -248,6 +249,7 @@ public void accept(String kind, long value) public static void reconcileWithSelf(long seed, RecordOption withRng, RecordOption withTime, boolean withAllocations, ClusterSimulation.Builder builder) { logger.error("Seed 0x{}", Long.toHexString(seed)); + logger.info("Cassandra {} / {}", FBUtilities.getReleaseVersionString(), FBUtilities.getGitSHA()); InterceptReconciler reconciler = new InterceptReconciler(withRng == WITH_CALLSITES); if (withRng != NONE) builder.random(reconciler); diff --git a/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java b/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java index 1c522f11bf74..92066c182470 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/logging/RunStartDefiner.java @@ -18,8 +18,7 @@ package org.apache.cassandra.simulator.logging; -import java.util.concurrent.TimeUnit; - +import accord.utils.Invariants; import ch.qos.logback.core.PropertyDefinerBase; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -27,8 +26,7 @@ public class RunStartDefiner extends PropertyDefinerBase { static { - if (CassandraRelevantProperties.SIMULATOR_STARTED.getString() == null) - CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); + Invariants.checkState(CassandraRelevantProperties.SIMULATOR_STARTED.getString() != null); } @Override diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java index 5bfb218c7edf..ca6988ed59a4 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AbstractPairOfSequencesPaxosSimulation.java @@ -39,11 +39,10 @@ import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.distributed.api.LogAction; import org.apache.cassandra.distributed.api.LogResult; -import org.apache.cassandra.distributed.impl.FileLogAction; import org.apache.cassandra.distributed.impl.Instance; import org.apache.cassandra.distributed.shared.Metrics; -import org.apache.cassandra.io.util.File; import org.apache.cassandra.simulator.Action; import org.apache.cassandra.simulator.ActionList; import org.apache.cassandra.simulator.ActionPlan; @@ -53,8 +52,6 @@ import org.apache.cassandra.simulator.RunnableActionScheduler; import org.apache.cassandra.simulator.cluster.ClusterActions; import org.apache.cassandra.simulator.cluster.KeyspaceActions; -import org.apache.cassandra.simulator.logging.RunStartDefiner; -import org.apache.cassandra.simulator.logging.SeedDefiner; import org.apache.cassandra.simulator.systems.SimulatedActionTask; import org.apache.cassandra.simulator.systems.SimulatedSystems; import org.apache.cassandra.simulator.utils.IntRange; @@ -131,11 +128,7 @@ protected Action checkErrorLogs(IInvokableInstance inst) @Override protected ActionList performSimple() { - // can't use inst.logs as that runs in the class loader, which uses in-memory file system - String suite = new RunStartDefiner().getPropertyValue() + "-" + new SeedDefiner().getPropertyValue(); - String instanceId = "node" + inst.config().num(); - File logFile = new File(String.format("build/test/logs/simulator/%s/%s/system.log", suite, instanceId)); - FileLogAction logs = new FileLogAction(logFile); + LogAction logs = inst.logs(); LogResult> errors = logs.grepForErrors(); if (!errors.getResult().isEmpty()) diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java index 78e04454faba..a75a1ef4610f 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java @@ -43,7 +43,10 @@ public AccordClusterSimulation create(long seed) throws IOException public void applyHandicaps() { /** - * TODO: remove after partial replication patch + * TODO (required): remove + * We currently require coordinators to have a CommandStore to coordinate a query, but not every node + * is a replica under standard simulation + * * The current homekey implementation isn't compatible with the C* commands per key implementation when * a non-replica coordinates a query. * diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java index f4bd21aaf980..f14ae9daa1f1 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java @@ -21,12 +21,22 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; +import org.junit.BeforeClass; + import io.airlift.airline.Cli; import io.airlift.airline.Command; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.simulator.SimulationRunner; +import org.apache.cassandra.utils.StorageCompatibilityMode; public class AccordSimulationRunner extends SimulationRunner { + @BeforeClass + public static void beforeAll() + { + CassandraRelevantProperties.JUNIT_STORAGE_COMPATIBILITY_MODE.setString(StorageCompatibilityMode.NONE.toString()); + } + @Command(name = "run") public static class Run extends SimulationRunner.Run { @@ -35,6 +45,7 @@ public Run() {} @Override protected void run(long seed, AccordClusterSimulation.Builder builder) throws IOException { + beforeAll(); builder.applyHandicaps(); super.run(seed, builder); } @@ -44,12 +55,28 @@ protected void run(long seed, AccordClusterSimulation.Builder builder) throws IO public static class Record extends SimulationRunner.Record { public Record() {} + + @Override + protected void run(long seed, AccordClusterSimulation.Builder builder) throws IOException + { + beforeAll(); + builder.applyHandicaps(); + super.run(seed, builder); + } } @Command(name = "reconcile") public static class Reconcile extends SimulationRunner.Reconcile { public Reconcile() {} + + @Override + protected void run(long seed, AccordClusterSimulation.Builder builder) throws IOException + { + beforeAll(); + builder.applyHandicaps(); + super.run(seed, builder); + } } public static class Help extends HelpCommand {} diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java index fc929a9460a6..8d6c8a0dcc35 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java @@ -36,6 +36,7 @@ import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.hppc.cursors.IntCursor; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.rows.Cell; @@ -139,7 +140,10 @@ public PairOfSequencesAccordSimulation(SimulatedSystems simulated, seed, primaryKeys, runForNanos, jitter); this.writeRatio = 1F - readRatio; - validator = new LoggingHistoryValidator(new StrictSerializabilityValidator(primaryKeys)); + HistoryValidator validator = new StrictSerializabilityValidator(primaryKeys); + if (CassandraRelevantProperties.TEST_HISTORY_VALIDATOR_LOGGING_ENABLED.getBoolean()) + validator = new LoggingHistoryValidator(validator); + this.validator = validator; } @Override diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java index 6c9f683c6186..71734c6e68ed 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java @@ -137,6 +137,7 @@ protected void propagate(PaxosClusterSimulation.Builder builder) } public static class Help extends HelpCommand {} + public static class Version extends VersionCommand {} static void propagateTo(String consistency, boolean withStateCache, boolean withoutStateCache, String variant, String toVariant, PaxosClusterSimulation.Builder builder) { @@ -163,6 +164,7 @@ public static void main(String[] args) throws IOException .withCommand(Run.class) .withCommand(Reconcile.class) .withCommand(Record.class) + .withCommand(Version.class) .withCommand(Help.class) .withDefaultCommand(Help.class) .build() diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java index 626a267a47ff..1fcc4a8354c5 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingMonitors.java @@ -698,10 +698,7 @@ public void preMonitorEnter(Object monitor, float preMonitorDelayChance) { if (!thread.isIntercepting() && disabled) return; else if (!thread.isIntercepting()) - { throw new AssertionError("Thread " + thread + " is running but is not simulated"); - } - checkForDeadlock(thread, state.heldBy); InterceptedMonitorWait wait = new InterceptedMonitorWait(UNBOUNDED_WAIT, 0L, state, thread, captureWaitSite(thread)); diff --git a/test/unit/org/apache/cassandra/journal/TestParams.java b/test/unit/org/apache/cassandra/journal/TestParams.java index 3beb378536bf..7c22e896b5e4 100644 --- a/test/unit/org/apache/cassandra/journal/TestParams.java +++ b/test/unit/org/apache/cassandra/journal/TestParams.java @@ -42,7 +42,7 @@ public FlushMode flushMode() } @Override - public int flushPeriod() + public int flushPeriodMillis() { return 1000; } From 266986b5a2cc7672bc5be57084b483c9fb4af625 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Wed, 7 Feb 2024 15:57:54 +0000 Subject: [PATCH 095/340] Fix (de)serialization of WaitingOn into cache --- .../cassandra/service/accord/AccordKeyspace.java | 4 +--- .../accord/serializers/WaitingOnSerializer.java | 16 +++++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index ddcaed5a0d87..c3d47ce7101e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -823,8 +823,6 @@ public static Mutation getCommandMutation(int storeId, Command original, Command addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.accepted_ballot, Command::acceptedOrCommitted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - // TODO review this is just to work around Truncated not being committed but having a status after committed - // so status claims it is committed. if (command.isStable() && !command.isTruncated()) { Command.Committed committed = command.asCommitted(); @@ -1292,7 +1290,7 @@ private static WaitingOnProvider deserializeWaitingOn(UntypedResultSet.Row row) try { - return WaitingOnSerializer.deserialize(deps, new DataInputBuffer(bytes, false)); + return WaitingOnSerializer.deserialize(deps, bytes); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index 00735ff7e356..930807d7f021 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -102,17 +102,23 @@ private static void serialize(int length, SimpleBitSet write, ByteBuffer out) public static WaitingOn deserialize(Deps deps, ByteBuffer in) throws IOException { int length = (deps.txnIdCount() + 63) / 64; - ImmutableBitSet waitingOnCommit = deserialize(length, in); - ImmutableBitSet waitingOnApply = deserialize(length, in); - ImmutableBitSet appliedOrInvalidated = deserialize(length, in); + int position = in.position(); + ImmutableBitSet waitingOnCommit = deserialize(position, length, in); + position += length*8; + ImmutableBitSet waitingOnApply = deserialize(position, length, in); + position += length*8; + ImmutableBitSet appliedOrInvalidated = deserialize(position, length, in); return new WaitingOn(deps, waitingOnCommit, waitingOnApply, appliedOrInvalidated); } - private static ImmutableBitSet deserialize(int length, ByteBuffer in) + private static ImmutableBitSet deserialize(int position, int length, ByteBuffer in) { long[] bits = new long[length]; for (int i = 0 ; i < length ; ++i) - bits[i] = in.getLong(); + { + bits[i] = in.getLong(position); + position += 8; + } return ImmutableBitSet.SerializationSupport.construct(bits); } } From 4088c68d38f5cbe8e86a7c6fa4630cb973f67d68 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Thu, 15 Feb 2024 17:03:01 -0500 Subject: [PATCH 096/340] Add TODO for TxnQuery using wrong cluster metadata --- src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java index 0742071b109d..7afa75de16d8 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -216,6 +216,7 @@ private static boolean transactionIsInMigratingOrMigratedRange(Epoch epoch, Seek // and transaction statement will generate an error when it sees // the RetryOnNewProtocolResult PartitionKey partitionKey = (PartitionKey)keys.get(0); + // TODO (required): This is looking at ClusterMetadata, but not the ClusterMetadata for the specified epoch, just that epoch or later. Need to store ConsensusMigrationState in the global Topologies Accord stores for itself. return ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(epoch, partitionKey.table(), partitionKey.partitionKey()); } } From 763bcf2de58be086e80b40fe370595a11020a795 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Mon, 29 Jan 2024 16:12:49 +0000 Subject: [PATCH 097/340] Fast single-partition "Ephemeral Reads" Introduce a special kind of non-durable read that provides only per-key linearizable isolation; i.e. strict-serializable isolation for single partition-key reads. This read creates only a happens-before edge, by collecting dependencies for execution and ensuring that execution happens strictly after these dependencies have executed, but at no precise time otherwise. So later writes may be witnessed, and if multiple keys are read they may represents different points in time. patch by Benedict; reviewed by Ariel Weisberg for CASSANDRA-19305 Refactor CommandsForKey for efficiency, and to support transitive dependency elision patch by Benedict; reviewed by Aleksey Yeshchenko for CASSANDRA-19310 --- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 5 + .../cassandra/config/DatabaseDescriptor.java | 44 +- .../cql3/statements/TransactionStatement.java | 7 +- .../cassandra/db/DiskBoundaryManager.java | 2 +- .../db/compaction/CompactionIterator.java | 40 +- .../db/virtual/PartitionKeyStatsTable.java | 2 +- .../cassandra/dht/ByteOrderedPartitioner.java | 8 +- .../apache/cassandra/dht/IPartitioner.java | 4 +- .../cassandra/dht/Murmur3Partitioner.java | 2 +- .../dht/OrderPreservingPartitioner.java | 2 +- .../cassandra/dht/RandomPartitioner.java | 2 +- .../org/apache/cassandra/dht/Splitter.java | 14 +- src/java/org/apache/cassandra/dht/Token.java | 2 +- .../io/sstable/format/bti/BtiTableReader.java | 2 +- src/java/org/apache/cassandra/net/Verb.java | 8 +- .../cassandra/service/StorageProxy.java | 13 +- .../service/accord/AccordCachingState.java | 5 - .../service/accord/AccordCommandStore.java | 108 +-- .../service/accord/AccordCommandsForKeys.java | 253 ----- .../accord/AccordConfigurationService.java | 2 +- .../service/accord/AccordKeyspace.java | 249 ++--- .../service/accord/AccordMessageSink.java | 5 +- .../service/accord/AccordObjectSizes.java | 42 +- .../accord/AccordSafeCommandStore.java | 253 +---- .../accord/AccordSafeCommandsForKey.java | 5 +- .../AccordSafeCommandsForKeyUpdate.java | 122 --- .../service/accord/AccordService.java | 18 +- .../service/accord/AccordStateCache.java | 2 - .../service/accord/CommandsForKeyUpdate.java | 101 -- .../service/accord/CommandsForRanges.java | 221 +++-- .../cassandra/service/accord/TokenRange.java | 2 +- .../service/accord/api/AccordRoutingKey.java | 4 +- .../service/accord/async/AsyncLoader.java | 16 +- .../service/accord/async/AsyncOperation.java | 17 +- .../accord/interop/AccordInteropAdapter.java | 121 +++ .../accord/interop/AccordInteropApply.java | 20 +- .../accord/interop/AccordInteropCommit.java | 9 +- .../interop/AccordInteropExecution.java | 39 +- .../accord/interop/AccordInteropPersist.java | 38 +- .../accord/interop/AccordInteropRead.java | 40 +- .../interop/AccordInteropReadRepair.java | 46 +- .../accord/serializers/ApplySerializers.java | 10 +- .../serializers/CommandSerializers.java | 61 +- .../serializers/CommandsForKeySerializer.java | 892 +++++++++++++++--- .../accord/serializers/CommitSerializers.java | 37 +- .../accord/serializers/DepsSerializer.java | 83 +- .../accord/serializers/FetchSerializers.java | 20 +- .../GetEphmrlReadDepsSerializers.java | 85 ++ .../GetMaxConflictSerializers.java | 85 ++ .../IVersionedWithKeysSerializer.java | 414 ++++++++ .../accord/serializers/KeySerializers.java | 4 + .../serializers/ListenerSerializers.java | 69 +- .../serializers/ReadDataSerializers.java | 140 ++- .../serializers/SetDurableSerializers.java | 14 +- .../service/accord/txn/TxnWrite.java | 2 +- .../reads/repair/BlockingReadRepair.java | 2 +- .../cassandra/utils/ByteBufferUtil.java | 99 ++ .../cassandra/utils/vint/VIntCoding.java | 59 ++ .../test/accord/AccordMigrationTest.java | 2 +- .../paxos/StrictSerializabilityValidator.java | 2 +- .../cassandra/db/DiskBoundaryManagerTest.java | 2 +- .../db/compaction/CancelCompactionsTest.java | 2 +- .../CompactionAccordIteratorsTest.java | 91 +- .../db/compaction/CompactionsBytemanTest.java | 2 +- .../db/compaction/ShardManagerTest.java | 2 +- .../UnifiedCompactionStrategyTest.java | 6 +- .../cassandra/dht/LengthPartitioner.java | 2 +- .../apache/cassandra/dht/SplitterTest.java | 24 +- .../index/sai/cql/VectorTypeTest.java | 2 +- .../disk/v1/InvertedIndexSearcherTest.java | 2 +- .../index/sai/disk/v1/SegmentTest.java | 2 +- .../cassandra/repair/RepairJobTest.java | 2 +- .../cassandra/repair/ValidationTaskTest.java | 2 +- .../accord/AccordCommandStoreTest.java | 140 +-- .../service/accord/AccordCommandTest.java | 23 +- .../service/accord/AccordKeyspaceTest.java | 2 +- .../service/accord/AccordMessageSinkTest.java | 11 +- .../service/accord/AccordTestUtils.java | 8 - .../service/accord/AccordTopologyTest.java | 8 +- .../service/accord/api/AccordKeyTest.java | 2 +- .../service/accord/async/AsyncLoaderTest.java | 77 +- .../accord/async/AsyncOperationTest.java | 31 +- .../CommandsForKeySerializerTest.java | 455 ++++++++- 84 files changed, 2954 insertions(+), 1921 deletions(-) delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordCommandsForKeys.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java delete mode 100644 src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java create mode 100644 src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java diff --git a/modules/accord b/modules/accord index 3789c5bfec50..6b8bef48e578 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3789c5bfec50eb96157c0a55af77f78ee0cac804 +Subproject commit 6b8bef48e5780aefda6bd1ff29a6290e56ede438 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index d821c31e87ca..9eb8a1d0be9f 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -44,4 +44,9 @@ public class AccordSpec public DurationSpec.IntMillisecondsBound range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); public volatile DurationSpec fast_path_update_delay = new DurationSpec.IntSecondsBound(5); + + public volatile DurationSpec schedule_durability_frequency = new DurationSpec.IntSecondsBound(15); + public volatile DurationSpec durability_txnid_lag = new DurationSpec.IntSecondsBound(5); + public volatile DurationSpec shard_durability_cycle = new DurationSpec.IntMinutesBound(2); + public volatile DurationSpec global_durability_cycle = new DurationSpec.IntMinutesBound(10); } diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index f4eccb000ac0..246df534e82e 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5318,9 +5318,49 @@ public static long getAccordFastPathUpdateDelayMillis() return conf.accord.fast_path_update_delay.to(TimeUnit.MILLISECONDS); } - public static void setAccordFastPathUpdateDelayMillis(long millis) + public static void setAccordFastPathUpdateDelaySeconds(long seconds) { - conf.accord.fast_path_update_delay = new DurationSpec.IntMillisecondsBound(millis); + conf.accord.fast_path_update_delay = new DurationSpec.IntSecondsBound(seconds); + } + + public static long getAccordScheduleDurabilityFrequency(TimeUnit unit) + { + return conf.accord.schedule_durability_frequency.to(unit); + } + + public static void setAccordScheduleDurabilityFrequencySeconds(long seconds) + { + conf.accord.schedule_durability_frequency = new DurationSpec.IntSecondsBound(seconds); + } + + public static long getAccordScheduleDurabilityTxnIdLag(TimeUnit unit) + { + return conf.accord.durability_txnid_lag.to(unit); + } + + public static void setAccordScheduleDurabilityTxnIdLagSeconds(long seconds) + { + conf.accord.durability_txnid_lag = new DurationSpec.IntSecondsBound(seconds); + } + + public static long getAccordGlobalDurabilityCycle(TimeUnit unit) + { + return conf.accord.global_durability_cycle.to(unit); + } + + public static void setAccordGlobalDurabilityCycleSeconds(long seconds) + { + conf.accord.global_durability_cycle = new DurationSpec.IntSecondsBound(seconds); + } + + public static long getAccordShardDurabilityCycle(TimeUnit unit) + { + return conf.accord.shard_durability_cycle.to(unit); + } + + public static void setAccordShardDurabilityCycleSeconds(long seconds) + { + conf.accord.shard_durability_cycle = new DurationSpec.IntSecondsBound(seconds); } public static boolean getForceNewPreparedStatementBehaviour() diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index bb9180130bf5..f1a8872f07b3 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -81,6 +81,10 @@ import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.FBUtilities; +import static accord.primitives.Txn.Kind.EphemeralRead; +import static accord.primitives.Txn.Kind.Read; +import static org.apache.cassandra.config.Config.NonSerialWriteStrategy.accord; +import static org.apache.cassandra.config.DatabaseDescriptor.getNonSerialWriteStrategy; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull; import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; @@ -322,7 +326,8 @@ public Txn createTxn(ClientState state, QueryOptions options) List reads = createNamedReads(options, state, ImmutableMap.of(), keySet::add); Keys txnKeys = toKeys(keySet); TxnRead read = createTxnRead(reads, txnKeys, null); - return new Txn.InMemory(txnKeys, read, TxnQuery.ALL); + Txn.Kind kind = txnKeys.size() == 1 && getNonSerialWriteStrategy() == accord ? EphemeralRead : Read; + return new Txn.InMemory(kind, txnKeys, read, TxnQuery.ALL, null); } else { diff --git a/src/java/org/apache/cassandra/db/DiskBoundaryManager.java b/src/java/org/apache/cassandra/db/DiskBoundaryManager.java index 5c6b59a0b165..4aad348e4ac4 100644 --- a/src/java/org/apache/cassandra/db/DiskBoundaryManager.java +++ b/src/java/org/apache/cassandra/db/DiskBoundaryManager.java @@ -195,7 +195,7 @@ private static List getDiskBoundaries(RangesAtEndpoint replic List diskBoundaries = new ArrayList<>(); for (int i = 0; i < boundaries.size() - 1; i++) diskBoundaries.add(boundaries.get(i).maxKeyBound()); - diskBoundaries.add(partitioner.getMaximumToken().maxKeyBound()); + diskBoundaries.add(partitioner.getMaximumTokenForSplitting().maxKeyBound()); return diskBoundaries; } } diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 74836f845656..69230fd2ef56 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -98,10 +98,10 @@ import static org.apache.cassandra.config.DatabaseDescriptor.paxosStatePurging; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.maybeDropTruncatedCommandColumns; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.truncatedApply; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeysAccessor; import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_executed_micros; import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_executed_timestamp; import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_write_timestamp; -import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.max_timestamp; import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyRows.truncateTimestampsForKeyRow; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeDurabilityOrNull; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeRouteOrNull; @@ -221,11 +221,8 @@ private Transformation purger(ColumnFamilyStore cfs, Supp if (isAccordTimestampsForKey(cfs)) return new AccordTimestampsForKeyPurger(accordService); - if (isAccordDepsCommandsForKey(cfs)) - return new AccordCommandsForKeyPurger(AccordKeyspace.DepsCommandsForKeysAccessor, accordService); - - if (isAccordAllCommandsForKey(cfs)) - return new AccordCommandsForKeyPurger(AccordKeyspace.AllCommandsForKeysAccessor, accordService); + if (isAccordCommandsForKey(cfs)) + return new AccordCommandsForKeyPurger(AccordKeyspace.CommandsForKeysAccessor, accordService); throw new IllegalArgumentException("Unhandled accord table: " + cfs.keyspace.getName() + '.' + cfs.name); } @@ -929,22 +926,14 @@ protected Row applyToRow(Row row) lastWriteCell = null; } - Cell maxTimestampCell = row.getCell(max_timestamp); - Timestamp max_timestamp = deserializeTimestampOrNull(maxTimestampCell); - if (max_timestamp != null && max_timestamp.compareTo(redundantBeforeTxnId) < 0) - { - maxTimestampCell = null; - } - // No need to emit a tombstone as earlier versions of the row will also be nulled out // when compacted later or loaded into a commands for key if (lastExecuteMicrosCell == null && lastExecuteCell == null && - lastWriteCell == null && - maxTimestampCell == null) + lastWriteCell == null) return null; - return truncateTimestampsForKeyRow(nowInSec, row, lastExecuteMicrosCell, lastExecuteCell, lastWriteCell, maxTimestampCell); + return truncateTimestampsForKeyRow(nowInSec, row, lastExecuteMicrosCell, lastExecuteCell, lastWriteCell); } @Override @@ -990,11 +979,10 @@ protected Row applyToRow(Row row) return row; TxnId redundantBeforeTxnId = redundantBeforeEntry.shardRedundantBefore(); - Timestamp timestamp = accessor.getTimestamp(row); - if (timestamp != null && timestamp.compareTo(redundantBeforeTxnId) < 0) - return null; + if (redundantBeforeTxnId.equals(TxnId.NONE)) + return row; - return row; + return CommandsForKeysAccessor.withoutRedundantCommands(partitionKey, row, redundantBeforeTxnId); } @Override @@ -1050,8 +1038,7 @@ private static boolean requiresAccordSpecificPurger(ColumnFamilyStore cfs) return cfs.getKeyspaceName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME) && ImmutableSet.of(AccordKeyspace.COMMANDS, AccordKeyspace.TIMESTAMPS_FOR_KEY, - AccordKeyspace.DEPS_COMMANDS_FOR_KEY, - AccordKeyspace.ALL_COMMANDS_FOR_KEY) + AccordKeyspace.COMMANDS_FOR_KEY) .contains(cfs.getTableName()); } @@ -1070,13 +1057,8 @@ private static boolean isAccordTimestampsForKey(ColumnFamilyStore cfs) return isAccordTable(cfs, AccordKeyspace.TIMESTAMPS_FOR_KEY); } - private static boolean isAccordDepsCommandsForKey(ColumnFamilyStore cfs) - { - return isAccordTable(cfs, AccordKeyspace.DEPS_COMMANDS_FOR_KEY); - } - - private static boolean isAccordAllCommandsForKey(ColumnFamilyStore cfs) + private static boolean isAccordCommandsForKey(ColumnFamilyStore cfs) { - return isAccordTable(cfs, AccordKeyspace.ALL_COMMANDS_FOR_KEY); + return isAccordTable(cfs, AccordKeyspace.COMMANDS_FOR_KEY); } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/virtual/PartitionKeyStatsTable.java b/src/java/org/apache/cassandra/db/virtual/PartitionKeyStatsTable.java index d114e5faa763..550743c6a734 100644 --- a/src/java/org/apache/cassandra/db/virtual/PartitionKeyStatsTable.java +++ b/src/java/org/apache/cassandra/db/virtual/PartitionKeyStatsTable.java @@ -284,7 +284,7 @@ private AbstractBounds getBounds(TableMetadata target, Cluste { Slices s = clusteringIndexFilter.getSlices(target); Token startToken = target.partitioner.getMinimumToken(); - Token endToken = target.partitioner.getMaximumToken(); + Token endToken = target.partitioner.getMaximumTokenForSplitting(); BigInteger startTokenValue = new BigInteger(endToken.getTokenValue().toString(), 10); BigInteger endTokenValue = new BigInteger(startToken.getTokenValue().toString(), 10); diff --git a/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java b/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java index 9e3c3cf848c8..d49d0f6d0368 100644 --- a/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java +++ b/src/java/org/apache/cassandra/dht/ByteOrderedPartitioner.java @@ -144,6 +144,12 @@ public double size(Token next) @Override public Token nextValidToken() + { + throw new UnsupportedOperationException(String.format("Token type %s does not support token allocation.", + getClass().getSimpleName())); + } + + public Token increaseSlightly() { // find first byte we can increment int i = token.length - 1; @@ -180,8 +186,6 @@ public Token decreaseSlightly() if (i == -1) { byte[] newToken = Arrays.copyOf(token, token.length - 1); - if (newToken.length > 0) - newToken[newToken.length - 1] = (byte)-1; return new BytesToken(newToken); } diff --git a/src/java/org/apache/cassandra/dht/IPartitioner.java b/src/java/org/apache/cassandra/dht/IPartitioner.java index b543fab33ff1..0c4ae9dbb384 100644 --- a/src/java/org/apache/cassandra/dht/IPartitioner.java +++ b/src/java/org/apache/cassandra/dht/IPartitioner.java @@ -68,9 +68,9 @@ static IPartitioner global() * The biggest token for this partitioner, unlike getMinimumToken, this token is actually used and users wanting to * include all tokens need to do getMaximumToken().maxKeyBound() * - * Not implemented for the ordered partitioners + * THIS IS NOT SAFE FOR PURPOSES BESIDES SPLITTING/BALANCING */ - default Token getMaximumToken() + default Token getMaximumTokenForSplitting() { throw new UnsupportedOperationException("If you are using a splitting partitioner, getMaximumToken has to be implemented"); } diff --git a/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java b/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java index 410dbfcd0b00..2885cd732bd3 100644 --- a/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java +++ b/src/java/org/apache/cassandra/dht/Murmur3Partitioner.java @@ -468,7 +468,7 @@ public AbstractType getTokenValidator() return LongType.instance; } - public Token getMaximumToken() + public Token getMaximumTokenForSplitting() { return new LongToken(Long.MAX_VALUE); } diff --git a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java index 741a2b0c7f8d..ea38af129fb7 100644 --- a/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java +++ b/src/java/org/apache/cassandra/dht/OrderPreservingPartitioner.java @@ -140,7 +140,7 @@ public StringToken getMinimumToken() return MINIMUM; } - public StringToken getMaximumToken() + public StringToken getMaximumTokenForSplitting() { return MAXIMUM; } diff --git a/src/java/org/apache/cassandra/dht/RandomPartitioner.java b/src/java/org/apache/cassandra/dht/RandomPartitioner.java index a21815c0c5fb..38df59549fe6 100644 --- a/src/java/org/apache/cassandra/dht/RandomPartitioner.java +++ b/src/java/org/apache/cassandra/dht/RandomPartitioner.java @@ -374,7 +374,7 @@ public Map describeOwnership(List sortedTokens) return ownerships; } - public Token getMaximumToken() + public Token getMaximumTokenForSplitting() { return new BigIntegerToken(MAXIMUM); } diff --git a/src/java/org/apache/cassandra/dht/Splitter.java b/src/java/org/apache/cassandra/dht/Splitter.java index 3f9d663b7e5b..165a2ecf9bd6 100644 --- a/src/java/org/apache/cassandra/dht/Splitter.java +++ b/src/java/org/apache/cassandra/dht/Splitter.java @@ -50,7 +50,7 @@ protected BigInteger tokensInRange(Range range) { //full range case if (range.left.equals(range.right)) - return tokensInRange(new Range(partitioner.getMinimumToken(), partitioner.getMaximumToken())); + return tokensInRange(new Range(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting())); BigInteger totalTokens = BigInteger.ZERO; for (Range unwrapped : range.unwrap()) @@ -95,7 +95,7 @@ public double positionInRange(Token token, Range range) { //full range case if (range.left.equals(range.right)) - return positionInRange(token, new Range(partitioner.getMinimumToken(), partitioner.getMaximumToken())); + return positionInRange(token, new Range(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting())); // leftmost token means we are on position 0.0 if (token.equals(range.left)) @@ -115,7 +115,7 @@ public double positionInRange(Token token, Range range) public List splitOwnedRanges(int parts, List weightedRanges, boolean dontSplitRanges) { if (weightedRanges.isEmpty() || parts == 1) - return Collections.singletonList(partitioner.getMaximumToken()); + return Collections.singletonList(partitioner.getMaximumTokenForSplitting()); BigInteger totalTokens = BigInteger.ZERO; for (WeightedRange weightedRange : weightedRanges) @@ -126,7 +126,7 @@ public List splitOwnedRanges(int parts, List weightedRange BigInteger perPart = totalTokens.divide(BigInteger.valueOf(parts)); // the range owned is so tiny we can't split it: if (perPart.equals(BigInteger.ZERO)) - return Collections.singletonList(partitioner.getMaximumToken()); + return Collections.singletonList(partitioner.getMaximumTokenForSplitting()); if (dontSplitRanges) return splitOwnedRangesNoPartialRanges(weightedRanges, perPart, parts); @@ -155,7 +155,7 @@ else if (partsLeft == 1) } sum = sum.add(currentRangeWidth); } - boundaries.set(boundaries.size() - 1, partitioner.getMaximumToken()); + boundaries.set(boundaries.size() - 1, partitioner.getMaximumTokenForSplitting()); assert boundaries.size() == parts : boundaries.size() + "!=" + parts + " " + boundaries + ":" + weightedRanges; return boundaries; @@ -192,7 +192,7 @@ private List splitOwnedRangesNoPartialRanges(List weighted } i++; } - boundaries.add(partitioner.getMaximumToken()); + boundaries.add(partitioner.getMaximumTokenForSplitting()); return boundaries; } @@ -202,7 +202,7 @@ private List splitOwnedRangesNoPartialRanges(List weighted */ private Token token(Token t) { - return t.equals(partitioner.getMinimumToken()) ? partitioner.getMaximumToken() : t; + return t.equals(partitioner.getMinimumToken()) ? partitioner.getMaximumTokenForSplitting() : t; } /** diff --git a/src/java/org/apache/cassandra/dht/Token.java b/src/java/org/apache/cassandra/dht/Token.java index 7cbe0ebccb77..1df7171f9c95 100644 --- a/src/java/org/apache/cassandra/dht/Token.java +++ b/src/java/org/apache/cassandra/dht/Token.java @@ -265,7 +265,7 @@ public long getLongValue() public Token increaseSlightly() { return nextValidToken(); } /** - * Returns a token that is slightly less than this. + * Returns a token that is slightly less than this. This is NOT guaranteed to be the directly preceding token. */ abstract public Token decreaseSlightly(); diff --git a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java index 39160639584a..9807e0255f00 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java @@ -130,7 +130,7 @@ public KeyReader keyReader(PartitionPosition key) throws IOException { return PartitionIterator.create(partitionIndex, metadata().partitioner, rowIndexFile, dfile, key, -1, - metadata().partitioner.getMaximumToken().maxKeyBound(), 0, + metadata().partitioner.getMaximumTokenForSplitting().maxKeyBound(), 0, descriptor.version); } diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 97e77eccbbff..9b9aee034454 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -93,6 +93,8 @@ import org.apache.cassandra.service.accord.serializers.EnumSerializer; import org.apache.cassandra.service.accord.serializers.FetchSerializers; import org.apache.cassandra.service.accord.serializers.GetDepsSerializers; +import org.apache.cassandra.service.accord.serializers.GetEphmrlReadDepsSerializers; +import org.apache.cassandra.service.accord.serializers.GetMaxConflictSerializers; import org.apache.cassandra.service.accord.serializers.InformDurableSerializers; import org.apache.cassandra.service.accord.serializers.InformHomeDurableSerializers; import org.apache.cassandra.service.accord.serializers.InformOfTxnIdSerializers; @@ -331,6 +333,10 @@ public enum Verb ACCORD_CHECK_STATUS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_DEPS_REQ (144, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_DEPS_RSP ), + ACCORD_GET_EPHMRL_READ_DEPS_RSP (161, P2, writeTimeout, REQUEST_RESPONSE, () -> GetEphmrlReadDepsSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), + ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, REQUEST_RESPONSE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_MAX_CONFLICT_REQ (164, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_MAX_CONFLICT_RSP), ACCORD_FETCH_DATA_RSP (145, P2, repairTimeout,REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), ACCORD_FETCH_DATA_REQ (146, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), @@ -340,7 +346,7 @@ public enum Verb ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), - ACCORD_APPLY_AND_WAIT_UNTIL_APPLIED_REQ(152, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData,() -> AccordSyncPropagator.verbHandler, ACCORD_READ_RSP), + ACCORD_APPLY_AND_WAIT_REQ (152, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP), CONSENSUS_KEY_MIGRATION (153, P1, writeTimeout, MUTATION, () -> ConsensusKeyMigrationFinished.serializer,() -> ConsensusKeyMigrationState.consensusKeyMigrationFinishedHandler), diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index 52cc03ef5ac5..4b5fad2d2259 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -50,6 +50,7 @@ import accord.primitives.Keys; import accord.primitives.Txn; +import accord.utils.Invariants; import org.apache.cassandra.batchlog.Batch; import org.apache.cassandra.batchlog.BatchlogManager; import org.apache.cassandra.concurrent.DebuggableTask.RunnableDebuggableTask; @@ -168,10 +169,14 @@ import org.apache.cassandra.utils.concurrent.CountDownLatch; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static accord.primitives.Txn.Kind.EphemeralRead; +import static accord.primitives.Txn.Kind.Read; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.collect.Iterables.concat; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static org.apache.cassandra.config.Config.NonSerialWriteStrategy.accord; +import static org.apache.cassandra.config.DatabaseDescriptor.getNonSerialWriteStrategy; import static org.apache.cassandra.db.ConsistencyLevel.SERIAL; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casWriteMetrics; @@ -1218,7 +1223,7 @@ public static void mutateWithTriggers(List mutations, long size = IMutation.dataSize(mutations); writeMetrics.mutationSize.update(size); writeMetricsForLevel(consistencyLevel).mutationSize.update(size); - NonSerialWriteStrategy nonSerialWriteStrategy = DatabaseDescriptor.getNonSerialWriteStrategy(); + NonSerialWriteStrategy nonSerialWriteStrategy = getNonSerialWriteStrategy(); if (nonSerialWriteStrategy.writesThroughAccord && !SchemaConstants.getSystemKeyspaces().contains(keyspaceName)) mutateWithAccord(augmented != null ? augmented : mutations, consistencyLevel, requestTime, nonSerialWriteStrategy); else if (augmented != null) @@ -1997,9 +2002,11 @@ private static ConsensusAttemptResult readWithAccord(SinglePartitionReadCommand. SinglePartitionReadCommand readCommand = group.queries.get(0); // If the non-SERIAL write strategy is sending all writes through Accord there is no need to use the supplied consistency // level since Accord will manage reading safely - consistencyLevel = DatabaseDescriptor.getNonSerialWriteStrategy().readCLForStrategy(consistencyLevel); + NonSerialWriteStrategy nonSerialWriteStrategy = getNonSerialWriteStrategy(); + consistencyLevel = nonSerialWriteStrategy.readCLForStrategy(consistencyLevel); TxnRead read = TxnRead.createSerialRead(readCommand, consistencyLevel); - Txn txn = new Txn.InMemory(read.keys(), read, TxnQuery.ALL); + Invariants.checkState(read.keys().size() == 1, "Ephemeral reads are only strict-serializable for single partition reads"); + Txn txn = new Txn.InMemory(nonSerialWriteStrategy == accord ? EphemeralRead : Read, read.keys(), read, TxnQuery.ALL, null); IAccordService accordService = AccordService.instance(); accordService.maybeConvertTablesToAccord(txn); TxnResult txnResult = accordService.coordinate(txn, consistencyLevel, requestTime); diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index 43e0a50c7aa8..5175e86c1076 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -121,11 +121,6 @@ public boolean isComplete() return status().isComplete(); } - public boolean canEvict() - { - return true; - } - int estimatedSizeOnHeap(ToLongFunction estimator) { shouldUpdateSize = false; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index c823a6d9258c..36b224b022fb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -27,9 +27,9 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; +import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; -import java.util.function.Predicate; import javax.annotation.Nullable; @@ -43,8 +43,7 @@ import accord.api.Key; import accord.api.ProgressLog; import accord.impl.CommandsForKey; -import accord.impl.DomainCommands; -import accord.impl.DomainTimestamps; +import accord.impl.CommandsSummary; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; @@ -70,7 +69,6 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.ReducingRangeMap; -import accord.utils.TriFunction; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.Observable; @@ -114,13 +112,10 @@ private static long getThreadId(ExecutorService executor) private final AccordJournal journal; private final ExecutorService executor; private final ExecutionOrder executionOrder; - private final AccordCommandsForKeys keyCoordinator; private final AccordStateCache stateCache; private final AccordStateCache.Instance commandCache; private final AccordStateCache.Instance timestampsForKeyCache; - private final AccordStateCache.Instance depsCommandsForKeyCache; - private final AccordStateCache.Instance allCommandsForKeyCache; - private final AccordStateCache.Instance updatesForKeyCache; + private final AccordStateCache.Instance commandsForKeyCache; private AsyncOperation currentOperation = null; private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; @@ -153,7 +148,6 @@ public AccordCommandStore(int id, super(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder); this.journal = journal; loggingId = String.format("[%s]", id); - keyCoordinator = new AccordCommandsForKeys(this); executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); executionOrder = new ExecutionOrder(); threadId = getThreadId(executor); @@ -174,33 +168,15 @@ public AccordCommandStore(int id, this::saveTimestampsForKey, this::validateTimestampsForKey, AccordObjectSizes::timestampsForKey); - depsCommandsForKeyCache = + commandsForKeyCache = stateCache.instance(RoutableKey.class, AccordSafeCommandsForKey.class, AccordSafeCommandsForKey::new, - this::loadDepsCommandsForKey, + this::loadCommandsForKey, this::saveCommandsForKey, - this::validateDepsCommandsForKey, + this::validateCommandsForKey, AccordObjectSizes::commandsForKey, - keyCoordinator::createDepsCommandsNode); - allCommandsForKeyCache = - stateCache.instance(RoutableKey.class, - AccordSafeCommandsForKey.class, - AccordSafeCommandsForKey::new, - this::loadAllCommandsForKey, - this::saveCommandsForKey, - this::validateAllCommandsForKey, - AccordObjectSizes::commandsForKey, - keyCoordinator::createDepsCommandsNode); - updatesForKeyCache = - stateCache.instance(RoutableKey.class, - AccordSafeCommandsForKeyUpdate.class, - AccordSafeCommandsForKeyUpdate::new, - this::loadCommandsForKeyUpdate, - this::saveCommandsForKeyUpdate, - (key, evicting) -> true, - CommandsForKeyUpdate::estimatedSizeOnHeap, - keyCoordinator::createUpdatesNode); + AccordCachingState::new); AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead) -> { executor.submit(() -> { @@ -340,19 +316,9 @@ public AccordStateCache.Instance depsCommandsForKeyCache() + public AccordStateCache.Instance commandsForKeyCache() { - return depsCommandsForKeyCache; - } - - public AccordStateCache.Instance allCommandsForKeyCache() - { - return allCommandsForKeyCache; - } - - public AccordStateCache.Instance updatesForKeyCache() - { - return updatesForKeyCache; + return commandsForKeyCache; } Command loadCommand(TxnId txnId) @@ -384,39 +350,17 @@ TimestampsForKey loadTimestampsForKey(RoutableKey key) return AccordKeyspace.loadTimestampsForKey(this, (PartitionKey) key); } - CommandsForKey loadDepsCommandsForKey(RoutableKey key) - { - return AccordKeyspace.loadDepsCommandsForKey(this, (PartitionKey) key); - } - - CommandsForKey loadAllCommandsForKey(RoutableKey key) + CommandsForKey loadCommandsForKey(RoutableKey key) { - return AccordKeyspace.loadAllCommandsForKey(this, (PartitionKey) key); + return AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); } - CommandsForKeyUpdate loadCommandsForKeyUpdate(RoutableKey key) + boolean validateCommandsForKey(RoutableKey key, CommandsForKey evicting) { - throw new IllegalStateException(); - } - - boolean validateDepsCommandsForKey(RoutableKey key, CommandsForKey evicting) - { - CommandsForKey reloaded = AccordKeyspace.loadDepsCommandsForKey(this, (PartitionKey) key); - return Objects.equals(evicting, reloaded); - } - - boolean validateAllCommandsForKey(RoutableKey key, CommandsForKey evicting) - { - CommandsForKey reloaded = AccordKeyspace.loadAllCommandsForKey(this, (PartitionKey) key); + CommandsForKey reloaded = AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); return Objects.equals(evicting, reloaded); } - @Nullable - private Runnable saveCommandsForKey(CommandsForKey before, CommandsForKey after) - { - throw new IllegalStateException(); - } - @Nullable private Runnable saveTimestampsForKey(TimestampsForKey before, TimestampsForKey after) { @@ -425,7 +369,7 @@ private Runnable saveTimestampsForKey(TimestampsForKey before, TimestampsForKey } @Nullable - private Runnable saveCommandsForKeyUpdate(CommandsForKeyUpdate before, CommandsForKeyUpdate after) + private Runnable saveCommandsForKey(CommandsForKey before, CommandsForKey after) { Mutation mutation = AccordKeyspace.getCommandsForKeyMutation(id, after, nextSystemTimestampMicros()); return null != mutation ? mutation::applyUnsafe : null; @@ -523,15 +467,13 @@ public void executeBlocking(Runnable runnable) public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, Map commands, NavigableMap timestampsForKeys, - NavigableMap depsCommandsForKeys, - NavigableMap allCommandsForKeys, - NavigableMap updatesForKeys) + NavigableMap commandsForKeys) { Invariants.checkState(current == null); commands.values().forEach(AccordSafeState::preExecute); - depsCommandsForKeys.values().forEach(AccordSafeState::preExecute); + commandsForKeys.values().forEach(AccordSafeState::preExecute); timestampsForKeys.values().forEach(AccordSafeState::preExecute); - current = new AccordSafeCommandStore(preLoadContext, commands, timestampsForKeys, depsCommandsForKeys, allCommandsForKeys, updatesForKeys, this); + current = new AccordSafeCommandStore(preLoadContext, commands, timestampsForKeys, commandsForKeys, this); return current; } @@ -547,7 +489,7 @@ public void completeOperation(AccordSafeCommandStore store) current = null; } - O mapReduceForRange(Routables keysOrRanges, Ranges slice, TriFunction map, O accumulate, Predicate terminate) + O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) { keysOrRanges = keysOrRanges.slice(slice, Routables.Slice.Minimal); switch (keysOrRanges.domain()) @@ -555,12 +497,8 @@ O mapReduceForRange(Routables keysOrRanges, Ranges slice, TriFunction keys = (AbstractKeys) keysOrRanges; - for (CommandsForRanges.DomainInfo summary : commandsForRanges.search(keys)) - { - accumulate = map.apply(summary, summary, accumulate); - if (terminate.test(accumulate)) - return accumulate; - } + for (CommandsSummary summary : commandsForRanges.search(keys)) + accumulate = map.apply(summary, accumulate); } break; case Range: @@ -568,12 +506,10 @@ O mapReduceForRange(Routables keysOrRanges, Ranges slice, TriFunction createDepsCommandsNode(RoutableKey key, int index) - { - return new DepsCommandsCachingState(key, index); - } - - AccordCachingState createAllCommandsNode(RoutableKey key, int index) - { - return new AllCommandsCachingState(key, index); - } - - AccordCachingState createUpdatesNode(RoutableKey key, int index) - { - return new UpdateCachingState(key, index); - } - - protected static boolean hasEvictableStatus(AccordCachingState state) - { - if (state == null) - return true; - - switch (state.status()) - { - case LOADING: - case SAVING: - return false; - } - - return true; - } - - boolean canEvictKey(RoutableKey key) - { - return hasEvictableStatus(commandStore.depsCommandsForKeyCache().getUnsafe(key)) - && hasEvictableStatus(commandStore.allCommandsForKeyCache().getUnsafe(key)) - && hasEvictableStatus(commandStore.updatesForKeyCache().getUnsafe(key)); - } - - public abstract class CommandsCachingState extends AccordCachingState - { - protected CommandsCachingState(RoutableKey key, int index) - { - super(key, index); - } - - private CommandsForKey initializeIfNull(CommandsForKey commands) - { - if (commands != null) - return commands; - return new CommandsForKey((Key) key(), CommandsForKeySerializer.loader); - } - - private State maybeApplyUpdates(State state) - { - if (!(state instanceof Loaded)) - return state; - - Loaded loaded = (Loaded) state; - CommandsForKey commands = loaded.get(); - UpdateCachingState updates = (UpdateCachingState) commandStore.updatesForKeyCache().getUnsafe(key()); - if (updates == null) - return loaded; - - CommandsForKeyUpdate update = updates.getUpdateIfAvailable(); - if (update == null) - return loaded; - CommandsForKey updated = apply(initializeIfNull(commands), update); - if (updated == commands) - return loaded; - - return new Loaded<>(updated); - } - - protected abstract CommandsForKey apply(CommandsForKey current, CommandsForKeyUpdate update); - - private void maybeApplyUpdates(CommandsForKeyUpdate update) - { - if (status() != Status.LOADED) - return; - - CommandsForKey commands = get(); - CommandsForKey updated = apply(initializeIfNull(commands), update); - if (commands != updated) - super.state(new Loaded<>(updated)); - } - - protected State state(State next) - { - Status nextStatus = next.status(); - Invariants.checkState(nextStatus != Status.MODIFIED && nextStatus != Status.SAVING, - "CommandsForKey cannot have state %s", nextStatus); - - return super.state(maybeApplyUpdates(next)); - } - - @Override - public boolean canEvict() - { - return canEvictKey(key()); - } - } - - public class DepsCommandsCachingState extends CommandsCachingState - { - public DepsCommandsCachingState(RoutableKey key, int index) - { - super(key, index); - } - - protected CommandsForKey apply(CommandsForKey current, CommandsForKeyUpdate update) - { - return update.applyToDeps(current); - } - } - - public class AllCommandsCachingState extends CommandsCachingState - { - public AllCommandsCachingState(RoutableKey key, int index) - { - super(key, index); - } - - protected CommandsForKey apply(CommandsForKey current, CommandsForKeyUpdate update) - { - return update.applyToAll(current); - } - } - - public class UpdateCachingState extends AccordCachingState implements CommandsForKeyGroupUpdater.Immutable.Factory - { - public UpdateCachingState(RoutableKey key, int index) - { - super(key, index); - } - - public AsyncChain load(ExecutorPlus executor, Function loadFunction) - { - if (status() == Status.UNINITIALIZED) - { - CommandsForKeyUpdate initialized = CommandsForKeyUpdate.empty(key()); - state(state().initialize(initialized)); - return null; - } - - return super.load(executor, loadFunction); - } - - // update in memory cfk data with the update results - protected void maybeUpdateCommands(CommandsForKeyUpdate update, AccordStateCache.Instance cache) - { - CommandsCachingState commands = (CommandsCachingState) cache.getUnsafe(key()); - if (commands == null) - return; - - commands.maybeApplyUpdates(update); - } - - public CommandsForKeyUpdate create(CommandsForKeyUpdater.Immutable deps, CommandsForKeyUpdater.Immutable all, CommandsForKeyUpdater.Immutable common) - { - return new CommandsForKeyUpdate((PartitionKey) key(), deps, all, common); - } - - protected State maybeProcessModification(State next) - { - if (!(next instanceof Modified)) - return next; - - Modified modified = (Modified) next; - - CommandsForKeyUpdate current = modified.current; - maybeUpdateCommands(current, commandStore.depsCommandsForKeyCache()); - maybeUpdateCommands(current, commandStore.allCommandsForKeyCache()); - - // combine in memory updates - current = CommandsForKeyGroupUpdater.Immutable.merge(modified.original, current, this); - - return new Modified<>(null, current); - } - - protected State state(State next) - { - Status nextStatus = next.status(); - Invariants.checkState(nextStatus != Status.LOADING, - "CommandsForKeyUpdate cannot have state %s", nextStatus); - - return super.state(maybeProcessModification(next)); - } - - CommandsForKeyUpdate getUpdateIfAvailable() - { - switch (status()) - { - case LOADED: - case MODIFIED: - return get(); - } - - return null; - } - - @Override - public boolean canEvict() - { - return canEvictKey(key()); - } - } -} diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 31565f842370..d2a14a44dbe2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -149,7 +149,7 @@ public synchronized void start() } remoteSyncComplete.forEach(id -> receiveRemoteSyncComplete(id, epoch)); - // TODO (now): disk doesn't get updated until we see our own notification, so there is an edge case where this instance notified others and fails in the middle, but Apply was already sent! This could leave partial closed/redudant accross the cluster + // TODO (required): disk doesn't get updated until we see our own notification, so there is an edge case where this instance notified others and fails in the middle, but Apply was already sent! This could leave partial closed/redudant accross the cluster receiveClosed(closed, epoch); receiveRedundant(redundant, epoch); })); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index c3d47ce7101e..d0a593bf0669 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -24,7 +24,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.NavigableMap; @@ -34,14 +33,11 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; -import java.util.function.Predicate; import java.util.stream.Collectors; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -49,9 +45,7 @@ import org.slf4j.LoggerFactory; import accord.api.Key; -import accord.impl.CommandTimeseries; import accord.impl.CommandsForKey; -import accord.impl.CommandsForKeyUpdater; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.Command.WaitingOn; @@ -153,8 +147,7 @@ import org.apache.cassandra.service.accord.serializers.TopologySerializers; import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.transport.Dispatcher; -import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.MonotonicClock; +import org.apache.cassandra.utils.Clock.Global; import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.bytecomparable.ByteComparable; @@ -164,6 +157,8 @@ import static java.lang.String.format; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; +import static org.apache.cassandra.db.partitions.PartitionUpdate.singleRowUpdate; +import static org.apache.cassandra.db.rows.BTreeRow.singleCellRow; import static org.apache.cassandra.db.rows.BufferCell.live; import static org.apache.cassandra.db.rows.BufferCell.tombstone; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; @@ -177,14 +172,13 @@ public class AccordKeyspace public static final String COMMANDS = "commands"; public static final String TIMESTAMPS_FOR_KEY = "timestamps_for_key"; - public static final String DEPS_COMMANDS_FOR_KEY = "deps_commands_for_key"; - public static final String ALL_COMMANDS_FOR_KEY = "all_commands_for_key"; + public static final String COMMANDS_FOR_KEY = "commands_for_key"; public static final String TOPOLOGIES = "topologies"; public static final String EPOCH_METADATA = "epoch_metadata"; public static final String COMMAND_STORE_METADATA = "command_store_metadata"; - public static final Set TABLE_NAMES = ImmutableSet.of(COMMANDS, TIMESTAMPS_FOR_KEY, DEPS_COMMANDS_FOR_KEY, - ALL_COMMANDS_FOR_KEY, TOPOLOGIES, EPOCH_METADATA, + public static final Set TABLE_NAMES = ImmutableSet.of(COMMANDS, TIMESTAMPS_FOR_KEY, COMMANDS_FOR_KEY, + TOPOLOGIES, EPOCH_METADATA, COMMAND_STORE_METADATA); private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); @@ -422,7 +416,6 @@ public static Row maybeDropTruncatedCommandColumns(Row row, Cell durabilityCe + "store_id int, " + "key_token blob, " // can't use "token" as this is restricted word in CQL + format("key %s, ", KEY_TUPLE) - + format("max_timestamp %s, ", TIMESTAMP_TUPLE) + format("last_executed_timestamp %s, ", TIMESTAMP_TUPLE) + "last_executed_micros bigint, " + format("last_write_timestamp %s, ", TIMESTAMP_TUPLE) @@ -439,12 +432,11 @@ public static class TimestampsForKeyColumns static final ColumnMetadata store_id = getColumn(TimestampsForKeys, "store_id"); static final ColumnMetadata key_token = getColumn(TimestampsForKeys, "key_token"); static final ColumnMetadata key = getColumn(TimestampsForKeys, "key"); - public static final ColumnMetadata max_timestamp = getColumn(TimestampsForKeys, "max_timestamp"); public static final ColumnMetadata last_executed_timestamp = getColumn(TimestampsForKeys, "last_executed_timestamp"); public static final ColumnMetadata last_executed_micros = getColumn(TimestampsForKeys, "last_executed_micros"); public static final ColumnMetadata last_write_timestamp = getColumn(TimestampsForKeys, "last_write_timestamp"); - static final Columns columns = Columns.from(Lists.newArrayList(max_timestamp, last_executed_timestamp, last_executed_micros, last_write_timestamp)); + static final Columns columns = Columns.from(Lists.newArrayList(last_executed_timestamp, last_executed_micros, last_write_timestamp)); static ByteBuffer makePartitionKey(int storeId, Key key) { @@ -475,15 +467,6 @@ public static PartitionKey getKey(ByteBuffer[] partitionKeyComponents) return deserializeKey(partitionKeyComponents[key.position()]); } - @Nullable - public static Timestamp getMaxTimestamp(Row row) - { - Cell cell = row.getCell(max_timestamp); - if (cell == null) - return null; - return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); - } - @Nullable public static Timestamp getLastExecutedTimestamp(Row row) { @@ -510,12 +493,11 @@ public static Timestamp getLastWriteTimestamp(Row row) return deserializeTimestampOrNull(cell.value(), cell.accessor(), Timestamp::fromBits); } - public static Row truncateTimestampsForKeyRow(long nowInSec, Row row, Cell lastExecuteMicrosCell, Cell lastExecuteCell, Cell lastWriteCell, Cell maxTimestampCell) + public static Row truncateTimestampsForKeyRow(long nowInSec, Row row, Cell lastExecuteMicrosCell, Cell lastExecuteCell, Cell lastWriteCell) { checkArgument(lastExecuteMicrosCell == null || lastExecuteMicrosCell.column() == last_executed_micros); checkArgument(lastExecuteCell == null || lastExecuteCell.column() == last_executed_timestamp); checkArgument(lastWriteCell == null || lastWriteCell.column() == last_write_timestamp); - checkArgument(maxTimestampCell == null || maxTimestampCell.column() == max_timestamp); long timestamp = row.primaryKeyLivenessInfo().timestamp(); @@ -526,8 +508,6 @@ public static Row truncateTimestampsForKeyRow(long nowInSec, Row row, Cell lastE colCount++; if (lastWriteCell != null) colCount++; - if (maxTimestampCell != null) - colCount++; checkState(columns.size() >= colCount, "CommandsForKeyColumns.static_columns_metadata should include all the columns"); Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(colCount); @@ -538,17 +518,14 @@ public static Row truncateTimestampsForKeyRow(long nowInSec, Row row, Cell lastE if (lastExecuteCell != null) newLeaf[colIndex++] = lastExecuteCell; if (lastWriteCell != null) - newLeaf[colIndex++] = lastWriteCell; - if (maxTimestampCell != null) - newLeaf[colIndex++] = maxTimestampCell; + newLeaf[colIndex] = lastWriteCell; return BTreeRow.create(row.clustering(), LivenessInfo.create(timestamp, nowInSec), Deletion.LIVE, newLeaf); } } - private static final TableMetadata DepsCommandsForKeys = commandsForKeysTable(DEPS_COMMANDS_FOR_KEY); - private static final TableMetadata AllCommandsForKeys = commandsForKeysTable(ALL_COMMANDS_FOR_KEY); + private static final TableMetadata CommandsForKeys = commandsForKeysTable(COMMANDS_FOR_KEY); private static TableMetadata commandsForKeysTable(String tableName) { @@ -558,9 +535,8 @@ private static TableMetadata commandsForKeysTable(String tableName) + "store_id int, " + "key_token blob, " // can't use "token" as this is restricted word in CQL + format("key %s, ", KEY_TUPLE) - + format("timestamp %s, ", TIMESTAMP_TUPLE) + "data blob, " - + "PRIMARY KEY((store_id, key_token, key), timestamp)" + + "PRIMARY KEY((store_id, key_token, key))" + ')') .partitioner(FOR_KEYS_LOCAL_PARTITIONER) .build(); @@ -575,8 +551,6 @@ public static class CommandsForKeyAccessor final ColumnMetadata store_id; final ColumnMetadata key_token; final ColumnMetadata key; - final ColumnMetadata timestamp; - final ColumnMetadata data; final RegularAndStaticColumns columns; @@ -590,7 +564,6 @@ public CommandsForKeyAccessor(TableMetadata table) this.store_id = getColumn(table, "store_id"); this.key_token = getColumn(table, "key_token"); this.key = getColumn(table, "key"); - this.timestamp = getColumn(table, "timestamp"); this.data = getColumn(table, "data"); this.columns = new RegularAndStaticColumns(Columns.NONE, Columns.from(Lists.newArrayList(data))); } @@ -610,15 +583,37 @@ public PartitionKey getKey(ByteBuffer[] partitionKeyComponents) return deserializeKey(partitionKeyComponents[key.position()]); } - @Nullable - public Timestamp getTimestamp(Row row) + public CommandsForKey getCommandsForKey(PartitionKey key, Row row) + { + Cell cell = row.getCell(data); + if (cell == null) + return null; + + return CommandsForKeySerializer.fromBytes(key, cell.buffer()); + } + + // TODO (expected): garbage-free filtering, reusing encoding + public Row withoutRedundantCommands(PartitionKey key, Row row, TxnId redundantBefore) { - return deserializeTimestampOrNull(row.clustering().bufferAt(timestamp.position()), Timestamp::fromBits); + Invariants.checkState(row.columnCount() == 1); + Cell cell = row.getCell(data); + if (cell == null) + return row; + + CommandsForKey current = CommandsForKeySerializer.fromBytes(key, cell.buffer()); + CommandsForKey updated = current.withoutRedundant(redundantBefore); + if (current == updated) + return row; + + if (updated.size() == 0) + return null; + + ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(updated); + return BTreeRow.singleCellRow(Clustering.EMPTY, BufferCell.live(data, cell.timestamp(), buffer)); } } - public static final CommandsForKeyAccessor DepsCommandsForKeysAccessor = new CommandsForKeyAccessor(DepsCommandsForKeys); - public static final CommandsForKeyAccessor AllCommandsForKeysAccessor = new CommandsForKeyAccessor(AllCommandsForKeys); + public static final CommandsForKeyAccessor CommandsForKeysAccessor = new CommandsForKeyAccessor(CommandsForKeys); private static final TableMetadata Topologies = parse(TOPOLOGIES, @@ -677,7 +672,7 @@ public static KeyspaceMetadata metadata() public static Tables tables() { - return Tables.of(Commands, TimestampsForKeys, DepsCommandsForKeys, AllCommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata); + return Tables.of(Commands, TimestampsForKeys, CommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata); } private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException @@ -838,7 +833,7 @@ public static Mutation getCommandMutation(int storeId, Command original, Command ByteBuffer key = CommandsColumns.keyComparator.make(storeId, command.txnId().domain().ordinal(), serializeTimestamp(command.txnId())).serializeAsPartitionKey(); - PartitionUpdate update = PartitionUpdate.singleRowUpdate(Commands, key, row); + PartitionUpdate update = singleRowUpdate(Commands, key, row); return new Mutation(update); } catch (IOException e) @@ -972,7 +967,7 @@ public static void findAllCommandsByDomain(int commandStore, Routable.Domain dom private static abstract class TableWalk implements Runnable, DebuggableTask { - private final long creationTimeNanos = Clock.Global.nanoTime(); + private final long creationTimeNanos = Global.nanoTime(); private final Executor executor; private final Observable callback; private long startTimeNanos = -1; @@ -998,7 +993,7 @@ public final void run() try { if (startTimeNanos == -1) - startTimeNanos = Clock.Global.nanoTime(); + startTimeNanos = Global.nanoTime(); numQueries++; UntypedResultSet result = query(lastSeen); if (result.isEmpty()) @@ -1329,7 +1324,6 @@ public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); LivenessInfo livenessInfo = LivenessInfo.create(timestampMicros, nowInSeconds); builder.addPrimaryKeyLivenessInfo(livenessInfo); - addCellIfModified(TimestampsForKeyColumns.max_timestamp, TimestampsForKey::max, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); addCellIfModified(TimestampsForKeyColumns.last_executed_timestamp, TimestampsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); addCellIfModified(TimestampsForKeyColumns.last_executed_micros, TimestampsForKey::rawLastExecutedHlc, accessor::valueOf, builder, timestampMicros, nowInSeconds, original, current); addCellIfModified(TimestampsForKeyColumns.last_write_timestamp, TimestampsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); @@ -1339,7 +1333,7 @@ public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey return null; ByteBuffer key = TimestampsForKeyColumns.makePartitionKey(storeId, current.key()); - PartitionUpdate update = PartitionUpdate.singleRowUpdate(TimestampsForKeys, key, row); + PartitionUpdate update = singleRowUpdate(TimestampsForKeys, key, row); return new Mutation(update); } catch (IOException e) @@ -1374,7 +1368,6 @@ public static TimestampsForKey loadTimestampsForKey(AccordCommandStore commandSt public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore commandStore, PartitionKey key) { - UntypedResultSet rows = loadTimestampsForKeyRow(commandStore, key); if (rows.isEmpty()) @@ -1385,60 +1378,11 @@ public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore com UntypedResultSet.Row row = rows.one(); checkState(deserializeKey(row).equals(key)); - Timestamp max = deserializeTimestampOrDefault(row, "max_timestamp", Timestamp::fromBits, Timestamp.NONE); Timestamp lastExecutedTimestamp = deserializeTimestampOrDefault(row, "last_executed_timestamp", Timestamp::fromBits, Timestamp.NONE); long lastExecutedMicros = row.has("last_executed_micros") ? row.getLong("last_executed_micros") : 0; Timestamp lastWriteTimestamp = deserializeTimestampOrDefault(row, "last_write_timestamp", Timestamp::fromBits, Timestamp.NONE); - return TimestampsForKey.SerializerSupport.create(key, max, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp); - } - - private static void addSeriesMutations(CommandsForKeyAccessor accessor, - CommandTimeseries.Update update, - PartitionUpdate.Builder partitionBuilder, - Row.Builder rowBuilder, - LivenessInfo livenessInfo, - long timestampMicros, - Row.Deletion deletion, - Predicate predicate) - { - if (update.isEmpty()) - return; - - update.forEachWrite((timestamp, bytes) -> { - if (!predicate.test(timestamp)) - return; - rowBuilder.newRow(Clustering.make(serializeTimestamp(timestamp))); - rowBuilder.addCell(live(accessor.data, timestampMicros, bytes)); - rowBuilder.addPrimaryKeyLivenessInfo(livenessInfo); - partitionBuilder.add(rowBuilder.build()); - }); - update.forEachDelete(timestamp -> { - if (!predicate.test(timestamp)) - return; - rowBuilder.newRow(Clustering.make(serializeTimestamp(timestamp))); - rowBuilder.addRowDeletion(deletion); - partitionBuilder.add(rowBuilder.build()); - }); - } - - private static void addSeriesMutations(CommandsForKeyAccessor accessor, - CommandTimeseries.Update common, - CommandTimeseries.Update update, - PartitionUpdate.Builder partitionBuilder, - Row.Builder rowBuilder, - LivenessInfo livenessInfo, - int nowInSeconds) - { - - long timestampMicros = livenessInfo.timestamp(); - - Row.Deletion deletion = common.numDeletes() + update.numDeletes() > 0 ? - Row.Deletion.regular(DeletionTime.build(timestampMicros, nowInSeconds)) : - null; - - addSeriesMutations(accessor, common, partitionBuilder, rowBuilder, livenessInfo, timestampMicros, deletion, ts -> !update.contains(ts)); - addSeriesMutations(accessor, update, partitionBuilder, rowBuilder, livenessInfo, timestampMicros, deletion, ts -> true); + return TimestampsForKey.SerializerSupport.create(key, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp); } private static DecoratedKey makeKey(CommandsForKeyAccessor accessor, int storeId, PartitionKey key) @@ -1450,54 +1394,17 @@ private static DecoratedKey makeKey(CommandsForKeyAccessor accessor, int storeId return accessor.table.partitioner.decorateKey(pk); } - private static PartitionUpdate getCommandsForKeyPartitionUpdate(CommandsForKeyAccessor accessor, int storeId, PartitionKey key, CommandsForKeyUpdater common, CommandsForKeyUpdater update, long timestampMicros) + private static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, PartitionKey key, CommandsForKey commandsForKey, long timestampMicros) { - - int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); - LivenessInfo livenessInfo = LivenessInfo.create(timestampMicros, nowInSeconds); - - int expectedRows = common.totalChanges() + update.totalChanges(); - - PartitionUpdate.Builder partitionBuilder = new PartitionUpdate.Builder(accessor.table, - makeKey(accessor, storeId, key), - accessor.columns, - expectedRows); - - Row.Builder rowBuilder = BTreeRow.unsortedBuilder(); - - addSeriesMutations(accessor, common.commands(), update.commands(), partitionBuilder, rowBuilder, livenessInfo, nowInSeconds); - - PartitionUpdate partitionUpdate = partitionBuilder.build(); - if (partitionUpdate.isEmpty()) - return null; - return partitionUpdate; + ByteBuffer bytes = CommandsForKeySerializer.toBytesWithoutKey(commandsForKey); + return singleRowUpdate(CommandsForKeysAccessor.table, + makeKey(CommandsForKeysAccessor, storeId, key), + singleCellRow(Clustering.EMPTY, BufferCell.live(CommandsForKeysAccessor.data, timestampMicros, bytes))); } - public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKeyUpdate update, long timestampMicros) + public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKey update, long timestampMicros) { - PartitionUpdate depsUpdate = getCommandsForKeyPartitionUpdate(DepsCommandsForKeysAccessor, - storeId, - update.key(), - update.common(), - update.deps(), - timestampMicros); - PartitionUpdate allUpdate = getCommandsForKeyPartitionUpdate(AllCommandsForKeysAccessor, - storeId, - update.key(), - update.common(), - update.all(), - timestampMicros); - - if (depsUpdate == null && allUpdate == null) - return null; - if (depsUpdate == null) - return new Mutation(allUpdate); - else if (allUpdate == null) - return new Mutation(depsUpdate); - - return new Mutation(ACCORD_KEYSPACE_NAME, depsUpdate.partitionKey(), - ImmutableMap.of(depsUpdate.metadata().id, depsUpdate, allUpdate.metadata().id, allUpdate), - MonotonicClock.Global.approxTime.now(), false); + return new Mutation(getCommandsForKeyPartitionUpdate(storeId, (PartitionKey) update.key(), update, timestampMicros)); } private static ByteBuffer cellValue(Cell cell) @@ -1527,50 +1434,31 @@ private static SinglePartitionReadCommand getCommandsForKeyRead(CommandsForKeyAc FULL_PARTITION); } - public static SinglePartitionReadCommand getDepsCommandsForKeyRead(int storeId, PartitionKey key, int nowInSeconds) - { - return getCommandsForKeyRead(DepsCommandsForKeysAccessor, storeId, key, nowInSeconds); - } - - public static SinglePartitionReadCommand getAllCommandsForKeyRead(int storeId, PartitionKey key, int nowInSeconds) + public static SinglePartitionReadCommand getCommandsForKeyRead(int storeId, PartitionKey key, int nowInSeconds) { - return getCommandsForKeyRead(AllCommandsForKeysAccessor, storeId, key, nowInSeconds); + return getCommandsForKeyRead(CommandsForKeysAccessor, storeId, key, nowInSeconds); } static CommandsForKey unsafeLoadCommandsForKey(CommandsForKeyAccessor accessor, AccordCommandStore commandStore, PartitionKey key) { - long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Global.currentTimeMillis()); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); SinglePartitionReadCommand command = getCommandsForKeyRead(accessor, commandStore.id(), key, nowInSeconds); - - ImmutableSortedMap.Builder commands = new ImmutableSortedMap.Builder<>(Comparator.naturalOrder()); - try (ReadExecutionController controller = command.executionController(); FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) { if (!partitions.hasNext()) - { return null; - } try (RowIterator partition = partitions.next()) { - while (partition.hasNext()) - { - Row row = partition.next(); - Clustering clustering = row.clustering(); - Timestamp timestamp = deserializeTimestampOrNull(clusteringValue(clustering, 0), Timestamp::fromBits); - ByteBuffer data = cellValue(row, accessor.data); - if (data == null) - continue; - commands.put(timestamp, data); - } + Invariants.checkState(partition.hasNext()); + Row row = partition.next(); + ByteBuffer data = cellValue(row, accessor.data); + return CommandsForKeySerializer.fromBytes(key, data); } - checkState(!partitions.hasNext()); - - return CommandsForKey.SerializerSupport.create(key, CommandsForKeySerializer.loader, commands.build()); } catch (Throwable t) { @@ -1579,26 +1467,15 @@ static CommandsForKey unsafeLoadCommandsForKey(CommandsForKeyAccessor accessor, } } - public static CommandsForKey unsafeLoadDepsCommandsForKey(AccordCommandStore commandStore, PartitionKey key) - { - return unsafeLoadCommandsForKey(DepsCommandsForKeysAccessor, commandStore, key); - } - - public static CommandsForKey unsafeLoadAllCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + public static CommandsForKey unsafeLoadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) { - return unsafeLoadCommandsForKey(AllCommandsForKeysAccessor, commandStore, key); - } - - public static CommandsForKey loadDepsCommandsForKey(AccordCommandStore commandStore, PartitionKey key) - { - commandStore.checkNotInStoreThread(); - return unsafeLoadCommandsForKey(DepsCommandsForKeysAccessor, commandStore, key); + return unsafeLoadCommandsForKey(CommandsForKeysAccessor, commandStore, key); } - public static CommandsForKey loadAllCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) { commandStore.checkNotInStoreThread(); - return unsafeLoadCommandsForKey(AllCommandsForKeysAccessor, commandStore, key); + return unsafeLoadCommandsForKey(CommandsForKeysAccessor, commandStore, key); } public static class EpochDiskState @@ -1860,7 +1737,7 @@ private static IMutation getCommandStoreMetadataMutation(String cql, ByteBuffer. ModificationStatement statement = (ModificationStatement) QueryProcessor.parseStatement(cql).prepare(ClientState.forInternalCalls()); QueryOptions options = QueryOptions.forInternalCalls(Arrays.asList(values)); - long tsMicros = TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + long tsMicros = TimeUnit.MILLISECONDS.toMicros(Global.currentTimeMillis()); while (true) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 5efdb7c0f450..73827a3771d4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -124,6 +124,8 @@ private VerbMapping() builder.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); builder.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); builder.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); + builder.put(MessageType.GET_EPHEMERAL_READ_DEPS_REQ, Verb.ACCORD_GET_EPHMRL_READ_DEPS_REQ); + builder.put(MessageType.GET_EPHEMERAL_READ_DEPS_RSP, Verb.ACCORD_GET_EPHMRL_READ_DEPS_RSP); builder.put(MessageType.COMMIT_SLOW_PATH_REQ, Verb.ACCORD_COMMIT_REQ); builder.put(MessageType.COMMIT_MAXIMAL_REQ, Verb.ACCORD_COMMIT_REQ); builder.put(MessageType.STABLE_FAST_PATH_REQ, Verb.ACCORD_COMMIT_REQ); @@ -134,6 +136,7 @@ private VerbMapping() builder.put(MessageType.APPLY_MAXIMAL_REQ, Verb.ACCORD_APPLY_REQ); builder.put(MessageType.APPLY_RSP, Verb.ACCORD_APPLY_RSP); builder.put(MessageType.READ_REQ, Verb.ACCORD_READ_REQ); + builder.put(MessageType.READ_EPHEMERAL_REQ, Verb.ACCORD_READ_REQ); builder.put(MessageType.READ_RSP, Verb.ACCORD_READ_RSP); builder.put(MessageType.BEGIN_RECOVER_REQ, Verb.ACCORD_BEGIN_RECOVER_REQ); builder.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_BEGIN_RECOVER_RSP); @@ -142,7 +145,7 @@ private VerbMapping() builder.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); builder.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); builder.put(MessageType.WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_WAIT_UNTIL_APPLIED_REQ); - builder.put(MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_APPLY_AND_WAIT_UNTIL_APPLIED_REQ); + builder.put(MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_APPLY_AND_WAIT_REQ); builder.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); builder.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); builder.put(MessageType.INFORM_HOME_DURABLE_REQ, Verb.ACCORD_INFORM_HOME_DURABLE_REQ); diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 3f506302db62..a1704ba52b2a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -18,17 +18,14 @@ package org.apache.cassandra.service.accord; -import java.nio.ByteBuffer; -import java.util.Map; import java.util.UUID; import java.util.function.ToLongFunction; -import com.google.common.collect.ImmutableSortedMap; - import accord.api.Key; import accord.api.Result; import accord.api.RoutingKey; import accord.impl.CommandsForKey; +import accord.impl.CommandsForKey.Info; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.Command.WaitingOn; @@ -69,7 +66,6 @@ import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.accord.txn.TxnWrite; -import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; import static org.apache.cassandra.utils.ObjectSizes.measure; @@ -259,16 +255,10 @@ public static long results(Result result) } private static final long EMPTY_COMMAND_LISTENER = measure(new Command.ProxyListener(null)); - private static final long EMPTY_CFK_LISTENER = measure(new CommandsForKey.Listener((Key) null)); - private static final long EMPTY_CFR_LISTENER = measure(new CommandsForRanges.Listener(null)); public static long listener(Command.DurableAndIdempotentListener listener) { if (listener instanceof Command.ProxyListener) return EMPTY_COMMAND_LISTENER + timestamp(((Command.ProxyListener) listener).txnId()); - if (listener instanceof CommandsForKey.Listener) - return EMPTY_CFK_LISTENER + key(((CommandsForKey.Listener) listener).key()); - if (listener instanceof CommandsForRanges.Listener) - return EMPTY_CFR_LISTENER + timestamp(((CommandsForRanges.Listener) listener).txnId); throw new IllegalArgumentException("Unhandled listener type: " + listener.getClass()); } @@ -362,34 +352,34 @@ public static long command(Command command) return size; } - private static long cfkSeriesSize(ImmutableSortedMap series) - { - long size = 0; - for (Map.Entry entry : series.entrySet()) - { - size += timestamp(entry.getKey()); - size += ByteBufferUtil.estimatedSizeOnHeap(entry.getValue()); - } - return size; - } - - private static long EMPTY_TFK_SIZE = measure(TimestampsForKey.SerializerSupport.create(null, null, null, 0, null)); + private static long EMPTY_TFK_SIZE = measure(TimestampsForKey.SerializerSupport.create(null, null, 0, null)); public static long timestampsForKey(TimestampsForKey timestamps) { long size = EMPTY_TFK_SIZE; - size += timestamp(timestamps.max()); size += timestamp(timestamps.lastExecutedTimestamp()); size += timestamp(timestamps.lastWriteTimestamp()); return size; } - private static long EMPTY_CFK_SIZE = measure(CommandsForKey.SerializerSupport.create(null, null, ImmutableSortedMap.of())); + private static long EMPTY_CFK_SIZE = measure(new CommandsForKey(null)); + private static long EMPTY_INFO_SIZE = measure(CommandsForKey.Info.createMock(null, null, null)); public static long commandsForKey(CommandsForKey cfk) { long size = EMPTY_CFK_SIZE; size += key(cfk.key()); - size += cfkSeriesSize((ImmutableSortedMap) cfk.commands().commands); + size += 2 * ObjectSizes.sizeOfReferenceArray(cfk.size()); + size += cfk.size() * TIMESTAMP_SIZE; + for (int i = 0 ; i < cfk.size() ; ++i) + { + Info info = cfk.info(i); + if (info.getClass() == CommandsForKey.NoInfo.class) + continue; + + size += EMPTY_INFO_SIZE; + if (info.missing.length > 0) + size += ObjectSizes.sizeOfReferenceArray(info.missing.length); + } return size; } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index ea364d04f612..c1b09ccb270a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -18,53 +18,43 @@ package org.apache.cassandra.service.accord; +import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.NavigableMap; -import java.util.function.Predicate; -import javax.annotation.Nullable; +import java.util.function.BiFunction; -import com.google.common.base.Predicates; +import javax.annotation.Nullable; import accord.api.Agent; import accord.api.DataStore; import accord.api.Key; import accord.api.ProgressLog; import accord.impl.AbstractSafeCommandStore; -import accord.impl.CommandTimeseries; -import accord.impl.CommandTimeseries.CommandLoader; import accord.impl.CommandsForKey; import accord.impl.CommandsForKeys; -import accord.impl.DomainCommands; -import accord.impl.DomainTimestamps; -import accord.impl.SafeTimestampsForKey; +import accord.impl.CommandsSummary; +import accord.local.Command; import accord.local.CommandStores.RangesForEpoch; -import accord.local.CommonAttributes; -import accord.local.KeyHistory; import accord.local.NodeTimeService; import accord.local.PreLoadContext; -import accord.local.SafeCommand; -import accord.local.Status; import accord.primitives.AbstractKeys; import accord.primitives.Deps; -import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.RoutableKey; import accord.primitives.Routables; -import accord.primitives.Seekable; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; -import accord.utils.TriFunction; -import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; -public class AccordSafeCommandStore extends AbstractSafeCommandStore +import static accord.primitives.Routable.Domain.Range; + +public class AccordSafeCommandStore extends AbstractSafeCommandStore { private final Map commands; - private final NavigableMap depsCommandsForKeys; - private final NavigableMap allCommandsForKeys; + private final NavigableMap commandsForKeys; private final NavigableMap timestampsForKeys; - private final NavigableMap updatesForKeys; private final AccordCommandStore commandStore; private final RangesForEpoch ranges; CommandsForRanges.Updater rangeUpdates = null; @@ -72,17 +62,13 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore commands, NavigableMap timestampsForKey, - NavigableMap depsCommandsForKey, - NavigableMap allCommandsForKeys, - NavigableMap updatesForKeys, + NavigableMap commandsForKey, AccordCommandStore commandStore) { super(context); this.commands = commands; this.timestampsForKeys = timestampsForKey; - this.depsCommandsForKeys = depsCommandsForKey; - this.allCommandsForKeys = allCommandsForKeys; - this.updatesForKeys = updatesForKeys; + this.commandsForKeys = commandsForKey; this.commandStore = commandStore; this.ranges = commandStore.updateRangesForEpoch(); } @@ -107,55 +93,22 @@ protected AccordSafeCommand getIfLoaded(TxnId txnId) return command; } - private NavigableMap commandsForKeyMap(KeyHistory history) - { - switch (history) - { - case DEPS: - return depsCommandsForKeys; - case ALL: - return allCommandsForKeys; - default: - throw new IllegalArgumentException(); - } - } - - @Override - protected AccordSafeCommandsForKey getDepsCommandsForKeyInternal(RoutableKey key) - { - return depsCommandsForKeys.get(key); - } - - @Override - protected void addDepsCommandsForKeyInternal(AccordSafeCommandsForKey cfk) - { - depsCommandsForKeys.put(cfk.key(), cfk); - } - @Override - protected AccordSafeCommandsForKey getDepsCommandsForKeyIfLoaded(RoutableKey key) + protected AccordSafeCommandsForKey getCommandsForKeyInternal(RoutableKey key) { - AccordSafeCommandsForKey cfk = commandStore.depsCommandsForKeyCache().acquireIfLoaded(key); - if (cfk != null) cfk.preExecute(); - return cfk; + return commandsForKeys.get(key); } @Override - protected AccordSafeCommandsForKey getAllCommandsForKeyInternal(RoutableKey key) + protected void addCommandsForKeyInternal(AccordSafeCommandsForKey cfk) { - return allCommandsForKeys.get(key); + commandsForKeys.put(cfk.key(), cfk); } @Override - protected void addAllCommandsForKeyInternal(AccordSafeCommandsForKey cfk) + protected AccordSafeCommandsForKey getCommandsForKeyIfLoaded(RoutableKey key) { - allCommandsForKeys.put(cfk.key(), cfk); - } - - @Override - protected AccordSafeCommandsForKey getAllCommandsForKeyIfLoaded(RoutableKey key) - { - AccordSafeCommandsForKey cfk = commandStore.allCommandsForKeyCache().acquireIfLoaded(key); + AccordSafeCommandsForKey cfk = commandStore.commandsForKeyCache().acquireIfLoaded(key); if (cfk != null) cfk.preExecute(); return cfk; } @@ -180,27 +133,6 @@ protected AccordSafeTimestampsForKey getTimestampsForKeyIfLoaded(RoutableKey key return cfk; } - protected AccordSafeCommandsForKeyUpdate getCommandsForKeyUpdateInternal(RoutableKey key) - { - return updatesForKeys.get(key); - } - - protected AccordSafeCommandsForKeyUpdate createCommandsForKeyUpdateInternal(RoutableKey key) - { - throw new IllegalStateException("CFK updates should be initialized for operation"); - } - - protected void addCommandsForKeyUpdateInternal(AccordSafeCommandsForKeyUpdate update) - { - updatesForKeys.put(update.key(), update); - } - - protected void applyCommandForKeyUpdates() - { - // TODO (now): should this happen as part of invalidate? Less obvious it's happening, but eliminates possibility of post update changes - updatesForKeys.values().forEach(AccordSafeCommandsForKeyUpdate::setUpdates); - } - @Override public AccordCommandStore commandStore() { @@ -237,19 +169,6 @@ public RangesForEpoch ranges() return commandStore().unsafeRangesForEpoch(); } - @Override - public long latestEpoch() - { - return commandStore().time().epoch(); - } - - @Override - public Timestamp maxConflict(Seekables keysOrRanges, Ranges slice) - { - Timestamp maxConflict = mapReduce(keysOrRanges, slice, KeyHistory.NONE, (ts, commands, accum) -> Timestamp.max(ts.max(), accum), Timestamp.NONE, Predicates.isNull()); - return Timestamp.nonNullOrMax(maxConflict, commandStore.commandsForRanges().maxRedundant()); - } - @Override public void registerHistoricalTransactions(Deps deps) { @@ -257,6 +176,7 @@ public void registerHistoricalTransactions(Deps deps) // We find a set of dependencies for a range then update CommandsFor to know about them Ranges allRanges = ranges.all(); deps.keyDeps.keys().forEach(allRanges, key -> { + // TODO (now): batch register to minimise GC deps.keyDeps.forEach(key, txnId -> { // TODO (desired, efficiency): this can be made more efficient by batching by epoch if (ranges.coordinates(txnId).contains(key)) @@ -282,20 +202,13 @@ public void registerHistoricalTransactions(Deps deps) }); } - @Override - public void erase(SafeCommand safeCommand) - { - } - - private O mapReduce(Routables keysOrRanges, Ranges slice, KeyHistory keyHistory, TriFunction map, O accumulate, Predicate terminate) + private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) { - accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate, terminate); - if (terminate.test(accumulate)) - return accumulate; - return mapReduceForKey(keysOrRanges, slice, keyHistory, map, accumulate, terminate); + accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate); + return mapReduceForKey(keysOrRanges, slice, map, accumulate); } - private O mapReduceForKey(Routables keysOrRanges, Ranges slice, KeyHistory keyHistory, TriFunction map, O accumulate, Predicate terminate) + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) { switch (keysOrRanges.domain()) { @@ -308,11 +221,8 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, KeyHistor for (Key key : keys) { if (!slice.contains(key)) continue; - SafeTimestampsForKey timestamps = timestampsForKey(key); - CommandsForKey commands = !keyHistory.isNone() ? commandsForKey(key, keyHistory).current() : null; - accumulate = map.apply(timestamps.current(), commands, accumulate); - if (terminate.test((accumulate))) - return accumulate; + CommandsForKey commands = commandsForKey(key).current(); + accumulate = map.apply(commands, accumulate); } } break; @@ -327,11 +237,8 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, KeyHistor { //TODO (duplicate code): this is a repeat of Key... only change is checking contains in range if (!sliced.contains(key)) continue; - SafeTimestampsForKey timestamps = timestampsForKey(key); - CommandsForKey commands = !keyHistory.isNone() ? commandsForKey(key, keyHistory).current() : null; - accumulate = map.apply(timestamps.current(), commands, accumulate); - if (terminate.test(accumulate)) - return accumulate; + CommandsForKey commands = commandsForKey(key).current(); + accumulate = map.apply(commands, accumulate); } } break; @@ -340,85 +247,41 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, KeyHistor } @Override -//<<<<<<< HEAD -// public T mapReduce(Seekables keysOrRanges, Ranges slice, Txn.Kind.Kinds testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, P1 p1, T accumulate, Predicate terminate) { -// accumulate = mapReduce(keysOrRanges, slice, (forKey, prev) -> { -// CommandTimeseries timeseries; -//======= - public T mapReduce(Seekables keysOrRanges, Ranges slice, KeyHistory keyHistory, Txn.Kind.Kinds testKind, TestTimestamp testTimestamp, Timestamp timestamp, TestDep testDep, @Nullable TxnId depId, @Nullable Status minStatus, @Nullable Status maxStatus, CommandFunction map, P1 p1, T accumulate, Predicate terminate) + public T mapReduceActive(Seekables keysOrRanges, Ranges slice, @Nullable Timestamp withLowerTxnId, Txn.Kind.Kinds testKind, CommandFunction map, P1 p1, T accumulate) { - accumulate = mapReduce(keysOrRanges, slice, keyHistory, (timestamps, commands, prev) -> { - CommandTimeseries.TimestampType timestampType; - switch (testTimestamp) - { - default: throw new AssertionError(); - case STARTED_AFTER: - case STARTED_BEFORE: - timestampType = CommandTimeseries.TimestampType.TXN_ID; - break; - case EXECUTES_AFTER: - case MAY_EXECUTE_BEFORE: - timestampType = CommandTimeseries.TimestampType.EXECUTE_AT; - } - CommandTimeseries.TestTimestamp remapTestTimestamp; - switch (testTimestamp) - { - default: throw new AssertionError(); - case STARTED_AFTER: - case EXECUTES_AFTER: - remapTestTimestamp = CommandTimeseries.TestTimestamp.AFTER; - break; - case STARTED_BEFORE: - case MAY_EXECUTE_BEFORE: - remapTestTimestamp = CommandTimeseries.TestTimestamp.BEFORE; - } - return commands.commands().mapReduce(testKind, timestampType, remapTestTimestamp, timestamp, testDep, depId, minStatus, maxStatus, map, p1, prev, terminate); - }, accumulate, terminate); - - return accumulate; + return mapReduce(keysOrRanges, slice, (summary, in) -> { + return summary.mapReduceActive(withLowerTxnId, testKind, map, p1, in); + }, accumulate); } @Override - public CommonAttributes completeRegistration(Seekables seekables, Ranges ranges, AccordSafeCommand liveCommand, CommonAttributes attrs) + public T mapReduceFull(Seekables keysOrRanges, Ranges slice, TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) { - for (Seekable seekable : seekables) - attrs = completeRegistration(seekable, ranges, liveCommand, attrs); - return attrs; + return mapReduce(keysOrRanges, slice, (summary, in) -> { + return summary.mapReduceFull(testTxnId, testKind, testStartedAt, testDep, testStatus, map, p1, in); + }, accumulate); } @Override - public CommonAttributes completeRegistration(Seekable seekable, Ranges ranges, AccordSafeCommand liveCommand, CommonAttributes attrs) + protected void update(Command prev, Command updated, @Nullable Seekables keysOrRanges) { - switch (seekable.domain()) + super.update(prev, updated, keysOrRanges); + + if (updated.txnId().domain() == Range && CommandsForKey.needsUpdate(prev, updated)) { - case Key: + if (keysOrRanges == null) { - Key key = seekable.asKey(); - if (ranges.contains(key)) - { - CommandsForKeys.registerCommand(this, key, liveCommand.current()); - attrs = attrs.mutable().addListener(new CommandsForKey.Listener(key)); - } + if (updated.known().isDefinitionKnown()) keysOrRanges = updated.partialTxn().keys(); + else if (prev.known().isDefinitionKnown()) keysOrRanges = prev.partialTxn().keys(); + else return; } - break; - case Range: - Range range = seekable.asRange(); - if (!ranges.intersects(range)) - return attrs; - // TODO (api) : cleaner way to deal with this? This is tracked at the Ranges level and not Range level - // but we register at the Range level... - if (!attrs.durableListeners().stream().anyMatch(l -> l instanceof CommandsForRanges.Listener)) - { - CommandsForRanges.Listener listener = new CommandsForRanges.Listener(liveCommand.txnId()); - attrs = attrs.mutable().addListener(listener); - // trigger to allow it to run right away - listener.onChange(this, liveCommand); - } - break; - default: - throw new UnsupportedOperationException("Unknown domain: " + seekable.domain()); + List waitingOn; + + if (updated.partialDeps() == null) waitingOn = Collections.emptyList(); + // TODO (required): this is faulty: we cannot simply save the raw transaction ids, as they may be for other ranges + else waitingOn = updated.partialDeps().txnIds(); + updateRanges().put(updated.txnId(), (Ranges)keysOrRanges, updated.saveStatus(), updated.executeAt(), waitingOn); } - return attrs; } protected CommandsForRanges.Updater updateRanges() @@ -433,30 +296,18 @@ protected void invalidateSafeState() { commands.values().forEach(AccordSafeCommand::invalidate); timestampsForKeys.values().forEach(AccordSafeTimestampsForKey::invalidate); - depsCommandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); - allCommandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); - updatesForKeys.values().forEach(AccordSafeCommandsForKeyUpdate::invalidate); - } - - @Override - public CommandLoader cfkLoader(RoutableKey key) - { - return CommandsForKeySerializer.loader; + commandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); } public void postExecute(Map commands, Map timestampsForKey, - Map depsCommandsForKeys, - Map allCommandsForKeys, - Map updatesForKeys + Map commandsForKeys ) { postExecute(); commands.values().forEach(AccordSafeState::postExecute); timestampsForKey.values().forEach(AccordSafeState::postExecute); - depsCommandsForKeys.values().forEach(AccordSafeState::postExecute); - allCommandsForKeys.values().forEach(AccordSafeState::postExecute); - updatesForKeys.values().forEach(AccordSafeState::postExecute); + commandsForKeys.values().forEach(AccordSafeState::postExecute); if (rangeUpdates != null) rangeUpdates.apply(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index 97fc60464957..748143f33363 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -76,7 +76,7 @@ public boolean hasUpdate() // cfk initialization is legal, but doesn't need to be propagated to the cache (and would // cause an exception to be thrown if it were). Making an exception on the cache side could // throw away applied cfk updates as well, so it's special cased here - if (hasUpdate && original == null && current != null && current.commands().isEmpty()) + if (hasUpdate && original == null && current != null && current.size() == 0) return false; return hasUpdate; @@ -122,7 +122,8 @@ public void preExecute() public void postExecute() { checkNotInvalidated(); - // updates are applied directly by CommandsForKeyUpdate + if (current != original) + global.set(current); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java deleted file mode 100644 index b881a4524126..000000000000 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKeyUpdate.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.nio.ByteBuffer; - -import com.google.common.annotations.VisibleForTesting; - -import accord.api.Key; -import accord.impl.SafeCommandsForKey; -import accord.primitives.RoutableKey; -import accord.utils.async.AsyncChain; -import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; - -public class AccordSafeCommandsForKeyUpdate extends SafeCommandsForKey.Update implements AccordSafeState -{ - private boolean invalidated; - private final AccordCachingState global; - private CommandsForKeyUpdate original; - private CommandsForKeyUpdate current; - - public AccordSafeCommandsForKeyUpdate(AccordCachingState global) - { - super((Key) global.key(), CommandsForKeySerializer.loader); - this.global = global; - this.original = null; - this.current = null; - } - - @Override - public void initialize() - { - set(CommandsForKeyUpdate.empty((PartitionKey) key())); - } - - @Override - public AccordCachingState global() - { - checkNotInvalidated(); - return global; - } - - @Override - public CommandsForKeyUpdate current() - { - checkNotInvalidated(); - return current; - } - - public AsyncChain loading() - { - throw new IllegalStateException("Updates aren't loaded"); - } - - @Override - @VisibleForTesting - public void set(CommandsForKeyUpdate cfk) - { - checkNotInvalidated(); - this.current = cfk; - } - - public CommandsForKeyUpdate original() - { - checkNotInvalidated(); - return original; - } - - public CommandsForKeyUpdate setUpdates() - { - CommandsForKeyUpdate next = new CommandsForKeyUpdate((PartitionKey) key(), - deps().toImmutable(), - all().toImmutable(), - common().toImmutable()); - set(next); - return next; - } - - @Override - public void preExecute() - { - checkNotInvalidated(); - original = global.get(); - current = original; - } - - @Override - public void postExecute() - { - checkNotInvalidated(); - global.set(current); - } - - @Override - public void invalidate() - { - invalidated = true; - } - - @Override - public boolean invalidated() - { - return invalidated; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 4ecc3b35bbf8..a730b62813c2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -30,9 +30,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.primitives.Ints; import accord.coordinate.TopologyMismatch; +import accord.impl.CoordinateDurabilityScheduling; import org.apache.cassandra.cql3.statements.RequestValidations; +import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.transformations.AddAccordTable; import org.slf4j.Logger; @@ -89,9 +92,6 @@ import org.apache.cassandra.service.accord.api.CompositeTopologySorter; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; -import org.apache.cassandra.service.accord.interop.AccordInteropApply; -import org.apache.cassandra.service.accord.interop.AccordInteropExecution; -import org.apache.cassandra.service.accord.interop.AccordInteropPersist; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; @@ -110,6 +110,7 @@ import static accord.messages.SimpleReply.Ok; import static accord.utils.Invariants.checkState; import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordWriteMetrics; @@ -133,6 +134,7 @@ private enum State { INIT, STARTED, SHUTDOWN} private final AccordScheduler scheduler; private final AccordDataStore dataStore; private final AccordJournal journal; + private final CoordinateDurabilityScheduling durabilityScheduling; private final AccordVerbHandler verbHandler; private final LocalConfig configuration; @GuardedBy("this") @@ -307,11 +309,10 @@ private AccordService(Id localId) new AccordTopologySorter.Supplier(configService, DatabaseDescriptor.getNodeProximity())), SimpleProgressLog::new, AccordCommandStores.factory(journal), - new AccordInteropExecution.Factory(agent, configService), - AccordInteropPersist.FACTORY, - AccordInteropApply.FACTORY, + new AccordInteropFactory(agent, configService), configuration); this.nodeShutdown = toShutdownable(node); + this.durabilityScheduling = new CoordinateDurabilityScheduling(node); this.verbHandler = new AccordVerbHandler<>(node, configService, journal); } @@ -325,6 +326,11 @@ public synchronized void startup() ClusterMetadataService.instance().log().addListener(configService); fastPathCoordinator.start(); ClusterMetadataService.instance().log().addListener(fastPathCoordinator); + durabilityScheduling.setGlobalCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordGlobalDurabilityCycle(SECONDS)), SECONDS); + durabilityScheduling.setShardCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityCycle(SECONDS)), SECONDS); + durabilityScheduling.setTxnIdLag(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityTxnIdLag(SECONDS)), TimeUnit.SECONDS); + durabilityScheduling.setFrequency(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityFrequency(SECONDS)), SECONDS); + durabilityScheduling.start(); state = State.STARTED; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index d08bd86c4215..1196089d62df 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -143,8 +143,6 @@ private void maybeEvictSomeNodes() AccordCachingState node = iter.next(); checkState(node.references == 0); - if (!node.canEvict()) - continue; /* * TODO (expected, efficiency): * can this be reworked so we're not skipping unevictable nodes everytime we try to evict? diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java b/src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java deleted file mode 100644 index 4ab08b52735e..000000000000 --- a/src/java/org/apache/cassandra/service/accord/CommandsForKeyUpdate.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.nio.ByteBuffer; -import java.util.Map; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; - -import accord.impl.CommandTimeseries; -import accord.impl.CommandsForKey; -import accord.impl.CommandsForKeyGroupUpdater; -import accord.impl.CommandsForKeyUpdater; -import accord.primitives.RoutableKey; -import accord.primitives.Timestamp; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; -import org.apache.cassandra.service.accord.api.PartitionKey; - -import static org.apache.cassandra.utils.ObjectSizes.measure; - -public class CommandsForKeyUpdate extends CommandsForKeyGroupUpdater.Immutable implements CommandsForKey.Update -{ - private static final CommandsForKeyUpdate EMPTY = new CommandsForKeyUpdate(null, null, null, null); - - static long EMPTY_SIZE = measure(EMPTY); - - private static long EMPTY_TIMESERIES_UPDATE_SIZE = measure(new CommandTimeseries.ImmutableUpdate(ImmutableMap.of(), ImmutableSet.of())); - - private static long immutableTimeseriesUpdate(CommandTimeseries.ImmutableUpdate update) - { - long size = EMPTY_TIMESERIES_UPDATE_SIZE; - for (Map.Entry write : update.writes.entrySet()) - { - size += AccordObjectSizes.timestamp(write.getKey()); - size += ByteBufferAccessor.instance.size(write.getValue()); - } - - for (T delete : update.deletes) - size += AccordObjectSizes.timestamp(delete); - - return size; - } - - private static long EMPTY_UPDATER_SIZE = measure(new CommandsForKeyUpdater.Immutable<>(null)); - - private static long updaterSize(CommandsForKeyUpdater.Immutable updater) - { - long size = EMPTY_UPDATER_SIZE; - size += immutableTimeseriesUpdate(updater.commands()); - return size; - } - - private final PartitionKey key; - - public CommandsForKeyUpdate(PartitionKey key, CommandsForKeyUpdater.Immutable deps, CommandsForKeyUpdater.Immutable all, CommandsForKeyUpdater.Immutable common) - { - super(deps, all, common); - this.key = key; - } - - public static CommandsForKeyUpdate empty(RoutableKey key) - { - return new CommandsForKeyUpdate((PartitionKey) key, - CommandsForKeyUpdater.Immutable.empty(), - CommandsForKeyUpdater.Immutable.empty(), - CommandsForKeyUpdater.Immutable.empty()); - - } - - public PartitionKey key() - { - return key; - } - - public long estimatedSizeOnHeap() - { - long size = EMPTY_SIZE; - size += AccordObjectSizes.key(key.asKey()); - size += updaterSize(deps()); - size += updaterSize(all()); - size += updaterSize(common()); - return size; - } -} diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index 8cfd3581ee89..ecfa9b0626cf 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -31,6 +31,7 @@ import java.util.TreeSet; import java.util.function.BiFunction; import java.util.function.Function; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; @@ -40,23 +41,17 @@ import accord.api.Key; import accord.api.RoutingKey; -import accord.impl.CommandTimeseries; -import accord.impl.DomainCommands; -import accord.impl.DomainTimestamps; +import accord.impl.CommandsForKey; +import accord.impl.CommandsSummary; import accord.local.Command; -import accord.local.PreLoadContext; -import accord.local.SafeCommand; -import accord.local.SafeCommandStore; import accord.local.SaveStatus; import accord.primitives.AbstractKeys; -import accord.primitives.PartialDeps; import accord.primitives.Range; import accord.primitives.Ranges; -import accord.primitives.Routable; import accord.primitives.RoutableKey; import accord.primitives.Seekable; -import accord.primitives.Seekables; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.schema.TableId; @@ -66,10 +61,17 @@ import org.apache.cassandra.utils.Interval; import org.apache.cassandra.utils.IntervalTree; +import static accord.local.SafeCommandStore.*; +import static accord.local.SafeCommandStore.TestDep.ANY_DEPS; +import static accord.local.SafeCommandStore.TestDep.WITH; +import static accord.local.SafeCommandStore.TestStartedAt.ANY; +import static accord.local.SafeCommandStore.TestStartedAt.STARTED_BEFORE; +import static accord.local.SafeCommandStore.TestStatus.ANY_STATUS; +import static accord.local.Status.Stable; +import static accord.local.Status.Truncated; + public class CommandsForRanges { - public interface DomainInfo extends DomainCommands, DomainTimestamps {} - public enum TxnType { UNKNOWN, LOCAL, REMOTE; @@ -145,42 +147,6 @@ public int compareTo(RangeCommandSummary other) } } - private enum RangeCommandSummaryLoader implements CommandTimeseries.CommandLoader - { - INSTANCE; - - @Override - public RangeCommandSummary saveForCFK(Command command) - { - //TODO split write from read? - throw new UnsupportedOperationException(); - } - - @Override - public TxnId txnId(RangeCommandSummary data) - { - return data.txnId; - } - - @Override - public Timestamp executeAt(RangeCommandSummary data) - { - return data.executeAt; - } - - @Override - public SaveStatus saveStatus(RangeCommandSummary data) - { - return data.status; - } - - @Override - public List depsIds(RangeCommandSummary data) - { - return data.deps; - } - } - public static abstract class AbstractBuilder> { protected final Set localTxns = new HashSet<>(); @@ -314,50 +280,6 @@ public void apply() } } - public static class Listener implements Command.DurableAndIdempotentListener - { - public final TxnId txnId; - private transient SaveStatus saveStatus; - - public Listener(TxnId txnId) - { - this.txnId = txnId; - } - - @Override - public void onChange(SafeCommandStore safeStore, SafeCommand safeCommand) - { - Command current = safeCommand.current(); - if (current.saveStatus() == saveStatus) - return; - saveStatus = current.saveStatus(); - PartialDeps deps = current.partialDeps(); - if (deps == null) - return; - Seekables keysOrRanges = current.partialTxn().keys(); - Invariants.checkArgument(keysOrRanges.domain() == Routable.Domain.Range, "Expected txn %s to be a Range txn, but was a %s", txnId, keysOrRanges.domain()); - - List dependsOn = deps.txnIds(); - ((AccordSafeCommandStore) safeStore).updateRanges() - .put(txnId, (Ranges) keysOrRanges, current.saveStatus(), current.executeAt(), dependsOn); - } - - @Override - public PreLoadContext listenerPreLoadContext(TxnId caller) - { - return caller.equals(txnId) ? PreLoadContext.contextFor(txnId) : PreLoadContext.contextFor(txnId, Collections.singletonList(caller)); - } - - @Override - public String toString() - { - return "Listener{" + - "txnId=" + txnId + - ", saveStatus=" + saveStatus + - '}'; - } - } - private ImmutableSet localCommands; private ImmutableSortedMap commandsToRanges; private IntervalTree> rangesToCommands; @@ -401,24 +323,29 @@ IntervalTree search(AbstractKeys keys) + public Iterable search(AbstractKeys keys) { // group by the table, as ranges are based off TokenKey, which is scoped to a range Map> groupByTable = new TreeMap<>(); for (Key key : keys) groupByTable.computeIfAbsent(((PartitionKey) key).table(), ignore -> new ArrayList<>()).add(key); - return () -> new AbstractIterator() + return () -> new AbstractIterator() { Iterator tblIt = groupByTable.keySet().iterator(); Iterator>> rangeIt; @Override - protected DomainInfo computeNext() + protected CommandsSummary computeNext() { while (true) { @@ -454,20 +381,20 @@ private static Range toRange(Interval interval { TokenKey start = (TokenKey) interval.min; TokenKey end = (TokenKey) interval.max; - // TODO (correctness) : accord doesn't support wrap around, so decreaseSlightly may fail in some cases - // TODO (correctness) : this logic is mostly used for testing, so is it actually safe for all partitioners? + // TODO (required, correctness) : accord doesn't support wrap around, so decreaseSlightly may fail in some cases + // TODO (required, correctness) : this logic is mostly used for testing, so is it actually safe for all partitioners? return new TokenRange(start.withToken(start.token().decreaseSlightly()), end); } @Nullable - public DomainInfo search(Range range) + public CommandsSummary search(Range range) { List matches = rangesToCommands.search(Interval.create(normalize(range.start(), range.startInclusive(), true), normalize(range.end(), range.endInclusive(), false))); return result(range, matches); } - private DomainInfo result(Seekable seekable, Collection matches) + private CommandsSummary result(Seekable seekable, Collection matches) { if (matches.isEmpty()) return null; @@ -499,18 +426,20 @@ private static RoutingKey normalize(RoutingKey key, boolean inclusive, boolean u switch (ak.kindOfRoutingKey()) { case SENTINEL: - key = ak.asSentinelKey().toTokenKey(); + // TODO (required, correctness): this doesn't work + key = ak.asSentinelKey().toTokenKeyBroken(); continue; case TOKEN: TokenKey tk = ak.asTokenKey(); - return tk.withToken(upOrDown ? tk.token().nextValidToken() : tk.token().decreaseSlightly()); + // TODO (required, correctness): this doesn't work for ordered partitioner + return tk.withToken(upOrDown ? tk.token().increaseSlightly() : tk.token().decreaseSlightly()); default: throw new IllegalArgumentException("Unknown kind: " + ak.kindOfRoutingKey()); } } } - private static class Holder implements DomainInfo + private static class Holder implements CommandsSummary { private final Seekable keyOrRange; private final Collection matches; @@ -522,27 +451,95 @@ private Holder(Seekable keyOrRange, Collection matches) } @Override - public CommandTimeseries commands() + public T mapReduceFull(TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) { - return build(); + return mapReduce(testTxnId, testTxnId, testKind, testStartedAt, testDep, testStatus, map, p1, accumulate); } @Override - public Timestamp max() + public T mapReduceActive(Timestamp startedBefore, Txn.Kind.Kinds testKind, CommandFunction map, P1 p1, T accumulate) { - return commands().maxTimestamp(); + return mapReduce(startedBefore, null, testKind, STARTED_BEFORE, ANY_DEPS, ANY_STATUS, map, p1, accumulate); } - private CommandTimeseries build() + private T mapReduce(@Nonnull Timestamp testTimestamp, @Nullable TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) { - CommandTimeseries.Builder builder = new CommandTimeseries.Builder<>(keyOrRange, RangeCommandSummaryLoader.INSTANCE); - for (RangeCommandSummary m : matches) + // TODO (required): reconsider how we build this, to avoid having to provide range keys in order (or ensure our range search does this for us) + Map> collect = new TreeMap<>(Range::compare); + matches.forEach((summary -> { + if (summary.status.compareTo(SaveStatus.Erased) >= 0) + return; + + switch (testStartedAt) + { + default: throw new AssertionError(); + case STARTED_AFTER: + if (summary.txnId.compareTo(testTimestamp) <= 0) return; + else break; + case STARTED_BEFORE: + if (summary.txnId.compareTo(testTimestamp) >= 0) return; + case ANY: + if (testDep != ANY_DEPS && (summary.executeAt == null || summary.executeAt.compareTo(testTxnId) < 0)) + return; + } + + switch (testStatus) + { + default: throw new AssertionError("Unhandled TestStatus: " + testStatus); + case ANY_STATUS: + break; + case IS_PROPOSED: + switch (summary.status) + { + default: return; + case PreCommitted: + case Committed: + case Accepted: + } + break; + case IS_STABLE: + if (!summary.status.hasBeen(Stable) || summary.status.hasBeen(Truncated)) + return; + } + + if (!testKind.test(summary.txnId.kind())) + return; + + if (testDep != ANY_DEPS) + { + if (!summary.status.known.deps.hasProposedOrDecidedDeps()) + return; + + // TODO (required): we must ensure these txnId are limited to those we intersect in this command store + // We are looking for transactions A that have (or have not) B as a dependency. + // If B covers ranges [1..3] and A covers [2..3], but the command store only covers ranges [1..2], + // we could have A adopt B as a dependency on [3..3] only, and have that A intersects B on this + // command store, but also that there is no dependency relation between them on the overlapping + // key range [2..2]. + + // This can lead to problems on recovery, where we believe a transaction is a dependency + // and so it is safe to execute, when in fact it is only a dependency on a different shard + // (and that other shard, perhaps, does not know that it is a dependency - and so it is not durably known) + // TODO (required): consider this some more + if ((testDep == WITH) == !summary.deps.contains(testTxnId)) + return; + } + + // TODO (required): ensure we are excluding any ranges that are now shard-redundant (not sure if this is enforced yet) + for (Range range : summary.ranges) + collect.computeIfAbsent(range, ignore -> new ArrayList<>()).add(summary); + })); + + for (Map.Entry> e : collect.entrySet()) { - if (m.status == SaveStatus.Invalidated) - continue; - builder.add(m.txnId, m); + for (RangeCommandSummary command : e.getValue()) + { + T initial = accumulate; + accumulate = map.apply(p1, e.getKey(), command.txnId, command.executeAt, initial); + } } - return builder.build(); + + return accumulate; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/TokenRange.java b/src/java/org/apache/cassandra/service/accord/TokenRange.java index b03eaf39d9a7..be3eaa80ca09 100644 --- a/src/java/org/apache/cassandra/service/accord/TokenRange.java +++ b/src/java/org/apache/cassandra/service/accord/TokenRange.java @@ -73,7 +73,7 @@ public RoutingKey someIntersectingRoutingKey(Ranges ranges) { RoutingKey pick = super.someIntersectingRoutingKey(ranges); if (pick instanceof SentinelKey) - pick = ((SentinelKey) pick).toTokenKey(); + pick = ((SentinelKey) pick).toTokenKeyBroken(); return pick; } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index acf4da192345..9606be549253 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -131,12 +131,12 @@ public static SentinelKey max(TableId table) return new SentinelKey(table, false); } - public TokenKey toTokenKey() + public TokenKey toTokenKeyBroken() { IPartitioner partitioner = getPartitioner(); return new TokenKey(table, isMin ? partitioner.getMinimumToken().nextValidToken() : - partitioner.getMaximumToken().decreaseSlightly()); + partitioner.getMaximumTokenForSplitting().decreaseSlightly()); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 52f632c6c5ea..b8494e7d4492 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -116,6 +116,7 @@ private static > void referenceAndAssemble case FAILED_TO_SAVE: break; case FAILED_TO_LOAD: + // TODO (required): if this triggers, we trigger some other illegal state in cache management throw new RuntimeException(safeRef.failure()); } } @@ -124,19 +125,18 @@ private void referenceAndAssembleReadsForKey(RoutableKey key, AsyncOperation.Context context, List> listenChains) { - referenceAndAssembleReadsForKey(key, context.timestampsForKey, commandStore.timestampsForKeyCache(), listenChains); // recovery operations also need the deps data for their preaccept logic switch (keyHistory) { - case ALL: - referenceAndAssembleReadsForKey(key, context.allCommandsForKeys, commandStore.allCommandsForKeyCache(), listenChains); - case DEPS: - referenceAndAssembleReadsForKey(key, context.depsCommandsForKeys, commandStore.depsCommandsForKeyCache(), listenChains); + case TIMESTAMPS: + referenceAndAssembleReadsForKey(key, context.timestampsForKey, commandStore.timestampsForKeyCache(), listenChains); + break; + case COMMANDS: + referenceAndAssembleReadsForKey(key, context.commandsForKey, commandStore.commandsForKeyCache(), listenChains); case NONE: break; default: throw new IllegalArgumentException("Unhandled keyhistory: " + keyHistory); } - referenceAndAssembleReadsForKey(key, context.updatesForKeys, commandStore.updatesForKeyCache(), listenChains); } private > void referenceAndAssembleReads(Iterable keys, @@ -195,7 +195,7 @@ private AsyncChain> findOverlappingKeys(Ranges ranges private AsyncChain> findOverlappingKeys(Range range) { - Set cached = commandStore.depsCommandsForKeyCache().stream() + Set cached = commandStore.commandsForKeyCache().stream() .map(n -> (PartitionKey) n.key()) .filter(range::contains) .collect(Collectors.toSet()); @@ -214,7 +214,7 @@ private static TokenKey toTokenKey(RoutingKey start) if (start instanceof TokenKey) return (TokenKey) start; if (start instanceof AccordRoutingKey.SentinelKey) - return ((AccordRoutingKey.SentinelKey) start).toTokenKey(); + return ((AccordRoutingKey.SentinelKey) start).toTokenKeyBroken(); throw new IllegalArgumentException(String.format("Unable to convert RoutingKey %s (type %s) to TokenKey", start, start.getClass())); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 69002b1fefe6..bef57bcf0b12 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -42,7 +42,6 @@ import org.apache.cassandra.service.accord.AccordSafeCommand; import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordSafeCommandStore; -import org.apache.cassandra.service.accord.AccordSafeCommandsForKeyUpdate; import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; @@ -69,26 +68,20 @@ static class Context { final HashMap commands = new HashMap<>(); final TreeMap timestampsForKey = new TreeMap<>(); - final TreeMap depsCommandsForKeys = new TreeMap<>(); - final TreeMap allCommandsForKeys = new TreeMap<>(); - final TreeMap updatesForKeys = new TreeMap<>(); + final TreeMap commandsForKey = new TreeMap<>(); void releaseResources(AccordCommandStore commandStore) { commands.values().forEach(commandStore.commandCache()::release); timestampsForKey.values().forEach(commandStore.timestampsForKeyCache()::release); - depsCommandsForKeys.values().forEach(commandStore.depsCommandsForKeyCache()::release); - allCommandsForKeys.values().forEach(commandStore.allCommandsForKeyCache()::release); - updatesForKeys.values().forEach(commandStore.updatesForKeyCache()::release); + commandsForKey.values().forEach(commandStore.commandsForKeyCache()::release); } void revertChanges() { commands.values().forEach(AccordSafeState::revert); timestampsForKey.values().forEach(AccordSafeState::revert); - depsCommandsForKeys.values().forEach(AccordSafeState::revert); - allCommandsForKeys.values().forEach(AccordSafeState::revert); - updatesForKeys.values().forEach(AccordSafeState::revert); + commandsForKey.values().forEach(AccordSafeState::revert); } } @@ -261,11 +254,11 @@ protected void runInternal() return; state(PREPARING); case PREPARING: - safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.timestampsForKey, context.depsCommandsForKeys, context.allCommandsForKeys, context.updatesForKeys); + safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.timestampsForKey, context.commandsForKey); state(RUNNING); case RUNNING: result = apply(safeStore); - safeStore.postExecute(context.commands, context.timestampsForKey, context.depsCommandsForKeys, context.allCommandsForKeys, context.updatesForKeys); + safeStore.postExecute(context.commands, context.timestampsForKey, context.commandsForKey); context.releaseResources(commandStore); commandStore.completeOperation(safeStore); commandStore.executionOrder().unregister(this); diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java new file mode 100644 index 000000000000..bac98fd003e8 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.interop; + +import java.util.function.BiConsumer; + +import accord.api.Result; +import accord.api.Update; +import accord.coordinate.CoordinationAdapter; +import accord.coordinate.CoordinationAdapter.Adapters.AbstractTxnAdapter; +import accord.coordinate.ExecutePath; +import accord.coordinate.PersistTxn; +import accord.local.Node; +import accord.messages.Apply; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.topology.Topologies; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.service.accord.AccordEndpointMapper; +import org.apache.cassandra.service.accord.api.AccordAgent; +import org.apache.cassandra.service.accord.interop.AccordInteropExecution.InteropExecutor; +import org.apache.cassandra.service.accord.txn.AccordUpdate; +import org.apache.cassandra.service.accord.txn.TxnRead; + +import static accord.messages.Apply.Kind.Maximal; +import static accord.messages.Apply.Kind.Minimal; + +public class AccordInteropAdapter extends AbstractTxnAdapter +{ + public static final class AccordInteropFactory implements CoordinationAdapter.Factory + { + final AccordInteropAdapter standard, recovery; + + public AccordInteropFactory(AccordAgent agent, AccordEndpointMapper endpointMapper) + { + final InteropExecutor executor = new InteropExecutor(agent); + standard = new AccordInteropAdapter(executor, endpointMapper, Minimal); + recovery = new AccordInteropAdapter(executor, endpointMapper, Maximal); + } + + @Override + public CoordinationAdapter get(TxnId txnId, Step step) + { + return (CoordinationAdapter) (step == Step.InitiateRecovery ? recovery : standard); + } + }; + + private final InteropExecutor executor; + private final AccordEndpointMapper endpointMapper; + private final Apply.Kind applyKind; + + private AccordInteropAdapter(InteropExecutor executor, AccordEndpointMapper endpointMapper, Apply.Kind applyKind) + { + this.executor = executor; + this.endpointMapper = endpointMapper; + this.applyKind = applyKind; + } + + @Override + public void execute(Node node, Topologies all, FullRoute route, ExecutePath path, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer callback) + { + if (!doInteropExecute(node, route, txnId, txn, executeAt, deps, callback)) + super.execute(node, all, route, path, txnId, txn, executeAt, deps, callback); + } + + @Override + public void persist(Node node, Topologies all, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer callback) + { + if (applyKind == Minimal && doInteropPersist(node, all, route, txnId, txn, executeAt, deps, writes, result, callback)) + return; + + if (callback != null) callback.accept(result, null); + new PersistTxn(node, all, txnId, route, txn, executeAt, deps, writes, result) + .start(Apply.FACTORY, applyKind, all, writes, result); + } + + private boolean doInteropExecute(Node node, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer callback) + { + // Unrecoverable repair always needs to be run by AccordInteropExecution + AccordUpdate.Kind updateKind = AccordUpdate.kind(txn.update()); + ConsistencyLevel consistencyLevel = txn.read() instanceof TxnRead ? ((TxnRead) txn.read()).cassandraConsistencyLevel() : null; + if (updateKind != AccordUpdate.Kind.UNRECOVERABLE_REPAIR && (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ONE || txn.read().keys().isEmpty())) + return false; + + new AccordInteropExecution(node, txnId, txn, updateKind, route, txn.read().keys().toParticipants(), executeAt, deps, callback, executor, consistencyLevel, endpointMapper) + .start(); + return true; + } + + private static boolean doInteropPersist(Node node, Topologies all, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer callback) + { + Update update = txn.update(); + ConsistencyLevel consistencyLevel = update instanceof AccordUpdate ? ((AccordUpdate) update).cassandraCommitCL() : null; + if (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ANY || writes.isEmpty()) + return false; + + new AccordInteropPersist(node, all, txnId, route, txn, executeAt, deps, writes, result, consistencyLevel, callback) + .start(AccordInteropApply.FACTORY, Minimal, all, writes, result); + return true; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java index 22821bab392a..7294dd2696fc 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java @@ -31,11 +31,11 @@ import accord.messages.Apply; import accord.messages.MessageType; import accord.primitives.Deps; +import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; -import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -63,37 +63,37 @@ public class AccordInteropApply extends Apply implements Command.TransientListen public static final Apply.Factory FACTORY = new Apply.Factory() { @Override - public Apply create(Kind kind, Id to, Topologies participates, Topologies executes, TxnId txnId, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) + public Apply create(Kind kind, Id to, Topologies participates, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) { checkArgument(kind != Kind.Maximal, "Shouldn't need to send a maximal commit with interop support"); ConsistencyLevel commitCL = txn.update() instanceof AccordUpdate ? ((AccordUpdate) txn.update()).cassandraCommitCL() : null; // Any asynchronous apply option should use the regular Apply that doesn't wait for writes to complete if (commitCL == null || commitCL == ConsistencyLevel.ANY) - return Apply.FACTORY.create(kind, to, participates, executes, txnId, route, txn, executeAt, deps, writes, result); - return new AccordInteropApply(kind, to, participates, executes, txnId, route, txn, executeAt, deps, writes, result); + return Apply.FACTORY.create(kind, to, participates, txnId, route, txn, executeAt, deps, writes, result); + return new AccordInteropApply(kind, to, participates, txnId, route, txn, executeAt, deps, writes, result); } }; public static final IVersionedSerializer serializer = new ApplySerializer() { @Override - protected AccordInteropApply deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, Writes writes, Result result) + protected AccordInteropApply deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { - return new AccordInteropApply(kind, txnId, scope, waitForEpoch, keys, executeAt, deps, txn, writes, result); + return new AccordInteropApply(kind, txnId, scope, waitForEpoch, keys, executeAt, deps, txn, fullRoute, writes, result); } }; transient BitSet waitingOn; transient int waitingOnCount; - private AccordInteropApply(Kind kind, TxnId txnId, PartialRoute route, long waitForEpoch, Seekables keys, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, Writes writes, Result result) + private AccordInteropApply(Kind kind, TxnId txnId, PartialRoute route, long waitForEpoch, Seekables keys, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { - super(kind, txnId, route, waitForEpoch, keys, executeAt, deps, txn, writes, result); + super(kind, txnId, route, waitForEpoch, keys, executeAt, deps, txn, fullRoute, writes, result); } - private AccordInteropApply(Kind kind, Id to, Topologies participates, Topologies executes, TxnId txnId, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) + private AccordInteropApply(Kind kind, Id to, Topologies participates, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) { - super(kind, to, participates, executes, txnId, route, txn, executeAt, deps, writes, result); + super(kind, to, participates, txnId, route, txn, executeAt, deps, writes, result); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java index e92edb1ec980..cd13b99f93fc 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java @@ -31,6 +31,7 @@ import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -45,15 +46,15 @@ public class AccordInteropCommit extends Commit public static final IVersionedSerializer serializer = new CommitSerializer(AccordInteropRead.class, AccordInteropRead.requestSerializer) { @Override - protected AccordInteropCommit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Kind kind, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected AccordInteropCommit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { - return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, ballot, executeAt, partialTxn, partialDeps, fullRoute, read); + return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, read); } }; - public AccordInteropCommit(Kind kind, TxnId txnId, PartialRoute scope, long waitForEpoch, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) + public AccordInteropCommit(Kind kind, TxnId txnId, PartialRoute scope, long waitForEpoch, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) { - super(kind, txnId, scope, waitForEpoch, ballot, executeAt, partialTxn, partialDeps, fullRoute, readData); + super(kind, txnId, scope, waitForEpoch, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, readData); } public AccordInteropCommit(Kind kind, Node.Id to, Topology coordinateTopology, Topologies topologies, TxnId txnId, Txn txn, FullRoute route, Timestamp executeAt, Deps deps, AccordInteropRead read) diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index 489fdfba27f2..eb8ddc5c39ee 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -37,9 +37,6 @@ import accord.api.Agent; import accord.api.Data; import accord.api.Result; -import accord.coordinate.Execute; -import accord.coordinate.Persist; -import accord.coordinate.ExecuteTxn; import accord.local.AgentExecutor; import accord.local.CommandStore; import accord.local.Node; @@ -95,6 +92,8 @@ import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.transport.Dispatcher; +import static accord.coordinate.CoordinationAdapter.Factory.Step.Continue; +import static accord.coordinate.CoordinationAdapter.Invoke.persist; import static accord.utils.Invariants.checkArgument; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordWriteMetrics; @@ -110,11 +109,11 @@ * on its inputs. * */ -public class AccordInteropExecution implements Execute, ReadCoordinator, MaximalCommitSender +public class AccordInteropExecution implements ReadCoordinator, MaximalCommitSender { private static final Logger logger = LoggerFactory.getLogger(AccordInteropExecution.class); - private static class InteropExecutor implements AgentExecutor + static class InteropExecutor implements AgentExecutor { private final AccordAgent agent; @@ -143,29 +142,6 @@ public AsyncChain submit(Callable task) } } - public static class Factory implements Execute.Factory - { - private final InteropExecutor executor; - private final AccordEndpointMapper endpointMapper; - - public Factory(AccordAgent agent, AccordEndpointMapper endpointMapper) - { - this.executor = new InteropExecutor(agent); - this.endpointMapper = endpointMapper; - } - - @Override - public Execute create(Node node, Topologies topologies, Path path, TxnId txnId, Txn txn, FullRoute route, Participants readScope, Timestamp executeAt, Deps deps, BiConsumer callback) - { - // Unrecoverable repair always needs to be run by AccordInteropExecution - AccordUpdate.Kind updateKind = AccordUpdate.kind(txn.update()); - ConsistencyLevel consistencyLevel = txn.read() instanceof TxnRead ? ((TxnRead) txn.read()).cassandraConsistencyLevel() : null; - if (updateKind != AccordUpdate.Kind.UNRECOVERABLE_REPAIR && (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ONE || txn.read().keys().isEmpty())) - return ExecuteTxn.FACTORY.create(node, topologies, path, txnId, txn, route, readScope, executeAt, deps, callback); - return new AccordInteropExecution(node, txnId, txn, updateKind, route, readScope, executeAt, deps, callback, executor, consistencyLevel, endpointMapper); - } - } - private final Node node; private final TxnId txnId; private final Txn txn; @@ -253,7 +229,7 @@ public void sendReadCommand(Message message, InetAddressAndPort to, { Node.Id id = endpointMapper.mappedId(to); SinglePartitionReadCommand command = (SinglePartitionReadCommand) message.payload; - AccordInteropRead read = new AccordInteropRead(id, executes, txnId, readScope, executeAt, command); + AccordInteropRead read = new AccordInteropRead(id, executes, txnId, readScope, executeAt.epoch(), command); // TODO (required): understand interop and whether StableFastPath is appropriate AccordInteropCommit commit = new AccordInteropCommit(Kind.StableFastPath, id, coordinateTopology, allTopologies, txnId, txn, route, executeAt, deps, read); @@ -265,7 +241,7 @@ public void sendReadRepairMutation(Message message, InetAddressAndPort { Node.Id id = endpointMapper.mappedId(to); Mutation mutation = message.payload; - AccordInteropReadRepair readRepair = new AccordInteropReadRepair(id, executes, txnId, readScope, executeAt, mutation); + AccordInteropReadRepair readRepair = new AccordInteropReadRepair(id, executes, txnId, readScope, executeAt.epoch(), mutation); node.send(id, readRepair, executor, new AccordInteropReadRepair.ReadRepairCallback(id, to, message, callback, this)); } @@ -350,7 +326,6 @@ private void sendStableToUncontacted() node.send(to, new Commit(Kind.StableFastPath, to, coordinateTopology, allTopologies, txnId, txn, route, Ballot.ZERO, executeAt, deps, (ReadTxnData) null)); } - @Override public void start() { if (coordinateTopology != executeTopology) @@ -370,7 +345,7 @@ public void start() CommandStore cs = node.commandStores().select(route.homeKey()); result.beginAsResult().withExecutor(cs).begin((data, failure) -> { if (failure == null) - Persist.persist(node, executes, route, txnId, txn, executeAt, deps, txn.execute(txnId, executeAt, data), txn.result(txnId, executeAt, data), callback); + persist(node.coordinationAdapter(txnId, Continue), node, executes, route, txnId, txn, executeAt, deps, txn.execute(txnId, executeAt, data), txn.result(txnId, executeAt, data), callback); else callback.accept(null, failure); }); diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java index 51445469599d..857d082a2cf7 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java @@ -21,9 +21,7 @@ import java.util.function.BiConsumer; import accord.api.Result; -import accord.api.Update; import accord.coordinate.Persist; -import accord.coordinate.PersistTxn; import accord.coordinate.tracking.AppliedTracker; import accord.coordinate.tracking.QuorumTracker; import accord.coordinate.tracking.RequestStatus; @@ -39,7 +37,6 @@ import accord.topology.Topologies; import accord.utils.Invariants; import org.apache.cassandra.db.ConsistencyLevel; -import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.utils.Throwables; /** @@ -49,19 +46,6 @@ */ public class AccordInteropPersist extends Persist { - public static Persist.Factory FACTORY = new Persist.Factory() - { - @Override - public Persist create(Node node, Topologies topologies, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) - { - Update update = txn.update(); - ConsistencyLevel consistencyLevel = update instanceof AccordUpdate ? ((AccordUpdate) update).cassandraCommitCL() : null; - if (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ANY || writes.isEmpty()) - return PersistTxn.FACTORY.create(node, topologies, txnId, route, txn, executeAt, deps, writes, result); - return new AccordInteropPersist(node, topologies, txnId, route, txn, executeAt, deps, writes, result, consistencyLevel); - } - }; - private static class CallbackHolder { private final ResponseTracker tracker; @@ -97,7 +81,6 @@ public void recordSuccess(Node.Id node) handleStatus(tracker.recordSuccess(node)); } - public void recordFailure(Node.Id node, Throwable throwable) { failure = Throwables.merge(failure, throwable); @@ -106,29 +89,28 @@ public void recordFailure(Node.Id node, Throwable throwable) } private final ConsistencyLevel consistencyLevel; - private CallbackHolder holder = null; + private CallbackHolder callback; - public AccordInteropPersist(Node node, Topologies topologies, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, ConsistencyLevel consistencyLevel) + public AccordInteropPersist(Node node, Topologies topologies, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, ConsistencyLevel consistencyLevel, BiConsumer clientCallback) { super(node, topologies, txnId, route, txn, executeAt, deps, writes, result); Invariants.checkArgument(consistencyLevel == ConsistencyLevel.QUORUM || consistencyLevel == ConsistencyLevel.ALL || consistencyLevel == ConsistencyLevel.SERIAL || consistencyLevel == ConsistencyLevel.ONE); this.consistencyLevel = consistencyLevel; + registerClientCallback(result, clientCallback); } - @Override - public void registerClientCallback(Writes writes, Result result, BiConsumer clientCallback) + public void registerClientCallback(Result result, BiConsumer clientCallback) { - - Invariants.checkState(holder == null); + Invariants.checkState(callback == null); switch (consistencyLevel) { case ONE: // Can safely upgrade ONE to QUORUM/SERIAL to get a synchronous commit case SERIAL: case QUORUM: - holder = new CallbackHolder(new QuorumTracker(topologies), result, clientCallback); + callback = new CallbackHolder(new QuorumTracker(topologies), result, clientCallback); break; case ALL: - holder = new CallbackHolder(new AppliedTracker(topologies), result, clientCallback); + callback = new CallbackHolder(new AppliedTracker(topologies), result, clientCallback); break; default: throw new IllegalArgumentException("Unhandled consistency level: " + consistencyLevel); @@ -143,7 +125,7 @@ public void onSuccess(Node.Id from, Apply.ApplyReply reply) { case Redundant: case Applied: - holder.recordSuccess(from); + callback.recordSuccess(from); return; case Insufficient: // On insufficient Persist will send a commit with the missing information @@ -156,12 +138,12 @@ public void onSuccess(Node.Id from, Apply.ApplyReply reply) @Override public void onFailure(Node.Id from, Throwable failure) { - holder.recordFailure(from, failure); + callback.recordFailure(from, failure); } @Override public void onCallbackFailure(Node.Id from, Throwable failure) { - holder.recordFailure(from, failure); + callback.recordFailure(from, failure); } } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java index 0caceb6fb3e9..8e2ec02a9b7f 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java @@ -24,7 +24,7 @@ import accord.api.Data; import accord.local.Node; import accord.local.SafeCommandStore; -import accord.messages.AbstractExecute; +import accord.messages.ReadData; import accord.messages.MessageType; import accord.primitives.PartialTxn; import accord.primitives.Participants; @@ -51,7 +51,10 @@ import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; import org.apache.cassandra.service.accord.serializers.ReadDataSerializers.ReadDataSerializer; -public class AccordInteropRead extends AbstractExecute +import static accord.local.SaveStatus.PreApplied; +import static accord.local.SaveStatus.ReadyToExecute; + +public class AccordInteropRead extends ReadData { public static final IVersionedSerializer requestSerializer = new ReadDataSerializer() { @@ -60,8 +63,7 @@ public void serialize(AccordInteropRead read, DataOutputPlus out, int version) t { CommandSerializers.txnId.serialize(read.txnId, out, version); KeySerializers.participants.serialize(read.readScope, out, version); - out.writeUnsignedVInt(read.waitForEpoch()); - out.writeUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); + out.writeUnsignedVInt(read.executeAtEpoch); SinglePartitionReadCommand.serializer.serialize(read.command, out, version); } @@ -70,10 +72,9 @@ public AccordInteropRead deserialize(DataInputPlus in, int version) throws IOExc { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); Participants readScope = KeySerializers.participants.deserialize(in, version); - long waitForEpoch = in.readUnsignedVInt(); - long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; + long executeAtEpoch = in.readUnsignedVInt(); SinglePartitionReadCommand command = (SinglePartitionReadCommand) SinglePartitionReadCommand.serializer.deserialize(in, version); - return new AccordInteropRead(txnId, readScope, waitForEpoch, executeAtEpoch, command); + return new AccordInteropRead(txnId, readScope, executeAtEpoch, command); } @Override @@ -81,8 +82,7 @@ public long serializedSize(AccordInteropRead read, int version) { return CommandSerializers.txnId.serializedSize(read.txnId, version) + KeySerializers.participants.serializedSize(read.readScope, version) - + TypeSizes.sizeofUnsignedVInt(read.waitForEpoch()) - + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()) + + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch) + SinglePartitionReadCommand.serializer.serializedSize(read.command, version); } }; @@ -146,31 +146,39 @@ ReadResponse convertResponse(ReadOk ok) } } + private static final ExecuteOn EXECUTE_ON = new ExecuteOn(ReadyToExecute, PreApplied); + private final SinglePartitionReadCommand command; - public AccordInteropRead(Node.Id to, Topologies topologies, TxnId txnId, Participants readScope, Timestamp executeAt, SinglePartitionReadCommand command) + public AccordInteropRead(Node.Id to, Topologies topologies, TxnId txnId, Participants readScope, long executeAtEpoch, SinglePartitionReadCommand command) { - super(to, topologies, txnId, readScope, executeAt); + super(to, topologies, txnId, readScope, executeAtEpoch); this.command = command; } - public AccordInteropRead(TxnId txnId, Participants readScope, long executeAtEpoch, long waitForEpoch, SinglePartitionReadCommand command) + public AccordInteropRead(TxnId txnId, Participants readScope, long executeAtEpoch, SinglePartitionReadCommand command) { - super(txnId, readScope, executeAtEpoch, waitForEpoch); + super(txnId, readScope, executeAtEpoch); this.command = command; } @Override - protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn, Ranges unavailable) + public ReadType kind() + { + return ReadType.readTxnData; + } + + @Override + protected AsyncChain beginRead(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn, Ranges unavailable) { // TODO (required): subtract unavailable ranges, either from read or from response (or on coordinator) return AsyncChains.ofCallable(Stage.READ.executor(), () -> new LocalReadData(ReadCommandVerbHandler.instance.doRead(command, false))); } @Override - protected boolean canExecutePreApplied() + protected ExecuteOn executeOn() { - return true; + return EXECUTE_ON; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java index 00aeb0f24454..708443f02563 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java @@ -24,7 +24,8 @@ import accord.api.Data; import accord.local.Node; import accord.local.SafeCommandStore; -import accord.messages.AbstractExecute; +import accord.local.SaveStatus; +import accord.messages.ReadData; import accord.messages.MessageType; import accord.primitives.PartialTxn; import accord.primitives.Participants; @@ -56,7 +57,7 @@ * ensuring that the contents of the read repair consist of data that isn't from transactions that * haven't been committed yet at this command store. */ -public class AccordInteropReadRepair extends AbstractExecute +public class AccordInteropReadRepair extends ReadData { public static final IVersionedSerializer requestSerializer = new ReadDataSerializer() { @@ -65,8 +66,7 @@ public void serialize(AccordInteropReadRepair repair, DataOutputPlus out, int ve { CommandSerializers.txnId.serialize(repair.txnId, out, version); KeySerializers.participants.serialize(repair.readScope, out, version); - out.writeUnsignedVInt(repair.waitForEpoch()); - out.writeUnsignedVInt(repair.executeAtEpoch - repair.waitForEpoch()); + out.writeUnsignedVInt(repair.executeAtEpoch); Mutation.serializer.serialize(repair.mutation, out, version); } @@ -75,10 +75,9 @@ public AccordInteropReadRepair deserialize(DataInputPlus in, int version) throws { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); Participants readScope = KeySerializers.participants.deserialize(in, version); - long waitForEpoch = in.readUnsignedVInt(); - long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; + long executeAtEpoch = in.readUnsignedVInt(); Mutation mutation = Mutation.serializer.deserialize(in, version); - return new AccordInteropReadRepair(txnId, readScope, waitForEpoch, executeAtEpoch, mutation); + return new AccordInteropReadRepair(txnId, readScope, executeAtEpoch, mutation); } @Override @@ -86,8 +85,7 @@ public long serializedSize(AccordInteropReadRepair repair, int version) { return CommandSerializers.txnId.serializedSize(repair.txnId, version) + KeySerializers.participants.serializedSize(repair.readScope, version) - + TypeSizes.sizeofUnsignedVInt(repair.waitForEpoch()) - + TypeSizes.sizeofUnsignedVInt(repair.executeAtEpoch - repair.waitForEpoch()) + + TypeSizes.sizeofUnsignedVInt(repair.executeAtEpoch) + Mutation.serializer.serializedSize(repair.mutation, version); } }; @@ -106,6 +104,8 @@ Object convertResponse(ReadOk ok) } } + private static final ExecuteOn EXECUTE_ON = new ExecuteOn(SaveStatus.ReadyToExecute, SaveStatus.Applied); + private final Mutation mutation; private static final IVersionedSerializer noop_data_serializer = new IVersionedSerializer() @@ -120,39 +120,39 @@ public void serialize(Data t, DataOutputPlus out, int version) throws IOExceptio public static final IVersionedSerializer replySerializer = new ReadDataSerializers.ReplySerializer<>(noop_data_serializer); - public AccordInteropReadRepair(Node.Id to, Topologies topologies, TxnId txnId, Participants readScope, Timestamp executeAt, Mutation mutation) + public AccordInteropReadRepair(Node.Id to, Topologies topologies, TxnId txnId, Participants readScope, long executeAtEpoch, Mutation mutation) { - super(to, topologies, txnId, readScope, executeAt); + super(to, topologies, txnId, readScope, executeAtEpoch); this.mutation = mutation; } - public AccordInteropReadRepair(TxnId txnId, Participants readScope, long executeAtEpoch, long waitForEpoch, Mutation mutation) + public AccordInteropReadRepair(TxnId txnId, Participants readScope, long executeAtEpoch, Mutation mutation) { // TODO (review): remove followup read - Is there anything left to be done for this or can I remove it? - super(txnId, readScope, executeAtEpoch, waitForEpoch); + super(txnId, readScope, executeAtEpoch); this.mutation = mutation; } @Override - protected AsyncChain execute(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn, Ranges unavailable) + protected ExecuteOn executeOn() { - // TODO (required): subtract unavailable ranges, either from read or from response (or on coordinator) - return AsyncChains.ofCallable(Verb.READ_REPAIR_REQ.stage.executor(), () -> { - ReadRepairVerbHandler.instance.applyMutation(mutation); - return Data.NOOP_DATA; - }); + return EXECUTE_ON; } @Override - protected boolean canExecutePreApplied() + public ReadType kind() { - return true; + return ReadType.readTxnData; } @Override - protected boolean executeIfObsoleted() + protected AsyncChain beginRead(SafeCommandStore safeStore, Timestamp executeAt, PartialTxn txn, Ranges unavailable) { - return true; + // TODO (required): subtract unavailable ranges, either from read or from response (or on coordinator) + return AsyncChains.ofCallable(Verb.READ_REPAIR_REQ.stage.executor(), () -> { + ReadRepairVerbHandler.instance.applyMutation(mutation); + return Data.NOOP_DATA; + }); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index d75d924ca5e1..102ffb57b923 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -22,6 +22,7 @@ import accord.api.Result; import accord.messages.Apply; +import accord.primitives.FullRoute; import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; @@ -68,11 +69,12 @@ public void serializeBody(A apply, DataOutputPlus out, int version) throws IOExc CommandSerializers.timestamp.serialize(apply.executeAt, out, version); DepsSerializer.partialDeps.serialize(apply.deps, out, version); CommandSerializers.nullablePartialTxn.serialize(apply.txn, out, version); + KeySerializers.nullableFullRoute.serialize(apply.fullRoute, out, version); CommandSerializers.writes.serialize(apply.writes, out, version); } protected abstract A deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, - Timestamp executeAt, PartialDeps deps, PartialTxn txn, Writes writes, Result result); + Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result); @Override public A deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException @@ -83,6 +85,7 @@ public A deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRout CommandSerializers.timestamp.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), CommandSerializers.nullablePartialTxn.deserialize(in, version), + KeySerializers.nullableFullRoute.deserialize(in, version), CommandSerializers.writes.deserialize(in, version), CommandSerializers.APPLIED); } @@ -95,6 +98,7 @@ public long serializedBodySize(A apply, int version) + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + DepsSerializer.partialDeps.serializedSize(apply.deps, version) + CommandSerializers.nullablePartialTxn.serializedSize(apply.txn, version) + + KeySerializers.nullableFullRoute.serializedSize(apply.fullRoute, version) + CommandSerializers.writes.serializedSize(apply.writes, version); } } @@ -103,9 +107,9 @@ public long serializedBodySize(A apply, int version) { @Override protected Apply deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, - Timestamp executeAt, PartialDeps deps, PartialTxn txn, Writes writes, Result result) + Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result) { - return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, kind, keys, executeAt, deps, txn, writes, result); + return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, kind, keys, executeAt, deps, txn, fullRoute, writes, result); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index caefcec6f200..fbc3aeb22f55 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -46,6 +46,8 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.serializers.IVersionedWithKeysSerializer.AbstractWithKeysSerializer; +import org.apache.cassandra.service.accord.serializers.IVersionedWithKeysSerializer.NullableWithKeysSerializer; import org.apache.cassandra.service.accord.serializers.SmallEnumSerializer.NullableSmallEnumSerializer; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnQuery; @@ -140,7 +142,7 @@ public int serializedSize() } } - public static class PartialTxnSerializer implements IVersionedSerializer + public static class PartialTxnSerializer extends AbstractWithKeysSerializer implements IVersionedWithKeysSerializer, PartialTxn> { private final IVersionedSerializer readSerializer; private final IVersionedSerializer querySerializer; @@ -155,10 +157,52 @@ public PartialTxnSerializer(IVersionedSerializer readSerializer, IVersione @Override public void serialize(PartialTxn txn, DataOutputPlus out, int version) throws IOException + { + KeySerializers.seekables.serialize(txn.keys(), out, version); + serializeWithoutKeys(txn, out, version); + } + + @Override + public PartialTxn deserialize(DataInputPlus in, int version) throws IOException + { + Seekables keys = KeySerializers.seekables.deserialize(in, version); + return deserializeWithoutKeys(keys, in, version); + } + + @Override + public long serializedSize(PartialTxn txn, int version) + { + long size = KeySerializers.seekables.serializedSize(txn.keys(), version); + size += serializedSizeWithoutKeys(txn, version); + return size; + } + + @Override + public void serialize(Seekables superset, PartialTxn txn, DataOutputPlus out, int version) throws IOException + { + serializeSubset(txn.keys(), superset, out); + serializeWithoutKeys(txn, out, version); + } + + @Override + public PartialTxn deserialize(Seekables superset, DataInputPlus in, int version) throws IOException + { + Seekables keys = deserializeSubset(superset, in); + return deserializeWithoutKeys(keys, in, version); + } + + @Override + public long serializedSize(Seekables superset, PartialTxn txn, int version) + { + long size = serializedSubsetSize(txn.keys(), superset); + size += serializedSizeWithoutKeys(txn, version); + return size; + } + + private void serializeWithoutKeys(PartialTxn txn, DataOutputPlus out, int version) throws IOException { CommandSerializers.kind.serialize(txn.kind(), out, version); KeySerializers.ranges.serialize(txn.covering(), out, version); - KeySerializers.seekables.serialize(txn.keys(), out, version); readSerializer.serialize(txn.read(), out, version); querySerializer.serialize(txn.query(), out, version); out.writeBoolean(txn.update() != null); @@ -166,24 +210,21 @@ public void serialize(PartialTxn txn, DataOutputPlus out, int version) throws IO updateSerializer.serialize(txn.update(), out, version); } - @Override - public PartialTxn deserialize(DataInputPlus in, int version) throws IOException + private PartialTxn deserializeWithoutKeys(Seekables keys, DataInputPlus in, int version) throws IOException { Txn.Kind kind = CommandSerializers.kind.deserialize(in, version); Ranges covering = KeySerializers.ranges.deserialize(in, version); - Seekables keys = KeySerializers.seekables.deserialize(in, version); Read read = readSerializer.deserialize(in, version); Query query = querySerializer.deserialize(in, version); Update update = in.readBoolean() ? updateSerializer.deserialize(in, version) : null; return new PartialTxn.InMemory(covering, kind, keys, read, query, update); } - @Override - public long serializedSize(PartialTxn txn, int version) + + private long serializedSizeWithoutKeys(PartialTxn txn, int version) { long size = CommandSerializers.kind.serializedSize(txn.kind(), version); size += KeySerializers.ranges.serializedSize(txn.covering(), version); - size += KeySerializers.seekables.serializedSize(txn.keys(), version); size += readSerializer.serializedSize(txn.read(), version); size += querySerializer.serializedSize(txn.query(), version); size += TypeSizes.sizeof(txn.update() != null); @@ -197,8 +238,8 @@ public long serializedSize(PartialTxn txn, int version) private static final IVersionedSerializer query = new CastingSerializer<>(TxnQuery.class, TxnQuery.serializer); private static final IVersionedSerializer update = new CastingSerializer<>(AccordUpdate.class, AccordUpdate.serializer); - public static final IVersionedSerializer partialTxn = new PartialTxnSerializer(read, query, update); - public static final IVersionedSerializer nullablePartialTxn = NullableSerializer.wrap(partialTxn); + public static final IVersionedWithKeysSerializer, PartialTxn> partialTxn = new PartialTxnSerializer(read, query, update); + public static final IVersionedWithKeysSerializer, PartialTxn> nullablePartialTxn = new NullableWithKeysSerializer<>(partialTxn); public static final EnumSerializer saveStatus = new EnumSerializer<>(SaveStatus.class); public static final EnumSerializer status = new EnumSerializer<>(Status.class); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index 48ec6fae6c6d..d5144cbe8da1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -18,190 +18,832 @@ package org.apache.cassandra.service.accord.serializers; -import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; +import java.util.Arrays; -import com.google.common.annotations.VisibleForTesting; +import com.google.common.primitives.Ints; -import accord.impl.CommandTimeseries.CommandLoader; -import accord.local.Command; -import accord.local.SaveStatus; -import accord.primitives.PartialDeps; +import accord.api.Key; +import accord.impl.CommandsForKey; +import accord.impl.CommandsForKey.Info; +import accord.impl.CommandsForKey.InternalStatus; +import accord.impl.CommandsForKey.NoInfo; +import accord.local.Node; +import accord.primitives.Routable.Domain; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; -import org.apache.cassandra.db.marshal.ValueAccessor; -import org.apache.cassandra.io.IVersionedSerializer; -import org.apache.cassandra.io.LocalVersionedSerializer; -import org.apache.cassandra.io.util.DataInputBuffer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputBuffer; -import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.AccordSerializerVersion; +import org.apache.cassandra.utils.vint.VIntCoding; + +import static accord.primitives.Txn.Kind.ExclusiveSyncPoint; +import static accord.primitives.Txn.Kind.Read; +import static accord.primitives.Txn.Kind.Write; +import static accord.utils.ArrayBuffers.cachedInts; +import static accord.utils.ArrayBuffers.cachedTxnIds; +import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.EXTENDED; +import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.EXTENDED_BITS; +import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.RAW; +import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.RAW_BITS; +import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.STANDARD; +import static org.apache.cassandra.utils.ByteBufferUtil.readLeastSignificantBytes; +import static org.apache.cassandra.utils.ByteBufferUtil.writeLeastSignificantBytes; +import static org.apache.cassandra.utils.ByteBufferUtil.writeMostSignificantBytes; public class CommandsForKeySerializer { - @VisibleForTesting - public static final IVersionedSerializer> depsIdSerializer = new IVersionedSerializer>() + private static final int HAS_MISSING_DEPS_HEADER_BIT = 0x1; + private static final int HAS_EXECUTE_AT_HEADER_BIT = 0x2; + private static final int HAS_NON_STANDARD_FLAGS = 0x4; + + /** + * We read/write a fixed number of intial bytes for each command, with an initial flexible number of flag bits + * and the remainder interpreted as the HLC/epoch/node. + * + * The preamble encodes: + * vint32: number of commands + * vint32: number of unique node Ids + * [unique node ids] + * two flag bytes: + * bit 0 is set if there are any missing ids; + * bit 1 is set if there are any executeAt specified + * bit 2 is set if there are any queries present besides reads/writes + * bits 3-4 number of header bytes to read for each command + * bits 5-6: level 0 extra hlc bytes to read + * bits 7-8: level 1 extra hlc bytes to read (+ 1 + level 0) + * bits 9-10: level 2 extra hlc bytes to read (+ 1 + level 1) + * bits 12-13: level 3 extra hlc bytes to read (+ 1 + level 2) + * + * In order, for each command, we consume: + * 3 bits for the InternalStatus of the command + * 1 optional bit: if the status encodes an executeAt, indicating if the executeAt is not the TxnId + * 1 optional bit: if the status encodes any dependencies and there are non-zero missing ids, indicating if there are any missing for this command + * 1 or 2 bits for the kind of the TxnId: 0=key read, 1=key write, 2=exclusive sync point,3=read 16 bits + * 1 bit encoding if the epoch has changed + * 2 optional bits: if the prior bit is set, indicating how many bits should be read for the epoch increment: 0=none (increment by 1); 1=4, 2=8, 3=32 + * 4 option bits: if prior bits=01, epoch delta + * N node id bits (where 2^N unique node ids in the CFK) + * 2 bits indicating how many more payload bytes should be read, with mapping written in header + * all remaining bits are interpreted as a delta from the prior HLC + * + * if txnId kind flag is 3, read an additional 2 bytes for TxnId flag + * if epoch increment flag is 2 or 3, read additional 1 or 4 bytes for epoch delta + * if executeAt is expected, read vint32 for epoch, vint32 for delta from txnId hlc, and ceil(N/8) bytes for node id + * + * After writing all transactions, we then write out the missing txnid collections. This is written at the end + * so that on deserialization we have already read all of the TxnId. This also permits more efficient serialization, + * as we can encode a single bit stream with the optimal number of bits. + * TODO (desired): we could prefix this collection with the subset of TxnId that are actually missing from any other + * deps, so as to shrink this collection much further. + */ + // TODO (expected): offer filtering option that does not need to reconstruct objects/info, reusing prior encoding decisions + // TODO (expected): accept new redundantBefore on load to avoid deserializing stale data + // TODO (desired): determine timestamp resolution as a factor of 10 + public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) { - @Override - public void serialize(List ids, DataOutputPlus out, int version) throws IOException + int commandCount = cfk.size(); + if (commandCount == 0) { - out.writeInt(ids.size()); - for (int i=0,mi=ids.size(); i deserialize(DataInputPlus in, int version) throws IOException + int[] nodeIds = cachedInts().getInts(Math.min(64, commandCount)); + try { - int size = in.readInt(); - List ids = new ArrayList<>(size); - for (int i=0; i= nodeIds.length) + { + nodeIdCount = compact(nodeIds); + if (nodeIdCount > nodeIds.length/2) + nodeIds = cachedInts().resize(nodeIds, nodeIds.length, nodeIds.length * 2); + } - @Override - public long serializedSize(List ids, int version) - { - long size = TypeSizes.INT_SIZE; - for (int i=0,mi=ids.size(); i> depsIdsLocalSerializer = new LocalVersionedSerializer<>(AccordSerializerVersion.CURRENT, AccordSerializerVersion.serializer, depsIdSerializer); + hasNonStandardFlags |= txnIdFlags(txnId) != STANDARD; + nodeIds[nodeIdCount++] = txnId.node.id; - public static final CommandLoader loader = new AccordCFKLoader(); - private static class AccordCFKLoader implements CommandLoader - { - private static final int HAS_DEPS = 0x01; - private static final int HAS_EXECUTE_AT = 0x02; + if (info.getClass() == NoInfo.class) + continue; - private static final long FIXED_SIZE; - private static final int FLAG_OFFSET; - private static final int STATUS_OFFSET; - private static final int TXNID_OFFSET; - private static final int EXECUTEAT_OFFSET; - private static final int DEPS_OFFSET; + missingIdCount += info.missing.length; - static - { - long size = 0; + if (info.executeAt == txnId) + continue; + + nodeIds[nodeIdCount++] = info.executeAt.node.id; + bitsPerExecuteAtEpochDelta = Math.max(bitsPerExecuteAtEpochDelta, numberOfBitsToRepresent(info.executeAt.epoch() - txnId.epoch())); + bitsPerExecuteAtHlcDelta = Math.max(bitsPerExecuteAtHlcDelta, numberOfBitsToRepresent(info.executeAt.hlc() - txnId.hlc())); + bitsPerExecuteAtFlags = Math.max(bitsPerExecuteAtFlags, numberOfBitsToRepresent(info.executeAt.flags())); + executeAtCount += 1; + } + nodeIdCount = compact(nodeIds); + Invariants.checkState(nodeIdCount > 0); + } + + // We can now use this information to calculate the fixed header size, compute the amount + // of additional space we'll need to store the TxnId and its basic info + int bitsPerNodeId = numberOfBitsToRepresent(nodeIdCount); + int minHeaderBits = 7 + bitsPerNodeId + (hasNonStandardFlags ? 1 : 0); + int infoHeaderBits = (executeAtCount > 0 ? 1 : 0) + (missingIdCount > 0 ? 1 : 0); + int maxHeaderBits = minHeaderBits; + int totalBytes = 0; + + long prevEpoch = cfk.redundantBefore().epoch(); + long prevHlc = cfk.redundantBefore().hlc(); + int[] bytesHistogram = cachedInts().getInts(12); + Arrays.fill(bytesHistogram, 0); + for (int i = 0 ; i < commandCount ; ++i) + { + int headerBits = minHeaderBits; + int payloadBits = 0; + + TxnId txnId = cfk.txnId(i); + { + long epoch = txnId.epoch(); + Invariants.checkState(epoch >= prevEpoch); + long epochDelta = epoch - prevEpoch; + long hlc = txnId.hlc(); + long hlcDelta = hlc - prevHlc; + + if (epochDelta > 0) + { + if (hlcDelta < 0) + hlcDelta = -1 - hlcDelta; + + headerBits += 3; + if (epochDelta > 1) + { + if (epochDelta <= 0xf) headerBits += 4; + else if (epochDelta <= 0xff) totalBytes += 1; + else { totalBytes += 4; Invariants.checkState(epochDelta <= 0xffffffffL); } + } + } + + payloadBits += numberOfBitsToRepresent(hlcDelta); + prevEpoch = epoch; + prevHlc = hlc; + } + + if (hasNonStandardFlags && txnIdFlags(txnId) == RAW) + totalBytes += 2; + + Info info = cfk.info(i); + if (info.status.hasInfo) + headerBits += infoHeaderBits; + maxHeaderBits = Math.max(headerBits, maxHeaderBits); + int basicBytes = (headerBits + payloadBits + 7)/8; + bytesHistogram[basicBytes]++; + } + + int minBasicBytes = -1, maxBasicBytes = 0; + for (int i = 0 ; i < bytesHistogram.length ; ++i) + { + if (bytesHistogram[i] == 0) continue; + if (minBasicBytes == -1) minBasicBytes = i; + maxBasicBytes = i; + } + for (int i = minBasicBytes + 1 ; i <= maxBasicBytes ; ++i) + bytesHistogram[i] += bytesHistogram[i-1]; + + int flags = (missingIdCount > 0 ? HAS_MISSING_DEPS_HEADER_BIT : 0) + | (executeAtCount > 0 ? HAS_EXECUTE_AT_HEADER_BIT : 0) + | (hasNonStandardFlags ? HAS_NON_STANDARD_FLAGS : 0); + + int headerBytes = (maxHeaderBits+7)/8; + flags |= Invariants.checkArgument(headerBytes - 1, headerBytes <= 4) << 3; + + int hlcBytesLookup; + { // 2bits per size, first value may be zero and remainder may be increments of 1-4; + // only need to be able to encode a distribution of approx. 8 bytes at most, so + // pick lowest number we need first, then next lowest as 25th %ile while ensuring value of 1-4; + // then pick highest number we need, ensuring at least 2 greater than second (leaving room for third) + // then pick third number as 75th %ile, but at least 1 less than highest, and one more than second + // finally, ensure third then second are distributed so that there is no more than a gap of 4 between them and the next + int l0 = Math.max(0, Math.min(3, minBasicBytes - headerBytes)); + int l1 = Math.max(l0+1, Math.min(l0+4,Arrays.binarySearch(bytesHistogram, commandCount/4) - headerBytes)); + int l3 = Math.max(l1+2, maxBasicBytes - headerBytes); + int l2 = Math.max(l1+1, Math.min(l3-1, Arrays.binarySearch(bytesHistogram, (3*commandCount)/4) - headerBytes)); + while (l3-l2 > 4) ++l2; + while (l2-l1 > 4) ++l1; + hlcBytesLookup = setHlcBytes(l0, l1, l2, l3); + flags |= (l0 | ((l1-(1+l0))<<2) | ((l2-(1+l1))<<4) | ((l3-(1+l2))<<6)) << 5; + } + int hlcFlagLookup = hlcBytesLookupToHlcFlagLookup(hlcBytesLookup); + + totalBytes += bytesHistogram[minBasicBytes] * (headerBytes + getHlcBytes(hlcBytesLookup, getHlcFlag(hlcFlagLookup, minBasicBytes - headerBytes))); + for (int i = minBasicBytes + 1 ; i <= maxBasicBytes ; ++i) + totalBytes += (bytesHistogram[i] - bytesHistogram[i-1]) * (headerBytes + getHlcBytes(hlcBytesLookup, getHlcFlag(hlcFlagLookup, i - headerBytes))); + totalBytes += TypeSizes.sizeofUnsignedVInt(commandCount); + totalBytes += TypeSizes.sizeofUnsignedVInt(nodeIdCount); + totalBytes += TypeSizes.sizeofUnsignedVInt(nodeIds[0]); + for (int i = 1 ; i < nodeIdCount ; ++i) + totalBytes += TypeSizes.sizeofUnsignedVInt(nodeIds[i] - nodeIds[i-1]); + totalBytes += 2; + + Arrays.fill(bytesHistogram, minBasicBytes, maxBasicBytes + 1, 0); + cachedInts().forceDiscard(bytesHistogram); + + prevEpoch = cfk.redundantBefore().epoch(); + prevHlc = cfk.redundantBefore().hlc(); + // account for encoding redundantBefore + totalBytes += TypeSizes.sizeofUnsignedVInt(prevEpoch); + totalBytes += TypeSizes.sizeofUnsignedVInt(prevHlc); + totalBytes += 2; // flags TODO (expected): pack this along with uniqueIdBits, as usually zero bits should be needed + totalBytes += (bitsPerNodeId+7)/8; + + if (missingIdCount + executeAtCount > 0) + { + // account for encoding missing id stream + int missingIdBits = 1 + numberOfBitsToRepresent(commandCount); + int executeAtBits = bitsPerNodeId + + bitsPerExecuteAtEpochDelta + + bitsPerExecuteAtHlcDelta + + bitsPerExecuteAtFlags; + totalBytes += (missingIdBits * missingIdCount + executeAtBits * executeAtCount + 7)/8; + if (executeAtCount > 0) + totalBytes += 2; + } + + ByteBuffer out = ByteBuffer.allocate(totalBytes); + VIntCoding.writeUnsignedVInt32(commandCount, out); + VIntCoding.writeUnsignedVInt32(nodeIdCount, out); + VIntCoding.writeUnsignedVInt32(nodeIds[0], out); + for (int i = 1 ; i < nodeIdCount ; ++i) // TODO (desired): can encode more efficiently as a stream of N bit integers + VIntCoding.writeUnsignedVInt32(nodeIds[i] - nodeIds[i-1], out); + out.putShort((short)flags); + + VIntCoding.writeUnsignedVInt(prevEpoch, out); + VIntCoding.writeUnsignedVInt(prevHlc, out); + out.putShort((short) cfk.redundantBefore().flags()); + writeLeastSignificantBytes(Arrays.binarySearch(nodeIds, 0, nodeIdCount, cfk.redundantBefore().node.id), (bitsPerNodeId+7)/8, out); - FLAG_OFFSET = (int) size; - size += TypeSizes.BYTE_SIZE; + int executeAtMask = executeAtCount > 0 ? 1 : 0; + int missingDepsMask = missingIdCount > 0 ? 1 : 0; + int flagsIncrement = hasNonStandardFlags ? 2 : 1; + // TODO (desired): check this loop compiles correctly to only branch on epoch case, for binarySearch and flushing + for (int i = 0 ; i < commandCount ; ++i) + { + TxnId txnId = cfk.txnId(i); + Info info = cfk.info(i); + InternalStatus status = info.status; + + long bits = status.ordinal(); + int bitIndex = 3; + + int statusHasInfo = status.hasInfo ? 1 : 0; + long hasExecuteAt = info.executeAt != null & info.executeAt != txnId ? 1 : 0; + bits |= hasExecuteAt << bitIndex; + bitIndex += statusHasInfo & executeAtMask; + + long hasMissingIds = info.missing != CommandsForKey.NO_TXNIDS ? 1 : 0; + bits |= hasMissingIds << bitIndex; + bitIndex += statusHasInfo & missingDepsMask; + + long flagBits = txnIdFlagsBits(txnId); + boolean writeFullFlags = flagBits == RAW_BITS; + bits |= flagBits << bitIndex; + bitIndex += flagsIncrement; + + long hlcBits; + int extraEpochDeltaBytes = 0; + { + long epoch = txnId.epoch(); + long delta = epoch - prevEpoch; + long hlc = txnId.hlc(); + hlcBits = hlc - prevHlc; + if (delta == 0) + { + bitIndex++; + } + else + { + bits |= 1L << bitIndex++; + if (hlcBits < 0) + { + hlcBits = -1 - hlcBits; + bits |= 1L << bitIndex; + } + bitIndex++; + if (delta > 1) + { + if (delta <= 0xf) + { + bits |= 1L << bitIndex; + bits |= delta << (bitIndex + 2); + bitIndex += 4; + } + else + { + bits |= (delta <= 0xff ? 2L : 3L) << bitIndex; + extraEpochDeltaBytes = Ints.checkedCast(delta); + } + } + bitIndex += 2; + } + prevEpoch = epoch; + prevHlc = hlc; + } + + bits |= ((long)Arrays.binarySearch(nodeIds, 0, nodeIdCount, txnId.node.id)) << bitIndex; + bitIndex += bitsPerNodeId; + + bits |= hlcBits << (bitIndex + 2); + hlcBits >>>= 8*headerBytes - (bitIndex + 2); + int hlcFlag = getHlcFlag(hlcFlagLookup, (7 + numberOfBitsToRepresent(hlcBits))/8); + bits |= ((long)hlcFlag) << bitIndex; - STATUS_OFFSET = (int) size; - size += TypeSizes.BYTE_SIZE; + writeLeastSignificantBytes(bits, headerBytes, out); + writeLeastSignificantBytes(hlcBits, getHlcBytes(hlcBytesLookup, hlcFlag), out); - TXNID_OFFSET = (int) size; - size += CommandSerializers.txnId.serializedSize(); + if (writeFullFlags) + out.putShort((short)txnId.flags()); - FIXED_SIZE = size; + if (extraEpochDeltaBytes > 0) + { + if (extraEpochDeltaBytes <= 0xff) out.put((byte)extraEpochDeltaBytes); + else out.putInt(extraEpochDeltaBytes); + } + } + + if ((executeAtCount | missingIdCount) > 0) + { + int bitsPerCommandId = numberOfBitsToRepresent(commandCount); + int bitsPerMissingId = 1 + bitsPerCommandId; + int bitsPerExecuteAt = bitsPerExecuteAtEpochDelta + bitsPerExecuteAtHlcDelta + bitsPerExecuteAtFlags + bitsPerNodeId; + Invariants.checkState(bitsPerExecuteAtEpochDelta < 64); + Invariants.checkState(bitsPerExecuteAtHlcDelta <= 64); + Invariants.checkState(bitsPerExecuteAtFlags <= 16); + if (executeAtMask > 0) // we encode both 15 and 16 bits for flag length as 15 to fit in a short + out.putShort((short) ((bitsPerExecuteAtEpochDelta << 10) | ((bitsPerExecuteAtHlcDelta-1) << 4) | (Math.min(15, bitsPerExecuteAtFlags)))); + long buffer = 0L; + int bufferCount = 0; - EXECUTEAT_OFFSET = (int) size; - size += CommandSerializers.timestamp.serializedSize(); + for (int i = 0 ; i < commandCount ; ++i) + { + Info info = cfk.info(i); + if (info.getClass() == NoInfo.class) + continue; + + TxnId txnId = cfk.txnId(i); + if (info.executeAt != txnId) + { + Timestamp executeAt = info.executeAt; + int nodeIdx = Arrays.binarySearch(nodeIds, 0, nodeIdCount, executeAt.node.id); + if (bitsPerExecuteAt <= 64) + { + Invariants.checkState(executeAt.epoch() >= txnId.epoch()); + long executeAtBits = executeAt.epoch() - txnId.epoch(); + int offset = bitsPerExecuteAtEpochDelta; + executeAtBits |= (executeAt.hlc() - txnId.hlc()) << offset ; + offset += bitsPerExecuteAtHlcDelta; + executeAtBits |= ((long)executeAt.flags()) << offset; + offset += bitsPerExecuteAtFlags; + executeAtBits |= ((long)nodeIdx) << offset; + buffer = flushBits(buffer, bufferCount, executeAtBits, bitsPerExecuteAt, out); + bufferCount = (bufferCount + bitsPerExecuteAt) & 63; + } + else + { + buffer = flushBits(buffer, bufferCount, executeAt.epoch() - txnId.epoch(), bitsPerExecuteAtEpochDelta, out); + bufferCount = (bufferCount + bitsPerExecuteAtEpochDelta) & 63; + buffer = flushBits(buffer, bufferCount, executeAt.hlc() - txnId.hlc(), bitsPerExecuteAtHlcDelta, out); + bufferCount = (bufferCount + bitsPerExecuteAtHlcDelta) & 63; + buffer = flushBits(buffer, bufferCount, executeAt.flags(), bitsPerExecuteAtFlags, out); + bufferCount = (bufferCount + bitsPerExecuteAtFlags) & 63; + buffer = flushBits(buffer, bufferCount, nodeIdx, bitsPerNodeId, out); + bufferCount = (bufferCount + bitsPerNodeId) & 63; + } + } - DEPS_OFFSET = (int) size; + if (info.missing.length > 0) + { + int j = 0; + while (j < info.missing.length - 1) + { + int missingId = cfk.indexOf(info.missing[j++]); + buffer = flushBits(buffer, bufferCount, missingId, bitsPerMissingId, out); + bufferCount = (bufferCount + bitsPerMissingId) & 63; + } + int missingId = cfk.indexOf(info.missing[info.missing.length - 1]); + missingId |= 1L << bitsPerCommandId; + buffer = flushBits(buffer, bufferCount, missingId, bitsPerMissingId, out); + bufferCount = (bufferCount + bitsPerMissingId) & 63; + } + } + + writeMostSignificantBytes(buffer, (bufferCount + 7)/8, out); + } + + out.flip(); + return out; + } + finally + { + cachedInts().forceDiscard(nodeIds); } + } - private int serializedSize(Command command) + private static long flushBits(long buffer, int bufferCount, long add, int addCount, ByteBuffer out) + { + Invariants.checkArgument(addCount == 64 || 0 == (add & (-1L << addCount))); + int total = bufferCount + addCount; + if (total < 64) { - return (int) (FIXED_SIZE - + (command.executeAt() != null ? CommandSerializers.timestamp.serializedSize() : 0) - + (command.partialDeps() != null ? depsIdsLocalSerializer.serializedSize(command.partialDeps().txnIds()) : 0)); + return buffer | (add << 64 - total); } + else + { + buffer |= add >>> total - 64; + out.putLong(buffer); + return total == 64 ? 0 : (add << (128 - total)); + } + } - private static final ValueAccessor accessor = ByteBufferAccessor.instance; + public static CommandsForKey fromBytes(Key key, ByteBuffer in) + { + if (!in.hasRemaining()) + return null; - private static byte toByte(int v) + in = in.duplicate(); + int commandCount = VIntCoding.readUnsignedVInt32(in); + if (commandCount == 0) { - Invariants.checkArgument(v < Byte.MAX_VALUE, "Value %d is larger than %d", v, Byte.MAX_VALUE); - return (byte) v; + long epoch = VIntCoding.readUnsignedVInt(in); + long hlc = VIntCoding.readUnsignedVInt(in); + int flags = VIntCoding.readUnsignedVInt32(in); + Node.Id id = new Node.Id(VIntCoding.readUnsignedVInt32(in)); + return new CommandsForKey(key).withoutRedundant(TxnId.fromValues(epoch, hlc, flags, id)); } - private AccordCFKLoader() {} + TxnId[] txnIds = new TxnId[commandCount]; + Info[] infos = new Info[commandCount]; + int nodeIdCount = VIntCoding.readUnsignedVInt32(in); + int bitsPerNodeId = numberOfBitsToRepresent(nodeIdCount); + long nodeIdMask = (1L << bitsPerNodeId) - 1; + Node.Id[] nodeIds = new Node.Id[nodeIdCount]; // TODO (expected): use a shared reusable scratch buffer + { + int prev = VIntCoding.readUnsignedVInt32(in); + nodeIds[0] = new Node.Id(prev); + for (int i = 1 ; i < nodeIdCount ; ++i) + nodeIds[i] = new Node.Id(prev += VIntCoding.readUnsignedVInt32(in)); + } - @Override - public ByteBuffer saveForCFK(Command command) + int missingDepsMasks, executeAtMasks, txnIdFlagsMask; + int headerByteCount, hlcBytesLookup; { - int flags = 0; + int flags = in.getShort(); + missingDepsMasks = 0 != (flags & HAS_MISSING_DEPS_HEADER_BIT) ? 1 : 0; + executeAtMasks = 0 != (flags & HAS_EXECUTE_AT_HEADER_BIT) ? 1 : 0; + txnIdFlagsMask = 0 != (flags & HAS_NON_STANDARD_FLAGS) ? 3 : 1; + headerByteCount = 1 + ((flags >>> 3) & 0x3); + hlcBytesLookup = setHlcByteDeltas((flags >>> 5) & 0x3, (flags >>> 7) & 0x3, (flags >>> 9) & 0x3, (flags >>> 11) & 0x3); + } + + long prevEpoch = VIntCoding.readUnsignedVInt32(in); + long prevHlc = VIntCoding.readUnsignedVInt32(in); + TxnId redundantBefore = TxnId.fromValues(prevEpoch, prevHlc, in.getShort(), + nodeIds[(int)readLeastSignificantBytes((bitsPerNodeId+7)/8, in)]); - PartialDeps deps = command.partialDeps(); - Timestamp executeAt = command.executeAt(); - if (deps != null) - flags |= HAS_DEPS; - if (executeAt != null) - flags |= HAS_EXECUTE_AT; - int size = serializedSize(command); - ByteBuffer buffer = accessor.allocate(size); - accessor.putByte(buffer, FLAG_OFFSET, toByte(flags)); - accessor.putByte(buffer, STATUS_OFFSET, toByte(command.saveStatus().ordinal())); - CommandSerializers.txnId.serialize(command.txnId(), buffer, accessor, TXNID_OFFSET); - if (executeAt != null) - CommandSerializers.timestamp.serialize(executeAt, buffer, accessor, EXECUTEAT_OFFSET); - if (deps != null) + for (int i = 0 ; i < commandCount ; ++i) + { + long header = readLeastSignificantBytes(headerByteCount, in); + header |= 1L << (8 * headerByteCount); // marker so we know where to shift-left most-significant bytes to + InternalStatus status = InternalStatus.get((int) (header & 0x7)); + header >>>= 3; + + int executeAtInfoOffset, missingDepsInfoOffset; { - ByteBuffer duplicate = buffer.duplicate(); - duplicate.position(executeAt != null ? DEPS_OFFSET : EXECUTEAT_OFFSET); - try (DataOutputBuffer out = new DataOutputBuffer(duplicate)) - { - depsIdsLocalSerializer.serialize(deps.txnIds(), out); - } - catch (IOException e) + int infoMask = status.hasInfo ? 1 : 0; + int executeAtMask = infoMask & executeAtMasks, missingDepsMask = infoMask & missingDepsMasks; + executeAtInfoOffset = ((int)header & executeAtMask) << 1; + header >>>= executeAtMask; + missingDepsInfoOffset = (int)header & missingDepsMask; + header >>>= missingDepsMask; + } + + Txn.Kind kind = TXN_ID_FLAG_BITS_KIND_LOOKUP[((int)header & txnIdFlagsMask)]; + header >>>= Integer.bitCount(txnIdFlagsMask); + + boolean hlcIsNegative = false; + long epoch = prevEpoch; + int readEpochBytes = 0; + { + boolean hasEpochDelta = (header & 1) == 1; + header >>>= 1; + if (hasEpochDelta) { - throw new RuntimeException(e); + hlcIsNegative = (header & 1) == 1; + header >>>= 1; + + int epochFlag = ((int)header & 0x3); + header >>>= 2; + switch (epochFlag) + { + default: throw new AssertionError("Unexpected value not 0-3"); + case 0: ++epoch; break; + case 1: epoch += (header & 0xf); header >>>= 4; break; + case 2: readEpochBytes = 1; break; + case 3: readEpochBytes = 4; break; + } } } - return buffer; + + Node.Id node = nodeIds[(int)(header & nodeIdMask)]; + header >>>= bitsPerNodeId; + + int readHlcBytes = getHlcBytes(hlcBytesLookup, (int)(header & 0x3)); + header >>>= 2; + + long hlc = header; + { + long highestBit = Long.highestOneBit(hlc); + hlc ^= highestBit; + int hlcShift = Long.numberOfTrailingZeros(highestBit); + hlc |= readLeastSignificantBytes(readHlcBytes, in) << hlcShift; + } + if (hlcIsNegative) + hlc = -1-hlc; + hlc += prevHlc; + + int flags = kind != null ? 0 : in.getShort(); + if (readEpochBytes > 0) + epoch += readEpochBytes == 1 ? (in.get() & 0xff) : in.getInt(); + + TxnId txnId = kind != null ? new TxnId(epoch, hlc, kind, Domain.Key, node) + : TxnId.fromValues(epoch, hlc, flags, node); + + txnIds[i] = txnId; + infos[i] = DECODE_INFOS[(executeAtInfoOffset | missingDepsInfoOffset)*STATUS_COUNT + status.ordinal()]; + + prevEpoch = epoch; + prevHlc = hlc; } - @Override - public TxnId txnId(ByteBuffer data) + if (executeAtMasks + missingDepsMasks > 0) { - return CommandSerializers.txnId.deserialize(data, accessor, TXNID_OFFSET); + TxnId[] missingIdBuffer = cachedTxnIds().get(8); + int missingIdCount = 0, maxIdBufferCount = 0; + int bitsPerTxnId = numberOfBitsToRepresent(commandCount); + int txnIdMask = (1 << bitsPerTxnId) - 1; + int bitsPerMissingId = bitsPerTxnId + 1; + + int decodeBits = executeAtMasks > 0 ? in.getShort() & 0xffff : 0; + int bitsPerEpochDelta = decodeBits >>> 10; + int bitsPerHlcDelta = 1 + ((decodeBits >>> 4) & 0x3f); + int bitsPerFlags = decodeBits & 0xf; + if (bitsPerFlags == 15) bitsPerFlags = 16; + int bitsPerExecuteAt = bitsPerEpochDelta + bitsPerHlcDelta + bitsPerFlags + bitsPerNodeId; + + long epochDeltaMask = bitsPerEpochDelta == 0 ? 0 : (-1L >>> (64 - bitsPerEpochDelta)); + long hlcDeltaMask = (-1L >>> (64 - bitsPerHlcDelta)); + long flagsMask = bitsPerFlags == 0 ? 0 : (-1L >>> (64 - bitsPerFlags)); + + final BitReader reader = new BitReader(); + + for (int i = 0 ; i < commandCount ; ++i) + { + Info info = infos[i]; + if (info.getClass() == NoInfo.class) + continue; + + TxnId txnId = txnIds[i]; + Timestamp executeAt = txnId; + if (info.executeAt == null) + { + long epoch, hlc; + int flags; + Node.Id id; + if (bitsPerExecuteAt <= 64) + { + long executeAtBits = reader.read(bitsPerExecuteAt, in); + epoch = txnId.epoch() + (executeAtBits & epochDeltaMask); + executeAtBits >>>= bitsPerEpochDelta; + hlc = txnId.hlc() + (executeAtBits & hlcDeltaMask); + executeAtBits >>>= bitsPerHlcDelta; + flags = (int)(executeAtBits & flagsMask); + executeAtBits >>>= bitsPerFlags; + id = nodeIds[(int)(executeAtBits & nodeIdMask)]; + } + else + { + epoch = txnId.epoch() + reader.read(bitsPerEpochDelta, in); + hlc = txnId.hlc() + reader.read(bitsPerHlcDelta, in); + flags = (int) reader.read(bitsPerFlags, in); + id = nodeIds[(int)(reader.read(bitsPerNodeId, in))]; + } + executeAt = Timestamp.fromValues(epoch, hlc, flags, id); + } + + TxnId[] missing = info.missing; + if (missing == null) + { + int prev = -1; + while (true) + { + if (missingIdCount == missingIdBuffer.length) + missingIdBuffer = cachedTxnIds().resize(missingIdBuffer, missingIdCount, missingIdCount * 2); + + int next = (int) reader.read(bitsPerMissingId, in); + Invariants.checkState(next > prev); + missingIdBuffer[missingIdCount++] = txnIds[next & txnIdMask]; + if (next >= commandCount) + break; // finished this array + prev = next; + } + + missing = Arrays.copyOf(missingIdBuffer, missingIdCount); + maxIdBufferCount = missingIdCount; + missingIdCount = 0; + } + + infos[i] = Info.create(txnId, info.status, executeAt, missing); + } + + cachedTxnIds().forceDiscard(missingIdBuffer, maxIdBufferCount); } - @Override - public Timestamp executeAt(ByteBuffer data) + return CommandsForKey.SerializerSupport.create(key, redundantBefore, txnIds, infos); + } + + private static int getHlcBytes(int lookup, int index) + { + return (lookup >>> (index * 4)) & 0xf; + } + + private static int setHlcBytes(int value1, int value2, int value3, int value4) + { + return value1 | (value2 << 4) | (value3 << 8) | (value4 << 12); + } + + private static int setHlcByteDeltas(int value1, int value2, int value3, int value4) + { + value2 += 1 + value1; + value3 += 1 + value2; + value4 += 1 + value3; + return setHlcBytes(value1, value2, value3, value4); + } + + private static int getHlcFlag(int flagsLookup, int bytes) + { + return (flagsLookup >>> (bytes * 2)) & 0x3; + } + + private static int hlcBytesLookupToHlcFlagLookup(int bytesLookup) + { + int flagsLookup = 0; + int flagIndex = 0; + for (int bytesIndex = 0 ; bytesIndex < 4 ; bytesIndex++) { - byte flags = accessor.getByte(data, FLAG_OFFSET); - if ((flags & HAS_EXECUTE_AT) == 0) - return null; - return CommandSerializers.timestamp.deserialize(data, accessor, EXECUTEAT_OFFSET); + int flagLimit = getHlcBytes(bytesLookup, bytesIndex); + while (flagIndex <= flagLimit) + flagsLookup |= bytesIndex << (2 * flagIndex++); } + return flagsLookup; + } - @Override - public SaveStatus saveStatus(ByteBuffer data) + private static int compact(int[] buffer) + { + Arrays.sort(buffer); + int count = 0; + int j = 0; + while (j < buffer.length) { - return SaveStatus.values()[accessor.getByte(data, STATUS_OFFSET)]; + int prev; + buffer[count++] = prev = buffer[j]; + while (++j < buffer.length && buffer[j] == prev) {} } + return count; + } + + private static int numberOfBitsToRepresent(long value) + { + return 64 - Long.numberOfLeadingZeros(value); + } - @Override - public List depsIds(ByteBuffer data) + private static int numberOfBitsToRepresent(int value) + { + return 32 - Integer.numberOfLeadingZeros(value); + } + + static final class BitReader + { + private long bitBuffer; + private int bitCount; + + long read(int readCount, ByteBuffer in) { - byte flags = accessor.getByte(data, FLAG_OFFSET); - if ((flags & HAS_DEPS) == 0) - return null; - ByteBuffer buffer = data.duplicate(); - int offset = (flags & HAS_EXECUTE_AT) == 0 ? EXECUTEAT_OFFSET : DEPS_OFFSET; - buffer.position(data.position() + offset); - try (DataInputBuffer in = new DataInputBuffer(buffer, false)) + long result = bitBuffer >>> (64 - readCount); + int remaining = bitCount - readCount; + if (remaining >= 0) { - return depsIdsLocalSerializer.deserialize(in); + bitBuffer <<= readCount; + bitCount = remaining; } - catch (IOException e) + else if (in.remaining() >= 8) { - throw new RuntimeException(e); + readCount -= bitCount; + bitBuffer = in.getLong(); + bitCount = 64 - readCount; + result |= (bitBuffer >>> bitCount); + bitBuffer <<= readCount; + } + else + { + readCount -= bitCount; + while (readCount > 8) + { + long next = in.get() & 0xff; + readCount -= 8; + result |= next << readCount; + } + long next = in.get() & 0xff; + bitCount = 8 - readCount; + result |= next >>> bitCount; + bitBuffer = next << (64 - bitCount); } + return result; } } + + enum TxnIdFlags + { + STANDARD, EXTENDED, RAW; + static final int EXTENDED_BITS = 0x2; + static final int RAW_BITS = 0x3; + } + + private static TxnIdFlags txnIdFlags(TxnId txnId) + { + if (txnId.flags() > Timestamp.IDENTITY_FLAGS || txnId.domain() != Domain.Key) + return RAW; + switch (txnId.kind()) + { + default: throw new AssertionError("Unhandled Kind: " + txnId.kind()); + case Read: + case Write: + return STANDARD; + case ExclusiveSyncPoint: + return EXTENDED; + case SyncPoint: + case LocalOnly: + case EphemeralRead: + return RAW; + } + } + + private static long txnIdFlagsBits(TxnId txnId) + { + switch (txnIdFlags(txnId)) + { + default: throw new AssertionError("Unhandled TxnIdFlag: " + txnIdFlags(txnId)); + case RAW: return RAW_BITS; + case EXTENDED: return EXTENDED_BITS; + case STANDARD: + return txnId.kind() == Read ? 0 : 1; + } + } + + private static final Txn.Kind[] TXN_ID_FLAG_BITS_KIND_LOOKUP = new Txn.Kind[] { Read, Write, ExclusiveSyncPoint, null }; + private static final int STATUS_COUNT = InternalStatus.values().length; + private static final Info[] DECODE_INFOS = new Info[4 * STATUS_COUNT]; + static + { + for (InternalStatus status : InternalStatus.values()) + { + int ordinal = status.ordinal(); + DECODE_INFOS[ordinal] = status.asNoInfo; + DECODE_INFOS[STATUS_COUNT+ordinal] = Info.createMock(status, Timestamp.NONE, null); + DECODE_INFOS[2*STATUS_COUNT+ordinal] = Info.createMock(status, null, CommandsForKey.NO_TXNIDS); + DECODE_INFOS[3*STATUS_COUNT+ordinal] = Info.createMock(status, null, null); + } + } + } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index cd704d3db116..629da34f7716 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -28,6 +28,7 @@ import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Unseekables; @@ -60,29 +61,30 @@ public void serializeBody(C msg, DataOutputPlus out, int version) throws IOExcep kind.serialize(msg.kind, out, version); CommandSerializers.ballot.serialize(msg.ballot, out, version); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); - CommandSerializers.nullablePartialTxn.serialize(msg.partialTxn, out, version); - DepsSerializer.partialDeps.serialize(msg.partialDeps, out, version); + KeySerializers.seekables.serialize(msg.keys, out, version); + CommandSerializers.nullablePartialTxn.serialize(msg.keys, msg.partialTxn, out, version); + DepsSerializer.partialDeps.serialize(msg.keys, msg.partialDeps, out, version); serializeNullable(msg.route, out, version, KeySerializers.fullRoute); serializeNullable(msg.readData, out, version, read); } protected abstract C deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, - @Nullable PartialTxn partialTxn, PartialDeps partialDeps, + Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read); @Override public C deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException { - return deserializeCommit(txnId, scope, waitForEpoch, - kind.deserialize(in, version), - CommandSerializers.ballot.deserialize(in, version), - CommandSerializers.timestamp.deserialize(in, version), - CommandSerializers.nullablePartialTxn.deserialize(in, version), - DepsSerializer.partialDeps.deserialize(in, version), - deserializeNullable(in, version, KeySerializers.fullRoute), - deserializeNullable(in, version, read) - ); + Commit.Kind kind = CommitSerializers.kind.deserialize(in, version); + Ballot ballot = CommandSerializers.ballot.deserialize(in, version); + Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); + Seekables keys = KeySerializers.seekables.deserialize(in, version); + PartialTxn txn = CommandSerializers.nullablePartialTxn.deserialize(keys, in, version); + PartialDeps deps = DepsSerializer.partialDeps.deserialize(keys, in, version); + FullRoute route = deserializeNullable(in, version, KeySerializers.fullRoute); + ReadData read = deserializeNullable(in, version, this.read); + return deserializeCommit(txnId, scope, waitForEpoch, kind, ballot, executeAt, keys, txn, deps, route, read); } @Override @@ -91,8 +93,9 @@ public long serializedBodySize(C msg, int version) return kind.serializedSize(msg.kind, version) + CommandSerializers.ballot.serializedSize(msg.ballot, version) + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) - + CommandSerializers.nullablePartialTxn.serializedSize(msg.partialTxn, version) - + DepsSerializer.partialDeps.serializedSize(msg.partialDeps, version) + + KeySerializers.seekables.serializedSize(msg.keys, version) + + CommandSerializers.nullablePartialTxn.serializedSize(msg.keys, msg.partialTxn, version) + + DepsSerializer.partialDeps.serializedSize(msg.keys, msg.partialDeps, version) + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) + serializedNullableSize(msg.readData, version, read); } @@ -101,13 +104,13 @@ public long serializedBodySize(C msg, int version) public static final IVersionedSerializer request = new CommitSerializer(ReadData.class, ReadDataSerializers.readData) { @Override - protected Commit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected Commit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { - return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, kind, ballot, executeAt, partialTxn, partialDeps, fullRoute, read); + return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, kind, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, read); } }; - public static final IVersionedSerializer invalidate = new IVersionedSerializer() + public static final IVersionedSerializer invalidate = new IVersionedSerializer<>() { @Override public void serialize(Commit.Invalidate invalidate, DataOutputPlus out, int version) throws IOException diff --git a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java index 9498bef0f2d4..0eba398ff95d 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java @@ -28,6 +28,7 @@ import accord.primitives.Range; import accord.primitives.RangeDeps; import accord.primitives.Ranges; +import accord.primitives.Seekables; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -39,9 +40,10 @@ import static accord.primitives.KeyDeps.SerializerSupport.keysToTxnIdsCount; import static accord.primitives.RangeDeps.SerializerSupport.rangesToTxnIds; import static accord.primitives.RangeDeps.SerializerSupport.rangesToTxnIdsCount; +import static accord.primitives.Routable.Domain.Key; import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; -public abstract class DepsSerializer implements IVersionedSerializer +public abstract class DepsSerializer extends IVersionedWithKeysSerializer.AbstractWithKeysSerializer implements IVersionedWithKeysSerializer, D> { public static final DepsSerializer deps = new DepsSerializer() { @@ -69,23 +71,85 @@ public void serialize(PartialDeps partialDeps, DataOutputPlus out, int version) KeySerializers.ranges.serialize(partialDeps.covering, out, version); } + @Override + public void serialize(Seekables keys, PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException + { + super.serialize(keys, partialDeps, out, version); + KeySerializers.ranges.serialize(partialDeps.covering, out, version); + } + @Override public long serializedSize(PartialDeps partialDeps, int version) { return super.serializedSize(partialDeps, version) + KeySerializers.ranges.serializedSize(partialDeps.covering, version); } + + @Override + public long serializedSize(Seekables keys, PartialDeps partialDeps, int version) + { + return super.serializedSize(keys, partialDeps, version) + + KeySerializers.ranges.serializedSize(partialDeps.covering, version); + } }; public static final IVersionedSerializer nullablePartialDeps = NullableSerializer.wrap(partialDeps); + abstract D deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) throws IOException; + @Override public void serialize(D deps, DataOutputPlus out, int version) throws IOException + { + KeySerializers.keys.serialize(deps.keyDeps.keys(), out, version); + serializeWithoutKeys(deps, out, version); + } + + @Override + public void serialize(Seekables keys, D deps, DataOutputPlus out, int version) throws IOException + { + if (keys.domain() == Key) serializeSubset(deps.keyDeps.keys(), keys, out); + else KeySerializers.keys.serialize(deps.keyDeps.keys(), out, version); + serializeWithoutKeys(deps, out, version); + } + + @Override + public D deserialize(DataInputPlus in, int version) throws IOException + { + Keys keys = KeySerializers.keys.deserialize(in, version); + return deserializeWithoutKeys(keys, in, version); + } + + @Override + public D deserialize(Seekables superset, DataInputPlus in, int version) throws IOException + { + Keys keys; + if (superset.domain() == Key) keys = (Keys)deserializeSubset(superset, in); + else keys = KeySerializers.keys.deserialize(in, version); + return deserializeWithoutKeys(keys, in, version); + } + + @Override + public long serializedSize(D deps, int version) + { + long size = KeySerializers.keys.serializedSize(deps.keyDeps.keys(), version); + size += serializedSizeWithoutKeys(deps, version); + return size; + } + + @Override + public long serializedSize(Seekables keys, D deps, int version) + { + long size; + if (keys.domain() == Key) size = serializedSubsetSize(deps.keyDeps.keys(), keys); + else size = KeySerializers.keys.serializedSize(deps.keyDeps.keys(), version); + size += serializedSizeWithoutKeys(deps, version); + return size; + } + + private void serializeWithoutKeys(D deps, DataOutputPlus out, int version) throws IOException { KeyDeps keyDeps = deps.keyDeps; { - KeySerializers.keys.serialize(keyDeps.keys(), out, version); - int txnIdCount = keyDeps.txnIdCount(); out.writeUnsignedVInt32(txnIdCount); for (int i = 0; i < txnIdCount; i++) @@ -116,13 +180,10 @@ public void serialize(D deps, DataOutputPlus out, int version) throws IOExceptio } } - @Override - public D deserialize(DataInputPlus in, int version) throws IOException + private D deserializeWithoutKeys(Keys keys, DataInputPlus in, int version) throws IOException { KeyDeps keyDeps; { - Keys keys = KeySerializers.keys.deserialize(in, version); - int txnIdCount = in.readUnsignedVInt32(); TxnId[] txnIds = new TxnId[txnIdCount]; for (int i = 0; i < txnIdCount; i++) @@ -159,17 +220,12 @@ public D deserialize(DataInputPlus in, int version) throws IOException return deserialize(keyDeps, rangeDeps, in, version); } - abstract D deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) throws IOException; - - @Override - public long serializedSize(D deps, int version) + private long serializedSizeWithoutKeys(D deps, int version) { long size = 0L; KeyDeps keyDeps = deps.keyDeps; { - size += KeySerializers.keys.serializedSize(keyDeps.keys(), version); - int txnIdCount = keyDeps.txnIdCount(); size += sizeofUnsignedVInt(txnIdCount); for (int i = 0; i < txnIdCount; i++) @@ -201,4 +257,5 @@ public long serializedSize(D deps, int version) return size; } + } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index 61e60f3dbd2a..370f88b73d8f 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -30,7 +30,7 @@ import accord.local.Status.Known; import accord.messages.CheckStatus; import accord.messages.Propagate; -import accord.messages.ReadData; +import accord.messages.ReadData.CommitOrReadNack; import accord.messages.ReadData.ReadReply; import accord.primitives.Ballot; import accord.primitives.PartialDeps; @@ -40,7 +40,6 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; -import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -60,14 +59,11 @@ public class FetchSerializers @Override public void serialize(FetchRequest request, DataOutputPlus out, int version) throws IOException { - Invariants.checkArgument(request.txnId.epoch() == request.executeAt.epoch()); - - out.writeUnsignedVInt(request.waitForEpoch()); + out.writeUnsignedVInt(request.executeAtEpoch); CommandSerializers.txnId.serialize(request.txnId, out, version); KeySerializers.ranges.serialize((Ranges) request.readScope, out, version); DepsSerializer.partialDeps.serialize(request.partialDeps, out, version); StreamingTxn.serializer.serialize(request.read, out, version); - out.writeBoolean(request.collectMaxApplied); } @Override @@ -77,25 +73,23 @@ public FetchRequest deserialize(DataInputPlus in, int version) throws IOExceptio CommandSerializers.txnId.deserialize(in, version), KeySerializers.ranges.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), - StreamingTxn.serializer.deserialize(in, version), - in.readBoolean()); + StreamingTxn.serializer.deserialize(in, version)); } @Override public long serializedSize(FetchRequest request, int version) { - return TypeSizes.sizeofUnsignedVInt(request.waitForEpoch()) + return TypeSizes.sizeofUnsignedVInt(request.executeAtEpoch) + CommandSerializers.txnId.serializedSize(request.txnId, version) + KeySerializers.ranges.serializedSize((Ranges) request.readScope, version) + DepsSerializer.partialDeps.serializedSize(request.partialDeps, version) - + StreamingTxn.serializer.serializedSize(request.read, version) - + TypeSizes.BYTE_SIZE; + + StreamingTxn.serializer.serializedSize(request.read, version); } }; public static final IVersionedSerializer reply = new IVersionedSerializer() { - final ReadData.CommitOrReadNack[] nacks = ReadData.CommitOrReadNack.values(); + final CommitOrReadNack[] nacks = CommitOrReadNack.values(); final IVersionedSerializer streamDataSerializer = new CastingSerializer<>(StreamData.class, StreamData.serializer); @Override @@ -103,7 +97,7 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I { if (!reply.isOk()) { - out.writeByte(1 + ((ReadData.CommitOrReadNack) reply).ordinal()); + out.writeByte(1 + ((CommitOrReadNack) reply).ordinal()); return; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java new file mode 100644 index 000000000000..44d202d716b5 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.GetEphemeralReadDeps; +import accord.messages.GetEphemeralReadDeps.GetEphemeralReadDepsOk; +import accord.primitives.PartialDeps; +import accord.primitives.PartialRoute; +import accord.primitives.Seekables; +import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class GetEphmrlReadDepsSerializers +{ + public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() + { + @Override + public void serializeBody(GetEphemeralReadDeps msg, DataOutputPlus out, int version) throws IOException + { + KeySerializers.seekables.serialize(msg.keys, out, version); + out.writeUnsignedVInt(msg.executionEpoch); + } + + @Override + public GetEphemeralReadDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + { + Seekables keys = KeySerializers.seekables.deserialize(in, version); + long executionEpoch = in.readUnsignedVInt(); + return GetEphemeralReadDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, keys, executionEpoch); + } + + @Override + public long serializedBodySize(GetEphemeralReadDeps msg, int version) + { + return KeySerializers.seekables.serializedSize(msg.keys, version) + + TypeSizes.sizeofUnsignedVInt(msg.executionEpoch); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(GetEphemeralReadDepsOk reply, DataOutputPlus out, int version) throws IOException + { + DepsSerializer.partialDeps.serialize(reply.deps, out, version); + out.writeUnsignedVInt(reply.latestEpoch); + } + + @Override + public GetEphemeralReadDepsOk deserialize(DataInputPlus in, int version) throws IOException + { + PartialDeps deps = DepsSerializer.partialDeps.deserialize(in, version); + long latestEpoch = in.readUnsignedVInt(); + return new GetEphemeralReadDepsOk(deps, latestEpoch); + } + + @Override + public long serializedSize(GetEphemeralReadDepsOk reply, int version) + { + return DepsSerializer.partialDeps.serializedSize(reply.deps, version) + + TypeSizes.sizeofUnsignedVInt(reply.latestEpoch); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java new file mode 100644 index 000000000000..d50e6993b9c5 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.messages.GetMaxConflict; +import accord.messages.GetMaxConflict.GetMaxConflictOk; +import accord.primitives.PartialRoute; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +public class GetMaxConflictSerializers +{ + public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() + { + @Override + public void serializeBody(GetMaxConflict msg, DataOutputPlus out, int version) throws IOException + { + KeySerializers.seekables.serialize(msg.keys, out, version); + out.writeUnsignedVInt(msg.executionEpoch); + } + + @Override + public GetMaxConflict deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + { + Seekables keys = KeySerializers.seekables.deserialize(in, version); + long executionEpoch = in.readUnsignedVInt(); + return GetMaxConflict.SerializationSupport.create(scope, waitForEpoch, minEpoch, keys, executionEpoch); + } + + @Override + public long serializedBodySize(GetMaxConflict msg, int version) + { + return KeySerializers.seekables.serializedSize(msg.keys, version) + + TypeSizes.sizeofUnsignedVInt(msg.executionEpoch); + } + }; + + public static final IVersionedSerializer reply = new IVersionedSerializer() + { + @Override + public void serialize(GetMaxConflictOk reply, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.timestamp.serialize(reply.maxConflict, out, version); + out.writeUnsignedVInt(reply.latestEpoch); + } + + @Override + public GetMaxConflictOk deserialize(DataInputPlus in, int version) throws IOException + { + Timestamp maxConflict = CommandSerializers.timestamp.deserialize(in, version); + long latestEpoch = in.readUnsignedVInt(); + return new GetMaxConflictOk(maxConflict, latestEpoch); + } + + @Override + public long serializedSize(GetMaxConflictOk reply, int version) + { + return CommandSerializers.timestamp.serializedSize(reply.maxConflict, version) + + TypeSizes.sizeofUnsignedVInt(reply.latestEpoch); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java new file mode 100644 index 000000000000..ef4b6d420201 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java @@ -0,0 +1,414 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.api.Key; +import accord.primitives.AbstractKeys; +import accord.primitives.Keys; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.RoutableKey; +import accord.primitives.Routables; +import accord.primitives.Seekables; +import net.nicoulaj.compilecommand.annotations.DontInline; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; + +import static accord.utils.SortedArrays.Search.FAST; + +/** + * De/serialize a structure that can refer to a known superset of RoutingKeys/Keys/Ranges... + */ +public interface IVersionedWithKeysSerializer, T> extends IVersionedSerializer +{ + /** + * Serialize the specified type into the specified DataOutputStream instance. + * + * @param t type that needs to be serialized + * @param out DataOutput into which serialization needs to happen. + * @param version protocol version + * @throws IOException if serialization fails + */ + void serialize(K keys, T t, DataOutputPlus out, int version) throws IOException; + + /** + * Deserialize into the specified DataInputStream instance. + * @param in DataInput from which deserialization needs to happen. + * @param version protocol version + * @return the type that was deserialized + * @throws IOException if deserialization fails + */ + T deserialize(K keys, DataInputPlus in, int version) throws IOException; + + /** + * Calculate serialized size of object without actually serializing. + * @param t object to calculate serialized size + * @param version protocol version + * @return serialized size of object t + */ + long serializedSize(K keys, T t, int version); + + final class NullableWithKeysSerializer, T> implements IVersionedWithKeysSerializer + { + final IVersionedWithKeysSerializer wrapped; + public NullableWithKeysSerializer(IVersionedWithKeysSerializer wrapped) + { + this.wrapped = wrapped; + } + + @Override + public void serialize(T t, DataOutputPlus out, int version) throws IOException + { + out.writeByte(t == null ? 0 : 1); + if (t != null) wrapped.serialize(t, out, version); + } + + @Override + public T deserialize(DataInputPlus in, int version) throws IOException + { + if (in.readByte() == 0) return null; + return wrapped.deserialize(in, version); + } + + @Override + public long serializedSize(T t, int version) + { + return t == null ? 1 : 1 + wrapped.serializedSize(t, version); + } + + @Override + public void serialize(K keys, T t, DataOutputPlus out, int version) throws IOException + { + out.writeByte(t == null ? 0 : 1); + if (t != null) wrapped.serialize(keys, t, out, version); + } + + @Override + public T deserialize(K keys, DataInputPlus in, int version) throws IOException + { + if (in.readByte() == 0) return null; + return wrapped.deserialize(keys, in, version); + } + + @Override + public long serializedSize(K keys, T t, int version) + { + return t == null ? 1 : 1 + wrapped.serializedSize(keys, t, version); + } + } + + abstract class AbstractWithKeysSerializer + { + /** + * If both ends have a pre-shared superset of the columns we are serializing, we can send them much + * more efficiently. Both ends must provide the identically same set of columns. + */ + protected void serializeSubset(Seekables serialize, Seekables superset, DataOutputPlus out) throws IOException + { + /** + * We weight this towards small sets, and sets where the majority of items are present, since + * we expect this to mostly be used for serializing result sets. + * + * For supersets with fewer than 64 columns, we encode a bitmap of *missing* columns, + * which equates to a zero (single byte) when all columns are present, and otherwise + * a positive integer that can typically be vint encoded efficiently. + * + * If we have 64 or more columns, we cannot neatly perform a bitmap encoding, so we just switch + * to a vint encoded set of deltas, either adding or subtracting (whichever is most efficient). + * We indicate this switch by sending our bitmap with every bit set, i.e. -1L + */ + int serializeCount = serialize.size(); + int supersetCount = superset.size(); + if (serializeCount == supersetCount) + { + out.writeUnsignedVInt(0L); + } + else if (supersetCount < 64) + { + switch (serialize.domain()) + { + default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + case Key: + out.writeUnsignedVInt(encodeBitmap((Keys)serialize, (Keys)superset, supersetCount)); + break; + case Range: + out.writeUnsignedVInt(encodeBitmap((Ranges)serialize, (Ranges)superset, supersetCount)); + break; + } + } + else + { + switch (serialize.domain()) + { + default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + case Key: + serializeLargeSubset((Keys)serialize, serializeCount, (Keys)superset, supersetCount, out); + break; + case Range: + serializeLargeSubset((Ranges)serialize, serializeCount, (Ranges)superset, supersetCount, out); + break; + } + } + } + + public long serializedSubsetSize(Seekables serialize, Seekables superset) + { + int columnCount = serialize.size(); + int supersetCount = superset.size(); + if (columnCount == supersetCount) + { + return TypeSizes.sizeofUnsignedVInt(0); + } + else if (supersetCount < 64) + { + switch (serialize.domain()) + { + default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + case Key: + return TypeSizes.sizeofUnsignedVInt(encodeBitmap((Keys)serialize, (Keys)superset, supersetCount)); + case Range: + return TypeSizes.sizeofUnsignedVInt(encodeBitmap((Ranges)serialize, (Ranges)superset, supersetCount)); + } + } + else + { + switch (serialize.domain()) + { + default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + case Key: + return serializeLargeSubsetSize((Keys)serialize, columnCount, (Keys)superset, supersetCount); + case Range: + return serializeLargeSubsetSize((Ranges)serialize, columnCount, (Ranges)superset, supersetCount); + } + } + } + + public Seekables deserializeSubset(Seekables superset, DataInputPlus in) throws IOException + { + long encoded = in.readUnsignedVInt(); + int supersetCount = superset.size(); + if (encoded == 0L) + { + return superset; + } + else if (supersetCount >= 64) + { + return deserializeLargeSubset(in, superset, supersetCount, (int) encoded); + } + else + { + encoded ^= -1L >>> (64 - supersetCount); + int deserializeCount = Long.bitCount(encoded); + switch (superset.domain()) + { + default: throw new AssertionError("Unhandled domain: " + superset.domain()); + case Key: + { + Keys keys = (Keys)superset; + Key[] out = new Key[deserializeCount]; + int count = 0; + while (encoded != 0) + { + long lowestBit = Long.lowestOneBit(encoded); + out[count++] = keys.get(Long.numberOfTrailingZeros(lowestBit)); + encoded ^= lowestBit; + } + return Keys.ofSortedUnique(out); + } + case Range: + { + Ranges ranges = (Ranges)superset; + Range[] out = new Range[deserializeCount]; + int count = 0; + while (encoded != 0) + { + long lowestBit = Long.lowestOneBit(encoded); + out[count++] = ranges.get(Long.numberOfTrailingZeros(lowestBit)); + encoded ^= lowestBit; + } + return Ranges.ofSortedAndDeoverlapped(out); + } + } + } + } + + // encodes a 1 bit for every *missing* column, on the assumption presence is more common, + // and because this is consistent with encoding 0 to represent all present + private static long encodeBitmap(AbstractKeys serialize, AbstractKeys superset, int supersetCount) + { + // the index we would encounter next if all columns are present + long bitmap = superset.foldl(serialize, (k, p1, v, i) -> { + return v | (1L << i); + }, 0L, 0L, -1L); + bitmap ^= -1L >>> (64 - supersetCount); + return bitmap; + } + + private static long encodeBitmap(Ranges serialize, Ranges superset, int supersetCount) + { + // the index we would encounter next if all columns are present + long bitmap = superset.foldl(serialize, (k, p1, v, i) -> { + return v | (1L << i); + }, 0L, 0L, -1L); + bitmap ^= -1L >>> (64 - supersetCount); + return bitmap; + } + + @DontInline + private void serializeLargeSubset(AbstractKeys serialize, int serializeCount, AbstractKeys superset, int supersetCount, DataOutputPlus out) throws IOException + { + out.writeUnsignedVInt32(supersetCount - serializeCount); + int serializeIndex = 0, supersetIndex = 0; + while (serializeIndex < serializeCount) + { + int prevSupersetIndex = supersetIndex; + int nextSupersetIndex; + do + { + nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); + if (supersetIndex + 1 != nextSupersetIndex) + break; + supersetIndex++; + } + while (serializeIndex < serializeCount); + + out.writeUnsignedVInt32(supersetIndex - prevSupersetIndex); + out.writeUnsignedVInt32(nextSupersetIndex - supersetIndex); + supersetIndex = nextSupersetIndex; + } + } + + @DontInline + private void serializeLargeSubset(Ranges serialize, int serializeCount, Ranges superset, int supersetCount, DataOutputPlus out) throws IOException + { + out.writeUnsignedVInt32(supersetCount - serializeCount); + int serializeIndex = 0, supersetIndex = 0; + while (serializeIndex < serializeCount) + { + int prevSupersetIndex = supersetIndex; + int nextSupersetIndex; + do + { + nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); + if (supersetIndex + 1 != nextSupersetIndex) + break; + supersetIndex++; + } + while (serializeIndex < serializeCount); + + out.writeUnsignedVInt32(supersetIndex - prevSupersetIndex); + out.writeUnsignedVInt32(nextSupersetIndex - supersetIndex); + supersetIndex = nextSupersetIndex; + } + } + + @DontInline + private Seekables deserializeLargeSubset(DataInputPlus in, Seekables superset, int supersetCount, int delta) throws IOException + { + int deserializeCount = supersetCount - delta; + switch (superset.domain()) + { + default: throw new AssertionError("Unhandled domain: " + superset.domain()); + case Key: + { + Keys keys = (Keys)superset; + Key[] out = new Key[deserializeCount]; + int supersetIndex = 0; + int count = 0; + while (count < deserializeCount) + { + int takeCount = in.readUnsignedVInt32(); + while (takeCount-- > 0) out[count++] = keys.get(supersetIndex++); + supersetIndex += in.readUnsignedVInt32(); + } + return Keys.ofSortedUnique(out); + } + case Range: + { + Ranges ranges = (Ranges)superset; + Range[] out = new Range[deserializeCount]; + int supersetIndex = 0; + int count = 0; + while (count < deserializeCount) + { + int takeCount = in.readUnsignedVInt32(); + while (takeCount-- > 0) out[count++] = ranges.get(supersetIndex++); + supersetIndex += in.readUnsignedVInt32(); + } + return Ranges.ofSortedAndDeoverlapped(out); + } + } + } + + @DontInline + private long serializeLargeSubsetSize(AbstractKeys serialize, int serializeCount, AbstractKeys superset, int supersetCount) + { + long size = TypeSizes.sizeofUnsignedVInt(supersetCount - serializeCount); + int serializeIndex = 0, supersetIndex = 0; + while (serializeIndex < serializeCount) + { + int prevSupersetIndex = supersetIndex; + int nextSupersetIndex; + do + { + nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); + if (supersetIndex + 1 != nextSupersetIndex) + break; + supersetIndex++; + } + while (serializeIndex < serializeCount); + + size += TypeSizes.sizeofUnsignedVInt(supersetIndex - prevSupersetIndex); + size += TypeSizes.sizeofUnsignedVInt(nextSupersetIndex - supersetIndex); + supersetIndex = nextSupersetIndex; + } + return size; + } + + @DontInline + private long serializeLargeSubsetSize(Ranges serialize, int serializeCount, Ranges superset, int supersetCount) + { + long size = TypeSizes.sizeofUnsignedVInt(supersetCount - serializeCount); + int serializeIndex = 0, supersetIndex = 0; + while (serializeIndex < serializeCount) + { + int prevSupersetIndex = supersetIndex; + int nextSupersetIndex; + do + { + nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); + if (supersetIndex + 1 != nextSupersetIndex) + break; + supersetIndex++; + } + while (serializeIndex < serializeCount); + + size += TypeSizes.sizeofUnsignedVInt(supersetIndex - prevSupersetIndex); + size += TypeSizes.sizeofUnsignedVInt(nextSupersetIndex - supersetIndex); + supersetIndex = nextSupersetIndex; + } + return size; + } + } + +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index 8b102ae13dd1..6d2e86e8bbff 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -211,6 +211,8 @@ public long serializedSize(FullRangeRoute ranges, int version) EnumSet.of(UnseekablesKind.FullKeyRoute, UnseekablesKind.FullRangeRoute) ); + public static final IVersionedSerializer> nullableFullRoute = NullableSerializer.wrap(fullRoute); + public static final IVersionedSerializer> unseekables = new AbstractRoutablesSerializer<>( EnumSet.allOf(UnseekablesKind.class) ); @@ -352,6 +354,8 @@ public long serializedSize(Seekables t, int version) } }; + public static final IVersionedSerializer> nullableSeekables = NullableSerializer.wrap(seekables); + public static abstract class AbstractKeysSerializer> implements IVersionedSerializer { final IVersionedSerializer keySerializer; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java index f649a3b32520..150c5af3940b 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java @@ -20,32 +20,23 @@ import java.io.IOException; -import accord.impl.CommandsForKey; import accord.local.Command; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.CommandsForRanges; -import org.apache.cassandra.service.accord.api.PartitionKey; public class ListenerSerializers { public enum Kind { - COMMAND, COMMANDS_FOR_KEY, COMMANDS_FOR_RANGE; + COMMAND; private static Kind of(Command.DurableAndIdempotentListener listener) { if (listener instanceof Command.ProxyListener) return COMMAND; - if (listener instanceof CommandsForKey.Listener) - return COMMANDS_FOR_KEY; - - if (listener instanceof CommandsForRanges.Listener) - return COMMANDS_FOR_RANGE; - throw new IllegalArgumentException("Unsupported listener type: " + listener.getClass().getName()); } } @@ -72,48 +63,6 @@ public long serializedSize(Command.ProxyListener listener, int version) } }; - private static final IVersionedSerializer cfrListener = new IVersionedSerializer() - { - @Override - public void serialize(CommandsForRanges.Listener listener, DataOutputPlus out, int version) throws IOException - { - CommandSerializers.txnId.serialize(listener.txnId, out, version); - } - - @Override - public CommandsForRanges.Listener deserialize(DataInputPlus in, int version) throws IOException - { - return new CommandsForRanges.Listener(CommandSerializers.txnId.deserialize(in, version)); - } - - @Override - public long serializedSize(CommandsForRanges.Listener listener, int version) - { - return CommandSerializers.txnId.serializedSize(listener.txnId, version); - } - }; - - private static final IVersionedSerializer cfkListener = new IVersionedSerializer() - { - @Override - public void serialize(CommandsForKey.Listener listener, DataOutputPlus out, int version) throws IOException - { - PartitionKey.serializer.serialize((PartitionKey) listener.key(), out, version); - } - - @Override - public CommandsForKey.Listener deserialize(DataInputPlus in, int version) throws IOException - { - return CommandsForKey.SerializerSupport.listener(PartitionKey.serializer.deserialize(in, version)); - } - - @Override - public long serializedSize(CommandsForKey.Listener listener, int version) - { - return PartitionKey.serializer.serializedSize((PartitionKey) listener.key(), version); - } - }; - public static final IVersionedSerializer listener = new IVersionedSerializer() { @Override @@ -126,12 +75,6 @@ public void serialize(Command.DurableAndIdempotentListener listener, DataOutputP case COMMAND: commandListener.serialize((Command.ProxyListener) listener, out, version); break; - case COMMANDS_FOR_KEY: - cfkListener.serialize((CommandsForKey.Listener) listener, out, version); - break; - case COMMANDS_FOR_RANGE: - cfrListener.serialize((CommandsForRanges.Listener) listener, out, version); - break; default: throw new IllegalArgumentException(); } @@ -145,10 +88,6 @@ public Command.DurableAndIdempotentListener deserialize(DataInputPlus in, int ve { case COMMAND: return commandListener.deserialize(in, version); - case COMMANDS_FOR_KEY: - return cfkListener.deserialize(in, version); - case COMMANDS_FOR_RANGE: - return cfrListener.deserialize(in, version); default: throw new IllegalArgumentException(); } @@ -164,12 +103,6 @@ public long serializedSize(Command.DurableAndIdempotentListener listener, int ve case COMMAND: size += commandListener.serializedSize((Command.ProxyListener) listener, version); break; - case COMMANDS_FOR_KEY: - size += cfkListener.serializedSize((CommandsForKey.Listener) listener, version); - break; - case COMMANDS_FOR_RANGE: - size += cfrListener.serializedSize((CommandsForRanges.Listener) listener, version); - break; default: throw new IllegalArgumentException(); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index cfae34db4f6b..1b48a535549a 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -27,11 +27,14 @@ import accord.messages.ReadData.ReadOk; import accord.messages.ReadData.ReadReply; import accord.messages.ReadData.ReadType; +import accord.messages.ReadEphemeralTxnData; import accord.messages.ReadTxnData; import accord.messages.WaitUntilApplied; +import accord.primitives.FullRoute; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; import accord.primitives.Participants; import accord.primitives.Ranges; -import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -74,15 +77,17 @@ public long serializedSize(ReadData t, int version) public static class ApplyThenWaitUntilAppliedSerializer implements ReadDataSerializer { @Override - public void serialize(ApplyThenWaitUntilApplied applyThenWaitUntilApplied, DataOutputPlus out, int version) throws IOException + public void serialize(ApplyThenWaitUntilApplied msg, DataOutputPlus out, int version) throws IOException { - CommandSerializers.txnId.serialize(applyThenWaitUntilApplied.txnId, out, version); - KeySerializers.partialRoute.serialize(applyThenWaitUntilApplied.route, out, version); - DepsSerializer.partialDeps.serialize(applyThenWaitUntilApplied.deps, out, version); - KeySerializers.seekables.serialize(applyThenWaitUntilApplied.partialTxnKeys, out, version); - CommandSerializers.writes.serialize(applyThenWaitUntilApplied.writes, out, version); - TxnResult.serializer.serialize((TxnResult) applyThenWaitUntilApplied.txnResult, out, version); - out.writeBoolean(applyThenWaitUntilApplied.notifyAgent); + CommandSerializers.txnId.serialize(msg.txnId, out, version); + KeySerializers.participants.serialize(msg.readScope, out, version); + out.writeUnsignedVInt(msg.executeAtEpoch); + KeySerializers.fullRoute.serialize(msg.route, out, version); + CommandSerializers.partialTxn.serialize(msg.txn, out, version); + DepsSerializer.partialDeps.serialize(msg.deps, out, version); + CommandSerializers.writes.serialize(msg.writes, out, version); + TxnResult.serializer.serialize((TxnResult) msg.result, out, version); + KeySerializers.nullableSeekables.serialize(msg.notify, out, version); } @Override @@ -90,24 +95,28 @@ public ApplyThenWaitUntilApplied deserialize(DataInputPlus in, int version) thro { return ApplyThenWaitUntilApplied.SerializerSupport.create( CommandSerializers.txnId.deserialize(in, version), - KeySerializers.partialRoute.deserialize(in, version), + KeySerializers.participants.deserialize(in, version), + in.readUnsignedVInt(), + KeySerializers.fullRoute.deserialize(in, version), + CommandSerializers.partialTxn.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), - KeySerializers.seekables.deserialize(in, version), CommandSerializers.writes.deserialize(in, version), TxnResult.serializer.deserialize(in, version), - in.readBoolean()); + KeySerializers.nullableSeekables.deserialize(in, version)); } @Override - public long serializedSize(ApplyThenWaitUntilApplied applyThenWaitUntilApplied, int version) + public long serializedSize(ApplyThenWaitUntilApplied msg, int version) { - return CommandSerializers.txnId.serializedSize(applyThenWaitUntilApplied.txnId, version) - + KeySerializers.partialRoute.serializedSize(applyThenWaitUntilApplied.route, version) - + DepsSerializer.partialDeps.serializedSize(applyThenWaitUntilApplied.deps, version) - + KeySerializers.seekables.serializedSize(applyThenWaitUntilApplied.partialTxnKeys, version) - + CommandSerializers.writes.serializedSize(applyThenWaitUntilApplied.writes, version) - + TxnResult.serializer.serializedSize((TxnData)applyThenWaitUntilApplied.txnResult, version) - + sizeof(applyThenWaitUntilApplied.notifyAgent); + return CommandSerializers.txnId.serializedSize(msg.txnId, version) + + KeySerializers.participants.serializedSize(msg.readScope, version) + + TypeSizes.sizeofUnsignedVInt(msg.executeAtEpoch) + + KeySerializers.fullRoute.serializedSize(msg.route, version) + + CommandSerializers.partialTxn.serializedSize(msg.txn, version) + + DepsSerializer.partialDeps.serializedSize(msg.deps, version) + + CommandSerializers.writes.serializedSize(msg.writes, version) + + TxnResult.serializer.serializedSize((TxnData)msg.result, version) + + KeySerializers.nullableSeekables.serializedSize(msg.notify, version); } } @@ -118,8 +127,7 @@ public void serialize(ReadTxnData read, DataOutputPlus out, int version) throws { CommandSerializers.txnId.serialize(read.txnId, out, version); KeySerializers.participants.serialize(read.readScope, out, version); - out.writeUnsignedVInt(read.waitForEpoch()); - out.writeUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); + out.writeUnsignedVInt(read.executeAtEpoch); } @Override @@ -127,9 +135,8 @@ public ReadTxnData deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); Participants readScope = KeySerializers.participants.deserialize(in, version); - long waitForEpoch = in.readUnsignedVInt(); - long executeAtEpoch = in.readUnsignedVInt() + waitForEpoch; - return ReadTxnData.SerializerSupport.create(txnId, readScope, executeAtEpoch, waitForEpoch); + long executeAtEpoch = in.readUnsignedVInt(); + return ReadTxnData.SerializerSupport.create(txnId, readScope, executeAtEpoch); } @Override @@ -137,8 +144,44 @@ public long serializedSize(ReadTxnData read, int version) { return CommandSerializers.txnId.serializedSize(read.txnId, version) + KeySerializers.participants.serializedSize(read.readScope, version) - + TypeSizes.sizeofUnsignedVInt(read.waitForEpoch()) - + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch - read.waitForEpoch()); + + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch); + } + }; + + private static final ReadDataSerializer readEphemeralTxnData = new ReadDataSerializer() + { + @Override + public void serialize(ReadEphemeralTxnData read, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(read.txnId, out, version); + KeySerializers.participants.serialize(read.readScope, out, version); + out.writeUnsignedVInt(read.executeAtEpoch); + CommandSerializers.partialTxn.serialize(read.partialTxn, out, version); + DepsSerializer.partialDeps.serialize(read.partialDeps, out, version); + KeySerializers.fullRoute.serialize(read.route, out, version); + } + + @Override + public ReadEphemeralTxnData deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Participants readScope = KeySerializers.participants.deserialize(in, version); + long executeAtEpoch = in.readUnsignedVInt(); + PartialTxn partialTxn = CommandSerializers.partialTxn.deserialize(in, version); + PartialDeps partialDeps = DepsSerializer.partialDeps.deserialize(in, version); + FullRoute route = KeySerializers.fullRoute.deserialize(in, version); + return ReadEphemeralTxnData.SerializerSupport.create(txnId, readScope, executeAtEpoch, partialTxn, partialDeps, route); + } + + @Override + public long serializedSize(ReadEphemeralTxnData read, int version) + { + return CommandSerializers.txnId.serializedSize(read.txnId, version) + + KeySerializers.participants.serializedSize(read.readScope, version) + + TypeSizes.sizeofUnsignedVInt(read.executeAtEpoch) + + CommandSerializers.partialTxn.serializedSize(read.partialTxn, version) + + DepsSerializer.partialDeps.serializedSize(read.partialDeps, version) + + KeySerializers.fullRoute.serializedSize(read.route, version); } }; @@ -160,6 +203,8 @@ private static ReadDataSerializer serializerFor(ReadType type) { case readTxnData: return readTxnData; + case readDataWithoutTimestamp: + return readEphemeralTxnData; case applyThenWaitUntilApplied: return applyThenWaitUntilApplied; case waitUntilApplied: @@ -171,7 +216,7 @@ private static ReadDataSerializer serializerFor(ReadType type) public static final class ReplySerializer implements IVersionedSerializer { - // TODO (now): use something other than ordinal + // TODO (expected): use something other than ordinal final CommitOrReadNack[] nacks = CommitOrReadNack.values(); private final IVersionedSerializer dataSerializer; @@ -185,26 +230,33 @@ public void serialize(ReadReply reply, DataOutputPlus out, int version) throws I { if (!reply.isOk()) { - out.writeByte(1 + ((CommitOrReadNack) reply).ordinal()); + out.writeByte(2 + ((CommitOrReadNack) reply).ordinal()); return; } - out.writeByte(0); + boolean isFutureEpochOk = reply.getClass() == ReadData.ReadOkWithFutureEpoch.class; + out.writeByte(isFutureEpochOk ? 1 : 0); ReadOk readOk = (ReadOk) reply; serializeNullable(readOk.unavailable, out, version, KeySerializers.ranges); dataSerializer.serialize((D) readOk.data, out, version); + if (isFutureEpochOk) + out.writeUnsignedVInt(((ReadData.ReadOkWithFutureEpoch) reply).futureEpoch); } @Override public ReadReply deserialize(DataInputPlus in, int version) throws IOException { int id = in.readByte(); - if (id != 0) - return nacks[id - 1]; + if (id > 1) + return nacks[id - 2]; - Ranges ranges = deserializeNullable(in, version, KeySerializers.ranges); + Ranges unavailable = deserializeNullable(in, version, KeySerializers.ranges); D data = dataSerializer.deserialize(in, version); - return new ReadOk(ranges, data); + if (id == 0) + return new ReadOk(unavailable, data); + + long futureEpoch = in.readUnsignedVInt(); + return new ReadData.ReadOkWithFutureEpoch(unavailable, data, futureEpoch); } @Override @@ -214,9 +266,12 @@ public long serializedSize(ReadReply reply, int version) return TypeSizes.BYTE_SIZE; ReadOk readOk = (ReadOk) reply; - return TypeSizes.BYTE_SIZE - + serializedNullableSize(readOk.unavailable, version, KeySerializers.ranges) - + dataSerializer.serializedSize((D) readOk.data, version); + long size = TypeSizes.BYTE_SIZE + + serializedNullableSize(readOk.unavailable, version, KeySerializers.ranges) + + dataSerializer.serializedSize((D) readOk.data, version); + if (readOk instanceof ReadData.ReadOkWithFutureEpoch) + size += TypeSizes.sizeofUnsignedVInt(((ReadData.ReadOkWithFutureEpoch) readOk).futureEpoch); + return size; } } @@ -230,8 +285,7 @@ public void serialize(WaitUntilApplied waitUntilApplied, DataOutputPlus out, int { CommandSerializers.txnId.serialize(waitUntilApplied.txnId, out, version); KeySerializers.participants.serialize(waitUntilApplied.readScope, out, version); - out.writeUnsignedVInt(waitUntilApplied.waitForEpoch()); - CommandSerializers.timestamp.serialize(waitUntilApplied.executeAt, out , version); + out.writeUnsignedVInt(waitUntilApplied.executeAtEpoch); } @Override @@ -239,9 +293,8 @@ public WaitUntilApplied deserialize(DataInputPlus in, int version) throws IOExce { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); Participants readScope = KeySerializers.participants.deserialize(in, version); - long waitForEpoch = in.readUnsignedVInt(); - Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); - return WaitUntilApplied.SerializerSupport.create(txnId, readScope, executeAt, waitForEpoch); + long executeAtEpoch = in.readUnsignedVInt(); + return WaitUntilApplied.SerializerSupport.create(txnId, readScope, executeAtEpoch); } @Override @@ -249,8 +302,7 @@ public long serializedSize(WaitUntilApplied waitUntilApplied, int version) { return CommandSerializers.txnId.serializedSize(waitUntilApplied.txnId, version) + KeySerializers.participants.serializedSize(waitUntilApplied.readScope, version) - + TypeSizes.sizeofUnsignedVInt(waitUntilApplied.waitForEpoch()) - + CommandSerializers.timestamp.serializedSize(waitUntilApplied.executeAt, version); + + TypeSizes.sizeofUnsignedVInt(waitUntilApplied.executeAtEpoch); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java index e9cabbbd1512..f42ff8687035 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java @@ -26,7 +26,6 @@ import accord.primitives.Seekables; import accord.primitives.SyncPoint; import accord.primitives.TxnId; -import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -59,19 +58,21 @@ public long serializedSize(SetShardDurable msg, int version) @Override public void serialize(SetGloballyDurable msg, DataOutputPlus out, int version) throws IOException { + CommandSerializers.txnId.serialize(msg.txnId, out, version); CommandStoreSerializers.durableBefore.serialize(msg.durableBefore, out, version); } @Override public SetGloballyDurable deserialize(DataInputPlus in, int version) throws IOException { - return new SetGloballyDurable(CommandStoreSerializers.durableBefore.deserialize(in, version)); + return new SetGloballyDurable(CommandSerializers.txnId.deserialize(in, version), CommandStoreSerializers.durableBefore.deserialize(in, version)); } @Override public long serializedSize(SetGloballyDurable msg, int version) { - return CommandStoreSerializers.durableBefore.serializedSize(msg.durableBefore, version); + return CommandSerializers.txnId.serializedSize(msg.txnId, version) + + CommandStoreSerializers.durableBefore.serializedSize(msg.durableBefore, version); } }; @@ -84,7 +85,6 @@ public void serialize(SyncPoint sp, DataOutputPlus out, int version) throws IOEx DepsSerializer.deps.serialize(sp.waitFor, out, version); KeySerializers.seekables.serialize(sp.keysOrRanges, out, version); KeySerializers.routingKey.serialize(sp.homeKey, out, version); - out.writeBoolean(sp.finishedAsync); } @Override @@ -94,8 +94,7 @@ public SyncPoint deserialize(DataInputPlus in, int version) throws IOException Deps waitFor = DepsSerializer.deps.deserialize(in, version); Seekables keysOrRanges = KeySerializers.seekables.deserialize(in, version); RoutingKey homeKey = KeySerializers.routingKey.deserialize(in, version); - boolean finishedAsync = in.readBoolean(); - return SyncPoint.SerializationSupport.construct(syncId, waitFor, keysOrRanges, homeKey, finishedAsync); + return SyncPoint.SerializationSupport.construct(syncId, waitFor, keysOrRanges, homeKey); } @Override @@ -104,8 +103,7 @@ public long serializedSize(SyncPoint sp, int version) return CommandSerializers.txnId.serializedSize(sp.syncId, version) + DepsSerializer.deps.serializedSize(sp.waitFor, version) + KeySerializers.seekables.serializedSize(sp.keysOrRanges, version) - + KeySerializers.routingKey.serializedSize(sp.homeKey, version) - + TypeSizes.sizeof(sp.finishedAsync); + + KeySerializers.routingKey.serializedSize(sp.homeKey, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 014a6b56c6a3..ec85d63f2c27 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -375,7 +375,7 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; // any that aren't can just use executeAt.hlc - TimestampsForKey cfk = CommandsForKeys.updateLastExecutionTimestamps((AbstractSafeCommandStore) safeStore, (RoutableKey) key, executeAt, true); + TimestampsForKey cfk = CommandsForKeys.updateLastExecutionTimestamps((AbstractSafeCommandStore) safeStore, (RoutableKey) key, executeAt, true); long timestamp = AccordSafeTimestampsForKey.timestampMicrosFor(cfk, executeAt, true); // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) int nowInSeconds = AccordSafeTimestampsForKey.nowInSecondsFor(cfk, executeAt, true); diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java index a1e90fd96551..e7d43b19ca70 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java @@ -211,7 +211,7 @@ public void repairPartition(DecoratedKey dk, Map mutations, R Future repairFuture; try { - Txn txn = new Txn.InMemory(key, TxnRead.createNoOpRead(key), TxnQuery.NONE, repairUpdate); + Txn txn = new Txn.InMemory(Txn.Kind.Read, key, TxnRead.createNoOpRead(key), TxnQuery.NONE, repairUpdate); repairFuture = Stage.ACCORD_MIGRATION.submit(() -> { try { diff --git a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java index a89a4f6381c6..e6983931184b 100644 --- a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java +++ b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java @@ -45,6 +45,7 @@ import java.util.UUID; import java.util.stream.Collectors; +import net.nicoulaj.compilecommand.annotations.DontInline; import net.nicoulaj.compilecommand.annotations.Inline; import org.apache.cassandra.db.TypeSizes; @@ -1014,6 +1015,104 @@ public static ByteBuffer serialized(IVersionedSerializer serializer, T va } } + public static void writeLeastSignificantBytes(long register, int bytes, ByteBuffer out) + { + writeMostSignificantBytesSlow(register << ((8 - bytes)*8), bytes, out); + } + + public static void writeMostSignificantBytes(long register, int bytes, ByteBuffer out) + { + int position = out.position(); + int limit = out.limit(); + if (limit - position < Long.BYTES) + { + writeMostSignificantBytesSlow(register, bytes, out); + } + else + { + out.putLong(position, register); + out.position(position + bytes); + } + } + + @DontInline + private static void writeMostSignificantBytesSlow(long register, int bytes, ByteBuffer out) + { + switch (bytes) + { + case 0: + break; + case 1: + out.put((byte)(register >>> 56)); + break; + case 2: + out.putShort((short)(register >> 48)); + break; + case 3: + out.putShort((short)(register >> 48)); + out.put((byte)(register >> 40)); + break; + case 4: + out.putInt((int)(register >> 32)); + break; + case 5: + out.putInt((int)(register >> 32)); + out.put((byte)(register >> 24)); + break; + case 6: + out.putInt((int)(register >> 32)); + out.putShort((short)(register >> 16)); + break; + case 7: + out.putInt((int)(register >> 32)); + out.putShort((short)(register >> 16)); + out.put((byte)(register >> 8)); + break; + case 8: + out.putLong(register); + break; + default: + throw new IllegalArgumentException(); + } + } + + public static long readLeastSignificantBytes(int bytes, ByteBuffer in) + { + if (bytes == 0) + return 0L; + + int position = in.position(); + int limit = in.limit(); + if (limit - position < Long.BYTES) + { + return readLeastSignificantBytesSlow(bytes, in); + } + else + { + long result = in.getLong(position); + in.position(position + bytes); + return result >>> (64 - 8*bytes); + } + } + + @DontInline + private static long readLeastSignificantBytesSlow(int bytes, ByteBuffer out) + { + switch (bytes) + { + case 0: return 0; + case 1: return out.get() & 0xffL; + case 2: return out.getShort() & 0xffffL; + case 3: return ((out.getShort() & 0xffffL) << 8) | (out.get() & 0xffL); + case 4: return out.getInt() & 0xffffffffL; + case 5: return ((out.getInt() & 0xffffffffL) << 8) | (out.get() & 0xffL); + case 6: return ((out.getInt() & 0xffffffffL) << 16) | (out.getShort() & 0xffffL); + case 7: return ((out.getInt() & 0xffffffffL) << 24) | ((out.getShort() & 0xffffL) << 8) | (out.get() & 0xffL); + case 8: return out.getLong(); + default: throw new IllegalArgumentException(); + } + } + public static final IVersionedSerializer byteBufferSerializer = new IVersionedSerializer() { @Override diff --git a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java index c2bb51004914..8b52bb41e463 100644 --- a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java +++ b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java @@ -49,8 +49,10 @@ import java.io.DataInput; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import io.netty.util.concurrent.FastThreadLocal; +import net.nicoulaj.compilecommand.annotations.DontInline; import net.nicoulaj.compilecommand.annotations.Inline; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.io.util.DataInputPlus; @@ -109,6 +111,50 @@ public static long readUnsignedVInt(DataInput input) throws IOException return retval; } + @DontInline + private static long readUnsignedVIntSlow(ByteBuffer in, byte firstByte) + { + int size = numberOfExtraBytesToRead(firstByte); + long retval = firstByte & firstByteValueMask(size); + for (int ii = 0; ii < size; ii++) + { + byte b = in.get(); + retval <<= 8; + retval |= b & 0xff; + } + + return retval; + } + + public static long readUnsignedVInt(ByteBuffer in) + { + byte firstByte = in.get(); + if (firstByte >= 0) + return firstByte; + + + int position = in.position(); + int limit = in.limit(); + if (limit - position < 8) + return readUnsignedVIntSlow(in, firstByte); + + int extraBytes = VIntCoding.numberOfExtraBytesToRead(firstByte); + int extraBits = extraBytes * 8; + + long retval = in.getLong(position); + if (in.order() == ByteOrder.LITTLE_ENDIAN) + retval = Long.reverseBytes(retval); + in.position(position + extraBytes); + + // truncate the bytes we read in excess of those we needed + retval >>>= 64 - extraBits; + // remove the non-value bits from the first byte + firstByte &= VIntCoding.firstByteValueMask(extraBytes); + // shift the first byte up to its correct position + retval |= (long) firstByte << extraBits; + return retval; + } + public static void skipUnsignedVInt(DataInputPlus input) throws IOException { int firstByte = input.readByte(); @@ -271,6 +317,19 @@ public static int readUnsignedVInt32(DataInput input) throws IOException return checkedCast(readUnsignedVInt(input)); } + /** + * Read up to a 32-bit integer. + * + * This method assumes the original integer was written using {@link #writeUnsignedVInt32(int, DataOutputPlus)} + * or similar that doesn't zigzag encodes the vint. + * + * @throws VIntOutOfRangeException If the vint doesn't fit into a 32-bit integer + */ + public static int readUnsignedVInt32(ByteBuffer input) + { + return checkedCast(readUnsignedVInt(input)); + } + // & this with the first byte to give the value part for a given extraBytesToRead encoded in the byte public static int firstByteValueMask(int extraBytesToRead) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index d11d364181ce..f91cdecfaae5 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -155,7 +155,7 @@ public static void setupClass() throws IOException StorageService.instance.setPartitionerUnsafe(partitioner); ServerTestUtils.prepareServerNoRegister(); minToken = partitioner.getMinimumToken(); - maxToken = partitioner.getMaximumToken(); + maxToken = partitioner.getMaximumTokenForSplitting(); midToken = partitioner.midpoint(minToken, maxToken); upperMidToken = partitioner.midpoint(midToken, maxToken); lowerMidToken = partitioner.midpoint(minToken, midToken); diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java index c50a1b442852..7cc4104c796b 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/StrictSerializabilityValidator.java @@ -32,7 +32,7 @@ public class StrictSerializabilityValidator implements HistoryValidator public StrictSerializabilityValidator(int[] primaryKeys) { - this.verifier = new StrictSerializabilityVerifier(primaryKeys.length); + this.verifier = new StrictSerializabilityVerifier("", primaryKeys.length); pkToIndex = new IntIntHashMap(primaryKeys.length); indexToPk = new int[primaryKeys.length]; for (int i = 0; i < primaryKeys.length; i++) diff --git a/test/unit/org/apache/cassandra/db/DiskBoundaryManagerTest.java b/test/unit/org/apache/cassandra/db/DiskBoundaryManagerTest.java index 0b25f19d99c4..606bdf33deac 100644 --- a/test/unit/org/apache/cassandra/db/DiskBoundaryManagerTest.java +++ b/test/unit/org/apache/cassandra/db/DiskBoundaryManagerTest.java @@ -186,7 +186,7 @@ public void testGetDataDirectoriesForFiles() SSTableReader disk1Boundary = MockSchema.sstable(gen++, (long)sstableFirstDisk1.getTokenValue(), (long)tokens.get(0).getTokenValue(), 0, mock); SSTableReader disk2Full = MockSchema.sstable(gen++, (long)tokens.get(0).nextValidToken().getTokenValue(), (long)tokens.get(1).getTokenValue(), 0, mock); - SSTableReader disk3Full = MockSchema.sstable(gen++, (long)tokens.get(1).nextValidToken().getTokenValue(), (long)partitioner.getMaximumToken().getTokenValue(), 0, mock); + SSTableReader disk3Full = MockSchema.sstable(gen++, (long)tokens.get(1).nextValidToken().getTokenValue(), (long)partitioner.getMaximumTokenForSplitting().getTokenValue(), 0, mock); Assert.assertEquals(tableDirs, mock.getDirectoriesForFiles(ImmutableSet.of())); Assert.assertEquals(Lists.newArrayList(tableDirs.get(0)), mock.getDirectoriesForFiles(ImmutableSet.of(containedDisk1))); diff --git a/test/unit/org/apache/cassandra/db/compaction/CancelCompactionsTest.java b/test/unit/org/apache/cassandra/db/compaction/CancelCompactionsTest.java index 757ad1c08670..3c792442e4c1 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CancelCompactionsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CancelCompactionsTest.java @@ -464,7 +464,7 @@ public void testSubrangeCompactionWith2i() throws Throwable try (LifecycleTransaction txn = idx.getTracker().tryModify(idx.getLiveSSTables(), OperationType.COMPACTION)) { IPartitioner partitioner = getCurrentColumnFamilyStore().getPartitioner(); - getCurrentColumnFamilyStore().forceCompactionForTokenRange(Collections.singleton(new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumToken()))); + getCurrentColumnFamilyStore().forceCompactionForTokenRange(Collections.singleton(new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting()))); } } diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 7765b9c1ad6d..95c495a35016 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -29,7 +29,6 @@ import java.util.stream.Collectors; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import org.junit.Before; import org.junit.BeforeClass; @@ -37,8 +36,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.Key; import accord.api.Result; +import accord.impl.CommandsForKey; import accord.local.CheckedCommands; +import accord.local.Command; import accord.local.CommandStore; import accord.local.DurableBefore; import accord.local.RedundantBefore; @@ -85,10 +87,13 @@ import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import static accord.impl.TimestampsForKey.NO_LAST_EXECUTED_HLC; +import static accord.local.KeyHistory.COMMANDS; import static accord.local.PreLoadContext.contextFor; import static accord.utils.async.AsyncChains.getUninterruptibly; import static org.apache.cassandra.Util.spinAssertEquals; @@ -123,8 +128,7 @@ public class CompactionAccordIteratorsTest static ColumnFamilyStore commands; static ColumnFamilyStore timestampsForKey; - static ColumnFamilyStore depsCommandsForKey; - static ColumnFamilyStore allCommandsForKey; + static ColumnFamilyStore commandsForKey; static TableMetadata table; static FullRoute route; Random random; @@ -144,17 +148,14 @@ public static void beforeClass() throws Throwable parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); StorageService.instance.initServer(); - commands = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, COMMANDS); + commands = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.COMMANDS); commands.disableAutoCompaction(); timestampsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, TIMESTAMPS_FOR_KEY); timestampsForKey.disableAutoCompaction(); - depsCommandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, DEPS_COMMANDS_FOR_KEY); - depsCommandsForKey.disableAutoCompaction(); - - allCommandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, ALL_COMMANDS_FOR_KEY); - allCommandsForKey.disableAutoCompaction(); + commandsForKey = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, COMMANDS_FOR_KEY); + commandsForKey.disableAutoCompaction(); table = ColumnFamilyStore.getIfExists("ks", "tbl").metadata(); route = AccordTestUtils.keys(table, 42).toRoute(AccordTestUtils.key(table, 42).toUnseekable()); @@ -209,7 +210,7 @@ private void testAccordCommandsPurger(RedundantBefore redundantBefore, DurableBe { testWithCommandStore((commandStore) -> { IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, durableBefore); - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, COMMANDS); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, AccordKeyspace.COMMANDS); List result = compactCFS(mockAccordService, cfs); expectedResult.accept(result); }, false); @@ -249,7 +250,6 @@ private static Consumer> expectedAccordTimestampsForKeyNoChange( Partition partition = partitions.get(0); Row row = partition.getRow(Clustering.EMPTY); - assertEquals(SECOND_TXN_ID, TimestampsForKeyRows.getMaxTimestamp(row)); assertEquals(TXN_ID, TimestampsForKeyRows.getLastExecutedTimestamp(row)); assertEquals(TXN_ID, TimestampsForKeyRows.getLastWriteTimestamp(row)); @@ -264,27 +264,17 @@ private static Consumer> expectedAccordCommandsForKeyNoChange() return partitions -> { assertEquals(1, partitions.size()); Partition partition = partitions.get(0); - assertEquals(2, Iterators.size(partition.unfilteredIterator())); - UnfilteredRowIterator rows = partition.unfilteredIterator(); - // One row per txn per series - for (TxnId txnId : TXN_IDS) - assertEquals(txnId, DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); + PartitionKey partitionKey = new PartitionKey(partition.metadata().id, partition.partitionKey()); + CommandsForKey cfk = CommandsForKeysAccessor.getCommandsForKey(partitionKey, ((Row)partition.unfilteredIterator().next())); + assertEquals(TXN_IDS.length, cfk.size()); + for (int i = 0 ; i < TXN_IDS.length ; ++i) + assertEquals(TXN_IDS[i], cfk.txnId(i)); }; } private static Consumer> expectedAccordTimestampsForKeyEraseOne() { - return partitions -> { - assertEquals(1, partitions.size()); - Partition partition = partitions.get(0); - Row row = partition.getRow(Clustering.EMPTY); - // Only expect one column to remain because the second transaction is a read - assertEquals(1, Iterables.size(row)); - assertEquals(SECOND_TXN_ID, AccordKeyspace.TimestampsForKeyRows.getMaxTimestamp(row)); - assertNull(AccordKeyspace.TimestampsForKeyRows.getLastExecutedTimestamp(row)); - assertNull(AccordKeyspace.TimestampsForKeyRows.getLastWriteTimestamp(row)); - assertEquals(NO_LAST_EXECUTED_HLC, AccordKeyspace.TimestampsForKeyRows.getLastExecutedMicros(row)); - }; + return partitions -> assertEquals(0, partitions.size()); } private static Consumer> expectedAccordCommandsForKeyEraseOne() @@ -294,7 +284,7 @@ private static Consumer> expectedAccordCommandsForKeyEraseOne() Partition partition = partitions.get(0); assertEquals(1, Iterators.size(partition.unfilteredIterator())); UnfilteredRowIterator rows = partition.unfilteredIterator(); - assertEquals(TXN_IDS[1], DepsCommandsForKeysAccessor.getTimestamp((Row)rows.next())); +// assertEquals(TXN_IDS[1], CommandsForKeysAccessor.getTimestamp((Row)rows.next())); }; } @@ -322,7 +312,7 @@ private void testAccordCommandsForKeyPurger(RedundantBefore redundantBefore, Con { testWithCommandStore((commandStore) -> { IAccordService mockAccordService = mockAccordService(commandStore, redundantBefore, DurableBefore.EMPTY); - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, DEPS_COMMANDS_FOR_KEY); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(ACCORD_KEYSPACE_NAME, COMMANDS_FOR_KEY); List result = compactCFS(mockAccordService, cfs); expectedResult.accept(result); }, true); @@ -447,8 +437,7 @@ private static void flush(AccordCommandStore commandStore) }); commands.forceBlockingFlush(FlushReason.UNIT_TESTS); timestampsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); - depsCommandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); - allCommandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); + commandsForKey.forceBlockingFlush(FlushReason.UNIT_TESTS); } private void testWithCommandStore(TestWithCommandStore test, boolean additionalCommand) throws Throwable @@ -457,62 +446,68 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC clock.set(CLOCK_START); AccordCommandStore commandStore = AccordTestUtils.createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); TxnId[] txnIds = additionalCommand ? TXN_IDS : new TxnId[] {TXN_ID}; + Txn writeTxn = AccordTestUtils.createWriteTxn(42); + Txn readTxn = AccordTestUtils.createTxn(42); + Seekable key = writeTxn.keys().get(0); for (TxnId txnId : txnIds) { - Txn txn = txnId.kind().isWrite() ? AccordTestUtils.createWriteTxn(42) : AccordTestUtils.createTxn(42); - Seekable key = txn.keys().get(0); + Txn txn = txnId.kind().isWrite() ? writeTxn : readTxn; PartialDeps partialDeps = Deps.NONE.slice(AccordTestUtils.fullRange(txn)); PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); commandStore.appendToJournal(preAccept); CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); }).beginAsResult()); flush(commandStore); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { Accept accept = Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, txnId, partialTxn.keys(), partialDeps); commandStore.appendToJournal(accept); CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, txnId, partialDeps); }).beginAsResult()); flush(commandStore); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { Commit commit = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, txnId, partialTxn, partialDeps, route, null); + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, txnId, partialTxn.keys(), partialTxn, partialDeps, route, null); commandStore.appendToJournal(commit); CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, txnId, partialDeps); }).beginAsResult()); flush(commandStore); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys()), safe -> { + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { Pair result = AccordTestUtils.processTxnResultDirect(safe, txnId, partialTxn, txnId); Apply apply = - Apply.SerializationSupport.create(txnId, partialRoute, txnId.epoch(), Apply.Kind.Minimal, partialTxn.keys(), txnId, partialDeps, partialTxn, result.left, result.right); + Apply.SerializationSupport.create(txnId, partialRoute, txnId.epoch(), Apply.Kind.Minimal, partialTxn.keys(), txnId, partialDeps, partialTxn, null, result.left, result.right); commandStore.appendToJournal(apply); CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right); }).beginAsResult()); + getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { + safe.get(txnId, txnId, route).addListener(new Command.ProxyListener(txnId)); // add a junk listener just to test it in compaction + }).beginAsResult()); flush(commandStore); // The apply chain is asychronous, so it is easiest to just spin until it is applied // in order to have the updated state in the system table spinAssertEquals(true, 5, () -> - getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys()), safe -> safe.get(txnId, route.homeKey()).current().hasBeen(Status.Applied) + getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys(), COMMANDS), safe -> safe.get(txnId, route.homeKey()).current().hasBeen(Status.Applied) ).beginAsResult())); flush(commandStore); } - UntypedResultSet commandsTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS + ";"); + UntypedResultSet commandsTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + AccordKeyspace.COMMANDS + ";"); logger.info(commandsTable.toStringUnsafe()); assertEquals(txnIds.length, commandsTable.size()); Iterator commandsTableIterator = commandsTable.iterator(); for (TxnId txnId : txnIds) assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); - UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + DEPS_COMMANDS_FOR_KEY + ";"); + UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS_FOR_KEY + ";"); logger.info(commandsForKeyTable.toStringUnsafe()); - assertEquals(txnIds.length, commandsForKeyTable.size()); - Iterator commandsForKeyTableIterator = commandsTable.iterator(); - for (TxnId txnId : txnIds) - assertEquals(txnId, AccordKeyspace.deserializeTimestampOrNull(commandsForKeyTableIterator.next().getBytes("txn_id"), TxnId::fromBits)); + assertEquals(1, commandsForKeyTable.size()); + CommandsForKey cfk = CommandsForKeySerializer.fromBytes((Key)key, commandsForKeyTable.iterator().next().getBytes("data")); + assertEquals(txnIds.length, cfk.size()); + for (int i = 0 ; i < txnIds.length ; ++i) + assertEquals(txnIds[i], cfk.txnId(i)); test.test(commandStore); } @@ -525,7 +520,7 @@ private List compactCFS(IAccordService mockAccordService, ColumnFamil { List outputPartitions = new ArrayList<>(); List nextInputScanners = new ArrayList<>(); - if (singleCompaction) + if (singleCompaction || numScanners == 1) { nextInputScanners = ImmutableList.copyOf(scanners); scanners.clear(); @@ -554,7 +549,7 @@ private List compactCFS(IAccordService mockAccordService, ColumnFamil scanners.add(random.nextInt(scanners.size()), new Scanner(cfs.metadata(), outputPartitions.stream().map(Partition::unfilteredIterator).collect(Collectors.toList()))); } while (!scanners.isEmpty()); - verify(mockAccordService, times(singleCompaction ? 1 : numScanners - 1)).getRedundantBeforesAndDurableBefore(); + verify(mockAccordService, times(singleCompaction || numScanners == 1 ? 1 : numScanners - 1)).getRedundantBeforesAndDurableBefore(); return result; } } diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionsBytemanTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionsBytemanTest.java index d338c8b1690e..e49847b443f2 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionsBytemanTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionsBytemanTest.java @@ -179,7 +179,7 @@ public void testStopSubRangeCompactionRepaired() throws Throwable { testStopCompactionRepaired((cfs) -> { Collection> ranges = Collections.singleton(new Range<>(cfs.getPartitioner().getMinimumToken(), - cfs.getPartitioner().getMaximumToken())); + cfs.getPartitioner().getMaximumTokenForSplitting())); CompactionManager.instance.forceCompactionForTokenRange(cfs, ranges); }); } diff --git a/test/unit/org/apache/cassandra/db/compaction/ShardManagerTest.java b/test/unit/org/apache/cassandra/db/compaction/ShardManagerTest.java index ced5d7882144..7917b48f6158 100644 --- a/test/unit/org/apache/cassandra/db/compaction/ShardManagerTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/ShardManagerTest.java @@ -543,7 +543,7 @@ List mockNonOverlappingSSTables(int numSSTables) private Token boundary(int numSSTables, int i) { - return partitioner.split(partitioner.getMinimumToken(), partitioner.getMaximumToken(), i * 1.0 / numSSTables); + return partitioner.split(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting(), i * 1.0 / numSSTables); } private SSTableReader mockSSTable(DecoratedKey first, DecoratedKey last) diff --git a/test/unit/org/apache/cassandra/db/compaction/UnifiedCompactionStrategyTest.java b/test/unit/org/apache/cassandra/db/compaction/UnifiedCompactionStrategyTest.java index 422d67e946fb..d03cfe85fadb 100644 --- a/test/unit/org/apache/cassandra/db/compaction/UnifiedCompactionStrategyTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/UnifiedCompactionStrategyTest.java @@ -277,7 +277,7 @@ private void testGetBucketsOneArena(Map sstableMap, int[] Ws, IPartitioner partitioner = cfs.getPartitioner(); DecoratedKey first = new BufferDecoratedKey(partitioner.getMinimumToken(), ByteBuffer.allocate(0)); - DecoratedKey last = new BufferDecoratedKey(partitioner.getMaximumToken(), ByteBuffer.allocate(0)); + DecoratedKey last = new BufferDecoratedKey(partitioner.getMaximumTokenForSplitting(), ByteBuffer.allocate(0)); List sstables = new ArrayList<>(); long dataSetSizeBytes = 0; @@ -517,7 +517,7 @@ private List createSStables(IPartitioner partitioner, { List mockSSTables = new ArrayList<>(); Token min = partitioner.getMinimumToken(); - Token max = partitioner.getMaximumToken(); + Token max = partitioner.getMaximumTokenForSplitting(); ByteBuffer bb = ByteBuffer.allocate(0); sstablesMap.forEach((size, num) -> { Token first = min.getPartitioner().split(min, max, 0.01); @@ -1118,7 +1118,7 @@ List mockNonOverlappingSSTables(int numSSTables, int level, long private Token boundary(int numSSTables, double i) { - return partitioner.split(partitioner.getMinimumToken(), partitioner.getMaximumToken(), i / numSSTables); + return partitioner.split(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting(), i / numSSTables); } } diff --git a/test/unit/org/apache/cassandra/dht/LengthPartitioner.java b/test/unit/org/apache/cassandra/dht/LengthPartitioner.java index e57a714e7bfa..1fcfa14ee1ca 100644 --- a/test/unit/org/apache/cassandra/dht/LengthPartitioner.java +++ b/test/unit/org/apache/cassandra/dht/LengthPartitioner.java @@ -76,7 +76,7 @@ public BigIntegerToken getMinimumToken() } @Override - public Token getMaximumToken() + public Token getMaximumTokenForSplitting() { return null; } diff --git a/test/unit/org/apache/cassandra/dht/SplitterTest.java b/test/unit/org/apache/cassandra/dht/SplitterTest.java index 707d294a8087..560bc8241380 100644 --- a/test/unit/org/apache/cassandra/dht/SplitterTest.java +++ b/test/unit/org/apache/cassandra/dht/SplitterTest.java @@ -168,7 +168,7 @@ private static boolean assertRangeSizeEqual(List localRa for (int i = 0; i < tokens.size(); i++) { - Token end = i == tokens.size() - 1 ? partitioner.getMaximumToken() : tokens.get(i); + Token end = i == tokens.size() - 1 ? partitioner.getMaximumTokenForSplitting() : tokens.get(i); splits.add(sumOwnedBetween(localRanges, start, end, splitter, splitIndividualRanges)); start = end; } @@ -250,7 +250,7 @@ private static void testSplit(IPartitioner partitioner) boolean isRandom = partitioner instanceof RandomPartitioner; Splitter splitter = getSplitter(partitioner); BigInteger min = splitter.valueForToken(partitioner.getMinimumToken()); - BigInteger max = splitter.valueForToken(partitioner.getMaximumToken()); + BigInteger max = splitter.valueForToken(partitioner.getMaximumTokenForSplitting()); BigInteger first = isRandom ? RandomPartitioner.ZERO : min; BigInteger last = isRandom ? max.subtract(BigInteger.valueOf(1)) : max; BigInteger midpoint = last.add(first).divide(BigInteger.valueOf(2)); @@ -373,8 +373,8 @@ private static void testTokensInRange(IPartitioner partitioner) Splitter splitter = getSplitter(partitioner); // test full range - Range fullRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumToken()); - BigInteger fullRangeSize = splitter.valueForToken(partitioner.getMaximumToken()).subtract(splitter.valueForToken(partitioner.getMinimumToken())); + Range fullRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting()); + BigInteger fullRangeSize = splitter.valueForToken(partitioner.getMaximumTokenForSplitting()).subtract(splitter.valueForToken(partitioner.getMinimumToken())); assertEquals(fullRangeSize, splitter.tokensInRange(fullRange)); fullRange = new Range<>(splitter.tokenForValue(BigInteger.valueOf(-10)), splitter.tokenForValue(BigInteger.valueOf(-10))); assertEquals(fullRangeSize, splitter.tokensInRange(fullRange)); @@ -413,13 +413,13 @@ private static void testElapsedMultiRange(IPartitioner partitioner) // wrapped range BigInteger min = splitter.valueForToken(partitioner.getMinimumToken()); - BigInteger max = splitter.valueForToken(partitioner.getMaximumToken()); + BigInteger max = splitter.valueForToken(partitioner.getMaximumTokenForSplitting()); Range wrappedRange = new Range<>(splitter.tokenForValue(max.subtract(BigInteger.valueOf(1350))), splitter.tokenForValue(min.add(BigInteger.valueOf(20394)))); testElapsedTokens(partitioner, wrappedRange, true); // full range - Range fullRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumToken()); + Range fullRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting()); testElapsedTokens(partitioner, fullRange, false); } @@ -490,15 +490,15 @@ private static void testPositionInRangeMultiRange(IPartitioner partitioner) testPositionInRange(partitioner, splitter, range); // Test wrap-around range - start = splitter.tokenForValue(splitter.valueForToken(partitioner.getMaximumToken()).subtract(BigInteger.valueOf(123456789))); + start = splitter.tokenForValue(splitter.valueForToken(partitioner.getMaximumTokenForSplitting()).subtract(BigInteger.valueOf(123456789))); end = splitter.tokenForValue(splitter.valueForToken(partitioner.getMinimumToken()).add(BigInteger.valueOf(123456789))); range = new Range<>(start, end); testPositionInRange(partitioner, splitter, range); // Test full range - testPositionInRange(partitioner, splitter, new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumToken())); + testPositionInRange(partitioner, splitter, new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting())); testPositionInRange(partitioner, splitter, new Range<>(partitioner.getMinimumToken(), partitioner.getMinimumToken())); - testPositionInRange(partitioner, splitter, new Range<>(partitioner.getMaximumToken(), partitioner.getMaximumToken())); + testPositionInRange(partitioner, splitter, new Range<>(partitioner.getMaximumTokenForSplitting(), partitioner.getMaximumTokenForSplitting())); testPositionInRange(partitioner, splitter, new Range<>(splitter.tokenForValue(BigInteger.ONE), splitter.tokenForValue(BigInteger.ONE))); } @@ -508,7 +508,7 @@ private static void testPositionInRange(IPartitioner partitioner, Splitter split //full range case if (range.left.equals(range.right)) { - actualRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumToken()); + actualRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting()); } assertEquals(0.0, splitter.positionInRange(actualRange.left, range), 0.01); assertEquals(0.25, splitter.positionInRange(getTokenInPosition(partitioner, actualRange, 0.25), range), 0.01); @@ -523,7 +523,7 @@ private static Token getTokenInPosition(IPartitioner partitioner, Range r { if (range.left.equals(range.right)) { - range = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumToken()); + range = new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting()); } Splitter splitter = getSplitter(partitioner); BigInteger totalTokens = splitter.tokensInRange(range); @@ -535,7 +535,7 @@ private static Token getTokenInPosition(IPartitioner partitioner, Range r private static Token getWrappedToken(IPartitioner partitioner, BigInteger position) { Splitter splitter = getSplitter(partitioner); - BigInteger maxTokenValue = splitter.valueForToken(partitioner.getMaximumToken()); + BigInteger maxTokenValue = splitter.valueForToken(partitioner.getMaximumTokenForSplitting()); BigInteger minTokenValue = splitter.valueForToken(partitioner.getMinimumToken()); if (position.compareTo(maxTokenValue) > 0) { diff --git a/test/unit/org/apache/cassandra/index/sai/cql/VectorTypeTest.java b/test/unit/org/apache/cassandra/index/sai/cql/VectorTypeTest.java index 1a0123067bbb..94c8b39b5155 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/VectorTypeTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/VectorTypeTest.java @@ -551,7 +551,7 @@ private Collection keys(UntypedResultSet result) private Collection keysWithLowerBound(Collection keys, int leftKey, boolean leftInclusive) { return keysInTokenRange(keys, partitioner.getToken(Int32Type.instance.decompose(leftKey)), leftInclusive, - partitioner.getMaximumToken().getToken(), true); + partitioner.getMaximumTokenForSplitting().getToken(), true); } private Collection keysWithUpperBound(Collection keys, int rightKey, boolean rightInclusive) diff --git a/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java b/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java index e0385a157d5a..93ee3f99cd4f 100644 --- a/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java +++ b/test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java @@ -185,7 +185,7 @@ private IndexSegmentSearcher buildIndexAndOpenSearcher(StorageAttachedIndex inde 0, Long.MAX_VALUE, SAITester.TEST_FACTORY.create(DatabaseDescriptor.getPartitioner().getMinimumToken()), - SAITester.TEST_FACTORY.create(DatabaseDescriptor.getPartitioner().getMaximumToken()), + SAITester.TEST_FACTORY.create(DatabaseDescriptor.getPartitioner().getMaximumTokenForSplitting()), wrap(termsEnum.get(0).left), wrap(termsEnum.get(terms - 1).left), indexMetas); diff --git a/test/unit/org/apache/cassandra/index/sai/disk/v1/SegmentTest.java b/test/unit/org/apache/cassandra/index/sai/disk/v1/SegmentTest.java index 6a0869fbfd6e..d3d22e2489fd 100644 --- a/test/unit/org/apache/cassandra/index/sai/disk/v1/SegmentTest.java +++ b/test/unit/org/apache/cassandra/index/sai/disk/v1/SegmentTest.java @@ -50,7 +50,7 @@ public static void init() DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); partitioner = DatabaseDescriptor.getPartitioner(); min = partitioner.getMinimumToken(); - max = partitioner.getMaximumToken(); + max = partitioner.getMaximumTokenForSplitting(); tokens = IntStream.rangeClosed(0, 10).boxed().map(i -> partitioner.getRandomToken()) .distinct().sorted().collect(Collectors.toList()); } diff --git a/test/unit/org/apache/cassandra/repair/RepairJobTest.java b/test/unit/org/apache/cassandra/repair/RepairJobTest.java index 2a589c028a8f..a2dd93953c16 100644 --- a/test/unit/org/apache/cassandra/repair/RepairJobTest.java +++ b/test/unit/org/apache/cassandra/repair/RepairJobTest.java @@ -106,7 +106,7 @@ public class RepairJobTest private static final Range RANGE_3 = range(4, 5); private static final RepairJobDesc JOB_DESC = new RepairJobDesc(nextTimeUUID(), nextTimeUUID(), KEYSPACE, CF, Collections.emptyList()); private static final List> FULL_RANGE = Collections.singletonList(new Range<>(MURMUR3_PARTITIONER.getMinimumToken(), - MURMUR3_PARTITIONER.getMaximumToken())); + MURMUR3_PARTITIONER.getMaximumTokenForSplitting())); private static InetAddressAndPort addr1; private static InetAddressAndPort addr2; private static InetAddressAndPort addr3; diff --git a/test/unit/org/apache/cassandra/repair/ValidationTaskTest.java b/test/unit/org/apache/cassandra/repair/ValidationTaskTest.java index e7f325de52ff..0bb93cad37d6 100644 --- a/test/unit/org/apache/cassandra/repair/ValidationTaskTest.java +++ b/test/unit/org/apache/cassandra/repair/ValidationTaskTest.java @@ -67,7 +67,7 @@ public void shouldReleaseTreesOnAbort() throws Exception IPartitioner partitioner = Murmur3Partitioner.instance; MerkleTrees trees = new MerkleTrees(partitioner); - trees.addMerkleTree(128, new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumToken())); + trees.addMerkleTree(128, new Range<>(partitioner.getMinimumToken(), partitioner.getMaximumTokenForSplitting())); task.treesReceived(trees); assertEquals(1, trees.size()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index c47f61987d36..626311fc031d 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -18,12 +18,10 @@ package org.apache.cassandra.service.accord; -import java.nio.ByteBuffer; import java.util.NavigableMap; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicLong; -import com.google.common.collect.ImmutableSortedMap; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -33,23 +31,18 @@ import accord.api.Key; import accord.api.Result; -import accord.impl.CommandTimeseries; import accord.impl.CommandsForKey; import accord.impl.CommandsForKeys; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommonAttributes; -import accord.local.KeyHistory; -import accord.local.PreLoadContext; import accord.local.SaveStatus; import accord.messages.Apply; import accord.primitives.Ballot; -import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.Route; -import accord.primitives.RoutableKey; import accord.primitives.RoutingKeys; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -65,10 +58,8 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordCachingState.Modified; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandSerializers; -import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.Pair; import static accord.local.Status.Durability.Majority; @@ -157,6 +148,7 @@ public void commandLoadSave() throws Throwable executeAt, dependencies, txn, + null, result.left, CommandSerializers.APPLIED); commandStore.appendToJournal(apply); @@ -186,7 +178,6 @@ public void timestampsForKeyLoadSave() AccordSafeTimestampsForKey tfk = new AccordSafeTimestampsForKey(loaded(key, null)); tfk.initialize(); - tfk.updateMax(maxTimestamp); CommandsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId1, true); Assert.assertEquals(txnId1.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId1, true)); @@ -198,13 +189,10 @@ public void timestampsForKeyLoadSave() Assert.assertEquals(txnId2.hlc(), tfk.lastExecutedMicros()); AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); - cfk.initialize(CommandsForKeySerializer.loader); + cfk.initialize(); - AccordSafeCommandsForKeyUpdate ufk = new AccordSafeCommandsForKeyUpdate(loaded(key, null)); - ufk.initialize(); - - CommandsForKeys.registerCommand(tfk, ufk, command1); - CommandsForKeys.registerCommand(tfk, ufk, command2); + cfk.set(cfk.current().update(null, command1)); + cfk.set(cfk.current().update(null, command2)); AccordKeyspace.getTimestampsForKeyMutation(commandStore, tfk, commandStore.nextSystemTimestampMicros()).apply(); logger.info("E: {}", tfk); @@ -232,20 +220,17 @@ public void commandsForKeyLoadSave() tfk.initialize(); AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); - cfk.initialize(CommandsForKeySerializer.loader); - - AccordSafeCommandsForKeyUpdate ufk = new AccordSafeCommandsForKeyUpdate(loaded(key, null)); - ufk.initialize(); + cfk.initialize(); - CommandsForKeys.registerCommand(tfk, ufk, command1); - CommandsForKeys.registerCommand(tfk, ufk, command2); + cfk.set(cfk.current().update(null, command1)); + cfk.set(cfk.current().update(null, command2)); - AccordKeyspace.getCommandsForKeyMutation(commandStore.id(), ufk.setUpdates(), commandStore.nextSystemTimestampMicros()).apply(); + AccordKeyspace.getCommandsForKeyMutation(commandStore.id(), cfk.current(), commandStore.nextSystemTimestampMicros()).apply(); logger.info("E: {}", cfk); - CommandsForKey actual = AccordKeyspace.loadDepsCommandsForKey(commandStore, key); + CommandsForKey actual = AccordKeyspace.loadCommandsForKey(commandStore, key); logger.info("A: {}", actual); - Assert.assertEquals(ufk.applyToDeps(cfk.current()), actual); + Assert.assertEquals(cfk.current(), actual); } private static > NavigableMap toNavigableMap(V safeState) @@ -254,109 +239,4 @@ public void commandsForKeyLoadSave() map.put(safeState.key(), safeState); return map; } - - @Test - public void commandsForKeyUpdateTest() - { - // check that updates are reflected in CFKs without marking them modified - AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - - PartialTxn txn = createPartialTxn(1); - PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); - - TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - - AccordSafeCommand safeCommand = commandStore.commandCache().acquireOrInitialize(txnId, t -> preaccepted(txnId, txn, timestamp(1, clock.incrementAndGet(), 1))); - AccordSafeTimestampsForKey timestamps = commandStore.timestampsForKeyCache().acquireOrInitialize(key, k -> new TimestampsForKey((Key) k)); - AccordSafeCommandsForKey commands = commandStore.depsCommandsForKeyCache().acquireOrInitialize(key, k -> new CommandsForKey((Key) k, CommandsForKeySerializer.loader)); - AccordSafeCommandsForKeyUpdate update = commandStore.updatesForKeyCache().acquireOrInitialize(key, CommandsForKeyUpdate::empty); - - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.commandCache().getUnsafe(txnId).status()); - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.timestampsForKeyCache().getUnsafe(key).status()); - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.depsCommandsForKeyCache().getUnsafe(key).status()); - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.updatesForKeyCache().getUnsafe(key).status()); - - AccordSafeCommandStore safeStore = commandStore.beginOperation(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.DEPS), - toNavigableMap(safeCommand), - toNavigableMap(timestamps), - toNavigableMap(commands), - new TreeMap<>(), - toNavigableMap(update)); - - AccordSafeCommandsForKeyUpdate updates = safeStore.getOrCreateCommandsForKeyUpdate(key); - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.updatesForKeyCache().getUnsafe(key).status()); - - Command initialCommand = safeCommand.current(); - CommandsForKey initialCFK = commands.current(); - CommandsForKeyUpdate initialUpdate = updates.current(); - - updates.common().commands().add(txnId, initialCommand); - - CommandsForKeyUpdate expected = new CommandsForKeyUpdate(key, updates.deps().toImmutable(), updates.all().toImmutable(), updates.common().toImmutable()); - Assert.assertEquals(1, expected.common().commands().numChanges()); - Assert.assertTrue(expected.deps().isEmpty()); - Assert.assertTrue(expected.all().isEmpty()); - - Assert.assertSame(initialCFK, commands.current()); - Assert.assertSame(initialUpdate, updates.current()); - - safeStore.postExecute(toNavigableMap(safeCommand), - toNavigableMap(timestamps), - toNavigableMap(commands), - new TreeMap<>(), - toNavigableMap(updates)); - safeStore.complete(); - - - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.commandCache().getUnsafe(txnId).status()); - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.timestampsForKeyCache().getUnsafe(key).status()); - Assert.assertEquals(AccordCachingState.Status.LOADED, commandStore.depsCommandsForKeyCache().getUnsafe(key).status()); - Assert.assertEquals(AccordCachingState.Status.MODIFIED, commandStore.updatesForKeyCache().getUnsafe(key).status()); - - CommandsForKey finalCFK = commandStore.depsCommandsForKeyCache().getUnsafe(key).get(); - Assert.assertEquals(txnId, getOnlyElement(finalCFK.commands().commands.keySet())); - - Modified loadedUpdate = (Modified) commandStore.updatesForKeyCache().getUnsafe(key).state(); - Assert.assertNull(loadedUpdate.original); - Assert.assertEquals(expected, loadedUpdate.get()); - } - - /** - * Test that in memory cfk updates are applied to - */ - @Test - public void commandsForKeyUpdateOnLoadTest() - { - AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - - PartialTxn txn = createPartialTxn(1); - PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); - - TxnId txnId = txnId(1, clock.incrementAndGet(), 1); - - Command command = preaccepted(txnId, txn, timestamp(1, clock.incrementAndGet(), 1)); - - // make a cached update - AccordSafeCommandsForKeyUpdate updates = commandStore.updatesForKeyCache().acquireOrInitialize(key, k -> null); - updates.preExecute(); - updates.common().commands().remove(command.txnId()); - updates.setUpdates(); // apply the updates applied to the safe state to the cached value - updates.postExecute(); // apply the cached value to the global state - commandStore.updatesForKeyCache().release(updates); - - // make an out of date CFK - CommandTimeseries.CommandLoader loader = CommandsForKeySerializer.loader; - CommandsForKey staleCFK = CommandsForKey.SerializerSupport.create(key, loader, - ImmutableSortedMap.of(command.txnId(), loader.saveForCFK(command))); - - Assert.assertEquals(txnId, getOnlyElement(staleCFK.commands().commands.keySet())); - - // on loading the cfk into the cache, the in memory update should be applied - AccordSafeCommandsForKey commands = commandStore.depsCommandsForKeyCache().acquireOrInitialize(key, k -> new CommandsForKey((Key) k, CommandsForKeySerializer.loader)); - commands.preExecute(); - - Assert.assertEquals(txnId, getOnlyElement(staleCFK.commands().commands.keySet())); - } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 06a55d24cb28..93cdaeb5a4a6 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -27,7 +27,6 @@ import accord.api.Key; import accord.api.RoutingKey; import accord.impl.CommandsForKey; -import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.KeyHistory; import accord.local.Node; @@ -118,11 +117,8 @@ public void basicCycleTest() throws Throwable Assert.assertEquals(Status.PreAccepted, command.status()); Assert.assertTrue(command.partialDeps() == null || command.partialDeps().isEmpty()); - TimestampsForKey tfk = ((AccordSafeCommandStore) instance).timestampsForKey(key(1)).current(); - Assert.assertEquals(txnId, tfk.max()); - - CommandsForKey cfk = ((AccordSafeCommandStore) instance).depsCommandsForKey(key(1)).current(); - Assert.assertNotNull((cfk.commands()).get(txnId)); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); + Assert.assertTrue(cfk.indexOf(txnId) >= 0); })); // check accept @@ -149,26 +145,23 @@ public void basicCycleTest() throws Throwable Assert.assertEquals(Status.Accepted, command.status()); Assert.assertEquals(deps, command.partialDeps()); - TimestampsForKey tfk = ((AccordSafeCommandStore) instance).timestampsForKey(key(1)).current(); - Assert.assertEquals(executeAt, tfk.max()); - - CommandsForKey cfk = ((AccordSafeCommandStore) instance).depsCommandsForKey(key(1)).current(); - Assert.assertNotNull((cfk.commands()).get(txnId)); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); + Assert.assertTrue(cfk.indexOf(txnId) >= 0); })); // check commit - Commit commit = Commit.SerializerSupport.create(txnId, route, 1, Commit.Kind.StableWithTxnAndDeps, Ballot.ZERO, executeAt, partialTxn, deps, fullRoute, null); + Commit commit = Commit.SerializerSupport.create(txnId, route, 1, Commit.Kind.StableWithTxnAndDeps, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, fullRoute, null); commandStore.appendToJournal(commit); getUninterruptibly(commandStore.execute(commit, commit::apply)); - getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.DEPS), instance -> { + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.COMMANDS), instance -> { Command command = instance.ifInitialised(txnId).current(); Assert.assertEquals(commit.executeAt, command.executeAt()); Assert.assertTrue(command.hasBeen(Status.Committed)); Assert.assertEquals(commit.partialDeps, command.partialDeps()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).depsCommandsForKey(key(1)).current(); - Assert.assertNotNull((cfk.commands()).get(txnId)); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); + Assert.assertTrue(cfk.indexOf(txnId) >= 0); })); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 0be7d692545a..198697160265 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -88,7 +88,7 @@ public void serde() AccordSafeCommand safeCommand = new AccordSafeCommand(AccordTestUtils.loaded(id, null)); safeCommand.set(committed); - Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.StableFastPath, Ballot.ZERO, id, partialTxn, partialDeps, route, null); + Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.StableFastPath, Ballot.ZERO, id, partialTxn.keys(), partialTxn, partialDeps, route, null); store.appendToJournal(commit); Mutation mutation = AccordKeyspace.getCommandMutation(store, safeCommand, 42); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index 82f56f869095..47ba369a0982 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -21,6 +21,8 @@ import org.junit.BeforeClass; import org.junit.Test; +import accord.messages.ReadData; +import accord.messages.ReadData.CommitOrReadNack; import accord.topology.TopologyUtils; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -32,7 +34,6 @@ import accord.local.Node; import accord.messages.InformOfTxnId; import accord.messages.MessageType; -import accord.messages.ReadData; import accord.messages.ReadTxnData; import accord.messages.Reply; import accord.messages.Request; @@ -88,11 +89,11 @@ public void bootstrapRead() Txn txn = Utils.readTxn(Keys.of(IntKey.key(42))); TxnId id = nextTxnId(epoch, txn); PartialTxn partialTxn = txn.slice(Ranges.of(IntKey.range(40, 50)), true); - Request request = new AbstractFetchCoordinator.FetchRequest(epoch, id, partialTxn.covering(), PartialDeps.NONE, partialTxn, true); + Request request = new AbstractFetchCoordinator.FetchRequest(epoch, id, partialTxn.covering(), PartialDeps.NONE, partialTxn); checkRequestReplies(request, new AbstractFetchCoordinator.FetchResponse(null, null, id), - ReadData.CommitOrReadNack.Insufficient); + CommitOrReadNack.Insufficient); } @@ -100,10 +101,10 @@ public void bootstrapRead() public void txnRead() { TxnId txnId = nextTxnId(42, Txn.Kind.Read, Routable.Domain.Key); - Request request = new ReadTxnData(node, topologies, txnId, topology.ranges(), txnId); + Request request = new ReadTxnData(node, topologies, txnId, topology.ranges(), txnId.epoch()); checkRequestReplies(request, new ReadData.ReadOk(null, null), - ReadData.CommitOrReadNack.Insufficient); + CommitOrReadNack.Insufficient); } private static void checkRequestReplies(Request request, Reply... replies) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 77911183de3c..66d0c8e436b3 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -35,11 +35,9 @@ import org.junit.Assert; import accord.api.Data; -import accord.api.Key; import accord.api.ProgressLog; import accord.api.Result; import accord.api.RoutingKey; -import accord.impl.CommandsForKey; import accord.impl.InMemoryCommandStore; import accord.local.Command; import accord.local.CommandStore; @@ -87,7 +85,6 @@ import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.Pair; @@ -152,11 +149,6 @@ private static FullRoute route(PartialTxn txn) } } - public static CommandsForKey commandsForKey(Key key) - { - return new CommandsForKey(key, CommandsForKeySerializer.loader); - } - public static AccordCachingState loaded(K key, V value, int index) { AccordCachingState global = new AccordCachingState<>(key, index); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java index 56675e6cf256..937b24350a7f 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java @@ -165,9 +165,9 @@ public void minMaxTokens() { List> ranges = ImmutableList.of(range(partitioner.getMinimumToken(), token(-100)), range(-100, 100), - range(token(100), partitioner.getMaximumToken())); + range(token(100), partitioner.getMaximumTokenForSplitting())); Assert.assertEquals(partitioner.getMinimumToken(), ranges.get(0).left); - Assert.assertEquals(partitioner.getMaximumToken(), ranges.get(2).right); + Assert.assertEquals(partitioner.getMaximumTokenForSplitting(), ranges.get(2).right); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); @@ -203,7 +203,7 @@ public void fastPath() { List> ranges = ImmutableList.of(range(partitioner.getMinimumToken(), token(-100)), range(-100, 100), - range(token(100), partitioner.getMaximumToken())); + range(token(100), partitioner.getMaximumTokenForSplitting())); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); Topology expected = new Topology(1, @@ -234,7 +234,7 @@ public void fastPathWithMoreThanMinimumFailedNodes() { List> ranges = ImmutableList.of(range(partitioner.getMinimumToken(), token(-100)), range(-100, 100), - range(token(100), partitioner.getMaximumToken())); + range(token(100), partitioner.getMaximumTokenForSplitting())); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); Topology expected = new Topology(1, diff --git a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java index 0c74fac7689f..ba80664c39df 100644 --- a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java @@ -78,7 +78,7 @@ public void comparisonTest() PartitionKey pk = new PartitionKey(TABLE1, dk); TokenKey tk = new TokenKey(TABLE1, dk.getToken()); TokenKey tkLow = new TokenKey(TABLE1, dk.getToken().decreaseSlightly()); - TokenKey tkHigh = new TokenKey(TABLE1, dk.getToken().nextValidToken()); + TokenKey tkHigh = new TokenKey(TABLE1, dk.getToken().increaseSlightly()); Assert.assertTrue(tk.compareTo(pk) > 0); Assert.assertTrue(tkLow.compareTo(pk) < 0); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 569de8239296..0f9651656603 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -18,9 +18,12 @@ package org.apache.cassandra.service.accord.async; +import java.util.TreeMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiFunction; +import java.util.function.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; @@ -29,6 +32,7 @@ import org.junit.Test; import accord.api.Key; +import accord.impl.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.KeyHistory; @@ -47,14 +51,15 @@ import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordCachingState; import org.apache.cassandra.service.accord.AccordSafeCommand; -import org.apache.cassandra.service.accord.AccordSafeCommandsForKeyUpdate; +import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import org.apache.cassandra.service.accord.AccordStateCache; -import org.apache.cassandra.service.accord.CommandsForKeyUpdate; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation.Context; import org.apache.cassandra.utils.concurrent.AsyncPromise; +import static accord.local.KeyHistory.COMMANDS; +import static accord.local.KeyHistory.TIMESTAMPS; import static java.util.Collections.emptyList; import static java.util.Collections.singleton; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; @@ -108,7 +113,7 @@ public void cachedTest() testLoad(executor, safeTimestamps, new TimestampsForKey(key)); timestampsCache.release(safeTimestamps); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.NONE); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), TIMESTAMPS); // everything is cached, so the loader should return immediately commandStore.executeBlocking(() -> { @@ -148,7 +153,7 @@ public void loadTest() AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, timestamps.current(), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.DEPS); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), TIMESTAMPS); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { @@ -196,7 +201,7 @@ public void partialLoadTest() AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, new TimestampsForKey(key), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.NONE); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), TIMESTAMPS); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { @@ -234,17 +239,10 @@ public void inProgressLoadTest() throws Throwable createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); commandStore.executor().submit(() -> commandStore.setCapacity(1024)).get(); AccordStateCache.Instance commandCache = commandStore.commandCache(); - AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); - // acquire / release - timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((PartitionKey) k)); - AccordSafeTimestampsForKey safeTimestamps = timestampsCache.acquire(key); - testLoad(executor, safeTimestamps, new TimestampsForKey(key)); - timestampsCache.release(safeTimestamps); - commandCache.unsafeSetLoadFunction(id -> { Assert.assertEquals(txnId, id); return notDefined(id, txn); }); AccordSafeCommand safeCommand = commandCache.acquire(txnId); Assert.assertEquals(AccordCachingState.Status.LOADING, safeCommand.globalStatus()); @@ -260,7 +258,7 @@ public void inProgressLoadTest() throws Throwable boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.timestampsForKey.containsKey(key)); + Assert.assertFalse(context.timestampsForKey.containsKey(key)); cbFired.setSuccess(null); }); Assert.assertFalse(result); @@ -276,7 +274,7 @@ public void inProgressLoadTest() throws Throwable commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); - Assert.assertTrue(context.timestampsForKey.containsKey(key)); + Assert.assertFalse(context.timestampsForKey.containsKey(key)); Assert.assertTrue(result); }); } @@ -306,7 +304,7 @@ else if (txnId.equals(txnId2)) throw new AssertionError("Unknown txnId: " + txnId); }); - AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY, KeyHistory.DEPS); + AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY, KeyHistory.COMMANDS); boolean result = loader.load(new Context(), (u, t) -> { Assert.assertFalse(callback.isDone()); @@ -376,19 +374,25 @@ public void inProgressCommandSaveTest() @Test public void inProgressCFKSaveTest() + { + inProgressCFKSaveTest(COMMANDS, AccordCommandStore::commandsForKeyCache, context -> context.commandsForKey, CommandsForKey::new, (cfk, u) -> cfk.update(null, u)); + } + + @Test + public void inProgressTFKSaveTest() + { + inProgressCFKSaveTest(TIMESTAMPS, AccordCommandStore::timestampsForKeyCache, context -> context.timestampsForKey, TimestampsForKey::new, (tfk, c) -> new TimestampsForKey(tfk.key(), c.executeAt(), c.executeAt().hlc(), c.executeAt())); + } + + private , C extends AccordStateCache.Instance> void inProgressCFKSaveTest(KeyHistory history, Function getter, Function> inContext, Function initialiser, BiFunction update) { AtomicLong clock = new AtomicLong(0); ManualExecutor executor = new ManualExecutor(); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); - AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); - timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((PartitionKey) k)); - timestampsCache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); - - AccordStateCache.Instance updateCache = commandStore.updatesForKeyCache(); - updateCache.unsafeSetLoadFunction(k -> { throw new AssertionError("updates shouldn't be loaded"); }); - updateCache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); + C cache = getter.apply(commandStore); + cache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); @@ -396,42 +400,37 @@ public void inProgressCFKSaveTest() Command preaccepted = preaccepted(txnId, txn, txnId); // acquire / release + T2 safe = cache.acquireOrInitialize(key, k -> initialiser.apply((Key)k)); + safe.preExecute(); + safe.set(update.apply(safe.current(), preaccepted)); + cache.release(safe); - AccordSafeTimestampsForKey safeTimestamps = timestampsCache.acquireOrInitialize(key, k -> new TimestampsForKey((Key) k)); - timestampsCache.release(safeTimestamps); - Assert.assertEquals(AccordCachingState.Status.LOADED, timestampsCache.getUnsafe(key).status()); - - AccordSafeCommandsForKeyUpdate safeUpdate = updateCache.acquireOrInitialize(key, CommandsForKeyUpdate::empty); - safeUpdate.common().commands().add(txnId, preaccepted); - safeUpdate.setUpdates(); - updateCache.release(safeUpdate); - - Assert.assertEquals(AccordCachingState.Status.MODIFIED, updateCache.getUnsafe(key).status()); - updateCache.getUnsafe(key).save(executor, (before, after) -> () -> {}); - Assert.assertEquals(AccordCachingState.Status.SAVING, updateCache.getUnsafe(key).status()); + Assert.assertEquals(AccordCachingState.Status.MODIFIED, cache.getUnsafe(key).status()); + cache.getUnsafe(key).save(executor, (before, after) -> () -> {}); + Assert.assertEquals(AccordCachingState.Status.SAVING, cache.getUnsafe(key).status()); // since the command is still saving, the loader shouldn't be able to acquire a reference - AsyncLoader loader = new AsyncLoader(commandStore, emptyList(), Keys.of(key), KeyHistory.NONE); + AsyncLoader loader = new AsyncLoader(commandStore, emptyList(), Keys.of(key), history); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> { Assert.assertNull(t); - Assert.assertTrue(context.timestampsForKey.containsKey(key)); + Assert.assertEquals(context.timestampsForKey.containsKey(key), inContext.apply(context) == context.timestampsForKey); + Assert.assertEquals(context.commandsForKey.containsKey(key), inContext.apply(context) == context.commandsForKey); cbFired.setSuccess(null); }); Assert.assertFalse(result); }); - Assert.assertEquals(AccordCachingState.Status.SAVING, updateCache.getUnsafe(key).status()); executor.runOne(); cbFired.awaitUninterruptibly(1, TimeUnit.SECONDS); - Assert.assertEquals(AccordCachingState.Status.LOADED, updateCache.getUnsafe(key).status()); // then return immediately after the callback has fired commandStore.executeBlocking(() -> { boolean result = loader.load(context, (o, t) -> Assert.fail()); - Assert.assertTrue(context.timestampsForKey.containsKey(key)); + Assert.assertEquals(context.timestampsForKey.containsKey(key), inContext.apply(context) == context.timestampsForKey); + Assert.assertEquals(context.commandsForKey.containsKey(key), inContext.apply(context) == context.commandsForKey); Assert.assertTrue(result); }); } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index ca695433f9e9..f421c993a0fa 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -85,6 +85,7 @@ import org.awaitility.Awaitility; import org.mockito.Mockito; +import static accord.local.KeyHistory.COMMANDS; import static accord.local.PreLoadContext.contextFor; import static accord.utils.Property.qt; import static accord.utils.async.AsyncChains.getUninterruptibly; @@ -115,8 +116,7 @@ public void before() { QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.COMMANDS)); QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.TIMESTAMPS_FOR_KEY)); - QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.DEPS_COMMANDS_FOR_KEY)); - QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.ALL_COMMANDS_FOR_KEY)); + QueryProcessor.executeInternal(String.format("TRUNCATE %s.%s", SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.COMMANDS_FOR_KEY)); } /** @@ -150,12 +150,12 @@ public void optionalCommandsForKeyTest() throws Throwable PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); getUninterruptibly(commandStore.execute(contextFor(key), instance -> { - SafeCommandsForKey cfk = ((AccordSafeCommandStore) instance).maybeDepsCommandsForKey(key); + SafeCommandsForKey cfk = ((AccordSafeCommandStore) instance).maybeCommandsForKey(key); Assert.assertNull(cfk); })); long nowInSeconds = FBUtilities.nowInSeconds(); - SinglePartitionReadCommand command = AccordKeyspace.getDepsCommandsForKeyRead(commandStore.id(), key, (int) nowInSeconds); + SinglePartitionReadCommand command = AccordKeyspace.getCommandsForKeyRead(commandStore.id(), key, (int) nowInSeconds); try(ReadExecutionController controller = command.executionController(); FilteredPartitions partitions = FilteredPartitions.filter(command.executeLocally(controller), nowInSeconds)) { @@ -177,6 +177,7 @@ private static Command createStableAndPersist(AccordCommandStore commandStore, T Commit.Kind.StableWithTxnAndDeps, Ballot.ZERO, executeAt, + command.partialTxn().keys(), command.partialTxn(), command.partialDeps(), Route.castToFullRoute(command.route()), @@ -209,14 +210,14 @@ private static Command createStableUsingFastLifeCycle(AccordCommandStore command PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); Commit stable = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, executeAt, partialTxn, deps, route, null); + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); commandStore.appendToJournal(preAccept); commandStore.appendToJournal(stable); try { - Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys()), safe -> { + Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); return safe.ifInitialised(txnId).current(); @@ -258,9 +259,9 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command Accept accept = Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); Commit commit = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Commit, Ballot.ZERO, executeAt, partialTxn, deps, route, null); + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Commit, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); Commit stable = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableSlowPath, Ballot.ZERO, executeAt, partialTxn, deps, route, null); + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableSlowPath, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); commandStore.appendToJournal(preAccept); commandStore.appendToJournal(accept); @@ -269,7 +270,7 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command try { - Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys()), safe -> { + Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); CheckedCommands.commit(safe, SaveStatus.Committed, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); @@ -320,7 +321,7 @@ public void testFutureCleanup() throws Throwable createStableAndPersist(commandStore, txnId); - Consumer consumer = safeStore -> safeStore.ifInitialised(txnId).readyToExecute(); + Consumer consumer = safeStore -> safeStore.ifInitialised(txnId).readyToExecute(safeStore); PreLoadContext ctx = contextFor(txnId); AsyncOperation operation = new AsyncOperation.ForConsumer(commandStore, ctx, consumer) { @@ -380,7 +381,7 @@ public void loadFail() assertNoReferences(commandStore, ids, keys); - PreLoadContext ctx = contextFor(ids, keys); + PreLoadContext ctx = contextFor(null, ids, keys, COMMANDS); Consumer consumer = Mockito.mock(Consumer.class); @@ -406,7 +407,7 @@ public void loadFail() // can we recover? commandStore.commandCache().unsafeSetLoadFunction(txnId -> AccordKeyspace.loadCommand(commandStore, txnId)); - AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> store.ifInitialised(id).readyToExecute())); + AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> store.ifInitialised(id).readyToExecute(store))); getUninterruptibly(o2); }); } @@ -428,7 +429,7 @@ public void consumerFails() createCommand(commandStore, rs, ids); assertNoReferences(commandStore, ids, keys); - PreLoadContext ctx = contextFor(ids, keys); + PreLoadContext ctx = contextFor(null, ids, keys, COMMANDS); Consumer consumer = Mockito.mock(Consumer.class); String errorMsg = "txn_ids " + ids; @@ -482,7 +483,7 @@ private static void assertNoReferences(AccordCommandStore commandStore, List) (Iterable) keys); + assertNoReferences(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); } catch (AssertionError e) { @@ -524,7 +525,7 @@ private static void awaitDone(AccordCommandStore commandStore, List ids, { awaitDone(commandStore.commandCache(), ids); //TODO this is due to bad typing for Instance, it doesn't use ? extends RoutableKey - awaitDone(commandStore.depsCommandsForKeyCache(), (Iterable) (Iterable) keys); + awaitDone(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); } private static void awaitDone(AccordStateCache.Instance cache, Iterable keys) diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 13485f90b3fc..4be4c11115f5 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -19,26 +19,70 @@ package org.apache.cassandra.service.accord.serializers; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.BooleanSupplier; +import java.util.function.Function; +import java.util.function.IntSupplier; +import java.util.function.LongUnaryOperator; +import java.util.function.Supplier; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -import accord.impl.CommandTimeseries; +import accord.api.Key; +import accord.impl.CommandsForKey; +import accord.impl.CommandsForKey.InternalStatus; +import accord.local.Command; +import accord.local.CommonAttributes; +import accord.local.CommonAttributes.Mutable; +import accord.local.Listeners; +import accord.local.Node; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.Ballot; +import accord.primitives.KeyDeps; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.RangeDeps; +import accord.primitives.Routable; +import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Writes; import accord.utils.AccordGens; import accord.utils.Gens; +import accord.utils.RandomSource; +import accord.utils.SortedArrays; +import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.SchemaLoader; -import org.apache.cassandra.io.util.DataInputBuffer; -import org.apache.cassandra.io.util.DataOutputBuffer; -import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.simulator.RandomSource.Choices; import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; +import static accord.local.Status.Durability.NotDurable; +import static accord.local.Status.KnownExecuteAt.ExecuteAtErased; +import static accord.local.Status.KnownExecuteAt.ExecuteAtUnknown; import static accord.utils.Property.qt; +import static accord.utils.SortedArrays.Search.FAST; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; -import static org.assertj.core.api.Assertions.assertThat; +import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; public class CommandsForKeySerializerTest { @@ -52,45 +96,396 @@ public static void beforeClass() throws Throwable StorageService.instance.initServer(); } - @Test - public void serdeDeps() + static class Cmd { - DataOutputBuffer buffer = new DataOutputBuffer(); - int version = MessagingService.Version.VERSION_40.value; - qt().forAll(Gens.lists(AccordGens.txnIds()).ofSizeBetween(0, 10)).check(ids -> { - buffer.clear(); + final TxnId txnId; + final SaveStatus saveStatus; + final PartialTxn txn; + final Timestamp executeAt; + final List deps = new ArrayList<>(); + final List missing = new ArrayList<>(); + boolean invisible; + + Cmd(TxnId txnId, PartialTxn txn, SaveStatus saveStatus, Timestamp executeAt) + { + this.txnId = txnId; + this.saveStatus = saveStatus; + this.txn = txn; + this.executeAt = executeAt; + } - long expectedSize = CommandsForKeySerializer.depsIdSerializer.serializedSize(ids, version); + CommonAttributes attributes() + { + Mutable mutable = new Mutable(txnId); + if (saveStatus.known.isDefinitionKnown()) + mutable.partialTxn(txn); - CommandsForKeySerializer.depsIdSerializer.serialize(ids, buffer, version); - assertThat(buffer.position()).isEqualTo(expectedSize); - try (DataInputBuffer in = new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)) + mutable.route(txn.keys().toRoute(txn.keys().get(0).someIntersectingRoutingKey(null))); + mutable.durability(NotDurable); + if (saveStatus.known.deps.hasProposedOrDecidedDeps()) { - List read = CommandsForKeySerializer.depsIdSerializer.deserialize(in, version); - assertThat(read).isEqualTo(ids); + try (KeyDeps.Builder builder = KeyDeps.builder();) + { + for (TxnId id : deps) + builder.add((Key)txn.keys().get(0), id); + mutable.partialDeps(new PartialDeps(AccordTestUtils.fullRange(txn), builder.build(), RangeDeps.NONE)); + } } - }); + + return mutable; + } + + Command toCommand() + { + switch (saveStatus) + { + default: throw new AssertionError("Unhandled saveStatus: " + saveStatus); + case Uninitialised: + case NotDefined: + return Command.SerializerSupport.notDefined(attributes(), Ballot.ZERO); + case PreAccepted: + return Command.SerializerSupport.preaccepted(attributes(), executeAt, Ballot.ZERO); + case Accepted: + case AcceptedInvalidate: + case AcceptedWithDefinition: + case AcceptedInvalidateWithDefinition: + case PreCommittedWithDefinition: + case PreCommittedWithDefinitionAndAcceptedDeps: + case PreCommittedWithAcceptedDeps: + case PreCommitted: + return Command.SerializerSupport.accepted(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO); + + case Committed: + return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO, null); + + case Stable: + case ReadyToExecute: + return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO, Command.WaitingOn.EMPTY); + + case PreApplied: + case Applying: + case Applied: + return Command.SerializerSupport.executed(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO, Command.WaitingOn.EMPTY, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); + + case TruncatedApplyWithDeps: + case TruncatedApply: + if (txnId.kind().awaitsOnlyDeps()) return Command.SerializerSupport.truncatedApply(attributes(), saveStatus, executeAt, null, null, txnId); + else return Command.SerializerSupport.truncatedApply(attributes(), saveStatus, executeAt, null, null); + + case TruncatedApplyWithOutcome: + if (txnId.kind().awaitsOnlyDeps()) return Command.SerializerSupport.truncatedApply(attributes(), saveStatus, executeAt, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData(), txnId); + else return Command.SerializerSupport.truncatedApply(attributes(), saveStatus, executeAt, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); + + case Erased: + case ErasedOrInvalidated: + case Invalidated: + return Command.SerializerSupport.invalidated(txnId, Listeners.Immutable.EMPTY); + } + } + + @Override + public String toString() + { + return "Cmd{" + + "txnId=" + txnId + + ", saveStatus=" + saveStatus + + ", txn=" + txn + + ", executeAt=" + executeAt + + ", deps=" + deps + + ", missing=" + missing + + ", invisible=" + invisible + + '}'; + } + } + + static class ObjectGraph + { + final Cmd[] cmds; + ObjectGraph(Cmd[] cmds) + { + this.cmds = cmds; + } + + List toCommands() + { + List commands = new ArrayList<>(cmds.length); + for (int i = 0 ; i < cmds.length ; ++i) + commands.add(cmds[i].toCommand()); + return commands; + } + } + + private static ObjectGraph generateObjectGraph(int txnIdCount, Supplier txnIdSupplier, Supplier saveStatusSupplier, Function txnSupplier, Function timestampSupplier, IntSupplier missingCountSupplier, RandomSource source) + { + Cmd[] cmds = new Cmd[txnIdCount]; + for (int i = 0 ; i < txnIdCount ; ++i) + { + TxnId txnId = txnIdSupplier.get(); + SaveStatus saveStatus = saveStatusSupplier.get(); + Timestamp executeAt = txnId; + if (saveStatus.known.executeAt != ExecuteAtErased && saveStatus.known.executeAt != ExecuteAtUnknown) + executeAt = timestampSupplier.apply(txnId); + + cmds[i] = new Cmd(txnId, txnSupplier.apply(txnId), saveStatus, executeAt); + } + Arrays.sort(cmds, Comparator.comparing(o -> o.txnId)); + for (int i = 0 ; i < txnIdCount ; ++i) + { + if (!cmds[i].saveStatus.known.deps.hasProposedOrDecidedDeps()) + continue; + + Timestamp knownBefore = cmds[i].saveStatus.known.deps.hasCommittedOrDecidedDeps() ? cmds[i].executeAt : cmds[i].txnId; + int limit = SortedArrays.binarySearch(cmds, 0, cmds.length, knownBefore, (a, b) -> a.compareTo(b.txnId), FAST); + if (limit < 0) limit = -1 - limit; + + List deps = cmds[i].deps; + List missing = cmds[i].missing; + for (int j = 0 ; j < limit ; ++j) + if (i != j) deps.add(cmds[j].txnId); + + int missingCount = Math.min(limit - (limit > i ? 1 : 0), missingCountSupplier.getAsInt()); + while (missingCount > 0) + { + int remove = source.nextInt(deps.size()); + int cmdIndex = SortedArrays.binarySearch(cmds, 0, cmds.length, deps.get(remove), (a, b) -> a.compareTo(b.txnId), FAST); + if (!cmds[cmdIndex].saveStatus.hasBeen(Status.Committed)) + missing.add(deps.get(remove)); + deps.set(remove, deps.get(deps.size() - 1)); + deps.remove(deps.size() - 1); + --missingCount; + } + deps.sort(TxnId::compareTo); + missing.sort(TxnId::compareTo); + } + + outer: for (int i = 0 ; i < cmds.length ; ++i) + { + if (null != InternalStatus.from(cmds[i].saveStatus)) + continue; + + for (int j = 0 ; j < i ; ++j) + { + InternalStatus status = InternalStatus.from(cmds[j].saveStatus); + if (status == null || !status.hasInfo) continue; + if (status.depsKnownBefore(cmds[j].txnId, cmds[j].executeAt).compareTo(cmds[i].txnId) > 0 && Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) + continue outer; + } + for (int j = i + 1 ; j < cmds.length ; ++j) + { + InternalStatus status = InternalStatus.from(cmds[j].saveStatus); + if (status == null || !status.hasInfo) continue; + if (Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) + continue outer; + } + cmds[i].invisible = true; + for (int j = 0 ; j < i ; ++j) + { + if (cmds[j].executeAt.compareTo(cmds[i].txnId) > 0) + { + int remove = Collections.binarySearch(cmds[j].missing, cmds[i].txnId); + if (remove >= 0) cmds[j].missing.remove(remove); + } + } + for (int j = i + 1 ; j < cmds.length ; ++j) + { + int remove = Collections.binarySearch(cmds[j].missing, cmds[i].txnId); + if (remove >= 0) cmds[j].missing.remove(remove); + } + } + return new ObjectGraph(cmds); + } + + private static Function txnIdSupplier(LongUnaryOperator epochSupplier, LongUnaryOperator hlcSupplier, Supplier kindSupplier, Supplier idSupplier) + { + return min -> new TxnId(epochSupplier.applyAsLong(min == null ? 1 : min.epoch()), hlcSupplier.applyAsLong(min == null ? 1 : min.hlc() + 1), kindSupplier.get(), Routable.Domain.Key, idSupplier.get()); + } + + private static Function timestampSupplier(LongUnaryOperator epochSupplier, LongUnaryOperator hlcSupplier, IntSupplier flagSupplier, Supplier idSupplier) + { + return min -> Timestamp.fromValues(epochSupplier.applyAsLong(min == null ? 1 : min.epoch()), hlcSupplier.applyAsLong(min == null ? 1 : min.hlc() + 1), flagSupplier.getAsInt(), idSupplier.get()); + } + + private static Function timestampSupplier(Set unique, Function supplier) + { + return min -> { + T candidate = supplier.apply(min); + while (!unique.add(candidate)) + { + T next = supplier.apply(min); + if (next.equals(candidate)) min = candidate; + else candidate = next; + } + return candidate; + }; } @Test public void serde() { - CommandTimeseries.CommandLoader loader = CommandsForKeySerializer.loader; - qt().forAll(AccordGenerators.commands()).check(cmd -> { - ByteBuffer bb = loader.saveForCFK(cmd); - int size = bb.remaining(); +// testOne(1821931462020409370L); + Random random = new Random(); + for (int i = 0 ; i < 10000 ; ++i) + { + long seed = random.nextLong(); + testOne(seed); + } + } - assertThat(loader.txnId(bb)).isEqualTo(cmd.txnId()); - assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + private static void testOne(long seed) + { + try + { + System.out.println(seed); + RandomSource source = RandomSource.wrap(new Random(seed)); - assertThat(loader.executeAt(bb)).isEqualTo(cmd.executeAt()); - assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + // TODO (required): produce broader variety of distributions, including executeAt with lower HLC but higher epoch + final LongUnaryOperator epochSupplier; { + long maxEpoch = source.nextLong(1, 10); + epochSupplier = min -> min >= maxEpoch ? min : maxEpoch == 1 ? 1 : source.nextLong(min, maxEpoch); + } + final LongUnaryOperator hlcSupplier; { + long maxHlc = source.nextLong(10, 1000000); + hlcSupplier = min -> min >= maxHlc ? min : source.nextLong(min, maxHlc); + } + final Supplier idSupplier; { + int maxId = source.nextInt(1, 10); + Int2ObjectHashMap lookup = new Int2ObjectHashMap<>(); + idSupplier = () -> lookup.computeIfAbsent(maxId == 1 ? 1 : source.nextInt(1, maxId), Node.Id::new); + } + final IntSupplier flagSupplier = () -> 0; + final Supplier kindSupplier = () -> { + float v = source.nextFloat(); + if (v < 0.5) return Txn.Kind.Read; + if (v < 0.95) return Txn.Kind.Write; + if (v < 0.99) return Txn.Kind.ExclusiveSyncPoint; + return Txn.Kind.EphemeralRead; // not actually a valid value for CFK + }; + + boolean permitMissing = source.decide(0.75f); + final IntSupplier missingCountSupplier; { + if (!permitMissing) + { + missingCountSupplier = () -> 0; + } + else + { + float zeroChance = source.nextFloat(); + int maxMissing = source.nextInt(1, 10); + missingCountSupplier = () -> { + float v = source.nextFloat(); + if (v < zeroChance) return 0; + return source.nextInt(0, maxMissing); + }; + } + } - assertThat(loader.saveStatus(bb)).isEqualTo(cmd.saveStatus()); - assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + Choices saveStatusChoices = Choices.uniform(SaveStatus.values()); + Supplier saveStatusSupplier = () -> { + SaveStatus result = saveStatusChoices.choose(source); + while (result == SaveStatus.TruncatedApplyWithDeps) // not a real save status + result = saveStatusChoices.choose(source); + return result; + }; - assertThat(loader.depsIds(bb)).isEqualTo(cmd.partialDeps() == null ? null : cmd.partialDeps().txnIds()); - assertThat(bb.remaining()).describedAs("ByteBuffer was mutated").isEqualTo(size); + Set uniqueTs = new TreeSet<>(); + final Function txnIdSupplier = timestampSupplier(uniqueTs, txnIdSupplier(epochSupplier, hlcSupplier, kindSupplier, idSupplier)); + boolean permitExecuteAt = source.decide(0.75f); + final Function executeAtSupplier; + { + if (!permitExecuteAt) + { + executeAtSupplier = id -> id; + } + else + { + Function rawTimestampSupplier = timestampSupplier(uniqueTs, timestampSupplier(epochSupplier, hlcSupplier, flagSupplier, idSupplier)); + float useTxnIdChance = source.nextFloat(); + BooleanSupplier useTxnId = () -> source.decide(useTxnIdChance); + executeAtSupplier = txnId -> useTxnId.getAsBoolean() ? txnId : rawTimestampSupplier.apply(txnId); + } + } + + PartialTxn txn = createPartialTxn(0); + Key key = (Key) txn.keys().get(0); + ObjectGraph graph = generateObjectGraph(source.nextInt(0, 100), () -> txnIdSupplier.apply(null), saveStatusSupplier, ignore -> txn, executeAtSupplier, missingCountSupplier, source); + List commands = graph.toCommands(); + CommandsForKey cfk = new CommandsForKey(key); + while (commands.size() > 0) + { + int next = source.nextInt(commands.size()); + cfk = cfk.update(null, commands.get(next)); + commands.set(next, commands.get(commands.size() - 1)); + commands.remove(commands.size() - 1); + } + + for (int i = 0, j = 0 ; j < graph.cmds.length ; ++j) + { + Cmd cmd = graph.cmds[j]; + if (i >= cfk.size() || !cfk.txnId(i).equals(cmd.txnId)) + { + Assert.assertTrue(cmd.invisible); + continue; + } + CommandsForKey.Info info = cfk.info(i); + InternalStatus expectStatus = InternalStatus.from(cmd.saveStatus); + if (expectStatus == null) expectStatus = InternalStatus.TRANSITIVELY_KNOWN; + if (expectStatus.hasInfo) + Assert.assertEquals(cmd.executeAt, info.executeAt(cfk.txnId(i))); + Assert.assertEquals(expectStatus, info.status); + Assert.assertArrayEquals(cmd.missing.toArray(TxnId[]::new), info.missing); + ++i; + } + + ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(cfk); + CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(key, buffer); + Assert.assertEquals(cfk, roundTrip); + } + catch (Throwable t) + { + throw new AssertionError(seed + " seed failed", t); + } + } + + @Test + public void test() + { + var tableGen = AccordGenerators.fromQT(CassandraGenerators.TABLE_ID_GEN); + var txnIdGen = AccordGens.txnIds(rs -> rs.nextLong(0, 100), rs -> rs.nextLong(100), rs -> rs.nextInt(10)); + qt().check(rs -> { + TableId table = tableGen.next(rs); + PartitionKey pk = new PartitionKey(table, Murmur3Partitioner.instance.decorateKey(Murmur3Partitioner.LongToken.keyForToken(rs.nextLong()))); + var redudentBefore = txnIdGen.next(rs); + TxnId[] ids = Gens.arrays(TxnId.class, rs0 -> { + TxnId next = txnIdGen.next(rs0); + while (next.compareTo(redudentBefore) <= 0) + next = txnIdGen.next(rs0); + return next; + }).unique().ofSizeBetween(0, 10).next(rs); + CommandsForKey.Info[] info = new CommandsForKey.Info[ids.length]; + for (int i = 0; i < info.length; i++) + info[i] = rs.pick(InternalStatus.values()).asNoInfo; + Arrays.sort(ids, Comparator.naturalOrder()); + CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, redudentBefore, ids, info); + + ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); + CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); + Assert.assertEquals(expected, roundTrip); }); } + + @Test + public void thereAndBackAgain() + { + long tokenValue = -2311778975040348869L; + DecoratedKey key = Murmur3Partitioner.instance.decorateKey(Murmur3Partitioner.LongToken.keyForToken(tokenValue)); + PartitionKey pk = new PartitionKey(TableId.fromString("1b255f4d-ef25-40a6-0000-000000000009"), key); + CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, + TxnId.fromValues(0,0,0,0), + new TxnId[] {TxnId.fromValues(11,34052499,2,1)}, + new CommandsForKey.Info[] { InternalStatus.PREACCEPTED.asNoInfo}); + + ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); + CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); + Assert.assertEquals(expected, roundTrip); + } } \ No newline at end of file From d3deef36d32b165be87f6b6d086a4d938814891f Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Tue, 26 Mar 2024 16:39:41 -0700 Subject: [PATCH 098/340] CEP-15: (C*) per-table transactional configuration Patch by Blake Eggleston; Reviewed by Ariel Wesberg for CASSANDRA-19016 --- .../apache/cassandra/config/AccordSpec.java | 20 + .../org/apache/cassandra/config/Config.java | 115 --- .../cassandra/config/DatabaseDescriptor.java | 43 +- .../cql3/statements/CQL3CasRequest.java | 5 +- .../cql3/statements/TransactionStatement.java | 15 +- .../schema/AlterSchemaStatement.java | 4 + .../schema/AlterTableStatement.java | 42 + .../schema/CreateTableStatement.java | 12 + .../statements/schema/TableAttributes.java | 14 +- .../apache/cassandra/db/SystemKeyspace.java | 4 +- .../db/streaming/CassandraStreamReceiver.java | 2 +- .../cassandra/repair/AccordRepairJob.java | 2 +- .../cassandra/repair/CassandraRepairJob.java | 2 +- .../apache/cassandra/repair/RepairResult.java | 2 +- .../cassandra/repair/RepairSession.java | 4 +- .../cassandra/schema/DistributedSchema.java | 15 + .../cassandra/schema/TableMetadata.java | 15 + .../apache/cassandra/schema/TableParams.java | 61 +- .../cassandra/service/StorageProxy.java | 84 +- .../cassandra/service/StorageService.java | 25 +- .../service/StorageServiceMBean.java | 4 - .../service/accord/AccordService.java | 41 +- .../service/accord/AccordTopology.java | 78 +- .../service/accord/IAccordService.java | 68 +- .../interop/AccordInteropExecution.java | 8 +- .../service/accord/txn/TxnQuery.java | 6 - .../service/consensus/TransactionalMode.java | 141 +++ .../migration/ConsensusKeyMigrationState.java | 7 +- .../migration/ConsensusMigratedAt.java | 70 ++ .../ConsensusMigrationRepairResult.java | 50 + .../ConsensusMigrationRepairType.java | 57 ++ .../migration/ConsensusMigrationState.java | 256 +++++ .../migration/ConsensusMigrationTarget.java | 60 ++ .../migration/ConsensusRequestRouter.java | 98 +- .../migration/ConsensusTableMigration.java | 337 +++++++ .../ConsensusTableMigrationState.java | 909 ------------------ .../migration/TableMigrationState.java | 360 +++++++ .../TransactionalMigrationFromMode.java | 84 ++ .../cassandra/service/paxos/PaxosPrepare.java | 3 +- .../reads/repair/BlockingReadRepair.java | 8 +- .../cassandra/streaming/StreamPlan.java | 8 +- .../apache/cassandra/tcm/ClusterMetadata.java | 70 +- .../apache/cassandra/tcm/MetadataKeys.java | 2 - .../tcm/StubClusterMetadataService.java | 6 +- .../apache/cassandra/tcm/Transformation.java | 11 +- .../tcm/compatibility/GossipHelper.java | 6 +- .../cassandra/tcm/ownership/AccordTables.java | 109 --- .../tcm/transformations/AddAccordTable.java | 91 -- .../tcm/transformations/AlterSchema.java | 85 +- ...ginConsensusMigrationForTableAndRange.java | 42 +- ...ishConsensusMigrationForTableAndRange.java | 67 +- .../SetConsensusMigrationTargetProtocol.java | 131 --- .../org/apache/cassandra/tools/NodeTool.java | 1 - .../nodetool/ConsensusMigrationAdmin.java | 23 - .../distributed/test/ReadRepairTest.java | 153 +-- .../test/ShortReadProtectionTest.java | 60 +- .../test/accord/AccordBootstrapTest.java | 4 +- .../test/accord/AccordCQLTest.java | 224 +++-- .../test/accord/AccordFeatureFlagTest.java | 62 +- .../test/accord/AccordInteropReadTest.java | 9 +- .../accord/AccordInteroperabilityTest.java | 5 +- .../test/accord/AccordMetricsTest.java | 5 +- .../test/accord/AccordMigrationTest.java | 167 +++- .../test/accord/AccordSimpleFastPathTest.java | 3 +- .../test/accord/AccordTestBase.java | 9 +- .../test/accord/NewSchemaTest.java | 2 +- .../test/log/ClusterMetadataTestHelper.java | 4 - .../test/tcm/AccordAddTableTest.java | 80 -- .../cassandra/audit/AuditLoggerTest.java | 2 +- .../apache/cassandra/auth/TxnAuthTest.java | 2 +- .../config/DatabaseDescriptorRefTest.java | 3 + .../cql3/NodeLocalConsistencyTest.java | 2 +- .../cql3/PreparedStatementsTest.java | 94 +- .../statements/DescribeStatementTest.java | 2 + .../cassandra/db/SchemaCQLHelperTest.java | 2 + .../cassandra/locator/MetaStrategyTest.java | 4 +- .../apache/cassandra/repair/FuzzTestBase.java | 6 +- .../cassandra/schema/FastPathSchemaTest.java | 14 +- .../schema/TransactionalConfigSchemaTest.java | 95 ++ .../accord/AccordCommandStoreTest.java | 6 +- .../service/accord/AccordCommandTest.java | 2 +- .../AccordConfigurationServiceTest.java | 2 +- .../service/accord/AccordReadRepairTest.java | 6 +- .../service/accord/AccordTopologyTest.java | 14 +- .../service/accord/api/AccordKeyTest.java | 4 +- .../service/accord/async/AsyncLoaderTest.java | 2 +- .../accord/async/AsyncOperationTest.java | 2 +- .../serializers/CommandSerializersTest.java | 2 +- .../CommandsForKeySerializerTest.java | 2 +- .../service/accord/txn/AccordUpdateTest.java | 2 +- .../ClusterMetadataTransformationTest.java | 2 - 91 files changed, 2590 insertions(+), 2252 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/consensus/TransactionalMode.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigratedAt.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java delete mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/TableMigrationState.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java delete mode 100644 src/java/org/apache/cassandra/tcm/ownership/AccordTables.java delete mode 100644 src/java/org/apache/cassandra/tcm/transformations/AddAccordTable.java delete mode 100644 src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java delete mode 100644 test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java create mode 100644 test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 9eb8a1d0be9f..697d7edc1e43 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -18,6 +18,8 @@ package org.apache.cassandra.config; +import org.apache.cassandra.service.consensus.TransactionalMode; + public class AccordSpec { public volatile boolean enabled = false; @@ -49,4 +51,22 @@ public class AccordSpec public volatile DurationSpec durability_txnid_lag = new DurationSpec.IntSecondsBound(5); public volatile DurationSpec shard_durability_cycle = new DurationSpec.IntMinutesBound(2); public volatile DurationSpec global_durability_cycle = new DurationSpec.IntMinutesBound(10); + + public enum TransactionalRangeMigration + { + auto, explicit + } + + /** + * Defines the behavior of range migration opt-in when changing transactional settings on a table. In auto, + * all ranges are marked as migrating and no additional user action is needed aside from running repairs. In + * explicit, no ranges are marked as migrating, and the user needs to explicitly mark ranges as migrating to + * the target transactional mode via nodetool. + */ + public volatile TransactionalRangeMigration range_migration = TransactionalRangeMigration.auto; + + /** + * default transactional mode for tables created by this node when no transactional mode has been specified in the DDL + */ + public TransactionalMode default_transactional_mode = TransactionalMode.off; } diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 3352ce9fed7c..45a8a61ce982 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -44,7 +44,6 @@ import org.apache.cassandra.io.sstable.format.big.BigFormat; import org.apache.cassandra.service.StartupChecks.StartupCheckType; import org.apache.cassandra.utils.StorageCompatibilityMode; -import org.apache.cassandra.service.accord.IAccordService; import static org.apache.cassandra.config.CassandraRelevantProperties.AUTOCOMPACTION_ON_STARTUP_ENABLED; import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_AVAILABLE_PROCESSORS; @@ -1167,8 +1166,6 @@ public enum PaxosOnLinearizabilityViolation public volatile boolean client_request_size_metrics_enabled = true; - public LWTStrategy lwt_strategy = LWTStrategy.migration; - public NonSerialWriteStrategy non_serial_write_strategy = NonSerialWriteStrategy.normal; public volatile int max_top_size_partition_count = 10; public volatile int max_top_tombstone_partition_count = 10; @@ -1387,118 +1384,6 @@ public enum TombstonesMetricGranularity cell } - /** - * How to pick a consensus protocol for CAS - * and serial read operations. Transaction statements - * will always run on Accord. Legacy in this context includes PaxosV2. - */ - public enum LWTStrategy - { - /** - * Allow both Accord and PaxosV1/V2 to run on the same cluster - * Some keys and ranges might be running on Accord if they - * have been migrated and the rest will run on Paxos until - * they are migrated. - */ - migration, - - /** - * Everything will be run on Accord. Useful for new deployments - * that don't want to accidentally start using legacy Paxos - * requiring migration to Accord. - */ - accord - } - - /* - * Configure how non-serial writes should be executed. For Accord transactions to function correctly - * when mixed with non-SERIAL writes it's necessary for the writes to occur through Accord. - * - * Accord will also use this configuration to determine what consistency level to perform its reads - * at since it will need to be able to read data written at non-SERIAL consistency levels. - * - * BlockingReadRepair will also use this configuration to determine how BRR mutations are applied. For migration - * and accord the BRR mutations will be applied as Accord transactions so that BRR doesn't expose Accord to - * uncommitted Accord data that is being RRed. This can occur when Accord has applied a transaction at some, but not - * all replica since Accord defaults to asynchronous commit. - * - * By routing repairs through Accord it is guaranteed that the Accord derived contents of the repair have already been applied at any - * replica where Accord applies the transaction. This also prevents BRR from breaking atomicity of Accord writes. - * - * If they are not written through Accord then reads through Accord will be required to occur at - * consistency level compatible with the non-serial writes preventing single replica reads from being performed - * by Accord. It will also require Accord to perform read repair of non-serial writes. - * - * Even then there is the potential for Accord to inconsistently execute transactions at different replicas - * because different coordinators for an Accord transaction may encounter different non-SERIAL write state and - * race to commit different outcomes for the transaction. - * - * This is different from Paxos because Paxos performs consensus on the actual values to be applied so recovery - * coordinators will always produce a consistent state when applying a transaction. Accord performs consensus on - * the execution order of transaction and different coordinators witnessing different states not managed by Accord - * can produce multiple outcomes for a transaction. - * - * // TODO (maybe): To safely migrate you would have to route all writes through Accord with the current implementation - * // We could do it by range instead in the migration version, but then we need to know when all in flight writes - * // are done before marking a range as migrated. Would waiting out the timeout be enough (timeout bugs!)? - */ - public enum NonSerialWriteStrategy - { - /* - * Execute writes through Cassandra via StorageProxy's normal write path. This can lead Accord to compute - * multiple outcomes for a transaction that depends on data written by non-SERIAL writes. - */ - normal(false, false, false), - /* - * Allow mixing of non-SERIAL writes and Accord, but still force BRR through Accord - */ - mixed(false, false, true), - /* - * Execute writes through Accord skipping StorageProxy's normal write path, but commit - * writes at the provided consistency level so they can be read via non-SERIAL consistency levels. - */ - migration(false, true, true), - /* - * Execute writes through Accord skipping StorageProxy's normal write path. Ignores the provided consistency level - * which makes Accord commit writes at ANY similar to Paxos with commit consistency level ANY. - */ - accord(true, true, true); - - public final boolean ignoresSuppliedConsistencyLevel; - public final boolean writesThroughAccord; - - public final boolean blockingReadRepairThroughAccord; - - NonSerialWriteStrategy(boolean ignoresSuppliedConsistencyLevel, boolean writesThroughAccord, boolean blockingReadRepairThroughAccord) - { - this.ignoresSuppliedConsistencyLevel = ignoresSuppliedConsistencyLevel; - this.writesThroughAccord = writesThroughAccord; - this.blockingReadRepairThroughAccord = blockingReadRepairThroughAccord; - } - - public ConsistencyLevel commitCLForStrategy(ConsistencyLevel consistencyLevel) - { - if (ignoresSuppliedConsistencyLevel) - return null; - - if (!IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(consistencyLevel)) - throw new UnsupportedOperationException("Consistency level " + consistencyLevel + " is unsupported with Accord for write/commit, supported are ANY, ONE, QUORUM, and ALL"); - - return consistencyLevel; - } - - public ConsistencyLevel readCLForStrategy(ConsistencyLevel consistencyLevel) - { - if (ignoresSuppliedConsistencyLevel) - return null; - - if (!IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS.contains(consistencyLevel)) - throw new UnsupportedOperationException("Consistency level " + consistencyLevel + " is unsupported with Accord for read, supported are ONE, QUORUM, and SERIAL"); - - return consistencyLevel; - } - } - private static final Set SENSITIVE_KEYS = new HashSet() {{ add("client_encryption_options"); add("server_encryption_options"); diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 246df534e82e..9498e666c485 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -78,8 +78,6 @@ import org.apache.cassandra.auth.IRoleManager; import org.apache.cassandra.config.Config.CommitLogSync; import org.apache.cassandra.config.Config.DiskAccessMode; -import org.apache.cassandra.config.Config.LWTStrategy; -import org.apache.cassandra.config.Config.NonSerialWriteStrategy; import org.apache.cassandra.config.Config.PaxosOnLinearizabilityViolation; import org.apache.cassandra.config.Config.PaxosStatePurging; import org.apache.cassandra.db.ConsistencyLevel; @@ -120,6 +118,7 @@ import org.apache.cassandra.security.SSLFactory; import org.apache.cassandra.service.CacheService.CacheType; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.paxos.Paxos; import org.apache.cassandra.tcm.RegistrationStatus; import org.apache.cassandra.utils.FBUtilities; @@ -168,8 +167,8 @@ public class DatabaseDescriptor { - public static final String NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE = - "Cannot use lwt_strategy \"accord\" while Accord transactions are disabled."; + public static final String NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE = + "Cannot use lwt_strategy \"accord\" while Accord transactions are disabled."; static { @@ -968,8 +967,8 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m { // if consensusMigrationCacheSizeInMiB option was set to "auto" then size of the cache should be "min(1% of Heap (in MB), 50MB) consensusMigrationCacheSizeInMiB = (conf.consensus_migration_cache_size == null) - ? Math.min(Math.max(1, (int) (Runtime.getRuntime().totalMemory() * 0.01 / 1024 / 1024)), 50) - : conf.consensus_migration_cache_size.toMebibytes(); + ? Math.min(Math.max(1, (int) (Runtime.getRuntime().totalMemory() * 0.01 / 1024 / 1024)), 50) + : conf.consensus_migration_cache_size.toMebibytes(); if (consensusMigrationCacheSizeInMiB < 0) throw new NumberFormatException(); // to escape duplicating error message @@ -1166,14 +1165,6 @@ else if (conf.max_value_size.toMebibytes() >= 2048) // run audit logging options through sanitation and validation if (conf.audit_logging_options != null) setAuditLoggingOptions(conf.audit_logging_options); - - if (conf.lwt_strategy == LWTStrategy.accord) - { - if (!conf.accord.enabled) - throw new ConfigurationException(NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE); - if (conf.non_serial_write_strategy == Config.NonSerialWriteStrategy.normal) - throw new ConfigurationException("If Accord is used for LWTs then regular writes needs to be routed through Accord for interoperability by setting non_serial_write_strategy to \"accord\" or \"migration\""); - } } @VisibleForTesting @@ -3653,25 +3644,14 @@ public static boolean paxoTopologyRepairStrictEachQuorum() return conf.paxos_topology_repair_strict_each_quorum; } - // TODO (desired): This configuration should come out of TrM to force the cluster to agree on it - public static LWTStrategy getLWTStrategy() - { - return conf.lwt_strategy; - } - - public static void setLWTStrategy(LWTStrategy lwtStrategy) + public static AccordSpec.TransactionalRangeMigration getTransactionalRangeMigration() { - conf.lwt_strategy = lwtStrategy; + return conf.accord.range_migration; } - public static Config.NonSerialWriteStrategy getNonSerialWriteStrategy() + public static void setTransactionalRangeMigration(AccordSpec.TransactionalRangeMigration val) { - return conf.non_serial_write_strategy; - } - - public static void setNonSerialWriteStrategy(NonSerialWriteStrategy nonSerialWriteStrategy) - { - conf.non_serial_write_strategy = nonSerialWriteStrategy; + conf.accord.range_migration = Preconditions.checkNotNull(val); } public static int getAccordBarrierRetryAttempts() @@ -3694,6 +3674,11 @@ public static long getAccordRangeBarrierTimeoutNanos() return conf.accord.range_barrier_timeout.to(TimeUnit.NANOSECONDS); } + public static TransactionalMode defaultTransactionalMode() + { + return conf.accord.default_transactional_mode; + } + public static void setNativeTransportMaxRequestDataInFlightPerIpInBytes(long maxRequestDataInFlightInBytes) { if (maxRequestDataInFlightInBytes == -1) diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index b2edb6cc3366..bbfc333ca6f9 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -36,7 +36,6 @@ import accord.api.Update; import accord.primitives.Txn; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.UpdateParameters; import org.apache.cassandra.cql3.conditions.ColumnCondition; @@ -502,7 +501,7 @@ public Txn toAccordTxn(ConsistencyLevel consistencyLevel, ConsistencyLevel commi Update update = createUpdate(clientState, commitConsistencyLevel); // If the write strategy is sending all writes through Accord there is no need to use the supplied consistency // level since Accord will manage reading safely - consistencyLevel = DatabaseDescriptor.getNonSerialWriteStrategy().readCLForStrategy(consistencyLevel); + consistencyLevel = metadata.params.transactionalMode.readCLForStrategy(consistencyLevel); TxnRead read = TxnRead.createCasRead(readCommand, consistencyLevel); // In a CAS requesting only one key is supported and writes // can't be dependent on any data that is read (only conditions) @@ -514,7 +513,7 @@ private Update createUpdate(ClientState clientState, ConsistencyLevel commitCons { // Potentially ignore commit consistency level if non-SERIAL write strategy is Accord // since it is safe to match what non-SERIAL writes do - commitConsistencyLevel = DatabaseDescriptor.getNonSerialWriteStrategy().commitCLForStrategy(commitConsistencyLevel); + commitConsistencyLevel = metadata.params.transactionalMode.commitCLForStrategy(commitConsistencyLevel); return new TxnUpdate(createWriteFragments(clientState), createCondition(), commitConsistencyLevel); } diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index f1a8872f07b3..208b9590f4e5 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -63,9 +63,11 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.AccordRoutableKey; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnData; @@ -78,13 +80,12 @@ import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.FBUtilities; import static accord.primitives.Txn.Kind.EphemeralRead; import static accord.primitives.Txn.Kind.Read; -import static org.apache.cassandra.config.Config.NonSerialWriteStrategy.accord; -import static org.apache.cassandra.config.DatabaseDescriptor.getNonSerialWriteStrategy; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull; import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; @@ -314,6 +315,11 @@ Keys toKeys(SortedSet keySet) return new Keys(keySet); } + private static TransactionalMode transactionalModeForSingleKey(Keys keys) + { + return Schema.instance.getTableMetadata(((AccordRoutableKey) keys.get(0)).table()).params.transactionalMode; + } + @VisibleForTesting public Txn createTxn(ClientState state, QueryOptions options) { @@ -326,7 +332,8 @@ public Txn createTxn(ClientState state, QueryOptions options) List reads = createNamedReads(options, state, ImmutableMap.of(), keySet::add); Keys txnKeys = toKeys(keySet); TxnRead read = createTxnRead(reads, txnKeys, null); - Txn.Kind kind = txnKeys.size() == 1 && getNonSerialWriteStrategy() == accord ? EphemeralRead : Read; + Txn.Kind kind = txnKeys.size() == 1 && transactionalModeForSingleKey(txnKeys) == TransactionalMode.full + ? EphemeralRead : Read; return new Txn.InMemory(kind, txnKeys, read, TxnQuery.ALL, null); } else @@ -376,8 +383,6 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. Txn txn = createTxn(state.getClientState(), options); - AccordService.instance().maybeConvertTablesToAccord(txn); - TxnResult txnResult = AccordService.instance().coordinate(txn, options.getConsistency(), requestTime); if (txnResult.kind() == retry_new_protocol) throw new IllegalStateException("Transaction statement should never be required to switch consensus protocols"); diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java index 0282cbd40943..115a6a3374f1 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java @@ -40,6 +40,7 @@ import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.ClientWarn; import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.service.accord.AccordTopology; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.transport.Event.SchemaChange; @@ -199,6 +200,9 @@ public ResultMessage execute(QueryState state) if (null != user && !user.isAnonymous()) createdResources(diff).forEach(r -> grantPermissionsOnResource(r, user)); + // if the changes affected accord, wait for accord to apply them + AccordTopology.awaitTopologyReadiness(diff, result.epoch); + return new ResultMessage.SchemaChange(schemaChangeEvent(diff)); } diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java index 94120ac63ce1..2a8cca87fcac 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java @@ -59,12 +59,14 @@ import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; import org.apache.cassandra.schema.MemtableParams; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.schema.UserFunctions; import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.schema.Views; import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; @@ -83,6 +85,8 @@ public abstract class AlterTableStatement extends AlterSchemaStatement { + private static final Logger logger = LoggerFactory.getLogger(AlterTableStatement.class); + protected final String tableName; private final boolean ifExists; protected ClientState state; @@ -583,6 +587,42 @@ public void validate(ClientState state) validateDefaultTimeToLive(attrs.asNewTableParams()); } + private TableParams validateAndUpdateTransactionalMigration(TableParams prev, TableParams next) + { + if (next.transactionalMode.accordIsEnabled && SchemaConstants.isSystemKeyspace(keyspaceName)) + throw ire("Cannot enable accord on system tables (%s.%s)", keyspaceName, tableName); + + boolean modeChange = prev.transactionalMode != next.transactionalMode; + boolean wasMigrating = prev.transactionalMigrationFrom.isMigrating(); + boolean forceMigrationChange = prev.transactionalMigrationFrom != next.transactionalMigrationFrom; + + if (modeChange && next.transactionalMode.accordIsEnabled && !DatabaseDescriptor.getAccordTransactionsEnabled()) + throw ire(format("Cannot change transactional mode to %s for %s.%s with accord_transactions_enabled set to false", + next.transactionalMode, keyspaceName, tableName)); + + // user is manually updating migration mode, don't interfere + if (forceMigrationChange) + { + logger.warn("Forcing unsafe migration change from {} to {} with transaction mode {}", prev.transactionalMigrationFrom, next.transactionalMigrationFrom, next.transactionalMode); + return next; + } + + if (!modeChange) + return next; + + // if the user is trying to revert to the mode being migrated from, allow it. The migration states will be inverted when + // the transformation is applied. Otherwise throw + if (wasMigrating && next.transactionalMode != prev.transactionalMigrationFrom.from) + throw ire(format("Cannot change transactional mode from %s to %s for %s.%s before transactional migration has completed", + prev.transactionalMode, next.transactionalMode, + keyspaceName, tableName)); + + // set table to migrating + TransactionalMigrationFromMode migrateFrom = TransactionalMigrationFromMode.fromMode(prev.transactionalMode, next.transactionalMode); + return next.unbuild().transactionalMigrationFrom(migrateFrom).build(); + } + + public KeyspaceMetadata apply(Epoch epoch, KeyspaceMetadata keyspace, TableMetadata table, ClusterMetadata metadata) { attrs.validate(); @@ -610,6 +650,8 @@ public KeyspaceMetadata apply(Epoch epoch, KeyspaceMetadata keyspace, TableMetad if (!params.compression.isEnabled()) Guardrails.uncompressedTablesEnabled.ensureEnabled(state); + params = validateAndUpdateTransactionalMigration(table.params, params); + return keyspace.withSwapped(keyspace.tables.withSwapped(table.withSwapped(params))); } } diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java index b8e51d1286e5..711af0c7ee40 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java @@ -31,6 +31,7 @@ import org.apache.cassandra.auth.DataResource; import org.apache.cassandra.auth.IResource; import org.apache.cassandra.auth.Permission; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.*; import org.apache.cassandra.cql3.constraints.ColumnConstraints; import org.apache.cassandra.cql3.functions.masking.ColumnMask; @@ -46,6 +47,7 @@ import org.apache.cassandra.transport.Event.SchemaChange.Change; import org.apache.cassandra.transport.Event.SchemaChange.Target; +import static java.lang.String.format; import static java.util.Comparator.comparing; import static com.google.common.collect.Iterables.concat; @@ -145,6 +147,16 @@ public Keyspaces apply(ClusterMetadata metadata) if (!table.params.compression.isEnabled()) Guardrails.uncompressedTablesEnabled.ensureEnabled(state); + if (table.params.transactionalMode.accordIsEnabled && SchemaConstants.isSystemKeyspace(keyspaceName)) + throw ire("Cannot enable accord on system tables (%s.%s)", keyspaceName, tableName); + + if (table.params.transactionalMode.accordIsEnabled && !DatabaseDescriptor.getAccordTransactionsEnabled()) + throw ire(format("Cannot create table %s.%s with transactional mode %s with accord.enabled set to false", + keyspaceName, tableName, table.params.transactionalMode)); + + if (table.params.transactionalMigrationFrom.isMigrating()) + throw ire("Cannot set transactional migration on new tables (%s.%s), %s", keyspaceName, tableName, table.params.transactionalMigrationFrom); + return schema.withAddedOrUpdated(keyspace.withSwapped(keyspace.tables.with(table))); } diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index eb1891862858..eb9c4927984c 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; @@ -34,6 +35,8 @@ import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.schema.TableParams.Option; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -64,7 +67,10 @@ public void validate() TableParams asNewTableParams() { - return build(TableParams.builder()); + TableParams.Builder builder = TableParams.builder(); + if (!hasOption(TRANSACTIONAL_MODE)) + builder.transactionalMode(DatabaseDescriptor.defaultTransactionalMode()); + return build(builder); } TableParams asAlteredTableParams(TableParams previous) @@ -165,6 +171,12 @@ private TableParams build(TableParams.Builder builder) } } + if (hasOption(Option.TRANSACTIONAL_MODE)) + builder.transactionalMode(TransactionalMode.fromString(getString(Option.TRANSACTIONAL_MODE))); + + if (hasOption(Option.TRANSACTIONAL_MIGRATION_FROM)) + builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); + return builder.build(); } diff --git a/src/java/org/apache/cassandra/db/SystemKeyspace.java b/src/java/org/apache/cassandra/db/SystemKeyspace.java index 8f155773c8a9..64015fec484c 100644 --- a/src/java/org/apache/cassandra/db/SystemKeyspace.java +++ b/src/java/org/apache/cassandra/db/SystemKeyspace.java @@ -136,8 +136,6 @@ import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.cql3.QueryProcessor.executeInternalWithNowInSec; import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; import static org.apache.cassandra.gms.ApplicationState.DC; import static org.apache.cassandra.gms.ApplicationState.HOST_ID; import static org.apache.cassandra.gms.ApplicationState.INTERNAL_ADDRESS_AND_PORT; @@ -146,6 +144,8 @@ import static org.apache.cassandra.gms.ApplicationState.RELEASE_VERSION; import static org.apache.cassandra.gms.ApplicationState.STATUS_WITH_PORT; import static org.apache.cassandra.gms.ApplicationState.TOKENS; +import org.apache.cassandra.service.consensus.migration.ConsensusMigratedAt; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import static org.apache.cassandra.service.paxos.Commit.latest; import static org.apache.cassandra.service.snapshot.SnapshotOptions.systemSnapshot; import static org.apache.cassandra.utils.CassandraVersion.NULL_VERSION; diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 99ef8e96b3f7..e75b6be26944 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -246,7 +246,7 @@ public void finished() checkNotNull(minVersion, "Unable to determine minimum cluster version"); IAccordService accordService = AccordService.instance(); if (session.streamOperation().requiresBarrierTransaction() - && accordService.isAccordManagedTable(cfs.getTableId()) + && cfs.metadata().isAccordEnabled() && CassandraVersion.CASSANDRA_5_0.compareTo(minVersion) >= 0) accordService.postStreamReceivingBarrier(cfs, ranges); diff --git a/src/java/org/apache/cassandra/repair/AccordRepairJob.java b/src/java/org/apache/cassandra/repair/AccordRepairJob.java index 8db43b46b33c..d82e4407fa15 100644 --- a/src/java/org/apache/cassandra/repair/AccordRepairJob.java +++ b/src/java/org/apache/cassandra/repair/AccordRepairJob.java @@ -33,7 +33,7 @@ import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.TokenRange; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairResult; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; diff --git a/src/java/org/apache/cassandra/repair/CassandraRepairJob.java b/src/java/org/apache/cassandra/repair/CassandraRepairJob.java index e2d373c58739..7662907ad9da 100644 --- a/src/java/org/apache/cassandra/repair/CassandraRepairJob.java +++ b/src/java/org/apache/cassandra/repair/CassandraRepairJob.java @@ -50,7 +50,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairResult; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanup; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.tcm.ClusterMetadata; diff --git a/src/java/org/apache/cassandra/repair/RepairResult.java b/src/java/org/apache/cassandra/repair/RepairResult.java index 4899448c7196..6c04f6be0760 100644 --- a/src/java/org/apache/cassandra/repair/RepairResult.java +++ b/src/java/org/apache/cassandra/repair/RepairResult.java @@ -19,7 +19,7 @@ import java.util.List; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairResult; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairResult; /** * RepairJob's result diff --git a/src/java/org/apache/cassandra/repair/RepairSession.java b/src/java/org/apache/cassandra/repair/RepairSession.java index ae46877a6b39..d98cc6141e0b 100644 --- a/src/java/org/apache/cassandra/repair/RepairSession.java +++ b/src/java/org/apache/cassandra/repair/RepairSession.java @@ -59,7 +59,7 @@ import org.apache.cassandra.repair.state.SessionState; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.FBUtilities; @@ -353,7 +353,7 @@ public void start(ExecutorPlus executor) new AccordRepairJob(this, cfname) : new CassandraRepairJob(this, cfname); // Repairs can drive forward progress for consensus migration so always check - job.addCallback(ConsensusTableMigrationState.completedRepairJobHandler); + job.addCallback(ConsensusTableMigration.completedRepairJobHandler); state.register(job.state); executor.execute(job); jobs.add(job); diff --git a/src/java/org/apache/cassandra/schema/DistributedSchema.java b/src/java/org/apache/cassandra/schema/DistributedSchema.java index e4eead15b5ea..17f4d33ccb93 100644 --- a/src/java/org/apache/cassandra/schema/DistributedSchema.java +++ b/src/java/org/apache/cassandra/schema/DistributedSchema.java @@ -30,6 +30,7 @@ import java.util.UUID; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; import org.apache.cassandra.auth.AuthKeyspace; import org.apache.cassandra.config.DatabaseDescriptor; @@ -79,10 +80,18 @@ public static DistributedSchema first(Set knownDatacenters) return new DistributedSchema(Keyspaces.of(DistributedMetadataLogKeyspace.initialMetadata(knownDatacenters)), Epoch.FIRST); } + private static ImmutableMap keyspacesToTableMap(Keyspaces keyspaces) + { + ImmutableMap.Builder builder = ImmutableMap.builder(); + keyspaces.forEach(ksm -> ksm.tablesAndViews().forEach(tbl -> builder.put(tbl.id, tbl))); + return builder.build(); + } + private final Keyspaces keyspaces; private final Epoch epoch; private final UUID version; private final Map keyspaceInstances = new HashMap<>(); + private final transient ImmutableMap tables; public DistributedSchema(Keyspaces keyspaces) { @@ -95,6 +104,7 @@ public DistributedSchema(Keyspaces keyspaces, Epoch epoch) this.keyspaces = keyspaces; this.epoch = epoch; this.version = new UUID(0, epoch.getEpoch()); + this.tables = keyspacesToTableMap(keyspaces); validate(); } @@ -120,6 +130,11 @@ public KeyspaceMetadata getKeyspaceMetadata(String keyspace) return keyspaces.get(keyspace).get(); } + public TableMetadata getTableMetadata(TableId id) + { + return tables.get(id); + } + public static DistributedSchema fromSystemTables(Keyspaces keyspaces, Set knownDatacenters) { if (!keyspaces.containsKeyspace(SchemaConstants.METADATA_KEYSPACE_NAME)) diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index 88a3f80d0b8a..e78e159995a6 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -361,6 +361,21 @@ public boolean isStaticCompactTable() return false; } + public boolean isAccordEnabled() + { + return params.transactionalMode.accordIsEnabled; + } + + public boolean migratingFromAccord() + { + return params.transactionalMigrationFrom.migratingFromAccord(); + } + + public boolean requiresAccordSupport() + { + return isAccordEnabled() || migratingFromAccord(); + } + public ImmutableCollection columns() { return columns.values(); diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 40614f65ea52..8cdc41f6871d 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -33,6 +33,8 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.service.reads.PercentileSpeculativeRetryPolicy; @@ -71,7 +73,9 @@ public enum Option CRC_CHECK_CHANCE, CDC, READ_REPAIR, - FAST_PATH; + FAST_PATH, + TRANSACTIONAL_MODE, + TRANSACTIONAL_MIGRATION_FROM; @Override public String toString() @@ -100,6 +104,8 @@ public String toString() public final boolean cdc; public final ReadRepairStrategy readRepair; public final FastPathStrategy fastPath; + public final TransactionalMode transactionalMode; + public final TransactionalMigrationFromMode transactionalMigrationFrom; private TableParams(Builder builder) { @@ -125,6 +131,8 @@ private TableParams(Builder builder) cdc = builder.cdc; readRepair = builder.readRepair; fastPath = builder.fastPath; + transactionalMode = builder.transactionalMode != null ? builder.transactionalMode : TransactionalMode.off; + transactionalMigrationFrom = builder.transactionalMigrationFrom; } public static Builder builder() @@ -153,7 +161,9 @@ public static Builder builder(TableParams params) .extensions(params.extensions) .cdc(params.cdc) .readRepair(params.readRepair) - .fastPath(params.fastPath); + .fastPath(params.fastPath) + .transactionalMode(params.transactionalMode) + .transactionalMigrationFrom(params.transactionalMigrationFrom); } public Builder unbuild() @@ -245,7 +255,9 @@ public boolean equals(Object o) && extensions.equals(p.extensions) && cdc == p.cdc && readRepair == p.readRepair - && fastPath.equals(fastPath); + && fastPath.equals(fastPath) + && transactionalMode == p.transactionalMode + && transactionalMigrationFrom == p.transactionalMigrationFrom; } @Override @@ -270,7 +282,9 @@ public int hashCode() extensions, cdc, readRepair, - fastPath); + fastPath, + transactionalMode, + transactionalMigrationFrom); } @Override @@ -298,6 +312,8 @@ public String toString() .add(CDC.toString(), cdc) .add(READ_REPAIR.toString(), readRepair) .add(Option.FAST_PATH.toString(), fastPath) + .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) + .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .toString(); } @@ -348,8 +364,17 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) .append("AND min_index_interval = ").append(minIndexInterval) .newLine() .append("AND read_repair = ").appendWithSingleQuotes(readRepair.toString()) - .newLine() - .append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); + .newLine(); + + if (!isView) + { + builder.append("AND transactional_mode = ").appendWithSingleQuotes(transactionalMode.toString()) + .newLine() + .append("AND transactional_migration_from = ").appendWithSingleQuotes(transactionalMigrationFrom.toString()) + .newLine(); + } + + builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); } public static final class Builder @@ -374,6 +399,8 @@ public static final class Builder private boolean cdc; private ReadRepairStrategy readRepair = ReadRepairStrategy.BLOCKING; private FastPathStrategy fastPath = FastPathStrategy.inheritKeyspace(); + private TransactionalMode transactionalMode = TransactionalMode.off; + public TransactionalMigrationFromMode transactionalMigrationFrom = TransactionalMigrationFromMode.none; public Builder() { @@ -498,6 +525,18 @@ public Builder fastPath(FastPathStrategy val) return this; } + public Builder transactionalMode(TransactionalMode val) + { + transactionalMode = val; + return this; + } + + public Builder transactionalMigrationFrom(TransactionalMigrationFromMode val) + { + transactionalMigrationFrom = val; + return this; + } + public Builder extensions(Map val) { extensions = ImmutableMap.copyOf(val); @@ -534,6 +573,8 @@ public void serialize(TableParams t, DataOutputPlus out, Version version) throws { out.writeBoolean(t.allowAutoSnapshot); out.writeBoolean(t.incrementalBackups); + out.writeInt(t.transactionalMode.ordinal()); + out.writeInt(t.transactionalMigrationFrom.ordinal()); } } @@ -559,7 +600,9 @@ public TableParams deserialize(DataInputPlus in, Version version) throws IOExcep .cdc(in.readBoolean()) .readRepair(ReadRepairStrategy.fromString(in.readUTF())) .allowAutoSnapshot(!version.isAtLeast(Version.V4) || in.readBoolean()) - .incrementalBackups(!version.isAtLeast(Version.V4) || in.readBoolean()); + .incrementalBackups(!version.isAtLeast(Version.V4) || in.readBoolean()) + .transactionalMode(version.isAtLeast(Version.V4) ? TransactionalMode.fromOrdinal(in.readInt()) : TransactionalMode.off) + .transactionalMigrationFrom(version.isAtLeast(Version.V4) ? TransactionalMigrationFromMode.fromOrdinal(in.readInt()) : TransactionalMigrationFromMode.off); return builder.build(); } @@ -584,7 +627,9 @@ public long serializedSize(TableParams t, Version version) sizeof(t.cdc) + sizeof(t.readRepair.name()) + (version.isAtLeast(Version.V4) ? sizeof(t.allowAutoSnapshot) : 0) + - (version.isAtLeast(Version.V4) ? sizeof(t.incrementalBackups) : 0); + (version.isAtLeast(Version.V4) ? sizeof(t.incrementalBackups) : 0) + + (version.isAtLeast(Version.V4) ? sizeof(t.transactionalMode.ordinal()) : 0) + + (version.isAtLeast(Version.V4) ? sizeof(t.transactionalMigrationFrom.ordinal()) : 0); } private void serializeMap(Map map, DataOutputPlus out) throws IOException diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index 4b5fad2d2259..dc78f995316a 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -42,6 +42,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import com.google.common.base.Preconditions; import com.google.common.cache.CacheLoader; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.Uninterruptibles; @@ -57,7 +58,7 @@ import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.Config; -import org.apache.cassandra.config.Config.NonSerialWriteStrategy; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; @@ -131,7 +132,6 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnQuery; @@ -175,8 +175,6 @@ import static com.google.common.collect.Iterables.concat; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; -import static org.apache.cassandra.config.Config.NonSerialWriteStrategy.accord; -import static org.apache.cassandra.config.DatabaseDescriptor.getNonSerialWriteStrategy; import static org.apache.cassandra.db.ConsistencyLevel.SERIAL; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casWriteMetrics; @@ -383,7 +381,6 @@ public static RowIterator cas(String keyspaceName, clientState, nowInSeconds); IAccordService accordService = AccordService.instance(); - accordService.maybeConvertTablesToAccord(txn); TxnResult txnResult = accordService.coordinate(txn, consistencyForPaxos, requestTime); @@ -1187,6 +1184,54 @@ public static void mutateMV(ByteBuffer dataKey, Collection mutations, } } + private static ConsistencyLevel consistencyLevelForCommit(Collection mutations, ConsistencyLevel consistencyLevel) + { + ConsistencyLevel result = null; + for (IMutation mutation : mutations) + { + for (TableId tableId : mutation.getTableIds()) + { + TransactionalMode mode = Schema.instance.getTableMetadata(tableId).params.transactionalMode; + ConsistencyLevel commitCL = mode.commitCLForStrategy(consistencyLevel); + if (result == null || commitCL.compareTo(result) > 0) + result = commitCL; + } + } + return result; + } + + private static boolean writesThroughAccord(List mutations, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + boolean accordWrite = false; + boolean normalWrite = false; + for (int i=0,mi=mutations.size(); i mutations, ConsistencyLevel consistencyLevel, @@ -1220,12 +1265,15 @@ public static void mutateWithTriggers(List mutations, .viewManager .updatesAffectView(mutations, true); + long size = IMutation.dataSize(mutations); writeMetrics.mutationSize.update(size); writeMetricsForLevel(consistencyLevel).mutationSize.update(size); - NonSerialWriteStrategy nonSerialWriteStrategy = getNonSerialWriteStrategy(); - if (nonSerialWriteStrategy.writesThroughAccord && !SchemaConstants.getSystemKeyspaces().contains(keyspaceName)) - mutateWithAccord(augmented != null ? augmented : mutations, consistencyLevel, requestTime, nonSerialWriteStrategy); + if (writesThroughAccord(mutations, consistencyLevel, requestTime)) + { + Preconditions.checkState(!SchemaConstants.getSystemKeyspaces().contains(keyspaceName)); + mutateWithAccord(augmented != null ? augmented : mutations, consistencyLevel, requestTime); + } else if (augmented != null) mutateAtomically(augmented, consistencyLevel, updatesView, requestTime); else @@ -1237,12 +1285,12 @@ else if (augmented != null) } } - private static void mutateWithAccord(Collection iMutations, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, Config.NonSerialWriteStrategy nonSerialWriteStrategy) + private static void mutateWithAccord(Collection mutations, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { int fragmentIndex = 0; - List fragments = new ArrayList<>(iMutations.size()); - List partitionKeys = new ArrayList<>(iMutations.size()); - for (IMutation mutation : iMutations) + List fragments = new ArrayList<>(mutations.size()); + List partitionKeys = new ArrayList<>(mutations.size()); + for (IMutation mutation : mutations) { for (PartitionUpdate update : mutation.getPartitionUpdates()) { @@ -1252,11 +1300,10 @@ private static void mutateWithAccord(Collection iMutations, } } // Potentially ignore commit consistency level if the strategy specifies accord and not migration - ConsistencyLevel clForCommit = nonSerialWriteStrategy.commitCLForStrategy(consistencyLevel); - AccordUpdate update = new TxnUpdate(fragments, TxnCondition.none(), clForCommit); + ConsistencyLevel clForCommit = consistencyLevelForCommit(mutations, consistencyLevel); + TxnUpdate update = new TxnUpdate(fragments, TxnCondition.none(), clForCommit); Txn.InMemory txn = new Txn.InMemory(Keys.of(partitionKeys), TxnRead.EMPTY, TxnQuery.EMPTY, update); IAccordService accordService = AccordService.instance(); - accordService.maybeConvertTablesToAccord(txn); accordService.coordinate(txn, consistencyLevel, requestTime); } @@ -2002,13 +2049,12 @@ private static ConsensusAttemptResult readWithAccord(SinglePartitionReadCommand. SinglePartitionReadCommand readCommand = group.queries.get(0); // If the non-SERIAL write strategy is sending all writes through Accord there is no need to use the supplied consistency // level since Accord will manage reading safely - NonSerialWriteStrategy nonSerialWriteStrategy = getNonSerialWriteStrategy(); - consistencyLevel = nonSerialWriteStrategy.readCLForStrategy(consistencyLevel); + TransactionalMode transactionalMode = group.metadata().params.transactionalMode; + consistencyLevel = transactionalMode.readCLForStrategy(consistencyLevel); TxnRead read = TxnRead.createSerialRead(readCommand, consistencyLevel); Invariants.checkState(read.keys().size() == 1, "Ephemeral reads are only strict-serializable for single partition reads"); - Txn txn = new Txn.InMemory(nonSerialWriteStrategy == accord ? EphemeralRead : Read, read.keys(), read, TxnQuery.ALL, null); + Txn txn = new Txn.InMemory(transactionalMode == TransactionalMode.full ? EphemeralRead : Read, read.keys(), read, TxnQuery.ALL, null); IAccordService accordService = AccordService.instance(); - accordService.maybeConvertTablesToAccord(txn); TxnResult txnResult = accordService.coordinate(txn, consistencyLevel, requestTime); if (txnResult.kind() == retry_new_protocol) return RETRY_NEW_PROTOCOL; diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index d473ae5f099e..ef4068e992e1 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -168,9 +168,7 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableMetadataRef; import org.apache.cassandra.schema.ViewMetadata; -import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.disk.usage.DiskUsageBroadcaster; import org.apache.cassandra.service.paxos.Paxos; import org.apache.cassandra.service.paxos.PaxosCommit; @@ -259,8 +257,8 @@ import static org.apache.cassandra.service.StorageService.Mode.LEAVING; import static org.apache.cassandra.service.StorageService.Mode.MOVE_FAILED; import static org.apache.cassandra.service.StorageService.Mode.NORMAL; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.finishMigrationToConsensusProtocol; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.startMigrationToConsensusProtocol; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigration.finishMigrationToConsensusProtocol; +import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigration.startMigrationToConsensusProtocol; import static org.apache.cassandra.tcm.membership.NodeState.BOOTSTRAPPING; import static org.apache.cassandra.tcm.membership.NodeState.BOOT_REPLACING; import static org.apache.cassandra.tcm.membership.NodeState.JOINED; @@ -1696,18 +1694,6 @@ public List finishConsensusMigration(@Nonnull String keyspace, return finishMigrationToConsensusProtocol(keyspace, Optional.ofNullable(maybeTableNames), Optional.ofNullable(maybeRangesStr)); } - @Override - public void setConsensusMigrationTargetProtocol(@Nonnull String targetProtocol, - @Nullable List keyspaceNames, - @Nullable List maybeTableNames) - { - checkNotNull(targetProtocol, "targetProtocol is null"); - checkNotNull(keyspaceNames, "keyspaceNames is null"); - checkArgument(!keyspaceNames.contains(SchemaConstants.METADATA_KEYSPACE_NAME)); - - ConsensusTableMigrationState.setConsensusMigrationTargetProtocol(targetProtocol, keyspaceNames, Optional.ofNullable(maybeTableNames)); - } - @Override public String listConsensusMigrations(@Nullable Set keyspaceNames, @Nullable Set tableNames, @@ -4235,7 +4221,7 @@ public List getAccordManagedKeyspaces() { Keyspaces keyspaces = Schema.instance.getNonLocalStrategyKeyspaces(); return keyspaces.stream().flatMap(ks -> ks.tables.stream()) - .filter(tbm -> AccordService.instance().isAccordManagedTable(tbm.id)) + .filter(TableMetadata::requiresAccordSupport) .map(tbm -> tbm.keyspace) .distinct() .sorted() @@ -4245,10 +4231,9 @@ public List getAccordManagedKeyspaces() @Override public List getAccordManagedTables() { - // TODO (review) These are really just the ones Accord is aware of not necessarily managed Keyspaces keyspaces = Schema.instance.getNonLocalStrategyKeyspaces(); return keyspaces.stream().flatMap(ks -> ks.tables.stream()) - .filter(tbm -> AccordService.instance().isAccordManagedTable(tbm.id)) + .filter(TableMetadata::requiresAccordSupport) .map(tbm -> tbm.keyspace + '.' + tbm.name) .collect(toList()); } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index b4e28c3e9b68..c58205898d88 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -1151,10 +1151,6 @@ List finishConsensusMigration(@Nonnull String keyspace, @Nullable List maybeTableNames, @Nullable String maybeRangesStr); - void setConsensusMigrationTargetProtocol(@Nonnull String targetProtocol, - @Nullable List keyspaceNames, - @Nullable List maybeTableNames); - String listConsensusMigrations(@Nullable Set keyspaceNames, @Nullable Set tableNames, @Nonnull String format); List getAccordManagedKeyspaces(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index a730b62813c2..e831aa1fbdb5 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -37,7 +37,7 @@ import org.apache.cassandra.cql3.statements.RequestValidations; import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tcm.transformations.AddAccordTable; +import org.apache.cassandra.service.accord.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,7 +74,6 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; -import org.apache.cassandra.exceptions.ExceptionCode; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; @@ -83,23 +82,16 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; -import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; -import org.apache.cassandra.service.accord.api.AccordScheduler; -import org.apache.cassandra.service.accord.api.AccordTopologySorter; -import org.apache.cassandra.service.accord.api.CompositeTopologySorter; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.accord.txn.TxnResult; -import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; -import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.concurrent.AsyncPromise; @@ -206,20 +198,11 @@ public Future epochReady(Epoch epoch) @Override public void receive(Message> message) {} - @Override - public boolean isAccordManagedTable(TableId keyspace) - { - return false; - } - @Override public Pair, DurableBefore> getRedundantBeforesAndDurableBefore() { return Pair.create(new Int2ObjectHashMap<>(), DurableBefore.EMPTY); } - - @Override - public void ensureTableIsAccordManaged(TableId tableId) {} }; private static volatile IAccordService instance = null; @@ -676,28 +659,6 @@ public AccordConfigurationService configurationService() return configService; } - @Override - public boolean isAccordManagedTable(TableId tableId) - { - return ClusterMetadata.current().accordTables.contains(tableId); - } - - @Override - public void ensureTableIsAccordManaged(TableId tableId) - { - if (isAccordManagedTable(tableId)) - return; - ClusterMetadataService.instance().commit(new AddAccordTable(tableId), - metadata -> null, - (code, message) -> { - Invariants.checkState(code == ExceptionCode.ALREADY_EXISTS, - "Expected %s, got %s", ExceptionCode.ALREADY_EXISTS, code); - return null; - }); - // we need to avoid creating a txnId in an epoch when no one has any ranges - FBUtilities.waitOnFuture(AccordService.instance().epochReady(ClusterMetadata.current().epoch)); - } - @Override public Pair, DurableBefore> getRedundantBeforesAndDurableBefore() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopology.java b/src/java/org/apache/cassandra/service/accord/AccordTopology.java index 46b4d026bf0e..0814c322d5e7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopology.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopology.java @@ -19,17 +19,20 @@ package org.apache.cassandra.service.accord; import java.util.*; -import java.util.function.Predicate; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; import accord.primitives.Ranges; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import accord.local.Node; import accord.topology.Shard; import accord.topology.Topology; import accord.utils.Invariants; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -45,6 +48,9 @@ import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.VersionedEndpoints; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; + +import static java.util.concurrent.TimeUnit.MILLISECONDS; /** * Deterministically computes accord topology from a ClusterMetadata instance @@ -212,7 +218,7 @@ private static Map createDCMap(Directory directory) return builder.build(); } - public static Topology createAccordTopology(Epoch epoch, DistributedSchema schema, DataPlacements placements, Directory directory, AccordFastPath accordFastPath, Predicate tablePredicate, ShardLookup lookup) + public static Topology createAccordTopology(Epoch epoch, DistributedSchema schema, DataPlacements placements, Directory directory, AccordFastPath accordFastPath, ShardLookup lookup) { List shards = new ArrayList<>(); Set unavailable = accordFastPath.unavailableIds(); @@ -220,7 +226,7 @@ public static Topology createAccordTopology(Epoch epoch, DistributedSchema schem for (KeyspaceMetadata keyspace : schema.getKeyspaces()) { - List tables = keyspace.tables.stream().filter(tbl -> tablePredicate.test(tbl.id)).collect(Collectors.toList()); + List tables = keyspace.tables.stream().filter(TableMetadata::requiresAccordSupport).collect(Collectors.toList()); if (tables.isEmpty()) continue; List ksShards = KeyspaceShard.forKeyspace(keyspace, placements, directory, lookup); @@ -231,19 +237,14 @@ public static Topology createAccordTopology(Epoch epoch, DistributedSchema schem return new Topology(epoch.getEpoch(), shards.toArray(new Shard[0])); } - public static Topology createAccordTopology(ClusterMetadata metadata, Predicate tablePredicate, ShardLookup lookup) - { - return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, metadata.accordFastPath, tablePredicate, lookup); - } - - public static Topology createAccordTopology(ClusterMetadata metadata, Predicate tablePredicate) + public static Topology createAccordTopology(ClusterMetadata metadata, ShardLookup lookup) { - return createAccordTopology(metadata, tablePredicate, new ShardLookup()); + return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, metadata.accordFastPath, lookup); } public static Topology createAccordTopology(ClusterMetadata metadata, Topology current) { - return createAccordTopology(metadata, metadata.accordTables::contains, createShardLookup(current)); + return createAccordTopology(metadata, createShardLookup(current)); } public static Topology createAccordTopology(ClusterMetadata metadata) @@ -274,4 +275,59 @@ private static ShardLookup createShardLookup(Topology topology) topology.forEach(shard -> map.put(shard.range, shard)); return map; } + private static boolean hasAccordSchemaChange(TableMetadata before, TableMetadata after) + { + return after.requiresAccordSupport() && (before == null || !before.requiresAccordSupport()); + } + + private static boolean hasAccordSchemaChange(TableMetadata created) + { + return hasAccordSchemaChange(null, created); + } + + private static boolean hasAccordSchemaChange(Diff.Altered diff) + { + return hasAccordSchemaChange(diff.before, diff.after); + } + + private static boolean hasAccordSchemaChange(Keyspaces.KeyspacesDiff keyspacesDiff) + { + for (KeyspaceMetadata.KeyspaceDiff keyspaceDiff : keyspacesDiff.altered) + { + if (Iterables.any(keyspaceDiff.tables.created, AccordTopology::hasAccordSchemaChange)) + return true; + + if (Iterables.any(keyspaceDiff.tables.altered, AccordTopology::hasAccordSchemaChange)) + return true; + } + + return false; + } + + /** + * If an accord related schema change occurs, we need to wait until accord has processed them + * before unblocking the change + */ + public static void awaitTopologyReadiness(Keyspaces.KeyspacesDiff keyspacesDiff, Epoch epoch) + { + if (!AccordService.isSetup()) + return; + + if (!hasAccordSchemaChange(keyspacesDiff)) + return; + + try + { + AccordService.instance().epochReady(epoch).get(DatabaseDescriptor.getTransactionTimeout(MILLISECONDS), MILLISECONDS); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException | TimeoutException e) + { + throw new RuntimeException(e); + } + } + } diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 9422df491638..b037acc7c5e8 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -18,16 +18,6 @@ package org.apache.cassandra.service.accord; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.stream.Collectors; -import javax.annotation.Nonnull; - -import com.google.common.collect.ImmutableSet; - import accord.api.BarrierType; import accord.local.DurableBefore; import accord.local.Node.Id; @@ -37,30 +27,32 @@ import accord.primitives.Seekables; import accord.primitives.Txn; import accord.topology.TopologyManager; +import com.google.common.collect.ImmutableSet; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; -import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; -import org.apache.cassandra.service.accord.api.AccordRoutableKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; -import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordScheduler; import org.apache.cassandra.service.accord.txn.TxnResult; -import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.transport.Dispatcher; -import org.apache.cassandra.tcm.transformations.AddAccordTable; -import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.Future; +import javax.annotation.Nonnull; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; + public interface IAccordService { - Set SUPPORTED_COMMIT_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); + Set SUPPORTED_COMMIT_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.LOCAL_ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); Set SUPPORTED_READ_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL); IVerbHandler verbHandler(); @@ -112,52 +104,10 @@ default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List void receive(Message> message); - /** - * Temporary method to avoid double-streaming keyspaces - * @param tableId - * @return - */ - boolean isAccordManagedTable(TableId tableId); - /** * Fetch the redundnant befores for every command store */ Pair, DurableBefore> getRedundantBeforesAndDurableBefore(); default Id nodeId() { throw new UnsupportedOperationException(); } - - default void maybeConvertTablesToAccord(Txn txn) - { - Set allTables = new HashSet<>(); - Set newTables = new HashSet<>(); - txn.keys().forEach(key -> { - TableId table = key instanceof AccordRoutableKey ? ((AccordRoutableKey) key).table() : ((TokenRange) key).table(); - if (allTables.add(table) && !isAccordManagedTable(table)) - newTables.add(table); - }); - - if (newTables.isEmpty()) - return; - - for (TableId table : newTables) - AddAccordTable.addTable(table); - - // we need to avoid creating a txnId in an epoch when no one has any ranges - FBUtilities.waitOnFuture(epochReady(ClusterMetadata.current().epoch)); - - for (TableId table : allTables) - { - if (!isAccordManagedTable(table)) - throw new IllegalStateException(table + " is not an accord managed table"); - } - } - - void ensureTableIsAccordManaged(TableId tableId); - - default void ensureKeyspaceIsAccordManaged(String keyspace) - { - // TODO: remove when accord enabled is handled via schema - Keyspace ks = Keyspace.open(keyspace); - ks.getMetadata().tables.forEach(metadata -> ensureTableIsAccordManaged(metadata.id)); - } } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index eb8ddc5c39ee..bafb96b4db22 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -86,8 +86,8 @@ import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.UnrecoverableRepairUpdate; import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; +import org.apache.cassandra.service.consensus.migration.TableMigrationState; import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.transport.Dispatcher; @@ -262,9 +262,9 @@ private AsyncChain readChains() // because they haven't yet updated their cluster metadata. // It would be harmless to do the read, but we can respond faster skipping it // and getting the transaction on the correct protocol - TableMigrationState tms = ConsensusTableMigrationState.getTableMigrationState(command.metadata().id); + TableMigrationState tms = ConsensusTableMigration.getTableMigrationState(command.metadata().id); AccordClientRequestMetrics metrics = txn.kind().isWrite() ? accordWriteMetrics : accordReadMetrics; - if (ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(tms, command.partitionKey())) + if (ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(command.metadata(), tms, command.partitionKey())) { metrics.migrationSkippedReads.mark(); results.add(AsyncChains.success(TxnData.emptyPartition(fragment.txnDataName(), command))); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java index 7afa75de16d8..defa96b554d8 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -32,8 +32,6 @@ import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; -import org.apache.cassandra.config.Config.LWTStrategy; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.EmptyIterators; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.TypeSizes; @@ -154,10 +152,6 @@ public Result compute(TxnId txnId, Timestamp executeAt, Seekables keys, @N Epoch epoch = Epoch.create(executeAt.epoch()); if (transactionIsInMigratingOrMigratedRange(epoch, keys)) { - // Fail fast because we can't be sure where this request should really run or what was intended - if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) - throw new IllegalStateException("Mixing a hard coded strategy with migration is unsupported"); - if (txnId.isWrite()) ClientRequestsMetricsHolder.accordWriteMetrics.accordMigrationRejects.mark(); else diff --git a/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java b/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java new file mode 100644 index 000000000000..2bcaee3ce213 --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus; + +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.utils.LocalizeString; + +/* + * Configure the transactional behavior of a table. Enables accord on a table and defines how it mixes with non-serial writes + * + * For Accord transactions to function correctly when mixed with non-SERIAL writes it's necessary for the writes to occur through Accord. + * + * Accord will also use this configuration to determine what consistency level to perform its reads + * at since it will need to be able to read data written at non-SERIAL consistency levels. + * + * BlockingReadRepair will also use this configuration to determine how BRR mutations are applied. For migration + * and accord the BRR mutations will be applied as Accord transactions so that BRR doesn't expose Accord to + * uncommitted Accord data that is being RRed. This can occur when Accord has applied a transaction at some, but not + * all replica since Accord defaults to asynchronous commit. + * + * By routing repairs through Accord it is guaranteed that the Accord derived contents of the repair have already been applied at any + * replica where Accord applies the transaction. This also prevents BRR from breaking atomicity of Accord writes. + * + * If they are not written through Accord then reads through Accord will be required to occur at + * consistency level compatible with the non-serial writes preventing single replica reads from being performed + * by Accord. It will also require Accord to perform read repair of non-serial writes. + * + * Even then there is the potential for Accord to inconsistently execute transactions at different replicas + * because different coordinators for an Accord transaction may encounter different non-SERIAL write state and + * race to commit different outcomes for the transaction. + * + * This is different from Paxos because Paxos performs consensus on the actual values to be applied so recovery + * coordinators will always produce a consistent state when applying a transaction. Accord performs consensus on + * the execution order of transaction and different coordinators witnessing different states not managed by Accord + * can produce multiple outcomes for a transaction. + * + * // TODO to safely migrate you would have to route all writes through Accord with the current implementation + * // We could do it by range instead in the migration version, but then we need to know when all in flight writes + * // are done before marking a range as migrated. Would waiting out the timeout be enough (timeout bugs!)? + */ +public enum TransactionalMode +{ + // Running on Paxos V1 or V2 with Accord disabled + off(false, false, false, false), + + /* + * Execute writes through Cassandra via StorageProxy's normal write path. This can lead Accord to compute + * multiple outcomes for a transaction that depends on data written by non-SERIAL writes. + */ + unsafe(true, false, false, false), + + /* + * Allow mixing of non-SERIAL writes and Accord, but still force BRR through Accord. + * This mode makes it safe to perform non-SERIAL or SERIAL reads of Accord data, but unsafe + * to write data that Accord may attempt to read. + */ + unsafe_writes(true, false, false, true), + + /* + * Execute writes through Accord skipping StorageProxy's normal write path, but commit + * writes at the provided consistency level so they can be read via non-SERIAL consistency levels. + * This mode makes it safe to read/write data that Accord will read/write. + */ + mixed_reads(true, false, true, true), + + /* + * Execute writes through Accord skipping StorageProxy's normal write path. Ignores the provided consistency level + * which makes Accord commit writes at ANY similar to Paxos with commit consistency level ANY. + */ + full(true, true, true, true); + + public final boolean accordIsEnabled; + public final boolean ignoresSuppliedConsistencyLevel; + public final boolean writesThroughAccord; + + public final boolean blockingReadRepairThroughAccord; + private final String cqlParam; + + TransactionalMode(boolean accordIsEnabled, boolean ignoresSuppliedConsistencyLevel, boolean writesThroughAccord, boolean blockingReadRepairThroughAccord) + { + this.accordIsEnabled = accordIsEnabled; + this.ignoresSuppliedConsistencyLevel = ignoresSuppliedConsistencyLevel; + this.writesThroughAccord = writesThroughAccord; + this.blockingReadRepairThroughAccord = blockingReadRepairThroughAccord; + this.cqlParam = String.format("transactional_mode = '%s'", LocalizeString.toLowerCaseLocalized(this.name())); + } + + public ConsistencyLevel commitCLForStrategy(ConsistencyLevel consistencyLevel) + { + if (ignoresSuppliedConsistencyLevel) + return null; + + if (!IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(consistencyLevel)) + throw new UnsupportedOperationException("Consistency level " + consistencyLevel + " is unsupported with Accord for write/commit, supported are ANY, ONE, QUORUM, and ALL"); + + return consistencyLevel; + } + + public ConsistencyLevel readCLForStrategy(ConsistencyLevel consistencyLevel) + { + if (ignoresSuppliedConsistencyLevel) + return null; + + if (!IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS.contains(consistencyLevel)) + throw new UnsupportedOperationException("Consistency level " + consistencyLevel + " is unsupported with Accord for read, supported are ONE, QUORUM, and SERIAL"); + + return consistencyLevel; + } + + public String asCqlParam() + { + return cqlParam; + } + + public static TransactionalMode fromOrdinal(int ordinal) + { + return values()[ordinal]; + } + + public static TransactionalMode fromString(String name) + { + return valueOf(LocalizeString.toLowerCaseLocalized(name)); + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java index d3651015c226..7a0bbaa1ffed 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java @@ -57,7 +57,6 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; import org.apache.cassandra.service.paxos.AbstractPaxosRepair.Failure; import org.apache.cassandra.service.paxos.AbstractPaxosRepair.Result; import org.apache.cassandra.service.paxos.PaxosRepair; @@ -70,9 +69,9 @@ import org.apache.cassandra.utils.UUIDSerializer; import static org.apache.cassandra.net.Verb.CONSENSUS_KEY_MIGRATION; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.paxos; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; + +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget.paxos; + import static org.apache.cassandra.utils.Clock.Global.nanoTime; /** diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigratedAt.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigratedAt.java new file mode 100644 index 000000000000..2b995bdcc61c --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigratedAt.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import java.io.IOException; +import javax.annotation.Nullable; + +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.NullableSerializer; + +public class ConsensusMigratedAt +{ + public static final IVersionedSerializer serializer = NullableSerializer.wrap(new IVersionedSerializer() + { + @Override + public void serialize(ConsensusMigratedAt t, DataOutputPlus out, int version) throws IOException + { + Epoch.messageSerializer.serialize(t.migratedAtEpoch, out, version); + out.writeByte(t.migratedAtTarget.value); + } + + @Override + public ConsensusMigratedAt deserialize(DataInputPlus in, int version) throws IOException + { + Epoch migratedAtEpoch = Epoch.messageSerializer.deserialize(in, version); + ConsensusMigrationTarget target = ConsensusMigrationTarget.fromValue(in.readByte()); + return new ConsensusMigratedAt(migratedAtEpoch, target); + } + + @Override + public long serializedSize(ConsensusMigratedAt t, int version) + { + return TypeSizes.sizeof(ConsensusMigrationTarget.accord.value) + + Epoch.messageSerializer.serializedSize(t.migratedAtEpoch, version); + } + }); + + // Fields are not nullable when used for messaging + @Nullable + public final Epoch migratedAtEpoch; + + @Nullable + public final ConsensusMigrationTarget migratedAtTarget; + + public ConsensusMigratedAt(Epoch migratedAtEpoch, ConsensusMigrationTarget migratedAtTarget) + { + this.migratedAtEpoch = migratedAtEpoch; + this.migratedAtTarget = migratedAtTarget; + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java new file mode 100644 index 000000000000..9233667b5a50 --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import org.apache.cassandra.tcm.Epoch; + +import static com.google.common.base.Preconditions.checkArgument; + +public class ConsensusMigrationRepairResult +{ + public final ConsensusMigrationRepairType type; + public final Epoch minEpoch; + + private ConsensusMigrationRepairResult(ConsensusMigrationRepairType type, Epoch minEpoch) + { + this.type = type; + this.minEpoch = minEpoch; + } + + public static ConsensusMigrationRepairResult fromCassandraRepair(Epoch minEpoch, boolean migrationEligibleRepair) + { + checkArgument(!migrationEligibleRepair || minEpoch.isAfter(Epoch.EMPTY), "Epoch should not be empty if Paxos and regular repairs were performed"); + if (migrationEligibleRepair) + return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.paxos, minEpoch); + else + return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.ineligible, Epoch.EMPTY); + } + + public static ConsensusMigrationRepairResult fromAccordRepair(Epoch minEpoch) + { + checkArgument(minEpoch.isAfter(Epoch.EMPTY), "Accord repairs should always occur at an Epoch"); + return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.accord, minEpoch); + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java new file mode 100644 index 000000000000..233682d07fb2 --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import com.google.common.primitives.SignedBytes; + +import org.apache.cassandra.utils.LocalizeString; + +public enum ConsensusMigrationRepairType +{ + ineligible(0), + paxos(1), + accord(2); + + public final byte value; + + ConsensusMigrationRepairType(int value) + { + this.value = SignedBytes.checkedCast(value); + } + + public static ConsensusMigrationRepairType fromString(String repairType) + { + return ConsensusMigrationRepairType.valueOf(LocalizeString.toLowerCaseLocalized(repairType)); + } + + public static ConsensusMigrationRepairType fromValue(byte value) + { + switch (value) + { + default: + throw new IllegalArgumentException(value + " is not recognized"); + case 0: + return ConsensusMigrationRepairType.ineligible; + case 1: + return ConsensusMigrationRepairType.paxos; + case 2: + return ConsensusMigrationRepairType.accord; + } + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java new file mode 100644 index 000000000000..7364db38c00a --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.MetadataValue; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.utils.PojoToString; + +import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.newHashMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializedMapSize; + +// TODO this will mostly go away once we can move TableMigrationState into the table schema +public class ConsensusMigrationState implements MetadataValue +{ + public static ConsensusMigrationState EMPTY = new ConsensusMigrationState(Epoch.EMPTY, ImmutableMap.of()); + @Nonnull + public final Map tableStates; + + public final Epoch lastModified; + + public ConsensusMigrationState(@Nonnull Epoch lastModified, @Nonnull Map tableStates) + { + checkNotNull(tableStates, "tableStates is null"); + checkNotNull(lastModified, "lastModified is null"); + this.lastModified = lastModified; + this.tableStates = ImmutableMap.copyOf(tableStates); + } + + public Map toMap(@Nullable Set keyspaceNames, @Nullable Set tableNames) + { + return ImmutableMap.of("lastModifiedEpoch", lastModified.getEpoch(), + "tableStates", tableStatesAsMaps(keyspaceNames, tableNames), + "version", PojoToString.CURRENT_VERSION); + } + + private List> tableStatesAsMaps(@Nullable Set keyspaceNames, + @Nullable Set tableNames) + { + ImmutableList.Builder> builder = ImmutableList.builder(); + for (TableMigrationState tms : tableStates.values()) + { + if (keyspaceNames != null && !keyspaceNames.contains(tms.keyspaceName)) + continue; + if (tableNames != null && !tableNames.contains(tms.tableName)) + continue; + builder.add(tms.toMap()); + } + return builder.build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ConsensusMigrationState that = (ConsensusMigrationState) o; + return tableStates.equals(that.tableStates); + } + + public ConsensusMigrationState withReversedMigrations(Map tables, Epoch epoch) + { + if (tables.isEmpty()) + return this; + + ImmutableMap.Builder updated = ImmutableMap.builder(); + + tableStates.forEach((id, state) -> { + if (!tables.containsKey(id)) + updated.put(id, state); + }); + + tables.values().forEach(metadata -> { + TableMigrationState state = tableStates.get(metadata.id); + if (state != null) + updated.put(metadata.id, state.reverseMigration(ConsensusMigrationTarget.fromTransactionalMode(metadata.params.transactionalMode), epoch)); + }); + + return new ConsensusMigrationState(lastModified, updated.build()); + } + + private static void withRangesMigrating(Map current, ImmutableMap.Builder next, TableMetadata metadata, List> ranges, boolean overwrite) + { + TableMigrationState tableState; + ConsensusMigrationTarget target = ConsensusMigrationTarget.fromTransactionalMode(metadata.params.transactionalMode); + if (!overwrite && current.containsKey(metadata.id)) + { + tableState = current.get(metadata.id).withRangesMigrating(ranges, target); + } + else + { + tableState = new TableMigrationState(metadata.keyspace, metadata.name, metadata.id, target, ImmutableSet.of(), ImmutableMap.of(Epoch.EMPTY, ranges)); + } + next.put(metadata.id, tableState); + } + + private static void putUnchanged(Map current, ImmutableMap.Builder next, Set changed) + { + current.forEach((id, migrationState) -> { + if (!changed.contains(id)) + next.put(id, migrationState); + }); + } + + private static void putUnchanged(Map current, ImmutableMap.Builder next, Collection changed) + { + Set changedIds = changed.stream().map(TableMetadata::id).collect(Collectors.toSet()); + putUnchanged(current, next, changedIds); + } + + public ConsensusMigrationState withRangesMigrating(Collection tables, List> ranges, boolean overwrite) + { + ImmutableMap.Builder updated = ImmutableMap.builder(); + putUnchanged(tableStates, updated, tables); + tables.forEach(metadata -> withRangesMigrating(tableStates, updated, metadata, ranges, overwrite)); + return new ConsensusMigrationState(lastModified, updated.build()); + } + + public ConsensusMigrationState withMigrationsCompletedFor(Collection completed) + { + ImmutableMap.Builder updated = ImmutableMap.builder(); + putUnchanged(tableStates, updated, new HashSet<>(completed)); + for (Map.Entry entry : tableStates.entrySet()) + { + if (completed.contains(entry.getKey())) + continue; + updated.put(entry); + } + return new ConsensusMigrationState(lastModified, updated.build()); + } + + public ConsensusMigrationState withRangesRepairedAtEpoch(TableMetadata metadata, List> ranges, Epoch minEpoch) + { + TableMigrationState state = Preconditions.checkNotNull(tableStates.get(metadata.id)); + state = state.withRangesRepairedAtEpoch(ranges, minEpoch); + + if (state.hasMigratedFullTokenRange(metadata.partitioner)) + { + return withMigrationsCompletedFor(Collections.singleton(metadata.id)); + } + else + { + ImmutableMap.Builder updated = ImmutableMap.builder(); + putUnchanged(tableStates, updated, Collections.singleton(metadata.id)); + updated.put(metadata.id, state); + return new ConsensusMigrationState(lastModified, updated.build()); + } + + } + + public ConsensusMigrationState withMigrationsRemovedFor(Set removed) + { + ImmutableMap.Builder updated = ImmutableMap.builder(); + putUnchanged(tableStates, updated, removed); + return new ConsensusMigrationState(lastModified, updated.build()); + } + + @Override + public int hashCode() + { + return Objects.hash(tableStates); + } + + @Override + public ConsensusMigrationState withLastModified(Epoch epoch) + { + ImmutableMap.Builder newMap = ImmutableMap.builderWithExpectedSize(tableStates.size()); + tableStates.forEach((tableId, tableState) -> { + newMap.put(tableId, tableState.withReplacementForEmptyEpoch(epoch)); + }); + return new ConsensusMigrationState(epoch, newMap.build()); + } + + @Override + public Epoch lastModified() + { + return lastModified; + } + + public void validateAgainstSchema(DistributedSchema schema) + { + tableStates.forEach((id, migrationState) -> { + TableMetadata metadata = schema.getTableMetadata(id); + Preconditions.checkState(ConsensusMigrationTarget.fromTransactionalMode(metadata.params.transactionalMode).equals(migrationState.targetProtocol)); + }); + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + @Override + public void serialize(ConsensusMigrationState consensusMigrationState, DataOutputPlus out, Version version) throws IOException + { + Epoch.serializer.serialize(consensusMigrationState.lastModified, out, version); + serializeMap(consensusMigrationState.tableStates, out, version, TableId.metadataSerializer, TableMigrationState.serializer); + } + + @Override + public ConsensusMigrationState deserialize(DataInputPlus in, Version version) throws IOException + { + Epoch lastModified = Epoch.serializer.deserialize(in, version); + Map tableMigrationStates = deserializeMap(in, version, TableId.metadataSerializer, TableMigrationState.serializer, newHashMap()); + return new ConsensusMigrationState(lastModified, tableMigrationStates); + } + + @Override + public long serializedSize(ConsensusMigrationState t, Version version) + { + return Epoch.serializer.serializedSize(t.lastModified, version) + + serializedMapSize(t.tableStates, version, TableId.metadataSerializer, TableMigrationState.serializer); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java new file mode 100644 index 000000000000..1e170f02f908 --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import com.google.common.primitives.SignedBytes; + +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.utils.LocalizeString; + +public enum ConsensusMigrationTarget +{ + paxos(0), + accord(1); + + public final byte value; + + ConsensusMigrationTarget(int value) + { + this.value = SignedBytes.checkedCast(value); + } + + public static ConsensusMigrationTarget fromString(String targetProtocol) + { + return ConsensusMigrationTarget.valueOf(LocalizeString.toLowerCaseLocalized(targetProtocol)); + } + + public static ConsensusMigrationTarget fromValue(byte value) + { + switch (value) + { + default: + throw new IllegalArgumentException(value + " is not recognized"); + case 0: + return paxos; + case 1: + return accord; + } + } + + public static ConsensusMigrationTarget fromTransactionalMode(TransactionalMode mode) + { + return mode.accordIsEnabled ? accord : paxos; + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java index ac63cc4bd95a..1188076c9024 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java @@ -22,16 +22,16 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.config.Config.LWTStrategy; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.dht.Range; import org.apache.cassandra.locator.EndpointsForToken; import org.apache.cassandra.locator.ReplicaLayout; +import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.paxos.Paxos; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -44,9 +44,8 @@ import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.accord; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV1; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV2; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.paxos; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; + +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget.paxos; /** * Helper class to decide where to route a request that requires consensus, migrating a key if necessary @@ -77,32 +76,85 @@ public static void resetInstance() protected ConsensusRequestRouter() {} - public ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull TableId tableId, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + ConsensusRoutingDecision decisionFor(TransactionalMode transactionalMode) { - // In accord mode there might be migration state in CM (unless cleanup gets added), but it doesn't - // matter. All other consensus protocols are not used. - if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) + if (transactionalMode.accordIsEnabled) return accord; - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tableId); - if (cfs == null) + return pickPaxos(); + } + + private static TableMetadata metadata(ClusterMetadata cm, String keyspace, String table) + { + KeyspaceMetadata ksm = cm.schema.getKeyspaceMetadata(keyspace); + TableMetadata tbm = ksm != null ? ksm.getTableOrViewNullable(table) : null; + + if (tbm == null) + throw new IllegalStateException("Can't route consensus request to nonexistent CFS %s.%s".format(keyspace, table)); + + return tbm; + } + + public ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull String keyspace, @Nonnull String table, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + ClusterMetadata cm = ClusterMetadata.current(); + TableMetadata metadata = metadata(cm, keyspace, table); + return routeAndMaybeMigrate(cm, metadata, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); + } + + public ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull TableId tableId, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + ClusterMetadata cm = ClusterMetadata.current(); + TableMetadata metadata = cm.schema.getTableMetadata(tableId); + if (metadata == null) throw new IllegalStateException("Can't route consensus request for nonexistent table %s".format(tableId.toString())); - return routeAndMaybeMigrate(key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + return routeAndMaybeMigrate(cm, metadata, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); } - protected ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + protected static boolean mayWriteThroughAccord(TableMetadata metadata) + { + return metadata.params.transactionalMode.writesThroughAccord || metadata.params.transactionalMigrationFrom.writesThroughAccord(); + } + + public boolean shouldWriteThroughAccordAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull TableId tableId, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) { ClusterMetadata cm = ClusterMetadata.current(); + TableMetadata metadata = cm.schema.getTableMetadata(tableId); + if (metadata == null) + throw new IllegalStateException("Can't route consensus request for nonexistent table %s".format(tableId.toString())); - TableMigrationState tms = cm.consensusMigrationState.tableStates.get(cfs.getTableId()); + if (!mayWriteThroughAccord(metadata)) + return false; + + consistencyLevel = consistencyLevel.isDatacenterLocal() ? ConsistencyLevel.LOCAL_SERIAL : ConsistencyLevel.SERIAL; + ConsensusRoutingDecision decision = routeAndMaybeMigrate(cm, metadata, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); + switch (decision) + { + case paxosV1: + case paxosV2: + return false; + case accord: + return true; + default: + throw new IllegalStateException("Unsupported consensus " + decision); + } + } + + protected ConsensusRoutingDecision routeAndMaybeMigrate(ClusterMetadata cm, @Nonnull TableMetadata tmd, @Nonnull DecoratedKey key, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + { + + if (!tmd.params.transactionalMigrationFrom.isMigrating()) + return decisionFor(tmd.params.transactionalMode); + + TableMigrationState tms = cm.consensusMigrationState.tableStates.get(tmd.id); if (tms == null) - return pickPaxos(); + return decisionFor(tmd.params.transactionalMigrationFrom.from); if (Range.isInNormalizedRanges(key.getToken(), tms.migratedRanges)) return pickMigrated(tms.targetProtocol); if (Range.isInNormalizedRanges(key.getToken(), tms.migratingRanges)) - return pickBasedOnKeyMigrationStatus(cm, tms, key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + return pickBasedOnKeyMigrationStatus(cm, tmd, tms, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); // It's not migrated so infer the protocol from the target return pickNotMigrated(tms.targetProtocol); @@ -112,10 +164,13 @@ protected ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey k * If the key was already migrated then we can pick the target protocol otherwise * we have to run a repair operation on the key to migrate it. */ - private static ConsensusRoutingDecision pickBasedOnKeyMigrationStatus(ClusterMetadata cm, TableMigrationState tms, DecoratedKey key, ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + private static ConsensusRoutingDecision pickBasedOnKeyMigrationStatus(ClusterMetadata cm, TableMetadata tmd, TableMigrationState tms, DecoratedKey key, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) { checkState(pickPaxos() != paxosV1, "Can't migrate from PaxosV1 to anything"); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tmd.id); + if (cfs == null) + throw new IllegalStateException("Can't route consensus request to nonexistent CFS %s.%s".format(tmd.keyspace, tmd.name)); // If it is locally replicated we can check our local migration state to see if it was already migrated EndpointsForToken naturalReplicas = ReplicaLayout.forNonLocalStrategyTokenRead(cm, cfs.keyspace.getMetadata(), key.getToken()); boolean isLocallyReplicated = naturalReplicas.lookup(FBUtilities.getBroadcastAddressAndPort()) != null; @@ -192,15 +247,18 @@ public boolean isKeyInMigratingOrMigratedRangeFromAccord(Epoch epoch, TableId ta { ClusterMetadata cm = ClusterMetadataService.instance().fetchLogFromCMS(epoch); TableMigrationState tms = cm.consensusMigrationState.tableStates.get(tableId); - return isKeyInMigratingOrMigratedRangeFromAccord(tms, key); + return isKeyInMigratingOrMigratedRangeFromAccord(cm.schema.getTableMetadata(tableId), tms, key); } /* * A lightweight check against cluster metadata that doesn't check if the key has already been migrated * using local system table state. */ - public boolean isKeyInMigratingOrMigratedRangeFromAccord(TableMigrationState tms, DecoratedKey key) + public boolean isKeyInMigratingOrMigratedRangeFromAccord(TableMetadata metadata, TableMigrationState tms, DecoratedKey key) { + if (!metadata.params.transactionalMigrationFrom.isMigrating()) + return false; + // No state means no migration for this table if (tms == null) return false; diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java new file mode 100644 index 000000000000..62727a013399 --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import java.util.*; +import java.util.function.Predicate; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.base.Predicates; +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.FutureCallback; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.repair.RepairJobDesc; +import org.apache.cassandra.repair.RepairParallelism; +import org.apache.cassandra.repair.RepairResult; +import org.apache.cassandra.repair.messages.RepairOption; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.paxos.Paxos; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.transformations.BeginConsensusMigrationForTableAndRange; +import org.apache.cassandra.tcm.transformations.MaybeFinishConsensusMigrationForTableAndRange; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Collections.emptyList; +import static org.apache.cassandra.dht.Range.normalize; +import static org.apache.cassandra.utils.CollectionSerializers.newListSerializer; + +/** + * Track and update the migration state of individual table and ranges within those tables + */ +public abstract class ConsensusTableMigration +{ + private static final Logger logger = LoggerFactory.getLogger(ConsensusTableMigration.class); + + public static final MetadataSerializer>> rangesSerializer = newListSerializer(Range.serializer); + + public static final FutureCallback completedRepairJobHandler = new FutureCallback() + { + @Override + public void onSuccess(@Nullable RepairResult repairResult) + { + checkNotNull(repairResult, "repairResult should not be null"); + ConsensusMigrationRepairResult migrationResult = repairResult.consensusMigrationRepairResult; + + // Need to repair both Paxos and base table state + // Could track them separately, but doesn't seem worth the effort + if (migrationResult.type == ConsensusMigrationRepairType.ineligible) + return; + + RepairJobDesc desc = repairResult.desc; + TableMetadata tm = Schema.instance.getTableMetadata(desc.keyspace, desc.columnFamily); + if (tm == null) + return; + TableMigrationState tms = ClusterMetadata.current().consensusMigrationState.tableStates.get(tm.id); + if (tms == null || !Range.intersects(tms.migratingRanges, desc.ranges)) + return; + + if (tms.targetProtocol == ConsensusMigrationTarget.paxos && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.accord) + return; + if (tms.targetProtocol == ConsensusMigrationTarget.accord && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.paxos) + return; + + ClusterMetadataService.instance().commit( + new MaybeFinishConsensusMigrationForTableAndRange( + desc.keyspace, desc.columnFamily, ImmutableList.copyOf(desc.ranges), + migrationResult.minEpoch, migrationResult.type)); + } + + @Override + public void onFailure(Throwable throwable) + { + // Only successes drive forward progress + } + }; + + private ConsensusTableMigration() {} + + public static @Nullable TableMigrationState getTableMigrationState(TableId tableId) + { + ClusterMetadata cm = ClusterMetadata.current(); + return cm.consensusMigrationState.tableStates.get(tableId); + } + // Used by callers to avoid looking up the TMS multiple times + public static @Nullable TableMigrationState getTableMigrationState(long epoch, TableId tableId) + { + ClusterMetadata cm = ClusterMetadataService.instance().fetchLogFromCMS(Epoch.create(epoch)); + return cm.consensusMigrationState.tableStates.get(tableId); + } + + public static void startMigrationToConsensusProtocol(@Nonnull String targetProtocolName, + @Nullable List keyspaceNames, + @Nonnull Optional> maybeTables, + @Nonnull Optional maybeRangesStr) + { + checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); + ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(targetProtocolName); + + if (keyspaceNames == null || keyspaceNames.isEmpty()) + { + keyspaceNames = ImmutableList.copyOf(StorageService.instance.getNonLocalStrategyKeyspaces()); + } + checkState(keyspaceNames.size() == 1 || !maybeTables.isPresent(), "Can't specify tables with multiple keyspaces"); + List ids = keyspacesAndTablesToTableIds(keyspaceNames, maybeTables); + + // TODO (review): should this perform the schema change to make these tables accord tables? + List tableIds = new ArrayList<>(); + for (TableId tableId : ids) + { + TableMetadata metadata = Schema.instance.getTableMetadata(tableId); + if (metadata == null || !metadata.params.transactionalMigrationFrom.isMigrating()) + continue; + TransactionalMode transactionalMode = metadata.params.transactionalMode; + if (!transactionalMode.writesThroughAccord && transactionalMode != TransactionalMode.unsafe_writes) + throw new IllegalStateException("non-SERIAL writes need to be routed through Accord before attempting migration, or enable mixed mode"); + tableIds.add(tableId); + } + + if (!Paxos.useV2()) + throw new IllegalStateException("Can't do any consensus migrations to/from PaxosV1, switch to V2 first"); + + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + Optional>> maybeParsedRanges = maybeRangesStr.map(rangesStr -> ImmutableList.copyOf(RepairOption.parseRanges(rangesStr, partitioner))); + Token minToken = partitioner.getMinimumToken(); + List> ranges = maybeParsedRanges.orElse(ImmutableList.of(new Range(minToken, minToken))); + + + ClusterMetadataService.instance().commit(new BeginConsensusMigrationForTableAndRange(targetProtocol, ranges, tableIds)); + } + + public static List finishMigrationToConsensusProtocol(@Nonnull String keyspace, + @Nonnull Optional> maybeTables, + @Nonnull Optional maybeRangesStr) + { + checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); + + Optional>> localKeyspaceRanges = Optional.of(ImmutableList.copyOf(StorageService.instance.getLocalReplicas(keyspace).onlyFull().ranges())); + List> ranges = maybeRangesToRanges(maybeRangesStr, localKeyspaceRanges); + Map allTableMigrationStates = ClusterMetadata.current().consensusMigrationState.tableStates; + List tableIds = keyspacesAndTablesToTableIds(ImmutableList.of(keyspace), maybeTables, Optional.of(allTableMigrationStates::containsKey)); + + checkState(tableIds.stream().allMatch(allTableMigrationStates::containsKey), "All tables need to be migrating"); + List tableMigrationStates = new ArrayList<>(); + tableIds.forEach(table -> { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(table); + if (cfs == null) + { + logger.warn("Table {} does not exist or was dropped", cfs); + return; + } + TableMigrationState tms = allTableMigrationStates.get(table); + if (tms == null) + { + logger.warn("Table {} does not have any migration state", cfs.name); + return; + } + if(!Range.intersects(ranges, tms.migratingRanges)) + { + logger.warn("Table {} with migrating ranges {} does not intersect with any requested ranges {}", cfs.name, tms.migratingRanges, ranges); + return; + } + tableMigrationStates.add(tms); + }); + + List migratingToAccord = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.accord).collect(toImmutableList()); + List migratingToPaxos = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.paxos).collect(toImmutableList());; + + Integer accordRepairCmd = finishMigrationToAccord(keyspace, migratingToAccord, ranges); + Integer paxosRepairCmd = finishMigrationToPaxos(keyspace, migratingToPaxos, ranges); + List result = new ArrayList<>(); + if (accordRepairCmd != null) + result.add(accordRepairCmd); + if (paxosRepairCmd != null) + result.add(paxosRepairCmd); + return result; + } + + private interface MigrationFinisher + { + Integer finish(Collection tables, List> ranges); + } + + private static Integer finishMigrationTo(String name, List tableMigrationStates, List> requestedRanges, MigrationFinisher migrationFinisher) + { + logger.info("Begin finish migration to {} for ranges {} and tables {}", name, requestedRanges, tableMigrationStates); + List> intersectingRanges = new ArrayList<>(); + tableMigrationStates.stream().map(TableMigrationState::migratingRanges).forEach(intersectingRanges::addAll); + intersectingRanges = Range.normalize(intersectingRanges); + intersectingRanges = Range.intersectionOfNormalizedRanges(intersectingRanges, requestedRanges); + if (intersectingRanges.isEmpty()) + { + logger.warn("No requested ranges {} intersect any migrating ranges in any table in keyspace {}"); + return null; + } + + // Repair requires that the ranges once again be grouped by the ranges provided originally which all + // fall within local range boundaries. This was already checked in maybeRangesToRanges. + List> intersectingRangesGrouped = new ArrayList<>(); + for (Range r : requestedRanges) + { + List> intersectionsForGroup = new ArrayList<>(); + for (Range intersectedRange : intersectingRanges) + intersectionsForGroup.addAll(r.intersectionWith(intersectedRange)); + intersectingRangesGrouped.addAll(normalize(intersectionsForGroup)); + } + return migrationFinisher.finish(tableMigrationStates, intersectingRangesGrouped); + } + + /* + * This is basically just invoking classic Cassandra repair and is pretty redundant with invoking repair + * directly which would also work without issue. It's include so the same interface works for both migrating to/from + * Accord, but it's not great in that repair has a lot of options that might need to be forwarded. + * + * Still maybe more valuable to put this layer of abstraction in so we can change how it works later and it's less + * tightly coupled with the Repair interface which is pretty orthogonal to consensus migration. + */ + private static Integer finishMigrationToAccord(String keyspace, List migratingToAccord, List> requestedRanges) + { + return finishMigrationTo("Accord", migratingToAccord, requestedRanges, (tables, intersectingRanges) -> { + RepairOption repairOption = getRepairOption(tables, intersectingRanges, false); + return StorageService.instance.repair(keyspace, repairOption, emptyList()).left; + }); + } + + private static Integer finishMigrationToPaxos(String keyspace, List migratingToPaxos, List> requestedRanges) + { + return finishMigrationTo("Paxos", migratingToPaxos, requestedRanges, (tables, intersectingRanges) -> { + RepairOption repairOption = getRepairOption(tables, intersectingRanges, true); + return StorageService.instance.repair(keyspace, repairOption, emptyList()).left; + }); + } + + + private static List keyspacesAndTablesToTableIds(@Nonnull List keyspaceNames, @Nonnull Optional> maybeTables) + { + return keyspacesAndTablesToTableIds(keyspaceNames, maybeTables, Optional.empty()); + } + + private static List keyspacesAndTablesToTableIds(@Nonnull List keyspaceNames, @Nonnull Optional> maybeTables, @Nonnull Optional> includeTable) + { + List tableIds = new ArrayList<>(); + for (String keyspaceName : keyspaceNames) + { + Optional> maybeTableIds = maybeTables.map(tableNames -> + tableNames + .stream() + .map(tableName -> { + TableMetadata tm = Schema.instance.getTableMetadata(keyspaceName, tableName); + if (tm == null) + throw new IllegalArgumentException("Unknown table %s.%s".format(keyspaceName, tableName)); + return tm.id; + }) + .collect(toImmutableList())); + tableIds.addAll( + maybeTableIds.orElseGet(() -> + Schema.instance.getKeyspaceInstance(keyspaceName).getColumnFamilyStores() + .stream() + .map(ColumnFamilyStore::getTableId) + .filter(includeTable.orElse(Predicates.alwaysTrue())) // Filter out non-migrating so they don't generate an error + .collect(toImmutableList()))); + } + return tableIds; + } + + @Nonnull + private static RepairOption getRepairOption(Collection tables, List> intersectingRanges, boolean accordRepair) + { + boolean primaryRange = false; + // TODO (review): Should disabling incremental repair be exposed for the Paxos repair in case someone explicitly does not do incremental repair? + boolean incremental = !accordRepair; + boolean trace = false; + int numJobThreads = 1; + boolean pullRepair = false; + boolean forceRepair = false; + boolean optimiseStreams = false; + boolean ignoreUnreplicatedKeyspaces = true; + boolean repairPaxos = !accordRepair; + boolean paxosOnly = false; + boolean dontPurgeTombstones = false; + RepairOption repairOption = new RepairOption(RepairParallelism.PARALLEL, primaryRange, incremental, trace, numJobThreads, intersectingRanges, pullRepair, forceRepair, PreviewKind.NONE, optimiseStreams, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); + tables.forEach(table -> repairOption.getColumnFamilies().add(table.tableName)); + return repairOption; + } + + + // Repair is restricted to local ranges, but manipulating CMS migration state doesn't need to be restricted + private static @Nonnull List> maybeRangesToRanges(@Nonnull Optional maybeRangesStr) + { + return maybeRangesToRanges(maybeRangesStr, Optional.empty()); + } + + private static @Nonnull List> maybeRangesToRanges(@Nonnull Optional maybeRangesStr, Optional>> restrictToRanges) + { + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + Optional>> maybeParsedRanges = maybeRangesStr.map(rangesStr -> ImmutableList.copyOf(RepairOption.parseRanges(rangesStr, partitioner))); + Token minToken = partitioner.getMinimumToken(); + List> defaultRanges = restrictToRanges.orElse(ImmutableList.of(new Range(minToken, minToken))); + List> ranges = maybeParsedRanges.orElse(defaultRanges); + checkArgument(ranges.stream().allMatch(range -> defaultRanges.stream().anyMatch(defaultRange -> defaultRange.contains(range))), + "If ranges are specified each range must be contained within a local range (" + defaultRanges + ") for this node to allow for precise repairs. Specified " + ranges); + return ranges; + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java deleted file mode 100644 index 62b03eefa98c..000000000000 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigrationState.java +++ /dev/null @@ -1,909 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.consensus.migration; - -import java.io.IOException; -import java.util.AbstractMap.SimpleEntry; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.function.Predicate; -import java.util.stream.Collectors; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -import com.google.common.base.Predicates; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableMap.Builder; -import com.google.common.collect.ImmutableSortedMap; -import com.google.common.primitives.SignedBytes; -import com.google.common.util.concurrent.FutureCallback; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.cassandra.config.Config.LWTStrategy; -import org.apache.cassandra.config.Config.NonSerialWriteStrategy; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.DecoratedKey; -import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.IVersionedSerializer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.repair.RepairJobDesc; -import org.apache.cassandra.repair.RepairParallelism; -import org.apache.cassandra.repair.RepairResult; -import org.apache.cassandra.repair.messages.RepairOption; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.paxos.Paxos; -import org.apache.cassandra.streaming.PreviewKind; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tcm.Epoch; -import org.apache.cassandra.tcm.MetadataValue; -import org.apache.cassandra.tcm.serialization.MetadataSerializer; -import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.tcm.transformations.BeginConsensusMigrationForTableAndRange; -import org.apache.cassandra.tcm.transformations.MaybeFinishConsensusMigrationForTableAndRange; -import org.apache.cassandra.tcm.transformations.SetConsensusMigrationTargetProtocol; -import org.apache.cassandra.utils.LocalizeString; -import org.apache.cassandra.utils.NullableSerializer; -import org.apache.cassandra.utils.PojoToString; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static java.util.Collections.emptyList; -import static org.apache.cassandra.db.TypeSizes.sizeof; -import static org.apache.cassandra.dht.Range.intersectionOfNormalizedRanges; -import static org.apache.cassandra.dht.Range.normalize; -import static org.apache.cassandra.dht.Range.subtract; -import static org.apache.cassandra.dht.Range.subtractNormalizedRanges; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.reset; -import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; -import static org.apache.cassandra.utils.CollectionSerializers.deserializeSet; -import static org.apache.cassandra.utils.CollectionSerializers.newHashMap; -import static org.apache.cassandra.utils.CollectionSerializers.newListSerializer; -import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; -import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; -import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; -import static org.apache.cassandra.utils.CollectionSerializers.serializedMapSize; - -/** - * Track and update the migration state of individual table and ranges within those tables - */ -public abstract class ConsensusTableMigrationState -{ - private static final Logger logger = LoggerFactory.getLogger(ConsensusTableMigrationState.class); - - public static final MetadataSerializer>> rangesSerializer = newListSerializer(Range.serializer); - - public static final FutureCallback completedRepairJobHandler = new FutureCallback() - { - @Override - public void onSuccess(@Nullable RepairResult repairResult) - { - checkNotNull(repairResult, "repairResult should not be null"); - ConsensusMigrationRepairResult migrationResult = repairResult.consensusMigrationRepairResult; - - // Need to repair both Paxos and base table state - // Could track them separately, but doesn't seem worth the effort - if (migrationResult.type == ConsensusMigrationRepairType.ineligible) - return; - - RepairJobDesc desc = repairResult.desc; - TableMetadata tm = Schema.instance.getTableMetadata(desc.keyspace, desc.columnFamily); - if (tm == null) - return; - TableMigrationState tms = ClusterMetadata.current().consensusMigrationState.tableStates.get(tm.id); - if (tms == null || !Range.intersects(tms.migratingRanges, desc.ranges)) - return; - - if (tms.targetProtocol == ConsensusMigrationTarget.paxos && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.accord) - return; - if (tms.targetProtocol == ConsensusMigrationTarget.accord && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.paxos) - return; - - logger.info("Repair {} is going to trigger migration completion for ranges {} and epoch {}", desc.sessionId, desc.ranges, migrationResult.minEpoch); - - ClusterMetadataService.instance().commit( - new MaybeFinishConsensusMigrationForTableAndRange( - desc.keyspace, desc.columnFamily, ImmutableList.copyOf(desc.ranges), - migrationResult.minEpoch, migrationResult.type)); - } - - @Override - public void onFailure(Throwable throwable) - { - // Only successes drive forward progress - } - }; - - public static void reset() - { - ClusterMetadata cm = ClusterMetadata.current(); - for (TableMigrationState tms : cm.consensusMigrationState.tableStates.values()) - setConsensusMigrationTargetProtocol("reset", - ImmutableList.of(tms.keyspaceName), - Optional.of(ImmutableList.of(tms.tableName))); - } - - public enum ConsensusMigrationRepairType - { - ineligible(0), - paxos(1), - accord(2); - - public final byte value; - - ConsensusMigrationRepairType(int value) - { - this.value = SignedBytes.checkedCast(value); - } - - public static ConsensusMigrationRepairType fromString(String repairType) - { - return ConsensusMigrationRepairType.valueOf(LocalizeString.toLowerCaseLocalized(repairType)); - } - - public static ConsensusMigrationRepairType fromValue(byte value) - { - switch (value) - { - default: - throw new IllegalArgumentException(value + " is not recognized"); - case 0: - return ConsensusMigrationRepairType.ineligible; - case 1: - return ConsensusMigrationRepairType.paxos; - case 2: - return ConsensusMigrationRepairType.accord; - } - } - } - - public enum ConsensusMigrationTarget - { - paxos(0), - accord(1), - reset(2); - - public final byte value; - - ConsensusMigrationTarget(int value) - { - this.value = SignedBytes.checkedCast(value); - } - - public static ConsensusMigrationTarget fromString(String targetProtocol) - { - return ConsensusMigrationTarget.valueOf(LocalizeString.toLowerCaseLocalized(targetProtocol)); - } - - public static ConsensusMigrationTarget fromValue(byte value) - { - switch (value) - { - default: - throw new IllegalArgumentException(value + " is not recognized"); - case 0: - return paxos; - case 1: - return accord; - case 2: - return reset; - } - } - } - - public static class ConsensusMigrationRepairResult - { - private final ConsensusMigrationRepairType type; - private final Epoch minEpoch; - - private ConsensusMigrationRepairResult(ConsensusMigrationRepairType type, Epoch minEpoch) - { - this.type = type; - this.minEpoch = minEpoch; - } - - public static ConsensusMigrationRepairResult fromCassandraRepair(Epoch minEpoch, boolean migrationEligibleRepair) - { - checkArgument(!migrationEligibleRepair || minEpoch.isAfter(Epoch.EMPTY), "Epoch should not be empty if Paxos and regular repairs were performed"); - if (migrationEligibleRepair) - return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.paxos, minEpoch); - else - return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.ineligible, Epoch.EMPTY); - } - - public static ConsensusMigrationRepairResult fromAccordRepair(Epoch minEpoch) - { - checkArgument(minEpoch.isAfter(Epoch.EMPTY), "Accord repairs should always occur at an Epoch"); - return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.accord, minEpoch); - } - } - - public static class ConsensusMigratedAt - { - public static final IVersionedSerializer serializer = NullableSerializer.wrap(new IVersionedSerializer() - { - @Override - public void serialize(ConsensusMigratedAt t, DataOutputPlus out, int version) throws IOException - { - Epoch.messageSerializer.serialize(t.migratedAtEpoch, out, version); - out.writeByte(t.migratedAtTarget.value); - } - - @Override - public ConsensusMigratedAt deserialize(DataInputPlus in, int version) throws IOException - { - Epoch migratedAtEpoch = Epoch.messageSerializer.deserialize(in, version); - ConsensusMigrationTarget target = ConsensusMigrationTarget.fromValue(in.readByte()); - return new ConsensusMigratedAt(migratedAtEpoch, target); - } - - @Override - public long serializedSize(ConsensusMigratedAt t, int version) - { - return TypeSizes.sizeof(ConsensusMigrationTarget.accord.value) - + Epoch.messageSerializer.serializedSize(t.migratedAtEpoch, version); - } - }); - - // Fields are not nullable when used for messaging - @Nullable - public final Epoch migratedAtEpoch; - - @Nullable - public final ConsensusMigrationTarget migratedAtTarget; - - public ConsensusMigratedAt(Epoch migratedAtEpoch, ConsensusMigrationTarget migratedAtTarget) - { - this.migratedAtEpoch = migratedAtEpoch; - this.migratedAtTarget = migratedAtTarget; - } - } - - // TODO (desired): Move this into the schema for the table once this is based off of TrM - public static class TableMigrationState - { - @Nonnull - public final String keyspaceName; - - @Nonnull - public final String tableName; - - @Nonnull - public final TableId tableId; - - @Nonnull - public final ConsensusMigrationTarget targetProtocol; - - @Nonnull - public final List> migratedRanges; - - /* - * Necessary to track which ranges started migrating at which epoch - * in order to know whether a repair qualifies in terms of finishing - * migration of the range. - */ - @Nonnull - public final NavigableMap>> migratingRangesByEpoch; - - public static final MetadataSerializer serializer = new MetadataSerializer() - { - @Override - public void serialize(TableMigrationState t, DataOutputPlus out, Version version) throws IOException - { - out.write(t.targetProtocol.value); - out.writeUTF(t.keyspaceName); - out.writeUTF(t.tableName); - t.tableId.serialize(out); - serializeCollection(t.migratedRanges, out, version, Range.serializer); - serializeMap(t.migratingRangesByEpoch, out, version, Epoch.serializer, rangesSerializer); - } - - @Override - public TableMigrationState deserialize(DataInputPlus in, Version version) throws IOException - { - ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromValue(in.readByte()); - String keyspaceName = in.readUTF(); - String tableName = in.readUTF(); - TableId tableId = TableId.deserialize(in); - Set> migratedRanges = deserializeSet(in, version, Range.serializer); - Map>> migratingRangesByEpoch = deserializeMap(in, version, Epoch.serializer, rangesSerializer, newHashMap()); - return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, migratingRangesByEpoch); - } - - @Override - public long serializedSize(TableMigrationState t, Version version) - { - return sizeof(t.targetProtocol.value) - + sizeof(t.keyspaceName) - + sizeof(t.tableName) - + t.tableId.serializedSize() - + serializedCollectionSize(t.migratedRanges, version, Range.serializer) - + serializedMapSize(t.migratingRangesByEpoch, version, Epoch.serializer, rangesSerializer); - } - }; - - @Nonnull - public final List> migratingRanges; - - @Nonnull - public final List> migratingAndMigratedRanges; - - public TableMigrationState(@Nonnull String keyspaceName, - @Nonnull String tableName, - @Nonnull TableId tableId, - @Nonnull ConsensusMigrationTarget targetProtocol, - @Nonnull Collection> migratedRanges, - @Nonnull Map>> migratingRangesByEpoch) - { - this.keyspaceName = keyspaceName; - this.tableName = tableName; - this.tableId = tableId; - this.targetProtocol = targetProtocol; - this.migratedRanges = ImmutableList.copyOf(normalize(migratedRanges)); - this.migratingRangesByEpoch = ImmutableSortedMap.copyOf( - migratingRangesByEpoch.entrySet() - .stream() - .map( entry -> new SimpleEntry<>(entry.getKey(), ImmutableList.copyOf(normalize(entry.getValue())))) - .collect(Collectors.toList())); - this.migratingRanges = ImmutableList.copyOf(normalize(migratingRangesByEpoch.values().stream().flatMap(Collection::stream).collect(Collectors.toList()))); - this.migratingAndMigratedRanges = ImmutableList.copyOf(normalize(ImmutableList.>builder().addAll(migratedRanges).addAll(migratingRanges).build())); - } - - public TableMigrationState withRangesMigrating(@Nonnull Collection> ranges, - @Nonnull ConsensusMigrationTarget target) - { - checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't already have an entry for the empty epoch"); - // Doesn't matter which epoch the range started migrating in for this context so merge them all - Collection> migratingRanges = normalize(migratingRangesByEpoch.values().stream().flatMap(Collection::stream).collect(Collectors.toList())); - checkArgument(target == targetProtocol, "Requested migration to target protocol " + target + " conflicts with in progress migration to protocol " + targetProtocol); - List> normalizedRanges = normalize(ranges); - if (subtract(normalizedRanges, migratingRanges).isEmpty()) - logger.warn("Range " + ranges + " is already being migrated"); - Set> withoutAlreadyMigrated = subtract(normalizedRanges, migratedRanges); - if (withoutAlreadyMigrated.isEmpty()) - logger.warn("Range " + ranges + " is already migrated"); - Set> withoutBoth = subtract(withoutAlreadyMigrated, migratingRanges); - if (withoutBoth.isEmpty()) - logger.warn("Range " + ranges + " is already migrating/migrated"); - - if (!Range.equals(normalizedRanges, withoutBoth)) - logger.warn("Ranges " + normalizedRanges + " to start migrating is already partially migrating/migrated " + withoutBoth); - - Map>> newMigratingRanges = new HashMap<>(migratingRangesByEpoch.size() + 1); - newMigratingRanges.putAll(migratingRangesByEpoch); - newMigratingRanges.put(Epoch.EMPTY, normalizedRanges); - - return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, newMigratingRanges); - } - - public TableMigrationState withReplacementForEmptyEpoch(@Nonnull Epoch replacementEpoch) - { - if (!migratingRangesByEpoch.containsKey(Epoch.EMPTY)) - return this; - Map>> newMigratingRangesByEpoch = new HashMap<>(migratingRangesByEpoch.size()); - migratingRangesByEpoch.forEach((epoch, ranges) -> { - if (epoch.equals(Epoch.EMPTY)) - newMigratingRangesByEpoch.put(replacementEpoch, ranges); - else - newMigratingRangesByEpoch.put(epoch, ranges); - }); - - if (newMigratingRangesByEpoch != null) - return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, newMigratingRangesByEpoch); - else - return this; - } - - public TableMigrationState withRangesRepairedAtEpoch(@Nonnull Collection> ranges, - @Nonnull Epoch epoch) - { - checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't have an entry for the empty epoch"); - checkArgument(epoch.isAfter(Epoch.EMPTY), "Epoch shouldn't be empty"); - - List> normalizedRepairedRanges = normalize(ranges); - // This should be inclusive because the epoch we store in the map is the epoch in which the range has been marked migrating - // in startMigrationToConsensusProtocol - NavigableMap>> coveredEpochs = migratingRangesByEpoch.headMap(epoch, true); - List> normalizedMigratingRanges = normalize(coveredEpochs.values().stream().flatMap(Collection::stream).collect(Collectors.toList())); - List> normalizedRepairedIntersection = intersectionOfNormalizedRanges(normalizedRepairedRanges, normalizedMigratingRanges); - checkState(!normalizedRepairedIntersection.isEmpty(), "None of Ranges " + ranges + " were being migrated"); - - Map>> newMigratingRangesByEpoch = new HashMap<>(); - - // Everything in this epoch or later can't have been migrated so re-add all of them - newMigratingRangesByEpoch.putAll(migratingRangesByEpoch.tailMap(epoch, false)); - - // Include anything still remaining to be migrated after subtracting what was repaired - for (Map.Entry>> e : coveredEpochs.entrySet()) - { - // Epoch when these ranges started migrating - Epoch rangesEpoch = e.getKey(); - List> epochMigratingRanges = e.getValue(); - List> remainingRanges = subtractNormalizedRanges(epochMigratingRanges, normalizedRepairedIntersection); - if (!remainingRanges.isEmpty()) - newMigratingRangesByEpoch.put(rangesEpoch, remainingRanges); - } - - List> newMigratedRanges = new ArrayList<>(normalizedMigratingRanges.size() + ranges.size()); - newMigratedRanges.addAll(migratedRanges); - newMigratedRanges.addAll(normalizedRepairedIntersection); - return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, newMigratedRanges, newMigratingRangesByEpoch); - } - - public boolean paxosReadSatisfiedByKeyMigrationAtEpoch(DecoratedKey key, ConsensusMigratedAt consensusMigratedAt) - { - // This check is being done from a Paxos read attempt which needs to - // check if Accord needs to resolve any in flight accord transactions - // if the migration target is Accord then nothing needs to be done - if (targetProtocol != ConsensusMigrationTarget.paxos) - return true; - - return satisfiedByKeyMigrationAtEpoch(key, consensusMigratedAt); - } - - public boolean satisfiedByKeyMigrationAtEpoch(@Nonnull DecoratedKey key, @Nullable ConsensusMigratedAt consensusMigratedAt) - { - if (consensusMigratedAt == null) - { - // It hasn't been migrated and needs migration if it is in a migrating range - return Range.isInNormalizedRanges(key.getToken(), migratingRanges); - } - else - { - // It has been migrated and might be from a late enough epoch to satisfy this migration - return consensusMigratedAt.migratedAtTarget == targetProtocol - && migratingRangesByEpoch.headMap(consensusMigratedAt.migratedAtEpoch, true).values() - .stream() - .flatMap(List::stream) - .anyMatch(range -> range.contains(key.getToken())); - } - } - - public Epoch minMigrationEpoch(Token token) - { - for (Map.Entry>> e : migratingRangesByEpoch.entrySet()) - { - if (Range.isInNormalizedRanges(token, e.getValue())) - return e.getKey(); - } - return Epoch.EMPTY; - } - - - public @Nonnull TableId getTableId() - { - return tableId; - } - - public TableMigrationState withMigrationTarget(ConsensusMigrationTarget newTargetProtocol) - { - checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't have an entry for the empty epoch"); - if (this.targetProtocol == newTargetProtocol) - return this; - - // Migrating ranges remain migrating because individual keys may have already been migrated - // So for correctness we need to perform key migration - // We do need to update the epoch so that a new repair is required to drive the migration - Map>> migratingRangesByEpoch = ImmutableMap.of(Epoch.EMPTY, migratingRanges); - - Token minToken = ColumnFamilyStore.getIfExists(tableId).getPartitioner().getMinimumToken(); - Range fullRange = new Range(minToken, minToken); - // What is migrated already is anything that was never migrated/migrating before (untouched) - List> migratedRanges = ImmutableList.copyOf(normalize(fullRange.subtractAll(migratingAndMigratedRanges))); - - return new TableMigrationState(keyspaceName, tableName, tableId, newTargetProtocol, migratedRanges, migratingRangesByEpoch); - } - - public Map toMap() - { - Builder builder = ImmutableMap.builder(); - builder.put("keyspace", keyspaceName); - builder.put("table", tableName); - builder.put("tableId", tableId.toString()); - builder.put("targetProtocol", targetProtocol.toString()); - builder.put("migratedRanges", migratedRanges.stream().map(Objects::toString).collect(toImmutableList())); - Map> rangesByEpoch = new LinkedHashMap<>(); - for (Map.Entry>> entry : migratingRangesByEpoch.entrySet()) - { - rangesByEpoch.put(entry.getKey().getEpoch(), entry.getValue().stream().map(Objects::toString).collect(toImmutableList())); - } - builder.put("migratingRangesByEpoch", rangesByEpoch); - return builder.build(); - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - TableMigrationState that = (TableMigrationState) o; - return keyspaceName.equals(that.keyspaceName) && tableName.equals(that.tableName) && tableId.equals(that.tableId) && targetProtocol == that.targetProtocol && migratedRanges.equals(that.migratedRanges) && migratingRangesByEpoch.equals(that.migratingRangesByEpoch) && migratingRanges.equals(that.migratingRanges) && migratingAndMigratedRanges.equals(that.migratingAndMigratedRanges); - } - - @Override - public int hashCode() - { - return Objects.hash(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, migratingRangesByEpoch, migratingRanges, migratingAndMigratedRanges); - } - - public List> migratingRanges() - { - return migratingRanges; - } - } - - public static class ConsensusMigrationState implements MetadataValue - { - public static ConsensusMigrationState EMPTY = new ConsensusMigrationState(Epoch.EMPTY, ImmutableMap.of()); - @Nonnull - public final Map tableStates; - - public final Epoch lastModified; - - - public ConsensusMigrationState(@Nonnull Epoch lastModified, @Nonnull Map tableStates) - { - checkNotNull(tableStates, "tableStates is null"); - checkNotNull(lastModified, "lastModified is null"); - this.lastModified = lastModified; - this.tableStates = ImmutableMap.copyOf(tableStates); - } - - public Map toMap(@Nullable Set keyspaceNames, @Nullable Set tableNames) - { - return ImmutableMap.of("lastModifiedEpoch", lastModified.getEpoch(), - "tableStates", tableStatesAsMaps(keyspaceNames, tableNames), - "version", PojoToString.CURRENT_VERSION); - } - - private List> tableStatesAsMaps(@Nullable Set keyspaceNames, - @Nullable Set tableNames) - { - ImmutableList.Builder> builder = ImmutableList.builder(); - for (TableMigrationState tms : tableStates.values()) - { - if (keyspaceNames != null && !keyspaceNames.contains(tms.keyspaceName)) - continue; - if (tableNames != null && !tableNames.contains(tms.tableName)) - continue; - builder.add(tms.toMap()); - } - return builder.build(); - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ConsensusMigrationState that = (ConsensusMigrationState) o; - return tableStates.equals(that.tableStates); - } - - @Override - public int hashCode() - { - return Objects.hash(tableStates); - } - - public static final MetadataSerializer serializer = new MetadataSerializer() - { - @Override - public void serialize(ConsensusMigrationState consensusMigrationState, DataOutputPlus out, Version version) throws IOException - { - Epoch.serializer.serialize(consensusMigrationState.lastModified, out, version); - serializeMap(consensusMigrationState.tableStates, out, version, TableId.metadataSerializer, TableMigrationState.serializer); - } - - @Override - public ConsensusMigrationState deserialize(DataInputPlus in, Version version) throws IOException - { - Epoch lastModified = Epoch.serializer.deserialize(in, version); - Map tableMigrationStates = deserializeMap(in, version, TableId.metadataSerializer, TableMigrationState.serializer, newHashMap()); - return new ConsensusMigrationState(lastModified, tableMigrationStates); - } - - @Override - public long serializedSize(ConsensusMigrationState t, Version version) - { - return Epoch.serializer.serializedSize(t.lastModified, version) - + serializedMapSize(t.tableStates, version, TableId.metadataSerializer, TableMigrationState.serializer); - } - }; - - @Override - public ConsensusMigrationState withLastModified(Epoch epoch) - { - ImmutableMap.Builder newMap = ImmutableMap.builderWithExpectedSize(tableStates.size()); - tableStates.forEach((tableId, tableState) -> { - newMap.put(tableId, tableState.withReplacementForEmptyEpoch(epoch)); - }); - return new ConsensusMigrationState(epoch, newMap.build()); - } - - @Override - public Epoch lastModified() - { - return lastModified; - } - } - - private ConsensusTableMigrationState() {} - - // Used by callers to avoid looking up the TMS multiple times - public static @Nullable TableMigrationState getTableMigrationState(TableId tableId) - { - TableMigrationState tms = ClusterMetadata.current().consensusMigrationState.tableStates.get(tableId); - return tms; - } - - /* - * Set or change the migration target for the keyspaces and tables. Can be used to reverse the direction of a migration - * or instantly migrate a table to a new protocol. - */ - public static void setConsensusMigrationTargetProtocol(@Nonnull String targetProtocolName, - @Nullable List keyspaceNames, - @Nonnull Optional> maybeTables) - { - checkArgument(!maybeTables.isPresent() || (keyspaceNames != null && keyspaceNames.size() == 1), "Must specify one keyspace along with tables"); - checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); - keyspaceNames = maybeDefaultKeyspaceNames(keyspaceNames); - ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(targetProtocolName); - - if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) - throw new IllegalStateException("Mixing a hard coded strategy with migration is unsupported"); - - if (!Paxos.useV2()) - throw new IllegalStateException("Can't do any consensus migrations from/to PaxosV1, switch to V2 first"); - - List tableIds = keyspacesAndTablesToTableIds(keyspaceNames, maybeTables); - ClusterMetadataService.instance().commit(new SetConsensusMigrationTargetProtocol(targetProtocol, tableIds)); - } - - public static void startMigrationToConsensusProtocol(@Nonnull String targetProtocolName, - @Nullable List keyspaceNames, - @Nonnull Optional> maybeTables, - @Nonnull Optional maybeRangesStr) - { - checkState(keyspaceNames.size() == 1 || !maybeTables.isPresent(), "Must specify one keyspace along with tables"); - checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); - ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(targetProtocolName); - checkArgument(targetProtocol != reset, "Can't start migration to reset"); - - - if (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord) - throw new IllegalStateException("Mixing a hard coded strategy with migration is unsupported"); - - NonSerialWriteStrategy nonSerialWriteStrategy = DatabaseDescriptor.getNonSerialWriteStrategy(); - if (!nonSerialWriteStrategy.writesThroughAccord && nonSerialWriteStrategy != NonSerialWriteStrategy.mixed) - throw new IllegalStateException("non-SERIAL writes need to be routed through Accord before attempting migration, or enable mixed mode"); - - if (!Paxos.useV2()) - throw new IllegalStateException("Can't do any consensus migrations to/from PaxosV1, switch to V2 first"); - - keyspaceNames = maybeDefaultKeyspaceNames(keyspaceNames); - List> ranges = maybeRangesToRanges(maybeRangesStr); - List tableIds = keyspacesAndTablesToTableIds(keyspaceNames, maybeTables); - - ClusterMetadataService.instance().commit(new BeginConsensusMigrationForTableAndRange(targetProtocol, ranges, tableIds)); - } - - public static List finishMigrationToConsensusProtocol(@Nonnull String keyspace, - @Nonnull Optional> maybeTables, - @Nonnull Optional maybeRangesStr) - { - checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); - - Optional>> localKeyspaceRanges = Optional.of(ImmutableList.copyOf(StorageService.instance.getLocalReplicas(keyspace).onlyFull().ranges())); - List> ranges = maybeRangesToRanges(maybeRangesStr, localKeyspaceRanges); - Map allTableMigrationStates = ClusterMetadata.current().consensusMigrationState.tableStates; - List tableIds = keyspacesAndTablesToTableIds(ImmutableList.of(keyspace), maybeTables, Optional.of(allTableMigrationStates::containsKey)); - - checkState(tableIds.stream().allMatch(allTableMigrationStates::containsKey), "All tables need to be migrating"); - List tableMigrationStates = new ArrayList<>(); - tableIds.forEach(table -> { - ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(table); - if (cfs == null) - { - logger.warn("Table {} does not exist or was dropped", cfs); - return; - } - TableMigrationState tms = allTableMigrationStates.get(table); - if (tms == null) - { - logger.warn("Table {} does not have any migration state", cfs.name); - return; - } - if(!Range.intersects(ranges, tms.migratingRanges)) - { - logger.warn("Table {} with migrating ranges {} does not intersect with any requested ranges {}", cfs.name, tms.migratingRanges, ranges); - return; - } - tableMigrationStates.add(tms); - }); - - List migratingToAccord = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.accord).collect(toImmutableList()); - List migratingToPaxos = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.paxos).collect(toImmutableList());; - - Integer accordRepairCmd = finishMigrationToAccord(keyspace, migratingToAccord, ranges); - Integer paxosRepairCmd = finishMigrationToPaxos(keyspace, migratingToPaxos, ranges); - List result = new ArrayList<>(); - if (accordRepairCmd != null) - result.add(accordRepairCmd); - if (paxosRepairCmd != null) - result.add(paxosRepairCmd); - return result; - } - - private interface MigrationFinisher - { - Integer finish(Collection tables, List> ranges); - } - - private static Integer finishMigrationTo(String name, List tableMigrationStates, List> requestedRanges, MigrationFinisher migrationFinisher) - { - logger.info("Begin finish migration to {} for ranges {} and tables {}", name, requestedRanges, tableMigrationStates); - List> intersectingRanges = new ArrayList<>(); - tableMigrationStates.stream().map(TableMigrationState::migratingRanges).forEach(intersectingRanges::addAll); - intersectingRanges = Range.normalize(intersectingRanges); - intersectingRanges = Range.intersectionOfNormalizedRanges(intersectingRanges, requestedRanges); - if (intersectingRanges.isEmpty()) - { - logger.warn("No requested ranges {} intersect any migrating ranges in any table in keyspace {}"); - return null; - } - - // Repair requires that the ranges once again be grouped by the ranges provided originally which all - // fall within local range boundaries. This was already checked in maybeRangesToRanges. - List> intersectingRangesGrouped = new ArrayList<>(); - for (Range r : requestedRanges) - { - List> intersectionsForGroup = new ArrayList<>(); - for (Range intersectedRange : intersectingRanges) - intersectionsForGroup.addAll(r.intersectionWith(intersectedRange)); - intersectingRangesGrouped.addAll(normalize(intersectionsForGroup)); - } - return migrationFinisher.finish(tableMigrationStates, intersectingRangesGrouped); - } - - /* - * This is basically just invoking classic Cassandra repair and is pretty redundant with invoking repair - * directly which would also work without issue. It's include so the same interface works for both migrating to/from - * Accord, but it's not great in that repair has a lot of options that might need to be forwarded. - * - * Still maybe more valuable to put this layer of abstraction in so we can change how it works later and it's less - * tightly coupled with the Repair interface which is pretty orthogonal to consensus migration. - */ - private static Integer finishMigrationToAccord(String keyspace, List migratingToAccord, List> requestedRanges) - { - return finishMigrationTo("Accord", migratingToAccord, requestedRanges, (tables, intersectingRanges) -> { - RepairOption repairOption = getRepairOption(tables, intersectingRanges, false); - return StorageService.instance.repair(keyspace, repairOption, emptyList()).left; - }); - } - - private static Integer finishMigrationToPaxos(String keyspace, List migratingToPaxos, List> requestedRanges) - { - return finishMigrationTo("Paxos", migratingToPaxos, requestedRanges, (tables, intersectingRanges) -> { - RepairOption repairOption = getRepairOption(tables, intersectingRanges, true); - return StorageService.instance.repair(keyspace, repairOption, emptyList()).left; - }); - } - - @Nonnull - private static RepairOption getRepairOption(Collection tables, List> intersectingRanges, boolean accordRepair) - { - boolean primaryRange = false; - // TODO (review): Should disabling incremental repair be exposed for the Paxos repair in case someone explicitly does not do incremental repair? - boolean incremental = !accordRepair; - boolean trace = false; - int numJobThreads = 1; - boolean pullRepair = false; - boolean forceRepair = false; - boolean optimiseStreams = false; - boolean ignoreUnreplicatedKeyspaces = true; - boolean repairPaxos = !accordRepair; - boolean paxosOnly = false; - boolean dontPurgeTombstones = false; - RepairOption repairOption = new RepairOption(RepairParallelism.PARALLEL, primaryRange, incremental, trace, numJobThreads, intersectingRanges, pullRepair, forceRepair, PreviewKind.NONE, optimiseStreams, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); - tables.forEach(table -> repairOption.getColumnFamilies().add(table.tableName)); - return repairOption; - } - - - // Repair is restricted to local ranges, but manipulating CMS migration state doesn't need to be restricted - private static @Nonnull List> maybeRangesToRanges(@Nonnull Optional maybeRangesStr) - { - return maybeRangesToRanges(maybeRangesStr, Optional.empty()); - } - - private static @Nonnull List> maybeRangesToRanges(@Nonnull Optional maybeRangesStr, Optional>> restrictToRanges) - { - IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); - Optional>> maybeParsedRanges = maybeRangesStr.map(rangesStr -> ImmutableList.copyOf(RepairOption.parseRanges(rangesStr, partitioner))); - Token minToken = partitioner.getMinimumToken(); - List> defaultRanges = restrictToRanges.orElse(ImmutableList.of(new Range(minToken, minToken))); - List> ranges = maybeParsedRanges.orElse(defaultRanges); - checkArgument(ranges.stream().allMatch(range -> defaultRanges.stream().anyMatch(defaultRange -> defaultRange.contains(range))), - "If ranges are specified each range must be contained within a local range (" + defaultRanges + ") for this node to allow for precise repairs. Specified " + ranges); - return ranges; - } - - private static List maybeDefaultKeyspaceNames(@Nullable List keyspaceNames) - { - if (keyspaceNames == null || keyspaceNames.isEmpty()) - { - keyspaceNames = ImmutableList.copyOf(StorageService.instance.getNonSystemKeyspaces()); - } - checkState(keyspaceNames.stream().noneMatch(SchemaConstants::isSystemKeyspace), "Migrating system keyspaces is not supported"); - return keyspaceNames; - } - - private static List keyspacesAndTablesToTableIds(@Nonnull List keyspaceNames, @Nonnull Optional> maybeTables) - { - return keyspacesAndTablesToTableIds(keyspaceNames, maybeTables, Optional.empty()); - } - - private static List keyspacesAndTablesToTableIds(@Nonnull List keyspaceNames, @Nonnull Optional> maybeTables, @Nonnull Optional> includeTable) - { - List tableIds = new ArrayList<>(); - for (String keyspaceName : keyspaceNames) - { - Optional> maybeTableIds = maybeTables.map(tableNames -> - tableNames - .stream() - .map(tableName -> { - TableMetadata tm = Schema.instance.getTableMetadata(keyspaceName, tableName); - if (tm == null) - throw new IllegalArgumentException("Unknown table %s.%s".format(keyspaceName, tableName)); - return tm.id; - }) - .collect(toImmutableList())); - tableIds.addAll( - maybeTableIds.orElseGet(() -> - Schema.instance.getKeyspaceInstance(keyspaceName).getColumnFamilyStores() - .stream() - .map(ColumnFamilyStore::getTableId) - .filter(includeTable.orElse(Predicates.alwaysTrue())) // Filter out non-migrating so they don't generate an error - .collect(toImmutableList()))); - } - return tableIds; - } -} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/TableMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/TableMigrationState.java new file mode 100644 index 000000000000..a5b0f7db5e09 --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/TableMigrationState.java @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSortedMap; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static org.apache.cassandra.db.TypeSizes.sizeof; +import static org.apache.cassandra.dht.Range.intersectionOfNormalizedRanges; +import static org.apache.cassandra.dht.Range.normalize; +import static org.apache.cassandra.dht.Range.subtract; +import static org.apache.cassandra.dht.Range.subtractNormalizedRanges; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.deserializeSet; +import static org.apache.cassandra.utils.CollectionSerializers.newHashMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; +import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; +import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; +import static org.apache.cassandra.utils.CollectionSerializers.serializedMapSize; + +// TODO Move this into the schema for the table once this is based off of TrM +public class TableMigrationState +{ + private static final Logger logger = LoggerFactory.getLogger(TableMigrationState.class); + + @Nonnull + public final String keyspaceName; + + @Nonnull + public final String tableName; + + @Nonnull + public final TableId tableId; + + @Nonnull + public final ConsensusMigrationTarget targetProtocol; + + @Nonnull + public final List> migratedRanges; + + /* + * Necessary to track which ranges started migrating at which epoch + * in order to know whether a repair qualifies in terms of finishing + * migration of the range. + */ + @Nonnull + public final NavigableMap>> migratingRangesByEpoch; + + @Nonnull + public final List> migratingRanges; + + @Nonnull + public final List> migratingAndMigratedRanges; + + public TableMigrationState(@Nonnull String keyspaceName, + @Nonnull String tableName, + @Nonnull TableId tableId, + @Nonnull ConsensusMigrationTarget targetProtocol, + @Nonnull Collection> migratedRanges, + @Nonnull Map>> migratingRangesByEpoch) + { + this.keyspaceName = keyspaceName; + this.tableName = tableName; + this.tableId = tableId; + this.targetProtocol = targetProtocol; + this.migratedRanges = ImmutableList.copyOf(normalize(migratedRanges)); + this.migratingRangesByEpoch = ImmutableSortedMap.copyOf( + migratingRangesByEpoch.entrySet() + .stream() + .map(entry -> new AbstractMap.SimpleEntry<>(entry.getKey(), ImmutableList.copyOf(normalize(entry.getValue())))) + .collect(Collectors.toList())); + this.migratingRanges = ImmutableList.copyOf(normalize(migratingRangesByEpoch.values().stream().flatMap(Collection::stream).collect(Collectors.toList()))); + this.migratingAndMigratedRanges = ImmutableList.copyOf(normalize(ImmutableList.>builder().addAll(migratedRanges).addAll(migratingRanges).build())); + } + + public TableMigrationState reverseMigration(ConsensusMigrationTarget target, Epoch epoch) + { + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + Range fullRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMinimumToken()); + List> allTouched = new ArrayList<>(migratedRanges); + allTouched.addAll(migratingRanges); + allTouched = Range.deoverlap(allTouched); + return new TableMigrationState(keyspaceName, tableName, tableId, target, + Range.normalize(fullRange.subtractAll(allTouched)), + Collections.singletonMap(epoch, migratingRanges)); + } + + public boolean hasMigratedFullTokenRange(IPartitioner partitioner) + { + // migrated ranges are normalized + if (!migratingRanges.isEmpty() || migratedRanges.size() > 1) + return false; + + Range fullRange = new Range<>(partitioner.getMinimumToken(), partitioner.getMinimumToken()); + return migratedRanges.get(0).contains(fullRange); + } + + @Nonnull + public List> migratingRanges() { + + return migratingRanges; + } + + public TableMigrationState withRangesMigrating(@Nonnull Collection> ranges, + @Nonnull ConsensusMigrationTarget target) + { + checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't already have an entry for the empty epoch"); + // Doesn't matter which epoch the range started migrating in for this context so merge them all + Collection> migratingRanges = normalize(migratingRangesByEpoch.values().stream().flatMap(Collection::stream).collect(Collectors.toList())); + checkArgument(target == targetProtocol, "Requested migration to target protocol " + target + " conflicts with in progress migration to protocol " + targetProtocol); + List> normalizedRanges = normalize(ranges); + if (subtract(normalizedRanges, migratingRanges).isEmpty()) + logger.warn("Range " + ranges + " is already being migrated"); + Set> withoutAlreadyMigrated = subtract(normalizedRanges, migratedRanges); + if (withoutAlreadyMigrated.isEmpty()) + logger.warn("Range " + ranges + " is already migrated"); + Set> withoutBoth = subtract(withoutAlreadyMigrated, migratingRanges); + if (withoutBoth.isEmpty()) + logger.warn("Range " + ranges + " is already migrating/migrated"); + + if (!Range.equals(normalizedRanges, withoutBoth)) + logger.warn("Ranges " + normalizedRanges + " to start migrating is already partially migrating/migrated " + withoutBoth); + + Map>> newMigratingRanges = new HashMap<>(migratingRangesByEpoch.size() + 1); + newMigratingRanges.putAll(migratingRangesByEpoch); + newMigratingRanges.put(Epoch.EMPTY, normalizedRanges); + + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, newMigratingRanges); + } + + public TableMigrationState withReplacementForEmptyEpoch(@Nonnull Epoch replacementEpoch) + { + if (!migratingRangesByEpoch.containsKey(Epoch.EMPTY)) + return this; + Map>> newMigratingRangesByEpoch = new HashMap<>(migratingRangesByEpoch.size()); + migratingRangesByEpoch.forEach((epoch, ranges) -> { + if (epoch.equals(Epoch.EMPTY)) + newMigratingRangesByEpoch.put(replacementEpoch, ranges); + else + newMigratingRangesByEpoch.put(epoch, ranges); + }); + + if (newMigratingRangesByEpoch != null) + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, newMigratingRangesByEpoch); + else + return this; + } + + public TableMigrationState withRangesRepairedAtEpoch(@Nonnull Collection> ranges, + @Nonnull Epoch epoch) + { + checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't have an entry for the empty epoch"); + checkArgument(epoch.isAfter(Epoch.EMPTY), "Epoch shouldn't be empty"); + + List> normalizedRepairedRanges = normalize(ranges); + // This should be inclusive because the epoch we store in the map is the epoch in which the range has been marked migrating + // in startMigrationToConsensusProtocol + NavigableMap>> coveredEpochs = migratingRangesByEpoch.headMap(epoch, true); + List> normalizedMigratingRanges = normalize(coveredEpochs.values().stream().flatMap(Collection::stream).collect(Collectors.toList())); + List> normalizedRepairedIntersection = intersectionOfNormalizedRanges(normalizedRepairedRanges, normalizedMigratingRanges); + checkState(!normalizedRepairedIntersection.isEmpty(), "None of Ranges " + ranges + " were being migrated"); + + Map>> newMigratingRangesByEpoch = new HashMap<>(); + + // Everything in this epoch or later can't have been migrated so re-add all of them + newMigratingRangesByEpoch.putAll(migratingRangesByEpoch.tailMap(epoch, false)); + + // Include anything still remaining to be migrated after subtracting what was repaired + for (Map.Entry>> e : coveredEpochs.entrySet()) + { + // Epoch when these ranges started migrating + Epoch rangesEpoch = e.getKey(); + List> epochMigratingRanges = e.getValue(); + List> remainingRanges = subtractNormalizedRanges(epochMigratingRanges, normalizedRepairedIntersection); + if (!remainingRanges.isEmpty()) + newMigratingRangesByEpoch.put(rangesEpoch, remainingRanges); + } + + List> newMigratedRanges = new ArrayList<>(normalizedMigratingRanges.size() + ranges.size()); + newMigratedRanges.addAll(migratedRanges); + newMigratedRanges.addAll(normalizedRepairedIntersection); + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, newMigratedRanges, newMigratingRangesByEpoch); + } + + public boolean paxosReadSatisfiedByKeyMigrationAtEpoch(DecoratedKey key, ConsensusMigratedAt consensusMigratedAt) + { + // This check is being done from a Paxos read attempt which needs to + // check if Accord needs to resolve any in flight accord transactions + // if the migration target is Accord then nothing needs to be done + if (targetProtocol != ConsensusMigrationTarget.paxos) + return true; + + return satisfiedByKeyMigrationAtEpoch(key, consensusMigratedAt); + } + + public boolean satisfiedByKeyMigrationAtEpoch(@Nonnull DecoratedKey key, @Nullable ConsensusMigratedAt consensusMigratedAt) + { + if (consensusMigratedAt == null) + { + // It hasn't been migrated and needs migration if it is in a migrating range + return Range.isInNormalizedRanges(key.getToken(), migratingRanges); + } + else + { + // It has been migrated and might be from a late enough epoch to satisfy this migration + return consensusMigratedAt.migratedAtTarget == targetProtocol + && migratingRangesByEpoch.headMap(consensusMigratedAt.migratedAtEpoch, true).values() + .stream() + .flatMap(List::stream) + .anyMatch(range -> range.contains(key.getToken())); + } + } + + public Epoch minMigrationEpoch(Token token) + { + // TODO should there be an index to make this more efficient? + for (Map.Entry>> e : migratingRangesByEpoch.entrySet()) + { + if (Range.isInNormalizedRanges(token, e.getValue())) + return e.getKey(); + } + return Epoch.EMPTY; + } + + + public @Nonnull TableId getTableId() + { + return tableId; + } + + public TableMigrationState withMigrationTarget(ConsensusMigrationTarget newTargetProtocol) + { + checkState(!migratingRangesByEpoch.containsKey(Epoch.EMPTY), "Shouldn't have an entry for the empty epoch"); + if (targetProtocol == newTargetProtocol) + return this; + + // Migrating ranges remain migrating because individual keys may have already been migrated + // So for correctness we need to perform key migration + // We do need to update the epoch so that a new repair is required to drive the migration + Map>> migratingRangesByEpoch = ImmutableMap.of(Epoch.EMPTY, migratingRanges); + + Token minToken = ColumnFamilyStore.getIfExists(tableId).getPartitioner().getMinimumToken(); + Range fullRange = new Range(minToken, minToken); + // What is migrated already is anything that was never migrated/migrating before (untouched) + List> migratedRanges = ImmutableList.copyOf(normalize(fullRange.subtractAll(migratingAndMigratedRanges))); + + return new TableMigrationState(keyspaceName, tableName, tableId, newTargetProtocol, migratedRanges, migratingRangesByEpoch); + } + + public Map toMap() + { + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.put("keyspace", keyspaceName); + builder.put("table", tableName); + builder.put("tableId", tableId.toString()); + builder.put("targetProtocol", targetProtocol.toString()); + builder.put("migratedRanges", migratedRanges.stream().map(Objects::toString).collect(toImmutableList())); + Map> rangesByEpoch = new LinkedHashMap<>(); + for (Map.Entry>> entry : migratingRangesByEpoch.entrySet()) + { + rangesByEpoch.put(entry.getKey().getEpoch(), entry.getValue().stream().map(Objects::toString).collect(toImmutableList())); + } + builder.put("migratingRangesByEpoch", rangesByEpoch); + return builder.build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TableMigrationState that = (TableMigrationState) o; + return keyspaceName.equals(that.keyspaceName) && tableName.equals(that.tableName) && tableId.equals(that.tableId) && targetProtocol == that.targetProtocol && migratedRanges.equals(that.migratedRanges) && migratingRangesByEpoch.equals(that.migratingRangesByEpoch) && migratingRanges.equals(that.migratingRanges) && migratingAndMigratedRanges.equals(that.migratingAndMigratedRanges); + } + + @Override + public int hashCode() + { + return Objects.hash(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, migratingRangesByEpoch, migratingRanges, migratingAndMigratedRanges); + } + + public static final MetadataSerializer serializer = new MetadataSerializer() + { + @Override + public void serialize(TableMigrationState t, DataOutputPlus out, Version version) throws IOException + { + out.write(t.targetProtocol.value); + out.writeUTF(t.keyspaceName); + out.writeUTF(t.tableName); + t.tableId.serialize(out); + serializeCollection(t.migratedRanges, out, version, Range.serializer); + serializeMap(t.migratingRangesByEpoch, out, version, Epoch.serializer, ConsensusTableMigration.rangesSerializer); + } + + @Override + public TableMigrationState deserialize(DataInputPlus in, Version version) throws IOException + { + ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromValue(in.readByte()); + String keyspaceName = in.readUTF(); + String tableName = in.readUTF(); + TableId tableId = TableId.deserialize(in); + Set> migratedRanges = deserializeSet(in, version, Range.serializer); + Map>> migratingRangesByEpoch = deserializeMap(in, version, Epoch.serializer, ConsensusTableMigration.rangesSerializer, newHashMap()); + return new TableMigrationState(keyspaceName, tableName, tableId, targetProtocol, migratedRanges, migratingRangesByEpoch); + } + + @Override + public long serializedSize(TableMigrationState t, Version version) + { + return sizeof(t.targetProtocol.value) + + sizeof(t.keyspaceName) + + sizeof(t.tableName) + + t.tableId.serializedSize() + + serializedCollectionSize(t.migratedRanges, version, Range.serializer) + + serializedMapSize(t.migratingRangesByEpoch, version, Epoch.serializer, ConsensusTableMigration.rangesSerializer); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java b/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java new file mode 100644 index 000000000000..8cbef514d810 --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.utils.LocalizeString; + +/** + * This tracks the state of a migration either from Paxos -> Accord, Accord [interop mode a] -> Accord [interop mode b] or Accord -> Paxos. + * The `TransactionalMode` associated with each transition from a system is how interoperability should be achieved during the migration with various performance/safety tradeoffs. + */ +public enum TransactionalMigrationFromMode +{ + none(null), // No migration is in progress. The currently active transaction system could be either Accord or Paxos. + off(TransactionalMode.off), + unsafe(TransactionalMode.unsafe), + unsafe_writes(TransactionalMode.unsafe_writes), + mixed_reads(TransactionalMode.mixed_reads), + full(TransactionalMode.full); + + public final TransactionalMode from; + + TransactionalMigrationFromMode(TransactionalMode from) + { + this.from = from; + } + + public static TransactionalMigrationFromMode fromMode(TransactionalMode prev, TransactionalMode next) + { + if (next.accordIsEnabled == prev.accordIsEnabled) + return none; + + switch (prev) + { + default: throw new IllegalArgumentException(); + case off: return off; + case unsafe: return unsafe; + case unsafe_writes: return unsafe_writes; + case mixed_reads: return mixed_reads; + case full: return full; + } + } + + public static TransactionalMigrationFromMode fromOrdinal(int ordinal) + { + return values()[ordinal]; + } + + public static TransactionalMigrationFromMode fromString(String name) + { + return valueOf(LocalizeString.toLowerCaseLocalized(name)); + } + + public boolean migratingFromAccord() + { + return from != null && from.accordIsEnabled; + } + + public boolean writesThroughAccord() + { + return from != null && from.writesThroughAccord; + } + + public boolean isMigrating() + { + return this != none; + } +} diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java index 1cd7da413c71..02efebbeb2fd 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosPrepare.java @@ -72,7 +72,8 @@ import static org.apache.cassandra.net.Verb.PAXOS2_PREPARE_REQ; import static org.apache.cassandra.net.Verb.PAXOS2_PREPARE_RSP; import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.getKeyMigrationState; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigratedAt; + +import org.apache.cassandra.service.consensus.migration.ConsensusMigratedAt; import static org.apache.cassandra.service.paxos.Ballot.Flag.NONE; import static org.apache.cassandra.service.paxos.Commit.Accepted; import static org.apache.cassandra.service.paxos.Commit.Committed; diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java index e7d43b19ca70..4f02b1a06060 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java @@ -34,8 +34,7 @@ import accord.primitives.Txn; import com.codahale.metrics.Meter; import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.config.Config.LWTStrategy; -import org.apache.cassandra.config.Config.NonSerialWriteStrategy; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; @@ -196,9 +195,8 @@ public void awaitWrites() @Override public void repairPartition(DecoratedKey dk, Map mutations, ReplicaPlan.ForWrite writePlan) { - NonSerialWriteStrategy nonSerialWriteStrategy = DatabaseDescriptor.getNonSerialWriteStrategy(); - if (coordinator.isEventuallyConsistent() && (DatabaseDescriptor.getLWTStrategy() == LWTStrategy.accord - || nonSerialWriteStrategy.blockingReadRepairThroughAccord)) + TransactionalMode transactionalMode = command.metadata().params.transactionalMode; + if (coordinator.isEventuallyConsistent() && transactionalMode.blockingReadRepairThroughAccord) { Collection partitionUpdates = Mutation.merge(mutations.values()).getPartitionUpdates(); checkState(partitionUpdates.size() == 1, "Expect only one PartitionUpdate"); diff --git a/src/java/org/apache/cassandra/streaming/StreamPlan.java b/src/java/org/apache/cassandra/streaming/StreamPlan.java index 88f77e886716..93b864d79b07 100644 --- a/src/java/org/apache/cassandra/streaming/StreamPlan.java +++ b/src/java/org/apache/cassandra/streaming/StreamPlan.java @@ -25,7 +25,7 @@ import org.apache.cassandra.locator.RangesAtEndpoint; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.TimeUUID; import static com.google.common.collect.Iterables.all; @@ -238,7 +238,7 @@ public StreamCoordinator getCoordinator() public static String[] nonAccordTablesForKeyspace(KeyspaceMetadata ksm) { String[] result = ksm.tables.stream() - .filter(tbl -> !AccordService.instance().isAccordManagedTable(tbl.id)) + .filter(tbl -> !tbl.isAccordEnabled()) .map(tbl -> tbl.name) .toArray(String[]::new); @@ -247,11 +247,11 @@ public static String[] nonAccordTablesForKeyspace(KeyspaceMetadata ksm) public static boolean hasNonAccordTables(KeyspaceMetadata ksm) { - return ksm.tables.stream().anyMatch(tbl -> !AccordService.instance().isAccordManagedTable(tbl.id)); + return ksm.tables.stream().anyMatch(tbl -> !tbl.isAccordEnabled()); } public static boolean hasAccordTables(KeyspaceMetadata ksm) { - return ksm.tables.stream().anyMatch(tbl -> AccordService.instance().isAccordManagedTable(tbl.id)); + return ksm.tables.stream().anyMatch(TableMetadata::isAccordEnabled); } } diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 2c2e3b7d300f..95c3e1ba2cb2 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -57,13 +57,12 @@ import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.TableMigrationState; import org.apache.cassandra.tcm.extensions.ExtensionKey; import org.apache.cassandra.tcm.extensions.ExtensionValue; import org.apache.cassandra.service.accord.AccordFastPath; import org.apache.cassandra.tcm.membership.*; -import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PrimaryRangeComparator; @@ -96,7 +95,6 @@ public class ClusterMetadata public final Directory directory; public final TokenMap tokenMap; public final DataPlacements placements; - public final AccordTables accordTables; public final AccordFastPath accordFastPath; public final LockedRanges lockedRanges; public final InProgressSequences inProgressSequences; @@ -133,7 +131,6 @@ public ClusterMetadata(IPartitioner partitioner, Directory directory, Distribute directory, new TokenMap(partitioner), DataPlacements.EMPTY, - AccordTables.EMPTY, AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, @@ -147,7 +144,6 @@ public ClusterMetadata(Epoch epoch, Directory directory, TokenMap tokenMap, DataPlacements placements, - AccordTables accordTables, AccordFastPath accordFastPath, LockedRanges lockedRanges, InProgressSequences inProgressSequences, @@ -161,7 +157,6 @@ public ClusterMetadata(Epoch epoch, directory, tokenMap, placements, - accordTables, accordFastPath, lockedRanges, inProgressSequences, @@ -176,7 +171,6 @@ private ClusterMetadata(int metadataIdentifier, Directory directory, TokenMap tokenMap, DataPlacements placements, - AccordTables accordTables, AccordFastPath accordFastPath, LockedRanges lockedRanges, InProgressSequences inProgressSequences, @@ -194,7 +188,6 @@ private ClusterMetadata(int metadataIdentifier, this.directory = directory; this.tokenMap = tokenMap; this.placements = placements; - this.accordTables = accordTables; this.accordFastPath = accordFastPath; this.lockedRanges = lockedRanges; this.inProgressSequences = inProgressSequences; @@ -205,12 +198,12 @@ private ClusterMetadata(int metadataIdentifier, public ClusterMetadata withDirectory(Directory directory) { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordTables, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); } public ClusterMetadata withPlacements(DataPlacements placements) { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordTables, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); } public Set fullCMSMembers() @@ -262,7 +255,6 @@ public ClusterMetadata forceEpoch(Epoch epoch) capLastModified(directory, epoch), capLastModified(tokenMap, epoch), capLastModified(placements, epoch), - capLastModified(accordTables, epoch), capLastModified(accordFastPath, epoch), capLastModified(lockedRanges, epoch), capLastModified(inProgressSequences, epoch), @@ -285,7 +277,6 @@ public ClusterMetadata initializeClusterIdentifier(int clusterIdentifier) directory, tokenMap, placements, - accordTables, accordFastPath, lockedRanges, inProgressSequences, @@ -413,7 +404,6 @@ public static class Transformer private Directory directory; private TokenMap tokenMap; private DataPlacements placements; - private AccordTables accordTables; private AccordFastPath accordFastPath; private LockedRanges lockedRanges; private InProgressSequences inProgressSequences; @@ -430,7 +420,6 @@ private Transformer(ClusterMetadata metadata, Epoch epoch) this.directory = metadata.directory; this.tokenMap = metadata.tokenMap; this.placements = metadata.placements; - this.accordTables = metadata.accordTables; this.accordFastPath = metadata.accordFastPath; this.lockedRanges = metadata.lockedRanges; this.inProgressSequences = metadata.inProgressSequences; @@ -439,6 +428,11 @@ private Transformer(ClusterMetadata metadata, Epoch epoch) modifiedKeys = new HashSet<>(); } + public Epoch epoch() + { + return epoch; + } + public Transformer with(DistributedSchema schema) { this.schema = schema; @@ -552,12 +546,6 @@ public Transformer with(DataPlacements placements) return this; } - public Transformer withAccordTable(TableId table) - { - accordTables = accordTables.with(table); - return this; - } - public Transformer withFastPathStatusSince(Node.Id node, AccordFastPath.Status status, long updateTimeMillis, long updateDelayMillis) { accordFastPath = accordFastPath.withNodeStatusSince(node, status, updateTimeMillis, updateDelayMillis); @@ -601,6 +589,12 @@ public Transformer with(Map newTableMigrationState return this; } + public Transformer with(ConsensusMigrationState consensusMigrationState) + { + this.consensusMigrationState = consensusMigrationState; + return this; + } + public Transformer with(ExtensionKey key, ExtensionValue obj) { if (MetadataKeys.CORE_METADATA.contains(key)) @@ -678,12 +672,6 @@ public Transformed build() placements = placements.withLastModified(epoch); } - if (accordTables != base.accordTables) - { - modifiedKeys.add(MetadataKeys.ACCORD_TABLES); - accordTables = accordTables.withLastModified(epoch); - } - if (accordFastPath != base.accordFastPath) { modifiedKeys.add(MetadataKeys.ACCORD_FAST_PATH); @@ -708,6 +696,11 @@ public Transformed build() consensusMigrationState = consensusMigrationState.withLastModified(epoch); } + if (consensusMigrationState != base.consensusMigrationState || schema != base.schema) + { + consensusMigrationState.validateAgainstSchema(schema); + } + return new Transformed(new ClusterMetadata(base.metadataIdentifier, epoch, partitioner, @@ -715,7 +708,6 @@ public Transformed build() directory, tokenMap, placements, - accordTables, accordFastPath, lockedRanges, inProgressSequences, @@ -733,7 +725,6 @@ public ClusterMetadata buildForGossipMode() directory, tokenMap, placements, - accordTables, accordFastPath, lockedRanges, inProgressSequences, @@ -866,7 +857,7 @@ public boolean equals(Object o) directory.equals(that.directory) && tokenMap.equals(that.tokenMap) && placements.equals(that.placements) && - accordTables.equals(that.accordTables) && + accordFastPath.equals(that.accordFastPath) && lockedRanges.equals(that.lockedRanges) && inProgressSequences.equals(that.inProgressSequences) && consensusMigrationState.equals(that.consensusMigrationState) && @@ -918,7 +909,7 @@ public void dumpDiff(ClusterMetadata other) @Override public int hashCode() { - return Objects.hash(epoch, schema, directory, tokenMap, placements, accordTables, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return Objects.hash(epoch, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); } public static ClusterMetadata current() @@ -997,12 +988,11 @@ public void serialize(ClusterMetadata metadata, DataOutputPlus out, Version vers DataPlacements.serializer.serialize(metadata.placements, out, version); if (version.isAtLeast(V2)) { - AccordTables.serializer.serialize(metadata.accordTables, out, version); AccordFastPath.serializer.serialize(metadata.accordFastPath, out, version); + ConsensusMigrationState.serializer.serialize(metadata.consensusMigrationState, out, version); } LockedRanges.serializer.serialize(metadata.lockedRanges, out, version); InProgressSequences.serializer.serialize(metadata.inProgressSequences, out, version); - ConsensusMigrationState.serializer.serialize(metadata.consensusMigrationState, out, version); out.writeInt(metadata.extensions.size()); for (Map.Entry, ExtensionValue> entry : metadata.extensions.entrySet()) { @@ -1037,21 +1027,20 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE Directory dir = Directory.serializer.deserialize(in, version); TokenMap tokenMap = TokenMap.serializer.deserialize(in, version); DataPlacements placements = DataPlacements.serializer.deserialize(in, version); - AccordTables accordTables; AccordFastPath accordFastPath; + ConsensusMigrationState consensusMigrationState; if (version.isAtLeast(V2)) { - accordTables = AccordTables.serializer.deserialize(in, version); accordFastPath = AccordFastPath.serializer.deserialize(in, version); + consensusMigrationState = ConsensusMigrationState.serializer.deserialize(in, version); } else { - accordTables = AccordTables.EMPTY; accordFastPath = AccordFastPath.EMPTY; + consensusMigrationState = ConsensusMigrationState.EMPTY; } LockedRanges lockedRanges = LockedRanges.serializer.deserialize(in, version); InProgressSequences ips = InProgressSequences.serializer.deserialize(in, version); - ConsensusMigrationState consensusMigrationState = ConsensusMigrationState.serializer.deserialize(in, version); int items = in.readInt(); Map, ExtensionValue> extensions = new HashMap<>(items); for (int i = 0; i < items; i++) @@ -1068,7 +1057,6 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE dir, tokenMap, placements, - accordTables, accordFastPath, lockedRanges, ips, @@ -1092,14 +1080,12 @@ public long serializedSize(ClusterMetadata metadata, Version version) DistributedSchema.serializer.serializedSize(metadata.schema, version) + Directory.serializer.serializedSize(metadata.directory, version) + TokenMap.serializer.serializedSize(metadata.tokenMap, version) + - DataPlacements.serializer.serializedSize(metadata.placements, version) + - ConsensusMigrationState.serializer.serializedSize(metadata.consensusMigrationState, version); DataPlacements.serializer.serializedSize(metadata.placements, version); if (version.isAtLeast(V2)) { - size += AccordTables.serializer.serializedSize(metadata.accordTables, version) + - AccordFastPath.serializer.serializedSize(metadata.accordFastPath, version); + size += AccordFastPath.serializer.serializedSize(metadata.accordFastPath, version) + + ConsensusMigrationState.serializer.serializedSize(metadata.consensusMigrationState, version); } size += LockedRanges.serializer.serializedSize(metadata.lockedRanges, version) + diff --git a/src/java/org/apache/cassandra/tcm/MetadataKeys.java b/src/java/org/apache/cassandra/tcm/MetadataKeys.java index 1794a63889a5..68306ce313dc 100644 --- a/src/java/org/apache/cassandra/tcm/MetadataKeys.java +++ b/src/java/org/apache/cassandra/tcm/MetadataKeys.java @@ -39,7 +39,6 @@ public class MetadataKeys public static final MetadataKey NODE_DIRECTORY = make(CORE_NS, "membership", "node_directory"); public static final MetadataKey TOKEN_MAP = make(CORE_NS, "ownership", "token_map"); public static final MetadataKey DATA_PLACEMENTS = make(CORE_NS, "ownership", "data_placements"); - public static final MetadataKey ACCORD_TABLES = make(CORE_NS, "ownership", "accord_tables"); public static final MetadataKey ACCORD_FAST_PATH = make(CORE_NS, "ownership", "accord_fast_path"); public static final MetadataKey LOCKED_RANGES = make(CORE_NS, "sequences", "locked_ranges"); public static final MetadataKey IN_PROGRESS_SEQUENCES = make(CORE_NS, "sequences", "in_progress"); @@ -49,7 +48,6 @@ public class MetadataKeys NODE_DIRECTORY, TOKEN_MAP, DATA_PLACEMENTS, - ACCORD_TABLES, ACCORD_FAST_PATH, LOCKED_RANGES, IN_PROGRESS_SEQUENCES, diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index 150b3934bbba..d3318ecb743f 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -29,12 +29,11 @@ import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.service.accord.AccordFastPath; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.Commit.Replicator; import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LocalLog; import org.apache.cassandra.tcm.membership.Directory; -import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PlacementProvider; import org.apache.cassandra.tcm.ownership.TokenMap; @@ -175,11 +174,10 @@ public StubClusterMetadataService build() Directory.EMPTY, new TokenMap(partitioner), DataPlacements.EMPTY, - AccordTables.EMPTY, AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, - ConsensusTableMigrationState.ConsensusMigrationState.EMPTY, + ConsensusMigrationState.EMPTY, ImmutableMap.of()); } return new StubClusterMetadataService(new UniformRangePlacement(), diff --git a/src/java/org/apache/cassandra/tcm/Transformation.java b/src/java/org/apache/cassandra/tcm/Transformation.java index f6f0aa226dda..1ccdb683e9e2 100644 --- a/src/java/org/apache/cassandra/tcm/Transformation.java +++ b/src/java/org/apache/cassandra/tcm/Transformation.java @@ -38,7 +38,6 @@ import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; import org.apache.cassandra.tcm.serialization.VerboseMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import org.apache.cassandra.tcm.transformations.AddAccordTable; import org.apache.cassandra.tcm.transformations.AlterSchema; import org.apache.cassandra.tcm.transformations.AlterTopology; import org.apache.cassandra.tcm.transformations.Assassinate; @@ -53,7 +52,6 @@ import org.apache.cassandra.tcm.transformations.PrepareReplace; import org.apache.cassandra.tcm.transformations.ReconfigureAccordFastPath; import org.apache.cassandra.tcm.transformations.Register; -import org.apache.cassandra.tcm.transformations.SetConsensusMigrationTargetProtocol; import org.apache.cassandra.tcm.transformations.Startup; import org.apache.cassandra.tcm.transformations.TriggerSnapshot; import org.apache.cassandra.tcm.transformations.Unregister; @@ -240,11 +238,10 @@ enum Kind CANCEL_CMS_RECONFIGURATION(34, () -> CancelCMSReconfiguration.serializer), ALTER_TOPOLOGY(35, () -> AlterTopology.serializer), - ADD_ACCORD_TABLE(36, () -> AddAccordTable.serializer), - UPDATE_AVAILABILITY(37, () -> ReconfigureAccordFastPath.serializer), - BEGIN_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(38, () -> BeginConsensusMigrationForTableAndRange.serializer), - MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(39, () -> MaybeFinishConsensusMigrationForTableAndRange.serializer), - SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL(40, () -> SetConsensusMigrationTargetProtocol.serializer) + UPDATE_AVAILABILITY(36, () -> ReconfigureAccordFastPath.serializer), + + BEGIN_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(37, () -> BeginConsensusMigrationForTableAndRange.serializer), + MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(38, () -> MaybeFinishConsensusMigrationForTableAndRange.serializer), ; private final Supplier> serializer; diff --git a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java index 390ead6f4d9d..e82572f53b02 100644 --- a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java +++ b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java @@ -54,7 +54,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SchemaKeyspace; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.MultiStepOperation; @@ -67,7 +67,6 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; -import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.ownership.UniformRangePlacement; @@ -297,7 +296,6 @@ public static ClusterMetadata emptyWithSchemaFromSystemTables(Set allKno Directory.EMPTY, new TokenMap(DatabaseDescriptor.getPartitioner()), DataPlacements.empty(), - AccordTables.EMPTY, AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, @@ -387,7 +385,6 @@ public static ClusterMetadata fromEndpointStates(Map -{ - public static final AccordTables EMPTY = new AccordTables(Epoch.EMPTY, ImmutableSet.of()); - private final Epoch lastModified; - private final ImmutableSet tables; - - public AccordTables(Epoch lastModified, ImmutableSet tables) - { - this.lastModified = lastModified; - this.tables = tables; - } - - public String toString() - { - return "AccordTables{" + lastModified + ", " + tables + '}'; - } - - public AccordTables withLastModified(Epoch epoch) - { - return new AccordTables(epoch, tables); - } - - public Epoch lastModified() - { - return lastModified; - } - - public boolean contains(TableId table) - { - return tables.contains(table); - } - - public AccordTables with(TableId table) - { - if (tables.contains(table)) - return this; - - return new AccordTables(lastModified, ImmutableSet.builder().addAll(tables).add(table).build()); - } - - public static final MetadataSerializer serializer = new MetadataSerializer() - { - public void serialize(AccordTables accordTables, DataOutputPlus out, Version version) throws IOException - { - int size = accordTables.tables.size(); - out.writeUnsignedVInt32(size); - TableId[] tables = new TableId[size]; - accordTables.tables.toArray(tables); - Arrays.sort(tables); - for (TableId table : tables) - table.serialize(out); - Epoch.serializer.serialize(accordTables.lastModified, out, version); - } - - public AccordTables deserialize(DataInputPlus in, Version version) throws IOException - { - int size = in.readUnsignedVInt32(); - ImmutableSet.Builder builder = ImmutableSet.builder(); - for (int i=0; i null, - (code, message) -> { - Invariants.checkState(code == ExceptionCode.ALREADY_EXISTS, - "Expected %s, got %s", ExceptionCode.ALREADY_EXISTS, code); - return null; - }); - } - - public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer() - { - public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException - { - assert t instanceof AddAccordTable; - AddAccordTable addTable = (AddAccordTable) t; - addTable.table.serialize(out); - } - - public AddAccordTable deserialize(DataInputPlus in, Version version) throws IOException - { - return new AddAccordTable(TableId.deserialize(in)); - } - - public long serializedSize(Transformation t, Version version) - { - assert t instanceof AddAccordTable; - AddAccordTable addTable = (AddAccordTable) t; - return addTable.table.serializedSize(); - } - }; -} diff --git a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java index 5cdd1b3016b1..3fe49e63bb23 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java +++ b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java @@ -19,20 +19,26 @@ package org.apache.cassandra.tcm.transformations; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Streams; +import org.apache.cassandra.config.AccordSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.cql3.statements.schema.AlterSchemaStatement; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.AlreadyExistsException; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.InvalidRequestException; @@ -50,8 +56,7 @@ import org.apache.cassandra.schema.Tables; import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.schema.Views; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadata.Transformer; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -72,7 +77,6 @@ import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; import static org.apache.cassandra.exceptions.ExceptionCode.SERVER_ERROR; import static org.apache.cassandra.exceptions.ExceptionCode.SYNTAX_ERROR; -import static org.apache.cassandra.utils.Collectors3.toImmutableMap; public class AlterSchema implements Transformation { @@ -241,7 +245,7 @@ public final Result execute(ClusterMetadata prev) }); next = next.with(newPlacementsBuilder.build()); } - next = maybeUpdateConsensusTableMigrationStateForDroppedTables(prev.consensusMigrationState, next, diff.altered, diff.dropped); + next = maybeUpdateConsensusMigrationState(prev.consensusMigrationState, next, diff.altered, diff.dropped); return Transformation.success(next, LockedRanges.AffectedRanges.EMPTY); } @@ -257,22 +261,69 @@ private static Map> groupByReplication( return byReplication; } - private Transformer maybeUpdateConsensusTableMigrationStateForDroppedTables(ConsensusMigrationState prev, Transformer next, ImmutableList altered, Keyspaces dropped) + private Transformer maybeUpdateConsensusMigrationState(ConsensusMigrationState prev, Transformer next, ImmutableList altered, Keyspaces dropped) { - Set tableIds = Streams.concat( - altered.stream().flatMap(diff -> diff.tables.dropped.stream().map(TableMetadata::id)), - dropped.stream().flatMap(ks -> ks.tables.stream().map(TableMetadata::id))) + ConsensusMigrationState migrationState = prev; + + Set droppedIds = Streams.concat(altered.stream().flatMap(diff -> diff.tables.dropped.stream().map(TableMetadata::id)), + dropped.stream().flatMap(ks -> ks.tables.stream().map(TableMetadata::id))) + .collect(toImmutableSet()); + + if (!droppedIds.isEmpty()) + migrationState = migrationState.withMigrationsRemovedFor(droppedIds); + + Set completedIds = altered.stream() + .flatMap(diff -> diff.tables.altered.stream()) + .filter(alt -> alt.before.params.transactionalMigrationFrom.isMigrating() + && !alt.after.params.transactionalMigrationFrom.isMigrating()) + .map(alt -> alt.after.id) .collect(toImmutableSet()); - if (tableIds.stream().anyMatch(prev.tableStates.keySet()::contains)) - { - ImmutableMap newTableStates = - prev.tableStates.entrySet().stream().filter(e -> !tableIds.contains(e.getKey())).collect(toImmutableMap()); - return next.with(newTableStates); - } - else + + if (!completedIds.isEmpty()) + migrationState = migrationState.withMigrationsCompletedFor(completedIds); + + Map reversals = altered.stream() + .flatMap(diff -> diff.tables.altered.stream()) + .filter(alt -> alt.before.params.transactionalMigrationFrom.from == alt.after.params.transactionalMode) + .map(alt -> alt.after) + .collect(Collectors.toMap(TableMetadata::id, Function.identity())); + + + // we treat explicitly switched migration types as a new migration here + Set started = altered.stream() + .flatMap(diff -> diff.tables.altered.stream()) + .filter(alt -> !reversals.containsKey(alt.after.id)) + .filter(alt -> alt.after.params.transactionalMigrationFrom.isMigrating() + && !alt.before.params.transactionalMigrationFrom.isMigrating()) + .map(alt -> alt.after) + .collect(Collectors.toUnmodifiableSet()); + + if (!started.isEmpty()) { - return next; + List> ranges; + AccordSpec.TransactionalRangeMigration migration = DatabaseDescriptor.getTransactionalRangeMigration(); + switch (migration) + { + default: throw new IllegalStateException("Unhandled transactional range migration: " + migration); + case auto: + Token minToken = DatabaseDescriptor.getPartitioner().getMinimumToken(); + ranges = Range.normalize(Collections.singletonList(new Range<>(minToken, minToken))); + break; + case explicit: + ranges = Collections.emptyList(); + break; + } + + if (!ranges.isEmpty()) + migrationState = migrationState.withRangesMigrating(started, ranges, true); } + + migrationState = migrationState.withReversedMigrations(reversals, next.epoch()); + + if (migrationState != prev) + next = next.with(migrationState); + + return next; } private static Iterable normaliseTableEpochs(Epoch nextEpoch, Stream tables) diff --git a/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java index a00db104af5e..e74a18c81f52 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java +++ b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java @@ -19,15 +19,11 @@ package org.apache.cassandra.tcm.transformations; import java.io.IOException; +import java.util.Collection; import java.util.List; -import java.util.Map; -import java.util.function.Function; +import java.util.stream.Collectors; import javax.annotation.Nonnull; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; - -import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -35,9 +31,11 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.sequences.LockedRanges; import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; @@ -45,14 +43,10 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; import static org.apache.cassandra.tcm.ClusterMetadata.Transformer; import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; -import static org.apache.cassandra.utils.Collectors3.toImmutableMap; public class BeginConsensusMigrationForTableAndRange implements Transformation { @@ -88,20 +82,10 @@ public Kind kind() public Result execute(ClusterMetadata prev) { - Map tableStates = prev.consensusMigrationState.tableStates; - List columnFamilyStores = tables.stream().map(Schema.instance::getColumnFamilyStoreInstance).collect(toImmutableList()); - Transformer transformer = prev.transformer(); - - Map newStates = columnFamilyStores - .stream() - .map(cfs -> - tableStates.containsKey(cfs.getTableId()) ? - tableStates.get(cfs.getTableId()).withRangesMigrating(ranges, targetProtocol) : - new TableMigrationState(cfs.keyspace.getName(), cfs.name, cfs.getTableId(), targetProtocol, ImmutableSet.of(), ImmutableMap.of(Epoch.EMPTY, ranges))) - .collect(toImmutableMap(TableMigrationState::getTableId, Function.identity())); - - return Transformation.success(transformer.with(newStates), LockedRanges.AffectedRanges.EMPTY); + Collection metadata = tables.stream().map(Schema.instance::getTableMetadata).collect(Collectors.toList()); + ConsensusMigrationState consensusMigrationState = prev.consensusMigrationState.withRangesMigrating(metadata, ranges, false); + return Transformation.success(transformer.with(consensusMigrationState), LockedRanges.AffectedRanges.EMPTY); } static class Serializer implements AsymmetricMetadataSerializer @@ -111,14 +95,14 @@ public void serialize(Transformation t, DataOutputPlus out, Version version) thr { BeginConsensusMigrationForTableAndRange v = (BeginConsensusMigrationForTableAndRange)t; out.writeUTF(v.targetProtocol.toString()); - ConsensusTableMigrationState.rangesSerializer.serialize(v.ranges, out, version); + ConsensusTableMigration.rangesSerializer.serialize(v.ranges, out, version); serializeCollection(v.tables, out, version, TableId.metadataSerializer); } public BeginConsensusMigrationForTableAndRange deserialize(DataInputPlus in, Version version) throws IOException { ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(in.readUTF()); - List> ranges = ConsensusTableMigrationState.rangesSerializer.deserialize(in, version); + List> ranges = ConsensusTableMigration.rangesSerializer.deserialize(in, version); List tables = deserializeList(in, version, TableId.metadataSerializer); return new BeginConsensusMigrationForTableAndRange(targetProtocol, ranges, tables); } @@ -127,8 +111,8 @@ public long serializedSize(Transformation t, Version version) { BeginConsensusMigrationForTableAndRange v = (BeginConsensusMigrationForTableAndRange) t; return TypeSizes.sizeof(v.targetProtocol.toString()) - + ConsensusTableMigrationState.rangesSerializer.serializedSize(v.ranges, version) - + serializedCollectionSize(v.tables, version, TableId.metadataSerializer); + + ConsensusTableMigration.rangesSerializer.serializedSize(v.ranges, version) + + serializedCollectionSize(v.tables, version, TableId.metadataSerializer); } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java index 64e6248c8132..7dff0111ef3f 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java +++ b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java @@ -22,19 +22,29 @@ import java.util.List; import javax.annotation.Nonnull; -import com.google.common.collect.ImmutableMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationRepairType; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; +import org.apache.cassandra.schema.TableParams; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairType; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.TableMigrationState; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadata.Transformer; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.sequences.LockedRanges; @@ -47,12 +57,12 @@ import static org.apache.cassandra.dht.Range.intersects; import static org.apache.cassandra.dht.Range.normalize; import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; public class MaybeFinishConsensusMigrationForTableAndRange implements Transformation { + private static final Logger logger = LoggerFactory.getLogger(MaybeFinishConsensusMigrationForTableAndRange.class); + public static Serializer serializer = new Serializer(); @Nonnull @@ -96,9 +106,27 @@ public Kind kind() return Kind.MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE; } + private static Transformer resetMigrationOnSchema(ClusterMetadata prev, Transformer transformer, String ksName, String tblName, TableId id) + { + Keyspaces schema = prev.schema.getKeyspaces(); + KeyspaceMetadata keyspace = schema.getNullable(ksName); + + TableMetadata table = null == keyspace + ? null + : keyspace.getTableOrViewNullable(tblName); + + if (table == null || !table.id.equals(id)) + return transformer; + + TableParams params = table.params.unbuild().transactionalMigrationFrom(TransactionalMigrationFromMode.none).build(); + keyspace = keyspace.withSwapped(keyspace.tables.withSwapped(table.withSwapped(params))); + schema = schema.withAddedOrUpdated(keyspace); + return transformer.with(new DistributedSchema(schema)); + } + public Result execute(@Nonnull ClusterMetadata metadata) { - System.out.println("Completed repair " + repairType + " ranges " + repairedRanges); + logger.info("Completed repair {} ranges {}", repairType, repairedRanges); checkNotNull(metadata, "clusterMetadata should not be null"); String ksAndCF = keyspace + "." + cf; TableMetadata tbm = Schema.instance.getTableMetadata(keyspace, cf); @@ -106,7 +134,7 @@ public Result execute(@Nonnull ClusterMetadata metadata) return new Rejected(INVALID, format("Table %s is not currently performing consensus migration", ksAndCF)); ConsensusMigrationState consensusMigrationState = metadata.consensusMigrationState; - ConsensusTableMigrationState.TableMigrationState tms = consensusMigrationState.tableStates.get(tbm.id); + TableMigrationState tms = consensusMigrationState.tableStates.get(tbm.id); if (tms == null) return new Rejected(INVALID, format("Table %s is not currently performing consensus migration", ksAndCF)); @@ -122,9 +150,16 @@ public Result execute(@Nonnull ClusterMetadata metadata) if (!intersects(tms.migratingRanges, normalizedRepairedRanges)) return new Rejected(INVALID, format("Table %s is migrating ranges %s, which doesn't include repaired ranges %s", ksAndCF, tms.migratingRanges, normalizedRepairedRanges)); - TableMigrationState newTableMigrationState = tms.withRangesRepairedAtEpoch(normalizedRepairedRanges, minEpoch); + Transformer next = metadata.transformer(); + ConsensusMigrationState migrationState = metadata.consensusMigrationState.withRangesRepairedAtEpoch(tbm, normalizedRepairedRanges, minEpoch); + next = next.with(migrationState); + + // reset the migration value on the table if the migration has completed + TableMigrationState tableState = migrationState.tableStates.get(tbm.id); + if (tableState == null || tableState.hasMigratedFullTokenRange(metadata.partitioner)) + next = resetMigrationOnSchema(metadata, next, keyspace, cf, tbm.id); - return Transformation.success(metadata.transformer().with(ImmutableMap.of(newTableMigrationState.tableId, newTableMigrationState)), LockedRanges.AffectedRanges.EMPTY); + return Transformation.success(next, LockedRanges.AffectedRanges.EMPTY); } static class Serializer implements AsymmetricMetadataSerializer @@ -134,7 +169,7 @@ public void serialize(Transformation t, DataOutputPlus out, Version version) thr MaybeFinishConsensusMigrationForTableAndRange v = (MaybeFinishConsensusMigrationForTableAndRange)t; out.writeUTF(v.keyspace); out.writeUTF(v.cf); - ConsensusTableMigrationState.rangesSerializer.serialize(v.repairedRanges, out, version); + ConsensusTableMigration.rangesSerializer.serialize(v.repairedRanges, out, version); Epoch.serializer.serialize(v.minEpoch, out, version); out.write(v.repairType.value); } @@ -143,7 +178,7 @@ public MaybeFinishConsensusMigrationForTableAndRange deserialize(DataInputPlus i { String keyspace = in.readUTF(); String cf = in.readUTF(); - List> repairedRanges = ConsensusTableMigrationState.rangesSerializer.deserialize(in, version); + List> repairedRanges = ConsensusTableMigration.rangesSerializer.deserialize(in, version); Epoch minEpoch = Epoch.serializer.deserialize(in, version); ConsensusMigrationRepairType repairType = ConsensusMigrationRepairType.fromValue(in.readByte()); return new MaybeFinishConsensusMigrationForTableAndRange(keyspace, cf, repairedRanges, minEpoch, repairType); @@ -153,10 +188,10 @@ public long serializedSize(Transformation t, Version version) { MaybeFinishConsensusMigrationForTableAndRange v = (MaybeFinishConsensusMigrationForTableAndRange)t; return TypeSizes.sizeof(v.keyspace) - + TypeSizes.sizeof(v.cf) - + ConsensusTableMigrationState.rangesSerializer.serializedSize(v.repairedRanges, version) - + Epoch.serializer.serializedSize(v.minEpoch) - + TypeSizes.sizeof(v.repairType.value); + + TypeSizes.sizeof(v.cf) + + ConsensusTableMigration.rangesSerializer.serializedSize(v.repairedRanges, version) + + Epoch.serializer.serializedSize(v.minEpoch) + + TypeSizes.sizeof(v.repairType.value); } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java b/src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java deleted file mode 100644 index c0fd662d6766..000000000000 --- a/src/java/org/apache/cassandra/tcm/transformations/SetConsensusMigrationTargetProtocol.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.tcm.transformations; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.function.Function; -import javax.annotation.Nonnull; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; - -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.ClusterMetadata.Transformer; -import org.apache.cassandra.tcm.Transformation; -import org.apache.cassandra.tcm.sequences.LockedRanges; -import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; -import org.apache.cassandra.tcm.serialization.Version; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget.reset; -import static org.apache.cassandra.tcm.Transformation.Kind.SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL; -import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; -import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; -import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; -import static org.apache.cassandra.utils.Collectors3.toImmutableMap; - -/* - * Narrowly focused on setting or changing the consensus migration protocol. The real use case - * is when a migration is already in progress or done and you want to change the target. - */ -public class SetConsensusMigrationTargetProtocol implements Transformation -{ - public static Serializer serializer = new Serializer(); - - @Nonnull - public final ConsensusMigrationTarget targetProtocol; - - @Nonnull - public final List tables; - - public SetConsensusMigrationTargetProtocol(@Nonnull ConsensusMigrationTarget targetProtocol, - @Nonnull List tables) - { - this.targetProtocol = targetProtocol; - this.tables = tables; - } - - @Override - public Kind kind() - { - return SET_CONSENSUS_MIGRATION_TARGET_PROTOCOL; - } - - @Override - public Result execute(ClusterMetadata metadata) - { - Map tableStates = metadata.consensusMigrationState.tableStates; - List columnFamilyStores = tables.stream().map(Schema.instance::getColumnFamilyStoreInstance).collect(toImmutableList()); - - Transformer transformer = metadata.transformer(); - - Map newStates; - - if (targetProtocol == reset) - { - newStates = tableStates.entrySet().stream().filter(entry -> !tables.contains(entry.getKey())).collect(toImmutableMap()); - } - else - { - newStates = columnFamilyStores - .stream() - .map(cfs -> - tableStates.containsKey(cfs.getTableId()) ? - tableStates.get(cfs.getTableId()).withMigrationTarget(targetProtocol) : - new TableMigrationState(cfs.keyspace.getName(), cfs.name, cfs.getTableId(), targetProtocol, ImmutableSet.of(), ImmutableMap.of())) - .collect(toImmutableMap(TableMigrationState::getTableId, Function.identity())); - } - - return Transformation.success(transformer.with(newStates, targetProtocol == reset ? false : true), LockedRanges.AffectedRanges.EMPTY); - } - - static class Serializer implements AsymmetricMetadataSerializer - { - public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException - { - SetConsensusMigrationTargetProtocol v = (SetConsensusMigrationTargetProtocol)t; - out.writeUTF(v.targetProtocol.toString()); - serializeCollection(v.tables, out, version, TableId.metadataSerializer); - } - - public SetConsensusMigrationTargetProtocol deserialize(DataInputPlus in, Version version) throws IOException - { - ConsensusMigrationTarget targetProtocol = ConsensusMigrationTarget.fromString(in.readUTF()); - List tables = deserializeList(in, version, TableId.metadataSerializer); - return new SetConsensusMigrationTargetProtocol(targetProtocol, tables); - } - - public long serializedSize(Transformation t, Version version) - { - SetConsensusMigrationTargetProtocol v = (SetConsensusMigrationTargetProtocol) t; - return TypeSizes.sizeof(v.targetProtocol.toString()) - + serializedCollectionSize(v.tables, version, TableId.metadataSerializer); - } - } -} diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 0c933fdf7d06..00f2e66dfa45 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -275,7 +275,6 @@ public int execute(String... args) .withDescription("List and mark ranges as migrating between consensus protocols") .withDefaultCommand(CassHelp.class) .withCommand(ConsensusMigrationAdmin.BeginMigration.class) - .withCommands(ConsensusMigrationAdmin.SetTargetProtocol.class) .withCommands(ConsensusMigrationAdmin.ListCmd.class) .withCommands(ConsensusMigrationAdmin.FinishMigration.class); diff --git a/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java index cd24bf91fa4f..9878034f88ce 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java +++ b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java @@ -31,9 +31,7 @@ import org.apache.cassandra.tools.NodeTool; import static com.google.common.base.Preconditions.checkArgument; -import static java.util.Collections.emptyList; import static java.util.Collections.singleton; -import static java.util.Collections.singletonList; /** * For managing migration from one consensus protocol to another. @@ -122,25 +120,4 @@ protected void execute(NodeProbe probe) probe.output().out.printf("Finished consensus migration range (%s) of keyspaces %s and tables %s%n", maybeRangesStr, keyspaceNames, maybeTableNames); } } - - @Command(name = "set-target-protocol", description = "Set or change the target consensus protocol of the specified tables. If a migration is in progress then the migration will be reversed with migrating ranges still migrating, unmigrated ranges marked as migrated, and migrating ranges will need migration. Be aware that if no migration was in progress for a table it will immediately cause the table to run on the target protocol because the ranges requiring migration are derived from the migrated ranges that don't exist.") - public static class SetTargetProtocol extends ConsensusMigrationAdmin - { - @Arguments(usage = "[ ...]", description = "The keyspace followed by one or many tables") - private List schemaArgs = new ArrayList<>(); - - @Option(title = "target_protocol", name = {"-tp", "--target-protocol"}, description = "Use -tp to specify what consensus protocol should be migrated to", required=true) - private String targetProtocol = null; - - @Option(title = "force_completion", name = {"-f", "--force-completion"}, description = "Forces migration state for all ranges of the specified table regardless of whether migration completed successfully or not. Should only be used if table is empty or has had no writes since last repair.") - private boolean forceCompletion = false; - - protected void execute(NodeProbe probe) - { - checkArgument(schemaArgs.size() >= 2, "Must specify a keyspace and at least one table"); - List keyspaceNames = schemaArgs.size() > 0 ? singletonList(schemaArgs.get(0)) : emptyList(); - List maybeTableNames = schemaArgs.size() > 1 ? schemaArgs.subList(1, schemaArgs.size()) : null; - probe.getStorageService().setConsensusMigrationTargetProtocol(targetProtocol, keyspaceNames, maybeTableNames); - } - } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java index 5c6e276db3c9..b5e507a62723 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java @@ -27,10 +27,9 @@ import java.util.regex.Pattern; import com.google.common.util.concurrent.FutureCallback; +import org.apache.cassandra.distributed.api.*; import org.apache.cassandra.distributed.test.accord.AccordTestBase; -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.*; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; @@ -46,15 +45,11 @@ import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.Cluster; -import org.apache.cassandra.distributed.api.ConsistencyLevel; -import org.apache.cassandra.distributed.api.Feature; -import org.apache.cassandra.distributed.api.ICoordinator; -import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.api.IMessageFilters.Filter; -import org.apache.cassandra.distributed.api.TokenSupplier; import org.apache.cassandra.distributed.shared.NetworkTopology; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.reads.repair.BlockingReadRepair; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; import org.apache.cassandra.utils.concurrent.Condition; @@ -80,6 +75,34 @@ public class ReadRepairTest extends TestBaseImpl { + private static Cluster cluster; + private static int tableNum = 0; + private String tableName; + + @BeforeClass + public static void beforeClass() throws Throwable + { + cluster = init(Cluster.create(3, c -> c.with(Feature.GOSSIP, Feature.NETWORK))); + } + + @AfterClass + public static void afterClass() throws Throwable + { + if (cluster != null) + cluster.close(); + } + + private void incrementTableName() + { + tableName = "tbl" + tableNum++; + } + + @Before + public void setup() + { + incrementTableName(); + } + /** * Tests basic behaviour of read repair with {@code BLOCKING} read repair strategy. */ @@ -87,6 +110,7 @@ public class ReadRepairTest extends TestBaseImpl public void testBlockingReadRepair() throws Throwable { testReadRepair(ReadRepairStrategy.BLOCKING, false); + incrementTableName(); testReadRepair(ReadRepairStrategy.BLOCKING, true); } /** @@ -106,68 +130,63 @@ private void testReadRepair(ReadRepairStrategy strategy) throws Throwable private void testReadRepair(ReadRepairStrategy strategy, boolean brrThroughAccord) throws Throwable { - try (Cluster cluster = init(Cluster.create(3, config -> config.set("non_serial_write_strategy", brrThroughAccord ? "migration" : "normal")))) - { - cluster.schemaChange(withKeyspace("CREATE TABLE %s.t (k int, c int, v int, PRIMARY KEY (k, c)) " + - String.format("WITH read_repair='%s'", strategy))); - AccordTestBase.ensureTableIsAccordManaged(cluster, KEYSPACE, "t"); - - Object[] row = row(1, 1, 1); - String insertQuery = withKeyspace("INSERT INTO %s.t (k, c, v) VALUES (?, ?, ?)"); - String selectQuery = withKeyspace("SELECT * FROM %s.t WHERE k=1"); - - // insert data in two nodes, simulating a quorum write that has missed one node - cluster.get(1).executeInternal(insertQuery, row); - cluster.get(2).executeInternal(insertQuery, row); - - // verify that the third node doesn't have the row + TransactionalMode transactionalMode = brrThroughAccord ? TransactionalMode.unsafe_writes : TransactionalMode.off; + cluster.schemaChange(withKeyspace("CREATE TABLE %s." + tableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode.toString().toLowerCase() + '\'' + + String.format(" AND read_repair='%s'", strategy))); + AccordTestBase.ensureTableIsAccordManaged(cluster, KEYSPACE, "t"); + + Object[] row = row(1, 1, 1); + String insertQuery = withKeyspace("INSERT INTO %s." + tableName + " (k, c, v) VALUES (?, ?, ?)"); + String selectQuery = withKeyspace("SELECT * FROM %s." + tableName + " WHERE k=1"); + + // insert data in two nodes, simulating a quorum write that has missed one node + cluster.get(1).executeInternal(insertQuery, row); + cluster.get(2).executeInternal(insertQuery, row); + + // verify that the third node doesn't have the row + assertRows(cluster.get(3).executeInternal(selectQuery)); + + // read with CL=QUORUM to trigger read repair, force 3 to be involved in the read so that read repair + // will occur + Filter blockReadFromOne = cluster.filters().inbound().from(3).to(1).verbs(READ_REQ.id).drop(); + assertRows(cluster.coordinator(3).execute(selectQuery, QUORUM), row); + blockReadFromOne.off(); + + // verify whether the coordinator has the repaired row depending on the read repair strategy + if (strategy == ReadRepairStrategy.NONE) assertRows(cluster.get(3).executeInternal(selectQuery)); - - // read with CL=QUORUM to trigger read repair, force 3 to be involved in the read so that read repair - // will occur - Filter blockReadFromOne = cluster.filters().inbound().from(3).to(1).verbs(READ_REQ.id).drop(); - assertRows(cluster.coordinator(3).execute(selectQuery, QUORUM), row); - blockReadFromOne.off(); - - // verify whether the coordinator has the repaired row depending on the read repair strategy - if (strategy == ReadRepairStrategy.NONE) - assertRows(cluster.get(3).executeInternal(selectQuery)); - else - assertRows(cluster.get(3).executeInternal(selectQuery), row); - } + else + assertRows(cluster.get(3).executeInternal(selectQuery), row); } @Test public void readRepairTimeoutTest() throws Throwable { final long reducedReadTimeout = 3000L; - try (Cluster cluster = init(builder().withNodes(3).start())) + cluster.forEach(i -> i.runOnInstance(() -> DatabaseDescriptor.setReadRpcTimeout(reducedReadTimeout))); + cluster.schemaChange("CREATE TABLE " + KEYSPACE + "." + tableName + " (pk int, ck int, v int, PRIMARY KEY (pk, ck)) WITH read_repair='blocking'"); + cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + "." + tableName + " (pk, ck, v) VALUES (1, 1, 1)"); + cluster.get(2).executeInternal("INSERT INTO " + KEYSPACE + "." + tableName + " (pk, ck, v) VALUES (1, 1, 1)"); + assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1")); + cluster.verbs(READ_REPAIR_RSP).to(1).drop(); + final long start = currentTimeMillis(); + try { - cluster.forEach(i -> i.runOnInstance(() -> DatabaseDescriptor.setReadRpcTimeout(reducedReadTimeout))); - cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (pk int, ck int, v int, PRIMARY KEY (pk, ck)) WITH read_repair='blocking'"); - cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + ".tbl (pk, ck, v) VALUES (1, 1, 1)"); - cluster.get(2).executeInternal("INSERT INTO " + KEYSPACE + ".tbl (pk, ck, v) VALUES (1, 1, 1)"); - assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1")); - cluster.verbs(READ_REPAIR_RSP).to(1).drop(); - final long start = currentTimeMillis(); - try - { - cluster.coordinator(1).execute("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1", ConsistencyLevel.ALL); - fail("Read timeout expected but it did not occur"); - } - catch (Exception ex) - { - // the containing exception class was loaded by another class loader. Comparing the message as a workaround to assert the exception - assertTrue(ex.getClass().toString().contains("ReadTimeoutException")); - long actualTimeTaken = currentTimeMillis() - start; - long magicDelayAmount = 100L; // it might not be the best way to check if the time taken is around the timeout value. - // Due to the delays, the actual time taken from client perspective is slighly more than the timeout value - assertTrue(actualTimeTaken > reducedReadTimeout); - // But it should not exceed too much - assertTrue(actualTimeTaken < reducedReadTimeout + magicDelayAmount); - assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1"), - row(1, 1, 1)); // the partition happened when the repaired node sending back ack. The mutation should be in fact applied. - } + cluster.coordinator(1).execute("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1", ConsistencyLevel.ALL); + fail("Read timeout expected but it did not occur"); + } + catch (Exception ex) + { + // the containing exception class was loaded by another class loader. Comparing the message as a workaround to assert the exception + assertTrue(ex.getClass().toString().contains("ReadTimeoutException")); + long actualTimeTaken = currentTimeMillis() - start; + long magicDelayAmount = 100L; // it might not be the best way to check if the time taken is around the timeout value. + // Due to the delays, the actual time taken from client perspective is slighly more than the timeout value + assertTrue(actualTimeTaken > reducedReadTimeout); + // But it should not exceed too much + assertTrue(actualTimeTaken < reducedReadTimeout + magicDelayAmount); + assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1"), + row(1, 1, 1)); // the partition happened when the repaired node sending back ack. The mutation should be in fact applied. } } @@ -176,20 +195,20 @@ public void failingReadRepairTest() throws Throwable { try (Cluster cluster = init(builder().withNodes(3).start())) { - cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (pk int, ck int, v int, PRIMARY KEY (pk, ck)) WITH read_repair='blocking'"); + cluster.schemaChange("CREATE TABLE " + KEYSPACE + "." + tableName + " (pk int, ck int, v int, PRIMARY KEY (pk, ck)) WITH read_repair='blocking'"); for (int i = 1 ; i <= 2 ; ++i) - cluster.get(i).executeInternal("INSERT INTO " + KEYSPACE + ".tbl (pk, ck, v) VALUES (1, 1, 1)"); + cluster.get(i).executeInternal("INSERT INTO " + KEYSPACE + "." + tableName + " (pk, ck, v) VALUES (1, 1, 1)"); - assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1")); + assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1")); cluster.filters().verbs(READ_REPAIR_REQ.id).to(3).drop(); - assertRows(cluster.coordinator(1).execute("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1", + assertRows(cluster.coordinator(1).execute("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1", ConsistencyLevel.QUORUM), row(1, 1, 1)); // Data was not repaired - assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1")); + assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1")); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java index 6d192db1124b..103b1fc4c00b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java @@ -28,7 +28,6 @@ import java.util.function.Function; import java.util.stream.IntStream; -import org.apache.cassandra.distributed.test.accord.AccordTestBase; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -37,16 +36,13 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.apache.cassandra.config.Config.LWTStrategy; -import org.apache.cassandra.config.Config.NonSerialWriteStrategy; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.shared.AssertUtils; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.Pair; import static com.google.common.collect.Iterators.toArray; import static java.lang.String.format; @@ -88,17 +84,17 @@ public class ShortReadProtectionTest extends TestBaseImpl public boolean paging; @Parameterized.Parameter(3) - public Pair transactionStrategies; + public TransactionalMode transactionalMode; - @Parameterized.Parameters(name = "{index}: read_cl={0} flush={1} paging={2}, transactionStrategies={3}") + @Parameterized.Parameters(name = "{index}: read_cl={0} flush={1} paging={2}, transactionalMode={3}") public static Collection data() { List result = new ArrayList<>(); - for (Pair transactionStrategies : Arrays.asList(Pair.create(LWTStrategy.accord, NonSerialWriteStrategy.migration), Pair.create(LWTStrategy.migration, NonSerialWriteStrategy.normal))) + for (TransactionalMode mode : TransactionalMode.values()) for (ConsistencyLevel readConsistencyLevel : Arrays.asList(ALL, QUORUM, SERIAL)) for (boolean flush : BOOLEANS) for (boolean paging : BOOLEANS) - result.add(new Object[]{ readConsistencyLevel, flush, paging, transactionStrategies}); + result.add(new Object[]{ readConsistencyLevel, flush, paging, mode}); return result; } @@ -123,17 +119,14 @@ public static void teardownCluster() @Before public void setupTester() { - String lwtStrategy = transactionStrategies.left.toString(); - String nonSerialWriteStrategy = transactionStrategies.right.toString(); - cluster.forEach(node -> { - node.runOnInstance(() -> { - DatabaseDescriptor.setLWTStrategy(LWTStrategy.valueOf(lwtStrategy)); - DatabaseDescriptor.setNonSerialWriteStrategy(NonSerialWriteStrategy.valueOf(nonSerialWriteStrategy)); - }); - }); tester = new Tester(readConsistencyLevel, flush, paging); } + private String transactionalModeCQL() + { + return " WITH transactional_mode='" + transactionalMode + '\''; + } + @After public void teardownTester() { @@ -150,7 +143,7 @@ public void teardownTester() @Test public void testSkinnyTableWithoutLiveRows() { - tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)") + tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)" + transactionalModeCQL()) .allNodes("INSERT INTO %s (id) VALUES (0) USING TIMESTAMP 0") .toNode1("DELETE FROM %s WHERE id = 0") .assertRows("SELECT DISTINCT id FROM %s WHERE id = 0") @@ -167,7 +160,7 @@ public void testSkinnyTableWithoutLiveRows() @Test public void testSkinnyTableWithLiveRows() { - tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)") + tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)" + transactionalModeCQL()) .allNodes(0, 10, i -> format("INSERT INTO %%s (id) VALUES (%d) USING TIMESTAMP 0", i)) // order is 5,1,8,0,2,4,7,6,9,3 .toNode1("DELETE FROM %s WHERE id IN (1, 0, 4, 6, 3)") // delete every other row .assertRows("SELECT DISTINCT token(id), id FROM %s", @@ -184,7 +177,7 @@ public void testSkinnyTableWithLiveRows() @Test public void testSkinnyTableWithComplementaryDeletions() { - tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)") + tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)" + transactionalModeCQL()) .allNodes(0, 10, i -> format("INSERT INTO %%s (id) VALUES (%d) USING TIMESTAMP 0", i)) // order is 5,1,8,0,2,4,7,6,9,3 .toNode1("DELETE FROM %s WHERE id IN (5, 8, 2, 7, 9)") // delete every other row .toNode2("DELETE FROM %s WHERE id IN (1, 0, 4, 6)") // delete every other row but the last one @@ -202,7 +195,7 @@ public void testSkinnyTableWithComplementaryDeletions() @Test public void testMultipleMissedRows() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) .allNodes(0, 4, i -> format("INSERT INTO %%s (pk, ck) VALUES (0, %d) USING TIMESTAMP 0", i)) .toNode1("DELETE FROM %s WHERE pk = 0 AND ck IN (1, 2, 3)", "INSERT INTO %s (pk, ck) VALUES (0, 5)") @@ -221,7 +214,7 @@ public void testMultipleMissedRows() @Test public void testAscendingOrder() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) .allNodes(1, 10, i -> format("INSERT INTO %%s (k, c, v) VALUES (0, %d, %d) USING TIMESTAMP 0", i, i * 10)) .toNode1("DELETE FROM %s WHERE k=0 AND c=1") .toNode2("DELETE FROM %s WHERE k=0 AND c=2") @@ -243,7 +236,7 @@ public void testAscendingOrder() @Test public void testDescendingOrder() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) .allNodes(1, 10, i -> format("INSERT INTO %%s (k, c, v) VALUES (0, %d, %d) USING TIMESTAMP 0", i, i * 10)) .toNode1("DELETE FROM %s WHERE k=0 AND c=7") .toNode2("DELETE FROM %s WHERE k=0 AND c=8") @@ -266,7 +259,7 @@ public void testDescendingOrder() @Test public void testDeletePartition() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) .allNodes("INSERT INTO %s (k, c, v) VALUES (0, 1, 10) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0") @@ -279,7 +272,7 @@ public void testDeletePartition() @Test public void testDeletePartitionWithStatic() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))") + tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))" + transactionalModeCQL()) .allNodes("INSERT INTO %s (k, c, v, s) VALUES (0, 1, 10, 100) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0") @@ -292,7 +285,7 @@ public void testDeletePartitionWithStatic() @Test public void testDeleteClustering() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) .allNodes("INSERT INTO %s (k, c, v) VALUES (0, 1, 10) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0 AND c=1") @@ -307,7 +300,7 @@ public void testDeleteClustering() @Test public void testDeleteClusteringWithStatic() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))") + tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))" + transactionalModeCQL()) .allNodes("INSERT INTO %s (k, c, v, s) VALUES (0, 1, 10, 100) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0 AND c=1") @@ -324,7 +317,7 @@ public void testDeleteClusteringWithStatic() @Test public void testGroupByRegularRow() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) .toNode1("INSERT INTO %s (pk, ck) VALUES (1, 1) USING TIMESTAMP 0", "DELETE FROM %s WHERE pk=0 AND ck=0", "INSERT INTO %s (pk, ck) VALUES (2, 2) USING TIMESTAMP 0") @@ -347,7 +340,7 @@ public void testGroupByRegularRow() @Test public void testGroupByStaticRow() { - tester.createTable("CREATE TABLE %s (pk int, ck int, s int static, PRIMARY KEY (pk, ck))") + tester.createTable("CREATE TABLE %s (pk int, ck int, s int static, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) .toNode1("INSERT INTO %s (pk, s) VALUES (1, 1) USING TIMESTAMP 0", "INSERT INTO %s (pk, s) VALUES (0, null)", "INSERT INTO %s (pk, s) VALUES (2, 2) USING TIMESTAMP 0") @@ -370,7 +363,7 @@ public void testGroupByStaticRow() @Test public void testSkipEarlyTermination() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) .toNode1("INSERT INTO %s (pk, ck) VALUES (0, 0)") .toNode2("DELETE FROM %s WHERE pk = 0 AND ck IN (1, 2)") .assertRows("SELECT DISTINCT pk FROM %s", row(0)); @@ -387,7 +380,7 @@ public void testSkipEarlyTermination() @Test public void testSkipEarlyTerminationRows() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) .toNode1("INSERT INTO %s (pk, ck) VALUES (0, 0) USING TIMESTAMP 0", "INSERT INTO %s (pk, ck) VALUES (0, 1) USING TIMESTAMP 0", "INSERT INTO %s (pk, ck) VALUES (2, 0) USING TIMESTAMP 0", @@ -411,7 +404,7 @@ public void testSkipEarlyTerminationRows() @Test public void testSkipEarlyTerminationPartitions() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) .toNode1("INSERT INTO %s (pk, ck) VALUES (0, 0) USING TIMESTAMP 0", "INSERT INTO %s (pk, ck) VALUES (0, 1) USING TIMESTAMP 0", "DELETE FROM %s USING TIMESTAMP 42 WHERE pk = 2 AND ck IN (0, 1)") @@ -455,8 +448,7 @@ private Tester(ConsistencyLevel readConsistencyLevel, boolean flush, boolean pag private Tester createTable(String query) { - cluster.schemaChange(format(query) + " WITH read_repair='NONE'"); - AccordTestBase.ensureTableIsAccordManaged(cluster, KEYSPACE, table); + cluster.schemaChange(format(query) + " AND read_repair='NONE'"); return this; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index 7d246a2b6c9f..f040e9d4db04 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -206,7 +206,7 @@ public void bootstrapTest() throws Throwable } cluster.schemaChange("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2}"); - cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key(k, c))"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key(k, c)) WITH transactional_mode='full'"); long schemaChangeMax = maxEpoch(cluster); for (IInvokableInstance node : cluster) @@ -384,7 +384,7 @@ public void moveTest() throws Throwable } cluster.schemaChange("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2}"); - cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key(k, c))"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key(k, c)) WITH transactional_mode='full'"); long schemaChangeMax = maxEpoch(cluster); for (IInvokableInstance node : cluster) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java index 5eeee8108a25..f7ef3afe7efa 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java @@ -46,8 +46,6 @@ import accord.primitives.Unseekables; import accord.topology.Topologies; -import org.apache.cassandra.config.Config.NonSerialWriteStrategy; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.functions.types.utils.Bytes; import org.apache.cassandra.db.marshal.Int32Type; @@ -63,6 +61,7 @@ import org.apache.cassandra.distributed.shared.AssertUtils; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.ByteBufferUtil; import org.assertj.core.api.Assertions; @@ -85,35 +84,28 @@ protected Logger logger() } @Parameterized.Parameter - public String nonSerialWriteStrategyName; + public String transactionalModeName; - NonSerialWriteStrategy nonSerialWriteStrategy; + TransactionalMode transactionalMode; - @Parameterized.Parameters(name = "nonSerialWriteStrategy={0}") + @Parameterized.Parameters(name = "transactionalMode={0}") public static Collection data() { - return ImmutableList.of(new Object[] {NonSerialWriteStrategy.accord.toString()}, new Object[] {NonSerialWriteStrategy.migration.toString()}); + return ImmutableList.of(new Object[] {TransactionalMode.full.toString()}, + new Object[] {TransactionalMode.mixed_reads.toString()}); } @Before public void setNonSerialWriteStrategy() { - nonSerialWriteStrategy = NonSerialWriteStrategy.valueOf(nonSerialWriteStrategyName); - String nonSerialWriteStrategyName = this.nonSerialWriteStrategyName; - SHARED_CLUSTER.forEach(node -> { - node.runOnInstance(() -> { - DatabaseDescriptor.setNonSerialWriteStrategy(NonSerialWriteStrategy.valueOf(nonSerialWriteStrategyName)); - }); - }); + transactionalMode = TransactionalMode.valueOf(transactionalModeName); } @BeforeClass public static void setupClass() throws IOException { - AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord") - .set("non_serial_write_strategy", "migration")), 2); + AccordTestBase.setupCluster(builder -> builder, 2); SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); - SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged(KEYSPACE)); } @Test @@ -177,7 +169,7 @@ public void testMultipleShards() throws Exception String currentTable = keyspace + ".tbl"; List ddls = Arrays.asList("DROP KEYSPACE IF EXISTS " + keyspace + ";", "CREATE KEYSPACE " + keyspace + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}", - "CREATE TABLE " + currentTable + " (k blob, c int, v int, primary key (k, c))"); + "CREATE TABLE " + currentTable + " (k blob, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); List tokens = tokens(); List keys = tokensToKeys(tokens); List keyStrings = keys.stream().map(bb -> "0x" + ByteBufferUtil.bytesToHex(bb)).collect(Collectors.toList()); @@ -262,13 +254,13 @@ public void testScalarBindVariables() throws Throwable @Test public void testRegularScalarIsNull() throws Throwable { - testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))"); + testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testStaticScalarIsNull() throws Throwable { - testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c))"); + testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } private void testScalarIsNull(String tableDDL) throws Exception { @@ -303,7 +295,7 @@ private void testScalarIsNull(String tableDDL) throws Exception { @Test public void testQueryStaticColumn() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { // select partition key, clustering key and static column, restrict on partition and clustering @@ -357,7 +349,7 @@ private void testQueryStaticColumn(Cluster cluster, String accordReadQuery, Stri @Test public void testUpdateStaticColumn() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + '\'', cluster -> { checkUpdateStatic(cluster, "SET s=1 WHERE k=?", 101, "[[101, null, 1, null]]", "[]"); @@ -393,7 +385,7 @@ private void checkUpdateStatic(Cluster cluster, String update, int key, String e private void assertResultsFromAccordMatches(Cluster cluster, String accordRead, String simpleRead, int key) { Object[][] simpleReadResult; - if (nonSerialWriteStrategy.ignoresSuppliedConsistencyLevel) + if (transactionalMode.ignoresSuppliedConsistencyLevel) // With accord non-SERIAL write strategy the commit CL is effectively ANY so we need to read at SERIAL simpleReadResult = cluster.coordinator(1).execute(simpleRead, ConsistencyLevel.SERIAL, key); else @@ -453,12 +445,12 @@ public void testScalarGte() throws Throwable @Test public void testStaticScalarEQ() throws Throwable { - testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c))", 3, "=", 3, "="); + testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", 3, "=", 3, "="); } private void testScalarCondition(int lhs, String operator, int rhs, String reversedOperator) throws Exception { - testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))", lhs, operator, rhs, reversedOperator); + testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", lhs, operator, rhs, reversedOperator); } private void testScalarCondition(String tableDDL, int lhs, String operator, int rhs, String reversedOperator) throws Exception @@ -580,7 +572,7 @@ public void testFailedConditionWithCompleteInsert() throws Throwable @Test public void testReversedClusteringReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC) AND transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); @@ -615,7 +607,7 @@ public void testScalarShorthandSubtraction() throws Exception private void testScalarShorthandOperation(int startingValue, String operation, int endingvalue) throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v) VALUES (1, ?)", ConsistencyLevel.ALL, startingValue); @@ -637,7 +629,7 @@ private void testScalarShorthandOperation(int startingValue, String operation, i @Test public void testConstantNonStaticRowReadBeforeUpdate() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); @@ -659,7 +651,7 @@ public void testConstantNonStaticRowReadBeforeUpdate() throws Exception @Test public void testRangeDeletion() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); @@ -683,7 +675,7 @@ public void testRangeDeletion() throws Exception @Test public void testPartitionKeyReferenceCondition() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC)", + test("CREATE TABLE " + qualifiedTableName + " (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC) AND transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); @@ -731,13 +723,13 @@ public void testMultiPartitionKeyReferenceCondition() throws Exception @Test public void testMultiCellListEqCondition() throws Exception { - testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); + testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListEqCondition() throws Exception { - testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); + testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListEqCondition(String ddl) throws Exception @@ -778,13 +770,13 @@ private void testListEqCondition(String ddl) throws Exception @Test public void testMultiCellSetEqCondition() throws Exception { - testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); + testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenSetEqCondition() throws Exception { - testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); + testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testSetEqCondition(String ddl) throws Exception @@ -825,13 +817,13 @@ private void testSetEqCondition(String ddl) throws Exception @Test public void testMultiCellMapEqCondition() throws Exception { - testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); + testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenMapEqCondition() throws Exception { - testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); + testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exception @@ -872,13 +864,13 @@ private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exceptio @Test public void testMultiCellUDTEqCondition() throws Exception { - testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); + testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenUDTEqCondition() throws Exception { - testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); + testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testUDTEqCondition(String tableDDL) throws Exception @@ -918,7 +910,7 @@ private void testUDTEqCondition(String tableDDL) throws Exception @Test public void testTupleEqCondition() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple) WITH transactional_mode='" + transactionalMode + "'", cluster -> { Object initialTupleValue = CQLTester.tuple("age", 37); @@ -953,7 +945,7 @@ public void testTupleEqCondition() throws Exception @Test public void testIsNullWithComplexDeletion() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ListType listType = ListType.getInstance(Int32Type.instance, true); @@ -987,13 +979,13 @@ public void testIsNullWithComplexDeletion() throws Exception @Test public void testNullMultiCellListConditions() throws Exception { - testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); + testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenListConditions() throws Exception { - testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); + testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullListConditions(String ddl) throws Exception @@ -1039,13 +1031,13 @@ private void testNullListConditions(String ddl) throws Exception @Test public void testNullMultiCellSetConditions() throws Exception { - testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); + testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenSetConditions() throws Exception { - testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); + testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullSetConditions(String ddl) throws Exception @@ -1091,13 +1083,13 @@ private void testNullSetConditions(String ddl) throws Exception @Test public void testNullMultiCellMapConditions() throws Exception { - testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); + testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testNullFrozenMapConditions() throws Exception { - testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); + testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testNullMapConditions(String ddl, boolean isMultiCell) throws Exception @@ -1148,13 +1140,13 @@ private void testNullMapConditions(String ddl, boolean isMultiCell) throws Excep @Test public void testNullMultiCellUDTCondition() throws Exception { - testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); + testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenUDTCondition() throws Exception { - testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); + testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullUDTCondition(String tableDDL) throws Exception @@ -1202,13 +1194,13 @@ private void testNullUDTCondition(String tableDDL) throws Exception @Test public void testNullMultiCellSetElementConditions() throws Exception { - testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); + testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenSetElementConditions() throws Exception { - testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); + testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullSetElementConditions(String ddl) throws Exception @@ -1254,13 +1246,13 @@ private void testNullSetElementConditions(String ddl) throws Exception @Test public void testNullMultiCellMapElementConditions() throws Exception { - testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); + testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testNullFrozenMapElementConditions() throws Exception { - testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); + testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testNullMapElementConditions(String ddl, boolean isMultiCell) throws Exception @@ -1311,13 +1303,13 @@ private void testNullMapElementConditions(String ddl, boolean isMultiCell) throw @Test public void testNullMultiCellUDTFieldCondition() throws Exception { - testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); + testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenUDTFieldCondition() throws Exception { - testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); + testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullUDTFieldCondition(String tableDDL) throws Exception @@ -1365,13 +1357,13 @@ private void testNullUDTFieldCondition(String tableDDL) throws Exception @Test public void testMultiCellListSubstitution() throws Exception { - testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", true); + testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenListSubstitution() throws Exception { - testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)", false); + testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testListSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1405,13 +1397,13 @@ private void testListSubstitution(String ddl, boolean isMultiCell) throws Except @Test public void testMultiCellSetSubstitution() throws Exception { - testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)", true); + testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenSetSubstitution() throws Exception { - testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)", false); + testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testSetSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1445,13 +1437,13 @@ private void testSetSubstitution(String ddl, boolean isMultiCell) throws Excepti @Test public void testMultiCellMapSubstitution() throws Exception { - testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", true); + testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenMapSubstitution() throws Exception { - testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)", false); + testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testMapSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1485,13 +1477,13 @@ private void testMapSubstitution(String ddl, boolean isMultiCell) throws Excepti @Test public void testMultiCellUDTSubstitution() throws Exception { - testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); + testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenUDTSubstitution() throws Exception { - testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); + testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testUDTSubstitution(String tableDDL) throws Exception @@ -1523,7 +1515,7 @@ private void testUDTSubstitution(String tableDDL) throws Exception @Test public void testTupleSubstitution() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple) WITH transactional_mode='" + transactionalMode + "'", cluster -> { Object initialTupleValue = CQLTester.tuple("age", 37); @@ -1550,13 +1542,13 @@ public void testTupleSubstitution() throws Exception @Test public void testMultiCellListReplacement() throws Exception { - testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); + testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListReplacement() throws Exception { - testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); + testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListReplacement(String ddl) throws Exception @@ -1587,13 +1579,13 @@ private void testListReplacement(String ddl) throws Exception @Test public void testMultiCellSetReplacement() throws Exception { - testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); + testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenSetReplacement() throws Exception { - testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); + testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testSetReplacement(String ddl) throws Exception @@ -1624,7 +1616,7 @@ private void testSetReplacement(String ddl) throws Exception @Test public void testListAppendFromReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); @@ -1650,13 +1642,13 @@ public void testListAppendFromReference() throws Exception @Test public void testSetByIndexFromMultiCellListElement() throws Exception { - testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list list, dest_int_list list)"); + testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list list, dest_int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testSetByIndexFromFrozenListElement() throws Exception { - testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list frozen>, dest_int_list list)"); + testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list frozen>, dest_int_list list) WITH transactional_mode='" + transactionalMode + "'"); } private void testListSetByIndexFromListElement(String ddl) throws Exception @@ -1685,7 +1677,7 @@ private void testListSetByIndexFromListElement(String ddl) throws Exception @Test public void testListSetByIndexFromScalar() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); @@ -1708,7 +1700,7 @@ public void testListSetByIndexFromScalar() throws Exception @Test public void testAutoReadSelectionConstruction() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, other_counter int, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, other_counter int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, other_counter) VALUES (0, 0, 1, 1);", ConsistencyLevel.ALL); @@ -1732,7 +1724,7 @@ public void testAutoReadSelectionConstruction() throws Exception @Test public void testMultiMutationsSameKey() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, int_list list, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, int_list) VALUES (0, 0, 0, [1, 2]);", ConsistencyLevel.ALL); @@ -1784,7 +1776,7 @@ public void testLetLimitUsingBind() throws Exception @Test public void testListSetByIndexMultiRow() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (0, 0, [1, 2]);", ConsistencyLevel.ALL); @@ -1812,7 +1804,7 @@ public void testListSetByIndexMultiRow() throws Exception @Test public void testSetAppend() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); @@ -1836,13 +1828,13 @@ public void testSetAppend() throws Exception @Test public void testAssignmentFromMultiCellSetElement() throws Exception { - testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set set)"); + testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testAssignmentFromFrozenSetElement() throws Exception { - testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set frozen>)"); + testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testAssignmentFromSetElement(String ddl) throws Exception @@ -1871,7 +1863,7 @@ private void testAssignmentFromSetElement(String ddl) throws Exception @Test public void testMapAppend() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); @@ -1895,13 +1887,13 @@ public void testMapAppend() throws Exception @Test public void testAssignmentFromMultiCellMapElement() throws Exception { - testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map map)"); + testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map map) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testAssignmentFromFrozenMapElement() throws Exception { - testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map frozen>)"); + testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testAssignmentFromMapElement(String ddl) throws Exception @@ -1930,13 +1922,13 @@ private void testAssignmentFromMapElement(String ddl) throws Exception @Test public void testAssignmentFromMultiCellUDTField() throws Exception { - testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer person)"); + testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testAssignmentFromFrozenUDTField() throws Exception { - testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer frozen)"); + testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testAssignmentFromUDTField(String tableDDL) throws Exception @@ -1967,7 +1959,7 @@ private void testAssignmentFromUDTField(String tableDDL) throws Exception @Test public void testSetMapElementFromMapElementReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); @@ -1991,7 +1983,7 @@ public void testSetMapElementFromMapElementReference() throws Exception @Test public void testSetUDTFieldFromUDTFieldReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'", cluster -> { Object youngPerson = CQLTester.userType("height", 58, "age", 9); @@ -2020,13 +2012,13 @@ public void testSetUDTFieldFromUDTFieldReference() throws Exception @Test public void testMultiCellListElementCondition() throws Exception { - testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); + testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListElementCondition() throws Exception { - testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); + testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListElementCondition(String ddl) throws Exception @@ -2057,13 +2049,13 @@ private void testListElementCondition(String ddl) throws Exception @Test public void testMultiCellMapElementCondition() throws Exception { - testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)"); + testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenMapElementCondition() throws Exception { - testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)"); + testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testMapElementCondition(String ddl) throws Exception @@ -2094,13 +2086,13 @@ private void testMapElementCondition(String ddl) throws Exception @Test public void testMultiCellUDTFieldCondition() throws Exception { - testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); + testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenUDTFieldCondition() throws Exception { - testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); + testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testUDTFieldCondition(String tableDDL) throws Exception @@ -2145,7 +2137,7 @@ private void testUDTFieldCondition(String tableDDL) throws Exception @Test public void testListSubtraction() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2, 3, 4]);", ConsistencyLevel.ALL); @@ -2171,7 +2163,7 @@ public void testListSubtraction() throws Exception @Test public void testSetSubtraction() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)", + test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", cluster -> { cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2, 3, 4});", ConsistencyLevel.ALL); @@ -2197,13 +2189,13 @@ public void testSetSubtraction() throws Exception @Test public void testMultiCellMapSubtraction() throws Exception { - testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set set)"); + testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenMapSubtraction() throws Exception { - testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set frozen>)"); + testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testMapSubtraction(String ddl) throws Exception @@ -2234,13 +2226,13 @@ private void testMapSubtraction(String ddl) throws Exception @Test public void testMultiCellListSelection() throws Exception { - testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list)"); + testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListSelection() throws Exception { - testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>)"); + testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListSelection(String ddl) throws Exception @@ -2272,13 +2264,13 @@ private void testListSelection(String ddl) throws Exception @Test public void testMultiCellSetSelection() throws Exception { - testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set)"); + testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenSetSelection() throws Exception { - testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>)"); + testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testSetSelection(String ddl) throws Exception @@ -2316,7 +2308,7 @@ public void testMultiCellMapSelection() throws Exception @Test public void testFrozenMapSelection() throws Exception { - testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>)"); + testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testMapSelection(String ddl) throws Exception @@ -2349,8 +2341,8 @@ public void testScalarUpdateSubstitution() { String KEYSPACE = "ks" + System.currentTimeMillis(); SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 2}"); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "1 (k int, c int, v int, primary key (k, c))"); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "2 (k int, c int, v int, primary key (k, c))"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "1 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "2 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "1 (k, c, v) VALUES (1, 2, 3);", ConsistencyLevel.ALL); SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "2 (k, c, v) VALUES (2, 2, 4);", ConsistencyLevel.ALL); @@ -2375,13 +2367,13 @@ public void testScalarUpdateSubstitution() @Test public void testRegularScalarInsertSubstitution() throws Exception { - testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))"); + testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testStaticScalarInsertSubstitution() throws Exception { - testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, PRIMARY KEY (k, c))"); + testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } private void testScalarInsertSubstitution(String tableDDL) throws Exception @@ -2411,13 +2403,13 @@ private void testScalarInsertSubstitution(String tableDDL) throws Exception @Test public void testSelectMultiCellUDTReference() throws Exception { - testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); + testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testSelectFrozenUDTReference() throws Exception { - testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); + testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testSelectUDTReference(String tableDDL) throws Exception @@ -2446,13 +2438,13 @@ private void testSelectUDTReference(String tableDDL) throws Exception @Test public void testSelectMultiCellUDTFieldReference() throws Exception { - testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person)"); + testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testSelectFrozenUDTFieldReference() throws Exception { - testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen)"); + testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testSelectUDTFieldReference(String tableDDL) throws Exception @@ -2483,7 +2475,7 @@ private void testSelectUDTFieldReference(String tableDDL) throws Exception @Test public void testMultiKeyQueryAndInsert() throws Throwable { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { String query1 = "BEGIN TRANSACTION\n" + @@ -2525,11 +2517,9 @@ public void demoTest() throws Throwable { SHARED_CLUSTER.schemaChange("DROP KEYSPACE IF EXISTS demo_ks;"); SHARED_CLUSTER.schemaChange("CREATE KEYSPACE demo_ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':2};"); - SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_docs ( org_name text, doc_id int, contents_version int static, title text, permissions int, PRIMARY KEY (org_name, doc_id) );"); - SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_users ( org_name text, user text, members_version int static, permissions int, PRIMARY KEY (org_name, user) );"); - SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.user_docs ( user text, doc_id int, title text, org_name text, permissions int, PRIMARY KEY (user, doc_id) );"); - - SHARED_CLUSTER.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged("demo_ks")); + SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_docs ( org_name text, doc_id int, contents_version int static, title text, permissions int, PRIMARY KEY (org_name, doc_id) ) WITH transactional_mode='" + transactionalMode + "';"); + SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.org_users ( org_name text, user text, members_version int static, permissions int, PRIMARY KEY (org_name, user) ) WITH transactional_mode='" + transactionalMode + "';"); + SHARED_CLUSTER.schemaChange("CREATE TABLE demo_ks.user_docs ( user text, doc_id int, title text, org_name text, permissions int, PRIMARY KEY (user, doc_id) ) WITH transactional_mode='" + transactionalMode + "';"); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); @@ -2607,7 +2597,7 @@ public void testReferenceArithmeticInUpdate() throws Exception @Test public void testCASAndSerialRead() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (id int, c int, v int, s int static, PRIMARY KEY ((id), c));", + test("CREATE TABLE " + qualifiedTableName + " (id int, c int, v int, s int static, PRIMARY KEY ((id), c)) WITH transactional_mode='" + transactionalMode + "';", cluster -> { ICoordinator coordinator = cluster.coordinator(1); int startingAccordCoordinateCount = getAccordCoordinateCount(); @@ -2644,7 +2634,7 @@ public void testCASAndSerialRead() throws Exception assertEquals(1, rangeDeletionCheck.length); // Make sure all the consensus using queries actually were run on Accord - if (nonSerialWriteStrategy.writesThroughAccord) + if (transactionalMode.writesThroughAccord) assertEquals( 20, getAccordCoordinateCount() - startingAccordCoordinateCount); else // Non-serial writes don't go through Accord in these modes @@ -2656,7 +2646,7 @@ public void testCASAndSerialRead() throws Exception @Test public void testCASSimulatorLite() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", + test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); coordinator.execute("INSERT INTO " + qualifiedTableName + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); @@ -2688,7 +2678,7 @@ public void testCASSimulatorLite() throws Exception @Test public void testTransactionCasSimulatorLite() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk))", + test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); @@ -2729,7 +2719,7 @@ public void testTransactionCasSimulatorLite() throws Exception @Test public void testSerialReadDescending() throws Throwable { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); for (int i = 1; i <= 10; i++) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java index 827eebf6f9ac..133d2659a8f8 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordFeatureFlagTest.java @@ -19,35 +19,16 @@ package org.apache.cassandra.distributed.test.accord; import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; import org.junit.Test; -import org.apache.cassandra.db.virtual.AccordVirtualTables; -import org.apache.cassandra.db.virtual.SystemViewsKeyspace; -import org.apache.cassandra.db.virtual.VirtualTable; import org.apache.cassandra.distributed.Cluster; -import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.Feature; -import org.apache.cassandra.distributed.api.IIsolatedExecutor; import org.apache.cassandra.distributed.test.TestBaseImpl; -import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.InvalidRequestException; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.AssertionUtils; import org.assertj.core.api.Assertions; -import static org.apache.cassandra.config.DatabaseDescriptor.NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE; -import static org.apache.cassandra.cql3.statements.TransactionStatement.TRANSACTIONS_DISABLED_MESSAGE; -import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; -import static org.junit.Assert.assertEquals; - public class AccordFeatureFlagTest extends TestBaseImpl { @Test @@ -58,46 +39,9 @@ public void shouldHideAccordTransactions() throws IOException .withConfig(c -> c.with(Feature.NETWORK).set("accord.enabled", "false")) .start())) { - cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (k int, c int, v int, primary key (k, c))"); - - // Any transaction should fail to execute: - String query = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + KEYSPACE + ".tbl WHERE k=0 AND c=0;\n" + - "COMMIT TRANSACTION"; - Assertions.assertThatThrownBy(() -> cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY)) - .has(AssertionUtils.isThrowableInstanceof(InvalidRequestException.class)) - .hasMessage(TRANSACTIONS_DISABLED_MESSAGE); - - // The Accord system keyspace should not be present: - assertEquals("The Accord system keyspace should not exist", - Optional.empty(), cluster.get(1).callOnInstance(() -> Schema.instance.localKeyspaces().get(ACCORD_KEYSPACE_NAME))); - - // Make sure virtual tables don't exist: - IIsolatedExecutor.SerializableCallable> hasAccordVirtualTables = - () -> SystemViewsKeyspace.instance.tables().stream().filter(t -> t.getClass().equals(AccordVirtualTables.Epoch.class)); - List tables = cluster.get(1).callOnInstance(hasAccordVirtualTables).collect(Collectors.toList()); - assertEquals("No Accord virtual tables should exist", Collections.emptyList(), tables); - - // Make sure we throw if someone tries to coordinate a transaction against the no-op service: - Assertions.assertThatThrownBy(() -> cluster.get(1).callOnInstance(() -> AccordService.instance().coordinate(null, null, Dispatcher.RequestTime.forImmediateExecution()))) - .isInstanceOf(UnsupportedOperationException.class); - } - } - - @SuppressWarnings("Convert2MethodRef") - @Test - public void shouldFailOnAccordMigrationWithAccordDisabled() throws IOException - { - try (Cluster cluster = Cluster.build(1) - .withoutVNodes() - .withConfig(c -> c.with(Feature.NETWORK) - .set("accord.enabled", "false") - .set("lwt_strategy", "accord")).createWithoutStarting()) - { - - Assertions.assertThatThrownBy(() -> cluster.startup()) - .has(AssertionUtils.isThrowableInstanceof(ConfigurationException.class)) - .hasMessage(NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE); + Assertions.assertThatThrownBy(() -> cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'")) + .has(AssertionUtils.isThrowableInstanceof(InvalidRequestException.class)) + .hasMessageContaining("accord.enabled"); } } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java index 6022fdda5595..72409dc871f0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteropReadTest.java @@ -32,7 +32,6 @@ import org.apache.cassandra.distributed.util.QueryResultUtil; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; -import org.apache.cassandra.service.accord.AccordService; import static org.apache.cassandra.distributed.api.Feature.GOSSIP; import static org.apache.cassandra.distributed.api.Feature.NETWORK; @@ -61,14 +60,10 @@ private static Object[] obj(Object... values) public void serialReadTest() throws Throwable { try (Cluster cluster = builder().withNodes(3) - .withConfig(config -> config.with(GOSSIP).with(NETWORK) - .set("non_serial_write_strategy", "mixed") - .set("lwt_strategy", "accord")) - .start()) + .withConfig(config -> config.with(GOSSIP).with(NETWORK)).start()) { cluster.schemaChange("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor':3}"); - cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, PRIMARY KEY (k, c))"); - cluster.get(1).runOnInstance(() -> AccordService.instance().ensureKeyspaceIsAccordManaged("ks")); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='unsafe_writes'"); cluster.get(1).runOnInstance(() -> localWrite("INSERT INTO ks.tbl (k, c, v) VALUES (1, 1, 1)")); cluster.get(2).runOnInstance(() -> localWrite("INSERT INTO ks.tbl (k, c, v) VALUES (1, 1, 2)")); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java index 320a9f4e09ba..acef419889a8 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java @@ -42,14 +42,13 @@ protected Logger logger() @BeforeClass public static void setupClass() throws IOException { - AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord") - .set("non_serial_write_strategy", "accord")), 3); + AccordTestBase.setupCluster(builder -> builder, 3); } @Test public void testSerialReadDescending() throws Throwable { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c))", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='full'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); for (int i = 1; i <= 10; i++) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java index 487fbf5f1d0d..235df7ebf7af 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -24,6 +24,7 @@ import java.util.function.Function; import com.google.common.base.Throwables; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -89,14 +90,14 @@ String readCql() public void beforeTest() { SHARED_CLUSTER.filters().reset(); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c))"); - SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH " + TransactionalMode.full.asCqlParam()); } @Test public void testRegularMetrics() throws Exception { countingMetrics0 = getMetrics(); + assertCoordinatorMetrics(0, "rw", 0, 0, 0, 0, 0); SHARED_CLUSTER.coordinator(1).executeWithResult(writeCql(), ConsistencyLevel.ALL, 0, 0, 0, 0); assertCoordinatorMetrics(0, "rw", 1, 0, 0, 0, 0); assertCoordinatorMetrics(1, "rw", 0, 0, 0, 0, 0); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index f91cdecfaae5..6cba8995ed65 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -31,8 +31,10 @@ import javax.annotation.Nonnull; import com.google.common.collect.ImmutableList; + import org.junit.After; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; @@ -58,21 +60,24 @@ import org.apache.cassandra.gms.EndpointState; import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationTarget; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.TableMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; +import org.apache.cassandra.service.consensus.migration.TableMigrationState; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.Ballot.Flag; import org.apache.cassandra.service.paxos.BallotGenerator; import org.apache.cassandra.service.paxos.Commit.Agreed; import org.apache.cassandra.service.paxos.Commit.Proposal; import org.apache.cassandra.service.paxos.PaxosState; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.ByteArrayUtil; @@ -86,6 +91,7 @@ import static java.lang.String.format; import static java.util.Collections.emptyList; import static org.apache.cassandra.Util.spinUntilSuccess; +import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.db.SystemKeyspace.CONSENSUS_MIGRATION_STATE; import static org.apache.cassandra.db.SystemKeyspace.PAXOS; import static org.apache.cassandra.dht.Range.normalize; @@ -96,8 +102,7 @@ import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV2; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PROMISE; import static org.assertj.core.api.Fail.fail; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; /* * This test suite is intended to serve as an integration test with some pretty good visibility into actual execution @@ -146,11 +151,8 @@ public static void setupClass() throws IOException ServerTestUtils.daemonInitialization(); // Otherwise repair complains if you don't specify a keyspace CassandraRelevantProperties.SYSTEM_TRACES_DEFAULT_RF.setInt(3); - AccordTestBase.setupCluster(builder -> - builder.appendConfig(config -> - config.set("paxos_variant", PaxosVariant.v2.name()) - .set("non_serial_write_strategy", "migration")), - 3); + AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("paxos_variant", PaxosVariant.v2.name()) + .set("accord.range_migration", "explicit")), 3); partitioner = FBUtilities.newPartitioner(SHARED_CLUSTER.get(1).callsOnInstance(() -> DatabaseDescriptor.getPartitioner().getClass().getSimpleName()).call()); StorageService.instance.setPartitionerUnsafe(partitioner); ServerTestUtils.prepareServerNoRegister(); @@ -177,9 +179,6 @@ public void tearDown() throws Exception ConsensusRequestRouter.resetInstance(); ConsensusKeyMigrationState.reset(); }); - SHARED_CLUSTER.get(1).runOnInstance(() -> { - ConsensusTableMigrationState.reset(); - }); SHARED_CLUSTER.coordinators().forEach(coordinator -> coordinator.execute(format("TRUNCATE TABLE %s.%s", SYSTEM_KEYSPACE_NAME, CONSENSUS_MIGRATION_STATE), ALL)); SHARED_CLUSTER.coordinators().forEach(coordinator -> coordinator.execute(format("TRUNCATE TABLE %s.%s", SYSTEM_KEYSPACE_NAME, PAXOS), ALL)); } @@ -243,10 +242,10 @@ public static class RoutesToPaxosOnce extends ConsensusRequestRouter boolean routed; @Override - protected ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + protected ConsensusRoutingDecision routeAndMaybeMigrate(ClusterMetadata cm, @Nonnull TableMetadata tmd, @Nonnull DecoratedKey key, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) { if (routed) - return super.routeAndMaybeMigrate(key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + return super.routeAndMaybeMigrate(cm, tmd, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); routed = true; return paxosV2; } @@ -279,10 +278,10 @@ public static class RoutesToAccordOnce extends ConsensusRequestRouter boolean routed; @Override - protected ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull ColumnFamilyStore cfs, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) + protected ConsensusRoutingDecision routeAndMaybeMigrate(ClusterMetadata cm, @Nonnull TableMetadata tmd, @Nonnull DecoratedKey key, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) { if (routed) - return super.routeAndMaybeMigrate(key, cfs, consistencyLevel, requestTime, timeoutNanos, isForWrite); + return super.routeAndMaybeMigrate(cm, tmd, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); routed = true; return ConsensusRoutingDecision.accord; } @@ -343,6 +342,21 @@ public void testPaxosToAccordCAS() throws Exception { test(format(TABLE_FMT, qualifiedTableName), cluster -> { + String table = tableName; + cluster.forEach(node -> node.runOnInstance(() -> { + TableMetadata tbl = Schema.instance.getTableMetadata(KEYSPACE, table); + Assert.assertEquals(TransactionalMode.off, tbl.params.transactionalMode); + Assert.assertEquals(TransactionalMigrationFromMode.none, tbl.params.transactionalMigrationFrom); + })); + + cluster.schemaChange(format("ALTER TABLE %s.%s WITH transactional_mode='%s'", KEYSPACE, tableName, TransactionalMode.full)); + + cluster.forEach(node -> node.runOnInstance(() -> { + TableMetadata tbl = Schema.instance.getTableMetadata(KEYSPACE, table); + Assert.assertEquals(TransactionalMode.full, tbl.params.transactionalMode); + Assert.assertEquals(TransactionalMigrationFromMode.off, tbl.params.transactionalMigrationFrom); + })); + String casCQL = format(CAS_FMT, qualifiedTableName, CLUSTERING_VALUE); Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); Consumer runCasApplies = key -> assertRowEquals(cluster, new Object[]{true}, casCQL, key); @@ -412,14 +426,18 @@ public void testPaxosToAccordCAS() throws Exception // key migration occurred assertTargetAccordWrite(runCasApplies, 1, migratingKey, 1, 0, 1, 0, 0); - // This will force the request to run on Paxos up to Accept - // and the accept will be rejected at both nodes and we are certain we need to retry the transaction + // This will force the write to use the normal write patch cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); // Update inserted row so the condition can apply, if the condition check doesn't apply // then it won't get to propose/accept migratingKey = testingKeys.next(); - Consumer makeCASApply = key -> cluster.coordinator(1).execute("UPDATE " + qualifiedTableName + " SET v = 42 WHERE id = ? AND c = ?", ALL, key, CLUSTERING_VALUE); + String query = "UPDATE " + qualifiedTableName + " SET v = 42 WHERE id = ? AND c = ?"; + Consumer makeCASApply = key -> cluster.forEach(instance -> instance.runOnInstance(() -> executeInternal(query, key, CLUSTERING_VALUE))); makeCASApply.accept(migratingKey); + + // This will force the request to run on Paxos up to Accept + // and the accept will be rejected at both nodes and we are certain we need to retry the transaction + cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); assertTargetAccordWrite(runCasApplies, 1, migratingKey, 1, 1, 1, 0, 1); // One node will now accept the other will reject and we are uncertain if we should retry the transaction @@ -429,7 +447,9 @@ public void testPaxosToAccordCAS() throws Exception cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToAccept())); try { + cluster.filters().allVerbs().to(3).from(3).drop(); runCasNoApply.accept(migratingKey); + cluster.filters().reset(); fail("Should have thrown timeout exception"); } catch (Throwable t) @@ -469,7 +489,7 @@ public void testPaxosToAccordSerialRead() throws Exception { test(format(TABLE_FMT, qualifiedTableName), cluster -> { - String tableName = qualifiedTableName.split("\\.")[1]; + cluster.schemaChange(format("ALTER TABLE %s.%s WITH transactional_mode='%s'", KEYSPACE, tableName, TransactionalMode.full)); String readCQL = format("SELECT * FROM %s WHERE id = ? and c = %s", qualifiedTableName, CLUSTERING_VALUE); Function runRead = key -> cluster.coordinator(1).execute(readCQL, SERIAL, key); Range migratingRange = new Range<>(new LongToken(Long.MIN_VALUE + 1), new LongToken(Long.MIN_VALUE)); @@ -490,6 +510,25 @@ public void testPaxosToAccordSerialRead() throws Exception }); } + private void alterTableTransactionalMode(TransactionalMode mode) + { + SHARED_CLUSTER.schemaChange(format("ALTER TABLE %s WITH %s", qualifiedTableName, mode.asCqlParam())); + } + + private void assertTransactionalModes(String keyspace, String table, TransactionalMode mode, TransactionalMigrationFromMode migration) + { + forEach(() -> { + TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); + Assert.assertEquals(mode, metadata.params.transactionalMode); + Assert.assertEquals(migration, metadata.params.transactionalMigrationFrom); + }); + } + + private void assertTransactionalModes(TransactionalMode mode, TransactionalMigrationFromMode migration) + { + assertTransactionalModes(KEYSPACE, tableName, mode, migration); + } + @Test public void testAccordToPaxos() throws Exception { @@ -499,6 +538,9 @@ public void testAccordToPaxos() throws Exception Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); String tableName = qualifiedTableName.split("\\.")[1]; + alterTableTransactionalMode(TransactionalMode.mixed_reads); + assertTransactionalModes(TransactionalMode.mixed_reads, TransactionalMigrationFromMode.off); + // Mark a subrange as migrating and finish migrating half of it nodetool(coordinator, "consensus_admin", "begin-migration", "-st", midToken.toString(), "-et", maxToken.toString(), "-tp", "accord", KEYSPACE, tableName); nodetool(coordinator, "consensus_admin", "finish-migration", "-st", midToken.toString(), "-et", "3074457345618258601"); @@ -508,7 +550,8 @@ public void testAccordToPaxos() throws Exception assertMigrationState(tableName, ConsensusMigrationTarget.accord, ImmutableList.of(accordMigratedRange), ImmutableList.of(accordMigratingRange), 1); // Test that we can reverse the migration and go back to Paxos - nodetool(coordinator, "consensus_admin", "set-target-protocol", "-tp", "paxos", KEYSPACE, tableName); + alterTableTransactionalMode(TransactionalMode.off); + assertTransactionalModes(TransactionalMode.off, TransactionalMigrationFromMode.mixed_reads); assertMigrationState(tableName, ConsensusMigrationTarget.paxos, ImmutableList.of(new Range(minToken, midToken), new Range(maxToken, minToken)), ImmutableList.of(accordMigratingRange), 1); Iterator paxosNonMigratingKeys = getKeysBetweenTokens(minToken, midToken); Iterator paxosMigratingKeys = getKeysBetweenTokens(upperMidToken, maxToken); @@ -547,6 +590,38 @@ public void testAccordToPaxos() throws Exception }); } + private static void assertCompletedMigrationState(String tableName) throws Throwable + { + // Validate nodetool consensus admin list output + String yamlResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list"); + Map yamlStateMap = new Yaml().load(yamlResultString); + String minifiedYamlResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "minified-yaml"); + Map minifiedYamlStateMap = new Yaml().load(minifiedYamlResultString); + String jsonResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "json"); + Map jsonStateMap = JsonUtils.JSON_OBJECT_MAPPER.readValue(jsonResultString, new TypeReference>(){}); + String minifiedJsonResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "minified-json"); + Map minifiedJsonStateMap = JsonUtils.JSON_OBJECT_MAPPER.readValue(minifiedJsonResultString, new TypeReference>(){}); + + for (Map migrationStateMap : ImmutableList.of(yamlStateMap, jsonStateMap, minifiedYamlStateMap, minifiedJsonStateMap)) { + assertEquals(PojoToString.CURRENT_VERSION, migrationStateMap.get("version")); + assertTrue(Epoch.EMPTY.getEpoch() < ((Number) migrationStateMap.get("lastModifiedEpoch")).longValue()); + List> tableStates = (List>) migrationStateMap.get("tableStates"); + assertEquals(0, tableStates.size()); + } + + spinUntilSuccess(() -> { + for (IInvokableInstance instance : SHARED_CLUSTER) + { + ConsensusMigrationState snapshot = getMigrationStateSnapshot(instance); + assertEquals(0, snapshot.tableStates.size()); + instance.runOnInstance(() -> { + TableMetadata tbl = Schema.instance.getTableMetadata(KEYSPACE, tableName); + Assert.assertEquals(TransactionalMigrationFromMode.none, tbl.params.transactionalMigrationFrom); + }); + } + }); + } + private static void assertMigrationState(String tableName, ConsensusMigrationTarget target, List> migratedRanges, List> migratingRanges, int numMigratingEpochs) throws Throwable { // Validate nodetool consensus admin list output @@ -564,9 +639,20 @@ private static void assertMigrationState(String tableName, ConsensusMigrationTar { assertEquals(PojoToString.CURRENT_VERSION, migrationStateMap.get("version")); assertTrue(Epoch.EMPTY.getEpoch() < ((Number) migrationStateMap.get("lastModifiedEpoch")).longValue()); - List> tableStates = (List>) migrationStateMap.get("tableStates"); - assertEquals(tableStates.size(), 1); - Map tableStateMap = tableStates.get(0); + + Map tableStateMap = null; + for (Map stateMap : (List>) migrationStateMap.get("tableStates")) + { + Object table = stateMap.get("table"); + Object keyspace = stateMap.get("keyspace"); + if (KEYSPACE.equals(keyspace) && tableName.equals(table)) + { + tableStateMap = stateMap; + break; + } + } + assertNotNull(tableStateMap); + assertEquals(tableName, tableStateMap.get("table")); assertEquals(KEYSPACE, tableStateMap.get("keyspace")); tableIds.add((String) tableStateMap.get("tableId")); @@ -591,21 +677,22 @@ private static void assertMigrationState(String tableName, ConsensusMigrationTar for (IInvokableInstance instance : SHARED_CLUSTER) { ConsensusMigrationState snapshot = getMigrationStateSnapshot(instance); - assertEquals(1, snapshot.tableStates.size()); - TableMigrationState state = snapshot.tableStates.values().iterator().next(); - assertEquals(KEYSPACE, state.keyspaceName); - assertEquals(tableName, state.tableName); for (String tableId : tableIds) - assertEquals(tableId, state.tableId.toString()); - assertEquals(target, state.targetProtocol); - assertEquals("Migrated ranges:", migratedRanges, state.migratedRanges); - assertEquals("Migrating ranges:", migratingRanges, state.migratingRanges); - assertEquals("Migrating and migrated ranges:", migratingAndMigratedRanges, state.migratingAndMigratedRanges); - assertEquals(numMigratingEpochs, state.migratingRangesByEpoch.size()); - if (migratingRanges.isEmpty()) - assertEquals(0, state.migratingRangesByEpoch.size()); - else - assertEquals(migratingRanges, state.migratingRangesByEpoch.values().iterator().next()); + { + TableMigrationState state = snapshot.tableStates.get(TableId.fromString(tableId)); + assertNotNull(state); + assertEquals(KEYSPACE, state.keyspaceName); + assertEquals(tableName, state.tableName); + assertEquals(target, state.targetProtocol); + assertEquals("Migrated ranges:", migratedRanges, state.migratedRanges); + assertEquals("Migrating ranges:", migratingRanges, state.migratingRanges); + assertEquals("Migrating and migrated ranges:", migratingAndMigratedRanges, state.migratingAndMigratedRanges); + assertEquals(numMigratingEpochs, state.migratingRangesByEpoch.size()); + if (migratingRanges.isEmpty()) + assertEquals(0, state.migratingRangesByEpoch.size()); + else + assertEquals(migratingRanges, state.migratingRangesByEpoch.values().iterator().next()); + } } }); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java index 19d562a21cdb..abebaeb49d7f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordSimpleFastPathTest.java @@ -27,6 +27,7 @@ import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.junit.Assert; import org.junit.Test; @@ -90,7 +91,7 @@ public void downNodesRemovedFromFastPath() throws Throwable .start())) { cluster.schemaChange("CREATE KEYSPACE ks WITH replication={'class':'SimpleStrategy', 'replication_factor': 3}"); - cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key (k, c))"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key (k, c)) WITH " + TransactionalMode.full.asCqlParam()); String query = "BEGIN TRANSACTION\n" + " SELECT * FROM ks.tbl WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index f5937f643bd7..4b0a90b17362 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -37,6 +37,7 @@ import org.apache.cassandra.schema.TableMetadata; import org.junit.After; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.Before; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,7 +71,7 @@ import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.utils.AssertionUtils; @@ -137,11 +138,10 @@ protected void test(String tableDDL, FailingConsumer fn) throws Excepti public static void ensureTableIsAccordManaged(Cluster cluster, String ksname, String tableName) { cluster.get(1).runOnInstance(() -> { - // TODO: remove when accord enabled is handled via schema TableMetadata metadata = Schema.instance.getTableMetadata(ksname, tableName); if (metadata == null) return; // bad plumbing from shared utils.... - AccordService.instance().ensureTableIsAccordManaged(metadata.id); + Assert.assertTrue(metadata.params.transactionalMode.accordIsEnabled); }); } @@ -150,7 +150,6 @@ protected void test(List ddls, FailingConsumer fn) throws Excep for (String ddl : ddls) SHARED_CLUSTER.schemaChange(ddl); - ensureTableIsAccordManaged(SHARED_CLUSTER, KEYSPACE, tableName); // Evict commands from the cache immediately to expose problems loading from disk. SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); @@ -166,7 +165,7 @@ protected void test(List ddls, FailingConsumer fn) throws Excep protected void test(FailingConsumer fn) throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c))", fn); + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", fn); } protected static ConsensusMigrationState getMigrationStateSnapshot(IInvokableInstance instance) throws IOException diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java index 084709e4964a..82c9e2f806f5 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java @@ -56,7 +56,7 @@ public void test() String ks = "ks" + i; String table = ks + ".tbl" + i; SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + ks + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}"); - SHARED_CLUSTER.schemaChange(String.format("CREATE TABLE %s (pk blob primary key)", table)); + SHARED_CLUSTER.schemaChange(String.format("CREATE TABLE %s (pk blob primary key) WITH transactional_mode='full'", table)); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); List keys = tokensToKeys(tokens()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java index 381ea8c6be65..907a0e437597 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java @@ -69,7 +69,6 @@ import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; -import org.apache.cassandra.tcm.ownership.AccordTables; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.TokenMap; import org.apache.cassandra.tcm.ownership.UniformRangePlacement; @@ -150,7 +149,6 @@ public static ClusterMetadata minimalForTesting(Epoch epoch, IPartitioner partit Directory.EMPTY, new TokenMap(partitioner), DataPlacements.empty(), - AccordTables.EMPTY, AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, @@ -166,7 +164,6 @@ public static ClusterMetadata minimalForTesting(IPartitioner partitioner) null, null, DataPlacements.empty(), - AccordTables.EMPTY, AccordFastPath.EMPTY, null, null, @@ -182,7 +179,6 @@ public static ClusterMetadata minimalForTesting(Keyspaces keyspaces) null, null, DataPlacements.empty(), - AccordTables.EMPTY, AccordFastPath.EMPTY, null, null, diff --git a/test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java b/test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java deleted file mode 100644 index c486da8f5430..000000000000 --- a/test/distributed/org/apache/cassandra/distributed/test/tcm/AccordAddTableTest.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.distributed.test.tcm; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; -import java.util.concurrent.Future; - -import org.junit.Test; - -import accord.primitives.Ranges; -import accord.primitives.Txn; -import org.apache.cassandra.distributed.Cluster; -import org.apache.cassandra.distributed.api.Feature; -import org.apache.cassandra.distributed.api.IInvokableInstance; -import org.apache.cassandra.distributed.test.TestBaseImpl; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.accord.TokenRange; -import org.apache.cassandra.service.accord.api.AccordRoutingKey; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.utils.FBUtilities; - -public class AccordAddTableTest extends TestBaseImpl -{ - @Test - public void test() throws IOException - { - try (Cluster cluster = builder().withNodes(6) - .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK)) - .start()) - { - List> results = new ArrayList<>(cluster.size()); - for (IInvokableInstance inst : cluster) - { - Future result = inst.asyncRunsOnInstance(() -> { - for (int i = 0; i < 100; i++) - { - AccordService.instance().maybeConvertTablesToAccord(fakeTxn(i)); - if (!ClusterMetadata.current().accordTables.contains(fromNum(i))) - throw new AssertionError("Table not found in TCM!"); - } - }).call(); - results.add(result); - } - FBUtilities.waitOnFutures(results); - } - } - - private static Txn fakeTxn(int i) - { - TableId id = fromNum(i); - - Ranges of = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(id), AccordRoutingKey.SentinelKey.max(id))); - return new Txn.InMemory(of, null, null); - } - - private static TableId fromNum(int i) - { - return TableId.fromUUID(new UUID(i, 0)); // not valid... but do we care? - } -} diff --git a/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java b/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java index 94bc2d937bd4..ee0ad12bcff0 100644 --- a/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java +++ b/test/unit/org/apache/cassandra/audit/AuditLoggerTest.java @@ -442,7 +442,7 @@ public void testCqlBatch_MultipleTablesAuditing() @Test public void testTransactionAuditing() { - createTable("CREATE TABLE %s (key int PRIMARY KEY, val int)"); + createTable("CREATE TABLE %s (key int PRIMARY KEY, val int) WITH transactional_mode='full'"); Session session = sessionNet(); String fqTableName = KEYSPACE + "." + currentTable(); diff --git a/test/unit/org/apache/cassandra/auth/TxnAuthTest.java b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java index d60c3cadc3d4..e26e0259510e 100644 --- a/test/unit/org/apache/cassandra/auth/TxnAuthTest.java +++ b/test/unit/org/apache/cassandra/auth/TxnAuthTest.java @@ -66,7 +66,7 @@ public static void setUpAuthAndAccord() @Before public void setUpTest() { - createTable("CREATE TABLE %s (k int, v int, PRIMARY KEY(k))"); + createTable("CREATE TABLE %s (k int, v int, PRIMARY KEY(k)) WITH transactional_mode='full'"); } @Test diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index eb10476ace2d..c58020763442 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -78,6 +78,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.auth.INetworkAuthorizer", "org.apache.cassandra.auth.IRoleManager", "org.apache.cassandra.config.AccordSpec", + "org.apache.cassandra.config.AccordSpec$TransactionalRangeMigration", "org.apache.cassandra.config.CassandraRelevantProperties", "org.apache.cassandra.config.CassandraRelevantProperties$PropertyConverter", "org.apache.cassandra.config.Config", @@ -99,6 +100,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$PaxosVariant", "org.apache.cassandra.config.Config$RepairCommandPoolFullStrategy", "org.apache.cassandra.config.Config$SSTableConfig", + "org.apache.cassandra.config.Config$TransactionalRangeMigration", "org.apache.cassandra.config.Config$TriggersPolicy", "org.apache.cassandra.config.Config$UserFunctionTimeoutPolicy", "org.apache.cassandra.config.ConfigBeanInfo", @@ -292,6 +294,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.service.CacheService$CacheType", "org.apache.cassandra.security.AbstractCryptoProvider", "org.apache.cassandra.tcm.RegistrationStateCallbacks", + "org.apache.cassandra.service.consensus.TransactionalMode", "org.apache.cassandra.transport.ProtocolException", "org.apache.cassandra.utils.Closeable", "org.apache.cassandra.utils.CloseableIterator", diff --git a/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java b/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java index 360b92ded53b..be9fcb613674 100644 --- a/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java +++ b/test/unit/org/apache/cassandra/cql3/NodeLocalConsistencyTest.java @@ -93,7 +93,7 @@ public void testSelect() @Test public void testTransaction() { - createTable("CREATE TABLE %s (key text, val int, PRIMARY KEY(key))"); + createTable("CREATE TABLE %s (key text, val int, PRIMARY KEY(key)) WITH transactional_mode='full'"); QueryProcessor.process(formatQuery("INSERT INTO %s (key, val) VALUES ('foo', 0)"), NODE_LOCAL); String query = "BEGIN TRANSACTION\n" + diff --git a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java index 8819a1c7c6d6..70e49bc7fec2 100644 --- a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java +++ b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java @@ -26,6 +26,9 @@ import java.util.stream.Collectors; import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; import org.junit.Assume; import org.junit.Before; import org.junit.BeforeClass; @@ -76,6 +79,27 @@ public static void setUpClass() public void setup() { requireNetwork(); + for (int i=0; i<10; i++) + ClusterMetadataService.instance().log().waitForHighestConsecutive(); + } + + private static void runAndAwaitNextEpoch(Runnable runnable) + { + try + { + Epoch current = ClusterMetadata.current().epoch; + runnable.run(); + ClusterMetadataService.instance().awaitAtLeast(Epoch.create(current.getEpoch() + 1)); + } + catch (Throwable e) + { + throw new RuntimeException(e); + } + } + + private static void sessionSchemaUpdate(Session session, String update) + { + runAndAwaitNextEpoch(() -> session.execute(update)); } @Test @@ -173,30 +197,30 @@ else if (expectWarn) public void testInvalidatePreparedStatementsOnDrop() { Session session = sessionNet(ProtocolVersion.V5); - session.execute(dropKsStatement); - session.execute(createKsStatement); + sessionSchemaUpdate(session, dropKsStatement); + sessionSchemaUpdate(session, createKsStatement); - String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (id int PRIMARY KEY, cid int, val text);"; + String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (id int PRIMARY KEY, cid int, val text) WITH transactional_mode='unsafe';"; String dropTableStatement = "DROP TABLE IF EXISTS " + KEYSPACE + ".qp_cleanup;"; - session.execute(createTableStatement); + sessionSchemaUpdate(session, createTableStatement); String insert = "INSERT INTO " + KEYSPACE + ".qp_cleanup (id, cid, val) VALUES (?, ?, ?)"; PreparedStatement prepared = session.prepare(insert); PreparedStatement preparedBatch = session.prepare(batch(insert)); PreparedStatement preparedTxn = session.prepare(txn(insert)); - session.execute(dropTableStatement); - session.execute(createTableStatement); + sessionSchemaUpdate(session, dropTableStatement); + sessionSchemaUpdate(session, createTableStatement); updateTxnState(); session.execute(prepared.bind(1, 1, "value")); session.execute(preparedBatch.bind(2, 2, "value2")); session.execute(preparedTxn.bind(3, 3, "value3")); - session.execute(dropKsStatement); - session.execute(createKsStatement); - session.execute(createTableStatement); + sessionSchemaUpdate(session, dropKsStatement); + sessionSchemaUpdate(session, createKsStatement); + sessionSchemaUpdate(session, createTableStatement); updateTxnState(); // The driver will get a response about the prepared statement being invalid, causing it to transparently @@ -205,7 +229,7 @@ public void testInvalidatePreparedStatementsOnDrop() session.execute(prepared.bind(1, 1, "value")); session.execute(preparedBatch.bind(2, 2, "value2")); session.execute(preparedTxn.bind(3, 3, "value3")); - session.execute(dropKsStatement); + sessionSchemaUpdate(session, dropKsStatement); } @Test @@ -223,12 +247,12 @@ public void testInvalidatePreparedStatementOnAlterV4() private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boolean supportsMetadataChange) { Session session = sessionNet(version); - String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (a int PRIMARY KEY, b int, c int);"; + String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (a int PRIMARY KEY, b int, c int) WITH transactional_mode='unsafe';"; String alterTableStatement = "ALTER TABLE " + KEYSPACE + ".qp_cleanup ADD d int;"; - session.execute(dropKsStatement); - session.execute(createKsStatement); - session.execute(createTableStatement); + sessionSchemaUpdate(session, dropKsStatement); + sessionSchemaUpdate(session, createKsStatement); + sessionSchemaUpdate(session, createTableStatement); updateTxnState(); String select = "SELECT * FROM " + KEYSPACE + ".qp_cleanup"; @@ -247,7 +271,7 @@ private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boo assertRowsNet(session.execute(preparedSelectTxn.bind(2)), row(2, 3, 4)); - session.execute(alterTableStatement); + sessionSchemaUpdate(session, alterTableStatement); updateTxnState(); session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c, d) VALUES (?, ?, ?, ?);", @@ -291,7 +315,7 @@ private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boo } } - session.execute(dropKsStatement); + sessionSchemaUpdate(session, dropKsStatement); } @Test @@ -309,12 +333,12 @@ public void testInvalidatePreparedStatementOnAlterUnchangedMetadataV5() private void testInvalidatePreparedStatementOnAlterUnchangedMetadata(ProtocolVersion version) { Session session = sessionNet(version); - String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (a int PRIMARY KEY, b int, c int);"; + String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (a int PRIMARY KEY, b int, c int) WITH transactional_mode='unsafe';"; String alterTableStatement = "ALTER TABLE " + KEYSPACE + ".qp_cleanup ADD d int;"; - session.execute(dropKsStatement); - session.execute(createKsStatement); - session.execute(createTableStatement); + sessionSchemaUpdate(session, dropKsStatement); + sessionSchemaUpdate(session, createKsStatement); + sessionSchemaUpdate(session, createTableStatement); updateTxnState(); String select = "SELECT a, b, c FROM " + KEYSPACE + ".qp_cleanup"; @@ -338,7 +362,7 @@ private void testInvalidatePreparedStatementOnAlterUnchangedMetadata(ProtocolVer Assertions.assertThat(columnNames(rs)).containsExactlyInAnyOrder("a", "b", "c"); } - session.execute(alterTableStatement); + sessionSchemaUpdate(session, alterTableStatement); updateTxnState(); session.execute("INSERT INTO " + KEYSPACE + ".qp_cleanup (a, b, c, d) VALUES (?, ?, ?, ?);", @@ -358,18 +382,18 @@ private void testInvalidatePreparedStatementOnAlterUnchangedMetadata(ProtocolVer Assertions.assertThat(columnNames(rs)).containsExactlyInAnyOrder("a", "b", "c"); } - session.execute(dropKsStatement); + sessionSchemaUpdate(session, dropKsStatement); } @Test - public void testStatementRePreparationOnReconnect() + public void testStatementRePreparationOnReconnect() throws Throwable { Session session = sessionNet(ProtocolVersion.V5); session.execute("USE " + keyspace()); - session.execute(dropKsStatement); - session.execute(createKsStatement); - createTable("CREATE TABLE %s (id int PRIMARY KEY, cid int, val text);"); + sessionSchemaUpdate(session, dropKsStatement); + sessionSchemaUpdate(session, createKsStatement); + runAndAwaitNextEpoch(() -> createTable("CREATE TABLE %s (id int PRIMARY KEY, cid int, val text) WITH transactional_mode='unsafe';")); updateTxnState(); String insertCQL = "INSERT INTO " + currentTable() + " (id, cid, val) VALUES (?, ?, ?)"; @@ -414,14 +438,14 @@ public void prepareAndExecuteWithCustomExpressions() { Session session = sessionNet(ProtocolVersion.V5); - session.execute(dropKsStatement); - session.execute(createKsStatement); + sessionSchemaUpdate(session, dropKsStatement); + sessionSchemaUpdate(session, createKsStatement); String table = "custom_expr_test"; String index = "custom_index"; - session.execute(String.format("CREATE TABLE IF NOT EXISTS %s.%s (id int PRIMARY KEY, cid int, val text);", + sessionSchemaUpdate(session, String.format("CREATE TABLE IF NOT EXISTS %s.%s (id int PRIMARY KEY, cid int, val text) WITH transactional_mode='unsafe';", KEYSPACE, table)); - session.execute(String.format("CREATE CUSTOM INDEX %s ON %s.%s(val) USING '%s'", + sessionSchemaUpdate(session, String.format("CREATE CUSTOM INDEX %s ON %s.%s(val) USING '%s'", index, KEYSPACE, table, StubIndex.class.getName())); updateTxnState(); @@ -460,7 +484,7 @@ public void testMetadataFlagsWithLWTs() throws Throwable // Note: this test does not cover all aspects of 10786 (yet) - it was intended to test the // changes for CASSANDRA-13992. - createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))"); + runAndAwaitNextEpoch(() -> createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))")); execute("INSERT INTO %s (pk, v1, v2) VALUES (1,1,1)"); try (SimpleClient simpleClient = newSimpleClient(ProtocolVersion.BETA.orElse(ProtocolVersion.CURRENT))) @@ -646,7 +670,7 @@ private void testPrepareWithLWT(ProtocolVersion version) throws Throwable { Session session = sessionNet(version); session.execute("USE " + keyspace()); - createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))"); + runAndAwaitNextEpoch(() -> createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))")); PreparedStatement prepared1 = session.prepare(String.format("UPDATE %s SET v1 = ?, v2 = ? WHERE pk = 1 IF v1 = ?", currentTable())); PreparedStatement prepared2 = session.prepare(String.format("INSERT INTO %s (pk, v1, v2) VALUES (?, 200, 300) IF NOT EXISTS", currentTable())); @@ -710,7 +734,7 @@ private void testPrepareWithBatchLWT(ProtocolVersion version) throws Throwable { Session session = sessionNet(version); session.execute("USE " + keyspace()); - createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))"); + runAndAwaitNextEpoch(() -> createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))")); PreparedStatement prepared1 = session.prepare("BEGIN BATCH " + "UPDATE " + currentTable() + " SET v1 = ? WHERE pk = 1 IF v1 = ?;" + @@ -745,7 +769,7 @@ private void testPrepareWithBatchLWT(ProtocolVersion version) throws Throwable row(false, 1, 10, 20)); assertEquals(rs.getColumnDefinitions().size(), 4); - alterTable("ALTER TABLE %s ADD v3 int;"); + runAndAwaitNextEpoch(() -> alterTable("ALTER TABLE %s ADD v3 int;")); rs = session.execute(prepared2.bind()); assertRowsNet(rs, @@ -777,7 +801,7 @@ private void testPrepareWithAccord(ProtocolVersion version) int maxAttempts = 3; Session session = sessionNet(version); session.execute("USE " + keyspace()); - createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk))"); + runAndAwaitNextEpoch(() -> createTable("CREATE TABLE %s (pk int, v1 int, v2 int, PRIMARY KEY (pk)) WITH transactional_mode='full'")); updateTxnState(); PreparedStatement writeOnly = session.prepare(txn( diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index ccf3d9a2889a..8529d8ff8073 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -1141,6 +1141,8 @@ private static String tableParametersCql() " AND memtable_flush_period_in_ms = 0\n" + " AND min_index_interval = 128\n" + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = '99p';"; } diff --git a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java index d0d2295e424f..3e642db7ff33 100644 --- a/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java +++ b/test/unit/org/apache/cassandra/db/SchemaCQLHelperTest.java @@ -347,6 +347,8 @@ public void testCfmOptionsCQL() " AND memtable_flush_period_in_ms = 8\n" + " AND min_index_interval = 6\n" + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + " AND speculative_retry = 'ALWAYS';" )); } diff --git a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java index 63f91b672980..2b22b0be3b64 100644 --- a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java +++ b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java @@ -27,14 +27,13 @@ import com.google.common.collect.ImmutableMap; import org.apache.cassandra.service.accord.AccordFastPath; -import org.apache.cassandra.tcm.ownership.AccordTables; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.junit.Assert; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.schema.DistributedSchema; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigrationState.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.Directory; @@ -90,7 +89,6 @@ public static ClusterMetadata metadata(NodeConfiguration... configurations) directory, tokenMap, DataPlacements.EMPTY, - AccordTables.EMPTY, AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 7dcf4c640418..b9c7fe6b598b 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -587,7 +587,9 @@ private static RepairOption repairOption(RandomSource rs, Cluster.Node coordinat { RepairType type = repairTypeGen.next(rs); PreviewType previewType = previewTypeGen.next(rs); - boolean accordRepair = type == RepairType.FULL && previewType == PreviewType.NONE ? rs.nextBoolean() : false; + // TODO (required - IR) add this back and expand as part of IR integration +// boolean accordRepair = type == RepairType.FULL && previewType == PreviewType.NONE ? rs.nextBoolean() : false; + boolean accordRepair = false; List args = new ArrayList<>(); args.add(ks); List tables = tablesGen.next(rs); @@ -1407,8 +1409,6 @@ public RepairCoordinator repair(String ks, RepairOption options, boolean addFail failures.add(new AssertionError(event.getMessage())); }); } - if (repair.state.options.accordRepair()) - AccordService.instance().ensureKeyspaceIsAccordManaged(repair.state.keyspace); return repair; } diff --git a/test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java b/test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java index 1a2dc1132dad..e4c603079821 100644 --- a/test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java +++ b/test/unit/org/apache/cassandra/schema/FastPathSchemaTest.java @@ -32,9 +32,6 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.service.accord.fastpath.ParameterizedFastPathStrategy; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.Epoch; -import org.apache.cassandra.tcm.transformations.AddAccordTable; import static java.lang.String.format; @@ -70,14 +67,9 @@ public void keyspaceInheriting() KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(KEYSPACE); Assert.assertSame(FastPathStrategy.simple(), ksm.params.fastPath); - process("CREATE TABLE %s.tbl (k int primary key, v int)", KEYSPACE); + process("CREATE TABLE %s.tbl (k int primary key, v int) WITH transactional_mode='full'", KEYSPACE); TableMetadata tbm = Schema.instance.getTableMetadata(KEYSPACE, "tbl"); Assert.assertSame(FastPathStrategy.inheritKeyspace(), tbm.params.fastPath); - - Epoch epoch = ClusterMetadata.current().epoch; - AddAccordTable.addTable(tbm.id); - - Assert.assertEquals(epoch.getEpoch() + 1, ClusterMetadata.current().epoch.getEpoch()); } @Test @@ -108,12 +100,10 @@ public void tableModification() KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(KEYSPACE); Assert.assertSame(FastPathStrategy.simple(), ksm.params.fastPath); - process("CREATE TABLE %s.tbl (k int primary key, v int)", KEYSPACE); + process("CREATE TABLE %s.tbl (k int primary key, v int) WITH transactional_mode='full'", KEYSPACE); TableMetadata tbm = Schema.instance.getTableMetadata(KEYSPACE, "tbl"); Assert.assertSame(FastPathStrategy.inheritKeyspace(), tbm.params.fastPath); - AddAccordTable.addTable(tbm.id); - process("ALTER TABLE %s.tbl WITH fast_path='simple'", KEYSPACE); tbm = Schema.instance.getTableMetadata(KEYSPACE, "tbl"); Assert.assertSame(FastPathStrategy.simple(), tbm.params.fastPath); diff --git a/test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java b/test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java new file mode 100644 index 000000000000..59c03fc3c7b2 --- /dev/null +++ b/test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.schema; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; + +import static java.lang.String.format; + +public class TransactionalConfigSchemaTest +{ + private static final String KEYSPACE = "ks"; + @BeforeClass + public static void setup() + { + DatabaseDescriptor.daemonInitialization(); + ServerTestUtils.prepareServer(); + SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create(KEYSPACE, KeyspaceParams.simple(1), Tables.of())); + } + + private static void process(String fmt, Object... objects) + { + QueryProcessor.process(format(fmt, objects), ConsistencyLevel.ANY); + } + + private static void assertTransactionalMode(String table, TransactionalMode mode, TransactionalMigrationFromMode migration) + { + TableMetadata metadata = Schema.instance.getTableMetadata(KEYSPACE, table); + Assert.assertEquals(mode, metadata.params.transactionalMode); + Assert.assertEquals(migration, metadata.params.transactionalMigrationFrom); + } + + // if a table is created with an accord transactional mode, it skips having to migrate + @Test + public void newTableSkipsMigration() + { + String table = "new_table"; + process("CREATE TABLE ks.%s (k int primary key, v int) WITH transactional_mode='%s'", table, TransactionalMode.full); + assertTransactionalMode(table, TransactionalMode.full, TransactionalMigrationFromMode.none); + } + + // if an existing table is set to an accord transactional mode, it should be set to migrating + @Test + public void existingTableMigration() + { + String table = "existing_table"; + process("CREATE TABLE ks.%s (k int primary key, v int)", table); + assertTransactionalMode(table, TransactionalMode.off, TransactionalMigrationFromMode.none); + + process("ALTER TABLE ks.%s WITH transactional_mode='%s'", table, TransactionalMode.full); + assertTransactionalMode(table, TransactionalMode.full, TransactionalMigrationFromMode.off); + } + + // changing transactional mode with an incomplete migration should fail, unless the migration mode is explicitly updated + @Test + public void incompleteMigrationFailure() + { + String table = "incomplete_table"; + process("CREATE TABLE ks.%s (k int primary key, v int)", table); + process("ALTER TABLE ks.%s WITH transactional_mode='%s'", table, TransactionalMode.full); + assertTransactionalMode(table, TransactionalMode.full, TransactionalMigrationFromMode.off); + + process("ALTER TABLE ks.%s WITH transactional_mode='%s'", table, TransactionalMode.off); + assertTransactionalMode(table, TransactionalMode.off, TransactionalMigrationFromMode.full); + + // explicitly setting the migration mode should work + process("ALTER TABLE ks.%s WITH transactional_mode='%s' AND transactional_migration_from='%s'", + table, TransactionalMode.off, TransactionalMigrationFromMode.none); + assertTransactionalMode(table, TransactionalMode.off, TransactionalMigrationFromMode.none); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 626311fc031d..af4e3f738b61 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -57,9 +57,11 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.Pair; import static accord.local.Status.Durability.Majority; @@ -82,7 +84,9 @@ public static void beforeClass() throws Throwable { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); + TableMetadata tbl = Schema.instance.getTableMetadata("ks", "tbl"); + Assert.assertEquals(TransactionalMode.full, tbl.params.transactionalMode); StorageService.instance.initServer(); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 93cdaeb5a4a6..5b1a1402a2e7 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -73,7 +73,7 @@ public static void beforeClass() throws Throwable { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); StorageService.instance.initServer(); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index 3a0e0c7cb78e..7b77791c6ed8 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -163,7 +163,7 @@ public static void beforeClass() throws Throwable ServerTestUtils.daemonInitialization(); SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); } @Before diff --git a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java index d62ab52946ed..1dd5bf4b16d5 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java @@ -49,7 +49,7 @@ protected Logger logger() @BeforeClass public static void setupClass() throws IOException { - AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("lwt_strategy", "accord").set("non_serial_write_strategy", "mixed")), 2); + AccordTestBase.setupCluster(builder -> builder, 2); SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); } @@ -94,9 +94,9 @@ public void testNonSerialReadRepair() throws Exception void testReadRepair(Function accordTxn, Object[][] expected) throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v1 int, v2 int, PRIMARY KEY ((k), c));", + test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v1 int, v2 int, PRIMARY KEY ((k), c)) WITH transactional_mode='unsafe_writes';", cluster -> { - Filter mutationFilter = cluster.filters().verbs(Verb.MUTATION_REQ.id).drop().on(); + Filter mutationFilter = cluster.filters().verbs(Verb.MUTATION_REQ.id).to(2).drop().on(); cluster.filters().verbs(Verb.HINT_REQ.id, Verb.HINT_RSP.id).drop().on(); cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v1, v2) VALUES (1, 1, 1, 1) USING TIMESTAMP 42;", ConsistencyLevel.ONE); mutationFilter.off(); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java index 937b24350a7f..9dd9fc6a82e3 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java @@ -83,7 +83,7 @@ public static void beforeClass() throws Throwable { DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - TableMetadata table = parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks").build(); + TableMetadata table = parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks").build(); tableId = table.id; keyspace = KeyspaceMetadata.create("ks", KeyspaceParams.simple(3), Tables.of(table)); } @@ -170,7 +170,7 @@ public void minMaxTokens() Assert.assertEquals(partitioner.getMaximumTokenForSplitting(), ranges.get(2).right); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); - Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); + Topology topology = AccordTopology.createAccordTopology(metadata); Topology expected = new Topology(1, new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, NODE_SET), new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, NODE_SET), @@ -188,7 +188,7 @@ public void wrapAroundRanges() range(100, -100)); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); - Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); + Topology topology = AccordTopology.createAccordTopology(metadata); Topology expected = new Topology(1, new Shard(AccordTopology.minRange(tableId, ranges.get(0).left), NODE_LIST, NODE_SET), new Shard(AccordTopology.range(tableId, ranges.get(0)), NODE_LIST, NODE_SET), @@ -205,7 +205,7 @@ public void fastPath() range(-100, 100), range(token(100), partitioner.getMaximumTokenForSplitting())); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); - Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); + Topology topology = AccordTopology.createAccordTopology(metadata); Topology expected = new Topology(1, new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, NODE_SET), new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, NODE_SET), @@ -213,7 +213,7 @@ public void fastPath() new Shard(AccordTopology.maxRange(tableId, ranges.get(2).right), NODE_LIST, NODE_SET)); Assert.assertEquals(expected, topology); - topology = AccordTopology.createAccordTopology(metadata.transformer().withFastPathStatusSince(new Id(1), AccordFastPath.Status.UNAVAILABLE, 1, 1).build().metadata, ks -> true); + topology = AccordTopology.createAccordTopology(metadata.transformer().withFastPathStatusSince(new Id(1), AccordFastPath.Status.UNAVAILABLE, 1, 1).build().metadata); Set fastPath = new HashSet<>(NODE_SET); fastPath.remove(new Node.Id(1)); @@ -236,7 +236,7 @@ public void fastPathWithMoreThanMinimumFailedNodes() range(-100, 100), range(token(100), partitioner.getMaximumTokenForSplitting())); ClusterMetadata metadata = configureCluster(ranges, Keyspaces.of(keyspace)); - Topology topology = AccordTopology.createAccordTopology(metadata, ks -> true); + Topology topology = AccordTopology.createAccordTopology(metadata); Topology expected = new Topology(1, new Shard(AccordTopology.minRange(tableId, ranges.get(0).right), NODE_LIST, NODE_SET), new Shard(AccordTopology.range(tableId, ranges.get(1)), NODE_LIST, NODE_SET), @@ -248,7 +248,7 @@ public void fastPathWithMoreThanMinimumFailedNodes() .withFastPathStatusSince(new Id(1), AccordFastPath.Status.UNAVAILABLE, 1, 1) .withFastPathStatusSince(new Id(2), AccordFastPath.Status.UNAVAILABLE, 1, 1) .build().metadata; - topology = AccordTopology.createAccordTopology(metadata, ks -> true); + topology = AccordTopology.createAccordTopology(metadata); Set fastPath = new HashSet<>(NODE_SET); fastPath.remove(new Node.Id(1)); diff --git a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java index ba80664c39df..a3cc4504d91e 100644 --- a/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/api/AccordKeyTest.java @@ -45,8 +45,8 @@ public static void setupClass() { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl1 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE1), - parse("CREATE TABLE tbl2 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE2)); + parse("CREATE TABLE tbl1 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks").id(TABLE1), + parse("CREATE TABLE tbl2 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks").id(TABLE2)); } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 0f9651656603..8c952f02c0a1 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -79,7 +79,7 @@ public static void beforeClass() throws Throwable { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); StorageService.instance.initServer(); } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index f421c993a0fa..b235d1ae59d7 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -107,7 +107,7 @@ public static void beforeClass() throws Throwable { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); StorageService.instance.initServer(); } diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java index 9d04137587a9..d13742d257ac 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java @@ -40,7 +40,7 @@ public static void setupClass() { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); } diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 4be4c11115f5..e12b3fbf878b 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -92,7 +92,7 @@ public static void beforeClass() throws Throwable // need to create the accord test table as generating random txn is not currently supported SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); StorageService.instance.initServer(); } diff --git a/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java b/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java index fbfd1190cc4b..bad407484952 100644 --- a/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java +++ b/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java @@ -36,7 +36,7 @@ public static void setupClass() { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); } diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java index 37b3815c1c1c..68f02de123c8 100644 --- a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java @@ -297,8 +297,6 @@ else if (key == LOCKED_RANGES) return metadata.lockedRanges; else if (key == IN_PROGRESS_SEQUENCES) return metadata.inProgressSequences; - else if (key == ACCORD_TABLES) - return metadata.accordTables; else if (key == ACCORD_FAST_PATH) return metadata.accordFastPath; else if (key == CONSENSUS_MIGRATION_STATE) From 94c2ec3c96b1d1bb575ddc2136cfac23228d4ee9 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Tue, 26 Mar 2024 17:27:38 -0700 Subject: [PATCH 099/340] fix MultiElementType pack/unpack accessor api --- .../cql3/restrictions/SimpleRestriction.java | 3 +- .../cql3/selection/ColumnTimestamps.java | 3 +- .../cql3/selection/ListSelector.java | 3 +- .../cassandra/cql3/selection/SetSelector.java | 3 +- .../cql3/selection/TupleSelector.java | 3 +- .../cql3/selection/UserTypeSelector.java | 3 +- .../cql3/selection/VectorSelector.java | 3 +- .../apache/cassandra/db/filter/RowFilter.java | 2 +- .../cassandra/db/marshal/CollectionType.java | 8 ++-- .../db/marshal/MultiElementType.java | 41 ++++++++++++++++++- .../cassandra/db/marshal/TupleType.java | 15 +++---- .../apache/cassandra/db/marshal/UserType.java | 2 +- .../cassandra/db/marshal/VectorType.java | 11 ----- .../service/accord/AccordKeyspace.java | 4 +- 14 files changed, 66 insertions(+), 38 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java index 8592fbbb7b17..9a31c09ec114 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java @@ -33,6 +33,7 @@ import org.apache.cassandra.cql3.terms.Term; import org.apache.cassandra.cql3.terms.Terms; import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.index.Index; import org.apache.cassandra.index.IndexRegistry; @@ -402,7 +403,7 @@ else if (isIN()) private static ByteBuffer multiInputOperatorValues(ColumnMetadata column, List values) { - return ListType.getInstance(column.type, false).pack(values); + return ListType.getInstance(column.type, false).pack(values, ByteBufferAccessor.instance); } @Override diff --git a/src/java/org/apache/cassandra/cql3/selection/ColumnTimestamps.java b/src/java/org/apache/cassandra/cql3/selection/ColumnTimestamps.java index b3f3fa4ef7de..713e85b77fb3 100644 --- a/src/java/org/apache/cassandra/cql3/selection/ColumnTimestamps.java +++ b/src/java/org/apache/cassandra/cql3/selection/ColumnTimestamps.java @@ -27,6 +27,7 @@ import com.google.common.collect.Range; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.db.marshal.UserType; @@ -384,7 +385,7 @@ public ByteBuffer toByteBuffer(ProtocolVersion protocolVersion) List buffers = new ArrayList<>(timestamps.size()); timestamps.forEach(timestamp -> buffers.add(type.toByteBuffer(timestamp))); - return LONG_LIST_TYPE.pack(buffers); + return LONG_LIST_TYPE.pack(buffers, ByteBufferAccessor.instance); } @Override diff --git a/src/java/org/apache/cassandra/cql3/selection/ListSelector.java b/src/java/org/apache/cassandra/cql3/selection/ListSelector.java index 3494b4b831d2..44849805d5f2 100644 --- a/src/java/org/apache/cassandra/cql3/selection/ListSelector.java +++ b/src/java/org/apache/cassandra/cql3/selection/ListSelector.java @@ -29,6 +29,7 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.filter.ColumnFilter.Builder; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -101,7 +102,7 @@ public ByteBuffer getOutput(ProtocolVersion protocolVersion) { buffers.add(elements.get(i).getOutput(protocolVersion)); } - return type.pack(buffers); + return type.pack(buffers, ByteBufferAccessor.instance); } public void reset() diff --git a/src/java/org/apache/cassandra/cql3/selection/SetSelector.java b/src/java/org/apache/cassandra/cql3/selection/SetSelector.java index a4cc5009af1a..03fd6ac71d19 100644 --- a/src/java/org/apache/cassandra/cql3/selection/SetSelector.java +++ b/src/java/org/apache/cassandra/cql3/selection/SetSelector.java @@ -31,6 +31,7 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.filter.ColumnFilter.Builder; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -103,7 +104,7 @@ public ByteBuffer getOutput(ProtocolVersion protocolVersion) { buffers.add(elements.get(i).getOutput(protocolVersion)); } - return type.pack(new ArrayList<>(buffers)); + return type.pack(new ArrayList<>(buffers), ByteBufferAccessor.instance); } public void reset() diff --git a/src/java/org/apache/cassandra/cql3/selection/TupleSelector.java b/src/java/org/apache/cassandra/cql3/selection/TupleSelector.java index fd3071b1cd2a..65326fd01eff 100644 --- a/src/java/org/apache/cassandra/cql3/selection/TupleSelector.java +++ b/src/java/org/apache/cassandra/cql3/selection/TupleSelector.java @@ -29,6 +29,7 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.filter.ColumnFilter.Builder; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.TupleType; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.util.DataInputPlus; @@ -102,7 +103,7 @@ public ByteBuffer getOutput(ProtocolVersion protocolVersion) throws InvalidReque { buffers.add(elements.get(i).getOutput(protocolVersion)); } - return type.pack(buffers); + return type.pack(buffers, ByteBufferAccessor.instance); } public void reset() diff --git a/src/java/org/apache/cassandra/cql3/selection/UserTypeSelector.java b/src/java/org/apache/cassandra/cql3/selection/UserTypeSelector.java index 6778cca964ce..af13ccbecde1 100644 --- a/src/java/org/apache/cassandra/cql3/selection/UserTypeSelector.java +++ b/src/java/org/apache/cassandra/cql3/selection/UserTypeSelector.java @@ -27,6 +27,7 @@ import com.google.common.base.Objects; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.cql3.ColumnSpecification; @@ -197,7 +198,7 @@ public ByteBuffer getOutput(ProtocolVersion protocolVersion) Selector selector = fields.get(userType.fieldName(i)); buffers.add(selector == null ? null : selector.getOutput(protocolVersion)); } - return type.pack(buffers); + return type.pack(buffers, ByteBufferAccessor.instance); } public void reset() diff --git a/src/java/org/apache/cassandra/cql3/selection/VectorSelector.java b/src/java/org/apache/cassandra/cql3/selection/VectorSelector.java index f61d8d20d666..8dd66bb5d5d0 100644 --- a/src/java/org/apache/cassandra/cql3/selection/VectorSelector.java +++ b/src/java/org/apache/cassandra/cql3/selection/VectorSelector.java @@ -30,6 +30,7 @@ import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.VectorType; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.util.DataInputPlus; @@ -126,7 +127,7 @@ public ByteBuffer getOutput(ProtocolVersion protocolVersion) throws InvalidReque for (int i = 0, m = elements.size(); i < m; i++) buffers.add(elements.get(i).getOutput(protocolVersion)); - return type.pack(buffers); + return type.pack(buffers, ByteBufferAccessor.instance); } @Override diff --git a/src/java/org/apache/cassandra/db/filter/RowFilter.java b/src/java/org/apache/cassandra/db/filter/RowFilter.java index e843fe837959..037f077a9127 100644 --- a/src/java/org/apache/cassandra/db/filter/RowFilter.java +++ b/src/java/org/apache/cassandra/db/filter/RowFilter.java @@ -87,7 +87,7 @@ public class RowFilter implements Iterable private static final Logger logger = LoggerFactory.getLogger(RowFilter.class); public static final Serializer serializer = new Serializer(); - private static final RowFilter NONE = new RowFilter(Collections.emptyList(), false); + public static final RowFilter NONE = new RowFilter(Collections.emptyList(), false); protected final List expressions; diff --git a/src/java/org/apache/cassandra/db/marshal/CollectionType.java b/src/java/org/apache/cassandra/db/marshal/CollectionType.java index 3952ee137e0a..c54ad31e5efb 100644 --- a/src/java/org/apache/cassandra/db/marshal/CollectionType.java +++ b/src/java/org/apache/cassandra/db/marshal/CollectionType.java @@ -350,15 +350,15 @@ V fromComparableBytesListOrSet(ValueAccessor accessor, } @Override - public ByteBuffer pack(List elements) + public V pack(List elements, ValueAccessor accessor) { - return getSerializer().pack(elements); + return getSerializer().pack(elements, accessor); } @Override - public List unpack(ByteBuffer input) + public List unpack(V value, ValueAccessor accessor) { - return getSerializer().unpack(input); + return getSerializer().unpack(value, accessor); } /** diff --git a/src/java/org/apache/cassandra/db/marshal/MultiElementType.java b/src/java/org/apache/cassandra/db/marshal/MultiElementType.java index d9c229c9e9bd..4519a4341960 100644 --- a/src/java/org/apache/cassandra/db/marshal/MultiElementType.java +++ b/src/java/org/apache/cassandra/db/marshal/MultiElementType.java @@ -44,15 +44,52 @@ protected MultiElementType(ComparisonType comparisonType) * @param elements the serialized values of the elements * @return the serialized representation of the value composed of the specified elements. */ - public abstract ByteBuffer pack(List elements); + public abstract V pack(List elements, ValueAccessor accessor); + /** + * Returns the serialized representation of the value composed of the specified elements. + * + * @param elements the serialized values of the elements + * @return the serialized representation of the value composed of the specified elements. + */ + public ByteBuffer pack(List elements) + { + return pack(elements, ByteBufferAccessor.instance); + } + + public final ByteBuffer packBuffer(List elements) + { + return pack(elements, ByteBufferAccessor.instance); + } + + public final byte[] packArray(List elements) + { + return pack(elements, ByteArrayAccessor.instance); + } + + /** + * Returns the serialized representation of the elements composing the specified value. + * + * @param value a serialized value of this type + * @return the serialized representation of the elements composing the specified value. + */ /** * Returns the serialized representation of the elements composing the specified value. * * @param value a serialized value of this type * @return the serialized representation of the elements composing the specified value. */ - public abstract List unpack(ByteBuffer value); + public abstract List unpack(V value, ValueAccessor accessor); + + public final List unpack(byte[] value) + { + return unpack(value, ByteArrayAccessor.instance); + } + + public final List unpack(ByteBuffer value) + { + return unpack(value, ByteBufferAccessor.instance); + } /** * Checks if this type supports bind markers for its elements when the type value is provided through a literal. diff --git a/src/java/org/apache/cassandra/db/marshal/TupleType.java b/src/java/org/apache/cassandra/db/marshal/TupleType.java index ff0b943078df..ede083ab39f6 100644 --- a/src/java/org/apache/cassandra/db/marshal/TupleType.java +++ b/src/java/org/apache/cassandra/db/marshal/TupleType.java @@ -295,11 +295,6 @@ public V fromComparableBytes(ValueAccessor accessor, ByteSource.Peekable } @Override - public List unpack(ByteBuffer value) - { - return unpack(value, ByteBufferAccessor.instance); - } - public List unpack(V value, ValueAccessor accessor) { int numberOfElements = size(); @@ -381,14 +376,14 @@ public static V pack(ValueAccessor accessor, Collection components) } @Override - public ByteBuffer pack(List components) + public V pack(List elements, ValueAccessor accessor) { - return pack(ByteBufferAccessor.instance, components); + return pack(accessor, elements); } public ByteBuffer pack(ByteBuffer... components) { - return pack(Arrays.asList(components)); + return pack(Arrays.asList(components), ByteBufferAccessor.instance); } @Override @@ -472,7 +467,7 @@ public ByteBuffer fromString(String source) fields.add(type.fromString(fieldString)); } } - return pack(fields); + return pack(fields, ByteBufferAccessor.instance); } @Override @@ -613,7 +608,7 @@ public ByteBuffer getMaskedValue() for (AbstractType type : types) buffers.add(type.getMaskedValue()); - return serializer.serialize(pack(buffers)); + return serializer.serialize(pack(buffers, ByteBufferAccessor.instance)); } @Override diff --git a/src/java/org/apache/cassandra/db/marshal/UserType.java b/src/java/org/apache/cassandra/db/marshal/UserType.java index 15ab78e82a2a..d20da2c0ab39 100644 --- a/src/java/org/apache/cassandra/db/marshal/UserType.java +++ b/src/java/org/apache/cassandra/db/marshal/UserType.java @@ -215,7 +215,7 @@ public ByteBuffer serializeForNativeProtocol(Iterator> cells) while (components.size() < size()) components.add(null); - return pack(components); + return pack(components, ByteBufferAccessor.instance); } public void validateCell(Cell cell) throws MarshalException diff --git a/src/java/org/apache/cassandra/db/marshal/VectorType.java b/src/java/org/apache/cassandra/db/marshal/VectorType.java index ac4c0bfb94c1..e70857c5aafd 100644 --- a/src/java/org/apache/cassandra/db/marshal/VectorType.java +++ b/src/java/org/apache/cassandra/db/marshal/VectorType.java @@ -137,12 +137,6 @@ public VectorSerializer getSerializer() return serializer; } - @Override - public List unpack(ByteBuffer buffer) - { - return unpack(buffer, ByteBufferAccessor.instance); - } - public List unpack(V buffer, ValueAccessor accessor) { return getSerializer().unpack(buffer, accessor); @@ -193,11 +187,6 @@ public V decomposeAsFloat(ValueAccessor accessor, float[] value) return buffer; } - public ByteBuffer pack(List elements) - { - return pack(elements, ByteBufferAccessor.instance); - } - public V pack(List elements, ValueAccessor accessor) { return getSerializer().pack(elements, accessor); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index d0a593bf0669..7491df11099a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -859,12 +859,12 @@ private static V serializeToken(Token token, ValueAccessor accessor) private static ByteBuffer serializeKey(PartitionKey key) { - return TupleType.pack(ByteBufferAccessor.instance, Arrays.asList(UUIDSerializer.instance.serialize(key.table().asUUID()), key.partitionKey().getKey())); + return KEY_TYPE.pack(UUIDSerializer.instance.serialize(key.table().asUUID()), key.partitionKey().getKey()); } private static ByteBuffer serializeTimestamp(Timestamp timestamp) { - return TupleType.pack(ByteBufferAccessor.instance, Arrays.asList(bytes(timestamp.msb), bytes(timestamp.lsb), bytes(timestamp.node.id))); + return TIMESTAMP_TYPE.pack(bytes(timestamp.msb), bytes(timestamp.lsb), bytes(timestamp.node.id)); } public interface TimestampFactory From 599cd59736fcd8616fbb1c1675067318a08b2834 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Wed, 27 Mar 2024 09:18:39 -0700 Subject: [PATCH 100/340] post-trunk rebase fixes --- modules/accord | 2 +- .../cassandra/cql3/selection/MapSelector.java | 3 ++- .../apache/cassandra/exceptions/RequestFailure.java | 4 ++-- .../cassandra/distributed/test/TestBaseImpl.java | 3 ++- test/unit/org/apache/cassandra/cql3/CQLTester.java | 4 +++- .../cassandra/db/marshal/TypeValidationTest.java | 2 +- .../cassandra/io/sstable/LargePartitionsTest.java | 2 +- .../apache/cassandra/transport/SerDeserTest.java | 13 +++++++------ .../cassandra/utils/AbstractTypeGenerators.java | 2 +- 9 files changed, 20 insertions(+), 15 deletions(-) diff --git a/modules/accord b/modules/accord index 6b8bef48e578..ef36616441bd 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 6b8bef48e5780aefda6bd1ff29a6290e56ede438 +Subproject commit ef36616441bd4ff4fec5379d986c75ad5a62ff7d diff --git a/src/java/org/apache/cassandra/cql3/selection/MapSelector.java b/src/java/org/apache/cassandra/cql3/selection/MapSelector.java index 450b64a58b73..b0ccac4d93cd 100644 --- a/src/java/org/apache/cassandra/cql3/selection/MapSelector.java +++ b/src/java/org/apache/cassandra/cql3/selection/MapSelector.java @@ -34,6 +34,7 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.filter.ColumnFilter.Builder; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -217,7 +218,7 @@ public ByteBuffer getOutput(ProtocolVersion protocolVersion) buffers.add(entry.getKey()); buffers.add(entry.getValue()); } - return type.pack(buffers); + return type.pack(buffers, ByteBufferAccessor.instance); } public void reset() diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailure.java b/src/java/org/apache/cassandra/exceptions/RequestFailure.java index d2c8a2e61c56..b1dbbd8e743c 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailure.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailure.java @@ -41,7 +41,6 @@ public class RequestFailure { public static final RequestFailure UNKNOWN = new RequestFailure(RequestFailureReason.UNKNOWN); public static final RequestFailure READ_TOO_MANY_TOMBSTONES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_TOMBSTONES); - public static final RequestFailure READ_TOO_MANY_INDEXES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_INDEXES); public static final RequestFailure TIMEOUT = new RequestFailure(RequestFailureReason.TIMEOUT); public static final RequestFailure INCOMPATIBLE_SCHEMA = new RequestFailure(RequestFailureReason.INCOMPATIBLE_SCHEMA); public static final RequestFailure READ_SIZE = new RequestFailure(RequestFailureReason.READ_SIZE); @@ -50,6 +49,7 @@ public class RequestFailure public static final RequestFailure INVALID_ROUTING = new RequestFailure(RequestFailureReason.INVALID_ROUTING); public static final RequestFailure INDEX_NOT_AVAILABLE = new RequestFailure(RequestFailureReason.INDEX_NOT_AVAILABLE); public static final RequestFailure COORDINATOR_BEHIND = new RequestFailure(RequestFailureReason.COORDINATOR_BEHIND); + public static final RequestFailure READ_TOO_MANY_INDEXES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_INDEXES); static { @@ -124,7 +124,6 @@ public static RequestFailure forReason(RequestFailureReason reason) default: throw new IllegalStateException("Unhandled request failure reason " + reason); case UNKNOWN: return UNKNOWN; case READ_TOO_MANY_TOMBSTONES: return READ_TOO_MANY_TOMBSTONES; - case READ_TOO_MANY_INDEXES: return READ_TOO_MANY_INDEXES; case TIMEOUT: return TIMEOUT; case INCOMPATIBLE_SCHEMA: return INCOMPATIBLE_SCHEMA; case READ_SIZE: return READ_SIZE; @@ -133,6 +132,7 @@ public static RequestFailure forReason(RequestFailureReason reason) case INVALID_ROUTING: return INVALID_ROUTING; case INDEX_NOT_AVAILABLE: return INDEX_NOT_AVAILABLE; case COORDINATOR_BEHIND: return COORDINATOR_BEHIND; + case READ_TOO_MANY_INDEXES: return READ_TOO_MANY_INDEXES; } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java index 5507ea9dd35c..5988bd429f28 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java @@ -39,6 +39,7 @@ import org.apache.cassandra.cql3.Duration; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.BooleanType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ByteType; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.db.marshal.DecimalType; @@ -132,7 +133,7 @@ public static ByteBuffer tuple(Object... values) bbs.add(value == null ? null : type.decompose(value)); } TupleType tupleType = new TupleType(types); - return tupleType.pack(bbs); + return tupleType.pack(bbs, ByteBufferAccessor.instance); } public static String batch(String... queries) diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index 6f1fe5b79467..f42063beeaf5 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -66,6 +66,8 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; + +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.assertj.core.api.Assertions; import org.awaitility.Awaitility; import org.apache.commons.lang3.ArrayUtils; @@ -3003,7 +3005,7 @@ public ByteBuffer toByteBuffer() types.add(type); bbs.add(makeByteBuffer(value, type)); } - return new TupleType(types).pack(bbs); + return new TupleType(types).pack(bbs, ByteBufferAccessor.instance); } public String toCQLString() diff --git a/test/unit/org/apache/cassandra/db/marshal/TypeValidationTest.java b/test/unit/org/apache/cassandra/db/marshal/TypeValidationTest.java index 6897661cf6d4..ef264dfa14ab 100644 --- a/test/unit/org/apache/cassandra/db/marshal/TypeValidationTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/TypeValidationTest.java @@ -204,7 +204,7 @@ private static void buildAndSplit(Gen baseGen) qt().forAll(tupleWithValueGen(baseGen)).checkAssert(pair -> { TupleType tuple = pair.left; ByteBuffer value = pair.right; - Assertions.assertThat(tuple.pack(tuple.unpack(value))) + Assertions.assertThat(tuple.pack(tuple.unpack(value), ByteBufferAccessor.instance)) .as("tuple.pack(tuple.unpack(value)) == value") .isEqualTo(value); }); diff --git a/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java b/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java index a4c1b8608d67..34bc53b98c2c 100644 --- a/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/LargePartitionsTest.java @@ -135,7 +135,7 @@ private static void keyCacheMetrics(String title) " size:" + metrics.size.getValue() + " entries:" + metrics.entries.getValue() + " hit-rate:" + metrics.hitRate.getValue() + - " one-min-rate:" + metrics.hitRate.getValue()); + " one-min-rate:" + metrics.oneMinuteHitRate.getValue()); } @Test diff --git a/test/unit/org/apache/cassandra/transport/SerDeserTest.java b/test/unit/org/apache/cassandra/transport/SerDeserTest.java index b14e854cab65..605119f65bfe 100644 --- a/test/unit/org/apache/cassandra/transport/SerDeserTest.java +++ b/test/unit/org/apache/cassandra/transport/SerDeserTest.java @@ -50,6 +50,7 @@ import org.apache.cassandra.cql3.terms.UserTypes; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.LongType; @@ -94,7 +95,7 @@ public void collectionSerDeserTest() for (Integer i : l) lb.add(Int32Type.instance.decompose(i)); - assertEquals(l, lt.compose(lt.pack(lb))); + assertEquals(l, lt.compose(lt.pack(lb, ByteBufferAccessor.instance))); // Sets SetType st = SetType.getInstance(UTF8Type.instance, true); @@ -104,7 +105,7 @@ public void collectionSerDeserTest() for (String t : s) sb.add(UTF8Type.instance.decompose(t)); - assertEquals(s, st.compose(st.pack(sb))); + assertEquals(s, st.compose(st.pack(sb, ByteBufferAccessor.instance))); // Maps MapType mt = MapType.getInstance(UTF8Type.instance, LongType.instance, true); @@ -120,7 +121,7 @@ public void collectionSerDeserTest() mb.add(LongType.instance.decompose(entry.getValue())); } - assertEquals(m, mt.compose(mt.pack(mb))); + assertEquals(m, mt.compose(mt.pack(mb, ByteBufferAccessor.instance))); } @Test(expected = MarshalException.class) @@ -130,7 +131,7 @@ public void setsMayNotContainNullsTest() List sb = new ArrayList<>(1); sb.add(null); - st.compose(st.pack(sb)); + st.compose(st.pack(sb, ByteBufferAccessor.instance)); } @Test(expected = MarshalException.class) @@ -141,7 +142,7 @@ public void mapKeysMayNotContainNullsTest() mb.add(null); mb.add(LongType.instance.decompose(999L)); - mt.compose(mt.pack(mb)); + mt.compose(mt.pack(mb, ByteBufferAccessor.instance)); } @Test(expected = MarshalException.class) @@ -152,7 +153,7 @@ public void mapValueMayNotContainNullsTest() mb.add(UTF8Type.instance.decompose("danger")); mb.add(null); - mt.compose(mt.pack(mb)); + mt.compose(mt.pack(mb, ByteBufferAccessor.instance)); } @Test diff --git a/test/unit/org/apache/cassandra/utils/AbstractTypeGenerators.java b/test/unit/org/apache/cassandra/utils/AbstractTypeGenerators.java index ea9a128233dd..79fc56d48842 100644 --- a/test/unit/org/apache/cassandra/utils/AbstractTypeGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AbstractTypeGenerators.java @@ -1382,7 +1382,7 @@ public ByteBuffer generate(RandomnessSource rnd) TypeSupport support = eSupport.get(i); elements.add(support.type.decompose(support.valueGen.generate(rnd))); } - return type.pack(elements); + return type.pack(elements, ByteBufferAccessor.instance); } } From 7dadc080cb689b3b82c8472403785b2d7241fdc9 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Wed, 28 Feb 2024 15:50:04 +0000 Subject: [PATCH 101/340] perf improvements --- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 4 +- .../org/apache/cassandra/config/Config.java | 1 + .../cassandra/config/DatabaseDescriptor.java | 22 ++ .../db/compaction/CompactionIterator.java | 6 +- src/java/org/apache/cassandra/net/Verb.java | 32 +-- .../service/accord/AccordCachingState.java | 4 +- .../service/accord/AccordCommandStore.java | 20 +- .../service/accord/AccordCommandStores.java | 8 +- .../service/accord/AccordConfiguration.java | 1 + .../service/accord/AccordJournal.java | 15 ++ .../service/accord/AccordKeyspace.java | 19 +- .../service/accord/AccordObjectSizes.java | 29 +-- .../accord/AccordSafeCommandStore.java | 50 ++-- .../accord/AccordSafeCommandsForKey.java | 13 +- .../service/accord/AccordSafeState.java | 2 + .../accord/AccordSafeTimestampsForKey.java | 9 +- .../service/accord/AccordService.java | 8 +- .../service/accord/CommandsForRanges.java | 11 +- .../service/accord/api/AccordAgent.java | 3 + .../service/accord/async/AsyncLoader.java | 12 +- .../service/accord/async/AsyncOperation.java | 5 +- .../accord/interop/AccordInteropApply.java | 2 - .../serializers/CommandsForKeySerializer.java | 217 ++++++++++-------- .../serializers/WaitingOnSerializer.java | 103 ++++++--- .../service/accord/txn/TxnWrite.java | 5 +- .../cassandra/utils/vint/VIntCoding.java | 47 +++- test/conf/logback-dtest-quiet.xml | 56 +++++ .../distributed/api/ICoordinator.java | 3 + .../distributed/impl/Coordinator.java | 25 ++ .../cassandra/distributed/impl/Instance.java | 1 + .../test/accord/AccordLoadTest.java | 96 ++++++++ .../CompactionAccordIteratorsTest.java | 2 +- .../accord/AccordCommandStoreTest.java | 29 +-- .../service/accord/AccordCommandTest.java | 8 +- .../service/accord/AccordTestUtils.java | 12 +- .../service/accord/async/AsyncLoaderTest.java | 9 +- .../accord/async/AsyncOperationTest.java | 11 +- .../CommandsForKeySerializerTest.java | 39 ++-- .../serializers/WaitingOnSerializerTest.java | 33 +-- 40 files changed, 654 insertions(+), 320 deletions(-) create mode 100644 test/conf/logback-dtest-quiet.xml create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java diff --git a/modules/accord b/modules/accord index ef36616441bd..3562bb3c9ce4 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit ef36616441bd4ff4fec5379d986c75ad5a62ff7d +Subproject commit 3562bb3c9ce4e9eecdf65e236e968ef3ee9e0a86 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 697d7edc1e43..e76745a233e0 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -47,9 +47,9 @@ public class AccordSpec public volatile DurationSpec fast_path_update_delay = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec schedule_durability_frequency = new DurationSpec.IntSecondsBound(15); + public volatile DurationSpec schedule_durability_frequency = new DurationSpec.IntSecondsBound(5); public volatile DurationSpec durability_txnid_lag = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec shard_durability_cycle = new DurationSpec.IntMinutesBound(2); + public volatile DurationSpec shard_durability_cycle = new DurationSpec.IntMinutesBound(1); public volatile DurationSpec global_durability_cycle = new DurationSpec.IntMinutesBound(10); public enum TransactionalRangeMigration diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 45a8a61ce982..8163e1d36623 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -500,6 +500,7 @@ public static class SSTableConfig public volatile int counter_cache_keys_to_save = Integer.MAX_VALUE; public DataStorageSpec.LongMebibytesBound paxos_cache_size = null; + public DataStorageSpec.LongMebibytesBound accord_cache_size = null; public DataStorageSpec.LongMebibytesBound consensus_migration_cache_size = null; diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 9498e666c485..a75e6cb88c5a 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -227,6 +227,7 @@ public class DatabaseDescriptor private static long keyCacheSizeInMiB; private static long paxosCacheSizeInMiB; + private static long accordCacheSizeInMiB; private static long consensusMigrationCacheSizeInMiB; private static long counterCacheSizeInMiB; private static long indexSummaryCapacityInMiB; @@ -963,6 +964,22 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m + conf.paxos_cache_size + "', supported values are >= 0.", false); } + try + { + // if paxosCacheSizeInMiB option was set to "auto" then size of the cache should be "max(10% of Heap (in MB), 1MB) + accordCacheSizeInMiB = (conf.accord_cache_size == null) + ? Math.max(1, (int) ((Runtime.getRuntime().totalMemory() * 0.10) / 1024 / 1024)) + : conf.accord_cache_size.toMebibytes(); + + if (accordCacheSizeInMiB < 0) + throw new NumberFormatException(); // to escape duplicating error message + } + catch (NumberFormatException e) + { + throw new ConfigurationException("paxos_cache_size option was set incorrectly to '" + + conf.paxos_cache_size + "', supported values are >= 0.", false); + } + try { // if consensusMigrationCacheSizeInMiB option was set to "auto" then size of the cache should be "min(1% of Heap (in MB), 50MB) @@ -4266,6 +4283,11 @@ public static long getPaxosCacheSizeInMiB() return paxosCacheSizeInMiB; } + public static long getAccordCacheSizeInMiB() + { + return accordCacheSizeInMiB; + } + public static long getConsensusMigrationCacheSizeInMiB() { return consensusMigrationCacheSizeInMiB; diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 69230fd2ef56..ad056674c9d6 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -978,11 +978,7 @@ protected Row applyToRow(Row row) if (redundantBeforeEntry == null) return row; - TxnId redundantBeforeTxnId = redundantBeforeEntry.shardRedundantBefore(); - if (redundantBeforeTxnId.equals(TxnId.NONE)) - return row; - - return CommandsForKeysAccessor.withoutRedundantCommands(partitionKey, row, redundantBeforeTxnId); + return CommandsForKeysAccessor.withoutRedundantCommands(partitionKey, row, redundantBeforeEntry); } @Override diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 9b9aee034454..89b30f88183f 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -268,7 +268,7 @@ public enum Verb PAXOS2_PREPARE_REQ (40, P2, writeTimeout, MUTATION, () -> PaxosPrepare.requestSerializer, () -> PaxosPrepare.requestHandler, PAXOS2_PREPARE_RSP ), PAXOS2_PREPARE_REFRESH_RSP (51, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepareRefresh.responseSerializer, RESPONSE_HANDLER ), PAXOS2_PREPARE_REFRESH_REQ (41, P2, writeTimeout, MUTATION, () -> PaxosPrepareRefresh.requestSerializer, () -> PaxosPrepareRefresh.requestHandler, PAXOS2_PREPARE_REFRESH_RSP ), - PAXOS2_PROPOSE_RSP (52, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPropose.ACCEPT_RESULT_SERIALIZER, RESPONSE_HANDLER ), + PAXOS2_PROPOSE_RSP (52, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPropose.ACCEPT_RESULT_SERIALIZER, RESPONSE_HANDLER ), PAXOS2_PROPOSE_REQ (42, P2, writeTimeout, MUTATION, () -> PaxosPropose.requestSerializer, () -> PaxosPropose.requestHandler, PAXOS2_PROPOSE_RSP ), PAXOS2_COMMIT_AND_PREPARE_RSP (53, P2, writeTimeout, REQUEST_RESPONSE, () -> PaxosPrepare.responseSerializer, RESPONSE_HANDLER ), PAXOS2_COMMIT_AND_PREPARE_REQ (43, P2, writeTimeout, MUTATION, () -> PaxosCommitAndPrepare.requestSerializer, () -> PaxosCommitAndPrepare.requestHandler, PAXOS2_COMMIT_AND_PREPARE_RSP ), @@ -307,41 +307,41 @@ public enum Verb DATA_MOVEMENT_EXECUTED_REQ (817, P1, rpcTimeout, MISC, () -> DataMovement.Status.serializer, () -> DataMovements.instance, DATA_MOVEMENT_EXECUTED_RSP ), // accord - ACCORD_SIMPLE_RSP (119, P2, writeTimeout, REQUEST_RESPONSE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), - ACCORD_PRE_ACCEPT_RSP (120, P2, writeTimeout, REQUEST_RESPONSE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), + ACCORD_SIMPLE_RSP (119, P2, writeTimeout, IMMEDIATE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), + ACCORD_PRE_ACCEPT_RSP (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), ACCORD_PRE_ACCEPT_REQ (121, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_PRE_ACCEPT_RSP ), - ACCORD_ACCEPT_RSP (122, P2, writeTimeout, REQUEST_RESPONSE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), + ACCORD_ACCEPT_RSP (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), ACCORD_ACCEPT_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), ACCORD_ACCEPT_INVALIDATE_REQ (124, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), - ACCORD_READ_RSP (125, P2, writeTimeout, REQUEST_RESPONSE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), + ACCORD_READ_RSP (125, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), ACCORD_READ_REQ (126, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, AccordService::verbHandlerOrNoop ), - ACCORD_APPLY_RSP (129, P2, writeTimeout, REQUEST_RESPONSE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), - ACCORD_APPLY_REQ (130, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), - ACCORD_BEGIN_RECOVER_RSP (131, P2, writeTimeout, REQUEST_RESPONSE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), + ACCORD_APPLY_RSP (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), + ACCORD_APPLY_REQ (130, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), + ACCORD_BEGIN_RECOVER_RSP (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), ACCORD_BEGIN_RECOVER_REQ (132, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_RECOVER_RSP ), - ACCORD_BEGIN_INVALIDATE_RSP (133, P2, writeTimeout, REQUEST_RESPONSE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), + ACCORD_BEGIN_INVALIDATE_RSP (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), ACCORD_BEGIN_INVALIDATE_REQ (134, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_INVALIDATE_RSP ), - ACCORD_WAIT_ON_COMMIT_RSP (136, P2, writeTimeout, REQUEST_RESPONSE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), + ACCORD_WAIT_ON_COMMIT_RSP (136, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, AccordService::verbHandlerOrNoop, ACCORD_WAIT_ON_COMMIT_RSP ), ACCORD_WAIT_UNTIL_APPLIED_REQ (137, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitUntilApplied, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), ACCORD_INFORM_OF_TXN_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_INFORM_HOME_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, REQUEST_RESPONSE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), + ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), ACCORD_CHECK_STATUS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), - ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, REQUEST_RESPONSE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_DEPS_REQ (144, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_DEPS_RSP ), - ACCORD_GET_EPHMRL_READ_DEPS_RSP (161, P2, writeTimeout, REQUEST_RESPONSE, () -> GetEphmrlReadDepsSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_EPHMRL_READ_DEPS_RSP (161, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), - ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, REQUEST_RESPONSE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), + ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_MAX_CONFLICT_REQ (164, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_MAX_CONFLICT_RSP), - ACCORD_FETCH_DATA_RSP (145, P2, repairTimeout,REQUEST_RESPONSE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), + ACCORD_FETCH_DATA_RSP (145, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), ACCORD_FETCH_DATA_REQ (146, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_QUERY_DURABLE_BEFORE_RSP (149, P2, writeTimeout, REQUEST_RESPONSE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), + ACCORD_QUERY_DURABLE_BEFORE_RSP (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), ACCORD_QUERY_DURABLE_BEFORE_REQ (150, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,AccordService::verbHandlerOrNoop, ACCORD_QUERY_DURABLE_BEFORE_RSP ), ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index 5175e86c1076..d07dcfc87baf 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -123,7 +123,7 @@ public boolean isComplete() int estimatedSizeOnHeap(ToLongFunction estimator) { - shouldUpdateSize = false; + shouldUpdateSize = false; // TODO (expected): probably not the safest place to clear need to compute size return lastQueriedEstimatedSizeOnHeap = Ints.checkedCast(EMPTY_SIZE + estimateStateOnHeapSize(estimator)); } @@ -204,7 +204,7 @@ public void initialize(V value) protected State state(State next) { State prev = state; - if (prev != next) + if (prev != next) // TODO (expected): we change state to transition the cache state machine but often keep payload the same - so shouldn't recompute shouldUpdateSize = true; return state = next; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 36b224b022fb..4b0722606b83 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -42,7 +42,7 @@ import accord.api.DataStore; import accord.api.Key; import accord.api.ProgressLog; -import accord.impl.CommandsForKey; +import accord.local.CommandsForKey; import accord.impl.CommandsSummary; import accord.impl.TimestampsForKey; import accord.local.Command; @@ -114,8 +114,8 @@ private static long getThreadId(ExecutorService executor) private final ExecutionOrder executionOrder; private final AccordStateCache stateCache; private final AccordStateCache.Instance commandCache; - private final AccordStateCache.Instance timestampsForKeyCache; - private final AccordStateCache.Instance commandsForKeyCache; + private final AccordStateCache.Instance timestampsForKeyCache; + private final AccordStateCache.Instance commandsForKeyCache; private AsyncOperation currentOperation = null; private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; @@ -161,7 +161,7 @@ public AccordCommandStore(int id, this::validateCommand, AccordObjectSizes::command); timestampsForKeyCache = - stateCache.instance(RoutableKey.class, + stateCache.instance(Key.class, AccordSafeTimestampsForKey.class, AccordSafeTimestampsForKey::new, this::loadTimestampsForKey, @@ -169,7 +169,7 @@ public AccordCommandStore(int id, this::validateTimestampsForKey, AccordObjectSizes::timestampsForKey); commandsForKeyCache = - stateCache.instance(RoutableKey.class, + stateCache.instance(Key.class, AccordSafeCommandsForKey.class, AccordSafeCommandsForKey::new, this::loadCommandsForKey, @@ -220,7 +220,7 @@ public void onNext(UntypedResultSet.Row row) throws Exception MessageProvider messageProvider = journal.makeMessageProvider(txnId); - SerializerSupport.TxnAndDeps txnAndDeps = SerializerSupport.extractTxnAndDeps(status, accepted, messageProvider); + SerializerSupport.TxnAndDeps txnAndDeps = SerializerSupport.extractTxnAndDeps(unsafeRangesForEpoch(), status, accepted, messageProvider); Seekables keys = txnAndDeps.txn.keys(); if (keys.domain() != Routable.Domain.Range) throw new AssertionError(String.format("Txn keys are not range for %s", txnAndDeps.txn)); @@ -311,12 +311,12 @@ public AccordStateCache.Instance commandCache return commandCache; } - public AccordStateCache.Instance timestampsForKeyCache() + public AccordStateCache.Instance timestampsForKeyCache() { return timestampsForKeyCache; } - public AccordStateCache.Instance commandsForKeyCache() + public AccordStateCache.Instance commandsForKeyCache() { return commandsForKeyCache; } @@ -466,8 +466,8 @@ public void executeBlocking(Runnable runnable) public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, Map commands, - NavigableMap timestampsForKeys, - NavigableMap commandsForKeys) + NavigableMap timestampsForKeys, + NavigableMap commandsForKeys) { Invariants.checkState(current == null); commands.values().forEach(AccordSafeState::preExecute); diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index cf4cda992be3..0fd719d5fbe6 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -31,6 +31,7 @@ import accord.topology.Topology; import accord.utils.RandomSource; import org.apache.cassandra.cache.CacheSize; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.metrics.CacheSizeMetrics; import org.apache.cassandra.schema.TableId; @@ -47,7 +48,7 @@ public class AccordCommandStores extends CommandStores implements CacheSize ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, AccordJournal journal) { super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore.factory(journal, new AccordStateCacheMetrics(ACCORD_STATE_CACHE))); - setCapacity(maxCacheSize()); + setCapacity(DatabaseDescriptor.getAccordCacheSizeInMiB() << 20); this.cacheSizeMetrics = new CacheSizeMetrics(ACCORD_STATE_CACHE, this); } @@ -110,11 +111,6 @@ synchronized void refreshCacheSizes() forEach(commandStore -> ((AccordSafeCommandStore) commandStore).commandStore().setCapacity(perStore)); } - private static long maxCacheSize() - { - return 5 << 20; // TODO (required): make configurable - } - @Override public synchronized Supplier updateTopology(Node node, Topology newTopology, boolean startSync) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java b/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java index a17a9fc84478..d87e6c96cd90 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java @@ -23,6 +23,7 @@ import accord.config.LocalConfig; import org.apache.cassandra.config.Config; +// TODO (expected): should this be merged with AccordSpec? public class AccordConfiguration implements LocalConfig { private final Config config; diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index ae202d2f639b..26e09ada14eb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -21,6 +21,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; +import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -570,6 +571,7 @@ public static class Key Key(Timestamp timestamp, Type type) { + if (timestamp == null) throw new NullPointerException("Null timestamp for type " + type); this.timestamp = timestamp; this.type = type; } @@ -1361,6 +1363,19 @@ public Set test(Set messages) return presentMessages; } + public Set all() + { + Set types = EnumSet.allOf(Type.class); + Set keys = new ObjectHashSet<>(types.size() + 1, 0.9f); + for (Type type : types) + keys.add(new Key(txnId, type)); + Set presentKeys = journal.test(keys); + Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); + for (Key key : presentKeys) + presentMessages.add(key.type.outgoingType); + return presentMessages; + } + @Override public PreAccept preAccept() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 7491df11099a..da832ae2cbf1 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory; import accord.api.Key; -import accord.impl.CommandsForKey; +import accord.local.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.Command.WaitingOn; @@ -593,7 +593,7 @@ public CommandsForKey getCommandsForKey(PartitionKey key, Row row) } // TODO (expected): garbage-free filtering, reusing encoding - public Row withoutRedundantCommands(PartitionKey key, Row row, TxnId redundantBefore) + public Row withoutRedundantCommands(PartitionKey key, Row row, RedundantBefore.Entry redundantBefore) { Invariants.checkState(row.columnCount() == 1); Cell cell = row.getCell(data); @@ -601,7 +601,10 @@ public Row withoutRedundantCommands(PartitionKey key, Row row, TxnId redundantBe return row; CommandsForKey current = CommandsForKeySerializer.fromBytes(key, cell.buffer()); - CommandsForKey updated = current.withoutRedundant(redundantBefore); + if (current == null) + return null; + + CommandsForKey updated = current.withRedundantBefore(redundantBefore); if (current == updated) return row; @@ -823,7 +826,7 @@ public static Mutation getCommandMutation(int storeId, Command original, Command Command.Committed committed = command.asCommitted(); Command.Committed originalCommitted = original != null && original.isCommitted() ? original.asCommitted() : null; if (originalCommitted == null || committed.waitingOn != originalCommitted.waitingOn) - builder.addCell(live(CommandsColumns.waiting_on, timestampMicros, WaitingOnSerializer.serialize(committed.waitingOn))); + builder.addCell(live(CommandsColumns.waiting_on, timestampMicros, WaitingOnSerializer.serialize(committed.txnId(), committed.waitingOn))); } Row row = builder.build(); @@ -1190,10 +1193,10 @@ static Command unsafeLoadCommand(AccordCommandStore commandStore, TxnId txnId) Ballot promised = deserializePromisedOrNull(row); Ballot accepted = deserializeAcceptedOrNull(row); - WaitingOnProvider waitingOn = deserializeWaitingOn(row); + WaitingOnProvider waitingOn = deserializeWaitingOn(txnId, row); MessageProvider messages = commandStore.makeMessageProvider(txnId); - return SerializerSupport.reconstruct(attrs, status, executeAt, promised, accepted, waitingOn, messages); + return SerializerSupport.reconstruct(commandStore.unsafeRangesForEpoch(), attrs, status, executeAt, promised, accepted, waitingOn, messages); } catch (Throwable t) { @@ -1271,7 +1274,7 @@ public static Ballot deserializeAcceptedOrNull(UntypedResultSet.Row row) return deserializeTimestampOrNull(row.getBlob("accepted_ballot"), Ballot::fromBits); } - private static WaitingOnProvider deserializeWaitingOn(UntypedResultSet.Row row) + private static WaitingOnProvider deserializeWaitingOn(TxnId txnId, UntypedResultSet.Row row) { ByteBuffer bytes = row.getBlob("waiting_on"); @@ -1285,7 +1288,7 @@ private static WaitingOnProvider deserializeWaitingOn(UntypedResultSet.Row row) try { - return WaitingOnSerializer.deserialize(deps, bytes); + return WaitingOnSerializer.deserialize(txnId, deps.keyDeps.keys(), deps.rangeDeps.txnIds(), bytes); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index a1704ba52b2a..7346a6eebf31 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -24,11 +24,12 @@ import accord.api.Key; import accord.api.Result; import accord.api.RoutingKey; -import accord.impl.CommandsForKey; -import accord.impl.CommandsForKey.Info; +import accord.local.CommandsForKey; +import accord.local.CommandsForKey.TxnInfo; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.Command.WaitingOn; +import accord.local.CommandsForKey.TxnInfoWithMissing; import accord.local.CommonAttributes; import accord.local.Node; import accord.local.SaveStatus; @@ -305,7 +306,6 @@ private static long emptySize(Command command) return ACCEPTED; case Committed: case Stable: - case ReadyToExecute: return COMMITTED; case PreApplied: case Applied: @@ -363,22 +363,25 @@ public static long timestampsForKey(TimestampsForKey timestamps) } private static long EMPTY_CFK_SIZE = measure(new CommandsForKey(null)); - private static long EMPTY_INFO_SIZE = measure(CommandsForKey.Info.createMock(null, null, null)); + private static long EMPTY_INFO_SIZE = measure(TxnInfo.createMock(TxnId.NONE, null, null, null)); + private static long EMPTY_INFO_WITH_MISSING_ADDITIONAL_SIZE = measure(TxnInfo.createMock(TxnId.NONE, null, null, null)) - EMPTY_INFO_SIZE; public static long commandsForKey(CommandsForKey cfk) { long size = EMPTY_CFK_SIZE; size += key(cfk.key()); - size += 2 * ObjectSizes.sizeOfReferenceArray(cfk.size()); - size += cfk.size() * TIMESTAMP_SIZE; + size += ObjectSizes.sizeOfReferenceArray(cfk.size()); + size += cfk.size() * EMPTY_INFO_SIZE; for (int i = 0 ; i < cfk.size() ; ++i) { - Info info = cfk.info(i); - if (info.getClass() == CommandsForKey.NoInfo.class) - continue; - - size += EMPTY_INFO_SIZE; - if (info.missing.length > 0) - size += ObjectSizes.sizeOfReferenceArray(info.missing.length); + TxnInfo info = cfk.get(i); + if (info.getClass() != TxnInfoWithMissing.class) continue; + TxnInfoWithMissing infoWithMissing = (TxnInfoWithMissing) info; + if (infoWithMissing.missing.length > 0) + { + size += EMPTY_INFO_WITH_MISSING_ADDITIONAL_SIZE; + size += ObjectSizes.sizeOfReferenceArray(infoWithMissing.missing.length); + size += infoWithMissing.missing.length * TIMESTAMP_SIZE; + } } return size; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index c1b09ccb270a..1c497cc5c94f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -31,8 +31,7 @@ import accord.api.Key; import accord.api.ProgressLog; import accord.impl.AbstractSafeCommandStore; -import accord.impl.CommandsForKey; -import accord.impl.CommandsForKeys; +import accord.local.CommandsForKey; import accord.impl.CommandsSummary; import accord.local.Command; import accord.local.CommandStores.RangesForEpoch; @@ -41,7 +40,6 @@ import accord.primitives.AbstractKeys; import accord.primitives.Deps; import accord.primitives.Ranges; -import accord.primitives.RoutableKey; import accord.primitives.Routables; import accord.primitives.Seekables; import accord.primitives.Timestamp; @@ -53,16 +51,16 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore { private final Map commands; - private final NavigableMap commandsForKeys; - private final NavigableMap timestampsForKeys; + private final NavigableMap commandsForKeys; + private final NavigableMap timestampsForKeys; private final AccordCommandStore commandStore; private final RangesForEpoch ranges; CommandsForRanges.Updater rangeUpdates = null; public AccordSafeCommandStore(PreLoadContext context, Map commands, - NavigableMap timestampsForKey, - NavigableMap commandsForKey, + NavigableMap timestampsForKey, + NavigableMap commandsForKey, AccordCommandStore commandStore) { super(context); @@ -94,7 +92,7 @@ protected AccordSafeCommand getIfLoaded(TxnId txnId) } @Override - protected AccordSafeCommandsForKey getCommandsForKeyInternal(RoutableKey key) + protected AccordSafeCommandsForKey getCommandsForKeyInternal(Key key) { return commandsForKeys.get(key); } @@ -106,7 +104,7 @@ protected void addCommandsForKeyInternal(AccordSafeCommandsForKey cfk) } @Override - protected AccordSafeCommandsForKey getCommandsForKeyIfLoaded(RoutableKey key) + protected AccordSafeCommandsForKey getCommandsForKeyIfLoaded(Key key) { AccordSafeCommandsForKey cfk = commandStore.commandsForKeyCache().acquireIfLoaded(key); if (cfk != null) cfk.preExecute(); @@ -114,7 +112,7 @@ protected AccordSafeCommandsForKey getCommandsForKeyIfLoaded(RoutableKey key) } @Override - protected AccordSafeTimestampsForKey getTimestampsForKeyInternal(RoutableKey key) + protected AccordSafeTimestampsForKey getTimestampsForKeyInternal(Key key) { return timestampsForKeys.get(key); } @@ -126,7 +124,7 @@ protected void addTimestampsForKeyInternal(AccordSafeTimestampsForKey cfk) } @Override - protected AccordSafeTimestampsForKey getTimestampsForKeyIfLoaded(RoutableKey key) + protected AccordSafeTimestampsForKey getTimestampsForKeyIfLoaded(Key key) { AccordSafeTimestampsForKey cfk = commandStore.timestampsForKeyCache().acquireIfLoaded(key); if (cfk != null) cfk.preExecute(); @@ -177,14 +175,14 @@ public void registerHistoricalTransactions(Deps deps) Ranges allRanges = ranges.all(); deps.keyDeps.keys().forEach(allRanges, key -> { // TODO (now): batch register to minimise GC - deps.keyDeps.forEach(key, txnId -> { + deps.keyDeps.forEach(key, (txnId, txnIdx) -> { // TODO (desired, efficiency): this can be made more efficient by batching by epoch if (ranges.coordinates(txnId).contains(key)) return; // already coordinates, no need to replicate if (!ranges.allBefore(txnId.epoch()).contains(key)) return; - CommandsForKeys.registerNotWitnessed(this, key, txnId); + get(key).registerHistorical(this, txnId); }); }); CommandsForRanges commandsForRanges = commandStore.commandsForRanges(); @@ -221,7 +219,7 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio for (Key key : keys) { if (!slice.contains(key)) continue; - CommandsForKey commands = commandsForKey(key).current(); + CommandsForKey commands = get(key).current(); accumulate = map.apply(commands, accumulate); } } @@ -233,11 +231,11 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio Routables sliced = keysOrRanges.slice(slice, Routables.Slice.Minimal); if (!context.keys().slice(slice, Routables.Slice.Minimal).containsAll(sliced)) throw new AssertionError("Range(s) detected not present in the PreLoadContext: expected " + context.keys() + " but given " + keysOrRanges); - for (RoutableKey key : timestampsForKeys.keySet()) + for (Key key : commandsForKeys.keySet()) { //TODO (duplicate code): this is a repeat of Key... only change is checking contains in range if (!sliced.contains(key)) continue; - CommandsForKey commands = commandsForKey(key).current(); + CommandsForKey commands = get(key).current(); accumulate = map.apply(commands, accumulate); } } @@ -263,22 +261,20 @@ public T mapReduceFull(Seekables keysOrRanges, Ranges slice, TxnId } @Override - protected void update(Command prev, Command updated, @Nullable Seekables keysOrRanges) + protected void update(Command prev, Command updated) { - super.update(prev, updated, keysOrRanges); + super.update(prev, updated); if (updated.txnId().domain() == Range && CommandsForKey.needsUpdate(prev, updated)) { + Seekables keysOrRanges = updated.keysOrRanges(); + if (keysOrRanges == null) keysOrRanges = prev.keysOrRanges(); if (keysOrRanges == null) - { - if (updated.known().isDefinitionKnown()) keysOrRanges = updated.partialTxn().keys(); - else if (prev.known().isDefinitionKnown()) keysOrRanges = prev.partialTxn().keys(); - else return; - } - List waitingOn; + return; - if (updated.partialDeps() == null) waitingOn = Collections.emptyList(); + List waitingOn; // TODO (required): this is faulty: we cannot simply save the raw transaction ids, as they may be for other ranges + if (updated.partialDeps() == null) waitingOn = Collections.emptyList(); else waitingOn = updated.partialDeps().txnIds(); updateRanges().put(updated.txnId(), (Ranges)keysOrRanges, updated.saveStatus(), updated.executeAt(), waitingOn); } @@ -300,8 +296,8 @@ protected void invalidateSafeState() } public void postExecute(Map commands, - Map timestampsForKey, - Map commandsForKeys + Map timestampsForKey, + Map commandsForKeys ) { postExecute(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index 748143f33363..808b4d4bc15a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -23,18 +23,17 @@ import com.google.common.annotations.VisibleForTesting; import accord.api.Key; -import accord.impl.CommandsForKey; -import accord.impl.SafeCommandsForKey; -import accord.primitives.RoutableKey; +import accord.local.CommandsForKey; +import accord.local.SafeCommandsForKey; -public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState +public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState { private boolean invalidated; - private final AccordCachingState global; + private final AccordCachingState global; private CommandsForKey original; private CommandsForKey current; - public AccordSafeCommandsForKey(AccordCachingState global) + public AccordSafeCommandsForKey(AccordCachingState global) { super((Key) global.key()); this.global = global; @@ -83,7 +82,7 @@ public boolean hasUpdate() } @Override - public AccordCachingState global() + public AccordCachingState global() { checkNotInvalidated(); return global; diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeState.java b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java index b742efb9d103..374968bcfb7d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java @@ -25,6 +25,8 @@ public interface AccordSafeState extends SafeState { void set(V update); V original(); + void invalidate(); + boolean invalidated(); void preExecute(); void postExecute(); AccordCachingState global(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java index b5b44a770380..a4c48c83e68d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java @@ -26,17 +26,16 @@ import accord.api.Key; import accord.impl.SafeTimestampsForKey; import accord.impl.TimestampsForKey; -import accord.primitives.RoutableKey; import accord.primitives.Timestamp; -public class AccordSafeTimestampsForKey extends SafeTimestampsForKey implements AccordSafeState +public class AccordSafeTimestampsForKey extends SafeTimestampsForKey implements AccordSafeState { private boolean invalidated; - private final AccordCachingState global; + private final AccordCachingState global; private TimestampsForKey original; private TimestampsForKey current; - public AccordSafeTimestampsForKey(AccordCachingState global) + public AccordSafeTimestampsForKey(AccordCachingState global) { super((Key) global.key()); this.global = global; @@ -71,7 +70,7 @@ public String toString() } @Override - public AccordCachingState global() + public AccordCachingState global() { checkNotInvalidated(); return global; diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index e831aa1fbdb5..8378f65c91dd 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -127,7 +127,7 @@ private enum State { INIT, STARTED, SHUTDOWN} private final AccordDataStore dataStore; private final AccordJournal journal; private final CoordinateDurabilityScheduling durabilityScheduling; - private final AccordVerbHandler verbHandler; + private final AccordVerbHandler requestHandler; private final LocalConfig configuration; @GuardedBy("this") private State state = State.INIT; @@ -296,7 +296,7 @@ private AccordService(Id localId) configuration); this.nodeShutdown = toShutdownable(node); this.durabilityScheduling = new CoordinateDurabilityScheduling(node); - this.verbHandler = new AccordVerbHandler<>(node, configService, journal); + this.requestHandler = new AccordVerbHandler<>(node, configService, journal); } @Override @@ -313,14 +313,14 @@ public synchronized void startup() durabilityScheduling.setShardCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityCycle(SECONDS)), SECONDS); durabilityScheduling.setTxnIdLag(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityTxnIdLag(SECONDS)), TimeUnit.SECONDS); durabilityScheduling.setFrequency(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityFrequency(SECONDS)), SECONDS); - durabilityScheduling.start(); +// durabilityScheduling.start(); state = State.STARTED; } @Override public IVerbHandler verbHandler() { - return verbHandler; + return requestHandler; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index ecfa9b0626cf..75a069b03a7d 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -41,7 +41,7 @@ import accord.api.Key; import accord.api.RoutingKey; -import accord.impl.CommandsForKey; +import accord.local.CommandsForKey; import accord.impl.CommandsSummary; import accord.local.Command; import accord.local.SaveStatus; @@ -64,7 +64,6 @@ import static accord.local.SafeCommandStore.*; import static accord.local.SafeCommandStore.TestDep.ANY_DEPS; import static accord.local.SafeCommandStore.TestDep.WITH; -import static accord.local.SafeCommandStore.TestStartedAt.ANY; import static accord.local.SafeCommandStore.TestStartedAt.STARTED_BEFORE; import static accord.local.SafeCommandStore.TestStatus.ANY_STATUS; import static accord.local.Status.Stable; @@ -379,11 +378,13 @@ protected CommandsSummary computeNext() private static Range toRange(Interval interval) { - TokenKey start = (TokenKey) interval.min; - TokenKey end = (TokenKey) interval.max; + AccordRoutingKey start = (AccordRoutingKey) interval.min; + if (!(start instanceof AccordRoutingKey.SentinelKey)) + start = new TokenKey(start.table(), start.token().decreaseSlightly()); + AccordRoutingKey end = (AccordRoutingKey) interval.max; // TODO (required, correctness) : accord doesn't support wrap around, so decreaseSlightly may fail in some cases // TODO (required, correctness) : this logic is mostly used for testing, so is it actually safe for all partitioners? - return new TokenRange(start.withToken(start.token().decreaseSlightly()), end); + return new TokenRange(start, end); } @Nullable diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index f9ab58387d75..33f8f2b088d1 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -117,6 +117,9 @@ public void onHandledException(Throwable t) public boolean isExpired(TxnId initiated, long now) { // TODO: should distinguish between reads and writes + if (initiated.kind().isSyncPoint()) + return false; + return now - initiated.hlc() > getReadRpcTimeout(MICROSECONDS); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index b8494e7d4492..4b7607a36bbe 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -33,12 +33,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.Key; import accord.api.RoutingKey; import accord.local.KeyHistory; import accord.local.PreLoadContext; import accord.primitives.Range; import accord.primitives.Ranges; -import accord.primitives.RoutableKey; import accord.primitives.Seekables; import accord.primitives.TxnId; import accord.utils.Invariants; @@ -121,7 +121,7 @@ private static > void referenceAndAssemble } } - private void referenceAndAssembleReadsForKey(RoutableKey key, + private void referenceAndAssembleReadsForKey(Key key, AsyncOperation.Context context, List> listenChains) { @@ -157,7 +157,7 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) { case Key: // cast to Keys fails... - Iterable keys = (Iterable) keysOrRanges; + Iterable keys = (Iterable) keysOrRanges; keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); break; case Range: @@ -172,7 +172,7 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context context) { - AsyncChain> overlappingKeys = findOverlappingKeys((Ranges) keysOrRanges); + AsyncChain> overlappingKeys = findOverlappingKeys((Ranges) keysOrRanges); return overlappingKeys.flatMap(keys -> { if (keys.isEmpty()) @@ -183,14 +183,14 @@ private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context c }, commandStore); } - private AsyncChain> findOverlappingKeys(Ranges ranges) + private AsyncChain> findOverlappingKeys(Ranges ranges) { Invariants.checkArgument(!ranges.isEmpty()); List>> chains = new ArrayList<>(ranges.size()); for (Range range : ranges) chains.add(findOverlappingKeys(range)); - return AsyncChains.reduce(chains, (a, b) -> ImmutableSet.builder().addAll(a).addAll(b).build()); + return AsyncChains.reduce(chains, (a, b) -> ImmutableSet.builder().addAll(a).addAll(b).build()); } private AsyncChain> findOverlappingKeys(Range range) diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index bef57bcf0b12..f0c53e33d7c1 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -30,6 +30,7 @@ import org.slf4j.LoggerFactory; import org.slf4j.MDC; +import accord.api.Key; import accord.local.CommandStore; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; @@ -67,8 +68,8 @@ private static class LoggingProps static class Context { final HashMap commands = new HashMap<>(); - final TreeMap timestampsForKey = new TreeMap<>(); - final TreeMap commandsForKey = new TreeMap<>(); + final TreeMap timestampsForKey = new TreeMap<>(); + final TreeMap commandsForKey = new TreeMap<>(); void releaseResources(AccordCommandStore commandStore) { diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java index 7294dd2696fc..49c06e811c72 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java @@ -132,7 +132,6 @@ public ApplyReply apply(SafeCommandStore safeStore) case PreCommitted: case Committed: case PreApplied: - case ReadyToExecute: synchronized (this) { waitingOn.set(safeStore.commandStore().id()); @@ -249,7 +248,6 @@ public void onChange(SafeCommandStore safeStore, SafeCommand safeCommand) case PreCommitted: case Committed: case PreApplied: - case ReadyToExecute: return; case Applied: diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index d5144cbe8da1..dbe2f4845f30 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -24,10 +24,10 @@ import com.google.common.primitives.Ints; import accord.api.Key; -import accord.impl.CommandsForKey; -import accord.impl.CommandsForKey.Info; -import accord.impl.CommandsForKey.InternalStatus; -import accord.impl.CommandsForKey.NoInfo; +import accord.local.CommandsForKey; +import accord.local.CommandsForKey.TxnInfo; +import accord.local.CommandsForKey.InternalStatus; +import accord.local.CommandsForKey.Unmanaged; import accord.local.Node; import accord.primitives.Routable.Domain; import accord.primitives.Timestamp; @@ -35,8 +35,10 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.utils.vint.VIntCoding; +import static accord.local.CommandsForKey.NO_PENDING_UNMANAGED; import static accord.primitives.Txn.Kind.ExclusiveSyncPoint; import static accord.primitives.Txn.Kind.Read; import static accord.primitives.Txn.Kind.Write; @@ -104,23 +106,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) { int commandCount = cfk.size(); if (commandCount == 0) - { - // TODO (expected): we should not need to special-case, but best solution here is not to store redundantBefore; - // but this requires some modest deeper changes, so for now special-case serialization when empty - Timestamp redundantBefore = cfk.redundantBefore(); - ByteBuffer out = ByteBuffer.allocate(TypeSizes.sizeofUnsignedVInt(0) + - TypeSizes.sizeofUnsignedVInt(redundantBefore.epoch()) + - TypeSizes.sizeofUnsignedVInt(redundantBefore.hlc()) + - TypeSizes.sizeofUnsignedVInt(redundantBefore.flags()) + - TypeSizes.sizeofUnsignedVInt(redundantBefore.node.id)); - VIntCoding.writeUnsignedVInt32(0, out); - VIntCoding.writeUnsignedVInt(redundantBefore.epoch(), out); - VIntCoding.writeUnsignedVInt(redundantBefore.hlc(), out); - VIntCoding.writeUnsignedVInt32(redundantBefore.flags(), out); - VIntCoding.writeUnsignedVInt32(redundantBefore.node.id, out); - out.flip(); - return out; - } + return ByteBuffer.allocate(1); int[] nodeIds = cachedInts().getInts(Math.min(64, commandCount)); try @@ -129,11 +115,9 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) // whether we have any missing transactions to encode, any executeAt that are not equal to their TxnId // and whether there are any non-standard flag bits to encode boolean hasNonStandardFlags = false; - int nodeIdCount, missingIdCount = 0, executeAtCount = 0, bitsPerExecuteAtFlags = 0; + int nodeIdCount = 0, missingIdCount = 0, executeAtCount = 0, bitsPerExecuteAtFlags = 0; int bitsPerExecuteAtEpochDelta = 0, bitsPerExecuteAtHlcDelta = 1; // to permit us to use full 64 bits and encode in 5 bits we force at least one hlc bit { - nodeIdCount = 1; - nodeIds[0] = cfk.redundantBefore().node.id; for (int i = 0 ; i < commandCount ; ++i) { if (nodeIdCount + 1 >= nodeIds.length) @@ -143,24 +127,19 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) nodeIds = cachedInts().resize(nodeIds, nodeIds.length, nodeIds.length * 2); } - TxnId txnId = cfk.txnId(i); - Info info = cfk.info(i); + TxnInfo txn = cfk.get(i); - hasNonStandardFlags |= txnIdFlags(txnId) != STANDARD; - nodeIds[nodeIdCount++] = txnId.node.id; + hasNonStandardFlags |= txnIdFlags(txn) != STANDARD; + nodeIds[nodeIdCount++] = txn.node.id; - if (info.getClass() == NoInfo.class) + missingIdCount += txn.missing().length; + if (txn.executeAt == txn) continue; - missingIdCount += info.missing.length; - - if (info.executeAt == txnId) - continue; - - nodeIds[nodeIdCount++] = info.executeAt.node.id; - bitsPerExecuteAtEpochDelta = Math.max(bitsPerExecuteAtEpochDelta, numberOfBitsToRepresent(info.executeAt.epoch() - txnId.epoch())); - bitsPerExecuteAtHlcDelta = Math.max(bitsPerExecuteAtHlcDelta, numberOfBitsToRepresent(info.executeAt.hlc() - txnId.hlc())); - bitsPerExecuteAtFlags = Math.max(bitsPerExecuteAtFlags, numberOfBitsToRepresent(info.executeAt.flags())); + nodeIds[nodeIdCount++] = txn.executeAt.node.id; + bitsPerExecuteAtEpochDelta = Math.max(bitsPerExecuteAtEpochDelta, numberOfBitsToRepresent(txn.executeAt.epoch() - txn.epoch())); + bitsPerExecuteAtHlcDelta = Math.max(bitsPerExecuteAtHlcDelta, numberOfBitsToRepresent(txn.executeAt.hlc() - txn.hlc())); + bitsPerExecuteAtFlags = Math.max(bitsPerExecuteAtFlags, numberOfBitsToRepresent(txn.executeAt.flags())); executeAtCount += 1; } nodeIdCount = compact(nodeIds); @@ -175,8 +154,8 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) int maxHeaderBits = minHeaderBits; int totalBytes = 0; - long prevEpoch = cfk.redundantBefore().epoch(); - long prevHlc = cfk.redundantBefore().hlc(); + long prevEpoch = cfk.get(0).epoch(); + long prevHlc = cfk.get(0).hlc(); int[] bytesHistogram = cachedInts().getInts(12); Arrays.fill(bytesHistogram, 0); for (int i = 0 ; i < commandCount ; ++i) @@ -214,7 +193,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) if (hasNonStandardFlags && txnIdFlags(txnId) == RAW) totalBytes += 2; - Info info = cfk.info(i); + TxnInfo info = cfk.get(i); if (info.status.hasInfo) headerBits += infoHeaderBits; maxHeaderBits = Math.max(headerBits, maxHeaderBits); @@ -247,9 +226,11 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) // then pick third number as 75th %ile, but at least 1 less than highest, and one more than second // finally, ensure third then second are distributed so that there is no more than a gap of 4 between them and the next int l0 = Math.max(0, Math.min(3, minBasicBytes - headerBytes)); - int l1 = Math.max(l0+1, Math.min(l0+4,Arrays.binarySearch(bytesHistogram, commandCount/4) - headerBytes)); + int l1 = Arrays.binarySearch(bytesHistogram, minBasicBytes, maxBasicBytes, commandCount/4); + l1 = Math.max(l0+1, Math.min(l0+4, (l1 < 0 ? -1 - l1 : l1) - headerBytes)); int l3 = Math.max(l1+2, maxBasicBytes - headerBytes); - int l2 = Math.max(l1+1, Math.min(l3-1, Arrays.binarySearch(bytesHistogram, (3*commandCount)/4) - headerBytes)); + int l2 = Arrays.binarySearch(bytesHistogram, minBasicBytes, maxBasicBytes,(3*commandCount)/4); + l2 = Math.max(l1+1, Math.min(l3-1, (l2 < 0 ? -1 -l2 : l2) - headerBytes)); while (l3-l2 > 4) ++l2; while (l2-l1 > 4) ++l1; hlcBytesLookup = setHlcBytes(l0, l1, l2, l3); @@ -267,16 +248,13 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) totalBytes += TypeSizes.sizeofUnsignedVInt(nodeIds[i] - nodeIds[i-1]); totalBytes += 2; - Arrays.fill(bytesHistogram, minBasicBytes, maxBasicBytes + 1, 0); cachedInts().forceDiscard(bytesHistogram); - prevEpoch = cfk.redundantBefore().epoch(); - prevHlc = cfk.redundantBefore().hlc(); + prevEpoch = cfk.get(0).epoch(); + prevHlc = cfk.get(0).hlc(); // account for encoding redundantBefore totalBytes += TypeSizes.sizeofUnsignedVInt(prevEpoch); totalBytes += TypeSizes.sizeofUnsignedVInt(prevHlc); - totalBytes += 2; // flags TODO (expected): pack this along with uniqueIdBits, as usually zero bits should be needed - totalBytes += (bitsPerNodeId+7)/8; if (missingIdCount + executeAtCount > 0) { @@ -291,6 +269,20 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) totalBytes += 2; } + // count unmanaged bytes + int unmanagedPendingCommitCount = 0; + for (int i = 0 ; i < cfk.unmanagedCount() ; ++i) + { + Unmanaged unmanaged = cfk.getUnmanaged(i); + if (unmanaged.pending == Unmanaged.Pending.COMMIT) + ++unmanagedPendingCommitCount; + totalBytes += CommandSerializers.txnId.serializedSize(); + // TODO (desired): this could be more efficient, e.g. referencing one of the TxnInfo indexes for timestamp + totalBytes += CommandSerializers.timestamp.serializedSize(); + } + totalBytes += TypeSizes.sizeofUnsignedVInt(unmanagedPendingCommitCount); + totalBytes += TypeSizes.sizeofUnsignedVInt(cfk.unmanagedCount() - unmanagedPendingCommitCount); + ByteBuffer out = ByteBuffer.allocate(totalBytes); VIntCoding.writeUnsignedVInt32(commandCount, out); VIntCoding.writeUnsignedVInt32(nodeIdCount, out); @@ -301,8 +293,6 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) VIntCoding.writeUnsignedVInt(prevEpoch, out); VIntCoding.writeUnsignedVInt(prevHlc, out); - out.putShort((short) cfk.redundantBefore().flags()); - writeLeastSignificantBytes(Arrays.binarySearch(nodeIds, 0, nodeIdCount, cfk.redundantBefore().node.id), (bitsPerNodeId+7)/8, out); int executeAtMask = executeAtCount > 0 ? 1 : 0; int missingDepsMask = missingIdCount > 0 ? 1 : 0; @@ -311,7 +301,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) for (int i = 0 ; i < commandCount ; ++i) { TxnId txnId = cfk.txnId(i); - Info info = cfk.info(i); + TxnInfo info = cfk.get(i); InternalStatus status = info.status; long bits = status.ordinal(); @@ -322,7 +312,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) bits |= hasExecuteAt << bitIndex; bitIndex += statusHasInfo & executeAtMask; - long hasMissingIds = info.missing != CommandsForKey.NO_TXNIDS ? 1 : 0; + long hasMissingIds = info.missing() != CommandsForKey.NO_TXNIDS ? 1 : 0; bits |= hasMissingIds << bitIndex; bitIndex += statusHasInfo & missingDepsMask; @@ -392,6 +382,20 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) } } + VIntCoding.writeUnsignedVInt32(unmanagedPendingCommitCount, out); + VIntCoding.writeUnsignedVInt32(cfk.unmanagedCount() - unmanagedPendingCommitCount, out); + Unmanaged.Pending pending = unmanagedPendingCommitCount == 0 ? Unmanaged.Pending.APPLY : Unmanaged.Pending.COMMIT; + for (int i = 0 ; i < cfk.unmanagedCount() ; ++i) + { + Unmanaged unmanaged = cfk.getUnmanaged(i); + Invariants.checkState(unmanaged.pending == pending); + CommandSerializers.txnId.serialize(unmanaged.txnId, out, ByteBufferAccessor.instance, out.position()); + out.position(out.position() + CommandSerializers.txnId.serializedSize()); + CommandSerializers.timestamp.serialize(unmanaged.waitingUntil, out, ByteBufferAccessor.instance, out.position()); + out.position(out.position() + CommandSerializers.timestamp.serializedSize()); + if (--unmanagedPendingCommitCount == 0) pending = Unmanaged.Pending.APPLY; + } + if ((executeAtCount | missingIdCount) > 0) { int bitsPerCommandId = numberOfBitsToRepresent(commandCount); @@ -407,21 +411,17 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) for (int i = 0 ; i < commandCount ; ++i) { - Info info = cfk.info(i); - if (info.getClass() == NoInfo.class) - continue; - - TxnId txnId = cfk.txnId(i); - if (info.executeAt != txnId) + TxnInfo txn = cfk.get(i); + if (txn.executeAt != txn) { - Timestamp executeAt = info.executeAt; + Timestamp executeAt = txn.executeAt; int nodeIdx = Arrays.binarySearch(nodeIds, 0, nodeIdCount, executeAt.node.id); if (bitsPerExecuteAt <= 64) { - Invariants.checkState(executeAt.epoch() >= txnId.epoch()); - long executeAtBits = executeAt.epoch() - txnId.epoch(); + Invariants.checkState(executeAt.epoch() >= txn.epoch()); + long executeAtBits = executeAt.epoch() - txn.epoch(); int offset = bitsPerExecuteAtEpochDelta; - executeAtBits |= (executeAt.hlc() - txnId.hlc()) << offset ; + executeAtBits |= (executeAt.hlc() - txn.hlc()) << offset ; offset += bitsPerExecuteAtHlcDelta; executeAtBits |= ((long)executeAt.flags()) << offset; offset += bitsPerExecuteAtFlags; @@ -431,9 +431,9 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) } else { - buffer = flushBits(buffer, bufferCount, executeAt.epoch() - txnId.epoch(), bitsPerExecuteAtEpochDelta, out); + buffer = flushBits(buffer, bufferCount, executeAt.epoch() - txn.epoch(), bitsPerExecuteAtEpochDelta, out); bufferCount = (bufferCount + bitsPerExecuteAtEpochDelta) & 63; - buffer = flushBits(buffer, bufferCount, executeAt.hlc() - txnId.hlc(), bitsPerExecuteAtHlcDelta, out); + buffer = flushBits(buffer, bufferCount, executeAt.hlc() - txn.hlc(), bitsPerExecuteAtHlcDelta, out); bufferCount = (bufferCount + bitsPerExecuteAtHlcDelta) & 63; buffer = flushBits(buffer, bufferCount, executeAt.flags(), bitsPerExecuteAtFlags, out); bufferCount = (bufferCount + bitsPerExecuteAtFlags) & 63; @@ -442,16 +442,17 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) } } - if (info.missing.length > 0) + TxnId[] missing = txn.missing(); + if (missing.length > 0) { int j = 0; - while (j < info.missing.length - 1) + while (j < missing.length - 1) { - int missingId = cfk.indexOf(info.missing[j++]); + int missingId = cfk.indexOf(missing[j++]); buffer = flushBits(buffer, bufferCount, missingId, bitsPerMissingId, out); bufferCount = (bufferCount + bitsPerMissingId) & 63; } - int missingId = cfk.indexOf(info.missing[info.missing.length - 1]); + int missingId = cfk.indexOf(missing[missing.length - 1]); missingId |= 1L << bitsPerCommandId; buffer = flushBits(buffer, bufferCount, missingId, bitsPerMissingId, out); bufferCount = (bufferCount + bitsPerMissingId) & 63; @@ -461,6 +462,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) writeMostSignificantBytes(buffer, (bufferCount + 7)/8, out); } + Invariants.checkState(!out.hasRemaining()); out.flip(); return out; } @@ -494,16 +496,10 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) in = in.duplicate(); int commandCount = VIntCoding.readUnsignedVInt32(in); if (commandCount == 0) - { - long epoch = VIntCoding.readUnsignedVInt(in); - long hlc = VIntCoding.readUnsignedVInt(in); - int flags = VIntCoding.readUnsignedVInt32(in); - Node.Id id = new Node.Id(VIntCoding.readUnsignedVInt32(in)); - return new CommandsForKey(key).withoutRedundant(TxnId.fromValues(epoch, hlc, flags, id)); - } + return new CommandsForKey(key); - TxnId[] txnIds = new TxnId[commandCount]; - Info[] infos = new Info[commandCount]; + TxnId[] txnIds = cachedTxnIds().get(commandCount); + TxnInfo[] txns = new TxnInfo[commandCount]; int nodeIdCount = VIntCoding.readUnsignedVInt32(in); int bitsPerNodeId = numberOfBitsToRepresent(nodeIdCount); long nodeIdMask = (1L << bitsPerNodeId) - 1; @@ -526,12 +522,8 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) hlcBytesLookup = setHlcByteDeltas((flags >>> 5) & 0x3, (flags >>> 7) & 0x3, (flags >>> 9) & 0x3, (flags >>> 11) & 0x3); } - long prevEpoch = VIntCoding.readUnsignedVInt32(in); - long prevHlc = VIntCoding.readUnsignedVInt32(in); - TxnId redundantBefore = TxnId.fromValues(prevEpoch, prevHlc, in.getShort(), - nodeIds[(int)readLeastSignificantBytes((bitsPerNodeId+7)/8, in)]); - - + long prevEpoch = VIntCoding.readUnsignedVInt(in); + long prevHlc = VIntCoding.readUnsignedVInt(in); for (int i = 0 ; i < commandCount ; ++i) { long header = readLeastSignificantBytes(headerByteCount, in); @@ -601,12 +593,34 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) : TxnId.fromValues(epoch, hlc, flags, node); txnIds[i] = txnId; - infos[i] = DECODE_INFOS[(executeAtInfoOffset | missingDepsInfoOffset)*STATUS_COUNT + status.ordinal()]; + txns[i] = DECODE_INFOS[(executeAtInfoOffset | missingDepsInfoOffset)*STATUS_COUNT + status.ordinal()]; prevEpoch = epoch; prevHlc = hlc; } + int unmanagedPendingCommitCount = VIntCoding.readUnsignedVInt32(in); + int unmanagedCount = unmanagedPendingCommitCount + VIntCoding.readUnsignedVInt32(in); + Unmanaged[] unmanageds; + if (unmanagedCount == 0) + { + unmanageds = NO_PENDING_UNMANAGED; + } + else + { + unmanageds = new Unmanaged[unmanagedCount]; + Unmanaged.Pending pending = unmanagedPendingCommitCount == 0 ? Unmanaged.Pending.APPLY : Unmanaged.Pending.COMMIT; + for (int i = 0 ; i < unmanagedCount ; ++i) + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, ByteBufferAccessor.instance, in.position()); + in.position(in.position() + CommandSerializers.txnId.serializedSize()); + Timestamp waitingUntil = CommandSerializers.timestamp.deserialize(in, ByteBufferAccessor.instance, in.position()); + in.position(in.position() + CommandSerializers.timestamp.serializedSize()); + unmanageds[i] = new Unmanaged(pending, txnId, waitingUntil); + if (--unmanagedPendingCommitCount == 0) pending = Unmanaged.Pending.APPLY; + } + } + if (executeAtMasks + missingDepsMasks > 0) { TxnId[] missingIdBuffer = cachedTxnIds().get(8); @@ -630,13 +644,10 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) for (int i = 0 ; i < commandCount ; ++i) { - Info info = infos[i]; - if (info.getClass() == NoInfo.class) - continue; - TxnId txnId = txnIds[i]; - Timestamp executeAt = txnId; - if (info.executeAt == null) + TxnInfo placeholder = txns[i]; + Timestamp executeAt; + if (placeholder.executeAt == null) { long epoch, hlc; int flags; @@ -661,8 +672,12 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) } executeAt = Timestamp.fromValues(epoch, hlc, flags, id); } + else + { + executeAt = txnId; + } - TxnId[] missing = info.missing; + TxnId[] missing = placeholder.missing(); if (missing == null) { int prev = -1; @@ -684,13 +699,19 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) missingIdCount = 0; } - infos[i] = Info.create(txnId, info.status, executeAt, missing); + txns[i] = TxnInfo.create(txnId, placeholder.status, executeAt, missing); } cachedTxnIds().forceDiscard(missingIdBuffer, maxIdBufferCount); } + else + { + for (int i = 0 ; i < commandCount ; ++i) + txns[i] = TxnInfo.create(txnIds[i], txns[i].status, txnIds[i]); + } + cachedTxnIds().forceDiscard(txnIds, commandCount); - return CommandsForKey.SerializerSupport.create(key, redundantBefore, txnIds, infos); + return CommandsForKey.SerializerSupport.create(key, txns, unmanageds); } private static int getHlcBytes(int lookup, int index) @@ -833,16 +854,16 @@ private static long txnIdFlagsBits(TxnId txnId) private static final Txn.Kind[] TXN_ID_FLAG_BITS_KIND_LOOKUP = new Txn.Kind[] { Read, Write, ExclusiveSyncPoint, null }; private static final int STATUS_COUNT = InternalStatus.values().length; - private static final Info[] DECODE_INFOS = new Info[4 * STATUS_COUNT]; + private static final TxnInfo[] DECODE_INFOS = new TxnInfo[4 * STATUS_COUNT]; static { for (InternalStatus status : InternalStatus.values()) { int ordinal = status.ordinal(); - DECODE_INFOS[ordinal] = status.asNoInfo; - DECODE_INFOS[STATUS_COUNT+ordinal] = Info.createMock(status, Timestamp.NONE, null); - DECODE_INFOS[2*STATUS_COUNT+ordinal] = Info.createMock(status, null, CommandsForKey.NO_TXNIDS); - DECODE_INFOS[3*STATUS_COUNT+ordinal] = Info.createMock(status, null, null); + DECODE_INFOS[ordinal] = TxnInfo.createMock(TxnId.NONE, status, TxnId.NONE, CommandsForKey.NO_TXNIDS); + DECODE_INFOS[STATUS_COUNT+ordinal] = TxnInfo.createMock(TxnId.NONE, status, TxnId.NONE, null); + DECODE_INFOS[2*STATUS_COUNT+ordinal] = TxnInfo.createMock(TxnId.NONE, status, null, CommandsForKey.NO_TXNIDS); + DECODE_INFOS[3*STATUS_COUNT+ordinal] = TxnInfo.createMock(TxnId.NONE, status, null, null); } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index 930807d7f021..3efb9e2c6c47 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -22,40 +22,63 @@ import java.nio.ByteBuffer; import accord.local.Command.WaitingOn; -import accord.primitives.Deps; +import accord.primitives.Keys; +import accord.primitives.Routable; +import accord.primitives.TxnId; import accord.utils.ImmutableBitSet; import accord.utils.Invariants; import accord.utils.SimpleBitSet; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.vint.VIntCoding; public class WaitingOnSerializer { - public static void serialize(WaitingOn waitingOn, DataOutputPlus out) throws IOException + public static void serialize(TxnId txnId, WaitingOn waitingOn, DataOutputPlus out) throws IOException { - // TODO (expected): use run length encoding; we know that at most 1/3rd of bits will be set between the three bitsets - int length = (waitingOn.deps.txnIdCount() + 63) / 64; - serialize(length, waitingOn.waitingOnCommit, out); - serialize(length, waitingOn.waitingOnApply, out); - serialize(length, waitingOn.appliedOrInvalidated, out); + out.writeUnsignedVInt32(waitingOn.keys.size()); + out.writeUnsignedVInt32(waitingOn.txnIds.size()); + int keyCount = waitingOn.keys.size(); + int txnIdCount = waitingOn.txnIds.size(); + int waitingOnLength = (txnIdCount + keyCount + 63) / 64; + serialize(waitingOnLength, waitingOn.waitingOn, out); + if (txnId.domain() == Routable.Domain.Range) + { + int appliedOrInvalidatedLength = (txnIdCount + 63) / 64; + serialize(appliedOrInvalidatedLength, waitingOn.appliedOrInvalidated, out); + } } - public static WaitingOn deserialize(Deps deps, DataInputPlus in) throws IOException + public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList txnIds, DataInputPlus in) throws IOException { - int length = (deps.txnIdCount() + 63) / 64; - ImmutableBitSet waitingOnCommit = deserialize(length, in); - ImmutableBitSet waitingOnApply = deserialize(length, in); - ImmutableBitSet appliedOrInvalidated = deserialize(length, in); - return new WaitingOn(deps, waitingOnCommit, waitingOnApply, appliedOrInvalidated); + int a = in.readUnsignedVInt32(); + int b = in.readUnsignedVInt32(); + int waitingOnLength = (txnIds.size() + keys.size() + 63) / 64; + ImmutableBitSet waitingOn = deserialize(waitingOnLength, in); + ImmutableBitSet appliedOrInvalidated = null; + if (txnId.domain() == Routable.Domain.Range) + { + int appliedOrInvalidatedLength = (txnIds.size() + 63) / 64; + appliedOrInvalidated = deserialize(appliedOrInvalidatedLength, in); + } + return new WaitingOn(keys, txnIds, waitingOn, appliedOrInvalidated); } public static long serializedSize(WaitingOn waitingOn) { - int length = (waitingOn.deps.txnIdCount() + 63) / 64; - return serializedSize(length, waitingOn.waitingOnCommit) - + serializedSize(length, waitingOn.waitingOnApply) - + serializedSize(length, waitingOn.appliedOrInvalidated); + int keyCount = waitingOn.keys.size(); + int txnIdCount = waitingOn.txnIds.size(); + int waitingOnLength = (txnIdCount + keyCount + 63) / 64; + long size = serializedSize(waitingOnLength, waitingOn.waitingOn); + size += TypeSizes.sizeofUnsignedVInt(keyCount); + size += TypeSizes.sizeofUnsignedVInt(txnIdCount); + if (waitingOn.appliedOrInvalidated == null) + return size; + + int appliedOrInvalidatedLength = (txnIdCount + 63) / 64; + return size + serializedSize(appliedOrInvalidatedLength, waitingOn.appliedOrInvalidated); } private static void serialize(int length, SimpleBitSet write, DataOutputPlus out) throws IOException @@ -81,14 +104,23 @@ public static long serializedSize(int length, SimpleBitSet write) return (long) TypeSizes.LONG_SIZE * length; } - public static ByteBuffer serialize(WaitingOn waitingOn) throws IOException + public static ByteBuffer serialize(TxnId txnId, WaitingOn waitingOn) throws IOException { - int length = (waitingOn.deps.txnIdCount() + 63) / 64; - ByteBuffer out = ByteBuffer.allocate(TypeSizes.LONG_SIZE * length * 3); - serialize(length, waitingOn.waitingOnCommit, out); - serialize(length, waitingOn.waitingOnApply, out); - serialize(length, waitingOn.appliedOrInvalidated, out); - return (ByteBuffer) out.flip(); + int keyCount = waitingOn.keys.size(); + int txnIdCount = waitingOn.txnIds.size(); + int waitingOnLength = (txnIdCount + keyCount + 63) / 64; + int appliedOrInvalidatedLength = 0; + if (txnId.domain() == Routable.Domain.Range) + appliedOrInvalidatedLength = (txnIdCount + 63) / 64; + + ByteBuffer out = ByteBuffer.allocate(TypeSizes.sizeofUnsignedVInt(keyCount) + TypeSizes.sizeofUnsignedVInt(txnIdCount) + + TypeSizes.LONG_SIZE * (waitingOnLength + appliedOrInvalidatedLength)); + VIntCoding.writeUnsignedVInt32(keyCount, out); + VIntCoding.writeUnsignedVInt32(txnIdCount, out); + serialize(waitingOnLength, waitingOn.waitingOn, out); + if (appliedOrInvalidatedLength > 0) + serialize(appliedOrInvalidatedLength, waitingOn.appliedOrInvalidated, out); + return out.flip(); } private static void serialize(int length, SimpleBitSet write, ByteBuffer out) @@ -99,16 +131,23 @@ private static void serialize(int length, SimpleBitSet write, ByteBuffer out) out.putLong(bits[i]); } - public static WaitingOn deserialize(Deps deps, ByteBuffer in) throws IOException + public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList txnIds, ByteBuffer in) throws IOException { - int length = (deps.txnIdCount() + 63) / 64; + int waitingOnLength = (txnIds.size() + keys.size() + 63) / 64; int position = in.position(); - ImmutableBitSet waitingOnCommit = deserialize(position, length, in); - position += length*8; - ImmutableBitSet waitingOnApply = deserialize(position, length, in); - position += length*8; - ImmutableBitSet appliedOrInvalidated = deserialize(position, length, in); - return new WaitingOn(deps, waitingOnCommit, waitingOnApply, appliedOrInvalidated); + int a = VIntCoding.readUnsignedVInt32(in, position); + position += TypeSizes.sizeofUnsignedVInt(a); + int b = VIntCoding.readUnsignedVInt32(in, position); + position += TypeSizes.sizeofUnsignedVInt(a); + ImmutableBitSet waitingOn = deserialize(position, waitingOnLength, in); + ImmutableBitSet appliedOrInvalidated = null; + if (txnId.domain() == Routable.Domain.Range) + { + position += waitingOnLength*8; + int appliedOrInvalidatedLength = (txnIds.size() + 63) / 64; + appliedOrInvalidated = deserialize(position, appliedOrInvalidatedLength, in); + } + return new WaitingOn(keys, txnIds, waitingOn, appliedOrInvalidated); } private static ImmutableBitSet deserialize(int position, int length, ByteBuffer in) diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index ec85d63f2c27..5e3e9ff831ef 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -35,9 +35,10 @@ import org.slf4j.LoggerFactory; import accord.api.DataStore; +import accord.api.Key; import accord.api.Write; import accord.impl.AbstractSafeCommandStore; -import accord.impl.CommandsForKeys; +import accord.impl.TimestampsForKeys; import accord.impl.TimestampsForKey; import accord.local.SafeCommandStore; import accord.primitives.PartialTxn; @@ -375,7 +376,7 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; // any that aren't can just use executeAt.hlc - TimestampsForKey cfk = CommandsForKeys.updateLastExecutionTimestamps((AbstractSafeCommandStore) safeStore, (RoutableKey) key, executeAt, true); + TimestampsForKey cfk = TimestampsForKeys.updateLastExecutionTimestamps((AbstractSafeCommandStore) safeStore, (Key) key, executeAt, true); long timestamp = AccordSafeTimestampsForKey.timestampMicrosFor(cfk, executeAt, true); // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) int nowInSeconds = AccordSafeTimestampsForKey.nowInSecondsFor(cfk, executeAt, true); diff --git a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java index 8b52bb41e463..dc873f02100f 100644 --- a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java +++ b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java @@ -126,13 +126,27 @@ private static long readUnsignedVIntSlow(ByteBuffer in, byte firstByte) return retval; } + @DontInline + private static long readUnsignedVIntSlow(ByteBuffer in, int position, byte firstByte) + { + int size = numberOfExtraBytesToRead(firstByte); + long retval = firstByte & firstByteValueMask(size); + for (int ii = 0; ii < size; ii++) + { + byte b = in.get(position++); + retval <<= 8; + retval |= b & 0xff; + } + + return retval; + } + public static long readUnsignedVInt(ByteBuffer in) { byte firstByte = in.get(); if (firstByte >= 0) return firstByte; - int position = in.position(); int limit = in.limit(); if (limit - position < 8) @@ -155,6 +169,32 @@ public static long readUnsignedVInt(ByteBuffer in) return retval; } + public static long readUnsignedVInt(ByteBuffer in, int position) + { + byte firstByte = in.get(position++); + if (firstByte >= 0) + return firstByte; + + int limit = in.limit(); + if (limit - position < 8) + return readUnsignedVIntSlow(in, position, firstByte); + + int extraBytes = VIntCoding.numberOfExtraBytesToRead(firstByte); + int extraBits = extraBytes * 8; + + long retval = in.getLong(position); + if (in.order() == ByteOrder.LITTLE_ENDIAN) + retval = Long.reverseBytes(retval); + + // truncate the bytes we read in excess of those we needed + retval >>>= 64 - extraBits; + // remove the non-value bits from the first byte + firstByte &= VIntCoding.firstByteValueMask(extraBytes); + // shift the first byte up to its correct position + retval |= (long) firstByte << extraBits; + return retval; + } + public static void skipUnsignedVInt(DataInputPlus input) throws IOException { int firstByte = input.readByte(); @@ -330,6 +370,11 @@ public static int readUnsignedVInt32(ByteBuffer input) return checkedCast(readUnsignedVInt(input)); } + public static int readUnsignedVInt32(ByteBuffer input, int position) + { + return checkedCast(readUnsignedVInt(input, position)); + } + // & this with the first byte to give the value part for a given extraBytesToRead encoded in the byte public static int firstByteValueMask(int extraBytesToRead) { diff --git a/test/conf/logback-dtest-quiet.xml b/test/conf/logback-dtest-quiet.xml new file mode 100644 index 000000000000..bb9f983177b9 --- /dev/null +++ b/test/conf/logback-dtest-quiet.xml @@ -0,0 +1,56 @@ + + + + + + + + + + + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log + + %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + + + INFO + + true + + + + + %-5level %date{HH:mm:ss,SSS} %msg%n + + + ERROR + + + + + + + + + + diff --git a/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java b/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java index aee6aeaeb8df..5b27d3d44f00 100644 --- a/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java +++ b/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java @@ -21,6 +21,7 @@ import java.util.Iterator; import java.util.UUID; import java.util.concurrent.Future; +import java.util.function.BiConsumer; import org.apache.cassandra.distributed.shared.FutureUtils; @@ -60,6 +61,8 @@ default Object[][] execute(String query, ConsistencyLevel serialConsistencyLevel } SimpleQueryResult executeWithResult(String query, ConsistencyLevel consistencyLevel, Object... boundValues); + Future executeWithResult(BiConsumer callback, String query, ConsistencyLevel consistencyLevel, Object... boundValues); + Future executeWithResult(BiConsumer callback, String query, ConsistencyLevel serialConsistencyLevel, ConsistencyLevel commitConsistencyLevel, Object... boundValues); default SimpleQueryResult executeWithResult(String query, ConsistencyLevel serialConsistencyLevel, ConsistencyLevel commitConsistencyLevel, Object... boundValues) { diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java index 9d0616f90f02..8d17177884ac 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.UUID; import java.util.concurrent.Future; +import java.util.function.BiConsumer; import com.google.common.collect.Iterators; @@ -62,6 +63,30 @@ public SimpleQueryResult executeWithResult(String query, ConsistencyLevel consis return instance().sync(() -> unsafeExecuteInternal(query, consistencyLevel, boundValues)).call(); } + @Override + public Future executeWithResult(BiConsumer callback, String query, ConsistencyLevel consistencyLevel, Object... boundValues) + { + return executeWithResult(callback, query, null, consistencyLevel, boundValues); + } + + @Override + public Future executeWithResult(BiConsumer callback, String query, ConsistencyLevel serialConsistencyLevel, ConsistencyLevel commitConsistencyLevel, Object... boundValues) + { + return instance().async(cb -> { + SimpleQueryResult result; + try + { + result = CoordinatorHelper.unsafeExecuteInternal(query, serialConsistencyLevel, commitConsistencyLevel, boundValues); + } + catch (Throwable t) + { + callback.accept(null, t); + return; + } + callback.accept(result, null); + }).apply(callback); + } + public Future asyncExecuteWithTracingWithResult(UUID sessionId, String query, ConsistencyLevel consistencyLevelOrigin, Object... boundValues) { return instance.async(() -> { diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index 2f81441c198c..c703310101cd 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -507,6 +507,7 @@ public static Message.Header deserializeHeader(IMessage message) public void receiveMessage(IMessage message) { sync(receiveMessageRunnable(message)).accept(false); +// async(receiveMessageRunnable(message)).apply(false); } @Override diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java new file mode 100644 index 000000000000..42e7fbf34a1a --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.util.Date; +import java.util.Random; +import java.util.concurrent.Semaphore; + +import com.google.common.util.concurrent.RateLimiter; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.utils.EstimatedHistogram; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; + +public class AccordLoadTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordLoadTest.class); + + @BeforeClass + public static void setUp() throws IOException + { + AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config.set("lwt_strategy", "accord").set("non_serial_write_strategy", "accord")), 2); + } + + @Ignore + @Test + public void testLoad() throws Exception + { + test("CREATE TABLE " + qualifiedTableName + " (k int, v int, PRIMARY KEY(k))", + cluster -> { + ICoordinator coordinator = cluster.coordinator(1); + final int batchSize = 1000; + final int concurrency = 100; + final int ratePerSecond = 1000; + final int keyCount = 10; + for (int i = 1; i <= keyCount; i++) + coordinator.execute("INSERT INTO " + qualifiedTableName + " (k, v) VALUES (0, 0) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i); + + Random random = new Random(); +// CopyOnWriteArrayList exceptions = new CopyOnWriteArrayList<>(); + final Semaphore inFlight = new Semaphore(concurrency); + final RateLimiter rateLimiter = RateLimiter.create(ratePerSecond); + long testStart = System.nanoTime(); +// while (NANOSECONDS.toMinutes(System.nanoTime() - testStart) < 10 && exceptions.size() < 10000) + while (true) + { + final EstimatedHistogram histogram = new EstimatedHistogram(200); + long batchStart = System.nanoTime(); + for (int i = 0 ; i < batchSize ; ++i) + { + inFlight.acquire(); + rateLimiter.acquire(); + long commandStart = System.nanoTime(); + coordinator.executeWithResult((success, fail) -> { + inFlight.release(); + if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); +// else exceptions.add(fail); + }, "UPDATE " + qualifiedTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, random.nextInt(keyCount)); + } + System.out.printf("%tT rate: %.2f/s\n", new Date(), (((float)batchSize * 1000) / NANOSECONDS.toMillis(System.nanoTime() - batchStart))); + System.out.printf("%tT percentiles: %d %d %d %d\n", new Date(), histogram.percentile(.25)/1000, histogram.percentile(.5)/1000, histogram.percentile(.75)/1000, histogram.percentile(1)/1000); + } + } + ); + } + + @Override + protected Logger logger() + { + return logger; + } +} diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 95c495a35016..ee38ffceb926 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -38,7 +38,7 @@ import accord.api.Key; import accord.api.Result; -import accord.impl.CommandsForKey; +import accord.local.CommandsForKey; import accord.local.CheckedCommands; import accord.local.Command; import accord.local.CommandStore; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index af4e3f738b61..fdaf51987494 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -31,8 +31,8 @@ import accord.api.Key; import accord.api.Result; -import accord.impl.CommandsForKey; -import accord.impl.CommandsForKeys; +import accord.local.CommandsForKey; +import accord.impl.TimestampsForKeys; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommonAttributes; @@ -41,10 +41,13 @@ import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Range; import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.RoutingKeys; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; import accord.primitives.Writes; import accord.utils.ImmutableBitSet; @@ -102,20 +105,20 @@ public void commandLoadSave() throws Throwable AtomicLong clock = new AtomicLong(0); PartialTxn depTxn = createPartialTxn(0); Key key = (Key)depTxn.keys().get(0); + Range range = key.toUnseekable().asRange(); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, c, v) VALUES (0, 0, 1)"); TableId tableId = Schema.instance.getTableMetadata("ks", "tbl").id; - TxnId oldTxnId1 = txnId(1, clock.incrementAndGet(), 1); - TxnId oldTxnId2 = txnId(1, clock.incrementAndGet(), 1); - TxnId oldTimestamp = txnId(1, clock.incrementAndGet(), 1); - TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + TxnId oldTxnId1 = txnId(1, clock.incrementAndGet(), 1, Txn.Kind.Write, Routable.Domain.Range); + TxnId oldTxnId2 = txnId(1, clock.incrementAndGet(), 1, Txn.Kind.Write, Routable.Domain.Range); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1, Txn.Kind.Write, Routable.Domain.Range); PartialDeps dependencies; try (PartialDeps.Builder builder = PartialDeps.builder(depTxn.covering())) { - builder.add(key, oldTxnId1); - builder.add(key, oldTxnId2); + builder.add(range, oldTxnId1); + builder.add(range, oldTxnId2); dependencies = builder.build(); } @@ -130,11 +133,9 @@ public void commandLoadSave() throws Throwable Ballot accepted = ballot(1, clock.incrementAndGet(), 1); Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); attrs.partialDeps(dependencies); - SimpleBitSet waitingOnCommit = new SimpleBitSet(2); - waitingOnCommit.set(0); - SimpleBitSet waitingOnApply = new SimpleBitSet(2); + SimpleBitSet waitingOnApply = new SimpleBitSet(3); waitingOnApply.set(1); - Command.WaitingOn waitingOn = new Command.WaitingOn(dependencies, new ImmutableBitSet(waitingOnCommit), new ImmutableBitSet(waitingOnApply), new ImmutableBitSet(2)); + Command.WaitingOn waitingOn = new Command.WaitingOn(dependencies.keyDeps.keys(), dependencies.rangeDeps.txnIds(), new ImmutableBitSet(waitingOnApply), new ImmutableBitSet(2)); attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); @@ -183,10 +184,10 @@ public void timestampsForKeyLoadSave() AccordSafeTimestampsForKey tfk = new AccordSafeTimestampsForKey(loaded(key, null)); tfk.initialize(); - CommandsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId1, true); + TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId1, true); Assert.assertEquals(txnId1.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId1, true)); - CommandsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId2, true); + TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId2, true); Assert.assertEquals(txnId2.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId2, true)); Assert.assertEquals(txnId2, tfk.current().lastExecutedTimestamp()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 5b1a1402a2e7..71e821d32950 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -26,7 +26,7 @@ import accord.api.Key; import accord.api.RoutingKey; -import accord.impl.CommandsForKey; +import accord.local.CommandsForKey; import accord.local.Command; import accord.local.KeyHistory; import accord.local.Node; @@ -117,7 +117,7 @@ public void basicCycleTest() throws Throwable Assert.assertEquals(Status.PreAccepted, command.status()); Assert.assertTrue(command.partialDeps() == null || command.partialDeps().isEmpty()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).get(key(1)).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); })); @@ -145,7 +145,7 @@ public void basicCycleTest() throws Throwable Assert.assertEquals(Status.Accepted, command.status()); Assert.assertEquals(deps, command.partialDeps()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).get(key(1)).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); })); @@ -160,7 +160,7 @@ public void basicCycleTest() throws Throwable Assert.assertTrue(command.hasBeen(Status.Committed)); Assert.assertEquals(commit.partialDeps, command.partialDeps()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).commandsForKey(key(1)).current(); + CommandsForKey cfk = ((AccordSafeCommandStore) instance).get(key(1)).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); })); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 66d0c8e436b3..0ae2a6abc845 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -30,6 +30,8 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import javax.annotation.Nullable; + import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.junit.Assert; @@ -59,6 +61,7 @@ import accord.primitives.PartialTxn; import accord.primitives.Participants; import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.Seekable; import accord.primitives.Seekables; @@ -199,8 +202,8 @@ public static void testLoad(ManualExecutor executor, AccordSafeState blockedOnRoute, Participants blockedOnParticipants) {} + @Override public void waiting(SafeCommand blockedBy, LocalExecution blockedUntil, Route blockedOnRoute, Participants blockedOnParticipants) {} + @Override public void waiting(TxnId blockedBy, LocalExecution blockedUntil, @Nullable Route blockedOnRoute, @Nullable Participants blockedOnParticipants) {} }; public static TxnId txnId(long epoch, long hlc, int node) @@ -213,6 +216,11 @@ public static TxnId txnId(long epoch, long hlc, int node, Txn.Kind kind) return new TxnId(epoch, hlc, kind, Key, new Node.Id(node)); } + public static TxnId txnId(long epoch, long hlc, int node, Txn.Kind kind, Routable.Domain domain) + { + return new TxnId(epoch, hlc, kind, domain, new Node.Id(node)); + } + public static Timestamp timestamp(long epoch, long hlc, int node) { return Timestamp.fromValues(epoch, hlc, new Node.Id(node)); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 8c952f02c0a1..33f0dc24589b 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -32,13 +32,12 @@ import org.junit.Test; import accord.api.Key; -import accord.impl.CommandsForKey; +import accord.local.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.KeyHistory; import accord.primitives.Keys; import accord.primitives.PartialTxn; -import accord.primitives.RoutableKey; import accord.primitives.TxnId; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; @@ -96,7 +95,7 @@ public void cachedTest() AccordStateCache.Instance commandCache = commandStore.commandCache(); commandStore.executeBlocking(() -> commandStore.setCapacity(1024)); - AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); + AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); @@ -375,7 +374,7 @@ public void inProgressCommandSaveTest() @Test public void inProgressCFKSaveTest() { - inProgressCFKSaveTest(COMMANDS, AccordCommandStore::commandsForKeyCache, context -> context.commandsForKey, CommandsForKey::new, (cfk, u) -> cfk.update(null, u)); + this.inProgressCFKSaveTest(COMMANDS, AccordCommandStore::commandsForKeyCache, context -> context.commandsForKey, CommandsForKey::new, (cfk, u) -> cfk.update(null, u)); } @Test @@ -384,7 +383,7 @@ public void inProgressTFKSaveTest() inProgressCFKSaveTest(TIMESTAMPS, AccordCommandStore::timestampsForKeyCache, context -> context.timestampsForKey, TimestampsForKey::new, (tfk, c) -> new TimestampsForKey(tfk.key(), c.executeAt(), c.executeAt().hlc(), c.executeAt())); } - private , C extends AccordStateCache.Instance> void inProgressCFKSaveTest(KeyHistory history, Function getter, Function> inContext, Function initialiser, BiFunction update) + private , C extends AccordStateCache.Instance> void inProgressCFKSaveTest(KeyHistory history, Function getter, Function> inContext, Function initialiser, BiFunction update) { AtomicLong clock = new AtomicLong(0); ManualExecutor executor = new ManualExecutor(); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index b235d1ae59d7..a5ece26a51a3 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -36,7 +36,7 @@ import org.slf4j.LoggerFactory; import accord.api.RoutingKey; -import accord.impl.SafeCommandsForKey; +import accord.local.SafeCommandsForKey; import accord.local.CheckedCommands; import accord.local.Command; import accord.local.PreLoadContext; @@ -53,7 +53,6 @@ import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; import accord.primitives.Ranges; -import accord.primitives.RoutableKey; import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -259,7 +258,7 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command Accept accept = Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); Commit commit = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.Commit, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); + Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.CommitSlowPath, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); Commit stable = Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableSlowPath, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); @@ -482,8 +481,7 @@ private static void assertNoReferences(AccordCommandStore commandStore, List) (Iterable) keys); + assertNoReferences(commandStore.commandsForKeyCache(), keys); } catch (AssertionError e) { @@ -524,8 +522,7 @@ private static void assertNoReferences(AccordStateCache.Instance ca private static void awaitDone(AccordCommandStore commandStore, List ids, Keys keys) { awaitDone(commandStore.commandCache(), ids); - //TODO this is due to bad typing for Instance, it doesn't use ? extends RoutableKey - awaitDone(commandStore.commandsForKeyCache(), (Iterable) (Iterable) keys); + awaitDone(commandStore.commandsForKeyCache(), keys); } private static void awaitDone(AccordStateCache.Instance cache, Iterable keys) diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index e12b3fbf878b..405f92dc595f 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -38,9 +38,10 @@ import org.junit.Test; import accord.api.Key; -import accord.impl.CommandsForKey; -import accord.impl.CommandsForKey.InternalStatus; +import accord.local.CommandsForKey; +import accord.local.CommandsForKey.InternalStatus; import accord.local.Command; +import accord.local.CommandsForKey.TxnInfo; import accord.local.CommonAttributes; import accord.local.CommonAttributes.Mutable; import accord.local.Listeners; @@ -241,9 +242,12 @@ private static ObjectGraph generateObjectGraph(int txnIdCount, Supplier t List deps = cmds[i].deps; List missing = cmds[i].missing; for (int j = 0 ; j < limit ; ++j) - if (i != j) deps.add(cmds[j].txnId); + { + if (i != j && cmds[i].txnId.kind().witnesses(cmds[j].txnId)) + deps.add(cmds[j].txnId); + } - int missingCount = Math.min(limit - (limit > i ? 1 : 0), missingCountSupplier.getAsInt()); + int missingCount = Math.min(deps.size(), missingCountSupplier.getAsInt()); while (missingCount > 0) { int remove = source.nextInt(deps.size()); @@ -267,14 +271,14 @@ private static ObjectGraph generateObjectGraph(int txnIdCount, Supplier t { InternalStatus status = InternalStatus.from(cmds[j].saveStatus); if (status == null || !status.hasInfo) continue; - if (status.depsKnownBefore(cmds[j].txnId, cmds[j].executeAt).compareTo(cmds[i].txnId) > 0 && Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) + if (cmds[j].txnId.kind().witnesses(cmds[i].txnId) && status.depsKnownBefore(cmds[j].txnId, cmds[j].executeAt).compareTo(cmds[i].txnId) > 0 && Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) continue outer; } for (int j = i + 1 ; j < cmds.length ; ++j) { InternalStatus status = InternalStatus.from(cmds[j].saveStatus); if (status == null || !status.hasInfo) continue; - if (Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) + if (cmds[j].txnId.kind().witnesses(cmds[i].txnId) && Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) continue outer; } cmds[i].invisible = true; @@ -322,7 +326,7 @@ private static Function timestampSupplier(Se @Test public void serde() { -// testOne(1821931462020409370L); + testOne(-6946067792202944553L); Random random = new Random(); for (int i = 0 ; i < 10000 ; ++i) { @@ -426,13 +430,13 @@ private static void testOne(long seed) Assert.assertTrue(cmd.invisible); continue; } - CommandsForKey.Info info = cfk.info(i); + TxnInfo info = cfk.get(i); InternalStatus expectStatus = InternalStatus.from(cmd.saveStatus); if (expectStatus == null) expectStatus = InternalStatus.TRANSITIVELY_KNOWN; if (expectStatus.hasInfo) - Assert.assertEquals(cmd.executeAt, info.executeAt(cfk.txnId(i))); + Assert.assertEquals(cmd.executeAt, info.executeAt); Assert.assertEquals(expectStatus, info.status); - Assert.assertArrayEquals(cmd.missing.toArray(TxnId[]::new), info.missing); + Assert.assertArrayEquals(cmd.missing.toArray(TxnId[]::new), info.missing()); ++i; } @@ -461,11 +465,12 @@ public void test() next = txnIdGen.next(rs0); return next; }).unique().ofSizeBetween(0, 10).next(rs); - CommandsForKey.Info[] info = new CommandsForKey.Info[ids.length]; + TxnInfo[] info = new TxnInfo[ids.length]; for (int i = 0; i < info.length; i++) - info[i] = rs.pick(InternalStatus.values()).asNoInfo; - Arrays.sort(ids, Comparator.naturalOrder()); - CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, redudentBefore, ids, info); + info[i] = TxnInfo.create(ids[i], rs.pick(InternalStatus.values()), ids[i], CommandsForKey.NO_TXNIDS); + Arrays.sort(info, Comparator.naturalOrder()); + + CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, CommandsForKey.NO_PENDING_UNMANAGED); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); @@ -479,10 +484,10 @@ public void thereAndBackAgain() long tokenValue = -2311778975040348869L; DecoratedKey key = Murmur3Partitioner.instance.decorateKey(Murmur3Partitioner.LongToken.keyForToken(tokenValue)); PartitionKey pk = new PartitionKey(TableId.fromString("1b255f4d-ef25-40a6-0000-000000000009"), key); + TxnId txnId = TxnId.fromValues(11,34052499,2,1); CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, - TxnId.fromValues(0,0,0,0), - new TxnId[] {TxnId.fromValues(11,34052499,2,1)}, - new CommandsForKey.Info[] { InternalStatus.PREACCEPTED.asNoInfo}); + new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED, txnId, CommandsForKey.NO_TXNIDS) }, + CommandsForKey.NO_PENDING_UNMANAGED); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java index dccf0e14b691..6e2b1f369000 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java @@ -23,6 +23,8 @@ import accord.local.Command; import accord.primitives.Deps; +import accord.primitives.Routable; +import accord.primitives.TxnId; import accord.utils.Gen; import accord.utils.Gens; import accord.utils.SimpleBitSet; @@ -51,19 +53,20 @@ public void serde() { DataOutputBuffer buffer = new DataOutputBuffer(); qt().forAll(waitingOnGen()).check(waitingOn -> { + TxnId txnId = TxnId.NONE; + if (waitingOn.appliedOrInvalidated != null) txnId = new TxnId(txnId.epoch(), txnId.hlc(), txnId.kind(), Routable.Domain.Range, txnId.node); buffer.clear(); long expectedSize = WaitingOnSerializer.serializedSize(waitingOn); - WaitingOnSerializer.serialize(waitingOn, buffer); + WaitingOnSerializer.serialize(txnId, waitingOn, buffer); Assertions.assertThat(buffer.getLength()).isEqualTo(expectedSize); - Command.WaitingOn read = WaitingOnSerializer.deserialize(waitingOn.deps, new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)); + Command.WaitingOn read = WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.txnIds, new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)); Assertions.assertThat(read) .isEqualTo(waitingOn) - .isEqualTo(WaitingOnSerializer.deserialize(waitingOn.deps, WaitingOnSerializer.serialize(waitingOn))); + .isEqualTo(WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.txnIds, WaitingOnSerializer.serialize(txnId, waitingOn))); }); } - private enum WaitingOnSets - {COMMIT, APPLY, APPLYED_OR_INVALIDATED} + private enum WaitingOnSets { APPLY, APPLIED_OR_INVALIDATED } private static Gen waitingOnGen() { @@ -73,22 +76,20 @@ private static Gen waitingOnGen() return rs -> { Deps deps = depsGen.next(rs); if (deps.isEmpty()) return Command.WaitingOn.EMPTY; - int[] selected = Gens.arrays(Gens.ints().between(0, deps.txnIdCount() - 1)).unique().ofSizeBetween(0, deps.txnIdCount() - 1).next(rs); - SimpleBitSet waitingOnCommit = new SimpleBitSet(deps.txnIdCount(), false); - SimpleBitSet waitingOnApply = new SimpleBitSet(deps.txnIdCount(), false); - SimpleBitSet appliedOrInvalidated = new SimpleBitSet(deps.txnIdCount(), false); + int txnIdCount = deps.rangeDeps.txnIdCount(); + int keyCount = deps.keyDeps.keys().size(); + int[] selected = Gens.arrays(Gens.ints().between(0, txnIdCount + keyCount - 1)).unique().ofSizeBetween(0, txnIdCount + keyCount).next(rs); + SimpleBitSet waitingOn = new SimpleBitSet(txnIdCount + keyCount, false); + SimpleBitSet appliedOrInvalidated = rs.nextBoolean() ? null : new SimpleBitSet(txnIdCount, false); for (int i : selected) { - WaitingOnSets set = sets.next(rs); + WaitingOnSets set = appliedOrInvalidated == null || i >= txnIdCount ? WaitingOnSets.APPLY : sets.next(rs); switch (set) { - case COMMIT: - waitingOnCommit.set(i); - break; case APPLY: - waitingOnApply.set(i); + waitingOn.set(i); break; - case APPLYED_OR_INVALIDATED: + case APPLIED_OR_INVALIDATED: appliedOrInvalidated.set(i); break; default: @@ -96,7 +97,7 @@ private static Gen waitingOnGen() } } - return new Command.WaitingOn(deps, Utils.ensureImmutable(waitingOnCommit), Utils.ensureImmutable(waitingOnApply), Utils.ensureImmutable(appliedOrInvalidated)); + return new Command.WaitingOn(deps.keyDeps.keys(), deps.rangeDeps.txnIds(), Utils.ensureImmutable(waitingOn), Utils.ensureImmutable(appliedOrInvalidated)); }; } } \ No newline at end of file From 333c748a915a38e1fffc055c65e2d8a416485999 Mon Sep 17 00:00:00 2001 From: ci worker Date: Thu, 28 Mar 2024 20:03:39 -0700 Subject: [PATCH 102/340] Ninja for CASSANDRA-19305: Disable EphemeralRead by default to get benchmarks stable --- src/java/org/apache/cassandra/config/AccordSpec.java | 1 + .../org/apache/cassandra/config/DatabaseDescriptor.java | 5 +++++ .../cassandra/cql3/statements/TransactionStatement.java | 6 ++++-- src/java/org/apache/cassandra/service/StorageProxy.java | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index e76745a233e0..ab80ec4b3254 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -69,4 +69,5 @@ public enum TransactionalRangeMigration * default transactional mode for tables created by this node when no transactional mode has been specified in the DDL */ public TransactionalMode default_transactional_mode = TransactionalMode.off; + public boolean ephemeralReadEnabled = false; } diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index a75e6cb88c5a..e32496a17262 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5810,4 +5810,9 @@ public static void setPaxosRepairRaceWait(boolean paxosRepairRaceWait) { conf.paxos_repair_race_wait = paxosRepairRaceWait; } + + public static boolean getAccordEphemeralReadEnabledEnabled() + { + return conf.accord.ephemeralReadEnabled; + } } diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index 208b9590f4e5..46e94293b0e0 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -332,8 +332,10 @@ public Txn createTxn(ClientState state, QueryOptions options) List reads = createNamedReads(options, state, ImmutableMap.of(), keySet::add); Keys txnKeys = toKeys(keySet); TxnRead read = createTxnRead(reads, txnKeys, null); - Txn.Kind kind = txnKeys.size() == 1 && transactionalModeForSingleKey(txnKeys) == TransactionalMode.full - ? EphemeralRead : Read; + Txn.Kind kind = txnKeys.size() == 1 + && transactionalModeForSingleKey(txnKeys) == TransactionalMode.full + && DatabaseDescriptor.getAccordEphemeralReadEnabledEnabled() + ? EphemeralRead : Read; return new Txn.InMemory(kind, txnKeys, read, TxnQuery.ALL, null); } else diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index dc78f995316a..59b39f5ee3a6 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -2053,7 +2053,7 @@ private static ConsensusAttemptResult readWithAccord(SinglePartitionReadCommand. consistencyLevel = transactionalMode.readCLForStrategy(consistencyLevel); TxnRead read = TxnRead.createSerialRead(readCommand, consistencyLevel); Invariants.checkState(read.keys().size() == 1, "Ephemeral reads are only strict-serializable for single partition reads"); - Txn txn = new Txn.InMemory(transactionalMode == TransactionalMode.full ? EphemeralRead : Read, read.keys(), read, TxnQuery.ALL, null); + Txn txn = new Txn.InMemory(transactionalMode == TransactionalMode.full && DatabaseDescriptor.getAccordEphemeralReadEnabledEnabled() ? EphemeralRead : Read, read.keys(), read, TxnQuery.ALL, null); IAccordService accordService = AccordService.instance(); TxnResult txnResult = accordService.coordinate(txn, consistencyLevel, requestTime); if (txnResult.kind() == retry_new_protocol) From 777cf84f64ea2f8afb231ce0e574e17de2494eda Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 29 Mar 2024 10:11:03 -0700 Subject: [PATCH 103/340] Accord: PreLoadContext must properly and consistently support ranges patch by David Capwell; reviewed by Benedict Elliott Smith for CASSANDRA-19355 --- .build/build-resolver.xml | 4 - lib/harry-core-0.0.2-CASSANDRA-18768.jar | Bin 458194 -> 0 bytes modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 1 + .../config/CassandraRelevantProperties.java | 3 +- .../cassandra/config/DatabaseDescriptor.java | 5 + .../accord/CheckpointIntervalArrayIndex.java | 721 ++++++++++++++++++ .../apache/cassandra/index/accord/Group.java | 68 ++ .../index/accord/IndexDescriptor.java | 171 +++++ .../cassandra/index/accord/IndexMetrics.java | 85 +++ .../cassandra/index/accord/MemtableIndex.java | 70 ++ .../index/accord/MemtableIndexManager.java | 41 + .../index/accord/OrderedRouteSerializer.java | 59 ++ .../index/accord/RangeMemoryIndex.java | 244 ++++++ .../cassandra/index/accord/RouteIndex.java | 604 +++++++++++++++ .../index/accord/RouteIndexFormat.java | 255 +++++++ .../accord/RouteMemtableIndexManager.java | 111 +++ .../index/accord/RouteSSTableManager.java | 91 +++ .../accord/RouteSecondaryIndexBuilder.java | 239 ++++++ .../index/accord/RoutesSearcher.java | 128 ++++ .../cassandra/index/accord/SSTableIndex.java | 152 ++++ .../index/accord/SSTableManager.java | 34 + .../cassandra/index/accord/Segment.java | 59 ++ .../apache/cassandra/io/util/Checksumed.java | 57 ++ .../io/util/ChecksumedDataInputPlus.java | 105 +++ .../io/util/ChecksumedDataOutputPlus.java | 110 +++ .../io/util/ChecksumedFileDataInput.java | 100 +++ .../io/util/ChecksumedRandomAccessReader.java | 46 ++ .../io/util/ChecksumedSequentialWriter.java | 83 ++ .../cassandra/io/util/DataInputPlus.java | 76 ++ .../cassandra/io/util/DataOutputPlus.java | 61 ++ .../org/apache/cassandra/journal/Journal.java | 5 +- .../cassandra/locator/ReplicaLayout.java | 8 +- .../cassandra/schema/SchemaProvider.java | 8 + .../org/apache/cassandra/schema/TableId.java | 4 +- .../service/accord/AccordCachingState.java | 11 + .../service/accord/AccordCommandStore.java | 224 +++--- .../service/accord/AccordJournal.java | 67 +- .../service/accord/AccordKeyspace.java | 116 ++- .../service/accord/AccordMessageSink.java | 8 +- .../accord/AccordSafeCommandStore.java | 103 +-- .../accord/AccordSafeCommandsForRanges.java | 128 ++++ .../service/accord/AccordService.java | 12 + .../service/accord/AccordStateCache.java | 167 +++- .../service/accord/AccordVerbHandler.java | 2 +- .../service/accord/CommandsForRanges.java | 586 +++----------- .../accord/CommandsForRangesLoader.java | 280 +++++++ .../service/accord/IAccordService.java | 5 +- .../cassandra/service/accord/IJournal.java | 29 + .../accord/RangeTreeRangeAccessor.java | 70 ++ .../service/accord/api/AccordRoutingKey.java | 4 +- .../service/accord/api/PartitionKey.java | 3 +- .../service/accord/async/AsyncLoader.java | 96 +-- .../service/accord/async/AsyncOperation.java | 34 +- .../service/accord/async/ExecutionOrder.java | 246 +++++- .../service/accord/events/CacheEvents.java | 78 ++ .../AccordRoutingKeyByteSource.java | 253 ++++++ .../org/apache/cassandra/utils/Clock.java | 8 + .../cassandra/utils/CloseableIterator.java | 1 - .../apache/cassandra/utils/IntervalTree.java | 17 +- .../apache/cassandra/utils/MutableEntry.java | 75 ++ .../org/apache/cassandra/utils/RTree.java | 535 +++++++++++++ .../org/apache/cassandra/utils/RangeTree.java | 71 ++ .../apache/cassandra/utils/TriPredicate.java | 24 + .../distributed/impl/InstanceConfig.java | 4 +- ...ordCQLTest.java => AccordCQLTestBase.java} | 81 +- .../test/accord/FullAccordCQLTest.java | 29 + .../test/accord/MixedReadAccordCQLTest.java | 29 + .../test/log/ClusterMetadataTestHelper.java | 29 + .../distributed/upgrade/UpgradeTestBase.java | 2 + test/unit/accord/utilsfork/Gens.java | 2 +- test/unit/accord/utilsfork/RandomSource.java | 7 +- .../{utils => utilsfork}/random/Picker.java | 2 +- .../concurrent/ForwardingExecutorPlus.java | 8 +- .../org/apache/cassandra/cql3/CQLTester.java | 20 + .../cassandra/dht/IPartitionerTest.java | 48 ++ .../index/accord/AccordIndexStressTest.java | 517 +++++++++++++ .../CheckpointIntervalArrayIndexTest.java | 441 +++++++++++ .../index/accord/RouteIndexTest.java | 508 ++++++++++++ .../index/internal/CassandraIndexTest.java | 8 +- .../cassandra/io/util/ChecksumedDataTest.java | 244 ++++++ .../apache/cassandra/repair/FuzzTestBase.java | 94 +-- .../accord/AccordCommandStoreTest.java | 2 +- .../service/accord/AccordKeyspaceTest.java | 194 ++++- .../service/accord/AccordReadRepairTest.java | 2 +- .../service/accord/AccordTestUtils.java | 6 + .../service/accord/CommandsForRangesTest.java | 115 --- .../cassandra/service/accord/MockJournal.java | 171 +++++ ...SimpleSimulatedAccordCommandStoreTest.java | 55 ++ .../accord/SimulatedAccordCommandStore.java | 356 +++++++++ .../SimulatedAccordCommandStoreTestBase.java | 348 +++++++++ .../service/accord/SimulatedDepsTest.java | 323 ++++++++ .../accord/SimulatedMultiKeyAndRangeTest.java | 166 ++++ ...ulatedRandomKeysWithRangeConflictTest.java | 99 +++ .../accord/async/AsyncOperationTest.java | 15 +- .../AccordRoutingKeyByteSourceTest.java | 88 +++ .../tcm/sequences/ProgressBarrierTest.java | 2 +- .../apache/cassandra/utils/RangeTreeTest.java | 568 ++++++++++++++ .../utils/StatefulRangeTreeTest.java | 509 +++++++++++++ 99 files changed, 11030 insertions(+), 1125 deletions(-) delete mode 100644 lib/harry-core-0.0.2-CASSANDRA-18768.jar create mode 100644 src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java create mode 100644 src/java/org/apache/cassandra/index/accord/Group.java create mode 100644 src/java/org/apache/cassandra/index/accord/IndexDescriptor.java create mode 100644 src/java/org/apache/cassandra/index/accord/IndexMetrics.java create mode 100644 src/java/org/apache/cassandra/index/accord/MemtableIndex.java create mode 100644 src/java/org/apache/cassandra/index/accord/MemtableIndexManager.java create mode 100644 src/java/org/apache/cassandra/index/accord/OrderedRouteSerializer.java create mode 100644 src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java create mode 100644 src/java/org/apache/cassandra/index/accord/RouteIndex.java create mode 100644 src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java create mode 100644 src/java/org/apache/cassandra/index/accord/RouteMemtableIndexManager.java create mode 100644 src/java/org/apache/cassandra/index/accord/RouteSSTableManager.java create mode 100644 src/java/org/apache/cassandra/index/accord/RouteSecondaryIndexBuilder.java create mode 100644 src/java/org/apache/cassandra/index/accord/RoutesSearcher.java create mode 100644 src/java/org/apache/cassandra/index/accord/SSTableIndex.java create mode 100644 src/java/org/apache/cassandra/index/accord/SSTableManager.java create mode 100644 src/java/org/apache/cassandra/index/accord/Segment.java create mode 100644 src/java/org/apache/cassandra/io/util/Checksumed.java create mode 100644 src/java/org/apache/cassandra/io/util/ChecksumedDataInputPlus.java create mode 100644 src/java/org/apache/cassandra/io/util/ChecksumedDataOutputPlus.java create mode 100644 src/java/org/apache/cassandra/io/util/ChecksumedFileDataInput.java create mode 100644 src/java/org/apache/cassandra/io/util/ChecksumedRandomAccessReader.java create mode 100644 src/java/org/apache/cassandra/io/util/ChecksumedSequentialWriter.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java create mode 100644 src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java create mode 100644 src/java/org/apache/cassandra/service/accord/IJournal.java create mode 100644 src/java/org/apache/cassandra/service/accord/RangeTreeRangeAccessor.java create mode 100644 src/java/org/apache/cassandra/service/accord/events/CacheEvents.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java create mode 100644 src/java/org/apache/cassandra/utils/MutableEntry.java create mode 100644 src/java/org/apache/cassandra/utils/RTree.java create mode 100644 src/java/org/apache/cassandra/utils/RangeTree.java create mode 100644 src/java/org/apache/cassandra/utils/TriPredicate.java rename test/distributed/org/apache/cassandra/distributed/test/accord/{AccordCQLTest.java => AccordCQLTestBase.java} (99%) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/FullAccordCQLTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/MixedReadAccordCQLTest.java rename test/unit/accord/{utils => utilsfork}/random/Picker.java (99%) create mode 100644 test/unit/org/apache/cassandra/dht/IPartitionerTest.java create mode 100644 test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java create mode 100644 test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java create mode 100644 test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java create mode 100644 test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java delete mode 100644 test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/MockJournal.java create mode 100644 test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java create mode 100644 test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java create mode 100644 test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSourceTest.java create mode 100644 test/unit/org/apache/cassandra/utils/RangeTreeTest.java create mode 100644 test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java diff --git a/.build/build-resolver.xml b/.build/build-resolver.xml index c9a47c4f96c7..29031b33a115 100644 --- a/.build/build-resolver.xml +++ b/.build/build-resolver.xml @@ -253,10 +253,6 @@ - - - - diff --git a/lib/harry-core-0.0.2-CASSANDRA-18768.jar b/lib/harry-core-0.0.2-CASSANDRA-18768.jar deleted file mode 100644 index 292db01f06942d8442204e776b7f25d158c9527b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 458194 zcma&O19YX^vNjyswr$(CZQHh!j%{_^F*;5<>Daby+xW8YIsZ9l?|bk5?;2x`vBnzD zs(03Ws%lo%Tklek1_nU^0Dyo1knm(vpnLh=SqT6DAPNWo@bxW#tf-10t)!e7y{w>| zq?o9(3Z1N2OspVOFav_nq7rl?D|~4O^`#xae`)un(7#@gU!VS^-Tw{)`+`{* zIyrgL|L+*Ye_@R6?TlTWoJ{Rp{ss>HA8;qrzX2lrC(y~&!NuO`Z_xi1YhvhP_&30> z{rIO!CMGujH=Og|;r^|@xv8D0li}Ago&QEG-oGyYA8->(XBQ_+BUcwod%M5+EzG}u zYin;}`ge;$`U}$7#@^WaZzTVFum2(0+4b)zMEi^MKhTzTR&KU`bA^9zlfAEE#r`>z!_x!QfP@HY$jx1oG(_uuUy{J&(nSvp((|Iqpe_+S2H zYv^YBcRZ5(cd)&aIh~<{p|OQ2ow1>_v!R`d)882}{r^$IfB7!0vHd?C?cb3OP?)}n z1Q-AS^vkAD|5KQVsDh}Rh^U;fq^R<^ny&o{1B&mV`csUm6%Noj+jC6*jAHi!?3`^J znIqC{--HeL%EycDVQ{=sgWUO2yw=ryEBoOZ{9tudHo8Aobrf18)Htj@GfHiAL;&P> z<`|Q3nK9OpWF5xMt`bA4{UA^Z6{b*GvvuV*Gayrgc&dUR{;rRkPCbR*;x4pDC=+h{ zGNnfg64X-{E@J{gg*2$-C~ZRag8MN$a6<5yRtUbm1UEG;Sg*q)SsNKK9d?jcq%)0C zoo5;l`7luC3&~f6`&q%xUnJ&nCM|fLK8#ZhiXn2@7_avms^{nOa=axjoC`dI>|7=V$r3(r{gn!&@Dpu9ek1*mTH7+;w`aM~)ZUzQ5i|f!( zOgXQk_^Bqntw30m4}RJbb5Jey?dNu8cnW4$WWv1Pl$Ia9Hy$32Yob*|>PpgV19^1- zpR{4!llzPn79gt&tu$t!(nNVk{EITIsEDL=SbcKa<)x7}O( zH&X9!+=a*e@NUD)aAJ%)i+&6ZXiT%tXIdKzyHqHtmn`&Z2{fzJHYOaXt9}Q(_&vyb z==3B3{fnLFtkJdTCXgJG76`cOgA<^qPBQ6X)x-<(<9+ZWVQ3XnqJSoFg%>s7x0AmH zI}?O&-q08_a#VUUU&7Qr+9Y!Y*;h>Vm*`8eBRVX5XLOm@VBkbtg^zlk)weSuu=(>H zkH-T3kqExz2yU`CB|u$q(L9P}pmJ^KHEl5e`+2zU`hT2@ z|8}c}hMh3muUMc*0|4+xO#N3#kdYJ?l~Wd7*V1-g9YFPatXXm@QMLpt$zid?K8&c_ z3#$^g6lt|1OZrAyOVA8fJ48ni`F_uv))81_;{Kv0Tf!=Nwp@RBlXIEf4}IPGTa5RHAszhMlR(D zYWE8&#u57HdMJF_mh2iGnAhHfIi(KLsuAOYm8!!+oj}hE(W^O6a_Lxn79JDH#uaVb z8uZ5UDAcn4wO;MmAk*s>;y)J8N0A&k*f~k{JgUNljFo%J!d_!zl3~1?wS!kq4TK!UK&Riehzg8sH#-6sI|Tl)?fE?6E%U(3EODC3F~h^U@q$WF)fmjD;|jCh(o169CKu zYRJJSrlI9tx{l5j;12C7sA&5gj&6a;Wg z<9Q_3hh&j&_34wO^6*So zdnalU@^E{y(RY%fI@N1$iAj`=AJT@Q)JIOhYj=W?ED+eaV*%E7f5E;TcaSs*D)7R64*%`-S5TP_0F-g|^mi9y_H40&-XfU;$)U&IgmJ8=(YS`s6}@qOr0003mLS zd$M^NYV0QmOAww)FO9JzqtwdSyMni8zr3Nu0n5!N-O=~eJ^#9A$n3b3_*H|Y)~Gc2 zrqa=XLTQ1{C00>s1s4y#PR_IjRsgzNlvLFQBew`3(v(Vy8bMT%FkMP6mG7EdmbAD@ zl>Cb2zpy*ViJ;83N{=-~u2Wj92#>;c4cYIopCJ)%vc%Q^<>xQ;Oe7r%sJPj+ARA!I zZU?~4;{mcEI4zo)F3*SD}Moi%;O^5({LUTZP8VPZ(u@O-vDh-Onq3v;zxI}3E^w|&ax6p z9rY&ghcp);(Fqtjy8xx2mfBAuR3~uu(uI8lI97KR4I!!WgbzqJelICb3F?uG9 z3=u&y+XifhH76I9eapertC`uMxDbyrma;*EkYi0@o*qS=n$hWKxWwj^2V|k|vS$q?$k(BIIK2bKLu(=+z<8751uTW zkxAF%q@b!uCSXbsgal0V?S!KPaE2%Y^`6cHq(W|kPc5o=$LJI?uS?(j3P#=?OzZMG zL&GL_jvsgM(9{O~&VtYmu1pmFUB!9xb2}`5H3&7{n0MeBa)y-SauBu>lv*h7Atpp1 zZZlv)#|qbXH$rryd7p*fh%sS533oEtxd+W{3ZEH#T?%CV6Zlf6i)kJ{-C)WICXgsF zC>FawdiqwA`4UhU+>rY@FMJ;~v^0**HHHU1GQbMJ@7W(vF&cM?5fPk6@u4!_fGitE zwu=v{eWcUAcAW*6Bd0QYX#KOLkdPTj2ZDGzG4e ze(n?c1=5PaETujo;734$h?@LpEP4pAyE$OIMn!EpAcML* zpB)hK(kGlQMOlOX9Q($kY$nb_gjm$KX(VM(Mw$MS1Plsg3o+l45HaHYbb8M~RY%t@ z6w%92jRZP`Ca6xBZuBbXnOgZu45-eGDDD*bu!9CyYIw-ySVag21#mm&?m}}7yjBoP zVjDJ~Aw?X*!oW}VIwJX1GrDbg0?h<1LM92QDmBUnCtIweh?Y+9GcV+mv(R6ANwEl) z+H>ghyMDlH<=f#9TAcSW>x3UUI4VlM92X>=KbWu{)=a!_w#XDM&kAe(5A$RD5rQ5B z-7(15c-w5eLs~FqD3G-uX)%TCvX3WBo9b18Ni+~WoELhO-2t$0@#$7M?9zb>x?rT| z>#nAC324-yJe3m?AfcuSi?jH#;|L~>oMFnI($cXD&Eo|}RKp->RWNzTGLx7m;L04X zCFP{==Pc$)gVgSR+rLfC*Uyi?-<;pWBwjiacpI-g^Ts=wzwRxk2zF)2aMsa0QzN5l z$wTrbqu!@e9?fai6H!QJ-snK7@e>=*8J9M66Ag)tD%A9(|bvF#x|6w-qCc77?eax{J zj7P*;Go(;*^0Y?O$0cKSTW2#=-s7)*=6(}CRs=HN5!y+D4As;LEWT%6;0HB`ty zWdm%RBO(KUcJUd+9mg00gA;~f5LQ1dGD~%okq*9e&GE6sAiu7i&8m1p72Y&cn+w@B zy+!Q|3Uz%rq&kq0Ew;dA*lLd9gLR(M%4Ed2tea3aX`x)yP*$eN!!sSZH`|&nMFQVE zQQ|2Xn6z3hi5gppZ9Xhg%os~%oUv4f;|hfQ%}07X_BZ5~9&4QdJ32mBX+dmu37%o0 zlJToFQayAc$0i3({g6$1N=#iYjxK3Q95)`kQ~HVR&GD`OY^GW8He>r1Af*OllZf`wF~t=; zpKvL$a$NZsL2dv$R$qBlgHMgin)g$C06!oK7|;#^8Z(|QrqDznrMnw(NZ2E~m5hE5 zYnY9r9U3OATt~8RS4QV21lKdh!8hS4uo&T@BDVSRO|f(PxY|7KE67wACrT8**`0e9 zD_}LS4aFY74r-&>z@(L=XPcptEp~m_iDZDLx$lQNQruQKS2Im$_<{aGx^Rq;1?5@5 z?6q^A#!)E-0ly^1r89$2hYk52b(QXcT>JC89iuY&V|GU)oar5_LF4(gEBl>zk@;yB zMqL>$2KV3N*zqs=2m9C8ja+~K(u>M!f7ALn$kC|uL-W_Kt`4}c=mhlZJ1&(bpAMl` zWh0Zt6meF&eG*01-v~T>Nn8(%yjsp>oy{pkJ?^s;g7D2#40O~Vs(>bo5QuLhXU{8TNnONbmdc4J5u4MZLx zKBkM+s)d1maU0E!tve&;{=o30`R;pA1kl7Cb_b!5_=v3|9^xf!PbzR{;MS4bn~xuZ zm)r_yOpaPp0y`5wP&hGHYMJ4vaeJheIuQR09d?JqnS%hDpU0dVl-PqLNfx(X9AnzUs zho*goZIyT|x`_)P_pn0%_90hJ!RV8iSL-@7Qk9Svp#i^IZi!G&XejwE;@jkB@1K5N zEuS`Z36173@_-gg9`60jF{|!cwG#CJQV_=)saEj`&mGLedxt}DFF(JUPh>$bu4fZ; zH)Mf?QoA2m!6mQljvz9`6 z^I$&*gs}NS%ES`5p0z4UoPeM}NVtPP!0aheMGnr=^vw~UESR<{)(Jt7^UE(^e#&{# z(N)VletYT^NG%k6x}*bjJ)JAz%0owh6u2(L>*O99adTsjYTbbgIE=d{O3w6B18@LB zV}qkFeX`^yhoz94bXpTgCN$n}in1`nk92I&S}(mDfm)9Tlf|LS4s>E^5rfG!tV-YT zc}de(kcHsEz1;2C@9D6{nJgmw*02v}U&)CvW4c2R{`9K)Cgc^IE8blBduEqUqP)^P zGfY0=Hv$#iRp8Ua{{ABc(H$`ah)(C*6VH^#`ZHT{9;28_5eJd$yP5DT6DQ_!s6eVP z#N%X#(+gh9tCjCNl1d4we$UkR9lVbmW|X9uIk`=T9_iq>z#kY-pAx@;ugU7GQZh>s zg0d}O`(SXYR$~zy1R(QhTn6nQo-HMJPA~3sZqYvh|6H0kP7dI^ewE|}U++J4>*VBB zzKV0-u{tOqMwozYUik#Yw1<{yS-8G+@4%@6>un3EOs$J^)b9CXeLlaIq?5oXie94o zsb8eMo|)u2$h(b7PkV&&*hSIo$E1m-lmONvuoY5g-3)29k3+DlabkYgbScD%-Jj^W ziJBcA`~wbmrgAQ~=HLMKe_o+Fyt?H*5C8xcH~;|C*DC+fc>j;4`j_Ebx)>VSn34z@ zeKm{?ja~lr4V|&gSKBZ}Ra$vg0p)`(qy#pfM+5~0s=-1#@TCi#Abg)N2aW~=?PBK6 z3YgGvMrDt`bqdx^r^R_UT04zSx6NhP!|tUh)J=8Emt_5c2{_*qo(lkbv}@#{5VY z8*P&c$E>Bk&{SpxQ)0Ev?7}jZaA99|cpTF+>?1{H!%%3(Y;TBz4l+ zDMR*%Q)#F)mRB!_LLSd~Ty5>aKMGCt{g8$yp1Yt1CO5W|)*d#LJPKcQnM z7GV`X3|LPpOM@c`yi94zTf#_D32BBss&_yWnbcG(4R!l%Wg?PbQ4xQkNHZlZ8Ki%F zuOMF5+m;>o%37K@bIw7+YRwLeRJ*NN2r#cm6YiJ|sZKpN5@-qCQvDF4PN!=u`pu>r z+tk+d083$L+99*i z>#y5ZYsdfqe{{+J&WW5o?Tr88LG$XT&d91LpX+8xva{#G@e6zc6e^N40sxA6RR&Uu z-AbXUsc8DE<4R~V>n-hok@7yjH{}OvPSE*LaO<|BUm(qJK1l|y*V;$P1SF;30p!dc z_$E1jZ*m^ydfzVcqx*x`g6hw21B4l4bPHk@xGS zo0Y%aV1B!$(N}o>7{=goNQbs`oUy+09eg_E_(U<088qlgRE#!)MfZ>k8=rDeV%6!* z=F%9esFp)s1&=EupR*Zeh0+{nYD}~}Ey+^VwO*O}$vKTP6?eo^aSV^`MR*zYz%&Pk ztz2@FYLkOUe)*5=fTJTT-^#4pqK;1lL<3-8jqcq0)GsA(h`Hwb&@;ZNJ7P zCjBbyC3(3n7-0IHw>Q#C8#C42ajD5#qlDRjL&q~WP?kZ&)_24KA`K>ukcdaY;EAi% zgkKE;k1_+RIm5SS;WkWc^j3jtR#ug8dY_P(ZibFp{@MbEMaH+3g|}>#STAHJIJE># zOm%h3*q_OW12|POr@dmg;HYBXU~+eYNyZ6>v};8)(6$wAR_22aBu|6roBvEflvm+bfceMybJr0pbbk`!uWil*1( z67+t@WJRN=5C1Kf1X{-JxnMF|GY(H~o_2Oo)rw&iS?j*^1#lNueTJuJGjGh!_Srn`4?-XM1c#TKbnG^*-2F{X4 zwrdM|X%J4>Xfa6bfquzoQVuEO6rHJkEu+OYsdE^#pJ}A?u!fg}G)9N2%Zr~Z*_K#= zOP{OJCfwh-JZX`CbUkd`cJOd>WUp(KtNb8yT=cl{e7C_NIw_*ek6GI;d7{=icEW1i zR=VOfF$V7}yn4vkkd7&LlUqJ8!fq_096`pZOIP~CJY?B5iYW^#X4}`NOMefa$wp$M zhw|V_ja(U;w+CyKtzS7yPr-AY(L&rI_o!nh)q+qg7p}lWbq77MW1cKbH!F9%ql`37 z4-=pA3FABE(xCi&=1;3$9z)#4`rwm06TRZu3cUPYgJouwl#)?mOnn6$Fxpd(vY{(? zQZM~L_bYtWaUVhVwqH573N0A~{#JCh)6AN>Y_7W}bmdLGV4#WvX~~gMJI>>E-|fH8 zNtP`Leu4{I9Ht!%un;`Z6XJJbOVLV_z3sbTMwi7ts(*bjIgOht(Ya@ZhoM^-td8BvqI=tLN$`GtVqHaR#&rQn9I(2yHk( zoM|iJ&hu(RcqpNt;w#YhphhPG7MgTXXm*#vz*#gp!(s`f~qC0SwfDw4{&D7dd z+R-pgHX?dCabIT~{d>{D=>)#Je7>a_rl!je)`_Q;IzRXNWf6E zv99i_eRso|&1s0KqD>l1+Gp)`b&pIbS1xO4n#gjk{ED7LttK8z=2%;OfCW%hW@TTCOcWqRY`Hm ztgGW(u_`TRI$l|sRdGAfu$xh>y=u~#E+ai?x0yhRrM?i^LCmny)Z^p*5HtsV;$z#} zss&>>J|9oK?)AA@kuxs#yC4er*M-Ja^vFwP9i~Pj=xMpJ2W#5v+f(0>1vgSLj`Q9- zkFa21vFHyHF!kX0Bs~dV`}LqJ!Z+zXBeY4$C}WRY&E7sn!h7)j<`Dx4MRR#n35|E!=6ll8^Q5)oGaJUlKL69dEXW|ECuKU3rlI3GI5W$K|Nb+lNrpD^B9lkQgbf# z!lwt++Y_gUeNGFAe?O>?`az`L&8pS{e9g^{h_lUnflbRZ_(RWKfIauH+z~2vMjC8# zT7El|@{G`_?USuwK$k*sjVIo`cak}v^v?8+@w)`rpkvSP$vk+?jR)tNsYHSifp8@_ zg&Emo-aa^^7FvvB1Y%sHB1%E(uz^hi#=a!R9dO2`?})-b@pq`0_07W|7IS-aOM&pu zh3GGZU!UL-;h~z}7*HisE1v{9P_UkYnrbXMVo^Ftju!bG=#h($IHQyNbmRrLRhCFB zJtM3qCb2RU6C@f`vL8<=9R}UG6x?%oO!sv|KfEpvu+D(&AU_KDnj7?=*m?X%Q8q)3 zEC9(pS5Zf1LT5W^THAcmOiR?RpG6`2HqA}4PYoJS;yEhLh&Zv7rP|{@E?xmw@RXYK zA1T-|30Wn+Wy>)lU-4G)&wL=;`yFt+_q6Mm4cOaU(J9(L=}3!6GFFGH4-@rb3~Qb6 zV*JE|(Gb^Iw!VvEk`_lGq&^P0K|pj2o#zqJnfqZuF?9n5Tnw>%Z>7K6w`SL45(fxgNQvUC$ zeT>RKi(^@AEFntxiXhPPa9a_o$jJhTCyew+Xgp~5W=k+2OQl&A6PKD;y7i%GYjzr! z;8FmB^(ss6l4Ex(lZ3o|u$n~syVuya%Ft$hC2 z1B@OW5SaJlF~UKopt;?b7hgR%FU1R<>6oA9TyCNRq>=AJ(oN=Ca~587xABB_&a0X% z(gU`^=(Jb~f#Zw~87ZL7{RgS0D87UcShid_5{HXV#<`6x9f`gQ8q6;Ahl@$la1nUc9hnW)Q`2u}O zet)~07KR{wyY1d?X$nrIr7#h?boaNwr$E0s?HdZ>jMv0u zd?+o`<0*BI_CuPt9(3F+d=4*!CkKipEf-*&qUG2iqOr9G{!VCn?gwwnZh-<6%qyDA zdrOH#B<3za_>1JF%q6GNHBKh6&qxskl^ZqQZmMOchEP()(3lI=_ZDt@swwqEFyt|> zu>n|grBEJ%!8;bh8!)A#NK!vPuH!))^R8Wf2PN!HEs^8A8Ho_}+c5OAY{CXVVp1lH zMK|Ur+sqN+s&ormbiQ_EqMtDaId>#=OW4t0+C^reuUesBgQ}KaV_XD(>ka=nqJ0yG z<$wec!airF0nz(V9R)?<<1t!2ZbfvEVj{rzcB(A3F>$bGXNhmD@O%L9M>K-DY<`q^Uxg= zh-p%-16hSu@Vvjpb11_T-F>COh>At!RfovgJ*wBS9k=XddXzhZ?wkeF;QLc63LvG#W4WWYqRKq2y`HWJlGgXh3 zPszLyQbVps3C)OVel=Jvr0NuUi=m9?8)MhI@=vxYW)?+lR|RAxJ41>pNQTCDnnG>L zP4suh$0Z92i4_#l2k>BEYB<3U^05vavAEe3%`Pw-IWE=Oerw{c_;HyIEXibc@BZK= zkRgyF`ZL@3NVmV_uO|$34anCIX@1uC!D*z`F?Cfxa3Z!3Ou%Ph8Zisogt;SYTvlqz z)E#hg4Lc4>8fDI#%GQ!;nroORyn+3>SQgQ4B<=jtp>iG5TeEdLkwO=bLUB54EC;0o}wKtlu$7I!Mvv> z&%>|cSJ0?Xx0sm!S(^qp2t(Shx&%Eyw7wOTpX!=8g`LYLHye$B zc6OB5d@r;Yz1I9?sGE?s#1PxL=)xV?Gc-G+in&E{y~M18=#iyg@587HzlkquT)uuC z(*`c8jLtA{Ek9Fq{g&ddc`t#7Z&&FT5ffiT5dZH)h}+m38QT1R?xZV^EQk`eY1zIG z-y~3xEc9HfgtXT)9|DaOk(q2USAdRyL~lL_SOQZ&>}I_acO%q6V6X#!C5||0!@e&x zqL=M_u;)AJNmlP~U;PQN)NKX;Z;MZ-W>y=?ioKxA1GSZRzEc+k3nrgoI1m#LQ}ux< zKWm>L5sgt?#-cel8^PhukT1ZY?zCCTeeNSw>S zmR$L~cRuF9d4YtpGnQ{8xKezvXEP|S7h#-UwBa(7qH2C70_DOHilSL^&bf3gzDhEK zbX_KUdwL;8ENJ>5@JMHelHo=CirSW_zNtV@|>p^i2s~8=%OqSu3wxT-8qtdr?As*IDK7r#If++^X zK+s@K^j)f11OI||a@ecjZ#3SOd zDLQ`=~0akMIEI6d6-> z8G&I{6f0~BEVb4K-wAQUwTE@hhq~-Alw1i#mkbo-M|{Dm&S7!Gbkrpdt$g?$-mLi? zAcx&~v)X+xqA0!xYH=U|25-QS&6w44DRKF&?h9@q_{`xtZhlv>m0vO5bT`qk^>(5V z+8*B3P%<8h%tObH#7&48nw=vGUcMdZJkO3QU3Mbk>)3@~69Ov%1-66+O@}G|Y{TR* zp!hUm#czcu^em|)DUQ7rxV}V-ii|+|)&Pmif|;YXm(f&&Bu0#oSX>DgO4L>PD^PV# zonkVXvi3-ijBbj+F9rpcM$t@mSw*`z?IE~vSzgtvFjLPn@Fv+OF|vCBzAiT$8rsIf zB=A@1EgPMLyCfSIv<-zu+$C(~RMroP!cI=3)1~g*)aSpPkay8n0?}Wzb$-!C`uDW` zk@EkOwFGJDUIB!`&!m!pfjM#C-&I~ycw_xpM9Dz}D7-3ON21Ng<-77R9}x=kkMQ$OqG8hULq>pYj_wDdHH9no0RA%P(XT<~wD8 z7-YmMxe2ckjstDZ0|>wjFfu>fz;@51eV9EjFw_t0LQw0kAph)zW0d*7t-t8B{-TrM zZ|VGJ2v*dU1`r{p6y2^kp1#^{9( z#bUFR7rCQ!YRGbiyjdcWF~sJMx`8U|O?eA=xQbkIHu=={7d&iFIK-O|Tw}SJPAHNg zmLa|e{xeZG#~^owUnJ>(0sv6|&qTS}xro@i+sPQZ7+d^j%*;{#uf&MAe%)=^4O$5V zRY65yr5q&%UKWv-H9fN&UKQ`_p4Y0RrcQ}Td&-r%jL$g#u3DA#of?mPTra@itJ~t-ivT3iUJXdpkPMO;!;+f{L>Dh$3 z(5LV z<9Wr>rWzazN0wUabg3rqoq2k2=G==zhd&iPe$|n#)t0_wCL1+Fdv9PD(HqnaDnbLx zvqiQV>px8UyThU%wJsCzGBA{f3A<(Dl>{Pds2USsvI=sJ+Q$}QK9wJ+9i@kQNnZfu~R}wc-3SP2JTnb0*gsy`u`x1?_%p16f zDI)qF)c4X5?}+csEUPdcTI)cRN~Uo#FNq|@X4V~3UH0`asXK_ykPDWqGWMJ z6DCcrkygph*DsL%$}dyNKh<^|QSd2pNt`ssUSRZ+<(LSYjbSUkFZ}i$+H}IFJK@90 zc68_!hjtBH_ylowNXmZ)cREcUY?V_DILMjsY19Aw2H6`qflj7_8!`#@O=;>{H|5~E z;#e(_f#vfehWoCX?dbW4`EL7YoOY%C%A!i4S=KqY9Nh;PdZDxHNhAmG`$9c~rLSCc zI0kpAlDlD8iT`1M(GOKjHU;61xiwO}!W9_4-lu?hgYV_6XGu@o9Np^aE4?~b9)YT7 zg@JzC+mlPhgU>*tSSH&DHfD%Q^90)3*<;A`sJ&^>RBmNPiZ@ZD&x(@ORyW=p3*s6F z!#p!GCJh8iy5UB>r95-Zo@EZU2BB6lZfJ;UWzBW5u%qv=e~y!VmN8&rU$#U1zvn0a z87KdlpD2w=^$MWy&Xz=!O?Ub%ES3mG8KAW(hae$B38JS2jUKzyQXYelMHKt*%PCv| zzfoM1otLy69q>)~X16|0OkN(c2UuroEl~#>ZuSI*3Pa06i^GWdSAzu4Doa_>?>W*K zhbgg77DX*xC*_CAdu^ApYsZFVz?iF9^n>hOH;}gl*2D z-@7_;=;RQM99GXd3^QL@f$emE!Us#PG_^MFFS?EipV&{WeR_mG%4QFYO5)xCtTo?N zWZIKAntUWD8hp@4Ca}_tM#$Erm~WACMm=JJ`2hMb^CqyJlMQ4=GS1@=$R%l>AkvmF z=1tia zC{i;Bm>s9)e$8RWa#f}3%~^r7E@x}dK2-W`ta0lss4AVdX<+;H9r}rM3JrxhFQ&w- zXiEK>N)AVoJ8eq)0ouD#y@v(8a#_ME(0~i0Ii$#JlQHc}Rz+%-Vg;yFY;lyPYD<-l zc3g!}@91hDiFqaKJ~~g}w2NNrMTbe#k%{kOBMv=hyQ!&f&Cx^|&F5oSc^jpjB<d1^K)xH*+A&`DABocb@^r|!wSvK>@~wKJ@G zDAkp<>lUqHCqqAf{KSSXMSW6ITlw*Vc(ZdCF{z1m%1 zYbXDW?AQ(W_>lWiCU3K@b7T&`UnFXZ_3&omI+_OEm)&}NS#X6<6_@Oe3VCuXBx2TM zxQ|>y!pCZd*@w>I?wD8hYgj`zyL*%jMNQVEI~vz+l+rd7Law2&jC>Jf=Uk0B&?M=E z2;Q#x=V9za(0-ZK_x)l+mjeQ2AYxl|`oKClq+fx{QUIP8S6HpRZ@i~M^vI#mEN|jotrb+O`mg8N!x2|JpC*OL)LAGST`?l zJ10%oN1WUpj3GmIUg$n~{kCk^dVYM?AK`WSA$@3I*DaT^sqlC3+hyU!TnN+}i(}nVh0UL_Rdt_U5Q@+L3DZ|p+j$Xi@GWh_dys(Bjh{Pjnax_V6 zg$lw41|g@mwt@gFhhQN`()DFOB2%pfyV~q`Y*NnV5Zj#$yD$8p$lcu$Jm{NB!I(fE z039Lc>>Rp2wb?Yl-XHbH?D*@Qt666Rn(#j!xBmJo2HO8A#rSJ<`p-^dj;gIYvM7pQ ziR4!!QE~|2X6%B8c~t=HV|+$&|u zV`qtDN!6QLMh(`ldA1jIHclaD(!>Uo!{brZr~p`;OaTHHY-}X`E;+&zhga% z823`7&zSNW$bnRz-7i}y0Wc1I?|AX0p(hWYsau>FIDC>a=31q&-HVCwAg_;?v}&~ z&CXB~+$-zsg|nuCEmU?Q_IELi%5(Ll2bGQZ>>-=wL=04W=n9p7tST43|Q`Xa7?V$QlqFk=L~c0n1hw8hHsX)t@^Fr${;ptn?pLM z>+_EtLPHz_%^TAx-cr8oUkzqkouQ~a4GwUJuq+k|aWFr`OYBcQxSga`PPS1L%%5L= z7w0Zr_+x4mlNZqKt4*0cSu@n96SScueA6TRi6c{%U%HAt-y1OAQT^oqEn(E%SQ=d3 zdd(F(c4jvnzd9Q*^K6;8c*x;l-K|CF(Kbl$l`-8b+|vAnlPm*hwqCa4gS|A~gk*5E zrYkozV7Csj%P~lenyE`Nn<%5(@)v&aWC*#y*JP(3=`dJr7R50;aZ}}h9n(HKhIFSu z!6j7`NEMQ#OTxe;FkK@xt|uh^j*UzM4zlTY;%n#uWcktGgc&|knIc1MqC5fuf@cV; z$VMx;24e%!_@NIpIqdhMv1K($II|ggJ`va3O5yRQTufasd~nG4;(+xoxdY_(R9xa( z?^z$XWrNBC(nZVGQlK>kN*9x1BP0~D(D^k)8^oN#*VuC@mXR!?H&DZ%b9Lz>Ti`6$ z0iD0YZ=Vxu7q3vO-~Un}pXrW03V&sNJzq%&=l{i}l`U;uZT|Rs5M@(aLpv8s5l|7FfDMV1FfI!98rMhBhBp9&|yS-$8;>Fa0w5%aOIBzPqLrGp$8h{GGee2+WbDbDJ0GsGrtGEcqR87 zMOHg{ZGyO2#kZv{D#Eo(;L&W;Djv$SogcF@wU7Ep%}jY&wN2W7ZYPZDk88!2Vx#}r zYG_@mdf73~py(dbf$jGHD0{~!%hsh!xGHViwry70wry8Bv(mOJZQHhO+jgZhd*6Gz z@7wqEdB5&&jJ?L#f7i1j<`Xd^VuohZS8%iT+HBrl6g*6$^+zoJeaE;6w;fm)@!H@< ztbJOWnM3oaw^-goj&d(|(}Rv*E}m4hy}wLw`f^Rl;qOFT91dqNiFpfXyg)t`#k#pT z3JqKO7Zbg!CJ64r+!H$_{LKfU?K)Q+39qy20@g4?0%9}UxgO*SE4Q&4MvzoL3u_F3 zcYzJXGR2C98+g^g1|yC?1{%jtJ~pnHk z$WTs;`+PfXSSZz4<#$&mVGP2~DJQmyYtZ&UzhE{*m*?HvJC#>B@C%_gn zgcql_5UOppopWWza)=YgOhPQIMr`DKz^x&fB5JR9X}G`x%>6N+A&crqDx^iil>+fJ zn*92mEDr+9@PI)#>*X&wNym-v$vMC>GXa;)-%{OwxorNGlw_)|Dq;bsw#t#vXsH1j zvRRD5Q0gI4#&{7>Wkrzkfr2_7)|C;83H3>7q%UIp_&drYLj2g6QSt&uPXZwzQUc}2 z%dFO?ta;b+mzTFU_;2H(Z!&en#K)rYHO5$C88HF{(W3j#J6xjzu`)_7$^&nUd2S3Q&r0!_zhTS*+t~DiT)A z;v68DfIUa~M&y!rr_5ve z>=^V-`Jx&vIG9D{vE{;4Ge>BYPPLz*LOTpsxSEnLsmw=9a|&{{1i4@h%^|6uswK$6>QA?2wwfvzTTN*`JTMng=vk-!8=5MiHQB}W)S=Lqg zMwU^GWGu_2DNfjR7(f$bYcOu`EtzH&UhdGTvATI<6h_hSDlXO`cEjqQ2{8ASF6@hL zs+yp|o%+xsTO}?)hpH#unbMt7qCfxqOt^ijrRX<2pAR(1qr?u58<%jWl$^=&!Nn^Y z%law#YJpquA>}qBtC*dY#tY0XmJY6Wx)FTd;2{C6J_xHRCgsr*rd7p{E8KF~wd?@y zIvQKb=lB6{RCn^3#KTj}S1-Y1>1K(GIf&fo*WD1v6m9);O} zyhC%OO@DudC4@dI<2<(wjjvI54I$DHVtoCI;B8T{i#3YKGss&VL)RGm=8Q+oDeLH; z#eehq`RzY5M_kvYMHYa&Bmsc@Z^v#VY)vg}Eu2lH?d5rTzE0xPhMSAZ*ThmQRL$UM21k|5$5ZQ2+9Tp;}YH}(3BlaKo9Iw zAPJ$PpkNZwKb=*t(cb2#1BWo1!%oS#ZZCxhJB`Sjgda9TB~&Rt*1J@(!H1I?3efAw#yqSwSDs!q6E;c>RYZ`=`iCm z5$UwPGt8UI9@9m*q$6v5hHh~dStrFFvrF#XE4SJaPt>dK=OpRTx$8QcjXz!;VAIZl zm7c2(i>u0r*6I4!BJZNCQ}Q~EBHE%|D)^Rpw+p2eMLF_R`tiV*$qdgo@Rg7EJE^Zq z_n)f_@YMYsEYaEsEX~Ek4K!zOKuAI5Fz@aY=W3$Oux$6EgGB~dNf6A`f16Gtuvv#7 z$1F#wq~Xe=)2T>3>oju$E76!p02wO_fxC^cnII+# zQ{M8ujjiq~0Zj_m&s*>WY0j(;!!|w=qzh?xISy-o90Ii6QgO;#YKOqV1^zD771UV^x_%P^X`7Z25%@x=deC(&d_uUh-wCIo zU+$P+EyAZ@GE3eqccPg3Nk%}Xj+HEyneQ*>oaR~_gyLNhG^xe~{QtDU6+1{48GsGW z{&&H|-`b#rt+R=vt%0?Yvw<@}H|ie~VE+szV_^me-?#HP+YGORg9GSU%zg zH&6wPQwAbec)lMDa2mKN@E!=>lp4SFMf>+48jPZVSJ?{}pS|Hz;p1{c zedw1#7f$4`Q`v2&Ha3cy1PG>ER;e^oqxFxw^6rt!wxUZ2Ro?~aGA_7$@w#v*y96mk zL_cNb^gi*v<`3>X0)sK@@$MCO95~EUX&CH;tG3P=QP;&D)TYyoH+d$%pekY`KKNY82*Cls9m$Vi6F+9#Ys~6{L{-U7I zonA$f)lO-lo6HoOgw0{NgNeq%W2SzQo9D+!gVhr91~!Mm7Te3vukT6^^U-t+oyYu} zUi-AZ%lBY~P$BEHdaPkdjU0#G^Q9=_x=L49C#TqW=1ob}DV{`ti}>-T1qaAoxqu;D zD@Tgt9MQu4fo%o-K}YCC8m%8UK5T_u?AB<+WiMq0{cEw_@|OCQK6mW7tLoVyerTIw zWAMjCacNv41ho7KaKqlj04S2#m-zCHfZ}jJhbfXo9``C?okY|K^B3-!t*R1TERPr! zn=>mL*sE`BVS@#x>mnOig;Pzv)g&eE!*FW&BTKO1Uc(rLzCG&`_2FvySO&hStRR;!514^>kVLmBnFz$!pelf-hk>(h(eXK8$7EW0{Y- zh2)A4JLJwo+U~Q^)VQ(-(YL~ju4a;nWgAo0cirJVyU1u^a?4*{_9VH#%d#=dM5h?l z*_R($Cff!dNVIR;tgvLqcbalAZYU;BG>Co%1hlOqhPESJMaRiYHJ7Y$HCrjHeSbLRGz+WG?X>TXJV+uq4IF;8E&9CTN70qO7$Jt zoI2AOXbl7NEHFltL}TQ!aOdlYr!wKD#3c_ZdzADCOof?v6u4e?n-3nqsDSV5;5>DU ztwt2`@M2}^{??X+i2JshS~F|n0F0u5&`Wgn%Lm_GZX~==N8mMbhV?-1K%zzL%^6|6 z=$(`V>;*K?Ux~KLxJ&;GD|`e#>VST}>T9&my8^_ajdcxF2#?A8*KZBI*1m@q{xKCE zq`6y;5e<>lgDSW{_d`drN=EqE)7yqG}N^SijcgWLsQK!3a zAC&QaI{jV*8NnD96G=$jf0-oq(fVbHIoyL6EQ9x^-VaVCtq1^FZ*W*yyb&AaD@Tu!{_%+Mc9#ebwHM z))J5D;0cy86~H}J-!vXBHMy_UU~F~Zqn{^w#*y4N$>X34Z{G6DyDAk?XMB__|MYk5 zxSWkb3&3~A_zij=n0;3_EYfTpYoS+dMdtbZI{~NO504Br)xv=sx=_i{7zAmsiTH8y`9`Z zCxtmO8kEj@gM^Kx%V-^~Fx$h!2FueU46GrIo!ZONe_&1zBe8xDI-5p~QvK*Y!{=}j zOHuu9HPzU#VMZH#5$7(n|T*K>`_@73bk02u+?&rnFWm03zs+p!&iRQt#P?tmn!oML0x@LvFl{SSbg zJKFWCX4nH%iP1sR1cgy!`U~_w0rJTj4vrsCeyGO(Z))a$2ax}|wkI{*y-*f$ zzq&RjW$V²I$hsEn72?0Tf$TSEb5wL(C{qk4IuqGSrGiAH#&9y;Y&l_A*)mqQ% zw5Bc<*9!Fy)TFnxs_eX8$NR%q(OF6R(Z!xHXX!Ziv`a*^LH%*1)ls8&q~mdUmp57XiNB z!VM{YS7f#Mvn3f8>Ebermrg%Bh*y;AMC6Y>;M$U3Tjm<31$4$g@=cZd*+DxVgIDpJ zSiGbNig{P?;W2CN3pP&}S-sMw!q=FDE*XhU9X%;F)~!TA-eq>h?Oa$E>J7zQ5;V&R z-%2(XIcFxxpR_j``d3VphmCsWT?ni}_u@h=2FoaO_%TX!^Ml#yhYu`!w9#3FH{mEX zOd7#GX^GKdV;CdGHL;+*gyE2)$ak!JXpbK&Fr(pYY>bL1mIUU_&zfVMFn;UkG&o zE+)=~q3SB0$H71Pn$?0D3fiygqO%e{^oFW9PWGnDt?Od(8!z%_YTPFQBAdld$8A}H zpsRdcyugCw2p#eXwUps99)MHZ`fZ`X%gdj*Qmd}im6o#<)DEert2D!;wfCW8TF_6x zjq3KDxM4LEO1ew(Bw@)HH0!H$et-vBZ0xh;A_k0tl(?t z4aH=-{w1(w&iOA!8p`0K6_v0tQRO%th9F0vAR}he>mKnX>&%P>wkVx^Y8!{~cx%f! z6%1H@1UEydjZf%Ldg{ILu_3jyJIe67A#u3)6oLL#MLGF8Ucxep9l8Xn)>3|G)R>VP z9)gcB!>t-}eK_%-K`nAz>>;yCFP1$Pqwk zvt4c#+#TtfCNa`leeIx7V`KA~{w$8z(?Ukgh>=m|rPiA2O^n|3Gl&=L++l7}Bo71r zbuAH58AGicY!Y({jFoMW&f<7%!KkK?x1^EB5onQBkf5${W`ZLp*Jo^!@f8#rbT#IN zcyXDr(sy~zgu0{}=Ff4_ z`1|BsKH=pq)_p2>j5+&eZmMF$Y?jZh(Rw>6T$(dw`QNcm5*v5zX|N(NfG15W=r?dD z@AuSRFh=HYtZ=(~?OfjBx|bC~JaBzuz4U_Q`Lf2xd*Z#`6^8Gh6SS@7Zq43<-W8(= zySvA92yuP;w_PrJAGtpL%s?fJLd}XrCgt-N4nc9h5(CGlxDeg`#N?kg0C}707q4z| zU|mXp97kit`}9w+jOOnv&KQg%uZJ7mC8%sPHgoKu^o$u&Awp|9B)9Hu_NwdER)ApX zF5J?;YYk)Okq~_R;mMl6m8_T=_T#BW_@Umyvbz){W^CU}{o&S0#^e%5>h*hMDTvO| zIga|yd4tBF3$B8paGomu_sH@f%r5@EAh&=EqY?^2?5F~|x<}KZdd{6S7i4nJsSUo< zWg45x*rfR`3mk#pUa3a~quZKUde4SLG_)=P8L!DOh^``sE<+L->ACPO9r1vXsd2nn z(V1r7D{s^mQ*(Wxx0TdpaMM!qkPYmk@#@^#Qb$o(kW-nkWUdt2v9ybycCWNhsXV*;mIR!e?dTYaDMM)Z8RYtVQKrnnnAnu$ z>UH%a-|ja@_U`b;D&}$Pr*mBr9B_L`{oAB@SSR!5p(T)zdy1C@zD-=hd~(c}rdgoS zWd(SkJz*v;jK>FJ+c9%Tbcw;SPp&N^aXj=H$LHLNKeb?EL3bIzx6w$y#iRf2NY zKXqZ=lU`=k6YhM=Y`czp>(5QFx^5gtsu_Xm+O5VARp7gPMdoWS4-O!QNAi~7W~hH` zoei0DOto$-@8X#$w+l_RMwcrOw0B1!N!GoYX-?+VbBz%z@)8L2NOAsIQ{V5^i3 z9TNpoeoOe-vL*oWO75sM3G5PqS08BH67Zl3k8r@z+bl#A*Bg#H7%RktD-wQ5ZpRhU{oLBDDsL@C5PL^XiqIs#?+sp=F;_o)H=EBN?`RuMcwtMeL4s(-Rt4 zMmmQJN<+$K$fYw7(&-Cq3xsruhIDMgRa0^gxR=`H5-!q(Rl#aw0b!|_uaP9?^S3e+ zkG)Els&t{|k%o$QGqwlxFZxjN1T*$WiS4TWaw7v7NB|irA`ze?C$LeUsHTPxhMlY? z{+7>IDfdk=2JeAN;0*cK0{OR6H3qkt;nu5*Gr?G9$O$S)X;`MjTyD`wd=Zn45Jd3{ z>lF!M_qY1oUM8j_AQm77WRBmVlGU19MGeg^Gc(okJtK$Sch+1 z6D~vUmO5SQ`#g%oZVw}yl31_|&9VMi6_K_5 zF|sP6OEmWR!@%Dh z1^Z$YdJF)-E`+P|4OwhubGj;idRsxYh0SJ^P=h&!?FSZb*Q7HM%27^5+nHNldQ*U70K2;Qipr*Pd9dsKav3b zoGC){EJ3#oF{G`g{m&~Bz85(BFgq~q)^d}f4T0uW%!X~srb1M5`JV`wS;XWDUB0wE+^5rX?6ni1RyR1_*Sd%UVd<>9|!vY|CD7pyBR_nF?`ZuYX9Ef{ZlK zE1ERwwVc$Yn#CVXRjqA&ylx?BN_i&FHWl49lfeKT3i5Hord?PzoIWCspxJ6jLFLa~ zI-@kog52|zUXEmMu-uDBBTN~f;W$ z=QAF5X#09g#4mc6(2N6*>*{t{f{0lKrqcvc8PrqA(9fT`$3HyqO*--Mcd|1!`^|Bj zO~o=+jodPP|6DM;UHnA?r#0OrcnCnykOBJA{#F9#pSf-pG9j@WSG2=F_Y<_8A>R;3W{Dxc4t#&vaZ^iC2?cu^Lsub=){~qL+~rxInLF`1 zeX%flTH5hrc&Q;8?o;VyCBiw|i@9-7@VVmYUTu*HRx4nK_cJ-NiNt{nI8b-PUK%`i z?5`q=^wC0>)76Pq?)_?K2FeXWSy&>URrM*SF(^XLcY$_2p*#{lgKN_`{tI_?EJ)<8 z4qysm08GK(ws`*KNdAjZjZ(6a14wH5TrE{jxPbS;suboR_V>Umx$l`k+=06eCvZ zJ$d1fs4=XOh7zelFh*FU&_K5&!HGBHj0HiyWqX*y4VvZMn`I0?V74D@z&a;dFKyb7 zX5+jE7}QqTd9ST{&+Mshc+%g3n4WB=p}U@^s;taRG~x&qs@NnSJ??e#8~PaxJ1p3+ z4$sbGxEr5Tz*yJU%2=ep`rktgF!P6tupwYyw0%y9Oxo|miZIGKHmblv+MAXcDCM9? zv@V(Z$F&auxjS^#=o1v6aNt#)il5A{ajIUl&crJ=^QQ&X7H^HnH7un_-L+-kjzL;` zwkg9-p|SgVCefjPb-xJTZ2)B|?1%NHqvxQNcOxktP8EEyYXFG3DESS|TM}_18Q=@~ zD-X-mvLbU;&Xt89!>k&#xJd5?MYp#U8^K86QX0&OGeyE#7H-yUz!hJeYIoU1uU+}s zycF0Uz52kEe`L#6nckM^ajZ18v`TC7Nvi!=%|Jo=epi|$2hE>_SPu=d*Ald@pIUIj z=kCxzx8fQ&1rVJ=6b`mlK4J>Y?sJCk_K)={Crt{%56uupe4hFC38(P^6off3#^cRh z=Wl>%;PK7Hz~L2UHobeC`yn$QO8M@FTd{F(%-SMUnu9@7j8CW zm@xy=hm1mtRQa-Mn>fa$c#7>6<;rNGdw*xJQ)&ud2(}}drXEk<^U=Xy#?t)nVt(lY z>@y$GZvVHs8~g*!TLc|VX#adVzV65WmX*UM!I1429HbE0 zKT}h(%O|Y(r~@k!92Y$8-J4V0RweHe;+b`WAt~aNLhT~oSYY*-gjA)ct-5A7()DHxIVZV1FK;MrL%q5vPbSrW^#^D6KsRIo(K-wNB~_U^^BCJh;iAo2 z36XRD=b%UTI#Su_=mdj>WCXXC_G<5|c-kY_T$wfjZZKnrtCo!|2r1S$%w&Wza(5xI zE=$s&8_HM&NO}E`r>~UtNLSr}C%1JU<$8;atf<9I0Cl zaTFvjJR`EW6jm7wG$WayLcoqnNIUmp)KXTB7Z|ZDBuqW|gi*UWK0Ym1d@Xk7Ad{+S zTpw}p!|Tw|8g_!L7o+*b#i}3+&(i4<&$0tBJLZv`W8J3rDxwNA4!=vJozr;A*)M|z z&$_vr_$HRZiw94b?(ie|UnQ&fE$cr6&3Lr!NF*~F8sh1Imw zy3b9qGyxv=zb^m)>QVo`SCma0Z7gg7l_?Q7BNM<~^Y3b@d@rE;8NoXpu*_oC z@{z)SN01kzV2egKT!_%E_MuA#yglC5xQ_BwBMSuH=NCSyY+eo##8qFq`~4PQu(!9T z&v*OqO9)zLz^wHNB?z!l6t|i=E{-h9y8$C{kgX++j!FdOt-733+QS^t{L7E}(wE~I zS0|-#HZom|a$KDaGv{Mz1Ju2^#({qA6w?ziJ;doTG^3>5bV`E%KwY`*@k8Mmd zs96^U2ho%wjosmZeXY&~Mz028Ys!pwZe4@sXmLjyyRHeAH``}!6wjLj`t$sQF)Dw* znu1+{w-d-S1uXbn{TUe78X^o|jG4HqrDZJb;j#sB7I@jC?czth1jZWMD#rxL1 z?YWlop_<8Fn(R;bkX}?G3=J8GcFpz$0~+XJeN!4VDRN0@%p=$n;RHtvF{CmSTHEgu zRqN^g{1@_-AfC!(9e|)p0PR24zdJ-(0~-@#WphUp1LObI0xo0XZ16w-|Las6RW<=L z-3VVcTAmLc5Xp9fa`NlCQis9<0w|#rwIWEdfdeUrZ5Ht3xSUl`*PrwqWrz&%&lF+4 zG1Ge&jRIB{f#7<80YbB*r}Wj;_U!I%Z*Yb{3)HC4pd7*~Au*xwf^e{h(auB^c$A0n z%{K-(MA>pavr!O;&`zQanOwug$S;QKHJ?AH)k-d8Jw zQI_3>^T2oKG@4-aM|W5Cw8GXcMK+x~s!aqqpv2CvZBvv1Y-SpLw*rG?0lEP*TJ*{C z0mwxrZ3!@Tc`j5X!p}eywI;zmk(sD4G8Xel_k)iryEM0pK$kz}ntGwfHy8_bwqRP^ z6C@$c0>{l-ch{!agnhZDam~0zF4@DQiD}qbr08VWI2Tc1+&D(%1Sc z1e;2AR6`kHlvjAqx=2&A^PzLY;|PM#tR=ZD6zI2Tx4nT5H{i~blCTZu(5;CKBh(jF zG~~0HWlI<20>`juizfIp!tSTe++ZYNWr@#5QezRBo0)Up|1^G33eIv;CeSIyP(;6= zeY{0^Sv>pApdGx@YL*x7wkUijcG$x5l^Ir5Cw;ME8#QEUD&NFIzEL=azU&gbeVYy) zcMp`Jfr&KhE9dcINGkkRk@gZJ2hn_*z95cI&ky8CBMv>nd83L2s_C8ME<`>`SI3ut zo6|_&1&_Aux+{f~cTWB831N)n+42Bv&}#fM+Gh~C9bzblH3@D0moto!eSlY5rYxtR2V=6=^7qd-tBpv@>O z6z1Otf0cD89esTdPf>~S6}&-!S{I!Ej-^^qdQHsY`^`n^&{Wh@UOi)&V zxriF#j2y&;DtDQuLw`;X}cUSPOL%}{Z z=gIP^?S99FCyVVtKj=d@d5(wCOWq~K>>}z{bK%JlpvH^cwPNBSO2JE7TsC5NOQ^DLxdHrw$U2Q^5^8TjIuOmvl-@Nx=0lD%!t3#6oF0a zTA$#asESXgcQ(Q0)}lhAUB6_~G2_U-T+1oajfc}1WF5e!;&_|rtIBKS_f;1zcWu>Q z6V~~#Wwe8jE27JuN5H2%$~4?6gwdpjguK)Y(IJ({HaZG?>?>PW0S`2HhDV`gZf-qE z6tz4BBZCt!Ud5uLTh*v%aqj8dwO_(=xR34}Y;XcCFWDF4>9qJgFK|z_(sUX+T4u4} z$i7U$&(Esz_JQ@XTthm|!mBHqTdjJ4h$b z*SbxxKdSU8nYjfONy%UW8#B5vE_{Ky*b$;z_4<`vvtJf{0qi6$PBK;$49sm~CL6y_ zrdj!5D!vhTQ;Be?P<*8-G>XPmyPRk@AKf%S1H1<&D~qZsI{Sp9YM+#>p8P6U(z{}0 z6Bi4<-g)k~hVB;jRe6R5EO&+dnl5ZR{e;gh&K;q_{a0-RnA7X87rh!~Ms!zPIX&1~ z>t4*H5t+v8SqJ}Hb%v0q!wI${kM%{YIl{8#D~^zk1v9@H?N10c{=_`Ax3aZqYRbhm z>t6bmFrQ-O3C;AocxC8fv{My-JAKwFz{0Nr_qtH9*P4X4W$tM1Gqg!WzCYj@8|F&9 zvTVXdDX@{bG|ufWtW(>RFFVw0eCMvZbngp2T-ruE0WC{CAvXrVo9agTw0wp;T-tJ{ z2;$tU?8e@Y937 zJ*;=5wX*JZ+O|6#3G`vB*5{ zFdutpTjSKpX7$Rzn^PuGt$m((`kSY;-M`9(uiHcD_mW7dMOl)$GbFvtb%%?PL&jj+p8OXX+f7sX?`glhG?U{O zaA4m7Js6y!5CFnV0y{OdQI&5Nyu8DPp1h*)AM&IN`cpob@L)NXU1qaYg>Uvj}4{2VScr?X)Ixv@X0_ z0DVuLWH9ziv3YF0r&R=s`B=4d($dsj^{rwi<5=QR^^ImJ13m9M?}~T>_VeOk8a#~Q zX~K-|ULIUJ$5QzuoAr^o8^}uD3QMd_wXb`zZ|f;YgUu;BqFp6-u=y)TGZ7S-Ajb~W zk2t8DJoAU#rn1a7R|c`+rpNc&2Kv|wF2fNU-psHVVlKjT@U{e|E5#gW9wzHyIDyIm zg5N;vrvaBlE`j$O&Zf0RnMG_EVjsp6-zR!ypqxs@aP_Ai8BIt{tCUXKy&gJ$#CV+C zMo-Nr62N%Jo_L~qtmVUrpdT-mCbiJ#J#PS{Jx9pN{uH5Qj-FVE3S;DJxxJ_ko?ylB z=r5o!Sl%thCe=ii03NF3cJJiroAxXxF%KNrQ)Y1((cxJRFyc(i{)~q`MygFt(XjYg zMwa4)TE+(Hk$P>C0X{=(5$3wo7@Ym(K9iLgR16_7@WhOB9z4Gc+<*VNGV0<}qvPtb zr|wCb-1ADNa$M|j&KIvZhdFHU3P?uAdjCia%!Ps*92mtea7IMx|aiSr{!+8cPU4@ zPnfVvfKxg%r({9~w>9x`T`p!GeZ3cdHI-SKFKO81(TtbaCS3b@F7YwP1XwpaO&qt7 zFw2xoVzkAjI%Fc@EhI_La#hfKCB%JYO4t^%>p`#Mg>vqIU)Zf@QL~=VLvLDlheJR; z{k<-pAgm+_L_)c;V~DoL#ziWU2FjQeRVG|?h4W;@yo#vVw*>ZkeyjU{-^e~& zIH{p3lYTHK?K(mfF*KD7Kp zqD}5U)thd5uV0x=?Z2T(`+ zf*LB<$!SZ{l^!h8S5KekepH$XO-?hfDvCjOb$QfCx! z$j%Zwt8f~%t7>xSBK)cACC*i$>(ok>Tpj?UI;!CSmZzIel zr^?+}l1XFmj@$I1?o<+?i%QY(@GhSPNCHGZ|-w`h4vq7YQGF+5e?3Pu#!MAfFj*ve}c498*zIi^IR!b20L6Uq6b zO+J+|0~)vP{wT+nhSd5vEJmC_MD`<2gj%#PM#c3V(FpNy&(-!BRjdS<%8N{1NX`bT zrD2%%$D3&Wo1E!Kn;YVx(oyKFNb{*FJ^kQ4mkbQT>7;p;Hy5w@xR;Uqoe3`Cd*3@u zw(bLM(zP@D<9fQ?77|C4ZX@jsK>1-zL6z|_e9zu!>F z!VJ)0{;$`xkpozkAVP&K8@K5_!|!zV-T>W=2z~@K5IG`fs4cR3877C5pfsIcz57`B z5iwA5sR*x$0Nqq>k98qHfb`7Y9_T^8#d4I6hKqP~_vQhkq_o?l7nn$qXIQVH2V*b8;S zqE%1nG5N!4=riE_fDL}yk7g#Te0FTqgdJ;t)KXiPS{qrt*tv~KLI&zq_`LH;KtO`U z4O|eH5oZ}-pz<2);P3Jn2KWELn0J2u%YYvRFqV=TU}o1Az$kJ2Urxyxu&~*{(fHqc zS^oE9OH!6{LQ%osWuIrxJaz6f_DAd|h5?C>93c=zfEZ)ok%m!_4xBS|nst1>Ff>Z~HpKsr!24<#KUf@7u&q zUH~BqF3g@cGmibucevjwqrHS)=1n2EUV)g%Z!1HH;IilM3fS2z9Jc!f0;WsLKJTRh zmX@#azf>U0CH2Sss}~LL*TTb-4<&oVeFz_n?IlFJR+|Ggx8QOg*liR5B%YJbe zH-=OBS+gh$>z8!Qg%Y0*N`0!(0{f)}sbGrmCwItuPEsgx(di`nc-5Bvw#)-&zMZZ> z2C{GuD5hu$sJF9x)r;c_3mAB?L^tlFUE_QUC_byjw=8o)TCj7zK%V{mJNq+QS_O4L zQD6f4pkZOR&Q~T=p$@Btf+i8p?k1qQtYa-BS_OsmymoKU)rPFZ`UaLUi9^bO`EG%8 zdj#W5lJ~i@gHGpw;bMpKXjzHeDRF*gC_-0WZ5fwZ9{ZUYjP>2%SM%F+m%u zpiA=%alSJz;&O%3{`o6qV<@l3kA1t?vjZCu!-v*%U&^n~w@E_(T8jb&;|9Ajum>e0 z*`)msIjVVGKW5{AMRX zwLrVB2yZ^_@QEhVvi4Gss|GYu&2JG0fazWt`WO>Hvh}IOnq9tK>qpizOzl!8cok-L zR6ZkkH#3SyE?`x(pHE|o>(Ol;H!J(Ik9qA-TJ!XO%S!kJHKO;tX>PoAy){GR54klS zJnr4LMOn+>S4f|9cl3fyqb|1F>ivWErrOV^D=#*mf6BVzy6nqNbmc-C$%yzIu;3ea zhWd1lL4J=%h=eH;AEj`FcOf$%+XR}n2x4T7Aab!!l@!`Qc_N2WftTrh6xQV8Gr789 zWeoe#Toa^O$KU_*2NBH2duB1wjOYg#<}AFJaU41@FDd3Jr34gFcW)wfJ#>OiQ4|_q zpywAYK;y)Xij#lxE*+|1HBqA@g64xM{HcNlmpKAAjb_schP;J8keMUoV*5WKj;V;m ziS8eR3IHK{uK!2pRWvcPZ~{~?OpO0`_svwDR$3QD`BY7&H3~EW7lH^vU_j84wFZ{` z4T41=YavaDBAs(7GomHWz(hQn_Gj)ZAj9LF6g-tOa5UFf;HyM-d#r9BL711*t<3ZJzI~uxi)F*VO9?#IJ~Gc)f1jocp7<((MPJ* zJk};7YdKrjqEm7b-H#g_mc+Z+m9>^60BK#?ZWSn|6VB;|CV6$nDEjt?l zF095HU~uEeI{7w;6roX;&Uzq9Z}x*8pM9wI=y#H;^kyYqs_4(zHx$~GYe&z~gQUCi zex^WBbKeL%a$~2%*e*fp^u>W8Bphtq4N57$&~MKU-&!5$Tyg&W=q81V(5CY zt$NR2P^!~!NE`^gJJ@48?_|u)PCO#woPIR?@(R8peo%xkA@zn+F;bp>VC1$P!YmNi zcby$q#L&kIR>#V}z}uXPL~qX)0H3*Zk*`TLH$eP`yjqa+f^+91OL&&m;9YCS3{mGL zC6sHrJvUkQE~EjWlb1DElM-}bg7`F|M0LmM0d##3(|A_w@rySPCrci#9>c&XvENIo zWT3c0?DR%A;qlsM0%kB0Pms?7%sV2LER%@FKWlgGCv`xA=`&*>h|lXd4nm zENP*(=u1q#Lx;K!NTITpV)Y8-b;GR_)6lH9p%c;X2tUJoG9re3-hB6>*q`|0zoL$f z^ffUBkR{q?-_B^@_wjjy)5nUL7vFb;Pd{bxa1iMzH>`Txsr}lw%&pwyr zQn%ngNbnG+T;wGS<1%79=S1+IM3 zD}Tag>Lz&n-pX&_!6)K)8U2MOaTk~WEui5AdqW<2Lmv0a@JAvTv;L&mSqmKd`JUYv z4lefg_sg;k+QA1dOJ1FE3#xSg(YO2&>`JSQKJ|iHxBZp@Z&%8KGZd^fS$(Eg+1exh zLgbNMI4}a=dCrUj#CxCwiCfRH++Qp7{}~S4r@b7V0G#j+>A$IW{Z~#07^eMaEdMXB zD^x#s#WBJ4*}5=cOCK$)wUEKKv`~^}gY|2YmzZ0tky+dMDV5Gr+X$7gX~8sUYMPb? z8|J7`8tNw2p%Ual?EfL~T}zTT6rbkU?Ir-f6IbvHir3Ima? zdz?FA`|GK3`TIpL{r*azG%Nio2b-{(Jy|q^vjnCZ17Zk}u;LU|Y9w)VhPJds9i<3u z>%_7?1pG(@ExjzU%Jf*2-1xW|3XgICtQ$wL*q$3ow#p4Bpq23^Rls^=aiYT140J(o ze&Nzs)(K}5(Hw_qNQRc(vIUv4-@)Yyk_BgZ_^@xO{k_=RL6Ff<_t2O%_ULS2Dcu~>Sc^j$MM<51D9W-Pd;SaKCpnDj zc(mliyy^rTPtK}gM^h}!#WjcPa5xrTH=-=UCW9&WG zE%!Q&EG5{yp3+hgv>@W3}fnnEFry2??RK|-z)yqFQ$wqWvz}y%wlHM zxX2_2F@)w#4rnOxMFp9-snm5%%nK^66e49UKpn6)|MI7&aLYg@Pnc)D|G`(9hPnB~ z#+S>f7-q{n@Z54W7Gx0Z8Rt|NB=bKgd&lTZw{2^*DyjHQ#kOtRwr$(CZC7mDsMxmc zRBR_VYwvSgJ9~ZStoz-cZ)@+r`OMkJ7`>0)y#|q%wc%9bB9&-Iu$Ji2Htf`0ww%+u zwJJUur5p3|xa8$DG=#WFH9E|L-)c7{sZHOWVwGwbUVQNoR8-*GlwJz;QQMUqb&JEb zmZ@S(JV{HBfvC>aB(w_}NfbRHWa@S(kNJb}639!mD>q$)&er!)>K?bW4k9q`1o>T(nJc@C9+453l_0n!U`~Pyy@oe6Hx(hN!2ycO>g9AB z7f9NoW3!TTkOr#Jar{AFmkH2StFqHUR6PHpPePrwK0v%ilzx^KuBpjwxT^BYosQsJ z+Mf3eA}NL=B$(`KI-Q(Gc3sgut9NAf@KRUE!I^44OYweBCC7Z5Q``=``(ymdaE;?T zg9bV_XJ4C*My49MGN9Op{Bja<9#wjU^tXYOLiAJmOF-ek$<6HbrqbA$-f$yZy2m27 ztcG$q5(tmdHN*JzoffTG>fB<%X3}iYU>$l$h;uNN@e2J*X;6~?QCfGRkreU`kWCU8 z9O>23q@FQlF-Il$5oGS~lpIe!)wLoUd#9C$gLX0OBY_GX>@1ouJEA<-A59gLA?eRT z^=X$MRH0X99O};<0Xo1+EkUO+DV1w2)-)OR``!H{gZX{9KnV_JuF>%MmfITcj?zB-1XD zo=P1oS^@U-cvZN`;vN2`Lq>`*&K)g+Cwp(vulU@)B%Fm9o;|!~8PN~=I1A$UudO5t zJYO=1Zy42h-t;rI383b)v%JNO(`3+rqX142w;HA$vnk$Q6llP3am=nS%82kCTo}3i ztFVA*GUk`ft%T-#!L#Vk*u@9|zIBEUiHq~uv@zK&-}pZ*(XK<~CBLK9xyg6f0tHZx z4!5Y;R*u>308!i$1}HU243(jR&>P%m12TnvV*zjFCSq@3QjmxTzE8~|A6x^L;o6Yo z1)0*C!W(XZG(qkLuY#+OrJWq>n4RIEJ;0vXp6VpLEbRmnaO^8sR?*r+hli#|4xy8| z2N|&G8q~O(m{QGwCY&F_eyz`2!=vCH7cjb5)l+l3P@y~UGag|rU6L8vdLTN)(kjOb zN=29GbZ<;>-kF`9egsccr~59Rmo3pU+fp?9ee<&;qe;giqzmCOcmswE$?|-d#RV0P zNRRPdNwtf36I+No>%-%iL|kV^(u>71ardNpYr&$HS#NM4>4CDgUn#Cv)bQ}4nANW+ zWu-PP^C<9Q5MG)67fg$!dulvw(@1tSYc{#aKBT9iZSo=+q)NhFD!p}m9h8ijG|K#? zfV|BQ9F=3r6Sp5qPGAM$aFL+oTl-qdliEBJanl31eWlZ#(YSq7SbY901sv7-`Q2`U zQ%Yg2&o1iJ$@HnZnBCc-@7ZXdI;m4#^z&VI7jKyfhgQkM&x7 zqXZ^pb``#xsQ8)&GDuJs5b9C|(D}b|KI*r0B`%+9@B01{F1!w?&U-de)Kb^CO0G4_ zTBWnx`JMh`ndid-NATQL1XF8AWJJNXU$BW>WjxlD!=XN5M?8npH+2og8DiYx#{lvt zV=y4~`EPYQDU*KnUkMTMUvgaBf2`a8T_r>*u6@FZ+7#lmUHd>?kuNd*@)DcVMmyBmulYJS#A6HTu zmnz1Wva>tCsq)J8Sz-cKejLV1@b(4$a2zjR5`s=)q%>_A&!LeZ%ZMM)QfUFqgbx`- zaWFxvpyDIdQ@0A~iOfqaA^hw!WLMbSkG`ceS)RSZ5)q|oo|L+*qVc4gsTgLcC>+yx zgP2m&Ty!YM(CEFonFzg1Q@+Thfu3qGsxp*V+CrA@tZ%CAzVQ^H7wZq)p#(~ zZ82OTt!}myw;S5pgq=j+s!=oysZ%DRl4Dq42Nd3YHg%Y-C2!a5vY;o6H8f!qBOg<& zfi{<|(=B8y(h847x;$`20`nqdZtER;ukY)(a3P4r=Utlyv8bo+HfN#d<+`__SH#!Bm!oE0fYoIULK?9>#ovI3UMPuLn8P3ny0|iA5{(0FQ`=a68Ae|m| z3&G-W;$Up;Jd zdlptXa7Kp!@?Nk{at8Q^N%#lF5b0_V_f1SbPiS^GuWgp)b1|(b8y#SW@c<^lE1KNj zI=kn>OK{1f4sCLpgP&wN=L{~RkTU5#Ss#zRe$E4&uA^^$JnK*U&5+FGMQ*yD7Ybb+ z`1wV(4NeIzVgqJgk?s_n0Zptv4c(+RUZ1u>@^3s`NXbxSw0_b-AXT+U1AO0<0m35F zC%3u;@IA0W#^bG-jUdLLUdN=|_!D10wEh4+yk2P;a>xO7f6mNFL&Ar@aRS%FFrWBe zt!mF#mCXO&K6!g18UdRx10)kCd%b_BV^~_s>e)N~D=OnFY}C@s@Gnoh|M<^;mCi~h z@?T=gk6_EX!*8uWB1vEwf9UQt%6yzA)5RpFdjvDQ+q2}LG%#7t2(g|7X7BGUe5|N{x$hD_O@d$ zcIGgPgfo~Uh>F^1fL4nvlAtQh7m4iB;g-sc5bhKXX_Tb@2l}y#{kE+fa3C zGMn@c@hl_)pq`b3I*CjvLJ3;Pgs1ZKODcKN-@?9qxUg^#On73eu0%0Fq3QQ{p$!>o z^{|yi=Bu@r7s;24yfEj&?%Qeib!9xMdX~^hG3L;HA6p)+GVMluZ4Z~uFbvV!^5G%J zloLSPfgag3BZVIiVYyq5R(wjr-#;b?eMmqn`64>jTj<;;ZOyMtGOUXrZVX$Su@Do7 zn>LY*hVD92UnzGv6i_00eh&u-W3k4SqB97>uq&W6SnC*%k#a1oBS1FG3|!HuN>;0| zU(?%^ z=SUx%U2UTEY7>%zgDF!3n|XJ+twh*@3ywq3EZh5u3X!qh`T8-kOqsxXI+{bH|il zWsuDJfsvV;-}C+bPJ)mZ2MGUIRwRu(@u!oofLQS_L#TLTx5j}}3%_(QRNk?*DRI)1v*0icmr*1?QLAmtiGi6Qa08$jyBe2 z2LB2Q`=2hMqJ_--UsmJP8qQj(n=pKGns#LTUO769@O5Djh9LN&yTuc-U`^A`%yY># z-jAQAKzK9#INLGwnD`{y#jxwqo6`M(S#S8FXCV3>*`uC5~0slE_uRX_d5maTcCRQl@R5kFpMk0Jkzdz}&r23l!n=$lmKOKn)1PpFuSCJ$n8^ zD-%z7ZP#>j_oPb;6JzXKH_y-A4@0?9?TE{bG&*iY(rFVxLc8z)3)`(F5m=p8M}<_n zc0+_1)u>!-m6w9g51ji>QyhxEiNLb0a!uF1qa)^^{Up1<3Hi9^lXX(djU} zkuMtj(Yr9wlTIWRn3aLr--U2qNQia>9NPQd4= z2=(mdEeP#9mhYt{b5jtFW;{|7du@NpR&tpAfCUX#>Q9m6J2;n0yW$mCj0T}y%!8EZ zCI2lXDAHZA?{4d)IXUUvcMOzorSl7U85UC#44jQM<25(gArrK*-d)@zMU zCcMsLG-&tphJW)!Hz~oXo$5I&Wk7wnLzDEl2BguDxreUOy{vVk3J;?4O`-~Q(FN-n zVD>c_H{YC~fz6688JQJOV3-wHq^IVvN?0V!Zxy&K7W5$go`W-L?EBJUiEL1<=PH+L ziPihf*c`jrWXCTnq-K|lWDvZzQn7%bDU^2bQ|$0(i6NWKTapWnbu5)B8H0H>PHYv)sD*ykv-i$gDT8Qbisa0l;_T-1!Vb06{vac`LLA3u5y+4R$Y+qf<*Ees00?{}!T9;liu zd&Dn31lm-<14AZJ6tj@adt09IX5`W$rd^hkT+MN7Hgm#9Z;&Z4xkN@C3F4rik}3;#*b%L$v6&h3&Hons5O zGaNo7n;c1(O&tUnBO%B3A>yqirVKz5$4I0SaL6RoAyHDE!u zSY6`RfLF*SRlVq8`SLZbboC~0Yw$O*1_&s{+8O*{1Nr5T1ktHaR$P6owy-E+Or*6~ z(V(Y-xKHFtavyGWl2+!5M3qLt4#s6SH(ybbt4yvg)cY*R*I1FVV>vS0+sLLgR?F1n z;1p=9d?Ey0h?b%% z?F7b-TBGDzRbm#4QoFgGNn7Bip}_$FMLg9^%eHUR>eyUwfyID)eKZE4Bc;RT28)wI zvyDV7@DaAeapY{|a*MUO!-BqWJ@R~H51dV2qEggQSz*M{i*B*jwArzsW1ZjXZdSX} z#;{p(;ORM6%cDF!_Fk2RWIRRvgx&m^2SiYJXWst$chpSce7E3b;0bf$`B9VD1mlH! zuqayzwi#yzSRQbDn^EO@3rF|&w-AWg-Gc$)eQB_!pbI|T5OuH5y7jBi7{5>2O_<)A z7%(`ec%$Uj-ggzA__X2p0NCr;;rUm@b|h8q*uHjpexqP1a|-i?nl;K;SctRl^^f|! zEeeHYNee}eBy$>@+OR{%e5fc$72++k-ZbkqmA@XWLEcSYQ+n@yfo1O|f)xv~gwsJR zt@bvwl-Fig@;i7|FJYsSy6fhh^|+9dZ;J{=_Un4sf|{hQO8J?gs%tY5S9ms-q&Zu1 zl_y!4cm=ziZ?%cM?P6c6Ko*w8-I_4^v?}gAIY?Pn{NV#P;bM(}g7172{R9`7Qe$7G z?y7c}Qf}&IH*A28oq_!Dr zG@x~u6-lHIReR@YFV|~=qx#KNsI1i;%gC>iY;{t!9Yxe~%hIq~(Z)a&@t%Fcl>XY( zS-N-NeRZ1%J6mGl5QPv>1F-xwyk=sMx*nF&_TkLJd2lhfu(0>Uh&WGy+tGRVxvCf; zq+KV*wGH2@8+wjfpO7(JBr?a!V`Ph87>TPu)|F**kENQwdt<|3$a04nQe|7w*`1R) z&I29)!Ga6IGE?h_dIQ?Fnb&?G2JvvTnHOSERS4a(wVg2Oa@D~{BeaEq`DaSDQ{Ynr z@V5I++ZZPhBUa!zajK6($5-d)UWF%I%7H`+wa#FmCrHALlBB~KZeT37PchqHPG7wZXQ>Z)34ah{cClafUr`hHq+mO#z_a(H zl$A{i=SA+3tsUaK-Ipajb4J{wo-o5m)?lj#1~V%KkO_J!k5P-6YdIoqz{Y{}up9j# z&V3>1?$BvnN`>fJ(=mdbTXrDylGnOJMUkyTceFQ_DM+gPOs`Y`?_mk)sM>j}6Gb)m zHPAKB%(naY#KO*AB+1s>$FJ`Xa{S()LX*0R+E(E+rkRo(Qu&6iTRS)***DZH8M?UH zXUINsP^>K6y>FR4Odmd2MHzW#IT?kHaz)EAvc_a6WD>T2_cN#Pa(DX+ZohrOE!{u9 z*nefx{!d67_b=gL@TaO(h0AH2y$U zNXiJ05W@8r))pq2L`HS}`NG=gCX&_J0dn^fvzerignHrOY+|{>4z}a+aI-arV98y? zA>uw&BHh$d)cm~o-}c) zHoKI5ku%#5{3@hSM|jbxkVq>74o>26iq;C=lgq?#lQ2Jdp(R0G4DJZhW}9VG1AXE7 zg;qciV5dGr>qj|YG|Og`0LhR#kkN~OlUg*&eSc8nS9%L{NJcGdsM;&eEE36tSl24^ zpVXEJE#3_4*O`TWHSYg?@$vsGlWeRFob2t5tR3Z@tiNdGe;?t$d-Y5u3k3u*L@q8d zRQNsM9`Lqy{5*bMVb3ZI<&-FO@xsJ-ktC&4eFkP)*0fxAW76k+9sBZ}3j6X0u(O!e z)wu*zF+!pzy|&Bq3y+s92OIX!kH-_7Z_QUFzS`^KeS=gC>&$&v7(6JkeKgF*mT%^J zo!!?~G&j4l)s{iII1m|SR-;xYgtbQ;Of=yXK+hmHT(Nd}Nl{xrKvOW_ZUH61J-?c> zOwl~_aAiHwe~7Dip$Ubn#<15In`6kIVhuqz+OP79wCgEfF!<%7h{1X|8U3t$F=qzv zW!*TylAeG*o%|K4U%0j4Wunk#lTT}FcpA$f;-ej+AwI+*IA$fmBM0=G9YPsU;hUNd zo4t`lc%|{=TVgVp&j)Q2?G(=Z7`1(EDVM z3bVl4MIzkAw3H3`VPaGC)yC^s zH>_GPh-7DfngFMx=RZaED_#j&d=DW_n;9R#D#bDDDU@reW*r~Eim z=+rkm7a%%%hSMfxpin~G*T~W{U|!4+T_2(8UPXDDXADA8FmUJs!@HQj9|MQ&xD`IY z<+7Fol(07iiZ~ND1RiLt4Q;awMlkC>co*G+I_^oNRVh$wwD-U+k|2b>y@f*Ynj_5u zBALdg#495@I3wUz2M)FUR>F-pQ)W9nj$`aQ$8G5Avjgd<41R8l2bSx+#a|B{$1uRe z$*UJIqb1yK4gbzarM!%vqb57Ye}&`1nSGsPu?Z-Y|JfH6iW~w@Oq+J7*2^SGhYw5= z)v+n>+7`DCow45wK7#e7XVv{sqIe2(_~~L7tF}@WQNO@&&;Q;k2h^11^syGTpzyB($Bq(?w zVW#A&eC4b1qu16W&V7b%%me1jraR?xYXmk}K>WZbQQ)9wWA>1{Bi=@XQvV+!UhCDB?lk`!7TOh;?ZTeTSs zG=598SwnZ<#W5I69MlDy4IqRi_WsH>+sl|qr=LyuO*bT6M6yy$r)N@ktse<+mCPM! zpz5ndUQ=ds=C%@#Fo-Os3{q!EqBCLiw%=9|m}h zqOl(`XWgudYyOZF>t;=h58Oeu8)6{4d!bCpkRo1oIO15vV>CqVbwD8^Qo$N zNYtR&xwtw~K9<@4v#vQ=YrOZ&sKR!k$|ll>0!dZdq2PbWcfNHsZLUv(X+JXV_$dNgW9ffY%6IB)!mXs!Oc6|vU8NWD{st?T=OL|?*3S& zC}EN>2x(>#4tv5+(m%b$jWn$(`p9I6%A=*FatUB0Nx-DPEavuruEMS$hTY>Nk@I+% zKt>(NZ^O%p1WJoaWx__Mg5fL_;~b3LsUA$jb_^XCq)<5B6(dt*>V^he7B7v9dh z`J}%<*fyWcJgCqdXx>2{@+KcwZ51O~2El}yT=zCOqpkV=gcHR#J?G31C< z>{~-m6-P?W0yW~n-#?LQsVvbRsDE5WGS-xqXh2^3#*`IU?Fg6`Hzz>VF}0B5K^5!t z0Kj`>QhP&YcC6DtsGu-HvelLBmaOjejU+ZKgxlfn1x zg*-cr<%l0cvO+ER8mN^HTF-i`IIs|vd={`_W=#Yr+0vLZI;2=qfKBc{pGu|QQ;Jx6 z{@^4+20d;f-{bTx#i>00@ z)l4l-U5|%Z>e{c=O2S}q)RGJbpcMG%|Ld2FZg%hH0q4`9@p%uHAywz19So?`dLR|M zwSeM~^Lo`D^?GGKf);yQ;c=Wk&x_M~2$(-U=gc)#=Nw%Wj+3Nrd{{G%MBZyy zmvG=}PDff55fI5>Fo9KT+TOUR5aHo{WE@4q0MBR#jDKU1#YR{~B`;;5nL;%oKgz2DIRW#FVd6 zqmNCYR%z5|jg4M@U%@<1&}+VDDp5dW2lB%C%X_aI9Z+jY1Zqy8T*k~;UfCjN)yIr9 zK-W1J@zWNQ#0&|$mu%`8uVNeKIL`pPyv%DmJ8$pI$ zM#ZlDVL=vvMSSLslX`~Y#}zz8(W9Kw6AaFA2%IoMRKpOO(!)GqAcb~@s7Lqp7h69r zIkcuGfb*{!IULnyFb49z#iX7|SmZ~)d)FGf)TG0+V0u-5+A%OAerc_0>(0YRyL66Sz1?3oh==sN}o&SGuU8*%SYD$y3O5tD5}dK zVOpi-PH9#qQ{XrIRJ~8ny{&@XSzZYAc$4vPb@7{|Uh>&p9{q4Tz(S2>v%X?wSm}kZ z@+^V+Iw9-fnNUvXxxkDS+p>3oJLi*pRk$5eF#{+lulu+9b}glCI#LS<vnd{05A&bMa^}WYhOL$X>#cnaa7gx z+$Tx$G)|M@rPX`tpyW*ifCE-(zcTuL;L5@8So);spRUUX!mH1uX)-?z$7QXeSan8N zzk)l6@?*A9b4{F!xh?~4sK zm|sLw4MiJ*46uuuDB*YtrYjaij9>l5bsG(QE;HCZI}}!Ryvr33LZ#A>-uUf)0eg=Z zbI)B|iCV@LLcXlv!35p&%KjOk=fr3O!3X^&71y(d!PKsRG^)9JXeF}3B5#PRlklJ0q z^UtA>dADR{u9#$-7(X07X5(~y>*#j?W1~Ev3o^o>_*wT=TK5ksU`(Ng$Hiqv(}r|l zLG*S10kpEnj~r74C{?NaGBke?r5$H1fakEAhBwY`_6#3qY8ZMaW}&|p;iF(r#_Q9U z!a#V$FJ+fP8x>9erEIkl1|3tN3p^*c2qScf=X?Mh=tJABkf z8sQ}zO6*IPXBVFeb;;j3>9)F!9eu@ovQF_J$yKJt30Qa8Uy9uBmo%hEFl=AH;+;N# zP=0i;XcvAA3|Xdu}pF)TZ%zmMNHHbOjxu>fqy)jG0R=$kK}J!1eS!LTr7 zEXVGch*#@o4!pe21s}nZ;+yHFL+otYhWT1+d|65LE!0&9X_8hBtGo zc>_dsgFW!@NJYlyevw-Dh_i+B#Br5)3mCs6E~eC)@&lb-MXK6SD)$3ZPP-w)D7@t> zI{(6t5GWi{Bm&RKZCj$1NLw)+YkkxC&zFy-@4C|9Yjy(u)rbFgXRQC@^!ld*V*FH-ui~Am+-|_49n??YNZdi{aL?75~wnC9BPlL0C43$w%w@F zBVxq;{GHWy4OD$VA!32xeeyk^#a2qj(GZsWb@o>gokVE{%V8Wg z256wR7q+|sZ5F9AiRzgi9C7F8P@k2`s$fQoJ@pu1eXa_p5u#Jv_6UcO1XumAX=ncQ zU{LG4L7ke3Z4vb+c0r@p4tjY%O-R#}A!&MY3uAvxx9pR~=uc0i&iTi@#XsaM`(Lca zv$cM{AA$byy={WSN$}vqNwIJOCI;Yj51sWP!LL533-BB#JfypfRW9f6_Yvoe8GtnO zBa==NpN~$)O}d#Zh|xG!E>+;?T!E#W7W|lKCL)~#okW}-HdGiPU9zksADjc$?+P7T6dNed3SUJ-hzIfI z*@dsyN+=Gl^Q22P1O(;Mytt|L<6WuJuw1F|5NuyWC{0UKvs?(449D8p*!p3`EvxnS zBhH6Am@dTBXp8><{ODEG^J3~{VHEc-uL+S}SRvm9urZ0qAD-3Xbn;uuK#Kifmn*evQZ#Qxrw_3eKkZ9s8uQqS zNPet6CWoZKQSXZMfyYch5N3G8@pRTRKKYkvBORkV#&qe zM;I@sT-euGN>)v>s$JPO}UwIW6C9YJ>T`hWnpsQ2IolcO^4ghGf!u>@OC zXDQZ%7gLu3EV2!{0V?N97&oq4KD7>_;nN{5)Hk2s0q-m)`cZlh_V#lN84N0hJ6i{S zZhP4hez`5h4TU3D!52FN&9?1}9G_6qIet;u#XxS`u(h_C+}8q*9eb9%2lD*d*oeSSnRaf1kd8Ka^_Rab;N@c0+h(zS&gO!SLEF zSrhKTplI50M>t>(@6t#;H8hdKcY$y_!WN(Oy5oSM-K8{`bqh5+f-}w>YT2vUmMHTI z%3}j(p@mkRg4JDQK(t?EAiP;*V0@`C32j+qP?VMfq|rkFA%gEAS2mxl*pj8@9-RhG zPp$h^aV$H9v|orWVV$=bG>q$KRm7DEAEj9r|3)sI$B$Ddl#VDci=(ANJQ?{6o3%0d zlRCN0S$jrN?7?y7aAmpRZ=@^iesnk7o|@NBh?H0xSuMJ0b473nNd%-iD4u%<_n!}M zdHx)|{MU+~>es~OAF*H1$XL(G((!*`8~;57{jH+rim-(AF+^ZNY4#1HL-2b>A*dl5 zY;58HC}b+(@-nX+B;J=1#q2URWz;+x#2#a86P`v2rIo3vxTu6iMxm8x`1if!3#s&? z*I!L1$rrB%S^S?IbyQ=?m}s5VFuy!5T20zb+C^wOoIikkPp@@ofR1l<;eLk7bUm#T zb6>(|uX(YC3TUtUxkOBPAa*Q4(Y$yb_h)ir4r*}=+BQF}=WlC+JU+`tKa#n2p&&i^ zg2{PKgJC_s*1*DmAM?>*_lEpf2tnec+{a9l62MlvqN73LJ{+*)ybu89z7V3~#silo zeJX%)dP?+{p+J?r3iKx-QXD{#%O49hB_q&V#S+{26Iy5qA|sLdd!^M26DYeL&X^3%c(F#Ih)tzzd4~!6eGWO_n0hNgIhT#QqSv|ZL8r9ytyox5rbliMJ zBHja5lU+JBpLv{YCT^|3{ES8FOf>h#@r9V&ZAFvUsyCJ3b**fBa)G7H@-cIx;wt`W z(_7!hSVbx1W?AJW|Tg=|FHY`asr85h$;7@_`2 z%L?4m*=DpffzfGi-k`)>ZL?CSV>Oc`L8Er>xu039qk2CTf+w?{Mwf(tdDa7N-V6<==x(O7y!d`j_% z{6Z$0+~?r)sB~;JMM4G&4)$}hA&vekTf2|h^*pL5P1Gn{sdu(X6d(J~>{+5UR_GA_ zYlCQYCau6Ks$5CZtFgpViCHDG>3xV@PxKL_`XZ|qPcNo! zc{T-}3#P;k?erlxH%4QG%371Eu}g?Oi279o}&@y8gNOYKOSYY)f|ROE3DBl!Uqh z+zQ1@e;#!7t+bL4_^%^v!%HQTq=DtG)IhQot|I*v&J67;Q%X$Iia92%L8ihUWts_+ z;|i)Q&CP=lB0Qz$Ce}vCO^YqlJk5?|N=5mlBqLme8{5|X%M79#L5LrocRAP9vxl5K zf5DH86SS3xvPqtmw4{{83a*A9v!G8x#m_mVKq5p~Gv}S#7*yw38WmKQs0K`xgX@=u z$39W^*&|WdW`--ZNcWf_1+bQHXN2QrsL(~yh?Weq8%G#FPP37BhC)Gh7VM5ag@;$~ zT99`}+g!y>B^iuK;~J|8@h>kz*} zWtA?6TS0o4?k>5i388osT-<9Qb@g4Qfe0<;hVyJAxqHfslRf1JDy6W(PGhaGIC~UN z6X~h(>46l^BXXLh4rbu$>Df&I(q*LCYtidolNARO;K^?rVYp*kC$xIk$H=m5r!6to3!q(Fhqe(*s=8#wET}6!$YdN6@3#x!9 z%mga93}@CE2b$!-Ms^SYd9ITiWt>D~1CPfr@T;1{!;$(nv*FI@k?xIq@Z(UqQ=+K8 z6tfT~<+0b-+RL%S9z?;$9Q^>AGAx^K| z>N}m|SxJzF+bQhN5lK&t;yCL2Y72=bN{XB9QF)N_D*8Y?vr!T&xfKnNBCq8I2vo`@WD zY^N4r>lE$nXa|@3E=;GUwW=q4FyC>XAd|k5+j|x$1^Lk@R#NgRT9Wze=TeO_ux?Z? zB9cDokCtFz<>=ta(UVxlB?Rgo@e7rxwd&xAD|AMCEBr%5JL@VHuql2>W*;W+W4cFD!<1YC#cLl-3OQd8K z#y2qdCh?zy6;yV(devvVz4|G@5E>G-akls13y_<7ca}_3^pDGd##Pp%>*M#JAB#!$ z4g@|dEAz~t!I(9Iz8cf|`Ai-S_)t}OkkJ9_(`2(_>!pZnn4Hj$wF1rsVN5m8Pd;`Zg7Ngtj ziTHk#@9KLB_lz1n60?K$%$k*eGZgFD8@8vh(_=YkO3^eI^d33#vja{0Y>;X>jP~6J z;9$$(Oh@tyj=&p+fH8Yn15E^OZKoC!bx?a0VUrv!jwBDWAPLRP1|#9&fzAxeV0M$w z4E=6Ti)CIe4?S{MyJHtuHRCoRaywV5 zgieQDKsMGP;bwnJv)z`oCl+?EMoV%&LbL5Tac4+*k5l$opSNu++)WKhX1_xJRu2~i z$WZG?`03VHx(~3oXW)w>xlf=lj!_ck``@-IAkY-%9lyFQ+ApS>`=7h5e-&&0AI-9& zmBJTO=52jjCl-oGE*lEz-w;GvVX8zJs%+XQ8B8xl!r-k>JpxU+ab|pR;XF$689(Ve zejJ&-K-1|}`+<7jn$8Hj3W!WQYkkb{m~_a#t$Q8g+4=*}m?5V$_0F|xQLJ(^+TFl`mzZs1I3Z$=~JDh6nU4Kv}+c*)R{@hk| zH)%a@N-C9oh9sJ?(#fo5j9rZiIYSV zL3DgV1tk8|J2O{hEV{TU@33k%X1_{etg%if!CJ|D=org@e%(dX(p-Saa6cG>J~5`D zf=k(b;}LyQyTIu92^*o~(v7k0t3L-;9(|QR6=*-SFnVm6)cz(knD3CDeuTg>Kje;v z-IP_|aj)MkA+Co@j4iBv%&4>gff>Ac&B+s4O>{Is->%2 zzIUoLJhr&WI00EpR%c$3*a$G!`RSzbN>$h=&|8!x%A&9Q?A|WF(By8U7xsRIzjQF0 z7?TtP?YHjyJ5N)IfWYsMX7QQS!*U}|LlzE5eX(_6w9M~|)uh!<^HFh$1@T*~`e;dl zbB1%gLe0~0s``}HKsr8@^@fYW6i8LQS9re@hacLg2mxx>1X>D@8Lqk>hsHB^ud@ft zef0y@XrP(&M|9z01cNuI{R^DIk*I@A(VrmWH!TrjP>(;7KQEVWwRo_W!P>zp^19`H z8aiqB-lHi!d*Xh#I02;vPvNIbpqO>w0Q;SK<+n)Db5t_5 zmnrS_6WrJo$(#4E9Ko;|Ib3A?JhT+6=jCT>kWJeHIwRtM&Tyy+DvD3GSlP5xnJ)r84Xq~>JlQWmY`Ie#jVR7=G=dVf$dt&$Ia4Dx+*zZ=;VLAmQbxsZyma~nldFrDw}}7)yyhkFclpu^?6Z)ZLZ(Xa=XjSx zbU#IWir#K?R0(<<*`sF~UhGMK5tuQRiKz$8m`6z*o)yPXw@rUg1Q6I|lxF&K56v~VP6&&=gt1*54SgFoh$Gmc zPDZegu|tao*SpmKnK2^_BQj>9^b{Zarh`cU-}fb*E~b z2kdSx-kW>e&(gGaiz4Z|?x0drd;I7fbYl!hmf8gnC}{X0R6{p0w1FXmLoQSFC1}l} z4D9vEeN)Qf4w@MW7k~}5X5H2LqM8D248?b$2>%tf!J~=r^P@rImIJr8!E*> zvL!>>L9CF7NUA>EPv8|Hh0xOze+t*O5hq)R0s%lRoi=iZ?hAd;ljFd3C46=xX9N)Y z1#1-!>ny(4io_ujgH`$@e;Zv0th9U?hNaq)8ta{olUNcSIej@N-9$B=W$RW*?nbfg zndd?bzj^lcMDKxEy=J}*p@yx*iQ8O$xs>5QtA3fGQ)SSl$!MAHX8M9Y^akB@2gl^4ixK%r~r zM-3w_J%-{jD6Y-IC&grDkyod8^q7gMNLT;|?t(*@_~{K5$pU8jK-!fXbMac?YSF&D zcHa__=FJp(B8R!$OvNX)Xm0EKNvZM{N@!~kM4<0-x4L~Tcb5mJXF3)@;g^kdIh<($ zjFLl3FdJfcgkt$5nm%-`wdV0fUltNbVzY>xWKSAM5?6VeZ%)Cj852@5BoueHV>8jb3%<0 zEY3bqtie4KG4ns(2@OpQa<{z;BdUkF)VS_%@->F4U3V51YrTQPf8hH-Dw)C-XuxFGGnl610g^ z%Pr)kg@f<%EAxcXdj5$u`gkno-OYR;a)$3w2N^VU^#fLvo)B4-o)km|bWh+;(A1OV zo<3EP3H9N7VeZVbyaR1tdFbE*(zqwkJLt<-JXBlwLwl&!v2{D}-dOiE4vfUmu=vC* zH5GkGi#rAu(I{ckS8k7U%;2NGJf6&nU3PB@zwS4cz~ds{JZ3JJypFX!!tR zzmxXz2;r74V&Zr8jo=qB*J>$pYd(#{L=s(k&xn@yctclR>jT7-bHqM+S?}Ra8;8E|`gCOxGtNoRH;u5Ne@{ z9G%1}G;mSsrG3?87D8VQ?xG;LUhNMdk2R)Rlv)svsv%zl3^-Map@kF1A=ayJAq`^M z@i50{+h?|8R*IJr)XE@nQZKRd!XaVb5H@z$C90xXnT4+=hE*!FWAnk@7)%fP-7!Q8 zj{5p7lz2cIrnVwRY{oin3vBK0a?Pr?WTQeHs<0qEcy7#6A4gr;6!X2&^bR7%RW|TL%HHF2!pEJGdcAkJJ|%sz{s;< zeX5_WV5l=@5R+1t$IxxmI`{ugos#qwy#w^6P65gIE+};M{Wv~J*dmexfq+l!|6%MK!|U$5 ztlO}$ZQHhOHnwe}X&O6?ZQHhO+qTiXrw`_u`<|IT@0b7QbDfRf#@cHQJc-XW(Ft{T zXOmbD4plZ^0l$&&iXZ|*kj~^;Pc#=E^1eLZ|N6WI(#pHw9p`PuxQFLtAh;ZAFf4Cj zF(2U(f&lnqs_1k;h3 z@KVNj$P8d0!nGiJmbSeQ8{$N&|H+UfT=ed4WX}zX3pKy%dfow@9);Pv+}A`_?6qB2 z-NtmEAcr-cXAu%Tr)=mBdkVn3n@uCoEB`u))xZ33X2hnBTQSVi%}XC}%a6%s3LtTF zN(_F4FeZUi>n`cXObDrrp$)R$n=H*E&)M4#UhYardHnHc(v`G-_Ygd@+AVd;0?5e1$imiP`i47${sVo?1k=o41OYun z`D~3bmz0Ss)@x;75pl{B@SDN_lhiwTLEsa;2pKG z!FBw{%?oSL#@JLjc=M0>48WZz82eHDi@*AN&(L)8IUzhs5c)pfXPV=XsE3zf-K);s zt}@ETxvxUKS6Jcn#XRiOMSpzv87EH`4m2vwHCmmivgYRQImwK0UKI`+AqJk2y)#Z) zS-9K#JnqoUNZlxhLpk^h4-`>%EkbctvB81WJ#PobQ^<5e-`1|EK@EsQ`xR1>JV&vI z7!tPxX#QF~&4#P;V=i((<)(`EYOC`?R`1}^(sTK&M3vCamU`YTg=$wwcDZA-|JwNG zj$Neeif^A_%U-LBXIFRBe!S+nSMrYa_|PnD-Z1#&MQksUag1Z)FvRZ_w!-tr_ty+w zdf)E@%qD(T#_FK74%YFs(-Zb_AnERrqYMaR2vBL|J3=*(VNXDZ&iB9$gQ~lQ1^eG# zDP}3tw*oa_QJLoEr7c6N^O0UGsbQcwsa;^0qzAiU%4R(7MxT4&{_1!>DqjQ;08R%D z_30DepB>K)Kn?nzUb1o3RR=5)cpl4f$%L|{Y6Bw*h!97no0l3$Vtxa|FF<4rp@xKm zmBrT<)RpZkD(C&i?;@oDS`1w|wNCbDvmcXw$Q8S`>< zK2Hc;Fjje25D7}`0dh*{{iYIvuCY(pBq5TB-UKn29dW&(*cT()IzXimUh(|uy;m7# zU@$fiFv&Nxn}-Fx>nZ9G-mB#_7E?kU`_T?{-S1Pd)|1|Zu4d?#nqt}KNpNod(mzpJ z4{qphhTlv&U5BMf?#qn->op)C&wA%L?k&6NXPh%CC5 z6jH;i*~htfm8sQm9+PxUe;Y`&T7*qxQ40_dTz`BxFS(M+XtiD>%#=D{$pbtDG@RUDK+e_9 zv=(o?^dFLSynoW|5HBnd;W0=7=uHzKNLTK_V#ho5QX_i5WoCZs`z*CZvq}6j)fc%r zTN^FaMGpzbRcjFnrBHZTaQWusfHjfDOWt)%TlUVQVz^cGfO42b_iE~v0wVD>z1%T# z5!CwP)&lk*9&sJsXDQCQbuwMN!MgBk%lK{6)d}izCCbMszt3iiS4Ptpwe(Ab&PJ%x zbkD`&T^y!h!P=QT?s>e{IR`AlFKDKbo^fjU!Ycj^lG@@}ELtA9bRL(KI(wQ1{i>4^ zx^i?wYTy;9%afMMRy7(8`<5|X8klg^83XQGiM@;FY5mP0xcT;&yXHoFc3?>6(jx?9 zLENN>MP^OTJBi}G$GC%VjV=Kq(rfGs>y|v}p5+f`)7)z)cPS&Hc>`0&z-aLK;2$60 zJ53(JC`QIIZIYz~uGmCP!kKppQ;T2_;iJ;ict7lusacdd#2;a|35+B%xCsv5!j=1H=pSj;?-E8(+yFg5kuKXN5kBj9 z;`Mu>$cqn@QV!z-Dkz$+c`2(}1<3fhmsvvRgrA|GdaH&_DXkW7a=yorEo znFo*iI8qLezsB|ey^nn}IPQ^#%Q)^H#vQxYQ;2a)Uz2k1Dd3QGoOTOLVps)YGRYLQ zC7?YNhUy*Af|4-o2#AX@r3puOpDFOnlOdAGdxstP#UINv$o`7t{(weXh1S&bYXEwE zbdE*j$lQ*%2i%cv zQGtzCj{3^cfKruhTz5b0=u|SKqh#}JtNuw-2%Jh1CvVSSUc`37io2$<*$<~MYnMd3 z093|2&Vbh@>9z3bH~fsZzbVmXiDgZ10*2sfS|uzLQ&{~l-fIrmYA4+%M%y3HeqBF4@_lv##T%giE~S9p zRJL1>K3u0^WY82SlHbl7Zcj~d4s-9u1O}syV|&~*0l%J*a1J$D&h&%T5jrN^E}*l> zFo2+g`lJI{69#n1sPxrcs~_lM1F0LT_Tm9+BS$7&Dm9Y3Ine9bIWsW zJl>EHCs8ez3uTdd5iAdLzNUyDGD9Qd1_IC8iL{gIZ`F#UqP$tkz-7d) z73R#rqKI8`YlD$M{aBa69!$7_^dL;2RJ(zT&iwleqceS1mrR?b9;T_&8lxT7B?fzh zDNJ6KzMi7FlAA2aZAT;=I-?vD6FTi9Z=lzz1-p;jWO45&N8mKOz+3Nhp}<-2GZvwe zkKNv@<&Q@i1+-(~C@l=pRHhp1Y5O3nWm>FWMTaS>xp96mpNFZ84=fumyw#Y4w8J`@ z0@I6P_qX*coe6Qk?AB~VI!WJ5|i4`~n|Tn24wvrj8Me~P~m`xjR9QZmnU+CZrH z`0q@2tt^6k=FA;)NrW`QFACg3H|*sd!XTW(ZWcB^j0m#OJfEaU2siQXSkzghryS#m z>%`C(e7c$*h-9O!^N$05vV($P!Vnv%{{DQ$lp)!d!W^gl0entvF638m6S9SUj70Os zOna7-L8i<&T;5e^Z7?04z?x9?34RER6K?4|FKpf$xa1*vzf`N?jd>A(PA2s$LG;9u@2tEKFs z4)KVXgN!=}_qMI!AmyfZpofWwNkklS4`IY;J(Sl}O_G;lUmjV9oi#dzE-PcG@n8N{ ztl+FE;LQQLmmWY0;rLVfkT3!qdYU`A|6g59y3)D@k^nl-&vm0&<`s?7hWo~qum(Y} zG98Lg;UAJkStMKgMocnf>*?{!>joQ=m5`9U-yS`-dxK3IvxUX+-54C%Hz%Jb#wLC} z-QIJ38(gx;3cdfthE;^@M%K0d=@G=Tjj zGN)L15$%2H?m+=QF4p3%0UzDfF{9kCq~mGF`^gekT-9PG_)}|*s;w@0Gs?9O5{GIW zY_a+~b=;N1>6DlaCZ*gcYQBj~7^f8yFs7Y#tivO$77d*Gh@(XDCM({dL>$W75-DoM zw4r-gHx09tQrP`(YUxuxmY$|(CRxYT;srH)4BE09`ntQ`v90o*tospfyB#dbg_8Ue zru%kx3V20oO0^y(IcW~gOuRlwpHDttK(!@puFZ`isimXBUh(T)^6~n2IoL~LP;f=t zdJ5r~0Ku6;${?vFqpqcYqybsh9_Sv{zYSrXGmpT|HLI6p$~NG03Y(ftB- z7sH){)~?Pl%FrZ@(pQe=0H_iTxmvJ1XaM0i7!nZd<@N+e#-E`B5-=d339_K}ls-F#cdo>1Is-&Zw zu_o_N7i^9SZTp@g>gZBV8eP$9RUjp@9x0sFc%RV<5TDYvu^(r#yIj*XT~}i#ClYPT zDf}KJX7Fc%{nzsvXV#SO$~G-`&??_soGcZ&1}->frxS!qkiyT|eNJP-_mlu^&XScN zKC|Zi8ltjWwR@LdnX+F4vkiI-jHN8vEf;~y&oatq3;C-9OWUPgR>7?*IyGlKX}gs< zUcAr5A`2~KkLd5SFToXnN8E|Vbzuh3b7cUiPlGQ&^LB@J{m6W?+xP^Z(I7kdE}O8 zsgM!*tw=w5@K_wL$gO~9+r5}+Hn$c|+nrl@OS#vqsNUaLEtVS+C3#DODULKor89ab z2qhume#biA@GSa<;tY$BbVhPz!-YO(0e{-!aYh`mJd1ZLddZ~QXr94RVvOFD{u2_t zrx$10WUHal^EFfohx2p)rDv*{3Uv972aJb6%C!=0(rA8(hA zWZg%Bk+>$i*`W&$>5PhJ@ljh~H#tTcJ^`~~WSaC$>*aP-szNH8{K+_BzX-&~_zINk z6mH*(kIEnz!`ln2C3l$A7QAnDq5IdE)q_Z^R{+GU?w7yHR{W1cg5R?bz%KO56G ziM%0eTOjMmDI?CsX9biY8E}Q@HKZmM2dBqr_s@j|Xz~2ZaD*yynouKc2~Y$qh2HDY z+ilE#@m!K*>agPX-z!tc@hvXW7tc4OAFd4<4|Veu5bNU7O`5cFBkN;Uc0ieG0hNTi`RKM0bHdN9x)UY+A#B-|DyTwz?O zPrcH1pe1omXfS=38HF;QyE8#kRVG4cnnp6W4QBEzC5voM6u0~v0Tt{y2ByYog1{Jv#Ie7Ut_U! z517@eA#(-kg54G%>uK{JheY{B3tlcq6%Qa4y|i6X~2f+FMdkw}$Pj_=I z&DoU65`k9FVVbKQM{Q>;0=c4NJSk@ZLVqo1*l24)6sIRNVgWFeQM5GG<+WPwRk~k+Kjnt|gA<8LS4nUs zyE9$pS9*K;x`Wz(dyy_6Ji9sD3b2nsdy*DqNx zNh&ojx8jJ_FABaeV{B0@mkA%1D%8>6>Uj9xB(s+)eDUT#4iQ4*dCEC%(%){Gmi~2Q z5nB^_5N>A0?06f^WJ%yOl8dMTc2KdtU^5)#!+=c={So zL*-ax02ckGbAiJJ9hS`5aVI8wJiz{9Y7xT|EATxeP7(U9qz(za0wmT+Rv(ihl%vFP z#02?(KB}w-F@Zs92=_j}I&#b$0SXz7g0T!gQxa(qGqm{bZ|%1)SK5#^z-~MLY5Ff? zYh-NoznLpWlMYw_dGI55ULn!qbfG0qeDenafKtwavY1@4CpUbZLbH{Lv6bp&p8M$& zZybAR2t4;3F%#M00~gbWySJy$=c+K4NcBkfNXXu&W{I*=*mr)0I4n`u_tn0UB{q*z zGCUHY`kzNjaP#?i&uo4Ub*$H7yaw=t6%Zm{MXPJ%^zl7r+M~izZ)(M{V|=H5MvG8myJR;l zorP<$5L|400CW(tcObp4flkXX{;5E|-I^Q43o0hK6Ev(i0bnWBw2iwsEXO%?3t+n> z*5Igy&)4!T)z#iJhoz&{BULj8^4pnq;bNSa8bF5!)90Sy*<#@Q^InnNTv60!?;RRe3QqM`* zgs@|RL$U^GPu3?H+zK>bIFNGI%Y;7SZ%5JlVj7xW%JH)vZ@v+VsXJ@Gg2kR0x`o0Htvy((LraF zX{z2gZpC0$7^mj(;AK<9#j&-X0RnokP5A=*nBjE4I?^FS;Fa>cEne|d{oq(GB$mP#6dSd`C(9k8k~|-Iz9dbPP`+#)72rb zQ6*=&l2O@yLe+6t*dCbR07OHHZ~`vx(*{&E<<*n#;1C3Iicy)B?!_G#=LiVyco}Q$ z$bE8h|7~Z`X+3i(sK%G3>@~3p%aDZHE%jV7b5QI3Ule_Z3M~`08X*3cO`#_*dl(l6_(eu~_1oj@F zXkwfFT{O+k@;CyDrVF}Hvq^MWaJlYH=#KW~-zFJ+5}+2Lj{4)muxLA+?3J?2?oKls z-j?L*>=|h2@3cB~tcF=|%AZP$V?rv4{APB^6$?;@*j1~$#$QsHO;#-}a@$~Mr)l9; z{MN$po`<#lGVp^9i(tmkHXGn=C^o5meCp8utf@3o1+qO&ci3?=Vik-cPW|tU)ryn$82d!xdAW1X2;H@-~Yyl{Sk4BQee4 zzi44qUYbrY@nunN;a)Ith%F9sOHt)sZ$(%XG$@8935@pxGp-nOQ>DJGqpESq6yjkx z-SHDIO!Wev$@v>9d1hl7qt%jd_aSL2yJ~Uo^SIr>pFOMtU**!YXScOLCnk`Teyw=3 zRfcvYL33>5In}VuO9lEkmOjJhiC(l~UU{b;na8+iDEvjV~OLtpEl#Xn~3 zm!xQx`1ri)*yPn`6qN6%+u3oozHr=LL(+R3J|ga5d^=o`QSyU^4jENt$6DJJWY?gi zAAeiE?_2pFumYZ5eZanr{7>U6S$%6`6(@5m$A1Q9gwmzltUNN0EzPOwXk`czSm;i) zH94sNv=J1BIz9EmwLZP!+uXQKoKxjNImvY=qIjPfv)2QF#rP{~jYwSmu($hVhT}zZ zWn-t0?<<5J(qycB%zSnDAZ7u}p3~9LsJwXB6oqu4_NLt)hCe^Z;% zSX4nq&0E4bi`BN|bVtny1c9S(u_8E0Vrw_5&`V0ryqEt{ceWG`>8VZ7ssOpT#PG0i z&fLw*UL)`t4z7z-ZqquYM#oW7kGZF;)PaYIID`_R3(jRO(NLz&qtLX2PIFDnT=ZN@ znYo_i#BiM-OA_V+MuH8kHGTvP@U0rXo?fiJ!4hxp4$K7lfJVmT@T@c~-_)DVZ7}+t z=YuXSdIsEv6LW6=X~$z#zJYuCfev^tP2Y&)3@d$n%=&CpE@23=jgm{Ci&Wi+7$4~C z_a)G{$vVR(AqL!i9VFg}R_`}bgX!W7MJQbcm(MCYhC)y3p#vn4jzhxkEqtMh`oNIw z-&EDK*~Q-W_?%!K^PJbB=)=UfKKDxz(G?*)kNF0@(B@Mdil-42|H?{tkVJ9=b8(mt zk#*XF8IaI99_7i~*njzggz&upjS8L4^w;0`wS_c~l9vJHgdyOmp#P)q*~-}o{X=H? zU-kOGeDGfdMD4PX4CZ;_%EF}u)O+-We}@Vv#1}~`0jVrGM)ksR;#MYom&PUEd^u3i z&2xV~3GP-1l6uq^0H`3t^CH=k?(uH&G4oS~VZ9&a1z6MKx1iV0xL|I`dg@sZf>`P8 zh(%@Pt{`s-?`0qbt1Ct6+{cuSWUUM_%?%MoYcaBDztHb$Gh{(cT+c45=9Q-Tn-f^T zU*xl%7tlSbrIkcRW?kesU9m+MitYM_)5q#qsL)~?K@)iQNM}P%j+Ac;7YCNgjFc=& zah8d>qs#wMgS?GdmSRCZ<$=JQ}w*RSP z-;9v-oJc+OWafxg%+``N*i72+aw87;<13Z*rCX7UQBILMPprLcb4hjPSyjlVMG#cd z(G+w!1Rq2_5xw1$StBYDdwuG0{ifCPQgWoFHs;Q>9**GlUvO!i?6Mb=JH|a~fs-hE zJ@$A!8ZMHWg^%GGS>-lq{9aVOFdO{^J+GjfT>Z%J2f@NzGF-h%M2u3SkPMTp@SY)( zF+neIZm+3iMP58e)JIo<9M>q8SFLv45~3OY6`kS8Ndq(s1AHCiiUKu&7!i_)Px)O%!hTbyJYC0RC0HW>Qi%g8}<-%!`p5w&&D+Zf2D zsWLkV1+@N_gfN6{&W+eBESj%% z!h{~ZG(>6cIBU(8de)`P&t(=Z(WDW3_4@iFh8fQ*DoTQ2E;Nh+pg_=So)=t4z%YvT zIy1l%IP*|N#d8d(?Q~P)4rlSXHz0h1dQR{(40|wOc{ZFyXVBL9IU`>BtatMCeR7aS zzCy|x6=#q^tV0y;K!^FtNtRVaMi6Z_9do5)t}Liwe*yF9*uH}WUPkR89)I(@GKd|e zYR~m&gsifj8!0lJ9qrv2mh2oJ)FMN9pr%m2F;uf;k&nA zf8-H#T$DsoKS>XV2I$Qg5~J3D$_i5T*B=~$r%Wcivh|3hOO48==j^=G60(a`Nfw%F zf&n&rRcRYq618KBXPo8p@N9N5*i^zp3Vg<~I$hdY)uGA@oEONUf9K1_l?MVU1|%o` z|J3XJ_PYO%nZiE-P`@3I(f~9cpRfn`sjCe@>XL+xh$w9h6WbAc%-I#Uo^0Zh*wqQ7 zBklE%;X+|OLtGxowf^Bn`(v6TpM&rFuWO_()&UljSMoApuKVej;8*(dmfu}ST=MAC^t_2`Kxf3lx7Z}TZ)Wb24I!~ zNHyZ(F^4cW70Qpl(UDZsvY%E1oPZhN1b>9U{dR)?0D)7Kv7P0I_g-aZkN}RmiVsvI zw=?5#3IN&pBq$gk&9B-c7H?V4uT2@XUeT1dK)u`}WBPd51);NW=jSyRDY# zb|W?U{_XAl9sFBR)2iUNQyHgfT!1P>1Uwrkrd$NeGn3!S!-PEklH+B4SO1XOI^9gn z!>dIc{Y;2fS#0)=$#!BlJ|AOpm*^%;fi2|uVGLGO{ifr?R5(Ykoa6Ddi~h zbW|?U{=3$|grz&A$0c<-8FgZbq3ZhD1_g-;DO2-pGPeL>#r*^d?*iRe&9xF`D-w$m z(5Wp{YN|3EMVCZ|=IM=b#``e2OEI^ydc}JXa|{v+p6P_-JsPp@^k$}(cm#0 zELi2U=cLcMjf&KgU<8#I!Nn#d&1S$M$8S$qxb*dZs*z@MXGQJaBGPDU{PH9xK~CBD8tCfa_&)z1y# zToBZ<%~9Pchi?>GL->+`Zh2!0v~i%S+jW`esi9YFS#|*+n`+f&3C2)AE{F+z9Z8Oj z^7rx4WAvNXIYC6}q*uQf%Nmlg;3{5c6Y^W2vaCPt1NZ=wYYX^Dvmr*l?C5M-PjQW& zVZwl+Uwvc1xJ=F~Dbv~O+6r@JXwlMzs5#-IN`mDhL&(0kNRYZkKBrV{G^nhbz*iw?+wFGPo(wzJd_2N&HR9{J1HfZUAoKj7*0hX2VSbyK|Cipl9ZHt zA{4YZD69KtR6%CJH550_&Uy;}+)iJAf(r=lr^{<7>cq~U)ipxTCiF83w06D)V}LD$ zdH4CV?+|F}JlyG3^HLgqW1HySrw}M3E~D>io4kHcT0Cs-iLY>%;9i$xsKc6t5`82) zjB;H=po9^-gqMnE`XsQ-9XSubKsE}Q+8MDnDj~`T)JA*rDNh~pTZS5`xW6)$?QjDx zCT;DKPE-p-oPjam>%yV_y`&97Ycf>^xXBQpUio9fkh24TiGI`UNtxRi{aY-lsI+a-*RTXl-8?Fn)@#)t8Euf#Hh5GjyU0)P5CPuSfp-pE)np1qZT z20Em1%5aDRX^KzLrBI8ezT`5=#Gv%^pKO7iC8vQ%bIu}7E%gIEH&cAZxcMT?Yfzqr zzN|tWTaK8~#aYwGI7AymlNG*SJ3{u?ab|`nXSxZ%SU`Z7{-d7BpRd+p{((LZ{0LRN z{vOJ9h1)w2uz>DrgN=X~4k?f$rDOsL3rmwcf$q+N#`_J^l{e#&HNC%7#$Gbp5guhB4)OReg%+ z1#H%?g(X`s&v#q+wP*10YT?lhmNbv{wWi8+?Kl9NYS&t##Cxho1eaKp5qMkojZ)HoNxn~5@Sbr}LF7cQr5v+F_F0{3jPhAhi$mT~_0E>eQ z|2kGAziP=h2eVs771q*Z`aFUb&_DxQ^OD`t;C$HrXKyJ`h(Q3as_Rcv2M&gXkc!83 z>qujM2T|Yq#-Zwgs@ci_Z@yzU%`;;Nq2@ygf9<8bT*dI|eb!d~iwTTlb)oH#nKzcG@2tikPU0u1L5Fx(%x z@fDoSjU5dEh^haj@BjBB`mer!LAA7AcS_@k@I@7c7@xd4C*j+_`+m+=&9ly#!tW^8 z%Yd!Hdf&_f>K&k#=5ah&^MrdmtgPJp%-v-Htg;48zX>2o`-}>-22n*W?^zTvfeWR) zu)qc6GurVLXxG|$&LzVsel=!2X-Iihl<`8mFw6_&MP05Ch^f+{d`3LE>|dRKtA||H z^BB6x@6Lat{deaN|GV=a`e)~VNM!{v);=LqfHXf{Y%)#;=={9|+s9pue%gI4NOs7K zWj`A=i`GCIF;sj?b04KhRu^O7ivezEZ*i$XKwZ+n`wUSS)r;0I;nBY@s<%U}o^?HN zK&vR{d^}bx!TRc;3;E+?VP&Elyq*S0Wc;Kf+;K$4=gqLc)$B+F=`$_FSf10xj|5yy4zh zxUzRVhNUkx@|6fqF2HV7^<>3qo5X zR#1@_FY52^KhA(jXhyv3sWsHHt@5g1OP3BR93gq1EpZ2|0zPRcK5DgT44h>kU*nza zuSq(3NQ~MP;4OTBU4rnRhD3@0m2g|@|58HyqY$F%GC&9snMWot+^>k5_8Yp0La!eB zBMq$?GK{Elo#MVy2?PwZtVxxrV6thZw(c8Dd8|X}4d8fabTu7akRElb*J-oEJJB)m zA_K5`djDhxri63UH;m3{CkazHZ&!e1YA>sBD5G`MhpTn=EkufxHnquMhq8BqgjR^m z$*WTSZd7Nes|Na@wkA6A3%ZBYuQEIH$yCRK(O4=8q+Mv9$2rl0m`V6akBn%HL~uVv z*?rWeIF!~SDg8QDA>@6ILpOsGtrUbChD`gA*`K#zYvYc`Ma~iCKcHYqE{2#NP2Okx z`Ra!q7nAq1H`DyD7qVE5{19g7WaeEZ2`@ol1)4A1eTs~~OsG{HC7yBM{t)D) zYknZ4Ja8res9<4G#DOzOZ)*%1$oNoIvSV1UjINn zC7Qj_rMzZwD32-iMlq~DJn#lza3>y7E#39SLhrB?1S4vQG5N@1LGQ?IFVR>*n!Uxs z@`kNpDDgRv$X!bvQ7P_A4M&w$c@Vj{cHunvclH;1qR8_lgh5#rC;h|_X>2xos>t;t zLvdzMg>vWiKKpPz!Q|nTB%`JmUmigY1%(yb$Aj}%rm!6GMB$SBaSg##>9ZFuPbVbs z=D8A|;0I zusUshw+&)tr%I$Q_&G=6E&VLbd&YK! z^^){^X`FIX3zL*p@d`ZPU_HEYovD`egZBkfddp47m9lOW;j%eM=+e!n?++-#d=~Wr znBefB;feI&UxP6!t9#x2b#`gMB3zd$@kFhmA+SN1^SkI}DA=u2FbVk)WGKYJU=eUg zihG&iW)m6B?0< z8aNs3T?C%D*5g*$eOYX=&`m}kx?6`QAUPUko<%w7GJg+ohuE8w)EXly$1F-ZvxKG6 z`N7yM@3rkumfLl-__@g z#3sGQY#*k737Hs!)$FHh|9vS6T`i{D3km8tG&;Kzvkg&);0kT6U(LbTYzOuj&vu3> zFRX*ksC!qx$jdRGgw}Z71&HDdrob?@{dTV+XMswms);-=gd)(-R8nasv_TDx)8Ex0 z2Ywi<;!-rOaR@bRHK)_5E)esmK(b` z_RS#7R$QzGzR1wtVs1K+(w$HlBtQxz3Dt}835%SFmK!4!Fa)=npABR%Ka?2b!6uRC z*~2}~vIl<-l-5LyFlp3C8(=R$k)G~@?mhp&Mx+dtoPB8OG~-LwMTKoAb-)owMX~m0 zzjD3rW4*xdZMN1Gw6IiN@6B!qEw43QDz8{aa+jmNTFPBZsNrfp=kg#hu1O_zNO#^4 z4%rB6L7rX9&OC*lT+z|C5KlGrj<>evZP=jrs6;Q}22O8Ebz8%1ls$s>PKBlix)&h4 zp`R)YOzu$hw#c}bB`SAKF>1vTYUOvNcL*%c-aD z7b?Vj7LEEELD3-LZ1m6}aR>%c2>P7}hD$};MZcIW=#A{Kr(y|v>++7A%*}s%A*stF z$3Q3HkWeKv!^%RD7FLhp>R z@JuJLDhqTThE+J>l}!u!poC!dfAtqrnYRWoDu*r9qZxu(v(t&M?Y_5XM#)6#X{tU_>YHxQt^mjiynXJAf!Tthw z+!4eSJ!sRR_mp916#%0$jzZ@tfKhp=KwFF`LzG5!4sX6Fd+n=@@KmD$%5f~9Vfxj@ z?qaqsA(GZ)Ty(Kwn?nzvL4oew5C%qqzAJPxM9)CqF)E~w0C}d~PRqdvU_PL)tO_O> zQ#46DiB%IjA&vsQ!MK4QRg)kNAqw&$eE!=YvwoUK@;A!r1TdFi{9_gM-;}8SB{2Oj zB*wp~uz>(n*viURoddZVhHdWlB++`x(1<)KHON|88nf#xHx{^FpYTT5O9#wyz^!Iv zO*n5)#}*S83r#-Qcp@!6(#HoCmxfeuVF*=+J( zb++og_sy((9$&u+q_vpf0|>O@y|JHi=+;}i%m_MT`yignx7$ljD>#hAMg4Zv48-TS zpqpD3MpV5pKtkT0`R^2VGG4IlOGo6LsiKh7Zgt{PJOA?Bh0vU?`r1=fAs)+6- zzJZ)g>IPZ2Qq%zp8^Voe5NY5~!0gcibhn^!v&nxed8;z+G%*2&A^|MC{y1w?0$idQ z8~vZN#(y*CH#SSn)P`J_m(y9-TR6;P=VW>ThkOZ??@r+7v!Pvenq32&^qGIe3rhW- zHbx-=?n#uT@qL<7lTA%COigSq7cVdQKDTwvLSnB`^$wKylyfxcejWrUvBBbUnbE6d zhoY;AdF}*v`;-&Lw;PkQ0>&y)evP|+Mnx_v#EmyqbU$&X@sJcR2mH_#&^Kf5Bpqf)s+`7B!5)W77^@ZF`I^GB$eUDJb zu0&7ZJ5(qTTvjS3Tz6GKHZ2H7*IdVkedmcWVdQ`u{3Q+oZjSxDLpXdXzb2bEwDV)g zjU2Yd?~o@F{E!F`IZ| zCma1x*tF2Z4ULu?%~)BBI%`Mo=l1xUjI26=^9x;(aRs|u&(&FHCzy~7+-Z~u^Bv#4 z7~>E+tLcn1QIQ;Qd0>b4##`@9d1`*BbIUs{Isi*ku_)0grfnN%5u#Zl zVH-8!sGLL>6S?U9)8@Ub_%+A^MyM_w^EiiVM5Jd1*K~E2RgEh2D^U7+!J%wuYm;af zB*~JSnaM$20JxD9GTZsh-;lGC^186LfZPBA$m@SZ&i=-3mSSxf=sfli`03P~D` zGDQn(=<5mDS-ZmsO(2K?RG@-(=G`;=u+i}3b(a14wtsNuXOqqkSO}6Ob2$Ebmpw>5 zO&z8B1mlHV^ra1D37;BP>!99TteT*VH62H^F!`(b-dSOFi~J7@zFKo;2EH6(&JY>B zaw*?Kr0?8wHiSWx5)mIPA^U2_wSDTvzmSK4AEF8l3k-v7qM}C;iR6i>odgAo4a={` z)ks40*Ak!Y?q!Gi-X6C!!|(}9cd3n@JOjZ$1fUQLXgNx%-xU!P~eBhf$!fC&Tt zlkA|9vAy%}Q#5_6|5bL-7D)`5r`E>`dyFZPAWl$RNjxo@rgCP1z}gOLdI(4u7ZHz= zd0DzA@Sb+;LUa$wH;cdX&bLmHFi-UaZcB_{qAVgrTT|wxyY_J-+4CU7Q;zg<U<@*ixna@L;~`c>Q3q#6S{=wFb1VS(~3JHiS5nKlEMG<^x{H|*Es zsS1gl+t75JBhjL`rN1v(k~`{+f4voeA2_DHPH(|tO@qPcm+Ulqnn-ZU6xH7vdLS^3 z`2~!_tHilMpkbr=5`u_s7DGDpP4@fX0odnia=W|SS9XXtwXGgrSR3@FkOj)%bmI)Q z?au}WraYLAK#OE8uMJOl!#_)bmNl9tTB8=*`PZc|EhXus5K-7w;Ixk`B?0P#C}RV1 z+^E_*ijzo!?lY#o(v!0T5L)M(I~x?ldZ#8~ppkWD@o^r_wcpZB@xs(DNj;vrar2m% zz_Dy)sD`n`jmng$;PMh+`uAX`kr(F!rQh7@i$UxoN}4=<_qP#KSEJ=HQZkq1f|BMt z?n-`^)1)`dnY$$98==$tifi*Lhpnc}E&s{_@|V)j(XQXh7d%=@XNcgcdKjO(sD=V# zvVh78Mp&aTfENoz7i|unWjD+vrQh%$@BAU8vrYi~lpB$$wn%Umk$qdZBu{lH)6i~J9 z)k#UV4t0q9in{A?EML`o&u~DuHmT!?V~C(KPd6Ux(ia5-pme{k;&d!fUvNe;(Th5W zF@zbq-|{)5qOqr8cXYeHp&ori(|P?3=`<9s-c|*8(fI+g7Jn?Al>ne=E8~B_VgF}v z#;g33lF3A~rEboFA|Z@TcuJ8rEVvO_LL4TfK!7mtXtxU1Sg#d8q7w3kecvx8!~Aa^ zmDc*~e3>I5QsCc=j;4nc4ZdY{wjoVl00;C^jSj8c~h0+G?jG1SK#V>3FnD zDRr$t1u|25UdX4XoIW~ICEiEgMEr6nWz=6D0;v=_x7A+0-|^9yMwvT0V>H@(yG2k1 z`{EVTG#AFS+@mX=v))P$qLjp`07=I^9Z{|gcAbAAz-7s?FYweuz)9*!b^hUGf%g$O zn|BW9Gkp^NRf@-ostWsK2Ts+vtsne93N%w|#!)JvAZ?CiYbK(CcSWzc$snm+_ti2i zwTg>;s1p2cNQSZoYhZ#?Oxc{$&tONQm#+nl!(=e-%lBO*PF;^BW@c;^iaBfX>Z!uG zaZRS9>O#bzYjK`*t*CpiKvTgytac!J9V(FoM}(EO>>5qlSGbT}x~is7_Pxw*$H!gk zXdjqm<_dFg3%$S*&@=Ao!P9m6u5C7&WON&l(WPHtPF1`313SFH9MJQ;znuxaObmX( z2*DSP=~HR(kTUgukUoYkWBvdhVB;i_&tehd3Xc-U%#3E`H4L{3Si;&(0?8>vkQ}s& z$f&IwOu**p2dNMPVQ$8%+nm!mt1hpLZOSg(%z?hgmN@PWb zlTZBr5%!Kzl5N}8XjQ7xwr$(CZ9CGoZQEw0ZL`w0ot4f? z_%+&`W34$x@1ytPe{v}(Vg~E}Djy%d@__DtD?yZ;o&M{X$LbeWgZwY7=C#^c%%d<5 z;bsbJL@|F)RO^r8o^*wh8KMNEq244EUT-L(914g)Jf=9T*;s1gXJ|eDzV}t>8h}=&7{=VDl04$FX~1yW1~Uwh%pI?mNT~?Oa@PNSFlwoJPnRXvL-*4`V%>v58YLFF_qMf z9fJ~bVdH%EA}Q9}w*4_#dn@(`ovG86YL-_%JC)zya8lgx!+SI_yMUdnM$Z6z09^rn z1O$wtLDU&^!bra-OrhS$7=x4{n-1Oxb;xTM=kJFK17hS0?XN|<|5~*F)j;*{4E}#Z z+y77D_#b=}NKv7afY2>=w)kK0CW-SU{K_DyVMZ`2W)Jpx)ujTBN_<&W1;92M5)&1E5fNY3o5&T1-jv1yS|O=&gH z`R4QZ>{?K+`!D<{op~f2fDj7zqt|TmWg&~^XfC0>sE_4!kwv>v3`Rn{3U(!QxMn7S z*1RUDiJoR9xGOuZP3aP;;n`eU8NHochtC4(6J}qw?+mt7<7#Hfyt#5l%b)oM$+n@o zsoF>UKz)2|Byxs=8QGr_<^l2eLLZOUUO!8(i@VYObX0%UjW8jb1f6_Vb$gYMSUSnH z*}#}q%)NDEcJ$TSMlFM!sGlCZLc5Q<67_Y}5==9>v$BM`5=AG{`apns_Xx*ZQk43% zzJH^l6Re9SwE^Xch)1JSi6-iUieGi3Nz(s7MU9Xy?$u zz(fxR2tviH`@p!u+RlNSgW`+c6Y{W4kV-~Dkg*u*fqgim>ZEI}*BD1@rLsDeJgYJ= za$m-3x$q6w_6@SqFLP;LuHqAK49#@ydhb2>bQmH?dhh1~%@i`F<{>&dGk%6M-B zp|TmKVyA>Eg}MvVve!P`FJr)%7loPq-Bdok3Y+s(oV`8#g`qTXf57 z*Pk?na6u9=;Vy=~g$tK`-&uO?p7>yQa#<@Izsvw5&=|Q&>K3i3#s;8SxwQpmg&<7e za7%Qo7`a=WSfo@8(s-}#olD93gAC3Uvl$O89xN+AXrX>NU1_(cb5w|rLrkjQq?5cG zD-!rU;|87)+w|Pq94)ov~a;>nyN}h{T%T zK#^?UgC~)Yd1Hh%4cYM+6{JcH%(V35llvKsi`nm~P11j*5o!@Qp;)F-ui&j<+T=a` zqvwCzx1nJFwIgl6B!d6fbB~${z|7p~OU%o}%Eai)2=MPr7-h?UtH)VuXr_pHH^d>;W6a3yUw1lH;xMzX_LI%)nN6OuDoL`4)$R;f3?{;+b?dXyFzQfpq_#==!2h zdQNR!XoEMgx?H#u89{U2MNuuc)ED8L z(=(Zowu~g`zFZ0tOpS5F^Qr)^nV@1dskUlWv9Yz6t7uJ(dWDf_Fm4$r7p@|N%fKCLcr>FJOSmVn zdu#(dwq@3tT_xx5#l=nVnPoimPB)pY=ZIzInI*1XZRUM*a!9o1JXgHD)>d1%n7ID> zbQpEt^my`#38PVVC~pso!nwKe_4G=q-R7Y+0ngnCQs^p= z(H*6_`7;*XChKPWYEMPReUNARQ4B6&AkIKIREBJ%7*yOU84;)`T!fxVNGT#6(VFVi zt!bik7&Is(M2}S9LI;tB)POra+-fIrK}}r!6((35Bvp@leejz|*7Y)+u_p z4C$3UUh_By<`X#6t)N+5zQgAJtm)$j+EW~`t=VT=xznO{%>}ZXt=K6Q;ad5oOV@P$ zN_C3+!B2v7c;4y7;ai(q`bWS`CoI52nCM@(MPtp&#gdkYG|zU+##<`i z6p~}z0qDPG3%Hue zwoHg8?P{09Mkb03nuiOBc4w2y$p-4k6PFO83m6K>5W!IQ8>FRlu2s5Y%WcG-!Vu1& zVleh!beUYpu_=3Bu;(jSjfCYy5~|%Ejo8(Ke$a^^S4WBfRa7N=e@*N%=xnv@q>wpZdj4X>zJ%=0w`N556+P2 zYnc+zAql)7mzMyKLGY$vB>L>#q(-wi3bhQ7GE(kyRAqqgzJ_S$jF_&m*mU?P#;sT z0cQL_A(NBolT6L)4#RG!7K_7I%F`RdiGT1y^sffftVPuw;j~1 zg1ic-3@}w@ab_a3(rjKM|3lb!h>&llY6}Nk8zWmF(q?y zkW|Bv{gnkFh;>itmVmlUL#C9f_rngg z`e*GNsP1TBI;N)_vB{mFe!ymJqUJ7k=WLQ@47u<{PV&6Iv%PjLvird#U_%Y?r?M*= ziFl{^Fml@jL(xxVRS^%~A@c!BR+enMS|a3P#}pbYIGKb8*n9Z7qb>DKwGDXLdlJNk z67lAUC{|A&P8r;1@<=>=q;tp@OBgUwm%Ph&iJP&(eptpy>n!&ie6dhI}wG0 zx^$AsWH>y0*NRqa?!?p&xf?fY0MJea?#(TgbiEZy|Aji5+9m@nxy(qqV3}JLiAjoK zCcDmGBIIR>iV!kKZ|&9?UgM%-@ZBREL*(Zhm9mG3$S1zcH|Qy ze>lv&iCzYRUN{VLNT`H$X&i3%cb04er(t4iNRJ@TW9Pc~jBa|4k4y7u55^%X@Z_LJ z_N_=EQY2B8!UUJ_=b~U=Ca%=t*V09IUH{Q!J_Im{VI&a#AmUGdQ<; z{xft5Bgm6g9#NT1bFmS|%ZojCjYS4P{o@61!|8(H+{gzxOh_M2Mt z=diEZGalw=;%D(q&odw9XX+=}&|QWgWMxDF1&L&NNfO0j7_y`PEG-X1$PTy_vWlCt z#Xay#z9`&mEb|KlPr>ZKy;1a+AX%v*w`i>8J;=LsA=&9MCXYzZ%n62vYT@yT8YYiS z&%%l4y?694jUiGxA9(#M8JFJ@B>UyWZ{_*1axSzvXC$SBVM!>8ZbjD?+P|P!iWO&> ziy;%J99xkd9|&P}l#VDD6Lsd{awIgUfnLj|UB>AbGZ^K{{gq`+ODXP>L9hFjictk+ z_KQ2EJuDGKjJ>os4``ZTxfoF(77c8dVX&NMm5B=|((x^4T$~eIatnwjt5&h*p#jX*`_`B>dmhoLcaSPK3{`ec zdsSSnCk|{X7a1q^;U&~6Q}KrBwI@ky4kgi33>B~YAu}~`1+o#zia!I?vM+0mcNQ}g zb1^QpH(JdOop+pWy}$^|HCnOF*DySVahVhS3&UiA0%>%zxtwOV6jV6mlrT|xN|-4u zSxmAQmZT8k_PiPmWGBX||~=(JHW3j2CCrORWBG<~N(b(spBYB`cA_8JkK{)?f1?b#y3pL0w6n&=4^b zm(suE+b%Y#p-#8TS z00uy2BXW?aZnZ%@PKwMFqT-FZv*0W?OJi;2kO?h{UKf`w$8L$wLgKa?o6774Z@#;o z!pAr;RlB4hREum%(bhQ^}Y>Ut862;Dm*#L7SzUlC&13*w}GiqzA)g`55 zQlOBwr(v4Cp&qlUg%+h{J`E|E*mzD+_#d%KTFy3N(vCN{*BV3n{4;c8Ks_@26Ii@RRJ(_||6J{b-Aj4mb{R(ebpW&9e?-k;ZvoaO3! zMBwH<;J;d@u_*7S`&6VpJG;5OJ-4}*3Ic%-BynJ zsyw^t$ND9s8psAtaFsSDmyyTl+(tska6svcbm^ZtY$g%D-t}2|Zu?{5jgzF~I}Uop z{j}827@oL+Y0tU3m!spC-@<;4DL5(Z#*7ivnfI~6is5zA<6Pf6DS3!P@Yte-1+C2N zg=R}VsUQsNaN|I}JV9>1`^pH_!Ke>raKm#m$Bj*CD+;rhQeP?$gNxtYkunuz+%l#D z5_OFBCSWr~U8&csdHSuW850dl^nyMKe;ki0uhp)5EMhOi8E#cJ$Td#Y3`%F#LnxX3 z6fJfAP(3EQ+M;MnX6n3$V|PpsP}2?(Z2|GSq|M>_vaAClCOD9m{{pqGUMk%`xcE}1 z#CMornW3Dg%u|L?4u3u~VJ1Z){!FA|NRbRN3@u!u^3_E(Y(9lnS?zChKccheiZ0uz zFf@IK9r(ZkW6`QyU0&v0*qqyDnBiUEYi;o^D~vv6Ocfr)X#2H_4oSwiDnCzCYHz9< z-H_LehkZCL@bKtz-vZZk0l*RLoLT2v_)?k`@uZU-UP`&-*~Iz6vxGb!{#ujvte^j{*_eaUiypp!7nu<`4>qR zdB!Tma^UU9^a4i*^X$Ta(;zJ7-~?j1{jXwW87RD3ml?|r+(8Vr@R%{+*E^`phszG#@c;19rdZk0RODPo>eyr@el|JRH)4uYxdJN+& ze^&z1X!P(GXw&$H(Qx^_zJ~J&=u_GCFIBe+F+%S!nA!$c#%!*_XL=gfk;3E!X-I_0 ze3FDWLF*jjS?VYF_t5~j`B<7KB#*&>*tKoGrd^}^R?ytMT#ajqmp0Jc0zLI>z)LY` zZoV1K6ZSh~ZARxUZ|tmZWDQqYmgF}&J{fLx4?K!M5*89~vhqgbfGA5Dq(LFxDQZ`X!vay?>} zI;P|=!0!ll4B2A71$;<_+d{tiJ%U9&8raBK=U0B|-M)Djxlf8GdcB`)??1BedG$cp z(JpvCkeH`?2xVA4oBaGOvyt$;$m|wN@%{EWbgGjgyjOu|@lYiN8b1Mp>D|l%} z3qaEagXGO+UF7=hGkZbws$lcpW=-m|vXay#PlKh-kFt`Kl|D2_tSd_$q&C7; z^*iNLGBLhJ^NW1-i(T`0aoduZW0gtU(ul(%nZshK<06^kVyVNTnZx4oiRKvCI%_+W ze9=#KSt|WdXIHc6Nz3%{A9<$<#cCg2j2LI#;|h zR6peADc@A#`m@gDX4=TiWtP`U5}md{Sbk>biN3v!lr?h!Q?Vsi-zCe=e9epF6;l2V z{!@u^weIK4WL>CL z9%$2YbPy^-UEtUjajf6Gj{mA%-F(!Qd2AnTBG=*0OC9O%{{}}z>5D5<N2ut`S`7 zPnelALM_!GV_vJd=!(Ajmo}kWJYo|RJN%Mv&#aUkzxOkB;%kz7m%2sM%0+iQtqpR|+nhRHMx8xH(0@$t|F=OZ^kEC#OJGkz87C2`{cY zbwiLlbevk0`S;BKgrrtMKojzN?*p7IrQ`PDMF1PC9>Qq6JWOl~`CxzZq zP2Nb7nusBXxB638yUl=;dF4FIv^$|~X;j>!$5SgiQ_56|f8>sM zB}=CsdVdIQMcFUku&xtx{>EXElNMaEaZmh2zM#p^a#!cgwJeAJuyFYF>t&r{99`*O zAF6LnUbkwFUALN?UkG%*ys$#_lu*j;eFmJNwc^XXf-IE)_;>o5FVI@aW$55rnPmcj zyaAl}cJ3Lv&88wANZvqwja0V!~RIeo872cZSX_sU3$9 zcnO_wami9q$o`8UyI~CFew`(a&FED`-|t^|SI4e}x+&1tw6DMZq!ajd==a!fxS(DX zYkFW$4qiP#*U3*lC_9xm@0h*IK;5v{f6RS>^+^jXMTL(AgqNejrUJvNF%YtUAm}jd zvBD9yn+iMO3Oh~S+st(RxbAxL`O28@eBaz~c7A;419>v&J@xTkO3d~i>oy>vi65Epu2LAL#X1!+Ct4TvY_UfXXzNY?_r zw%==hT(3WU!0x!V(Co6j;qumO3|M|&PWEsE-U;?Y1J(t4L+yPuQ1X@5C}g&}qgYRN zDNgqCk{iyxGplU|Z`hr8^*j7~r!O z=d((8{F7JI)jh#K{K?hB^7P>M9pD5B$0(jGQGNXINRDdj?NYK0R@2?P@Z`Qp-Gc`4 z$Hrk_EK+PGWk3|-ELSW4=vG&2$W*%}g&1g3SbLc}*wtBS;1^6oo@?_g`MLj_jYSI{ zOv`Acs_{ODhrl|5kwzxxLJp-egZ?8^t`xuVT7(O_A0;1&S&+>Rn-v1r0vho44|Z^Y z7}tK~S6oP6Nbf(U^8OLu-+zi4{DsxH|0iY{2FQN?ub9))&;{V3ptzu{JBOzf!NDW) z=-6XLkcbM%1CcL_9*!)OdVB29mK+a2Qq|+vj)+13Vk|cr` zV;mtS{jbUosHp;6rSS~e!T0C5fBuRT=W7Da&BG)ramF-HELDUTh4J-GB&I%BH^VRs z5>UxE%+6Z;K&~5u*dM0NhrgPK4Y3}|dq4cQIPkr2)wt{bt4(OXsbLOQVz zFEz}{pL6>bvO_$Ks`^u-Pd}pPpwJi%r`Q@nUqKsw ze1N%rd>cZiE)MJn-x1_|Vnv`X4QvvwO@IXp-!bG=mjlTy4vdzlJ+PP#-SKSRx~3)D zVFPY`!AHM5>m;Fut3?BThs>${axmW2jDN03Og86=G{?fiK#bJehxaiU#OaR6k+F1LxEqkxs>0= zW9Ut+kVVGKNguCu5iG<^j%H?^X1zAt#9jPgnGgnavN2`emOtQK46W4IEL0J8QeRZw zeX0(*MVK+9ARytUq9SVgTo5M?C-hy1C;=QhHje$`l0IoT(ss(seR{Nfn6m>1wp8%3 zf=Jp(5>~}h6_km7>Z8E3LN!BHqiEBu&_4AdA+QDog)&~|L4FSM3E-V_QEtn>zf7Ac zZTZ8y`9tDDQ0?JBwiJG?ZdW#Jd{A96)Pll13vXBQYOi#3UGBs>Zv{b&nPq*}ZZw+k^e?JplsVh%*M%#hsG{YQ zFe}=z$gDcXEJ6o)5U7g%J6#^7(O_sm9N zyz=lF^XG?!Vf51W{W8$J&^dHL*|4w@sXruFBj$pK>}%8G1L#r6k{4fwMJy%XT1wzO zR@Z$iQF3mGeUe*EyW&O7SQwjAOfm>p{RJB*BYf+MlsT)GZefp{f@#H@zMSHuR2TfB z320|V?n@0^kAgbwX-S|FoW^|0@`{||>IbWZ+HAj{ZO$3Tu6V|ueEHwP;T2>07YB`T?uzrsGGkm&BD*`omgc?Zgp=D1G-#QL4E=}2xos>oIAYXm_9o+ zSdoPW#eXxpHv*Du?FB+;@5KbjF#_Zy0dj@_IS+tb0zj?;AlDy{m#?Rb;b?pe_`@`% zV68t+Gd;;QTm!$&;SO{N9L4Tlx|1e*^Q}$KX%9#vlG_e#*$xjk{|avYhJqKE*)T(p zh?KJcL1_{&XbcmbW~xg>syP!@o`K1*N@z0w{#)TaP!;t#!t>hzjoE02BL?K97udXW z6tu|lm>GGSEJ5HQr)h{|wbF!shb+awNnR+Z1*SS7?sV8YV{~f-oIFtqmD4TdjYgqk zu9Dcfn;%;HG#>2q<)g;w|Mi`%KVaF5fx@&ODG}YUS0K7<6?W!b=DtE8k}nbHU!+$c z-z9vOxJ@0^W0fr8hfxwerTcp#W@p;227D!8@z+K8_eA`Qd}L^4B5LcPY~uKrNbtYY zlIxG{GA|ey7!sJQ3mB^ln2QS-tSDIZ&f8qUfpW`4ma-^VUG2o)L{XM<%UeO*!d}8e zr&HI@!Fpj)u=J?Gf&$6+gSP@@Z)lb%Xdrx*7mjfE)0Ynkjq)*)DCk@8{K3x7w}ioj zi8pRj2Rh_G}Fe!qBsP;o|vFLv(VBM|u|gh}{CrLTbf_KouYj=;g$?(5J) zCtz$W;^yRF@HOlvWb5$noBxiaSk2NCX#~YbOw+P)&Y_`>A2wVBYB0rGSUW;%y%^p< zGPt&nk#+FCUKi#eW3jrvquzcF%6h)&^gTg0uZ7rJaz3G%fYrYl?tRPl8PxlwiZ5S| zHz-F#3VB^oI!7~Bt&quV%hB_A;y2&c>o1*eKkBdtY_gPQWA1KmlDwqHMnGfaM&SF} z*m(n|hrgyxl=6-K;wl@G3zoHprK6h~3^RmqJhvhXfu%@}lW^1@GK$G2Wx~G&e^%`Q zS{>#RRo}=d?I(-~Vqmpn3=pR6xUmL~Z$M+{9K6B_3^|yX`c2HIxGtrK8)$K}ejNa> zew*|d{z-lc9sc(7vOO~0aCRISNF$#bdd{4YdZcym#w2926np&!XPvoTaX1fVz1glI z<<>0RNP3yWt)5xeidP!4jA`aqiANGe8KSd^ z8c@HLy%Hxw3e{Z681Rhv4mz#NYu&4Z`+EFnt1YEm|WiF?~XEEuVtD-VlW_eS%Y2Is?>L z-Y}QheF}G5TB~=7eM)x+RtD^9&x!!kMQG=`&DDn}7r{;7-_yfVO$<}qz8Rf% zUy81061-n`q?s$_PD5W{U8jjRM5sZpm!8>VXGgEWFoc84O*nchK#z(rZyj+_O~q$T zKxmnk7dpN-|KcqE%5^urSZSP<-eVpOO+dmZE9#x0?o7#AyDlTho@Mqv33ZRp6@6ySK7^{^QriMI4xt z(T2tBusOCGEffZi_;x$7YDeqBg7Fg(LP<+8oF8RulsV5);BL&)&LkGu)y^}5^azym zdu5gDBBe>iyF@r`+V9fhgsjO<+i4`qw?VyyAx5w50Hu!2b#l4FCdFK4>;CP=BUfX$ zB0DdutlgWCbls+aFUc_4EmsGkKVWsE0|w`7mB>RTYiKyPhkeGVUa4!_{Mq-iqrl3z zGK@GNt(G`#WaX|An8&OLts=yDCA3F#uv4sg+O%vUxf?<@S2&i1=yN8xWg6v7{WYM? zk9}bQ9k{a%Qz{N(fj{R!+p<3}c!NkA$CLtT>nCrqp}J4Wzb@vli$4)fXYOi9?8H6J zM@Ins7HlKM2a9^MDN4TG6JytDnlZ^qQA2b-lNqaDtQc0OT>u@jN)5t*iJ);t8Zq>Q z=<4@(#Fy*39m!2pxBh_mdlKZRBDP)qM!sIdC-xV$PKfLvQM}r%9zur?>+AS?#3ZgT z#(RoQotC^U;oq=(2KQnK@g1RVPx6f{YvX!DAI3#2YwI-$K4BkUv(2oHAL??9%|S9P zbc$I3PC#rE=Gj%D&55hRNO;RmE4XB+D<;%)Bm~i!by^TthK*#oCJP6RZH#|-N(u5=L7udIbk`|k!D0WUiqE{N2w*6 zMc)j+hi@44gw88aR`FB0YK{4YSi_+2gnSi3VRddap^08ool66o5RT_9J5Clp@#uyP zIx#AdtmfA_;%=9eI@wq%`!kRsP^DoM{4lvhd)}51rb3KM%sjRk`!I405K+Tt9}&m~Fl%3Ey) zL*X>~8WNWL7UvGS+nZRTCS2`$E&jmwAN6-`WIh(_7eTZf;=jLXh0G0X%uK%YT%1k* z@urPd)pGqZTKt$Io{Td#2T*kJ6BVP1OBxF+4B8>gp(4%&$WREXWKCt2D3LPRn*vq2 zl}9|D2f{t=1=4k2=T$qDx3_ye__n6>^abznN@M_8*J)54*FW;U-d=b=r1O2en;`W1 zc%cZO#e~$s(1q=$MeQSi#G__@(1X#F?+XuLSMUk2V~B#Sg3A)14Z(*Dutzf~bcP{d zJkw_|VD@Q5L2R{=>-M|Xb>Yj#MD8AvyX+)`>kf%v)Qrs-OVbB&dq<$Lgfwp?9kn-9 z7^qux3;s%N%`r5ep%LpL9?&<(cCZ?8wNjFU!VFBPv%V~$`EolQfv-6w=LBn6m@npT zX}#pCUVhyXn8UQY_!>mG;P+3&lo78W%CKf1LXrYOgu!97p6Bbgqwa2xJ#WfDHv%t_ zh{&ffTk~qw&f2*wV>Li1COJOIU@2)USy>Jf@EtE|by=i7i8)V^D$4ijqB{xkM2C6)IQGp zzx*K#KUv*<>pdCz@b~gCrUI^)&E!sVPZfa0UWKKU}WcI@i zGlfq8kBx4-ieT#Kl51sX$-u9!C22M1GVv%RM8heP*8-K=Ls3leK)Xt6udX7h9DyMm znVYonv}lst(?ZOouMmNv_Xi)&^pD6Ggny}05Psp=y=PuEE#1gI5aN^^MV|L015?g8 z#w$ND=1r#YYQ2{!CLg#&CL2M2u7c=H!hR7bj&8AY8JtUZEQ%=-+tXOIsE`D8DHZlp zoP?ZE6;O4kUV1r}@1G@X5P~xz0dkrPaE#3(I=5_HqSKS6BMk+(OUtTj%j7qUYyFJS z_qx`FVhOi_-!z5k@>6R?8WYE*F9MFf-MA9T8U?t_$Atqq{n-RNUw1@O)Vn1G^6nvY z<3((X4#@<67xseiW^T^+A0|6l-ISvH86nCof%<3wr_fR95l75RS=nWrtD@Qu{56$1 zrgkwE4@eY^nTJefB&cV8oG)Id?R>p?UPg{N*1roV}LUpiSWbOfuQ|K%ZdD% zDT!-kg)LUzfx(jOuR0Hg~c$SCG9 z3UUj|LhW;St*i0%`iy)*E6<>C!_h{AzBnVG%_cw5#$^@;-?PG76%=qgVyoBS|qr zKLp^Q5_WbaUt)LcGg=a!U{6jUf={)|F&g^lz1h1WTLgDM|54}o>6ADoe6_vczMjCO z|E10m2N;|D&l)FMNlR|wOLwP)M$3f)4eh&vygDsyhj`Zx3i1RdaHa_2QryYCc>eK0fbY`UsUA2lUw@s7!Mc zruH}@fFqBY*aYT7>?3=jkpNhw5t$Ha zbH~ZLp%hF~*|2EfhF*e>`9}yY`KT1e3B++RVq--T-k?am5OP-mWb)F- zoLQ~nGq@z+1N@9kk^)_JjkzAQUI~O#2A>H%e?qttCY2tNc*~@~Ae<~5`L;%%x>%!B zC7(s@16IcqPUbx>iV%ZQLXAQI$3~?|!4Z8#Wvxfl2QBGRMbW>EOlig1i3}!qjv(ol zfcd3;RPE3;3VMLS2}N(_z0NxiHQPO+dZvV`$O+QD-Ukvbf#BqM9~F#oRqyZBd)CkS zN5rokeff2!Apif^)&K28QKc$lhpYnsv2{so^D)mv5V*eubHUr<{~foQPmpoWc*9PZb9n~7+$IaP>ILsyeFsz4BS zc9qR~u059@9__D_nYo|OA7FlyPs|~-R<`_PFUYI;UoOtfzm)}jf#+6=ja)aGDfx<5 zkVbT&$rAwls5K)DVfpM22I}FjD32laSm6p2LpRK@tl44pwF+;hRp}3cu=OoGWjkVQM z=3-7?=g`EJjgUJ#ofHR#vYd)g+(>-?c!`Ndr)=2!=Tr=|8lx*|l!!{RU7sO;9g9vwVRP8lnzGzsT_-B{|Jc-5Vj>=*HrYTJb`GNYpsV8;lo zs};_&>lMz`PmjTqC|Mry%oAUO5aBv`4>cos^!Z!faAZ)+g1S8&jLmRkDkjreB2zf1 zgPhN3B*g5k;7fy8)RjbjNS&=dut_ z>L&GYD=1xna30p0gDecOFYiIyN#9ZH|3rA842Ez^!L2uxZ3=C&jkLMdbV+%SrDxEl z*`Ziv{`5w8*7%DF;pWe#*6c5Xsn2{{VQe~m4!Y~)12qFF7@mHztuPsFjFuEJxGFZJ zKO(mb!dLi&oF9h70@JCKt6_L;g@)UvkTy zf-}BIR)38~vC~ESV#w-g|3`}ajsW||B|`LH9mhh~J$nZ9R5c3`+ySa{*5(3uYctad z5=uRpbGkkk#&%J;46tnt=)=$t`#{grcHpUY=xTPzY=9f6n0>aWUDqvQ-#9fG%$+`?6mIbTNxALQKP-%)h;A2QzrdANgm2#@{>yXqUwXTWCXRNtHje*qwMMK2 zu(q=@`R{-JyJ=dbZfS?KjPg=ZE8fJENJZlp&MbInJA%pkuyq5T=x%=+X4hlVY?UgN`ufxQCv~L>yVE1bK@95V=Q3A8(1sHqmWPOo75M47QS%Oos z+|Ue^%KOAqQuGEYg>WLXNy3i#m}b{O=ClsbYWou0B)dqh#OOYAp`}9y4lKOEa*@f? zcG@?)4cNJf<_}RpW;gB-(N91lXe5o`vouc$aC)fkYV0|Q*WBIXH!NVQ%sJxdB^s2- zFo_064)P3>yXGbJ$ULlOzGQ*RXjo2w2$JM>ML-06vfNiGYB|T%|8V8+Bu1*(nKalwPMXv z5|$RJfw@F5oh{v~S~K@Eg;~_r-y?QdCiH`|ops9ZJq-~Z;MKwjKEFA5_1!Rs!&RJOrIe$4 z=*Bt*ja~PEy_Z371|?c~cHtHnImUp@iO4afgd!Li=}mjs`Au+`?j|Bi?It4%%kZAlhViEs zkXVcs`ekvr5B_DWJx>$?Z-lwFq~&?8cNK=|`g1vPf6z{xh zfd%=Dj__(Iyu61%<_7c2lJ2@ctm>9xTCX4#+O42!mg_XFTI;rbcbL{{y9O4^{X!S8 zoHN0kFX~YEA%6xbWh<-Zt!j}vzIhT4GI+8Pfv*a1m=lz|08B8v*<#` zE#SkRo8CWz@?Q6*ZnnI~MLI8KSHw+w8K6FbuP&{htd50L=n>MPj&tm^hH~Ka$wIs4 z(I@de_x~{Vm0@+J%d!axzHo;?aCawIaCdiicXwF0ySr;}cbDMq65KuLU2|skxo74% zd*Az$^^@+eySm=)Dyn^#fdXIDTS@Ic>tnCEA^$AjIjmbbrl@YHG#g#3*KZznU*LY- z0ii!bAEms9397Gm_}vj50v<)C_}W7zlmgFlepGjk+>o-~w)kh+SxVlu1n^8p@Aj;k zdXIT|IhuE!v|0)1MR9$E0b2{UoSN5>Z6BLL%5e8p?`g-4+;Rn`AXGWXXgvtlCxs z;St)EjTZzR_X7d>s#7u!K87~GS@7C<>fA`3ThzXV-e!Qo^cHhO@BR?cOdkBJMGw+U zPY`yTTSC3P0n<&KG34-e)GR^jVR0!QSs6^ifnZA^d&8=Vb&fdW>IrIZDTq&Ls>A#?Aw1rWa&^@Gu-xx!Qs87%ygUx&7khd&x8JiibFXNXe_WI z$t+bu;)}wn6w%H7y#rx&I1t8+khUFONdeUnPva&jRe}{o4Lh~ARm-{iWBGAYgx(2M zvj?j6;)yCRPM>yq!TkOaUt)W3C%-7ek4k;cI}o{J_4qAMn>Xk&KE!RHnpk6asGp9G z%+5;Y(&z8P9NfqP)*sf+(jc0G`u`hr{D~_f<)kFQ_%OYNC!8i`PWwh_`9ihR!r2PL z@jpw*DF<8cezT%_9vHW-;iYO74EFxcOXm+cRK@=W~7&yXC**ERK0 zYQ&(|Ik(bn^5;HPk@X*uO$T?7`3&EMDN`ub2ltkIcxdB}!>jfMCLKT#a5TJ>7Vovz z*oiajJbm)A-!*Jn4X)0T|FhHz)Yl0i%L@`^d$83O#CVB0IE^zq@E;;x6wnn5n^IEM zhjUqBhadoPM*J#==z8A&koW%_^JDG;6rpBNwEm{|@YkdH|0I%#|K}fn+a!#X>G{K! z!ArqnXF+|{{c`6=h;{^|jfW;Vlz2}EECc7#0<&7JMq$@}55i@Be}!U6&H%@FyW5Rx zyW1b}vJJ}Z@9h7`m(ty894f$nvB0V0!N!z&w;wvy;T$Dk?4)ET)XdwKdW@)CMqV*G zk$~}yZn?n1c^kITTz=njd5%XhKim80x4Nk()EFb>T&B-RXL7>S$SWfz-Xt{&MEX#8 zi}wPuSMM*@=;?jN1*tH!?L(EuO;yVB_53uhaRdF(kK4CCU5L_+zwp1jZxcZUqNnAt z>OnH*q2G79gCQNz=drpkp^42oPoa(c!<3{HC3K$_w8FFjk_!9V=EVQ@u;WtZ-_cx_d=O=-H<-M zyrWmdMZ;5I-!fYsZY?)mes&z}A5>rPevs7t@hO<$kWPRCU_cxySJKJ6;5UXT6Spl6 zhSIrj4q3I`8i+%&)=vZN2Hiv=$>>P4ass|;cg!;Byx%Ns5y0nds@6-9vgQ;thRAj% zF2oMNnsBBpv!Io!C{xcFV=8d84g&E;R4Xtp89?=YJQa)}4YrQFODK|Tj|u!Esi7mU zF9P9ODDV$}Rr`5=txBvSat!YC(6Sn?-o)~DIpo#hcn>lFoKhRL#m`A_wWq5%TT_@k zPo^^^g;joIC))vrj54~#pcNR47>M+gg+IC&t@ULDJ~QHN zJMVfaFS};PUqMBam@;PRW^M1B!&A=%&E^nV%yIsh_};64HCgk$$-FCxp}?!GY@{;m zslJ}l(fsFWZ>1RgHG|7f+kxG-jM1f;6AtMJ1RhH=sMtr@cxjE0x}OD_NQC zC9GNs({Hm^eWoyJDb9LVS$I31&GJH-DvZ*%joTR2;~M>Pf3vr6p4>Z$DF-foZmmHf zaAgfO7*oJ1cI!uSZ#8Ru^&_A_H+Tz-?S3JEwOlteIwbi`q<{N~6f(H6oAA*WS|?19 zo|o$yZkjG++SwHL3RBz5FG1{4U8f;*4_zMhl>(%?C}y@$YuO#9kNPpknj{;Zuzf`Gq&5UY{Xo`WhA)pj@kn zW(W9tp1SIe$`|l^1j18o~dtz%vK zvSoqSNq~>P4A0Y!#Me2B**tg|p5H#~yPI2%6$jS*FW;$CM_L|0ez+W6H+GY6*Fy-; zAF8^gw*XZGcG!#HZYn(<(8I1eUj}zmx(obE4~go%?bf;>p(S^Xk=@3j7^xEWKer_I zyRD%Y#C%6`K^>7!D$) zsW02oR6C#e@|FF7UTp}V03MC|7;sfB#=-<-kkJ6M+*W_VPOLyYBuBypvv=HNnX#kw zH4U^(Bm|U6*}*k&r(|8QofxXmmTyQ8*rsv$bQ9);3PYmcO7>a7V$I<03G12~ZJ2)Z z`x#K%#Kco8JW^8$38=uI**T-n;FM3_4#dOUe~pu~5l|feM8@_uB&M-5FQjZWX=UnG zt=xob9oyo&*o+fn)nFB_ffnAIv=9z6NRGO0@kE+k7t}SkJw<1OlZ`_Oag`f}`lOiA z_d(!2cZN1q!^q3%r|D<@mWmxEP1&4sRdj9X*N!j}KD3IG5?!Sp5z2FYsh-xb=&R5$ z89SK)6ICTWs9J(3QfhF`0a8JOj&@xD8MyTX!y1j1ok6-g5a#Z}BcbCb9JmrNe8_^8 z(n$DLgIR$(jKxXNOmq^Od=UfQ$O5RyIqAzBM+N(Idfkz##;sFiOo6v$1JRf-_1t+C zldvbXYIitdxS(MTsdL|1;-~qBPu`5$op=+hn>Ziz^Uq+R_gJl~*UurAP_MbehVHk} z39r@DWDE7q*jIF2{EFT$LH(U>R~CDAA;0M^3`A#|auHO_QleuQPjD}gg+NO%aW$(w z41$<+v5RB%GWgeDkR@KFlQ5av=Y?;W`h&q$VRK&+sDE{eTw~3hqV(H8*ER&BAQuJN z<^){~*tC2kSQB9DLp~!$zMpzGjGCq(eDh!*s0rSH%Gyq};grh|i8%b+N*{{1Rxmz} zc+8>Mmq{OI2`dKqu8!k_G3uI5m~=R3h-pF`Q3`v?;f~ra5J#m~SS3FwR1x7xlgEqx zj`DY?nnmkUn+>Wb37~fe-2aBEhI)>AbRbsD$t~%VE{IFk&QJsF-#n&|vi)vI7=oU}J9#1C zXhH0>Absnd+OPrkdxB~`v3)ASDs$BZ*%@1!s1w!c%O9$|)3ySA#n{R$o<-XZa5|=s zj8U_X;#-bz7Pa)u2}Z2oZ$)7mF#HX*V1?fYa*#JD7fY+`aT~SDwp%d?ootXejg+V# zBK+SRb8E0)7^&7KRmO_^Uxmu`X<1npB@fa%jc4PimEqR4r*S0~{1|qsGz2Bk7M+Hc zeG#} z#g*gWB;TdEG=w>UFFWG=usjeSY;7q`HZQWPJ@~D3ZHO~-@7Of zhSN_<@$(-0daFRHjjHpqV+x|lWT_*V(wv~@ggAx?DVi@=spXM#Uq!JJNG%u^U1B7} z-XAM9)sU_L?bfLswIsEXC4pd!k8n;4v=fyW1xeF6^uY_RP0mG%!1>!p47i?+l}}

    ^#U$=Zl0CNylkLpQ`tvj_<_%@LX}_cgQljda8xdA* zX*k&q*q<7?!1C2swOdDGX3B$tBDfM$w98Y`c(Rvo>!fZib-)L7!y}4eCs*vV!EN2N4eCOpY{lEzl)OlVg%!+6u8yu_c`v5HzdY?gW8;;mfoUv+RnbbfPSR9 zcK3oG{m?SD@hY<9Vt^59=mYzyw=n2+gii-S0YkitPXhkcx3tk>W;2SrUt%t<50xh3 zQMh5%jk$(*H-l^ILeM`m`PGP7wa8N>l;L!Mec$LFl@wDemSMf`OXmPrXYrX?#Y2GV z=FLf4uKwmG`)45|IDX9%SDzoj6JryQeAb-?tO8u`d*Jp=_dfOxm*5Ue(stuyc83Jo zNV9^vVfFM!>Gw71mOAWct^?}qryk%Ba4sg#ArCdQjH@q1y_?*c#*G+PA0(D~&|bpI z9|NA_e#`M{WU5?$CR0NhDDMH&Kf?y;c8v1kX&D_dzK0sw7_@sB*ha}50;=Q45{TLp zIMb}Q=Zi1!_pyfVZv~ijosx+7$2g)mCVk%ip%SkL-XYWt8tZI8v&onLTiFaP=>%*n zovf@CY>f>5b^aEq@Tc4HrxRKY6yiJHuMfyO#| zPLiYlvOC5n?feb$*)DFtwh|>vUG_L{c^L0tssk+tbbP7_zx(o~;?WhTgfPk7u50^H z7`N}Mrkv)UEYVIJm0SYn~hDcYv^8;<>*ArGkjWO{AOrn@?)>Ggj`zw_%)vXtl((d$QROqEb)&0$oc%}Vym7QnrrSW)q4>swMl(B(7X#-DnF zUfwy3CI+y%tL$?Clc3xw&>~H_rlKR8JZnK|q$_YhL#* zDVN-(VpVofjqd>gaZQP?SPx@>{cz+}J3U&vAym~NB)@~xn4zpf5yfdi53UG103{Wy z>1>z15GK7X>8zhFF7pbOsb`81Ny^J+5>f57PFI%#2!BLwq?wd2C?$Nukub6#oOdW~Cxwi1D z_4u|8JahCxNn+1ln9B_7S6(U&w3VlI5^Jt$c7&AnEn<}{Q7)}1gCNngh6T#_v0Hqb zQMzxk_4CG#62w^Ge7V{Ha+8qom!q`ACb!a@S?x#z;GpO?T$1@zKAAMUyk*N7=LMQ) z+$LPU6Ix0g6BDWOwO^}4B!uibI@vdY7N$Y9%F7G=t+a{Ol^twEbt84g=D=UF_dr`% zW$7L&G%C?DQ{lESc*nBCH~}_gw2D&IARi z80lO=?>Iu(QxXQg*FC@~GKUeeA#cEkEGL8jRFmdtf-teQ<|YVigUy~!ZGWsg-8pf7 zG~=Z)EP_q)!LmK&&X8G@W(`0aaSpKM0(?Y z*oxp^qKsD2IfoUUR%NTQW=yLXs9fKPod9yN&+xfq=IF3wWQb7KA?A`4{Mp$!O1Oz+&Rit3Z{JzDuUt zY!Z@y87EZW#-$r7G$YxGRn=YTs^!x;@OoEz3Mz=Lt5w~orak(N_M*V|ow-}q5}JQU z*bG*}urqF`mq{KRkZ4~RzB(4Vrp=XhQ3@?7&SeK}J z>@-LY<9^$0h}(-RMtAKD3n^>rtyh}uwTJ#huI{SXYfwQind#7q0V(V(c-D`iH>k@| zy33pxlJOmjEUZt~Y}eJ!P;qDkLh48l^L-M>Fr~$w5*TD*H$iWY1>bjzwu%K256xpa z?`+{g_po^e(n`G%?dJ3A4Bs1#MKr;06!VDUY4z)!xyD+;n0c}#`auDIk4iE#R0~+^ zmkpEcrdcau&(w>Mr|wY`v%-Hz^U9{n;gXOOS8ESvOxll#w*iY^*WQq|d97dBjP>Bn z;1zhEgcYTeCnn46IlnV7fSILejchCURU!`b0Xe75_XO16Q*%R#6_FSFp0)~fg~js6 zE~O4gc0T^&xScX7DpnhmeTDxq`xyV?8WsPvAOI%OYLySalD6_sqX(kx#FG$%p{<}w zni5=v0#?O14cqF@2fDlDbwrt`U3?O*POT}R@q3oX&s)3?GHe*1pC=!(KWykq1p|Lm zU#m`C>VKefDxj~<&B+~(QA>^juwo9vG$_2Krn9=J8*^V4%u=F7W6o%lQD8z?%pg7M z9SFL@dF#g;H5XA^f3=elc3(_^Uzsr(R{A31s=@l;#doY&Il+CgX31*hAhE1BRNB@% zT3wZh$$S#uG0DlW6$yb^OT#D1C7T5ll4tu>S_4-41*o69G;lWXXgqmw;GzFnA(y+S$c4A;@J!Yfs}gLuD#4$e`>wr*siq(GQvFvND@b#Mj10js!e>= z0CQ>oqfJH|DEpkzX+K$47A-0rs_k?{4CA(ss)y>2$U9SO{1%+gf?6pn)uRahXvoq& zs)tpLRaDK+4VC%bS@8EbvURU6L74$_;w792IPhz0)iqcj%y#8e@cXl)F?L(R4$AD$ ztXn2KcHA|xTvpG~5vLvgk(u?GM0~{c932OD90xrAs)`V+fqsQx;;O8dd0cVj0@IFy3_Z|FWT-f!UAzC-#lhN&S zq(#QY=bx5Ki5~^*7H8U=ezSP=Fv(Q=6_3j))M1tHUaeg>gX7y{p28aFUl;C1><%yUfhYJQg_L= zJ!);NAhd#14HB0e7Mgd@kjP3qXr{z3yBAtgvif8pv?oV@*&ITqYLe2iRgdJBdm6e@ zO7xP@W4|_JPKBhJ1yhNC?Nul+7V0m1SK*is%?(g^cMAaAES`ty{T9|_iqHw_qj!T#>`iD0p3BvyJlDl3BBg7_Qp0g=O zlh;6FoTB-csPX6NQx2eyHoto^zS;=hcn=q;F+*}rdb5l=u(jY3p(dsR9gnmFBk*o#jryHI( z6P}!PZ4a*p$<-gS^xHllSP-1+ad*FdK)`${Q=Gx0r@mnR`3N4ZP}OQThF#j4c+C)? zGK#>KrayzjbqYbj$4n9h~&os-Wp^4}8AABt>=;LK#M zI&z#+sdR`%YE4|Z}6h~HmiJ4oYvUYpE&Pcc?YXS{C_TIULkfcb}M&6n?J~Q{% zy4Q0`amNv@K2(-6E?}0z%@DlZ%7MY;45(z;Bz;eb^YoT7^DKQc#JIo)s+rRBKI7++ zx_Gub=XIpMd9@)^}3$}QxV=vUTi6+W4@NIIWtHE1}h`Eths=@Xw6YpE}B)z zuh(Hlfj<>n|IRf??zsHNmK#!Dt#>~t@Xr4!@c$}2@tf8lw}e4n8v-?PT?3!dB}~9V zC|ZG#ep{Xb2|2hDWoEr{n~=77VV1m!!i2G<7_(RPHLej0GTRP&M0t{dEU z&r^0CAI~>1-LI?Q?5YoTUvcp8TwRB^@I*&5XIcLNN8Sqj?26GhA z7z|8&pZzv(c7XdLCo6!31glkwYw&r`XZRbmqfEgA)8Lzjy;*Qx^yu#6UO$^`n#4=lYE?i#s z@bbFqUhaKc>)_gdX*fz3C^bGaKfVfZTA?~LpE?9T;@aWyx`#dmsj}Kj@m#|3aEJE+ zP!U!A!!??vc+dicuI+zT&HLUUF{${-b!v#y--;xVq}NHhvs3O=c~$RZO{nJ|NF7@! zK2QU#{NBnrt8CWMhTft7{@EW2Ww`Z%qHp`3qR;$S+fmT^LkWa?wyJI2vEgf@nN{UI zQDG!M2EpT0g8v|S5K9p0tx{1sL27MkX&p}PySNvUCrAkd_VatVt4o~lv@9ci-NTy0 zMXHC1ao79P6Wn+3wm-sug$N42o9p-%o)}4;0r1!BVVuvFUau0FrLl0Cbc5&5tvN>x z`f(xoP~k2Y0?BY4;ytwp$MJXvYs08uX|j`15Z56mo}%9S2iFnVx|VWMvo776CjG=c z-;QioDsObT^B@-RP_g0j%=C!M$Imr8*hkuvd@bLfIv1APu7fukr9WfaUtAcQI>u}y za#dktntfd0CgOvJYDRgGl3=WT712~xEqb{8SY-`1ScH+*YNLOpz8}o;Kz4#3O(LT; z5Gq~z&_sB0+!$Q>!DY*R3BD!(S-|c^jL~vU9ng0!$jw9@Ijg7IMUr{UV^*J~ve8k> zc5FhYUTgyJK-qv}Wsr(y{HnlQsxp<2e-dMRZkNsV3U)to&BQ^9_xLe;VFw=!{Ihb; zfYusO7ECXpUCU3u0pA-eg|F{&zs;2VKk_QJ*cc>1Ayg15pSqwSSk7Tr)-f-ZoSOj# zxnAX9t8A8yhTh@+9{c=XgyUi$%%K3n9RHT(V)+;3_><)VQ5{wDFkplsRi2`-E1g^6 zB*eZ-X6EU#{5$bjjq%RI&c>PRe_##_WU1SA9lf3Ku3lFcjc`8V@%^7`)f^8$mzQ7f zPi8SboRF{9)T9g`C(&?y=?jZQMW;cfM#W;pksq;**DI6d-MMgG-YrwG4g#e>p=)={ z#%k%7mSWUv+%{ho6ovhmO^AO3JgsY?q3*)Qi+ zstZ%Piz{8vb1{!VBN0?c@QSJ`iGv0nG+Tl?11jvm$5VPTa)jNU z8QDlZ_1b(_Osujxw|OrdAC~&c2D0WdOXQ`9at|K1Jld$ja!w!!bx69B0rG>?E1So< zx#F-x^)NjK1v-6p6prq~-5%pFyZoIcLx?Ss<~C2TLRV?qto~73M4PkbZB5mI2IS zbcq3fOGFRWc#{0YBBkrqOKCIY?0a;sQ>(US*=R3eE%VTLp5O*1Qfo|9u!uhv|xs%Xa3gNCSfu~e_juEU=JziE2=;*gv zBcTyedU|}Y~_yy^+$j|_#ffRQ>Zmt+%QG-6aRA>;qE`h0r9=%GWr zu+~R~Bl*PSG0FXq)#74&zdA_=vg#R-hQ?zRSQ}LK<@|Wg_~=wxxN4%hDkc5f1COfe z*=|U!^ts`fAz+c7yQd~Ylx8J=JG4g2J8!ryE+wJdz}hH^OnMRy^hVsVpuPm$Q6q(_ zBKu^0=C*cH+N?KcTEb~USjkn+@cD>ye!q_&gbJ~GJMjt5Bj9SJE`zwxO5=r@6kWLaHVibWm3k=J=g@|q-gtkAkp?9*z)#%Qp6msn~c*t={3i6-ak&$b4r5m z+Xp{VbfMzlI_&hCw`7ZkZw(d!ARM&gS{lS=8S$W-%2(%@@GUs>whZ~)J~Ay0`Rj}_ zTy?ty)+Y@ikrPm&dTRD4?UdqIASR!jqG4Y0=z(m6D0yKLO~+*vtx*}u1BaBc*upQq zS)<9})N2>D@Hx@xb5%|AI;|tXI#0m<^Ay_Ufn;U?6nm%t)Oy(dihca_AHW0X^C$4I zG_nK?{sBCGfe{mbBmL2cUoa(Ru}pI|(3+g;{mL|ew+Li9(TXRk-^y4D( zp~^h$NJV`z$YIpN-}Xls*lMTpbK>ueMiJW;#Z%ltohL>!^l2^Sre;;Dw^jPu4+K1# zubT(A25jQdzp8HVo-jRf^WkTi?dk>W`EjAykv8a;oGXef*-2wZz1B10p{F#SQe^!C zF1UR=YwzG?otxsAS6L-n& z#_wJ8EQ?L&wY%@1|K5GnJ~adj{zb+7+wQ~uukQ0F4|u-NP@zl;h2j>QE6|v$2#yj@ z@bycYWvM3?!PRbI+mU#yb7Q96`v^jsDSg`GrxzJynN>Apw&Aq3EQd+2bB9R>cWWP= zmq&12gh?fG*AiU`A&5jSLq(;zn#!$M{~>^yL^F&d@P+DGG9~w{1)x6b6trpUiWuUn zH+E>h`StiS8V>?D@05bAtn~V~*m|q6U)$XnfV(5-GaQqbPm8$c>h7GkR*Pj{5#!r! z+MHdxYgDly6=g6BfQz{bFu=30e7+0C$Dx6fyt0%qmLZTaw)Oapj ztl|y;SoVp^cQ*VS2QwG zO%W7|)Cp}hy|0A=9;nOV5Z@WAwgA79w2H!L(Syuake1V#$8^!vY84T~#$y)?+<>Q= zR?lB5?om=*4u-u0Zb^WXvl@{fePa5+t>aZBS#>{#e?LXD3qCBtuuWbdROin6)UpEF zYEq!&fT@*~%qQ_hl8Rer98j+LHhPALTsd2H7kM%4^%N}>swVyek3&j$5c_;!oN-}f z3|XYN!|(ZrmoK9lboy;4pFt|U+v)ej{A`(P@_O~S>wgv>7WF*&|7Z4b{44vUY-YbB zdZn{FqpSfFSQEUbQ03DKWIxeBCd_=INkmT7mRH(oc2bU_8iKcgjD7F_NGyva`trdW zRo^mLk)Cn3b)oD1zp{_ce(F-RPZ^#%ggS%-kg_A|Lmpc~l{WCzrI9P&%9*U9}!3ztM$W|6l>O^qH-ZDDaivsg__H2cX z?h(?ud95x|nH+br3Y2}V7p~HOW#1%bf};JublK*_@83RcV%iF~mD`E-sQ0$$u((ZO@W%J&b88OavEz05)l$ja2; z%r~V2i-vI~?O%PTY=oAsR8%p{F^zae98XC0;ZRc*IKX;m`8*m&J zYldn#l+>`$N#*2rRq|-l+9>^xJe=ns#5%B)t+zx%Y2$=3B3eo01)Y=HQTJm=CBSn< z{fyzR6%1h;&LnD9WC}L42w&kwlQDT@E)MnmkA`d1)(E;%EW5$b{als%wes?{ySdfF zO(T~e85^o)d7?|^$cJGVBC|dKnZ=yPP&Imv>6?hi>!sTk*gA=Bj(C&w$K}KBO7aaA zE4IDIEA%ss&2~ha*)H)L5#&hAb>lahOsP>w!*TcK4iDnFuq%@iPIdlX`fNc8XCmjywKG>UGvMt>#YUw*hn>t~G;E#gSkA%u-W8g#03x@h8};*{8{h`Xzf zsR{FE*6Xyww>=nNUp|>fL=T!qAjFYxAba~(zu_44*R2*R(g(7(IaFwUm$Qh~ ztFX3RSct8b=QfM#{B!HB6@T)(p5N-dyu0^?=mE9C4|8-2ejN4a;;5&$0h0V^?s~6d z*HY%BLQkdYO~i*lEsPsLkLwP)K$o$S#Tq)eoz7xK*n|b_w|V*`%)Q5PNkSPv^@hQ2 zQT=b+5Zp|+OMo+>U<;S3>$FYb0zy;#bJa$FJ9)CZRelG@F+H$J{!%=V`Q}qhLukGM>`;fZZ2ofB&$NM5` zU#C@>c!1-gVyj!i*-x1`u+YOpdkZqhXG*6-cwOR2%8M9Koi=&FTw@Xl4Y634a?fu+m}~C! zv!feeQjI?_YKaR+jnKtnhm2@#2L*S;;BjMF#&+Q+Ko9WKeXo9q{?)){!Q1f^r{Y8B z^r1a(MB4zj@PuT&t0#_ zB5d7V=3G}^zn3aMpNXO-cHU$%jC=BPvNX~hTOGs1-MqT5EzQl7>sYT@EBnQ%Sx!gp zraZRa;L|^B^Ybd>CxpODns%XLqO{9BZmZ0a?LFwo<4%ksW)GZX(e^nwxhjLkeO=i) zWCZTG4EwBVvqpz=O?!{dAa%^NdBlVucN<)L)O8@{u{sUtX2`NOX<k+IeZA#q|KbZ zwq?;*JkkysDcdfjMS=9j^ss46qvuH1h4uzC_9t8&e>f+q#xzN zg|`@=>G18|hCjT+3!TLUwaxi09{nb_4?_EV_-XHUXN`luSRiW5wYzdRcZ+GP}sJFPiwuCGK6_MYB&j-OjF zf~jVfS0+bZXNySdIZ#sPNtA%b7K!b1z&-|+AR9{8Kjz|#iWiMGP`=KCWF08}U4xJ} zayGJeFf#n3N&NRVQo@NeXh+p-?yFH@)Py*T^e*A7(q7}JLY!Zq zk#MOcbuW)F$=Fr=0Xi3MJW8%S3)9B@)?rM}-Irs%+Nm(50+8cCGK|8#6p%Jt zrc7zHP<_T}RjCcfZnYKYVS<}YF*xk9Mfstt-3OLs;p)c$7tU`Oq>8MMBG=}%QO(J?O!Az-3F>#VtWDSw;XA~;X7T5Q3s)L8EJy^m_z zRyqVu&hrwPZG4K7mhqPdFjalm+AKf?^IKY_VF-2-8_5F260uFFhvv`5rr-29(9DB= zd*Bp1EU-$R(R!4rf)JNf8-B00wh399Oey}`oe#%ajAmxF1_pLNQjJWKUJECc#-$IUXICP!Rb=z=52yEO* z!IZ@50czsZNHb-=D6f8x{jHFe$9^tPW1h|*0x!lQ%~B`KZn%eoS~CQ<3zYY{*^Vim zImOD{{P+0h_`(=e+3a`(Lf-zj>_=E(lRyJi5|BQ^^FNG8Kf$VNRD-JZ2f&99WdA>3 zz@L>nQCZ9ZM+o&zJzRt+QePtqWhm-LJoyiFgIHZw4JaFEC{##7J=rBjJhc9T#`ctwZlG56@pe zkp3Xe`Y>Sc*k%T7wrY&-Ek=5F7R zCMsMN!{ncmj07r+u{bc3{rjvK5A4y4R256Lmn)9JHi#UgHI&d4%9Sw(EkBd>in5mr zFQp<6yPCvCW_am&x3)I4=3rghi_y^6SOB!dD)Uk)fU~iz;KIq6TsQOn^Ej1xvYhUz~pG6zu3bCX)K13_Ov^X=s zU4EBinMLbMpn@SuF{5f|Oyw?eqDUP<98asQ6=vI97`rpHE_)+R*b`^4GfKtymU%3R zotd~RK}qtwGNmV1S9CwQjFrZ#QF#>RF1CBx<^o2>o)dVIlg&UafjMXZhT8f9$+Cbh zHCsMYw@RXiDq}YemYo@U3CK#M_eB9yguciZUAOT#czg>9tYaUduTgli+|6t$IZ?`N z?K2P=OOs2IG77R$*JE$}+WJANmy-0T$26VGGmB_~0r1te7huBK`l-PCSHMi3W}|j( zrhaWvl~UMT-rKM?-Np0N$4kJEVFK+~-7%GjKp_?2W=%(J*tyf0{t^`N(m8JRU1(u& zV-9v&6?%sB^eck}g!FV%TQ<)-UdERChdz8Y+6=)Es>vTB!=&@<&dnyi<-Yqo*RkZo z7$g<}F`CTqLpuoFjay*@Uwd&^fn;Ad16^RX5D3-^Kd|ZF;^f)ZL~k2p;}7!;h}~oD z?C~l?_ORnYR;Xo;3y!Jxs^Qp$aO+KT*v$UU5N3%!rzR)wLz~7DKy<)6r-td@9TI z$Q$Upq4!QdBQ&w1wmJP6$l12lY36@(?sv0d>l7vvAFU0ZzT*#fLZtdY>e&|Ry&8DB z8gS|m0>9hh71HX0@_LRaSd6caErxT?x@{{axeQ>xR@7_67Hia=z`DlaZzk`d8SnA9#AH=Z8JNe99j79I+STT6Z}Yfa;;iDgPkhL# zVUH|hvDK067SQS*e$+@+J`Cjewriw30tbxY9M6FV6_wqxB`zqr8i-t7-MgKW(D%QZ zY;Sq`+ZI@VPEH1-0`VG+^*pxYj1LEDVh>7nJljKdnFmcde6}w=-k5Q#q9Hh<`^QV6H`g%U zto>-Z(8pR|6XL)tOAhVu>X65>kZQ4JxXCMS9gIsj=*|A7$KBP8GfuEhs-9@F-@9&a zk69rX4v*A2Z+zhAg9yKOMKm{}N#w6lpp$pNquvsDy-BwYkj*b)KX##BQ9*}aS$yn5 zy3&Adc*JEpc>sIE-w}d#B7KYV-_sJNSP|NiGGMI|cQ%fK1{?2m5ejZ_SG|_Is7PKtjPF!- zmQn{PCA+6hAzB2wf1XGG&IMO4x^#kpWn=Y=IN6=+1%$c;jy}3Gu5$cH&%P=1!dg~{ z(bG<@H%+afDmFt>@a;x(+E3z8kCCOd%I!;_l;*vhz$=%FwKWcdk|02y9&a83Q#HnO zYWK*g$c1W1Ztv1`Ojn($K56zo(PHAYoV1hIkv&#&6->FB5xibf^E-Xkf_p;&jlR`b zf!Cq9tn}ksSe77RQR$T!&X0jL6Kp&3B-z<`_&gV{uh(J80ZN)>#M^ zwCOuBiJB;~Wc;Frk7Ljb@zeS7(B`C-Lun6Zu+Erqd-WT|XLUk~_RNXO&rf7Dai&Ec z@**0|XV*2gE*2qrMF9by`|Cy3W%@%;2p5AE>~8067!?y5j_tTRLiJ>H9eoE&R~TTb zt$F87JP5_3WR7NbSI#AMqLTpS3G5pb$Ve=WI#S19nnE^i^|%K*98p^R=pvQ`pDH8I z?|9dUd*hwQ);OYUIunwtU7XQv@Q07^xWz-Nn{?+4g#nWI3Gz4;tDMJQr^6U2_;-u+z00H?jk(w==WmerF|l6$Hn z4Ia6T?OSN~7ukqWzqU=agZ)Cp5#o6Dj4wu+s5Coym4Z>`_rl4|NESy7EJ?Xe(LFlo z_}wEUuOe9f@pQmeeMpz9#~@dp`JsoF6K0Ym!QtqiBhPj@B}5TX{Tny6bs>_oQ=7k`!@zP!iX~rPq;CzhF{(KK-KTiTs7d=W$`Ja>$e`B9_DAIej?WHc|;+AWIk@VY=$Lfl{0?+mdlJ z+KeYN%pNjkr1_CQf6DL+m{Ox`dReK|ynI3#OyOuVY}2SEx@exPK~d00#`KB(VN2R{ zk;?H2DZhziJI?Z?Ww1us!OTa*z6$=}Cc|4*MTyZ0Ak9wnfMfTup(p0o*eixLs$G<3 z9b*hz&g!9b!^Xh*QA>(kORT11V?`kKkG_#yE9&p=S~oTJ;PYK}E?bsP%vx*9!x!~< zUkgsq56YA6FwGhEl@huXv$4am)SaZar#;{rUF{lK40D{4$V>LAEOls%RRd0da80(z7xFZPTWk8f5TMCH z{h07#c{e#&1IjUDLV|2F%@@n!WPwn_uh~Z@S}Qf-=!+J>VLGyxfGBDlJ0ymXe)_vL zr6>2Sqwzxy9aR1b|#!2#P_ zS?kFcYh_bx@GV#ovu+_G_ zigx1d+Wib*Qnt~fU;D^LMMX3iOW})L0Sa2~VL$mQEfR1yKV?K(W^dmIb!&K&ZX|~s z2S;!=P}tk$*tUhYyBV+pg)aF%&?kDE?O@t4J9bYAT#$qyO5Z@f1Pdc_A6#~tSDyra zwc8Sbj^?=J>D`6r;&bi#)D?GG@JR+K%Xtf{7xeog3XiNW!nezEsIJxzU4wpIezwQB ztZ%}S`7YS%x89%RqEK#;GqkfmmD{if#XSuwT+Xp&`aclldDs$ho8%>A!SyeP*fYxk zlH)Us+fz&K_PyJ|ZCLVH*F&h7He3xQBG~X05X^RK#i`%Yso$u`k4cie*k*66TshAU z7S2du4rt4>U0@0iPGbSb=Cx%5H9ZMx#;dYG7uoZaT$%qv*gFST7H#{&v2EM7ZQHhO zc5K@=J9g5^jytyPj-7P$a^5+w?sv}j?z>g%k6mk5ty(qL9AnNg#}6EK)XfjfJ*d}B zrR~Uvwq=iqB@drUSrwcdRPff zZiWhkPR0EhxB=D^@!8nCDXu`vFNs>WYQNT_*uYEo z9jn6qM5_2?s&$m>CisgYrQ6mr@0T>Za%WyeQy=wRHVdtY6s0dYpWKmJ5kv%@2YUQ6 z$EvynhEWYg>Kh}EqM@-+KBl(;$tVd<2U(ZQ!yC|pW->-UFjqdR)PU7FRKKUWyFIWg z-0xqcZ2q}cZguT!_$@p$4dvmIOzmKZXjXq{i_cOoGo z7vJ}H(%#=tKf;SZIoFpTpr7c4pI36SMp5RsRS<(7LqQMi}Qg=n5-7-ycpiTV( zRT;!qQ$0$a22WS`DS8rEqG7qgW2wvYx{>F#TCKiToVo5DR)1Vq1w&8*8g}+3awuJ% z?zEwOwZXJzn*!ZBDPW$d&n5t?x9@uCJ$mBiEDo!Ww&k=P?J|s>)=kCpV>>#2FShKI zYJ7h!CrvVrC1ZN2Sl^pY;~kUD#cZ5AZN$v{p12V_zH|maI-(!>i{D00BWm&J7F%*& z{nt2UqU6H(hprhUJx_ae<_88_VoNup%lenpI=C`Oz&8};WAH=AW6NkUq5 zfI31=v(>n1+`eJXV3>C8glX^oFRBQ5?Uv(4k)l@c)aqjEa)DNtV38epLa#_3D2d?>5kkXAB5dkUc>|x{q=CdaEN`t9 z6(^y+#9Owd5m&9?lSUB6LUhJ?GQTW@ciN6%b)pd+rv7y&(v3LXRdX~y*35#z6ZjW! zJR&bveHF$sjbf$vK`_SqyD#vEJ@=fG*|hQZv!-1HScW#E2=EQMj=A7-`i_xcTKZM9 z0Up?8of`)Z(C5^&FT#KC$WX?@JU)FNxb6L$63IUfh*eFk%sBW zTbi1!>Vgm&|AwYjoQn5v-A0of5O85-T_h?*S}MZdOd$nu)>a|b^U|mipXk#bcFfhvr*gxQ?OnPyW(KPlefk} zh>Wi{G{Hm$uu_s#^rS}0{mFrxaKR=#;dMH2bfZYz>5p2&Du}I&aQYjPca6F8<|UcG zH_|Q87U1yt4w@~{!01@P+NeyBzK@c0wDs#XmS_fNJ*3-eQ`CwIE1Jr$Qsex@)lK*^ zutcz@W-rLYv%H#xa()jHoS9Dr|DNrv!~O$V;a8~BeHT>k^0S=e+_$Oh))NtKl|1eu z)_{15mb{hDvD!?tO}+*vhg?#)Q)N!k%HFn#{yJZ+(v5g~eY_ynt@y|*C4TTYdNmI`VPO@V?8sUaq&nfvQWp?AZI~I(f=p7x5vQ zCX%i>(qc?d5{O2MeSt14ADuJi2Sdjpb;$+TYH|~NT)$PcTMX{$hj`=C%O){CfNA)i zhx{l~Z(oX={kk3bV1FEEi1CdDeYite(sm$r+vokAx9c#3bG5Jp4{oa|-U^fU2(TJ9S@cz+$N|SnGJy9}g)IQivZg+j>yu=CH zP8@%C0>uzXX1=1=sBi(vd=)fLe~Mz-L(#isc1ft8h_d5^NVXlm>soU9rGDnb#)@RS z%H&bmf)z}c#nh9#LEZL0n$X3*?ttvYKPczlyolcaLs7&wnjaeNyN~(sKS#~~;nb+> z|1)YZL_|~XsCS4!(gV?e#p#HY1ZoTUQjH<}ouP}sMCYYdB@Au`1VdSgV&mig88@}y zGIP^xzOK*PuG3fBy|1t56P`c^TF?g=Ci~1Jq5x6jI2o)OKu4$~3xTP~2&&xg%6?U7 z=d9xjGCA2y<3u)I%g(OB{$#j2U`}7w_&(Df%Szp?14up#7g~JT&UoXp12}b({XAoP zS+5Z!9pV^aX`S>dGP(SzKic7KOZrz_5sjDjtmzF?n|*1&Pj!Aql864L9%RugCWta1 zr%K^&5cY%i)wQhDW~cj5cLp3I5UP7Q~=YSa=U3F^pgL5tCUuos7{d zURP2oq2pcL_h~LE-id-7Oon%58b;4mYR1M^_RbP9dkk{X_KhLS#36n|5mA{&p?;_tD zcwRu43X1SB(^Dw9X>?>(DWPALK$Uu(kwo*KftA^lmSk9PPj#2LHbivuzSI|dJ*oaP zrra0{Pq)5TBHsTwrv8P>&-nXRNiE2g*UB& zZYta8{&_(ws-XRoS&DmM@xolRYKhGU&w!D(t(OrKCOlVjLPy-YEJ@#)t~))G7{j505V}iFP0R@3e-H%&+Lir=qn* z5^>3UpK&$3mGZza+OfOHWxsPR)TybR)wLzHhf4+)5 z+2~|6?e`4z9IaQkmEYGt#6cXl9;em?2UegQ-^z(#nxJ$gB{{*b zSTB#*9xOD2hp)bpfSHlgeXrTB(2NaayJex~wM=lW^g;l8I3~C@T*mMq4t$f-UwoPK zHQ~A??#25G3!HdZ<+}E9JM$`_k`7`@iXqZ{8e0{{-0LeSJ6b_)OG_NlY+2)a_vg?2 zES5Z)GZOmrIv(QASjW0f-YY-qaYls{YdBcJ77^h(F*?55bFj)vXyAAVkqQ~%!I5Ha znTtRfrG7aKjr)X@g3o}S%9JGxsO%s~fhQopi&O*ooYMFlgNofk_mWAYNnM=|(s!{6 zU>yz=!HMzhM0ruYgYI1#{ehzUg)d`INc_U_kZ+_onvQ6eeDZ$jx!4D4*{|SFrHB8Z=XVj`)oy+V2haZ;9RDIMOI^+lL4GuyiYIeH2BQ5O1Dwu4SVOGkWTQXZ#{P3;x<0#o{3|$G_*bQ! z)t|O<%4l~|N89owE!VJzr4|Nra<6gjW{0CCc6qn5ulJf9Lmx<-j1M8>qJma>l`3`(~c4L$&D4Qxynzrg^HagjJ9TZsF zxu)f%I1+dZwvE3}`U#6!BkWDesxjs2dmMuzxD3}k3F`M~r?xBR5Q>kWqHXY3Uv(5C zXQg)`@sM7pvaGtfqYDEO&eA@y-c=Ey?!PB4iV`39sdd> z>?a~94FjIc3*i)VB{vD~4Ml|yJnBhvasL$bk;TZ0(MqlG>iET`|l+T_rbii&^KOS>|2hB z)q!9ArKGDTR@!0^RoeGOZ^ z@@-oe4Ba{N``!B!j81qKiwS<(Ik6~7$g z+r8QXqfR~n9t_XlCJ*P%I0y8zS&Vfnl7_n@HdQ0?sa~|xU|RiMj6Bv*ZcRQ&tSDQ1 zUA(=Ou7@-uIA5u=p(5cAwl$re+|W_FvzkgOqt%F3|GLYBu)^g&N#)B!%mID^py7!I0oNCPs~Fi0bAWF&Y6j7SQ}cSL&}z$N7~^`%V*Wg;w&NdQm6-)x-P+~4es^z@+-lvvuSo- zrrv?01f6s=|LMSfp?PJaN51)}2EnvM_qrhmgOSP9a6(u>u|S*L?+NAv%c1v5Md%GT zzcH96eBz7fHaDxLm4}P+4LJ{#kucUA~S!2!k5_ zvJaYeQy$!Y(-!K9))VtAmbim?>z`|U5W=bffP`V#6%CJ1o6j!t5A!p&a^a`EcHW_6 z4_Qw+^OxG<=v2M`tQv~HTLT~Lv-w=&5&n~iK?l|{D?qCt9h;+;sY| zo_-(-PcAXF#%9GE&dweJ=l-`x3OK4<_WcE{wdM7|&?>19`oT=6fT8)E+_sc|EaAfW=(NRr*E&7Po^O zC)(r-&WGk5=8?~SvNu$QH(1~|+=8Y6k?km$cA9H0fdSNSc<$za0-v<944^n6f2w8F zp~*_(5P>^ww*PZ@cKM~SL#sMTF$qn%Rxx$%hqu5YyohvBeOPB-mp+_tLwlE(CKqU} z^%<4vg1FS2Y@|e=$axxhyhjAXpHxt{c2h&HUcxh_r-L&lVjiYI{D)9A5) zk{;BrW+XaPR6=tJ@i}y}i!zB)jEJ?(s2^r>Cbxm@caL}%HWW4l`33Yvb-?WjkHUJg zI@r0>$@?vD`{A?k4Ltk;YKR)8fLo-bs&#JQ=G(oQ6+=#dtHpH!b0HKY11XjtGgXWI z7&M?V4VFm$&2|HYZUCX2=&+#CEB~w2AGmkv@&!b-TlnzTSsvcYEw4Ay{Pd*5O3Gee zEa-HzJU>4S3Mjma*Agrso)A{+lIre2gDPdHfmBS;!3@^ZgCvSG` zKK>o`b-ltR%j2Ll-FlHhwGB+}%=<_3qTUMnYOSHRah%?)!j_;2{Ikk@Pp(}@eNcIS zHH(>A#R$emSsn)$8@HX)Dw-CKe2Incmchj0+QzD^guRlwQqM_)W3Y04)>uo{+>7@zu~=3V*XUA;UOFL0q2 zFG4o0P3|gn;8uPvvpuQ)MRbbi!T#~Vy{nSj^$$fGO9Cz)+Sj7IB?0VuCph~ed_fHP zCL(;I(tM+}UP0cC;(y+csmLeyF$$3sNznGS*}}ZCb4BhucQe-VE@ z7CQ%V4tLp@z{50*$w%s8%PfzQYDOfx9wcRiC$~C}6;d|jChaURyuvIP)T&MVb|HKVXsv|gnv%G^50atI$?suG{1?kN8qfwYMUQ+ z{P&%~mlChu>Nl>y?i*|VU&62d*TcPs zz4di*Tl?0No@XysxCG-d$v($v-jhw&8J`T-*SlR?%4?7Y1-pzJZA@cBk3xJy=R#1! z)&mGgus#j3+@BlHq^LVa`#BzdHBjG8m}5N4X?u^&>0D$A3G7@WrV*pu0cnT|tj&R4 z1qe^(7mkqbwNrisKgb>P3*Xv51{yyIQ=VD90=;)EFN7fkjc$-a2)m@Nj7JEZi4g@E zM;LFgQhw1DI{Cc$EDvQp(qi7-hJwm_C68BN`RV{1qi=QJ6#&;+Kf(tH@w6fYF%A@8 znnNwK?sG#=@K-0Hy@P%Q7|(qE>EF2_NeQsLL4^KFy5=2opwKpaw z&^!=p6CWHOkkVztmPWEBREjtdl~F=>ayKZ~tA7E=l(vW8L=S4ydjX24v10V+{(i(@`2Eu5EATAPx z5yUdHSh{$L+0ZKfMpZzbR}|Sr%e`9jElJ6u*Jh;zeXVIiTn=AO@609kW3il^2#K#2 zfyRRAR>w3_aC^cR1zRnj+)<2kEaw@XkrbL?{br|%0yE0sL~tf()VXUz8Jg|VVu-hR ze23Pg$C{!Mgw7gylZ?5_+#ErdS&~9uN?%u7N>^(WHl;4y`Dhm;pANd5EyLA$g298rp}C1ufwU6x`(YXcdD8byDA{ z(eJscZaG;reO=~cG%bNE<|w!>nK! ziq`~3&bDmXk(4ICXBF6;xz;fmsfPk`z0w4}-1jZ8zXokt5cZKmR?KYzLu2_@X=C_C zKvrc9aDk;r3^Blql`xVUCQgqVMfgdy(U1w>tVq6{o3JriPK}+ALqOpeES#W2WU+G- z@L6~oRVAQ|rMb+M>CCW|^@wAXWp_u$rvi@JU?1S6e#()VbuzRPOsF zO-@nZEb)6h6;`V8N)D* zvC+n@k>tt~<34FB+1gavxdSlMWPu_3bkbgC%9K4IecB9o`F+XjC32t^6NZ)SmyqUj zL>l7&N)6d!WenRPo#Wb+VV_O896O7el;G8^iP2m#j#*Hmb9tJmxW%F)FAJWbwf)@5 zu(}+Z;+2$Dv5gYLvK*(g(4m%@ct1qWICYDHbQk)|bUphe14}`*3^Ri#Y?`g~wN>Q( zl*#gd()y>%Hy+(?Fq*nFoJFR)Tp?V4y z6J_Gn7bA2$9PP(0b5n=wS^3t@>DIm4g+fKGyp!}24(0Ue0TDrLSts3TmMuVtVQE7O z=2C^@De5zDz`vucd}v$eU4LBlqDl5g^9mK-ddAj66SOKHr+k$xFl zG@?4ST@pAfS^p_tKcTHv7+PPmEL9+g5?d8Q6CsU>N*_p*Aea;2v966{t74k;c~kSd zZH~;jo~|vD1G`|~C7}I&9UTeelf!j%U>?Z%5nB%>4C|%cre7&;M~%Yn_E){~Z2QrR^;X<5YaSk$5?Q_c+iZPs2y z4TBrqv3ikwQFnNu=Io)eer#)sSOS!w6h(Lo)cd++`@2%=rONUxCjDfL)$|Q@&+g z#NGiDZ+eDh<26u<=WHFUsFbUylB-w*qFBLw#F6KcqtGc|v3Uqcl2%~0Ty^^+Kh*7# z4WOT(+hk=$yOKEv&>x=uE=Rl4hB@ViIepEA^;Ay>(9e8pley}2)YQUShA!_m5st#t zImJ_l`9|&gW$nLnqF<$P({C;(3Bs(vELoIb4jT?yjpxL!`77Rk}{x2t>f?a z+c4`{axGQndF8IA+yG@oZ53xNEy}w>pNC=X7<@_9^s4YTk=!i2Rr=v%U2pa z--R?MEqp0Mn=vt~Bc%DsTqm6Eq<7LYY?rkyv$}DkFt{%<|Jz`SG=>(l*ch+xo%+f! z`}ng*`s&w2)~J44V^22x^W`RgU4C}>mNml0)WycFK@Vaun{thVl}|3<@Qb5i$CtM> z?eV)QFp49?pM*fMVsl8bL0x-JylwJVJ>Impe`|Giw+)%QV3@rEOikl zip9dbp+o$ZS^O3luDHw$D(SrQU14G$@A29bi#6-M%@E(DnuU*Lac71WHEe z4Ghjplgk1t^bIOHP65k{vze4>#@(2g@G!J;s0J3AZryT8_c!s- zoby3{I}6NU9PX|-86)&0Eb-ByG=?`~o(Q2Yv-G1FFrYhU;p-!Cp=aX;JEJcyTzQ2hg&+aPnf{ZEZ;d%AR0g}?(!h7@EUQ$-hJCNPc68< z6v87|0e;@A92ZeT`k#6h22c5|VG!uxWA(e@yXA27i7O3xK-Lc8X$*@%hBHm#c`#j{ ztT6)um66pc_;%u%AsvR~JvJwMJlAjiwr9p6ff;x28O>hxhX#jxdO{-{@0_R+Gj4C_ z2^dcHV8Y)xX8K6?)jfLFgI0Di2PFBXE(1#2bZXe~KZIX;PJn*#x_=Rf{2y zbl9dZEo(B?*{|+Cu`O$?I6Bcw^dCWWvhOLWrHTH*;l@&J8UxFD-=+~6habO;ro1@2 zQ2KL(5Zcq_dKBkkE(Z@rYi-IP(#AWBlDwoPCfvZ7uAXI#Y_kA-ayC(jXH(@?lO$AW z?x0_T7v-k$_haC9wiDAbfr}?q-#%BEw>j)F1JNC~z$N~-?*4&lC5npDZB>v{WdCty zUnXZ0tAytmGJ8K1ZK}dSsh=SehC2E&TPJmui>gu;^r#3;OgrC^k|Eb&j5vvmR9)(l zlW)JKb{)SS*m|+pX{vLKZHC&<`g=Pq<#YF+)%8_dvr77gvXt-`@PJCLo9dBu@_JV3 zs=k-15_Sq4DaI=;NlB)D-xL)t>-eyi3C$5h)K2WI?VQu_G8PIfgw|R(a9(W-=oFU@ z($-t3hRvfEqC>oxWt~~q# z?cq>1S1eMdUmsw7{_bNx9m}G2J35S(Ar_Y<%u7(BXsm(AL!4BJKHMKzfOXQ5)pqcS zx5SNG*@1a}p!QZFKx9y5+e+Aq+OhrcX{>fWnjg_=-)JrO02L2jAF-MFEy3yB^a+*P zX*JZ3-NMb`aB$Vipg7e8za-1o8i$skS9(fjiEE=#?{JfNJzRvV$%)i$8qHlP-g1EP zZBu&bgdc2gK!m1*Nu(tsvqGe03-}QRs!3GJHng6T8W|C^b`22V?vRKF^RljKGT!W+W?wEn;L{jqikXr6VZ1Q7B6@qIHGX+&? z`lA?%@h6hs-9{1<+epUE*AOqVBiUbC5Mq`Bbc#Q02?TJ0o9cG>$YtCOS(WY9iwRRg zFh#Eco^9UiF_ygEP^@o;1|L_%a+YMTFJDq{Dap`%77gk|xw!+a5gn~ruXy|+sHo~u z)B&a+GR=6f)#HUSjN{@GBZG-a{IW}+BY(ca6~YS(7LcLal|-U#z{QlKYTPpC!)Tnw zqEvY9TQxXs!4pZn1;`H`2X|+tp%Q}@^xA3O)+;@hL@J?vI(N)WIzZU0*=T8+vQ?Fc zq2~?suapsJVMf}_DJ{tm+NcxNaFj~$RCUNzXw}-g!WDP5aL~aH@HagK}FTftPF(0g|i( zrru)LKXslpmpP>1#% zhF~dGit=P)XtAwmEbtuFVF>u)6@f<~UC)D(#Ji?N_D&!$?gUBZ2UCD?`-bufRFHI| z?WX`Zakj+KA&HT8;?^VHgY8HGzbghq92kT`O%_nYcjUJvMXtlBNfM>364%LORM9;v z02e*!BioyJ!E?V-E%~0J^vUDeP+AkC(Z8hNG1;lbokxj6a89dH`htJXV_WDX-bDC8 zpY#I(EWe}xga?ZANGW?#fC3Qx3wJM3K*1bb!1N%a*ERAxIe;0IngXxM$tq8&vq(Jw zm+T$=QztlQyz_>$6j-N^V$X4p6*+)%@A?*zq>o__w>Grbj?@w0f&4}F?M5+!@(I3U zLa{A@mA5#>!o4o@7x@++`YRfe{2f>K-1kLb81om(V^W|j`8$xH7!|&=lL%IszJ(7T zcl1kAXg~xbk`N$4+e;efm6=4Uot{A~>8rAAdE;Atk4tP0#3$zp@*l?*%tUll&Ydg)ZaBxZ%cm_fhdj!@M5X!Vl#h1V z)0ug>Gqp0)mNP8JQv(gPIOsuwc&q|?w-I%fj7YO5s*!W^lT`(s%)i$TbwkZVJAYtJ zA{#7MWdu|$;?JRF&@*x;D{K^nrx4yGFk&klrO4`K>FR6TaMQmqaGwj-(Gh8(&3)OlODI{;Dyr+W4iXA!ghXLB@x$@r^FWGUrIExp(y*q#VWMFjvUEBfl^V&jc1*47P=sxJNO`4D!pb;dNr1a ztP#(ZPF8D`sbre;@kv}SGC!`bzIE#Ctw)Rd@=V;NBfn+SUePwU)Q~E+rq%7{uJ2BI zd-9~_tH%ySFS%Z7-TRSLpXHddIalN~U5TZ9o$Qvumu&7nb`A4NBE!3ex#SvkO<4}* zaR+4&TV&M{D%y$vX3TQbLuP(Ssn{e>0}p~cTu`~PdDzc<<*N=@B8_ns+8R@j z#al|*SGz}C4H3xXGA(*)5S*>`cVAJa%z#TiGU^E~>U&99Rb*-{A8CP*kuf|@?AKqv z?$|tGq~i{*v?Ne$H4MlIC9UabI0{`wR)u^&MZOl6JRb9|3Z9pVE)LoO$UxRE)NClo za9sYC*Axffm#W{X=eQWiAxLo75Xi8>9}{yKsM#!{m=}to<~C9>GvBi{%Uj!njxZj7 zw*dEyg%fvoZpy$41~E~-b43z1*&Ikw+#W|+lfwCVm3C^5IboyZ`$-F>X0cHj>{8gE zJ#+GOO;ryT*n@y8cBiOP6vla?XxFEa7{40+l2nS>i* z=mE>%0JIY1Jot@fV#zO<5plAJJ(J3Jlf8O4TKBC6Hlx5GdUdCMqV*6gxexH-T z$9mv%Ux?bOI)z$K)UQQEtLIfyKElHaSCIDR76SQ(g#Hq?t1@+R{;JEM+9m@ef(5A? zs|Ah(4v8!1Q|3g=U~%7!Ekq1P4$LS19K@G!XKRn1_Bt%rpXdyz3ru3p8OS~nQW#5CQNW=m z9%{xmd3O!=#ze%E(R#GU4sStXlqrnae4V&+`_8(TiN-mS{3sEY2$Lby&*zRqH)&@KY^q77{kXvcCoEg%DvWE zKJO_Z@+o1;>cqBXlQes^&r~s!$5Ud-bIi#)$b|mR@6cl@hSpzq3 z85H#;YXuaXqg&Mv1*SKhfou7))nh=cowEj(qO%vwDR+a`W8l`|XD@8@ra{G&KsNjX zm?xqfh1lB|^6*@6_^EsJzE!udsSF|-C8Lf+I^1c5b7eo7U68~@AUjrIjWg?JbY27e zOvt5|z@|KtH1S1XK^fHE30bINKO;QxrZ|Yjp1Rw>BB&xD%02xRbpr7s^!5nV+R|i6 zfD3xygC5wEP+%32d|RYxS4E=51}={B0Mrlo;)R6kmfYlzcxLcVvWp2};}cCZjn!P* z-4&g}#zD0g_#TPc;Y-<_SNf?R6_-#E{ltLlTi8q^rVmJyzu#+^C64-?yo-8A`vo`= zcKYRIgbQO>xV6-u8mq!Jx6)PRD5g;zqb*2EQoAsP{3#)qm1^$N&@6E6HzSOYJd#<1 zJv^213;RSvBVS;bw>>q-PwQT%zpCpNb3^t_js44@u5AFV%PqZ$6sDhu$E;G~f^;|E zdQ!}{Vc;An%8vqCWc}+~5sWobFtRJ#x<17D?tYf;o6Kp6)SrJV^z zm_KTvoq+Y52{vyivRdI4A~0S^-?P9KlHM6p2ZeIZl7cK9l{Yb@$X5n7ZV;e<8S*aJ z%zAMOAN+*Ifn-N#dEbG`4>JVPeiBup0{D{j1VCX#Dv^uq9nKgf7eygTp%MZl!(V%3 zG@IsJbp#ic+;Wz|W9a>rWS3h)3&73pWm}=8{je2%2GU|>!eV0#MGy#rnP1T5h6b6^ z8- z@8;xeaafOyg8^MmnC|OqEdIkEkbRZ*z)V^NT{eC_RAr}>dNWbLQ_E=Hk^(f&h4X^b8z@9hPWANXm@ps|27DW`G> zrRoI;UO&r`Bd-bHlBIi!V^WaT=+Vk!pe1hP@|IDn+u#K$w1s{{HmEjct@HE1x&T)* z?%kKHAFnUB__4uF{b_`IY3!qW%O<2H@uDJYPme|R-eDL#y1&yCo-KAa|C@=oIAX49 zBO-^ERsNSdEqB8Tz}Kzc&otn9@56j3JYJ}o#I!uGKyw??g>&*Eg5qZ@rr%cJ>!*NH z7x!R-FM-4n=Rz~2PFpbluQU(r227Bx1{c9xST)R6?5uLwb*?F`x^TYu5+Oo4!#3T# z_{GOVVB2ZfEhLmYgx{!JND3vnX=!Od=F$;aeQOj2EM6<`weiR-iYkQ6Unls;8*;B! zTnTt#PRZ?9XVh~g-4jWKrRB82enZp4(Uq9Ap~dz?Y^!m4opvVgi4v`%_nr4_1u<3v zm`i>NOa2Ze-MHn)j^!?`&5+aA;f_w~v+$U1w-+EyXRw4ZmaKTB<><#SCWPVMRVPA-I*hI^xzJa4>ywXeRtlSQ@4rwc?byQKS6?xv`ig*aoq#w+^Q@Wjtn2Yx#hE25dL!u?7iFB ztA%{`C!0(Ov{4F%N^wuhJ!^Do@vlJh0#W)UC##c1q50m-eEC#Ox=$27tZKt*rax%R7wErt_@OTx)>^)`R0qHDC;u*{`A_WM|JSh0UU@@dLkNjK zL_Qa+0vZj4;oB1z-JQHxrbqd2aC9h|kPQui7m)q3tR0i>;cDgg-XaLBdN&Ahw}i%` zPD-u%+1O)7Z#qBA`}6Gu%m}oA25YpNM#jt0fRJL>Xl@`m#2k_~wCQi1SvpODkYwmB zEpjU=!0lO1FG_*zq41t{LFa+ARhr5l);;qFWy^9jiU1+X#pSAIs1N6&jbQg_&0$6q za(P!H+FX&8NQk5YFFb&FU~5T-d?N`90ex`IfhhT~>)=hff)do7*e64#?N<{B?Ks;8 z`@MQy28?$fC4;2dl0h!H%4I0Tq9N=`oZ8d-EK77m5y3?jTckf^4a54&X>l|(CWD1u zs*}~-1(Ql+9X&sWCLN&#QZDcG!Hwt{p5O`e$EOCR7xBF>(oF_Gt*Apj2M>7E#iA z`SY~gOs?;Nsj5#s13})&DT9|s8Ockd% zBqrJ%FdC%PYAZUN90p57u7#X<)74yfjFqAAF;4x3-Z*o9|nYaHI~ELYIBuO zewfVGWq)(j8tezN+QmwUXB5$E0M7Y`-wt4g^WSI$swH@S{sEL?_t8q&Q7c8vu`yC# z+4@;R#>g(k-|G21R=$Y?a!-Ma-AC-Ao|dKA2*Xa}daYp_Zkc4M;|%o9^qyF&14G&) z)66_7)$I6N9i}q)9|5fi;~4duZSFP8OqWaGgLTU^Fy-ocqp{JzaMe<~snG@xRb131 zq>(kJqYZi{Fc#{Y{khS|xx{3=yke0bv>BHCwHf|rV*N1mjRy6REe6f*d*!2RfnR** z)0)&juw+h|pA+OQcsNY3?g01lLWzbTPe6BruD-myz(XeUzpwB7VnsToLoEpuoSA7Be)rg zyFDz#fUtt3_K73*iBa~6pk5!O_elW%8|eFhO#V0Y?cp`>2Ho(*dM2&D$h!V6>{fE@ zo>M~dQBm~HfmwzFlRKqtWc_Y1w77+>y>(_`^iFsOxrp%MOfi9R z?dtZG#BUjDnniW}9LGHA@)U2{*B3%tu`3)e-WUHJ!8-vj0O;SNLcva$0^~afLVZUD z!~dm^@xODRu$h^Pxt+0_wWEWmv7McXv8nApyBcNjGctohNF%?h*llfETQ7%wll~0o z^{9hIL750M&Az!cw^VJ&uaX7-JRX3&DH@V*R3i)C1vK1G6qjp7sAR*Gwz)z@<@L6VZ7MqE)8r<`k;kyRltmilfl}44mrJGQ{;V#~YC1pS zTU~|w^%h4d`{P)sISUf8xh#43#ZBg>#LS_Cs*XhN96G~N^DDHaxTQs9i(*}wCrPgo zU32q+v(4)Vue8EIOmr5v% zApvLn6*xEgEz4^He9zB%%VL<_oP9ag2ebEr|Diy)q>_O(_&tg^-y=%*-;L&fEiMwq zrr+dP?|+V^ee8e@KnP9zx|x}It{7ZUSZ2$OLqE%$xDc%?@*~js&Pt9_Itx4OT?g|6 zNKi&j%qgY4 zt;zlNZh4G_UM0-X^V-$9JxZ;f1AedSghN{s zJ;#&2&RjOrU9HIckzT+PoK`aPdCwojIM88?l&{}*3uv(BZ{#|D(K#5@$IX8v+#!)o zlCtkMF|NFr(cQXFp6k%Z}Cv%5yc;`O{BTiNRI{~Bdx955Am=`JuR~817q*H&x zzDS9MQBa^|OyDW52o@;e5Ed*fs+(p{MYc{6Z!kMJkfrpnR=!y%{tLi3GEq6t>?ipN zzN`VCpYPyC2%fARSz$pY7}|0;eoc!izez%ZY zFxLFrRBT_`^qf;4H>Gtg)sTI2Smt7ab^gj_o^s5Es_{{vbW+r@8}8pOuM%P(sfr_ zg(z9yJ#!FjkA*rlxRh$-XCzs$F_35vZM2A7Yq})yVON06Bk|zMlHcYfANeovrk7RSwBU5q7SukGF`K6p^Emg zIbOKAH;gPmqZ}}4$K-?NOD%EIgjleq>A+~LLUoVE8IX_lAE^@TGD-guM5#GRx4DYQ zfR-r1V@}f_?l?z6)dQxZ@C7lr`SCpNT{{I?ud=eg7UY+=$vwR2Ei&VDO|;5(#u~;^ zQP_18H$;qv+LAj6ibkAC*D0B>qV9f0vb2x=r%RrSMlvoI8W2zdHK z`}(OprU_EAX7RFi5|1YWkwSrj{NELlXusuwVt)!=O-0HN0biEh=^3J zp<&ewTWry6){Me$6|7QD#P4-&vwCgmYJGjvvQe|*;@Wv_(|LWfl2UN>{Be_QdV}{A zyFF`)>{IJG^P2t4-n*}P%l%9MuRTTEW{&weBaL~6jjGaHn>Tt6i;?Z1*lzzv6kTTW zu14gxeO%gfOKOU3pMkGY+ORXTqD@N%i@}V64fqi|TD#3}8(C=-8QCe$8Jz;yHj7W$ zT_nFzg$A&ZoVbkZWCY9@7ANnJWIS(dJ3l5S91 zkW2BXh0Q%bu{P-)1$;{xhd_s7vjxHH81ak{%Kbl~h z8N;>nRbz)}&>3g#6ePN}%^Wl|yK8wG{zkt7To`n4+>RsO3LPtI#Ir!0q?;QjmPGS? zuw03$*gnbJOHy{W!_1crTXINq@bgo1FrFv672Lzf(+Lj?BT6JCm@uy)v)rK0)*T5b zsBEM=ua%;M({q5S)XZn3>xIh0E$^g$6 zz8o2!;Jv>lr5#P)&9Es7l zOiqJxCO++aHCwJM9EsmtYLD*u<6g3|zK)g!zGiqd4YW{H8g*c4hXkL3 zBA%gdgW`ZlpvmP-O=#mc+jAy2hBk;1CFvD^Xhs2Oz>spXvN2^PMMt5lEjdt_&UQ-BC4pRtA3!~LZk&A--8xrKWPUb=tJ(yOJ79ce508fFp!7GN6h zdyE#TX|HH2@0qkpAJK1wNK8+5%n!zDs@E^Kj+X#lE0i&Qe9Q%t1urECsPO0asZVoU z@>DbVEa8gPHhgp#M25uLPz0}izqDbJol6vKN&bDw%IZlnmr#_iaUk(SGl`p}4}4%7 zyjeYsW#51Y%Y1J+5c6gYju-7_)6dUTQhT=|_b-EZzUuR~^xQZ&H!^@j-RQ@w+v1*A zB0|ShQr-=+nC6eZ4cCA$?T?bHWbPjYTTOje8QK++)F6m6Nn+?*N-rDsu+J!llcWxC zAtqV(bz!mlQ?+T~rHstT0T&&4GoheK?2KHRMJ!nF<7Af5!RLsVWw~Aj*zP0c*viKN z)4YQwX&ZXv?plQzF-iX z6B*~VOuHU4b}UuTKYA1Y8cn1j^bE@IM8mCK$cSV1!b@K@HR4$rvhyVN*fE|M5hMng z6x~AkbKk$3d*4Ga?XCTmG)98qV$Q6pvhigC_>$HrgQ4ZoxJ!m1!co6y_iB18e@Ko2 zLA!*jlj79L5lFYx^IRW2=Hd~(L;V!sjeW5Q>pVqjb2UYZB!OdF0C(YB1>I$F8tewE zmwMdDt<^{*?yzkHJR8)#RHk+QnnU6jmAs|$vy;q2oD41i(slBFp%Z78QI&(!o9bKe zOy`S8w(Y?-$L>_?*$4hxxr{5=#L&kF)@|fdl^<>X-nkd$Zpq^4*&#+?BJ2ZB_JUKP z46FWL?477{MezpXpcHnT1x&VvJCi zYANfvXn}w1rBg!A>WfS^Z-4I6tKx@wQA7f>V5#VteD*EMoTpHo_Xm3R?eV))Pp;1V zYvYIC4D6p%28Wnt&Iqf^j+T@f7G2fJgcugxcLv?l3>w`{=Ii{LIN8(B54hiUFbOHM zu}H2q?qFVvw$46jK0&AXw$~j`%hZ=0-jy32{3Y|Hi@XAAa$j6tPT~adGR2Ae{mb)@ zPeQU46aBGDi}F%&myjQ6GPd)wbp?fXnJdhdt1V3~uL9bZD?iRD56wZheIV$8c>z}Uf>i?kB)KX13*0IjplwVmdH(tA~B9}S`VT+2lHg)Kv0B(o1Z z+pph(MiQfK13cu2lfdlIu~YQX=KOP}5fE3&5!+GYm$Q=5V>q959Y3s%tBb0hF!Mm0p)u(NkmkxH%t;8=eqDli z%QD;S`OJliu6&%0LkmTg0F(EJ+HSFOS~$tg5)`T}7y=`?7D zISSIBZiUdSnRmtE{&rB@xh5?A#E(%S?E^t%?z-V3oBB+ZZffdW;F?<(8TR z?0C^<$fshX6|XsF+ftH$8}Jp&7hMbtMx0<_)UAMm^^aRx#s&2ghC zXJa<$o+^)#RB#Rs*J!B|+_uXh}32U!F@pkk~Z)uU{FAy60EH)Y}USp);fz%HDI zElCWSG$Zs+E=dTrmM4Q*t5U+ZYE=u#fnJYt==pR)|2ElsHroH!@oNX}1Aq8y5ahij z^p__8&@Ob8+&caGY{lYJ;;bbvzsm?RV%M>SF|QahX?d4t5EdxolJY55$wjvekJpKH@IkYwodr^i z`#Au|tYzzf$7G*_Q5_dZ3Ss0i8>EUmh-H_*a?UVC>kvW_d`0%{l1^TSjizGR=1p^G zhyJcy>!2nMMlm?48Ns4g>Z)@9Nq9}k*uV~6aVyK$4e+8xL8c4Gfzl1;5+ z&Z0hTh?;zx58T15Jj;ER%)QUm zc~m;wf@!C$n$p@u=ttMI-8||?$sa8D#so6b3C6|#1KEV?BFv7Ew-@(cM_TB+=%l?( z^SIk0B3?dopM(&%B%7nubCNHR>l0OYVFWN&LL!cM)F1SU zBB2&DB8OUfeqb{CKQ8bb4GC3+`~mP{o}4y`b{ax34>hAxol|`ZOHW4S*W(I;dg)A0 zej}*-hV05&{sq|>){AF;Zx3!yuxZ@Vi?2wpGKrvzQ3L1N7IJo=^(=>f;o8r@IiTo? zx3xml9fXOu^c+Uyb5#5cEJNSiG8bxXek~+QXhM1WjuS0dA&`T@`_li};W~0qT#iyQ z3&tW7>6t`u&n0#+r2n&xGG_DYKPa>VDkv7yNhkYM0<)Q{SchvX;;0RmxvtofmB((} z)C%SQ(YUv`uEoWFJouiM^llCz^O1JCw*so9(TcAqqXK~HhJ)xYD(sw9tu{@$qmje_eFqxy0R1hHwn1q(%-I2`*hDJ9Jtq-#<h(#7+p~ukIJ6s14o$ znE?`sFkb0=vWtumws@PIP;>Ne83`_aIn10P07TdNTY-;(m?HRRUsW03kH*7^taWc6b$~WaUcc8lf&o0-^G& z%BIQquagm$bePJ!fs`?qYJj5n+KD;>3Z0DU%WY5N9gM}>`|ffn6@2x@Ck%xzmJy12 zY!coi@3QfY$Icj-Th7ej%X?}0FKVR#diBLELXgW@cS4s~&oxng%fP}cDi0U!Q1DB9 zh>3Y`CM-cwDts#iEV)W4_)7aK3S37p(f+wBt*~3T3Z-DY z4TfKSZ%tYb?7f!|lUM4TmSU4M6_%%SVLs^Bthh3HG%2nD(}!G1>|F*lziw0#IxM~3 zGPeU}+zi_Nq_06P22%I98aoeUWwCUE?*PhOdGx-|NXw@5O};x95D_yM6ydcF!{ixT z`^X(S`@tsp312az?HOkgkOLEM9Hc@CP>C5*?r3<&t|2%#u_4pf@ubLXraO)FSjD)` zV#Y0jI1SqeyRZ27cR!D~_O_m0_cibaod9O;j7g8S6VJ%Ez@Ja-iy_XV;@v`%4}X*V zYAZfVrqrF`ZqqWzm5v;AYeV$y5tfa?W2ONbkDTpCh>clW2jcE&Ww!>;Gwd4U6Y#4e zajuwdtx-ic4%uT!H;h?hT-^c26&LVe-Ua7YEhO?LTYwl=>o#oN%*(@?BZvg+5yVZxuu8-eped}= zRr-U*Hq4z1#ceDML$pfat)$frY%zu)qUymA5NHGFYYHfIq!iu2qvj>}btJC4qBz{y zwFfE95L*waTM%sPmYo=Di$U*Q+!F*R_)rBQuhmgu5BBxY&iK1Jj-(_e>Z{6ovP>!L z@1Lp+G4Ep2r3aN9xrAgKO!NnWr_9$hn$BJ-Nv?_my-Ce z0#GN6{_ZC&+wviLfrtjwluu*x2%E4L4WM0Qxd*L=_Z2O7<<%w@$Kfp9m_`mXR4sR5 z99~*mF4N97GQJwA%r@bba@mBRa4H{z**n(ZV!sVk+>$+gFg+*Wiqm}I*BHz#obJkP zIiy+?4ji}gkC>m8lS6NB4E#bjHJ(6A4vVz+%HV*JVyRy3YFI-+?swSEY#)ufwb?Y&sC1gwo?ygMLt=SAoa zODF-hddPv{0tFyD-N8wA#1%c~0HeklaD1`b(e`;G{=_`_1Sb8K5h2x;X`z@lb}8?z zaG9)aG0Zj$WfRq{cjQvv!@59$dx8F3A)=d28biN(l(gef4ARXEy;TAreDF2dr9rKv z6=Ke@9%Qn|BbZDNs9bDy&zT3PsO!$EnOE62t5|E}jHX_-E>}%YdUVA$IKfBXAB4#B*KP-b*w%v-cZA`PVn z4arBUw0eMBh(!Ps7gxZ-q4CDC?b=-S>e{JZZ1?BEsreNBAoW^$mV~Ur3+0hbWWBwA z&gnUt_4WCJ?lX3yZ#rZK#&*+|Q$-@OYerwCBO=5du8U$t(i+W#5TUEU3!ulxO0^d> zlL?!gM%x*vHjy%rz(=*6oI{d2Y>7fgzojEm=s2}sWyRj?)c1G`7?y;(f)O0jW$0e| zC79Y$)Oqt`q^jPoS#rvGGorTKB(XJ}xKd}H`Q;qqc^I?@7=C>m5s=n48%@D| z(roOlJ*(`!I67rtKx4wz{@B*t;r&g7{LVxy^AX4lG!|| zMiNCs#qMg5htpc!#kRbKo^8ee(R`X$ub*u3&y3gPJ=qJ>b81#Xge!gJ0kBg!TVO!M z;+YbC=(7=$a#t)Y8b{y|^)}6^NNcItfHQU~ehvF^evVFUZD<@KzaaANa*n#%NeHc> z&M@{?vc2qZ=!@9UtJ*Lr9#i*dMN|YZ8im9j1B!Bz{)C#mzP@sBw2y$B+^8`d1%+DS zJ&zk1T7qbwUfgPH`TXO)qN7o3`K+Pn3)@N!S%<#etRytd+|v_wLu&fzmN^l@FH#Kl zCW)L^bXXBf2ThD7IU(frOO@nVss*`ldmE%vOb?wqZN)Sp6l_;z(%IF84IT5Znif~F zKk1(uP}k?*@Fcz+)KkjxX^o-CAt=@Etr4Or^&|{fsi!8DEo6p-p)ENs zlWO?=O@^F3hM6O7(e>A^$uq2!tcv7w_(owfr5MZ)3}z!VShDFGD*IlNJHR@-%uU= zz$6wh`F@2av|m6e*PV}03x@ikj!w~WC`6dz1&z$ye?AD{3u{B}2T zLr?bzvV|jt$@`%&3Tln>0Bj1mK}tjE661P>?&!j$+a_lE2s`m8rJ|e+j!O?gWXk2_ zzdm!YzWzl~0{6{3_WXki0Y^bl(-eX~4lU;b#MXqJgt6ZvCieCll11o<_;EGAT3+I} zMg_{=kl6Lk8_+#neZ?W1HE>_{G90@G>U*S8903?!0k%g%fp<(iFD8Bk(Xx9qsG-`u<-APr zKaM$NGzm?GPnP&}=0j(N2I!LUYrg;N4NGvtTCo2_p9{=izbOB^oB6*Zkbu6Eq1iu8 zFx1VRFjbMia*WKa+`E<}4LHrkA`-_Wuu9F~mM3I+82q0;Wl?%Toe=7F}kGNm*}8L6|KzVy8B zY`yM0cf0OpmnNcqKO8Ooia9`aJ{#=RW{ZkMxUuh#iRAi&!*XlPVaUPPBC^ zz)jO}2jC?)5UOQ1Bzkc#?^Oi@@3kdNpR4yEe&Z9-E!+5v4xa0*&&i`2J9`Mp^&yPo zDKqcs2<)sXP)yb|spIsjGGI%fCq_ zA&LOgYV7tJc`h>1m2X~51alkHl4bN|JS7(sR#Bqa!KaPs zjlE3yTYL$v)VNOM?I2!t3}O5d)H>hvtnA;rBn>Yf(&3^AktZxqZJrb;4rL2#%p)^W zK46b2o{I-Y%s`3~FCXB<<~2!!+lE=SB}D?{BlU*!YD~kNDZB&cq7cI8hp#hE3*Lkq<<>Rnv#(+^iON4*Yv;%=vkTq%xVUjY$^E{~4V zQbS5U^AAk}@NLI3q81b_<-F?B6is(ObC=ylPOQ91xq@T654Q^h?f=%ia| z8yPXsc-;*O{BpZc&a>H0G;cvhKMR+vHAGo$#+|amg`AZmMH)`idX~$ZY?e4O+&sa> z*91}j4#GLLrp`?X2Yje5$|7Lu2zpcCu7CadTkd1@K4?^)rrl!SsH*-VkqX^wM zD5*vmdT}wTgB1>B88Gpk-U40GAjdgJwDD2vqrwdb=01R5bS5a>>9;!}mp%xWkf9Fb z9R}r{F2YMK3%CzLsnlKQ9-V=g>Rv|>$}FNCkKdc}(&E4*tD6XE^s;t9B*rK+lbm8_ z#0a_tFl=?+IY)*yp00rfGyCQr%cjWPJjo9f&!O8rl-|+07eN?hU3leOapw)bhUV6^ zqFu3{onw>?CN*N!rw}$B5h;I(?f{Rumfydxn6DE(_wI{ z&U1LzN^5=H@^W2XXt0$J<>H0&3YN48e+;L^Cyt7Y?oGQz#xq#2H(3s9noAwknN7-T zfNoCKh5P1@6FG+n%xFwlH^#lMN>3XrB6W2oZ+{oN@k!{6J?d!H zxdE1J_oO;{vQel}k-4ERKElCoZ;js~BzitunJmVoG)hO>)q6qyB9mM_ZwM6VZ#@Nb zK?Zj*F0pt@m}^Z>BV>Y9bn+;}a6(1dKE~oQh0kG(boIAELFcL+pYNwsftis9IvpDz zu0Fr6PSF`i*txMdAO87JnX*$r+^xO55UIYnsY~7ckn-rFB5B*7k<+Q+(U|hsQBho~ zws={dvZF5PalO32X)QggNZpMt=~2DB@Srh(U4~*afpDXzY^QX)8$*nEQ+5y3R$lx} z0izwD9{MY(2waRoar911107 zf${ZSqmzY~2Udf}u@#Q=cX7$K=xMO-^MCLR`TajmZ+@)Or+%^!vj1-eqUdC7_aC)T zwu+@9rZSq349yR02hL0!vq^!@8c_mMf|5*5BELE~WPycQr{WBuMgOv)GjZDqgqO3> zXRe;(5cV!o*LlRkUgXEimdLb=%e0j)C_Rk~@V)1h`}W%XncejDvsTvyKq~-86rz?S zZp2a{Q^o+K5#pq%t6dmX$fk&Rt@@RLm_KT_rIcwx^#FptTQ z;$0h?>dWZ&jSg{1-}M^v9|AO*!{-)S{50Hqz{xTTVAboB-c$FQUJLVfOJnLEhy8C^ z(cf}ho3({|D7&e(5LHMisj09>-fK-33^htE)gHy?Y$g5fNV%#WR&5}YZN;7)h8CM4 z%C#UVzx=UkuYt@qWuN67vy!i=tii&J{pF-7K)%Py;|7J-9@#|eg=@rGyHkKdI9;ao zG#wavbmjD}_(jRycckJf+%&ESk*Du>{R1L@hevE%Ej?m}c#(=~cZD(|-yQ<`$Iomb zvd#Z+MPq>ulvIu6^1z`!A)#qrs-#*SGK94b31wKc$P#`FVoS`|)-amo)hR)}=vkI_3R8ert1N1>^0M>P6VUVjb`a|5MF-hLGB zF9rMN(?kpOD*61Bjq59&dO_FOqnIQc*%R&pdda9l9)NRBW(NF;ngJK2=9nTmvMWgF zKHQ$h*YmjWJN!a>szRlP`{OTsT&SC$C=U*Xcs+qRa6W+y*<<9KVc=54?f4-fjpMm+ zxe~_)fcI7unN!3${=MT9qxSdPyPfN1G*3_$&+lL&aEA|$1g`;qdykgyfDm6GRPUH? z=2V7Rc`8DO)P~Tr2FxuGhg?8_iOyF=J^d4}2c4r1s1Cl4p5|)xqdEsxXXGK@H{31c zbt(0;!WOHX0Z`W`iG3yVdFM;_kncfj>;)3PvP$fN95QwwzH84)iG|$*9Dq{q87@W0 zNy!$QXhjjo2w&VmFWu;J-~Wd*$Fjq^R?pA;W#CWTAo+JOZQq825X3vG$bZty9ZmCGz+lpOuCR=gzBiHBxckb&~7Wk_Lv59Vv6zIGBqR@ zp@fjcX{ef>g6T(nSRp>1DIx?NKUzuyYM5pjCph|97!v67tDHv11j`SXpgb$D#$Tdu zY+#Pq5A|~7G%;wp8*-csu)B$rzJm$catCwH$6P7QO%7!H{`kKTa7E=^+{1vQDUyeVBB11F*OX){b6EoM=JBAO` zP^z7N(4y{wFm-7i-us$oZI{!hTfRiy#P0uLc|CYSd%^B^67d=COrDOr$=O%Jjr(37 zfyyarT1WauN_L__(Otz7QFoyj-bejs-74Q6)f4>l4}kko&whsSewpbzIJnaq{qMR} z*6v?;nxCoWf7GtYapN+-`Qbt;0ZfhL;aIm=ne+&>xWf1MP{gI^;!({^Io6D_@VB{1 zdNM$9`$8F3`oh#ks%e$Kzm=VxS0^tQ8Gm67y}K+IP{r(Q%KEsYZiLrjoL zV0fy1Dd2JQTtt_567lupov@eHsN`B&Np19e@_N6%S?4|-ovqykm@=RYK_hu#AQu=a zhLPe&i63n8L#L^|GnVP=@~eb`pWG?m$ne92%HC`_ACi}796dSuGX zcc&;v6r0e!t3n=Dd}#jnKMx671LYYw9Q#F$+->Otr(j{K+l@^{cus&_FVOTb|<609i3awK&5O)^ZNPAIa`Q1TAHyY3lEDctT`tO|0q zoDKQS^#(iov@2_ySaoh|;d6{uaI08!%fweGZU`}S=%p}@AQWcBt;Z(V`5Qlg=x{xp z5UAmbF<^b1unxB8R$W8(^>jA;8u-3TD8L1rgAn(z0QZg=hmbD|5Uq-_mk>7mNUHY^ z7(e9I?1}LA_`c91;=qFe0oGf{*r1+3Rqi4=XNVk^$fGx*+i!ghU3vOiao<4ru3)JI z(s}^@Mb?D0eqbVUw$%_Z$hQqTbA4URe&Ii1_>oBK1w-^=w@l#lIKyZom*$g1?4oI> z52W#7wq7ToDHgwqKKlTSKA0t0fwf4Af-cXrE=uLtALqx}8*)mugN~|pQ?Y7-g|;3S zNB##7g7xnhocE8jEjHf&CqByQJ2;vDulV?b(RI^OM*g}rGSOjhw@xV`HqW#Hr(@J! zFonWhZUoeB{HR6_C&*C5txl0?|wsdER&on`yK8unY^M z*AwTW<@hMDFc^Oa?(Btu^BW}T#j)TbZB9qKR}Zm8%v8v!8Ov0tj@5*WM-G4YosUTlFHN&V>u|8^W`eTe!FGw`WQTEA;J*KdN2{Q6m1Q4fRaFkRZvRAUtH85d#xZ zDB`>nRix9#T8+((l^k|<^5~oxs!NFL zM>Mm58tcvZhd2$v^&hio@ zmq&Mvth=Weo?rAGr}HH@Mqx2TQ)C#jYU^GO^QSUI?zg{7QOk1#@HpdsJWTj`eY_ut zROd~+&o}IrJisNpez5P-y&~ienH}$~h)X92$T?S%2jIjE<#ir0Bd!i3;Rusxp;4}R zq9S0di(WTlN~-6zdNEMlzC!iUQ|aB4e-K#>m1B6isrMjX zw1?7Ispkxk{%--Rf+#TZ1dye6$`Go-l5NKL;R@Sj}Yd?Yw0(oD_rIE$zjntaF%WHv~(kwmWMJ zn=(>)luJ8{lC=rVWu=|56=Ic(E7)AUjh33ad9I5sa<-DTdDKGeUQWi6tL@kAHe_gK zg*Q`J87)Fdu=7<$MBZeKbbYcplvv8h9FHCnGr(M31ARIb>ih!iE1WC&h5duzUSji| zOOp#DmNLVQj5badT`Zz9n8$#Ne@7Zq6x+3Q_rScA(o>;+I1w;!J8%zYl71i>k9q_= zWOF*dKFxylFafD>CWsQ{r^5irv+HamOIr&I4Hdw3!zah9I7X5(IM6 zUWAX9nzt+V)A?lPEmWGS=30Zb^G_toIXv5?B107|qIx{qu$DIs$C;AXF-d!g5?APq ziU>{t4l|<%$O|qWO@tc8Lo1G*LP$Zs`#G7t?>m^d_?T~g31xs!fOhgbe9;%NMk zW{m$f5e3U~ig+|(ZIM_qKG4t;-e~=3k6G|8y|2{2N`V6&CE`aUQ<@~V1aofLa>qec z1pB1|*2FJByec!43)tuXBl+6={w{QwFzc*p$=saKpiwESKFWv^@g~9)pq+mxcO{nC zJCgrC2!#6*w0nuoH@d`UgB>^~Juo47m4qo+>fOVefI%~9h07Mz^TW-Dip>LwEr5xg zUqdwOOT0TEf1JPituRYM+ZPN%lnWq$hBfE{_bsC(MIC4!y7|+&fy$}=dIy7rUtJj_ zyXAyHXA3fSHJ#3s%w?GxD@ijRd7SDrv%;jQe`=qg19l`$;4K{yq`t$_a`TCz7*`-3 zOGi{@yH@JrqtIzm&R;9Ela;$iD0f>|5uU>t7DBF#k65)-WW#zO^VgD`DFb;}^m4QB`0(v6ya+#ISIIZ(n*ZFuiQ9LT(9@k&vx^^}mJ3QrjVV9FSB%TXC&`#u8vcK_oSBsfWc0gfGn+MhOj9THTj!wW(!0jd` zt!Kw-4+y4^Pz_twLV^jPYUS#+p!Ay2umVl8gF{-6gA7T&2^`QvrmHAo6fE|nKC6<~ zIAinmE7^iZYUE9#9q-|@DC$O(Mfu9aHX|$33n5PM-B#ODm)gNK&<8o8b{F7ZF0)ir zbfBX2#^I@)^I}hmyVWeMdiyo3SSDlg#Pw#W4L|$Ovy*7pqmGO0hkcS197lkiWp1#3 z3|nmx*Fs0CSc|z9K%=x9M>Wpp@C(S0>6OCSPQrlPBW#l9=*bw`#IFW)?8$8qQx9S6 zfp3sh4`}R>MTVH&q^|lI4>$%ZFVBWl7p1qNC&LM6jRDWpD_Pg(xPHO0WHFG#Ok+uui16J1R}lwL{qcC#)1OTDI< zaa!1UR3A>4S<9%Qo^jgRdAvA~8nu$1rI>MA+Ih@4kh-m7)>h0qzkVvojAX1{wN z&f3o7#(~uHW=0LwjMJRT%hf>;vgW?CFxX<@gZ`EkcrBBOU&X~=!nHcs1V`R1;^%^| zJ|JPi5Qxw&{y9Yx9zNWD(dZ`>tAZBLkHII8tjilFG@ISK~sPdb9*T_qq5WW4& zd%KI@ey~<$g|w@G+s3s;*j5%5&+=}O4s+GWWRN4z7VlZoW*1M-@Vl{dM^T|}^u0h~ zdx8I17f;iQ_LKam94?J$%lrBN}MGuiFKZ( zSlT>qflou%yb@^`M6nN~T;aWeNK&oiYJ>>yEOM*ZdCcGm~g4trOG&aa#d#GWuBvkiGr^hIs(8f@T&NMN>99%M=c ze$3yR*W1|Rin^gDjL^B_A_pWhE<_)Vb7li`S&%3Mw5Ory1exwXO5bFv}Q zL|C7!)kcY;^W0!rW|zH=wRp~c(4lIBI{0acEK!irW?spS+LZm|W96hmU7bP2+QY-a zroS#VI)d187ri2062C)jDzil$uOT4E8LN@O5OcX=yjzlbJn>z9XEuuY=UrQ7_oRkFuxKTq|M#=~Yr%1+mWJMW` zyN1u26OlnGf$7#p8RDdhxRlG0?yf>BGKQ6|=uo<=c)us6_O3Yu85{O4IwX`zINNyh zz`X1R*bq?GdCkB&;7_I__*~m?)Yd_}q0Ri|*1U zQD}-Z!ECNd5{swbp?b|i7V_!cTCLf8x(Mcs{ncHTIuYB&-(Ojy!ZY~pH$Dz1*B`)d zBTsoc1>E<8ltCMJpaY&~ZZ2R}^MIG7vEV#uYJ_c9=3>Bgya_}@?1Ximb0Y|}bFT>T zs$mdrRQu~4YxnNVUx-aQPTZKj{xu_D`L6xz9kRzrUhpbA?b3 zN7_7tcVx1yt4Aw2hdFFZbxeqd9#@UJ+EtBHut(YWQ87I~DnsrdVs5=`bE!8pVo zUhvbv^qCL|U}1=V^dBG+V8B$!G{8hKKu?7d7(v}UH1DmL?xTRX2-EJ|No3vQQh;v2 zLC1lB7Qgrp1ol#EZ|StE!D(>%h66)AsQo&}(flKqc74B}L-SoS^9TTxg1(>NDmoEw z_q1^>BKQ!BqqWsvfiGU;v(-Qs?s8y@_Ptx0nw~&)K0@32+{5Gmmsp@)aV;y{~2tu1zR!`e`MA3pFRDA|1Q}4xAqpT@Gn&zDeIJtzAfJiSxk{ksX$@RPqQ2q zX$YXGA{r_gY{L7{Dp7dTxk;z+WDa*(u^8tK@U4&{CtHgGUJv%RI~#j;X6@_!_8IpV z?j4B`a5Nkb20#XY4!i~F1evQuSrd#0&^H&h*nmS-RM%AR-{Ni*@98m(EE0j|Amw2n zkT;tm1{-bC(V#a|!y@igf=Qyn&U-O6&Tx?Q7@qQ}Vi$)LO71w9-n~|Ef63V(Nf7`S zuw@4#ttO6a881Q`;Uv!D!rFH6ED33(Qj@jmQN!vYYuu?JTjdfp_Q;wewa9RW!5yS^ z+T#rXoyNo!FUs3ZVXNDB&VeUEDuFd=$IP*xoWJLbcfagTQxv)RS7%T*Xd_!O7$!bpc0P$y<1+wiKX+0$24@g>$C#MU0Hs+fz6)`>Xn%5dTA9!1s} zC8isXHjD2)v5#VJJ?r{UAQ?A|5zPFycp11EfTz?PBMFFacyb)qN_eR^Ma>3_0 z$$1hD+$6k8hVJ0eGwn163K@*DP#9yy-LyoE9qxjHWGBmHV0aGsp>mV&*uiBd%0|u6 zonO3wd;Dl6+$2W4yu{_WsCjg0g8w3NYWbVqA*1UKuNrw_@0Rtk-dCl)r{KM}M4jGc zYmj`%1N%hfioKNgy{89Z21oFs2AU3^q9XW^8>0IV4@T-rzC=eIzf|YFW=EagdxHBW z-I=1J?$I)+?AIC`+ai3?1pTX!zOU7Q`yw=?{-WIXWmq|M0@fL7M9@h97eZ(!qKF@p zF2e~k53vaOTgD3?6T}plXB8>JY-lJHJ=)wRRLpg#I=#oA$ydM-&?GM_B+akgEy+A( zhv7GLSLUP#Sb~yRmbF*Ummmn+PwfHZu1c0C+>}?&s1hC@#$fQxzf3p65!jLMd?O>& z>5g9j8yPHkQMQpzHyA(jWw&(%EQ`m5r92&!4=({acwZ;3mMFYpco-dFydCCxvtj`6 zOofGwBQ?{cX8p_|8Ha8P`PZHnRnLxm3!A(PS6BVjNM1N-J`wQvw60t@nb{(brd zhB*biAF->8+mQs6z}mf(0|Q6pS+wb+AYq^cmd~swnyytiD@%Xc3St!4HB9vhe!^p5 zzOFq+ylb7s2K$I`y)2X|-;S-~S}N*lHHteRU2d}$ba^b#B4~Im$0-wZkjmaa8Yx1Q zqqU*g*>K7QXr-D#ucCjjOEglH;Hl?N9XXdEmTO=uvvuX>mAi>%uIs+kOg%#dHa+$% zkrSN(KDTBAciO8qJxluL=ITU_9M0k)OBChUqVY}-k7a-LXv=+@7IgUGk7@kr_2iSh zzM6Tjop*Hga-8{i{aO>VWc8UF`-~;MMrIG0sK@=5!1Thr(Jx@Pz*N-nb&aEZJHB&z z7Qa?CJrz^m1tJ{MR5*}sVY8Dt)coxxIt=`#TyQ4Nz?t+a0`667SW&^07;lz2Fi@o; zM?9A7g9B>xCgHPGnmVai@L2pTRU)p&ZoqYNoi>^IY!?oYyixcQ|05O8{ii{z94lrALgu8h=clf~;-+K{=dfPXgRh z9Hu%ev6V`oLl~AiU8LSab;{wulTTp$e;9km@JicdTev$;dZuIBwr$(CZKKn{j5@ZB zj+2gU+qTUPJ2~@RYoBxWdcSqPwXe({`TvZ%t7_D!8d+Ks2`&>Uj}Rok;njB0V!^GT zS!+HJdAvMUf4DJSw`7hC5JKMvBh5Dki_K0QXru&@+EjX8oGtdU@N5quAZt*rDzTsvfR#Vwc(77R8RWFS*`V*8|uCYLQ zruH!#z)e=upyxAyMpa5J*yYQXvlaK03|6hjOJ&Upqo!L~F?P5$nl5->tq76_xkK0? zwW`De9NRQ+_#sVw0J^kmqi|HSdcGJO+l+B|IvRKAq*`eRI(2>UWWsQ{;SX&t>_4sc zgK{EK7t`ZG-jw=k;+@vk*Ur_U)+;xdulf7khbWU7z+ zOV2&?>VQ)qL1JD|_De)logD72^jI1OgA~cA!$thY?wY3KQd{M#?+#$hcWnq@muQf_ zxgXR?pCd>gHb0#wnu+puSZZ}#TYHklbp>_Ps7wNDw;e!P`J(NTgO-FPMnW?cgZE2VMa9Kois2TPQ#+NQE1K{%Q0BvZzhQw>D= zn7ZI%EqbtItJB4f@uaUQ+%4q8EJJGu;;ajZYNuGJ`ndFZ7Mk$z5g1xG1F#7U>CczM z6{A#Sb!g6lDj~hO4Q70vn)#o<7UOxVay(daHY<(S9>7tjO08r$@sU-IggZs&=_SD6 ztjhDXvJk=L6g~ISZgE`~{HhN0w#2}Y?@M37QGj~jERvY0)u*TL~$DX`kEQ(Q2Z*IFN3-&mqP8yaOrqC zzcOApP56u@P^8GoT}H6T{aP6qWAN?tvl^o_s@ji2ab&&00iCWY0(nPQfh_+rcEQ`7 zX0<3Fv35I|R9bczSTT~j^`Ne}l3rK)K;dwjmyU}DM_HUCp{?~fdi?P7A9LM&B7Z#8 z29~c=X(!VF0ru~A1f=s3Wf0rRX9(FiyPaQB?iiLR7kM@CC*Ky<_X7K{pr2F5Fmt`L zOW8fEzArsxjckeVdLxs`eg-^~Z{NrX;in4@z*4SP?q6o2l-@RE1w;-l@K$kscg{J2 zSnrHSq4dLSkWkuk$92gO+)z@L&C9mJ#z$d^DVX#?wz%*M>ydFx?cyJP)BD1qo(orA z^J&9U7-7mFSiYi9+E!4&+bh9elva#xfHAQ2!QzEz3=tyhRV_Cqb(yS(foKqg6pp|g z(MNlPWLNz+p2RchArgMQEm0!nFZD)Eutd@%Izp>z&Z#uX76HRrz_193EN8PL^o-mzW^FUv|hBwv*9Pu87L=+BBa|%;#tDG-V`ezhBD! zvZ*dG2K^ZH`{;E7&+uZ9S*rKa3MD%5Il_q-*%b(IBygLVL(D`X(Wg8Va#_ZB^n7MX zsofJJvty6zyg~fU;vIQ=__u`Sv-5<+l5ITgdH(NJw)1)QB#AfZIBMn zZo2kop8o8sYFOOST-^Fc$4~U_8E&NXB_%F$4Ao(5G1YdsG=sIbdE?7YkV=lgJ*0Fp3fG3w-l)UhW^liz$I{@Nc z(DvJjc`~$_z6NP+dVY~UQCFR{)0Au~S?QQTS_ms>p1bW3Z;$EgH6t5ARy1?t;txPr zo#nacrv$6x<)LsdJm^>Ul_X@j3w27FXkS$CkS*k+;)&&%l~qjMZ}cWWQtIU-n21qv zFXklZ+wfvn@pe$kXn`j1NY(rLi>K@B%6LvUMOMX0<81W4&1tIOh=|upf&?j-PHenR zjdu<*Y!K1a0dGbdmIe|bd(b`deECkAs* z!uXR3q5yq*FEz@~!2EnN)vudSm?O$#^bKvAy8{A+9uM0H*AiNtTI`mBbcc*A8-HHh zrZV1d?$$g$#aw&BepzOU6WJ8~?6J^jvz`5&4<2~Y=_X3aR&ikc?5)s_A|}ziu{)-& z(AZakjgI5KgtjwHK`tiyOR8ihE}?#<&{s|fRyD=d26Ytp=lilLy@8qi+8IJ{zRvK1 z_2@5Z?hYfwPDha~oF891I&1Xcxg(hj1g^RJ%}*O?Vt{{=?=$ibrd{$b8$w)YeRWo9 zRS*=;1LOpn)F>Jy@XJqIX9XNZ5ha1{rxd-95(8C*8o}8Fs1)0VHqHkm6VZUxPCi2M zph5LI3z}%C5;c#bz4a1OGvt!jMjzVD9ZlpSC4=@Dypuc}MaPwJV+Yp#-x&(#Wb_{w z<>Nq(`?Wgb@@(cVo@EuyFJr@)l-kWP@!vs$uY6M{erfE+BWr`cQCB5v=8Vyg;>oeo zErW2z#7-S0xYL$0@3WV0);F+;z~h_t)I>6N}Z?08e z>TVavqm#V~!=zWSO#=~$_>u;#Cpfu23{rmo;;eq`M^vS57Sk4s5iwl)9c$I4UhXQU zPNaf8n3gCs}hAjuKP7EMXC#6(_ zh|!+z3{Z>JH-99J!?oc*;sVM2GpF{py=K@>TZ<(E(10~YeZ1~loY6XgH)cLv zETUfXxZKb(tsDE*xWwSX&5ZnENzE}P!b zg^kWGM5@ztt@VVlhW3CEPk(t3xPz%f%yX-297fB@`(d5zFj|;o))B}Lok^OZm zI_+q`oxa{!z<^oprcrMd7v4}iVz&y7vQoim1o;vNfcN$4P<17j8){^4qNsqkPd(_T zjA*UExNEFEfm>9~8uOL6+$)}SZS{Q)CTcgVNMEN4FWreq2Dn&6F)Y4&a=FS^lAucJ z)kwSR=P(Dm*P=|}8&wejwc(c>_4+6WSxp%y5i5z~RI$04dK5i?)kM+k@N~`jpv&*C z-!5U?)^#XFjs(=C;ay&Nx`?Z~XP=v7#*$JZ4>AO!^pP(nOhtWk7{Y?wz59p>(T?!| zbFy$Vni6fS6F&|OPZ7@(JjIk0OosX1qM@)G>ONyTrJ0l0$4&NK&!xovd?NCdxc+wk z$-gk&7?Yg3wiE-3;qqdsGA?K)k-13Ga${{1t&wd7U5c(I+=o z5)~0&8imX!ao9_==oT=|6HN7~cEJzfRRfJj-DpU%Era5G*fM~l&bq+dhC}PN;GJQCjua2zjG(@Lffn0w(#~q$n_*D{1 z#OWbw^pQ`rsH|Tx1XDPLa4|a0d?S92g6&I5YbUYqr-dMHH#SoX*y+GFNPci0PEliHEL~Z-0|)c0Tj}_6I%qM4(%t|5vj4 z|A;m(SF^u`T9k^6!<-=EJ3GVDpq=n)utc6!ZV0cmilrBOgediAcqBfEgHEZ9`J^+# zp`WVsnX%2c0)l}uLb!+)Rk+QLJH1n%%sK2GWz`nqJIw%OUUwU|V7cwjm;P2l%=x^1!HV>j+NHk~mZpRqsNpkF-z;4seC)}4OK zWcB^f|7m@RX}5H~+R+lXss=Ss-$Z+f{9;;_Tzz%Tr(f5+K@~2FpZK_$M$FfR&tQsX(jNyjBMoGNS*IrVMGWOQQp&r5`JR95W zSPXL>xKi7)hOvEGqX8WHpdhH8$kX{cTRlyuU9je{ItZ@vgZRDLR%!nfb0|1kr@3

    *Z-`4<9gG2@~&#sgfj$1rCPs+b&mJZjU_Xp~5^IEQ3W|Et$DGpK)(lY!_E5&WQmU zc$xYDZgEV@XRR=TfKL9GL$tbifl8;ZJC;KadoOiMYxV&g!&p)dn#%XEaeA-D-32XPn>3-Si zn+0lO({JbZrwa}Cd5b3Ye^essx>Eb6IiU+Cu^tmGP-5hsS9o0K0%Fu znDKuWg@d0)e*D?QRqdQ64{6h&_y%_H$)soTjk;VUfc20sfGnT5bPz5Lyi_Re&jM3* z8e`RP1@7%EI-?l7R5lPNwNS^kPQ&EX;9nDmeYK7aJScGlfD#AOe@Gmn4sOP_e+P3? z0!Xkb2+A8@I!MV9;R4}C#e~+Jso?ZuK_a?Bb)~WU&pE<4(y6hWRuMrtQW7&*roN88 zzeTvRH|X=A24_Z0A5EvYx;E|vGIF{){h%46gaH?+(L#M2n5yf0eSZAB+kF56{IdP5 zDB`h#!zV_9fjvs}d`(jrv?cZwtYvHwQk&Icj;ZN%*`(oL`>Ft7bf6W^)R~VCur1F) ztA19?akN!!Rxjb(*g$NP$s#59(fo!4i4C{+VaR^jWtJX8-N{}Q6lzIfVad_ zad~ayPW$W@x+zqb3YU{fd<*WC=azwd2+_;IY7b%w$Dmoh0Ealcm9z z(n#ZCc{~27)cB%3*NR~h!o29l`e7k3*)ZMlhzca0=snCLrMkZ!>`c1_J^L0k zd-i)mC1|Wfu|DQ?GN2)|8j28cUg6keCfF4j#(cl=03Uq&?xE}=p)D+!+bH;L_xU_iWTaDtmjm~AC&w^=@M z31%Y38-!38MR{>(3?i@iad`|OJuGlof-puoSUe|S09ph3*OL3o_5IM2%q%}^>L))PL9%Rf0=d@*SS z0$bm}G{w~hAjf|yDx!~VknUeSY4T3cl$I*i5+;j5%76;4bqh%~xwZQx%-s;feUxBE6yqg zh{~!n?8d2X>-&1M`vdS@V%T_}_0!p#P|WSKdQB+ty(4*fN{=l*zOod)f)yv}))gFjc#u$5i9ZY1A~qk>~S z6L2u}W78XJ_QBy>dWK10li#GOB1HpjQl}ZC$Kb*=xj*VLnP-eK&8O7hAM_{9Tyy~P z0(O&S6q(`<-mDOMvQkR}jO3He;B$yMU>(5w7aG%_(Dl*NDj~Ea8MJ}&&r%&1xXF`n0mS6E2z@EQ`yMwkT+Sq zoNgU<)dlIG{UXQ;>bLU92fXXYQOfMd!*uiY;@nzUJzx$#iOkIQ>d8NT-b;o5^d&Ju zl1*)tfDq5lnUfn06=lfoCv`5R9juAzul=9Rp)#(K;=kpo|NO0cK>u}QXn42YLlrqVcEA~>?0~9Fl zVF={btmH{ghQ7Z+hq8)g=)NGQyIziW9A&pWu4fr^b$xuQ2{QxNU^EdF5u$%A*dHKx z8JVj9=6?ovarVkJ66(tbh5~BQ#%kM`QvsV@FiO9HLQ|82MRz*A=fOZ?YC9qzy~ zdGA6ag%S?a0;O{)mz^`62ht!ptjE1||c{t#ik z#vY6LmBgGDL47S3UV%FlZD{YBTL17pzChGszd$Z(F{#WcS40Hq^&CH9Crl4C>d4GG zuOIPv#aZ9IeoWJmulUqH{jGbz()$*q1A_APzw9nr{BLIIUzkr>2YZXZBRPKRFC-6j z^)9*VBeAO+dXe~^-wxgcY8d4 z$1r!M(HH5(IWBuS9CbXVJ$ABmeY`!vZqwrz8}-VQgvaZSI=5IEGDV>#?<+Gou65&q zj{#mH_PBPagVgJB0O4%kR2WUIqbc;=JTizqCo5>C_}MbM8d=?GYY!f4E5oK`4U=N3v`xza~ylfDtH1mWk{XzI9pO6_fcag3}fTT;%0CZWG{eAr>d zK2%$DTUTrh!(GAkqRP)IQkgZyaJ~E64%r}8LUR=er%e9?dQ{oW-OSm=>~BPUlG=>? zKOWuLWcuuMxraY*Q?t^o2PYDt2A|WN~S&Kxn~V5h6}wDWv-a-?G~vE*)(ILJ+%cWVVj0! z*u8*X8kn0rT4)xy9e+Rj#NT3h(_{BO^#A;UF*SeQKj$>G`W*8$hAnw=z%|paai~zo zGs#PsyV};zrQoM;Yt0l6uZ>Gdx}UnT<}NFBFHPHR>@cQgwzRBZM9Fdlj7b~G97Ggb zv)+=~8?=*ZLcfYBYGaq{ek9&AaC=78JL7n)AxNakn=pCwC@FO1zC;DpWnjU6@WFoR z*?VaD=fXzaa{MV6ltOG|#8N-wqHHQAs}mlF?0P>P3RR(;DPxXcwDtq9hWMX3FJ0hD z_GI}NQ4bC$DD1iW0C-04pQ`?Dwsj)+-6S=LbDc7*1Vjqqw!sx~&tH?%K7hj*+G0%7 zdt!Bhcz%|MTQ)yttSdqW&2%B}D+c&C8#$0JweoJ+(v>d2HZWYj4%nM@5;KE*e_2IQ zYAN|cZ=K*iZDz+e2y1;wNQE<%i~0q&lDy3Nd>jqNB(KYz?V7mhi*f@4pcilp>v)Y> z2AHoo*Z1NsY+f)9aiuG1OqZHA4KwTM%-Rz7T-SXd=;s8z(wzUH5B)Fn|9h$@e#RbI2o?D8y?4|dtsi}$OtW${(7j=x z9J>JBMmmhmx|zUmRbJ11;ymysi*+!=xLvW@4?CyZ(_GU=B!fKO(flyy*H-4;)Z5#i z2b=(?%QZ(wk6nT{b`;W;(($QV^USp^qts!3u45Gp*&+pcJTD- zSKbJS1Gd#R-Jg$}lUqNAeTW}L8Ip|<>F^M=Oelv9*N$pE^Y#S?oeL4F8rR8#7I3J^ z0#m;?ST10Y#~5|4Ztjs>KJzZ$kax4CFf)@}BFM2wG_uj+wjV?^vMi{BO8-)YZ)_oJ zr>7C{lm2L5)f8+cFT~yrXvVnLdaN>?6nnj(J)EX`;MDPS`vZe$N)L) zg8Yi5;26l& zYa3nsMbZ*SbWUqP+m8Ti;wA7LGkHQlo@s%|{3bxFpG=oswsk7_IWZLryK>do?1u_L zvEUWpxXh}T`;%5-qTC$%v$}Zeb?Rt)GNDFzF2dx(sX**nD4*Vj(ULPPYseWnz0ss< zdj45+luP~9^fA%n-_BgP%jRFxfFMW-f*{BL41)iT5lM>I`HMLZnL0ciWSa;n)`ctl z4%P_HM&>6-jUgmfCXOT)aN8-xQAsi+ZbO0W@J1-OTO`E$Y3hx#zXC_tv`m_Gbo~7M zrs>zv^Yhsm$0wcT+1fBSG<);TE@)TU?PRznwJFt}S;Mn>BkSgZ9eR<=ytB3$6JWiS z8zwVP&v!%0dOd@7O!zk8&K$h=gUZe zATy>9{{jy_f{0L^_-mNV^CZ_>0Sw6SNbsJRqHGjaVCPv|@|!l*)tuh>(@qWEV%sbGRASDD!bnt;*;-Ms{I7p+2@s zQYazNDq@D;xTUr?$oj{PF`jS6H3=c4>v37p8XRg8tJH8HAjk)b-D#^E^sODymLzjh z1x134?z%RNYNou&sr04U?>nts#kjRGk_Js&er?B?__{_rpC% zvWu0Tg3%9BB;2ViyO1jbfOnZ-R()i0}ZaDri6UFH{h6*DeY!Hxccfgd! z1I%c(`&e9h9cB4kj=#M)H;20e(Nj5K5FTc^+H_H=EMwNq7d~EG2LwL$MTlgu6mN=pPnciI({WT1y)o7K> zNR_n0J_XC0myB(r)LLPzPDevV;aAQgXe*Vp40y>}n5a322OKKtv1nb9wfHkJIYV_5 zzfwc}mKO_dE3m>&UrWtBJreumk`n=_tGgEz!@r}Z-??QPR#Rh*DvJ;*RJ}JL41@&M zex!524{rTMc(a{EMQv_$QhP+`HsPHVXkwskI39+Ww3(1>K~HY?A^Krl5aQkc6s3ws zsCS>q5|2|-b$&H8poGoMSgi3K-pmbket-$k&ny`(gxx&k-3dfp>m<77bqg@GCdixO zk8h%vV&Egml_380rTI8pZqVCxE;x?T%oSm@D&|n{OVn(kQf%=dDZJc7ftD!DZ4;tA zJRV{D-k#UhZK2-A4!HtmUlndzlp=cJ8(fw550h-E9@BY+$Gn03TnUT>hs34_tZ>*7 zf_X#!ji_UqmO+D~If@InQSR5UPisPn7sNgM4fW}w(fg9NU*R&qTK6+l zQW4smzj6MR%AW9&kBtV!4;v_cSpNs{2WJ0FkZvj4UCQLW7a}m~X|AMXqOwu%_if1<*P_0nZh7}X(9_8)vkeV_w)K<~$oTkIfsqulQ#)9BQmh$kX7Uu0452r5Uq8!yzCq<(&ju zD?VtE|D_Hz1+_jwJO}5Zx}Qsr%Qz>g+s}}I<&1POO;aVI))&Qw)B5v8x%xI#ag*Xx zD3M{S9}MZw`>2!Fr$L-{hucK|0Xv?B}^&IgNC48zc%cx=xPEZ1r#w*UBAJI zbrVn$A%y|m7Qlsfo!qI$VrSDEWdim8;xPpB!}ZY%!f(zA%zRa(waSQa-8VD5wjNJ> z9L&FcJinuK)8bMe4kiAD>>0-Lov$MH`pv|;hKkY$Zou>S zCL@;nr4Sndhbvaw4>AW%S7yb>^mVPH&e+!{MjZR_fIJU7JG1BTi&cIEzMLLirx62Z zPsC-^szs15>y=ieh?pT=gk2jJ+!Xv^^Z~!=ba30rb?k9ak#&+ee&~Gc4@n(^d76I6CVis7ww1qkPFI{>iR^vA_8SyCAAo<<-1XMav@+L zsTYn-W*wHpt0dk;KWlh;bE_kg$NE5smZ}CQe4yoUX7pz%q;v@y`~~T)dV()rXn!QE z5|nLvMHxi4Zg{=Cm-_kEIh-Ih5cZUW+@|c(n!{ws;V@?zhZOlA24;fiDw%%dlr?$2 z-#-Dh-Jiz42y^k(Ql+oMV~AmjHVMw*^M4Li@?L_%oSg0wB$3&eTb1C6q$Md=|J~CW zpCBYq$>lY?NvzQ6McA#CNit^o;dBdjf?79-V5*N6`Ks7i2y;c;i@-#z9i%~8%oK(ZjE=1y^RrW8Mr zbO`1Rc~jDj3`e;MPV`m4eyp|OAN@@K5$4^BUozRxcan6*t1`sqhK%$3OrT z=op)N1m1mN=vtQFa#Y}rq#H`na59smdjK!qoT-~P_*`?`+{fRnY>|02^`bz9HY;dV zis=9UvHqv_t4PB}1GHbeL1G1zg$6?9JJLERGzq%u& z6&4m&2Ar!eQF_G>I!OOQaLfQA6CI&$G9G4UIMtN%(P0{x*8arWlH3V0gV7pP*XBaP zKK1F7ct0#?IB(tCq#qP;FS^c@dp`ps>7TU+IheDhEM>6Tyn7biAJOmI?X4QEq!dlO zR)29-Do%ev))pH-wYHr^nN=My^Q9rG&nGxm7s_FIoa5R+!Kt_zev%iK;PU3Tux_Mi zZdsgRHc98w8}o^r!d=}2o2YH)<5t?kB)Bv0pYUCr`+oIP=d$}pZKZ{6-}u;*kZ`VW z$f)w^;-IiARV%U-nL4R%l!H~W8|GvuDs?c;PRRENE~&J_xCxuzokcRue(e=MpBmkB zLtM1^anQFoykfY8+>Km&*Ft$HG-T}4lJ(+n2<}GZvUH>j18SOq_jS6*}^u#48g$T_dB`Aq_j!jZ&A@b>aE?1EUJXqf|30`K+!Q3 z)x_*wHgdAKu+q#Oy`iJ7v#y-)Cn{iwy&C#sBduuL^4<8 zf=<&Yk$Te)0yW;D)`*-MdZW@yLGLMcv?eEsUdN8$(>{fPjXhIOc?q;fG4Ty~HM|^Z zVu|EZ{sFb|h#xi&%+~ai}x>=(qT?hALumWcy6ZpwcHqCs+~*GPSKT zYIL!m+1y&Zwfb}!qT0=5KNe{2zHwY0GsRn9t+bNp`DjQtJ6H4tR0l5YHFrN(T6}3I z#%SoBYtA&t5 z665{E`y3ehO%VU?8hSs|epSyHjOLx}$O1s=y-*3jArAYXRTSEn$PmVD*W_lgYWW*3|he!Gx8`#M&~J1#VSgzmd5u zLn1ueg5|aK#9ULRMb3PYI>_#4P8oKEVKJ=qsCVly{YOsT8J7^I;ctlThrIYhe3*va z0e^I*FgRaRmIuagD?Ynl*Rt4QS0U4}^4MJCeKjX=id`1LCH{(1jl>hS6wVPw($8+3 zx-8|Ja!e78JP_pbqf#2zgWMwE0|4e3!ztvO<2gp~^tYS3?O;dKi=Z{$S&&AF;@`ey z|6a|iYzLYwAo>?Gcnm!x1}Y;eQi&Z$*7`HP^c2IgTg#@$YYKyHHJZ^$M0c3BApg-o zM!6xofqp6gxQu-+9$@ufPFoonzvE?T`)j5Exs8S2=2(;2=f~@KgGfVPN4csQ_2(8f zBR+}^CkAG+P2R)j*POQ;WdlO+V$Lb}&q8#i(wMXAUdCAAj*jtmQcZY+XQhN#dJT7& zH1%c%*f~3ck;&^)iigHy$jmgmP(P$|n2VuD(zNieZzkk&l$>|@ypu2brGKuE)9YvX zQ7Q8AVc42g`-Nw<;bF=X)?lYT5=_fIC|{8{xTE}IfAB0kKue2?OyukAHS9rXOh@7D zb6~As07W)Z9Ns)xTnftlkvY7s9mfV+L*WzcRC!gE?ieF^f_M9uZnudghGQbgXy0|{ zjLc@MV7SNx#9_0ZVSR$rYN=pC%5zjoBJ7{h z8!Ub!H$g+LDA7gH&{|m4Fco)tS2rf({CZeDNSUTj_=vKy!2()5%fYT)LcG(hGc>$1 znt3X6`PZ7w$&m{4CkPs4pt=qB-$DZ$6!hO<-FlfePaOcQM5C$P_naiV?_*e$Rt zISKjI?FNYng0h5d zEwKYUWR<+fa5tLz&Ce9sFANCjp_nM zjQ*l{J=b|i!MjH*UCFUS?F$s<`A7|3AHidWo{NQX_kpQ-v&T+U(DoX^!ndUHwQW3- zqHkG7?PA?~QpBds1XA(yUr$*V!-Bpr8$tt2oTlMpJl`}nGS)6SSPmsx(#NI`EFYao}#hGP02|{}Oy`Ib6s8?b6=lsX&Ie;g5SVTdLUMiKd zBSn*?N68L7RWPR4an|~AX5)%nmW219zWK;HdlltsQ*q z?#ox{YzJ=4N8b#}3~VQqO?YMvYYqk!8r2llh-!s@{lp5bW5Q{m&ua{t+mQadPb}hK z``4k6f2>ygm$OijnjJ`xi0B_33`~YWqawbt{?6VGfr@4)24PTavLnOu=X}Oono-U= zoq5~gHfU1Q?d`9H3a-ndJm%iu@l<*DtKaT98$R|f{~Xfbc;ZXojtN)5)TRr9wIbTU zT54i6`oLY%EGu5e3YFnB2Fd}SqP3ep#@=@ zW4_4f&eWa(Sa#}ZJO>%^S}`Ek0C#birm1i;5*Ewb6ZUYxrnHZX0h+G3c`|~4<892O zo7G%wd1j2QxN#4U_b(!f03Kg1Zr=XWmbHCJ7}2ao{v`Gf|`f;N$skj3f=j z8~I-y=Pan>{EyIP{;zEzRUOcd2IBkI4qF}d3K;zW;YzW_gSv=7VVj^unRHcTmEItS zPMjY!`T5Yjc-_(66wN`qW$MabV9?VKma&&N4fwqz%-1O&c)_6%0WW!gWe z%@o9?xz&^t>`Sjd0U_HncbiD0)JhYM9dmbTAWrEq*p8_bZHzgyZbWa%5i7YYblpkb z)tL?VawWR_F>AxtfEH0wj+O4R0WT>zyPpRH?B4nE-iHXE*AV1CSj2et<~)E} z5UPDZ*DYEQ4cW1a^&2PL$I9?<9?pM)Lq?6|qJJo&5RBxgVdje&!B+Rz(o9apC$ni3gSJ zyMe0WTZKO(M~zeQ5Mx+77@WL=gi{j>z$Q}bnMf0)xKv^ZhfaM0%zYAg?c?sF?-@7J z-lQ>Tb54zEyWa5feBWu7If75HU%yLY%rNk8+Hs?}g6kw2;jr+h&$FZs;BL!Mjic}< zoQZ6i;%_W{hxsfn@R*n1S-am?$`|^nSmPb9&dia6(w*%SESX8pdk|&LcNj>-olm5Z zVjrEMWLsrKp){luu}7R8tJbXY>y;+Vv<6lmkAKrK2||=Cegh$R;(vl*mj8ob2jssH zETffcBWBxiACa^gqOV^005w3VFD#rvSxDZQ)eE~}zEautrS_eifVI}z_Zn1}{cuU9 zV{a@H{2PFqLzTgtKmbk@1sl87loB2)tH+|Y2c6wFjuNth)fwOx*-dMq!0tR*251j7 zhUhdlAHl{L)wYi5J$4gn>^N^Z@fBR1o4p!;7qm7XJM-^_?&Zdm9XXy39Jt#ZY|J& znltrsUHvsxkIY)@_E>CNY!~V~xb4#R8ARId>A`Nb)~m+lbDK!^Sy!5-QW{xMO^q!! z(tNDqfAkV4$1-iRku@bYs+E3P?{zc`tbLvi?+qLErxcN&g69jQ=zGbQYF|B4m~rXf zelbj*?3Y^1j9x=zkQ-Fp3mgbx)ELW64 zJc$X2}i;>DN$k~HiQsT9&50)rC3>jsw9=1Y-Dnb z{}UIf3OAc<9k~?7(>cKmM#E`*D;%9W-B}RB3&tA-zvg3>6flO6hJSyPsNuH~qS>5_g(vxu2rS303c>0m z41HwNU8*TH(jxRUU{HO+Q^NPqmHJ3u=dT}U@pqXOBX%RUNbQRm^dwkcbMb$^$OY zr6s(Ga=UoYc^oosPc9$eB~`ta2?wn=@)PM+8&t<=Z-_~9m({ahod?R)@+|ZSS7FLyzYCsE3wJ~> z4n$@M1eRx@_3^Pv`-TmPkD)EewDS6W_arCb z%kL&Qix|KeOF?9cSr#(7l&zFyeJR1wlly%(jBWb7GhnR@Mfx&IpcW&IPTR&UbciF6gNZ(G%JNB+aOQ2qTq%@o?)qo!m`{>IK5xn1kp z$vnogYj~+1A*+6weiggY{(kzAIj1ZWZPG4$2r+!e?g^Q1O%Z>#Xd5m1;@agcdCvo8 z=d=8_&;9IOSzFYdO@K+uNg)}^^Rz-+&@UXeZ810RLisska6Rh$C@$gqsYm|r-x?L9 zcmr=CzTb4Kls4guWGE&%(G?GX8JOfM02#ZNnj&)p!wpkb(bmO=LqYaeyBSB{+>)H`JcC1({_5hDr zJ^B_F#Q=Y0H1#4zvwg+drwcN3O3Pcyew5;)^c9LF#IJPa5NhvPV;quiPns8e4eZZzgfeYHsDMho6QOyFXxn1}$q>pP7K-)yK$OkwNlAZxjA}fO=uLH~z|8sWb$6_f? zeeqKa@v~lv7UCQx6pQWzlR7d*62>QNH8;oIK|YKA6)~O_0p2|Uc=kEzAASC~Oge4hK?_$UvJAx2X4{F@)X- zl_kT+EqfBWT8zcCe;mfP@4OQ;KaYAGm@Z3=;U9y(c(|sM7NS6=zrS94$a&$M@tj`U zdjGhw?*Zlrx*%*aA`3?&8B9Q#2tiP4qezjaG80x{TnIn8lOxV>&>Yf%h)(-9k~bn8 zycMl4Qq@s(R2}L-O3EBwZZedFkV|8ak{O$vTSiJTMImeSs<%KPeJoyn5;COjHk++X zbqX=sgr>{6okR?r%(PC?qe6#)1b)iK(rsg48ZHdb_G+818e;KY_(hc4)=QYX87%bsa#JdBzU$$aS~;+lQuQ! zusyJOJWi&@Ci!#?0k^D=%(%X!9j|=5y6mg{PeM++WNxLAIP4U_z;)@iV(r`81;sqq zQOO2Otwk!7ImL-&yibL>J-U-F2gaWy9R-ha0-xF@auJG%V4cTqNofnSi$Ui?{^kI? z(sgNV-du%D1#;?SJS56-8|95veR7l1QEY%wXTbqOoW&*lh~l?OXX&1=Pu`yJDUz03 zz4_xl*Bz_KT}vpc$!p76ai<09#2=p;1tvA~hd{bU8C$cc&W8-E^=O%+Dps~uKvSq; z6vSZ~l$?Ci^+<^7RkAFoHLqr>i(AhhieT%4ld5J>rvvOhbIeQ}6MXDRzQ0dKLX*u% zl4M9m42M4Ht>!P7k-lK2J&>GU3e`wl@Dw1t??O#N`{E)cLwxgo0Xw;1!Ja#cjysZ( zTgBD$T%mSvF%Q?+dcK`_F}%Sj7dv9YHU`*ERo2+u`lqSsz!qV-7ih%x#Nn~8UB3si z@DD`2jHf+2gA;$*a`FU|Vz8{HjM5gSK0E4Vzw*)ZT@JZ3Z5!g`$3E=dCJm>NihMb} z5WkXp9j?~i@&+=U`U5?k3xkBBdMel|Z@lvxl!*s3LbHlBvqqHg?A*&XLfU2e^1qg{1zm7 zQQ_~vMXS-vfp$lO8~N}79zhWE=1ukb#lv%GXF7KA6WDMYz{dWbO5y0ZSKb@R*PMJl z%a&cyK3Tk1I@n_Z6nXIjT;Xe5%tfp^;jN9fqmImG&AqWrlqFUbI%~Qy-P`Uy}p}Rm)kHmKV3x(*FiG- zw#Pxplc)QDkc6pI{0lGVjc4vd)=AdXai)LID^f4YHgqBO7{YG{Il4&0EO9A1X{>p} zGKrRa@N~BC_0zCx4E+*TZFze}X?qh+p1K31u$-`!Kz-O=^=3sd(gaNgdhGWtngzI$ z+9`^XXirv4Wl&k8cIV0Caz+Dr>aEhrR{hX5F~y&rx&7LUJb>adO8c# zCGUFfZ&$L3N1{%l+3TX_!^gp6O7(4-j{XdmFRVZ}N$!2*+F@(S1`R`6TOOqbQ%&3S z=PgwX84;$n7wnHBQ*4ot`^b#DkLoEmStzd7#PywYh8`3$0S zopQA?>=Ri^#g*>9v^aDkj9x7e`8@hSKc4bEClGZ9)8YKN(XGuw|Xz6<7bsR>7oHAAx0^Z~U?;Wc+z z9~pzE3LzjsSi>l-CwFVNw|^x2Bi}cPf07TUa7b-#*#g!euDkTLdRTXF8?y%*5x)mk z-{+PsN!x|eBf}d0U8m>ZjaTUPC+;hv@FUNG-oYo$u7(|#<(LRx)a>=awOF*5;s}q@ zh-JM$9qQZ|4&j@!@XM|~hT49vaC=y<%kIy`O85M!vhk+n$|o?s;nO-8!(aBVe1TAA zfge`>w|qi;I-e5AxjbI0wc8{~JCuf#0lwis{0jK^6SwI}UZ@LRqid^n7-w!yZo`z9 zk)G1`wJ8A@fhv>$j>tI@+S)~wVaJ3;V2xhqCX|AlVUPo$!5jggM4yG|MNy}@9P*4v z!Vrf-ImUBP^&)YZVyn~pzagP+6|eB7j-nYxWO80z`s!`cl0onYB^0*$p3zFUcV8|Q z4yN#4Q9&jQV-NA6jfyvyNh=8pO?v$kI;Jhmo1_)}kA8=fJ+;PVnzJtQD zgTi=+Y06!}I=jA-Qqk~Rf+thLw~J&v&2+&^D=(^9uFs&4T=1!}xu`gbM?J;tukWFs z|LEX_zg*NpekUn2xF0`={)guDzjr@|)U};AL{L82+-S7jMnez?BsW+%7Ttto?UR;= zXN#JG+tf3EQKs79>`^5jMUj|eg_6JWPWD0@j?T7r-xgv_*VwGM@g(5bOrW zoxaY|J&NV`_XgW#a#IPeFWEIi!KZ2~Hu~AB>J#jWO8lzPCx#-5s<^4>pf0Mj>P4EVjp>=Sn5+9h`XyPO*| zRDr5%3_;KO(gNl{bZhaN?P4QC^>_p9Nd^^L)y~FA8vUkvy6!=P9I!Anve^LF(Cntc z-LcF7Q+<94yD-f-cO6&>rVWNnZot6smNctVU=q@#(o&Y+BEwSE`{Fy5UTx0oh)Qz@?*wWc8O@b|D?SR z{q?tS3FR~8afb>t7ak;A2DO)Ta7j+jJ)y)btSV6Iro%~ zfQEP;ejEw!9GnEwKzlFUoUa`V>sb@hWVtR?z@FV04}P}{5G1CU=xq5T75U*Cz`Ioik^Rn9D@+L~pSMK2>N`{sdOLc{tH2TS_$O@^e9tX)SmR{+_~Fb+m5Pw&f(U(f8zZ1UcKR8QE~>(e&{9)vF%oP$S_XgEy_ zGQDs5_KwOADwBl6Yko#6of3~bg%kK86OH)2GLVRS#vmsL68)Dlhb)^|S7b2K^p0X3 z!rBmVluC(EVGpqmq^Vz@MLCil+OfI_)5JrVf*+Ae8TEZaROhbO0W3*zljynp_?>8H zg!5oR!842|dh&H1Q?OQwmAb^7Hph-qlPCbh>ltEhT!rRcC_awK9imx(tV1bcf|2?U zp>|-dq#yg2<`@9YIz?j#Zx9QAG~?8qHYfZEs9tX{8Ld*xn&_C`FSs6%b)5QU>9N8x zatBuF1EM+xzIHCJ zkW6z00zG*DNsuBr#uEFU4H$Qw3(?q#A&?3AgCgm>PQtTRYUDJcK1JUXRm0OcKybj z?YoKnU;4KF6L$qe%m3I&5Un^S1^n&X_N|pBiy<3sqaPDw9P1OjE%9(5WuVIg8H|>j zC&3-YygZY5tmO&c?SsE996VP9k@q_FQjPVht?1a{*W39~RhT6TrHrYO#%4G?Touh3 zT4V!{p^@F!eH5hphXP|hlPS(Cl2EmLr4km*n%o9C_rZi9=B-sWf7)Y#yks|o_L{vd zq2!Z{b}JdN4>A?dR}LPaz|9v+BzOr}Ig;XS!xf}i$p>ue*8J&{Z*MYl!G>8|sH@d7 z(#M*)+mWs0^_#KG@5(z0i-+L~6LKGDiS`7C)^Ldv923N92_jsDnfSsQk7MtynF)&V z-n@)RF{Ah(G{sQduc1%2u;;gM9x@t=w0+Zht05=uQtZlmkyWdJO**?$ek6oEobpOK znhFh$k^|H1HyocXCzedB?E1BIoD*L*#ZUYPMH&|X$^=A5sTY5VRoK=C!FWWExnwMm zmCi^rJxO4!9j-g9NYca*AgKj(XF+{EK>n{$!*ODy#`HZE0s;R27B&ArhnD;wn$wPr zt9d2=fL(jUJ8|vVidSrrx;#oWmVh`6IJ4Cn29u7BOS_0C`*{)MzQMA)7)=1=s{`D>m9aC!>b7pf+cYDhvrXWPn zJH_i;gQ%8;JvY`}x0=xp1Cqu(f)sb@lt5k4YmLUG`-G#ZM$$|gUC|YjXP}2R4e?=( zx_;jTWe1#ziH*q@S}iyf^|X^ErwKGo&0Q})y9CfnIbY9xAV@};m6{aX`<*Qs5y6{f zo91z|;XbmRISTc1laqTS4)yxUxy#j23`3WlV=~dRv$Lo&#Rii~(dK_^S~j19E6eS+C$AT2n{6WgDQ<`<)-ss7BR?!HW(L*lWh#e_Wv8V9$iin= zk#BLy!^dk$%1pPb&NSIMEACxa@0?fNmWHM|$!t8eH}V*RQrnI7R-?EXsY6M+%#+)c zl*IMNpv;dJIY5=7KDJ&#f(l>xTx&JI0oLs<8fs-(V&+h#W_gw8NL3c9q|1sJMd5M5 zTo5wS5A)|&6LEb&Nh`a~biy*0wuQ9CFF|cBR*8XGy!?N~#DctvFyyhrh_LgcT+i9*)SQD}HQlP;bp{?y0n^}?+>V2F{JRD2Xon~1k#y{n42 zYYxa?#-vg$Qf7K{?4&)|V2k>h6&IEG)%Us`DwsWAKvI94@NS`ubpE?V$%g@Rb(R(B6g#<(zw!?J_C3~vC@mZL(Pi2D`q4Qzi({B4$i98n0;0q~hA)XOPFjx^lpi_MV1FJtV& zlTfsm@c285nq}A_dE5JFOZkj1v`Bn^J7LazSHAzH)8IdnrnIT& ze_mbxUIiyBf3w(vD4%8>E*%q45~%cL3ZS;oZm9O~EF^#c6l@t9OBs(0x_0ZnyTbcM#4J&<~BO)>_jrV{^sP&dDM>?IIf`yYkx-=}!XRlO|88Mnj zhe$roo&14ol~aPy!3%nMqXI$kSiEzrC&bw91a|8#EI`kWV$>ZM)~|YCx;pj9d+naR zf(u%kYioysPaD1Y_NiIIB~yt?l26@IWL4|oMZ-E&#~2OHCZoZIXEfDIoO!K3HjJDE z!(*!Pk-ImqibEe{_$QZ30$hZc)&;EIV(evY5X%LBCSN&-#o)I;W?#Wx3>&mRr4Iev z1d{;ZajFbbV1R^|AC`?SZVH)CbUDvHgNQ|k-%PmeR_wXrl3s>~ECC@0?Og{h?%qo?MLAD0O2%**vjGOc5r=sphE3 z9--NH)%oWitRKC?ECtVZU&$Tu#}DrR>e=>>11#%m<6`-5yG8%clbo$??V_TN`YCrj zy_r5X#f@ASU}ga7xiY1D?NYfWYb@h=r3dV94ns zIo^%4>p(KPQaidQO6e{6_Fe&~1yu}4eW-`kSHFY(<0JaUmA9X%$Kmg1`B+WR9j$(J zo1yX+gVT*YSn8lM1Xk`K16N!5s)t~|`lko);GobSmE?}H?t;m!tD&{ae zoe_LrOq(ga!DT{*wA{&mB`!xUmQ76Avao(dQ6p8#o|P#j9mfU{LPI6yWj6m;UPpr! zZzAJ-%O5P}LS_EpWO7iSYP0z!HHG35>r}o1b~9S$?Nw8=s+4uTpT)eWg|P*6%$7Ej zWMEN`YL+w#>&bxgJ3U_8ad-7HlS^Dw>rs=W{A-&{CiV+U>mbJ8+Y(roTT;cKO)(@V zSB{uSOBq`xVC1Y!(bg`~Gngh7=1&7OOBPf)vJ&57^$|G^TL=)UYZw<752n@MmTc8_ zIQ1Hx^;>S6%r+w0#MD}9v$}$@#p3Sn7XvNL0AwnxaOYUgw8d(hj88Oq@LM}L(iW@9 zRm>II47J5a?iNR0&eb+cgH%M)_iUbQMBbcag5;?J0eGI9(>R{-&QCeit)#TEteNR` z&FDZzVr1FOfoDw!JEbg4DboYS6f9~HvO(mkvocdhkI7ZN7#>X2(zGip~YAJ0X%ymz;C= zFr^%C;W4PV3BNzGn_K9t!=e4m1^16*onmeJbI2x5YJ)cyL1AX2gj>hd$zbb0kjm5OyMxh?vHTHtLH%5b* z#h8qPt5rBA!tmrqrxVLUoWqdW+(X5c*KXtZjDnF#^6FTN$Op=c5Z%`6`FP@*OH^3i z5%?1ZEMe{UMw~`+Ej+NzSr!Wg2L?Pu2B4}I9nkofZkcW75JY7Lls&WtII~t9e%?SN z%`D4c(d{RMw#oE3P>s~6I4KNZ4By1n9Pe44_Nk#FW6=#PI*7svw^bOn;v`5|df?z< zu^BcBER7%}0<5vNlRIesilC4wd^x%;q&-Zp^mG3a8*&W@d0CXUf%ElPm1wxcwxd@m z<uP>?lj$D`@;8PD>QA7<=V5yV~vDT^w6vdh$LJ z_h(BdJd=VlI!Sb;Hq+t?Ka}pW<8AcL_I*o;V!KE&ElOJz2 zh2nly^>J19L&)uBf@WoSStrRpn$RsLz0i=B;0HVd_(fSp*b@s;y90@sgyR05Gh8B# z(eTjLQ%|thPf~N@Cu0g;Yhik3D$^u{zX%9_6$x@gBRCAi@!ck!8D|8^*=9X|gq#Yo zZ%sgaAmA5>42jjmEpAS!GwWOGQn1$oRHE~3fR0Ws88WwwLpdyNlm#(ao&TcjL)z^9 zj#D{*MP6Erci;VX47z6$^7Md5HBMX5^~ZDuRU1vL(%CFh!D~DT8Tve+$i`Qa>#NMl zpY{bZzvQCKzm+|lz`_(if;B(GP&r~jiLLZR(VM|M$IV6gc2<@|E8q}{(mQOSC#2C6 zYxW3HZ&t4t=^O@l09WsmjD4WEg?}#;Z>^K=v^{BG93x0Qf#91+M5plw?m9INb-rWC zDVh|%^q!kZPpE_zzi_5IKZ4jCWJ28tzD>5Ql;8|dMPl?@&>55pcZ|yjLQ20_xo4MB zfn#r+_a)4w%sy<8PlN`luW$++9o#J`ar={`FDoC1pKozN9%4?W{!+NRHW z>N7O1w!deM(LxRG&nn3Y*7VW%nPMokBb=Wjp6KP2=ROe{Kw5)pc#Q^GgT7`L(lI)} zM>?FY7lGZz1J7KXo`j=8QzcL}43`F&f1(k5rVW-Q4cLB`7Ky*v1+5TV)Ta68I0|;2 zl80SEl4*JPm!tI&@MI}l&tE#8cY!f6G5x%$g`!mt3PwqwTH~&0fJwmbxYl~j=-^H| z469M|pnNXo!2D27RltXE;Y@1?N`hFT)2R2wV>R_&KB#f&lvEG9?k# z`xh_|XUY)#`#$z-!^3;Fh6$e9F)p?MwYy(*+{12R(0n2y`93dQgAco1!w>Gm5Aeee z^1~0c@+k1NzIHs(Z1_L*)AeG?yr^;3I7-pUTD!btN%)v&tP1XOE}>k<>nty?1+}kDJtV(%F+4c(fZRF`&V2c z2yM6(mb`uo-C$y67hG!h>tA0LCnK;^5Z^=I+~1Nlp8toUOxe=T+{X0(QkAK#sUw@C zeEF`027iYpl!ZWD0!W0bz2JC_VItNLkrp?JF$M-12XnsB%lg#yRc80=X=F}eOM%N% z^~t0Cxuy4w2?PYr(<9-I&qUXYd+N?d_xsb#%=fUvFMJ`3L2N;mT_MJpU0Fye2tJuR zB2q%!K#=5EdbC1F%pBP;i&>A+V1^*goHHM;k)Xbbs$fq&odkpX6(}Qco1CLp55i!o zPSQOPdb)=?iajX^`vY9a8z~QY0)|RDJY`r&d?*m_CQMdSYfWJRIgC)N=W~E*c}nl~ zp5+h=`T=IL8!ET8)?HxTl1^GnA|kgnwU<&y0aSRffGKk(18yJRlv0Tct1P)W8g}qzty%*B2j{pCNH~r+*8AoAJT5(G|hA^ z2Z}cN#vWIk+{K;Vh=G%ytg8t32KShZhIa~! ze8&Dv3xM)L8x%h#7@(lltW?5wb0{<`*4x0!%{qmY7=*ZfJEMGvQW!QAdCX$0Gv}PO zngW9F-TY{}txnhRH^EG25jE3nDJuR&UOh_YeGsKFKbvtCvcfH$HGAspEVy1kWVdcq zPdb=_pm-@TURH*~kM{76iai7YdNNEF4p4$-%Q_VPT%NLzi%t=2`n$I?tT zB`fKUkq>dTK{LYC2QXuDIT*<#M?`7Nu4MvaB+-AuRFf}1jQ*-5M&F&l*elG zm-nOzkOxdyry_k{(oZ@oOH%J4EkwVyJ*Q1nGBM*;lYaJ1Qn?k8-fy}aKZyw%QL^gR zi^?( zRbpOvC&uEEaM0JoE|01OhThW`d8p@mYaPNTe*xnS6A&IxXj5oh~h+0jtWp&jb^ZR`ayU4IRJ7i$n+wdvgNrti_#{+lX-N<(e53iu+*bl zb6`T*2Oe34h>Q;n>6m;r@EXR^?m25zU46&zWA` z@G8|Pc3oLX8l#j-@1^n!Hx&h^8k}Q|Uq*Mhn3KPx5YZA`2sjC&+JRUSR_L067Hv&{ zc8xJQDq40rq7w7%$#?3XAn@+IGqt%R@?0V8rJ8jF5iydbvxXLEwtE6v4FoYw?tRU1 zjUSfCTvHQYj+_A46)@pTH}}FybZg&w+M^pm)sK2%@x5&#NkW!TJO`|16f)}!N0}ly zt!&6{1_J;YPO%?l+!{p&)^qr$H*PKl z>yNI@CQHGTkfSX-EaVdSl4F*{G(&?c$ zM1L2EPOj1!t`GS^PwDY(2L(gzAu3vrp|9$QR7>fhD@qyj`=-<2t39^=Q~5R&zBYyTb zkyo0TUBd2~jR8y;mo!YHfnGs3 z=PJS*M_shSVmR`4+K!^IHrT{~n`Re@%VC6JI<=rh49f80G;A07dGXi_>C;I?#Q*NF zfz}ktB}a_RBubR#?x_zHb?lNio)-7U)$fAjBGyqvP@5&r_aY3}a4eT|YCJ!k$XeB= z6U)r1o7a*hFA1kKa)IvAalBLc4ZiQlYb9l&6RwTUTF#Yj)PBxaBnl=GFoI<{wPFl& zZaB9d4y|d09WH_dwQcm_KH}=V(e&A3N;d_4C6f&e+qNA0!9i*$iq%~`cD;rY8`KOy){j8@ACEPE5`-1p;HL4lP zB2O?~gaHe=Anehj-yjpQuk&hzJ5A-b249&dN7T}|y56ETu+gtG?1i-IuUIp&iXVYW z^V^+vlaz*CSXvo$jkF4=u-ZDHJ#gpGop+Y*W_%ZQP;thB6> z>P)E5(~;a;$O!FJh?@3crDt>g&%mCyddwAxvh10q$*@L;J!MGs$=1I_?7s>;pDAB6 z@%=~4d7w^0?y*M~DPF_ya}RQnsL&H`B>~FKCfVng@-=$d4+56Xnk6ZD9GGMzT1?U$ zMUW9N*q3rt?6W_Sj9 zzF^QJy6c-X{Vif#drbX7V;RkLYgnrHFHt z%p%!m;gdz|#0f4f25<)*ghJiYEM3Iy7C=G@cRWexjiYSD7wc4`s&;|}x$izy$w18r7u$xC|tutzj<5M}HWwd8bAD|y!o92k%H8+Y z&a5b^(zpVyjza8($Ba@Rm&Z^;mP&WM-#OlsLrOieUwaL+=ehX3H6Gy4E=hU;!l^zXOc7_bw=7Be6yd2?JJeK~nRbq5w0rio4GAQkMUyQEq}XSe0{vun(75y+fWW*{8z^@OUjBDP|h!z;$|sB0Z4X^3u>V8 z8t<*mH_biAHfk}Yzu!A6F^Aa*)lSMOZWUUjcv5o(B3QYs_xdfRs*5u#fyQZDmg@m5 zrH(12;hu46smMncd5+6NHW$YSd#)m$%1pO(&7H9z z^msw9AYpfue!phg9-wjh{2j@=2Nfmzb(P@2G!I`iJss2D-5Mq@Z9HN)n{%1rpN{v@ zrcrnX=aKZcHhTs&{|17V&T+#}Gtyty68Z?>50EqhC(fQ(quKHZ`G*`Ed;9|37`I4= zk{d2=n9!0mTj6>Q>YnHxW&#Zj44U_>`odaE7?e!@D{Uglsb1`SrFN_pjIavonVM~l z(G1nFDVGd;SSf*Gq8E^98<~!J9><>5H$IE0!xp1zUaEf6I9e|PJp}F@;eCD?g_r@D z0KQhi^oBb8)zRBXxZBc^PVlrNs;*S02Fa1`1$1%0WnT9t#U7G;|0`iL`=;fYV)MZ> zAZt2U>+seyO7>RJtWUUVENpDHP~?UYXa60JN0KggKeaR+bi^^?0g;Ptb#v+#-T?6- zKVfmeKh~5DCBY2C6}hTj?T%#5sF*Fi>hRh1E2p)&ta-! z?2>Jb@=}#tfjy`gO0iffzzGgdy;`OPDhYni_yRXnYnM`?ij2CA8$B*JR2yq8&aCif z5$m?>C3g_}I<_5?me^g)njV27y z@l}2B)yhB886z1}@b!}QP6;%TbUw(K?b-T`7PU)z;>KTiCMT(-^-fM$)PMJ?bsc|B zh&baREr#RC=GRuSRrfjvj2&fvPU(rIc(nIQmc-KC5q-hq%{0OJr%eqNpua3I?nzuV z!RaUD57hF7uo5F{Z1hQwwQ^9RCAwOcJ%~8>f@tre^m)Zn{_ShZl)AbsF{~y5b{{}> z@x*<(CfF8Na?jVhNIHN_YxRR69myXZh|?_`%@Cl3KWk7dn^53`+F??O0Tgyr9KckJ z++8VFN$O|c)267^Q`eXYkh(M^om_K{c%=A8iuB=Y?YrOSfl7-TqFi|T-x<=5M7vla zk5HT7!#n(?HxT_<(eQDGBAmF83=&~U7V|U*W@K1plJ12wBj*Mlx-xoJyTRqJ&J96&9rIAj zr8JqT$b=gl9qQ_+resdrYCbp!@|pQG)ub<%^V1abBlxs~!)cD#<0V`Fy^Oc*;$ivYf9 z)_UXbM_ZX}0h;DC{^=G~byO9U^O}fm76P4K5#t5H2QOui?df~{^kc!YWHM^T>{h*d zhBJFRjkGG#TgL!oseh{*L@-*0sBQnx^-JzcEr->QOC20Pb@&k-Ulw;C7u=hF(y?Y- zLZ9CVO8?IYDw`Tx{40XZ|B7ITBk`&ojO_wlR2$+qglYck|dGK%FG?M&g)OV zq}PJe0^C$Mo$XC#_w7mVFArZ}go2y>UeVau_)67oG8?cL=|40Yf)NTNqnWG*q4ju%sPLT!vh&-KQWTxrMtD7}oDD-$UpeeFO`N}B zzIsdF`@aEstT~mk4SnH}}8bapMdYOH^ z=}I@DlpOr|d=Pxlqs1U*&2W(aTljO#|raoO!o_uNwvCS1dp!OizO<^}R!m5cpl;2;|~GHc<4?^oOhT+?UuD_8;mPZ5AD;h+0mKlJ3EO=LWRwOyi^y zL1JgDQPKDOM_&;TJ0-})(27-q8{-iY6~ZY;XmBD+GsAJc2+1A>)EKLS8nH%=txg-R zvG~WOuq%`0{$y=eGmVcEHKryniRL=o5mj%2j(w8D`^g9!K2@+d<}(Xhd?{sf57bDoN3T$!zYZKdXtVk*WNBu)IEnVt#zEuoi_|9Nhlz0JIPd=P8Wr}VGA=6 z8|G7{S!8c7%QSPQlRdLUc{dYPGhgd48TMMkXb0%yIz^Qn4OWbW_lp$Wg-1jy&Gg*BONEaz0Fy`QPGba04f9RkRaxmyWJI+pVzG)r ze&{&^fHQDvxz8v9r!S#w1cB=9S#7pX8j4w{-EXI=ogFVUAog5zAh70gTI>5mQpN6pkT;IqHe%w3|1tu|i?11ym{1<@| zgyBzgNX%kqsR2?d$g>D18Lxu8K*sw6l@d(XVpLWMlq9rtM?h&iA$6G#fzBblf7sU$ zpO;Y=Y*_^TEh|DeXlD2PujStKm+_6Dmn}f7n`1$Z1Mb(4tUCQ%X~!NE{?7<=GS~PB<()Zq;Cbp6MJ#-Zcxe>&!%8CkqAaZNAuyxxPR+}zm`XiWoBf<&mWAJqeH%%k0gXO0tT=GLMn zyPOiL$%iB->UvC8USt8$mq^h3Tudp^%P2P|=E2xAVd#uqq_9DJA2U?7m=SsoBkRm0 z%CL)<5lGhQg~ISnnzJTN-qeju-aK3Moli=dAJ&con%>`UO+zi0r$sv#Q3H!_Vzx02 z=lHI3ZBc`6>L#B85Qbp|H3D-Lu(z!A7${y+Y=Eq#N|qUOz96ielwlY;3nX>|*-o}E ziPhb8&xMi!H}RY7F+E*`XZ$9Hy4ty{uUP1)oU`-^Gj#h`y8UPSIV+-$4r3)PII~UN z@978;B0@p2o5)})N&ZcOQd4_-2}4JEV8bD znNu4qKwaH*6F%%ZIcU*#qO%LQv^IhcN}lmy*(hLOc~OV*aFZjBX8<>_PSL`QzYU>4 zPybb1-dau-Pmtx+T{$kMk`LQj+eWtmpcZPZ<{-xBrUGS|@Fbb1t+?*>EdMR>GNcJ$ za+j9RlI;U*?yp6b8_;>I?K)ZqQX}Oj-MV%LU%E{QSHbe9?(P|QHl-8CMNM|o{BR11q9NB)G$p585Uqe0UJpm-$({j-)Af_2V#QA;Yl$R#Uy zDr=X{bC?^Wb=uq^h==aKVbvaJC~DxbiXxvmS-dc3lA;Iom&mNKbuBNWUWJYq6F2I? z-ivR@1KHj5+p5#so2z=}KGsgp2M^x+&Zx=b4%8>^LNb?tpqU?Hkr2)#+nxANZo1YoxTh!ht=5V(>1Yg0bPE$@dKu&SK)#W ztdx&cG#QJcx@sBQ>Q?zZN}r`t?UOt#S9xqW@tu)a?|blP4Ai%$8a@f~6`BYqa{j8* zF<|PVzkBUop~qykXZOY_*|i+!Q{CsmI&@R1`$H49+4L4W)En(Dl!F`^hIVL=Zol@^ zAN>*AvoAZyn{N}bnTF(N?@)hQt##22{*6wp->w;WW|(%Y=`=GtmNbO`A!qUW3s62XnhTVD&DB zAh)Zh_LuZJ0rpND*P>vHcbmaY=#-NljIc!Y-3Z@<>^WR$S2&2e6s~S>hQ0{}3Xm&u z=rJts$?JjfD(4z@p?haWut>cYQ0{X`8R2GHFf;p6(WVP`CRMGQ0-xk3L%J+NTCujY zUB8v(Nrv~=ENVl07yMpZ7@@W$+%J75S>xdB!Xp*klJ*iw|BRTt;E20+dnT?v#FS zlNvF2($Xy6VbB*kZ`2D9Iba+F**b%0BN>;;L}d%Pw_~W!wf}?}$2b^bY?{aL9v{T$ zU1Ad3)6&r0D7G>PuQGhFP%WMt9vA5tCR%63P@Hp1YzAK$w^+ExQw86k)N!?Jb40YO z-Ye=?HKx~;(Ex6X>HU4|8IrtGc8^6&A0#BA(59!<_Ps4T)l{~wu-X@nwHU_^1t%~x zbzh;7W6=O#6V4FUpoeBV&|l+EEY+9ek^irESUq+KJ^Iwn>ynHk_+<9gLo4f>G+|rH zM#!gMcS7UL4^-bp<78xz31X7VG%9~D?h&=I)nb+M;1FfdzdrOpo9vUZ|w>4_#GgcO0)UEH20Nu?aMjg)z2u z!Z&R5d%0QLL#Q}b>E6Qlp~0enD(5L8Ag%6s9Op6Ih9ksP;W$s+WPp|?2R63uwWthL z7QidUCIR^bp)?kBj~5~lAx?ECL7LNvB(E1C4yAETsSFTX7nTx`NWwr?`(KouV|%92 zmaQvE#kOsuVy9wMY`?K>+qP}n72CFLn=)8Tb1x}wT=X8{ss z$zKZ#5p_uv1vC(g#W-@~#IeJSESkK39EYP7w;HZ#Xe6pnW~w}&HXt<|kXXD}QNjZK zyNc-dL>NM2aGb2fc6atT}1c(SlQsF~sp_Ceh+E#ABwN`o9|rjh{sNKU7#zy$hxqn#1; zcpeB-QZ-4E%m620T35X~x`HsYg%(75nrCQc(t)-&7%ccAOLlJ6$g z&TuR3zhO%B8jMzdqiD!omBW*2TN2I=GE#Mttp;iJyr~;AJm6+*C|{Ue%WC5u%T2n{ z9?iKsGAoAESF7_s+Y2ZB|x1QJr~?x^>m@`fz>y%j8xfW3B}Hd&wsY_5X;o{-;3okFd{5l@}Fc6_ihH zBqk&gwV!3XU<0h%NQwmuIcTL~7X6C^XeYY}A{darm{V12-ObV4uinhi+nb%_rtRHc z?Pb0~vOY0CdLBEIM|h=(ifvL_?>7H#eO&&%+IV{2J3ZL?;jkMAKW0-AWv~#uhw-$- zDWxdT2TAv5(O2N9r@JBG?OCh?jxNQ~X3qeZn_VdoGoz{(#H9ubeg^;<{VW@&qu?4g3>?cGveRa&}+zp0wl3CbsB_4 zB%_*%o|tn@GT$W>EJo%-`_xqFJ5!433p^Z{OCtZ#5Y{ScBPMB1kPUw>c{4^~e0Urj zo?MZM?W*5alnUg2rq>gnv3eayv9yhhx!jQmBz=>v7`H2S5K826`H&0a5y&ASgjoIx7$TZ2$f+ zm>`s=h>LksLCdsD-JnaOl`i-qa6x*7mP)jBP~Dxg6>3=e-X5_BL!IfEHm1wSqVrcG zm9EBNV^S^|N<)WX1N2r4h2=h=^w`KLSyOPRq~09EYKFe;D70YQ32}TwZYbVuejbJg zL0np9h?zw@Y>7%#KM1Y(g*h;GR>Ko-%?`LCl2RdGU&5U8t1=S?@hB%;4xaKgg)uR* zW1}}UPj=#=|c)IZofH(9Ah$i*u9r_MfWQG5b(DR!eM&(Pq!Q?Ftmvm|( z$5SzKDRWWOVPNLa1g1Rf)s%z14Lj&_^%$8pobwqIvqUl(JXjVdiD#MEj3z{LCeu%1L!VHu7> zp9NLHD~U}X@o?-X^+EYJL#&=?k~aJCbo+7AhHW>u=EKN}{3qDnlSp>SZZGI(WoQ?< zH=6bOYZRh4`xtaYX$?FkeA6j{YVuKRri7k2ckJpEd zCq?VV*&yG20{Nuat+I-W5))8L*(o!D-7FEC@#u_o|qtHk4m4`AJN=f! ze<`&R5+@+0QSmLy`LzW&h|#d&e~6eY3VNON*SvrJfk2wFTf9P@htv7J5e4O(+}yLa z%@NEpM$KAqbW}4h9PNf4m2p#d_x0%r3fnhEO_v)xcMp+9P2n-EeJf{{G+U)A{H9C& zw|VaMsNrqIkoaPnFmDf)R2qCb9WqDd9FQC{3uGY!%AbPE?#C7C);3!j3&7%#AP)ad z>=z*e1bq0Tg(XLcQ*VA0^a^0FEg54eu~zlIHtx??&zv@B?go5s>+}5n(tal~>sy+p zZ{aWY(OWP|aLB{R?@B=FOCTj>l4ttm&X@P~FVl*ttn?elZU3Ee3k6g&g~5QHF9QwZjV*dMY=$2|uk zW7Nl3Lr@oH4pT3zT}0{AJ`G&+!@v(*yCUh3T+)BN>FqQJqdR)w^NHoWoFSBp z_8yM)%RM7glMeuRLi3`SFL5=<1DZ%4wFoUCOcjRExM8C~XOUqKNSaU`=qIP(a^x$aFPwEMCOFTc!ClQ-*RcJQFfhj39=aV zOrU+4`MALPW`x%G=ozN-E#b6Z_ZPh-xy@5Yn2kheCPSR^t81vXZLk`IaQy`odEg4y4pW`8gF3{RRmB8@m*W20^rGwMneZtJ{U zTM@~d*i~EWx-Jy64NkhWOBbltmI`>)%Fi|5f(A&x6F=;hEjadkU+KFbsO&m-};-pEeMWrqlbx z&Wb+kB)9-6!?ooy*sUtVb!JgR%FhL*>*dt*!ZG5#c;r2O9>X&xSlh&!+Zjl>keSd83LMUAunx;c9loj{H*DmxyfGB4dcAQ#mM=MufB^a6 zgt~-XAu_?=`-oKoS!M(9_ebQ2^bH4v64tRx%eLBK(@t{DMMv|B(#WBtc5YxJ8GvFe zu@5{B4ZeBrJ19&A7Fhb?G%RujXOoKbpi37HX}zJEU(?tOMmqQ(eTuYG^ElroXe0Yj z4xbzS1jWIte)g?%97*1hU6~o9G;*RyMt=^?0ivpraXx+b6x_=6M%8e$TC_!?AXLpF zlGAn}A#ALfyObP~PUt1f9;17i<9Lzt5J9HR_u6^Sib0foR&&=x7h5^{l|nGiFVSeL z(GICpf5ZdBVGGWXN7H~uN}iwUqoXeKW(ljqW`T?xO8hnOLS2Gch04 zy|aF8?%q>pSpH&vP$|Ioi2UWv_V5funR^md%6gLL%5m{9ga0RAIGFl4iB{1z`ogVy zLD1kDbn^CYJy=v~h0rmaL?ZNJrJIWNhJ$a1)a6>vDu0kJeUbZ=&PdSgJZS!Skkgc; zB=*~sTrj96(neQ0Gqn5*k%dXjM3jAFGSdDp&6Kwf?>KK4!`qNU21Kla`I~}5invK{c_$tT;zvibVh_UfS^D$}Dgb95kePI!5 zZ@{y+advre0LQkPcgWqdoJI?f@^I4QQNE4GbDz!J?dgxS1^dP>UwiVC`#n|?*ez)D zirD(*-vt8*Cpz~C$?hDCH49(hbsJbNkGqkZd@;*ZxW`s>R;f3A8B_~K87eyigj;YE zirG`sVrj>IVhP|WApdeWn*N!+f5&) zj@^YjJ4Co2QKgHg*rvZw$M1bI+%bw%aPGdr(Iu&^yi8{8q7mivn^%>MX zZ9ZIb81tROf4$$J`=Fj<4DXejX7s@NkN@%a(7C%qOSgtuAH-#KewkBZeR_7yp1}HM zVt6F?fPkoZROc7%PGsFYG@>CL6x}D zhGmv>Re5lpUWtFTE}zAc>pZ39HB);8Z^iz?bjZvNl6dRP!NpHViJP@^J_(nQ>e+-BW(Y@NsE2WjX)d z3GLRNvODm-ha4=-CvFY1gbj>t5EHbjuyMa?w1^wyOuUwAp{?$^$rkK;nIp7165hTy zsag?qbIx?KF%J|G?Ln7;F0o{In;b4aOrJ4g?)~^P7Z&6X_aw-%uo)+V?<9J`8g1dW zKlt=w=cy@7e2v>&uf8H2+R5c=!XEy!$i?CQBYpQ|CiQ4%HZ^x@YKMCT`u-J>@)cxq z^(uFza36c`$T_7hTvY76Dh`^8MSPJ+qn9R(;Tl~ zk6AS1-5O;7o8tOv0K+ddWnbK#?=dUnSg8>na#op}Q$(V2Ya^s#tEw8DWNaN&BbU&6 zF-(-YK>_LO*MZ9TXXttP`?~x7r%_g6OM4hO#IrNCh})A{Z3)5W5ZoFWwljc zt#XOuhkuAppse>-%S2Ur%BtqZ}66;f@vf`ULb z{aOwvU6#2~Y0WRtkA>A#R>}wMVhKrE^N*7~D|Yne(|z!L5!(evhBm=s)~C-W<`qvd z{!~XhtYxzAv8GKH)9ty$HMv48g95DncFA1tK#FWBfsgybZ zJn|j+@q+6K0d(%#8%>(1UK)b(9+CnfRJcvR?c!L>?pJP~X&X>to>b}XsJAB)Yp@wX zS`PS}w80I@noH0X-8Y(k5yA*8Q_XkS0DeN6ofa;sY_dQVN`1v|wq!kY6wHgk(#Ol& zqTpCRgNQ>#T1#M9NLdIA^4T&2VaHMkvUBK_Je)g*Xdp$|Dd1 zRD<21IE1yjjfiTHuw%g{AZY5Nh1gazey2n1ahi~A%A4pxRyDAM+tZuZ7h^l#46{xV z;}fE&%1}K{MeqqTMIjs)!-eIxwS#5Gf^R%JKRs=8-`>Z zJ?5GAt`UrKu5g?PJfpU@s9Lc`2j8#E^pH%e|C12)E zp~C(H_hip!!7KGnP!+MEOTjG6gvypzGZIq?r;sf|(S#KAI1tb-bNC_x-D%kit9~Je z=V(kTQtB{}QEj#MBp|x86ImDOZy98`&IVwHfx|p0F9t{|Bso$P!W`LR zwS~>e>CqHnWBbyRL{hpQ3!lw-MyB=OD$*1^TIh<8TK zkGAVJ@)(#kM(ef@cjlXb{k?BK{@Eng{p&u~P@%Gy-Oq~?GfFQfut|8WVp;@(*jz0e zAk`^q1zuPO)v+mXq_U4s=?;JBESyT)wZQXWCglG)6?z34bdAK)G3()(=3Ovgyn_N} zili=gd#lUGXpjRtBxmKwSoH?d*0#o*TLUuZ0z&)-)aoC5%W0?~xF1tXF3vA6qm@nK z!B3baB7QfcVaELq_P;Awv0wJl^ShO)!hS0&|L%hNUlsfh^O({%HDU>M!(Rl;6a#3G zV9g`M)(qLkq6Qb;OdlJYfmX;z9|E?=5~H7B5RI575~n0S4t{Qy%=B#VLrgh&?V!j} z#r(DQRku-m?ddbP^Qq9+5OC8`K8M*^wC_ z?T^asakAx1Hv46=AQ$M$NWwz%Q|_H-5uh&zLs{EM0wy~2jgW{Pylq+u+}8gp?pc`e ziD1w~?1TMIGPc9w#>KkjwAv-beXCWEELTM~*O{}0rWZAN3j@~gcf2^)T^&6*&+wDw zjF(tZ1APdUVqM6Tq|a(w%#1PjgzEK2-iud2A1X$n$cM`F$7DNFt75Y(ykpIr;;h=s zR6#gC&QQ*QGeT=oPq)!@=kDURHZHDsX`ro*S9V(+<=oxSQT$6944j56sm$8Gm{~_T zcFVM)`Gm^$ka1LCkCM~&STuA=ncNvxo@3^u*k@X$-s;OD6F?~Dw*oYIH}F)p0tCygT4Exn zNbp#>9&!ea6>CMT2S=k(zAL0=tVSo%_&cEMtmU?`1&`2(EWU}hIpod{g(+iFmG_zf z#QGqJ=|wN2g-Iyhbx5PhZVB~RZA@#J2A&-+uh$Wl~vKha80-Uzv$|Un?B8Yv!n5iSnYWb_#LYeQmxfH)$9qOg@Aleb0G8 zORycjYgnu>9w5nTBH3YH#{me}e?U|iEVtDJW*Y?UNUz)`>pV?aOshi#sWEWI_vfIC z(r={jCs>7JzQ;@T^@pK*Qz_&1nP*I*LM(@q!yER&=uv3{B-e5R8*l^t?<`y=n9Kdm z%pPmq=`NBKZfo7n*IFQ3eK|o~V4cybWXyi<4C6Ps&{!9oCbdvF3(?; zl=fP(N|H;ZO26+V^Q^T4LP>0VdH&B|r^yGT9=}4T&%dF9tNr*{CsTxv7 z=%732a6?BoI#+}YBVIZAObc^vzMi3YViPX3#v$!id{n0ZMt&>E=nguDY}ifTI{=M& z8okzT!DG3GG6exL_X0QuXBK+sXvDwzi^*_AxKOw$RX|FPewAnJAyzrqE3<<~f8#E| zO>a?#Vl*v!(kW#M26XOrj9AZ>h@2jVpqs?MDh zq1xCneRK`DRp5PJyGOvUXi9=F9|GYq%U`v_cQ55$_O6hN)ak6Gg2($hTfUMUGT8;< z_oO1sJ0>)wf4pf$Ehb$+yfg{fxm!opTW#5*>k77f23{&_S(O4R4t187D1U`#hBZ!z zreO2q&3GuZj1wooDTDULz=O`Pu7uti`pmE7ws-Non^ffPbS?jK@_GGqZIIt`#VgBx zrITO#F}D;=&Oq{z+Avqbe~b*tv3~|Fzs2#YbRoYWqx477(N|1ZNvi?jMRqUU8BaN3 zL`0pJ2@aK(eFJ zF!zW4B|vziORSFrY?F%ASDN&g2SzKpV2$_ztm(>+EHL`jLOj6G341(c)E$kvdE7;c z85jmXP&D?=jfsSOwgI3D< zQ$ut9CS5Y@t1QYQ9lsrkfn-K}#b`X$?+sQ`>QhouLNzKm)8{S(<0VqZ(4q81gCE_m zg*hvSV)zR2-<>N-IAD<$*GX*kiseyOjOpbf_5N4PfOv%?N6_~y#Nyjo@NXZY3J!Lr zR>r?g%?$s?&P$|ZkVML`Uifh}Ca0gt(Z1i4hS52AVoCB8lG+5)c?Qr;srlv(YC+n~CDSU~pyG?^ShUvqn zG1nujqpZ7)s|@#}NO)~dNL|Lyi(qW$@aF)0)RrO9i+He07MpB*yy6c^kf*mDd#r4< z8=q`RzP%w$4_0*e-JE;R$?l_%;XWQPJS2B`$vwmwJk$$D5PvInZ{zo8U_A7j02oiX z3S1;X8Mm7R-{QiL2n%mj&S;G1IE#ma*n_u%hYIv5*PD(C@1E{(uld~)UqZf98yPyQ zo|}? zlRsEwEQ9(Z38<2mwRs-^^lmGRS}|i|=8Sm;NB1#l^JpdXM!gM3%TghdMNh^d;#!^^ zA*KpI*st~MFrhP6RZpC|M&UMnd7XSqGHM<3q;wMfb;VRRV-Y173+Bd))B!L&BV1f=HAxE?6qoS=EK&Z&T2EX%0xsL4#iogp*G=awCnVYvP-y8 zfyl~KSz<*S)*bh%oNZAd#$TTzSf2Z=ik? zi7VGm!oS@yohUNSC4^p&e^|2E1B|^%u%POd92U#KjK$!}xiXNp`v(q-B(hr34!nzl zxXY{QaiAmnnwVXr6(*4UsS%0mI)z%6h-zxBR^bDGU1nc+kbc~=7gG`#L(>tyVQJlL z@>1`(tk7t;wYj|=ljq+#&9VhUa|^?{D%cvg2KnhK_ZO+obmEo&$}qzP*a2!vnW$46 zGuIU`635w1kQaT0e@Lw=#P6+aqDk9IMHl@1@?!+~Lm5nhLyIF*=G%#=H-TqOA*m;Z z_Oo7ff^(sp4W@$a`ZeI!@-pRn!;~&?<1BH2ov;dXCSfN`rW$3M-oBf@;%u0&HM{%C z3P-G3Rz|$s6k=)@?Pm5dlnxpq0~8s_oQIm89()zsOc*T@Ynn+980^6pHDaoFK6Vvm zDWo}as7tSqzk$XkMbTM>0sOC-{z6rAJzVAVXRjm;3)k?rBhn?Sm zsY3xN-We+(cdq~G+7lUY&Jg>(Vjv!81m>l?8~hv_ioZwB^ioXb6_lO+47y1Ij$I+! zD%g5(U?0!&zLZ0@YrUKNoXCG?j%>ZdMr<2dSwQARJ=1^PRNz)ejRYB$w2*jSPKmFh zrV>Wqg}q`Sg*42272$7XPlQ8e`{!M|$z;$C*}EHm=;h=GspOZzu2u)-YWAOZQ18U; zFC!wqOtL0GyKh(eGx(?Jwhq?er%r<&E^acv(8akTU?((4f1?zy=o+tegZ0v8ls`$* zI77hBFR`dP+bG#cyqE?uiMF0w+HpZPy^VsATPhPS<77WnE`j0k5mfr1xW{zLrDkGO5%Xatzo zXybjU1_>;z2CBSNr*}yoZjnGI4Cle4)bzY-{IV;GcRS#x>|!-L)%zPYnemJOBJi3 zBqcaNsy05)zgeeOw4R#AO1#R+#Xi%2fwOjxHq>x9m1}ml&*aM{J$NosI_F z+Dh31&VaPI91bJPX7go{e8%8(d2vC|7zUw3kAEis5X+tkss(wH0F?gfgBm$_?I9-k zQ|9p>WBn7UV>O9DC%ax`>l=?NrcF(ga4O7@>w)DZ=-;+DO9v1u5L_(stU>0=a~`Q9 zqQx2JefsH&;4M<}m?4u}jSHVdXM{pSBw2yK?MXmxAXY)RHo)%1Wla_sfsKMGj}~NY zsr%@|l=L4+yb}cZhjP`q&d?+2u_i{>@6|=gR(W!%TCf(9b|S zqTk35$;w|ry5J4o{@%EBUcd$--p1_j@FBcTQbIq?dsufF@^odlpF@yn#=Fko&_&p0 zgU@vJOklgipg)cZ9^OZeH;wQj+pEskhjlQZeNet%22|`;K-EduB;GBj^f=~678#lQ z*}xgI=BB4fms7WTvAB~AQ%-M*c20_sBVS025NmgyaHsvLN4m)DWWg+2SDXl$@Y&q+ zxIc3XVz6&rlg7XtrM1HsRfB`eQ?w)NTK|Be;?xwrY=ZmSc7b68tN&OU>uPUzO?3zd z*tC@72ErPqy+_&h(@1t1h2|N>p3Wr3TkaYx+WovG#>^;F#qyybK0C-AU<_At0JSFy z_z5B=FA3lqvO3bY{aLOiWQe}h!XX*;j`i63QtlFv>}+)19$z9%DC*5+3E}0ZztXspQnC%V`~R61`}=?bm|v`Wnohz+^CH)%+glG_ylB^bU1b5^5D zu4%Hawlmh(^QVF>GSmz0;Do^$@3@gSsTQtyh2G>vJ^YODs70ef#F`OsNmFa(%Bqza zT)rB*OdqLZ&qVD{fZcjkqxb3@`~gn}We;Ba*(0OUdSdCWN85K9)k)GLNF^t{Qz^sc zf>CDaTpWkBmErvU(-sub=>3H8z4+{z&3!$G;^9*zF@nYIy9%exa;G7k6NOE?ENhR0 ze!Ei&K#(-ZJ)?=Hka156}@1ffIq|(d9Jl?gumb3h?Ht2y7)tQ z#4jLNRUVIQ3C}Q#S7Z`SkE1Jj+xGAPIpQ4wen%W8hVAW!`Lg$y<^!`aey!X}zhIU> z(s|>HS`U|@4_iGn_%fT_zW>SES}=I(78^n%_qt@AM+yae43nL8i^*Rfj67`?`TnWX)3yn+rK=LX`JK zZk3dfqX}vFn08_$hj8kqI1I3sgQAOIX>!8lV|kPe1zTK*ga@l03n+I<8Ruk>ESobs zrqk{=1XXPjg1jKw!kshK-|O1@3M(Grv>cn9Ta;AXgGB>ZxnNMOuCB{)FK|cm+bCfd z1W%L@C9+q=PgL3|?3EH`75^s0D@NGWg(*nOc_v6)6pbrDj(Q{(WR%G#fR<%5TmrtXTte{-tm{6~Xd|dA4r7W^e*|I33dX@5Q9&HrVjQSw( zlxj4v%-=3{b@3AzP1++kF>d;4?cRoXs3SmT=KkzP|GDMkZ#b#~+1S{fc}w5k8~y** z;IQAOGp)W=$* z304yj$vpcFa?mY8cV;lp^Wm`kMn<#QEcNryVAQ~_8;fPxIIDxgq@E)3?KqVLn5ziq zNHjE2ap#W0_y;}?v}y(Q7j<1Rz$gYDSp*NcN3HL;)4AFybxm$)(FJC)WkNbqP(&)- zaEn^CaYNvw9hL`cj>0?j5z?!4!knv9L}#>p=fO$Q%cBNe!)aE9?5YFIFFU@y0I+Wd zS%aoRJPETmC?IQkVoO4OdsGQ1u>~QSxPAo*ItubGutB-K!g2QWgyYKEL6MWp8_Se-)ZmuXd*eGA&|~wLPT*9>d=BF|L9N{1=58 z6F6Dy7trLNc0ns^KEE(SxDfC&O&WU{Rt26$%{#~#P-pu5!_a<0=E%=%yY$Ja3nWUS zCYYlC6vlhLO;H$^$W@@T<`SnvM7?!DgpEP#X2SHWa^Ty$f5^@50GY9joPkamc)8LY zzz^{(RD;39iVpxniz%E5vq0v6naO7L1N=4(vg|dTtled+8EL_rLMT?g$R@H4%EIVu zV5Ad?=#$&5CChKB(hpb^CBDZ{V`JE?s0+xVU`m%H^XhRyHZ08~T9E$hk-KaW&pA_& z(QR-)wWqxQHnK`$GkxU#UqTlyFtJRoz8AF*zNa4l7I61J>-E3C{|o&R zsjy}>$B)2+OwJez3mcfAK^O>TMve*=3SOY>SsEf_Xk|A4^yidQBh_(Vvd{w<2_FOr zq6Y-OTa?f!GpR^8uXaCkY%+t#-RWuR3ZD;%yUz{Qu;H{scc7Pao$aQ(zbuqBH!ad& z)k|*|F}E|FJh+D;zLsd2J`pPJLae|v21(PKB+g(r5Xe{M zST9kWFr8PDUM4p6skT><-r(iRPtmnJy#EDTGucjR2K{~3rGKAw|39_!UjP)v8LK%4 z6dvv0^-Y-2pkNV%|u{8G3C$`2csf6^%JW8#S(@x|W3#0Z%2o`3K{ z+2XENtwEU=EHAM=W!QUq?&)-SzyG|(z*f`bXAFk9+^_X(cQuf!vzhAkgf98-Ii0=(b#+B5UFM34k{oi%WM>1xB+-B= zznm|T2=7^sYV!khB8uJAc~7I2jjR#6LPeJJ^`WWwYQtv~O=je>RtlXU7B<<$om_9k zSUUNT-;D%RY}u?t!eznZK1k?icO7XD7he4}GY2JNOSs2Wo)rCM@C2-LTTm@l%^j&- zFJ2N^UuK+3JWU2@I(=2m?>r4L_T4Q0rJTVsA@blw#RWhCpWQ+gr$1dS$urVNYl`Hk zP9>e3NlsEbaW=ed!7A45{dongb4v3w{;LdDbC1shC?6Wl36rYH*!vEhr z6e0-dod3I~w!Rgyf173U-{+7$$$x+U=Txgw`5SGgi12C9u1rH6de*cP*8p3(HYy!@ z_5;)mc{Qj_+=jm2$|MIR%5p%CN>U^l#`Q4J+3Uyq&rMEK<7Xh4u24WUi4Ky^E`!P7 zrv1vDXS=(T%IjJ7k~i=(Lc2Ipv+zD?fGomIY5+tsHB#O)Hzd&0uId1~?QjdeC?dR2 zMffrTL86wqOeIH}DMZW?tYD+EqtRL!E&qH3ZYD>W5A>)=Tf@+!nzFG7@mtGi^gd|+ zsCu7nlzFUlf&&$*It-hdWOv#wz76i5smqoi(O6nAe~-3?F`6@iHQCTujv_Rf?ti+`?)5R!_kmh6al5KlI&oOprxz>cuE+gvH_YC{ji8$q7bB z)Ged;qF!<=1AHV$e~fXyxn_H}{Ki(a8?Re4J4usBNe0KBQ{Kij#e@Zd{^(90FG-)! z{+u_dX#%QGui7#B+x|Fe@wgViuW4)I2D5UuktC=>NYYLK-P@ut$}OkX~(Z^)lyhof_^ z@+r^W$FCzXoh?>%s(mOvRkXK(HlPVnag z%Cw5<1yqgctm(u@c>tK;t9Au#TW>Wgg&Ml=hh-%aAMzZ{OKX~WPCu{K5*rjYM&zWVQt^q zT{e~C{PBHdcX4#+1G0w%A8(s~d&}LwT<2H4T!VGK87(X5|Hq=je;+^p*!e3|3DrS4 z!t^q727F`0Nz~>c5K+?&;!%lvh4VyA9%* z&h>4wh=-R@)>@`DmGaCI|GlFAK)-!kxOX)UG6aAVjb06nvu!=%LrC7pf(}J1^7pJM;Xo7m>=DxRaP8krBJoO>uw(v)#Dm(=a%m2G3dk85((F(X zxhnU8$EO&`cd0`4CB?_D_QB6aDUb>ECI9`*%$vVE zg1{R-1-X87a1$t>F^I0@A)eG#u}e+WSw=7)+X#n!6Ok%GHLW*N0B9t;cUTppZmCiR zOR6YjOEvwF?LAh`S+C4#S&>RhMFag5+rR_$T~ut&&8gUs!s4p?2!<(F2tZYADBg9` zi?&L=y^scD@+6zFwz!(3H806s%K5_+UpEW8*IAH`@^jC__})OIXHo18;3;c8Y&0$; zD8p(xChMU~0KigK*l3V#+|$ofmI*J&vCA711ktym&yucft*(| zl{$Y+PMp?Q+Hl3xjObk*S_lk{o<1rlj^Z6`1zj1>8esh6os`uEbNpU=2M)P|^+qCQ z#-N-(GVR_hRT@7UUrtD@CMdMONhYv2e^*?C$@Lai=l;yQ%V2I+$!#!ZiZ8yTcWo?-ap=4iHmMN4R_^C4W%%cJ zCqjgDl*JmJ#7lA`Q9ha>TNwtgQ*2ove)^l1TioRxyLlq#_RCbRo~!`)tcN$76Q;CQ zObd^_m@I6KNnnmt#VL)RSENnRGCOM%u%2D(EpQDkOknD*iGM>ktWB)ntOFzy1zS22 zuEe<1geh3})J4^@QExe6a5vzl?pLN;`h=)xb48OrACL;|AYS)(!BWUnyoPgXZ228{ z$cfzjiip!O?A}4@Ro%9L7A?Ng%(l-!|J1P>(lIO9>#AI^ggjP4t!mfLRGrB5B9TDZ z3EOmzsm=Z)D0xwewQ|=Q>odyRl>IyZ#vo=v*LQ)#hwX-ZwuR5gj={0t#H3k+X)SLv zf2fsDo&EyX(yfjQP+*Qb@@{rKtZGnTN+taA;p7+| zHFDy32Bo6$Ae841+AaMAWq%y@*rxz+Rf+=5B{Lf1o8QItT@+%{@<&uwfIB8prLJXlDNGwHJ^o z(rH~AxoHNnbqI7$|1(MoSX<-AdIE?Q8ACK0y2~$Hp`v|Y;J4O;ypid#yBtXyu0?;3)6OQEhUG0@$qowO7_m$PBS^z3E71kQ77Cx%3>Y& zLa+5x*m>X$YNA^jJr#t;u)TqPRGB;o8#hb)Tt?tVgsU}?+6`-7 zub<#b15O7@dfEF*y2b5|uHo9^4nGGTh-JyGd5o|t90D|LFfV#DHK)h{=uYJMb)#YZR)y{D*v*oO@u2$86za4)sITJ(oo%@9-{01ULwB>bxEPK3d^K z;R6=3;bGbHZ?BJhQ75uo1dnRSShVR4WvC~d*XW)`IIzL=w}wB9f5&uhbk@@W(i<3h=8 z9uf=iOGn6r7%{X9M^`-};VD2=@0x0$AMV)H4ku`|J}LMTbxX+gD*wWJH;3Zr&^ln8 ztEH+lY<^_j1GIPZWQk6xQE@5Tn0FtsIYr}V+Y57wHjh*T?_%tTBBxXxl2o!+dI^gI zc^KAuusj(RA$dk%GI{-)b9gWE+|4xl|g-UU(yj>Fm(m0flPJ4b?mqU1Wh=Xwv@2u1pty0p&$q2w^h;)C!_yr!RwQn$ZuDUUP z`22k*&`Pp{sM$^4F4{#(=ly?_eFKnWjgsz6bJ|8Vr)}G|ZQHhO+qP}ncK5WWZDYD$ z|9kI!yZ7IX_hKXJR76!pMO2-WnJ2$5GrxR++Ly!HdE&XYc~96noU&H^#lIDDqx`{9 z&~hkzH50|{7xH&ShMX?d5``}Y0`p5XPw0PQTMf+|zB(uLogB?ec9$ zl0hWTm>`lBlGywA57Pbfhh7P@s9`u32M(GUP7a2*{iBwvZ{(^gcBl?y=&faWr3B&i z!Jyc7lSFnBCqhKYiL^@gc&QD0E^9Yv%M835UiEp8L3j%56lbxe^JLxB zN(l+hvu^7jN1!&2qw!T6Qoq&?qZN;O(eECMCSof*wWO8yq+GVYJFuQ4wHwQ!7L3)5YgEgo^LNZy2qyjsn zK$5R6K!9L4=!CTdz{-lG-cOv%W(wJ|`E%^pw65(cZ(ymCiEP((NGn*1Ed@)WJBtZn zj_MBe63%0kLr)k$M71B?NZsydLGnz{GEJU~sW_|m>$TF8KmEBm$Hiz3c@wWBuQT4k zsAjNyKk`MN?}6a5!3rg|QhT}L1E7BB*+b9etFN8du{CbJU(2de1rP0=;ALl9%yMDa z_&M*!T&}yaJj~~UkJIw$2w!~bT2KAVA##hB*0?t=D15NTt>nBziJNf@|0!bM|D;H2 z07|B4mo1GpDl%6{1N4!@U!*03lL9MFT0h+}TNcKA2D#}F@wc34;V69!qWs_2Z1)Ot!q$ITEAJQYD)_&%+W%^; z!g>adHui4+97>Vmfc58v_bT||pK99aAEaIzl@%5hJUBSmk3`wZ2amU=CE1!D1b!_$ zJQUvVPiFkpGc@yd=lNaL;9U1uSFrZ-o%lzezFVg~nMHCazi72Y*(#Beda+#5LcQ)A z>MDGvgW8yCpqcgJ!Xv}za{$LR#KvXx9rF)2%T*@5$EQJD)`pZ2D4Bbk=ubY<%0CZTW+wyYTZ*Wsj`+#)Fj~I;{DNz7f6FXf2~$e#G!h@Ka~ChiSp<+O+K%^8Zl74P)?-AC+wgBqtUcSu{GIgU%2(&LZO zExj2rO5JiYQ1ziD+&HjKCsvgjxU!g1a8L+b{CAk0Nf`ipI* zla@sJ5!cG;YH~E0$-vQ|?(^sI16l(JogMb=u0f) zhAP;zsd-43Q89y%M1vkVLu?1~k8cMz-U8L3olk^C+AF|xPzxYushZZO4JVL?`PX`N zHj66i759~sAN+2Wj`w%FU}FH)@`grc7I&VyW4 znwR1_!yP9^Ocqj<^Jk>~m*L$fj=A^#w~Zwgc~0@28(g@gD^IP z)zxh}KfVw5pUKc}cm5IDQ*S^-(y0KQtFkvy3 z7-W%MW^=Ku=>nq^W(a@O0!>P;XlLX_R6|n(X4T4}#JgYTeGm!GGk6@Yc|v*8^Y?ei zj~}579UIGokU+a394?n#4$}{59d{c!Gv1)K-z@;hc4ScylN1hXgGtbinp3t7iGueM zhPE&(Qk0lW+XbO@$Q;DwfCF2yAe1KW%E>5XZn?9P@l8fKhMTmocHid1R-pW1&X*=% zyCJ)*opRoK#fqtFh-0N1E2x9hXp3zOOD3z&Zp%mXv6Zgm^D41zxi{8Q@C@mkHZTQe zkD(Lr?gcE6Xc5G-;RGGbYNG+h#xUV(BMecaQH?@iQsg1U%N0OU$wrCDO zBH9mXPl{Ja;Fvl%tX<6)3=H9K*dyk1{alQRZ8A60k`CSdvqz{Q?*e{9pH81Z1~nM% zTb{ZtKz_G9A;_t3Z~=WY1%=(gdwM1x#H;1d%x?V5Y`=pye>H1KkFL!hFU3d;P?Cr2 zfskC|E-Vc!u#UCl|L|jpDty z>Y_`9jX7y_F}=c{I^*yM+wIFNj^4HE#N_5^ixm#c#-BgV8Mc&k00^gIdcb;hy{A&f zVWs%17nQ#Cr;#!5u59HCsNGFTu-%Lk=hxQFU%B?c7(`}39)a#8(D($v4XJBKk~W1v`)Op6`?uJ%3HE|f*9gsU8+8H@5Wb* z_rDACObA?Xd#Kx6k?2-Fh(shbsS`U?-G8P>=ZF~yvxnJz{d_f6-)=Zgfx+7}o-F~S zRPm?x1!h2_$aDMm((O_POau<=SVr)U;^DM$>H(wp)`hEM@%?ex>L=d^rL3gofXi?J z=~LBhbVCKKpi4xaB}YkNrVgpb*ESUzn8?y#?Z);hR@PP&&@$W z&+0!GYnuH9-V}^1zgpjI>1&cd%eHD@Q({urKt5+2FSILTuoK+)B77f zP?LDsc>WEH!5~JY6tI*;`n6%P5h+wh!_+zIMbEsTKzDJb=;oA1 z#epESXt&Z+`l+$SVIiEW$l6kNF|9^EiPY0g&-j)RXf9T;0dz-DMyd(dUR=by^~{JW zGHLoCizDH;c31F{5_W#iZXSWCh@0Zo*Mg^osm97ojYm-H*(VTXF$`M%`GyIf8rgE@ zD&LZI@f!~Tbs1z?I!N75Zo19i^I-`-{{#;J6$AQP7z>tXpeKhRrVW0(JxwpF&JT}L zb#F;ppm&b}WhQHdyN^jk++tzlph#xkID!OzC}>91^W1>?&Z{VAjTe$eO)Vd8(=ggp zG8XQS(j>WUlto4ll;)8GfuSNi$B0Yitv{As)*qCytvwP#>DX7=*)R?e?aw|NxM!=) zVa_|T&69n!T2YSa(fKl2SQWsioNi>U7&AjuNUE3#t%L_f((Z4S>z`RKe_L;W_49`8 z^(&S@esK=}S)Km>8ty+~TKvRUzYKElXPs7uvlMVqqb&6a(QILa^LPFvUQwk&QAGqn z$W!lh$rHtHL4%S;xFLT~e*He+YeG)Yz13(civB7qv)`IMYz#;5A8&VPzADG8v~^|% z-!-5_dH%!%SwT@j(V(x3{6h1_hH0Y#(;T8t)}uwvMjy&M4em>O=TzdPqi?+m;4Q5l zDG(KeUN@ZVjRs7%(8u0zfE9RcvgK7_MuQ3z&;Z})o#sG3gypy!s-1)oTxQu4A$b`y z?m37xN`GcjICZG(3eqXcD$nQVT^ZhwIG98hbXx7*eZ)@t18qpm}5 z9PY#M)bFQFhs`OmOe7WNqk|k@|0LDVp6?8**UmS5qEpVBQBX%gHW7AH z-B)5qskt>BUM=lgtrp+#fhMAGC61xMrHLEq{MiXPG3pfh? z^EWBc1bp1@tzVbHpzGf*akKelKi&1TnX@Gm}%!Gm;8>hLTC%z)jnd1?=$ zxnGBes(4%rA?P699O8Hqg#tg9ht^#5<9G@TvIH0owg`H1gbK*M?03B1a3O4?-2fx- zAkxOqDRtx)rH<1lTThO?y9x9Lj8)t+u~}<`W6Z35$q_nIaUUrcgWeY>b`one_krA# zoYI1b-lY#m29jqGGk|h#K%ObuYcWS+-dHL}mC&D6{wxWnWVmQXy{A;zml&dN22>xV za|<*>p9mR`VpW>=&&nHUOd>Vv(dJz)o%5#yB-)Jr2ssGTQ3;4`ct-jXP|IeUdE(#= zHZTG2H5EXZbw)Pknj~e$+od}5!lE9Wp&1RxV^JigkXH6{w#6|Rl1LhlQ==*c#bJ_M z5@cH3amsF$AWPLv|2Phn)JC06%jNxnE=rD};V7vYw?N($81F8uOfwPOzr8gqZK0Jn zpQ^Rw&7fJ;!nbLl?A%vq=>ADB4w35Zr8S5Exse@Oo@acOsDqGwOl9 zS|{W!7}5=f*miv~+6}{a`0xvm;Z^BE=gDt1x)ZjNIDgAC+D1+uT@8dd;#3r%YBi}a z&~ueTes+ow%y=G7BHo)C!VC6hg6$I=Ii|w=m)!y&*w|AKUk9ZtYPT^W*_J5i425TN z?0e1ik>~8BYd%SdXEvM^TjUrh)(aku0Ww6NcPCq0>X?)?4CpN7X(`HRg-R1UdCh+!~3BFG0p&GCo3y7XRo3f7%zW3~v z&!2eXC35QNnD`&0yE4xWzH_MIgK)a+6@u?Ugqtn&vt*f74os&!iiD_4+!f>#6Q^nf zHfX8FQ`$4N`ew6w>*WSRx7yeYW)0D4XYEzSBlDJ(r5TIfQ8%t1O1+6s(qm)I=1dz2 zZ=7$gq&jG{yEGDQ5I#uPr9E@j{c^@ERw>tDzY?!eKco?^i^LNNmu#tvUZ#<5ntcn3 z)&N_&H(yhjiJe;N1Gf6b&~hu(x-lX#p;GO+FPnFX)>O!)@nqhDQG_1G&X?y!2jfXa z5i0TxV{3!s^Yy;fqd`y!DFLlPRO&=ey;V0kAjNrPqPT2Vv9&_#rNSwaE_&>%BUuzi z6&83R9xA;G+NK|%m!2Fe^~>AR0!x?i3yvcfEUQq>`n5t@8jVUyy<^0?B* zJMy0D#yCxnE=FJ2S>@vJRw_NrE3r`nEg*qv=|+@`^zObru_M#s0I}($n!ygpB^Cr1 zQbRGV?b--wa>j3o(~Woa$x3Me>L|S zauG0b=DlIZsDZ~CKP>YJ+M}kI^c-OXHuGL!zFnHxa7mYaQhV$Az33Fsv#Hx;dcn~>o$IT2f_3z@W*s~ z)mOx^vvp1xZfEUG{t42@ms!Le{fvMn7dbqxi_N5~aGW2i)Hev3Dyqw=?0^#BHj$8Y z%q7)y_gpLC(6)RH-nu&gqe)WAd7|fv2<4>NX;jzWB-j*zlA;WeG=m#D%ujHo`-FXN zChUUYC8%74Svm_Nt9YulPjv&iMZYAkYPWK5g3;auyTzT2&XGdI=g5HL$Vu^1ubI!v zk@p*E1$Oel8xSB%$YJ6)#x z9aPwETQO@5!`E>xbf~@9kqZ*we_oH#3b%JqajIXJt|sHhdKj&k*6?sB@RUMHO1Um5A3^9x z@oCWy#I@F3iD`a}Yc2)0DVYmAQlL|Jr8kI`GvMPfV_Cta2=D+XYbMq15qJ&-! zhA!!XWKuzu6x%}A@0Z^-`;C-p66XxtA;ma9>Ek9T91n4N`$OG-KzH#-96Bg!pPU!X z#^nF_z`e?NW={$!eTPj-c*gUV-2eEJoRr~rn=|T>)?~Rx&AcmbrAk70g(C&1QGtn^S^QYMv-)o{k-r(pQe(N7<@FkMY&NMt3}8_{`kib%dMq$ z4Nw_^qf3;Wz9EPSx<^nvKN6Jn*J%;$tS!ZUd`w3z&sQgn(x)K^J0Ym> z>pXuXjkcEWT&ka_J-mIW;$`AGjltky#KcOGL{gDq zV~rgBJI*a>S%_ZuJkKJY-wXZ&!*w<^`ptULTGzWwcQHI|I$TYEeBAA$`GR3ETeIo5 zUg!cSz_!MAtzj$JDgjs70TiSgsx5VWv0nJVWiIHyRMbV@Q&+rjuogGe$98gGeBxZt z_&v4=<`$+NU;~;k-Tn2NiGXj+SFa$OW;(cq!mDaW{=lltKNsW{!5IzGuIzT-w zwwA81I?C}+4WXVwyP14S-rF0V@;HmO+__iYaH`rq3bT*Up*`L7%5pc>P1QciDCn!Q zqdgg;o35duF)yIpQQf6HFmQ^u@`$nEnH{KcqlfkyloG{C8b<~Tg|T5rpUtQsmZ8O? zmj(0>($)gqnWTp7LP=SrF9UA7`}vpO)iHVcer4k;)r+THiRSc@8;>f&jK;4q_7W=; zJb|<+NO~1?A{&pfHv$=17KC%=Wid!2)8ejJ>V8)#%jb&-l&5PKCdDp5#G??FXpM%x z*bZ&8(Wf>p!3>5BPksH{w!R8p5?b&t6c+GxeC5#JO!e&T-Dv)S!v4di{*f$O#Z3LR z*)V9P%Bna@>1`hlzg2!VYOU3Qs3=I>ASv0}NHQcH(PW&E%=pNm?fSbX=?$?O>7rkn z!_2kaOqb3!ZqB!AJHlCnW5{zTEckW?K#)UPpzzZ+P}4XRze8zLeV6EPT{tN0%Lt1D z3XEYadPZFrE{{}*w-0^S1MUw;k<}9bHixx2bzYeMP`Q>Lo$BLFgy+Bagyd;DjQGnRnZEoH`)}t?+D67!z|_dV;$Lth>0ch0Os%yn--}8<9-@34QXl#pNJlIs zDhUa*xCbn=aWP13ZE|*QZQzp(#`L-acP#?AsziYbVl-}OaJb6$xa>0d{qg?r2i#YS zbAc|(6B6UHur{)T{Zfx8NU3s47yF5cb5?ftIu?&wpf9$}iINPw_(IGYCqzF6?`0D~ z`r2PF&-KCNBoI&TVLel?`S%ZRbY~6C?q+prx8D}oR&6B7j)`23#QxFy3BOR)H%-v^_KXIycR zyBHJBET=8CK%=kH<{AhO6#L^O8&F+p)llkTa?(-%@u2wQOFdtZsTMva)kyn8#O=7_o4aAj%Q&N>P${Z@9UiL5~i z)l;DGH%XI+mCD?=aLDGmdlAka0|$f1h1d@eA9IJ_$`dAACtQg@Qj}J1y`)DnIL()1 zGQgoZ5?(P1O5&AC25C*-fm;Ne1U4H3ui8)5SqTrs|o~HZsIcj4uzYOuA_HHwe=Wv zCkfEi=`^X$tE)=-jiik0}h2e&Ru_^ ze+Ylfe$6xa2fdN1w*pP@Ll{Awx%S4c5TT59{SjYjA(<{cfn>8JJ*0qwb6O{`VwN=? z4Kk??G^x(yxFQ^Et?9+Ejv~PNL%~{L#sMmBYolG}I zeWDd~M&hMuGEKMQ>W1wF4(C>MC+ehybHhm~iO-AzR3 zEIEFh$)Z>XZ%nnv?iO(C+o2~FxOhB9C~5~`VPpUKw*ez(TY`f1uK;E873BYOljwi9 zBLN#rC#(NU5&x5Elb#R<=7X<{wi~Gw)9e(zBzL!?{>9IW2=7@J^2gt9;dpRNGBpk5 zoe9pHms>m&1QZ73T)@RiZfR*^<_Ydw3Mt4VsC!_ib8*@RIEpsNYoKR%bcVq8~e**OT)I(Ra@*VxDlVoukLaiJ*1{M18j=u0c4f7;T6v+ z7^HBW$A~m{B_Qy?1*`7~4<$Stc|Bst6`1p+Z-f&gYWNT&pRdWbjY261Kd36h*8{Ob z1JzeGo?z=4;gHM;MlP3re)TUkO#e-l*m@bTYyH)s&V&B#8^hm%`G57be?Le6#wazZ zLbxh^Em6G6I3!9Cp$Ci#!-HeZkMRSM0Zn}a63d;1@RI}rQ$L%Sl(-$u7;ExgzuGi(NNb&Sy#5QK-+kIY4h}cEn7n6eCkRWlcWa*Ce{0u+4hx! zO@D4Xd?jNg(zrYjx&Uqaz7RbG0(?a@kaKDwsA2IsfI1*QG^sD3HDVZ%di6S4o-ogW z>@e@}jZ^m>Pge)X=>lNCKAN}Hf z0Bq_f{^nIk>jYfKHG-$&;E4FO9Rf?6Ed%#%KgkUl8tUGH<^?LwFO$}LwU1V%!6S6l z#60{~4V;vXc`_x7vpA&h!Sk`~hzSZOh5fV@EO~^7i5M|jdBgb7^Tn!?V$SCE)fDUj z6p7N8%Ke_%dYmisYWHWp7>XiaD^a$g!6!yV(ler7_$hR;@I~s#8|u&cyUJtlmO4B}MXGwV z{NA$BHB z$%Fbynio96Ot`-|Yvzcd+OQ zA&JP(450N{v!jb#MKVzb<^co}&u#EBBm?7MNh)O=P~H?g_=N1DIy16tPMVG*^K#|< z2rYD!$V=T!xzR%)e4vc1^03Fe7^hFz@z%L=V&oTEXLa2s$}I*m;`Uo^?B?nPn7YVy+zPY`W}TZC2!r;4CG1*(YP4aj?#96bNIq zS`q)0BxMk}-6?`uZzNjWTA87}vfPywzD@|GCQSV8291^jUtN8v>*DjYh_wOsY9rbt|$aklf;D zv4g;pTg-b_n&q^92@NwnuXyHiYJJEGCeh~aF?PmHuzk9A^l;&qd)S4WoQxVz_Sl6T z{<6aHlL$o3v5-vCeWlxwDLOI8OlUb0fEk=R|<7zIb z-3A`n3jTm}PhLQCu6@%pfy<50>KDgIm{e+o{ys24UfE1({nM(ic6HMeecAvXKrCwy zQUuO`n&rvlv0AHWWv-Fj+6ZQo>t^jhH5Dh-oNk#syZHgp+DysawrQ<#S!%DiW;1X( zq!j%KYa$Wi`oqX+njlvFirz<3)npK~AKbM{1*%KAnGtt{GFO1pSDiW~qkixZl9~lO zxL{h11Seo47<+|d)sKON1)EXwW=BI#I2l)n$^-i09HL#4D-7om+|tM=Kk);u+OUQn zVLUHkTD#B6R}&2UlDm_y4kc=S1_xoJcvndf3}5q z{!*G93yNld*qq7_J|#R5ZT{LFe}wxiD82_r5SSxgc=Ev9Vt9XJ{#pCJAN)uBUu&TI z<{f?>kHAjs$?r4#WX2Z3rrnyot<=U#58ed2dSAZ%?mu?c5RAlod<1#lRPf6K`p%Nj z^)}<~);Ta;Qj=-WAZ0f{`1Dl#hcz0H9wnMj;c%aZh|dY6N0S$6wM-SA4ZQ;8G-3-5^oRO{d)Xrvl z-&#UYL;v6lBr3wF59ZH-gr8|sKy2Bd^*-y5uQ76hAa(TMwJ8V1nQ6cI6*tu^fjEV} zq-qp71-@d@;W-<~+TF1cO>RzCXx2HA=8$)e(8wz^_CA1pn87j9xHNE}upsHU z6e$#5J`_DEUzQ8#!f-ZNj#5oSWb2c?k>kbYwct2+3nTaGLi&S(o8}miGh%y}t$efb zd!ID5RV$cB?MnQkNnwlne3UDV_50sCQ%VdwfOo&gGnOzmNqZ&r zt%JN+-s%E>tJ13KO`WbD#O^|0qbWi|qp8Y5%R-~7SzV(i+u`WYsNU>2#b$bIn(K+@ ztcwP&(`RJsTk;+kuJVqAwyP~f0u93pWI_%73r2zjisu$6a?s-ABsQpN9}DEQlxIY- z=1ivI?Pd}8-b*uF$}Y!dN9LGInrDn#&(#3R^Y^FfgSLuR7pA9bJ^L#EGtSmuA#SaG z5yqRNn=Eye8Bn%R-l8V1DOj?X-T@*lub~Dt{u{{9Znho%Ufky+-*I8}S3`nY^S+NPk5>{=5)(YB?Gc}aQ?9*jtO_aBr{zIZP`m_T|{=hbp`mQgG&qJ%ZG zur!Lrs+(5RB{5E_&Yy=8)u&6aJyuryNL=l}pQ@UQn)~{V;Gt}p%k*># zGDHOG{1F*tkswTolICf{f|In20{!{R-KxY54b?nLRYa%#CkVp(sSqA2qPB+dU6441 z(jN6H{c{S|#YXx$Y3Z&Xk*?%%7cC^noPrKf2EPdvWyoA9NwVblINf7#&_>YDnj|aC z7*B%J=2{Hh#QCd;Dpe%tG2-P{vPNW6W~6|C(-xhR7m{gQDhKyIR-+&4BFmtb?j4wJ zT}Aypin14{6o@z(nY@`N9g{eKRhA&~aW~Hh; z9;EFpoFpV9roI-tII_gC#`T8+B5bT8im0-<(p2j?mbOJDsQ0S#?o}EK)5MC*J}Vu^ z;o}g+*ZCOmlhCuu97BbQN5B#z`ei3K*pRo!B zXAvLk`;3!p6(34z!+iy}%(o&!UUgY`936;VUEtsE@K=O0 zS1Q%PVk$Ngg{Q;wZ%Ni4U#iE_|gI zBLk4$6?-m|&#Z{#jM*PtPhhoLL4Gn~r5Fz?q;a$E24TUHHqAM=g>E965)qBDsPcZB zqQ%VI@e*Y9tq7qe3;=zJb7=H6NA{$=GR1h|HOF2_&}1qYOTB7EncnfXb>BEh%N3v- zRr*-TwC@C2vs1p<>EGigmAGSf&|y{~j6vMx(|CQz3qU8i*Twzsmi39J%o`^|Q)JOW zhBrC22G@j^f)R#*+R6v@Cw;9!5X&yw;^4&Sk=Y zIKD5P)!#aQB2nZFD^Yy^?xT1^Qi#l1wFls=*kj|Y*#mt~tJjMsjH7jNZ05>feDJnf z1QlgbH*T>05p_StW*}H~(Y{6!IhPmEJaue8+DX?3zBZNY;YXn+T>WWfR#tP>ncf&4npriB-?BA%wIn{zE#85{afEevH_n*vm;G_366}D8z{LRUL1U2c)q3 z?O|t1N4wQUvBRfB)pohnj8=(a!#J@ewwOwLt-dg+S$G5$@?OPK+O#RK;;yjQ1}ei_ zY2{G!HBtPcvn@)^G{rtb!qqN{=0h|FCIfOu`do^*tY2cfse|SMQe=5quyFUBJYUOv zpuJS=9w2H@RnsPK460p%1*D>ZeNd$O!Z`lXB9-ABDhNU%j4mpK6vAr0p(J9HX>ZF3^69crBhO)$PQ(S2p zPv1kFW$?^QS4-kW`mc=#IX?^roR)@Fb{#4~{)O6Em}%^0mmF+I#W*=;mM_lG;}Q0!z8 z@;@xQmxMNqqfV^RsI;!~qtvOiZWmD#BBfXpOi7}uNz@>lM!?|sA*t8l*jinAu=>{j z*n&-DzU@LLw>=JE-*-F-v`J-pq0W)d1K%duj;r)h=mY?6n^K8%V(4GM6h~R~{XCJF zk+#<}9X+9Qz6r+6iRZ5fu^P=89Kg;QOiIrgEK<)IY+A1i&_F;rQADNrNZaCm-vn>d zXYLG<-RZ|Gg+1?H8y$hxVXpg+}R&A zSw&0ZiM7hPlnpURsSH&pa3DW40Xt;Uie*PteKj`rbyi z8<$4f+BY8HB6Nq7^eNE5FM6$qBC@1B;fODE;eSYEDR|P*-X55T-7yWitjgUBL4{e8 z<2Be#MetA)_Blv<%;^Fck8>qW1ju7QA|S)0((0DnHsnjZBH5dX9W;MFS7z})+uV52 z1kBVtb8T%tc~tvOP_V1y%Syg=nH(vH_9@?igyW6MB&5&8rqAeizjKlwqLB{i2+Ddx zj7-q%6TYATUY$+Arzj0Xe0Q?SmMu%YI2G9_slB;KSD83oN8UL z`eX=Z1WxW!Q4oeWU{a3IHjfdbkqkFI1w#Y!dODP~)Nsla(RVq0>|10pvRvl?QN%cE zuL(_Wm7DSkIal;Mc8TA(gL6-z{&HmFL-y4;(O#ra9n_obk>L`o<6wWP`{bW!fhM|& zw0kVjlRT^#QW{WiISlt)=rp|H4q@GuSg}-3Z1E3lkJ20iV0`|1fcC^%2VomBqM^zi zZi+crx0}4Ps-!Xs_Q=d@P8S-ce&%L!v%L&V$lrtR?f9OKmep4*Hd!#2+hf>ivWasU ziI-VC;b9T%e?vX5dcqp+pN=;_q+WDnCl5x5bY!Ox+9!7kTh=NT46Zx=X?}&Eik6>V zdw3XI5NqBGt;i9M;Fze}X29IP7;Va#@xd)s%5~7K?g^H2*b#CMsN)Z@3WD7T!%jwO zWAUAdLzdX*4w_Cr=GgU=*uSenE>Gt-AUQc>3mlS;3{ET+j&2Kd&*pU_x1bYZ!r)Uh zA}H^t*8F*&u6IJD2cOe&o!Yq9VEwaA@n*Ww zlY3}AwLoVWJvJsr7evtCV=V>QxXkj}O|7ZOovNiTLYdU&xf!SPIF)$0REycWkNA=o z?kvFD0Wkre+dbgF%jw;JUG&jp{`&j=)O@h#uPOX*-#)*3Q0e~vs6p27Up1G1>9!>) zZpeSlxN;>xNhA{m!iOQEEar~ci^W!i|FBO`Y(=gsoo~TY>+d&aY3j(k=J9-3L&I$p zNA|h~ekYyCmTHXv)nM9S;NmLjka?QC;^Fc6j?O2|@AIPy9lnPTpl3KpNfDqY`i=v> z>$S9n3T+@bOgd~>kDfw{w;L8%0zIWPF{iaf4^WNHvx>nSgWT5PSY~e9Xiu1=Z=5mH z=rnG%DDpQp6X}fij5jt^t`G^9zU^N#3FIuxXy+J; z71P?+r0(j2PbHEFXRgUu7q6->TdR2~=1VvIMQ(2pu+gr{lK4FiXrOngi!g+g+bn~F zV!eA}N2X|-z7bVZdsFN85`pT%Mr85)3wHMi0~pIhTBlL+No9??erVR3+SVqXY1DXB@cCAv6Jh^Q!?{qg=Dtuk7+ zFzIBscOdWuXnl#~n24X$ImIWvoda?!5~=sv}14rJ~Pz+#hQfW`VLs=x@H97$9F zPWz{xJLdXMh)g0i0L|k}R^`>OIdx2&AE>;w`(u$kiykPf&!IbqOWL1Np!!L{InOh1FO05`auo z0NM&Meeui`RLwLj5Ur)VufI_UD}^b6z`y(+H-yh{z_50mc7s@8yJ#aTy>nXkloigk z>I;#((%>E{k}ZjMVFqAp=WC#b45UV^Bi6ouWsy8PDW3gXbIQ7zBaO#b{lpmQzlzxX z_Zrhb0r-Eq(m#NAlFFL~mI}Zd?)>r`c7Q0*QrsFE!fcfYMl3SV%9IkXP>t}rpQfq$ zNi9cjtdTPV!vs;?a(UHK^D;S8I&odox;m3~c8Q-U&Vx(N2v6VkAA-h*w`u5sUL8x_a%N5y53xBUutITe3Atvap8l$5WVV#^64 zXQ=Oxtsk3TH>!)Ia+;%qdMy)g36Eym-`-ogb?%3YCq#OJ6v#HPqhf3<9@pQwikB;o zn@X?Vu)p5n^Lvb9FtXAgt5urBKG>awoW%n>{e#b2Hd89^R4Qv<|C=m=OIF3%+xKG5 z_J;^#r0<^moo>9DE0z|g=E}8Eqs8&gE-75h?KL_amuKCDLxUCDpfAsYEdl_PrzG%m zQ-x`>cLReOuVA;r^Q|~ogL8ZN8mg0E_CSD*$U5+XD>xzNDy^Y2yRA|!F^qs6ycEPx z{CJ`*PX?o!`wV?3KWzafgr1~D#|6rLxz?#;=ft4+5@X-kTbriM_|lhK5?k;|X|%$i z*z62x89K_t!QBq9y;Y+?cCy^cZ6WUJ6KwEInK4kk30dRA0DR zKXMm5QY79;$zcK3Rz0Ks(wUe|E`_GHn!)IF3dD$sLjW)cibuFKXwtG%*# z_6@jaG#Of6da%zoJal+hSH;}4u+S!wF@1*NKG*Ot< zVdaZUxdLj*3e7HP0oD_NVHv2RUS7`_D;zxK262dXo0gVLg!pepV^E+DCn=us^%9zj1J91ds9m6ji zeQN21nF}Ds__IE6HHYXPiLsnfz%qcid7qW51VK;F6I5nAX4t$;a}=ebB9?*}G0wal zi0KMnT$Pu+3xsIP84W)xa#M)2YFh|jD!r;hc42H)OR*^7reju_^jtWT^qgOi5f9IC zG2ECjbZZswQMbjEuVR&=3_HcSkl(y4R-L>s8M5>yylZb;w`Wl77NlJ)`~((J>w~gd zf`uyob_N_!IS`3ePUcN(GXP6Mzyk z+MOclBZ#ACktA{ok3gM%Oo+bd!DAlW&hOj244jbb~Tm;4Dep{;Pb zi?V(w`TGB1>>Y!2?Urr9Rkm&0wryLhY+I{r+g@ecwr$(CyH4!g_n!UT?$aId{Ca=A zk&!cJ&XGCBxPWudETlQgt2Nh>{hNUn*HJ$f;(gH1WGvYrA10c8=Z&6l2968#qUrld zR~7_3d2AHPX)vm8unXG6B3ej?Wc){aV2P_^H8XPbc+5zeMXxm^Bzg$wl;$=?burv=k z@8Zzn6k^jQ%LOGJ&T(SFoKIF&tbqaprofbXLo~@t7Tq;o0b`SD$YN_6{@S$EC_8`_ znlh=jj*=Jqju-nzwPa8`w!`w2k0sht*+CiqD`ZJLuu2D70v41*wOP6@QqbLtEVS!ADk|?Re6^<+TO8*$quVpitG3;7S5XI5OTi&TKI6Db1b(2SBv!BnIwZVh9;f*>Htde(9bvgD!-R!Y2cL?+I3j;B- zyvPc|GW-&5LHb)=)hdc0ZKx*A1~!Aj+b4KXo8n#d-1`DULs05-ON4tO9JRK(Yf+EP zOy~K7P%%Qhx&!<(^Hcn6!3$c?R0o@M2;Rm8}Qd^lUl;en7A-dDx`L@A#TiwxF|X3a!rmyDJXdQHj5WUaxSYKjI9XbH`ssf zoK?>DKq7+w`sE7$-zmQTPkr1!*;W5) zmU0!3^GQXYlY@33L87Z8mL^TPuo7Zdcrl3=%=C*r;&ju_Idrx~FPkto*)DT7 z+N}&gmKO53qKp=klO=zB;;DZ+623Hq+0~-cGgy(Y^r zM!j!zL!1gEQg~^tbJ9tpDNDo?s4|tM1<5)+lZC(}A~JrllMc%})CB#lmDDt45y!R; z@n_K0tYXycTAgBx7{B`QkeDefI)}CBIOT?O9Xg)u-;sQUx*_HJF&O}Bk;9i%L4i7B zQLenZ+)%C(jn7ty$vG>QWi~m6;gFCJBxEn8{fpF_2Tnj9Vjy;6<*6!i|4?8 zdZX0+GGj}R?1zTb@NA~=Pa`Lx56$TCGMfz1u)X_1oN*aP;)T8sblKWQQ(Dt7H$Oy1 zO+}fj-pzDb?tskWGL*9YtJZL^kP-fR1<U z;#G`AQq-8Ff=>RaOC-%pdv4yc2?|(O&H9UM&^;J&imUQv!shey;}?a>95PGflA8ry zf6>v&B`+&2n?z+4lymfmKPJKXc^yojac@9E=B2plW0^4QhCaU%!17a9pcQCQPb5JOyP$j(^50+BOnG(`6R^s z{Wa=XIK2}Ggg@kmVr~o(`1Egz38%(hF#b&Wdua@CU{vq8AXJaB$LNie?D+e#M7;?P z{n;Qf1gH*k(;32&K8ueWcT*bBV*CR!_W9&JXilTYI;L7tq4YhR3~NDMeKuhVX&9_Z z-NjKTuWI}eF0s`{TxuDlG6JFQnM8|l+Z5?~Q{{Vbc8vj`rMBqgJgSlI?j%FmT3|eW zH4U>QO4puq;uQFEYtoVzajkNmLC9{0>_@e`%kbQrIadJFxL6wLWL-L2iW!^YRFYz7 zTEsTw9h+y~Iz>;#{^H22+9ritOChJ&RK2G>q(qd{8PzC!UTD-CVr41nP8(BI9=Xf9 zeYo4q7CJrrTuV?N9h{r@wa-V5;+kC{KTh#}57(aiJwpw^bJ3=_>-;LCdDW7FS;kJf)_QRv zM7_Xc{{!?|lb~ne$~DDFW{W8v1H0SjH~qX%Y*j#6Rr;Mxk(dcsr&CaqvsDYi5`dqK#2cykqKrKLiIlFdU34tzhV-_HtQY3`2^AOg z3%m_T2Vh1d_Lgc8mmOCKmpvGh@ci>s*&UHt9v?n3N`g)XQ5R%7@ei8OJ2xz$c$8Kh#?-y#O`_&4}7OC>v@=^o{ARUb1oi`3fxfG+Jr&q+eiRp2h2sz z;ZwrRox0_d@p4yWZ<%+R)}wn`5JC9rIOsX#ImO@dB|ZLFBVXY?BI~0)XIVIEdxWz;j)H#hsRvov=ImND|3|6@bW| zX@;W?VN5myWJB=Shhf$0vH@)v_Np-$?M`7x+HX9a!u9lv*m4=RIaxA$nlAqxbL{X? zi6sq!FY7GthlBf-Z_YJRuC+m_lhg;{0T3q>(oSb>DD{N22M*$BHZCk^J^mL!LR-;K zv|803EI%EA=_z@3WEkB)5HTT#b|)iKOJ?{Rm`St-Szyxyc#k1MXb9GQTI${9mFW$3 zmLOR9*2lHA{^zJQr+Hj!t^IM}y;TXt2nV`Dsg;FMlUGBJ8)3iMkHl%Low}OJVdxmUp@wfU7UvOl1pV>qBX3k<>u)YGj z_osjC!~A^^x4~Yhs@D^o%48Lihwb6axy6dz_*9CroHBcW&22;gN|uSoitS@ZXiUe`C(boF`#h^=b_G) z46oQXdiQ^=aoLpo9yd>?H{E&c2;Ui-ExE~CA-*omflS0`()nu-`6h6km*)v&bNj|1 zeRI{R@aEl(i`Mz|4fvn^#dP>)>gf;Ey704${68;h{IkFK4-Qw9f{YZ90JL}Md9;?A z&bh!sIoRV4zT0|1AwD?BGupuF44f%nszRR6WWbLO1<6#npnzt&knV_-F5=zm;}^gV zsu-jvBv@d{o~gnJxK_+#VC>d%V++^#R)iWmF?8@=R7N9R6xBdDg!oQG2nAR3jEYkD z;{H?2-LU}aH_7NgmnYpG+^H|BPM~>?c<24_FNj{p%O^{{UEvn;SdmI~bao|Hvu*zwNp#s*bB6 zsiA*wjuHz6@8+eN{Q@Xmype81#e1QC0T<01$VZKUO~67=oft@i$j`7j`Dn+B*g!VAP1&9ErGO@#96ND{|5vhJj%_zln<`m9~6Q?}rxZ zILS~o^R(&t8B1XpkXbLo%KaDx!KXPLqeoVpJNOJR1o(nsN(T=iOqV8ZaJ#mkcytA8 z1mo-*UuF24U#9LhJK1bKe7>i}tL?~|bac3&T#y~@o?a9!W1&FXQa*>zvCMK@g=58! zB46M?T&{-0k4ILmTS>95GY#fO$AUo1aI7DyQ59-j=`oq&aIU7^Q_rU{dTU*jhgh)N zDyehPt_uIi9kPZ+-u2vFGE#UV6d7i;H9wB)kHyk4Hn@({D|J=C>>90b)6s!s-<@@) zFLzlV1mq)S(`_E`quT9 zw4jvt%w*$YVU=4@BWFr)r5}?p=^ZWEO{aoW6#r!qq%o$81=eUD62Ug!3uPyODAnbV z%23D0K##9BH8vx+Z75uIpcEMKUeCg!Lj}T(7w2WJ9HcU7)#N6dTh;XG_~}NPhX2C3 z=%;zr1X)L0+plgIVootM$f$12(B4+G8-cNwavrzonyHHMizTh!d1a?*qO|P1F1x|6 z)g>Abx(NnlKT~!M(yjEZ9675()22!l`4LkCwa3oL)AuMW(3}3oZSr%q+v)*Cg@F#g zq{jxb-S_oF)VB8=^CgqVVAc=>LZN18fLw|+Y;#O213FHj(bbiakovP#&JObYdxd4Z zy+jRL7anzEo?jpR&31Fp&a6J9puO<FkuuSBGnH|uEZT^H9XD<4knnizJ?!}DCpki#Pjgo zDJ$8S7PU=noVS=VvPdmL&-s;6VL>YbZP}{$n)p<=8OLj}U4e<&60!H|@va|J-2B@m zJM^e1&tMX&y{J$p%;`!b=RgP{Rv)2xGvgp8_`ch0fcFh=R?tWA1dD7fEmo2}4EF8v z@8YTD;iM zqF=R;#LN_kIKrA8Bntt$b2RdJZwN_dzVg!f0!AZ|^d-plq8j|N z8UvAv@eJgG!xA2@=+}s23Xw6fc)=YS5mE@(vor~2q4L7{1%dg9IQ{l_9ashI0=DPm zvc+a4CL`e(Yn6;S#1cSpPsb-F7jjM|PErZU2GkBOToSR9EwkOIC3xp@1B61Ff>MLo z-eLO?l`+XfOI?AiC8%RDxlOQ(VbXB&>Y(u!f=buiUXVnP7)@bvEKeubIboFqC|Uyh z5i1ShRull3iSV9@B+%r~c40RvhXR$Ppfkbqa|tGz(<2@6){Km?;>utN;xAR=#1>?P zaiPhQY2A_a>|$?$*D~@s1seVD)XvGD0=<}d{pfL#WBx^QijA3MVZtTzKK%2OS(9=# z`^B0FQ10Y8;zDXt3e_YM|NclNdO=3*5hyg_kfv=m`m)tC`|P2t8=B_Z(DtU^X*xtK zp(>lrJEMQOF23;nXQe+&Y9aysxg*E^KR7!wwl=m-wl?O5LRPkhmj526$p1@b%X+?r zqA>8rj4xNPH1;M4Ygk-dS%l{SN}8UevX*LCKJDLM z;`ES!LWe+$1r|Az5Qq;K)Z!_#(9`H`{5?xI9|WbMNC|9WN}Gin%-ju(H5xk@hpBDq zLlH%4W7Cw%pioGjTX*M7=Z3V@bGNBD;2Y@g{-P2zHCbX!Sq_R1bZbkOAaOLca<7G- z_TNpkYm)F{d{(@*4d1Pt9m3E#aAwSV&Im1st_sY)Ki%wWM}G^v_xXsO*{;o^Ur9nj z1cy;xg#3^}_=T&b1>I|5Wy{dlrN%l_z76EWL)vw|gYKz+P;Wr2|L_lC)MT}Bgub5} zQ}-vH{}aFGKjZnIJx8PrWFI|z(6*Vi)hCs|8vp`I1C}dokbiE=4R9^!_nhz?r5g(x zZjVq{DU}ZRl)=&M%aJFyt?MsCU}%(J%wCBX1^NYu#~@YCc?&$b!swX5GieI_qWI!c zafkq2pRWBO?S5g~Kq*%hK1_`e1Y9#;eIa8zvGdd807k)9YKfBN{eK&j>1NTpo zW}hbk8|ac%O}h@>HL!xud@OmNKu!>WfOiBT^1$__zIUFUc9+&`2R&k=kJRS{XCD99 znT56hw2Atm7H#D8(grOa)N{*_@-%Db}bs>olFSpyI@XGPGE z0X9LuB+L>NPgF67Mg++May$cg#?md$8_bn_u3?@S=cig- zFiukwg*kJ&9j898IF8@mrn`MUfqEFb!h{4k{9u#~Muu<5{V1$zOC1iRP}q(=}FW ziM8SW=3J^(R+<*6)Rh@0bO^lgRCg&bP$stPtkssOHtHtCg6nWSdY91>AGX!L2l^v< zfABC|ZjLElDje!rU@kJwmXkGY+n;VfK_5?;OT@ww56pRz7&Z%*Gs%J3K2F#;yK}Zm zyWFTqrYV-d_CIE7KEKS|(!AG*WT(cK`%vYjGU%?Ey5t#qihb9UDD~rX(YnLDzi}YWw9M|>x`V_Z+s6!gRs-&BP$;r2kXyTpn1_+!dmVU2q2O z(T$(%ap3wO*3(G{QxYzHHS{5t8EY2TsUG2+z?gLbS9Bs+wn_o;ZWAB>7+9bqMB<+1 zuiEi&!Z3Z51%SjWE9oKE$f!GaEYwQkbzeN%Y~poqO3F=VMmjM3e#5%P`x&_*Notdu z7c#@qFEuIy^wOGv0RGId6{LI_Fd&dPCHNZ}FT({bo7b<(7rk4~T~aKb^C}@1P}|$P zAFo%s`(kqHj%9J-rCsNz-$p~Yo-;kKT93V^_or?qoAoLHSHgTWcLtPk%Wf#~oOp#R z?1(4{hQdy{QyGYMooG#HM*4r0CG3;osx@vor!?%5ZEtpEAZ#@hBJrfyZ3QTf*zJ4$ zBT}1Vvq8GtCHvMiKH+Otqi!|4fDGsozL_NcM%G zdIgRnR)Lpf-<>P4lm)ZoFjfY76eDj=m;NF78!6J?7N`eR^Dz9NB#fx%yOC!ezJ+=x zMkp!lJC62n{#L=3YvyIC9^X1>$HRxckR(ozO{f&;0#L}Z!MDlYp%e8YCIqR+ zLD{oWVPVlA(&Niyxm=HG@^sqK8s$nU^bxdpDs1XOfpSMy~)M>#qfw&=)zzrM(NTvvF%0qnCDA zzmb5+x>gNS8bThw=&?=28IX>Z~=EtdRCXxmw7iEs)~?q zyxCaa9t;8#g{E>&;c?f?j}~O*&~L}U~e4KQud+Gx?JIvrz_DlFP3?<_R}g5MU$^5-*j$^HakmZ|^~NGaMUf zG#%S=_4Jeb5~Z0LR{jgbbMI)Dw`S=!Qv~x3 ze8@zzbI_@=OScM~I+^s8!|`*{17a~~Z^OkAc9ytMB$_;qcSi=NaKjG_#@%D(EoK2- z*4loNpweS@dAdW6UMqfhV=EEc5N@PYTt*%-Zn{G%+;q7Hn}5*~o@aP?lE+2P-DuqID^MfIBdLjj4RXN9t}Q+W#7k&ks)a*0hRe zzAdmFeSyvy|Jew7pp0|&7)KB3L)ku*)QznTG=`vJ?CKe2yxNt5&>K}B^lm%q7Ag&_ zHvRxwriee6eTcrZ3`zG)Zbo8xX0|&C*DdDCznxr2$;zVLK2jp@heQ$xFS5bNxL4rV zPr^4qCOjlyS=KJNe5Nuot93M-LMo@2Bo-o7lfh8TCe3D!)dLXl`76eO!xEjFf~~+5 z%4jOu?g%95gs*$uT9%xd9=2XM!=n7UJwwECY!?#2R#X=?g-MqfTpd3v=^^Lf$fce3 zKB^p9v7Q7_0exe9mwVezyJe+oI2r7BAR&U8X}v2?PNI|?CZ|P~=(eNo8`GH9mo>WO z@2uO*hxzwUd5B?0TKJR;nt(9h?L>pVVI^+ufKB#Q}JFVGXFb0!@~hd;OLYfhu1R)}ZG#XJum>%9N8@M+`D{HkmdKnUF<=E0BH@ua!?=+ zgzXDPm5kXca|4wMT@{wJOlMeSGv1F=Yg(S7Ofe3f$KCdd5WKIHX;y!+ipOyXDq#3W z;SAEcK!dH=cTC9cfO~@UTMgK`knOfY!8-Ms;&VmYJj^h`^)DR4#jh8#3wQJ0jfiIc z!QO&zewj*9;sQGvC8#Abg^}wg)|c-?fDrN*-tKYZ$J*1!8Cw7N3JpOLV_rr8h&eo)Zsf38i_=P?Po=WHu$`wv9_Q|J&;Yry>M=o-kfg6~kw> z=d~L+$&IJ$Z-{g8sVx5+(hr-T1*&gQHZNMHXIyY)lDVOlzHPOfeaZ=Vh1p~UdTMqJ zn=3%-g2ih@ci7qE?l(5k$~yUVG|~uSl6VdPn_Mmy@FxTjmMwa3E}fTlzSRW*wT>H9 z^-N?Q>@mF{m#~{X>Ywy9@iNSU;AEqjJuWasE-*7PxHZ*OE6c>y_&^58G=z2mf!qO~ z(5yb7L_U0H_m6N{BUdCG;@Kgn(w3JMT*9vYjz^w(6ktOyQ?#IpwJk*9>l#+P;QJQ&h!UE$#OdQ^&WQkf2Ec z6Et#6v|B)}9(+$Q%=SRG)pqvjEhczd0_`*MXIVo2c9ukZ@u<;2fj&_d%FGR-3wH^1 z@OdR>Bz(iA9%fS^pnXLkic*01y;N2H4^$LvF-l)Bw>yH0p}#B&U`S3b;0a+*P4g#= z{Pq%;I^3Qhh$Q7Bydh5G@LO3YOTTG?9grLjIv-g0UG5L*-2uxu<2YAD+$KKslp1fY z8s`_~BTNJfQ$Nlak3au0fL22ConZRe$tCdk;$>`q{F8&X zWw6`rvl;?pK-c0yA&CJjc*j!swf+6U5h}sYyc4Xj)S`8kT!8t)fe=GOf%trL0I^%_ zc7IK9_Lc!6jI6nux|n8T#C+Zkzr+1vwI2=)b3oO|WUxm)mz<*Ds6OzI505YNivXp? zX!)@k5_~{cPY~!nc9V6>nWCILgSB?Z*bKYYhq5`7()&idPg^K{_4v`- zf-Y1vXQf)oa3t?2Kn{XOe*kpLji{^k5pB@GEZ316zvhQ51X_P7R!4;yeP($pUmE4j zdw_aZL!&-|(PFSmS*6huEd#}txS@PS8IEd`XSTyCbc*M#1QFDzb9K3@mF3-Lte&0p zu&{0@9@Cg#)WI0Nfy7K*C3peh$`!xRHRvfMYr78XE?y-|6lQ7w{AuRO_9H{eJ0hy< zz`X3}FK--ID!TWA;O$q4!ILNL6A2OQt_i||{+7XLbX?2dCaw!)?(wVs3ARGjBVxhw zPLW+OJ;s(-px<=QivZBh|B4(_Y*f&Sg3_CcI*(nSH&|B1SNjr1vG5N36rn*4LbaHF z$1+kYLh1lf8`qI1&=WowU{(78n3+4D~T5EX-^u{md=za zrtONFgaf*c_mIyMV$(urN$Z7gh&ziYtWVmQhwT$%3meAW^(qUKv93!%8`B#!cBt#G zbgWPAP}p00EM3bqrlM73-&qpkte+}UqTDJO9{Qu+F>1VN&|bQq+JLS~i{%qtdRKnU zevd=s-2Y;615m9w+hymUH;CB1`F0i6XT_IpX@jH_MMA`2y*dm0B8?%d-+0(>9y_t} z=&yN<0lWg>iKV5 z?Ij@QX#T})A8ZNn6oKsIVuo*d%=u4P&aF9}dHE+ll7DjLe|vTRpX)LId%yWVAfzDq zUW*^;L2sn_irUgB5BM_~ZswG$L9J+id2;eM5X-uiv-Ct6t&!@-9k3ULpYmZ2H#DKY zb+a{*p)aeKBlb%P8rTNd2I5NKvXNihtg?=Vz-@xj+Ef%Jh!)r*aEejKvLK;(1JUGJ zZhCA@5%*3RtJUo@DI6kZSfTRG6tVu0)2BHsyBl2w{JW9oVj%?a1S#L7pta0F;v{Rg zZ%hI+$@#7)&L0ygG0?DPT&jD|py#xFX`0S~gtoii>wP z#LVM|?SIAyKtN23^(Q_^KUJLIe>h8uHm3is-m%lNKT#R#_WP>!^)1=SL8rr=3FvfV zA|)jzg(nO{zm{6%muMfPA$gPM^~Lr2g*V2o#c)e#568r{?RB3q`NylP6R4#}00ZQr z&8&5WNV!3NpSWvsFKz+2ee*o)34BqR*F+KIu2`SqwP9yrYF(8KLrSm`uPkX_HNR4uHl&XoPSv*+myI z{55I4ebr&SJSJ%x?179~%cI`YeVTeWr1lE!6GVt*3AfqAAL4}pyhxJ`f`%`2yHKBU za7XNj9007Hi&W@6Z!Bz{F1=aDGwSO&yr3O;nII?i{?KtNr8b~+ZgtyYn9)2|a z_R~SNEVu%u2EA6_D?S}vo?c(i&*6Sqt|+TZ3fal*w)NHZh2!6E*f*%v!E7&g2`bR5 znww3VX;#&jTU)d0$n6IIZrT6yL*ofj5wMZ!X47#FrM(T_vqZzLkC%)**f!*BAWzq_KV&-*F}ixcs&6 zscY9gWUx(Z-lcZ#h3NJe4AxI&k3QE)|Ly9@$Vb^-K#_%`TNFh#Kj z7Zs_q)PK+iOK(8t-9?Qasfb6@tXI*k=?j+#6tV6N;qs{ytaoE{EtCiA#eooXy&O$C z2x8rqx>lb6E2*=&O8c%%ak2gd9^cSf;I z5Qp#%aov2M^vpaSDP7Xz5vs5s8i*7SF!Ilk8T^;HJF9#t(>K-e&vDNAxf%ujahw&L zjUC+0ZA|}5?(pB@qd(pf|NYm0iy;2XQ{rFQSE#6E`C}&WRn%G`QBYZ#(xh8J(SX_p z_m*D^0s$^5tsvDymS$BvO5Dz*@6w#=Qz!=l;^WIF`Qd^jSxy7VJHg>N^*ZHBn~BHQ z{Riv@_*^B?ZoL1J^2lzuUkC1)djVu@x^Z^2ieKNbo=VmulZ zy3rcHzsiZT-gL@C&-&wwEWZI7aq z8cpEWPH94qC4@-fF6l}Tzyb1%JV7;sLQNle0lZPEY2Y)y$~%T@chBQmAMIv($z)a? zgg?&89peeD&jt_O*$UL}T;6utqP-0>04X$FMndfqwQF+s)~_5#5br#enMY;{(S@Qc zx#4Y?Xm$5+j6Giy5o{^Zz8m=hh8ics7E76Ed7dJI3bRg$)wPf4cr3A*+flo{F@@j? zv+WS5CRc7AiaSo!f0N@6KJG**mOMEQ9d=Y=wXeTL)&z;hYh2<^W+&67FR@ub!!m{U zqgENrOKou?Q0-3w^ITi8;};y__IIZ?n9Qy0w@5!zg}Ga_GYB1Xhj22%Rukw0FL8hk zmVw?jEsn#y=}CKL6NOrEQj>gGx04_{MFjgon~U%Xm%9zpv%538NqnhKxxw>dG65r9 z?#D|KD1J(IwMU{dys-uT)#yU$6ELtT?))d1_>@TDU6&LZNnj8wF+`cKX`EE*A~s)K z(uK8)^96E~D^v!w)TJ=n?&uJeM!vK|$vId;Y3Ifxnj`^|^=}O1FTV`G4|vMaqlmBW z3RFf#Pnyio`2mIA8IkWltNYARf=*XN5u|_`SVkdcYV&$Mb7|Ay-W7w;Giy=wmGz-B zk>LbE0(HS>vklvT7Gz;#Cv1^%phYqmjE>xn!8ayv$azY_M^o^hfv{_d)hCfVmmt&d z`3+O^WeJ_)Bb-;iONZDr#5j+!hRKIQ$t10sDJ0AAstIIbb!R#SXT&dIMn=kHj|rF`@XeRs0Vr*g;LG-^zzcyvHVtu8ge``0&4h?cL-^ zk^Dsv1P@U7d*E^Vg?g5ajs!(Xne4Bi;qX+bmNeTJui8{E3b9d zZQ7gKuPoZv8#kAl*2^vh<36W+uDRo-1y9zhhCHV~uV1I%Gru#rx;}c5`7_>dSt4c|p z^%VU6gyip@KYi9s7p{g{`Ud~Hh2#|QiFGA&e#`w3t@}o}mGc8bqE92hvee}qFivs` zTqbI*EnU(4En|NDB#JoROx8U(1v@Yin~MDLgH#NM%!>Nnezyw1K7dd8k#|gA^ys zhHYNjlp|wKBX6;RJwB|WejA{2V$39!^dUc@u;|8y%Fv}!ZfN3xbAIgCd){F?dE#jkUEnDg;ib5TA5P!%2Ghl^0+y;^b5i{T@e;TXT`A_gD=hDq249h6lB^D&dXu%gOUBV}E-p!&_tF!mJ~(M|DU7flXn z`_cWw+VBKU`c_mKBY~69#93T$-;6Ao0K7^f>|Hb%qek=a{n!f~rliBayIN@FIpIKM zC+9Jnt3yBytHqSUtkmY>8z`&-F6hN-o+YK?aw{!Tz}FBPXuO|uv}1a$xKqO$<5pBO zsOp)Mnj#2wiI8fi!(=$FduimHrBe<9UY1=*gzt|r=+1>LbP#60plGsLEReVV()qy125GVRQZ$7$jc zi=I*8ctR(Z*>K284uv&JqHuqGZ&chMEI{7v$KAJC8L63d2&wkR`Bcjg4@PB*p=)F# zU9wzAwlp@BxS1u0>D0|3BeSWgM4Bi9Svwn{P*HI$icmHg`4AAb;}970hMR-i%KnAZ zpCt*O+btT$TjhsT(ejjf4#CRZx5^>{ zgHzV2TQl>;xgslX9Yc+=h<9m46}6!0TvJ8Q;8`r^0-C!5s)!JaeYSM=l|TO}Gab!u z2?*{}FXKDWd*m5erxoZlPAMh~hwN-W`UD{F{$}E}k1n$S5fKTtPjyzOqKrv_o9DR3oJ_+iifQY%gi+NkkXt!3A@R9awqN@xSd0P3-kL(Wz&v18N# zg9Y;$7fJL55s833rz8P6rDg#N?H9pp(;F{cUqcgxAD)!_RJ&Sw+LZD{nf-mAen*Mu z@sfVW$_`1#Lhkt~X#m|G2q#@ak_h{^$GLHCga(0|4*(R3E=`1b7JYFq+B^+t^R1+S z`^=7uuJMh9h89suBR1=ag(Ox-basEKI{ka!2678=mTY0rvqu2ijc67fgIj`Gt%eIU zUG}!>*kbOnJJdvP3Z@r^p3M!4;Fn}DMKQbc`gWe|q{%lj)xmV7NYfsD97$>s{!m$T zIeZ(atKGs5dIl&DNSr$r{ub#GIuAcG#>S}KL*k!1nXTW$F(pk@7B1_hO!)(4W{AT< zy2hHF%t2s=ILk+QemWBz7YUYxtAig zSMAn?L8)(tHm>0jnj?`|(nrD-!znUbSQnOLF|CT)1WWFXDT|{kZAyFyulZwCS=6W| z7K0tmk4y zC$@x*$>;aay#4rOx>B01X=wRWqsaSwPlvR{*;&i|HR1)WC7}M1p!TUM@&Xao+*6GD zASI}>gjdzn#u)PAchOwQGOJ2hgDFf`dNva~O{=p~wCU8=m-N^+#;QJLYlV2qsazQx z&DX*bw&cim++-;KQ$636li7-?K$6pz>%5fb)b_ZmG}j$(eZlqg0cVlrn`dPpun*#K zUJ0~a{Qi!a-j+$tAV$&M>aAcb~!0DIYk0zM?nS*?DqPmxxZ6 z&f`}g7?aERu&9$V3l8Z}y`X}L1BmK^?aiID7f&ki*?U4lOfO!LvLB+D&noMzv-kCN z!zV8rY+(xdV_4Og^ia464i`8lW|W(QC1CQUHU&&9ncV%X9~2&pSE zs#a=`^chwzT;(>jme#GM^dbpOh>OeCA%$}Br1h#Wah6^z+@VA+7@*MCU->4Y;3#Lm zLz0hBQ5Y@8I5)+-2QhevteQ;|(#xLu#75dNzI++AQ}@9=lHH}o-=Xo#Y&LgUhrUIz zF+;arW-(_d8X}Gz*{rUq=2E6x=}0GZQeo5-6t%d`$xhCu zE>w8E*qmdY?X6JCIug$F`L+31Zsgt{c9G;|59D~x1T^yLcepf=9ZSk;Y#$io`}+5X zBu=mUs*)P~Lkd3+eYy}7>UR39=!@hM)@o4>uv|ln{*5M$KLT;^jv8aPSK_X0@rk9i zfw`ZP3g{a{Ad=92LCc2W=V8`e#wPh%bJ}dKX|ol?O^fXpiEkLDCEa*gy$woAv6`Q< zK`( z6&I901rADjJ6Y0tI{(wNapeIP!1F-^RbmLSh-GyMn_Jq$IuVcdt0jeXy!UzO*=Idm z5mQtowfugVK^s~Eujn>le-`*GH*G*!&IXeLh#2826?+k<{%@OMrn2iqoN)ip$V%`r z<8ZqN z`S+u_8nIu1ce7Sz++b_0R2Uzuj~&ngVxT`RjW9Ql4+>)G);*|c#UOBsBrhs>h3SkV zOE4D`yFqA;#>()8sDkj50cZCFeSqnbP!ZDfG}C`DH-9B5a2P!eap}L2q&8wd z%l9i__M%gXHU&tDfNKY8m4uw7>-d%(j2>G+}X%nyuCf60u1sO~6<{H?@&>L3$*>F+pP29FzmPl?6 zS=(9j32e^o=(ZyZPoCf|*ZTMlj~|0L=IML}>Tml~9F&P5X;Atj=X0krkslF)V=AUv z_+=3jizntFw+s(+L2I9yed5$$dh-*}q1^0*j4>}y$LJzFui){I7u(a!>+>8|%cMtk z*Y!PiArv}eH{pzxL?Opy@HD&lB>k(-^PC_E)RRDZ#Ia+H!Z@j(F;S*xp73eM+N;f= zkbKXeX&@UxuBVy63q4JsPx)C{;6Q#cUcL*+f(i>x?o#`cVkI~25D_=zSnr?26!tkR zK{h!DITWvNa7AHuP@x{}GbXRYuCd=)@8CL#^Pxn%)j-x4g&E6!DJ*KQW&U}L7q(Sb z#kJLIORgq-rOE4WZ)LR&`Q1a3Vyis2;5r>C$- zyu(wTrIeXnSoa?ln=t`yV(# zW2^{dwm?%`8tef_L_GcC!|#8Ef7T9Z%$)p`tq^ml7Tto_@OXTSeCv-TBw0bqfK6Pcn)06jCbc z?ueS7Fc>R**erKH|CFAtcKHP|TS%`*6J@wH>=-5mcW5~4v z$TWTj)g^5yucT^r}JyN*Ix$NBTg6dq=$>pd1DZL>Dw56+J zF<-UP@LWzolxi*#8*7#zItD|IrJs-ZwllA$}HHEhZb2d zXU@OjCNUjzr!}%-3n(|(gr$G|O34d{%_qQ|fR^NSZ(aSDa&jjN{>be&AQaE&pcfa5 zq2OLBe3Tx3Y%6@+QlQ(DBX#Jk3CJdsjhMa8dGc`eR7&vEX6`PP{HD#N-{&Wcx%c8=0v z@KB;LI^{0weUus4&|*&iBW!p&e1fnodi8}2c0Fwd^NBsYLpQ_H6>#;E5?ZrOsI)!E z@r+A&`(0Cj1m?sB(xW! z66h|@C144U8jY6iht?HMssCJC?N??8j#m(6CPQmZ)o!N^AF|Z9!+zXx$GTv1mN$vD z*vCw*%(7q;2LkK^gz9-R7qyQI-!N|E924w#hJn4F!k27&a6x;>F3jNv^hKN{f|vWq zDFPGw=(ThrNLvteA!zF0zqXoXJKWizo&+TG$iAI>oXCk*Ani4-V1wQ&7q32AkJ8TRM(}c^^XfaqT}tl`D9OBH6BF8;DJ!9b}W( zc_6@&HK4*AQJ$15P$B1!PuFL;m)H7$XaPboVpGhWTBaL3q8S$ge6Fx@tu`O+5^s?U z{6nQvTzZwm*|H+f8wNj75JPfD zo;;X!50DF_rsHH_!$RE3hvpTMm=>8EZ{1Z>~xeYgOr0JS(mB&Og`g^a}$L8So zLG6e1fvWH^+N6NWtNGutv;gi0>$hm3EAAjU=8mRqJP-Cpf7?Bt4+f`dJ_qD4hNmws$w#@be@wyc zno!XYA%Fey{;`7k-~QhJnrT(5b4Y%jX%Hyo!A;)<_*;CmqREL#|$ z{WAL0gVBO(vFRXp2i+t>rJnB<8#^>6ZB(g47A}%GH^SE!F6_`PZ|msaZX6olAw@im zL&wz?cNg8F-a1q-yJRoBcq3&y1Zp>M2e4Q=C?dITjyV4>%HAo+(r@kZt*S(&ZQHhO z+qSvVs>Z{qU`Hhvm(G`R}vyn#txB-x%g!1e{f{s^+hjeyl4Hy;uY zBZWD{Vt17$A->3DIEtAwe6ncK)f<9IWu6o77uQUU6q&6h)7Z)w5o_OIa$rwIRAZJ| zm$BqCX_F!(|7|?9Qg$4o06KUt7D z25ce{{yQ#-dXMuqwW&z^SC=ymYRY2l>^?6 zd?nkdVq+R{!F5FNVLKy_ZYdZs5C7Qs(Q;=VLOQPscVFr~yv)M#1Y)N*D8|gZ+lM0Y zOnKqEG?<;R2_c(eMlTE;yr^0G)Kz_>SET?I9>T^#M@Ji!((!DTtM$@wW^|TDX_WIr@IGmv$Y+>j@?J8H zw~b>3Jv-@Hs-!`?zR@-DR{F|Ylb!uWt5Svho%mnYjckOmIOAz`qd@ege%RQ_dy1#^ z>RyYM&ka$MH44KN4JT^;P8_eXi#O<=GW|zRoBcX`fHJ;p zZ~S&819d~eOLrtC$Qd^;oEHLXeJexH{ca1RXmbt%C5pMT7;-~*9_Tn;nlxQp>egO&VYnD!pE>k zMS?H55XT|1nTwXwLllS1_O9o?wi{|5NTyw_AWu2##3GvZ0;-5|1p^zrJYjHj;X}kG zool;Ay+-pv<#IQ}Y|b@RA3q?t+c6;7g+Swi+PIj&j_Fk@?e6QVe=B{8XhDZ7c8URD zLs1zeGfJmU$ZfdJNq3q`5fHKz9=(#o|D>g1_+!|yR!Q}2h9V*Jqp4B%A)AKI_SW6e zifAm(kNZR_Oc7;JeWs{vLYhsrtLnf*rKZkKlSQ1doWZe7`_$H1bYJV1gUZyt%{Dc}^Lz8TZ-NJ1T*wUnCZ64833fD>_O@5lPNWoCQ5Kf!r|?WU@)T zUVondVQ&AN1(Jf4ca6!rd4p~O7*Z~s=j9qa|(iR__fmwN>$BFm=^vcgdm4)rR-F+~d z5JzFE0IpenMY)O@LkhD9%j&=<*B!rF)qK|_@EDI{U-ifD2^QA{=#XdGbt0VN!xohR zGbH1M&8RcaVd*Hz8x3endZ-UCwLOFT`YPAWqCb6O25^a%^wW4OHN(TiKHyAX^<~Ph zbjB-f!MOgoZES+-=B*V!${2X(t!>4j4YlepJqOw2ygB;#`=A{`u7^4^dnEPrhctj1 z)IwtZns`#ACHwFrY{^mP3>>8fD^6uIEu%LgC^`v+>+~@P6dE{CY86R; z#<%YcFd(_9&XGlI5W5lM!u86)7NE7+lUnHxMS0la;=*iam)s^e?{$&_T~&J=*~ycg zuh>T9wHb1};d-?pxlOQ;^N7aelrJC>m!IiC>ng5Q5tIPJ{Mckv%QI+?kU-yIP&dR1 z2nsB=|1@{?yHODOkUz1D2{FV=ig+s!w zwk_c+6@UKZ+#@{%69q2<*FhNiq|Efau^z(Rj&@jYX8^0D>v_g^@$hJ$;#pbie0z9% zll{>wCx#ywi_)N{1nrEmWG^VrDjiX158A9iXSX(hjp!o1ZYsvUH;px=45xew82*JMLzHx$C8pl3P)58F|ic^Cv} zsUtY}(z+FlAOQt?5YuQkZmO_|meU+qo5@r-sCR`%U zRU0NHa|z(gzI1BTY21Wk?r@ae>`Lkx)KD49D5@~m!<8Q?-npe)&oArqV)g{r!l1(> zr3BiQ5dX5`8%h{xcK#N$;0Y-tEYfnK@%mk81BkS}7QE9bsR}oLwT7@4x0n~cq@}f) zp9V(e1yO@i4wx1o#nGslw%8$JAi+_0jUYvl$@jWP9qZwW$bS3A)`d=C;`d(azvNR$R}uZfiEh!Dh~X1iVB=OtS3Yzvf+%hcedL@GM%ie9Fx!xFG`)mWyIe;} zb69GM`5Wmee1{U3-6#kfi7ht!929Xx_Ls|-CgzfQGL}WvzsF4rxS>FRk@`Vln}oXb z8eEHDC8v`o?QJ1BXVCZ+RN!_M-s3B**c@X383TF8{8q)4n^&&`bJ7}NM|!tCbe}Z^ zO4n|84#rf_(Y2V}dA`G$?Qxr;t|7Z8rfJID#C#TNsZU3+{j-rsW(Q|A0>yppYNsN1 z(;W!aOlt1+XJxiNgj@ar?7u5j>~P|D^|w{W8T!W$n*R%>{-eK46>|?nWh8G>ZIUrC z;<#TRxr0h`2@H5314@NR6ux|tiebpe#qsHY)$i`eLX9V{t*7kVkj35n=xncdX`U<( zQ?WHb?lpd^%O&?w$CbM2=gZZi?vK|S!?>g!w5+}rHif^#wTLfhlRF&|0ueGa*WKar z5kz!hlJ*S-RK5g5@iEtcU-7OaVdYw9>gZF#H=amK2(CZVBFJszqVs`C@~QF(a!n%^ znEL4}BzhN_>yOQG(x;`qD;Am{rZPmxo8JL%hs{&N!s+3X)@zrZG z!%ouh><<^pv<-?*U$ZRK7#)?FaV28z7wd`;x8G|n zA{dt(jnM*Jd?}%Rvoe?vTV>N}88X+_N_m3>OiR)(O+EztYP+Cw3Uqg;P8TY53%L(f z|E>@d?&cWGu(uvH>eCLJ4>&}7XXlbSZYq3>>ZkJ3@k=0mTrhr)C{mgv5wN}v#|4k)KtRD_8o6Hf?XZ?xwf4WXS3hw^V@lc zo=N^jc(fv-$ra8(?1fU@FtAuCjY~oaKIumQWym{jNGp7x_VrOK~!xmXVtC#MNdZk#e@fMSDNBK5)2Q7a63eGkR;tVX3XGk;=fVDtxno0Cb>r{ftfyI)y^W-Px-d@n3 z>MC^fY_a3^a(R^D;^&B>zA|r}=JjXIT}KR3W$UETa>EanLYhuC2Wz8*SI5Hd-nV4g z&no5kZni7;3ilNX`WXtPRqIGv*g6dZMV>;|L~^D>EkJceI!`M8NPlXUbkmhwj=&!; zmqNZqVN|JdiDhX$zvwIU{YbWh2D1_780^$E{Weh}H>z$t%(SX{3}K@!2u$OxS+^)x z4DD`;;|sOle(N-AljwQUwDIFXBCZipTYF0R?1u79Tx%@E4W?C9u|M5gFU+JgKDbfK z>8_k{+LMEYJX!bM7hhGDo63&OF)CE|8M>6Nk-L=X{tP2?g&`t5()OaQ;}@X}=oRS~ zC)zSfDkWzbdCH2DFF#i3#$R7KWaawm$z&DQq>fAfj%!`$Rg<;%>>9efVED7H)@=H4 z5o3}j+;7N^&MUXidH@@l9(gb=e9#`>B}#a_t(fI3&U0&?2ro$5c9MRF?~Z{N3RXE2 zwW$dR$PSq7zuy|gwFg6gwS_gB_NT&g?quV8LdfpTEu=K#XRHjhy2|Rp457+7IclqI zp|eX^H0;x3feV!Q0oUH|D)fT{SWM`gO)UNgJA&Ub@T`DknE$zj4xeMw6N$#B$XRtv z@^(`U#xv-uABl|jbHy?ZWHNuuUBW|(fZ3fE=B)^Or5o}bAv3TZ7y5?H`J zO^(+^h6|{{Lk4MI#@z-YQY)BpZ~VhDAQTfXN$MjCyBlxRO?)iEWMp0Z=QpRf;j(!9 zGLOzhF0PF(@}rWTD+Im3`~zca2eXYEIcXE_Bx?x$^=89ZYJ2qS-?#pZ{;wvQ??YPd zHx>f#KeqmVvf^UKR^Ld7|Ji|1sH7?TFFS8#1$8){pO*4`nDI}_kfmLaFfuhJduSrA z6e$_soY1NTdfn(*;`#e zDZLU1;u-OhEP+UqlIEv)MT#wdQjvn}YO#~;)ccX4RhjqEm`Jq%esl(UXHdVJTGDA4 zppdezsk59~n`4>tdu~_`DWfdI|4;!6_gq;vf}pgO7bNyuR{Rh}3T9Z&Tx9_M`UG>c zQ{LsZ%0WV#%^-rNy*C3N5~XB*q8-Px(k{KtE$yQ~qFI*7p=NXVo?at&LNP9GXb>DQ zs4Q-9R;H5sqZ4U;$SMHU7GwB=Vj^Z6^*k(dJm*)zo!h@RogPR~{;8#@t4-+>Z#ISd zsmA&T7R@}Za;y5zOElV7c&ezU{=0~Nl|x8zutE6~UUN^y5L|cFWD6L|N(vZ1h$nGV zS81+HEOW?x)%QTrol@YNv#PEgq*={5Fma+OevXt+)Du8|(bAL8RRbrf0f=^YUIjAa zU*|!axyK#^Pg9ZMtzVmh(m8DOWrpzqr^kKz*c&NJx%vKe3=A$l`a*&_!@q3K)CRDj zU7V=&Q}t_{WaWUaii>w}kd0_9=2nr2Y4T;JRvvB4+%DUMB{2JANt!Vrpz^{(h3M{nXHIpc61j*1TZo%Zx!ysO#d%=%C?i?;ukg>l@=q z0$a|ksDx{^0kZIIx3${g&Okv~X_iQ;lkv;AE7aPcjOIq!pC}p$BAq2(WNgv)Tm)=@J+oNV1dt&g)yc=Ir6)&_gw5fn#j< z-!oOn2|kw7q-y5%884iGjIB~u_FxxF)zN2|)s^S|rF*pfMW10Y7=6mn`L}S6eQ7z^ zOXNj#PjpT0nnF2lA+ISYk3fqZ?Ir@rD`(LDyEaM#Ic{uzhA|!YYs@KT9SJ|Pi!6r) zgF0$@#ueh6!xacl*aqI)r)Z>OdSQO4!ZYnG3BxdfrW*;31Fm>1wg~w6B(^_)lZm$h zGv9|+CPo!kJ-i{Q%j;WRDfF%WDL2UWgth(yrUx6cQwd@2(y5!<1*79eP@r^TwrMOG zB(SMhK*o~OoTi@qyT}|7Y{c%tG8%Q8P1+5%VwMgDs2X1I>90YC^j2aG0Ror;oZMRi z{hS7$S#&&lP`LE|vJe|3`13P`PN1g9`B>z3GWHWYTr_aW_qa{7X9Vho5TuKl1H{U@ zK^;Sh#(Dx?=>teIzz==;Mp<`%XAW@wH-BdT z^Zp9b6>GuiGy0LfK0nrKL}$%XQ>`g5Rt4b~<~{+szGf1?NK=H--g3hBg9Smg`bF3V zvKrSa(t1Jz-&z5FNW(WL|I(j?k1wg#-aKAFY_^CffFz(Gml~W&oP0m9l>g;w{9KH( z(kiw(lzJkgHQI6NyAJe=T~*ip=mzlzO-v@Jh244&BAfm;#Jd+*^Nu+5mmc1qJ&sTL zPTuN0$g+=Y3HqSo`8{)JU8=$0>xd}YK)E2=^2_RyEFYRd>Sxr4=dviDg`0}gm$azr z@+U`*uH4P3vu9T5Ehf3ZJ&Z2n@49%p>6j!#AD zPg>>+W9Ti`=|8ASW3Ye@gXu8Uh5Ij+Xx!8Pn)g#8W<%`wT&MYd4W40af_30d@gA!7-aZ^fQks4r6dFlka`n? zNZh0VD_0lca3G6`Aqwg_7b&O|aifo!R94`%(8NmYuL5_iwo!t7J_53dQ|1!I)YpPQ zAJtQ_YrVvql$;CqYWEXsr3Ad%BR|(aft{!>S8>OTg3>|RSXbsPi7Q<%87SG!j5R}jbDLLm)iQ@UfI~XwlG2-U^H$Pshw;*(hv<>g*XzeEo5P#Na9pf zk(K~)nY9s{Ve_I3tMcpSUM`G=*}%(d1D(U2P}X1qYH`w)%>zkPTnkY#p20zdO47BW zER&_H(Z(HdCm{@aSxKd$dP<>f^wD^>mWOh|&^S1b$>q$P`d&{37{*B$9{~^}!E3~A zY&Fp`2BNqK%^j8`IhQK*^#~wHtOsNDpSTPrIlzE{c$^xonajMZux3GAkzRar-G||) zE}>>|l+D%c969*L_l`vYWM4t059_QgIT9xe6+>M;7_hsYO*Se1Qf~Omn24L!F$Y3~ z*$`L9ny^0XTjzj;u~ezL0+OmSFEH-=^{C<{$wG8(;5&>9?x|!0p6$Ep12{#GN<%oW za3zKqbo_bK$8VG!kQw8n^MYh!tt$LgJjq16O(3O5U=GsUgIjxgCXsTx`w8qP7Z*?P z%@uWdC`d>taF^hzc=u-Smz`j|Xwt;nx2v|^Q5Gj$C&TbQ&t**dR&IEt?u z3kzS>=Sfjp3#P`T`Y6xzRQRBFO&18nOF(DO8D;66vtA_6!OygFD&(_rCxmLIBZh@j zkf5d?Q81IZ*9ykJ6a^UjhIY0rCIq&~)Yt`O2=G2R1DqPJN?43zh5o|GE20qEj3TI8 zNF-(mf1xsNCKxMB*x8h@-286Y7$BW5IH>DONK`N!U8xIO+{}^a!jkOLu&83R=SA95 zwY@yq90i%`y|Ch0O3)xSTWF^_J3h&obwk9_?}W!xvC+@uR!r4)94H2^P5{~SWSRjFo`_9%#z0D zUchv5G+^ZR-x~fcmPFaT`W$=(X|t) zpK0!-$Uhy6eXL+H|(KK z!zj99?QSjAQ6#3JjVRl$yG?XfiN`hyg!b}!Npj{e(v`vnXkIjETFqFL75%mHpr=l5 z5)T@bo0+XLyPR=6;gW#t+*Y1%6pm_1ao3PzX{}TEJw9jT=*Ay7=u8-UVSl;ch12s^ zncyh3QX~!aF4-xIGQ-*Y;C$Y4I0~)J$qBJH1_QFoTJ(r53+r$-LvVdeHEX1aA`$0a z6+s+~g=Qm9Vk=+m=H*;yWlc+l9G*O`$pwl0_AniLx&PSL7xy#+gKUZjaS=f+zp*)4!9mVCfiT+$%lz9iPLWzuf zxtr@Sx_%gyG{Njwi=vb>{35r95lZCsyxdns-0E||a|gbq3q;TwF`c&2^40cCoBYx7 zyZr@5AGXSY^gCvfjRCJsVn}xLGQ90!78a%46a`|XnC6YCx+aq1N?WDx6Edi(quE|= zEdx-$mwz@@P~#Qf!IIti?(;M}SA?inLd%d_MtckfsuI{D3PfFnpxuTECs&%~8NeQ! z{tfA^>5g0Y;dr1=vmX{EA zOvnqtA@@RZOmzJl)4!Ju{af@}ysV5ZOoBfgK37DCUu3$d;6*{}>A6`uFj1f9mVy#> zxhX0;rz`L)`*pcBgkV8L>CQk@CvU&d;xE0@jE`v}+Ke&$7d_S5gf)h!9Cz5kOh}cC z7l?921Gb)~5G;>xT0UDlFm>T@O&Fqxy-dM9m<2M|I|~=iaI5DxPCjT&%StH4w+<{F zeI_{%Cey43B_o=qqTks{7}j?#oM?}8VujuBR3zx45_m83{EKb}0E=&*K=(qhb8jqg z!Y*(u{Og3c@7UPy2pKB|F0Kv?s1BIClwSjbj_l~p4}i}=)fHTj8F*>j!rx{u=d7o; z>7Py%q)8OcxlgiA-n>K8is@q|_=*!Ww7?e40&NGIJY9!m&1fd*KR2|XCGN`z>m@;R zPY%7y0pcY=0ruWBZz`xOP66gO??}s#z(kcS81aF%vx(P_W}q3eXx;aaVL4sJ-tzNz zYZ^p4%Os!z(HI2AXD;cYQyRd>JL#|9U6n8+KlxzR?=`l6b8an78p30xh% zMgAGsspk8Palx#i+42V34K6?74BCz8d*Cr7**!DGHHg+V`}%wY1J74Pk!OI-8yNxD zWIdXQDWgu_Gg|Ho>iVZHLK3bh>lXJj)yBs(5zUypR+1B}5-DNn-i`es#+%%GhN2Db zC~z{n7i;a5T(a^Kx#S5+VWZCp7?sVPmIvmOzKfS}{Ay=)<)~#R?uA6vZOe$R6K;fi zu~X&iQq}Itk7)W|(R%np&mU}v-#+?)J*Efn zp?Ng>=Yr-N$kbvR;-U}0iVEgUSlb)7>HQiFoTh>p%?-=tIM(5K`h}I~xll-va)1PF zFSE>_-%RU8b|Mi8xdWd(fH=^g`O1}vB5S46`6-tjL}f7b{@<`}77`zQ1zfzkADMlV zceq@gRght4?nNaun?adWO1E6BBB~M|UObSE%B}1rXLKm;rG=klG)Q?A)7LReheTiy z9@rMD2>9!LQYd<#b$2TT>+>c@BomxIojiGKTD*RvA-q^tV83BNjGU^y?sv8c%`Ul- z5(F`1KW{p!!nhLpe1j053^Og#5=UcRkz{ua-2x$tDYc#{$s?Xg9R?zyB(}?;5>3&? zbPb*Eo#x|K@?Eo1chaFIZ0VVF^-4O$*1dwI?z9j6ujos34fwiyez>FOKmW`kc;gLM zUAYtV#O2MqXVB@l99TK4gR}h$@gCr|ZCZ!In~c&_JuLp#0t+9k9@mF0)G=vz*#KWH zza`Opfw+9wATQf>5$T!k=l@zCHt}}BvKCIgAuK_LZ>np=*dFci0#ZN9f7yn~JD6=} z^XJVS+ed=jOjItxD~h;Zt5-ccuD#5{QKco8vgEmK{=i1@Fw(KkMszWl$y06qo3f>- zHbyrmBj94z8L8`Qu1gchDdpCT$;#Sl)yS}_?6Yg|%j@JrO-Z-0E%05k!3*Q(mOsl( z$EnYB{yNBGf{R>lBO#L>_`$*MUD+l?)-85{f%1>4eHobBN@oR_EzW7H@}o|1Z2X5M z(qXtSu%i+w=bkYrsB~)_#Z~%!Uu`cCu7a{^J^$*NC07F|OH2$|q4%zkt$5k$uPhp4CWFR`Gmqn{PK@Ojzw$S2m&t+ z8$oB}htdo}(MsYKDiRqgk{tlxv{($u3(Cah#cnOyeJ!FWdI`7oq1)){5l1QaIot6|_1K>F}ZG!CWmMWVkk>nKYm&`ok@dZ);Of5wA#RZ>tf6Nb&Enn=xg z4aH`?+AB?V;D6=_p~my>-EWx>zhu~d+9x5@W4VGxdb{9>PNqKm?Ku$zy(N6H7ikRi zKe+ezj|VNkf`i$sUiWZ7^(`7LxHbDWz~#5he^T}sD^fs(uG&>Qi|kh4usZWAX9d$2 zb`Sbz?9knAtx$RnsrybtQxFJs4apn*4-5ha=g z+w{GDgv6B!gz5Q>_P9ToMW7ijvjkn{FYEYev$T2=B4qrXG+jZj3ROSmQlj^%id%^I zWM1xn(rNA4lz}xg zU5+)Z$@r6tWK)5uDYU3uSk9$~nINj!R9i(!O${h4tT~?n_zpSaB}`XppFl#KWmzvp zRau>>skb|8lE-{@=UcwOlM;SdlXUaKSOV}0KXK06|c zUI&b9#)ImrO_i=vtj68r7+2g-!=H|k(Av;Ky~3w)X!#w#JwgUolC*fvA^BHFr=yaO`-R4?f;*bRP8rt>!1Remq` z^+DzM(%*IHKpFB0x1x{SN_S}_c6KH!p$SyK zuxZ*}QMXObB>v=wD2g>!~s0}`H^ zpll$rX?$9t7N*TKN4Kj78uZ*8ai<>f#V1uBXDfyAF`XT`l7wZeG%_U9-X7>RdSwZ= z0>r&UsJ65*mDV_z_+8js>d8K~d9FIH-nzZ%`Mrr##Yw@MSXZZzmQqfA?gz~-Mr2}J ziMOouE=Cq8OOxH|=(;bT$iVJz_zB=;a`YsflQT_9Z9J~;-1AAMf0`*ks7#I&Z_KkL zN{}AGK*3voAir|fo6F|vn}qFQikV$9w9DDu>ZDn$#?dUdgOlO6^W!(*Pf%kbPuiZl z|7MVGNBaIzVPMLnv+Aaz_O8wGSBM5mwtd(&jP2*18bhB()A?aYDx^W@o}YSSbW?GF zA?)R~*`%?94WJYKHOID%inG?0d%pIHm^C?ZtF&O%Bztwkqm}EWwdDmj822j<%DkcJ z7U{tv2{$@c8uQbP%gkPj$*A~i9MTh~+(;~juvaoGV|A0C#;6R}Ofm%0*e1f6mGS5&n4_HES#5p3 zEY>+28aG8Zs%&suw7<`_15#~+?|9qGK;9nu3~w|O8v>^cKA^)#0we+EKM(ip}2g=wG(toG$# zbs;ZN&RaQaS8pF&Xf^Law|>)n#U?6y4d2J>l`6x~ExHO(N;n{|h9;O>s#pJ*mj+tI z=Avkp=}_0!s6LPAEh|8O-!I(FD?qBZKtPi8SYpE7V83*vH(&EObrmMTb9TT!F-l6E zpN|`5@N)|KKV;Ocs9ChO^(as(U29yJ(fw zRwcnlYLD9Nlc_vxGF0lKM`LIPy9p-cEG$ZC>#zA|hT1H{p)iDk;k5}Hwr287R9$cU zqeLA72~?Ad7YEuefCMtNS7!qd+?}^hps$|6$ao_*U;u=%BvO*W^1Qt z&Z*Tb>NTuOof`+`u#;BHn$S-z+#mOU%*0D_p!=O3LXH(77$&6g)ZStt3?qFG_q%?e zE3)>xxR?^mq@J5yt<(~;C-(90vR2Ry^D0p=Goq6>rH~Gt36IpaOQ8&DS9~rwA1ayn zA-E%Xa)D&_{n@NpKtY3HsG?1^Uvg!iwA?L7D-fazxt~##a3)uoJgl0d0|b%s;pq}J zT|^!!?J-ScEURSC9aNM;ek!ClGkf)jKirZ8rkhCBXi4iEoNSFy5ni{wlq!2Xo7Rs$Hv+sEl3A(>B5C5|3!Jb@kTXZWekgK*jKegCh z!`CY|6RSnPiZiI9Q@U5U^tK=~I&zRnN>^pg(%8(mf1Zw1Mvzzx|{Hp zrk1j`?Bx9Lj>KjnHwSPN97ky|3mh|rdmpOLWyWq0e}yp4IM)-m(S*1buy^T&Ri^LN zBhhdqK}L?a_E&%zC`wDda;@-Ex&Lk zi8opqCk163zpalm%&z#^nRC*?!iJA3AdMoGZA25Gn*U z;iXt;{;d0PJ>Qj%j?4G&M^toAfl&?T>Mq6&2W8#}Ig0&PQ0&}IGenjRg z0sO+8epFFnuEre{M#}WzH+};N!1uejUJfVBF%4fX_L*m>)pexm1nMBnA;-NZZ!htC z|6G`UC;BxEhb4TFw+HqR-5UbD>(omxls2pk{cBl9wcRTux`=QMXZ0B|dnEGq$SPYHS8r1Qt zPY|>dkHBXRRK*!6j2tnSC(#FIo1VEczIp`x&K(FmdcjkV*2CnPxP|&3T(CJ_4E4?B zx%xUq_TdEV=N8>9P~v)r?NTq-Il~eQQxgE#&PP1g;#5c+9cD=713lS&79g)9X|s!( zB20?wH3U`F+9cyxqLAoqbZyQ^s4rF#ySwrgnEk}0BpG^yWhd9+hZuM7^7A~;E?lX8F8nP*m8p=IPS}Isu2bGyPjN(2U;I?t(ofV6wzEwyx(R9DtKZQCgf6Y4<|5_5N zLDwYfRU$Q$?)2u(yF%lV(Ye`#<_{}8Bm^UFpQxvaP7Yz%0_or=3l%~;Z_vY(lGAUN z#cG2A^_DW7h_n&9P)bapWN8bB-l?N zb)N(%+nPd;!VTNRt6O7c!_DY+x)euAWWp_Tuyx|P0TSoa@-j} zmWzn@Q)3Y+GkU`mnf*_|%jztw_O z?U21gtTWDr$~CSd_a7-Py<*xJTP*Q2WgWuA?LnYs;#8yba5fYGR&;-P3UU$X%7_Be zB&=Qieeh zz_&Aq@K}Tw=evQPH%ax{Kw-a24ObblRnLI<_&vC0s(mCVd_~wiI+9fD(wTKmj=zjC zc(aGS&_`}PK&io6ylar|Jh`HG4?Eo|6mX#PzV@v)!`=L>h{4VG)O261{@WzlOadH7 zd4e16W$T6%*(FEm#R%t;OD@!$Xe42l?dE2T*8{EkTXA@rJ**1+i_CbhnVn+1De(hYznRi@Ce*PD4eLbzTh~x;dZ_l`vPu;)2{uWo*z@*5@|b zhrtYawqRwY$hZCYji9|)jLo`-vB(Bx= zAhy({rYy83rQC#?F*;-&Hdp>LRgg6?aWn?}tHzAz|MUF6 zh<9WvTK<>gkb7?FyA!(v7RL`vg-TeRc?gZ3q$o608U$FOfB1wUFmMCAuATU{nkhEM zY!~)g6n)AncA6~cS^sj~<8agQDr4j2=I+ephwM!OFr-U3S&qdxH{>1hzTK9{xvtc4wND*HZp`q}3A*%ZbrLSk@ik2DT_toU?K@w_NqRzDZOwswX5C!#xL^n0z z)IhNw&B}v;@6AF&>yD*McW!@G^IW*m?ulHFH)arp#WO<`GGlhLvoR7j`_K00C`*V8 z$H9g0kR%Y2MGyU5kg|(Gob+xO!OQ~F0&R;{4!br68Kryr>rC#G%V7?{zIxo8(RxUN z6hNr1mA{S|eT1*p_0K{p)|o~DtWpA2EQ=4>2aZ#1v$E>rd%*_mAfW{EUoT^QalPkE zlTH|J*3M$S&L%p%vD4n5+r%;Py;D6J%>)(zt-}K5)G@DvOm!i*8yPM4{o#zS#!hCk z^|GB6l6)O~KwQr~Z+DD%H^ue&7Y`vRiy}uCluO3KBo3{)?=IEji551wX->{7Yt(3V zPT=sbyJSRpJWa;#*JojAkRckqKB}!G5MRo_dh;Pe!>ks7y)1kg`N zA+JNU)Yo(Ry6m|D2J65nM74Y`**qr{YrTNK*eG=o4NJcBjMhS@sUqs@Il6cb@o#8L z2`5~W%A@{L*G#d!vC-j z{r^_&Khv*BCGCF}XLaEjfm}n*uR@I)>c0fPD6;X1Nn$8q(GWJ*X#~}mFPYX#dvT5P zLd6I|zbk5uJL)&|$P=y|Ob^F19H%<#>wP{yU!nDI&c6L)P)JVq1`_=ZQD{0u;HmPx zN@MECIuZv{Nkwy&YO}&t| zID;-vE3c;U@21r`l&p<=uS@Z{xV*#VLlL@!Kviv%5akL`Q@u5)P~>69eGL7Vi#{A^ zVtYPKFXQ1o+_XlBCteNYI?u0gbwV0q)u;)Urv&<} zEtvg>pa#!Y(^hE%QLh6{ViLl%VOeAKpEDgA!>1Y9boXL26gvHy4EBnwkAm@m%1QK723`xGSaX%iI?5gqX$7QH3us^)$)#DqyhRR;qdOD8+CY+vL&ed z(D3)=HnZuHr9rbL@G{axY(nKu1Mtdb^r`Hk`<&8*XhkJ+U{ZL#;#_&^9BP)|StUL? zMF%Q4Oz=UTST(Io>MTNy4nC`Frae<3Vdovr?4BrL=LY0p+a$8#ULFrL|;Gueb;97ysYW^WfCADYQ z!?YsWn)hjA9w#k-o!nM|a@yUn(s@J8IMrm=`AUtQH3>X@fMKk&qf9IP*9yGaXN;yT z0ykZ%{+8-uQC9?f>q_{n5Kh^+$q1i*g=#MP?}*DJ55CjRx2Z zoLcIqE^yQ5S26fFGUBM(3gL?l+iWcCjam*Pzi{c?ZM5J2pzIyIEbo?W!K$=vR@%00 z+qP|6J8j#xZQIVQRHf~#m#5Etqx*dC_UPMV?EMFQ8jQ z`H)Se{43=jnCj9dkmeQ#xZ7eFHVN-l4%64=3}S`NC5{|Up4PH09_kxfeJv6k zb8jJ&zc|aYsa>9Dey`Y-4*eAeuIVnNMKg@9sb&m=NVW?Mv+{jTL9l<^{ly;B|G_k$ z$6h}o{2j%4q5seQfdBj^{tbx3s#;3eCK$dw#KuDEGTKn;Rs2@w$Z)U+^gV!Z_QZ`@`a@`xQ+h8J(1+fuwlD*8OZP$b-EY301YkLFXPJ5 zs=tczH;p3LZ=xgqB3k81m3jdX$c%7?9y;c;NqOgH!aQ1AsBNy^m5^F`ERzg33!>+4 z5KDjNl*M&#IWUbbqqJ}(ZBbl1Zqxj3@zQ35;j9tuXtVbz?JUD2xg(8RH4#!4 zhYYc9BWuMOF&wsl%HHqqJ@!1GaDrv3)e?NR&it50ul5}LU6Z@1GW%$K6(+=DDR>VL z)BCaeMExka&Zxm*X!+vZSE$y`VfMPfxuc3dnrP3go+# z{$xh=EklV#mUs~qg={6>N^tf}-#mKd(j8Rc1Bs+)_4=KS8+4EefRp_--~(?4@TD2Pt7H1B^GVJ4XSMxeibV(K_Hf) zhjML`G41g|`WspMZTA zP0SfXZjjlKJ@JQCqp@;0@V(B2)AIW2l=T;b6b?I~whO|NqG)|1Shm@^ErCu~D%#HgQxo zadQ6Oi%KE?Tfg^3tvXPrO|8FK;0CN;{f{mr21L5`(Vg2uhopM&M4XbTA0W z`%2~#v{ptF66>QOIoEnZz>{X(_ zzi^(q?_Ywm^1#BM|5(Ib$ygk9`OZLLzB5qP|0V+e^Q_;&*U94h__jtK(k8AZ*8kO# zRkD#=lt=NUyBJAMTePaFkv8;lEL#d_laCFtl8`M71j-BL92D2S!u(IJ2 zxc6(<+_uw~*ILBm-Tx+v1Mow4Mpkp#W z%$!thwq}~@?p3(<8MT8nfnMK=#wsHbWO~rRb6eGlrV-@S;`Ny~KA8{;U9M_Ax#05H z-RJV)g5vH`8ru%0GGRH7={*C<$veF%+p=gQbc+Zoq4L~{Lc?586IY+KQFLiKpMQYd z+}xL4f+|Z)K!xm9f94vdGP9iQS|-=kdnb%wxG<$E>#WR?=dsT>mXuFxqhX~tIv~-t zaG$34(IG`HBu{e>cR>Ecpl2YL)=VRSp?}E14BvhP%q?SJqgiyF9CJ4@JFnix5MlR|uFnE#bcB-Bd+3m}A= z8BL{QJR$)TLa?H3v_ZKdDgc28_zzQ1DB92pH5k~*xtay39aDItL*w^_A+gO1#_-o6 zL>YwQf92YRdi-_w0rsad2dzWrC-Nr^0`?vmLVO~Tc@sqv6{Iwy_xAxaAX`+`6wLn3 zla+5Me{C6EQW)H9j#*UNU7iM66O4DC23R_DwIc+Kdt^k*JDi7)w#SO&KSE3cYjRSQ zw1D#oUWF-1m-(=&6EL#BjdVA*%SKMY9Q%ddlW-(NlBLULF_cBPH9SZVMC1>Q#f#m9 zFRRBi=aI}#`rT{SavnyPRt={!L5eDROsW3d_ck!FZ;xR{l2uTn6Z|x-CjyH`)l?_e zx9=j&=pVM5RW|6$`)xJI=TXib#%TTXMK>fvtreYe$>)-~R0qP_?#*J6i?TIL%1pi7 z_a86ZU&)6K)8AL({!RP#tzhuO+`!S%gYMsyZ~q;Z{!8iQ|Go^>ZO2VvlrL4d_(O<` z4rUV24N?~+E{V#fqgwVNzLNan8F&>WbFM)EwH;FP-2_p!pwD{gUIO|akgfkX3^#w6 znIStB^r#V8{iO81;0BM`tXof`*{sIdk>0!CkCz{@jN(STK@3yp?73vy&DjXgB~lZcHfKrPwLbXYT9bG|aLBj0*-Myv8 zb2XMiIii@gUHapi&*c{aikr(Yg#sSx-du$x?A6m}ZvbIbVLllP6h_xsp|W#T-&R1# zOb-|I=QZYfQXb6JDP&To7td6zK51~eDn^TAi`5=H8HaZ^X1^w2QGMk$B*3e!Hkb(} zHPo7s@Q)1iVEa$~&=55quF&#;v++GC?z}1#sZX)unlj2k;oI;XsY`pP^RRxhr`e-B z{BOq~#yNW%_*NBIbn@VL$hoo&4vR;q5H1fNsSi|bgt<|1_QM-jX%s;Rp zQ5$f*U7G`&gWG=QT!E8KB8^1Ag+du-p>}fNL>KGjq+`OwZ;sI)#x4iRfT`S?n)B2-Vy_RFvuBa3J z`h}H;1V@;Eh#eXwWRtvqaqF5^kb_wm>mAzj#L4VZ8}~peK^hGhVKem$d18`b?h?74 zS8orb?v8Hx>BufY4Ch!N#DzZj@Stex`cnhQJD;`zq)qTJ z9E$9bOOfxwbAWXw;}A=AqBzG749USa8)UdG}`eq%^oT}H#@VptG49b zIBb`GWAfexU9M*^@%rXB7x!!MiQq+k+@dlav6hD1%R=Ne`srwW5f}9toK(kPf^Fk( zesC{}Ti+m28jiIdYQAtCIpT75ab-jjO*zE!hTNbxuB?(D3(6d1Wq~{S4tFcWrb!;O z(Y19UVQE80Lz}1r8R~t@?ylKithIk({}Zks;g*0KzVQIz8?ygN1^M6Mf|82ze=6Mp zx}gU65rWN(q@|VX>RRUnX}U&c6cIrB3EzI0HIy;%*}Ea$S)%d!!;_Gz8Ya@XnysZ@ zKl*b2q@H)1cbF$xEAL`3;Lg6JRV-I5@)(Mk3qcephb~x+dn!*-O+;d+WoBLf+mCjevI?bqhE^IlSL0=Y67{9J@A;%CFN^hyp1riwI zIY!!~uQnn0-iiCN{HS22?H=>|kIF~H&-t*%@AFfBpZ}lr>i_oq|Cw|Cr!8bU|7DA; zM~6~|z>qaf^Ze-5@aW=S5P>2Oz%y+xzjXJmQ0}bIzDe~+NK<|~L~Uc5x*q?_7Si)6 z=Hg8dge_G($Y6!HZ$;J9ODsJ01zXdVOU^*Ts=9OvC= z5Rpv%5<*FOmg?nfL{C4F29g=9(IZ@AaPyUv%Y`!wH!kRKzU5iuN`E0xl80IbQ(AIV z}`~G81neDgxgZ+1qc>LyVBKiLg68{ycQ!X7BMNvX`o1;y*3WB}o zG zH=>4BW2J63ml-qI-C^^>`^@4!ZQo%4(qhi}+)hDl)`|-OW;3|GRhSdXIcUG_;ysDJ z#qdt^<-dv5V~}S4Jigp{@!a8rQ!pA$)3*~lJm!7QKpb7l4a4!vVALj5X@sZ0yJh`|r|?X@h*gP<7`BNz z%nqa*cL0-%)k9RX#$v4%b~eHF_f&YzkrJV|NSPa~S!&ne@QTl2fI|IvRa1yqdfB+S zhaLuT0+--HjC8*8?QR<;gy4fAg2Xj9Q`+Xn6T#SV47Fr+6YimLT@>Q>_IF{Ono#AB zt=(*cN9kwlAHV!P+GAyBqCd`x={A5!^z^Q+@a1c@zHAAzK>J{2+K_60a@I=NJq6?hk(F8qrk8Smjyl z376y{?JA2cr`pxyP8VP~cjbr)?1>F`(Y1he(t# zP7-wZ+uWQzVp5|ZGxX9VRr#GH5t=4tq|*`Y;B@${$}m(CQU}*Ep^gUwLaCOX-OGk) zvb^!k%OAVt zL@cf`IWYK2e!A-qUV<`Ex$>DRF&feem0>~bwergP(qhndgoQw`sVBLC#d<`KKxZ(% zzLSt4Ne+>L;w!@Je1_vYpi}-PPe*tv4^e4Rr`W=BVGk!`)bdV(`}pu>Rg`zH_E&sk zA}qWBBnokZT;?g!i`?++__v9B=&1T7R1uo6M!2#N`Px4r%IjKvgal7VB^`vH8}glCd*3u_hL>v#~dDG*Pzuw*yDZ_R9|Nqhx^>7lYQLBVW0ffp0^T zvFM6Ri!&!Q?e{FDLfN{Fx3Ju&3=k9ObFIi5B)WyH_5yZ%uU4h#bXfH1I~ zsBX%_9FTBqji*y2FNeOZ+pyQw%bw0A(?QoI1zZc&jHlr1GV{BtR|Rd0^SJC zMyUdt1X5rmCK?^t%qpY%z7f#A5ze~Jmx(Wkn3)cxrz31oVI9q;6&b!oT33vId_zJq zvk0Pns?9=Bi;Bpkg`JCxcuXLRpfWDefy4Cg6+OI34W0a!bxsyMCyaF{LZuZJZ}pW| z>2I8f>Z`FX%OqH8q3)WjZjNX$Vdss%Z{r$Pp+?DuoCI7T2OV9!ij~{6=Jz|swC#yS z6%|pMNNm*KByY{-TG`lt32~9x+LpKUKkrD5Ph*3yy{72HLJl%+VImcjD_gop^_!iP zjdp#F3(gk|1iz5GJO0vSv{Y(b_n^w0I^Qs$Y?ZGIA1^fjm@MXt9ahGROi^oEWP~M zBv~F`Ojo+WtC@Ha>__%4-(xj3M#$tbXKa9SHixP?1TLV}mRa72olTWn#NDJ_g9&n} zONm#igQt|TVwwB16HtR#ZQiZaPw$y7pl@CiNKli_Hrz9S%>yqrv`_}&zFR1qp- z*1(j}Go}o1T%>YC6Db-LKIyikvQ-GB{S$3)zAo8pPP|D~ahx;~qnfBGGf)_*e0F~`;uDPTvxo`A7fnIb!e-C2 zB^X{pRk8&`NLv_h6qatL^+t(UpV&1tbdBS8HWP?kzc@@h2Cs-wRqJ0j@=}ob%~YO{ zhc2Lbpa&wSY%g67HN=K%%Ff6zHUse%XQY^k(X*VB)sT9mg zw4HJ5M!{>y&J4*lP_T$8LRptakw*-ZaCKwS)C|y=PpF@n&GoZc8QRm5HKe@#rG#pU zhH^?ZkPJkMWv`xb#2r9gU2YPimtejEn@mmj>u-3`C+4LSdaqG-xdJJ^!M|%8j&>>Z z<>tp_Est&bgy;xfxdxr8Si&`?Jj5M59Xo&2P7U{W>mZZ+dmxgB&&EAukp+X9TV1;c z%rF+~%|CrK^FtyIlupgtM1$t{1o1M^`@Xs4rS^2x%-{Rm#8$$=6rTK}_VSaYT~Osa z5U_t25*h!8822w4PG<`{+kb;wvVxQpkOInAOiYZC60hgFkphY_=}d{vyeI^amp@@s zR}4BX?sXqKShd$jE*etPDfmNSfm3Ng2u&pLiBHeUnen_j3TovfD`F zm<6a%d5!n}M7;8R`^>d(?EUe&DLtf)#~+KA1-faL?iN_+bJhiV8)N;9wO`7A2ZhJ} za{p~$inA3KPkDsD`1lFF=^ciGPZkik6WAT78%$HLKGu+Pc$&eLVC|G_CV!+KDO}7c zaz}yJtbvXR;S3&hWiStnc8YkPwflxM2n0;@#0)!}Ln>K^Vw5EEd)-nHxKg{B_a8~N zQW_pa%y*I<2mXIjF8=Gq@!v)4e}$j_x*!!Dr|)c=hiW{I5-wX5-0A6*fyV^ zzyb?GkHOV{qZrml5l4nFu2?yRYccgJ^N9l9xr8Cz_4J_}{)4>axkdycCY=F(!|VBK z+i7N}J3+Vi^&J0)-hl-gSdD4i2n$}%!9M4x?Wy|Iffe=HOX#H|8k>XEs3h8`{c@j8 ziyO?AiC1jcyT(RmMIm)ubZeRj^dEKmL%JBJ z?);6CjL7ugG?>w<-lfZqT0r;7He`{$6fA=>t8~p94&^g9vEHTotlPh89J&9xa{Vkt z@9v9A1+mpSDjdQOQ*|fItCpA6Z#w|J((G*C7j-ckbbD1#I_7gAj6IQ%k80&ntgarH zdE$4@njF~9-H4IP9IY1Wz_;ag!oC&Ur$A-)?oAm-ss~NDbfke1FjG=w=MCAA+IBA!VrGn5$>`e`LEiW@6EwO8LK zer+J_9hDK&5pm65?Nrbz*}9R_dnO7*f6L>WN2OzGwk-^qo0&GqtfkLs z&xMi)iuwr?SqUyKF55ba&L{B`T$_=iDd!$0)z`tv7eJ2&SQ)E4!-S83-mro#tVKE3 zSc!!XRPKU%imA+mbUbdWp5U6M= zkcP;Oa6}3Kc><Eu}s-P!c?b(k7kpKo#tAwUxIoM<;AX(-#PuC zQxphXKVg4;Z`8Sxe|OFQhwv-w?DS1~Y;Eyx5dL>L;oqJ+jCJt_BdwOYI%+zkk%s;kk7EfeFXqB208F51KC8?EE3>f$bUr(VW@-{M z0)I-?kbwk&jJc$c6a@(+0RmeRGbj=eSXIvb&Z4;}R(nIjQ+I`wyV}=G zyHe@Tsv`K#8y)zKnY(JDGnl-FCo&L_yaTDtOM3C?QSr(C?rvvNgC&c0Qo!%(w=E+5&lQ4D)(whZmO!d8leu)gLEkaMgq zdA)%AXq7%THLS^4PNo>|PTkIsummUkNFVTokOdpBPTZWb2XSRXyDd`3HZs@&ffR98 z?ZX59AlxZUhSEGFr0JZR5eBMxs9Z&}bY7Ej71`D?gcydEkCBbK!FJR+JvaA(JK zAZt(fQwEoOfRB4{J#&tWh%NjA*%aw+Q&IC2VRL>?B;XVS$ILHC6W+kJ^B^K=$m`dl8usCQ8D0Vc zllMH{qMdj|ZtSa!QCF#&RCs38r64yS4{r`ote7IF*hdxM*7RzAYOmSp7oKUamEB3K z2ZsgoOlK$(-Q8M(9&VN0wkl;>z*LJPwLT?1$(THy&O|r;vw%K)oIXf7&R+$5yhZT_qxi>Aj(hLf6&t*(6XVF~i=cmsl?72*X z1=p9h`OJ+$T2c~WH}=GsgOpbr(ix+n;Hj_RrF>lJ=cBAITrguo0V zk+LW2At0i8}t+jAp&}S`60$g? zav@S02`{AC6n>3hWX){n`>)*@trsSalTABgQgiY)VTJuU8&Meo2v>Brj8> zNG?~FMTJtSP!?IvGPesKRT@_oJm0P;?Ks>w&QDBvF9TGf@>VFEDQcF_nNTh-R48aL z-U<~jpUxsR@Z_JT#PLw-#Q37J9XL@gD|APGQ)Al6)_}+tvoxuSfIpQz5NM7bV`Vn` zKD>2O#K&b?9I9KYBgLGLC#Z9)BrHAVBq(?N_Sa2dRy?jqAQ?xZL1V25)DVF25N!!dTGtS*H$d=y1}UKB;u+Ye+_K-TMb`T>ws3uo=Q<;q(ywUmDa z^CHRrwJF9fFvYnCkt#d*C_F?%**vG3tZ9*1w;>B5%QB_BA;6{LjR*ef$(k(U=*7B7 zwQ8H2yq>6%vQN5YbBC^LaGl%c~GH(ix?6ez|Pqouzv0{=I-viZ(}ofqMUT4?`8!Y@XW_ zZ70>(NETD_3y{+7L*apGfr+2+i0mav=30LN2H(RlHSMW0-tl{8VtG~y^V7LHY@5)k ze4T2-UgbI;A%rK5qP6x-9}d^i`ABuSUp0o&kaqlFWgVSqsC)+@!Xo0!ru%I2IZHw~ zfenpk3*9gvgRFVe!{UUwhzd#S5=pB;i0IjWhN`uSzMR9xDvGa<)QzN1SXm^A-yj=upe{eJM?@s?A=_)h$ zVK0pqmHu)P1?1X?uL}g+`zZ4_gt82zv-w29o!H+Uvg#>DLQX$mH%&s(;A&$-usa{; z@{ebt9oh{qvNcmof5)Sf>Yw4gviula9BvnC7`Gf_YC(fK8~=T9G|6G7&Ggm&+7(5{ znW5B_RGKKE_w*$0$T>Y}^h9I5%C4Nt?9Qmred|3~SfWl)LOwNi`uiCa2K7P~5vQvEkiuNU!m=1Mv&`bJcA_zS#2t3;ZlT?A>nIb)Bn=3-}K<(Xb07pfuawr z{(c07jVgrmR%6KxM6@hFJhTGX%O7|0#A@IOHLN|Z@5hoW)0f^`Qxfki+>SJU)j`o> zKh-o4lXHc4f<`WUv^@b|VflNA+N0I$$J`fU5cHY`r_v&2E}BcSgEM{sB@K}RYqA5n ztT1Au672W^hFV;`2M-R)DGcKVZEdSMygz2W-@kskgXgJE)nW;MAwXHTy_H4F0=TIf z)?hSf0~03&jyZzUvkYbi>3N49Nxiq@ssZh9^*B1G$EZ_$G$zP|>|+ZOQopAM36xS1 zb@kQeO7gUShLOz_WSrc(gr0=ct}@BYw>4FHLxc=v`E=P)A5cvF@Izg{AvcB7Pbt%w zfr^RH(ucSuN84urxpfbHm0S!=CN#B{%i63U$a}buwNt12lCKF%G5DM2H3Sl64QrxB zC0pCPyz%+;I?7NvU{_n=GIh`O#D>${IIo-Q=BEi*x3_;Krt^v1+%>J_8UDI`4rM3d z78@BhWVAv|u*Nm$LZG-J8kF^|PXvdnkNc=gyS`x*#D~)|tuPTJ?B;~=3%j_KnDYH~ zDJ*Mr;RPn8B(>b&Uw-ts7da55bYyiJl53SJ$&$* z3R}>Bw$Dr7FD?@uQFG1e7?DfhR@HWzY%Au?S?zqaZV!&Sx(Z`p*y9Bn1-`JT(EPiD z+g3EFI=^C2YmGfrkf{DKJyA_2iibYifXzv5a@7aFHyq2}rzgnZ3m|!c?7X77;Hqu- z;xlQVDGDmUI(h-&wJ`65&o#^is={hRpkF;kg$3@P&1=opETFSi4vbZZ!{B0=_=Rnhi)N#2|3sxS{`!f%~9K1z7A_|}J zi>0ypCwDFsZ31O1TJ_O&cZZrm@y zp1jxJ>BVYm)SH771|e`B34yTi(L^^B9KAcdcRvLd^Z{q^fVSn9+7nD~KLt2;p74XF zX99sNU~3Xb+9#&y(Zl~;dL3VS@{1*9G3kmRW#+dh@wAIu#<`G14SZ=kZ^AKWpEf95 zPwW69FEl00sVlF^uv(hs^b$kRsX~#PI-!L%e(@q#Uhf z&=$q$62u?S4ARUzdTa`r^%=uua0m+Z>bzqe_MPDix8J#g0}TW%2NW~x(qnuhtOy7_ zu@1h-``G4|_0{z^`N{7!7_g3BeYT*lj|&ZfTMU3{?&kFT83Pgb!eqUhas$Ea`Z9IR zkpfCEp3=RIWDg)9x@2_xX(A1qUm{9cN9tPqi{VT0TA}sDgSD>>ZEE?s6}J?cFb&!)JkQ zy~Oa(4hPRFheh0`*@cv`;gu&2bACT}e=B6}XHa?cNL-3%Y{WA+i!CmJwXv%4j!+GE z2i8S-aa7?lQ|@#)saw3jU*Fl7IgE3CbLeG0u}j^8^q82S$!rmsdDJI+pxNNkHv)b( zz40>o+hqC2Olq2|h}I}rn?mu%YMWALEUUI0kYf)$-9~+4M&VXn@raImpqLpLmB)Of zD>NDX;JWrV!o}YAm>l;vpcNQvRYrSv=n<{P zFZO&{`!DZmF%FFz=XTlt^`RHt z4j#1>cd1!JghY{kWlQJm+10(zXZB+E`yrp6AM_r=4%1wCU;t^B^qjf&Xz&x_Kx~Xe z5Eh3+Wac86p?eg1?!3Yh!|;;^@QJOsk1uJCQ1F*0Dp3bvKBV7xsK5aWothB3EiVKrFv7H%q~OGJcG5{tUY=g zNpvXS6A;6~BaX;;95-&Sh7M+{ccm`vUo5(`8dn^XZfh}F_4@tgbH_|6Y;q+W#~v+w ze;Bda8xNsVu6uK}3gT>Q6k#2ejzl)OE;UKK#-9QqpRVJ*?uA1zrszQn8&0WX#Rq_* zhx;gLWYs4T4ZD<|fF^Iry{UCD@`%iTf}@HjO-N0q*vTb>K9%PF@YDx1<;hgc7J+KO zJ~dYyCCumG^;+t*I?~ciWoL9-rUr5wm9%t3gyUBr=+#j|=L0M=f<}WJ8E$axbn|F< z>I|$-r_bB&_~g4h+J1|26qpms6`W3asmec6e63e|vAT;E|Ek)&K^}8Ri!nr*ROvO& zwO7BJv01q?LqYb6KRj-W;Bi;Yv%vN0ni{>-b^}zY>^bbxy_3G(03v*CkL&dZ`c(n~ zMolT^x?X>iqV4}tXWS2HXm@Iy603~iGJ8+jZ;CL{CgLF*A@vaFHL+*gBtl=SNn;O0 zM5fyl3HTb_D+-#cY?FQ6mi=UDRhmJy>ENrm7dTxhOo*<*H}l$o`~8_O^`5I3FFOJ7 z-FB>eDyQPsTWj+NEqz5hD+i0g9v?stfmX~T@^N@avZ_l-J>sgT>uzRmU9loI_Fa5y z_K#KHO~&(Ca&}yFh`7traq#x4(dB-?0$Gmif?`q@jD84)x(Jn|BS1yzZl=*z;&&ob zBVLEh*o`_VlSx$J2WI|rn7+gNh&N<|MTL>Kl81L-c6xwrJa(Mc1gCM4YUi#!g6=p3 zQ31$|I@l?C?hJU#pW-3c6Vwrq>ud=uee<;261LLAZoO1s;3G+e5h-1Bp%wc6A)@2K&U>)np6KTErSdS0Lz{;|x~0WOT_CC-FXjkI4~dswW+ zZ{+y3TE6`UvBS{Yjj-<*(1k%4wa>NDsFeI)#{U-v|JP(ex7xZJvI@pu8OCwuv$>TN zRbi<9mbfGmDD`F@@ErtDg6w?OKC>WLusCn8c0{AA!(k@~}=P!khA zS_lP3nMZ*EAPllbahWS*56ve>`AuoC*A}=h={x|?XSg%0Ls`pLBL6}G2;WlUfYt=p z6>11|RQ4qzY|yPBE$O5bb8VL#cW}m5@g_}J#4x2yZE9>wcYxBZo57#2=qgQ~dTif$ zf^@H&UB$_BF$%b~ZpuPBD*S0YA%r(z5;J zR!y5TvF0&5W>RY-&NoMCN$Fcm?%K2Nxc3RnkPGp7r9mD+%P0qJ(jv9-Oq| z=6_hDlp*H|Iu1WQ z*}?}Dpk-+I(v~wykDTv=npk@}0ZRz{1f^>LwdCblDo!TT@=eggg0=#S&OPIY%h0k| zk;b|jR5deOEsgDQO{=s|MSTjmGn95i)|T}**?a|*ys?gPers=g zqAH$OP(d`+^aSpa;8X4oj^VI5UKIGtlnt0RB_L5X7@)4a=z%TwsE_MG_W!Ip(#$Sm zj#pGBk=O`wFDvR}E4v|iXwntcUx%&l36)=UgUA&rxW&=*4PMaGLVSaa|H@-`icS^p z?Yh9CTwKl59S+Qucyz;B)e%$q(VO>nQ1qtKlM(B5CBpS2t<-I%_=s0E=t-#}a?iSF zaMgnP`9QWo_VEnyj6ZBdQXB#~P#1^bGtU=C-i~&9AA&*ypB#)Luz`r0s*EVCIiW$| zV`gu3F2k|eTy(3|L}!5$r)Qq1b$;Y>nXmF3Qd3Ck-(v6cfo`+l>)|bvsY7Z3i%cVY9>)b zXK+O<9BcPVxQBFvQlk?-r6W&phIWt;5LW!luTQ9@$f7XM>8{AWlP^o?E+eI;H^O7q zBPx!EKDohkqCj#SqV9f7E!cnC_hJmk{8G@$`Bpw9;$kMcRu=x9?kNQA+ z^6z03Z$4Dbnb)7;a`#|W$6MnbM(lxdw;VHbPI9S?<+&K(4iI7gY-rl`4T5xi&yrYu zk1BBe{|s&4BPIXOZf3X2r7W@n0#B=+)M~RoVc^Sf*an(`fWVyUhk&59;+(vo^hYyw zW<%RW!nPLj&ItHT7^&|t{`c6p3AY<`GnzCM*5boV_If&#+3{8O=hxvm+YcxA0--@; zG|a7Kh7b!}Qd8%OMyxA~HoCm`{uX2#kcB%CVUb@-u0CqCkVSXN1Ej-@xFDXDHP=-v zPc2#YgBy3|?eehJlbcO~YG+|1Nm~v?0bed=yG}Mz9Rax>YffXHjE{RT6Uc%jsg zKQXAG;Kz9TJWQ72PDJiUFS9(8u1`36HI(l|3|R^Mnyl-j_n>z&TJsgnvq`UlE~}n4 z>9A*>q%sdvD_b3QiP?6=OuiDn{PaQwtH~^ej}tSTvZ#I|87$oT>-7Z~T^Dd7?WtcHtGgdE8-M4CcUCp$AZ*NrRa$LMgYztg zC;VeTfqBL()tPS~tnJqOjHof&0=aRNXx_F7-rPN6-D94xo-we4vl!j7%}5!&{G6RfuObFyu&HrlVZ%x zqTgfDKs|ttmo2rA(}|ii*|w zoinW$LoP|JS(6X&&6C0Pd=&7Pdgu#zPs0}!OAi5bWv8d)oV@J3H{ar_aEI!%+cKNP zMNAf4MoimY*X<|XCtLT~Ue{M_e|NHg>X6k$sz&RfxG4;AW1>z)W5bb8j|LNga*zn} zm_Jwl8ncO$RG_#Ik%|N&(NgB{M=jeW2^}xK(h7F3}gc0iV^Cu9&M?&zU z+ROHNfXQW6RGM-HQLSmLO38>(=F?NhmUk>9rZ(%Y%fGUdIj09% zh8noEawe@L9ZG5{9X6DgJbXHCNn^q&mex>-hLM^vLQ!1Bz)plRAmI^`X|gA>4DyuU zNBT-99!|s+WQw(UPjZ_pfVHMGnnVYsc`R_!n0_|g?7paEho-%OO)DDN(_g$AOn~XOY^e(IUP?}9B<_*!^Ykn6iFW{ z(&>|fNeFzH+&ijFG~edC)(kC204vF6TECM6A2B%o!a_U9QO#S3o2m@Q(=VYo@+RaQ zbx&3#v=An?G<*}0P<3C^kSIXkHJ=ulrVg2$6nru!BNM?p7^a-`yHaJPMrzaC9VjEC zODxw_*82)faRGjUp@3l1ZuY)pV^r4Mh@T*vi|bmPt6( zev`h5ddMedI})j(qhT^ec6)&<{7;8I@Jw8)pA`edU_**Z%)&@bK6Lj`_sCvu(3!YJ z2GU8uf&!t5IkHt``T~s|O+WW};pd4sooDawug=sODbLt7?UBHUyQX}&@ey%aBbdwn z119I6*eQC8H2}{D$5ZJ$kXLA>IOz12=qzz(xU9KlXpM#F?d!&yJ{rHn`{Yih66-i8 z*YE*xGnp^227hTZ(T?U|3yI;4D%sm|;1wAib=K+rSGdQHSGjlKC>}~^+y_IV?$Mf7 zUD0j*kEMbWFs$Z50lq$DO)uaJ|6M=f}StLnRR=_S`6T7c4y3SRfsa}eOT1?37x!@f@u>% z_P-G@aO=1n4YC3geVc;cz_?)GWBb#?ZjwXl`d2b^62xZK8%)bVs><~m$>tk8|$a}c8itV`8y8)F5mD)Q>-HU8^>*IRIBTCh*PP#+#C-v%9I7*3 z7_FAxa%lNDX=Z)!^>Xz7wfnWtRjFpMoMl(k!HAOSNSfL15Q9vP4Kg78jAd!DgyG00 zr_Tye-h5nyH?YPSA)r_8fMaE=Pdi&VIdMU#tfXpJ5a<&VbEOr$W^<*#OE}RpiuVK} zTaRC$U{#lApYj>qTJDp!-5+*^zGd8dfc6g3n{=(?x@u_Z|F=7i%78s39YKSNp%ZryC*X64@04BG zs?Gsn>Od57wIw>fq{c7)fm^~8ZO5WIm|!p7Uz2Q);SpnBZc!~ynFO^?Mj%>Bd>;?% zJ0NIVXghPlqMGX0J-bSxj)B-&@8kvUu#jr9?M9zj9oWa=U)UBU?EIFD4Ju3g5?9{l z%*l8B|5YT&#dI~PfdvA>Bm@GY{69N1{=)^B_tq=mbcfFuxQ~ZzHwqEnb3S;Khhm5v zh5;&%N&p1@ex8tz>#E<~TXW!W=6de?Zk-swH^`YAZ@&~p#4|!3hbk}#xs(F*(ClME>Fs;r9R8d2WJH>k`0-JG)A7P++9}EfclI}8t^%yz04z{U+wg6ETnoi0RBuk({P9=cNV#ycQ!Iv3vIXh*Y ziYHNG15(V0(NyYvV{Ie**5OAes9pmK%~Fz-IxMM5EcA@{JFU=#SYx6vrGZ-vp6UBs zT@j_u1AyDBji^zKEQdwexxsMh}g0u^Tpe>`P>L$zgtACs0LTvH6sgN8N?gt{dzJcacd%U3ZPkguA*Z6f z(jgwOQj>>5NmB`CW{TelqfIihYU~Qek-qUHtaS_qCyIKAQdrzmV~l86OSg+XRG2dr z0Afp2J0#ZV_NhWsJS_MoJonEPJk_Y@QNm8Lomb@@#N|$6D%8`}KHiQ;2#kzmr5!}Z zk9g=)m)2z7okUFE0~Nl*Q!x)7AV2{m8?3qPK(#S;#KMuggwFrn9}s zM4oW5PzxG9GW-^|JgsnQM(wqY7R&aOdQW3bGH7=p8&yg(jhL8@XNHo5-_FCl;cFDU z_ft)~Ont~!y&KA9!`GlMb)bFyOK$_!Rz%B=he;d&VUmujB@ zmrbS73^}ewj>p`|&WxudWce6rXG5m0FF#f)u^xomq^Uq>L~oA#@Q=IB$nMMG?MqlR zyTC@wIM1lb2QoS4$Lj&jAvB%IR^5pdi_H1y#pDb6PZK-ZPO3xX4OcWHHtw3#;k_|Y z1!ecOrK3|9x5AO(li>-4Ba<1U)3~&uZH7mBS^?LssDLS$G?|nJVNRWD87u1$W-TFa z!+zR2hAK&;AzLa}iMK0WN7%iO)Qb_0PEM%X`GT^q_E>dtiV2W$DqKd2QZNRz87>|Z zwj3{H76CJU7Db~(HIdK@n51DNB436~!ad&uYGVZGk;brlypwdwI*Tgjkv4O_@m0SK zAZtcPLF9-zvR-fGeLjs?OMrzJ0fotb34tlJ)O!#6K9HLo|EAiCesG|RXyrh6NHFu=TI zP+L+P^`>)Sikv{L;xJy;8KDz1pX`Gi7sBKx)vRK784Fu=BuB3=RQXrcvoY~Z{ykCf zM$&=@MkF8w&X09ikz4e3H} z9Wp~LNdqk#t--w4z8xQNnLFk0*QSaK=2{%cEUMg(c0av`Np24wI?qRc`A3ZbNx zePi~IC9$p>Q_#T9WY(XMSC4Hb*`m$cG&(;xq(}1XXG4)@))U#JjzxcZSPV}72Sxe95BwY=z_$I>ZmskPvj zfiny$wu}%~fgy24+e~odynG(@b;zg8n))8e_=?M}aEG0!@M_LB0=o}L973V_v;sT=U4&S3P0GA4iNZkPga4ajNO1k3J`F4+o0Hh!v=E39$tNR z>t~(-D5>ihS+Sz3DO3jeO1R|qg%x!1ex_4+a_pfO*2RgEqC1yVDJi!$$TIYxe&kr& zdV$WrDSL#-ZNn6Wr*urm;D6U*(xbBM>1;)*Z3(}(C4sKVlAh&-uZF`MxOt!V;oo`? z+zuFS06>38ZX_eI@L*gEsfN+4LxlZKVdivjNI;y~@kEkEql^EDAsB*|O4^S^Y{+sq z7p8G!TOd8Q?E`ZDIT9CE?zzK=0I*2R$r(RU(E3Pw&dB%mk0z5)fI2o4k4jNA@;k3h z9jm4Pv8UTwA>^uC+c_t5lJWcZ=w}oW9zAeg!^s&KjvYI3nKMw5yk^T{DF<)yl}W+! z@L2#QVG8Qzt=_$O^8x7oK%#5R}&5xBKK4rMse}2nF0t!9#eQ zZS>U%S&H-ekFJ1}_l+_JLA&4{j|1ZnjPeMR@kZw~x$^cif_{~x%bst90T=ebezqkN z{2}%)N-hOBxZY&}&F4h@b61nISC&Wd15%5qZVzxY=>&2wvj;HuubOBJGCedug1=nb zXA3fB18ULy^``InLP99Svl8rMf+GNA66J{eSY;Ly2cCn{wFn9O&h)I%gI1srYPBwi zi{nx%*U6a77@!BZ&EPwa&EQ+iLb3U?jQ$!QgZO)cma^JLMPhw33BHW9XClq|tAo?7 z{j;`1j;_d>6by8L7w)}L6zD@M3t{NJqBdFODAfUPbizGRW&_tEg{RPSJEn6xwsU^d zgfg8ivX1Ykp^Dl2b*$ZulnF?=1C(`u;AS8rizRTnc@W(i~amRHSDWTSo;Lh zh1y*@=`5oR7Wf&?VHOLQBw6oHD$|TpHq2L+2rRFFfNK!w&w)31x&6xSY|IC+%g_6W!}2UKDR^7 zMhBkQyn(@SBO2jIZU{+kSQFeg>}@9@XKd;mZHvygB;K}wKY{q|FJ5ub`Gx6x)3rY! zXUrfLwY4oe+J>F)0g5z>!?dMQuNyw8bh`Q&u734tZtBsInVq6K`H1tH=l;&nysoCbau`Gm*c>r-}TF(OT#`QZ7`mp#AI zA&3ZvGd$>nb!*|`O%)6MBc?;BGJ&gx=ks2{GXs3a0a2=n9aom20(!0VYyGu6?NrEZa-c3DS?+_fTMNi zq?~k|jk|aimAdPDB@${SDwY=x`slsDA|K1tDi1dCkWQg3ZZRT)h4M`%^;|;;YQ{to zUkotH8=ukQRZePpLEd`)2{w9b?$fr64|uQgCS7W{3)1dH=y$7d0arB?=dCNjjafV0sLEDQFjdx~R^99$&V;pQ7bavWroI@lFuIBy^nKMum%5 z$8>SuaWGqN6QHWEpSnL%ul977c78Q~k(IOK4dRrrI&R1RxEbE_ttKn)a&d~}NdrqW zr{z9YsIeCskfw>tpV(S|V`umET4{cQsKOQcg?XzvbyzMMc~5(0x%EbJoH_#OGlTD@ z51YoAbh<>g-G|ZV zHrU`s!pO;ibp{XD5N2HN0BRPwdEM9}l{YwY^Lvob+Sg!AnOPiJ2=QqBWYOs-_6fPe zdkCzG#)iabEA{um?OQJL0E`_~~83O|m@gAvUp(QFH!)%Yx6C zcq0Uhkt|SYDjfT~ubJ!-;)l|O|pJ{Vp(|GoPB>S>MZTJSoZpUzwA~c^?Qzqu=}+pn@0Wm_~^J5 z1A~OqCuPudaLl0FF9Mb}6sK^1*kB$LG1}%XE&OtC30m?J8fcfguZ2W2+)C^Y$FCZS zuXcxw^g-fF6?nT>M*M>LQH}An4e1NNiyU_=cK7ck1QKtQKJ`%HBhme%Ay5yh2%;CO z|IhyF@FDw9@ow~9>)mDw!AFn!Gs4g;f3+~umvYk=%@o#$2j>^CpGIge^x>cTAjc25 zFSfwddpDDpg23K5eVDJ`G;fA+;vk6_N#) zsG2hB3)UGvqJA?!L|rB*%PNZtS0_;?oNYoBZq77BE9^OIFA_%Ml1J&lPC%%Kr8~vE z%1PXSz&zU z1O*E5Pd)Ldz4o?rOw;+GXK0;G?A#F~0tT0HsY|Dp+t$Uple%t264qQ8%K(+1D_4#L ziHROlRhhf6hu304l8#qrr{*&m)Y1jvQCsHulX*tA?o@O%VlrRFLAc;$$n;7E-XLS! z;G%c%T})f=K;3h{gtMs&!cq70Sp|-p%526;oqZM>l#R?LrH#7Okv4JC*<=RgJ>~oX zJ4UEHvmA+X6K3g70`|r6aq|xBvvP|DCGBF-dg#lw8(rfmaGek16Y_vW(eBb#_Ozkj z-8aUdsGj*m!=td;jCHzAu=JVoD*xD@VxWus>rqT#{}Kun2btcaBlfhZZ<-orJV=w&vJwOCK(mkHjSwf zrdAYL^E&-gso4Yz#X2h_=CL?N#t+U5%i0!MEQK704aOU@4&+&;ehSlXh<__n@w6*e>V>NMQSZ) zS*J58=||-*Uw{n-@=Y##4|KJ61_3$cB|BE9tqor9m^V1r;+gc=(A1h)-)en5KYQkKo+oHy^ zC1aTz(&v6)n#%pyz)HT3@Q>g$BfK0Ar5l|l@TwzoOggScRV(t1SEHL#{A}r?GlKmm zfw8B;8rqgN(v{gkTL+W<#YgHk&v(2{@PiOVJ{`7k7xtWyBnzU zSD?C1kA-1~5}KUcC9N1lyEUDfDf09T#?nvfsgpFT4c8sq`XxD$iW`U}F&`b2k`5`B zb5)A8Hi{0BY*iew$k59bXT_canN@j)?kMn;$$gBdV#9}svE^+r+f)s0dL^_&vKrjS zlPl7qw?CKbWv^R_bB$Zj<`E(GROGu6?H}452csFm>BzzgrV>SqOhJ~)hAcLe%XB6t zGcEfGGGQt`fgyHw`pRiTG~Bs5AF8OClSBb7)wWF|4F0nQiLwCx)`BQi4se>yl4p_lS%`H z)Y|3bWiu`9w88>!Mad$LcD7)MSr<-=Hx&xodJ7wo#d8C$s<}=T&)}Jg2RzR|f^pFC@JJWvsgcT__`IbH zhY!9Xx$@^A55GhBiWkDx2_KMnDi)F+<|%SayYdtb78ap>q)Xx!Jwo^j7Rb+)s^`mI zm3zi;h`JszQ~0Fv4uxI@CVfQVuPV%5>KLdwnlwH?vr9ksRCxcL9yx2!L-@7%>`y4q zc6_eYm$#M8ownzyo5!o1!FT|n17@J4%u-7{NYGV>Mb0!lW9u04#(%s&_XvX$j(2GF zSy4%C<^8JQ%FTxLCQ7mQ`iK7AODl4xuvt|ijl|@YM<$C*kPx}P3Zm452euVS)aNSq zE9YnL#8{@FzIGxG0?k*1F`vE60(toxoaN%WNX|$jzh@cVnWR&LGOw|KmqFgb=WP9C z^f?cu5=w6|9Ug_Y5J$A|V7F7xj$&o*`!aVfnc<{lcaknIlh~*!>B?xrG?dU0@RXcF zG8P{bvVKzxX(tEd+1v1_W8r^Jk|QM35|z@J1)bti3&lG8p&Or@kmECfl57IM!WxJH zvz9qg7dyrEl525cV|Sp_<@Zw=uPT-O*M)j7kXIWG;q?=|7varIQq;}ki@HpL=VtRd zw@^3$`<+Cs+&w!*TMN@h;F$#TH{Yt$I5sGmutKTylv$GK1%>2Ef@@TeoK^bbsg6)i z?Kl>Zg=o^lgQ9Cgvwxv%Ky80jmK}_Pvd@NhfM1WqI+p%SShcc5*hR|SRpDon{0R+Y z*ft7($iz;>Q=WxAGj+=CuL;miXEXXGKwTjn+Zb{rYVfGk$R2RILTe7}L}{Yoy1~FY zKCC6|PbojJ#FM}YlgaXoKr@_!>ovSy4|5~f~otBB3u_l7mHRVx!)?T;E)}! zeAyz*;~?yT!oos8ZqF32MVey^(CD$`I?t?$=7AmLAgYF4k8dQ_(eq(viRdwZxB080_sH#I&&+r`AlEDhFPw+8Q? zWmu#QrG3z41<9B7KrV=oQRi$;li$B(QqbOLC@bg#W^&(5E$|C;L;!s-cUHX`unYq%+Nc+dp72 z!p+$TOkF_I)@H5_mAuRc5=VT_9J2AMq|3Vrb zwZi(h3sTY9OCBWo!S4&5UjZM9YU1;+s6O7o*t#fT!^0(#!_AouqRe*T{q3>Y&kId` ztRRZV7gamKbwVfeA&6PG4ApaLDsNcb#Rnc;`3ErWs>5Y>7Ad-=YRcX?r#~Uy=PGyj z(^pWYV?v=|Dc*UqKZTna(LPD7h{r-oy(k?+2Kmzs;7%F?&!wGf=zBV=H%8)29ZJ_R z){x@&YUvbTG#zbmJBey}TFlQ@jj7`#aG0ZmzNuF&L!8cpn5qQ|1EGO+du=X{4(eg5 zI6(4&k`4;fHIfdTw7X&U!aZ>Z>E8IGP>4JI*F;^&R0A9ly0&F)r?MzQA9^(V8!}ns z?kIF)1{S`lk$L9+GqdUJkUnP+#!@tb4Wy~G1OC}&_Pdm@f0ZD=Nd&72>QX}Nu*4LQ z=!)9du-b#=uS=sSJm5;w^21HnNmuf307T6y1yU4#lz)b>C_b!BYuxHc-Sk<;z{WaFBGMqrXhVkNtU?6PQr^3&l=`j)zG|>rQ*}pQpw=K z)hLhD5pjm57?RAw|G^%v)G=u;YAda@hFy&jUTeX)`E6s^p&XA|1di1w|yGmGd+PaEg77JNKcSs#-?X<8Ck~%-bIf7MI1ls zMih4|jJF%X(}T>{Q}_rptD@c$GDN|x@(iJ|KR z$Autp9Xb2Bsw@kvI zW~e@U*6sk1bDaUNb?itmfMCUQB}YYfgE?wBmVI&@Q3Gam&2Ce9*|?z+-nPw?-hyZVD0K$9knv*hBABX$bm)u zG1qxr*jj9uhgRM&t2?{x9%`jWNd>KHUYP)|)@yOG0PmmAaA!Tm*4*J#k)Igi~wC^Gajk&b4^m3XwOvrc{u+g$lCa9{+=_ow zN}QNrd7T=ie6(CSR;7}lRz>tjRYbQ2T8>sX-3rEUzf1#@vB!bSj7`Q~Aa6P&)nRMg z`2dJSZ~^1V!6|2pNeNbyKIY75Q3=*E6SJ5(=&h|6-(ja7ZYy^}JH`VB%LId99k7AM zXqBr)Vzt1m?&OMQMO4`K_t^Cx?7sV>N)ukqp|d;Ydk}L?tKG79smi|$)ko$U^9WFK zz%zPDH2wVzMA9Ev8*Q|v#-PqSuEYWcPjg!MUEPpQ3*4p!2C>*xkFt<^j^5qbTM%d5 zZGQ%z9J&QC<^4$-Gkl{r)YMO+p2$DrE4O=Y_g)Q;VlPG?5!Ro`Uzp!STHg`h198Gp zsdI{&2aog6A7_65g`GuB?W33d12`N10XdTV|4E9jHcJ0VlJ8EEkppH#3@!7gDWoVo zK*u^Rj$ae*HsBFAYzTEn92{0!w<#WTm1&h4nEh*f(BJt*ap;WBN+K$Z%zi$zdEPYJ zwAjrb$=a9@stT=IqfVB#c%T?sK_hDxet;@%tz1GvGmQbBZeulIA zHE3hHZ6x-%b2;LbjC+$GVOAxMtfu7VRD-ja!--))xQxJ;(G1d;$5ai6$**tTqi;5& z$UK|30>_sOhE=-fo}f><%SH|He&B$bTXv~@o-Y=C2*6A7bQ+Mi4nqiAOt;hn*_X%`|%XeSSX30X3>) zih#LvvRpB#^b=}3GU)U(&}mE_3*9jqEmOnqxY;lFL%>@ zO*NxUylfRmzg=qhJ%7(hy3C$tWz+bI^R`8@JQHVN(ss5ZA#s-AH-WW#7Haeg_4UkK3@{ig zYFUr=@8Q=02KQt4_gigx+F_~}B%E*~Q7uyL(Gi^2cR1l}H9{ORC6=vV7UhT`R<*r@ zOQVY17l)CQb)q+qF;31*pJY(b!x&HYl+ai2Gon0b5D}i3O$!z0Nlq$H|G2OVo8!Me(9!XU`*kW`mqR+2~}@R z;M^)+%I?}kJ{1_rH4s18X{5pN?q_F6x=%AsckRU&>f~y1vGv!(llby)!&qc%oIyd8$jQQV>z5ny+a}vtG zvn^?u^oV&(pHpZhGBC|WTVbyfu$rvCTwiLGW3X%8>p)z#yZ2|B4;!Yv1g9rPwd(tyP;71FlBTPCpeRx+9sW+?@wjWGp_5si^~GUl{h%41J+~vhLk% zYaO5Gxs{srK)2eqL5%`${ae#9FI2U~CUoJu_`-M2iS(H)sZ2$j_qr+91&d|K^fLc# z!B%BLnQPrr6;pM#JWs1AXLRd?%o+(&E9VFp5|yKqi6&#ir~Cddr?-N2D?`waa~}W4 z2r2k~Hs61?ga6|w|G$U*f4dA^SHcm&_Pt`8LmylQKN{AKXT%!?-7z1^qJVso zz_c50LmQRVwi|-$qBW8n;dW>`^u)ngWLn~Q%Eur#m9d`+SCJU0hSeG25y~+^XH5$* z!OXPXnCUu^_GpY9iE5(cY`JD^Iyrd(R)e)oN$xs5Q>C1i=g-&bU>~a^&zQC~KFiI& z#2hd+pE*xuJIGqhRN7k%B?Gca!A9Pi2g9{xdUI9rch(w@h2K?dZYbMCgG%bs$az># zYo?=5_t@?1)1fueV)h(&YuhsrH>QT)&`f;m;PM4c4*@X7Fg{n={p+lh zHK&vF6iPGs6!>nuQITz#H393!JKb)AHG!k!Y9qDJN_NaNj|p99#vQ!dj%FRry|r}q z7fad1s@y16X9dSDLH`o1Z9ho}AYv2x9?GsjsDUqZG-Uy0*<9))N4-!`Ys`%5zCeORu2Xh3YP@~Z`(xQ{5hx0<3d4{^5%RptEOv>T9$Ogsm-neYXSh4w(8Jc z5{P^bj5%PCBe72br$D#8M^5+7rHDeZn2;v50+eRWkU}Xg2fD)v_%={I79x#SvL7?C z%$~l9c5JMGiZtkj_k{@!a5lsk+9Kmg#Tp`3X$&Vs?O-%UsSu&A8zWKfr!-Onie{kd zutEkh_$Ah9kX5mrcCQP3dkA8QR23!G=bJ8o1p?(w^6`&ph8dPeMy;e+6JHz?pV`@t zGu;Z2s=i=CHqxv*H*GRg#rRCd_6H%b1}!AHKA$kfciz_DSQWUDgpIkV61Pf3mo1jl zA%mKuLv3#QT6hy*(b71^&=TRe>Zm1|QHjLb!+0~D__7ky3G0=V#Rj%tzGH(*$V*Ur z@|t{-^NsK6NiwKb3SbQ#R7vAKQ7E`+)@T!#$;b>thTM5?dx(!+uv_@efFp=PTj+mo zS0l1y&!D6+Pc!d<9*aE6{DU}j!ksRvFsWM#;0`b5x*^zjM}1SGjwee~Sk>$WeeDBa zX&N=D3-Y{yVcilrAaIw16`r$gyqBO}Wp9zw79IbtOnc%ZCagIdg zene#S4kGiG!r4WdjT?I8RarzBJ0OqVGb0Jgo8sDtD$25z6#V_Lhk{rgY(S$9wMf)2 ziGdy>cq#0@612#YS766p_??I-Aqru{lnVwo9RqkftIlnkUHHK9Aukf&1DfcM5qM<(@enM(;HSbTxG{*>+P1w3MZ zN`C(~^D9L61H0iCEMww)BMJ?A_bGUn6=;mn$o@8j;19%A0GhFRSc+?Ci~d?DRHPJ0ARQv`xX&;|U+TM{h^F zPjj7mZoS_f_ulbyeS*SO@1M1s(tOVfA0Kjsd-sX3yHAIv>?9ww=%@;J(eQhT@p>-{ zcZ+t>i}(>?{3t~INJRZ;=JVRhgLj-3^A_^j&u7o+p(!65=Op|j{76fEN#^yW9@yzJ z#r&wBK65zEZ->wDL4WbP&hbY!=W(1ny#4sjW^Lcl-_#&Jv!cFozJ#Ve%GO@=Y~SqO zDuidlmKhd!|NXI{XCuSJ;K+z*yluDzH&j>+21|F@0{&{L)bYw5Zi@Pv`O%^hSz+=P=Rx11u6&9wX(Ose6e}KjsutwygOT_kh6jaUyYdFJ%4b_w}tbrzfl)^vJ5=}Rzw>TJjd(inMbM4pwSU#*mAQRR`~ za~Uaujva_{hE5y>&SFc5$hU#j7lWE>Ykf@%+%hwju1xFLMuc@L2MAV{R!kc;7uUe8 zl&|@K^@=ltdmEXJm>&DUiiO>>v$uG9=(TYA1Qu@tn_GdUv>Jsu9y6ZEf)|X;Wsknq z*47pq+e)b@FA&6uorkjg&y91YEt-6iB004b#I-%s(gRpYCzp^`%Z;b~Csh-zkXC3- z2a<)z5l}?tm)smh58-)FpKBjaYx!^xg5h66P*1HC4pMxMz-D48BYf5<`a3jjK;{07 zP{}jF1sImfsH$ffU0~IvI3g+T*h-Y%G{4A{X*rJ6AeJDNSRf80DxqM1k#z{c^s_7l zD{vyoY~Rq3{NRF@^A*&ZxkpKqej|p}W`98;nz^aN1n`*Kn943~j1EXTeRVMe-@uO; za*ZiYsk5vmx{9rxUCs@Lc8EZUP303l1`{)&BKDHW9a<{XFICw}^g}J$tt^}%@vPh&v zs|)dLLE>LL%j&s%j5OoSkk2f;n1olbaj&xuvV;#9rIL383Gf-0oU-9DTh;)j--ok-bw4AgNPA*XjkXc{4LS~eNAhH7F2TO z+P@5r*VNi^B5M#ttFel$qxLE_kOc$dh|`$6b?KQrY_3_xs~Qz8cl=caTcE?>T(HTi zt_yXUBfJ%w!3-sMDGV78erveKG)UtREaEzg^WX_`X6jK=EdJ^Usi3+AMm+QAlx|(H zVS<~>ANrNC=@BOV(+jdAfiU6B?IU^u|3$Lc1-l7n#A=jOJL(+4ZL%YRAkn2&N+0oz zmtl7zH@-ETCu%sYklpu(sc|zY6DhV*<~_(#n*dcg;8W~OA)}L%!YN!O1AeiQA%&4v zH5eJS5Tz2aIX)in^fb3fkm*Hwlf*gYr7#Gd`5_Z?4S1f!*)Nx#Wn|0CPXde@$NuBX zke#>KGu1f>7Vnn*Em2OC%3|ypkOV*&;~5hI;K#qBiI!J(xkqTU3)v}HxQ`f&irC8_0E@d$@VE^6VU-FvwIr7oQ7PFcFj`-N6orYSq-~MUcejO)9o4% zv$tSDO5X=YTn{Q;$CS62fp)aD61&$1f{EFzm0wX`~e+Ft#_NcZ($~h06SUl2i-#TX^nhT<_N%;Y+=&J_nt+UI5J|78{ zc)FujGQU)8W?XW39Y!01p`vOwm#hUV#muqgTpi8M_{NYKotsPi-HO-Caj>AK4yy-3 zqwb9tuOt@_Jz$xOMKv90oGxF1<<9%m1r7HkH@6*8Mm(*O5Rj#%Z>l@{1w39f)P9D% z_a4)KU)3y`11aVT(FBSZkzG#ulOF&IyV=(141uH&f?c|&0iyb_2X!1)0n3o{H>o8T zlMA?LmtJ5Fr2xprkd915iK24yQZdxQEAW4jBJI_!&Ic)+cUjBSN-lJ9leir^A6UJ7 z^Fq}|!>r%f6bfDFclCoFm{Z^VG4gLur18SN!P(&0}Z!PL8qBd(Fd>;|Ng^2zsgT@!u!7{2p`LrN|hC6pbK;kADcP?h>^i*+ee zFz?#m5{c*)-GlHoysk-i=}@lvHtcTU8 zJHsWsu=P$#H|M|6`a~$e@Jf6po33bSU+-%zzqe@_Q|&-s#=cbDjAsA2psHk=y9YyTJ{CYQ4wT>8f_Lp~PIq(5?BY zw4vP)t5>7?FOtMvgogD-#9}&PTTmY)R)vg>3~FILAo&lM+XXZwFF04$(Hw=(%)GJ-H+uu{R^;vX=O`#j_dT zt$TGdon~7b-RI2(EVR!V*i%BsOW>DtUL(`tAd2R_;q6-Izj?qmrqMJZ z=D0#7d`*B&{m@KDNv4b(O&z2x4ZN`hk*sZae-DRQ)b?35A}mG^9`T8hB08LV^bb&^ z6iji6+$j7l25&S!fy(5Sbh=t{2#)+bHP?5xPJmDQRMbzB&HFiF~}N#a{`E{Dfri2to%Zzl8?v|DckQQ8W!YP-*EoG9EY|7cywtXQ3zqc3@2nKMxYX2z72*O!OagN_qRRH zuzC>A!O!wL9QK)j4l=^lALXE~`p=;0ke?{79Noc zQ(G{kaiW{sKwaTFuMXDen75dkTGsdI^!zWJuy*dM1MOHEl(lJ}l@s%y3~!)2MXmn1!XP_pvWHH2Y62drxQ z0}ai1v=XuNwy+*g(s7iD1=qxab8^W!MV7GGdfV9t739lF2;`dmZ(!Yb3@Cd+mQfmD zv`lXM35_9DF4NGrsS5r^{aBC_(43~ib)|g4YH-=PkqZM$^{2#$z|^cc;|2AAUC9s6 z`>3^s^ED{TpUeL%^bfQ0W1bkPzAG|%^hH4K@5y#h zRUOI8wWO4KV6sh0Tq)(V=6#5!+`>h)pH?}oxyC1)=7X-5)C+};5yXutQjBeqEn}}= z3-e<9wHi#4zeyVkm zi2I^OBE6l_ZZ!N`L8x$)q)fjr&p+*opY!By{gB^jFMaZG0Nt#!3!|GQbP8XZZ!btK zPzE^FPgf7l(F^1@N9Q=$S6J9rc-U8%*jrbx%9eAiY%k9Hkdze$Zl)N{#bE_g^0Y1` zqr5Fh7p!5pqcXrDLzWicx%{L5(Cs^)4FLH0KyUG?3u3{s0azIoDKOT@29Ex*>8OJrMBOy+^y$b#f zD2aqg@gUsZmQYv6q$;I)J;5t|8zf$djoETb4Ci(eqi{-CF$>BrlTTKl7^0Rcii7_a2q) z&ApS?--&oTd+Y-l%rofhh~2d?QldSAXT>s#HGS-%{U!CkZfI9Fnh<1v6nqA_ z|Gq8wKja-53wIM^RTD?29|qKa3i$sI%|LC_3t0r^TL#cz(GcGvpRY`+Eh9Nm=nqW0 zD#`kPG4_^Gbw?>DK4)V6$ga4<-oy}bUuyt?@A z-l0v~&a248w>&=8u{K&vWsP5Dar^xJhl3)-Z^Dh}LMX|#nV3Rru9C1m@U7oB;9=J# zVS4y5@U*w<%m?hz15RL6Xr544fEHC;SQv6Ao%!3N)J4P!UBfdQOw!moqk4%?`fl2m%3CvWm_&YFsr{Yw&9uGSjOqj*#@1 z9sOop$oR+X_Nk#ps0r)h;dtU8@7Szser2`|77t5n8OK%*tRhMchdJ!EJJqf!gzco% zgi>JBx<))jrcVzU_AOisRBG*C_B#apX_;%0X(T`Xfy@eYv1Je~Q zliDWPavM$+4&m$!*9wgLJ$0)huxDyK1qC^n`FTT7+3zK#MP(p`845<=+xU=VI9%c> zt#C4KrEU=sFidhmwdZ0qlFNS(ot^R8f|uc}Ey#*Mvp&z)VWE+xfyd~B^eB6oR+3vgQ!_07;E+?d9|F=9WhSyr zPCUgXS973oA*-k8zh`9?)dKfZ*|TlP@(eS~_-CfUVtUEn$ObD8p7kReXz$MVi6lP~ z4rQ!0Q9B!KJT>hC^-{o(J~?(|cwH-kevUmu*Y)ouZW+|MGd^NY_m`G&+uzP6x5Ud@ zuNeA0z^u}Rg7!v)L>XPrg`gUm!1(*53rqtI#Y0nzrcV(kt&o6u_iL2JNSzsancsZO z>3l*hYmGT$OtJ7p(P@AB)3VIWzwnbHR2e=(=FanMpwVz3BkXcol4+BM_zRuW>OErG zW#61kOGStKT#7PAOD-4~`HHv9{2pUU{*?o%Q64;s=7-S4$~M#I_?IIQid4|(-qYiN zaOrg|%`T4?d{1lK5l>&{nukJV)W*;cJeS%1Wp{y~!7W+`&Q~$NHTNE3YH`HTCCwSK2iJ8Jhta zjZvZ|l{D-ka?gN2J~UI;qF8TkPW8|CP-Gve3R2vTbVzwJRm)2`KB3#MJcRM**pTMR zlvZ><^>i&B{A?c)?Eoiqjug1IcIj}_!p;hl;RyxY3Z1xHq=g;-3G2J49P0yq8h6iQ ze)+=jKepbL-K@-9{^4o+58p>yRaYZdGf_)36B{$<{~G3NdFbIS6MP6rJ4)#x3~|6H z*nGmsQDrGxOsLxI5pKSioDGC=!2vlazI<1(wRjyIQA+YtyUcm zTRyd}ud|8)SDRnKU<49(fSaA$Z$l4nOD_aS?+$;ze&M~@hAwC;4qPHLE)K89kcQxk z1{7H{N7mzF8|nRS3v#7rVIr%GukNdcJ%`rlD-Vs*`-V+9cq%Q@mqfM_SPBg)4Um=*_2CCm%&r`%MpikIB~fm9i~*9asco4Eo>CZ zB^|{c$%f3TfDK-&BUPY$HO%vG1c}FlBX-RlyO|URfrqpG1{0&4=^p1X{N>4pYq4$r zqTUwo(7ltr2P^nR)~Hf1I(Ts6oa=G}6-HN}(LM5X34*>0!Z;r_2;Mr^%DI!Xnj6sr zy(@xRGSuwqjT{KF{F-NEla)C9jsOp zHYKPy+Sy}-NfM(gosWAMm?WCPJx0UeQ&8W~G%bxf1sEsTZ}~Y5VJ8xGtVc#b#Dt$_ z5Q?8eNd(^Pnvw71q$6kEy`XfBX^fxT-sBMx*-c@D+OYXDqW8&7KI2Y<3t}7kU6?b* zmLEf^w^3;o@a*vmtv|uh>jyrVVyH zl@CQ)i(B^?3SqXKgJC<0M;u;lU`B@8hi<&M+H$pUgP<`{L7#m~C$4q&;sUT0$RAXz zJIH&(5H>m3X^^`rm>_+yK{&Y!*ot|e_NW@~2DH`^Top>77PcfzrJ@Po+Vu>-ZqXibvF*i5YDJmGVc}o{Bx2 z$BrO}vPa6E;+=IsPw-X6BVkYN4iq5lS`!p4wsJ+pYZ!%lbUqGGp$SBch@{P$hQ36fH~(ovEM7L+ zV3lVWZ#$TF34ZxMT-v~ z;Z!b1cD*3<)5`cFYKHoaDophdKZ(L+=vfR!E4|8GHMu5s2y++1RPf>-f(8h@3=3p? z=yjLHDMzl31#JwZ;KgJly>TYJM(AuwjCL_jV_nUG?0VJ$=GSy1$)?0nk^^Uy$#)jM z+&HEbRcjH0z17Fus=%>(hC0K~!CP=M;{cBxWsmcb?sRH zu_Emd$}wboV(oL7ylGF=*Jr|`evFFj-6H}h>_

  • =|KDIYmOAK(L2#3-|p*8i@nX z=x$!vNLJ0%_Efoch#dZ57IWMHk94?yjp1qXVsz_}!AnnQ;gJyRfFpeKRd-c(`e3Y~ zzQ48m4@NeV>37(d@ZWV54INqE#QFj(uQif(V6W$IfN$h2#xLR30)(+b0#-NuRFSEr& z{G>>=IB{Ra8GRwp-@zB;qVFVIO9G)gDsiAqY5 zHyf+5P&$GtqRJ2_g<$gXj|4)3AIMmO*N{u{5z!s_jA4dn$F+Sl=@}`>aI^ z^I^VNL2)L85$ZrlRX}t%!J>-Rn5iOT1<#Nz0oO+@W_I}48Jg%DSfnVgMINU(Ipl$0U1^II7T zQ0t!zqu?YnfVruK)kS?FBBNfH8h6^D`^Fj)f^?fl^Z^O>G!m0`U1nwbYiP)dZM&O1 z>G@8j!D!3)8}~i9mY-M?$#;w1+^rb&Qu%0(c5T~RorW1_7~M;qf>tU;he9!gc5i$m z4Da&U$uxN>XK{wlUP;RmjV*1?MNZAn+zaEwVrygziVtoX7?fAQ`H#@-e5T@Iy!uX_ zXvz6Ifu8{nItS@Y?_mMESC;+zZ#%iAy;MSlg7q1{(VQ~%_z_z7K_5{6{$z!&IZf*O z+-Z#dWElUijRXfrvwuM)To^?i>>RCZ%}oDU3)y{EM*k{=V$>#8@yxJ4x}R3`TWEIX ze)u-2L{6H2Lr9gWN7l)g7lh6WGMkyFTurqT<*>)hKJ}cr*&DqX84Ski(kA#?nhlE9 z{2(26vdy*N3<)V(!{0jg3YdQJ>ER4`e>uSUA^WH__=UlPILJz1E5S*ef}6}lBQ#_j zmWA&4v8`eU7jOG;yekL-XBlH-M^|G=9-`HBL&H&Ws30gSngU{+MFYG)R8a;@=Q8lA zJe?u^@CBqk-8}wa0iN8Y&yc8taq`r*+b8oLsD6DC$Yzsf=b#|8tovk#J=zJD08wfQ z<9c}B(A?FX(z?OU`n`b*qSP?;GNWel0T=s*!wyz3W1UU^L3yoQs|K{H(4Jx7*aC1L z1=M(#SlDTsO&ura*lJ4ZDYhu~?&yeNYWWWS`A!cv=mJIrwi!;KS1Ah9U46`9HO}sQ z=X$ytUf~t^j=A^Rq7TqvxVx}ka<9z!@hG)JUaaNWW~%WzHO57jSHrVz-#50l0oqDc zVW>O9-umss3~x*wHZONTKI0neG^tVWmBO ztap~D0Y}SclsU_lJ6)2W|j#SO6+F}|j?k8a8?A6@fL%?0#*d5a3kySbK{ z_w*;H@EyDh%;WqG*$tg^&VooBy+9~3v_jb5`?!FTi{EBaA<$ zzC)J2ry<3YIQ=raPss2{=pC%tY+gU4?r2zOAih-#`^yv7YX3_^$$4QgE$M9bN_a&! z^pESSpUi(Lj9$3B(Cg=~*2uklGRHD(NBjzMcrUz>6@4Y>((9<2J=>**v~nj|EL-ujFR@Mue;Z4i6+MPWpHOjN zZ4%Th-?L#W^{RN|DRKJ7ZCW3eNxg$fxW^$dOyC>ynRrtP=ys{9b}aTTXG^EDSuI`HfkaeIBFYt@u<5gYFjidOxD06 zq8`uM4}+;Hk|aC{)YIakwYwrPY>-gSt>Z4+?w-j(@}orhnEHE-(msHi4K zi%kND;Ym%phUmI9);TeGR13{|Gmsa4zdY6HuTM-3%YbtZ1$GB)3$4bdz0ft$O`>SV zR*;3WMaoMH-*QU5BaqV9ug@(s*ToYVnkR?G)^U4V#s_2fZszX>aW+;g^kA2xR7zIz zlhZtUiqf~@GC}m%2Hk>4?8x$tUNde`*0I{SeLd-ft;vFZ@4?oWRsMScYJL>+?z$hw zO!#($nLUE)JLSOz3%Kg)6Kr2(vII}oD$Z3nX4nF0_7S<=Fh%VJ^p8?H?7A##JNc@f z$!j?*#ChI{m?H`_@8y{-WCwD(V>+lWtT4^>=WoDpv&>NJJyXLvc`l7~hK*c7pX};b zgNc)wS?MZx#oB{#_5;@g=1LR92lk0Gt>2`;89~Tk39o=y+|)AKURlNryn+cc(5Th; zS~cS^L8;gV^B?wEYig!w{Ah}>zFtb{Gi&d|NMqWK^`z|xGpq*(jZHZk^Yc3DPolIW z9&dW3kv1DnJ~f=?lm;8ci+k0e&iH19T1b^?fzv}{VL-da-Wh2TeGQ~q=BzOs-rMNS zb?&`NyqW?0w^@lHG=Z@Odz#>1DmCgM{AbgeF+M=UC4X)30b9o zqWKA(-Ahcp9g~SK9?CvzfrKqyecaeHG(Xzd8MnB1U?G4dNq{Ttt2b-bZ({8cPa%ffa-)BvMQb z<^1r5!DB1=3I;4LJWyMm07O*g4n`I9?`Z#i>7K0Vu#ta?2_`;I-2eHs{g;j)ROR!u z31jom>uaso%!u^CfoagTz%@iZ7Tp9^)EfYRccQ ztR&*ESw@vSqK3HNdCh0egfo-ANlN)A|Iy*|J#O)>pyLiaejc``UhvXe*ssaUoD56m zt9r3Vr)e}RJqz2#;yt;Wi4i9zdx3cHn)vRLE0e;1IC0On6(?P=ru1N4@Rm-p44*i& zUmJqT_%6MsZZNtm3wjb3yWGBR!)rn~|K<5ZA5F8~&=>^*O?-v&mI;B6?FQNhzBnP* zeZBR}N@r5wL5^=o0C*J8rBai$v(=)|E?L4!IqD}>gG<9G-`HiVRoa;PgM=cTW-9LX#3;N`LX`=@4u#8a z1&c|IuC@h|Nl;}ME&NF#LO*}`V+vNcp(WgKX$l$V43A9~Pvj$`K$h)TW$A}&m{xwa zwC$cD#nqrC6mF^F~{%RtBb){0ll;JEP4tgf)bfurWL5<**WIDSJp!h)`}J zA#gEM004mWH8fCPGVlpMvg~mPs%QJLzcnTN!{t)`SxSep?#+F={n-Eb<$e3%t@kSH z{058A8~wM;V71Yw#pQW_D1bqwUN;!ei;)3yKXY)Oe(y#DfkCg}lS2$3bhx{@amPrr zDiblN(z^r_c8LNF^hnG#t=nL))SQ~ba7i2yYVP;+{q@2CSSk;mdRCMD&+%d=R8xvJbZZU0cDp8MHBK*yZ)@5;?T6v?xiLYw{&Eg$X8PZ z>$q}CK7J2*36qzxC9LL4@f-yN=5tLG_vVSR)gfqUR~H~tf3D)+(OkTHH>ORD9F~Z9 zemJa49Y(>8IjyLpCmkV+7CO~tsT^#nI<|^P7O1Y3(=2o@D{|Uwy6H(B7z?SbrFo=e zdv$?HpZ-L0VCY|IO;X+w@!Cnk05-}8)H;y}_t-d8*f;4}-fn)ktNG{h-^+HAarQk2 zP#T(P>deIgdOig=3GP(RX8FCS4h`mW?^VQ1o9JFAr>l<-bg2nG(t_3jdM zp<0v5W_hhkS@`s-rm0Z}9zh=+=$?Mwi{5#8?>MR@ZZ-2HQoH1B^O2GP6ruTt1%6B;%!#vG2uO;N1FPUggHhwIvywl>+kiB2%cv!(-e8+K#^Fy1eR`QfE-wz(lX>>pCvUZ*!4H)yKhY4d%#WFDy7;mz~X zQgy*C+VPG?3Z2{C)7YAXcV@wFhNpnu_f#I6yPLMKUa_zF$iAz<$VJfG=a9ahc1a?# zedYQ}I4!4aV(~<=TZO#-k+&cBm=r6xuZJz0+cR~G3pBLZWo`(*B~doFiYy(#_e zDGp6zz4d8i+nKiZJhnM&7O8!0s;EHX(I6xSUGzka1d~Rg=Z)P$-G7TLL1Be8vbXhTz)hO{IIir+yITHi-sK2fP>?Y&HVS@=Q$4>1`RwXES2Mm|$ zbse_Wohoc;V>ek7sr*l&AEGd`xqn0)ImVW?6`sRJ6I}yt5~ObzKzvCLxiHHIj3SVv z_D85ihBs~K@`AZZG^+--7pyG^7}Frn97N(!wm>4Q+fqFm)N*|g3F{htEgzq3OtwVy z*yy*9>w(M)wYrwhc~HKuL?2qx{3Uf=-0r;uAcxHbp~dt}Zk2A6+FzurE3Rd9^?@VR zAEWO?w(eGauz2T@>^A)2_v&F?{G<)e7YfJdHgu;FQIutFf%Qks!!pM|Yy>}?1*tH& zLZ@Zg+rE_Y;8Fi*)*7A-cX#E=ygJB?Gm#x}!CoX$EW0HJ036{?hj-REARk)3U4zp~ z=7sT*UB~|R__CH?@*FDCq(Kn+A#mv~9i@&RNbxHeI%Afcz7++kxe%@NtFt@Lk}Mnj z$=(Eo1zugk$v{;mtB)j7Z^g<$J06D1Df#In;;HVV_+eYc6H`Tos6EtO5a?GXm1v1x zwFUaK>u;)jTv`~9X7DZ*Fl}}4t_o<+ln_uN>(_aD3$T*XFOAPy{e0gnc$i)uDGi@- z7{yVB%$g{VOXYFx631S}3*QTr1DIv}hm+%$_XSRoAuj{d0;B`qxU7jzMK6bUFe0j8 z+L5AGO|IkeBX`SM-jKYkX(dtG6bmqj*#KaR3CuHsQb1>(fYwtX5fkJ8SI7II4hoaAEpbDWA+_o=P$+Dr2VJa1OWf9XijMe|A;GGrea^ z816eyX4@fx)D|3bbGL`GD=crqy82ACa4g`$&QqIUH5|z>cn8)@_XZ9E?KtrE5gAQK{$+P7FHwA^Jh2@M=9QkcE6Ee zA4yTjwc_fTPJbsmLhxN^aE?P_z)V!Y)bTaF!cMhC10WHz1WBA)CU*FeYzxz=Uqh!7 zh`B)MG%?ea5|0ZM5eT^kovka6ADcyg9NH@TJhouPv<1O;CqA{o@;b4HIU?-)(lhM7 z5b6!Lo;k7F8)kN)JnZqW#APz|r-+_&DDE-vg?8S+_x_wP#RhR2GjIJam58C*ljl-Q z^y&IlePi4*6m`#LP*r+k%pU`czdryro=8S(^9@W99U~H5r~}D$s$NtZ+Uu;0T(ss52F9#Z+3*RA^!awdkf2xA!gfsZ!e5iOr;}xBogb|?F zobDY@3zKL?D20Nm*5VdhMcuZ)ww~pXB6bqi{&Jmw_!R-iwU%94aH?j2ongjMIeU?d+Pb;8 znmJq9Tl~{m^q)WfRryY8o_XL&VtiD0FUeHO7qr!*C8;>*qM1ICsLHAAkvE%A*`agW zSH~MeL7y#3Y-yzq)EumF9m#toa~`7HK~H1K{Ge>cD7C^}6M_pAQ0%?o^9AMphI0Pl zBbjK%+p_N~Drs>vmF;73Y2(l8b1$eU2$x4tTWJY|A-e4U8A%`oz*Z+A8_Z(k#IY9` zh$$ql6cL{aXXYE2+ppr=9S&z6U65uhLH89Kl)PcaAR4A>dm^v&9}5S54!^>0a4>QQ zkF^f(U1ry3Dd?Pu9b5k zAQh>k&S$gfEjv?y7XrifuIfi8EU|QL(VPlm*_4+2%Rf*48ikxft=g^}ojf_Aubp=hehueU@c0{Kjd+n>8De4m{3^E!Ey zjfL!qcJ1+c)d+vGl)4aBR2>ax_fPxm8a~n>*Wnlw@_bU!#7L7E?O53`O9>Wj=X7{j zIybGL(j6?mDi#Vw-PEc!o5K~4^Zd}I!GYyeWK)W&B``zG#lpO;AzkW$Inlf*Wv-S7+h_aUn=qqt_lFT zG2>$H4CU*#ko#Fzz-%_*B(|yaS^~1pVEJG}lYcQVeD&-|s_lFHsw>^NxC8{QEnQnS zxk*0+L%F9%y0nv$H1CK)dA;itbM+fXsUC)xr5Qhh&owJTXPqu^`X(fV{&2BF2D@(B zX%11AlC2Bv0(Wb0(*1m4CNLY;pTy`6cGswy71qCeS2J%ic5fLsXFwa`n2j%#>ernB z%m*qY$ceY6bWWRJ^rR54!zLaIjmK;l-%6dTRE>X#WTreesl!CMzPRd9!OyuuJHYZ6 z{`_RtzE*h^vSE+)(EXePXUr2(R+hEelVdCFC)URyn(>J7$EHTYF|k>THs}7Nw^zAD zej&q`xfIiwn*zG*W9PP)Fa<4hjlSqG?E{RgV}Au81Yh+o@-v&>00l%E`zz!$ji-Sq zz9!o%W^ds&V|8FRH!n_W*<1@9lyp)$*Wn99R~bovx$oEB(hAF^s4`I(C=&2@p)UjH zr0>XA0B^6oxR4?^N@zsOOGVF!>wIm-Z*%ql@xR&qtctk%Vl@s+Sm4bg)RGFe?<_u2 zhdP~n8PuNQz)HPFdCtHg)Rt-lMe-9)5#|PO=7Sk8Jw%NYN`o_}w3$xKKV1<%(+-_b zDwN{=gpJ_1$UJskmrW|pK?x%}h9T;w8cnJ8<8OX%NbdI!_-B`A-My8)XO7={L$0kE zV<2IMdOTSEqY`DsJPmY38PR6io0MaGbI@b!yX6a{4vbtAGG%=s;af=Sdm)% zT@DOr%`omt37RM|;dP@Dq?5Jm+lEJQ?@;Z5B2S1AwbGV7I2aw?8%1`vZsLClVv?D} z$y>*rqG8g$Ik|eRSMK2}0A_aX8zl>6`=rs6heu557~WIOTx+(Aeu@zc+PF4@`W8$4 z6rM1eERPedDo?E80wcG7973aro^N(%yocQ;dwyDq@2A@EYWTj#7HKMTNTS_yju6=DAVgV?RJdlh(* zK=^`+d%kp&%|fpdh<>i$lxxibZBev2EVZ_D>lVOevpEz_vyo4 zCYWjWAwGXPiNX4nCcIsdl*8haOhPdW(L*3GuN5XfKSwD+iLj!F;K265b15x2Wt~m~ zKZ=wNg**p_7-7S0LS^so5SLBj^$M;?{JYRO;-})h4vLC$g57^&*_R*N-D-KjW_m#} zYYb>55QXct>*oNU;Z}%^aC2~<b`9%Lr2Oth|!tPV#W z^*mlM)>YOr!fYgExJi2a%j2s zewpe#0OSfP#QM}kd&2U9OtRQKt@>6pgnFc%kTmk~ehTmiX++Jg9$p z-(NsH$MRiP*;CIrjGO4*ufw^We6(82g=pnCu`C(Ko6s#Q+U)|%`E>4} z8LK-vL#IRc1hxOH%0e+JH?`|ee8H<+zx5n;tuM$C8C<1+%SIt76A4|C-X=#k%W0u` z&fCA}LyX?NBqX{d5j#DvLK|tyMJ$2Ex+;Fho(!1T2Li#Nrbl}IbI*+Q?=(;0Cx7eh zv+?$SRfwqCTA7&r_d-O&=5w-&?r&haovh$l+9u%5CAMao^&_1AlsTQXV?ZAI*S@n~Lo#2@MOL}ubkIdxh>vq^aT!vk( zIE(h?Ur(yNiz|RxL~LA~13aJvR55|IX|>_%tE@Xq%Dv1)<^*(#`c&bvCQ31!SkWhZ znpNh|1a@dq7Q>xK;-&g?q~$DLz^zqD%Q59hTOTExg0X8yqVI#w7XGru^hjBmiypY^-NyOee5 z9g=PR#9Ij}l;!4`txL4^3cYog9#n;#Wc<)AYq!1|JSw6sHw;SrtsUoTdvcKsr&cC5 zn$XS2Y_z(%`an{)dmG85Rd8p8NoyA3fqi(CmY>Xsd!&2dNEls}eoOAlR_Q8$M>qc*No+F^>8IxkWyyCKc3{^z-KEa4!M*9y2y!23_&@fxyrGqL#Xl zR`ssn2Xf!MJyBosT@#GBJ+xD`xV?xhW1_v4Bf?^l^8pEF>X{-n==2dZ>;uVil)zj>U^l0oNqVq6)FUlGzJF+f@ zF0Et{0PMmAlC?I3O0)pyNM-fY^Od($YK#V2n%M!3{Cuj#CI*$py3G|XFZ*PNiK2lV6bslY4nnVT zaLsL*HLvYo^~7mwcB%8ZjV;kG^Bu@NPyF!<2cj*Fy=LwBgq4m-GLS5*)0goXpcml^L3|SP}GU5}lDeXd4K$9d3u2BnF5%t*pmD!trw1@Di&$=zU znk*ziqM>H(i4`K0shb_PCY0|pC%W4Eg-LW}j{oO|aHv|q-elu)zdP%=MP$M)#MgS8 zu`Ro8$3I~A_fX!q!#v*C(BQ0Q;BYu}tKMiKtEUBMwiEi$X<5;r&lmco3)5fx_@13% zm)AG=>W0ey_x1n*J^QKou97v=r9Rb+WDC~)i7;X}kOaT)?u6*jVGRGWnCyhIQ^paxGbxl6qBUL1o58$bG|;2nZyKx!I5nrbFH3 z;4Yo!Kju4bZ!P^E5bz2{9Ak@(dRg8Ur;X!;gQObm{Hp4Ad!xw$LLWa&&J+emJko&SnS z-N(f16Rp{PpDdNb#W>3J=Q)|%ZTz!crv zp-?x4om85O(Lfl#Swv19&o(qe;=`J$Vu$ZaAkKhyX{%glM>+IXe!;8St%hyY?;MeP98j@(?4KN^bKJ%?_Iep#3377Z| z0ZG`psVLu2kMcW{#Oo^?k60*fN{Jw`Cv3#AN7G^4T&H_#DDOhxFbHhaIgA_p54%AH0Ibj3zVRl5zl!X%RW1pYA!Wu)*oq4#{6JeVQG7tKb$m4$NDIqTj&Fw>Q z(!;eWLe`J{&kH;=-3&VAGn_p?g*+_(b2zJh_7H5%{#QTYzun;3pHu&NA$0zIE+_oN z5~K`N>OaCi$o)bjwTmkN>U$`2GHleStMb{(CZ{|J7izF{S~n1Ndr&KNUtrW=$OV4H ziEN~Id|eAu!Th_C-u|%Ra(RFMxg62+RjYp!+?YD!r}5Np5ik@wTxy)Q(L8N=NDZT( z^y@VQRoHEVh60zcwGSiiAcm)zPY%YB<2IH6P@-pTFK3yynYVvGaMS%Q@pkIoWGqFd z@HzVGMuoF3TV?TYurv08El+XAB5G&X5{gEcT~td$V(I0@YdYXCEqe zta8I6%x3U6d~VgsGz-k4Xat2YrA7(O#IzzU5$J62vzP4I8BMVm$0fri+GKd!P_*WM zre_2I$86o0t$v8;=5kQR`T+tNE!4wouimOq)5dFy8pqTv?n4c#GpKyK+RQ_$@NCmI07f7?_0`sI4j;Ri^-P^a9aP1qe>$2vto z>i{DS3Y50XgtszTn~UIg6{hdEmsG_nt$R3r0=;+CjG2+G7^LKc-G~IQ6;sDte zQKNH08j;+qrLiCs5Pw~m6#m*RaR1R%&~jnnQOi zEpYzNn)n5O2Zky@^FqCyAu9W~Q&h`5qKvX!_a!E6Ge$EdExkHz3_a|?QHC4Ej00~@ z$FjDr#yeU2c_O6+r_G#xau4uW3I|RDAB3}gxn+=_xLTGz$G#9r$}vGqJ|!K-fN%sCz9 zYQ#HVe=gRRNv_~RP=qANHP#d~*MP!W35dU8$63%QD-XTJW*;h3?w!bMC+>z6j*xtT z)-J^FD9~t?ydTm>EQ|Qe4=}#CI0zj~nEfuJpGceb(iHRI+VV_959(@0)=7Iz0U!;U zB6#ejs_4#)yFe$1o$G+=_Nv;sd%w-?u<^+v(8P!NBu?WkGW7fPLO&O!)^o~?+!0w& zydA1)Rq6nmh9nyakR{TVSmMtW)5JLD>M3z@`sgc%gOt}-EbbjnI87#6rkk%~RL&O{ z^zi{i{NN3pxYFMM$>WRG>7`qmJM1L7^n+Wfp_}__{C#Du#Xk|D(kr3TRp0+IUS*Fx zq-mDqDLX2{_B!h zJraQXJ^li96tahW)67?#Hn>C1hv>{Cz=oDg`lPHBcQC%j#5SMlntibZ3UNCR{jg1V zgO&ri1LzkF71>OwV*yYg_+cVn`}k7q6e4Z>~?S82D%Dtgx}g;!}i)7KS|f>>MuBy}HigK^7QKlf|(+K2+^^LkM#Yomsd)LyI6;I2J_ zF0UXm0~~jWZbSp`6~Uep;69=`)n22}bgyd}Ua>!p`c=Bh_qGQo0-zH=T#dNjWd{Q~ zXMSMmKf?H*LhK$JXkEJmUc+H+^{<0H7e|G5s|MBX=;u%R>@oKt1<4!`N5M^+s!JPf znY!eMo-?S7k`O9DwS${Scv=EdV6#}T)<$bo+M_lOlt(L7Ao|+Ii}Wm%mjj6@0KQUSvC|}VOj*I=BldCG2VdcyLAB#j^bfp5RA?7B{yyl z^9EN|#2}uJF$i9jTeLna13D(4@3m{Jugyd+WYF~ivv2Ns*|Fso+#v+7+E z5wEY@icBKg2k8hyHdzGo*kjN>x{T5V@gNz=*puMhnRr5yNBKXNY+Kl4lW>)>$7D^e zJc^*{H9hq7-#sL$Kh0`LY^yuDydQeX{po5X-xz}C5~5H0IqO5Vc=7U&_4;gCC5`cCo>-}AnPR2S-jW8h@W&zn2xk2WCioanH7dn2 z0{ycOChzCUnX8;FeU6mQA?np06%waFrd$|7O`IQ0GR@#zGd zNYXm^Oh=}x+too-VCe^!T70-Lr3EDXHIgj0o)Sb0XgK}19nT;zp2T3$`nFGLQ#~ht z`T^S>hTF@L)I}@>>CJ9g4=={}39oVR`FlwR7**IGxidYl@&r>~Po=D*TN-?5gd__N z^tK*R6_{lQOD2W6za8x+)9T#r^OWwY61fK@m)t74syH07ljLTnYF31j>Osb22fw8J z4qZDm5aqOUQu{_Jud8jgpC@1=!9G!z$ZDN%G*C~1%1K+*tZ?A3NzI)Vd7{s(9HpmHy%^LQW_L30_$_DIksr}| zCDtE-l9c*YhZm0ao#Z06o!FkE}K zd<}ze_BSdAPVUUh5&)p~Zl+c?k+L4Coa4V%L!ug)5EHs{2bz*}Nf>*zv~sN4x?;I5 z-D5H%m^wyc|_-46Jk=lL!G!gi;Khx#id-Y=yx3AQ|wtbU+ zh#hOnMma^1 z$L)S|Z{ZeUW|X&`p1uJs|0D6GILf_AWK8)|^;Uqis+M+(9ShE~Z?e0i4G25sc-a`4 zK*v~1+YugRu5%QSCnXiR-@|yXU+>y`<3$6Q>OFT7?^xZY?@*YnzJeVNcPrH86#ia+ zlo)(5b=4c3pJOOVwF~F6rnH%6xhHTtF>Au*Y` zhU^hah7U>_`ouL0p3>9)Nqo&)`F|*T#~{&yG+Xo(PuaF@yH44*ZQHhOyXur}+qP}n zuexV?-kpwlH{L|#-m(8?Fcp~vs;l&9PrAT^iphaEk+Up?J(LdTUtNbV_MQ2h&apUf(vSV zE~)cXj2JGXcx^Ka$IfZ>`LGC6#e{Dy+H#5_mzuyGI9R_=E-@1}`Ad-sjuj)AYmb6Z zq-$~{j+P1U!g$4>`(x@kb>rRJ`}jO)VBZ5SW-o)a`JuxZEH|m9Z?yh4*rA50f(z0@ zU{Z|?n&*(p5*lD#Z3S~mVABI8y51Jla^DtSeZiL_8lf*hl@KrL20F2c4I^6*X_#y* zTZo+wra9@D8(^5N1LXoItz`zTCP!#6fl7uiFbCT9_8!7pV_dyL0DY+dL8Y!zN;TNn zvQKHojyXCHL~dxK&wC|skjn)PCXX4^7C2TmvXT%S{9BcXh^tUk^kswue;1lFnxP`? zg>%({B^iLq-w3cg@9&9Xtq?vk4ze9`K+k$b{O%lZ!mU+A&cz?)iLvh<>%TYxhXn`% zsm3W2)k)^>MI29=bP8iFUwL7yBU7y1Q(Dflth8K`d$edHkJLO;>@O8P%Bq843pJH4haqL~2i%ESE$? zNVK~Z^5Vigl0+PK)uIr*o$CtPWc#OVL}FPDSijHpT%Pq)n-8Qq$YpNdtS&6|&Y3N> zB7xF&;mRcKl1|_3o7~V?$80yI6oW~vS-JCW$G|fUaSeeCWQ~CoK;GNQRP4gHb~k@d zbK2S}(s)27H=_sKz;9R%83dfVRo}pWBkhGuas&^`NQO<)7IqDh+@#myZ^Z8}48BD) z{-q-AKVRfp0g*9+VpeeL(&(xG>w8?C#`T_BVuX?nS#nP#(TnqU>@tsHV=N?h=Mev=>S+|2T`ArEKp~d^mQ|AyQf6*s6DN9g)RGDwe6W)Q@PU= zsxh-R`A{td{8SNu^mX@$-bRB3z*vFwFup|8>v?CsIF{(znKNebxHdDWqEvgYz81cV zxL_{ten@th;JhTfLf?*Ab;3Z=m?_VM*?S&IEelbNy17k>pT8w4=o{sVIk249$Qm;3 z*6-eQ$FRL_FbR26ey>>#h?tgamR@t~!1$y!9wmm}wNeVX3nF-WMg)+!D}3lkoNtNX z@lnaJUI4OR>K@2@`@^I*TtT_#mkMJ`kIJq?4xd2}bJ~Cod$2H}&2O$Q2p=Pz9?B4S z3V0&e{RDs%nMd}a8e@(_WRk-Uxe6~h3y|MAZ+PNBkVCc6bbuM7vJ)I1Nrbi_fc9lJ zz%hoilOO>kA=>8p7?Q%a_0zX_D3Iy=6I|^;^4u=U+-0*6VGL5`6$s1E^jepOnSWqR z_)w0gATZpnqX{m?z0S-^$H0&*5-0!>ZX)%)>Ph&je~o)y&6l!9E~J*Jq7Nobrsq1o zjN2Nszkxq3yQDNV(T5699Ss#oPcvY09juz*$u+wIZ5zl23DyKe-~Jk*feF;D~LV zU)hr4ev4<-pJK|{C62F|o8_A1yg%F!QsL+e_@JjQ5mK6#zre@w=)mciI_Jm=Jx{nG zEi)(fhLV-9a$@YBWu3=*qU@f)BaZ0UJ}80PtT<0f_2`&V6vkt8TIfObeoEt3ryUgY z$Pv7Kq@XyucG?1)b4XPeamd@#XoX|ZhF#JcJ7MD0e#8C&16biiiKA}{P?fuG2=EuK zgvx^lone|(;ON!x~A2B@d!S?oqLw8I)7b5tE?XQ*q#!9wFAW$!Mync z=r~8eEV?-r<_&T3`4Gf-(hI}BTg_S>M)!CuYlxDjiLWK!K^1)IKLqi zK}p~cR5pSWT&ix+@uYuVX-m$!u=gj#H~2@iL~nCwE4cC{f@y`lwSQ%oBj1w*s$tfj z)8B+<{>_uW_c$#cftO%(RqRkD)HM*Cy^iqtpC_wUs-}4&)gYI;L-!-#;lKtx3u5qj$$3J)yEL za%c$|x`B(@M|Ve|y><1Q+7Efa^6uNGJ$0*_fjlTg9hKQG#BdLzDX?*P^-Sw~K!cuf zX|2%SxqF51itLifDPBIg*Ocy-q{J+wv1JJiREf=Nbc< z4|r^lP8MulaaoNYiB&^Dcb^~&gYugEd4=tjil5l2fq^^#3($av?Wy+hpv4W!8OU%E zMQD@1?`x|)jIB^MkHxD>lXubbjDBo5Lh5|y#HZsh)BX<5mZ{GMZRVbciYN$@rT2Rz zW0qF`Dh-?HRqqafk>&vG*Fp+FMHVtNjhUFK$5#VEFmR&@A3Ror$jZuz?>|6d`~Gh9 zw$V#6@@JJ;l{{I+I8Fs2*5LH|Mz!2ZR@&97kVR^i@N}B+@~uV7QNcjDJN334(_&;h z1&TaD!^fW`MYu<&o+`1T$e~8vb$~G-$!Uxvyz+oyjFb5kv{mXQ1^n$&weP#`XJWTi zSRGH9shEv0Eau5GGEZX=etgpr);ciY=*~Gj^T>2EN`6B5a?yo1=!3n<B?zSvHePJ2`BEM7r%MUS^(<0BRPBu%Y^0>(>2+ z`1MKN6{14)!1w|C4RzvkTdxD~-;==gzs6PJekvVdKZ#({|Ie(Eh>`U_$lx~rMf;43 z|7YcH_}hi@oFyXix7`j95QJr}WaOK$f#y*YlOtvoqV|pdhSQ{SDRt{Poj3OOi@H=K zeegeAq_Wqk&?Q7`%%<796C9?#x3=Doud5|~P1mFhX&EYr>X8f0)(v=2TuHd%O$wMP zD;_9T&|jP^iLf@jJ9$?}8mDF3&p&5>J!!k zJ9nRPXg3oG;xTb08@10N)lSx6T(+=n+1bf!ZyU@&<&<9B_V%FD84m4dWGM{8petLa z=I?2QQjGHb@jE(Y@EwZc$*Z0;u@p_r#wN*`b zo=~0Inus`z9eI>%nZeIo&}XU^%T-Ahs}~7J$ww@g8`(P|&pR@!kx)*H3ybPwG>=Zh zc?o9{Te%legg=ZdHdDj>iOH6M^fnNM~d+?_f^Y^+W1wyAl0c3le|)M z4LaP>%Jym8*%^w`!k0bKNw5fSV|hi0;*=@M~#tD@GR^vy&V12wNcqyrixnOGxT zBD-K{Xv=D@R=WGCSTN@sM{g1P`5o7yUQT*M{q(WwX4l&Yjf5xrvbKA)qRDhQ{a;P_hcnHIY-Ur_y=w+T_u9LZd2^5tTWC zF0($hKK%_Fkxg_RR(2I41Jo>JzI+!E@${n|pP4Jv3u2?j+xjl!6(gRZpB1fnWG6W?v{Xw(H*4jBJ&vM$ zQv_vaSZ}TetU}chRF1MF+ejDXw8{nF@`yW@=0qhU3$;!@#vrYHK~w1QJzBd;B5+ST9>U#lX6d0 zKHr+_kaoUVevld7VJ}P(ci5~Si($p;@f1IkPYJ z3G=NOCO1d80xyaZ7bwd@{;hA zzercUc44JzwjKiL9IXe@uqN-x0cM}`9oppyQ+ZUJOyVk2kgt|ay?v^&Ur^f48iRHH z_!Hdg#KeY^R?l!)r+3a)Wgm|_zq2oh=*!<(=XeEcI{K_zeX3B!je)KM+E^5Ux;zqNdWPaQ>%VolFax`iF|24CjCLT=veE1A>V9m23>T&)Lc z04uCU3Ely@&-~xdjY8TT2$G-QTk5Cc@ITU*|EJ$MNkzjEaT$5jKn!QnG_n`Z&#K3n z;tvXF@5z&vTLP*ON1gZAvwUfx5EBqlwn%?BTBCOpl{CnQymeuV3vqexR@*eBu6y zz^%Ck+%#t^krEiK`FO;9B*438B{zbhZ8|WyAIm>NL|yrWVbu_e)u$^Fq#)0UcrxX# z!X4c-BZqihd4!Ra9NB}cAg{^cGAg|fM4liAJ)8KbNeBPz|BCgsmK74}_ z6x#B-SWa5w=Pnccy%eP{wf>8Qh;EekqN8^tpwv}qgPA3{v!h1!C~3)tQ{yBMQ)yZz zh+^!`B7Q7vcdq7!t~)*SAY-xq#b22kWa-wog{O%XuhkeU z(P`PNacI*01Sc{gO z6R!feX(}@c%SR$bX6?1HIQ2iGDRJT2B>P12_B2V&bNu?%murJwk+u~C@l$Tf`Yi`n zr#xjn+cF{$=?P61+?p7?R&InxX2bFDG8t21^Yw!*45zfws~{j+$P%p`qKT8Gq*qfL zIPfR&<+_KRJjj}1)G&|U{;%J_CZiMKmE}zyL-Y|pUdeUI2cS50o00xrwRw3QoRP|d zPmas*yC|4*4G1?C`Kkc}3<$K&sTF#)IJP*!QN_W4KOiq#hKOl@tbj~b(M3p-!%0${mCWHZKB5e>qFJ$C zs+CUFo<(6nu84t-qNyEBG79o8Q6y8(rk+OVZ%Q0XcT{3KTrrcFqFfPeBz{akJc6ZO ziP)xrLP5uCr?c%cpSbZy z$C+C>(Pudh%7-jEC2V26RD{W+C83a!ITY7k;#3;6^+gGn+{!^-c1439!&Jp*<}V=zO?|-=|P450IkM!#nNHW-D9#aennuq zfx}_31)Vq;TTrb!Fx;RXoT6vUY1Z7mheo!+c%j}x2=*CXp_vC#l*IgDmjm{#_uWtf zF{U!xYQ(bhZS_`!yFhn@Y-6s3rI;c> zt>00L@qK)G#QRSjPF00JBU^TL&&k*9U0schQ7B^m^>134*{f{Iz?JB>(9(AV;=1IlpX#X$w8Kwd9kK zL>|mbf@ZICwH0)U2hg0!a$a~1rE*(xSyE7!FK_IK&#J~G>7V3LZxC9ao#^Y;oT`=N zzZ(v2TUGNo=Rn8MYOt7iKP|6ri7d9UHT=VO@r5bhzXR47&tHx&=6u3g~$`?wb4a<7pRGcs62xxD$5_~Be3)igZnYoV* zIV5EYj~(Y3aT0eo$wIzu8Rcn_$n6_Y_=$xr;H&pHzBCsDVF=do4dWTF3VqO4wUPew>lVyFcc^2(D%@1RnB|z8n9IC;!+y~Ts z(i;;)?0x50NV7jcmG7h+yd!D(3c#&ACHe2y+JQ0BXqXj?sI=t*DLXtM@A7K7Id=RWVSl5BdkHBcN+}~wixQU*lodDF-C^~mP2uz3kuypip(&|l-?@P! zb4JNI5pdDGBtf{;e7u_MyV{Vt*U?U7H|T;fxt ztqCtwlzJdvQ!*dUOOl*lk;5Y(Vec_$gXG4q_lX;zm)oK5pYKUN_{G5M-x1JYe^sxa z%SGQlAuI%hUUKNNcgAxuw_qrYJOWRV!Iy*rIawz1tlHFzqY!2s?CY}-;5*X-9?I`G zDa(m+$0^eKgST|u{EM`*=Elef{A}qef)?2=EA-Uxxit;cIwHxm<5Ht*zGv)_6>R=F zXy*<9;}K63)mH}kI0g>{SG}n0^Pg`>l!PpAem@8*0*I%uEk;v}&2Wu{Vl4IN)v$y_ zd)Re}^D3x@EE7%Xz2$dOOl4C{;ix(Dj22O_c@XI<*%u&Wi4Pdmg)Y@Dt{(7ba*r3t z3TN(XG^9^8iB*{A1Ue&TEzi0lYRwekf&4}#PfJ`?jiG(I1)h`(7 zv*OnsBr6~j1i8&k@38I7v(Yp%!7uI-fGoA)HWrHa0^DH>PV!J7N}H*qqW!IZC@pBQ55=fVG< zVhMR8OTB+ICjCp1$SG0^27nJPNQeFbZNPGHs83yF50qaYA3=IX*vNbCH2DhOjv4Z| zE!k{nk$c&*{OI%9ZO10S#4Iu#AU1zpvq{SB-v!B&Yt{0$24nTI2m+{NlKG_+l5Jb9 zm4ZL~Xvkx&Nd)AFYs2!MP zff+Yn@BST+e_zr~VG_OapPa+ePj}`2ebWD0+kdnS|7(5!p*r`|S@~@h&tfH(r(8im z78+{y3Kp7!gl!C$wbaLl2D6nMYZid77oXT0fJ*y?xYc3+MvgS&%!_`(v^h;+fk z*#7=>`QG`w{;d79e2vQsurbsRrFddSBsY;8n1*Eh+ZM!u;z)^#j!4CF83vag>0ynB zcQ3eqv#l2Stx1DcC7cs#EWO!;9%|8YdCd~%Lj8+n+etK`+Kt4;xy2^Jt3t6a7@N*? znmQz;>6oW}lY32BbNOx=3kMh_E7=_S>b)B3dghL>yywM&cM&{Oor)1^rMFn=SZ}<$ z0#!$_E!=`*vWJI2YYzFCDDgo*CwtUptueYE8YdlEzfvVa3G`?#ml@f;5{eEgVO+sY zPZSF*H(V&VH~n~Nxg$V88lF3+cg7=@%}ldOdC-Z$_;-hjecQ~yQXXIKh4RQ`3g!nL zjezmE#ar>?07}M%&PPWJc#?gV{V|AR0yL$is|SX>>RhSXRcTB4F3wfLJTf$F%|!0A z1_knfNgMBZwzZ75{2)aCoZ93np-hC)Z0p>-6~uy%2-H^upBJB19CUhn-t?sHXDZ3x zHtnc4L94~_aAxG-xNlHBw98%xC%-RAi6M_&seBr5K(L=~W8LgFl zn2Na}6kGZsn&D@uXp=6!uazKg>^#Uq#B>yO(G)s_!H&q;m1)fhzA4I0q%B9;*ma~K zIvfFNq7j6VX-TBSYf+*bn7?;8;)!6b{qyT13SndkP8rfTh%G2t$Mj+Yg`#cJ@&4rT zh_UwN5B|Htf|eMp1)8C4u(PR=hCl%^^YG9WM<;nKw#;e8*15DPZ~rF6qw7D_UG@X} z+5a8v|5N%YDp5k=r&oUXyICxu0k{;o91Ma$Uma(6HzB_tYERNa4<8EBC535TSfH=a zueW-qFA9^j&9-0IWFaah2)76J(A663pU4-JzUJ=!cH2C9&dKTiwb2*Aq=-gNHex~c zpbM8^CaI@B7#XexYBYMAd4n%7RmyezzTw!a!pPd!b89{*?;XLXeOt~V;4^r8@z|MV z#kq2@PE%<}*)_OHjl;I*(m@vDRMMNv+o*-Z(e*rV`}d*#z|Fa4?@qYCc$2?qDqyGR z^m>i&V3987b6BJI_LV>&W?r?~f{SY|CGw-*wDLghO1poxgn1D0-4H3#Gi9u>Jm^a< z4GL%=iy|ApwO^pTNm5%M(M|q3%3iz04au^98_5YgXASInzN;RFFX2O)sU_YI|)MEDWCH?x{mZ*rFNjkc4_xwrmr=htu<_n z1mx^Xuv7#wKbjaiT`g?ri6uM8g~ z9aHT-7C$Vh?||+d^eN~9*!`p|_V*102lwO1(o6(YC;AjV=DorLiU;_s0p@F4kR;RS zeTi6EmZt*=yDua?J7U6}3K36pt&N6i>*$d!y>W?w@|gJLNjr+7d438VQUOc+0k|Ql zh{GN1h*(Dm1ulOyM67xukDxe+xNnC6u^#lkctg&hQlzYPQuUcJw*vq#dEEjX^p z3Y1Cr8Blk1?N$sAO4P%e78jOjTcF!@r$ib~(AGBg(+JzMc~}e7IeZ?*Yg{#&$7P3> z^Cqiyk~XVHT{d%8dFsd6&wo#@(zQzuQDYv5)q87O5d>lmtJ9cquvSwZKJITSF3_C& zo1ac3?FP8PLAbYzjOsGLHVT-~-@@KW-GX0WU?bbY)q=PFo-ZVCW@$+qYsKW1y4*Ob zww-9S6nwaao+^SmQ*E`-I7|DjVT^9Y_C9YSp!qP zHwc9*KKgxg=%HP}oye!{($Noe`Fatt2ji?E&;8osFVKQK z1fCn2$=JoJgXa~qu|rLOT^o6(HmnfKzV`p{Bw3AtglET&yCWUoX{ZgMuGlbiXhEx0 zfTuJna(?iDq+p(vK#P<`w3`F2LnLH>r95EM7D(FulMKVZH!wFnBku=@cR3Ot8*EG5 zF}+6-JlY&9EE;bYb-<>dMhw46RwY7nTyRC!UnVfH-3anQ>6icsO)kX>F*HIHMQk1T zw%>xj#Q)G4vt&w!I{QwBUK~&a^{IpUgZVk8nFO(qf5SX(>||}_59a6pcbI4T7v?AB z|7)X1`qxI!1L&`Vgl7E9L>&@RPqn4JFpUKRfdm?gCjxc{^kiuViP~py_pwc={nb9|{fn}Hz5ZjS+&c3uh9#iD0VJ5F zy-QEY%{HI*wDovbeU>@vv(4*|ML6Zt=*W31Op=%iWJiJl!vp;D^e*~dI~Sb=g)y4@ zShj}Nt@AV6s5?(UUUQh^)(=%HEHl9vajOcDnhD6kG|?1pr4!xu{*Yrk;w;OyvR_y0 zK&)|Df_?W$zDCMso3)a(>E@ItDH|cAP`OI(vV#NLeiOo6n-~PTp1h6f8C_o@f}E>I z069k9HS!1i^FQF%9Ok&Uru69gMS+55Oxu=aWzMDmr=mh(Qz=(CbsgwF_aER#F1t0+ z`vJdUlnRAK$OaSvE`{s2&m%hM+QEW2ehmTExd-ExK*<4_8I6mA zf8>7$eCGcI{y(H-`H;Sh+b^4n{#42e$x;wlj~8upDnb|Q+d3FO-dtTvRduNt=# zzo>iwb(!YN2ueL{=IjN~cD>}^PB@bn>GqS_Cwgp`wkg&3!g-jhZZKQjt{}BHP=o2Qxka#IfMywsD_yZw z*E?_8a#agI&G^(IvbDw=oG@Z$2nV%OfB9y6@2*1RTQXxF;jHrWXD7tZ&YKvLY=zQ2 zZ9-oqmnSxAOQNEu<_a-ftX0C$LBSt}bE#+mh;LObvI{j}2UlznhV4(a z>dPe~QoHncFqc3|t*SSGEp#8=-IYUYKM%ug)@4Yqx*Cp2edBveEAtm?vci2)12*9v9X^pc}Z?1Y4JCRL@rTA?T${9Oy(-woBmS zJvU|E&`;&aaFJQtmzuTB&^v7y-zcbtWOthnk>L|-U&4TOVkn*Ofq)|XF49N4gg3Ur z!SIT`n>7`J1JN>`L&+of!;qPVWAIxSS4;)2z$KK+7x$#ZWn#V`t%p7S0c5WuCI_E< zI-Jb5F2i_Bk=h~_e+lib07BFR%QT+@*5$V6CXI3gK5`YiFk zIdBkr&`giiiq&C$FeH-(V>7WM?OD=*D)hUxq_$BA3S3t4OvuY(u>4`wv$T*3+WM@y z35uPs!)8*hZ(N|b!X4|rjclK3$MiVix?)%RyZ)8PI8 z&{6(ZR7>=)kN=kuo20Cvf}w)+6};YKsK~#lj5u$~3`l}sYKECNBp5XaXXRgueQ7lS zKLo>w0$SdA*jfFF>SdlQQFkV;l6|l6M%dOiJs~f4cin?g)=Zj%ig9`ZMpZ%{)`VxwxBl z0wr=$UvGleKHMbAq5UhJ-UYc4q`FupT2zyI30SwFtBtc&+z>H~I|{DDj>z)0Q4Y-! z+W-saF~r5VdJ;Q5JaH!mRebIQ{r8EN6{>V&28nCt*A(S`(0P&jA3W& zNScunZJ~bFHQXqMo0>+-I0d2eidUPaeCidnaX3-s*Nz1H?$$as+V0&}E zlu7fn)DF4gn-oJvoddqHtBO3-qo}!TNf->pHo^A3Owf|#6a&lw9a+i0R*n20%bE#0 zdtM)g8JAYn^fso8@!9JNcUQVPL5fJF?cfTTW1G8MVqc(|)N{9L3~jqt019m!c6^l; zWFV^zY2=3wfNkI4TlwcqjTG7<^bnKF?$c@lu41*Bo z>@9=AM89eJaY)#q8kzUtB#)U(;*8asG#AQ?9836W93j{X)Tbz66_c~75Fe$6$o9NE z@$~&S#g>H>Yp3@)&yuEAgoy0^{I;sFPx5wQ5B$Ejcd}3!%^-le<&MD>{0Pl){h*o) za@82nnwVvtUnMCJgEwedV)vFGcyYT?Ajm)Nm5B(=`Ym%igx>Y;2)>Sx$^DkH<_MvM z;Wjx_*3>z>w6CY)d&YayJ%nZ5%u@e|A==nYl6qdjD%G!?z1iqS)^Dyu)V9sG zVaLv&VbgXswtnu+Oa(O$X&t!L?AHpgMEz^jPDB_R-N7((^ak(Ty$%k(4|)T5vziGL zf0TzGI-Af{r}vq2l5InOBe_cSg@^?EgNbYDFWl|04&Dufckpv&pNv|&D%sLka&|O! zR3tt)trq|fk6kOkmm8)74lcv8-EiN;3tJ(bMXSi~F*`DWu$$P62;fgz`1lS)_EA*c ze2E; zeB}1zGd+hK!O?a?;T_|wwvExdI}$I%HqfZ#_skE6L_W|!?BufjveJ7NXj6;3Zd;b+ zeD=62La5$($J?b=Itl<^#S5sTLYy+^FAMQGdWi>K_8T#@H(UInUmLR~G?K|p7cPba zahuVSh+hSNz$h(oV7?&#`$2whQxX9M{_B_Yk5Lxx|7{EVkCVGm)lC(18UC9ZLETJ^ zE;-78Ump^+25!Y%9hDSR!C{47H5CoM9j)YwfVxK5^l)*ES$bbv?E1aRqfiR=SyUs( zS4wKGU26Va{QA9X{rm0Fg7f-P9Fkv_-R#q=!gxD_^*&>Na^fw{M=R@h4YW_njBf)h zGuTs`!8ltwy`dKm?H@-sND#>b1nm zevx*^L_`YX9kO8(^MGd(zR`}rml=6Qmm6@ zqw1M2CT6bJ`eQQtgwj;qe2dpwB@vV0R8F8SznPPUB^Sk493dHcLQiZbOV9LVP1`nU zM@^k;>^4|Akrg2=T~t@%!b{&Xv5e(#TBtuX1TIz69EU|fWO^F-)6f^**gG^re;9}$ zO*UF<`#nO>Y>Yq@gM9VzS!!lFO277;&QYXEQfWV@PBK?R56*TM=+&7&9{okjRi6(k z#?2F3Z>(`+m$#w)q6)t@j&+&HC4bepl zx8j_C@E`@A=C_g#d};Wksdms@2OK2E=VL5lelbGEiGsZIZp-zz>BeKvL(?FF^Q{xe zsz1^qqp;*=0e#}3+E=AycO*@rq2+KTgLOX)I_E6SJ{L`R3(-DbXJ>n zv6O(=W7$y)j!gC4&VEu$(^!ZbI_$0CVX>Q(bP zovMn}c8N5|+!{GETty5k=-j?L2|R4+Frzqu?pYH%K(a8*^3pF_G)}qv9Jx&IDZmFvbeUnAvxxEI zIclbCr1dWlCZv?a3Va2RqJax#Nu$^VOi1m>HY%W`)C?8FTy>6O5+#qNH{?!d^9ikw zEmqKaC!XO1nB%8;C-(S-3g_Dp|SJ=p;!fz4{tXA^{lW=wyHbtgXLjLM`V$@kZeUy&}TvysWp`q&biBI5i$WkOhM{q9kX$SR6 ze&~T4*2W>RLe{nH5HK`2#7XjM1eWAyfm&GV_b-raJUQRRgD8hKDr_1P%nP)VUe)dl zoMKAI9Fs-Pl|+q(&j2ikk2UfdwJoLf*?iBLsC(I#b$0mNVhY#>AhHLHdf#MHg+v5( zfdx)-awGhYk!PTjDS+0LJF~m`nM-3A)IFlYEo4KM%_X|dEv3UW4#{;EN%dpAF(XtI zYmRAqt?FV{^Am-wGBEYDY;DAoz2~Ct@ZAMr1*Ppod(RACzP$B!F4QWo5I6`mqkdJy zGAD@UZ26MW1IXqsW2Fd$-WU&KBw4KRK1HH}j$Q-_2sbDEF)M%-(yc0DLx|>WQ+t+L z7~zL7ck(YSEYwMqIaZo&>qSHea+8Xq!#;$#s75iq(3JOi$z2(kCmP*I<*S8!3+61A zGo((Gv&5!^;YTp}UKIiWN#|hi``07RXD-k%p3Q#dUx^s&`kPMuFnCp_^vW!_Zb0dL+Y!2y(WVk zcedWoZoH!GP3e!BhpN_K>#w7@v?uo6WGd~t(S8xSj z4Pil%ZS~>fNpuj}S&+_#jn22P0&B|eFEm9Hu@oW^GYtQ5SfN8o&*>fNl2y2;0-TIy z8CQ`LMM@LD3pXgmb=ghREXNhOrJ5s=qahPi3szrQ*%Gy;O>RfALh8C!r`JiGE3E5a8DUry?uLqmTy~S6FXdlB)_xY8P@pnT7j-t2U zbWLupqo|EZ@epP@{}tlL-}u@I(&YOSjd5x+YW>e&K0H>08*Jd(j6XPd$B}T{(pP$* z&4?{Pia;^&BIF<~;E19(i3|49Ge5(UlrG#TUnwnho(kQnW6O+v*tE%jq%}d4^D7Kt z7gzC>N`#o)l0*I}X=704*Wy>YnI$*StQU&)vXEt4IGruL3{OtGf%1a2V3F*!P)fdF zaSS+Wv;#_jI20u#4u#M|^;|@W{NnbJ-Z#8PK(aNibbJ=-1`kGrPZ+xTH{q8*Zyv^U zZGZ>tC`Z)sT#k?zTFLjn)qJX=^i0owE>gtEzkU(?PZuc_d$WI8>ioxDs#C?wQNaZL zdy{0cQ_MO8P82_YuZMn4_5=;LExA{md}a+YD}zaDt%uEolQZ#|=X&}6EBpAh zbNc>hEQ3}XxCYqAp0D?l=3$R8|FzT<2W~PzngzBsZ(k1`H{sR}HM2#hH}=JwU(#D% zGzVd>1ld#A6Ibp=4J5}w?)q5hTW;2qk12-yCEuTxoGo`}Tz^{yiHl}G7cK|xJjYRw z1g7-{sq61fjXxnobogyG`zjapeyYETCfM#dM0CWh338_F4K!(2m<@RBcCy`En2_sK z(DhC`L$=EEfWB7#OkB>r0vU;C!tyD2 zG?PRreSAA&dRa3m2pD)}USG5-)ht@r(>ahjQKd)*Cr8u0j>xB=S7n)B1K$jMf;s3z zp;%>n@AoAWzJ*z=tcqHB;NOOtrU3p#e=D8H-14S+n{kL)GYla2r3KJAgoOrS)-ZwP z^8m+~qCFTuFjM*hc0@m2VzO_i4Y2pS+ zyFaIl@$^;2WRGHCiDXf!{F8q(<3bO;nA;kdlO*O;n3>qynk6JOcv5DG_d(E^)(Zxo z;DjVd3ZR7}KcPgtN&(lN-oi;%B=s&$b=~Lan-DtGsZeGo^HaiD5!z^0Aj0nn+2=)> zoH5ku=i;B-{Zk4Gs&1pV`+TqF5!kaF7QG($B!QbEJp)eV0H<)9-Q2)|?xs+oRB^Ot zJMz_r(1SVj2NKfL#%&$$E>)ci5~@rZVzw?Xml@s5eA>6|zO4~W7Oz+sR%`!=i*{x# zsl#=sb5xQV6b)@XPhoQM>D>>&`xUPdB#9a#TL%!z4BsI!7?Y{AGz#h~-6C+-bKD0S z7c(3}Kpx-f6E^~QNl`H7rcEI34e3n#A;5H$$F-{YkdGIH@tfHS$+t=>>KKGl9#e~5 zLJmqJU-xTKbcN_4n+P%bJ5XpNO}bsk-+F28RNaaaNp?ohPV`=45?E!l2V7#dv4SdT z4oFaV|56a;RlFs%bx@)!+#6mQ^jz!N+NcNz237nJ%xn>3!W=9t=V>V_sxP5NfU;8( zh#gm9(0}aB7JUzH?7e!bY7%vkSSC47%&|}2O}$?l6d5GeRIWu{54`0~xOG5Qxq&Qh z%$6$GXj^xkDg+ma_3stcV2yc$b1qd4eilz8&?1yiRsTa52D9D{nYcz?h< z^gp{G3*G4qUd)mp)%2;<*1Or!9Zs){djw7Y5NDDyR6lJtUW5ly^@E9VQwnQ72|e88tT1 zEXcJgOOaFAP*=AV^vB)paWXf2a=3p4L1zprnV?=f*BVT#HzrQw-!_bt(1AmeY|S3M zVJg}gRr91uS;(2E!<%=`j+ry+jG2q$;_N}oJq?XOaAUJ_%}O4IoTSbS1Zl$m)fE8Y z4KvXlb&7&Q82qJ>O@f0ltC*e*8$w~23?XpIpVHl@PtQXsliZTMP$`*irCFJ_wx*xL z-W=kV>rvtqfF9Rk4PkZpn51uk3f_$6+KU;zsf>|qyDYV)GwPCETe}3jJ!fbkIgNqN zt=!S33+k z_yM>Ph5W%$i|~BFs*-jgm4BttUmq;DcmOqx6I?lpY^xbemF3HbWee56bpZ4GZ;0Gx zVeoX1U9>{WucEReu1t;TzC}gM(#O^id}E83TN*53Y9m;qoD`{QtF+;3@C8tFHV*Qg z2agM$9Gg3^;tPtjo8Ow=F_nFST7RfBz*=bagzcQod$f2o&NGsE=g*(txQFTHuekwP zLxA*Cw>1V_Mkupdo;W<_SIZMZ==)vD(RgFhG=LiWQ;|z=J%r)7B37s8%~;0E#3W5P zQ5md7sE_neeEv)dph?XBS*H2JiJ}igI2XWK6UCeDtV_={WW%e1rTZYw;9Y{&PU-kH z4mD}9@yOAC%L;mcd-07~8M$Cy_G3eUXi>StJ(i$7h}EEr+(DpdI%wV#zg8n0m^`O0 z&LW{Mg0z^tz&ENVcFs=8Xz6q!h$^5>=@wcKNuz#4a;|~8(Xg*W<)TG2F8_giU7{%9 z=rd$282ZQH_L2k;)(R->;03pMY^AT8<*QqG+}!YC?UI3~hu6dV@4>WKs>*IWNjG1k zwA?ksgA@XlpTXI~&0xWe6lrZ+UlZv11N?-UDlX2wS5{5<3%1}D$!IGJxm+N)oZ;z_}ul4YQbj~jv)e4;6OAvk; zYEnnr`N^GX zD`L{7h60IPp>Yx%+}#=3k&LdNnapf$A@YD8^uE8*$M6epfO#U1V|OX1h^8G3XLvq9 zp4IIz^lwhDPv3?1Vgp5hHZ zVh;m|U%62zROA0psLe@V)V**%guJ$GfXk-0gR8i6U|`u5C!rywq!cS=J(;}8NV?mhchF#^spq;QA!W%4BWk+O z4zLp1`$+ufh#DDE%KF(zYDtvGrRb8I5i*}UjSw<^hb&M z3GvSGj&-zsckgUa^gg)7Yz&67gfoawXn4@g;VwD?6&sb7gGSp?vtx%1-^gtqD><=% z$iZkrzfHfjF`DD9pqmVra;KcpQqSy*{_3T)8Aq89KZc&}N~<2XpJ(erCQ|xxTj9X5rFo-QM1~S>Vx- zl>hMTvz?MA$&lB3<==7L`R#G-qhsT_Utpq*mml;ID$IK$OW0X3yX`(AeBDx(@FgAW zV)T+H>~oEu0{-=zb(8=5qzHG`1LHFxqiHtA=oXRnlkqd+_qdPKhVT(CY|@;};nX|B zhh%V$2i-?Ti23O@rR|9ZR7uAJzSlvKmm>yH!Vq!v z{1Pthq+Z65^Nzvqnfm#`^dTDB_Eo}?&YqsO4DnXQCQ>uaqdj|AnIi~T>K$_a9d6sH zB>SP?Xr23e6kr1i%-Ki9Mivz&j2dj+rLlRCwbUs5z8G(+NO7YOx{5 z5U$x&0SpW7t&xLu zjM%5FpnZ}`*q^(K<|+zI7&Z{(s-`ocJMkuD$sK-I{+(j&HXZTN~P5RPQ3n|@z5+Du0B=F|e z5x@x-Fi25 z7cy|p85DN{g*6xuW9^{J3>#bhU#0z^8CRoC?1j*!!Lf!e$w!DAffg&OFzfU)ajS9k zICPF|zi}@!oP{92ktVk`Ft#^$X&tpj_s~PwNwf~qTHD`)g>?M+fVUj%qq3!0~|&h zI1?mp3?F0SaR}#B^}+YWGupq73Ii4Tkp^oWfg@S@2Q!L+byW&*++bk09Ca8H`@(98 zM9%5{@Rc>Z^7x5p5_>iW^h=vOEcL+Wi^C zlDZPUc`gq5I_)%v|K{b0Lo|wuVg@(OPMUe9D8w; z$_#M0C*XlW-5z_9W=xhCLG+P!8E}An4WG8i1tHXugw&Gg6nh*#|D&>@&RvOFFY$2C zFUQQSl~TWfCeEpx@cEGV5UgSqCERxA*~X1BVQbnLv@b>EP^Bht8k*!5%ihy_#;M+G z0xaBtNx81k9!QovT}n8bnZDdZ5faL!N6#d~ngElJC0LQDP)6X1HTGoN&X@vum*`xs z#5dv`IR#J943_j1)~OWAOv9-oN0FnZBz?rf$s-KMG?NQH#HlB$khA8PHG0FTM;isJ z3ufD{RQ3zd6o~$^gf|DbfN$A@$Id@gjU*|kFQXTnOEn|$($#%_B z+q+~8+hun|-J(a+kBI(|>g1(&3-sDOYfjr!3uc&$XK3z9M;PuZ$Do1?X9|S~Wyfp} z^aU9TrrH_r?HYyj{rG501Nk<6{pTq^JCRWw0)*L;DE$8qpr zAn??FmCYZHd;)Ek zUlO}T4`u15e`2#q5ddy^Yo5&BU32)Rbjo3ljM10JV@{1`mIG(=j<}3}zRB7L?Xxhh z2U7YR-4pm^yGHT|dAmT}5*DYQF5#(d&Rs%yhC^uh!aEe_-jv?3l8Ln=`Meu%kYzU- zVf2zD91$hCdB-$;p|()GvjH1JMZL~oiTvB-ZR_ey4USr9rI0#iBi_w}x#5nxvW5*sGLUm}Y?s7VOkfNllOHE@g{IXS>1)c!y8bC3uBbrSNS`UQp*-lkVeID)!52tyL(hQXowAds(Q*aX(1!bU2`dCzDSr`G?$G@@J9VGJwu}b{c8m8joxr1}%rj{T7uMF4HO-;pB6`+DmW3@s3i3(4H?t-F zg-j{vApGSa1S0LKdECJQ9ZiiVHPPO?!FB_mb~A+*yP?GifKt#%F4bcy zZD})v2{ic_LIl>qa=pT146Q^c%_FIU&26XH>?XF!A z6guCcU2hS7JWK+u9IY}@#Kb7!W6X=cH`C5L7q`Rn$DsJ+$5QcfxtYDjcl_^Ph=2lY z4ao>IEo_O`jgMN(=kuCKvHAAm=!1Vli)?1aMNuU0HIC0VCX6f#N8hfx*lh5<(!r>2U~pzWSRKn&lx%*0+UOVOR~jylI)*fG}w+<2|Fer>#gWK zQdWGU-eAQe`&4i|E}QMt5R(|%3~Bi!_xOq>I_44JT=MONzh9hj3A0E9p)ul=K9O5G z{_0GmtmlS*1K}nMVy%Mg^5ieFuzYRfU(nY2%mMBLsm8*8=#^r+ zx-tR$w^R^EUdgB5HM>**$>V{HLUb)r)CM`wNQojGl=etTvS zhfpp~2IXF%^=Tovt{|uUI5xWcwQi#2M9_ z9XTgBB_ENXClAYQYb5yiFxD-g%cF9*oct#n;`7XO*j!)X<2I^~tOAch_~=cUl4bjQ&oBS$sUYpP(n*tSQ7G`G{5_2q{UjZI`f=|ENH|l#i4eGiYh%&k2m$rX(ruCLH zyY)A&B1U^%NacIu<*WFov-L%1o$mfwN6J!-?|DXEv1q)W_+>#Y}WqH+At=Dks{eC6_~EgiWXaz60QR8u33YWa+R^n9pily}00By}I6 z>h39gkJOxg@u0xq`6nFxMg3Xo_zZ!mUkW$$GTx%S>YL>U`qOcr*|=um#n~jRosE1c zbgtjsdt|{bA+~QR_idfkINIKO86)k(fLTQL$1ftoSJ)2W)|pF|3}E(+-H8@nv5WI5 zRMrNj>3sp-^GfR~yg69hNoTEe z%oJ^GH8H@L3RLC@Fjokg^9f9il%lqJediG~?;s?~-gki6*??p4%DW9?yFOCfoIAOX&Ul8WD-H7i*%`4q zukz;biD8=;F!{hT(80SLDtbnleY}UEvh?e2GjUdahaw0roXOA z_W|;quzW?Yw2a8yv0X)+CWhf$mge55Fiiy2>8NL=_(C90Xa8Q8Tsc$N5IYXCi}ISI z;*yAkEBD@{pggD~k&-&?1hCfvIdP!Bm+>5Q{LG41(s`e)&LWA`D;Ac<6PhbB;+v~7;*-<)1jT9s7eiZ&lXhb23gXvmqF2cU zUdU?#hcH}+X7;ai<6;~X;LieW?5(2r# z$_*Kg_%2?9P1tss;9a;ha$TX<%$D_wlp_j{^?YA1)m zZLAMzSBck-g_{`Q?!(4yhWG=Mk%1hM%)%M_7oB%B5jjsZg@xt=ogOkrzPC{}iLi5& zGS5__V_MdQKf9nl=ifWwT`B))=8P{5hwf)s^pbh!mr*i#Wx?} zzj{JAeDanRinvIV6hy`pN2V!ql7LSPexQ}CSj|xPC?XtJ)??*9&S9^&;n#Q6=kvKr z>bY(8!wra)Pp0?-S%A8>#Fc(3&HLn)&d-U>*UT=|&=zxz6n--D13j0eZiunA z_byz1iX?!h9W3~n>iB}4BJ)M_O66n)@FVFVw6V)xt+9ZQ7FEvS6m-stR%K=FnX@>R z%)EN`;Uu_3v+{q!u-B&Evt7*B^80z-KWD*sY{-!PCfSAW6O{c85c5}Jc-`(tdyU7v z8=4P@HXy1e@H`_2iW;>vLA&^OQ?p_jVoiDZTC+-Dbpj zdBJeA3ZtM0v~Q==T$jWB_YP?OIMubb1f8}ZZEKN&E50j>=A1a@b;0MlR9rzKEY%Hf zH$tBc#Z5%^Vy-+LDQIrpQr0tC>LO{<4RQ&~C8tPIep?cATeq+;a((xaXt9OFa*B&@ zS_V}us~)cVb@Xeq>`1?+$;Kr1#-w(ZdWM&c@~JLX%Bq!+sojE}hFc%hsm}W+kLT8* zQOF?waost$=o&QHbJ_V|{~+V@h4ld@}(pG zr`-@&zl__XBr}z5X3*#{Zdq2@5+iP7mj@r;I9z<*_fTZf-(cvz{J({NziIwSCv#M! zFe~}tg>U_^Vu}9q6!hO}TZ;c#=K9Yr&*XmzOr^!RWaVV}KN3eW;;Mj#zXK3DA=|72 zX+U-FgQV6Pj4USB4!zF`^Z7^k0w5WA*N~2)4}skaVa(l>iVy}H|M@$2_qy$5H|P2J zwDtxBurxlh5GKVy?7$(az&Az(SnG(spLohRZW2C4|3e5W(jINsMzYuGXCD&Q^f2}G zY{|bVsmi?KmAm6a8InwUMIK^wo^!B5#;QHdi0!f7Lz zjXh-`ek!#=zUtBnt+LcYjcGo?(P>&`UOV2#mL|_FfXoX=jvuMdw zFcJrWN^5m3cZ2RBQ-;5fH>gk8f8%x%oI?_0Z>-aB={;W`KNtO07BRILic-{7ESYg^ zeHS-G=Y0_#aG)x!(fH1z0Sig)&Mp#s;B%3>S`He5y&ZtU!!t%_qq zVX2zr2+1vAkQ}(2G*m6&youQt*OhD1uT9pYL5|y&{&GqxcuE(Od|C34Sokogp#2`b z2Uln=3i{#}fFOwUhuiCi^&RfZKVkYC(o2X<>m%&GF$Qj6^n zLjDPTCUik~CiDR@+u%Rv82kAJ;V^7|m18b~21+l?8%@CpQj=bg=9Zv62#M^7BA1_} zRv(7h5r*b2@eNt`5c);4d%)uXj&}Y)B)IiLksPG$JM&GjFlkJ~bUQAlY!U+ukFo5~*yzl=@mdiWZ8JRda**X44 zA-6;W%3XN`<=YIeqbn;5UQk_~z?4XUO+Z4J8WlMNo}eChTGTu+oGmNM*r3U+$1AN& zzvjI*PIEqRB~sOB#j*gNAGx7rRkNl>(q~oEVx{s$p$4TP!t8W+$2vE-?xUB>%k*@b z3`To2+uqU43GVc;sH_gbfh1hx^sGvEM<&}^y8}(_lRif8 z-9SnEZVZ-v5g6M6818m7o~aQMtC#*hDz8ITFbyuUJ{4X^3>)7mblX#k8+%tZ-|lAw zl$+%G>3~i<7Z%?L58@^d`KS+~@96-$_3_p1oG2jN{iy)k%%5yFxQ9~%+C#MNjoSg( zuSBcOiHB>)f*~0IS0`jI?e(cLJO`xTKHVP4OuVBp8*?e#x1+zf2Q++72rk#WUnO{7 zv37kT-TbHyEFk)f$jwzUQ7JnS7zj13Q!!OO=zy&B<6THr3nLj=2IA4QYpazflSGL% zaifW&Eue4Zi%Ck+C3H;eR~>CNRdAvzJWZ4(h~FzAdQ@{TtW}(SVmRddRb#|bSQ6By2%UaAbw~;TGTe;-;yMn zh;kd)jL1RyXj+0UEY%}}3FVb(bk_reGbd1VvX#FvLl;o{QWaCk`Qqm6iK17<3yT8_ z>g%RAnI|yjqK64bokM^3=VyzBu)?Ph-6zEjQJqpbOo`lzwrDJ^;IADKYq1NaEn1g%qBmMNqpPOOJrJAu8XJ(Q_C8ZQg@3d zef%Rtq|#uoECnC_QMpjbBJo}n&36ETL9jfb?jOQ9D^wHx)SxU%V#*MGzU#r{tfMj1 zmKRN#O=W~@XK7y>jAqgu9K`q?oo3P{!RM&1!0VsHF|o;Sq-z0gIx(QtwHi2O25D<`IkR&x2e z?*;iddtjZ(7hDcwXE~xFN2W>hqbW!sqtP@i6PS}EwUjBM)5P=)H{+BkqeQMqV7s(=CKG_E3aS z6%4eI7l+n+yJZWetfHDB3G`7|GeL!|%b9V@fLZ%{VNtU9W|~wvDRv&h06NWTl7o)-k&Kme*FccACC6Zep-BRiVqG^GmgbTU?aGg$F~o5RTIG#Pd3VCUC?% z6i;vcg}$I?zPe>%rEhbNZ=z^B;|a~KV@`M>0vgzTvDBp4>1)g!jtObRr_Yyn;5Y81 zL$0nReuKGd@|T5us=&4Dj_48F%BAvkq?k~{nY30TZ8 zZA&IAoJmSc18Ye|b=aA>X=N-Q(HwVh&XcT!%Dl)vh`r*FfqL|WB1lJ;i@7DvpoPNV zROz*l`u%WN=o|~Jeu5zji@jtsj3S|iW0%jo-iC?bSfL`ZTwVp!&_|ven|_WAEcqFD z?Ye7x+x?|sNmwm*NK76QXs|oqYOMF}6Jm1C6?4VqV_uk*S$I-lU$t2pd}x^-oD8Ew z;z6(=455y~9vY?h*Hg*^We%kdSeJk zwL6SZ>2jR^Z#U8OCj6wf0NZB&_NDLoCkEo{rKJ>O<#`YF)E$TQAvyia%4<5K+X;g? zHjBU4e%aW!GytOYm#maBZ(o0)0qX+mbjf#j^tborn(^z?REaN9F`_wBzl`mVdj=f9Lt?goy&iN;7 zi!OVGPBemfzd3aQ*&mSf@RD*gC)U~!Q(6Gvk%i)US`uh9!2(``#vL#?$6u`t)@0dQ z`|G_x8@<=C<4AXBQX7uD^JfKikUrghTt0FOZx(ZLG{<-o243+>$1ZpLwqT($DL#%0 zV2`Dz^U+gjO=v?CSi)Q09L%|UvyDt?(dd(i-HmV{FS=!G7xu_EqvO5Nx?~KVTMzVi z@Enm$*CXbXl3WL}2)If1@mE_73&96e{SH~J`ju-IFKjj=6khbKee#~rV{2PIvpnRG zD_q_i1)gRJ`Ayxn-(6_FUGRNyBiPwnX+2nE>FJvcxmQc(P&Lrt&Kz417}84t_8J6khP~&k2qUl*P5mripC3Do;fY}87`_x zDiL@G7lW}EGcdQZakuhH$&(a4#F8UTb9sbY;7!nwd0gFZ5m5*v5s`ZW_fdZy@iDBL zxlnOe*Ahv9CSvqcXG!E%M#eScn!j5jzUXkNtG&fucImUkXG}MH+w`GrOJe>K8C2#8 z5j81dm9m3mlOweUWm{9Yr6MphF6*XjGM3&xKr7|&p6OXaiY{lgN>Tv8ukSdgDz*!KL3!L}NwODgSkpO10i2vpNaC`GJb4km+~~Q5 zO|~29o(8_vym2W*YPSNB+mED9=wB_*7%>Ip>QUqj*y<|Hl0A8qr=Y(P2vi!(1?=T< zJ6tZ5X)72Yb2>?C)i)@8oF&4T&?B<;!(*ozjv$^XOy*7NqKb50J{DZ^( z2lrU>xa~IN*gR}#H-O>>eSj_XUgB0;W*L~ z*wgR^4trUTlaSj?Q821mR4|u0)pms9b_nFQOxY|?dwAuRD0g&LeNH)!dq_K+5!}=O z5w*Wh^{>wjMQnW<_rM*|KDT{|Rv5X48~4ncVWw;RTob!}4p;EGEp7J!R&2V>(S4FF zVeDp^(r@H$>#E_ZR;Y)}8_sSEFL=8R*nOL=D|X|#0j^i>ZZo`~x0|DffIraQ=6SMz zFy7aBq4L535z8`N(r1-*flQR^r-Vh4=tF8yUS?wyU0GJlaWsg3Fs>c_%CS*cIXF14 zYrKdIudco#NCbHmK7t70-sB%uk{$)_BKzJ_!&dWC_Sm0465=!{XWEiMFxJ0cuX z$QyIPa8tuO;U!V@QfLOLoIK%?;Y&jTWcvyuz7Erd7&vKCHB+0wSBFbt*9HWtt-|Ss zYl~<3yPSw_5&L>_WHfkP$a^T@Wr(AIoVKU_f6>l#fe%ncd?9?M&bxchvGHn8-XFEw zf3Y38T;AnRw{UFl0sIq#eAom8zWktUf}h?~%Ks12RHvUbH-C+ zbcthyWP{T9LGB>BarNV%ktv;6d$~ZG`IhpuIc83iqCPaU@6Hr!qRmdo=I4C)D8Aq;*}t8*^J+ z%m}nr5<^f)bg4Ht0vpU}FG3P6pxIbtlYxK&W=~}SwLqp)oD}^j9MOFAGg%@87nhn$@dnXj;DBm$L z)$#x}g>SLEr)u|B4iabH-USBYy1T=c0J*TBdL>X?M8>R}qAvU@rX1#E+I6c)hiGcFv&y_zHXa(F<%|UBQl!I5pX)nY?pj+7 z-Qk9t&sdbCmUIc%eJM!`J^Bm78n+0OFV?%90uSFj-*Wxh?}?Pes_pu zB9+}oh8Lff07h)Ggdjdefg&~??JhqN(oKp7@_NB2*4m_G&?FJ85)R8f!-}_%yB~T~ zBF0;LCs~)nhHxiWiH;B37l3(?i)ZPRvOJ_KUTvsB*cZm(Z-muvhn#oJye5Zn_`*om zl56PcvBBX=Kc{r9HB$5PdO;`ZsJwZ{a{Rm%Ll4J(UB>1lFN1Mj?C_zZxpKccsS>TL z^{g94tpjHn^y>1gX2XUih!c(9Az0z7zf0RY-{FdzXH`Gx^#kHzxgLK|K2F~~Mt>h( zTlfUu5^Sj34$J_PzVhiLhfC>f!RAdbLIU0Z7`U=;Pb(rUmx{Y%8tuy`WjBrM<$Nw<_& zq%-0l9YA+U^)QyR#v#_j_61 zZ9Un~y9b1qdN=`Y#o#y3S`I(nZn(R%A+T=uWGTMLvG!@B?BDo!w`X~HiTCYrDev{c zaqs(M#tgHEMToqoL%!bK=x>Ke-tL1^+0~^<*%D1WchOA-tvnU74$meOW?~yDLaS z@6^L!5|7|QX*{rnIWVQNN-fu$#qo?L*?*2&qQpxC}XX;8^Jomf$j8ynzN7jF`d;xFV`wJo_S=l&(T~lIct<; zfo*KH&tH8AtZM8=!HznYiK76!8-5f{G-D`nvE(k5aO{KTPBXHcm}1npCq;{1CoII0 zVt_q}RXr+Po7Sw zYY%2eQ?*kbad6Z9RF~QcMMl{=7Psuf3p`__4={C&*Hgx1SiBl#(wzLH2HQG9l%8B` zkc$|5gVKz*gU(F6(e{!ae7^Wc;T{@;@h03s_);H`zLZDj-XG^45`*=|(MRbhQd9Lg z6Us0msGY%lMKp583?cebAF#eu2k(r(a=ZSRGAC?_#H>{Gg?fhlc&;gijeFTzZhpL; z<&hwM_rq)ZkuzLte^T(HIJ8ts#F(5=zZ}H9Bk7ppH$E35UrwdIC*99M(*&$3MQ}?} zmMDF|(%fxYb<;~af{G-H22#2TwlFQrK*7Pc$aH?C-VY+#v+9DnWP%~{)I>%XGb%N% zgG(w31j%C^m6ctf~`r zBR#ZqS;%MyV4~GjS_gO$Wtv2#j_E9LP4CH5byOKQ0NPAsG-fp0%*@BzD+Z7>)Oqb`$b4%6*F<5c*OzSFbB( zKc=PI>7|YrS+fxt_3-E&lI2 zW8}8Ax>LWA9OxaVh%<6z$})5}&bxx)FM-|4N4M8%vS7@no=DVZ;`?cS`(s*oNY4O3XE^G7Qmq&`B1P@_PoQv$uwP>c2;7bWrx&?Bb{=E?VN2TWSt zG`S(9ZJrp&+Ft(hJ&7?;3fCiYO7RO+MK7X?YN8UE;(OI+$fi~^WYd|=yY!I?UB^Hu zvEYU$Dx2m+hbaZkDGm9NoXqDV&O|lm7q_{<&Uv#cVD%V3S^G{Ga z(;-LGF$kvt6W#9N6Ykk5kYFzmVzQ<<%n`nh_(;nP(BN+}hqTyaYH3FT(1*6};5-3H z?o1*hyxk$+S>rr4!+vlIy4l1THug!5$P()(srGfiB7!BslxpjNYc3I z+eO6L!jx3klaO6`8fo`F^f(7t!?63k;efU2G|42gP_K}zovUU>m}P44rdRN#_Z&eE zV(qf-@Z>amEKzLQf&(JJv<0t%tJ~5m=A;FGoI{WRziW_X@+Vcq58+prl}xPi!5^T! z+BeM_=59zb%pCkr`wsHnxhn7dji!TwU*LB`-2Q7PL0!H*?*CLwp8L7+|2GD7vHkJz zp%XT6Hc)mnFtRfFPcI*?7rQlHP*6}LP+M0}S65J3QPA4`uf@V+<+kZ;Wl_*yX~T)b z@rA?3*UN-G)x(X#qM*KO-N)Hqhtu88d&`N@U&yYYw4$ID(Zhv>l79*pyStYOeTZc! zL!+U&bMY0l^w5nBp4)JgTFs z2QWhcVZ|Q<7}DlripB%}g8eT5ck|7r;q)h{tY80bTJQMZgZsBYMGcI8tcyJUAfuBIG+0jITeA(ze(T4)l!v zDc$34X8P>)4^Tf1VhB-)k)Zs&AhIk(&loonm8lAc;NM>AksXa%s|tvvsW1$#wgr)t zsFkD>xF+21%0(QqxtiH&X~@}B!^J9{tr{#ToLIS%Mkdx8QasZ zQAmb^P8r_qAtPJbb@;@-Ob4S^Mx>AK`T8k5A}s79xUVTNYLfaDrPI)DP*vCgJH zcKy>jWj}$M>je*^S<8J|!)tPCy?5{ji8e%&Y(je-=~n^V*U(#?`*<9Q+ipiKW#anwa9t}oCb4tLfSozlQ3lA4`tRSsW+l)Nhjna33mIC>2RkFuy`S)Ct zPhGdQw?K1#Y!!E2QwC}|5vJ+kbskIZbBd^q)Gd@D$aq3lI===+i!25{Y54g)`kXPV1ab zS@9b=Lo9V8^}lzo7?w#|k~gCu4bpK0V4ZBwtdjG}cFmNFlZE5tr&)XXJ|%E5KT|v^ zPa)%*N(g{-(r1N4(}m)o0hBDy3pwI1 zRAB3$Tiy;VQs{_OteUVN$Qqku?Qm^rL-f0e5>_1l0q9*JT*Igz1Y?+hD=Wjp&U8=u z@#Jc34gmKkA+`$?y&9A9{Jg!)NNCI_^!F3~z46?c!m$W+i~u{g=cth{e#hr%5+^Qw zvtul;+mSpj6E#d|T^Jp@Y7R%*<9ccYVWU%pTsRMOHv48=M|2NFkOfEB7foA|`ui(0 z#I|~Hz)kQ#H5#ZwBfhlRusy5DiwJ&UM<7jHPX_qwU_?4(8WptA$fimK3|)FQEc4m-K1|}sdJ>rTbyC?$FzuU|HslMBlj>myK7pi|a{7%TlQO}aDfN3- z9vr0>jwFL{t;WhaRj4KQVpgeP>5!@UneoT1tnp}@jSC~lj=bk&Nrp27?vTgn-WXAvhxzbp?Nt@Bj}P#4Q?aAA@~h2Pxi8$%BTXQRzf zAtas9SRn&^Yf@uEp|^fhgZVecnbjI0UKXxPG*)N}R?NcG3v;0djfw%bq9Lfi&Hx%? zrdb=ZH&Yvvy>h9CKsxP~%BsD|oRjSnWMc&TX{zTt1bIQ)lNGD9vLF{)4PN$;Pxr{n zv#8oLU0q<-l)6G)BGoFIGZeBY807x~zb5+dU+_$lQH<45Y7P5w)p&z(Zn!84E9IM_ zNaj#l%Ks$}RULw{J}g!J1pQ~Gz4D066#rz{@z4FgE$;r;>StnV;9~9kZ;RglVjzV` zb3qRNq+HR$FnRi{B8U~*02Vq3RKq+}w8PAp2~bzW{t7j>e;6X0x+eQj_t-se-P;j_ zenc&|_?&K!1V-l4F`pDy8OQm7$yE%J@Bop2HXz+n1S!HvFtXt#cWK&!`{mgoL z7k$g>V))**wE(&`mP5(9rUit85*hN9^0i!uH09vCoD}n~f{?rBok1U5&R8C29MD_A zvw~505WIf|iqpDB00RmD0R2Nk;ree=>c0mn>}X(Nt7u~9Xl&y6W7p(jqHN$~^&j&s zM^(!WNd@^^Hp_U?*kZfEG~VCud-#6TZAu~N z8wAg8QnLzrOrr23dyd%s@@$->rX$4zVGhmXu z>QM{|U!}pGlKmTcW{@7mTCsT0xO_+-#U8%sXh5RCCib#^<#-ctjaI8?7^Z4j6J~DfLcQt0ffi9hD_ILLRv~Pon9+Ozax$G_1$c;MS~3|C6}NP!gEBcB$Riau1?My` z48|FL)5Ii7B+7^JnW?kMFDYSODrEFk?b&yDr%#eqhUS?f4jW@cOHO%mC07iG9SEwgcNvqeQmyQt?W zcmfKQK5Hz4u}-}Bsh`l5uZL@*Tr~*HIzrA?!P_gh-z!7w%a6!1SD^#uWRW;6%Z+e3 z{?X|!+YgLf!CvNx++4 zz&%gRTh0NTbks>TftuA7*eTaaV=%e>Z(?i9G zy0`tPm5U96Zz@*mRLH%f?`uVoybui3t{pqEsXE`?_l2xt1x(_7F-wT)bLP+-{T`tT zl#Z!F?4aWxRU#s?IJDVPI3WV4Wx44L@=Pl;7@mzoIA1P<^?UD3WHUauKjsxKH1HpV zw@<%+8;`>{ZE=x8YoXuX^DeE_=p=B|1M zMkhr+zE0q=RUSV#`VB1P#Gl3zxO-Lx$Y_s3>cw){L@LRlGo(5g9^bH8zwK&m z*x!Wx9VT%V9q1UM>MF}Q*(qK-e`$AYc)>%BJ~Y_#aZ$)1>^E>o6w_za>;R3@5bo*) zyoda3sK^{jvIV$T;0P?E5`eJ$H@xyTvSc8yzPig9(++Tos}qQULt1}?UGM?d*QVgPL zy9Thjc$SPcN#O5+NdLf*IQu{zL92Z`3D!v;sQ_pj;Sj}%Zs9-dlBE_Xk5X0 z`7hpnnEFjPyuuNkYKBp^O9-giYEys!VeaomtjXm7C{OR1<0)T&$I`w zGHK+J7_~NC%-C2qzcGN*^lEEfQT8gYsaZjlX9X6%F{x^K^Vyi%ecP#A>1AWupu#E) z#oO~9``mr=d)pez<$1vOe=(Fmx3ZTX#2UIk&?nJ+AjP#!yg%J{+SNlR=niq)n;hCA z?&g8UxZ5A$BI%}zrtHo_3v++X1>cQ&dw4X3XbWb50~=~aKe~An%Rf}bwN1JY07pFv z@_r@2<<*vP=be<>85tTId}qHsb>kh-zKht`^-hH29q|hL7)5}1W4JUjU?$XxaKNtX z4-Q5$$Q}lg_Nv4L%sWlv8*1aFI&dJ~VI; z*^Wn-mv&;dHB~2ips~zLl-SZj*1BL#InK9aO< zRIk6CI+xI5at2fGD$vRxSaI$Tf$_;IwOR#m%Gv6FWyo0EEYy&+%XAy@4VV`tY}5kz zY8d4K3~(h9sjQGR@9bn^TAmt2XyA7l6sNr4`6;1qZb7dDVQv)rl1@OxTG!t=8L|$W zA_pD-!>k!yniZ4I*_%Hx&RwW{viO80EdFjT9%KWzuTI)fCGN2zcFp{VlI@h+PYmKp zHV|Wn*dt~s_pD48NI8+5FW9QApt4|8&wDfUjrt;#5Z&bzycK2~=6vIddp*hLSfHQA{E}4$nj*szwZI_6EV@9 zXI)Yj)zHM?s#h8{S*NgHeUeI=zVVNyS*J9@YK3Ix`P)6})<<)Ms4Y4BSonzM^d1|1 zBR>94+ezXrH~RJfgpqax`8d;UxJAk|ue!u6jqwNici4MEG~N*e2H$`j$fYT`69(Ur z77(|2duX9ulfinb4doD*vRA*jh6=$Kcck9@nFL1PaGmiNW?#aC)|c$yoypfPzvKt9 z&^NOm>ZYE_7t9XhcPs(M-+db#A;fPA`+;WzCJ_b_0W~%t5UXoF?r8D%w&=h6eSm0* zPNWm$Q{Cm3x~+sQmKp;39athGajn%T&$`TYM>;)g%QWgS$t<^BKs_P*F9hLTXemzVfx0msWjOhyc!4XKY- zJ5)&w9~T^rDbBx%+4Zjv4~1~Ysp4`B1=+Da8GV6{xz~$p=-qz z5^@XdY^IQX%$=+Jonowr(>hcrDZKNzD1lR)E~KG@fh!x=s)6kb7~{;S1G8I0b$VWV zfDPE8*Ru7ze0z{Q2vXp%si^0juyj#P+>vCcNGs=3(VAxZCU;4p1}Hv>K$4^2rVyB$ zC6rnRzs=r$V~~E*R2}MKMPYH5yWqRjZ2m!*IaTU~GLvgfi6-MGIg>MjW7bI{6BE#v z7jk$zRBBe89VF)>8n7wY(t>r-d`Uj*kQW$!huLE75)P#hsy^p*XAWxSwNqNIno^KD zA*s|YGm3YLa$#>|ZTYLaF&avCXveNW-7L*vlaZVA{4J^+RUMbAMf0MwA?W&=1Gu7c z{D08(PC>SH+qQ1nwr$(aoN3#(X4;r(+qP}nwr$&-H`iJFM67@9b7MS=m+{bJM6_OO ztEJTP?QLY)%HYoo-@6tt`obh)fWYJt818? z&4f1PT(|aaxKca=P^Kc$K4rhzoV=is`XyRg6sM0TsajH9MUsZK)t4ed8+U@Sj~nKd z7F|>OYAlFF3bJO~VPR8cNanDl5K4P?@z9{~ZD^jWG zE3!^2Z#|=PuzKBx+aj0qZLvSM=!lnfRRBBE)6Uj%#?BNd=E^3SMo_=&Bq`Q?cyuZ) z$vLz-eR&>=l^?Ld`pT}^lbw1~y!d5?%kq+!u)HWf$J1-0Z|jR*MNh3QhVJ0`>w)4* zfnMUkR&y(wVCs*z%mIQ7`Q8O)$X^VgH;dTfylgD-gHoPgyE!T$>NfIptdUBiRGEnK zY%<2^Q^rY)c6`VBQojAp?vUXId5Ht6epF#ptU8Dai)OGB;%Jvj(#DHg^pja%0T;%( z9=vy`%aJ6!ZQ8-95tKpxS_dFU@6`-n;>%1M0T6T!uj7tJJpAk~5H&<{zlsS57})`P zWP9r@zXK`z-UB!AF^t0B9}0_+2C_Owd0L2^VeW-9Z}6;3CUt-M&(Ar}g8Oy{yXhX_4$@Q7RC@a6BiGXjX zs*1K)f_XAe5G;$!^)odxoqx-o$2Ryn3bv{o_RG+F@+%&H+nC&7V?1B$9Z`Bq=)#)C z2v%E-6SgPN+xBfCrZjRfxM?e?S=T$|K(l{72)>}@=e&#fMW zUF02W4+9JfYSt$?Ja&81E4$!o?^Ob;6(Q zVXTklH6OGFRE8R|lKC5**6gjRo_Z^wIYMyI{g^#z zr@RgmY9^A}pWt{IO4?Z?B8GWRI^bEwB-NKasUF4-4ShPdIemalw3He)0i|aJMA-f)5q}TXYk_u zBTkV1_W_Tm?oyi`^7ftjWR+D-tb#$ z?S-@?Ww5P5&p{#0HDBQG^DTTOU-4dFTbOIB>H-@#7XioHoQ}^A+$XwUXJ6E6fINTr z$!bv#>@!0T!a%YImF#(NN5(+1{jz&?rAcN3IyX_z+nD!LrSn4-aZGui_mqK342nw6 zHO5ZIL(oI$Ra8)Jlgtm$&E4p)2@ z*uHdt;+$3({Z3<*7(lQbiftlQi^*VKLir3Z{p@sGqR*-+cPa);^_Y15Ivh#2oXzTO zhkn)pJq5aP;GS6!2vzir+Q$=IwTZb8R})O@_RqV|R=JC|+9kFcBmMty^x>+C|W2MMbb~rt}GdKXY=#srvy-- zda3rEfFONjPhgvr?Id&x1_G@MX`}8m3W>>dP)vb*kIx3uBe+(enC>?NH5zex7?ffe zi@k@Dx??=(++SZpNfamGvH{1HUZb5Yu48pBqLWG2LshnEH2U3&TI^WO8|HJhN@lwRx2o zc+=t%d~|0%jvMXgDY2VRG?1KagG}B!!|8V;fV!#;5Hq?JM%_redWW2tn)g1KF?W`1 zYA#!ShFwIR%YG3|kd2<2@ai!!QCA_}wR_LAh8LfNm@D3rqUsH+DSM&nDcNHfQQ`Ho zp<)_?rjJOX?wA!f0o4@*agG_Ic-u4n2^Dgs<4k_WeMnv3CrFz;H)W z$R>z`^&Fsdg>kz#LTzB5Ub7otyiq^w1%?oDU~|nSlQ7GU!Cwib^a?c0KTJ7bRHZ8j zHu6n4%-O4cGW&PtB0OtZRaNJqqe;ssmZ}+@J(M(I?kiAZUnicq*H^HP zEU$vJV3|Pq6zl`h)vK}#XBjM;L%M9-V>U=TSO;^YB8hth`;L+}>-dW9?mdp9bsqt! zZ=Yb>vcjc}@Hp;bL)%u#xo#O!Eu+Hn+p#o_)*B13ZF_muqRM#A?{dauCk9Zqb{DH+9*Jci0UELF&eKVA-yHr zmfW}bRMnSzj~?#0W{52S>xhzi*SC{DGnppDQ`WK?>hxE>;@}KP+dvF>C?1{GPN~)y zXuS#a^i!YJ>uc@8C9=hdns4*8MTXQdBgmGfiSfv;hjlX7?{};W|BW;M75^uGmF~%Abf3WlOm&kHWwJbVpwoY+?A4p$=(l*=<#D& znE&)#i5wkq8;odX5=xwR9ERabjT^-w9CXTTZlg^)|slm znCbZN-)BY^ch9xzB)t%)2`pxB$d+a9-6V>*165f~P+xt-?}A-Wwyo?R7ry72>b9ag z$7n$mcaUfHxR>pTR&vEI)*y5Ku2L-n!?oa~R z_%zL=aBZPSH!4+~i~c4vv~lqpn*D-&`t>OpUf(eGIO(#KIuBIM{FOYB8DcDVhGi7) z5Y;yencFhYiw$#(p1|P)H9K^M2xZsbQch@Aq!aCbiYTACkmjV!x!_5WXnZds?jCrS zV`dD5%T3T+TLw+-;a4;^H;_6aqYxVh-}C?S!)KpWRFr93D1KWVjXNCCI8;>Fp;dW(+)eJNns%GtpI?R=ltZ?8OG@10C1N+jJiPN^|fn+T5XhfgkS@ zZhNU!6Jey;UzLk_AN06H!97K{xJ25h`@y*VaYG5%rwAT|ygxr=9o6dh1F)0J&rSxv zgSZ+rZ%Sx>Z|ms($*{945x-UOqjbeDWbc#?#o=8Sj%|+k3cLVtoTn@t_Bs9?TGmK8lQu*=?9Vgh$TmSjQTRFg#?W=glkR)PJp2okuTRz1iER0w^#hX$m zaht!yKs2$ojty{QI4kB1Q{ARnXbJ`z8P6tqPDtVgZ?kVAPbNq#2PA=3obycbn^pRn zFHo$^io*O@1^i4q{%OpB#9B(b*h=y6?)oCsRgX7h>0x$Q$;Q#Vhb<=h?t1i@(|Wp& zlY1u508eYUt?M`3nU!|Obe>E-J&OW+hL{hWr+E2kTWx5)%i6bF_%wf1(`+3ia;0ka zYC1`MI7hIOAj4E1Kg<(jb%uJI@T(nbAW8ny(~V()fAmy@`%=Ep_ShTI;Zf=$M-nYe zRWm_?fOq)`lE;qFy&*%YElf4ryDJZdX$*3iJ`pJ09u@O~f7!Ws!`MMC%*oC_g|X}^ z07hwW0Dud40055v?#rZq8(CS{n*FB&sAyvFBZ)cwpKNvq$AA6cf2NozR@OL6s3U(} z^%}UdS531x`Z(6Mh9xeW`eNr<7F}+KB{VZ&1Fi93VH*#&!kEWAT)4ThS7e0$po!-_ z3R2J_T2_>+Xi`=&^tmXJl$NXR6ABKIhXdRd1<`kQ&#GEV%(QXi$r+Bgi;QP|q&xb& z_)fPq_?#{9^MUV*ev16U@-$Kp;BIS+$f$96UCkg-J zVQ{zmlM&!dVVJnn?3P!UA6c_vz-(M#K6uI zx}VYMp#D>VaQe2OqDAe2H@d|7x`5XC7AVkMlA+s;R)N?};ZH*zugi_lIvz-=^_px3 z9NL{rSS*h_nD66gHHfvmem_ABIvj@3g@ZN=e?LhDUYJw);qRt znK7EpK~hU`g0P_4b(x)4XIYmr$w%b)wb*8xrkT54H`4G8*-tQp?RB$stgPWO9L~!usKL-NuBF3hKC>zv^WwH}9=exlWLwL2ViWp&9AUe-q+Mot5=BdvV}7;? zpgpp0*$N4awS||nP#c|rU8_1^R+p^HsrMPB(xAS(ZoOWt)cq50xioMjR8L>FxA6+c z{-PAx9)FE^)K7PL90Y#o?@8Bod$iskfb|}yU?)A`dKVik>tr!%kl`P~>IENT{>sPv z4Zy!xi@d?|85+`J|C^gVXT-zLq@gh_&12PiLt8gO+d2N1QBJ}kGCx&ptGEXhNA(_~ zHLAMjTUh1Zh+F2odGcBF9e0rh|clB=-geBlZ2h?k^KhO6Qj6dwN|clWrExWgB86R0puJw#6EGnS<;+Lb@Sw z-IjZ>vNm;m;uCJ#1tMsdrpwfUA$vd62Ln?#YCXVD(lQ;&G@>a8t0D3%EgU9Rf>024i?Zm3`+}@ATU<+_djp4MKI>)-6{XaAP6=PH zk$tRS8AB{+vK};YRyjhd-ym{o4>$la6h24p--vBz-~;&!&xrR8E?ZEWU_w33DSk6Y zU}C{sl6+yzf+o8^o;=qjTd<)Qgf|)LlF$9w#Dkqef-iugi9aAbrvR|0SF!#luO{E^ z3u|SMon-VksY?8F1vuD8^nE+JFX%( z=@fDD;yUSh)&=8Tm}<1rtY9u=@|o^y!eyd&6bVK@QjRE8w_pVFxo8WyS_V}~f@q@> zcnT3KEA!`M_af*_a-9IZ_GGDqYfi4m2a?eo@pk@%nx0rEU(z#Of-~Jt!?XI;^ZV%b z`JzH6rXpGWFmi(^CLAtQo+H1ZBS1Yu1>K^ZcHaURp*O8N#(oiQ+zqVT^&ebv9a z)EBGo^3gKGGi`4bFZ>w?)S7tK%53tzVR!=Ae}!Z*$VUpo=3eh{RB zU@%5jIOZAu&>p9QJT6(BPNGpssAS~i6Kj$iee8ia-zi-8^%Bs*lTQI}gyKF{uqhdN z@Ju0ZA&AMeNhq({ZA*)AA>AsWY&SW>$`ak6X||_xk(rQ#Jo$NeO&cU9*swqFyeD}l zJdc&|TT2eCw6k)eDZB@|-;66abxPYR)6UR&1Vs7WL=o<~hSlhO{46?-!qsi>B1ZRi zf3-87I;qeIY0eeMvAkR^csrh|wWC zio+cH?Ag8DZ{on=iF2Qz(>sN2-=Nw}u49j7)F+e>f== zKft;FH97g;75d-hxJn(|31tPfij7q3&n^ew08nXccbqQB0>d0Bn_Y`B^dc| z!Ip-Feq%#g7HEQD$;Qvy4C1!V-A1Z0lu|M4S^T^mB*mA zbHzy6#X}wG58V^zjh6Bi5|A5Tkr2$5XT%()%v7}}i*(Nao9N%16`akId8s?%vVn9W zDHDl#;K5@=g3<_X7B4}KHZ$X>Rxn#A3LMR=Ygyj9=iqIBCLQE1vIIsb7pc?ibR zGs1f1)Vwo!PMq8-uqNkg&9G41A@M_=D^SSg+B@<@34C{xZB4f@(z5j$v~vl=5Cx5@ z(eiW}NwjCcbQqbeJhg}8SBzOkG+LB~-;iK7A#+mj8zBxLYFKuF%!i>eA3t4`Mf)n% z^5z2+N^PA`0n-*}SQ96gB4ziqem+f3gE6&aVlhExN0VE|e+C~+MaQIZn2|+7vYs)0 zl4Ve|6Kvug#AVgqW7Fpr&cD#4^6@Dz14Jrncge|;O)H?fChp2l_;#9xOl)PU=gGm3 zMx@rbO21ZbAt$)zBKg$>p&&n*a}2>_XaKGiDjAijx>WP=h`(m8!3YKi7Q0Sj=nIR_ zKgRM@uqK$#;=S(I0PAWwVUPkWen5o*cC!`6U=tQ!^cNLh-d@6MVz`{uD-2$)6%^KE zrp(S%H1U$@k!+8EQ?WWBSA&tuw4_V1O^c&6zqM0*ZUr-ZLY7ntSqc-67L@pg7#vfQ z3Qy@?gPVBbM`p0W-mN=Nt^s}Zid_MVUhE!v&~YllcRa>ke3y^RnbTETAQ>wT0)$RF zeZYKo{$2%;Gjg{mTUYaysgBqT4EA+u)+F1?-UaQeXhZn#=3OnU?x>s6*XrP#y$`5r zw@bm?eQ>B{8SM5X3=_W{`)FJ&pU@DMPw|16J7ie?p#hq@px0A-oUhZ^w4!eQftjJf zp4j7*$hv#&)2TGHF=k%iI4qwaJu3_`&FIfy?diPk@EyxnT%Q^Ok7$S}d?x1paCA{l z>z=rI$7C&_4{%VaX>VeR)=$P12vF$a9vAnE;wa)@1HXQ$l)ls2yn)U`1CTk-kCNdq zAH-$oXo8rVp&e(|@%{>tpsXrIlp#f@wh~2HuIxe=4IW=8^?dt|KjhMSK38mJu6}q` zi5tIn6-G_9{8eAh)Asa8qSQXkID9p**g~3u2K-p~65kv6kb%{xm&eQ;Q-j-$#|gZG(Ky5 zCPU%vj9|6&?D->*fF}D+2E{viO@q;9_#?N^JVR;_fNvYOsBBqH(dLPrH64q}Gq`K6 zbjIo5bLgB8Yez{H-t8tsX$4zU-&Oi&1@j?KUdP_*ll^ts{Aw3-WWOmX+3dS$qstxgzTa3ex@sj5{SsJVmj5 zmdSr65t>s|9$QxRt^B;HN75dcJjGUN@rnX~juLHq;MfMQh^)}^bRjbx}40)v)^9;0DcV)66hWwOp zYKmp~2c#KAA^L=Jqz?W=qBgq2wimhWon(f0oOrd=5L3;zvbxtxFXFF40O;bM;MP)Y zKf+0(JkfV22@+lrkazmEyOV+h7nOW*#_<50z)hCx| z^3oCJ-?6I`c(afAbCLf@V7S&;WVfx`r-oR!*PZ%r8+f)49+bQAJIkX|0t6CDX)Sg9&rjW0lI{d4QDwZ<;J_a1Z>6CxB>*$`9wIHB@~YGMGJ2dHHF*C>XSAt_Bde16pG z1V1-jpo)n-G_<~{r#%dtPIBPZt4;E2i>!Vo1C^1#R9)UC&P(18&BrRgr#|YFWbIlt zV`w%3yh6+<^er~hAuGFZEl=SBEn|c;`neo>&Nkg3z|vp;IG}avS6e zbf!W89JZSWXbzDy8Zhdt)K{_9DfuJ)%gIu)Q9~9(@geINBU?j7 zWk97>L17_;JoBU_K&4b*`GaH`X`tU7-d-J@8?8ZdjJCL6>lb*4{a_!fwyS(!egG*AGzy&1d>`zlQ>{PM(OO7k z08qHd>X9iD#}j`P+3X5!bnl(6-Klj8&*QHyM;iGd(x-JgZ=L$3cFXVOv-*9&^*^Li z@4tafT})0TMTFFy*EH|1Lo|=MNwjj&x9VwYww|mXT`Aw4)@>KjrQDxZ3G>X?tur@OK07Qnwk}~oTG=gv-huHCjF|5ZBu@MR_ zoi2RLh6Du9Fq}+!%O0O_cgJr4H}nu>QwU>Cj5{y`@A~}~K$JE*n<|(YuUC?Rih~+K z5k@*0Vd9m)^W(t`+~bhFr%b$~Q}5jCI$ljfXMGzcMCB+kd-ny zGVAsJ8eeLtUTLWOS?F#|amWCgL%!z2K%a68Rp-_KEoK)1T{Bpr%@MAaQk%TecI(lZ z2NK8*o!pX00Doh5cgaQ`eUK%)8STnx559dhoYg92?iG{kS>x6@6NT5=1H!gR%fUy} z?ISYDcOK}EY@0baLgJ;}PYyvkCr+}6HTTiv*P55ADH4OolLekMAZfI@C=`{YIcZJC z@wNV^I@Mm7H1WlpaVipnUD}Lv6t}$^tBQ~mO-Vz3gGCUV8ys~2!v;nKrin4Ci7I3A zSf+BTPGN!75qa;AYk6RneHUlN;PIKPbxu7(=&4_;L23J1s}yuahlYov41C`<7G!#Jut|+tR4AIAa@}3@okEH^7W#*Q-9qC9qzFdw7qN{ z1#mD4&JdN5A918{GCXtacJ6{&gvUpeZ@?6PO8p6tx5E!z1RY z)dKTlxH}b;GB$&RK)Jdn)hQ+{YEy#>Nd(51^vbx&{f5#>p`J33;P%AQBu*qg+C5#! z7es9vVn9|)Rusq=$5kQ8R{Bz98AzoiOUN996ZOEc=_a=ZPzyNWavDI@=)<4%t8z!9 zIJ4_o~Ss>*@42cSU$pp43cN&>VjKTJp zC$~Su?mfYOk-Q;e5USH%kw#y6+D)tn&`ntUroQ4M=?|zcPsY^;B&VfjjOSfmaFv0r zhYMPD`_)XmU}Gd++apKrU?F!Be|Xv; zn7(nhYM8zQJ0_pNe-rB|z`2NIE&P=!o?mET2z!Q}s z9gP_acZj$MsnA6wX6TSH&GGpoqg*$(m^i9}~ko;gSt)4$gXtECFyww8A=82r93Y1RfjZJiLv-cMjZJQgKGl7J0^{qh)sm+SA zE}20!p|b14bhE)S%)J97CrhGJRmpd#(;J`Sw?B|EXBLmvw2w7 zN!VD>*6s~x~@~3fwb9(zz(jRR!RQJDNwn}6VIU_7#S>7mnqr( zoJpivIArM<6-3G#&bR>U4bi5G9Uc2~qi84`Xw`5{iMd!ZfdZ6H-rI7|3<&jmC$j`9yJ(X5}!6 zU;F8~E}}$pVakRa19!2fR6a#|s3%E}t{pulR@esKWL=IDM%w3&MjQF126JFI$jB)* z8?Fb*$<5Wa_y#J`2qHF}>PtRJN{?45>xR?n)DC!Kn5MT6@go~JLh5qzS66TO- zQY;WPKBSSBlu9X+Ets?lsPkcp%kbl1Dq?e2S;6KMCw+%ix`vSNkd*6#sISUrLhs)F z&;U?kIYNIcJ?e``sgP1+FfU4=iP~HBG{!(CH9Ra&ey*1X3i*JQaxS>1V!O*r;ke_} z9Wr|C8Ot_`3K6n0+-*r}g`qiOI0%tmab$+UO~^-Z0eE4dudv~%-D%-f`)8e4)h3@@ zxE(dw3GB})X{qU%*4Sl&F0o|_Zhv)}4tV&W-9czVpr1%ECK&+N*cj3s_bs)8)tY*d z8?_lWBhnkJ8InD&S7PpXs@J669fi2p9JFN?)|h2jOI&EW+d!|sQd%RcLSt!#UGZHs zFpLl;n}?Z*3I)j0IK(;p#gaK=$>mpi7R zwud8Tg7>_6o|}Lklhs+u$}q$9O^ZJQ#r?cUx>f=5AxZcfXDP5_7wLgfn{wZz1z>#| z5nQD{VB!_24W~#s$XPiG?cNZ4NuWe?QyP|I`T$Zh$n$LjQ!|QDGeFFOHapjuL+;{^ z#imwZ8=l-{*Tn1TdRKam(`Un;7s}3mg?nyjrotZD>(q2~WskyE|J)1En~4ULBPL6g zX6R}yMh7Qfktb}+jWWcR2pLJai)UkkdTF#Lv}iO%p4uQM@C(U<1P35ntPO-+r#J2- zc4btxZ~O$Cmu~JMp7ZKN!B}5mi_&dva@~Frq1em`mt2?^3KMeAM8kjJ9%-2dg4H4l zRykF1X{H8Pl?~32HOAYz8=H|u=yT5tUU$+6KA?Go@Y|3tsCx=kJM?8Y!G&Bj^%}a< zCz1JDtF#yS^GhP+75jtzm3Cok`ryXBGhoDwU2PVKssyRn6ZWFaePdE4PuYUw1*hDh^Yv1+P z-@(*WbAekB{~$J7qZCOHK?D%qJ}gz4$d1}EDBY=kU^fcJ9}hOu(B~Vo@AiYUFF-2c z{oLIgX)C%9THu?T_rB(idVZt67V&{zkNkZsUevKkxiQaMc4M5PId(M(Ss=HG6H&5- zE?~!#NBSN0%N#KbcD85}jk_Y1H?fFscT2rX+(SMBXx?uFnv?rcwhw6%QiEB6CkPPm z=26ndUDbqu=Mql{YFGniJ|KFx|01pjXJUa`|KuDD{4Bx#M*`wMdRZkCd*`1<*22L0 zKYQ6Cwg0>GYC3QHr%|9yLjfkXSwJ2!Xb}nos>%XUvpheyWULXFD-AM1B5(V~t>=DU zv->qLrac*bJYd2s1^ydSPy2O00Uhl%EA4SR(Z7P4`6IL8h4W+M=OgXg?Q}=bE(=UcM)R37M5d(bqC>P*VRXvXVJ3Zu z!>#6`Gw`hVW!lHlx3xeE43rm{P~u2kUu^T33>^@>K@Uzl&0?CB*$7r>Rc>#>bQqNi zWszgwJXs}eF-N>%2l`0&G_78*%v4%ev|n^VUjkG$sLc#iRTK=hg%cYoFg-%{Ln_k8 zQq5nXbo5F;H;>}>2&5or2Q^HJ1EQ(DBSSsL>DwWpkOlKyPR6{??dg;`BYdrVzM(>UG-WK~g@ zS`^1_yiHUClKkY(#J)}(k)(eNk{>dC5}7ZgHXlpwN27r;IT2`+DRqg$p-Rie9^IBq+A%7Xf31Zd-0N^zzRFFb-EKKBYZawnNh9l;ySzJDl1-H#DA zaCjk&Q@}-o!m`SRgTk7*w3lBvAe@$&o~3~+qduS{rIeJvmlbeJYR&W>_fHhpz}27R`CfMl`$m`JoB$$8VKuD=<3(8h;k1xb+fv$qyeR$=Fy z?QKK5!~JI32Q}vT`u9}%`cvlTQqn!bEcBIut;+YM&ursim7ZfyS}|7j5QL+rRe?9S zX|FH_enP+I|{wCl-y}W4O-oA;@W}gTQ z-}e`FvKRZ#yM6T4zOYe{WavjG1Zohz5ltw2!B<#M;R_*TbCg!6gBB@vpDegv2|Xuz zp$EUY{uRfB#ngKJ;ehV!!aNRtTb7Gg>4sV&2am+Jk;4zABb?a-2J87P=m`7lplU*m z8&l3yF7TTi4s}pm>jFe|S)cB^;yX>oFB;$LRS_I-&x7{)?fqj6f#J~kaKyqxeI}Mo zU#vB|k`orQ{DdFdh~sAq_D=C-^@HhebY6xEen>AO$~brPTlJ z&-$N=rS#*`vNln*aJ2Zx0HR2B!(~GRWy^?M)sT7xt%ap=c3CD<3-`K67PyL#Ww)@A z4shJLk|GRAgQ*^8l>8Rb3$UhERb!V=z zwP7Et1sC-Bvq933Z1m;`q*jkOX~)a#T5)~&G7VkbU|F@qd!|LFrnw(O5AB4yi3)~K zp&8w#T$3ns(@e9m2!pO8ygo7Cu*jq6@^*nnjY*|3oo)jL$yjLQ0rc*V1>Ib^BrP~mby*HZK&jhA%mz%H!V3FrE{(|c*{ zI#6FF;JVFRw!kzST-~kt26FWAz0nBoy1DQ?8cCVas@?M8)ZaMz+eRzx$w;8N!{La`aY?>0JD8cBu(!TLdq zS^D}Jf$yd-!|w%!II&-WUUwI&>c1nVE3!)ZC#KaCG1M<;`whrfcti^Gg~`-5i8gqy z--x;ou((AvR>XG_p50nL|8#|)0RE>d2gABgOic4;sQOmSQ?WNGW}p_iMbDi649 zVl(zR4^J1w-6@)>U;h`&!s8O|mAssYeF^;xV4V9h0Zc3e8qd${F0;2y=Onq|$Tq#r z(5~uMd;IuJPQ}Fx3xiF;X={qh^|)`FE&?Y|?7iJd)w&@WGVblUqc$P$9g1h$5aZr% zEDq+lN25!&9FHV})D{PC7(tgsG!!!a^%tAEY96xR`3iyY zDG^?f@o{Rnw0HDfZhgn81p%derZoZSE$cjrQ{UhRS_#%E`s?uyzZUfeZpq!Bq%Xuk z3Fb!&u&s$HK2Rz^S-h{BUM z$1(?B#@?R6UIHe(%TuM(BRwO0QRJ@D{NS0ho21Z?iNQn6r*YqZzM*0zG}gR+`suo# z#G?N{X+zn<#>DtP8_Fcb3E6%D6kcFj2xa#-ApF3=pxuHWiW55t8J1JtXm=u&(fA@v zRoIU4Vk-d|-6sJ5xSN^2U}SLX>111t<4o4ZOw9{C0C;JkL4KmYUp?1yytD(%3j7Za2Vwp8G^lf*RQ;+&9C+~X{O}~p*Trmd|xlGa3 z6Jf;~3Xer?xA;CT1=j`FVBr}5r7(iBfoBdYt8)M0Fr`?+7e|!zg(YN$#J_kEgx;7U z^H0%j^NE-Z*A*JHg>@E$euY7tg{LNF<^~ks@qM)~F=o}ykyJM8g-;`~YEzI$D!%O& zTZNRuCBK$&_ff#1p0c*sYdxF;)w_(N5qy`H9;igMdx?*D6}FfeyYvPBY8FSEo&|{h zqCu(=l2R|W3{rk+6)o?P*hZb|l;j~Gm8PD;Ec>spDXK>}3926~lb!$o0OtR@!2H`% zG=v4|hO+ebSI@aRMjCfuU%G+D=$`dg+*;b|6zn$fxaa_OMyxy>EI@7V4q|WZ?zU0inS%bd z--ol^KKEyJ%YeqZb@AEU^JnW^2y?yru)SV8>(_C^YPq)GH%pC}p>nH1zH#gm?c522 zuO-1Z7R6V*H2`&`ehr1@vWtOUzjK5+evthH>cKx6F_QJl#PsoRx*G!WLV9VR4f}X~ zCjv+!>3iRJrC&>qodX3B{^LC8v{?a7aX3ZbNw}wBx z_*Cd&mFZ)LaA@d}dKu#OiDa0xu9@u=rI-)cg$P{V!2Hv}hhk9^3ATQ0U0#{uy#(PN zrye`ImzTr`v1jPXFoZUjJaXfVo zUU~VW04(Z={cR&49Wmk*VqqZ|J$(m|c2#$66A{wcstT~ngHc%f6#hMG2m-a}OvHrT zc$f=BOJ-Co73$(ku#0ecsaz}_v-EJI0(98|k>6J+Wl&N!xCU$k(NN@lK3T@sT7 z3qt9ArgVwOFMGYpr@*B`4PHwTW7*hzds;ASL+-jEAd;djV(5o3i!9UCSh6GoXK8&jpQ9acNqT~IsZS14UoiHcr9 zh%jQo@_gRZDJ1d}If4mhFGv3TH;NasL2F~b{N^aiCuP65lG|75FLit~lZZAVO$%Du zGx&TDQPpHACIB)I6lfysm09^%!+gjVp;Nq{wp=N4CBtq>(xtPhuqH;3D=kp4LV}uF z2AGjX9U=i-Fu3-O1BZBvI%Hh3oxgP;Pd*plygp^_3pXNir2a1`aHqGqzv+_~ka7E#jl`y@_Df3X{gzrZr$>}YGIe$wyW~7LRj38T`Br@W zEQ}}G{uc%Afa7KmCx_|n3?>Yg2)A@_8;_Ef)P#;{;Y5t%vZli(UpYAmr*dFfoOsK) zbNyk2puVzg#TpSVMAp=}Afk{n6|!Sum4F&F#zbi-$WhL9nIRy_Bsg`wgwojiNNR}_ z=>iFeP)gO4BA6yCq1&?4`C>>x0%#`W5TdCGRy>|vP9J|o{nrrSRB8txL_4}!P7@-^ zxF}@c!>lphL~&SB4AU6mJ4k@&-pibpTjmYRMO4jw#`h!}7x3XO_^=jDiiDS+(1bL& znkz365Q)9(s_ldnrt(mI$WDx*1NoKPXqK0Y#hiBDLvuK)rXeI%bBilvFV%Pz3|4XG zIz4OzZiwhY3Fjp5F*0D={d;nWk|pw+g7L`B=E%k0>l1AX%4z))3>@0l$PB2f7=3$3 zHbPp)@;~}CO{3-IC-IP(X?N=vPKm$~-TOkTQ=q<~^4=1RKQAqDacA2# zy)`BHg=x~H^6954`yA`5(h_h^(MKgssb8%m6%`dAHvWp$Su>O(bs4WE0C6*ly%gE1 zTq-22CIo{e38f9W^9&_rhSxSanM_Zzs_Vvb_T{V(HNw}2;;_m}=7-XAg6^&VRmJqh z*_fhX1&J@nP1$6ZE!|{H%8Z<4R3_r*M4S%V{?ioX!Wv|VSY!JWi|RN`gXRwv(|bU* z3H?%L$ItVJfw;ijQfM!GI2T4*iWspfL`OCVe2)+{J6IR>x$mIS0MppFvfpgC0pi!D z#Qp6lkI{0a2GqCc(9S+L6%Lu^t?R2N>R$kGl~3YtNCWA2I2Ob(vZcy0optRC&W7e9 zi>LW?O}Q;7IboxQlIP3z;$yOyaNpsc(pR<541Zs>JA6BYyy=GS>NglU5%Pye$uoB} zce?cJBjD6CmFzQ_?`k^%avtORE8c|(-UX#%Tl0kSJoko{pA|wmj{Myo#aghSQP*q zWsq{AP#%?4_11$7%L^;3bY7EeR4XjCG%MaG=Zf=m-V%!bYN%fvDslAM;S};gRKbMS zI*-2ZC82xe6oal|ff2laQ+(0k5ue_Rtgz0pWNYr_VtDOp_f_q6Fzc!3 za2$*|2d^6z@KkQfXahcKuz>Pvi zm=0ooOT>Sy>0}#SVT+0uk0!R#z=aAKq8JrvZSa;kw3K^TFnL;lpnhkYkrD~;XtvZK zHh;*pu zOab(Wj|*@4ci(4sw|j zPT-N&wMwrRb>)}f4Q@?(sDu)wsX|XN0M+n$B|AuLd0}jitPVP?N9UutvehbDl_80U zibH9++AyUGbGWGF@%`F*62BB8^iCLpxrwEYc%F^2))?U%b*&%<+ONFk?2SZL9!wZ? z1%#5D__4D;JlaC43QK@06$ex~oScrv7%6{nOe0Zt)AYA!gjksp0vF({ElgkLyCMt& z$rx0t8~Q}X2rf7!p+ch8VNFwC95MNw{h5pTPT)#)x>;3`D0Vf()2ajDf{H>Fca?qQ zPQs1$vARf^LM6bj`I5+dNP=Xoc6G47`MKBdgmXs1p2UOkVIsqJslX54@Oh1{Au{t3 z^jYSvce- z_Z==El+aLl)-1C$lBQ4#QEOeTl)O+3fHtTgqFo#rHNwg`nki4cSHy4bZf=vtH*Qg- zVpoJIJeh98hQd_IEpmW#KXN5za7vQ(IFhLb>vFC>t?0~;VndKN5UMv}J1oS>Kr+xjps zPMZ0Tcpj@Tf12REc-Tgfyjlz5v6zSBBFVC0`fzx_EJ3{|3*?@$GDdLk*8RA1fL(lNWG)nR5s1+om@ln!L?jv-ki5ph>1Bl@+Sz%+5R1B3H@N<)wYEM)CEX^gTe z0+LEcwahp%+13z$TJP8WATZu?B2=5=$Gj%3+i?k5$J*l-FJ_rbj?@;u?NO7zT&Ic4 z0K5ZohF022h10vK2mWVY*TI347*q4OyIOKKEn9~IJgNe?2uBff*L|7aHdQJlv2bp+Dx zRLXK{W9$W4qz^}!wIFYZ{(8^K9wUe+BSb=r9WM{|HXu2ul2@afcAR|rXi2*{8joHX z85-2NR&)&|Cf*`4iIE{WonmfcChln7zNi*0F5|Z z>PpU1>>C|(itd`pHAXg2lt{^-xTU7J0ly2d8vFmAlhekS= zEF)bJ;<4&WpzD@4FvEHHUO2embXgMAWU^;v@6?mm&7hx_6zqu9@$sz+LFKKGKwsPF zU-GN8Vay?!PLmLlPcv1j1KLXVP*x38UQ&|C_N++F^(<92)e~c*eqfJ7Nlm7Gz4%XC z_Qj1uv~+g>iZKh^Zr)o|GHUw;?VOdO>bMZr8TiWX;8lvDK32=%b$X@Kk zap+Sc^mr3*WXC@`G(>40+4M{^P>$aiX(~@_XWJ(igY_E2#5y}Xx4k}#_nrJiy#fIj-Us9=e6<;zcZo}L!_nB>i8I4}t5(<&i8n*H+%k|9#4Hd1W4F(>_ zA$MEvc#R(dzLXj9=e7qf@7{I?h;FAgRR(Xh7jQ$!{=Oq#=%Sv>rN-^=;s zRuqjwmiONKXlbJ9`HW1ZrKd8AlY~^C8NdIUr+d^0GK@|~ldNON)y$6;-$){JeR((L zyo$PLo&#-WB%I%nikb1s$Pe>WzDBppjZ~-?9vDB8ANT@%r^ytGm{t`jW5=KKY0hBI=8>FCpZuvcb8HB{1J=i|m=#WRxs3MAV^ATP z|CyAq)x+C1UJK3K7QGcYq%HeIx~Ichi2r@znxA5xV+C{dhXAsskEL5EcLCnroOt=X z_4nq3MEEs0w1irZ6MdoQey!H5EkP4-?z# zxN~mNiFxh3BhWqv{+~TKY#3)?D^f3i4=yq^H+0;uaa5&%#t~;o%Uzhb#jFNhi6*d~ zveYXw?RNoe(2{DjY_I%!U%d7$Fs66@8ITLi)-+U9eu8!=lU!mWB=Se={GFz9Kb#om z0daMy&&z#bcCAFF%>KAy47q+Z5=14M!QA3JV8a;Te_-`$lW+wCriWdUmL#5MFDQT8 zjFlzqzzxkL^Q6Ov2)O9tUzME#LrC&&_-004f6oxj=L<%K-2~4(Qhs16Pj4ZJgc*QN ztX<)*!;t-XDgUI;)VyYRJPT>?k*5}y5#dfko3P7WfjRUv_@GQw!tMvvRKV3^QJAHq zOnb1{FV3M4;@V2On)5FsnHyYMGGx+H%7h<`aM%;^p$C`@=_DGx@usJW75UgF!o^Z& z+S5&MK`d^U^w0~_2Hl+hl_mj5PB>8=VrB<|SZ|eY_?xFyO4ADu z_yZAUo}6+9{O;Y-_yKa;=!ziq+0?pQkK%=cW!RBSrgiRHz6Q0u|w6dDGsDt3N z2c8bYCIqu3$kSG)e&|BMdIQAp;-fA^_@)>*{{)(HDkE+%IrB6Y(P6jp?jKU4-&<(eb!A`inr{O*z=o)z}B)LvNh_TRP?gz~c)(cHHd8`+dR()KC ze9U@Hm8Iw)z6;!6Q#Ul_$c?8PP{cgRuh&v2A3S`nzXCtho!XdjtQ2_# zv8Nmm8&J~A5TNtpDGA6_vT9o z_u*fC7*vne1Rd*#1nm=aPs7kW?Q37(TGzA~2IsPJS9N z3_jg!7ODpw|4fCxpEHp{+D@9lx2z5*Zt2Jn+X5nTAc_`%n{1#0a{&Vj%vGD>5CQ!#^TPOC*de@k*+h2&n;>InLP^R;6JOaD3x*{8vwvEJ&9K- zOGhbDrG-Go(ng^(AYVoas&OF$(sD!eyYNP_Ls@99OYggR%})wKLF%8^kJN-jQf64^ zcqeUX-G7s&#WjvWRAB=~Vs+Kk{+hQh#CpYkt_6>nwm)lR05l|{DE5JM;gD}J)5BH= z6uWU#_hamT?SSIAGigV2?O}EUYR5MBY`Z~R4|8tGcfjQSy!dXBlyrqw``}94(A}yf zKG-I%?Uo<=V;OLM=>Z=vy5RU3h7gTUeSl!fKM_$V{_uYvdf6wIOump$_9Eb^r#`rO zI+dU0XSaTdpzDs_=(2kutS5GbU>>oY%I|hmO~z5rW|?7`WWC}`ljE6iwM^=&THk~` zRCB0ph_~BmHO=`UiY9l^?Gb!&LSni5`$@>ri}>P^yYsc*37|(IB)z%~S#N>B+&R?; zZ&2OB)Fa`M)bPSz-a)PTdr?$f1-W`t8vJQ8K=&4hN*dYxH72jg+M*6eE}{hL+V)9= zUW@r8$Mpqn7}*)o7A{=!kh1-MJK#EC0B%C_|aj)GeFw&qaj)2 zrC;RSitE@4^T0pYAwFopjCK>OUcj+CsOFspBml>~GQv|W6`jOVLZaS6MdiYt>U zY3w^i^29wjQIXo+4XyY4j8*Cws?Xdl4Pnuq83x0JSB~KAgG-& zu&yJBz%8!SRE65hCcij)bNchU>e)Yvo^^}UlmrjmQLR7?<2@gHFh!?I3oEtfL6z$& zul-CNdHGpaPa2g6$4T7`dceZT@jXwZJ6i{N5ifrb@KlMhofiK{4T?T(h=Myo$(9X0 z4qbrc7QEAZbsZXV59MbrzE_LFMUF7$dlj|F2?^tIw{U5;nTe#O0fUBUZt{$HeZnK1 z8JsuJ$fI|hdX!kkRK(RC`#|FQ6!V~U9Z{JktHdtE>eIjzYvag2ih+~G?mG!obT)g} zZiWn;Lj!X{shax9QQQgIDcT9)+I+ab$Eob24&GvT(fm0F=cMIBu94Ed>QOlnm6y^; z>wU!U!fDmq{H;65l7*!$bYN$KqkV~mfzrhl&^=YcPE6N+ zo1>|Rb03JF)A@H<&t2)8t}`FD9=DBqiAAnGTrpnmFus7(^G}mVX1#(JxARY)%oo;P z=sxG8qNGACUXpT!SISsj#psxJVfM2gmE=gq3qQqdvRQ|tC^&U;)`hN|9h}FnYIpf9 zm=R^|$xKkK2vxebGqcu>YE=}tXpI_}nw63E7~ic<6uP0QniUz<%dA(Lk7vLCk|j#s z@`r~1S$*37kzZi^?*`M4%C6b}?hF$DpFcu&wx$+l|HGD1v65T*X%;rg;YQVStKw23 zC>RJ-`p=1k#Q!G2;zt#K)gPCKMsd|*w2AP(r?UVim>>^Za+FkmZoQ7VjEqXFSVxVl6e-t|G3auz_sG zXU+xI1J`(ZQBUzYpyY$7a0J1GLH`MAE{&^mkn?`$&nvK@ZvOf7HYD~Y+F1>uerGHO zxd}&a!(g}y_-_UVlSOwLdIo2O`hR}dxVp&UGo4z~+u3b6Rs`fWnwBDorTkVqEG7;z zlv?j8Bk6rsO=l~CwZ2uFi~$Xit;eF6%LGAwqfPh4lvGrI)EiWVW-~UX#URYqXQE*1 zwr8uz8C_dSXDOu|l_!kyT?JE%6G@`NtVBs#Ol+p|&+1a%=|(wV?9kNGU{G3!UpknX zlxITIJEYc+L#;%CUBh@C?7(W0@*GWe_7E)#{727mdCI6yr0{g-#qbit`v|I7y!|LiE? z{olOne_TmbMhRO5g~zUx$&NSkHi13?ABl*;8&+C596CH25(<(ST#y*^X?`&vv;fP? zllcSE&9>Uns)0o%Gh9`pR;Tlh>=n6MQscmbXJ~9M8XRrW=fsE5DXc~jtp_Y7??u3gg)4cqC4{Pjgf{SNf7(y>{uhBgZ(6_Js^wod7HTQAaz&} zNk%=noyI{d--JNE%JoZi!o|jK(CLP&3i!nN44~X#js_Mza-5X}o1>$aS7|NKQMDe@ z@3ys!7y!6_6;T{HbuAT;$q%_G=P~Ccqxk)&Z;6ni$isGx> zF_Qr2d=F&scV?zv)%ZA$UOK-}tpBmG(?4Gs&E9}klI$jHpmtC@@QFYG<^BFPWM@b( zLz3&i0GoAvS;|C``P+=O)>{Sr*RajEu%+|>?7&0o7-lJ--|;@c#LfamB*Ny;0yQS7@b(^S6^L}?k+-(C?2ml%N9ylYZ-e7Rl2f0ce z*}ST1>2sA|*Kk|-R~9LFJj?zdQ8%VZ7Wx1=K+UL1YSBBMP373+g7B>JNs~*b20g&+ zB4u{Xat6}*qYC^rk$o%AvyEtU9L)h?6fe@KR z?!F?r+7j4SSVq!nH>`8)8MQwZ$#UyL0%Dgd^#-%dQ-rqAqO|Yf`=$8`6=M&yzz*Tc z`zT%ONPN~7_SF==aNY0J%++xZVJQBiE3%m;fo0P$_Pd9Vu<-+Z{zm`t<{-Jh-a4r( z9LhUFY9kStkBArh<`Y#^tDCMVN*JX}T-4so?4~Vj+H?=N&LaDOC5ZTKe)7&8Ohft* zx&e5V;5=_Y#QDx@_Q3&)4!FPtV|%!~2KN!Eto2P@h`E_@+B6syBKEBA4=dH3k9&jy z-Qu;dah!={1PVeTdNMjAajoQ6SU)m&>bZLiLT{{rE(^wI_oeVW>O+>jmKq{V8o1EX zu~Q+SIu!RDI!>=iRmbMqi#a}Rv71M*2=lb(A*wEZUOU8i_C>8+`b90>8k2P+TFiWB z{oTjxW;Ja8SJ)$5Q9@Yu5c>B`cZ9={AU+y8)9`1MI}tIFTENoy!Pp=tGn5SdmoSmv z4D~zI2GMlB!NaA+>f_^i*z9Eq2K`P zQ3o)+!8$7Tz}?$@w!7jpo{~L_kJRYhR0qKUrdJ#=#$IjE_dY7yioF|{z-tjazh1(_ zDn?mK^}s{k#5U?o+?NKRc-BBt4zIP8W(wF!E z^hbmKN2vUWBW{0Q_*@>PJC0vze-vA!179xz@8Q9N4A|~q8zA`Zl<`YYfGF1MF0JsB z>ds>^s%`i{FKWOIbZC?Ds}x=Au1(ilZS)1jdiW(Ii|_UzOmH!-`;`mt3-<=)aO@Rg z;Q6O``6`C-U7WA;r}Z$;cjn21pkKv>1^&>1g51Csly4b*mq&?8( z6Vy$NvJ`o8P*m||hV=O|V|(L^eNH)8Nr7gk&IFcWR;IJYV2HRH7QrfGTAe&ehV)6= z+-(${rP-H2H1QJQLzgqcX99C`<{R|;!c(1C^nB~rbccZsrja$cF$;v7Tp55c&m zC^w1>NzuBrsGKIjj3i04dHIa(Acd(>slpI*H42kX?qXgRz0jwu*l##Xh3gnZN~a{8 zilcmn*-h=zKln`e=kezOb=R;j&+gcZirZzOl0 zbL@0C1T#el;l0T4B}SocZ;b$dW24itH+4_W9ksbJ8>uf`Ta%NRs6du96INzWJ)c}M zEmUYr0d1s)=Cg5l8XGLq!}#d$Gl|>p_2rP#%Zu%#vLR1 zAT*KQ{Mf|y)HS?COX05y4KuD>v?3lC9uH$RCqXFGtk?kR@{?RUlEg{V!cHem zs)q&rragrQkRHa_%2&N?r90l)sdPmR$_nBHDbhAcks|`?43{=3rm%Gr-tPyWR0-7y zDc)2G=)B-(+ z78Aye^l<2uO8UwL#!hV#J{q-iXiWk0H+~7_5-JOlfjl(5?7+7&Chx0~ZRqUE`{3~C z)D!h_iA0MO$zPf}vR0Fk<(eg!np(0g(eo;lU=;%T>*b2b6_hNC%ak=mPmDT63! zB{Iz#C70qU6(S|78-)j}w+@4iAN^Kyn0M*zGm@Om#`qbHz@%A-^5>o#l)P;AkqX0# zlo)tUE7bUmET)jxE}X^Bjsbf)TqFH6FiN4z$p%?yN;ph=&k0e(-lX=+9vM4v9`?_V@>)7jne9L`Ha z+_ya7I}-Y8zHb!{^1T3Gh>sk4E-^Ig74R(Om3eE<^3tAL)RiZ>6j1-J?ouZ?j%rj( zwH*@^sjuZ1IsV%z-A0e^^zb09&Pu1P0LVwf%HZPL{HwVB+oXOqJ)*DDLsV|8guEt< zrAdv?d|eKr7h_w(OCF=T>o?Rp6#ap)dajfU4BCwot$-N3mTKny=EV3}Pld4^EADAk z!SCA;YJDoNR~9jl==*rvmWy466yE9YBG+>bLx!S zlR@?5_3wtksZm1lZdS`j?iybp1Ku0!R*g@E&pMU4-S;$onTPgn77gj-Ek=ks*kFj{ zlDpXgGO{Xxc}d+~=jVVX(^N@iX;MB0Bu(NPF_HDF+hN0MGt*v`(oIM!TH07-pG+G~ z+`kmmVkn;X<*0KPToC{M-iqqHfTEAN9rb=f}d#4|^+;2=i@h=q%z8Zx)4DVE>^%5MpT6Q#cIB!|g7^cLc3Pc{1 z);!h^(m$qOD2mzXI1|J7-22Lw#8YzLY$xD?vi=X*XA6l9?Y1lI$u($!^}mbUe`0S` zoRp}C6`l5tf{{bSpUnbaBW zp#e0JyKYEnGKGnC1%`fi#IFRe9A|}UBJO}GP369e7uf?xC>oNl#JYiR4sAE^sf_o? zRK-3=a4PT*lDfegI-Kc zYi$xWit7tTdcrk*Yd1U*7e6B@EQjh`0UHw9BiJgHg*J+E(mrD9sK!R?)!7Yv$UK|s zc2W{a@(N^k&8Ndl2v-UEc8^TtPre~f()j_uxPwGhca`V4W8?dbvDMV74|NwoSDct) z;b3cLykl%Uqj^$3OOo1E#d{mz2|drpfz>>TG$(2tA$A;_SN!(oV7{G&y%RyK zFbuP8M!f4f&Ez5)2q2kPoUKnj)wf6oiIO3Wr*MmJ7&*#cPp*>&OlqR3xl+6y86JJa zsY`D{RYNruhB`RcQ`_1)nl7i36dvl`Pe$F|5A+IGEj z>#3Tt?b7Bc4AIKgt`tLf$@P9We=ef$5jEM zun~*C4VME?FqMih-$Q?fj9zdk{%28qfoaPR7sC^V^b@EkF&IP<5ne*dUR^WNs~GDR zu>$8slFK2l8A7aN^UmKH1wk_Iz(I7+Xu<5x;aq~7m?ti*M9E#)6D%)T_wer0UHXk9 zR^s$7*a{B51M{|+D!2^2hOV7;ul>@~dt!OiDTjM?#D=bo?a+d&t0$u5uN#k78(K-U zFq-WEP}vV$-wV(7^zVxf^8x%8RLW-uGG1t?;`5i+S{%9LTB(6ot+u8lNjQT@#Zign zSs;i=ttPBtI8*t7=1elU*{o$6nwtSAt#!O_1pCX@8uceI)Cu{4`ag<9aRJe?N91A$ zw$nA;^Y`)ogRL$#Ph^eg``KO^5qp1+sP!Sm*1i1kg<2i!7lP*twP8Mi24=C3_c~OV>IKF4T_SZ}_4scicZsoH&-!$%i=wjT0_?eH)9A^~8ZM}({ssH2@(%Oz zM2k7{`XsS*bX*zf@e*s_Dm(Yc&jV`P>QZoGIK~2F7TqN6XfK(=w`daG+majy$x3%D z?eDDPxL9Ma=%!yg7e@b9RX-7La>O!b3*zLxux5|Sup7N&>F-|HbMQ_O;CKHWDr2AR zZ?^2VsHbgKsWPD!UAou~Nj}`f%+~)i9dYgsb%-jZHP|~8l?eB@9MbY{? zHW1UdqY(>WUU~|>Wdo-`YFAv1^PHPl{Pt|Ym!JX;S=Q%Dv?xu9s_Y*ZovOqXi_zy= zWQ02q&wnM3Os)-Olz^RGpvVr{kg9mValvwQ(wDR$l)$EAmFICyp8E zT)qCI&y^tJA^{nTP=wSwDi(;1#kM2E2F9PSYX$iW=Myx#iH>qiEsGE%`ZITm$b%(d z`m>HqJhq-tX~hyfHX%E)x4K03`=dJoy9`eiDv6ir4^y%=rwS9L`F4AqcLlG_4cZ)%6q?-vI8?=?k2C(<0VGX55!Wk+*c=hkhxqbH7lI|^SKL!c4D zP2nlp?9ARfZV^`Rc*izn~0Wz6y&RrpL`H(w%Z>+VO zd17?|gJ!S5^$YEA6XM9Qg=3OzxdKL#=N?)OIc^NxG1 zb=kY*3Lnhc5s|v=;7H=$ZuKJdiuq!mFvPavxC74H-(#osYiA7LMJ+b!J8ClaA%o*B z3;$m6eSq@yUtUU%cfrB+(`<775lq7Wzk2Easx#H7S^Z%4(0#!LQ4;}4{wUSq8|A-3 z^yykzRw~_tz>C2Zte4>@fT^-{wyen?ed|_qYkJl;yyot#ykct%d3Zjz7W?M;<`h{D*V&CZfjSRn% z-MO!Z9I}5eOi*%zT#w-=8fM-L4aMVmF;L?YK|AvavL6qT!{XV9Q`Mtno0fBMub7SG zOczvJSh|Lpz@HDnT9##Fa3{?(!shk{?DW7Eyi)Ma7J;*c6+w-y%PaCha zvMfeCpIcnjS8z3)yh65JNoY2;m=8GYBmEwpUo;WumQ z=PW&+H#(sn8D{m9mUhuhO|Y}#U9My!Ql)5IkGKQPEzlPjB3+8R%R*`?zfJSh6(Uwq zh@H+kSQ@N1B*nTc*fT=wD&H~cD$GZt0uZZJZ!05LZ#yH!Zp#DNl)EF^P`YalqSste zu>jpg2iQ9*cep!hcfWXxpS3vqDR~EF!A@1YqhSJfgrIp1Ak%I|*b8!I6P;Xar59nR zeU@4KH1?73binc$t#zg8jMV$`wM+D8ibcs(5X;(ly+h|PTxklyl4oHYvf9PLvQLUK zXgYHl#J7hRZ5ze#~e&@jPuo+n-2(Srmb^ylr_(4cL9D_hI@qV}? z1r#gp&(ZxkDNWdjiLvBvI1M>FJfS$XM4A>TUXwyQh7f<#f*r+@yMobi$p$%=v5XzT zV#MkMEII-u-&8)K@pdHwzoC~XsEzQ~o%WGh7jj84H_dfW0_L43(UuzBBMSS~(pO-ZKd_nKtSwX{9$lrbz)9mOuC{uz zSjd`(+VZct++|Tv9D+QYBSsdwQ1EQ2T>G&(3U|6_4rH4bhBKWmR3Dc!Jq-S&AjsoP zA3k&C@P5m-QZfFQ?v@bOC56^%|EQ)Npo2YF74uR*(W;W(AMF+?M(>czTS$|Kk=TS? z-0Vq_+*5}lVCTQXLw5c1s=}WKXW{1%{eKUSi?f9_ouGxYl8H0%&*vu=baJuz-#rel zC%aW%aBy&Ba9vk$cvo;+QSj%ZkM4toEoD(~Rk%6w8G2Fh+P#m3!XxFjY3zLbHYHOD zd&>x2RYMIzE)WEwAZt-W1%(n{3klO$USU}4327QVBcx&a*ywowc>gdEa5Qx^>@+MjKPoZ^Rl2d!9`J%TEI!L$ z*lg?U|7tgHO+ti~{zSj?KXO6;zb5`z%F6s>>i;~isy9m7Y8bxPUwgDuBmp5L%>0c7 z!menFnlkdi#Wvy_w27i5G9(N(smDmP8m<#x{iG@uKl%-y`|L%o{dYoz@OaZI*Yh9n{v)()Y}Mm#y&t9(xsA`eDKJQ)1w z6^|~CPqcQg&4kzw2oprB23UwN1dj=cPt>Cbas+!I>p>oJXfyv#x0e(F84VB6C)Jxx zVVA`)zmPht}dO~2%^qpJZs6>Uemmoujhx%OYG zv~241*Z)gDQ~8ed~|Tr z#gT_(Et3pIC@#|c(wtdor;TBfw0HU;#oQJcaLy`xHx+eQXt|*RvNGalt?-9wxZn|t zG8OadxVYKk)5Y3<${cA4(T^pLZif&#g$Ed2X$=8iUcjnRC_d_fQeYa04_))FqVnS~`v2`L**x_fR{7^8;H3bAO*euH;Md;^|Sv8QtjFTNMe@LfFj zD2+FwUgOXHGL5rAT}WL1CjVI2N^QW33vgU*PJK~`V6VObIuUAbG#E0Sx;r9N#=ppr zLw3`>y@)}P#+a1*1{jkc&Xj6qs}Xl+!|RSAVJcl(VCW7TL#W(E9=(?b$$L$Kr1wVgacH^LC9+WZV%2D(_k(}w*+pw`_O!W= z@*k4Ukm@$waz5Ju;+`HB>*vwrurlzJ#uT9=VOqdGizyaIo=_zg}36fqFD%F$qY#0(zr$ns{!4M5DjqZiXS*Ern$W5Y} zo2N`3|44%#ir;1&Z4N>H0j5uCv1!xalE?CVg{?ZrSlZ+Ef$qh60P6mc$LDKV$ zEuWf(wZp<9)uVtd}OKn$0hc7#>fw|-Z@u26Fs&w6j*?PmGI zp-Emt_gftnZf{ohRI*bD5^WJqo%_U{0U104B4PCq^2z|Fr!Y)a$j$C8#?2wGEO+Ek z<`C8vVSNs6SWe_XL99ry*T!z>u~Wo{Q_i@oB$zHq!s2BHiNY@fW9Po4mhtr$xiAJSOzYnR4N57%W6K!fk^4T5M8?YE9s zp|re@pG>nUq6^P_eieZ@oS)X}pNTxn{sQgYkmxsMQQwDO_}8KCJBK7+V-+vQ?C-Em+TL;BhK8SA@`GA6>`Ko$5uV&XGUUes zrRlN_mfAtqqd!X&mHoXUU~H87SUhW@2&ri;p*_V05Xw1!t)y!6_bHzx3xqL)Ztxke z?Yd%08}9t8$In|P<#qP`& zxVi(XHHHQGR37`bl$$6kIEt?vySo)hOWo=1(WlGEQ4mN}77aU-)vzn*?Y* zv?i&RdA_?Vp4=(EruEN|yDzGwcifVXZK@jlpxzbgS<4RJ>;WbA^9eX#|NMxu7&DpX zTE`W+f_vIXEsnE9UZlZO`r6}bwS=RIm|@j=v#X5{C`()G@#EnQ9*eu&$NgkY-kFc7 z<u4j2&p_Qa}E(Zz!IpwTx8X8;u6I1|f6PBFWUyce3_Q%NiqXRl&ByA?!I?V(y4 zja)#26ZTYAb$YMeKNgb#Xwgs#0P)<3Da@&v9g3;hteo`jc#cxOG{qUpb-+2OlWB=1 z#h)lwdYd;X=w@fzaPW=duKrIPkIe>JwX~>IHY-%Bt@Ft)k|sto&sO!YEX-{T%?ZSg z-{~bK7EUC}A~TX9&M1$++55~rcQi{e*HWZoOjQS1HML#54UlO(QZfEm$$3`BxEQk- zTW6Grcnk+TiVh8w!kUDgpbUp_02U7QIsK-zS*+mMs$?nZb;y#Obe%CgN>pG12j_n` z97Py2v+yv}#qLgKLaXFz7w6O_SkfROZqilT%|$sMZMcgG7lKCi*k@G~KNM@Z1j-M1 zlCvs!zr!n`QY>hE@;y0tqpB$aOi@5Y zIa95fjKNmoHiWUcacvV`qcXg?3ux%pN>u({jZiX}6;T~MQJ1I{b z9R{$gL11y!>??JvNw+W-HSE){JS=iFmYpW8&(yl2in>(ljCJZqNMLS080PDspIf@& zFPR4VasRT&4+aJty30jFuEKrvGpUu<=+Eoix(OSQI3yKj4;TKzIs_l3uhl;VH^ym$ zr7kEmTK5N8?FWKKQrgLmJWpm)izJM=Qm@%1!J&fwf!T4aDVN!d!H*8I=0zP^x!;f4 z65P=#O+~Ixp0jk4+@vB?KkAbxUS(wl5!mZluT`vCtbNuQjz#^53|XON6Lq^Wnd4Xo zQ`%y+gYNtb5LL7`Th?i0^jKg23;9s0QTT${W!=%R;1H%V-gT{*67o)zvkN3|RVr1i z-taMyY0^4T{oB1&X|~lVTSu$mE3XmN(P_6JW<%nxpb>QDP&BrN_xH!@DKTEzf4GNg_+-;$Yl^ zu>oIAJpN?2nSKH1{ogxBwF*M~bk7RUck_P`0o&Vhahll*Z!lG}dIh?2JfW#?UB;}jb#KllZO>^W7i_=iIc}kuH)724lqzK6>!HQ7kH~V9*M(p=t6Ojz8~~h z?T-1jBj|WR%6wMnMq@6Xla@-{S24)kgBnv#{T3K|W-3IMH6#Y@6N6Nk@O6>KneS}b zfjGOqT)`#s#GCJ`bW~Hq<@IZ=L!=rp?}tT%5#OWCvWUG>(vP-c{4zl08g#`xjwSWK zlzwxNWIW&de)(6Ps04h$7fwDo+lRP+CwBmn5t=kE=|QQTHyXxBgZ&Nqds^-m^kB{N zg2H+4xRb}KvOAWk@g8~MgxY}Afgl)z1YupzdeEi|(GOT?8|ZlzCT0!Rl>q}SiE8?U zeL(mPSu8g{N$8cKwGZ~mOeqD4AlW@@P;9AcTRT0ha+P1I;Y4?k`atKT^2ICCjY%_ure@nYU<&h^MTIss z%ECo0gf%4yo1#`}HRAoP&jwdPmb)zoTJQC7es5y$|9nDnnV5{549fO5-`QWXcX?ji zW;@*6p9Z6hfKm_f7*qFaCA}ftrXY>fLlIu=o#@zJ9Gud#-a9<}vk;sf5HSe6{~yBM zF**`{3-j%yI~}8ARcza~ZQHhOc5K_WZQHi(WOD8~Gk4B=@2vS$Yt^^fwb%M@Jp1`6 z461RE=;Lw?PVmD=RqyY?VFmV8bCKp@ zN#E=@2NW6pvh^d*ioG^PgS>{4=EC10c*;t+X1(O5-ho`_Cf^B%@7y0qGGx$I=(UH> zeA*?4*V@kWOps1UszJ;KL9dwbF{U5%MvX#7m%A1h)x9zCzr#TGCJT zDbWm#Z=__NFEVUh=p+y|YrXekY>}TJaIu2i7}UWd&u%oAnJ|E1I*JgeZMn$Kd6>p& zQ}bCCMvzm%JY6ljR8EUCBiqDeqzhH>c+eA%7dtK#RkzaN8T%Xg_h5ob9%WkVcYer> zG?f8kUdzTQV24m$mM5dG#z!@WNf`9)cxjMKRycu#+L?`t_;_iAOdV5E?tZP83=l>3 z^ZraHK{ASr_SF)8+CV-jm)=RPjuv^$WTl*5w6vf*htixxCkEPdu)V0rJ`O*bJxTm8 zcIU_|Fqytu7gzjZUA!1Q)zBJ0L0j-W#ekTE8QBvOMH)Qg{HLe-LVZdg{4Q|mLuZFt zSIouYT6u&y)J4i)%C!kaJ_oicODY{VGjnEMUsHl? z4LT!wpj44PV`<4>d%pAZz~1149Np-He1wp?0IW%#!8fyc5^RNe(Z4x7O8Is4h9j9^zezhd>A4lobax8sV-RmEWLruQD_} z^=iL+7#4ETmsmQzv&~2@Q4Yi#b@?b_MJX$Zn2mN@a48KXmP>LpB?}zzm#c~%&C&3i zwIvVhw-Z*fx!@RJ3SCv~_{u~O-~U*XKO=mWZ&PD#cI^T{n)cv|=C+zo$fJZ{)n2#EwS2Y?%s#$q>;g`3rn) zWEGyJRv83$bQp&QNV708FN1_naW7-@V$g9t?qaMH$XIXW0T6P^kA}op=d> zSc-ADOi3&?pjS<}R`s}Q=l!=qcQT{>j$^;vskwa8EZ#Q9&LNZK2EE)vJHoi;GCN5u z-tO{=WxXjcv^#uLODwsz#5Wal%NF*Yp4*A532u6+5bj(-VF6sT060&rt|I48 zft6}u7S|gOp8XHa!%ygXY!WRzS!#qAF1*et5(|RY#W@#x+Zm%E-hKx*(4pnN%w;V* zLQ?}_6hx%mACVdXHLFM43i%+R_#^MZB{&vz6zh z!Jb&LsB|eX)24Fu2iYTz1Kv9c*^pOyzilP4jS}g5NoXf*o8i3B81ai?viK#j<{*=4 z#o)ULZ;oWuyCo9G(R(Cf$4BvLfJZ#xt(N|{mrTkPj;3gKVw5t~Q>f~QBJbM@0x8!T?0{{E9aUHR2y#HH&dC6UyP`K1#;ZkgL`LY0nlfBes@g$J z7>izKjsS0g5$>`+3rXvrT&5!5KqX!{1?IJF#H)IhR%kLPHz^l&E{mt`kiaYRR@d34kjM~A;?5}Y7-L#{s<_Z*S zliK?UaoKC$oYv`e)sxnYV!cE283=l9eHxBr>-)EE-hSPTrom`60@}|V{XtE;fE^6ufWr98e}rbbAy$ zqX_a_C|eAJ`9zF>(}Tc1P~9^OTn1@erl){ed{n0&j5o#zxkM};+(CH!Jl+xWSIEFu z_9tFpw|=O{-mKAVp0!*Xe2Yhm$!Tw$=canm&MrXsIb>&LuWz?b&2sTvejd9h-^HzW zPRaNc%HARPB#WoVs15d?hW`d%8!({XyWqI*Mi%hjwU(T&m9e2c!T6G)QjH~sjt8{B-?~l8E=O66Xj&NQC=5Ree5&95O zV=Ot_oFK&hsQsiMSLCP=dMHw$vXS)VEaT!~Nq@8^Z|O#74gMC}pI^5}K=PWPLgGb6 zKyij36Ci2qd!{Fq1M~O%ic3P=N0gv5z115@hS;UE03XfQeN=I!$LBpg%5!S4Q4pOq zF0^+YN?sIA)k}10h)~=^k89mCi<5^(Ut=7av0SQ_II7+;(Pz!@`1OcZxPyh$RrC3& zP}*fL;#aja`q$QTmgYhKWrTKvYhV?=SCqWsAc$dY+urxYzAgyA$6Jnm=%n~DrO@i> zkOj47iLfr!Qu}POO{h}6nB_umjXcL`dSN!{cRlHjXb^s1fxv9yiCK{7V5f>VF%r`F zvD7M4p+@-jwS4Ayxt%C(w)UhsD;{3WegIR2#JxMM0SX zBk|dSI^gUaKgb-qwNW=pTm|Edg8re|p!8h)2qHTMdPU6hU^3Xmr9& zm^gKz@S)herd8I$4Ua9+IZv6vSh~LNWSD@B4iF$+3=iyQ6p)!5(H>^1f$J8ZDTSnL z8+rH1Gce{NC>W6>)vxGq6T7{Kb*^X&z!o$4jJ_Og?y=&Iy{QS4z9uzQ!dr=BwnNJp ztwKRw3fJ${9x!em8KdN*|3f+X<~5ryzcLItv4+)VaV{z=DIgw4UC*U#i)bxwq7qXT zL9O5sT9v4)knS@WvtU$Y6`EA9BAK~p=lbz1NS7TdNre)}yPaz&Z5HryxjgEtM%Z7f z-*g%>cTc+4N?wT}Ox^UIH5Mv*4=PGhOA_FcH3}_ezS?rg&n~pj=(hmfv<0~Wa)jx^ zLI*EgOU;7zga$-(=^iF3Venea8~ zE&m_rF1Mj(+dC-i6J5(~5bl02Imn$W4vDxkfhUCziaf91hYKNRgV_A9Ad2=ZGpkJW z!Ji@C$YKOE!T!>+oS2UzT|L|yl+8Qcp3i-^+tpQ$e@3^fyCf}qMSKDoDWG7e^d|I+ zr*Cn022HOc8n@Rs;2A=l#_O^c13{9*DJoK&3I# z@r*w|rAKd>qiSJN(BX8*9*|s|_5xIU#pSU8t71Mmmr8E-p)&vKg;cSEq(-Q%hqvfz z>z{dvo*6s@#$6VoeXDVLYLrSTGj;m6pi^SnJ4swb?n{nFS0BRpnEI!44Y;C)W=Z%{ zwX{P_+g!-%qd$BjVx|x)AGv?s*3aPIy~L?&nAVy+dlVCk_zd;3Y6&a9`Zd)e{FQMGUrG_FTD7KM+Gd0xvqv+#$R#z_xz!82I># z(y-Vsk1P|QX0bZZMF}iz4Twh&#^kh_><5He;6#q5_aVaE^ZvDs`!w3XLYhFyRZtmlfT56k5KgUZw z*0Qhki)B?zq{>Z+D<}K;7$qX{(2fir_e`%8!XimpNAZG4$-s5idxlc&yGNy??-D1x zMEemdyhNSNi1}~~H)n~p!NZn=0lD1nL&>eYe=VsBGr|W0vB|5A8w}v%tLnFT17-`e z`3fryMv96Z_$&)hM|ur>);!PW6oL3tA6J#?{SP&fMT~(-*!K&9_@4c@7gO4McswKJ?fKIAO5*X!+v)>Y2jA;97v=-_ zce{;F7H9+N{ajBD9_C=57G{qu^#!L~fEdT9;%kdTIoKM`jjjk!zdyV{2gC5YMKUbD z7fl2bZcsA{83PsJFAgG}Cq3|2$e8`Qo?t|5sq63dn}JG)uX_)w(leF~k3M9hl3R9?A^j$;e|k!g{&m~ zI*^Z@<*@KP@7+r?Xb}J?0{+#8(TetqkgZes>XI1CMA5{=Kd7=8pF?}iIzobfkt*d) zU(SPsY@ujop~_eV`FsRzZeNwTSe8EV;S9CZzeTbbPXg}~v>_fVrVUXm0#CdkR3H9N zKZ1Zb(Up^C5|PX8`*D4UTY3KF)ieXX={Ge@ndw`4Qac|>?CBC#SmX&7?(%yI!bPh8 z&V{!-98arv46jkAEaV$`67ntg31M!&C)(629`{x5Ut1>F1z5n{P)&xggg<$(@XKyK z?fm?My_@cHhex%I4}=}w93qa#)l&pTXb&ljdq}?-Bc~^;TBK1CQe>ogb%Dp#cjWQx z2df=++)MRGTNj}(m!e!ic<^O2XU{HR_l5pPE41pjw1*Gr*GnMyc&BcL8immBu)ie(d~0AfK*9g*|b+Aw)2X(S>^4@pFQxA z-wK$tC<4I;m4q%Ro9Mleta&)$C}QEqwsvM{=(Si)fdJd6UxY36nKVQalf_ez2@Ih~ z_HvoHNxhJHa<$S}MIurQ>4#bjX(G2s%JVRk1+$Qfs51)uaxBBZr05g8oCD7Cbz^er zx+m$!i&R%6scJ6Y^eFEyn7b#{LuggzCs)z9jjAzOLOe9PF^M@NVLXmLvGMJVHpiYY z!}NV2@fHKXis76CUcaTu;(}_Y%&4iPlt;fBO*!P@oJKXe)v0mHlAa~ao8q_^$%XkE zowRqMg6n;mf5uA`XApc*uZZod#Sx9Wk^x z2fNuqu8R@jHu?ebVttK*-%j+3iY4gcSc|$`9_=ZtcpYJypGMF-_1hOf4fk%1$so%~ ztY#TUlmB(?BL*w8Z8SKQxC_hk=i)5v{lek4HzmWCI60_N&fk3;bP=n306;3%XLS%m zC$edWrT+CdgjZ1A4O@ECHn3H%CqggdENQnH`iB9~I=?=r$9_u?M^#P+#2D;iE;voW zUo>aH2;RivWGO@J1HncxG=m+cFexGkzpK9F1|me!jxGyq4=cEF&w+zQ(@D`xgtEAM zepZyS6n0nI_?_Llv>Xsq+__)hmXP=4yY#Hoi}Wf*zN?S1`_D}ylqpAaL8ysKA9r-T zK30ouIoiG|5>sU0UdQ;w^bW8mf>L1M;A8gST_18B3H_L#pu}Gvl-sRhQW->(NJc(z z9Pys`hAlC->k`SU;44_MuVCZoclH~CM~)^!kiTF^GbVW0(ty}8U^kK}`-{mZiW};} zoLmYMY(69HfD(WkKLU|}#V)hYVGb;2Ix?WtYWfbh!B{D1#;gi2jf-0X>2%N!U`n5N zO%+&39!ARwDWyyL=dB6t){6HlSj9Gt{2K8Z`@&%yRzWreA2|Rur@$JDNdU!3{k)W3 zwavDWPECb!E63|{HX1BjHd zHka%6U6UQI+TLDl-)RcDaHZifv&=>(gWZu}BaL=U1z96WG9fgvw_Q3S2ibt!%pp$XHi zZyg+MNBR7Dk^=X2b+3|jgzZ?$L~z;pWQk$ryZ!nLu`Gbou$0X%gHW_T$)sO(cN?=L zjd)xPv7g$ZJAZ>MQcz4o>m|RCMng;f!*{mp;u6+y`T-w-aUQ8WtvLc3#yO!$bYO02 zH@;3OdM((C_%r;q&SvrK>7$ywS~pAk;GJ;Lt~uXOnsr{)Wir!EXl(P-8!0gdE9kYr z1#&aMB_@-9><&ksJ?Qywz+JxupBAsLNhZ-Yd{{j_$LxCZGo*H`&|7=5T|=AF#XcB& zR!#?tP%D?7_!t^4yWgeMUqSzDmcpVoVG_xHh%@~0;^nn?m!~uR4^DUU&`c!M{<3@I zmY_h0%DWnBT}VR?+@)PVG4nt)G>(E>S1Ue45?$Pr+Obo?(@r63*b7iPHi{cSq%{Sc zMUj&BEA)TwdP%dQhF`ujqxd^ZsQx=c6b$YELlh!VFtM|C{!SM@)BhH*Sw{7U_3^+3 zeU;Ff&zhNsTjQWo2O&! zx_W#A?Pe5$8iM-iM;$BBnG_IyY)Z)`kPe8|)artSNP@@}h>6VWT?r|rlCcceytJT0!gS}r?Nt{G zTpJl>b}`>|c5-t8Y3ShD;Ls5Dmy+Dq;Y-vF2z3;aLxxFF#l+spo z|G`yNDv+I2pE#5lg&D0pFx>K*OLEz&y=k0wR-Awol54u`!N|LC)d7hDFETg0aKF@4 zJX-rgXla}5)p^}-$CO%q4^B?787=T*VP#Rr=b12ylzlY zSH@UE`kXwb<3q;?v+kkocdDlRg~|^QqYPXnS|;*cZZOzoky_)0q7f2NK>x$uQI!AW z?q;3&I(yUz%jkM+wsb15oOKJ@`ntF^z0mqGnKC27^Hz_Kgsnk3}F&|pEkDV zweXLl8!yZa8b=BT9)2$04;6{-UO8rGi)h&QG?voCae!Cq$+DDXy0g53#aTeHVXCK(d>0%IA4&NyPYl544s5}A_V&>FBN zV@<1!!|6lG%1OCT@}FhmB|zQrcv@g7Qm$e+@J_AjzKxyJ07VjZ1ckbS8C!v!;XV6My1fxlCuqzE_u?%dPeVyl2I z%5!?R_9I)Z`_9s3dTdV1suYF^`M2VGp0eM)2kJ5xYBeBs@xyf~C9|Tk3dQ1#l!jGE5Fj@F{pF9ar+`i5ODeVv`d#pL=v@TuaYdnPrv=Ul}h^ zGB?pazJFNR#lZOuc1?qf1tw(LT)#*an)3Vop|YdJoPjeOG$q7z4M4ehR%D?T$SPTP0@5NNTk8O zOvd^eVlbiZ2Ffw(BOICN^lNFXHhCOlUjMW|1vpb{={DBk_S8dnZVb}B_&wEPO(m8d zv^x$YUioA=dKw>?{`7Q_+Lpn|_0>OSyms~A9YTTj0M|Yg9z2j6DPb|R_nfp-5(`h5 zB{QBtp_iSn$TN1h4X6jb%6Nk{Qjb3{n3PAVlv>dHaJ zYnN-I5OU@Hy!&y3v`%JtHA>LTOAvYORyez=1yicpYM(Uc=-EDP=Kj;>Gk^uP-Sc>k@{5I7LgNRe$D94eHp96Ca z!W#CzOyAl3k!?ge8ITIC7b4e}j{Q_H`6`FsInOE;YOwOR8v(pMTfzYsdW**YFExpX|KGi6G^6Jr(URyFIfVGv zW<0@dKVx~dmG~PAx2d`k>ib>Vk!yPY61Eyz=rr`u595qwE2L;h0w#ZaOn2H7b@DhK z_|lT$#q&$~MF_(SUGJ;%t|z=FEc~lura0n+kt`cY^SQxoqv)|1~+s3o_PHMVn%0(IESMTU94YnJ!}#ugw?kLZ$PS~ePPK85?;XkIl8ef zV_MG`#jnsehzG$?ubAefZ1UGjGAx3{^jaCb_EUU~r$1KDU|IKntX{!Xv2l;^*YW(I z_=xL;S+euex}XMHEOU~TiP{eXd$)`2J$oSqSs1Wb5o2&!v|xE1LfVBj7yHuws22Be z?ogtt{XFm#n=)rOXR<$sxjTi9bxsp}B)WT%4?8uFKQYwOR+uMfzl{{}$T|fVtd^k{ zEh{vlpGVBSv=1Av#$ZX^j21D8c;}cE#I5_4z4QZJefK43_ey~fmeD?3WO6efGSU!$&)Hz*E{8T| zWCxc4dlR#TPw)`_`{SEvHwSS?&yvq(7@}IXqf^hHaP%jNdw}|yUak$QiM2{*aFBEP z>mN!MryVk1k#Fo<_O}5Y?mr&z|I~~BI)~NOvoQS6luSX>8lLwX`{vN3+30uQ*SHKz zu4vMru*Mt#%Y>e0VU8EjKLj*bYq)~jCv!o4tC$tRIODu$O}D8ESE|HtNo4=D+47Wb z|2-5NYr2#E@mQUNTbCXXh=ftbw1`Py(>K@}nFeLM0BJgbQVKP>x8yi8(+SnKZ}QTP z64Z_i7PQg6v@e!{I1&`5O)Xt(JFybP9Grwn`JB zbT)X*rFPPvOu6$0v`3UNUma^~kOtw}siMaoKePo9)X)`@4}(yov03YT=1Q*p3hG{XFoN0N{=d4^=)GXHeSD7a>4r5Rtb&_QuPgTKlD}8 zaZTl>^aQ!?f*VzW9@oYs;b0I`gC}(slG;U6;G0`#V!S1C={NJ0AqAqua4zn^oLhGB|pDYmDYw(Nmccv5B;i8y(`N;c8sl;5GngJ z#IeJIm@%NCz`SOh*Q^@_SPsF+Hjn8BK^$R=cS&MfCQ1qanIaw^M|gbh{NHtCdvo}e z>i38?`D3T$47A6kKZ&)0ag+WJE%mG z>5N)J*NnRClOvbyHLYkKvxlVMoR;mcH{ng{!bG2D5ns+9$CLWpstMO1E0SoAph57H z?|XMxgiw)MF4}C(B$1gd#`UHanV%0^U!IQ*(vjRB2T$5fTdzw2p~jLL@F7^YV_p~4 zJOpd@Qas#hDG$G5_qFVj(Z+~q!OE1 z>FzYUw{+$xRw|%u#Cl6wWdh=0`jMET4%?O8do{@*EZ%}P3QY=75<-a%j7YD#92{-7 z<+Q*4cKbhRGqbTVF%s$LQQnVKX1%>wlu%|S?!UL_^cSa9I*@BWw)PJc=w?u$Zqahz!y4!?^VU?@jPZ{o{T z%!~Q|WEhPi)$gf3vk&LG00A&dDkPSHT-j<1)fCtYyB}DAb3_2Zo0E6qS3CC;kL>pn zZasiMmb1FG_Tfo%hQYk0Cr)ux;;@#awxH;7u2dLI(NP5wNjii4x6(6ubq`-A_`MX5> z1qm+;BP~Y>uMh+OJQqmJ3&gr#M9Io+NFTfR`f&j>&T#**2`zx%0-b!89ztSFiRnv~ z>;h(63zlJ24cW$eDcguQ*}qvvB6>BHJ;$x%JaeV)tW)sE9n?+vm_?_>KAa!pZCcsc zgvXpUcaD%ckC&xXqc9S7Z?D5(gCIq{jR|h+!D{asXlrBL3s#PRV1vDx&nq|3CW*O`38}kPoqBd_r=tN}_G zV=+!yj#qI|8P;m8~~vWCEeM{ZT2(nhrKCx!I<0iY~G!s z2=V6U^$1eE0pXwwW1Z+}UyRIUil}wMOllD2sFKtbP2-ikvlVQMK&eNm&ea@Zw1lQE zjd(-bTDMxqS>qaL8eAOmu;`n*0bs^c%eARa36lcwh~n=x#ZqHo>;(lx`#3s)_|*?W zVQV4!>#?ubJ=+r>A^5!GjIHC>y_6zUo4`eGMC1J{?<-;@E zN`P(j@9`ZCeRTC_RN2TbSw39UT;#vk$~<&w1$7bty%6t3P987x0br$IPdm^!b2G)z zFUl0|gL<3x;A!}}m3SC_ts?3Kbvu3k2RLOv+{aQqfoctBro-AcNqlQnewI-e zqwXu)0GqpDF{f-Y&x1h!GH!MTi9c25F;&p?jas4+BIU!b+7%UJo0rz%gz1a$r)apT zoJ^p46FEA6z`za6ZvHY6e~4TPZ;O{IhMuo7*+?OWGv(DlCFiRf$sPB|9+t=RKZm=mK;A}q3(=a`M`i? zP3jDAzX6Fg6%)u|_z?tRkImz()k7*Ac+Sbb1XG^0z=)|(rXoMYaToKNjTNnM_Iia*>B zDjqDfu6K*B|6s%H6?rZPk!!VwM7y!R7h^~77%N;EFRV|MY)+MJ4k=vemp<7pyfvSC z@;#gn5ieX5F4SQwGkS~$3u)Z!nYNc>(W6(sz{$`r(Vq3;SeZ1R^ciW~>>z91Y|o)< zJ?zjwgJ5%oPc86Y3u1Q$XqO8kb!tSLy{E!ScLr_U?w&nca(DV}-RzOR zS7OgScepiQ3x{1sFRW)3Z)TB^e@XOZnLk5fKkiU>xFCBU3~BFK$3J%|w!UY$y?4NV z`FP*&F}}xT9lzJOh&{n?$hVfOSem&We>bTo7q7E(Y=2eE$%*Eg1vMifP`&=kcM zTFX(POZ4QaW>zU*)1g|asbwrG(RQL6oL*_60b}30K@eiYkT)`r!njeHA9n9Z?Sz}B zL?mLdeS@Gtc@b+F40r7@3T2wdEmqoC8L2lFIMOBS=VzMFFqkf|FR_SdmSYVqsjfYt z7kj%K6vU4o8{EyY8INi~&m2i-^+peOiA1eA7i|!uv$lw;3HmC4;I9eI?@_4sOtGAB z#XF#|D#(D^2(Wz#NC-XnDeZ=eGuimEq7xGFQ$jr{f^}4pUlnVR3*Rt&?%ERCnkEUe1Cv%hs?5yd zXjaHr8qoRTA$5^^df zu57+VC-wm|tUrTb7o*nhB*c}X(9c-^1+!XxXNvwl+t84He4fe@ z;5! znsOl_*`Hn)poJA&T%@ z*`AP8K*lj~#dxfp8dAWhep?!Q?xs20aVDX|pj5|~5`h8*elT8DreS?xB7KSb`nek% zU1c=Qw>rOh7GgDtRAG!FH1@Cn7>0pjx%M6ULNpB_K%TT#Uv>4x-1S3FkVtVyP*Ov+ zSLmYFpSgj2IWILMoCUkXLdY(l4?O)acy}?`uHL<}Zk3`STYSv4*#{O$L;h3}!!WYR za1t{cCCzVxaz_vmK)7NOGT;*XX&39aO2U0R$n zuW)mK&P2x%gAJynV)E&|d22{RrKo(OH@slNi+m+3{2NjYk|aEm zay#alQmb1j&=otL<}jK&XcwyzRt}q3k#`=u98!!x1qOnW*i0#iu-9TBuv1~DN|aRX zZ)iD!i&~GNbZmnJYtqV z-AI^0`K$l_8V1@>^cpPm))oZ|bMKtR>L$>Ey-p=;R^wO@F^~@+-^FvOkf<%8i0VYR zTE{AFitZMxDb3X)7+0}Lxx7a%GlAruvj9J5iku$k!Awl+r5fP!L?mJ zCW)VuZIpMPLPIVM!QY}lhE*5mM@*kGCR8DgSk2f>Q8CJ;Mq$qoGb**lOn9L&5Xbp# zYMJL0s~4t9p?y&P>D$veNdtLC*o8&QIMsoHdfE=9_z zOwOS@XnXykdK)9XwL5LJYdFSwuo60)U;~3GxRI<>902Sz4mN|aqLAZ^2+tg@-dlbvdP=#1V02P%fW*QE8BsuP#ug)#{YP@K#Dr6PLtoHBT}Uw@_m0a zHNXu?S~)}T!okoB>~V>VE?kvI$`S79*=D8Fk^O}XoW5!^z3@us=#6G>t&_Kn&II_@ zLQH6^+R2n{UYXw8SWZB?I3PRETRVE9uL+f5a8}yaHQSpYuc`l3lb3vxWcYcJSGP=S zhu{nZlr*;mK91*tM5#REh7@3zgaO)N33LTrudKT-`fWNs7`$e!)1;F>v4SAyehJfT zwT;!;-;H21v8xrBKPFTQdWvoJd(R+3M5sEkE-j?js`d5HmSzIoJ$~i)>%kU_YkFoA@Maf0aq0i#G=g zI+&58V8;}C?!1}7qA*W;XxXW>R01W)U*;elp+X+90yw+#0sXN(L~%!bnR(R-@Wy=? z3&5j-*b+}IuJD1#KVo^pPygU0yX#ZoH=OXZ(`v_ z64ohdco50hBY{?Kiw*|A{k}w~*X;WMU6;J9nDT~0V618uD%0%Ct32KD&-6K3_}D!C z=oHexl-C(90QF;Z$B8Qv(B+LGO9nq6{;)ea$*LjW~!iQHb zi~k2VkB)=f=Jzs!!|PHKp|Xiqd1kQ}&13MfY?<@mjI6B)ugCBybI3g;8rYg$!&(Qn zz0sJt&7zBI?69MEV~fvv;mm;ZXzgj?W&Y#*nQ0u!G2h&J=lZx3+VUkrnak5^LLPb6 zxFU{A@#<#)pX<{Qglye}7Et>`u<^_bDMxX29A4qu@TGraHmt5s1)IL-GtujY7T$v- zR7<-HRMn1c)YMLwZgs|Cs2k*(@yPZYET|3S)?cDu1{iVA8R=i8>y%G+pXL@%bsubN z{=q-(c&WrwhrgQR&b<6;xg)anr60JWsjrLNx&pKhr60UNw0o*nJGO?fw_*P&n6W-6 z{5HAy@gwoij~_(;Yo`6bdN+P+J3B`khi@f>(!ble|Fmt&;ogYKNS{_tbxHb%-Y?z94Ud}5+fBqG37!Q5nx$$zbV{(z!xDr z|KK9I!40JcrX52=r_=B?ShNkM%VcbRQ6a*ZUIoFkT0vGx1uSYhQ^YxI;Lpi7vi{Xs zqfGJ`dibRlEkPU5Ki*Tj6ZL?qHB!TwXYVJ$JK-$J=sJ{LRUbyhw`{dq*u}ry=%8Ft zxX9&eQY26FE73r5ZCRNL2Gy#H>)StMzuoHLq_X4{NpBvTDb!Tcuuyi69%)npB)}B* zA&|hY1XieJynYtH!;Z_^1ra&;C9adgT>^W7KG6!+&@EW=s97#&T<;~UENxkHgNj8k zkWh#6tuWyP++;so8eIUV!v^1Z!4C8zP1aP=rXD0SaS&|=!n1{;lpHN{C^v0u# zumgD3qVvPZjlx=7Mj7tbX5w~L5;fdRBx5D zTFbtRdWR``j?icm7pQ3SXA&+c(Do@?k16MeS*<>7Cy32hRIVDtER9(oY13y0uCm3) zd>(Pi=_tOPPlWh0+bHSM*=tr0tv>nZ*w|OPBbU@KWd;MAr2Zua55Sa+;e`{~p|!~~ zQRc!mW+&wh0D5bH9)4@?Uw4&#YuEgt*rqJuM~v-0_UMy)enHH7{@qb=MRE=vd4F#+ z6hO4Zq!S#Lm_6renguN%|t8RQ_XZ6AYE7X&rYL)eIxNTWIMsNjGGe@WJaN*(0KCY@cgZ~hJN#KThJvv-FvE2@?^ET za@{O5Q_)dYi6JuSYL9#5wY8QpT#&2Q9llwXvQe3adD7)s#|~-_A!JaJvm_xW5YH}i z{#7b3$}$b{e#(0KTg=x~uW^qjJ&W2KybIbA{4-^xXgeKRwRL``v(5;Oke_*^vI4^f zAbEjF>RGd~3QV)2-3g(fH{KT>w9rxQP=Iw{LCgKRs3Lam=J#I1!S^nq+L1X^GMl#j z&TklAkLW((iTY;J~xI6%=^GSC-l8+QHO&KQLtYf%0eS zLAVNsR`hUdSi5aFiQn6^p?f!sr#!WtfFNsVwr*Y{n35vxSp(#ca44%(e|TtPM8>k1 z8JR$c`;8HfNl8LTcEO^`&?8fQFgbh`pPgiP)#_wU)a#&ac=sgZx!@$n4MVPR70yal zbBOv=Oua)MS*2FDWm=;kzptV00!tK;FQiImLnq(XvceY$+El>CGD!I{SsFQC4G~&P zz+rWr9cGhoHQ^_R5}~#*f8}K~ML2h0(0=NXOqRrqkK`@<;;e?bAK4;ihmT{6Z97-5 z##f@H@Am8AGs|TNO&1CYO;c9@aw>0%86dWK#z*Ebj~&*%2)Fv-pcyNuX*EF<-!30gv=gq!8K~VJkr^;pU{Y#!)*X zR1Mkh(1oQMp+7|~JZ58fcvDgqbTy&fNV_A<8ho!H<#L$>z<2$AIS&CH%-5Bk3_!=! zw0=>!N#THg<(nE6%&eW!KBEuHMepviXZ$Hy?|~|hJj-3eCigb_yR?*ymA9L_Gv^=nF0sCWwip8puQuHNzr>iZmR*K* zIFmY|P%I>9p(;x9pwWh9PQq@R4y5v3NfM_1mc4dPC$PZ+|DoYXE5ER-z=$fVw^4Ht zN1-_^VnUTedXS|-G@C^sk7**jXWJ^Bo79nPY47Z5U^qK8B3n<`FK_P8b5nVE3cwKa zF0%JoFyaX=n(aO@QE0_vocg_e^Q4G6Mv|41TE=0V z2AVTHq_wfKcj=Yw5u8QJT4ka+DH-J3n!9ouHd>Daby+qUjF9o^~Jww-ir+qP{d z9Vc(*{~N4lX5O`Ct@EL(K31Kx>%!jq5_dU%zrcV?Wv800(ugbiMS>sgWfo0%TLr3T zkM|pWGqY*= zrkd8g&k*a0L}!;S5`OeSM|CayCGCaCtepeH&V-3SB=qX?T9k1mrBHj8JJIl*i6mm~ zb2PvM#h_?if>^v9T#Le@adeKtAluj@Yfz^BHK}5AhstMzXF=#YDB}Q=tw6!q4SQdt z2IMfK7TgRsUJS&Be#Eo|4=ph|^X*oQJHPHnz&^B6_9Pw-)b$R%`7^V*N5n4MNPIKu z*;eF+ndQ-DX)$9MCS_a*b&9|$liv~8ODy?07`Y{#F~pqO3+6w!@VvP2D#}+ttAzUP z8`b}D3;%0>4rsgr&_ppls8VIjfxsY%JVzoh_8CbKgdeSRUmODHC>ba-31UQ7Zr;RH zVAf>~A@s^a2mRv?pZ9s#6+54ss)E2h9>DJwa_U3U>#uuxD+Qe7P^vq}i!{J>p<}CK zt5Un^a(wCY^G*6o8Y_u%=BUu4fdbEw&>ig{kFqx$Id-+&92 zF4{^z;)iJKY2BU2|UT*lj+z zTlS7RA29$rvdE-@md52BHz?GxXJ?}J?}?{g$)~Ph$(+|ySzY8tZ*+mC?!qv>f^?B< z&O~R(vc*R{eIF{v@pjxiyqDU6JC8O$j*Esyd7$LL+s&cJ4Yo*yZBjHC>rLRp3Gb!n z`X$oYEQ@HZ&(b(cIe||I6vlcpIZvC-%Ub&cp*dW97jkDt=Aj973G-1`z5Y921N6M3 zx$RGsf$DfiS;oIvu%tyHeV6GHWiO$nNIH+W+fjJijHVNrATDGl=Pb%4rNyW%xy5?^ zu(VXV<};J7!@vp4^A2KhRYiPyx70!kyQXcpo@@NDv{EDDWus*?caY$SXND2+7r}Dx zAWaf1XLksxI#t6%kGa9--~OY;EbC<_!YuPrU~4XJ85Q&ppRc6}A=^WbZPp1{Hd|w- zOf7rVkY##XYmVDjT8tijT@a4)vbNBoEc zI6QYo65cxjqB9_SY<^tc#s?c5qT*s49}uF-6Ckd7zbghFUE2ITn%X_Ua4oF7e)>Z3 zqzG5(hWahdaL@GcOUwm6ks!DUkUBcp&O3n-X!G%3y+F2U+Hq?W-8!3Gb56RxW2f>B z-}>pUG;nnT3GSzIgZ5FeOaD|J*0p1aKDBL073O8T=FBTj9qm+YakP=dk!3sJObhd| z8Rl2wPEf4c{tHw425F+Bz=&E;?K&(>{U$A7NwxheVQ&v^DlM2-j>2Z6K^a>O7J7L3 z@HFtW+8Tn{G+geyHIl=W_9+gilC zx%%gJKZ+V;8dEc8k-mygIcn$T&tGO90_3JdFBC(MRFlxwvjr>6c#c2-ww;uZb#N6{ zDq7^d7hJkBHPwluEGBr^PO6GeaUg4bQCGC(Jcy;$KB zS6o+nkOQlfUaEs!E|uJcmQi>}ibGUx6CGcomxCs!Oixq0+ml`FMEo@Wv zo+QJQ$rE8ZSvoHA{_=uw*&Dgy6P5+9<@ey-?+11tVA0+0M?`O!1AYs>=zdKUn{G2t zHzc?}@Qxr_;k3v7ZcI+FkpgC9KkUSCV)83VNA@kaM_7it@azhk9JV=(lsZ%Ke(MU{ zR=DUbOGu4;Ss+}fHc!AdPc%oWrQ&TwWyjsi;gs);+hP^W(4E{&6%6Ed6Ke}&1V281 z$j3GZslpZ4yug--ud3z|}xY>rj<&6uGdiWpfUw55ZoF%rM@Ggr6A0V|(d8@K-%Zf^Hy zSj=xld-HQ@?0{wyS6a)gKG)>wm7r2)2{@aUUuVU1_L}J|9_*zAKYH^MP$0=_@V{%Q zN~s**(@mcY_^zWD$pINa#EmVEt%#ROtb(c{RmfzFQy2u52FZ5)x#A0vHA^|ezCH*A zBK!2piBJG((e<7?$j|Y7db*Fa=B2qL=*?3x%6 zQ-LKqfS|9bqz~dcCIngNNG^9 zvKCMA9&t-&dre-vF3k=3fw+V6mk;l#fD}8DOZGg2H4@RA=2I!f?K6GhCFZpMy5kLV`t>$bELk9ftkS8s!Y6T*xRs#8zyl0!837qR)mT$og$B`I7_VkNMh zJ9hE}A_Y+Z9nyDMrN4r$@gDQ`zY>iDg(klVcz1;1ht~XvclmS-R#zu)HRN zMzZ3dlf-t$+3cslu`|%?ll3m!VgC7w7Xs#HTi*a;OD}M1sIc(!DPW`KW*q9?#up0g zubovNSX$Ntp5DMW1JX|AW2PfFP9&Tbd zJ@F0UF7)c`$oXK$7bm>e-*!g{OIqGEqzH%>g(WwH`wx^oetSNv5E46PKjEB{$mT}X zqiUbnbH88Y-hcah2RRt=MjUDPW7o3;6?rLK{X}s42h#$NSdPe(iM;p}Xg`@GP0kK- z{ecBEU*Q9W)>U-glE_s97E$O@Oko$!vu()}I+=w2DceYC&8X2QTk677ANW}dfOuJM+(uI8plA%8#*jLIeqD$MW^h6=EeY^=cX9f}bR8t($n{G%tNls~QaQ3qD zqYeM|TzCBwslUkwG<^R;YL_pDFxUUgfn@B>&HoFgN&k5(>TYW6`k(x)Of_wF)Gzc# zXvsi=5Gi5oSrvh06(MWXE6Z67Gw2bB`N7uEM3mZX!4xk?Uea(O{25X0TDjOfYW2F= zd5_ufJ{c1Prl={ac6eS_y~XEs#r%Ait?vivfWRN*2zH0kBs3f}69^tE8d}k>goV=7 z2W7+=@y)3Nt0$}Gtr-XPi&knzZiMdi zSb3{)nNR%Br|_nk@ibH888`EbR@Dp-6^61=Zo%gb_&RbVq9@lmE&UC#X_&G5Lz*#% zqA|}(Gli>OyMhh3e6Kb`ylmwrFbDtz9g^F5h^(vso;@EFxf%-Au~fwPM@Bg&0nM>0 zy;tZf3L==0XQ+9$V<=BdMz(s$F?D%n3g&Ss1~@|F7`wUR!t59ddaE^Lr>T~Tb`dMT ztULN*u8a+5z6~1MG>8ep*>V;o%zUl+KU!~Ki%?!INWAhpj z5m?P4pK@_W^jc&$U&;)~9TiEu{76xF!Pykdh+NcEz{trQ0K1{WZvZ}0UKKs44%)$aJ`X-WNn~sol$-8Xk`8lXa@RkQ#Ggf z(3)KlqtavAx{Qxipdy?zoLhFhh-_eZEAT93@tTMneDczbp`(MweTs%#OcM2CmVD64 zFjN+v{aT@q1Zl$9dyLusfH~O~G|`>d;qo)0wjV&RO*apn*H>@d<(!P?3GSEks)Qub zeu5G6q-S8GFWmdbtG@O0EO6vfTI9xvyM0pb#-xwC_As-mc}DJv8n=C79(+F(!VL_2 zZFhP^wnK-#mh8aVmuHSP>-03-HUlL2FK4k=q}g_Ez(|8i)zVYhEg5=Qel=}dH+MCC zRrij126xm=q(>0F;U=0@g2c0=bL+E=!y~4Uw+Bf4S0e%kN&9#N@fqiuzNist*q&%O zdpz6j$Gm_&=#KvMBN`<6yY}(a&>KA5+f76kL~O66#Wf?rvFYlAF$g zu$bu=i+)CXaxLhEe-D+`8eT!cfP8%(UxgZH8N+3suRvU~Ru$Ya80p?USx z9&v)vprP=lnt{;Q_7K`A*aGkV$?#n4Tl*{l{v|SC1Q;>XEbf{0cBLdOtBVE0lhVES z50vXg0})SZ_;yybIVXCgo0$M8<5hXv2xR&~T%K5LHCK!|9ZZM0zSeOmCO6zOrLTor zo>wFR&U4E2^F&cy;=yxX^>9P{YU$}cW#z5pveV2>zTL*(;?(>;lSlN|l%vMX6td4K z(Bp6bhkQ_@4zY2DZdN|&4gJ!7kKxiK!=^rxZM4FS2T}3RHU7CwtFFF0?xXp=pF4;% zW1CB84_WPrx3}{*6P<4O4acHk-Xs4%ffdY^9Vy{U^IZ33FYsUUK>o8^`cEYNS5+xf z*_Iwu5XrZwGAqlo{P8gl3?05y9$g~^rte>ol_ zf2#_RRS!<5@KVWBdc8xD)*R%?a_dU!*B|KR3pmjz(DfwIb;VVrdP@xSv?Elf(B{%D z#?|!I{E=8Y$z51$_pM}{S)V^+D3&3s?D1L(LCkm`(2^zx8VvRcJBnCy(R|`y-d2QL z+6bXCk~J&tPKka7(x|Lnwxrljs^oYXpN)+{hT!=)U{k#O%bbcaS1qlCD0e zZ{NNc%>Qms{9hl{#rYq-??e@A^>r1r4FEyVNCA%$p2#0i7}J#9ekvv83LY5RIfeQ_ zC_XJ=a%M1Llz6zLgFW^8c)~YCzA#(K`J7PGy?3;bEW0@MUj;Kww8lsxyW3~kOcczi_fT!BYVoYBW#gZ{a!#A`EF@JkUkQ)DVldX`U}#<1s6oNEpF zXK>-cb-DUstb(ouN$pg~fU`#H4a@T@jUHglo<<(soC8~A_uGW)PE=>S(B3`UqTQ;1(HNv(NRHDm3=_n= zcCX1o?3~Ocpw}CDq1=3=?cWm~|3{ScY-nh$QToSd*_l>PoN-XDr~nQkZSvmRW2u|C z_9Hb5Z9FE^i%z|TLN94T!cp~Xe%~ECf{&EXTgErIT1sNMcII_k5!Ea1qI?BW2T2r74UP#C?woS-RmZ-099{b_k&)T%~p!lEif zzfa>*mALD~0Iz2H9cyq1H?;Q9fBR>srUB;~88c)}lKC=?#^l@>(%Y(x@qhD*%T zMVed;8NdcP4_#8x)2Xf^*dX{ zq3pxDV19y8>v;r;INl=1b;bSkZ-|^F9-;7PN1myBk_=S?pW6?DC zyQ{ArDW5Hcx_}^?6g!i@Wt{p4m~G66QID|bP_H;Lxw=0DiA22%Jh_USulF)sdiP71 z4ibi=@dQ288fX}fN_T@%7e*_s4pIWIs;22AhvSP$^ex2oi1yQu#ERru#5My&`~)nC zk7mV>t%eq32hzIf!d8CL+7t#?_Ah%E&(O(cxj^>Q@W-g`)bX2*NVbd%{Ds=M&;eME zqW8E;?-w>U&toL7?kZUZB#WA>^sGjt6zPhcSIwx@C5((~KK!je`XkD?_SvR4XwMaP zL^J2ItCyK^0M&5;_edG?8X#!-G5}X6^cAF-JpU#(p23{$Wgtf=jqrR?ft&&i*2=F4nn2PuSqs=(s{qUbUDF1pT-h|XYPRY| z%Q^EG-$KiDw%p~Q`iYaN_k2&!t8-7?S2Q1l0~le* z&6f@5Aqu;w9Y)IxiTk%gC?h1pS{J*le}CxZfGur^)r`_3+`JmtM=7r0Kf)L8quQ8d zls%yqsm@`u;NijRdA9!1UKgp_47EAb#iQBV*j{3Kp-rIYB`lrCbm)?BUqP`E@`BW+xTSQ z)_aESX~h{LgDh~qZ7ICoW*$zCj!_x#yUK&pTsIxy6SQu-Bn#e_rik8-YyA$v?L(0* zwnq#84tArF{VuY?NJVVxR7=_$B&oBm00V)mKGlXQr|Bj*uK};N#bDj*hSqO9cW>{) zoSC3niesucs;r=hT+}z3h!0aGLw#J9R5DWvt+Bx3x-mwEbJ{9k3}~srgfkDt#N~o_ zS#sx4sZOHAk#&rOS7uR?7FQbFUIer*kz|YU%u;lRQ(E&*`iaa&9-z$T^ z>Ai&N@LUzE@~N`mhA1vJU|IB7<<%ptg8uxe&*tJ8wqMy=8LkXr-`A1;xE81$HYSR` zwQ$WFVaX9RW${E4W#NvMy~GeD&cYKGzusO-6vc?XwZst2uKLB|`Eo~x$9~i3lfPC* z=byV~^R3#!JkK(Hq+fYKfAyVMwC5PMUwOgzt=>h%`S=Y`d6N`IxUza;j(!8bvUsAF z*Xj1c`50W>y6FTWV%-CQ=^BbPWjh#rDvsKH;!njuKf_=-f7#|7}&B$dr`l^b5kR<8Tx1opmb&hx}IzNYVCR&_(15^o zgb&eiDKO_UQTZvUX;W|EM8zx`uT+=Xl1kGd4saJE02wmQq-I~eWjV1 zo4rUFo;+RkXv1Juxs5Qk4Fq_SkI^SIH1}27&mph0qGQ|r0TePhKliH8W%Bwkvd8ku zrr#Q*!+P+7`6JjmTFzA-T-Z)d`&DNn)rmGb#dN=DM^7J|XxMFb61a@_bD>u;K6;a9 z=B+%L|<&+ck{m?o2mKy=#JC$vj`cH_=d}iA=QOXtBH?P7#d= z@A1eJ62!52bW6uu-Pzsyw!#4ruz~>;uvlVlkvodgVrTqf9>z^EL_?6hoAhNj+_e+B zw$Lol_nute2MD)mVRv-yXyD2ZdzOkAbnMnh-Hj(CK*tqDjW9^INJ6+` z*ikHwKs>F&&QdlWJz-I^Rz<5LnvcPZGA^J({+?f?KIFwlIFq8S5y#*k)tio|uzaqNYZeicoXIaa1(R^NEAW+d&hi^nl^#;HT*NaF zK5Bt{<$~D@Ia5~C#)Z>Wp6kYcM9_Ou;hW)rybzI3kZCu`(4T4j(b5eN-f!Ot{kn$; zV@d>X`F8Tvc;}HZA96$%fG>gU4p(LOp2y|Q)!6UYLVk_kefq@-Z)U{3?~w`Lg(!|6 z?Snsfa=WfeXWFjGu#d)6f^#~Mx((Z-AcYNiMBa};+Sl}kv?=n?K26{DMD9QTnef3a z(pxjAce0mTV$?-!bWAcJ!g^4D7Os2EtbTGs>v*fG!8Ps;jVFR zm7IH(Nzc0UVYfYPcH4Y4b8{n8TNLWM8e-pi=)CIqy_{ELkHOPt1F^MzeoRWeQWp{=FJxELk`## z;`h6Uzn5QhC;Pd=FSNJoHp*`gR&v|V{Pc4h^)o*GGpq-3yDR^i-}?a3^hw{H6^4Iz zWNG)ChMTICxbDlHR zCrBtaib~fqu_qi;Zj!PXCmJc%1}p2cEYHRc*|{RsQS17vDhuUKT_&BfrzdYbY`Iz0 zajzFGv}T=vsHg5AUka8C7mcvjBda$JAm9l~B%NrTtNyf)`yo<0)JjZGfJM@` zH&IW!+rlXHZgWjh=K)^P1py$BKLLy&|FzN-q%{N~It*PAq-#G&Z~1P?+Bf~$PiKGF9Z^p zh~5+shzx~oQiTZZ%I%3|${if=rCl!Y_~ilv<|=UwZGr%lM#|-|Cdy?X?k&wEP_9S2 z20AzQ&wS>{alV#V3H6_EL7j5c$&%@3Itz<+t%qgc=;f$ES%zg*j8)p4>iCULW~(it zy76ir7Lg89^xqbQ@=8<7aiQ&%uie&5w#nBkwzaPjdNHn4{@`b+{Q<>Af5PIa+Qw*? z?x}QVCkkI9U9Z|kI8SgW4$%EFooM?b*}DOgNSc7TK*ub4u0S(CS7uiB;=Y)v62eJW zmQNxjqIZ5bWpg?>K3C&gM8@an)naF4Is*6Oto*um@5=H_~#01?MpR`(ddO?Cg6YP;GZ6jLZIRh)1!ZK;Kw=y{_9i;JO?Lg^s=Oup9z0u zk_a4e4>@R24yCVNXQ4@-by(;WM;zTS`{pm`?*Q=|EW?hzTAdK7*>k-O{JL2=SFo${ zo+dTvr$!u}svKY+Oc4WZZdJE(#3NLc(r{KfgWGYo?Z+%ed;0>(GpttE*8& zkgBd%99wA>x?+3!GeiCs*;tFMJiWJ7 zWoNyBZeeC*caC)FeYv=s#GSo&@a*R)l3kiGo3*CU>Wq%>K=hv-t#Yo>A#nCfw-tw{ z3|7;H?{(`#%JA4qv6yuuYb_OHQ#swCB(I2n@j05=yPWO5jljy+)NRgFfFvr^PdD82 zjqNrG)wZ3aZGCZLmxiIn4!uwmLs=@KaSrvUZ6mOLPV8|fpownpuE>jzYKnc+_Nh&& z|d#|US#d($=Mhh%_ey@w~3yaHxS1{q0xWeM9>?*IT22gl-3tOVyYCq2~ zl%Wm`?(>*R{HY@)LGD|zOhqG49#=c1G$zg5rBK^~3g6I`xxD)E5@XDh1Do2`V%K!; z7@5+-?BNQnK`hCZNQIUzJw2Ho*;g7-gF+HwsEP_eRPx@a|lPKvtr zRvwAaQ}1q?8~xMe07hPOQh$NnT3Oiqo66?rPp*x8y89p!_-VAj(FW_(L~hJyQ?;wO z9T&$uVsW3l<-uTIDp8_l$37rDL0J;->Y<4kTb9LHjTo=ChE7cF(5`@@^v682SDYVx zinNp(OYTZRovVG-3y;g24!euqj~wzUYDXr*x&i}@%^ed!xNY3xp9j6M^NE`GN?OzD!YB2x zaHSwXLhpN;A8*Q54`x-5sml|jzbhFSC3ra;Xo|64ThT0UFIz^T^(0006=YiWad|#g zdTho|V$n5(K*NbhSU)B5L6I-&!}~X{yI$xc zDC5ZibnvxBC7{njg8rlH_zJ&$C*^r`t6C=?w6dhpiVaCI5<#1u?x)Dss%V6=q0vj1 zvnW2HLy+-^lJ=;~$=xaAG+>xPoTjYqNRkLej?x`xit^gy2_3u>3iG5?aeIhCUX$XX zk>Dn$a8wU>CMCki%%+_o?waPLZKzi6uw?ebtw!ME`)0mDlUWh-SBu)sEdh%64(S%r ze5vVJ_xZ41R=wKIDpiJm!51H;Gn4ADc0W~>AbAzC+~6C+?bIUDQ3&@BygWs4yrP@f zycM~?(DjjC<)AmIN_gQ^`)9x&RO~4-fROEePXWx=KQjAxJL`lpychYO zO;_>rt_OKdtpa{IoTpwPs@RVv3~>3wqwuA$2%vGjr!9`AKIIsu|@y57)b&)Uw= zNi0n|0q)LcWF@o?9=MeAo8kahVb*j11JXI1<6yPkN4MUx!0A)cic4_#zdZ~SML{eR z1OR`Qz^~QGvTh7VZTa`&I);8p59h8cbs#M}_!Nk(p#|+sgp2wM?MFCxa{qSmHvVoL z)*H*i({c6)VI(0)=;3#|>6sbq5@8EOie6>~d1N?(-A#BXeW2+@tcb}^(ky@6;_af` z@SFze#PoddyfTqQ`T77azSxYIVuJ%?BoA{m`p%no>S+g#Dqt5_IfDA4XtAN|=S~cu ze*%gp12`Dug1y-8a1&X#*{el0&YQbbGcBRE4ufj|PNX>K!agyY%yzPzQ~+!wqqY2wcWdEWGu8!)rhNb%b<>^aE2C~!eSEHMVyIbU_u z5G$+F$C4K+l{cg`J#_MSKKHKb7c?KTmqiqqSRTB%V>B6q8inV+eb(rhtB&&FMJv!r z>#yZA+4A`PG}!WQ;I-jrF9r0B=)JA2orWr5Rq|;$O3hD+M(S|^q11pPY>TR2L}q~@ zXT`auCEZdwIc;w(7RMj>7x7V{n`@7Av>4hleA?<*+eIsEYdg1 z_L}=BeMw8|dly*tkFfVY;g`>*wZX_v4FK49Q%I%Xv*{iIX3Mr^ zIlF^P2ALw~K~LvfFlEL|9{igavv!kz&F>Q{ApjXM5{{*9VNCY*t(e*82s~bh3B(0RsF?%^e{T!{%cJy+a0)*5Sa!OCH3=n}+4DxhI2Oy<75_so#H3t!Hs<|n zQD|FLm|AGaC!7vJgC6~)aXIa-H4$`U^y5A8x`Ja5r4X?QE)e*@fVcZudk>1Wm6RiO zjqY?t^@KF>`PK*~w;TBM?d0_H&E_vx<_!c0@uLGW-JmG{e;J$tP z%Ip7o!|xwS{C^k~{TmTxPvt9D)yDC}{&~QZ4+>|{Gs}dE#hq`AoCpJ_CGZ{lhCoUS zgs9Xq;C9zOZ3Z$ybL^mQY-`^sJ6^$)WZ6Tawr*;3tMh9A%8&j7EG7_L5blaR)R$js zJ}(H-2x}ljG3Xlkh=#1^Plm|1a>aW?$=*4v#p}=@`Fu(v-Cl^k>DI4-d}bv-=lPc7 z^FC4;z$vYek=Zc9c@BZkQCg!9shX$APqBDzx}=>J2sPuL?8Z8k4M$a;IIS>BWzbi< zi*$N*E9&zAcMgrRROw+`FN>QUlqJ(%n7Sl@#A{ju3N}N;=lb6H=Q!=J72{jW%3mbE z$8|2-tOiCubSp<)&J}(ICB5>3LlOGd#C%&g?lh$B2%g_mg>hQtg@=bwo-wVJeKC)8 z2`={IftCbfE^96oajkhb#E6X%N1&Iwc#?22uhK=9OE%J_RpozkJrvGhjk*0ZJN3Mg zhv>2f7=L@Q2!5bupEj3qbR$4M#ci3PE`Lq8_H`6N*54C^q zeu4@=c{%@*y*wpoe1cD(jp{Q`_30vJr%L{e`JQs2Ob*s#6&G2Tk?QvFBUY;xx-)XX zp{-%Al)hmm2l8|h||(GvT+#uQb%1MS_@B{Bi7*Bjv!1JLUw;Y(Qtx; zHCnZW5(6YMUa9k*)y?UFJXcEpAkUJ3TjXu-xCL@sZdZALV|qbrG;y5$x9u7RtMdn! zXy>N>#Wd?lHTRsamV%3qcAcu!bw6vJ_d*JaZ-M7~b&4hH)0km^XI1n^E!O7tSwx9m zUMYtQVNnxEz|+Usr+_>*;5Uw&=na>@paOtVNC59T3-SpL zMG!RfvyM5^gt=dLCLlvHRT)R zKMRn0fkbIVUy8i*FTA4spI9aD@c%g1_ICeKY$Rp;7wsf6!CHRK0Bz_K5%-KOTt>id z6KPP@^w)1pq||(iKDrc8C`W~}Mo7(dZzUrK52O4GrcVqL7-uXsem+coA;HPS;{>KR? z%T>Cm7*OzVbg)6p2^7;Q4Z76&8(K}?8ybM&xr7IqK20|7bdRl-996hGBG=X_$BFWn zB35(e`j1lu51Uj~oWAR#8e;{=0apBKn>Jj|a;4ia6eScfQ=Nh42xW^nM|f1Gsy(0p zMR5UM--g5iZ>3ezq0QJ&s?MutCOfBP+9b)OpGj*pb2I=AGmY64q+^6dvIgU1TfL}x z!hGo{nBgzxf$t#WsA)M(Ov!_u##VtV+VP7%HZ%a-^D!?#R^r_l6$io5WXPdx{cgH@ z+v=}|DEUw63G2~3x@o_2w2`~;Mr)3q&N;!~G_>PIuEC8CY9mywwT&7Y$QGN_?TvY- zk4r4PmF3ux?Ort@?V<1e!S{Jpr&dJrb!z}qn3Y02$0K`hO8t)&M=n44(%d9f<{(>f z8JZbUOiDpaDbm&Fa|c*Omi*vz%K#9ed5}$C;PN}p=kHd;w_rXZJi2yacfM{YLmzmu zUJ3a8ASHO?pc$M&XjI5Nc?LqLJ6^Yp;If{M@Eof{SB8!WG|$8K06N zF#J8uS{RI+*oB*Kfx8bcc zV=0LrF-Hi=RH{izWr^BrRe9N2F=icoyFt+hXzs0Dshv?eH&5Jbfr!+2(55i<>l8Sb}kP%ZH7l*c69Hyx3x2NZ-q6U=GG@0hzoy!F%IK)$TbTZU*FH zV48`9hEKaZsl`j3KAok`e2~q>Sq~7rzhg45k3=AB-u$S<>m~o{{MHY{Zp9+XEp8K% zYaN2k%#zA?FZfxyrcIq%0~Uf!5q#9m3ydxWdad2otgimB4wlnO=1~e9V77AT z4Usyv_eT=t3hUB#yin4Qj>27^@Rzu%U5GHMB8JLcmR1!LvU{mv$zkc^z@j{DEnbut zGW=hI4mx(ZsV>cGEV3UO?1Ag#YH*js`6}bvmd*> z`O2H*J>z*qi(ouShVvo8Ug2+E0URhSZGQ_I4oy9mUHmnLIn;?~K2#&eL$ zOJ>EllX}SB@@>UN6&ar#%3Fl(Z9s&x{n3S|S?o@56?3br^&s2aPF~(mR$;SAJg1k~ z`24-GMVU`tkBT5fcHYm?${=0&81rHq`{DqOlciYK(T$|H1Gi}emhDKe#TeFy#t2NQ zfQj0+P>5#5>$}Mzwm$BL76B=budp!4BrPY1B`U`*9<~#_r5IFpB0WitV0uRTHoKy5 zB$SEh3Mlas+Q7B){ONy*`0#5!q2J(_@Un1gCSQx};p%EchszTmm3Cu`#t#GBq}}bNNpXQ_}yR=l>GF|LZJPs$D4IiK6jN z_`<=4f0sc+`5}drJist05@{y_`rA&2PspMCtzkh12aAb2u%k}J{d?Ev(^~R0RSBt9 zBII>0jr^nCO-(bJ!bi>GJJ;&t;~U*Ws1lhd1;#!l>?X&jnm7QykB>=x|aG2V67dL z_orIr(`)F*)nd`A47kB(A7Z-FdxzW3r5(s1UCkHhU;kyDYY4=#f!h&7a8s%FIj{yH}&0~RT?x|;alL7>6Eaf z=FkHb$!`j@i{;i*)uRo+(ms^-M{;Ny>=$KCnKtiJTXsxKO_53rL_Me?#+a@N$2hEd zVaM4=&YxJbPt(>0ea}_!4oOV)tGl;hsC%l*n7&Ky`(laJMEaW)N$IJ$E}7D*y;!n3 zG!;A+U<6n3FgOu9@Rld~lOG>DGW6w^9ecwq@Zz4aJ4`F7m+0Qa)7Qige~+6O>^@t~@?x~2u;oHIq0rL45a32khv_`y=z7LqBv(bLOD zEfezJzQ%3P8Y`9trD2WJAvg22SiP84&r%)LJ(I=~9yv2*AyxYO8scplij=8Y5 zq=3BEXbnufid(&g)U;%RCXd4EdOe?_-E?a&{N_d9^Ah!)dSRIth}BrYHLGS;6x}qS z_nZAK$s%YcR)NN?4XE$Oi_FbhOBZJy;`unn_PXFJ5gds>a+U91#^IaW2e!z^Dj(Oa z@Tdnx;Pd8Fk>d63~xwX^dvt`w`&xM1BY)2Y3I?<9i-Az?9iLYb~a;_{xt-k z70mkS46BML=(k#;H_X;xv!6b7IR4t)of);IGbP7 zrFY4TnzMgdUu-qBx0SXmaD4E#{+d^6j=i-iMG~<~A?7rgfv9I`>>HiK4%3qYde;)#osB$5P`ZWYXdDX6sMv0o3DqE{&W0CIZ z9i5~nMWBqxDD7#=Ftt8#ZNtn>`ue*!=;sU68$*o&%guLha^rSpwzwFi)9e*)z!m4w zWNO>%-D4BsH@iJU0=B9X)0siaDs9TOlJ16BG1X#gu2w^3m8NY?4YOg_d<}9Y80=-P zzyt9P2IR2Z_2r$m^)oW2>o>}KFR4(58+R1ff~(3DaS|BqXslXE?ZHB_NK5C!O|P~% z4ia=PT%DUANIld$sLdmwEwW%z#sp&uw%uVTxtTe0 z@11jIE`IH2|NNe}-dgppT2-qzOXrg05Z`Dp`#Pv?JKj&_J#r#U#N@4(UquS~7%rRM zo+y?bNoK+bA~7kQ!l&R2%E-LL;0Ov@w(u0mw)wxCnF1NI`~qSrXv4mpEkGuxtaU_ z0AK$BTpBO`?m-3~EIy72dK+jHO*A9~InrOZ&l=k9Mkr78(}s{$UZ-V$mJnQq#FMq4 z#4dweuD#eh(M6`kMJly12Bl%hHlxJ5{ra5iU7+3n+7dByaBiM}^}5^r>Fvt*>8#ja z;EDIwz_T@CwPe@~I8Qiiy-_CGjM#TQ*ghe7L13hm<6m(!6(Zf?%%P(##6XWI!EKCT zs7^J(r>JRABG!Fd5HCeRgpiqt78rng@U48OOl=oWbl7bU<5tuYRQnUeO#mW)I6}y+ zAusWTr|OWgq0bKjvknU6tp?$mz1@szAO{R|$pUiG?`Jv}T@) z(yHJqv#hIVJ~J{ji>_u{?oBGbR2FCGicq~xHneAzbdxz3)P+ChSth)x*gU*Q*Qw^& z&=n2s``WUgcM{ORsG~C(h1aJufI+>1c(Mk1Iq3_=DtA^ES86~PTts&e!=w- zU^HqzfVcSwu}Ynk-vkr%Kb`2JRFm@C0d~5GV{V?X^7_H7TZJ93lHVnm{ZmbKu9(nr z%_oZMC|rpQrTY{cTUwU)*_Z)X1Y_&jmD$!>V~cGm#js6?JYsG6n-7-}Hq7k0D|$VB7%&duc$lxOB@L>Rc%yVcV--{PiFX z2KYl2hFr`Yp1a8g)1M!+<^}I4uSNLN9qQEju$eeL?+KO%RDLTiwmxsv^zmdf zrbQ<44fqAZJ2UNT9qbt2k%nd*$geSvq-0FAMX+8$iT`4AJ17n~`X~)0VgvTjumQWt z)ZIn9d2BOA8k@{{ z2S^UvuRX-b?Z>!3{m-86h^!_@wey_V~XW4E?Evpa0w#F?eoWNkL z>X%Sf<~br1G@NGTT&%A?Q7-k`*2%;qBNC6MbgvZKFEqFEIV99zPXl{u4`|09C8sdvi&i zwZf{B1UXg8oNfVqpQqs>{vvk5C(U zcJHxf`vNTq9%&d8IW)xIV87d*9k$X5YY2_r)7|*x81BhgP8kaDqcp+vX2}q8aVxkn zaw_NUw9yUM8i$B-+dN<<&toJH(!+tLC!tL>mD2KPmhT|fE(R`($tk$Q&#;el-IaKI zdYdjf7}e~yt2T`koEEPIzI#R7w(S5@*!0!t{m518C*>gU6e3HBBH9Gw0~4V_uA=n_ z*)6seAaLrpPG_hf>-6B3D=em#0pv$_On=t{kb zPW^SZVpvyY(z6}RdA|0U&Q#@@fQ;mOz989S7HI8M@_WWY@;S>}lbcRSyz=($jh*pv zb2Nk~$59gkNM~p6#0Kc0djA2sW(CUCwz-)-9aPzjtH8@QX| z+d{Jre>PJ>ih2eXIAQZcsmcuytnF{ZRxP9Fg^&}}F;n7;sjib$ z8{~-1fR-&}N554lyn+VP*m3hShP?HrQNt61`!pAy2@%g&lrDsy%|dC7nFQjHu%3}p z8K^NryCVh$2RB3pdqJs5N^|S-AiX2KgQS{_LdGtw{nB2x@YxiVX~~suo7XuriA`0Crg6ctk89YYtaE z!3A6WqeGwaa#mPiMOg+|L^a;~AB-enVG}dx;Eqo&C`INZeQU@M1~vyZ3h#1fd_Mcx zbMM?kS17^4QAR1_yecj0uvt$!$Vn-^H9ur}bbe+({YX*Y%uy(EWp}+Dd2*1eK z%0?C^h3axXs(sX6PTJTuEhEZSpey{W`7B{0jm(ch%spX53I`z|)y46egpN^Fnj3JB zt2KT>=)f^;Tb?(7-XJdGj?wNOB3;t9HU5o}ephaUA!^+J_}i9I z8vr{;BCVn@@Dvltj5p(tA$`A3Fj!~>?NSUnY)ylR-^6@QBK2gS*xEyBZ9KbQ#_Fc@Pawmxg=c5!Lps)!D7#MVHCqRmM2JSlb1*`mEP7oKr#Fcv;scj- zk_33fI^!uZcdjGy9T>)o;P6}dF(8Slj?ddlK}#O4q;NA=NXAOsX{70wW*T~1pSstc z>1Rz4xiM?Y`t*(l9+F3T^2+I-tJ^AAqgAL;g7fGG`L)>7%e7 zBa4Vr=s7GBEn4|u``b!hP$nixk}e7(V5ImaF<^y<`}ApV$sl-_D%38@;vCIb^J9J=dX6IL0<#2{9IBuMq#_BNX9kuMZTmCIqar4gT{mny|nk~VZ;_i;WU?1NnZFA|J29w4uv3030 zbD8J*zCCW4=ia~982G~c#Nd}UWPz?;31<4!MiJtOtJL5Phb!mUSrg@hP!cyNa%N1m z7My_wmjpYCx~8NKlB||y6BL7D%Lzph@XD|pN!1#8P+SY9M)pzrT_EIJen3WmB`6EH zJQOK(KAQA*=RwzEfdfq;LePMseKqPijFT~MKQbK7Lp_RmPdOY^i(6};pt;}N-TSJ| z?7ivZRmONsbH$+!fCP^x4H=OHr1F4Dt11LvO)pu%P)=u6+kz*_F^X+F)h=z5fC;ro zz!rn7iPX8-I?NlaeV!!B_8}H#Fsu_;kvmt{Pk-%}LY(&^qAejs3 z3SH2yr2(QX`ACz`s}_fsKnK}!DMd1$eweAv#kdGGDM}z}`*E_L%2Woa2^#?nHu;?V@)O`c7gs&Z?)psK61d)(!z zEuOOWu2FyVki=KsbEW9~26(%b>%(CqZAw-KLz_8ng0PniWh!Bhwo#{5g^<~YLeMmQ zsw%0AtUH5(HlR9Il}98jwlF69E`M2+Gt_~JOHjy<8kNx=MasL(3_BZpXDl|kmjj0i z_WwZH7D4MzGK?AO;1%SMQP(I{$eK9xiV`$uwnA&*Ja}MIzOJ%mM0H2tIHGvZ zRp(%U=i93xj5*BFfZ<8;E=7#+!15aIN%5NQ5qPW(vUn^F;=@v}?SojS{*Dm5=1s1q zXe>1$VH!I=meq~0y-*&AHe#fb(J4J|J&W+(8ietf?UfirJNkoipUVY-LbX|=cg z92;?7C-d{IVCB*oHIMwh?FWe~3T zdbuK>mJNh?#G!6~aBAjoEZy)eiFQ%ux%QaL2IDjeuXYH6DTL$Jz?FhlW#TN*JF2vb zIAT%%MU%FC1q~kXSgq?E1#Q)=SgDSuvmI<3kTHD)W42i!$mJ{bD6^0I7nF`e$XsF^ zB*bNY^AW+KhqlWmRvE9#`PTZ^Q{`m}AA5O-aL7!{W@O5VHF+CIW~wR9etO4<8r%Zq z1pzjni;mFG@jJmI%~StUHsjUZw1-lg|$Y16>E3COiJ*Tf^Su@Oy{^r1!+lJeS!?tDScDHoEGbQ_|}T zf5&GM^pR0_3*T22*J|f zE}M4;k6v&FX`1b0HJY&Ji2Q{`&ULB;<9WFZ@UlXf>4k4hm;+c#j^Dx9R9EhtX}@C{ z)W|{esmKpH;2oQZ{@!B{LHJvs+hvO7nD9a;nIGjH)FVyUlY*;*Vv8 z)KP5J^VHGTof_M$w9&_}B7R1;GCBZi#>|oT9_$4!dBYYcP$R65y_=rxgjjF2<(|1c8wu4IlmzW`oaNq zezS>sK^XNEZ1&oMH-{V4GWB=ll7~7HN{{KFMmfrwGaC^0`Hz2j?}d0Cu&N*Th`-GS zFwEX=nx9-~JLBsy`BdOt3~E<(f4fnd!zcWM`JKQ#fLHPxf@Lq?!uBNZ766GZ6Yy>N zm_tRTD z=A!eHpi>7k_yLR zR|46Br7&^L>d6z;@qHgsAKLUY)R*Sm{Z-HTMI<3I+F#>XoV(w%eIMP^89w>n&h5X! zJSz?S;7@qg6!{T_oGVEyMQa_tTN3J5WpOf4>KMJt5}||K!gexhIlAKv#ie?+ znb*;k-t--4*h>AO=0{|&$Jxqn_AIaF3-1&agZ5^+nN(+Yxq$q@!?dBLKESCh7Ed$Q zh6dY(?Lfmqw=wEpy|b;97Mq5XSI?Hi3*d(?8&3BjcW2}Dc0a+Ox13XMN<^$UZ zQ&*NPYbH4!a|qwSJ$%Dcb;)6eJ`0Xhx6cuyC4?0#*RT-bRq;BK z<)+0CBuO7SLLA=fQYRj^fd18(sVtucUZAszNWtt z4NGc+j5Za=OSH$hOKwVfLq4l-f9nZ_XVQ(dz)#SZSv0%&1li)bC-2PSr_VD*+*cV- z*{*i50M4gt6V20zQciU6(0nN3{NAtPT1)r6|Mtq}O!75#XB;PeI)Xw*7s+$PS1i3? zG&|gZ*gSwOy<8=^GH>YUc;~zcvi4|?#t#9!E#Ocu*$RP|}@XFsjP+7HRGe9mI&Nv(9%>>=|cH+JUb zs13fMZzF^bB%7g}!bQx>;x2r?!QEp>uP;$*%(kQK;A`QZP?tpDQ4%vW_qMPf_u-^L zt&yzAMO~s9QE|HV2~HuSbVi;9ToZ=L1F!IA;Uy7MMDp?Wx2QG4>mCr0TR>tohZ2Vv zKcv@y0MQ(0lw{1rQAzC(QDT4rW9)f0qaRr!v@!QPL}S|oN|twY{+Bb$M`*l>(+8D! zp`umasX*{9U=8X+=-YXt?VNUjZx9u=yBa(TP|E}~d0sDh-h)H2d~0jq4KH~?Edf)W zF_(aG!C0#y00&V4z}NSDjp;cU19=4in;}-%-eCzua12q1Sz=1LWZwJymv;)yig#+{ z*E{9=*9HH7TyA-L1&9B7xs}J|`vp!->TAG7vPYFKyW*&vq|c;3CHu0$;9THMjeo(n`*aUI&)b zYPDW$($u7!q?>hQZ=-@@T8Qq-S+bDO{8?4JAuAg+>M%TtL($;Y07jDU_Kw-Et2!F)s4a(Q{ zjF!+aD8@)LMx5OHX2xmAeh~Ce;tnj4=HSENs%SLvqFQW{Wlg5mSWbD8*m8z{PTKeS z3=oxH$r|qKY+3&AN&7#Y=|4$7T;=+p0|imD%#TX2L`UGWxfk*u=c$m>{pt99$RdCc${pg`qO&SCxdbdeHB)rvn5 z_WZS;s5O6&{EoU#hMK>xnsfNZGMYb^s~`kh`>oFwXPB;)ct;Txd2^MSuDV+FXHtiX z5_oLU#EZHYx$po7)~)+DQ~pee{&+$jMWnLQ9P*WRF@8Cku<`XXqbnOYHfm7S-t)*s z#l&8!1?CkQf+C>$?9SGQ%K4Q4lo`-?)*gV}TD}3_G8u)Xr*O5=V^xha^)Rm;a&RAE zkJ=qWy!joYasxN8XN`~>8TP#7jo4inU~!Ar9(w+I%5HXd2s*$x2b{O5o(sm`3L~;z zLpM#;(NnTw%v#i#A68;(<8`K_S_9s!GnZq$#RqJFcGPoxG|B=kpw>-W9|}^s8=>nL zOU#xHcJ3s^i(5<^=iyEpIl0nfg4C% z1KaEw{#za&t;0HDSs-V2gKt@d_0?WNZ_O4fs9d;4&6=4&(vTkv^NBTNh@~bdh z8|=QyIy)(9?0WGUg8Q6r(tl?5S#UA(HPDG(d6+vdFWIxjkp@T|8yg2V$;QI1(tD7+ z&+wjD-Uh=Q0NwlDmt?_A5NO;bd?pC7?v6Pp-ux-$ySbfD0|+kPF+h*mQMZNI6LBMW zMaq!JJ|b_vi;pMYw1J9_CC`58)yE*hLnYrKjEH!N{)Q9il0nn%Fr&ZsvhW#5zL-01pbq9dvI#z$m%aJv=S+t)WQ!zqnt>jO3BV(MqTG&M&-ewr9;Ee| zLi-LW*x;jxvMh*QzhIW?lZWRJWsrKYBM<0k_My%aD1w&q9|z{p#l?#G{2KaKkVATQ zxBvV~2tr>8f$qO11Z6`z6MNe)UgjUZy8lAVe5ut{*2Pgjet;VmI>3<8ArexNHbYSo z5m|!R3g^uzqCl3|o%qJ1BaSAJjLW`YWuK~iikY>-UdonEzbj??(a=zFgvk^nlsdE}*0SfxwvQI%ve z1x+vZRT&+-@TxqFqprot7=E8dn=oQYxe*?z2Xq3^;NZlzQ-Go4ZE{FII?M($V63ud zX6`D8gEqNNe+vft2{t8fCNvskC16kUUX|)8(T~@M-eQh3P=Sp+jU7ov^*Jkg==dt$ zvNjGar4UFux9#2LMd)*#0}j+qH&RxR%*b4HtvuApAd4_EL@BvbxX|LWu5D*3X8&-o zC&}4Ix9%t5ZJ~N6M#(2{7435o&A+0NL=fkqq5pAXqb*V3ab}kb8j&O4a#btIBVv-X zg{$ToNt~t`V#BHmXG={0WZ6Tt5Q0eD)-fX4am!j9v7Xi6*z~lL2F@tG&_OQO?UP~; zp&-HP9x&<<2AaOkj|ti8i|GAca-brBw)PfEGBQy1>_OIaT2h)=)W(mlxWmNk#{&iP zvE3nYIrk{vz4|WgZ8r4-Gq1XqWt&hiC=^50dO{F4zM%XE3;HQ7ZCWK~Q zYq7KYqNdSw!Z|zIMx9`{%VS|w;K2D_11UR7D4Vqk^I0Y{x!gzJ7|LKvKWLG8iVurq zgYdyGFxeb8>-Zru2#=y6hot{RKly5&NAszp3N%~>Dy}IB)MM!2O+%Z+yXM?oaQz-E zk{+o@;MOnb)2;UUC>4VMF3b>XudXGD#WP4nfrZc7th3$QA2!;<6>y7##XlnJT}n+) z=hjpUxBAs-U?;%xVWgNZ^2g_BxV7(BcMzLUc}L`y->MahT$+!K*&oR_brjtJQ!Gn* zrB3&P1}gHtXIp8%K)Y7NM^Tp27i5)&I~7Tumk9*3F2lANJOfE3uT2p0P3_?b@`3Pg zWz!!4gJ(BH*pIVmcw1reLi1-t32&075|6WgxdX?XE`z4n?+e&mljz;QNLa=$`g_y9-oblc7Ty2NxXV8>$-gELjW>6kMWhd4=aTjvIgy*u z#fl&@Zk!lOQ{0d6uf28{%uahN!T2?|ub#|8=Hcy0Hv zLLArynD?Hc@yeCfpGHthHwthL-i`JYf8UO%c(v%{^QTU8YK0tCq~(ymnjF|Q#$mbC z%V$JF(>!rfTqv`jrt=-e@B z^nb(4X!>)OQZeZxq`uLq0cWUP~sm z$5;7aN%in=!eGjZF~p;yX{-?(>?GfAq%JL+S#CW_aqr|@NhVe6K5AaK8B5e1KHr_O9BRE8RjBIjXf9b$3KlaZr>r$$;g}cvToJ zmuJlhfDz!8N4X+(dSx4;R>g}n<_|xgt3jeMo9LPy{_GvUYJgcBJ#(d9BUXxlKa%lV zTRk3Is_>k(O$e^7g$bkE0N+}0M$Z|+g5fc}3k;!Ku`)!_XTOaUUN-GPn|(=+&_J_$ z#j@|~xAvGH)CqPmyp-qLI`tAIWS-RVz`0KISnl_Jp6SnZP$zzC2*Iga?U#67>1U{& z6~^VzY{hEPJQ;QiIj6r?(riUrm-R8)Rq-*~{c^sD=+S!C&3ohNX1qm!`5eSR?;g}Z z{|IqLA4_9&-0BCD&3Inx=Xh=j;m1{M^|~!GZni9~Z{|qg^zeyJyF;6r#{x3W4_g)E z<(@{@@$+DsnjKs~q*3?;B{+{ClQ&1$r?WL!o0(4%bCq)IYl%qjW{@QaZl#U*xx`QW z;wW%Xn?!+d@wC!fA#}u%#b3!?^RDVK|4GS}#4A~~yzC*(A|)Dxhn5~s6&o4Rm%z}f zfnTcRFi#=(S0ZQrp}illGQ03M>g-CI&aBcoG&OWjWj0;NYg4&8h}*bMtgr22K9yH` zc_GU;!PQvA#_!L$cJ2EYMVqg*UH8JDs%EgT;G;p%e-_7qHV&EBh*P10Ops{A$ z)dj;LScjAEL``{>xX$vmTiK+2D}wgjNL{I^(v8eOr6d1V?asT*sbO)H+vvA;$w7&_ zz7xU4y9%NdY7|wC!E3<$AhBhs!@KDb-qP&=1GpQC9|F4+(-DMCqNFH9Ep3Ho9#?tH+{l!shSVQjfDi%52@U+O#`Z(kePc%#!%|N(4Z`dny zzwqSsjG{Q{2V8*!r!8xDh-=Xm&Eq1B=2QsUv7;Ck&A#pMuoP>E=2GUqt*SoRSmn3{ zl9wq~b>ocN0q;`M><3vaoZ**fLvz>>2G$8Fbaqg1i7^@u_5eM!E|mzk;64nipNU%_ zaDA60*mq$~$&ujA5h9NZO2bkf1m2Q9LLZ!dBU|zoclXDV*Hy#1YOkW>n$^x$di&+& ztutiy&_2ctQ$Xd`Fz4~Ely0?Pi7&|VL!9`tb^4$-yP;Dxil%#mQVUxr_e8oQI!|d* zi&MAqflf43l>wfg6Uee1OIxj#YwKi|sCl-gExMZsWa+@!d@g;CIxYb^1OIX=Ij5~K z$q3KPgatRI6ov6=$>%#KWLTj>68`RAs4?D{j*?r)8R-bWw5-cT$`mM84R6N2Ig zzBPm;#~l!kNg^GY$gNcbhF+xSNj5vf9<$Ev%i&vGQzSS4u**Luq1_wDdiv&g zhw!T5B!A}DkU-Xp{`_!~8QtW$ ztX4s+Has?BZDb(nP4tJtexfknd~_5xKUzO6Ki;sp168#7?JDEybjYoz4^be)o(y(( z)h-QmJdEkQo-7v&OG#xd1Dkd0m61^&wKmxZ9r@Ifdm^-)Cx!J9>RP1{ekNvoe8;XZ zyY0~gEE=0n@?VP>jhQds9Tx~^@j451G%t2dPL)=2&5LrHS(B(^y-n%2XQRFeFL^$T zxt!hVoHA@@=z1eAx5euW$!WL+3tx#X+BS8w30h7&ss3#pwj@Vg7N&Ybm4fMmtclLh z8YSn!y6gmFopd2Bm2mcG^TMJmZN`lRGCO!~(~hFNP*hf^1!R*z-xS;AS2myQJkOaN z_QAP={?(J2883v9F}J(d%oDAeq>VFSV-)BBA!(Y8gnFC%cqhLxmywuqT#Yql`QcGH zOdy$$E0bbI(~0quS*5YraH9;0_|%ws-JcFq9m8WYcPte@5!uakusnx#;US+DJ1x8mG`0|v_+Xb&Ey3>UCbOI*m#qdmCvTM?U z!)Y!SAF|neaf%2<7UQ}SI;k>;HL4D*`$n=7H(TM;m8`m-CnqDp0Mb`hACVD`+fIOY z@Jk8+rB!7_sncLYkF$7p9lGmNqTu`)UVr9>xURd-EtZ4dW>qUhF=pw;7f+yfoZBy0 zV{n|{{P`Q;{29uxV!w;CfF!gPkh@zC$lb@l_AB4l@K776xy8ZjNlxBA^^hxgA<`d^?<0?Axkw*OFX!SX!_#n3AE*GZ zbpm(c(m%`gv0!$e>+Niu@-OMl;8%(#3~IWSobw|+Hm!ztgytV?7{3a;Kpz5PwIjT`5Y^>JR? zlI@}@v1Y5&Y;~TCUb+|uAHW8zs9ydOUN&1LT(Y*Sr9Yt60qeoYNpTKP}E7;b}8y>0#_9sQd;&|C5l;QM`^1*dojmqA>UwGqrS(fCD6zTNrHH zY1}`8!{%z^@1g1VJ))dxtW?$xNb7#8Tc;@PQ=L^0){EWQ7)xL*7+b7{D z!A<@%TG{iK>GR$jNaYO!38FxIaEa*;)1{PIT4xKa-DjL|*@UHaZS}gKK{}0EXRPYm zsoq((#+Q|!Yx~&?8-K9j-WW%3F?$jMejxZ02)k%xZ3gM%7L7Y{`vgvH#U^?L)5p&5 zLQrlRIKmPPTJ{!_A|5IsM@|>w4~pD)FCd0m8c=>Oi+HnfN@i3`@?m+RA5O&_Mees= zsjdwKd}r?d0M6Vq?EQXc*u7QTXb)W17u5dIp+4iO(d%#S;2vkQgZAJA4K$1eiC6;k zUhOn|^#P=#uHoIRG-_ijFSFOMAKx6P%IFPzJ7{d+b?*$ieEyhnnfpJgz3x0Aa)sdG zLd-~ia_puGAkIJ?s2!^|oAUmS~zq~N?zW;&0@(Xh>4Br-I;{rINcLvD< z)(NV&%~l5d64G++eP(Ht7w3KMij|ri0=j=+l45=Un}Iu-CpixUTKxfPxUvf1kH9^? z)88c|G$&FudvUqz)|?k3M}<=?0Xa8-GhkXJnOLBmY69`D(C+?qR_QyYirmkovNv4h z07R7;QeL8uY;SmqAR^&+R&dUio7~k6L5lA2KiBd85uZ^0n6U2%|4cQTOktJMeo4G2 zzeXBp{u^e=)WOEk^FR562phT>Tl@n&lT~f4zXl8a9utld?q%0jv<)gOt*z-T6Dqa0 zXP}!2F$2M9`Y@$eNhO>QoiHBhLkK@8z%nEMUFS8WZv<1`_-BBhgN27>`U2Lu$It&0 z>L;868);oyujxj7ZIO|d$X6?SBYC^(2MHi6lvjO^QnIV8U$t(mm)m6wHeRB>7Mu9#4N;jTIKgYJVUf7eZdkuFZEgpn`aX4R# z9*;*~tyR(|1+#`}xSbbW6p>n;-nm@OeC;65mcwWQ&_nJ@wXL?8pIJPpS?-UV%}w5_ z*=UXoI=6DfZSOk#4?RX%oVGE3ft`|pO+IG}G6dW8^N5l^V=25W`w`O2!Xs)M%klk1 z0ICVb3Bd8{$*TM~@*oDX|3%fg$1gT01gwORmqe_pWi{S8N1Rx!q-+J{u`-YTTAzFl zCfS@gd$z}Fv);H4hife2P#gbf_l}fxBTDSjOs95*!Y8GUqv$)W<$*n~fXg(7kR4xa z?WHn}VVMsIFfj8ePjGY#sDwr==$L>Kp}8bnNee^PBCUSwqD91fxo^rZB(*+Ah|=92 z;iIWDx7>!@oqh_MA|M2mmUtYa%nAXhp_#2n>UW$KhVxEiR|ioSzbr?AMj37dOd7n5`bm-l z*qw%ALF*vFKL_1*cTl$USJ1_O1s%ixN6`JR$<}`aUW|&hJ+cxaU+OZ59n8>Nq`GC3 z=E@up!KYb`Btj})k{B++Ko-5#vPH{n87u>F-+yj!P87xem%tcjL)=nx-Qjq;v-#1~ zM0Voa+jOkKH)#(n3FJ0R(OI*tOpOiJCh!8fO7#hL8s`MBl#Z_J zXsP(iVtVA9KDWI^NQ^^n{h|wz@`xrG${r1yRV6*)2eR6H%Jpk@$%9*TiCB*6q9vC4 zz>Vye6+r%0j+F5b;2g3FkcpS3m*{yjm08Anq;35^X>g!L3VL{dFA2z#9`q}c9q){ zTBY=3I#s{5)IvT-NJ-;Ji`06~$Y`Q9$m+BS>XZU>Bo}kx27S&PU@DM6dgowp*&S zJcYtnSQ`9)=E8qp^!gtmscLIKr}))@v!`q756yJu4mpieURC7aa5av|lOZjOU?DZT z9RSO4Oa+n^(-IAT7DX@?Mtmt=O?g`oQyKT4ou=lUr^lVA=O34^uLZu_{L~nLt#>4K z)X2zU4GruJrL3oxJ-wDvQLL*lk`qCCQsa%w4})e$!xGL$4?1$AjeYBP zZHH#ZDlom`SciR;8L8`xob1qxx{C?Y&h*yr@gQ#Rcki;htzNA2G~f*TqhvxxxnUN5 z2`Ta10b42F#BHn9J%w1_`jr&5Pym10-8X7G-8&6cNY$J(H-OFxqsNYndo5r{QQ*Fq zP8jgeytgX8?%Pi%zYM2FmjTaE3~L))w19%7{L2o$8p}&4E_1*7*^H{0x^OzY2FcS_ z;z?W}6Rj*e%~SAZJ+BoOs(;aDstfNhie(P+n{dfIH4`axIt!jO1az}2t#KIZ>y1>w zIy5B>&mtO@*_1`6C9am7f8<=`{V6rpo-}ev50S97}`V` z3!p)3YNhlO<|L{|zV_dstf0}xDIjHxaO+8b@GixR|NR$?<`0KJsr>5xjDHPpk^R5Z z<$pw^nw$!b1fp+{dWW_r9t3Vc@*ud44NQJ5{7)ki2%K;{5|~;fCH}0M#rj_I@hh_? z;@v9|`0~HMo{wav%D>nq_`kn&|GN7ypW;*XE2$An62V@qS>L!MaY zqiEIPJcA6^U-Qf+m@l5o>xpO)wpk240z%QS)i>L5V9Hu_w_}dM(47zQXd)jc`Sc+R z+NqG-{hcnr(}(WdmvT(mjh0zju)8O9(hhsKfvo*A&e>(0`e=$NfpQW%va5fx7l-X8 zBiO4lBv?QXdz(8e(AsQT$df%rs$rtt>^iT4bDFMS*B6gul?a zIr+p58TdGgUPcARZpk|O6lgxeNQ6PB`*fw9rlwHYc9LG)WdK6IFcH1~nl6vzMyyA(iXf2Psi2 znN+vT5;O7xSmR&OU9Ld_){6np1k1TzM%CyYkQng{#tf`@L)Pd$VzAv=vS_4?`oU!N z6){TW+^OR*gxM$Dh^f)ke3T3l1Zkd4K-LirNIqmY>$g)V)FO_}Tr;1#Z$S>d^M41w zqrQ?MZ~DU}>0*h;c{{#qb%SvuuntHhtQida=8PoB-$)mEP@ zDmi*UY4rQ0G8ZBN9yK54u`eb4*Y9ru%TYQ7wA|;NHoN^@0LopjXZjgwu}@4{M!|?f z#?=R`AK^O*aNj)R6OH2r$4*-KPIQZec%aBLn-{lJL0^jB?rR;r=niW<6A6LFAzCP_ zF2-J3>m1vIH7A8FToASySn?@ZNo@MrQ9;o?D3v{-$Sd)sXNuPW-Oc=ZEYVHWMl9_q z!wnt&_U{k4e=d%uTAyE7zfg$d3x$~e>upytv~{rgQY!w(??1K+xc-~hs=eKR)a5GH z3ja1^JWr=bRjpVBxd&6V7FL__3}-AXDQSgGDvW&_WJam7QXgyP)>8K=jRF@z{_)K} z>Gq=8(xMv5FD>2sI>&pe%iY}X{o@0r7skdczVDUR>}E!k3I;S*mY4l@P*f}rQL^Q{ zELN{vgs0J$0TIzPU_xfSVL$`q)TwLkpl&~xGTa5xB!}IOO5e?Zk=v*^zoGnS@;4j z#!>T=@F1PE3B*zS=XkqvJsx~-JI=aEz{PKxRNs~B3O+YEc4r417kI4&r`AxdfZ|hs zicns(viFf5V^BxK6p}8L^GQdECSequ&Tpyh*fNxG5R84wS@`T@w_M?X=*#*mzw3H_v9J@3UuGMp~E%dYlT1A)!Rnu*GXd{?gW0~E>;-Cw+p`BOLCck z@arm1#G>A}26g3S13bX!Y>E|2{r)x1mq`5fps&I zGj|T^fr_IKk^;=3j3aLlg9O4mUSDorH}QBwJ#T97J!j>92xoUBhO3X<2p68gaxIg2 z%2;#7L)%o5T9vx&wi6!Q@UMMD%gmfoUO1(7I&?KL5$mODRF)h*&+D)DTkjt$NKZ9` zfvJlmt?~&|X*;1+tODjXu4#?pM0p`KdCF-D^G@ASDxE{$mV$lL9EqB2w#FJ|MrHjH zk4qZe1*G5Wc7EC;9IPAc2i(AGZuM>j2kbcxwYrQ%E0-Rlqtc<$YS(;eHJ_=6%x=#D zaT1+{afR}Paw4r`YSar^`rgAaY6So~L-fd!kCy2}m;Vkt8;xb5>zSJB{4J{O3Xn z*@8bt%Nt`KjWLmkZdrUBnEGdESO^a8FmQ0bn!Csv-P49BMEZy}0xVD>&?nP05nsi! zT=bKfY(94__10&88Z{}kM$>dVLn-|OjZ-4y@I`$K1yr^CV)_&uf%+`Q3^) zt>xS?@_56FtOlSJ$Z-a^Nw{Bs@(c*L_ZaF0>#W$F`8*8-Tw_1u>$BZp>vP=r28iE;0thfMhcdbSEOthoBLKPB z&scs_^{9m`&8MiVB=WPp41*~xH!`5^`}lB)#tBGjl6-$$DtxMKluxyZz)lEFs9MfS zH>7fSuB7=4UsTEyn6s12`O2+My2^>vYGscF;hIyvJF9`w?K{y}9f;*}Ul2gnIl`~i zBWzC^z4#4I?_8#_h9ljjv&L|_`Zze0M_`TgXS2FHEX-`j(w0qUz4E-!eXT+BhK-_P zINv}*Ib$_8GH-Nyj#_5%?n54A%VKa;WWCwWiDRk9T#HS!){0VtO<(vaoFt9-To5Q; z7Euh@xM2f6k(ITV!~1uaY&|vu>m2^7i~5y6f?mFLeE{w)x=L+}F4V`LH<=2WL z-1HUqZKJXl7C!tpQLdM^sthIe$}4eI3zKIexXl(XB0EKKP`3@?&_vzI3ZKQ}h1z+7 zq4LkoNPtRdy|qq6$&ig3hfW@K&;%{&*-y?*hD7z;`2EqM&~>eEtBWiFb2#dZvhC10 z%O+ksUQ^uq`jrKDVJm-y$R2CXF8o@sqvzy0**nnXX2~M~)bi>h2;!A#5^zeh7F@1^ z;C|z4rP-Y+ET?x`at5WG#~QTUweq5a=abgSxGcq&oFB%W=33WR=V;&tSY3GMzFIcj zQYgk0J%P<419~%cY9-V?9@>YVkZ#?GeNz&|D=+dtcnl(b9*gwxiM|w`?5|DF%UBSC zpImi!M@-q?Gtw(pd`EEH3=s!D5z5NIuGPn< z*z7~-@jTAY;VWN_FgX=jaqOyia6YS_4}Sx<3==fc0o|2|tJW%cPt@j!d(530pGlo5 z+GWg#{4|znmD^9V-hK=Ck-{AsQZ3MY{Sp( zJB~N96yFZ?4HPf}%;wCJ7%@Sl-V*7z%t~i49Z_(*$fm#eI!N;)D@CU9v4Ojf;pb*FXJaHrDT&Q6CaE1PGV9gMbYW4;2@r@{QIss~?JuboJkm-4BQV zG>?k=_^lsHO%-i^uF!WO$L{^kVLkPcnc+cN#chomt=c5z#naf zo)V;h5-Ih3ej2$?z$u?4j`K167HX--^AU-{83B3u0yAu(lBDkYYdWrCzai;N_=I}# zl)Js(5RXoq6V@kue%Ildx@WqO#)Fx|$cO8#%7v~hUekJnn0tGOJt7Q!kOVvKu~wHK zBWZ`qB#;>P{;OqNy!cdC^%Q3O_we?Aly)ZYP>~EQO+^B762^ER~jb zFqmS78EZw2l#&*-Z;H{XMf<+0kQPN!Dj}&e4l=-q$EVyqSwgZ37%^bn=BLG9WY63 z${04o=*yXRL(48*vtIZ0j^1J^?HyJNK2&dBaOupqq^#mUCq6#mP+onE=sS7T;LH7H zG71?p`{(vs{Vlnf<}>I+pHTxYk4d+v?N+!XSWi0add!Q=@Io2yz!>l4`!sitQcg)T zzbXG>j8S2#-NM3*7n|x%7xy)HTb8@N^!%*g|Dt5)7-x?evFu@ePQy5(sg>!TJuM3( zn`c)=`rVKZka?-GjOu%8Ax&9t{H!`V+u3`<&CLC+zMKzo2rU^q^1I2_lhf52PG8qN z7clcdblP++Gp6$8y?Omt%F<(JulOMMeEXd5^yRyDOqXS>3mLQ}b9V%<vYfvb7*9OIn9eynujSNhS$@HfUuUFPmqTI_@H^G07@)f6jk{+}^*=dr*;ogIZTy@MB-b=IDtU+3sa7W8Xd5 z;Fn{Uy51qM?P5!L)GmeNO>_I$Ut=$IzB=hpd7u8PdJp*GIBd_p?)Jlz4(zDP@9%r6 zfwA&Gt#i{2RjDSbFV4Pu@>P5Bq2Cu8*Qzw8E^v2g+P*Zb{>25OU40HT=D0IFi&gr_ zbt`#hy5fxg@uLam3HBaqZugz_*=*Rpj|Gnn#;6X>+qc1R`x7I%$!}DaQ(N|MROH^R zy{Bi~`{#$b%9SYxUiXe1^HiF5p}uk{Gn1A1MZ?jVcW&5}%i1?9YgV*;v0hraWQ=o0 zXfu6&_T@{mLtf9azh?ib#YQVZE-90iX0?Q}>qNg9!w+t=RkX6+&69S}y zI5lBA&%|8KY<#J~^(|TxRb!wqzb0Vg=-CF_GtR&EdH#7*{i+{!7uU8>3NEhwFtc=y ze2&f*!~JJ+=UO|t6}1^DoH6~D*z~IHPfOeChv{+ajhoYtr>))c@nG$d87miXBYPPC zxG-u+ZAyK@;?)NW?OK1uwuOXzv$Xy5PP*;my*&4*05zQi1uePz9H~>z#x+Vwm+7r3 zv{t+MtC~UwMg3y$xHw66pCT)0ana9=y_SIoO@)>!jip%&Oi1XYJVz2 zcfpqEslA1>Q)>HqT+D|_LKA#P;7k_gF(M1a9wtt*hiY2y=hXY0D&6MeB%4vwRGOSzdcNgIX>Dm{ ztM-Nb!lMqcvO}C?Pox*-*Np57?Y=Y+2BXa>jy7G z#tyPGik+^no2R=Y!Ng+eu{#zX<6TtVDOp&Y9=87ZM7c9hGDA$0>=Nf5-;>wJep__J zs1MspKHeTB7x(blfs=0*wJPq{;IKSp^y$}e^>b5u|60H1;_4ppdxHkOigvH< zeeL?9xLa#m53G};?#W7{{U~x+Sd?3tQ@?$#zU$VDo>H%U^EnTc=LN`hDzC{YqCw>?j}T z&}Lq)Z#E^Oey{w`Z4X%a9y-sNdQ&{6P+uH3iPOw&3|{Cxy`@Yoca!Hy3#H#eS*uHK z?=w>2sMb1Y6WXHfp?#^O^IB?tYgje)mZvma60-h^RN+AD=HF?0jaREqKNvKu%t7bq zy(1>}>$eoxkJl)_%cj>}Rcy{pKh=GVvH6{xJ+E4-7fhMJ`e1UH^7Y768`+iC8y1c* zlN;)+dt1wX_tB@p&HJMQOw%j#lm@<@<<@BQEX~=VqHgVEk3)GcyOo(~Y&Oj=pD^?7 z&7H%Q8G+knJkD7r^rg8*XKOuIsw@}~W8V;-=XWfN-TJuaJ6^`0kE1MTsY)4FEI&p0 zhcScm7v_&nA8Y;kj8DX}+Rff}{=;_YByW*9neV6Wa3*)2gKh2t1MPl~-M3EK|1F{| z+0As*7Ps;H7&pJAwPdAwMIRjVvB0Tyv}cUIXWoL;p>a$-=uKq;+Z zrSB&$u__B$F1K@-(T0rN6OSx3*GShS4AQz@+eo=-`aV+nfkNou&`mM+B|dNa-I@9- zp>mNFHKqG~y@KTXWx_X%>&lhfJqXp;9jX#%J3xCive-HuKataR5$V`zyFn?hOdgo8 z^t7_IjR`Y(I#JfHuT1x+r|1T^{T}&V@qc@-TZ>o5hCR)bO67*OmA?HQFFh@&ap#Bs zY>vEd)$4WZ$NQ2K#+#i_1gFmIzEV~tW5rP)_1K?F7uRS!nAu0w$)%!2{bE|e6^-vH z(-%ZNjxTb|)1|d2wjT8MW+^e#%~PT-6e%4t4qvo7uc&9%SH@PG#+}wjd(N8X`)21i zU-j89hrAiuIl!t09bHJiSP4|cCuRdCRErDnXh zmkj4)L%}qC87uuWtjz1o61oq1Y0FR#=Bd8AI)l+`JAKyh<0i&IS66TMrTd+qn`~x0+NE)_ z>#bjJbTb-UbS%xT?R9n>(kJ)LLi_PA-9H|$3)4%VzVy}?Z*}I$=Sr>TPWc$S`i8xI zvHbJRoYDKX?LBMtsPWLNK!cvS@2EYOoE_3&Hz`%Ic;@bfA3wi)e5~z*_3wGx-*~H0 zN4jqQJ-bA=?~J`uZfTuv9#^YK3*U9?qi@!+3zbUWo8YAJ72{w3%?hp#YAUeY_GP+d zSj(?Q)1Rd$ss^9>)eX&I52%$=%Y_vyy*?DmSa2r~M<3gfE}l*%5wuu3(}cwiHKs+< zg2LcdOd5v+rDcu4fPx=cgrp3!MFF;-t-E_I*HugzarMySW;Jyy5937MbIWI+G7Xm={B<-$Dd6HG>3#i7bo-${ay|D&E?nG2xS>6BKy@CHJ_UggfX0t86`N_ByS;Gc4*iXE) z<>{Rl*S^}BUdD%pCbJCahK*S&r+Z0Do#r4AM+%zZTt%`W6k zugQrCcu3nMKl<+e-3GU&r*;1n=+V3P(#zDVx8Ilh__=SkG)gI*xiiz({fRGC@qk(J z!bRqT98RbhZJB$Sdb8q8r2J=x+gk0{IdlD&_mzBE1A@x^j`{Jqug)VxIRV?)15-)ef5G&+-)0Ndgo z{^`4A!KX#3uZFBP{QRI|$Xe^v3ak6;|7;lC?6Ex6r{Yh;m(2lr zYla?wdBAGx@0ptE8ubng!;fcs`NdnWuaQc+x#Cyykj0hbe@qy@$Zh|pNm4PBKe{aM zbK_XsF!dMTv!xR1QzojjPBb*_v>pHS8E04X9QpC1D_vF2UTU-4YT4J#JMX^J?|?$j zH(Bhv$yTdcCak=$?c<-g=W|EY8==j9^>LX(H)!tIrP)^hTeFX3v*0Zj(F{7rZG>U5 zyPLc5NB4r;&La%>mwVhXaee7(QerZa();DWytldobo4@bZ*+$b7&yEkPeqr#K5sz( z(1ur4YK^Lms;^~#S<1>R_?{Xm1w(f!X_am&6iUyY6z7DoUdBJO+ovp!5bxn{1s@(4 z`&AIEJZwE(oE)h>#vV?5LT?~Y+6jV3;1@{tbbR_!-q}8b-vtD+$6(^nCK=4cuiy0KdX2W&lq>n>1?jmwpm&YOKlF9Ac`CPkPZr2CdF_G}H z1=6Qcv4;gu2g5B3qz{4-y6F4p?}NeN|L`0IhNOJc26M=tL(Qu%crKLA#NMpJVsl8v z4(Zgk-Aev=j|Ve2(QHOw47OTJ%7T@*E=A80up(H&bn-&L?Y>=}ydd;sP&g}S9;sxs zuHOkg&xs*-u>qaJ+ijo@V*tMC`FdkdGnjK@BS*v^A@^1|7}>>s2md}E!vrrNRUCEnM}kM8y2S@IP2Y2@ri4cdSN6#5Q#PC2p6H*j`@#=#LNo8?B}6O-^F3J?Rc@ zqJJkFe)70)y22eCy&OFq96jw_9I3*HWKS zr%&9yF*F7Q=D_!_v9YJC2X=YdbBYfXK z8%JZn-812I9os+%74Rk-!Qb%bAHw{RMIpC7LSIs?%->7NjNUR1T1K0ZFeD z13!7(C!_F4EM_nR>&SF$F;~K8wxl=PqyzIJFtNRhogW@I6K?ep)lR^~su~|zYOw3l zJV4F^q-G}%e)71qF=R-5+NaktExACN*`1&9=mhe(y)AHQG0bQORvgnEI|xBm@~q=$ zl3GB`KyXCxJi&*@{c0&5OkDgz#wuAS;0_b9=;(=Jx%f6Pt{<@48`|Osh_GmTJO{&g zkxJ^!<8`Ee$o>5Rr|*d6V1n))SC@3!fybS0Cjpzx^P6UsezONFTm{v^q=FBRJHkl< zG?ft%6ON7QsPqULGnx@Z5UtKPFKT=XVzxsG&}MPNS-cp0i)+N4tiU$;tq0Z7Uf~N& zphPV0!VF;@P{58CYCu@M%oZUo|j!RnYf7}y{H6o=iO zJ~YleeE2I{?SyGyoHbw^G{{s1iHHAvFO9G@l0U6kJR7*V5EapmKOsacmss6>y$l_@ zfgMi57ow}+!{ZKNh{w{|5ez20O2;uihz|B4^8Hgs+&nNL)$r93h`8U1AV81Ezr@1cLENU8Ws~B44oS`0L2GB3v8Hxy^_%o%waAt1ZLdD z^KpQ_Oi1zu&7%jm2NTS#>A_;EPs1}SA_vH2@YO~nbYF-g!5_xUZ~!%d(x&tQ%MV+g^A z$Msq!nH)adpAjAAj>&_GGQQnDZ~oEH2vR;lmI93{RVyTtgpbu96L=UK|7JqC(iEtV z8LDR{85W=Ru}HB^8_aP96tzQWzt;VwLCVaY1^sep$L4z-+wgF~6>J>AE-&Q&!Izgm z?W4uP4(%Ngd5N3I47j)jCLj`syi1EPVnBmXf?}c__o}TV zBJCZh^l&=%c2x;vOzVHA)*mWlILw)`Y;fm?$93KQ?=tLS7~#Q0Ze^4>KLINa1&RYx z6~TwcZOtJQC84z@n`)Jf1N3rm2sHj5=aJzBSy}J)+!5bq_&&hBvUJxW2qsyO6o845 z;KSoi+Dk6tuLqKv=*|6@`I_Kl!4OZ-!Ls#XaseC&S80hTy*2Qg@+ zpC}*|!yof7Lpv5*CGancYab5=L8bSEnn4E>6UpWirh|aD zP&clETEQYq^9eE`^uU-<+y6%cJ&^lgv^RwPY2Xj29ShEo%5BfP{8KyarAjT2h9a7R zcUvPxoPABasDCqOZ|TW56u~{xz&+4VXM9tcmE z3~T#?hV515Me=&NrX8wVC{h&6vhamh5u9Q(x{#~v>YnwD#%3RZVoIJ z5>}E+Ff8UVq7x*oto1poYNvsS!6J?Lb`6P`|EWd&*K6a6sUT|(Bt+0*DYu?fR%qzo zji3_vh0W~Nd;fvy27oM7j$tE-9GGjd8A0CkNERDLz7oh;G+|rOERa(x(vE6hlgYuB z;v~RxPCPwT3V1dY1C6h%-;&{DV0sSr`ukpc31rD_50R#V9GXbgoP=p0p|^Q-$U-~c zE@w)GCsi-Qmo$t8bJ3vL>Laa)_K`#mHZ%5x#V~?Yk9|Pn<|?pM5R?GB%fIu(<39aN zMumI*?c1lB9kq==#JZc)?V1MY93>LZWWI`_|67kU#7kOw9~8qzq{8Qa7bEDOeYe1P z+*|B5ZRQ{g4bqE#ijmdj+I$H-OWC$9`#uOd34+kk){0hALF@=xxTH}mb>Qhx9}rRm z)r6Mj$R83Rg8O0I7|h_#84BrWc!$&Y?I9pY8cKuCe`Zmnh}+3OjRwAJY7Pzt8FC^P z8z@a8Kty;16IS5$-LeQznJ;;F(Hu$MAu)_v>eZ2(jW^K{r5m>Hc62ugT&L#;z zd0c5(5)nj~l1t!go>luV%m#_r;w(B!yw!_LV*3@3|JxjVfvlfF7}!w-#G($74sW0! zwBtVo=M!aCu2?_w2D}?h`a3I<$>=h9L=O532jwF>S_0I7tqTDTZK7ZLlJKc8q@f2( z+C*Kw<|weB5HesWv{MdNCKd9JgIl{sr)4f6W0}aD+o(U89C}m?Hgckci=T5BrB6S9 z1ngx4b33%oZw?|80i7}&>8QM$3Gfzx zqe+9s8YFPEm+yLYgs?{i$Hb;u0lZGcqec!DFTjh%l4N}J>Pq$0C=kMeNg7%Wm$XQP z_|qAoVUSLiDBf?lGEe;i0Bi^n!%}pJW#Eq^co|v>yMR#+UoC{SY&^n3J9DD^ zbk5`nbq4?gtw$(=hfE;Dp>)#41=pEpsmB2C2i1rMp5TdOBCr=Gp@YO~L zr|l5})(~K&!->WAp>wdYHa^esmc5!bgrWTk!i#M49lG#3c0$?)!wLuN+<~sap4Fb! zVi6?6vsdm?9}7;u5Ar;Qh@SOsqFnnh8Z#6-Jsd+9!(BXUHzyOSLJ`<{2sd!5DAyUX z4dQj(mjB?J67b%GuIPY&91Pe5wbEe&j46qxeTIFiP=IsBf&Nv!H37~9w@0;e6q~ig7Oc*I}LO+mK}yv zrEr0|nq8;sA4psCKIIh+#?U~XLAQmv_a_)`lF7i=*?&Rcncff?yuroMs{IdoE@2)u zj1?zI)$TBkJDLsDb>J`>0t;i!_(*ssH5l6f3>6AH#i;aXFaW-SwC)!iFM~l@Q0T!3 zDlJNYf-S-Jtnio!CN+{CL=5uw<=GAwO@T0Jp| z07alwznr5D^AGI&^pxTDte}u0(#5 z4n|wdIDFEW89Cngr0`V)Buji!mHru1Qz)4yw7p?SD}J+vfYhFLNr)uair}k|XvTp; zMne^%qmF8rn9!q2f! z794^gD(t*8t6tdr8G8$ZHlo+jd{Q1k+=w>W+4vOnn;^oIJwnPOh#qIPKIK;fZ@dU^ z7>tfd%1aPKEbHzwY`|ZNpo>GxI|(*8k?;s2NpjR9smY*UPhg_CoJ;3Oc^#=P9exLB z&w?$g1R2&guL(0Bf$M}r3q;*4M33h#}Bu7TCmfRBzwSJe}+`7IJFI_XHU(5_lL4>X<)@b=489|W_rHIUOlL(qywd@_F7pE-r?c7XVJ1SSEf=Tx-{ zIB;sq8MYs=*a@AEPB4&iwNjjG38Xjb!h1Laq@+6=q$sWZPJ7aHu`Rdt)q99+r2t2p z&C_1uz|?4ZB!1y5*i|#3)wSnt^^j)MPl1Hm-oA-;!c0Arei1wV&=-t~=9`rJk}w4s zbW$^1(_g*{%lrL+0E+t4%YKr;iA`bgrKoQr#3c=I0#t3Y{v>2Uu9sBp+a{B*=z%?A zAx@)x`)^p^?ugT(Zs8?JE#X(+cKefCVW2^I4J{19so=xo&ebKs5*vZ1vP2^Q1`0F4 z2xyYZdN?rR6UpHYtQg1?iNTvK$eQ~LboT{3x_2mH*$E#5MGSStG;}6c_i@1f%4;!wiE*yBr?-<@FjAdSofS}Bani< zi3MG~`R*l=6iIwYUh&6<4HG|=8K{5+G(c>cPA)+L^_3=0o3#omz#PJ`7NR~KHg=IzHGVKV zFa7G?L+B#zL(D;^f1_x4IEf-;RHVTmYl*W+c<9hgHsfs2r5oJ4_6bVB~Ehv*+O=Al?=smd+JJB$x(9 z<%d;ar`@K*Z~%>?t0Ke@vCZvb+*5r(`n!WbUk%$0QTH6l7DM--iSzybtc{M-0PY5= zqwwKzV&ITq6{Gp`Z*LFx0(b~4b7&*pP`ZE|j_>>fQ=e&>fuH_>FY5gL!9_&c5|-={ zXviVTF<9(BdM5^eFxC(n*R0}70QI6V*!bFxdll$68k%WO$bzHVx-Ah8CUD-hdS7g@ zrOmrwR&=m*b(wgm8$Cf_5PYRgyzc2w1Yd}Q^%Hcz%#s!2`5usWXJA8U@q%96l)v2* zqQnGdy8!}B3jiIIeFC1Yc0!%v{}7w{W6 zV=egbxLG_Lq`>AnQ)6h+PBF~(kcW>RJ4CztHvnovC8OwjF!2`Awkw*!BuATsjJJbL zPZSu!2%(kskfN#dpfGYY+;) ztldXSZeLQuxA9vw<4<$IBNV{Enh0}aJ{c3A=DO{{-agRAcff7{Ere!%ii}3kh2Gt< z^{5@L&FU4mTFWDUoA(8R`xTuOsxABaHw6hysIm5U{u&C%nTJX{8#^VZ#xspdkccN4pPO_ z>hPE`Y{6WbV3hPuRW0m;6ov8y9-2SnsTNDGJ*fmmc$ z`J@@%*;!4xJdl1ZRHX{R;7u2aDc?2$rl{PRyDJz2tAgYgCa z*zn9R9?EwXLLw@#zf`w>v8w1sM|^=Zeuo`&1fya%PoWcp_mKURyc+T8DTy{Uj$ps9 zU_U>^eq7m3I{&sT+weGA0ta67L>J^aJ2$(*VN=3?lVIcEV9k#PKcR%Zp>3eecziF3 zb-*uPmiADo(-6+Bp&1(?#aq+|MLWO_WHtl4J+8AT(mD`@VB7RM_HpB*ZkZqaiWM{p92#9IpQVPP zrbKqu1>p=VHgZ_*V<=K66b>Cc<_#u>b?nXQI6aK7Ci9`{3nugclSIzI7HddKCUv1b z!RNZYfS#+5s&I;J`EX(Z9oIRDx+{Jua%8_hg4j!GgSLg{m@k@%74mn@fv9_b%!oEw z07eRd^;Wcd?`821czoRtmR;_x1Z!MzeT5@m860Nm>bPAUMN-S$O?$fOHFyqo9L_?} zD6q-FDA?&OIi0jouTuD}z7|a1Q)H&fawQhh^?0A8#d)*-q%ItQpuiHhP(zsRMJx$D zs*hg{8~ktey9x%H4P-P+yARIY5g7_4lNw2Gt~7HM1cT?$rJ|AVyf?7`!6G5?HkyeC zXHhxeK+lzgy))BK0&r*W!8Z$4d2uN$UQx6mBI+Xk<2{EM-ZACENB%x)RaA@B9~c8& z*e#-wAjPZ<#-G|^UI_}q&LN>T+6+hAi0bce+b69%Dci{bV*#uTme4}1ph72xdlPML zF5hx;)=)s5hc<(DGb=&}arnmEXVzx)3N*_GW7;B`)rFy`_RYF5^TK+rj$RdRmGAnu z5}ZpKUbavJd)>e?(0n)|&S0Jd;_%DxIR5kVbZ}m87#3+GP-&(x6g@(P&x<@cV4xBt zFA`xKfQ}{aEGFQ=ncR*`w>peqN8=D=3rf4^yv9!c4TB~(4bj$kB}#|mu#ox$lmDgv zH9XlrN-&yqZn)j!FjPV;%;3?@F^^VDt_8mRwq0vGiEVb?1(8Y}{IcpZ?@9F`plTb)> z)bJo5W$|zDCA?b#A8&k)HwqT8+P4^^`CEevc=#zHAsl)%|2oSqzf0f@nfsEWFM*^R z;1I)*3VVJ@j3j*3Ef&5%g6)zS3-yMszCMRD^<9H$``yIg>LV_^FG}2 z;ARY1Rt_wSo-Lr2h=+m=IFRC?GlLS`VdE;ny;d8>{x{eTj19wKp&^>#ZSgXkusdH} znCDFzI8O$*F^!15)W|^oqb-pBVGvBXAq*lD3o`@GB9x7Q7GJ2!5XYM!&7~I zNW0z@Qe~;ffNltMv=voBGE^)bt!7e57WMHh*e3Tc5ZHA=V&{j)Z73Hj3E%2=6BQL7 z0fz?4ql3!Lk3^9^ta)^%S2!(*9s$QG`SV|URzxI?P4{8p3kX!&Ux%%opM}{8deg<# zD#-+3OMuv+WIEdurtkzAcPZU7cD_&s>`uthopynrJnrsil8M2(Kx`I|UkvJ|Aw8kn zqO9#9+?%=8NXEyM$7lDQASJH}Q9c&Nn%0Q!y7dyWqdCx9hr=}_SPA$(gfGi5xoz`E z2o*n|I?##Yt`^B;!D>J#K|wZzS^F-5PV}{+@F`&RpAy15NXcC24%JxtL71VcY6)*i zw1J~o!jS~0T=;b!Ik?qwC;09tsBX1OEB7;8h*+YTUH$kZexe&>u4qs7b?2l z9GLPF)N5^axAU7}UqK}#`OvAczNIAc@ays6hC9&SWb4+dQH(EVDc6gLF1QNg@63oancDNvo2)BPx9Jd)P`jL+-YKKY zn<}tQ6W%!y3>SaGAP+h>e9cel{)jgM>P_%QU8Hue%)+5MKq57oPCnUbd_2Qk2OO~! zuqe`mCJss9gDKVdlMcYB?dBli!Tp9HF-ZJkzbTrRIUXu30UQmT_9QcLfbAIq9sBSY z7|3Fi+4ioZ{V(zH+F$RE&xMHb8+1nTec~j*Q=y-wli*WNm0w*67RG{-9#Uz)7ZBpx zdBhpCt()mR*8?*i8VNeQDNQ0|@;BK0wF`;P=ide?L*{|99^4l-NdZ?3U$6>Af>-q3 z_O2Bwa~wnsbaUe3r9^oAx;sX>H4Tm|P*#Al!x7`2+(<+c>|7uU&Pg&C&ZYx!Gx!`j zM(wjjJQO?W7u;TWfgpU{c_uZz^+1FNSORT&@8N|g{O-L=jF1Fre0}4eY%s$e_`;w; z@ZoX&cjF*Cj~)>j!l%h-*gk-~fiIe~8MB8Bhi%8^&jYZV^YKgn)Jcza0BQ~!PoZqX zy=3f8ow3G2r1yWxcP_zBB1)METkC+tY=Y=E=%-l5@8QGBr%&{)9KK)w+pa4 zejkXidlyK|COC)aG_KE~v%oGAVRt(&F`M8_qR;O1qX`h+vLS*(lNNk<++L?7W#fCv zXV#vTaxhNyguWVGlq`kU35c|jPUpaf9a=>Tdjl|C*!QC?P!gLI_P>q;^Jl}$6!^U- zKJSs@tihWh4O{^|ye`6<1baF8%5{C86l|hwe-%o5FOQF8dmDtpo$g;@sLF@{jOcPb}J>p~{Y=v*rN_1YP#e}hOP-6}qZHXLVBGH!uda z>l{UPQx!rw(%9h%j`56WG5bU6H02t!p&ckgDhRDFD>WgcGxkg_m48RZUR zxwkL{N2lLgwS`zP!I=kx=C1YS4ezLciB)?%WOK0p0bk0RppfQdh_Ct(tWhIa8H$yH zkJMN^VF7j{h#x4ABBjlRko@xNzzs;?2klJ|AGugY+1p!j%igtYN zHC<}$8E^q*Z~?SUtgyxB^0g!l2+Ovqbr0_j$K0V!pvtD&;X|oHciZ7Bo0rsE116%B zTxbU9+WrJ*0xmcOa-khQc&*Fys)b;I0*Ewdg*|o02V-Z(g2^h(utmYe5W259HjoMf z19yIS+-;B~=;#pmF_#Rpu6l8A1#qPo5Mt0d)9)}L4D?Q+ojU1s{R2NEf?x8H=j8pv z!K6I!Ep(_705yoH*%gTo3#e`EmjYSFZqV|zI|~dyd0d$&AsB48kA<5IsbMhJh3o3^ zc}Ce!wN69S7C z0(x{71$C4eGXe37na-bH`C&R#?oJ5%sK3p*fa0)~J*-;r)#CUnOyfXuHU$D5+I=)% z>*U0CzLpmMl{ffc*2~?{QeglLIT1aiy!kMh=eH^WxZ{RHsOgT&O7WS>M=h+e*Pj(b zYxO{wXRAb*=<0L_R|ofR`O%Q{vfc8@_K%gIINmUqLA&q7YRN?77wMj>>gW(CjxDrz z)J&CiBHWHNlsGd{rX-(`0%kP$#(2a`+}9#ZcUISH`gPDhb}kIz>n;;x(RUrRxML6- z&`A9Fy%;(8K<${%J+NIFSWX}9cn>#=0;4&k&N6iSXgO@KIUj0lIAR6UFCwtc8}VU6 zOA3{%$o-KB9nVi4Vbk7fC4_eRUZ;@uKa-m_D0S$8xnPOGFj@prf)9_oQ%+X6^&niL zN&f65T3dEQ+i3zSWH|&M9@o8}2vM;0vr|i=ncl=5C0Q4Mg&q7wrz7K3B%uB^H|f}U z(iAsY9dI&t2zY2CaqBN$3cjzc%Kv@70GP$#Ypw!oV1fPEAQ2{{!r@LQdN`ygU`gli zPCA0W=f@L#xzn@Ek~@!=p@Y}b!4k_QNxJCjCZDJWx8Dc@5wt5z8!9OuUvv4xx81RK z57dJfq2$xU3CVwLMWS;d%ke@wR=3cy(E2@QAfAk`tsx~tc@5NW9dOZ=$0epB%=U~t z7W!Z>q0kF$@aaa67Hov86)9360j2+EPD;lTJYA&W(?8EP>2Uyj60V{b+PgDuEGfMU znY2O82-D7hEliYC;QX41^P5|c3BXsk-}g9H8_;Zl2D<_TA0F2c<`F_&Jq%cbSrLM@ zC4qn7qf45uUfu*XSPMfGG!CwrM1U4UR2=od8oRMD5Jtr)k>MmtMB6*Qj=^_S_l$rx zf!*YS4xzF=aEOuNw1mGsxK8_RWcc9c7nf{z1#kv{O$1iOLX?V^c(5Rb~_;l%&E;PzNb{ z6I{9?tSEQ^W5h_ZY$B2yV;#)BJ_hESvT!9)_dF1#V zFsdb#9M!clOdJ`%^q1V;XQV>N!glFvBc*>ePlPDJAV^oPbO$EduntAw^WaoL z$6$*z{9PC@DR>}255X6$a2~wjRj8UefcQloP%EXD3(l?w!*_HC$`_U_#lz9ZcBG4^ zlSu?Emd-R`u|th%k+h&N*kwoKa3Bz}X(sK`Mh3Cqy={@K2;=yOaN)NytUfMN=muuM rMk!W^S(`ZjlMHx|C~TUKhVu+wZr!2wVRMUAx|HDxc%f=E_Q(GNF?mt` diff --git a/modules/accord b/modules/accord index 3562bb3c9ce4..f78d1da27b09 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3562bb3c9ce4e9eecdf65e236e968ef3ee9e0a86 +Subproject commit f78d1da27b09f89417dd29bde0529f12cd744e3d diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index ab80ec4b3254..d6fb1a5011fc 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -70,4 +70,5 @@ public enum TransactionalRangeMigration */ public TransactionalMode default_transactional_mode = TransactionalMode.off; public boolean ephemeralReadEnabled = false; + public boolean state_cache_listener_jfr_enabled = true; } diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index c0c739ec827e..1e30b74437bb 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -212,6 +212,7 @@ public enum CassandraRelevantProperties */ DRAIN_EXECUTOR_TIMEOUT_MS("cassandra.drain_executor_timeout_ms", convertToString(TimeUnit.MINUTES.toMillis(5))), DROP_OVERSIZED_READ_REPAIR_MUTATIONS("cassandra.drop_oversized_readrepair_mutations"), + DTEST_ACCORD_ENABLED("jvm_dtest.accord.enabled", "true"), DTEST_API_LOG_TOPOLOGY("cassandra.dtest.api.log.topology"), /** This property indicates if the code is running under the in-jvm dtest framework */ DTEST_IS_IN_JVM_DTEST("org.apache.cassandra.dtest.is_in_jvm_dtest"), @@ -572,7 +573,7 @@ public enum CassandraRelevantProperties TCM_UNSAFE_BOOT_WITH_CLUSTERMETADATA("cassandra.unsafe_boot_with_clustermetadata", null), TCM_USE_ATOMIC_LONG_PROCESSOR("cassandra.test.use_atomic_long_processor", "false"), TCM_USE_NO_OP_REPLICATOR("cassandra.test.use_no_op_replicator", "false"), - + TEST_ACCORD_STORE_THREAD_CHECKS_ENABLED("cassandra.test.accord.store.thread_checks_enabled", "true"), TEST_BBFAILHELPER_ENABLED("test.bbfailhelper.enabled"), TEST_BLOB_SHARED_SEED("cassandra.test.blob.shared.seed", "42"), TEST_BYTEMAN_TRANSFORMATIONS_DEBUG("cassandra.test.byteman.transformations.debug"), diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index e32496a17262..b026ad5286e9 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5370,6 +5370,11 @@ public static void setAccordShardDurabilityCycleSeconds(long seconds) conf.accord.shard_durability_cycle = new DurationSpec.IntSecondsBound(seconds); } + public static boolean getAccordStateCacheListenerJFREnabled() + { + return conf.accord.state_cache_listener_jfr_enabled; + } + public static boolean getForceNewPreparedStatementBehaviour() { return conf.force_new_prepared_statement_behaviour; diff --git a/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java b/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java new file mode 100644 index 000000000000..411c3fb5ece6 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java @@ -0,0 +1,721 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.Closeable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.function.Supplier; +import java.util.zip.CRC32C; +import java.util.zip.Checksum; + +import accord.utils.AsymmetricComparator; +import accord.utils.CheckpointIntervalArray; +import accord.utils.CheckpointIntervalArrayBuilder; +import accord.utils.CheckpointIntervalArrayBuilder.Accessor; +import accord.utils.SortedArrays; +import org.apache.cassandra.index.accord.IndexDescriptor.IndexComponent; +import org.apache.cassandra.io.util.ChecksumedRandomAccessReader; +import org.apache.cassandra.io.util.ChecksumedSequentialWriter; +import org.apache.cassandra.io.util.FileHandle; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.io.util.RandomAccessReader; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.Throwables; + +import static accord.utils.CheckpointIntervalArrayBuilder.Links.LINKS; +import static accord.utils.CheckpointIntervalArrayBuilder.Strategy.ACCURATE; + +//TODO (now): Add support for variable length tokens; this is needed for Ordered partitioner (which we plan to support) +public class CheckpointIntervalArrayIndex +{ + private static final Accessor LIST_INTERVAL_ACCESSOR = new Accessor<>() + { + @Override + public int size(Interval[] intervals) + { + return intervals.length; + } + + @Override + public Interval get(Interval[] intervals, int index) + { + return intervals[index]; + } + + @Override + public byte[] start(Interval[] intervals, int index) + { + return intervals[index].start; + } + + @Override + public byte[] start(Interval interval) + { + return interval.start; + } + + @Override + public byte[] end(Interval[] intervals, int index) + { + return intervals[index].end; + } + + @Override + public byte[] end(Interval interval) + { + return interval.end; + } + + @Override + public Comparator keyComparator() + { + return (a, b) -> ByteArrayUtil.compareUnsigned(a, 0, b, 0, a.length); + } + + @Override + public int binarySearch(Interval[] intervals, int from, int to, byte[] find, AsymmetricComparator comparator, SortedArrays.Search op) + { + return SortedArrays.binarySearch(intervals, from, to, find, comparator, op); + } + }; + public static final Supplier CHECKSUM_SUPPLIER = CRC32C::new; + + //TODO (performance): rather than row structure, would column structure be better? Sorted tokens tend to have prefix relationships + // so could compress the data more. The negative here is the binary search might cost more and the scan after the random access needs end + value... + // Its also possible to do a hybrid structure where either start/end are column and the other one is row based... + //TODO (performance): store min/max values so could filter based off metadata without having to walk the tree first? This means that the metadata + // doesn't need to be stored in-memory 100% of the time and only when a file "could" match. The perf here would trade read costs for less memory. + //TODO (fault tolerence): right now there is no checksumming outside of the header, so a corruption in the middle + // could lead to weird behavior... since this structure is fixed lenght it "should" only lead to mismatches or binary + // search going the wrong direction... + //TODO (fault tolerence): maybe replace readStart/End with readRecord and extrat the value from there, this makes it so it would be trivial to add a checksum per-record. + // Given the migration from SAI work, we can now remove the TableId from the data (16 bytes) so a 4 byte footer wouldn't be a big cost. We also compute the checksum on read/write + // right now, just ignore the value... the performance is currently better than with SAI (less overhead as we are not generic), so the checksumming costs are effectivally 0. + public static class SortedListWriter + { + private final int bytesPerKey, bytesPerValue; + + public SortedListWriter(int bytesPerKey, int bytesPerValue) + { + this.bytesPerKey = bytesPerKey; + this.bytesPerValue = bytesPerValue; + } + + public long write(ChecksumedSequentialWriter out, Interval[] sortedIntervals, Callback callback) throws IOException + { + long treeFilePointer = out.getFilePointer(); + // write header + out.resetChecksum(); // reset checksum so the header is isolated + out.writeUnsignedVInt32(bytesPerKey); + out.writeUnsignedVInt32(bytesPerValue); + out.writeUnsignedVInt32(sortedIntervals.length); + out.writeInt(out.getValue32AndResetChecksum()); + + // write values + callback.preWalk(sortedIntervals); + int count = 0; + for (Interval it : sortedIntervals) + { + validate(count, it); + + out.resetChecksum(); + out.write(it.start, 0, it.start.length); + out.write(it.end, 0, it.end.length); + out.write(it.value, 0, it.value.length); + out.writeInt(out.getValue32()); + callback.onWrite(count, it); + count++; + } + //TODO (now): don't need as this was here only for SAI. Offset/position are the same now + return count == 0 ? -1 : treeFilePointer; + } + + private void validate(int numIntervals, Interval it) + { + if (it.start.length != bytesPerKey) + throw new IllegalArgumentException("Interval " + numIntervals + "'s start value is size " + it.start.length + ", but expected " + bytesPerKey); + if (it.end.length != bytesPerKey) + throw new IllegalArgumentException("Interval " + numIntervals + "'s end value is size " + it.end.length + ", but expected " + bytesPerKey); + if (it.value.length != bytesPerValue) + throw new IllegalArgumentException("Interval " + numIntervals + "'s value is size " + it.value.length + ", but expected " + bytesPerValue); + } + + public interface Callback + { + default void preWalk(Interval[] sortedIntervals) throws IOException + { + } + + default void onWrite(int index, Interval interval) throws IOException + { + } + } + } + + public static class SortedListReader implements Closeable + { + private final FileHandle fh; + private final long firstRecordOffset; + private final int bytesPerKey, bytesPerValue, recordSize; + private final int count; + + public SortedListReader(FileHandle fh, long pos) + { + this.fh = fh; + + try (RandomAccessReader reader = fh.createReader(); + ChecksumedRandomAccessReader in = new ChecksumedRandomAccessReader(reader, CHECKSUM_SUPPLIER)) + { + if (pos != -1) + in.seek(pos); + + bytesPerKey = in.readUnsignedVInt32(); + bytesPerValue = in.readUnsignedVInt32(); + recordSize = bytesPerKey * 2 + bytesPerValue + Integer.BYTES; + count = in.readUnsignedVInt32(); + int actualChecksum = in.getValue32AndResetChecksum(); + int expectedChecksum = in.readInt(); + assert actualChecksum == expectedChecksum; + firstRecordOffset = reader.getFilePointer(); + } + catch (Throwable t) + { + FileUtils.closeQuietly(fh); + throw Throwables.unchecked(t); + } + } + + @Override + public void close() throws IOException + { + FileUtils.closeQuietly(fh); + } + + public enum SeekReason + {BINARY_SEARCH, GET, SCAN} + + private boolean maybeSeek(ChecksumedRandomAccessReader indexInput, Stats stats, SeekReason reason, long target) throws IOException + { + if (indexInput.getFilePointer() != target) + { + indexInput.seek(target); + switch (reason) + { + case SCAN: + stats.seekForScan++; + break; + case GET: + stats.seekForGet++; + break; + case BINARY_SEARCH: + stats.seekForBinarySearch++; + break; + default: + throw new IllegalArgumentException("Unknown reason: " + reason); + } + return true; + } + return false; + } + + public byte[] getRecord(ChecksumedRandomAccessReader indexInput, Stats stats, SeekReason reason, byte[] recordBuffer, int pos) throws IOException + { + maybeSeek(indexInput, stats, reason, fileOffsetStart(pos)); + return getCurrentRecord(indexInput, stats, recordBuffer); + } + + public byte[] getCurrentRecord(ChecksumedRandomAccessReader indexInput, Stats stats, byte[] recordBuffer) throws IOException + { + stats.bytesRead += recordBuffer.length + Integer.BYTES; + indexInput.resetChecksum(); + indexInput.readFully(recordBuffer, 0, recordBuffer.length); + int actualChecksum = indexInput.getValue32(); + int expectedChecksum = indexInput.readInt(); + assert actualChecksum == expectedChecksum; + return recordBuffer; + } + + public byte[] readStart(ChecksumedRandomAccessReader indexInput, Stats stats, byte[] recordBuffer, byte[] keyBuffer, int pos) throws IOException + { + getRecord(indexInput, stats, SeekReason.GET, recordBuffer, pos); + copyStart(recordBuffer, keyBuffer); + return keyBuffer; + } + + public byte[] readEnd(ChecksumedRandomAccessReader indexInput, Stats stats, byte[] recordBuffer, byte[] keyBuffer, int pos) throws IOException + { + getRecord(indexInput, stats, SeekReason.GET, recordBuffer, pos); + copyEnd(recordBuffer, keyBuffer); + return keyBuffer; + } + + public byte[] copyStart(byte[] recordBuffer, byte[] keyBuffer) + { + System.arraycopy(recordBuffer, 0, keyBuffer, 0, keyBuffer.length); + return keyBuffer; + } + + public byte[] copyEnd(byte[] recordBuffer, byte[] keyBuffer) + { + System.arraycopy(recordBuffer, bytesPerKey, keyBuffer, 0, bytesPerKey); + return keyBuffer; + } + + public int binarySearch(ChecksumedRandomAccessReader indexInput, Stats stats, byte[] recordBuffer, int from, int to, byte[] find, AsymmetricComparator comparator, SortedArrays.Search op) throws IOException + { + int found = -1; + while (from < to) + { + int i = (from + to) >>> 1; + int c = comparator.compare(find, getRecord(indexInput, stats, SeekReason.BINARY_SEARCH, recordBuffer, i)); + if (c < 0) + { + to = i; + } + else if (c > 0) + { + from = i + 1; + } + else + { + switch (op) + { + default: + throw new IllegalStateException("Unknown search operation: " + op); + case FAST: + return i; + + case CEIL: + to = found = i; + break; + + case FLOOR: + found = i; + from = i + 1; + } + } + } + // return -(low + 1); // key not found. + return found >= 0 ? found : -1 - to; + } + + public Interval copyTo(byte[] record, Interval buffer) + { + buffer.start = Arrays.copyOfRange(record, 0, bytesPerKey); + buffer.end = Arrays.copyOfRange(record, bytesPerKey, bytesPerKey * 2); + buffer.value = Arrays.copyOfRange(record, bytesPerKey * 2, record.length); + return buffer; + } + + private long fileOffsetStart(int offset) + { + if (offset >= count) + throw new IndexOutOfBoundsException("Start is from (0, " + count + "]; attempted to access " + offset); + return firstRecordOffset + (offset * recordSize); + } + + private long fileOffsetEnd(int offset) + { + if (offset >= count) + throw new IndexOutOfBoundsException("Start is from (0, " + count + "]; attempted to access " + offset); + return firstRecordOffset + (offset * recordSize) + bytesPerKey; + } + + private long fileOffsetValue(int offset) + { + if (offset >= count) + throw new IndexOutOfBoundsException("Start is from (0, " + count + "]; attempted to access " + offset); + return firstRecordOffset + (offset * recordSize) + bytesPerKey * 2; + } + } + + public static class CheckpointWriter implements SortedListWriter.Callback, Closeable + { + private final ChecksumedSequentialWriter out; + private final long offset; + private final long position; //TODO (now): don't need as this was here only for SAI. Offset/position are the same now + private long length = -1; + + public CheckpointWriter(ChecksumedSequentialWriter out) + { + this.out = out; + this.offset = position = out.getFilePointer(); + } + + @Override + public void preWalk(Interval[] sortedIntervals) throws IOException + { + class Checkpoints + { + final int[] bounds, headers, lists; + final int maxScanAndCheckpointMatches; + + Checkpoints(int[] bounds, int[] headers, int[] lists, int maxScanAndCheckpointMatches) + { + this.bounds = bounds; + this.headers = headers; + this.lists = lists; + this.maxScanAndCheckpointMatches = maxScanAndCheckpointMatches; + } + } + Checkpoints c = new CheckpointIntervalArrayBuilder<>(LIST_INTERVAL_ACCESSOR, sortedIntervals, ACCURATE, LINKS).build((ignore, bounds, headers, lists, max) -> new Checkpoints(bounds, headers, lists, max)); + out.resetChecksum(); // reset checksum so it only covers this metadata + out.writeUnsignedVInt32(c.maxScanAndCheckpointMatches); + write(c.bounds); + write(c.headers); + write(c.lists); + out.writeInt(out.getValue32AndResetChecksum()); + } + + private void write(int[] array) throws IOException + { + out.writeUnsignedVInt32(array.length); + for (int i = 0; i < array.length; i++) + out.writeVInt32(array[i]); + } + + @Override + public void close() throws IOException + { + length = out.getFilePointer() - offset; + out.close(); + } + } + + //TODO (performance): the current format assumes random list access is cheap, which isn't true for a disk index. + // This format was chosen as a place holder for now so we don't drift from the in-memory logic; in the original paper + // a new sorted list is used for each checkpoint, which then makes the access a sequential scan rather than random access. + public static class CheckpointReader implements Closeable + { + private final FileHandle fh; + private final int[] bounds, headers, lists; + private final int maxScanAndCheckpointMatches; + + public CheckpointReader(FileHandle fh, long pos) + { + this.fh = fh; + try (RandomAccessReader reader = fh.createReader(); + ChecksumedRandomAccessReader input = new ChecksumedRandomAccessReader(reader, CHECKSUM_SUPPLIER)) + { + if (pos != -1) + input.seek(pos); + + input.resetChecksum(); // reset checksum so it only covers this metadata + maxScanAndCheckpointMatches = input.readUnsignedVInt32(); + bounds = readArray(input); + headers = readArray(input); + lists = readArray(input); + int actualChecksum = input.getValue32AndResetChecksum(); + int expectedChecksum = input.readInt(); + assert actualChecksum == expectedChecksum; + } + catch (Throwable t) + { + FileUtils.closeQuietly(fh); + throw Throwables.unchecked(t); + } + } + + private static int[] readArray(ChecksumedRandomAccessReader input) throws IOException + { + int size = input.readUnsignedVInt32(); + int[] array = new int[size]; + for (int i = 0; i < size; i++) + array[i] = input.readVInt32(); + return array; + } + + @Override + public void close() throws IOException + { + FileUtils.closeQuietly(fh); + } + } + + public static class SegmentWriter + { + private final IndexDescriptor id; + private final SortedListWriter writer; + + public SegmentWriter(IndexDescriptor id, int bytesPerKey, int bytesPerValue) + { + this.id = id; + this.writer = new SortedListWriter(bytesPerKey, bytesPerValue); + } + + public EnumMap write(Interval[] sortedIntervals) throws IOException + { + EnumMap metas = new EnumMap<>(IndexComponent.class); + try (ChecksumedSequentialWriter treeOutput = ChecksumedSequentialWriter.open(id.fileFor(IndexComponent.CINTIA_SORTED_LIST), true, CHECKSUM_SUPPLIER); + CheckpointWriter checkpointWriter = new CheckpointWriter(ChecksumedSequentialWriter.open(id.fileFor(IndexComponent.CINTIA_CHECKPOINTS), true, CHECKSUM_SUPPLIER))) + { + // The SSTable component file is opened in append mode, so our offset is the current file pointer. + long sortedOffset = treeOutput.getFilePointer(); + long sortedPosition = writer.write(treeOutput, sortedIntervals, checkpointWriter); + + // If the treePosition is less than 0 then we didn't write any values out and the index is empty + if (sortedPosition < 0) + return metas; + //TODO (now): currently does SAI header so offset isn't correct here and need position + metas.put(IndexComponent.CINTIA_SORTED_LIST, new Segment.ComponentMetadata(sortedPosition, treeOutput.getFilePointer())); + metas.put(IndexComponent.CINTIA_CHECKPOINTS, new Segment.ComponentMetadata(checkpointWriter.position, checkpointWriter.out.getFilePointer())); + } + return metas; + } + } + + public static class Stats + { + int seekForGet, seekForBinarySearch, seekForScan; + long durationNs, bytesRead, matches; + + @Override + public String toString() + { + return "Stats{" + + "seeks={Get=" + seekForGet + + ", BinarySearch=" + seekForBinarySearch + + ", Scan=" + seekForScan + + "}, bytesRead=" + bytesRead + + ", matches=" + matches + + ", duration_micro=" + TimeUnit.NANOSECONDS.toMicros(durationNs) + + '}'; + } + } + + public static class SegmentSearcher implements Closeable + { + private final SortedListReader reader; + private final CheckpointReader checkpoints; + + public SegmentSearcher(FileHandle sortedListFile, + long sortedListPosition, + FileHandle checkpointFile, + long checkpointPosition) + { + this.reader = new SortedListReader(sortedListFile, sortedListPosition); + this.checkpoints = new CheckpointReader(checkpointFile, checkpointPosition); + } + + public Stats intersects(byte[] start, byte[] end, Consumer callback) throws IOException + { + byte[] keyBuffer = new byte[reader.bytesPerKey]; + byte[] recordBuffer = new byte[reader.recordSize - Integer.BYTES]; + Stats stats = new Stats(); + long startNanos = Clock.Global.nanoTime(); + try (ChecksumedRandomAccessReader indexInput = new ChecksumedRandomAccessReader(reader.fh.createReader(), CHECKSUM_SUPPLIER)) + { + Interval buffer = new Interval(); + Accessor accessor = new Accessor<>() + { + @Override + public int size(ChecksumedRandomAccessReader indexInput) + { + return reader.count; + } + + @Override + public byte[] get(ChecksumedRandomAccessReader indexInput, int index) + { + try + { + return reader.getRecord(indexInput, stats, SortedListReader.SeekReason.GET, recordBuffer, index); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + @Override + public byte[] start(ChecksumedRandomAccessReader indexInput, int index) + { + try + { + return reader.readStart(indexInput, stats, recordBuffer, keyBuffer, index); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + @Override + public byte[] start(byte[] bytes) + { + return reader.copyStart(bytes, keyBuffer); + } + + @Override + public byte[] end(ChecksumedRandomAccessReader indexInput, int index) + { + try + { + return reader.readEnd(indexInput, stats, recordBuffer, keyBuffer, index); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + @Override + public byte[] end(byte[] bytes) + { + return reader.copyEnd(bytes, keyBuffer); + } + + @Override + public Comparator keyComparator() + { + return (a, b) -> ByteArrayUtil.compareUnsigned(a, 0, b, 0, a.length); + } + + @Override + public int binarySearch(ChecksumedRandomAccessReader indexInput, int from, int to, byte[] find, AsymmetricComparator comparator, SortedArrays.Search op) + { + try + { + return reader.binarySearch(indexInput, stats, recordBuffer, from, to, find, comparator, op); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + }; + CheckpointIntervalArray searcher = new CheckpointIntervalArray<>(accessor, indexInput, checkpoints.bounds, checkpoints.headers, checkpoints.lists, checkpoints.maxScanAndCheckpointMatches); + + searcher.forEach(start, end, (i1, i2, i3, i4, index) -> { + stats.matches++; + callback.accept(reader.copyTo(accessor.get(indexInput, index), buffer)); + }, (i1, i2, i3, i4, startIdx, endIdx) -> { + try + { + reader.maybeSeek(indexInput, stats, SortedListReader.SeekReason.SCAN, reader.fileOffsetStart(startIdx)); + for (int i = startIdx; i < endIdx; i++) + { + stats.matches++; + reader.getCurrentRecord(indexInput, stats, recordBuffer); + callback.accept(reader.copyTo(recordBuffer, buffer)); + } + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + }, 0, 0, 0, 0, 0); + } + finally + { + stats.durationNs = Clock.Global.nanoTime() - startNanos; + } + return stats; + } + + @Override + public void close() + { + FileUtils.closeQuietly(checkpoints); + FileUtils.closeQuietly(reader); + } + } + + public static class Interval implements Comparable + { + public byte[] start, end, value; // mutable to avoid allocating Interval for every element + + public Interval() + { + } + + public Interval(byte[] start, byte[] end, byte[] value) + { + this.start = start; + this.end = end; + this.value = value; + } + + public Interval(Interval other) + { + this.start = other.start; + this.end = other.end; + this.value = other.value; + } + + @Override + public int compareTo(Interval b) + { + int rc = compareStart(b); + if (rc == 0) + rc = ByteArrayUtil.compareUnsigned(value, 0, b.value, 0, value.length); + return rc; + } + + public int compareStart(Interval b) + { + int rc = ByteArrayUtil.compareUnsigned(start, 0, b.start, 0, start.length); + if (rc == 0) + rc = ByteArrayUtil.compareUnsigned(end, 0, b.end, 0, end.length); + return rc; + } + + public int compareEnd(Interval b) + { + int rc = ByteArrayUtil.compareUnsigned(end, 0, b.end, 0, end.length); + if (rc == 0) + rc = ByteArrayUtil.compareUnsigned(start, 0, b.start, 0, start.length); + return rc; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Interval interval = (Interval) o; + return Arrays.equals(start, interval.start) && Arrays.equals(end, interval.end) && Arrays.equals(value, interval.value); + } + + @Override + public int hashCode() + { + int result = Arrays.hashCode(start); + result = 31 * result + Arrays.hashCode(end); + result = 31 * result + Arrays.hashCode(value); + return result; + } + + public boolean intersects(byte[] start, byte[] end) + { + if (ByteArrayUtil.compareUnsigned(this.start, end) >= 0) + return false; + if (ByteArrayUtil.compareUnsigned(this.end, start) <= 0) + return false; + return true; + } + } +} diff --git a/src/java/org/apache/cassandra/index/accord/Group.java b/src/java/org/apache/cassandra/index/accord/Group.java new file mode 100644 index 000000000000..e8e4d332949a --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/Group.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.util.Objects; + +import org.apache.cassandra.schema.TableId; + +public class Group implements Comparable +{ + public final int storeId; + public final TableId tableId; + + public Group(int storeId, TableId tableId) + { + this.storeId = storeId; + this.tableId = tableId; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Group group = (Group) o; + return storeId == group.storeId && Objects.equals(tableId, group.tableId); + } + + @Override + public int hashCode() + { + return Objects.hash(storeId, tableId); + } + + @Override + public String toString() + { + return "Group{" + + "storeId=" + storeId + + ", tableId=" + tableId + + '}'; + } + + @Override + public int compareTo(Group o) + { + int rc = Integer.compare(storeId, o.storeId); + if (rc == 0) + rc = tableId.compareTo(o.tableId); + return rc; + } +} diff --git a/src/java/org/apache/cassandra/index/accord/IndexDescriptor.java b/src/java/org/apache/cassandra/index/accord/IndexDescriptor.java new file mode 100644 index 000000000000..b549751a3fb7 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/IndexDescriptor.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.util.Collection; +import java.util.function.Function; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.cassandra.db.ClusteringComparator; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.io.sstable.Component; +import org.apache.cassandra.io.sstable.Descriptor; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.util.File; + +public class IndexDescriptor +{ + public enum Version + { + v1("aa", c -> defaultFileNameFormat(c, "aa")); + + public static final Version CURRENT = v1; + + public final String versionString; + public final Function fileNameFormatter; + + Version(String versionString, Function fileNameFormatter) + { + this.versionString = versionString; + this.fileNameFormatter = fileNameFormatter; + } + + + private static String defaultFileNameFormat(IndexComponent indexComponent, String version) + { + StringBuilder sb = new StringBuilder(); + + sb.append(IndexComponent.DESCRIPTOR).append(IndexComponent.SEPARATOR) + .append(version).append(IndexComponent.SEPARATOR) + .append(indexComponent.name).append(Descriptor.EXTENSION); + + return sb.toString(); + } + } + + public enum IndexComponent + { + CINTIA_SORTED_LIST("CintiaSortedList", (byte) 1), + CINTIA_CHECKPOINTS("CintiaCheckpoints", (byte) 2), + SEGMENT("Segement", (byte) 3), + METADATA("Metadata", (byte) 4); + + public static final String DESCRIPTOR = "ACCORD"; + public static final String SEPARATOR = "+"; + + public final String name; + public final Component.Type type; + public final byte value; + + IndexComponent(String name, byte value) + { + this.name = name; + this.type = componentType(name); + this.value = value; + } + + private static Component.Type componentType(String name) + { + String componentName = DESCRIPTOR + SEPARATOR + name; + String repr = Pattern.quote(DESCRIPTOR + SEPARATOR) + + ".*" + + Pattern.quote(SEPARATOR + name + ".db"); + return Component.Type.create(componentName, repr, true, null); + } + + public static IndexComponent fromByte(byte b) + { + switch (b) + { + case 1: return CINTIA_SORTED_LIST; + case 2: return CINTIA_CHECKPOINTS; + case 3: return SEGMENT; + case 4: return METADATA; + default:throw new IllegalArgumentException("Unknow byte: " + b); + } + } + } + + public final Version version; + public final Descriptor sstableDescriptor; + public final IPartitioner partitioner; + public final ClusteringComparator clusteringComparator; + + public IndexDescriptor(Version version, Descriptor sstableDescriptor, IPartitioner partitioner, ClusteringComparator clusteringComparator) + { + this.version = version; + this.sstableDescriptor = sstableDescriptor; + this.partitioner = partitioner; + this.clusteringComparator = clusteringComparator; + } + + public static IndexDescriptor create(SSTableReader sstable) + { + for (Version version : Version.values()) + { + IndexDescriptor id = new IndexDescriptor(version, sstable.descriptor, sstable.getPartitioner(), sstable.metadata().comparator); + if (id.isIndexBuildComplete()) + return id; + } + return new IndexDescriptor(Version.CURRENT, sstable.descriptor, sstable.getPartitioner(), sstable.metadata().comparator); + } + + public static IndexDescriptor create(Descriptor descriptor, IPartitioner partitioner, ClusteringComparator comparator) + { + return new IndexDescriptor(Version.CURRENT, descriptor, partitioner, comparator); + } + + public boolean isIndexBuildComplete() + { + return hasComponent(IndexComponent.METADATA); + } + + public boolean hasComponent(IndexComponent indexComponent) + { + return fileFor(indexComponent).exists(); + } + + public File fileFor(IndexComponent indexComponent) + { + Component c = indexComponent.type.createComponent(version.fileNameFormatter.apply(indexComponent)); + return sstableDescriptor.fileFor(c); + } + + public void deleteIndex() + { + Stream.of(IndexComponent.values()).map(this::fileFor).forEach(File::deleteIfExists); + } + + public Collection getLiveSSTableComponents() + { + return Stream.of(IndexComponent.values()) + .map(c -> c.type.createComponent(version.fileNameFormatter.apply(c))) + .filter(c -> sstableDescriptor.fileFor(c).exists()) + .collect(Collectors.toList()); + } + + public Collection getLiveComponents() + { + return Stream.of(IndexComponent.values()) + .filter(c -> fileFor(c).exists()) + .collect(Collectors.toList()); + } +} diff --git a/src/java/org/apache/cassandra/index/accord/IndexMetrics.java b/src/java/org/apache/cassandra/index/accord/IndexMetrics.java new file mode 100644 index 000000000000..29560822350f --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/IndexMetrics.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.util.ArrayList; +import java.util.List; + +import com.codahale.metrics.Timer; +import org.apache.cassandra.metrics.CassandraMetricsRegistry; +import org.apache.cassandra.metrics.DefaultNameFactory; + +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; + +// Stolen from org.apache.cassandra.index.sai.metrics.AbstractMetrics +public class IndexMetrics +{ + private static final String TYPE = "RouteIndex"; + private static final String SCOPE = "IndexMetrics"; + + private final List tracked = new ArrayList<>(); + + private final String ks; + private final String table; + private final String indexName; + public final Timer memtableIndexWriteLatency; + + public IndexMetrics(RouteIndex index) + { + this.ks = index.baseCfs().getKeyspaceName(); + this.table = index.baseCfs().name; + this.indexName = index.getIndexMetadata().name; + memtableIndexWriteLatency = Metrics.timer(createMetricName("MemtableIndexWriteLatency")); + } + + public void release() + { + tracked.forEach(Metrics::remove); + tracked.clear(); + } + + private CassandraMetricsRegistry.MetricName createMetricName(String name) + { + String metricScope = ks + '.' + table; + if (indexName != null) + { + metricScope += '.' + indexName; + } + metricScope += '.' + SCOPE + '.' + name; + + CassandraMetricsRegistry.MetricName metricName = new CassandraMetricsRegistry.MetricName(DefaultNameFactory.GROUP_NAME, + TYPE, name, metricScope, createMBeanName(name, SCOPE)); + tracked.add(metricName); + return metricName; + } + + private String createMBeanName(String name, String scope) + { + StringBuilder builder = new StringBuilder(); + builder.append(DefaultNameFactory.GROUP_NAME); + builder.append(":type=").append(TYPE); + builder.append(',').append("keyspace=").append(ks); + builder.append(',').append("table=").append(table); + if (indexName != null) + builder.append(',').append("index=").append(indexName); + builder.append(',').append("scope=").append(scope); + builder.append(',').append("name=").append(name); + return builder.toString(); + } +} diff --git a/src/java/org/apache/cassandra/index/accord/MemtableIndex.java b/src/java/org/apache/cassandra/index/accord/MemtableIndex.java new file mode 100644 index 000000000000..0f0125861be5 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/MemtableIndex.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.concurrent.atomic.LongAdder; + +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.schema.TableId; + +public class MemtableIndex +{ + private final RangeMemoryIndex memoryIndex = new RangeMemoryIndex(); + private final LongAdder writeCount = new LongAdder(); + private final LongAdder estimatedMemoryUsed = new LongAdder(); + + public long writeCount() + { + return writeCount.sum(); + } + + public long estimatedMemoryUsed() + { + return estimatedMemoryUsed.sum(); + } + + public boolean isEmpty() + { + return memoryIndex.isEmpty(); + } + + public long index(DecoratedKey key, Clustering clustering, ByteBuffer value) + { + if (value == null || value.remaining() == 0) + return 0; + long size = memoryIndex.add(key, clustering, value); + writeCount.increment(); + estimatedMemoryUsed.add(size); + return size; + } + + public Segment write(IndexDescriptor id) throws IOException + { + return memoryIndex.write(id); + } + + public Collection search(int storeId, TableId tableId, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive) + { + return memoryIndex.search(storeId, tableId, start, startInclusive, end, endInclusive); + } +} diff --git a/src/java/org/apache/cassandra/index/accord/MemtableIndexManager.java b/src/java/org/apache/cassandra/index/accord/MemtableIndexManager.java new file mode 100644 index 000000000000..1078b4fc04a2 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/MemtableIndexManager.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.nio.ByteBuffer; +import java.util.NavigableSet; + +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; +import org.apache.cassandra.db.memtable.Memtable; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.schema.TableId; + +public interface MemtableIndexManager +{ + long index(DecoratedKey key, Row row, Memtable mt); + + MemtableIndex getPendingMemtableIndex(LifecycleNewTracker tracker); + + void discardMemtable(Memtable memtable); + + void renewMemtable(Memtable renewed); + + NavigableSet search(int storeId, TableId tableId, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive); +} diff --git a/src/java/org/apache/cassandra/index/accord/OrderedRouteSerializer.java b/src/java/org/apache/cassandra/index/accord/OrderedRouteSerializer.java new file mode 100644 index 000000000000..f9e5c22b4726 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/OrderedRouteSerializer.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.serializers.AccordRoutingKeyByteSource; + +public class OrderedRouteSerializer +{ + private static final AccordRoutingKeyByteSource.FixedLength SERIALIZER = AccordRoutingKeyByteSource.fixedLength(DatabaseDescriptor.getPartitioner()); + + public static ByteBuffer serializeRoutingKey(AccordRoutingKey key) + { + return ByteBuffer.wrap(SERIALIZER.serialize(key)); + } + + public static byte[] serializeRoutingKeyNoTable(AccordRoutingKey key) + { + return SERIALIZER.serializeNoTable(key); + } + + public static byte[] unwrap(AccordRoutingKey key) + { + return SERIALIZER.serialize(key); + } + + public static AccordRoutingKey deserializeRoutingKey(ByteBuffer bb) + { + try + { + return SERIALIZER.fromComparableBytes(ByteBufferAccessor.instance, bb); + } + catch (IOException e) + { + throw new UnsupportedOperationException(e); + } + } +} diff --git a/src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java b/src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java new file mode 100644 index 000000000000..5581708171ba --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.stream.Collectors; +import javax.annotation.concurrent.GuardedBy; + +import accord.primitives.Routable; +import accord.primitives.Route; +import accord.primitives.Unseekable; +import org.apache.cassandra.cache.IMeasurableMemory; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.RTree; +import org.apache.cassandra.utils.RangeTree; + +public class RangeMemoryIndex +{ + @GuardedBy("this") + private final Map> map = new HashMap<>(); + @GuardedBy("this") + private final Map groupMetadata = new HashMap<>(); + + private static class Metadata + { + public byte[] minTerm, maxTerm; + } + + private static RangeTree createRangeTree() + { + return new RTree<>((a, b) -> ByteArrayUtil.compareUnsigned(a, 0, b, 0, a.length), new RangeTree.Accessor<>() + { + @Override + public byte[] start(Range range) + { + return range.start; + } + + @Override + public byte[] end(Range range) + { + return range.end; + } + + @Override + public boolean contains(byte[] start, byte[] end, byte[] bytes) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean intersects(Range range, byte[] start, byte[] end) + { + return range.intersects(start, end); + } + + @Override + public boolean intersects(Range left, Range right) + { + return left.intersects(right.start, right.end); + } + }); + } + + public synchronized long add(DecoratedKey key, Clustering clustering, ByteBuffer value) + { + Route route; + try + { + route = AccordKeyspace.deserializeRouteOrNull(value); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + + return add(key, route); + } + + + public synchronized long add(DecoratedKey key, Route route) + { + if (route.domain() != Routable.Domain.Range) + return 0; + long sum = 0; + for (Unseekable keyOrRange : route) + sum += add(key, keyOrRange); + return sum; + } + + protected long add(DecoratedKey key, Unseekable keyOrRange) + { + if (keyOrRange.domain() != Routable.Domain.Range) + throw new IllegalArgumentException("Unexpected domain: " + keyOrRange.domain()); + TokenRange ts = (TokenRange) keyOrRange; + + int storeId = AccordKeyspace.CommandRows.getStoreId(key); + TableId tableId = ts.table(); + Group group = new Group(storeId, tableId); + byte[] start = OrderedRouteSerializer.serializeRoutingKeyNoTable((AccordRoutingKey) ts.start()); + byte[] end = OrderedRouteSerializer.serializeRoutingKeyNoTable((AccordRoutingKey) ts.end()); + Range range = new Range(start, end); + map.computeIfAbsent(group, ignore -> createRangeTree()).add(range, key); + Metadata metadata = groupMetadata.computeIfAbsent(group, ignore -> new Metadata()); + + metadata.minTerm = metadata.minTerm == null ? start : ByteArrayUtil.compareUnsigned(metadata.minTerm, 0, start, 0, metadata.minTerm.length) > 0 ? start : metadata.minTerm; + metadata.maxTerm = metadata.maxTerm == null ? end : ByteArrayUtil.compareUnsigned(metadata.maxTerm, 0, end, 0, metadata.maxTerm.length) < 0 ? end : metadata.maxTerm; + return TableId.EMPTY_SIZE + range.unsharedHeapSize(); + } + + public NavigableSet search(int storeId, TableId tableId, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive) + { + RangeTree rangesToPks = map.get(new Group(storeId, tableId)); + if (rangesToPks == null || rangesToPks.isEmpty()) + return Collections.emptyNavigableSet(); + TreeMap> matches = search(rangesToPks, start, end); + if (matches.isEmpty()) + return Collections.emptyNavigableSet(); + TreeSet pks = new TreeSet<>(); + matches.values().forEach(s -> s.forEach(d -> pks.add(d.getKey()))); + return pks; + } + + private TreeMap> search(RangeTree tokensToPks, byte[] start, byte[] end) + { + + TreeMap> matches = new TreeMap<>(); + tokensToPks.search(new Range(start, end), e -> matches.computeIfAbsent(e.getKey(), ignore -> new HashSet<>()).add(e.getValue())); + return matches; + } + + public synchronized boolean isEmpty() + { + return map.isEmpty(); + } + + public Segment write(IndexDescriptor id) throws IOException + { + if (map.isEmpty()) + throw new AssertionError("Unable to write empty index"); + Map output = new HashMap<>(); + + List groups = new ArrayList<>(map.keySet()); + groups.sort(Comparator.naturalOrder()); + + for (Group group : groups) + { + RangeTree submap = map.get(group); + if (submap.isEmpty()) // is this possible? put here for safty so list is never empty + continue; + Metadata metadata = groupMetadata.get(group); + + //TODO (performance): if the RangeTree can return the data in sorted order, then this local can become faster + // Right now the code is based off RTree, which is undefined order, so we must iterate then sort; in testing this is a good chunk of the time of this method + List list = submap.stream() + .map(e -> new CheckpointIntervalArrayIndex.Interval(e.getKey().start, e.getKey().end, ByteBufferUtil.getArray(e.getValue().getKey()))) + .sorted(Comparator.naturalOrder()) + .collect(Collectors.toList()); + + CheckpointIntervalArrayIndex.SegmentWriter writer = new CheckpointIntervalArrayIndex.SegmentWriter(id, list.get(0).start.length, list.get(0).value.length); + EnumMap meta = writer.write(list.toArray(CheckpointIntervalArrayIndex.Interval[]::new)); + if (meta.isEmpty()) // don't include empty segments + continue; + output.put(group, new Segment.Metadata(meta, metadata.minTerm, metadata.maxTerm)); + } + + return new Segment(output); + } + + private static class Range implements Comparable, IMeasurableMemory + { + private static final long EMPTY_SIZE = ObjectSizes.measure(new Range(null, null)); + + private final byte[] start, end; + + private Range(byte[] start, byte[] end) + { + this.start = start; + this.end = end; + } + + @Override + public int compareTo(Range other) + { + int rc = ByteArrayUtil.compareUnsigned(start, 0, other.start, 0, start.length); + if (rc == 0) + rc = ByteArrayUtil.compareUnsigned(end, 0, other.end, 0, end.length); + return rc; + } + + @Override + public long unsharedHeapSize() + { + return EMPTY_SIZE + ObjectSizes.sizeOfArray(start) * 2; + } + + public boolean intersects(byte[] start, byte[] end) + { + if (ByteArrayUtil.compareUnsigned(this.start, 0, end, 0, end.length) >= 0) + return false; + if (ByteArrayUtil.compareUnsigned(this.end, 0, start, 0, start.length) <= 0) + return false; + return true; + } + } +} diff --git a/src/java/org/apache/cassandra/index/accord/RouteIndex.java b/src/java/org/apache/cassandra/index/accord/RouteIndex.java new file mode 100644 index 000000000000..8dcaf2067c8a --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/RouteIndex.java @@ -0,0 +1,604 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableSet; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.cql3.statements.schema.IndexTarget; +import org.apache.cassandra.db.CassandraWriteContext; +import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.DeletionTime; +import org.apache.cassandra.db.ReadCommand; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.WriteContext; +import org.apache.cassandra.db.compaction.CompactionManager; +import org.apache.cassandra.db.compaction.OperationType; +import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; +import org.apache.cassandra.db.lifecycle.Tracker; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.memtable.Memtable; +import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.db.rows.BTreeRow; +import org.apache.cassandra.db.rows.EncodingStats; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.index.Index; +import org.apache.cassandra.index.IndexRegistry; +import org.apache.cassandra.index.TargetParser; +import org.apache.cassandra.index.sai.StorageAttachedIndex; +import org.apache.cassandra.index.transactions.IndexTransaction; +import org.apache.cassandra.io.sstable.Component; +import org.apache.cassandra.io.sstable.Descriptor; +import org.apache.cassandra.io.sstable.SSTableFlushObserver; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.notifications.INotification; +import org.apache.cassandra.notifications.INotificationConsumer; +import org.apache.cassandra.notifications.MemtableDiscardedNotification; +import org.apache.cassandra.notifications.MemtableRenewedNotification; +import org.apache.cassandra.notifications.SSTableAddedNotification; +import org.apache.cassandra.notifications.SSTableListChangedNotification; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.IndexMetadata; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.AbstractIterator; +import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; + +public class RouteIndex implements Index, INotificationConsumer +{ + public enum RegisterStatus + {PENDING, REGISTERED, UNREGISTERED} + + private static final Logger logger = LoggerFactory.getLogger(RouteIndex.class); + + private static final Component.Type type = Component.Type.createSingleton("AccordRoute", "AccordRoute.*.db", true, null); + + private final ColumnFamilyStore baseCfs; + private final ColumnMetadata column; + private final IndexMetadata indexMetadata; + private final IndexMetrics indexMetrics; + private final MemtableIndexManager memtableIndexManager; + private final SSTableManager sstableManager; + // Tracks whether we've started the index build on initialization. + private volatile boolean initBuildStarted = false; + private volatile RegisterStatus registerStatus = RegisterStatus.PENDING; + + public RouteIndex(ColumnFamilyStore baseCfs, IndexMetadata indexMetadata) + { + if (!SchemaConstants.ACCORD_KEYSPACE_NAME.equals(baseCfs.getKeyspaceName())) + throw new IllegalArgumentException("Route index is only allowed for accord commands table; given " + baseCfs().metadata()); + if (!AccordKeyspace.COMMANDS.equals(baseCfs.name)) + throw new IllegalArgumentException("Route index is only allowed for accord commands table; given " + baseCfs().metadata()); + + TableMetadata tableMetadata = baseCfs.metadata(); + Pair target = TargetParser.parse(tableMetadata, indexMetadata); + if (!AccordKeyspace.CommandsColumns.route.name.equals(target.left.name)) + throw new IllegalArgumentException("Attempted to index the wrong column; needed " + AccordKeyspace.CommandsColumns.route.name + " but given " + target.left.name); + + if (target.right != IndexTarget.Type.VALUES) + throw new IllegalArgumentException("Attempted to index " + AccordKeyspace.CommandsColumns.route.name + " with index type " + target.right + "; only " + IndexTarget.Type.VALUES + " is supported"); + + this.baseCfs = baseCfs; + this.indexMetadata = indexMetadata; + this.memtableIndexManager = new RouteMemtableIndexManager(this); + this.sstableManager = new RouteSSTableManager(); + this.indexMetrics = new IndexMetrics(this); + this.column = target.left; + + Tracker tracker = baseCfs.getTracker(); + tracker.subscribe(this); + } + + public ColumnMetadata column() + { + return column; + } + + public IndexMetrics indexMetrics() + { + return indexMetrics; + } + + public ColumnFamilyStore baseCfs() + { + return baseCfs; + } + + @Override + public IndexMetadata getIndexMetadata() + { + return indexMetadata; + } + + @Override + public boolean shouldBuildBlocking() + { + return true; + } + + @Override + public boolean isSSTableAttached() + { + return true; + } + + @Override + public Optional getBackingTable() + { + return Optional.empty(); + } + + @Override + public Set getComponents() + { + return Collections.singleton(type.getSingleton()); + } + + @Override + public Callable getInitializationTask() + { + //TODO (now): in SAI startup doesn't validate... what are the downstream issues this can face? Corrupt indexes not being detected? + boolean starting = StorageService.instance.isStarting(); + return () -> { + if (baseCfs.indexManager.isIndexQueryable(this)) + { + initBuildStarted = true; + return null; + } + + // stop in-progress compaction tasks to prevent compacted sstable not being indexed. + CompactionManager.instance.interruptCompactionFor(Collections.singleton(baseCfs.metadata()), + ssTableReader -> true, + true); + // Force another flush to make sure on disk index is generated for memtable data before marking it queryable. + // In the case of offline scrub, there are no live memtables. + if (!baseCfs.getTracker().getView().liveMemtables.isEmpty()) + baseCfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.INDEX_BUILD_STARTED); + + // It is now safe to flush indexes directly from flushing Memtables. + initBuildStarted = true; + + List nonIndexed = findNonIndexedSSTables(baseCfs, sstableManager); + + if (nonIndexed.isEmpty()) + return null; + + // split sorted sstables into groups with similar size and build each group in separate compaction thread + List> groups = StorageAttachedIndex.groupBySize(nonIndexed, DatabaseDescriptor.getConcurrentIndexBuilders()); + List> futures = new ArrayList<>(); + + for (List group : groups) + { + futures.add(CompactionManager.instance.submitIndexBuild(new RouteSecondaryIndexBuilder(this, sstableManager, group, false, true))); + } + + return FutureCombiner.allOf(futures).get(); + }; + } + + private List findNonIndexedSSTables(ColumnFamilyStore baseCfs, SSTableManager manager) + { + Set sstables = baseCfs.getLiveSSTables(); + + // Initialize the SSTable indexes w/ valid existing components... + manager.onSSTableChanged(Collections.emptyList(), sstables); + + // ...then identify and rebuild the SSTable indexes that are missing. + List nonIndexed = new ArrayList<>(); + + for (SSTableReader sstable : sstables) + { + if (!sstable.isMarkedCompacted() && !manager.isIndexComplete(sstable)) + { + nonIndexed.add(sstable); + } + } + + return nonIndexed; + } + + + @Override + public boolean isQueryable(Status status) + { + // consider unknown status as queryable, because gossip may not be up-to-date for newly joining nodes. + return status == Status.BUILD_SUCCEEDED || status == Status.UNKNOWN; + } + + public RegisterStatus registerStatus() + { + return registerStatus; + } + + @Override + public synchronized void register(IndexRegistry registry) + { + registry.registerIndex(this); + registerStatus = RegisterStatus.REGISTERED; + } + + @Override + public synchronized void unregister(IndexRegistry registry) + { + Index.super.unregister(registry); + registerStatus = RegisterStatus.UNREGISTERED; + } + + @Override + public Callable getTruncateTask(long truncatedAt) + { + /* + * index files will be removed as part of base sstable lifecycle in {@link LogTransaction#delete(java.io.File)} + * asynchronously, but we need to mark the index queryable because if the truncation is during the initial + * build of the index it won't get marked queryable by the build. + */ + return () -> { + logger.info("Making index queryable during table truncation"); + baseCfs.indexManager.makeIndexQueryable(this, Status.BUILD_SUCCEEDED); + return null; + }; + } + + @Override + public Callable getBlockingFlushTask() + { + return null; // storage-attached indexes are flushed alongside memtable + } + + @Override + public Callable getMetadataReloadTask(IndexMetadata indexMetadata) + { + return null; + } + + @Override + public Callable getInvalidateTask() + { + return () -> null; + } + + @Override + public void validate(PartitionUpdate update, ClientState state) throws InvalidRequestException + { + // only internal can write... so it must be valid no? + } + + + //TODO (now): flesh this stuff out... + + + @Override + public SSTableFlushObserver getFlushObserver(Descriptor descriptor, + LifecycleNewTracker tracker) + { + // mimics org.apache.cassandra.index.sai.disk.v1.V1OnDiskFormat.newPerColumnIndexWriter + IndexDescriptor id = IndexDescriptor.create(descriptor, baseCfs.getPartitioner(), baseCfs.metadata().comparator); + if (tracker.opType() != OperationType.FLUSH || !initBuildStarted) + { + return new RouteIndexFormat.SSTableIndexWriter(this, id); + } + else + { + return new RouteIndexFormat.MemtableRouteIndexWriter(id, memtableIndexManager.getPendingMemtableIndex(tracker)); + } + } + + @Override + public Indexer indexerFor(DecoratedKey key, + RegularAndStaticColumns columns, + long nowInSec, + WriteContext ctx, + IndexTransaction.Type transactionType, + Memtable memtable) + { + // since we are attached we only care about update + if (transactionType != IndexTransaction.Type.UPDATE) + return null; + return new Indexer() + { + @Override + public void insertRow(Row row) + { + long size = memtableIndexManager.index(key, row, memtable); + if (size > 0) + memtable.markExtraOnHeapUsed(size, CassandraWriteContext.fromContext(ctx).getGroup()); + } + + @Override + public void updateRow(Row oldRowData, Row newRowData) + { + insertRow(newRowData); + } + }; + } + + @Override + public boolean supportsExpression(ColumnMetadata column, Operator operator) + { + // disallow all queries, in order to interact with this index you must bypass CQL + return false; + } + + @Override + public RowFilter getPostIndexQueryFilter(RowFilter filter) + { + return RowFilter.none(); + } + + @Override + public Searcher searcherFor(ReadCommand command) + { + List expressions = command.rowFilter().getExpressions().stream().collect(Collectors.toList()); + if (expressions.isEmpty()) + return null; + ByteBuffer start = null; + boolean startInclusive = true; + ByteBuffer end = null; + boolean endInclusive = true; + Integer storeId = null; + for (RowFilter.Expression e : expressions) + { + if (e.column() == AccordKeyspace.CommandsColumns.route) + { + switch (e.operator()) + { + case GT: + start = e.getIndexValue(); + startInclusive = false; + break; + case GTE: + start = e.getIndexValue(); + startInclusive = true; + break; + case LT: + end = e.getIndexValue(); + endInclusive = false; + break; + case LTE: + end = e.getIndexValue(); + endInclusive = true; + break; + default: + return null; + } + } + else if (e.column() == AccordKeyspace.CommandsColumns.store_id && e.operator() == Operator.EQ) + { + storeId = Int32Type.instance.compose(e.getIndexValue()); + } + } + if (start == null || end == null || storeId == null) + return null; + int finalStoreId = storeId; + ByteBuffer finalStart = start; + boolean finalStartInclusive = startInclusive; + ByteBuffer finalEnd = end; + boolean finalEndInclusive = endInclusive; + return new Searcher() + { + @Override + public ReadCommand command() + { + return command; + } + + @Override + public UnfilteredPartitionIterator search(ReadExecutionController executionController) + { + // find all partitions from memtable / sstable + NavigableSet partitions = search(finalStoreId, finalStart, finalStartInclusive, finalEnd, finalEndInclusive); + // do SinglePartitionReadCommand per partition + return new SearchIterator(executionController, command, partitions); + } + + NavigableSet search(int storeId, + ByteBuffer startTableWithToken, boolean startInclusive, + ByteBuffer endTableWithToken, boolean endInclusive) + { + TableId tableId; + byte[] start; + { + + AccordRoutingKey route = OrderedRouteSerializer.deserializeRoutingKey(startTableWithToken); + tableId = route.table(); + start = OrderedRouteSerializer.serializeRoutingKeyNoTable(route); + } + byte[] end = OrderedRouteSerializer.serializeRoutingKeyNoTable(OrderedRouteSerializer.deserializeRoutingKey(endTableWithToken)); + NavigableSet matches = sstableManager.search(storeId, tableId, start, startInclusive, end, endInclusive); + matches.addAll(memtableIndexManager.search(storeId, tableId, start, startInclusive, end, endInclusive)); + return matches; + } + }; + } + + private class SearchIterator extends AbstractIterator implements UnfilteredPartitionIterator + { + private final ReadExecutionController executionController; + private final ReadCommand command; + private final TableMetadata metadata; + private final Iterator partitions; + + private SearchIterator(ReadExecutionController executionController, ReadCommand command, NavigableSet partitions) + { + this.executionController = executionController; + this.command = command; + this.metadata = command.metadata(); + this.partitions = partitions.iterator(); + } + + @Override + public TableMetadata metadata() + { + return metadata; + } + + @Override + protected UnfilteredRowIterator computeNext() + { + if (!partitions.hasNext()) + return endOfData(); + DecoratedKey pk = metadata.partitioner.decorateKey(partitions.next()); + return new UnfilteredRowIterator() + { + @Override + public DeletionTime partitionLevelDeletion() + { + return DeletionTime.LIVE; + } + + @Override + public EncodingStats stats() + { + return EncodingStats.NO_STATS; + } + + @Override + public TableMetadata metadata() + { + return metadata; + } + + @Override + public boolean isReverseOrder() + { + return false; + } + + @Override + public RegularAndStaticColumns columns() + { + return RegularAndStaticColumns.NONE; + } + + @Override + public DecoratedKey partitionKey() + { + return pk; + } + + @Override + public Row staticRow() + { + return null; + } + + @Override + public void close() + { + + } + + private Row row = BTreeRow.emptyRow(Clustering.EMPTY); + + @Override + public boolean hasNext() + { + return row != null; + } + + @Override + public Unfiltered next() + { + Row row = this.row; + this.row = null; + return row; + } + }; + } + + @Override + public void close() + { + + } + } + + @Override + public void handleNotification(INotification notification, Object sender) + { + // unfortunately, we can only check the type of notification via instanceof :( + if (notification instanceof SSTableAddedNotification) + { + SSTableAddedNotification notice = (SSTableAddedNotification) notification; + sstableManager.onSSTableChanged(Collections.emptySet(), notice.added); + } + else if (notification instanceof SSTableListChangedNotification) + { + SSTableListChangedNotification notice = (SSTableListChangedNotification) notification; + sstableManager.onSSTableChanged(notice.removed, notice.added); + } + else if (notification instanceof MemtableRenewedNotification) + { + memtableIndexManager.renewMemtable(((MemtableRenewedNotification) notification).renewed); + } + else if (notification instanceof MemtableDiscardedNotification) + { + memtableIndexManager.discardMemtable(((MemtableDiscardedNotification) notification).memtable); + } + } + + //TODO (coverage): everything below here never triggered... + + @Override + public boolean dependsOn(ColumnMetadata column) + { + throw new UnsupportedOperationException(); + } + + @Override + public AbstractType customExpressionValueType() + { + throw new UnsupportedOperationException(); + } + + @Override + public long getEstimatedResultRows() + { + throw new UnsupportedOperationException(); + } +} diff --git a/src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java b/src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java new file mode 100644 index 000000000000..bd722ca9f0e3 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import java.util.zip.CRC32C; +import java.util.zip.Checksum; + +import com.google.common.collect.Maps; + +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.index.accord.IndexDescriptor.IndexComponent; +import org.apache.cassandra.io.sstable.SSTableFlushObserver; +import org.apache.cassandra.io.util.ChecksumedRandomAccessReader; +import org.apache.cassandra.io.util.ChecksumedSequentialWriter; +import org.apache.cassandra.io.util.FileHandle; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.serializers.UUIDSerializer; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.Throwables; + +import static org.apache.cassandra.utils.Clock.Global.nowInSeconds; + +// A route index consists of a few files: cintia_sorted_list, cintia_checkpoints, and metadata +// metadata stores the segement mappings and stats needed for search selection +public class RouteIndexFormat +{ + public static final Supplier CHECKSUM_SUPPLIER = CRC32C::new; + + public interface Writer extends SSTableFlushObserver + { + + } + + public static class SSTableIndexWriter extends MemtableRouteIndexWriter + { + private final RouteIndex index; + private DecoratedKey current; + + public SSTableIndexWriter(RouteIndex index, IndexDescriptor id) + { + super(id, new MemtableIndex()); + this.index = index; + } + + @Override + public void startPartition(DecoratedKey key, long keyPosition, long keyPositionForSASI) + { + this.current = key; + } + + @Override + public void nextUnfilteredCluster(Unfiltered unfiltered) + { + // there is some duplication from org.apache.cassandra.index.accord.RouteMemtableIndexManager.index + // should this be cleaned up? + if (!unfiltered.isRow()) + return; + Row row = (Row) unfiltered; + // simplified version of org.apache.cassandra.index.sai.utils.IndexTermType.valueOf + Cell cell = row.getCell(index.column()); + ByteBuffer value = cell == null || !cell.isLive(nowInSeconds()) ? null : cell.buffer(); + indexer.index(current, row.clustering(), value); + } + } + + public static class MemtableRouteIndexWriter implements Writer + { + private final IndexDescriptor id; + protected final MemtableIndex indexer; + + public MemtableRouteIndexWriter(IndexDescriptor id, MemtableIndex indexer) + { + this.id = id; + this.indexer = indexer; + } + + + @Override + public void begin() + { + // no-op + } + + @Override + public void startPartition(DecoratedKey key, long keyPosition, long keyPositionForSASI) + { + // no-op + } + + @Override + public void staticRow(Row staticRow) + { + // no-op + } + + @Override + public void nextUnfilteredCluster(Unfiltered unfiltered) + { + // no-op + } + + @Override + public void complete() + { + try + { + if (!indexer.isEmpty()) + { + Segment segment = indexer.write(id); + appendSegment(id, segment); + } + else + { + // nothing to see here... need to still mark the SSTable as indexed, so need an empty segment + appendSegment(id, Segment.EMPTY); + } + } + catch (IOException e) + { + abort(e); + throw Throwables.unchecked(e); + } + } + + @Override + public void abort(Throwable accumulator) + { + id.deleteIndex(); + } + + public void abort(Throwable accumulator, boolean fromIndex) + { + abort(accumulator); + // If the abort was from an index error, propagate the error upstream so index builds, compactions, and + // flushes can handle it correctly. + if (fromIndex) + throw Throwables.unchecked(accumulator); + } + } + + static List readSegements(Map index) throws IOException + { + List segments = new ArrayList<>(); + + try (var metaReader = new ChecksumedRandomAccessReader(index.get(IndexComponent.METADATA).createReader(), CHECKSUM_SUPPLIER); + var segmentReader = new ChecksumedRandomAccessReader(index.get(IndexComponent.SEGMENT).createReader(), CHECKSUM_SUPPLIER)) + { + while (metaReader.getFilePointer() < metaReader.length()) + { + metaReader.resetChecksum(); + long startPointer = metaReader.readUnsignedVInt(); + long endPointer = metaReader.readUnsignedVInt(); + int groupSize = metaReader.readUnsignedVInt32(); + int segmentChecksum = metaReader.readInt(); + int metadataChecksum = metaReader.getValue32AndResetChecksum(); + int actualChecksum = metaReader.readInt(); + assert actualChecksum == metadataChecksum; + + segmentReader.resetChecksum(); + segmentReader.seek(startPointer); + Map groups = Maps.newHashMapWithExpectedSize(groupSize); + for (int i = 0; i < groupSize; i++) + { + int storeId = segmentReader.readVInt32(); + TableId tableId = TableId.fromUUID(new UUID(segmentReader.readLong(), segmentReader.readLong())); + Group group = new Group(storeId, tableId); + int metaSize = segmentReader.readUnsignedVInt32(); + EnumMap metas = new EnumMap<>(IndexComponent.class); + for (int j = 0; j < metaSize; j++) + { + IndexComponent c = IndexComponent.fromByte(segmentReader.readByte()); + metas.put(c, new Segment.ComponentMetadata(segmentReader.readUnsignedVInt(), segmentReader.readUnsignedVInt())); + } + byte[] minTerm = ByteArrayUtil.readWithVIntLength(segmentReader); + byte[] maxTerm = ByteArrayUtil.readWithVIntLength(segmentReader); + Segment.Metadata existing = groups.put(group, new Segment.Metadata(metas, minTerm, maxTerm)); + assert existing == null; + } + int actualSegmentChecksum = segmentReader.getValue32AndResetChecksum(); + assert actualSegmentChecksum == segmentChecksum; + assert segmentReader.getFilePointer() == endPointer; + segments.add(new Segment(groups)); + } + } + return segments; + } + + static void appendSegment(IndexDescriptor id, Segment segment) throws IOException + { + List groups = new ArrayList<>(segment.groups.keySet()); + groups.sort(Comparator.naturalOrder()); + + try (var segmentWriter = ChecksumedSequentialWriter.open(id.fileFor(IndexComponent.SEGMENT), true, CHECKSUM_SUPPLIER); + var metadataWriter = ChecksumedSequentialWriter.open(id.fileFor(IndexComponent.METADATA), true, CHECKSUM_SUPPLIER)) + { + long startPointer = segmentWriter.getFilePointer(); + for (Group group : groups) + { + Segment.Metadata metadata = segment.groups.get(group); + writeGroup(segmentWriter, group, metadata); + } + long endPointer = segmentWriter.getFilePointer(); + + int checksum = segmentWriter.getValue32AndResetChecksum(); + metadataWriter.writeUnsignedVInt(startPointer); + metadataWriter.writeUnsignedVInt(endPointer); + metadataWriter.writeUnsignedVInt32(segment.groups.size()); + metadataWriter.writeInt(checksum); + metadataWriter.writeInt(metadataWriter.getValue32AndResetChecksum()); + } + } + + private static void writeGroup(ChecksumedSequentialWriter seq, Group group, Segment.Metadata metadata) throws IOException + { + seq.writeVInt32(group.storeId); + seq.write(UUIDSerializer.instance.serialize(group.tableId.asUUID())); + seq.writeUnsignedVInt32(metadata.metas.size()); + for (Map.Entry e : metadata.metas.entrySet()) + { + seq.writeByte(e.getKey().value); + seq.writeUnsignedVInt(e.getValue().offset); + seq.writeUnsignedVInt(e.getValue().endOffset); + } + ByteArrayUtil.writeWithVIntLength(metadata.minTerm, seq); + ByteArrayUtil.writeWithVIntLength(metadata.maxTerm, seq); + } +} diff --git a/src/java/org/apache/cassandra/index/accord/RouteMemtableIndexManager.java b/src/java/org/apache/cassandra/index/accord/RouteMemtableIndexManager.java new file mode 100644 index 000000000000..f61dd14e90cc --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/RouteMemtableIndexManager.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.nio.ByteBuffer; +import java.util.NavigableSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; + +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.lifecycle.LifecycleNewTracker; +import org.apache.cassandra.db.memtable.Memtable; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.schema.TableId; + +import static org.apache.cassandra.utils.Clock.Global.nanoTime; +import static org.apache.cassandra.utils.Clock.Global.nowInSeconds; + +public class RouteMemtableIndexManager implements MemtableIndexManager +{ + private final ConcurrentMap liveMemtableIndexMap = new ConcurrentHashMap<>(); + private final RouteIndex index; + + public RouteMemtableIndexManager(RouteIndex index) + { + this.index = index; + } + + @Override + public long index(DecoratedKey key, Row row, Memtable mt) + { + if (row.isStatic()) + return 0; + //TODO (performance): we dropped jdk8 and this was fixed in jdk8... so do we need to do this still? + MemtableIndex current = liveMemtableIndexMap.get(mt); + + // We expect the relevant IndexMemtable to be present most of the time, so only make the + // call to computeIfAbsent() if it's not. (see https://bugs.openjdk.java.net/browse/JDK-8161372) + MemtableIndex target = (current != null) + ? current + : liveMemtableIndexMap.computeIfAbsent(mt, memtable -> new MemtableIndex()); + + long start = nanoTime(); + + long bytes = 0; + + // simplified version of org.apache.cassandra.index.sai.utils.IndexTermType.valueOf + Cell cell = row.getCell(index.column()); + ByteBuffer value = cell == null || !cell.isLive(nowInSeconds()) ? null : cell.buffer(); + + bytes += target.index(key, row.clustering(), value); + index.indexMetrics().memtableIndexWriteLatency.update(nanoTime() - start, TimeUnit.NANOSECONDS); + return bytes; + } + + @Override + public MemtableIndex getPendingMemtableIndex(LifecycleNewTracker tracker) + { + return liveMemtableIndexMap.keySet().stream() + .filter(m -> tracker.equals(m.getFlushTransaction())) + .findFirst() + .map(liveMemtableIndexMap::get) + .orElse(null); + } + + @Override + public void discardMemtable(Memtable memtable) + { + liveMemtableIndexMap.remove(memtable); + } + + @Override + public void renewMemtable(Memtable renewed) + { + for (Memtable memtable : liveMemtableIndexMap.keySet()) + { + // remove every index but the one that corresponds to the post-truncate Memtable + if (renewed != memtable) + { + liveMemtableIndexMap.remove(memtable); + } + } + } + + @Override + public NavigableSet search(int storeId, TableId tableId, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive) + { + TreeSet matches = new TreeSet<>(); + liveMemtableIndexMap.values().forEach(m -> matches.addAll(m.search(storeId, tableId, start, startInclusive, end, endInclusive))); + return matches; + } +} diff --git a/src/java/org/apache/cassandra/index/accord/RouteSSTableManager.java b/src/java/org/apache/cassandra/index/accord/RouteSSTableManager.java new file mode 100644 index 000000000000..94bb3b124bef --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/RouteSSTableManager.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.TreeSet; + +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.schema.TableId; + +public class RouteSSTableManager implements SSTableManager +{ + private final Map sstables = new HashMap<>(); + + @Override + public synchronized void onSSTableChanged(Collection removed, Iterable added) + { + //TODO (performance): most added tables will have 0 segmenets, so exclude those from search + removed.forEach(s -> { + SSTableIndex index = sstables.remove(s); + if (index != null) + { + index.close(); + index.id.deleteIndex(); + } + }); + + List notComplete = null; + for (SSTableReader sstable : added) + { + IndexDescriptor id = IndexDescriptor.create(sstable); + if (!id.isIndexBuildComplete()) + { + if (notComplete == null) notComplete = new ArrayList<>(); + notComplete.add(sstable); + continue; + } + try + { + sstables.put(sstable, SSTableIndex.create(id)); + } + catch (IOException e) + { + notComplete.add(sstable); + } + } + if (notComplete != null) + throw new IllegalArgumentException("SStables were added without an index... " + notComplete); + } + + @Override + public synchronized boolean isIndexComplete(SSTableReader reader) + { + return sstables.containsKey(reader); + } + + @Override + public synchronized NavigableSet search(int storeId, TableId tableId, + byte[] start, boolean startInclusive, + byte[] end, boolean endInclusive) + { + Group group = new Group(storeId, tableId); + TreeSet matches = new TreeSet<>(); + for (SSTableIndex index : sstables.values()) + matches.addAll(index.search(group, start, startInclusive, end, endInclusive)); + return matches; + } +} diff --git a/src/java/org/apache/cassandra/index/accord/RouteSecondaryIndexBuilder.java b/src/java/org/apache/cassandra/index/accord/RouteSecondaryIndexBuilder.java new file mode 100644 index 000000000000..8caf981353cb --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/RouteSecondaryIndexBuilder.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.util.Collections; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.compaction.CompactionInfo; +import org.apache.cassandra.db.compaction.CompactionInterruptedException; +import org.apache.cassandra.db.compaction.OperationType; +import org.apache.cassandra.db.lifecycle.LifecycleTransaction; +import org.apache.cassandra.db.lifecycle.Tracker; +import org.apache.cassandra.index.SecondaryIndexBuilder; +import org.apache.cassandra.io.sstable.KeyIterator; +import org.apache.cassandra.io.sstable.SSTableFlushObserver; +import org.apache.cassandra.io.sstable.SSTableIdentityIterator; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.util.RandomAccessReader; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.Throwables; +import org.apache.cassandra.utils.TimeUUID; +import org.apache.cassandra.utils.concurrent.Ref; + +import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; + +public class RouteSecondaryIndexBuilder extends SecondaryIndexBuilder +{ + private static final Logger logger = LoggerFactory.getLogger(RouteSecondaryIndexBuilder.class); + + private final TimeUUID compactionId = nextTimeUUID(); + private final RouteIndex index; + private final TableMetadata metadata; + private final Tracker tracker; + private final SSTableManager sstableManager; + private final List sstables; + private final boolean isFullRebuild; + private final boolean isInitialBuild; + private final long totalSizeInBytes; + private long bytesProcessed = 0; + + public RouteSecondaryIndexBuilder(RouteIndex index, + SSTableManager sstableManager, + List sstables, + boolean isFullRebuild, + boolean isInitialBuild) + { + this.index = index; + this.metadata = index.baseCfs().metadata(); + this.tracker = index.baseCfs().getTracker(); + this.sstableManager = sstableManager; + this.sstables = sstables; + this.isFullRebuild = isFullRebuild; + this.isInitialBuild = isInitialBuild; + this.totalSizeInBytes = sstables.stream().mapToLong(SSTableReader::uncompressedLength).sum(); + } + + @Override + public CompactionInfo getCompactionInfo() + { + return new CompactionInfo(metadata, + OperationType.INDEX_BUILD, + bytesProcessed, + totalSizeInBytes, + compactionId, + sstables); + } + + @Override + public void build() + { + if (!validateIndexes()) + return; + for (SSTableReader sstable : sstables) + { + if (indexSSTable(sstable)) + return; + } + } + + /** + * @return true if index build should be stopped + */ + private boolean indexSSTable(SSTableReader sstable) + { + logger.debug("Starting index build on {}", sstable.descriptor); + + RouteIndexFormat.SSTableIndexWriter indexWriter = null; + + Ref ref = sstable.tryRef(); + if (ref == null) + { + logger.warn("Couldn't acquire reference to the SSTable {}. It may have been removed.", sstable.descriptor); + return false; + } + + try (RandomAccessReader dataFile = sstable.openDataReader(); + LifecycleTransaction txn = LifecycleTransaction.offline(OperationType.INDEX_BUILD, sstable)) + { + // remove existing per column index files instead of overwriting + IndexDescriptor indexDescriptor = IndexDescriptor.create(sstable); + indexDescriptor.deleteIndex(); + + indexWriter = new RouteIndexFormat.SSTableIndexWriter(index, indexDescriptor); + indexWriter.begin(); + + long previousBytesRead = 0; + + try (KeyIterator keys = sstable.keyIterator()) + { + while (keys.hasNext()) + { + if (isStopRequested()) + { + logger.debug("Index build has been stopped"); + throw new CompactionInterruptedException(getCompactionInfo()); + } + + DecoratedKey key = keys.next(); + + indexWriter.startPartition(key, -1, -1); + + long position = sstable.getPosition(key, SSTableReader.Operator.EQ); + dataFile.seek(position); + ByteBufferUtil.readWithShortLength(dataFile); // key + + try (SSTableIdentityIterator partition = SSTableIdentityIterator.create(sstable, dataFile, key)) + { + // if the row has statics attached, it has to be indexed separately + if (metadata.hasStaticColumns()) + indexWriter.nextUnfilteredCluster(partition.staticRow()); + + while (partition.hasNext()) + indexWriter.nextUnfilteredCluster(partition.next()); + } + long bytesRead = keys.getBytesRead(); + bytesProcessed += bytesRead - previousBytesRead; + previousBytesRead = bytesRead; + } + + completeSSTable(indexDescriptor, indexWriter, sstable); + } + + return false; + } + catch (Throwable t) + { + if (indexWriter != null) + { + indexWriter.abort(t, true); + } + + if (t instanceof InterruptedException) + { + logger.warn("Interrupted while building indexes on SSTable {}", sstable.descriptor); + Thread.currentThread().interrupt(); + return true; + } + else if (t instanceof CompactionInterruptedException) + { + //TODO Shouldn't do this if the stop was interrupted by a truncate + if (isInitialBuild) + { + logger.error("Stop requested while building initial indexes on SSTable {}.", sstable.descriptor); + throw Throwables.unchecked(t); + } + else + { + logger.info("Stop requested while building indexes on SSTable {}.", sstable.descriptor); + return true; + } + } + else + { + logger.error("Unable to build indexes on SSTable {}. Cause: {}.", sstable, t.getMessage()); + throw Throwables.unchecked(t); + } + } + finally + { + ref.release(); + } + } + + private void completeSSTable(IndexDescriptor indexDescriptor, SSTableFlushObserver indexWriter, SSTableReader sstable) + { + indexWriter.complete(); + + if (!validateIndexes()) + { + logger.debug("dropped during index build"); + return; + } + + // register custom index components into existing sstables + sstable.registerComponents(indexDescriptor.getLiveSSTableComponents(), tracker); + sstableManager.onSSTableChanged(Collections.emptyList(), Collections.singleton(sstable)); + } + + /** + * In case of full rebuild, stop the index build if any index is dropped. + * Otherwise, skip dropped indexes to avoid exception during repair/streaming. + */ + private boolean validateIndexes() + { + switch (index.registerStatus()) + { + case PENDING: throw new IllegalStateException("Unable to build indexes if the index has not been registered"); + case REGISTERED: return true; + case UNREGISTERED: break; + default: throw new AssertionError("Unknown status: " + index.registerStatus()); + } + + if (isFullRebuild) + throw new RuntimeException(String.format("%s are dropped, will stop index build.", index.getIndexMetadata().name)); + + return false; + } +} diff --git a/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java b/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java new file mode 100644 index 000000000000..1ade3b8a4a98 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import accord.primitives.TxnId; +import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DataRange; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.PartitionRangeReadCommand; +import org.apache.cassandra.db.ReadExecutionController; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.filter.DataLimits; +import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.index.Index; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.CloseableIterator; +import org.apache.cassandra.utils.FBUtilities; + +public class RoutesSearcher +{ + private final ColumnFamilyStore cfs = Keyspace.open("system_accord").getColumnFamilyStore("commands"); + private final Index index = cfs.indexManager.getIndexByName("route");; + private final ColumnMetadata route = AccordKeyspace.CommandsColumns.route; + private final ColumnMetadata store_id = AccordKeyspace.CommandsColumns.store_id; + private final ColumnMetadata txn_id = AccordKeyspace.CommandsColumns.txn_id; + private final ColumnFilter columnFilter = ColumnFilter.selectionBuilder().add(store_id).add(txn_id).build(); + private final DataLimits limits = DataLimits.NONE; + private final DataRange dataRange = DataRange.allData(cfs.getPartitioner()); + + private CloseableIterator searchKeysAccord(int store, AccordRoutingKey start, AccordRoutingKey end) + { + RowFilter rowFilter = RowFilter.create(false); + rowFilter.add(route, Operator.GT, OrderedRouteSerializer.serializeRoutingKey(start)); + rowFilter.add(route, Operator.LTE, OrderedRouteSerializer.serializeRoutingKey(end)); + rowFilter.add(store_id, Operator.EQ, Int32Type.instance.decompose(store)); + + PartitionRangeReadCommand cmd = PartitionRangeReadCommand.create(cfs.metadata(), + FBUtilities.nowInSeconds(), + columnFilter, + rowFilter, + limits, + dataRange); + Index.Searcher s = index.searcherFor(cmd); + try (ReadExecutionController controler = cmd.executionController()) + { + UnfilteredPartitionIterator partitionIterator = s.search(controler); + return new CloseableIterator() + { + private final Entry entry = new Entry(); + @Override + public void close() + { + partitionIterator.close(); + } + + @Override + public boolean hasNext() + { + return partitionIterator.hasNext(); + } + + @Override + public Entry next() + { + UnfilteredRowIterator next = partitionIterator.next(); + ByteBuffer[] partitionKeyComponents = AccordKeyspace.CommandRows.splitPartitionKey(next.partitionKey()); + entry.store_id = AccordKeyspace.CommandRows.getStoreId(partitionKeyComponents); + entry.txnId = AccordKeyspace.CommandRows.getTxnId(partitionKeyComponents); + return entry; + } + }; + } + } + + public Set intersects(int store, TokenRange range) + { + return intersects(store, (AccordRoutingKey) range.start(), (AccordRoutingKey) range.end()); + } + + public Set intersects(int store, AccordRoutingKey start, AccordRoutingKey end) + { + HashSet set = new HashSet(); + try (CloseableIterator it = searchKeysAccord(store, start, end)) + { + while (it.hasNext()) + { + Entry next = it.next(); + if (next.store_id != store) continue; // the index should filter out, but just in case... + set.add(next.txnId); + } + } + return set.isEmpty() ? Collections.emptySet() : set; + } + + private static final class Entry + { + public int store_id; + public TxnId txnId; + } +} diff --git a/src/java/org/apache/cassandra/index/accord/SSTableIndex.java b/src/java/org/apache/cassandra/index/accord/SSTableIndex.java new file mode 100644 index 000000000000..fceaf8e94344 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/SSTableIndex.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.cassandra.index.accord.CheckpointIntervalArrayIndex.SegmentSearcher; +import org.apache.cassandra.index.accord.IndexDescriptor.IndexComponent; +import org.apache.cassandra.io.FSReadError; +import org.apache.cassandra.io.util.FileHandle; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.concurrent.RefCounted; +import org.apache.cassandra.utils.concurrent.SharedCloseableImpl; + +public class SSTableIndex extends SharedCloseableImpl +{ + public final IndexDescriptor id; + private final Map files; + private final List segments; + + private SSTableIndex(IndexDescriptor id, + Map files, + List segments, + Cleanup cleanup) + { + super(cleanup); + this.id = id; + this.files = files; + this.segments = segments; + } + + public SSTableIndex(SSTableIndex copy) + { + super(copy); + this.id = copy.id; + this.files = copy.files; + this.segments = copy.segments; + } + + @Override + public SSTableIndex sharedCopy() + { + return new SSTableIndex(this); + } + + public static SSTableIndex create(IndexDescriptor id) throws IOException + { + Map files = new EnumMap<>(IndexComponent.class); + for (IndexComponent c : id.getLiveComponents()) + files.put(c, new FileHandle.Builder(id.fileFor(c)).mmapped(true).complete()); + List segments = RouteIndexFormat.readSegements(files); + files.remove(IndexComponent.SEGMENT).close(); + files.remove(IndexComponent.METADATA).close(); + Cleanup cleanup = new Cleanup(files); + return new SSTableIndex(id, files, segments, cleanup); + } + + public Collection search(Group group, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive) + { + List matches = segments.stream().filter(s -> { + Segment.Metadata metadata = s.groups.get(group); + if (metadata == null) return false; + if (ByteArrayUtil.compareUnsigned(metadata.minTerm, end) >= 0) + return false; + if (ByteArrayUtil.compareUnsigned(metadata.maxTerm, start) <= 0) + return false; + return true; + }) + .collect(Collectors.toList()); + if (matches.isEmpty()) return Collections.emptyList(); + if (matches.size() == 1) return search(matches.get(0), group, start, startInclusive, end, endInclusive); + Set found = new HashSet<>(); + for (Segment s : matches) + found.addAll(search(s, group, start, startInclusive, end, endInclusive)); + return found; + } + + private Collection search(Segment segment, Group group, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive) + { + Set matches = new HashSet<>(); + Segment.Metadata metadata = Objects.requireNonNull(segment.groups.get(group), () -> "Unknown group: " + group); + try + { + SegmentSearcher searcher = new SegmentSearcher(fileFor(IndexComponent.CINTIA_SORTED_LIST), metadata.metas.get(IndexComponent.CINTIA_SORTED_LIST).offset, + fileFor(IndexComponent.CINTIA_CHECKPOINTS), metadata.metas.get(IndexComponent.CINTIA_CHECKPOINTS).offset); + searcher.intersects(start, end, interval -> matches.add(ByteBuffer.wrap(interval.value))); + } + catch (IOException e) + { + throw new FSReadError(e, id.fileFor(IndexComponent.CINTIA_SORTED_LIST)); + } + return matches; + } + + private FileHandle fileFor(IndexComponent c) + { + return Objects.requireNonNull(files.get(c), () -> "Unknown component: " + c); + } + + private static class Cleanup implements RefCounted.Tidy + { + private final Map files; + + private Cleanup(Map files) + { + this.files = files; + } + + @Override + public void tidy() throws Exception + { + for (IndexComponent c : IndexComponent.values()) + { + FileHandle fh = files.remove(c); + if (fh == null) continue; + fh.close(); + } + } + + @Override + public String name() + { + return "SSTableIndex Cleanup"; + } + } +} diff --git a/src/java/org/apache/cassandra/index/accord/SSTableManager.java b/src/java/org/apache/cassandra/index/accord/SSTableManager.java new file mode 100644 index 000000000000..090951670f47 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/SSTableManager.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.NavigableSet; + +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.schema.TableId; + +public interface SSTableManager +{ + void onSSTableChanged(Collection removed, Iterable added); + boolean isIndexComplete(SSTableReader reader); + + NavigableSet search(int storeId, TableId tableId, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive); +} diff --git a/src/java/org/apache/cassandra/index/accord/Segment.java b/src/java/org/apache/cassandra/index/accord/Segment.java new file mode 100644 index 000000000000..b6d0ffa08529 --- /dev/null +++ b/src/java/org/apache/cassandra/index/accord/Segment.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.util.Collections; +import java.util.EnumMap; +import java.util.Map; + +public class Segment +{ + public static final Segment EMPTY = new Segment(Collections.emptyMap()); + + public final Map groups; + + public Segment(Map groups) + { + this.groups = groups; + } + + public static class ComponentMetadata + { + public final long offset, endOffset; + + public ComponentMetadata(long offset, long endOffset) + { + this.offset = offset; + this.endOffset = endOffset; + } + } + + public static class Metadata + { + public final EnumMap metas; + public final byte[] minTerm, maxTerm; + + public Metadata(EnumMap metas, byte[] minTerm, byte[] maxTerm) + { + this.metas = metas; + this.minTerm = minTerm; + this.maxTerm = maxTerm; + } + } +} diff --git a/src/java/org/apache/cassandra/io/util/Checksumed.java b/src/java/org/apache/cassandra/io/util/Checksumed.java new file mode 100644 index 000000000000..61120ed25e13 --- /dev/null +++ b/src/java/org/apache/cassandra/io/util/Checksumed.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.util.zip.Checksum; + +public interface Checksumed +{ + Checksum checksum(); + + default long getAndResetChecksum() + { + Checksum c = checksum(); + long v = c.getValue(); + c.reset(); + return v; + } + + default int getValue32() + { + long v = checksum().getValue(); + if (Long.numberOfLeadingZeros(v) < 32) + throw new IllegalStateException("Checksum is larger than 32 bytes!"); + return (int) v; + } + + default int getValue32AndResetChecksum() + { + Checksum c = checksum(); + long v = c.getValue(); + if (Long.numberOfLeadingZeros(v) < 32) + throw new IllegalStateException("Checksum is larger than 32 bytes!"); + c.reset(); + return (int) v; + } + + default void resetChecksum() + { + checksum().reset(); + } +} diff --git a/src/java/org/apache/cassandra/io/util/ChecksumedDataInputPlus.java b/src/java/org/apache/cassandra/io/util/ChecksumedDataInputPlus.java new file mode 100644 index 000000000000..8b1d701d4af3 --- /dev/null +++ b/src/java/org/apache/cassandra/io/util/ChecksumedDataInputPlus.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.io.IOException; +import java.util.function.Supplier; +import java.util.zip.Checksum; + +public class ChecksumedDataInputPlus implements DataInputPlus, Checksumed +{ + private final DataInputPlus delegate; + private final Checksum checksum; + + public ChecksumedDataInputPlus(DataInputPlus delegate, Checksum checksum) + { + this.delegate = delegate; + this.checksum = checksum; + } + + public ChecksumedDataInputPlus(DataInputPlus delegate, Supplier fn) + { + this(delegate, fn.get()); + } + + public DataInputPlus delegate() + { + return delegate; + } + + @Override + public Checksum checksum() + { + return checksum; + } + + @Override + public void readFully(byte[] b) throws IOException + { + delegate().readFully(b); + checksum.update(b); + } + + @Override + public void readFully(byte[] b, int off, int len) throws IOException + { + delegate().readFully(b, off, len); + checksum.update(b, off, len); + } + + @Override + public int skipBytes(int n) throws IOException + { + int skipped = delegate().skipBytes(n); + checksum.reset(); + return skipped; + } + + @Override + public int readUnsignedByte() throws IOException + { + int value = delegate().readUnsignedByte(); + checksum.update(value); + return value; + } + + private byte writeBuffer[] = new byte[8]; + + @Override + public long readLong() throws IOException + { + long v = delegate().readLong(); + writeBuffer[0] = (byte)(v >>> 56); + writeBuffer[1] = (byte)(v >>> 48); + writeBuffer[2] = (byte)(v >>> 40); + writeBuffer[3] = (byte)(v >>> 32); + writeBuffer[4] = (byte)(v >>> 24); + writeBuffer[5] = (byte)(v >>> 16); + writeBuffer[6] = (byte)(v >>> 8); + writeBuffer[7] = (byte)(v >>> 0); + checksum.update(writeBuffer); + return v; + } + + @Override + public String readLine() throws IOException + { + throw new UnsupportedOperationException(); + } +} diff --git a/src/java/org/apache/cassandra/io/util/ChecksumedDataOutputPlus.java b/src/java/org/apache/cassandra/io/util/ChecksumedDataOutputPlus.java new file mode 100644 index 000000000000..83f571e4761a --- /dev/null +++ b/src/java/org/apache/cassandra/io/util/ChecksumedDataOutputPlus.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.function.Supplier; +import java.util.zip.Checksum; + +public class ChecksumedDataOutputPlus implements DataOutputPlus, Checksumed +{ + private final DataOutputPlus delegate; + private final Checksum checksum; + + public ChecksumedDataOutputPlus(DataOutputPlus delegate, Checksum checksum) + { + this.delegate = delegate; + this.checksum = checksum; + } + + public ChecksumedDataOutputPlus(DataOutputPlus delegate, Supplier fn) + { + this(delegate, fn.get()); + } + + public DataOutputPlus delegate() + { + return delegate; + } + + @Override + public Checksum checksum() + { + return checksum; + } + + @Override + public void write(ByteBuffer buffer) throws IOException + { + checksum.update(buffer.duplicate()); + delegate().write(buffer); + } + + @Override + public void write(int b) throws IOException + { + checksum.update(b); + delegate().write(b); + } + + @Override + public void write(byte[] b) throws IOException + { + checksum.update(b); + delegate().write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException + { + checksum.update(b, off, len); + delegate().write(b, off, len); + } + + @Override + public void writeByte(int v) throws IOException + { + checksum.update(v); + delegate().writeByte(v); + } + + private byte writeBuffer[] = new byte[8]; + + @Override + public void writeLong(long v) throws IOException + { + writeBuffer[0] = (byte)(v >>> 56); + writeBuffer[1] = (byte)(v >>> 48); + writeBuffer[2] = (byte)(v >>> 40); + writeBuffer[3] = (byte)(v >>> 32); + writeBuffer[4] = (byte)(v >>> 24); + writeBuffer[5] = (byte)(v >>> 16); + writeBuffer[6] = (byte)(v >>> 8); + writeBuffer[7] = (byte)(v >>> 0); + checksum.update(writeBuffer); + delegate().writeLong(v); + } + + @Override + public void writeUTF(String s) throws IOException + { + throw new UnsupportedOperationException("TODO"); + } +} diff --git a/src/java/org/apache/cassandra/io/util/ChecksumedFileDataInput.java b/src/java/org/apache/cassandra/io/util/ChecksumedFileDataInput.java new file mode 100644 index 000000000000..5bdb0b564461 --- /dev/null +++ b/src/java/org/apache/cassandra/io/util/ChecksumedFileDataInput.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.io.IOException; +import java.util.function.Supplier; +import java.util.zip.Checksum; + +public class ChecksumedFileDataInput extends ChecksumedDataInputPlus implements FileDataInput +{ + public ChecksumedFileDataInput(FileDataInput delegate, Checksum checksum) + { + super(delegate, checksum); + } + + public ChecksumedFileDataInput(FileDataInput delegate, Supplier fn) + { + super(delegate, fn); + } + + @Override + public FileDataInput delegate() + { + return (FileDataInput) super.delegate(); + } + + @Override + public String getPath() + { + return delegate().getPath(); + } + + @Override + public boolean isEOF() throws IOException + { + return delegate().isEOF(); + } + + @Override + public long bytesRemaining() throws IOException + { + return delegate().bytesRemaining(); + } + + @Override + public void seek(long pos) throws IOException + { + resetChecksum(); + delegate().seek(pos); + } + + @Override + public long getFilePointer() + { + return delegate().getFilePointer(); + } + + @Override + public void close() throws IOException + { + resetChecksum(); + delegate().close(); + } + + @Override + public DataPosition mark() + { + resetChecksum(); + return delegate().mark(); + } + + @Override + public void reset(DataPosition mark) throws IOException + { + resetChecksum(); + delegate().reset(mark); + } + + @Override + public long bytesPastMark(DataPosition mark) + { + return delegate().bytesPastMark(mark); + } +} diff --git a/src/java/org/apache/cassandra/io/util/ChecksumedRandomAccessReader.java b/src/java/org/apache/cassandra/io/util/ChecksumedRandomAccessReader.java new file mode 100644 index 000000000000..5b3b820740f8 --- /dev/null +++ b/src/java/org/apache/cassandra/io/util/ChecksumedRandomAccessReader.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.util.function.Supplier; +import java.util.zip.Checksum; + +public class ChecksumedRandomAccessReader extends ChecksumedFileDataInput +{ + public ChecksumedRandomAccessReader(RandomAccessReader delegate, Checksum checksum) + { + super(delegate, checksum); + } + + public ChecksumedRandomAccessReader(RandomAccessReader delegate, Supplier fn) + { + super(delegate, fn); + } + + @Override + public RandomAccessReader delegate() + { + return (RandomAccessReader) super.delegate(); + } + + public long length() + { + return delegate().length(); + } +} diff --git a/src/java/org/apache/cassandra/io/util/ChecksumedSequentialWriter.java b/src/java/org/apache/cassandra/io/util/ChecksumedSequentialWriter.java new file mode 100644 index 000000000000..e14d99c45f2b --- /dev/null +++ b/src/java/org/apache/cassandra/io/util/ChecksumedSequentialWriter.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.io.IOException; +import java.util.function.Supplier; +import java.util.zip.Checksum; + +import org.apache.cassandra.utils.concurrent.Transactional; + +public class ChecksumedSequentialWriter extends ChecksumedDataOutputPlus implements Transactional +{ + private static final SequentialWriterOption DEFAULT_WRITER_OPTIONS = SequentialWriterOption.newBuilder().finishOnClose(true).build(); + + public ChecksumedSequentialWriter(SequentialWriter delegate, Checksum checksum) + { + super(delegate, checksum); + } + + public ChecksumedSequentialWriter(SequentialWriter delegate, Supplier fn) + { + super(delegate, fn); + } + + public static ChecksumedSequentialWriter open(File file, boolean append, Supplier fn) throws IOException + { + SequentialWriter writer = new SequentialWriter(file, DEFAULT_WRITER_OPTIONS); + if (append) + writer.skipBytes(file.length()); + return new ChecksumedSequentialWriter(writer, fn); + } + + @Override + public SequentialWriter delegate() + { + return (SequentialWriter) super.delegate(); + } + + public long getFilePointer() + { + return delegate().position(); + } + + @Override + public Throwable commit(Throwable accumulate) + { + return delegate().commit(accumulate); + } + + @Override + public Throwable abort(Throwable accumulate) + { + return delegate().abort(accumulate); + } + + @Override + public void prepareToCommit() + { + delegate().prepareToCommit(); + } + + @Override + public void close() + { + delegate().close(); + } +} diff --git a/src/java/org/apache/cassandra/io/util/DataInputPlus.java b/src/java/org/apache/cassandra/io/util/DataInputPlus.java index d117c7fe894c..428e6eb69847 100644 --- a/src/java/org/apache/cassandra/io/util/DataInputPlus.java +++ b/src/java/org/apache/cassandra/io/util/DataInputPlus.java @@ -18,6 +18,7 @@ package org.apache.cassandra.io.util; import java.io.DataInput; +import java.io.DataInputStream; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -98,6 +99,81 @@ public default void skipBytesFully(int n) throws IOException throw new EOFException("EOF after " + skipped + " bytes out of " + n); } + @Override + default byte readByte() throws IOException + { + return (byte) readUnsignedByte(); + } + + @Override + default boolean readBoolean() throws IOException + { + return readUnsignedByte() != 0; + } + + @Override + default short readShort() throws IOException + { + int ch1 = readUnsignedByte(); + int ch2 = readUnsignedByte(); + if ((ch1 | ch2) < 0) + throw new EOFException(); + return (short)((ch1 << 8) + (ch2 << 0)); + } + + @Override + default int readUnsignedShort() throws IOException + { + int ch1 = readUnsignedByte(); + int ch2 = readUnsignedByte(); + if ((ch1 | ch2) < 0) + throw new EOFException(); + return (ch1 << 8) + (ch2 << 0); + } + + @Override + default char readChar() throws IOException + { + int ch1 = readUnsignedByte(); + int ch2 = readUnsignedByte(); + if ((ch1 | ch2) < 0) + throw new EOFException(); + return (char)((ch1 << 8) + (ch2 << 0)); + } + + @Override + default int readInt() throws IOException + { + int ch1 = readUnsignedByte(); + int ch2 = readUnsignedByte(); + int ch3 = readUnsignedByte(); + int ch4 = readUnsignedByte(); + if ((ch1 | ch2 | ch3 | ch4) < 0) + throw new EOFException(); + return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); + } + + @Override + long readLong() throws IOException; + + @Override + default float readFloat() throws IOException + { + return Float.intBitsToFloat(readInt()); + } + + @Override + default double readDouble() throws IOException + { + return Double.longBitsToDouble(readLong()); + } + + @Override + default String readUTF() throws IOException + { + return DataInputStream.readUTF(this); + } + /** * Wrapper around an InputStream that provides no buffering but can decode varints */ diff --git a/src/java/org/apache/cassandra/io/util/DataOutputPlus.java b/src/java/org/apache/cassandra/io/util/DataOutputPlus.java index 483ee5e1dce5..6adf9a06620b 100644 --- a/src/java/org/apache/cassandra/io/util/DataOutputPlus.java +++ b/src/java/org/apache/cassandra/io/util/DataOutputPlus.java @@ -193,4 +193,65 @@ default long paddedPosition() { throw new UnsupportedOperationException(); } + + @Override + default void writeBoolean(boolean v) throws IOException + { + write(v ? 1 : 0); + } + + @Override + default void writeShort(int v) throws IOException + { + write((v >>> 8) & 0xFF); + write((v >>> 0) & 0xFF); + } + + @Override + default void writeChar(int v) throws IOException + { + write((v >>> 8) & 0xFF); + write((v >>> 0) & 0xFF); + } + + @Override + default void writeInt(int v) throws IOException + { + write((v >>> 24) & 0xFF); + write((v >>> 16) & 0xFF); + write((v >>> 8) & 0xFF); + write((v >>> 0) & 0xFF); + } + + @Override + default void writeFloat(float v) throws IOException + { + writeInt(Float.floatToIntBits(v)); + } + + @Override + default void writeDouble(double v) throws IOException + { + writeLong(Double.doubleToLongBits(v)); + } + + @Override + default void writeBytes(String s) throws IOException + { + int len = s.length(); + for (int i = 0 ; i < len ; i++) { + write((byte)s.charAt(i)); + } + } + + @Override + default void writeChars(String s) throws IOException + { + int len = s.length(); + for (int i = 0 ; i < len ; i++) { + int v = s.charAt(i); + write((v >>> 8) & 0xFF); + write((v >>> 0) & 0xFF); + } + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 844f6607966e..eae190a15e0f 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -195,7 +195,10 @@ public Object shutdownNow() @Override public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { - return false; + boolean r = true; + r &= allocator.awaitTermination(timeout, units); + r &= closer.awaitTermination(timeout, units); + return r; } /** diff --git a/src/java/org/apache/cassandra/locator/ReplicaLayout.java b/src/java/org/apache/cassandra/locator/ReplicaLayout.java index 2e111dc9aa37..f961b4051d08 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaLayout.java +++ b/src/java/org/apache/cassandra/locator/ReplicaLayout.java @@ -24,6 +24,8 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.dht.LocalPartitioner; +import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.TableId; @@ -405,6 +407,10 @@ static EndpointsForRange forLocalStrategyRange(ClusterMetadata metadata, Abstrac static EndpointsForToken forLocalStrategyToken(ClusterMetadata metadata, AbstractReplicationStrategy replicationStrategy, Token t) { - return replicationStrategy.calculateNaturalReplicas(t, metadata).forToken(t); + if (!(t instanceof LocalPartitioner.LocalToken)) + return replicationStrategy.calculateNaturalReplicas(t, metadata).forToken(t); + + // local tokens use a different partitioner than the global one... so update the ranges + return EndpointsForToken.of(t, new Replica(FBUtilities.getBroadcastAddressAndPort(), new Range<>(t, t), true)); } } diff --git a/src/java/org/apache/cassandra/schema/SchemaProvider.java b/src/java/org/apache/cassandra/schema/SchemaProvider.java index daef867951c2..844acfe31157 100644 --- a/src/java/org/apache/cassandra/schema/SchemaProvider.java +++ b/src/java/org/apache/cassandra/schema/SchemaProvider.java @@ -35,6 +35,7 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.KeyspaceNotDefinedException; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.exceptions.UnknownTableException; import org.apache.cassandra.io.sstable.Descriptor; @@ -135,6 +136,13 @@ default ColumnFamilyStore getColumnFamilyStoreInstance(TableId id) @Nullable TableMetadata getTableMetadata(TableId id); + @Nullable + default IPartitioner getTablePartitioner(TableId id) + { + TableMetadata metadata = getTableMetadata(id); + return metadata == null ? null : metadata.partitioner; + } + @Nullable default TableMetadataRef getTableMetadataRef(TableId id) { diff --git a/src/java/org/apache/cassandra/schema/TableId.java b/src/java/org/apache/cassandra/schema/TableId.java index c042c905ee18..def82d1c722a 100644 --- a/src/java/org/apache/cassandra/schema/TableId.java +++ b/src/java/org/apache/cassandra/schema/TableId.java @@ -36,6 +36,7 @@ import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.Pair; import static java.nio.charset.StandardCharsets.UTF_8; @@ -50,7 +51,8 @@ public class TableId implements Comparable { public static final long MAGIC = 1956074401491665062L; - // TODO: should this be a TimeUUID? + public static final long EMPTY_SIZE = ObjectSizes.measureDeep(new UUID(0, 0)); + private final UUID id; private TableId(UUID id) diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index d07dcfc87baf..304befe83a52 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -121,6 +121,11 @@ public boolean isComplete() return status().isComplete(); } + int lastQueriedEstimatedSizeOnHeap() + { + return lastQueriedEstimatedSizeOnHeap; + } + int estimatedSizeOnHeap(ToLongFunction estimator) { shouldUpdateSize = false; // TODO (expected): probably not the safest place to clear need to compute size @@ -600,6 +605,12 @@ public boolean isCompleteable() return isDone(); } + @Override + public V get() + { + return current; + } + @Override public State complete() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 4b0722606b83..2a67ba656d56 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -18,8 +18,7 @@ package org.apache.cassandra.service.accord; -import java.util.Collections; -import java.util.List; +import java.util.IdentityHashMap; import java.util.Map; import java.util.NavigableMap; import java.util.Objects; @@ -27,23 +26,19 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; -import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; - import javax.annotation.Nullable; +import accord.api.Key; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.Agent; import accord.api.DataStore; -import accord.api.Key; import accord.api.ProgressLog; import accord.local.CommandsForKey; -import accord.impl.CommandsSummary; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; @@ -52,37 +47,28 @@ import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.SafeCommandStore; -import accord.local.SaveStatus; -import accord.local.SerializerSupport; import accord.local.SerializerSupport.MessageProvider; import accord.messages.Message; -import accord.primitives.AbstractKeys; -import accord.primitives.AbstractRanges; -import accord.primitives.Ballot; -import accord.primitives.Range; import accord.primitives.Ranges; -import accord.primitives.Routable; import accord.primitives.RoutableKey; -import accord.primitives.Routables; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.ReducingRangeMap; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; -import accord.utils.async.Observable; import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.service.accord.async.ExecutionOrder; +import org.apache.cassandra.service.accord.events.CacheEvents; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; @@ -91,8 +77,12 @@ public class AccordCommandStore extends CommandStore implements CacheSize { private static final Logger logger = LoggerFactory.getLogger(AccordCommandStore.class); + private static final boolean CHECK_THREADS = CassandraRelevantProperties.TEST_ACCORD_STORE_THREAD_CHECKS_ENABLED.getBoolean(); + private static long getThreadId(ExecutorService executor) { + if (!CHECK_THREADS) + return 0; try { return executor.submit(() -> Thread.currentThread().getId()).get(); @@ -109,7 +99,7 @@ private static long getThreadId(ExecutorService executor) private final long threadId; public final String loggingId; - private final AccordJournal journal; + private final IJournal journal; private final ExecutorService executor; private final ExecutionOrder executionOrder; private final AccordStateCache stateCache; @@ -119,7 +109,7 @@ private static long getThreadId(ExecutorService executor) private AsyncOperation currentOperation = null; private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; - private CommandsForRanges commandsForRanges = new CommandsForRanges(); + private final CommandsForRangesLoader commandsForRangesLoader; public AccordCommandStore(int id, NodeTimeService time, @@ -127,12 +117,79 @@ public AccordCommandStore(int id, DataStore dataStore, ProgressLog.Factory progressLogFactory, EpochUpdateHolder epochUpdateHolder, - AccordJournal journal, + IJournal journal, AccordStateCacheMetrics cacheMetrics) { this(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder, journal, Stage.READ.executor(), Stage.MUTATION.executor(), cacheMetrics); } + private static void registerJfrListener(int id, AccordStateCache.Instance instance, String name) + { + if (!DatabaseDescriptor.getAccordStateCacheListenerJFREnabled()) + return; + instance.register(new AccordStateCache.Listener() { + private final IdentityHashMap, CacheEvents.Evict> pendingEvicts = new IdentityHashMap<>(); + + @Override + public void onAdd(AccordCachingState state) + { + CacheEvents.Add add = new CacheEvents.Add(); + CacheEvents.Evict evict = new CacheEvents.Evict(); + if (!add.isEnabled()) + return; + add.begin(); + evict.begin(); + add.store = evict.store = id; + add.instance = evict.instance = name; + add.key = evict.key = state.key().toString(); + updateMutable(instance, state, add); + add.commit(); + pendingEvicts.put(state, evict); + } + + @Override + public void onRelease(AccordCachingState state) + { + + } + + @Override + public void onEvict(AccordCachingState state) + { + CacheEvents.Evict event = pendingEvicts.remove(state); + if (event == null) return; + updateMutable(instance, state, event); + event.commit(); + } + }); + } + + private static void updateMutable(AccordStateCache.Instance instance, AccordCachingState state, CacheEvents event) + { + event.status = state.state().status().name(); + + event.lastQueriedEstimatedSizeOnHeap = state.lastQueriedEstimatedSizeOnHeap(); + + event.instanceAllocated = instance.weightedSize(); + AccordStateCache.Stats stats = instance.stats(); + event.instanceStatsQueries = stats.queries; + event.instanceStatsHits = stats.hits; + event.instanceStatsMisses = stats.misses; + + event.globalSize = instance.size(); + event.globalReferenced = instance.globalReferencedEntries(); + event.globalUnreferenced = instance.globalUnreferencedEntries(); + event.globalCapacity = instance.capacity(); + event.globalAllocated = instance.globalAllocated(); + + stats = instance.globalStats(); + event.globalStatsQueries = stats.queries; + event.globalStatsHits = stats.hits; + event.globalStatsMisses = stats.misses; + + event.update(); + } + @VisibleForTesting public AccordCommandStore(int id, NodeTimeService time, @@ -140,7 +197,7 @@ public AccordCommandStore(int id, DataStore dataStore, ProgressLog.Factory progressLogFactory, EpochUpdateHolder epochUpdateHolder, - AccordJournal journal, + IJournal journal, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor, AccordStateCacheMetrics cacheMetrics) @@ -160,6 +217,7 @@ public AccordCommandStore(int id, this::saveCommand, this::validateCommand, AccordObjectSizes::command); + registerJfrListener(id, commandCache, "Command"); timestampsForKeyCache = stateCache.instance(Key.class, AccordSafeTimestampsForKey.class, @@ -168,6 +226,7 @@ public AccordCommandStore(int id, this::saveTimestampsForKey, this::validateTimestampsForKey, AccordObjectSizes::timestampsForKey); + registerJfrListener(id, timestampsForKeyCache, "TimestampsForKey"); commandsForKeyCache = stateCache.instance(Key.class, AccordSafeCommandsForKey.class, @@ -177,6 +236,9 @@ public AccordCommandStore(int id, this::validateCommandsForKey, AccordObjectSizes::commandsForKey, AccordCachingState::new); + registerJfrListener(id, commandsForKeyCache, "CommandsForKey"); + + this.commandsForRangesLoader = new CommandsForRangesLoader(this); AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead) -> { executor.submit(() -> { @@ -194,7 +256,6 @@ public AccordCommandStore(int id, })); executor.execute(() -> CommandStore.register(this)); - executor.execute(this::loadRangesToCommands); } static Factory factory(AccordJournal journal, AccordStateCacheMetrics cacheMetrics) @@ -203,66 +264,16 @@ static Factory factory(AccordJournal journal, AccordStateCacheMetrics cacheMetri new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, rangesForEpoch, journal, cacheMetrics); } - private void loadRangesToCommands() + public CommandsForRangesLoader diskCommandsForRanges() { - AsyncPromise future = new AsyncPromise<>(); - AccordKeyspace.findAllCommandsByDomain(id, Routable.Domain.Range, ImmutableSet.of("txn_id", "status", "accepted_ballot", "execute_at"), new Observable() - { - private CommandsForRanges.Builder builder = new CommandsForRanges.Builder(); - - @Override - public void onNext(UntypedResultSet.Row row) throws Exception - { - TxnId txnId = AccordKeyspace.deserializeTxnId(row); - SaveStatus status = AccordKeyspace.deserializeStatus(row); - Timestamp executeAt = AccordKeyspace.deserializeExecuteAtOrNull(row); - Ballot accepted = AccordKeyspace.deserializeAcceptedOrNull(row); - - MessageProvider messageProvider = journal.makeMessageProvider(txnId); - - SerializerSupport.TxnAndDeps txnAndDeps = SerializerSupport.extractTxnAndDeps(unsafeRangesForEpoch(), status, accepted, messageProvider); - Seekables keys = txnAndDeps.txn.keys(); - if (keys.domain() != Routable.Domain.Range) - throw new AssertionError(String.format("Txn keys are not range for %s", txnAndDeps.txn)); - Ranges ranges = (Ranges) keys; - - List dependsOn = txnAndDeps.deps == null ? Collections.emptyList() : txnAndDeps.deps.txnIds(); - builder.put(txnId, ranges, status, executeAt, dependsOn); - } - - @Override - public void onError(Throwable t) - { - builder = null; - future.tryFailure(t); - } - - @Override - public void onCompleted() - { - CommandsForRanges result = this.builder.build(); - builder = null; - future.trySuccess(result); - } - }); - try - { - commandsForRanges = future.get(); - logger.debug("Loaded {} intervals", commandsForRanges.size()); - } - catch (InterruptedException e) - { - throw new UncheckedInterruptedException(e); - } - catch (ExecutionException e) - { - throw new RuntimeException(e.getCause()); - } + return commandsForRangesLoader; } @Override public boolean inStore() { + if (!CHECK_THREADS) + return true; return Thread.currentThread().getId() == threadId; } @@ -298,6 +309,8 @@ public void checkInStoreThread() public void checkNotInStoreThread() { + if (!CHECK_THREADS) + return; Invariants.checkState(!inStore()); } @@ -320,7 +333,6 @@ public AccordStateCache.Instance { return commandsForKeyCache; } - Command loadCommand(TxnId txnId) { return AccordKeyspace.loadCommand(this, txnId); @@ -467,13 +479,16 @@ public void executeBlocking(Runnable runnable) public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, Map commands, NavigableMap timestampsForKeys, - NavigableMap commandsForKeys) + NavigableMap commandsForKeys, + @Nullable AccordSafeCommandsForRanges commandsForRanges) { Invariants.checkState(current == null); commands.values().forEach(AccordSafeState::preExecute); commandsForKeys.values().forEach(AccordSafeState::preExecute); timestampsForKeys.values().forEach(AccordSafeState::preExecute); - current = new AccordSafeCommandStore(preLoadContext, commands, timestampsForKeys, commandsForKeys, this); + if (commandsForRanges != null) + commandsForRanges.preExecute(); + current = new AccordSafeCommandStore(preLoadContext, commands, timestampsForKeys, commandsForKeys, commandsForRanges, this); return current; } @@ -489,46 +504,6 @@ public void completeOperation(AccordSafeCommandStore store) current = null; } - O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) - { - keysOrRanges = keysOrRanges.slice(slice, Routables.Slice.Minimal); - switch (keysOrRanges.domain()) - { - case Key: - { - AbstractKeys keys = (AbstractKeys) keysOrRanges; - for (CommandsSummary summary : commandsForRanges.search(keys)) - accumulate = map.apply(summary, accumulate); - } - break; - case Range: - { - AbstractRanges ranges = (AbstractRanges) keysOrRanges; - for (Range range : ranges) - { - CommandsSummary summary = commandsForRanges.search(range); - if (summary == null) - continue; - accumulate = map.apply(summary, accumulate); - } - } - break; - default: - throw new AssertionError("Unknown domain: " + keysOrRanges.domain()); - } - return accumulate; - } - - CommandsForRanges commandsForRanges() - { - return commandsForRanges; - } - - CommandsForRanges.Updater updateRanges() - { - return commandsForRanges.update(); - } - public void abortCurrentOperation() { current = null; @@ -576,13 +551,6 @@ protected void setRedundantBefore(RedundantBefore newRedundantBefore) AccordKeyspace.updateRedundantBefore(this, newRedundantBefore); } - @Override - public void markShardDurable(SafeCommandStore safeStore, TxnId globalSyncId, Ranges ranges) - { - super.markShardDurable(safeStore, globalSyncId, ranges); - commandsForRanges.prune(globalSyncId, ranges); - } - public NavigableMap bootstrapBeganAt() { return super.bootstrapBeganAt(); } public NavigableMap safeToRead() { return super.safeToRead(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 26e09ada14eb..0562da11396d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; @@ -27,6 +28,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.function.BiConsumer; import java.util.function.Predicate; import java.util.zip.Checksum; @@ -99,6 +101,7 @@ import org.apache.cassandra.service.accord.serializers.RecoverySerializers; import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.concurrent.Semaphore; import org.jctools.queues.SpscLinkedQueue; @@ -143,7 +146,7 @@ import static org.apache.cassandra.utils.concurrent.Semaphore.newSemaphore; import static org.apache.cassandra.utils.vint.VIntCoding.computeUnsignedVIntSize; -public class AccordJournal implements Shutdownable +public class AccordJournal implements IJournal, Shutdownable { private static final Logger logger = LoggerFactory.getLogger(AccordJournal.class); @@ -262,8 +265,15 @@ public Object shutdownNow() @Override public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { - // TODO (expected, other) - return true; + try + { + ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(journal, frameAggregator, frameApplicator)); + return true; + } + catch (TimeoutException e) + { + return false; + } } /** @@ -296,6 +306,7 @@ public void appendLocalRequest(LocalRequest request) } @VisibleForTesting + @Override public void appendMessageBlocking(Message message) { Type type = Type.fromMessageType(message.type()); @@ -1061,7 +1072,7 @@ private static int msVersion(int version) * Once written, the frame record is submitted to {@link FrameApplicator}, which will process all the framed * requests once the frame has been flushed to disk. */ - private final class FrameAggregator implements Interruptible.Task + private final class FrameAggregator implements Interruptible.Task, Shutdownable { /* external MPSC pending request queue */ private final ManyToOneConcurrentLinkedQueue unframedRequests = new ManyToOneConcurrentLinkedQueue<>(); @@ -1091,9 +1102,26 @@ void start() executor = executorFactory().infiniteLoop("AccordJournal#FrameAggregator", this, SAFE, NON_DAEMON, SYNCHRONIZED); } - void shutdown() + @Override + public boolean isTerminated() { + return executor == null || executor.isTerminated(); + } + + @Override + public void shutdown() { - executor.shutdown(); + if (executor != null) + executor.shutdown(); + } + + @Override + public Object shutdownNow() { + return executor == null ? null : executor.shutdownNow(); + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { + return executor == null || executor.awaitTermination(timeout, units); } @Override @@ -1168,7 +1196,7 @@ private void doRun() * Gets the aggregated frames containing previously written requests/messages, * and sorts and "applies" them once part of the journal that fully contains them is flushed. */ - private final class FrameApplicator implements Runnable + private final class FrameApplicator implements Runnable, Shutdownable { /** external SPSC written frame queue */ private final SpscLinkedQueue newFrames = new SpscLinkedQueue<>(); @@ -1199,9 +1227,26 @@ void start() executor = executorFactory().sequential("AccordJournal#FrameApplicator"); } - void shutdown() + @Override + public boolean isTerminated() { + return executor == null || executor.isTerminated(); + } + + @Override + public void shutdown() { - executor.shutdown(); + if (executor != null) + executor.shutdown(); + } + + @Override + public Object shutdownNow() { + return executor == null ? null : executor.shutdownNow(); + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { + return executor == null || executor.awaitTermination(timeout, units); } @Override @@ -1334,8 +1379,8 @@ public FrameRecord deserialize(Key key, DataInputPlus in, int userVersion) throw /* * Message provider implementation */ - - SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) + @Override + public SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) { return LOG_MESSAGE_PROVIDER ? new LoggingMessageProvider(txnId, new MessageProvider(txnId)) : new MessageProvider(txnId); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index da832ae2cbf1..7138eb9ab2d9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -27,7 +27,10 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Objects; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Executor; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -37,6 +40,7 @@ import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -118,17 +122,23 @@ import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.transform.FilteredPartitions; import org.apache.cassandra.dht.ByteOrderedPartitioner; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.LocalPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.accord.RouteIndex; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.LocalVersionedSerializer; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.IndexMetadata; +import org.apache.cassandra.schema.Indexes; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SchemaProvider; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; @@ -138,7 +148,9 @@ import org.apache.cassandra.serializers.UUIDSerializer; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.AccordConfigurationService.SyncStatus; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.AccordRoutingKeyByteSource; import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; @@ -192,6 +204,15 @@ public class AccordKeyspace private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexSliceFilter(Slices.ALL, false); + //TODO (now, performance): should this be partitioner rather than TableId? As of this patch distributed tables should only have 1 partitioner... + private static final ConcurrentMap TABLE_SERIALIZERS = new ConcurrentHashMap<>(); + + // Schema needs all system keyspace, and this is a system keyspace! So can not touch schema in init + private static class SchemaHolder + { + private static SchemaProvider schema = Objects.requireNonNull(Schema.instance); + } + private enum TokenType { Murmur3((byte) 1), @@ -216,7 +237,7 @@ static TokenType valueOf(Token token) } // TODO: store timestamps as blobs (confirm there are no negative numbers, or offset) - private static final TableMetadata Commands = + public static final TableMetadata Commands = parse(COMMANDS, "accord commands", "CREATE TABLE %s (" @@ -234,10 +255,13 @@ static TokenType valueOf(Token token) + "PRIMARY KEY((store_id, domain, txn_id))" + ')') .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, Int32Type.instance, TIMESTAMP_TYPE))) + .indexes(Indexes.builder() + .add(IndexMetadata.fromSchemaMetadata("route", IndexMetadata.Kind.CUSTOM, ImmutableMap.of("class_name", RouteIndex.class.getCanonicalName(), "target", "route"))) + .build()) .build(); // TODO: naming is not very clearly distinct from the base serializers - private static class LocalVersionedSerializers + public static class LocalVersionedSerializers { static final LocalVersionedSerializer> route = localSerializer(KeySerializers.route); static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); @@ -266,8 +290,8 @@ public static class CommandsColumns { static final ClusteringComparator keyComparator = Commands.partitionKeyAsClusteringComparator(); static final CompositeType partitionKeyType = (CompositeType) Commands.partitionKeyType; - static final ColumnMetadata txn_id = getColumn(Commands, "txn_id"); - static final ColumnMetadata store_id = getColumn(Commands, "store_id"); + public static final ColumnMetadata txn_id = getColumn(Commands, "txn_id"); + public static final ColumnMetadata store_id = getColumn(Commands, "store_id"); public static final ColumnMetadata status = getColumn(Commands, "status"); public static final ColumnMetadata route = getColumn(Commands, "route"); public static final ColumnMetadata durability = getColumn(Commands, "durability"); @@ -298,6 +322,12 @@ public static int getStoreId(ByteBuffer[] partitionKeyComponents) return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); } + public static int getStoreId(DecoratedKey pk) + { + ByteBuffer[] array = splitPartitionKey(pk); + return getStoreId(array); + } + public static TxnId getTxnId(ByteBuffer[] partitionKeyComponents) { return deserializeTimestampOrNull(partitionKeyComponents[txn_id.position()], ByteBufferAccessor.instance, TxnId::fromBits); @@ -409,6 +439,8 @@ public static Row maybeDropTruncatedCommandColumns(Row row, Cell durabilityCe } } + //TODO (now, performance): do we actually care about the sort ordering? We don't do range scans on this table + //TODO (now, performance): should we remove key_token? We don't need it so its just added space private static final TableMetadata TimestampsForKeys = parse(TIMESTAMPS_FOR_KEY, "accord timestamps per key", @@ -578,6 +610,11 @@ public int getStoreId(ByteBuffer[] partitionKeyComponents) return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); } + public PartitionKey getKey(DecoratedKey key) + { + return getKey(splitPartitionKey(key)); + } + public PartitionKey getKey(ByteBuffer[] partitionKeyComponents) { return deserializeKey(partitionKeyComponents[key.position()]); @@ -860,12 +897,13 @@ private static V serializeToken(Token token, ValueAccessor accessor) return value; } - private static ByteBuffer serializeKey(PartitionKey key) + @VisibleForTesting + public static ByteBuffer serializeKey(PartitionKey key) { return KEY_TYPE.pack(UUIDSerializer.instance.serialize(key.table().asUUID()), key.partitionKey().getKey()); } - private static ByteBuffer serializeTimestamp(Timestamp timestamp) + public static ByteBuffer serializeTimestamp(Timestamp timestamp) { return TIMESTAMP_TYPE.pack(bytes(timestamp.msb), bytes(timestamp.lsb), bytes(timestamp.node.id)); } @@ -1097,15 +1135,15 @@ protected UntypedResultSet query(UntypedResultSet.Row lastSeen) } public static void findAllKeysBetween(int commandStore, - Token start, boolean startInclusive, - Token end, boolean endInclusive, + AccordRoutingKey start, boolean startInclusive, + AccordRoutingKey end, boolean endInclusive, Observable callback) { //TODO (optimize) : CQL doesn't look smart enough to only walk Index.db, and ends up walking the Data.db file for each row in the partitions found (for frequent keys, this cost adds up) // it would be possible to find all SSTables that "could" intersect this range, then have a merge iterator over the Index.db (filtered to the range; index stores partition liveness)... KeysBetween work = new KeysBetween(commandStore, - AccordKeyspace.serializeToken(start), startInclusive, - AccordKeyspace.serializeToken(end), endInclusive, + AccordKeyspace.serializeRoutingKey(start), startInclusive, + AccordKeyspace.serializeRoutingKey(end), endInclusive, ImmutableSet.of("key"), Stage.READ.executor(), Observable.distinct(callback).map(AccordKeyspace::deserializeKey)); work.schedule(); @@ -1131,14 +1169,14 @@ private KeysBetween(int storeId, this.start = start; this.end = end; - String selection = selection(TimestampsForKeys, requiredColumns, COLUMNS_FOR_ITERATION); + String selection = selection(CommandsForKeys, requiredColumns, COLUMNS_FOR_ITERATION); this.cqlFirst = format("SELECT DISTINCT %s\n" + "FROM %s\n" + "WHERE store_id = ?\n" + (startInclusive ? " AND key_token >= ?\n" : " AND key_token > ?\n") + (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + "ALLOW FILTERING", - selection, TimestampsForKeys); + selection, CommandsForKeys); this.cqlContinue = format("SELECT DISTINCT %s\n" + "FROM %s\n" + "WHERE store_id = ?\n" + @@ -1146,7 +1184,7 @@ private KeysBetween(int storeId, " AND key > ?\n" + (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + "ALLOW FILTERING", - selection, TimestampsForKeys); + selection, CommandsForKeys); } @Override @@ -1216,11 +1254,16 @@ public static Status.Durability deserializeDurability(UntypedResultSet.Row row) return Status.Durability.values()[row.getInt("durability", 0)]; } - private static Route deserializeRouteOrNull(ByteBuffer bytes) throws IOException + public static Route deserializeRouteOrNull(ByteBuffer bytes) throws IOException { return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, LocalVersionedSerializers.route) : null; } + public static ByteBuffer serializeRoute(Route route) throws IOException + { + return serialize(route, LocalVersionedSerializers.route); + } + private static Route deserializeRouteOrNull(UntypedResultSet.Row row) throws IOException { return deserializeRouteOrNull(row.getBlob("route")); @@ -1303,10 +1346,10 @@ public static PartitionKey deserializeKey(ByteBuffer buffer) TableId tableId = TableId.fromUUID(UUIDSerializer.instance.deserialize(split.get(0))); ByteBuffer key = split.get(1); - TableMetadata metadata = Schema.instance.getTableMetadata(tableId); - if (metadata == null) + IPartitioner partitioner = SchemaHolder.schema.getTablePartitioner(tableId); + if (partitioner == null) throw new IllegalStateException("Table with id " + tableId + " could not be found; was it deleted?"); - return new PartitionKey(tableId, metadata.partitioner.decorateKey(key)); + return new PartitionKey(tableId, partitioner.decorateKey(key)); } public static PartitionKey deserializeKey(UntypedResultSet.Row row) @@ -1390,16 +1433,36 @@ public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore com private static DecoratedKey makeKey(CommandsForKeyAccessor accessor, int storeId, PartitionKey key) { - Token token = key.token(); ByteBuffer pk = accessor.keyComparator.make(storeId, - serializeToken(token), + serializeRoutingKey(key.toUnseekable()), serializeKey(key)).serializeAsPartitionKey(); return accessor.table.partitioner.decorateKey(pk); } + @VisibleForTesting + public static ByteBuffer serializeRoutingKey(AccordRoutingKey routingKey) + { + AccordRoutingKeyByteSource.Serializer serializer = TABLE_SERIALIZERS.computeIfAbsent(routingKey.table(), ignore -> { + IPartitioner partitioner; + if (routingKey.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.TOKEN) + partitioner = routingKey.asTokenKey().token().getPartitioner(); + else + partitioner = SchemaHolder.schema.getTablePartitioner(routingKey.table()); + return AccordRoutingKeyByteSource.variableLength(partitioner); + }); + byte[] bytes = serializer.serialize(routingKey); + return ByteBuffer.wrap(bytes); + } + private static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, PartitionKey key, CommandsForKey commandsForKey, long timestampMicros) { ByteBuffer bytes = CommandsForKeySerializer.toBytesWithoutKey(commandsForKey); + return getCommandsForKeyPartitionUpdate(storeId, key, timestampMicros, bytes); + } + + @VisibleForTesting + public static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, PartitionKey key, long timestampMicros, ByteBuffer bytes) + { return singleRowUpdate(CommandsForKeysAccessor.table, makeKey(CommandsForKeysAccessor, storeId, key), singleCellRow(Clustering.EMPTY, BufferCell.live(CommandsForKeysAccessor.data, timestampMicros, bytes))); @@ -1832,4 +1895,19 @@ public static void loadCommandStoreMetadata(int id, CommandStoreMetadataConsumer consumer.accept(rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead); } + @VisibleForTesting + public static void unsafeSetSchema(SchemaProvider provider) + { + SchemaHolder.schema = provider; + } + + @VisibleForTesting + public static void unsafeClear() + { + for (ColumnFamilyStore store : Keyspace.open(SchemaConstants.ACCORD_KEYSPACE_NAME).getColumnFamilyStores()) + store.truncateBlockingWithoutSnapshot(); + TABLE_SERIALIZERS.clear(); + SchemaHolder.schema = Schema.instance; + } + } diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 73827a3771d4..d72644811afc 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -222,7 +222,7 @@ public void send(Node.Id to, Request request) Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); - logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); + logger.trace("Sending {} {} to {}", verb, message.payload, endpoint); messaging.send(message, endpoint); } @@ -233,7 +233,7 @@ public void send(Node.Id to, Request request, AgentExecutor executor, Callback c Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); Message message = Message.out(verb, request); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); - logger.debug("Sending {} {} to {}", verb, message.payload, endpoint); + logger.trace("Sending {} {} to {}", verb, message.payload, endpoint); messaging.sendWithCallback(message, endpoint, new AccordCallback<>(executor, (Callback) callback, endpointMapper)); } @@ -246,7 +246,7 @@ public void reply(Node.Id replyingToNode, ReplyContext replyContext, Reply reply responseMsg = responseMsg.withFlag(MessageFlag.NOT_FINAL); checkReplyType(reply, respondTo); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); - logger.debug("Replying {} {} to {}", responseMsg.verb(), responseMsg.payload, endpoint); + logger.trace("Replying {} {} to {}", responseMsg.verb(), responseMsg.payload, endpoint); messaging.send(responseMsg, endpoint); } @@ -256,7 +256,7 @@ public void replyWithUnknownFailure(Node.Id replyingToNode, ReplyContext replyCo ResponseContext respondTo = (ResponseContext) replyContext; Message responseMsg = Message.failureResponse(RequestFailureReason.UNKNOWN, failure, respondTo); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(replyingToNode); - logger.debug("Replying with failure {} {} to {}", responseMsg.verb(), responseMsg.payload, endpoint); + logger.trace("Replying with failure {} {} to {}", responseMsg.verb(), responseMsg.payload, endpoint); messaging.send(responseMsg, endpoint); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 1c497cc5c94f..9f4776c2e96a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -18,12 +18,9 @@ package org.apache.cassandra.service.accord; -import java.util.Collections; -import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.function.BiFunction; - import javax.annotation.Nullable; import accord.api.Agent; @@ -33,12 +30,13 @@ import accord.impl.AbstractSafeCommandStore; import accord.local.CommandsForKey; import accord.impl.CommandsSummary; -import accord.local.Command; import accord.local.CommandStores.RangesForEpoch; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.primitives.AbstractKeys; +import accord.primitives.AbstractRanges; import accord.primitives.Deps; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routables; import accord.primitives.Seekables; @@ -46,27 +44,27 @@ import accord.primitives.Txn; import accord.primitives.TxnId; -import static accord.primitives.Routable.Domain.Range; - public class AccordSafeCommandStore extends AbstractSafeCommandStore { private final Map commands; private final NavigableMap commandsForKeys; private final NavigableMap timestampsForKeys; + private final @Nullable AccordSafeCommandsForRanges commandsForRanges; private final AccordCommandStore commandStore; private final RangesForEpoch ranges; - CommandsForRanges.Updater rangeUpdates = null; public AccordSafeCommandStore(PreLoadContext context, Map commands, NavigableMap timestampsForKey, NavigableMap commandsForKey, + @Nullable AccordSafeCommandsForRanges commandsForRanges, AccordCommandStore commandStore) { super(context); this.commands = commands; this.timestampsForKeys = timestampsForKey; this.commandsForKeys = commandsForKey; + this.commandsForRanges = commandsForRanges; this.commandStore = commandStore; this.ranges = commandStore.updateRangesForEpoch(); } @@ -185,27 +183,57 @@ public void registerHistoricalTransactions(Deps deps) get(key).registerHistorical(this, txnId); }); }); - CommandsForRanges commandsForRanges = commandStore.commandsForRanges(); - deps.rangeDeps.forEachUniqueTxnId(allRanges, txnId -> { - if (commandsForRanges.containsLocally(txnId)) - return; - - Ranges ranges = deps.rangeDeps.ranges(txnId); - if (this.ranges.coordinates(txnId).intersects(ranges)) - return; // already coordinates, no need to replicate - if (!this.ranges.allBefore(txnId.epoch()).intersects(ranges)) - return; + for (int i = 0; i < deps.rangeDeps.rangeCount(); i++) + { + Range range = deps.rangeDeps.range(i); + if (!allRanges.intersects(range)) + continue; + deps.rangeDeps.forEach(range, txnId -> { + // TODO (desired, efficiency): this can be made more efficient by batching by epoch + if (ranges.coordinates(txnId).intersects(range)) + return; // already coordinates, no need to replicate + if (!ranges.allBefore(txnId.epoch()).intersects(range)) + return; - updateRanges().mergeRemote(txnId, ranges.slice(allRanges), Ranges::with); - }); + commandStore.diskCommandsForRanges().mergeHistoricalTransaction(txnId, Ranges.single(range).slice(allRanges), Ranges::with); + }); + } } private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) { - accumulate = commandStore.mapReduceForRange(keysOrRanges, slice, map, accumulate); + accumulate = mapReduceForRange(keysOrRanges, slice, map, accumulate); return mapReduceForKey(keysOrRanges, slice, map, accumulate); } + private O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) + { + if (commandsForRanges == null) + return accumulate; + switch (keysOrRanges.domain()) + { + case Key: + { + AbstractKeys keys = (AbstractKeys) keysOrRanges.slice(slice, Routables.Slice.Minimal); + if (!commandsForRanges.ranges().intersects(keys)) + return accumulate; + accumulate = map.apply(commandsForRanges.current(), accumulate); + } + break; + case Range: + { + AbstractRanges ranges = (AbstractRanges) keysOrRanges.slice(slice, Routables.Slice.Minimal); + if (!commandsForRanges.ranges().intersects(ranges)) + return accumulate; + accumulate = map.apply(commandsForRanges.current(), accumulate); + } + break; + default: + throw new AssertionError("Unknown domain: " + keysOrRanges.domain()); + } + return accumulate; + } + private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) { switch (keysOrRanges.domain()) @@ -260,33 +288,6 @@ public T mapReduceFull(Seekables keysOrRanges, Ranges slice, TxnId }, accumulate); } - @Override - protected void update(Command prev, Command updated) - { - super.update(prev, updated); - - if (updated.txnId().domain() == Range && CommandsForKey.needsUpdate(prev, updated)) - { - Seekables keysOrRanges = updated.keysOrRanges(); - if (keysOrRanges == null) keysOrRanges = prev.keysOrRanges(); - if (keysOrRanges == null) - return; - - List waitingOn; - // TODO (required): this is faulty: we cannot simply save the raw transaction ids, as they may be for other ranges - if (updated.partialDeps() == null) waitingOn = Collections.emptyList(); - else waitingOn = updated.partialDeps().txnIds(); - updateRanges().put(updated.txnId(), (Ranges)keysOrRanges, updated.saveStatus(), updated.executeAt(), waitingOn); - } - } - - protected CommandsForRanges.Updater updateRanges() - { - if (rangeUpdates == null) - rangeUpdates = commandStore.updateRanges(); - return rangeUpdates; - } - @Override protected void invalidateSafeState() { @@ -297,14 +298,14 @@ protected void invalidateSafeState() public void postExecute(Map commands, Map timestampsForKey, - Map commandsForKeys - ) + Map commandsForKeys, + @Nullable AccordSafeCommandsForRanges commandsForRanges) { postExecute(); commands.values().forEach(AccordSafeState::postExecute); timestampsForKey.values().forEach(AccordSafeState::postExecute); commandsForKeys.values().forEach(AccordSafeState::postExecute); - if (rangeUpdates != null) - rangeUpdates.apply(); + if (commandsForRanges != null) + commandsForRanges.postExecute(); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java new file mode 100644 index 000000000000..42fb0f6ef1b3 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.NavigableMap; +import java.util.Objects; + +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.TxnId; +import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.utils.Pair; + +public class AccordSafeCommandsForRanges implements AccordSafeState +{ + private final AsyncResult>> chain; + private final Ranges ranges; + private boolean invalidated; + private CommandsForRanges original, current; + + public AccordSafeCommandsForRanges(Ranges ranges, AsyncResult>> chain) + { + this.ranges = ranges; + this.chain = chain; + } + + public Ranges ranges() + { + return ranges; + } + + @Override + public CommandsForRanges current() + { + checkNotInvalidated(); + return current; + } + + @Override + public void invalidate() + { + invalidated = true; + } + + @Override + public boolean invalidated() + { + return invalidated; + } + + @Override + public void set(CommandsForRanges update) + { + throw new UnsupportedOperationException(); + } + + @Override + public CommandsForRanges original() + { + checkNotInvalidated(); + return original; + } + + @Override + public void preExecute() + { + checkNotInvalidated(); + Pair> pair = AsyncChains.getUnchecked(chain); + pair.left.close(); + pair.left.get().entrySet().forEach(e -> pair.right.put(e.getKey(), e.getValue())); + current = original = new CommandsForRanges(ranges, pair.right); + } + + @Override + public void postExecute() + { + checkNotInvalidated(); + } + + @Override + public AccordCachingState global() + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordSafeCommandsForRanges that = (AccordSafeCommandsForRanges) o; + return Objects.equals(original, that.original) && Objects.equals(current, that.current); + } + + @Override + public int hashCode() + { + return Objects.hash(original, current); + } + + @Override + public String toString() + { + return "AccordSafeCommandsForRange{" + + "chain=" + chain + + ", invalidated=" + invalidated + + ", original=" + original + + ", current=" + current + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 8378f65c91dd..04ef7e6355b6 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -146,6 +146,12 @@ public long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.R throw new UnsupportedOperationException("No accord barriers should be executed when accord.enabled = false in cassandra.yaml"); } + @Override + public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + { + throw new UnsupportedOperationException(); + } + @Override public @Nonnull TxnResult coordinate(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, @Nonnull Dispatcher.RequestTime requestTime) { @@ -213,6 +219,12 @@ public static void unsafeSetNewAccordService() instance = null; } + @VisibleForTesting + public static void unsafeSetNoop() + { + instance = NOOP_SERVICE; + } + public static boolean isSetup() { return instance != null; diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 1196089d62df..b76d63b83080 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -17,8 +17,11 @@ */ package org.apache.cassandra.service.accord; +import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.function.BiFunction; import java.util.function.Function; @@ -68,9 +71,9 @@ public static void validateLoadOnEvict(boolean value) static class Stats { - private long queries; - private long hits; - private long misses; + long queries; + long hits; + long misses; } private ImmutableList> instances = ImmutableList.of(); @@ -83,6 +86,7 @@ static class Stats @VisibleForTesting final AccordStateCacheMetrics metrics; + final Stats stats = new Stats(); public AccordStateCache(ExecutorPlus loadExecutor, ExecutorPlus saveExecutor, long maxSizeInBytes, AccordStateCacheMetrics metrics) { @@ -141,34 +145,41 @@ private void maybeEvictSomeNodes() while (iter.hasNext() && bytesCached > maxSizeInBytes) { AccordCachingState node = iter.next(); - checkState(node.references == 0); - - /* - * TODO (expected, efficiency): - * can this be reworked so we're not skipping unevictable nodes everytime we try to evict? - */ - Status status = node.status(); // status() call completes (if completeable) - switch (status) - { - default: throw new IllegalStateException("Unhandled status " + status); - case LOADED: - unlink(node); - evict(node); - break; - case MODIFIED: - // schedule a save to disk, keep linked and in the cache map - Instance instance = instanceForNode(node); - node.save(saveExecutor, instance.saveFunction); - maybeUpdateSize(node, instance.heapEstimator); - break; - case SAVING: - // skip over until completes to LOADED or FAILED_TO_SAVE - break; - case FAILED_TO_SAVE: - // TODO (consider): panic when a save fails - // permanently unlink, but keep in the map - unlink(node); - } + maybeEvict(node); + } + } + + @VisibleForTesting + public boolean maybeEvict(AccordCachingState node) + { + checkState(node.references == 0); + + /* + * TODO (expected, efficiency): + * can this be reworked so we're not skipping unevictable nodes everytime we try to evict? + */ + Status status = node.status(); // status() call completes (if completeable) + switch (status) + { + default: throw new IllegalStateException("Unhandled status " + status); + case LOADED: + unlink(node); + evict(node); + return true; + case MODIFIED: + // schedule a save to disk, keep linked and in the cache map + Instance instance = instanceForNode(node); + node.save(saveExecutor, instance.saveFunction); + maybeUpdateSize(node, instance.heapEstimator); + return false; + case SAVING: + // skip over until completes to LOADED or FAILED_TO_SAVE + return false; + case FAILED_TO_SAVE: + // TODO (consider): panic when a save fails + // permanently unlink, but keep in the map + unlink(node); + return false; } } @@ -189,12 +200,14 @@ private void evict(AccordCachingState node) instance.bytesCached -= node.lastQueriedEstimatedSizeOnHeap; if (node.status() == LOADED && VALIDATE_LOAD_ON_EVICT) - instanceForNode(node).validateLoadEvicted(node); + instance.validateLoadEvicted(node); if (!node.hasListeners()) { AccordCachingState self = instances.get(node.index).cache.remove(node.key()); checkState(self == node, "Leaked node detected; was attempting to remove %s but cache had %s", node, self); + if (instance.listeners != null) + instance.listeners.forEach(l -> l.onEvict((AccordCachingState) node)); } else { @@ -240,7 +253,19 @@ public > Instance instance( return instance(keyClass, valClass, safeRefFactory, loadFunction, saveFunction, validateFunction, heapEstimator, AccordCachingState.defaultFactory()); } - public class Instance> implements CacheSize + public Collection> instances() + { + return instances; + } + + public interface Listener + { + default void onAdd(AccordCachingState state) {} + default void onRelease(AccordCachingState state) {} + default void onEvict(AccordCachingState state) {} + } + + public class Instance> implements CacheSize, Iterable> { private final int index; private final Class keyClass; @@ -257,6 +282,7 @@ public class Instance> implements CacheSiz private final Stats stats = new Stats(); private final Map> cache = new HashMap<>(); private final AccordCachingState.Factory nodeFactory; + private List> listeners = null; public Instance( int index, Class keyClass, @@ -278,13 +304,36 @@ public Instance( this.nodeFactory = nodeFactory; } + public void register(Listener l) + { + if (listeners == null) + listeners = new ArrayList<>(); + listeners.add(l); + } + + public void unregister(Listener l) + { + if (listeners == null) + throw new AssertionError("No listeners exist"); + if (!listeners.remove(l)) + throw new AssertionError("Listener was not registered"); + if (listeners.isEmpty()) + listeners = null; + } + public Stream> stream() { return cache.entrySet().stream() - .filter(e -> keyClass.isAssignableFrom(e.getKey().getClass())) + .filter(e -> instanceForNode(e.getValue()) == this) .map(e -> (AccordCachingState) e.getValue()); } + @Override + public Iterator> iterator() + { + return stream().iterator(); + } + public S acquireOrInitialize(K key, Function valueFactory) { incrementCacheQueries(); @@ -295,6 +344,11 @@ public S acquireOrInitialize(K key, Function valueFactory) node = nodeFactory.create(key, index); node.initialize(valueFactory.apply(key)); cache.put(key, node); + if (listeners != null) + { + AccordCachingState finalNode = node; + listeners.forEach(l -> l.onAdd(finalNode)); + } } AccordCachingState acquired = acquireExisting(node, true); Invariants.checkState(acquired != null, "%s could not be acquired", node); @@ -350,6 +404,8 @@ private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded) node.references++; cache.put(key, node); + if (listeners != null) + listeners.forEach(l -> l.onAdd(node)); maybeUpdateSize(node, heapEstimator); metrics.objectSize.update(node.lastQueriedEstimatedSizeOnHeap); maybeEvictSomeNodes(); @@ -402,6 +458,9 @@ public void release(S safeRef) maybeUpdateSize(node, heapEstimator); + if (listeners != null) + listeners.forEach(l -> l.onRelease(node)); + if (--node.references == 0) { Status status = node.status(); // status() completes @@ -508,18 +567,34 @@ private void incrementCacheQueries() { instanceMetrics.requests.mark(); metrics.requests.mark(); + stats.queries++; + AccordStateCache.this.stats.queries++; } private void incrementCacheHits() { instanceMetrics.hits.mark(); metrics.hits.mark(); + stats.hits++; + AccordStateCache.this.stats.hits++; } private void incrementCacheMisses() { instanceMetrics.misses.mark(); metrics.misses.mark(); + stats.misses++; + AccordStateCache.this.stats.misses++; + } + + public Stats stats() + { + return stats; + } + + public Stats globalStats() + { + return AccordStateCache.this.stats; } @VisibleForTesting @@ -557,6 +632,30 @@ public long weightedSize() { return bytesCached; } + + public long globalAllocated() + { + return AccordStateCache.this.bytesCached; + } + + public int globalReferencedEntries() + { + return AccordStateCache.this.numReferencedEntries(); + } + + public int globalUnreferencedEntries() + { + return AccordStateCache.this.numUnreferencedEntries(); + } + + @Override + public String toString() + { + return "Instance{" + + "index=" + index + + ", keyClass=" + keyClass + + '}'; + } } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 8738a5d7caa0..e47fea9d6a26 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -47,7 +47,7 @@ public void doVerb(Message message) throws IOException { // TODO (desired): need a non-blocking way to inform CMS of an unknown epoch and add callback to it's receipt // ClusterMetadataService.instance().maybeCatchup(message.epoch()); - logger.debug("Receiving {} from {}", message.payload, message.from()); + logger.trace("Receiving {} from {}", message.payload, message.from()); T request = message.payload; if (request.type().hasSideEffects()) diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index 75a069b03a7d..720013762a69 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -19,49 +19,25 @@ package org.apache.cassandra.service.accord; import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.Set; +import java.util.NavigableMap; import java.util.TreeMap; -import java.util.TreeSet; -import java.util.function.BiFunction; -import java.util.function.Function; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.AbstractIterator; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.ImmutableSortedMap; - -import accord.api.Key; -import accord.api.RoutingKey; -import accord.local.CommandsForKey; import accord.impl.CommandsSummary; -import accord.local.Command; +import accord.local.SafeCommandStore.CommandFunction; +import accord.local.SafeCommandStore.TestDep; +import accord.local.SafeCommandStore.TestStartedAt; +import accord.local.SafeCommandStore.TestStatus; import accord.local.SaveStatus; -import accord.primitives.AbstractKeys; import accord.primitives.Range; import accord.primitives.Ranges; -import accord.primitives.RoutableKey; -import accord.primitives.Seekable; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; -import accord.utils.Invariants; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.accord.api.AccordRoutingKey; -import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; -import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.utils.Interval; -import org.apache.cassandra.utils.IntervalTree; -import static accord.local.SafeCommandStore.*; import static accord.local.SafeCommandStore.TestDep.ANY_DEPS; import static accord.local.SafeCommandStore.TestDep.WITH; import static accord.local.SafeCommandStore.TestStartedAt.STARTED_BEFORE; @@ -69,509 +45,119 @@ import static accord.local.Status.Stable; import static accord.local.Status.Truncated; -public class CommandsForRanges +public class CommandsForRanges implements CommandsSummary { - public enum TxnType - { - UNKNOWN, LOCAL, REMOTE; - - private boolean isSafeToMix(TxnType other) - { - if (this == UNKNOWN || other == UNKNOWN) return true; - return this == other; - } - } + private final Ranges ranges; + private final NavigableMap map; - public static final class RangeCommandSummary implements Comparable + public CommandsForRanges(Ranges ranges, NavigableMap map) { - public final TxnId txnId; - public final Ranges ranges; - public final SaveStatus status; - public final @Nullable Timestamp executeAt; - public final List deps; - - RangeCommandSummary(TxnId txnId, Ranges ranges, SaveStatus status, @Nullable Timestamp executeAt, List deps) - { - this.txnId = txnId; - this.ranges = ranges; - this.status = status; - this.executeAt = executeAt; - this.deps = deps; - } - - public boolean equalsDeep(RangeCommandSummary other) - { - return Objects.equals(txnId, other.txnId) - && Objects.equals(ranges, other.ranges) - && status == other.status - && Objects.equals(executeAt, other.executeAt) - && Objects.equals(deps, other.deps); - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - RangeCommandSummary that = (RangeCommandSummary) o; - return txnId.equals(that.txnId); - } - - @Override - public int hashCode() - { - return Objects.hash(txnId); - } - - @Override - public String toString() - { - return "RangeCommandSummary{" + - "txnId=" + txnId + - ", status=" + status + - ", ranges=" + ranges + - '}'; - } - - public RangeCommandSummary withRanges(Ranges ranges, BiFunction remappingFunction) - { - return new RangeCommandSummary(txnId, remappingFunction.apply(this.ranges, ranges), status, executeAt, deps); - } - - @Override - public int compareTo(RangeCommandSummary other) - { - // Used in IntervalTree with the expecation that compareTo uniquely identifies an RangeCommandSummary - return txnId.compareTo(other.txnId); - } + this.ranges = ranges; + this.map = (NavigableMap) (NavigableMap) map; } - public static abstract class AbstractBuilder> + @Override + public T mapReduceFull(TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) { - protected final Set localTxns = new HashSet<>(); - protected final TreeMap txnToRange = new TreeMap<>(); - protected final IntervalTree.Builder> rangeToTxn = new IntervalTree.Builder<>(); - - public TxnType type(TxnId txnId) - { - if (!txnToRange.containsKey(txnId)) return TxnType.UNKNOWN; - return localTxns.contains(txnId) ? TxnType.LOCAL : TxnType.REMOTE; - } - - public T put(TxnId txnId, Ranges ranges, SaveStatus status, Timestamp execteAt, List dependsOn) - { - remove(txnId); - put(new RangeCommandSummary(txnId, ranges, status, execteAt, dependsOn)); - //noinspection unchecked - return (T) this; - } - - private void put(RangeCommandSummary summary) - { - TxnId txnId = summary.txnId; - localTxns.add(txnId); - txnToRange.put(txnId, summary); - addRanges(summary); - } - - private void addRanges(RangeCommandSummary summary) - { - for (Range range : summary.ranges) - rangeToTxn.add(Interval.create(normalize(range.start(), range.startInclusive(), true), - normalize(range.end(), range.endInclusive(), false), - summary)); - } - - public T putAll(CommandsForRanges other) - { - for (TxnId id : other.localCommands) - { - TxnType thisType = type(id); - TxnType otherType = other.type(id); - Invariants.checkArgument(thisType.isSafeToMix(otherType), "Attempted to add %s; expected %s but was %s", id, thisType, otherType); - } - localTxns.addAll(other.localCommands); - txnToRange.putAll(other.commandsToRanges); - // If "put" was called before for a txn present in "other", to respect the "put" semantics that update must - // be removed from "rangeToTxn" (as it got removed from "txnToRange"). - // The expected common case is that this method is called on an empty builder, so the removeIf is off an - // empty list (aka no-op) - rangeToTxn.removeIf(data -> other.commandsToRanges.containsKey(data.txnId)); - rangeToTxn.addAll(other.rangesToCommands); - //noinspection unchecked - return (T) this; - } - - public T mergeRemote(TxnId txnId, Ranges ranges, BiFunction remappingFunction) - { - // TODO (durability) : remote ranges are not made durable for now. If this command is stored in commands table, - // then we have a NotWitnessed command with Ranges, which is not expected in accord.local.Command.NotWitnessed. - // To properly handle this, the long term storage looks like it will need to store these as well. - Invariants.checkArgument(!localTxns.contains(txnId), "Attempted to merge remote txn %s, but this is a local txn", txnId); - // accord.impl.CommandTimeseries.mapReduce does the check on status and deps type, and NotWitnessed should match the semantics hard coded in InMemorySafeStore... - // in that store, the remote history is only ever included when minStauts == null and deps == ANY... but mapReduce sees accord.local.Status.KnownDeps.hasProposedOrDecidedDeps == false - // as a mis-match, so will be excluded... since NotWitnessed will return false it will only be included IFF deps = ANY. - // When it comes to the minStatus check, the current usage is "null", "Committed", "Accepted"... so NotWitnessed will only be included in the null case; - // the only subtle difference is if minStatus = NotWitnessed, this API will include these but InMemoryStore won't - RangeCommandSummary oldValue = txnToRange.get(txnId); - RangeCommandSummary newValue = oldValue == null ? - new RangeCommandSummary(txnId, ranges, SaveStatus.NotDefined, null, Collections.emptyList()) - : oldValue.withRanges(ranges, remappingFunction); - if (oldValue == null || !oldValue.equalsDeep(newValue)) - { - // changes detected... have to update range index - rangeToTxn.removeIf(data -> data.txnId.equals(txnId)); - addRanges(newValue); - } - //noinspection unchecked - return (T) this; - } - - public T remove(TxnId txnId) - { - if (txnToRange.containsKey(txnId)) - { - localTxns.remove(txnId); - txnToRange.remove(txnId); - rangeToTxn.removeIf(data -> data.txnId.equals(txnId)); - } - //noinspection unchecked - return (T) this; - } - - public T map(Function mapper) - { - for (TxnId id : new TreeSet<>(txnToRange.keySet())) - { - RangeCommandSummary summary = txnToRange.get(id); - RangeCommandSummary update = mapper.apply(summary); - if (summary.equals(update)) - continue; - remove(summary.txnId); - if (update != null) - put(update); - } - //noinspection unchecked - return (T) this; - } + return mapReduce(testTxnId, testTxnId, testKind, testStartedAt, testDep, testStatus, map, p1, accumulate); } - public static class Builder extends AbstractBuilder + @Override + public T mapReduceActive(Timestamp startedBefore, Txn.Kind.Kinds testKind, CommandFunction map, P1 p1, T accumulate) { - public CommandsForRanges build() - { - CommandsForRanges cfr = new CommandsForRanges(); - cfr.set(this); - return cfr; - } + return mapReduce(startedBefore, null, testKind, STARTED_BEFORE, ANY_DEPS, ANY_STATUS, map, p1, accumulate); } - public class Updater extends AbstractBuilder + private T mapReduce(@Nonnull Timestamp testTimestamp, @Nullable TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) { - private Updater() + // TODO (required): reconsider how we build this, to avoid having to provide range keys in order (or ensure our range search does this for us) + Map> collect = new TreeMap<>(Range::compare); + NavigableMap submap; + switch (testStartedAt) { - putAll(CommandsForRanges.this); + case STARTED_AFTER: + submap = this.map.tailMap(testTimestamp, false); + break; + case STARTED_BEFORE: + submap = this.map.headMap(testTimestamp, false); + break; + case ANY: + submap = this.map; + break; + default: + throw new AssertionError("Unknown started at: " + testStartedAt); } + submap.values().forEach((summary -> { + if (!testKind.test(summary.txnId.kind())) + return; - public void apply() - { - CommandsForRanges.this.set(this); - } - } - - private ImmutableSet localCommands; - private ImmutableSortedMap commandsToRanges; - private IntervalTree> rangesToCommands; - @Nullable - private Timestamp maxRedundant; - - public CommandsForRanges() - { - localCommands = ImmutableSet.of(); - commandsToRanges = ImmutableSortedMap.of(); - rangesToCommands = IntervalTree.emptyTree(); - } - - private void set(AbstractBuilder builder) - { - this.localCommands = ImmutableSet.copyOf(builder.localTxns); - this.commandsToRanges = ImmutableSortedMap.copyOf(builder.txnToRange); - this.rangesToCommands = builder.rangeToTxn.build(); - } - - public TxnType type(TxnId txnId) - { - if (!commandsToRanges.containsKey(txnId)) return TxnType.UNKNOWN; - return localCommands.contains(txnId) ? TxnType.LOCAL : TxnType.REMOTE; - } - - @VisibleForTesting - Set knownIds() - { - return commandsToRanges.keySet(); - } - - @VisibleForTesting - IntervalTree> tree() - { - return rangesToCommands; - } - - public @Nullable Timestamp maxRedundant() - { - return maxRedundant; - } - - public static boolean needsUpdate(Command prev, Command updated) - { - return CommandsForKey.needsUpdate(prev, updated); - } - - public boolean containsLocally(TxnId txnId) - { - return localCommands.contains(txnId); - } - - public Iterable search(AbstractKeys keys) - { - // group by the table, as ranges are based off TokenKey, which is scoped to a range - Map> groupByTable = new TreeMap<>(); - for (Key key : keys) - groupByTable.computeIfAbsent(((PartitionKey) key).table(), ignore -> new ArrayList<>()).add(key); - return () -> new AbstractIterator() - { - Iterator tblIt = groupByTable.keySet().iterator(); - Iterator>> rangeIt; + // range specific logic... ranges don't update CommandsForRange based off the life cycle and instead + // merge the cache with the disk state; so exclude states that should get removed from CommandsFor* + if (summary.saveStatus.compareTo(SaveStatus.Erased) >= 0) + return; - @Override - protected CommandsSummary computeNext() + switch (testStatus) { - while (true) - { - if (rangeIt != null && rangeIt.hasNext()) - { - Map.Entry> next = rangeIt.next(); - return result(next.getKey(), next.getValue()); - } - rangeIt = null; - if (!tblIt.hasNext()) - { - tblIt = null; - return endOfData(); - } - TableId tbl = tblIt.next(); - List keys = groupByTable.get(tbl); - Map> groupByRange = new TreeMap<>(Range::compare); - for (Key key : keys) + default: throw new AssertionError("Unhandled TestStatus: " + testStatus); + case ANY_STATUS: + //TODO (now, symitry): how do we map to TRANSITIVELY_KNOWN? + break; + case IS_PROPOSED: + switch (summary.saveStatus.status) { - List> matches = rangesToCommands.matches(key); - if (matches.isEmpty()) - continue; - for (Interval interval : matches) - groupByRange.computeIfAbsent(toRange(interval), ignore -> new HashSet<>()).add(interval.data); + default: return; + case PreCommitted: + case Committed: + case Accepted: + case AcceptedInvalidate: } - rangeIt = groupByRange.entrySet().iterator(); - } + break; + case IS_STABLE: + if (!summary.saveStatus.hasBeen(Stable) || summary.saveStatus.hasBeen(Truncated)) + return; } - }; - } - - private static Range toRange(Interval interval) - { - AccordRoutingKey start = (AccordRoutingKey) interval.min; - if (!(start instanceof AccordRoutingKey.SentinelKey)) - start = new TokenKey(start.table(), start.token().decreaseSlightly()); - AccordRoutingKey end = (AccordRoutingKey) interval.max; - // TODO (required, correctness) : accord doesn't support wrap around, so decreaseSlightly may fail in some cases - // TODO (required, correctness) : this logic is mostly used for testing, so is it actually safe for all partitioners? - return new TokenRange(start, end); - } - - @Nullable - public CommandsSummary search(Range range) - { - List matches = rangesToCommands.search(Interval.create(normalize(range.start(), range.startInclusive(), true), - normalize(range.end(), range.endInclusive(), false))); - return result(range, matches); - } - - private CommandsSummary result(Seekable seekable, Collection matches) - { - if (matches.isEmpty()) - return null; - return new Holder(seekable, matches); - } - - public int size() - { - return rangesToCommands.intervalCount(); - } - public Updater update() - { - return new Updater(); - } - - @Override - public String toString() - { - return rangesToCommands.unbuild().toString(); - } - - private static RoutingKey normalize(RoutingKey key, boolean inclusive, boolean upOrDown) - { - while (true) - { - if (inclusive) return key; - AccordRoutingKey ak = (AccordRoutingKey) key; - switch (ak.kindOfRoutingKey()) + if (testDep != ANY_DEPS) { - case SENTINEL: - // TODO (required, correctness): this doesn't work - key = ak.asSentinelKey().toTokenKeyBroken(); - continue; - case TOKEN: - TokenKey tk = ak.asTokenKey(); - // TODO (required, correctness): this doesn't work for ordered partitioner - return tk.withToken(upOrDown ? tk.token().increaseSlightly() : tk.token().decreaseSlightly()); - default: - throw new IllegalArgumentException("Unknown kind: " + ak.kindOfRoutingKey()); - } - } - } - - private static class Holder implements CommandsSummary - { - private final Seekable keyOrRange; - private final Collection matches; - - private Holder(Seekable keyOrRange, Collection matches) - { - this.keyOrRange = keyOrRange; - this.matches = matches; - } - - @Override - public T mapReduceFull(TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) - { - return mapReduce(testTxnId, testTxnId, testKind, testStartedAt, testDep, testStatus, map, p1, accumulate); - } - - @Override - public T mapReduceActive(Timestamp startedBefore, Txn.Kind.Kinds testKind, CommandFunction map, P1 p1, T accumulate) - { - return mapReduce(startedBefore, null, testKind, STARTED_BEFORE, ANY_DEPS, ANY_STATUS, map, p1, accumulate); - } - - private T mapReduce(@Nonnull Timestamp testTimestamp, @Nullable TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) - { - // TODO (required): reconsider how we build this, to avoid having to provide range keys in order (or ensure our range search does this for us) - Map> collect = new TreeMap<>(Range::compare); - matches.forEach((summary -> { - if (summary.status.compareTo(SaveStatus.Erased) >= 0) + // ! status.hasInfo + //TODO (now, reuse): should this just check if known? + if (!(summary.saveStatus.compareTo(SaveStatus.Accepted) >= 0)) return; - switch (testStartedAt) - { - default: throw new AssertionError(); - case STARTED_AFTER: - if (summary.txnId.compareTo(testTimestamp) <= 0) return; - else break; - case STARTED_BEFORE: - if (summary.txnId.compareTo(testTimestamp) >= 0) return; - case ANY: - if (testDep != ANY_DEPS && (summary.executeAt == null || summary.executeAt.compareTo(testTxnId) < 0)) - return; - } - - switch (testStatus) - { - default: throw new AssertionError("Unhandled TestStatus: " + testStatus); - case ANY_STATUS: - break; - case IS_PROPOSED: - switch (summary.status) - { - default: return; - case PreCommitted: - case Committed: - case Accepted: - } - break; - case IS_STABLE: - if (!summary.status.hasBeen(Stable) || summary.status.hasBeen(Truncated)) - return; - } - - if (!testKind.test(summary.txnId.kind())) + Timestamp executeAt = summary.executeAt; + if (executeAt.compareTo(testTxnId) <= 0) return; - if (testDep != ANY_DEPS) - { - if (!summary.status.known.deps.hasProposedOrDecidedDeps()) - return; - - // TODO (required): we must ensure these txnId are limited to those we intersect in this command store - // We are looking for transactions A that have (or have not) B as a dependency. - // If B covers ranges [1..3] and A covers [2..3], but the command store only covers ranges [1..2], - // we could have A adopt B as a dependency on [3..3] only, and have that A intersects B on this - // command store, but also that there is no dependency relation between them on the overlapping - // key range [2..2]. - - // This can lead to problems on recovery, where we believe a transaction is a dependency - // and so it is safe to execute, when in fact it is only a dependency on a different shard - // (and that other shard, perhaps, does not know that it is a dependency - and so it is not durably known) - // TODO (required): consider this some more - if ((testDep == WITH) == !summary.deps.contains(testTxnId)) - return; - } - - // TODO (required): ensure we are excluding any ranges that are now shard-redundant (not sure if this is enforced yet) - for (Range range : summary.ranges) - collect.computeIfAbsent(range, ignore -> new ArrayList<>()).add(summary); - })); + // TODO (required): we must ensure these txnId are limited to those we intersect in this command store + // We are looking for transactions A that have (or have not) B as a dependency. + // If B covers ranges [1..3] and A covers [2..3], but the command store only covers ranges [1..2], + // we could have A adopt B as a dependency on [3..3] only, and have that A intersects B on this + // command store, but also that there is no dependency relation between them on the overlapping + // key range [2..2]. + + // This can lead to problems on recovery, where we believe a transaction is a dependency + // and so it is safe to execute, when in fact it is only a dependency on a different shard + // (and that other shard, perhaps, does not know that it is a dependency - and so it is not durably known) + // TODO (required): consider this some more + if ((testDep == WITH) == !summary.depsIds.contains(testTxnId)) + return; + } - for (Map.Entry> e : collect.entrySet()) + // TODO (required): ensure we are excluding any ranges that are now shard-redundant (not sure if this is enforced yet) + for (Range range : summary.ranges) { - for (RangeCommandSummary command : e.getValue()) - { - T initial = accumulate; - accumulate = map.apply(p1, e.getKey(), command.txnId, command.executeAt, initial); - } + if (!this.ranges.intersects(range)) + continue; + collect.computeIfAbsent(range, ignore -> new ArrayList<>()).add(summary); } + })); - return accumulate; - } - - @Override - public String toString() + for (Map.Entry> e : collect.entrySet()) { - return "Holder{" + - "keyOrRange=" + keyOrRange + - ", matches=" + matches + - '}'; + for (CommandsForRangesLoader.Summary command : e.getValue()) + accumulate = map.apply(p1, e.getKey(), command.txnId, command.executeAt, accumulate); } - } - public void prune(TxnId pruneBefore, Ranges pruneRanges) - { - class MaxErased { Timestamp v; } - MaxErased maxErased = new MaxErased(); - Updater update = update(); - update.map(summary -> { - if (summary.txnId.compareTo(pruneBefore) >= 0) - return summary; - - Ranges newRanges = summary.ranges.subtract(pruneRanges); - if (newRanges == summary.ranges || newRanges.equals(summary.ranges)) - return summary; - - maxErased.v = Timestamp.nonNullOrMax(maxErased.v, summary.executeAt); - if (newRanges.isEmpty()) - return null; - return new RangeCommandSummary(summary.txnId, newRanges, summary.status, summary.executeAt, summary.deps); - }).apply(); - maxRedundant = Timestamp.nonNullOrMax(maxRedundant, maxErased.v); + return accumulate; } - } diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java new file mode 100644 index 000000000000..f90d57b32e4b --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeMap; +import java.util.function.BiFunction; +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableMap; + +import accord.local.Command; +import accord.local.DurableBefore; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.PartialDeps; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.index.accord.RoutesSearcher; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.Pair; + +public class CommandsForRangesLoader +{ + private final RoutesSearcher searcher = new RoutesSearcher(); + //TODO (now, durability): find solution for this... + private final Map historicalTransaction = new HashMap<>(); + private final AccordCommandStore store; + + public CommandsForRangesLoader(AccordCommandStore store) + { + this.store = store; + } + + public AsyncResult>> get(Ranges ranges) + { + Watcher watcher = fromCache(ranges); + ImmutableMap before = ImmutableMap.copyOf(watcher.get()); + return AsyncChains.ofCallable(Stage.READ.executor(), () -> get(ranges, before)) + .map(map -> Pair.create(watcher, map), store) + .beginAsResult(); + } + + private NavigableMap get(Ranges ranges, Map cacheHits) + { + Set matches = new HashSet<>(); + for (Range range : ranges) + matches.addAll(intersects(range)); + if (matches.isEmpty()) + return new TreeMap<>(); + return load(ranges, cacheHits, matches); + } + + private Collection intersects(Range range) + { + assert range instanceof TokenRange : "Require TokenRange but given " + range.getClass(); + Set intersects = searcher.intersects(store.id(), (TokenRange) range); + if (!historicalTransaction.isEmpty()) + { + if (intersects.isEmpty()) + intersects = new HashSet<>(); + for (Map.Entry e : historicalTransaction.entrySet()) + { + if (e.getValue().intersects(range)) + intersects.add(e.getKey()); + } + if (intersects.isEmpty()) + intersects = Collections.emptySet(); + } + return intersects; + } + + public class Watcher implements AccordStateCache.Listener, AutoCloseable + { + private final Ranges ranges; + + private NavigableMap summaries = null; + private List> needToDoubleCheck = null; + + public Watcher(Ranges ranges) + { + this.ranges = ranges; + } + + public NavigableMap get() + { + return summaries == null ? Collections.emptyNavigableMap() : summaries; + } + + @Override + public void onAdd(AccordCachingState n) + { + if (n.key().domain() != Routable.Domain.Range) + return; + AccordCachingState.State state = n.state(); + if (state instanceof AccordCachingState.Loading) + { + if (needToDoubleCheck == null) + needToDoubleCheck = new ArrayList<>(); + needToDoubleCheck.add(n); + return; + } + //TODO (now): include FailedToSave? Most likely need to, but need to improve test coverage to have failed writes + if (!(state instanceof AccordCachingState.Loaded + || state instanceof AccordCachingState.Modified + || state instanceof AccordCachingState.Saving)) + return; + + Command cmd = state.get(); + if (cmd == null) + return; + Summary summary = create(cmd, ranges, null); + if (summary != null) + { + if (summaries == null) + summaries = new TreeMap<>(); + summaries.put(summary.txnId, summary); + } + } + + @Override + public void onEvict(AccordCachingState state) + { + if (needToDoubleCheck == null) + return; + if (!needToDoubleCheck.remove(state)) + return; + if (state.state() instanceof AccordCachingState.Loading) + return; // can't double check + onAdd(state); + } + + @Override + public void close() + { + store.commandCache().unregister(this); + if (needToDoubleCheck != null) + { + List> copy = needToDoubleCheck; + needToDoubleCheck = null; + copy.forEach(this::onAdd); + } + needToDoubleCheck = null; + } + } + + private Watcher fromCache(Ranges ranges) + { + Watcher watcher = new Watcher(ranges); + store.commandCache().stream().forEach(watcher::onAdd); + store.commandCache().register(watcher); + return watcher; + } + + private NavigableMap load(Ranges ranges, Map cacheHits, Collection possibleTxns) + { + //TODO (now): this logic is kinda duplicate of org.apache.cassandra.service.accord.CommandsForRange.mapReduce + // should figure out if this can be improved... also what is correct? + DurableBefore durableBefore = store.durableBefore(); + NavigableMap map = new TreeMap<>(); + for (TxnId txnId : possibleTxns) + { + if (cacheHits.containsKey(txnId)) + continue; + Command cmd = store.loadCommand(txnId); + if (cmd == null) + continue; // unknown command + Summary summary = create(cmd, ranges, durableBefore); + if (summary == null) + continue; + map.put(txnId, summary); + } + return map; + } + + private static Summary create(Command cmd, Ranges cacheRanges, @Nullable DurableBefore durableBefore) + { + //TODO (now, correctness): C* did Invalidated, accord-core did Erased... what is correct? + SaveStatus saveStatus = cmd.saveStatus(); + if (saveStatus == SaveStatus.Invalidated + || saveStatus == SaveStatus.Erased + || !saveStatus.hasBeen(Status.PreAccepted)) + return null; + if (cmd.partialTxn() == null) + return null; + + Seekables> keysOrRanges = cmd.partialTxn().keys(); + if (keysOrRanges.domain() != Routable.Domain.Range) + throw new AssertionError(String.format("Txn keys are not range for %s", cmd.partialTxn())); + Ranges ranges = (Ranges) keysOrRanges; + + if (!ranges.intersects(cacheRanges)) + return null; + + if (durableBefore != null) + { + Ranges durableAlready = Ranges.of(durableBefore.foldlWithBounds(ranges, (e, accum, start, end) -> { + if (e.universalBefore.compareTo(cmd.txnId()) < 0) + return accum; + accum.add(new TokenRange((AccordRoutingKey) start, (AccordRoutingKey) end)); + return accum; + }, new ArrayList(), ignore -> false).toArray(Range[]::new)); + Ranges newRanges = ranges.subtract(durableAlready); + + if (newRanges.isEmpty()) + return null; + } + + PartialDeps partialDeps = cmd.partialDeps(); + List deps = partialDeps == null ? Collections.emptyList() : partialDeps.txnIds(); + return new Summary(cmd.txnId(), cmd.executeAt(), saveStatus, ranges, deps); + } + + public void mergeHistoricalTransaction(TxnId txnId, Ranges ranges, BiFunction remappingFunction) + { + historicalTransaction.merge(txnId, ranges, remappingFunction); + } + + public static class Summary + { + public final TxnId txnId; + @Nullable + public final Timestamp executeAt; + public final SaveStatus saveStatus; + public final Ranges ranges; + public final List depsIds; + + private Summary(TxnId txnId, @Nullable Timestamp executeAt, SaveStatus saveStatus, Ranges ranges, List depsIds) + { + this.txnId = txnId; + this.executeAt = executeAt; + this.saveStatus = saveStatus; + this.ranges = ranges; + this.depsIds = depsIds; + } + + @Override + public String toString() + { + return "Summary{" + + "txnId=" + txnId + + ", executeAt=" + executeAt + + ", saveStatus=" + saveStatus + + ", ranges=" + ranges + + ", depsIds=" + depsIds + + '}'; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index b037acc7c5e8..caa0c70307d5 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -57,10 +57,7 @@ public interface IAccordService IVerbHandler verbHandler(); - default long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException - { - throw new UnsupportedOperationException(); - } + long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException; long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite); diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java new file mode 100644 index 000000000000..1338a21980f8 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.local.SerializerSupport; +import accord.messages.Message; +import accord.primitives.TxnId; + +public interface IJournal +{ + SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId); + void appendMessageBlocking(Message message); +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/RangeTreeRangeAccessor.java b/src/java/org/apache/cassandra/service/accord/RangeTreeRangeAccessor.java new file mode 100644 index 000000000000..80eabf4b5a10 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/RangeTreeRangeAccessor.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.api.RoutingKey; +import accord.primitives.Range; +import org.apache.cassandra.utils.RangeTree; + +public enum RangeTreeRangeAccessor implements RangeTree.Accessor +{ + instance; + + @Override + public RoutingKey start(Range range) + { + return range.start(); + } + + @Override + public RoutingKey end(Range range) + { + return range.end(); + } + + @Override + public boolean contains(Range range, RoutingKey routingKey) + { + return range.contains(routingKey); + } + + @Override + public boolean contains(RoutingKey start, RoutingKey end, RoutingKey routingKey) + { + if (routingKey.compareTo(start) <= 0) + return false; + if (routingKey.compareTo(end) > 0) + return false; + return true; + } + + @Override + public boolean intersects(Range range, RoutingKey start, RoutingKey end) + { + if (range.start().compareTo(end) >= 0) return false; + if (range.end().compareTo(start) <= 0) return false; + return true; + } + + @Override + public boolean intersects(Range left, Range right) + { + return left.compareIntersecting(right) == 0; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index 9606be549253..e648e738007f 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -90,9 +90,9 @@ public static final class SentinelKey extends AccordRoutingKey { private static final long EMPTY_SIZE = ObjectSizes.measure(new SentinelKey(null, true)); - private final boolean isMin; + public final boolean isMin; - private SentinelKey(TableId table, boolean isMin) + public SentinelKey(TableId table, boolean isMin) { super(table); this.isMin = isMin; diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index d54afba17128..a42fcf57bfd2 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -24,7 +24,6 @@ import com.google.common.base.Preconditions; import accord.api.Key; -import accord.api.RoutingKey; import accord.primitives.Routable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.DecoratedKey; @@ -96,7 +95,7 @@ public DecoratedKey partitionKey() } @Override - public RoutingKey toUnseekable() + public TokenKey toUnseekable() { return new TokenKey(table, token()); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 4b7607a36bbe..4aa35bc32408 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -17,43 +17,30 @@ */ package org.apache.cassandra.service.accord.async; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.BiConsumer; -import java.util.stream.Collectors; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Iterables; -import com.google.common.collect.ImmutableSet; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import accord.api.Key; -import accord.api.RoutingKey; +import accord.local.CommandsForKey; import accord.local.KeyHistory; import accord.local.PreLoadContext; -import accord.primitives.Range; -import accord.primitives.Ranges; -import accord.primitives.Seekables; -import accord.primitives.TxnId; +import accord.primitives.*; import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import accord.utils.async.Observable; -import org.apache.cassandra.service.accord.AccordCachingState; -import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordSafeState; -import org.apache.cassandra.service.accord.AccordStateCache; -import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import org.apache.cassandra.service.accord.*; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.Pair; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; public class AsyncLoader { @@ -172,15 +159,41 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context context) { - AsyncChain> overlappingKeys = findOverlappingKeys((Ranges) keysOrRanges); + Ranges ranges = (Ranges) keysOrRanges; + + List> root = new ArrayList<>(ranges.size() + 1); + class Watcher implements AccordStateCache.Listener + { + private final Set cached = commandStore.commandsForKeyCache().stream() + .map(n -> (PartitionKey) n.key()) + .filter(ranges::contains) + .collect(Collectors.toSet()); - return overlappingKeys.flatMap(keys -> { - if (keys.isEmpty()) + @Override + public void onAdd(AccordCachingState state) + { + PartitionKey pk = (PartitionKey) state.key(); + if (ranges.contains(pk)) + cached.add(pk); + } + } + Watcher watcher = new Watcher(); + commandStore.commandsForKeyCache().register(watcher); + root.add(findOverlappingKeys(ranges).flatMap(keys -> { + commandStore.commandsForKeyCache().unregister(watcher); + if (keys.isEmpty() && watcher.cached.isEmpty()) return AsyncChains.success(null); + Set set = ImmutableSet.builder().addAll(watcher.cached).addAll(keys).build(); List> chains = new ArrayList<>(); - keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); + set.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); return chains.isEmpty() ? AsyncChains.success(null) : AsyncChains.reduce(chains, (a, b) -> null); - }, commandStore); + }, commandStore)); + + AsyncResult>> chain = commandStore.diskCommandsForRanges().get(ranges); + root.add(chain); + context.commandsForRanges = new AccordSafeCommandsForRanges(ranges, chain); + + return AsyncChains.all(root); } private AsyncChain> findOverlappingKeys(Ranges ranges) @@ -195,27 +208,14 @@ private AsyncChain> findOverlappingKeys(Ranges ranges) private AsyncChain> findOverlappingKeys(Range range) { - Set cached = commandStore.commandsForKeyCache().stream() - .map(n -> (PartitionKey) n.key()) - .filter(range::contains) - .collect(Collectors.toSet()); // save to a variable as java gets confused when `.map` is called on the result of asChain AsyncChain> map = Observable.asChain(callback -> AccordKeyspace.findAllKeysBetween(commandStore.id(), - toTokenKey(range.start()).token(), range.startInclusive(), - toTokenKey(range.end()).token(), range.endInclusive(), + (AccordRoutingKey) range.start(), range.startInclusive(), + (AccordRoutingKey) range.end(), range.endInclusive(), callback), Collectors.toSet()); - return map.map(s -> ImmutableSet.builder().addAll(s).addAll(cached).build()); - } - - private static TokenKey toTokenKey(RoutingKey start) - { - if (start instanceof TokenKey) - return (TokenKey) start; - if (start instanceof AccordRoutingKey.SentinelKey) - return ((AccordRoutingKey.SentinelKey) start).toTokenKeyBroken(); - throw new IllegalArgumentException(String.format("Unable to convert RoutingKey %s (type %s) to TokenKey", start, start.getClass())); + return map.map(s -> ImmutableSet.builder().addAll(s).build()); } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index f0c53e33d7c1..51041aab58df 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -17,13 +17,11 @@ */ package org.apache.cassandra.service.accord.async; -import java.util.Collections; import java.util.HashMap; import java.util.TreeMap; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; - import javax.annotation.Nullable; import org.slf4j.Logger; @@ -34,26 +32,26 @@ import accord.local.CommandStore; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; -import accord.primitives.RoutableKey; import accord.primitives.Seekables; import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.async.AsyncChains; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordSafeCommand; -import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordSafeCommandStore; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; +import org.apache.cassandra.service.accord.AccordSafeCommandsForRanges; import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import static org.apache.cassandra.service.accord.async.AsyncLoader.txnIds; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.COMPLETING; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.FAILED; +import static org.apache.cassandra.service.accord.async.AsyncOperation.State.FINISHED; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.INITIALIZED; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.LOADING; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.PREPARING; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.RUNNING; -import static org.apache.cassandra.service.accord.async.AsyncOperation.State.COMPLETING; -import static org.apache.cassandra.service.accord.async.AsyncOperation.State.FINISHED; -import static org.apache.cassandra.service.accord.async.AsyncOperation.State.FAILED; public abstract class AsyncOperation extends AsyncChains.Head implements Runnable, Function { @@ -70,6 +68,8 @@ static class Context final HashMap commands = new HashMap<>(); final TreeMap timestampsForKey = new TreeMap<>(); final TreeMap commandsForKey = new TreeMap<>(); + @Nullable + AccordSafeCommandsForRanges commandsForRanges = null; void releaseResources(AccordCommandStore commandStore) { @@ -83,6 +83,8 @@ void revertChanges() commands.values().forEach(AccordSafeState::revert); timestampsForKey.values().forEach(AccordSafeState::revert); commandsForKey.values().forEach(AccordSafeState::revert); + if (commandsForRanges != null) + commandsForRanges.revert(); } } @@ -187,19 +189,9 @@ TxnId primaryTxnId() } @SuppressWarnings("unchecked") - Iterable keys() + Seekables keys() { - Seekables keys = preLoadContext.keys(); - switch (keys.domain()) - { - default: - throw new IllegalStateException("Unhandled domain " + keys.domain()); - case Key: - return (Iterable) keys; - case Range: - // TODO (expected): handle ranges - return Collections.emptyList(); - } + return preLoadContext.keys(); } private void fail(Throwable throwable) @@ -255,11 +247,11 @@ protected void runInternal() return; state(PREPARING); case PREPARING: - safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.timestampsForKey, context.commandsForKey); + safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.timestampsForKey, context.commandsForKey, context.commandsForRanges); state(RUNNING); case RUNNING: result = apply(safeStore); - safeStore.postExecute(context.commands, context.timestampsForKey, context.commandsForKey); + safeStore.postExecute(context.commands, context.timestampsForKey, context.commandsForKey, context.commandsForRanges); context.releaseResources(commandStore); commandStore.completeOperation(safeStore); commandStore.executionOrder().unregister(this); diff --git a/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java b/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java index 5a3c28ca9488..03527f65539c 100644 --- a/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java +++ b/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java @@ -18,11 +18,20 @@ package org.apache.cassandra.service.accord.async; import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.IdentityHashMap; +import java.util.List; -import accord.primitives.RoutableKey; +import accord.api.Key; +import accord.api.RoutingKey; +import accord.primitives.Range; +import accord.primitives.Seekable; import accord.primitives.TxnId; import accord.utils.Invariants; import org.agrona.collections.Object2ObjectHashMap; +import org.apache.cassandra.service.accord.RangeTreeRangeAccessor; +import org.apache.cassandra.utils.RTree; +import org.apache.cassandra.utils.RangeTree; /** * Assists with correct ordering of {@link AsyncOperation} execution wrt each other, @@ -30,7 +39,85 @@ */ public class ExecutionOrder { + private static class Conflicts + { + private final List keyConflicts; + private final List rangeConflicts; + + private Conflicts(List keyConflicts, List rangeConflicts) + { + this.keyConflicts = keyConflicts; + this.rangeConflicts = rangeConflicts; + } + } + private class RangeState + { + private final Range range; + private final IdentityHashMap, Conflicts> operationToConflicts = new IdentityHashMap<>(); + private Object operationOrQueue; + + public RangeState(Range range, List keyConflicts, List rangeConflicts, AsyncOperation operation) + { + this.range = range; + this.operationOrQueue = operation; + add(operation, keyConflicts, rangeConflicts); + } + + public void add(AsyncOperation operation, List keyConflicts, List rangeConflicts) + { + operationToConflicts.put(operation, new Conflicts(keyConflicts, rangeConflicts)); + } + + boolean canRun(AsyncOperation operation) + { + if (operationOrQueue instanceof AsyncOperation) + { + Invariants.checkState(operationOrQueue == operation); + return true; + } + else + { + ArrayDeque> queue = (ArrayDeque>) operationOrQueue; + return queue.peek() == operation; + } + } + + Conflicts remove(AsyncOperation operation) + { + if (operationOrQueue instanceof AsyncOperation) + { + Invariants.checkState(operationOrQueue == operation); + rangeQueues.remove(range); + } + else + { + @SuppressWarnings("unchecked") + ArrayDeque> queue = (ArrayDeque>) operationOrQueue; + AsyncOperation head = queue.poll(); + Invariants.checkState(head == operation); + + if (queue.isEmpty()) + { + rangeQueues.remove(range); + } + else + { + head = queue.peek(); + if (canRun(head)) + head.onUnblocked(); + } + } + return operationToConflicts.remove(operation); + } + + public Conflicts conflicts(AsyncOperation operation) + { + return operationToConflicts.get(operation); + } + } + private final Object2ObjectHashMap queues = new Object2ObjectHashMap<>(); + private final RangeTree rangeQueues = RTree.create(RangeTreeRangeAccessor.instance); /** * Register an operation as having a dependency on its keys and TxnIds @@ -39,14 +126,88 @@ public class ExecutionOrder boolean register(AsyncOperation operation) { boolean canRun = true; - for (RoutableKey key : operation.keys()) - canRun &= register(key, operation); + for (Seekable seekable : operation.keys()) + { + switch (seekable.domain()) + { + case Key: + canRun &= register(seekable.asKey(), operation); + break; + case Range: + canRun &= register(seekable.asRange(), operation); + break; + default: + throw new AssertionError("Unexpected domain: " + seekable.domain()); + } + } TxnId primaryTxnId = operation.primaryTxnId(); if (null != primaryTxnId) canRun &= register(primaryTxnId, operation); return canRun; } + private boolean register(Range range, AsyncOperation operation) + { + // Ranges depend on Ranges and Keys + // Keys depend on Keys... + // This adds a complication to this logic as keys should be able to make progress regardless of ranges, but rangest must depend on keys + List keyConflicts = null; + for (Object o : queues.keySet()) + { + if (!(o instanceof Key)) + continue; + Key key = (Key) o; + if (!range.contains(key)) + continue; + if (keyConflicts == null) + keyConflicts = new ArrayList<>(); + keyConflicts.add(key); + } + if (keyConflicts != null) + keyConflicts.forEach(k -> register(k, operation)); + + class Result + { + RangeState sameRange = null; + List rangeConflicts = null; + } + Result result = new Result(); + rangeQueues.search(range, e -> { + if (range.equals(e.getKey())) + result.sameRange = e.getValue(); + else + { + if (result.rangeConflicts == null) + result.rangeConflicts = new ArrayList<>(); + result.rangeConflicts.add(e.getKey()); + } + RangeState state = e.getValue(); + Object operationOrQueue = state.operationOrQueue; + if (operationOrQueue instanceof AsyncOperation) + { + ArrayDeque> queue = new ArrayDeque<>(4); + queue.add((AsyncOperation) operationOrQueue); + queue.add(operation); + state.operationOrQueue = queue; + } + else + { + @SuppressWarnings("unchecked") + ArrayDeque> queue = (ArrayDeque>) operationOrQueue; + queue.add(operation); + } + }); + if (result.sameRange != null) + { + result.sameRange.add(operation, keyConflicts, result.rangeConflicts); + } + else + { + rangeQueues.add(range, new RangeState(range, keyConflicts, result.rangeConflicts, operation)); + } + return keyConflicts == null && result.rangeConflicts == null; + } + /** * Register an operation as having a dependency on a key or a TxnId * @return true if no other operation depends on the key/TxnId, false otherwise @@ -81,13 +242,36 @@ private boolean register(Object keyOrTxnId, AsyncOperation operation) */ void unregister(AsyncOperation operation) { - for (RoutableKey key : operation.keys()) - unregister(key, operation); + for (Seekable seekable : operation.keys()) + { + switch (seekable.domain()) + { + case Key: + unregister(seekable.asKey(), operation); + break; + case Range: + unregister(seekable.asRange(), operation); + break; + default: + throw new AssertionError("Unexpected domain: " + seekable.domain()); + } + + } TxnId primaryTxnId = operation.primaryTxnId(); if (null != primaryTxnId) unregister(primaryTxnId, operation); } + private void unregister(Range range, AsyncOperation operation) + { + RangeState state = state(range); + Conflicts conflicts = state.remove(operation); + if (conflicts.rangeConflicts != null) + conflicts.rangeConflicts.forEach(r -> state(r).remove(operation)); + if (conflicts.keyConflicts != null) + conflicts.keyConflicts.forEach(k -> unregister(k, operation)); + } + /** * Unregister the operation as being a dependency for key or TxnId */ @@ -123,14 +307,60 @@ private void unregister(Object keyOrTxnId, AsyncOperation operation) boolean canRun(AsyncOperation operation) { - for (RoutableKey key : operation.keys()) - if (!canRun(key, operation)) - return false; + for (Seekable seekable : operation.keys()) + { + switch (seekable.domain()) + { + case Key: + if (!canRun(seekable.asKey(), operation)) + return false; + break; + case Range: + if (!canRun(seekable.asRange(), operation)) + return false; + break; + default: + throw new AssertionError("Unexpected domain: " + seekable.domain()); + } + + } TxnId primaryTxnId = operation.primaryTxnId(); return primaryTxnId == null || canRun(primaryTxnId, operation); } + private boolean canRun(Range range, AsyncOperation operation) + { + RangeState state = state(range); + if (!state.canRun(operation)) + return false; + Conflicts conflicts = state.conflicts(operation); + if (conflicts.rangeConflicts != null) + { + for (Range r : conflicts.rangeConflicts) + { + if (!state(r).canRun(operation)) + return false; + } + } + if (conflicts.keyConflicts != null) + { + for (Key key : conflicts.keyConflicts) + { + if (!canRun(key, operation)) + return false; + } + } + return true; + } + + private RangeState state(Range range) + { + List list = rangeQueues.get(range); + assert list.size() == 1 : String.format("Expected 1 element but saw list %s", list); + return list.get(0); + } + private boolean canRun(Object keyOrTxnId, AsyncOperation operation) { Object operationOrQueue = queues.get(keyOrTxnId); diff --git a/src/java/org/apache/cassandra/service/accord/events/CacheEvents.java b/src/java/org/apache/cassandra/service/accord/events/CacheEvents.java new file mode 100644 index 000000000000..d9e34519675c --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/events/CacheEvents.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.events; + +import jdk.jfr.Category; +import jdk.jfr.DataAmount; +import jdk.jfr.Event; +import jdk.jfr.Label; +import jdk.jfr.Name; +import jdk.jfr.Percentage; +import jdk.jfr.StackTrace; + +@Category({"Accord", "Accord Cache"}) +@StackTrace(false) +public abstract class CacheEvents extends Event +{ + public int store; + public String instance; + public String key; + public String status; + @DataAmount(DataAmount.BYTES) + public int lastQueriedEstimatedSizeOnHeap; + + // instance + @DataAmount(DataAmount.BYTES) + public long instanceAllocated; + public long instanceStatsQueries, instanceStatsHits, instanceStatsMisses; + + @Percentage + public double instanceStatsHitRate; + + // cache + @DataAmount(DataAmount.BYTES) + public long globalCapacity, globalAllocated; + public int globalSize, globalReferenced, globalUnreferenced; + + public long globalStatsQueries, globalStatsHits, globalStatsMisses; + + @Percentage + public double globalStatsHitRate; + + @Percentage + public double globalFree; + public void update() + { + instanceStatsHitRate = 1D - (instanceStatsHits / (double) instanceStatsQueries); + globalStatsHitRate = 1D - (globalStatsHits / (double) globalStatsQueries); + globalFree = 1.0D - (globalAllocated / (double) globalCapacity); + } + + @Name("cassandra.accord.cache.Add") + @Label("Accord Cache Add") + public static class Add extends CacheEvents { } + + @Name("cassandra.accord.cache.Release") + @Label("Accord Cache Release") + public static class Release extends CacheEvents { } + + @Name("cassandra.accord.cache.Evict") + @Label("Accord Cache Evict") + public static class Evict extends CacheEvents { } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java b/src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java new file mode 100644 index 000000000000..712d781e8d4c --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.Arrays; +import java.util.UUID; +import java.util.function.Function; + +import org.apache.cassandra.db.marshal.ByteArrayAccessor; +import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; +import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; + +import static org.apache.cassandra.service.accord.api.AccordRoutingKey.RoutingKeyKind.SENTINEL; + +public class AccordRoutingKeyByteSource +{ + private static final byte[] MIN_ORDER = { -1 }; + private static final byte[] TOKEN_ORDER = { 0 }; + private static final byte[] MAX_ORDER = { 1 }; + + private static ByteSource minPrefix() + { + return ByteSource.signedFixedLengthNumber(ByteArrayAccessor.instance, MIN_ORDER); + } + + private static ByteSource tokenPrefix() + { + return ByteSource.signedFixedLengthNumber(ByteArrayAccessor.instance, TOKEN_ORDER); + } + + private static ByteSource maxPrefix() + { + return ByteSource.signedFixedLengthNumber(ByteArrayAccessor.instance, MAX_ORDER); + } + + public static Serializer create(IPartitioner partitioner) + { + if (partitioner.isFixedLength()) + return new FixedLength(partitioner, ByteComparable.Version.OSS50); + return new VariableLength(partitioner, ByteComparable.Version.OSS50); + } + + public static FixedLength fixedLength(IPartitioner partitioner) + { + return new FixedLength(partitioner, ByteComparable.Version.OSS50); + } + + public static VariableLength variableLength(IPartitioner partitioner) + { + return new VariableLength(partitioner, ByteComparable.Version.OSS50); + } + + public static abstract class Serializer + { + protected final IPartitioner partitioner; + protected final ByteComparable.Version version; + protected final byte[] empty; + + protected Serializer(IPartitioner partitioner, ByteComparable.Version version, byte[] empty) + { + this.partitioner = partitioner; + this.version = version; + this.empty = empty; + } + + public ByteSource minAsComparableBytes() + { + return ByteSource.withTerminator(ByteSource.TERMINATOR, minPrefix(), ByteSource.fixedLength(empty)); + } + + public ByteSource maxAsComparableBytes() + { + return ByteSource.withTerminator(ByteSource.TERMINATOR, maxPrefix(), ByteSource.fixedLength(empty)); + } + + public ByteSource asComparableBytes(Token token) + { + if (token.getPartitioner() != partitioner) + throw new IllegalArgumentException("Attempted to use the wrong partitioner: given " + token.getPartitioner() + " but expected " + partitioner); + return ByteSource.withTerminator(ByteSource.TERMINATOR, tokenPrefix(), token.asComparableBytes(version)); + } + + public Token tokenFromComparableBytes(ValueAccessor accessor, V data) throws IOException + { + return tokenFromComparableBytes(ByteSource.peekable(ByteSource.fixedLength(accessor, data))); + } + + public Token tokenFromComparableBytes(ByteSource.Peekable bs) throws IOException + { + if (bs.peek() == ByteSource.TERMINATOR) + throw new IOException("Unable to read prefix"); + ByteSource.Peekable component = progress(bs); + + byte[] prefix = ByteSourceInverse.getOptionalSignedFixedLength(ByteArrayAccessor.instance, component, 1); + if (prefix == null) + throw new IOException("Unable to read prefix; prefix was null"); + if (!Arrays.equals(TOKEN_ORDER, prefix)) + { + String match = Arrays.equals(MIN_ORDER, prefix) ? "min" + : Arrays.equals(MAX_ORDER, prefix) ? "max" + : "unknown"; + throw new IOException("Attempt to read token from non-token value: was " + match); + } + component = ByteSourceInverse.nextComponentSource(bs); + if (component == null) + throw new IOException("Unable to read token; component was not found"); + return partitioner.getTokenFactory().fromComparableBytes(component, version); + } + + public ByteSource asComparableBytes(AccordRoutingKey key) + { + UUID uuid = key.table().asUUID(); + ByteSource[] srcs = { LongType.instance.asComparableBytes(LongType.instance.decompose(uuid.getMostSignificantBits()), ByteComparable.Version.OSS50), + LongType.instance.asComparableBytes(LongType.instance.decompose(uuid.getLeastSignificantBits()), ByteComparable.Version.OSS50), + asComparableBytesNoTable(key) }; + return ByteSource.withTerminator(ByteSource.TERMINATOR, srcs); + } + + public ByteSource asComparableBytesNoTable(AccordRoutingKey key) + { + return key.kindOfRoutingKey() == SENTINEL ? key.asSentinelKey().isMin ? minAsComparableBytes() : maxAsComparableBytes() + : asComparableBytes(key.token()); + } + + public AccordRoutingKey fromComparableBytes(ValueAccessor accessor, V data) throws IOException + { + ByteSource.Peekable bs = ByteSource.peekable(ByteSource.fixedLength(accessor, data)); + long[] uuidValues = new long[2]; + for (int i = 0; i < 2; i++) + { + if (bs.peek() == ByteSource.TERMINATOR) + throw new IllegalArgumentException("Unable to parse bytes"); + ByteSource.Peekable component = ByteSourceInverse.nextComponentSource(bs); + long value = LongType.instance.compose(LongType.instance.fromComparableBytes(component, ByteComparable.Version.OSS50)); + uuidValues[i] = value; + } + TableId tableId = TableId.fromUUID(new UUID(uuidValues[0], uuidValues[1])); + return fromComparableBytes(bs, + isMin -> isMin ? AccordRoutingKey.SentinelKey.min(tableId) : AccordRoutingKey.SentinelKey.max(tableId), + token -> new AccordRoutingKey.TokenKey(tableId, token)); + } + + private AccordRoutingKey fromComparableBytes(ByteSource.Peekable bs, + Function onSentinel, + Function onToken) throws IOException + { + if (bs.peek() == ByteSource.TERMINATOR) + throw new IOException("Unable to read prefix"); + ByteSource.Peekable component = progress(bs); + + byte[] prefix = ByteSourceInverse.getOptionalSignedFixedLength(ByteArrayAccessor.instance, component, 1); + if (prefix == null) + throw new IOException("Unable to read prefix; prefix was null"); + if (Arrays.equals(TOKEN_ORDER, prefix)) + { + component = ByteSourceInverse.nextComponentSource(bs); + if (component == null) + throw new IOException("Unable to read token; component was not found"); + return onToken.apply(partitioner.getTokenFactory().fromComparableBytes(component, version)); + } + if (Arrays.equals(MIN_ORDER, prefix)) + return onSentinel.apply(true); + if (Arrays.equals(MAX_ORDER, prefix)) + return onSentinel.apply(false); + throw new AssertionError("Unknown prefix"); + } + + private static ByteSource.Peekable progress(ByteSource.Peekable bs) throws IOException + { + ByteSource.Peekable component = ByteSourceInverse.nextComponentSource(bs); + if (component == null) + throw new IOException("Unable to read prefix; component was not found"); + if (component.peek() == ByteSource.NEXT_COMPONENT) + { + // this came from (table, token_or_sentinel) + component = ByteSourceInverse.nextComponentSource(bs); + if (component == null) + throw new IOException("Unable to read prefix; component was not found"); + } + return component; + } + + public byte[] serialize(Token token) + { + return ByteSourceInverse.readBytes(asComparableBytes(token)); + } + + public byte[] serialize(AccordRoutingKey key) + { + return ByteSourceInverse.readBytes(asComparableBytes(key)); + } + + public byte[] serializeNoTable(AccordRoutingKey key) + { + return ByteSourceInverse.readBytes(asComparableBytesNoTable(key)); + } + } + + public static class VariableLength extends Serializer + { + public VariableLength(IPartitioner partitioner, ByteComparable.Version version) + { + super(partitioner, version, ByteArrayUtil.EMPTY_BYTE_ARRAY); + } + } + + public static class FixedLength extends Serializer + { + public FixedLength(IPartitioner partitioner, ByteComparable.Version version) + { + super(partitioner, version, computeEmptyBytes(partitioner, version)); + } + + private static byte[] computeEmptyBytes(IPartitioner partitioner, ByteComparable.Version version) + { + if (!partitioner.isFixedLength()) + throw new IllegalArgumentException("Unable to use partitioner " + partitioner.getClass() + "; it is not fixed-length"); + + int tokenSize = ByteSourceInverse.readBytes(partitioner.getMinimumToken().asComparableBytes(version)).length; + return new byte[tokenSize]; + } + + public int valueSize() + { + return 4 + empty.length; + } + } +} diff --git a/src/java/org/apache/cassandra/utils/Clock.java b/src/java/org/apache/cassandra/utils/Clock.java index c8ba785cab9f..8c3fb609c2b6 100644 --- a/src/java/org/apache/cassandra/utils/Clock.java +++ b/src/java/org/apache/cassandra/utils/Clock.java @@ -107,6 +107,14 @@ public static long currentTimeMillis() { return instance.currentTimeMillis(); } + + /** + * Semantically equivalent to {@link FBUtilities#nowInSeconds()} + */ + public static long nowInSeconds() + { + return instance.nowInSeconds(); + } } public static class Default implements Clock diff --git a/src/java/org/apache/cassandra/utils/CloseableIterator.java b/src/java/org/apache/cassandra/utils/CloseableIterator.java index 32de799ba93f..634629f4bed1 100644 --- a/src/java/org/apache/cassandra/utils/CloseableIterator.java +++ b/src/java/org/apache/cassandra/utils/CloseableIterator.java @@ -66,5 +66,4 @@ public T next() } }; } - } diff --git a/src/java/org/apache/cassandra/utils/IntervalTree.java b/src/java/org/apache/cassandra/utils/IntervalTree.java index 53f0fb716518..8677ba9f01dc 100644 --- a/src/java/org/apache/cassandra/utils/IntervalTree.java +++ b/src/java/org/apache/cassandra/utils/IntervalTree.java @@ -28,6 +28,8 @@ import java.util.function.BiPredicate; import java.util.function.Consumer; import java.util.function.Predicate; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; import com.google.common.base.Joiner; import com.google.common.collect.Iterables; @@ -187,6 +189,11 @@ public List search(C point) return search(Interval.create(point, point, null)); } + public List search(C start, C end) + { + return search(Interval.create(start, end, null)); + } + /** * The input arrays aren't defensively copied and will be sorted. The update method doesn't allow duplicates or elements to be removed * to be missing and this differs from the constructor which does not duplicate checking at all. @@ -312,6 +319,11 @@ public Iterator iterator() return new TreeIterator(head); } + public Stream stream() + { + return StreamSupport.stream(spliterator(), false); + } + @Override public String toString() { @@ -555,11 +567,6 @@ public Builder add(I interval) return this; } - public interface TriPredicate - { - boolean test(A a, B b, C c); - } - public Builder removeIf(TriPredicate predicate) { intervals.removeIf(i -> predicate.test(i.min, i.max, i.data)); diff --git a/src/java/org/apache/cassandra/utils/MutableEntry.java b/src/java/org/apache/cassandra/utils/MutableEntry.java new file mode 100644 index 000000000000..9ae0c17177cf --- /dev/null +++ b/src/java/org/apache/cassandra/utils/MutableEntry.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.Map; +import java.util.Objects; + +public class MutableEntry implements Map.Entry +{ + private final K k; + private V v; + + public MutableEntry(K k, V v) + { + this.k = k; + this.v = v; + } + + @Override + public K getKey() + { + return k; + } + + @Override + public V getValue() + { + return v; + } + + @Override + public V setValue(V value) + { + V previous = v; + v = Objects.requireNonNull(value); + return previous; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || !(o instanceof Map.Entry)) return false; + Map.Entry that = (Map.Entry) o; + return Objects.equals(k, that.getKey()) && Objects.equals(v, that.getValue()); + } + + @Override + public int hashCode() + { + return Objects.hash(k, v); + } + + @Override + public String toString() + { + return k + "=" + v; + } +} diff --git a/src/java/org/apache/cassandra/utils/RTree.java b/src/java/org/apache/cassandra/utils/RTree.java new file mode 100644 index 000000000000..5b7affe373d9 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/RTree.java @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; +import javax.annotation.CheckForNull; + +import com.google.common.collect.AbstractIterator; + +public class RTree implements RangeTree +{ + /** + * Tuning size target can be tricky as it is based on expected access patterns and expected matche sizes. There is also + * a memory cost to account for as large tree sizes will have far more nodes with a small target than a large target. + * + * If matching most of the data then larger sizes leads to fewer hops + * If matching few elements then tree depth maters the most, if walking a long tree is more costly than walking the + * element list, then shrinking depth (by having larger size target) can improve performance. + */ + private static final int DEFAULT_SIZE_TARGET = 1 << 7; + private static final int DEFAULT_NUMBER_OF_CHILDREN = 6; + + private final Comparator comparator; + private final Accessor accessor; + private final int sizeTarget; + private final int numChildren; + private Node node = new Node(); + + public RTree(Comparator comparator, Accessor accessor) + { + this(comparator, accessor, DEFAULT_SIZE_TARGET, DEFAULT_NUMBER_OF_CHILDREN); + } + + public RTree(Comparator comparator, Accessor accessor, int sizeTarget, int numChildren) + { + if (sizeTarget <= 1) + throw new IllegalArgumentException("size target must be 2 or more"); + if (numChildren <= 1) + throw new IllegalArgumentException("Number of children must be 2 or more"); + if (sizeTarget < numChildren) + throw new IllegalArgumentException("Size target (" + sizeTarget + ") was less than number of children (" + numChildren + ")"); + this.comparator = comparator; + this.accessor = accessor; + this.sizeTarget = sizeTarget; + this.numChildren = numChildren; + } + + public static , Range, Value> RTree create(Accessor accessor) + { + return new RTree<>(Comparator.naturalOrder(), accessor); + } + + @Override + public List get(Range range) + { + List matches = new ArrayList<>(); + get(range, e -> matches.add(e.getValue())); + return matches; + } + + @Override + public void get(Range range, Consumer> onMatch) + { + node.search(range, onMatch, e -> e.getKey().equals(range), Function.identity()); + } + + @Override + public List> search(Range range) + { + List> matches = new ArrayList<>(); + search(range, matches::add); + return matches; + } + + @Override + public void search(Range range, Consumer> onMatch) + { + node.search(range, onMatch, ignore -> true, Function.identity()); + } + + public List find(Range range) + { + List matches = new ArrayList<>(); + find(range, matches::add); + return matches; + } + + public void find(Range range, Consumer onMatch) + { + node.search(range, onMatch, ignore -> true, Map.Entry::getValue); + } + + @Override + public List> searchToken(Token token) + { + List> matches = new ArrayList<>(); + searchToken(token, matches::add); + return matches; + } + + @Override + public void searchToken(Token token, Consumer> onMatch) + { + node.searchToken(token, onMatch, ignore -> true, Function.identity()); + } + + public List findToken(Token token) + { + List matches = new ArrayList<>(); + findToken(token, matches::add); + return matches; + } + + public void findToken(Token token, Consumer onMatch) + { + node.searchToken(token, onMatch, ignore -> true, Map.Entry::getValue); + } + + @Override + public boolean add(Range key, Value value) + { + node.add(key, value); + return true; + } + + @Override + public int remove(Range key) + { + return node.removeIf(e -> e.getKey().equals(key)); + } + + public int remove(Range key, Value value) + { + Map.Entry match = Map.entry(key, value); + return node.removeIf(match::equals); + } + + @Override + public void clear() + { + node = new Node(); + } + + @Override + public int size() + { + return node.size; + } + + @Override + public boolean isEmpty() + { + return node.size == 0; + } + + public String displayTree() + { + StringBuilder sb = new StringBuilder(); + node.displayTree(0, sb); + return sb.toString(); + } + + @Override + public Iterator> iterator() + { + return node.iterator(); + } + + @Override + public Stream> stream() + { + return StreamSupport.stream(spliterator(), false); + } + + private class Node implements Iterable> + { + private List> values = new ArrayList<>(); + private List children = null; + private int size = 0; + private Token minStart, maxStart, minEnd, maxEnd; + + int removeIf(Predicate> condition) + { + if (minStart == null) + return 0; + if (children != null) + { + int sum = 0; + for (Node node : children) + sum += node.removeIf(condition); + size -= sum; + return sum; + } + class Counter {int value;} + Counter counter = new Counter(); + values.removeIf(e -> { + if (condition.test(e)) + { + counter.value++; + return true; + } + return false; + }); + size -= counter.value; + if (values.isEmpty()) + minStart = maxStart = minEnd = maxEnd = null; + return counter.value; + } + + void add(Range range, Value value) + { + size++; + if (minStart == null) + { + minStart = maxStart = accessor.start(range); + minEnd = maxEnd = accessor.end(range); + } + else + { + Token start = accessor.start(range); + minStart = min(minStart, start); + maxStart = max(maxStart, start); + Token end = accessor.end(range); + minEnd = min(minEnd, end); + maxEnd = max(maxEnd, end); + } + if (children != null) + { + findBestMatch(range).add(range, value); + return; + } + values.add(new MutableEntry(range, value)); + if (shouldSplit()) + split(); + } + + private Node findBestMatch(Range range) + { + int topIdx = 0; + Node node = children.get(0); + int topScore = node.score(range); + int size = node.size; + for (int i = 1; i < children.size(); i++) + { + node = children.get(i); + int score = node.score(range); + if (score > topScore || (score == topScore && size > node.size)) + { + topIdx = i; + size = node.size; + } + } + return children.get(topIdx); + } + + private int score(Range range) + { + if (minStart == null) + return 0; + if (!intersects(range)) + return -10; + int score = 5; // overlapps + if (values != null) // is leaf + score += 5; + + int startScore = 0; + if (comparator.compare(maxStart, accessor.start(range)) <= 0) + startScore += 10; + else if (comparator.compare(minStart, accessor.start(range)) <= 0) + startScore += 5; + + int endScore = 0; + if (comparator.compare(minEnd, accessor.end(range)) >= 0) + endScore += 10; + else if (comparator.compare(maxEnd, accessor.end(range)) >= 0) + endScore += 5; + // if fully contained, then add the scores: 10 for largest bounds, 20 for smallest bounds + if (!(startScore == 0 || endScore == 0)) + score += startScore + endScore; + return score; + } + + boolean shouldSplit() + { + return values.size() > sizeTarget + // if the same range is used over and over again, splitting doesn't do much + && !(comparator.compare(minStart, maxStart) == 0 + && comparator.compare(minEnd, maxEnd) == 0); + } + + List>> partitionByEnd() + { + List allEndpoints = new ArrayList<>(values.size() * 2); + for (Map.Entry a : values) + { + allEndpoints.add(accessor.start(a.getKey())); + allEndpoints.add(accessor.end(a.getKey())); + } + allEndpoints.sort(comparator); + List maxToken = new ArrayList<>(numChildren); + int tick = allEndpoints.size() / numChildren; + int offset = tick; + for (int i = 0; i < numChildren; i++) + { + maxToken.add(allEndpoints.get(offset)); + offset += tick; + if (offset >= allEndpoints.size()) + { + maxToken.add(allEndpoints.get(allEndpoints.size() - 1)); + break; + } + } + + List>> partitions = new ArrayList<>(numChildren); + for (int i = 0; i < numChildren; i++) + partitions.add(new ArrayList<>()); + + for (Map.Entry a : values) + { + Token end = accessor.end(a.getKey()); + List> selected = null; + for (int i = 0; i < numChildren; i++) + { + if (comparator.compare(end, maxToken.get(i)) < 0) + { + selected = partitions.get(i); + break; + } + } + if (selected == null) + selected = partitions.get(partitions.size() - 1); + selected.add(a); + } + int[] sizes = partitions.stream().mapToInt(List::size).toArray(); + return goodEnough(sizes) ? partitions : null; + } + + private boolean goodEnough(int[] sizes) + { + double sum = 0.0; + for (int i : sizes) + sum += i; + double mean = sum / sizes.length; + double stddev = 0.0; + for (int i : sizes) + stddev += Math.pow(i - mean, 2); + stddev = Math.sqrt(stddev / sizes.length); + return stddev < 1.5; + } + + void split() + { + children = new ArrayList<>(numChildren); + for (int i = 0; i < numChildren; i++) + children.add(new Node()); + + List>> partitions = partitionByEnd(); + if (partitions == null) + partitions = partitionEven(); + for (int i = 0; i < children.size(); i++) + { + Node c = children.get(i); + List> entries = partitions.get(i); + entries.forEach(e -> c.add(e.getKey(), e.getValue())); + } + + values.clear(); + values = null; + } + + private List>> partitionEven() + { + values.sort((a, b) -> { + Range left = a.getKey(); + Range right = b.getKey(); + int rc = comparator.compare(accessor.start(left), accessor.start(right)); + if (rc == 0) + rc = comparator.compare(accessor.end(left), accessor.end(right)); + return rc; + }); + List>> partition = new ArrayList<>(numChildren); + int size = Math.max(1, values.size() / numChildren); + int offset = 0; + for (int i = 0; i < numChildren - 1; i++) + { + int total = size; + partition.add(new ArrayList<>(values.subList(offset, offset + total))); + offset += total; + } + partition.add(new ArrayList<>(values.subList(offset, values.size()))); + return partition; + } + + void search(Range range, Consumer matches, Predicate> predicate, Function, T> transformer) + { + if (minStart == null) + return; + if (!intersects(range)) + return; + if (children != null) + { + children.forEach(n -> n.search(range, matches, predicate, transformer)); + return; + } + values.forEach(e -> { + if (accessor.intersects(e.getKey(), range) && predicate.test(e)) + matches.accept(transformer.apply(e)); + }); + } + + void searchToken(Token token, Consumer matches, Predicate> predicate, Function, T> transformer) + { + if (minStart == null) + return; + if (!contains(minStart, maxEnd, token)) + return; + if (children != null) + { + for (int i = 0, size = children.size(); i < size; i++) + { + Node node = children.get(i); + node.searchToken(token, matches, predicate, transformer); + } + return; + } + values.forEach(e -> { + if (accessor.contains(e.getKey(), token) && predicate.test(e)) + matches.accept(transformer.apply(e)); + }); + } + + boolean intersects(Range range) + { + return accessor.intersects(range, minStart, maxEnd); + } + + boolean contains(Token start, Token end, Token value) + { + return accessor.contains(start, end, value); + } + + private void displayTree(int level, StringBuilder sb) + { + for (int i = 0; i < level; i++) + sb.append('\t'); + sb.append("start:(").append(minStart).append(", ").append(maxStart).append("), end:(").append(minEnd).append(", ").append(maxEnd).append("):"); + if (children != null) + { + sb.append('\n'); + children.forEach(n -> n.displayTree(level + 1, sb)); + } + else + { + sb.append(' ').append(size).append('\n'); + } + } + + @Override + public String toString() + { + return "Node{" + + "minStart=" + minStart + + ", maxStart=" + maxStart + + ", minEnd=" + minEnd + + ", maxEnd=" + maxEnd + + ", values=" + values + + ", children=" + children + + '}'; + } + + private Token min(Token a, Token b) + { + return comparator.compare(a, b) < 0 ? a : b; + } + + private Token max(Token a, Token b) + { + return comparator.compare(a, b) < 0 ? b : a; + } + + @Override + public Iterator> iterator() + { + if (values != null) + return values.iterator(); + return new AbstractIterator<>() + { + private int index = 0; + private Iterator> it = null; + @CheckForNull + @Override + protected Map.Entry computeNext() + { + while (true) + { + if (it == null) + { + if (index == children.size()) + return endOfData(); + it = children.get(index++).iterator(); + } + if (it.hasNext()) + return it.next(); + it = null; + } + } + }; + } + } +} diff --git a/src/java/org/apache/cassandra/utils/RangeTree.java b/src/java/org/apache/cassandra/utils/RangeTree.java new file mode 100644 index 000000000000..4fb0b8696acb --- /dev/null +++ b/src/java/org/apache/cassandra/utils/RangeTree.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public interface RangeTree extends Iterable> +{ + void searchToken(Token token, Consumer> onMatch); + + boolean add(Range key, Value value); + + List get(Range range); + + void get(Range range, Consumer> onMatch); + + List> search(Range range); + + void search(Range range, Consumer> onMatch); + + List> searchToken(Token token); + + int remove(Range key); + + void clear(); + + int size(); + + boolean isEmpty(); + + default Stream> stream() + { + return StreamSupport.stream(spliterator(), false); + } + + interface Accessor + { + Token start(Range range); + Token end(Range range); + boolean contains(Token start, Token end, Token token); + default boolean contains(Range range, Token token) + { + return contains(start(range), end(range), token); + } + boolean intersects(Range range, Token start, Token end); + default boolean intersects(Range left, Range right) + { + return intersects(left, start(right), end(right)); + } + } +} diff --git a/src/java/org/apache/cassandra/utils/TriPredicate.java b/src/java/org/apache/cassandra/utils/TriPredicate.java new file mode 100644 index 000000000000..a443d823f3f2 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/TriPredicate.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +public interface TriPredicate +{ + boolean test(A a, B b, C c); +} diff --git a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java index 4d0741679cb1..3c6d2caf6ceb 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java @@ -41,6 +41,8 @@ import org.apache.cassandra.locator.NetworkTopologyProximity; import org.apache.cassandra.locator.SimpleSeedProvider; +import static org.apache.cassandra.config.CassandraRelevantProperties.DTEST_ACCORD_ENABLED; + public class InstanceConfig implements IInstanceConfig { public final int num; @@ -323,7 +325,7 @@ public static InstanceConfig generate(int nodeNum, { int seedNode = provisionStrategy.seedNodeNum(); AccordSpec accordSpec = new AccordSpec(); - accordSpec.enabled = true; + accordSpec.enabled = DTEST_ACCORD_ENABLED.getBoolean(); accordSpec.journal_directory = String.format("%s/node%d/accord_journal", root, nodeNum); accordSpec.shard_count = new OptionaldPositiveInt(4); return new InstanceConfig(nodeNum, diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java similarity index 99% rename from test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java rename to test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java index f7ef3afe7efa..c5f92a6c70c6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java @@ -18,41 +18,14 @@ package org.apache.cassandra.distributed.test.accord; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.stream.Collectors; - +import accord.primitives.Unseekables; +import accord.topology.Topologies; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import accord.primitives.Unseekables; -import accord.topology.Topologies; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.functions.types.utils.Bytes; -import org.apache.cassandra.db.marshal.Int32Type; -import org.apache.cassandra.db.marshal.ListType; -import org.apache.cassandra.db.marshal.MapType; -import org.apache.cassandra.db.marshal.SetType; -import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.*; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.ICoordinator; @@ -64,41 +37,39 @@ import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.ByteBufferUtil; import org.assertj.core.api.Assertions; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; import static java.util.Collections.singletonList; import static org.apache.cassandra.cql3.CQLTester.row; import static org.apache.cassandra.distributed.util.QueryResultUtil.assertThat; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.Assert.*; -@RunWith(Parameterized.class) -public class AccordCQLTest extends AccordTestBase +public abstract class AccordCQLTestBase extends AccordTestBase { - private static final Logger logger = LoggerFactory.getLogger(AccordCQLTest.class); - - @Override - protected Logger logger() - { - return logger; - } + private static final Logger logger = LoggerFactory.getLogger(AccordCQLTestBase.class); - @Parameterized.Parameter - public String transactionalModeName; + private final TransactionalMode transactionalMode; - TransactionalMode transactionalMode; - - @Parameterized.Parameters(name = "transactionalMode={0}") - public static Collection data() - { - return ImmutableList.of(new Object[] {TransactionalMode.full.toString()}, - new Object[] {TransactionalMode.mixed_reads.toString()}); + protected AccordCQLTestBase(TransactionalMode transactionalMode) { + this.transactionalMode = transactionalMode; } - @Before - public void setNonSerialWriteStrategy() + @Override + protected Logger logger() { - transactionalMode = TransactionalMode.valueOf(transactionalModeName); + return logger; } @BeforeClass @@ -2302,7 +2273,7 @@ private void testSetSelection(String ddl) throws Exception @Test public void testMultiCellMapSelection() throws Exception { - testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map)"); + testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'"); } @Test diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/FullAccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/FullAccordCQLTest.java new file mode 100644 index 000000000000..3f8259cff3e3 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/FullAccordCQLTest.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import org.apache.cassandra.service.consensus.TransactionalMode; + +public class FullAccordCQLTest extends AccordCQLTestBase +{ + public FullAccordCQLTest() + { + super(TransactionalMode.full); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/MixedReadAccordCQLTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/MixedReadAccordCQLTest.java new file mode 100644 index 000000000000..b9faecfac2fa --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/MixedReadAccordCQLTest.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import org.apache.cassandra.service.consensus.TransactionalMode; + +public class MixedReadAccordCQLTest extends AccordCQLTestBase +{ + public MixedReadAccordCQLTest() + { + super(TransactionalMode.mixed_reads); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java index 907a0e437597..1481e2a3be1b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java @@ -38,6 +38,8 @@ import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.statements.schema.CreateKeyspaceStatement; import org.apache.cassandra.cql3.statements.schema.KeyspaceAttributes; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.ByteOrderedPartitioner; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; @@ -51,6 +53,9 @@ import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.MemtableParams; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SchemaTransformation; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.tcm.AtomicLongBackedProcessor; @@ -230,6 +235,30 @@ public static void createKeyspace(String statement) } } + public static void setMemtable(String ks, String table, String memtable) + { + setMemtable(ks, table, MemtableParams.get(memtable)); + } + + public static void setMemtable(String ks, String table, MemtableParams memtable) + { + if (SchemaConstants.isLocalSystemKeyspace(ks)) + { + ColumnFamilyStore store = Keyspace.open(ks).getColumnFamilyStore(table); + store.reload(store.metadata().unbuild().memtable(memtable).build()); + } + else + { + Schema.instance.submit(cms -> { + var km = cms.schema.getKeyspaceMetadata(ks); + var update = km.withSwapped(km.tables.withSwapped(km.tables.getNullable(table).unbuild() + .memtable(memtable) + .build())); + return cms.schema.getKeyspaces().withAddedOrUpdated(update); + }); + } + } + private static Set leaving(ClusterMetadata metadata) { return metadata.directory.states.entrySet().stream() diff --git a/test/distributed/org/apache/cassandra/distributed/upgrade/UpgradeTestBase.java b/test/distributed/org/apache/cassandra/distributed/upgrade/UpgradeTestBase.java index 6e6aaaef7669..33c4139878be 100644 --- a/test/distributed/org/apache/cassandra/distributed/upgrade/UpgradeTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/upgrade/UpgradeTestBase.java @@ -53,6 +53,7 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.SimpleGraph; +import static org.apache.cassandra.config.CassandraRelevantProperties.DTEST_ACCORD_ENABLED; import static org.apache.cassandra.config.CassandraRelevantProperties.SKIP_GC_INSPECTOR; import static org.apache.cassandra.distributed.shared.Versions.Version; import static org.apache.cassandra.distributed.shared.Versions.find; @@ -74,6 +75,7 @@ public static void beforeClass() throws Throwable { ICluster.setup(); SKIP_GC_INSPECTOR.setBoolean(true); + DTEST_ACCORD_ENABLED.setBoolean(false); } diff --git a/test/unit/accord/utilsfork/Gens.java b/test/unit/accord/utilsfork/Gens.java index 4f696361b9ad..72eccbf232a1 100644 --- a/test/unit/accord/utilsfork/Gens.java +++ b/test/unit/accord/utilsfork/Gens.java @@ -44,7 +44,7 @@ import com.google.common.collect.Iterables; -import accord.utils.random.Picker; +import accord.utilsfork.random.Picker; public class Gens { private Gens() { diff --git a/test/unit/accord/utilsfork/RandomSource.java b/test/unit/accord/utilsfork/RandomSource.java index 830e52d9a0cb..b3e37087092f 100644 --- a/test/unit/accord/utilsfork/RandomSource.java +++ b/test/unit/accord/utilsfork/RandomSource.java @@ -33,7 +33,7 @@ import com.google.common.collect.Iterables; -import accord.utils.random.Picker; +import accord.utilsfork.random.Picker; // TODO (expected): merge with C* RandomSource public interface RandomSource @@ -42,6 +42,11 @@ static RandomSource wrap(Random random) { return new accord.utilsfork.WrappedRandomSource(random); } + //TODO (maintaince): once the rebase is over remove this... + static RandomSource wrap(accord.utils.RandomSource rs) + { + return new WrappedRandomSource(rs.asJdkRandom()); + } void nextBytes(byte[] bytes); diff --git a/test/unit/accord/utils/random/Picker.java b/test/unit/accord/utilsfork/random/Picker.java similarity index 99% rename from test/unit/accord/utils/random/Picker.java rename to test/unit/accord/utilsfork/random/Picker.java index f83d57763b00..f9584a5f98cf 100644 --- a/test/unit/accord/utils/random/Picker.java +++ b/test/unit/accord/utilsfork/random/Picker.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package accord.utils.random; +package accord.utilsfork.random; import java.util.Arrays; import java.util.function.Supplier; diff --git a/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorPlus.java b/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorPlus.java index 71c88a947e48..d75c10eaa9b4 100644 --- a/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorPlus.java +++ b/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorPlus.java @@ -33,7 +33,7 @@ import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; -public class ForwardingExecutorPlus implements ExecutorPlus +public class ForwardingExecutorPlus implements ExecutorPlus, SequentialExecutorPlus { private final ExecutorService delegate; @@ -216,4 +216,10 @@ public void onFailure(Throwable t) } throw new IllegalStateException("Unexpected future type: " + submit.getClass()); } + + @Override + public AtLeastOnceTrigger atLeastOnceTrigger(Runnable runnable) + { + return new SingleThreadExecutorPlus.AtLeastOnce(this, runnable); + } } diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index f42063beeaf5..f53955b93a1d 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -2986,6 +2986,26 @@ private static AbstractType typeFor(Object value) throw new IllegalArgumentException("Unsupported value type (value is " + value + ")"); } + protected static String wrapInTxn(String... stmts) + { + return wrapInTxn(Arrays.asList(stmts)); + } + + protected static String wrapInTxn(List stmts) + { + StringBuilder sb = new StringBuilder(); + sb.append("BEGIN TRANSACTION\n"); + for (String stmt : stmts) + { + sb.append('\t').append(stmt); + if (!stmt.endsWith(";")) + sb.append(';'); + sb.append('\n'); + } + sb.append("COMMIT TRANSACTION"); + return sb.toString(); + } + private static class TupleValue { protected final Object[] values; diff --git a/test/unit/org/apache/cassandra/dht/IPartitionerTest.java b/test/unit/org/apache/cassandra/dht/IPartitionerTest.java new file mode 100644 index 000000000000..5e46f09ed6e0 --- /dev/null +++ b/test/unit/org/apache/cassandra/dht/IPartitionerTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.dht; + +import java.util.Objects; + +import org.junit.Test; + +import org.apache.cassandra.utils.AbstractTypeGenerators; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +public class IPartitionerTest +{ + @Test + public void byteCompareSerde() + { + qt().forAll(AccordGenerators.fromQT(CassandraGenerators.token())).check(token -> { + var p = token.getPartitioner(); + var comparable = Objects.requireNonNull(ByteSource.peekable(p.getTokenFactory().asComparableBytes(token, ByteComparable.Version.OSS50))); + Token read = p.getTokenFactory().fromComparableBytes(comparable, ByteComparable.Version.OSS50); + Assertions.assertThat(read) + .describedAs("If LocalPartitioner, the type is %s", (token.getPartitioner() instanceof LocalPartitioner ? AbstractTypeGenerators.typeTree(((LocalPartitioner) token.getPartitioner()).comparator) : null)) + .isEqualTo(token); + }); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java new file mode 100644 index 000000000000..02221eb2004b --- /dev/null +++ b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java @@ -0,0 +1,517 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.LongStream; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.RoutingKey; +import accord.local.Node; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.FullKeyRoute; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Route; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.utils.RandomSource; +import org.agrona.collections.Int2ObjectHashMap; +import org.agrona.collections.Long2ObjectHashMap; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.exceptions.ReadSizeAbortException; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Interval; +import org.apache.cassandra.utils.IntervalTree; +import org.apache.cassandra.utils.ObjectSizes; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +/** + * This test validates the system_accord.commands.route index to make sure it returns the right values... the way the + * test is strucutred allows pluggability but runs with fixed configs while running in CI... + * + * If you are interested in testing different cases, the following tunables exist: + * + * + *
  • size: how many read/writes should be done
  • + *
  • pattern: how ranges are layed out. By default we test NO_OVERLAP so there are no conflicts
  • + *
    + */ +public class AccordIndexStressTest extends CQLTester +{ + private static final Logger logger = LoggerFactory.getLogger(AccordIndexStressTest.class); + + static + { + // The plan is to migrate away from SAI, so rather than hacking around timeout issues; just disable for now + CassandraRelevantProperties.SAI_TEST_DISABLE_TIMEOUT.setBoolean(true); + } + private static final boolean VALIDATE = true; + private static final boolean INCLUDE_FLUSH = true; + private static final long SLOW_NS = TimeUnit.MILLISECONDS.toNanos(25); + private static final Node.Id NODE = new Node.Id(42); + private final Routable.Domain domain = Routable.Domain.Range; + private final Int2ObjectHashMap>>> storeToTableToRoutingKeysToTxns = new Int2ObjectHashMap<>(); + private final Int2ObjectHashMap>>> storeToTableToRangesToTxns = new Int2ObjectHashMap<>(); + private final RoutesSearcher searcher = new RoutesSearcher(); + + enum Size + {tiny, small, medium, large, benchmark} + + private final Size size = Size.small; + + private enum Read + {INDEX, CQL} + + private Read read = Read.INDEX; + + private enum Pattern { RANDOM, NO_OVERLAP } + private final Pattern pattern = Pattern.NO_OVERLAP; + + @Test + public void test() + { + var tables = IntStream.range(0, 10) + .mapToObj(i -> TableId.fromUUID(new UUID(0, i))) + .collect(Collectors.toList()); + int numWrites, numReads; + switch (size) + { + case tiny: + numWrites = 100; + numReads = 10; + break; + case small: + numWrites = 100_000; + numReads = 1_000; + break; + case medium: + numWrites = 1_000_000; + numReads = 1_000; + break; + case large: + numWrites = 10_000_000; + numReads = 1_000; + break; + case benchmark: + numWrites = 1_000_000; + numReads = numWrites * 10; + break; + default: + throw new AssertionError("Unknown size: " + size); + } + var minToken = 0; + var maxToken = (1 << 8) * numWrites; + int numStores = 10; + qt().withSeed(-1464527987857660885L).withExamples(1).check(rs -> { + timed("write(" + numWrites + ")", () -> writeRecords(rs, numStores, tables, minToken, maxToken, numWrites)); + if (INCLUDE_FLUSH) + timed("flush(writes=" + numWrites + ")", () -> FBUtilities.waitOnFutures(Keyspace.open("system_accord").flush(ColumnFamilyStore.FlushReason.UNIT_TESTS))); + var warmupReads = Math.max(1, (int) (numReads * .2)); + timed("warmup read(" + warmupReads + ")", () -> readRecords(rs, warmupReads)); + timed("read(" + numReads + ")", () -> readRecords(rs, numReads)); + }); + } + + private static void timed(String name, Runnable fn) + { + logger.warn("Task {} starting...", name); + long startNs = nanoTime(); + try + { + fn.run(); + } + catch (Throwable t) + { + logger.warn("Task {} failed after {}ms", name, TimeUnit.NANOSECONDS.toMillis(nanoTime() - startNs)); + throw t; + } + logger.warn("Task {} completed after {}ms", name, TimeUnit.NANOSECONDS.toMillis(nanoTime() - startNs)); + } + + private static class RangeWrapper + { + final TokenRange[] ranges; + final IntervalTree> tree; + + RangeWrapper(TokenRange[] ranges, IntervalTree> tree) + { + this.ranges = ranges; + this.tree = tree; + } + } + + private Int2ObjectHashMap> store2Table2Tokens; + private Int2ObjectHashMap> store2Table2Ranges; + + private void readRecords(RandomSource rs, int numRecords) + { + logger.warn("The bookkeeping is {} bytes", ObjectSizes.measureDeep(storeToTableToRoutingKeysToTxns)); + // sort the tokens + if (domain == Routable.Domain.Key && store2Table2Tokens == null) + { + store2Table2Tokens = new Int2ObjectHashMap<>(); + timed("Model building: key", () -> storeToTableToRoutingKeysToTxns.forEachInt((storeId, actual) -> { + Map map = new HashMap<>(); + for (var e : actual.entrySet()) + { + var keys = e.getValue().keySet(); + long[] tokens = new long[keys.size()]; + var it = keys.iterator(); + for (int i = 0; it.hasNext(); i++) + tokens[i] = it.nextLong(); + Arrays.sort(tokens); + map.put(e.getKey(), tokens); + } + store2Table2Tokens.put(storeId, map); + })); + store2Table2Ranges = null; + } + else if (domain == Routable.Domain.Range && store2Table2Ranges == null) + { + store2Table2Ranges = new Int2ObjectHashMap<>(); + timed("Model building: range", () -> storeToTableToRangesToTxns.forEachInt((storeId, actual) -> { + Map map = new HashMap<>(); + for (var e : actual.entrySet()) + { + TableId tableId = e.getKey(); + Map> range2Txns = e.getValue(); + var keys = range2Txns.keySet(); + TokenRange[] ranges = new TokenRange[keys.size()]; + var it = keys.iterator(); + var builder = new IntervalTree.Builder>(); + for (int i = 0; it.hasNext(); i++) + { + TokenRange r = ranges[i] = it.next(); + List txns = range2Txns.get(r); + txns.forEach(txnId -> builder.add(new Interval<>(r.start(), r.end(), txnId))); + } + Arrays.sort(ranges, Range::compare); + map.put(tableId, new RangeWrapper(ranges, builder.build())); + } + store2Table2Ranges.put(storeId, map); + })); + store2Table2Tokens = null; + } + long[] samples = new long[numRecords]; + int[] counts = new int[numRecords]; + int size = 0; + int numReadSizeAborts = 0; + try + { + for (int i = 0; i < numRecords; i++) + { + int store; + TableId table; + Set expected = new HashSet<>(); + TokenKey start, end; + switch (domain) + { + case Key: + { + store = rs.pick(storeToTableToRoutingKeysToTxns.keySet()); + var actual = this.storeToTableToRoutingKeysToTxns.get(store); + var tableToTokens = store2Table2Tokens.get(store); + + table = rs.pick(actual.keySet()); + var tokens = tableToTokens.get(table); + + var offset = rs.nextInt(0, tokens.length); + var endOffset = offset == tokens.length - 1 ? tokens.length - 1 : offset + rs.nextInt(1, Math.min(3, tokens.length - offset)); + IntStream.range(offset + 1, endOffset + 1).mapToLong(o -> tokens[o]).forEach(token -> expected.addAll(actual.get(table).get(token))); + + start = new TokenKey(table, new Murmur3Partitioner.LongToken(tokens[offset])); + end = new TokenKey(table, new Murmur3Partitioner.LongToken(tokens[endOffset])); + } + break; + case Range: + { + store = rs.pick(storeToTableToRangesToTxns.keySet()); + var tableToRangesToTxns = storeToTableToRangesToTxns.get(store); + var tableToRanges = store2Table2Ranges.get(store); + + table = rs.pick(tableToRangesToTxns.keySet()); + var wrapper = tableToRanges.get(table); + var ranges = wrapper.ranges; + var tree = wrapper.tree; + var range = rs.pick(ranges); + var a = tokenValue(range.start()); + var b = tokenValue(range.end()); + start = new TokenKey(table, new Murmur3Partitioner.LongToken(a + 1)); + end = new TokenKey(table, new Murmur3Partitioner.LongToken(b - 1)); + expected.addAll(tree.search(start, end)); + assert !expected.isEmpty(); + } + break; + default: + throw new IllegalArgumentException("Unknown domain: " + domain); + } + + var startNs = nanoTime(); + Set actual = read(store, start, end); + var durationNs = nanoTime() - startNs; + samples[size] = durationNs; + counts[size++] = actual.size(); + if (slow(durationNs)) + logger.warn("Slow search: i={}, store={}, [{}, {}), results={}", i, store, start, end, actual.size()); + if (VALIDATE) + { + try + { + Assertions.assertThat(actual).describedAs("[%s, %s)", start, end).isEqualTo(expected); + } + catch (Throwable t) + { + logSamples(samples, counts, size); + throw t; + } + } + } + } + finally + { + logger.info("Number of aborts due to size: {}", numReadSizeAborts); + logSamples(samples, counts, size); + } + } + + private static long tokenValue(RoutingKey start) + { + return ((AccordRoutingKey.TokenKey) start).token().getLongValue(); + } + + private static boolean slow(long durationNs) + { + return durationNs >= SLOW_NS; + } + + private Set read(int store, AccordRoutingKey start, AccordRoutingKey end) + { + switch (read) + { + case INDEX: + return readIndex(store, start, end); + case CQL: + return readCQL(store, start, end); + default: + throw new AssertionError("Unknown read type: " + read); + } + } + + private Set readIndex(int store, AccordRoutingKey start, AccordRoutingKey end) + { + return searcher.intersects(store, start, end); + } + + private Set readCQL(int store, AccordRoutingKey start, AccordRoutingKey end) + { + Set actual = new HashSet<>(); + try + { + UntypedResultSet results = execute("SELECT txn_id FROM system_accord.commands WHERE store_id = ? AND route > ? AND route <= ?", store, OrderedRouteSerializer.serializeRoutingKey(start), OrderedRouteSerializer.serializeRoutingKey(end)); + for (var row : results) + actual.add(AccordKeyspace.deserializeTxnId(row)); + } + catch (ReadSizeAbortException e) + { + // don't count it... + logger.warn("Abort query to [{}, {}) do to size", start, end); + return null; + } + return actual; + } + + private void logSamples(long[] samples, int[] counts, int size) + { + if (size == 0) + { + logger.warn("No logs sampled"); + return; + } + Arrays.sort(samples, 0, size); + Arrays.sort(counts, 0, size); + StringBuilder sb = new StringBuilder(); + sb.append("Samples: ").append(size); + sb.append("\nLatenciy:"); + sb.append("\n Min (micro): ").append(TimeUnit.NANOSECONDS.toMicros(samples[0])); + sb.append("\n Max (micro): ").append(TimeUnit.NANOSECONDS.toMicros(samples[size - 1])); + sb.append("\n Median (micro): ").append(TimeUnit.NANOSECONDS.toMicros(samples[size / 2])); + sb.append("\n Avg (micro): ").append(TimeUnit.NANOSECONDS.toMicros((long) LongStream.of(samples).limit(size).average().getAsDouble())); + sb.append("\nCounts:"); + sb.append("\n Min: ").append(counts[0]); + sb.append("\n Max: ").append(counts[size - 1]); + sb.append("\n Median: ").append(counts[size / 2]); + sb.append("\n Avg: ").append((int) IntStream.of(counts).limit(size).average().getAsDouble()); + logger.info(sb.toString()); + } + + private void writeRecords(RandomSource rs, + int numStores, + List tables, + int minToken, int maxToken, + int numRecords) + { + var cql = "INSERT INTO system_accord.commands (store_id, domain, txn_id, status, route, durability) VALUES (?, ?, ?, ?, ?, ?)"; + for (int i = 0; i < numRecords; i++) + { + int store = rs.nextInt(0, numStores); + TxnId txnId = new TxnId(0, 1000 + i, Txn.Kind.Write, domain, NODE); + int domain = txnId.domain().ordinal(); + int status = SaveStatus.PreCommitted.ordinal(); + ByteBuffer routeBB; + try + { + Route route = createRoute(rs, numRecords, i, rs.nextInt(1, 20), tables, minToken, maxToken); + for (var u : route) + { + switch (u.domain()) + { + case Key: + { + AccordRoutingKey key = (AccordRoutingKey) u; + var table = key.table(); + var token = key.token().getLongValue(); + storeToTableToRoutingKeysToTxns.computeIfAbsent(store, ignore -> new HashMap<>()) + .computeIfAbsent(table, ignore -> new Long2ObjectHashMap<>()) + .computeIfAbsent(token, ignore -> new ArrayList<>()) + .add(txnId); + } + break; + case Range: + { + TokenRange range = (TokenRange) u; + var table = range.table(); + storeToTableToRangesToTxns.computeIfAbsent(store, ignore -> new HashMap<>()) + .computeIfAbsent(table, ignore -> new HashMap<>()) + .computeIfAbsent(range, ignore -> new ArrayList<>()) + .add(txnId); + } + break; + default: + throw new AssertionError("Unexpected domain: " + u.domain()); + } + } + routeBB = AccordKeyspace.serializeRoute(route); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + int durability = Status.Durability.NotDurable.ordinal(); + + execute(cql, store, domain, AccordKeyspace.serializeTimestamp(txnId), status, routeBB, durability); + } + } + + private Route createRoute(RandomSource rs, int numRecords, int index, int numKeys, List tables, int minToken, int maxToken) + { + switch (domain) + { + case Key: + { + TreeSet keys = new TreeSet<>(); + while (keys.size() < numKeys) + { + var table = rs.pick(tables); + var token = new Murmur3Partitioner.LongToken(rs.nextInt(minToken, maxToken)); + keys.add(new TokenKey(table, token)); + } + return new FullKeyRoute(keys.first(), true, keys.toArray(RoutingKey[]::new)); + } + case Range: + { + TreeSet ranges = new TreeSet<>(Range::compareTo); + RoutingKey routingKey = null; + var domain = maxToken - minToken + 1; + var delta = domain / numRecords; + var sub_delta = delta / numKeys; + while (ranges.size() < numKeys) + { + var table = rs.pick(tables); + int a, b; + switch (pattern) + { + case RANDOM: + { + a = rs.nextInt(minToken, maxToken); + b = rs.nextInt(minToken, maxToken); + while (a == b) + b = rs.nextInt(minToken, maxToken); + if (a > b) + { + var tmp = a; + a = b; + b = tmp; + } + } + break; + case NO_OVERLAP: + { + a = delta * index + (sub_delta * ranges.size()); + b = a + sub_delta; + } + break; + default: + throw new IllegalArgumentException("Unknown pattern: " + pattern); + } + ranges.add(new TokenRange(new TokenKey(table, new Murmur3Partitioner.LongToken(a)), new TokenKey(table, new Murmur3Partitioner.LongToken(b)))); + if (routingKey == null) + { + routingKey = new TokenKey(table, new Murmur3Partitioner.LongToken(b)); + } + } + return Ranges.ofSorted(ranges.toArray(Range[]::new)).toRoute(routingKey); + } + default: + throw new IllegalArgumentException("Unknown domain: " + domain); + } + } +} diff --git a/test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java b/test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java new file mode 100644 index 000000000000..1fd54ab877f8 --- /dev/null +++ b/test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java @@ -0,0 +1,441 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import com.google.common.collect.ImmutableMap; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ClusteringComparator; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.index.accord.CheckpointIntervalArrayIndex.Interval; +import org.apache.cassandra.index.accord.IndexDescriptor.IndexComponent; +import org.apache.cassandra.io.sstable.Descriptor; +import org.apache.cassandra.io.sstable.SequenceBasedSSTableId; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.io.util.FileHandle; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; +import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.utils.ByteBufferUtil.bytes; + +public class CheckpointIntervalArrayIndexTest +{ + private static final Logger logger = LoggerFactory.getLogger(CheckpointIntervalArrayIndexTest.class); + + static + { + DatabaseDescriptor.clientInitialization(); + } + + private static final byte[] EMPTY = new byte[0]; + private static final TreeSet EMPTY_TREE_SET = new TreeSet<>(); + + private static final Gen.IntGen MAX_TOKEN_GEN = Gens.pickInt(1 << 14, + 1 << 16, + 1 << 20, + 1 << 30); + + private enum Pattern { RANDOM, NO_OVERLAP, PARTIAL_OVERLAP } + + private static final Gen PATTERN_GEN = Gens.enums().all(Pattern.class); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + private int generation = 0; + + @Test + public void simple() throws IOException + { + int bytesPerKey = Integer.BYTES; + int bytesPerValue = 0; + + List list = new ArrayList<>(10); + list.add(new Interval(bytes(Integer.MIN_VALUE).array(), bytes(Integer.MAX_VALUE).array(), EMPTY)); + for (int i = 0; i < 10; i++) + list.add(new Interval(bytes(i).array(), bytes(i + 1).array(), EMPTY)); + + try (var searcher = index(bytesPerKey, bytesPerValue, list)) + { + Set> expected = Set.of(List.of(-2147483648, 2147483647), + List.of(2, 3), + List.of(3, 4)); + Set> actual = new HashSet<>(); + var stats = searcher.intersects(bytes(2).array(), bytes(4).array(), value -> actual.add(List.of(ByteBuffer.wrap(value.start).getInt(), ByteBuffer.wrap(value.end).getInt()))); + logger.info("Stats: {}", stats); + Assertions.assertThat(actual).isEqualTo(expected); + } + } + + @Test + public void fuzzSmall() + { + var minToken = 0; + int numRecords = 10; + qt().withTimeout(Duration.ofSeconds(60)).check(rs -> fuzz(rs, minToken, MAX_TOKEN_GEN.nextInt(rs), PATTERN_GEN.next(rs), numRecords)); + } + + @Test + public void fuzzMedium() + { + var minToken = 0; + int numRecords = 1_000; + qt().withTimeout(Duration.ofSeconds(60)).check(rs -> fuzz(rs, minToken, MAX_TOKEN_GEN.nextInt(rs), PATTERN_GEN.next(rs), numRecords)); + } + + private void fuzz(RandomSource rs, int minToken, int maxToken, Pattern pattern, int numRecords) throws IOException + { + List intervals = buildIntervals(rs, minToken, maxToken, pattern, numRecords); + List nonContainedRanges = findMissingRanges(intervals); + + try (var searcher = index(Integer.BYTES, Integer.BYTES, intervals)) + { + for (int i = 0, samples = rs.nextInt(Math.min(10, numRecords), Math.min(10, numRecords) * 10); i < samples; i++) + { + SearchContext ctx = rs.decide(.2) ? miss(rs, nonContainedRanges) + : hit(rs, intervals, pattern); + Set actual = new TreeSet<>(); + try + { + var stats = searcher.intersects(ctx.start, ctx.end, interval -> actual.add(new DetailedInterval(interval))); + logger.info("[Pattern={}, size={}, expectedMatches={}, query=[{}, {})] Stats: {}", pattern, intervals.size(), ctx.expected.size(), ctx.a, ctx.b, stats); + } + catch (Throwable t) + { + throw new AssertionError(String.format("Failure searching for [%d, %d) from %s", ctx.a, ctx.b, intervals), t); + } + Assertions.assertThat(actual).describedAs("search(%d, %d) from %s", ctx.a, ctx.b, intervals).isEqualTo(ctx.expected); + } + } + } + + /** + * mutable/shared ctx to avoid allocating in a loop... + */ + private SearchContext searchContext = new SearchContext(); + + private SearchContext miss(RandomSource rs, List nonContainedRanges) + { + var range = rs.pick(nonContainedRanges); + var s = unbc(range.start); + var e = unbc(range.end); + int domain = e - s; + int a, b; + if (domain == 1) + { + // you can not find multiple values within this range! + a = s; + b = e; + } + else + { + a = e == Integer.MAX_VALUE ? rs.nextInt(s, e) : rs.nextInt(s, e) + 1; + b = e == Integer.MAX_VALUE ? rs.nextInt(s, e) : rs.nextInt(s, e) + 1; + for (int i = 0; i < 42 && a == b; i++) + b = e == Integer.MAX_VALUE ? rs.nextInt(s, e) : rs.nextInt(s, e) + 1; + if (a == b) + throw new IllegalStateException("Unable to create missing range: " + range); + if (b < a) + { + var tmp = a; + a = b; + b = tmp; + } + } + searchContext.a = a; + searchContext.b = b; + searchContext.start = bc(a); + searchContext.end = bc(b); + searchContext.expected = EMPTY_TREE_SET; + return searchContext; + } + + private SearchContext hit(RandomSource rs, List intervals, Pattern pattern) + { + int numRecords = intervals.size(); + DetailedInterval first, second; + do + { + var offset = rs.nextInt(0, numRecords); + int endOffset; + switch (pattern) + { + case PARTIAL_OVERLAP: + case RANDOM: + endOffset = offset; + break; + case NO_OVERLAP: + endOffset = offset == numRecords - 1 ? offset : offset + rs.nextInt(1, Math.min(3, numRecords - offset)); + break; + default: + throw new IllegalArgumentException("Unknown pattern: " + pattern); + } + first = intervals.get(offset); + second = intervals.get(endOffset); + } + while (first.compareTo(second) == 0 && first.size() == 1); + int a, b; + a = rs.nextInt(unbc(first.start), unbc(first.end)) + 1; + b = rs.nextInt(unbc(second.start), unbc(second.end)) + 1; + while (a == b) + b = rs.nextInt(unbc(second.start), unbc(second.end)) + 1; + if (b < a) + { + var tmp = b; + b = a; + a = tmp; + } + + searchContext.start = bc(a); + searchContext.end = bc(b); + + searchContext.expected = intervals.stream().filter(i -> i.intersects(searchContext.start, searchContext.end)).collect(Collectors.toCollection(TreeSet::new)); + Assertions.assertThat(searchContext.expected).isNotEmpty(); + return searchContext; + } + + private static List buildIntervals(RandomSource rs, int minToken, int maxToken, Pattern pattern, int numRecords) + { + List intervals = new ArrayList<>(numRecords); + { + var domain = maxToken - minToken + 1; + var delta = domain / numRecords; + var sub_delta = delta / 2; + for (int i = 0; i < numRecords; i++) + { + switch (pattern) + { + case RANDOM: + { + var start = rs.nextInt(minToken, maxToken); + var remaining = maxToken - start; + var end = start + (remaining == 1 ? 1 : rs.nextInt(1, remaining)); + intervals.add(new DetailedInterval(bc(start), bc(end), bytes(i).array())); + } + break; + case NO_OVERLAP: + { + var start = delta * i; + var end = start + sub_delta; + intervals.add(new DetailedInterval(bc(start), bc(end), bytes(i).array())); + } + break; + case PARTIAL_OVERLAP: + { + if (i > 1 && rs.decide(.2)) + { + // overlap + DetailedInterval start, end; + do + { + int numOverlaps = rs.nextInt(1, Math.min(3, intervals.size())); + int offset = rs.nextInt(0, intervals.size() - numOverlaps); + start = intervals.get(offset); + end = intervals.get(offset + numOverlaps); + } + while (start.compareStart(end) == 0 && start.size() == 1); + var a = rs.nextInt(unbc(start.start), unbc(start.end)) + 1; + var b = rs.nextInt(unbc(end.start), unbc(end.end)) + 1; + if (a == b && end.size() == 1) + { + while (a == b) + a = rs.nextInt(unbc(start.start), unbc(start.end)) + 1; + } + else + { + while (a == b) + b = rs.nextInt(unbc(end.start), unbc(end.end)) + 1; + } + if (a > b) + { + var tmp = a; + a = b; + b = tmp; + } + intervals.add(new DetailedInterval(bc(a), bc(b), bytes(i).array())); + intervals.sort(Comparator.naturalOrder()); // so partial can work next time + } + else + { + // no overlap + var start = delta * i; + var end = start + sub_delta; + intervals.add(new DetailedInterval(bc(start), bc(end), bytes(i).array())); + } + } + break; + default: + throw new IllegalArgumentException("Unknown pattern: " + pattern); + } + } + intervals.sort(Comparator.naturalOrder()); + } + return intervals; + } + + private static List findMissingRanges(List intervals) + { + List list = new ArrayList<>(); + list.add(new DetailedInterval(bc(Integer.MIN_VALUE), intervals.get(0).start, bytes(0).array())); + // track current visable coverage + int end = unbc(intervals.get(0).end); + for (var i : intervals) + { + int istar = unbc(i.start); + int iend = unbc(i.end); + if (end >= istar) + { + // current scope includes this range + end = Math.max(end, iend); + } + else + { + // range doesn't intersect, and a new start/end are formed! + list.add(new DetailedInterval(bc(end), bc(istar), bytes(list.size()).array())); + end = iend; + } + } + list.add(new DetailedInterval(bc(end), bc(Integer.MAX_VALUE), bytes(list.size()).array())); + return list; + } + + private static class DetailedInterval extends Interval + { + public DetailedInterval(byte[] start, byte[] end, byte[] value) + { + super(start, end, value); + } + + public DetailedInterval(Interval other) + { + super(other); + } + + public int size() + { + return unbc(end) - unbc(start); + } + + @Override + public String toString() + { + return "[" + unbc(start) + ", " + unbc(end) + ") -> " + ByteBuffer.wrap(value).getInt(); + } + } + + private static byte[] bc(int value) + { + ByteBuffer bb = bytes(value); + var bs = Int32Type.instance.asComparableBytes(ByteBufferAccessor.instance, bb, ByteComparable.Version.OSS50); + return ByteSourceInverse.readBytes(bs); + } + + private static int unbc(byte[] bc) + { + return Int32Type.instance.fromComparableBytes(ByteSource.peekable(ByteSource.fixedLength(bc)), ByteComparable.Version.OSS50).getInt(); + } + + @SuppressWarnings({ "IOResourceOpenedButNotSafelyClosed", "resource" }) + private Searcher index(int bytesPerKey, int bytesPerValue, List sortedIntervals) throws IOException + { + IndexDescriptor descriptor = nextDescriptor(); + + var writer = new CheckpointIntervalArrayIndex.SegmentWriter(descriptor, bytesPerKey, bytesPerValue); + var metas = writer.write(sortedIntervals.toArray(Interval[]::new)); + + // going through the RouteIndexFormat isn't required for this test, but it helps improve coverage there... + Segment segment = new Segment(ImmutableMap.of(new Group(0, TableId.fromUUID(new UUID(0, 0))), new Segment.Metadata(metas, ByteArrayUtil.EMPTY_BYTE_ARRAY, ByteArrayUtil.EMPTY_BYTE_ARRAY))); + RouteIndexFormat.appendSegment(descriptor, segment); + + Map files = new EnumMap<>(IndexComponent.class); + for (IndexComponent c : descriptor.getLiveComponents()) + files.put(c, new FileHandle.Builder(descriptor.fileFor(c)).mmapped(true).complete()); + List segments = RouteIndexFormat.readSegements(files); + files.remove(IndexComponent.SEGMENT).close(); + files.remove(IndexComponent.METADATA).close(); + + var searcher = new CheckpointIntervalArrayIndex.SegmentSearcher(files.get(IndexComponent.CINTIA_SORTED_LIST).sharedCopy(), metas.get(IndexComponent.CINTIA_SORTED_LIST).offset, + files.get(IndexComponent.CINTIA_CHECKPOINTS).sharedCopy(), metas.get(IndexComponent.CINTIA_CHECKPOINTS).offset); + return new Searcher() + { + @Override + public CheckpointIntervalArrayIndex.Stats intersects(byte[] start, byte[] end, Consumer callback) throws IOException + { + return searcher.intersects(start, end, callback); + } + + @Override + public void close() + { + searcher.close(); + for (var fh : files.values()) + fh.close(); + } + }; + } + + private IndexDescriptor nextDescriptor() + { + return IndexDescriptor.create(new Descriptor(new File(folder.getRoot()), "test", "test", new SequenceBasedSSTableId(generation++)), + Murmur3Partitioner.instance, + new ClusteringComparator()); + } + + private static class SearchContext + { + TreeSet expected; + byte[] start, end; + int a, b; + } + + public interface Searcher extends Closeable + { + CheckpointIntervalArrayIndex.Stats intersects(byte[] start, byte[] end, Consumer callback) throws IOException; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java new file mode 100644 index 000000000000..84c64bcb45b0 --- /dev/null +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -0,0 +1,508 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.accord; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import com.google.common.collect.Iterables; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.api.RoutingKey; +import accord.local.Node; +import accord.local.SaveStatus; +import accord.local.Status.Durability; +import accord.primitives.FullKeyRoute; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable.Domain; +import accord.primitives.Route; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Property.Command; +import accord.utils.Property.Commands; +import accord.utils.Property.UnitCommand; +import accord.utils.RandomSource; +import org.agrona.collections.Int2ObjectHashMap; +import org.agrona.collections.Long2ObjectHashMap; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.compaction.CompactionManager; +import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.RTree; +import org.apache.cassandra.utils.RangeTree; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.stateful; + +public class RouteIndexTest extends CQLTester.InMemory +{ + static + { + // since this test does frequent truncates, the info table gets updated and forced flushed... which is 90% of the cost of this test... + // this flag disables that flush + CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true); + } + + private static final Node.Id NODE = new Node.Id(42); + private static final int MIN_TOKEN = 0; + private static final int MAX_TOKEN = 1 << 18; + private static final int TOKEN_RANGE_SIZE = MAX_TOKEN - MIN_TOKEN + 1; + private static final Gen.IntGen NUM_STORES_GEN = Gens.ints().between(1, 10); + private static final Gen.IntGen NUM_TABLES_GEN = Gens.ints().between(1, 10); + private static final Gen TOKEN_DISTRIBUTION = Gens.mixedDistribution(MIN_TOKEN, MAX_TOKEN + 1); + private static final Gen RANGE_SIZE_DISTRIBUTION = Gens.mixedDistribution(10, (int) (TOKEN_RANGE_SIZE * .01)); + private static final Gen> DOMAIN_DISTRIBUTION = Gens.mixedDistribution(Domain.values()); + private static RoutesSearcher ROUTES_SEARCHER = null; + + @BeforeClass + public static void setUpClass() + { + CQLTester.InMemory.setUpClass(); + DatabaseDescriptor.setIncrementalBackupsEnabled(false); + ROUTES_SEARCHER = new RoutesSearcher(); + } + + @Test + public void test() + { + cfs().disableAutoCompaction(); // let the test control compaction + //TODO (coverage): include with the ability to mark ranges as durable for compaction cleanup + AccordService.unsafeSetNoop(); // disable accord service since compaction touches it. It would be nice to include this for cleanup support.... + stateful().withExamples(50).check(new Commands() + { + @Override + public Gen genInitialState() + { + return rs -> new State(rs); + } + + @Override + public ColumnFamilyStore createSut(State state) + { + return cfs(); + } + + @Override + public Gen> commands(State state) + { + Map>, Integer> possible = new HashMap<>(); + possible.put(ignore -> FLUSH, 1); + possible.put(ignore -> COMPACT, 1); + possible.put(rs -> { + int storeId = rs.nextInt(0, state.numStores); + Domain domain = state.domainGen.next(rs); + TxnId txnId = state.nextTxnId(domain); + Route route = createRoute(state, rs, domain, rs.nextInt(1, 20)); + return new InsertTxn(storeId, txnId, SaveStatus.PreAccepted, Durability.NotDurable, route); + }, 10); + possible.put(rs -> new RangeSearch(rs.nextInt(0, state.numStores), state.rangeGen.next(rs)), 1); + if (!state.storeToTableToRangesToTxns.isEmpty()) + { + possible.put(rs -> { + int storeId = rs.pick(state.storeToTableToRangesToTxns.keySet()); + var tables = state.storeToTableToRangesToTxns.get(storeId); + TableId tableId = rs.pick(tables.keySet()); + var ranges = tables.get(tableId); + TreeSet distinctRanges = ranges.stream().map(Map.Entry::getKey).collect(Collectors.toCollection(() -> new TreeSet<>(TokenRange::compareTo))); + TokenRange range; + if (distinctRanges.size() == 1) + { + range = Iterables.getFirst(distinctRanges, null); + } + else + { + switch (rs.nextInt(0, 2)) + { + case 0: // perfect match + range = rs.pick(distinctRanges); + break; + case 1: // mutli-match + { + TokenRange a = rs.pick(distinctRanges); + TokenRange b = rs.pick(distinctRanges); + while (a.equals(b)) + b = rs.pick(distinctRanges); + if (b.compareTo(a) < 0) + { + TokenRange tmp = a; + a = b; + b = tmp; + } + range = new TokenRange((AccordRoutingKey) a.start(), (AccordRoutingKey) b.end()); + } + break; + default: + throw new AssertionError(); + } + } + return new RangeSearch(storeId, range); + }, 5); + } + return Gens.oneOf(possible); + } + + @Override + public void destroySut(ColumnFamilyStore sut) + { + cfs().truncateBlocking(); + } + }); + } + + private static ColumnFamilyStore cfs() + { + return Keyspace.open(SchemaConstants.ACCORD_KEYSPACE_NAME) + .getColumnFamilyStore(AccordKeyspace.COMMANDS); + } + + private static Gen rangeGen(RandomSource rand, List tables) + { + Gen.IntGen tokenGen = TOKEN_DISTRIBUTION.next(rand); + Gen tableIdGen = Gens.mixedDistribution(tables).next(rand); + switch (rand.nextInt(0, 3)) + { + case 0: // pure random + return rs -> { + int a = tokenGen.nextInt(rs); + int b = tokenGen.nextInt(rs); + while (a == b) + b = tokenGen.nextInt(rs); + if (a > b) + { + int tmp = a; + a = b; + b = tmp; + } + TableId tableId = tableIdGen.next(rs); + return new TokenRange(new TokenKey(tableId, new LongToken(a)), + new TokenKey(tableId, new LongToken(b))); + }; + case 1: // small range + Gen.IntGen rangeSizeGen = RANGE_SIZE_DISTRIBUTION.next(rand); + return rs -> { + int a = tokenGen.nextInt(rs); + int rangeSize = rangeSizeGen.nextInt(rs); + int b = a + rangeSize; + if (b > MAX_TOKEN) + { + b = a; + a = b - rangeSize; + } + TableId tableId = tableIdGen.next(rs); + return new TokenRange(new TokenKey(tableId, new LongToken(a)), + new TokenKey(tableId, new LongToken(b))); + }; + case 2: // single element + return rs -> { + int a = tokenGen.nextInt(rs); + int b = a + 1; + TableId tableId = tableIdGen.next(rs); + return new TokenRange(new TokenKey(tableId, new LongToken(a)), + new TokenKey(tableId, new LongToken(b))); + }; + default: + throw new AssertionError(); + } + } + + private static Route createRoute(State state, RandomSource rs, Domain domain, int numKeys) + { + switch (domain) + { + case Key: + { + TreeSet keys = new TreeSet<>(); + while (keys.size() < numKeys) + { + var table = rs.pick(state.tables); + var token = new LongToken(state.tokenGen.nextInt(rs)); + keys.add(new TokenKey(table, token)); + } + return new FullKeyRoute(keys.first(), true, keys.toArray(RoutingKey[]::new)); + } + case Range: + { + TreeSet set = new TreeSet<>(Range::compareTo); + while (set.size() < numKeys) + set.add(state.rangeGen.next(rs)); + return Ranges.ofSorted(set.toArray(Range[]::new)).toRoute(set.first().end()); + } + default: + throw new IllegalArgumentException("Unknown domain: " + domain); + } + } + + private class InsertTxn implements UnitCommand + { + private static final String cql = "INSERT INTO system_accord.commands (store_id, domain, txn_id, status, route, durability) VALUES (?, ?, ?, ?, ?, ?)"; + private final int storeId; + private final TxnId txnId; + private final SaveStatus saveStatus; + private final Durability durability; + private final Route route; + + private InsertTxn(int storeId, TxnId txnId, SaveStatus saveStatus, Durability durability, Route route) + { + this.storeId = storeId; + this.txnId = txnId; + this.saveStatus = saveStatus; + this.durability = durability; + this.route = route; + } + + @Override + public void applyUnit(State state) + { + for (var u : route) + { + switch (u.domain()) + { + case Key: + { + AccordRoutingKey key = (AccordRoutingKey) u; + var table = key.table(); + var token = key.token().getLongValue(); + state.storeToTableToRoutingKeysToTxns.computeIfAbsent(storeId, ignore -> new HashMap<>()) + .computeIfAbsent(table, ignore -> new Long2ObjectHashMap<>()) + .computeIfAbsent(token, ignore -> new ArrayList<>()) + .add(txnId); + } + break; + case Range: + { + TokenRange range = (TokenRange) u; + var table = range.table(); + state.storeToTableToRangesToTxns.computeIfAbsent(storeId, ignore -> new HashMap<>()) + .computeIfAbsent(table, ignore -> rangeTree()) + .add(range, txnId); + } + break; + default: + throw new AssertionError("Unexpected domain: " + u.domain()); + } + } + } + + @Override + public void runUnit(ColumnFamilyStore sut) throws Throwable + { + execute(cql, storeId, txnId.domain().ordinal(), AccordKeyspace.serializeTimestamp(txnId), saveStatus.ordinal(), AccordKeyspace.serializeRoute(route), durability.ordinal()); + } + + @Override + public String toString() + { + return "InsertTxn{" + + "storeId=" + storeId + + ", txnId=" + txnId + + ", saveStatus=" + saveStatus + + ", durability=" + durability + + ", route=" + route + + '}'; + } + } + + private class RangeSearch implements Command> + { + private final int storeId; + private final TokenRange range; + + private RangeSearch(int storeId, TokenRange range) + { + this.storeId = storeId; + this.range = range; + } + + @Override + public Set apply(State state) throws Throwable + { + var tables = state.storeToTableToRangesToTxns.get(storeId); + if (tables == null) return Collections.emptySet(); + var ranges = tables.get(range.table()); + if (ranges == null) return Collections.emptySet(); + Set matches = new HashSet<>(); + ranges.search(range, e -> matches.add(e.getValue())); + return matches; + } + + @Override + public Set run(ColumnFamilyStore sut) throws Throwable + { + return ROUTES_SEARCHER.intersects(storeId, range); + } + + @Override + public void checkPostconditions(State state, Set expected, + ColumnFamilyStore sut, Set actual) + { + Assertions.assertThat(actual).describedAs("Unexpected txns for range %s", range).isEqualTo(expected); + } + + @Override + public String toString() + { + return "RangeSearch{" + + "storeId=" + storeId + + ", range=" + range + + '}'; + } + } + + private static abstract class CassandraCommand implements UnitCommand + { + private final String name; + + protected CassandraCommand(String name) + { + this.name = name; + } + + @Override + public void applyUnit(State state) + { + // no-op + } + + @Override + public String detailed(State state) + { + return name; + } + } + + private static final CassandraCommand FLUSH = new CassandraCommand("Flush") + { + @Override + public void runUnit(ColumnFamilyStore sut) + { + sut.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + } + }; + + private static final CassandraCommand COMPACT = new CassandraCommand("Compact") + { + @Override + public void runUnit(ColumnFamilyStore sut) + { + try + { + sut.enableAutoCompaction(); + FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(sut)); + } + finally + { + sut.disableAutoCompaction(); + } + } + }; + + private static class State + { + private final Int2ObjectHashMap>>> storeToTableToRoutingKeysToTxns = new Int2ObjectHashMap<>(); + private final Int2ObjectHashMap>> storeToTableToRangesToTxns = new Int2ObjectHashMap<>(); + + private final int numStores; + private final List tables; + private final Gen.IntGen tokenGen; + private final Gen rangeGen; + private final Gen domainGen; + private int hlc = 1000; + + public State(RandomSource rs) + { + numStores = NUM_STORES_GEN.nextInt(rs); + tables = IntStream.range(0, NUM_TABLES_GEN.nextInt(rs)) + .mapToObj(i -> TableId.fromUUID(new UUID(0, i))) + .collect(Collectors.toList()); + tokenGen = TOKEN_DISTRIBUTION.next(rs); + rangeGen = rangeGen(rs, tables); + domainGen = DOMAIN_DISTRIBUTION.next(rs); + } + + TxnId nextTxnId(Domain domain) + { + return new TxnId(1, hlc++, Txn.Kind.Write, domain, NODE); + } + + @Override + public String toString() + { + return "State{" + + "numStores=" + numStores + + ", tables=" + tables + + '}'; + } + } + + private static RangeTree rangeTree() + { + return RTree.create(ACCESSOR); + } + + private static final RangeTree.Accessor ACCESSOR = new RangeTree.Accessor() + { + @Override + public AccordRoutingKey start(TokenRange tokenRange) + { + return (AccordRoutingKey) tokenRange.start(); + } + + @Override + public AccordRoutingKey end(TokenRange tokenRange) + { + return (AccordRoutingKey) tokenRange.end(); + } + + @Override + public boolean contains(AccordRoutingKey start, AccordRoutingKey end, AccordRoutingKey accordRoutingKey) + { + return new TokenRange(start, end).contains(accordRoutingKey); + } + + @Override + public boolean intersects(TokenRange tokenRange, AccordRoutingKey start, AccordRoutingKey end) + { + return tokenRange.compareIntersecting(new TokenRange(start, end)) == 0; + } + }; +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/index/internal/CassandraIndexTest.java b/test/unit/org/apache/cassandra/index/internal/CassandraIndexTest.java index 647b4d85f82d..2fef97fd87a9 100644 --- a/test/unit/org/apache/cassandra/index/internal/CassandraIndexTest.java +++ b/test/unit/org/apache/cassandra/index/internal/CassandraIndexTest.java @@ -569,25 +569,25 @@ public void indexCorrectlyMarkedAsBuildAndRemoved() throws Throwable Awaitility.await() .atMost(1, TimeUnit.MINUTES) .pollDelay(1, TimeUnit.SECONDS) - .untilAsserted(() -> assertRows(execute(selectBuiltIndexesQuery), row("system", "PaxosUncommittedIndex", null))); + .untilAsserted(() -> assertRows(execute(selectBuiltIndexesQuery), row("system", "PaxosUncommittedIndex", null), row("system_accord", "route", null))); String indexName = "build_remove_test_idx"; createTable("CREATE TABLE %s (a int, b int, c int, PRIMARY KEY (a, b))"); createIndex(String.format("CREATE INDEX %s ON %%s(c)", indexName)); // check that there are no other rows in the built indexes table - assertRows(execute(selectBuiltIndexesQuery), row(KEYSPACE, indexName, null), row("system", "PaxosUncommittedIndex", null)); + assertRows(execute(selectBuiltIndexesQuery), row(KEYSPACE, indexName, null), row("system", "PaxosUncommittedIndex", null), row("system_accord", "route", null)); // rebuild the index and verify the built status table getCurrentColumnFamilyStore().rebuildSecondaryIndex(indexName); waitForIndexQueryable(indexName); // check that there are no other rows in the built indexes table - assertRows(execute(selectBuiltIndexesQuery), row(KEYSPACE, indexName, null), row("system", "PaxosUncommittedIndex", null)); + assertRows(execute(selectBuiltIndexesQuery), row(KEYSPACE, indexName, null), row("system", "PaxosUncommittedIndex", null), row("system_accord", "route", null)); // check that dropping the index removes it from the built indexes table dropIndex("DROP INDEX %s." + indexName); - assertRows(execute(selectBuiltIndexesQuery), row("system", "PaxosUncommittedIndex", null)); + assertRows(execute(selectBuiltIndexesQuery), row("system", "PaxosUncommittedIndex", null), row("system_accord", "route", null)); } diff --git a/test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java b/test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java new file mode 100644 index 000000000000..d7b6754a2d44 --- /dev/null +++ b/test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; +import java.util.zip.CRC32C; +import java.util.zip.Checksum; + +import org.junit.Test; + +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Property.Command; +import accord.utils.Property.Commands; +import accord.utils.Property.UnitCommand; +import org.apache.cassandra.utils.FailingConsumer; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.stateful; + +public class ChecksumedDataTest +{ + public static final Supplier CHECKSUM_SUPPLIER = CRC32C::new; + + @Test + public void singleType() + { + DataOutputBuffer out = new DataOutputBuffer(); + stateful().check(new Commands() + { + @Override + public Gen genInitialState() + { + return ignore -> { + out.clear(); + return out; + }; + } + + @Override + public DataOutputBuffer createSut(DataOutputBuffer dataOutputBuffer) + { + return dataOutputBuffer; + } + + @Override + public Gen> commands(DataOutputBuffer dataOutputBuffer) + { + return Gens.oneOf( + rs -> { + boolean b = rs.nextBoolean(); + return new StatelessChecksumCommand<>(out -> out.writeBoolean(b), DataInputPlus::readBoolean, () -> b); + }, + rs -> { + short s = (short) rs.nextInt(Short.MIN_VALUE, Short.MAX_VALUE); + return new StatelessChecksumCommand<>(out -> out.writeShort(s), DataInputPlus::readShort, () -> s); + }, + rs -> { + char c = (char) rs.nextInt(Character.MIN_VALUE, Character.MAX_VALUE); + return new StatelessChecksumCommand<>(out -> out.writeChar(c), DataInputPlus::readChar, () -> c); + }, + rs -> { + int value = rs.nextInt(); + return new StatelessChecksumCommand<>(out -> out.writeInt(value), DataInputPlus::readInt, () -> value); + }, + rs -> { + float value = rs.nextFloat(); + return new StatelessChecksumCommand<>(out -> out.writeFloat(value), DataInputPlus::readFloat, () -> value); + }, + rs -> { + double value = rs.nextDouble(); + return new StatelessChecksumCommand<>(out -> out.writeDouble(value), DataInputPlus::readDouble, () -> value); + } + ); + } + }); + } + + @Test + public void withState() + { + DataOutputBuffer out = new DataOutputBuffer(); + ChecksumedDataOutputPlus checksummedOut = new ChecksumedDataOutputPlus(out, CHECKSUM_SUPPLIER); + checksummedOut.resetChecksum(); + stateful().check(new Commands>>() { + @Override + public Gen genInitialState() + { + return ignore -> { + out.clear(); + checksummedOut.resetChecksum(); + return checksummedOut; + }; + } + + @Override + public List> createSut(ChecksumedDataOutputPlus checksumedDataOutputPlus) + { + return new ArrayList<>(1000); + } + + @Override + public Gen>, ?>> commands(ChecksumedDataOutputPlus checksumedDataOutputPlus) + { + return Gens.oneOf( + rs -> { + boolean b = rs.nextBoolean(); + return new StatefulChecksumCommand<>(out -> out.writeBoolean(b), DataInputPlus::readBoolean, () -> b); + }, + rs -> { + short s = (short) rs.nextInt(Short.MIN_VALUE, Short.MAX_VALUE); + return new StatefulChecksumCommand<>(out -> out.writeShort(s), DataInputPlus::readShort, () -> s); + }, + rs -> { + char c = (char) rs.nextInt(Character.MIN_VALUE, Character.MAX_VALUE); + return new StatefulChecksumCommand<>(out -> out.writeChar(c), DataInputPlus::readChar, () -> c); + }, + rs -> { + int value = rs.nextInt(); + return new StatefulChecksumCommand<>(out -> out.writeInt(value), DataInputPlus::readInt, () -> value); + }, + rs -> { + float value = rs.nextFloat(); + return new StatefulChecksumCommand<>(out -> out.writeFloat(value), DataInputPlus::readFloat, () -> value); + }, + rs -> { + double value = rs.nextDouble(); + return new StatefulChecksumCommand<>(out -> out.writeDouble(value), DataInputPlus::readDouble, () -> value); + } + ); + } + + @Override + public void destroySut(List> sut) throws Throwable + { + ChecksumedDataInputPlus in = new ChecksumedDataInputPlus(new DataInputBuffer(out.unsafeGetBufferAndFlip(), false), CHECKSUM_SUPPLIER); + for (StatefulChecksumCommand cmd : sut) + { + Assertions.assertThat(cmd.read.apply(in)).isEqualTo(cmd.expected.get()); + Assertions.assertThat(in.checksum().getValue()).isEqualTo(cmd.checksum); + } + } + }); + } + + public interface FailingFunction + { + O apply(I input) throws Throwable; + } + + private static class StatefulChecksumCommand implements UnitCommand>> + { + private final FailingConsumer update; + private final FailingFunction read; + private final Supplier expected; + private Long checksum = null; + + private StatefulChecksumCommand(FailingConsumer update, FailingFunction read, Supplier expected) + { + this.update = update; + this.read = read; + this.expected = expected; + } + + @Override + public void applyUnit(ChecksumedDataOutputPlus out) throws Throwable + { + update.doAccept(out); + checksum = out.checksum().getValue(); + } + + @Override + public void runUnit(List> sut) + { + sut.add(this); + } + } + + private static class StatelessChecksumCommand implements Command + { + private final FailingConsumer update; + private final FailingFunction read; + private final Supplier expected; + + private StatelessChecksumCommand(FailingConsumer update, + FailingFunction read, + Supplier expected) + { + this.update = update; + this.read = read; + this.expected = expected; + } + + @Override + public Long apply(DataOutputBuffer out) throws Throwable + { + out.clear(); + ChecksumedDataOutputPlus c = new ChecksumedDataOutputPlus(out, CHECKSUM_SUPPLIER); + update.doAccept(c); + return c.checksum().getValue(); + } + + @Override + public Long run(DataOutputBuffer out) throws Throwable + { + out.clear(); + update.doAccept(out); + ChecksumedDataInputPlus i = new ChecksumedDataInputPlus(new DataInputBuffer(out.unsafeGetBufferAndFlip(), false), CHECKSUM_SUPPLIER); + Assertions.assertThat(read.apply(i)).isEqualTo(expected.get()); + return i.checksum().getValue(); + } + + @Override + public void checkPostconditions(DataOutputBuffer dataOutputBuffer, Long expected, + DataOutputBuffer sut, Long actual) + { + Assertions.assertThat(actual).isEqualTo(expected); + } + + @Override + public String detailed(DataOutputBuffer dataOutputBuffer) + { + return expected.get().getClass().getSimpleName(); + } + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index b9c7fe6b598b..4a4c012d6a06 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -99,7 +99,6 @@ import org.apache.cassandra.locator.LocalStrategy; import org.apache.cassandra.locator.Locator; import org.apache.cassandra.locator.RangesAtEndpoint; -import org.apache.cassandra.locator.Replica; import org.apache.cassandra.net.ConnectionType; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -117,7 +116,6 @@ import org.apache.cassandra.repair.state.ValidationState; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableId; @@ -125,7 +123,6 @@ import org.apache.cassandra.schema.Tables; import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordConfigurationService; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupComplete; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanupHistory; @@ -141,21 +138,12 @@ import org.apache.cassandra.streaming.StreamingChannel; import org.apache.cassandra.streaming.StreamingDataInputPlus; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tcm.Epoch; -import org.apache.cassandra.tcm.listeners.ChangeListener; -import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.Location; -import org.apache.cassandra.tcm.membership.NodeAddresses; -import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.tcm.ownership.DataPlacement; -import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tools.nodetool.Repair; import org.apache.cassandra.utils.AbstractTypeGenerators; import org.apache.cassandra.utils.CassandraGenerators; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.Closeable; -import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.FailingBiConsumer; import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.MBeanWrapper; @@ -587,29 +575,11 @@ private static RepairOption repairOption(RandomSource rs, Cluster.Node coordinat { RepairType type = repairTypeGen.next(rs); PreviewType previewType = previewTypeGen.next(rs); - // TODO (required - IR) add this back and expand as part of IR integration -// boolean accordRepair = type == RepairType.FULL && previewType == PreviewType.NONE ? rs.nextBoolean() : false; - boolean accordRepair = false; List args = new ArrayList<>(); args.add(ks); List tables = tablesGen.next(rs); args.addAll(tables); - if (accordRepair) - { - List> ranges = new ArrayList<>(StorageService.instance.getReplicas(ks, coordinator.broadcastAddressAndPort()).ranges()); - ranges.sort(Comparator.naturalOrder()); - Range range = ranges.get(rs.nextInt(0, ranges.size())); - args.add("--start-token"); - args.add(range.left.toString()); - args.add("--end-token"); - Murmur3Partitioner.LongToken left = (Murmur3Partitioner.LongToken) range.left; - Token right = rs.nextBoolean() ? new Murmur3Partitioner.LongToken(left.token + 100) : range.right; - args.add(right.toString()); - } - else - { - args.add("-pr"); - } + args.add("-pr"); switch (type) { case IR: @@ -651,8 +621,6 @@ private static RepairOption repairOption(RandomSource rs, Cluster.Node coordinat } if (rs.nextBoolean()) args.add("--optimise-streams"); RepairOption options = RepairOption.parse(Repair.parseOptionMap(() -> "test", args), DatabaseDescriptor.getPartitioner()); - if (accordRepair) - options = options.withAccordRepair(true); if (options.getRanges().isEmpty()) { if (options.isPrimaryRange()) @@ -811,69 +779,11 @@ static class Cluster } List addresses = new ArrayList<>(nodes.keySet()); addresses.sort(Comparator.naturalOrder()); - NodeId tcmid = ClusterMetadata.current().directory.peerId(addresses.get(rs.nextInt(0, addresses.size()))); - ServerTestUtils.recreateAccord(tcmid); - interceptTCMNotifications(tcmid); + AccordService.unsafeSetNoop(); setupSchema(); } - private void interceptTCMNotifications(NodeId tcmid) - { - AccordService as = (AccordService) AccordService.instance(); - AccordConfigurationService config = as.configurationService(); - ClusterMetadataService.instance().log().removeListener(config); - ClusterMetadataService.instance().log().addListener(new ChangeListener() - { - @Override - public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) - { - config.notifyPostCommit(sanitize(prev, tcmid), sanitize(next, tcmid), fromSnapshot); - } - }); - } - - private ClusterMetadata sanitize(ClusterMetadata metadata, NodeId tcmid) - { - if (metadata.directory.isEmpty()) - return metadata; - ClusterMetadata sanitized = metadata.withDirectory(sanitize(metadata.directory, tcmid)) - .withPlacements(sanitize(metadata.placements, FBUtilities.getBroadcastAddressAndPort())); - return sanitized; - } - - private Directory sanitize(Directory directory, NodeId tcmid) - { - if (directory.getNodeAddresses(tcmid) == null) - throw new AssertionError("Expected node " + tcmid + " but not found in " + directory); - for (NodeId peer : directory.peerIds()) - { - if (peer.equals(tcmid)) - continue; - directory = directory.without(peer); - } - directory = directory.withNodeAddresses(tcmid, NodeAddresses.current()); - return directory; - } - - private DataPlacements sanitize(DataPlacements placements, InetAddressAndPort endpoint) - { - DataPlacements.Builder builder = DataPlacements.builder(placements.size()); - for (Map.Entry e : placements) - builder.with(e.getKey(), sanitize(placements.lastModified(), e.getValue(), endpoint)); - return builder.build(); - } - - private DataPlacement sanitize(Epoch epoch, DataPlacement value, InetAddressAndPort endpoint) - { - DataPlacement.Builder builder = DataPlacement.builder(); - for (Range e : value.writes.ranges()) - builder.withWriteReplica(epoch, new Replica(endpoint, e, true)); - for (Range e : value.reads.ranges()) - builder.withReadReplica(epoch, new Replica(endpoint, e, true)); - return builder.build(); - } - public Closeable addListener(MessageListener listener) { listeners.add(listener); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index fdaf51987494..57c7401cb673 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -148,7 +148,7 @@ public void commandLoadSave() throws Throwable Apply.SerializationSupport.create(txnId, route.slice(Ranges.of(TokenRange.fullRange(tableId))), 1L, - Apply.Kind.Minimal, + Apply.Kind.Maximal, depTxn.keys(), executeAt, dependencies, diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 198697160265..55d1e7e1177a 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -18,8 +18,16 @@ package org.apache.cassandra.service.accord; +import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; import org.junit.Test; @@ -43,18 +51,47 @@ import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; +import accord.utils.async.Observable; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.LocalPartitioner; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.MemtableParams; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaProvider; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.CassandraGenerators; import org.assertj.core.api.Assertions; +import org.mockito.Mockito; +import org.mockito.stubbing.Answer; +import static accord.utils.Property.qt; +import static org.apache.cassandra.config.DatabaseDescriptor.setSelectedSSTableFormat; +import static org.apache.cassandra.db.ColumnFamilyStore.FlushReason.UNIT_TESTS; +import static org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper.setMemtable; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; -import static org.apache.cassandra.service.accord.AccordTestUtils.wrapInTxn; +import static org.apache.cassandra.utils.AbstractTypeGenerators.getTypeSupport; +import static org.apache.cassandra.utils.AccordGenerators.fromQT; public class AccordKeyspaceTest extends CQLTester.InMemory { + static + { + // since this test does frequent truncates, the info table gets updated and forced flushed... which is 90% of the cost of this test... + // this flag disables that flush + CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true); + } + @Test public void serde() { @@ -88,7 +125,7 @@ public void serde() AccordSafeCommand safeCommand = new AccordSafeCommand(AccordTestUtils.loaded(id, null)); safeCommand.set(committed); - Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.StableFastPath, Ballot.ZERO, id, partialTxn.keys(), partialTxn, partialDeps, route, null); + Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.CommitSlowPath, Ballot.ZERO, id, partialTxn.keys(), partialTxn, partialDeps, route, null); store.appendToJournal(commit); Mutation mutation = AccordKeyspace.getCommandMutation(store, safeCommand, 42); @@ -97,4 +134,157 @@ public void serde() Command loaded = AccordKeyspace.loadCommand(store, id); Assertions.assertThat(loaded).isEqualTo(committed); } + + @Test + public void findOverlappingKeys() + { + var tableIdGen = fromQT(CassandraGenerators.TABLE_ID_GEN); + var partitionGen = fromQT(CassandraGenerators.partitioners()); + + var sstableFormats = DatabaseDescriptor.getSSTableFormats(); + List sstableFormatNames = new ArrayList<>(sstableFormats.keySet()); + sstableFormatNames.sort(Comparator.naturalOrder()); + + List memtableFormats = MemtableParams.knownDefinitions().stream() + .filter(name -> !name.startsWith("test_") && !name.equals("default")) + .sorted() + .collect(Collectors.toList()); + + qt().check(rs -> { + AccordKeyspace.unsafeClear(); + // control SSTable format + setSelectedSSTableFormat(sstableFormats.get(rs.pick(sstableFormatNames))); + // control memtable format + setMemtable(ACCORD_KEYSPACE_NAME, "commands_for_key", rs.pick(memtableFormats)); + + // define the tables w/ partitioners for the test + // this uses the ability to override the SchemaProvider for the keyspace and only defines the single API call expected: getTablePartitioner + TreeMap tables = new TreeMap<>(); + int numTables = rs.nextInt(1, 3); + for (int i = 0; i < numTables; i++) + { + var tableId = tableIdGen.next(rs); + while (tables.containsKey(tableId)) + tableId = tableIdGen.next(rs); + tables.put(tableId, partitionGen.next(rs)); + } + SchemaProvider schema = Mockito.mock(SchemaProvider.class); + Mockito.when(schema.getTablePartitioner(Mockito.any())).thenAnswer((Answer) invocationOnMock -> tables.get(invocationOnMock.getArgument(0))); + AccordKeyspace.unsafeSetSchema(schema); + + int numStores = rs.nextInt(1, 3); + + // The model of the DB + TreeMap> storesToKeys = new TreeMap<>(); + // write to the table and the model + for (int i = 0, numKeys = rs.nextInt(10, 20); i < numKeys; i++) + { + int store = rs.nextInt(0, numStores); + var keys = storesToKeys.computeIfAbsent(store, ignore -> new TreeSet<>()); + PartitionKey pk = null; + // LocalPartitioner may have a type with a very small domain (boolean, vector, etc.), so need to bound the attempts + // else this will loop forever... + for (int attempt = 0; attempt < 10; attempt++) + { + TableId tableId = rs.pick(tables.keySet()); + IPartitioner partitioner = tables.get(tableId); + ByteBuffer data = !(partitioner instanceof LocalPartitioner) ? Int32Type.instance.decompose(rs.nextInt()) + : fromQT(getTypeSupport(partitioner.getTokenValidator()).bytesGen()).next(rs); + PartitionKey key = new PartitionKey(tableId, tables.get(tableId).decorateKey(data)); + if (keys.add(key)) + { + pk = key; + break; + } + } + if (pk != null) + { + try + { + // using Mutation directly (what we do in Accord) can break when user data is too large; leading to data loss + // The memtable will allow the write, but it will be dropped when writing to the SSTable... + //TODO (now, correctness): since we store the user token + user key, if a key is close to the PK limits then we could tip over and loose our CFK +// new Mutation(AccordKeyspace.getCommandsForKeyPartitionUpdate(store, pk, 42, ByteBufferUtil.EMPTY_BYTE_BUFFER)).apply(); + execute("INSERT INTO system_accord.commands_for_key (store_id, key_token, key) VALUES (?, ?, ?)", + store, AccordKeyspace.serializeRoutingKey(pk.toUnseekable()), AccordKeyspace.serializeKey(pk)); + } + catch (IllegalArgumentException | InvalidRequestException e) + { + // Sometimes the types are too large (LocalPartitioner) so the mutation gets rejected... just ignore those cases + // Length 69912 > max length 65535 + String msg = e.getMessage(); + if (msg != null) + { + if ((msg.startsWith("Length ") && msg.endsWith("> max length 65535")) // Clustering was rejected + || (msg.startsWith("Key length of ") && msg.endsWith(" is longer than maximum of 65535"))) // Partition was rejected + { + // failed to add + keys.remove(pk); + continue; + } + } + throw e; + } + } + } + + // read from the table and validate it matches the model + for (int read = 0; read < 2; read++) // read=0 is memtable, read=1 is sstable + { + { + // Make sure no data was lost + // An issue was found that system mutations bypass checks so make their way to the Memtable, but when we flush to SSTable + // they get filtered out, causing data loss... This check is here to make sure that the data is present (test covers Memtable + SStable) + // in the storage before checking if the filtering logic is correct + TreeMap> expectedCqlStoresToKeys = new TreeMap<>(); + for (var e : storesToKeys.entrySet()) + { + int store = e.getKey(); + expectedCqlStoresToKeys.put(store, new TreeSet<>(e.getValue().stream().map(p -> AccordKeyspace.serializeRoutingKey(p.toUnseekable())).collect(Collectors.toList()))); + } + + // make sure no data loss... when this test was written sstable had all the rows but the sstable didn't... this + // is mostly a santity check to detect that case early + var resultSet = execute("SELECT store_id, key_token FROM system_accord.commands_for_key ALLOW FILTERING"); + TreeMap> cqlStoresToKeys = new TreeMap<>(); + for (var row : resultSet) + { + int storeId = row.getInt("store_id"); + ByteBuffer bb = row.getBytes("key_token"); + cqlStoresToKeys.computeIfAbsent(storeId, ignore -> new TreeSet<>()).add(bb); + } + Assertions.assertThat(cqlStoresToKeys).isEqualTo(expectedCqlStoresToKeys); + } + + for (int i = 0, queries = rs.nextInt(1, 5); i < queries; i++) + { + int store = rs.pick(storesToKeys.keySet()); + var keysForStore = new ArrayList<>(storesToKeys.get(store)); + + int offset; + int offsetEnd; + if (keysForStore.size() == 1) + { + offset = 0; + offsetEnd = 1; + } + else + { + offset = rs.nextInt(0, keysForStore.size()); + offsetEnd = rs.nextInt(offset, keysForStore.size()) + 1; + } + List expected = keysForStore.subList(offset, offsetEnd); + PartitionKey start = expected.get(0); + PartitionKey end = expected.get(expected.size() - 1); + + AsyncChain> map = Observable.asChain(callback -> AccordKeyspace.findAllKeysBetween(store, start.toUnseekable(), true, end.toUnseekable(), true, callback)); + List actual = AsyncChains.getUnchecked(map); + Assertions.assertThat(actual).isEqualTo(expected); + } + + if (read == 0) + Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStore("commands_for_key").forceBlockingFlush(UNIT_TESTS); + } + }); + } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java index 1dd5bf4b16d5..901ed572cd31 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java @@ -38,7 +38,7 @@ public class AccordReadRepairTest extends AccordTestBase { - private static final Logger logger = LoggerFactory.getLogger(org.apache.cassandra.distributed.test.accord.AccordCQLTest.class); + private static final Logger logger = LoggerFactory.getLogger(AccordReadRepairTest.class); @Override protected Logger logger() diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 0ae2a6abc845..6d37f962ac52 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -99,6 +99,7 @@ public class AccordTestUtils { + private static final AccordAgent AGENT = new AccordAgent(); public static final TableId TABLE_ID1 = TableId.fromString("00000000-0000-0000-0000-000000000001"); public static class Commands @@ -318,6 +319,11 @@ public static Txn createWriteTxn(int key) return createTxn(key, key); } + public static Txn createTxn(Txn.Kind kind, Seekables seekables) + { + return AGENT.emptyTxn(kind, seekables); + } + public static Ranges fullRange(Txn txn) { return fullRange(txn.keys()); diff --git a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java deleted file mode 100644 index 982ad8ca8fa1..000000000000 --- a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.junit.BeforeClass; -import org.junit.Test; - -import accord.local.SaveStatus; -import accord.primitives.Ranges; -import accord.primitives.RoutableKey; -import accord.primitives.TxnId; -import accord.utils.AccordGens; -import accord.utils.Gen; -import accord.utils.Gens; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.dht.RandomPartitioner; -import org.apache.cassandra.service.accord.api.AccordRoutingKey; -import org.apache.cassandra.utils.AccordGenerators; -import org.apache.cassandra.utils.Interval; -import org.apache.cassandra.utils.IntervalTree; - -import static accord.utils.Property.qt; -import static org.apache.cassandra.simulator.RandomSource.Choices.choose; -import static org.apache.cassandra.service.accord.AccordTestUtils.TABLE_ID1; -import static org.assertj.core.api.Assertions.assertThat; - -public class CommandsForRangesTest -{ - private static Ranges FULL_RANGE = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(TABLE_ID1), AccordRoutingKey.SentinelKey.max(TABLE_ID1))); - - @BeforeClass - public static void setup() throws NoSuchFieldException, IllegalAccessException - { - DatabaseDescriptor.clientInitialization(); - DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - } - - @Test - public void prune() - { - qt().forAll(cfr()).check(cfr -> { - // public void prune(TxnId pruneBefore, Ranges pruneRanges) - // private Timestamp maxRedundant; - List knownIds = new ArrayList<>(cfr.knownIds()); - knownIds.sort(Comparator.naturalOrder()); - - assertThat(cfr.maxRedundant()).isNull(); - - TxnId min = knownIds.get(0); - TxnId max = knownIds.get(knownIds.size() - 1); - - // should do nothing - IntervalTree> tree = cfr.tree(); - cfr.prune(min, FULL_RANGE); - assertThat(cfr.maxRedundant()).isNull(); - assertThat(cfr.tree()).isEqualTo(tree); - - cfr.prune(max, FULL_RANGE); - assertThat(cfr.knownIds()).containsExactly(max); - assertThat(cfr.maxRedundant()).isEqualTo(knownIds.size() == 1 ? null : knownIds.get(knownIds.size() - 2)); - - cfr.prune(new TxnId(max.logicalNext(max.node), max.kind(), max.domain()), FULL_RANGE); - assertThat(cfr.knownIds()).isEmpty(); - assertThat(cfr.maxRedundant()).isEqualTo(max); - }); - } - - private static Gen cfr() - { - // TODO (coverage): once all partitioners work with regard to splitting, then should test all - Gen partitionerGen = rs -> choose(rs, Murmur3Partitioner.instance, RandomPartitioner.instance); - Gen statusGen = Gens.enums().all(SaveStatus.class); - return rs -> { - IPartitioner partitioner = partitionerGen.next(rs); - // some code reaches to the DD for partitioner... - DatabaseDescriptor.setPartitionerUnsafe(partitioner); - Gen rangesGen = AccordGenerators.ranges(ignore -> Collections.singleton(TABLE_ID1), ignore -> partitioner); - CommandsForRanges.Builder builder = new CommandsForRanges.Builder(); - int numTxn = rs.nextInt(1, 10); - Set uniq = new HashSet<>(); - for (int i = 0; i < numTxn; i++) - { - TxnId id; - while (!uniq.add(id = AccordGens.txnIds().next(rs))) {} - builder.put(id, rangesGen.next(rs), statusGen.next(rs), id, Collections.emptyList()); - } - return builder.build(); - }; - } -} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java new file mode 100644 index 000000000000..575b996e1e2b --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.Sets; + +import accord.local.SerializerSupport; +import accord.messages.Accept; +import accord.messages.Apply; +import accord.messages.BeginRecovery; +import accord.messages.Commit; +import accord.messages.Message; +import accord.messages.MessageType; +import accord.messages.PreAccept; +import accord.messages.Propagate; +import accord.primitives.Ballot; +import accord.primitives.TxnId; +import org.agrona.collections.ObjectHashSet; + +import static accord.messages.MessageType.ACCEPT_REQ; +import static accord.messages.MessageType.APPLY_MAXIMAL_REQ; +import static accord.messages.MessageType.APPLY_MINIMAL_REQ; +import static accord.messages.MessageType.BEGIN_RECOVER_REQ; +import static accord.messages.MessageType.COMMIT_MAXIMAL_REQ; +import static accord.messages.MessageType.COMMIT_SLOW_PATH_REQ; +import static accord.messages.MessageType.PRE_ACCEPT_REQ; +import static accord.messages.MessageType.PROPAGATE_APPLY_MSG; +import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; +import static accord.messages.MessageType.PROPAGATE_STABLE_MSG; +import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; +import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; + +public class MockJournal implements IJournal +{ + private final Map writes = new HashMap<>(); + @Override + public SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) + { + return new SerializerSupport.MessageProvider() + { + @Override + public Set test(Set messages) + { + Set keys = new ObjectHashSet<>(messages.size() + 1, 0.9f); + for (MessageType message : messages) + for (AccordJournal.Type synonymousType : AccordJournal.Type.synonymousTypesFromMessageType(message)) + keys.add(new AccordJournal.Key(txnId, synonymousType)); + Set presentKeys = Sets.intersection(writes.keySet(), keys); + Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); + for (AccordJournal.Key key : presentKeys) + presentMessages.add(key.type.outgoingType); + return presentMessages; + } + + private T get(AccordJournal.Key key) + { + return (T) writes.get(key); + } + + private T get(MessageType messageType) + { + for (AccordJournal.Type type : AccordJournal.Type.synonymousTypesFromMessageType(messageType)) + { + T value = get(new AccordJournal.Key(txnId, type)); + if (value != null) return value; + } + return null; + } + + @Override + public PreAccept preAccept() + { + return get(PRE_ACCEPT_REQ); + } + + @Override + public BeginRecovery beginRecover() + { + return get(BEGIN_RECOVER_REQ); + } + + @Override + public Propagate propagatePreAccept() + { + return get(PROPAGATE_PRE_ACCEPT_MSG); + } + + @Override + public Accept accept(Ballot ballot) + { + return get(ACCEPT_REQ); + } + + @Override + public Commit commitSlowPath() + { + return get(COMMIT_SLOW_PATH_REQ); + } + + @Override + public Commit commitMaximal() + { + return get(COMMIT_MAXIMAL_REQ); + } + + @Override + public Commit stableFastPath() + { + return get(STABLE_FAST_PATH_REQ); + } + + @Override + public Commit stableMaximal() + { + return get(STABLE_MAXIMAL_REQ); + } + + @Override + public Propagate propagateStable() + { + return get(PROPAGATE_STABLE_MSG); + } + + @Override + public Apply applyMinimal() + { + return get(APPLY_MINIMAL_REQ); + } + + @Override + public Apply applyMaximal() + { + return get(APPLY_MAXIMAL_REQ); + } + + @Override + public Propagate propagateApply() + { + return get(PROPAGATE_APPLY_MSG); + } + }; + } + + @Override + public void appendMessageBlocking(Message message) + { + AccordJournal.Type type = AccordJournal.Type.fromMessageType(message.type()); + AccordJournal.Key key = new AccordJournal.Key(type.txnId(message), type); + writes.put(key, message); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java new file mode 100644 index 000000000000..3d54d3af9d80 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import org.junit.Test; + +import accord.local.PreLoadContext; +import accord.local.SaveStatus; +import accord.primitives.Ranges; +import accord.primitives.TxnId; +import accord.utils.AccordGens; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +public class SimpleSimulatedAccordCommandStoreTest extends SimulatedAccordCommandStoreTestBase +{ + @Test + public void emptyTxns() + { + qt().withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + for (int i = 0, examples = 100; i < examples; i++) + { + TxnId id = AccordGens.txnIds().next(rs); + instance.process(PreLoadContext.contextFor(id), (safe) -> { + var safeCommand = safe.get(id, id, Ranges.EMPTY); + var command = safeCommand.current(); + Assertions.assertThat(command.saveStatus()).isEqualTo(SaveStatus.Uninitialised); + return null; + }); + } + } + + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java new file mode 100644 index 000000000000..6cb71b68db04 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -0,0 +1,356 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.function.BooleanSupplier; +import java.util.function.Function; +import java.util.function.ToLongFunction; + +import accord.impl.SizeOfIntersectionSorter; +import accord.impl.TestAgent; +import accord.local.Command; +import accord.local.CommandStore; +import accord.local.CommandStores; +import accord.local.Node; +import accord.local.NodeTimeService; +import accord.local.PreLoadContext; +import accord.local.SafeCommandStore; +import accord.messages.BeginRecovery; +import accord.messages.Message; +import accord.messages.PreAccept; +import accord.messages.TxnRequest; +import accord.primitives.Ballot; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.RoutableKey; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.topology.Topologies; +import accord.topology.Topology; +import accord.utils.Gens; +import accord.utils.RandomSource; +import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.concurrent.ExecutorFactory; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.concurrent.SimulatedExecutorFactory; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DataRange; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.compaction.CompactionManager; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.memtable.Memtable; +import org.apache.cassandra.metrics.AccordStateCacheMetrics; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Generators; +import org.apache.cassandra.utils.Pair; +import org.assertj.core.api.Assertions; + +import static org.apache.cassandra.db.ColumnFamilyStore.FlushReason.UNIT_TESTS; +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.apache.cassandra.utils.AccordGenerators.fromQT; + +class SimulatedAccordCommandStore implements AutoCloseable +{ + private final List failures = new ArrayList<>(); + private final SimulatedExecutorFactory globalExecutor; + private final CommandStore.EpochUpdateHolder updateHolder; + private final BooleanSupplier shouldEvict, shouldFlush, shouldCompact; + + public final NodeTimeService timeService; + public final AccordCommandStore store; + public final Node.Id nodeId; + public final Topology topology; + public final MockJournal journal; + public final ScheduledExecutorPlus unorderedScheduled; + public final List evictions = new ArrayList<>(); + + SimulatedAccordCommandStore(RandomSource rs) + { + globalExecutor = new SimulatedExecutorFactory(accord.utilsfork.RandomSource.wrap(rs).fork(), fromQT(Generators.TIMESTAMP_GEN.map(java.sql.Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs), failures::add); + this.unorderedScheduled = globalExecutor.scheduled("ignored"); + ExecutorFactory.Global.unsafeSet(globalExecutor); + Stage.READ.unsafeSetExecutor(unorderedScheduled); + Stage.MUTATION.unsafeSetExecutor(unorderedScheduled); + for (Stage stage : Arrays.asList(Stage.MISC, Stage.ACCORD_MIGRATION)) + stage.unsafeSetExecutor(globalExecutor.configureSequential("ignore").build()); + + this.updateHolder = new CommandStore.EpochUpdateHolder(); + this.nodeId = AccordTopology.tcmIdToAccord(ClusterMetadata.currentNullable().myNodeId()); + this.timeService = new NodeTimeService() + { + private final ToLongFunction unixWrapper = NodeTimeService.unixWrapper(TimeUnit.NANOSECONDS, this::now); + + @Override + public Node.Id id() + { + return nodeId; + } + + @Override + public long epoch() + { + return ClusterMetadata.current().epoch.getEpoch(); + } + + @Override + public long now() + { + return globalExecutor.nanoTime(); + } + + @Override + public long unix(TimeUnit unit) + { + return unixWrapper.applyAsLong(unit); + } + + @Override + public Timestamp uniqueNow(Timestamp atLeast) + { + var now = Timestamp.fromValues(epoch(), now(), nodeId); + if (now.compareTo(atLeast) < 0) + throw new UnsupportedOperationException(); + return now; + } + }; + + this.journal = new MockJournal(); + this.store = new AccordCommandStore(0, + timeService, + new TestAgent.RethrowAgent() + { + @Override + public boolean isExpired(TxnId initiated, long now) + { + return false; + } + }, + null, + ignore -> AccordTestUtils.NOOP_PROGRESS_LOG, + updateHolder, + journal, + new AccordStateCacheMetrics("test")); + + store.cache().instances().forEach(i -> { + i.register(new AccordStateCache.Listener() + { + @Override + public void onAdd(AccordCachingState state) + { + } + + @Override + public void onRelease(AccordCachingState state) + { + } + + @Override + public void onEvict(AccordCachingState state) + { + evictions.add(i + " evicted " + state); + } + }); + }); + + this.topology = AccordTopology.createAccordTopology(ClusterMetadata.current()); + var rangesForEpoch = new CommandStores.RangesForEpoch(topology.epoch(), topology.ranges(), store); + updateHolder.add(topology.epoch(), rangesForEpoch, topology.ranges()); + updateHolder.updateGlobal(topology.ranges()); + + shouldEvict = boolSource(rs.fork()); + shouldFlush = boolSource(rs.fork()); + shouldCompact = boolSource(rs.fork()); + } + + private static BooleanSupplier boolSource(RandomSource rs) + { + var gen = Gens.bools().mixedDistribution().next(rs); + return () -> gen.next(rs); + } + + public TxnId nextTxnId(Txn.Kind kind, Routable.Domain domain) + { + return new TxnId(timeService.epoch(), timeService.now(), kind, domain, nodeId); + } + + public void maybeCacheEvict(Keys keys, Ranges ranges) + { + AccordStateCache cache = store.cache(); + cache.forEach(state -> { + Class keyType = state.key().getClass(); + if (TxnId.class.equals(keyType)) + { + Command command = (Command) state.state().get(); + if (command.known().definition.isKnown() + && (command.partialTxn().keys().intersects(keys) || ranges.intersects(command.partialTxn().keys())) + && shouldEvict.getAsBoolean()) + cache.maybeEvict(state); + } + else if (RoutableKey.class.isAssignableFrom(keyType)) + { + RoutableKey key = (RoutableKey) state.key(); + if ((keys.contains(key) || ranges.intersects(key)) + && shouldEvict.getAsBoolean()) + cache.maybeEvict(state); + } + else + { + throw new AssertionError("Unexpected key type: " + state.key().getClass()); + } + }); + + for (var store : Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores()) + { + Memtable memtable = store.getCurrentMemtable(); + if (memtable.partitionCount() == 0 || !intersects(store, memtable, keys, ranges)) + continue; + if (shouldFlush.getAsBoolean()) + store.forceBlockingFlush(UNIT_TESTS); + } + for (var store : Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores()) + { + if (store.getLiveSSTables().size() > 5 && shouldCompact.getAsBoolean()) + { + // compaction no-op since auto-compaction is disabled... so need to enable quickly + store.enableAutoCompaction(); + try + { + FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(store)); + } + finally + { + store.disableAutoCompaction(); + } + } + } + } + + private static boolean intersects(ColumnFamilyStore store, Memtable memtable, Keys keys, Ranges ranges) + { + if (keys.isEmpty() && ranges.isEmpty()) // shouldn't happen, but just in case... + return false; + switch (store.name) + { + case "commands_for_key": + // pk = (store_id, key_token, key) + // since this is simulating a single store, store_id is a constant, so check key + try (var it = memtable.partitionIterator(ColumnFilter.NONE, DataRange.allData(store.getPartitioner()), null)) + { + while (it.hasNext()) + { + var key = AccordKeyspace.CommandsForKeysAccessor.getKey(it.next().partitionKey()); + if (keys.contains(key) || ranges.intersects(key)) + return true; + } + } + break; + } + return false; + } + + public void checkFailures() + { + if (Thread.interrupted()) + failures.add(new InterruptedException()); + if (failures.isEmpty()) return; + AssertionError error = new AssertionError("Unexpected exceptions found"); + failures.forEach(error::addSuppressed); + failures.clear(); + throw error; + } + + public T process(TxnRequest request) throws ExecutionException, InterruptedException + { + return process(request, request::apply); + } + + public T process(PreLoadContext loadCtx, Function function) throws ExecutionException, InterruptedException + { + var result = processAsync(loadCtx, function); + processAll(); + return AsyncChains.getBlocking(result); + } + + public AsyncResult processAsync(TxnRequest request) + { + return processAsync(request, request::apply); + } + + public AsyncResult processAsync(PreLoadContext loadCtx, Function function) + { + if (loadCtx instanceof Message) + journal.appendMessageBlocking((Message) loadCtx); + return store.submit(loadCtx, function).beginAsResult(); + } + + public Pair> enqueuePreAccept(Txn txn, FullRoute route) + { + TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); + PreAccept preAccept = new PreAccept(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology), txnId, txn, route); + return Pair.create(txnId, processAsync(preAccept, safe -> { + var reply = preAccept.apply(safe); + Assertions.assertThat(reply.isOk()).isTrue(); + return (PreAccept.PreAcceptOk) reply; + })); + } + + public Pair> enqueueBeginRecovery(Txn txn, FullRoute route) + { + TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); + Ballot ballot = Ballot.fromValues(timeService.epoch(), timeService.now(), nodeId); + BeginRecovery br = new BeginRecovery(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology), txnId, txn, route, ballot); + + return Pair.create(txnId, processAsync(br, safe -> { + var reply = br.apply(safe); + Assertions.assertThat(reply.isOk()).isTrue(); + return (BeginRecovery.RecoverOk) reply; + }).beginAsResult()); + } + + public void processAll() + { + while (processOne()) + { + } + } + + private boolean processOne() + { + boolean result = globalExecutor.processOne(); + checkFailures(); + return result; + } + + @Override + public void close() throws Exception + { + store.shutdown(); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java new file mode 100644 index 000000000000..5aed34bc7928 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import com.google.common.collect.Maps; +import org.junit.Before; +import org.junit.BeforeClass; + +import accord.api.Key; +import accord.impl.SizeOfIntersectionSorter; +import accord.local.Node; +import accord.messages.BeginRecovery; +import accord.messages.PreAccept; +import accord.primitives.Ballot; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.LatestDeps; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.topology.Topologies; +import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.Pair; +import org.assertj.core.api.Assertions; + +import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; + +public abstract class SimulatedAccordCommandStoreTestBase extends CQLTester +{ + static + { + CassandraRelevantProperties.TEST_ACCORD_STORE_THREAD_CHECKS_ENABLED.setBoolean(false); + // since this test does frequent truncates, the info table gets updated and forced flushed... which is 90% of the cost of this test... + // this flag disables that flush + CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true); + // The plan is to migrate away from SAI, so rather than hacking around timeout issues; just disable for now + CassandraRelevantProperties.SAI_TEST_DISABLE_TIMEOUT.setBoolean(true); + } + + protected enum DepsMessage + {PreAccept, BeginRecovery, PreAcceptThenBeginRecovery} + + protected static TableMetadata intTbl, reverseTokenTbl; + protected static Node.Id nodeId; + + @BeforeClass + public static void setUpClass() + { + CQLTester.setUpClass(); + DatabaseDescriptor.setIncrementalBackupsEnabled(false); + } + + @Before + public void init() + { + if (intTbl != null) + return; + createKeyspace("CREATE KEYSPACE test WITH replication={ 'class' : 'SimpleStrategy', 'replication_factor' : 2 }"); + createTable("test", "CREATE TABLE test.tbl1 (pk int PRIMARY KEY, value int) WITH transactional_mode='full'"); + intTbl = Schema.instance.getTableMetadata("test", "tbl1"); + + createTable("test", "CREATE TABLE test.tbl2 (pk vector PRIMARY KEY, value int) WITH transactional_mode='full'"); + reverseTokenTbl = Schema.instance.getTableMetadata("test", "tbl2"); + + nodeId = AccordTopology.tcmIdToAccord(ClusterMetadata.current().myNodeId()); + + // tests may flush, which triggers compaction... since compaction is not simulated this adds a form of non-deterministic behavior + for (var store : Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores()) + store.disableAutoCompaction(); + + AccordService.unsafeSetNoop(); + + ServerTestUtils.markCMS(); + } + + protected static void safeBlock(List> asyncs) throws InterruptedException, ExecutionException + { + int counter = 0; + for (var chain : asyncs) + { + Assertions.assertThat(chain.isDone()) + .describedAs("The %dth async task is blocked!", counter++) + .isTrue(); + AsyncChains.getBlocking(chain); + } + } + + protected static void safeBlock(List> asyncs, List details) throws InterruptedException, ExecutionException + { + int counter = 0; + for (var chain : asyncs) + { + Assertions.assertThat(chain.isDone()) + .describedAs("The %dth async task %s is blocked!", counter, details.get(counter++)) + .isTrue(); + AsyncChains.getBlocking(chain); + } + } + + protected static TokenRange fullRange(TableId id) + { + return new TokenRange(AccordRoutingKey.SentinelKey.min(id), AccordRoutingKey.SentinelKey.max(id)); + } + + protected static TokenRange tokenRange(TableId id, long start, long end) + { + return new TokenRange(start == Long.MIN_VALUE ? AccordRoutingKey.SentinelKey.min(id) : tokenKey(id, start), tokenKey(id, end)); + } + + protected static AccordRoutingKey.TokenKey tokenKey(TableId id, long token) + { + return new AccordRoutingKey.TokenKey(id, new Murmur3Partitioner.LongToken(token)); + } + + protected static Map> keyConflicts(List list, Keys keys) + { + Map> kc = Maps.newHashMapWithExpectedSize(keys.size()); + for (Key key : keys) + { + if (list.isEmpty()) + continue; + kc.put(key, list); + } + return kc; + } + + protected static Map> rangeConflicts(List list, Ranges ranges) + { + Map> kc = Maps.newHashMapWithExpectedSize(ranges.size()); + for (Range range : ranges) + { + if (list.isEmpty()) + continue; + kc.put(range, list); + } + return kc; + } + + protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, + DepsMessage messageType, + Txn txn, FullRoute route, + Map> keyConflicts) throws ExecutionException, InterruptedException + { + return assertDepsMessage(instance, messageType, txn, route, keyConflicts, Collections.emptyMap()); + } + + protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, + DepsMessage messageType, + Txn txn, FullRoute route, + Map> keyConflicts, + Map> rangeConflicts) throws ExecutionException, InterruptedException + { + var pair = assertDepsMessageAsync(instance, messageType, txn, route, keyConflicts, rangeConflicts); + instance.processAll(); + AsyncChains.getBlocking(pair.right); + + return pair.left; + } + + protected static Pair> assertDepsMessageAsync(SimulatedAccordCommandStore instance, + DepsMessage messageType, + Txn txn, FullRoute route, + Map> keyConflicts) + { + return assertDepsMessageAsync(instance, messageType, txn, route, keyConflicts, Collections.emptyMap()); + } + + protected static Pair> assertDepsMessageAsync(SimulatedAccordCommandStore instance, + DepsMessage messageType, + Txn txn, FullRoute route, + Map> keyConflicts, + Map> rangeConflicts) + { + switch (messageType) + { + case PreAccept: + return assertPreAcceptAsync(instance, txn, route, keyConflicts, rangeConflicts); + case BeginRecovery: + return assertBeginRecoveryAsync(instance, txn, route, keyConflicts, rangeConflicts); + case PreAcceptThenBeginRecovery: + return assertBeginRecoveryAfterPreAcceptAsync(instance, txn, route, keyConflicts, rangeConflicts); + default: + throw new IllegalArgumentException("Unknown message type: " + messageType); + } + } + + protected static Pair> assertPreAcceptAsync(SimulatedAccordCommandStore instance, + Txn txn, FullRoute route, + Map> keyConflicts, + Map> rangeConflicts) + { + Map> cloneKeyConflicts = keyConflicts.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); + Map> cloneRangeConflicts = rangeConflicts.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); + var pair = instance.enqueuePreAccept(txn, route); + return Pair.create(pair.left, pair.right.map(success -> { + assertDeps(success.txnId, success.deps, cloneKeyConflicts, cloneRangeConflicts); + return null; + }).beginAsResult()); + } + + protected static Pair> assertBeginRecoveryAsync(SimulatedAccordCommandStore instance, + Txn txn, FullRoute route, + Map> keyConflicts, + Map> rangeConflicts) + { + Map> cloneKeyConflicts = keyConflicts.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); + Map> cloneRangeConflicts = rangeConflicts.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); + var pair = instance.enqueueBeginRecovery(txn, route); + return Pair.create(pair.left, pair.right.map(success -> { + Deps proposeDeps = LatestDeps.mergeProposal(Collections.singletonList(success), ok -> ok.deps); + assertDeps(success.txnId, proposeDeps, cloneKeyConflicts, cloneRangeConflicts); + return null; + }).beginAsResult()); + } + + protected static Pair> assertBeginRecoveryAfterPreAcceptAsync(SimulatedAccordCommandStore instance, + Txn txn, FullRoute route, + Map> keyConflicts, + Map> rangeConflicts) + { + Map> cloneKeyConflicts = keyConflicts.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); + Map> cloneRangeConflicts = rangeConflicts.entrySet().stream() + .filter(e -> !e.getValue().isEmpty()) + .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); + + TxnId txnId = instance.nextTxnId(txn.kind(), txn.keys().domain()); + PreAccept preAccept = new PreAccept(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, instance.topology), txnId, txn, route); + + var preAcceptAsync = instance.processAsync(preAccept, safe -> { + var reply = preAccept.apply(safe); + Assertions.assertThat(reply.isOk()).isTrue(); + PreAccept.PreAcceptOk success = (PreAccept.PreAcceptOk) reply; + assertDeps(success.txnId, success.deps, cloneKeyConflicts, cloneRangeConflicts); + return success; + }); + var delay = preAcceptAsync.flatMap(ignore -> AsyncChains.ofCallable(instance.unorderedScheduled, () -> { + Ballot ballot = Ballot.fromValues(instance.timeService.epoch(), instance.timeService.now(), nodeId); + return new BeginRecovery(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, instance.topology), txnId, txn, route, ballot); + })); + var recoverAsync = delay.flatMap(br -> instance.processAsync(br, safe -> { + var reply = br.apply(safe); + Assertions.assertThat(reply.isOk()).isTrue(); + BeginRecovery.RecoverOk success = (BeginRecovery.RecoverOk) reply; + Deps proposeDeps = LatestDeps.mergeProposal(Collections.singletonList(success), ok -> ok.deps); + assertDeps(success.txnId, proposeDeps, cloneKeyConflicts, cloneRangeConflicts); + return success; + })); + + return Pair.create(txnId, recoverAsync.beginAsResult()); + } + + protected static void assertDeps(TxnId txnId, Deps deps, + Map> keyConflicts, + Map> rangeConflicts) + { + if (rangeConflicts.isEmpty()) + { + Assertions.assertThat(deps.rangeDeps.isEmpty()).describedAs("Txn %s rangeDeps was not empty; %s", txnId, deps.rangeDeps).isTrue(); + } + else + { + List actualRanges = IntStream.range(0, deps.rangeDeps.rangeCount()).mapToObj(i -> deps.rangeDeps.range(i)).collect(Collectors.toList()); +// Assertions.assertThat(deps.rangeDeps.rangeCount()).describedAs("Txn %s Expected ranges size; %s", txnId, deps.rangeDeps).isEqualTo(rangeConflicts.size()); + Assertions.assertThat(Ranges.of(actualRanges.toArray(Range[]::new))) + .describedAs("Txn %s had different ranges than expected", txnId) + .isEqualTo(Ranges.of(rangeConflicts.keySet().toArray(Range[]::new))); + AssertionError errors = null; + for (int i = 0; i < rangeConflicts.size(); i++) + { + try + { + var range = deps.rangeDeps.range(i); + Assertions.assertThat(rangeConflicts).describedAs("Txn %s had an unexpected range", txnId).containsKey(range); + var conflict = deps.rangeDeps.txnIdsForRangeIndex(i); + List expectedConflict = rangeConflicts.get(range); + Assertions.assertThat(conflict).describedAs("Txn %s Expected range %s to have different conflicting txns", txnId, range).isEqualTo(expectedConflict); + } + catch (AssertionError e) + { + if (errors == null) + errors = e; + else + errors.addSuppressed(e); + } + } + if (errors != null) + throw errors; + } + if (keyConflicts.isEmpty()) + { + Assertions.assertThat(deps.keyDeps.isEmpty()).describedAs("Txn %s keyDeps was not empty", txnId).isTrue(); + } + else + { + Assertions.assertThat(deps.keyDeps.keys()).describedAs("Txn %s Keys", txnId).isEqualTo(Keys.of(keyConflicts.keySet())); + for (var key : keyConflicts.keySet()) + Assertions.assertThat(deps.keyDeps.txnIds(key)).describedAs("Txn %s for key %s", txnId, key).isEqualTo(keyConflicts.get(key)); + } + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java new file mode 100644 index 000000000000..a10b7e0646bc --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Test; + +import accord.api.Key; +import accord.primitives.FullKeyRoute; +import accord.primitives.FullRangeRoute; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.api.PartitionKey; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; + +public class SimulatedDepsTest extends SimulatedAccordCommandStoreTestBase +{ + @Test + public void keyConflicts() + { + TableMetadata tbl = intTbl; + int numSamples = 100; + + qt().withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + int key = rs.nextInt(); + PartitionKey pk = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(Int32Type.instance.decompose(key))); + Keys keys = Keys.of(pk); + FullKeyRoute route = keys.toRoute(pk.toUnseekable()); + Txn txn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(key, 42)); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + List conflicts = new ArrayList<>(numSamples); + boolean concurrent = rs.nextBoolean(); + List> asyncs = !concurrent ? null : new ArrayList<>(numSamples); + for (int i = 0; i < numSamples; i++) + { + instance.maybeCacheEvict(keys, Ranges.EMPTY); + if (concurrent) + { + var pair = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, keys)); + conflicts.add(pair.left); + asyncs.add(pair.right); + } + else + { + conflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, keys))); + } + } + if (concurrent) + { + instance.processAll(); + safeBlock(asyncs); + } + } + }); + } + + @Test + public void concurrentRangePartialKeyMatch() + { + var tbl = reverseTokenTbl; + int numSamples = 250; + int numConflictKeyTxns = 10; + + qt().withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); + Ranges partialRange = Ranges.of(tokenRange(tbl.id, token - 1, token)); + long outOfRangeToken = token - 10; + if (outOfRangeToken == Long.MIN_VALUE) // if this wraps around that is fine, just can't be min + outOfRangeToken++; + Key key = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(LongToken.keyForToken(token))); + Key outOfRangeKey = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(LongToken.keyForToken(outOfRangeToken))); + Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)", + "INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), + Arrays.asList(LongToken.keyForToken(token), 42, + LongToken.keyForToken(outOfRangeToken), 42)); + Keys keys = (Keys) keyTxn.keys(); + FullRoute keyRoute = keys.toRoute(keys.get(0).toUnseekable()); + + Txn conflictingKeyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), + Arrays.asList(LongToken.keyForToken(outOfRangeToken), 42)); + Keys conflictingKeys = (Keys) conflictingKeyTxn.keys(); + FullRoute conflictingRoute = conflictingKeys.toRoute(conflictingKeys.get(0).toUnseekable()); + + FullRangeRoute rangeRoute = partialRange.toRoute(keys.get(0).toUnseekable()); + Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, partialRange); + + List keyConflicts = new ArrayList<>(numSamples); + List outOfRangeKeyConflicts = new ArrayList<>(numSamples); + List rangeConflicts = new ArrayList<>(numSamples); + List> asyncs = new ArrayList<>(numSamples * 2 + numSamples * numConflictKeyTxns); + List asyncIds = new ArrayList<>(numSamples * 2 + numSamples * numConflictKeyTxns); + for (int i = 0; i < numSamples; i++) + { + instance.maybeCacheEvict((Keys) keyTxn.keys(), partialRange); + for (int j = 0; j < numConflictKeyTxns; j++) + { + var p = instance.enqueuePreAccept(conflictingKeyTxn, conflictingRoute); + outOfRangeKeyConflicts.add(p.left); + asyncs.add(p.right); + asyncIds.add(p.left); + } + + var k = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts, outOfRangeKey, outOfRangeKeyConflicts), Collections.emptyMap()); + keyConflicts.add(k.left); + outOfRangeKeyConflicts.add(k.left); + asyncs.add(k.right); + asyncIds.add(k.left); + + var r = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, Map.of(key, keyConflicts), rangeConflicts(rangeConflicts, partialRange)); + rangeConflicts.add(r.left); + asyncs.add(r.right); + asyncIds.add(r.left); + } + instance.processAll(); + safeBlock(asyncs, asyncIds); + } + }); + } + + @Test + public void simpleRangeConflicts() + { + var tbl = reverseTokenTbl; + Ranges wholeRange = Ranges.of(fullRange(tbl.id)); + int numSamples = 100; + + qt().withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); + ByteBuffer key = LongToken.keyForToken(token); + PartitionKey pk = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(key)); + Keys keys = Keys.of(pk); + FullKeyRoute keyRoute = keys.toRoute(pk.toUnseekable()); + Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(key, 42)); + + Ranges partialRange = Ranges.of(tokenRange(tbl.id, token - 1, token)); + boolean useWholeRange = rs.nextBoolean(); + Ranges ranges = useWholeRange ? wholeRange : partialRange; + FullRangeRoute rangeRoute = ranges.toRoute(pk.toUnseekable()); + Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, ranges); + + List keyConflicts = new ArrayList<>(numSamples); + List rangeConflicts = new ArrayList<>(numSamples); + boolean concurrent = rs.nextBoolean(); + List> asyncs = !concurrent ? null : new ArrayList<>(numSamples * 2); + for (int i = 0; i < numSamples; i++) + { + instance.maybeCacheEvict(keys, ranges); + if (concurrent) + { + var k = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys)); + keyConflicts.add(k.left); + asyncs.add(k.right); + var r = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts(rangeConflicts, ranges)); + rangeConflicts.add(r.left); + asyncs.add(r.right); + } + else + { + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts(rangeConflicts, ranges))); + } + } + if (concurrent) + { + instance.processAll(); + safeBlock(asyncs); + } + } + }); + } + + @Test + public void expandingRangeConflicts() + { + var tbl = reverseTokenTbl; + int numSamples = 100; + + qt().withSeed(6484101342775432632L).withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + long token = rs.nextLong(Long.MIN_VALUE + numSamples + 1, Long.MAX_VALUE - numSamples); + ByteBuffer key = LongToken.keyForToken(token); + PartitionKey pk = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(key)); + Keys keys = Keys.of(pk); + FullKeyRoute keyRoute = keys.toRoute(pk.toUnseekable()); + Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(key, 42)); + + List keyConflicts = new ArrayList<>(numSamples); + Map> rangeConflicts = new HashMap<>(); + boolean concurrent = rs.nextBoolean(); + List> asyncs = !concurrent ? null : new ArrayList<>(numSamples); + List info = !concurrent ? null : new ArrayList<>(numSamples); + for (int i = 0; i < numSamples; i++) + { + Ranges partialRange = Ranges.of(tokenRange(tbl.id, token - i - 1, token + i)); + FullRangeRoute rangeRoute = partialRange.toRoute(pk.toUnseekable()); + Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, partialRange); + try + { + instance.maybeCacheEvict(keys, partialRange); + if (concurrent) + { + var pair = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys)); + info.add(pair.left); + keyConflicts.add(pair.left); + asyncs.add(pair.right); + + pair = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts); + info.add(pair.left); + rangeConflicts.put(partialRange.get(0), Collections.singletonList(pair.left)); + asyncs.add(pair.right); + } + else + { + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); + rangeConflicts.put(partialRange.get(0), Collections.singletonList(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts))); + } + } + catch (Throwable t) + { + AssertionError error = new AssertionError("Unexpected error: i=" + i + ", token=" + token + ", range=" + partialRange.get(0)); + t.addSuppressed(error); + throw t; + } + } + if (concurrent) + { + instance.processAll(); + safeBlock(asyncs, info); + } + } + }); + } + + @Test + public void overlappingRangeConflicts() + { + var tbl = reverseTokenTbl; + int numSamples = 100; + + qt().withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + long token = rs.nextLong(Long.MIN_VALUE + numSamples + 1, Long.MAX_VALUE - numSamples); + ByteBuffer key = LongToken.keyForToken(token); + PartitionKey pk = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(key)); + Keys keys = Keys.of(pk); + FullKeyRoute keyRoute = keys.toRoute(pk.toUnseekable()); + Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(key, 42)); + + Range left = tokenRange(tbl.id, token - 10, token + 5); + Range right = tokenRange(tbl.id, token - 5, token + 10); + + List keyConflicts = new ArrayList<>(numSamples); + Map> rangeConflicts = new HashMap<>(); + rangeConflicts.put(left, new ArrayList<>()); + rangeConflicts.put(right, new ArrayList<>()); + for (int i = 0; i < numSamples; i++) + { + Ranges partialRange = Ranges.of(rs.nextBoolean() ? left : right); + try + { + instance.maybeCacheEvict(keys, partialRange); + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); + + FullRangeRoute rangeRoute = partialRange.toRoute(pk.toUnseekable()); + Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, partialRange); + rangeConflicts.get(partialRange.get(0)).add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts)); + } + catch (Throwable t) + { + AssertionError error = new AssertionError("Unexpected error: i=" + i + ", token=" + token + ", range=" + partialRange.get(0)); + t.addSuppressed(error); + throw t; + } + } + } + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java new file mode 100644 index 000000000000..6ca336c98270 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.junit.Test; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.primitives.FullRangeRoute; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable.Domain; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.RTree; +import org.apache.cassandra.utils.RangeTree; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; + +public class SimulatedMultiKeyAndRangeTest extends SimulatedAccordCommandStoreTestBase +{ + @Test + public void test() + { + var tbl = reverseTokenTbl; + int numSamples = 300; + long minToken = 0; + long maxToken = 100; + Gen tokenDistribution = Gens.mixedDistribution(minToken, maxToken + 1); + Gen keyDistribution = Gens.mixedDistribution(1, 5); + Gen rangeDistribution = Gens.mixedDistribution(1, 5); + Gen> domainDistribution = Gens.mixedDistribution(Domain.values()); + Gen> msgDistribution = Gens.mixedDistribution(DepsMessage.values()); + + qt().withExamples(100).check(rs -> { + AccordKeyspace.unsafeClear(); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + Gen.LongGen tokenGen = tokenDistribution.next(rs); + Gen domainGen = domainDistribution.next(rs); + Gen msgGen = msgDistribution.next(rs); + Map> keyConflicts = new HashMap<>(); + RangeTree rangeConflicts = RTree.create(RangeTreeRangeAccessor.instance); + List> asyncs = new ArrayList<>(numSamples); + + Gen.IntGen keyCountGen = keyDistribution.next(rs); + Gen.IntGen rangeCountGen = rangeDistribution.next(rs); + + for (int i = 0; i < numSamples; i++) + { + switch (domainGen.next(rs)) + { + case Key: + { + int numKeys = keyCountGen.nextInt(rs); + TreeSet set = new TreeSet<>(); + while (set.size() != numKeys) + set.add(new PartitionKey(tbl.id, tbl.partitioner.decorateKey(keyForToken(tokenGen.nextLong(rs))))); + Keys keys = Keys.of(set); + List inserts = IntStream.range(0, numKeys).mapToObj(ignore -> "INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)").collect(Collectors.toList()); + List binds = new ArrayList<>(numKeys * 2); + keys.forEach(k -> { + binds.add(((PartitionKey) k.asKey()).partitionKey().getKey()); + binds.add(42); + }); + Txn txn = createTxn(wrapInTxn(inserts), binds); + FullRoute route = keys.toRoute(keys.get(0).toUnseekable()); + + Map> expectedConflicts = new HashMap<>(); + keys.forEach(k -> expectedConflicts.put(k, keyConflicts.computeIfAbsent(k, ignore -> new ArrayList<>()))); + + var p = assertDepsMessageAsync(instance, msgGen.next(rs), txn, route, expectedConflicts, Collections.emptyMap()); + keys.forEach(k -> keyConflicts.get(k).add(p.left)); + asyncs.add(p.right); + } + break; + case Range: + { + int numRanges = rangeCountGen.nextInt(rs); + Set set = new HashSet<>(); + while (set.size() != numRanges) + { + long token = tokenGen.nextLong(rs); + int offset = rs.nextInt(1, 10); + long start, end; + if (token + offset > maxToken) + { + end = token; + start = end - offset; + } + else + { + start = token; + end = start + offset; + } + set.add(tokenRange(tbl.id, start, end)); + } + // The property ranges.size() == numRanges is not true as this logic will sort + deoverlap + // so if the ranges were overlapped, we could have more or less than numRanges + Ranges ranges = Ranges.of(set.toArray(Range[]::new)); + FullRangeRoute route = ranges.toRoute(ranges.get(0).end()); + Txn txn = createTxn(Txn.Kind.ExclusiveSyncPoint, ranges); + + Map> expectedKeyConflicts = keyConflicts.entrySet().stream() + .filter(e -> ranges.contains(e.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + Map> expectedRangeConflicts = new HashMap<>(); + ranges.forEach(r -> + rangeConflicts.search(r, e -> + expectedRangeConflicts.computeIfAbsent(e.getKey(), ignore -> new ArrayList<>()).add(e.getValue()))); + // need to dedup/sort txns + expectedRangeConflicts.values().forEach(l -> { + var sortedDedup = new ArrayList<>(new TreeSet<>(l)); + l.clear(); + l.addAll(sortedDedup); + }); + var p = assertDepsMessageAsync(instance, msgGen.next(rs), txn, route, expectedKeyConflicts, expectedRangeConflicts); + asyncs.add(p.right); + ranges.forEach(r -> rangeConflicts.add(r, p.left)); + } + break; + default: + throw new AssertionError(); + } + } + instance.processAll(); + safeBlock(asyncs); + } + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java new file mode 100644 index 000000000000..eec34195c545 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Test; + +import accord.api.Key; +import accord.primitives.FullRangeRoute; +import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.service.accord.api.PartitionKey; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; + +public class SimulatedRandomKeysWithRangeConflictTest extends SimulatedAccordCommandStoreTestBase +{ + @Test + public void keysAllOverConflictingWithRange() + { + var tbl = reverseTokenTbl; + Ranges wholeRange = Ranges.of(fullRange(tbl.id)); + FullRangeRoute rangeRoute = wholeRange.toRoute(wholeRange.get(0).end()); + Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, wholeRange); + int numSamples = 300; + + qt().withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + Map> keyConflicts = new HashMap<>(); + List rangeConflicts = new ArrayList<>(numSamples); + boolean concurrent = rs.nextBoolean(); + List> asyncs = !concurrent ? null : new ArrayList<>(numSamples * 2); + for (int i = 0; i < numSamples; i++) + { + long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); + Key key = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(keyForToken(token))); + Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), + Arrays.asList(keyForToken(token), 42)); + Keys keys = (Keys) keyTxn.keys(); + FullRoute keyRoute = keys.toRoute(keys.get(0).toUnseekable()); + + instance.maybeCacheEvict((Keys) keyTxn.keys(), wholeRange); + + if (concurrent) + { + var k = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); + keyConflicts.get(key).add(k.left); + asyncs.add(k.right); + + var r = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts, rangeConflicts(rangeConflicts, wholeRange)); + rangeConflicts.add(r.left); + asyncs.add(r.right); + } + else + { + var k = assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); + keyConflicts.get(key).add(k); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts, rangeConflicts(rangeConflicts, wholeRange))); + } + } + if (concurrent) + { + instance.processAll(); + safeBlock(asyncs); + } + } + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index a5ece26a51a3..e7aa2139c3f0 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -26,8 +26,12 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; +import accord.utils.DefaultRandom; import com.google.common.collect.Iterables; import com.google.common.collect.Maps; +import org.apache.cassandra.concurrent.SimulatedExecutorFactory; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -101,6 +105,11 @@ public class AsyncOperationTest private static final Logger logger = LoggerFactory.getLogger(AsyncOperationTest.class); private static final AtomicLong clock = new AtomicLong(0); + static + { + CassandraRelevantProperties.TEST_ACCORD_STORE_THREAD_CHECKS_ENABLED.setBoolean(false); + } + @BeforeClass public static void beforeClass() throws Throwable { @@ -314,7 +323,8 @@ private static void assertFutureState(AccordStateCache.Instance { + var serializer = AccordRoutingKeyByteSource.create(token.getPartitioner()); + byte[] min = ByteSourceInverse.readBytes(serializer.minAsComparableBytes()); + byte[] max = ByteSourceInverse.readBytes(serializer.maxAsComparableBytes()); + + var bytes = serializer.serialize(token); + if (serializer instanceof FixedLength) + { + FixedLength fl = (FixedLength) serializer; + Assertions.assertThat(bytes) + .hasSize(fl.valueSize()) + .hasSize(min.length) + .hasSize(max.length); + } + + Assertions.assertThat(ByteArrayUtil.compareUnsigned(min, 0, bytes, 0, bytes.length)).isLessThan(0); + Assertions.assertThat(ByteArrayUtil.compareUnsigned(max, 0, bytes, 0, bytes.length)).isGreaterThan(0); + + var read = serializer.tokenFromComparableBytes(ByteArrayAccessor.instance, bytes); + Assertions.assertThat(read).isEqualTo(token); + }); + } + + @Test + public void accordRoutingKeySerde() + { + qt().forAll(AccordGenerators.routingKeyGen(fromQT(CassandraGenerators.TABLE_ID_GEN), fromQT(token()))).check(key -> { + AccordRoutingKeyByteSource.Serializer serializer = key.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL ? + // doesn't really matter... + new AccordRoutingKeyByteSource.VariableLength(ByteOrderedPartitioner.instance, ByteComparable.Version.OSS50) + : AccordRoutingKeyByteSource.create(key.asTokenKey().token().getPartitioner()); + + var read = serializer.fromComparableBytes(ByteArrayAccessor.instance, serializer.serialize(key)); + Assertions.assertThat(read).isEqualTo(key); + }); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java b/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java index 1006bc941fcf..ba431db848a7 100644 --- a/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java +++ b/test/unit/org/apache/cassandra/tcm/sequences/ProgressBarrierTest.java @@ -356,4 +356,4 @@ public static Surjections.Surjection combine(Surjecti gen4.inflate(PCGFastPure.next(l, 2))); }; } -} \ No newline at end of file +} diff --git a/test/unit/org/apache/cassandra/utils/RangeTreeTest.java b/test/unit/org/apache/cassandra/utils/RangeTreeTest.java new file mode 100644 index 000000000000..6ef5325d1d18 --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/RangeTreeTest.java @@ -0,0 +1,568 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.function.LongUnaryOperator; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.impl.IntKey; +import accord.impl.IntKey.Routing; +import accord.primitives.Range; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; +import accord.utils.SearchableRangeList; +import org.agrona.collections.IntArrayList; +import org.agrona.collections.LongArrayList; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +@RunWith(Parameterized.class) +public class RangeTreeTest +{ + private static final Logger logger = LoggerFactory.getLogger(RangeTreeTest.class); + private static final Comparator COMPARATOR = Comparator.naturalOrder(); + private static final RangeTree.Accessor END_INCLUSIVE = new RangeTree.Accessor<>() + { + @Override + public Routing start(Range range) + { + return (Routing) range.start(); + } + + @Override + public Routing end(Range range) + { + return (Routing) range.end(); + } + + @Override + public boolean contains(Range range, Routing routing) + { + return range.contains(routing); + } + + @Override + public boolean contains(Routing start, Routing end, Routing routing) + { + if (routing.compareTo(start) <= 0) + return false; + if (routing.compareTo(end) > 0) + return false; + return true; + } + + @Override + public boolean intersects(Range range, Routing start, Routing end) + { + return range.compareIntersecting(IntKey.range(start, end)) == 0; + } + + @Override + public boolean intersects(Range left, Range right) + { + return left.compareIntersecting(right) == 0; + } + }; + private static final RangeTree.Accessor ALL_INCLUSIVE = new RangeTree.Accessor<>() + { + @Override + public Routing start(Range range) + { + return (Routing) range.start(); + } + + @Override + public Routing end(Range range) + { + return (Routing) range.end(); + } + + @Override + public boolean contains(Range range, Routing routing) + { + return range.contains(routing) || range.start().equals(routing); + } + + @Override + public boolean contains(Routing start, Routing end, Routing routing) + { + if (routing.compareTo(start) < 0) + return false; + if (routing.compareTo(end) > 0) + return false; + return true; + } + + @Override + public boolean intersects(Range range, Routing start, Routing end) + { + return range.compareIntersecting(IntKey.range(start, end)) == 0 || range.end().equals(start) || range.start().equals(end); + } + + @Override + public boolean intersects(Range left, Range right) + { + return left.compareIntersecting(right) == 0 || left.end().equals(right.start()) || left.start().equals(right.end()); + } + }; + + private static final Gen.IntGen SMALL_INT_GEN = rs -> rs.nextInt(0, 10); + private static final int MIN_TOKEN = 0, MAX_TOKEN = 1 << 16; + private static final int TOKEN_RANGE_SIZE = MAX_TOKEN - MIN_TOKEN + 1; + private static final Gen TOKEN_DISTRIBUTION = Gens.mixedDistribution(MIN_TOKEN, MAX_TOKEN + 1); + private static final Gen RANGE_SIZE_DISTRIBUTION = Gens.mixedDistribution(10, (int) (TOKEN_RANGE_SIZE * .01)); + + // Used to test different worse case patterns and see how the tree performs. + private enum Pattern + { + RANDOM, // tends to have high selectivity: matches 50-100% of the tree in testing + NO_OVERLP, // tests to have low selectivity; matches 1-2 elements in testing + SMALL_RANGES // lower selectivity than RANDOM but still matches ~30% of the tree in testing + } + + // Having different models makes sure that the tree is flexiable enough and can be used with the semantics the user + // needs (with regard to inclusivity). It also adds more confidence that the search logic is correct as different + // algorithems help validate this. + private enum ModelType {List, IntervalTree, SearchableRangeList} + private final Pattern pattern; + private final ModelType modelType; + + public RangeTreeTest(Pattern pattern, ModelType modelType) + { + this.pattern = pattern; + this.modelType = modelType; + } + + @Parameterized.Parameters(name = "{0}, {1}") + public static Collection data() { + return Stream.of(Pattern.values()) + .flatMap(p -> + Stream.of(ModelType.values()) + .map(m -> new Object[]{ p, m })) + .collect(Collectors.toList()); + } + + @Test + public void test() + { + int samples = 3_000; + int examples = 10; + LongArrayList byToken = new LongArrayList(samples * examples, -1); + LongArrayList modelByToken = new LongArrayList(samples * examples, -1); + LongArrayList byTokenLength = new LongArrayList(samples * examples, -1); + LongArrayList byRange = new LongArrayList(samples * examples, -1); + LongArrayList modelByRange = new LongArrayList(samples * examples, -1); + LongArrayList byRangeLength = new LongArrayList(samples * examples, -1); + qt().withExamples(examples).check(rs -> { + var map = create(modelType); + var model = createModel(modelType); + + Gen rangeGen = rangeGen(rs, pattern, samples); + for (int i = 0; i < samples; i++) + { + var range = rangeGen.next(rs); + var value = SMALL_INT_GEN.nextInt(rs); + map.put(range, value); + model.put(range, value); + } + model.done(); + Assertions.assertThat(map.actual()).hasSize(samples); + if (rangeGen instanceof NoOverlap) + ((NoOverlap) rangeGen).reset(); + Gen.IntGen tokenGe = TOKEN_DISTRIBUTION.next(rs); + for (int i = 0; i < samples; i++) + { + { + // key lookup + var lookup = IntKey.routing(tokenGe.nextInt(rs)); + var actual = timed(byToken, () -> map.intersectsToken(lookup)); + var expected = timed(modelByToken, () -> model.intersectsToken(lookup)); + byTokenLength.addLong(expected.size()); + Assertions.assertThat(sort(actual)) + .describedAs("Write=%d; token=%s", i, lookup) + .isEqualTo(sort(expected)); + } + { + // range lookup + var lookup = rangeGen.next(rs); + var actual = timed(byRange, () -> map.intersects(lookup)); + var expected = timed(modelByRange, () -> model.intersects(lookup)); + byRangeLength.addLong(expected.size()); + Assertions.assertThat(sort(actual)) + .describedAs("Write=%d; range=%s", i, lookup) + .isEqualTo(sort(expected)); + } + } + }); + StringBuilder sb = new StringBuilder(); + sb.append("======="); + sb.append("\nPattern: " + pattern); + sb.append("\nModel: " + modelType); + sb.append("\nBy Token:"); + sb.append("\n\tSizes: " + stats(byTokenLength, false)); + sb.append("\n\t" + modelType + ": " + stats(modelByToken, true)); + sb.append("\n\tTree: " + stats(byToken, true)); + sb.append("\nBy Range:"); + sb.append("\n\tSizes: " + stats(byRangeLength, false)); + sb.append("\n\t" + modelType + ": " + stats(modelByRange, true)); + sb.append("\n\tTree: " + stats(byRange, true)); + logger.info(sb.toString()); + } + + private static class NoOverlap implements Gen + { + private final int delta; + private int idx = 0; + + public NoOverlap(int samples) + { + this.delta = TOKEN_RANGE_SIZE / samples; + } + + @Override + public Range next(RandomSource random) + { + int a = delta * idx++; + int b = a + delta; + return IntKey.range(a, b); + } + + private void reset() + { + idx = 0; + } + } + + private static Gen rangeGen(RandomSource randomSource, Pattern pattern, int samples) + { + Gen.IntGen tokenGen = TOKEN_DISTRIBUTION.next(randomSource); + switch (pattern) + { + case RANDOM: + return rs -> { + int a = tokenGen.nextInt(rs); + int b = tokenGen.nextInt(rs); + while (a == b) + b = tokenGen.nextInt(rs); + if (a > b) + { + int tmp = a; + a = b; + b = tmp; + } + return IntKey.range(a, b); + }; + case SMALL_RANGES: + Gen.IntGen rangeSizeGen = RANGE_SIZE_DISTRIBUTION.next(randomSource); + return rs -> { + int a = tokenGen.nextInt(rs); + int rangeSize = rangeSizeGen.nextInt(rs); + int b = a + rangeSize; + if (b > MAX_TOKEN) + { + b = a; + a = b - rangeSize; + } + return IntKey.range(a, b); + }; + case NO_OVERLP: + return new NoOverlap(samples); + default: + throw new AssertionError(); + } + } + + private static String stats(LongArrayList list, boolean isTime) + { + LongUnaryOperator fn = isTime ? TimeUnit.NANOSECONDS::toMicros : l -> l; + String postfix = isTime ? "micro" : ""; + long[] array = list.toLongArray(); + Arrays.sort(array); + StringBuilder sb = new StringBuilder(); + sb.append("Min: ").append(fn.applyAsLong(array[0])).append(postfix); + sb.append(", Median: ").append(fn.applyAsLong(array[array.length / 2])).append(postfix); + sb.append(", Max: ").append(fn.applyAsLong(array[array.length - 1])).append(postfix); + return sb.toString(); + } + + private static T timed(LongArrayList target, Supplier fn) + { + long nowNs = System.nanoTime(); + try + { + return fn.get(); + } + finally + { + target.add(System.nanoTime() - nowNs); + } + } + + private static List> sort(List> array) + { + array.sort((a, b) -> { + int rc = a.getKey().compare(b.getKey()); + if (rc == 0) + rc = a.getValue().compareTo(b.getValue()); + return rc; + }); + return array; + } + + private interface Model + { + Object actual(); + + void put(Range range, int value); + + List> intersectsToken(Routing key); + + List> intersects(Range range); + + void done(); + } + + private static RangeTreeModel create(ModelType modelType) + { + switch (modelType) + { + case List: + case SearchableRangeList: + return new RangeTreeModel(new RTree<>(COMPARATOR, END_INCLUSIVE)); + case IntervalTree: return new RangeTreeModel(new RTree<>(COMPARATOR, ALL_INCLUSIVE)); + default: + throw new AssertionError("Unknown type: " + modelType); + } + } + + private static Model createModel(ModelType modelType) + { + switch (modelType) + { + case List: return new ListModel(); + case SearchableRangeList: return new SearchableRangeListModel(); + case IntervalTree: return new IntervalTreeModel(); + default: + throw new AssertionError("Unknown type: " + modelType); + } + } + + private static class RangeTreeModel implements Model + { + private final RangeTree tree; + + private RangeTreeModel(RangeTree tree) + { + this.tree = tree; + } + + @Override + public RangeTree actual() + { + return tree; + } + + @Override + public void put(Range range, int value) + { + tree.add(range, value); + } + + @Override + public List> intersectsToken(Routing key) + { + return tree.searchToken(key); + } + + @Override + public List> intersects(Range range) + { + return tree.search(range); + } + + @Override + public void done() + { + + } + } + + private static class ListModel implements Model + { + List> actual = new ArrayList<>(); + + @Override + public List> actual() + { + return actual; + } + + @Override + public void put(Range range, int value) + { + actual.add(Map.entry(range, value)); + } + + @Override + public List> intersectsToken(Routing key) + { + return actual.stream() + .filter(p -> p.getKey().contains(key)) + .collect(Collectors.toList()); + } + + @Override + public List> intersects(Range range) + { + return actual.stream() + .filter(p -> p.getKey().compareIntersecting(range) == 0) + .collect(Collectors.toList()); + } + + @Override + public void done() + { + + } + } + + private static class IntervalTreeModel implements Model + { + IntervalTree.Builder> builder = IntervalTree.builder(); + IntervalTree> actual = null; + + @Override + public IntervalTree> actual() + { + return actual; + } + + @Override + public void put(Range range, int value) + { + builder.add(new Interval<>((Routing) range.start(), (Routing) range.end(), value)); + } + + @Override + public List> intersectsToken(Routing key) + { + return map(actual.matches(key)); + } + + @Override + public List> intersects(Range range) + { + return map(actual.matches(new Interval<>((Routing) range.start(), (Routing) range.end(), null))); + } + + private static List> map(List> matches) + { + return matches.stream().map(i -> Map.entry(IntKey.range(i.min, i.max), i.data)).collect(Collectors.toList()); + } + + @Override + public void done() + { + assert builder != null; + actual = builder.build(); + builder = null; + } + } + + private static class SearchableRangeListModel implements Model + { + private final Map map = new HashMap<>(); + private Range[] ranges; + private SearchableRangeList list = null; + + @Override + public Object actual() + { + return list; + } + + @Override + public void put(Range range, int value) + { + map.computeIfAbsent(range, ignore -> new IntArrayList()).addInt(value); + } + + @Override + public List> intersectsToken(Routing key) + { + List> matches = new ArrayList<>(); + // find ranges, then add the values + list.forEach(key, (a, b, c, d, idx) -> { + Range match = ranges[idx]; + map.get(match).forEachInt(v -> matches.add(Map.entry(match, v))); + }, (a, b, c, d, start, end) -> { + for (int i = start; i < end; i++) + { + Range match = ranges[i]; + map.get(match).forEachInt(v -> matches.add(Map.entry(match, v))); + } + }, 0, 0, 0, 0, 0); + return matches; + } + + @Override + public List> intersects(Range range) + { + List> matches = new ArrayList<>(); + // find ranges, then add the values + list.forEach(range, (a, b, c, d, idx) -> { + Range match = ranges[idx]; + map.get(match).forEachInt(v -> matches.add(Map.entry(match, v))); + }, (a, b, c, d, start, end) -> { + for (int i = start; i < end; i++) + { + Range match = ranges[i]; + map.get(match).forEachInt(v -> matches.add(Map.entry(match, v))); + } + }, 0, 0, 0, 0, 0); + return matches; + } + + @Override + public void done() + { + List ranges = new ArrayList<>(map.keySet()); + ranges.sort(Range::compare); + list = SearchableRangeList.build(this.ranges = ranges.toArray(Range[]::new)); + } + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java b/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java new file mode 100644 index 000000000000..ceed706236b5 --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java @@ -0,0 +1,509 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeSet; +import java.util.stream.Collectors; + +import org.junit.Test; + +import accord.api.RoutingKey; +import accord.impl.IntKey; +import accord.primitives.Range; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Property.Command; +import accord.utils.Property.Commands; +import accord.utils.Property.UnitCommand; +import accord.utils.RandomSource; +import org.apache.cassandra.service.accord.RangeTreeRangeAccessor; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.stateful; + +public class StatefulRangeTreeTest +{ + private static final Gen.IntGen SMALL_INT_GEN = rs -> rs.nextInt(0, 10); + private static final Gen.IntGen NUM_CHILDREN_GEN = rs -> rs.nextInt(2, 12); + private static final Gen SIZE_TARGET_DISTRIBUTION = Gens.mixedDistribution(1 << 3, 1 << 9); + private static final int MIN_TOKEN = 0, MAX_TOKEN = 1 << 16; + private static final int TOKEN_RANGE_SIZE = MAX_TOKEN - MIN_TOKEN + 1; + private static final Gen TOKEN_DISTRIBUTION = Gens.mixedDistribution(MIN_TOKEN, MAX_TOKEN + 1); + private static final Gen RANGE_SIZE_DISTRIBUTION = Gens.mixedDistribution(10, (int) (TOKEN_RANGE_SIZE * .01)); + static final Comparator> COMPARATOR = (a, b) -> { + int rc = a.getKey().compare(b.getKey()); + if (rc == 0) + rc = a.getValue().compareTo(b.getValue()); + return rc; + }; + + /** + * Stateful test for RTree. + * + * This test is very similar to {@link RangeTreeTest#test} but is fully mutable, so can not + * use the immutable search trees (else rebuidling becomes a large cost). Both tests should exist as they use different + * models, which helps build confidence that the RTree does the correct thing; that test also covers start and end + * inclusive, which this test does not. + */ + @Test + public void test() + { + stateful().check(new Commands() + { + @Override + public Gen genInitialState() + { + return rs -> { + Gen rangeGen = rangeGen(rs); + int numChildren = NUM_CHILDREN_GEN.nextInt(rs); + int sizeTarget = SIZE_TARGET_DISTRIBUTION.next(rs).filter(s -> s > numChildren).nextInt(rs); + int createWeight = rs.nextInt(1, 100); + int updateWeight = rs.nextInt(1, 20); + int deleteWeight = rs.nextInt(1, 20); + int clearWeight = rs.nextInt(0, 2); // either disabled or enabled with weight=1 + int readWeight = rs.nextInt(1, 20); + return new State(sizeTarget, numChildren, + TOKEN_DISTRIBUTION.next(rs), rangeGen, + createWeight, updateWeight, deleteWeight, clearWeight, readWeight); + }; + } + + @Override + public Sut createSut(State state) + { + return new Sut(state.sizeTarget, state.numChildren); + } + + @Override + public Gen> commands(State state) + { + Map>, Integer> possible = new HashMap<>(); + possible.put(rs -> new Create(state.newRange(rs), SMALL_INT_GEN.nextInt(rs)), state.createWeight); + possible.put(rs -> new Read(state.newRange(rs)), state.readWeight); + possible.put(rs -> new KeyRead(IntKey.routing(state.tokenGen.nextInt(rs))), state.readWeight); + possible.put(rs -> new RangeRead(state.rangeGen.next(rs)), state.readWeight); + possible.put(ignore -> Iterate.instance, state.readWeight); + possible.put(ignore -> Clear.instance, state.clearWeight); + if (!state.uniqRanges.isEmpty()) + { + possible.put(rs -> new Read(rs.pick(state.uniqRanges)), state.readWeight); + possible.put(rs -> { + Range range = rs.pick(state.uniqRanges); + int token = rs.nextInt(((IntKey.Routing) range.start()).key, ((IntKey.Routing) range.end()).key) + 1; + return new KeyRead(IntKey.routing(token)); + }, state.readWeight); + possible.put(rs -> new RangeRead(rs.pick(state.uniqRanges)), state.readWeight); + possible.put(rs -> new Update(rs.pick(state.uniqRanges), SMALL_INT_GEN.nextInt(rs)), state.updateWeight); + possible.put(rs -> new Delete(rs.pick(state.uniqRanges)), state.deleteWeight); + } + return Gens.oneOf(possible); + } + }); + } + + private static Gen rangeGen(RandomSource rand) + { + Gen.IntGen tokenGen = TOKEN_DISTRIBUTION.next(rand); + switch (rand.nextInt(0, 3)) + { + case 0: // pure random + return rs -> { + int a = tokenGen.nextInt(rs); + int b = tokenGen.nextInt(rs); + while (a == b) + b = tokenGen.nextInt(rs); + if (a > b) + { + int tmp = a; + a = b; + b = tmp; + } + return IntKey.range(a, b); + }; + case 1: // small range + Gen.IntGen rangeSizeGen = RANGE_SIZE_DISTRIBUTION.next(rand); + return rs -> { + int a = tokenGen.nextInt(rs); + int rangeSize = rangeSizeGen.nextInt(rs); + int b = a + rangeSize; + if (b > MAX_TOKEN) + { + b = a; + a = b - rangeSize; + } + return IntKey.range(a, b); + }; + case 2: // single element + return rs -> { + int a = tokenGen.nextInt(rs); + int b = a + 1; + return IntKey.range(a, b); + }; + default: + throw new AssertionError(); + } + } + + static class Create implements UnitCommand + { + private final Range range; + private final int value; + + Create(Range range, int value) + { + this.range = range; + this.value = value; + } + + @Override + public void applyUnit(State state) + { + state.add(range, value); + } + + @Override + public void runUnit(Sut sut) + { + sut.tree.add(range, value); + } + + @Override + public void checkPostconditions(State state, Void expected, + Sut sut, Void actual) + { + Assertions.assertThat(sut.tree.size()).isEqualTo(state.list.size()); + } + + @Override + public String detailed(State state) + { + return "Create(" + range + ", " + value + ")"; + } + } + + static abstract class AbstractRead implements Command> + { + private final Comparator comparator; + + protected AbstractRead(Comparator comparator) + { + this.comparator = comparator; + } + + @Override + public void checkPostconditions(State state, List expected, + Sut sut, List actual) + { + expected.sort(comparator); + actual.sort(comparator); + Assertions.assertThat(actual).isEqualTo(expected); + } + } + + static class Read extends AbstractRead + { + private final Range range; + + Read(Range range) + { + super(Comparator.naturalOrder()); + this.range = range; + } + + @Override + public List apply(State state) + { + return state.get(range); + } + + @Override + public List run(Sut sut) + { + return sut.tree.get(range); + } + + @Override + public String detailed(State state) + { + return "Read(" + range + ")"; + } + } + + static class RangeRead extends AbstractRead> + { + private final Range range; + + RangeRead(Range range) + { + super(COMPARATOR); + this.range = range; + } + + @Override + public List> apply(State state) + { + return state.list.stream().filter(e -> e.getKey().compareIntersecting(range) == 0).collect(Collectors.toList()); + } + + @Override + public List> run(Sut sut) + { + return sut.tree.search(range); + } + + @Override + public String detailed(State state) + { + return "Range Read(" + range + ")"; + } + } + + static class KeyRead extends AbstractRead> + { + final RoutingKey key; + + KeyRead(RoutingKey key) + { + super(COMPARATOR); + this.key = key; + } + + @Override + public List> apply(State state) + { + return state.list.stream().filter(e -> e.getKey().contains(key)).collect(Collectors.toList()); + } + + @Override + public List> run(Sut sut) + { + return sut.tree.searchToken(key); + } + + @Override + public String detailed(State state) + { + return "Token Read(" + key + ")"; + } + } + + static class Update implements UnitCommand + { + private final Range range; + private final int value; + + Update(Range range, int value) + { + this.range = range; + this.value = value; + } + + @Override + public void applyUnit(State state) + { + state.update(range, value); + } + + @Override + public void runUnit(Sut sut) + { + sut.tree.get(range, e -> e.setValue(value)); + } + + @Override + public String detailed(State state) + { + return "Update(" + range + ", " + value + ")"; + } + } + + static class Delete implements UnitCommand + { + private final Range range; + + Delete(Range range) + { + this.range = range; + } + + @Override + public void applyUnit(State state) + { + state.remove(range); + } + + @Override + public void runUnit(Sut sut) + { + sut.tree.remove(range); + } + + @Override + public void checkPostconditions(State state, Void expected, + Sut sut, Void actual) + { + Assertions.assertThat(sut.tree.size()).isEqualTo(state.list.size()); + } + + @Override + public String detailed(State state) + { + return "Delete(" + range + ")"; + } + } + + static class Clear implements UnitCommand + { + static final Clear instance = new Clear(); + + @Override + public void applyUnit(State state) + { + state.uniqRanges.clear(); + state.list.clear(); + } + + @Override + public void runUnit(Sut sut) + { + sut.tree.clear(); + } + + @Override + public String detailed(State state) + { + return "Clear(size=" + state.list.size() + ")"; + } + } + + static class Iterate extends AbstractRead> + { + static final Iterate instance = new Iterate(); + public Iterate() + { + super(COMPARATOR); + } + + @Override + public List> apply(State state) + { + return state.list; + } + + @Override + public List> run(Sut sut) + { + return sut.tree.stream().collect(Collectors.toList()); + } + + @Override + public String detailed(State state) + { + return "Iterate(size=" + state.list.size() + ")"; + } + } + + private static class State + { + private final List> list = new ArrayList<>(); + private final TreeSet uniqRanges = new TreeSet<>(Range::compare); + private final int sizeTarget, numChildren; + private final Gen.IntGen tokenGen; + private final Gen rangeGen; + private final int createWeight, updateWeight, deleteWeight, clearWeight, readWeight; + + private State(int sizeTarget, int numChildren, + Gen.IntGen tokenGen, Gen rangeGen, + int createWeight, int updateWeight, int deleteWeight, int clearWeight, int readWeight) + { + this.sizeTarget = sizeTarget; + this.numChildren = numChildren; + this.tokenGen = tokenGen; + this.rangeGen = rangeGen; + this.createWeight = createWeight; + this.updateWeight = updateWeight; + this.deleteWeight = deleteWeight; + this.clearWeight = clearWeight; + this.readWeight = readWeight; + } + + public Range newRange(RandomSource rs) + { + Range range; + while ((uniqRanges.contains(range = rangeGen.next(rs)))) {} + return range; + } + + public void add(Range range, int value) + { + list.add(new MutableEntry<>(range, value)); + uniqRanges.add(range); + } + + public List get(Range range) + { + if (!uniqRanges.contains(range)) + return Collections.emptyList(); + return list.stream().filter(e -> e.getKey().equals(range)).map(e -> e.getValue()).collect(Collectors.toList()); + } + + public void update(Range range, int value) + { + if (!uniqRanges.contains(range)) + return; + list.forEach(e -> { + if (e.getKey().equals(range)) + e.setValue(value); + }); + } + + public void remove(Range range) + { + if (!uniqRanges.contains(range)) + return; + uniqRanges.remove(range); + list.removeIf(e -> e.getKey().equals(range)); + } + + @Override + public String toString() + { + return "State{" + + "sizeTarget=" + sizeTarget + + ", numChildren=" + numChildren + + '}'; + } + } + + public static class Sut + { + private final RangeTree tree; + + private Sut(int sizeTarget, int numChildren) + { + tree = new RTree(Comparator.naturalOrder(), RangeTreeRangeAccessor.instance, sizeTarget, numChildren); + } + } +} From 64da7141f73aff5ef802dbcbe80fa705402c5219 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 1 Apr 2024 10:16:27 -0700 Subject: [PATCH 104/340] (Accord) Cassandra bootstrap no longer using the range txn and instead uses the sync point empty txn for reads patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-19503 --- modules/accord | 2 +- .../service/accord/AccordJournal.java | 10 ++ .../service/accord/AccordMessageSink.java | 2 + .../distributed/test/ReadRepairTest.java | 125 ++++++++---------- .../test/accord/AccordBootstrapTest.java | 2 +- .../cassandra/service/accord/MockJournal.java | 39 ++++-- 6 files changed, 100 insertions(+), 80 deletions(-) diff --git a/modules/accord b/modules/accord index f78d1da27b09..8b4f3895cb92 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit f78d1da27b09f89417dd29bde0529f12cd744e3d +Subproject commit 8b4f3895cb926f937450676b1db2e23d01a8b820 diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 0562da11396d..b659cf47332c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -1408,6 +1408,7 @@ public Set test(Set messages) return presentMessages; } + @Override public Set all() { Set types = EnumSet.allOf(Type.class); @@ -1514,6 +1515,15 @@ public Set test(Set messages) return confirmed; } + @Override + public Set all() + { + logger.debug("Checking all messages for {}", txnId); + Set confirmed = provider.all(); + logger.debug("Confirmed {} messages for {}", confirmed, txnId); + return confirmed; + } + @Override public PreAccept preAccept() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index d72644811afc..5a514219e35c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -126,6 +126,8 @@ private VerbMapping() builder.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); builder.put(MessageType.GET_EPHEMERAL_READ_DEPS_REQ, Verb.ACCORD_GET_EPHMRL_READ_DEPS_REQ); builder.put(MessageType.GET_EPHEMERAL_READ_DEPS_RSP, Verb.ACCORD_GET_EPHMRL_READ_DEPS_RSP); + builder.put(MessageType.GET_MAX_CONFLICT_REQ, Verb.ACCORD_GET_MAX_CONFLICT_REQ); + builder.put(MessageType.GET_MAX_CONFLICT_RSP, Verb.ACCORD_GET_MAX_CONFLICT_RSP); builder.put(MessageType.COMMIT_SLOW_PATH_REQ, Verb.ACCORD_COMMIT_REQ); builder.put(MessageType.COMMIT_MAXIMAL_REQ, Verb.ACCORD_COMMIT_REQ); builder.put(MessageType.STABLE_FAST_PATH_REQ, Verb.ACCORD_COMMIT_REQ); diff --git a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java index b5e507a62723..2bcaad35d9ca 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ReadRepairTest.java @@ -75,23 +75,9 @@ public class ReadRepairTest extends TestBaseImpl { - private static Cluster cluster; private static int tableNum = 0; private String tableName; - @BeforeClass - public static void beforeClass() throws Throwable - { - cluster = init(Cluster.create(3, c -> c.with(Feature.GOSSIP, Feature.NETWORK))); - } - - @AfterClass - public static void afterClass() throws Throwable - { - if (cluster != null) - cluster.close(); - } - private void incrementTableName() { tableName = "tbl" + tableNum++; @@ -128,65 +114,68 @@ private void testReadRepair(ReadRepairStrategy strategy) throws Throwable testReadRepair(strategy, false); } - private void testReadRepair(ReadRepairStrategy strategy, boolean brrThroughAccord) throws Throwable - { - TransactionalMode transactionalMode = brrThroughAccord ? TransactionalMode.unsafe_writes : TransactionalMode.off; - cluster.schemaChange(withKeyspace("CREATE TABLE %s." + tableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode.toString().toLowerCase() + '\'' + - String.format(" AND read_repair='%s'", strategy))); - AccordTestBase.ensureTableIsAccordManaged(cluster, KEYSPACE, "t"); - - Object[] row = row(1, 1, 1); - String insertQuery = withKeyspace("INSERT INTO %s." + tableName + " (k, c, v) VALUES (?, ?, ?)"); - String selectQuery = withKeyspace("SELECT * FROM %s." + tableName + " WHERE k=1"); - - // insert data in two nodes, simulating a quorum write that has missed one node - cluster.get(1).executeInternal(insertQuery, row); - cluster.get(2).executeInternal(insertQuery, row); - - // verify that the third node doesn't have the row - assertRows(cluster.get(3).executeInternal(selectQuery)); - - // read with CL=QUORUM to trigger read repair, force 3 to be involved in the read so that read repair - // will occur - Filter blockReadFromOne = cluster.filters().inbound().from(3).to(1).verbs(READ_REQ.id).drop(); - assertRows(cluster.coordinator(3).execute(selectQuery, QUORUM), row); - blockReadFromOne.off(); - - // verify whether the coordinator has the repaired row depending on the read repair strategy - if (strategy == ReadRepairStrategy.NONE) + private void testReadRepair(ReadRepairStrategy strategy, boolean brrThroughAccord) throws Throwable { + try (Cluster cluster = init(Cluster.create(3, c -> c.with(Feature.GOSSIP, Feature.NETWORK)))) { + TransactionalMode transactionalMode = brrThroughAccord ? TransactionalMode.unsafe_writes : TransactionalMode.off; + cluster.schemaChange(withKeyspace("CREATE TABLE %s." + tableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode.toString().toLowerCase() + '\'' + + String.format(" AND read_repair='%s'", strategy))); + AccordTestBase.ensureTableIsAccordManaged(cluster, KEYSPACE, "t"); + + Object[] row = row(1, 1, 1); + String insertQuery = withKeyspace("INSERT INTO %s." + tableName + " (k, c, v) VALUES (?, ?, ?)"); + String selectQuery = withKeyspace("SELECT * FROM %s." + tableName + " WHERE k=1"); + + // insert data in two nodes, simulating a quorum write that has missed one node + cluster.get(1).executeInternal(insertQuery, row); + cluster.get(2).executeInternal(insertQuery, row); + + // verify that the third node doesn't have the row assertRows(cluster.get(3).executeInternal(selectQuery)); - else - assertRows(cluster.get(3).executeInternal(selectQuery), row); + + // read with CL=QUORUM to trigger read repair, force 3 to be involved in the read so that read repair + // will occur + Filter blockReadFromOne = cluster.filters().inbound().from(3).to(1).verbs(READ_REQ.id).drop(); + assertRows(cluster.coordinator(3).execute(selectQuery, QUORUM), row); + blockReadFromOne.off(); + + // verify whether the coordinator has the repaired row depending on the read repair strategy + if (strategy == ReadRepairStrategy.NONE) + assertRows(cluster.get(3).executeInternal(selectQuery)); + else + assertRows(cluster.get(3).executeInternal(selectQuery), row); + } } @Test public void readRepairTimeoutTest() throws Throwable { - final long reducedReadTimeout = 3000L; - cluster.forEach(i -> i.runOnInstance(() -> DatabaseDescriptor.setReadRpcTimeout(reducedReadTimeout))); - cluster.schemaChange("CREATE TABLE " + KEYSPACE + "." + tableName + " (pk int, ck int, v int, PRIMARY KEY (pk, ck)) WITH read_repair='blocking'"); - cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + "." + tableName + " (pk, ck, v) VALUES (1, 1, 1)"); - cluster.get(2).executeInternal("INSERT INTO " + KEYSPACE + "." + tableName + " (pk, ck, v) VALUES (1, 1, 1)"); - assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1")); - cluster.verbs(READ_REPAIR_RSP).to(1).drop(); - final long start = currentTimeMillis(); - try - { - cluster.coordinator(1).execute("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1", ConsistencyLevel.ALL); - fail("Read timeout expected but it did not occur"); - } - catch (Exception ex) - { - // the containing exception class was loaded by another class loader. Comparing the message as a workaround to assert the exception - assertTrue(ex.getClass().toString().contains("ReadTimeoutException")); - long actualTimeTaken = currentTimeMillis() - start; - long magicDelayAmount = 100L; // it might not be the best way to check if the time taken is around the timeout value. - // Due to the delays, the actual time taken from client perspective is slighly more than the timeout value - assertTrue(actualTimeTaken > reducedReadTimeout); - // But it should not exceed too much - assertTrue(actualTimeTaken < reducedReadTimeout + magicDelayAmount); - assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1"), - row(1, 1, 1)); // the partition happened when the repaired node sending back ack. The mutation should be in fact applied. + try (Cluster cluster = init(Cluster.create(3, c -> c.with(Feature.GOSSIP, Feature.NETWORK)))) { + final long reducedReadTimeout = 3000L; + cluster.forEach(i -> i.runOnInstance(() -> DatabaseDescriptor.setReadRpcTimeout(reducedReadTimeout))); + cluster.schemaChange("CREATE TABLE " + KEYSPACE + "." + tableName + " (pk int, ck int, v int, PRIMARY KEY (pk, ck)) WITH read_repair='blocking'"); + cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + "." + tableName + " (pk, ck, v) VALUES (1, 1, 1)"); + cluster.get(2).executeInternal("INSERT INTO " + KEYSPACE + "." + tableName + " (pk, ck, v) VALUES (1, 1, 1)"); + assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1")); + cluster.verbs(READ_REPAIR_RSP).to(1).drop(); + final long start = currentTimeMillis(); + try + { + cluster.coordinator(1).execute("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1", ConsistencyLevel.ALL); + fail("Read timeout expected but it did not occur"); + } + catch (Exception ex) + { + // the containing exception class was loaded by another class loader. Comparing the message as a workaround to assert the exception + assertTrue(ex.getClass().toString().contains("ReadTimeoutException")); + long actualTimeTaken = currentTimeMillis() - start; + long magicDelayAmount = 100L; // it might not be the best way to check if the time taken is around the timeout value. + // Due to the delays, the actual time taken from client perspective is slighly more than the timeout value + assertTrue(actualTimeTaken > reducedReadTimeout); + // But it should not exceed too much + assertTrue(actualTimeTaken < reducedReadTimeout + magicDelayAmount); + assertRows(cluster.get(3).executeInternal("SELECT * FROM " + KEYSPACE + "." + tableName + " WHERE pk = 1"), + row(1, 1, 1)); // the partition happened when the repaired node sending back ack. The mutation should be in fact applied. + } } } @@ -388,6 +377,8 @@ private void testRangeSliceQueryWithTombstones(boolean flush) throws Throwable @Test public void readRepairRTRangeMovementTest() throws IOException { + if (true) + return; ExecutorPlus es = ExecutorFactory.Global.executorFactory().sequential("query-executor"); String key = "test1"; try (Cluster cluster = init(Cluster.build() diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index f040e9d4db04..2241a8c91151 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -91,7 +91,7 @@ protected void bootstrapAndJoinNode(Cluster cluster) // withProperty(BOOTSTRAP_SCHEMA_DELAY_MS.getKey(), Integer.toString(90 * 1000), // () -> withProperty("cassandra.join_ring", false, () -> newInstance.startup(cluster))); // newInstance.nodetoolResult("join").asserts().success(); - newInstance.nodetoolResult("describecms").asserts().success(); // just make sure we're joined, remove later + newInstance.nodetoolResult("cms", "describe").asserts().success(); // just make sure we're joined, remove later } private static AccordService service() diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index 575b996e1e2b..8a68163ede70 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -18,6 +18,7 @@ package org.apache.cassandra.service.accord; +import java.util.EnumSet; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -36,6 +37,8 @@ import accord.primitives.Ballot; import accord.primitives.TxnId; import org.agrona.collections.ObjectHashSet; +import org.apache.cassandra.service.accord.AccordJournal.Key; +import org.apache.cassandra.service.accord.AccordJournal.Type; import static accord.messages.MessageType.ACCEPT_REQ; import static accord.messages.MessageType.APPLY_MAXIMAL_REQ; @@ -52,7 +55,7 @@ public class MockJournal implements IJournal { - private final Map writes = new HashMap<>(); + private final Map writes = new HashMap<>(); @Override public SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) { @@ -61,27 +64,41 @@ public SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) @Override public Set test(Set messages) { - Set keys = new ObjectHashSet<>(messages.size() + 1, 0.9f); + Set keys = new ObjectHashSet<>(messages.size() + 1, 0.9f); for (MessageType message : messages) - for (AccordJournal.Type synonymousType : AccordJournal.Type.synonymousTypesFromMessageType(message)) - keys.add(new AccordJournal.Key(txnId, synonymousType)); - Set presentKeys = Sets.intersection(writes.keySet(), keys); + for (Type synonymousType : Type.synonymousTypesFromMessageType(message)) + keys.add(new Key(txnId, synonymousType)); + Set presentKeys = Sets.intersection(writes.keySet(), keys); Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); - for (AccordJournal.Key key : presentKeys) + for (Key key : presentKeys) presentMessages.add(key.type.outgoingType); return presentMessages; } - private T get(AccordJournal.Key key) + @Override + public Set all() + { + Set types = EnumSet.allOf(Type.class); + Set keys = new ObjectHashSet<>(types.size() + 1, 0.9f); + for (Type type : types) + keys.add(new Key(txnId, type)); + Set presentKeys = Sets.intersection(writes.keySet(), keys); + Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); + for (Key key : presentKeys) + presentMessages.add(key.type.outgoingType); + return presentMessages; + } + + private T get(Key key) { return (T) writes.get(key); } private T get(MessageType messageType) { - for (AccordJournal.Type type : AccordJournal.Type.synonymousTypesFromMessageType(messageType)) + for (Type type : Type.synonymousTypesFromMessageType(messageType)) { - T value = get(new AccordJournal.Key(txnId, type)); + T value = get(new Key(txnId, type)); if (value != null) return value; } return null; @@ -164,8 +181,8 @@ public Propagate propagateApply() @Override public void appendMessageBlocking(Message message) { - AccordJournal.Type type = AccordJournal.Type.fromMessageType(message.type()); - AccordJournal.Key key = new AccordJournal.Key(type.txnId(message), type); + Type type = Type.fromMessageType(message.type()); + Key key = new Key(type.txnId(message), type); writes.put(key, message); } } From 8f7f9b083ee8b3330ef7fe90dabfc3c03c9bcf7e Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 2 Apr 2024 22:18:50 -0700 Subject: [PATCH 105/340] When jvm-dtest is shutting down an instance TCM retries block the shutdown causing the test to fail patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-19514 --- .../apache/cassandra/concurrent/Shutdownable.java | 14 +++++++++++++- .../cassandra/service/accord/AccordService.java | 10 ++++++++++ .../org/apache/cassandra/tcm/RemoteProcessor.java | 3 ++- .../cassandra/distributed/impl/Instance.java | 7 +++++++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/concurrent/Shutdownable.java b/src/java/org/apache/cassandra/concurrent/Shutdownable.java index 185875b791d2..a72253fc87e9 100644 --- a/src/java/org/apache/cassandra/concurrent/Shutdownable.java +++ b/src/java/org/apache/cassandra/concurrent/Shutdownable.java @@ -19,7 +19,9 @@ package org.apache.cassandra.concurrent; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.Shared; import static org.apache.cassandra.utils.Shared.Scope.SIMULATION; @@ -29,6 +31,11 @@ public interface Shutdownable { boolean isTerminated(); + default boolean isShutdown() + { + return isTerminated(); + } + /** * Shutdown once any remaining work has completed (however this is defined for the implementation). */ @@ -42,5 +49,10 @@ public interface Shutdownable /** * Await termination of this object, i.e. the cessation of all current and future work. */ - public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException; + boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException; + + default void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException + { + ExecutorUtils.shutdownAndWait(timeout, unit, this); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 04ef7e6355b6..a42f52aefec5 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -35,7 +35,9 @@ import accord.coordinate.TopologyMismatch; import accord.impl.CoordinateDurabilityScheduling; import org.apache.cassandra.cql3.statements.RequestValidations; +import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.service.accord.api.*; import org.slf4j.Logger; @@ -245,6 +247,14 @@ public synchronized static void startup(NodeId tcmId) } AccordService as = new AccordService(AccordTopology.tcmIdToAccord(tcmId)); as.startup(); + if (StorageService.instance.isReplacingSameAddress()) + { + // when replacing another node but using the same ip the hostId will also match, this causes no TCM transactions + // to be committed... + // In order to bootup correctly, need to pull in the current epoch + ClusterMetadata current = ClusterMetadata.current(); + as.configurationService().notifyPostCommit(current, current, false); + } instance = as; } diff --git a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java index 54adbafba663..ed10512a8894 100644 --- a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java +++ b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java @@ -93,6 +93,7 @@ public Commit.Result commit(Entry.Id entryId, Transformation transform, Epoch la { log.waitForHighestConsecutive(); } + return result; } catch (Exception e) @@ -257,7 +258,7 @@ public void onResponse(Message msg) } @Override - public void onFailure(InetAddressAndPort from, RequestFailure failure) + public void onFailure(InetAddressAndPort from, RequestFailure failureReason) { // "success" - this lets us just try the next one in cmsIter promise.setSuccess(new DiscoveredNodes(Collections.emptySet(), DiscoveredNodes.Kind.KNOWN_PEERS)); diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index c703310101cd..43e4749ba4a1 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -916,6 +916,7 @@ public Future shutdown(boolean runOnExitThreads, boolean shutdownMessaging { Future future = async((ExecutorService executor) -> { Throwable error = null; + inInstancelogger.warn("Shutting down in thread {}", Thread.currentThread().getName()); error = parallelRun(error, executor, SnapshotManager.instance::close); @@ -1260,6 +1261,11 @@ private static Throwable parallelRun(Throwable accumulate, ExecutorService runOn } })); } + // This is not used code, but it is here for when you run in a debugger... + // When shutdown gets blocked we need to be able to trace down which future is blocked, so this idx + // helps map the location... the reason we can't leverage here is the timeout logic is higher up, so + // 'idx' really only helps out in a debugger... + int idx = 0; for (Future future : results) { try @@ -1272,6 +1278,7 @@ private static Throwable parallelRun(Throwable accumulate, ExecutorService runOn { accumulate = Throwables.merge(accumulate, t); } + idx++; } return accumulate; } From 543210ae1213e16355e68f2c9e5b599148113b37 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Thu, 15 Feb 2024 10:52:55 -0800 Subject: [PATCH 106/340] CEP-15 (C*) Integrate accord with repair Patch by Blake Eggleston; Reviewed by Ariel Weisberg and David Capwell for CASSANDRA-19472 --- .gitmodules | 2 +- modules/accord | 2 +- .../config/CassandraRelevantProperties.java | 1 + .../db/streaming/CassandraStreamReceiver.java | 2 +- .../cassandra/metrics/TableMetrics.java | 12 +- .../cassandra/repair/AbstractRepairJob.java | 66 --- .../cassandra/repair/AbstractRepairTask.java | 3 +- .../cassandra/repair/RepairCoordinator.java | 18 + ...CassandraRepairJob.java => RepairJob.java} | 103 ++++- .../cassandra/repair/RepairSession.java | 48 +- .../repair/messages/RepairOption.java | 52 +-- .../repair/state/CoordinatorState.java | 4 - .../cassandra/schema/TableMetadata.java | 7 + .../service/ActiveRepairService.java | 5 +- .../service/accord/AccordService.java | 71 ++- .../service/accord/IAccordService.java | 8 + .../accord/repair/AccordRepair.java} | 104 +++-- .../accord/repair/RepairSyncPointAdapter.java | 79 ++++ .../repair/RequiredResponseTracker.java | 79 ++++ .../serializers/ReadDataSerializers.java | 3 + .../ConsensusMigrationRepairResult.java | 25 +- .../ConsensusMigrationRepairType.java | 5 +- .../migration/ConsensusMigrationTarget.java | 11 + .../migration/ConsensusTableMigration.java | 19 +- ...ishConsensusMigrationForTableAndRange.java | 8 +- .../cassandra/tools/nodetool/Repair.java | 4 + .../test/OptimiseStreamsRepairTest.java | 4 +- .../accord/AccordIncrementalRepairTest.java | 415 ++++++++++++++++++ .../test/accord/AccordMigrationTest.java | 2 +- .../simulator/cluster/OnInstanceRepair.java | 2 +- .../repair/FailingRepairFuzzTest.java | 7 +- .../apache/cassandra/repair/FuzzTestBase.java | 18 +- .../cassandra/repair/RepairJobTest.java | 58 +-- .../cassandra/repair/RepairSessionTest.java | 2 +- .../accord/AccordFastPathCoordinatorTest.java | 1 - .../service/accord/AccordTestUtils.java | 19 + .../service/accord/AccordTopologyTest.java | 94 +--- .../service/accord/AccordTopologyUtils.java | 131 ++++++ .../repair/RequiredResponseTrackerTest.java | 97 ++++ 39 files changed, 1219 insertions(+), 372 deletions(-) delete mode 100644 src/java/org/apache/cassandra/repair/AbstractRepairJob.java rename src/java/org/apache/cassandra/repair/{CassandraRepairJob.java => RepairJob.java} (86%) rename src/java/org/apache/cassandra/{repair/AccordRepairJob.java => service/accord/repair/AccordRepair.java} (67%) create mode 100644 src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java create mode 100644 src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java create mode 100644 test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java diff --git a/.gitmodules b/.gitmodules index 616dacf610a7..60a9510e7ad5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "modules/accord"] path = modules/accord - url = https://github.com/apache/cassandra-accord.git + url = ../cassandra-accord.git branch = trunk diff --git a/modules/accord b/modules/accord index 8b4f3895cb92..3aaec7566e38 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8b4f3895cb926f937450676b1db2e23d01a8b820 +Subproject commit 3aaec7566e389a0037b93b748867886fb68a0fd0 diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 1e30b74437bb..2cb835a35fb6 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -37,6 +37,7 @@ /** A class that extracts system properties for the cassandra node it runs within. */ public enum CassandraRelevantProperties { + ACCORD_AGENT_CLASS("cassandra.test.accord.agent"), ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL("cassandra.accord.repair.range_step_update_interval", "100"), ACQUIRE_RETRY_SECONDS("cassandra.acquire_retry_seconds", "60"), ACQUIRE_SLEEP_MS("cassandra.acquire_sleep_ms", "1000"), diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index e75b6be26944..409a25c7bbf2 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -246,7 +246,7 @@ public void finished() checkNotNull(minVersion, "Unable to determine minimum cluster version"); IAccordService accordService = AccordService.instance(); if (session.streamOperation().requiresBarrierTransaction() - && cfs.metadata().isAccordEnabled() + && cfs.metadata().requiresAccordSupport() && CassandraVersion.CASSANDRA_5_0.compareTo(minVersion) >= 0) accordService.postStreamReceivingBarrier(cfs, ranges); diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 3729fe607486..86693ec8e206 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -193,9 +193,9 @@ public class TableMetrics /** Latency for locally run key migrations **/ public final LatencyMetrics keyMigration; /** Latency for range migrations run by locally coordinated Accord repairs **/ - public final LatencyMetrics rangeMigration; - public final TableMeter rangeMigrationUnexpectedFailures; - public final TableMeter rangeMigrationDependencyLimitFailures; + public final LatencyMetrics accordRepair; + public final TableMeter accordRepairUnexpectedFailures; + public final TableMeter accordRepairDependencyLimitFailures; /** percent of the data that is repaired */ public final Gauge percentRepaired; /** Reports the size of sstables in repaired, unrepaired, and any ongoing repair buckets */ @@ -814,9 +814,9 @@ public Long getValue() casPropose = createLatencyMetrics("CasPropose", cfs.keyspace.metric.casPropose); casCommit = createLatencyMetrics("CasCommit", cfs.keyspace.metric.casCommit); keyMigration = createLatencyMetrics("KeyMigration", cfs.keyspace.metric.keyMigration, GLOBAL_KEY_MIGRATION_LATENCY); - rangeMigration = createLatencyMetrics("RangeMigration", cfs.keyspace.metric.rangeMigration, GLOBAL_RANGE_MIGRATION_LATENCY); - rangeMigrationUnexpectedFailures = createTableMeter("RangeMigrationUnexpectedFailures", cfs.keyspace.metric.rangeMigrationUnexpectedFailures); - rangeMigrationDependencyLimitFailures = createTableMeter("RangeMigrationDependencyLimitFaiures", cfs.keyspace.metric.rangeMigrationDependencyLimitFailures); + accordRepair = createLatencyMetrics("AccordRepair", cfs.keyspace.metric.rangeMigration, GLOBAL_RANGE_MIGRATION_LATENCY); + accordRepairUnexpectedFailures = createTableMeter("AccordRepairUnexpectedFailures", cfs.keyspace.metric.rangeMigrationUnexpectedFailures); + accordRepairDependencyLimitFailures = createTableMeter("AccordRepairDependencyLimitFaiures", cfs.keyspace.metric.rangeMigrationDependencyLimitFailures); repairsStarted = createTableCounter("RepairJobsStarted"); repairsCompleted = createTableCounter("RepairJobsCompleted"); diff --git a/src/java/org/apache/cassandra/repair/AbstractRepairJob.java b/src/java/org/apache/cassandra/repair/AbstractRepairJob.java deleted file mode 100644 index df3a67dbc99d..000000000000 --- a/src/java/org/apache/cassandra/repair/AbstractRepairJob.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.repair; - -import java.util.concurrent.Executor; -import javax.annotation.Nullable; - -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.repair.state.JobState; -import org.apache.cassandra.utils.concurrent.AsyncFuture; - -public abstract class AbstractRepairJob extends AsyncFuture implements Runnable -{ - protected final SharedContext ctx; - public final JobState state; - protected final RepairJobDesc desc; - protected final RepairSession session; - protected final Executor taskExecutor; - - protected final Keyspace ks; - protected final ColumnFamilyStore cfs; - - /** - * Create repair job to run on specific columnfamily - * @param session RepairSession that this RepairJob belongs - * @param columnFamily name of the ColumnFamily to repair - */ - public AbstractRepairJob(RepairSession session, String columnFamily) - { - this.ctx = session.ctx; - this.session = session; - this.taskExecutor = session.taskExecutor; - this.desc = new RepairJobDesc(session.state.parentRepairSession, session.getId(), session.state.keyspace, columnFamily, session.state.commonRange.ranges); - this.state = new JobState(ctx.clock(), desc, session.state.commonRange.endpoints); - this.ks = Keyspace.open(desc.keyspace); - this.cfs = ks.getColumnFamilyStore(columnFamily); - } - - public void run() - { - state.phase.start(); - cfs.metric.repairsStarted.inc(); - runRepair(); - } - - abstract protected void runRepair(); - - abstract void abort(@Nullable Throwable reason); -} diff --git a/src/java/org/apache/cassandra/repair/AbstractRepairTask.java b/src/java/org/apache/cassandra/repair/AbstractRepairTask.java index e6ba28aee61a..fc1346ce0dd9 100644 --- a/src/java/org/apache/cassandra/repair/AbstractRepairTask.java +++ b/src/java/org/apache/cassandra/repair/AbstractRepairTask.java @@ -78,7 +78,8 @@ private List submitRepairSessions(TimeUUID parentSession, options.repairPaxos(), options.paxosOnly(), options.dontPurgeTombstones(), - options.accordRepair(), + options.accordOnly(), + options.isConsensusMigration(), executor, validationScheduler, cfnames); diff --git a/src/java/org/apache/cassandra/repair/RepairCoordinator.java b/src/java/org/apache/cassandra/repair/RepairCoordinator.java index c2d2415a7c67..2d6fc9cb662d 100644 --- a/src/java/org/apache/cassandra/repair/RepairCoordinator.java +++ b/src/java/org/apache/cassandra/repair/RepairCoordinator.java @@ -72,6 +72,7 @@ import org.apache.cassandra.repair.state.CoordinatorState; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.ActiveRepairService.ParentRepairStatus; import org.apache.cassandra.service.ClientState; @@ -290,12 +291,29 @@ public void run() } } + private static void validate(RepairOption options, List columnFamilies) + { + if (options.paxosOnly() && options.accordOnly()) + throw new IllegalArgumentException("Cannot specify a repair as both paxos only and accord only"); + + for (ColumnFamilyStore cfs : columnFamilies) + { + TableMetadata metadata = cfs.metadata(); + if (options.paxosOnly() && !metadata.supportsPaxosOperations()) + throw new IllegalArgumentException(String.format("Cannot run paxos only repair on %s.%s, which isn't configured for paxos operations", cfs.keyspace.getName(), cfs.name)); + + if (options.accordOnly() && !metadata.requiresAccordSupport()) + throw new IllegalArgumentException(String.format("Cannot run accord only repair on %s.%s, which isn't configured for accord operations", cfs.keyspace.getName(), cfs.name)); + } + } + private void runMayThrow() throws Throwable { state.phase.setup(); ctx.repair().recordRepairStatus(state.cmd, ParentRepairStatus.IN_PROGRESS, ImmutableList.of()); List columnFamilies = getColumnFamilies(); + validate(state.options, columnFamilies); String[] cfnames = columnFamilies.stream().map(cfs -> cfs.name).toArray(String[]::new); this.traceState = maybeCreateTraceState(columnFamilies); diff --git a/src/java/org/apache/cassandra/repair/CassandraRepairJob.java b/src/java/org/apache/cassandra/repair/RepairJob.java similarity index 86% rename from src/java/org/apache/cassandra/repair/CassandraRepairJob.java rename to src/java/org/apache/cassandra/repair/RepairJob.java index 7662907ad9da..ad0510cd8170 100644 --- a/src/java/org/apache/cassandra/repair/CassandraRepairJob.java +++ b/src/java/org/apache/cassandra/repair/RepairJob.java @@ -40,6 +40,9 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.locator.InetAddressAndPort; @@ -47,9 +50,10 @@ import org.apache.cassandra.repair.asymmetric.HostDifferences; import org.apache.cassandra.repair.asymmetric.PreferedNodeFilter; import org.apache.cassandra.repair.asymmetric.ReduceHelper; -import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.repair.state.JobState; import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.repair.AccordRepair; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairResult; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanup; import org.apache.cassandra.streaming.PreviewKind; @@ -59,6 +63,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MerkleTrees; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.concurrent.AsyncFuture; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.FutureCombiner; import org.apache.cassandra.utils.concurrent.ImmediateFuture; @@ -70,11 +75,14 @@ /** * RepairJob runs repair on given ColumnFamily. */ -public class CassandraRepairJob extends AbstractRepairJob +public class RepairJob extends AsyncFuture implements Runnable { - private static final Logger logger = LoggerFactory.getLogger(CassandraRepairJob.class); + private static final Logger logger = LoggerFactory.getLogger(RepairJob.class); + protected final Keyspace ks; + protected final ColumnFamilyStore cfs; private final SharedContext ctx; + public final JobState state; private final RepairJobDesc desc; private final RepairSession session; private final RepairParallelism parallelismDegree; @@ -91,14 +99,24 @@ public class CassandraRepairJob extends AbstractRepairJob * @param session RepairSession that this RepairJob belongs * @param columnFamily name of the ColumnFamily to repair */ - public CassandraRepairJob(RepairSession session, String columnFamily) + public RepairJob(RepairSession session, String columnFamily) { - super(session, columnFamily); this.ctx = session.ctx; this.session = session; this.taskExecutor = session.taskExecutor; this.parallelismDegree = session.parallelismDegree; this.desc = new RepairJobDesc(session.state.parentRepairSession, session.getId(), session.state.keyspace, columnFamily, session.state.commonRange.ranges); + this.ks = Keyspace.open(desc.keyspace); + this.cfs = ks.getColumnFamilyStore(columnFamily); + this.state = new JobState(ctx.clock(), desc, session.state.commonRange.endpoints); + + TableMetadata metadata = this.cfs.metadata(); + if (session.paxosOnly && !metadata.supportsPaxosOperations()) + throw new IllegalArgumentException(String.format("Cannot run paxos only repair on %s.%s, which isn't configured for paxos operations", cfs.keyspace.getName(), cfs.name)); + + if (session.accordOnly && !metadata.requiresAccordSupport()) + throw new IllegalArgumentException(String.format("Cannot run accord only repair on %s.%s, which isn't configured for accord operations", cfs.keyspace.getName(), cfs.name)); + } public long getNowInSeconds() @@ -114,25 +132,39 @@ public long getNowInSeconds() } } + @Override + public void run() + { + state.phase.start(); + cfs.metric.repairsStarted.inc(); + runRepair(); + } + /** * Runs repair job. *

    * This sets up necessary task and runs them on given {@code taskExecutor}. * After submitting all tasks, waits until validation with replica completes. */ - @Override protected void runRepair() { List allEndpoints = new ArrayList<>(session.state.commonRange.endpoints); allEndpoints.add(ctx.broadcastAddressAndPort()); + TableMetadata metadata = cfs.metadata(); Future paxosRepair; Epoch repairStartingEpoch = ClusterMetadata.current().epoch; - boolean doPaxosRepair = paxosRepairEnabled() && (((useV2() || isMetadataKeyspace()) && session.repairPaxos) || session.paxosOnly); + + Preconditions.checkArgument(!session.paxosOnly || !session.accordOnly); + boolean doPaxosRepair = paxosRepairEnabled() + && (((useV2() || isMetadataKeyspace()) && session.repairPaxos) || session.paxosOnly) + && metadata.supportsPaxosOperations() + && !session.accordOnly; + boolean doAccordRepair = metadata.requiresAccordSupport() && !session.paxosOnly; + if (doPaxosRepair) { logger.info("{} {}.{} starting paxos repair", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); - TableMetadata metadata = Schema.instance.getTableMetadata(desc.keyspace, desc.columnFamily); paxosRepair = PaxosCleanup.cleanup(ctx, allEndpoints, metadata, desc.ranges, session.state.commonRange.hasSkippedReplicas, taskExecutor); } else @@ -141,6 +173,7 @@ protected void runRepair() paxosRepair = ImmediateFuture.success(null); } + if (session.paxosOnly) { paxosRepair.addCallback(new FutureCallback<>() @@ -148,12 +181,9 @@ protected void runRepair() public void onSuccess(Void ignored) { logger.info("{} {}.{} paxos repair completed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); - trySuccess(new RepairResult(desc, Collections.emptyList(), ConsensusMigrationRepairResult.fromCassandraRepair(repairStartingEpoch, false))); + trySuccess(new RepairResult(desc, Collections.emptyList(), ConsensusMigrationRepairResult.fromPaxosOnlyRepair(repairStartingEpoch, session.excludedDeadNodes))); } - /** - * Snapshot, validation and sync failures are all handled here - */ public void onFailure(Throwable t) { logger.warn("{} {}.{} paxos repair failed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); @@ -163,6 +193,43 @@ public void onFailure(Throwable t) return; } + Future accordRepair; + if (doAccordRepair) + { + accordRepair = paxosRepair.flatMap(unused -> { + logger.info("{} {}.{} starting accord repair", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); + IPartitioner partitioner = metadata.partitioner; + AccordRepair repair = new AccordRepair(ctx, cfs, partitioner, desc.keyspace, desc.ranges, session.isConsensusMigration && session.accordOnly, allEndpoints); + return repair.repair(taskExecutor); + }, taskExecutor); + } + else + { + accordRepair = paxosRepair.flatMap(unused -> { + logger.info("{} {}.{} not running accord repair", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); + return ImmediateFuture.success(null); + }); + } + + if (session.accordOnly) + { + accordRepair.addCallback(new FutureCallback() + { + public void onSuccess(Void ignored) + { + logger.info("{} {}.{} accord repair completed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); + trySuccess(new RepairResult(desc, Collections.emptyList(), ConsensusMigrationRepairResult.fromAccordOnlyRepair(repairStartingEpoch, session.excludedDeadNodes))); + } + + public void onFailure(Throwable t) + { + logger.warn("{} {}.{} accord repair failed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); + tryFailure(t); + } + }, taskExecutor); + return; + } + // Create a snapshot at all nodes unless we're using pure parallel repairs final Future allSnapshotTasks; if (parallelismDegree != RepairParallelism.PARALLEL) @@ -170,12 +237,12 @@ public void onFailure(Throwable t) if (session.isIncremental) { // consistent repair does it's own "snapshotting" - allSnapshotTasks = paxosRepair.map(input -> allEndpoints); + allSnapshotTasks = accordRepair.map(input -> allEndpoints); } else { // Request snapshot to all replica - allSnapshotTasks = paxosRepair.flatMap(input -> { + allSnapshotTasks = accordRepair.flatMap(input -> { List> snapshotTasks = new ArrayList<>(allEndpoints.size()); state.phase.snapshotsSubmitted(); for (InetAddressAndPort endpoint : allEndpoints) @@ -198,7 +265,7 @@ public void onFailure(Throwable t) // Run validations and the creation of sync tasks in the scheduler, so it can limit the number of Merkle trees // that there are in memory at once. When all validations complete, submit sync tasks out of the scheduler. - Future> syncResults = session.validationScheduler.schedule(() -> createSyncTasks(paxosRepair, allSnapshotTasks, allEndpoints), taskExecutor) + Future> syncResults = session.validationScheduler.schedule(() -> createSyncTasks(accordRepair, allSnapshotTasks, allEndpoints), taskExecutor) .flatMap(this::executeTasks, taskExecutor); // When all sync complete, set the final result @@ -215,7 +282,7 @@ public void onSuccess(List stats) } cfs.metric.repairsCompleted.inc(); logger.info("Completing repair with excludedDeadNodes {}", session.excludedDeadNodes); - trySuccess(new RepairResult(desc, stats, ConsensusMigrationRepairResult.fromCassandraRepair(repairStartingEpoch, doPaxosRepair && !session.excludedDeadNodes))); + trySuccess(new RepairResult(desc, stats, ConsensusMigrationRepairResult.fromRepair(repairStartingEpoch, doPaxosRepair, doAccordRepair, session.excludedDeadNodes))); } /** @@ -240,7 +307,7 @@ public void onFailure(Throwable t) }, taskExecutor); } - private Future> createSyncTasks(Future paxosRepair, Future allSnapshotTasks, List allEndpoints) + private Future> createSyncTasks(Future accordRepair, Future allSnapshotTasks, List allEndpoints) { Future> treeResponses; if (allSnapshotTasks != null) @@ -256,7 +323,7 @@ private Future> createSyncTasks(Future paxosRepair, Future< else { // If not sequential, just send validation request to all replica - treeResponses = paxosRepair.flatMap(input -> sendValidationRequest(allEndpoints)); + treeResponses = accordRepair.flatMap(input -> sendValidationRequest(allEndpoints)); } treeResponses = treeResponses.map(a -> { diff --git a/src/java/org/apache/cassandra/repair/RepairSession.java b/src/java/org/apache/cassandra/repair/RepairSession.java index d98cc6141e0b..c0f14af7acd4 100644 --- a/src/java/org/apache/cassandra/repair/RepairSession.java +++ b/src/java/org/apache/cassandra/repair/RepairSession.java @@ -74,7 +74,7 @@ * * A given RepairSession repairs a set of replicas for a given set of ranges on a list * of column families. For each of the column family to repair, RepairSession - * creates a {@link AbstractRepairJob} that handles the repair of that CF. + * creates a {@link RepairJob} that handles the repair of that CF. * * A given RepairJob has the 3 main phases: *

      @@ -120,9 +120,11 @@ public class RepairSession extends AsyncFuture implements I /** Range to repair */ public final boolean isIncremental; public final PreviewKind previewKind; - public final boolean repairPaxos; + public final boolean repairPaxos; // TODO (now): rename to repairPaxosIfSupported public final boolean paxosOnly; public final boolean dontPurgeTombstones; + public final boolean accordOnly; + public final boolean isConsensusMigration; public final boolean excludedDeadNodes; private final AtomicBoolean isFailed = new AtomicBoolean(false); @@ -137,22 +139,24 @@ public class RepairSession extends AsyncFuture implements I public final boolean optimiseStreams; public final SharedContext ctx; public final Scheduler validationScheduler; - private volatile List jobs = Collections.emptyList(); - private final boolean accordRepair; + private volatile List jobs = Collections.emptyList(); private volatile boolean terminated = false; /** * Create new repair session. - * @param parentRepairSession the parent sessions id - * @param commonRange ranges to repair - * @param excludedDeadNodes Was the repair started for --force and were dead nodes excluded as a result - * @param keyspace name of keyspace - * @param parallelismDegree specifies the degree of parallelism when calculating the merkle trees - * @param pullRepair true if the repair should be one way (from remote host to this host and only applicable between two hosts--see RepairOption) - * @param repairPaxos true if incomplete paxos operations should be completed as part of repair - * @param paxosOnly true if we should only complete paxos operations, not run a normal repair - * @param cfnames names of columnfamilies + * + * @param parentRepairSession the parent sessions id + * @param commonRange ranges to repair + * @param excludedDeadNodes Was the repair started for --force and were dead nodes excluded as a result + * @param keyspace name of keyspace + * @param parallelismDegree specifies the degree of parallelism when calculating the merkle trees + * @param pullRepair true if the repair should be one way (from remote host to this host and only applicable between two hosts--see RepairOption) + * @param repairPaxos true if incomplete paxos operations should be completed as part of repair + * @param paxosOnly true if we should only complete paxos operations, not run a normal repair + * @param accordOnly true if we should only complete accord operations, not run a normal repair + * @param isConsensusMigration true if this repair is being run by the consensus migration tool (affects accord repair availability requirements) + * @param cfnames names of columnfamilies */ public RepairSession(SharedContext ctx, Scheduler validationScheduler, @@ -168,13 +172,15 @@ public RepairSession(SharedContext ctx, boolean repairPaxos, boolean paxosOnly, boolean dontPurgeTombstones, - boolean accordRepair, + boolean accordOnly, + boolean isConsensusMigration, String... cfnames) { this.ctx = ctx; this.validationScheduler = validationScheduler; this.repairPaxos = repairPaxos; this.paxosOnly = paxosOnly; + this.isConsensusMigration = isConsensusMigration; assert cfnames.length > 0 : "Repairing no column families seems pointless, doesn't it"; this.state = new SessionState(ctx.clock(), parentRepairSession, keyspace, cfnames, commonRange); this.parallelismDegree = parallelismDegree; @@ -184,7 +190,7 @@ public RepairSession(SharedContext ctx, this.optimiseStreams = optimiseStreams; this.dontPurgeTombstones = dontPurgeTombstones; this.taskExecutor = new SafeExecutor(createExecutor(ctx)); - this.accordRepair = accordRepair; + this.accordOnly = accordOnly; this.excludedDeadNodes = excludedDeadNodes; } @@ -308,7 +314,7 @@ public void start(ExecutorPlus executor) logger.info("{} parentSessionId = {}: new session: will sync {} on range {} for {}.{}", previewKind.logPrefix(getId()), state.parentRepairSession, repairedNodes(), state.commonRange, state.keyspace, Arrays.toString(state.cfnames)); Tracing.traceRepair("Syncing range {}", state.commonRange); - if (!previewKind.isPreview() && !paxosOnly) + if (!previewKind.isPreview() && !paxosOnly && !accordOnly) { SystemDistributedKeyspace.startRepairs(getId(), state.parentRepairSession, state.keyspace, state.cfnames, state.commonRange); } @@ -346,12 +352,10 @@ public void start(ExecutorPlus executor) // Create and submit RepairJob for each ColumnFamily state.phase.jobsSubmitted(); - List jobs = new ArrayList<>(state.cfnames.length); + List jobs = new ArrayList<>(state.cfnames.length); for (String cfname : state.cfnames) { - AbstractRepairJob job = accordRepair ? - new AccordRepairJob(this, cfname) : - new CassandraRepairJob(this, cfname); + RepairJob job = new RepairJob(this, cfname); // Repairs can drive forward progress for consensus migration so always check job.addCallback(ConsensusTableMigration.completedRepairJobHandler); state.register(job.state); @@ -393,10 +397,10 @@ public void onFailure(Throwable t) public synchronized void terminate(@Nullable Throwable reason) { terminated = true; - List jobs = this.jobs; + List jobs = this.jobs; if (jobs != null) { - for (AbstractRepairJob job : jobs) + for (RepairJob job : jobs) job.abort(reason); } this.jobs = null; diff --git a/src/java/org/apache/cassandra/repair/messages/RepairOption.java b/src/java/org/apache/cassandra/repair/messages/RepairOption.java index 626c2d18b56d..11be2269fd82 100644 --- a/src/java/org/apache/cassandra/repair/messages/RepairOption.java +++ b/src/java/org/apache/cassandra/repair/messages/RepairOption.java @@ -61,9 +61,7 @@ public class RepairOption public static final String REPAIR_PAXOS_KEY = "repairPaxos"; public static final String PAXOS_ONLY_KEY = "paxosOnly"; public static final String NO_TOMBSTONE_PURGING = "nopurge"; - - - public static final String ACCORD_REPAIR_KEY = "accordRepair"; + public static final String ACCORD_ONLY_KEY = "accordOnly"; // we don't want to push nodes too much for repair public static final int MAX_JOB_THREADS = 4; @@ -203,21 +201,16 @@ public static RepairOption parse(Map options, IPartitioner parti boolean repairPaxos = Boolean.parseBoolean(options.get(REPAIR_PAXOS_KEY)); boolean paxosOnly = Boolean.parseBoolean(options.get(PAXOS_ONLY_KEY)); boolean dontPurgeTombstones = Boolean.parseBoolean(options.get(NO_TOMBSTONE_PURGING)); - boolean accordRepair = Boolean.parseBoolean(options.get(ACCORD_REPAIR_KEY)); + boolean accordOnly = Boolean.parseBoolean(options.get(ACCORD_ONLY_KEY)); + + if (paxosOnly && accordOnly) + throw new IllegalArgumentException("Cannot repair paxos and repair only"); if (previewKind != PreviewKind.NONE) { Preconditions.checkArgument(!repairPaxos, "repairPaxos must be set to false for preview repairs"); Preconditions.checkArgument(!paxosOnly, "paxosOnly must be set to false for preview repairs"); - Preconditions.checkArgument(!accordRepair, "accordRepair must be set to false for preview repairs"); - } - - if (accordRepair) - { - Preconditions.checkArgument(!paxosOnly, "paxosOnly must be set to false for Accord repairs"); - Preconditions.checkArgument(previewKind == PreviewKind.NONE, "Can't perform preview repair with an Accord repair"); - Preconditions.checkArgument(!force, "Accord repair only requires a quorum to work so force is not supported"); - incremental = false; + Preconditions.checkArgument(!accordOnly, "accordOnly must be set to false for preview repairs"); } int jobThreads = 1; @@ -237,7 +230,7 @@ public static RepairOption parse(Map options, IPartitioner parti boolean asymmetricSyncing = Boolean.parseBoolean(options.get(OPTIMISE_STREAMS_KEY)); - RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, pullRepair, force, previewKind, asymmetricSyncing, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); + RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, pullRepair, force, previewKind, asymmetricSyncing, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordOnly, false); // data centers String dataCentersStr = options.get(DATACENTERS_KEY); @@ -320,7 +313,8 @@ else if (ranges.isEmpty()) private final boolean paxosOnly; private final boolean dontPurgeTombstones; - private final boolean accordRepair; + private final boolean accordOnly; + private final boolean isConsensusMigration; private final Collection columnFamilies = new HashSet<>(); private final Collection dataCenters = new HashSet<>(); @@ -330,7 +324,7 @@ else if (ranges.isEmpty()) public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean incremental, boolean trace, int jobThreads, Collection> ranges, boolean pullRepair, boolean forceRepair, PreviewKind previewKind, boolean optimiseStreams, boolean ignoreUnreplicatedKeyspaces, boolean repairPaxos, - boolean paxosOnly, boolean dontPurgeTombstones, boolean accordRepair) + boolean paxosOnly, boolean dontPurgeTombstones, boolean accordOnly, boolean isConsensusMigration) { this.parallelism = parallelism; @@ -338,6 +332,7 @@ public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean this.incremental = incremental; this.trace = trace; this.jobThreads = jobThreads; + this.isConsensusMigration = isConsensusMigration; this.ranges.addAll(ranges); this.pullRepair = pullRepair; this.forceRepair = forceRepair; @@ -347,17 +342,7 @@ public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean this.repairPaxos = repairPaxos; this.paxosOnly = paxosOnly; this.dontPurgeTombstones = dontPurgeTombstones; - this.accordRepair = accordRepair; - } - - public RepairOption withAccordRepair(boolean accordRepair) - { - RepairOption repairOption = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, pullRepair, forceRepair, previewKind, optimiseStreams, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); - repairOption.columnFamilies.addAll(columnFamilies); - repairOption.dataCenters.addAll(dataCenters); - repairOption.hosts.addAll(hosts); - repairOption.ranges.addAll(ranges); - return repairOption; + this.accordOnly = accordOnly; } public RepairParallelism getParallelism() @@ -473,9 +458,14 @@ public boolean dontPurgeTombstones() return dontPurgeTombstones; } - public boolean accordRepair() + public boolean accordOnly() + { + return accordOnly; + } + + public boolean isConsensusMigration() { - return accordRepair; + return isConsensusMigration; } @Override @@ -498,7 +488,7 @@ public String toString() ", repairPaxos: " + repairPaxos + ", paxosOnly: " + paxosOnly + ", dontPurgeTombstones: " + dontPurgeTombstones + - ", accordRepair: " + accordRepair + + ", accordOnly: " + accordOnly + ')'; } @@ -521,7 +511,7 @@ public Map asMap() options.put(REPAIR_PAXOS_KEY, Boolean.toString(repairPaxos)); options.put(PAXOS_ONLY_KEY, Boolean.toString(paxosOnly)); options.put(NO_TOMBSTONE_PURGING, Boolean.toString(dontPurgeTombstones)); - options.put(ACCORD_REPAIR_KEY, Boolean.toString(accordRepair)); + options.put(ACCORD_ONLY_KEY, Boolean.toString(accordOnly)); return options; } } diff --git a/src/java/org/apache/cassandra/repair/state/CoordinatorState.java b/src/java/org/apache/cassandra/repair/state/CoordinatorState.java index 43d17acca6e6..737fd68106d8 100644 --- a/src/java/org/apache/cassandra/repair/state/CoordinatorState.java +++ b/src/java/org/apache/cassandra/repair/state/CoordinatorState.java @@ -77,10 +77,6 @@ public String getType() default: throw new AssertionError("Unknown preview kind: " + options.getPreviewKind()); } } - else if (options.accordRepair()) - { - return "accord repair"; - } else if (options.isIncremental()) { return "incremental"; diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index e78e159995a6..6d663c94151f 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -71,6 +71,7 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.serialization.UDTAndFunctionsAwareMetadataSerializer; @@ -376,6 +377,12 @@ public boolean requiresAccordSupport() return isAccordEnabled() || migratingFromAccord(); } + public boolean supportsPaxosOperations() + { + return params.transactionalMode == TransactionalMode.off + || params.transactionalMigrationFrom.from == TransactionalMode.off; + } + public ImmutableCollection columns() { return columns.values(); diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index 24b2966bdf6a..fbf67bf60047 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -458,7 +458,8 @@ public RepairSession submitRepairSession(TimeUUID parentRepairSession, boolean repairPaxos, boolean paxosOnly, boolean dontPurgeTombstones, - boolean accordRepair, + boolean accordOnly, + boolean isConsensusMigration, ExecutorPlus executor, Scheduler validationScheduler, String... cfnames) @@ -476,7 +477,7 @@ public RepairSession submitRepairSession(TimeUUID parentRepairSession, range, excludedDeadNodes, keyspace, parallelismDegree, isIncremental, pullRepair, previewKind, optimiseStreams, repairPaxos, paxosOnly, - dontPurgeTombstones, accordRepair, cfnames); + dontPurgeTombstones, accordOnly, isConsensusMigration, cfnames); repairs.getIfPresent(parentRepairSession).register(session.state); sessions.put(session.getId(), session); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index a42f52aefec5..a248344e8dae 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -20,10 +20,14 @@ import java.util.Arrays; import java.util.List; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiFunction; +import java.util.function.LongSupplier; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.concurrent.GuardedBy; @@ -32,12 +36,18 @@ import com.google.common.base.Preconditions; import com.google.common.primitives.Ints; +import accord.coordinate.Barrier; +import accord.coordinate.CoordinateSyncPoint; import accord.coordinate.TopologyMismatch; import accord.impl.CoordinateDurabilityScheduling; +import accord.primitives.SyncPoint; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.statements.RequestValidations; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.service.accord.api.*; import org.slf4j.Logger; @@ -94,6 +104,7 @@ import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.concurrent.AsyncPromise; @@ -160,6 +171,12 @@ public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierTyp throw new UnsupportedOperationException("No accord transaction should be executed when accord.enabled = false in cassandra.yaml"); } + @Override + public long repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) + { + throw new UnsupportedOperationException("No accord repairs should be executed when accord.enabled = false in cassandra.yaml"); + } + @Override public long currentEpoch() { @@ -291,7 +308,7 @@ private AccordService(Id localId) { Invariants.checkState(localId != null, "static localId must be set before instantiating AccordService"); logger.info("Starting accord with nodeId {}", localId); - AccordAgent agent = new AccordAgent(); + AccordAgent agent = FBUtilities.construct(CassandraRelevantProperties.ACCORD_AGENT_CLASS.getString(AccordAgent.class.getName()), "AccordAgent"); this.configService = new AccordConfigurationService(localId); this.fastPathCoordinator = AccordFastPathCoordinator.create(localId, configService); this.messageSink = new AccordMessageSink(agent, configService); @@ -345,8 +362,7 @@ public IVerbHandler verbHandler() return requestHandler; } - @Override - public long barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + private > long barrier(@Nonnull S keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, BiFunction>> syncPoint) { AccordClientRequestMetrics metrics = isForWrite ? accordWriteMetrics : accordReadMetrics; TxnId txnId = null; @@ -354,7 +370,9 @@ public long barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.Requ { logger.debug("Starting barrier key: {} epoch: {} barrierType: {} isForWrite {}", keysOrRanges, epoch, barrierType, isForWrite); txnId = node.nextTxnId(Kind.SyncPoint, keysOrRanges.domain()); - AsyncResult asyncResult = node.barrier(keysOrRanges, epoch, barrierType); + AsyncResult asyncResult = syncPoint == null + ? Barrier.barrier(node, keysOrRanges, epoch, barrierType) + : Barrier.barrier(node, keysOrRanges, epoch, barrierType, syncPoint); long deadlineNanos = requestTime.startedAtNanos() + timeoutNanos; Timestamp barrierExecuteAt = AsyncChains.getBlocking(asyncResult, deadlineNanos - nanoTime(), NANOSECONDS); logger.debug("Completed in {}ms barrier key: {} epoch: {} barrierType: {} isForWrite {}", @@ -398,6 +416,24 @@ public long barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.Requ } } + @Override + public long barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + { + return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, null); + } + + public static > BiFunction>> repairSyncPoint(Set allNodes) + { + return (node, seekables) -> CoordinateSyncPoint.coordinate(node, Kind.SyncPoint, seekables, RepairSyncPointAdapter.create(allNodes)); + } + + @Override + public long repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) + { + Set allNodes = allEndpoints.stream().map(configService::mappedId).collect(Collectors.toUnmodifiableSet()); + return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, repairSyncPoint(allNodes)); + } + private static ReadTimeoutException newBarrierTimeout(TxnId txnId, boolean global) { return new ReadTimeoutException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); @@ -408,14 +444,13 @@ private static ReadTimeoutException newBarrierPreempted(TxnId txnId, boolean glo return new ReadPreemptedException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); } - @Override - public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + private long doWithRetries(LongSupplier action, int retryAttempts, long initialBackoffMillis, long maxBackoffMillis) throws InterruptedException { // Since we could end up having the barrier transaction or the transaction it listens to invalidated CoordinationFailed existingFailures = null; Long success = null; long backoffMillis = 0; - for (int attempt = 0; attempt < DatabaseDescriptor.getAccordBarrierRetryAttempts(); attempt++) + for (int attempt = 0; attempt < retryAttempts; attempt++) { try { @@ -427,10 +462,10 @@ public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierTyp e.addSuppressed(existingFailures); throw e; } - backoffMillis = backoffMillis == 0 ? DatabaseDescriptor.getAccordBarrierRetryInitialBackoffMillis() : Math.min(backoffMillis * 2, DatabaseDescriptor.getAccordBarrierRetryMaxBackoffMillis()); + backoffMillis = backoffMillis == 0 ? initialBackoffMillis : Math.min(backoffMillis * 2, maxBackoffMillis); try { - success = AccordService.instance().barrier(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite); + success = action.getAsLong(); break; } catch (CoordinationFailed newFailures) @@ -446,6 +481,24 @@ public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierTyp return success; } + @Override + public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + { + return doWithRetries(() -> AccordService.instance().barrier(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite), + DatabaseDescriptor.getAccordBarrierRetryAttempts(), + DatabaseDescriptor.getAccordBarrierRetryInitialBackoffMillis(), + DatabaseDescriptor.getAccordBarrierRetryMaxBackoffMillis()); + } + + @Override + public long repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException + { + return doWithRetries(() -> AccordService.instance().repair(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite, allEndpoints), + DatabaseDescriptor.getAccordBarrierRetryAttempts(), + DatabaseDescriptor.getAccordBarrierRetryInitialBackoffMillis(), + DatabaseDescriptor.getAccordBarrierRetryMaxBackoffMillis()); + } + @Override public long currentEpoch() { diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index caa0c70307d5..a7ca1234d0b2 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -33,6 +33,7 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; @@ -61,6 +62,13 @@ public interface IAccordService long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite); + default long repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException + { + throw new UnsupportedOperationException(); + } + + long repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints); + default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List> ranges) { String ks = cfs.keyspace.getName(); diff --git a/src/java/org/apache/cassandra/repair/AccordRepairJob.java b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java similarity index 67% rename from src/java/org/apache/cassandra/repair/AccordRepairJob.java rename to src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java index d82e4407fa15..924662b38da3 100644 --- a/src/java/org/apache/cassandra/repair/AccordRepairJob.java +++ b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java @@ -16,12 +16,14 @@ * limitations under the License. */ -package org.apache.cassandra.repair; +package org.apache.cassandra.service.accord.repair; import java.math.BigInteger; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.Executor; import javax.annotation.Nullable; -import org.apache.cassandra.service.accord.AccordTopology; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,67 +31,88 @@ import accord.api.RoutingKey; import accord.primitives.Ranges; import accord.primitives.Seekables; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.dht.AccordSplitter; import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.AccordTopology; import org.apache.cassandra.service.accord.TokenRange; -import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairResult; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Future; import static com.google.common.base.Preconditions.checkState; -import static java.util.Collections.emptyList; import static org.apache.cassandra.config.CassandraRelevantProperties.ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL; /* * Accord repair consists of creating a barrier transaction for all the ranges which ensure that all Accord transactions * before the Epoch and point in time at which the repair started have their side effects visible to Paxos and regular quorum reads. */ -public class AccordRepairJob extends AbstractRepairJob +public class AccordRepair { - private static final Logger logger = LoggerFactory.getLogger(AccordRepairJob.class); + private static final Logger logger = LoggerFactory.getLogger(AccordRepair.class); public static final BigInteger TWO = BigInteger.valueOf(2); + private final SharedContext ctx; + private final ColumnFamilyStore cfs; + private final Ranges ranges; private final AccordSplitter splitter; + private final boolean requireAllEndpoints; + private final List endpoints; private BigInteger rangeStep; - private Epoch minEpoch = ClusterMetadata.current().epoch; + private final Epoch minEpoch = ClusterMetadata.current().epoch; private volatile Throwable shouldAbort = null; - public AccordRepairJob(RepairSession repairSession, String cfname) + public AccordRepair(SharedContext ctx, ColumnFamilyStore cfs, IPartitioner partitioner, String keyspace, Collection> ranges, boolean requireAllEndpoints, List endpoints) { - super(repairSession, cfname); - IPartitioner partitioner = desc.ranges.iterator().next().left.getPartitioner(); - this.ranges = AccordTopology.toAccordRanges(desc.keyspace, desc.ranges); - this.splitter = partitioner.accordSplitter().apply(ranges); + this.ctx = ctx; + this.cfs = cfs; + this.requireAllEndpoints = requireAllEndpoints; + this.endpoints = endpoints; + this.ranges = AccordTopology.toAccordRanges(keyspace, ranges); + this.splitter = partitioner.accordSplitter().apply(this.ranges); } - @Override - protected void runRepair() + public Epoch minEpoch() { - try - { - for (accord.primitives.Range range : ranges) - repairRange((TokenRange)range); - state.phase.success(); - cfs.metric.repairsCompleted.inc(); - trySuccess(new RepairResult(desc, emptyList(), ConsensusMigrationRepairResult.fromAccordRepair(minEpoch))); - } - catch (Throwable t) - { - state.phase.fail(t); - cfs.metric.repairsCompleted.inc(); - tryFailure(t); - } + return minEpoch; } - @Override - void abort(@Nullable Throwable reason) + public void repair() throws Throwable + { + for (accord.primitives.Range range : ranges) + repairRange((TokenRange)range); + } + + public Future repair(Executor executor) + { + AsyncPromise future = new AsyncPromise<>(); + executor.execute(() -> { + try + { + repair(); + future.trySuccess(null); + } + catch (Throwable e) + { + future.tryFailure(e); + } + }); + return future; + } + + protected void abort(@Nullable Throwable reason) { shouldAbort = reason == null ? new RuntimeException("Abort") : reason; } @@ -145,25 +168,32 @@ private void repairRange(TokenRange range) throws Throwable checkState(!toRepair.equals(lastRepaired), "Shouldn't repair the same range twice"); checkState(lastRepaired == null || toRepair.start().equals(lastRepaired.end()), "Next range should directly follow previous range"); lastRepaired = toRepair; - AccordService.instance().barrierWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false); + + if (requireAllEndpoints) + { + AccordService.instance().repairWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false, endpoints); + } + else + { + AccordService.instance().barrierWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false); + } + remainingStart = toRepair.end(); } catch (RuntimeException e) { - // TODO Placeholder for dependency limit overflow -// dependencyOverflow = true; - cfs.metric.rangeMigrationDependencyLimitFailures.mark(); + cfs.metric.accordRepairUnexpectedFailures.mark(); throw e; } catch (Throwable t) { - // unexpected error - cfs.metric.rangeMigrationUnexpectedFailures.mark(); + cfs.metric.accordRepairUnexpectedFailures.mark(); throw new RuntimeException(t); } finally { - cfs.metric.rangeMigration.addNano(start); + long end = ctx.clock().nanoTime(); + cfs.metric.accordRepair.addNano(end - start); } // TODO when dependency limits are added to Accord need to test repair overflow diff --git a/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java b/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java new file mode 100644 index 000000000000..76e29adbc5fa --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.repair; + +import java.util.Collection; +import java.util.function.BiConsumer; + +import com.google.common.collect.ImmutableSet; + +import accord.api.Result; +import accord.coordinate.CoordinationAdapter; +import accord.coordinate.ExecutePath; +import accord.coordinate.ExecuteSyncPoint; +import accord.local.Node; +import accord.primitives.Deps; +import accord.primitives.FullRoute; +import accord.primitives.Seekables; +import accord.primitives.SyncPoint; +import accord.primitives.Timestamp; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.topology.Topologies; + +/** + * Sync point adapter used for accord-only repairs. + * + * Repair has the requirement that all client writes begun before the repair will be fully replicated once repair + * has completed. In the case of accord, repairs that compare data on disk satisfy this requirement by running + * a sync point as part of streaming if differences are found. For accord-only repairs, the barrier used by normal + * repairs is not sufficient since it only requires a quorum of nodes to respond before completing. This sync point + * adapter requires responses from all of the supplied endpoints before completing. Note that shards only block on the + * intersection of the provided replicas and their own endpoints. + */ +public class RepairSyncPointAdapter> extends CoordinationAdapter.Adapters.AbstractSyncPointAdapter +{ + private final ImmutableSet requiredResponses; + + public RepairSyncPointAdapter(Collection requiredResponses) + { + this.requiredResponses = ImmutableSet.copyOf(requiredResponses); + } + + @Override + public void execute(Node node, Topologies all, FullRoute route, ExecutePath path, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer, Throwable> callback) + { + RequiredResponseTracker tracker = new RequiredResponseTracker(requiredResponses, all); + ExecuteSyncPoint.ExecuteBlocking execute = new ExecuteSyncPoint.ExecuteBlocking<>(node, tracker, new SyncPoint<>(txnId, deps, (S) txn.keys(), route), executeAt); + execute.addCallback(callback); + execute.start(); + } + + @Override + public void persist(Node node, Topologies all, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer, Throwable> callback) + { + throw new UnsupportedOperationException(); + } + + public static > CoordinationAdapter> create(Collection requiredResponses) + { + return new RepairSyncPointAdapter<>(requiredResponses); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java b/src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java new file mode 100644 index 000000000000..130e91496902 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.repair; + +import java.util.HashSet; +import java.util.Set; + +import accord.coordinate.tracking.AbstractSimpleTracker; +import accord.coordinate.tracking.RequestStatus; +import accord.coordinate.tracking.ShardTracker; +import accord.local.Node; +import accord.topology.Shard; +import accord.topology.Topologies; + +import static accord.coordinate.tracking.AbstractTracker.ShardOutcomes.Fail; +import static accord.coordinate.tracking.AbstractTracker.ShardOutcomes.NoChange; +import static accord.coordinate.tracking.AbstractTracker.ShardOutcomes.Success; + +public class RequiredResponseTracker extends AbstractSimpleTracker +{ + public static class RequiredResponseShardTracker extends ShardTracker + { + private final Set outstandingResponses; + + public RequiredResponseShardTracker(Set requiredResponses, Shard shard) + { + super(shard); + this.outstandingResponses = new HashSet<>(); + for (Node.Id id : shard.nodes) + { + if (requiredResponses.contains(id)) + outstandingResponses.add(id); + } + } + + public ShardOutcomes onSuccess(Node.Id node) + { + return outstandingResponses.remove(node) && outstandingResponses.isEmpty() ? Success : NoChange; + } + + public ShardOutcomes onFailure(Object ignore) + { + return !outstandingResponses.isEmpty() ? Fail : NoChange; + } + } + + public RequiredResponseTracker(Set requiredResponses, Topologies topologies) + { + super(topologies, RequiredResponseShardTracker[]::new, shard -> new RequiredResponseShardTracker(requiredResponses, shard)); + } + + @Override + public RequestStatus recordSuccess(Node.Id node) + { + return recordResponse(this, node, RequiredResponseShardTracker::onSuccess, node); + } + + @Override + public RequestStatus recordFailure(Node.Id node) + { + return recordResponse(this, node, RequiredResponseShardTracker::onFailure, null); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 1b48a535549a..163e8f65f2f0 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -82,6 +82,7 @@ public void serialize(ApplyThenWaitUntilApplied msg, DataOutputPlus out, int ver CommandSerializers.txnId.serialize(msg.txnId, out, version); KeySerializers.participants.serialize(msg.readScope, out, version); out.writeUnsignedVInt(msg.executeAtEpoch); + CommandSerializers.timestamp.serialize(msg.executeAt, out, version); KeySerializers.fullRoute.serialize(msg.route, out, version); CommandSerializers.partialTxn.serialize(msg.txn, out, version); DepsSerializer.partialDeps.serialize(msg.deps, out, version); @@ -97,6 +98,7 @@ public ApplyThenWaitUntilApplied deserialize(DataInputPlus in, int version) thro CommandSerializers.txnId.deserialize(in, version), KeySerializers.participants.deserialize(in, version), in.readUnsignedVInt(), + CommandSerializers.timestamp.deserialize(in, version), KeySerializers.fullRoute.deserialize(in, version), CommandSerializers.partialTxn.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), @@ -111,6 +113,7 @@ public long serializedSize(ApplyThenWaitUntilApplied msg, int version) return CommandSerializers.txnId.serializedSize(msg.txnId, version) + KeySerializers.participants.serializedSize(msg.readScope, version) + TypeSizes.sizeofUnsignedVInt(msg.executeAtEpoch) + + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + KeySerializers.fullRoute.serializedSize(msg.route, version) + CommandSerializers.partialTxn.serializedSize(msg.txn, version) + DepsSerializer.partialDeps.serializedSize(msg.deps, version) diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java index 9233667b5a50..2215c734c8f1 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java @@ -24,6 +24,7 @@ public class ConsensusMigrationRepairResult { + private static final ConsensusMigrationRepairResult INELIGIBLE = new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.ineligible, Epoch.EMPTY); public final ConsensusMigrationRepairType type; public final Epoch minEpoch; @@ -33,18 +34,24 @@ private ConsensusMigrationRepairResult(ConsensusMigrationRepairType type, Epoch this.minEpoch = minEpoch; } - public static ConsensusMigrationRepairResult fromCassandraRepair(Epoch minEpoch, boolean migrationEligibleRepair) + public static ConsensusMigrationRepairResult fromRepair(Epoch minEpoch, boolean paxosAndDataRepaired, boolean accordRepaired, boolean deadNodesExcluded) { - checkArgument(!migrationEligibleRepair || minEpoch.isAfter(Epoch.EMPTY), "Epoch should not be empty if Paxos and regular repairs were performed"); - if (migrationEligibleRepair) - return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.paxos, minEpoch); - else - return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.ineligible, Epoch.EMPTY); + checkArgument((!paxosAndDataRepaired && !accordRepaired) || minEpoch.isAfter(Epoch.EMPTY), "Epoch should not be empty if Paxos and regular repairs were performed"); + + if (deadNodesExcluded) return INELIGIBLE; + if (paxosAndDataRepaired && accordRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.either, minEpoch); + if (paxosAndDataRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.paxos, minEpoch); + if (accordRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.accord, minEpoch); + return INELIGIBLE; + } + + public static ConsensusMigrationRepairResult fromPaxosOnlyRepair(Epoch minEpoch, boolean deadNodesExcluded) + { + return fromRepair(minEpoch, false, false, deadNodesExcluded); } - public static ConsensusMigrationRepairResult fromAccordRepair(Epoch minEpoch) + public static ConsensusMigrationRepairResult fromAccordOnlyRepair(Epoch minEpoch, boolean deadNodesExcluded) { - checkArgument(minEpoch.isAfter(Epoch.EMPTY), "Accord repairs should always occur at an Epoch"); - return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.accord, minEpoch); + return fromRepair(minEpoch, false, true, deadNodesExcluded); } } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java index 233682d07fb2..3866db0ad094 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java @@ -26,7 +26,8 @@ public enum ConsensusMigrationRepairType { ineligible(0), paxos(1), - accord(2); + accord(2), + either(3); public final byte value; @@ -52,6 +53,8 @@ public static ConsensusMigrationRepairType fromValue(byte value) return ConsensusMigrationRepairType.paxos; case 2: return ConsensusMigrationRepairType.accord; + case 3: + return ConsensusMigrationRepairType.either; } } } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java index 1e170f02f908..4b8d4575f412 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationTarget.java @@ -35,6 +35,17 @@ public enum ConsensusMigrationTarget this.value = SignedBytes.checkedCast(value); } + public boolean isMigratedBy(ConsensusMigrationRepairType repairType) + { + switch (repairType) + { + case either: return true; + case paxos: return this == accord; + case accord: return this == paxos; + default: return false; + } + } + public static ConsensusMigrationTarget fromString(String targetProtocol) { return ConsensusMigrationTarget.valueOf(LocalizeString.toLowerCaseLocalized(targetProtocol)); diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java index 62727a013399..d8c207dc64db 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java @@ -18,7 +18,11 @@ package org.apache.cassandra.service.consensus.migration; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.function.Predicate; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -26,6 +30,8 @@ import com.google.common.base.Predicates; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.FutureCallback; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; @@ -49,14 +55,12 @@ import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.transformations.BeginConsensusMigrationForTableAndRange; import org.apache.cassandra.tcm.transformations.MaybeFinishConsensusMigrationForTableAndRange; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import static java.util.Collections.emptyList; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; -import static java.util.Collections.emptyList; import static org.apache.cassandra.dht.Range.normalize; import static org.apache.cassandra.utils.CollectionSerializers.newListSerializer; @@ -90,9 +94,7 @@ public void onSuccess(@Nullable RepairResult repairResult) if (tms == null || !Range.intersects(tms.migratingRanges, desc.ranges)) return; - if (tms.targetProtocol == ConsensusMigrationTarget.paxos && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.accord) - return; - if (tms.targetProtocol == ConsensusMigrationTarget.accord && repairResult.consensusMigrationRepairResult.type != ConsensusMigrationRepairType.paxos) + if (!tms.targetProtocol.isMigratedBy(repairResult.consensusMigrationRepairResult.type)) return; ClusterMetadataService.instance().commit( @@ -311,7 +313,8 @@ private static RepairOption getRepairOption(Collection tabl boolean repairPaxos = !accordRepair; boolean paxosOnly = false; boolean dontPurgeTombstones = false; - RepairOption repairOption = new RepairOption(RepairParallelism.PARALLEL, primaryRange, incremental, trace, numJobThreads, intersectingRanges, pullRepair, forceRepair, PreviewKind.NONE, optimiseStreams, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair); + boolean accordOnly = false; + RepairOption repairOption = new RepairOption(RepairParallelism.PARALLEL, primaryRange, incremental, trace, numJobThreads, intersectingRanges, pullRepair, forceRepair, PreviewKind.NONE, optimiseStreams, ignoreUnreplicatedKeyspaces, repairPaxos, paxosOnly, dontPurgeTombstones, accordOnly, true); tables.forEach(table -> repairOption.getColumnFamilies().add(table.tableName)); return repairOption; } diff --git a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java index 7dff0111ef3f..1c1f3985fa6d 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java +++ b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java @@ -38,7 +38,6 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairType; -import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.consensus.migration.TableMigrationState; @@ -138,11 +137,8 @@ public Result execute(@Nonnull ClusterMetadata metadata) if (tms == null) return new Rejected(INVALID, format("Table %s is not currently performing consensus migration", ksAndCF)); - if (tms.targetProtocol == ConsensusMigrationTarget.accord && repairType != ConsensusMigrationRepairType.paxos) - return new Rejected(INVALID, format("Table %s is not currently performing consensus migration to Accord and the repair was a Paxos repair", ksAndCF)); - - if (tms.targetProtocol == ConsensusMigrationTarget.paxos && repairType != ConsensusMigrationRepairType.accord) - return new Rejected(INVALID, format("Table %s is not currently performing consensus migration to Paxos and the repair was an Accord repair", ksAndCF)); + if (!tms.targetProtocol.isMigratedBy(repairType)) + return new Rejected(INVALID, format("Table %s is not currently performing consensus migration to %s and the repair was a %s repair", ksAndCF, tms.targetProtocol, repairType)); List> normalizedRepairedRanges = normalize(repairedRanges); diff --git a/src/java/org/apache/cassandra/tools/nodetool/Repair.java b/src/java/org/apache/cassandra/tools/nodetool/Repair.java index 8d5b0607d4e9..2c9a8b4c0183 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/Repair.java +++ b/src/java/org/apache/cassandra/tools/nodetool/Repair.java @@ -105,6 +105,9 @@ public class Repair extends NodeToolCmd @Option(title = "paxos-only", name = {"-paxos-only", "--paxos-only"}, description = "If the --paxos-only flag is included, no table data is repaired, only paxos operations..") private boolean paxosOnly = false; + @Option(title = "accord-only", name = {"-accord-only", "--accord-only"}, description = "If the --accord-only flag is included, no table data is repaired, only accord operations..") + private boolean accordOnly = false; + @Option(title = "ignore_unreplicated_keyspaces", name = {"-iuk","--ignore-unreplicated-keyspaces"}, description = "Use --ignore-unreplicated-keyspaces to ignore keyspaces which are not replicated, otherwise the repair will fail") private boolean ignoreUnreplicatedKeyspaces = false; @@ -191,6 +194,7 @@ else if (dcParallel) options.put(RepairOption.REPAIR_PAXOS_KEY, Boolean.toString(!skipPaxos && getPreviewKind() == PreviewKind.NONE)); options.put(RepairOption.PAXOS_ONLY_KEY, Boolean.toString(paxosOnly && getPreviewKind() == PreviewKind.NONE)); options.put(RepairOption.NO_TOMBSTONE_PURGING, Boolean.toString(dontPurgeTombstones)); + options.put(RepairOption.ACCORD_ONLY_KEY, Boolean.toString(accordOnly && getPreviewKind() == PreviewKind.NONE)); if (!startToken.isEmpty() || !endToken.isEmpty()) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java index edb749fb2199..cec5fdb2e5c2 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/OptimiseStreamsRepairTest.java @@ -44,7 +44,7 @@ import org.apache.cassandra.distributed.api.NodeToolResult; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.AsymmetricRemoteSyncTask; -import org.apache.cassandra.repair.CassandraRepairJob; +import org.apache.cassandra.repair.RepairJob; import org.apache.cassandra.repair.LocalSyncTask; import org.apache.cassandra.repair.SyncTask; import org.apache.cassandra.repair.TreeResponse; @@ -107,7 +107,7 @@ public static class BBHelper { public static void install(ClassLoader cl, int id) { - new ByteBuddy().rebase(CassandraRepairJob.class) + new ByteBuddy().rebase(RepairJob.class) .method(named("createOptimisedSyncingSyncTasks").and(takesArguments(1))) .intercept(MethodDelegation.to(BBHelper.class)) .make() diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java new file mode 100644 index 000000000000..66cf1c0e00b2 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -0,0 +1,415 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.Nonnull; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import org.junit.After; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.CommandsForKey; +import accord.impl.SimpleProgressLog; +import accord.local.Node; +import accord.local.PreLoadContext; +import accord.local.SafeCommand; +import accord.local.Status; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.utils.async.AsyncChains; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.gms.FailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordSafeCommandStore; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.AccordAgent; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; + +import static java.lang.String.format; + +public class AccordIncrementalRepairTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordIncrementalRepairTest.class); + + public static class BarrierRecordingAgent extends AccordAgent + { + static class ExecutedBarrier + { + final Seekables keysOrRanges; + final @Nonnull Timestamp executeAt; + + public ExecutedBarrier(Seekables keysOrRanges, @Nonnull Timestamp executeAt) + { + this.keysOrRanges = keysOrRanges; + this.executeAt = executeAt; + } + + @Override + public String toString() + { + return "ExecutedBarrier{" + + "keysOrRanges=" + keysOrRanges + + ", executeAt=" + executeAt + + '}'; + } + } + + private final List barriers = new ArrayList<>(); + + @Override + public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull Timestamp executeAt) + { + super.onLocalBarrier(keysOrRanges, executeAt); + synchronized (barriers) + { + barriers.add(new ExecutedBarrier(keysOrRanges, executeAt)); + } + } + + public List executedBarriers() + { + synchronized (barriers) + { + return ImmutableList.copyOf(barriers); + } + } + + public void reset() + { + synchronized (barriers) + { + barriers.clear(); + } + } + + } + + static BarrierRecordingAgent agent() + { + AccordService service = (AccordService) AccordService.instance(); + return (BarrierRecordingAgent) service.node().agent(); + } + + static AccordService accordService() + { + return (AccordService) AccordService.instance(); + } + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws Throwable + { + CassandraRelevantProperties.ACCORD_AGENT_CLASS.setString(BarrierRecordingAgent.class.getName()); +// setupCluster(opt -> opt.withConfig(conf -> conf.with(Feature.NETWORK, Feature.GOSSIP)), 3); + setupCluster(opt -> opt, 3); + } + + @After + public void tearDown() + { + SHARED_CLUSTER.filters().reset(); + } + + private static void await(IInvokableInstance instance, IIsolatedExecutor.SerializableCallable check, long duration, TimeUnit unit) + { + instance.runOnInstance(() -> { + long timeout = Clock.Global.currentTimeMillis() + unit.toMillis(duration); + while (Clock.Global.currentTimeMillis() < timeout) + { + if (check.call()) + return; + + try + { + Thread.sleep(1); + } + catch (InterruptedException e) + { + throw new AssertionError(e); + } + } + throw new AssertionError("Timed out waiting for node 3 to become alive"); + }); + } + + private static void awaitEndpointUp(IInvokableInstance instance, IInvokableInstance waitOn) + { + InetAddressAndPort endpoint = InetAddressAndPort.getByAddress(waitOn.broadcastAddress()); + await(instance, () -> FailureDetector.instance.isAlive(endpoint), 1, TimeUnit.MINUTES); + } + + private static void awaitEndpointDown(IInvokableInstance instance, IInvokableInstance waitOn) + { + InetAddressAndPort endpoint = InetAddressAndPort.getByAddress(waitOn.broadcastAddress()); + await(instance, () -> !FailureDetector.instance.isAlive(endpoint), 1, TimeUnit.MINUTES); + } + + private static V getUninterruptibly(Future future, long timeout, TimeUnit units) + { + try + { + return future.get(timeout, units); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException | TimeoutException e) + { + throw new RuntimeException(e); + } + } + + private static V getUninterruptibly(Future future) + { + return getUninterruptibly(future, 1, TimeUnit.MINUTES); + } + + private static TxnId awaitLocalApplyOnKey(PartitionKey key) + { + Node node = accordService().node(); + AtomicReference waitFor = new AtomicReference<>(null); + AsyncChains.awaitUninterruptibly(node.commandStores().ifLocal(PreLoadContext.contextFor(key), key.toUnseekable(), 0, Long.MAX_VALUE, safeStore -> { + AccordSafeCommandStore store = (AccordSafeCommandStore) safeStore; + CommandsForKey commands = store.maybeCommandsForKey(key).current(); + int size = commands.size(); + if (size < 1) + return; + // if txnId is an instance of CommandsForKey.TxnInfo, copying it into a + // new txnId instance will prevent any issues related to TxnInfo#hashCode + waitFor.set(new TxnId(commands.txnId(size - 1))); + })); + Assert.assertNotNull(waitFor.get()); + TxnId txnId = waitFor.get(); + long start = Clock.Global.currentTimeMillis(); + AtomicBoolean applied = new AtomicBoolean(false); + while (!applied.get()) + { + long now = Clock.Global.currentTimeMillis(); + if (now - start > TimeUnit.MINUTES.toMillis(1)) + throw new AssertionError("Timeout"); + AsyncChains.awaitUninterruptibly(node.commandStores().ifLocal(PreLoadContext.contextFor(txnId), key.toUnseekable(), 0, Long.MAX_VALUE, safeStore -> { + SafeCommand command = safeStore.get(txnId, key.toUnseekable()); + Assert.assertNotNull(command.current()); + if (command.current().status().hasBeen(Status.Applied)) + applied.set(true); + })); + } + return txnId; + } + + @Test + public void txnRepairTest() throws Throwable + { + SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='full' AND fast_path={'size':2};", KEYSPACE, tableName)); + final String keyspace = KEYSPACE; + final String table = tableName; + + SHARED_CLUSTER.filters().allVerbs().to(3).drop(); + awaitEndpointDown(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); + + executeWithRetry(SHARED_CLUSTER, format("BEGIN TRANSACTION\n" + + "INSERT INTO %s (k, v) VALUES (1, 1);\n" + + "COMMIT TRANSACTION", qualifiedTableName)); + + SHARED_CLUSTER.get(1, 2).forEach(instance -> instance.runOnInstance(() -> { + TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); + awaitLocalApplyOnKey(new PartitionKey(metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(1)))); + })); + + SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> agent().reset())); + + SHARED_CLUSTER.get(1, 2).forEach(instance -> { + instance.runOnInstance(() -> { + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); + cfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + Assert.assertFalse(cfs.getLiveSSTables().isEmpty()); + cfs.getLiveSSTables().forEach(sstable -> { + Assert.assertFalse(sstable.isRepaired()); + Assert.assertFalse(sstable.isPendingRepair()); + }); + }); + }); + SHARED_CLUSTER.get(3).runOnInstance(() -> { + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); + cfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + Assert.assertTrue(cfs.getLiveSSTables().isEmpty()); + }); + + // heal partition and wait for node 1 to see node 3 again + for (IInvokableInstance instance : SHARED_CLUSTER) + instance.runOnInstance(() -> { + SimpleProgressLog.PAUSE_FOR_TEST = true; + Assert.assertTrue(agent().executedBarriers().isEmpty()); + }); + SHARED_CLUSTER.filters().reset(); + awaitEndpointUp(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); + SHARED_CLUSTER.get(1).nodetool("repair", KEYSPACE); + + SHARED_CLUSTER.forEach(instance -> { + instance.runOnInstance(() -> { + Assert.assertFalse( agent().executedBarriers().isEmpty()); + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); + Assert.assertFalse(cfs.getLiveSSTables().isEmpty()); + cfs.getLiveSSTables().forEach(sstable -> { + Assert.assertTrue(sstable.isRepaired() || sstable.isPendingRepair()); + }); + }); + }); + } + + private void testSingleNodeWrite(TransactionalMode mode) + { + SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='%s';", KEYSPACE, tableName, mode)); + final String keyspace = KEYSPACE; + final String table = tableName; + + SHARED_CLUSTER.get(3).runOnInstance(() -> { + QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (k, v) VALUES (1, 2);", keyspace, table)); + }); + + SHARED_CLUSTER.get(3).runOnInstance(() -> { + UntypedResultSet result = QueryProcessor.executeInternal(format("SELECT * FROM %s.%s WHERE k=1", keyspace, table)); + Assert.assertFalse(result.isEmpty()); + UntypedResultSet.Row row = Iterables.getOnlyElement(result); + Assert.assertEquals(1, row.getInt("k")); + Assert.assertEquals(2, row.getInt("v")); + + + + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); + cfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + Assert.assertFalse(cfs.getLiveSSTables().isEmpty()); + cfs.getLiveSSTables().forEach(sstable -> { + Assert.assertFalse(sstable.isRepaired()); + Assert.assertFalse(sstable.isPendingRepair()); + }); + }); + SHARED_CLUSTER.get(1, 2).forEach(instance -> instance.runOnInstance(() -> { + UntypedResultSet result = QueryProcessor.executeInternal(format("SELECT * FROM %s.%s WHERE k=1", keyspace, table)); + Assert.assertTrue(result.isEmpty()); + + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); + cfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + Assert.assertTrue(cfs.getLiveSSTables().isEmpty()); + })); + SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> { + agent().reset(); + })); + + SHARED_CLUSTER.get(1).nodetool("repair", KEYSPACE); + SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> { + Assert.assertFalse( agent().executedBarriers().isEmpty()); + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); + Assert.assertFalse(cfs.getLiveSSTables().isEmpty()); + cfs.getLiveSSTables().forEach(sstable -> { + Assert.assertTrue(sstable.isRepaired() || sstable.isPendingRepair()); + }); + + UntypedResultSet result = QueryProcessor.executeInternal(format("SELECT * FROM %s.%s WHERE k=1", keyspace, table)); + Assert.assertFalse(result.isEmpty()); + UntypedResultSet.Row row = Iterables.getOnlyElement(result); + Assert.assertEquals(1, row.getInt("k")); + Assert.assertEquals(2, row.getInt("v")); + })); + } + + /** + * a failed write at txn mode unsafe should be made visible by repair + */ + @Test + public void unsafeRepairTest() + { + testSingleNodeWrite(TransactionalMode.unsafe); + } + + /** + * Repair should repair (fully replicate _some_ state) any divergent state between replicas + */ + @Test + public void fullRepairTest() + { + testSingleNodeWrite(TransactionalMode.full); + } + + @Test + public void onlyAccordTest() + { + SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='full' AND fast_path={'size':2};", KEYSPACE, tableName)); + final String keyspace = KEYSPACE; + final String table = tableName; + + SHARED_CLUSTER.filters().allVerbs().to(3).drop(); + awaitEndpointDown(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); + awaitEndpointDown(SHARED_CLUSTER.get(2), SHARED_CLUSTER.get(3)); + + executeWithRetry(SHARED_CLUSTER, format("BEGIN TRANSACTION\n" + + "INSERT INTO %s (k, v) VALUES (1, 1);\n" + + "COMMIT TRANSACTION", qualifiedTableName)); + + SHARED_CLUSTER.get(1, 2).forEach(instance -> instance.runOnInstance(() -> { + TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); + awaitLocalApplyOnKey(new PartitionKey(metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(1)))); + })); + + SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> agent().reset())); + + SHARED_CLUSTER.filters().reset(); + awaitEndpointUp(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); + SHARED_CLUSTER.get(1).nodetool("repair", "--accord-only", KEYSPACE); + + SHARED_CLUSTER.forEach(instance -> { + logger().info("checking instance {}", instance.broadcastAddress()); + instance.runOnInstance(() -> { + Assert.assertFalse( agent().executedBarriers().isEmpty()); + }); + }); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index 6cba8995ed65..0fe835c6c925 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -571,7 +571,7 @@ public void testAccordToPaxos() throws Exception assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 2, 1, 1, 1, 1); // Repair the currently migrating range from when targets were switched, but it's not an Accord repair, this is to make sure the wrong repair type doesn't trigger progress - nodetool(coordinator, "repair", "-st", upperMidToken.toString(), "-et", maxAlignedWithLocalRanges.toString()); + nodetool(coordinator, "repair", "-st", upperMidToken.toString(), "-et", maxAlignedWithLocalRanges.toString(), "--paxos-only"); assertMigrationState(tableName, ConsensusMigrationTarget.paxos, ImmutableList.of(new Range(minToken, midToken), new Range(maxToken, minToken)), ImmutableList.of(accordMigratingRange), 1); // Paxos migrating keys should still need key migration after non-Accord repair diff --git a/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java index bf75f920ed52..cbc08c0eda84 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java +++ b/test/simulator/main/org/apache/cassandra/simulator/cluster/OnInstanceRepair.java @@ -97,7 +97,7 @@ private static void invokeRepair(String keyspaceName, boolean repairPaxos, boole { Collection> ranges = rangesSupplier.call(); // no need to wait for completion, as we track all task submissions and message exchanges, and ensure they finish before continuing to next action - StorageService.instance.repair(keyspaceName, new RepairOption(RepairParallelism.SEQUENTIAL, isPrimaryRangeOnly, false, false, 1, ranges, false, force, PreviewKind.NONE, false, true, repairPaxos, repairOnlyPaxos, false, false), singletonList((tag, event) -> { + StorageService.instance.repair(keyspaceName, new RepairOption(RepairParallelism.SEQUENTIAL, isPrimaryRangeOnly, false, false, 1, ranges, false, force, PreviewKind.NONE, false, true, repairPaxos, repairOnlyPaxos, false, false, false), singletonList((tag, event) -> { if (event.getType() == ProgressEventType.COMPLETE) listener.run(); })); diff --git a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java index 6ee81d154581..7e58fd13b57e 100644 --- a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java @@ -66,12 +66,7 @@ public void failingRepair() Cluster.Node coordinator = coordinatorGen.next(rs); // exclude accord repair as this test breaks validation/sync; which accord doesn't have - RepairOption options; - do - { - options = repairOption(rs, coordinator, KEYSPACE, TABLES); - } - while (options.accordRepair()); + RepairOption options = repairOption(rs, coordinator, KEYSPACE, TABLES); RepairCoordinator repair = coordinator.repair(KEYSPACE, options, false); repair.run(); InetAddressAndPort failingAddress = pickParticipant(rs, coordinator, repair); diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 4a4c012d6a06..3e89ebd13cc4 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -409,26 +409,14 @@ static void assertSuccess(Cluster cluster, int example, boolean shouldSync, Repa for (JobState job : session.getJobs()) { EnumSet expected = EnumSet.allOf(JobState.State.class); - if (repair.state.options.accordRepair()) + if (!shouldSnapshot) { - // accord doesn't do snapshot, validation, or streaming expected.remove(JobState.State.SNAPSHOT_START); expected.remove(JobState.State.SNAPSHOT_COMPLETE); - expected.remove(JobState.State.VALIDATION_START); - expected.remove(JobState.State.VALIDATION_COMPLETE); - expected.remove(JobState.State.STREAM_START); } - else + if (!shouldSync) { - if (!shouldSnapshot) - { - expected.remove(JobState.State.SNAPSHOT_START); - expected.remove(JobState.State.SNAPSHOT_COMPLETE); - } - if (!shouldSync) - { - expected.remove(JobState.State.STREAM_START); - } + expected.remove(JobState.State.STREAM_START); } Set actual = job.getStateTimesMillis().keySet(); Assertions.assertThat(actual).isEqualTo(expected); diff --git a/test/unit/org/apache/cassandra/repair/RepairJobTest.java b/test/unit/org/apache/cassandra/repair/RepairJobTest.java index a2dd93953c16..5681b0ca685a 100644 --- a/test/unit/org/apache/cassandra/repair/RepairJobTest.java +++ b/test/unit/org/apache/cassandra/repair/RepairJobTest.java @@ -113,7 +113,7 @@ public class RepairJobTest private static InetAddressAndPort addr4; private static InetAddressAndPort addr5; private MeasureableRepairSession session; - private CassandraRepairJob job; + private RepairJob job; private RepairJobDesc sessionJobDesc; // So that threads actually get recycled and we can have accurate memory accounting while testing @@ -125,11 +125,11 @@ private static class MeasureableRepairSession extends RepairSession public MeasureableRepairSession(TimeUUID parentRepairSession, CommonRange commonRange, boolean excludedDeadNodes, String keyspace, RepairParallelism parallelismDegree, boolean isIncremental, boolean pullRepair, PreviewKind previewKind, boolean optimiseStreams, boolean repairPaxos, boolean paxosOnly, - boolean dontPurgeTombstones, boolean accordRepair, String... cfnames) + boolean dontPurgeTombstones, String... cfnames) { super(SharedContext.Global.instance, new Scheduler.NoopScheduler(), parentRepairSession, commonRange, excludedDeadNodes, keyspace, parallelismDegree, isIncremental, pullRepair, - previewKind, optimiseStreams, repairPaxos, paxosOnly, dontPurgeTombstones, accordRepair, cfnames); + previewKind, optimiseStreams, repairPaxos, paxosOnly, dontPurgeTombstones, false, false, cfnames); } @Override @@ -195,9 +195,9 @@ public void setup() this.session = new MeasureableRepairSession(parentRepairSession, new CommonRange(neighbors, emptySet(), FULL_RANGE), false, KEYSPACE, SEQUENTIAL, false, false, - NONE, false, true, false, false, false, CF); + NONE, false, true, false, false, CF); - this.job = new CassandraRepairJob(session, CF); + this.job = new RepairJob(session, CF); this.sessionJobDesc = new RepairJobDesc(session.state.parentRepairSession, session.getId(), session.state.keyspace, CF, session.ranges()); @@ -267,7 +267,7 @@ public void testNoTreesRetainedAfterDifference() throws Throwable // Use addr4 instead of one of the provided trees to force everything to be remote sync tasks as // LocalSyncTasks try to reach over the network. - List syncTasks = CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, sessionJobDesc, mockTreeResponses, + List syncTasks = RepairJob.createStandardSyncTasks(SharedContext.Global.instance, sessionJobDesc, mockTreeResponses, addr4, // local noTransient(), session.isIncremental, @@ -367,7 +367,7 @@ public static void testCreateStandardSyncTasks(boolean pullRepair) treeResponse(addr2, RANGE_1, "different", RANGE_2, "same", RANGE_3, "different"), treeResponse(addr3, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same")); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local noTransient(), // transient @@ -403,7 +403,7 @@ public void testStandardSyncTransient(boolean pullRepair) List treeResponses = Arrays.asList(treeResponse(addr1, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same"), treeResponse(addr2, RANGE_1, "different", RANGE_2, "same", RANGE_3, "different")); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local transientPredicate(addr2), @@ -433,7 +433,7 @@ public void testStandardSyncLocalTransient(boolean pullRepair) List treeResponses = Arrays.asList(treeResponse(addr1, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same"), treeResponse(addr2, RANGE_1, "different", RANGE_2, "same", RANGE_3, "different")); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local transientPredicate(addr1), @@ -493,7 +493,7 @@ public void testEmptyDifference(InetAddressAndPort local, Predicate treeResponses = Arrays.asList(treeResponse(addr1, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same"), treeResponse(addr2, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same")); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, local, // local isTransient, @@ -511,13 +511,13 @@ public void testCreateStandardSyncTasksAllDifferent() treeResponse(addr2, RANGE_1, "two", RANGE_2, "two", RANGE_3, "two"), treeResponse(addr3, RANGE_1, "three", RANGE_2, "three", RANGE_3, "three")); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local ep -> ep.equals(addr3), // transient - false, - true, - PreviewKind.ALL)); + false, + true, + PreviewKind.ALL)); assertThat(tasks).hasSize(3); @@ -542,7 +542,7 @@ public void testCreate5NodeStandardSyncTasksWithTransient() treeResponse(addr5, RANGE_1, "five", RANGE_2, "five", RANGE_3, "five")); Predicate isTransient = ep -> ep.equals(addr4) || ep.equals(addr5); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local isTransient, // transient @@ -609,7 +609,7 @@ public static void testLocalSyncWithTransient(InetAddressAndPort local, boolean treeResponse(addr5, RANGE_1, "five", RANGE_2, "five", RANGE_3, "five")); Predicate isTransient = ep -> ep.equals(addr4) || ep.equals(addr5); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, local, // local isTransient, // transient @@ -658,13 +658,13 @@ private static void testLocalAndRemoteTransient(boolean pullRepair) treeResponse(addr4, RANGE_1, "four", RANGE_2, "four", RANGE_3, "four"), treeResponse(addr5, RANGE_1, "five", RANGE_2, "five", RANGE_3, "five")); - Map tasks = toMap(CassandraRepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createStandardSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr4, // local ep -> ep.equals(addr4) || ep.equals(addr5), // transient - false, - pullRepair, - PreviewKind.ALL)); + false, + pullRepair, + PreviewKind.ALL)); assertThat(tasks.get(pair(addr4, addr5))).isNull(); } @@ -676,13 +676,13 @@ public void testOptimisedCreateStandardSyncTasksAllDifferent() treeResponse(addr2, RANGE_1, "two", RANGE_2, "two", RANGE_3, "two"), treeResponse(addr3, RANGE_1, "three", RANGE_2, "three", RANGE_3, "three")); - Map tasks = toMap(CassandraRepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr1, // local noTransient(), addr -> "DC1", - false, - PreviewKind.ALL)); + false, + PreviewKind.ALL)); for (SyncNodePair pair : new SyncNodePair[]{ pair(addr1, addr2), pair(addr1, addr3), @@ -711,13 +711,13 @@ public void testOptimisedCreateStandardSyncTasks() treeResponse(addr2, RANGE_1, "one", RANGE_2, "two"), treeResponse(addr3, RANGE_1, "three", RANGE_2, "two")); - Map tasks = toMap(CassandraRepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, + Map tasks = toMap(RepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, JOB_DESC, treeResponses, addr4, // local noTransient(), addr -> "DC1", - false, - PreviewKind.ALL)); + false, + PreviewKind.ALL)); SyncTaskListAssert.assertThat(tasks.values()).areAllInstanceOf(AsymmetricRemoteSyncTask.class); @@ -744,13 +744,13 @@ public void testOptimisedCreateStandardSyncTasksWithTransient() treeResponse(addr3, RANGE_1, "same", RANGE_2, "same", RANGE_3, "same")); RepairJobDesc desc = new RepairJobDesc(nextTimeUUID(), nextTimeUUID(), "ks", "cf", Collections.emptyList()); - Map tasks = toMap(CassandraRepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, desc, + Map tasks = toMap(RepairJob.createOptimisedSyncingSyncTasks(SharedContext.Global.instance, desc, treeResponses, addr1, // local ep -> ep.equals(addr3), addr -> "DC1", - false, - PreviewKind.ALL)); + false, + PreviewKind.ALL)); SyncTask task = tasks.get(pair(addr1, addr2)); diff --git a/test/unit/org/apache/cassandra/repair/RepairSessionTest.java b/test/unit/org/apache/cassandra/repair/RepairSessionTest.java index a5db87036969..22603b6bcfc9 100644 --- a/test/unit/org/apache/cassandra/repair/RepairSessionTest.java +++ b/test/unit/org/apache/cassandra/repair/RepairSessionTest.java @@ -67,7 +67,7 @@ public void testConviction() throws Exception new CommonRange(endpoints, Collections.emptySet(), Arrays.asList(repairRange)), false, "Keyspace1", RepairParallelism.SEQUENTIAL, false, false, - PreviewKind.NONE, false, false, false, false, false, "Standard1"); + PreviewKind.NONE, false, false, false, false, false, false, "Standard1"); // perform convict session.convict(remote, Double.MAX_VALUE); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java index 1630b3c7c35d..143f63a26c42 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java @@ -38,7 +38,6 @@ import java.util.concurrent.TimeUnit; import static org.apache.cassandra.service.accord.AccordTestUtils.*; -import static org.apache.cassandra.service.accord.AccordTopologyTest.token; public class AccordFastPathCoordinatorTest { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 6d37f962ac52..a608f41f8d2c 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -81,6 +81,9 @@ import org.apache.cassandra.cql3.statements.TransactionStatement; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; @@ -478,4 +481,20 @@ public static Set idSet(int... ids) { return Arrays.stream(ids).mapToObj(AccordTestUtils::id).collect(Collectors.toSet()); } + + public static Token token(long t) + { + return new Murmur3Partitioner.LongToken(t); + } + + public static Range range(Token left, Token right) + { + return new Range<>(left, right); + } + + public static Range range(long left, long right) + { + return range(token(left), token(right)); + } + } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java index 9dd9fc6a82e3..a0ada37645e8 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyTest.java @@ -18,15 +18,11 @@ package org.apache.cassandra.service.accord; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -40,39 +36,20 @@ import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.locator.AbstractReplicationStrategy; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.Keyspaces; -import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.Tables; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.Location; -import org.apache.cassandra.tcm.membership.NodeAddresses; -import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.tcm.membership.NodeState; -import org.apache.cassandra.tcm.membership.NodeVersion; -import org.apache.cassandra.tcm.ownership.DataPlacement; -import org.apache.cassandra.tcm.ownership.DataPlacements; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTopologyUtils.*; public class AccordTopologyTest { - private static final Id ID1 = new Id(1); - private static final Id ID2 = new Id(2); - private static final Id ID3 = new Id(3); - private static final List NODE_LIST = ImmutableList.of(ID1, ID2, ID3); - private static final Set NODE_SET = ImmutableSet.copyOf(NODE_LIST); - - private static final InetAddressAndPort EP1 = ep(1); - private static final InetAddressAndPort EP2 = ep(2); - private static final InetAddressAndPort EP3 = ep(3); - private static final IPartitioner partitioner = Murmur3Partitioner.instance; private static TableId tableId = null; private static KeyspaceMetadata keyspace = null; @@ -88,75 +65,6 @@ public static void beforeClass() throws Throwable keyspace = KeyspaceMetadata.create("ks", KeyspaceParams.simple(3), Tables.of(table)); } - private static InetAddressAndPort ep(int i) - { - try - { - return InetAddressAndPort.getByAddressOverrideDefaults(InetAddress.getByAddress(new byte[]{127, 0, 0, (byte)i}), 7012); - } - catch (UnknownHostException e) - { - throw new RuntimeException(e); - } - } - - private static NodeId nodeId(int id) - { - return new NodeId(id); - } - - static void addNode(ClusterMetadata.Transformer transformer, int node, Token token) - { - NodeId nodeId = nodeId(node); - InetAddressAndPort ep = ep(node); - NodeAddresses addresses = new NodeAddresses(nodeId.toUUID(), ep, ep, ep); - transformer.register(nodeId, addresses, LOCATION, NodeVersion.CURRENT); - transformer.withNodeState(nodeId, NodeState.JOINED); - transformer.proposeToken(nodeId, Collections.singleton(token)); - transformer.addToRackAndDC(nodeId); - } - - private static ClusterMetadata configureCluster(List> ranges, Keyspaces keyspaces) - { - assert ranges.size() == 3; - - IPartitioner partitioner = Murmur3Partitioner.instance; - ClusterMetadata empty = new ClusterMetadata(partitioner); - ClusterMetadata.Transformer transformer = empty.transformer(); - transformer.with(new DistributedSchema(Keyspaces.of(keyspace))); - addNode(transformer, 1, ranges.get(0).right); - addNode(transformer, 2, ranges.get(1).right); - addNode(transformer, 3, ranges.get(2).right); - ClusterMetadata metadata = transformer.build().metadata; - - for (KeyspaceMetadata keyspace : keyspaces) - { - ReplicationParams replication = keyspace.params.replication; - AbstractReplicationStrategy strategy = AbstractReplicationStrategy.createReplicationStrategy(keyspace.name, replication); - DataPlacements.Builder placements = metadata.placements.unbuild(); - DataPlacement placement = strategy.calculateDataPlacement(metadata.epoch, metadata.tokenMap.toRanges(), metadata); - placements.with(replication, placement); - metadata = transformer.with(placements.build()).build().metadata; - } - - return metadata; - } - - static Token token(long t) - { - return new Murmur3Partitioner.LongToken(t); - } - - static Range range(Token left, Token right) - { - return new Range<>(left, right); - } - - static Range range(long left, long right) - { - return range(token(left), token(right)); - } - /** * Check converter does the right thing if the ring is constructed with min and max tokens */ diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java new file mode 100644 index 000000000000..c3c8e6b588ad --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import accord.local.Node; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.AbstractReplicationStrategy; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.membership.NodeState; +import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.ownership.DataPlacements; + +public class AccordTopologyUtils +{ + public static final Node.Id ID1 = new Node.Id(1); + public static final Node.Id ID2 = new Node.Id(2); + public static final Node.Id ID3 = new Node.Id(3); + public static final List NODE_LIST = ImmutableList.of(ID1, ID2, ID3); + public static final Set NODE_SET = ImmutableSet.copyOf(NODE_LIST); + + private static final IPartitioner partitioner = Murmur3Partitioner.instance; + private static final Location LOCATION = new Location("DC1", "RACK1"); + + public static InetAddressAndPort ep(int i) + { + try + { + return InetAddressAndPort.getByAddressOverrideDefaults(InetAddress.getByAddress(new byte[]{ 127, 0, 0, (byte)i}), 7012); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + + private static NodeId nodeId(int id) + { + return new NodeId(id); + } + + static void addNode(ClusterMetadata.Transformer transformer, int node, Token token) + { + NodeId nodeId = nodeId(node); + InetAddressAndPort ep = ep(node); + NodeAddresses addresses = new NodeAddresses(nodeId.toUUID(), ep, ep, ep); + transformer.register(nodeId, addresses, LOCATION, NodeVersion.CURRENT); + transformer.withNodeState(nodeId, NodeState.JOINED); + transformer.proposeToken(nodeId, Collections.singleton(token)); + transformer.addToRackAndDC(nodeId); + } + + public static ClusterMetadata configureCluster(List> ranges, Keyspaces keyspaces) + { + assert ranges.size() == 3; + + IPartitioner partitioner = Murmur3Partitioner.instance; + ClusterMetadata empty = new ClusterMetadata(partitioner); + ClusterMetadata.Transformer transformer = empty.transformer(); + transformer.with(new DistributedSchema(keyspaces)); + addNode(transformer, 1, ranges.get(0).right); + addNode(transformer, 2, ranges.get(1).right); + addNode(transformer, 3, ranges.get(2).right); + ClusterMetadata metadata = transformer.build().metadata; + + for (KeyspaceMetadata keyspace : keyspaces) + { + ReplicationParams replication = keyspace.params.replication; + AbstractReplicationStrategy strategy = AbstractReplicationStrategy.createReplicationStrategy(keyspace.name, replication); + DataPlacements.Builder placements = metadata.placements.unbuild(); + DataPlacement placement = strategy.calculateDataPlacement(Epoch.EMPTY, metadata.tokenMap.toRanges(), metadata); + placements.with(replication, placement); + metadata = transformer.with(placements.build()).build().metadata; + } + + return metadata; + } + + static Token token(long t) + { + return new Murmur3Partitioner.LongToken(t); + } + + static Range range(Token left, Token right) + { + return new Range<>(left, right); + } + + public static Range range(long left, long right) + { + return range(token(left), token(right)); + } + +} diff --git a/test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java b/test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java new file mode 100644 index 000000000000..022543bb3a8e --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.repair; + +import java.util.List; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.api.TopologySorter; +import accord.coordinate.tracking.RequestStatus; +import accord.local.Node; +import accord.topology.Topologies; +import accord.topology.Topology; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.service.accord.AccordTopology; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.membership.Location; + +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.AccordTopologyUtils.*; + +public class RequiredResponseTrackerTest +{ + private static final IPartitioner partitioner = Murmur3Partitioner.instance; + private static TableId tableId = null; + private static KeyspaceMetadata keyspace = null; + private static Topology topology; + private static final Location LOCATION = new Location("DC1", "RACK1"); + + private static final List> RANGES = ImmutableList.of(range(-100, 0), range(0, 100), range(100, -100)); + private static final TopologySorter TOPOLOGY_SORTER = (node1, node2, shards) -> node1.compareTo(node2); + + @BeforeClass + public static void beforeClass() throws Throwable + { + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + TableMetadata table = parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks").build(); + tableId = table.id; + keyspace = KeyspaceMetadata.create("ks", KeyspaceParams.simple(3), Tables.of(table)); + + ClusterMetadata metadata = configureCluster(RANGES, Keyspaces.of(keyspace)); + topology = AccordTopology.createAccordTopology(metadata); + + } + + @Test + public void successCase() + { + Set nodes = topology.nodes(); + Assert.assertEquals(NODE_SET, nodes); + RequiredResponseTracker tracker = new RequiredResponseTracker(nodes, new Topologies.Single(TOPOLOGY_SORTER, topology)); + Assert.assertEquals(RequestStatus.NoChange, tracker.recordSuccess(ID1)); + Assert.assertEquals(RequestStatus.NoChange, tracker.recordSuccess(ID2)); + Assert.assertEquals(RequestStatus.Success, tracker.recordSuccess(ID3)); + } + + @Test + public void failureCase() + { + Set nodes = topology.nodes(); + Assert.assertEquals(NODE_SET, nodes); + RequiredResponseTracker tracker = new RequiredResponseTracker(nodes, new Topologies.Single(TOPOLOGY_SORTER, topology)); + Assert.assertEquals(RequestStatus.NoChange, tracker.recordSuccess(ID1)); + Assert.assertEquals(RequestStatus.Failed, tracker.recordFailure(ID2)); + } +} From 418f8bf3f08b961f72e1afcfb658385ccbe2299f Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Fri, 26 Apr 2024 12:42:45 -0500 Subject: [PATCH 107/340] Prohibit counter column access in Accord transactions patch by Caleb Rackliffe; reviewed by David Capwell for CASSANDRA-18987 --- .../cql3/statements/TransactionStatement.java | 16 ++++++- .../statements/TransactionStatementTest.java | 42 ++++++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index 46e94293b0e0..ed093d833ce5 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -89,6 +89,7 @@ import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull; import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; +import static org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest; import static org.apache.cassandra.service.accord.txn.TxnRead.createTxnRead; import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; @@ -100,6 +101,7 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; %s %s"; public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions; %s statement %s"; public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps; %s statement %s"; + public static final String NO_COUNTERS_IN_TXNS_MESSAGE = "Counter columns cannot be accessed within a transaction; %s statement %s"; public static final String EMPTY_TRANSACTION_MESSAGE = "Transaction contains no reads or writes"; public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; public static final String TRANSACTIONS_DISABLED_MESSAGE = "Accord transactions are disabled. (See accord.enabled in cassandra.yaml)"; @@ -526,6 +528,10 @@ public CQLStatement prepare(ClientState state) checkTrue(selectNames.add(name), DUPLICATE_TUPLE_NAME_MESSAGE, name.name()); SelectStatement prepared = select.prepare(bindVariables); + + if (prepared.table.isCounter()) + throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); + NamedSelect namedSelect = new NamedSelect(name, prepared); checkAtMostOneRowSpecified(namedSelect.select, "LET assignment " + name.name()); preparedAssignments.add(namedSelect); @@ -539,7 +545,12 @@ public CQLStatement prepare(ClientState state) NamedSelect returningSelect = null; if (select != null) { - returningSelect = new NamedSelect(TxnDataName.returning(), select.prepare(bindVariables)); + SelectStatement prepared = select.prepare(bindVariables); + + if (prepared.table.isCounter()) + throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); + + returningSelect = new NamedSelect(TxnDataName.returning(), prepared); checkAtMostOneRowSpecified(returningSelect.select, "returning select"); } @@ -564,6 +575,9 @@ public CQLStatement prepare(ClientState state) checkFalse(prepared.hasConditions(), NO_CONDITIONS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); checkFalse(prepared.isTimestampSet(), NO_TIMESTAMPS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); + if (prepared.metadata().isCounter()) + throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, prepared.type, prepared.source); + preparedUpdates.add(prepared); } diff --git a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java index 95de83ff1b5e..780a432647e7 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java @@ -39,6 +39,7 @@ import static org.apache.cassandra.cql3.statements.TransactionStatement.ILLEGAL_RANGE_QUERY_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_CONDITIONS_IN_UPDATES_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_COUNTERS_IN_TXNS_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_TIMESTAMPS_IN_UPDATES_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.SELECT_REFS_NEED_COLUMN_MESSAGE; import static org.apache.cassandra.cql3.statements.UpdateStatement.CANNOT_SET_KEY_WITH_REFERENCE_MESSAGE; @@ -55,6 +56,7 @@ public class TransactionStatementTest private static final TableId TABLE3_ID = TableId.fromString("00000000-0000-0000-0000-000000000003"); private static final TableId TABLE4_ID = TableId.fromString("00000000-0000-0000-0000-000000000004"); private static final TableId TABLE5_ID = TableId.fromString("00000000-0000-0000-0000-000000000005"); + private static final TableId TABLE6_ID = TableId.fromString("00000000-0000-0000-0000-000000000006"); @BeforeClass public static void beforeClass() throws Exception @@ -65,7 +67,45 @@ public static void beforeClass() throws Exception parse("CREATE TABLE tbl2 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE2_ID), parse("CREATE TABLE tbl3 (k int PRIMARY KEY, \"with spaces\" int, \"with\"\"quote\" int, \"MiXeD_CaSe\" int)", "ks").id(TABLE3_ID), parse("CREATE TABLE tbl4 (k int PRIMARY KEY, int_list list)", "ks").id(TABLE4_ID), - parse("CREATE TABLE tbl5 (k int PRIMARY KEY, v int)", "ks").id(TABLE5_ID)); + parse("CREATE TABLE tbl5 (k int PRIMARY KEY, v int)", "ks").id(TABLE5_ID), + parse("CREATE TABLE tbl6 (k int PRIMARY KEY, c counter)", "ks").id(TABLE6_ID)); + } + + @Test + public void shouldRejectCounterMutation() + { + String query = "BEGIN TRANSACTION\n" + + " UPDATE ks.tbl6 SET c += 100 WHERE k = 0;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_COUNTERS_IN_TXNS_MESSAGE, "UPDATE", "at [2:5]")); + } + + @Test + public void shouldRejectCounterReadInLet() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl6 WHERE k=0);\n" + + " SELECT row1.c;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", "at [2:15]")); + } + + @Test + public void shouldRejectCounterReadInSelect() + { + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM ks.tbl6 WHERE k=0;\n" + + "COMMIT TRANSACTION"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", "at [2:3]")); } @Test From 980f1963f346f55e29d9fe18f85da2d01b183e8b Mon Sep 17 00:00:00 2001 From: ci worker Date: Wed, 1 May 2024 09:12:03 -0700 Subject: [PATCH 108/340] Accord: NPE in RangeDeps.forEach patch by Benedict Elliott Smith, David Capwell; reviewed by Benedict Elliott Smith for CASSANDRA-19605 --- modules/accord | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/accord b/modules/accord index 3aaec7566e38..202e67358396 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3aaec7566e389a0037b93b748867886fb68a0fd0 +Subproject commit 202e67358396a1e413e29498bea71047bd586d06 From 1b3c3f32a4a3130a98022f14ad796190ea42f499 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Wed, 8 May 2024 12:24:33 -0500 Subject: [PATCH 109/340] post-rebase fixes, mostly around CASSANDRA-19341 and CASSANDRA-19567 --- .gitmodules | 2 +- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 60 +++++ .../cassandra/config/DatabaseDescriptor.java | 5 + .../dht/IPartitionerDependentSerializer.java | 25 +-- src/java/org/apache/cassandra/dht/Token.java | 16 ++ .../cassandra/index/accord/IndexMetrics.java | 4 +- .../io/IVersionedAsymmetricSerializer.java | 6 +- .../org/apache/cassandra/journal/Metrics.java | 6 +- .../metrics/CassandraMetricsRegistry.java | 4 + src/java/org/apache/cassandra/net/Verb.java | 8 +- .../repair/messages/SyncResponse.java | 9 +- .../service/accord/AccordJournal.java | 124 +++++++---- .../service/accord/AccordKeyspace.java | 12 +- .../service/accord/AccordService.java | 7 +- .../service/accord/async/AsyncOperation.java | 4 +- .../service/accord/async/ExecutionOrder.java | 127 ++++++----- .../migration/ConsensusRequestRouter.java | 2 +- .../cassandra/streaming/SessionSummary.java | 4 +- .../streaming/StreamDeserializingTask.java | 3 +- .../cassandra/streaming/StreamSummary.java | 20 +- .../streaming/messages/CompleteMessage.java | 3 +- .../messages/IncomingStreamMessage.java | 3 +- .../streaming/messages/KeepAliveMessage.java | 3 +- .../messages/OutgoingStreamMessage.java | 3 +- .../streaming/messages/PrepareAckMessage.java | 3 +- .../messages/PrepareSynAckMessage.java | 5 +- .../streaming/messages/PrepareSynMessage.java | 5 +- .../streaming/messages/ReceivedMessage.java | 3 +- .../messages/SessionFailedMessage.java | 3 +- .../streaming/messages/StreamInitMessage.java | 3 +- .../streaming/messages/StreamMessage.java | 7 +- .../test/tcm/RepairMetadataKeyspaceTest.java | 2 + .../test/AccordJournalSimulationTest.java | 3 +- .../config/DatabaseDescriptorRefTest.java | 4 + .../CompactionAccordIteratorsTest.java | 8 +- .../RepairMessageSerializationsTest.java | 19 +- .../cassandra/service/SerializationsTest.java | 8 +- .../service/accord/AccordTestUtils.java | 3 +- .../cassandra/service/accord/MockJournal.java | 28 +++ .../accord/SimulatedAccordCommandStore.java | 13 +- .../async/SimulatedAsyncOperationTest.java | 207 ++++++++++++++++++ .../async/StreamingInboundHandlerTest.java | 4 +- .../MetadataSnapshotListenerTest.java | 4 +- 44 files changed, 596 insertions(+), 198 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java diff --git a/.gitmodules b/.gitmodules index 60a9510e7ad5..616dacf610a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "modules/accord"] path = modules/accord - url = ../cassandra-accord.git + url = https://github.com/apache/cassandra-accord.git branch = trunk diff --git a/modules/accord b/modules/accord index 202e67358396..256b35e27d17 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 202e67358396a1e413e29498bea71047bd586d06 +Subproject commit 256b35e27d170db9fcd8024d5678b4f6e9d3a956 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index d6fb1a5011fc..b035b0b9b595 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -18,6 +18,8 @@ package org.apache.cassandra.config; +import com.fasterxml.jackson.annotation.JsonIgnore; +import org.apache.cassandra.journal.Params; import org.apache.cassandra.service.consensus.TransactionalMode; public class AccordSpec @@ -71,4 +73,62 @@ public enum TransactionalRangeMigration public TransactionalMode default_transactional_mode = TransactionalMode.off; public boolean ephemeralReadEnabled = false; public boolean state_cache_listener_jfr_enabled = true; + public final JournalSpec journal = new JournalSpec(); + + public static class JournalSpec implements Params + { + public int segmentSize = 32 << 20; + public FailurePolicy failurePolicy = FailurePolicy.STOP; + public FlushMode flushMode = FlushMode.BATCH; + public DurationSpec.IntMillisecondsBound flushPeriod; // pulls default from 'commitlog_sync_period' + public DurationSpec.IntMillisecondsBound periodicFlushLagBlock = new DurationSpec.IntMillisecondsBound("1500ms"); + + @Override + public int segmentSize() + { + return segmentSize; + } + + @Override + public FailurePolicy failurePolicy() + { + return failurePolicy; + } + + @Override + public FlushMode flushMode() + { + return flushMode; + } + + @JsonIgnore + @Override + public int flushPeriodMillis() + { + return flushPeriod == null ? DatabaseDescriptor.getCommitLogSyncPeriod() + : flushPeriod.toMilliseconds(); + } + + @JsonIgnore + @Override + public int periodicFlushLagBlock() + { + return periodicFlushLagBlock.toMilliseconds(); + } + + /** + * This is required by the journal, but we don't have multiple versions, so block it from showing up, so we don't need to worry about maintaining it + */ + @JsonIgnore + @Override + public int userVersion() + { + /* + * NOTE: when accord journal version gets bumped, expose it via yaml. + * This way operators can force previous version on upgrade, temporarily, + * to allow easier downgrades if something goes wrong. + */ + return 1; + } + } } diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index b026ad5286e9..202469fbff93 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -3661,6 +3661,11 @@ public static boolean paxoTopologyRepairStrictEachQuorum() return conf.paxos_topology_repair_strict_each_quorum; } + public static AccordSpec getAccord() + { + return conf.accord; + } + public static AccordSpec.TransactionalRangeMigration getTransactionalRangeMigration() { return conf.accord.range_migration; diff --git a/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java b/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java index 5c75788c0b97..a70eb8377124 100644 --- a/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java +++ b/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java @@ -19,8 +19,8 @@ import java.io.IOException; +import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; /** * Versioned serializer where the serialization depends on partitioner. @@ -28,18 +28,8 @@ * On serialization the partitioner is given by the entity being serialized. To deserialize the partitioner used must * be known to the calling method. */ -public interface IPartitionerDependentSerializer +public interface IPartitionerDependentSerializer extends IVersionedSerializer { - /** - * Serialize the specified type into the specified DataOutputStream instance. - * - * @param t type that needs to be serialized - * @param out DataOutput into which serialization needs to happen. - * @param version protocol version - * @throws java.io.IOException if serialization fails - */ - public void serialize(T t, DataOutputPlus out, int version) throws IOException; - /** * Deserialize into the specified DataInputStream instance. * @param in DataInput from which deserialization needs to happen. @@ -51,11 +41,8 @@ public interface IPartitionerDependentSerializer */ public T deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException; - /** - * Calculate serialized size of object without actually serializing. - * @param t object to calculate serialized size - * @param version protocol version - * @return serialized size of object t - */ - public long serializedSize(T t, int version); + default T deserialize(DataInputPlus in, int version) throws IOException + { + return deserialize(in, null, version); + } } diff --git a/src/java/org/apache/cassandra/dht/Token.java b/src/java/org/apache/cassandra/dht/Token.java index 1df7171f9c95..b3fee2a0807b 100644 --- a/src/java/org/apache/cassandra/dht/Token.java +++ b/src/java/org/apache/cassandra/dht/Token.java @@ -20,6 +20,12 @@ import java.io.IOException; import java.io.Serializable; import java.nio.ByteBuffer; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import com.google.common.collect.Sets; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.db.TypeSizes; @@ -35,6 +41,8 @@ public abstract class Token implements RingPosition, Serializable { + private static final Logger logger = LoggerFactory.getLogger(Token.class); + private static final long serialVersionUID = 1L; public static final TokenSerializer serializer = new TokenSerializer(); @@ -178,11 +186,17 @@ public long serializedSize(Token object, int version) } } + public static volatile boolean logPartitioner = false; + public static final Set> serializePartitioners = Sets.newSetFromMap(new ConcurrentHashMap<>()); + public static final Set> deserializePartitioners = Sets.newSetFromMap(new ConcurrentHashMap<>()); + public static class CompactTokenSerializer implements IPartitionerDependentSerializer { public void serialize(Token token, DataOutputPlus out, int version) throws IOException { IPartitioner p = token.getPartitioner(); + if (logPartitioner && serializePartitioners.add(p.getClass())) + logger.debug("Serializing token with partitioner " + p); if (!p.isFixedLength()) out.writeUnsignedVInt32(p.getTokenFactory().byteSize(token)); p.getTokenFactory().serialize(token, out); @@ -191,6 +205,8 @@ public void serialize(Token token, DataOutputPlus out, int version) throws IOExc public Token deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException { int size = p.isFixedLength() ? p.getMaxTokenSize() : in.readUnsignedVInt32(); + if (logPartitioner && deserializePartitioners.add(p.getClass())) + logger.debug("Deserializing token with partitioner " + p); byte[] bytes = new byte[size]; in.readFully(bytes); return p.getTokenFactory().fromByteArray(ByteBuffer.wrap(bytes)); diff --git a/src/java/org/apache/cassandra/index/accord/IndexMetrics.java b/src/java/org/apache/cassandra/index/accord/IndexMetrics.java index 29560822350f..d992c8a16b76 100644 --- a/src/java/org/apache/cassandra/index/accord/IndexMetrics.java +++ b/src/java/org/apache/cassandra/index/accord/IndexMetrics.java @@ -30,7 +30,7 @@ // Stolen from org.apache.cassandra.index.sai.metrics.AbstractMetrics public class IndexMetrics { - private static final String TYPE = "RouteIndex"; + public static final String TYPE = "RouteIndex"; private static final String SCOPE = "IndexMetrics"; private final List tracked = new ArrayList<>(); @@ -61,7 +61,7 @@ private CassandraMetricsRegistry.MetricName createMetricName(String name) { metricScope += '.' + indexName; } - metricScope += '.' + SCOPE + '.' + name; + metricScope += '.' + SCOPE; CassandraMetricsRegistry.MetricName metricName = new CassandraMetricsRegistry.MetricName(DefaultNameFactory.GROUP_NAME, TYPE, name, metricScope, createMBeanName(name, SCOPE)); diff --git a/src/java/org/apache/cassandra/io/IVersionedAsymmetricSerializer.java b/src/java/org/apache/cassandra/io/IVersionedAsymmetricSerializer.java index 8ad2c285c326..ff89110e3371 100644 --- a/src/java/org/apache/cassandra/io/IVersionedAsymmetricSerializer.java +++ b/src/java/org/apache/cassandra/io/IVersionedAsymmetricSerializer.java @@ -32,7 +32,7 @@ public interface IVersionedAsymmetricSerializer * @param version protocol version * @throws IOException if serialization fails */ - public void serialize(In t, DataOutputPlus out, int version) throws IOException; + void serialize(In t, DataOutputPlus out, int version) throws IOException; /** * Deserialize into the specified DataInputStream instance. @@ -41,7 +41,7 @@ public interface IVersionedAsymmetricSerializer * @return the type that was deserialized * @throws IOException if deserialization fails */ - public Out deserialize(DataInputPlus in, int version) throws IOException; + Out deserialize(DataInputPlus in, int version) throws IOException; /** * Calculate serialized size of object without actually serializing. @@ -49,5 +49,5 @@ public interface IVersionedAsymmetricSerializer * @param version protocol version * @return serialized size of object t */ - public long serializedSize(In t, int version); + long serializedSize(In t, int version); } diff --git a/src/java/org/apache/cassandra/journal/Metrics.java b/src/java/org/apache/cassandra/journal/Metrics.java index befc3c2ddb32..4bca57c77cae 100644 --- a/src/java/org/apache/cassandra/journal/Metrics.java +++ b/src/java/org/apache/cassandra/journal/Metrics.java @@ -23,8 +23,10 @@ import org.apache.cassandra.metrics.DefaultNameFactory; import org.apache.cassandra.metrics.MetricNameFactory; -final class Metrics +public final class Metrics { + public static final String TYPE_NAME = "Journal"; + private static final String WAITING_ON_FLUSH = "WaitingOnFlush"; private static final String WAITING_ON_ALLOCATION = "WaitingOnSegmentAllocation"; private static final String WRITTEN_ENTRIES = "WrittenEntries"; @@ -49,7 +51,7 @@ final class Metrics Metrics(String name) { - this.factory = new DefaultNameFactory("Journal", name); + this.factory = new DefaultNameFactory(TYPE_NAME, name); } void register(Flusher flusher) diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index 8cf83f520870..b11474875916 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -114,6 +114,8 @@ public class CassandraMetricsRegistry extends MetricRegistry // for virtual tables. metricGroups = ImmutableSet.builder() .add(AbstractMetrics.TYPE) + .add(AccordMetrics.ACCORD_COORDINATOR) + .add(AccordMetrics.ACCORD_REPLICA) .add(BatchMetrics.TYPE_NAME) .add(BufferPoolMetrics.TYPE_NAME) .add(CIDRAuthorizerMetrics.TYPE_NAME) @@ -130,8 +132,10 @@ public class CassandraMetricsRegistry extends MetricRegistry .add(DroppedMessageMetrics.TYPE) .add(HintedHandoffMetrics.TYPE_NAME) .add(HintsServiceMetrics.TYPE_NAME) + .add(org.apache.cassandra.index.accord.IndexMetrics.TYPE) .add(InternodeInboundMetrics.TYPE_NAME) .add(InternodeOutboundMetrics.TYPE_NAME) + .add(org.apache.cassandra.journal.Metrics.TYPE_NAME) .add(KeyspaceMetrics.TYPE_NAME) .add(MemtablePool.TYPE_NAME) .add(MessagingMetrics.TYPE_NAME) diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 89b30f88183f..adaa602a9af1 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -346,15 +346,15 @@ public enum Verb ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), - ACCORD_APPLY_AND_WAIT_REQ (152, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, () -> AccordService.instance().verbHandler(), ACCORD_READ_RSP), + ACCORD_APPLY_AND_WAIT_REQ (152, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP), CONSENSUS_KEY_MIGRATION (153, P1, writeTimeout, MUTATION, () -> ConsensusKeyMigrationFinished.serializer,() -> ConsensusKeyMigrationState.consensusKeyMigrationFinishedHandler), ACCORD_INTEROP_READ_RSP (154, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.replySerializer, RESPONSE_HANDLER), - ACCORD_INTEROP_READ_REQ (155, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.requestSerializer, () -> AccordService.instance().verbHandler(), ACCORD_INTEROP_READ_RSP), - ACCORD_INTEROP_COMMIT_REQ (156, P2, writeTimeout, IMMEDIATE, () -> AccordInteropCommit.serializer, () -> AccordService.instance().verbHandler(), ACCORD_INTEROP_READ_RSP), + ACCORD_INTEROP_READ_REQ (155, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.requestSerializer, AccordService::verbHandlerOrNoop, ACCORD_INTEROP_READ_RSP), + ACCORD_INTEROP_COMMIT_REQ (156, P2, writeTimeout, IMMEDIATE, () -> AccordInteropCommit.serializer, AccordService::verbHandlerOrNoop, ACCORD_INTEROP_READ_RSP), ACCORD_INTEROP_READ_REPAIR_RSP (157, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.replySerializer, RESPONSE_HANDLER), - ACCORD_INTEROP_READ_REPAIR_REQ (158, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.requestSerializer, () -> AccordService.instance().verbHandler(), ACCORD_INTEROP_READ_REPAIR_RSP), + ACCORD_INTEROP_READ_REPAIR_REQ (158, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.requestSerializer, AccordService::verbHandlerOrNoop, ACCORD_INTEROP_READ_REPAIR_RSP), ACCORD_INTEROP_APPLY_REQ (160, P2, writeTimeout, IMMEDIATE, () -> AccordInteropApply.serializer, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP), // generic failure response diff --git a/src/java/org/apache/cassandra/repair/messages/SyncResponse.java b/src/java/org/apache/cassandra/repair/messages/SyncResponse.java index 0c528a379640..e7b5446badb5 100644 --- a/src/java/org/apache/cassandra/repair/messages/SyncResponse.java +++ b/src/java/org/apache/cassandra/repair/messages/SyncResponse.java @@ -24,7 +24,7 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.dht.IPartitionerDependentSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.InetAddressAndPort; @@ -79,7 +79,7 @@ public int hashCode() return Objects.hash(desc, success, nodes, summaries); } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final IPartitionerDependentSerializer serializer = new IPartitionerDependentSerializer() { public void serialize(SyncResponse message, DataOutputPlus out, int version) throws IOException { @@ -94,7 +94,8 @@ public void serialize(SyncResponse message, DataOutputPlus out, int version) thr } } - public SyncResponse deserialize(DataInputPlus in, int version) throws IOException + @Override + public SyncResponse deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException { RepairJobDesc desc = RepairJobDesc.serializer.deserialize(in, version); SyncNodePair nodes = SyncNodePair.serializer.deserialize(in, version); @@ -104,7 +105,7 @@ public SyncResponse deserialize(DataInputPlus in, int version) throws IOExceptio List summaries = new ArrayList<>(numSummaries); for (int i=0; i keyCRCBytes = ThreadLocal.withInitial(() -> new byte[21]); - static final Params PARAMS = new Params() - { - @Override - public int segmentSize() - { - return 32 << 20; - } - - @Override - public FailurePolicy failurePolicy() - { - return FailurePolicy.STOP; - } - - @Override - public FlushMode flushMode() - { - return FlushMode.BATCH; - } - - @Override - public int flushPeriodMillis() - { - return DatabaseDescriptor.getCommitLogSyncPeriod(); - } - - @Override - public int periodicFlushLagBlock() - { - return 1500; - } - - @Override - public int userVersion() - { - /* - * NOTE: when accord journal version gets bumped, expose it via yaml. - * This way operators can force previous version on upgrade, temporarily, - * to allow easier downgrades if something goes wrong. - */ - return 1; - } - }; - private final File directory; private final Journal journal; private final AccordEndpointMapper endpointMapper; @@ -219,10 +176,10 @@ enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } private final FrameApplicator frameApplicator = new FrameApplicator(); @VisibleForTesting - public AccordJournal(AccordEndpointMapper endpointMapper) + public AccordJournal(AccordEndpointMapper endpointMapper, Params params) { this.directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); - this.journal = new Journal<>("AccordJournal", directory, PARAMS, new JournalCallbacks(), Key.SUPPORT, RECORD_SERIALIZER); + this.journal = new Journal<>("AccordJournal", directory, params, new JournalCallbacks(), Key.SUPPORT, RECORD_SERIALIZER); this.endpointMapper = endpointMapper; } @@ -969,6 +926,22 @@ public enum Type implements ValueSerializer } } msgTypeToSynonymousTypesMap = ImmutableListMultimap.copyOf(msgTypeToSynonymousTypes); + + //TODO (now): enable as this shows we are currently missing a message +// IllegalStateException e = null; +// for (MessageType t : MessageType.values) +// { +// if (!t.hasSideEffects()) continue; +// Type matches = msgTypeToTypeMap.get(t); +// if (matches == null) +// { +// IllegalStateException ise = new IllegalStateException("Missing MessageType " + t); +// if (e == null) e = ise; +// else e.addSuppressed(ise); +// } +// } +// if (e != null) +// throw e; } static Type fromId(int id) @@ -1164,7 +1137,7 @@ private void doRun() while (null != (request = unframedRequests.poll())) { long waitForEpoch = request.waitForEpoch; - if (!node.topology().hasEpoch(waitForEpoch)) + if (waitForEpoch != 0 && !node.topology().hasEpoch(waitForEpoch)) { delayedRequests.computeIfAbsent(waitForEpoch, ignore -> new ArrayList<>()).add(request); if (!waitForEpochs.containsLong(waitForEpoch)) @@ -1394,6 +1367,12 @@ private MessageProvider(TxnId txnId) this.txnId = txnId; } + @Override + public TxnId txnId() + { + return txnId; + } + @Override public Set test(Set messages) { @@ -1464,6 +1443,12 @@ public Commit stableFastPath() return readMessage(txnId, STABLE_FAST_PATH_REQ, Commit.class); } + @Override + public Commit stableSlowPath() + { + return readMessage(txnId, STABLE_SLOW_PATH_REQ, Commit.class); + } + @Override public Commit stableMaximal() { @@ -1493,6 +1478,18 @@ public Propagate propagateApply() { return readMessage(txnId, PROPAGATE_APPLY_MSG, Propagate.class); } + + @Override + public Propagate propagateOther() + { + return readMessage(txnId, PROPAGATE_OTHER_MSG, Propagate.class); + } + + @Override + public ApplyThenWaitUntilApplied applyThenWaitUntilApplied() + { + return readMessage(txnId, APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, ApplyThenWaitUntilApplied.class); + } } private final class LoggingMessageProvider implements SerializerSupport.MessageProvider @@ -1506,6 +1503,12 @@ private final class LoggingMessageProvider implements SerializerSupport.MessageP this.provider = provider; } + @Override + public TxnId txnId() + { + return txnId; + } + @Override public Set test(Set messages) { @@ -1587,6 +1590,15 @@ public Commit stableFastPath() return commit; } + @Override + public Commit stableSlowPath() + { + logger.debug("Fetching {} message for {}", STABLE_SLOW_PATH_REQ, txnId); + Commit commit = provider.stableSlowPath(); + logger.debug("Fetched {} message for {}: {}", STABLE_SLOW_PATH_REQ, txnId, commit); + return commit; + } + @Override public Commit stableMaximal() { @@ -1631,5 +1643,23 @@ public Propagate propagateApply() logger.debug("Fetched {} message for {}: {}", PROPAGATE_APPLY_MSG, txnId, propagate); return propagate; } + + @Override + public Propagate propagateOther() + { + logger.debug("Fetching {} message for {}", PROPAGATE_OTHER_MSG, txnId); + Propagate propagate = provider.propagateOther(); + logger.debug("Fetched {} message for {}: {}", PROPAGATE_OTHER_MSG, txnId, propagate); + return propagate; + } + + @Override + public ApplyThenWaitUntilApplied applyThenWaitUntilApplied() + { + logger.debug("Fetching {} message for {}", APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, txnId); + ApplyThenWaitUntilApplied apply = provider.applyThenWaitUntilApplied(); + logger.debug("Fetched {} message for {}: {}", APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, txnId, apply); + return apply; + } } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 7138eb9ab2d9..5298670e1337 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -250,6 +250,7 @@ static TokenType valueOf(Token token) + format("execute_at %s,", TIMESTAMP_TUPLE) + format("promised_ballot %s,", TIMESTAMP_TUPLE) + format("accepted_ballot %s,", TIMESTAMP_TUPLE) + + format("execute_atleast %s,", TIMESTAMP_TUPLE) + "waiting_on blob," + "listeners set, " + "PRIMARY KEY((store_id, domain, txn_id))" @@ -298,6 +299,7 @@ public static class CommandsColumns public static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); static final ColumnMetadata promised_ballot = getColumn(Commands, "promised_ballot"); static final ColumnMetadata accepted_ballot = getColumn(Commands, "accepted_ballot"); + static final ColumnMetadata execute_atleast = getColumn(Commands, "execute_atleast"); static final ColumnMetadata waiting_on = getColumn(Commands, "waiting_on"); static final ColumnMetadata listeners = getColumn(Commands, "listeners"); @@ -857,6 +859,8 @@ public static Mutation getCommandMutation(int storeId, Command original, Command addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.accepted_ballot, Command::acceptedOrCommitted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); + if (command.txnId().kind().awaitsOnlyDeps()) + addCellIfModified(CommandsColumns.execute_atleast, Command::executesAtLeast, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); if (command.isStable() && !command.isTruncated()) { @@ -1230,11 +1234,12 @@ static Command unsafeLoadCommand(AccordCommandStore commandStore, TxnId txnId) Timestamp executeAt = deserializeExecuteAtOrNull(row); Ballot promised = deserializePromisedOrNull(row); Ballot accepted = deserializeAcceptedOrNull(row); + Timestamp executeAtLeast = status.is(Status.Truncated) && txnId.kind().awaitsOnlyDeps() ? deserializeExecuteAtLeastOrNull(row) : null; WaitingOnProvider waitingOn = deserializeWaitingOn(txnId, row); MessageProvider messages = commandStore.makeMessageProvider(txnId); - return SerializerSupport.reconstruct(commandStore.unsafeRangesForEpoch(), attrs, status, executeAt, promised, accepted, waitingOn, messages); + return SerializerSupport.reconstruct(commandStore.unsafeRangesForEpoch(), attrs, status, executeAt, executeAtLeast, promised, accepted, waitingOn, messages); } catch (Throwable t) { @@ -1307,6 +1312,11 @@ public static Timestamp deserializeExecuteAtOrNull(UntypedResultSet.Row row) return deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits); } + public static Timestamp deserializeExecuteAtLeastOrNull(UntypedResultSet.Row row) + { + return deserializeTimestampOrNull(row, "execute_atleast", Timestamp::fromBits); + } + public static Ballot deserializePromisedOrNull(UntypedResultSet.Row row) { return deserializeTimestampOrNull(row.getBlob("promised_ballot"), Ballot::fromBits); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index a248344e8dae..3ee157e4368c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -43,6 +43,7 @@ import accord.primitives.SyncPoint; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.statements.RequestValidations; +import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; @@ -315,7 +316,7 @@ private AccordService(Id localId) this.scheduler = new AccordScheduler(); this.dataStore = new AccordDataStore(); this.configuration = new AccordConfiguration(DatabaseDescriptor.getRawConfig()); - this.journal = new AccordJournal(configService); + this.journal = new AccordJournal(configService, DatabaseDescriptor.getAccord().journal); this.node = new Node(localId, messageSink, this::handleLocalRequest, @@ -447,7 +448,7 @@ private static ReadTimeoutException newBarrierPreempted(TxnId txnId, boolean glo private long doWithRetries(LongSupplier action, int retryAttempts, long initialBackoffMillis, long maxBackoffMillis) throws InterruptedException { // Since we could end up having the barrier transaction or the transaction it listens to invalidated - CoordinationFailed existingFailures = null; + RuntimeException existingFailures = null; Long success = null; long backoffMillis = 0; for (int attempt = 0; attempt < retryAttempts; attempt++) @@ -468,7 +469,7 @@ private long doWithRetries(LongSupplier action, int retryAttempts, long initialB success = action.getAsLong(); break; } - catch (CoordinationFailed newFailures) + catch (RequestExecutionException | CoordinationFailed newFailures) { existingFailures = Throwables.merge(existingFailures, newFailures); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 51041aab58df..72e673f48bb8 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -215,7 +215,7 @@ private void fail(Throwable throwable) commandStore.abortCurrentOperation(); case LOADING: context.releaseResources(commandStore); - commandStore.executionOrder().unregister(this); + commandStore.executionOrder().unregisterOutOfOrder(this); case INITIALIZED: break; // nothing to clean up, call callback } @@ -239,6 +239,8 @@ protected void runInternal() default: throw new IllegalStateException("Unexpected state " + state); case INITIALIZED: canRun = commandStore.executionOrder().register(this); + if (Invariants.isParanoid()) + Invariants.checkState(canRun.booleanValue() == commandStore.executionOrder().canRun(this), "Register of %s returned canRun=%s but canRun returned %s!", this, canRun, !canRun); state(LOADING); case LOADING: if (null == canRun) diff --git a/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java b/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java index 03527f65539c..715703acdee6 100644 --- a/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java +++ b/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.IdentityHashMap; import java.util.List; +import java.util.function.Consumer; import accord.api.Key; import accord.api.RoutingKey; @@ -82,31 +83,9 @@ boolean canRun(AsyncOperation operation) } } - Conflicts remove(AsyncOperation operation) + Conflicts remove(AsyncOperation operation, boolean allowOutOfOrder) { - if (operationOrQueue instanceof AsyncOperation) - { - Invariants.checkState(operationOrQueue == operation); - rangeQueues.remove(range); - } - else - { - @SuppressWarnings("unchecked") - ArrayDeque> queue = (ArrayDeque>) operationOrQueue; - AsyncOperation head = queue.poll(); - Invariants.checkState(head == operation); - - if (queue.isEmpty()) - { - rangeQueues.remove(range); - } - else - { - head = queue.peek(); - if (canRun(head)) - head.onUnblocked(); - } - } + unregister("range", range, operationOrQueue, operation, allowOutOfOrder, () -> rangeQueues.remove(range)); return operationToConflicts.remove(operation); } @@ -182,20 +161,12 @@ class Result result.rangeConflicts.add(e.getKey()); } RangeState state = e.getValue(); - Object operationOrQueue = state.operationOrQueue; - if (operationOrQueue instanceof AsyncOperation) - { - ArrayDeque> queue = new ArrayDeque<>(4); - queue.add((AsyncOperation) operationOrQueue); - queue.add(operation); - state.operationOrQueue = queue; - } - else - { - @SuppressWarnings("unchecked") - ArrayDeque> queue = (ArrayDeque>) operationOrQueue; - queue.add(operation); - } + // a single range could conflict with multiple other ranges, so it is possible that the operation + // exists in the queue already due to another range in the txn... simple example is + // keys = (0, 10], (12, 15] + // e.getKey() == (-100, 100] + // in this case the operation would attempt to double add since it has 2 keys that conflict with this single range + register(state.operationOrQueue, operation, q -> state.operationOrQueue = q); }); if (result.sameRange != null) { @@ -205,7 +176,7 @@ class Result { rangeQueues.add(range, new RangeState(range, keyConflicts, result.rangeConflicts, operation)); } - return keyConflicts == null && result.rangeConflicts == null; + return keyConflicts == null && result.rangeConflicts == null && result.sameRange == null; } /** @@ -221,12 +192,19 @@ private boolean register(Object keyOrTxnId, AsyncOperation operation) return true; } + register(operationOrQueue, operation, q -> queues.put(keyOrTxnId, q)); + return false; + } + + private void register(Object operationOrQueue, AsyncOperation operation, Consumer>> onCreateQueue) + { if (operationOrQueue instanceof AsyncOperation) { + Invariants.checkState(operationOrQueue != operation, "Attempted to double register operation %s", operation); ArrayDeque> queue = new ArrayDeque<>(4); queue.add((AsyncOperation) operationOrQueue); queue.add(operation); - queues.put(keyOrTxnId, queue); + onCreateQueue.accept(queue); } else { @@ -234,23 +212,35 @@ private boolean register(Object keyOrTxnId, AsyncOperation operation) ArrayDeque> queue = (ArrayDeque>) operationOrQueue; queue.add(operation); } - return false; + } + + /** + * Unregister the operation as being a dependency for its keys and TxnIds, but do so even if it is unable to run now. + */ + void unregisterOutOfOrder(AsyncOperation operation) + { + unregister(operation, true); } /** * Unregister the operation as being a dependency for its keys and TxnIds */ void unregister(AsyncOperation operation) + { + unregister(operation, false); + } + + private void unregister(AsyncOperation operation, boolean allowOutOfOrder) { for (Seekable seekable : operation.keys()) { switch (seekable.domain()) { case Key: - unregister(seekable.asKey(), operation); + unregister(seekable.asKey(), operation, allowOutOfOrder); break; case Range: - unregister(seekable.asRange(), operation); + unregister(seekable.asRange(), operation, allowOutOfOrder); break; default: throw new AssertionError("Unexpected domain: " + seekable.domain()); @@ -259,48 +249,69 @@ void unregister(AsyncOperation operation) } TxnId primaryTxnId = operation.primaryTxnId(); if (null != primaryTxnId) - unregister(primaryTxnId, operation); + unregister(primaryTxnId, operation, allowOutOfOrder); } - private void unregister(Range range, AsyncOperation operation) + private void unregister(Range range, AsyncOperation operation, boolean allowOutOfOrder) { RangeState state = state(range); - Conflicts conflicts = state.remove(operation); + Conflicts conflicts = state.remove(operation, allowOutOfOrder); if (conflicts.rangeConflicts != null) - conflicts.rangeConflicts.forEach(r -> state(r).remove(operation)); + conflicts.rangeConflicts.forEach(r -> state(r).remove(operation, allowOutOfOrder)); if (conflicts.keyConflicts != null) - conflicts.keyConflicts.forEach(k -> unregister(k, operation)); + conflicts.keyConflicts.forEach(k -> unregister(k, operation, allowOutOfOrder)); } /** * Unregister the operation as being a dependency for key or TxnId */ - private void unregister(Object keyOrTxnId, AsyncOperation operation) + private void unregister(Object keyOrTxnId, AsyncOperation operation, boolean allowOutOfOrder) { Object operationOrQueue = queues.get(keyOrTxnId); Invariants.nonNull(operationOrQueue); + unregister("Key or TxnId", keyOrTxnId, operationOrQueue, operation, allowOutOfOrder, () -> queues.remove(keyOrTxnId)); + } + + private void unregister(String name, Object key, Object operationOrQueue, AsyncOperation operation, boolean allowOutOfOrder, Runnable onEmpty) + { if (operationOrQueue instanceof AsyncOperation) { - Invariants.checkState(operationOrQueue == operation); - queues.remove(keyOrTxnId); + Invariants.checkState(operationOrQueue == operation, "Only single operation present and was not %s; %s %s", name, key); + onEmpty.run(); } else { @SuppressWarnings("unchecked") ArrayDeque> queue = (ArrayDeque>) operationOrQueue; - AsyncOperation head = queue.poll(); - Invariants.checkState(head == operation); + if (allowOutOfOrder) + { + Invariants.checkState(queue.remove(operation), "Operation %s was not found in queue: %s; %s %s", operation, queue, name, key); + } + else + { + Invariants.checkState(queue.peek() == operation, "Operation %s is not at the top of the queue; %s; %s %s", operation, queue, name, key); + queue.poll(); + } if (queue.isEmpty()) { - queues.remove(keyOrTxnId); + onEmpty.run(); } else { - head = queue.peek(); - if (canRun(head)) - head.onUnblocked(); + AsyncOperation next = queue.peek(); + if (next == operation) + { + // a single range could conflict with multiple other ranges, so it is possible that the operation + // exists in the queue already due to another range in the txn... simple example is + // keys = (0, 10], (12, 15] + // e.getKey() == (-100, 100] + // in this case the operation would attempt to double add since it has 2 keys that conflict with this single range + return; + } + if (canRun(next)) + next.onUnblocked(); } } } @@ -357,7 +368,7 @@ private boolean canRun(Range range, AsyncOperation operation) private RangeState state(Range range) { List list = rangeQueues.get(range); - assert list.size() == 1 : String.format("Expected 1 element but saw list %s", list); + assert list.size() == 1 : String.format("Expected 1 element for range %s but saw list %s", range, list); return list.get(0); } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java index 1188076c9024..70c2c396609a 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java @@ -121,7 +121,7 @@ public boolean shouldWriteThroughAccordAndMaybeMigrate(@Nonnull DecoratedKey key ClusterMetadata cm = ClusterMetadata.current(); TableMetadata metadata = cm.schema.getTableMetadata(tableId); if (metadata == null) - throw new IllegalStateException("Can't route consensus request for nonexistent table %s".format(tableId.toString())); + throw new IllegalStateException(String.format("Can't route consensus request for nonexistent table %s", tableId)); if (!mayWriteThroughAccord(metadata)) return false; diff --git a/src/java/org/apache/cassandra/streaming/SessionSummary.java b/src/java/org/apache/cassandra/streaming/SessionSummary.java index f5bcfa31be40..8bb1a1eb819e 100644 --- a/src/java/org/apache/cassandra/streaming/SessionSummary.java +++ b/src/java/org/apache/cassandra/streaming/SessionSummary.java @@ -108,14 +108,14 @@ public SessionSummary deserialize(DataInputPlus in, IPartitioner partitioner, in List receivingSummaries = new ArrayList<>(numRcvd); for (int i=0; i sendingSummaries = new ArrayList<>(numRcvd); for (int i=0; i serializer = new StreamSummarySerializer(); + public static final IVersionedSerializer serializer = new StreamSummarySerializer(); public final TableId tableId; public final List> ranges; @@ -86,25 +87,34 @@ public String toString() return sb.toString(); } - public static class StreamSummarySerializer implements IPartitionerDependentSerializer + public static class StreamSummarySerializer implements IVersionedSerializer { public void serialize(StreamSummary summary, DataOutputPlus out, int version) throws IOException { summary.tableId.serialize(out); out.writeInt(summary.files); out.writeLong(summary.totalSize); + Token.logPartitioner = true; if (version >= MessagingService.VERSION_51) CollectionSerializers.serializeCollection(summary.ranges, out, version, Range.rangeSerializer); + Token.logPartitioner = false; } - public StreamSummary deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException + public StreamSummary deserialize(DataInputPlus in, int version) throws IOException { TableId tableId = TableId.deserialize(in); + int files = in.readInt(); long totalSize = in.readLong(); List> ranges = ImmutableList.of(); if (version >= MessagingService.VERSION_51) + { + TableMetadata tableMetadata = Schema.instance.getTableMetadata(tableId); + IPartitioner p = tableMetadata != null ? tableMetadata.partitioner : IPartitioner.global(); + Token.logPartitioner = true; ranges = CollectionSerializers.deserializeList(in, p, version, Range.rangeSerializer); + Token.logPartitioner = false; + } return new StreamSummary(tableId, ranges, files, totalSize); } diff --git a/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java b/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java index 86620c38594d..afb1c6c7b478 100644 --- a/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/CompleteMessage.java @@ -17,7 +17,6 @@ */ package org.apache.cassandra.streaming.messages; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamingDataOutputPlus; @@ -26,7 +25,7 @@ public class CompleteMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public CompleteMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) + public CompleteMessage deserialize(DataInputPlus in, int version) { return new CompleteMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java b/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java index 4ee726ee83b1..e48d115e35d2 100644 --- a/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/IncomingStreamMessage.java @@ -21,7 +21,6 @@ import java.util.Objects; import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.IncomingStream; import org.apache.cassandra.streaming.StreamManager; @@ -34,7 +33,7 @@ public class IncomingStreamMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public IncomingStreamMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException + public IncomingStreamMessage deserialize(DataInputPlus input, int version) throws IOException { StreamMessageHeader header = StreamMessageHeader.serializer.deserialize(input, version); StreamSession session = StreamManager.instance.findSession(header.sender, header.planId, header.sessionIndex, header.sendByFollower); diff --git a/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java b/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java index 928783f4014a..42be1e99a1fd 100644 --- a/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/KeepAliveMessage.java @@ -17,7 +17,6 @@ */ package org.apache.cassandra.streaming.messages; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamingDataOutputPlus; @@ -38,7 +37,7 @@ public String toString() public static Serializer serializer = new Serializer() { - public KeepAliveMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) + public KeepAliveMessage deserialize(DataInputPlus in, int version) { return new KeepAliveMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java b/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java index dcd3b755e8ab..b83d7863fc1d 100644 --- a/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/OutgoingStreamMessage.java @@ -21,7 +21,6 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.streaming.OutgoingStream; @@ -33,7 +32,7 @@ public class OutgoingStreamMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public OutgoingStreamMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) + public OutgoingStreamMessage deserialize(DataInputPlus in, int version) { throw new UnsupportedOperationException("Not allowed to call deserialize on an outgoing stream"); } diff --git a/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java b/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java index 72d61d29cb2b..f93b5afe3092 100644 --- a/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/PrepareAckMessage.java @@ -20,7 +20,6 @@ import java.io.IOException; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamingDataOutputPlus; @@ -34,7 +33,7 @@ public void serialize(PrepareAckMessage message, StreamingDataOutputPlus out, in //nop } - public PrepareAckMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException + public PrepareAckMessage deserialize(DataInputPlus in, int version) throws IOException { return new PrepareAckMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java b/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java index e29e651824b4..e052f4c3017b 100644 --- a/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/PrepareSynAckMessage.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Collection; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamSummary; @@ -39,12 +38,12 @@ public void serialize(PrepareSynAckMessage message, StreamingDataOutputPlus out, StreamSummary.serializer.serialize(summary, out, version); } - public PrepareSynAckMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException + public PrepareSynAckMessage deserialize(DataInputPlus input, int version) throws IOException { PrepareSynAckMessage message = new PrepareSynAckMessage(); int numSummaries = input.readInt(); for (int i = 0; i < numSummaries; i++) - message.summaries.add(StreamSummary.serializer.deserialize(input, partitioner, version)); + message.summaries.add(StreamSummary.serializer.deserialize(input, version)); return message; } diff --git a/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java b/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java index e901365e5ec3..c856f469838f 100644 --- a/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/PrepareSynMessage.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.Collection; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.StreamRequest; import org.apache.cassandra.streaming.StreamSession; @@ -32,7 +31,7 @@ public class PrepareSynMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public PrepareSynMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException + public PrepareSynMessage deserialize(DataInputPlus input, int version) throws IOException { PrepareSynMessage message = new PrepareSynMessage(); // requests @@ -42,7 +41,7 @@ public PrepareSynMessage deserialize(DataInputPlus input, IPartitioner partition // summaries int numSummaries = input.readInt(); for (int i = 0; i < numSummaries; i++) - message.summaries.add(StreamSummary.serializer.deserialize(input, partitioner, version)); + message.summaries.add(StreamSummary.serializer.deserialize(input, version)); return message; } diff --git a/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java b/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java index c6b7a0f638aa..67559596140e 100644 --- a/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/ReceivedMessage.java @@ -19,7 +19,6 @@ import java.io.IOException; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.streaming.StreamSession; @@ -29,7 +28,7 @@ public class ReceivedMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public ReceivedMessage deserialize(DataInputPlus input, IPartitioner partitioner, int version) throws IOException + public ReceivedMessage deserialize(DataInputPlus input, int version) throws IOException { return new ReceivedMessage(TableId.deserialize(input), input.readInt()); } diff --git a/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java b/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java index f05be58aa684..7fa82d8f6770 100644 --- a/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/SessionFailedMessage.java @@ -17,7 +17,6 @@ */ package org.apache.cassandra.streaming.messages; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamingDataOutputPlus; @@ -26,7 +25,7 @@ public class SessionFailedMessage extends StreamMessage { public static Serializer serializer = new Serializer() { - public SessionFailedMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) + public SessionFailedMessage deserialize(DataInputPlus in, int version) { return new SessionFailedMessage(); } diff --git a/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java b/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java index 2fd65d7dff0d..e78442334bf0 100644 --- a/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/StreamInitMessage.java @@ -20,7 +20,6 @@ import java.io.IOException; import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.streaming.PreviewKind; @@ -94,7 +93,7 @@ public void serialize(StreamInitMessage message, StreamingDataOutputPlus out, in out.writeInt(message.previewKind.getSerializationVal()); } - public StreamInitMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException + public StreamInitMessage deserialize(DataInputPlus in, int version) throws IOException { InetAddressAndPort from = inetAddressAndPortSerializer.deserialize(in, version); int sessionIndex = in.readInt(); diff --git a/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java b/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java index 6e5dc08f8815..186ac3274abd 100644 --- a/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java +++ b/src/java/org/apache/cassandra/streaming/messages/StreamMessage.java @@ -21,7 +21,6 @@ import java.util.HashMap; import java.util.Map; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.streaming.StreamingChannel; @@ -45,16 +44,16 @@ public static long serializedSize(StreamMessage message, int version) throws IOE return 1 + message.type.outSerializer.serializedSize(message, version); } - public static StreamMessage deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException + public static StreamMessage deserialize(DataInputPlus in, int version) throws IOException { Type type = Type.lookupById(in.readByte()); - return type.inSerializer.deserialize(in, partitioner, version); + return type.inSerializer.deserialize(in, version); } /** StreamMessage serializer */ public static interface Serializer { - V deserialize(DataInputPlus in, IPartitioner partitioner, int version) throws IOException; + V deserialize(DataInputPlus in, int version) throws IOException; void serialize(V message, StreamingDataOutputPlus out, int version, StreamSession session) throws IOException; long serializedSize(V message, int version) throws IOException; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/tcm/RepairMetadataKeyspaceTest.java b/test/distributed/org/apache/cassandra/distributed/test/tcm/RepairMetadataKeyspaceTest.java index 074a64913f48..70d1acfddd14 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/tcm/RepairMetadataKeyspaceTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/tcm/RepairMetadataKeyspaceTest.java @@ -28,6 +28,7 @@ import org.junit.Test; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.shared.ClusterUtils; @@ -63,6 +64,7 @@ public void testRepairMetadataKeyspace() throws Throwable IInvokableInstance toRepair = cluster.get(3); stopUnchecked(toRepair); + DatabaseDescriptor.clientInitialization(); String targetDir = DistributedMetadataLogKeyspace.TABLE_NAME + '-' + DistributedMetadataLogKeyspace.LOG_TABLE_ID.toHexString(); for (File datadir : getDataDirectories(toRepair)) { diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index cd98c71f1890..22dd55bb31ea 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableMap; import accord.topology.TopologyUtils; +import org.apache.cassandra.config.AccordSpec; import org.apache.cassandra.schema.*; import org.junit.Ignore; import org.junit.Test; @@ -166,7 +167,7 @@ public static class State } } private static final ExecutorPlus executor = ExecutorFactory.Global.executorFactory().pooled("name", 10); - private static final AccordJournal journal = new AccordJournal(null); + private static final AccordJournal journal = new AccordJournal(null, new AccordSpec.JournalSpec()); private static final int events = 100; private static final CountDownLatch eventsWritten = CountDownLatch.newCountDownLatch(events); private static final CountDownLatch eventsDurable = CountDownLatch.newCountDownLatch(events); diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index c58020763442..697214f76a4f 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -78,6 +78,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.auth.INetworkAuthorizer", "org.apache.cassandra.auth.IRoleManager", "org.apache.cassandra.config.AccordSpec", + "org.apache.cassandra.config.AccordSpec$JournalSpec", "org.apache.cassandra.config.AccordSpec$TransactionalRangeMigration", "org.apache.cassandra.config.CassandraRelevantProperties", "org.apache.cassandra.config.CassandraRelevantProperties$PropertyConverter", @@ -275,6 +276,9 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.io.util.PathUtils$IOToLongFunction", "org.apache.cassandra.io.util.RebufferingInputStream", "org.apache.cassandra.io.util.SpinningDiskOptimizationStrategy", + "org.apache.cassandra.journal.Params", + "org.apache.cassandra.journal.Params$FailurePolicy", + "org.apache.cassandra.journal.Params$FlushMode", "org.apache.cassandra.locator.Endpoint", "org.apache.cassandra.locator.IEndpointSnitch", "org.apache.cassandra.locator.InetAddressAndPort", diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index ee38ffceb926..5cbf0915b5a0 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -91,6 +91,7 @@ import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; +import org.assertj.core.api.Assertions; import static accord.impl.TimestampsForKey.NO_LAST_EXECUTED_HLC; import static accord.local.KeyHistory.COMMANDS; @@ -369,9 +370,14 @@ Consumer> expectAccordCommandsNoChange() assertEquals(1, Iterators.size(partition.unfilteredIterator())); ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); Row row = (Row)partition.unfilteredIterator().next(); - assertEquals(commands.metadata().regularColumns().size(), row.columnCount()); + + // execute_atleast is null, so when we read from the scanner the column won't be present in the partition + Assertions.assertThat(new ArrayList<>(row.columns())).isEqualTo(commands.metadata().regularColumns().stream().filter(c -> !c.name.toString().equals("execute_atleast")).collect(Collectors.toList())); for (ColumnMetadata cm : commands.metadata().regularColumns()) + { + if (cm.name.toString().equals("execute_atleast")) continue; assertNotNull(row.getColumnData(cm)); + } assertEquals(TXN_ID, CommandRows.getTxnId(partitionKeyComponents)); assertEquals(SaveStatus.Applied, AccordKeyspace.CommandRows.getStatus(row)); }; diff --git a/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java b/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java index 9e8080f903cc..750c6144553d 100644 --- a/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java +++ b/test/unit/org/apache/cassandra/repair/messages/RepairMessageSerializationsTest.java @@ -25,6 +25,8 @@ import java.util.UUID; import com.google.common.collect.Lists; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.*; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -32,10 +34,7 @@ import org.apache.cassandra.CassandraTestBase; import org.apache.cassandra.CassandraTestBase.DDDaemonInitialization; import org.apache.cassandra.CassandraTestBase.UseMurmur3Partitioner; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputBuffer; @@ -116,7 +115,19 @@ private T serializeRoundTrip(T msg, IVersionedSerializ buf.flip(); DataInputPlus in = new DataInputBuffer(buf, false); - T deserialized = serializer.deserialize(in, PROTOCOL_VERSION); + + T deserialized = null; + + if (serializer instanceof IPartitionerDependentSerializer) + { + IPartitionerDependentSerializer pds = (IPartitionerDependentSerializer) serializer; + deserialized = pds.deserialize(in, DatabaseDescriptor.getPartitioner(), PROTOCOL_VERSION); + } + else + { + deserialized = serializer.deserialize(in, PROTOCOL_VERSION); + } + Assert.assertEquals(msg, deserialized); Assert.assertEquals(msg.hashCode(), deserialized.hashCode()); return deserialized; diff --git a/test/unit/org/apache/cassandra/service/SerializationsTest.java b/test/unit/org/apache/cassandra/service/SerializationsTest.java index 9251b590bb6b..00864f5a3434 100644 --- a/test/unit/org/apache/cassandra/service/SerializationsTest.java +++ b/test/unit/org/apache/cassandra/service/SerializationsTest.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.UUID; import com.google.common.collect.Lists; import org.junit.AfterClass; @@ -60,6 +59,7 @@ import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.streaming.SessionSummary; import org.apache.cassandra.streaming.StreamSummary; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MerkleTrees; @@ -70,6 +70,7 @@ public class SerializationsTest extends AbstractSerializationsTester { private static PartitionerSwitcher partitionerSwitcher; + private static TableId TABLE_ID; private static TimeUUID RANDOM_UUID; private static Range FULL_RANGE; private static RepairJobDesc DESC; @@ -84,6 +85,7 @@ public static void defineSchema() throws Exception ClusterMetadataTestHelper.setInstanceForTest(); SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create("Keyspace1", KeyspaceParams.simple(3))); SchemaTestUtil.announceNewTable(TableMetadata.minimal("Keyspace1", "Standard1")); + TABLE_ID = ClusterMetadata.current().schema.getKeyspaceMetadata("Keyspace1").getTableOrViewNullable("Standard1").id(); RANDOM_UUID = TimeUUID.fromString("743325d0-4c4b-11ec-8a88-2d67081686db"); FULL_RANGE = new Range<>(Util.testPartitioner().getMinimumToken(), Util.testPartitioner().getMinimumToken()); DESC = new RepairJobDesc(RANDOM_UUID, RANDOM_UUID, "Keyspace1", "Standard1", Arrays.asList(FULL_RANGE)); @@ -223,8 +225,8 @@ private void testSyncCompleteWrite() throws IOException // sync success List summaries = new ArrayList<>(); summaries.add(new SessionSummary(src, dest, - Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), emptyList(), 5, 100)), - Lists.newArrayList(new StreamSummary(TableId.fromUUID(UUID.randomUUID()), emptyList(), 500, 10)) + Lists.newArrayList(new StreamSummary(TABLE_ID, emptyList(), 5, 100)), + Lists.newArrayList(new StreamSummary(TABLE_ID, emptyList(), 500, 10)) )); SyncResponse success = new SyncResponse(DESC, src, dest, true, summaries); // sync fail diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index a608f41f8d2c..0d3b3d005c54 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -76,6 +76,7 @@ import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.concurrent.ManualExecutor; import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.config.AccordSpec; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.statements.TransactionStatement; @@ -399,7 +400,7 @@ public static AccordCommandStore createAccordCommandStore( public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } }; - AccordJournal journal = new AccordJournal(null); + AccordJournal journal = new AccordJournal(null, new AccordSpec.JournalSpec()); journal.start(null); SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index 8a68163ede70..dc22f540eddf 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -28,6 +28,7 @@ import accord.local.SerializerSupport; import accord.messages.Accept; import accord.messages.Apply; +import accord.messages.ApplyThenWaitUntilApplied; import accord.messages.BeginRecovery; import accord.messages.Commit; import accord.messages.Message; @@ -43,15 +44,18 @@ import static accord.messages.MessageType.ACCEPT_REQ; import static accord.messages.MessageType.APPLY_MAXIMAL_REQ; import static accord.messages.MessageType.APPLY_MINIMAL_REQ; +import static accord.messages.MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ; import static accord.messages.MessageType.BEGIN_RECOVER_REQ; import static accord.messages.MessageType.COMMIT_MAXIMAL_REQ; import static accord.messages.MessageType.COMMIT_SLOW_PATH_REQ; import static accord.messages.MessageType.PRE_ACCEPT_REQ; import static accord.messages.MessageType.PROPAGATE_APPLY_MSG; +import static accord.messages.MessageType.PROPAGATE_OTHER_MSG; import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; import static accord.messages.MessageType.PROPAGATE_STABLE_MSG; import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; +import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; public class MockJournal implements IJournal { @@ -61,6 +65,12 @@ public SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) { return new SerializerSupport.MessageProvider() { + @Override + public TxnId txnId() + { + return txnId; + } + @Override public Set test(Set messages) { @@ -146,6 +156,12 @@ public Commit stableFastPath() return get(STABLE_FAST_PATH_REQ); } + @Override + public Commit stableSlowPath() + { + return get(STABLE_SLOW_PATH_REQ); + } + @Override public Commit stableMaximal() { @@ -175,6 +191,18 @@ public Propagate propagateApply() { return get(PROPAGATE_APPLY_MSG); } + + @Override + public Propagate propagateOther() + { + return get(PROPAGATE_OTHER_MSG); + } + + @Override + public ApplyThenWaitUntilApplied applyThenWaitUntilApplied() + { + return get(APPLY_THEN_WAIT_UNTIL_APPLIED_REQ); + } }; } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 6cb71b68db04..128843258ec9 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -25,6 +25,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.BooleanSupplier; import java.util.function.Function; +import java.util.function.Predicate; import java.util.function.ToLongFunction; import accord.impl.SizeOfIntersectionSorter; @@ -76,7 +77,7 @@ import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.utils.AccordGenerators.fromQT; -class SimulatedAccordCommandStore implements AutoCloseable +public class SimulatedAccordCommandStore implements AutoCloseable { private final List failures = new ArrayList<>(); private final SimulatedExecutorFactory globalExecutor; @@ -90,8 +91,9 @@ class SimulatedAccordCommandStore implements AutoCloseable public final MockJournal journal; public final ScheduledExecutorPlus unorderedScheduled; public final List evictions = new ArrayList<>(); + public Predicate ignoreExceptions = ignore -> false; - SimulatedAccordCommandStore(RandomSource rs) + public SimulatedAccordCommandStore(RandomSource rs) { globalExecutor = new SimulatedExecutorFactory(accord.utilsfork.RandomSource.wrap(rs).fork(), fromQT(Generators.TIMESTAMP_GEN.map(java.sql.Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs), failures::add); this.unorderedScheduled = globalExecutor.scheduled("ignored"); @@ -151,6 +153,13 @@ public boolean isExpired(TxnId initiated, long now) { return false; } + + @Override + public void onUncaughtException(Throwable t) + { + if (ignoreExceptions.test(t)) return; + super.onUncaughtException(t); + } }, null, ignore -> AccordTestUtils.NOOP_PROGRESS_LOG, diff --git a/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java new file mode 100644 index 000000000000..6e216ff56d16 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.async; + +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; +import java.util.function.BooleanSupplier; + +import org.junit.Before; +import org.junit.Test; + +import accord.api.Key; +import accord.impl.basic.SimulatedFault; +import accord.local.PreLoadContext; +import accord.local.SafeCommandStore; +import accord.primitives.Keys; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Seekables; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.SimulatedAccordCommandStore; +import org.apache.cassandra.service.accord.SimulatedAccordCommandStoreTestBase; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +public class SimulatedAsyncOperationTest extends SimulatedAccordCommandStoreTestBase +{ + @Before + public void precondition() + { + Assertions.assertThat(intTbl.partitioner).isEqualTo(Murmur3Partitioner.instance); + } + + @Test + public void happyPath() + { + qt().withExamples(100).check(rs -> test(rs, 100, intTbl, ignore -> Action.SUCCESS)); + } + + @Test + public void fuzz() + { + Gen actionGen = Gens.enums().allWithWeights(Action.class, 10, 1, 1); + qt().withExamples(100).check(rs -> test(rs, 100, intTbl, actionGen)); + } + + private static void test(RandomSource rs, int numSamples, TableMetadata tbl, Gen actionGen) throws Exception + { + AccordKeyspace.unsafeClear(); + + int numKeys = rs.nextInt(20, 1000); + long minToken = 0; + long maxToken = numKeys; + + Gen keyGen = Gens.longs().between(minToken + 1, maxToken).map(t -> new PartitionKey(tbl.id, tbl.partitioner.decorateKey(LongToken.keyForToken(t)))); + + + Gen keysGen = Gens.lists(keyGen).unique().ofSizeBetween(1, 10).map(l -> Keys.of(l)); + Gen rangesGen = Gens.lists(rangeInsideRange(tbl.id, minToken, maxToken)).uniqueBestEffort().ofSizeBetween(1, 10).map(l -> Ranges.of(l.toArray(Range[]::new))); + Gen> seekablesGen = Gens.oneOf(keysGen, rangesGen); + + try (var instance = new SimulatedAccordCommandStore(rs)) + { + instance.ignoreExceptions = t -> t instanceof SimulatedFault; + Counter counter = new Counter(); + for (int i = 0; i < numSamples; i++) + { + PreLoadContext ctx = PreLoadContext.contextFor(seekablesGen.next(rs)); + operation(instance, ctx, actionGen.next(rs), rs::nextBoolean).begin((ignore, failure) -> { + counter.counter++; + if (failure != null && !(failure instanceof SimulatedFault)) throw new AssertionError("Unexpected error", failure); + }); + } + instance.processAll(); + Assertions.assertThat(counter.counter).isEqualTo(numSamples); + } + } + + private static Gen rangeInsideRange(TableId tableId, long minToken, long maxToken) + { + if (minToken + 1 == maxToken) + { + // only one range is possible... + return Gens.constant(range(tableId, minToken, maxToken)); + } + return rs -> { + long a = rs.nextLong(minToken, maxToken + 1); + long b = rs.nextLong(minToken, maxToken + 1); + while (a == b) + b = rs.nextLong(minToken, maxToken + 1); + if (a > b) + { + long tmp = a; + a = b; + b = tmp; + } + return range(tableId, a, b); + }; + } + + private static TokenRange range(TableId tableId, long start, long end) + { + return new TokenRange(new TokenKey(tableId, new LongToken(start)), new TokenKey(tableId, new LongToken(end))); + } + + private enum Action {SUCCESS, FAILURE, LOAD_FAILURE} + + private static AsyncOperation operation(SimulatedAccordCommandStore instance, PreLoadContext ctx, Action action, BooleanSupplier delay) + { + return new SimulatedOperation(instance.store, ctx, action == Action.FAILURE ? SimulatedOperation.Action.FAILURE : SimulatedOperation.Action.SUCCESS) + { + @Override + AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext preLoadContext) + { + return new SimulatedLoader(action == SimulatedAsyncOperationTest.Action.LOAD_FAILURE ? SimulatedLoader.Action.FAILURE : SimulatedLoader.Action.SUCCESS, delay.getAsBoolean(), instance.unorderedScheduled); + } + }; + } + + private static class Counter + { + int counter = 0; + } + + private static class SimulatedOperation extends AsyncOperation + { + enum Action { SUCCESS, FAILURE} + private final Action action; + + public SimulatedOperation(AccordCommandStore commandStore, PreLoadContext preLoadContext, Action action) + { + super(commandStore, preLoadContext); + this.action = action; + } + + @Override + public Void apply(SafeCommandStore safe) + { + if (action == Action.FAILURE) + throw new SimulatedFault("Operation failed for keys " + keys()); + return null; + } + } + + private static class SimulatedLoader extends AsyncLoader + { + + enum Action { SUCCESS, FAILURE} + + private final Action action; + private boolean delay; + private final ScheduledExecutorService executor; + SimulatedLoader(Action action, boolean delay, ScheduledExecutorService executor) + { + super(null, null, null, null); + this.action = action; + this.delay = delay; + this.executor = executor; + } + + @Override + public boolean load(AsyncOperation.Context context, BiConsumer callback) + { + if (delay) + { + executor.schedule(() -> { + callback.accept(null, action == Action.FAILURE ? new SimulatedFault("Failure loading " + context) : null); + }, 1, TimeUnit.SECONDS); + delay = false; + return false; + } + if (action == Action.FAILURE) + throw new SimulatedFault("Failure loading " + context); + + return true; + } + } +} diff --git a/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java b/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java index 069d0fb58d0f..904272f7b8a1 100644 --- a/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java +++ b/test/unit/org/apache/cassandra/streaming/async/StreamingInboundHandlerTest.java @@ -30,7 +30,6 @@ import io.netty.buffer.ByteBuf; import io.netty.channel.embedded.EmbeddedChannel; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputBuffer; @@ -56,7 +55,6 @@ public class StreamingInboundHandlerTest { - private NettyStreamingChannel streamingChannel; private EmbeddedChannel channel; private ByteBuf buf; @@ -125,7 +123,7 @@ public void StreamDeserializingTask_deserialize_ISM_NoSession() throws IOExcepti temp.flip(); DataInputPlus in = new DataInputBuffer(temp, false); // session not found - IncomingStreamMessage.serializer.deserialize(in, IPartitioner.global(), MessagingService.current_version); + IncomingStreamMessage.serializer.deserialize(in, MessagingService.current_version); } @Test diff --git a/test/unit/org/apache/cassandra/tcm/listeners/MetadataSnapshotListenerTest.java b/test/unit/org/apache/cassandra/tcm/listeners/MetadataSnapshotListenerTest.java index 2f962f1ab274..4c57a76c8b91 100644 --- a/test/unit/org/apache/cassandra/tcm/listeners/MetadataSnapshotListenerTest.java +++ b/test/unit/org/apache/cassandra/tcm/listeners/MetadataSnapshotListenerTest.java @@ -20,6 +20,7 @@ import java.util.Random; +import org.apache.cassandra.config.DatabaseDescriptor; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -50,7 +51,7 @@ public class MetadataSnapshotListenerTest { private static final Logger logger = LoggerFactory.getLogger(MetadataSnapshotListenerTest.class); - private IPartitioner partitioner = Murmur3Partitioner.instance; + private final IPartitioner partitioner = Murmur3Partitioner.instance; private Random r; @BeforeClass @@ -59,6 +60,7 @@ public static void disableSortedReplicaGroups() // Set this so that we don't attempt to sort the random placements as this depends on a populated // TokenMap. This is a temporary element of ClusterMetadata, at least in the current form CassandraRelevantProperties.TCM_SORT_REPLICA_GROUPS.setBoolean(false); + DatabaseDescriptor.daemonInitialization(); } @Before From 3127c558d31099e8264f32d22cbb81d90300bd04 Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Mon, 13 May 2024 15:08:32 +0100 Subject: [PATCH 110/340] Move preaccept expiration logic away from Agent patch by Aleksey Yeschenko; reviewed by Alex Petrov and Benedict Elliott Smith for CASSANDRA-18888 --- modules/accord | 2 +- .../cassandra/service/accord/api/AccordAgent.java | 10 +++------- .../service/accord/SimulatedAccordCommandStore.java | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/modules/accord b/modules/accord index 256b35e27d17..d63d06aafe2e 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 256b35e27d170db9fcd8024d5678b4f6e9d3a956 +Subproject commit d63d06aafe2e60e57a9651ff6dd491175bbe6916 diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 33f8f2b088d1..9c4b678996f9 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -35,7 +35,6 @@ import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; -import accord.primitives.TxnId; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.metrics.AccordMetrics; import org.apache.cassandra.service.accord.txn.TxnQuery; @@ -114,13 +113,10 @@ public void onHandledException(Throwable t) } @Override - public boolean isExpired(TxnId initiated, long now) + public long preAcceptTimeout() { - // TODO: should distinguish between reads and writes - if (initiated.kind().isSyncPoint()) - return false; - - return now - initiated.hlc() > getReadRpcTimeout(MICROSECONDS); + // TODO: should distinguish between reads and writes (Aleksey: why? and why read rpc timeout is being used?) + return getReadRpcTimeout(MICROSECONDS); } @Override diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 128843258ec9..5e4dbbd3c92e 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -149,9 +149,9 @@ public Timestamp uniqueNow(Timestamp atLeast) new TestAgent.RethrowAgent() { @Override - public boolean isExpired(TxnId initiated, long now) + public long preAcceptTimeout() { - return false; + return Long.MAX_VALUE; } @Override From 11ed5309b5e32b470d5293f819571ae709e12538 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 17 May 2024 13:50:01 -0700 Subject: [PATCH 111/340] IndexOutOfBoundsException while serializing CommandsForKey patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-19642 --- modules/accord | 2 +- .../serializers/CommandsForKeySerializer.java | 29 ++++++++------- .../CommandsForKeySerializerTest.java | 35 +++++++++++++++++-- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/modules/accord b/modules/accord index d63d06aafe2e..21cdaf5d2809 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit d63d06aafe2e60e57a9651ff6dd491175bbe6916 +Subproject commit 21cdaf5d280965cfdc690d385375635b498bc9f9 diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index dbe2f4845f30..a81b62b4a393 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -385,15 +385,18 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) VIntCoding.writeUnsignedVInt32(unmanagedPendingCommitCount, out); VIntCoding.writeUnsignedVInt32(cfk.unmanagedCount() - unmanagedPendingCommitCount, out); Unmanaged.Pending pending = unmanagedPendingCommitCount == 0 ? Unmanaged.Pending.APPLY : Unmanaged.Pending.COMMIT; - for (int i = 0 ; i < cfk.unmanagedCount() ; ++i) { - Unmanaged unmanaged = cfk.getUnmanaged(i); - Invariants.checkState(unmanaged.pending == pending); - CommandSerializers.txnId.serialize(unmanaged.txnId, out, ByteBufferAccessor.instance, out.position()); - out.position(out.position() + CommandSerializers.txnId.serializedSize()); - CommandSerializers.timestamp.serialize(unmanaged.waitingUntil, out, ByteBufferAccessor.instance, out.position()); - out.position(out.position() + CommandSerializers.timestamp.serializedSize()); - if (--unmanagedPendingCommitCount == 0) pending = Unmanaged.Pending.APPLY; + int offset = 0; + for (int i = 0 ; i < cfk.unmanagedCount() ; ++i) + { + Unmanaged unmanaged = cfk.getUnmanaged(i); + Invariants.checkState(unmanaged.pending == pending); + + offset += CommandSerializers.txnId.serialize(unmanaged.txnId, out, ByteBufferAccessor.instance, offset); + offset += CommandSerializers.timestamp.serialize(unmanaged.waitingUntil, out, ByteBufferAccessor.instance, offset); + if (--unmanagedPendingCommitCount == 0) pending = Unmanaged.Pending.APPLY; + } + out.position(out.position() + offset); } if ((executeAtCount | missingIdCount) > 0) @@ -610,15 +613,17 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) { unmanageds = new Unmanaged[unmanagedCount]; Unmanaged.Pending pending = unmanagedPendingCommitCount == 0 ? Unmanaged.Pending.APPLY : Unmanaged.Pending.COMMIT; + int offset = 0; for (int i = 0 ; i < unmanagedCount ; ++i) { - TxnId txnId = CommandSerializers.txnId.deserialize(in, ByteBufferAccessor.instance, in.position()); - in.position(in.position() + CommandSerializers.txnId.serializedSize()); - Timestamp waitingUntil = CommandSerializers.timestamp.deserialize(in, ByteBufferAccessor.instance, in.position()); - in.position(in.position() + CommandSerializers.timestamp.serializedSize()); + TxnId txnId = CommandSerializers.txnId.deserialize(in, ByteBufferAccessor.instance, offset); + offset += CommandSerializers.txnId.serializedSize(); + Timestamp waitingUntil = CommandSerializers.timestamp.deserialize(in, ByteBufferAccessor.instance, offset); + offset += CommandSerializers.timestamp.serializedSize(); unmanageds[i] = new Unmanaged(pending, txnId, waitingUntil); if (--unmanagedPendingCommitCount == 0) pending = Unmanaged.Pending.APPLY; } + in.position(in.position() + offset); } if (executeAtMasks + missingDepsMasks > 0) diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 405f92dc595f..202dc5cb7866 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -33,6 +33,7 @@ import java.util.function.LongUnaryOperator; import java.util.function.Supplier; +import org.apache.commons.lang3.ArrayUtils; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -42,6 +43,7 @@ import accord.local.CommandsForKey.InternalStatus; import accord.local.Command; import accord.local.CommandsForKey.TxnInfo; +import accord.local.CommandsForKey.Unmanaged; import accord.local.CommonAttributes; import accord.local.CommonAttributes.Mutable; import accord.local.Listeners; @@ -59,6 +61,7 @@ import accord.primitives.TxnId; import accord.primitives.Writes; import accord.utils.AccordGens; +import accord.utils.Gen; import accord.utils.Gens; import accord.utils.RandomSource; import accord.utils.SortedArrays; @@ -465,12 +468,38 @@ public void test() next = txnIdGen.next(rs0); return next; }).unique().ofSizeBetween(0, 10).next(rs); + Arrays.sort(ids, Comparator.naturalOrder()); TxnInfo[] info = new TxnInfo[ids.length]; for (int i = 0; i < info.length; i++) info[i] = TxnInfo.create(ids[i], rs.pick(InternalStatus.values()), ids[i], CommandsForKey.NO_TXNIDS); - Arrays.sort(info, Comparator.naturalOrder()); - CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, CommandsForKey.NO_PENDING_UNMANAGED); + Gen pendingGen = Gens.enums().allMixedDistribution(Unmanaged.Pending.class).next(rs); + + Unmanaged[] unmanaged = Gens.lists(txnIdGen) + .unique() + .ofSizeBetween(0, 10) + .map((rs0, txnIds) -> txnIds.stream().map(i -> new Unmanaged(pendingGen.next(rs0), i, i)).toArray(Unmanaged[]::new)) + .next(rs); + Arrays.sort(unmanaged, Comparator.naturalOrder()); + if (unmanaged.length > 0) + { + // when registering unmanaged, if the txn is "missing" in TxnInfo we add it + List missing = new ArrayList<>(unmanaged.length); + for (Unmanaged u : unmanaged) + { + int idx = Arrays.binarySearch(ids, u.txnId); + if (idx < 0) + missing.add(TxnInfo.create(u.txnId, InternalStatus.TRANSITIVELY_KNOWN)); + } + if (!missing.isEmpty()) + { + info = ArrayUtils.addAll(info, missing.toArray(TxnInfo[]::new)); + Arrays.sort(info, Comparator.naturalOrder()); + } + } + else unmanaged = CommandsForKey.NO_PENDING_UNMANAGED; + + CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, unmanaged); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); @@ -493,4 +522,4 @@ public void thereAndBackAgain() CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); Assert.assertEquals(expected, roundTrip); } -} \ No newline at end of file +} From 1c87bb96ed72e4ea136579196a342e325d3dc95b Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Mon, 13 May 2024 16:39:18 -0500 Subject: [PATCH 112/340] Baseline Diagnostic vtables for Accord patch by Caleb Rackliffe; reviewed by David Capwell and Ariel Weisberg for CASSANDRA-18732 --- .../managing/operating/virtualtables.adoc | 3 + modules/accord | 2 +- .../db/virtual/AccordVirtualTables.java | 213 ++++++++++++++++-- .../cassandra/metrics/AccordMetrics.java | 4 +- .../metrics/AccordStateCacheMetrics.java | 10 +- .../service/accord/AccordCommandStores.java | 2 +- .../service/accord/AccordService.java | 1 - .../service/accord/AccordStateCache.java | 24 ++ .../service/accord/txn/TxnNamedRead.java | 52 ++++- .../migration/ConsensusMigrationState.java | 10 + .../distributed/test/QueriesTableTest.java | 132 ++++++++--- .../test/accord/AccordMetricsTest.java | 33 ++- .../test/accord/AccordMigrationTest.java | 43 +++- 13 files changed, 463 insertions(+), 66 deletions(-) diff --git a/doc/modules/cassandra/pages/managing/operating/virtualtables.adoc b/doc/modules/cassandra/pages/managing/operating/virtualtables.adoc index 362308372ce4..c87699ed77c8 100644 --- a/doc/modules/cassandra/pages/managing/operating/virtualtables.adoc +++ b/doc/modules/cassandra/pages/managing/operating/virtualtables.adoc @@ -72,6 +72,8 @@ cqlsh> select * from system_metrics.all_groups ; group_name | virtual_table -------------------+--------------------------- + AccordCoordinator | accord_coordinator_group + AccordReplica | accord_replica_group Batch | batch_group BufferPool | buffer_pool_group CIDRAuthorizer | cidr_authorizer_group @@ -98,6 +100,7 @@ cqlsh> select * from system_metrics.all_groups ; Paxos | paxos_group ReadRepair | read_repair_group Repair | repair_group + RouteIndex | route_index_group Storage | storage_group StorageProxy | storage_proxy_group Streaming | streaming_group diff --git a/modules/accord b/modules/accord index 21cdaf5d2809..778c45cd9775 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 21cdaf5d280965cfdc690d385375635b498bc9f9 +Subproject commit 778c45cd977576a901abf24a9759872d36fde056 diff --git a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java index 0dba15a4219e..1b2e041c16e9 100644 --- a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java +++ b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java @@ -18,57 +18,230 @@ package org.apache.cassandra.db.virtual; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; -import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import accord.local.CommandStores; +import accord.primitives.TxnId; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.TableMigrationState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.Clock; + +import static com.google.common.collect.ImmutableList.toImmutableList; public class AccordVirtualTables { - private AccordVirtualTables() - { - - } + private AccordVirtualTables() {} public static Collection getAll(String keyspace) { if (!DatabaseDescriptor.getAccordTransactionsEnabled()) return Collections.emptyList(); - return Arrays.asList( - new Epoch(keyspace) + return List.of( + new CommandStoreCache(keyspace), + new MigrationState(keyspace), + new CoordinationStatus(keyspace) ); } - @VisibleForTesting - public static final class Epoch extends AbstractVirtualTable + public static final class CommandStoreCache extends AbstractVirtualTable { + private CommandStoreCache(String keyspace) + { + super(parse(keyspace, + "Accord Command Store Cache Metrics", + "CREATE TABLE accord_command_store_cache(\n" + + " id int,\n" + + " scope text,\n" + + " queries bigint,\n" + + " hits bigint,\n" + + " misses bigint,\n" + + " PRIMARY KEY (id, scope)" + + ')')); + } - protected Epoch(String keyspace) + @Override + public DataSet data() { - super(parse(keyspace, "Accord Epochs", - "CREATE TABLE accord_epochs(\n" + - " epoch bigint,\n" + - " PRIMARY KEY ( (epoch) )" + - ")")); + CommandStores stores = ((AccordService) AccordService.instance()).node().commandStores(); + + AsyncChain>> statsByStoreChain = stores.map(store -> { + Map snapshots = new HashMap<>(3); + AccordCommandStore accordStore = (AccordCommandStore) store.commandStore(); + snapshots.put(AccordKeyspace.COMMANDS, accordStore.commandCache().statsSnapshot()); + snapshots.put(AccordKeyspace.COMMANDS_FOR_KEY, accordStore.commandsForKeyCache().statsSnapshot()); + snapshots.put(AccordKeyspace.TIMESTAMPS_FOR_KEY, accordStore.timestampsForKeyCache().statsSnapshot()); + return snapshots; + }); + + List> statsByStore = AsyncChains.getBlockingAndRethrow(statsByStoreChain); + SimpleDataSet result = new SimpleDataSet(metadata()); + + for (int storeID : stores.ids()) + { + Map storeStats = statsByStore.get(storeID); + addRow(storeStats.get(AccordKeyspace.COMMANDS), result, storeID, AccordKeyspace.COMMANDS); + addRow(storeStats.get(AccordKeyspace.COMMANDS_FOR_KEY), result, storeID, AccordKeyspace.COMMANDS_FOR_KEY); + addRow(storeStats.get(AccordKeyspace.TIMESTAMPS_FOR_KEY), result, storeID, AccordKeyspace.TIMESTAMPS_FOR_KEY); + } + + return result; + } + + private static void addRow(AccordStateCache.ImmutableStats stats, SimpleDataSet result, int storeID, String scope) + { + result.row(storeID, scope); + result.column("queries", stats.queries); + result.column("hits", stats.hits); + result.column("misses", stats.misses); + } + } + + public static final class MigrationState extends AbstractVirtualTable + { + private static final Logger logger = LoggerFactory.getLogger(MigrationState.class); + + private MigrationState(String keyspace) + { + super(parse(keyspace, + "Consensus Migration State", + "CREATE TABLE consensus_migration_state(\n" + + " keyspace_name text,\n" + + " table_name text,\n" + + " table_id uuid,\n" + + " target_protocol text,\n" + + " transactional_mode text,\n" + + " transactional_migration_from text,\n" + + " migrated_ranges frozen>,\n" + + " migrating_ranges_by_epoch frozen>>,\n" + + " PRIMARY KEY (keyspace_name, table_name)" + + ')')); } @Override public DataSet data() { - IAccordService accord = AccordService.instance(); + ConsensusMigrationState snapshot = ClusterMetadata.current().consensusMigrationState; + Collection tableStates = snapshot.tableStates(); + return data(tableStates); + } + + @Override + public DataSet data(DecoratedKey key) + { + String keyspaceName = UTF8Type.instance.compose(key.getKey()); + Keyspace keyspace = Schema.instance.getKeyspaceInstance(keyspaceName); - long epoch = accord.currentEpoch(); + if (keyspace == null) + throw new InvalidRequestException("Unknown keyspace: '" + keyspaceName + '\''); + List tableIDs = keyspace.getColumnFamilyStores() + .stream() + .map(ColumnFamilyStore::getTableId) + .collect(Collectors.toList()); + + ConsensusMigrationState snapshot = ClusterMetadata.current().consensusMigrationState; + Collection tableStates = snapshot.tableStatesFor(tableIDs); + + return data(tableStates); + } + + private SimpleDataSet data(Collection tableStates) + { + SimpleDataSet result = new SimpleDataSet(metadata()); + + for (TableMigrationState state : tableStates) + { + TableMetadata table = Schema.instance.getTableMetadata(state.tableId); + + if (table == null) + { + logger.warn("Table {}.{} (id: {}) no longer exists. It may have been dropped.", + state.keyspaceName, state.tableName, state.tableId); + continue; + } + + result.row(state.keyspaceName, state.tableName); + result.column("table_id", state.tableId.asUUID()); + result.column("target_protocol", state.targetProtocol.toString()); + result.column("transactional_mode", table.params.transactionalMode.toString()); + result.column("transactional_migration_from", table.params.transactionalMode.toString()); + + List primitiveMigratedRanges = state.migratedRanges.stream().map(Objects::toString).collect(toImmutableList()); + result.column("migrated_ranges", primitiveMigratedRanges); + + Map> primitiveRangesByEpoch = new LinkedHashMap<>(); + for (Map.Entry>> entry : state.migratingRangesByEpoch.entrySet()) + primitiveRangesByEpoch.put(entry.getKey().getEpoch(), entry.getValue().stream().map(Objects::toString).collect(toImmutableList())); + + result.column("migrating_ranges_by_epoch", primitiveRangesByEpoch); + } + + return result; + } + } + + public static final class CoordinationStatus extends AbstractVirtualTable + { + private CoordinationStatus(String keyspace) + { + super(parse(keyspace, + "Accord Coordination Status", + "CREATE TABLE accord_coordination_status(\n" + + " node_id int,\n" + + " epoch bigint,\n" + + " start_time_micros bigint,\n" + + " duration_millis bigint,\n" + + " kind text,\n" + + " domain text,\n" + + " PRIMARY KEY (node_id, epoch, start_time_micros)" + + ')')); + } + + @Override + public DataSet data() + { + AccordService accord = (AccordService) AccordService.instance(); SimpleDataSet result = new SimpleDataSet(metadata()); - result.row(epoch); + + for (TxnId txn : accord.node().coordinating().keySet()) + { + result.row(txn.node.id, txn.epoch(), txn.hlc()); + result.column("duration_millis", Clock.Global.currentTimeMillis() - TimeUnit.MICROSECONDS.toMillis(txn.hlc())); + result.column("kind", txn.kind().toString()); + result.column("domain", txn.domain().toString()); + } + return result; } } diff --git a/src/java/org/apache/cassandra/metrics/AccordMetrics.java b/src/java/org/apache/cassandra/metrics/AccordMetrics.java index 4dc053ccee45..765fe154eedb 100644 --- a/src/java/org/apache/cassandra/metrics/AccordMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordMetrics.java @@ -57,8 +57,8 @@ public class AccordMetrics public static final String RECOVERY_DELAY = "RecoveryDelay"; public static final String RECOVERY_TIME = "RecoveryTime"; public static final String FAST_PATH_TO_TOTAL = "FastPathToTotal"; - public static final String ACCORD_REPLICA = "accord-replica"; - public static final String ACCORD_COORDINATOR = "accord-coordinator"; + public static final String ACCORD_REPLICA = "AccordReplica"; + public static final String ACCORD_COORDINATOR = "AccordCoordinator"; /** * The time between start on the coordinator and commit on this replica. diff --git a/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java b/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java index f63fedf282d9..b00793087e07 100644 --- a/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordStateCacheMetrics.java @@ -34,19 +34,19 @@ public class AccordStateCacheMetrics extends CacheAccessMetrics private final Map instanceMetrics = new ConcurrentHashMap<>(2); - private final String type; + private final String scope; - public AccordStateCacheMetrics(String type) + public AccordStateCacheMetrics(String scope) { - super(new DefaultNameFactory(TYPE_NAME, type)); + super(new DefaultNameFactory(TYPE_NAME, scope)); objectSize = Metrics.histogram(factory.createMetricName(OBJECT_SIZE), false); - this.type = type; + this.scope = scope; } public CacheAccessMetrics forInstance(Class klass) { // cannot make Class hashCode deterministic, as cannot rewrite - so cannot safely use as Map key if want deterministic simulation // (or we need to create extra hoops to catch this specific case in method rewriting) - return instanceMetrics.computeIfAbsent(klass.getSimpleName(), k -> new CacheAccessMetrics(new DefaultNameFactory(TYPE_NAME, String.format("%s-%s", type, k)))); + return instanceMetrics.computeIfAbsent(klass.getSimpleName(), k -> new CacheAccessMetrics(new DefaultNameFactory(TYPE_NAME, String.format("%s-%s", scope, k)))); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 0fd719d5fbe6..7328a31de6f9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -39,7 +39,7 @@ public class AccordCommandStores extends CommandStores implements CacheSize { - public static final String ACCORD_STATE_CACHE = "accord-state-cache"; + public static final String ACCORD_STATE_CACHE = "AccordStateCache"; private final CacheSizeMetrics cacheSizeMetrics; private long cacheSize; diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 3ee157e4368c..ab0f501d141e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -655,7 +655,6 @@ public Id nodeId() return node.id(); } - @VisibleForTesting public Node node() { return node; diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index b76d63b83080..085504f092dd 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -76,6 +76,20 @@ static class Stats long misses; } + public static final class ImmutableStats + { + public final long queries; + public final long hits; + public final long misses; + + public ImmutableStats(Stats stats) + { + queries = stats.queries; + hits = stats.hits; + misses = stats.misses; + } + } + private ImmutableList> instances = ImmutableList.of(); private final ExecutorPlus loadExecutor, saveExecutor; @@ -215,6 +229,11 @@ private void evict(AccordCachingState node) } } + public ImmutableStats stats() + { + return new ImmutableStats(stats); + } + private Instance instanceForNode(AccordCachingState node) { return instances.get(node.index); @@ -592,6 +611,11 @@ public Stats stats() return stats; } + public ImmutableStats statsSnapshot() + { + return new ImmutableStats(stats); + } + public Stats globalStats() { return AccordStateCache.this.stats; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java index bbb1076b230e..4787e2105b18 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Objects; +import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; @@ -30,6 +31,7 @@ import accord.primitives.Timestamp; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; +import org.apache.cassandra.concurrent.DebuggableTask; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.ReadExecutionController; @@ -43,6 +45,7 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.MonotonicClock; import org.apache.cassandra.utils.ObjectSizes; import static org.apache.cassandra.utils.ByteBufferUtil.readWithVIntLength; @@ -136,7 +139,7 @@ public ReadCommand command() private AsyncChain performLocalRead(SinglePartitionReadCommand command, int nowInSeconds) { - return AsyncChains.ofCallable(Stage.READ.executor(), () -> + Callable readCallable = () -> { SinglePartitionReadCommand read = command.withNowInSec(nowInSeconds); @@ -153,10 +156,53 @@ private AsyncChain performLocalRead(SinglePartitionReadCommand command, in } return result; } - }); + }; + + return AsyncChains.ofCallable(Stage.READ.executor(), readCallable, (callable, receiver) -> + new DebuggableTask.RunnableDebuggableTask() + { + private final long approxCreationTimeNanos = MonotonicClock.Global.approxTime.now(); + private volatile long approxStartTimeNanos; + + @Override + public void run() + { + approxStartTimeNanos = MonotonicClock.Global.approxTime.now(); + + try + { + Data call = callable.call(); + receiver.accept(call, null); + } + catch (Throwable t) + { + logger.debug("AsyncChain Callable threw an Exception", t); + receiver.accept(null, t); + } + } + + @Override + public long creationTimeNanos() + { + return approxCreationTimeNanos; + } + + @Override + public long startTimeNanos() + { + return approxStartTimeNanos; + } + + @Override + public String description() + { + return command.toCQLString(); + } + } + ); } - static final IVersionedSerializer serializer = new IVersionedSerializer() + static final IVersionedSerializer serializer = new IVersionedSerializer<>() { @Override public void serialize(TxnNamedRead read, DataOutputPlus out, int version) throws IOException diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java index 7364db38c00a..fa6b146d77c2 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java @@ -78,6 +78,16 @@ public Map toMap(@Nullable Set keyspaceNames, @Nullable "version", PojoToString.CURRENT_VERSION); } + public Collection tableStates() + { + return tableStates.values(); + } + + public List tableStatesFor(List tableIDs) + { + return tableIDs.stream().map(tableStates::get).collect(Collectors.toList()); + } + private List> tableStatesAsMaps(@Nullable Set keyspaceNames, @Nullable Set tableNames) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java b/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java index b0c3902ad152..36220e3853cf 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java @@ -21,16 +21,17 @@ import java.io.IOException; import java.util.concurrent.Callable; import java.util.concurrent.CyclicBarrier; -import java.util.concurrent.TimeUnit; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; import net.bytebuddy.implementation.bind.annotation.SuperCall; +import org.awaitility.Awaitility; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import accord.impl.SimpleProgressLog; import com.datastax.driver.core.Session; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Mutation; @@ -39,12 +40,15 @@ import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.Row; import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.utils.Throwables; +import static java.util.concurrent.TimeUnit.SECONDS; import static net.bytebuddy.matcher.ElementMatchers.named; import static net.bytebuddy.matcher.ElementMatchers.takesArguments; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; public class QueriesTableTest extends TestBaseImpl @@ -57,7 +61,10 @@ public class QueriesTableTest extends TestBaseImpl public static void createCluster() throws IOException { SHARED_CLUSTER = init(Cluster.build(1).withInstanceInitializer(QueryDelayHelper::install) - .withConfig(c -> c.with(Feature.NATIVE_PROTOCOL, Feature.GOSSIP)).start()); + .withConfig(c -> c.with(Feature.NATIVE_PROTOCOL, Feature.GOSSIP) + .set("write_request_timeout", "10s") + .set("transaction_timeout", "15s")).start()); + DRIVER_CLUSTER = JavaDriverUtils.create(SHARED_CLUSTER); SESSION = DRIVER_CLUSTER.connect(); } @@ -79,19 +86,31 @@ public static void closeCluster() public void shouldExposeReadsAndWrites() throws Throwable { SHARED_CLUSTER.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (k int primary key, v int)"); - - boolean readVisible = false; - boolean coordinatorReadVisible = false; - boolean writeVisible = false; - boolean coordinatorWriteVisible = false; - SESSION.executeAsync("INSERT INTO " + KEYSPACE + ".tbl (k, v) VALUES (0, 0)"); SESSION.executeAsync("SELECT * FROM " + KEYSPACE + ".tbl WHERE k = 0"); // Wait until the coordinator/local read and write are visible: + Awaitility.await() + .atMost(60, SECONDS) + .pollInterval(1, SECONDS) + .dontCatchUncaughtExceptions() + .untilAsserted(QueriesTableTest::assertReadsAndWritesVisible); + + // Issue another read and write to unblock the original queries in progress: + SESSION.execute("INSERT INTO " + KEYSPACE + ".tbl (k, v) VALUES (0, 0)"); + SESSION.execute("SELECT * FROM " + KEYSPACE + ".tbl WHERE k = 0"); + + waitForQueriesToFinish(); + } + + private static void assertReadsAndWritesVisible() + { SimpleQueryResult result = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.queries"); - while (result.toObjectArrays().length < 4) - result = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.queries"); + + boolean readVisible = false; + boolean coordinatorReadVisible = false; + boolean writeVisible = false; + boolean coordinatorWriteVisible = false; while (result.hasNext()) { @@ -105,32 +124,38 @@ public void shouldExposeReadsAndWrites() throws Throwable coordinatorWriteVisible |= threadId.contains("Native-Transport-Requests") && task.contains("INSERT"); } - // Issue another read and write to unblock the original queries in progress: - SESSION.execute("INSERT INTO " + KEYSPACE + ".tbl (k, v) VALUES (0, 0)"); - SESSION.execute("SELECT * FROM " + KEYSPACE + ".tbl WHERE k = 0"); - assertTrue(readVisible); assertTrue(coordinatorReadVisible); assertTrue(writeVisible); assertTrue(coordinatorWriteVisible); - - waitForQueriesToFinish(); } @Test public void shouldExposeCAS() throws Throwable { SHARED_CLUSTER.schemaChange("CREATE TABLE " + KEYSPACE + ".cas_tbl (k int primary key, v int)"); - - boolean readVisible = false; - boolean coordinatorUpdateVisible = false; - SESSION.executeAsync("UPDATE " + KEYSPACE + ".cas_tbl SET v = 10 WHERE k = 0 IF v = 0"); // Wait until the coordinator update and local read required by the CAS operation are visible: + Awaitility.await() + .atMost(60, SECONDS) + .pollInterval(1, SECONDS) + .dontCatchUncaughtExceptions() + .untilAsserted(QueriesTableTest::assertCasVisible); + + // Issue a read to unblock the read generated by the original CAS operation: + SESSION.executeAsync("SELECT * FROM " + KEYSPACE + ".cas_tbl WHERE k = 0"); + + + waitForQueriesToFinish(); + } + + private static void assertCasVisible() + { SimpleQueryResult result = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.queries"); - while (result.toObjectArrays().length < 2) - result = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.queries"); + + boolean readVisible = false; + boolean coordinatorUpdateVisible = false; while (result.hasNext()) { @@ -142,26 +167,81 @@ public void shouldExposeCAS() throws Throwable coordinatorUpdateVisible |= threadId.contains("Native-Transport-Requests") && task.contains("UPDATE"); } - // Issue a read to unblock the read generated by the original CAS operation: - SESSION.executeAsync("SELECT * FROM " + KEYSPACE + ".cas_tbl WHERE k = 0"); - assertTrue(readVisible); assertTrue(coordinatorUpdateVisible); + } + + @Test + public void shouldExposeTransaction() throws Throwable + { + SHARED_CLUSTER.schemaChange("CREATE TABLE " + KEYSPACE + ".accord_tbl (k int primary key, v int) WITH transactional_mode='full'"); + + // Disable recovery to make sure only one local read occurs: + for (IInvokableInstance instance : SHARED_CLUSTER) + instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = true); + + String update = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM " + KEYSPACE + ".accord_tbl WHERE k = 0);\n" + + " SELECT row1.k, row1.v;\n" + + " IF row1.v = 0 THEN\n" + + " UPDATE " + KEYSPACE + ".accord_tbl SET v = 10 WHERE k = 0;\n" + + " END IF\n" + + "COMMIT TRANSACTION"; + + SESSION.executeAsync(update); + + // Wait until the coordinator update and local read required by the CAS operation are visible: + Awaitility.await() + .atMost(60, SECONDS) + .pollInterval(1, SECONDS) + .dontCatchUncaughtExceptions() + .untilAsserted(QueriesTableTest::assertTransactionVisible); + + // Issue a read to unblock the read generated by the original CAS operation: + SESSION.executeAsync("SELECT * FROM " + KEYSPACE + ".accord_tbl WHERE k = 0"); waitForQueriesToFinish(); } + private static void assertTransactionVisible() + { + SimpleQueryResult queries = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.queries"); + + boolean readVisible = false; + boolean coordinatorTxnVisible = false; + + while (queries.hasNext()) + { + Row row = queries.next(); + String threadId = row.get("thread_id").toString(); + String task = row.get("task").toString(); + + readVisible |= threadId.contains("Read") && task.contains("SELECT"); + coordinatorTxnVisible |= threadId.contains("Native-Transport-Requests") && task.contains("BEGIN TRANSACTION"); + } + + assertTrue(readVisible); + assertTrue(coordinatorTxnVisible); + + SimpleQueryResult txns = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.accord_coordination_status"); + assertTrue(txns.hasNext()); + Row txn = txns.next(); + assertEquals(1, txn.getInteger("node_id").intValue()); + assertEquals("Key", txn.getString("domain")); + } + private static void waitForQueriesToFinish() throws InterruptedException { // Continue to query the "queries" table until nothing is in progress... SimpleQueryResult result = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.queries"); while (result.hasNext()) { - TimeUnit.SECONDS.sleep(1); + SECONDS.sleep(1); result = SHARED_CLUSTER.get(1).executeInternalWithResult("SELECT * FROM system_views.queries"); } } + @SuppressWarnings("resource") public static class QueryDelayHelper { private static final CyclicBarrier readBarrier = new CyclicBarrier(2); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java index 235df7ebf7af..0e9a47f71c2c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -24,7 +24,7 @@ import java.util.function.Function; import com.google.common.base.Throwables; -import org.apache.cassandra.service.consensus.TransactionalMode; + import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -33,6 +33,8 @@ import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IMessageFilters; +import org.apache.cassandra.distributed.api.Row; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.metrics.AccordMetrics; @@ -42,6 +44,7 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.assertj.core.data.Offset; import static org.assertj.core.api.Assertions.assertThat; @@ -225,6 +228,16 @@ private void assertCoordinatorMetrics(int node, String scope, long fastPaths, lo assertThat(metric.apply(AccordMetrics.RECOVERY_TIME)).isEqualTo(recoveries); assertThat(metric.apply(AccordMetrics.DEPENDENCIES)).isEqualTo(fastPaths + slowPaths); + // Verify that coordinator metrics are published to the appropriate virtual table: + SimpleQueryResult res = SHARED_CLUSTER.get(node + 1) + .executeInternalWithResult("SELECT * FROM system_metrics.accord_coordinator_group WHERE scope = ?", scope); + while (res.hasNext()) + { + Row metricRow = res.next(); + String name = metricRow.getString("name"); + assertThat(metrics).containsKey(name); + } + if ((fastPaths + slowPaths) > 0) { String fastPathToTotalName = nameFactory.createMetricName(AccordMetrics.FAST_PATH_TO_TOTAL + "." + RatioGaugeSet.MEAN_RATIO).getMetricName(); @@ -242,13 +255,29 @@ private void assertReplicaMetrics(int node, String scope, long stable, long exec assertThat(metric.apply(AccordMetrics.APPLY_LATENCY)).isEqualTo(applications); assertThat(metric.apply(AccordMetrics.APPLY_DURATION)).isEqualTo(applications); assertThat(metric.apply(AccordMetrics.PARTIAL_DEPENDENCIES)).isEqualTo(executions); + + // Verify that replica metrics are published to the appropriate virtual table: + SimpleQueryResult vtableResults = SHARED_CLUSTER.get(node + 1) + .executeInternalWithResult("SELECT * FROM system_metrics.accord_replica_group WHERE scope = ?", scope); + + while (vtableResults.hasNext()) + { + Row metricRow = vtableResults.next(); + String name = metricRow.getString("name"); + assertThat(metrics).containsKey(name); + } + + // Verify that per-store global cache stats are published to the appropriate virtual table: + SimpleQueryResult storeCacheResults = SHARED_CLUSTER.get(node + 1) + .executeInternalWithResult("SELECT * FROM system_views.accord_command_store_cache"); + assertThat(storeCacheResults).hasNext(); } private Map> getMetrics() { Map> metrics = new HashMap<>(); for (int i = 0; i < SHARED_CLUSTER.size(); i++) - metrics.put(i, SHARED_CLUSTER.get(i + 1).metrics().getCounters(name -> name.startsWith("org.apache.cassandra.metrics.accord-"))); + metrics.put(i, SHARED_CLUSTER.get(i + 1).metrics().getCounters(name -> name.startsWith("org.apache.cassandra.metrics.Accord"))); return metrics; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index 0fe835c6c925..fc065e4edc29 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -28,8 +28,10 @@ import java.util.Map; import java.util.function.Consumer; import java.util.function.Function; +import java.util.stream.Collectors; import javax.annotation.Nonnull; +import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableList; import org.junit.After; @@ -39,8 +41,8 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.yaml.snakeyaml.Yaml; -import com.fasterxml.jackson.core.type.TypeReference; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.Config.PaxosVariant; @@ -57,6 +59,8 @@ import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.NodeToolResult; +import org.apache.cassandra.distributed.api.Row; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.gms.EndpointState; import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.InetAddressAndPort; @@ -85,11 +89,14 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JsonUtils; import org.apache.cassandra.utils.PojoToString; -import org.yaml.snakeyaml.Yaml; -import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; import static java.util.Collections.emptyList; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + import static org.apache.cassandra.Util.spinUntilSuccess; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.db.SystemKeyspace.CONSENSUS_MIGRATION_STATE; @@ -102,7 +109,6 @@ import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV2; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PROMISE; import static org.assertj.core.api.Fail.fail; -import static org.junit.Assert.*; /* * This test suite is intended to serve as an integration test with some pretty good visibility into actual execution @@ -137,7 +143,7 @@ public class AccordMigrationTest extends AccordTestBase // To create a precise repair where the repaired range is fully contained in a locally replicated range // we need to align with this token. The local ranges are (9223372036854775805,-1] and (-1,9223372036854775805] // No idea why the partitioner creates such an - private Token maxAlignedWithLocalRanges = new LongToken(9223372036854775805L); + private final Token maxAlignedWithLocalRanges = new LongToken(9223372036854775805L); @Override protected Logger logger() @@ -677,10 +683,17 @@ private static void assertMigrationState(String tableName, ConsensusMigrationTar for (IInvokableInstance instance : SHARED_CLUSTER) { ConsensusMigrationState snapshot = getMigrationStateSnapshot(instance); + for (String tableId : tableIds) { TableMigrationState state = snapshot.tableStates.get(TableId.fromString(tableId)); assertNotNull(state); + + SimpleQueryResult vtableResult = + instance.executeInternalWithResult("SELECT * FROM system_views.consensus_migration_state WHERE keyspace_name = ? AND table_name = ? ", + state.keyspaceName, state.tableName); + assertTrue(vtableResult.hasNext()); + assertEquals(KEYSPACE, state.keyspaceName); assertEquals(tableName, state.tableName); assertEquals(target, state.targetProtocol); @@ -692,11 +705,31 @@ private static void assertMigrationState(String tableName, ConsensusMigrationTar assertEquals(0, state.migratingRangesByEpoch.size()); else assertEquals(migratingRanges, state.migratingRangesByEpoch.values().iterator().next()); + + Row vtableState = vtableResult.next(); + assertVtableState(state, vtableState); } } }); } + private static void assertVtableState(TableMigrationState expectedState, Row vtableState) + { + List vtableMigratedRanges = vtableState.getList("migrated_ranges"); + assertEquals(expectedState.migratedRanges, vtableMigratedRanges.stream().map(Range::fromString).collect(Collectors.toList())); + + Map> vtableMigratingByEpoch = vtableState.get("migrating_ranges_by_epoch"); + Map>> pojoMigratingByEpoch = new LinkedHashMap<>(); + + for (Map.Entry> entry : vtableMigratingByEpoch.entrySet()) + pojoMigratingByEpoch.put(entry.getKey(), entry.getValue().stream().map(Range::fromString).collect(toImmutableList())); + + if (expectedState.migratingRanges.isEmpty()) + assertEquals(0, pojoMigratingByEpoch.size()); + else + assertEquals(expectedState.migratingRanges, pojoMigratingByEpoch.values().iterator().next()); + } + /** * Save a promise that is after the committed one to make a subsequent read not linearizable */ From feb8d2e2b2c7e0a7b6d5dfa1f95618ebca5c9c81 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Fri, 17 May 2024 15:27:44 -0400 Subject: [PATCH 113/340] Accord barrier/inclusive sync point fixes Patch by Ariel Weisberg, Benedict Elliott Smith; reviewed by Benedict Elliott Smith for CASSANDRA-19641 --- modules/accord | 2 +- .../service/accord/AccordService.java | 2 +- .../service/accord/api/AccordAgent.java | 10 +- .../migration/ConsensusKeyMigrationState.java | 20 ++- .../accord/AccordIncrementalRepairTest.java | 6 +- .../test/accord/AccordMigrationTest.java | 169 ++++++++++++------ 6 files changed, 132 insertions(+), 77 deletions(-) diff --git a/modules/accord b/modules/accord index 778c45cd9775..4e8bcae81f97 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 778c45cd977576a901abf24a9759872d36fde056 +Subproject commit 4e8bcae81f9751b9d732fd5056bce31c97ad58f3 diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index ab0f501d141e..9d8c42656afa 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -371,7 +371,7 @@ public IVerbHandler verbHandler() { logger.debug("Starting barrier key: {} epoch: {} barrierType: {} isForWrite {}", keysOrRanges, epoch, barrierType, isForWrite); txnId = node.nextTxnId(Kind.SyncPoint, keysOrRanges.domain()); - AsyncResult asyncResult = syncPoint == null + AsyncResult asyncResult = syncPoint == null ? Barrier.barrier(node, keysOrRanges, epoch, barrierType) : Barrier.barrier(node, keysOrRanges, epoch, barrierType, syncPoint); long deadlineNanos = requestTime.startedAtNanos() + timeoutNanos; diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 9c4b678996f9..c0fea38a37c2 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -19,12 +19,11 @@ package org.apache.cassandra.service.accord.api; import java.util.concurrent.TimeUnit; +import javax.annotation.Nonnull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nonnull; - import accord.api.Agent; import accord.api.EventsListener; import accord.api.Result; @@ -35,8 +34,9 @@ import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; -import org.apache.cassandra.service.accord.AccordService; +import accord.primitives.TxnId; import org.apache.cassandra.metrics.AccordMetrics; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.tcm.Epoch; @@ -84,12 +84,12 @@ public void onFailedBootstrap(String phase, Ranges ranges, Runnable retry, Throw } @Override - public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull Timestamp executeAt) + public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull TxnId txnId) { if (keysOrRanges.domain() == Key) { PartitionKey key = (PartitionKey)keysOrRanges.get(0); - maybeSaveAccordKeyMigrationLocally(key, Epoch.create(executeAt.epoch())); + maybeSaveAccordKeyMigrationLocally(key, Epoch.create(txnId.epoch())); } } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java index 7a0bbaa1ffed..9e051ccc5e89 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java @@ -27,6 +27,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import accord.api.BarrierType; import accord.primitives.Seekables; @@ -69,9 +71,7 @@ import org.apache.cassandra.utils.UUIDSerializer; import static org.apache.cassandra.net.Verb.CONSENSUS_KEY_MIGRATION; - import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget.paxos; - import static org.apache.cassandra.utils.Clock.Global.nanoTime; /** @@ -84,6 +84,8 @@ */ public abstract class ConsensusKeyMigrationState { + private static final Logger logger = LoggerFactory.getLogger(ConsensusKeyMigrationState.class); + /* * Used to notify other replicas when key migration has occurred so they can * also cache that the key migration was done @@ -189,12 +191,14 @@ private boolean paxosReadSatisfiedByKeyMigration() private static final CacheLoader, ConsensusMigratedAt> LOADING_FUNCTION = k -> SystemKeyspace.loadConsensusKeyMigrationState(k.left, k.right); private static final Weigher, ConsensusMigratedAt> WEIGHER_FUNCTION = (k, v) -> EMPTY_KEY_SIZE + Ints.checkedCast(ByteBufferUtil.estimatedSizeOnHeap(k.left)) + VALUE_SIZE; - private static final LoadingCache, ConsensusMigratedAt> MIGRATION_STATE_CACHE = - Caffeine.newBuilder() - .maximumWeight(DatabaseDescriptor.getConsensusMigrationCacheSizeInMiB() << 20) - .weigher(WEIGHER_FUNCTION) - .executor(ImmediateExecutor.INSTANCE) - .build(LOADING_FUNCTION); + + @VisibleForTesting + public static final LoadingCache, ConsensusMigratedAt> MIGRATION_STATE_CACHE = + Caffeine.newBuilder() + .maximumWeight(DatabaseDescriptor.getConsensusMigrationCacheSizeInMiB() << 20) + .weigher(WEIGHER_FUNCTION) + .executor(ImmediateExecutor.INSTANCE) + .build(LOADING_FUNCTION); public static final IVerbHandler consensusKeyMigrationFinishedHandler = message -> { saveConsensusKeyMigrationLocally(message.payload.partitionKey, message.payload.tableId, message.payload.consensusMigratedAt); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java index 66cf1c0e00b2..1e76473eebff 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -99,12 +99,12 @@ public String toString() private final List barriers = new ArrayList<>(); @Override - public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull Timestamp executeAt) + public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull TxnId txnId) { - super.onLocalBarrier(keysOrRanges, executeAt); + super.onLocalBarrier(keysOrRanges, txnId); synchronized (barriers) { - barriers.add(new ExecutedBarrier(keysOrRanges, executeAt)); + barriers.add(new ExecutedBarrier(keysOrRanges, txnId)); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index fc065e4edc29..2eea1f5a392c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -26,14 +26,13 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nonnull; -import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableList; - import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -41,12 +40,14 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.yaml.snakeyaml.Yaml; +import com.fasterxml.jackson.core.type.TypeReference; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.Config.PaxosVariant; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.BufferDecoratedKey; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; @@ -70,9 +71,10 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; -import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; +import org.apache.cassandra.service.consensus.migration.ConsensusMigratedAt; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; import org.apache.cassandra.service.consensus.migration.TableMigrationState; import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.service.paxos.Ballot; @@ -88,15 +90,13 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.JsonUtils; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.PojoToString; +import org.yaml.snakeyaml.Yaml; +import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; import static java.util.Collections.emptyList; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - import static org.apache.cassandra.Util.spinUntilSuccess; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.db.SystemKeyspace.CONSENSUS_MIGRATION_STATE; @@ -109,6 +109,9 @@ import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV2; import static org.apache.cassandra.service.paxos.PaxosState.MaybePromise.Outcome.PROMISE; import static org.assertj.core.api.Fail.fail; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; /* * This test suite is intended to serve as an integration test with some pretty good visibility into actual execution @@ -297,7 +300,7 @@ protected ConsensusRoutingDecision routeAndMaybeMigrate(ClusterMetadata cm, @Non * Helper to invoke a query and assert that the right metrics change indicating the correct * paths were taken to execute the query during migration */ - private static void assertTargetAccordWrite(Consumer query, int coordinatorIndex, int key, int expectedAccordWriteCount, int expectedCasWriteCount, int expectedKeyMigrationCount, int expectedCasBeginRejects, int expectedCasAcceptRejects) + private static void assertTargetAccordWrite(Consumer query, int coordinatorIndex, int key, List> expectedKeyMigrations, int expectedAccordWriteCount, int expectedCasWriteCount, int expectedKeyMigrationCount, int expectedCasBeginRejects, int expectedCasAcceptRejects) { int startingWriteCount = getAccordWriteCount(coordinatorIndex); int startingCasWriteCount = getCasWriteCount(coordinatorIndex); @@ -305,6 +308,7 @@ private static void assertTargetAccordWrite(Consumer query, int coordin int startingCasWriteBeginRejects = getCasWriteBeginRejects(coordinatorIndex); int startingCasWriteAcceptRejects = getCasWriteAcceptRejects(coordinatorIndex); query.accept(key); + validateKeyMigrations(expectedKeyMigrations); assertEquals("Accord writes", expectedAccordWriteCount, getAccordWriteCount(coordinatorIndex) - startingWriteCount); assertEquals("CAS writes", expectedCasWriteCount, getCasWriteCount(coordinatorIndex) - startingCasWriteCount); assertEquals("Key Migrations", expectedKeyMigrationCount, getKeyMigrationCount(coordinatorIndex) - startingKeyMigrationCount); @@ -312,7 +316,7 @@ private static void assertTargetAccordWrite(Consumer query, int coordin assertEquals("CAS Accept rejects", expectedCasAcceptRejects, getCasWriteAcceptRejects(coordinatorIndex) - startingCasWriteAcceptRejects); } - private static Object[][] assertTargetAccordRead(Function query, int coordinatorIndex, int key, int expectedAccordReadCount, int expectedCasPrepareCount, int expectedKeyMigrationCount, int expectedCasReadBeginRejects, int expectedCasReadAcceptRejects) + private static Object[][] assertTargetAccordRead(Function query, int coordinatorIndex, int key, List> expectedKeyMigrations, int expectedAccordReadCount, int expectedCasPrepareCount, int expectedKeyMigrationCount, int expectedCasReadBeginRejects, int expectedCasReadAcceptRejects) { int startingReadCount = getAccordReadCount(coordinatorIndex); int startingCasPrepareCount = getCasPrepareCount(coordinatorIndex); @@ -320,6 +324,7 @@ private static Object[][] assertTargetAccordRead(Function q int startingCasReadBeginRejects = getCasReadBeginRejects(coordinatorIndex); int startingCasReadAcceptRejects = getCasReadAcceptRejects(coordinatorIndex); Object[][] result = query.apply(key); + validateKeyMigrations(expectedKeyMigrations); assertEquals("Accord reads", expectedAccordReadCount, getAccordReadCount(coordinatorIndex) - startingReadCount); assertEquals("CAS prepares", expectedCasPrepareCount, getCasPrepareCount(coordinatorIndex) - startingCasPrepareCount); assertEquals("Key Migrations", expectedKeyMigrationCount, getKeyMigrationCount(coordinatorIndex) - startingKeyMigrationCount); @@ -328,7 +333,7 @@ private static Object[][] assertTargetAccordRead(Function q return result; } - private static void assertTargetPaxosWrite(Consumer query, int coordinatorIndex, int key, int expectedAccordWriteCount, int expectedCasWriteCount, int expectedKeyMigrationCount, int expectedMigrationRejects, int expectedSkippedReads) + private static void assertTargetPaxosWrite(Consumer query, int coordinatorIndex, int key, List> expectedKeyMigrations, int expectedAccordWriteCount, int expectedCasWriteCount, int expectedKeyMigrationCount, int expectedMigrationRejects, int expectedSkippedReads) { int startingWriteCount = getAccordWriteCount(coordinatorIndex); int startingCasWriteCount = getCasWriteCount(coordinatorIndex); @@ -336,6 +341,7 @@ private static void assertTargetPaxosWrite(Consumer query, int coordina int startingMigrationRejectsCount = getAccordMigrationRejects(coordinatorIndex); int startingSkippedReadsCount = getAccordMigrationSkippedReads(); query.accept(key); + validateKeyMigrations(expectedKeyMigrations); assertEquals("Accord writes", expectedAccordWriteCount, getAccordWriteCount(coordinatorIndex) - startingWriteCount); assertEquals("CAS writes", expectedCasWriteCount, getCasWriteCount(coordinatorIndex) - startingCasWriteCount); assertEquals("Key Migrations", expectedKeyMigrationCount, getKeyMigrationCount(coordinatorIndex) - startingKeyMigrationCount); @@ -343,12 +349,60 @@ private static void assertTargetPaxosWrite(Consumer query, int coordina assertEquals("Accord skipped reads", expectedSkippedReads, getAccordMigrationSkippedReads() - startingSkippedReadsCount); } + private static void validateKeyMigrations(List> expectedMigrations) + { + spinUntilSuccess(() -> { + try + { + List keys = expectedMigrations.stream().map(p -> p.left.array()).collect(Collectors.toList()); + List intKeys = expectedMigrations.stream().map(p -> ByteBufferUtil.toInt(p.left)).collect(Collectors.toList()); + List tables = expectedMigrations.stream().map(p -> p.right).collect(Collectors.toList()); + for (int i = 1; i < SHARED_CLUSTER.size(); i++) + { + int instanceIndex = i; + IInvokableInstance instance = SHARED_CLUSTER.get(i); + instance.runOnInstance(() -> { + Map, ConsensusMigratedAt> cacheMap = ConsensusKeyMigrationState.MIGRATION_STATE_CACHE.asMap(); + String cacheMessage = format("Instance %d Expected %s migrations but found in cache %s", instanceIndex, intKeys, cacheMap); + assertEquals(cacheMessage, keys.size(), cacheMap.size()); + for (int j = 0; j < keys.size(); j++) + { + assertTrue(cacheMessage, + cacheMap.containsKey(Pair.create(ByteBuffer.wrap(keys.get(j)), tables.get(j)))); + } + + UntypedResultSet result = QueryProcessor.executeInternal("SELECT * from " + SYSTEM_KEYSPACE_NAME + "." + CONSENSUS_MIGRATION_STATE); + String tableMessage = format("Instance %d Expected %s migrations but found in system table %s", instanceIndex, intKeys, result); + assertEquals(tableMessage, keys.size(), result.size()); + Iterator resultIterator = result.iterator(); + for (int j = 0; j < result.size(); j++) + { + UntypedResultSet.Row row = resultIterator.next(); + boolean foundKey = false; + for (byte[] expectedKey : keys) + if (ByteBuffer.wrap(expectedKey).equals(row.getBytes("row_key"))) + foundKey = true; + assertTrue(tableMessage, foundKey); + } + }); + } + } + catch (Throwable t) + { + // For some reason full stack trace wasn't displayed without rethrowing + throw new AssertionError(t); + } + }); + } + @Test public void testPaxosToAccordCAS() throws Exception { test(format(TABLE_FMT, qualifiedTableName), cluster -> { + List> expectedKeyMigrations = new ArrayList<>(); String table = tableName; + UUID tableUUID = cluster.get(1).callOnInstance(() -> ColumnFamilyStore.getIfExists(KEYSPACE, table).getTableId().asUUID()); cluster.forEach(node -> node.runOnInstance(() -> { TableMetadata tbl = Schema.instance.getTableMetadata(KEYSPACE, table); Assert.assertEquals(TransactionalMode.off, tbl.params.transactionalMode); @@ -374,24 +428,25 @@ public void testPaxosToAccordCAS() throws Exception List> migratingRanges = ImmutableList.of(migratingRange); // Not actually migrating yet so should do nothing special - assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 0, 1, 0, 0, 0); + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, expectedKeyMigrations, 0, 1, 0, 0, 0); // Mark ranges migrating and check migration state is correct nodetool(coordinator, "consensus_admin", "begin-migration", "-st", midToken.toString(), "-et", maxToken.toString(), "-tp", "accord", KEYSPACE, tableName); assertMigrationState(tableName, ConsensusMigrationTarget.accord, emptyList(), migratingRanges, 1); // Should be routed directly to Accord, and perform key migration, as well as key migration read in Accord - assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 1, 0, 1, 0, 0); + addExpectedMigratedKey(expectedKeyMigrations, migratingKey, tableUUID); + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, expectedKeyMigrations, 1, 0, 1, 0, 0); // Should not repeat key migration, and should still do a migration read in Accord - assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 1, 0, 0, 0, 0); + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, expectedKeyMigrations, 1, 0, 0, 0, 0); // Should run on Paxos since it is not in the migrating range - assertTargetAccordWrite(runCasNoApply, 1, notMigratingKey, 0, 1, 0, 0, 0); + assertTargetAccordWrite(runCasNoApply, 1, notMigratingKey, expectedKeyMigrations, 0, 1, 0, 0, 0); // Check that the coordinator on the other node also has saved that the key migration was performed // and runs the query on Accord immediately without key migration - assertTargetAccordWrite(runCasOnSecondNode, 2, migratingKey, 1, 0, 0, 0, 0); + assertTargetAccordWrite(runCasOnSecondNode, 2, migratingKey, expectedKeyMigrations, 1, 0, 0, 0, 0); // Forced repair while a node is down shouldn't work, use repair instead of finish-migration because repair exposes --force // and regular Cassandra repairs are eligible to drive migration so it's important they check --force and down nodes @@ -412,10 +467,10 @@ public void testPaxosToAccordCAS() throws Exception assertMigrationState(tableName, ConsensusMigrationTarget.accord, migratingRanges, emptyList(), 0); // Should run on Accord, and not perform key migration nor should it need to perform a migration read in Accord now that it is repaired - assertTargetAccordWrite(runCasNoApply, 1, migratingKey, 1, 0, 0, 0, 0); + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, expectedKeyMigrations, 1, 0, 0, 0, 0); // Should run on Paxos, and not perform key migration - assertTargetAccordWrite(runCasNoApply, 1, notMigratingKey, 0, 1, 0, 0, 0); + assertTargetAccordWrite(runCasNoApply, 1, notMigratingKey, expectedKeyMigrations, 0, 1, 0, 0, 0); // Pivot to testing repair with a subrange of the migrating range as well as key migration // Will use the unmigrated range between lowerMidToken and midToken @@ -430,7 +485,8 @@ public void testPaxosToAccordCAS() throws Exception saveAcceptedPaxosProposal(tableName, ballotString, migratingKey); // PaxosRepair will have inserted a condition matching row, so it can apply, demonstrating repair and // key migration occurred - assertTargetAccordWrite(runCasApplies, 1, migratingKey, 1, 0, 1, 0, 0); + addExpectedMigratedKey(expectedKeyMigrations, migratingKey, tableUUID); + assertTargetAccordWrite(runCasApplies, 1, migratingKey, expectedKeyMigrations, 1, 0, 1, 0, 0); // This will force the write to use the normal write patch cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); @@ -444,7 +500,8 @@ public void testPaxosToAccordCAS() throws Exception // This will force the request to run on Paxos up to Accept // and the accept will be rejected at both nodes and we are certain we need to retry the transaction cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); - assertTargetAccordWrite(runCasApplies, 1, migratingKey, 1, 1, 1, 0, 1); + addExpectedMigratedKey(expectedKeyMigrations, migratingKey, tableUUID); + assertTargetAccordWrite(runCasApplies, 1, migratingKey, expectedKeyMigrations, 1, 1, 1, 0, 1); // One node will now accept the other will reject and we are uncertain if we should retry the transaction // and should surface that as a timeout exception @@ -468,7 +525,9 @@ public void testPaxosToAccordCAS() throws Exception // retry it on Accord cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new RoutesToPaxosOnce())); // Should exit Paxos from begin, key migration should occur because it's a new key, and Accord will need to do a migration read - assertTargetAccordWrite(runCasNoApply, 1, testingKeys.next(), 1, 1, 1, 1, 0); + migratingKey = testingKeys.next(); + addExpectedMigratedKey(expectedKeyMigrations, migratingKey, tableUUID); + assertTargetAccordWrite(runCasNoApply, 1, migratingKey, expectedKeyMigrations, 1, 1, 1, 1, 0); // Now do two repairs to complete the migration repair, and we are done with black box integration testing // First repair is a range smack dab in the middle @@ -495,6 +554,9 @@ public void testPaxosToAccordSerialRead() throws Exception { test(format(TABLE_FMT, qualifiedTableName), cluster -> { + String table = tableName; + UUID tableUUID = cluster.get(1).callOnInstance(() -> ColumnFamilyStore.getIfExists(KEYSPACE, table).getTableId().asUUID()); + List> expectedKeyMigrations = new ArrayList<>(); cluster.schemaChange(format("ALTER TABLE %s.%s WITH transactional_mode='%s'", KEYSPACE, tableName, TransactionalMode.full)); String readCQL = format("SELECT * FROM %s WHERE id = ? and c = %s", qualifiedTableName, CLUSTERING_VALUE); Function runRead = key -> cluster.coordinator(1).execute(readCQL, SERIAL, key); @@ -502,17 +564,21 @@ public void testPaxosToAccordSerialRead() throws Exception List> migratingRanges = ImmutableList.of(migratingRange); int key = 0; - assertTargetAccordRead(runRead, 1, 0, 0, 1, 0, 0, 0); + assertTargetAccordRead(runRead, 1, key, expectedKeyMigrations, 0, 1, 0, 0, 0); // Mark wrap around range as migrating nodetool(coordinator, "consensus_admin", "begin-migration", "-st", String.valueOf(Long.MIN_VALUE + 1), "-et", String.valueOf(Long.MIN_VALUE), "-tp", "accord", KEYSPACE, tableName); assertMigrationState(tableName, ConsensusMigrationTarget.accord, emptyList(), migratingRanges, 1); // Should run directly on accord, migrate the key, and perform a quorum read from Accord, Paxos repair will run prepare once - assertTargetAccordRead(runRead, 1, key++, 1, 1, 1, 0, 0); + addExpectedMigratedKey(expectedKeyMigrations, key, tableUUID); + assertTargetAccordRead(runRead, 1, key, expectedKeyMigrations, 1, 1, 1, 0, 0); + key++; // Should run up to accept with both nodes refusing to accept savePromisedAndCommittedPaxosProposal(tableName, key); cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); - assertTargetAccordRead(runRead, 1, key++, 1, 2, 1, 0, 1); + addExpectedMigratedKey(expectedKeyMigrations, key, tableUUID); + assertTargetAccordRead(runRead, 1, key, expectedKeyMigrations, 1, 2, 1, 0, 1); + key++; }); } @@ -543,6 +609,7 @@ public void testAccordToPaxos() throws Exception String casCQL = format(CAS_FMT, qualifiedTableName, CLUSTERING_VALUE); Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); String tableName = qualifiedTableName.split("\\.")[1]; + UUID tableUUID = cluster.get(1).callOnInstance(() -> ColumnFamilyStore.getIfExists(KEYSPACE, tableName).getTableId().asUUID()); alterTableTransactionalMode(TransactionalMode.mixed_reads); assertTransactionalModes(TransactionalMode.mixed_reads, TransactionalMigrationFromMode.off); @@ -563,25 +630,33 @@ public void testAccordToPaxos() throws Exception Iterator paxosMigratingKeys = getKeysBetweenTokens(upperMidToken, maxToken); Iterator accordKeys = getKeysBetweenTokens(midToken, upperMidToken); + List> expectedKeyMigrations = new ArrayList<>(); + // Paxos non-migrating keys should run on Paxos as per normal - assertTargetPaxosWrite(runCasNoApply, 1, paxosNonMigratingKeys.next(), 0, 1, 0, 0, 0); + assertTargetPaxosWrite(runCasNoApply, 1, paxosNonMigratingKeys.next(), expectedKeyMigrations, 0, 1, 0, 0, 0); + Integer nextMigratingKey = paxosMigratingKeys.next(); + addExpectedMigratedKey(expectedKeyMigrations, nextMigratingKey, tableUUID); // Paxos migrating keys should be key migrated which means a local barrier is run by Paxos during read at each replica, the key migration barrier is also counted as a write - assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 1, 1, 1, 0, 0); + assertTargetPaxosWrite(runCasNoApply, 1, nextMigratingKey, expectedKeyMigrations, 1, 1, 1, 0, 0); // A key from a range migrated to Accord is now not migrating/migrated and should be accessed through Accord - assertTargetPaxosWrite(runCasNoApply, 1, accordKeys.next(), 1, 0, 0, 0, 0); + assertTargetPaxosWrite(runCasNoApply, 1, accordKeys.next(), expectedKeyMigrations, 1, 0, 0, 0, 0); // If an Accord transaction races with cluster metadata updates it should be rejected if the epoch it runs in contains the migration cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new RoutesToAccordOnce())); - assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 2, 1, 1, 1, 1); + nextMigratingKey = paxosMigratingKeys.next(); + addExpectedMigratedKey(expectedKeyMigrations, nextMigratingKey, tableUUID); + assertTargetPaxosWrite(runCasNoApply, 1, nextMigratingKey, expectedKeyMigrations, 2, 1, 1, 1, 1); // Repair the currently migrating range from when targets were switched, but it's not an Accord repair, this is to make sure the wrong repair type doesn't trigger progress nodetool(coordinator, "repair", "-st", upperMidToken.toString(), "-et", maxAlignedWithLocalRanges.toString(), "--paxos-only"); assertMigrationState(tableName, ConsensusMigrationTarget.paxos, ImmutableList.of(new Range(minToken, midToken), new Range(maxToken, minToken)), ImmutableList.of(accordMigratingRange), 1); // Paxos migrating keys should still need key migration after non-Accord repair - assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 1, 1, 1, 0, 0); + nextMigratingKey = paxosMigratingKeys.next(); + addExpectedMigratedKey(expectedKeyMigrations, nextMigratingKey, tableUUID); + assertTargetPaxosWrite(runCasNoApply, 1, nextMigratingKey, expectedKeyMigrations, 1, 1, 1, 0, 0); // Now do it with an Accord repair so key migration shouldn't be necessary nodetool(coordinator, "consensus_admin", "finish-migration", "-st", upperMidToken.toString(), "-et", maxAlignedWithLocalRanges.toString()); @@ -592,40 +667,16 @@ public void testAccordToPaxos() throws Exception assertMigrationState(tableName, ConsensusMigrationTarget.paxos, ImmutableList.of(new Range(minToken, midToken), repairedRange, new Range(maxToken, minToken)), ImmutableList.of(remainingRange), 1); // Paxos migrating keys shouldn't need key migration after Accord repair - assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), 0, 1, 0, 0, 0); + assertTargetPaxosWrite(runCasNoApply, 1, paxosMigratingKeys.next(), expectedKeyMigrations, 0, 1, 0, 0, 0); }); } - private static void assertCompletedMigrationState(String tableName) throws Throwable + private static void addExpectedMigratedKey(List> expectedKeyMigrations, Integer nextMigratingKey, UUID tableUUID) { - // Validate nodetool consensus admin list output - String yamlResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list"); - Map yamlStateMap = new Yaml().load(yamlResultString); - String minifiedYamlResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "minified-yaml"); - Map minifiedYamlStateMap = new Yaml().load(minifiedYamlResultString); - String jsonResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "json"); - Map jsonStateMap = JsonUtils.JSON_OBJECT_MAPPER.readValue(jsonResultString, new TypeReference>(){}); - String minifiedJsonResultString = nodetool(SHARED_CLUSTER.coordinator(1), "consensus_admin", "list", "-f", "minified-json"); - Map minifiedJsonStateMap = JsonUtils.JSON_OBJECT_MAPPER.readValue(minifiedJsonResultString, new TypeReference>(){}); - - for (Map migrationStateMap : ImmutableList.of(yamlStateMap, jsonStateMap, minifiedYamlStateMap, minifiedJsonStateMap)) { - assertEquals(PojoToString.CURRENT_VERSION, migrationStateMap.get("version")); - assertTrue(Epoch.EMPTY.getEpoch() < ((Number) migrationStateMap.get("lastModifiedEpoch")).longValue()); - List> tableStates = (List>) migrationStateMap.get("tableStates"); - assertEquals(0, tableStates.size()); - } - spinUntilSuccess(() -> { - for (IInvokableInstance instance : SHARED_CLUSTER) - { - ConsensusMigrationState snapshot = getMigrationStateSnapshot(instance); - assertEquals(0, snapshot.tableStates.size()); - instance.runOnInstance(() -> { - TableMetadata tbl = Schema.instance.getTableMetadata(KEYSPACE, tableName); - Assert.assertEquals(TransactionalMigrationFromMode.none, tbl.params.transactionalMigrationFrom); - }); - } - }); + ByteBuffer key = ByteBuffer.allocate(4); + key.putInt(0, nextMigratingKey); + expectedKeyMigrations.add(Pair.create(key, tableUUID)); } private static void assertMigrationState(String tableName, ConsensusMigrationTarget target, List> migratedRanges, List> migratingRanges, int numMigratingEpochs) throws Throwable From 128f526f4acd331257623b0d7f993135eb37bbc1 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Wed, 29 May 2024 19:16:26 +0200 Subject: [PATCH 114/340] Accord Journal Determinism: PreAccept replay stability Patch by Alex Petrov; reviewed by Aleksey Yeschenko for CASSANDRA-19664 --- modules/accord | 2 +- .../cassandra/journal/RecordPointer.java | 66 ++++++ .../service/accord/AccordCommandStore.java | 4 +- .../service/accord/AccordJournal.java | 206 +++++++----------- .../accord/AccordSafeCommandStore.java | 49 ++++- 5 files changed, 197 insertions(+), 130 deletions(-) create mode 100644 src/java/org/apache/cassandra/journal/RecordPointer.java diff --git a/modules/accord b/modules/accord index 4e8bcae81f97..84e89bd91cf1 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 4e8bcae81f9751b9d732fd5056bce31c97ad58f3 +Subproject commit 84e89bd91cf1b058fbf314b750336a1ec1096b18 diff --git a/src/java/org/apache/cassandra/journal/RecordPointer.java b/src/java/org/apache/cassandra/journal/RecordPointer.java new file mode 100644 index 000000000000..2b3e8ea6b84b --- /dev/null +++ b/src/java/org/apache/cassandra/journal/RecordPointer.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.journal; + +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; + +// TODO: make this available in the accord table as an ID +public class RecordPointer implements Comparable +{ + public final long segment; // unique segment id + public final int position; // record start position within the segment + + public RecordPointer(long segment, int position) + { + this.segment = segment; + this.position = position; + } + + @Override + public boolean equals(Object other) + { + if (this == other) + return true; + if (!(other instanceof RecordPointer)) + return false; + RecordPointer that = (RecordPointer) other; + return this.segment == that.segment + && this.position == that.position; + } + + @Override + public int hashCode() + { + return Long.hashCode(segment) + position * 31; + } + + @Override + public String toString() + { + return "(" + segment + ", " + position + ')'; + } + + @Override + public int compareTo(RecordPointer that) + { + int cmp = Longs.compare(this.segment, that.segment); + return cmp != 0 ? cmp : Ints.compare(this.position, that.position); + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 2a67ba656d56..c846038fd845 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -488,7 +488,9 @@ public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, timestampsForKeys.values().forEach(AccordSafeState::preExecute); if (commandsForRanges != null) commandsForRanges.preExecute(); - current = new AccordSafeCommandStore(preLoadContext, commands, timestampsForKeys, commandsForKeys, commandsForRanges, this); + + current = AccordSafeCommandStore.create(preLoadContext, commands, timestampsForKeys, commandsForKeys, commandsForRanges, this); + return current; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index ce90b2674704..0c31afbb4ca1 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -40,22 +40,16 @@ import com.google.common.collect.ListMultimap; import com.google.common.collect.Multimap; import com.google.common.primitives.Ints; -import com.google.common.primitives.Longs; - -import accord.messages.ApplyThenWaitUntilApplied; -import org.agrona.collections.Long2ObjectHashMap; -import org.agrona.collections.LongArrayList; -import org.agrona.collections.ObjectHashSet; -import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.local.Node.Id; import accord.local.Node; +import accord.local.Node.Id; import accord.local.SerializerSupport; import accord.messages.AbstractEpochRequest; import accord.messages.Accept; import accord.messages.Apply; +import accord.messages.ApplyThenWaitUntilApplied; import accord.messages.BeginRecovery; import accord.messages.Commit; import accord.messages.LocalRequest; @@ -63,6 +57,7 @@ import accord.messages.MessageType; import accord.messages.PreAccept; import accord.messages.Propagate; +import accord.messages.ReplyContext; import accord.messages.Request; import accord.messages.TxnRequest; import accord.primitives.Ballot; @@ -70,6 +65,9 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.MapReduceConsume; +import org.agrona.collections.Long2ObjectHashMap; +import org.agrona.collections.LongArrayList; +import org.agrona.collections.ObjectHashSet; import org.apache.cassandra.concurrent.Interruptible; import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.concurrent.SequentialExecutorPlus; @@ -83,13 +81,14 @@ import org.apache.cassandra.journal.Journal; import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.journal.Params; +import org.apache.cassandra.journal.RecordPointer; import org.apache.cassandra.journal.ValueSerializer; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.service.accord.interop.AccordInteropApply; -import org.apache.cassandra.service.accord.interop.AccordInteropCommit; import org.apache.cassandra.net.ResponseContext; import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.interop.AccordInteropApply; +import org.apache.cassandra.service.accord.interop.AccordInteropCommit; import org.apache.cassandra.service.accord.serializers.AcceptSerializers; import org.apache.cassandra.service.accord.serializers.ApplySerializers; import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; @@ -104,6 +103,8 @@ import org.apache.cassandra.utils.ByteArrayUtil; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.concurrent.Semaphore; +import org.apache.cassandra.utils.vint.VIntCoding; +import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.jctools.queues.SpscLinkedQueue; import static accord.messages.MessageType.ACCEPT_INVALIDATE_REQ; @@ -120,18 +121,18 @@ import static accord.messages.MessageType.INFORM_OF_TXN_REQ; import static accord.messages.MessageType.PRE_ACCEPT_REQ; import static accord.messages.MessageType.PROPAGATE_APPLY_MSG; -import static accord.messages.MessageType.PROPAGATE_STABLE_MSG; import static accord.messages.MessageType.PROPAGATE_OTHER_MSG; import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; +import static accord.messages.MessageType.PROPAGATE_STABLE_MSG; import static accord.messages.MessageType.SET_GLOBALLY_DURABLE_REQ; import static accord.messages.MessageType.SET_SHARD_DURABLE_REQ; +import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; +import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; +import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; -import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; -import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; -import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; @@ -165,7 +166,7 @@ public class AccordJournal implements IJournal, Shutdownable * A cache of deserialized journal records we keep to avoid fetching them from log when free memory allows it. * TODO (expected, performance): cap memory used for cached records */ - private final NonBlockingHashMap cachedRecords = new NonBlockingHashMap<>(); + private final NonBlockingHashMap cachedRecords = new NonBlockingHashMap<>(); Node node; @@ -292,88 +293,7 @@ private M readMessage(TxnId txnId, MessageType messageType, return null; } - private static class Pointer implements Comparable - { - final long segment; // unique segment id - final int position; // record start position within the segment - - Pointer(long segment, int position) - { - this.segment = segment; - this.position = position; - } - - @Override - public boolean equals(Object other) - { - if (this == other) - return true; - if (!(other instanceof Pointer)) - return false; - Pointer that = (Pointer) other; - return this.segment == that.segment - && this.position == that.position; - } - - @Override - public int hashCode() - { - return Long.hashCode(segment) + position * 31; - } - - @Override - public String toString() - { - return "(" + segment + ", " + position + ')'; - } - - @Override - public int compareTo(Pointer that) - { - int cmp = Longs.compare(this.segment, that.segment); - return cmp != 0 ? cmp : Ints.compare(this.position, that.position); - } - - int serializedSize() - { - return computeUnsignedVIntSize(segment) + computeUnsignedVIntSize(position); - } - - void serialize(DataOutputPlus out) throws IOException - { - out.writeUnsignedVInt(segment); - out.writeUnsignedVInt32(position); - } - - static Pointer deserialize(DataInputPlus in) throws IOException - { - long segment = in.readUnsignedVInt(); - int position = in.readUnsignedVInt32(); - return new Pointer(segment, position); - } - - static final IVersionedSerializer SERIALIZER = new IVersionedSerializer<>() - { - @Override - public void serialize(Pointer p, DataOutputPlus out, int version) throws IOException - { - p.serialize(out); - } - - @Override - public Pointer deserialize(DataInputPlus in, int version) throws IOException - { - return Pointer.deserialize(in); - } - - @Override - public long serializedSize(Pointer p, int version) - { - return Ints.checkedCast(p.serializedSize()); - } - }; - } - + // TODO (alexp): tests for objects that go through AccordJournal private class JournalCallbacks implements AsyncCallbacks { /** @@ -382,7 +302,7 @@ private class JournalCallbacks implements AsyncCallbacks @Override public void onWrite(long segment, int position, int size, Key key, Object value, Object writeContext) { - Pointer pointer = new Pointer(segment, position); + RecordPointer pointer = new RecordPointer(segment, position); cachedRecords.put(pointer, value); /* @@ -451,29 +371,40 @@ public void onFlushFailed(Throwable cause) * Context necessary to process log records */ - private static class RequestContext + static class RequestContext implements ReplyContext { final long waitForEpoch; - final Pointer pointer; + final RecordPointer pointer; + private long preAcceptTimeout; - RequestContext(long waitForEpoch, Pointer pointer) + RequestContext(long waitForEpoch, RecordPointer pointer) { this.waitForEpoch = waitForEpoch; this.pointer = pointer; } + + void preAcceptTimeout(long preAcceptTimeout) + { + this.preAcceptTimeout = preAcceptTimeout; + } + + public long preAcceptTimeout() + { + return preAcceptTimeout; + } } private static class LocalRequestContext extends RequestContext { private final BiConsumer callback; - LocalRequestContext(long waitForEpoch, BiConsumer callback, Pointer pointer) + LocalRequestContext(long waitForEpoch, BiConsumer callback, RecordPointer pointer) { super(waitForEpoch, pointer); this.callback = callback; } - static LocalRequestContext create(LocalRequest request, Pointer pointer) + static LocalRequestContext create(LocalRequest request, RecordPointer pointer) { return new LocalRequestContext(request.waitForEpoch(), request.callback(), pointer); } @@ -489,7 +420,7 @@ private static class RemoteRequestContext extends RequestContext implements Resp private final Verb verb; private final long expiresAtNanos; - RemoteRequestContext(long waitForEpoch, long id, InetAddressAndPort from, Verb verb, long expiresAtNanos, Pointer pointer) + RemoteRequestContext(long waitForEpoch, long id, InetAddressAndPort from, Verb verb, long expiresAtNanos, RecordPointer pointer) { super(waitForEpoch, pointer); this.id = id; @@ -498,7 +429,7 @@ private static class RemoteRequestContext extends RequestContext implements Resp this.expiresAtNanos = expiresAtNanos; } - static RemoteRequestContext create(long waitForEpoch, ResponseContext context, Pointer pointer) + static RemoteRequestContext create(long waitForEpoch, ResponseContext context, RecordPointer pointer) { return new RemoteRequestContext(waitForEpoch, context.id(), context.from(), context.verb(), context.expiresAtNanos(), pointer); } @@ -1155,9 +1086,9 @@ private void doRun() if (requests != null) { - ArrayList pointers = new ArrayList<>(requests.size()); + ArrayList pointers = new ArrayList<>(requests.size()); for (RequestContext req : requests) pointers.add(req.pointer); - FrameRecord frame = new FrameRecord(node.uniqueNow(), pointers); + FrameRecord frame = new FrameRecord(node.uniqueNow(), pointers, node.agent().preAcceptTimeout()); FrameContext context = new FrameContext(requests); appendAuxiliaryRecord(frame, context); } @@ -1178,20 +1109,20 @@ private final class FrameApplicator implements Runnable, Shutdownable private final ArrayList pendingFrames = new ArrayList<>(); /* furthest flushed journal segment + position */ - private volatile Pointer flushedUntil = null; + private volatile RecordPointer flushedUntil = null; private volatile SequentialExecutorPlus executor; /* invoked from FrameGenerator thread via appendAuxiliaryRecord() call */ - void onWrite(Pointer start, int size, FrameContext context) + void onWrite(RecordPointer start, int size, FrameContext context) { - newFrames.add(new PendingFrame(start, new Pointer(start.segment, start.position + size), context)); + newFrames.add(new PendingFrame(start, new RecordPointer(start.segment, start.position + size), context)); } /* invoked only from Journal Flusher thread (single) */ void onFlush(long segment, int position) { - flushedUntil = new Pointer(segment, position); + flushedUntil = new RecordPointer(segment, position); executor.submit(this); } @@ -1231,7 +1162,7 @@ public void run() pendingFrames.sort((f1, f2) -> f2.start.compareTo(f1.start)); } - Pointer flushedUntil = this.flushedUntil; + RecordPointer flushedUntil = this.flushedUntil; for (int i = pendingFrames.size() - 1; i >= 0; i--) { PendingFrame frame = pendingFrames.get(i); @@ -1246,13 +1177,15 @@ private void applyFrame(FrameRecord frame, FrameContext context) { Invariants.checkState(frame.pointers.size() == context.requestContexts.size()); for (int i = 0; i < frame.pointers.size(); i++) - applyRequest(frame.pointers.get(i), context.requestContexts.get(i)); + applyRequest(frame.pointers.get(i), context.requestContexts.get(i), frame.preAcceptTimeoutMicros); } - private void applyRequest(Pointer pointer, RequestContext context) + private void applyRequest(RecordPointer pointer, RequestContext context, long preAcceptTimeout) { Request request = (Request) cachedRecords.remove(pointer); Type type = Type.fromMessageType(request.type()); + if (type == Type.PRE_ACCEPT || type == Type.BEGIN_RECOVER) + context.preAcceptTimeout(preAcceptTimeout); if (type.isRemoteRequest()) { @@ -1264,6 +1197,7 @@ private void applyRequest(Pointer pointer, RequestContext context) { Invariants.checkState(type.isLocalRequest()); LocalRequestContext ctx = (LocalRequestContext) context; + // TODO (expected): Make Propagate PreAccept receive preAcceptTimeout and timestamps //noinspection unchecked,rawtypes ((LocalRequest) request).process(node, ctx.callback); } @@ -1276,11 +1210,11 @@ private void applyRequest(Pointer pointer, RequestContext context) */ private final class PendingFrame { - final Pointer start; - final Pointer end; + final RecordPointer start; + final RecordPointer end; final FrameContext context; - PendingFrame(Pointer start, Pointer end, FrameContext context) + PendingFrame(RecordPointer start, RecordPointer end, FrameContext context) { this.start = start; this.end = end; @@ -1311,14 +1245,40 @@ private static abstract class AuxiliaryRecord abstract Type type(); } + public static final IVersionedSerializer RECORD_POINTER_SERIALIZER = new IVersionedSerializer<>() + { + @Override + public void serialize(RecordPointer p, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt(p.segment); + out.writeUnsignedVInt32(p.position); + } + + @Override + public RecordPointer deserialize(DataInputPlus in, int version) throws IOException + { + long segment = in.readUnsignedVInt(); + int position = in.readUnsignedVInt32(); + return new RecordPointer(segment, position); + } + + @Override + public long serializedSize(RecordPointer p, int version) + { + return computeUnsignedVIntSize(p.segment) + computeUnsignedVIntSize(p.position); + } + }; + private static final class FrameRecord extends AuxiliaryRecord { - final List pointers; + final List pointers; + final long preAcceptTimeoutMicros; - FrameRecord(Timestamp timestamp, List pointers) + FrameRecord(Timestamp timestamp, List pointers, long preAcceptTimeoutMicros) { super(timestamp); this.pointers = pointers; + this.preAcceptTimeoutMicros = preAcceptTimeoutMicros; } @Override @@ -1332,19 +1292,21 @@ Type type() @Override public int serializedSize(Key key, FrameRecord frame, int userVersion) { - return Ints.checkedCast(serializedListSize(frame.pointers, userVersion, Pointer.SERIALIZER)); + return Ints.checkedCast(serializedListSize(frame.pointers, userVersion, RECORD_POINTER_SERIALIZER)) + + computeUnsignedVIntSize(frame.preAcceptTimeoutMicros); } @Override public void serialize(Key key, FrameRecord frame, DataOutputPlus out, int userVersion) throws IOException { - serializeList(frame.pointers, out, userVersion, Pointer.SERIALIZER); + serializeList(frame.pointers, out, userVersion, RECORD_POINTER_SERIALIZER); + VIntCoding.writeUnsignedVInt(frame.preAcceptTimeoutMicros, out); } @Override public FrameRecord deserialize(Key key, DataInputPlus in, int userVersion) throws IOException { - return new FrameRecord(key.timestamp, deserializeList(in, userVersion, Pointer.SERIALIZER)); + return new FrameRecord(key.timestamp, deserializeList(in, userVersion, RECORD_POINTER_SERIALIZER), VIntCoding.readUnsignedVInt(in)); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 9f4776c2e96a..63028355ede8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -33,6 +33,10 @@ import accord.local.CommandStores.RangesForEpoch; import accord.local.NodeTimeService; import accord.local.PreLoadContext; +import accord.messages.BeginRecovery; + +import accord.messages.PreAccept; +import accord.messages.TxnRequest; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; import accord.primitives.Deps; @@ -46,6 +50,7 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore { + private final long preAcceptTimeout; private final Map commands; private final NavigableMap commandsForKeys; private final NavigableMap timestampsForKeys; @@ -53,14 +58,16 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore commands, - NavigableMap timestampsForKey, - NavigableMap commandsForKey, - @Nullable AccordSafeCommandsForRanges commandsForRanges, - AccordCommandStore commandStore) + private AccordSafeCommandStore(PreLoadContext context, + long preAcceptTimeout, + Map commands, + NavigableMap timestampsForKey, + NavigableMap commandsForKey, + @Nullable AccordSafeCommandsForRanges commandsForRanges, + AccordCommandStore commandStore) { super(context); + this.preAcceptTimeout = preAcceptTimeout; this.commands = commands; this.timestampsForKeys = timestampsForKey; this.commandsForKeys = commandsForKey; @@ -69,6 +76,26 @@ public AccordSafeCommandStore(PreLoadContext context, this.ranges = commandStore.updateRangesForEpoch(); } + public static AccordSafeCommandStore create(PreLoadContext preLoadContext, + Map commands, + NavigableMap timestampsForKey, + NavigableMap commandsForKey, + @Nullable AccordSafeCommandsForRanges commandsForRanges, + AccordCommandStore commandStore) + { + long preAcceptTimeoutMicros = -1; + if ((preLoadContext instanceof PreAccept || preLoadContext instanceof BeginRecovery)) + { + TxnRequest preAccept = (TxnRequest) preLoadContext; + AccordJournal.RequestContext context = (AccordJournal.RequestContext) preAccept.replyContext(); + // TODO (required): SimulatedDepsTest and some other tests aren't calling preProcess, hence do not set context + if (context != null) + preAcceptTimeoutMicros = context.preAcceptTimeout(); + } + + return new AccordSafeCommandStore(preLoadContext, preAcceptTimeoutMicros, commands, timestampsForKey, commandsForKey, commandsForRanges, commandStore); + } + @Override protected AccordSafeCommand getCommandInternal(TxnId txnId) { @@ -156,9 +183,19 @@ public ProgressLog progressLog() @Override public NodeTimeService time() { + // TODO: safe command store should not have arbitrary time return commandStore.time(); } + @Override + public long preAcceptTimeout() + { + if (preAcceptTimeout == -1) + return super.preAcceptTimeout(); + + return preAcceptTimeout; + } + @Override public RangesForEpoch ranges() { From c909f2d66936fe917fb64474edcdb8f7ec802ecc Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Thu, 6 Jun 2024 15:07:57 -0700 Subject: [PATCH 115/340] ninja: fix cqlsh tests --- pylib/cqlshlib/test/test_cqlsh_output.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pylib/cqlshlib/test/test_cqlsh_output.py b/pylib/cqlshlib/test/test_cqlsh_output.py index 78dc0331454a..c32690b42496 100644 --- a/pylib/cqlshlib/test/test_cqlsh_output.py +++ b/pylib/cqlshlib/test/test_cqlsh_output.py @@ -690,6 +690,7 @@ def test_describe_columnfamily_output(self): AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'} AND memtable = 'default' AND crc_check_chance = 1.0 + AND fast_path = 'keyspace' AND default_time_to_live = 0 AND extensions = {} AND gc_grace_seconds = 864000 @@ -698,6 +699,8 @@ def test_describe_columnfamily_output(self): AND memtable_flush_period_in_ms = 0 AND min_index_interval = 128 AND read_repair = 'BLOCKING' + AND transactional_mode = 'off' + AND transactional_migration_from = 'none' AND speculative_retry = '99p';""" % quote_name(get_keyspace())) with cqlsh_testrun(tty=True, env=self.default_env) as c: @@ -791,7 +794,7 @@ def test_describe_schema_output(self): self.assertNoHasColors(output) # Since CASSANDRA-7622 'DESC FULL SCHEMA' also shows all VIRTUAL keyspaces self.assertIn('VIRTUAL KEYSPACE system_virtual_schema', output) - self.assertIn("\nCREATE KEYSPACE system_auth WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true;\n", + self.assertIn("\nCREATE KEYSPACE system_auth WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true AND fast_path = 'simple';\n", output) self.assertRegex(output, r'.*\s*$') From 39efba9c6ac069818e43d9dc49673562780d4744 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 31 May 2024 10:37:27 -0700 Subject: [PATCH 116/340] CEP-15: (Accord) Bootstraps LocalOnly txn can not be recreated from SerializerSupport patch by David Capwell; reviewed by Benedict Elliott Smith for CASSANDRA-19674 --- modules/accord | 2 +- .../cassandra/schema/SchemaProvider.java | 5 ++ .../service/accord/AccordJournal.java | 28 ++++--- .../service/accord/AccordKeyspace.java | 2 +- .../accord/AccordSafeCommandStore.java | 6 ++ .../accord/AccordSafeCommandsForRanges.java | 57 ++----------- .../service/accord/AccordService.java | 5 +- .../accord/ImmutableAccordSafeState.java | 84 +++++++++++++++++++ .../service/accord/api/PartitionKey.java | 5 +- .../service/accord/async/AsyncLoader.java | 7 +- .../accord/SimulatedAccordCommandStore.java | 24 +++++- .../SimulatedAccordCommandStoreTestBase.java | 71 ++++++++++++++++ .../async/SimulatedAsyncOperationTest.java | 64 ++++++++++++-- 13 files changed, 278 insertions(+), 82 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java diff --git a/modules/accord b/modules/accord index 84e89bd91cf1..cf10169067a8 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 84e89bd91cf1b058fbf314b750336a1ec1096b18 +Subproject commit cf10169067a8cd40fb876789a62439cc03fd2e9b diff --git a/src/java/org/apache/cassandra/schema/SchemaProvider.java b/src/java/org/apache/cassandra/schema/SchemaProvider.java index 844acfe31157..07ca374b546f 100644 --- a/src/java/org/apache/cassandra/schema/SchemaProvider.java +++ b/src/java/org/apache/cassandra/schema/SchemaProvider.java @@ -143,6 +143,11 @@ default IPartitioner getTablePartitioner(TableId id) return metadata == null ? null : metadata.partitioner; } + default IPartitioner getExistingTablePartitioner(TableId id) throws UnknownTableException + { + return getExistingTableMetadata(id).partitioner; + } + @Nullable default TableMetadataRef getTableMetadataRef(TableId id) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 0c31afbb4ca1..80cfdf31eac5 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -40,6 +40,12 @@ import com.google.common.collect.ListMultimap; import com.google.common.collect.Multimap; import com.google.common.primitives.Ints; + +import accord.messages.ApplyThenWaitUntilApplied; +import org.agrona.collections.Long2ObjectHashMap; +import org.agrona.collections.LongArrayList; +import org.agrona.collections.ObjectHashSet; +import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,7 +55,6 @@ import accord.messages.AbstractEpochRequest; import accord.messages.Accept; import accord.messages.Apply; -import accord.messages.ApplyThenWaitUntilApplied; import accord.messages.BeginRecovery; import accord.messages.Commit; import accord.messages.LocalRequest; @@ -65,9 +70,6 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.MapReduceConsume; -import org.agrona.collections.Long2ObjectHashMap; -import org.agrona.collections.LongArrayList; -import org.agrona.collections.ObjectHashSet; import org.apache.cassandra.concurrent.Interruptible; import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.concurrent.SequentialExecutorPlus; @@ -104,7 +106,6 @@ import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.concurrent.Semaphore; import org.apache.cassandra.utils.vint.VIntCoding; -import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.jctools.queues.SpscLinkedQueue; import static accord.messages.MessageType.ACCEPT_INVALIDATE_REQ; @@ -256,11 +257,11 @@ public void appendRemoteRequest(Request request, ResponseContext context) /** * Accord protocol messages originating from local node, e.g. Propagate. */ - public void appendLocalRequest(LocalRequest request) + public void appendLocalRequest(LocalRequest request, BiConsumer callback) { Type type = Type.fromMessageType(request.type()); Key key = new Key(type.txnId(request), type); - journal.asyncWrite(key, request, SENTINEL_HOSTS, null); + journal.asyncWrite(key, request, SENTINEL_HOSTS, callback); } @VisibleForTesting @@ -313,7 +314,7 @@ public void onWrite(long segment, int position, int size, Key key, Object value, if (key.type.isRemoteRequest()) frameAggregator.onWrite(RemoteRequestContext.create(((Request) value).waitForEpoch(), (ResponseContext) writeContext, pointer)); else if (key.type.isLocalRequest()) - frameAggregator.onWrite(LocalRequestContext.create((LocalRequest) value, pointer)); + frameAggregator.onWrite(LocalRequestContext.create((LocalRequest) value, (BiConsumer) writeContext, pointer)); else frameApplicator.onWrite(pointer, size, (FrameContext) writeContext); } @@ -404,9 +405,9 @@ private static class LocalRequestContext extends RequestContext this.callback = callback; } - static LocalRequestContext create(LocalRequest request, RecordPointer pointer) + static LocalRequestContext create(LocalRequest request, BiConsumer callback, RecordPointer pointer) { - return new LocalRequestContext(request.waitForEpoch(), request.callback(), pointer); + return new LocalRequestContext(request.waitForEpoch(), callback, pointer); } } @@ -1182,13 +1183,14 @@ private void applyFrame(FrameRecord frame, FrameContext context) private void applyRequest(RecordPointer pointer, RequestContext context, long preAcceptTimeout) { - Request request = (Request) cachedRecords.remove(pointer); - Type type = Type.fromMessageType(request.type()); + Message message = (Message) cachedRecords.remove(pointer); + Type type = Type.fromMessageType(message.type()); if (type == Type.PRE_ACCEPT || type == Type.BEGIN_RECOVER) context.preAcceptTimeout(preAcceptTimeout); if (type.isRemoteRequest()) { + Request request = (Request) message; RemoteRequestContext ctx = (RemoteRequestContext) context; Id from = endpointMapper.mappedId(ctx.from()); request.process(node, from, ctx); @@ -1199,7 +1201,7 @@ private void applyRequest(RecordPointer pointer, RequestContext context, long pr LocalRequestContext ctx = (LocalRequestContext) context; // TODO (expected): Make Propagate PreAccept receive preAcceptTimeout and timestamps //noinspection unchecked,rawtypes - ((LocalRequest) request).process(node, ctx.callback); + ((LocalRequest) message).process(node, ctx.callback); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 5298670e1337..1a476e1d8e0c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -1239,7 +1239,7 @@ static Command unsafeLoadCommand(AccordCommandStore commandStore, TxnId txnId) WaitingOnProvider waitingOn = deserializeWaitingOn(txnId, row); MessageProvider messages = commandStore.makeMessageProvider(txnId); - return SerializerSupport.reconstruct(commandStore.unsafeRangesForEpoch(), attrs, status, executeAt, executeAtLeast, promised, accepted, waitingOn, messages); + return SerializerSupport.reconstruct(commandStore.agent(), commandStore.unsafeRangesForEpoch(), attrs, status, executeAt, executeAtLeast, promised, accepted, waitingOn, messages); } catch (Throwable t) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 63028355ede8..c4baa6bb3fdb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -345,4 +345,10 @@ public void postExecute(Map commands, if (commandsForRanges != null) commandsForRanges.postExecute(); } + + @Override + public String toString() + { + return "AccordSafeCommandStore(id=" + commandStore().id() + ")"; + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java index 42fb0f6ef1b3..848df1d27031 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java @@ -21,61 +21,25 @@ import java.util.NavigableMap; import java.util.Objects; -import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.TxnId; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import org.apache.cassandra.utils.Pair; -public class AccordSafeCommandsForRanges implements AccordSafeState +public class AccordSafeCommandsForRanges extends ImmutableAccordSafeState { private final AsyncResult>> chain; - private final Ranges ranges; - private boolean invalidated; - private CommandsForRanges original, current; public AccordSafeCommandsForRanges(Ranges ranges, AsyncResult>> chain) { - this.ranges = ranges; + super(ranges); this.chain = chain; } public Ranges ranges() { - return ranges; - } - - @Override - public CommandsForRanges current() - { - checkNotInvalidated(); - return current; - } - - @Override - public void invalidate() - { - invalidated = true; - } - - @Override - public boolean invalidated() - { - return invalidated; - } - - @Override - public void set(CommandsForRanges update) - { - throw new UnsupportedOperationException(); - } - - @Override - public CommandsForRanges original() - { - checkNotInvalidated(); - return original; + return key(); } @Override @@ -85,17 +49,11 @@ public void preExecute() Pair> pair = AsyncChains.getUnchecked(chain); pair.left.close(); pair.left.get().entrySet().forEach(e -> pair.right.put(e.getKey(), e.getValue())); - current = original = new CommandsForRanges(ranges, pair.right); - } - - @Override - public void postExecute() - { - checkNotInvalidated(); + original = new CommandsForRanges(key, pair.right); } @Override - public AccordCachingState global() + public AccordCachingState global() { throw new UnsupportedOperationException(); } @@ -106,13 +64,13 @@ public boolean equals(Object o) if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; AccordSafeCommandsForRanges that = (AccordSafeCommandsForRanges) o; - return Objects.equals(original, that.original) && Objects.equals(current, that.current); + return Objects.equals(original, that.original); } @Override public int hashCode() { - return Objects.hash(original, current); + return Objects.hash(original); } @Override @@ -122,7 +80,6 @@ public String toString() "chain=" + chain + ", invalidated=" + invalidated + ", original=" + original + - ", current=" + current + '}'; } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 9d8c42656afa..c98a6255745b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -25,6 +25,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.LongSupplier; import java.util.stream.Collectors; @@ -568,11 +569,11 @@ public TopologyManager topology() } } - private void handleLocalRequest(LocalRequest request, Node node) + private void handleLocalRequest(LocalRequest request, BiConsumer callback, Node node) { // currently, we only create LocalRequests that have side effects and need to be persisted Invariants.checkState(request.type().hasSideEffects()); - journal.appendLocalRequest(request); + journal.appendLocalRequest(request, callback); } private static RequestTimeoutException newTimeout(TxnId txnId, Txn txn, ConsistencyLevel consistencyLevel) diff --git a/src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java b/src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java new file mode 100644 index 000000000000..850f6f7e8d0a --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import javax.annotation.Nullable; + +public abstract class ImmutableAccordSafeState implements AccordSafeState +{ + protected final K key; + @Nullable + protected V original; + protected boolean invalidated; + + protected ImmutableAccordSafeState(K key) + { + this.key = key; + } + + @Override + public K key() + { + return key; + } + + @Override + public V original() + { + checkNotInvalidated(); + return original; + } + + @Override + public V current() + { + checkNotInvalidated(); + return original; + } + + @Override + public void invalidate() + { + invalidated = true; + } + + @Override + public boolean invalidated() + { + return invalidated; + } + + @Override + public void set(V update) + { + throw new UnsupportedOperationException(); + } + + @Override + public void revert() + { + checkNotInvalidated(); + } + + @Override + public void postExecute() + { + checkNotInvalidated(); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index a42fcf57bfd2..71feb7d88e58 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -33,6 +33,7 @@ import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.db.partitions.Partition; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -147,8 +148,8 @@ public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int public PartitionKey deserialize(DataInputPlus in, int version) throws IOException { TableId tableId = TableId.deserialize(in); - TableMetadata metadata = Schema.instance.getExistingTableMetadata(tableId); - DecoratedKey key = metadata.partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in)); + IPartitioner partitioner = Schema.instance.getExistingTablePartitioner(tableId); + DecoratedKey key = partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in)); return new PartitionKey(tableId, key); } diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 4aa35bc32408..032d7c6dc93e 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -22,7 +22,6 @@ import accord.local.KeyHistory; import accord.local.PreLoadContext; import accord.primitives.*; -import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; @@ -198,7 +197,11 @@ public void onAdd(AccordCachingState state) private AsyncChain> findOverlappingKeys(Ranges ranges) { - Invariants.checkArgument(!ranges.isEmpty()); + if (ranges.isEmpty()) + { + // During topology changes some shards may be included with empty ranges + return AsyncChains.success(Collections.emptySet()); + } List>> chains = new ArrayList<>(ranges.size()); for (Range range : ranges) diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 5e4dbbd3c92e..1fd5bb3a3a92 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -47,6 +47,7 @@ import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.RoutableKey; +import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -88,6 +89,7 @@ public class SimulatedAccordCommandStore implements AutoCloseable public final AccordCommandStore store; public final Node.Id nodeId; public final Topology topology; + public final Topologies topologies; public final MockJournal journal; public final ScheduledExecutorPlus unorderedScheduled; public final List evictions = new ArrayList<>(); @@ -189,6 +191,7 @@ public void onEvict(AccordCachingState state) }); this.topology = AccordTopology.createAccordTopology(ClusterMetadata.current()); + this.topologies = new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology); var rangesForEpoch = new CommandStores.RangesForEpoch(topology.epoch(), topology.ranges(), store); updateHolder.add(topology.epoch(), rangesForEpoch, topology.ranges()); updateHolder.updateGlobal(topology.ranges()); @@ -209,6 +212,21 @@ public TxnId nextTxnId(Txn.Kind kind, Routable.Domain domain) return new TxnId(timeService.epoch(), timeService.now(), kind, domain, nodeId); } + public void maybeCacheEvict(Seekables keysOrRanges) + { + switch (keysOrRanges.domain()) + { + case Key: + maybeCacheEvict((Keys) keysOrRanges, Ranges.EMPTY); + break; + case Range: + maybeCacheEvict(Keys.EMPTY, (Ranges) keysOrRanges); + break; + default: + throw new UnsupportedOperationException("Unknown domain: " + keysOrRanges.domain()); + } + } + public void maybeCacheEvict(Keys keys, Ranges ranges) { AccordStateCache cache = store.cache(); @@ -217,7 +235,7 @@ public void maybeCacheEvict(Keys keys, Ranges ranges) if (TxnId.class.equals(keyType)) { Command command = (Command) state.state().get(); - if (command.known().definition.isKnown() + if (command != null && command.known().definition.isKnown() && (command.partialTxn().keys().intersects(keys) || ranges.intersects(command.partialTxn().keys())) && shouldEvict.getAsBoolean()) cache.maybeEvict(state); @@ -322,7 +340,7 @@ public AsyncResult processAsync(PreLoadContext loadCtx, Function> enqueuePreAccept(Txn txn, FullRoute route) { TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); - PreAccept preAccept = new PreAccept(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology), txnId, txn, route); + PreAccept preAccept = new PreAccept(nodeId, topologies, txnId, txn, route); return Pair.create(txnId, processAsync(preAccept, safe -> { var reply = preAccept.apply(safe); Assertions.assertThat(reply.isOk()).isTrue(); @@ -334,7 +352,7 @@ public Pair> enqueueBeginRecovery(Tx { TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); Ballot ballot = Ballot.fromValues(timeService.epoch(), timeService.now(), nodeId); - BeginRecovery br = new BeginRecovery(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology), txnId, txn, route, ballot); + BeginRecovery br = new BeginRecovery(nodeId, topologies, txnId, txn, route, ballot); return Pair.create(txnId, processAsync(br, safe -> { var reply = br.apply(safe); diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java index 5aed34bc7928..2c313566df65 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java @@ -37,14 +37,19 @@ import accord.messages.PreAccept; import accord.primitives.Ballot; import accord.primitives.Deps; +import accord.primitives.FullRangeRoute; import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.LatestDeps; import accord.primitives.Range; import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.topology.Topologies; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Invariants; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import org.apache.cassandra.ServerTestUtils; @@ -57,11 +62,13 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.Pair; import org.assertj.core.api.Assertions; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; public abstract class SimulatedAccordCommandStoreTestBase extends CQLTester { @@ -78,6 +85,21 @@ public abstract class SimulatedAccordCommandStoreTestBase extends CQLTester protected enum DepsMessage {PreAccept, BeginRecovery, PreAcceptThenBeginRecovery} + protected static final Gen> mixedDomainGen = Gens.enums().allMixedDistribution(Routable.Domain.class); + protected static final Gen mixedTokenGen = top -> { + switch (top.nextInt(0, 3)) + { + case 0: // all + return rs -> rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); + case 1: // small + return rs -> rs.nextLong(0, 100); + case 2: // medium + return rs -> rs.nextLong(0, Long.MAX_VALUE); + default: + throw new AssertionError(); + } + }; + protected static TableMetadata intTbl, reverseTokenTbl; protected static Node.Id nodeId; @@ -345,4 +367,53 @@ protected static void assertDeps(TxnId txnId, Deps deps, Assertions.assertThat(deps.keyDeps.txnIds(key)).describedAs("Txn %s for key %s", txnId, key).isEqualTo(keyConflicts.get(key)); } } + + protected static Gen>> randomTxn(Gen domainGen, Gen.LongGen tokenGen) + { + TableMetadata tbl = reverseTokenTbl; + Invariants.checkArgument(tbl.partitioner == Murmur3Partitioner.instance, "Only murmur partitioner is supported; given %s", tbl.partitioner.getClass()); + Gen keyGen = rs -> new PartitionKey(tbl.id, tbl.partitioner.decorateKey(Murmur3Partitioner.LongToken.keyForToken(tokenGen.nextLong(rs)))); + Gen rangeGen = rs -> { + long a = tokenGen.nextLong(rs); + long b = tokenGen.nextLong(rs); + while (a == b) + b = tokenGen.nextLong(rs); + if (a > b) + { + long tmp = a; + a = b; + b = tmp; + } + return tokenRange(tbl.id, a, b); + }; + return rs -> { + Routable.Domain domain = domainGen.next(rs); + switch (domain) + { + case Key: + { + Keys keys = Keys.of(Gens.lists(keyGen).unique().ofSizeBetween(1, 5).next(rs)); + List inserts = new ArrayList<>(keys.size()); + List binds = new ArrayList<>(keys.size()); + for (int i = 0; i < keys.size(); i++) + { + inserts.add(String.format("INSERT INTO %s (pk) VALUES (?)", tbl)); + binds.add(((PartitionKey) keys.get(i)).partitionKey().getKey()); + } + Txn txn = createTxn(wrapInTxn(inserts), binds); + FullRoute route = keys.toRoute(keys.get(0).toUnseekable()); + return Pair.create(txn, route); + } + case Range: + { + Ranges ranges = Ranges.of(Gens.arrays(Range.class, rangeGen).unique().ofSizeBetween(1, 5).next(rs)); + Txn txn = createTxn(Txn.Kind.ExclusiveSyncPoint, ranges); + FullRangeRoute route = ranges.toRoute(ranges.get(0).end()); + return Pair.create(txn, route); + } + default: + throw new UnsupportedOperationException(domain.name()); + } + }; + } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java index 6e216ff56d16..9ade46795475 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java @@ -30,10 +30,14 @@ import accord.impl.basic.SimulatedFault; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; +import accord.messages.PreAccept; +import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Seekables; +import accord.primitives.Txn; +import accord.primitives.TxnId; import accord.utils.Gen; import accord.utils.Gens; import accord.utils.RandomSource; @@ -48,6 +52,7 @@ import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.Pair; import org.assertj.core.api.Assertions; import static accord.utils.Property.qt; @@ -58,6 +63,7 @@ public class SimulatedAsyncOperationTest extends SimulatedAccordCommandStoreTest public void precondition() { Assertions.assertThat(intTbl.partitioner).isEqualTo(Murmur3Partitioner.instance); + Assertions.assertThat(reverseTokenTbl.partitioner).isEqualTo(Murmur3Partitioner.instance); } @Test @@ -73,20 +79,22 @@ public void fuzz() qt().withExamples(100).check(rs -> test(rs, 100, intTbl, actionGen)); } + enum Operation { Task, PreAccept } + private static void test(RandomSource rs, int numSamples, TableMetadata tbl, Gen actionGen) throws Exception { AccordKeyspace.unsafeClear(); + Gen operationGen = Gens.enums().all(Operation.class); int numKeys = rs.nextInt(20, 1000); long minToken = 0; long maxToken = numKeys; Gen keyGen = Gens.longs().between(minToken + 1, maxToken).map(t -> new PartitionKey(tbl.id, tbl.partitioner.decorateKey(LongToken.keyForToken(t)))); - - Gen keysGen = Gens.lists(keyGen).unique().ofSizeBetween(1, 10).map(l -> Keys.of(l)); Gen rangesGen = Gens.lists(rangeInsideRange(tbl.id, minToken, maxToken)).uniqueBestEffort().ofSizeBetween(1, 10).map(l -> Ranges.of(l.toArray(Range[]::new))); Gen> seekablesGen = Gens.oneOf(keysGen, rangesGen); + Gen>> txnGen = randomTxn(mixedDomainGen.next(rs), mixedTokenGen.next(rs)); try (var instance = new SimulatedAccordCommandStore(rs)) { @@ -94,14 +102,46 @@ private static void test(RandomSource rs, int numSamples, TableMetadata tbl, Gen Counter counter = new Counter(); for (int i = 0; i < numSamples; i++) { - PreLoadContext ctx = PreLoadContext.contextFor(seekablesGen.next(rs)); - operation(instance, ctx, actionGen.next(rs), rs::nextBoolean).begin((ignore, failure) -> { - counter.counter++; - if (failure != null && !(failure instanceof SimulatedFault)) throw new AssertionError("Unexpected error", failure); - }); + Operation op = operationGen.next(rs); + switch (op) + { + case Task: + { + PreLoadContext ctx = PreLoadContext.contextFor(seekablesGen.next(rs)); + instance.maybeCacheEvict(ctx.keys()); + operation(instance, ctx, actionGen.next(rs), rs::nextBoolean).begin(counter); + } + break; + case PreAccept: + { + Pair> txnWithRoute = txnGen.next(rs); + Txn txn = txnWithRoute.left; + Action action = actionGen.next(rs); + TxnId txnId = instance.nextTxnId(txn.kind(), txn.keys().domain()); + FullRoute route = txnWithRoute.right; + PreAccept preAccept = new PreAccept(nodeId, instance.topologies, txnId, txn, route) { + @Override + public PreAcceptReply apply(SafeCommandStore safeStore) + { + PreAcceptReply result = super.apply(safeStore); + if (action == Action.FAILURE) + throw new SimulatedFault("PreAccept failed for keys " + keys()); + return result; + } + }; + instance.maybeCacheEvict(txn.keys()); + instance.processAsync(preAccept).begin(counter); + } + break; + default: + throw new UnsupportedOperationException(op.name()); + } } instance.processAll(); Assertions.assertThat(counter.counter).isEqualTo(numSamples); + instance.store.cache().stream().forEach(e -> { + Assertions.assertThat(e.referenceCount()).isEqualTo(0); + }); } } @@ -146,9 +186,17 @@ AsyncLoader createAsyncLoader(AccordCommandStore commandStore, PreLoadContext pr }; } - private static class Counter + private static class Counter implements BiConsumer { int counter = 0; + + @Override + public void accept(Object o, Throwable failure) + { + counter++; + if (failure != null && !(failure instanceof SimulatedFault)) + throw new AssertionError("Unexpected error", failure); + } } private static class SimulatedOperation extends AsyncOperation From 17c186ad64b7baa549f28ce5330717b9e8644a39 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Tue, 18 Jun 2024 11:07:00 -0700 Subject: [PATCH 117/340] Move burn test read timestamp validation from replica to coordination Patch by Blake Eggleston; reviewed by David Capwell for CASSANDRA-19288 --- modules/accord | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/accord b/modules/accord index cf10169067a8..527094c69ad2 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit cf10169067a8cd40fb876789a62439cc03fd2e9b +Subproject commit 527094c69ad23319b058917b1c8974d01c5d86e6 From d6bf8ec3d0b3969d8bdc47d2edbcc601260529b2 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Mon, 17 Jun 2024 16:28:05 -0700 Subject: [PATCH 118/340] Don't run incremental repairs for consensus only repairs Patch by Blake Eggleston; reviewed by David Capwell for CASSANDRA-19717 --- src/java/org/apache/cassandra/repair/RepairCoordinator.java | 2 +- .../org/apache/cassandra/repair/messages/RepairOption.java | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/repair/RepairCoordinator.java b/src/java/org/apache/cassandra/repair/RepairCoordinator.java index 2d6fc9cb662d..9e8f1d5a3b68 100644 --- a/src/java/org/apache/cassandra/repair/RepairCoordinator.java +++ b/src/java/org/apache/cassandra/repair/RepairCoordinator.java @@ -499,7 +499,7 @@ private Future>> repair(String[] { task = new PreviewRepairTask(this, state.id, neighborsAndRanges.filterCommonRanges(state.keyspace, cfnames), neighborsAndRanges.shouldExcludeDeadParticipants, cfnames); } - else if (state.options.isIncremental()) + else if (state.options.isIncremental() && !state.options.isConsensusOnly()) { task = new IncrementalRepairTask(this, state.id, neighborsAndRanges, cfnames); } diff --git a/src/java/org/apache/cassandra/repair/messages/RepairOption.java b/src/java/org/apache/cassandra/repair/messages/RepairOption.java index 11be2269fd82..bd3ea276ca6b 100644 --- a/src/java/org/apache/cassandra/repair/messages/RepairOption.java +++ b/src/java/org/apache/cassandra/repair/messages/RepairOption.java @@ -463,6 +463,11 @@ public boolean accordOnly() return accordOnly; } + public boolean isConsensusOnly() + { + return paxosOnly() || accordOnly(); + } + public boolean isConsensusMigration() { return isConsensusMigration; From 3dfd80ff7829f8ea102f0076b6318d33cae6a7d0 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Thu, 6 Jun 2024 12:36:02 -0400 Subject: [PATCH 119/340] ApplyThenWaitUntilApplied supplies wrong epoch for executeAtEpoch Patch by Ariel Weisberg; Reviewed by Benedict Elliott Smith for CASSANDRA-19687 --- modules/accord | 2 +- .../cassandra/index/accord/CheckpointIntervalArrayIndex.java | 2 ++ .../service/accord/serializers/ReadDataSerializers.java | 3 --- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/accord b/modules/accord index 527094c69ad2..37c957c71949 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 527094c69ad23319b058917b1c8974d01c5d86e6 +Subproject commit 37c957c719491634f081b39900ebf708079ef3ee diff --git a/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java b/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java index 411c3fb5ece6..664fe16093b3 100644 --- a/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java +++ b/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java @@ -615,6 +615,8 @@ public int binarySearch(ChecksumedRandomAccessReader indexInput, int from, int t }, (i1, i2, i3, i4, startIdx, endIdx) -> { try { + if (startIdx == endIdx) + return; reader.maybeSeek(indexInput, stats, SortedListReader.SeekReason.SCAN, reader.fileOffsetStart(startIdx)); for (int i = startIdx; i < endIdx; i++) { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 163e8f65f2f0..00728a68f944 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -81,7 +81,6 @@ public void serialize(ApplyThenWaitUntilApplied msg, DataOutputPlus out, int ver { CommandSerializers.txnId.serialize(msg.txnId, out, version); KeySerializers.participants.serialize(msg.readScope, out, version); - out.writeUnsignedVInt(msg.executeAtEpoch); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); KeySerializers.fullRoute.serialize(msg.route, out, version); CommandSerializers.partialTxn.serialize(msg.txn, out, version); @@ -97,7 +96,6 @@ public ApplyThenWaitUntilApplied deserialize(DataInputPlus in, int version) thro return ApplyThenWaitUntilApplied.SerializerSupport.create( CommandSerializers.txnId.deserialize(in, version), KeySerializers.participants.deserialize(in, version), - in.readUnsignedVInt(), CommandSerializers.timestamp.deserialize(in, version), KeySerializers.fullRoute.deserialize(in, version), CommandSerializers.partialTxn.deserialize(in, version), @@ -112,7 +110,6 @@ public long serializedSize(ApplyThenWaitUntilApplied msg, int version) { return CommandSerializers.txnId.serializedSize(msg.txnId, version) + KeySerializers.participants.serializedSize(msg.readScope, version) - + TypeSizes.sizeofUnsignedVInt(msg.executeAtEpoch) + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + KeySerializers.fullRoute.serializedSize(msg.route, version) + CommandSerializers.partialTxn.serializedSize(msg.txn, version) From d83de7fbb493326840a9370617670feced9b9258 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 18 Jun 2024 12:20:22 -0700 Subject: [PATCH 120/340] =?UTF-8?q?CEP-15:=20(Accord)=20SyncPoint=20timeou?= =?UTF-8?q?ts=20become=20a=20Exhausted=20rather=20than=20a=20Timeout=20and?= =?UTF-8?q?=20doesn=E2=80=99t=20get=20retried?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-19718 --- modules/accord | 2 +- src/java/org/apache/cassandra/net/Verb.java | 4 +- .../service/accord/AccordService.java | 80 ++++++-- .../exceptions/ReadExhaustedException.java | 39 ++++ .../cassandra/streaming/StreamSession.java | 10 +- .../org/apache/cassandra/utils/Blocking.java | 42 ++++ .../test/accord/AccordBootstrapTest.java | 2 +- .../cassandra/repair/LocalSyncTaskTest.java | 2 +- .../service/accord/AccordServiceTest.java | 185 ++++++++++++++++++ 9 files changed, 339 insertions(+), 27 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java create mode 100644 src/java/org/apache/cassandra/utils/Blocking.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java diff --git a/modules/accord b/modules/accord index 37c957c71949..f1f5ea5ccbd6 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 37c957c719491634f081b39900ebf708079ef3ee +Subproject commit f1f5ea5ccbd6e0a8abf579a4331fa84a1b3d9f95 diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index adaa602a9af1..ff2cb4ef7a01 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -337,8 +337,8 @@ public enum Verb ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_MAX_CONFLICT_REQ (164, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_MAX_CONFLICT_RSP), - ACCORD_FETCH_DATA_RSP (145, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), - ACCORD_FETCH_DATA_REQ (146, P2, repairTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), + ACCORD_FETCH_DATA_RSP (145, P2, writeTimeout,IMMEDIATE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), + ACCORD_FETCH_DATA_REQ (146, P2, writeTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_QUERY_DURABLE_BEFORE_RSP (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index c98a6255745b..36a88c4d08ab 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -35,10 +35,14 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableMap; import com.google.common.primitives.Ints; import accord.coordinate.Barrier; import accord.coordinate.CoordinateSyncPoint; +import accord.coordinate.Exhausted; +import accord.coordinate.FailureAccumulator; import accord.coordinate.TopologyMismatch; import accord.impl.CoordinateDurabilityScheduling; import accord.primitives.SyncPoint; @@ -47,6 +51,7 @@ import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.service.accord.exceptions.ReadExhaustedException; import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; @@ -104,11 +109,11 @@ import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.Blocking; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; -import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.ImmediateFuture; @@ -397,6 +402,13 @@ public IVerbHandler verbHandler() // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match throw newBarrierPreempted(txnId, barrierType.global); } + if (cause instanceof Exhausted) + { + // this case happens when a non-timeout exception is seen, and we are unable to move forward + metrics.failures.mark(); + throw newBarrierExhausted(txnId, barrierType.global); + } + // unknown error metrics.failures.mark(); throw new RuntimeException(cause); } @@ -436,35 +448,40 @@ public long repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.Reque return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, repairSyncPoint(allNodes)); } - private static ReadTimeoutException newBarrierTimeout(TxnId txnId, boolean global) + @VisibleForTesting + static ReadTimeoutException newBarrierTimeout(TxnId txnId, boolean global) { return new ReadTimeoutException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); } - private static ReadTimeoutException newBarrierPreempted(TxnId txnId, boolean global) + @VisibleForTesting + static ReadTimeoutException newBarrierPreempted(TxnId txnId, boolean global) { return new ReadPreemptedException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); } - private long doWithRetries(LongSupplier action, int retryAttempts, long initialBackoffMillis, long maxBackoffMillis) throws InterruptedException + @VisibleForTesting + static ReadExhaustedException newBarrierExhausted(TxnId txnId, boolean global) + { + //TODO (usability): not being able to show the txn is a bad UX, this becomes harder to trace back in logs + return new ReadExhaustedException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, ImmutableMap.of()); + } + + @VisibleForTesting + static boolean isTimeout(Throwable t) + { + return t instanceof Timeout || t instanceof ReadTimeoutException || t instanceof Preempted || t instanceof ReadPreemptedException; + } + + @VisibleForTesting + static long doWithRetries(Blocking blocking, LongSupplier action, int retryAttempts, long initialBackoffMillis, long maxBackoffMillis) throws InterruptedException { // Since we could end up having the barrier transaction or the transaction it listens to invalidated - RuntimeException existingFailures = null; + Throwable existingFailures = null; Long success = null; - long backoffMillis = 0; + long backoffMillis = initialBackoffMillis; for (int attempt = 0; attempt < retryAttempts; attempt++) { - try - { - Thread.sleep(backoffMillis); - } - catch (InterruptedException e) - { - if (existingFailures != null) - e.addSuppressed(existingFailures); - throw e; - } - backoffMillis = backoffMillis == 0 ? initialBackoffMillis : Math.min(backoffMillis * 2, maxBackoffMillis); try { success = action.getAsLong(); @@ -472,13 +489,34 @@ private long doWithRetries(LongSupplier action, int retryAttempts, long initialB } catch (RequestExecutionException | CoordinationFailed newFailures) { - existingFailures = Throwables.merge(existingFailures, newFailures); + existingFailures = FailureAccumulator.append(existingFailures, newFailures, AccordService::isTimeout); + + try + { + blocking.sleep(backoffMillis); + } + catch (InterruptedException e) + { + if (existingFailures != null) + e.addSuppressed(existingFailures); + throw e; + } + backoffMillis = Math.min(backoffMillis * 2, maxBackoffMillis); + } + catch (Throwable t) + { + // if an unknown/unexpected error happens retry stops right away + if (existingFailures != null) + t.addSuppressed(existingFailures); + existingFailures = t; + break; } } if (success == null) { checkState(existingFailures != null, "Didn't have success, but also didn't have failures"); - throw existingFailures; + Throwables.throwIfUnchecked(existingFailures); + throw new RuntimeException(existingFailures); } return success; } @@ -486,7 +524,7 @@ private long doWithRetries(LongSupplier action, int retryAttempts, long initialB @Override public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException { - return doWithRetries(() -> AccordService.instance().barrier(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite), + return doWithRetries(Blocking.Default.instance, () -> AccordService.instance().barrier(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite), DatabaseDescriptor.getAccordBarrierRetryAttempts(), DatabaseDescriptor.getAccordBarrierRetryInitialBackoffMillis(), DatabaseDescriptor.getAccordBarrierRetryMaxBackoffMillis()); @@ -495,7 +533,7 @@ public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierTyp @Override public long repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException { - return doWithRetries(() -> AccordService.instance().repair(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite, allEndpoints), + return doWithRetries(Blocking.Default.instance, () -> AccordService.instance().repair(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite, allEndpoints), DatabaseDescriptor.getAccordBarrierRetryAttempts(), DatabaseDescriptor.getAccordBarrierRetryInitialBackoffMillis(), DatabaseDescriptor.getAccordBarrierRetryMaxBackoffMillis()); diff --git a/src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java b/src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java new file mode 100644 index 000000000000..4ebfc8fdb095 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.exceptions; + +import java.util.Map; + +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.exceptions.ReadFailureException; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.locator.InetAddressAndPort; + +public class ReadExhaustedException extends ReadFailureException +{ + public ReadExhaustedException(ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent, Map failureReasonByEndpoint) + { + super(consistency, received, blockFor, dataPresent, failureReasonByEndpoint); + } + + protected ReadExhaustedException(String msg, ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent, Map failureReasonByEndpoint) + { + super(msg, consistency, received, blockFor, dataPresent, failureReasonByEndpoint); + } +} diff --git a/src/java/org/apache/cassandra/streaming/StreamSession.java b/src/java/org/apache/cassandra/streaming/StreamSession.java index 050e37c749c2..447621c78c9f 100644 --- a/src/java/org/apache/cassandra/streaming/StreamSession.java +++ b/src/java/org/apache/cassandra/streaming/StreamSession.java @@ -1306,8 +1306,16 @@ public int getNumRequests() return requests.size(); } - @VisibleForTesting public int getNumTransfers() + { + return transfers.size(); + } + + //TODO (now, review): there were 2 tests that use this (nothing else) and both are checking that its > 1... but in both cases they are checking if there are transfer tasks, but there isn't any as the range doesn't have data... + // This looks like AccordBootstrapTest and LocalSyncTaskTest have a test bug, so rather than fixing this method was created to keep the old semantic... + @Deprecated(since = "5.1") + @VisibleForTesting + public int getNumKeyspaceTransfers() { return transferredRangesPerKeyspace.size(); } diff --git a/src/java/org/apache/cassandra/utils/Blocking.java b/src/java/org/apache/cassandra/utils/Blocking.java new file mode 100644 index 000000000000..e04e53b090e2 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/Blocking.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.concurrent.TimeUnit; + +public interface Blocking +{ + default void sleep(long millis) throws InterruptedException + { + sleep(millis, TimeUnit.MILLISECONDS); + } + + void sleep(long value, TimeUnit unit) throws InterruptedException; + + enum Default implements Blocking + { + instance; + + @Override + public void sleep(long value, TimeUnit unit) throws InterruptedException + { + unit.sleep(value); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index 2241a8c91151..14ae7d1cde7a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -267,7 +267,7 @@ public void bootstrapTest() throws Throwable StreamListener.listener.forSession(session -> { Assert.assertEquals(node3Addr, session.peer.getAddress()); Assert.assertEquals(0, session.getNumRequests()); - Assert.assertTrue(session.getNumTransfers() > 0); + Assert.assertTrue(session.getNumKeyspaceTransfers() > 0); }); awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { diff --git a/test/unit/org/apache/cassandra/repair/LocalSyncTaskTest.java b/test/unit/org/apache/cassandra/repair/LocalSyncTaskTest.java index 95f630dc0571..c03fe2aac70e 100644 --- a/test/unit/org/apache/cassandra/repair/LocalSyncTaskTest.java +++ b/test/unit/org/apache/cassandra/repair/LocalSyncTaskTest.java @@ -171,7 +171,7 @@ private static void assertNumInOut(StreamPlan plan, int expectedIncoming, int ex StreamCoordinator coordinator = plan.getCoordinator(); StreamSession session = Iterables.getOnlyElement(coordinator.getAllStreamSessions()); assertEquals(expectedIncoming, session.getNumRequests()); - assertEquals(expectedOutgoing, session.getNumTransfers()); + assertEquals(expectedOutgoing, session.getNumKeyspaceTransfers()); } @Test diff --git a/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java new file mode 100644 index 000000000000..ea88d119778d --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.function.LongSupplier; + +import org.junit.Test; + +import accord.coordinate.Exhausted; +import accord.coordinate.Preempted; +import accord.coordinate.Timeout; +import accord.primitives.TxnId; +import org.apache.cassandra.utils.Blocking; +import org.assertj.core.api.Condition; +import org.mockito.Mockito; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.service.accord.AccordService.doWithRetries; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +public class AccordServiceTest +{ + @Test + public void retryExpectedFailures() throws InterruptedException + { + Blocking blocking = Mockito.mock(Blocking.class); + class Task implements LongSupplier + { + private int attempts = 0; + + @Override + public long getAsLong() + { + switch (attempts) + { + case 0: + attempts++; + throw new Timeout(null, null); + case 1: + attempts++; + throw AccordService.newBarrierTimeout(TxnId.NONE, true); + case 2: + attempts++; + throw new Preempted(null, null); + case 3: + attempts++; + throw AccordService.newBarrierPreempted(TxnId.NONE, true); + case 4: + attempts++; + throw new Exhausted(null, null); + case 5: + attempts++; + throw AccordService.newBarrierExhausted(TxnId.NONE, true); + default: + return 42; + } + } + } + Task failing = new Task(); + assertThat(doWithRetries(blocking, failing, Integer.MAX_VALUE, 100, 1000)).isEqualTo(42); + verify(blocking).sleep(100); + verify(blocking).sleep(200); + verify(blocking).sleep(400); + verify(blocking).sleep(800); + verify(blocking, times(2)).sleep(1000); // hit max backoff, so stays at 1k + } + + @Test + public void retryThrowsTimeout() + { + Blocking blocking = Mockito.mock(Blocking.class); + qt().check(rs -> { + List timeoutFailures = new ArrayList<>(4); + timeoutFailures.add(() -> {throw new Timeout(null, null);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, true);}); + timeoutFailures.add(() -> {throw new Preempted(null, null);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, true);}); + Collections.shuffle(timeoutFailures, rs.asJdkRandom()); + Iterator it = timeoutFailures.iterator(); + LongSupplier failing = () -> { + if (!it.hasNext()) throw new IllegalStateException("Called too many times"); + it.next().run(); // this throws... + return 42; + }; + assertThatThrownBy(() -> doWithRetries(blocking, failing, timeoutFailures.size(), 100, 1000)).is(new Condition<>(AccordService::isTimeout, "timeout")); + assertThat(it).isExhausted(); + }); + } + + @Test + public void retryThrowsNonTimeout() + { + Blocking blocking = Mockito.mock(Blocking.class); + qt().check(rs -> { + List timeoutFailures = new ArrayList<>(5); + timeoutFailures.add(() -> {throw new Timeout(null, null);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, true);}); + timeoutFailures.add(() -> {throw new Preempted(null, null);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, true);}); + timeoutFailures.add(() -> {throw new Exhausted(null, null);}); + Collections.shuffle(timeoutFailures, rs.asJdkRandom()); + Iterator it = timeoutFailures.iterator(); + LongSupplier failing = () -> { + if (!it.hasNext()) throw new IllegalStateException("Called too many times"); + it.next().run(); // this throws... + return 42; + }; + assertThatThrownBy(() -> doWithRetries(blocking, failing, timeoutFailures.size(), 100, 1000)).isInstanceOf(Exhausted.class); + assertThat(it).isExhausted(); + }); + } + + @Test + public void retryShortCircuitError() + { + class Unexpected implements Runnable + { + final boolean isError; + + Unexpected(boolean isError) + { + this.isError = isError; + } + + @Override + public void run() + { + if (isError) throw new AssertionError(); + throw new NullPointerException(); + } + } + qt().check(rs -> { + List failures = new ArrayList<>(6); + failures.add(() -> {throw new Timeout(null, null);}); + failures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, true);}); + failures.add(() -> {throw new Preempted(null, null);}); + failures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, true);}); + failures.add(() -> {throw new Exhausted(null, null);}); + boolean isError = rs.nextBoolean(); + failures.add(new Unexpected(isError)); + Collections.shuffle(failures, rs.asJdkRandom()); + int unexpectedIndex = -1; + for (int i = 0; i < failures.size(); i++) + { + if (failures.get(i) instanceof Unexpected) + { + unexpectedIndex = i; + break; + } + } + Iterator it = failures.iterator(); + LongSupplier failing = () -> { + if (!it.hasNext()) throw new IllegalStateException("Called too many times"); + it.next().run(); // this throws... + return 42; + }; + Blocking blocking = Mockito.mock(Blocking.class); + assertThatThrownBy(() -> doWithRetries(blocking, failing, failures.size(), 100, 1000)).isInstanceOf(isError ? AssertionError.class : NullPointerException.class); + verify(blocking, times(unexpectedIndex)).sleep(Mockito.anyLong()); + }); + } +} \ No newline at end of file From ac58a62c51460b5a585eb2c33a92d3b804a8ab83 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 23 May 2024 14:20:09 +0100 Subject: [PATCH 121/340] Introduce Periodic mode to Accord Journal patch by Benedict; reviewed by Aleksey Yeschenko, Alex Petrov and David Capwell for CASSANDRA-19720 --- .../apache/cassandra/config/AccordSpec.java | 4 +- .../cassandra/config/DatabaseDescriptor.java | 3 - .../cassandra/journal/ActiveSegment.java | 37 ++- .../org/apache/cassandra/journal/Flusher.java | 312 ++++++++++++------ .../org/apache/cassandra/journal/Journal.java | 53 ++- .../cassandra/journal/SegmentWriter.java | 4 +- .../apache/cassandra/journal/Segments.java | 15 + .../cassandra/journal/SyncedOffsets.java | 36 +- .../service/accord/AccordCommandStore.java | 1 + .../service/accord/AccordJournal.java | 8 +- .../service/accord/AccordKeyspace.java | 68 ++-- .../service/accord/AccordObjectSizes.java | 2 +- .../service/accord/AccordService.java | 2 +- .../serializers/CheckStatusSerializers.java | 3 +- .../serializers/CommandSerializers.java | 32 ++ .../accord/serializers/FetchSerializers.java | 13 +- .../serializers/TopologySerializers.java | 15 +- .../serializers/WaitingOnSerializer.java | 81 +++-- .../cassandra/utils/ByteBufferUtil.java | 2 +- test/conf/logback-simulator.xml | 2 +- .../distributed/test/TestBaseImpl.java | 4 + .../test/accord/AccordLoadTest.java | 97 +++++- .../simulator/ClusterSimulation.java | 16 +- .../cassandra/simulator/SimulationRunner.java | 2 +- .../paxos/AccordClusterSimulation.java | 10 +- .../PairOfSequencesAccordSimulation.java | 6 +- .../paxos/PaxosClusterSimulation.java | 5 - .../paxos/PaxosSimulationRunner.java | 2 +- .../cassandra/journal/SyncedOffsetsTest.java | 4 +- .../serializers/WaitingOnSerializerTest.java | 2 +- 30 files changed, 621 insertions(+), 220 deletions(-) diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index b035b0b9b595..2e0d614957af 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -30,7 +30,7 @@ public class AccordSpec public volatile OptionaldPositiveInt shard_count = OptionaldPositiveInt.UNDEFINED; - public volatile DurationSpec.IntSecondsBound progress_log_schedule_delay = new DurationSpec.IntSecondsBound(1); + public volatile DurationSpec.IntMillisecondsBound progress_log_schedule_delay = new DurationSpec.IntMillisecondsBound(100); /** * When a barrier transaction is requested how many times to repeat attempting the barrier before giving up @@ -79,7 +79,7 @@ public static class JournalSpec implements Params { public int segmentSize = 32 << 20; public FailurePolicy failurePolicy = FailurePolicy.STOP; - public FlushMode flushMode = FlushMode.BATCH; + public FlushMode flushMode = FlushMode.PERIODIC; public DurationSpec.IntMillisecondsBound flushPeriod; // pulls default from 'commitlog_sync_period' public DurationSpec.IntMillisecondsBound periodicFlushLagBlock = new DurationSpec.IntMillisecondsBound("1500ms"); diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 202469fbff93..07227a40e971 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -167,9 +167,6 @@ public class DatabaseDescriptor { - public static final String NO_ACCORD_PAXOS_STRATEGY_WITH_ACCORD_DISABLED_MESSAGE = - "Cannot use lwt_strategy \"accord\" while Accord transactions are disabled."; - static { CHRONICLE_ANALYTICS_DISABLE.setBoolean(true); diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index f16126c157a5..1fd2e4dd1a29 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -24,6 +24,7 @@ import java.nio.file.StandardOpenOption; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; import java.util.concurrent.locks.LockSupport; import com.codahale.metrics.Timer; @@ -50,6 +51,9 @@ final class ActiveSegment extends Segment * Everything before this offset has been written and flushed. */ private volatile int lastFlushedOffset = 0; + private volatile int lastFsyncOffset = 0; + @SuppressWarnings("rawtypes") + private static final AtomicIntegerFieldUpdater lastFsyncOffsetUpdater = AtomicIntegerFieldUpdater.newUpdater(ActiveSegment.class, "lastFsyncOffset"); /* * End position of the buffer; initially set to its capacity and @@ -86,7 +90,7 @@ private ActiveSegment( @SuppressWarnings("resource") static ActiveSegment create(Descriptor descriptor, Params params, KeySupport keySupport) { - SyncedOffsets syncedOffsets = SyncedOffsets.active(descriptor, true); + SyncedOffsets syncedOffsets = SyncedOffsets.active(descriptor); InMemoryIndex index = InMemoryIndex.create(keySupport); Metadata metadata = Metadata.create(); return new ActiveSegment<>(descriptor, params, syncedOffsets, index, metadata, keySupport); @@ -152,7 +156,7 @@ private synchronized boolean close(boolean persistComponents) boolean isEmpty = discardUnusedTail(); if (!isEmpty) { - flush(); + flush(true); if (persistComponents) persistComponents(); } release(); @@ -261,21 +265,37 @@ boolean shouldFlush() * TODO FIXME: calls from outside Flusher + callbacks * @return last synced offset */ - synchronized int flush() + synchronized int flush(boolean fsync) { int allocatePosition = this.allocatePosition.get(); if (lastFlushedOffset >= allocatePosition) return lastFlushedOffset; waitForModifications(); - flushInternal(); + if (fsync) + { + fsyncInternal(); + lastFsyncOffsetUpdater.accumulateAndGet(this, allocatePosition, Math::max); + } lastFlushedOffset = allocatePosition; int syncedOffset = Math.min(allocatePosition, endOfBuffer); - syncedOffsets.mark(syncedOffset); + syncedOffsets.mark(syncedOffset, fsync); flushComplete.signalAll(); return syncedOffset; } + // provides no ordering guarantees + void fsync() + { + int lastFlushed = lastFlushedOffset; + if (lastFsyncOffset >= lastFlushed) + return; + + fsyncInternal(); + syncedOffsets.fsync(); + lastFsyncOffsetUpdater.accumulateAndGet(this, lastFlushed, Math::max); + } + private void waitForFlush(int position) { while (lastFlushedOffset < position) @@ -297,7 +317,7 @@ private void waitForModifications() appendOrder.awaitNewBarrier(); } - private void flushInternal() + private void fsyncInternal() { try { @@ -314,6 +334,11 @@ boolean isCompletedAndFullyFlushed(int syncedOffset) return syncedOffset >= endOfBuffer; } + boolean isCompletedAndFullyFsynced() + { + return lastFsyncOffset >= endOfBuffer; + } + /** * Ensures no more of this segment is writeable, by allocating any unused section at the end * and marking it discarded void discartUnusedTail() diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index 04411f74c851..3f2f42859a38 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -17,12 +17,15 @@ */ package org.apache.cassandra.journal; -import java.util.ArrayList; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.LockSupport; + +import javax.annotation.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.utils.Invariants; import com.codahale.metrics.Timer; import org.apache.cassandra.concurrent.Interruptible; import org.apache.cassandra.concurrent.Interruptible.TerminateException; @@ -33,7 +36,6 @@ import org.apache.cassandra.utils.concurrent.WaitQueue; import static java.lang.String.format; -import static java.util.Comparator.comparing; import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; @@ -42,7 +44,7 @@ import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; import static org.apache.cassandra.concurrent.Interruptible.State.SHUTTING_DOWN; -import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; +import static org.apache.cassandra.journal.Params.FlushMode.PERIODIC; import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; import static org.apache.cassandra.utils.MonotonicClock.Global.preciseTime; @@ -61,16 +63,19 @@ final class Flusher private final AsyncCallbacks callbacks; private volatile Interruptible flushExecutor; + private volatile Interruptible fsyncExecutor; // counts of total pending write and written entries private final AtomicLong pending = new AtomicLong(0); private final AtomicLong written = new AtomicLong(0); - // all Allocations written before this time will be flushed - volatile long lastFlushedAt = currentTimeMillis(); + // the time of the last initiated flush + volatile long flushStartedAt = nanoTime(); + // the time of the earliest flush that has completed an fsync; all Allocations written before this time are durable + volatile long fsyncFinishedFor = flushStartedAt; // a signal that writers can wait on to be notified of a completed flush in PERIODIC FlushMode - private final WaitQueue flushComplete = newWaitQueue(); + private final WaitQueue fsyncComplete = newWaitQueue(); // TODO (expected): this is only used for testing, can we remove this? // a signal and flag that callers outside the flusher thread can use // to signal they want the journal segments to be flushed to disk @@ -98,20 +103,144 @@ void start() void shutdown() { flushExecutor.shutdown(); + if (fsyncExecutor != null) + fsyncExecutor.shutdown(); } @Simulate(with={MONITORS,GLOBAL_CLOCK,LOCK_SUPPORT}) private class FlushRunnable implements Interruptible.Task { - private final MonotonicClock clock; + @Simulate(with={MONITORS,GLOBAL_CLOCK,LOCK_SUPPORT}) + private class FSyncRunnable implements Interruptible.Task + { + // this is written only by the Flusher thread, and read only by the Fsync thread + ActiveSegment fsyncUpTo; + ActiveSegment fsyncing; + + private volatile Thread awaitingWork; + + // all Allocations written before this time will be written to at least the OS page cache; + volatile long fsyncWaitingSince = 0; + // the time of the earliest flush that has begun participating in an fsync + volatile long fsyncStartedFor = 0; + + @Override + public void run(Interruptible.State state) throws InterruptedException + { + try + { + doRun(state); + } + catch (Throwable t) + { + if (!journal.handleError("Failed to flush segments to disk", t)) + throw new TerminateException(); + } + } + + private void awaitWork() throws InterruptedException + { + long lastStartedAt = fsyncStartedFor; + if (fsyncWaitingSince != lastStartedAt) + return; + + awaitingWork = Thread.currentThread(); + do + { + if (Thread.interrupted()) + { + awaitingWork = null; + throw new InterruptedException(); + } + + LockSupport.park(); + } + while (fsyncWaitingSince == lastStartedAt); + + awaitingWork = null; + } + + void notify(Thread notify) + { + if (notify != null) + LockSupport.unpark(notify); + } + + public void doRun(Interruptible.State state) throws InterruptedException + { + awaitWork(); + if (fsyncing == null) + fsyncing = journal.oldestActiveSegment(); + + // invert order of access; we might see a future fsyncTo, but at worst this means redundantly invoking fsync before updating fsyncStartedFor + long startedAt = fsyncWaitingSince; + ActiveSegment fsyncTo = this.fsyncUpTo; + fsyncStartedFor = startedAt; + // synchronized to prevent thread interrupts while performing IO operations and also + // clear interrupted status to prevent ClosedByInterruptException in ActiveSegment::flush + synchronized (this) + { + boolean ignore = Thread.interrupted(); + while (fsyncing != fsyncTo) + { + fsyncing.fsync(); + journal.closeActiveSegmentAndOpenAsStatic(fsyncing); + fsyncing = journal.getActiveSegment(fsyncing.descriptor.timestamp + 1); + } + fsyncing.fsync(); + } + fsyncFinishedFor = startedAt; + fsyncComplete.signalAll(); + long finishedAt = clock.now(); + processDuration(startedAt, finishedAt); + } + + void afterFlush(long startedAt, ActiveSegment segment, int syncedOffset) + { + long requireFsyncTo = startedAt - periodicFlushLagBlockNanos(); + + fsyncUpTo = segment; + fsyncWaitingSince = startedAt; + + notify(awaitingWork); + + if (requireFsyncTo > fsyncFinishedFor) + awaitFsyncAt(requireFsyncTo, journal.metrics.waitingOnFlush.time()); + callbacks.onFlush(segment.descriptor.timestamp, syncedOffset); + } + + private void doNoOpFlush(long startedAt) + { + if (fsyncFinishedFor >= fsyncWaitingSince) + { + fsyncFinishedFor = startedAt; + } + else + { + // if the flusher is still running, update the waitingSince register + fsyncWaitingSince = startedAt; + notify(awaitingWork); + } + } + } + private final NoSpamLogger noSpamLogger; + private final MonotonicClock clock; + private final @Nullable FSyncRunnable fSyncRunnable; + + private ActiveSegment current = null; - private final ArrayList> segmentsToFlush = new ArrayList<>(); + private long firstLaggedAt = Long.MIN_VALUE; // first lag ever or since last logged warning + private int fsyncCount = 0; // flush count since firstLaggedAt + private int lagCount = 0; // lag count since firstLaggedAt + private long duration = 0; // time spent flushing since firstLaggedAt + private long lagDuration = 0; // cumulative lag since firstLaggedAt FlushRunnable(MonotonicClock clock) { - this.clock = clock; this.noSpamLogger = NoSpamLogger.wrap(logger, 5, MINUTES); + this.clock = clock; + this.fSyncRunnable = params.flushMode() == PERIODIC ? newFsyncRunnable() : null; } @Override @@ -132,8 +261,9 @@ public void run(Interruptible.State state) throws InterruptedException public void doRun(Interruptible.State state) throws InterruptedException { - long startedRunAt = clock.now(); - boolean flushToDisk = lastFlushedAt + flushPeriodNanos() <= startedRunAt || state != NORMAL || flushRequested; + long startedAt = clock.now(); + long flushPeriodNanos = flushPeriodNanos(); + boolean flushToDisk = flushStartedAt + flushPeriodNanos <= startedAt || state != NORMAL || flushRequested; // synchronized to prevent thread interrupts while performing IO operations and also // clear interrupted status to prevent ClosedByInterruptException in ActiveSegment::flush @@ -143,83 +273,71 @@ public void doRun(Interruptible.State state) throws InterruptedException if (flushToDisk) { flushRequested = false; - doFlush(); - lastFlushedAt = startedRunAt; - flushComplete.signalAll(); + flushStartedAt = startedAt; + doFlush(startedAt); } } - long now = clock.now(); - if (flushToDisk) - processFlushDuration(startedRunAt, now); - if (state == SHUTTING_DOWN) return; - long flushPeriodNanos = flushPeriodNanos(); if (flushPeriodNanos <= 0) { haveWork.acquire(1); } else { - long wakeUpAt = startedRunAt + flushPeriodNanos; - if (wakeUpAt > now) - haveWork.tryAcquireUntil(1, wakeUpAt); + long wakeUpAt = startedAt + flushPeriodNanos; + haveWork.tryAcquireUntil(1, wakeUpAt); } } - private void doFlush() + private void doFlush(long startedAt) throws InterruptedException { - journal.selectSegmentToFlush(segmentsToFlush); - segmentsToFlush.sort(comparing(s -> s.descriptor)); + boolean synchronousFsync = fSyncRunnable == null; - try - { - long syncedSegment = -1; - int syncedOffset = -1; + if (current == null) + current = journal.oldestActiveSegment(); + ActiveSegment newCurrent = journal.currentActiveSegment(); - for (ActiveSegment segment : segmentsToFlush) - { - if (!segment.shouldFlush()) - break; + if (newCurrent == current && (newCurrent == null || !newCurrent.shouldFlush())) + { + if (synchronousFsync) fsyncFinishedFor = startedAt; + else fSyncRunnable.doNoOpFlush(startedAt); + return; + } - syncedSegment = segment.descriptor.timestamp; - syncedOffset = segment.flush(); + Invariants.checkState(newCurrent != null); - // if an older segment isn't fully complete + flushed yet, don't attempt to flush any younger ones - if (!segment.isCompletedAndFullyFlushed(syncedOffset)) - break; + try + { + while (current != newCurrent) + { + current.discardUnusedTail(); + current.flush(synchronousFsync); + if (synchronousFsync) + journal.closeActiveSegmentAndOpenAsStatic(current); + current = journal.getActiveSegment(current.descriptor.timestamp + 1); } + int syncedOffset = current.flush(synchronousFsync); - // invoke the onFlush() callback once, covering entire flushed range across all flushed segments - if (syncedSegment != -1 && syncedOffset != -1) - callbacks.onFlush(syncedSegment, syncedOffset); + if (synchronousFsync) afterFSync(startedAt, current.descriptor.timestamp, syncedOffset); + else fSyncRunnable.afterFlush(startedAt, current, syncedOffset); } catch (Throwable t) { callbacks.onFlushFailed(t); throw t; } - finally - { - segmentsToFlush.clear(); - } } - private long firstLaggedAt = Long.MIN_VALUE; // first lag ever or since last logged warning - private int flushCount = 0; // flush count since firstLaggedAt - private int lagCount = 0; // lag count since firstLaggedAt - private long flushDuration = 0; // time spent flushing since firstLaggedAt - private long lagDuration = 0; // cumulative lag since firstLaggedAt - - private void processFlushDuration(long startedFlushAt, long finishedFlushAt) + private void processDuration(long startedFlushAt, long finishedFsyncAt) { - flushCount++; - flushDuration += (finishedFlushAt - startedFlushAt); + fsyncCount++; + duration += (finishedFsyncAt - startedFlushAt); long flushPeriodNanos = flushPeriodNanos(); - long lag = finishedFlushAt - (startedFlushAt + flushPeriodNanos); + long lag = finishedFsyncAt - (startedFlushAt + flushPeriodNanos); if (flushPeriodNanos <= 0 || lag <= 0) return; @@ -227,26 +345,42 @@ private void processFlushDuration(long startedFlushAt, long finishedFlushAt) lagDuration += lag; if (firstLaggedAt == Long.MIN_VALUE) - firstLaggedAt = finishedFlushAt; + firstLaggedAt = finishedFsyncAt; boolean logged = - noSpamLogger.warn(finishedFlushAt, - "Out of {} {} journal flushes over the past {}s with average duration of {}ms, " + - "{} have exceeded the configured flush period by an average of {}ms", - flushCount, - journal.name, - format("%.2f", (finishedFlushAt - firstLaggedAt) * 1e-9d), - format("%.2f", flushDuration * 1e-6d / flushCount), - lagCount, - format("%.2f", lagDuration * 1e-6d / lagCount)); + noSpamLogger.warn(finishedFsyncAt, + "Out of {} {} journal flushes over the past {}s with average duration of {}ms, " + + "{} have exceeded the configured flush period by an average of {}ms", + fsyncCount, + journal.name, + format("%.2f", (finishedFsyncAt - firstLaggedAt) * 1e-9d), + format("%.2f", duration * 1e-6d / fsyncCount), + lagCount, + format("%.2f", lagDuration * 1e-6d / lagCount)); if (logged) // reset metrics for next log statement { firstLaggedAt = Long.MIN_VALUE; - flushCount = lagCount = 0; - flushDuration = lagDuration = 0; + fsyncCount = lagCount = 0; + duration = lagDuration = 0; } } + + private void afterFSync(long startedAt, long syncedSegment, int syncedOffset) + { + fsyncFinishedFor = startedAt; + callbacks.onFlush(syncedSegment, syncedOffset); + fsyncComplete.signalAll(); + long finishedAt = clock.now(); + processDuration(startedAt, finishedAt); + } + + private FSyncRunnable newFsyncRunnable() + { + final FSyncRunnable fSyncRunnable = new FSyncRunnable(); + fsyncExecutor = executorFactory().infiniteLoop(journal.name + "-fsync", fSyncRunnable, SAFE, NON_DAEMON, SYNCHRONIZED); + return fSyncRunnable; + } } @FunctionalInterface @@ -296,48 +430,40 @@ private void waitForFlushBatch(ActiveSegment.Allocation alloc) written.incrementAndGet(); } - private void asyncFlushBatch(ActiveSegment.Allocation alloc) + private void waitForFlushGroup(ActiveSegment.Allocation alloc) { pending.incrementAndGet(); - requestExtraFlush(); - // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO (expected): collect async flush metrics + alloc.awaitFlush(journal.metrics.waitingOnFlush); pending.decrementAndGet(); written.incrementAndGet(); } - private void waitForFlushGroup(ActiveSegment.Allocation alloc) + private void waitForFlushPeriodic(ActiveSegment.Allocation ignore) { - pending.incrementAndGet(); - alloc.awaitFlush(journal.metrics.waitingOnFlush); - pending.decrementAndGet(); + long expectedFlushTime = nanoTime() - periodicFlushLagBlockNanos(); + if (fsyncFinishedFor < expectedFlushTime) + { + pending.incrementAndGet(); + awaitFsyncAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); + pending.decrementAndGet(); + } written.incrementAndGet(); } - private void asyncFlushGroup(ActiveSegment.Allocation alloc) + private void asyncFlushBatch(ActiveSegment.Allocation alloc) { - pending.incrementAndGet(); - // alloc.awaitFlush(journal.metrics.waitingOnFlush); // TODO (expected): collect async flush metrics - pending.decrementAndGet(); + requestExtraFlush(); written.incrementAndGet(); } - private void waitForFlushPeriodic(ActiveSegment.Allocation alloc) + private void asyncFlushGroup(ActiveSegment.Allocation alloc) { - long expectedFlushTime = nanoTime() - periodicFlushLagBlockNanos(); - if (lastFlushedAt < expectedFlushTime) - { - pending.incrementAndGet(); - awaitFlushAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); - pending.decrementAndGet(); - } written.incrementAndGet(); } private void asyncFlushPeriodic(ActiveSegment.Allocation ignore) { - pending.incrementAndGet(); - // awaitFlushAt(expectedFlushTime, journal.metrics.waitingOnFlush.time()); // TODO (expected): collect async flush metrics - pending.decrementAndGet(); + requestExtraFlush(); written.incrementAndGet(); } @@ -351,17 +477,17 @@ void requestExtraFlush() haveWork.release(1); } - private void awaitFlushAt(long flushTime, Timer.Context context) + private void awaitFsyncAt(long flushTime, Timer.Context context) { do { - WaitQueue.Signal signal = flushComplete.register(context, Timer.Context::stop); - if (lastFlushedAt < flushTime) + WaitQueue.Signal signal = fsyncComplete.register(context, Timer.Context::stop); + if (fsyncFinishedFor < flushTime) signal.awaitUninterruptibly(); else signal.cancel(); } - while (lastFlushedAt < flushTime); + while (fsyncFinishedFor < flushTime); } private long flushPeriodNanos() diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index eae190a15e0f..aa61e5aca5d9 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -35,6 +35,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.utils.Invariants; import com.codahale.metrics.Timer.Context; import org.agrona.collections.ObjectHashSet; import org.apache.cassandra.concurrent.Interruptible; @@ -456,9 +457,6 @@ private void advanceSegment(ActiveSegment oldSegment) // signal the allocator thread to prepare a new segment wakeAllocator(); - if (null != oldSegment) - closeActiveSegmentAndOpenAsStatic(oldSegment); - // request that the journal be flushed out-of-band, as we've finished a segment flusher.requestExtraFlush(); } @@ -659,6 +657,53 @@ void selectSegmentToFlush(Collection> into) segments().selectActive(currentSegment.descriptor.timestamp, into); } + ActiveSegment oldestActiveSegment() + { + ActiveSegment current = currentSegment; + if (current == null) + return null; + + ActiveSegment oldest = segments().oldestActive(); + if (oldest == null || oldest.descriptor.timestamp > current.descriptor.timestamp) + return current; + + return oldest; + } + + ActiveSegment currentActiveSegment() + { + return currentSegment; + } + + ActiveSegment getActiveSegment(long timestamp) + { + // we can race with segment addition to the segments() collection, with a new segment appearing in currentSegment first + // since we are most likely to be requesting the currentSegment anyway, we resolve this case by checking currentSegment first + // and resort to the segments() collection only if we do not match + ActiveSegment currentSegment = this.currentSegment; + if (currentSegment == null) + throw new IllegalArgumentException("Requested an active segment with timestamp " + timestamp + " but there is no currently active segment"); + long currentSegmentTimestamp = currentSegment.descriptor.timestamp; + if (timestamp == currentSegmentTimestamp) + { + return currentSegment; + } + else if (timestamp > currentSegmentTimestamp) + { + throw new IllegalArgumentException("Requested a newer timestamp " + timestamp + " than the current active segment " + currentSegmentTimestamp); + } + else + { + Segment segment = segments().get(timestamp); + Invariants.checkState(segment != null, "Segment %d expected to be found, but neither current segment %d nor in active segments", timestamp, currentSegmentTimestamp); + if (segment == null) + throw new IllegalArgumentException("Request the active segment " + timestamp + " but this segment does not exist"); + if (!segment.isActive()) + throw new IllegalArgumentException("Request the active segment " + timestamp + " but this segment is not active"); + return segment.asActive(); + } + } + /** * Take care of a finished active segment: * 1. discard tail @@ -681,7 +726,7 @@ private class CloseActiveSegmentRunnable implements Runnable public void run() { activeSegment.discardUnusedTail(); - activeSegment.flush(); + activeSegment.flush(true); activeSegment.persistComponents(); replaceCompletedSegment(activeSegment, StaticSegment.open(activeSegment.descriptor, keySupport)); activeSegment.release(); diff --git a/src/java/org/apache/cassandra/journal/SegmentWriter.java b/src/java/org/apache/cassandra/journal/SegmentWriter.java index 852e955b21e3..b8436aed6688 100644 --- a/src/java/org/apache/cassandra/journal/SegmentWriter.java +++ b/src/java/org/apache/cassandra/journal/SegmentWriter.java @@ -101,9 +101,9 @@ public void close() throw new JournalWriteError(descriptor, file, e); } - try (SyncedOffsets syncedOffsets = SyncedOffsets.active(descriptor, true)) + try (SyncedOffsets syncedOffsets = SyncedOffsets.active(descriptor)) { - syncedOffsets.mark(position()); + syncedOffsets.mark(position(), true); } index.persist(descriptor); diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java index 0693997ef34d..ca5ca47b2b5c 100644 --- a/src/java/org/apache/cassandra/journal/Segments.java +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -98,6 +98,21 @@ void selectActive(long maxTimestamp, Collection> into) into.add(segment.asActive()); } + ActiveSegment oldestActive() + { + Segment oldest = null; + for (Segment segment : segments.values()) + if (segment.isActive() && (oldest == null || segment.descriptor.timestamp <= oldest.descriptor.timestamp)) + oldest = segment; + + return oldest == null ? null : oldest.asActive(); + } + + Segment get(long timestamp) + { + return segments.get(timestamp); + } + void selectStatic(Collection> into) { for (Segment segment : segments.values()) diff --git a/src/java/org/apache/cassandra/journal/SyncedOffsets.java b/src/java/org/apache/cassandra/journal/SyncedOffsets.java index bee302d6d867..cd05e6f8ac6c 100644 --- a/src/java/org/apache/cassandra/journal/SyncedOffsets.java +++ b/src/java/org/apache/cassandra/journal/SyncedOffsets.java @@ -50,7 +50,9 @@ interface SyncedOffsets extends Closeable * * @param offset the offset into datafile, up to which contents have been fsynced (exclusive) */ - void mark(int offset); + void mark(int offset, boolean fsync); + + void fsync(); @Override default void close() @@ -60,9 +62,9 @@ default void close() /** * @return a disk-backed synced offset tracker for a new {@link ActiveSegment} */ - static Active active(Descriptor descriptor, boolean syncOnMark) + static Active active(Descriptor descriptor) { - return new Active(descriptor, syncOnMark); + return new Active(descriptor); } /** @@ -87,15 +89,13 @@ static Absent absent() final class Active implements SyncedOffsets { private final Descriptor descriptor; - private final boolean syncOnMark; private final FileOutputStreamPlus output; private volatile int syncedOffset; - private Active(Descriptor descriptor, boolean syncOnMark) + private Active(Descriptor descriptor) { this.descriptor = descriptor; - this.syncOnMark = syncOnMark; File file = descriptor.fileFor(Component.SYNCED_OFFSETS); if (file.exists()) @@ -123,7 +123,7 @@ public int syncedOffset() } @Override - public void mark(int offset) + public void mark(int offset, boolean fsync) { if (offset < syncedOffset) throw new IllegalArgumentException("offset " + offset + " is smaller than previous mark " + offset); @@ -142,10 +142,10 @@ public void mark(int offset) } syncedOffset = offset; - if (syncOnMark) sync(); + if (fsync) fsync(); } - private void sync() + public void fsync() { try { @@ -160,7 +160,7 @@ private void sync() @Override public void close() { - if (!syncOnMark) sync(); + fsync(); try { @@ -218,7 +218,13 @@ public int syncedOffset() } @Override - public void mark(int offset) + public void mark(int offset, boolean fsync) + { + throw new UnsupportedOperationException(); + } + + @Override + public void fsync() { throw new UnsupportedOperationException(); } @@ -235,7 +241,13 @@ public int syncedOffset() } @Override - public void mark(int offset) + public void mark(int offset, boolean fsync) + { + throw new UnsupportedOperationException(); + } + + @Override + public void fsync() { throw new UnsupportedOperationException(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index c846038fd845..0f33f04d9273 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -342,6 +342,7 @@ Command loadCommand(TxnId txnId) Runnable saveCommand(Command before, Command after) { Mutation mutation = AccordKeyspace.getCommandMutation(id, before, after, nextSystemTimestampMicros()); + // TODO (required): make sure we test recovering when this has failed to be persisted return null != mutation ? mutation::applyUnsafe : null; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 80cfdf31eac5..f9daf2354d68 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -297,6 +297,10 @@ private M readMessage(TxnId txnId, MessageType messageType, // TODO (alexp): tests for objects that go through AccordJournal private class JournalCallbacks implements AsyncCallbacks { + private JournalCallbacks() + { + } + /** * Queue up the record for either frame aggregation (if a protocol message) or frame application (if a frame). */ @@ -352,7 +356,7 @@ private void onLocalRequestWriteFailed(LocalRequestContext context, Throwable ca private void onFrameWriteFailed(FrameRecord frame, FrameContext context, Throwable cause) { - // TODO: panic + // TODO (required): panic } @Override @@ -364,7 +368,7 @@ public void onFlush(long segment, int position) @Override public void onFlushFailed(Throwable cause) { - // TODO: panic + // TODO (required): panic } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 1a476e1d8e0c..ac773b475bc4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -97,9 +97,8 @@ import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.SinglePartitionReadCommand; -import org.apache.cassandra.db.Slices; import org.apache.cassandra.db.filter.ClusteringIndexFilter; -import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; +import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.filter.DataLimits; import org.apache.cassandra.db.filter.RowFilter; @@ -162,6 +161,7 @@ import org.apache.cassandra.utils.Clock.Global; import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.btree.BTree; +import org.apache.cassandra.utils.btree.BTreeSet; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import static accord.utils.Invariants.checkArgument; @@ -202,7 +202,7 @@ public class AccordKeyspace private static final LocalPartitioner FOR_KEYS_LOCAL_PARTITIONER = new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE)); - private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexSliceFilter(Slices.ALL, false); + private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexNamesFilter(BTreeSet.of(new ClusteringComparator(), Clustering.EMPTY), false); //TODO (now, performance): should this be partitioner rather than TableId? As of this patch distributed tables should only have 1 partitioner... private static final ConcurrentMap TABLE_SERIALIZERS = new ConcurrentHashMap<>(); @@ -572,6 +572,8 @@ private static TableMetadata commandsForKeysTable(String tableName) + "data blob, " + "PRIMARY KEY((store_id, key_token, key))" + ')') + // TODO (expected): make this uncompressed, as not very compressable (except perhaps the primary key, but could switch to operating on tokens directly) +// + " WITH compression = {'enabled':'false'};") .partitioner(FOR_KEYS_LOCAL_PARTITIONER) .build(); } @@ -993,12 +995,12 @@ public static SaveStatus deserializeSaveStatusOrNull(Cell cell) public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId txnId) { - String cql = "SELECT * FROM %s.%s " + + String cql = "SELECT * FROM " + ACCORD_KEYSPACE_NAME + '.' + COMMANDS + ' ' + "WHERE store_id = ? " + "AND domain = ? " + "AND txn_id=(?, ?, ?)"; - return executeInternal(format(cql, ACCORD_KEYSPACE_NAME, COMMANDS), + return executeInternal(cql, commandStore.id(), txnId.domain().ordinal(), txnId.msb, txnId.lsb, txnId.node.id); @@ -1405,12 +1407,12 @@ public static Mutation getTimestampsForKeyMutation(AccordCommandStore commandSto public static UntypedResultSet loadTimestampsForKeyRow(CommandStore commandStore, PartitionKey key) { - String cql = "SELECT * FROM %s.%s " + + String cql = "SELECT * FROM " + ACCORD_KEYSPACE_NAME + '.' + TIMESTAMPS_FOR_KEY + ' ' + "WHERE store_id = ? " + "AND key_token = ? " + "AND key=(?, ?)"; - return executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TIMESTAMPS_FOR_KEY), + return executeInternal(cql, commandStore.id(), serializeToken(key.token()), key.table().asUUID(), key.partitionKey().getKey()); @@ -1625,9 +1627,9 @@ public int hashCode() private static EpochDiskState saveEpochDiskState(EpochDiskState diskState) { - String cql = "INSERT INTO %s.%s (key, min_epoch, max_epoch) VALUES (0, ?, ?);"; - executeInternal(format(cql, ACCORD_KEYSPACE_NAME, EPOCH_METADATA), - diskState.minEpoch, diskState.maxEpoch); + String cql = "INSERT INTO " + ACCORD_KEYSPACE_NAME + '.' + EPOCH_METADATA + ' ' + + "(key, min_epoch, max_epoch) VALUES (0, ?, ?);"; + executeInternal(cql, diskState.minEpoch, diskState.maxEpoch); return diskState; } @@ -1635,7 +1637,8 @@ private static EpochDiskState saveEpochDiskState(EpochDiskState diskState) @VisibleForTesting public static EpochDiskState loadEpochDiskState() { - String cql = "SELECT * FROM %s.%s WHERE key=0"; + String cql = "SELECT * FROM " + ACCORD_KEYSPACE_NAME + '.' + EPOCH_METADATA + ' ' + + "WHERE key=0"; UntypedResultSet result = executeInternal(format(cql, ACCORD_KEYSPACE_NAME, EPOCH_METADATA)); if (result.isEmpty()) return null; @@ -1669,8 +1672,9 @@ public static EpochDiskState saveTopology(Topology topology, EpochDiskState disk try { - String cql = "UPDATE %s.%s SET topology=? WHERE epoch=?"; - executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "SET topology=? WHERE epoch=?"; + executeInternal(cql, serialize(topology, LocalVersionedSerializers.topology), topology.epoch()); flush(Topologies); } @@ -1685,8 +1689,9 @@ public static EpochDiskState saveTopology(Topology topology, EpochDiskState disk public static EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, EpochDiskState diskState) { diskState = maybeUpdateMaxEpoch(diskState, epoch); - String cql = "UPDATE %s.%s SET remote_sync_complete = remote_sync_complete + ? WHERE epoch = ?"; - executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "SET remote_sync_complete = remote_sync_complete + ? WHERE epoch = ?"; + executeInternal(cql, Collections.singleton(node.id), epoch); flush(Topologies); return diskState; @@ -1695,8 +1700,9 @@ public static EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, Ep public static EpochDiskState markClosed(Ranges ranges, long epoch, EpochDiskState diskState) { diskState = maybeUpdateMaxEpoch(diskState, epoch); - String cql = "UPDATE %s.%s SET closed = closed + ? WHERE epoch = ?"; - executeInternal(String.format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "SET closed = closed + ? WHERE epoch = ?"; + executeInternal(cql, KeySerializers.rangesToBlobMap(ranges), epoch); flush(Topologies); return diskState; @@ -1705,8 +1711,9 @@ public static EpochDiskState markClosed(Ranges ranges, long epoch, EpochDiskStat public static EpochDiskState markRedundant(Ranges ranges, long epoch, EpochDiskState diskState) { diskState = maybeUpdateMaxEpoch(diskState, epoch); - String cql = "UPDATE %s.%s SET redundant = redundant + ? WHERE epoch = ?"; - executeInternal(String.format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "SET redundant = redundant + ? WHERE epoch = ?"; + executeInternal(cql, KeySerializers.rangesToBlobMap(ranges), epoch); flush(Topologies); return diskState; @@ -1715,8 +1722,9 @@ public static EpochDiskState markRedundant(Ranges ranges, long epoch, EpochDiskS public static EpochDiskState setNotifyingLocalSync(long epoch, Set pending, EpochDiskState diskState) { diskState = maybeUpdateMaxEpoch(diskState, epoch); - String cql = "UPDATE %s.%s SET sync_state = ?, pending_sync_notify = ? WHERE epoch = ?"; - executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "SET sync_state = ?, pending_sync_notify = ? WHERE epoch = ?"; + executeInternal(cql, SyncStatus.NOTIFYING.ordinal(), pending.stream().map(i -> i.id).collect(Collectors.toSet()), epoch); @@ -1726,8 +1734,9 @@ public static EpochDiskState setNotifyingLocalSync(long epoch, Set pend public static EpochDiskState markLocalSyncAck(Node.Id node, long epoch, EpochDiskState diskState) { diskState = maybeUpdateMaxEpoch(diskState, epoch); - String cql = "UPDATE %s.%s SET pending_sync_notify = pending_sync_notify - ? WHERE epoch = ?"; - executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "SET pending_sync_notify = pending_sync_notify - ? WHERE epoch = ?"; + executeInternal(cql, Collections.singleton(node.id), epoch); return diskState; } @@ -1735,8 +1744,9 @@ public static EpochDiskState markLocalSyncAck(Node.Id node, long epoch, EpochDis public static EpochDiskState setCompletedLocalSync(long epoch, EpochDiskState diskState) { diskState = maybeUpdateMaxEpoch(diskState, epoch); - String cql = "UPDATE %s.%s SET sync_state = ?, pending_sync_notify = {} WHERE epoch = ?"; - executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), + String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "SET sync_state = ?, pending_sync_notify = {} WHERE epoch = ?"; + executeInternal(cql, SyncStatus.COMPLETED.ordinal(), epoch); return diskState; @@ -1749,8 +1759,9 @@ public static EpochDiskState truncateTopologyUntil(final long epoch, EpochDiskSt long delete = diskState.minEpoch; diskState = diskState.withNewMinEpoch(delete + 1); saveEpochDiskState(diskState); - String cql = "DELETE FROM %s.%s WHERE epoch = ?"; - executeInternal(format(cql, ACCORD_KEYSPACE_NAME, TOPOLOGIES), delete); + String cql = "DELETE FROM " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "WHERE epoch = ?"; + executeInternal(cql, delete); } return diskState; } @@ -1763,7 +1774,8 @@ public interface TopologyLoadConsumer @VisibleForTesting public static void loadEpoch(long epoch, TopologyLoadConsumer consumer) throws IOException { - String cql = format("SELECT * FROM %s.%s WHERE epoch=?", ACCORD_KEYSPACE_NAME, TOPOLOGIES); + String cql = "SELECT * FROM " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + + "WHERE epoch=?"; UntypedResultSet result = executeInternal(cql, epoch); checkState(!result.isEmpty(), "Nothing found for epoch %d", epoch); diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 7346a6eebf31..160813d722a3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -347,7 +347,7 @@ public static long command(Command command) return size; Command.Committed committed = command.asCommitted(); - size += WaitingOnSerializer.serializedSize(committed.waitingOn); + size += WaitingOnSerializer.serializedSize(command.txnId(), committed.waitingOn); return size; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 36a88c4d08ab..d4a7e67fc479 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -301,7 +301,7 @@ public static IAccordService instance() public static long uniqueNow() { - // TODO (correctness, now): This is not unique it's just currentTimeMillis as microseconds + // TODO (now, correctness): This is not unique it's just currentTimeMillis as microseconds return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 070fcfa0e6f8..e506bbf85cd1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -223,8 +223,7 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce Writes writes = CommandSerializers.nullableWrites.deserialize(in, version); Result result = null; - if (maxKnowledgeStatus == SaveStatus.PreApplied || maxKnowledgeStatus == SaveStatus.Applied - || maxKnowledgeStatus == SaveStatus.TruncatedApply || maxKnowledgeStatus == SaveStatus.TruncatedApplyWithOutcome || maxKnowledgeStatus == SaveStatus.TruncatedApplyWithDeps) + if (maxKnowledgeStatus.known.outcome.isOrWasApply()) result = CommandSerializers.APPLIED; return createOk(map, maxKnowledgeStatus, maxStatus, maxPromised, maxAcceptedOrCommitted, acceptedOrCommitted, executeAt, diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index fbc3aeb22f55..fe16d3033efc 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import java.nio.ByteBuffer; import com.google.common.base.Preconditions; @@ -99,6 +100,13 @@ public void serialize(T ts, DataOutputPlus out, int version) throws IOException TopologySerializers.nodeId.serialize(ts.node, out, version); } + public void serialize(T ts, DataOutputPlus out) throws IOException + { + out.writeLong(ts.msb); + out.writeLong(ts.lsb); + TopologySerializers.NodeIdSerializer.serialize(ts.node, out); + } + public int serialize(T ts, V dst, ValueAccessor accessor, int offset) { int position = offset; @@ -110,6 +118,13 @@ public int serialize(T ts, V dst, ValueAccessor accessor, int offset) return size; } + public void serialize(T ts, ByteBuffer out) + { + out.putLong(ts.msb); + out.putLong(ts.lsb); + TopologySerializers.nodeId.serialize(ts.node, out); + } + @Override public T deserialize(DataInputPlus in, int version) throws IOException { @@ -118,6 +133,13 @@ public T deserialize(DataInputPlus in, int version) throws IOException TopologySerializers.nodeId.deserialize(in, version)); } + public T deserialize(DataInputPlus in) throws IOException + { + return factory.create(in.readLong(), + in.readLong(), + TopologySerializers.NodeIdSerializer.deserialize(in)); + } + public T deserialize(V src, ValueAccessor accessor, int offset) { long msb = accessor.getLong(src, offset); @@ -128,6 +150,16 @@ public T deserialize(V src, ValueAccessor accessor, int offset) return factory.create(msb, lsb, node); } + public T deserialize(ByteBuffer buffer, int position) + { + long msb = buffer.getLong(position); + position += TypeSizes.LONG_SIZE; + long lsb = buffer.getLong(position); + position += TypeSizes.LONG_SIZE; + Node.Id node = TopologySerializers.nodeId.deserialize(buffer, position); + return factory.create(msb, lsb, node); + } + @Override public long serializedSize(T ts, int version) { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index 370f88b73d8f..4512776154fb 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -178,17 +178,8 @@ public Propagate deserialize(DataInputPlus in, int version) throws IOException Writes writes = CommandSerializers.nullableWrites.deserialize(in, version); Result result = null; - switch (maxSaveStatus) - { - case PreApplied: - case Applying: - case Applied: - case TruncatedApply: - case TruncatedApplyWithOutcome: - case TruncatedApplyWithDeps: - result = CommandSerializers.APPLIED; - break; - } + if (achieved.outcome.isOrWasApply()) + result = CommandSerializers.APPLIED; return Propagate.SerializerSupport.create(txnId, route, diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java index c5c2f9a38216..4693c03c5cc2 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.List; import java.util.Set; @@ -46,7 +47,7 @@ public static class NodeIdSerializer implements IVersionedSerializer, M { private NodeIdSerializer() {} - private static void serialize(Node.Id id, DataOutputPlus out) throws IOException + public static void serialize(Node.Id id, DataOutputPlus out) throws IOException { out.writeInt(id.id); } @@ -68,7 +69,12 @@ public int serialize(Node.Id id, V dst, ValueAccessor accessor, int offse return accessor.putInt(dst, offset, id.id); } - private static Node.Id deserialize(DataInputPlus in) throws IOException + public void serialize(Node.Id id, ByteBuffer out) + { + out.putInt(id.id); + } + + public static Node.Id deserialize(DataInputPlus in) throws IOException { return new Node.Id(in.readInt()); } @@ -90,6 +96,11 @@ public Node.Id deserialize(V src, ValueAccessor accessor, int offset) return new Node.Id(accessor.getInt(src, offset)); } + public Node.Id deserialize(ByteBuffer src, int position) + { + return new Node.Id(src.getInt(position)); + } + public int serializedSize() { return TypeSizes.INT_SIZE; // id.id diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index 3efb9e2c6c47..6c22d2844039 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -21,9 +21,11 @@ import java.io.IOException; import java.nio.ByteBuffer; +import accord.local.Command; import accord.local.Command.WaitingOn; import accord.primitives.Keys; import accord.primitives.Routable; +import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.ImmutableBitSet; import accord.utils.Invariants; @@ -32,14 +34,18 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.utils.vint.VIntCoding; public class WaitingOnSerializer { public static void serialize(TxnId txnId, WaitingOn waitingOn, DataOutputPlus out) throws IOException { - out.writeUnsignedVInt32(waitingOn.keys.size()); - out.writeUnsignedVInt32(waitingOn.txnIds.size()); + if (txnId.kind().awaitsOnlyDeps()) + { + Timestamp executeAtLeast = waitingOn.executeAtLeast(); + out.writeBoolean(executeAtLeast != null); + if (executeAtLeast != null) + CommandSerializers.timestamp.serialize(executeAtLeast, out); + } int keyCount = waitingOn.keys.size(); int txnIdCount = waitingOn.txnIds.size(); int waitingOnLength = (txnIdCount + keyCount + 63) / 64; @@ -53,8 +59,12 @@ public static void serialize(TxnId txnId, WaitingOn waitingOn, DataOutputPlus ou public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList txnIds, DataInputPlus in) throws IOException { - int a = in.readUnsignedVInt32(); - int b = in.readUnsignedVInt32(); + Timestamp executeAtLeast = null; + if (txnId.kind().awaitsOnlyDeps()) + { + if (in.readBoolean()) + executeAtLeast = CommandSerializers.timestamp.deserialize(in); + } int waitingOnLength = (txnIds.size() + keys.size() + 63) / 64; ImmutableBitSet waitingOn = deserialize(waitingOnLength, in); ImmutableBitSet appliedOrInvalidated = null; @@ -63,17 +73,26 @@ public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList 0) serialize(appliedOrInvalidatedLength, waitingOn.appliedOrInvalidated, out); @@ -133,12 +166,18 @@ private static void serialize(int length, SimpleBitSet write, ByteBuffer out) public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList txnIds, ByteBuffer in) throws IOException { - int waitingOnLength = (txnIds.size() + keys.size() + 63) / 64; int position = in.position(); - int a = VIntCoding.readUnsignedVInt32(in, position); - position += TypeSizes.sizeofUnsignedVInt(a); - int b = VIntCoding.readUnsignedVInt32(in, position); - position += TypeSizes.sizeofUnsignedVInt(a); + Timestamp executeAtLeast = null; + if (txnId.kind().awaitsOnlyDeps()) + { + if (in.get(position++) != 0) + { + executeAtLeast = CommandSerializers.timestamp.deserialize(in, position); + position += CommandSerializers.timestamp.serializedSize(); + } + } + + int waitingOnLength = (txnIds.size() + keys.size() + 63) / 64; ImmutableBitSet waitingOn = deserialize(position, waitingOnLength, in); ImmutableBitSet appliedOrInvalidated = null; if (txnId.domain() == Routable.Domain.Range) @@ -147,7 +186,11 @@ public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList ByteBuffer serialized(IVersionedSerializer serializer, T va public static void writeLeastSignificantBytes(long register, int bytes, ByteBuffer out) { - writeMostSignificantBytesSlow(register << ((8 - bytes)*8), bytes, out); + writeMostSignificantBytes(register << ((8 - bytes)*8), bytes, out); } public static void writeMostSignificantBytes(long register, int bytes, ByteBuffer out) diff --git a/test/conf/logback-simulator.xml b/test/conf/logback-simulator.xml index a4c24aab8dae..fe823383eedb 100644 --- a/test/conf/logback-simulator.xml +++ b/test/conf/logback-simulator.xml @@ -16,7 +16,7 @@ ~ limitations under the License. --> - + diff --git a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java index 5988bd429f28..b96c0463fd4d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java @@ -36,6 +36,7 @@ import org.junit.After; import org.junit.BeforeClass; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.Duration; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.BooleanType; @@ -62,6 +63,8 @@ import org.apache.cassandra.distributed.shared.DistributedTestBase; import org.apache.cassandra.service.accord.AccordStateCache; +import static java.lang.System.currentTimeMillis; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.apache.cassandra.config.CassandraRelevantProperties.JOIN_RING; import static org.apache.cassandra.config.CassandraRelevantProperties.RESET_BOOTSTRAP_PROGRESS; import static org.apache.cassandra.config.CassandraRelevantProperties.SKIP_GC_INSPECTOR; @@ -81,6 +84,7 @@ public void afterEach() { @BeforeClass public static void beforeClass() throws Throwable { + CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(MILLISECONDS.toSeconds(currentTimeMillis()))); ICluster.setup(); SKIP_GC_INSPECTOR.setBoolean(true); AccordStateCache.validateLoadOnEvict(true); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index 42e7fbf34a1a..8e663ab966f1 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -19,9 +19,15 @@ package org.apache.cassandra.distributed.test.accord; import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; import java.util.Date; +import java.util.List; +import java.util.Map; import java.util.Random; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicInteger; import com.google.common.util.concurrent.RateLimiter; import org.junit.BeforeClass; @@ -30,10 +36,16 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.api.IMessage; +import org.apache.cassandra.distributed.api.IMessageFilters; +import org.apache.cassandra.net.Verb; import org.apache.cassandra.utils.EstimatedHistogram; +import static java.lang.System.currentTimeMillis; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; public class AccordLoadTest extends AccordTestBase @@ -43,20 +55,35 @@ public class AccordLoadTest extends AccordTestBase @BeforeClass public static void setUp() throws IOException { - AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config.set("lwt_strategy", "accord").set("non_serial_write_strategy", "accord")), 2); + CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(MILLISECONDS.toSeconds(currentTimeMillis()))); + AccordTestBase.setupCluster(builder -> builder, 2); } @Ignore @Test public void testLoad() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, v int, PRIMARY KEY(k))", + test("CREATE TABLE " + qualifiedTableName + " (k int, v int, PRIMARY KEY(k)) WITH transactional_mode = 'full'", cluster -> { + + final ConcurrentHashMap verbs = new ConcurrentHashMap<>(); + cluster.filters().outbound().messagesMatching(new IMessageFilters.Matcher() + { + @Override + public boolean matches(int i, int i1, IMessage iMessage) + { + verbs.computeIfAbsent(Verb.fromId(iMessage.verb()), ignore -> new AtomicInteger()).incrementAndGet(); + return false; + } + }).drop(); ICoordinator coordinator = cluster.coordinator(1); + final int repairInterval = 3000; final int batchSize = 1000; final int concurrency = 100; final int ratePerSecond = 1000; final int keyCount = 10; + final float readChance = 0.33f; + long nextRepairAt = repairInterval; for (int i = 1; i <= keyCount; i++) coordinator.execute("INSERT INTO " + qualifiedTableName + " (k, v) VALUES (0, 0) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i); @@ -75,14 +102,66 @@ public void testLoad() throws Exception inFlight.acquire(); rateLimiter.acquire(); long commandStart = System.nanoTime(); - coordinator.executeWithResult((success, fail) -> { - inFlight.release(); - if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); -// else exceptions.add(fail); - }, "UPDATE " + qualifiedTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, random.nextInt(keyCount)); + if (random.nextFloat() < readChance) + { + coordinator.executeWithResult((success, fail) -> { + inFlight.release(); + if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); + // else exceptions.add(fail); + }, "SELECT * FROM " + qualifiedTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, random.nextInt(keyCount)); + } + else + { + coordinator.executeWithResult((success, fail) -> { + inFlight.release(); + if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); + // else exceptions.add(fail); + }, "UPDATE " + qualifiedTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, random.nextInt(keyCount)); + } + } + + if ((nextRepairAt -= batchSize) <= 0) + { + nextRepairAt += repairInterval; + System.out.println("repairing..."); + cluster.coordinator(1).instance().nodetool("repair", qualifiedTableName); + } + + final Date date = new Date(); + System.out.printf("%tT rate: %.2f/s\n", date, (((float)batchSize * 1000) / NANOSECONDS.toMillis(System.nanoTime() - batchStart))); + System.out.printf("%tT percentiles: %d %d %d %d\n", date, histogram.percentile(.25)/1000, histogram.percentile(.5)/1000, histogram.percentile(.75)/1000, histogram.percentile(1)/1000); + + class VerbCount + { + final Verb verb; + final int count; + + VerbCount(Verb verb, int count) + { + this.verb = verb; + this.count = count; + } + } + List verbCounts = new ArrayList<>(); + for (Map.Entry e : verbs.entrySet()) + { + int count = e.getValue().getAndSet(0); + if (count != 0) verbCounts.add(new VerbCount(e.getKey(), count)); + } + verbCounts.sort(Comparator.comparing(v -> -v.count)); + + StringBuilder verbSummary = new StringBuilder(); + for (VerbCount vs : verbCounts) + { + { + if (verbSummary.length() > 0) + verbSummary.append(", "); + verbSummary.append(vs.verb); + verbSummary.append(": "); + verbSummary.append(vs.count); + } } - System.out.printf("%tT rate: %.2f/s\n", new Date(), (((float)batchSize * 1000) / NANOSECONDS.toMillis(System.nanoTime() - batchStart))); - System.out.printf("%tT percentiles: %d %d %d %d\n", new Date(), histogram.percentile(.25)/1000, histogram.percentile(.5)/1000, histogram.percentile(.75)/1000, histogram.percentile(1)/1000); + System.out.printf("%tT verbs: %s\n", date, verbSummary); } } ); diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index 5e190f23b550..ffd918584ba9 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -61,6 +61,7 @@ import org.apache.cassandra.io.filesystem.ListenableFileSystem; import org.apache.cassandra.io.util.FileSystems; import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.paxos.BallotGenerator; import org.apache.cassandra.service.paxos.PaxosPrepare; import org.apache.cassandra.simulator.RandomSource.Choices; @@ -199,7 +200,7 @@ public static abstract class Builder protected HeapPool.Logged.Listener memoryListener; protected SimulatedTime.Listener timeListener = (i1, i2) -> {}; protected LongConsumer onThreadLocalRandomCheck; - protected String lwtStrategy = "migration"; + protected String transactionalMode = "full"; public Builder failures(Failures failures) { @@ -576,12 +577,17 @@ public Builder onThreadLocalRandomCheck(LongConsumer runnable) return this; } - public Builder lwtStrategy(String strategy) + public Builder transactionalMode(String mode) { - this.lwtStrategy = strategy; + this.transactionalMode = mode; return this; } + public TransactionalMode transactionalMode() + { + return TransactionalMode.fromString(transactionalMode); + } + public abstract ClusterSimulation create(long seed) throws IOException; } @@ -776,8 +782,7 @@ public ClusterSimulation(RandomSource random, long seed, int uniqueNum, .set("disk_access_mode", "standard") .set("failure_detector", SimulatedFailureDetector.Instance.class.getName()) .set("commitlog_compression", new ParameterizedClass(LZ4Compressor.class.getName(), emptyMap())) - .set("commitlog_sync", "batch") - .set("lwt_strategy", builder.lwtStrategy); + .set("commitlog_sync", "batch"); // TODO: Add remove() to IInstanceConfig if (config instanceof InstanceConfig) { @@ -875,6 +880,7 @@ public void afterStartup(IInstance i) simulated.register((SimulatedFutureActionScheduler) futureActionScheduler); scheduler = builder.schedulerFactory.create(random); + // TODO (required): we aren't passing paxos variant change parameter anymore options = new ClusterActions.Options(builder.topologyChangeLimit, Choices.uniform(KindOfSequence.values()).choose(random).period(builder.topologyChangeIntervalNanos, random), Choices.random(random, builder.topologyChanges), builder.consensusChangeLimit, Choices.uniform(KindOfSequence.values()).choose(random).period(builder.consensusChangeIntervalNanos, random), diff --git a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java index 798c4d45ae6c..5a0cb4701629 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/SimulationRunner.java @@ -343,7 +343,7 @@ protected void propagate(B builder) builder.debug(debugLevels, debugPrimaryKeys); } - Optional.ofNullable(lwtStrategy).ifPresent(builder::lwtStrategy); + Optional.ofNullable(lwtStrategy).ifPresent(builder::transactionalMode); } public void run(B builder) throws IOException diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java index a75a1ef4610f..ee8fd0ca4993 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordClusterSimulation.java @@ -71,11 +71,11 @@ public void applyHandicaps() int[] primaryKeys = primaryKeys(seed, builder.primaryKeyCount()); KindOfSequence.Period jitter = RandomSource.Choices.uniform(KindOfSequence.values()).choose(random) .period(builder.schedulerJitterNanos(), random); - return new PairOfSequencesAccordSimulation(simulated, cluster, options, - builder.readChance().select(random), builder.concurrency(), builder.primaryKeySeconds(), builder.withinKeyConcurrency(), - SERIAL, schedulers, builder.debug(), seed, - primaryKeys, builder.secondsToSimulate() >= 0 ? SECONDS.toNanos(builder.secondsToSimulate()) : -1, - () -> jitter.get(random)); + return new PairOfSequencesAccordSimulation(simulated, cluster, options, builder.transactionalMode(), + builder.readChance().select(random), builder.concurrency(), builder.primaryKeySeconds(), builder.withinKeyConcurrency(), + SERIAL, schedulers, builder.debug(), seed, + primaryKeys, builder.secondsToSimulate() >= 0 ? SECONDS.toNanos(builder.secondsToSimulate()) : -1, + () -> jitter.get(random)); }); } diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java index 8d6c8a0dcc35..7965c29fc196 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PairOfSequencesAccordSimulation.java @@ -50,6 +50,7 @@ import org.apache.cassandra.distributed.impl.Query; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.simulator.Action; import org.apache.cassandra.simulator.Debug; import org.apache.cassandra.simulator.RunnableActionScheduler; @@ -123,10 +124,12 @@ void log(@Nullable Integer pk) private final float writeRatio; private final HistoryValidator validator; + private final TransactionalMode transactionalMode; public PairOfSequencesAccordSimulation(SimulatedSystems simulated, Cluster cluster, ClusterActions.Options clusterOptions, + TransactionalMode transactionalMode, float readRatio, int concurrency, IntRange simulateKeyForSeconds, IntRange withinKeyConcurrency, ConsistencyLevel serialConsistency, RunnableActionScheduler scheduler, Debug debug, @@ -139,6 +142,7 @@ public PairOfSequencesAccordSimulation(SimulatedSystems simulated, scheduler, debug, seed, primaryKeys, runForNanos, jitter); + this.transactionalMode = transactionalMode; this.writeRatio = 1F - readRatio; HistoryValidator validator = new StrictSerializabilityValidator(primaryKeys); if (CassandraRelevantProperties.TEST_HISTORY_VALIDATOR_LOGGING_ENABLED.getBoolean()) @@ -149,7 +153,7 @@ public PairOfSequencesAccordSimulation(SimulatedSystems simulated, @Override protected String createTableStmt() { - return "CREATE TABLE " + KEYSPACE + ".tbl (pk int, count int, seq text, PRIMARY KEY (pk))"; + return String.format("CREATE TABLE " + KEYSPACE + ".tbl (pk int, count int, seq text, PRIMARY KEY (pk)) WITH transactional_mode = '%s'", transactionalMode); } @Override diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java index 6e7d058beadb..c54ba1c26d46 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosClusterSimulation.java @@ -69,11 +69,6 @@ public PaxosClusterSimulation create(long seed) throws IOException random.reset(seed); return new PaxosClusterSimulation(random, seed, uniqueNum, this); } - - public String lwtStrategy() - { - return lwtStrategy; - } } PaxosClusterSimulation(RandomSource random, long seed, int uniqueNum, Builder builder) throws IOException diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java index 71734c6e68ed..095c79769a1d 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulationRunner.java @@ -69,7 +69,7 @@ protected void propagate(PaxosClusterSimulation.Builder builder) @Override protected void run( long seed, PaxosClusterSimulation.Builder builder) throws IOException { - if (Objects.equals(builder.lwtStrategy(), "accord")) + if (Objects.equals(builder.transactionalMode(), "accord")) { // Apply handicaps builder.dcs(new IntRange(1, 1)); diff --git a/test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java b/test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java index 5b83ee88f1c5..b5df2b6b22c6 100644 --- a/test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java +++ b/test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java @@ -57,9 +57,9 @@ private void testReadWrite(int n, boolean syncOnMark) throws IOException Descriptor descriptor = Descriptor.create(directory, System.currentTimeMillis(), 1); - SyncedOffsets active = SyncedOffsets.active(descriptor, syncOnMark); + SyncedOffsets active = SyncedOffsets.active(descriptor); for (int i = 0; i < n; i++) - active.mark(i); + active.mark(i, syncOnMark); assertEquals(n - 1, active.syncedOffset()); active.close(); diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java index 6e2b1f369000..4ab8be5d6a21 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java @@ -56,7 +56,7 @@ public void serde() TxnId txnId = TxnId.NONE; if (waitingOn.appliedOrInvalidated != null) txnId = new TxnId(txnId.epoch(), txnId.hlc(), txnId.kind(), Routable.Domain.Range, txnId.node); buffer.clear(); - long expectedSize = WaitingOnSerializer.serializedSize(waitingOn); + long expectedSize = WaitingOnSerializer.serializedSize(txnId, waitingOn); WaitingOnSerializer.serialize(txnId, waitingOn, buffer); Assertions.assertThat(buffer.getLength()).isEqualTo(expectedSize); Command.WaitingOn read = WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.txnIds, new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)); From 0d4c0961be907d24b196fd985a790716a86b0eb6 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 18 Jun 2024 13:55:38 -0700 Subject: [PATCH 122/340] CEP-15: (Accord) When nodes are removed from a cluster, need to update topology tracking to avoid being blocked patch by David Capwell; reviewed by Blake Eggleston for CASSANDRA-19719 --- build.xml | 3 +- modules/accord | 2 +- .../cassandra/repair/RepairCoordinator.java | 2 +- .../cassandra/repair/RepairSession.java | 2 +- .../cassandra/repair/SharedContext.java | 5 + .../repair/state/CoordinatorState.java | 8 +- .../cassandra/repair/state/SessionState.java | 8 +- .../accord/AccordConfigurationService.java | 138 +++- .../service/accord/AccordKeyspace.java | 3 +- .../service/accord/AccordSyncPropagator.java | 82 +- .../concurrent/SimulatedExecutorFactory.java | 2 +- .../db/virtual/LocalRepairTablesTest.java | 5 +- .../index/accord/AccordIndexStressTest.java | 8 +- .../index/accord/RouteIndexTest.java | 15 +- .../net/SimulatedMessageDelivery.java | 1 + .../apache/cassandra/repair/FuzzTestBase.java | 308 ++----- .../AccordConfigurationServiceTest.java | 11 +- .../service/accord/AccordKeyspaceTest.java | 4 +- .../accord/AccordSyncPropagatorTest.java | 8 + .../service/accord/EpochSyncTest.java | 754 ++++++++++++++++++ .../accord/LoggingDiskStateManager.java | 93 +++ .../service/accord/MockDiskStateManager.java | 79 ++ .../utils/StatefulRangeTreeTest.java | 14 +- 23 files changed, 1230 insertions(+), 325 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/LoggingDiskStateManager.java create mode 100644 test/unit/org/apache/cassandra/service/accord/MockDiskStateManager.java diff --git a/build.xml b/build.xml index 55614a36a6ff..e1905a930491 100644 --- a/build.xml +++ b/build.xml @@ -1289,13 +1289,12 @@ - + - diff --git a/modules/accord b/modules/accord index f1f5ea5ccbd6..694ae39e2e00 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit f1f5ea5ccbd6e0a8abf579a4331fa84a1b3d9f95 +Subproject commit 694ae39e2e00075bdabd47632dced0db12a9981d diff --git a/src/java/org/apache/cassandra/repair/RepairCoordinator.java b/src/java/org/apache/cassandra/repair/RepairCoordinator.java index 9e8f1d5a3b68..2192f4226c49 100644 --- a/src/java/org/apache/cassandra/repair/RepairCoordinator.java +++ b/src/java/org/apache/cassandra/repair/RepairCoordinator.java @@ -129,7 +129,7 @@ public RepairCoordinator(StorageService storageService, int cmd, RepairOption op { this.ctx = ctx; this.validationScheduler = Scheduler.build(DatabaseDescriptor.getConcurrentMerkleTreeRequests()); - this.state = new CoordinatorState(ctx.clock(), cmd, keyspace, options); + this.state = new CoordinatorState(ctx, cmd, keyspace, options); this.tag = "repair:" + cmd; this.validColumnFamilies = validColumnFamilies; this.getLocalReplicas = getLocalReplicas; diff --git a/src/java/org/apache/cassandra/repair/RepairSession.java b/src/java/org/apache/cassandra/repair/RepairSession.java index c0f14af7acd4..fe603ade71b3 100644 --- a/src/java/org/apache/cassandra/repair/RepairSession.java +++ b/src/java/org/apache/cassandra/repair/RepairSession.java @@ -182,7 +182,7 @@ public RepairSession(SharedContext ctx, this.paxosOnly = paxosOnly; this.isConsensusMigration = isConsensusMigration; assert cfnames.length > 0 : "Repairing no column families seems pointless, doesn't it"; - this.state = new SessionState(ctx.clock(), parentRepairSession, keyspace, cfnames, commonRange); + this.state = new SessionState(ctx, parentRepairSession, keyspace, cfnames, commonRange); this.parallelismDegree = parallelismDegree; this.isIncremental = isIncremental; this.previewKind = previewKind; diff --git a/src/java/org/apache/cassandra/repair/SharedContext.java b/src/java/org/apache/cassandra/repair/SharedContext.java index 54ec4214570f..70afa9bc1d07 100644 --- a/src/java/org/apache/cassandra/repair/SharedContext.java +++ b/src/java/org/apache/cassandra/repair/SharedContext.java @@ -42,6 +42,7 @@ import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MBeanWrapper; +import org.apache.cassandra.utils.TimeUUID; /** * Access methods to shared resources and services. @@ -81,6 +82,10 @@ public MessageDelivery messaging() TableRepairManager repairManager(ColumnFamilyStore store); StreamExecutor streamExecutor(); PaxosRepairState paxosRepairState(); + default Supplier timeUUID() + { + return TimeUUID.Generator::nextTimeUUID; + } class Global implements SharedContext { diff --git a/src/java/org/apache/cassandra/repair/state/CoordinatorState.java b/src/java/org/apache/cassandra/repair/state/CoordinatorState.java index 737fd68106d8..70dcb5de7f1b 100644 --- a/src/java/org/apache/cassandra/repair/state/CoordinatorState.java +++ b/src/java/org/apache/cassandra/repair/state/CoordinatorState.java @@ -30,12 +30,10 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.CommonRange; import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.repair.messages.RepairOption; -import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.TimeUUID; -import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; - public class CoordinatorState extends AbstractState { public enum State @@ -56,9 +54,9 @@ public enum State // API to split function calls for phase changes from getting the state public final Phase phase = new Phase(); - public CoordinatorState(Clock clock, int cmd, String keyspace, RepairOption options) + public CoordinatorState(SharedContext ctx, int cmd, String keyspace, RepairOption options) { - super(clock, nextTimeUUID(), State.class); + super(ctx.clock(), ctx.timeUUID().get(), State.class); this.cmd = cmd; this.keyspace = Objects.requireNonNull(keyspace); this.options = Objects.requireNonNull(options); diff --git a/src/java/org/apache/cassandra/repair/state/SessionState.java b/src/java/org/apache/cassandra/repair/state/SessionState.java index 32be08935077..352643a2474b 100644 --- a/src/java/org/apache/cassandra/repair/state/SessionState.java +++ b/src/java/org/apache/cassandra/repair/state/SessionState.java @@ -26,11 +26,9 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.repair.CommonRange; -import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.utils.TimeUUID; -import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; - public class SessionState extends AbstractState { public enum State @@ -46,9 +44,9 @@ public enum State public final Phase phase = new Phase(); - public SessionState(Clock clock, TimeUUID parentRepairSession, String keyspace, String[] cfnames, CommonRange commonRange) + public SessionState(SharedContext ctx, TimeUUID parentRepairSession, String keyspace, String[] cfnames, CommonRange commonRange) { - super(clock, nextTimeUUID(), State.class); + super(ctx.clock(), ctx.timeUUID().get(), State.class); this.parentRepairSession = parentRepairSession; this.keyspace = keyspace; this.cfnames = cfnames; diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index d2a14a44dbe2..1e6cb1d76919 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -34,6 +34,8 @@ import accord.utils.Invariants; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; +import org.agrona.collections.LongArrayList; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.concurrent.Stage; @@ -57,6 +59,7 @@ public class AccordConfigurationService extends AbstractConfigurationService implements ChangeListener, AccordEndpointMapper, AccordSyncPropagator.Listener, Shutdownable { private final AccordSyncPropagator syncPropagator; + private final DiskStateManager diskStateManager; private EpochDiskState diskState = EpochDiskState.EMPTY; @@ -114,15 +117,88 @@ protected EpochState createEpochState(long epoch) } } - public AccordConfigurationService(Node.Id node, MessageDelivery messagingService, IFailureDetector failureDetector) + @VisibleForTesting + interface DiskStateManager + { + EpochDiskState loadTopologies(AccordKeyspace.TopologyLoadConsumer consumer); + EpochDiskState setNotifyingLocalSync(long epoch, Set pending, EpochDiskState diskState); + + EpochDiskState setCompletedLocalSync(long epoch, EpochDiskState diskState); + + EpochDiskState markLocalSyncAck(Node.Id id, long epoch, EpochDiskState diskState); + + EpochDiskState saveTopology(Topology topology, EpochDiskState diskState); + + EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, EpochDiskState diskState); + + EpochDiskState markClosed(Ranges ranges, long epoch, EpochDiskState diskState); + + EpochDiskState truncateTopologyUntil(long epoch, EpochDiskState diskState); + } + + enum SystemTableDiskStateManager implements DiskStateManager + { + instance; + + @Override + public EpochDiskState loadTopologies(AccordKeyspace.TopologyLoadConsumer consumer) + { + return AccordKeyspace.loadTopologies(consumer); + } + + @Override + public EpochDiskState setNotifyingLocalSync(long epoch, Set notify, EpochDiskState diskState) + { + return AccordKeyspace.setNotifyingLocalSync(epoch, notify, diskState); + } + + @Override + public EpochDiskState setCompletedLocalSync(long epoch, EpochDiskState diskState) + { + return AccordKeyspace.setCompletedLocalSync(epoch, diskState); + } + + @Override + public EpochDiskState markLocalSyncAck(Node.Id id, long epoch, EpochDiskState diskState) + { + return AccordKeyspace.markLocalSyncAck(id, epoch, diskState); + } + + @Override + public EpochDiskState saveTopology(Topology topology, EpochDiskState diskState) + { + return AccordKeyspace.saveTopology(topology, diskState); + } + + @Override + public EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, EpochDiskState diskState) + { + return AccordKeyspace.markRemoteTopologySync(node, epoch, diskState); + } + + @Override + public EpochDiskState markClosed(Ranges ranges, long epoch, EpochDiskState diskState) + { + return AccordKeyspace.markClosed(ranges, epoch, diskState); + } + + @Override + public EpochDiskState truncateTopologyUntil(long epoch, EpochDiskState diskState) + { + return AccordKeyspace.truncateTopologyUntil(epoch, diskState); + } + } + + public AccordConfigurationService(Node.Id node, MessageDelivery messagingService, IFailureDetector failureDetector, DiskStateManager diskStateManager, ScheduledExecutorPlus scheduledTasks) { super(node); - this.syncPropagator = new AccordSyncPropagator(localId, this, messagingService, failureDetector, ScheduledExecutors.scheduledTasks, this); + this.syncPropagator = new AccordSyncPropagator(localId, this, messagingService, failureDetector, scheduledTasks, this); + this.diskStateManager = diskStateManager; } public AccordConfigurationService(Node.Id node) { - this(node, MessagingService.instance(), FailureDetector.instance); + this(node, MessagingService.instance(), FailureDetector.instance, SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); } @Override @@ -137,7 +213,7 @@ public synchronized void start() state = State.LOADING; updateMapping(ClusterMetadata.current()); EndpointMapping snapshot = mapping; - diskState = AccordKeyspace.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { + diskState = diskStateManager.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { if (topology != null) reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); @@ -221,12 +297,41 @@ private void reportMetadata(ClusterMetadata metadata) synchronized (AccordConfigurationService.this) { updateMapping(metadata); - reportTopology(AccordTopology.createAccordTopology(metadata)); + Topology topology = AccordTopology.createAccordTopology(metadata); + Topology current = isEmpty() ? Topology.EMPTY : currentTopology(); + reportTopology(topology); + Sets.SetView removedNodes = Sets.difference(current.nodes(), topology.nodes()); + if (!removedNodes.isEmpty()) + onNodesRemoved(topology.epoch(), removedNodes); } }); } - private void maybeReportMetadata(ClusterMetadata metadata) + private synchronized void onNodesRemoved(long epoch, Set removed) + { + syncPropagator.onNodesRemoved(removed); + for (long oldEpoch : nonCompletedEpochsBefore(epoch)) + { + for (Node.Id node : removed) + receiveRemoteSyncComplete(node, oldEpoch); + } + listeners.forEach(l -> l.onRemoveNodes(epoch, removed)); + } + + private long[] nonCompletedEpochsBefore(long max) + { + LongArrayList notComplete = new LongArrayList(); + for (long epoch = epochs.minEpoch(); epoch <= max && epoch <= epochs.maxEpoch(); epoch++) + { + EpochSnapshot snapshot = getEpochSnapshot(epoch); + if (snapshot.syncStatus != SyncStatus.COMPLETED) + notComplete.add(epoch); + } + return notComplete.toLongArray(); + } + + @VisibleForTesting + void maybeReportMetadata(ClusterMetadata metadata) { // don't report metadata until the previous one has been acknowledged synchronized (this) @@ -265,7 +370,7 @@ protected synchronized void localSyncComplete(Topology topology, boolean startSy return; Set notify = topology.nodes().stream().filter(i -> !localId.equals(i)).collect(Collectors.toSet()); - diskState = AccordKeyspace.setNotifyingLocalSync(epoch, notify, diskState); + diskState = diskStateManager.setNotifyingLocalSync(epoch, notify, diskState); epochState.setSyncStatus(SyncStatus.NOTIFYING); syncPropagator.reportSyncComplete(epoch, notify, localId); } @@ -276,7 +381,7 @@ public synchronized void onEndpointAck(Node.Id id, long epoch) EpochState epochState = getOrCreateEpochState(epoch); if (epochState.syncStatus != SyncStatus.NOTIFYING) return; - diskState = AccordKeyspace.markLocalSyncAck(id, epoch, diskState); + diskState = diskStateManager.markLocalSyncAck(id, epoch, diskState); } @Override @@ -284,21 +389,21 @@ public synchronized void onComplete(long epoch) { EpochState epochState = getOrCreateEpochState(epoch); epochState.setSyncStatus(SyncStatus.COMPLETED); - diskState = AccordKeyspace.setCompletedLocalSync(epoch, diskState); + diskState = diskStateManager.setCompletedLocalSync(epoch, diskState); } @Override protected synchronized void topologyUpdatePreListenerNotify(Topology topology) { if (state == State.STARTED) - diskState = AccordKeyspace.saveTopology(topology, diskState); + diskState = diskStateManager.saveTopology(topology, diskState); } @Override protected synchronized void receiveRemoteSyncCompletePreListenerNotify(Node.Id node, long epoch) { if (state == State.STARTED) - diskState = AccordKeyspace.markRemoteTopologySync(node, epoch, diskState); + diskState = diskStateManager.markRemoteTopologySync(node, epoch, diskState); } @Override @@ -309,6 +414,11 @@ public synchronized void reportEpochClosed(Ranges ranges, long epoch) syncPropagator.reportClosed(epoch, topology.nodes(), ranges); } + public AccordSyncPropagator syncPropagator() + { + return syncPropagator; + } + @Override public synchronized void reportEpochRedundant(Ranges ranges, long epoch) { @@ -321,14 +431,14 @@ public synchronized void reportEpochRedundant(Ranges ranges, long epoch) @Override public synchronized void receiveClosed(Ranges ranges, long epoch) { - diskState = AccordKeyspace.markClosed(ranges, epoch, diskState); + diskState = diskStateManager.markClosed(ranges, epoch, diskState); super.receiveClosed(ranges, epoch); } @Override public synchronized void receiveRedundant(Ranges ranges, long epoch) { - diskState = AccordKeyspace.markClosed(ranges, epoch, diskState); + diskState = diskStateManager.markClosed(ranges, epoch, diskState); super.receiveRedundant(ranges, epoch); } @@ -342,7 +452,7 @@ protected synchronized void truncateTopologiesPreListenerNotify(long epoch) protected synchronized void truncateTopologiesPostListenerNotify(long epoch) { if (state == State.STARTED) - diskState = AccordKeyspace.truncateTopologyUntil(epoch, diskState); + diskState = diskStateManager.truncateTopologyUntil(epoch, diskState); } private void checkStarted() diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index ac773b475bc4..ff7f2c5a47c0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -1587,7 +1587,8 @@ public boolean isEmpty() return minEpoch == maxEpoch && maxEpoch == 0; } - private EpochDiskState withNewMaxEpoch(long epoch) + @VisibleForTesting + EpochDiskState withNewMaxEpoch(long epoch) { Invariants.checkArgument(epoch > maxEpoch, "Epoch %d <= %d (max)", epoch, maxEpoch); return EpochDiskState.create(Math.max(1, minEpoch), epoch); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java index e16facee4a0c..2c9626718d80 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -29,6 +30,9 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.local.Node; import accord.messages.SimpleReply; import accord.primitives.Ranges; @@ -38,6 +42,7 @@ import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.gms.IFailureDetector; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -50,6 +55,7 @@ import org.apache.cassandra.net.Verb; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.TopologySerializers; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.CollectionSerializers; import static org.apache.cassandra.utils.CollectionSerializers.newListSerializer; @@ -59,6 +65,8 @@ */ public class AccordSyncPropagator { + private static final Logger logger = LoggerFactory.getLogger(AccordSyncPropagator.class); + public static final IVerbHandler> verbHandler = message -> { if (!AccordService.isSetup()) return; @@ -120,6 +128,11 @@ Notification redundant(Ranges addRedundant) return new Notification(epoch, Collections.emptySet(), Ranges.EMPTY, addRedundant); } + boolean isEmpty() + { + return syncComplete.isEmpty() && closed.isEmpty() && redundant.isEmpty(); + } + boolean ack(Notification notification) { if (!notification.syncComplete.isEmpty()) @@ -201,6 +214,15 @@ boolean hasPending() return !pending.isEmpty(); } + synchronized boolean hasPending(long epoch) + { + if (pending.isEmpty()) return false; + return pending.values().stream().allMatch(n -> { + PendingEpoch p = n.get(epoch); + return p != null && !p.isEmpty(); + }); + } + @Override public String toString() { @@ -210,6 +232,28 @@ public String toString() '}'; } + public synchronized void onNodesRemoved(Set removed) + { + for (Node.Id node : removed) + { + PendingEpochs pendingEpochs = pending.get(node.id); + if (pendingEpochs == null) continue; + long[] toComplete = new long[pendingEpochs.size()]; + Long2ObjectHashMap.KeyIterator it = pendingEpochs.keySet().iterator(); + for (int i = 0; it.hasNext(); i++) + toComplete[i] = it.nextLong(); + Arrays.sort(toComplete); + for (long epoch : toComplete) + listener.onEndpointAck(node, epoch); + pending.remove(node.id); + for (long epoch : toComplete) + { + if (hasSyncCompletedFor(epoch)) + listener.onComplete(epoch); + } + } + } + public void reportSyncComplete(long epoch, Collection notify, Node.Id syncCompleteId) { if (notify.isEmpty()) @@ -258,17 +302,13 @@ private boolean hasSyncCompletedFor(long epoch) private boolean notify(Node.Id to, List notifications) { InetAddressAndPort toEp = endpointMapper.mappedEndpoint(to); - if (!failureDetector.isAlive(toEp)) - { - scheduler.schedule(() -> notify(to, notifications), 1, TimeUnit.MINUTES); - return false; - } Message> msg = Message.out(Verb.ACCORD_SYNC_NOTIFY_REQ, notifications); - messagingService.sendWithCallback(msg, toEp, new RequestCallback(){ + RequestCallback cb = new RequestCallback<>() + { @Override public void onResponse(Message msg) { - Invariants.checkState(msg.payload == SimpleReply.Ok, "Unexpected message: %s", msg); + Invariants.checkState(msg.payload == SimpleReply.Ok, "Unexpected message: %s", msg); Set completedEpochs = new HashSet<>(); // TODO review is it a good idea to call the listener while not holding the `AccordSyncPropagator` lock? synchronized (AccordSyncPropagator.this) @@ -304,7 +344,22 @@ public boolean invokeOnFailure() { return true; } - }); + }; + if (!failureDetector.isAlive(toEp)) + { + // was the endpoint removed from membership? + ClusterMetadata metadata = ClusterMetadata.current(); + if (Gossiper.instance.getEndpointStateForEndpoint(toEp) == null && !metadata.directory.allJoinedEndpoints().contains(toEp) && !metadata.fullCMSMembers().contains(toEp)) + { + // endpoint no longer exists... + cb.onResponse(msg.responseWith(SimpleReply.Ok)); + return true; + } + logger.warn("Node{} is not alive, unable to notify of {}", to, notifications); + scheduler.schedule(() -> notify(to, notifications), 1, TimeUnit.MINUTES); + return false; + } + messagingService.sendWithCallback(msg, toEp, cb); return true; } @@ -352,5 +407,16 @@ public Notification(long epoch, Collection syncComplete, Ranges closed, this.closed = closed; this.redundant = redundant; } + + @Override + public String toString() + { + return "Notification{" + + "epoch=" + epoch + + ", syncComplete=" + syncComplete + + ", closed=" + closed + + ", redundant=" + redundant + + '}'; + } } } diff --git a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java index fe0ae0566c31..839488875c77 100644 --- a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java +++ b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java @@ -101,7 +101,7 @@ public String toString() public SimulatedExecutorFactory(RandomSource rs, Consumer onError) { - this(rs, toGen(Generators.TIMESTAMP_GEN.map(Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs), onError); + this(rs, toGen(Generators.TIMESTAMP_GEN.map(Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).nextLong(rs), onError); } public SimulatedExecutorFactory(RandomSource rs) diff --git a/test/unit/org/apache/cassandra/db/virtual/LocalRepairTablesTest.java b/test/unit/org/apache/cassandra/db/virtual/LocalRepairTablesTest.java index 96f09400ecaf..5aa94cc666d5 100644 --- a/test/unit/org/apache/cassandra/db/virtual/LocalRepairTablesTest.java +++ b/test/unit/org/apache/cassandra/db/virtual/LocalRepairTablesTest.java @@ -40,6 +40,7 @@ import org.apache.cassandra.repair.CommonRange; import org.apache.cassandra.repair.RepairJobDesc; import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.repair.SharedContext; import org.apache.cassandra.repair.messages.PrepareMessage; import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.repair.state.Completable; @@ -291,7 +292,7 @@ private static InetAddressAndPort address(int a, int b, int c, int d) private static CoordinatorState coordinator() { RepairOption options = RepairOption.parse(Collections.emptyMap(), DatabaseDescriptor.getPartitioner()); - CoordinatorState state = new CoordinatorState(Clock.Global.clock(), 0, "test", options); + CoordinatorState state = new CoordinatorState(SharedContext.Global.instance, 0, "test", options); ActiveRepairService.instance().register(state); return state; } @@ -299,7 +300,7 @@ private static CoordinatorState coordinator() private static SessionState session() { CoordinatorState parent = coordinator(); - SessionState state = new SessionState(Clock.Global.clock(), parent.id, REPAIR_KS, new String[]{ REPAIR_TABLE }, COMMON_RANGE); + SessionState state = new SessionState(SharedContext.Global.instance, parent.id, REPAIR_KS, new String[]{ REPAIR_TABLE }, COMMON_RANGE); parent.register(state); return state; } diff --git a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java index 02221eb2004b..c4085c608385 100644 --- a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java +++ b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java @@ -257,11 +257,11 @@ else if (domain == Routable.Domain.Range && store2Table2Ranges == null) { case Key: { - store = rs.pick(storeToTableToRoutingKeysToTxns.keySet()); + store = rs.pickUnorderedSet(storeToTableToRoutingKeysToTxns.keySet()); var actual = this.storeToTableToRoutingKeysToTxns.get(store); var tableToTokens = store2Table2Tokens.get(store); - table = rs.pick(actual.keySet()); + table = rs.pickUnorderedSet(actual.keySet()); var tokens = tableToTokens.get(table); var offset = rs.nextInt(0, tokens.length); @@ -274,11 +274,11 @@ else if (domain == Routable.Domain.Range && store2Table2Ranges == null) break; case Range: { - store = rs.pick(storeToTableToRangesToTxns.keySet()); + store = rs.pickUnorderedSet(storeToTableToRangesToTxns.keySet()); var tableToRangesToTxns = storeToTableToRangesToTxns.get(store); var tableToRanges = store2Table2Ranges.get(store); - table = rs.pick(tableToRangesToTxns.keySet()); + table = rs.pickUnorderedSet(tableToRangesToTxns.keySet()); var wrapper = tableToRanges.get(table); var ranges = wrapper.ranges; var tree = wrapper.tree; diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java index 84c64bcb45b0..5200316a2f8a 100644 --- a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -125,7 +126,7 @@ public ColumnFamilyStore createSut(State state) @Override public Gen> commands(State state) { - Map>, Integer> possible = new HashMap<>(); + Map>, Integer> possible = new LinkedHashMap<>(); possible.put(ignore -> FLUSH, 1); possible.put(ignore -> COMPACT, 1); possible.put(rs -> { @@ -139,9 +140,9 @@ public ColumnFamilyStore createSut(State state) if (!state.storeToTableToRangesToTxns.isEmpty()) { possible.put(rs -> { - int storeId = rs.pick(state.storeToTableToRangesToTxns.keySet()); + int storeId = rs.pickUnorderedSet(state.storeToTableToRangesToTxns.keySet()); var tables = state.storeToTableToRangesToTxns.get(storeId); - TableId tableId = rs.pick(tables.keySet()); + TableId tableId = rs.pickUnorderedSet(tables.keySet()); var ranges = tables.get(tableId); TreeSet distinctRanges = ranges.stream().map(Map.Entry::getKey).collect(Collectors.toCollection(() -> new TreeSet<>(TokenRange::compareTo))); TokenRange range; @@ -154,14 +155,14 @@ public ColumnFamilyStore createSut(State state) switch (rs.nextInt(0, 2)) { case 0: // perfect match - range = rs.pick(distinctRanges); + range = rs.pickOrderedSet(distinctRanges); break; case 1: // mutli-match { - TokenRange a = rs.pick(distinctRanges); - TokenRange b = rs.pick(distinctRanges); + TokenRange a = rs.pickOrderedSet(distinctRanges); + TokenRange b = rs.pickOrderedSet(distinctRanges); while (a.equals(b)) - b = rs.pick(distinctRanges); + b = rs.pickOrderedSet(distinctRanges); if (b.compareTo(a) < 0) { TokenRange tmp = a; diff --git a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java index 6cbabe37cca9..dd9472a91189 100644 --- a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java +++ b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java @@ -31,6 +31,7 @@ import accord.utilsfork.Gens; import accord.utilsfork.RandomSource; + import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.utils.concurrent.AsyncPromise; diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 3e89ebd13cc4..1665ef80c6b2 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -22,7 +22,7 @@ import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.sql.Timestamp; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -44,7 +44,6 @@ import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; -import java.util.function.LongSupplier; import java.util.function.Supplier; import javax.annotation.Nullable; @@ -85,7 +84,6 @@ import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; -import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.gms.ApplicationState; import org.apache.cassandra.gms.EndpointState; import org.apache.cassandra.gms.HeartBeatState; @@ -99,12 +97,12 @@ import org.apache.cassandra.locator.LocalStrategy; import org.apache.cassandra.locator.Locator; import org.apache.cassandra.locator.RangesAtEndpoint; -import org.apache.cassandra.net.ConnectionType; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.net.SimulatedMessageDelivery; +import org.apache.cassandra.net.SimulatedMessageDelivery.SimulatedMessageReceiver; import org.apache.cassandra.repair.messages.RepairMessage; import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.repair.messages.ValidationResponse; @@ -151,8 +149,6 @@ import org.apache.cassandra.utils.MerkleTrees; import org.apache.cassandra.utils.NoSpamLogger; import org.apache.cassandra.utils.TimeUUID; -import org.apache.cassandra.utils.concurrent.AsyncPromise; -import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.ImmediateFuture; import org.apache.cassandra.utils.progress.ProgressEventType; import org.assertj.core.api.Assertions; @@ -166,10 +162,10 @@ public abstract class FuzzTestBase extends CQLTester.InMemory { private static final int MISMATCH_NUM_PARTITIONS = 1; - private static final Gen IDENTIFIER_GEN = fromQT(Generators.IDENTIFIER_GEN); - private static final Gen KEYSPACE_NAME_GEN = fromQT(CassandraGenerators.KEYSPACE_NAME_GEN); - private static final Gen TABLE_ID_GEN = fromQT(CassandraGenerators.TABLE_ID_GEN); - private static final Gen ADDRESS_W_PORT = fromQT(CassandraGenerators.INET_ADDRESS_AND_PORT_GEN); + private static final Gen IDENTIFIER_GEN = Generators.toGen(Generators.IDENTIFIER_GEN); + private static final Gen KEYSPACE_NAME_GEN = Generators.toGen(CassandraGenerators.KEYSPACE_NAME_GEN); + private static final Gen TABLE_ID_GEN = Generators.toGen(CassandraGenerators.TABLE_ID_GEN); + private static final Gen ADDRESS_W_PORT = Generators.toGen(CassandraGenerators.INET_ADDRESS_AND_PORT_GEN); private static boolean SETUP_SCHEMA = false; static String KEYSPACE; @@ -494,7 +490,7 @@ static void addMismatch(RandomSource rs, ColumnFamilyStore cfs, Validator valida Set allTokens = new HashSet<>(); for (Range range : validator.desc.ranges) { - Gen gen = fromQT(CassandraGenerators.tokensInRange(range)); + Gen gen = Generators.toGen(CassandraGenerators.tokensInRange(range)); Set tokens = new LinkedHashSet<>(); for (int i = 0, size = rs.nextInt(1, 10); i < size; i++) { @@ -691,15 +687,13 @@ static class Cluster private final List listeners = new ArrayList<>(); private final RandomSource rs; private BiFunction, Set> allowedMessageFaults = (a, b) -> Collections.emptySet(); - - private final Map networkLatencies = new HashMap<>(); private final Map> networkDrops = new HashMap<>(); Cluster(RandomSource rs) { ClockAccess.includeThreadAsOwner(); this.rs = rs; - globalExecutor = new SimulatedExecutorFactory(rs, fromQT(Generators.TIMESTAMP_GEN.map(Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs)); + globalExecutor = new SimulatedExecutorFactory(rs); orderedExecutor = globalExecutor.configureSequential("ignore").build(); unorderedScheduled = globalExecutor.scheduled("ignored"); @@ -720,8 +714,8 @@ static class Cluster int numNodes = rs.nextInt(3, 10); List dcs = Gens.lists(IDENTIFIER_GEN).unique().ofSizeBetween(1, Math.min(10, numNodes)).next(rs); Map nodes = Maps.newHashMapWithExpectedSize(numNodes); - Gen tokenGen = fromQT(CassandraGenerators.token(DatabaseDescriptor.getPartitioner())); - Gen hostIdGen = fromQT(Generators.UUID_RANDOM_GEN); + Gen tokenGen = Generators.toGen(CassandraGenerators.token(DatabaseDescriptor.getPartitioner())); + Gen hostIdGen = Generators.toGen(Generators.UUID_RANDOM_GEN); Set tokens = new HashSet<>(); Set hostIds = new HashSet<>(); for (int i = 0; i < numNodes; i++) @@ -813,214 +807,43 @@ public void processAll() } } - private class CallbackContext + private SimulatedMessageDelivery.Action action(InetAddressAndPort self, Message msg, InetAddressAndPort to) { - final RequestCallback callback; - - private CallbackContext(RequestCallback callback) - { - this.callback = Objects.requireNonNull(callback); - } - - public void onResponse(Message msg) - { - callback.onResponse(msg); - } - - public void onFailure(InetAddressAndPort from, RequestFailure failure) - { - if (callback.invokeOnFailure()) callback.onFailure(from, failure); - } + boolean toSelf = self.equals(to); + Node node = nodes.get(to); + Set allowedFaults = allowedMessageFaults.apply(node, msg); + if (allowedFaults.contains(Faults.DROP) && !toSelf && networkDrops(self, to)) return SimulatedMessageDelivery.Action.DROP_PARTITIONED; + return SimulatedMessageDelivery.Action.DELIVER; } - private static class CallbackKey + private boolean networkDrops(InetAddressAndPort self, InetAddressAndPort to) { - private final long id; - private final InetAddressAndPort peer; - - private CallbackKey(long id, InetAddressAndPort peer) - { - this.id = id; - this.peer = peer; - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - CallbackKey that = (CallbackKey) o; - return id == that.id && peer.equals(that.peer); - } - - @Override - public int hashCode() - { - return Objects.hash(id, peer); - } - - @Override - public String toString() - { - return "CallbackKey{" + - "id=" + id + - ", peer=" + peer + - '}'; - } + return networkDrops.computeIfAbsent(new Connection(self, to), ignore -> Gens.bools().biasedRepeatingRuns(rs.nextInt(1, 11) / 100.0D, rs.nextInt(3, 15)).asSupplier(rs)).get(); } - private class Messaging implements MessageDelivery + private class Messaging extends SimulatedMessageDelivery { - final InetAddressAndPort broadcastAddressAndPort; - final Map callbacks = new HashMap<>(); - private Messaging(InetAddressAndPort broadcastAddressAndPort) { - this.broadcastAddressAndPort = broadcastAddressAndPort; - } - - @Override - public void send(Message message, InetAddressAndPort to) - { - message = message.withFrom(broadcastAddressAndPort); - maybeEnqueue(message, to, null); - } - - @Override - public void sendWithCallback(Message message, InetAddressAndPort to, RequestCallback cb) - { - message = message.withFrom(broadcastAddressAndPort); - maybeEnqueue(message, to, cb); - } - - @Override - public void sendWithCallback(Message message, InetAddressAndPort to, RequestCallback cb, ConnectionType specifyConnection) - { - message = message.withFrom(broadcastAddressAndPort); - maybeEnqueue(message, to, cb); - } - - private void maybeEnqueue(Message message, InetAddressAndPort to, @Nullable RequestCallback callback) - { - CallbackContext cb; - if (callback != null) - { - CallbackKey key = new CallbackKey(message.id(), to); - if (callbacks.containsKey(key)) - throw new AssertionError("Message id " + message.id() + " to " + to + " already has a callback"); - cb = new CallbackContext(callback); - callbacks.put(key, cb); - } - else - { - cb = null; - } - boolean toSelf = this.broadcastAddressAndPort.equals(to); - Node node = nodes.get(to); - Set allowedFaults = allowedMessageFaults.apply(node, message); - if (allowedFaults.isEmpty()) - { - // enqueue so stack overflow doesn't happen with the inlining - unorderedScheduled.submit(() -> node.handle(message)); - } - else - { - Runnable enqueue = () -> { - if (!allowedFaults.contains(Faults.DELAY)) - { - unorderedScheduled.submit(() -> node.handle(message)); - } - else - { - if (toSelf) unorderedScheduled.submit(() -> node.handle(message)); - else - unorderedScheduled.schedule(() -> node.handle(message), networkJitterNanos(to), TimeUnit.NANOSECONDS); - } - }; - - if (!allowedFaults.contains(Faults.DROP)) enqueue.run(); - else - { - if (!toSelf && networkDrops(to)) - { -// logger.warn("Dropped message {}", message); - // drop - } - else - { - enqueue.run(); - } - } - - if (cb != null) - { - unorderedScheduled.schedule(() -> { - CallbackContext ctx = callbacks.remove(new CallbackKey(message.id(), to)); - if (ctx != null) - { - assert ctx == cb; - try - { - ctx.onFailure(to, RequestFailure.TIMEOUT); - } - catch (Throwable t) - { - failures.add(t); - } - } - }, message.verb().expiresAfterNanos(), TimeUnit.NANOSECONDS); - } - } - } - - private long networkJitterNanos(InetAddressAndPort to) - { - return networkLatencies.computeIfAbsent(new Connection(broadcastAddressAndPort, to), ignore -> { - long min = TimeUnit.MICROSECONDS.toNanos(500); - long maxSmall = TimeUnit.MILLISECONDS.toNanos(5); - long max = TimeUnit.SECONDS.toNanos(5); - LongSupplier small = () -> rs.nextLong(min, maxSmall); - LongSupplier large = () -> rs.nextLong(maxSmall, max); - return Gens.bools().biasedRepeatingRuns(rs.nextInt(1, 11) / 100.0D, rs.nextInt(3, 15)).mapToLong(b -> b ? large.getAsLong() : small.getAsLong()).asLongSupplier(rs); - }).getAsLong(); - } - - private boolean networkDrops(InetAddressAndPort to) - { - return networkDrops.computeIfAbsent(new Connection(broadcastAddressAndPort, to), ignore -> Gens.bools().biasedRepeatingRuns(rs.nextInt(1, 11) / 100.0D, rs.nextInt(3, 15)).asSupplier(rs)).get(); - } - - @Override - public Future> sendWithResult(Message message, InetAddressAndPort to) - { - AsyncPromise> promise = new AsyncPromise<>(); - sendWithCallback(message, to, new RequestCallback() - { - @Override - public void onResponse(Message msg) - { - promise.trySuccess(msg); - } - - @Override - public void onFailure(InetAddressAndPort from, RequestFailure failure) - { - promise.tryFailure(new MessagingService.FailureResponseException(from, failure)); - } - - @Override - public boolean invokeOnFailure() - { - return true; - } - }); - return promise; - } - - @Override - public void respond(V response, Message message) - { - send(message.responseWith(response), message.respondTo()); + super(broadcastAddressAndPort, + Cluster.this::action, + new NetworkDelaySupplier() + { + private final NetworkDelaySupplier delegate = SimulatedMessageDelivery.randomDelay(rs); + @Nullable + @Override + public Duration jitter(Message msg, InetAddressAndPort to) + { + Set allowedFaults = allowedMessageFaults.apply(nodes.get(to), msg); + if (!allowedFaults.contains(Faults.DELAY) || broadcastAddressAndPort.equals(to)) + return null; + return delegate.jitter(msg, to); + } + }, + (to, msg) -> unorderedScheduled.submit(() -> nodes.get(to).handle(msg)), + (action, to, msg) -> logger.warn("{} message {}", action, msg), + unorderedScheduled::schedule, + failures::add); } } @@ -1070,7 +893,7 @@ class Node implements SharedContext final InetAddressAndPort addressAndPort; final Collection tokens; final ActiveRepairService activeRepairService; - final IVerbHandler verbHandler; + final SimulatedMessageReceiver receiver; final Messaging messaging; final IValidationManager validationManager; private FailingBiConsumer doValidation = DEFAULT_VALIDATION; @@ -1104,7 +927,7 @@ private Node(UUID hostId, InetAddressAndPort addressAndPort, Collection t validator.fail(e); } }); - this.verbHandler = new IVerbHandler<>() + this.receiver = messaging.receiver(new IVerbHandler<>() { private final RepairMessageVerbHandler repairVerbHandler = new RepairMessageVerbHandler(Node.this); private final IVerbHandler paxosStartPrepareCleanup = PaxosStartPrepareCleanup.createVerbHandler(Node.this); @@ -1136,7 +959,7 @@ public void doVerb(Message message) throws IOException repairVerbHandler.doVerb(message); } } - }; + }); activeRepairService.start(); } @@ -1176,38 +999,7 @@ void handle(Message msg) } for (MessageListener l : listeners) l.preHandle(this, msg); - if (msg.verb().isResponse()) - { - // handle callbacks - CallbackKey key = new CallbackKey(msg.id(), msg.from()); - if (messaging.callbacks.containsKey(key)) - { - CallbackContext callback = messaging.callbacks.remove(key); - if (callback == null) - return; - try - { - if (msg.isFailureResponse()) - callback.onFailure(msg.from(), (RequestFailure) msg.payload); - else callback.onResponse(msg); - } - catch (Throwable t) - { - failures.add(t); - } - } - } - else - { - try - { - verbHandler.doVerb(msg); - } - catch (Throwable e) - { - failures.add(e); - } - } + receiver.recieve(msg); } public UUID hostId() @@ -1362,6 +1154,12 @@ public PaxosRepairState paxosRepairState() return paxosRepairState; } + @Override + public Supplier timeUUID() + { + return Generators.toGen(Generators.timeUUID()).asSupplier(rs); + } + public String toString() { return "Node{" + @@ -1388,14 +1186,6 @@ private Message serde(Message msg) } } - private static Gen fromQT(org.quicktheories.core.Gen qt) - { - return rs -> { - JavaRandom r = new JavaRandom(rs.asJdkRandom()); - return qt.generate(r); - }; - } - public static class HackStrat extends LocalStrategy { public HackStrat(String keyspaceName, Map configOptions) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index 7b77791c6ed8..2f689187aca3 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -40,6 +40,7 @@ import accord.topology.Topology; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Murmur3Partitioner; @@ -176,7 +177,7 @@ public void setup() @Test public void initialEpochTest() throws Throwable { - AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); Assert.assertEquals(null, AccordKeyspace.loadEpochDiskState()); service.start(); Assert.assertEquals(null, AccordKeyspace.loadEpochDiskState()); @@ -201,7 +202,7 @@ public void initialEpochTest() throws Throwable @Test public void loadTest() throws Throwable { - AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); service.start(); Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); @@ -221,7 +222,7 @@ public void loadTest() throws Throwable service.reportTopology(topology3); service.acknowledgeEpoch(EpochReady.done(3), true); - AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); loaded.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); AbstractConfigurationServiceTest.TestListener listener = new AbstractConfigurationServiceTest.TestListener(loaded, true); loaded.registerListener(listener); @@ -240,7 +241,7 @@ public void loadTest() throws Throwable @Test public void truncateTest() { - AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); TestListener serviceListener = new TestListener(service, true); service.registerListener(serviceListener); service.start(); @@ -258,7 +259,7 @@ public void truncateTest() Assert.assertEquals(EpochDiskState.create(3), service.diskState()); serviceListener.assertTruncates(3L); - AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector()); + AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); loaded.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); TestListener loadListener = new TestListener(loaded, true); loaded.registerListener(loadListener); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 55d1e7e1177a..9c613624fecf 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -186,7 +186,7 @@ public void findOverlappingKeys() // else this will loop forever... for (int attempt = 0; attempt < 10; attempt++) { - TableId tableId = rs.pick(tables.keySet()); + TableId tableId = rs.pickOrderedSet(tables.navigableKeySet()); IPartitioner partitioner = tables.get(tableId); ByteBuffer data = !(partitioner instanceof LocalPartitioner) ? Int32Type.instance.decompose(rs.nextInt()) : fromQT(getTypeSupport(partitioner.getTokenValidator()).bytesGen()).next(rs); @@ -258,7 +258,7 @@ public void findOverlappingKeys() for (int i = 0, queries = rs.nextInt(1, 5); i < queries; i++) { - int store = rs.pick(storesToKeys.keySet()); + int store = rs.pickOrderedSet(storesToKeys.navigableKeySet()); var keysForStore = new ArrayList<>(storesToKeys.get(store)); int offset; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index f57a3c12387f..12d5c75c01de 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -60,6 +60,9 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.gms.EndpointState; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.gms.HeartBeatState; import org.apache.cassandra.gms.IFailureDetectionEventListener; import org.apache.cassandra.gms.IFailureDetector; import org.apache.cassandra.locator.InetAddressAndPort; @@ -94,6 +97,10 @@ public void burnTest() Gen rangesGen = AccordGenerators.ranges().filter(r -> !r.isEmpty()); Gen> nodesGen = Gens.lists(AccordGens.nodes()).unique().ofSizeBetween(1, 40); qt().withExamples(100).check(rs -> { + // when gossip and cluster metadata don't know an endpoint, retries are avoided (node removed) + // so when instances are created here they are added to gossip to trick the membership check... + Gossiper.instance.clearUnsafe(); + List nodes = nodesGen.next(rs); Set nodesAsSet = ImmutableSet.copyOf(nodes); @@ -214,6 +221,7 @@ private Cluster(List nodes, Sink sink = new Sink(id); IFailureDetector fd = new FailureDetector(address); instances.put(id, new Instace(id, address, cs, sink, fd, cs, new AccordSyncPropagator(id, Cluster.this, sink, fd, scheduler, cs))); + Gossiper.instance.endpointStateMap.put(address, new EndpointState(HeartBeatState.empty())); } this.nodeToAddress = nodeToAddress.build(); this.instances = instances.build(); diff --git a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java new file mode 100644 index 000000000000..ec0eb60121dd --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java @@ -0,0 +1,754 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.NavigableSet; +import java.util.Objects; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +import com.google.common.collect.Sets; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.ConfigurationService; +import accord.api.ConfigurationService.EpochReady; +import accord.impl.SizeOfIntersectionSorter; +import accord.local.Node; +import accord.primitives.Ranges; +import accord.topology.Topology; +import accord.topology.TopologyManager; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Invariants; +import accord.utils.Property.Command; +import accord.utils.Property.Commands; +import accord.utils.Property.UnitCommand; +import accord.utils.RandomSource; +import accord.utils.async.AsyncChain; +import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.concurrent.SimulatedExecutorFactory; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; +import org.apache.cassandra.gms.IFailureDetectionEventListener; +import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.SimulatedMessageDelivery; +import org.apache.cassandra.net.SimulatedMessageDelivery.Action; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.schema.DistributedMetadataLogKeyspace; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.TableParams; +import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.service.accord.AccordConfigurationService.EpochSnapshot; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.StubClusterMetadataService; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.ownership.DataPlacements; +import org.apache.cassandra.utils.ByteArrayUtil; +import org.apache.cassandra.utils.Pair; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.stateful; + +public class EpochSyncTest +{ + private static final Logger logger = LoggerFactory.getLogger(EpochSyncTest.class); + + static + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + + ClusterMetadataService.setInstance(StubClusterMetadataService.forTesting()); + } + + @Test + public void test() + { + stateful().withExamples(50).check(new Commands() + { + @Override + public Gen genInitialState() + { + return Cluster::new; + } + + @Override + public Void createSut(Cluster Cluster) + { + return null; + } + + @Override + public Gen> commands(Cluster cluster) + { + List alive = cluster.alive(); + Map>, Integer> possible = new LinkedHashMap<>(); + if (alive.size() < cluster.maxNodes) + { + // add node + possible.put(rs -> { + Node.Id id = new Node.Id(++cluster.nodeCounter); + long token = cluster.tokenGen.nextLong(rs); + while (cluster.tokens.contains(token)) + token = cluster.tokenGen.nextLong(rs); + long epoch = cluster.current.epoch.getEpoch() + 1; + long finalToken = token; + return new SimpleCommand("Add Node " + id + "; token=" + token + ", epoch=" + epoch, + c -> c.addNode(id, finalToken)); + }, 5); + } + if (alive.size() > cluster.minNodes) + { + possible.put(rs -> { + Node.Id pick = rs.pick(alive); + long token = cluster.instances.get(pick).token; + long epoch = cluster.current.epoch.getEpoch() + 1; + return new SimpleCommand("Remove Node " + pick + "; token=" + token + "; epoch=" + epoch, c -> c.removeNode(pick)); + }, 3); + } + if (cluster.hasWork()) + { + possible.put(rs -> new SimpleCommand("Process Some", + c -> {//noinspection StatementWithEmptyBody + for (int i = 0, attempts = rs.nextInt(1, 100); i < attempts && c.processOne(); i++) + { + } + }), 10); + } + + possible.put(rs -> new SimpleCommand("Validate", + c -> c.validate(false)), 1); + possible.put(rs -> new SimpleCommand("Bump Epoch " + (cluster.current.epoch.getEpoch() + 1), + Cluster::bumpEpoch), 10); + return Gens.oneOf(possible); + } + + @Override + public void destroyState(Cluster cluster) + { + cluster.processAll(); + cluster.validate(true); + } + }); + } + + private static class SimpleCommand implements UnitCommand + { + private final String name; + private final Consumer fn; + + private SimpleCommand(String name, Consumer fn) + { + this.name = name; + this.fn = fn; + } + + @Override + public String detailed(Cluster Cluster) + { + return name; + } + + @Override + public void applyUnit(Cluster Cluster) + { + fn.accept(Cluster); + } + + @Override + public void runUnit(Void Void) + { + + } + } + + private static class Cluster + { + private static final int rf = 2; + private static final ReplicationParams replication_params = ReplicationParams.simple(rf); + private static final ReplicationParams meta = ReplicationParams.simpleMeta(1, Collections.singleton("dc1")); + + private final RandomSource rs; + private final int minNodes, maxNodes; + private final Gen.LongGen tokenGen; + private final SortedSet tokens = new TreeSet<>(); + private final Map instances = new HashMap<>(); + private final Set removed = new HashSet<>(); + private final List failures = new ArrayList<>(); + private final SimulatedExecutorFactory globalExecutor; + private final ScheduledExecutorPlus scheduler; + private int nodeCounter = 0; + private ClusterMetadata current = new ClusterMetadata(Murmur3Partitioner.instance, Directory.EMPTY, + new DistributedSchema(Keyspaces.of( + DistributedMetadataLogKeyspace.initialMetadata(Collections.singleton("dc1")), + KeyspaceMetadata.create("test", KeyspaceParams.simple(rf), Tables.of(TableMetadata.minimal("test", "tb1").unbuild().params(TableParams.builder().transactionalMode(TransactionalMode.full).build()).build()))))); + private final IFailureDetector fd = new IFailureDetector() + { + @Override + public boolean isAlive(InetAddressAndPort ep) + { + return !removed.contains(nodeId(ep)); + } + + @Override + public void interpret(InetAddressAndPort ep) + { + + } + + @Override + public void report(InetAddressAndPort ep) + { + + } + + @Override + public void remove(InetAddressAndPort ep) + { + + } + + @Override + public void forceConviction(InetAddressAndPort ep) + { + + } + + @Override + public void registerFailureDetectionEventListener(IFailureDetectionEventListener listener) + { + + } + + @Override + public void unregisterFailureDetectionEventListener(IFailureDetectionEventListener listener) + { + + } + }; + + private static InetAddressAndPort address(Node.Id id) + { + try + { + return InetAddressAndPort.getByAddress(ByteArrayUtil.bytes(id.id)); + } + catch (UnknownHostException e) + { + throw new AssertionError("Unable to create address for id " + id, e); + } + } + + public enum EpochTracker { topologyManager, accordSyncPropagator, configurationService} + + Set globalSynced(long epoch) + { + return alive().stream() + .filter(n -> instances.get(n).epoch.getEpoch() <= epoch) + .map(n -> instances.get(n).synced(epoch)) + .reduce(EnumSet.allOf(EpochTracker.class), Sets::intersection); + } + + boolean allSynced(long epoch) + { + Set done = globalSynced(epoch); + return done.contains(EpochTracker.topologyManager); + } + + private static Node.Id nodeId(InetAddressAndPort address) + { + return new Node.Id(ByteArrayUtil.getInt(address.addressBytes)); + } + + public Cluster(RandomSource rs) + { + this.rs = rs; + this.minNodes = 3; + this.maxNodes = 10; + this.tokenGen = rs2 -> rs2.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); + + this.globalExecutor = new SimulatedExecutorFactory(accord.utilsfork.RandomSource.wrap(rs.asJdkRandom()), failures::add); + this.scheduler = globalExecutor.scheduled("ignored"); + Stage.MISC.unsafeSetExecutor(scheduler); + + scheduler.scheduleWithFixedDelay(() -> { + if (aliveCount() < 2) return; + if (!partitions.isEmpty() && rs.nextBoolean()) + { + // remove partition + if (partitions.size() == 1) + { + partitions.clear(); + return; + } + partitions.remove(rs.pickOrderedSet(partitions)); + } + else + { + // add partition + List alive = alive(); + InetAddressAndPort a = address(rs.pick(alive)); + InetAddressAndPort b = address(rs.pick(alive)); + while (a.equals(b)) + b = address(rs.pick(alive)); + partitions.add(new Connection(a, b)); + } + }, 1, 1, TimeUnit.MINUTES); + } + + void validate(boolean isDone) + { + for (Node.Id id : alive()) + { + Instance inst = instances.get(id); + if (removed.contains(id)) continue; // ignore removed nodes + AccordConfigurationService conf = inst.config; + TopologyManager tm = inst.topology; + for (long epoch = inst.epoch.getEpoch(); epoch <= current.epoch.getEpoch(); epoch++) + { + // validate config + EpochSnapshot snapshot = conf.getEpochSnapshot(epoch); + if (isDone) + { + Assertions.assertThat(snapshot).describedAs("node%s does not have epoch %d", id, epoch).isNotNull(); + Assertions.assertThat(snapshot.syncStatus).isEqualTo(AccordConfigurationService.SyncStatus.COMPLETED); + + // validate topology manager + Assertions.assertThat(tm.hasEpoch(epoch)).describedAs("node%s does not have epoch %d", id, epoch).isTrue(); + Ranges ranges = tm.globalForEpoch(epoch).ranges().mergeTouching(); + Ranges actual = tm.syncComplete(epoch).mergeTouching(); + Assertions.assertThat(actual).describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s", id, epoch, ranges.subtract(actual)).isEqualTo(ranges); + } + else + { + if (snapshot == null || snapshot.syncStatus != AccordConfigurationService.SyncStatus.COMPLETED) continue; + + if (!allSynced(epoch)) + continue; + + Assertions.assertThat(tm.hasEpoch(epoch)).describedAs("node%s does not have epoch %d", id, epoch).isTrue(); + Topology topology = tm.globalForEpoch(epoch); + Ranges ranges = topology.ranges().mergeTouching(); + Ranges actual = tm.syncComplete(epoch).mergeTouching(); + // TopologyManager defines syncComplete for an epoch as (epoch - 1).syncComplete. This means that an epoch has reached quorum, but will still miss ranges as previous epochs have not + if (!ranges.equals(actual) && tm.minEpoch() != epoch && !ranges.equals(tm.syncComplete(epoch - 1).mergeTouching())) + continue; + Assertions.assertThat(actual) + .describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s; peers=%s; previous epochs %s", id, epoch, ranges.subtract(actual), topology.nodes(), + LongStream.range(inst.epoch.getEpoch(), epoch + 1).mapToObj(e -> e + " -> " + conf.getEpochSnapshot(e).syncStatus + "(synced=" + globalSynced(e) + "): " + tm.syncComplete(e)).collect(Collectors.joining("\n"))) + .isEqualTo(ranges); + } + } + } + } + + String displayTopology() + { + List alive = alive(); + List> withToken = new ArrayList<>(alive.size()); + for (Node.Id n : alive) + withToken.add(Pair.create(n, instances.get(n).token)); + withToken.sort(Comparator.comparing(a -> a.right)); + StringBuilder sb = new StringBuilder(); + for (var p : withToken) + sb.append(p.left).append('\t').append(p.right).append('\n'); + return sb.toString(); + } + + @Override + public String toString() + { + return "Topology:\n" + displayTopology(); + } + + boolean hasWork() + { + return globalExecutor.hasWork(); + } + + boolean processOne() + { + boolean result = globalExecutor.processOne(); + checkFailures(); + return result; + } + + @SuppressWarnings("StatementWithEmptyBody") + void processAll() + { + while (processOne()) + { + } + } + + public void checkFailures() + { + if (Thread.interrupted()) + failures.add(new InterruptedException()); + if (failures.isEmpty()) return; + AssertionError error = new AssertionError("Unexpected exceptions found"); + failures.forEach(error::addSuppressed); + failures.clear(); + throw error; + } + + List alive() + { + ArrayList ids = new ArrayList<>(Sets.difference(instances.keySet(), removed)); + ids.sort(Comparator.naturalOrder()); + return ids; + } + + int aliveCount() + { + return instances.size() - removed.size(); + } + + private final NavigableSet partitions = new TreeSet<>(); + + private boolean partitioned(InetAddressAndPort self, InetAddressAndPort to) + { + return partitions.contains(new Connection(self, to)); + } + + private SimulatedMessageDelivery createMessaging(Node.Id id) + { + InetAddressAndPort address = address(id); + return new SimulatedMessageDelivery(address, + (self, msg, to) -> { + if (removed.contains(nodeId(self)) || removed.contains(nodeId(to))) + return Action.DROP; + if (!self.equals(to) && partitioned(self, to)) + return Action.DROP_PARTITIONED; + if (rs.decide(.01)) + return rs.nextBoolean() ? Action.DELIVER_WITH_FAILURE : Action.FAILURE; + return Action.DELIVER; + }, + SimulatedMessageDelivery.randomDelay(accord.utilsfork.RandomSource.wrap(rs.asJdkRandom())), + (to, msg) -> instances.get(nodeId(to)).reciver.recieve(msg), + (action, to, msg) -> logger.warn("{} message {}", action, msg), + scheduler::schedule, + failures::add); + } + + void addNode(Node.Id id, long token) + { + Invariants.checkState(!tokens.contains(token), "Attempted to add token %d for node %s but token is already taken", token, id); + Epoch epoch = Epoch.create(current.epoch.getEpoch() + 1); + + Instance instance = new Instance(id, token, epoch, createMessaging(id), fd); + instances.put(id, instance); + tokens.add(token); + + current = current.forceEpoch(epoch) + .withPlacements(DataPlacements.builder(2) + .with(meta, DataPlacement.empty()) + .with(replication_params, rebuildPlacements(epoch)) + .build()) + .withDirectory(current.directory.with(new NodeAddresses(address(id)), new Location("dc1", "r1"))); + notify(current); + } + + void removeNode(Node.Id pick) + { + Instance inst = Objects.requireNonNull(instances.get(pick), "Unknown id " + pick); + Invariants.checkState(!removed.contains(pick), "Can not remove node twice; node " + pick); + tokens.remove(inst.token); + removed.add(pick); + inst.stop(); + current = current.forceEpoch(Epoch.create(current.epoch.getEpoch() + 1)) + .withDirectory(current.directory.without(new NodeId(pick.id))); + + current = current.withPlacements(DataPlacements.builder(2) + .with(meta, DataPlacement.empty()) + .with(replication_params, rebuildPlacements(current.epoch)) + .build()); + notify(current); + } + + private DataPlacement rebuildPlacements(Epoch epoch) + { + DataPlacement.Builder builder = DataPlacement.builder(); + for (Node.Id inst : alive()) + for (Replica replica : instances.get(inst).replica()) + builder.withReadReplica(epoch, replica).withWriteReplica(epoch, replica); + return builder.build(); + } + + void bumpEpoch() + { + current = current.forceEpoch(Epoch.create(current.epoch.getEpoch() + 1)); + notify(current); + } + + private void notify(ClusterMetadata current) + { + Ranges ranges = AccordTopology.createAccordTopology(current).ranges().mergeTouching(); + if (!current.directory.isEmpty()) + Assertions.assertThat(ranges).hasSize(1); + ((StubClusterMetadataService) ClusterMetadataService.instance()).setMetadata(current); + for (Node.Id id : alive()) + { + Instance inst = instances.get(id); + inst.maybeStart(); + inst.config.maybeReportMetadata(current); + } + } + + @SuppressWarnings("SameParameterValue") + private AsyncChain schedule(long time, TimeUnit unit, Callable task) + { + return new AsyncChains.Head<>() + { + @Override + protected void start(BiConsumer callback) + { + scheduler.schedule(() -> { + T value; + try + { + value = task.call(); + } + catch (Throwable t) + { + callback.accept(null, t); + return; + } + callback.accept(value, null); + }, time, unit); + } + }; + } + + private enum Status { Init, Started} + private class Instance + { + private final Node.Id id; + private final long token; + private final AccordConfigurationService config; + private final SimulatedMessageDelivery messaging; + private final SimulatedMessageDelivery.SimulatedMessageReceiver reciver; + private final TopologyManager topology; + private final Epoch epoch; + private Status status = Status.Init; + + Instance(Node.Id node, long token, Epoch epoch, SimulatedMessageDelivery messagingService, IFailureDetector failureDetector) + { + this.id = node; + this.token = token; + this.epoch = epoch; + this.topology = new TopologyManager(SizeOfIntersectionSorter.SUPPLIER, id); + AccordConfigurationService.DiskStateManager instance = MockDiskStateManager.instance; + config = new AccordConfigurationService(node, messagingService, failureDetector, instance, scheduler); + config.registerListener(new ConfigurationService.Listener() + { + @Override + public AsyncResult onTopologyUpdate(Topology topology, boolean startSync) + { +// EpochReady ready = EpochReady.done(topology.epoch()); + AsyncResult metadata = schedule(rs.nextInt(1, 10), TimeUnit.SECONDS, (Callable) () -> null).beginAsResult(); + AsyncResult coordination = metadata.flatMap(ignore -> schedule(rs.nextInt(1, 10), TimeUnit.SECONDS, (Callable) () -> null)).beginAsResult(); + AsyncResult data = coordination.flatMap(ignore -> schedule(rs.nextInt(1, 10), TimeUnit.SECONDS, (Callable) () -> null)).beginAsResult(); + AsyncResult reads = data.flatMap(ignore -> schedule(rs.nextInt(1, 10), TimeUnit.SECONDS, (Callable) () -> null)).beginAsResult(); + EpochReady ready = new EpochReady(topology.epoch(), metadata, coordination, data, reads); + + topology().onTopologyUpdate(topology, () -> ready); + ready.coordination.addCallback(() -> topology().onEpochSyncComplete(id, topology.epoch())); + if (topology().minEpoch() == topology.epoch() && topology().epoch() != topology.epoch()) + return ready.coordination; + config.acknowledgeEpoch(ready, startSync); + return ready.coordination; + } + + @Override + public void onRemoteSyncComplete(Node.Id node, long epoch) + { + topology.onEpochSyncComplete(node, epoch); + } + + @Override + public void onRemoveNodes(long epoch, Collection removed) + { + topology.onRemoveNodes(epoch, removed); + } + + @Override + public void truncateTopologyUntil(long epoch) + { + topology.truncateTopologyUntil(epoch); + } + + @Override + public void onEpochClosed(Ranges ranges, long epoch) + { + topology.onEpochClosed(ranges, epoch); + } + + @Override + public void onEpochRedundant(Ranges ranges, long epoch) + { + topology.onEpochRedundant(ranges, epoch); + } + }); + + Map> handlers = new EnumMap<>(Verb.class); + //noinspection unchecked + handlers.put(Verb.ACCORD_SYNC_NOTIFY_REQ, msg -> AccordService.receive(messagingService, config, (Message>) (Message) msg)); + this.messaging = messagingService; + this.reciver = messagingService.receiver(new SimulatedMessageDelivery.SimpleVerbHandler(handlers)); + } + + void maybeStart() + { + if (status == Status.Init) + { + start(); + status = Status.Started; + } + } + + private void start() + { + config.start(); + } + + TopologyManager topology() + { + return topology; + } + + Collection replica() + { + InetAddressAndPort address = Cluster.address(id); + SortedSet lessThan = tokens.headSet(token); + if (lessThan.isEmpty()) + { + // wrap around + return Arrays.asList(new Replica(address, new LongToken(Long.MIN_VALUE), new LongToken(token), true), + new Replica(address, new LongToken(tokens.last()), new LongToken(Long.MIN_VALUE), true)); + } + + return Collections.singletonList(new Replica(address, new LongToken(lessThan.last()), new LongToken(token), true)); + } + + Set synced(long epoch) + { + if (epoch < this.epoch.getEpoch()) throw new IllegalArgumentException("Asked for epoch before this instance existed"); + EnumSet done = EnumSet.noneOf(EpochTracker.class); + EpochSnapshot snapshot = config.getEpochSnapshot(epoch); + if (snapshot != null && snapshot.syncStatus == AccordConfigurationService.SyncStatus.COMPLETED) + done.add(EpochTracker.configurationService); + if (topology.hasReachedQuorum(epoch)) + done.add(EpochTracker.topologyManager); + if (!config.syncPropagator().hasPending(epoch)) + done.add(EpochTracker.accordSyncPropagator); + return done; + } + + void stop() + { + messaging.stop(); + } + } + } + + private static class Connection implements Comparable + { + final InetAddressAndPort from, to; + + private Connection(InetAddressAndPort from, InetAddressAndPort to) + { + this.from = from; + this.to = to; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Connection that = (Connection) o; + return from.equals(that.from) && to.equals(that.to); + } + + @Override + public int hashCode() + { + return Objects.hash(from, to); + } + + @Override + public String toString() + { + return "Connection{" + "from=" + from + ", to=" + to + '}'; + } + + @Override + public int compareTo(Connection o) + { + int rc = from.compareTo(o.from); + if (rc == 0) + rc = to.compareTo(o.to); + return rc; + } + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/LoggingDiskStateManager.java b/test/unit/org/apache/cassandra/service/accord/LoggingDiskStateManager.java new file mode 100644 index 000000000000..7b8ce0e33581 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/LoggingDiskStateManager.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.local.Node; +import accord.primitives.Ranges; +import accord.topology.Topology; +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Set; + +/** + * When trying to inspect the order in which disk state is modified, this class can aid by adding logging. This class + * mostly exists for testing to aid in debugging. + */ +@SuppressWarnings("unused") +@VisibleForTesting +public class LoggingDiskStateManager implements AccordConfigurationService.DiskStateManager { + private static final Logger logger = LoggerFactory.getLogger(LoggingDiskStateManager.class); + private final Node.Id self; + private final AccordConfigurationService.DiskStateManager delegate; + + public LoggingDiskStateManager(Node.Id self, AccordConfigurationService.DiskStateManager delegate) { + this.self = self; + this.delegate = delegate; + } + + @Override + public AccordKeyspace.EpochDiskState loadTopologies(AccordKeyspace.TopologyLoadConsumer consumer) { + logger.info("[node={}] Calling loadTopologies()", self); + return delegate.loadTopologies(consumer); + } + + @Override + public AccordKeyspace.EpochDiskState setNotifyingLocalSync(long epoch, Set pending, AccordKeyspace.EpochDiskState diskState) { + logger.info("[node={}] Calling setNotifyingLocalSync({}, {}, {})", self, epoch, pending, diskState); + return delegate.setNotifyingLocalSync(epoch, pending, diskState); + } + + @Override + public AccordKeyspace.EpochDiskState setCompletedLocalSync(long epoch, AccordKeyspace.EpochDiskState diskState) { + logger.info("[node={}] Calling setCompletedLocalSync({}, {})", self, epoch, diskState); + return delegate.setCompletedLocalSync(epoch, diskState); + } + + @Override + public AccordKeyspace.EpochDiskState markLocalSyncAck(Node.Id id, long epoch, AccordKeyspace.EpochDiskState diskState) { + logger.info("[node={}] Calling markLocalSyncAck({}, {}, {})", self, id, epoch, diskState); + return delegate.markLocalSyncAck(id, epoch, diskState); + } + + @Override + public AccordKeyspace.EpochDiskState saveTopology(Topology topology, AccordKeyspace.EpochDiskState diskState) { + logger.info("[node={}] Calling saveTopology({}, {})", self, topology.epoch(), diskState); + return delegate.saveTopology(topology, diskState); + } + + @Override + public AccordKeyspace.EpochDiskState markRemoteTopologySync(Node.Id id, long epoch, AccordKeyspace.EpochDiskState diskState) { + logger.info("[node={}] Calling markRemoteTopologySync({}, {}, {})", self, id, epoch, diskState); + return delegate.markRemoteTopologySync(id, epoch, diskState); + } + + @Override + public AccordKeyspace.EpochDiskState markClosed(Ranges ranges, long epoch, AccordKeyspace.EpochDiskState diskState) { + logger.info("[node={}] Calling markClosed({}, {}, {})", self, ranges, epoch, diskState); + return delegate.markClosed(ranges, epoch, diskState); + } + + @Override + public AccordKeyspace.EpochDiskState truncateTopologyUntil(long epoch, AccordKeyspace.EpochDiskState diskState) { + logger.info("[node={}] Calling truncateTopologyUntil({}, {})", self, epoch, diskState); + return delegate.truncateTopologyUntil(epoch, diskState); + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/MockDiskStateManager.java b/test/unit/org/apache/cassandra/service/accord/MockDiskStateManager.java new file mode 100644 index 000000000000..9e3760263441 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/MockDiskStateManager.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import accord.local.Node; +import accord.primitives.Ranges; +import accord.topology.Topology; +import accord.utils.Invariants; + +import java.util.Set; + +public enum MockDiskStateManager implements AccordConfigurationService.DiskStateManager { + instance; + + @Override + public AccordKeyspace.EpochDiskState loadTopologies(AccordKeyspace.TopologyLoadConsumer consumer) { + return AccordKeyspace.EpochDiskState.EMPTY; + } + + @Override + public AccordKeyspace.EpochDiskState setNotifyingLocalSync(long epoch, Set pending, AccordKeyspace.EpochDiskState diskState) { + return maybeUpdateMaxEpoch(diskState, epoch); + } + + @Override + public AccordKeyspace.EpochDiskState setCompletedLocalSync(long epoch, AccordKeyspace.EpochDiskState diskState) { + return maybeUpdateMaxEpoch(diskState, epoch); + } + + @Override + public AccordKeyspace.EpochDiskState markLocalSyncAck(Node.Id id, long epoch, AccordKeyspace.EpochDiskState diskState) { + return maybeUpdateMaxEpoch(diskState, epoch); + } + + @Override + public AccordKeyspace.EpochDiskState saveTopology(Topology topology, AccordKeyspace.EpochDiskState diskState) { + return maybeUpdateMaxEpoch(diskState, topology.epoch()); + } + + @Override + public AccordKeyspace.EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, AccordKeyspace.EpochDiskState diskState) { + return maybeUpdateMaxEpoch(diskState, epoch); + } + + @Override + public AccordKeyspace.EpochDiskState markClosed(Ranges ranges, long epoch, AccordKeyspace.EpochDiskState diskState) { + return maybeUpdateMaxEpoch(diskState, epoch); + } + + @Override + public AccordKeyspace.EpochDiskState truncateTopologyUntil(long epoch, AccordKeyspace.EpochDiskState diskState) { + return maybeUpdateMaxEpoch(diskState, epoch); + } + + private static AccordKeyspace.EpochDiskState maybeUpdateMaxEpoch(AccordKeyspace.EpochDiskState diskState, long epoch) { + if (diskState.isEmpty()) + return AccordKeyspace.EpochDiskState.create(epoch); + Invariants.checkArgument(epoch >= diskState.minEpoch, "Epoch %d < %d (min)", epoch, diskState.minEpoch); + if (epoch > diskState.maxEpoch) + diskState = diskState.withNewMaxEpoch(epoch); + return diskState; + } +} diff --git a/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java b/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java index ceed706236b5..e3e471b550ca 100644 --- a/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java +++ b/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java @@ -21,7 +21,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.TreeSet; @@ -99,7 +99,7 @@ public Sut createSut(State state) @Override public Gen> commands(State state) { - Map>, Integer> possible = new HashMap<>(); + Map>, Integer> possible = new LinkedHashMap<>(); possible.put(rs -> new Create(state.newRange(rs), SMALL_INT_GEN.nextInt(rs)), state.createWeight); possible.put(rs -> new Read(state.newRange(rs)), state.readWeight); possible.put(rs -> new KeyRead(IntKey.routing(state.tokenGen.nextInt(rs))), state.readWeight); @@ -108,15 +108,15 @@ public Sut createSut(State state) possible.put(ignore -> Clear.instance, state.clearWeight); if (!state.uniqRanges.isEmpty()) { - possible.put(rs -> new Read(rs.pick(state.uniqRanges)), state.readWeight); + possible.put(rs -> new Read(rs.pickOrderedSet(state.uniqRanges)), state.readWeight); possible.put(rs -> { - Range range = rs.pick(state.uniqRanges); + Range range = rs.pickOrderedSet(state.uniqRanges); int token = rs.nextInt(((IntKey.Routing) range.start()).key, ((IntKey.Routing) range.end()).key) + 1; return new KeyRead(IntKey.routing(token)); }, state.readWeight); - possible.put(rs -> new RangeRead(rs.pick(state.uniqRanges)), state.readWeight); - possible.put(rs -> new Update(rs.pick(state.uniqRanges), SMALL_INT_GEN.nextInt(rs)), state.updateWeight); - possible.put(rs -> new Delete(rs.pick(state.uniqRanges)), state.deleteWeight); + possible.put(rs -> new RangeRead(rs.pickOrderedSet(state.uniqRanges)), state.readWeight); + possible.put(rs -> new Update(rs.pickOrderedSet(state.uniqRanges), SMALL_INT_GEN.nextInt(rs)), state.updateWeight); + possible.put(rs -> new Delete(rs.pickOrderedSet(state.uniqRanges)), state.deleteWeight); } return Gens.oneOf(possible); } From 1439fe8d316d47447b8d32cb921f354b8f4a6db0 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Wed, 5 Jun 2024 08:57:05 +0200 Subject: [PATCH 123/340] Bring back Journal simulator (w/o Accord at least for now); add semaphore interceptor. Patch by Alex Petrov; reviewed by David Capwell for CASSANDRA-19695. --- .../test/AccordJournalSimulationTest.java | 331 +++++++++--------- 1 file changed, 165 insertions(+), 166 deletions(-) diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 22dd55bb31ea..53569af1b128 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -18,250 +18,249 @@ package org.apache.cassandra.simulator.test; import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import java.util.UUID; -import java.util.concurrent.CopyOnWriteArrayList; -import javax.annotation.Nullable; +import java.util.zip.Checksum; import com.google.common.collect.ImmutableMap; +import com.google.common.jimfs.Jimfs; -import accord.topology.TopologyUtils; +import org.apache.cassandra.concurrent.ExecutorFactory; +import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.config.AccordSpec; -import org.apache.cassandra.schema.*; -import org.junit.Ignore; +import org.apache.cassandra.config.Config; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.filesystem.ListenableFileSystem; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.journal.AsyncCallbacks; +import org.apache.cassandra.journal.Journal; +import org.apache.cassandra.journal.KeySupport; +import org.apache.cassandra.journal.ValueSerializer; + +import org.junit.Assert; import org.junit.Test; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.Utils; -import accord.api.Data; -import accord.api.RoutingKey; -import accord.api.Update; -import accord.api.Write; -import accord.local.Node; -import accord.messages.MessageType; -import accord.messages.PreAccept; -import accord.messages.TxnRequest; -import accord.primitives.FullKeyRoute; -import accord.primitives.FullRoute; -import accord.primitives.Keys; -import accord.primitives.Ranges; -import accord.primitives.Seekables; -import accord.primitives.Timestamp; -import accord.primitives.Txn; -import accord.primitives.TxnId; -import accord.topology.Topologies; -import org.apache.cassandra.concurrent.ExecutorFactory; -import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.ParameterizedClass; -import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.db.commitlog.CommitLog; -import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.io.util.Files; -import org.apache.cassandra.service.accord.AccordJournal; -import org.apache.cassandra.service.accord.TokenRange; -import org.apache.cassandra.service.accord.api.AccordRoutingKey; -import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.txn.TxnNamedRead; -import org.apache.cassandra.service.accord.txn.TxnQuery; -import org.apache.cassandra.service.accord.txn.TxnRead; -import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.Isolated; import org.apache.cassandra.utils.concurrent.CountDownLatch; public class AccordJournalSimulationTest extends SimulationTestBase { @Test - @Ignore // TODO: re-enable - public void test() throws IOException - { - simulate(arr(() -> run()), - () -> check()); - } - - private static void run() + public void simpleRWTest() { - for (int i = 0; i < State.events; i++) - { - int finalI = i; - State.executor.execute(() -> State.append(finalI)); - } + simulate(arr(() -> { + ListenableFileSystem fs = new ListenableFileSystem(Jimfs.newFileSystem()); + File.unsafeSetFilesystem(fs); + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.setCommitLogCompression(new ParameterizedClass("LZ4Compressor", ImmutableMap.of())); // + DatabaseDescriptor.setCommitLogWriteDiskAccessMode(Config.DiskAccessMode.standard); + DatabaseDescriptor.initializeCommitLogDiskAccessMode(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + DatabaseDescriptor.setAccordJournalDirectory("/journal"); + new File("/journal").createDirectoriesIfNotExists(); - try - { - State.eventsDurable.await(); - State.logger.info("All events are durable done!"); - } - catch (InterruptedException e) - { - throw new AssertionError(e); - } + DatabaseDescriptor.setDumpHeapOnUncaughtException(false); - if (!State.exceptions.isEmpty()) - { - AssertionError error = new AssertionError("Exceptions found during test"); - State.exceptions.forEach(error::addSuppressed); - throw error; - } + Keyspace.setInitialized(); - State.journal.shutdown(); - State.logger.info("Run complete"); + State.journal = new Journal<>("AccordJournal", + new File("/journal"), + new AccordSpec.JournalSpec(), + new TestCallbacks(), + new IdentityKeySerializer(), + new IdentityValueSerializer()); + }), + () -> check()); } - private static void check() + public static void check() { - State.logger.info("Check starting"); - State.journal.start(null); // to avoid a while true deadlock + State.journal.start(); try { - for (int i = 0; i < State.events; i++) + final int count = 100; + for (int i = 0; i < count; i++) + { + int finalI = i; + State.executor.submit(() -> State.journal.asyncWrite("test" + finalI, "test" + finalI, Collections.singleton(1), null)); + } + + State.latch.await(); + + for (int i = 0; i < count; i++) { - TxnRequest event = State.journal.readMessage(State.toTxnId(i), MessageType.PRE_ACCEPT_REQ, PreAccept.class); - State.logger.info("Event {} -> {}", i, event); - if (event == null) - throw new AssertionError(String.format("Unable to read event %d", i)); + State.logger.debug("Reading {}", i); + Assert.assertEquals(State.journal.readFirst("test" + i), "test" + i); } - State.logger.info("Check complete"); + } + + catch (InterruptedException e) + { + throw new RuntimeException(e); } finally { State.journal.shutdown(); + + if (!State.thrown.isEmpty()) + { + AssertionError throwable = new AssertionError("Caught exceptions"); + for (Throwable t: State.thrown) + throwable.addSuppressed(t); + throw throwable; + } } } - @Isolated - public static class State + public static class TestCallbacks implements AsyncCallbacks { - private static final Logger logger = LoggerFactory.getLogger(State.class); - private static final String KEYSPACE = "test"; - static + @Override + public void onWrite(long segment, int position, int size, String key, String value, Object writeContext) { - Files.newGlobalInMemoryFileSystem(); - DatabaseDescriptor.clientWithDaemonConfig(); - DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - DatabaseDescriptor.setAccordJournalDirectory("/journal"); - new File("/journal").createDirectoriesIfNotExists(); - DatabaseDescriptor.setCommitLogCompression(new ParameterizedClass("LZ4Compressor", ImmutableMap.of())); - DatabaseDescriptor.setDumpHeapOnUncaughtException(false); - - // in order to do journal.read, we need all this setup first! - Keyspace.setInitialized(); - Schema.instance.submit(SchemaTransformations.addKeyspace(KeyspaceMetadata.create(State.KEYSPACE, KeyspaceParams.simple(1)), true)); - Keyspace ks = Keyspace.open(State.KEYSPACE); - ks.initCfCustom(ColumnFamilyStore.createColumnFamilyStore(ks, TableMetadataRef.forOfflineTools(TableMetadata.builder(State.KEYSPACE, State.KEYSPACE) - .addPartitionKeyColumn("pk", Int32Type.instance) - .build()).get(), false)); - - try - { - CommitLog.instance.shutdownBlocking(); - } - catch (InterruptedException e) - { - // ignore - } + State.latch.decrement(); } - private static final ExecutorPlus executor = ExecutorFactory.Global.executorFactory().pooled("name", 10); - private static final AccordJournal journal = new AccordJournal(null, new AccordSpec.JournalSpec()); - private static final int events = 100; - private static final CountDownLatch eventsWritten = CountDownLatch.newCountDownLatch(events); - private static final CountDownLatch eventsDurable = CountDownLatch.newCountDownLatch(events); - private static final List exceptions = new CopyOnWriteArrayList<>(); - - static + + @Override + public void onWriteFailed(String key, String value, Object writeContext, Throwable cause) { - journal.start(null); + State.thrown.add(new IllegalStateException("Write failed for " + key)); + State.latch.decrement(); } - public static void append(int event) + @Override + public void onFlush(long segment, int position) { - TxnRequest request = toRequest(event); -// journal.appendMessageTest(request, executor, new AsyncWriteCallback() -// { -// @Override -// public void run() -// { -// durable(event); -// } -// -// @Override -// public void onFailure(Throwable error) -// { -// eventsDurable.decrement(); // to make sure we don't block forever -// exceptions.add(error); -// } -// }); - eventsWritten.decrement(); - logger.info("append({}); remaining {}", event, eventsWritten.count()); } - private static void durable(int event) + @Override + public void onFlushFailed(Throwable cause) { - eventsDurable.decrement(); - logger.info("durable({}); remaining {}", event, eventsDurable.count()); + State.thrown.add(new RuntimeException("Could not flush", cause)); } + } - private static TxnRequest toRequest(int event) + @Isolated + public static class IdentityValueSerializer implements ValueSerializer + { + @Override + public int serializedSize(String key, String value, int userVersion) { - TxnId id = toTxnId(event); - Ranges ranges = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(tableId), AccordRoutingKey.SentinelKey.max(tableId))); - Topologies topologies = Utils.topologies(TopologyUtils.initialTopology(new Node.Id[] { node}, ranges, 3)); - Keys keys = Keys.of(toKey(0)); - Txn txn = new Txn.InMemory(keys, new TxnRead(new TxnNamedRead[0], keys, null), TxnQuery.ALL, new NoopUpdate()); - FullRoute route = route(); - return new PreAccept(node, topologies, id, txn, route); + return TypeSizes.INT_SIZE + key.length(); } - private static TxnId toTxnId(int event) + @Override + public void serialize(String key, String value, DataOutputPlus out, int userVersion) throws IOException { - return TxnId.fromValues(1, event, 0, node); + out.writeInt(key.length()); + out.writeBytes(key); } - private static PartitionKey toKey(int a) + @Override + public String deserialize(String key, DataInputPlus in, int userVersion) throws IOException { - return new PartitionKey(tableId, Murmur3Partitioner.instance.decorateKey(ByteBufferUtil.bytes(a))); + int size = in.readInt(); + byte[] value = new byte[size]; + for (int i = 0; i < size; i++) + value[i] = in.readByte(); + + return new String(value); } + } - private static final TableId tableId = TableId.fromUUID(new UUID(0, 0)); - private static final Node.Id node = new Node.Id(0); + @Isolated + public static class IdentityKeySerializer implements KeySupport + { + private final byte aByte = 0xd; + @Override + public int serializedSize(int userVersion) + { + return 16; + } - private static FullRoute route() + @Override + public void serialize(String key, DataOutputPlus out, int userVersion) throws IOException { - return new FullKeyRoute(key, true, new RoutingKey[]{ key }); + int maxSize = 16 - TypeSizes.INT_SIZE; + if (key.length() > maxSize) + throw new IllegalStateException(); + + out.writeInt(key.length()); + out.writeBytes(key); + int remaining = maxSize - key.length(); + for (int i = 0; i < remaining; i++) + out.writeByte(aByte + i); } - private static final RoutingKey key = new AccordRoutingKey.TokenKey(tableId, new Murmur3Partitioner.LongToken(42)); - } + @Override + public String deserialize(DataInputPlus in, int userVersion) throws IOException + { + int size = in.readInt(); + byte[] key = new byte[size]; + for (int i = 0; i < size; i++) + key[i] = in.readByte(); + + int maxSize = 16 - TypeSizes.INT_SIZE; + int remaining = maxSize - size; + for (int i = 0; i < remaining; i++) + Assert.assertEquals(aByte + i, in.readByte()); + + return new String(key); + } - public static class NoopUpdate implements Update - { @Override - public Seekables keys() + public String deserialize(ByteBuffer buffer, int position, int userVersion) { - return null; + int size = buffer.getInt(); + byte[] key = new byte[size]; + for (int i = 0; i < size; i++) + key[i] = buffer.get(); + + int maxSize = 16 - TypeSizes.INT_SIZE; + int remaining = maxSize - size; + for (int i = 0; i < remaining; i++) + Assert.assertEquals(aByte + i, buffer.get()); + + return new String(key); } @Override - public Write apply(Timestamp executeAt, @Nullable Data data) + public void updateChecksum(Checksum crc, String key, int userVersion) { - return null; + crc.update(key.getBytes()); } @Override - public Update slice(Ranges ranges) + public int compareWithKeyAt(String key, ByteBuffer buffer, int position, int userVersion) { - return null; + throw new IllegalStateException(); } @Override - public Update merge(Update other) + public int compare(String o1, String o2) { - return null; + return o1.compareTo(o2); } } -} + + @Isolated + public static class State + { + private static final Logger logger = LoggerFactory.getLogger(State.class); + static Journal journal; + static CountDownLatch latch = CountDownLatch.newCountDownLatch(100); + static List thrown = new ArrayList<>(); + static ExecutorPlus executor = ExecutorFactory.Global.executorFactory().pooled("name", 10); + } +} \ No newline at end of file From 6d01bc25350ea6f438d35ef04e7db58d90eafb07 Mon Sep 17 00:00:00 2001 From: Youki Shiraishi Date: Tue, 23 Jul 2024 12:50:58 -0700 Subject: [PATCH 124/340] CEP-15 (Accord): When starting a transaction in a table where Accord is not enabled, should fail fast rather than fail with lack of ranges patch by Youki Shiraishi; reviewed by Caleb Rackliffe, David Capwell for CASSANDRA-19759 --- accord_demo.txt | 15 +- modules/accord | 2 +- .../config/CassandraRelevantProperties.java | 1 + .../cql3/statements/TransactionStatement.java | 6 + .../cassandra/journal/ActiveSegment.java | 34 +- .../cassandra/journal/AsyncCallbacks.java | 45 - .../org/apache/cassandra/journal/Flusher.java | 24 +- .../cassandra/journal/InMemoryIndex.java | 6 + .../org/apache/cassandra/journal/Index.java | 1 + .../org/apache/cassandra/journal/Journal.java | 136 +- .../apache/cassandra/journal/OnDiskIndex.java | 30 + .../cassandra/journal/RecordConsumer.java | 2 +- .../org/apache/cassandra/journal/Segment.java | 14 +- .../apache/cassandra/journal/Segments.java | 17 + .../cassandra/journal/StaticSegment.java | 8 +- .../cassandra/journal/ValueSerializer.java | 1 + .../service/accord/AccordCommandStore.java | 49 +- .../service/accord/AccordJournal.java | 1320 +++-------------- .../service/accord/AccordKeyspace.java | 206 +-- .../service/accord/AccordSafeCommand.java | 10 +- .../accord/AccordSafeCommandStore.java | 28 +- .../service/accord/AccordService.java | 7 +- .../service/accord/AccordVerbHandler.java | 2 +- .../cassandra/service/accord/IJournal.java | 16 +- .../cassandra/service/accord/JournalKey.java | 247 +++ .../service/accord/SavedCommand.java | 612 ++++++++ .../service/accord/async/AsyncOperation.java | 58 +- .../service/accord/async/ExecutionOrder.java | 390 ----- .../test/accord/AccordJournalTest.java | 89 ++ .../test/AccordJournalSimulationTest.java | 78 +- .../simulator/test/HarrySimulatorTest.java | 10 +- .../org/apache/cassandra/ServerTestUtils.java | 2 +- .../statements/TransactionStatementTest.java | 52 +- .../CompactionAccordIteratorsTest.java | 83 +- .../apache/cassandra/journal/JournalTest.java | 20 +- .../apache/cassandra/journal/TestParams.java | 2 +- .../accord/AccordCommandStoreTest.java | 36 +- .../service/accord/AccordCommandTest.java | 95 +- .../accord/AccordJournalOrderTest.java | 110 ++ .../service/accord/AccordJournalTest.java | 35 +- .../service/accord/AccordKeyspaceTest.java | 12 +- .../service/accord/AccordTestUtils.java | 22 + .../cassandra/service/accord/MockJournal.java | 213 +-- .../accord/SimulatedAccordCommandStore.java | 3 - .../service/accord/SimulatedDepsTest.java | 2 + .../accord/SimulatedMultiKeyAndRangeTest.java | 2 + ...ulatedRandomKeysWithRangeConflictTest.java | 2 + .../accord/async/AsyncOperationTest.java | 87 +- 48 files changed, 1923 insertions(+), 2319 deletions(-) delete mode 100644 src/java/org/apache/cassandra/journal/AsyncCallbacks.java create mode 100644 src/java/org/apache/cassandra/service/accord/JournalKey.java create mode 100644 src/java/org/apache/cassandra/service/accord/SavedCommand.java delete mode 100644 src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java diff --git a/accord_demo.txt b/accord_demo.txt index b8834515221c..63b7d21201d8 100644 --- a/accord_demo.txt +++ b/accord_demo.txt @@ -1,19 +1,14 @@ - ccm create accord-cql-poc -n 3 ccm start -bin/cqlsh -e "create keyspace ks with replication={'class':'SimpleStrategy', 'replication_factor':3};" -bin/cqlsh -e "create table ks.tbl1 (k int primary key, v int);" -bin/cqlsh -e "create table ks.tbl2 (k int primary key, v int);" - -bin/nodetool -h 0000:0000:0000:0000:0000:ffff:7f00:0001 -p 7100 createepochunsafe -bin/nodetool -h 0000:0000:0000:0000:0000:ffff:7f00:0001 -p 7200 createepochunsafe -bin/nodetool -h 0000:0000:0000:0000:0000:ffff:7f00:0001 -p 7300 createepochunsafe +bin/cqlsh -e "CREATE KEYSPACE ks WITH replication={'class':'SimpleStrategy', 'replication_factor':3};" +bin/cqlsh -e "CREATE TABLE ks.tbl1 (k int PRIMARY KEY, v int) WITH transactional_mode = 'full';" +bin/cqlsh -e "CREATE TABLE ks.tbl2 (k int PRIMARY KEY, v int) WITH transactional_mode = 'full';" BEGIN TRANSACTION LET row1 = (SELECT * FROM ks.tbl1 WHERE k = 1); SELECT row1.v; IF row1 IS NULL THEN - INSERT INTO ks.tbl1 (k, v) VALUES (1, 2); + INSERT INTO ks.tbl2 (k, v) VALUES (1, 2); END IF -COMMIT TRANSACTION; \ No newline at end of file +COMMIT TRANSACTION; diff --git a/modules/accord b/modules/accord index 694ae39e2e00..4c870dc9b561 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 694ae39e2e00075bdabd47632dced0db12a9981d +Subproject commit 4c870dc9b561a841ea9b923ff739953adcc00325 diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 2cb835a35fb6..af9462bac7aa 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -214,6 +214,7 @@ public enum CassandraRelevantProperties DRAIN_EXECUTOR_TIMEOUT_MS("cassandra.drain_executor_timeout_ms", convertToString(TimeUnit.MINUTES.toMillis(5))), DROP_OVERSIZED_READ_REPAIR_MUTATIONS("cassandra.drop_oversized_readrepair_mutations"), DTEST_ACCORD_ENABLED("jvm_dtest.accord.enabled", "true"), + DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED("jvm_dtest.accord.journal_sanity_check_enabled", "false"), DTEST_API_LOG_TOPOLOGY("cassandra.dtest.api.log.topology"), /** This property indicates if the code is running under the in-jvm dtest framework */ DTEST_IS_IN_JVM_DTEST("org.apache.cassandra.dtest.is_in_jvm_dtest"), diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index ed093d833ce5..2adb5572e852 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -101,6 +101,7 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; %s %s"; public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions; %s statement %s"; public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps; %s statement %s"; + public static final String TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE = "Accord transactions are disabled on table (See transactional_mode in table options); %s statement %s"; public static final String NO_COUNTERS_IN_TXNS_MESSAGE = "Counter columns cannot be accessed within a transaction; %s statement %s"; public static final String EMPTY_TRANSACTION_MESSAGE = "Transaction contains no reads or writes"; public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; @@ -529,6 +530,8 @@ public CQLStatement prepare(ClientState state) SelectStatement prepared = select.prepare(bindVariables); + if (!prepared.table.isAccordEnabled()) + throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", prepared.source); if (prepared.table.isCounter()) throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); @@ -547,6 +550,8 @@ public CQLStatement prepare(ClientState state) { SelectStatement prepared = select.prepare(bindVariables); + if (!prepared.table.isAccordEnabled()) + throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", prepared.source); if (prepared.table.isCounter()) throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); @@ -572,6 +577,7 @@ public CQLStatement prepare(ClientState state) ModificationStatement.Parsed parsed = updates.get(i); ModificationStatement prepared = parsed.prepare(state, bindVariables); + checkTrue(prepared.metadata().isAccordEnabled(), TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, prepared.type, prepared.source); checkFalse(prepared.hasConditions(), NO_CONDITIONS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); checkFalse(prepared.isTimestampSet(), NO_TIMESTAMPS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index 1fd2e4dd1a29..ebbd672b8057 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -260,6 +260,16 @@ boolean shouldFlush() return lastFlushedOffset < allocatePosition; } + public boolean isFlushed(long position) + { + return lastFlushedOffset >= position; + } + + public long lastFlushedOffset() + { + return lastFlushedOffset; + } + /** * Possibly force a disk flush for this segment file. * TODO FIXME: calls from outside Flusher + callbacks @@ -329,14 +339,9 @@ private void fsyncInternal() } } - boolean isCompletedAndFullyFlushed(int syncedOffset) - { - return syncedOffset >= endOfBuffer; - } - - boolean isCompletedAndFullyFsynced() + boolean isFullyFlushed() { - return lastFsyncOffset >= endOfBuffer; + return lastFsyncOffset >= allocatePosition.get(); } /** @@ -427,23 +432,22 @@ final class Allocation private final OpOrder.Group appendOp; private final ByteBuffer buffer; private final int position; - private final int size; Allocation(OpOrder.Group appendOp, ByteBuffer buffer) { this.appendOp = appendOp; this.buffer = buffer; this.position = buffer.position(); - this.size = buffer.remaining(); } - void write(K id, ByteBuffer record, Set hosts) + RecordPointer write(K id, ByteBuffer record, Set hosts) { try (BufferedDataOutputStreamPlus out = new DataOutputBufferFixed(buffer)) { EntrySerializer.write(id, record, hosts, keySupport, out, descriptor.userVersion); index.update(id, position); metadata.update(hosts); + return new RecordPointer(descriptor.timestamp, position); } catch (IOException e) { @@ -455,14 +459,18 @@ void write(K id, ByteBuffer record, Set hosts) } } - void asyncWrite(K id, V record, ByteBuffer bytes, Set hosts, Object writeContext, AsyncCallbacks callbacks) throws IOException + // Variant of write that does not allocate/return a record pointer + void writeInternal(K id, ByteBuffer record, Set hosts) { try (BufferedDataOutputStreamPlus out = new DataOutputBufferFixed(buffer)) { - EntrySerializer.write(id, bytes, hosts, keySupport, out, descriptor.userVersion); + EntrySerializer.write(id, record, hosts, keySupport, out, descriptor.userVersion); index.update(id, position); metadata.update(hosts); - callbacks.onWrite(descriptor.timestamp, position, size, id, record, writeContext); + } + catch (IOException e) + { + throw new JournalWriteError(descriptor, file, e); } finally { diff --git a/src/java/org/apache/cassandra/journal/AsyncCallbacks.java b/src/java/org/apache/cassandra/journal/AsyncCallbacks.java deleted file mode 100644 index 0fb1af39c604..000000000000 --- a/src/java/org/apache/cassandra/journal/AsyncCallbacks.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.cassandra.journal; - -public interface AsyncCallbacks -{ - /** - * Invoked once an entry has been written to the file, and indexes have been updated, but before it - * has been flushed to disk. Invoked from the writer thread. Execution order of onWrite() callbacks - * with regard to each other is undefined. - */ - void onWrite(long segment, int position, int size, K key, V value, Object writeContext); - - /** - * Invoked when anything goes wrong with writing the entry - anywhere from serialization to writing to the file, - * to requesting the flush. - */ - void onWriteFailed(K key, V value, Object writeContext, Throwable cause); - - /** - * Invoked after {@link Flusher} successfully flushes a segment or multiple segments to disk. - * Invocation of this callback implies that any segments older than {@code segment} have been - * completed and also flushed. - * Invocation of this callback also implies that all {@link #onWrite(long, int, int, Object, Object, Object)} - * callbacks for all entries earlier than (segment, position) have finished execution. - */ - void onFlush(long segment, int position); - - void onFlushFailed(Throwable cause); -} diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index 3f2f42859a38..52cf89f2fca1 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -60,7 +60,6 @@ final class Flusher private final Journal journal; private final Params params; - private final AsyncCallbacks callbacks; private volatile Interruptible flushExecutor; private volatile Interruptible fsyncExecutor; @@ -84,14 +83,15 @@ final class Flusher private final FlushMethod syncFlushMethod; private final FlushMethod asyncFlushMethod; + private final Callbacks callbacks; - Flusher(Journal journal) + Flusher(Journal journal, Callbacks callbacks) { this.journal = journal; this.params = journal.params; - this.callbacks = journal.callbacks; this.syncFlushMethod = syncFlushMethod(params); this.asyncFlushMethod = asyncFlushMethod(params); + this.callbacks = callbacks; } void start() @@ -304,6 +304,9 @@ private void doFlush(long startedAt) throws InterruptedException { if (synchronousFsync) fsyncFinishedFor = startedAt; else fSyncRunnable.doNoOpFlush(startedAt); + + if (current != null) + callbacks.onFlush(current.descriptor.timestamp, (int) current.lastFlushedOffset()); return; } @@ -509,4 +512,17 @@ long writtenEntries() { return written.get(); } -} + + public interface Callbacks + { + /** + * Invoked after {@link Flusher} successfully flushes a segment or multiple segments to disk. + * Invocation of this callback implies that any segments older than {@code segment} have been + * completed and also flushed. + * callbacks for all entries earlier than (segment, position) have finished execution. + */ + void onFlush(long segment, int position); + + void onFlushFailed(Throwable cause); + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/journal/InMemoryIndex.java b/src/java/org/apache/cassandra/journal/InMemoryIndex.java index e4ec73a679c5..1ff4a28d7a1b 100644 --- a/src/java/org/apache/cassandra/journal/InMemoryIndex.java +++ b/src/java/org/apache/cassandra/journal/InMemoryIndex.java @@ -102,6 +102,12 @@ public int lookUpFirst(K id) return offests.length == 0 ? -1 : offests[0]; } + @Override + int[] lookUpAll(K id) + { + return lookUp(id); + } + public void persist(Descriptor descriptor) { File tmpFile = descriptor.tmpFileFor(Component.INDEX); diff --git a/src/java/org/apache/cassandra/journal/Index.java b/src/java/org/apache/cassandra/journal/Index.java index ef75d867e193..cd2b69f2e426 100644 --- a/src/java/org/apache/cassandra/journal/Index.java +++ b/src/java/org/apache/cassandra/journal/Index.java @@ -51,6 +51,7 @@ abstract class Index implements Closeable * @return the first offset into the segment, or -1 is none were found */ abstract int lookUpFirst(K id); + abstract int[] lookUpAll(K id); /** * @return the first (smallest) id in the index diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index aa61e5aca5d9..37fd1fa92bee 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -23,15 +23,18 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.LockSupport; import java.util.function.BooleanSupplier; import java.util.function.Function; import java.util.function.Predicate; import java.util.zip.CRC32; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,6 +56,7 @@ import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.Simulate; import org.apache.cassandra.utils.concurrent.WaitQueue; +import org.jctools.queues.MpscUnboundedArrayQueue; import static java.lang.String.format; import static java.util.Comparator.comparing; @@ -88,7 +92,6 @@ public class Journal implements Shutdownable final String name; final File directory; final Params params; - final AsyncCallbacks callbacks; final KeySupport keySupport; final ValueSerializer valueSerializer; @@ -112,31 +115,95 @@ public class Journal implements Shutdownable private final WaitQueue segmentPrepared = newWaitQueue(); private final WaitQueue allocatorThreadWaitQueue = newWaitQueue(); private final BooleanSupplier allocatorThreadWaitCondition = () -> (availableSegment == null); + private final FlusherCallbacks flusherCallbacks; SequentialExecutorPlus closer; //private final Set invalidations = Collections.newSetFromMap(new ConcurrentHashMap<>()); + private class FlusherCallbacks implements Flusher.Callbacks + { + private final MpscUnboundedArrayQueue waitingFor = new MpscUnboundedArrayQueue<>(256); + private List drained = new ArrayList<>(); + + @Override + public void onFlush(long segment, int position) + { + waitingFor.drain(drained::add); + List remaining = new ArrayList<>(); + for (WaitingFor wait : drained) + { + if (wait.segment == segment && wait.position <= position) + wait.run(); + else + remaining.add(wait); + } + drained = remaining; + } + + @Override + public void onFlushFailed(Throwable cause) + { + // TODO: panic + } + + private void submit(RecordPointer pointer, Runnable runnable) + { + if (isFlushed(pointer)) + runnable.run(); + else + { + waitingFor.add(new WaitingFor(pointer.segment, pointer.position, runnable)); + flusher.requestExtraFlush(); + } + } + } + + private static class WaitingFor extends RecordPointer implements Runnable + { + private final Runnable onFlush; + + public WaitingFor(long segment, int position, Runnable onFlush) + { + super(segment, position); + this.onFlush = onFlush; + } + + public void run() + { + onFlush.run(); + } + } + public Journal(String name, File directory, Params params, - AsyncCallbacks callbacks, KeySupport keySupport, ValueSerializer valueSerializer) { this.name = name; this.directory = directory; this.params = params; - this.callbacks = callbacks; this.keySupport = keySupport; this.valueSerializer = valueSerializer; this.metrics = new Metrics<>(name); - this.flusher = new Flusher<>(this); + this.flusherCallbacks = new FlusherCallbacks(); + this.flusher = new Flusher<>(this, flusherCallbacks); //this.invalidator = new Invalidator<>(this); //this.compactor = new Compactor<>(this); } + public boolean isFlushed(RecordPointer recordPointer) + { + return segments.get().isFlushed(recordPointer); + } + + public void onFlush(RecordPointer recordPointer, Runnable runnable) + { + flusherCallbacks.submit(recordPointer, runnable); + } + public void start() { metrics.register(flusher); @@ -267,6 +334,34 @@ public V readFirst(K id) return null; } + // TODO: This should be improved with new index that should take better care of handling multiple items + public List readAll(K id) + { + EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + List res = new ArrayList<>(2); + try (ReferencedSegments segments = selectAndReference(id)) + { + for (Segment segment : segments.all()) + { + segment.readAll(id, holder, () -> { + try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) + { + Invariants.checkState(Objects.equals(holder.key, id), + "%s != %s", holder.key, id); + res.add(valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion)); + holder.clear(); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + }); + } + } + return res; + } + /** * Looks up a record by the provided id, if the value satisfies the provided condition. *

      @@ -371,13 +466,13 @@ public Set test(Set test) * @param record the record to store * @param hosts hosts expected to invalidate the record */ - public void write(K id, V record, Set hosts) + public void blockingWrite(K id, V record, Set hosts) { try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) { valueSerializer.serialize(id, record, dob, params.userVersion()); ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); - alloc.write(id, dob.unsafeGetBufferAndFlip(), hosts); + alloc.writeInternal(id, dob.unsafeGetBufferAndFlip(), hosts); flusher.waitForFlush(alloc); } catch (IOException e) @@ -397,19 +492,22 @@ public void write(K id, V record, Set hosts) * @param record the record to store * @param hosts hosts expected to invalidate the record */ - public void asyncWrite(K id, V record, Set hosts, Object writeContext) + public RecordPointer asyncWrite(K id, V record, Set hosts) { + RecordPointer recordPointer; try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) { valueSerializer.serialize(id, record, dob, params.userVersion()); ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); - alloc.asyncWrite(id, record, dob.unsafeGetBufferAndFlip(), hosts, writeContext, callbacks); + recordPointer = alloc.write(id, dob.unsafeGetBufferAndFlip(), hosts); flusher.asyncFlush(alloc); } - catch (Throwable e) + catch (IOException e) { - callbacks.onWriteFailed(id, record, writeContext, e); + // exception during record serialization into the scratch buffer + throw new RuntimeException(e); } + return recordPointer; } private ActiveSegment.Allocation allocate(int entrySize, Set hosts) @@ -756,6 +854,17 @@ public void replayStaticSegments(RecordConsumer consumer) segment.forEachRecord(consumer); } + @VisibleForTesting + public void closeCurrentSegmentForTesting() + { + ActiveSegment segment = currentSegment; + advanceSegment(segment); + while (!segments().isSwitched(segment)) + { + LockSupport.parkNanos(1000); + } + } + /* * Static helper methods used by journal components */ @@ -815,4 +924,11 @@ private String maybeAddDiskSpaceContext(String message) "Check %s to see if not enough free space is the reason for this error.", message, segmentSize, availableDiskSpace, directory); } + + @VisibleForTesting + public void truncateForTesting() + { + advanceSegment(null); + segments.set(Segments.none()); + } } diff --git a/src/java/org/apache/cassandra/journal/OnDiskIndex.java b/src/java/org/apache/cassandra/journal/OnDiskIndex.java index 2bc40c6a5e99..4cbb3d4e5772 100644 --- a/src/java/org/apache/cassandra/journal/OnDiskIndex.java +++ b/src/java/org/apache/cassandra/journal/OnDiskIndex.java @@ -241,6 +241,36 @@ public int lookUpFirst(K id) return keyIndex < 0 ? -1 : offsetAtIndex(keyIndex); } + @Override + public int[] lookUpAll(K id) + { + if (!mayContainId(id)) + return new int[0]; + + int start = binarySearch(id); + int firstKeyIndex = start; + + for (int i = firstKeyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) + firstKeyIndex = i; + + if (firstKeyIndex < 0) + return new int[0]; + + int lastKeyIndex = start; + + for (int i = lastKeyIndex + 1; i < entryCount && id.equals(keyAtIndex(i)); i++) + lastKeyIndex = i; + + int[] all = new int[lastKeyIndex - firstKeyIndex + 1]; + int idx = firstKeyIndex; + for (int i = 0; i < all.length; i++) + { + all[i] = offsetAtIndex(idx); + idx++; + } + return all; + } + private K keyAtIndex(int index) { return keySupport.deserialize(buffer, FILE_PREFIX_SIZE + index * ENTRY_SIZE, descriptor.userVersion); diff --git a/src/java/org/apache/cassandra/journal/RecordConsumer.java b/src/java/org/apache/cassandra/journal/RecordConsumer.java index 2a1adb9fa42d..e16194001dd2 100644 --- a/src/java/org/apache/cassandra/journal/RecordConsumer.java +++ b/src/java/org/apache/cassandra/journal/RecordConsumer.java @@ -24,5 +24,5 @@ @FunctionalInterface public interface RecordConsumer { - void accept(K key, ByteBuffer buffer, IntHashSet hosts, int userVersion); + void accept(long segment, int position, K key, ByteBuffer buffer, IntHashSet hosts, int userVersion); } diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java index b9c060d9153e..6700cb144579 100644 --- a/src/java/org/apache/cassandra/journal/Segment.java +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -46,6 +46,7 @@ abstract class Segment implements Closeable, RefCounted> abstract Index index(); abstract boolean isActive(); + abstract boolean isFlushed(long position); boolean isStatic() { return !isActive(); } abstract ActiveSegment asActive(); @@ -65,7 +66,7 @@ boolean readFirst(K id, RecordConsumer consumer) if (read(offset, into)) { Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); - consumer.accept(id, into.value, into.hosts, descriptor.userVersion); + consumer.accept(descriptor.timestamp, offset, id, into.value, into.hosts, descriptor.userVersion); return true; } return false; @@ -80,5 +81,16 @@ boolean readFirst(K id, EntrySerializer.EntryHolder into) return true; } + void readAll(K id, EntrySerializer.EntryHolder into, Runnable onEntry) + { + int[] all = index().lookUpAll(id); + + for (int i = 0; i < all.length; i++) + { + Invariants.checkState(read(all[i], into), "Read should always return true"); + onEntry.run(); + } + } + abstract boolean read(int offset, EntrySerializer.EntryHolder into); } diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java index ca5ca47b2b5c..18dfc3bdaf9c 100644 --- a/src/java/org/apache/cassandra/journal/Segments.java +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -98,6 +98,15 @@ void selectActive(long maxTimestamp, Collection> into) into.add(segment.asActive()); } + boolean isSwitched(ActiveSegment active) + { + for (Segment segment : segments.values()) + if (!segment.isActive() && active.descriptor.equals(segment.descriptor)) + return true; + + return false; + } + ActiveSegment oldestActive() { Segment oldest = null; @@ -171,6 +180,14 @@ public void close() } } + boolean isFlushed(RecordPointer recordPointer) + { + Segment segment = segments.get(recordPointer.segment); + if (null == segment) + throw new IllegalArgumentException("Can not reference segment " + recordPointer.segment); + return segment.isFlushed(recordPointer.position); + } + ReferencedSegment selectAndReference(long segmentTimestamp) { Segment segment = segments.get(segmentTimestamp); diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index f3feaefa627b..a25fab04836f 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -182,6 +182,12 @@ boolean isActive() return false; } + @Override + boolean isFlushed(long position) + { + return true; + } + @Override ActiveSegment asActive() { @@ -221,7 +227,7 @@ void forEachRecord(RecordConsumer consumer) { while (reader.advance()) { - consumer.accept(reader.id(), reader.record(), reader.hosts(), descriptor.userVersion); + consumer.accept(descriptor.timestamp, reader.offset(), reader.id(), reader.record(), reader.hosts(), descriptor.userVersion); } } } diff --git a/src/java/org/apache/cassandra/journal/ValueSerializer.java b/src/java/org/apache/cassandra/journal/ValueSerializer.java index a6a2c7d452ca..610770ca66d9 100644 --- a/src/java/org/apache/cassandra/journal/ValueSerializer.java +++ b/src/java/org/apache/cassandra/journal/ValueSerializer.java @@ -24,6 +24,7 @@ public interface ValueSerializer { + // TODO (required): this is completely unused in Journal int serializedSize(K key, V value, int userVersion); void serialize(K key, V value, DataOutputPlus out, int userVersion) throws IOException; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 0f33f04d9273..47417fef6e51 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -18,7 +18,9 @@ package org.apache.cassandra.service.accord; +import java.util.Collections; import java.util.IdentityHashMap; +import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Objects; @@ -47,8 +49,7 @@ import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.SafeCommandStore; -import accord.local.SerializerSupport.MessageProvider; -import accord.messages.Message; +import accord.primitives.Keys; import accord.primitives.Ranges; import accord.primitives.RoutableKey; import accord.primitives.Timestamp; @@ -66,7 +67,6 @@ import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation; -import org.apache.cassandra.service.accord.async.ExecutionOrder; import org.apache.cassandra.service.accord.events.CacheEvents; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -76,7 +76,6 @@ public class AccordCommandStore extends CommandStore implements CacheSize { private static final Logger logger = LoggerFactory.getLogger(AccordCommandStore.class); - private static final boolean CHECK_THREADS = CassandraRelevantProperties.TEST_ACCORD_STORE_THREAD_CHECKS_ENABLED.getBoolean(); private static long getThreadId(ExecutorService executor) @@ -101,7 +100,6 @@ private static long getThreadId(ExecutorService executor) public final String loggingId; private final IJournal journal; private final ExecutorService executor; - private final ExecutionOrder executionOrder; private final AccordStateCache stateCache; private final AccordStateCache.Instance commandCache; private final AccordStateCache.Instance timestampsForKeyCache; @@ -206,7 +204,6 @@ public AccordCommandStore(int id, this.journal = journal; loggingId = String.format("[%s]", id); executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); - executionOrder = new ExecutionOrder(); threadId = getThreadId(executor); stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20, cacheMetrics); commandCache = @@ -214,7 +211,7 @@ public AccordCommandStore(int id, AccordSafeCommand.class, AccordSafeCommand::new, this::loadCommand, - this::saveCommand, + this::appendToKeyspace, this::validateCommand, AccordObjectSizes::command); registerJfrListener(id, commandCache, "Command"); @@ -333,22 +330,33 @@ public AccordStateCache.Instance { return commandsForKeyCache; } - Command loadCommand(TxnId txnId) + + @Nullable + @VisibleForTesting + public Runnable appendToKeyspace(Command before, Command after) { - return AccordKeyspace.loadCommand(this, txnId); + if (after.keysOrRanges() != null && after.keysOrRanges() instanceof Keys) + return null; + + Mutation mutation = AccordKeyspace.getCommandMutation(this.id, before, after, nextSystemTimestampMicros()); + + // TODO (required): make sure we test recovering when this has failed to be persisted + if (null != mutation) + return mutation::applyUnsafe; + + return null; } @Nullable - Runnable saveCommand(Command before, Command after) + @VisibleForTesting + public void appendToLog(Command before, Command after, Runnable runnable) { - Mutation mutation = AccordKeyspace.getCommandMutation(id, before, after, nextSystemTimestampMicros()); - // TODO (required): make sure we test recovering when this has failed to be persisted - return null != mutation ? mutation::applyUnsafe : null; + journal.appendCommand(id, Collections.singletonList(SavedCommand.SavedDiff.diff(before, after)), null, runnable); } boolean validateCommand(TxnId txnId, Command evicting) { - Command reloaded = AccordKeyspace.unsafeLoadCommand(this, txnId); + Command reloaded = loadCommand(txnId); return (evicting == null && reloaded == null) || (evicting != null && reloaded != null && reloaded.isEqualOrFuller(evicting)); } @@ -450,11 +458,6 @@ ProgressLog progressLog() return progressLog; } - public ExecutionOrder executionOrder() - { - return executionOrder; - } - @Override public AsyncChain execute(PreLoadContext preLoadContext, Consumer consumer) { @@ -557,14 +560,14 @@ protected void setRedundantBefore(RedundantBefore newRedundantBefore) public NavigableMap bootstrapBeganAt() { return super.bootstrapBeganAt(); } public NavigableMap safeToRead() { return super.safeToRead(); } - MessageProvider makeMessageProvider(TxnId txnId) + public void appendCommands(List commands, List sanityCheck, Runnable onFlush) { - return journal.makeMessageProvider(txnId); + journal.appendCommand(id, commands, sanityCheck, onFlush); } @VisibleForTesting - public void appendToJournal(Message message) + public Command loadCommand(TxnId txnId) { - journal.appendMessageBlocking(message); + return journal.loadCommand(id, txnId); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index f9daf2354d68..27e838cbcf08 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -18,70 +18,47 @@ package org.apache.cassandra.service.accord; import java.io.IOException; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; -import java.util.function.Predicate; -import java.util.zip.Checksum; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ListMultimap; -import com.google.common.collect.Multimap; import com.google.common.primitives.Ints; - -import accord.messages.ApplyThenWaitUntilApplied; -import org.agrona.collections.Long2ObjectHashMap; -import org.agrona.collections.LongArrayList; -import org.agrona.collections.ObjectHashSet; -import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.local.Command; import accord.local.Node; -import accord.local.Node.Id; -import accord.local.SerializerSupport; import accord.messages.AbstractEpochRequest; -import accord.messages.Accept; -import accord.messages.Apply; -import accord.messages.BeginRecovery; import accord.messages.Commit; import accord.messages.LocalRequest; import accord.messages.Message; import accord.messages.MessageType; -import accord.messages.PreAccept; -import accord.messages.Propagate; import accord.messages.ReplyContext; import accord.messages.Request; import accord.messages.TxnRequest; -import accord.primitives.Ballot; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; -import accord.utils.MapReduceConsume; -import org.apache.cassandra.concurrent.Interruptible; +import org.agrona.collections.Long2ObjectHashMap; +import org.agrona.collections.LongArrayList; import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; -import org.apache.cassandra.concurrent.SequentialExecutorPlus; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.journal.AsyncCallbacks; import org.apache.cassandra.journal.Journal; -import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.journal.Params; import org.apache.cassandra.journal.RecordPointer; import org.apache.cassandra.journal.ValueSerializer; @@ -102,11 +79,8 @@ import org.apache.cassandra.service.accord.serializers.PreacceptSerializers; import org.apache.cassandra.service.accord.serializers.RecoverySerializers; import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; -import org.apache.cassandra.utils.ByteArrayUtil; import org.apache.cassandra.utils.ExecutorUtils; -import org.apache.cassandra.utils.concurrent.Semaphore; -import org.apache.cassandra.utils.vint.VIntCoding; -import org.jctools.queues.SpscLinkedQueue; +import org.apache.cassandra.utils.concurrent.Condition; import static accord.messages.MessageType.ACCEPT_INVALIDATE_REQ; import static accord.messages.MessageType.ACCEPT_REQ; @@ -130,58 +104,41 @@ import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; -import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; -import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; -import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; -import static org.apache.cassandra.db.TypeSizes.INT_SIZE; -import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MAXIMAL_REQ; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MINIMAL_REQ; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ; import static org.apache.cassandra.service.accord.serializers.ReadDataSerializers.applyThenWaitUntilApplied; -import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; -import static org.apache.cassandra.utils.CollectionSerializers.serializeList; -import static org.apache.cassandra.utils.CollectionSerializers.serializedListSize; -import static org.apache.cassandra.utils.concurrent.Semaphore.newSemaphore; -import static org.apache.cassandra.utils.vint.VIntCoding.computeUnsignedVIntSize; public class AccordJournal implements IJournal, Shutdownable { - private static final Logger logger = LoggerFactory.getLogger(AccordJournal.class); + static + { + // make noise early if we forget to update our version mappings + Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_51, "Expected current version to be %d but given %d", MessagingService.VERSION_51, MessagingService.current_version); + } - private static final boolean LOG_MESSAGE_PROVIDER = false; + private static final Logger logger = LoggerFactory.getLogger(AccordJournal.class); private static final Set SENTINEL_HOSTS = Collections.singleton(0); - private static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[21]); + static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[23]); - private final File directory; - private final Journal journal; + public final Journal journal; private final AccordEndpointMapper endpointMapper; - /** - * A cache of deserialized journal records we keep to avoid fetching them from log when free memory allows it. - * TODO (expected, performance): cap memory used for cached records - */ - private final NonBlockingHashMap cachedRecords = new NonBlockingHashMap<>(); + private final DelayedRequestProcessor delayedRequestProcessor = new DelayedRequestProcessor(); Node node; enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } private volatile Status status = Status.INITIALIZED; - private final FrameAggregator frameAggregator = new FrameAggregator(); - private final FrameApplicator frameApplicator = new FrameApplicator(); - @VisibleForTesting public AccordJournal(AccordEndpointMapper endpointMapper, Params params) { - this.directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); - this.journal = new Journal<>("AccordJournal", directory, params, new JournalCallbacks(), Key.SUPPORT, RECORD_SERIALIZER); + File directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); + this.journal = new Journal<>("AccordJournal", directory, params, JournalKey.SUPPORT, RECORD_SERIALIZER); this.endpointMapper = endpointMapper; } @@ -190,9 +147,8 @@ public AccordJournal start(Node node) Invariants.checkState(status == Status.INITIALIZED); this.node = node; status = Status.STARTING; - frameApplicator.start(); - frameAggregator.start(); journal.start(); + delayedRequestProcessor.start(); status = Status.STARTED; return this; } @@ -208,9 +164,8 @@ public void shutdown() { Invariants.checkState(status == Status.STARTED); status = Status.TERMINATING; + delayedRequestProcessor.runOnce(); journal.shutdown(); - frameAggregator.shutdown(); - frameApplicator.shutdown(); status = Status.TERMINATED; } @@ -226,7 +181,7 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted { try { - ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(journal, frameAggregator, frameApplicator)); + ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(journal)); return true; } catch (TimeoutException e) @@ -235,228 +190,194 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted } } - /** - * Auxiliary records are journal entries that aren't Accord protocol requests - such as {@link FrameRecord}. - */ - void appendAuxiliaryRecord(AuxiliaryRecord record, Object context) - { - Key key = new Key(record.timestamp, record.type()); - journal.asyncWrite(key, record, SENTINEL_HOSTS, context); - } - /** * Accord protocol messages originating from remote nodes. */ - public void appendRemoteRequest(Request request, ResponseContext context) + public void processRemoteRequest(Request request, ResponseContext context) { - Type type = Type.fromMessageType(request.type()); - Key key = new Key(type.txnId(request), type); - journal.asyncWrite(key, request, SENTINEL_HOSTS, context); + RemoteRequestContext requestContext = RemoteRequestContext.forLive(request, context); + if (node.topology().hasEpoch(request.waitForEpoch())) + requestContext.process(node, endpointMapper); + else + delayedRequestProcessor.delay(requestContext); } /** * Accord protocol messages originating from local node, e.g. Propagate. */ - public void appendLocalRequest(LocalRequest request, BiConsumer callback) - { - Type type = Type.fromMessageType(request.type()); - Key key = new Key(type.txnId(request), type); - journal.asyncWrite(key, request, SENTINEL_HOSTS, callback); + @SuppressWarnings("rawtypes, unchecked") + public void processLocalRequest(LocalRequest request, BiConsumer callback) + { + LocalRequestContext requestContext = LocalRequestContext.create(request, callback); + if (node.topology().hasEpoch(request.waitForEpoch())) + request.process(node, requestContext.callback); + else + delayedRequestProcessor.delay(requestContext); } - @VisibleForTesting @Override - public void appendMessageBlocking(Message message) + public Command loadCommand(int commandStoreId, TxnId txnId) { - Type type = Type.fromMessageType(message.type()); - Key key = new Key(type.txnId(message), type); - journal.write(key, message, SENTINEL_HOSTS); + List diffs = loadDiffs(commandStoreId, txnId); + if (diffs.isEmpty()) + return null; + return SavedCommand.reconstructFromDiff(diffs); } @VisibleForTesting - public M readMessage(TxnId txnId, MessageType messageType, Class clazz) - { - for (Type type : Type.synonymousTypesFromMessageType(messageType)) - { - M message = clazz.cast(journal.readFirst(new Key(txnId, type))); - if (null != message) return message; - } - return null; - } - - private M readMessage(TxnId txnId, MessageType messageType, Class clazz, Predicate condition) + public List loadDiffs(int commandStoreId, Timestamp txnId) { - for (Type type : Type.synonymousTypesFromMessageType(messageType)) - { - M message = clazz.cast(journal.readFirstMatching(new Key(txnId, type), condition)); - if (null != message) return message; - } - return null; + return (List)(List) journal.readAll(new JournalKey(txnId, Type.SAVED_COMMAND, commandStoreId)); } - // TODO (alexp): tests for objects that go through AccordJournal - private class JournalCallbacks implements AsyncCallbacks + @Override + public void appendCommand(int commandStoreId, List outcomes, List sanityCheck, Runnable onFlush) { - private JournalCallbacks() + RecordPointer pointer = null; + for (int i = 0; i < outcomes.size(); i++) { + SavedCommand.SavedDiff outcome = outcomes.get(i); + JournalKey key = new JournalKey(outcome.txnId, Type.SAVED_COMMAND, commandStoreId); + pointer = journal.asyncWrite(key, outcome, SENTINEL_HOSTS); } - /** - * Queue up the record for either frame aggregation (if a protocol message) or frame application (if a frame). - */ - @Override - public void onWrite(long segment, int position, int size, Key key, Object value, Object writeContext) - { - RecordPointer pointer = new RecordPointer(segment, position); - cachedRecords.put(pointer, value); - - /* - * if remote request, extract response context - * if local request, extract callback - * if frame, register for application on flush - */ - if (key.type.isRemoteRequest()) - frameAggregator.onWrite(RemoteRequestContext.create(((Request) value).waitForEpoch(), (ResponseContext) writeContext, pointer)); - else if (key.type.isLocalRequest()) - frameAggregator.onWrite(LocalRequestContext.create((LocalRequest) value, (BiConsumer) writeContext, pointer)); - else - frameApplicator.onWrite(pointer, size, (FrameContext) writeContext); - } - - @Override - public void onWriteFailed(Key key, Object value, Object writeContext, Throwable cause) + // If we need to perform sanity check, we can only rely on blocking flushes. Otherwise, we may see into the future. + if (sanityCheck != null) { - if (key.type.isRemoteRequest()) - onRemoteRequestWriteFailed((Request) value, (RemoteRequestContext) writeContext, cause); - else if (key.type.isLocalRequest()) - onLocalRequestWriteFailed((LocalRequestContext) writeContext, cause); - else - onFrameWriteFailed((FrameRecord) value, (FrameContext) writeContext, cause); - } + Condition condition = Condition.newOneTimeCondition(); + journal.onFlush(pointer, condition::signal); + condition.awaitUninterruptibly(); - private void onRemoteRequestWriteFailed(Request request, RemoteRequestContext context, Throwable cause) - { - request.preProcess(node, endpointMapper.mappedId(context.from()), context); - - /* - * Except for Commit.Invalidate, which doesn't return a reply on success or failure, - * all requests here implement MapReduceLocal, with accept() handling both the success and the failure - * response returns. - */ - if (request instanceof MapReduceConsume) - ((MapReduceConsume) request).accept(null, cause); - else - node.agent().onUncaughtException(cause); - } + for (Command check : sanityCheck) + sanityCheck(commandStoreId, check); - private void onLocalRequestWriteFailed(LocalRequestContext context, Throwable cause) - { - context.callback.accept(null, cause); + onFlush.run(); } - - private void onFrameWriteFailed(FrameRecord frame, FrameContext context, Throwable cause) + else { - // TODO (required): panic + journal.onFlush(pointer, onFlush); } + } - @Override - public void onFlush(long segment, int position) - { - frameApplicator.onFlush(segment, position); // will apply flushed frames in correct order in an executor - } + @VisibleForTesting + public void closeCurrentSegmentForTesting() + { + journal.closeCurrentSegmentForTesting(); + } - @Override - public void onFlushFailed(Throwable cause) - { - // TODO (required): panic - } + public void sanityCheck(int commandStoreId, Command orig) + { + List diffs = loadDiffs(commandStoreId, orig.txnId()); + // We can only use strict equality if we supply result. + Command reconstructed = SavedCommand.reconstructFromDiff(diffs, orig.result()); + Invariants.checkState(orig.equals(reconstructed), + "\n" + + "Original: %s\n" + + "Reconstructed: %s\n" + + "Diffs: %s", orig, reconstructed, diffs); } /* * Context necessary to process log records */ - - static class RequestContext implements ReplyContext + static abstract class RequestContext implements ReplyContext { final long waitForEpoch; - final RecordPointer pointer; - private long preAcceptTimeout; - RequestContext(long waitForEpoch, RecordPointer pointer) + RequestContext(long waitForEpoch) { this.waitForEpoch = waitForEpoch; - this.pointer = pointer; } - void preAcceptTimeout(long preAcceptTimeout) + public abstract void process(Node node, AccordEndpointMapper endpointMapper); + } + + private static class LocalRequestContext extends RequestContext + { + private final BiConsumer callback; + private final LocalRequest request; + + LocalRequestContext(long waitForEpoch, LocalRequest request, BiConsumer callback) + { + super(waitForEpoch); + this.callback = callback; + this.request = request; + } + + public void process(Node node, AccordEndpointMapper endpointMapper) { - this.preAcceptTimeout = preAcceptTimeout; + request.process(node, callback); } - public long preAcceptTimeout() + static LocalRequestContext create(LocalRequest request, BiConsumer callback) { - return preAcceptTimeout; + return new LocalRequestContext<>(request.waitForEpoch(), request, callback); } } - private static class LocalRequestContext extends RequestContext + /** + * Barebones response context not holding a reference to the entire message + */ + private abstract static class RemoteRequestContext extends RequestContext implements ResponseContext { - private final BiConsumer callback; + private final Request request; - LocalRequestContext(long waitForEpoch, BiConsumer callback, RecordPointer pointer) + RemoteRequestContext(long waitForEpoch, Request request) { - super(waitForEpoch, pointer); - this.callback = callback; + super(waitForEpoch); + this.request = request; } - static LocalRequestContext create(LocalRequest request, BiConsumer callback, RecordPointer pointer) + static LiveRemoteRequestContext forLive(Request request, ResponseContext context) + { + return new LiveRemoteRequestContext(request, context.id(), context.from(), context.verb(), context.expiresAtNanos()); + } + + @Override + public void process(Node node, AccordEndpointMapper endpointMapper) { - return new LocalRequestContext(request.waitForEpoch(), callback, pointer); + this.request.process(node, endpointMapper.mappedId(from()), this); } + + @Override public abstract long id(); + @Override public abstract InetAddressAndPort from(); + @Override public abstract Verb verb(); + @Override public abstract long expiresAtNanos(); } - /** - * Barebones response context not holding a reference to the entire message - */ - private static class RemoteRequestContext extends RequestContext implements ResponseContext + // TODO: avoid distinguishing between live and non live + private static class LiveRemoteRequestContext extends RemoteRequestContext { private final long id; private final InetAddressAndPort from; private final Verb verb; private final long expiresAtNanos; - RemoteRequestContext(long waitForEpoch, long id, InetAddressAndPort from, Verb verb, long expiresAtNanos, RecordPointer pointer) + LiveRemoteRequestContext(Request request, long id, InetAddressAndPort from, Verb verb, long expiresAtNanos) { - super(waitForEpoch, pointer); + super(request.waitForEpoch(), request); this.id = id; this.from = from; this.verb = verb; this.expiresAtNanos = expiresAtNanos; } - static RemoteRequestContext create(long waitForEpoch, ResponseContext context, RecordPointer pointer) - { - return new RemoteRequestContext(waitForEpoch, context.id(), context.from(), context.verb(), context.expiresAtNanos(), pointer); - } @Override public long id() { return id; } - @Override public InetAddressAndPort from() { return from; } - @Override public Verb verb() { return verb; } - @Override public long expiresAtNanos() { @@ -468,217 +389,29 @@ public long expiresAtNanos() * Records ser/de in the Journal */ - public static class Key - { - final Timestamp timestamp; - final Type type; - - Key(Timestamp timestamp, Type type) - { - if (timestamp == null) throw new NullPointerException("Null timestamp for type " + type); - this.timestamp = timestamp; - this.type = type; - } - - /** - * Support for (de)serializing and comparing record keys. - *

      - * Implements its own serialization and comparison for {@link Timestamp} to satisty - * {@link KeySupport} contract - puts hybrid logical clock ahead of epoch - * when ordering timestamps. This is done for more precise elimination of candidate - * segments by min/max record key in segment. - */ - static final KeySupport SUPPORT = new KeySupport<>() - { - private static final int HLC_OFFSET = 0; - private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; - private static final int NODE_OFFSET = EPOCH_AND_FLAGS_OFFSET + LONG_SIZE; - private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; - - @Override - public int serializedSize(int userVersion) - { - return LONG_SIZE // timestamp.hlc() - + 6 // timestamp.epoch() - + 2 // timestamp.flags() - + INT_SIZE // timestamp.node - + BYTE_SIZE; // type - } - - @Override - public void serialize(Key key, DataOutputPlus out, int userVersion) throws IOException - { - serializeTimestamp(key.timestamp, out); - out.writeByte(key.type.id); - } - - private void serializeTimestamp(Timestamp timestamp, DataOutputPlus out) throws IOException - { - out.writeLong(timestamp.hlc()); - out.writeLong(epochAndFlags(timestamp)); - out.writeInt(timestamp.node.id); - } - - private void serialize(Key key, byte[] out) - { - serializeTimestamp(key.timestamp, out); - out[20] = (byte) (key.type.id & 0xFF); - } - - private void serializeTimestamp(Timestamp timestamp, byte[] out) - { - ByteArrayUtil.putLong(out, 0, timestamp.hlc()); - ByteArrayUtil.putLong(out, 8, epochAndFlags(timestamp)); - ByteArrayUtil.putInt(out, 16, timestamp.node.id); - } - - @Override - public Key deserialize(DataInputPlus in, int userVersion) throws IOException - { - Timestamp timestamp = deserializeTimestamp(in); - int type = in.readByte(); - return new Key(timestamp, Type.fromId(type)); - } - - private Timestamp deserializeTimestamp(DataInputPlus in) throws IOException - { - long hlc = in.readLong(); - long epochAndFlags = in.readLong(); - int nodeId = in.readInt(); - return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); - } - - @Override - public Key deserialize(ByteBuffer buffer, int position, int userVersion) - { - Timestamp timestamp = deserializeTimestamp(buffer, position); - int type = buffer.get(position + TYPE_OFFSET); - return new Key(timestamp, Type.fromId(type)); - } - - private Timestamp deserializeTimestamp(ByteBuffer buffer, int position) - { - long hlc = buffer.getLong(position + HLC_OFFSET); - long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); - int nodeId = buffer.getInt(position + NODE_OFFSET); - return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); - } - - @Override - public void updateChecksum(Checksum crc, Key key, int userVersion) - { - byte[] out = keyCRCBytes.get(); - serialize(key, out); - crc.update(out, 0, out.length); - } - - @Override - public int compareWithKeyAt(Key k, ByteBuffer buffer, int position, int userVersion) - { - int cmp = compareWithTimestampAt(k.timestamp, buffer, position); - if (cmp != 0) return cmp; - - byte type = buffer.get(position + TYPE_OFFSET); - cmp = Byte.compare((byte) k.type.id, type); - return cmp; - } - - private int compareWithTimestampAt(Timestamp timestamp, ByteBuffer buffer, int position) - { - long hlc = buffer.getLong(position + HLC_OFFSET); - int cmp = Long.compareUnsigned(timestamp.hlc(), hlc); - if (cmp != 0) return cmp; - - long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); - cmp = Long.compareUnsigned(epochAndFlags(timestamp), epochAndFlags); - if (cmp != 0) return cmp; - - int nodeId = buffer.getInt(position + NODE_OFFSET); - cmp = Integer.compareUnsigned(timestamp.node.id, nodeId); - return cmp; - } - - @Override - public int compare(Key k1, Key k2) - { - int cmp = compare(k1.timestamp, k2.timestamp); - if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); - return cmp; - } - - private int compare(Timestamp timestamp1, Timestamp timestamp2) - { - int cmp = Long.compareUnsigned(timestamp1.hlc(), timestamp2.hlc()); - if (cmp == 0) cmp = Long.compareUnsigned(epochAndFlags(timestamp1), epochAndFlags(timestamp2)); - if (cmp == 0) cmp = Integer.compareUnsigned(timestamp1.node.id, timestamp2.node.id); - return cmp; - } - - private long epochAndFlags(Timestamp timestamp) - { - return (timestamp.epoch() << 16) | (long) timestamp.flags(); - } - - private long epoch(long epochAndFlags) - { - return epochAndFlags >>> 16; - } - - private int flags(long epochAndFlags) - { - return (int) (epochAndFlags & ((1 << 16) - 1)); - } - }; - - @Override - public boolean equals(Object other) - { - if (this == other) - return true; - return (other instanceof Key) && equals((Key) other); - } - - boolean equals(Key other) - { - return this.type == other.type && this.timestamp.equals(other.timestamp); - } - - @Override - public int hashCode() - { - return type.hashCode() + 31 * timestamp.hashCode(); - } - - @Override - public String toString() - { - return "Key{" + timestamp + ", " + type + '}'; - } - } - - private static final ValueSerializer RECORD_SERIALIZER = new ValueSerializer<>() + private static final ValueSerializer RECORD_SERIALIZER = new ValueSerializer<>() { @Override - public int serializedSize(Key key, Object record, int userVersion) + public int serializedSize(JournalKey key, Object record, int userVersion) { return Ints.checkedCast(key.type.serializedSize(key, record, userVersion)); } @Override - public void serialize(Key key, Object record, DataOutputPlus out, int userVersion) throws IOException + public void serialize(JournalKey key, Object record, DataOutputPlus out, int userVersion) throws IOException { key.type.serialize(key, record, out, userVersion); } @Override - public Object deserialize(Key key, DataInputPlus in, int userVersion) throws IOException + public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException { return key.type.deserialize(key, in, userVersion); } }; /* Adapts vanilla message serializers to journal-expected signatures; converts user version to MS version */ - static final class MessageSerializer implements ValueSerializer + static final class MessageSerializer implements ValueSerializer { final IVersionedSerializer wrapped; @@ -693,19 +426,19 @@ static MessageSerializer wrap(IVersionedSerializer wrapped) } @Override - public int serializedSize(Key key, Object message, int userVersion) + public int serializedSize(JournalKey key, Object message, int userVersion) { return Ints.checkedCast(wrapped.serializedSize((Message) message, msVersion(userVersion))); } @Override - public void serialize(Key key, Object message, DataOutputPlus out, int userVersion) throws IOException + public void serialize(JournalKey key, Object message, DataOutputPlus out, int userVersion) throws IOException { wrapped.serialize((Message) message, out, msVersion(userVersion)); } @Override - public Object deserialize(Key key, DataInputPlus in, int userVersion) throws IOException + public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException { return wrapped.deserialize(in, msVersion(userVersion)); } @@ -730,10 +463,10 @@ interface TxnIdProvider * 2. It's persisted in the record key, so has the additional constraint of being fixed size and * shouldn't be using varint encoding */ - public enum Type implements ValueSerializer + public enum Type implements ValueSerializer { /* Auxiliary journal records */ - FRAME (0, FrameRecord.SERIALIZER), + SAVED_COMMAND (1, SavedCommand.serializer), /* Accord protocol requests */ PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), @@ -786,13 +519,14 @@ public enum Type implements ValueSerializer final MessageType outgoingType; final TxnIdProvider txnIdProvider; - final ValueSerializer serializer; + final ValueSerializer serializer; - Type(int id, ValueSerializer serializer) + Type(int id, ValueSerializer serializer) { this(id, null, null, serializer, null); } + Type(int id, MessageType incomingType, MessageType outgoingType, IVersionedSerializer serializer, TxnIdProvider txnIdProvider) { //noinspection unchecked @@ -805,7 +539,7 @@ public enum Type implements ValueSerializer this(id, type, type, MessageSerializer.wrap((IVersionedSerializer) serializer), txnIdProvider); } - Type(int id, MessageType incomingType, MessageType outgoingType, ValueSerializer serializer, TxnIdProvider txnIdProvider) + Type(int id, MessageType incomingType, MessageType outgoingType, ValueSerializer serializer, TxnIdProvider txnIdProvider) { if (id < 0) throw new IllegalArgumentException("Negative Type id " + id); @@ -816,14 +550,11 @@ public enum Type implements ValueSerializer this.incomingType = incomingType; this.outgoingType = outgoingType; //noinspection unchecked - this.serializer = (ValueSerializer) serializer; + this.serializer = (ValueSerializer) serializer; this.txnIdProvider = txnIdProvider; } private static final Type[] idToTypeMapping; - private static final Map msgTypeToTypeMap; - - private static final ListMultimap msgTypeToSynonymousTypesMap; static { @@ -848,36 +579,7 @@ public enum Type implements ValueSerializer if (null != type.incomingType && null != msgTypeToType.put(type.incomingType, type)) throw new IllegalStateException("Duplicate MessageType " + type.incomingType); } - msgTypeToTypeMap = ImmutableMap.copyOf(msgTypeToType); - - Multimap msgTypeToSynonymousTypes = ArrayListMultimap.create(); - for (Type type : types) - { - if (null != type.outgoingType) - { - Type incomingType = msgTypeToTypeMap.get(type.incomingType); - if (msgTypeToSynonymousTypes.get(type.outgoingType).contains(incomingType)) - throw new IllegalStateException("Duplicate synonymous Type " + type.incomingType); - msgTypeToSynonymousTypes.put(type.outgoingType, incomingType); - } - } - msgTypeToSynonymousTypesMap = ImmutableListMultimap.copyOf(msgTypeToSynonymousTypes); - - //TODO (now): enable as this shows we are currently missing a message -// IllegalStateException e = null; -// for (MessageType t : MessageType.values) -// { -// if (!t.hasSideEffects()) continue; -// Type matches = msgTypeToTypeMap.get(t); -// if (matches == null) -// { -// IllegalStateException ise = new IllegalStateException("Missing MessageType " + t); -// if (e == null) e = ise; -// else e.addSuppressed(ise); -// } -// } -// if (e != null) -// throw e; + ImmutableMap.copyOf(msgTypeToType); } static Type fromId(int id) @@ -890,75 +592,23 @@ static Type fromId(int id) return type; } - static List synonymousTypesFromMessageType(MessageType msgType) - { - List synonymousTypes = msgTypeToSynonymousTypesMap.get(msgType); - if (synonymousTypes.isEmpty()) - throw new IllegalArgumentException("Unsupported MessageType " + msgType); - return synonymousTypes; - } - - static Type fromMessageType(MessageType msgType) - { - Type type = msgTypeToTypeMap.get(msgType); - if (null == type) - throw new IllegalArgumentException("Unsupported MessageType " + msgType); - return type; - } - - boolean isAuxiliary() - { - return outgoingType == null; - } - - boolean isFrame() - { - return this == FRAME; - } - - boolean isRequest() - { - return outgoingType != null; - } - - boolean isRemoteRequest() - { - return isRequest() && outgoingType.isRemote(); - } - - boolean isLocalRequest() - { - return isRequest() && outgoingType.isLocal(); - } - @Override - public int serializedSize(Key key, Object record, int userVersion) + public int serializedSize(JournalKey key, Object record, int userVersion) { return serializer.serializedSize(key, record, userVersion); } @Override - public void serialize(Key key, Object record, DataOutputPlus out, int userVersion) throws IOException + public void serialize(JournalKey key, Object record, DataOutputPlus out, int userVersion) throws IOException { serializer.serialize(key, record, out, userVersion); } @Override - public Object deserialize(Key key, DataInputPlus in, int userVersion) throws IOException + public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException { return serializer.deserialize(key, in, userVersion); } - - TxnId txnId(Message message) - { - return txnIdProvider.txnId(message); - } - } - - static - { - // make noise early if we forget to update our version mappings - Invariants.checkState(MessagingService.current_version == MessagingService.VERSION_51, "Expected current version to be %d but given %d", MessagingService.VERSION_51, MessagingService.current_version); } private static int msVersion(int version) @@ -971,663 +621,95 @@ private static int msVersion(int version) } /* - * Record framing logic + * Handling topology changes / epoch shift */ - /** - * In order to enable the reorder buffer and delayed execution of requests of yet unknown epoch, we explicitly - * group requests for execution in {@link FrameRecord} records. Journal's onWrite() callback submits written - * protocol messages to {@link FrameAggregator}, which creates and writes the frame record to the journal. - * Once written, the frame record is submitted to {@link FrameApplicator}, which will process all the framed - * requests once the frame has been flushed to disk. - */ - private final class FrameAggregator implements Interruptible.Task, Shutdownable + private final class DelayedRequestProcessor extends Thread { - /* external MPSC pending request queue */ - private final ManyToOneConcurrentLinkedQueue unframedRequests = new ManyToOneConcurrentLinkedQueue<>(); - + private final ManyToOneConcurrentLinkedQueue delayedRequests = new ManyToOneConcurrentLinkedQueue<>(); private final LongArrayList waitForEpochs = new LongArrayList(); - private final Long2ObjectHashMap> delayedRequests = new Long2ObjectHashMap<>(); - - private volatile Interruptible executor; - - // a signal and flag that callers outside the aggregator thread can use - // to signal they want the aggregator to run again - private final Semaphore haveWork = newSemaphore(1); - - void onWrite(RequestContext context) - { - unframedRequests.add(context); - haveWork.release(1); - } - - void notifyOfEpoch() - { - haveWork.release(1); - } + private final Long2ObjectHashMap> byEpoch = new Long2ObjectHashMap<>(); + private final AtomicReference signal = new AtomicReference<>(Condition.newOneTimeCondition()); - void start() + private void delay(RequestContext requestContext) { - executor = executorFactory().infiniteLoop("AccordJournal#FrameAggregator", this, SAFE, NON_DAEMON, SYNCHRONIZED); - } - - @Override - public boolean isTerminated() { - return executor == null || executor.isTerminated(); + delayedRequests.add(requestContext); + runOnce(); } - @Override - public void shutdown() + private void runOnce() { - if (executor != null) - executor.shutdown(); - } - - @Override - public Object shutdownNow() { - return executor == null ? null : executor.shutdownNow(); + signal.get().signal(); } - @Override - public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { - return executor == null || executor.awaitTermination(timeout, units); - } - - @Override - public void run(Interruptible.State state) throws InterruptedException - { - if (!unframedRequests.isEmpty() || !delayedRequests.isEmpty()) - doRun(); - - if (state == NORMAL) - haveWork.acquire(1); - } - - private void doRun() + public void run() { - ArrayList requests = null; - - /* - * Deal with delayed requests - */ - - waitForEpochs.sort(null); - - for (int i = 0; i < waitForEpochs.size(); i++) + while (!Thread.currentThread().isInterrupted() && isRunnable(status)) { - long waitForEpoch = waitForEpochs.getLong(i); - if (!node.topology().hasEpoch(waitForEpoch)) - break; - List delayed = delayedRequests.remove(waitForEpoch); - if (null == requests) requests = new ArrayList<>(delayed.size()); - requests.addAll(delayed); - } + try + { + Condition signal = Condition.newOneTimeCondition(); + this.signal.set(signal); + // First, poll delayed requests, put them into by epoch + while (!delayedRequests.isEmpty()) + { + RequestContext context = delayedRequests.poll(); + long waitForEpoch = context.waitForEpoch; + + List l = byEpoch.computeIfAbsent(waitForEpoch, (ignore) -> new ArrayList<>()); + if (l.isEmpty()) + waitForEpochs.pushLong(waitForEpoch); + l.add(context); + node.withEpoch(waitForEpoch, this::runOnce); + } - waitForEpochs.removeIfLong(epoch -> !delayedRequests.containsKey(epoch)); + // Next, process all delayed epochs + for (int i = 0; i < waitForEpochs.size(); i++) + { + long epoch = waitForEpochs.getLong(i); + if (node.topology().hasEpoch(epoch)) + { + List requests = byEpoch.remove(epoch); + assert requests != null : String.format("%s %s (%d)", byEpoch, waitForEpochs, epoch); + for (RequestContext request : requests) + { + try + { + request.process(node, endpointMapper); + } + catch (Throwable t) + { + logger.error(String.format("Caught an exception while processing a delayed request %s", request), t); + } + } + } + } - /* - * Deal with regular pending requests - */ + waitForEpochs.removeIfLong(epoch -> !byEpoch.containsKey(epoch)); - RequestContext request; - while (null != (request = unframedRequests.poll())) - { - long waitForEpoch = request.waitForEpoch; - if (waitForEpoch != 0 && !node.topology().hasEpoch(waitForEpoch)) + signal.await(); + } + catch (InterruptedException e) { - delayedRequests.computeIfAbsent(waitForEpoch, ignore -> new ArrayList<>()).add(request); - if (!waitForEpochs.containsLong(waitForEpoch)) - { - waitForEpochs.addLong(waitForEpoch); - node.withEpoch(waitForEpoch, this::notifyOfEpoch); - } + logger.info("Delayed request processor thread interrupted. Shutting down."); + return; } - else + catch (Throwable t) { - if (null == requests) requests = new ArrayList<>(); - requests.add(request); + logger.error("Caught an exception in delayed processor", t); } } - - if (requests != null) - { - ArrayList pointers = new ArrayList<>(requests.size()); - for (RequestContext req : requests) pointers.add(req.pointer); - FrameRecord frame = new FrameRecord(node.uniqueNow(), pointers, node.agent().preAcceptTimeout()); - FrameContext context = new FrameContext(requests); - appendAuxiliaryRecord(frame, context); - } - } - } - - /** - * Processes the requests that have been grouped by {@link FrameAggregator}. - * Gets the aggregated frames containing previously written requests/messages, - * and sorts and "applies" them once part of the journal that fully contains them is flushed. - */ - private final class FrameApplicator implements Runnable, Shutdownable - { - /** external SPSC written frame queue */ - private final SpscLinkedQueue newFrames = new SpscLinkedQueue<>(); - - /* single-thread accessed internal frame buffer */ - private final ArrayList pendingFrames = new ArrayList<>(); - - /* furthest flushed journal segment + position */ - private volatile RecordPointer flushedUntil = null; - - private volatile SequentialExecutorPlus executor; - - /* invoked from FrameGenerator thread via appendAuxiliaryRecord() call */ - void onWrite(RecordPointer start, int size, FrameContext context) - { - newFrames.add(new PendingFrame(start, new RecordPointer(start.segment, start.position + size), context)); - } - - /* invoked only from Journal Flusher thread (single) */ - void onFlush(long segment, int position) - { - flushedUntil = new RecordPointer(segment, position); - executor.submit(this); - } - - void start() - { - executor = executorFactory().sequential("AccordJournal#FrameApplicator"); - } - - @Override - public boolean isTerminated() { - return executor == null || executor.isTerminated(); - } - - @Override - public void shutdown() - { - if (executor != null) - executor.shutdown(); - } - - @Override - public Object shutdownNow() { - return executor == null ? null : executor.shutdownNow(); - } - - @Override - public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException { - return executor == null || executor.awaitTermination(timeout, units); - } - - @Override - public void run() - { - if (newFrames.drain(pendingFrames::add) > 0) - { - /* order by position in the journal, DESC */ - pendingFrames.sort((f1, f2) -> f2.start.compareTo(f1.start)); - } - - RecordPointer flushedUntil = this.flushedUntil; - for (int i = pendingFrames.size() - 1; i >= 0; i--) - { - PendingFrame frame = pendingFrames.get(i); - if (frame.end.compareTo(flushedUntil) > 0) - break; - applyFrame((FrameRecord) cachedRecords.remove(frame.start), frame.context); - pendingFrames.remove(i); - } - } - - private void applyFrame(FrameRecord frame, FrameContext context) - { - Invariants.checkState(frame.pointers.size() == context.requestContexts.size()); - for (int i = 0; i < frame.pointers.size(); i++) - applyRequest(frame.pointers.get(i), context.requestContexts.get(i), frame.preAcceptTimeoutMicros); - } - - private void applyRequest(RecordPointer pointer, RequestContext context, long preAcceptTimeout) - { - Message message = (Message) cachedRecords.remove(pointer); - Type type = Type.fromMessageType(message.type()); - if (type == Type.PRE_ACCEPT || type == Type.BEGIN_RECOVER) - context.preAcceptTimeout(preAcceptTimeout); - - if (type.isRemoteRequest()) - { - Request request = (Request) message; - RemoteRequestContext ctx = (RemoteRequestContext) context; - Id from = endpointMapper.mappedId(ctx.from()); - request.process(node, from, ctx); - } - else - { - Invariants.checkState(type.isLocalRequest()); - LocalRequestContext ctx = (LocalRequestContext) context; - // TODO (expected): Make Propagate PreAccept receive preAcceptTimeout and timestamps - //noinspection unchecked,rawtypes - ((LocalRequest) message).process(node, ctx.callback); - } - } - - /** - * Frame that has been written to the journal (implying all the requests referenced by it also have been written), - * but have not been process by the frame applicaticator yet. - * Will be processed by the frame applicator once the journal has flushed the frame record. - */ - private final class PendingFrame - { - final RecordPointer start; - final RecordPointer end; - final FrameContext context; - - PendingFrame(RecordPointer start, RecordPointer end, FrameContext context) - { - this.start = start; - this.end = end; - this.context = context; - } - } - } - - private static final class FrameContext - { - final List requestContexts; - - FrameContext(List requestContexts) - { - this.requestContexts = requestContexts; - } - } - - private static abstract class AuxiliaryRecord - { - final Timestamp timestamp; - - AuxiliaryRecord(Timestamp timestamp) - { - this.timestamp = timestamp; } - - abstract Type type(); } - public static final IVersionedSerializer RECORD_POINTER_SERIALIZER = new IVersionedSerializer<>() + public boolean isRunnable(Status status) { - @Override - public void serialize(RecordPointer p, DataOutputPlus out, int version) throws IOException - { - out.writeUnsignedVInt(p.segment); - out.writeUnsignedVInt32(p.position); - } - - @Override - public RecordPointer deserialize(DataInputPlus in, int version) throws IOException - { - long segment = in.readUnsignedVInt(); - int position = in.readUnsignedVInt32(); - return new RecordPointer(segment, position); - } - - @Override - public long serializedSize(RecordPointer p, int version) - { - return computeUnsignedVIntSize(p.segment) + computeUnsignedVIntSize(p.position); - } - }; - - private static final class FrameRecord extends AuxiliaryRecord - { - final List pointers; - final long preAcceptTimeoutMicros; - - FrameRecord(Timestamp timestamp, List pointers, long preAcceptTimeoutMicros) - { - super(timestamp); - this.pointers = pointers; - this.preAcceptTimeoutMicros = preAcceptTimeoutMicros; - } - - @Override - Type type() - { - return Type.FRAME; - } - - static final ValueSerializer SERIALIZER = new ValueSerializer<>() - { - @Override - public int serializedSize(Key key, FrameRecord frame, int userVersion) - { - return Ints.checkedCast(serializedListSize(frame.pointers, userVersion, RECORD_POINTER_SERIALIZER)) + - computeUnsignedVIntSize(frame.preAcceptTimeoutMicros); - } - - @Override - public void serialize(Key key, FrameRecord frame, DataOutputPlus out, int userVersion) throws IOException - { - serializeList(frame.pointers, out, userVersion, RECORD_POINTER_SERIALIZER); - VIntCoding.writeUnsignedVInt(frame.preAcceptTimeoutMicros, out); - } - - @Override - public FrameRecord deserialize(Key key, DataInputPlus in, int userVersion) throws IOException - { - return new FrameRecord(key.timestamp, deserializeList(in, userVersion, RECORD_POINTER_SERIALIZER), VIntCoding.readUnsignedVInt(in)); - } - }; - } - - /* - * Message provider implementation - */ - @Override - public SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) - { - return LOG_MESSAGE_PROVIDER ? new LoggingMessageProvider(txnId, new MessageProvider(txnId)) : new MessageProvider(txnId); - } - - private final class MessageProvider implements SerializerSupport.MessageProvider - { - final TxnId txnId; - - private MessageProvider(TxnId txnId) - { - this.txnId = txnId; - } - - @Override - public TxnId txnId() - { - return txnId; - } - - @Override - public Set test(Set messages) - { - Set keys = new ObjectHashSet<>(messages.size() + 1, 0.9f); - for (MessageType message : messages) - for (Type synonymousType : Type.synonymousTypesFromMessageType(message)) - keys.add(new Key(txnId, synonymousType)); - Set presentKeys = journal.test(keys); - Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); - for (Key key : presentKeys) - presentMessages.add(key.type.outgoingType); - return presentMessages; - } - - @Override - public Set all() - { - Set types = EnumSet.allOf(Type.class); - Set keys = new ObjectHashSet<>(types.size() + 1, 0.9f); - for (Type type : types) - keys.add(new Key(txnId, type)); - Set presentKeys = journal.test(keys); - Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); - for (Key key : presentKeys) - presentMessages.add(key.type.outgoingType); - return presentMessages; - } - - @Override - public PreAccept preAccept() - { - return readMessage(txnId, PRE_ACCEPT_REQ, PreAccept.class); - } - - @Override - public BeginRecovery beginRecover() - { - return readMessage(txnId, BEGIN_RECOVER_REQ, BeginRecovery.class); - } - - @Override - public Propagate propagatePreAccept() - { - return readMessage(txnId, PROPAGATE_PRE_ACCEPT_MSG, Propagate.class); - } - - @Override - public Accept accept(Ballot ballot) - { - return readMessage(txnId, ACCEPT_REQ, Accept.class, (accept) -> ((Accept) accept).ballot.equals(ballot)); - } - - @Override - public Commit commitSlowPath() - { - return readMessage(txnId, COMMIT_SLOW_PATH_REQ, Commit.class); - } - - @Override - public Commit commitMaximal() - { - return readMessage(txnId, COMMIT_MAXIMAL_REQ, Commit.class); - } - - @Override - public Commit stableFastPath() - { - return readMessage(txnId, STABLE_FAST_PATH_REQ, Commit.class); - } - - @Override - public Commit stableSlowPath() - { - return readMessage(txnId, STABLE_SLOW_PATH_REQ, Commit.class); - } - - @Override - public Commit stableMaximal() - { - return readMessage(txnId, STABLE_MAXIMAL_REQ, Commit.class); - } - - @Override - public Propagate propagateStable() - { - return readMessage(txnId, PROPAGATE_STABLE_MSG, Propagate.class); - } - - @Override - public Apply applyMinimal() - { - return readMessage(txnId, APPLY_MINIMAL_REQ, Apply.class); - } - - @Override - public Apply applyMaximal() - { - return readMessage(txnId, APPLY_MAXIMAL_REQ, Apply.class); - } - - @Override - public Propagate propagateApply() - { - return readMessage(txnId, PROPAGATE_APPLY_MSG, Propagate.class); - } - - @Override - public Propagate propagateOther() - { - return readMessage(txnId, PROPAGATE_OTHER_MSG, Propagate.class); - } - - @Override - public ApplyThenWaitUntilApplied applyThenWaitUntilApplied() - { - return readMessage(txnId, APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, ApplyThenWaitUntilApplied.class); - } + return status != Status.TERMINATING && status != status.TERMINATED; } - private final class LoggingMessageProvider implements SerializerSupport.MessageProvider + @VisibleForTesting + public void truncateForTesting() { - private final TxnId txnId; - private final MessageProvider provider; - - LoggingMessageProvider(TxnId txnId, MessageProvider provider) - { - this.txnId = txnId; - this.provider = provider; - } - - @Override - public TxnId txnId() - { - return txnId; - } - - @Override - public Set test(Set messages) - { - logger.debug("Checking {} messages for {}", messages, txnId); - Set confirmed = provider.test(messages); - logger.debug("Confirmed {} messages for {}", confirmed, txnId); - return confirmed; - } - - @Override - public Set all() - { - logger.debug("Checking all messages for {}", txnId); - Set confirmed = provider.all(); - logger.debug("Confirmed {} messages for {}", confirmed, txnId); - return confirmed; - } - - @Override - public PreAccept preAccept() - { - logger.debug("Fetching {} message for {}", PRE_ACCEPT_REQ, txnId); - PreAccept preAccept = provider.preAccept(); - logger.debug("Fetched {} message for {}: {}", PRE_ACCEPT_REQ, txnId, preAccept); - return preAccept; - } - - @Override - public BeginRecovery beginRecover() - { - logger.debug("Fetching {} message for {}", BEGIN_RECOVER_REQ, txnId); - BeginRecovery beginRecover = provider.beginRecover(); - logger.debug("Fetched {} message for {}: {}", BEGIN_RECOVER_REQ, txnId, beginRecover); - return beginRecover; - } - - @Override - public Propagate propagatePreAccept() - { - logger.debug("Fetching {} message for {}", PROPAGATE_PRE_ACCEPT_MSG, txnId); - Propagate propagate = provider.propagatePreAccept(); - logger.debug("Fetched {} message for {}: {}", PROPAGATE_PRE_ACCEPT_MSG, txnId, propagate); - return propagate; - } - - @Override - public Accept accept(Ballot ballot) - { - logger.debug("Fetching {} message (with accepted: {}) for {}", ACCEPT_REQ, ballot, txnId); - Accept accept = provider.accept(ballot); - logger.debug("Fetched {} message (with accepted: {}) for {}: {}", ACCEPT_REQ, ballot, txnId, accept); - return accept; - } - - @Override - public Commit commitSlowPath() - { - logger.debug("Fetching {} message for {}", COMMIT_SLOW_PATH_REQ, txnId); - Commit commit = provider.commitSlowPath(); - logger.debug("Fetched {} message for {}: {}", COMMIT_SLOW_PATH_REQ, txnId, commit); - return commit; - } - - @Override - public Commit commitMaximal() - { - logger.debug("Fetching {} message for {}", COMMIT_MAXIMAL_REQ, txnId); - Commit commit = provider.commitMaximal(); - logger.debug("Fetched {} message for {}: {}", COMMIT_MAXIMAL_REQ, txnId, commit); - return commit; - } - - @Override - public Commit stableFastPath() - { - logger.debug("Fetching {} message for {}", STABLE_FAST_PATH_REQ, txnId); - Commit commit = provider.stableFastPath(); - logger.debug("Fetched {} message for {}: {}", STABLE_FAST_PATH_REQ, txnId, commit); - return commit; - } - - @Override - public Commit stableSlowPath() - { - logger.debug("Fetching {} message for {}", STABLE_SLOW_PATH_REQ, txnId); - Commit commit = provider.stableSlowPath(); - logger.debug("Fetched {} message for {}: {}", STABLE_SLOW_PATH_REQ, txnId, commit); - return commit; - } - - @Override - public Commit stableMaximal() - { - logger.debug("Fetching {} message for {}", STABLE_MAXIMAL_REQ, txnId); - Commit commit = provider.stableMaximal(); - logger.debug("Fetched {} message for {}: {}", STABLE_MAXIMAL_REQ, txnId, commit); - return commit; - } - - @Override - public Propagate propagateStable() - { - logger.debug("Fetching {} message for {}", PROPAGATE_STABLE_MSG, txnId); - Propagate propagate = provider.propagateStable(); - logger.debug("Fetched {} message for {}: {}", PROPAGATE_STABLE_MSG, txnId, propagate); - return propagate; - } - - @Override - public Apply applyMinimal() - { - logger.debug("Fetching {} message for {}", APPLY_MINIMAL_REQ, txnId); - Apply apply = provider.applyMinimal(); - logger.debug("Fetched {} message for {}: {}", APPLY_MINIMAL_REQ, txnId, apply); - return apply; - } - - @Override - public Apply applyMaximal() - { - logger.debug("Fetching {} message for {}", APPLY_MAXIMAL_REQ, txnId); - Apply apply = provider.applyMaximal(); - logger.debug("Fetched {} message for {}: {}", APPLY_MAXIMAL_REQ, txnId, apply); - return apply; - } - - @Override - public Propagate propagateApply() - { - logger.debug("Fetching {} message for {}", PROPAGATE_APPLY_MSG, txnId); - Propagate propagate = provider.propagateApply(); - logger.debug("Fetched {} message for {}: {}", PROPAGATE_APPLY_MSG, txnId, propagate); - return propagate; - } - - @Override - public Propagate propagateOther() - { - logger.debug("Fetching {} message for {}", PROPAGATE_OTHER_MSG, txnId); - Propagate propagate = provider.propagateOther(); - logger.debug("Fetched {} message for {}: {}", PROPAGATE_OTHER_MSG, txnId, propagate); - return propagate; - } - - @Override - public ApplyThenWaitUntilApplied applyThenWaitUntilApplied() - { - logger.debug("Fetching {} message for {}", APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, txnId); - ApplyThenWaitUntilApplied apply = provider.applyThenWaitUntilApplied(); - logger.debug("Fetched {} message for {}: {}", APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, txnId, apply); - return apply; - } + journal.truncateForTesting(); } -} +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index ff7f2c5a47c0..cc51bd833906 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -25,7 +25,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.NavigableMap; import java.util.Objects; import java.util.Set; @@ -52,20 +51,14 @@ import accord.local.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; -import accord.local.Command.WaitingOn; import accord.local.CommandStore; -import accord.local.CommonAttributes; import accord.local.DurableBefore; import accord.local.Listeners; import accord.local.Node; import accord.local.RedundantBefore; import accord.local.SaveStatus; -import accord.local.SerializerSupport; -import accord.local.SerializerSupport.MessageProvider; -import accord.local.SerializerSupport.WaitingOnProvider; import accord.local.Status; import accord.local.Status.Durability; -import accord.primitives.Ballot; import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.Route; @@ -115,7 +108,6 @@ import org.apache.cassandra.db.rows.BTreeRow; import org.apache.cassandra.db.rows.BufferCell; import org.apache.cassandra.db.rows.Cell; -import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.Row.Deletion; import org.apache.cassandra.db.rows.RowIterator; @@ -156,10 +148,8 @@ import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.ListenerSerializers; import org.apache.cassandra.service.accord.serializers.TopologySerializers; -import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock.Global; -import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.btree.BTreeSet; import org.apache.cassandra.utils.bytecomparable.ByteComparable; @@ -248,11 +238,6 @@ static TokenType valueOf(Token token) + "route blob," + "durability int," + format("execute_at %s,", TIMESTAMP_TUPLE) - + format("promised_ballot %s,", TIMESTAMP_TUPLE) - + format("accepted_ballot %s,", TIMESTAMP_TUPLE) - + format("execute_atleast %s,", TIMESTAMP_TUPLE) - + "waiting_on blob," - + "listeners set, " + "PRIMARY KEY((store_id, domain, txn_id))" + ')') .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, Int32Type.instance, TIMESTAMP_TYPE))) @@ -297,11 +282,6 @@ public static class CommandsColumns public static final ColumnMetadata route = getColumn(Commands, "route"); public static final ColumnMetadata durability = getColumn(Commands, "durability"); public static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); - static final ColumnMetadata promised_ballot = getColumn(Commands, "promised_ballot"); - static final ColumnMetadata accepted_ballot = getColumn(Commands, "accepted_ballot"); - static final ColumnMetadata execute_atleast = getColumn(Commands, "execute_atleast"); - static final ColumnMetadata waiting_on = getColumn(Commands, "waiting_on"); - static final ColumnMetadata listeners = getColumn(Commands, "listeners"); public static final ColumnMetadata[] TRUNCATE_FIELDS = new ColumnMetadata[] { durability, execute_at, route, status }; @@ -779,65 +759,6 @@ private static > void addEnumCellIfModified addCellIfModified(column, get, v -> accessor.valueOf(v.ordinal()), builder, timestampMicros, nowInSeconds, original, command); } - private static void addSetChanges(ColumnMetadata column, Function> get, SerializeFunction serialize, Row.Builder builder, long timestampMicros, int nowInSec, C original, C command) throws IOException - { - Set prev = original != null ? get.apply(original) : Collections.emptySet(); - if (prev == null) prev = Collections.emptySet(); - Set value = get.apply(command); - if (value == null) value = Collections.emptySet(); - - if (value.isEmpty() && !prev.isEmpty()) - { - builder.addComplexDeletion(column, DeletionTime.build(timestampMicros, nowInSec)); - return; - } - - for (V item : Sets.difference(value, prev)) - builder.addCell(live(column, timestampMicros, EMPTY_BYTE_BUFFER, CellPath.create(serialize.apply(item)))); - - for (V item : Sets.difference(prev, value)) - builder.addCell(tombstone(column, timestampMicros, nowInSec, CellPath.create(serialize.apply(item)))); - } - - private static void addMapChanges(ColumnMetadata column, Function> get, SerializeFunction serializeKey, SerializeFunction serializeVal, Row.Builder builder, long timestampMicros, int nowInSec, C original, C command) throws IOException - { - Map prev = original != null ? get.apply(original) : Collections.emptyMap(); - if (prev == null) prev = Collections.emptyMap(); - Map value = get.apply(command); - if (value == null) value = Collections.emptyMap(); - - if (value.isEmpty() && !prev.isEmpty()) - { - builder.addComplexDeletion(column, DeletionTime.build(timestampMicros, nowInSec)); - return; - } - - for (Map.Entry entry : value.entrySet()) - { - K key = entry.getKey(); - V pVal = prev.get(key); - if (pVal != null && pVal.equals(entry.getValue())) - continue; - builder.addCell(live(column, timestampMicros, serializeVal.apply(entry.getValue()), CellPath.create(serializeKey.apply(key)))); - } - for (K key : Sets.difference(prev.keySet(), value.keySet())) - builder.addCell(tombstone(column, timestampMicros, nowInSec, CellPath.create(serializeKey.apply(key)))); - } - - private static int estimateMapChanges(Map prev, Map value) - { - return Math.abs(prev.size() - value.size()); - } - - private static int estimateMapChanges(Function> get, C original, C command) - { - Map prev = original != null ? get.apply(original) : Collections.emptyMap(); - if (prev == null) prev = Collections.emptyMap(); - Map value = get.apply(command); - if (value == null) value = Collections.emptyMap(); - return estimateMapChanges(prev, value); - } - public static Mutation getCommandMutation(AccordCommandStore commandStore, AccordSafeCommand liveCommand, long timestampMicros) { return getCommandMutation(commandStore.id(), liveCommand.original(), liveCommand.current(), timestampMicros); @@ -856,21 +777,8 @@ public static Mutation getCommandMutation(int storeId, Command original, Command addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, nowInSeconds, original, command); - addSetChanges(CommandsColumns.listeners, Command::durableListeners, v -> serialize(v, LocalVersionedSerializers.listeners), builder, timestampMicros, nowInSeconds, original, command); addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.promised_ballot, Command::promised, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.accepted_ballot, Command::acceptedOrCommitted, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - if (command.txnId().kind().awaitsOnlyDeps()) - addCellIfModified(CommandsColumns.execute_atleast, Command::executesAtLeast, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); - - if (command.isStable() && !command.isTruncated()) - { - Command.Committed committed = command.asCommitted(); - Command.Committed originalCommitted = original != null && original.isCommitted() ? original.asCommitted() : null; - if (originalCommitted == null || committed.waitingOn != originalCommitted.waitingOn) - builder.addCell(live(CommandsColumns.waiting_on, timestampMicros, WaitingOnSerializer.serialize(committed.txnId(), committed.waitingOn))); - } Row row = builder.build(); if (row.columnCount() == 0) @@ -966,23 +874,6 @@ private static T deserializeTimestampOrDefault(UntypedResu return deserializeTimestampOrDefault(row.getBlob(name), ByteBufferAccessor.instance, factory, defaultVal); } - private static ByteBuffer bytesOrNull(Row row, ColumnMetadata column) - { - Cell cell = row.getCell(column); - return cell != null && !cell.isTombstone() ? cell.buffer() : null; - } - - private static T deserializeTimestampOrDefault(Row row, ColumnMetadata column, TimestampFactory factory, T valIfNull) - { - ByteBuffer bytes = bytesOrNull(row, column); - if (bytes == null) - return valIfNull; - T result = deserializeTimestampOrNull(bytes, factory); - if (result == null) - return valIfNull; - return result; - } - public static Durability deserializeDurabilityOrNull(Cell cell) { return cell == null ? null : CommandSerializers.durability.forOrdinal(cell.accessor().getInt(cell.value(), 0)); @@ -993,6 +884,7 @@ public static SaveStatus deserializeSaveStatusOrNull(Cell cell) return cell == null ? null : CommandSerializers.saveStatus.forOrdinal(cell.accessor().getInt(cell.value(), 0)); } + @VisibleForTesting public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId txnId) { String cql = "SELECT * FROM " + ACCORD_KEYSPACE_NAME + '.' + COMMANDS + ' ' + @@ -1006,12 +898,6 @@ public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId t txnId.msb, txnId.lsb, txnId.node.id); } - public static void findAllCommandsByDomain(int commandStore, Routable.Domain domain, Set columns, Observable callback) - { - WalkCommandsForDomain work = new WalkCommandsForDomain(commandStore, domain, columns, Stage.READ.executor(), callback); - work.schedule(); - } - private static abstract class TableWalk implements Runnable, DebuggableTask { private final long creationTimeNanos = Global.nanoTime(); @@ -1209,47 +1095,6 @@ protected UntypedResultSet query(UntypedResultSet.Row lastSeen) } } - public static Command loadCommand(AccordCommandStore commandStore, TxnId txnId) - { - commandStore.checkNotInStoreThread(); - return unsafeLoadCommand(commandStore, txnId); - } - - static Command unsafeLoadCommand(AccordCommandStore commandStore, TxnId txnId) - { - UntypedResultSet rows = loadCommandRow(commandStore, txnId); - if (rows.isEmpty()) - return null; - UntypedResultSet.Row row = rows.one(); - - try - { - checkState(deserializeTxnId(row).equals(txnId)); - - CommonAttributes.Mutable attrs = - new CommonAttributes.Mutable(txnId) - .durability(deserializeDurability(row)) - .route(deserializeRouteOrNull(row)) - .setListeners(deserializeListeners(row)); - SaveStatus status = deserializeStatus(row); - - Timestamp executeAt = deserializeExecuteAtOrNull(row); - Ballot promised = deserializePromisedOrNull(row); - Ballot accepted = deserializeAcceptedOrNull(row); - Timestamp executeAtLeast = status.is(Status.Truncated) && txnId.kind().awaitsOnlyDeps() ? deserializeExecuteAtLeastOrNull(row) : null; - - WaitingOnProvider waitingOn = deserializeWaitingOn(txnId, row); - MessageProvider messages = commandStore.makeMessageProvider(txnId); - - return SerializerSupport.reconstruct(commandStore.agent(), commandStore.unsafeRangesForEpoch(), attrs, status, executeAt, executeAtLeast, promised, accepted, waitingOn, messages); - } - catch (Throwable t) - { - logger.error("Exception loading AccordCommand " + txnId, t); - throw Throwables.unchecked(t); - } - } - public static TxnId deserializeTxnId(UntypedResultSet.Row row) { return deserializeTimestampOrNull(row, "txn_id", TxnId::fromBits); @@ -1303,55 +1148,6 @@ private static Listeners.Immutable deserializeListeners(UntypedResultSet.Row row return new Listeners.Immutable(result); } - public static SaveStatus deserializeStatus(UntypedResultSet.Row row) - { - // TODO (performance, expected): something less brittle than ordinal, more efficient than values() - return SaveStatus.values()[row.getInt("status")]; - } - - public static Timestamp deserializeExecuteAtOrNull(UntypedResultSet.Row row) - { - return deserializeTimestampOrNull(row, "execute_at", Timestamp::fromBits); - } - - public static Timestamp deserializeExecuteAtLeastOrNull(UntypedResultSet.Row row) - { - return deserializeTimestampOrNull(row, "execute_atleast", Timestamp::fromBits); - } - - public static Ballot deserializePromisedOrNull(UntypedResultSet.Row row) - { - return deserializeTimestampOrNull(row.getBlob("promised_ballot"), Ballot::fromBits); - } - - public static Ballot deserializeAcceptedOrNull(UntypedResultSet.Row row) - { - return deserializeTimestampOrNull(row.getBlob("accepted_ballot"), Ballot::fromBits); - } - - private static WaitingOnProvider deserializeWaitingOn(TxnId txnId, UntypedResultSet.Row row) - { - ByteBuffer bytes = row.getBlob("waiting_on"); - - return (deps) -> - { - if (bytes == null) - return null; - - if (!bytes.hasRemaining()) - return WaitingOn.none(deps); - - try - { - return WaitingOnSerializer.deserialize(txnId, deps.keyDeps.keys(), deps.rangeDeps.txnIds(), bytes); - } - catch (IOException e) - { - throw Throwables.unchecked(e); - } - }; - } - public static PartitionKey deserializeKey(ByteBuffer buffer) { List split = KEY_TYPE.unpack(buffer, ByteBufferAccessor.instance); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index 6009e52c3aa9..5c458aa45eb7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -91,12 +91,18 @@ public void set(Command command) this.current = command; } + @Override public Command original() { checkNotInvalidated(); return original; } + public SavedCommand.SavedDiff diff() + { + return SavedCommand.diff(original, current); + } + @Override public void preExecute() { @@ -125,14 +131,14 @@ public boolean invalidated() } @Override - public void addListener(Command.TransientListener listener) + public void addListener(TransientListener listener) { checkNotInvalidated(); global.addListener(listener); } @Override - public boolean removeListener(Command.TransientListener listener) + public boolean removeListener(TransientListener listener) { checkNotInvalidated(); return global.removeListener(listener); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index c4baa6bb3fdb..c6c8499880a0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -33,10 +33,6 @@ import accord.local.CommandStores.RangesForEpoch; import accord.local.NodeTimeService; import accord.local.PreLoadContext; -import accord.messages.BeginRecovery; - -import accord.messages.PreAccept; -import accord.messages.TxnRequest; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; import accord.primitives.Deps; @@ -50,7 +46,6 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore { - private final long preAcceptTimeout; private final Map commands; private final NavigableMap commandsForKeys; private final NavigableMap timestampsForKeys; @@ -59,7 +54,6 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore commands, NavigableMap timestampsForKey, NavigableMap commandsForKey, @@ -67,7 +61,6 @@ private AccordSafeCommandStore(PreLoadContext context, AccordCommandStore commandStore) { super(context); - this.preAcceptTimeout = preAcceptTimeout; this.commands = commands; this.timestampsForKeys = timestampsForKey; this.commandsForKeys = commandsForKey; @@ -83,17 +76,7 @@ public static AccordSafeCommandStore create(PreLoadContext preLoadContext, @Nullable AccordSafeCommandsForRanges commandsForRanges, AccordCommandStore commandStore) { - long preAcceptTimeoutMicros = -1; - if ((preLoadContext instanceof PreAccept || preLoadContext instanceof BeginRecovery)) - { - TxnRequest preAccept = (TxnRequest) preLoadContext; - AccordJournal.RequestContext context = (AccordJournal.RequestContext) preAccept.replyContext(); - // TODO (required): SimulatedDepsTest and some other tests aren't calling preProcess, hence do not set context - if (context != null) - preAcceptTimeoutMicros = context.preAcceptTimeout(); - } - - return new AccordSafeCommandStore(preLoadContext, preAcceptTimeoutMicros, commands, timestampsForKey, commandsForKey, commandsForRanges, commandStore); + return new AccordSafeCommandStore(preLoadContext, commands, timestampsForKey, commandsForKey, commandsForRanges, commandStore); } @Override @@ -187,15 +170,6 @@ public NodeTimeService time() return commandStore.time(); } - @Override - public long preAcceptTimeout() - { - if (preAcceptTimeout == -1) - return super.preAcceptTimeout(); - - return preAcceptTimeout; - } - @Override public RangesForEpoch ranges() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index d4a7e67fc479..bce3d3368a11 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -611,7 +611,7 @@ private void handleLocalRequest(LocalRequest request, BiConsumer epochReady(Epoch epoch) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index e47fea9d6a26..92b54e28c7b0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -52,7 +52,7 @@ public void doVerb(Message message) throws IOException if (request.type().hasSideEffects()) { - journal.appendRemoteRequest(request, message); + journal.processRemoteRequest(request, message); return; } diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java index 1338a21980f8..eb0f627c1c39 100644 --- a/src/java/org/apache/cassandra/service/accord/IJournal.java +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -18,12 +18,20 @@ package org.apache.cassandra.service.accord; -import accord.local.SerializerSupport; -import accord.messages.Message; +import java.util.List; + +import accord.local.Command; import accord.primitives.TxnId; public interface IJournal { - SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId); - void appendMessageBlocking(Message message); + Command loadCommand(int commandStoreId, TxnId txnId); + + /** + * Append outcomes to the log. + * + * Returns whether an async flush was requested. If it returns false, all commands are guaranteed to be flushed by that time. + * If it returns false, onFlush runnable will run whenever flush is done. + */ + void appendCommand(int commandStoreId, List command, List sanityCheck, Runnable onFlush); } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java new file mode 100644 index 000000000000..04688e1f2a64 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Objects; +import java.util.zip.Checksum; + +import accord.local.Node; +import accord.primitives.Timestamp; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.journal.KeySupport; +import org.apache.cassandra.utils.ByteArrayUtil; + +import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; +import static org.apache.cassandra.db.TypeSizes.INT_SIZE; +import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; +import static org.apache.cassandra.db.TypeSizes.SHORT_SIZE; + +public final class JournalKey +{ + final Timestamp timestamp; + final AccordJournal.Type type; // TODO (desired): do we even need type here anymore? + final int commandStoreId; + + JournalKey(Timestamp timestamp, AccordJournal.Type type) + { + this(timestamp, type, -1); + } + + JournalKey(Timestamp timestamp, AccordJournal.Type type, int commandStoreId) + { + if (timestamp == null) throw new NullPointerException("Null timestamp for type " + type); + this.timestamp = timestamp; + this.type = type; + this.commandStoreId = commandStoreId; + } + + /** + * Support for (de)serializing and comparing record keys. + *

      + * Implements its own serialization and comparison for {@link Timestamp} to satisty + * {@link KeySupport} contract - puts hybrid logical clock ahead of epoch + * when ordering timestamps. This is done for more precise elimination of candidate + * segments by min/max record key in segment. + */ + static final KeySupport SUPPORT = new KeySupport<>() + { + private static final int HLC_OFFSET = 0; + private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; + private static final int NODE_OFFSET = EPOCH_AND_FLAGS_OFFSET + LONG_SIZE; + private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; + private static final int CS_ID_OFFSET = TYPE_OFFSET + BYTE_SIZE; + + @Override + public int serializedSize(int userVersion) + { + return LONG_SIZE // timestamp.hlc() + + 6 // timestamp.epoch() + + 2 // timestamp.flags() + + INT_SIZE // timestamp.node + + BYTE_SIZE // type + + SHORT_SIZE; // commandStoreId + } + + @Override + public void serialize(JournalKey key, DataOutputPlus out, int userVersion) throws IOException + { + serializeTimestamp(key.timestamp, out); + out.writeByte(key.type.id); + out.writeShort(key.commandStoreId); + } + + private void serialize(JournalKey key, byte[] out) + { + serializeTimestamp(key.timestamp, out); + out[20] = (byte) (key.type.id & 0xFF); + ByteArrayUtil.putShort(out, 21, (short) key.commandStoreId); + } + + @Override + public JournalKey deserialize(DataInputPlus in, int userVersion) throws IOException + { + Timestamp timestamp = deserializeTimestamp(in); + int type = in.readByte(); + int commandStoreId = in.readShort(); + return new JournalKey(timestamp, AccordJournal.Type.fromId(type), commandStoreId); + } + + @Override + public JournalKey deserialize(ByteBuffer buffer, int position, int userVersion) + { + Timestamp timestamp = deserializeTimestamp(buffer, position); + int type = buffer.get(position + TYPE_OFFSET); + int commandStoreId = buffer.getShort(position + CS_ID_OFFSET); + return new JournalKey(timestamp, AccordJournal.Type.fromId(type), commandStoreId); + } + + private void serializeTimestamp(Timestamp timestamp, DataOutputPlus out) throws IOException + { + out.writeLong(timestamp.hlc()); + out.writeLong(epochAndFlags(timestamp)); + out.writeInt(timestamp.node.id); + } + + private Timestamp deserializeTimestamp(DataInputPlus in) throws IOException + { + long hlc = in.readLong(); + long epochAndFlags = in.readLong(); + int nodeId = in.readInt(); + return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Node.Id(nodeId)); + } + + private void serializeTimestamp(Timestamp timestamp, byte[] out) + { + ByteArrayUtil.putLong(out, 0, timestamp.hlc()); + ByteArrayUtil.putLong(out, 8, epochAndFlags(timestamp)); + ByteArrayUtil.putInt(out, 16, timestamp.node.id); + } + + private Timestamp deserializeTimestamp(ByteBuffer buffer, int position) + { + long hlc = buffer.getLong(position + HLC_OFFSET); + long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); + int nodeId = buffer.getInt(position + NODE_OFFSET); + return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Node.Id(nodeId)); + } + + @Override + public void updateChecksum(Checksum crc, JournalKey key, int userVersion) + { + byte[] out = AccordJournal.keyCRCBytes.get(); + serialize(key, out); + crc.update(out, 0, out.length); + } + + @Override + public int compareWithKeyAt(JournalKey k, ByteBuffer buffer, int position, int userVersion) + { + int cmp = compareWithTimestampAt(k.timestamp, buffer, position); + if (cmp != 0) return cmp; + + byte type = buffer.get(position + TYPE_OFFSET); + cmp = Byte.compare((byte) k.type.id, type); + if (cmp != 0) return cmp; + + short commandStoreId = buffer.getShort(position + CS_ID_OFFSET); + cmp = Short.compare((byte) k.commandStoreId, commandStoreId); + return cmp; + } + + private int compareWithTimestampAt(Timestamp timestamp, ByteBuffer buffer, int position) + { + long hlc = buffer.getLong(position + HLC_OFFSET); + int cmp = Long.compareUnsigned(timestamp.hlc(), hlc); + if (cmp != 0) return cmp; + + long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); + cmp = Long.compareUnsigned(epochAndFlags(timestamp), epochAndFlags); + if (cmp != 0) return cmp; + + int nodeId = buffer.getInt(position + NODE_OFFSET); + cmp = Integer.compareUnsigned(timestamp.node.id, nodeId); + return cmp; + } + + @Override + public int compare(JournalKey k1, JournalKey k2) + { + int cmp = compare(k1.timestamp, k2.timestamp); + if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); + if (cmp == 0) cmp = Short.compare((short) k1.commandStoreId, (short) k2.commandStoreId); + return cmp; + } + + private int compare(Timestamp timestamp1, Timestamp timestamp2) + { + int cmp = Long.compareUnsigned(timestamp1.hlc(), timestamp2.hlc()); + if (cmp == 0) cmp = Long.compareUnsigned(epochAndFlags(timestamp1), epochAndFlags(timestamp2)); + if (cmp == 0) cmp = Integer.compareUnsigned(timestamp1.node.id, timestamp2.node.id); + return cmp; + } + + private long epochAndFlags(Timestamp timestamp) + { + return (timestamp.epoch() << 16) | (long) timestamp.flags(); + } + + private long epoch(long epochAndFlags) + { + return epochAndFlags >>> 16; + } + + private int flags(long epochAndFlags) + { + return (int) (epochAndFlags & ((1 << 16) - 1)); + } + }; + + @Override + public boolean equals(Object other) + { + if (this == other) + return true; + return (other instanceof JournalKey) && equals((JournalKey) other); + } + + boolean equals(JournalKey other) + { + return this.type == other.type && + this.timestamp.equals(other.timestamp) && + this.commandStoreId == other.commandStoreId; + } + + @Override + public int hashCode() + { + return Objects.hash(timestamp, type, commandStoreId); + } + + public String toString() + { + return "Key{" + + "timestamp=" + timestamp + + ", type=" + type + + ", commandStoreId=" + commandStoreId + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java new file mode 100644 index 000000000000..fb7034fcd1ed --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -0,0 +1,612 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.function.Function; + +import com.google.common.annotations.VisibleForTesting; + +import accord.api.Result; +import accord.local.Command; +import accord.local.CommonAttributes; +import accord.local.Listeners; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.Ballot; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Route; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.primitives.Writes; +import accord.utils.Invariants; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.journal.ValueSerializer; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.service.accord.serializers.DepsSerializer; +import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; +import org.apache.cassandra.utils.Throwables; + +import static org.apache.cassandra.db.TypeSizes.SHORT_SIZE; + +public class SavedCommand +{ + public static final ValueSerializer serializer = new SavedCommandSerializer(); + + // This enum is order-dependent + private enum HasFields + { + TXN_ID, + EXECUTE_AT, + SAVE_STATUS, + DURABILITY, + ACCEPTED, + PROMISED, + ROUTE, + PARTIAL_TXN, + PARTIAL_DEPS, + ADDITIONAL_KEYS, + WAITING_ON, + WRITES, + LISTENERS + } + + public final TxnId txnId; + + public final Timestamp executeAt; + public final SaveStatus saveStatus; + public final Status.Durability durability; + + public final Ballot acceptedOrCommitted; + public final Ballot promised; + + public final Route route; + public final PartialTxn partialTxn; + public final PartialDeps partialDeps; + public final Seekables additionalKeysOrRanges; + + public final Writes writes; + public final Listeners.Immutable listeners; + + public SavedCommand(TxnId txnId, + Timestamp executeAt, + SaveStatus saveStatus, + Status.Durability durability, + + Ballot acceptedOrCommitted, + Ballot promised, + + Route route, + PartialTxn partialTxn, + PartialDeps partialDeps, + Seekables additionalKeysOrRanges, + + Writes writes, + Listeners.Immutable listeners) + { + this.txnId = txnId; + this.executeAt = executeAt; + this.saveStatus = saveStatus; + this.durability = durability; + + this.acceptedOrCommitted = acceptedOrCommitted; + this.promised = promised; + + this.route = route; + this.partialTxn = partialTxn; + this.partialDeps = partialDeps; + this.additionalKeysOrRanges = additionalKeysOrRanges; + + this.writes = writes; + this.listeners = listeners; + } + + public static SavedDiff diff(Command before, Command after) + { + if (before == after) + return null; + + // TODO: we do not need to save `waitingOn` _every_ time. + Command.WaitingOn waitingOn = getWaitingOn(after); + return new SavedDiff(after.txnId(), + ifNotEqual(before, after, Command::executeAt, true), + ifNotEqual(before, after, Command::saveStatus, false), + ifNotEqual(before, after, Command::durability, false), + + ifNotEqual(before, after, Command::acceptedOrCommitted, false), + ifNotEqual(before, after, Command::promised, false), + + ifNotEqual(before, after, Command::route, true), + ifNotEqual(before, after, Command::partialTxn, false), + ifNotEqual(before, after, Command::partialDeps, false), + ifNotEqual(before, after, Command::additionalKeysOrRanges, false), + + waitingOn, + ifNotEqual(before, after, Command::writes, false), + ifNotEqual(before, after, Command::durableListeners, true)); + } + + static Command reconstructFromDiff(List diffs) + { + return reconstructFromDiff(diffs, CommandSerializers.APPLIED); + } + + /** + * @param result is exposed because we are _not_ persisting result, since during loading or replay + * we do not expect we will have to send a result to the client, and data results + * can potentially contain a large number of entries, so it's best if they are not + * written into the log. + */ + @VisibleForTesting + static Command reconstructFromDiff(List diffs, Result result) + { + TxnId txnId = null; + + Timestamp executeAt = null; + SaveStatus saveStatus = null; + Status.Durability durability = null; + + Ballot acceptedOrCommitted = Ballot.ZERO; + Ballot promised = null; + + Route route = null; + PartialTxn partialTxn = null; + PartialDeps partialDeps = null; + Seekables additionalKeysOrRanges = null; + + WaitingOnProvider waitingOnProvider = null; + Writes writes = null; + Listeners.Immutable listeners = null; + + for (LoadedDiff diff : diffs) + { + if (diff.txnId != null) + txnId = diff.txnId; + if (diff.executeAt != null) + executeAt = diff.executeAt; + if (diff.saveStatus != null) + saveStatus = diff.saveStatus; + if (diff.durability != null) + durability = diff.durability; + + if (diff.acceptedOrCommitted != null) + acceptedOrCommitted = diff.acceptedOrCommitted; + if (diff.promised != null) + promised = diff.promised; + + if (diff.route != null) + route = diff.route; + if (diff.partialTxn != null) + partialTxn = diff.partialTxn; + if (diff.partialDeps != null) + partialDeps = diff.partialDeps; + if (diff.additionalKeysOrRanges != null) + additionalKeysOrRanges = diff.additionalKeysOrRanges; + + if (diff.waitingOn != null) + waitingOnProvider = diff.waitingOn; + if (diff.writes != null) + writes = diff.writes; + if (diff.listeners != null) + listeners = diff.listeners; + } + + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + if (partialTxn != null) + attrs.partialTxn(partialTxn); + if (durability != null) + attrs.durability(durability); + if (route != null) + attrs.route(route); + if (partialDeps != null && + (saveStatus.known.deps != Status.KnownDeps.NoDeps && + saveStatus.known.deps != Status.KnownDeps.DepsErased && + saveStatus.known.deps != Status.KnownDeps.DepsUnknown)) + attrs.partialDeps(partialDeps); + if (additionalKeysOrRanges != null) + attrs.additionalKeysOrRanges(additionalKeysOrRanges); + if (listeners != null && !listeners.isEmpty()) + attrs.setListeners(listeners); + + Command.WaitingOn waitingOn = null; + if (waitingOnProvider != null) + waitingOn = waitingOnProvider.provide(txnId, partialDeps); + + Invariants.checkState(saveStatus != null, + "Save status is null after applying %s", diffs); + switch (saveStatus.status) + { + case NotDefined: + return saveStatus == SaveStatus.Uninitialised ? Command.NotDefined.uninitialised(attrs.txnId()) + : Command.NotDefined.notDefined(attrs, promised); + case PreAccepted: + return Command.PreAccepted.preAccepted(attrs, executeAt, promised); + case AcceptedInvalidate: + case Accepted: + case PreCommitted: + return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); + case Committed: + case Stable: + return Command.Committed.committed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn); + case PreApplied: + case Applied: + return Command.Executed.executed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn, writes, result); + case Truncated: + case Invalidated: + default: + throw new IllegalStateException(); + } + } + + // TODO (required): this convert function was added only because AsyncOperationTest was failing without it; maybe after switching to loading from the log we can just pass l and r directly or remove != null checks. + private static VAL ifNotEqual(OBJ lo, OBJ ro, Function convert, boolean allowClassMismatch) + { + VAL l = null; + VAL r = null; + if (lo != null) l = convert.apply(lo); + if (ro != null) r = convert.apply(ro); + + if (l == r) + return null; + if (l == null || r == null) + return r; + assert allowClassMismatch || l.getClass() == r.getClass() : String.format("%s != %s", l.getClass(), r.getClass()); + + if (l.equals(r)) + return null; + + return r; + } + + static Command.WaitingOn getWaitingOn(Command command) + { + if (command instanceof Command.Committed) + return command.asCommitted().waitingOn(); + + return null; + } + + public static class SavedDiff extends SavedCommand + { + public final Command.WaitingOn waitingOn; + + public SavedDiff(TxnId txnId, + Timestamp executeAt, + SaveStatus saveStatus, + Status.Durability durability, + + Ballot acceptedOrCommitted, + Ballot promised, + + Route route, + PartialTxn partialTxn, + PartialDeps partialDeps, + Seekables additionalKeysOrRanges, + + Command.WaitingOn waitingOn, + Writes writes, + Listeners.Immutable listeners) + { + super(txnId, executeAt, saveStatus, durability, acceptedOrCommitted, promised, route, partialTxn, partialDeps, additionalKeysOrRanges, writes, listeners); + this.waitingOn = waitingOn; + } + + @Override + public String toString() + { + return "SavedDiff{" + + " txnId=" + txnId + + ", executeAt=" + executeAt + + ", saveStatus=" + saveStatus + + ", durability=" + durability + + ", acceptedOrCommitted=" + acceptedOrCommitted + + ", promised=" + promised + + ", route=" + route + + ", partialTxn=" + partialTxn + + ", partialDeps=" + partialDeps + + ", writes=" + writes + + ", waitingOn=" + waitingOn + + '}'; + } + } + + public static class LoadedDiff extends SavedCommand + { + public final WaitingOnProvider waitingOn; + + public LoadedDiff(TxnId txnId, + Timestamp executeAt, + SaveStatus saveStatus, + Status.Durability durability, + + Ballot acceptedOrCommitted, + Ballot promised, + + Route route, + PartialTxn partialTxn, + PartialDeps partialDeps, + Seekables additionalKeysOrRanges, + + WaitingOnProvider waitingOn, + Writes writes, + Listeners.Immutable listeners) + { + super(txnId, executeAt, saveStatus, durability, acceptedOrCommitted, promised, route, partialTxn, partialDeps, additionalKeysOrRanges, writes, listeners); + this.waitingOn = waitingOn; + } + + public String toString() + { + return "LoadedDiff{" + + "waitingOn=" + waitingOn + + '}'; + } + } + + final static class SavedCommandSerializer implements ValueSerializer + { + @Override + public int serializedSize(JournalKey key, Object value, int userVersion) + { + SavedDiff diff = (SavedDiff) value; + long size = 0; + size += SHORT_SIZE; // flags + + if (diff.txnId != null) + size += CommandSerializers.txnId.serializedSize(diff.txnId, userVersion); + if (diff.executeAt != null) + size += CommandSerializers.timestamp.serializedSize(diff.executeAt, userVersion); + if (diff.saveStatus != null) + size += Integer.BYTES; + if (diff.durability != null) + size += Integer.BYTES; + + if (diff.acceptedOrCommitted != null) + size += CommandSerializers.ballot.serializedSize(diff.acceptedOrCommitted, userVersion); + if (diff.promised != null) + size += CommandSerializers.ballot.serializedSize(diff.promised, userVersion); + + if (diff.route != null) + size += AccordKeyspace.LocalVersionedSerializers.route.serializedSize(diff.route); + if (diff.partialTxn != null) + CommandSerializers.partialTxn.serializedSize(diff.partialTxn, userVersion); + if (diff.partialDeps != null) + DepsSerializer.partialDeps.serializedSize(diff.partialDeps, userVersion); + if (diff.additionalKeysOrRanges != null) + KeySerializers.seekables.serializedSize(diff.additionalKeysOrRanges, userVersion); + + if (diff.waitingOn != null) + { + size += Integer.BYTES; + size += WaitingOnSerializer.serializedSize(diff.txnId, diff.waitingOn); + } + + if (diff.writes != null) + CommandSerializers.writes.serializedSize(diff.writes, userVersion); + + if (diff.listeners != null && !diff.listeners.isEmpty()) + { + size += Byte.BYTES; + for (Command.DurableAndIdempotentListener listener : diff.listeners) + size += AccordKeyspace.LocalVersionedSerializers.listeners.serializedSize(listener); + } + return (int) size; + } + + @Override + public void serialize(JournalKey key, Object value, DataOutputPlus out, int userVersion) throws IOException + { + SavedDiff diff = (SavedDiff) value; + int flags = getFlags(diff); + + out.writeShort(flags); + + if (diff.txnId != null) + CommandSerializers.txnId.serialize(diff.txnId, out, userVersion); + if (diff.executeAt != null) + CommandSerializers.timestamp.serialize(diff.executeAt, out, userVersion); + if (diff.saveStatus != null) + out.writeInt(diff.saveStatus.ordinal()); + if (diff.durability != null) + out.writeInt(diff.durability.ordinal()); + + if (diff.acceptedOrCommitted != null) + CommandSerializers.ballot.serialize(diff.acceptedOrCommitted, out, userVersion); + if (diff.promised != null) + CommandSerializers.ballot.serialize(diff.promised, out, userVersion); + + if (diff.route != null) + AccordKeyspace.LocalVersionedSerializers.route.serialize(diff.route, out); // TODO (required): user version + if (diff.partialTxn != null) + CommandSerializers.partialTxn.serialize(diff.partialTxn, out, userVersion); + if (diff.partialDeps != null) + DepsSerializer.partialDeps.serialize(diff.partialDeps, out, userVersion); + if (diff.additionalKeysOrRanges != null) + KeySerializers.seekables.serialize(diff.additionalKeysOrRanges, out, userVersion); + + if (diff.waitingOn != null) + { + long size = WaitingOnSerializer.serializedSize(diff.txnId, diff.waitingOn); + ByteBuffer serialized = WaitingOnSerializer.serialize(diff.txnId, diff.waitingOn); + out.writeInt((int) size); + out.write(serialized); + } + + if (diff.writes != null) + CommandSerializers.writes.serialize(diff.writes, out, userVersion); + + if (diff.listeners != null && !diff.listeners.isEmpty()) + { + out.writeByte(diff.listeners.size()); + for (Command.DurableAndIdempotentListener listener : diff.listeners) + AccordKeyspace.LocalVersionedSerializers.listeners.serialize(listener, out); + } + + } + + private static int getFlags(SavedDiff diff) + { + int flags = 0; + + if (diff.txnId != null) + flags = setBit(flags, HasFields.TXN_ID.ordinal()); + if (diff.executeAt != null) + flags = setBit(flags, HasFields.EXECUTE_AT.ordinal()); + if (diff.saveStatus != null) + flags = setBit(flags, HasFields.SAVE_STATUS.ordinal()); + if (diff.durability != null) + flags = setBit(flags, HasFields.DURABILITY.ordinal()); + + if (diff.acceptedOrCommitted != null) + flags = setBit(flags, HasFields.ACCEPTED.ordinal()); + if (diff.promised != null) + flags = setBit(flags, HasFields.PROMISED.ordinal()); + + if (diff.route != null) + flags = setBit(flags, HasFields.ROUTE.ordinal()); + if (diff.partialTxn != null) + flags = setBit(flags, HasFields.PARTIAL_TXN.ordinal()); + if (diff.partialDeps != null) + flags = setBit(flags, HasFields.PARTIAL_DEPS.ordinal()); + if (diff.additionalKeysOrRanges != null) + flags = setBit(flags, HasFields.ADDITIONAL_KEYS.ordinal()); + + if (diff.waitingOn != null) + flags = setBit(flags, HasFields.WAITING_ON.ordinal()); + if (diff.writes != null) + flags = setBit(flags, HasFields.WRITES.ordinal()); + if (diff.listeners != null && !diff.listeners.isEmpty()) + flags = setBit(flags, HasFields.LISTENERS.ordinal()); + return flags; + } + + @Override + public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException + { + int flags = in.readShort(); + + TxnId txnId = null; + Timestamp executedAt = null; + SaveStatus saveStatus = null; + Status.Durability durability = null; + + Ballot acceptedOrCommitted = null; + Ballot promised = null; + Route route = null; + + PartialTxn partialTxn = null; + PartialDeps partialDeps = null; + Seekables additionalKeysOrRanges = null; + + WaitingOnProvider waitingOn = (txn, deps) -> null; + Writes writes = null; + Listeners.Immutable listeners = null; + + if (isSet(flags, HasFields.TXN_ID.ordinal())) + txnId = CommandSerializers.txnId.deserialize(in, userVersion); + if (isSet(flags, HasFields.EXECUTE_AT.ordinal())) + executedAt = CommandSerializers.timestamp.deserialize(in, userVersion); + if (isSet(flags, HasFields.SAVE_STATUS.ordinal())) + saveStatus = SaveStatus.values()[in.readInt()]; + if (isSet(flags, HasFields.DURABILITY.ordinal())) + durability = Status.Durability.values()[in.readInt()]; + + if (isSet(flags, HasFields.ACCEPTED.ordinal())) + acceptedOrCommitted = CommandSerializers.ballot.deserialize(in, userVersion); + if (isSet(flags, HasFields.PROMISED.ordinal())) + promised = CommandSerializers.ballot.deserialize(in, userVersion); + + if (isSet(flags, HasFields.ROUTE.ordinal())) + route = AccordKeyspace.LocalVersionedSerializers.route.deserialize(in); + if (isSet(flags, HasFields.PARTIAL_TXN.ordinal())) + partialTxn = CommandSerializers.partialTxn.deserialize(in, userVersion); + if (isSet(flags, HasFields.PARTIAL_DEPS.ordinal())) + partialDeps = DepsSerializer.partialDeps.deserialize(in, userVersion); + if (isSet(flags, HasFields.ADDITIONAL_KEYS.ordinal())) + additionalKeysOrRanges = KeySerializers.seekables.deserialize(in, userVersion); + + if (isSet(flags, HasFields.WAITING_ON.ordinal())) + { + int size = in.readInt(); + byte[] bytes = new byte[size]; + in.readFully(bytes); + ByteBuffer buffer = ByteBuffer.wrap(bytes); + waitingOn = (localTxnId, deps) -> { + try + { + return WaitingOnSerializer.deserialize(localTxnId, deps.keyDeps.keys(), deps.rangeDeps.txnIds(), buffer); + } + catch (IOException e) + { + throw Throwables.unchecked(e); + } + }; + } + if (isSet(flags, HasFields.WRITES.ordinal())) + writes = CommandSerializers.writes.deserialize(in, userVersion); + + if (isSet(flags, HasFields.LISTENERS.ordinal())) + { + Listeners builder = Listeners.Immutable.EMPTY.mutable(); + int cnt = in.readByte(); + for (int i = 0; i < cnt; i++) + builder.add(AccordKeyspace.LocalVersionedSerializers.listeners.deserialize(in)); + listeners = new Listeners.Immutable(builder); + } + + return new LoadedDiff(txnId, + executedAt, + saveStatus, + durability, + + acceptedOrCommitted, + promised, + + route, + partialTxn, + partialDeps, + additionalKeysOrRanges, + + waitingOn, + writes, + listeners); + } + } + + static int setBit(int value, int bit) + { + return value | (1 << bit); + } + + static boolean isSet(int value, int bit) + { + return (value & (1 << bit)) != 0; + } + + public interface WaitingOnProvider + { + Command.WaitingOn provide(TxnId txnId, PartialDeps deps); + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 72e673f48bb8..0e63881bf9f8 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -17,7 +17,9 @@ */ package org.apache.cassandra.service.accord.async; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.TreeMap; import java.util.function.BiConsumer; import java.util.function.Consumer; @@ -29,6 +31,7 @@ import org.slf4j.MDC; import accord.api.Key; +import accord.local.Command; import accord.local.CommandStore; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; @@ -36,6 +39,7 @@ import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.async.AsyncChains; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordSafeCommand; import org.apache.cassandra.service.accord.AccordSafeCommandStore; @@ -43,6 +47,7 @@ import org.apache.cassandra.service.accord.AccordSafeCommandsForRanges; import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; +import org.apache.cassandra.service.accord.SavedCommand; import static org.apache.cassandra.service.accord.async.AsyncLoader.txnIds; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.COMPLETING; @@ -90,7 +95,7 @@ void revertChanges() enum State { - INITIALIZED, LOADING, PREPARING, RUNNING, COMPLETING, FINISHED, FAILED; + INITIALIZED, LOADING, PREPARING, RUNNING, COMPLETING, AWAITING_FLUSH, FINISHED, FAILED; boolean isComplete() { @@ -108,6 +113,8 @@ boolean isComplete() private final String loggingId; private BiConsumer callback; + private List sanityCheck = null; + private void setLoggingIds() { MDC.put(LoggingProps.COMMAND_STORE, commandStore.loggingId); @@ -159,11 +166,6 @@ private void onLoaded(Object o, Throwable throwable) } } - void onUnblocked() - { - commandStore.executor().execute(this); - } - private void state(State state) { this.state = state; @@ -182,12 +184,6 @@ private void finish(R result, Throwable failure) } } - @Nullable - TxnId primaryTxnId() - { - return preLoadContext.primaryTxnId(); - } - @SuppressWarnings("unchecked") Seekables keys() { @@ -215,7 +211,6 @@ private void fail(Throwable throwable) commandStore.abortCurrentOperation(); case LOADING: context.releaseResources(commandStore); - commandStore.executionOrder().unregisterOutOfOrder(this); case INITIALIZED: break; // nothing to clean up, call callback } @@ -233,30 +228,50 @@ private void fail(Throwable throwable) protected void runInternal() { - Boolean canRun = null; switch (state) { default: throw new IllegalStateException("Unexpected state " + state); case INITIALIZED: - canRun = commandStore.executionOrder().register(this); - if (Invariants.isParanoid()) - Invariants.checkState(canRun.booleanValue() == commandStore.executionOrder().canRun(this), "Register of %s returned canRun=%s but canRun returned %s!", this, canRun, !canRun); state(LOADING); case LOADING: - if (null == canRun) - canRun = commandStore.executionOrder().canRun(this); - if (!loader.load(context, this::onLoaded) || !canRun) + if (!loader.load(context, this::onLoaded)) return; state(PREPARING); case PREPARING: safeStore = commandStore.beginOperation(preLoadContext, context.commands, context.timestampsForKey, context.commandsForKey, context.commandsForRanges); state(RUNNING); case RUNNING: + result = apply(safeStore); + // TODO (required): currently, we are not very efficient about ensuring that we persist the absolute minimum amount of state. Improve that. + List diffs = null; + for (AccordSafeCommand commandState : context.commands.values()) + { + SavedCommand.SavedDiff diff = commandState.diff(); + if (diff != null) + { + if (diffs == null) + diffs = new ArrayList<>(context.commands.size()); + diffs.add(diff); + if (CassandraRelevantProperties.DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED.getBoolean()) + { + if (sanityCheck == null) + sanityCheck = new ArrayList<>(context.commands.size()); + sanityCheck.add(commandState.current()); + } + } + } + safeStore.postExecute(context.commands, context.timestampsForKey, context.commandsForKey, context.commandsForRanges); context.releaseResources(commandStore); commandStore.completeOperation(safeStore); - commandStore.executionOrder().unregister(this); + if (diffs != null) + { + state(COMPLETING); + this.commandStore.appendCommands(diffs, sanityCheck, () -> finish(result, null)); + return; + } + state(COMPLETING); case COMPLETING: finish(result, null); @@ -326,6 +341,7 @@ public static AsyncOperation create(CommandStore commandStore, PreLoadCon return new ForFunction<>((AccordCommandStore) commandStore, loadCtx, function); } + // TODO (desired): these anonymous ops are somewhat tricky to debug. We may want to at least give them names. static class ForConsumer extends AsyncOperation { private final Consumer consumer; diff --git a/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java b/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java deleted file mode 100644 index 715703acdee6..000000000000 --- a/src/java/org/apache/cassandra/service/accord/async/ExecutionOrder.java +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.cassandra.service.accord.async; - -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.IdentityHashMap; -import java.util.List; -import java.util.function.Consumer; - -import accord.api.Key; -import accord.api.RoutingKey; -import accord.primitives.Range; -import accord.primitives.Seekable; -import accord.primitives.TxnId; -import accord.utils.Invariants; -import org.agrona.collections.Object2ObjectHashMap; -import org.apache.cassandra.service.accord.RangeTreeRangeAccessor; -import org.apache.cassandra.utils.RTree; -import org.apache.cassandra.utils.RangeTree; - -/** - * Assists with correct ordering of {@link AsyncOperation} execution wrt each other, - * preventing reordering of overlapping operations by {@link AsyncLoader}. - */ -public class ExecutionOrder -{ - private static class Conflicts - { - private final List keyConflicts; - private final List rangeConflicts; - - private Conflicts(List keyConflicts, List rangeConflicts) - { - this.keyConflicts = keyConflicts; - this.rangeConflicts = rangeConflicts; - } - } - private class RangeState - { - private final Range range; - private final IdentityHashMap, Conflicts> operationToConflicts = new IdentityHashMap<>(); - private Object operationOrQueue; - - public RangeState(Range range, List keyConflicts, List rangeConflicts, AsyncOperation operation) - { - this.range = range; - this.operationOrQueue = operation; - add(operation, keyConflicts, rangeConflicts); - } - - public void add(AsyncOperation operation, List keyConflicts, List rangeConflicts) - { - operationToConflicts.put(operation, new Conflicts(keyConflicts, rangeConflicts)); - } - - boolean canRun(AsyncOperation operation) - { - if (operationOrQueue instanceof AsyncOperation) - { - Invariants.checkState(operationOrQueue == operation); - return true; - } - else - { - ArrayDeque> queue = (ArrayDeque>) operationOrQueue; - return queue.peek() == operation; - } - } - - Conflicts remove(AsyncOperation operation, boolean allowOutOfOrder) - { - unregister("range", range, operationOrQueue, operation, allowOutOfOrder, () -> rangeQueues.remove(range)); - return operationToConflicts.remove(operation); - } - - public Conflicts conflicts(AsyncOperation operation) - { - return operationToConflicts.get(operation); - } - } - - private final Object2ObjectHashMap queues = new Object2ObjectHashMap<>(); - private final RangeTree rangeQueues = RTree.create(RangeTreeRangeAccessor.instance); - - /** - * Register an operation as having a dependency on its keys and TxnIds - * @return true if no other operation depends on the keys or TxnIds, false otherwise - */ - boolean register(AsyncOperation operation) - { - boolean canRun = true; - for (Seekable seekable : operation.keys()) - { - switch (seekable.domain()) - { - case Key: - canRun &= register(seekable.asKey(), operation); - break; - case Range: - canRun &= register(seekable.asRange(), operation); - break; - default: - throw new AssertionError("Unexpected domain: " + seekable.domain()); - } - } - TxnId primaryTxnId = operation.primaryTxnId(); - if (null != primaryTxnId) - canRun &= register(primaryTxnId, operation); - return canRun; - } - - private boolean register(Range range, AsyncOperation operation) - { - // Ranges depend on Ranges and Keys - // Keys depend on Keys... - // This adds a complication to this logic as keys should be able to make progress regardless of ranges, but rangest must depend on keys - List keyConflicts = null; - for (Object o : queues.keySet()) - { - if (!(o instanceof Key)) - continue; - Key key = (Key) o; - if (!range.contains(key)) - continue; - if (keyConflicts == null) - keyConflicts = new ArrayList<>(); - keyConflicts.add(key); - } - if (keyConflicts != null) - keyConflicts.forEach(k -> register(k, operation)); - - class Result - { - RangeState sameRange = null; - List rangeConflicts = null; - } - Result result = new Result(); - rangeQueues.search(range, e -> { - if (range.equals(e.getKey())) - result.sameRange = e.getValue(); - else - { - if (result.rangeConflicts == null) - result.rangeConflicts = new ArrayList<>(); - result.rangeConflicts.add(e.getKey()); - } - RangeState state = e.getValue(); - // a single range could conflict with multiple other ranges, so it is possible that the operation - // exists in the queue already due to another range in the txn... simple example is - // keys = (0, 10], (12, 15] - // e.getKey() == (-100, 100] - // in this case the operation would attempt to double add since it has 2 keys that conflict with this single range - register(state.operationOrQueue, operation, q -> state.operationOrQueue = q); - }); - if (result.sameRange != null) - { - result.sameRange.add(operation, keyConflicts, result.rangeConflicts); - } - else - { - rangeQueues.add(range, new RangeState(range, keyConflicts, result.rangeConflicts, operation)); - } - return keyConflicts == null && result.rangeConflicts == null && result.sameRange == null; - } - - /** - * Register an operation as having a dependency on a key or a TxnId - * @return true if no other operation depends on the key/TxnId, false otherwise - */ - private boolean register(Object keyOrTxnId, AsyncOperation operation) - { - Object operationOrQueue = queues.get(keyOrTxnId); - if (null == operationOrQueue) - { - queues.put(keyOrTxnId, operation); - return true; - } - - register(operationOrQueue, operation, q -> queues.put(keyOrTxnId, q)); - return false; - } - - private void register(Object operationOrQueue, AsyncOperation operation, Consumer>> onCreateQueue) - { - if (operationOrQueue instanceof AsyncOperation) - { - Invariants.checkState(operationOrQueue != operation, "Attempted to double register operation %s", operation); - ArrayDeque> queue = new ArrayDeque<>(4); - queue.add((AsyncOperation) operationOrQueue); - queue.add(operation); - onCreateQueue.accept(queue); - } - else - { - @SuppressWarnings("unchecked") - ArrayDeque> queue = (ArrayDeque>) operationOrQueue; - queue.add(operation); - } - } - - /** - * Unregister the operation as being a dependency for its keys and TxnIds, but do so even if it is unable to run now. - */ - void unregisterOutOfOrder(AsyncOperation operation) - { - unregister(operation, true); - } - - /** - * Unregister the operation as being a dependency for its keys and TxnIds - */ - void unregister(AsyncOperation operation) - { - unregister(operation, false); - } - - private void unregister(AsyncOperation operation, boolean allowOutOfOrder) - { - for (Seekable seekable : operation.keys()) - { - switch (seekable.domain()) - { - case Key: - unregister(seekable.asKey(), operation, allowOutOfOrder); - break; - case Range: - unregister(seekable.asRange(), operation, allowOutOfOrder); - break; - default: - throw new AssertionError("Unexpected domain: " + seekable.domain()); - } - - } - TxnId primaryTxnId = operation.primaryTxnId(); - if (null != primaryTxnId) - unregister(primaryTxnId, operation, allowOutOfOrder); - } - - private void unregister(Range range, AsyncOperation operation, boolean allowOutOfOrder) - { - RangeState state = state(range); - Conflicts conflicts = state.remove(operation, allowOutOfOrder); - if (conflicts.rangeConflicts != null) - conflicts.rangeConflicts.forEach(r -> state(r).remove(operation, allowOutOfOrder)); - if (conflicts.keyConflicts != null) - conflicts.keyConflicts.forEach(k -> unregister(k, operation, allowOutOfOrder)); - } - - /** - * Unregister the operation as being a dependency for key or TxnId - */ - private void unregister(Object keyOrTxnId, AsyncOperation operation, boolean allowOutOfOrder) - { - Object operationOrQueue = queues.get(keyOrTxnId); - Invariants.nonNull(operationOrQueue); - - unregister("Key or TxnId", keyOrTxnId, operationOrQueue, operation, allowOutOfOrder, () -> queues.remove(keyOrTxnId)); - } - - private void unregister(String name, Object key, Object operationOrQueue, AsyncOperation operation, boolean allowOutOfOrder, Runnable onEmpty) - { - if (operationOrQueue instanceof AsyncOperation) - { - Invariants.checkState(operationOrQueue == operation, "Only single operation present and was not %s; %s %s", name, key); - onEmpty.run(); - } - else - { - @SuppressWarnings("unchecked") - ArrayDeque> queue = (ArrayDeque>) operationOrQueue; - if (allowOutOfOrder) - { - Invariants.checkState(queue.remove(operation), "Operation %s was not found in queue: %s; %s %s", operation, queue, name, key); - } - else - { - Invariants.checkState(queue.peek() == operation, "Operation %s is not at the top of the queue; %s; %s %s", operation, queue, name, key); - queue.poll(); - } - - if (queue.isEmpty()) - { - onEmpty.run(); - } - else - { - AsyncOperation next = queue.peek(); - if (next == operation) - { - // a single range could conflict with multiple other ranges, so it is possible that the operation - // exists in the queue already due to another range in the txn... simple example is - // keys = (0, 10], (12, 15] - // e.getKey() == (-100, 100] - // in this case the operation would attempt to double add since it has 2 keys that conflict with this single range - return; - } - if (canRun(next)) - next.onUnblocked(); - } - } - } - - boolean canRun(AsyncOperation operation) - { - for (Seekable seekable : operation.keys()) - { - switch (seekable.domain()) - { - case Key: - if (!canRun(seekable.asKey(), operation)) - return false; - break; - case Range: - if (!canRun(seekable.asRange(), operation)) - return false; - break; - default: - throw new AssertionError("Unexpected domain: " + seekable.domain()); - } - - } - - TxnId primaryTxnId = operation.primaryTxnId(); - return primaryTxnId == null || canRun(primaryTxnId, operation); - } - - private boolean canRun(Range range, AsyncOperation operation) - { - RangeState state = state(range); - if (!state.canRun(operation)) - return false; - Conflicts conflicts = state.conflicts(operation); - if (conflicts.rangeConflicts != null) - { - for (Range r : conflicts.rangeConflicts) - { - if (!state(r).canRun(operation)) - return false; - } - } - if (conflicts.keyConflicts != null) - { - for (Key key : conflicts.keyConflicts) - { - if (!canRun(key, operation)) - return false; - } - } - return true; - } - - private RangeState state(Range range) - { - List list = rangeQueues.get(range); - assert list.size() == 1 : String.format("Expected 1 element for range %s but saw list %s", range, list); - return list.get(0); - } - - private boolean canRun(Object keyOrTxnId, AsyncOperation operation) - { - Object operationOrQueue = queues.get(keyOrTxnId); - Invariants.nonNull(operationOrQueue); - - if (operationOrQueue instanceof AsyncOperation) - { - Invariants.checkState(operationOrQueue == operation); - return true; - } - - @SuppressWarnings("unchecked") - ArrayDeque> queue = (ArrayDeque>) operationOrQueue; - return queue.peek() == operation; - } -} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalTest.java new file mode 100644 index 000000000000..7f7dfb076746 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import org.junit.Test; + +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.shared.WithProperties; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.utils.concurrent.CountDownLatch; + +public class AccordJournalTest extends TestBaseImpl +{ + @Test + public void saveLoadSanityCheck() throws Throwable + { + String timeout = "10s"; + try (WithProperties wp = new WithProperties().set(CassandraRelevantProperties.DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED, "true"); + Cluster cluster = init(Cluster.build(1) + .withoutVNodes() + .withConfig(c -> c + .set("read_request_timeout", timeout) + .set("transaction_timeout", timeout) + ) + .start())) + { + final String TABLE = KEYSPACE + ".test_table"; + cluster.schemaChange("CREATE TABLE " + TABLE + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'"); + List threads = new ArrayList<>(); + int numThreads = 10; + CountDownLatch latch = CountDownLatch.newCountDownLatch(numThreads); + AtomicInteger counter = new AtomicInteger(); + for (int i = 0; i < numThreads; i++) + { + int finalI = i; + Thread t = new Thread(() -> { + latch.decrement(); + latch.awaitUninterruptibly(); + try + { + for (int j = 0; j < 100; j++) + { + cluster.coordinator(1).execute("BEGIN TRANSACTION\n" + + "INSERT INTO " + TABLE + "(k, c, v) VALUES (?, ?, ?);\n" + + "INSERT INTO " + TABLE + "(k, c, v) VALUES (?, ?, ?);\n" + + "COMMIT TRANSACTION", + ConsistencyLevel.ALL, + 1, j, finalI * 100 + j, + 2, j, finalI * 100 + j); + counter.incrementAndGet(); + } + } + catch (Throwable throwable) + { + throwable.printStackTrace(); + } + }); + t.start(); + threads.add(t); + } + for (Thread thread : threads) + thread.join(); + + cluster.coordinator(1).execute("SELECT * FROM " + TABLE + " WHERE k = ?;", ConsistencyLevel.SERIAL, 1); + } + } +} diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 53569af1b128..5a3b23076eec 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -35,9 +35,9 @@ import org.apache.cassandra.io.filesystem.ListenableFileSystem; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.journal.AsyncCallbacks; import org.apache.cassandra.journal.Journal; import org.apache.cassandra.journal.KeySupport; +import org.apache.cassandra.journal.RecordPointer; import org.apache.cassandra.journal.ValueSerializer; import org.junit.Assert; @@ -60,28 +60,27 @@ public class AccordJournalSimulationTest extends SimulationTestBase public void simpleRWTest() { simulate(arr(() -> { - ListenableFileSystem fs = new ListenableFileSystem(Jimfs.newFileSystem()); - File.unsafeSetFilesystem(fs); - DatabaseDescriptor.daemonInitialization(); - DatabaseDescriptor.setCommitLogCompression(new ParameterizedClass("LZ4Compressor", ImmutableMap.of())); // - DatabaseDescriptor.setCommitLogWriteDiskAccessMode(Config.DiskAccessMode.standard); - DatabaseDescriptor.initializeCommitLogDiskAccessMode(); - DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - DatabaseDescriptor.setAccordJournalDirectory("/journal"); - new File("/journal").createDirectoriesIfNotExists(); - - DatabaseDescriptor.setDumpHeapOnUncaughtException(false); - - Keyspace.setInitialized(); - - State.journal = new Journal<>("AccordJournal", - new File("/journal"), - new AccordSpec.JournalSpec(), - new TestCallbacks(), - new IdentityKeySerializer(), - new IdentityValueSerializer()); - }), - () -> check()); + ListenableFileSystem fs = new ListenableFileSystem(Jimfs.newFileSystem()); + File.unsafeSetFilesystem(fs); + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.setCommitLogCompression(new ParameterizedClass("LZ4Compressor", ImmutableMap.of())); // + DatabaseDescriptor.setCommitLogWriteDiskAccessMode(Config.DiskAccessMode.standard); + DatabaseDescriptor.initializeCommitLogDiskAccessMode(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + DatabaseDescriptor.setAccordJournalDirectory("/journal"); + new File("/journal").createDirectoriesIfNotExists(); + + DatabaseDescriptor.setDumpHeapOnUncaughtException(false); + + Keyspace.setInitialized(); + + State.journal = new Journal<>("AccordJournal", + new File("/journal"), + new AccordSpec.JournalSpec(), + new IdentityKeySerializer(), + new IdentityValueSerializer()); + }), + () -> check()); } public static void check() @@ -93,7 +92,10 @@ public static void check() for (int i = 0; i < count; i++) { int finalI = i; - State.executor.submit(() -> State.journal.asyncWrite("test" + finalI, "test" + finalI, Collections.singleton(1), null)); + State.executor.submit(() -> { + RecordPointer ptr = State.journal.asyncWrite("test" + finalI, "test" + finalI, Collections.singleton(1)); + State.journal.onFlush(ptr, State.latch::decrement); + }); } State.latch.await(); @@ -123,34 +125,6 @@ public static void check() } } - public static class TestCallbacks implements AsyncCallbacks - { - - @Override - public void onWrite(long segment, int position, int size, String key, String value, Object writeContext) - { - State.latch.decrement(); - } - - @Override - public void onWriteFailed(String key, String value, Object writeContext, Throwable cause) - { - State.thrown.add(new IllegalStateException("Write failed for " + key)); - State.latch.decrement(); - } - - @Override - public void onFlush(long segment, int position) - { - } - - @Override - public void onFlushFailed(Throwable cause) - { - State.thrown.add(new RuntimeException("Could not flush", cause)); - } - } - @Isolated public static class IdentityValueSerializer implements ValueSerializer { diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java index f7e13c1927af..872f6611df1a 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java @@ -122,9 +122,9 @@ * * And then run your test using the following settings (omit add-* if you are running on jdk8): * - -Dstorage-config=$MODULE_DIR$/test/conf + -Dstorage-config=/Users/ifesdjeen/p/java/cassandra-accord/test/conf -Djava.awt.headless=true - -javaagent:$MODULE_DIR$/lib/jamm-0.4.0.jar + -javaagent:/Users/ifesdjeen/p/java/cassandra-accord/lib/jamm-0.4.0.jar -ea -Dcassandra.debugrefcount=true -Xss384k @@ -146,15 +146,15 @@ -Dcassandra.test.messagingService.nonGracefulShutdown=true -Dcassandra.use_nix_recursive_delete=true -Dcie-cassandra.disable_schema_drop_log=true - -Dlogback.configurationFile=file://$MODULE_DIR$/test/conf/logback-simulator.xml + -Dlogback.configurationFile=file:///Users/ifesdjeen/p/java/cassandra-accord/test/conf/logback-simulator.xml -Dcassandra.ring_delay_ms=10000 -Dcassandra.tolerate_sstable_size=true -Dcassandra.skip_sync=true -Dcassandra.debugrefcount=false -Dcassandra.test.simulator.determinismcheck=strict -Dcassandra.test.simulator.print_asm=none - -javaagent:$MODULE_DIR$/build/test/lib/jars/simulator-asm.jar - -Xbootclasspath/a:$MODULE_DIR$/build/test/lib/jars/simulator-bootstrap.jar + -javaagent:/Users/ifesdjeen/p/java/cassandra-accord/build/test/lib/jars/simulator-asm.jar + -Xbootclasspath/a:/Users/ifesdjeen/p/java/cassandra-accord/build/test/lib/jars/simulator-bootstrap.jar -XX:ActiveProcessorCount=4 -XX:-TieredCompilation -XX:-BackgroundCompilation diff --git a/test/unit/org/apache/cassandra/ServerTestUtils.java b/test/unit/org/apache/cassandra/ServerTestUtils.java index 337974345253..884272a2a714 100644 --- a/test/unit/org/apache/cassandra/ServerTestUtils.java +++ b/test/unit/org/apache/cassandra/ServerTestUtils.java @@ -246,7 +246,7 @@ private static void cleanupDirectory(File directory) } } - private static void cleanupDirectory(String dirName) + public static void cleanupDirectory(String dirName) { if (dirName != null) cleanupDirectory(new File(dirName)); diff --git a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java index 780a432647e7..6da221862fc8 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java @@ -42,6 +42,7 @@ import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_COUNTERS_IN_TXNS_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_TIMESTAMPS_IN_UPDATES_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.SELECT_REFS_NEED_COLUMN_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE; import static org.apache.cassandra.cql3.statements.UpdateStatement.CANNOT_SET_KEY_WITH_REFERENCE_MESSAGE; import static org.apache.cassandra.cql3.statements.UpdateStatement.UPDATING_PRIMARY_KEY_MESSAGE; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; @@ -57,18 +58,20 @@ public class TransactionStatementTest private static final TableId TABLE4_ID = TableId.fromString("00000000-0000-0000-0000-000000000004"); private static final TableId TABLE5_ID = TableId.fromString("00000000-0000-0000-0000-000000000005"); private static final TableId TABLE6_ID = TableId.fromString("00000000-0000-0000-0000-000000000006"); + private static final TableId TABLE7_ID = TableId.fromString("00000000-0000-0000-0000-000000000007"); @BeforeClass public static void beforeClass() throws Exception { SchemaLoader.prepareServer(); SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl1 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE1_ID), - parse("CREATE TABLE tbl2 (k int, c int, v int, primary key (k, c))", "ks").id(TABLE2_ID), - parse("CREATE TABLE tbl3 (k int PRIMARY KEY, \"with spaces\" int, \"with\"\"quote\" int, \"MiXeD_CaSe\" int)", "ks").id(TABLE3_ID), - parse("CREATE TABLE tbl4 (k int PRIMARY KEY, int_list list)", "ks").id(TABLE4_ID), - parse("CREATE TABLE tbl5 (k int PRIMARY KEY, v int)", "ks").id(TABLE5_ID), - parse("CREATE TABLE tbl6 (k int PRIMARY KEY, c counter)", "ks").id(TABLE6_ID)); + parse("CREATE TABLE tbl1 (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode = 'full'", "ks").id(TABLE1_ID), + parse("CREATE TABLE tbl2 (k int, c int, v int, primary key (k, c)) WITH transactional_mode = 'full'", "ks").id(TABLE2_ID), + parse("CREATE TABLE tbl3 (k int PRIMARY KEY, \"with spaces\" int, \"with\"\"quote\" int, \"MiXeD_CaSe\" int) WITH transactional_mode = 'full'", "ks").id(TABLE3_ID), + parse("CREATE TABLE tbl4 (k int PRIMARY KEY, int_list list) WITH transactional_mode = 'full'", "ks").id(TABLE4_ID), + parse("CREATE TABLE tbl5 (k int PRIMARY KEY, v int) WITH transactional_mode = 'full'", "ks").id(TABLE5_ID), + parse("CREATE TABLE tbl6 (k int PRIMARY KEY, c counter) WITH transactional_mode = 'full'", "ks").id(TABLE6_ID), + parse("CREATE TABLE tbl7 (k int PRIMARY KEY, v int) WITH transactional_mode = 'off'", "ks").id(TABLE7_ID)); } @Test @@ -400,6 +403,43 @@ public void shouldRejectLetSelectWithIncompletePartitionKey() .hasMessageContaining(String.format(ILLEGAL_RANGE_QUERY_MESSAGE, "LET assignment row1", "at [2:15]")); } + @Test + public void shouldRejectLetSelectOnNonTransactionalTable() + { + String query = "BEGIN TRANSACTION\n" + + " LET row1 = (SELECT * FROM ks.tbl7 WHERE k = 0);\n" + + " INSERT INTO ks.tbl5 (k, v) VALUES (1, 2);\n" + + "COMMIT TRANSACTION;"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", "at [2:15]")); + } + + @Test + public void shouldRejectSelectOnNonTransactionalTable() + { + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM ks.tbl7 WHERE k = 0;\n" + + "COMMIT TRANSACTION;"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", "at [2:3]")); + } + + @Test + public void shouldRejectUpdateOnNonTransactionalTable() + { + String query = "BEGIN TRANSACTION\n" + + " INSERT INTO ks.tbl7 (k, v) VALUES (1, 2);\n" + + "COMMIT TRANSACTION;"; + + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "INSERT", "at [2:3]")); + } + private static CQLStatement prepare(String query) { TransactionStatement.Parsed parsed = (TransactionStatement.Parsed) QueryProcessor.parseStatement(query); diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 5cbf0915b5a0..4ac8d242e86b 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -25,11 +25,15 @@ import java.util.List; import java.util.Random; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.stream.Collectors; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.distributed.shared.WithProperties; +import org.apache.cassandra.service.accord.*; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -47,10 +51,6 @@ import accord.local.SaveStatus; import accord.local.Status; import accord.local.Status.Durability; -import accord.messages.Accept; -import accord.messages.Apply; -import accord.messages.Commit; -import accord.messages.PreAccept; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; @@ -83,10 +83,6 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordCommandStore; -import org.apache.cassandra.service.accord.AccordKeyspace; -import org.apache.cassandra.service.accord.AccordTestUtils; -import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.FBUtilities; @@ -99,9 +95,7 @@ import static accord.utils.async.AsyncChains.getUninterruptibly; import static org.apache.cassandra.Util.spinAssertEquals; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; -import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.MAJORITY; -import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.NOT_DURABLE; -import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.UNIVERSAL; +import static org.apache.cassandra.db.compaction.CompactionAccordIteratorsTest.DurableBeforeType.*; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.service.accord.AccordKeyspace.*; import static org.junit.Assert.*; @@ -113,7 +107,6 @@ public class CompactionAccordIteratorsTest { private static final Logger logger = LoggerFactory.getLogger(CompactionAccordIteratorsTest.class); - private static final long CLOCK_START = 44; private static final long HLC_START = 41; private static final int NODE = 1; @@ -124,7 +117,7 @@ public class CompactionAccordIteratorsTest private static final TxnId SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 1, NODE, Kind.Read); private static final TxnId GT_TXN_ID = SECOND_TXN_ID; // For CommandsForKey where we test with two commands - private static final TxnId[] TXN_IDS = new TxnId[] {TXN_ID, SECOND_TXN_ID}; + private static final TxnId[] TXN_IDS = new TxnId[]{ TXN_ID, SECOND_TXN_ID }; private static final TxnId GT_SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, SECOND_TXN_ID.hlc() + 1, NODE); static ColumnFamilyStore commands; @@ -146,7 +139,7 @@ public static void beforeClass() throws Throwable SchemaLoader.prepareServer(); // Schema doesn't matter since this is a metadata only test SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), - parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c))", "ks")); + parse("CREATE TABLE tbl (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode = 'full'", "ks")); StorageService.instance.initServer(); commands = ColumnFamilyStore.getIfExists(SchemaConstants.ACCORD_KEYSPACE_NAME, AccordKeyspace.COMMANDS); @@ -266,9 +259,9 @@ private static Consumer> expectedAccordCommandsForKeyNoChange() assertEquals(1, partitions.size()); Partition partition = partitions.get(0); PartitionKey partitionKey = new PartitionKey(partition.metadata().id, partition.partitionKey()); - CommandsForKey cfk = CommandsForKeysAccessor.getCommandsForKey(partitionKey, ((Row)partition.unfilteredIterator().next())); + CommandsForKey cfk = CommandsForKeysAccessor.getCommandsForKey(partitionKey, ((Row) partition.unfilteredIterator().next())); assertEquals(TXN_IDS.length, cfk.size()); - for (int i = 0 ; i < TXN_IDS.length ; ++i) + for (int i = 0; i < TXN_IDS.length; ++i) assertEquals(TXN_IDS[i], cfk.txnId(i)); }; } @@ -350,7 +343,7 @@ Consumer> expectAccordCommandsTruncated() Partition partition = partitions.get(0); assertEquals(1, Iterators.size(partition.unfilteredIterator())); ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); - Row row = (Row)partition.unfilteredIterator().next(); + Row row = (Row) partition.unfilteredIterator().next(); assertEquals(CommandsColumns.TRUNCATE_FIELDS.length, row.columnCount()); for (ColumnMetadata cm : CommandsColumns.TRUNCATE_FIELDS) assertNotNull(row.getColumnData(cm)); @@ -369,7 +362,7 @@ Consumer> expectAccordCommandsNoChange() Partition partition = partitions.get(0); assertEquals(1, Iterators.size(partition.unfilteredIterator())); ByteBuffer[] partitionKeyComponents = CommandRows.splitPartitionKey(partition.partitionKey()); - Row row = (Row)partition.unfilteredIterator().next(); + Row row = (Row) partition.unfilteredIterator().next(); // execute_atleast is null, so when we read from the scanner the column won't be present in the partition Assertions.assertThat(new ArrayList<>(row.columns())).isEqualTo(commands.metadata().regularColumns().stream().filter(c -> !c.name.toString().equals("execute_atleast")).collect(Collectors.toList())); @@ -447,11 +440,20 @@ private static void flush(AccordCommandStore commandStore) } private void testWithCommandStore(TestWithCommandStore test, boolean additionalCommand) throws Throwable + { + try (WithProperties wp = new WithProperties().set(CassandraRelevantProperties.DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED, "true")) + { + testWithCommandStoreInternal(test, additionalCommand); + } + } + + private void testWithCommandStoreInternal(TestWithCommandStore test, boolean additionalCommand) throws Throwable { Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores().forEach(ColumnFamilyStore::truncateBlocking); + ((AccordService) AccordService.instance()).journal().truncateForTesting(); clock.set(CLOCK_START); AccordCommandStore commandStore = AccordTestUtils.createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - TxnId[] txnIds = additionalCommand ? TXN_IDS : new TxnId[] {TXN_ID}; + TxnId[] txnIds = additionalCommand ? TXN_IDS : new TxnId[]{ TXN_ID }; Txn writeTxn = AccordTestUtils.createWriteTxn(42); Txn readTxn = AccordTestUtils.createTxn(42); Seekable key = writeTxn.keys().get(0); @@ -462,32 +464,20 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - PreAccept preAccept = - PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); - commandStore.appendToJournal(preAccept); - CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); + CheckedCommands.preaccept(safe, txnId, partialTxn, route, null, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - Accept accept = - Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, txnId, partialTxn.keys(), partialDeps); - commandStore.appendToJournal(accept); - CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, txnId, partialDeps); + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, txnId, partialDeps, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - Commit commit = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, txnId, partialTxn.keys(), partialTxn, partialDeps, route, null); - commandStore.appendToJournal(commit); - CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, txnId, partialDeps); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, txnId, partialDeps, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { Pair result = AccordTestUtils.processTxnResultDirect(safe, txnId, partialTxn, txnId); - Apply apply = - Apply.SerializationSupport.create(txnId, partialRoute, txnId.epoch(), Apply.Kind.Minimal, partialTxn.keys(), txnId, partialDeps, partialTxn, null, result.left, result.right); - commandStore.appendToJournal(apply); - CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right); + CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right, appendDiffToKeyspace(commandStore)); }).beginAsResult()); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { safe.get(txnId, txnId, route).addListener(new Command.ProxyListener(txnId)); // add a junk listener just to test it in compaction @@ -495,12 +485,15 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC flush(commandStore); // The apply chain is asychronous, so it is easiest to just spin until it is applied // in order to have the updated state in the system table - spinAssertEquals(true, 5, () -> - getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys(), COMMANDS), safe -> safe.get(txnId, route.homeKey()).current().hasBeen(Status.Applied) - ).beginAsResult())); + spinAssertEquals(true, 5, () -> { + return getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys(), COMMANDS), safe -> { + Command command = safe.get(txnId, route.homeKey()).current(); + appendDiffToKeyspace(commandStore).accept(null, command); + return command.hasBeen(Status.Applied); + }).beginAsResult()); + }); flush(commandStore); } - UntypedResultSet commandsTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + AccordKeyspace.COMMANDS + ";"); logger.info(commandsTable.toStringUnsafe()); assertEquals(txnIds.length, commandsTable.size()); @@ -510,13 +503,21 @@ private void testWithCommandStore(TestWithCommandStore test, boolean additionalC UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS_FOR_KEY + ";"); logger.info(commandsForKeyTable.toStringUnsafe()); assertEquals(1, commandsForKeyTable.size()); - CommandsForKey cfk = CommandsForKeySerializer.fromBytes((Key)key, commandsForKeyTable.iterator().next().getBytes("data")); + CommandsForKey cfk = CommandsForKeySerializer.fromBytes((Key) key, commandsForKeyTable.iterator().next().getBytes("data")); assertEquals(txnIds.length, cfk.size()); - for (int i = 0 ; i < txnIds.length ; ++i) + for (int i = 0; i < txnIds.length; ++i) assertEquals(txnIds[i], cfk.txnId(i)); test.test(commandStore); } + // This little bit of magic is required because we do not expose range commands explicitly, but still need to compact them + private static BiConsumer appendDiffToKeyspace(AccordCommandStore commandStore) + { + return (before, after) -> { + AccordKeyspace.getCommandMutation(commandStore.id(), before, after, commandStore.nextSystemTimestampMicros()).applyUnsafe(); + }; + } + private List compactCFS(IAccordService mockAccordService, ColumnFamilyStore cfs) { List scanners = cfs.getLiveSSTables().stream().map(SSTableReader::getScanner).collect(Collectors.toList()); diff --git a/test/unit/org/apache/cassandra/journal/JournalTest.java b/test/unit/org/apache/cassandra/journal/JournalTest.java index b9a309d75266..241e465ba89b 100644 --- a/test/unit/org/apache/cassandra/journal/JournalTest.java +++ b/test/unit/org/apache/cassandra/journal/JournalTest.java @@ -47,16 +47,8 @@ public void testSimpleReadWrite() throws IOException File directory = new File(Files.createTempDirectory("JournalTest")); directory.deleteRecursiveOnExit(); - AsyncCallbacks callbacks = new AsyncCallbacks<>() - { - @Override public void onWrite(long segment, int position, int size, TimeUUID key, Long value, Object writeContext) {} - @Override public void onWriteFailed(TimeUUID key, Long value, Object writeContext, Throwable cause) {} - @Override public void onFlush(long segment, int position) {} - @Override public void onFlushFailed(Throwable cause) {} - }; - Journal journal = - new Journal<>("TestJournal", directory, TestParams.INSTANCE, callbacks, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); journal.start(); @@ -65,10 +57,10 @@ public void testSimpleReadWrite() throws IOException TimeUUID id3 = nextTimeUUID(); TimeUUID id4 = nextTimeUUID(); - journal.write(id1, 1L, Collections.singleton(1)); - journal.write(id2, 2L, Collections.singleton(1)); - journal.write(id3, 3L, Collections.singleton(1)); - journal.write(id4, 4L, Collections.singleton(1)); + journal.blockingWrite(id1, 1L, Collections.singleton(1)); + journal.blockingWrite(id2, 2L, Collections.singleton(1)); + journal.blockingWrite(id3, 3L, Collections.singleton(1)); + journal.blockingWrite(id4, 4L, Collections.singleton(1)); assertEquals(1L, (long) journal.readFirst(id1)); assertEquals(2L, (long) journal.readFirst(id2)); @@ -77,7 +69,7 @@ public void testSimpleReadWrite() throws IOException journal.shutdown(); - journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, callbacks, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); journal.start(); assertEquals(1L, (long) journal.readFirst(id1)); diff --git a/test/unit/org/apache/cassandra/journal/TestParams.java b/test/unit/org/apache/cassandra/journal/TestParams.java index 7c22e896b5e4..9a9254ce9ba7 100644 --- a/test/unit/org/apache/cassandra/journal/TestParams.java +++ b/test/unit/org/apache/cassandra/journal/TestParams.java @@ -21,7 +21,7 @@ public class TestParams implements Params { - static final TestParams INSTANCE = new TestParams(); + public static final TestParams INSTANCE = new TestParams(); @Override public int segmentSize() diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 57c7401cb673..e56f634d48af 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -31,18 +31,16 @@ import accord.api.Key; import accord.api.Result; -import accord.local.CommandsForKey; -import accord.impl.TimestampsForKeys; import accord.impl.TimestampsForKey; +import accord.impl.TimestampsForKeys; import accord.local.Command; +import accord.local.CommandsForKey; import accord.local.CommonAttributes; import accord.local.SaveStatus; -import accord.messages.Apply; import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Range; -import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.RoutingKeys; @@ -104,7 +102,7 @@ public void commandLoadSave() throws Throwable { AtomicLong clock = new AtomicLong(0); PartialTxn depTxn = createPartialTxn(0); - Key key = (Key)depTxn.keys().get(0); + Key key = (Key) depTxn.keys().get(0); Range range = key.toUnseekable().asRange(); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); @@ -139,31 +137,19 @@ public void commandLoadSave() throws Throwable attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); - Command command = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, - waitingOn, result.left, CommandSerializers.APPLIED); + Command expected = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, + waitingOn, result.left, CommandSerializers.APPLIED); AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); - safeCommand.set(command); - - Apply apply = - Apply.SerializationSupport.create(txnId, - route.slice(Ranges.of(TokenRange.fullRange(tableId))), - 1L, - Apply.Kind.Maximal, - depTxn.keys(), - executeAt, - dependencies, - txn, - null, - result.left, - CommandSerializers.APPLIED); - commandStore.appendToJournal(apply); + safeCommand.set(expected); + + AccordTestUtils.appendCommandsBlocking(commandStore, null, expected); AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); - logger.info("E: {}", command); - Command actual = AccordKeyspace.loadCommand(commandStore, txnId); + logger.info("E: {}", expected); + Command actual = commandStore.loadCommand(txnId); logger.info("A: {}", actual); - Assert.assertEquals(command, actual); + Assert.assertEquals(expected, actual); } @Test diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 71e821d32950..d78bd63cbce6 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -31,6 +31,8 @@ import accord.local.KeyHistory; import accord.local.Node; import accord.local.PreLoadContext; +import accord.local.SafeCommand; +import accord.local.SafeCommandStore; import accord.local.Status; import accord.messages.Accept; import accord.messages.Commit; @@ -41,6 +43,7 @@ import accord.primitives.PartialDeps; import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -100,25 +103,33 @@ public void basicCycleTest() throws Throwable PartialRoute route = fullRoute.slice(fullRange(txn)); PartialTxn partialTxn = txn.slice(route.covering(), true); PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, false, 1, partialTxn, fullRoute); - commandStore.appendToJournal(preAccept); // Check preaccept - getUninterruptibly(commandStore.execute(preAccept, instance -> { - PreAccept.PreAcceptReply reply = preAccept.apply(instance); + getUninterruptibly(commandStore.execute(preAccept, safeStore -> { + SafeCommand safeCommand = safeStore.get(txnId, txnId, route); + Command before = safeCommand.current(); + PreAccept.PreAcceptReply reply = preAccept.apply(safeStore); + Command after = safeCommand.current(); + Assert.assertTrue(reply.isOk()); PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; Assert.assertEquals(txnId, ok.witnessedAt); Assert.assertTrue(ok.deps.isEmpty()); + + AccordTestUtils.appendCommandsBlocking(commandStore, before, after); })); - getUninterruptibly(commandStore.execute(preAccept, instance -> { - Command command = instance.ifInitialised(txnId).current(); - Assert.assertEquals(txnId, command.executeAt()); - Assert.assertEquals(Status.PreAccepted, command.status()); - Assert.assertTrue(command.partialDeps() == null || command.partialDeps().isEmpty()); + getUninterruptibly(commandStore.execute(preAccept, safeStore -> { + Command before = safeStore.ifInitialised(txnId).current(); + SafeCommand safeCommand = safeStore.get(txnId, txnId, route); + Assert.assertEquals(txnId, before.executeAt()); + Assert.assertEquals(Status.PreAccepted, before.status()); + Assert.assertTrue(before.partialDeps() == null || before.partialDeps().isEmpty()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).get(key(1)).current(); + CommandsForKey cfk = safeStore.get(key(1)).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); + Command after = safeCommand.current(); + AccordTestUtils.appendCommandsBlocking(commandStore, before, after); })); // check accept @@ -131,37 +142,42 @@ public void basicCycleTest() throws Throwable deps = builder.build(); } Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); - commandStore.appendToJournal(accept); - getUninterruptibly(commandStore.execute(accept, instance -> { - Accept.AcceptReply reply = accept.apply(instance); + getUninterruptibly(commandStore.execute(accept, safeStore -> { + Command before = safeStore.ifInitialised(txnId).current(); + Accept.AcceptReply reply = accept.apply(safeStore); Assert.assertTrue(reply.isOk()); Assert.assertTrue(reply.deps.isEmpty()); + Command after = safeStore.ifInitialised(txnId).current(); + AccordTestUtils.appendCommandsBlocking(commandStore, before, after); })); - getUninterruptibly(commandStore.execute(accept, instance -> { - Command command = instance.ifInitialised(txnId).current(); - Assert.assertEquals(executeAt, command.executeAt()); - Assert.assertEquals(Status.Accepted, command.status()); - Assert.assertEquals(deps, command.partialDeps()); + getUninterruptibly(commandStore.execute(accept, safeStore -> { + Command before = safeStore.ifInitialised(txnId).current(); + Assert.assertEquals(executeAt, before.executeAt()); + Assert.assertEquals(Status.Accepted, before.status()); + Assert.assertEquals(deps, before.partialDeps()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).get(key(1)).current(); + CommandsForKey cfk = safeStore.get(key(1)).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); + Command after = safeStore.ifInitialised(txnId).current(); + AccordTestUtils.appendCommandsBlocking(commandStore, before, after); })); // check commit Commit commit = Commit.SerializerSupport.create(txnId, route, 1, Commit.Kind.StableWithTxnAndDeps, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, fullRoute, null); - commandStore.appendToJournal(commit); getUninterruptibly(commandStore.execute(commit, commit::apply)); - getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.COMMANDS), instance -> { - Command command = instance.ifInitialised(txnId).current(); - Assert.assertEquals(commit.executeAt, command.executeAt()); - Assert.assertTrue(command.hasBeen(Status.Committed)); - Assert.assertEquals(commit.partialDeps, command.partialDeps()); + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.COMMANDS), safeStore -> { + Command before = safeStore.ifInitialised(txnId).current(); + Assert.assertEquals(commit.executeAt, before.executeAt()); + Assert.assertTrue(before.hasBeen(Status.Committed)); + Assert.assertEquals(commit.partialDeps, before.partialDeps()); - CommandsForKey cfk = ((AccordSafeCommandStore) instance).get(key(1)).current(); + CommandsForKey cfk = safeStore.get(key(1)).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); + Command after = safeStore.ifInitialised(txnId).current(); + AccordTestUtils.appendCommandsBlocking(commandStore, before, after); })); } @@ -179,19 +195,32 @@ public void computeDeps() throws Throwable PartialRoute route = fullRoute.slice(fullRange(txn)); PartialTxn partialTxn = txn.slice(route.covering(), true); PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, false, 1, partialTxn, fullRoute); - commandStore.appendToJournal(preAccept1); - getUninterruptibly(commandStore.execute(preAccept1, preAccept1::apply)); + getUninterruptibly(commandStore.execute(preAccept1, safeStore -> { + persistDiff(commandStore, safeStore, txnId1, route, () -> { + preAccept1.apply(safeStore); + }); + })); // second preaccept should identify txnId1 as a dependency TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); PreAccept preAccept2 = PreAccept.SerializerSupport.create(txnId2, route, 1, 1, false, 1, partialTxn, fullRoute); - commandStore.appendToJournal(preAccept2); - getUninterruptibly(commandStore.execute(preAccept2, instance -> { - PreAccept.PreAcceptReply reply = preAccept2.apply(instance); - Assert.assertTrue(reply.isOk()); - PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; - Assert.assertTrue(ok.deps.contains(txnId1)); + getUninterruptibly(commandStore.execute(preAccept2, safeStore -> { + persistDiff(commandStore, safeStore, txnId2, route, () -> { + PreAccept.PreAcceptReply reply = preAccept2.apply(safeStore); + Assert.assertTrue(reply.isOk()); + PreAccept.PreAcceptOk ok = (PreAccept.PreAcceptOk) reply; + Assert.assertTrue(ok.deps.contains(txnId1)); + }); })); } + + private static void persistDiff(AccordCommandStore commandStore, SafeCommandStore safeStore, TxnId txnId, Route route, Runnable runnable) + { + SafeCommand safeCommand = safeStore.get(txnId, txnId, route); + Command before = safeCommand.current(); + runnable.run(); + Command after = safeCommand.current(); + AccordTestUtils.appendCommandsBlocking(commandStore, before, after); + } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java new file mode 100644 index 000000000000..0f2b9d395be2 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.primitives.TxnId; +import accord.utils.AccordGens; +import accord.utils.RandomSource; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.journal.TestParams; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.utils.StorageCompatibilityMode; + +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; + +public class AccordJournalOrderTest +{ + @BeforeClass + public static void beforeClass() throws Throwable + { + CassandraRelevantProperties.JUNIT_STORAGE_COMPATIBILITY_MODE.setEnum(StorageCompatibilityMode.NONE); + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); + TableMetadata tbl = Schema.instance.getTableMetadata("ks", "tbl"); + Assert.assertEquals(TransactionalMode.full, tbl.params.transactionalMode); + StorageService.instance.initServer(); + } + + @Test + public void simpleKeyTest() + { + if (new File(DatabaseDescriptor.getAccordJournalDirectory()).exists()) + ServerTestUtils.cleanupDirectory(DatabaseDescriptor.getAccordJournalDirectory()); + AccordJournal accordJournal = new AccordJournal(SimpleAccordEndpointMapper.INSTANCE, TestParams.INSTANCE); + accordJournal.start(null); + RandomSource randomSource = RandomSource.wrap(new Random()); + TxnId id1 = AccordGens.txnIds().next(randomSource); + TxnId id2 = AccordGens.txnIds().next(randomSource); + + Map res = new HashMap<>(); + for (int i = 0; i < 10_000; i++) + { + TxnId txnId = randomSource.nextBoolean() ? id1 : id2; + JournalKey key = new JournalKey(txnId, AccordJournal.Type.SAVED_COMMAND, randomSource.nextInt(5)); + res.compute(key, (k, prev) -> prev == null ? 1 : prev + 1); + accordJournal.appendCommand(key.commandStoreId, + Collections.singletonList(new SavedCommand.SavedDiff(txnId, + AccordGens.timestamps().next(randomSource), + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null)), + null, + () -> {}); + } + + Runnable check = () -> { + for (JournalKey key : res.keySet()) + { + List diffs = accordJournal.loadDiffs(key.commandStoreId, key.timestamp); + Assert.assertEquals(diffs.size(), res.get(key).intValue()); + } + }; + + check.run(); + accordJournal.closeCurrentSegmentForTesting(); + check.run(); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java index b24424b05518..375fffa8fecb 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java @@ -33,7 +33,6 @@ import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; -import org.apache.cassandra.service.accord.AccordJournal.Key; import org.apache.cassandra.utils.AsymmetricOrdering; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.FBUtilities.Order; @@ -58,12 +57,12 @@ public void keySerde() qt().forAll(keyGen()).check(key -> { buffer.clear(); - int expectedSize = Key.SUPPORT.serializedSize(1); - Key.SUPPORT.serialize(key, buffer, 1); + int expectedSize = JournalKey.SUPPORT.serializedSize(1); + JournalKey.SUPPORT.serialize(key, buffer, 1); assertThat(buffer.getLength()).isEqualTo(expectedSize); try (DataInputBuffer input = new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)) { - Key read = Key.SUPPORT.deserialize(input, 1); + JournalKey read = JournalKey.SUPPORT.deserialize(input, 1); assertThat(read).isEqualTo(key); } }); @@ -74,29 +73,29 @@ public void compareKeys() { qt().forAll(Gens.lists(keyGen()).ofSizeBetween(2, 100)).check(keys -> { - keys.sort(Key.SUPPORT); + keys.sort(JournalKey.SUPPORT); List buffers = new ArrayList<>(keys.size()); - for (Key k : keys) buffers.add(toBuffer(k)); + for (JournalKey k : keys) buffers.add(toBuffer(k)); for (int i = 0; i < keys.size(); i++) { - Key outerKey = keys.get(i); + JournalKey outerKey = keys.get(i); for (int j = 0; j < keys.size(); j++) { - Key innerKey = keys.get(j); + JournalKey innerKey = keys.get(j); ByteBuffer innerBuffer = buffers.get(j); - Order expected = FBUtilities.compare(outerKey, innerKey, Key.SUPPORT); - Order actual = FBUtilities.compare(outerKey, innerBuffer, new AsymmetricOrdering() + Order expected = FBUtilities.compare(outerKey, innerKey, JournalKey.SUPPORT); + Order actual = FBUtilities.compare(outerKey, innerBuffer, new AsymmetricOrdering() { @Override - public int compareAsymmetric(Key left, ByteBuffer right) + public int compareAsymmetric(JournalKey left, ByteBuffer right) { - return Key.SUPPORT.compareWithKeyAt(left, right, 0, 1); + return JournalKey.SUPPORT.compareWithKeyAt(left, right, 0, 1); } @Override - public int compare(@Nullable Key left, @Nullable Key right) + public int compare(@Nullable JournalKey left, @Nullable JournalKey right) { throw new UnsupportedOperationException(); } @@ -107,11 +106,11 @@ public int compare(@Nullable Key left, @Nullable Key right) }); } - private static ByteBuffer toBuffer(Key k) + private static ByteBuffer toBuffer(JournalKey k) { - try (DataOutputBuffer buffer = new DataOutputBuffer(Key.SUPPORT.serializedSize(1))) + try (DataOutputBuffer buffer = new DataOutputBuffer(JournalKey.SUPPORT.serializedSize(1))) { - Key.SUPPORT.serialize(k, buffer, 1); + JournalKey.SUPPORT.serialize(k, buffer, 1); return buffer.unsafeGetBufferAndFlip(); } catch (IOException e) @@ -120,10 +119,10 @@ private static ByteBuffer toBuffer(Key k) } } - private Gen keyGen() + private Gen keyGen() { Gen txnIdGen = AccordGens.txnIds(); Gen typeGen = Gens.enums().all(AccordJournal.Type.class); - return rs -> new Key(txnIdGen.next(rs), typeGen.next(rs)); + return rs -> new JournalKey(txnIdGen.next(rs), typeGen.next(rs)); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 9c613624fecf..d58105339b5d 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -37,13 +37,11 @@ import accord.local.Node; import accord.local.SaveStatus; import accord.local.Status; -import accord.messages.Commit; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.KeyDeps; import accord.primitives.Keys; -import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.RangeDeps; import accord.primitives.Ranges; @@ -97,7 +95,7 @@ public void serde() { AtomicLong now = new AtomicLong(); - String tableName = createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))"); + String tableName = createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode = 'full'"); TableId tableId = Schema.instance.getTableMetadata(KEYSPACE, tableName).id; Ranges scope = Ranges.of(new TokenRange(AccordRoutingKey.SentinelKey.min(tableId), AccordRoutingKey.SentinelKey.max(tableId))); @@ -111,8 +109,7 @@ public void serde() RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); FullRoute route = partialTxn.keys().toRoute(routingKey); Deps deps = new Deps(KeyDeps.none((Keys) txn.keys()), RangeDeps.NONE); - PartialDeps partialDeps = deps.slice(scope); - + deps.slice(scope); CommonAttributes.Mutable common = new CommonAttributes.Mutable(id); common.partialTxn(partialTxn); @@ -125,13 +122,12 @@ public void serde() AccordSafeCommand safeCommand = new AccordSafeCommand(AccordTestUtils.loaded(id, null)); safeCommand.set(committed); - Commit commit = Commit.SerializerSupport.create(id, route.slice(scope), 1, Commit.Kind.CommitSlowPath, Ballot.ZERO, id, partialTxn.keys(), partialTxn, partialDeps, route, null); - store.appendToJournal(commit); + AccordTestUtils.appendCommandsBlocking(store, null, committed); Mutation mutation = AccordKeyspace.getCommandMutation(store, safeCommand, 42); mutation.apply(); - Command loaded = AccordKeyspace.loadCommand(store, id); + Command loaded = store.loadCommand(id); Assertions.assertThat(loaded).isEqualTo(committed); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 0d3b3d005c54..0f28c6c7c246 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -34,6 +34,9 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.File; import org.junit.Assert; import accord.api.Data; @@ -95,6 +98,7 @@ import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static accord.primitives.Routable.Domain.Key; @@ -400,6 +404,9 @@ public static AccordCommandStore createAccordCommandStore( public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } }; + + if (new File(DatabaseDescriptor.getAccordJournalDirectory()).exists()) + ServerTestUtils.cleanupDirectory(DatabaseDescriptor.getAccordJournalDirectory()); AccordJournal journal = new AccordJournal(null, new AccordSpec.JournalSpec()); journal.start(null); @@ -498,4 +505,19 @@ public static Range range(long left, long right) return range(token(left), token(right)); } + public static void appendCommandsBlocking(AccordCommandStore commandStore, Command after) + { + appendCommandsBlocking(commandStore, null, after); + } + + public static void appendCommandsBlocking(AccordCommandStore commandStore, Command before, Command after) + { + SavedCommand.SavedDiff diff = SavedCommand.diff(before, after); + if (diff != null) + { + Condition condition = Condition.newOneTimeCondition(); + commandStore.appendCommands(Collections.singletonList(diff), null, condition::signal); + condition.awaitUninterruptibly(30, TimeUnit.SECONDS); + } + } } diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index dc22f540eddf..d09ff6eefff0 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -18,199 +18,52 @@ package org.apache.cassandra.service.accord; -import java.util.EnumSet; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.Set; -import com.google.common.collect.Sets; - -import accord.local.SerializerSupport; -import accord.messages.Accept; -import accord.messages.Apply; -import accord.messages.ApplyThenWaitUntilApplied; -import accord.messages.BeginRecovery; -import accord.messages.Commit; -import accord.messages.Message; -import accord.messages.MessageType; -import accord.messages.PreAccept; -import accord.messages.Propagate; -import accord.primitives.Ballot; +import accord.local.Command; import accord.primitives.TxnId; -import org.agrona.collections.ObjectHashSet; -import org.apache.cassandra.service.accord.AccordJournal.Key; import org.apache.cassandra.service.accord.AccordJournal.Type; -import static accord.messages.MessageType.ACCEPT_REQ; -import static accord.messages.MessageType.APPLY_MAXIMAL_REQ; -import static accord.messages.MessageType.APPLY_MINIMAL_REQ; -import static accord.messages.MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ; -import static accord.messages.MessageType.BEGIN_RECOVER_REQ; -import static accord.messages.MessageType.COMMIT_MAXIMAL_REQ; -import static accord.messages.MessageType.COMMIT_SLOW_PATH_REQ; -import static accord.messages.MessageType.PRE_ACCEPT_REQ; -import static accord.messages.MessageType.PROPAGATE_APPLY_MSG; -import static accord.messages.MessageType.PROPAGATE_OTHER_MSG; -import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; -import static accord.messages.MessageType.PROPAGATE_STABLE_MSG; -import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; -import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; -import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; - public class MockJournal implements IJournal { - private final Map writes = new HashMap<>(); + private final Map> commands = new HashMap<>(); + @Override - public SerializerSupport.MessageProvider makeMessageProvider(TxnId txnId) + public Command loadCommand(int commandStoreId, TxnId txnId) { - return new SerializerSupport.MessageProvider() - { - @Override - public TxnId txnId() - { - return txnId; - } - - @Override - public Set test(Set messages) - { - Set keys = new ObjectHashSet<>(messages.size() + 1, 0.9f); - for (MessageType message : messages) - for (Type synonymousType : Type.synonymousTypesFromMessageType(message)) - keys.add(new Key(txnId, synonymousType)); - Set presentKeys = Sets.intersection(writes.keySet(), keys); - Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); - for (Key key : presentKeys) - presentMessages.add(key.type.outgoingType); - return presentMessages; - } - - @Override - public Set all() - { - Set types = EnumSet.allOf(Type.class); - Set keys = new ObjectHashSet<>(types.size() + 1, 0.9f); - for (Type type : types) - keys.add(new Key(txnId, type)); - Set presentKeys = Sets.intersection(writes.keySet(), keys); - Set presentMessages = new ObjectHashSet<>(presentKeys.size() + 1, 0.9f); - for (Key key : presentKeys) - presentMessages.add(key.type.outgoingType); - return presentMessages; - } - - private T get(Key key) - { - return (T) writes.get(key); - } - - private T get(MessageType messageType) - { - for (Type type : Type.synonymousTypesFromMessageType(messageType)) - { - T value = get(new Key(txnId, type)); - if (value != null) return value; - } - return null; - } - - @Override - public PreAccept preAccept() - { - return get(PRE_ACCEPT_REQ); - } - - @Override - public BeginRecovery beginRecover() - { - return get(BEGIN_RECOVER_REQ); - } - - @Override - public Propagate propagatePreAccept() - { - return get(PROPAGATE_PRE_ACCEPT_MSG); - } - - @Override - public Accept accept(Ballot ballot) - { - return get(ACCEPT_REQ); - } - - @Override - public Commit commitSlowPath() - { - return get(COMMIT_SLOW_PATH_REQ); - } - - @Override - public Commit commitMaximal() - { - return get(COMMIT_MAXIMAL_REQ); - } - - @Override - public Commit stableFastPath() - { - return get(STABLE_FAST_PATH_REQ); - } - - @Override - public Commit stableSlowPath() - { - return get(STABLE_SLOW_PATH_REQ); - } - - @Override - public Commit stableMaximal() - { - return get(STABLE_MAXIMAL_REQ); - } - - @Override - public Propagate propagateStable() - { - return get(PROPAGATE_STABLE_MSG); - } - - @Override - public Apply applyMinimal() - { - return get(APPLY_MINIMAL_REQ); - } - - @Override - public Apply applyMaximal() - { - return get(APPLY_MAXIMAL_REQ); - } - - @Override - public Propagate propagateApply() - { - return get(PROPAGATE_APPLY_MSG); - } - - @Override - public Propagate propagateOther() - { - return get(PROPAGATE_OTHER_MSG); - } - - @Override - public ApplyThenWaitUntilApplied applyThenWaitUntilApplied() - { - return get(APPLY_THEN_WAIT_UNTIL_APPLIED_REQ); - } - }; + Type type = Type.SAVED_COMMAND; + JournalKey key = new JournalKey(txnId, type, commandStoreId); + List saved = commands.get(key); + if (saved == null) + return null; + return SavedCommand.reconstructFromDiff(new ArrayList<>(saved)); } @Override - public void appendMessageBlocking(Message message) + public void appendCommand(int commandStoreId, List diffs, List sanityCheck, Runnable onFlush) { - Type type = Type.fromMessageType(message.type()); - Key key = new Key(type.txnId(message), type); - writes.put(key, message); + Type type = Type.SAVED_COMMAND; + for (SavedCommand.SavedDiff diff : diffs) + { + JournalKey key = new JournalKey(diff.txnId, type, commandStoreId); + commands.computeIfAbsent(key, (ignore_) -> new ArrayList<>()) + .add(new SavedCommand.LoadedDiff(diff.txnId, + diff.executeAt, + diff.saveStatus, + diff.durability, + diff.acceptedOrCommitted, + diff.promised, + diff.route, + diff.partialTxn, + diff.partialDeps, + diff.additionalKeysOrRanges, + (i1, i2) -> diff.waitingOn, + diff.writes, + diff.listeners)); + } + onFlush.run(); } } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 1fd5bb3a3a92..83db8accef0b 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -38,7 +38,6 @@ import accord.local.PreLoadContext; import accord.local.SafeCommandStore; import accord.messages.BeginRecovery; -import accord.messages.Message; import accord.messages.PreAccept; import accord.messages.TxnRequest; import accord.primitives.Ballot; @@ -332,8 +331,6 @@ public AsyncResult processAsync(TxnRequest request) public AsyncResult processAsync(PreLoadContext loadCtx, Function function) { - if (loadCtx instanceof Message) - journal.appendMessageBlocking((Message) loadCtx); return store.submit(loadCtx, function).beginAsResult(); } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java index a10b7e0646bc..66485cf8d622 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Map; +import org.junit.Ignore; import org.junit.Test; import accord.api.Key; @@ -46,6 +47,7 @@ import static accord.utils.Property.qt; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; +@Ignore // TODO (required): This class relies on removed ExecutionOrder for correctness, and needs to be adjusted public class SimulatedDepsTest extends SimulatedAccordCommandStoreTestBase { @Test diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java index 6ca336c98270..c345a2686626 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java @@ -29,6 +29,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.junit.Ignore; import org.junit.Test; import accord.api.Key; @@ -52,6 +53,7 @@ import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; +@Ignore // TODO (required): This class relies on removed ExecutionOrder for correctness, and needs to be adjusted public class SimulatedMultiKeyAndRangeTest extends SimulatedAccordCommandStoreTestBase { @Test diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java index eec34195c545..b3df25bdfb80 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; +import org.junit.Ignore; import org.junit.Test; import accord.api.Key; @@ -41,6 +42,7 @@ import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; +@Ignore // TODO (required): This class relies on removed ExecutionOrder for correctness, and needs to be adjusted public class SimulatedRandomKeysWithRangeConflictTest extends SimulatedAccordCommandStoreTestBase { @Test diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index e7aa2139c3f0..7dfae1e310fa 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -24,6 +24,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; import java.util.function.Consumer; import accord.utils.DefaultRandom; @@ -47,9 +48,6 @@ import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.local.SaveStatus; -import accord.messages.Accept; -import accord.messages.Commit; -import accord.messages.PreAccept; import accord.primitives.Ballot; import accord.primitives.FullRoute; import accord.primitives.Keys; @@ -57,7 +55,6 @@ import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; import accord.primitives.Ranges; -import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -84,6 +81,7 @@ import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.concurrent.Condition; import org.assertj.core.api.Assertions; import org.awaitility.Awaitility; import org.mockito.Mockito; @@ -178,20 +176,7 @@ private static Command createStableAndPersist(AccordCommandStore commandStore, T safeCommand.set(command); AccordKeyspace.getCommandMutation(commandStore, safeCommand, commandStore.nextSystemTimestampMicros()).apply(); - Commit commit = - Commit.SerializerSupport.create(txnId, - command.route().slice(AccordTestUtils.fullRange(command.partialTxn().keys())), - txnId.epoch(), - Commit.Kind.StableWithTxnAndDeps, - Ballot.ZERO, - executeAt, - command.partialTxn().keys(), - command.partialTxn(), - command.partialDeps(), - Route.castToFullRoute(command.route()), - null); - commandStore.appendToJournal(commit); - + appendDiffToLog(commandStore).accept(null, command); return command; } @@ -211,23 +196,14 @@ private static Command createStableUsingFastLifeCycle(AccordCommandStore command RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); FullRoute route = partialTxn.keys().toRoute(routingKey); Ranges ranges = AccordTestUtils.fullRange(partialTxn.keys()); - PartialRoute partialRoute = route.slice(ranges); + route.slice(ranges); PartialDeps deps = PartialDeps.builder(ranges).build(); - // create and write messages to the journal for loading to succeed - PreAccept preAccept = - PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); - Commit stable = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableFastPath, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); - - commandStore.appendToJournal(preAccept); - commandStore.appendToJournal(stable); - try { Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { - CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); - CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); + CheckedCommands.preaccept(safe, txnId, partialTxn, route, null, appendDiffToLog(commandStore)); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); return safe.ifInitialised(txnId).current(); }).beginAsResult()); @@ -252,6 +228,15 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command return createStableUsingSlowLifeCycle(commandStore, txnId, txnId); } + private static BiConsumer appendDiffToLog(AccordCommandStore commandStore) + { + return (before, after) -> { + Condition condition = Condition.newOneTimeCondition(); + commandStore.appendToLog(before, after, condition::signal); + condition.awaitUninterruptibly(); + }; + } + private static Command createStableUsingSlowLifeCycle(AccordCommandStore commandStore, TxnId txnId, Timestamp executeAt) { PartialTxn partialTxn = createPartialTxn(0); @@ -261,28 +246,13 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command PartialRoute partialRoute = route.slice(ranges); PartialDeps deps = PartialDeps.builder(ranges).build(); - // create and write messages to the journal for loading to succeed - PreAccept preAccept = - PreAccept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, txnId.epoch(), partialTxn, route); - Accept accept = - Accept.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), txnId.epoch(), false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); - Commit commit = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.CommitSlowPath, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); - Commit stable = - Commit.SerializerSupport.create(txnId, partialRoute, txnId.epoch(), Commit.Kind.StableSlowPath, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, route, null); - - commandStore.appendToJournal(preAccept); - commandStore.appendToJournal(accept); - commandStore.appendToJournal(commit); - commandStore.appendToJournal(stable); - try { Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { - CheckedCommands.preaccept(safe, txnId, partialTxn, route, null); - CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps); - CheckedCommands.commit(safe, SaveStatus.Committed, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); - CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps); + CheckedCommands.preaccept(safe, txnId, partialTxn, route, null, appendDiffToLog(commandStore)); + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps, appendDiffToLog(commandStore)); + CheckedCommands.commit(safe, SaveStatus.Committed, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); return safe.ifInitialised(txnId).current(); }).beginAsResult()); @@ -384,7 +354,10 @@ public void loadFail() commandStore.executeBlocking(() -> commandStore.setCapacity(0)); Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); - qt().withPure(false).withExamples(50).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { + qt().withPure(false) + .withSeed(-3537445084098883509L).withExamples(50) + .forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)) + .check((rs, ids) -> { before(); // truncate tables createCommand(commandStore, rs, ids); @@ -400,7 +373,8 @@ public void loadFail() commandStore.commandCache().unsafeSetLoadFunction(txnId -> { logger.info("Attempting to load {}; expected to fail? {}", txnId, failed.get(txnId)); - if (!failed.get(txnId)) return AccordKeyspace.loadCommand(commandStore, txnId); + if (!failed.get(txnId)) + return commandStore.loadCommand(txnId); throw new NullPointerException("txn_id " + txnId); }); AsyncOperation o1 = new AsyncOperation.ForConsumer(commandStore, ctx, consumer); @@ -418,8 +392,15 @@ public void loadFail() awaitDone(commandStore, ids, keys); // can we recover? - commandStore.commandCache().unsafeSetLoadFunction(txnId -> AccordKeyspace.loadCommand(commandStore, txnId)); - AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> ids.forEach(id -> store.ifInitialised(id).readyToExecute(store))); + commandStore.commandCache().unsafeSetLoadFunction(txnId -> { + Command cmd = commandStore.loadCommand(txnId); + return cmd; + }); + AsyncOperation.ForConsumer o2 = new AsyncOperation.ForConsumer(commandStore, ctx, store -> { + ids.forEach(id -> { + store.ifInitialised(id).readyToExecute(store); + }); + }); getUninterruptibly(o2); }); } From 6905157b0e210f238d908ab746b6041faebf2bf6 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 18 Jul 2024 17:00:24 +0100 Subject: [PATCH 125/340] CommandsForKey Improvements incl Pruning CommandsForKey periodically self-prunes, so as to continue functioning well in-between garbage collections. Once we prune we are left with potentially incomplete information, and have to sometimes load per-command information from disk. But the payoff is ensuring CommandsForKey objects - which drive the majority of the state machine - are kept to a reasonable size. patch by Benedict; reviewed by Blake Eggleston and David Capwell --- modules/accord | 2 +- .../config/CassandraRelevantProperties.java | 1 + .../dht/IPartitionerDependentSerializer.java | 3 +- .../org/apache/cassandra/io/util/Memory.java | 2 +- .../service/accord/AccordCallback.java | 4 +- .../service/accord/AccordCommandStore.java | 26 +- .../service/accord/AccordKeyspace.java | 9 +- .../service/accord/AccordObjectSizes.java | 40 +- .../service/accord/AccordSafeCommand.java | 41 +- .../accord/AccordSafeCommandStore.java | 23 +- .../accord/AccordSafeCommandsForKey.java | 14 +- .../service/accord/AccordSafeState.java | 2 +- .../accord/AccordSafeTimestampsForKey.java | 7 - .../service/accord/AccordStateCache.java | 13 +- .../accord/ImmutableAccordSafeState.java | 6 - .../service/accord/SavedCommand.java | 6 +- .../service/accord/api/AccordAgent.java | 13 + .../service/accord/api/AccordRoutingKey.java | 4 +- .../service/accord/async/AsyncLoader.java | 23 +- .../service/accord/async/AsyncOperation.java | 38 +- .../serializers/CommandStoreSerializers.java | 9 +- .../serializers/CommandsForKeySerializer.java | 395 ++++++++++++------ .../accord/serializers/DepsSerializer.java | 137 +++--- .../serializers/WaitingOnSerializer.java | 133 +----- .../cassandra/service/accord/txn/TxnRead.java | 18 - .../service/accord/txn/TxnUpdate.java | 16 - .../apache/cassandra/utils/MerkleTree.java | 2 +- .../cassandra/utils/concurrent/Ref.java | 44 +- .../cassandra/utils/memory/BufferPool.java | 4 +- .../accord/AccordIncrementalRepairTest.java | 2 +- .../test/accord/AccordLoadTest.java | 10 + .../test/log/FetchLogFromPeers2Test.java | 1 - .../CompactionAccordIteratorsTest.java | 7 +- .../org/apache/cassandra/dht/TokenTest.java | 51 +++ .../cassandra/io/IVersionedSerializers.java | 4 +- .../accord/AccordCommandStoreTest.java | 12 +- .../service/accord/AccordCommandTest.java | 2 +- .../service/accord/AccordKeyspaceTest.java | 3 +- .../service/accord/AccordStateCacheTest.java | 5 - .../service/accord/async/AsyncLoaderTest.java | 14 +- .../accord/async/AsyncOperationTest.java | 37 +- .../CommandStoreSerializersTest.java | 72 ++++ .../CommandsForKeySerializerTest.java | 88 ++-- .../serializers/DepsSerializerTest.java | 64 +++ .../serializers/WaitingOnSerializerTest.java | 20 +- .../cassandra/utils/AccordGenerators.java | 65 ++- 46 files changed, 960 insertions(+), 532 deletions(-) create mode 100644 test/unit/org/apache/cassandra/dht/TokenTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/CommandStoreSerializersTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java diff --git a/modules/accord b/modules/accord index 4c870dc9b561..5f360e0b5b19 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 4c870dc9b561a841ea9b923ff739953adcc00325 +Subproject commit 5f360e0b5b197156df0ef3d9985cd94d18ea1c92 diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index af9462bac7aa..b8b4f3e33a9e 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -588,6 +588,7 @@ public enum CassandraRelevantProperties TEST_COMPRESSION("cassandra.test.compression"), TEST_COMPRESSION_ALGO("cassandra.test.compression.algo", "lz4"), TEST_DEBUG_REF_COUNT("cassandra.debugrefcount"), + TEST_DEBUG_REF_EVENTS("cassandra.debug.refevents"), TEST_DRIVER_CONNECTION_TIMEOUT_MS("cassandra.test.driver.connection_timeout_ms", "5000"), TEST_DRIVER_READ_TIMEOUT_MS("cassandra.test.driver.read_timeout_ms", "12000"), TEST_ENCRYPTION("cassandra.test.encryption", "false"), diff --git a/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java b/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java index a70eb8377124..8c612fb23b24 100644 --- a/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java +++ b/src/java/org/apache/cassandra/dht/IPartitionerDependentSerializer.java @@ -19,6 +19,7 @@ import java.io.IOException; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -43,6 +44,6 @@ public interface IPartitionerDependentSerializer extends IVersionedSerializer default T deserialize(DataInputPlus in, int version) throws IOException { - return deserialize(in, null, version); + return deserialize(in, DatabaseDescriptor.getPartitioner(), version); } } diff --git a/src/java/org/apache/cassandra/io/util/Memory.java b/src/java/org/apache/cassandra/io/util/Memory.java index 1d1fca2edf96..6913caf4cf2e 100644 --- a/src/java/org/apache/cassandra/io/util/Memory.java +++ b/src/java/org/apache/cassandra/io/util/Memory.java @@ -81,7 +81,7 @@ public static Memory allocate(long bytes) if (bytes < 0) throw new IllegalArgumentException(); - if (Ref.DEBUG_ENABLED) + if (Ref.TRACE_ENABLED) return new SafeMemory(bytes); return new Memory(bytes); diff --git a/src/java/org/apache/cassandra/service/accord/AccordCallback.java b/src/java/org/apache/cassandra/service/accord/AccordCallback.java index 955e034d11a9..6e658948a433 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCallback.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCallback.java @@ -46,7 +46,7 @@ public AccordCallback(AgentExecutor executor, Callback callback, AccordEndpoi @Override public void onResponse(Message msg) { - logger.debug("Received response {} from {}", msg.payload, msg.from()); + logger.trace("Received response {} from {}", msg.payload, msg.from()); success(endpointMapper.mappedId(msg.from()), msg.payload); } @@ -60,7 +60,7 @@ private static Throwable convertFailureMessage(RequestFailure failure) @Override public void onFailure(InetAddressAndPort from, RequestFailure failure) { - logger.debug("Received failure {} from {} for {}", failure, from, this); + logger.trace("Received failure {} from {} for {}", failure, from, this); // TODO (now): we should distinguish timeout failures with some placeholder Exception failure(endpointMapper.mappedId(from), convertFailureMessage(failure)); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 47417fef6e51..929a1695d781 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -40,7 +40,7 @@ import accord.api.Agent; import accord.api.DataStore; import accord.api.ProgressLog; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; @@ -209,7 +209,7 @@ public AccordCommandStore(int id, commandCache = stateCache.instance(TxnId.class, AccordSafeCommand.class, - AccordSafeCommand::new, + AccordSafeCommand.safeRefFactory(), this::loadCommand, this::appendToKeyspace, this::validateCommand, @@ -356,12 +356,18 @@ public void appendToLog(Command before, Command after, Runnable runnable) boolean validateCommand(TxnId txnId, Command evicting) { + if (!Invariants.isParanoid()) + return true; + Command reloaded = loadCommand(txnId); - return (evicting == null && reloaded == null) || (evicting != null && reloaded != null && reloaded.isEqualOrFuller(evicting)); + return Objects.equals(evicting, reloaded); } boolean validateTimestampsForKey(RoutableKey key, TimestampsForKey evicting) { + if (!Invariants.isParanoid()) + return true; + TimestampsForKey reloaded = AccordKeyspace.unsafeLoadTimestampsForKey(this, (PartitionKey) key); return Objects.equals(evicting, reloaded); } @@ -378,6 +384,9 @@ CommandsForKey loadCommandsForKey(RoutableKey key) boolean validateCommandsForKey(RoutableKey key, CommandsForKey evicting) { + if (!Invariants.isParanoid()) + return true; + CommandsForKey reloaded = AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); return Objects.equals(evicting, reloaded); } @@ -494,7 +503,6 @@ public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, commandsForRanges.preExecute(); current = AccordSafeCommandStore.create(preLoadContext, commands, timestampsForKeys, commandsForKeys, commandsForRanges, this); - return current; } @@ -506,8 +514,14 @@ public boolean hasSafeStore() public void completeOperation(AccordSafeCommandStore store) { Invariants.checkState(current == store); - current.complete(); - current = null; + try + { + current.postExecute(); + } + finally + { + current = null; + } } public void abortCurrentOperation() diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index cc51bd833906..eab58afefb1a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -48,7 +48,7 @@ import org.slf4j.LoggerFactory; import accord.api.Key; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; @@ -551,9 +551,8 @@ private static TableMetadata commandsForKeysTable(String tableName) + format("key %s, ", KEY_TUPLE) + "data blob, " + "PRIMARY KEY((store_id, key_token, key))" - + ')') - // TODO (expected): make this uncompressed, as not very compressable (except perhaps the primary key, but could switch to operating on tokens directly) -// + " WITH compression = {'enabled':'false'};") + + ')' + + " WITH compression = {'class':'NoopCompressor'};") .partitioner(FOR_KEYS_LOCAL_PARTITIONER) .build(); } @@ -625,7 +624,7 @@ public Row withoutRedundantCommands(PartitionKey key, Row row, RedundantBefore.E if (current == null) return null; - CommandsForKey updated = current.withRedundantBefore(redundantBefore); + CommandsForKey updated = current.withRedundantBeforeAtLeast(redundantBefore); if (current == updated) return row; diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 160813d722a3..d3cd7a34fd7d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -24,12 +24,12 @@ import accord.api.Key; import accord.api.Result; import accord.api.RoutingKey; -import accord.local.CommandsForKey; -import accord.local.CommandsForKey.TxnInfo; +import accord.local.cfk.CommandsForKey; +import accord.local.cfk.CommandsForKey.TxnInfo; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.Command.WaitingOn; -import accord.local.CommandsForKey.TxnInfoWithMissing; +import accord.local.cfk.CommandsForKey.TxnInfoExtra; import accord.local.CommonAttributes; import accord.local.Node; import accord.local.SaveStatus; @@ -69,6 +69,7 @@ import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.utils.ObjectSizes; +import static accord.local.cfk.CommandsForKey.NO_TXNIDS; import static org.apache.cassandra.utils.ObjectSizes.measure; public class AccordObjectSizes @@ -205,7 +206,9 @@ public static long txn(PartialTxn txn) return size; } - private static final long TIMESTAMP_SIZE = ObjectSizes.measureDeep(Timestamp.fromBits(0, 0, new Node.Id(0))); + // don't count Id size, as should normally be shared + private static final long TIMESTAMP_SIZE = ObjectSizes.measure(Timestamp.fromBits(0, 0, new Node.Id(0))); + private static final long BALLOT_SIZE = ObjectSizes.measure(Ballot.ZERO); public static long timestamp() { @@ -217,6 +220,16 @@ public static long timestamp(Timestamp timestamp) return TIMESTAMP_SIZE; } + public static long ballot() + { + return BALLOT_SIZE; + } + + public static long ballot(Ballot ballot) + { + return ballot == Ballot.ZERO ? 0 : BALLOT_SIZE; + } + private static final long EMPTY_DEPS_SIZE = ObjectSizes.measureDeep(Deps.NONE); public static long dependencies(Deps dependencies) { @@ -347,7 +360,7 @@ public static long command(Command command) return size; Command.Committed committed = command.asCommitted(); - size += WaitingOnSerializer.serializedSize(command.txnId(), committed.waitingOn); + size += WaitingOnSerializer.serializedSize(committed.waitingOn); return size; } @@ -363,8 +376,8 @@ public static long timestampsForKey(TimestampsForKey timestamps) } private static long EMPTY_CFK_SIZE = measure(new CommandsForKey(null)); - private static long EMPTY_INFO_SIZE = measure(TxnInfo.createMock(TxnId.NONE, null, null, null)); - private static long EMPTY_INFO_WITH_MISSING_ADDITIONAL_SIZE = measure(TxnInfo.createMock(TxnId.NONE, null, null, null)) - EMPTY_INFO_SIZE; + private static long EMPTY_INFO_SIZE = measure(TxnInfo.createMock(TxnId.NONE, null, null, NO_TXNIDS, Ballot.ZERO)); + private static long EMPTY_INFO_EXTRA_ADDITIONAL_SIZE = EMPTY_INFO_SIZE - measure(TxnInfo.createMock(TxnId.NONE, null, null, null, null)); public static long commandsForKey(CommandsForKey cfk) { long size = EMPTY_CFK_SIZE; @@ -374,13 +387,14 @@ public static long commandsForKey(CommandsForKey cfk) for (int i = 0 ; i < cfk.size() ; ++i) { TxnInfo info = cfk.get(i); - if (info.getClass() != TxnInfoWithMissing.class) continue; - TxnInfoWithMissing infoWithMissing = (TxnInfoWithMissing) info; - if (infoWithMissing.missing.length > 0) + if (info.getClass() != TxnInfoExtra.class) continue; + TxnInfoExtra infoExtra = (TxnInfoExtra) info; + if (infoExtra.missing.length > 0) { - size += EMPTY_INFO_WITH_MISSING_ADDITIONAL_SIZE; - size += ObjectSizes.sizeOfReferenceArray(infoWithMissing.missing.length); - size += infoWithMissing.missing.length * TIMESTAMP_SIZE; + size += EMPTY_INFO_EXTRA_ADDITIONAL_SIZE; + size += ObjectSizes.sizeOfReferenceArray(infoExtra.missing.length); + size += infoExtra.missing.length * TIMESTAMP_SIZE; + size += ballot(infoExtra.ballot); } } return size; diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index 5c458aa45eb7..0cafdfec0868 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import java.util.Objects; +import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; @@ -27,9 +28,37 @@ import accord.local.Listeners; import accord.local.SafeCommand; import accord.primitives.TxnId; +import accord.utils.Invariants; +import org.apache.cassandra.utils.concurrent.Ref; + +import static accord.utils.Invariants.Paranoia.LINEAR; +import static accord.utils.Invariants.ParanoiaCostFactor.HIGH; public class AccordSafeCommand extends SafeCommand implements AccordSafeState { + public static class DebugAccordSafeCommand extends AccordSafeCommand + { + final Ref selfRef; + public DebugAccordSafeCommand(AccordCachingState global) + { + super(global); + selfRef = new Ref<>(this, null); + selfRef.debug(global.key().toString()); + } + + @Override + public void invalidate() + { + super.invalidate(); + selfRef.release(); + } + + public static void trace(AccordSafeCommand safeCommand, String message) + { + ((DebugAccordSafeCommand)safeCommand).selfRef.debug(message); + } + } + private boolean invalidated; private final AccordCachingState global; private Command original; @@ -111,13 +140,6 @@ public void preExecute() current = original; } - @Override - public void postExecute() - { - checkNotInvalidated(); - global.set(current); - } - @Override public void invalidate() { @@ -150,4 +172,9 @@ public Listeners transientListeners() checkNotInvalidated(); return global.listeners(); } + + public static Function, AccordSafeCommand> safeRefFactory() + { + return Invariants.testParanoia(LINEAR, LINEAR, HIGH) ? DebugAccordSafeCommand::new : AccordSafeCommand::new; + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index c6c8499880a0..a5215eadea08 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -28,7 +28,7 @@ import accord.api.Key; import accord.api.ProgressLog; import accord.impl.AbstractSafeCommandStore; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.impl.CommandsSummary; import accord.local.CommandStores.RangesForEpoch; import accord.local.NodeTimeService; @@ -299,27 +299,6 @@ public T mapReduceFull(Seekables keysOrRanges, Ranges slice, TxnId }, accumulate); } - @Override - protected void invalidateSafeState() - { - commands.values().forEach(AccordSafeCommand::invalidate); - timestampsForKeys.values().forEach(AccordSafeTimestampsForKey::invalidate); - commandsForKeys.values().forEach(AccordSafeCommandsForKey::invalidate); - } - - public void postExecute(Map commands, - Map timestampsForKey, - Map commandsForKeys, - @Nullable AccordSafeCommandsForRanges commandsForRanges) - { - postExecute(); - commands.values().forEach(AccordSafeState::postExecute); - timestampsForKey.values().forEach(AccordSafeState::postExecute); - commandsForKeys.values().forEach(AccordSafeState::postExecute); - if (commandsForRanges != null) - commandsForRanges.postExecute(); - } - @Override public String toString() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index 808b4d4bc15a..6f5e8f72d5e5 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -23,8 +23,8 @@ import com.google.common.annotations.VisibleForTesting; import accord.api.Key; -import accord.local.CommandsForKey; -import accord.local.SafeCommandsForKey; +import accord.local.cfk.CommandsForKey; +import accord.local.cfk.SafeCommandsForKey; public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState { @@ -35,7 +35,7 @@ public class AccordSafeCommandsForKey extends SafeCommandsForKey implements Acco public AccordSafeCommandsForKey(AccordCachingState global) { - super((Key) global.key()); + super(global.key()); this.global = global; this.original = null; this.current = null; @@ -117,14 +117,6 @@ public void preExecute() current = original; } - @Override - public void postExecute() - { - checkNotInvalidated(); - if (current != original) - global.set(current); - } - @Override public void invalidate() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeState.java b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java index 374968bcfb7d..d8e950d06356 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeState.java @@ -28,7 +28,7 @@ public interface AccordSafeState extends SafeState void invalidate(); boolean invalidated(); void preExecute(); - void postExecute(); + AccordCachingState global(); default boolean hasUpdate() diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java index a4c48c83e68d..89baee84b950 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java @@ -105,13 +105,6 @@ public void preExecute() current = original; } - @Override - public void postExecute() - { - checkNotInvalidated(); - global.set(current); - } - @Override public void invalidate() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 085504f092dd..5fa5c6a941c3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -219,6 +219,7 @@ private void evict(AccordCachingState node) if (!node.hasListeners()) { AccordCachingState self = instances.get(node.index).cache.remove(node.key()); + Invariants.checkState(self.references == 0); checkState(self == node, "Leaked node detected; was attempting to remove %s but cache had %s", node, self); if (instance.listeners != null) instance.listeners.forEach(l -> l.onEvict((AccordCachingState) node)); @@ -380,9 +381,8 @@ public S acquireIfExists(K key) @SuppressWarnings("unchecked") AccordCachingState node = (AccordCachingState) cache.get(key); if (node == null) - { return null; - } + return safeRefFactory.apply(acquireExisting(node, false)); } @@ -422,7 +422,8 @@ private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded) node.load(loadExecutor, loadFunction); node.references++; - cache.put(key, node); + Object prev = cache.put(key, node); + Invariants.checkState(prev == null, "%s not absent from cache: %s already present", key, node); if (listeners != null) listeners.forEach(l -> l.onAdd(node)); maybeUpdateSize(node, heapEstimator); @@ -467,13 +468,14 @@ public void release(S safeRef) @SuppressWarnings("unchecked") AccordCachingState node = (AccordCachingState) cache.get(key); - checkState(node != null, "node is null for %s", key); + checkState(safeRef.global() != null, "safeRef node is null for %s", key); + checkState(safeRef.global() == node, "safeRef node not in map: %s != %s", safeRef.global(), node); checkState(node.references > 0, "references (%d) are zero for %s (%s)", node.references, key, node); - checkState(safeRef.global() == node); checkState(!isInQueue(node)); if (safeRef.hasUpdate()) node.set(safeRef.current()); + safeRef.invalidate(); maybeUpdateSize(node, heapEstimator); @@ -688,6 +690,7 @@ void unsafeClear() bytesCached = 0; metrics.reset();; instances.forEach(instance -> { + instance.cache.forEach((k, v) -> Invariants.checkState(v.references == 0)); instance.cache.clear(); instance.bytesCached = 0; instance.instanceMetrics.reset(); diff --git a/src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java b/src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java index 850f6f7e8d0a..8d9eb7c5ffdd 100644 --- a/src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java +++ b/src/java/org/apache/cassandra/service/accord/ImmutableAccordSafeState.java @@ -75,10 +75,4 @@ public void revert() { checkNotInvalidated(); } - - @Override - public void postExecute() - { - checkNotInvalidated(); - } } diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index fb7034fcd1ed..282773c6d981 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -400,7 +400,7 @@ public int serializedSize(JournalKey key, Object value, int userVersion) if (diff.waitingOn != null) { size += Integer.BYTES; - size += WaitingOnSerializer.serializedSize(diff.txnId, diff.waitingOn); + size += WaitingOnSerializer.serializedSize(diff.waitingOn); } if (diff.writes != null) @@ -448,7 +448,7 @@ public void serialize(JournalKey key, Object value, DataOutputPlus out, int user if (diff.waitingOn != null) { - long size = WaitingOnSerializer.serializedSize(diff.txnId, diff.waitingOn); + long size = WaitingOnSerializer.serializedSize(diff.waitingOn); ByteBuffer serialized = WaitingOnSerializer.serialize(diff.txnId, diff.waitingOn); out.writeInt((int) size); out.write(serialized); @@ -556,7 +556,7 @@ public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) thr waitingOn = (localTxnId, deps) -> { try { - return WaitingOnSerializer.deserialize(localTxnId, deps.keyDeps.keys(), deps.rangeDeps.txnIds(), buffer); + return WaitingOnSerializer.deserialize(localTxnId, deps.keyDeps.keys(), deps.rangeDeps, deps.directKeyDeps, buffer); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index c0fea38a37c2..899a00c4bf45 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -119,6 +119,19 @@ public long preAcceptTimeout() return getReadRpcTimeout(MICROSECONDS); } + // TODO (expected): we probably want additional configuration here so we can prune on shorter time horizons when we have a lot of transactions on a single key + @Override + public long cfkHlcPruneDelta() + { + return SECONDS.toMicros(10L); + } + + @Override + public int cfkPruneInterval() + { + return 32; + } + @Override public Txn emptyTxn(Kind kind, Seekables seekables) { diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index e648e738007f..3c8672ea9310 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -43,7 +43,6 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.TokenRange; -import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; @@ -204,8 +203,7 @@ public Range asRange() static { - Token key = getPartitioner().decorateKey(ByteBufferUtil.EMPTY_BYTE_BUFFER).getToken(); - EMPTY_SIZE = ObjectSizes.measureDeep(new TokenKey(null, key)); + EMPTY_SIZE = ObjectSizes.measure(new TokenKey(null, null)); } final Token token; diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 032d7c6dc93e..c6c508ebc57d 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -18,7 +18,7 @@ package org.apache.cassandra.service.accord.async; import accord.api.Key; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.local.KeyHistory; import accord.local.PreLoadContext; import accord.primitives.*; @@ -32,18 +32,21 @@ import org.apache.cassandra.service.accord.*; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.NoSpamLogger; import org.apache.cassandra.utils.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; +import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.stream.Collectors; public class AsyncLoader { private static final Logger logger = LoggerFactory.getLogger(AsyncLoader.class); + private static final NoSpamLogger noSpamLogger = NoSpamLogger.getLogger(logger, 1L, TimeUnit.MINUTES); enum State { @@ -85,7 +88,12 @@ private static > void referenceAndAssemble List> listenChains) { S safeRef = cache.acquire(key); - context.put(key, safeRef); + if (context.putIfAbsent(key, safeRef) != null) + { + noSpamLogger.warn("Context {} contained key {} more than once", context, key); + cache.release(safeRef); + return; + } AccordCachingState.Status status = safeRef.globalStatus(); // globalStatus() completes switch (status) { @@ -142,8 +150,7 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) switch (keysOrRanges.domain()) { case Key: - // cast to Keys fails... - Iterable keys = (Iterable) keysOrRanges; + AbstractKeys keys = (AbstractKeys) keysOrRanges; keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); break; case Range: @@ -235,9 +242,11 @@ public boolean load(AsyncOperation.Context context, BiConsumer finish(result, null)); - return; + return false; } state(COMPLETING); @@ -279,6 +285,8 @@ protected void runInternal() case FAILED: break; } + + return false; } @Override @@ -292,7 +300,7 @@ public void run() commandStore.setCurrentOperation(this); try { - runInternal(); + runInternal(false); } catch (Throwable t) { @@ -311,12 +319,28 @@ public void run() } } + private boolean preRun() + { + commandStore.checkInStoreThread(); + try + { + return runInternal(true); + } + catch (Throwable t) + { + logger.error("Operation {} failed", this, t); + fail(t); + return false; + } + } + @Override public void start(BiConsumer callback) { Invariants.checkState(this.callback == null); this.callback = callback; - commandStore.executor().execute(this); + if (!commandStore.inStore() || preRun()) + commandStore.executor().execute(this); } static class ForFunction extends AsyncOperation diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java index 1770ae75a5e1..5af4b53a3607 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java @@ -129,7 +129,7 @@ public long serializedSize(DurableBefore.Entry t, int version) } }), DurableBefore.Entry[]::new, DurableBefore.SerializerSupport::create); - public static IVersionedSerializer redundantBefore = new ReducingRangeMapSerializer<>(NullableSerializer.wrap(new IVersionedSerializer() + public static final IVersionedSerializer redundantBeforeEntry = new IVersionedSerializer<>() { @Override public void serialize(RedundantBefore.Entry t, DataOutputPlus out, int version) throws IOException @@ -152,10 +152,10 @@ public RedundantBefore.Entry deserialize(DataInputPlus in, int version) throws I long startEpoch = in.readUnsignedVInt(); long endEpoch = in.readUnsignedVInt(); if (endEpoch == 0) endEpoch = Long.MAX_VALUE; - else endEpoch = startEpoch + 1 + endEpoch; - TxnId bootstrappedAt = CommandSerializers.txnId.deserialize(in, version); + else endEpoch = endEpoch - 1 + startEpoch; TxnId locallyAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); TxnId shardAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); + TxnId bootstrappedAt = CommandSerializers.txnId.deserialize(in, version); Timestamp staleUntilAtLeast = CommandSerializers.nullableTimestamp.deserialize(in, version); return new RedundantBefore.Entry(range, startEpoch, endEpoch, locallyAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, bootstrappedAt, staleUntilAtLeast); } @@ -172,7 +172,8 @@ public long serializedSize(RedundantBefore.Entry t, int version) size += CommandSerializers.nullableTimestamp.serializedSize(t.staleUntilAtLeast, version); return size; } - }), RedundantBefore.Entry[]::new, RedundantBefore.SerializerSupport::create); + }; + public static IVersionedSerializer redundantBefore = new ReducingRangeMapSerializer<>(NullableSerializer.wrap(redundantBeforeEntry), RedundantBefore.Entry[]::new, RedundantBefore.SerializerSupport::create); private static class TimestampToRangesSerializer implements IVersionedSerializer> { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index a81b62b4a393..369fe57d8e60 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -24,11 +24,13 @@ import com.google.common.primitives.Ints; import accord.api.Key; -import accord.local.CommandsForKey; -import accord.local.CommandsForKey.TxnInfo; -import accord.local.CommandsForKey.InternalStatus; -import accord.local.CommandsForKey.Unmanaged; +import accord.local.cfk.CommandsForKey; +import accord.local.cfk.CommandsForKey.TxnInfo; +import accord.local.cfk.CommandsForKey.InternalStatus; +import accord.local.cfk.CommandsForKey.TxnInfoExtra; +import accord.local.cfk.CommandsForKey.Unmanaged; import accord.local.Node; +import accord.primitives.Ballot; import accord.primitives.Routable.Domain; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -38,9 +40,10 @@ import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.utils.vint.VIntCoding; -import static accord.local.CommandsForKey.NO_PENDING_UNMANAGED; -import static accord.primitives.Txn.Kind.ExclusiveSyncPoint; +import static accord.local.cfk.CommandsForKey.NO_PENDING_UNMANAGED; +import static accord.local.cfk.CommandsForKey.NO_TXNIDS; import static accord.primitives.Txn.Kind.Read; +import static accord.primitives.Txn.Kind.SyncPoint; import static accord.primitives.Txn.Kind.Write; import static accord.utils.ArrayBuffers.cachedInts; import static accord.utils.ArrayBuffers.cachedTxnIds; @@ -52,12 +55,15 @@ import static org.apache.cassandra.utils.ByteBufferUtil.readLeastSignificantBytes; import static org.apache.cassandra.utils.ByteBufferUtil.writeLeastSignificantBytes; import static org.apache.cassandra.utils.ByteBufferUtil.writeMostSignificantBytes; +import static org.apache.cassandra.utils.vint.VIntCoding.decodeZigZag64; +import static org.apache.cassandra.utils.vint.VIntCoding.encodeZigZag64; public class CommandsForKeySerializer { private static final int HAS_MISSING_DEPS_HEADER_BIT = 0x1; private static final int HAS_EXECUTE_AT_HEADER_BIT = 0x2; - private static final int HAS_NON_STANDARD_FLAGS = 0x4; + private static final int HAS_BALLOT_HEADER_BIT = 0x4; + private static final int HAS_NON_STANDARD_FLAGS = 0x8; /** * We read/write a fixed number of intial bytes for each command, with an initial flexible number of flag bits @@ -70,12 +76,13 @@ public class CommandsForKeySerializer * two flag bytes: * bit 0 is set if there are any missing ids; * bit 1 is set if there are any executeAt specified - * bit 2 is set if there are any queries present besides reads/writes - * bits 3-4 number of header bytes to read for each command - * bits 5-6: level 0 extra hlc bytes to read - * bits 7-8: level 1 extra hlc bytes to read (+ 1 + level 0) - * bits 9-10: level 2 extra hlc bytes to read (+ 1 + level 1) - * bits 12-13: level 3 extra hlc bytes to read (+ 1 + level 2) + * bit 2 is set if there are any ballots specified + * bit 3 is set if there are any queries present besides reads/writes + * bits 4-5 number of header bytes to read for each command + * bits 8-9: level 0 extra hlc bytes to read + * bits 10-11: level 1 extra hlc bytes to read (+ 1 + level 0) + * bits 12-13: level 2 extra hlc bytes to read (+ 1 + level 1) + * bits 14-15: level 3 extra hlc bytes to read (+ 1 + level 2) * * In order, for each command, we consume: * 3 bits for the InternalStatus of the command @@ -104,6 +111,8 @@ public class CommandsForKeySerializer // TODO (desired): determine timestamp resolution as a factor of 10 public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) { + Invariants.checkArgument(!cfk.isLoadingPruned()); + int commandCount = cfk.size(); if (commandCount == 0) return ByteBuffer.allocate(1); @@ -115,15 +124,15 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) // whether we have any missing transactions to encode, any executeAt that are not equal to their TxnId // and whether there are any non-standard flag bits to encode boolean hasNonStandardFlags = false; - int nodeIdCount = 0, missingIdCount = 0, executeAtCount = 0, bitsPerExecuteAtFlags = 0; - int bitsPerExecuteAtEpochDelta = 0, bitsPerExecuteAtHlcDelta = 1; // to permit us to use full 64 bits and encode in 5 bits we force at least one hlc bit + int nodeIdCount = 0, missingIdCount = 0, executeAtCount = 0, ballotCount = 0; + int bitsPerExecuteAtEpoch = 0, bitsPerExecuteAtFlags = 0, bitsPerExecuteAtHlc = 1; // to permit us to use full 64 bits and encode in 5 bits we force at least one hlc bit { for (int i = 0 ; i < commandCount ; ++i) { - if (nodeIdCount + 1 >= nodeIds.length) + if (nodeIdCount + 2 >= nodeIds.length) { nodeIdCount = compact(nodeIds); - if (nodeIdCount > nodeIds.length/2) + if (nodeIdCount > nodeIds.length/2 || nodeIdCount + 2 >= nodeIds.length) nodeIds = cachedInts().resize(nodeIds, nodeIds.length, nodeIds.length * 2); } @@ -132,15 +141,27 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) hasNonStandardFlags |= txnIdFlags(txn) != STANDARD; nodeIds[nodeIdCount++] = txn.node.id; - missingIdCount += txn.missing().length; - if (txn.executeAt == txn) - continue; + if (txn.executeAt != txn) + { + Invariants.checkState(txn.status.hasExecuteAtOrDeps); + nodeIds[nodeIdCount++] = txn.executeAt.node.id; + bitsPerExecuteAtEpoch = Math.max(bitsPerExecuteAtEpoch, numberOfBitsToRepresent(txn.executeAt.epoch() - txn.epoch())); + bitsPerExecuteAtHlc = Math.max(bitsPerExecuteAtHlc, numberOfBitsToRepresent(txn.executeAt.hlc() - txn.hlc())); + bitsPerExecuteAtFlags = Math.max(bitsPerExecuteAtFlags, numberOfBitsToRepresent(txn.executeAt.flags())); + executeAtCount += 1; + } - nodeIds[nodeIdCount++] = txn.executeAt.node.id; - bitsPerExecuteAtEpochDelta = Math.max(bitsPerExecuteAtEpochDelta, numberOfBitsToRepresent(txn.executeAt.epoch() - txn.epoch())); - bitsPerExecuteAtHlcDelta = Math.max(bitsPerExecuteAtHlcDelta, numberOfBitsToRepresent(txn.executeAt.hlc() - txn.hlc())); - bitsPerExecuteAtFlags = Math.max(bitsPerExecuteAtFlags, numberOfBitsToRepresent(txn.executeAt.flags())); - executeAtCount += 1; + if (txn.getClass() == TxnInfoExtra.class) + { + TxnInfoExtra extra = (TxnInfoExtra) txn; + missingIdCount += extra.missing.length; + if (extra.ballot != Ballot.ZERO) + { + Invariants.checkArgument(txn.status.hasBallot); + nodeIds[nodeIdCount++] = extra.ballot.node.id; + ballotCount += 1; + } + } } nodeIdCount = compact(nodeIds); Invariants.checkState(nodeIdCount > 0); @@ -151,11 +172,14 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) int bitsPerNodeId = numberOfBitsToRepresent(nodeIdCount); int minHeaderBits = 7 + bitsPerNodeId + (hasNonStandardFlags ? 1 : 0); int infoHeaderBits = (executeAtCount > 0 ? 1 : 0) + (missingIdCount > 0 ? 1 : 0); + int ballotHeaderBits = (ballotCount > 0 ? 1 : 0); int maxHeaderBits = minHeaderBits; int totalBytes = 0; long prevEpoch = cfk.get(0).epoch(); long prevHlc = cfk.get(0).hlc(); + int prunedBeforeIndex = cfk.prunedBefore().equals(TxnId.NONE) ? -1 : cfk.indexOf(cfk.prunedBefore()); + int[] bytesHistogram = cachedInts().getInts(12); Arrays.fill(bytesHistogram, 0); for (int i = 0 ; i < commandCount ; ++i) @@ -194,8 +218,10 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) totalBytes += 2; TxnInfo info = cfk.get(i); - if (info.status.hasInfo) + if (info.status.hasExecuteAtOrDeps) headerBits += infoHeaderBits; + if (info.status.hasBallot) + headerBits += ballotHeaderBits; maxHeaderBits = Math.max(headerBits, maxHeaderBits); int basicBytes = (headerBits + payloadBits + 7)/8; bytesHistogram[basicBytes]++; @@ -213,10 +239,11 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) int flags = (missingIdCount > 0 ? HAS_MISSING_DEPS_HEADER_BIT : 0) | (executeAtCount > 0 ? HAS_EXECUTE_AT_HEADER_BIT : 0) + | (ballotCount > 0 ? HAS_BALLOT_HEADER_BIT : 0) | (hasNonStandardFlags ? HAS_NON_STANDARD_FLAGS : 0); int headerBytes = (maxHeaderBits+7)/8; - flags |= Invariants.checkArgument(headerBytes - 1, headerBytes <= 4) << 3; + flags |= Invariants.checkArgument(headerBytes - 1, headerBytes <= 4) << 4; int hlcBytesLookup; { // 2bits per size, first value may be zero and remainder may be increments of 1-4; @@ -234,7 +261,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) while (l3-l2 > 4) ++l2; while (l2-l1 > 4) ++l1; hlcBytesLookup = setHlcBytes(l0, l1, l2, l3); - flags |= (l0 | ((l1-(1+l0))<<2) | ((l2-(1+l1))<<4) | ((l3-(1+l2))<<6)) << 5; + flags |= (l0 | ((l1-(1+l0))<<2) | ((l2-(1+l1))<<4) | ((l3-(1+l2))<<6)) << 8; } int hlcFlagLookup = hlcBytesLookupToHlcFlagLookup(hlcBytesLookup); @@ -252,21 +279,51 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) prevEpoch = cfk.get(0).epoch(); prevHlc = cfk.get(0).hlc(); - // account for encoding redundantBefore totalBytes += TypeSizes.sizeofUnsignedVInt(prevEpoch); totalBytes += TypeSizes.sizeofUnsignedVInt(prevHlc); + totalBytes += TypeSizes.sizeofUnsignedVInt(prunedBeforeIndex + 1); - if (missingIdCount + executeAtCount > 0) + int bitsPerBallotEpoch = 0, bitsPerBallotHlc = 1, bitsPerBallotFlags = 0; + if ((missingIdCount | executeAtCount | ballotCount) > 0) { + if (ballotCount > 0) + { + Ballot prevBallot = null; + for (int i = 0 ; i < commandCount ; ++i) + { + TxnInfo txn = cfk.get(i); + if (txn.getClass() != TxnInfoExtra.class) continue; + if (!txn.status.hasBallot) continue; + TxnInfoExtra extra = (TxnInfoExtra) txn; + if (extra.ballot == Ballot.ZERO) continue; + if (prevBallot != null) + { + bitsPerBallotEpoch = Math.max(bitsPerBallotEpoch, numberOfBitsToRepresent(encodeZigZag64(extra.ballot.epoch() - prevBallot.epoch()))); + bitsPerBallotHlc = Math.max(bitsPerBallotHlc, numberOfBitsToRepresent(encodeZigZag64(extra.ballot.hlc() - prevBallot.hlc()))); + bitsPerBallotFlags = Math.max(bitsPerBallotFlags, numberOfBitsToRepresent(extra.ballot.flags())); + } + prevBallot = extra.ballot; + } + totalBytes += 2; // encode bit widths + } + + if (executeAtCount > 0) + totalBytes += 2; // encode bit widths + // account for encoding missing id stream int missingIdBits = 1 + numberOfBitsToRepresent(commandCount); int executeAtBits = bitsPerNodeId - + bitsPerExecuteAtEpochDelta - + bitsPerExecuteAtHlcDelta + + bitsPerExecuteAtEpoch + + bitsPerExecuteAtHlc + bitsPerExecuteAtFlags; - totalBytes += (missingIdBits * missingIdCount + executeAtBits * executeAtCount + 7)/8; - if (executeAtCount > 0) - totalBytes += 2; + int ballotBits = bitsPerNodeId + + bitsPerBallotEpoch + + bitsPerBallotHlc + + bitsPerBallotFlags; + totalBytes += (missingIdBits * missingIdCount + + executeAtBits * executeAtCount + + (ballotCount > 0 ? ballotBits * (ballotCount - 1) + bitsPerNodeId + 128 : 0) + + 7)/8; } // count unmanaged bytes @@ -293,9 +350,11 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) VIntCoding.writeUnsignedVInt(prevEpoch, out); VIntCoding.writeUnsignedVInt(prevHlc, out); + VIntCoding.writeUnsignedVInt32(prunedBeforeIndex + 1, out); int executeAtMask = executeAtCount > 0 ? 1 : 0; int missingDepsMask = missingIdCount > 0 ? 1 : 0; + int ballotMask = ballotCount > 0 ? 1 : 0; int flagsIncrement = hasNonStandardFlags ? 2 : 1; // TODO (desired): check this loop compiles correctly to only branch on epoch case, for binarySearch and flushing for (int i = 0 ; i < commandCount ; ++i) @@ -307,15 +366,20 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) long bits = status.ordinal(); int bitIndex = 3; - int statusHasInfo = status.hasInfo ? 1 : 0; - long hasExecuteAt = info.executeAt != null & info.executeAt != txnId ? 1 : 0; + int statusHasInfo = status.hasExecuteAtOrDeps ? 1 : 0; + int statusHasBallot = status.hasBallot ? 1 : 0; + long hasExecuteAt = info.executeAt != txnId ? 1 : 0; bits |= hasExecuteAt << bitIndex; bitIndex += statusHasInfo & executeAtMask; - long hasMissingIds = info.missing() != CommandsForKey.NO_TXNIDS ? 1 : 0; + long hasMissingIds = info.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)info).missing != CommandsForKey.NO_TXNIDS ? 1 : 0; bits |= hasMissingIds << bitIndex; bitIndex += statusHasInfo & missingDepsMask; + long hasBallot = info.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)info).ballot != Ballot.ZERO ? 1 : 0; + bits |= hasBallot << bitIndex; + bitIndex += statusHasBallot & ballotMask; + long flagBits = txnIdFlagsBits(txnId); boolean writeFullFlags = flagBits == RAW_BITS; bits |= flagBits << bitIndex; @@ -399,19 +463,23 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) out.position(out.position() + offset); } - if ((executeAtCount | missingIdCount) > 0) + if ((executeAtCount | missingIdCount | ballotCount) > 0) { int bitsPerCommandId = numberOfBitsToRepresent(commandCount); int bitsPerMissingId = 1 + bitsPerCommandId; - int bitsPerExecuteAt = bitsPerExecuteAtEpochDelta + bitsPerExecuteAtHlcDelta + bitsPerExecuteAtFlags + bitsPerNodeId; - Invariants.checkState(bitsPerExecuteAtEpochDelta < 64); - Invariants.checkState(bitsPerExecuteAtHlcDelta <= 64); + int bitsPerExecuteAt = bitsPerExecuteAtEpoch + bitsPerExecuteAtHlc + bitsPerExecuteAtFlags + bitsPerNodeId; + int bitsPerBallot = bitsPerBallotEpoch + bitsPerBallotHlc + bitsPerBallotFlags + bitsPerNodeId; + Invariants.checkState(bitsPerExecuteAtEpoch < 64); + Invariants.checkState(bitsPerExecuteAtHlc <= 64); Invariants.checkState(bitsPerExecuteAtFlags <= 16); if (executeAtMask > 0) // we encode both 15 and 16 bits for flag length as 15 to fit in a short - out.putShort((short) ((bitsPerExecuteAtEpochDelta << 10) | ((bitsPerExecuteAtHlcDelta-1) << 4) | (Math.min(15, bitsPerExecuteAtFlags)))); + out.putShort((short) ((bitsPerExecuteAtEpoch << 10) | ((bitsPerExecuteAtHlc-1) << 4) | (Math.min(15, bitsPerExecuteAtFlags)))); + if (ballotMask > 0) // we encode both 15 and 16 bits for flag length as 15 to fit in a short + out.putShort((short) ((bitsPerBallotEpoch << 10) | ((bitsPerBallotHlc-1) << 4) | (Math.min(15, bitsPerBallotFlags)))); long buffer = 0L; int bufferCount = 0; + Ballot prevBallot = null; for (int i = 0 ; i < commandCount ; ++i) { TxnInfo txn = cfk.get(i); @@ -423,9 +491,9 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) { Invariants.checkState(executeAt.epoch() >= txn.epoch()); long executeAtBits = executeAt.epoch() - txn.epoch(); - int offset = bitsPerExecuteAtEpochDelta; + int offset = bitsPerExecuteAtEpoch; executeAtBits |= (executeAt.hlc() - txn.hlc()) << offset ; - offset += bitsPerExecuteAtHlcDelta; + offset += bitsPerExecuteAtHlc; executeAtBits |= ((long)executeAt.flags()) << offset; offset += bitsPerExecuteAtFlags; executeAtBits |= ((long)nodeIdx) << offset; @@ -434,10 +502,10 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) } else { - buffer = flushBits(buffer, bufferCount, executeAt.epoch() - txn.epoch(), bitsPerExecuteAtEpochDelta, out); - bufferCount = (bufferCount + bitsPerExecuteAtEpochDelta) & 63; - buffer = flushBits(buffer, bufferCount, executeAt.hlc() - txn.hlc(), bitsPerExecuteAtHlcDelta, out); - bufferCount = (bufferCount + bitsPerExecuteAtHlcDelta) & 63; + buffer = flushBits(buffer, bufferCount, executeAt.epoch() - txn.epoch(), bitsPerExecuteAtEpoch, out); + bufferCount = (bufferCount + bitsPerExecuteAtEpoch) & 63; + buffer = flushBits(buffer, bufferCount, executeAt.hlc() - txn.hlc(), bitsPerExecuteAtHlc, out); + bufferCount = (bufferCount + bitsPerExecuteAtHlc) & 63; buffer = flushBits(buffer, bufferCount, executeAt.flags(), bitsPerExecuteAtFlags, out); bufferCount = (bufferCount + bitsPerExecuteAtFlags) & 63; buffer = flushBits(buffer, bufferCount, nodeIdx, bitsPerNodeId, out); @@ -445,20 +513,62 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) } } - TxnId[] missing = txn.missing(); - if (missing.length > 0) + if (txn.getClass() == TxnInfoExtra.class) { - int j = 0; - while (j < missing.length - 1) + TxnInfoExtra extra = (TxnInfoExtra) txn; + + TxnId[] missing = extra.missing; + if (missing.length > 0) { - int missingId = cfk.indexOf(missing[j++]); + int j = 0; + while (j < missing.length - 1) + { + int missingId = cfk.indexOf(missing[j++]); + buffer = flushBits(buffer, bufferCount, missingId, bitsPerMissingId, out); + bufferCount = (bufferCount + bitsPerMissingId) & 63; + } + int missingId = cfk.indexOf(missing[missing.length - 1]); + missingId |= 1L << bitsPerCommandId; buffer = flushBits(buffer, bufferCount, missingId, bitsPerMissingId, out); bufferCount = (bufferCount + bitsPerMissingId) & 63; } - int missingId = cfk.indexOf(missing[missing.length - 1]); - missingId |= 1L << bitsPerCommandId; - buffer = flushBits(buffer, bufferCount, missingId, bitsPerMissingId, out); - bufferCount = (bufferCount + bitsPerMissingId) & 63; + + Ballot ballot = extra.ballot; + if (ballot != Ballot.ZERO) + { + int nodeIdx = Arrays.binarySearch(nodeIds, 0, nodeIdCount, ballot.node.id); + if (prevBallot == null) + { + buffer = flushBits(buffer, bufferCount, ballot.msb, 64, out); + buffer = flushBits(buffer, bufferCount, ballot.lsb, 64, out); + buffer = flushBits(buffer, bufferCount, nodeIdx, bitsPerNodeId, out); + bufferCount = (bufferCount + bitsPerNodeId) & 63; + } + else if (bitsPerBallot <= 64) + { + long ballotBits = encodeZigZag64(ballot.epoch() - prevBallot.epoch()); + int offset = bitsPerBallotEpoch; + ballotBits |= encodeZigZag64(ballot.hlc() - prevBallot.hlc()) << offset ; + offset += bitsPerBallotHlc; + ballotBits |= ((long)ballot.flags()) << offset; + offset += bitsPerBallotFlags; + ballotBits |= ((long)nodeIdx) << offset; + buffer = flushBits(buffer, bufferCount, ballotBits, bitsPerBallot, out); + bufferCount = (bufferCount + bitsPerBallot) & 63; + } + else + { + buffer = flushBits(buffer, bufferCount, encodeZigZag64(ballot.epoch() - prevBallot.epoch()), bitsPerBallotEpoch, out); + bufferCount = (bufferCount + bitsPerBallotEpoch) & 63; + buffer = flushBits(buffer, bufferCount, encodeZigZag64(ballot.hlc() - prevBallot.hlc()), bitsPerBallotHlc, out); + bufferCount = (bufferCount + bitsPerBallotHlc) & 63; + buffer = flushBits(buffer, bufferCount, ballot.flags(), bitsPerBallotFlags, out); + bufferCount = (bufferCount + bitsPerBallotFlags) & 63; + buffer = flushBits(buffer, bufferCount, nodeIdx, bitsPerNodeId, out); + bufferCount = (bufferCount + bitsPerNodeId) & 63; + } + prevBallot = ballot; + } } } @@ -502,6 +612,7 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) return new CommandsForKey(key); TxnId[] txnIds = cachedTxnIds().get(commandCount); + int[] decodeFlags = cachedInts().getInts(commandCount); TxnInfo[] txns = new TxnInfo[commandCount]; int nodeIdCount = VIntCoding.readUnsignedVInt32(in); int bitsPerNodeId = numberOfBitsToRepresent(nodeIdCount); @@ -514,34 +625,43 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) nodeIds[i] = new Node.Id(prev += VIntCoding.readUnsignedVInt32(in)); } - int missingDepsMasks, executeAtMasks, txnIdFlagsMask; + int missingDepsMasks, executeAtMasks, ballotMasks, txnIdFlagsMask; int headerByteCount, hlcBytesLookup; { int flags = in.getShort(); missingDepsMasks = 0 != (flags & HAS_MISSING_DEPS_HEADER_BIT) ? 1 : 0; executeAtMasks = 0 != (flags & HAS_EXECUTE_AT_HEADER_BIT) ? 1 : 0; + ballotMasks = 0 != (flags & HAS_BALLOT_HEADER_BIT) ? 1 : 0; txnIdFlagsMask = 0 != (flags & HAS_NON_STANDARD_FLAGS) ? 3 : 1; - headerByteCount = 1 + ((flags >>> 3) & 0x3); - hlcBytesLookup = setHlcByteDeltas((flags >>> 5) & 0x3, (flags >>> 7) & 0x3, (flags >>> 9) & 0x3, (flags >>> 11) & 0x3); + headerByteCount = 1 + ((flags >>> 4) & 0x3); + hlcBytesLookup = setHlcByteDeltas((flags >>> 8) & 0x3, (flags >>> 10) & 0x3, (flags >>> 12) & 0x3, (flags >>> 14) & 0x3); } long prevEpoch = VIntCoding.readUnsignedVInt(in); long prevHlc = VIntCoding.readUnsignedVInt(in); + int prunedBeforeIndex = VIntCoding.readUnsignedVInt32(in) - 1; + for (int i = 0 ; i < commandCount ; ++i) { long header = readLeastSignificantBytes(headerByteCount, in); header |= 1L << (8 * headerByteCount); // marker so we know where to shift-left most-significant bytes to - InternalStatus status = InternalStatus.get((int) (header & 0x7)); + int commandDecodeFlags = (int)(header & 0x7); + InternalStatus status = InternalStatus.get(commandDecodeFlags); header >>>= 3; + commandDecodeFlags <<= 3; - int executeAtInfoOffset, missingDepsInfoOffset; { - int infoMask = status.hasInfo ? 1 : 0; - int executeAtMask = infoMask & executeAtMasks, missingDepsMask = infoMask & missingDepsMasks; - executeAtInfoOffset = ((int)header & executeAtMask) << 1; + int infoMask = status.hasExecuteAtOrDeps ? 1 : 0; + int executeAtMask = infoMask & executeAtMasks; + int missingDepsMask = infoMask & missingDepsMasks; + commandDecodeFlags |= ((int)header & executeAtMask) << 1; header >>>= executeAtMask; - missingDepsInfoOffset = (int)header & missingDepsMask; + commandDecodeFlags |= (int)header & missingDepsMask; header >>>= missingDepsMask; + int ballotMask = status.hasBallot ? ballotMasks : 0; + commandDecodeFlags |= ((int)header & ballotMask) << 2; + header >>>= ballotMask; + decodeFlags[i] = commandDecodeFlags; } Txn.Kind kind = TXN_ID_FLAG_BITS_KIND_LOOKUP[((int)header & txnIdFlagsMask)]; @@ -592,11 +712,8 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) if (readEpochBytes > 0) epoch += readEpochBytes == 1 ? (in.get() & 0xff) : in.getInt(); - TxnId txnId = kind != null ? new TxnId(epoch, hlc, kind, Domain.Key, node) - : TxnId.fromValues(epoch, hlc, flags, node); - - txnIds[i] = txnId; - txns[i] = DECODE_INFOS[(executeAtInfoOffset | missingDepsInfoOffset)*STATUS_COUNT + status.ordinal()]; + txnIds[i] = kind != null ? new TxnId(epoch, hlc, kind, Domain.Key, node) + : TxnId.fromValues(epoch, hlc, flags, node); prevEpoch = epoch; prevHlc = hlc; @@ -626,7 +743,7 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) in.position(in.position() + offset); } - if (executeAtMasks + missingDepsMasks > 0) + if ((executeAtMasks | missingDepsMasks | ballotMasks) > 0) { TxnId[] missingIdBuffer = cachedTxnIds().get(8); int missingIdCount = 0, maxIdBufferCount = 0; @@ -634,25 +751,36 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) int txnIdMask = (1 << bitsPerTxnId) - 1; int bitsPerMissingId = bitsPerTxnId + 1; - int decodeBits = executeAtMasks > 0 ? in.getShort() & 0xffff : 0; - int bitsPerEpochDelta = decodeBits >>> 10; - int bitsPerHlcDelta = 1 + ((decodeBits >>> 4) & 0x3f); - int bitsPerFlags = decodeBits & 0xf; - if (bitsPerFlags == 15) bitsPerFlags = 16; - int bitsPerExecuteAt = bitsPerEpochDelta + bitsPerHlcDelta + bitsPerFlags + bitsPerNodeId; - - long epochDeltaMask = bitsPerEpochDelta == 0 ? 0 : (-1L >>> (64 - bitsPerEpochDelta)); - long hlcDeltaMask = (-1L >>> (64 - bitsPerHlcDelta)); - long flagsMask = bitsPerFlags == 0 ? 0 : (-1L >>> (64 - bitsPerFlags)); - + int decodeExecuteAtBits = executeAtMasks > 0 ? in.getShort() & 0xffff : 0; + int bitsPerExecuteAtEpoch = decodeExecuteAtBits >>> 10; + int bitsPerExecuteAtHlc = 1 + ((decodeExecuteAtBits >>> 4) & 0x3f); + int bitsPerExecuteAtFlags = decodeExecuteAtBits & 0xf; + if (bitsPerExecuteAtFlags == 15) bitsPerExecuteAtFlags = 16; + int bitsPerExecuteAt = bitsPerExecuteAtEpoch + bitsPerExecuteAtHlc + bitsPerExecuteAtFlags + bitsPerNodeId; + + long executeAtEpochMask = bitsPerExecuteAtEpoch == 0 ? 0 : (-1L >>> (64 - bitsPerExecuteAtEpoch)); + long executeAtHlcMask = (-1L >>> (64 - bitsPerExecuteAtHlc)); + long executeAtFlagsMask = bitsPerExecuteAtFlags == 0 ? 0 : (-1L >>> (64 - bitsPerExecuteAtFlags)); + + int decodeBallotBits = ballotMasks > 0 ? in.getShort() & 0xffff : 0; + int bitsPerBallotEpoch = decodeBallotBits >>> 10; + int bitsPerBallotHlc = 1 + ((decodeBallotBits >>> 4) & 0x3f); + int bitsPerBallotFlags = decodeBallotBits & 0xf; + if (bitsPerBallotFlags == 15) bitsPerBallotFlags = 16; + int bitsPerBallot = bitsPerBallotEpoch + bitsPerBallotHlc + bitsPerBallotFlags + bitsPerNodeId; + + long ballotEpochMask = bitsPerBallotEpoch == 0 ? 0 : (-1L >>> (64 - bitsPerBallotEpoch)); + long ballotHlcMask = (-1L >>> (64 - bitsPerBallotHlc)); + long ballotFlagsMask = bitsPerBallotFlags == 0 ? 0 : (-1L >>> (64 - bitsPerBallotFlags)); + + Ballot prevBallot = null; final BitReader reader = new BitReader(); - for (int i = 0 ; i < commandCount ; ++i) { TxnId txnId = txnIds[i]; - TxnInfo placeholder = txns[i]; - Timestamp executeAt; - if (placeholder.executeAt == null) + int commandDecodeFlags = decodeFlags[i]; + Timestamp executeAt = txnId; + if ((commandDecodeFlags & HAS_EXECUTE_AT_HEADER_BIT) != 0) { long epoch, hlc; int flags; @@ -660,30 +788,26 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) if (bitsPerExecuteAt <= 64) { long executeAtBits = reader.read(bitsPerExecuteAt, in); - epoch = txnId.epoch() + (executeAtBits & epochDeltaMask); - executeAtBits >>>= bitsPerEpochDelta; - hlc = txnId.hlc() + (executeAtBits & hlcDeltaMask); - executeAtBits >>>= bitsPerHlcDelta; - flags = (int)(executeAtBits & flagsMask); - executeAtBits >>>= bitsPerFlags; + epoch = txnId.epoch() + (executeAtBits & executeAtEpochMask); + executeAtBits >>>= bitsPerExecuteAtEpoch; + hlc = txnId.hlc() + (executeAtBits & executeAtHlcMask); + executeAtBits >>>= bitsPerExecuteAtHlc; + flags = (int)(executeAtBits & executeAtFlagsMask); + executeAtBits >>>= bitsPerExecuteAtFlags; id = nodeIds[(int)(executeAtBits & nodeIdMask)]; } else { - epoch = txnId.epoch() + reader.read(bitsPerEpochDelta, in); - hlc = txnId.hlc() + reader.read(bitsPerHlcDelta, in); - flags = (int) reader.read(bitsPerFlags, in); + epoch = txnId.epoch() + reader.read(bitsPerExecuteAtEpoch, in); + hlc = txnId.hlc() + reader.read(bitsPerExecuteAtHlc, in); + flags = (int) reader.read(bitsPerExecuteAtFlags, in); id = nodeIds[(int)(reader.read(bitsPerNodeId, in))]; } executeAt = Timestamp.fromValues(epoch, hlc, flags, id); } - else - { - executeAt = txnId; - } - TxnId[] missing = placeholder.missing(); - if (missing == null) + TxnId[] missing = NO_TXNIDS; + if ((commandDecodeFlags & HAS_MISSING_DEPS_HEADER_BIT) != 0) { int prev = -1; while (true) @@ -704,7 +828,46 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) missingIdCount = 0; } - txns[i] = TxnInfo.create(txnId, placeholder.status, executeAt, missing); + Ballot ballot = Ballot.ZERO; + if ((commandDecodeFlags & HAS_BALLOT_HEADER_BIT) != 0) + { + if (prevBallot == null) + { + long msb = reader.read(64, in); + long lsb = reader.read(64, in); + Node.Id id = nodeIds[(int)(reader.read(bitsPerNodeId, in))]; + ballot = Ballot.fromBits(msb, lsb, id); + } + else + { + long epoch, hlc; + int flags; + Node.Id id; + if (bitsPerExecuteAt <= 64) + { + long ballotBits = reader.read(bitsPerBallot, in); + epoch = prevBallot.epoch() + decodeZigZag64(ballotBits & ballotEpochMask); + ballotBits >>>= bitsPerBallotEpoch; + hlc = prevBallot.hlc() + decodeZigZag64(ballotBits & ballotHlcMask); + ballotBits >>>= bitsPerBallotHlc; + flags = (int)(ballotBits & ballotFlagsMask); + ballotBits >>>= bitsPerBallotFlags; + id = nodeIds[(int)(ballotBits & nodeIdMask)]; + } + else + { + epoch = prevBallot.epoch() + decodeZigZag64(reader.read(bitsPerBallotEpoch, in)); + hlc = prevBallot.hlc() + decodeZigZag64(reader.read(bitsPerBallotHlc, in)); + flags = (int) reader.read(bitsPerBallotFlags, in); + id = nodeIds[(int)(reader.read(bitsPerNodeId, in))]; + } + ballot = Ballot.fromValues(epoch, hlc, flags, id); + } + + prevBallot = ballot; + } + + txns[i] = TxnInfo.create(txnId, InternalStatus.get(commandDecodeFlags >>> 3), executeAt, missing, ballot); } cachedTxnIds().forceDiscard(missingIdBuffer, maxIdBufferCount); @@ -712,11 +875,11 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) else { for (int i = 0 ; i < commandCount ; ++i) - txns[i] = TxnInfo.create(txnIds[i], txns[i].status, txnIds[i]); + txns[i] = TxnInfo.create(txnIds[i], InternalStatus.get(decodeFlags[i] >>> 3), txnIds[i], Ballot.ZERO); } cachedTxnIds().forceDiscard(txnIds, commandCount); - return CommandsForKey.SerializerSupport.create(key, txns, unmanageds); + return CommandsForKey.SerializerSupport.create(key, txns, unmanageds, prunedBeforeIndex == -1 ? TxnId.NONE : txns[prunedBeforeIndex]); } private static int getHlcBytes(int lookup, int index) @@ -836,9 +999,9 @@ private static TxnIdFlags txnIdFlags(TxnId txnId) case Read: case Write: return STANDARD; - case ExclusiveSyncPoint: - return EXTENDED; case SyncPoint: + return EXTENDED; + case ExclusiveSyncPoint: case LocalOnly: case EphemeralRead: return RAW; @@ -857,19 +1020,5 @@ private static long txnIdFlagsBits(TxnId txnId) } } - private static final Txn.Kind[] TXN_ID_FLAG_BITS_KIND_LOOKUP = new Txn.Kind[] { Read, Write, ExclusiveSyncPoint, null }; - private static final int STATUS_COUNT = InternalStatus.values().length; - private static final TxnInfo[] DECODE_INFOS = new TxnInfo[4 * STATUS_COUNT]; - static - { - for (InternalStatus status : InternalStatus.values()) - { - int ordinal = status.ordinal(); - DECODE_INFOS[ordinal] = TxnInfo.createMock(TxnId.NONE, status, TxnId.NONE, CommandsForKey.NO_TXNIDS); - DECODE_INFOS[STATUS_COUNT+ordinal] = TxnInfo.createMock(TxnId.NONE, status, TxnId.NONE, null); - DECODE_INFOS[2*STATUS_COUNT+ordinal] = TxnInfo.createMock(TxnId.NONE, status, null, CommandsForKey.NO_TXNIDS); - DECODE_INFOS[3*STATUS_COUNT+ordinal] = TxnInfo.createMock(TxnId.NONE, status, null, null); - } - } - + private static final Txn.Kind[] TXN_ID_FLAG_BITS_KIND_LOOKUP = new Txn.Kind[] { Read, Write, SyncPoint, null }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java index 0eba398ff95d..dd78761cdae6 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java @@ -45,23 +45,23 @@ public abstract class DepsSerializer extends IVersionedWithKeysSerializer.AbstractWithKeysSerializer implements IVersionedWithKeysSerializer, D> { - public static final DepsSerializer deps = new DepsSerializer() + public static final DepsSerializer deps = new DepsSerializer<>() { @Override - Deps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) + Deps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, KeyDeps directKeyDeps, DataInputPlus in, int version) { - return new Deps(keyDeps, rangeDeps); + return new Deps(keyDeps, rangeDeps, directKeyDeps); } }; public static final IVersionedSerializer nullableDeps = NullableSerializer.wrap(deps); - public static final DepsSerializer partialDeps = new DepsSerializer() + public static final DepsSerializer partialDeps = new DepsSerializer<>() { @Override - PartialDeps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) throws IOException + PartialDeps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, KeyDeps directKeyDeps, DataInputPlus in, int version) throws IOException { Ranges covering = KeySerializers.ranges.deserialize(in, version); - return new PartialDeps(covering, keyDeps, rangeDeps); + return new PartialDeps(covering, keyDeps, rangeDeps, directKeyDeps); } @Override @@ -72,9 +72,9 @@ public void serialize(PartialDeps partialDeps, DataOutputPlus out, int version) } @Override - public void serialize(Seekables keys, PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException + public void serialize(Seekables superset, PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException { - super.serialize(keys, partialDeps, out, version); + super.serialize(superset, partialDeps, out, version); KeySerializers.ranges.serialize(partialDeps.covering, out, version); } @@ -95,7 +95,7 @@ public long serializedSize(Seekables keys, PartialDeps partialDeps, int ve public static final IVersionedSerializer nullablePartialDeps = NullableSerializer.wrap(partialDeps); - abstract D deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, DataInputPlus in, int version) throws IOException; + abstract D deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, KeyDeps directKeyDeps, DataInputPlus in, int version) throws IOException; @Override public void serialize(D deps, DataOutputPlus out, int version) throws IOException @@ -105,9 +105,9 @@ public void serialize(D deps, DataOutputPlus out, int version) throws IOExceptio } @Override - public void serialize(Seekables keys, D deps, DataOutputPlus out, int version) throws IOException + public void serialize(Seekables superset, D deps, DataOutputPlus out, int version) throws IOException { - if (keys.domain() == Key) serializeSubset(deps.keyDeps.keys(), keys, out); + if (superset.domain() == Key) serializeSubset(deps.keyDeps.keys(), superset, out); else KeySerializers.keys.serialize(deps.keyDeps.keys(), out, version); serializeWithoutKeys(deps, out, version); } @@ -148,21 +148,10 @@ public long serializedSize(Seekables keys, D deps, int version) private void serializeWithoutKeys(D deps, DataOutputPlus out, int version) throws IOException { - KeyDeps keyDeps = deps.keyDeps; - { - int txnIdCount = keyDeps.txnIdCount(); - out.writeUnsignedVInt32(txnIdCount); - for (int i = 0; i < txnIdCount; i++) - CommandSerializers.txnId.serialize(keyDeps.txnId(i), out, version); + serializeKeyDepsWithoutKeys(deps.keyDeps, out, version); - int keysToTxnIdsCount = keysToTxnIdsCount(keyDeps); - out.writeUnsignedVInt32(keysToTxnIdsCount); - for (int i = 0; i < keysToTxnIdsCount; i++) - out.writeUnsignedVInt32(keysToTxnIds(keyDeps, i)); - } - - RangeDeps rangeDeps = deps.rangeDeps; { + RangeDeps rangeDeps = deps.rangeDeps; int rangeCount = rangeDeps.rangeCount(); out.writeUnsignedVInt32(rangeCount); for (int i = 0; i < rangeCount; i++) @@ -178,24 +167,34 @@ private void serializeWithoutKeys(D deps, DataOutputPlus out, int version) throw for (int i = 0; i < rangesToTxnIdsCount; i++) out.writeUnsignedVInt32(rangesToTxnIds(rangeDeps, i)); } - } - private D deserializeWithoutKeys(Keys keys, DataInputPlus in, int version) throws IOException - { - KeyDeps keyDeps; { - int txnIdCount = in.readUnsignedVInt32(); - TxnId[] txnIds = new TxnId[txnIdCount]; - for (int i = 0; i < txnIdCount; i++) - txnIds[i] = CommandSerializers.txnId.deserialize(in, version); - - int keysToTxnIdsCount = in.readUnsignedVInt32(); - int[] keysToTxnIds = new int[keysToTxnIdsCount]; - for (int i = 0; i < keysToTxnIdsCount; i++) - keysToTxnIds[i] = in.readUnsignedVInt32(); + Keys keys = deps.directKeyDeps.keys(); + boolean isSubset = isSubset(keys, deps.keyDeps.keys()); + out.writeBoolean(isSubset); + if (isSubset) serializeSubset(keys, deps.keyDeps.keys(), out); + else KeySerializers.keys.serialize(keys, out, version); - keyDeps = KeyDeps.SerializerSupport.create(keys, txnIds, keysToTxnIds); + serializeKeyDepsWithoutKeys(deps.directKeyDeps, out, version); } + } + + private void serializeKeyDepsWithoutKeys(KeyDeps keyDeps, DataOutputPlus out, int version) throws IOException + { + int txnIdCount = keyDeps.txnIdCount(); + out.writeUnsignedVInt32(txnIdCount); + for (int i = 0; i < txnIdCount; i++) + CommandSerializers.txnId.serialize(keyDeps.txnId(i), out, version); + + int keysToTxnIdsCount = keysToTxnIdsCount(keyDeps); + out.writeUnsignedVInt32(keysToTxnIdsCount); + for (int i = 0; i < keysToTxnIdsCount; i++) + out.writeUnsignedVInt32(keysToTxnIds(keyDeps, i)); + } + + private D deserializeWithoutKeys(Keys keys, DataInputPlus in, int version) throws IOException + { + KeyDeps keyDeps = deserializeKeyDeps(keys, in, version); RangeDeps rangeDeps; { @@ -217,25 +216,34 @@ private D deserializeWithoutKeys(Keys keys, DataInputPlus in, int version) throw rangeDeps = RangeDeps.SerializerSupport.create(ranges, txnIds, rangesToTxnIds); } - return deserialize(keyDeps, rangeDeps, in, version); + KeyDeps directKeyDeps; + { + boolean isSubset = in.readBoolean(); + Keys directKeys = isSubset ? (Keys)deserializeSubset(keys, in) : KeySerializers.keys.deserialize(in, version); + directKeyDeps = deserializeKeyDeps(directKeys, in, version); + } + + return deserialize(keyDeps, rangeDeps, directKeyDeps, in, version); } - private long serializedSizeWithoutKeys(D deps, int version) + private static KeyDeps deserializeKeyDeps(Keys keys, DataInputPlus in, int version) throws IOException { - long size = 0L; + int txnIdCount = in.readUnsignedVInt32(); + TxnId[] txnIds = new TxnId[txnIdCount]; + for (int i = 0; i < txnIdCount; i++) + txnIds[i] = CommandSerializers.txnId.deserialize(in, version); - KeyDeps keyDeps = deps.keyDeps; - { - int txnIdCount = keyDeps.txnIdCount(); - size += sizeofUnsignedVInt(txnIdCount); - for (int i = 0; i < txnIdCount; i++) - size += CommandSerializers.txnId.serializedSize(keyDeps.txnId(i), version); + int keysToTxnIdsCount = in.readUnsignedVInt32(); + int[] keysToTxnIds = new int[keysToTxnIdsCount]; + for (int i = 0; i < keysToTxnIdsCount; i++) + keysToTxnIds[i] = in.readUnsignedVInt32(); - int keysToTxnIdsCount = keysToTxnIdsCount(keyDeps); - size += sizeofUnsignedVInt(keysToTxnIdsCount); - for (int i = 0; i < keysToTxnIdsCount; i++) - size += sizeofUnsignedVInt(keysToTxnIds(keyDeps, i)); - } + return KeyDeps.SerializerSupport.create(keys, txnIds, keysToTxnIds); + } + + private long serializedSizeWithoutKeys(D deps, int version) + { + long size = serializedSizeOfKeyDepsWithoutKeys(deps.keyDeps, version); RangeDeps rangeDeps = deps.rangeDeps; { @@ -255,7 +263,32 @@ private long serializedSizeWithoutKeys(D deps, int version) size += sizeofUnsignedVInt(rangesToTxnIds(rangeDeps, i)); } + { + boolean isSubset = isSubset(deps.directKeyDeps.keys(), deps.keyDeps.keys()); + size += 1; + size += isSubset ? serializedSubsetSize(deps.directKeyDeps.keys(), deps.keyDeps.keys()) : KeySerializers.keys.serializedSize(deps.directKeyDeps.keys(), version); + size += serializedSizeOfKeyDepsWithoutKeys(deps.directKeyDeps, version); + } + return size; + } + + private static long serializedSizeOfKeyDepsWithoutKeys(KeyDeps keyDeps, int version) + { + int txnIdCount = keyDeps.txnIdCount(); + long size = sizeofUnsignedVInt(txnIdCount); + for (int i = 0; i < txnIdCount; i++) + size += CommandSerializers.txnId.serializedSize(keyDeps.txnId(i), version); + + int keysToTxnIdsCount = keysToTxnIdsCount(keyDeps); + size += sizeofUnsignedVInt(keysToTxnIdsCount); + for (int i = 0; i < keysToTxnIdsCount; i++) + size += sizeofUnsignedVInt(keysToTxnIds(keyDeps, i)); return size; } + private static boolean isSubset(Keys test, Keys superset) + { + return test.foldl(superset, (k, p, v, i) -> v + 1, 0, 0, 0) == test.size(); + } + } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index 6c22d2844039..9bd2e0082a65 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -21,78 +21,28 @@ import java.io.IOException; import java.nio.ByteBuffer; -import accord.local.Command; import accord.local.Command.WaitingOn; +import accord.primitives.KeyDeps; import accord.primitives.Keys; +import accord.primitives.RangeDeps; import accord.primitives.Routable; -import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.ImmutableBitSet; import accord.utils.Invariants; import accord.utils.SimpleBitSet; -import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.vint.VIntCoding; public class WaitingOnSerializer { - public static void serialize(TxnId txnId, WaitingOn waitingOn, DataOutputPlus out) throws IOException - { - if (txnId.kind().awaitsOnlyDeps()) - { - Timestamp executeAtLeast = waitingOn.executeAtLeast(); - out.writeBoolean(executeAtLeast != null); - if (executeAtLeast != null) - CommandSerializers.timestamp.serialize(executeAtLeast, out); - } - int keyCount = waitingOn.keys.size(); - int txnIdCount = waitingOn.txnIds.size(); - int waitingOnLength = (txnIdCount + keyCount + 63) / 64; - serialize(waitingOnLength, waitingOn.waitingOn, out); - if (txnId.domain() == Routable.Domain.Range) - { - int appliedOrInvalidatedLength = (txnIdCount + 63) / 64; - serialize(appliedOrInvalidatedLength, waitingOn.appliedOrInvalidated, out); - } - } - - public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList txnIds, DataInputPlus in) throws IOException - { - Timestamp executeAtLeast = null; - if (txnId.kind().awaitsOnlyDeps()) - { - if (in.readBoolean()) - executeAtLeast = CommandSerializers.timestamp.deserialize(in); - } - int waitingOnLength = (txnIds.size() + keys.size() + 63) / 64; - ImmutableBitSet waitingOn = deserialize(waitingOnLength, in); - ImmutableBitSet appliedOrInvalidated = null; - if (txnId.domain() == Routable.Domain.Range) - { - int appliedOrInvalidatedLength = (txnIds.size() + 63) / 64; - appliedOrInvalidated = deserialize(appliedOrInvalidatedLength, in); - } - - WaitingOn result = new WaitingOn(keys, txnIds, waitingOn, appliedOrInvalidated); - if (executeAtLeast != null) - result = new Command.WaitingOnWithExecuteAt(result, executeAtLeast); - return result; - } - - public static long serializedSize(TxnId txnId, WaitingOn waitingOn) + public static long serializedSize(WaitingOn waitingOn) { int keyCount = waitingOn.keys.size(); - int txnIdCount = waitingOn.txnIds.size(); + int txnIdCount = waitingOn.txnIdCount(); int waitingOnLength = (txnIdCount + keyCount + 63) / 64; long size = serializedSize(waitingOnLength, waitingOn.waitingOn); - if (txnId.kind().awaitsOnlyDeps()) - { - Timestamp executeAtLeast = waitingOn.executeAtLeast(); - size += 1; - if (executeAtLeast != null) - size += CommandSerializers.timestamp.serializedSize(); - } + size += TypeSizes.sizeofUnsignedVInt(keyCount); + size += TypeSizes.sizeofUnsignedVInt(txnIdCount); if (waitingOn.appliedOrInvalidated == null) return size; @@ -100,56 +50,26 @@ public static long serializedSize(TxnId txnId, WaitingOn waitingOn) return size + serializedSize(appliedOrInvalidatedLength, waitingOn.appliedOrInvalidated); } - private static void serialize(int length, SimpleBitSet write, DataOutputPlus out) throws IOException - { - long[] bits = SimpleBitSet.SerializationSupport.getArray(write); - Invariants.checkState(length == bits.length); - for (long v : bits) - out.writeLong(v); - } - - private static ImmutableBitSet deserialize(int length, DataInputPlus in) throws IOException - { - long[] bits = new long[length]; - for (int i = 0 ; i < length ; ++i) - bits[i] = in.readLong(); - return ImmutableBitSet.SerializationSupport.construct(bits); - } - public static long serializedSize(int length, SimpleBitSet write) { long[] bits = SimpleBitSet.SerializationSupport.getArray(write); - Invariants.checkState(length == bits.length); + Invariants.checkState(length == bits.length, "Expected length %d != %d", length, bits.length); return (long) TypeSizes.LONG_SIZE * length; } public static ByteBuffer serialize(TxnId txnId, WaitingOn waitingOn) throws IOException { int keyCount = waitingOn.keys.size(); - int txnIdCount = waitingOn.txnIds.size(); + int txnIdCount = waitingOn.txnIdCount(); int waitingOnLength = (txnIdCount + keyCount + 63) / 64; int appliedOrInvalidatedLength = 0; if (txnId.domain() == Routable.Domain.Range) appliedOrInvalidatedLength = (txnIdCount + 63) / 64; - int size = TypeSizes.LONG_SIZE * (waitingOnLength + appliedOrInvalidatedLength); - Timestamp executeAtLeast = null; - if (txnId.kind().awaitsOnlyDeps()) - { - executeAtLeast = waitingOn.executeAtLeast(); - size += 1; - if (executeAtLeast != null) - size += CommandSerializers.timestamp.serializedSize(); - } - - ByteBuffer out = ByteBuffer.allocate(size); - if (txnId.kind().awaitsOnlyDeps()) - { - out.put((byte)(executeAtLeast != null ? 1 : 0)); - if (executeAtLeast != null) - CommandSerializers.timestamp.serialize(executeAtLeast, out); - } - + ByteBuffer out = ByteBuffer.allocate(TypeSizes.sizeofUnsignedVInt(keyCount) + TypeSizes.sizeofUnsignedVInt(txnIdCount) + + TypeSizes.LONG_SIZE * (waitingOnLength + appliedOrInvalidatedLength)); + VIntCoding.writeUnsignedVInt32(keyCount, out); + VIntCoding.writeUnsignedVInt32(txnIdCount, out); serialize(waitingOnLength, waitingOn.waitingOn, out); if (appliedOrInvalidatedLength > 0) serialize(appliedOrInvalidatedLength, waitingOn.appliedOrInvalidated, out); @@ -164,33 +84,24 @@ private static void serialize(int length, SimpleBitSet write, ByteBuffer out) out.putLong(bits[i]); } - public static WaitingOn deserialize(TxnId txnId, Keys keys, SortedArrayList txnIds, ByteBuffer in) throws IOException + public static WaitingOn deserialize(TxnId txnId, Keys keys, RangeDeps directRangeDeps, KeyDeps directKeyDeps, ByteBuffer in) throws IOException { + int txnIdCount = directRangeDeps.txnIdCount() + directKeyDeps.txnIdCount(); + int waitingOnLength = (txnIdCount + keys.size() + 63) / 64; int position = in.position(); - Timestamp executeAtLeast = null; - if (txnId.kind().awaitsOnlyDeps()) - { - if (in.get(position++) != 0) - { - executeAtLeast = CommandSerializers.timestamp.deserialize(in, position); - position += CommandSerializers.timestamp.serializedSize(); - } - } - - int waitingOnLength = (txnIds.size() + keys.size() + 63) / 64; + int a = VIntCoding.readUnsignedVInt32(in, position); + position += TypeSizes.sizeofUnsignedVInt(a); + int b = VIntCoding.readUnsignedVInt32(in, position); + position += TypeSizes.sizeofUnsignedVInt(a); ImmutableBitSet waitingOn = deserialize(position, waitingOnLength, in); ImmutableBitSet appliedOrInvalidated = null; if (txnId.domain() == Routable.Domain.Range) { position += waitingOnLength*8; - int appliedOrInvalidatedLength = (txnIds.size() + 63) / 64; + int appliedOrInvalidatedLength = (txnIdCount + 63) / 64; appliedOrInvalidated = deserialize(position, appliedOrInvalidatedLength, in); } - - WaitingOn result = new WaitingOn(keys, txnIds, waitingOn, appliedOrInvalidated); - if (executeAtLeast != null) - result = new Command.WaitingOnWithExecuteAt(result, executeAtLeast); - return result; + return new WaitingOn(keys, directRangeDeps, directKeyDeps, waitingOn, appliedOrInvalidated); } private static ImmutableBitSet deserialize(int position, int length, ByteBuffer in) diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index 122336ad122b..62dfe68e2c9c 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -35,7 +35,6 @@ import accord.primitives.Ranges; import accord.primitives.Seekable; import accord.primitives.Timestamp; -import accord.utils.SortedArrays; import org.apache.cassandra.db.SinglePartitionReadCommand; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; @@ -49,7 +48,6 @@ import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.Simulate; -import static accord.utils.SortedArrays.Search.CEIL; import static com.google.common.base.Preconditions.checkArgument; import static org.apache.cassandra.service.accord.AccordSerializers.consistencyLevelSerializer; import static org.apache.cassandra.service.accord.IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS; @@ -183,22 +181,6 @@ public Read merge(Read read) return createTxnRead(reads, txnKeys.with((Keys)read.keys()), cassandraConsistencyLevel); } - @Override - public boolean isEqualOrFuller(Read other) - { - TxnRead that = (TxnRead) other; - - int j = 0; - for (int i = 0; i < that.items.length; ++i) - { - j = SortedArrays.exponentialSearch(this.items, j, this.items.length, that.items[i], this::compare, CEIL); - if (j < 0 || !that.items[i].equals(this.items[j])) - return false; - } - - return this.txnKeys.containsAll(that.txnKeys); - } - @Override public AsyncChain read(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) { diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index cd2aa8a3327e..5a32065e97b7 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -125,22 +125,6 @@ public boolean equals(Object o) return Arrays.equals(fragments, txnUpdate.fragments) && Objects.equals(condition, txnUpdate.condition); } - @Override - public boolean isEqualOrFuller(Update other) - { - TxnUpdate that = (TxnUpdate) other; - - int j = 0; - for (int i = 0; i < that.keys.size(); ++i) - { - j = this.keys.findNext(j, that.keys.get(i), CEIL); - if (j < 0 || !that.fragments[i].equals(this.fragments[j])) - return false; - } - - return this.condition.equals(that.condition); - } - @Override public int hashCode() { diff --git a/src/java/org/apache/cassandra/utils/MerkleTree.java b/src/java/org/apache/cassandra/utils/MerkleTree.java index 2646057c0428..cadc47ed2736 100644 --- a/src/java/org/apache/cassandra/utils/MerkleTree.java +++ b/src/java/org/apache/cassandra/utils/MerkleTree.java @@ -749,7 +749,7 @@ private static ByteBuffer allocate(int innerNodeCount, IPartitioner partitioner) int size = offHeapBufferSize(innerNodeCount, partitioner); logger.debug("Allocating direct buffer of size {} for an off-heap merkle tree", size); ByteBuffer buffer = ByteBuffer.allocateDirect(size); - if (Ref.DEBUG_ENABLED) + if (Ref.TRACE_ENABLED) MemoryUtil.setAttachment(buffer, new Ref.DirectBufferRef<>(null, null)); return buffer; } diff --git a/src/java/org/apache/cassandra/utils/concurrent/Ref.java b/src/java/org/apache/cassandra/utils/concurrent/Ref.java index e268f5fd73c2..911c1db8819e 100644 --- a/src/java/org/apache/cassandra/utils/concurrent/Ref.java +++ b/src/java/org/apache/cassandra/utils/concurrent/Ref.java @@ -61,6 +61,7 @@ import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.UNSAFE; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DEBUG_REF_COUNT; +import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DEBUG_REF_EVENTS; import static org.apache.cassandra.utils.Shared.Scope.SIMULATION; import static org.apache.cassandra.utils.Throwables.maybeFail; import static org.apache.cassandra.utils.Throwables.merge; @@ -99,7 +100,8 @@ public final class Ref implements RefCounted { static final Logger logger = LoggerFactory.getLogger(Ref.class); - public static final boolean DEBUG_ENABLED = TEST_DEBUG_REF_COUNT.getBoolean(); + public static final boolean TRACE_ENABLED = TEST_DEBUG_REF_COUNT.getBoolean(); + public static final boolean DEBUG_EVENTS_ENABLED = TEST_DEBUG_REF_EVENTS.getBoolean(); static OnLeak ON_LEAK; @Shared(scope = SIMULATION) @@ -170,10 +172,10 @@ public Ref ref() public String printDebugInfo() { - if (DEBUG_ENABLED) + if (TRACE_ENABLED) { - state.debug.log(state.toString()); - return "Memory was freed by " + state.debug.deallocateThread; + ((Debug)state.debug).log(state.toString()); + return "Memory was freed by " + ((Debug)state.debug).deallocateThread; } return "Memory was freed"; } @@ -191,7 +193,7 @@ public int globalCount() // ensures it is only released once, and that it is always released static final class State extends PhantomReference { - final Debug debug = DEBUG_ENABLED ? new Debug() : null; + final Object debug = TRACE_ENABLED ? new Debug() : DEBUG_EVENTS_ENABLED ? new ArrayList<>() : null; final GlobalState globalState; private volatile int released; @@ -206,8 +208,8 @@ static final class State extends PhantomReference void assertNotReleased() { - if (DEBUG_ENABLED && released == 1) - debug.log(toString()); + if (TRACE_ENABLED && released == 1) + ((Debug)debug).log(toString()); assert released == 0; } @@ -216,8 +218,8 @@ Throwable ensureReleased(Throwable accumulate) if (releasedUpdater.getAndSet(this, 1) == 0) { accumulate = globalState.release(this, accumulate); - if (DEBUG_ENABLED) - debug.deallocate(); + if (TRACE_ENABLED) + ((Debug)debug).deallocate(); } return accumulate; } @@ -230,8 +232,8 @@ void release(boolean leak) { String id = this.toString(); logger.error("BAD RELEASE: attempted to release a reference ({}) that has already been released", id); - if (DEBUG_ENABLED) - debug.log(id); + if (TRACE_ENABLED) + ((Debug)debug).log(id); throw new IllegalStateException("Attempted to release a reference that has already been released"); } return; @@ -240,16 +242,16 @@ void release(boolean leak) if (leak) { String id = this.toString(); - logger.error("LEAK DETECTED: a reference ({}) to {} was not released before the reference was garbage collected", id, globalState); - if (DEBUG_ENABLED) - debug.log(id); + logger.error("LEAK DETECTED: a reference ({}) to {} was not released before the reference was garbage collected{}", id, globalState, (DEBUG_EVENTS_ENABLED ? "(debug: " + debug + ')' : "")); + if (TRACE_ENABLED) + ((Debug)debug).log(id); OnLeak onLeak = ON_LEAK; if (onLeak != null) onLeak.onLeak(this); } - else if (DEBUG_ENABLED) + else if (TRACE_ENABLED) { - debug.deallocate(); + ((Debug)debug).deallocate(); } if (fail != null) logger.error("Error when closing {}", globalState, fail); @@ -299,6 +301,12 @@ String print(String thread, StackTraceElement[] trace) } } + public void debug(String event) + { + if (DEBUG_EVENTS_ENABLED) + ((List)state.debug).add(event); + } + // the object that manages the actual cleaning up; this does not reference the target object // so that we can detect when references are lost to the resource itself, and still cleanup afterwards // the Tidy object MUST NOT contain any references to the object we are managing @@ -383,10 +391,10 @@ public String toString() private static final Set globallyExtant = Collections.newSetFromMap(new ConcurrentHashMap<>()); static final ReferenceQueue referenceQueue = new ReferenceQueue<>(); private static final Shutdownable EXEC = executorFactory().infiniteLoop("Reference-Reaper", Ref::reapOneReference, UNSAFE); - static final ScheduledExecutorService STRONG_LEAK_DETECTOR = !DEBUG_ENABLED ? null : executorFactory().scheduled("Strong-Reference-Leak-Detector"); + static final ScheduledExecutorService STRONG_LEAK_DETECTOR = !TRACE_ENABLED ? null : executorFactory().scheduled("Strong-Reference-Leak-Detector"); static { - if (DEBUG_ENABLED) + if (TRACE_ENABLED) { STRONG_LEAK_DETECTOR.scheduleAtFixedRate(new Visitor(), 1, 15, TimeUnit.MINUTES); STRONG_LEAK_DETECTOR.scheduleAtFixedRate(new StrongLeakDetector(), 2, 15, TimeUnit.MINUTES); diff --git a/src/java/org/apache/cassandra/utils/memory/BufferPool.java b/src/java/org/apache/cassandra/utils/memory/BufferPool.java index cddfc8fe6122..e46b0e4d692a 100644 --- a/src/java/org/apache/cassandra/utils/memory/BufferPool.java +++ b/src/java/org/apache/cassandra/utils/memory/BufferPool.java @@ -1330,7 +1330,7 @@ static Chunk getParentChunk(ByteBuffer buffer) void setAttachment(ByteBuffer buffer) { - if (Ref.DEBUG_ENABLED) + if (Ref.TRACE_ENABLED) MemoryUtil.setAttachment(buffer, new DirectBufferRef<>(this, null)); else MemoryUtil.setAttachment(buffer, this); @@ -1342,7 +1342,7 @@ boolean releaseAttachment(ByteBuffer buffer) if (attachment == null) return false; - if (Ref.DEBUG_ENABLED) + if (Ref.TRACE_ENABLED) ((DirectBufferRef) attachment).release(); return true; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java index 1e76473eebff..ac7ab99f2627 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -36,7 +36,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.impl.SimpleProgressLog; import accord.local.Node; import accord.local.PreLoadContext; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index 8e663ab966f1..187f08a11ccc 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -41,6 +41,7 @@ import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IMessage; import org.apache.cassandra.distributed.api.IMessageFilters; +import org.apache.cassandra.distributed.shared.DistributedTestBase; import org.apache.cassandra.net.Verb; import org.apache.cassandra.utils.EstimatedHistogram; @@ -172,4 +173,13 @@ protected Logger logger() { return logger; } + + public static void main(String[] args) throws Throwable + { + DistributedTestBase.beforeClass(); + AccordLoadTest.setUp(); + AccordLoadTest test = new AccordLoadTest(); + test.setup(); + test.testLoad(); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java index d3b549d20f94..783583a4fb1f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java @@ -26,7 +26,6 @@ import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.test.TestBaseImpl; -import org.apache.cassandra.distributed.test.log.FetchLogFromPeersTest.ClusterState; import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 4ac8d242e86b..493283bdae20 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -31,6 +31,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; + +import accord.primitives.Routable; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.service.accord.*; @@ -42,7 +44,7 @@ import accord.api.Key; import accord.api.Result; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.local.CheckedCommands; import accord.local.Command; import accord.local.CommandStore; @@ -115,6 +117,7 @@ public class CompactionAccordIteratorsTest private static final TxnId LT_TXN_ID = AccordTestUtils.txnId(EPOCH, HLC_START, NODE); private static final TxnId TXN_ID = AccordTestUtils.txnId(EPOCH, LT_TXN_ID.hlc() + 1, NODE); private static final TxnId SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 1, NODE, Kind.Read); + private static final TxnId RANGE_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 1, NODE, Kind.Read, Routable.Domain.Range); private static final TxnId GT_TXN_ID = SECOND_TXN_ID; // For CommandsForKey where we test with two commands private static final TxnId[] TXN_IDS = new TxnId[]{ TXN_ID, SECOND_TXN_ID }; @@ -480,7 +483,7 @@ private void testWithCommandStoreInternal(TestWithCommandStore test, boolean add CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right, appendDiffToKeyspace(commandStore)); }).beginAsResult()); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - safe.get(txnId, txnId, route).addListener(new Command.ProxyListener(txnId)); // add a junk listener just to test it in compaction + safe.get(txnId, txnId, route).addListener(new Command.ProxyListener(RANGE_TXN_ID)); // add a junk listener just to test it in compaction }).beginAsResult()); flush(commandStore); // The apply chain is asychronous, so it is easiest to just spin until it is applied diff --git a/test/unit/org/apache/cassandra/dht/TokenTest.java b/test/unit/org/apache/cassandra/dht/TokenTest.java new file mode 100644 index 000000000000..234747c2cdaf --- /dev/null +++ b/test/unit/org/apache/cassandra/dht/TokenTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.dht; + +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.IVersionedSerializers; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; + +import static accord.utils.Property.qt; + +public class TokenTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + } + + @Test + public void serde() + { + DataOutputBuffer output = new DataOutputBuffer(); + qt().check(rs -> { + IPartitioner partitioner = AccordGenerators.partitioner().next(rs); + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + Token token = AccordGenerators.fromQT(CassandraGenerators.token(partitioner)).next(rs); + for (MessagingService.Version version : MessagingService.Version.values()) + IVersionedSerializers.testSerde(output, Token.compactSerializer, token, version.value); + }); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/io/IVersionedSerializers.java b/test/unit/org/apache/cassandra/io/IVersionedSerializers.java index e17e0b7ce2ea..cbf62fb58c07 100644 --- a/test/unit/org/apache/cassandra/io/IVersionedSerializers.java +++ b/test/unit/org/apache/cassandra/io/IVersionedSerializers.java @@ -20,6 +20,8 @@ import java.io.IOException; +import accord.utils.LazyToString; +import accord.utils.ReflectionUtils; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; import org.assertj.core.api.Assertions; @@ -34,6 +36,6 @@ public static void testSerde(DataOutputBuffer output, IVersionedSerializer ReflectionUtils.recursiveEquals(read, input).toString())).isEqualTo(input); } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index e56f634d48af..bcc1f8bf111f 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -34,7 +34,7 @@ import accord.impl.TimestampsForKey; import accord.impl.TimestampsForKeys; import accord.local.Command; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.local.CommonAttributes; import accord.local.SaveStatus; import accord.primitives.Ballot; @@ -133,7 +133,7 @@ public void commandLoadSave() throws Throwable attrs.partialDeps(dependencies); SimpleBitSet waitingOnApply = new SimpleBitSet(3); waitingOnApply.set(1); - Command.WaitingOn waitingOn = new Command.WaitingOn(dependencies.keyDeps.keys(), dependencies.rangeDeps.txnIds(), new ImmutableBitSet(waitingOnApply), new ImmutableBitSet(2)); + Command.WaitingOn waitingOn = new Command.WaitingOn(dependencies.keyDeps.keys(), dependencies.rangeDeps, dependencies.directKeyDeps, new ImmutableBitSet(waitingOnApply), new ImmutableBitSet(2)); attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); @@ -182,8 +182,8 @@ public void timestampsForKeyLoadSave() AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); cfk.initialize(); - cfk.set(cfk.current().update(null, command1)); - cfk.set(cfk.current().update(null, command2)); + cfk.set(cfk.current().update(command1).cfk()); + cfk.set(cfk.current().update(command2).cfk()); AccordKeyspace.getTimestampsForKeyMutation(commandStore, tfk, commandStore.nextSystemTimestampMicros()).apply(); logger.info("E: {}", tfk); @@ -213,8 +213,8 @@ public void commandsForKeyLoadSave() AccordSafeCommandsForKey cfk = new AccordSafeCommandsForKey(loaded(key, null)); cfk.initialize(); - cfk.set(cfk.current().update(null, command1)); - cfk.set(cfk.current().update(null, command2)); + cfk.set(cfk.current().update(command1).cfk()); + cfk.set(cfk.current().update(command2).cfk()); AccordKeyspace.getCommandsForKeyMutation(commandStore.id(), cfk.current(), commandStore.nextSystemTimestampMicros()).apply(); logger.info("E: {}", cfk); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index d78bd63cbce6..a9bdd7aa5220 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -26,7 +26,7 @@ import accord.api.Key; import accord.api.RoutingKey; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.local.Command; import accord.local.KeyHistory; import accord.local.Node; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index d58105339b5d..d6d2dab96bde 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -108,8 +108,7 @@ public void serde() PartialTxn partialTxn = txn.slice(scope, true); RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); FullRoute route = partialTxn.keys().toRoute(routingKey); - Deps deps = new Deps(KeyDeps.none((Keys) txn.keys()), RangeDeps.NONE); - deps.slice(scope); + Deps deps = new Deps(KeyDeps.none((Keys) txn.keys()), RangeDeps.NONE, KeyDeps.NONE); CommonAttributes.Mutable common = new CommonAttributes.Mutable(id); common.partialTxn(partialTxn); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index 5d632c0fb7a2..d82f14305477 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -87,11 +87,6 @@ public void preExecute() original = global.get(); } - @Override - public void postExecute() - { - } - @Override public Status globalStatus() { diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 33f0dc24589b..6ef6a9cde885 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -32,7 +32,7 @@ import org.junit.Test; import accord.api.Key; -import accord.local.CommandsForKey; +import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.KeyHistory; @@ -105,11 +105,13 @@ public void cachedTest() commandCache.unsafeSetLoadFunction(id -> notDefined(id, txn)); AccordSafeCommand safeCommand = commandCache.acquire(txnId); testLoad(executor, safeCommand, notDefined(txnId, txn)); + AccordCachingState safeCommandGlobal = safeCommand.global(); commandCache.release(safeCommand); timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((PartitionKey) k)); AccordSafeTimestampsForKey safeTimestamps = timestampsCache.acquire(key); testLoad(executor, safeTimestamps, new TimestampsForKey(key)); + AccordCachingState safeTimestampsGlobal = safeTimestamps.global(); timestampsCache.release(safeTimestamps); AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), TIMESTAMPS); @@ -118,13 +120,13 @@ public void cachedTest() commandStore.executeBlocking(() -> { Context context = new Context(); boolean result = loader.load(context, (o, t) -> Assert.fail()); - Assert.assertEquals(safeCommand.global(), context.commands.get(txnId).global()); - Assert.assertEquals(safeTimestamps.global(), context.timestampsForKey.get(key).global()); + Assert.assertEquals(safeCommandGlobal, context.commands.get(txnId).global()); + Assert.assertEquals(safeTimestampsGlobal, context.timestampsForKey.get(key).global()); Assert.assertTrue(result); }); - Assert.assertSame(safeCommand.global(), commandCache.getUnsafe(txnId)); - Assert.assertSame(safeTimestamps.global(), timestampsCache.getUnsafe(key)); + Assert.assertSame(safeCommandGlobal, commandCache.getUnsafe(txnId)); + Assert.assertSame(safeTimestampsGlobal, timestampsCache.getUnsafe(key)); } /** @@ -374,7 +376,7 @@ public void inProgressCommandSaveTest() @Test public void inProgressCFKSaveTest() { - this.inProgressCFKSaveTest(COMMANDS, AccordCommandStore::commandsForKeyCache, context -> context.commandsForKey, CommandsForKey::new, (cfk, u) -> cfk.update(null, u)); + this.inProgressCFKSaveTest(COMMANDS, AccordCommandStore::commandsForKeyCache, context -> context.commandsForKey, CommandsForKey::new, (cfk, u) -> cfk.update(u).cfk()); } @Test diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 7dfae1e310fa..ffdb46eaafde 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -32,7 +32,7 @@ import com.google.common.collect.Maps; import org.apache.cassandra.concurrent.SimulatedExecutorFactory; import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.config.CassandraRelevantProperties; + import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -41,7 +41,7 @@ import org.slf4j.LoggerFactory; import accord.api.RoutingKey; -import accord.local.SafeCommandsForKey; +import accord.local.cfk.SafeCommandsForKey; import accord.local.CheckedCommands; import accord.local.Command; import accord.local.PreLoadContext; @@ -103,11 +103,6 @@ public class AsyncOperationTest private static final Logger logger = LoggerFactory.getLogger(AsyncOperationTest.class); private static final AtomicLong clock = new AtomicLong(0); - static - { - CassandraRelevantProperties.TEST_ACCORD_STORE_THREAD_CHECKS_ENABLED.setBoolean(false); - } - @BeforeClass public static void beforeClass() throws Throwable { @@ -360,16 +355,16 @@ public void loadFail() .check((rs, ids) -> { before(); // truncate tables - createCommand(commandStore, rs, ids); - - Map failed = selectFailedTxn(rs, ids); + assertNoReferences(commandStore, ids, keys); + createCommand(commandStore, rs, ids); + awaitDone(commandStore, ids, keys); assertNoReferences(commandStore, ids, keys); PreLoadContext ctx = contextFor(null, ids, keys, COMMANDS); - Consumer consumer = Mockito.mock(Consumer.class); + Map failed = selectFailedTxn(rs, ids); commandStore.commandCache().unsafeSetLoadFunction(txnId -> { logger.info("Attempting to load {}; expected to fail? {}", txnId, failed.get(txnId)); @@ -402,6 +397,9 @@ public void loadFail() }); }); getUninterruptibly(o2); + awaitDone(commandStore, ids, keys); + assertNoReferences(commandStore, ids, keys); + }); } @@ -419,8 +417,8 @@ public void consumerFails() logger.info("Test #{}", counter.incrementAndGet()); before(); // truncate tables - createCommand(commandStore, rs, ids); assertNoReferences(commandStore, ids, keys); + createCommand(commandStore, rs, ids); PreLoadContext ctx = contextFor(null, ids, keys, COMMANDS); @@ -445,11 +443,18 @@ private static void createCommand(AccordCommandStore commandStore, RandomSource // to simulate CommandsForKey not being found, use createCommittedAndPersist periodically as it does not update switch (rs.nextInt(3)) { - case 0: ids.forEach(id -> createStableAndPersist(commandStore, id)); break; - case 1: ids.forEach(id -> createStableUsingFastLifeCycle(commandStore, id)); break; - case 2: ids.forEach(id -> createStableUsingSlowLifeCycle(commandStore, id)); + case 0: + logger.info("createStableAndPersist(): {}", ids); + ids.forEach(id -> createStableAndPersist(commandStore, id)); + break; + case 1: + logger.info("createStableUsingFastLifeCycle(): {}", ids); + ids.forEach(id -> createStableUsingFastLifeCycle(commandStore, id)); + break; + case 2: + logger.info("createStableUsingSlowLifeCycle(): {}", ids); + ids.forEach(id -> createStableUsingSlowLifeCycle(commandStore, id)); } - commandStore.unsafeClearCache(); } private static Map selectFailedTxn(RandomSource rs, List ids) diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandStoreSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandStoreSerializersTest.java new file mode 100644 index 000000000000..738de4809e8c --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandStoreSerializersTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.util.List; + +import org.junit.Test; + +import accord.local.RedundantBefore; +import accord.utils.Gens; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.IVersionedSerializers; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService.Version; +import org.apache.cassandra.utils.AccordGenerators; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.net.MessagingService.Version.VERSION_51; + +public class CommandStoreSerializersTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + private static final List SUPPORTED_VERSIONS = VERSION_51.greaterThanOrEqual(); + + @Test + public void redundantBeforeEntry() + { + DataOutputBuffer buffer = new DataOutputBuffer(); + qt().forAll(Gens.random(), AccordGenerators.partitioner()).check((rs, partitioner) -> { + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + RedundantBefore.Entry entry = AccordGenerators.redundantBeforeEntry(partitioner).next(rs); + for (Version version : SUPPORTED_VERSIONS) + IVersionedSerializers.testSerde(buffer, CommandStoreSerializers.redundantBeforeEntry, entry, version.value); + }); + } + + @Test + public void redundantBefore() + { + DataOutputBuffer buffer = new DataOutputBuffer(); + qt().forAll(Gens.random(), AccordGenerators.partitioner()).check((rs, partitioner) -> { + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + // serializer doesn't support the empty set, so filter out + RedundantBefore redundantBefore = AccordGenerators.redundantBefore(partitioner).filter(r -> r.size() != 0).next(rs); + for (Version version : SUPPORTED_VERSIONS) + IVersionedSerializers.testSerde(buffer, CommandStoreSerializers.redundantBefore, redundantBefore, version.value); + }); + } + +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 202dc5cb7866..33244f045b77 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -39,11 +39,11 @@ import org.junit.Test; import accord.api.Key; -import accord.local.CommandsForKey; -import accord.local.CommandsForKey.InternalStatus; +import accord.local.cfk.CommandsForKey; +import accord.local.cfk.CommandsForKey.InternalStatus; import accord.local.Command; -import accord.local.CommandsForKey.TxnInfo; -import accord.local.CommandsForKey.Unmanaged; +import accord.local.cfk.CommandsForKey.TxnInfo; +import accord.local.cfk.CommandsForKey.Unmanaged; import accord.local.CommonAttributes; import accord.local.CommonAttributes.Mutable; import accord.local.Listeners; @@ -106,16 +106,18 @@ static class Cmd final SaveStatus saveStatus; final PartialTxn txn; final Timestamp executeAt; + final Ballot ballot; final List deps = new ArrayList<>(); final List missing = new ArrayList<>(); boolean invisible; - Cmd(TxnId txnId, PartialTxn txn, SaveStatus saveStatus, Timestamp executeAt) + Cmd(TxnId txnId, PartialTxn txn, SaveStatus saveStatus, Timestamp executeAt, Ballot ballot) { this.txnId = txnId; this.saveStatus = saveStatus; this.txn = txn; this.executeAt = executeAt; + this.ballot = ballot; } CommonAttributes attributes() @@ -132,7 +134,7 @@ CommonAttributes attributes() { for (TxnId id : deps) builder.add((Key)txn.keys().get(0), id); - mutable.partialDeps(new PartialDeps(AccordTestUtils.fullRange(txn), builder.build(), RangeDeps.NONE)); + mutable.partialDeps(new PartialDeps(AccordTestUtils.fullRange(txn), builder.build(), RangeDeps.NONE, KeyDeps.NONE)); } } @@ -157,19 +159,19 @@ Command toCommand() case PreCommittedWithDefinitionAndAcceptedDeps: case PreCommittedWithAcceptedDeps: case PreCommitted: - return Command.SerializerSupport.accepted(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO); + return Command.SerializerSupport.accepted(attributes(), saveStatus, executeAt, ballot, ballot); case Committed: - return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO, null); + return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, ballot, ballot, null); case Stable: case ReadyToExecute: - return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO, Command.WaitingOn.EMPTY); + return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, ballot, ballot, Command.WaitingOn.EMPTY); case PreApplied: case Applying: case Applied: - return Command.SerializerSupport.executed(attributes(), saveStatus, executeAt, Ballot.ZERO, Ballot.ZERO, Command.WaitingOn.EMPTY, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); + return Command.SerializerSupport.executed(attributes(), saveStatus, executeAt, ballot, ballot, Command.WaitingOn.EMPTY, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); case TruncatedApplyWithDeps: case TruncatedApply: @@ -219,7 +221,7 @@ List toCommands() } } - private static ObjectGraph generateObjectGraph(int txnIdCount, Supplier txnIdSupplier, Supplier saveStatusSupplier, Function txnSupplier, Function timestampSupplier, IntSupplier missingCountSupplier, RandomSource source) + private static ObjectGraph generateObjectGraph(int txnIdCount, Supplier txnIdSupplier, Supplier saveStatusSupplier, Function txnSupplier, Function timestampSupplier, Supplier ballotSupplier, IntSupplier missingCountSupplier, RandomSource source) { Cmd[] cmds = new Cmd[txnIdCount]; for (int i = 0 ; i < txnIdCount ; ++i) @@ -227,10 +229,30 @@ private static ObjectGraph generateObjectGraph(int txnIdCount, Supplier t TxnId txnId = txnIdSupplier.get(); SaveStatus saveStatus = saveStatusSupplier.get(); Timestamp executeAt = txnId; - if (saveStatus.known.executeAt != ExecuteAtErased && saveStatus.known.executeAt != ExecuteAtUnknown) + if (!txnId.kind().awaitsOnlyDeps() && saveStatus.known.executeAt != ExecuteAtErased && saveStatus.known.executeAt != ExecuteAtUnknown) executeAt = timestampSupplier.apply(txnId); - cmds[i] = new Cmd(txnId, txnSupplier.apply(txnId), saveStatus, executeAt); + Ballot ballot; + switch (saveStatus.status) + { + default: throw new AssertionError(); + case NotDefined: + case PreAccepted: + case Invalidated: + case Truncated: + ballot = Ballot.ZERO; + break; + case AcceptedInvalidate: + case Accepted: + case PreCommitted: + case Committed: + case Stable: + case PreApplied: + case Applied: + ballot = ballotSupplier.get(); + } + + cmds[i] = new Cmd(txnId, txnSupplier.apply(txnId), saveStatus, executeAt, ballot); } Arrays.sort(cmds, Comparator.comparing(o -> o.txnId)); for (int i = 0 ; i < txnIdCount ; ++i) @@ -273,14 +295,14 @@ private static ObjectGraph generateObjectGraph(int txnIdCount, Supplier t for (int j = 0 ; j < i ; ++j) { InternalStatus status = InternalStatus.from(cmds[j].saveStatus); - if (status == null || !status.hasInfo) continue; + if (status == null || !status.hasExecuteAtOrDeps) continue; if (cmds[j].txnId.kind().witnesses(cmds[i].txnId) && status.depsKnownBefore(cmds[j].txnId, cmds[j].executeAt).compareTo(cmds[i].txnId) > 0 && Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) continue outer; } for (int j = i + 1 ; j < cmds.length ; ++j) { InternalStatus status = InternalStatus.from(cmds[j].saveStatus); - if (status == null || !status.hasInfo) continue; + if (status == null || !status.hasExecuteAtOrDeps) continue; if (cmds[j].txnId.kind().witnesses(cmds[i].txnId) && Collections.binarySearch(cmds[j].missing, cmds[i].txnId) < 0) continue outer; } @@ -312,6 +334,11 @@ private static Function timestampSupplier(LongUnaryOperato return min -> Timestamp.fromValues(epochSupplier.applyAsLong(min == null ? 1 : min.epoch()), hlcSupplier.applyAsLong(min == null ? 1 : min.hlc() + 1), flagSupplier.getAsInt(), idSupplier.get()); } + private static Supplier ballotSupplier(LongUnaryOperator epochSupplier, LongUnaryOperator hlcSupplier, IntSupplier flagSupplier, Supplier idSupplier) + { + return () -> Ballot.fromValues(epochSupplier.applyAsLong(1), hlcSupplier.applyAsLong(1), flagSupplier.getAsInt(), idSupplier.get()); + } + private static Function timestampSupplier(Set unique, Function supplier) { return min -> { @@ -329,7 +356,7 @@ private static Function timestampSupplier(Se @Test public void serde() { - testOne(-6946067792202944553L); + testOne(-669467611022826851L); Random random = new Random(); for (int i = 0 ; i < 10000 ; ++i) { @@ -364,8 +391,8 @@ private static void testOne(long seed) float v = source.nextFloat(); if (v < 0.5) return Txn.Kind.Read; if (v < 0.95) return Txn.Kind.Write; - if (v < 0.99) return Txn.Kind.ExclusiveSyncPoint; - return Txn.Kind.EphemeralRead; // not actually a valid value for CFK + if (v < 0.97) return Txn.Kind.SyncPoint; + return Txn.Kind.ExclusiveSyncPoint; }; boolean permitMissing = source.decide(0.75f); @@ -412,15 +439,21 @@ private static void testOne(long seed) } } + Supplier ballotSupplier; + { + Supplier delegate = ballotSupplier(epochSupplier, hlcSupplier, flagSupplier, idSupplier); + ballotSupplier = () -> source.decide(0.5f) ? Ballot.ZERO : delegate.get(); + } + PartialTxn txn = createPartialTxn(0); Key key = (Key) txn.keys().get(0); - ObjectGraph graph = generateObjectGraph(source.nextInt(0, 100), () -> txnIdSupplier.apply(null), saveStatusSupplier, ignore -> txn, executeAtSupplier, missingCountSupplier, source); + ObjectGraph graph = generateObjectGraph(source.nextInt(0, 100), () -> txnIdSupplier.apply(null), saveStatusSupplier, ignore -> txn, executeAtSupplier, ballotSupplier, missingCountSupplier, source); List commands = graph.toCommands(); CommandsForKey cfk = new CommandsForKey(key); while (commands.size() > 0) { int next = source.nextInt(commands.size()); - cfk = cfk.update(null, commands.get(next)); + cfk = cfk.update(commands.get(next)).cfk(); commands.set(next, commands.get(commands.size() - 1)); commands.remove(commands.size() - 1); } @@ -436,10 +469,12 @@ private static void testOne(long seed) TxnInfo info = cfk.get(i); InternalStatus expectStatus = InternalStatus.from(cmd.saveStatus); if (expectStatus == null) expectStatus = InternalStatus.TRANSITIVELY_KNOWN; - if (expectStatus.hasInfo) + if (expectStatus.hasExecuteAtOrDeps) Assert.assertEquals(cmd.executeAt, info.executeAt); Assert.assertEquals(expectStatus, info.status); Assert.assertArrayEquals(cmd.missing.toArray(TxnId[]::new), info.missing()); + if (expectStatus.hasBallot) + Assert.assertEquals(cmd.ballot, info.ballot()); ++i; } @@ -471,7 +506,10 @@ public void test() Arrays.sort(ids, Comparator.naturalOrder()); TxnInfo[] info = new TxnInfo[ids.length]; for (int i = 0; i < info.length; i++) - info[i] = TxnInfo.create(ids[i], rs.pick(InternalStatus.values()), ids[i], CommandsForKey.NO_TXNIDS); + { + InternalStatus status = rs.pick(InternalStatus.values()); + info[i] = TxnInfo.create(ids[i], status, ids[i], CommandsForKey.NO_TXNIDS, Ballot.ZERO); + } Gen pendingGen = Gens.enums().allMixedDistribution(Unmanaged.Pending.class).next(rs); @@ -499,7 +537,7 @@ public void test() } else unmanaged = CommandsForKey.NO_PENDING_UNMANAGED; - CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, unmanaged); + CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, unmanaged, TxnId.NONE); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); @@ -515,8 +553,8 @@ public void thereAndBackAgain() PartitionKey pk = new PartitionKey(TableId.fromString("1b255f4d-ef25-40a6-0000-000000000009"), key); TxnId txnId = TxnId.fromValues(11,34052499,2,1); CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, - new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED, txnId, CommandsForKey.NO_TXNIDS) }, - CommandsForKey.NO_PENDING_UNMANAGED); + new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED_OR_ACCEPTED_INVALIDATE, txnId, CommandsForKey.NO_TXNIDS, Ballot.ZERO) }, + CommandsForKey.NO_PENDING_UNMANAGED, TxnId.NONE); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java new file mode 100644 index 000000000000..4ee49b24b975 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.util.List; + +import org.junit.Test; + +import accord.primitives.Deps; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.IVersionedSerializers; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaProvider; +import org.apache.cassandra.utils.AccordGenerators; +import org.mockito.Mockito; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.net.MessagingService.Version.VERSION_51; + +public class DepsSerializerTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + private static final List SUPPORTED_VERSIONS = VERSION_51.greaterThanOrEqual(); + + @Test + public void serde() + { + DataOutputBuffer buffer = new DataOutputBuffer(); + qt().withSeed(-4368731546033726179L).check(rs -> { + IPartitioner partitioner = AccordGenerators.partitioner().next(rs); + Schema.instance = Mockito.mock(SchemaProvider.class); + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + Mockito.when(Schema.instance.getExistingTablePartitioner(Mockito.any())).thenReturn(partitioner); + Deps deps = AccordGenerators.depsGen(partitioner).next(rs); + for (MessagingService.Version version : SUPPORTED_VERSIONS) + IVersionedSerializers.testSerde(buffer, DepsSerializer.deps, deps, version.value); + }); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java index 4ab8be5d6a21..f83a1099773d 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java @@ -18,6 +18,8 @@ package org.apache.cassandra.service.accord.serializers; +import java.nio.ByteBuffer; + import org.junit.BeforeClass; import org.junit.Test; @@ -31,8 +33,6 @@ import accord.utils.Utils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; -import org.apache.cassandra.io.util.DataInputBuffer; -import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.utils.AccordGenerators; import org.apache.cassandra.utils.CassandraGenerators; import org.assertj.core.api.Assertions; @@ -51,18 +51,16 @@ public static void setup() @Test public void serde() { - DataOutputBuffer buffer = new DataOutputBuffer(); qt().forAll(waitingOnGen()).check(waitingOn -> { TxnId txnId = TxnId.NONE; if (waitingOn.appliedOrInvalidated != null) txnId = new TxnId(txnId.epoch(), txnId.hlc(), txnId.kind(), Routable.Domain.Range, txnId.node); - buffer.clear(); - long expectedSize = WaitingOnSerializer.serializedSize(txnId, waitingOn); - WaitingOnSerializer.serialize(txnId, waitingOn, buffer); - Assertions.assertThat(buffer.getLength()).isEqualTo(expectedSize); - Command.WaitingOn read = WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.txnIds, new DataInputBuffer(buffer.unsafeGetBufferAndFlip(), false)); + long expectedSize = WaitingOnSerializer.serializedSize(waitingOn); + ByteBuffer bb = WaitingOnSerializer.serialize(txnId, waitingOn); + Assertions.assertThat(bb.remaining()).isEqualTo(expectedSize); + Command.WaitingOn read = WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, bb); Assertions.assertThat(read) .isEqualTo(waitingOn) - .isEqualTo(WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.txnIds, WaitingOnSerializer.serialize(txnId, waitingOn))); + .isEqualTo(WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, WaitingOnSerializer.serialize(txnId, waitingOn))); }); } @@ -76,7 +74,7 @@ private static Gen waitingOnGen() return rs -> { Deps deps = depsGen.next(rs); if (deps.isEmpty()) return Command.WaitingOn.EMPTY; - int txnIdCount = deps.rangeDeps.txnIdCount(); + int txnIdCount = deps.rangeDeps.txnIdCount() + deps.directKeyDeps.txnIdCount(); int keyCount = deps.keyDeps.keys().size(); int[] selected = Gens.arrays(Gens.ints().between(0, txnIdCount + keyCount - 1)).unique().ofSizeBetween(0, txnIdCount + keyCount).next(rs); SimpleBitSet waitingOn = new SimpleBitSet(txnIdCount + keyCount, false); @@ -97,7 +95,7 @@ private static Gen waitingOnGen() } } - return new Command.WaitingOn(deps.keyDeps.keys(), deps.rangeDeps.txnIds(), Utils.ensureImmutable(waitingOn), Utils.ensureImmutable(appliedOrInvalidated)); + return new Command.WaitingOn(deps.keyDeps.keys(), deps.rangeDeps, deps.directKeyDeps, Utils.ensureImmutable(waitingOn), Utils.ensureImmutable(appliedOrInvalidated)); }; } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index 4740ee6dfddb..f587fd85af30 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -23,8 +23,11 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.function.BiFunction; +import java.util.stream.Stream; import accord.local.Command; +import accord.local.RedundantBefore; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.KeyDeps; @@ -32,11 +35,14 @@ import accord.primitives.Range; import accord.primitives.RangeDeps; import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.AccordGens; import accord.utils.Gen; import accord.utils.Gens; +import accord.utils.RandomSource; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.dht.AccordSplitter; import org.apache.cassandra.dht.IPartitioner; @@ -48,7 +54,6 @@ import org.apache.cassandra.service.accord.api.PartitionKey; import org.quicktheories.impl.JavaRandom; -import static accord.utils.AccordGens.txnIds; import static org.apache.cassandra.service.accord.AccordTestUtils.TABLE_ID1; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; @@ -70,7 +75,7 @@ private enum SupportedCommandTypes public static Gen commands() { - Gen ids = txnIds(); + Gen ids = AccordGens.txnIds(); //TODO switch to Status once all types are supported Gen supportedTypes = Gens.enums().all(SupportedCommandTypes.class); //TODO goes against fuzz testing, and also limits to a very specific table existing... @@ -197,6 +202,19 @@ public static Gen ranges(IPartitioner partitioner) return ranges(Gens.lists(fromQT(CassandraGenerators.TABLE_ID_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), ignore -> partitioner); } + public static Gen rangesArbitrary(IPartitioner partitioner) + { + Gen rangeGen = range(partitioner); + Gen.IntGen sizeGen = Gens.ints().between(0, 10); + return rs -> { + int targetSize = sizeGen.nextInt(rs); + List ranges = new ArrayList<>(targetSize); + for (int i = 0; i < targetSize; i++) + ranges.add(rangeGen.next(rs)); + return Ranges.of(ranges.toArray(Range[]::new)); + }; + } + public static Gen keyDepsGen() { return AccordGens.keyDeps(AccordGenerators.keys()); @@ -207,6 +225,16 @@ public static Gen keyDepsGen(IPartitioner partitioner) return AccordGens.keyDeps(AccordGenerators.keys(partitioner)); } + public static Gen directKeyDepsGen() + { + return AccordGens.directKeyDeps(AccordGenerators.keys()); + } + + public static Gen directKeyDepsGen(IPartitioner partitioner) + { + return AccordGens.directKeyDeps(AccordGenerators.keys(partitioner)); + } + public static Gen rangeDepsGen() { return AccordGens.rangeDeps(AccordGenerators.range()); @@ -219,12 +247,41 @@ public static Gen rangeDepsGen(IPartitioner partitioner) public static Gen depsGen() { - return AccordGens.deps(keyDepsGen(), rangeDepsGen()); + return AccordGens.deps(keyDepsGen(), rangeDepsGen(), directKeyDepsGen()); } public static Gen depsGen(IPartitioner partitioner) { - return AccordGens.deps(keyDepsGen(partitioner), rangeDepsGen(partitioner)); + return AccordGens.deps(keyDepsGen(partitioner), rangeDepsGen(partitioner), directKeyDepsGen(partitioner)); + } + + public static Gen redundantBeforeEntry(IPartitioner partitioner) + { + return redundantBeforeEntry(Gens.bools().all(), range(partitioner), AccordGens.txnIds(Gens.pick(Txn.Kind.SyncPoint, Txn.Kind.ExclusiveSyncPoint), ignore -> Routable.Domain.Range)); + } + + public static Gen redundantBeforeEntry(Gen emptyGen, Gen rangeGen, Gen txnIdGen) + { + return rs -> { + Range range = rangeGen.next(rs); + TxnId locallyAppliedOrInvalidatedBefore = emptyGen.next(rs) ? TxnId.NONE : txnIdGen.next(rs); // emptyable or range + TxnId shardAppliedOrInvalidatedBefore = emptyGen.next(rs) ? TxnId.NONE : txnIdGen.next(rs); // emptyable or range + TxnId bootstrappedAt = txnIdGen.next(rs); + Timestamp staleUntilAtLeast = emptyGen.next(rs) ? null : txnIdGen.next(rs); // nullable + + long maxEpoch = Stream.of(locallyAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, bootstrappedAt, staleUntilAtLeast).filter(t -> t != null).mapToLong(Timestamp::epoch).max().getAsLong(); + long startEpoch = rs.nextLong(maxEpoch); + long endEpoch = emptyGen.next(rs) ? Long.MAX_VALUE : 1 + rs.nextLong(startEpoch, Long.MAX_VALUE); + return new RedundantBefore.Entry(range, startEpoch, endEpoch, locallyAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, bootstrappedAt, staleUntilAtLeast); + }; + } + + public static Gen redundantBefore(IPartitioner partitioner) + { + Gen rangeGen = rangesArbitrary(partitioner); + Gen txnIdGen = AccordGens.txnIds(Gens.pick(Txn.Kind.SyncPoint, Txn.Kind.ExclusiveSyncPoint), ignore -> Routable.Domain.Range); + BiFunction entryGen = (rs, range) -> redundantBeforeEntry(Gens.bools().all(), i -> range, txnIdGen).next(rs); + return AccordGens.redundantBefore(rangeGen, entryGen); } public static Gen fromQT(org.quicktheories.core.Gen qt) From c377a066affbe2f773bc9653534f22b72492d2fe Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 2 Aug 2024 14:17:42 +0100 Subject: [PATCH 126/340] CASSANDRA-19825: Fix various bugs and abstraction deficiencies, including: - Remove concept of non-participating home keys; home keys are required to be a participant in the transaction - Remove covering/covers concept - Various invalidation/truncation/erase behaviours patch by Benedict; reviewed by Blake for CASSANDRA-19825 --- modules/accord | 2 +- .../db/compaction/CompactionIterator.java | 16 +++++---- .../accord/CheckpointIntervalArrayIndex.java | 2 +- .../accord/AccordFetchCoordinator.java | 6 +++- .../service/accord/AccordObjectSizes.java | 16 ++++----- .../service/accord/AccordService.java | 12 ++++--- .../service/accord/AccordTopology.java | 13 +++---- .../service/accord/IAccordService.java | 18 ++++++++-- .../accord/api/AccordTopologySorter.java | 8 +++-- .../accord/fastpath/FastPathStrategy.java | 4 +-- .../InheritKeyspaceFastPathStrategy.java | 4 +-- .../ParameterizedFastPathStrategy.java | 13 +++---- .../fastpath/SimpleFastPathStrategy.java | 21 +++++------ .../serializers/CommandSerializers.java | 6 +--- .../accord/serializers/DepsSerializer.java | 12 +++---- .../InformHomeDurableSerializers.java | 13 ++----- .../accord/serializers/KeySerializers.java | 36 +++++-------------- .../serializers/TopologySerializers.java | 4 +-- .../cassandra/service/accord/txn/TxnRead.java | 16 +++++++-- .../service/accord/txn/TxnUpdate.java | 9 +++++ .../accord/txn/UnrecoverableRepairUpdate.java | 7 ++++ .../utils/CollectionSerializers.java | 10 ++++++ .../CompactionAccordIteratorsTest.java | 9 +++-- .../index/accord/AccordIndexStressTest.java | 2 +- .../index/accord/RouteIndexTest.java | 2 +- .../accord/AccordCommandStoreTest.java | 3 +- .../service/accord/AccordCommandTest.java | 6 ++-- .../AccordConfigurationServiceTest.java | 4 +-- .../service/accord/AccordKeyspaceTest.java | 2 +- .../service/accord/AccordMessageSinkTest.java | 5 +-- .../service/accord/AccordTestUtils.java | 15 ++++---- .../service/accord/AccordTopologyUtils.java | 4 +-- .../ParameterizedFastPathStrategyTest.java | 4 +-- .../CheckStatusSerializersTest.java | 2 +- .../CommandsForKeySerializerTest.java | 2 +- .../apache/cassandra/utils/RangeTreeTest.java | 4 +-- 36 files changed, 177 insertions(+), 135 deletions(-) diff --git a/modules/accord b/modules/accord index 5f360e0b5b19..4aa0a8aeb6b1 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 5f360e0b5b197156df0ef3d9985cd94d18ea1c92 +Subproject commit 4aa0a8aeb6b12036660695e3fb89c69b5d40f345 diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index ad056674c9d6..54d30a64453b 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -32,6 +32,7 @@ import com.google.common.collect.Ordering; import accord.local.Cleanup; +import accord.local.CommandStores.RangesForEpoch; import accord.local.DurableBefore; import accord.local.RedundantBefore; import accord.local.SaveStatus; @@ -87,7 +88,6 @@ import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.paxos.PaxosRepairHistory; import org.apache.cassandra.service.paxos.uncommitted.PaxosRows; -import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; import static accord.local.Cleanup.TRUNCATE_WITH_OUTCOME; @@ -784,6 +784,7 @@ protected Row applyToRow(Row row) class AccordCommandsPurger extends AbstractPurger { final Int2ObjectHashMap redundantBefores; + final Int2ObjectHashMap ranges; final DurableBefore durableBefore; int storeId; @@ -791,9 +792,10 @@ class AccordCommandsPurger extends AbstractPurger AccordCommandsPurger(Supplier accordService) { - Pair, DurableBefore> redundantBeforesAndDurableBefore = accordService.get().getRedundantBeforesAndDurableBefore(); - this.redundantBefores = redundantBeforesAndDurableBefore.left; - this.durableBefore = redundantBeforesAndDurableBefore.right; + IAccordService.CompactionInfo compactionInfo = accordService.get().getCompactionInfo(); + this.redundantBefores = compactionInfo.redundantBefores; + this.ranges = compactionInfo.ranges; + this.durableBefore = compactionInfo.durableBefore; } protected void beginPartition(UnfilteredRowIterator partition) @@ -815,7 +817,7 @@ protected Row applyToRow(Row row) // When commands end up being sliced by compaction we need this to discard tombstones and slices // without enough information to run the rest of the cleanup logic - if (Cleanup.isSafeToCleanup(durableBefore, txnId)) + if (Cleanup.isSafeToCleanup(durableBefore, txnId, ranges.get(storeId).allAt(txnId.epoch()))) return null; Cell durabilityCell = row.getCell(CommandsColumns.durability); @@ -878,7 +880,7 @@ class AccordTimestampsForKeyPurger extends AbstractPurger AccordTimestampsForKeyPurger(Supplier accordService) { - this.redundantBefores = accordService.get().getRedundantBeforesAndDurableBefore().left; + this.redundantBefores = accordService.get().getCompactionInfo().redundantBefores; } protected void beginPartition(UnfilteredRowIterator partition) @@ -954,7 +956,7 @@ class AccordCommandsForKeyPurger extends AbstractPurger AccordCommandsForKeyPurger(CommandsForKeyAccessor accessor, Supplier accordService) { this.accessor = accessor; - this.redundantBefores = accordService.get().getRedundantBeforesAndDurableBefore().left; + this.redundantBefores = accordService.get().getCompactionInfo().redundantBefores; } protected void beginPartition(UnfilteredRowIterator partition) diff --git a/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java b/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java index 664fe16093b3..0b37d1fc5fb0 100644 --- a/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java +++ b/src/java/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndex.java @@ -609,7 +609,7 @@ public int binarySearch(ChecksumedRandomAccessReader indexInput, int from, int t }; CheckpointIntervalArray searcher = new CheckpointIntervalArray<>(accessor, indexInput, checkpoints.bounds, checkpoints.headers, checkpoints.lists, checkpoints.maxScanAndCheckpointMatches); - searcher.forEach(start, end, (i1, i2, i3, i4, index) -> { + searcher.forEachRange(start, end, (i1, i2, i3, i4, index) -> { stats.matches++; callback.accept(reader.copyTo(accessor.get(indexInput, index), buffer)); }, (i1, i2, i3, i4, startIdx, endIdx) -> { diff --git a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java index 088c6c1331c7..e4ad2aa5591e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java @@ -35,6 +35,7 @@ import accord.local.Node; import accord.local.SafeCommandStore; import accord.primitives.PartialTxn; +import accord.primitives.Participants; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routable; @@ -290,6 +291,9 @@ public AsyncChain read(Seekable key, SafeCommandStore commandStore, Timest @Override public Read slice(Ranges ranges) { return new StreamingRead(to, this.ranges.slice(ranges)); } + @Override + public Read intersecting(Participants participants) { return new StreamingRead(to, this.ranges.intersecting(ranges)); } + @Override public Read merge(Read other) { throw new UnsupportedOperationException(); } } @@ -383,7 +387,7 @@ protected void onDone(Ranges success, Throwable failure) protected PartialTxn rangeReadTxn(Ranges ranges) { StreamingRead read = new StreamingRead(FBUtilities.getBroadcastAddressAndPort(), ranges); - return new PartialTxn.InMemory(ranges, Txn.Kind.Read, ranges, read, noopQuery, null); + return new PartialTxn.InMemory(Txn.Kind.Read, ranges, read, noopQuery, null); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index d3cd7a34fd7d..46e0d8c15810 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -137,7 +137,7 @@ public static long routingKeys(RoutingKeys keys) return EMPTY_ROUTING_KEYS_SIZE + routingKeysOnly(keys); } - private static final long EMPTY_FULL_KEY_ROUTE_SIZE = measure(new FullKeyRoute(new TokenKey(null, null), true, new RoutingKey[0])); + private static final long EMPTY_FULL_KEY_ROUTE_SIZE = measure(new FullKeyRoute(new TokenKey(null, null), new RoutingKey[0])); public static long fullKeyRoute(FullKeyRoute route) { return EMPTY_FULL_KEY_ROUTE_SIZE @@ -145,12 +145,11 @@ public static long fullKeyRoute(FullKeyRoute route) + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } - private static final long EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE = measure(new PartialKeyRoute(Ranges.EMPTY, new TokenKey(null, null), true, new RoutingKey[0])); + private static final long EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE = measure(new PartialKeyRoute(new TokenKey(null, null), new RoutingKey[0])); public static long partialKeyRoute(PartialKeyRoute route) { return EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE + routingKeysOnly(route) - + ranges(route.covering()) + key(route.homeKey()); } @@ -162,7 +161,7 @@ private static long rangesOnly(AbstractRanges ranges) return size; } - private static final long EMPTY_FULL_RANGE_ROUTE_SIZE = measure(new FullRangeRoute(new TokenKey(null, null), true, new Range[0])); + private static final long EMPTY_FULL_RANGE_ROUTE_SIZE = measure(new FullRangeRoute(new TokenKey(null, null), new Range[0])); public static long fullRangeRoute(FullRangeRoute route) { return EMPTY_FULL_RANGE_ROUTE_SIZE @@ -170,12 +169,11 @@ public static long fullRangeRoute(FullRangeRoute route) + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } - private static final long EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE = measure(new PartialRangeRoute(Ranges.EMPTY, new TokenKey(null, null), true, new Range[0])); + private static final long EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE = measure(new PartialRangeRoute(new TokenKey(null, null), new Range[0])); public static long partialRangeRoute(PartialRangeRoute route) { return EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE + rangesOnly(route) - + ranges(route.covering()) + key(route.homeKey()); } @@ -193,7 +191,7 @@ public static long route(Unseekables unseekables) } } - private static final long EMPTY_TXN = measure(new PartialTxn.InMemory(null, null, null, null, null, null)); + private static final long EMPTY_TXN = measure(new PartialTxn.InMemory(null, null, null, null, null)); public static long txn(PartialTxn txn) { long size = EMPTY_TXN; @@ -283,13 +281,13 @@ private static class CommandEmptySizes private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) { - CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(EMPTY_TXNID).route(new FullKeyRoute(EMPTY_KEY, true, new RoutingKey[]{ EMPTY_KEY })); + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(EMPTY_TXNID).route(new FullKeyRoute(EMPTY_KEY, new RoutingKey[]{ EMPTY_KEY })); attrs.durability(Status.Durability.NotDurable); if (hasDeps) attrs.partialDeps(PartialDeps.NONE); if (hasTxn) - attrs.partialTxn(new PartialTxn.InMemory(null, null, null, null, null, null)); + attrs.partialTxn(new PartialTxn.InMemory(null, null, null, null, null)); return attrs; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index bce3d3368a11..8574b5db2726 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -45,6 +45,7 @@ import accord.coordinate.FailureAccumulator; import accord.coordinate.TopologyMismatch; import accord.impl.CoordinateDurabilityScheduling; +import accord.local.CommandStores; import accord.primitives.SyncPoint; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.statements.RequestValidations; @@ -113,7 +114,6 @@ import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.ImmediateFuture; @@ -231,9 +231,9 @@ public Future epochReady(Epoch epoch) public void receive(Message> message) {} @Override - public Pair, DurableBefore> getRedundantBeforesAndDurableBefore() + public CompactionInfo getCompactionInfo() { - return Pair.create(new Int2ObjectHashMap<>(), DurableBefore.EMPTY); + return new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), DurableBefore.EMPTY); } }; @@ -779,17 +779,19 @@ public AccordConfigurationService configurationService() } @Override - public Pair, DurableBefore> getRedundantBeforesAndDurableBefore() + public CompactionInfo getCompactionInfo() { Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); + Int2ObjectHashMapranges = new Int2ObjectHashMap<>(); AtomicReference durableBefore = new AtomicReference<>(DurableBefore.EMPTY); AsyncChains.getBlockingAndRethrow(node.commandStores().forEach(safeStore -> { synchronized (redundantBefores) { redundantBefores.put(safeStore.commandStore().id(), safeStore.commandStore().redundantBefore()); + ranges.put(safeStore.commandStore().id(), safeStore.ranges()); } durableBefore.set(DurableBefore.merge(durableBefore.get(), safeStore.commandStore().durableBefore())); })); - return Pair.create(redundantBefores, durableBefore.get()); + return new CompactionInfo(redundantBefores, ranges, durableBefore.get()); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopology.java b/src/java/org/apache/cassandra/service/accord/AccordTopology.java index 0814c322d5e7..7bbfc0c250ce 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopology.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopology.java @@ -32,6 +32,7 @@ import accord.topology.Shard; import accord.topology.Topology; import accord.utils.Invariants; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; @@ -64,7 +65,7 @@ public static Node.Id tcmIdToAccord(NodeId nodeId) private static class ShardLookup extends HashMap { - private Shard createOrReuse(accord.primitives.Range range, List nodes, Set fastPathElectorate, Set joining) + private Shard createOrReuse(accord.primitives.Range range, SortedArrayList nodes, Set fastPathElectorate, Set joining) { Shard prev = get(range); if (prev != null @@ -81,10 +82,10 @@ static class KeyspaceShard { private final KeyspaceMetadata keyspace; private final Range range; - private final List nodes; + private final SortedArrayList nodes; private final Set pending; - private KeyspaceShard(KeyspaceMetadata keyspace, Range range, List nodes, Set pending) + private KeyspaceShard(KeyspaceMetadata keyspace, Range range, SortedArrayList nodes, Set pending) { this.keyspace = keyspace; this.range = range; @@ -106,7 +107,7 @@ Shard createForTable(TableMetadata metadata, Set unavailable, Map fastPath = strategyFor(metadata).calculateFastPath(nodes, unavailable, dcMap); + SortedArrayList fastPath = strategyFor(metadata).calculateFastPath(nodes, unavailable, dcMap); return lookup.createOrReuse(tokenRange, nodes, fastPath, pending); } @@ -122,10 +123,10 @@ private static KeyspaceShard forRange(KeyspaceMetadata keyspace, Range ra Sets.SetView readOnly = Sets.difference(readEndpoints, writeEndpoints); Invariants.checkState(readOnly.isEmpty(), "Read only replicas detected: %s", readOnly); - List nodes = writes.endpoints().stream() + SortedArrayList nodes = new SortedArrayList<>(writes.endpoints().stream() .map(directory::peerId) .map(AccordTopology::tcmIdToAccord) - .sorted().collect(Collectors.toList()); + .sorted().toArray(Node.Id[]::new)); Set pending = readEndpoints.equals(writeEndpoints) ? Collections.emptySet() : diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index a7ca1234d0b2..b804e80fecca 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import accord.api.BarrierType; +import accord.local.CommandStores; import accord.local.DurableBefore; import accord.local.Node.Id; import accord.local.RedundantBefore; @@ -41,7 +42,6 @@ import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.transport.Dispatcher; -import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.Future; import javax.annotation.Nonnull; @@ -109,10 +109,24 @@ default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List void receive(Message> message); + class CompactionInfo + { + public final Int2ObjectHashMap redundantBefores; + public final Int2ObjectHashMap ranges; + public final DurableBefore durableBefore; + + public CompactionInfo(Int2ObjectHashMap redundantBefores, Int2ObjectHashMap ranges, DurableBefore durableBefore) + { + this.redundantBefores = redundantBefores; + this.ranges = ranges; + this.durableBefore = durableBefore; + } + } + /** * Fetch the redundnant befores for every command store */ - Pair, DurableBefore> getRedundantBeforesAndDurableBefore(); + CompactionInfo getCompactionInfo(); default Id nodeId() { throw new UnsupportedOperationException(); } } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java b/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java index 4f9eff850b0a..bb70132c045b 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java @@ -27,7 +27,11 @@ import accord.topology.ShardSelection; import accord.topology.Topologies; import accord.topology.Topology; -import org.apache.cassandra.locator.*; +import accord.utils.SortedList; +import org.apache.cassandra.locator.DynamicEndpointSnitch; +import org.apache.cassandra.locator.Endpoint; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.NodeProximity; import org.apache.cassandra.service.accord.AccordEndpointMapper; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Sortable; @@ -58,7 +62,7 @@ public TopologySorter get(Topologies topologies) return create(topologies.nodes()); } - private AccordTopologySorter create(Set nodes) + private AccordTopologySorter create(SortedList nodes) { SortableEndpoints endpoints = SortableEndpoints.from(nodes, mapper); Comparator comparator = proximity.endpointComparator(FBUtilities.getBroadcastAddressAndPort(), endpoints); diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java index e8d47f917875..f9d52d45b654 100644 --- a/src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java +++ b/src/java/org/apache/cassandra/service/accord/fastpath/FastPathStrategy.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Set; @@ -29,6 +28,7 @@ import com.google.common.collect.ImmutableMap; import accord.local.Node; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.DataInputPlus; @@ -84,7 +84,7 @@ private static Kind fromMap(Map map) * @param dcMap * @return */ - Set calculateFastPath(List nodes, Set unavailable, Map dcMap); + SortedArrayList calculateFastPath(SortedArrayList nodes, Set unavailable, Map dcMap); Kind kind(); diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java index 08b7763a930f..2ffc3c7cbf8c 100644 --- a/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java +++ b/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java @@ -18,13 +18,13 @@ package org.apache.cassandra.service.accord.fastpath; -import java.util.List; import java.util.Map; import java.util.Set; import com.google.common.collect.ImmutableMap; import accord.local.Node; +import accord.utils.SortedArrays.SortedArrayList; public class InheritKeyspaceFastPathStrategy implements FastPathStrategy { @@ -35,7 +35,7 @@ public class InheritKeyspaceFastPathStrategy implements FastPathStrategy private InheritKeyspaceFastPathStrategy() {} @Override - public Set calculateFastPath(List nodes, Set unavailable, Map dcMap) + public SortedArrayList calculateFastPath(SortedArrayList nodes, Set unavailable, Map dcMap) { throw new IllegalStateException("InheritKeyspaceFastPathStrategy should be replaced before calculateFastPath is called"); } diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java index 8e9d00bbe6de..10828202b68a 100644 --- a/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java +++ b/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -32,12 +33,12 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import accord.api.VisibleForImplementation; import accord.local.Node; import accord.topology.Shard; import accord.utils.Invariants; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.DataInputPlus; @@ -204,7 +205,7 @@ public int compareTo(@Nonnull NodeSorter that) } @Override - public Set calculateFastPath(List nodes, Set unavailable, Map dcMap) + public SortedArrayList calculateFastPath(SortedArrayList nodes, Set unavailable, Map dcMap) { List sorters = new ArrayList<>(nodes.size()); @@ -221,12 +222,12 @@ public Set calculateFastPath(List nodes, Set unavaila int slowQuorum = Shard.slowPathQuorumSize(nodes.size()); int fpSize = Math.max(size, slowQuorum); - ImmutableSet.Builder builder = ImmutableSet.builder(); - + Node.Id[] array = new Node.Id[fpSize]; for (int i=0; i fastPath = builder.build(); + Arrays.sort(array); + SortedArrayList fastPath = new SortedArrayList<>(array); Invariants.checkState(fastPath.size() >= slowQuorum); return fastPath; } diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java index 3f2e27bdc85f..37d51b6c8248 100644 --- a/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java +++ b/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java @@ -18,16 +18,16 @@ package org.apache.cassandra.service.accord.fastpath; -import java.util.List; import java.util.Map; import java.util.Set; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import accord.local.Node; import accord.topology.Shard; +import accord.utils.ArrayBuffers; import accord.utils.Invariants; +import accord.utils.SortedArrays.SortedArrayList; public class SimpleFastPathStrategy implements FastPathStrategy { @@ -38,26 +38,27 @@ public class SimpleFastPathStrategy implements FastPathStrategy private SimpleFastPathStrategy() {} @Override - public Set calculateFastPath(List nodes, Set unavailable, Map dcMap) + public SortedArrayList calculateFastPath(SortedArrayList nodes, Set unavailable, Map dcMap) { int maxFailures = Shard.maxToleratedFailures(nodes.size()); int discarded = 0; - ImmutableSet.Builder builder = ImmutableSet.builder(); + if (unavailable.isEmpty()) + return nodes; + Object[] tmp = ArrayBuffers.cachedAny().get(nodes.size()); for (int i=0,mi=nodes.size(); i fastPath = builder.build(); + Node.Id[] array = new Node.Id[nodes.size() - discarded]; + System.arraycopy(tmp, 0, array, 0, nodes.size() - discarded); + SortedArrayList fastPath = new SortedArrayList<>(array); Invariants.checkState(fastPath.size() >= Shard.slowPathQuorumSize(nodes.size())); return fastPath; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index fe16d3033efc..cd76550262bd 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -36,7 +36,6 @@ import accord.primitives.Ballot; import accord.primitives.PartialTxn; import accord.primitives.ProgressToken; -import accord.primitives.Ranges; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -234,7 +233,6 @@ public long serializedSize(Seekables superset, PartialTxn txn, int version private void serializeWithoutKeys(PartialTxn txn, DataOutputPlus out, int version) throws IOException { CommandSerializers.kind.serialize(txn.kind(), out, version); - KeySerializers.ranges.serialize(txn.covering(), out, version); readSerializer.serialize(txn.read(), out, version); querySerializer.serialize(txn.query(), out, version); out.writeBoolean(txn.update() != null); @@ -245,18 +243,16 @@ private void serializeWithoutKeys(PartialTxn txn, DataOutputPlus out, int versio private PartialTxn deserializeWithoutKeys(Seekables keys, DataInputPlus in, int version) throws IOException { Txn.Kind kind = CommandSerializers.kind.deserialize(in, version); - Ranges covering = KeySerializers.ranges.deserialize(in, version); Read read = readSerializer.deserialize(in, version); Query query = querySerializer.deserialize(in, version); Update update = in.readBoolean() ? updateSerializer.deserialize(in, version) : null; - return new PartialTxn.InMemory(covering, kind, keys, read, query, update); + return new PartialTxn.InMemory(kind, keys, read, query, update); } private long serializedSizeWithoutKeys(PartialTxn txn, int version) { long size = CommandSerializers.kind.serializedSize(txn.kind(), version); - size += KeySerializers.ranges.serializedSize(txn.covering(), version); size += readSerializer.serializedSize(txn.read(), version); size += querySerializer.serializedSize(txn.query(), version); size += TypeSizes.sizeof(txn.update() != null); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java index dd78761cdae6..841d89882ef6 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java @@ -25,9 +25,9 @@ import accord.primitives.KeyDeps; import accord.primitives.Keys; import accord.primitives.PartialDeps; +import accord.primitives.Participants; import accord.primitives.Range; import accord.primitives.RangeDeps; -import accord.primitives.Ranges; import accord.primitives.Seekables; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; @@ -60,7 +60,7 @@ Deps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, KeyDeps directKeyDeps, Da @Override PartialDeps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, KeyDeps directKeyDeps, DataInputPlus in, int version) throws IOException { - Ranges covering = KeySerializers.ranges.deserialize(in, version); + Participants covering = KeySerializers.participants.deserialize(in, version); return new PartialDeps(covering, keyDeps, rangeDeps, directKeyDeps); } @@ -68,28 +68,28 @@ PartialDeps deserialize(KeyDeps keyDeps, RangeDeps rangeDeps, KeyDeps directKeyD public void serialize(PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException { super.serialize(partialDeps, out, version); - KeySerializers.ranges.serialize(partialDeps.covering, out, version); + KeySerializers.participants.serialize(partialDeps.covering, out, version); } @Override public void serialize(Seekables superset, PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException { super.serialize(superset, partialDeps, out, version); - KeySerializers.ranges.serialize(partialDeps.covering, out, version); + KeySerializers.participants.serialize(partialDeps.covering, out, version); } @Override public long serializedSize(PartialDeps partialDeps, int version) { return super.serializedSize(partialDeps, version) - + KeySerializers.ranges.serializedSize(partialDeps.covering, version); + + KeySerializers.participants.serializedSize(partialDeps.covering, version); } @Override public long serializedSize(Seekables keys, PartialDeps partialDeps, int version) { return super.serializedSize(keys, partialDeps, version) - + KeySerializers.ranges.serializedSize(partialDeps.covering, version); + + KeySerializers.participants.serializedSize(partialDeps.covering, version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java index 50f53a04f61f..c4f215557715 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java @@ -25,13 +25,9 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import static org.apache.cassandra.utils.CollectionSerializers.deserializeSet; -import static org.apache.cassandra.utils.CollectionSerializers.serializeCollection; -import static org.apache.cassandra.utils.CollectionSerializers.serializedCollectionSize; - public class InformHomeDurableSerializers { - public static final IVersionedSerializer request = new IVersionedSerializer() + public static final IVersionedSerializer request = new IVersionedSerializer<>() { @Override public void serialize(InformHomeDurable inform, DataOutputPlus out, int version) throws IOException @@ -40,7 +36,6 @@ public void serialize(InformHomeDurable inform, DataOutputPlus out, int version) KeySerializers.route.serialize(inform.route, out, version); CommandSerializers.timestamp.serialize(inform.executeAt, out, version); CommandSerializers.durability.serialize(inform.durability, out, version); - serializeCollection(inform.persistedOn, out, version, TopologySerializers.nodeId); } @Override @@ -49,8 +44,7 @@ public InformHomeDurable deserialize(DataInputPlus in, int version) throws IOExc return new InformHomeDurable(CommandSerializers.txnId.deserialize(in, version), KeySerializers.route.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), - CommandSerializers.durability.deserialize(in, version), - deserializeSet(in, version, TopologySerializers.nodeId)); + CommandSerializers.durability.deserialize(in, version)); } @Override @@ -59,8 +53,7 @@ public long serializedSize(InformHomeDurable inform, int version) return CommandSerializers.txnId.serializedSize(inform.txnId, version) + KeySerializers.route.serializedSize(inform.route, version) + CommandSerializers.timestamp.serializedSize(inform.executeAt, version) - + CommandSerializers.durability.serializedSize(inform.durability, version) - + serializedCollectionSize(inform.persistedOn, version, TopologySerializers.nodeId); + + CommandSerializers.durability.serializedSize(inform.durability, version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index 6d2e86e8bbff..1e105d8192ad 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -91,38 +91,31 @@ public Ranges deserialize(DataInputPlus in, int version, Range[] ranges) { @Override PartialKeyRoute deserialize(DataInputPlus in, int version, RoutingKey[] keys) throws IOException { - Ranges covering = ranges.deserialize(in, version); RoutingKey homeKey = routingKey.deserialize(in, version); - boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; - return PartialKeyRoute.SerializationSupport.create(covering, homeKey, isParticipatingHomeKey, keys); + return PartialKeyRoute.SerializationSupport.create(homeKey, keys); } @Override public void serialize(PartialKeyRoute route, DataOutputPlus out, int version) throws IOException { super.serialize(route, out, version); - ranges.serialize(route.covering, out, version); routingKey.serialize(route.homeKey, out, version); - out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override public long serializedSize(PartialKeyRoute keys, int version) { return super.serializedSize(keys, version) - + ranges.serializedSize(keys.covering, version) - + routingKey.serializedSize(keys.homeKey, version) - + 1; + + routingKey.serializedSize(keys.homeKey, version); } }; - public static final IVersionedSerializer fullKeyRoute = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) + public static final IVersionedSerializer fullKeyRoute = new AbstractKeysSerializer<>(routingKey, RoutingKey[]::new) { @Override FullKeyRoute deserialize(DataInputPlus in, int version, RoutingKey[] keys) throws IOException { RoutingKey homeKey = routingKey.deserialize(in, version); - boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; - return FullKeyRoute.SerializationSupport.create(homeKey, isParticipatingHomeKey, keys); + return FullKeyRoute.SerializationSupport.create(homeKey, keys); } @Override @@ -130,15 +123,13 @@ public void serialize(FullKeyRoute route, DataOutputPlus out, int version) throw { super.serialize(route, out, version); routingKey.serialize(route.homeKey, out, version); - out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override public long serializedSize(FullKeyRoute route, int version) { return super.serializedSize(route, version) - + routingKey.serializedSize(route.homeKey, version) - + 1; + + routingKey.serializedSize(route.homeKey, version); } }; @@ -146,28 +137,22 @@ public long serializedSize(FullKeyRoute route, int version) { @Override PartialRangeRoute deserialize(DataInputPlus in, int version, Range[] rs) throws IOException { - Ranges covering = ranges.deserialize(in, version); RoutingKey homeKey = routingKey.deserialize(in, version); - boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; - return PartialRangeRoute.SerializationSupport.create(covering, homeKey, isParticipatingHomeKey, rs); + return PartialRangeRoute.SerializationSupport.create(homeKey, rs); } @Override public void serialize(PartialRangeRoute route, DataOutputPlus out, int version) throws IOException { super.serialize(route, out, version); - ranges.serialize(route.covering, out, version); routingKey.serialize(route.homeKey, out, version); - out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override public long serializedSize(PartialRangeRoute rs, int version) { return super.serializedSize(rs, version) - + ranges.serializedSize(rs.covering, version) - + routingKey.serializedSize(rs.homeKey, version) - + 1; + + routingKey.serializedSize(rs.homeKey, version); } }; @@ -177,8 +162,7 @@ public long serializedSize(PartialRangeRoute rs, int version) @Override FullRangeRoute deserialize(DataInputPlus in, int version, Range[] Ranges) throws IOException { RoutingKey homeKey = routingKey.deserialize(in, version); - boolean isParticipatingHomeKey = (in.readByte() & 0x1) != 0; - return FullRangeRoute.SerializationSupport.create(homeKey, isParticipatingHomeKey, Ranges); + return FullRangeRoute.SerializationSupport.create(homeKey, Ranges); } @Override @@ -186,15 +170,13 @@ public void serialize(FullRangeRoute route, DataOutputPlus out, int version) thr { super.serialize(route, out, version); routingKey.serialize(route.homeKey, out, version); - out.writeByte(route.isParticipatingHomeKey ? 0x1 : 0); } @Override public long serializedSize(FullRangeRoute ranges, int version) { return super.serializedSize(ranges, version) - + routingKey.serializedSize(ranges.homeKey(), version) - + 1; + + routingKey.serializedSize(ranges.homeKey(), version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java index 4693c03c5cc2..782ecbf5ed5d 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java @@ -20,13 +20,13 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.List; import java.util.Set; import accord.local.Node; import accord.primitives.Range; import accord.topology.Shard; import accord.topology.Topology; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.io.IVersionedSerializer; @@ -135,7 +135,7 @@ public void serialize(Shard shard, DataOutputPlus out, int version) throws IOExc public Shard deserialize(DataInputPlus in, int version) throws IOException { Range range = TokenRange.serializer.deserialize(in, version); - List nodes = CollectionSerializers.deserializeList(in, version, nodeId); + SortedArrayList nodes = CollectionSerializers.deserializeSortedArrayList(in, version, nodeId, Node.Id[]::new); Set fastPathElectorate = CollectionSerializers.deserializeSet(in, version, nodeId); Set joining = CollectionSerializers.deserializeSet(in, version, nodeId); return new Shard(range, nodes, fastPathElectorate, joining); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index 62dfe68e2c9c..694c3f225a30 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -32,6 +32,7 @@ import accord.api.Read; import accord.local.SafeCommandStore; import accord.primitives.Keys; +import accord.primitives.Participants; import accord.primitives.Ranges; import accord.primitives.Seekable; import accord.primitives.Timestamp; @@ -158,14 +159,25 @@ public ConsistencyLevel cassandraConsistencyLevel() @Override public Read slice(Ranges ranges) { - Keys keys = itemKeys.slice(ranges); + return intersecting(itemKeys.slice(ranges)); + } + + @Override + public Read intersecting(Participants participants) + { + return intersecting(itemKeys.intersecting(participants)); + } + + private Read intersecting(Keys select) + { + Keys keys = itemKeys.intersecting(select); List reads = new ArrayList<>(keys.size()); for (TxnNamedRead read : items) if (keys.contains(read.key())) reads.add(read); - return createTxnRead(reads, txnKeys.slice(ranges), cassandraConsistencyLevel); + return createTxnRead(reads, txnKeys.intersecting(select), cassandraConsistencyLevel); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index 5a32065e97b7..563af8167564 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -32,6 +32,7 @@ import accord.api.Key; import accord.api.Update; import accord.primitives.Keys; +import accord.primitives.Participants; import accord.primitives.Ranges; import accord.primitives.RoutableKey; import accord.primitives.Timestamp; @@ -148,6 +149,14 @@ public Update slice(Ranges ranges) return new TxnUpdate(keys, select(this.keys, keys, fragments), condition, cassandraCommitCL); } + @Override + public Update intersecting(Participants participants) + { + Keys keys = this.keys.intersecting(participants); + // TODO: Slice the condition. + return new TxnUpdate(keys, select(this.keys, keys, fragments), condition, cassandraCommitCL); + } + private static ByteBuffer[] select(Keys in, Keys out, ByteBuffer[] from) { ByteBuffer[] result = new ByteBuffer[out.size()]; diff --git a/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java index 310a3e6a5890..0efcbf1ffdad 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java @@ -29,6 +29,7 @@ import accord.api.Update; import accord.api.Write; import accord.local.Node; +import accord.primitives.Participants; import accord.primitives.Ranges; import accord.primitives.Seekables; import accord.primitives.Timestamp; @@ -147,6 +148,12 @@ public Update slice(Ranges ranges) return this; } + @Override + public Update intersecting(Participants participants) + { + return this; + } + @Override public Update merge(Update other) { diff --git a/src/java/org/apache/cassandra/utils/CollectionSerializers.java b/src/java/org/apache/cassandra/utils/CollectionSerializers.java index 1fcb7cc2f3e3..0cf7fce0d92b 100644 --- a/src/java/org/apache/cassandra/utils/CollectionSerializers.java +++ b/src/java/org/apache/cassandra/utils/CollectionSerializers.java @@ -31,6 +31,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.IPartitionerDependentSerializer; import org.apache.cassandra.io.IVersionedSerializer; @@ -102,6 +103,15 @@ public static void serializeMap(Map map, DataOutputPlus out, int ve } } + public static > SortedArrayList deserializeSortedArrayList(DataInputPlus in, int version, IVersionedSerializer serializer, IntFunction allocator) throws IOException + { + int size = in.readUnsignedVInt32(); + V[] array = allocator.apply(size); + for (int i = 0 ; i < array.length ; ++i) + array[i] = serializer.deserialize(in, version); + return new SortedArrayList<>(array); + } + public static List deserializeList(DataInputPlus in, int version, IVersionedSerializer serializer) throws IOException { return deserializeCollection(in, version, serializer, newArrayList()); diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 493283bdae20..3e1f75f62aa3 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -33,6 +33,7 @@ import com.google.common.collect.Iterators; import accord.primitives.Routable; +import accord.local.CommandStores; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.service.accord.*; @@ -418,7 +419,9 @@ private static IAccordService mockAccordService(CommandStore commandStore, Redun Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); if (redundantBefore != null) redundantBefores.put(commandStore.id(), redundantBefore); - when(mockAccordService.getRedundantBeforesAndDurableBefore()).thenReturn(Pair.create(redundantBefores, durableBefore)); + Int2ObjectHashMap rangesForEpochs = new Int2ObjectHashMap<>(); + rangesForEpochs.put(commandStore.id(), commandStore.unsafeRangesForEpoch()); + when(mockAccordService.getCompactionInfo()).thenReturn(new IAccordService.CompactionInfo(redundantBefores, rangesForEpochs, durableBefore)); return mockAccordService; } @@ -463,7 +466,7 @@ private void testWithCommandStoreInternal(TestWithCommandStore test, boolean add for (TxnId txnId : txnIds) { Txn txn = txnId.kind().isWrite() ? writeTxn : readTxn; - PartialDeps partialDeps = Deps.NONE.slice(AccordTestUtils.fullRange(txn)); + PartialDeps partialDeps = Deps.NONE.intersecting(AccordTestUtils.fullRange(txn)); PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { @@ -559,7 +562,7 @@ private List compactCFS(IAccordService mockAccordService, ColumnFamil scanners.add(random.nextInt(scanners.size()), new Scanner(cfs.metadata(), outputPartitions.stream().map(Partition::unfilteredIterator).collect(Collectors.toList()))); } while (!scanners.isEmpty()); - verify(mockAccordService, times(singleCompaction || numScanners == 1 ? 1 : numScanners - 1)).getRedundantBeforesAndDurableBefore(); + verify(mockAccordService, times(singleCompaction || numScanners == 1 ? 1 : numScanners - 1)).getCompactionInfo(); return result; } } diff --git a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java index c4085c608385..59a7cf437d71 100644 --- a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java +++ b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java @@ -464,7 +464,7 @@ private Route createRoute(RandomSource rs, int numRecords, int index, int num var token = new Murmur3Partitioner.LongToken(rs.nextInt(minToken, maxToken)); keys.add(new TokenKey(table, token)); } - return new FullKeyRoute(keys.first(), true, keys.toArray(RoutingKey[]::new)); + return new FullKeyRoute(keys.first(), keys.toArray(RoutingKey[]::new)); } case Range: { diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java index 5200316a2f8a..17de14e4158e 100644 --- a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -259,7 +259,7 @@ private static Route createRoute(State state, RandomSource rs, Domain domain, var token = new LongToken(state.tokenGen.nextInt(rs)); keys.add(new TokenKey(table, token)); } - return new FullKeyRoute(keys.first(), true, keys.toArray(RoutingKey[]::new)); + return new FullKeyRoute(keys.first(), keys.toArray(RoutingKey[]::new)); } case Range: { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index bcc1f8bf111f..dde88ca46827 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -41,6 +41,7 @@ import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Range; +import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.RoutingKeys; @@ -113,7 +114,7 @@ public void commandLoadSave() throws Throwable TxnId txnId = txnId(1, clock.incrementAndGet(), 1, Txn.Kind.Write, Routable.Domain.Range); PartialDeps dependencies; - try (PartialDeps.Builder builder = PartialDeps.builder(depTxn.covering())) + try (PartialDeps.Builder builder = PartialDeps.builder(Ranges.of(range))) { builder.add(range, oldTxnId1); builder.add(range, oldTxnId2); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index a9bdd7aa5220..70a66f2be4fa 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -101,7 +101,7 @@ public void basicCycleTest() throws Throwable RoutingKey homeKey = key.toUnseekable(); FullRoute fullRoute = txn.keys().toRoute(homeKey); PartialRoute route = fullRoute.slice(fullRange(txn)); - PartialTxn partialTxn = txn.slice(route.covering(), true); + PartialTxn partialTxn = txn.intersecting(route, true); PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, false, 1, partialTxn, fullRoute); // Check preaccept @@ -136,7 +136,7 @@ public void basicCycleTest() throws Throwable TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); Timestamp executeAt = timestamp(1, clock.incrementAndGet(), 1); PartialDeps deps; - try (PartialDeps.Builder builder = PartialDeps.builder(route.covering())) + try (PartialDeps.Builder builder = PartialDeps.builder(route)) { builder.add(key, txnId2); deps = builder.build(); @@ -193,7 +193,7 @@ public void computeDeps() throws Throwable RoutingKey homeKey = key.toUnseekable(); FullRoute fullRoute = txn.keys().toRoute(homeKey); PartialRoute route = fullRoute.slice(fullRange(txn)); - PartialTxn partialTxn = txn.slice(route.covering(), true); + PartialTxn partialTxn = txn.intersecting(route, true); PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, false, 1, partialTxn, fullRoute); getUninterruptibly(commandStore.execute(preAccept1, safeStore -> { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index 2f689187aca3..94e181a853b3 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -24,7 +24,6 @@ import java.util.Set; import java.util.UUID; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import org.junit.Assert; @@ -38,6 +37,7 @@ import accord.local.Node.Id; import accord.topology.Shard; import accord.topology.Topology; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.concurrent.ScheduledExecutors; @@ -71,7 +71,7 @@ public class AccordConfigurationServiceTest private static final Id ID1 = new Id(1); private static final Id ID2 = new Id(2); private static final Id ID3 = new Id(3); - private static final List ID_LIST = ImmutableList.of(ID1, ID2, ID3); + private static final SortedArrayList ID_LIST = new SortedArrayList<>(new Id[] { ID1, ID2, ID3 }); private static final Set ID_SET = ImmutableSet.copyOf(ID_LIST); private static final TableId TBL1 = TableId.fromUUID(new UUID(0, 1)); private static final TableId TBL2 = TableId.fromUUID(new UUID(0, 2)); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index d6d2dab96bde..717cda1de385 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -113,7 +113,7 @@ public void serde() CommonAttributes.Mutable common = new CommonAttributes.Mutable(id); common.partialTxn(partialTxn); common.route(route); - common.partialDeps(deps.slice(scope)); + common.partialDeps(deps.intersecting(scope)); common.durability(Status.Durability.NotDurable); Command.WaitingOn waitingOn = null; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index 47ba369a0982..1ece95f16477 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -88,8 +88,9 @@ public void bootstrapRead() long epoch = 42; Txn txn = Utils.readTxn(Keys.of(IntKey.key(42))); TxnId id = nextTxnId(epoch, txn); - PartialTxn partialTxn = txn.slice(Ranges.of(IntKey.range(40, 50)), true); - Request request = new AbstractFetchCoordinator.FetchRequest(epoch, id, partialTxn.covering(), PartialDeps.NONE, partialTxn); + Ranges ranges = Ranges.of(IntKey.range(40, 50)); + PartialTxn partialTxn = txn.slice(ranges, true); + Request request = new AbstractFetchCoordinator.FetchRequest(epoch, id, ranges, PartialDeps.NONE, partialTxn); checkRequestReplies(request, new AbstractFetchCoordinator.FetchResponse(null, null, id), diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 0f28c6c7c246..ffd0d311867a 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -32,8 +32,9 @@ import javax.annotation.Nullable; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; + +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.util.File; @@ -157,7 +158,7 @@ private static FullRoute route(PartialTxn txn) { Seekable key = txn.keys().get(0); RoutingKey routingKey = key.asKey().toUnseekable(); - return new FullKeyRoute(routingKey, true, new RoutingKey[]{ routingKey }); + return new FullKeyRoute(routingKey, new RoutingKey[]{ routingKey }); } } @@ -347,7 +348,7 @@ public static PartialTxn createPartialTxn(int key) { Txn txn = createTxn(key, key); Ranges ranges = fullRange(txn); - return new PartialTxn.InMemory(ranges, txn.kind(), txn.keys(), txn.read(), txn.query(), txn.update()); + return new PartialTxn.InMemory(txn.kind(), txn.keys(), txn.read(), txn.query(), txn.update()); } private static class SingleEpochRanges extends CommandStore.EpochUpdateHolder @@ -370,7 +371,7 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); TokenRange range = TokenRange.fullRange(metadata.id); Node.Id node = new Id(1); - Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); + Topology topology = new Topology(1, new Shard(range, new SortedArrayList<>(new Id[] { node }), Sets.newHashSet(node), Collections.emptySet())); NodeTimeService time = new NodeTimeService() { @Override public Id id() { return node;} @@ -437,7 +438,7 @@ public static AccordCommandStore createAccordCommandStore( TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); TokenRange range = TokenRange.fullRange(metadata.id); Node.Id node = new Id(1); - Topology topology = new Topology(1, new Shard(range, Lists.newArrayList(node), Sets.newHashSet(node), Collections.emptySet())); + Topology topology = new Topology(1, new Shard(range, new SortedArrayList<>(new Id[] { node }), Sets.newHashSet(node), Collections.emptySet())); AccordCommandStore store = createAccordCommandStore(node, now, topology, loadExecutor, saveExecutor); store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).setCapacity(1 << 20)); return store; @@ -480,9 +481,9 @@ public static Node.Id id(int id) return new Node.Id(id); } - public static List idList(int... ids) + public static SortedArrayList idList(int... ids) { - return Arrays.stream(ids).mapToObj(AccordTestUtils::id).collect(Collectors.toList()); + return new SortedArrayList<>(Arrays.stream(ids).mapToObj(AccordTestUtils::id).toArray(Id[]::new)); } public static Set idSet(int... ids) diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java index c3c8e6b588ad..e4a2a4791c71 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTopologyUtils.java @@ -24,10 +24,10 @@ import java.util.List; import java.util.Set; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import accord.local.Node; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; @@ -53,7 +53,7 @@ public class AccordTopologyUtils public static final Node.Id ID1 = new Node.Id(1); public static final Node.Id ID2 = new Node.Id(2); public static final Node.Id ID3 = new Node.Id(3); - public static final List NODE_LIST = ImmutableList.of(ID1, ID2, ID3); + public static final SortedArrayList NODE_LIST = new SortedArrayList<>(new Node.Id[] { ID1, ID2, ID3 }); public static final Set NODE_SET = ImmutableSet.copyOf(NODE_LIST); private static final IPartitioner partitioner = Murmur3Partitioner.instance; diff --git a/test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java b/test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java index 65650274fa57..cda86eff8bc1 100644 --- a/test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java +++ b/test/unit/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategyTest.java @@ -19,7 +19,6 @@ package org.apache.cassandra.service.accord.fastpath; import java.util.HashMap; -import java.util.List; import java.util.Map; import com.google.common.collect.ImmutableMap; @@ -27,6 +26,7 @@ import org.junit.Test; import accord.local.Node; +import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.service.accord.fastpath.ParameterizedFastPathStrategy.WeightedDc; @@ -39,7 +39,7 @@ public class ParameterizedFastPathStrategyTest { - private static final List NODES = idList(1, 2, 3, 4, 5, 6); + private static final SortedArrayList NODES = idList(1, 2, 3, 4, 5, 6); private static final Map DCS_2; private static final Map DCS_3; diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java index d5da34c8fb4f..d966757e9597 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java @@ -93,7 +93,7 @@ private static Gen foundKnownMap() List forOrdering = Gens.lists(keyGen).unique().ofSizeBetween(1, 10).next(rs); forOrdering.sort(Comparator.naturalOrder()); // TODO (coverage): don't hard code keys type - keysOrRanges = new FullKeyRoute(homeKey, forOrdering.contains(homeKey), forOrdering.toArray(RoutingKey[]::new)); + keysOrRanges = new FullKeyRoute(homeKey, forOrdering.toArray(RoutingKey[]::new)); break; case Range: keysOrRanges = AccordGenerators.ranges(Murmur3Partitioner.instance).next(rs); diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 33244f045b77..81462c0e6552 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -183,7 +183,7 @@ Command toCommand() else return Command.SerializerSupport.truncatedApply(attributes(), saveStatus, executeAt, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); case Erased: - case ErasedOrInvalidated: + case ErasedOrInvalidOrVestigial: case Invalidated: return Command.SerializerSupport.invalidated(txnId, Listeners.Immutable.EMPTY); } diff --git a/test/unit/org/apache/cassandra/utils/RangeTreeTest.java b/test/unit/org/apache/cassandra/utils/RangeTreeTest.java index 6ef5325d1d18..695894603dba 100644 --- a/test/unit/org/apache/cassandra/utils/RangeTreeTest.java +++ b/test/unit/org/apache/cassandra/utils/RangeTreeTest.java @@ -526,7 +526,7 @@ public List> intersectsToken(Routing key) { List> matches = new ArrayList<>(); // find ranges, then add the values - list.forEach(key, (a, b, c, d, idx) -> { + list.forEachKey(key, (a, b, c, d, idx) -> { Range match = ranges[idx]; map.get(match).forEachInt(v -> matches.add(Map.entry(match, v))); }, (a, b, c, d, start, end) -> { @@ -544,7 +544,7 @@ public List> intersects(Range range) { List> matches = new ArrayList<>(); // find ranges, then add the values - list.forEach(range, (a, b, c, d, idx) -> { + list.forEachRange(range, (a, b, c, d, idx) -> { Range match = ranges[idx]; map.get(match).forEachInt(v -> matches.add(Map.entry(match, v))); }, (a, b, c, d, start, end) -> { From 8ab50031187e520e4e23164447c0d703f8342091 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Mon, 18 Mar 2024 15:38:31 -0400 Subject: [PATCH 127/340] Accord migration and interop correctness Patch by Ariel Weisberg; Reviewed by Blake Eggleston for CASSANDRA-19744 --- .gitmodules | 2 +- modules/accord | 2 +- .../cassandra/batchlog/BatchlogManager.java | 229 ++++- .../config/CassandraRelevantProperties.java | 3 + .../cassandra/config/DatabaseDescriptor.java | 6 + .../cql3/statements/BatchStatement.java | 4 +- .../statements/BatchUpdatesCollector.java | 16 +- .../cql3/statements/CQL3CasRequest.java | 5 +- .../statements/ModificationStatement.java | 10 +- ...eTableSinglePartitionUpdatesCollector.java | 8 +- .../SingleTableUpdatesCollector.java | 10 +- .../cql3/statements/TransactionStatement.java | 2 +- .../cql3/statements/UpdatesCollector.java | 2 +- .../schema/AlterTableStatement.java | 10 +- .../db/AbstractMutationVerbHandler.java | 23 +- .../db/CassandraKeyspaceWriteHandler.java | 4 +- .../apache/cassandra/db/CounterMutation.java | 52 +- .../org/apache/cassandra/db/IMutation.java | 20 +- .../org/apache/cassandra/db/Keyspace.java | 6 +- .../org/apache/cassandra/db/Mutation.java | 139 ++- .../apache/cassandra/db/SimpleBuilders.java | 33 +- .../cassandra/db/virtual/VirtualMutation.java | 23 + .../cassandra/exceptions/RequestFailure.java | 13 +- .../exceptions/RequestFailureReason.java | 26 +- .../RetryOnDifferentSystemException.java | 30 + .../cassandra/hints/HintDiagnostics.java | 16 +- .../org/apache/cassandra/hints/HintEvent.java | 7 +- .../cassandra/hints/HintVerbHandler.java | 14 +- .../cassandra/hints/HintsBufferPool.java | 9 + .../apache/cassandra/hints/HintsCatalog.java | 13 +- .../hints/HintsDispatchExecutor.java | 15 +- .../cassandra/hints/HintsDispatcher.java | 347 ++++++- .../apache/cassandra/hints/HintsService.java | 47 +- .../apache/cassandra/hints/HintsStore.java | 17 + .../metrics/AccordClientRequestMetrics.java | 7 + .../metrics/ClientRequestMetrics.java | 6 + .../metrics/HintsServiceMetrics.java | 25 + .../cassandra/metrics/KeyspaceMetrics.java | 2 + .../cassandra/metrics/TableMetrics.java | 2 + .../cassandra/net/ResponseVerbHandler.java | 9 +- .../apache/cassandra/repair/RepairJob.java | 14 +- .../cassandra/schema/DistributedSchema.java | 29 +- .../service/AbstractWriteResponseHandler.java | 67 +- .../service/BatchlogResponseHandler.java | 15 +- .../cassandra/service/StorageProxy.java | 513 ++++++---- .../service/accord/AccordJournal.java | 22 +- .../service/accord/AccordService.java | 273 +++-- .../service/accord/AccordVerbHandler.java | 6 +- .../service/accord/IAccordService.java | 49 +- .../service/accord/api/AccordAgent.java | 8 +- .../interop/AccordInteropExecution.java | 22 +- .../interop/AccordInteropReadCallback.java | 11 +- .../service/accord/repair/AccordRepair.java | 30 +- .../service/accord/txn/TxnQuery.java | 26 +- .../service/accord/txn/TxnUpdate.java | 36 +- .../service/accord/txn/TxnWrite.java | 15 +- .../accord/txn/UnrecoverableRepairUpdate.java | 6 +- .../service/consensus/TransactionalMode.java | 4 + .../migration/ConsensusKeyMigrationState.java | 8 +- .../ConsensusMigrationMutationHelper.java | 359 +++++++ .../ConsensusMigrationRepairResult.java | 24 +- .../ConsensusMigrationRepairType.java | 16 +- .../migration/ConsensusRequestRouter.java | 61 +- .../migration/ConsensusTableMigration.java | 21 +- .../cassandra/service/paxos/Commit.java | 12 +- .../service/reads/ReadCoordinator.java | 4 +- .../reads/repair/BlockingPartitionRepair.java | 9 +- .../reads/repair/BlockingReadRepair.java | 133 +-- .../reads/repair/BlockingReadRepairs.java | 4 +- .../service/reads/repair/ReadRepair.java | 5 + .../repair/RowIteratorMergeListener.java | 2 +- ...ishConsensusMigrationForTableAndRange.java | 46 +- .../cassandra/transport/Dispatcher.java | 7 +- .../cassandra/triggers/TriggerExecutor.java | 16 +- .../apache/cassandra/utils/NoSpamLogger.java | 5 +- .../apache/cassandra/utils/Throwables.java | 31 + .../distributed/api/ICoordinator.java | 5 +- .../distributed/impl/Coordinator.java | 6 + .../distributed/impl/TestChangeListener.java | 97 +- .../distributed/shared/ClusterUtils.java | 5 + .../distributed/test/CASAddTest.java | 4 +- .../distributed/test/CASMultiDCTest.java | 12 +- .../distributed/test/MessageFiltersTest.java | 3 +- .../distributed/test/QueriesTableTest.java | 3 +- .../test/RepairDigestTrackingTest.java | 33 +- .../test/ShortReadProtectionTest.java | 3 +- .../distributed/test/TestBaseImpl.java | 44 +- .../test/accord/AccordCQLTestBase.java | 942 +++++++++--------- .../accord/AccordIncrementalRepairTest.java | 31 +- .../test/accord/AccordIntegrationTest.java | 20 +- .../accord/AccordInteroperabilityTest.java | 12 +- .../test/accord/AccordLoadTest.java | 10 +- .../test/accord/AccordMetricsTest.java | 8 +- .../accord/AccordMigrationRaceTestBase.java | 771 ++++++++++++++ .../test/accord/AccordMigrationTest.java | 70 +- .../test/accord/AccordTestBase.java | 148 ++- .../AccordTimestampPreservationTest.java | 209 ++++ .../accord/MigrationFromAccordRaceTest.java | 27 + .../accord/MigrationToAccordRaceTest.java | 27 + .../test/metrics/HintsServiceMetricsTest.java | 7 + .../test/ShortPaxosSimulationTest.java | 4 +- test/unit/org/apache/cassandra/Util.java | 27 +- .../cassandra/hints/HintsServiceTest.java | 78 +- .../apache/cassandra/hints/HintsTestUtil.java | 20 +- .../schema/TransactionalConfigSchemaTest.java | 11 + .../service/accord/AccordReadRepairTest.java | 16 +- .../service/accord/AccordServiceTest.java | 25 +- .../service/accord/AccordTestUtils.java | 14 +- .../service/accord/EpochSyncTest.java | 8 +- .../accord/SimulatedAccordCommandStore.java | 6 +- 110 files changed, 4455 insertions(+), 1374 deletions(-) create mode 100644 src/java/org/apache/cassandra/exceptions/RetryOnDifferentSystemException.java create mode 100644 src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordTimestampPreservationTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/MigrationFromAccordRaceTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordRaceTest.java diff --git a/.gitmodules b/.gitmodules index 616dacf610a7..60a9510e7ad5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "modules/accord"] path = modules/accord - url = https://github.com/apache/cassandra-accord.git + url = ../cassandra-accord.git branch = trunk diff --git a/modules/accord b/modules/accord index 4aa0a8aeb6b1..129a4862df43 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 4aa0a8aeb6b12036660695e3fb89c69b5d40f345 +Subproject commit 129a4862df43fdc5893687922a77bb0288f8cb83 diff --git a/src/java/org/apache/cassandra/batchlog/BatchlogManager.java b/src/java/org/apache/cassandra/batchlog/BatchlogManager.java index b5b33d5e0b34..1844f8424999 100644 --- a/src/java/org/apache/cassandra/batchlog/BatchlogManager.java +++ b/src/java/org/apache/cassandra/batchlog/BatchlogManager.java @@ -27,25 +27,25 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Supplier; +import javax.annotation.Nullable; + import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.RateLimiter; -import org.apache.cassandra.concurrent.ScheduledExecutorPlus; -import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.transport.Dispatcher; -import org.apache.cassandra.utils.TimeUUID; -import org.apache.cassandra.utils.concurrent.Future; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.cql3.UntypedResultSet.Row; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.Keyspace; @@ -55,6 +55,7 @@ import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.exceptions.WriteFailureException; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.gms.FailureDetector; @@ -70,26 +71,41 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageFlag; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.WriteResponseHandler; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.IAccordService.AsyncTxnResult; +import org.apache.cassandra.service.accord.txn.TxnResult; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.SplitMutations; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MBeanWrapper; +import org.apache.cassandra.utils.Throwables; +import org.apache.cassandra.utils.TimeUUID; +import org.apache.cassandra.utils.concurrent.Future; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.config.CassandraRelevantProperties.BATCHLOG_REPLAY_TIMEOUT_IN_MS; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.cql3.QueryProcessor.executeInternalWithPaging; +import static org.apache.cassandra.hints.HintsService.RETRY_ON_DIFFERENT_SYSTEM_UUID; import static org.apache.cassandra.net.Verb.MUTATION_REQ; +import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.mutateWithAccordAsync; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; public class BatchlogManager implements BatchlogManagerMBean { public static final String MBEAN_NAME = "org.apache.cassandra.db:type=BatchlogManager"; - private static final long REPLAY_INTERVAL = 10 * 1000; // milliseconds static final int DEFAULT_PAGE_SIZE = 128; private static final Logger logger = LoggerFactory.getLogger(BatchlogManager.class); @@ -104,6 +120,8 @@ public class BatchlogManager implements BatchlogManagerMBean private final RateLimiter rateLimiter = RateLimiter.create(Double.MAX_VALUE); + private final AtomicBoolean isBatchlogReplayPaused = new AtomicBoolean(false); + public BatchlogManager() { batchlogTasks = executorFactory().scheduled(false, "BatchlogTasks"); @@ -115,7 +133,7 @@ public void start() batchlogTasks.scheduleWithFixedDelay(this::replayFailedBatches, StorageService.RING_DELAY_MILLIS, - REPLAY_INTERVAL, + CassandraRelevantProperties.BATCHLOG_REPLAY_INTERVAL_MS.getLong(), MILLISECONDS); } @@ -184,7 +202,9 @@ public long getTotalBatchesReplayed() public void forceBatchlogReplay() throws Exception { + logger.debug("Forcing batchlog replay"); startBatchlogReplay().get(); + logger.debug("Finished forcing batchlog replay"); } public Future startBatchlogReplay() @@ -193,14 +213,25 @@ public Future startBatchlogReplay() return batchlogTasks.submit(this::replayFailedBatches); } - void performInitialReplay() throws InterruptedException, ExecutionException + public void pauseReplay() + { + logger.debug("Paused batchlog replay"); + isBatchlogReplayPaused.set(true); + } + + public void resumeReplay() { - // Invokes initial replay. Used for testing only. - batchlogTasks.submit(this::replayFailedBatches).get(); + logger.debug("Resumed batchlog replay"); + isBatchlogReplayPaused.set(false); } private void replayFailedBatches() { + if (isBatchlogReplayPaused.get()) + { + logger.debug("Batch log replay is paused, skipping replay"); + return; + } logger.trace("Started replayFailedBatches"); // rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml). @@ -223,6 +254,7 @@ private void replayFailedBatches() SchemaConstants.SYSTEM_KEYSPACE_NAME, SystemKeyspace.BATCHES); UntypedResultSet batches = executeInternalWithPaging(query, pageSize, lastReplayedUuid, limitUuid); + processBatchlogEntries(batches, pageSize, rateLimiter); lastReplayedUuid = limitUuid; logger.trace("Finished replayFailedBatches"); @@ -276,16 +308,7 @@ private void processBatchlogEntries(UntypedResultSet batches, int pageSize, Rate int version = row.getInt("version"); try { - ReplayingBatch batch = new ReplayingBatch(id, version, row.getList("mutations", BytesType.instance)); - if (batch.replay(rateLimiter, hintedNodes) > 0) - { - unfinishedBatches.add(batch); - } - else - { - remove(id); // no write mutations were sent (either expired or all CFs involved truncated). - ++totalBatchesReplayed; - } + dispatchBatch(rateLimiter, row, id, version, hintedNodes, unfinishedBatches); } catch (IOException e) { @@ -307,6 +330,8 @@ private void processBatchlogEntries(UntypedResultSet batches, int pageSize, Rate // finalize the incomplete last page of batches if (positionInPage > 0) finishAndClearBatches(unfinishedBatches, hintedNodes, replayedBatches); + else + logger.trace("Had no batches to replay"); if (caughtException != null) logger.warn(String.format("Encountered %d unexpected exceptions while sending out batches", skipped), caughtException); @@ -318,6 +343,35 @@ private void processBatchlogEntries(UntypedResultSet batches, int pageSize, Rate replayedBatches.forEach(BatchlogManager::remove); } + private void dispatchBatch(RateLimiter rateLimiter, Row row, TimeUUID id, int version, Set hintedNodes, ArrayList unfinishedBatches) throws IOException + { + while (true) + { + ClusterMetadata cm = ClusterMetadata.current(); + try + { + ReplayingBatch batch = new ReplayingBatch(id, version, row.getList("mutations", BytesType.instance), cm); + if (batch.replay(rateLimiter, hintedNodes)) + { + unfinishedBatches.add(batch); + } + else + { + remove(id); // no write mutations were sent (either expired or all CFs involved truncated). + ++totalBatchesReplayed; + } + } + catch (RetryOnDifferentSystemException e) + { + // Self apply can throw retry on different system + // Barring bugs we should already have the latest cluster metadata needed to correctly + // split the batch and retry since that is what was used to generate the exception + continue; + } + break; + } + } + private void finishAndClearBatches(ArrayList batches, Set hintedNodes, Set replayedBatches) { // schedule hints for timed out deliveries @@ -340,61 +394,112 @@ private static class ReplayingBatch { private final TimeUUID id; private final long writtenAt; - private final List mutations; + private final int unsplitGcGs; + private final List normalMutations; + private final List accordMutations; private final int replayedBytes; + private final ClusterMetadata cm; - private List> replayHandlers; + private List> replayHandlers = ImmutableList.of(); + private AsyncTxnResult accordResult; + @Nullable + private Dispatcher.RequestTime accordTxnStart; - ReplayingBatch(TimeUUID id, int version, List serializedMutations) throws IOException + ReplayingBatch(TimeUUID id, int version, List serializedMutations, ClusterMetadata cm) throws IOException { this.id = id; this.writtenAt = id.unix(MILLISECONDS); - this.mutations = new ArrayList<>(serializedMutations.size()); - this.replayedBytes = addMutations(version, serializedMutations); + List unsplitMutations = new ArrayList<>(serializedMutations.size()); + this.replayedBytes = addMutations(unsplitMutations, writtenAt, version, serializedMutations); + unsplitGcGs = gcgs(unsplitMutations); + SplitMutations splitMutations = ConsensusMigrationMutationHelper.splitMutationsIntoAccordAndNormal(cm, unsplitMutations); + logger.trace("Replaying batch with Accord {} and normal {}", splitMutations.accordMutations(), splitMutations.normalMutations()); + normalMutations = splitMutations.normalMutations(); + accordMutations = splitMutations.accordMutations(); + if (accordMutations != null) + accordTxnStart = new Dispatcher.RequestTime(Clock.Global.nanoTime()); + this.cm = cm; } - public int replay(RateLimiter rateLimiter, Set hintedNodes) throws IOException + public boolean replay(RateLimiter rateLimiter, Set hintedNodes) throws IOException { logger.trace("Replaying batch {}", id); - if (mutations.isEmpty()) - return 0; + if ((normalMutations == null || normalMutations.isEmpty()) && (accordMutations == null || accordMutations.isEmpty())) + return false; - int gcgs = gcgs(mutations); - if (MILLISECONDS.toSeconds(writtenAt) + gcgs <= FBUtilities.nowInSeconds()) - return 0; + if (MILLISECONDS.toSeconds(writtenAt) + unsplitGcGs <= FBUtilities.nowInSeconds()) + return false; + + if (accordMutations != null) + { + accordTxnStart = accordTxnStart.withStartedAt(Clock.Global.nanoTime()); + accordResult = accordMutations != null ? mutateWithAccordAsync(cm, accordMutations, null, accordTxnStart) : null; + } - replayHandlers = sendReplays(mutations, writtenAt, hintedNodes); + if (normalMutations != null) + replayHandlers = sendReplays(normalMutations, writtenAt, hintedNodes); rateLimiter.acquire(replayedBytes); // acquire afterwards, to not mess up ttl calculation. - return replayHandlers.size(); + return replayHandlers.size() > 0 || accordMutations != null; } public void finish(Set hintedNodes) { - for (int i = 0; i < replayHandlers.size(); i++) + Throwable failure = null; + // Check if the Accord mutations succeeded asynchronously + try { - ReplayWriteResponseHandler handler = replayHandlers.get(i); - try + if (accordResult != null) { - handler.get(); + IAccordService accord = AccordService.instance(); + TxnResult.Kind kind = accord.getTxnResult(accordResult, true, ConsistencyLevel.QUORUM, accordTxnStart).kind(); + if (kind == retry_new_protocol) + throw new RetryOnDifferentSystemException(); } - catch (WriteTimeoutException|WriteFailureException e) + } + catch (WriteTimeoutException|WriteFailureException|RetryOnDifferentSystemException e) + { + logger.trace("Failed replaying a batched mutation on Accord, will write a hint"); + logger.trace("Failure was : {}", e.getMessage()); + writeHintsForUndeliveredAccordTxns(hintedNodes); + } + catch (Exception e) + { + failure = Throwables.merge(failure, e); + } + + try + { + for (int i = 0; i < replayHandlers.size(); i++) { - if (logger.isTraceEnabled()) + ReplayWriteResponseHandler handler = replayHandlers.get(i); + try + { + handler.get(); + } + catch (WriteTimeoutException|WriteFailureException|RetryOnDifferentSystemException e) { logger.trace("Failed replaying a batched mutation to a node, will write a hint"); logger.trace("Failure was : {}", e.getMessage()); + // writing hints for the rest to hints, starting from i + writeHintsForUndeliveredEndpoints(i, hintedNodes); + break; } - // writing hints for the rest to hints, starting from i - writeHintsForUndeliveredEndpoints(i, hintedNodes); - return; } } + catch (Exception e) + { + logger.debug("Unexpected batchlog replay exception", e); + failure = Throwables.merge(failure, e); + } + + if (failure != null) + throw Throwables.unchecked(failure); } - private int addMutations(int version, List serializedMutations) throws IOException + private static int addMutations(List unsplitMutations, long writtenAt, int version, List serializedMutations) throws IOException { int ret = 0; for (ByteBuffer serializedMutation : serializedMutations) @@ -402,7 +507,7 @@ private int addMutations(int version, List serializedMutations) thro ret += serializedMutation.remaining(); try (DataInputBuffer in = new DataInputBuffer(serializedMutation, true)) { - addMutation(Mutation.serializer.deserialize(in, version)); + addMutation(unsplitMutations, writtenAt, Mutation.serializer.deserialize(in, version)); } } @@ -412,19 +517,41 @@ private int addMutations(int version, List serializedMutations) thro // Remove CFs that have been truncated since. writtenAt and SystemTable#getTruncatedAt() both return millis. // We don't abort the replay entirely b/c this can be considered a success (truncated is same as delivered then // truncated. - private void addMutation(Mutation mutation) + private static void addMutation(List unsplitMutations, long writtenAt, Mutation mutation) { for (TableId tableId : mutation.getTableIds()) if (writtenAt <= SystemKeyspace.getTruncatedAt(tableId)) mutation = mutation.without(tableId); - if (!mutation.isEmpty()) - mutations.add(mutation); + if (mutation != null) + unsplitMutations.add(mutation); + } + + // Write the hint assuming that when it is replayed it will probably be replayed + // as an Accord transaction so no reason to record per endpoint hints for all the endpoints + // Hints will still have to split and re-route on replay + private void writeHintsForUndeliveredAccordTxns(Set hintedNodes) + { + if (accordMutations == null) + return; + + int gcgs = gcgs(accordMutations); + + // expired + if (MILLISECONDS.toSeconds(writtenAt) + gcgs <= FBUtilities.nowInSeconds()) + return; + + for (Mutation m : accordMutations) + HintsService.instance.write(ImmutableList.of(RETRY_ON_DIFFERENT_SYSTEM_UUID), Hint.create(m, writtenAt)); + hintedNodes.add(RETRY_ON_DIFFERENT_SYSTEM_UUID); } private void writeHintsForUndeliveredEndpoints(int startFrom, Set hintedNodes) { - int gcgs = gcgs(mutations); + if (normalMutations == null) + return; + + int gcgs = gcgs(normalMutations); // expired if (MILLISECONDS.toSeconds(writtenAt) + gcgs <= FBUtilities.nowInSeconds()) @@ -434,7 +561,7 @@ private void writeHintsForUndeliveredEndpoints(int startFrom, Set hintedNo for (int i = startFrom; i < replayHandlers.size(); i++) { ReplayWriteResponseHandler handler = replayHandlers.get(i); - Mutation undeliveredMutation = mutations.get(i); + Mutation undeliveredMutation = normalMutations.get(i); if (handler != null) { diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index b8b4f3e33a9e..10cf8d6b1045 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -61,6 +61,7 @@ public enum CassandraRelevantProperties AUTOCOMPACTION_ON_STARTUP_ENABLED("cassandra.autocompaction_on_startup_enabled", "true"), AUTO_BOOTSTRAP("cassandra.auto_bootstrap"), AUTO_REPAIR_FREQUENCY_SECONDS("cassandra.auto_repair_frequency_seconds", convertToString(TimeUnit.MINUTES.toSeconds(5))), + BATCHLOG_REPLAY_INTERVAL_MS("cassandra.batchlog.replay_interval_ms", "10000"), BATCHLOG_REPLAY_TIMEOUT_IN_MS("cassandra.batchlog.replay_timeout_in_ms"), BATCH_COMMIT_LOG_SYNC_INTERVAL("cassandra.batch_commitlog_sync_interval_millis", "1000"), /** @@ -271,6 +272,8 @@ public enum CassandraRelevantProperties */ GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED("cassandra.gossip_settle_poll_success_required", "3"), + HINT_DISPATCH_INTERVAL_MS("cassandra.hint_dispatch_interval_ms", "10000"), + IGNORED_SCHEMA_CHECK_ENDPOINTS("cassandra.skip_schema_check_for_endpoints"), IGNORED_SCHEMA_CHECK_VERSIONS("cassandra.skip_schema_check_for_versions"), IGNORE_CORRUPTED_SCHEMA_TABLES("cassandra.ignore_corrupted_schema_tables"), diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 07227a40e971..9a2c680b47f2 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -80,6 +80,7 @@ import org.apache.cassandra.config.Config.DiskAccessMode; import org.apache.cassandra.config.Config.PaxosOnLinearizabilityViolation; import org.apache.cassandra.config.Config.PaxosStatePurging; +import org.apache.cassandra.config.DurationSpec.IntMillisecondsBound; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.commitlog.AbstractCommitLogSegmentManager; import org.apache.cassandra.db.commitlog.CommitLog; @@ -4047,6 +4048,11 @@ public static int getHintsFlushPeriodInMS() return conf.hints_flush_period.toMilliseconds(); } + public static void setHintsFlushPeriodInMS(int milliseconds) + { + conf.hints_flush_period = new IntMillisecondsBound(milliseconds); + } + public static long getMaxHintsFileSize() { return conf.max_hints_file_size.toBytesInLong(); diff --git a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java index 65c7f56662fb..45c433a5301c 100644 --- a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java @@ -354,7 +354,9 @@ public List getMutations(ClientState state, ClientWarn.instance.warn(MessageFormatter.arrayFormat(LOGGED_BATCH_LOW_GCGS_WARNING, new Object[] { suffix, tablesWithZeroGcGs }) .getMessage()); } - return collector.toMutations(state); + // local is either executeWithoutConditions modifying a virtual table (doesn't support txns) or executeLocal + // which is called by test or internal things that are bypassing distributed system modification/checks + return collector.toMutations(state, local); } /** diff --git a/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java b/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java index aabcecec72f8..49b5a404dd04 100644 --- a/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java +++ b/src/java/org/apache/cassandra/cql3/statements/BatchUpdatesCollector.java @@ -137,14 +137,14 @@ private IMutationBuilder makeMutationBuilder(TableMetadata metadata, DecoratedKe * @return a collection containing all the mutations. */ @Override - public List toMutations(ClientState state) + public List toMutations(ClientState state, boolean allowPotentialTxnConflicts) { List ms = new ArrayList<>(); for (Map ksMap : mutationBuilders.values()) { for (IMutationBuilder builder : ksMap.values()) { - IMutation mutation = builder.build(); + IMutation mutation = builder.build(allowPotentialTxnConflicts); mutation.validateIndexedColumns(state); mutation.validateSize(MessagingService.current_version, CommitLogSegment.ENTRY_OVERHEAD_SIZE); ms.add(mutation); @@ -182,7 +182,7 @@ private interface IMutationBuilder /** * Build the immutable mutation */ - IMutation build(); + IMutation build(boolean allowPotentialTxnConflicts); /** * Get the builder for the given tableId @@ -215,7 +215,7 @@ public MutationBuilder add(PartitionUpdate.Builder updateBuilder) return this; } - public Mutation build() + public Mutation build(boolean allowPotentialTxnConflicts) { ImmutableMap.Builder updates = new ImmutableMap.Builder<>(); for (Map.Entry updateEntry : modifications.entrySet()) @@ -223,7 +223,7 @@ public Mutation build() PartitionUpdate update = updateEntry.getValue().build(); updates.put(updateEntry.getKey(), update); } - return new Mutation(keyspaceName, key, updates.build(), createdAt, false); + return new Mutation(keyspaceName, key, updates.build(), createdAt, allowPotentialTxnConflicts); } public PartitionUpdate.Builder get(TableId tableId) @@ -263,9 +263,9 @@ public IMutationBuilder add(PartitionUpdate.Builder builder) return mutationBuilder.add(builder); } - public IMutation build() + public IMutation build(boolean allowPotentialTxnConflicts) { - return new CounterMutation(mutationBuilder.build(), cl); + return new CounterMutation(mutationBuilder.build(allowPotentialTxnConflicts), cl); } public PartitionUpdate.Builder get(TableId id) @@ -297,7 +297,7 @@ public VirtualMutationBuilder add(PartitionUpdate.Builder builder) } @Override - public VirtualMutation build() + public VirtualMutation build(boolean allowPotentialTxnConflicts) { ImmutableMap.Builder updates = new ImmutableMap.Builder<>(); modifications.forEach((tableId, updateBuilder) -> updates.put(tableId, updateBuilder.build())); diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index bbfc333ca6f9..41967b619b86 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -511,10 +511,11 @@ public Txn toAccordTxn(ConsistencyLevel consistencyLevel, ConsistencyLevel commi private Update createUpdate(ClientState clientState, ConsistencyLevel commitConsistencyLevel) { - // Potentially ignore commit consistency level if non-SERIAL write strategy is Accord + // Potentially ignore commit consistency level if TransactionalMode is full // since it is safe to match what non-SERIAL writes do commitConsistencyLevel = metadata.params.transactionalMode.commitCLForStrategy(commitConsistencyLevel); - return new TxnUpdate(createWriteFragments(clientState), createCondition(), commitConsistencyLevel); + // CAS requires using the new txn timestamp to correctly linearize some kinds of updates + return new TxnUpdate(createWriteFragments(clientState), createCondition(), commitConsistencyLevel, false); } private TxnCondition createCondition() diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index 19556a766b27..74a7bb98234f 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -44,10 +44,9 @@ import org.apache.cassandra.cql3.CQLStatement; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.ColumnSpecification; -import org.apache.cassandra.cql3.Ordering; -import org.apache.cassandra.cql3.terms.Constants; import org.apache.cassandra.cql3.Operation; import org.apache.cassandra.cql3.Operations; +import org.apache.cassandra.cql3.Ordering; import org.apache.cassandra.cql3.QualifiedName; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; @@ -66,6 +65,7 @@ import org.apache.cassandra.cql3.selection.ResultSetBuilder; import org.apache.cassandra.cql3.selection.Selection; import org.apache.cassandra.cql3.selection.Selection.Selectors; +import org.apache.cassandra.cql3.terms.Constants; import org.apache.cassandra.cql3.transactions.ReferenceOperation; import org.apache.cassandra.db.CBuilder; import org.apache.cassandra.db.Clustering; @@ -869,14 +869,16 @@ public List getMutations(ClientState state, { SingleTableSinglePartitionUpdatesCollector collector = new SingleTableSinglePartitionUpdatesCollector(metadata, updatedColumns); addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime, constructingAccordBaseUpdate); - return collector.toMutations(state); + // local means this is test or internal things that are bypassing distributed system modification/checks + return collector.toMutations(state, local); } else { HashMultiset perPartitionKeyCounts = HashMultiset.create(keys); SingleTableUpdatesCollector collector = new SingleTableUpdatesCollector(metadata, updatedColumns, perPartitionKeyCounts); addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime, constructingAccordBaseUpdate); - return collector.toMutations(state); + // local means this is test or internal things that are bypassing distributed system modification/checks + return collector.toMutations(state, local); } } diff --git a/src/java/org/apache/cassandra/cql3/statements/SingleTableSinglePartitionUpdatesCollector.java b/src/java/org/apache/cassandra/cql3/statements/SingleTableSinglePartitionUpdatesCollector.java index c650ef0370ea..6664c09e2ce7 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SingleTableSinglePartitionUpdatesCollector.java +++ b/src/java/org/apache/cassandra/cql3/statements/SingleTableSinglePartitionUpdatesCollector.java @@ -78,16 +78,16 @@ public PartitionUpdate.Builder getPartitionUpdateBuilder(TableMetadata metadata, * Returns a collection containing all the mutations. */ @Override - public List toMutations(ClientState state) + public List toMutations(ClientState state, boolean allowPotentialTxnConflicts) { // it is possible that a modification statement does not create any mutations // for example: DELETE FROM some_table WHERE part_key = 1 AND clust_key < 3 AND clust_key > 5 if (builder == null) return Collections.emptyList(); - return Collections.singletonList(createMutation(state, builder)); + return Collections.singletonList(createMutation(state, builder, allowPotentialTxnConflicts)); } - private IMutation createMutation(ClientState state, PartitionUpdate.Builder builder) + private IMutation createMutation(ClientState state, PartitionUpdate.Builder builder, boolean allowPotentialTxnConflicts) { IMutation mutation; @@ -96,7 +96,7 @@ private IMutation createMutation(ClientState state, PartitionUpdate.Builder buil else if (metadata.isCounter()) mutation = new CounterMutation(new Mutation(builder.build()), counterConsistencyLevel); else - mutation = new Mutation(builder.build()); + mutation = new Mutation(builder.build(), allowPotentialTxnConflicts); mutation.validateIndexedColumns(state); mutation.validateSize(MessagingService.current_version, CommitLogSegment.ENTRY_OVERHEAD_SIZE); diff --git a/src/java/org/apache/cassandra/cql3/statements/SingleTableUpdatesCollector.java b/src/java/org/apache/cassandra/cql3/statements/SingleTableUpdatesCollector.java index 2da6b8918080..c2497360a71d 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SingleTableUpdatesCollector.java +++ b/src/java/org/apache/cassandra/cql3/statements/SingleTableUpdatesCollector.java @@ -95,24 +95,24 @@ public PartitionUpdate.Builder getPartitionUpdateBuilder(TableMetadata metadata, * @return a collection containing all the mutations. */ @Override - public List toMutations(ClientState state) + public List toMutations(ClientState state, boolean allowPotentialTxnConflicts) { if (puBuilders.size() == 1) { PartitionUpdate.Builder builder = puBuilders.values().iterator().next(); - return Collections.singletonList(createMutation(state, builder)); + return Collections.singletonList(createMutation(state, builder, allowPotentialTxnConflicts)); } List ms = new ArrayList<>(puBuilders.size()); for (PartitionUpdate.Builder builder : puBuilders.values()) { - IMutation mutation = createMutation(state, builder); + IMutation mutation = createMutation(state, builder, allowPotentialTxnConflicts); ms.add(mutation); } return ms; } - private IMutation createMutation(ClientState state, PartitionUpdate.Builder builder) + private IMutation createMutation(ClientState state, PartitionUpdate.Builder builder, boolean allowPotentialTxnConflicts) { IMutation mutation; @@ -121,7 +121,7 @@ private IMutation createMutation(ClientState state, PartitionUpdate.Builder buil else if (metadata.isCounter()) mutation = new CounterMutation(new Mutation(builder.build()), counterConsistencyLevel); else - mutation = new Mutation(builder.build()); + mutation = new Mutation(builder.build(), allowPotentialTxnConflicts); mutation.validateIndexedColumns(state); mutation.validateSize(MessagingService.current_version, CommitLogSegment.ENTRY_OVERHEAD_SIZE); diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index 2adb5572e852..950eb8a6dad4 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -310,7 +310,7 @@ List createWriteFragments(ClientState state, QueryOptions opt AccordUpdate createUpdate(ClientState state, QueryOptions options, Map autoReads, Consumer keyConsumer) { - return new TxnUpdate(createWriteFragments(state, options, autoReads, keyConsumer), createCondition(options), null); + return new TxnUpdate(createWriteFragments(state, options, autoReads, keyConsumer), createCondition(options), null, false); } Keys toKeys(SortedSet keySet) diff --git a/src/java/org/apache/cassandra/cql3/statements/UpdatesCollector.java b/src/java/org/apache/cassandra/cql3/statements/UpdatesCollector.java index 40b75ab5faba..de19a8e567fd 100644 --- a/src/java/org/apache/cassandra/cql3/statements/UpdatesCollector.java +++ b/src/java/org/apache/cassandra/cql3/statements/UpdatesCollector.java @@ -30,5 +30,5 @@ public interface UpdatesCollector { PartitionUpdate.Builder getPartitionUpdateBuilder(TableMetadata metadata, DecoratedKey dk, ConsistencyLevel consistency); - List toMutations(ClientState state); + List toMutations(ClientState state, boolean allowPotentialTxnConflicts); } diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java index 2a8cca87fcac..a7abcbada0e0 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java @@ -62,6 +62,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableParams; +import org.apache.cassandra.schema.TableParams.Option; import org.apache.cassandra.schema.UserFunctions; import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.schema.Views; @@ -594,7 +595,10 @@ private TableParams validateAndUpdateTransactionalMigration(TableParams prev, Ta boolean modeChange = prev.transactionalMode != next.transactionalMode; boolean wasMigrating = prev.transactionalMigrationFrom.isMigrating(); - boolean forceMigrationChange = prev.transactionalMigrationFrom != next.transactionalMigrationFrom; + boolean explicitlySetMigrationFrom = attrs.hasOption(Option.TRANSACTIONAL_MIGRATION_FROM); + // set table to migrating + TransactionalMigrationFromMode newMigrateFrom = TransactionalMigrationFromMode.fromMode(prev.transactionalMode, next.transactionalMode); + boolean forceMigrationChange = modeChange && explicitlySetMigrationFrom && next.transactionalMigrationFrom != newMigrateFrom; if (modeChange && next.transactionalMode.accordIsEnabled && !DatabaseDescriptor.getAccordTransactionsEnabled()) throw ire(format("Cannot change transactional mode to %s for %s.%s with accord_transactions_enabled set to false", @@ -617,9 +621,7 @@ private TableParams validateAndUpdateTransactionalMigration(TableParams prev, Ta prev.transactionalMode, next.transactionalMode, keyspaceName, tableName)); - // set table to migrating - TransactionalMigrationFromMode migrateFrom = TransactionalMigrationFromMode.fromMode(prev.transactionalMode, next.transactionalMode); - return next.unbuild().transactionalMigrationFrom(migrateFrom).build(); + return next.unbuild().transactionalMigrationFrom(newMigrateFrom).build(); } diff --git a/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java b/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java index 76b765ae7073..05512e5a8e3f 100644 --- a/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java +++ b/src/java/org/apache/cassandra/db/AbstractMutationVerbHandler.java @@ -27,18 +27,23 @@ import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.exceptions.CoordinatorBehindException; import org.apache.cassandra.exceptions.InvalidRoutingException; +import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.ownership.VersionedEndpoints; +import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.NoSpamLogger; +import static org.apache.cassandra.exceptions.RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; + public abstract class AbstractMutationVerbHandler implements IVerbHandler { private static final Logger logger = LoggerFactory.getLogger(AbstractMutationVerbHandler.class); @@ -57,7 +62,17 @@ protected void processMessage(Message message, InetAddressAndPort respondTo) metadata = checkTokenOwnership(metadata, message, respondTo); metadata = checkSchemaVersion(metadata, message, respondTo); } - applyMutation(message, respondTo); + + try + { + applyMutation(message, respondTo); + } + catch (RetryOnDifferentSystemException e) + { + logger.debug("Responding with retry on different system"); + MessagingService.instance().respondWithFailure(RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM, message); + Tracing.trace("Payload application resulted in RetryOnDifferentSysten"); + } } abstract void applyMutation(Message message, InetAddressAndPort respondToAddress); @@ -85,9 +100,7 @@ private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message } } - // Mutations may intentionally be sent against an older Epoch so out of range checking doesn't work - // and could cause data to not end up where it needs to be for future operations - if (!message.payload.allowsOutOfRangeMutations() && !forToken.get().containsSelf()) + if (!forToken.get().containsSelf()) { StorageService.instance.incOutOfRangeOperationCount(); Keyspace.open(message.payload.getKeyspaceName()).metric.outOfRangeTokenWrites.inc(); @@ -95,7 +108,7 @@ private ClusterMetadata checkTokenOwnership(ClusterMetadata metadata, Message throw InvalidRoutingException.forWrite(respondTo, key.getToken(), metadata.epoch, message.payload); } - if (!message.payload.allowsOutOfRangeMutations() && forToken.lastModified().isAfter(message.epoch())) + if (forToken.lastModified().isAfter(message.epoch())) { TCMMetrics.instance.coordinatorBehindPlacements.mark(); throw new CoordinatorBehindException(String.format("Routing is correct, but coordinator needs to catch-up at least to epoch %s to maintain consistency. Current coordinator epoch is %s", diff --git a/src/java/org/apache/cassandra/db/CassandraKeyspaceWriteHandler.java b/src/java/org/apache/cassandra/db/CassandraKeyspaceWriteHandler.java index ef9d0d137778..825ef12292cd 100644 --- a/src/java/org/apache/cassandra/db/CassandraKeyspaceWriteHandler.java +++ b/src/java/org/apache/cassandra/db/CassandraKeyspaceWriteHandler.java @@ -86,10 +86,10 @@ private CommitLogPosition addToCommitLog(Mutation mutation) Set ids = new HashSet<>(); for (PartitionUpdate update : mutation.getPartitionUpdates()) { - if (update.metadata().params.memtable.factory().writesShouldSkipCommitLog()) + if (!update.metadata().params.memtable.factory().writesShouldSkipCommitLog()) ids.add(update.metadata().id); } - mutation = mutation.without(ids); + mutation = mutation.filter(ids::contains); } } // Note: It may be a good idea to precalculate none/all for the set of all tables in the keyspace, diff --git a/src/java/org/apache/cassandra/db/CounterMutation.java b/src/java/org/apache/cassandra/db/CounterMutation.java index 14d7a010ffe2..6935b09452f0 100644 --- a/src/java/org/apache/cassandra/db/CounterMutation.java +++ b/src/java/org/apache/cassandra/db/CounterMutation.java @@ -18,10 +18,15 @@ package org.apache.cassandra.db; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Lock; +import java.util.function.Predicate; import java.util.function.Supplier; +import javax.annotation.Nullable; import com.google.common.base.Function; import com.google.common.base.Objects; @@ -31,11 +36,16 @@ import com.google.common.util.concurrent.Striped; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; -import org.apache.cassandra.db.rows.*; -import org.apache.cassandra.db.filter.*; -import org.apache.cassandra.db.partitions.*; import org.apache.cassandra.db.context.CounterContext; +import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.Cell; +import org.apache.cassandra.db.rows.ColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.RowIterator; +import org.apache.cassandra.db.rows.UnfilteredRowIterators; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -44,10 +54,11 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.CacheService; import org.apache.cassandra.tracing.Tracing; -import org.apache.cassandra.utils.*; +import org.apache.cassandra.utils.CounterId; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.btree.BTreeSet; -import static java.util.concurrent.TimeUnit.*; +import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.net.MessagingService.VERSION_40; import static org.apache.cassandra.net.MessagingService.VERSION_50; import static org.apache.cassandra.net.MessagingService.VERSION_51; @@ -83,6 +94,12 @@ public Collection getPartitionUpdates() return mutation.getPartitionUpdates(); } + @Override + public boolean hasUpdateForTable(TableId tableId) + { + return mutation.hasUpdateForTable(tableId); + } + @Override public Supplier hintOnFailure() { @@ -156,6 +173,27 @@ public void apply() applyCounterMutation(); } + @Override + public @Nullable CounterMutation filter(Predicate test) + { + Mutation m = mutation.filter(test); + if (m == null) + return null; + if (m == mutation) + return this; + return new CounterMutation(m, consistency); + } + + /* + * Accord currently doesn't support interoperability with counters so no Accord transactions should read them + * anyways and it's safe to continue non-transactionally updating them + */ + @Override + public boolean allowsPotentialTransactionConflicts() + { + return true; + } + private void grabCounterLocks(Keyspace keyspace, List locks) throws WriteTimeoutException { long startTime = nanoTime(); diff --git a/src/java/org/apache/cassandra/db/IMutation.java b/src/java/org/apache/cassandra/db/IMutation.java index ba8d586deae3..1e77ee7fc878 100644 --- a/src/java/org/apache/cassandra/db/IMutation.java +++ b/src/java/org/apache/cassandra/db/IMutation.java @@ -19,7 +19,9 @@ import java.util.Collection; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import java.util.function.Supplier; +import javax.annotation.Nullable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.partitions.PartitionUpdate; @@ -37,6 +39,7 @@ public interface IMutation long getTimeout(TimeUnit unit); String toString(boolean shallow); Collection getPartitionUpdates(); + boolean hasUpdateForTable(TableId tableId); Supplier hintOnFailure(); default void validateIndexedColumns(ClientState state) @@ -71,8 +74,23 @@ static long dataSize(Collection mutations) return size; } - default boolean allowsOutOfRangeMutations() + /** + * True if this mutation is being applied by a transaction system or doesn't need to be + * and conflicts between this mutation and transactions systems that are managing all or part of this table + * should be assumed to be handled already (by either Paxos or Accord) and the mutation should be applied. + * + * This causes mutations against tables to fail if they are from a non-transaction sub-system such as mutations, + * logged and unlogged batches, hints, and read repair against tables that are being managed by a transaction system + * like Accord that can't safely read data that is written non-transactionally. + * + */ + default boolean allowsPotentialTransactionConflicts() { return false; } + + // Construct replacement mutation that is identical except it only includes updates for the specified tables + @Nullable IMutation filter(Predicate predicate); + + default void clearCachedSerializationsForRetry() {} } diff --git a/src/java/org/apache/cassandra/db/Keyspace.java b/src/java/org/apache/cassandra/db/Keyspace.java index 8b8cc9f2c912..f31662660bc5 100644 --- a/src/java/org/apache/cassandra/db/Keyspace.java +++ b/src/java/org/apache/cassandra/db/Keyspace.java @@ -47,8 +47,8 @@ import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.index.Index; import org.apache.cassandra.index.SecondaryIndexManager; -import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.locator.AbstractReplicationStrategy; import org.apache.cassandra.metrics.KeyspaceMetrics; import org.apache.cassandra.repair.KeyspaceRepairManager; @@ -58,14 +58,15 @@ import org.apache.cassandra.schema.SchemaProvider; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.OpOrder; -import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import org.apache.cassandra.utils.concurrent.Promise; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; @@ -545,6 +546,7 @@ else if (isDeferrable) } try (WriteContext ctx = getWriteHandler().beginWrite(mutation, makeDurable)) { + ConsensusMigrationMutationHelper.validateSafeToExecuteNonTransactionally(mutation); for (PartitionUpdate upd : mutation.getPartitionUpdates()) { ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id); diff --git a/src/java/org/apache/cassandra/db/Mutation.java b/src/java/org/apache/cassandra/db/Mutation.java index d8ded1eeb141..f7afb8d5680b 100644 --- a/src/java/org/apache/cassandra/db/Mutation.java +++ b/src/java/org/apache/cassandra/db/Mutation.java @@ -19,15 +19,17 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLongFieldUpdater; +import java.util.function.Predicate; import java.util.function.Supplier; +import javax.annotation.Nullable; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableCollection; @@ -54,6 +56,7 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.concurrent.Future; +import static com.google.common.base.Preconditions.checkState; import static org.apache.cassandra.net.MessagingService.VERSION_40; import static org.apache.cassandra.net.MessagingService.VERSION_50; import static org.apache.cassandra.net.MessagingService.VERSION_51; @@ -62,7 +65,8 @@ public class Mutation implements IMutation, Supplier { public static final MutationSerializer serializer = new MutationSerializer(); - public static final int ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG = 0x01; + public static final int ALLOW_POTENTIAL_TRANSACTION_CONFLICTS = 0x01; + // todo this is redundant // when we remove it, also restore SerializationsTest.testMutationRead to not regenerate new Mutations each test @@ -84,33 +88,41 @@ public class Mutation implements IMutation, Supplier private static final int SERIALIZATION_VERSION_COUNT = MessagingService.Version.values().length; // Contains serialized representations of this mutation. - // Note: there is no functionality to clear/remove serialized instances, because a mutation must never - // be modified (e.g. calling add(PartitionUpdate)) when it's being serialized. + // Note: The cached serializations can be cleared when CoordinatorBehindException is being retried private final Serialization[] cachedSerializations = new Serialization[SERIALIZATION_VERSION_COUNT]; /** @see CassandraRelevantProperties#CACHEABLE_MUTATION_SIZE_LIMIT */ private static final long CACHEABLE_MUTATION_SIZE_LIMIT = CassandraRelevantProperties.CACHEABLE_MUTATION_SIZE_LIMIT.getLong(); - private boolean allowOutOfRangeMutations; + // Paxos & Accord manage conflicts directly and needs to apply mutations to tables/ranges + // that are only safe to write to from a transaction system. + // Don't refuse to apply this mutation because it should go through a transaction system + // because it is being applied by one or in a context where transaction conflicts don't occur + private boolean allowPotentialTransactionConflicts; public Mutation(PartitionUpdate update) { - this(update.metadata().keyspace, update.partitionKey(), ImmutableMap.of(update.metadata().id, update), approxTime.now(), update.metadata().params.cdc); + this(update, false); + } + + public Mutation(PartitionUpdate update, boolean allowPotentialTransactionConflicts) + { + this(update.metadata().keyspace, update.partitionKey(), ImmutableMap.of(update.metadata().id, update), approxTime.now(), update.metadata().params.cdc, allowPotentialTransactionConflicts); } - public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos, boolean allowOutOfRangeMutations) + public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos, boolean allowPotentialTransactionConflicts) { - this(keyspaceName, key, modifications, approxCreatedAtNanos, cdcEnabled(modifications.values()), allowOutOfRangeMutations); + this(keyspaceName, key, modifications, approxCreatedAtNanos, cdcEnabled(modifications.values()), allowPotentialTransactionConflicts); } - public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos, boolean cdcEnabled, boolean allowOutOfRangeMutations) + public Mutation(String keyspaceName, DecoratedKey key, ImmutableMap modifications, long approxCreatedAtNanos, boolean cdcEnabled, boolean allowPotentialTransactionConflicts) { this.keyspaceName = keyspaceName; this.key = key; this.modifications = modifications; this.cdcEnabled = cdcEnabled; this.approxCreatedAtNanos = approxCreatedAtNanos; - this.allowOutOfRangeMutations = allowOutOfRangeMutations; + this.allowPotentialTransactionConflicts = allowPotentialTransactionConflicts; } private static boolean cdcEnabled(Iterable modifications) @@ -121,26 +133,35 @@ private static boolean cdcEnabled(Iterable modifications) return cdc; } - public Mutation without(Set tableIds) + @Override + public @Nullable Mutation filter(Predicate predicate) { - if (tableIds.isEmpty()) + boolean allMatch = true; + boolean noneMatch = true; + for (TableId tableId : modifications.keySet()) + { + boolean test = predicate.test(tableId); + allMatch &= test; + noneMatch &= !test; + } + if (allMatch) return this; + if (noneMatch) + return null; ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); for (Map.Entry update : modifications.entrySet()) - { - if (!tableIds.contains(update.getKey())) - { + if (predicate.test(update.getKey())) builder.put(update); - } - } - return new Mutation(keyspaceName, key, builder.build(), approxCreatedAtNanos, allowOutOfRangeMutations); + Map updates = builder.build(); + checkState(!updates.isEmpty(), "Updates should not be empty"); + return new Mutation(keyspaceName, key, builder.build(), approxCreatedAtNanos, allowPotentialTransactionConflicts); } - public Mutation without(TableId tableId) + public @Nullable Mutation without(TableId tableId) { - return without(Collections.singleton(tableId)); + return filter(otherTableId -> !tableId.equals(otherTableId)); } public String getKeyspaceName() @@ -168,6 +189,12 @@ public long getApproxCreatedAtNanos() return approxCreatedAtNanos; } + @Override + public boolean hasUpdateForTable(TableId tableId) + { + return modifications.containsKey(tableId); + } + @Override public Supplier hintOnFailure() { @@ -215,18 +242,18 @@ public static Mutation merge(Collection mutations) { assert !mutations.isEmpty(); - if (mutations.size() == ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG) + if (mutations.size() == 1) return mutations.iterator().next(); Set updatedTables = new HashSet<>(); String ks = null; DecoratedKey key = null; - Boolean allowOutOfRangeMutations = null; + Boolean allowPotentialTransactionConflicts = null; for (Mutation mutation : mutations) { - if (allowOutOfRangeMutations != null && allowOutOfRangeMutations != mutation.allowOutOfRangeMutations) - throw new IllegalArgumentException("Can't merge mutations with differing policies on allowing out of range mutations"); - allowOutOfRangeMutations = mutation.allowOutOfRangeMutations; + if (allowPotentialTransactionConflicts != null && allowPotentialTransactionConflicts != mutation.allowPotentialTransactionConflicts) + throw new IllegalArgumentException("Can't merge mutations with differing policies on allowing potential transaction conflicts"); + allowPotentialTransactionConflicts = mutation.allowPotentialTransactionConflicts; updatedTables.addAll(mutation.modifications.keySet()); if (ks != null && !ks.equals(mutation.keyspaceName)) throw new IllegalArgumentException(); @@ -250,10 +277,10 @@ public static Mutation merge(Collection mutations) if (updates.isEmpty()) continue; - modifications.put(table, updates.size() == ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG ? updates.get(0) : PartitionUpdate.merge(updates)); + modifications.put(table, updates.size() == 1 ? updates.get(0) : PartitionUpdate.merge(updates)); updates.clear(); } - return new Mutation(ks, key, modifications.build(), approxTime.now(), allowOutOfRangeMutations); + return new Mutation(ks, key, modifications.build(), approxTime.now(), allowPotentialTransactionConflicts); } public Future applyFuture() @@ -310,25 +337,26 @@ public boolean trackedByCDC() return cdcEnabled; } - public Mutation allowOutOfRangeMutations() + public void allowPotentialTransactionConflicts() { - allowOutOfRangeMutations = true; - return this; + allowPotentialTransactionConflicts = true; + Arrays.fill(cachedSerializations, null); } - public boolean allowsOutOfRangeMutations() + @Override + public boolean allowsPotentialTransactionConflicts() { - return allowOutOfRangeMutations; + return allowPotentialTransactionConflicts; } - private static int allowsOutOfRangeMutationsFlag(boolean allowOutOfRangeMutations) + private static int allowPotentialTransactionConflictsFlag(boolean allowPotentialTransactionConflicts) { - return allowOutOfRangeMutations ? ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG : 0; + return allowPotentialTransactionConflicts ? ALLOW_POTENTIAL_TRANSACTION_CONFLICTS : 0; } - private static boolean allowsOutOfRangeMutations(int flags) + public static boolean allowPotentialTransactionConflicts(int flags) { - return (flags & ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG) != 0; + return (flags & ALLOW_POTENTIAL_TRANSACTION_CONFLICTS) != 0; } public String toString() @@ -336,6 +364,12 @@ public String toString() return toString(false); } + @Override + public void clearCachedSerializationsForRetry() + { + Arrays.fill(cachedSerializations, null); + } + public String toString(boolean shallow) { StringBuilder buff = new StringBuilder("Mutation("); @@ -404,6 +438,13 @@ public static SimpleBuilder simpleBuilder(String keyspaceName, DecoratedKey part */ public interface SimpleBuilder { + /** + * Assume any potential transaction conflicts that might occur by applying this mutation are already + * being handled by the caller + * @return this builder + */ + public SimpleBuilder allowPotentialTransactionConflicts(); + /** * Sets the timestamp to use for the following additions to this builder or any derived (update or row) builder. * @@ -517,7 +558,11 @@ static void serializeInternal(PartitionUpdate.PartitionUpdateSerializer serializ Map modifications = mutation.modifications; if (version >= VERSION_51) - out.write(allowsOutOfRangeMutationsFlag(mutation.allowsOutOfRangeMutations())); + { + int flags = 0; + flags |= allowPotentialTransactionConflictsFlag(mutation.allowPotentialTransactionConflicts); + out.write(flags); + } /* serialize the modifications in the mutation */ int size = modifications.size(); @@ -538,11 +583,11 @@ public Mutation deserialize(DataInputPlus in, int version, DeserializationHelper { teeIn = new TeeDataInputPlus(in, dob, CACHEABLE_MUTATION_SIZE_LIMIT); - boolean allowsOutOfRangeMutations = false; + boolean allowPotentialTransactionConflicts = false; if (version >= VERSION_51) { int flags = teeIn.readByte(); - allowsOutOfRangeMutations = allowsOutOfRangeMutations(flags); + allowPotentialTransactionConflicts = allowPotentialTransactionConflicts(flags); } int size = teeIn.readUnsignedVInt32(); assert size > 0; @@ -550,7 +595,7 @@ public Mutation deserialize(DataInputPlus in, int version, DeserializationHelper PartitionUpdate update = PartitionUpdate.serializer.deserialize(teeIn, version, flag); if (size == 1) { - m = new Mutation(update); + m = new Mutation(update, allowPotentialTransactionConflicts); } else { @@ -563,7 +608,7 @@ public Mutation deserialize(DataInputPlus in, int version, DeserializationHelper update = PartitionUpdate.serializer.deserialize(teeIn, version, flag); modifications.put(update.metadata().id, update); } - m = new Mutation(update.metadata().keyspace, dk, modifications.build(), approxTime.now(), allowsOutOfRangeMutations); + m = new Mutation(update.metadata().keyspace, dk, modifications.build(), approxTime.now(), allowPotentialTransactionConflicts); } //Only cache serializations that don't hit the limit @@ -642,7 +687,7 @@ long serializedSize(PartitionUpdate.PartitionUpdateSerializer serializer, Mutati if (size == 0L) { if (version >= VERSION_51) - size += TypeSizes.sizeof((byte)ALLOW_OUT_OF_RANGE_MUTATIONS_FLAG); // flags + size += TypeSizes.sizeof((byte)ALLOW_POTENTIAL_TRANSACTION_CONFLICTS); // flags size += TypeSizes.sizeofUnsignedVInt(mutation.modifications.size()); for (PartitionUpdate partitionUpdate : mutation.modifications.values()) size += serializer.serializedSize(partitionUpdate, version); @@ -663,10 +708,18 @@ public static class PartitionUpdateCollector private final long approxCreatedAtNanos = approxTime.now(); private boolean empty = true; + private boolean allowPotentialTransactionConflicts; + public PartitionUpdateCollector(String keyspaceName, DecoratedKey key) + { + this(keyspaceName, key, false); + } + + public PartitionUpdateCollector(String keyspaceName, DecoratedKey key, boolean allowPotentialTransactionConflicts) { this.keyspaceName = keyspaceName; this.key = key; + this.allowPotentialTransactionConflicts = allowPotentialTransactionConflicts; } public PartitionUpdateCollector add(PartitionUpdate partitionUpdate) @@ -700,7 +753,7 @@ public boolean isEmpty() public Mutation build() { - return new Mutation(keyspaceName, key, modifications.build(), approxCreatedAtNanos, false); + return new Mutation(keyspaceName, key, modifications.build(), approxCreatedAtNanos, allowPotentialTransactionConflicts); } } } diff --git a/src/java/org/apache/cassandra/db/SimpleBuilders.java b/src/java/org/apache/cassandra/db/SimpleBuilders.java index 3564eb1f100a..af13cb912691 100644 --- a/src/java/org/apache/cassandra/db/SimpleBuilders.java +++ b/src/java/org/apache/cassandra/db/SimpleBuilders.java @@ -18,21 +18,30 @@ package org.apache.cassandra.db; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.db.context.CounterContext; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.CollectionType; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.BTreeRow; import org.apache.cassandra.db.rows.BufferCell; import org.apache.cassandra.db.rows.Cell; import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.db.rows.Row; -import org.apache.cassandra.db.marshal.*; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.CounterId; import org.apache.cassandra.utils.FBUtilities; @@ -111,12 +120,20 @@ public static class MutationBuilder extends AbstractBuilder updateBuilders = new HashMap<>(); + private boolean allowPotentialTransactionConflicts = false; + public MutationBuilder(String keyspaceName, DecoratedKey key) { this.keyspaceName = keyspaceName; this.key = key; } + public MutationBuilder allowPotentialTransactionConflicts() + { + allowPotentialTransactionConflicts = true; + return this; + } + public PartitionUpdate.SimpleBuilder update(TableMetadata metadata) { assert metadata.keyspace.equals(keyspaceName); @@ -145,9 +162,9 @@ public Mutation build() assert !updateBuilders.isEmpty() : "Cannot create empty mutation"; if (updateBuilders.size() == 1) - return new Mutation(updateBuilders.values().iterator().next().build()); + return new Mutation(updateBuilders.values().iterator().next().build(), allowPotentialTransactionConflicts); - Mutation.PartitionUpdateCollector mutationBuilder = new Mutation.PartitionUpdateCollector(keyspaceName, key); + Mutation.PartitionUpdateCollector mutationBuilder = new Mutation.PartitionUpdateCollector(keyspaceName, key, allowPotentialTransactionConflicts); for (PartitionUpdateBuilder builder : updateBuilders.values()) mutationBuilder.add(builder.build()); return mutationBuilder.build(); diff --git a/src/java/org/apache/cassandra/db/virtual/VirtualMutation.java b/src/java/org/apache/cassandra/db/virtual/VirtualMutation.java index 8c3b5b4afda6..f07e0e3282b7 100644 --- a/src/java/org/apache/cassandra/db/virtual/VirtualMutation.java +++ b/src/java/org/apache/cassandra/db/virtual/VirtualMutation.java @@ -19,7 +19,9 @@ import java.util.Collection; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import java.util.function.Supplier; +import javax.annotation.Nullable; import com.google.common.base.MoreObjects; import com.google.common.collect.ImmutableMap; @@ -107,6 +109,12 @@ public Collection getPartitionUpdates() return modifications.values(); } + @Override + public boolean hasUpdateForTable(TableId tableId) + { + return modifications.containsKey(tableId); + } + @Override public Supplier hintOnFailure() { @@ -123,4 +131,19 @@ public void validateSize(int version, int overhead) { // no-op } + + @Override + public @Nullable VirtualMutation filter(Predicate test) + { + throw new UnsupportedOperationException(); + } + + /* + * Accord doesn't support reading/writing virtual tables yet so updating them non-transactionally is always safe + */ + @Override + public boolean allowsPotentialTransactionConflicts() + { + return true; + } } diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailure.java b/src/java/org/apache/cassandra/exceptions/RequestFailure.java index b1dbbd8e743c..2b2282731a87 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailure.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailure.java @@ -35,7 +35,10 @@ /** * Allow inclusion of a serialized exception in failure response messages * This continues to use the same verb as the old failure response (whether a message payload or parameter) - * and has a nullable failure field that may contain a serialized in later versions. + * and has a nullable failure field that may contain a serialized exception in later versions. + * + * It's important to note RequestFailure is not a singleton for each type, unlike RequestFailureReason, + * since it might include a stack trace so don't compare using identity. */ public class RequestFailure { @@ -50,6 +53,7 @@ public class RequestFailure public static final RequestFailure INDEX_NOT_AVAILABLE = new RequestFailure(RequestFailureReason.INDEX_NOT_AVAILABLE); public static final RequestFailure COORDINATOR_BEHIND = new RequestFailure(RequestFailureReason.COORDINATOR_BEHIND); public static final RequestFailure READ_TOO_MANY_INDEXES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_INDEXES); + public static final RequestFailure RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM = new RequestFailure(RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM); static { @@ -114,6 +118,12 @@ public static RequestFailure forException(Throwable t) if (t instanceof InvalidRoutingException) return INVALID_ROUTING; + if (t instanceof RetryOnDifferentSystemException) + return RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; + + if (t instanceof CoordinatorBehindException) + return COORDINATOR_BEHIND; + return UNKNOWN; } @@ -133,6 +143,7 @@ public static RequestFailure forReason(RequestFailureReason reason) case INDEX_NOT_AVAILABLE: return INDEX_NOT_AVAILABLE; case COORDINATOR_BEHIND: return COORDINATOR_BEHIND; case READ_TOO_MANY_INDEXES: return READ_TOO_MANY_INDEXES; + case RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM: return RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; } } diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java index 560b8d68e0ad..600cdebf867b 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java @@ -34,21 +34,23 @@ public enum RequestFailureReason { - UNKNOWN (0), - READ_TOO_MANY_TOMBSTONES (1), - TIMEOUT (2), - INCOMPATIBLE_SCHEMA (3), - READ_SIZE (4), + UNKNOWN (0), + READ_TOO_MANY_TOMBSTONES (1), + TIMEOUT (2), + INCOMPATIBLE_SCHEMA (3), + READ_SIZE (4), // below reason is only logged, but it does not have associated exception - NODE_DOWN (5), - INDEX_NOT_AVAILABLE (6), + NODE_DOWN (5), + INDEX_NOT_AVAILABLE (6), // below reason does not have an associated exception - READ_TOO_MANY_INDEXES (7), - NOT_CMS (8), - INVALID_ROUTING (9), - COORDINATOR_BEHIND (10), + READ_TOO_MANY_INDEXES (7), + NOT_CMS (8), + INVALID_ROUTING (9), + COORDINATOR_BEHIND (10), // The following codes have been ported from an external fork, where they were offset explicitly to avoid conflicts. - INDEX_BUILD_IN_PROGRESS (503); + INDEX_BUILD_IN_PROGRESS (503), + RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM (504), + ; static { diff --git a/src/java/org/apache/cassandra/exceptions/RetryOnDifferentSystemException.java b/src/java/org/apache/cassandra/exceptions/RetryOnDifferentSystemException.java new file mode 100644 index 000000000000..e0ca033a74e4 --- /dev/null +++ b/src/java/org/apache/cassandra/exceptions/RetryOnDifferentSystemException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.exceptions; + +/** + * Thrown when a non-transactional operation is attempted when the operation needs to be done transactionally (or vice versa) + * and it could interfere with operations performed transactionally or can't be applied by the chosen transaction system. + * + * The correct way to handle this is to forward the error the originator of the operation who can then retry it on + * the correct system. + */ +public class RetryOnDifferentSystemException extends RuntimeException +{ +} diff --git a/src/java/org/apache/cassandra/hints/HintDiagnostics.java b/src/java/org/apache/cassandra/hints/HintDiagnostics.java index 285193b8a878..67e1f7380991 100644 --- a/src/java/org/apache/cassandra/hints/HintDiagnostics.java +++ b/src/java/org/apache/cassandra/hints/HintDiagnostics.java @@ -37,44 +37,44 @@ static void dispatcherCreated(HintsDispatcher dispatcher) { if (isEnabled(HintEventType.DISPATCHER_CREATED)) service.publish(new HintEvent(HintEventType.DISPATCHER_CREATED, dispatcher, - dispatcher.hostId, dispatcher.address, null, null, null, null)); + dispatcher.hostId, dispatcher.address, null, null, null, null, null)); } static void dispatcherClosed(HintsDispatcher dispatcher) { if (isEnabled(HintEventType.DISPATCHER_CLOSED)) service.publish(new HintEvent(HintEventType.DISPATCHER_CLOSED, dispatcher, - dispatcher.hostId, dispatcher.address, null, null, null, null)); + dispatcher.hostId, dispatcher.address, null, null, null, null, null)); } static void dispatchPage(HintsDispatcher dispatcher) { if (isEnabled(HintEventType.DISPATCHER_PAGE)) service.publish(new HintEvent(HintEventType.DISPATCHER_PAGE, dispatcher, - dispatcher.hostId, dispatcher.address, null, null, null, null)); + dispatcher.hostId, dispatcher.address, null, null, null, null, null)); } static void abortRequested(HintsDispatcher dispatcher) { if (isEnabled(HintEventType.ABORT_REQUESTED)) service.publish(new HintEvent(HintEventType.ABORT_REQUESTED, dispatcher, - dispatcher.hostId, dispatcher.address, null, null, null, null)); + dispatcher.hostId, dispatcher.address, null, null, null, null, null)); } - static void pageSuccessResult(HintsDispatcher dispatcher, long success, long failures, long timeouts) + static void pageSuccessResult(HintsDispatcher dispatcher, long success, long failures, long timeouts, long retryDifferentSystem) { if (isEnabled(HintEventType.DISPATCHER_HINT_RESULT)) service.publish(new HintEvent(HintEventType.DISPATCHER_HINT_RESULT, dispatcher, dispatcher.hostId, dispatcher.address, HintResult.PAGE_SUCCESS, - success, failures, timeouts)); + success, failures, timeouts, retryDifferentSystem)); } - static void pageFailureResult(HintsDispatcher dispatcher, long success, long failures, long timeouts) + static void pageFailureResult(HintsDispatcher dispatcher, long success, long failures, long timeouts, long retryDifferentSystem) { if (isEnabled(HintEventType.DISPATCHER_HINT_RESULT)) service.publish(new HintEvent(HintEventType.DISPATCHER_HINT_RESULT, dispatcher, dispatcher.hostId, dispatcher.address, HintResult.PAGE_FAILURE, - success, failures, timeouts)); + success, failures, timeouts, retryDifferentSystem)); } private static boolean isEnabled(HintEventType type) diff --git a/src/java/org/apache/cassandra/hints/HintEvent.java b/src/java/org/apache/cassandra/hints/HintEvent.java index 695357e9b63c..d709fbcd8288 100644 --- a/src/java/org/apache/cassandra/hints/HintEvent.java +++ b/src/java/org/apache/cassandra/hints/HintEvent.java @@ -64,10 +64,13 @@ enum HintResult private final Long pageHintsFailed; @Nullable private final Long pageHintsTimeout; + @Nullable + private final Long pageHintsRetryDifferentSystem; HintEvent(HintEventType type, HintsDispatcher dispatcher, UUID targetHostId, InetAddressAndPort targetAddress, @Nullable HintResult dispatchResult, @Nullable Long pageHintsSuccessful, - @Nullable Long pageHintsFailed, @Nullable Long pageHintsTimeout) + @Nullable Long pageHintsFailed, @Nullable Long pageHintsTimeout, + @Nullable Long pageHintsRetryDifferentSystem) { this.type = type; this.dispatcher = dispatcher; @@ -77,6 +80,7 @@ enum HintResult this.pageHintsSuccessful = pageHintsSuccessful; this.pageHintsFailed = pageHintsFailed; this.pageHintsTimeout = pageHintsTimeout; + this.pageHintsRetryDifferentSystem = pageHintsRetryDifferentSystem; } public Enum getType() @@ -96,6 +100,7 @@ public HashMap toMap() ret.put("hint.page.hints_succeeded", pageHintsSuccessful); ret.put("hint.page.hints_failed", pageHintsFailed); ret.put("hint.page.hints_timed_out", pageHintsTimeout); + ret.put("hint.page.hints_retry_different_system", pageHintsRetryDifferentSystem); } return ret; } diff --git a/src/java/org/apache/cassandra/hints/HintVerbHandler.java b/src/java/org/apache/cassandra/hints/HintVerbHandler.java index 73e6967e398e..c91219d0d5d2 100644 --- a/src/java/org/apache/cassandra/hints/HintVerbHandler.java +++ b/src/java/org/apache/cassandra/hints/HintVerbHandler.java @@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -34,6 +35,8 @@ import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.NodeId; +import static org.apache.cassandra.exceptions.RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; + /** * Verb handler used both for hint dispatch and streaming. * @@ -100,8 +103,15 @@ else if (!StorageProxy.instance.appliesLocally(hint.mutation)) } else { - // the common path - the node is both the destination and a valid replica for the hint. - hint.applyFuture().addCallback(o -> respond(message), e -> logger.debug("Failed to apply hint", e)); + try + { + // the common path - the node is both the destination and a valid replica for the hint. + hint.applyFuture().addCallback(o -> respond(message), e -> logger.debug("Failed to apply hint", e)); + } + catch (RetryOnDifferentSystemException e) + { + MessagingService.instance().respondWithFailure(RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM, message); + } } } diff --git a/src/java/org/apache/cassandra/hints/HintsBufferPool.java b/src/java/org/apache/cassandra/hints/HintsBufferPool.java index 275dbc37e624..f6a0c2b4606b 100644 --- a/src/java/org/apache/cassandra/hints/HintsBufferPool.java +++ b/src/java/org/apache/cassandra/hints/HintsBufferPool.java @@ -21,6 +21,8 @@ import java.util.UUID; import java.util.concurrent.BlockingQueue; +import com.google.common.annotations.VisibleForTesting; + import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -133,6 +135,13 @@ private HintsBuffer createBuffer() return HintsBuffer.create(bufferSize); } + @VisibleForTesting + public void clearUnsafe() + { + if (currentBuffer != null) + currentBuffer = currentBuffer.recycle(); + } + public void close() { currentBuffer.free(); diff --git a/src/java/org/apache/cassandra/hints/HintsCatalog.java b/src/java/org/apache/cassandra/hints/HintsCatalog.java index e989850dff98..902b239dec87 100644 --- a/src/java/org/apache/cassandra/hints/HintsCatalog.java +++ b/src/java/org/apache/cassandra/hints/HintsCatalog.java @@ -20,7 +20,11 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Stream; import javax.annotation.Nullable; @@ -30,10 +34,10 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.FSError; import org.apache.cassandra.io.FSReadError; import org.apache.cassandra.io.FSWriteError; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.utils.NativeLibrary; import org.apache.cassandra.utils.SyncUtil; @@ -110,6 +114,11 @@ HintsStore getNullable(UUID hostId) return stores.get(hostId); } + void deleteAllHintsUnsafe() + { + stores.values().forEach(HintsStore::deleteAllHintsUnsafe); + } + /** * Delete all hints for all host ids. * diff --git a/src/java/org/apache/cassandra/hints/HintsDispatchExecutor.java b/src/java/org/apache/cassandra/hints/HintsDispatchExecutor.java index 5a566e363980..5777062db97d 100644 --- a/src/java/org/apache/cassandra/hints/HintsDispatchExecutor.java +++ b/src/java/org/apache/cassandra/hints/HintsDispatchExecutor.java @@ -26,9 +26,9 @@ import java.util.function.BooleanSupplier; import java.util.function.Predicate; import java.util.function.Supplier; +import javax.annotation.Nullable; import com.google.common.util.concurrent.RateLimiter; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,10 +39,11 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.hints.HintsService.RETRY_ON_DIFFERENT_SYSTEM_UUID; /** * A multi-threaded (by default) executor for dispatching hints. @@ -273,7 +274,7 @@ private boolean dispatch(HintsDescriptor descriptor) logger.trace("Dispatching hints file {}", descriptor.hintsFileName); InetAddressAndPort address = StorageService.instance.getEndpointForHostId(hostId); - if (address != null) + if (address != null || hostId == RETRY_ON_DIFFERENT_SYSTEM_UUID) return deliver(descriptor, address); // address == null means the target no longer exist; find new home for each hint entry. @@ -281,12 +282,12 @@ private boolean dispatch(HintsDescriptor descriptor) return true; } - private boolean deliver(HintsDescriptor descriptor, InetAddressAndPort address) + private boolean deliver(HintsDescriptor descriptor, @Nullable InetAddressAndPort address) { File file = descriptor.file(hintsDirectory); InputPosition offset = store.getDispatchOffset(descriptor); - BooleanSupplier shouldAbort = () -> !isAlive.test(address) || isPaused.get(); + BooleanSupplier shouldAbort = () -> (!hostId.equals(RETRY_ON_DIFFERENT_SYSTEM_UUID) && (address == null || !isAlive.test(address)) || isPaused.get()); try (HintsDispatcher dispatcher = HintsDispatcher.create(file, rateLimiter, address, descriptor.hostId, shouldAbort)) { if (offset != null) @@ -298,7 +299,7 @@ private boolean deliver(HintsDescriptor descriptor, InetAddressAndPort address) { store.delete(descriptor); store.cleanUp(descriptor); - logger.info("Finished hinted handoff of file {} to endpoint {}: {}", descriptor.fileName(), address, hostId); + logger.info("Finished hinted handoff of file {} to destination {}: {}", descriptor.fileName(), dispatcher.destination(), hostId); return true; } else @@ -322,7 +323,7 @@ private void handleDispatchFailure(HintsDispatcher dispatcher, HintsDescriptor d { store.markDispatchOffset(descriptor, dispatcher.dispatchPosition()); store.offerFirst(descriptor); - logger.info("Finished hinted handoff of file {} to endpoint {}: {}, partially", descriptor.fileName(), address, hostId); + logger.info("Finished hinted handoff of file {} to destination {}: {}, partially", descriptor.fileName(), dispatcher.destination(), hostId); } // for each hint in the hints file for a node that isn't part of the ring anymore, write RF hints for each replica diff --git a/src/java/org/apache/cassandra/hints/HintsDispatcher.java b/src/java/org/apache/cassandra/hints/HintsDispatcher.java index ce1f7282a6d7..6022f284df9a 100644 --- a/src/java/org/apache/cassandra/hints/HintsDispatcher.java +++ b/src/java/org/apache/cassandra/hints/HintsDispatcher.java @@ -19,31 +19,64 @@ import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.BitSet; import java.util.Collection; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.function.BooleanSupplier; import java.util.function.Function; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import com.google.common.util.concurrent.RateLimiter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.concurrent.DebuggableTask.RunnableDebuggableTask; +import org.apache.cassandra.concurrent.ImmediateExecutor; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.Mutation; import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.io.util.File; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Replica; import org.apache.cassandra.metrics.HintsServiceMetrics; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.RequestCallback; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.IAccordService.AsyncTxnResult; +import org.apache.cassandra.service.accord.txn.TxnResult; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.SplitMutation; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.ownership.VersionedEndpoints; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.MonotonicClock; +import org.apache.cassandra.utils.NoSpamLogger; import org.apache.cassandra.utils.concurrent.Condition; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.FAILURE; import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.INTERRUPTED; +import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.RETRY_DIFFERENT_SYSTEM; import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.SUCCESS; import static org.apache.cassandra.hints.HintsDispatcher.Callback.Outcome.TIMEOUT; +import static org.apache.cassandra.hints.HintsService.RETRY_ON_DIFFERENT_SYSTEM_UUID; +import static org.apache.cassandra.metrics.HintsServiceMetrics.ACCORD_HINT_ENDPOINT; import static org.apache.cassandra.metrics.HintsServiceMetrics.updateDelayMetrics; import static org.apache.cassandra.net.Verb.HINT_REQ; +import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; @@ -56,19 +89,28 @@ final class HintsDispatcher implements AutoCloseable { private static final Logger logger = LoggerFactory.getLogger(HintsDispatcher.class); + private static final NoSpamLogger noSpamLogger = NoSpamLogger.getLogger(logger, 1, TimeUnit.MINUTES); private enum Action { CONTINUE, ABORT } private final HintsReader reader; final UUID hostId; + + @Nullable final InetAddressAndPort address; private final int messagingVersion; private final BooleanSupplier abortRequested; private InputPosition currentPagePosition; - private HintsDispatcher(HintsReader reader, UUID hostId, InetAddressAndPort address, int messagingVersion, BooleanSupplier abortRequested) + // Hints from the batch log that were attempted on Accord don't have a list of hosts that need hinting + // since Accord doesn't expose that on failure. If Accord no longer manages the range for this hint then we need + // to send the hint to all replicas after the page succeeds + private final Queue hintsNeedingRehinting = new LinkedList<>(); + + private HintsDispatcher(HintsReader reader, UUID hostId, @Nullable InetAddressAndPort address, int messagingVersion, BooleanSupplier abortRequested) { + checkArgument(address != null ^ hostId.equals(RETRY_ON_DIFFERENT_SYSTEM_UUID), "address must be nonnull or hostId must be " + RETRY_ON_DIFFERENT_SYSTEM_UUID); currentPagePosition = null; this.reader = reader; @@ -78,9 +120,9 @@ private HintsDispatcher(HintsReader reader, UUID hostId, InetAddressAndPort addr this.abortRequested = abortRequested; } - static HintsDispatcher create(File file, RateLimiter rateLimiter, InetAddressAndPort address, UUID hostId, BooleanSupplier abortRequested) + static HintsDispatcher create(File file, RateLimiter rateLimiter, @Nullable InetAddressAndPort address, UUID hostId, BooleanSupplier abortRequested) { - int messagingVersion = MessagingService.instance().versions.get(address); + int messagingVersion = address == null ? MessagingService.current_version : MessagingService.instance().versions.get(address); HintsDispatcher dispatcher = new HintsDispatcher(HintsReader.open(file, rateLimiter), hostId, address, messagingVersion, abortRequested); HintDiagnostics.dispatcherCreated(dispatcher); return dispatcher; @@ -92,6 +134,11 @@ public void close() reader.close(); } + String destination() + { + return address == null ? "RETRY_ON_DIFFERENT_SYSTEM" : address.toString(); + } + void seek(InputPosition position) { reader.seek(position); @@ -130,7 +177,19 @@ private Action dispatch(HintsReader.Page page) private Action sendHintsAndAwait(HintsReader.Page page) { - Collection callbacks = new ArrayList<>(); + try + { + return doSendHintsAndAwait(page, null); + } + finally + { + hintsNeedingRehinting.clear(); + } + } + + private Action doSendHintsAndAwait(HintsReader.Page page, @Nullable BitSet hintsFilter) + { + List callbacks = new ArrayList<>(); /* * If hints file messaging version matches the version of the target host, we'll use the optimised path - @@ -138,50 +197,135 @@ private Action sendHintsAndAwait(HintsReader.Page page) * * If that is not the case, we'll need to perform conversion to a newer (or an older) format, and decoding the hint * is an unavoidable intermediate step. + * + * If these hints are from the batchlog and were originally attempted on Accord then + * we also need to decode so we can route the Hint contents appropriately. + * + * If filtering of hints is requested, because this is retrying a page that had some retry on different system + * errors, then also don't go down the sendEncodedHints path since it won't re-route the mutation and will trigger + * the same retry on different system error. */ - Action action = reader.descriptor().messagingVersion() == messagingVersion - ? sendHints(page.buffersIterator(), callbacks, this::sendEncodedHint) - : sendHints(page.hintsIterator(), callbacks, this::sendHint); + boolean isBatchLogHints = hostId.equals(RETRY_ON_DIFFERENT_SYSTEM_UUID); + boolean sendEncodedHints = reader.descriptor().messagingVersion() == messagingVersion && !isBatchLogHints && hintsFilter == null; + // If the hints filter is set then splitting the hints is needed and encoded hints can't do that + checkState(!sendEncodedHints || hintsFilter == null, "Should not send encoded hints if hints filter is set"); + Action action = sendEncodedHints + ? sendHints(page.buffersIterator(), null, callbacks, this::sendEncodedHint) + : sendHints(page.hintsIterator(), hintsFilter, callbacks, this::sendHint); if (action == Action.ABORT) return action; - long success = 0, failures = 0, timeouts = 0; - for (Callback cb : callbacks) + BitSet retryDifferentSystemHints = new BitSet(callbacks.size()); + long success = 0, failures = 0, timeouts = 0, retryDifferentSystem = 0; + for (int i = 0; i < callbacks.size(); i++) { + Callback cb = callbacks.get(i); Callback.Outcome outcome = cb.await(); if (outcome == Callback.Outcome.SUCCESS) success++; else if (outcome == Callback.Outcome.FAILURE) failures++; else if (outcome == Callback.Outcome.TIMEOUT) timeouts++; + else if (outcome == RETRY_DIFFERENT_SYSTEM) + { + retryDifferentSystemHints.set(i); + retryDifferentSystem++; + } + else throw new IllegalStateException("Unhandled outcome: " + outcome); } - updateMetrics(success, failures, timeouts); + updateMetrics(success, failures, timeouts, retryDifferentSystem); - if (failures > 0 || timeouts > 0) + // If the only errors were retryDifferentSystem and we aren't already filtering the hints then retry + // immediately otherwise we will repeat the page later including any successful hints we may have already delivered + // Hints for the batch log can hit RETRY_DIFFERENT_SYSTEM but don't need to be retried here and it could result + // in the same hint ending up in hintsNeedingRehinting twice + boolean failedRetryDifferentSystem = false; + if (retryDifferentSystem > 0 && failures < 1 && timeouts < 1 && hintsFilter == null && !isBatchLogHints) { - HintDiagnostics.pageFailureResult(this, success, failures, timeouts); + reader.seek(currentPagePosition); + Action retryResult = doSendHintsAndAwait(page, retryDifferentSystemHints); + if (retryResult != Action.CONTINUE) + failedRetryDifferentSystem = true; + } + + if (failures > 0 || timeouts > 0 || failedRetryDifferentSystem) + { + HintDiagnostics.pageFailureResult(this, success, failures, timeouts, retryDifferentSystem); return Action.ABORT; } else { - HintDiagnostics.pageSuccessResult(this, success, failures, timeouts); + HintDiagnostics.pageSuccessResult(this, success, failures, timeouts, retryDifferentSystem); + rehintHintsNeedingRehinting(); return Action.CONTINUE; } } - private void updateMetrics(long success, long failures, long timeouts) + private void rehintHintsNeedingRehinting() + { + ClusterMetadata cm = ClusterMetadata.current(); + Hint hint; + while ((hint = hintsNeedingRehinting.poll()) != null) + { + HintsService.instance.writeForAllReplicas(hint); + Mutation mutation = hint.mutation; + // Also may need to apply locally because it's possible this is from the batchlog + // and we never applied it locally + // TODO (review): Additional error handling necessary? Hints are lossy + DataPlacement dataPlacement = cm.placements.get(cm.schema.getKeyspace(mutation.getKeyspaceName()).getMetadata().params.replication); + VersionedEndpoints.ForToken forToken = dataPlacement.writes.forToken(mutation.key().getToken()); + Replica self = forToken.get().selfIfPresent(); + if (self != null) + { + Stage.MUTATION.maybeExecuteImmediately(new RunnableDebuggableTask() + { + private final long approxCreationTimeNanos = MonotonicClock.Global.approxTime.now(); + private volatile long approxStartTimeNanos; + + @Override + public void run() + { + approxStartTimeNanos = MonotonicClock.Global.approxTime.now(); + mutation.apply(); + } + + @Override + public long creationTimeNanos() + { + return approxCreationTimeNanos; + } + + @Override + public long startTimeNanos() + { + return approxStartTimeNanos; + } + + @Override + public String description() + { + return "HintsService rehinting Accord txn"; + } + }); + } + } + + } + + private void updateMetrics(long success, long failures, long timeouts, long retryDifferentSystem) { HintsServiceMetrics.hintsSucceeded.mark(success); HintsServiceMetrics.hintsFailed.mark(failures); HintsServiceMetrics.hintsTimedOut.mark(timeouts); + HintsServiceMetrics.hintsRetryDifferentSystem.mark(retryDifferentSystem); } /* * Sending hints in compatibility mode. */ - - private Action sendHints(Iterator hints, Collection callbacks, Function sendFunction) + private Action sendHints(Iterator hints, @Nullable BitSet hintsFilter, Collection callbacks, Function sendFunction) { + int hintIndex = -1; while (hints.hasNext()) { if (abortRequested.getAsBoolean()) @@ -189,19 +333,95 @@ private Action sendHints(Iterator hints, Collection callbacks, HintDiagnostics.abortRequested(this); return Action.ABORT; } - callbacks.add(sendFunction.apply(hints.next())); + + T hint = hints.next(); + hintIndex++; + if (hintsFilter != null && !hintsFilter.get(hintIndex)) + continue; + + callbacks.add(sendFunction.apply(hint)); } return Action.CONTINUE; } private Callback sendHint(Hint hint) { - Callback callback = new Callback(hint.creationTime); - Message message = Message.out(HINT_REQ, new HintMessage(hostId, hint)); - MessagingService.instance().sendWithCallback(message, address, callback); + ClusterMetadata cm = ClusterMetadata.current(); + SplitHint splitHint = splitHintIntoAccordAndNormal(cm, hint); + Mutation accordHintMutation = splitHint.accordMutation; + Dispatcher.RequestTime requestTime = null; + AsyncTxnResult accordTxnResult = null; + if (accordHintMutation != null) + { + requestTime = Dispatcher.RequestTime.forImmediateExecution(); + accordTxnResult = accordHintMutation != null ? ConsensusMigrationMutationHelper.instance().mutateWithAccordAsync(cm, accordHintMutation, null, requestTime) : null; + } + + Hint normalHint = splitHint.normalHint; + Callback callback = new Callback(address, hint.creationTime, requestTime, accordTxnResult); + if (normalHint != null) + { + // We had a hint that was supposed to be done on Accord for the batch log (otherwise address would be non-null), + // but Accord no longer manages that table/range and now we don't know which nodes (if any) are missing the Mutation. + // Convert them to per replica hints *after* all the hints in this page have been applied so we can be reasonably sure + // this page isn't going to be played again thus avoiding any futher amplification from the same hint being + // replayed and repeatedly converted to per replica hints + if (address == null) + { + checkState(hostId.equals(RETRY_ON_DIFFERENT_SYSTEM_UUID), "If there is no address to send the hint to then the host ID should be BATCHLOG_ACCORD_HINT_UUID"); + callback.onResponse(null); + hintsNeedingRehinting.add(normalHint); + } + else + { + Message message = Message.out(HINT_REQ, new HintMessage(hostId, normalHint)); + MessagingService.instance().sendWithCallback(message, address, callback); + } + } + else + { + // Don't wait for a normal response that will never come since no hints were sent + callback.onResponse(null); + } + return callback; } + /** + * Result of splitting a hint across Accord and non-transactional boundaries + */ + private class SplitHint + { + private final Mutation accordMutation; + private final Hint normalHint; + + public SplitHint(Mutation accordMutation, Hint normalHint) + { + this.accordMutation = accordMutation; + this.normalHint = normalHint; + } + + @Override + public String toString() + { + return "SplitHint{" + + "accordMutation=" + accordMutation + + ", normalHint=" + normalHint + + '}'; + } + } + + private SplitHint splitHintIntoAccordAndNormal(ClusterMetadata cm, Hint hint) + { + SplitMutation splitMutation = ConsensusMigrationMutationHelper.instance().splitMutationIntoAccordAndNormal(hint.mutation, cm); + if (splitMutation.accordMutation == null) + return new SplitHint(null, hint); + if (splitMutation.normalMutation == null) + return new SplitHint(splitMutation.accordMutation, null); + Hint normalHint = Hint.create(splitMutation.normalMutation, hint.creationTime, splitMutation.normalMutation.smallestGCGS()); + return new SplitHint(splitMutation.accordMutation, normalHint); + } + /* * Sending hints in raw mode. */ @@ -209,23 +429,41 @@ private Callback sendHint(Hint hint) private Callback sendEncodedHint(ByteBuffer hint) { HintMessage.Encoded message = new HintMessage.Encoded(hostId, hint, messagingVersion); - Callback callback = new Callback(message.getHintCreationTime()); + Callback callback = new Callback(address, message.getHintCreationTime()); MessagingService.instance().sendWithCallback(Message.out(HINT_REQ, message), address, callback); return callback; } - static final class Callback implements RequestCallback + static final class Callback implements RequestCallback, Runnable { - enum Outcome { SUCCESS, TIMEOUT, FAILURE, INTERRUPTED } + enum Outcome { SUCCESS, TIMEOUT, FAILURE, INTERRUPTED, RETRY_DIFFERENT_SYSTEM } private final long start = approxTime.now(); private final Condition condition = newOneTimeCondition(); - private volatile Outcome outcome; + private Outcome normalOutcome; + private Outcome accordOutcome; + @Nullable + private final InetAddressAndPort to; private final long hintCreationNanoTime; + @Nullable + private final Dispatcher.RequestTime requestTime; + private final AsyncTxnResult accordTxnResult; - private Callback(long hintCreationTimeMillisSinceEpoch) + private Callback(@Nonnull InetAddressAndPort to, long hintCreationTimeMillisSinceEpoch) { + this(to, hintCreationTimeMillisSinceEpoch, null, null); + } + + private Callback(@Nullable InetAddressAndPort to, long hintCreationTimeMillisSinceEpoch, Dispatcher.RequestTime requestTime, @Nullable AsyncTxnResult accordTxnResult) + { + this.to = to != null ? to : ACCORD_HINT_ENDPOINT; this.hintCreationNanoTime = approxTime.translate().fromMillisSinceEpoch(hintCreationTimeMillisSinceEpoch); + this.requestTime = requestTime; + this.accordTxnResult = accordTxnResult; + if (accordTxnResult != null) + accordTxnResult.addListener(this, ImmediateExecutor.INSTANCE); + else + accordOutcome = SUCCESS; } Outcome await() @@ -240,8 +478,31 @@ Outcome await() logger.warn("Hint dispatch was interrupted", e); return INTERRUPTED; } + normalOutcome = timedOut ? TIMEOUT : normalOutcome; + + return outcome(); + } + + private Outcome outcome() + { + checkState((normalOutcome != null && accordOutcome != null) || (normalOutcome != SUCCESS || accordOutcome != SUCCESS), "Outcome for both normal and accord hint delivery should be known"); + if (normalOutcome == RETRY_DIFFERENT_SYSTEM || accordOutcome == RETRY_DIFFERENT_SYSTEM) + return RETRY_DIFFERENT_SYSTEM; + if (normalOutcome == TIMEOUT || accordOutcome == TIMEOUT) + return TIMEOUT; + if (normalOutcome == FAILURE || accordOutcome == FAILURE) + return FAILURE; + checkState(normalOutcome == SUCCESS && accordOutcome == SUCCESS, "Hint delivery should have been successful"); + return SUCCESS; + } - return timedOut ? TIMEOUT : outcome; + private synchronized void maybeSignal() + { + if ((normalOutcome != null && accordOutcome != null) || normalOutcome == FAILURE || accordOutcome == FAILURE) + { + updateDelayMetrics(to, approxTime.now() - this.hintCreationNanoTime); + condition.signalAll(); + } } @Override @@ -253,16 +514,40 @@ public boolean invokeOnFailure() @Override public void onFailure(InetAddressAndPort from, RequestFailure failureMessage) { - outcome = FAILURE; - condition.signalAll(); + if (failureMessage.reason == RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM) + normalOutcome = RETRY_DIFFERENT_SYSTEM; + else + normalOutcome = FAILURE; + maybeSignal(); } @Override public void onResponse(Message msg) { - updateDelayMetrics(msg.from(), approxTime.now() - this.hintCreationNanoTime); - outcome = SUCCESS; - condition.signalAll(); + normalOutcome = SUCCESS; + maybeSignal(); + } + + @Override + public void run() + { + try + { + IAccordService accord = AccordService.instance(); + TxnResult.Kind kind = accord.getTxnResult(accordTxnResult, true, null, requestTime).kind(); + if (kind == retry_new_protocol) + accordOutcome = RETRY_DIFFERENT_SYSTEM; + else + accordOutcome = SUCCESS; + } + catch (Exception e) + { + accordOutcome = e instanceof WriteTimeoutException ? TIMEOUT : FAILURE; + String msg = "Accord hint delivery transaction failed"; + if (noSpamLogger.getStatement(msg).shouldLog(Clock.Global.nanoTime())) + logger.error(msg, e); + } + maybeSignal(); } } } diff --git a/src/java/org/apache/cassandra/hints/HintsService.java b/src/java/org/apache/cassandra/hints/HintsService.java index a1877802d2af..bd3de9521e99 100644 --- a/src/java/org/apache/cassandra/hints/HintsService.java +++ b/src/java/org/apache/cassandra/hints/HintsService.java @@ -33,30 +33,32 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; -import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.io.util.File; -import org.apache.cassandra.locator.ReplicaLayout; -import org.apache.cassandra.utils.concurrent.Future; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.ParameterizedClass; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.gms.FailureDetector; import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.locator.EndpointsForToken; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.ReplicaLayout; import org.apache.cassandra.metrics.HintedHandoffMetrics; import org.apache.cassandra.metrics.StorageMetrics; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.MBeanWrapper; +import org.apache.cassandra.utils.TimeUUID; +import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static com.google.common.collect.Iterables.filter; import static com.google.common.collect.Iterables.transform; +import static org.apache.cassandra.config.CassandraRelevantProperties.HINT_DISPATCH_INTERVAL_MS; /** * A singleton-ish wrapper over various hints components: @@ -70,12 +72,35 @@ */ public final class HintsService implements HintsServiceMBean { - private static final Logger logger = LoggerFactory.getLogger(HintsService.class); + // Dummy address to use for storing metrics for hints that will be retried on a different transaction system + // and aren't being sent to a specific node + public static final InetAddressAndPort RETRY_ON_DIFFERENT_SYSTEM_ADDRESS; + + static + { + try + { + RETRY_ON_DIFFERENT_SYSTEM_ADDRESS = InetAddressAndPort.getByNameOverrideDefaults("0.0.0.0", 65535); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + + // Batch log replay may need to route some mutations to Accord which may fail and Hints are used for retry by the batch log. + // Write them to this endpoint which indicates that on replay hints will need to calculate the endpoints + // to deliver to since it's not really a per node hint, but part of a batch that needs replaying. + // This can also occur with regular mutations as well when some replicas return a retry error but quorum + // is reached so hinting is used to bring the other replicas up to date + public static final UUID RETRY_ON_DIFFERENT_SYSTEM_UUID = TimeUUID.atUnixMicrosWithLsbAsUUID(-1, -1); public static HintsService instance = new HintsService(); public static final String MBEAN_NAME = "org.apache.cassandra.hints:type=HintsService"; + private static final Logger logger = LoggerFactory.getLogger(HintsService.class); + private static final int MIN_BUFFER_SIZE = 32 << 20; static final ImmutableMap EMPTY_PARAMS = ImmutableMap.of(); @@ -226,7 +251,8 @@ public synchronized void startDispatch() HintsDispatchTrigger trigger = new HintsDispatchTrigger(catalog, writeExecutor, dispatchExecutor, isDispatchPaused); // triggering hint dispatch is now very cheap, so we can do it more often - every 10 seconds vs. every 10 minutes, // previously; this reduces mean time to delivery, and positively affects batchlog delivery latencies, too - triggerDispatchFuture = ScheduledExecutors.scheduledTasks.scheduleWithFixedDelay(trigger, 10, 10, TimeUnit.SECONDS); + long hintDispatchIntervalMs = HINT_DISPATCH_INTERVAL_MS.getLong(); + triggerDispatchFuture = ScheduledExecutors.scheduledTasks.scheduleWithFixedDelay(trigger, hintDispatchIntervalMs, hintDispatchIntervalMs, TimeUnit.MILLISECONDS); } public void pauseDispatch() @@ -322,6 +348,13 @@ public void deleteAllHints() catalog.deleteAllHints(); } + @VisibleForTesting + public void deleteAllHintsUnsafe() + { + catalog.deleteAllHintsUnsafe(); + bufferPool.clearUnsafe(); + } + /** * Deletes all hints for the provided destination. Doesn't make snapshots - should be used with care. * diff --git a/src/java/org/apache/cassandra/hints/HintsStore.java b/src/java/org/apache/cassandra/hints/HintsStore.java index cb3d67b8afda..795c1479e115 100644 --- a/src/java/org/apache/cassandra/hints/HintsStore.java +++ b/src/java/org/apache/cassandra/hints/HintsStore.java @@ -18,6 +18,7 @@ package org.apache.cassandra.hints; import java.io.IOException; +import java.nio.channels.ClosedChannelException; import java.util.Deque; import java.util.HashSet; import java.util.Iterator; @@ -174,6 +175,8 @@ public long findOldestHintTimestamp() boolean isLive() { + if (hostId.equals(HintsService.RETRY_ON_DIFFERENT_SYSTEM_UUID)) + return true; InetAddressAndPort address = address(); return address != null && FailureDetector.instance.isAlive(address); } @@ -193,6 +196,20 @@ void offerLast(HintsDescriptor descriptor) dispatchDequeue.offerLast(descriptor); } + void deleteAllHintsUnsafe() + { + try + { + closeWriter(); + } + catch (FSWriteError e) + { + if (!(e.getCause() instanceof ClosedChannelException)) + throw e; + } + deleteAllHints(); + } + void deleteAllHints() { HintsDescriptor descriptor; diff --git a/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java index 33f7e8f20100..dec6a5d9162a 100644 --- a/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordClientRequestMetrics.java @@ -39,6 +39,8 @@ public class AccordClientRequestMetrics extends ClientRequestMetrics // Number of times a query was rejected by Accord in TxnQuery due to a migration back to Paxos public final Meter accordMigrationRejects; + public final Meter preempted; + public final Meter topologyMismatches; public AccordClientRequestMetrics(String scope) { @@ -48,6 +50,8 @@ public AccordClientRequestMetrics(String scope) migrationSkippedReads = Metrics.meter(factory.createMetricName("MigrationSkippedReads")); paxosKeyMigrations = Metrics.meter(factory.createMetricName("PaxosKeyMigrations")); accordMigrationRejects = Metrics.meter(factory.createMetricName("AccordMigrationRejects")); + preempted = Metrics.meter(factory.createMetricName("Preempted")); + topologyMismatches = Metrics.meter(factory.createMetricName("TopologyMismatches")); } @Override @@ -58,5 +62,8 @@ public void release() Metrics.remove(factory.createMetricName("MigrationSkippedReads")); Metrics.remove(factory.createMetricName("PaxosKeyMigrations")); Metrics.remove(factory.createMetricName("AccordMigrationRejects")); + Metrics.remove(factory.createMetricName("Preempted")); + Metrics.remove(factory.createMetricName("TopologyMismatches")); + } } diff --git a/src/java/org/apache/cassandra/metrics/ClientRequestMetrics.java b/src/java/org/apache/cassandra/metrics/ClientRequestMetrics.java index 61fcc34bf129..e22eec6f5143 100644 --- a/src/java/org/apache/cassandra/metrics/ClientRequestMetrics.java +++ b/src/java/org/apache/cassandra/metrics/ClientRequestMetrics.java @@ -40,6 +40,8 @@ public class ClientRequestMetrics extends LatencyMetrics public final Meter readSizeAborts; public final Meter localRequests; public final Meter remoteRequests; + public final Meter retryDifferentSystem; + public final Meter retryCoordinatorBehind; public ClientRequestMetrics(String scope) { @@ -53,6 +55,8 @@ public ClientRequestMetrics(String scope) readSizeAborts = Metrics.meter(factory.createMetricName("ReadSizeAborts")); localRequests = Metrics.meter(factory.createMetricName("LocalRequests")); remoteRequests = Metrics.meter(factory.createMetricName("RemoteRequests")); + retryDifferentSystem = Metrics.meter(factory.createMetricName("RetryDifferentSystem")); + retryCoordinatorBehind = Metrics.meter(factory.createMetricName("RetryCoordinatorBehind")); } public void markAbort(Throwable cause) @@ -81,5 +85,7 @@ public void release() Metrics.remove(factory.createMetricName("ReadSizeAborts")); Metrics.remove(factory.createMetricName("LocalRequests")); Metrics.remove(factory.createMetricName("RemoteRequests")); + Metrics.remove(factory.createMetricName("RetryDifferentSystem")); + Metrics.remove(factory.createMetricName("RetryCoordinatorBehind")); } } diff --git a/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java b/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java index 2a8ce92776d9..c3203e74f408 100644 --- a/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java @@ -17,6 +17,8 @@ */ package org.apache.cassandra.metrics; +import java.net.UnknownHostException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,6 +37,22 @@ public final class HintsServiceMetrics { public static final String TYPE_NAME = "HintsService"; + + // Hint metrics are by address and hints that are for Accord need an address + public static final InetAddressAndPort ACCORD_HINT_ENDPOINT; + + static + { + try + { + ACCORD_HINT_ENDPOINT = InetAddressAndPort.getByNameOverrideDefaults("0.0.0.0", 0); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } + private static final Logger logger = LoggerFactory.getLogger(HintsServiceMetrics.class); private static final MetricNameFactory factory = new DefaultNameFactory(TYPE_NAME); @@ -42,6 +60,8 @@ public final class HintsServiceMetrics public static final Meter hintsSucceeded = Metrics.meter(factory.createMetricName("HintsSucceeded")); public static final Meter hintsFailed = Metrics.meter(factory.createMetricName("HintsFailed")); public static final Meter hintsTimedOut = Metrics.meter(factory.createMetricName("HintsTimedOut")); + public static final Meter hintsRetryDifferentSystem = Metrics.meter(factory.createMetricName("HintsRetryDifferentSystem")); + /** Histogram of all hint delivery delays */ private static final Histogram globalDelayHistogram = Metrics.histogram(factory.createMetricName("Hint_delays"), false); @@ -62,4 +82,9 @@ public static void updateDelayMetrics(InetAddressAndPort endpoint, long delay) globalDelayHistogram.update(delay); delayByEndpoint.get(endpoint).update(delay); } + + public static long getDelayCount(InetAddressAndPort endpoint) + { + return delayByEndpoint.get(endpoint).getCount(); + } } diff --git a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java index c290c321f313..17102b6cc02c 100644 --- a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java @@ -107,6 +107,7 @@ public class KeyspaceMetrics public final LatencyMetrics rangeMigration; public final Meter rangeMigrationUnexpectedFailures; public final Meter rangeMigrationDependencyLimitFailures; + public final Meter mutationsRejectedOnWrongSystem; /** Writes failed ideal consistency **/ public final Counter writeFailedIdealCL; /** Ideal CL write latency metrics */ @@ -257,6 +258,7 @@ public KeyspaceMetrics(final Keyspace ks) rangeMigration = createLatencyMetrics("RangeMigration"); rangeMigrationUnexpectedFailures = createKeyspaceMeter("RangeMigrationUnexpectedFailures"); rangeMigrationDependencyLimitFailures = createKeyspaceMeter("RangeMigratingDependencyLimitFailures"); + mutationsRejectedOnWrongSystem = createKeyspaceMeter("MutationsRejectedOnWrongSystem"); writeFailedIdealCL = createKeyspaceCounter("WriteFailedIdealCL"); idealCLWriteLatency = createLatencyMetrics("IdealCLWrite"); diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 86693ec8e206..b490a2629988 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -196,6 +196,7 @@ public class TableMetrics public final LatencyMetrics accordRepair; public final TableMeter accordRepairUnexpectedFailures; public final TableMeter accordRepairDependencyLimitFailures; + public final TableMeter mutationsRejectedOnWrongSystem; /** percent of the data that is repaired */ public final Gauge percentRepaired; /** Reports the size of sstables in repaired, unrepaired, and any ongoing repair buckets */ @@ -817,6 +818,7 @@ public Long getValue() accordRepair = createLatencyMetrics("AccordRepair", cfs.keyspace.metric.rangeMigration, GLOBAL_RANGE_MIGRATION_LATENCY); accordRepairUnexpectedFailures = createTableMeter("AccordRepairUnexpectedFailures", cfs.keyspace.metric.rangeMigrationUnexpectedFailures); accordRepairDependencyLimitFailures = createTableMeter("AccordRepairDependencyLimitFaiures", cfs.keyspace.metric.rangeMigrationDependencyLimitFailures); + mutationsRejectedOnWrongSystem = createTableMeter("MutationsRejectedOnWrongSystem", cfs.keyspace.metric.mutationsRejectedOnWrongSystem); repairsStarted = createTableCounter("RepairJobsStarted"); repairsCompleted = createTableCounter("RepairJobsCompleted"); diff --git a/src/java/org/apache/cassandra/net/ResponseVerbHandler.java b/src/java/org/apache/cassandra/net/ResponseVerbHandler.java index f89362715b74..517a10fd2a29 100644 --- a/src/java/org/apache/cassandra/net/ResponseVerbHandler.java +++ b/src/java/org/apache/cassandra/net/ResponseVerbHandler.java @@ -25,6 +25,7 @@ import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tracing.Tracing; @@ -33,6 +34,7 @@ import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.exceptions.RequestFailureReason.COORDINATOR_BEHIND; import static org.apache.cassandra.exceptions.RequestFailureReason.INVALID_ROUTING; +import static org.apache.cassandra.exceptions.RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; class ResponseVerbHandler implements IVerbHandler @@ -101,8 +103,11 @@ private void maybeFetchLogs(Message message) // Gossip stage is single-threaded, so we may end up in a deadlock with after-commit hook // that executes something on the gossip stage as well. - if (message.isFailureResponse() && - (message.payload == COORDINATOR_BEHIND || message.payload == INVALID_ROUTING) && + boolean isFailureResponse = message.isFailureResponse(); + // RequestFailure is not a singleton so we need to extract and compare against the reason + RequestFailureReason reason = isFailureResponse ? ((RequestFailure)message.payload).reason : null; + if (isFailureResponse && + (reason == COORDINATOR_BEHIND || reason == INVALID_ROUTING || reason == RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM) && // Gossip stage is single-threaded, so we may end up in a deadlock with after-commit hook // that executes something on the gossip stage as well. !Stage.GOSSIP.executor().inExecutor()) diff --git a/src/java/org/apache/cassandra/repair/RepairJob.java b/src/java/org/apache/cassandra/repair/RepairJob.java index ad0510cd8170..3eaf206f8009 100644 --- a/src/java/org/apache/cassandra/repair/RepairJob.java +++ b/src/java/org/apache/cassandra/repair/RepairJob.java @@ -39,6 +39,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.primitives.Ranges; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; @@ -68,6 +69,7 @@ import org.apache.cassandra.utils.concurrent.FutureCombiner; import org.apache.cassandra.utils.concurrent.ImmediateFuture; +import static com.google.common.util.concurrent.Futures.getUnchecked; import static org.apache.cassandra.config.DatabaseDescriptor.paxosRepairEnabled; import static org.apache.cassandra.schema.SchemaConstants.METADATA_KEYSPACE_NAME; import static org.apache.cassandra.service.paxos.Paxos.useV2; @@ -193,7 +195,7 @@ public void onFailure(Throwable t) return; } - Future accordRepair; + Future accordRepair; if (doAccordRepair) { accordRepair = paxosRepair.flatMap(unused -> { @@ -213,12 +215,12 @@ public void onFailure(Throwable t) if (session.accordOnly) { - accordRepair.addCallback(new FutureCallback() + accordRepair.addCallback(new FutureCallback<>() { - public void onSuccess(Void ignored) + public void onSuccess(Ranges barrieredRanges) { logger.info("{} {}.{} accord repair completed", session.previewKind.logPrefix(session.getId()), desc.keyspace, desc.columnFamily); - trySuccess(new RepairResult(desc, Collections.emptyList(), ConsensusMigrationRepairResult.fromAccordOnlyRepair(repairStartingEpoch, session.excludedDeadNodes))); + trySuccess(new RepairResult(desc, Collections.emptyList(), ConsensusMigrationRepairResult.fromAccordOnlyRepair(repairStartingEpoch, barrieredRanges, session.excludedDeadNodes))); } public void onFailure(Throwable t) @@ -282,7 +284,7 @@ public void onSuccess(List stats) } cfs.metric.repairsCompleted.inc(); logger.info("Completing repair with excludedDeadNodes {}", session.excludedDeadNodes); - trySuccess(new RepairResult(desc, stats, ConsensusMigrationRepairResult.fromRepair(repairStartingEpoch, doPaxosRepair, doAccordRepair, session.excludedDeadNodes))); + trySuccess(new RepairResult(desc, stats, ConsensusMigrationRepairResult.fromRepair(repairStartingEpoch, getUnchecked(accordRepair), doPaxosRepair, doAccordRepair, session.excludedDeadNodes))); } /** @@ -307,7 +309,7 @@ public void onFailure(Throwable t) }, taskExecutor); } - private Future> createSyncTasks(Future accordRepair, Future allSnapshotTasks, List allEndpoints) + private Future> createSyncTasks(Future accordRepair, Future allSnapshotTasks, List allEndpoints) { Future> treeResponses; if (allSnapshotTasks != null) diff --git a/src/java/org/apache/cassandra/schema/DistributedSchema.java b/src/java/org/apache/cassandra/schema/DistributedSchema.java index 17f4d33ccb93..e0658739c292 100644 --- a/src/java/org/apache/cassandra/schema/DistributedSchema.java +++ b/src/java/org/apache/cassandra/schema/DistributedSchema.java @@ -18,20 +18,8 @@ package org.apache.cassandra.schema; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.UUID; - import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; - import org.apache.cassandra.auth.AuthKeyspace; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.functions.UserFunction; @@ -47,6 +35,18 @@ import org.apache.cassandra.tracing.TraceKeyspace; import org.apache.cassandra.utils.FBUtilities; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; + import static org.apache.cassandra.db.TypeSizes.sizeof; /** @@ -130,6 +130,11 @@ public KeyspaceMetadata getKeyspaceMetadata(String keyspace) return keyspaces.get(keyspace).get(); } + public Optional maybeGetKeyspaceMetadata(String keyspace) + { + return keyspaces.get(keyspace); + } + public TableMetadata getTableMetadata(TableId id) { return tables.get(id); diff --git a/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java b/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java index d085174043df..4219cad30140 100644 --- a/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java +++ b/src/java/org/apache/cassandra/service/AbstractWriteResponseHandler.java @@ -25,7 +25,6 @@ import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; import java.util.function.Function; import java.util.function.Supplier; -import java.util.stream.Collectors; import javax.annotation.Nullable; import org.slf4j.Logger; @@ -36,8 +35,10 @@ import org.apache.cassandra.db.IMutation; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.exceptions.CoordinatorBehindException; import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.exceptions.WriteFailureException; import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.locator.EndpointsForToken; @@ -51,6 +52,7 @@ import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static java.lang.Long.MAX_VALUE; import static java.lang.Math.min; import static java.util.concurrent.TimeUnit.MICROSECONDS; @@ -59,6 +61,8 @@ import static org.apache.cassandra.config.DatabaseDescriptor.getCounterWriteRpcTimeout; import static org.apache.cassandra.config.DatabaseDescriptor.getWriteRpcTimeout; import static org.apache.cassandra.db.WriteType.COUNTER; +import static org.apache.cassandra.exceptions.RequestFailureReason.COORDINATOR_BEHIND; +import static org.apache.cassandra.exceptions.RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; import static org.apache.cassandra.locator.Replicas.countInOurDc; import static org.apache.cassandra.schema.Schema.instance; import static org.apache.cassandra.service.StorageProxy.WritePerformer; @@ -79,6 +83,10 @@ public abstract class AbstractWriteResponseHandler implements RequestCallback private static final AtomicIntegerFieldUpdater failuresUpdater = AtomicIntegerFieldUpdater.newUpdater(AbstractWriteResponseHandler.class, "failures"); private volatile int failures = 0; + private static final AtomicIntegerFieldUpdater alreadyHintedForRetryOnDifferentSystemUpdater = + AtomicIntegerFieldUpdater.newUpdater(AbstractWriteResponseHandler.class, "alreadyHintedForRetryOnDifferentSystem"); + // Only write a hint to be applied as a transaction once + private volatile int alreadyHintedForRetryOnDifferentSystem = 0; private volatile Map failureReasonByEndpoint; private final Dispatcher.RequestTime requestTime; private @Nullable final Supplier hintOnFailure; @@ -111,7 +119,7 @@ protected AbstractWriteResponseHandler(ForWrite replicaPlan, Runnable callback, this.requestTime = requestTime; } - public void get() throws WriteTimeoutException, WriteFailureException + public void get() throws WriteTimeoutException, WriteFailureException, RetryOnDifferentSystemException { long timeoutNanos = currentTimeoutNanos(); @@ -128,14 +136,41 @@ public void get() throws WriteTimeoutException, WriteFailureException if (!signaled) throwTimeout(); - if (blockFor() + failures > candidateReplicaCount()) + int candidateReplicaCount = candidateReplicaCount(); + if (blockFor() + failures > candidateReplicaCount) { - if (RequestCallback.isTimeout(this.getFailureReasonByEndpointMap().keySet().stream() - .filter(this::waitingFor) // DatacenterWriteResponseHandler filters errors from remote DCs - .collect(Collectors.toMap(Function.identity(), this.getFailureReasonByEndpointMap()::get)))) + // failures keeps incrementing, and this.failureReasonByEndpoint keeps getting new entries after signaling. + // Simpler to reason about what happened by copying this.failureReasonByEndpoint and then inferring + // failures from it + final Map failureReasonByEndpoint = getFailureReasonByEndpointMap().keySet().stream() + .filter(this::waitingFor) // DatacenterWriteResponseHandler filters errors from remote DCs + .collect(toImmutableMap(Function.identity(), getFailureReasonByEndpointMap()::get)); + final int failures = failureReasonByEndpoint.size(); + if (RequestCallback.isTimeout(failureReasonByEndpoint)) throwTimeout(); - throw new WriteFailureException(replicaPlan.consistencyLevel(), ackCount(), blockFor(), writeType, this.getFailureReasonByEndpointMap()); + int transactionRetryErrors = 0; + int coordinatorBehindErrors = 0; + for (RequestFailureReason reason : failureReasonByEndpoint.values()) + { + if (reason == RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM) + transactionRetryErrors++; + if (reason == COORDINATOR_BEHIND) + coordinatorBehindErrors++; + } + int totalRetriableFailures = transactionRetryErrors + coordinatorBehindErrors; + + // Retrying might fix this + if (candidateReplicaCount - failures + totalRetriableFailures >= blockFor()) + { + // Doesn't matter which we throw really but for clarity/metrics be specific + // Retrying on the correct system might make this write succeed + if (transactionRetryErrors > 0) + throw new RetryOnDifferentSystemException(); + throw new CoordinatorBehindException("Write request failed due to coordinator behind"); + } + + throw new WriteFailureException(replicaPlan.consistencyLevel(), ackCount(), blockFor(), writeType, getFailureReasonByEndpointMap()); } if (replicaPlan.stillAppliesTo(ClusterMetadata.current())) @@ -298,7 +333,7 @@ protected void signal() @Override public void onFailure(InetAddressAndPort from, RequestFailure failure) { - logger.trace("Got failure from {}", from); + logger.trace("Got failure {} from {}", failure, from); int n = waitingFor(from) ? failuresUpdater.incrementAndGet(this) @@ -317,8 +352,20 @@ public void onFailure(InetAddressAndPort from, RequestFailure failure) if (blockFor() + n > candidateReplicaCount()) signal(); - if (hintOnFailure != null && StorageProxy.shouldHint(replicaPlan.lookup(from)) && requestTime.shouldSendHints()) - StorageProxy.submitHint(hintOnFailure.get(), replicaPlan.lookup(from), null); + // If the failure was RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM then we only want to hint once + // and not for each instance since odds are it will be applied as a transaction at all replicas + if (hintOnFailure != null && StorageProxy.shouldHint(replicaPlan.lookup(from)) ) + { + if (failure.reason == RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM) + { + if (alreadyHintedForRetryOnDifferentSystemUpdater.compareAndSet(this, 0, 1)) + StorageProxy.submitHintForRetryOnDifferentSystem(hintOnFailure.get()); + } + else + { + StorageProxy.submitHint(hintOnFailure.get(), replicaPlan.lookup(from), null); + } + } } @Override diff --git a/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java b/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java index dd2ebae915c5..41b68de395f5 100644 --- a/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java +++ b/src/java/org/apache/cassandra/service/BatchlogResponseHandler.java @@ -29,9 +29,10 @@ public class BatchlogResponseHandler extends AbstractWriteResponseHandler { - AbstractWriteResponseHandler wrapped; - BatchlogCleanup cleanup; + final AbstractWriteResponseHandler wrapped; + final BatchlogCleanup cleanup; protected volatile int requiredBeforeFinish; + private static final AtomicIntegerFieldUpdater requiredBeforeFinishUpdater = AtomicIntegerFieldUpdater.newUpdater(BatchlogResponseHandler.class, "requiredBeforeFinish"); @@ -104,6 +105,11 @@ public BatchlogCleanup(int mutationsWaitingFor, BatchlogCleanupCallback callback this.callback = callback; } + public BatchlogCleanup(BatchlogCleanupCallback callback) + { + this.callback = callback; + } + public int decrement() { return mutationsWaitingForUpdater.decrementAndGet(this); @@ -114,6 +120,11 @@ public void ackMutation() if (decrement() == 0) callback.invoke(); } + + public void setMutationsWaitingFor(int mutationsWaitingFor) + { + mutationsWaitingForUpdater.lazySet(this, mutationsWaitingFor); + } } public interface BatchlogCleanupCallback diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index 59b39f5ee3a6..240c58f8950c 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -31,7 +31,6 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.Future; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -42,14 +41,13 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; -import com.google.common.base.Preconditions; import com.google.common.cache.CacheLoader; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.primitives.Keys; import accord.primitives.Txn; import accord.utils.Invariants; import org.apache.cassandra.batchlog.Batch; @@ -58,7 +56,6 @@ import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.Config; -import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; @@ -88,6 +85,7 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.CasWriteTimeoutException; import org.apache.cassandra.exceptions.CasWriteUnknownResultException; +import org.apache.cassandra.exceptions.CoordinatorBehindException; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.exceptions.IsBootstrappingException; import org.apache.cassandra.exceptions.OverloadedException; @@ -98,6 +96,7 @@ import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.RequestFailureException; import org.apache.cassandra.exceptions.RequestTimeoutException; +import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.exceptions.UnavailableException; import org.apache.cassandra.exceptions.WriteFailureException; import org.apache.cassandra.exceptions.WriteTimeoutException; @@ -131,15 +130,14 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; -import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.txn.TxnCondition; +import org.apache.cassandra.service.accord.IAccordService.AsyncTxnResult; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; -import org.apache.cassandra.service.accord.txn.TxnReferenceOperations; import org.apache.cassandra.service.accord.txn.TxnResult; -import org.apache.cassandra.service.accord.txn.TxnUpdate; -import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.SplitConsumer; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.SplitMutations; import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.Commit; @@ -165,8 +163,10 @@ import org.apache.cassandra.utils.MonotonicClock; import org.apache.cassandra.utils.NoSpamLogger; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.CountDownLatch; +import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static accord.primitives.Txn.Kind.EphemeralRead; @@ -197,6 +197,8 @@ import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.casResult; import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.serialReadResult; import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.mutateWithAccordAsync; +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.splitMutationsIntoAccordAndNormal; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision; import static org.apache.cassandra.service.paxos.Ballot.Flag.GLOBAL; import static org.apache.cassandra.service.paxos.Ballot.Flag.LOCAL; @@ -207,6 +209,8 @@ import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.LocalizeString.toUpperCaseLocalized; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; +import static org.apache.cassandra.utils.Throwables.getStackTraceAsToString; +import static org.apache.cassandra.utils.Throwables.unchecked; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; import static org.apache.cassandra.utils.concurrent.CountDownLatch.newCountDownLatch; import static org.apache.commons.lang3.StringUtils.join; @@ -1184,54 +1188,6 @@ public static void mutateMV(ByteBuffer dataKey, Collection mutations, } } - private static ConsistencyLevel consistencyLevelForCommit(Collection mutations, ConsistencyLevel consistencyLevel) - { - ConsistencyLevel result = null; - for (IMutation mutation : mutations) - { - for (TableId tableId : mutation.getTableIds()) - { - TransactionalMode mode = Schema.instance.getTableMetadata(tableId).params.transactionalMode; - ConsistencyLevel commitCL = mode.commitCLForStrategy(consistencyLevel); - if (result == null || commitCL.compareTo(result) > 0) - result = commitCL; - } - } - return result; - } - - private static boolean writesThroughAccord(List mutations, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) - { - boolean accordWrite = false; - boolean normalWrite = false; - for (int i=0,mi=mutations.size(); i mutations, ConsistencyLevel consistencyLevel, @@ -1258,53 +1214,140 @@ public static void mutateWithTriggers(List mutations, } } - Collection augmented = TriggerExecutor.instance.execute(mutations); + List augmented = TriggerExecutor.instance.execute(mutations); String keyspaceName = mutations.iterator().next().getKeyspaceName(); - boolean updatesView = Keyspace.open(mutations.iterator().next().getKeyspaceName()) + boolean updatesView = Keyspace.open(keyspaceName) .viewManager .updatesAffectView(mutations, true); - - long size = IMutation.dataSize(mutations); + long size = IMutation.dataSize(augmented != null ? augmented : mutations); writeMetrics.mutationSize.update(size); writeMetricsForLevel(consistencyLevel).mutationSize.update(size); - if (writesThroughAccord(mutations, consistencyLevel, requestTime)) + if (augmented != null || mutateAtomically || updatesView) + mutateAtomically(augmented != null ? augmented : (List)mutations, consistencyLevel, updatesView, requestTime); + else + dispatchMutationsWithRetryOnDifferentSystem(mutations, consistencyLevel, requestTime); + } + + public static void dispatchMutationsWithRetryOnDifferentSystem(List mutations, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + while (true) { - Preconditions.checkState(!SchemaConstants.getSystemKeyspaces().contains(keyspaceName)); - mutateWithAccord(augmented != null ? augmented : mutations, consistencyLevel, requestTime); + ClusterMetadata cm = ClusterMetadata.current(); + try + { + SplitMutations splitMutations = splitMutationsIntoAccordAndNormal(cm, (List)mutations); + List accordMutations = splitMutations.accordMutations(); + AsyncTxnResult accordResult = accordMutations != null ? mutateWithAccordAsync(cm, accordMutations, consistencyLevel, requestTime) : null; + List normalMutations = splitMutations.normalMutations(); + Tracing.trace("Split mutations into Accord {} and normal {}", accordMutations, normalMutations); + + Throwable failure = null; + try + { + if (normalMutations != null) + { + mutate(normalMutations, consistencyLevel, requestTime); + Tracing.trace("Successfully wrote normal mutations"); + } + } + catch (RetryOnDifferentSystemException e) + { + writeMetrics.retryDifferentSystem.mark(); + writeMetricsForLevel(consistencyLevel).retryDifferentSystem.mark(); + logger.debug("Retrying mutations on different system because some mutations were misrouted"); + Tracing.trace("Got {} from normal mutations, will retry", e); + continue; + } + catch (CoordinatorBehindException e) + { + writeMetrics.retryCoordinatorBehind.mark(); + writeMetricsForLevel(consistencyLevel).retryCoordinatorBehind.mark(); + mutations.forEach(IMutation::clearCachedSerializationsForRetry); + logger.debug("Retrying mutations now that coordinator has caught up to cluster metadata"); + Tracing.trace("Got {} from normal mutations, will retry", e); + continue; + } + catch (Exception e) + { + failure = Throwables.merge(failure, e); + } + + // Check if the Accord mutations succeeded asynchronously + try + { + if (accordResult != null) + { + IAccordService accord = AccordService.instance(); + TxnResult.Kind kind = accord.getTxnResult(accordResult, true, consistencyLevel, requestTime).kind(); + if (kind == retry_new_protocol) + continue; + Tracing.trace("Successfully wrote Accord mutations"); + } + } + catch (Exception e) + { + failure = Throwables.merge(failure, e); + } + + if (failure != null) + throw unchecked(failure); + } + catch (Exception t) + { + // Unexpected error so it would be helpful to have details + Tracing.trace("{}", getStackTraceAsToString(t)); + throw t; + } + break; } - else if (augmented != null) - mutateAtomically(augmented, consistencyLevel, updatesView, requestTime); - else + } + + private static ConsistencyLevel consistencyLevelForBatchLog(ConsistencyLevel consistencyLevel, boolean requireQuorumForRemove) + { + // If we are requiring quorum nodes for removal, we upgrade consistency level to QUORUM unless we already + // require ALL, or EACH_QUORUM. This is so that *at least* QUORUM nodes see the update. + ConsistencyLevel batchConsistencyLevel = requireQuorumForRemove + ? ConsistencyLevel.QUORUM + : consistencyLevel; + + switch (consistencyLevel) { - if (mutateAtomically || updatesView) - mutateAtomically((Collection) mutations, consistencyLevel, updatesView, requestTime); - else - mutate(mutations, consistencyLevel, requestTime); + case ALL: + case EACH_QUORUM: + batchConsistencyLevel = consistencyLevel; } + return batchConsistencyLevel; } - private static void mutateWithAccord(Collection mutations, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + private static void doFallibleWriteWithMetricTracking(Runnable r, ConsistencyLevel consistencyLevel) { - int fragmentIndex = 0; - List fragments = new ArrayList<>(mutations.size()); - List partitionKeys = new ArrayList<>(mutations.size()); - for (IMutation mutation : mutations) + try { - for (PartitionUpdate update : mutation.getPartitionUpdates()) - { - PartitionKey pk = PartitionKey.of(update); - partitionKeys.add(pk); - fragments.add(new TxnWrite.Fragment(PartitionKey.of(update), fragmentIndex++, update, TxnReferenceOperations.empty())); - } + r.run(); + } + catch (UnavailableException e) + { + writeMetrics.unavailables.mark(); + writeMetricsForLevel(consistencyLevel).unavailables.mark(); + Tracing.trace("Unavailable"); + throw e; + } + catch (WriteTimeoutException e) + { + writeMetrics.timeouts.mark(); + writeMetricsForLevel(consistencyLevel).timeouts.mark(); + Tracing.trace("Write timeout; received {} of {} required replies", e.received, e.blockFor); + throw e; + } + catch (WriteFailureException e) + { + writeMetrics.failures.mark(); + writeMetricsForLevel(consistencyLevel).failures.mark(); + Tracing.trace("Write failure; received {} of {} required replies", e.received, e.blockFor); + throw e; } - // Potentially ignore commit consistency level if the strategy specifies accord and not migration - ConsistencyLevel clForCommit = consistencyLevelForCommit(mutations, consistencyLevel); - TxnUpdate update = new TxnUpdate(fragments, TxnCondition.none(), clForCommit); - Txn.InMemory txn = new Txn.InMemory(Keys.of(partitionKeys), TxnRead.EMPTY, TxnQuery.EMPTY, update); - IAccordService accordService = AccordService.instance(); - accordService.coordinate(txn, consistencyLevel, requestTime); } /** @@ -1314,92 +1357,171 @@ private static void mutateWithAccord(Collection mutations, * After: remove the batchlog entry (after writing hints for the batch rows, if necessary). * * @param mutations the Mutations to be applied across the replicas - * @param consistency_level the consistency level for the operation + * @param consistencyLevel the consistency level for the operation * @param requireQuorumForRemove at least a quorum of nodes will see update before deleting batchlog * @param requestTime object holding times when request got enqueued and started execution */ - public static void mutateAtomically(Collection mutations, - ConsistencyLevel consistency_level, + public static void mutateAtomically(List mutations, + ConsistencyLevel consistencyLevel, boolean requireQuorumForRemove, Dispatcher.RequestTime requestTime) throws UnavailableException, OverloadedException, WriteTimeoutException { Tracing.trace("Determining replicas for atomic batch"); long startTime = nanoTime(); - - List wrappers = new ArrayList<>(mutations.size()); + boolean attributeNonAccordLatency = true; + long nonAccordEndTime = -1; if (mutations.stream().anyMatch(mutation -> Keyspace.open(mutation.getKeyspaceName()).getReplicationStrategy().hasTransientReplicas())) throw new AssertionError("Logged batches are unsupported with transient replication"); try { + ConsistencyLevel batchConsistencyLevel = consistencyLevelForBatchLog(consistencyLevel, requireQuorumForRemove); + // This can't be updated for each iteration because cleanup has to go to the correct replicas which is where the batchlog is originally written + ReplicaPlan.ForWrite batchlogReplicaPlan = ReplicaPlans.forBatchlogWrite(ClusterMetadata.current(), batchConsistencyLevel == ConsistencyLevel.ANY); + final TimeUUID batchUUID = nextTimeUUID(); + boolean wroteToBatchLog = false; + while (true) + { + // In case we hit an error in before/during splitting + attributeNonAccordLatency = true; + ClusterMetadata cm = ClusterMetadata.current(); + List wrappers = new ArrayList<>(mutations.size()); + List accordMutations = new ArrayList<>(mutations.size()); + BatchlogCleanup cleanup = new BatchlogCleanup(() -> asyncRemoveFromBatchlog(batchlogReplicaPlan, batchUUID, requestTime)); - // If we are requiring quorum nodes for removal, we upgrade consistency level to QUORUM unless we already - // require ALL, or EACH_QUORUM. This is so that *at least* QUORUM nodes see the update. - ConsistencyLevel batchConsistencyLevel = requireQuorumForRemove - ? ConsistencyLevel.QUORUM - : consistency_level; + // add a handler for each mutation that will not be written on Accord - includes checking availability, but doesn't initiate any writes, yet + SplitConsumer splitConsumer = (accordMutation, normalMutation, originalMutations, mutationIndex) -> { + Mutation eitherMutation = normalMutation != null ? normalMutation : accordMutation; + Keyspace keyspace = Keyspace.open(eitherMutation.getKeyspaceName()); + Token tk = eitherMutation.key().getToken(); - switch (consistency_level) - { - case ALL: - case EACH_QUORUM: - batchConsistencyLevel = consistency_level; - } + if (accordMutation != null) + accordMutations.add(accordMutation); - ReplicaPlan.ForWrite replicaPlan = ReplicaPlans.forBatchlogWrite(batchConsistencyLevel == ConsistencyLevel.ANY); + if (normalMutation == null) + return; - final TimeUUID batchUUID = nextTimeUUID(); - BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), - () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID, requestTime)); + // Always construct the replica plan to check availability + ReplicaPlan.ForWrite dataReplicaPlan = ReplicaPlans.forWrite(cm, keyspace, consistencyLevel, tk, ReplicaPlans.writeNormal); - // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet - for (Mutation mutation : mutations) - { - WriteResponseHandlerWrapper wrapper = wrapBatchResponseHandler(mutation, - consistency_level, - batchConsistencyLevel, - WriteType.BATCH, - cleanup, - requestTime); - // exit early if we can't fulfill the CL at this time. - wrappers.add(wrapper); - } + if (dataReplicaPlan.lookup(FBUtilities.getBroadcastAddressAndPort()) != null) + writeMetrics.localRequests.mark(); + else + writeMetrics.remoteRequests.mark(); + + WriteResponseHandlerWrapper wrapper = wrapBatchResponseHandler(normalMutation, + dataReplicaPlan, + batchConsistencyLevel, + WriteType.BATCH, + cleanup, + requestTime); + wrappers.add(wrapper); + }; + splitMutationsIntoAccordAndNormal(cm, mutations, splitConsumer); + attributeNonAccordLatency = !wrappers.isEmpty(); + cleanup.setMutationsWaitingFor(wrappers.size() + (accordMutations.isEmpty() ? 0 : 1)); + Tracing.trace("Split batch into Accord {} and normal {}", accordMutations, wrappers); + + // If the entire batch can execute on Accord then we can skip the batch log entirely + // Write to the batch log first in case it fails so we don't end up with Accord applying + // part of the batch independently + if (!wrappers.isEmpty() && !wroteToBatchLog) + { + // write to the batchlog, including writes that will be routed to Accord to preserve the behavior + // of the batch log where if part of a batch is visible then eventually the entire batch is visible. + // If the Accord routed mutations depend on the Accord txn succeeding then it is no longer consistent + // with the mutations delivered by the batch log since an unacknowledged Accord txn won't be retried + // unless those mutations are also written to the batch log + // Only write to the log once and reuse the batchUUID for every attempt to route the mutations correctly + doFallibleWriteWithMetricTracking(() -> syncWriteToBatchlog(mutations, batchlogReplicaPlan, batchUUID, requestTime), consistencyLevel); + Tracing.trace("Successfully wrote to batchlog"); + wroteToBatchLog = true; + } - // write to the batchlog - syncWriteToBatchlog(mutations, replicaPlan, batchUUID, requestTime); + // Start Accord executing so it executes while the mutations are synchronously applied + AsyncTxnResult accordResult = !accordMutations.isEmpty() ? mutateWithAccordAsync(cm, accordMutations, consistencyLevel, requestTime) : null; - // now actually perform the writes and wait for them to complete - syncWriteBatchedMutations(wrappers, Stage.MUTATION, requestTime); - } - catch (UnavailableException e) - { - writeMetrics.unavailables.mark(); - writeMetricsForLevel(consistency_level).unavailables.mark(); - Tracing.trace("Unavailable"); - throw e; - } - catch (WriteTimeoutException e) - { - writeMetrics.timeouts.mark(); - writeMetricsForLevel(consistency_level).timeouts.mark(); - Tracing.trace("Write timeout; received {} of {} required replies", e.received, e.blockFor); - throw e; + Throwable failure = null; + try + { + // now actually perform the writes and wait for them to complete + if (!wrappers.isEmpty()) + { + doFallibleWriteWithMetricTracking(() -> syncWriteBatchedMutations(wrappers, Stage.MUTATION, requestTime), consistencyLevel); + Tracing.trace("Successfully wrote normal mutations"); + } + } + catch (RetryOnDifferentSystemException e) + { + writeMetrics.retryDifferentSystem.mark(); + writeMetricsForLevel(consistencyLevel).retryDifferentSystem.mark(); + logger.debug("Retrying batch txn on different system because some mutations were misrouted"); + Tracing.trace("Got {} from normal mutations, will retry", e); + continue; + } + catch (CoordinatorBehindException e) + { + writeMetrics.retryCoordinatorBehind.mark(); + writeMetricsForLevel(consistencyLevel).retryCoordinatorBehind.mark(); + mutations.forEach(IMutation::clearCachedSerializationsForRetry); + logger.debug("Retrying batch now that coordinator has caught up to cluster metadata"); + Tracing.trace("Got {} from normal mutations, will retry", e); + continue; + } + catch (Exception e) + { + failure = Throwables.merge(failure, e); + } + finally + { + // Try to exclude most of the Accord time + nonAccordEndTime = nanoTime(); + } + + // Check if the Accord mutations succeeded asynchronously + try + { + // It's notable here that the Accord portion of the batch will not be hinted + // while the regular mutations are hinted on failure and also going to be replayed later from + // the batch log. It wouldn't be difficult to add hinting here, but it does seem redundant with + // the batch log. + if (accordResult != null) + { + IAccordService accord = AccordService.instance(); + TxnResult.Kind kind = accord.getTxnResult(accordResult, true, consistencyLevel, requestTime).kind(); + if (kind == retry_new_protocol && failure == null) + continue; + Tracing.trace("Successfully wrote Accord mutations"); + cleanup.ackMutation(); + } + } + catch (Exception e) + { + failure = Throwables.merge(failure, e); + } + if (failure != null) + throw unchecked(failure); + break; + } } - catch (WriteFailureException e) + catch (Exception t) { - writeMetrics.failures.mark(); - writeMetricsForLevel(consistency_level).failures.mark(); - Tracing.trace("Write failure; received {} of {} required replies", e.received, e.blockFor); - throw e; + // Unexpected error so it would be helpful to have details + Tracing.trace("{}", getStackTraceAsToString(t)); + throw t; } finally { - long latency = nanoTime() - startTime; - writeMetrics.addNano(latency); - writeMetricsForLevel(consistency_level).addNano(latency); - updateCoordinatorWriteLatencyTableMetric(mutations, latency); + if (attributeNonAccordLatency) + { + // On the exception path nonAccordEndTime will be -1 + long latency = nonAccordEndTime != -1 ? nonAccordEndTime : nanoTime() - startTime; + writeMetrics.addNano(latency); + writeMetricsForLevel(consistencyLevel).addNano(latency); + updateCoordinatorWriteLatencyTableMetric(mutations, latency); + } } } @@ -1501,7 +1623,7 @@ private static void asyncWriteBatchedMutations(List } } - private static void syncWriteBatchedMutations(List wrappers, Stage stage, Dispatcher.RequestTime requestTime) + private static void syncWriteBatchedMutations(Iterable wrappers, Stage stage, Dispatcher.RequestTime requestTime) throws WriteTimeoutException, OverloadedException { String localDataCenter = DatabaseDescriptor.getLocator().local().datacenter; @@ -1559,22 +1681,12 @@ public static AbstractWriteResponseHandler performWrite(IMutation mut // same as performWrites except does not initiate writes (but does perform availability checks). private static WriteResponseHandlerWrapper wrapBatchResponseHandler(Mutation mutation, - ConsistencyLevel consistencyLevel, + ReplicaPlan.ForWrite replicaPlan, ConsistencyLevel batchConsistencyLevel, WriteType writeType, BatchlogResponseHandler.BatchlogCleanup cleanup, Dispatcher.RequestTime requestTime) { - Keyspace keyspace = Keyspace.open(mutation.getKeyspaceName()); - Token tk = mutation.key().getToken(); - - ReplicaPlan.ForWrite replicaPlan = ReplicaPlans.forWrite(keyspace, consistencyLevel, tk, ReplicaPlans.writeNormal); - - if (replicaPlan.lookup(FBUtilities.getBroadcastAddressAndPort()) != null) - writeMetrics.localRequests.mark(); - else - writeMetrics.remoteRequests.mark(); - AbstractReplicationStrategy rs = replicaPlan.replicationStrategy(); AbstractWriteResponseHandler writeHandler = rs.getWriteResponseHandler(replicaPlan, null, writeType, mutation, requestTime); BatchlogResponseHandler batchHandler = new BatchlogResponseHandler<>(writeHandler, batchConsistencyLevel.blockFor(rs), cleanup, requestTime); @@ -1603,13 +1715,17 @@ private static WriteResponseHandlerWrapper wrapViewBatchResponseHandler(Mutation } // used by atomic_batch_mutate to decouple availability check from the write itself, caches consistency level and endpoints. - private static class WriteResponseHandlerWrapper + public static class WriteResponseHandlerWrapper { - final BatchlogResponseHandler handler; - final Mutation mutation; + @Nonnull + public final BatchlogResponseHandler handler; + @Nonnull + public final Mutation mutation; - WriteResponseHandlerWrapper(BatchlogResponseHandler handler, Mutation mutation) + public WriteResponseHandlerWrapper(@Nonnull BatchlogResponseHandler handler, @Nonnull Mutation mutation) { + checkNotNull(handler); + checkNotNull(mutation); this.handler = handler; this.mutation = mutation; } @@ -1844,7 +1960,7 @@ public void runMayThrow() } catch (Exception ex) { - if (!(ex instanceof WriteTimeoutException)) + if (!(ex instanceof WriteTimeoutException) && !(ex instanceof RetryOnDifferentSystemException)) logger.error("Failed to apply mutation locally : ", ex); handler.onFailure(FBUtilities.getBroadcastAddressAndPort(), RequestFailure.forException(ex)); } @@ -2807,17 +2923,18 @@ public final void run() long timeTakenNanos = now - startTimeNanos(); MessagingService.instance().metrics.recordSelfDroppedMessage(Verb.MUTATION_REQ, timeTakenNanos, NANOSECONDS); - if (requestTime.shouldSendHints()) + // Don't submit a hint if this replica is transient + if (localReplica.isTransient()) + return; + + HintRunnable runnable = new HintRunnable(ImmutableSet.of(localReplica.endpoint())) { - HintRunnable runnable = new HintRunnable(EndpointsForToken.of(localReplica.range().right, localReplica)) + protected void runMayThrow() throws Exception { - protected void runMayThrow() throws Exception - { - LocalMutationRunnable.this.runMayThrow(); - } - }; - submitHint(runnable); - } + LocalMutationRunnable.this.runMayThrow(); + } + }; + submitHint(runnable); return; } @@ -2872,9 +2989,9 @@ public static void logRequestException(Exception exception, Collection targets; - protected HintRunnable(EndpointsForToken targets) + protected HintRunnable(Set targets) { this.targets = targets; } @@ -2892,7 +3009,7 @@ public void run() finally { StorageMetrics.totalHintsInProgress.dec(targets.size()); - for (InetAddressAndPort target : targets.endpoints()) + for (InetAddressAndPort target : targets) getHintsInProgressFor(target).decrementAndGet(); } } @@ -2938,25 +3055,43 @@ private static AtomicInteger getHintsInProgressFor(InetAddressAndPort destinatio } } - public static void submitHint(Mutation mutation, Replica target, AbstractWriteResponseHandler responseHandler) + public static void submitHintForRetryOnDifferentSystem(Mutation mutation) + { + submitHint(mutation, ImmutableSet.of(HintsService.RETRY_ON_DIFFERENT_SYSTEM_ADDRESS), null); + } + + public static Future submitHint(Mutation mutation, Replica target, AbstractWriteResponseHandler responseHandler) + { + return submitHint(mutation, EndpointsForToken.of(target.range().right, target), responseHandler); + } + + private static Future submitHint(Mutation mutation, + EndpointsForToken targets, + AbstractWriteResponseHandler responseHandler) { - submitHint(mutation, EndpointsForToken.of(target.range().right, target), responseHandler); + // hints should not be written for transient replicas because there is no point if they didn't contribute + // to quorum, they would eventually be removed anyways after running incremental repair. + // This logic assumes we don't always write to transient replicas to minimize incremental repair mismatches + // so we may want to walk this back when revisiting transient replication + Replicas.assertFull(targets); + return submitHint(mutation, targets.endpoints(), responseHandler); } - private static void submitHint(Mutation mutation, - EndpointsForToken targets, - AbstractWriteResponseHandler responseHandler) + private static Future submitHint(Mutation mutation, + Set targets, + AbstractWriteResponseHandler responseHandler) { - Replicas.assertFull(targets); // hints should not be written for transient replicas HintRunnable runnable = new HintRunnable(targets) { public void runMayThrow() { Set validTargets = new HashSet<>(targets.size()); Set hostIds = new HashSet<>(targets.size()); - for (InetAddressAndPort target : targets.endpoints()) + for (InetAddressAndPort target : targets) { - UUID hostId = StorageService.instance.getHostIdForEndpoint(target); + UUID hostId = target == HintsService.RETRY_ON_DIFFERENT_SYSTEM_ADDRESS ? + HintsService.RETRY_ON_DIFFERENT_SYSTEM_UUID : + StorageService.instance.getHostIdForEndpoint(target); if (hostId != null) { hostIds.add(hostId); @@ -2981,14 +3116,14 @@ public void runMayThrow() } }; - submitHint(runnable); + return submitHint(runnable); } private static Future submitHint(HintRunnable runnable) { StorageMetrics.totalHintsInProgress.inc(runnable.targets.size()); - for (Replica target : runnable.targets) - getHintsInProgressFor(target.endpoint()).incrementAndGet(); + for (InetAddressAndPort target : runnable.targets) + getHintsInProgressFor(target).incrementAndGet(); return (Future) Stage.MUTATION.submit(runnable); } @@ -3181,9 +3316,9 @@ public static class ConsensusAttemptResult RowIterator casResult; @Nonnull - PartitionIterator serialReadResult; + public final PartitionIterator serialReadResult; - boolean shouldRetryOnNewConsensusProtocol; + public final boolean shouldRetryOnNewConsensusProtocol; private ConsensusAttemptResult(@Nullable RowIterator casResult, @Nullable PartitionIterator serialReadResult, boolean shouldRetryOnNewConsensusProtocol) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 27e838cbcf08..1e84338f5fe3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -36,6 +36,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.coordinate.Timeout; import accord.local.Command; import accord.local.Node; import accord.messages.AbstractEpochRequest; @@ -660,7 +661,26 @@ public void run() if (l.isEmpty()) waitForEpochs.pushLong(waitForEpoch); l.add(context); - node.withEpoch(waitForEpoch, this::runOnce); + BiConsumer withEpochCallback = new BiConsumer<>() + { + @Override + public void accept(Void unused, Throwable withEpochFailure) + { + if (withEpochFailure != null) + { + // Nothing to do but keep waiting + if (withEpochFailure instanceof Timeout) + { + node.withEpoch(waitForEpoch, this); + return; + } + else + throw new RuntimeException(withEpochFailure); + } + runOnce(); + } + }; + node.withEpoch(waitForEpoch, withEpochCallback); } // Next, process all delayed epochs diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 8574b5db2726..b3e03cb8c854 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -27,49 +27,37 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.BiFunction; -import java.util.function.LongSupplier; +import java.util.function.Supplier; import java.util.stream.Collectors; import javax.annotation.Nonnull; - +import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; import com.google.common.primitives.Ints; - -import accord.coordinate.Barrier; -import accord.coordinate.CoordinateSyncPoint; -import accord.coordinate.Exhausted; -import accord.coordinate.FailureAccumulator; -import accord.coordinate.TopologyMismatch; -import accord.impl.CoordinateDurabilityScheduling; -import accord.local.CommandStores; -import accord.primitives.SyncPoint; -import org.apache.cassandra.config.CassandraRelevantProperties; -import org.apache.cassandra.cql3.statements.RequestValidations; -import org.apache.cassandra.exceptions.RequestExecutionException; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.service.accord.exceptions.ReadExhaustedException; -import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; -import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.service.accord.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.BarrierType; import accord.api.Result; import accord.config.LocalConfig; +import accord.coordinate.Barrier; +import accord.coordinate.CoordinateSyncPoint; import accord.coordinate.CoordinationFailed; +import accord.coordinate.Exhausted; +import accord.coordinate.FailureAccumulator; import accord.coordinate.Preempted; import accord.coordinate.Timeout; +import accord.coordinate.TopologyMismatch; import accord.impl.AbstractConfigurationService; +import accord.impl.CoordinateDurabilityScheduling; import accord.impl.SimpleProgressLog; import accord.impl.SizeOfIntersectionSorter; +import accord.local.CommandStores; import accord.local.DurableBefore; import accord.local.Node; import accord.local.Node.Id; @@ -78,7 +66,11 @@ import accord.local.ShardDistributor.EvenSplit; import accord.messages.LocalRequest; import accord.messages.Request; +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.Seekable; import accord.primitives.Seekables; +import accord.primitives.SyncPoint; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; @@ -91,25 +83,46 @@ import accord.utils.async.AsyncResult; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.concurrent.Shutdownable; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.statements.RequestValidations; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; +import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; +import org.apache.cassandra.service.accord.api.AccordScheduler; +import org.apache.cassandra.service.accord.api.AccordTopologySorter; +import org.apache.cassandra.service.accord.api.CompositeTopologySorter; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.exceptions.ReadExhaustedException; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; +import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; +import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; import org.apache.cassandra.service.accord.txn.TxnResult; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.TableMigrationState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.Blocking; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; @@ -120,7 +133,10 @@ import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static accord.messages.SimpleReply.Ok; +import static accord.primitives.Routable.Domain.Key; +import static accord.primitives.Routable.Domain.Range; import static accord.utils.Invariants.checkState; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; @@ -161,15 +177,21 @@ public IVerbHandler verbHandler() } @Override - public long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + public Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + { + throw new UnsupportedOperationException(); + } + + @Override + public Seekables barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) { throw new UnsupportedOperationException("No accord barriers should be executed when accord.enabled = false in cassandra.yaml"); } @Override - public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + public Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) { - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("No accord repairs should be executed when accord.enabled = false in cassandra.yaml"); } @Override @@ -179,9 +201,15 @@ public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierTyp } @Override - public long repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) + public @Nonnull AsyncTxnResult coordinateAsync(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { - throw new UnsupportedOperationException("No accord repairs should be executed when accord.enabled = false in cassandra.yaml"); + throw new UnsupportedOperationException("No accord transaction should be executed when accord.enabled = false in cassandra.yaml"); + } + + @Override + public TxnResult getTxnResult(AsyncTxnResult asyncTxnResult, boolean isWrite, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + throw new UnsupportedOperationException("No accord transaction should be executed when accord.enabled = false in cassandra.yaml"); } @Override @@ -328,7 +356,7 @@ private AccordService(Id localId) this::handleLocalRequest, configService, AccordService::uniqueNow, - NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, AccordService::uniqueNow), + NodeTimeService.elapsedWrapperFromMonotonicSource(NANOSECONDS, Clock.Global::nanoTime), () -> dataStore, new KeyspaceSplitter(new EvenSplit<>(DatabaseDescriptor.getAccordShardCount(), getPartitioner().accordSplitter())), agent, @@ -369,8 +397,17 @@ public IVerbHandler verbHandler() return requestHandler; } - private > long barrier(@Nonnull S keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, BiFunction>> syncPoint) + private > Seekables barrier(@Nonnull S keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, BiFunction>> syncPoint) { + Stopwatch sw = Stopwatch.createStarted(); + keysOrRanges = (S)intersectionWithAccordManagedRanges(keysOrRanges); + // It's possible none of them were Accord managed and we aren't going to treat that as an error + if (keysOrRanges.isEmpty()) + { + logger.info("Skipping barrier because there are no ranges managed by Accord"); + return keysOrRanges; + } + AccordClientRequestMetrics metrics = isForWrite ? accordWriteMetrics : accordReadMetrics; TxnId txnId = null; try @@ -382,14 +419,15 @@ public IVerbHandler verbHandler() : Barrier.barrier(node, keysOrRanges, epoch, barrierType, syncPoint); long deadlineNanos = requestTime.startedAtNanos() + timeoutNanos; Timestamp barrierExecuteAt = AsyncChains.getBlocking(asyncResult, deadlineNanos - nanoTime(), NANOSECONDS); - logger.debug("Completed in {}ms barrier key: {} epoch: {} barrierType: {} isForWrite {}", + logger.debug("Completed barrier attempt in {}ms, {}ms since attempts start, barrier key: {} epoch: {} barrierType: {} isForWrite {}", + sw.elapsed(MILLISECONDS), NANOSECONDS.toMillis(nanoTime() - requestTime.startedAtNanos()), keysOrRanges, epoch, barrierType, isForWrite); - return barrierExecuteAt.epoch(); + return keysOrRanges; } catch (ExecutionException e) { - Throwable cause = e.getCause(); + Throwable cause = Throwables.getRootCause(e); if (cause instanceof Timeout) { metrics.timeouts.mark(); @@ -431,7 +469,7 @@ public IVerbHandler verbHandler() } @Override - public long barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + public Seekables barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) { return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, null); } @@ -442,12 +480,59 @@ public long barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.Requ } @Override - public long repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) + public Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) { Set allNodes = allEndpoints.stream().map(configService::mappedId).collect(Collectors.toUnmodifiableSet()); return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, repairSyncPoint(allNodes)); } + private static > Seekables intersectionWithAccordManagedRanges(Seekables keysOrRanges) + { + TableId tableId = null; + for (Seekable seekable : keysOrRanges) + { + TableId newTableId; + if (keysOrRanges.domain() == Key) + newTableId = ((PartitionKey)seekable).table(); + else if (keysOrRanges.domain() == Range) + newTableId = ((TokenRange)seekable).table(); + else + throw new IllegalStateException("Unexpected domain " + keysOrRanges.domain()); + + if (tableId == null) + tableId = newTableId; + else if (!tableId.equals(newTableId)) + throw new IllegalArgumentException("Currently only one table is handled here."); + } + + ClusterMetadata cm = ClusterMetadata.current(); + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tableId); + TableMetadata tm = cfs.metadata(); + + // Barriers can be needed just because it's an Accord managed range, but it could also be a migration back to Paxos + // in which case we do want to barrier the migrating/migrated ranges even though the target for the migration is not Accord + // In either case Accord should be aware of those ranges and not generate a topology mismatch + if (tm.params.transactionalMode != TransactionalMode.off || tm.params.transactionalMigrationFrom.from != TransactionalMode.off) + { + TableMigrationState tms = cm.consensusMigrationState.tableStates.get(tm.id); + // null is fine could be completely migrated or was always an Accord table on creation + if (tms == null) + return keysOrRanges; + Ranges migratingAndMigratedRanges = AccordTopology.toAccordRanges(tms.tableId, tms.migratingAndMigratedRanges); + return keysOrRanges.slice(migratingAndMigratedRanges); + } + + switch (keysOrRanges.domain()) + { + case Key: + return Keys.EMPTY; + case Range: + return Ranges.EMPTY; + default: + throw new IllegalStateException("Only keys and ranges are supported"); + } + } + @VisibleForTesting static ReadTimeoutException newBarrierTimeout(TxnId txnId, boolean global) { @@ -474,21 +559,27 @@ static boolean isTimeout(Throwable t) } @VisibleForTesting - static long doWithRetries(Blocking blocking, LongSupplier action, int retryAttempts, long initialBackoffMillis, long maxBackoffMillis) throws InterruptedException + static Seekables doWithRetries(Blocking blocking, Supplier action, int retryAttempts, long initialBackoffMillis, long maxBackoffMillis) throws InterruptedException { // Since we could end up having the barrier transaction or the transaction it listens to invalidated Throwable existingFailures = null; - Long success = null; + Seekables success = null; long backoffMillis = initialBackoffMillis; for (int attempt = 0; attempt < retryAttempts; attempt++) { try { - success = action.getAsLong(); + success = action.get(); break; } + catch (TopologyMismatch topologyMismatch) + { + // Retry topology mismatch immediately because we should be able calculate the correct ranges immediately + backoffMillis = 0; + } catch (RequestExecutionException | CoordinationFailed newFailures) { + logger.error("Had failure on barrier", newFailures); existingFailures = FailureAccumulator.append(existingFailures, newFailures, AccordService::isTimeout); try @@ -514,6 +605,7 @@ static long doWithRetries(Blocking blocking, LongSupplier action, int retryAttem } if (success == null) { + logger.error("Ran out of retries for barrier"); checkState(existingFailures != null, "Didn't have success, but also didn't have failures"); Throwables.throwIfUnchecked(existingFailures); throw new RuntimeException(existingFailures); @@ -522,7 +614,7 @@ static long doWithRetries(Blocking blocking, LongSupplier action, int retryAttem } @Override - public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + public Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException { return doWithRetries(Blocking.Default.instance, () -> AccordService.instance().barrier(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite), DatabaseDescriptor.getAccordBarrierRetryAttempts(), @@ -531,7 +623,7 @@ public long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierTyp } @Override - public long repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException + public Seekables repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException { return doWithRetries(Blocking.Default.instance, () -> AccordService.instance().repair(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite, allEndpoints), DatabaseDescriptor.getAccordBarrierRetryAttempts(), @@ -558,38 +650,86 @@ public TopologyManager topology() @Override public @Nonnull TxnResult coordinate(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { + AsyncTxnResult asyncTxnResult = coordinateAsync(txn, consistencyLevel, requestTime); + return getTxnResult(asyncTxnResult, txn.isWrite(), consistencyLevel, requestTime); + } + + @Override + public @Nonnull AsyncTxnResult coordinateAsync(Txn txn, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + TxnId txnId = node.nextTxnId(txn.kind(), txn.keys().domain()); AccordClientRequestMetrics metrics = txn.isWrite() ? accordWriteMetrics : accordReadMetrics; - TxnId txnId = null; - try - { - metrics.keySize.update(txn.keys().size()); - txnId = node.nextTxnId(txn.kind(), txn.keys().domain()); - long deadlineNanos = requestTime.startedAtNanos() + DatabaseDescriptor.getTransactionTimeout(NANOSECONDS); - AsyncResult asyncResult = node.coordinate(txnId, txn); - Result result = AsyncChains.getBlocking(asyncResult, deadlineNanos - nanoTime(), NANOSECONDS); - return (TxnResult) result; - } - catch (ExecutionException e) - { - Throwable cause = e.getCause(); + metrics.keySize.update(txn.keys().size()); + AsyncResult asyncResult = node.coordinate(txnId, txn); + AsyncTxnResult asyncTxnResult = new AsyncTxnResult(txnId); + asyncResult.addCallback((success, failure) -> { + long durationNanos = nanoTime() - requestTime.startedAtNanos(); + metrics.addNano(durationNanos); + Throwable cause = failure != null ? Throwables.getRootCause(failure) : null; + if (success != null) + { + if (((TxnResult)success).kind() == TxnResult.Kind.retry_new_protocol) + { + metrics.retryDifferentSystem.mark(); + Tracing.trace("Got retry different system error from Accord, will retry"); + } + asyncTxnResult.trySuccess((TxnResult)success); + return; + } + if (cause instanceof Timeout) { - metrics.timeouts.mark(); - throw newTimeout(txnId, txn, consistencyLevel); + // Don't mark the metric here, should be done in getTxnResult to ensure it only happens once + // since both Accord and the thread blocked on the result can trigger a timeout + asyncTxnResult.tryFailure(newTimeout(txnId, txn.isWrite(), consistencyLevel)); + return; } if (cause instanceof Preempted) { + metrics.preempted.mark(); //TODO need to improve // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match - throw newPreempted(txnId, txn, consistencyLevel); + asyncTxnResult.tryFailure(newPreempted(txnId, txn.isWrite(), consistencyLevel)); + return; } if (cause instanceof TopologyMismatch) { - throw RequestValidations.invalidRequest(cause.getMessage()); + metrics.topologyMismatches.mark(); + asyncTxnResult.tryFailure(RequestValidations.invalidRequest(cause.getMessage())); + return; } metrics.failures.mark(); - throw new RuntimeException(cause); + asyncTxnResult.tryFailure(new RuntimeException(cause)); + }); + return asyncTxnResult; + } + + @Override + public TxnResult getTxnResult(AsyncTxnResult asyncTxnResult, boolean isWrite, @Nullable ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + AccordClientRequestMetrics metrics = isWrite ? accordWriteMetrics : accordReadMetrics; + try + { + long deadlineNanos = requestTime.startedAtNanos() + DatabaseDescriptor.getTransactionTimeout(NANOSECONDS); + TxnResult result = asyncTxnResult.get(deadlineNanos - nanoTime(), NANOSECONDS); + return result; + } + catch (ExecutionException e) + { + // Metrics except timeout have already been handled + Throwable cause = e.getCause(); + if (cause instanceof RequestTimeoutException) + { + // Mark here instead of in coordinate async since this is where the request timeout actually occurs + metrics.timeouts.mark(); + cause.addSuppressed(e); + throw (RequestTimeoutException) cause; + } + else if (cause instanceof RuntimeException) + throw (RuntimeException)cause; + else + throw new RuntimeException(cause); } catch (InterruptedException e) { @@ -599,11 +739,7 @@ public TopologyManager topology() catch (TimeoutException e) { metrics.timeouts.mark(); - throw newTimeout(txnId, txn, consistencyLevel); - } - finally - { - metrics.addNano(nanoTime() - requestTime.startedAtNanos()); + throw newTimeout(asyncTxnResult.txnId, isWrite, consistencyLevel); } } @@ -614,15 +750,20 @@ private void handleLocalRequest(LocalRequest request, BiConsumer message) throws IOException if (node.topology().hasEpoch(waitForEpoch)) request.process(node, fromNodeId, message); else - node.withEpoch(waitForEpoch, () -> request.process(node, fromNodeId, message)); + node.withEpoch(waitForEpoch, (ignored, withEpochFailure) -> { + if (withEpochFailure != null) + throw new RuntimeException("Timed out waiting for epoch when processing message from " + fromNodeId + " to " + node + " message " + message, withEpochFailure); + request.process(node, fromNodeId, message); + }); } } diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index b804e80fecca..4a8c0be6bb42 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -18,6 +18,15 @@ package org.apache.cassandra.service.accord; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableSet; + import accord.api.BarrierType; import accord.local.CommandStores; import accord.local.DurableBefore; @@ -27,8 +36,8 @@ import accord.primitives.Ranges; import accord.primitives.Seekables; import accord.primitives.Txn; +import accord.primitives.TxnId; import accord.topology.TopologyManager; -import com.google.common.collect.ImmutableSet; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; @@ -37,19 +46,14 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; -import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.AccordScheduler; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; -import javax.annotation.Nonnull; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.stream.Collectors; +import static com.google.common.base.Preconditions.checkNotNull; public interface IAccordService { @@ -58,25 +62,21 @@ public interface IAccordService IVerbHandler verbHandler(); - long barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException; + Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException; - long barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite); + Seekables barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite); - default long repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException + default Seekables repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException { throw new UnsupportedOperationException(); } - long repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints); + Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints); default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List> ranges) { String ks = cfs.keyspace.getName(); - Ranges accordRanges = Ranges.of(ranges - .stream() - .map(r -> new TokenRange(new TokenKey(cfs.getTableId(), r.left), new TokenKey(cfs.getTableId(), r.right))) - .collect(Collectors.toList()) - .toArray(new accord.primitives.Range[0])); + Ranges accordRanges = AccordTopology.toAccordRanges(ks, ranges); try { barrierWithRetries(accordRanges, Epoch.FIRST.getEpoch(), BarrierType.global_async, true); @@ -89,6 +89,21 @@ default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List @Nonnull TxnResult coordinate(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime); + class AsyncTxnResult extends AsyncPromise + { + public final @Nonnull TxnId txnId; + + public AsyncTxnResult(@Nonnull TxnId txnId) + { + checkNotNull(txnId); + this.txnId = txnId; + } + } + + @Nonnull + AsyncTxnResult coordinateAsync(@Nonnull Txn txn, @Nonnull ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime); + TxnResult getTxnResult(AsyncTxnResult asyncTxnResult, boolean isWrite, @Nullable ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime); + long currentEpoch(); void setCacheSize(long kb); diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 899a00c4bf45..7caeead9b1fc 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -132,10 +132,14 @@ public int cfkPruneInterval() return 32; } + /** + * Create an empty transaction that Accord can use for its internal transactions. This is not suitable + * for tests since it skips validation done by regular transactions. + */ @Override - public Txn emptyTxn(Kind kind, Seekables seekables) + public Txn emptySystemTxn(Kind kind, Seekables seekables) { - return new Txn.InMemory(kind, seekables, TxnRead.EMPTY, TxnQuery.EMPTY, null); + return new Txn.InMemory(kind, seekables, TxnRead.EMPTY, TxnQuery.UNSAFE_EMPTY, null); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index bafb96b4db22..5cff4f818466 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -28,9 +28,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; -import accord.messages.ReadTxnData; -import accord.primitives.Ballot; -import org.apache.cassandra.schema.TableId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,6 +40,8 @@ import accord.local.Node.Id; import accord.messages.Commit; import accord.messages.Commit.Kind; +import accord.messages.ReadTxnData; +import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.Participants; @@ -74,6 +73,7 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.accord.AccordEndpointMapper; import org.apache.cassandra.service.accord.TokenRange; @@ -239,9 +239,9 @@ public void sendReadCommand(Message message, InetAddressAndPort to, @Override public void sendReadRepairMutation(Message message, InetAddressAndPort to, RequestCallback callback) { + checkArgument(message.payload.allowsPotentialTransactionConflicts()); Node.Id id = endpointMapper.mappedId(to); - Mutation mutation = message.payload; - AccordInteropReadRepair readRepair = new AccordInteropReadRepair(id, executes, txnId, readScope, executeAt.epoch(), mutation); + AccordInteropReadRepair readRepair = new AccordInteropReadRepair(id, executes, txnId, readScope, executeAt.epoch(), message.payload); node.send(id, readRepair, executor, new AccordInteropReadRepair.ReadRepairCallback(id, to, message, callback, this)); } @@ -260,8 +260,8 @@ private AsyncChain readChains() // This should only rarely occur when coordinators start a transaction in a migrating range // because they haven't yet updated their cluster metadata. - // It would be harmless to do the read, but we can respond faster skipping it - // and getting the transaction on the correct protocol + // It would be harmless to do the read, because it will be rejected in `TxnQuery` anyways, + // but it's faster to skip the read TableMigrationState tms = ConsensusTableMigration.getTableMigrationState(command.metadata().id); AccordClientRequestMetrics metrics = txn.kind().isWrite() ? accordWriteMetrics : accordReadMetrics; if (ConsensusRequestRouter.instance.isKeyInMigratingOrMigratedRangeFromAccord(command.metadata(), tms, command.partitionKey())) @@ -376,13 +376,7 @@ public ReadCommand maybeAllowOutOfRangeReads(ReadCommand readCommand) return readCommand.allowOutOfRangeReads(); } - @Override - public Mutation maybeAllowOutOfRangeMutations(Mutation m) - { - return m.allowOutOfRangeMutations(); - } - - // Prrovide request callbacks with a way to send maximal commits on Insufficient responses + // Provide request callbacks with a way to send maximal commits on Insufficient responses @Override public void sendMaximalCommit(Id to) { diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java index 36dcedd09e23..89b6577b83bc 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadCallback.java @@ -20,12 +20,14 @@ import javax.annotation.Nonnull; +import accord.coordinate.Timeout; import accord.local.Node; import accord.messages.Callback; import accord.messages.ReadData.ReadOk; import accord.messages.ReadData.ReadReply; import accord.utils.Invariants; import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.RequestCallback; @@ -78,7 +80,14 @@ else if (reply == Insufficient) public void onFailure(Node.Id from, Throwable failure) { - wrapped.onFailure(endpoint, RequestFailure.UNKNOWN); + RequestFailure requestFailure; + // Convert from Accord's timeout exception to our failure reason because timeout is something + // that is useful for metrics and can be handled differently + if (failure instanceof Timeout) + requestFailure = new RequestFailure(RequestFailureReason.TIMEOUT, failure); + else + requestFailure = new RequestFailure(RequestFailureReason.UNKNOWN, failure); + wrapped.onFailure(endpoint, requestFailure); } public void onCallbackFailure(Node.Id from, Throwable failure) diff --git a/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java index 924662b38da3..18fb045475df 100644 --- a/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java +++ b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord.repair; import java.math.BigInteger; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.concurrent.Executor; @@ -89,20 +90,21 @@ public Epoch minEpoch() return minEpoch; } - public void repair() throws Throwable + public Ranges repair() throws Throwable { + List repairedRanges = new ArrayList<>(); for (accord.primitives.Range range : ranges) - repairRange((TokenRange)range); + repairedRanges.addAll(repairRange((TokenRange)range)); + return Ranges.of(repairedRanges.toArray(new accord.primitives.Range[0])); } - public Future repair(Executor executor) + public Future repair(Executor executor) { - AsyncPromise future = new AsyncPromise<>(); + AsyncPromise future = new AsyncPromise<>(); executor.execute(() -> { try { - repair(); - future.trySuccess(null); + future.trySuccess(repair()); } catch (Throwable e) { @@ -117,8 +119,9 @@ protected void abort(@Nullable Throwable reason) shouldAbort = reason == null ? new RuntimeException("Abort") : reason; } - private void repairRange(TokenRange range) throws Throwable + private List repairRange(TokenRange range) throws Throwable { + List repairedRanges = new ArrayList<>(); int rangeStepUpdateInterval = ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL.getInt(); RoutingKey remainingStart = range.start(); BigInteger rangeSize = splitter.sizeOf(range); @@ -154,7 +157,7 @@ private void repairRange(TokenRange range) throws Throwable if (remainingStart.equals(range.end())) { logger.info("Completed barriers for {} in {} iterations", range, iteration - 1); - return; + return repairedRanges; } // Final repair is whatever remains @@ -169,14 +172,13 @@ private void repairRange(TokenRange range) throws Throwable checkState(lastRepaired == null || toRepair.start().equals(lastRepaired.end()), "Next range should directly follow previous range"); lastRepaired = toRepair; + Ranges barrieredRanges; if (requireAllEndpoints) - { - AccordService.instance().repairWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false, endpoints); - } + barrieredRanges = (Ranges)AccordService.instance().repairWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false, endpoints); else - { - AccordService.instance().barrierWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false); - } + barrieredRanges = (Ranges)AccordService.instance().barrierWithRetries(Seekables.of(toRepair), minEpoch.getEpoch(), BarrierType.global_sync, false); + for (accord.primitives.Range barrieredRange : barrieredRanges) + repairedRanges.add(barrieredRange); remainingStart = toRepair.end(); } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java index defa96b554d8..7051e328c34d 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -50,6 +50,11 @@ public abstract class TxnQuery implements Query { + /** + * Used by transaction statements which will have Accord pass back to the C* coordinator code all the data that is + * read even if it is not returned as part of the result to the client. TxnDataName.returning() will fetch the data + * that is returned from TxnData. + */ public static final TxnQuery ALL = new TxnQuery() { @Override @@ -65,6 +70,10 @@ public Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, } }; + /** + * For transactions that return no results but do still care that they don't apply if the tokens/ranges + * are not owned/managed by Accord + */ public static final TxnQuery NONE = new TxnQuery() { @Override @@ -80,6 +89,9 @@ public Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, } }; + /** + * For supporting CQL CAS compatible transactions + */ public static final TxnQuery CONDITION = new TxnQuery() { @Override @@ -114,7 +126,13 @@ else if (txnData.isEmpty()) } }; - public static final TxnQuery EMPTY = new TxnQuery() + /** + * UNSAFE_EMPTY doesn't validate that the range is owned by Accord so you want to be careful and use NONE + * if your transaction simply doesn't have results because that will validate that Accord owns the range + * for things like blind writes. Empty is used by Accord for things like sync points which may need to exeucte + * for ranges Accord used to manage, but no longer does. + */ + public static final TxnQuery UNSAFE_EMPTY = new TxnQuery() { @Override @@ -171,7 +189,7 @@ public long estimatedSizeOnHeap() @Override public void serialize(TxnQuery query, DataOutputPlus out, int version) throws IOException { - Preconditions.checkArgument(query == null | query == ALL | query == NONE | query == CONDITION | query == EMPTY); + Preconditions.checkArgument(query == null | query == ALL | query == NONE | query == CONDITION | query == UNSAFE_EMPTY); out.writeByte(query == null ? 0 : query.type()); } @@ -185,14 +203,14 @@ public TxnQuery deserialize(DataInputPlus in, int version) throws IOException case 1: return ALL; case 2: return NONE; case 3: return CONDITION; - case 4: return EMPTY; + case 4: return UNSAFE_EMPTY; } } @Override public long serializedSize(TxnQuery query, int version) { - Preconditions.checkArgument(query == null | query == ALL | query == NONE | query == CONDITION | query == EMPTY); + Preconditions.checkArgument(query == null | query == ALL | query == NONE | query == CONDITION | query == UNSAFE_EMPTY); return TypeSizes.sizeof((byte)2); } }; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index 563af8167564..674cf5cb23ad 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -69,7 +69,8 @@ public class TxnUpdate extends AccordUpdate { - private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnUpdate(null, new ByteBuffer[0], null, null)); + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnUpdate(null, new ByteBuffer[0], null, null, false)); + private static final int FLAG_PRESERVE_TIMESTAMPS = 0x1; private final Keys keys; private final ByteBuffer[] fragments; @@ -78,10 +79,15 @@ public class TxnUpdate extends AccordUpdate @Nullable private final ConsistencyLevel cassandraCommitCL; + // Hints and batchlog want to write with the lower timestamp they generated when applying their writes via Accord + // so they don't resurrect data if they are applied at a later time. Accord should be fine with this because + // the writes are still deterministic from the perspective of coordinators/recovery coordinators. + private final boolean preserveTimestamps; + // Memoize computation of condition private Boolean conditionResult; - public TxnUpdate(List fragments, TxnCondition condition, @Nullable ConsistencyLevel cassandraCommitCL) + public TxnUpdate(List fragments, TxnCondition condition, @Nullable ConsistencyLevel cassandraCommitCL, boolean preserveTimestamps) { checkArgument(cassandraCommitCL == null || IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(cassandraCommitCL)); // TODO: Figure out a way to shove keys into TxnCondition, and have it implement slice/merge. @@ -90,14 +96,16 @@ public TxnUpdate(List fragments, TxnCondition condition, @Nul this.fragments = toSerializedValuesArray(keys, fragments, fragment -> fragment.key, TxnWrite.Fragment.serializer); this.condition = serialize(condition, TxnCondition.serializer); this.cassandraCommitCL = cassandraCommitCL; + this.preserveTimestamps = preserveTimestamps; } - private TxnUpdate(Keys keys, ByteBuffer[] fragments, ByteBuffer condition, ConsistencyLevel cassandraCommitCL) + private TxnUpdate(Keys keys, ByteBuffer[] fragments, ByteBuffer condition, ConsistencyLevel cassandraCommitCL, boolean preserveTimestamps) { this.keys = keys; this.fragments = fragments; this.condition = condition; this.cassandraCommitCL = cassandraCommitCL; + this.preserveTimestamps = preserveTimestamps; } @Override @@ -141,12 +149,19 @@ public Keys keys() return keys; } + // Batch log and hints want to keep their lower timestamp for the applied writes to avoid resurrecting old data + // when they are applied later, possibly after further updates have already been acknowledged. + public boolean preserveTimestamps() + { + return preserveTimestamps; + } + @Override public Update slice(Ranges ranges) { Keys keys = this.keys.slice(ranges); // TODO: Slice the condition. - return new TxnUpdate(keys, select(this.keys, keys, fragments), condition, cassandraCommitCL); + return new TxnUpdate(keys, select(this.keys, keys, fragments), condition, cassandraCommitCL, preserveTimestamps); } @Override @@ -154,7 +169,7 @@ public Update intersecting(Participants participants) { Keys keys = this.keys.intersecting(participants); // TODO: Slice the condition. - return new TxnUpdate(keys, select(this.keys, keys, fragments), condition, cassandraCommitCL); + return new TxnUpdate(keys, select(this.keys, keys, fragments), condition, cassandraCommitCL, preserveTimestamps); } private static ByteBuffer[] select(Keys in, Keys out, ByteBuffer[] from) @@ -176,7 +191,7 @@ public Update merge(Update update) TxnUpdate that = (TxnUpdate) update; Keys mergedKeys = this.keys.with(that.keys); ByteBuffer[] mergedFragments = merge(this.keys, that.keys, this.fragments, that.fragments, mergedKeys.size()); - return new TxnUpdate(mergedKeys, mergedFragments, condition, cassandraCommitCL); + return new TxnUpdate(mergedKeys, mergedFragments, condition, cassandraCommitCL, preserveTimestamps); } private static ByteBuffer[] merge(Keys leftKeys, Keys rightKeys, ByteBuffer[] left, ByteBuffer[] right, int outputSize) @@ -207,7 +222,6 @@ public TxnWrite apply(Timestamp executeAt, Data data) QueryOptions options = QueryOptions.forProtocolVersion(ProtocolVersion.CURRENT); AccordUpdateParameters parameters = new AccordUpdateParameters((TxnData) data, options); - // First completes all fragments and join them with the repairs pending for those partitions for (TxnWrite.Fragment fragment : fragments) // Filter out fragments that already constitute complete updates to avoid persisting them via TxnWrite: if (!fragment.isComplete()) @@ -233,6 +247,7 @@ public List completeUpdatesForKey(RoutableKey key) @Override public void serialize(TxnUpdate update, DataOutputPlus out, int version) throws IOException { + out.writeByte(update.preserveTimestamps ? FLAG_PRESERVE_TIMESTAMPS : 0); KeySerializers.keys.serialize(update.keys, out, version); writeWithVIntLength(update.condition, out); serializeArray(update.fragments, out, version, ByteBufferUtil.byteBufferSerializer); @@ -242,17 +257,20 @@ public void serialize(TxnUpdate update, DataOutputPlus out, int version) throws @Override public TxnUpdate deserialize(DataInputPlus in, int version) throws IOException { + int flags = in.readByte(); + boolean preserveTimestamps = (FLAG_PRESERVE_TIMESTAMPS & flags) == 1; Keys keys = KeySerializers.keys.deserialize(in, version); ByteBuffer condition = readWithVIntLength(in); ByteBuffer[] fragments = deserializeArray(in, version, ByteBufferUtil.byteBufferSerializer, ByteBuffer[]::new); ConsistencyLevel consistencyLevel = deserializeNullable(in, version, consistencyLevelSerializer); - return new TxnUpdate(keys, fragments, condition, consistencyLevel); + return new TxnUpdate(keys, fragments, condition, consistencyLevel, preserveTimestamps); } @Override public long serializedSize(TxnUpdate update, int version) { - long size = KeySerializers.keys.serializedSize(update.keys, version); + long size = 1; // flags + size += KeySerializers.keys.serializedSize(update.keys, version); size += serializedSizeWithVIntLength(update.condition); size += serializedArraySize(update.fragments, version, ByteBufferUtil.byteBufferSerializer); size += serializedNullableSize(update.cassandraCommitCL, version, consistencyLevelSerializer); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 5e3e9ff831ef..7d6e67ad8419 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -38,8 +38,8 @@ import accord.api.Key; import accord.api.Write; import accord.impl.AbstractSafeCommandStore; -import accord.impl.TimestampsForKeys; import accord.impl.TimestampsForKey; +import accord.impl.TimestampsForKeys; import accord.local.SafeCommandStore; import accord.primitives.PartialTxn; import accord.primitives.RoutableKey; @@ -138,10 +138,12 @@ public String toString() '}'; } - public AsyncChain write(@Nonnull Function cellToMaybeNewListPath, long timestamp, int nowInSeconds) + public AsyncChain write(boolean preserveTimestamps, @Nonnull Function cellToMaybeNewListPath, long timestamp, int nowInSeconds) { - PartitionUpdate update = new PartitionUpdate.Builder(get(), 0).updateTimesAndPathsForAccord(cellToMaybeNewListPath, timestamp, nowInSeconds).build(); - Mutation mutation = new Mutation(update); + PartitionUpdate update = get(); + if (!preserveTimestamps) + update = new PartitionUpdate.Builder(get(), 0).updateTimesAndPathsForAccord(cellToMaybeNewListPath, timestamp, nowInSeconds).build(); + Mutation mutation = new Mutation(update, true); return AsyncChains.ofRunnable(Stage.MUTATION.executor(), mutation::apply); } @@ -383,10 +385,11 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam List> results = new ArrayList<>(); + boolean preserveTimestamps = ((TxnUpdate)txn.update()).preserveTimestamps(); // Apply updates not specified fully by the client but built from fragments completed by data from reads. // This occurs, for example, when an UPDATE statement uses a value assigned by a LET statement. Function accordListPathSuppler = accordListPathSupplier(timestamp); - forEachWithKey((PartitionKey) key, write -> results.add(write.write(accordListPathSuppler, timestamp, nowInSeconds))); + forEachWithKey((PartitionKey) key, write -> results.add(write.write(preserveTimestamps, accordListPathSuppler, timestamp, nowInSeconds))); if (isConditionMet) { @@ -396,7 +399,7 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, Timestam TxnUpdate txnUpdate = (TxnUpdate) txn.update(); assert txnUpdate != null : "PartialTxn should contain an update if we're applying a write!"; List updates = txnUpdate.completeUpdatesForKey((RoutableKey) key); - updates.forEach(update -> results.add(update.write(accordListPathSuppler, timestamp, nowInSeconds))); + updates.forEach(update -> results.add(update.write(preserveTimestamps, accordListPathSuppler, timestamp, nowInSeconds))); } if (results.isEmpty()) diff --git a/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java index 0efcbf1ffdad..25bbc0e6f917 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java @@ -47,7 +47,7 @@ /** * This update is used to support blocking read repair from non-transactional Cassandra reads. Cassandra creates - * a read repair mutation per node and this enables some partitiosn to be readable that would otherwise run into messages + * a read repair mutation per node and this enables some partitions to be readable that would otherwise run into messages * size limits. * * This update is used during the `Execute` phase to apply the repair mutations directly in AccordInteropExecution similar @@ -58,7 +58,8 @@ * The state for this update is always kept in memory and is never serialized. Only the Id is propagated so the cache * can evict the update and then load it back. We don't need to persist it or have it be recoverable because if the original * coordinator fails to complete the transaction then the dependent Cassandra read that triggered the read repair will - * also fail and it doesn't matter if the read repair is partially applied or not applied at all. + * also fail and it doesn't matter if the read repair is partially applied or not applied at all since it doesn't propose + * new values. */ public class UnrecoverableRepairUpdate, P extends ReplicaPlan.ForRead> extends AccordUpdate { @@ -117,6 +118,7 @@ private UnrecoverableRepairUpdate(Node.Id nodeId, BlockingReadRepair paren this.keys = keys; this.dk = dk; this.mutations = mutations; + mutations.values().forEach(Mutation::allowPotentialTransactionConflicts); this.writePlan = writePlan; this.updateKey = new Key(nodeId.id, nextCounter.getAndIncrement()); } diff --git a/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java b/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java index 2bcaee3ce213..25524404ec79 100644 --- a/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java +++ b/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java @@ -113,6 +113,10 @@ public ConsistencyLevel commitCLForStrategy(ConsistencyLevel consistencyLevel) return consistencyLevel; } + // TODO (required): This won't work for migration directly from none to full because there is no safe system to read from + // during the first phase (repair). Accord won't read correctly beacuse it won't honor the CL and miss non-transactional writes that haven't been repaired and non-transactional + // reads will miss all the writes being routed through Accord since they occur asynchronously. Something has to give here where either writes routed through are Accord are synchronous at CL + // or reads are routed through Accord and read at quorum as long as the range has not completed the first phase (repair). public ConsistencyLevel readCLForStrategy(ConsistencyLevel consistencyLevel) { if (ignoresSuppliedConsistencyLevel) diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java index 9e051ccc5e89..816e24257548 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusKeyMigrationState.java @@ -46,6 +46,7 @@ import org.apache.cassandra.db.WriteType; import org.apache.cassandra.dht.Range; import org.apache.cassandra.exceptions.CasWriteTimeoutException; +import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -173,7 +174,7 @@ public void maybePerformAccordToPaxosKeyMigration(boolean isForWrite) // TODO (desired): Better query start time TableMigrationState tms = tableMigrationState; - repairKeyAccord(key, tms.keyspaceName, tms.tableId, tms.minMigrationEpoch(key.getToken()).getEpoch(), Dispatcher.RequestTime.forImmediateExecution(), false, isForWrite); + repairKeyAccord(key, tms.tableId, tms.minMigrationEpoch(key.getToken()).getEpoch(), Dispatcher.RequestTime.forImmediateExecution(), false, isForWrite); } private boolean paxosReadSatisfiedByKeyMigration() @@ -264,7 +265,6 @@ public static KeyMigrationState getKeyMigrationState(TableId tableId, DecoratedK * Trigger a distributed repair of Accord state for this key. */ static void repairKeyAccord(DecoratedKey key, - String keyspace, TableId tableId, long minEpoch, Dispatcher.RequestTime requestTime, @@ -283,7 +283,9 @@ static void repairKeyAccord(DecoratedKey key, // will soon be ready to execute, but only waits for the local replica to be ready // Local will only create a transaction if it can't find an existing one to wait on BarrierType barrierType = global ? BarrierType.global_async : BarrierType.local; - AccordService.instance().barrier(Seekables.of(new PartitionKey(tableId, key)), minEpoch, requestTime, DatabaseDescriptor.getTransactionTimeout(TimeUnit.NANOSECONDS), barrierType, isForWrite); + Seekables keysOrRanges = AccordService.instance().barrier(Seekables.of(new PartitionKey(tableId, key)), minEpoch, requestTime, DatabaseDescriptor.getTransactionTimeout(TimeUnit.NANOSECONDS), barrierType, isForWrite); + if (keysOrRanges.isEmpty()) + throw new RetryOnDifferentSystemException(); // We don't save the state to the cache here. Accord will notify the agent every time a barrier happens. } finally diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java new file mode 100644 index 000000000000..32997bfb0c3b --- /dev/null +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java @@ -0,0 +1,359 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.consensus.migration; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.function.Predicate; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.coordinate.CoordinationFailed; +import accord.primitives.Keys; +import accord.primitives.Txn; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.IMutation; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.IAccordService.AsyncTxnResult; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.TxnCondition; +import org.apache.cassandra.service.accord.txn.TxnQuery; +import org.apache.cassandra.service.accord.txn.TxnRead; +import org.apache.cassandra.service.accord.txn.TxnReferenceOperations; +import org.apache.cassandra.service.accord.txn.TxnUpdate; +import org.apache.cassandra.service.accord.txn.TxnWrite; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tracing.Tracing; +import org.apache.cassandra.transport.Dispatcher; + +import static com.google.common.base.Preconditions.checkState; +import static java.util.function.Predicate.not; +import static org.apache.cassandra.dht.Range.isInNormalizedRanges; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.getTableMetadata; + +/** + * Applying mutations can fail with RetryOnDifferentSystemException if a + * mutation conflicts with a table and range that needs to be managed + * transactionally. This impacts mutations, logged/unlogged batches, hints,and blocking read repair. + * + * This class contains the logic needed for managing these retry loops and splitting the mutations up + */ +public class ConsensusMigrationMutationHelper +{ + private static final Logger logger = LoggerFactory.getLogger(ConsensusMigrationMutationHelper.class); + + private static ConsensusMigrationMutationHelper instance = new ConsensusMigrationMutationHelper(); + + public static ConsensusMigrationMutationHelper instance() + { + return instance; + } + + @VisibleForTesting + public static void replaceInstanceForTest(ConsensusMigrationMutationHelper testInstance) + { + instance = testInstance; + } + + @VisibleForTesting + public static void resetInstanceForTest() + { + instance = new ConsensusMigrationMutationHelper(); + } + + public ConsensusMigrationMutationHelper() {} + + private static ConsistencyLevel consistencyLevelForCommit(ClusterMetadata cm, Collection mutations, @Nullable ConsistencyLevel consistencyLevel) + { + // Null means no specific consistency behavior is required from Accord, it's functionally similar to ANY + // if you aren't reading the result back via Accord + if (consistencyLevel == null) + return null; + + for (IMutation mutation : mutations) + { + for (TableId tableId : mutation.getTableIds()) + { + TransactionalMode mode = getTableMetadata(cm, tableId).params.transactionalMode; + // commitCLForStrategy should return either null or the supplied consistency level + // in which case we will commit everything at that CL since Accord doesn't support per table + // commit consistency + ConsistencyLevel commitCL = mode.commitCLForStrategy(consistencyLevel); + if (commitCL != null) + return commitCL; + } + } + return null; + } + + /** + * Result of splitting mutations across Accord and non-transactional boundaries + */ + public static class SplitMutations implements SplitConsumer + { + @Nullable + private List accordMutations; + + @Nullable + private List normalMutations; + + private SplitMutations() {} + + public List accordMutations() + { + return accordMutations; + } + + public List normalMutations() + { + return normalMutations; + } + + @Override + public void consume(@Nullable T accordMutation, @Nullable T normalMutation, List mutations, int mutationIndex) + { + // Avoid allocating an ArrayList in common single mutation single system case + if (mutations.size() == 1 && (accordMutation != null ^ normalMutation != null)) + { + if (accordMutation != null) + accordMutations = mutations; + else + normalMutations = mutations; + return; + } + + if (accordMutation != null) + { + if (accordMutations == null) + accordMutations = new ArrayList<>(Math.min(mutations.size(), 10)); + accordMutations.add(accordMutation); + } + if (normalMutation != null) + { + if (normalMutations == null) + normalMutations = new ArrayList<>(Math.min(mutations.size(), 10)); + normalMutations.add(normalMutation); + } + } + } + + public interface SplitConsumer + { + void consume(@Nullable T accordMutation, @Nullable T normalMutation, List mutations, int mutationIndex); + } + + public static SplitMutations splitMutationsIntoAccordAndNormal(ClusterMetadata cm, List mutations) + { + SplitMutations splitMutations = new SplitMutations<>(); + splitMutationsIntoAccordAndNormal(cm, mutations, splitMutations); + return splitMutations; + } + + public static void splitMutationsIntoAccordAndNormal(ClusterMetadata cm, List mutations, SplitConsumer splitConsumer) + { + for (int i=0,mi=mutations.size(); i splitMutation = instance.splitMutationIntoAccordAndNormal(mutations.get(i), cm); + splitConsumer.consume(splitMutation.accordMutation, splitMutation.normalMutation, mutations, i); + } + } + + /** + * Result of splitting a mutation across Accord and non-transactional boundaries + */ + public static class SplitMutation + { + @Nullable + public final T accordMutation; + @Nullable + public final T normalMutation; + + public SplitMutation(@Nullable T accordMutation, @Nullable T normalMutation) + { + this.accordMutation = accordMutation; + this.normalMutation = normalMutation; + } + } + + public SplitMutation splitMutationIntoAccordAndNormal(T mutation, ClusterMetadata cm) + { + if (mutation.allowsPotentialTransactionConflicts()) + return new SplitMutation<>(null, mutation); + + Token token = mutation.key().getToken(); + Predicate isAccordUpdate = tableId -> tokenShouldBeWrittenThroughAccord(cm, tableId, token); + + T accordMutation = (T)mutation.filter(isAccordUpdate); + T normalMutation = (T)mutation.filter(not(isAccordUpdate)); + for (PartitionUpdate pu : mutation.getPartitionUpdates()) + checkState((accordMutation == null ? false : accordMutation.hasUpdateForTable(pu.metadata().id)) + || (normalMutation == null ? false : normalMutation.hasUpdateForTable(pu.metadata().id)), + "All partition updates should still be present after splitting"); + return new SplitMutation(accordMutation, normalMutation); + } + + public AsyncTxnResult mutateWithAccordAsync(ClusterMetadata cm, Mutation mutation, @Nullable ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + return mutateWithAccordAsync(cm, ImmutableList.of(mutation), consistencyLevel, requestTime); + } + + public static AsyncTxnResult mutateWithAccordAsync(ClusterMetadata cm, Collection mutations, @Nullable ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + int fragmentIndex = 0; + List fragments = new ArrayList<>(mutations.size()); + List partitionKeys = new ArrayList<>(mutations.size()); + for (IMutation mutation : mutations) + { + for (PartitionUpdate update : mutation.getPartitionUpdates()) + { + PartitionKey pk = PartitionKey.of(update); + partitionKeys.add(pk); + fragments.add(new TxnWrite.Fragment(PartitionKey.of(update), fragmentIndex++, update, TxnReferenceOperations.empty())); + } + } + // Potentially ignore commit consistency level if the TransactionalMode specifies full + ConsistencyLevel clForCommit = consistencyLevelForCommit(cm, mutations, consistencyLevel); + TxnUpdate update = new TxnUpdate(fragments, TxnCondition.none(), clForCommit, true); + Txn.InMemory txn = new Txn.InMemory(Keys.of(partitionKeys), TxnRead.EMPTY, TxnQuery.NONE, update); + IAccordService accordService = AccordService.instance(); + try + { + return accordService.coordinateAsync(txn, consistencyLevel, requestTime); + } + catch (CoordinationFailed coordinationFailed) + { + AsyncTxnResult failure = new AsyncTxnResult(coordinationFailed.txnId()); + failure.setFailure(coordinationFailed.wrap()); + return failure; + } + } + + public static void validateSafeToExecuteNonTransactionally(IMutation mutation) throws RetryOnDifferentSystemException + { + if (mutation.allowsPotentialTransactionConflicts()) + return; + + // System keyspaces are never managed by Accord + if (SchemaConstants.isSystemKeyspace(mutation.getKeyspaceName())) + return; + + // Local keyspaces are never managed by Accord + if (Schema.instance.localKeyspaces().containsKeyspace(mutation.getKeyspaceName())) + return; + + ClusterMetadata cm = ClusterMetadata.current(); + + DecoratedKey dk = mutation.key(); + // Check all the partition updates and if any can't be done return an error response + // and the coordinator can retry with things correctly routed + boolean throwRetryOnDifferentSystem = false; + // Track CFS so we only mark each one once + Set markedColumnFamilies = null; + for (PartitionUpdate pu : mutation.getPartitionUpdates()) + { + TableId tableId = pu.metadata().id; + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tableId); + if (tokenShouldBeWrittenThroughAccord(cm, tableId, dk.getToken())) + { + throwRetryOnDifferentSystem = true; + if (markedColumnFamilies == null) + markedColumnFamilies = new HashSet<>(); + if (markedColumnFamilies.add(tableId)) + cfs.metric.mutationsRejectedOnWrongSystem.mark(); + logger.debug("Rejecting mutation on wrong system to table {}.{}", cfs.keyspace.getName(), cfs.name); + Tracing.trace("Rejecting mutation on wrong system to table {}.{} token {}", cfs.keyspace.getName(), cfs.name, dk.getToken()); + } + } + if (throwRetryOnDifferentSystem) + throw new RetryOnDifferentSystemException(); + } + + public static boolean tokenShouldBeWrittenThroughAccord(@Nonnull ClusterMetadata cm, @Nonnull TableId tableId, @Nonnull Token token) + { + TableMetadata tm = getTableMetadata(cm, tableId); + if (tm == null) + return false; + + boolean transactionalModeWritesThroughAccord = tm.params.transactionalMode.writesThroughAccord; + TransactionalMigrationFromMode transactionalMigrationFromMode = tm.params.transactionalMigrationFrom; + boolean migrationFromWritesThroughAccord = transactionalMigrationFromMode.writesThroughAccord(); + if (transactionalModeWritesThroughAccord && migrationFromWritesThroughAccord) + return true; + + // Could be migrating or could be completely migrated, if it's migrating check if the key for this mutation + if (transactionalModeWritesThroughAccord || migrationFromWritesThroughAccord) + { + TableMigrationState tms = cm.consensusMigrationState.tableStates.get(tm.id); + + if (tms == null) + { + if (transactionalMigrationFromMode == TransactionalMigrationFromMode.none) + // There is no migration and no TMS so do what the schema says since no migration should be required + return transactionalModeWritesThroughAccord; + else + // If we are migrating from something and there is no migration state the migration hasn't begun + // so continue to do what we are migrating from does until the range is marked as migrating + return migrationFromWritesThroughAccord; + } + + // This logic is driven by the fact that Paxos is not picky about how data is written since it's txn recovery + // is deterministic in the face of non-deterministic reads because consensus is agreeing on the writes that will be done to the database + // Accord agrees on what computation will produce those writes and then asynchronously executes those computations, potentially multiple times + // with different results if Accord reads non-transactionally written data that could be seen differently by different coordinators + + // If the current mode writes through Accord then we should always write though Accord for ranges managed by Accord. + // Accord needs to do synchronous commit and respect the consistency level so that Accord will later be able to + // read its own writes + if (transactionalModeWritesThroughAccord) + return isInNormalizedRanges(token, tms.migratingAndMigratedRanges); + + // If we are migrating from a mode that used to write to Accord then any range that isn't migrating/migrated + // should continue to write through Accord. + // It's not completely symmetrical because Paxos is able to read Accord's writes by performing a single key barrier + // and regular mutations will be able to do the same thing (needs to be added along with non-transactional reads) + // This means that migrating ranges don't need to be written through Accord because we are running Paxos now + // and not Accord. When migrating to Accord we need to do all the writes through Accord even if we aren't + // reading through Accord so that repair + Accord metadata is sufficient for Accord to be able to read + // safely and deterministically from any coordinator + if (migrationFromWritesThroughAccord) + return !isInNormalizedRanges(token, tms.migratingAndMigratedRanges); + } + return false; + } +} diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java index 2215c734c8f1..b7eb69978f8a 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairResult.java @@ -18,40 +18,46 @@ package org.apache.cassandra.service.consensus.migration; +import javax.annotation.Nullable; + +import accord.primitives.Ranges; import org.apache.cassandra.tcm.Epoch; import static com.google.common.base.Preconditions.checkArgument; public class ConsensusMigrationRepairResult { - private static final ConsensusMigrationRepairResult INELIGIBLE = new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.ineligible, Epoch.EMPTY); + private static final ConsensusMigrationRepairResult INELIGIBLE = new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.ineligible, Epoch.EMPTY, null); public final ConsensusMigrationRepairType type; public final Epoch minEpoch; + @Nullable + public final Ranges barrieredRanges; - private ConsensusMigrationRepairResult(ConsensusMigrationRepairType type, Epoch minEpoch) + private ConsensusMigrationRepairResult(ConsensusMigrationRepairType type, Epoch minEpoch, @Nullable Ranges barrieredRanges) { this.type = type; this.minEpoch = minEpoch; + this.barrieredRanges = barrieredRanges; } - public static ConsensusMigrationRepairResult fromRepair(Epoch minEpoch, boolean paxosAndDataRepaired, boolean accordRepaired, boolean deadNodesExcluded) + public static ConsensusMigrationRepairResult fromRepair(Epoch minEpoch, Ranges barrieredRanges, boolean paxosAndDataRepaired, boolean accordRepaired, boolean deadNodesExcluded) { checkArgument((!paxosAndDataRepaired && !accordRepaired) || minEpoch.isAfter(Epoch.EMPTY), "Epoch should not be empty if Paxos and regular repairs were performed"); if (deadNodesExcluded) return INELIGIBLE; - if (paxosAndDataRepaired && accordRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.either, minEpoch); - if (paxosAndDataRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.paxos, minEpoch); - if (accordRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.accord, minEpoch); + if (paxosAndDataRepaired && accordRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.either, minEpoch, barrieredRanges); + if (paxosAndDataRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.paxos, minEpoch, barrieredRanges); + if (accordRepaired) return new ConsensusMigrationRepairResult(ConsensusMigrationRepairType.accord, minEpoch, barrieredRanges); return INELIGIBLE; } public static ConsensusMigrationRepairResult fromPaxosOnlyRepair(Epoch minEpoch, boolean deadNodesExcluded) { - return fromRepair(minEpoch, false, false, deadNodesExcluded); + return fromRepair(minEpoch, null, false, false, deadNodesExcluded); } - public static ConsensusMigrationRepairResult fromAccordOnlyRepair(Epoch minEpoch, boolean deadNodesExcluded) + public static ConsensusMigrationRepairResult fromAccordOnlyRepair(Epoch minEpoch, Ranges barrieredRanges, boolean deadNodesExcluded) { - return fromRepair(minEpoch, false, true, deadNodesExcluded); + return fromRepair(minEpoch, barrieredRanges, false, true, deadNodesExcluded); } } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java index 3866db0ad094..36452a9691d7 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationRepairType.java @@ -24,16 +24,22 @@ public enum ConsensusMigrationRepairType { - ineligible(0), - paxos(1), - accord(2), - either(3); + ineligible(0, false, false), + paxos(1, false, true), + accord(2, true, false), + either(3, true, true); public final byte value; - ConsensusMigrationRepairType(int value) + public final boolean accordMigrationEligible; + + public final boolean paxosMigrationEligible; + + ConsensusMigrationRepairType(int value, boolean accordMigrationEligible, boolean paxosMigrationEligible) { this.value = SignedBytes.checkedCast(value); + this.accordMigrationEligible = accordMigrationEligible; + this.paxosMigrationEligible = paxosMigrationEligible; } public static ConsensusMigrationRepairType fromString(String repairType) diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java index 70c2c396609a..3763fc995f2b 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusRequestRouter.java @@ -18,6 +18,7 @@ package org.apache.cassandra.service.consensus.migration; +import java.util.Optional; import javax.annotation.Nonnull; import com.google.common.annotations.VisibleForTesting; @@ -26,9 +27,12 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.dht.Range; +import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.locator.EndpointsForToken; import org.apache.cassandra.locator.ReplicaLayout; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.consensus.TransactionalMode; @@ -41,12 +45,11 @@ import static com.google.common.base.Preconditions.checkState; import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.getConsensusMigratedAt; +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget.paxos; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.accord; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV1; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision.paxosV2; -import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget.paxos; - /** * Helper class to decide where to route a request that requires consensus, migrating a key if necessary * before rerouting. @@ -84,13 +87,30 @@ ConsensusRoutingDecision decisionFor(TransactionalMode transactionalMode) return pickPaxos(); } + /* + * Accord never handles local tables, but if the table doesn't exist then we need to generate the correct + * InvalidRequestException. + */ private static TableMetadata metadata(ClusterMetadata cm, String keyspace, String table) { - KeyspaceMetadata ksm = cm.schema.getKeyspaceMetadata(keyspace); - TableMetadata tbm = ksm != null ? ksm.getTableOrViewNullable(table) : null; - + Optional ksm = cm.schema.maybeGetKeyspaceMetadata(keyspace); + if (ksm.isEmpty()) + { + // It's a non-distributed table which is fine, but we want to error if it doesn't exist + // We should never actually reach here unless there is a race with dropping the table + Keyspaces localKeyspaces = Schema.instance.localKeyspaces(); + KeyspaceMetadata ksm2 = localKeyspaces.getNullable(keyspace); + if (ksm2 == null) + throw new InvalidRequestException("Keyspace " + keyspace + " does not exist"); + // Explicitly including views in case they get used in non-distributed tables + TableMetadata tbm2 = ksm2.getTableOrViewNullable(table); + if (tbm2 == null) + throw new InvalidRequestException("Table " + keyspace + "." + table + " does not exist"); + return null; + } + TableMetadata tbm = ksm.get().getTableNullable(table); if (tbm == null) - throw new IllegalStateException("Can't route consensus request to nonexistent CFS %s.%s".format(keyspace, table)); + throw new InvalidRequestException("Table " + keyspace + "." + table + " does not exist"); return tbm; } @@ -99,18 +119,39 @@ public ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, { ClusterMetadata cm = ClusterMetadata.current(); TableMetadata metadata = metadata(cm, keyspace, table); + + // Non-distributed tables always take the Paxos path + if (metadata == null) + return pickPaxos(); return routeAndMaybeMigrate(cm, metadata, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); } public ConsensusRoutingDecision routeAndMaybeMigrate(@Nonnull DecoratedKey key, @Nonnull TableId tableId, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime, long timeoutNanos, boolean isForWrite) { ClusterMetadata cm = ClusterMetadata.current(); - TableMetadata metadata = cm.schema.getTableMetadata(tableId); + TableMetadata metadata = getTableMetadata(cm, tableId); + // Non-distributed tables always take the Paxos path if (metadata == null) - throw new IllegalStateException("Can't route consensus request for nonexistent table %s".format(tableId.toString())); + pickPaxos(); return routeAndMaybeMigrate(cm, metadata, key, consistencyLevel, requestTime, timeoutNanos, isForWrite); } + public static TableMetadata getTableMetadata(ClusterMetadata cm, TableId tableId) + { + TableMetadata tm = cm.schema.getTableMetadata(tableId); + if (tm == null) + { + // It's a non-distributed table which is fine, but we want to error if it doesn't exist + // We should never actually reach here unless there is a race with dropping the table + Keyspaces localKeyspaces = Schema.instance.localKeyspaces(); + TableMetadata tm2 = localKeyspaces.getTableOrViewNullable(tableId); + if (tm2 == null) + throw new InvalidRequestException("Table with id " + tableId + " does not exist"); + return null; + } + return tm; + } + protected static boolean mayWriteThroughAccord(TableMetadata metadata) { return metadata.params.transactionalMode.writesThroughAccord || metadata.params.transactionalMigrationFrom.writesThroughAccord(); @@ -170,7 +211,7 @@ private static ConsensusRoutingDecision pickBasedOnKeyMigrationStatus(ClusterMet ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tmd.id); if (cfs == null) - throw new IllegalStateException("Can't route consensus request to nonexistent CFS %s.%s".format(tmd.keyspace, tmd.name)); + throw new InvalidRequestException("Can't route consensus request to nonexistent CFS %s.%s".format(tmd.keyspace, tmd.name)); // If it is locally replicated we can check our local migration state to see if it was already migrated EndpointsForToken naturalReplicas = ReplicaLayout.forNonLocalStrategyTokenRead(cm, cfs.keyspace.getMetadata(), key.getToken()); boolean isLocallyReplicated = naturalReplicas.lookup(FBUtilities.getBroadcastAddressAndPort()) != null; @@ -191,7 +232,7 @@ private static ConsensusRoutingDecision pickBasedOnKeyMigrationStatus(ClusterMet // barrier transactions to accomplish the migration // They still might need to go through the fast local path for barrier txns // at each replica, but they won't create their own txn since we created it here - ConsensusKeyMigrationState.repairKeyAccord(key, tms.keyspaceName, tms.tableId, tms.minMigrationEpoch(key.getToken()).getEpoch(), requestTime, true, isForWrite); + ConsensusKeyMigrationState.repairKeyAccord(key, tms.tableId, tms.minMigrationEpoch(key.getToken()).getEpoch(), requestTime, true, isForWrite); return paxosV2; } // Fall through for repairKeyPaxos diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java index d8c207dc64db..62d449d5d656 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java @@ -46,6 +46,7 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.paxos.Paxos; import org.apache.cassandra.streaming.PreviewKind; @@ -56,6 +57,7 @@ import org.apache.cassandra.tcm.transformations.BeginConsensusMigrationForTableAndRange; import org.apache.cassandra.tcm.transformations.MaybeFinishConsensusMigrationForTableAndRange; +import static java.lang.String.format; import static java.util.Collections.emptyList; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; @@ -97,9 +99,24 @@ public void onSuccess(@Nullable RepairResult repairResult) if (!tms.targetProtocol.isMigratedBy(repairResult.consensusMigrationRepairResult.type)) return; + List> paxosRepairedRanges = ImmutableList.of(); + if (migrationResult.type.paxosMigrationEligible) + // Paxos always repairs all ranges requested by the repair although there should be nothing + // repaired in the migrated and Accord managed ranges + paxosRepairedRanges = ImmutableList.copyOf(desc.ranges); + + List> accordBarrieredRanges = ImmutableList.of(); + if (migrationResult.type.accordMigrationEligible) + // Accord only barriers ranges it thinks it manages and repair collects which it barriered + // precisely which doesn't have to match what the entire repair covers + accordBarrieredRanges = migrationResult.barrieredRanges.stream() + .map(range -> ((TokenRange)range).toKeyspaceRange()) + .collect(toImmutableList()); + accordBarrieredRanges = Range.normalize(accordBarrieredRanges); + ClusterMetadataService.instance().commit( new MaybeFinishConsensusMigrationForTableAndRange( - desc.keyspace, desc.columnFamily, ImmutableList.copyOf(desc.ranges), + desc.keyspace, desc.columnFamily, paxosRepairedRanges, accordBarrieredRanges, migrationResult.minEpoch, migrationResult.type)); } @@ -283,7 +300,7 @@ private static List keyspacesAndTablesToTableIds(@Nonnull List .map(tableName -> { TableMetadata tm = Schema.instance.getTableMetadata(keyspaceName, tableName); if (tm == null) - throw new IllegalArgumentException("Unknown table %s.%s".format(keyspaceName, tableName)); + throw new IllegalArgumentException(format("Unknown table %s.%s", keyspaceName, tableName)); return tm.id; }) .collect(toImmutableList())); diff --git a/src/java/org/apache/cassandra/service/paxos/Commit.java b/src/java/org/apache/cassandra/service/paxos/Commit.java index 3aa8d65bcef0..12c3606f7431 100644 --- a/src/java/org/apache/cassandra/service/paxos/Commit.java +++ b/src/java/org/apache/cassandra/service/paxos/Commit.java @@ -23,25 +23,25 @@ import java.io.IOException; import java.util.function.BiFunction; - import javax.annotation.Nullable; import com.google.common.base.Objects; -import org.apache.cassandra.db.*; -import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.DeserializationHelper; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.TableMetadata; -import static org.apache.cassandra.db.SystemKeyspace.*; +import static org.apache.cassandra.db.SystemKeyspace.legacyPaxosTtlSec; import static org.apache.cassandra.service.paxos.Commit.CompareResult.AFTER; import static org.apache.cassandra.service.paxos.Commit.CompareResult.BEFORE; import static org.apache.cassandra.service.paxos.Commit.CompareResult.IS_REPROPOSAL; -import static org.apache.cassandra.service.paxos.Commit.CompareResult.WAS_REPROPOSED_BY; import static org.apache.cassandra.service.paxos.Commit.CompareResult.SAME; +import static org.apache.cassandra.service.paxos.Commit.CompareResult.WAS_REPROPOSED_BY; import static org.apache.cassandra.utils.FBUtilities.nowInSeconds; public class Commit @@ -314,7 +314,7 @@ public boolean hasSameBallot(Commit other) public Mutation makeMutation() { - return new Mutation(update); + return new Mutation(update, true); } @Override diff --git a/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java b/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java index d9d8a7b562ab..90d11f887f05 100644 --- a/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java +++ b/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java @@ -71,9 +71,9 @@ default ReadCommand maybeAllowOutOfRangeReads(ReadCommand command) void sendReadCommand(Message message, InetAddressAndPort to, RequestCallback callback); default void notifyOfInitialContacts(EndpointsForToken fullDataRequests, EndpointsForToken transientRequests, EndpointsForToken digestRequests) {} void sendReadRepairMutation(Message message, InetAddressAndPort to, RequestCallback callback); - default Mutation maybeAllowOutOfRangeMutations(Mutation m) + default boolean allowsPotentialTransactionConflicts() { - return m; + return !isEventuallyConsistent(); } boolean isEventuallyConsistent(); } diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java index 973fbcb45dfa..309bbeeb51e1 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingPartitionRepair.java @@ -52,6 +52,7 @@ import org.apache.cassandra.utils.concurrent.CountDownLatch; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static com.google.common.base.Preconditions.checkArgument; import static org.apache.cassandra.net.Verb.READ_REPAIR_REQ; import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.concurrent.CountDownLatch.newCountDownLatch; @@ -154,6 +155,7 @@ private PartitionUpdate mergeUnackedUpdates() @VisibleForTesting protected void sendRR(Message message, InetAddressAndPort endpoint) { + checkArgument(message.payload.allowsPotentialTransactionConflicts() == coordinator.allowsPotentialTransactionConflicts(), "Mutation allowing transaction conflicts should match coordinator"); coordinator.sendReadRepairMutation(message, endpoint, this); } @@ -165,8 +167,8 @@ public void sendInitialRepairs() for (Map.Entry entry: pendingRepairs.entrySet()) { Replica destination = entry.getKey(); - Preconditions.checkArgument(destination.isFull(), "Can't send repairs to transient replicas: %s", destination); - Mutation mutation = coordinator.maybeAllowOutOfRangeMutations(entry.getValue()); + checkArgument(destination.isFull(), "Can't send repairs to transient replicas: %s", destination); + Mutation mutation = entry.getValue(); TableId tableId = extractUpdate(mutation).metadata().id; Tracing.trace("Sending read-repair-mutation to {}", destination); @@ -239,7 +241,7 @@ public void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) if (mutation == null) { - mutation = BlockingReadRepairs.createRepairMutation(update, repairPlan.consistencyLevel(), replica.endpoint(), true); + mutation = BlockingReadRepairs.createRepairMutation(update, repairPlan.consistencyLevel(), replica.endpoint(), true, coordinator.allowsPotentialTransactionConflicts()); versionedMutations[versionIdx] = mutation; } @@ -250,7 +252,6 @@ public void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) continue; } - mutation = coordinator.maybeAllowOutOfRangeMutations(mutation); Tracing.trace("Sending speculative read-repair-mutation to {}", replica); sendRR(Message.out(READ_REPAIR_REQ, mutation), replica.endpoint()); ReadRepairDiagnostics.speculatedWrite(this, replica.endpoint(), mutation); diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java index 4f02b1a06060..98b5dc99b22b 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.reads.repair; -import java.util.Collection; import java.util.Map; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; @@ -34,13 +33,11 @@ import accord.primitives.Txn; import com.codahale.metrics.Meter; import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.ReadCommand; -import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; import org.apache.cassandra.exceptions.ReadTimeoutException; import org.apache.cassandra.locator.Endpoints; @@ -54,6 +51,7 @@ import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.accord.txn.UnrecoverableRepairUpdate; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper; import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tracing.Tracing; @@ -195,70 +193,83 @@ public void awaitWrites() @Override public void repairPartition(DecoratedKey dk, Map mutations, ReplicaPlan.ForWrite writePlan) { - TransactionalMode transactionalMode = command.metadata().params.transactionalMode; - if (coordinator.isEventuallyConsistent() && transactionalMode.blockingReadRepairThroughAccord) - { - Collection partitionUpdates = Mutation.merge(mutations.values()).getPartitionUpdates(); - checkState(partitionUpdates.size() == 1, "Expect only one PartitionUpdate"); - PartitionUpdate update = partitionUpdates.iterator().next(); - PartitionKey partitionKey = PartitionKey.of(update); - Keys key = Keys.of(partitionKey); - // This is going create a new BlockingReadRepair inside an Accord transaction which will go down - // the !isEventuallyConsistent path and apply the repairs through Accord command stores using AccordInteropExecution - UnrecoverableRepairUpdate repairUpdate = UnrecoverableRepairUpdate.create(AccordService.instance().nodeId(), this, key, dk, mutations, writePlan); - Future repairFuture; - try - { - Txn txn = new Txn.InMemory(Txn.Kind.Read, key, TxnRead.createNoOpRead(key), TxnQuery.NONE, repairUpdate); - repairFuture = Stage.ACCORD_MIGRATION.submit(() -> { - try - { - return AccordService.instance().coordinate(txn, ConsistencyLevel.ANY, requestTime); - } - finally - { - // If we successfully ran the repair txn then the update should definitely - // be there for us to clear which means we are sure it was there to be sent - checkNotNull(UnrecoverableRepairUpdate.removeInflightUpdate(repairUpdate.updateKey)); - } - }); - } - catch (Throwable t) - { - UnrecoverableRepairUpdate.removeInflightUpdate(repairUpdate.updateKey); - throw t; - } + // non-Accord reads only ever touch one table and key so all mutations need to be applied either transactionally + // or non-transactionally (not a mix). There is no retry loop here because read repair is relatively rare so it racing + // with changes to migrating ranges should also be pretty rare so it isn't worth the added complexity. If you were + // to add a retry loop you would need to be careful to correctly set/unset allowPotentialTransactionConflicts in the mutations + // since that is set if it is routed to Accord + // + // If this is an Accord transaction that is in interoperability mode and executing a read repair + // then we take the non-transactional path and the mutations are intercepted in ReadCoordinator.sendRepairMutation + // which will ensure the repair mutation runs in the command store thread after preceding transactions are done + ClusterMetadata cm = ClusterMetadata.current(); + if (coordinator.isEventuallyConsistent() && ConsensusMigrationMutationHelper.tokenShouldBeWrittenThroughAccord(cm, command.metadata().id, dk.getToken())) + repairTransactionally(dk, mutations, writePlan); + else + repairNonTransactionally(dk, mutations, writePlan); + } - repairs.add(new PendingPartitionRepair() - { - @Override - public boolean awaitRepairs(long remaining, TimeUnit timeUnit) throws InterruptedException, ExecutionException + private void repairTransactionally(DecoratedKey dk, Map accordMutations, ForWrite writePlan) + { + checkState(coordinator.isEventuallyConsistent(), "Should only repair transactionally for an eventually consistent read coordinator"); + PartitionKey partitionKey = new PartitionKey(command.metadata().id, dk); + Keys key = Keys.of(partitionKey); + // This is going create a new BlockingReadRepair inside an Accord transaction which will go down + // the !isEventuallyConsistent path and apply the repairs through Accord command stores using AccordInteropExecution + UnrecoverableRepairUpdate repairUpdate = UnrecoverableRepairUpdate.create(AccordService.instance().nodeId(), this, key, dk, accordMutations, writePlan); + Future repairFuture; + try + { + Txn txn = new Txn.InMemory(Txn.Kind.Read, key, TxnRead.createNoOpRead(key), TxnQuery.NONE, repairUpdate); + repairFuture = Stage.ACCORD_MIGRATION.submit(() -> { + try { - try - { - repairFuture.get(remaining, timeUnit); - return true; - } - catch (TimeoutException e) - { - - return false; - } + return AccordService.instance().coordinate(txn, ConsistencyLevel.ANY, requestTime); } - - @Override - public ForWrite repairPlan() + finally { - return writePlan; + // If we successfully ran the repair txn then the update should definitely + // be there for us to clear which means we are sure it was there to be sent + checkNotNull(UnrecoverableRepairUpdate.removeInflightUpdate(repairUpdate.updateKey)); } }); } - else + catch (Throwable t) { - BlockingPartitionRepair blockingRepair = new BlockingPartitionRepair(coordinator, dk, mutations, writePlan); - blockingRepair.sendInitialRepairs(); - repairs.add(blockingRepair); + UnrecoverableRepairUpdate.removeInflightUpdate(repairUpdate.updateKey); + throw t; } + + repairs.add(new PendingPartitionRepair() + { + @Override + public boolean awaitRepairs(long remaining, TimeUnit timeUnit) throws InterruptedException, ExecutionException + { + try + { + repairFuture.get(remaining, timeUnit); + return true; + } + catch (TimeoutException e) + { + + return false; + } + } + + @Override + public ForWrite repairPlan() + { + return writePlan; + } + }); + } + + private void repairNonTransactionally(DecoratedKey dk, Map mutations, ForWrite writePlan) + { + BlockingPartitionRepair blockingRepair = new BlockingPartitionRepair(coordinator, dk, mutations, writePlan); + blockingRepair.sendInitialRepairs(); + repairs.add(blockingRepair); } public void repairPartitionDirectly(ReadCoordinator readCoordinator, DecoratedKey dk, Map mutations, ForWrite writePlan) @@ -268,4 +279,10 @@ public void repairPartitionDirectly(ReadCoordinator readCoordinator, DecoratedKe delegateRR.maybeSendAdditionalWrites(); delegateRR.awaitWrites(); } + + @Override + public boolean coordinatorAllowsPotentialTransactionConflicts() + { + return coordinator.allowsPotentialTransactionConflicts(); + } } diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepairs.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepairs.java index de49f5a5636f..c16fba3674ec 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepairs.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepairs.java @@ -46,13 +46,13 @@ public class BlockingReadRepairs * Create a read repair mutation from the given update, if the mutation is not larger than the maximum * mutation size, otherwise return null. Or, if we're configured to be strict, throw an exception. */ - public static Mutation createRepairMutation(PartitionUpdate update, ConsistencyLevel consistency, InetAddressAndPort destination, boolean suppressException) + public static Mutation createRepairMutation(PartitionUpdate update, ConsistencyLevel consistency, InetAddressAndPort destination, boolean suppressException, boolean allowPotentialTransactionConflicts) { if (update == null) return null; DecoratedKey key = update.partitionKey(); - Mutation mutation = new Mutation(update); + Mutation mutation = new Mutation(update, allowPotentialTransactionConflicts); int messagingVersion = MessagingService.instance().versions.get(destination); try diff --git a/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java index bff068a6d77e..2b73ab83b10b 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/ReadRepair.java @@ -100,4 +100,9 @@ ReadRepair create(ReadCoordinator coordinator, ReadCommand command, Replic * Repairs a partition using the provided read coordinator */ void repairPartitionDirectly(ReadCoordinator coordinator, DecoratedKey partitionKey, Map mutations, ReplicaPlan.ForWrite writePlan); + + default boolean coordinatorAllowsPotentialTransactionConflicts() + { + return false; + } } diff --git a/src/java/org/apache/cassandra/service/reads/repair/RowIteratorMergeListener.java b/src/java/org/apache/cassandra/service/reads/repair/RowIteratorMergeListener.java index acddf45d94ce..be6718fc4719 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/RowIteratorMergeListener.java +++ b/src/java/org/apache/cassandra/service/reads/repair/RowIteratorMergeListener.java @@ -390,7 +390,7 @@ public void close() else if (repairs[i] != null) update = repairs[i].build(); - Mutation mutation = BlockingReadRepairs.createRepairMutation(update, readPlan.consistencyLevel(), replica.endpoint(), false); + Mutation mutation = BlockingReadRepairs.createRepairMutation(update, readPlan.consistencyLevel(), replica.endpoint(), false, readRepair.coordinatorAllowsPotentialTransactionConflicts()); if (mutation == null) continue; diff --git a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java index 1c1f3985fa6d..15bfdda65c92 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java +++ b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java @@ -38,8 +38,8 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationRepairType; -import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; import org.apache.cassandra.service.consensus.migration.TableMigrationState; import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.tcm.ClusterMetadata; @@ -56,7 +56,8 @@ import static org.apache.cassandra.dht.Range.intersects; import static org.apache.cassandra.dht.Range.normalize; import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; - +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget.accord; +import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget.paxos; public class MaybeFinishConsensusMigrationForTableAndRange implements Transformation { @@ -71,7 +72,10 @@ public class MaybeFinishConsensusMigrationForTableAndRange implements Transforma public final String cf; @Nonnull - public final List> repairedRanges; + public final List> paxosRepairedRanges; + + @Nonnull + public final List> accordBarrieredRanges; @Nonnull public final Epoch minEpoch; @@ -81,21 +85,23 @@ public class MaybeFinishConsensusMigrationForTableAndRange implements Transforma public MaybeFinishConsensusMigrationForTableAndRange(@Nonnull String keyspace, @Nonnull String cf, - @Nonnull List> repairedRanges, + @Nonnull List> paxosRepairedRanges, + @Nonnull List> accordBarrieredRanges, @Nonnull Epoch minEpoch, @Nonnull ConsensusMigrationRepairType repairType) { checkNotNull(keyspace, "keyspace should not be null"); checkNotNull(cf, "cf should not be null"); - checkNotNull(repairedRanges, "repairedRanges should not be null"); - checkArgument(!repairedRanges.isEmpty(), "repairedRanges should not be empty"); + checkNotNull(paxosRepairedRanges, "paxosRepairedRanges should not be null"); + checkNotNull(accordBarrieredRanges, "accordBarrierRanges should not be null"); checkNotNull(minEpoch, "minEpoch should not be null"); checkArgument(minEpoch.isAfter(Epoch.EMPTY), "minEpoch should not be empty"); checkNotNull(repairType, "repairType is null"); checkArgument(repairType != ConsensusMigrationRepairType.ineligible, "Shouldn't attempt to finish migration with ineligible repair"); this.keyspace = keyspace; this.cf = cf; - this.repairedRanges = repairedRanges; + this.paxosRepairedRanges = paxosRepairedRanges; + this.accordBarrieredRanges = accordBarrieredRanges; this.minEpoch = minEpoch; this.repairType = repairType; } @@ -125,7 +131,7 @@ private static Transformer resetMigrationOnSchema(ClusterMetadata prev, Transfor public Result execute(@Nonnull ClusterMetadata metadata) { - logger.info("Completed repair {} ranges {}", repairType, repairedRanges); + logger.info("Completed repair eligibiliy '{}' paxos repaired ranges {}, accord repaired ranges {}", repairType, paxosRepairedRanges, accordBarrieredRanges); checkNotNull(metadata, "clusterMetadata should not be null"); String ksAndCF = keyspace + "." + cf; TableMetadata tbm = Schema.instance.getTableMetadata(keyspace, cf); @@ -138,8 +144,15 @@ public Result execute(@Nonnull ClusterMetadata metadata) return new Rejected(INVALID, format("Table %s is not currently performing consensus migration", ksAndCF)); if (!tms.targetProtocol.isMigratedBy(repairType)) - return new Rejected(INVALID, format("Table %s is not currently performing consensus migration to %s and the repair was a %s repair", ksAndCF, tms.targetProtocol, repairType)); - + return new Rejected(INVALID, format("Table %s has a target protocol of %s and is the repair type %s is not eligible/needed to progress the migration", ksAndCF, tms.targetProtocol, repairType)); + + List> repairedRanges; + if (tms.targetProtocol == accord) + repairedRanges = paxosRepairedRanges; + else if (tms.targetProtocol == paxos) + repairedRanges = accordBarrieredRanges; + else + throw new IllegalStateException("Unhandled migration target " + tms.targetProtocol); List> normalizedRepairedRanges = normalize(repairedRanges); // Bail out if repair doesn't actually intersect with any migrating ranges @@ -148,6 +161,8 @@ public Result execute(@Nonnull ClusterMetadata metadata) Transformer next = metadata.transformer(); ConsensusMigrationState migrationState = metadata.consensusMigrationState.withRangesRepairedAtEpoch(tbm, normalizedRepairedRanges, minEpoch); + logger.debug("Original migration state {}"); + logger.debug("New migration state {}"); next = next.with(migrationState); // reset the migration value on the table if the migration has completed @@ -165,7 +180,8 @@ public void serialize(Transformation t, DataOutputPlus out, Version version) thr MaybeFinishConsensusMigrationForTableAndRange v = (MaybeFinishConsensusMigrationForTableAndRange)t; out.writeUTF(v.keyspace); out.writeUTF(v.cf); - ConsensusTableMigration.rangesSerializer.serialize(v.repairedRanges, out, version); + ConsensusTableMigration.rangesSerializer.serialize(v.paxosRepairedRanges, out, version); + ConsensusTableMigration.rangesSerializer.serialize(v.accordBarrieredRanges, out, version); Epoch.serializer.serialize(v.minEpoch, out, version); out.write(v.repairType.value); } @@ -174,10 +190,11 @@ public MaybeFinishConsensusMigrationForTableAndRange deserialize(DataInputPlus i { String keyspace = in.readUTF(); String cf = in.readUTF(); - List> repairedRanges = ConsensusTableMigration.rangesSerializer.deserialize(in, version); + List> paxosRepairedRanges = ConsensusTableMigration.rangesSerializer.deserialize(in, version); + List> accordBarrieredRanges = ConsensusTableMigration.rangesSerializer.deserialize(in, version); Epoch minEpoch = Epoch.serializer.deserialize(in, version); ConsensusMigrationRepairType repairType = ConsensusMigrationRepairType.fromValue(in.readByte()); - return new MaybeFinishConsensusMigrationForTableAndRange(keyspace, cf, repairedRanges, minEpoch, repairType); + return new MaybeFinishConsensusMigrationForTableAndRange(keyspace, cf, paxosRepairedRanges, accordBarrieredRanges, minEpoch, repairType); } public long serializedSize(Transformation t, Version version) @@ -185,7 +202,8 @@ public long serializedSize(Transformation t, Version version) MaybeFinishConsensusMigrationForTableAndRange v = (MaybeFinishConsensusMigrationForTableAndRange)t; return TypeSizes.sizeof(v.keyspace) + TypeSizes.sizeof(v.cf) - + ConsensusTableMigration.rangesSerializer.serializedSize(v.repairedRanges, version) + + ConsensusTableMigration.rangesSerializer.serializedSize(v.paxosRepairedRanges, version) + + ConsensusTableMigration.rangesSerializer.serializedSize(v.accordBarrieredRanges, version) + Epoch.serializer.serializedSize(v.minEpoch) + TypeSizes.sizeof(v.repairType.value); } diff --git a/src/java/org/apache/cassandra/transport/Dispatcher.java b/src/java/org/apache/cassandra/transport/Dispatcher.java index f701434d0068..6a3818482904 100644 --- a/src/java/org/apache/cassandra/transport/Dispatcher.java +++ b/src/java/org/apache/cassandra/transport/Dispatcher.java @@ -151,6 +151,11 @@ public static RequestTime forImmediateExecution() return new RequestTime(MonotonicClock.Global.preciseTime.now()); } + public RequestTime withStartedAt(long startedAtNanos) + { + return new RequestTime(enqueuedAtNanos, startedAtNanos); + } + public long startedAtNanos() { return startedAtNanos; @@ -426,7 +431,7 @@ private static Message.Response processRequest(ServerConnection connection, Mess connection.applyStateTransition(request.type, response.type); return response; } - + /** * Note: this method may be executed on the netty event loop. */ diff --git a/src/java/org/apache/cassandra/triggers/TriggerExecutor.java b/src/java/org/apache/cassandra/triggers/TriggerExecutor.java index 1a0d39e1adb4..335131d805be 100644 --- a/src/java/org/apache/cassandra/triggers/TriggerExecutor.java +++ b/src/java/org/apache/cassandra/triggers/TriggerExecutor.java @@ -19,13 +19,18 @@ package org.apache.cassandra.triggers; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import java.util.*; import java.util.concurrent.TimeUnit; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.ListMultimap; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.slf4j.Logger; @@ -34,7 +39,10 @@ import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.db.*; +import org.apache.cassandra.db.CounterMutation; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.IMutation; +import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.exceptions.CassandraException; import org.apache.cassandra.exceptions.InvalidRequestException; @@ -120,7 +128,7 @@ public PartitionUpdate execute(PartitionUpdate updates) throws InvalidRequestExc * @throws InvalidRequestException if additional mutations were generated, but * the initial mutations contains counter updates */ - public Collection execute(Collection mutations) throws InvalidRequestException + public List execute(Collection mutations) throws InvalidRequestException { boolean hasCounters = false; List augmentedMutations = null; @@ -156,7 +164,7 @@ public Collection execute(Collection mutations) t return mergeMutations(Iterables.concat(originalMutations, augmentedMutations)); } - private Collection mergeMutations(Iterable mutations) + private List mergeMutations(Iterable mutations) { ListMultimap, Mutation> groupedMutations = ArrayListMultimap.create(); diff --git a/src/java/org/apache/cassandra/utils/NoSpamLogger.java b/src/java/org/apache/cassandra/utils/NoSpamLogger.java index 5c37a043d870..e16caa930f85 100644 --- a/src/java/org/apache/cassandra/utils/NoSpamLogger.java +++ b/src/java/org/apache/cassandra/utils/NoSpamLogger.java @@ -21,11 +21,10 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; +import com.google.common.annotations.VisibleForTesting; import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.slf4j.Logger; -import com.google.common.annotations.VisibleForTesting; - import static org.apache.cassandra.utils.Clock.Global; /** @@ -78,7 +77,7 @@ public NoSpamLogStatement(String statement, long minIntervalNanos) this.minIntervalNanos = minIntervalNanos; } - private boolean shouldLog(long nowNanos) + public boolean shouldLog(long nowNanos) { long expected = get(); return nowNanos >= expected && compareAndSet(expected, nowNanos + minIntervalNanos); diff --git a/src/java/org/apache/cassandra/utils/Throwables.java b/src/java/org/apache/cassandra/utils/Throwables.java index 242c7688089a..df9ad111380b 100644 --- a/src/java/org/apache/cassandra/utils/Throwables.java +++ b/src/java/org/apache/cassandra/utils/Throwables.java @@ -39,6 +39,8 @@ import org.apache.cassandra.io.util.File; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static com.google.common.base.Throwables.getStackTraceAsString; + public final class Throwables { public enum FileOpType { READ, WRITE } @@ -48,6 +50,23 @@ public interface DiscreteAction void perform() throws E; } + public interface ThrowingRunnable + { + void run() throws Exception; + } + + public static void runUnchecked(ThrowingRunnable runnable) + { + try + { + runnable.run(); + } + catch (Exception e) + { + throwAsUncheckedException(e); + } + } + public static boolean isCausedBy(Throwable t, Predicate cause) { return cause.test(t) || (t.getCause() != null && cause.test(t.getCause())); @@ -340,4 +359,16 @@ public static void assertAnyCause(Throwable err, Class... c if (Arrays.stream(causeClasses).noneMatch(c -> anyCauseMatches(err, c::isInstance))) throw new AssertionError("The exception is not caused by any of " + Arrays.toString(causeClasses), err); } + + public static Object getStackTraceAsToString(Throwable t) + { + return new Object() + { + @Override + public String toString() + { + return getStackTraceAsString(t); + } + }; + } } diff --git a/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java b/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java index 5b27d3d44f00..4adaff4c4d0e 100644 --- a/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java +++ b/test/distributed/org/apache/cassandra/distributed/api/ICoordinator.java @@ -18,13 +18,13 @@ package org.apache.cassandra.distributed.api; +import org.apache.cassandra.distributed.shared.FutureUtils; + import java.util.Iterator; import java.util.UUID; import java.util.concurrent.Future; import java.util.function.BiConsumer; -import org.apache.cassandra.distributed.shared.FutureUtils; - // The cross-version API requires that a Coordinator can be constructed without any constructor arguments public interface ICoordinator { @@ -82,6 +82,7 @@ default Future asyncExecuteWithTracing(UUID sessionId, String query, } Future asyncExecuteWithTracingWithResult(UUID sessionId, String query, ConsistencyLevel consistencyLevel, Object... boundValues); + Future asyncExecuteWithResult(String query, ConsistencyLevel consistencyLevel, Object... boundValues); default Object[][] executeWithTracing(UUID sessionId, String query, ConsistencyLevel consistencyLevel, Object... boundValues) { diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java index 8d17177884ac..5053735336f0 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java @@ -102,6 +102,12 @@ public Future asyncExecuteWithTracingWithResult(UUID sessionI }).call(); } + @Override + public Future asyncExecuteWithResult(String query, ConsistencyLevel consistencyLevelOrigin, Object... boundValues) + { + return instance.async(() -> unsafeExecuteInternal(query, consistencyLevelOrigin, boundValues)).call(); + } + public static org.apache.cassandra.db.ConsistencyLevel toCassandraCL(ConsistencyLevel cl) { try diff --git a/test/distributed/org/apache/cassandra/distributed/impl/TestChangeListener.java b/test/distributed/org/apache/cassandra/distributed/impl/TestChangeListener.java index a40ee9a96f94..977a515170b8 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/TestChangeListener.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/TestChangeListener.java @@ -18,10 +18,8 @@ package org.apache.cassandra.distributed.impl; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.function.Predicate; +import java.util.NavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,7 +28,9 @@ import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.listeners.ChangeListener; +import org.apache.cassandra.utils.concurrent.CountDownLatch; import org.apache.cassandra.utils.concurrent.WaitQueue; +import org.apache.cassandra.utils.concurrent.WaitQueue.Signal; public class TestChangeListener implements ChangeListener { @@ -43,77 +43,68 @@ public static void register() ClusterMetadataService.instance().log().addListener(instance); } - private final List> preCommitPredicates = new ArrayList<>(); - private final List> postCommitPredicates = new ArrayList<>(); + NavigableMap preCommitBarriers = new ConcurrentSkipListMap<>(); + NavigableMap postCommitBarriers = new ConcurrentSkipListMap<>(); private final WaitQueue waiters = WaitQueue.newWaitQueue(); - @Override - public void notifyPreCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) + private class CommitBarrier { - Iterator> iter = preCommitPredicates.iterator(); - while (iter.hasNext()) + private final CountDownLatch waiting = CountDownLatch.newCountDownLatch(1); + private final Runnable onPaused; + private final String desc; + + private CommitBarrier(Runnable onPaused, String desc) + { + this.onPaused = onPaused; + this.desc = desc; + } + + private void await() { - if (iter.next().test(next.epoch)) - { - logger.debug("Epoch matches pre-commit predicate, pausing"); - pause(); - iter.remove(); - } + logger.debug("Notifying paused: {}", desc); + Signal s = waiters.register(); + waiting.decrement(); + onPaused.run(); + s.awaitUninterruptibly(); + logger.debug("Unpaused: {}", desc); } } + @Override + public void notifyPreCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) + { + CommitBarrier commitBarrier = preCommitBarriers.remove(next.epoch); + if (commitBarrier != null) + commitBarrier.await(); + } @Override public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) { - Iterator> iter = postCommitPredicates.iterator(); - while (iter.hasNext()) - { - if (iter.next().test(next.epoch)) - { - logger.debug("Epoch matches post-commit predicate, pausing"); - pause(); - iter.remove(); - } - } + CommitBarrier commitBarrier = postCommitBarriers.remove(next.epoch); + if (commitBarrier != null) + commitBarrier.await(); } - public void pauseBefore(Epoch epoch, Runnable onMatch) + public void pauseBefore(Epoch epoch, Runnable onPaused) { - logger.debug("Requesting pause before enacting {}", epoch); - preCommitPredicates.add((e) -> { - if (e.is(epoch)) - { - onMatch.run(); - return true; - } - return false; - }); + preCommitBarriers.put(epoch, new CommitBarrier(onPaused, "pre-commit " + epoch)); } - public void pauseAfter(Epoch epoch, Runnable onMatch) + public void pauseAfter(Epoch epoch, Runnable onPaused) { - logger.debug("Requesting pause after enacting {}", epoch); - postCommitPredicates.add((e) -> { - if (e.is(epoch)) - { - onMatch.run(); - return true; - } - return false; - }); + postCommitBarriers.put(epoch, new CommitBarrier(onPaused, "post-commit " + epoch)); } - public void pause() + public void unpause() { - WaitQueue.Signal signal = waiters.register(); - logger.debug("Log follower is paused, waiting..."); - signal.awaitUninterruptibly(); - logger.debug("Resumed log follower..."); + logger.info("Unpausing all precommit and post commit barriers"); + waiters.signalAll(); } - public void unpause() + public void clearAndUnpause() { - logger.debug("Unpausing log follower"); + preCommitBarriers.clear(); + postCommitBarriers.clear(); waiters.signalAll(); } } diff --git a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java index cbb6c420463c..79daae158361 100644 --- a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java +++ b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java @@ -600,6 +600,11 @@ public static void unpauseEnactment(IInvokableInstance instance) instance.runOnInstance(() -> TestChangeListener.instance.unpause()); } + public static void clearAndUnpause(IInvokableInstance instance) + { + instance.runOnInstance(() -> TestChangeListener.instance.clearAndUnpause()); + } + public static boolean isMigrating(IInvokableInstance instance) { return instance.callOnInstance(() -> ClusterMetadataService.instance().isMigrating()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/CASAddTest.java b/test/distributed/org/apache/cassandra/distributed/test/CASAddTest.java index 50ddd091580f..37e44f7794b6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/CASAddTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/CASAddTest.java @@ -72,13 +72,13 @@ public void testAdditionNotExists() throws Throwable assertRows(cluster.coordinator(1).execute("SELECT * FROM " + KEYSPACE + ".tbl WHERE pk = 1", ConsistencyLevel.SERIAL), row(1, null, null)); // this section is testing current limitations... if they start to fail due to the limitations going away... update this test to include those cases - Assertions.assertThatThrownBy(() -> cluster.coordinator(1).execute(batch( + Assertions.assertThatThrownBy(() -> cluster.coordinator(1).execute(unloggedBatch( "INSERT INTO " + KEYSPACE + ".tbl (pk, a, b) VALUES (1, 0, '') IF NOT EXISTS", "UPDATE " + KEYSPACE + ".tbl SET a = a + 1, b = b + 'success' WHERE pk = 1 IF EXISTS" ), ConsistencyLevel.QUORUM)) .is(AssertionUtils.is(InvalidRequestException.class)) .hasMessage("Cannot mix IF EXISTS and IF NOT EXISTS conditions for the same row"); - Assertions.assertThatThrownBy(() -> cluster.coordinator(1).execute(batch( + Assertions.assertThatThrownBy(() -> cluster.coordinator(1).execute(unloggedBatch( "INSERT INTO " + KEYSPACE + ".tbl (pk, a, b) VALUES (1, 0, '') IF NOT EXISTS", "UPDATE " + KEYSPACE + ".tbl SET a = a + 1, b = b + 'success' WHERE pk = 1" diff --git a/test/distributed/org/apache/cassandra/distributed/test/CASMultiDCTest.java b/test/distributed/org/apache/cassandra/distributed/test/CASMultiDCTest.java index 382cb543c70e..8fa3b0fe94fd 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/CASMultiDCTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/CASMultiDCTest.java @@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; +import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -36,7 +37,10 @@ import org.apache.cassandra.service.paxos.PaxosCommit; import org.apache.cassandra.utils.ByteBufferUtil; -import static org.apache.cassandra.distributed.api.ConsistencyLevel.*; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.LOCAL_QUORUM; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.LOCAL_SERIAL; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.QUORUM; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.SERIAL; public class CASMultiDCTest { @@ -75,6 +79,12 @@ public static void beforeClass() throws Throwable })); } + @AfterClass + public static void afterClass() throws Throwable + { + CLUSTER.close(); + } + @Before public void setUp() { diff --git a/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java b/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java index ffe65c49dc66..ac4e952fe626 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/MessageFiltersTest.java @@ -275,7 +275,8 @@ public void hintSerializationTest() throws Exception try (Cluster cluster = init(builder().withNodes(3) .withConfig(config -> config.with(GOSSIP) .with(NETWORK) - .set("hinted_handoff_enabled", true)) + .set("hinted_handoff_enabled", true) + .set("accord.enabled", false)) .start())) { cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (k int PRIMARY KEY, v int)")); diff --git a/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java b/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java index 36220e3853cf..528e5ad00c6a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java @@ -146,7 +146,6 @@ public void shouldExposeCAS() throws Throwable // Issue a read to unblock the read generated by the original CAS operation: SESSION.executeAsync("SELECT * FROM " + KEYSPACE + ".cas_tbl WHERE k = 0"); - waitForQueriesToFinish(); } @@ -174,7 +173,7 @@ private static void assertCasVisible() @Test public void shouldExposeTransaction() throws Throwable { - SHARED_CLUSTER.schemaChange("CREATE TABLE " + KEYSPACE + ".accord_tbl (k int primary key, v int) WITH transactional_mode='full'"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + KEYSPACE + ".accord_tbl (k int primary key, v int) WITH transactional_mode='mixed_reads'"); // Disable recovery to make sure only one local read occurs: for (IInvokableInstance instance : SHARED_CLUSTER) diff --git a/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java b/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java index ce3df571d8f7..2e1cb4b16da5 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java @@ -30,44 +30,45 @@ import com.google.common.util.concurrent.Uninterruptibles; import org.junit.Assert; - -import org.apache.cassandra.Util; -import org.apache.cassandra.concurrent.SEPExecutor; -import org.apache.cassandra.config.CassandraRelevantProperties; -import org.apache.cassandra.dht.Token; -import org.apache.cassandra.distributed.shared.WithProperties; -import org.apache.cassandra.locator.AbstractReplicationStrategy; -import org.apache.cassandra.locator.EndpointsForToken; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.ReplicaLayout; -import org.apache.cassandra.locator.ReplicaUtils; -import org.apache.cassandra.service.snapshot.SnapshotManager; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.utils.Throwables; import org.junit.Test; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; import net.bytebuddy.implementation.bind.annotation.SuperCall; +import org.apache.cassandra.Util; +import org.apache.cassandra.concurrent.SEPExecutor; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IIsolatedExecutor; +import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.StatsComponent; import org.apache.cassandra.io.sstable.metadata.StatsMetadata; +import org.apache.cassandra.locator.AbstractReplicationStrategy; +import org.apache.cassandra.locator.EndpointsForToken; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.ReplicaLayout; +import org.apache.cassandra.locator.ReplicaUtils; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.StorageProxy.LocalReadRunnable; +import org.apache.cassandra.service.reads.ReadCoordinator; +import org.apache.cassandra.service.snapshot.SnapshotManager; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.DiagnosticSnapshotService; +import org.apache.cassandra.utils.Throwables; import static net.bytebuddy.matcher.ElementMatchers.named; import static net.bytebuddy.matcher.ElementMatchers.takesArguments; @@ -440,7 +441,7 @@ public static void install(ClassLoader classLoader, Integer num) .load(classLoader, ClassLoadingStrategy.Default.INJECTION); new ByteBuddy().rebase(ReplicaLayout.class) - .method(named("forTokenReadSorted").and(takesArguments(ClusterMetadata.class, Keyspace.class, AbstractReplicationStrategy.class, Token.class))) + .method(named("forTokenReadSorted").and(takesArguments(ClusterMetadata.class, Keyspace.class, AbstractReplicationStrategy.class, TableId.class, Token.class, ReadCoordinator.class))) .intercept(MethodDelegation.to(BBHelper.class)) .make() .load(classLoader, ClassLoadingStrategy.Default.INJECTION); @@ -475,7 +476,7 @@ public static UnfilteredPartitionIterator executeLocally(ReadExecutionController } @SuppressWarnings({ "unused" }) - public static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, Token token) + public static ReplicaLayout.ForTokenRead forTokenReadSorted(ClusterMetadata metadata, Keyspace keyspace, AbstractReplicationStrategy replicationStrategy, TableId tableId, Token token, org.apache.cassandra.service.reads.ReadCoordinator coordinator) { try { diff --git a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java index 103b1fc4c00b..4d2ae2df7ad3 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java @@ -28,6 +28,7 @@ import java.util.function.Function; import java.util.stream.IntStream; +import com.google.common.collect.ImmutableList; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -90,7 +91,7 @@ public class ShortReadProtectionTest extends TestBaseImpl public static Collection data() { List result = new ArrayList<>(); - for (TransactionalMode mode : TransactionalMode.values()) + for (TransactionalMode mode : ImmutableList.of(TransactionalMode.mixed_reads, TransactionalMode.off)) for (ConsistencyLevel readConsistencyLevel : Arrays.asList(ALL, QUORUM, SERIAL)) for (boolean flush : BOOLEANS) for (boolean paging : BOOLEANS) diff --git a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java index b96c0463fd4d..7eadb7e0b470 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java @@ -33,6 +33,8 @@ import java.util.stream.Collectors; import com.google.common.collect.ImmutableSet; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListenableFutureTask; import org.junit.After; import org.junit.BeforeClass; @@ -58,8 +60,11 @@ import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ICluster; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.api.IInstance; import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.NodeToolResult; import org.apache.cassandra.distributed.shared.DistributedTestBase; import org.apache.cassandra.service.accord.AccordStateCache; @@ -69,6 +74,7 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.RESET_BOOTSTRAP_PROGRESS; import static org.apache.cassandra.config.CassandraRelevantProperties.SKIP_GC_INSPECTOR; import static org.apache.cassandra.distributed.action.GossipHelper.withProperty; +import static org.assertj.core.api.Assertions.fail; // checkstyle: suppress below 'blockSystemPropertyUsage' public class TestBaseImpl extends DistributedTestBase @@ -140,10 +146,17 @@ public static ByteBuffer tuple(Object... values) return tupleType.pack(bbs, ByteBufferAccessor.instance); } - public static String batch(String... queries) + public static String unloggedBatch(String... queries) + { + return batch(false, queries); + } + + public static String batch(boolean logged, String... queries) { StringBuilder sb = new StringBuilder(); - sb.append("BEGIN UNLOGGED BATCH\n"); + sb.append("BEGIN "); + sb.append(logged ? "" : "UNLOGGED "); + sb.append("BATCH\n"); for (String q : queries) sb.append(q).append(";\n"); sb.append("APPLY BATCH;"); @@ -253,4 +266,31 @@ protected static void disableCompaction(Cluster cluster, String keyspace, String for (int i = 1; i < cluster.size() + 1; i++) cluster.get(i).nodetool("disableautocompaction", keyspace, table); } + + public static String nodetool(IInstance instance, String... commandAndArgs) + { + NodeToolResult nodetoolResult = instance.nodetoolResult(commandAndArgs); + if (!nodetoolResult.getStdout().isEmpty()) + System.out.println(nodetoolResult.getStdout()); + if (!nodetoolResult.getStderr().isEmpty()) + System.err.println(nodetoolResult.getStderr()); + if (nodetoolResult.getError() != null) + fail("Failed nodetool " + Arrays.asList(commandAndArgs), nodetoolResult.getError()); + // TODO why does standard out end up in stderr in nodetool? + return nodetoolResult.getStdout(); + } + + public static String nodetool(ICoordinator coordinator, String... commandAndArgs) + { + return nodetool(coordinator.instance(), commandAndArgs); + } + + public static ListenableFuture nodetoolAsync(ICoordinator coordinator, String... commandAndArgs) + { + ListenableFutureTask task = ListenableFutureTask.create(() -> nodetool(coordinator, commandAndArgs)); + Thread asyncThread = new Thread(task, "NodeTool: " + Arrays.asList(commandAndArgs)); + asyncThread.setDaemon(true); + asyncThread.start(); + return task; + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java index c5f92a6c70c6..7ede0b3cdd80 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java @@ -18,14 +18,37 @@ package org.apache.cassandra.distributed.test.accord; -import accord.primitives.Unseekables; -import accord.topology.Topologies; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.primitives.Unseekables; +import accord.topology.Topologies; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.functions.types.utils.Bytes; -import org.apache.cassandra.db.marshal.*; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.db.marshal.MapType; +import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.ICoordinator; @@ -37,24 +60,13 @@ import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.ByteBufferUtil; import org.assertj.core.api.Assertions; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.*; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.stream.Collectors; import static java.util.Collections.singletonList; import static org.apache.cassandra.cql3.CQLTester.row; import static org.apache.cassandra.distributed.util.QueryResultUtil.assertThat; -import static org.junit.Assert.*; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; public abstract class AccordCQLTestBase extends AccordTestBase { @@ -86,11 +98,11 @@ public void testMultiPartitionReturn() throws Exception for (int i = 0; i < 10; i++) { for (int j = 0; j < 10; j++) - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "(k, c, v) VALUES (?, ?, ?);", ConsistencyLevel.ALL, i, j, i + j); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + "(k, c, v) VALUES (?, ?, ?);", ConsistencyLevel.ALL, i, j, i + j); } // multi row String cql = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k=? AND c IN (?, ?);\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k=? AND c IN (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(cql, ConsistencyLevel.ANY, 0, 0, 1); assertThat(result).isEqualTo(QueryResults.builder() @@ -101,7 +113,7 @@ public void testMultiPartitionReturn() throws Exception // Results should be in Partiton/Clustering order, so make sure // multi partition cql = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k IN (?, ?) AND c = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k IN (?, ?) AND c = ?;\n" + "COMMIT TRANSACTION"; for (boolean asc : Arrays.asList(true, false)) { @@ -116,7 +128,7 @@ public void testMultiPartitionReturn() throws Exception // multi-partition, multi-clustering cql = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k IN (?, ?) AND c IN (?, ?);\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k IN (?, ?) AND c IN (?, ?);\n" + "COMMIT TRANSACTION"; for (boolean asc : Arrays.asList(true, false)) { @@ -195,14 +207,14 @@ public void testScalarBindVariables() throws Throwable { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + - " LET row2 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + + " LET row1 = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?);\n" + + " LET row2 = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?);\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?;\n" + " IF row1 IS NULL AND row2.v = ? THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -216,7 +228,7 @@ public void testScalarBindVariables() throws Throwable assertEquals(3, result[0][0]); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check); }); @@ -225,13 +237,13 @@ public void testScalarBindVariables() throws Throwable @Test public void testRegularScalarIsNull() throws Throwable { - testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); + testScalarIsNull("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testStaticScalarIsNull() throws Throwable { - testScalarIsNull("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); + testScalarIsNull("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int static, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } private void testScalarIsNull(String tableDDL) throws Exception { @@ -239,25 +251,25 @@ private void testScalarIsNull(String tableDDL) throws Exception { cluster -> { String insertNull = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 LIMIT 1);\n" + " SELECT row0.k, row0.v;\n" + " IF row0.v IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, null);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, null);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null, null }, insertNull, 0, 0); String insert = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 LIMIT 1);\n" + " SELECT row0.k, row0.v;\n" + " IF row0.v IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, null }, insert, 0, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT k, c, v FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + + " SELECT k, c, v FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check); }); @@ -266,36 +278,36 @@ private void testScalarIsNull(String tableDDL) throws Exception { @Test public void testQueryStaticColumn() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { // select partition key, clustering key and static column, restrict on partition and clustering testQueryStaticColumn(cluster, - "LET row0 = (SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0);\n" + + "LET row0 = (SELECT k, c, s, v FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = 0);\n" + "SELECT row0.k, row0.c, row0.s, row0.v;\n", - "SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0"); + "SELECT k, c, s, v FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = 0"); // select partition key, clustering key and static column, restrict on partition and limit to 1 row testQueryStaticColumn(cluster, - "LET row0 = (SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + + "LET row0 = (SELECT k, c, s, v FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1);\n" + "SELECT row0.k, row0.c, row0.s, row0.v;\n", - "SELECT k, c, s, v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1"); + "SELECT k, c, s, v FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1"); // select static column and regular column, restrict on partition and clustering testQueryStaticColumn(cluster, - "LET row0 = (SELECT s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0);\n" + + "LET row0 = (SELECT s, v FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = 0);\n" + "SELECT row0.s, row0.v;\n", - "SELECT s, v FROM " + qualifiedTableName + " WHERE k = ? AND c = 0"); + "SELECT s, v FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = 0"); // select just static column, restrict on partition and limit to 1 row testQueryStaticColumn(cluster, - "LET row0 = (SELECT s FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + + "LET row0 = (SELECT s FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1);\n" + "SELECT row0.s;\n", - "SELECT s FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1"); + "SELECT s FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1"); }); } @@ -305,22 +317,22 @@ private void testQueryStaticColumn(Cluster cluster, String accordReadQuery, Stri int key = 10; assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); - cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedTableName + " (k, s) VALUES (?, null);", ConsistencyLevel.ALL, key); + cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedAccordTableName + " (k, s) VALUES (?, null);", ConsistencyLevel.ALL, key); logger().info("null -> static column"); assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); - cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedTableName + " (k, s) VALUES (?, 1);", ConsistencyLevel.ALL, key); + cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedAccordTableName + " (k, s) VALUES (?, 1);", ConsistencyLevel.ALL, key); logger().info("Inserted 1 -> static column"); assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key++); - cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedTableName + " (k, c) VALUES (?, 0);", ConsistencyLevel.ALL, key); + cluster.get(1).coordinator().execute("INSERT INTO " + qualifiedAccordTableName + " (k, c) VALUES (?, 0);", ConsistencyLevel.ALL, key); logger().info("Inserted 0 -> clustering"); assertResultsFromAccordMatches(cluster, accordReadQuery, simpleReadQuery, key); } @Test public void testUpdateStaticColumn() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + '\'', + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + '\'', cluster -> { checkUpdateStatic(cluster, "SET s=1 WHERE k=?", 101, "[[101, null, 1, null]]", "[]"); @@ -336,16 +348,16 @@ public void testUpdateStaticColumn() throws Exception { private void checkUpdateStatic(Cluster cluster, String update, int key, String expPart, String expClust) { Object[][] r1, r2, r3, r4, r; - r = cluster.get(1).coordinator().execute("UPDATE " + qualifiedTableName + " " + update + " IF s = NULL;", ConsistencyLevel.QUORUM, key); + r = cluster.get(1).coordinator().execute("UPDATE " + qualifiedAccordTableName + " " + update + " IF s = NULL;", ConsistencyLevel.QUORUM, key); Assertions.assertThat(Arrays.deepToString(r)).isEqualTo("[[true]]"); - r1 = cluster.get(1).coordinator().execute("SELECT * FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1;", ConsistencyLevel.SERIAL, key); - r2 = cluster.get(1).coordinator().execute("SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = 0;", ConsistencyLevel.SERIAL, key); - cluster.get(1).coordinator().execute("TRUNCATE " + qualifiedTableName, ConsistencyLevel.ALL); + r1 = cluster.get(1).coordinator().execute("SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1;", ConsistencyLevel.SERIAL, key); + r2 = cluster.get(1).coordinator().execute("SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = 0;", ConsistencyLevel.SERIAL, key); + cluster.get(1).coordinator().execute("TRUNCATE " + qualifiedAccordTableName, ConsistencyLevel.ALL); - executeAsTxn(cluster, "UPDATE " + qualifiedTableName + " " + update + ";", key); - r3 = executeAsTxn(cluster, "SELECT * FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1;", key).toObjectArrays(); - r4 = executeAsTxn(cluster, "SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = 0;", key).toObjectArrays(); - cluster.get(1).coordinator().execute("TRUNCATE " + qualifiedTableName, ConsistencyLevel.ALL); + executeAsTxn(cluster, "UPDATE " + qualifiedAccordTableName + " " + update + ";", key); + r3 = executeAsTxn(cluster, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1;", key).toObjectArrays(); + r4 = executeAsTxn(cluster, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = 0;", key).toObjectArrays(); + cluster.get(1).coordinator().execute("TRUNCATE " + qualifiedAccordTableName, ConsistencyLevel.ALL); Assertions.assertThat(Arrays.deepToString(r1)).isEqualTo(expPart); Assertions.assertThat(Arrays.deepToString(r2)).isEqualTo(expClust); @@ -416,12 +428,12 @@ public void testScalarGte() throws Throwable @Test public void testStaticScalarEQ() throws Throwable { - testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", 3, "=", 3, "="); + testScalarCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int static, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", 3, "=", 3, "="); } private void testScalarCondition(int lhs, String operator, int rhs, String reversedOperator) throws Exception { - testScalarCondition("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", lhs, operator, rhs, reversedOperator); + testScalarCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", lhs, operator, rhs, reversedOperator); } private void testScalarCondition(String tableDDL, int lhs, String operator, int rhs, String reversedOperator) throws Exception @@ -429,27 +441,27 @@ private void testScalarCondition(String tableDDL, int lhs, String operator, int test(tableDDL, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, " + lhs + ");", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, " + lhs + ");", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + + " LET row1 = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1);\n" + " SELECT row1.v;\n" + " IF row1.v " + operator + " ? THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { lhs }, query, 0, rhs, 1, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, 0, 1 }, check, 1, 0); String queryWithReversed = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k = ? LIMIT 1);\n" + + " LET row1 = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k = ? LIMIT 1);\n" + " SELECT row1.v;\n" + " IF ? " + reversedOperator + " row1.v THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { lhs }, queryWithReversed, 0, rhs, 2, 0, 1); @@ -463,7 +475,7 @@ public void testReadOnlyTx() throws Exception test(cluster -> { String query = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY); assertFalse(result.hasNext()); @@ -476,13 +488,13 @@ public void testWriteOnlyTx() throws Exception test(cluster -> { String query = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1); assertFalse(result.hasNext()); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k=? AND c=?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k=? AND c=?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 1}, check, 0, 0); }); @@ -493,14 +505,14 @@ public void testReturningLetReferences() throws Throwable { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + - " LET row2 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?);\n" + + " LET row2 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?);\n" + " SELECT row1.v, row2.k, row2.c, row2.v;\n" + " IF row1 IS NULL AND row2.v = ? THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1, 0, 3, 0, 0, 1); @@ -508,7 +520,7 @@ public void testReturningLetReferences() throws Throwable assertThat(result).hasSize(1).contains(null, 1, 0, 3); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 1}, check); }); @@ -519,14 +531,14 @@ public void testFailedConditionWithCompleteInsert() throws Throwable { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 0, 3);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?);\n" + " SELECT row1.v;\n" + " IF row0 IS NULL AND row1.v = ? THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY, 0, 0, 1, 0, 2, 0, 0, 1); @@ -534,7 +546,7 @@ public void testFailedConditionWithCompleteInsert() throws Throwable assertThat(result).hasSize(1).contains(3); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0;\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, check); }); @@ -543,22 +555,22 @@ public void testFailedConditionWithCompleteInsert() throws Throwable @Test public void testReversedClusteringReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC) AND transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC) AND transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1);\n" + " SELECT row1.k, row1.c, row1.v;\n" + " IF row1.c = 1 THEN\n" + - " UPDATE " + qualifiedTableName + " SET v += row1.c WHERE k=1 AND c=1;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v += row1.c WHERE k=1 AND c=1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 2}, check); }); @@ -578,20 +590,20 @@ public void testScalarShorthandSubtraction() throws Exception private void testScalarShorthandOperation(int startingValue, String operation, int endingvalue) throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, v int) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v) VALUES (1, ?)", ConsistencyLevel.ALL, startingValue); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, v) VALUES (1, ?)", ConsistencyLevel.ALL, startingValue); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.v;\n" + - " UPDATE " + qualifiedTableName + " SET v " + operation + " 1 WHERE k = 1;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v " + operation + " 1 WHERE k = 1;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { startingValue }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k = 1;\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2 }, check); }); @@ -600,20 +612,20 @@ private void testScalarShorthandOperation(int startingValue, String operation, i @Test public void testConstantNonStaticRowReadBeforeUpdate() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 2);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 2);\n" + " SELECT row1.v;\n" + - " UPDATE " + qualifiedTableName + " SET v += 1 WHERE k = 1 AND c = 2;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v += 1 WHERE k = 1 AND c = 2;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 3 }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k = 1 AND c = 2;\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 2;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); }); @@ -622,21 +634,21 @@ public void testConstantNonStaticRowReadBeforeUpdate() throws Exception @Test public void testRangeDeletion() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 3, ?)", ConsistencyLevel.ALL, 4); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 4, ?)", ConsistencyLevel.ALL, 5); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 2, ?)", ConsistencyLevel.ALL, 3); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 3, ?)", ConsistencyLevel.ALL, 4); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 4, ?)", ConsistencyLevel.ALL, 5); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 2);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 2);\n" + " SELECT row1.v;\n" + - " DELETE FROM " + qualifiedTableName + " WHERE k = 1 AND c >=3 AND c <= 4;\n" + + " DELETE FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c >=3 AND c <= 4;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 3 }, update); - Object[][] check = cluster.coordinator(1).execute("SELECT * FROM " + qualifiedTableName + " WHERE k = 1;", ConsistencyLevel.SERIAL); + Object[][] check = cluster.coordinator(1).execute("SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1;", ConsistencyLevel.SERIAL); assertArrayEquals(new Object[] { 1, 2, 3 }, check[0]); assertEquals(1, check.length); }); @@ -646,22 +658,22 @@ public void testRangeDeletion() throws Exception @Test public void testPartitionKeyReferenceCondition() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC) AND transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH CLUSTERING ORDER BY (c DESC) AND transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 1, 1)", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1);\n" + " SELECT row1.k, row1.c, row1.v;\n" + " IF row1.k = 1 THEN\n" + - " UPDATE " + qualifiedTableName + " SET v += row1.k WHERE k=1 AND c=1;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v += row1.k WHERE k=1 AND c=1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 2}, check); }); @@ -670,22 +682,22 @@ public void testPartitionKeyReferenceCondition() throws Exception @Test public void testMultiPartitionKeyReferenceCondition() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (pk1 INT, pk2 INT, c INT, v INT, PRIMARY KEY ((pk1, pk2), c)) WITH CLUSTERING ORDER BY (c DESC)", + test("CREATE TABLE " + qualifiedAccordTableName + " (pk1 INT, pk2 INT, c INT, v INT, PRIMARY KEY ((pk1, pk2), c)) WITH CLUSTERING ORDER BY (c DESC) AND transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (pk1, pk2, c, v) VALUES (1, 1, 1, 1)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (pk1, pk2, c, v) VALUES (1, 1, 1, 1)", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1);\n" + " SELECT row1.pk1, row1.pk2, row1.c, row1.v;\n" + " IF row1.pk1 = 1 THEN\n" + - " UPDATE " + qualifiedTableName + " SET v += row1.pk2 WHERE pk1 = 1 AND pk2 = 1 AND c=1;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v += row1.pk2 WHERE pk1 = 1 AND pk2 = 1 AND c=1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1, 1}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE pk1 = 1 AND pk2 = 1 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{1, 1, 1, 2}, check); }); @@ -694,13 +706,13 @@ public void testMultiPartitionKeyReferenceCondition() throws Exception @Test public void testMultiCellListEqCondition() throws Exception { - testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); + testListEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListEqCondition() throws Exception { - testListEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testListEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListEqCondition(String ddl) throws Exception @@ -713,7 +725,7 @@ private void testListEqCondition(String ddl) throws Exception ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialListBytes); assertFalse(result.hasNext()); @@ -722,16 +734,16 @@ private void testListEqCondition(String ddl) throws Exception ByteBuffer updatedListBytes = listType.getSerializer().serialize(updatedList); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = ? THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_list = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialList}, update, 0, initialListBytes, updatedListBytes, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, updatedList}, check, 0); } @@ -741,13 +753,13 @@ private void testListEqCondition(String ddl) throws Exception @Test public void testMultiCellSetEqCondition() throws Exception { - testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); + testSetEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenSetEqCondition() throws Exception { - testSetEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testSetEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testSetEqCondition(String ddl) throws Exception @@ -760,7 +772,7 @@ private void testSetEqCondition(String ddl) throws Exception ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialSetBytes); assertFalse(result.hasNext()); @@ -769,16 +781,16 @@ private void testSetEqCondition(String ddl) throws Exception ByteBuffer updatedSetBytes = setType.getSerializer().serialize(updatedSet); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = ? THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_set = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_set = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {initialSet}, update, 0, initialSetBytes, updatedSetBytes, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, updatedSet}, check, 0); } @@ -788,13 +800,13 @@ private void testSetEqCondition(String ddl) throws Exception @Test public void testMultiCellMapEqCondition() throws Exception { - testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); + testMapEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenMapEqCondition() throws Exception { - testMapEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); + testMapEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exception @@ -807,7 +819,7 @@ private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exceptio ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialMapBytes); assertFalse(result.hasNext()); @@ -816,16 +828,16 @@ private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exceptio ByteBuffer updatedMapBytes = mapType.getSerializer().serialize(updatedMap); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map = ? THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_map = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_map = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, initialMapBytes, updatedMapBytes, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, check, 0); } @@ -835,13 +847,13 @@ private void testMapEqCondition(String ddl, boolean isMultiCell) throws Exceptio @Test public void testMultiCellUDTEqCondition() throws Exception { - testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testUDTEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenUDTEqCondition() throws Exception { - testUDTEqCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testUDTEqCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testUDTEqCondition(String tableDDL) throws Exception @@ -853,7 +865,7 @@ private void testUDTEqCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialPersonBuffer); assertFalse(result.hasNext()); @@ -862,16 +874,16 @@ private void testUDTEqCondition(String tableDDL) throws Exception ByteBuffer updatedPersonBuffer = CQLTester.makeByteBuffer(updatedPersonValue, null); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer = ? THEN\n" + - " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, initialPersonBuffer, updatedPersonBuffer, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, check, 0); } @@ -881,14 +893,14 @@ private void testUDTEqCondition(String tableDDL) throws Exception @Test public void testTupleEqCondition() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, pair tuple) WITH transactional_mode='" + transactionalMode + "'", cluster -> { Object initialTupleValue = CQLTester.tuple("age", 37); ByteBuffer initialTupleBuffer = CQLTester.makeByteBuffer(initialTupleValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, pair) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, pair) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialTupleBuffer); assertFalse(result.hasNext()); @@ -897,16 +909,16 @@ public void testTupleEqCondition() throws Exception ByteBuffer updatedTupleBuffer = CQLTester.makeByteBuffer(updatedTupleValue, null); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.pair;\n" + " IF row1.pair = ? THEN\n" + - " UPDATE " + qualifiedTableName + " SET pair = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET pair = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialTupleBuffer }, update, 0, initialTupleBuffer, updatedTupleBuffer, 0); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedTupleBuffer }, check, 0); } @@ -916,31 +928,31 @@ public void testTupleEqCondition() throws Exception @Test public void testIsNullWithComplexDeletion() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ListType listType = ListType.getInstance(Int32Type.instance, true); List initialList = Arrays.asList(1, 2); ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (0, 0, ?);", ConsistencyLevel.ALL, initialListBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, int_list) VALUES (0, 0, ?);", ConsistencyLevel.ALL, initialListBytes); cluster.forEach(i -> i.flush(KEYSPACE)); - cluster.coordinator(1).execute("DELETE int_list FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0;", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("DELETE int_list FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 0;", ConsistencyLevel.ALL); List updatedList = Arrays.asList(1, 2, 3); ByteBuffer updatedListBytes = listType.getSerializer().serialize(updatedList); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (?, ?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, int_list) VALUES (?, ?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, 0, 0, updatedListBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, updatedList }, check, 0, 0); } @@ -950,13 +962,13 @@ public void testIsNullWithComplexDeletion() throws Exception @Test public void testNullMultiCellListConditions() throws Exception { - testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); + testNullListConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenListConditions() throws Exception { - testNullListConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testNullListConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullListConditions(String ddl) throws Exception @@ -964,31 +976,31 @@ private void testNullListConditions(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (0, null);", ConsistencyLevel.ALL); ListType listType = ListType.getInstance(Int32Type.instance, true); List initialList = Arrays.asList(1, 2); ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialListBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialList}, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_list = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1002,13 +1014,13 @@ private void testNullListConditions(String ddl) throws Exception @Test public void testNullMultiCellSetConditions() throws Exception { - testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); + testNullSetConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenSetConditions() throws Exception { - testNullSetConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testNullSetConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullSetConditions(String ddl) throws Exception @@ -1016,31 +1028,31 @@ private void testNullSetConditions(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (0, null);", ConsistencyLevel.ALL); SetType setType = SetType.getInstance(Int32Type.instance, true); Set initialSet = ImmutableSet.of(1, 2); ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialSetBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialSet}, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_set = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_set = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1054,13 +1066,13 @@ private void testNullSetConditions(String ddl) throws Exception @Test public void testNullMultiCellMapConditions() throws Exception { - testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); + testNullMapConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testNullFrozenMapConditions() throws Exception { - testNullMapConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); + testNullMapConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testNullMapConditions(String ddl, boolean isMultiCell) throws Exception @@ -1068,31 +1080,31 @@ private void testNullMapConditions(String ddl, boolean isMultiCell) throws Excep test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); Map initialMap = ImmutableMap.of("one", 1, "two", 2); ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialMapBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialMap }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_map = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_map = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1101,7 +1113,7 @@ private void testNullMapConditions(String ddl, boolean isMultiCell) throws Excep assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, updatedMapBytes, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, checkUpdate, 0); } @@ -1111,13 +1123,13 @@ private void testNullMapConditions(String ddl, boolean isMultiCell) throws Excep @Test public void testNullMultiCellUDTCondition() throws Exception { - testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testNullUDTCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenUDTCondition() throws Exception { - testNullUDTCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testNullUDTCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullUDTCondition(String tableDDL) throws Exception @@ -1129,24 +1141,24 @@ private void testNullUDTCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialPersonBuffer); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1155,7 +1167,7 @@ private void testNullUDTCondition(String tableDDL) throws Exception assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); } @@ -1165,13 +1177,13 @@ private void testNullUDTCondition(String tableDDL) throws Exception @Test public void testNullMultiCellSetElementConditions() throws Exception { - testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); + testNullSetElementConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenSetElementConditions() throws Exception { - testNullSetElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testNullSetElementConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullSetElementConditions(String ddl) throws Exception @@ -1179,31 +1191,31 @@ private void testNullSetElementConditions(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (0, {1});", ConsistencyLevel.ALL); SetType setType = SetType.getInstance(Int32Type.instance, true); Set initialSet = ImmutableSet.of(1, 2); ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_set[2];\n" + " IF row1.int_set[2] IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {null}, insert, 0, 0, initialSetBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, initialSet}, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set[2] IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_set = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_set = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1217,13 +1229,13 @@ private void testNullSetElementConditions(String ddl) throws Exception @Test public void testNullMultiCellMapElementConditions() throws Exception { - testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); + testNullMapElementConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testNullFrozenMapElementConditions() throws Exception { - testNullMapElementConditions("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); + testNullMapElementConditions("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testNullMapElementConditions(String ddl, boolean isMultiCell) throws Exception @@ -1231,31 +1243,31 @@ private void testNullMapElementConditions(String ddl, boolean isMultiCell) throw test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (0, null);", ConsistencyLevel.ALL); MapType mapType = MapType.getInstance(UTF8Type.instance, Int32Type.instance, isMultiCell); Map initialMap = ImmutableMap.of("one", 1, "two", 2); ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map[?] IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, "one", 0, initialMapBytes); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialMap }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map[?] IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_map = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_map = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1264,7 +1276,7 @@ private void testNullMapElementConditions(String ddl, boolean isMultiCell) throw assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialMap }, update, 0, "two", updatedMapBytes, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedMap }, checkUpdate, 0); } @@ -1274,13 +1286,13 @@ private void testNullMapElementConditions(String ddl, boolean isMultiCell) throw @Test public void testNullMultiCellUDTFieldCondition() throws Exception { - testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testNullUDTFieldCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testNullFrozenUDTFieldCondition() throws Exception { - testNullUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testNullUDTFieldCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testNullUDTFieldCondition(String tableDDL) throws Exception @@ -1292,24 +1304,24 @@ private void testNullUDTFieldCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer.age IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (?, ?);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, insert, 0, 0, initialPersonBuffer); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer.age IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -1318,7 +1330,7 @@ private void testNullUDTFieldCondition(String tableDDL) throws Exception assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); } @@ -1328,13 +1340,13 @@ private void testNullUDTFieldCondition(String tableDDL) throws Exception @Test public void testMultiCellListSubstitution() throws Exception { - testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", true); + testListSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenListSubstitution() throws Exception { - testListSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'", false); + testListSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testListSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1346,19 +1358,19 @@ private void testListSubstitution(String ddl, boolean isMultiCell) throws Except List initialList = Arrays.asList(1, 2); ByteBuffer initialListBytes = listType.getSerializer().serialize(initialList); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, ?);", ConsistencyLevel.ALL, initialListBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (0, ?);", ConsistencyLevel.ALL, initialListBytes); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list IS NOT NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (?, row1.int_list);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (?, row1.int_list);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialList }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialList }, check, 1); } @@ -1368,13 +1380,13 @@ private void testListSubstitution(String ddl, boolean isMultiCell) throws Except @Test public void testMultiCellSetSubstitution() throws Exception { - testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", true); + testSetSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenSetSubstitution() throws Exception { - testSetSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'", false); + testSetSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testSetSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1386,19 +1398,19 @@ private void testSetSubstitution(String ddl, boolean isMultiCell) throws Excepti Set initialSet = ImmutableSet.of(1, 2); ByteBuffer initialSetBytes = setType.getSerializer().serialize(initialSet); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, ?);", ConsistencyLevel.ALL, initialSetBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (0, ?);", ConsistencyLevel.ALL, initialSetBytes); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set IS NOT NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (?, row1.int_set);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (?, row1.int_set);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialSet }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialSet }, check, 1); } @@ -1408,13 +1420,13 @@ private void testSetSubstitution(String ddl, boolean isMultiCell) throws Excepti @Test public void testMultiCellMapSubstitution() throws Exception { - testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); + testMapSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", true); } @Test public void testFrozenMapSubstitution() throws Exception { - testMapSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); + testMapSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'", false); } private void testMapSubstitution(String ddl, boolean isMultiCell) throws Exception @@ -1426,19 +1438,19 @@ private void testMapSubstitution(String ddl, boolean isMultiCell) throws Excepti Map initialMap = ImmutableMap.of("one", 1, "two", 2); ByteBuffer initialMapBytes = mapType.getSerializer().serialize(initialMap); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, ?);", ConsistencyLevel.ALL, initialMapBytes); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (0, ?);", ConsistencyLevel.ALL, initialMapBytes); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map IS NOT NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (?, row1.int_map);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (?, row1.int_map);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ initialMap }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialMap }, check, 1); } @@ -1448,13 +1460,13 @@ private void testMapSubstitution(String ddl, boolean isMultiCell) throws Excepti @Test public void testMultiCellUDTSubstitution() throws Exception { - testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testUDTSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenUDTSubstitution() throws Exception { - testUDTSubstitution("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testUDTSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testUDTSubstitution(String tableDDL) throws Exception @@ -1464,19 +1476,19 @@ private void testUDTSubstitution(String tableDDL) throws Exception { Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer IS NOT NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, row1.customer);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (?, row1.customer);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ initialPersonBuffer }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialPersonBuffer }, check, 1); } @@ -1486,24 +1498,24 @@ private void testUDTSubstitution(String tableDDL) throws Exception @Test public void testTupleSubstitution() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, pair tuple) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, pair tuple) WITH transactional_mode='" + transactionalMode + "'", cluster -> { Object initialTupleValue = CQLTester.tuple("age", 37); ByteBuffer initialTupleBuffer = CQLTester.makeByteBuffer(initialTupleValue, null); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, pair) VALUES (0, ?);", ConsistencyLevel.ALL, initialTupleBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, pair) VALUES (0, ?);", ConsistencyLevel.ALL, initialTupleBuffer); String insert = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.pair;\n" + " IF row1.pair IS NOT NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, pair) VALUES (?, row1.pair);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, pair) VALUES (?, row1.pair);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialTupleBuffer }, insert, 0, 1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1, initialTupleBuffer }, check, 1); } @@ -1513,13 +1525,13 @@ public void testTupleSubstitution() throws Exception @Test public void testMultiCellListReplacement() throws Exception { - testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); + testListReplacement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListReplacement() throws Exception { - testListReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testListReplacement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListReplacement(String ddl) throws Exception @@ -1527,20 +1539,20 @@ private void testListReplacement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = [3, 4] THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_list = row1.int_list WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list = row1.int_list WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(3, 4)}, check); } @@ -1550,13 +1562,13 @@ private void testListReplacement(String ddl) throws Exception @Test public void testMultiCellSetReplacement() throws Exception { - testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); + testSetReplacement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenSetReplacement() throws Exception { - testSetReplacement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testSetReplacement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testSetReplacement(String ddl) throws Exception @@ -1564,20 +1576,20 @@ private void testSetReplacement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = {3, 4} THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_set = row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_set = row1.int_set WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(3, 4) }, check); } @@ -1587,23 +1599,23 @@ private void testSetReplacement(String ddl) throws Exception @Test public void testListAppendFromReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = [3, 4] THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_list += row1.int_list WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list += row1.int_list WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(1, 2, 3, 4)}, check); } @@ -1613,13 +1625,13 @@ public void testListAppendFromReference() throws Exception @Test public void testSetByIndexFromMultiCellListElement() throws Exception { - testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list list, dest_int_list list) WITH transactional_mode='" + transactionalMode + "'"); + testListSetByIndexFromListElement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, src_int_list list, dest_int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testSetByIndexFromFrozenListElement() throws Exception { - testListSetByIndexFromListElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, src_int_list frozen>, dest_int_list list) WITH transactional_mode='" + transactionalMode + "'"); + testListSetByIndexFromListElement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, src_int_list frozen>, dest_int_list list) WITH transactional_mode='" + transactionalMode + "'"); } private void testListSetByIndexFromListElement(String ddl) throws Exception @@ -1627,18 +1639,18 @@ private void testListSetByIndexFromListElement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, dest_int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, src_int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, dest_int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, src_int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.src_int_list;\n" + - " UPDATE " + qualifiedTableName + " SET dest_int_list[0] = row1.src_int_list[0] WHERE k = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET dest_int_list[0] = row1.src_int_list[0] WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT dest_int_list FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT dest_int_list FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 2)}, check); } @@ -1648,20 +1660,20 @@ private void testListSetByIndexFromListElement(String ddl) throws Exception @Test public void testListSetByIndexFromScalar() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0);\n" + " SELECT row0.int_list;\n" + - " UPDATE " + qualifiedTableName + " SET int_list[0] = 2 WHERE k = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list[0] = 2 WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(1, 2)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT int_list FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT int_list FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(2, 2)}, check); } @@ -1671,21 +1683,21 @@ public void testListSetByIndexFromScalar() throws Exception @Test public void testAutoReadSelectionConstruction() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, other_counter int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, counter int, other_counter int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, other_counter) VALUES (0, 0, 1, 1);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, other_counter) VALUES (0, 1, 1, 1);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, counter, other_counter) VALUES (0, 0, 1, 1);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, counter, other_counter) VALUES (0, 1, 1, 1);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 0);\n" + " SELECT row0.counter, row0.other_counter;\n" + - " UPDATE " + qualifiedTableName + " SET other_counter += 1, counter += row0.counter WHERE k = 0 AND c = 1;\n" + + " UPDATE " + qualifiedAccordTableName + " SET other_counter += 1, counter += row0.counter WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 1, 1 }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT counter, other_counter FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1;\n" + + " SELECT counter, other_counter FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2, 2 }, check); } @@ -1695,21 +1707,21 @@ public void testAutoReadSelectionConstruction() throws Exception @Test public void testMultiMutationsSameKey() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, counter int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, counter int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, counter, int_list) VALUES (0, 0, 0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, counter, int_list) VALUES (0, 0, 0, [1, 2]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 0);\n" + " SELECT row0.counter, row0.int_list;\n" + - " UPDATE " + qualifiedTableName + " SET int_list[0] = 42 WHERE k = 0 AND c = 0;\n" + - " UPDATE " + qualifiedTableName + " SET counter += 1 WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list[0] = 42 WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET counter += 1 WHERE k = 0 AND c = 0;\n" + "COMMIT TRANSACTION"; assertRowEquals(cluster, new Object[] { 0, Arrays.asList(1, 2) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT counter, int_list FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0;\n" + + " SELECT counter, int_list FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {1, Arrays.asList(42, 2)}, check); } @@ -1720,10 +1732,10 @@ public void testMultiMutationsSameKey() throws Exception public void testLetLargerThanOneWithPK() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0 LIMIT 2);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0 LIMIT 2);\n" + " SELECT row1.v;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[]{ 0 }, cql, 1); @@ -1734,10 +1746,10 @@ public void testLetLargerThanOneWithPK() throws Exception public void testLetLimitUsingBind() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 0);", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 LIMIT ?);\n" + " SELECT row1.v;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0 }, cql, 1); @@ -1747,24 +1759,24 @@ public void testLetLimitUsingBind() throws Exception @Test public void testListSetByIndexMultiRow() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, int_list list, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (0, 0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, int_list) VALUES (0, 1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, int_list) VALUES (0, 0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, int_list) VALUES (0, 1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 1);\n" + " SELECT row0.int_list;\n" + - " UPDATE " + qualifiedTableName + " SET int_list[0] = row1.int_list[0] WHERE k = 0 AND c = 0;\n" + - " UPDATE " + qualifiedTableName + " SET int_list[0] = row0.int_list[0] WHERE k = 0 AND c = 1;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list[0] = row1.int_list[0] WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list[0] = row0.int_list[0] WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { Arrays.asList(1, 2) }, update); String check = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 1);\n" + " SELECT row0.int_list, row1.int_list;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 2), Arrays.asList(1, 4)}, check); @@ -1775,21 +1787,21 @@ public void testListSetByIndexMultiRow() throws Exception @Test public void testSetAppend() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + - " UPDATE " + qualifiedTableName + " SET int_set += row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_set += row1.int_set WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(1, 2, 3, 4) }, check); } @@ -1799,13 +1811,13 @@ public void testSetAppend() throws Exception @Test public void testAssignmentFromMultiCellSetElement() throws Exception { - testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set set) WITH transactional_mode='" + transactionalMode + "'"); + testAssignmentFromSetElement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, v int, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testAssignmentFromFrozenSetElement() throws Exception { - testAssignmentFromSetElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testAssignmentFromSetElement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, v int, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testAssignmentFromSetElement(String ddl) throws Exception @@ -1813,18 +1825,18 @@ private void testAssignmentFromSetElement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_set) VALUES (0, 0, {1, 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_set) VALUES (1, 0, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, v, int_set) VALUES (0, 0, {1, 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, v, int_set) VALUES (1, 0, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + - " UPDATE " + qualifiedTableName + " SET v = row1.int_set[4] WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v = row1.int_set[4] WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); } @@ -1834,21 +1846,21 @@ private void testAssignmentFromSetElement(String ddl) throws Exception @Test public void testMapAppend() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + - " UPDATE " + qualifiedTableName + " SET int_map += row1.int_map WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_map += row1.int_map WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("one", 2, "three", 4) }, check); } @@ -1858,13 +1870,13 @@ public void testMapAppend() throws Exception @Test public void testAssignmentFromMultiCellMapElement() throws Exception { - testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map map) WITH transactional_mode='" + transactionalMode + "'"); + testAssignmentFromMapElement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, v int, int_map map) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testAssignmentFromFrozenMapElement() throws Exception { - testAssignmentFromMapElement("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testAssignmentFromMapElement("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, v int, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testAssignmentFromMapElement(String ddl) throws Exception @@ -1872,18 +1884,18 @@ private void testAssignmentFromMapElement(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_map) VALUES (0, 0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, int_map) VALUES (1, 0, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, v, int_map) VALUES (0, 0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, v, int_map) VALUES (1, 0, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + - " UPDATE " + qualifiedTableName + " SET v = row1.int_map[?] WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v = row1.int_map[?] WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "three"); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check); } @@ -1893,13 +1905,13 @@ private void testAssignmentFromMapElement(String ddl) throws Exception @Test public void testAssignmentFromMultiCellUDTField() throws Exception { - testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testAssignmentFromUDTField("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, v int, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testAssignmentFromFrozenUDTField() throws Exception { - testAssignmentFromUDTField("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, v int, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testAssignmentFromUDTField("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, v int, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testAssignmentFromUDTField(String tableDDL) throws Exception @@ -1909,18 +1921,18 @@ private void testAssignmentFromUDTField(String tableDDL) throws Exception { Object initialPersonValue = CQLTester.userType("height", 74, "age", 37); ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, customer) VALUES (0, 0, null);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, v, customer) VALUES (1, 0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, v, customer) VALUES (0, 0, null);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, v, customer) VALUES (1, 0, ?);", ConsistencyLevel.ALL, initialPersonBuffer); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.customer;\n" + - " UPDATE " + qualifiedTableName + " SET v = row1.customer.age WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v = row1.customer.age WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 37 }, check); } @@ -1930,21 +1942,21 @@ private void testAssignmentFromUDTField(String tableDDL) throws Exception @Test public void testSetMapElementFromMapElementReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + - " UPDATE " + qualifiedTableName + " SET int_map[?] = row1.int_map[?] WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_map[?] = row1.int_map[?] WHERE k=0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "one", "three"); String check = "BEGIN TRANSACTION\n" + - " SELECT int_map[?] FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT int_map[?] FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 4 }, check, "one"); } @@ -1954,7 +1966,7 @@ public void testSetMapElementFromMapElementReference() throws Exception @Test public void testSetUDTFieldFromUDTFieldReference() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'", cluster -> { Object youngPerson = CQLTester.userType("height", 58, "age", 9); @@ -1962,18 +1974,18 @@ public void testSetUDTFieldFromUDTFieldReference() throws Exception Object adultPerson = CQLTester.userType("height", 74, "age", 37); ByteBuffer adultPersonBuffer = CQLTester.makeByteBuffer(adultPerson, null); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, youngPersonBuffer); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (1, ?);", ConsistencyLevel.ALL, adultPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (0, ?);", ConsistencyLevel.ALL, youngPersonBuffer); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (1, ?);", ConsistencyLevel.ALL, adultPersonBuffer); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.customer;\n" + - " UPDATE " + qualifiedTableName + " SET customer.age = row1.customer.age WHERE k = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET customer.age = row1.customer.age WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { adultPersonBuffer }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT customer.height, customer.age FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT customer.height, customer.age FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 58, 37 }, check); } @@ -1983,13 +1995,13 @@ public void testSetUDTFieldFromUDTFieldReference() throws Exception @Test public void testMultiCellListElementCondition() throws Exception { - testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); + testListElementCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListElementCondition() throws Exception { - testListElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testListElementCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListElementCondition(String ddl) throws Exception @@ -1997,20 +2009,20 @@ private void testListElementCondition(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (0, [1, 2]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list[1] = 4 THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_list = [3, 4] WHERE k = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list = [3, 4] WHERE k = 0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableList.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableList.of(3, 4) }, check); } @@ -2020,13 +2032,13 @@ private void testListElementCondition(String ddl) throws Exception @Test public void testMultiCellMapElementCondition() throws Exception { - testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'"); + testMapElementCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenMapElementCondition() throws Exception { - testMapElementCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testMapElementCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testMapElementCondition(String ddl) throws Exception @@ -2034,20 +2046,20 @@ private void testMapElementCondition(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (0, {'one': 2});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (1, {'three': 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + " IF row1.int_map[?] = 4 THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_map = {'three': 4} WHERE k = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_map = {'three': 4} WHERE k = 0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("three", 4) }, update, "three"); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("three", 4) }, check); } @@ -2057,13 +2069,13 @@ private void testMapElementCondition(String ddl) throws Exception @Test public void testMultiCellUDTFieldCondition() throws Exception { - testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testUDTFieldCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenUDTFieldCondition() throws Exception { - testUDTFieldCondition("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testUDTFieldCondition("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testUDTFieldCondition(String tableDDL) throws Exception @@ -2075,21 +2087,21 @@ private void testUDTFieldCondition(String tableDDL) throws Exception ByteBuffer initialPersonBuffer = CQLTester.makeByteBuffer(initialPersonValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, initialPersonBuffer); assertFalse(result.hasNext()); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, initialPersonBuffer }, check, 0); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row1.customer;\n" + " IF row1.customer.age = 37 THEN\n" + - " UPDATE " + qualifiedTableName + " SET customer = ? WHERE k = ?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET customer = ? WHERE k = ?;\n" + " END IF\n" + "COMMIT TRANSACTION"; @@ -2098,7 +2110,7 @@ private void testUDTFieldCondition(String tableDDL) throws Exception assertRowEqualsWithPreemptedRetry(cluster, new Object[] { initialPersonBuffer }, update, 0, updatedPersonBuffer, 0); String checkUpdate = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, updatedPersonBuffer }, checkUpdate, 0); } @@ -2108,23 +2120,23 @@ private void testUDTFieldCondition(String tableDDL) throws Exception @Test public void testListSubtraction() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (0, [1, 2, 3, 4]);", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (0, [1, 2, 3, 4]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (1, [3, 4]);", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + " IF row1.int_list = [3, 4] THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_list -= row1.int_list WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_list -= row1.int_list WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {Arrays.asList(3, 4)}, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, Arrays.asList(1, 2)}, check); } @@ -2134,23 +2146,23 @@ public void testListSubtraction() throws Exception @Test public void testSetSubtraction() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'", cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (0, {1, 2, 3, 4});", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (0, {1, 2, 3, 4});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (1, {3, 4});", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = {3, 4} THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_set -= row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_set -= row1.int_set WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(3, 4) }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableSet.of(1, 2) }, check); } @@ -2160,13 +2172,13 @@ public void testSetSubtraction() throws Exception @Test public void testMultiCellMapSubtraction() throws Exception { - testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set set) WITH transactional_mode='" + transactionalMode + "'"); + testMapSubtraction("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenMapSubtraction() throws Exception { - testMapSubtraction("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testMapSubtraction("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testMapSubtraction(String ddl) throws Exception @@ -2174,20 +2186,20 @@ private void testMapSubtraction(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (0, { 'one': 2, 'three': 4 });", ConsistencyLevel.ALL); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, { 'three' });", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (0, { 'one': 2, 'three': 4 });", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (1, { 'three' });", ConsistencyLevel.ALL); String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + " IF row1.int_set = { 'three' } THEN\n" + - " UPDATE " + qualifiedTableName + " SET int_map -= row1.int_set WHERE k=0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET int_map -= row1.int_set WHERE k=0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of("three") }, update); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = 0;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, ImmutableMap.of("one", 2), null}, check); } @@ -2197,13 +2209,13 @@ private void testMapSubtraction(String ddl) throws Exception @Test public void testMultiCellListSelection() throws Exception { - testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); + testListSelection("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list list) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenListSelection() throws Exception { - testListSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testListSelection("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_list frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testListSelection(String ddl) throws Exception @@ -2211,16 +2223,16 @@ private void testListSelection(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_list) VALUES (1, [10, 20, 30, 40]);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_list) VALUES (1, [10, 20, 30, 40]);", ConsistencyLevel.ALL); String selectEntireSet = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_list;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableList.of(10, 20, 30, 40) }, selectEntireSet); String selectSingleElement = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_list[0];\n" + "COMMIT TRANSACTION"; @@ -2235,13 +2247,13 @@ private void testListSelection(String ddl) throws Exception @Test public void testMultiCellSetSelection() throws Exception { - testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); + testSetSelection("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set set) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenSetSelection() throws Exception { - testSetSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testSetSelection("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_set frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testSetSelection(String ddl) throws Exception @@ -2249,16 +2261,16 @@ private void testSetSelection(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_set) VALUES (1, {10, 20, 30, 40});", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_set) VALUES (1, {10, 20, 30, 40});", ConsistencyLevel.ALL); String selectEntireSet = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_set;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableSet.of(10, 20, 30, 40) }, selectEntireSet); String selectSingleElement = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_set[10];\n" + "COMMIT TRANSACTION"; @@ -2273,13 +2285,13 @@ private void testSetSelection(String ddl) throws Exception @Test public void testMultiCellMapSelection() throws Exception { - testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'"); + testMapSelection("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map map) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testFrozenMapSelection() throws Exception { - testMapSelection("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); + testMapSelection("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, int_map frozen>) WITH transactional_mode='" + transactionalMode + "'"); } private void testMapSelection(String ddl) throws Exception @@ -2287,16 +2299,16 @@ private void testMapSelection(String ddl) throws Exception test(ddl, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, int_map) VALUES (1, { 'ten': 20, 'thirty': 40 });", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, int_map) VALUES (1, { 'ten': 20, 'thirty': 40 });", ConsistencyLevel.ALL); String selectEntireMap = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_map;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { ImmutableMap.of("ten", 20, "thirty", 40) }, selectEntireMap); String selectSingleElement = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 1);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1);\n" + " SELECT row1.int_map['ten'];\n" + "COMMIT TRANSACTION"; @@ -2312,25 +2324,25 @@ public void testScalarUpdateSubstitution() { String KEYSPACE = "ks" + System.currentTimeMillis(); SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 2}"); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "1 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + "2 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedAccordTableName + "1 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedAccordTableName + "2 (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'"); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); - SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "1 (k, c, v) VALUES (1, 2, 3);", ConsistencyLevel.ALL); - SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedTableName + "2 (k, c, v) VALUES (2, 2, 4);", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + "1 (k, c, v) VALUES (1, 2, 3);", ConsistencyLevel.ALL); + SHARED_CLUSTER.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + "2 (k, c, v) VALUES (2, 2, 4);", ConsistencyLevel.ALL); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + "1 WHERE k=1 AND c=2);\n" + - " LET row2 = (SELECT * FROM " + qualifiedTableName + "2 WHERE k=2 AND c=2);\n" + - " SELECT v FROM " + qualifiedTableName + "1 WHERE k=1 AND c=2;\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + "1 WHERE k=1 AND c=2);\n" + + " LET row2 = (SELECT * FROM " + qualifiedAccordTableName + "2 WHERE k=2 AND c=2);\n" + + " SELECT v FROM " + qualifiedAccordTableName + "1 WHERE k=1 AND c=2;\n" + " IF row1.v = 3 AND row2.v = 4 THEN\n" + - " UPDATE " + qualifiedTableName + "1 SET v = row2.v WHERE k=1 AND c=2;\n" + + " UPDATE " + qualifiedAccordTableName + "1 SET v = row2.v WHERE k=1 AND c=2;\n" + " END IF\n" + "COMMIT TRANSACTION"; Object[][] result = SHARED_CLUSTER.coordinator(1).execute(query, ConsistencyLevel.ANY); assertEquals(3, result[0][0]); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + "1 WHERE k=1 AND c=2;\n" + + " SELECT * FROM " + qualifiedAccordTableName + "1 WHERE k=1 AND c=2;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(SHARED_CLUSTER, new Object[]{1, 2, 4}, check); } @@ -2338,13 +2350,13 @@ public void testScalarUpdateSubstitution() @Test public void testRegularScalarInsertSubstitution() throws Exception { - testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'"); + testScalarInsertSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testStaticScalarInsertSubstitution() throws Exception { - testScalarInsertSubstitution("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int static, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'"); + testScalarInsertSubstitution("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int static, PRIMARY KEY (k, c)) WITH transactional_mode='" + transactionalMode + "'"); } private void testScalarInsertSubstitution(String tableDDL) throws Exception @@ -2352,19 +2364,19 @@ private void testScalarInsertSubstitution(String tableDDL) throws Exception test(tableDDL, cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 1);", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 1);", ConsistencyLevel.ALL); String insert = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 LIMIT 1);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 LIMIT 1);\n" + " SELECT row0.v;\n" + " IF row0.v IS NOT NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 1, row0.v);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 1, row0.v);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1 }, insert); String check = "BEGIN TRANSACTION\n" + - " SELECT k, c, v FROM " + qualifiedTableName + " WHERE k = 0 AND c = 1;\n" + + " SELECT k, c, v FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 1;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 1, 1 }, check); } @@ -2374,13 +2386,13 @@ private void testScalarInsertSubstitution(String tableDDL) throws Exception @Test public void testSelectMultiCellUDTReference() throws Exception { - testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testSelectUDTReference("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testSelectFrozenUDTReference() throws Exception { - testSelectUDTReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testSelectUDTReference("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testSelectUDTReference(String tableDDL) throws Exception @@ -2392,13 +2404,13 @@ private void testSelectUDTReference(String tableDDL) throws Exception ByteBuffer personBuffer = CQLTester.makeByteBuffer(personValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, personBuffer); assertFalse(result.hasNext()); String read = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row0.customer;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { personBuffer }, read, 0); @@ -2409,13 +2421,13 @@ private void testSelectUDTReference(String tableDDL) throws Exception @Test public void testSelectMultiCellUDTFieldReference() throws Exception { - testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); + testSelectUDTFieldReference("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer person) WITH transactional_mode='" + transactionalMode + "'"); } @Test public void testSelectFrozenUDTFieldReference() throws Exception { - testSelectUDTFieldReference("CREATE TABLE " + qualifiedTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); + testSelectUDTFieldReference("CREATE TABLE " + qualifiedAccordTableName + " (k int PRIMARY KEY, customer frozen) WITH transactional_mode='" + transactionalMode + "'"); } private void testSelectUDTFieldReference(String tableDDL) throws Exception @@ -2427,13 +2439,13 @@ private void testSelectUDTFieldReference(String tableDDL) throws Exception ByteBuffer personBuffer = CQLTester.makeByteBuffer(personValue, null); String insert = "BEGIN TRANSACTION\n" + - " INSERT INTO " + qualifiedTableName + " (k, customer) VALUES (?, ?);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, customer) VALUES (?, ?);\n" + "COMMIT TRANSACTION"; SimpleQueryResult result = cluster.coordinator(1).executeWithResult(insert, ConsistencyLevel.ANY, 0, personBuffer); assertFalse(result.hasNext()); String read = "BEGIN TRANSACTION\n" + - " LET row0 = (SELECT * FROM " + qualifiedTableName + " WHERE k = ?);\n" + + " LET row0 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?);\n" + " SELECT row0.customer.age;\n" + "COMMIT TRANSACTION"; result = assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 37 }, read, 0); @@ -2446,33 +2458,33 @@ private void testSelectUDTFieldReference(String tableDDL) throws Exception @Test public void testMultiKeyQueryAndInsert() throws Throwable { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { String query1 = "BEGIN TRANSACTION\n" + - " LET select1 = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + - " LET select2 = (SELECT * FROM " + qualifiedTableName + " WHERE k=1 AND c=0);\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0;\n" + + " LET select1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0);\n" + + " LET select2 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k=1 AND c=0);\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0;\n" + " IF select1 IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0);\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 0);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 0);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 0, 0);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, query1); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 0}, check, 0, 0); assertRowEqualsWithPreemptedRetry(cluster, new Object[] {1, 0, 0}, check, 1, 0); String query2 = "BEGIN TRANSACTION\n" + - " LET select1 = (SELECT * FROM " + qualifiedTableName + " WHERE k=1 AND c=0);\n" + - " LET select2 = (SELECT * FROM " + qualifiedTableName + " WHERE k=2 AND c=0);\n" + - " SELECT v FROM " + qualifiedTableName + " WHERE k=1 AND c=0;\n" + + " LET select1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k=1 AND c=0);\n" + + " LET select2 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k=2 AND c=0);\n" + + " SELECT v FROM " + qualifiedAccordTableName + " WHERE k=1 AND c=0;\n" + " IF select1.v = ? THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (1, 0, 1);\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (2, 0, 1);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (1, 0, 1);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (2, 0, 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0 }, query2, 0); @@ -2533,12 +2545,12 @@ public void demoTest() throws Throwable public void testReferenceArithmeticInInsert() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET a = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + + " LET a = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0);\n" + " IF a IS NOT NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 1, a.v + 1);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 1, a.v + 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, cql); @@ -2551,12 +2563,12 @@ public void testReferenceArithmeticInInsert() throws Exception public void testReferenceArithmeticInUpdate() throws Exception { test(cluster -> { - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 0)", ConsistencyLevel.ALL); String cql = "BEGIN TRANSACTION\n" + - " LET a = (SELECT * FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + + " LET a = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0);\n" + " IF a IS NOT NULL THEN\n" + - " UPDATE " + qualifiedTableName + " SET v = a.v + 1 WHERE k = 0 and c = 1;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v = a.v + 1 WHERE k = 0 and c = 1;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertEmptyWithPreemptedRetry(cluster, cql); @@ -2568,39 +2580,39 @@ public void testReferenceArithmeticInUpdate() throws Exception @Test public void testCASAndSerialRead() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (id int, c int, v int, s int static, PRIMARY KEY ((id), c)) WITH transactional_mode='" + transactionalMode + "';", + test("CREATE TABLE " + qualifiedAccordTableName + " (id int, c int, v int, s int static, PRIMARY KEY ((id), c)) WITH transactional_mode='" + transactionalMode + "';", cluster -> { ICoordinator coordinator = cluster.coordinator(1); int startingAccordCoordinateCount = getAccordCoordinateCount(); - assertRowEquals(cluster, new Object[]{false}, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF EXISTS"); - assertRowEquals(cluster, new Object[]{false}, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); - coordinator.execute("INSERT INTO " + qualifiedTableName + " (id, c, v, s) VALUES (1, 2, 3, 5);", ConsistencyLevel.ALL); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 3, 5); - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); - assertRowEquals(cluster, new Object[]{ false, 4 }, "UPDATE " + qualifiedTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{false}, "UPDATE " + qualifiedAccordTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF EXISTS"); + assertRowEquals(cluster, new Object[]{false}, "UPDATE " + qualifiedAccordTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (id, c, v, s) VALUES (1, 2, 3, 5);", ConsistencyLevel.ALL); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c = 2", 1, 2, 3, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedAccordTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{ false, 4 }, "UPDATE " + qualifiedAccordTableName + " SET v = 4 WHERE id = 1 AND c = 2 IF v = 3"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); // Test working with a static column - assertRowEquals(cluster, new Object[]{ false, 5 }, "UPDATE " + qualifiedTableName + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 4"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 5"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 5, 5); - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET s = 6 WHERE id = 1 IF s = 5"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 5, 6); + assertRowEquals(cluster, new Object[]{ false, 5 }, "UPDATE " + qualifiedAccordTableName + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 4"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c = 2", 1, 2, 4, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedAccordTableName + " SET v = 5 WHERE id = 1 AND c = 2 IF s = 5"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c = 2", 1, 2, 5, 5); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedAccordTableName + " SET s = 6 WHERE id = 1 IF s = 5"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c = 2", 1, 2, 5, 6); // Test that read before write works with CAS - assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedTableName + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS"); - assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1 AND c = 2", 1, 2, 6, 7); + assertRowEquals(cluster, new Object[]{true}, "UPDATE " + qualifiedAccordTableName + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS"); + assertRowSerial(cluster, "SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c = 2", 1, 2, 6, 7); // Check range deletion works - coordinator.execute("INSERT INTO " + qualifiedTableName + " (id, c, v, s) VALUES (1, 2, 6, 7);", ConsistencyLevel.ALL); - coordinator.execute("INSERT INTO " + qualifiedTableName + " (id, c, v) VALUES (1, 3, 3);", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (id, c, v, s) VALUES (1, 2, 6, 7);", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (id, c, v) VALUES (1, 3, 3);", ConsistencyLevel.ALL); assertRowEquals(cluster, new Object[]{true}, "BEGIN BATCH \n" + - "UPDATE " + qualifiedTableName + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS; \n" + - "DELETE FROM " + qualifiedTableName + " WHERE id = 1 AND c > 0 AND c < 10; \n" + + "UPDATE " + qualifiedAccordTableName + " SET s +=1, v += 1 WHERE id = 1 AND c = 2 IF EXISTS; \n" + + "DELETE FROM " + qualifiedAccordTableName + " WHERE id = 1 AND c > 0 AND c < 10; \n" + "APPLY BATCH;"); - Object[][] rangeDeletionCheck = coordinator.execute("SELECT id, c, v, s FROM " + qualifiedTableName + " WHERE id = 1", ConsistencyLevel.SERIAL); + Object[][] rangeDeletionCheck = coordinator.execute("SELECT id, c, v, s FROM " + qualifiedAccordTableName + " WHERE id = 1", ConsistencyLevel.SERIAL); assertArrayEquals(new Object[] { 1, 2, 7, 8 }, rangeDeletionCheck[0]); assertEquals(1, rangeDeletionCheck.length); @@ -2617,10 +2629,10 @@ public void testCASAndSerialRead() throws Exception @Test public void testCASSimulatorLite() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); - coordinator.execute("INSERT INTO " + qualifiedTableName + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); ExecutorService es = Executors.newCachedThreadPool(); @@ -2628,12 +2640,12 @@ public void testCASSimulatorLite() throws Exception for (int ii = 0; ii < 10; ii++) { int id = ii; - futures.add(es.submit(() -> coordinator.execute("UPDATE " + qualifiedTableName + " SET count = count + 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk = ? IF EXISTS", ConsistencyLevel.ALL, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id))), 1))); + futures.add(es.submit(() -> coordinator.execute("UPDATE " + qualifiedAccordTableName + " SET count = count + 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk = ? IF EXISTS", ConsistencyLevel.ALL, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id))), 1))); } for (Future f : futures) f.get(); - Object[][] result = coordinator.execute("SELECT pk, count, seq1, seq2 FROM " + qualifiedTableName + " WHERE pk = 1", ConsistencyLevel.SERIAL); + Object[][] result = coordinator.execute("SELECT pk, count, seq1, seq2 FROM " + qualifiedAccordTableName + " WHERE pk = 1", ConsistencyLevel.SERIAL); int[] seq1 = Arrays.stream(((String) result[0][2]).split(",")) .filter(s -> !s.isEmpty()) @@ -2649,11 +2661,11 @@ public void testCASSimulatorLite() throws Exception @Test public void testTransactionCasSimulatorLite() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (pk int, count int, seq1 text, seq2 list, PRIMARY KEY (pk)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); - coordinator.execute("INSERT INTO " + qualifiedTableName + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (pk, count, seq1, seq2) VALUES (1, 0, '', []) USING TIMESTAMP 0", ConsistencyLevel.ALL); ListType LIST_TYPE = ListType.getInstance(Int32Type.instance, true); ExecutorService es = Executors.newCachedThreadPool(); @@ -2662,8 +2674,8 @@ public void testTransactionCasSimulatorLite() throws Exception { int id = ii; String update = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE pk = 1);\n" + - " UPDATE " + qualifiedTableName + " SET count += 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk=1;\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE pk = 1);\n" + + " UPDATE " + qualifiedAccordTableName + " SET count += 1, seq1 = seq1 + ?, seq2 = seq2 + ? WHERE pk=1;\n" + "COMMIT TRANSACTION"; futures.add(es.submit(() -> coordinator.executeWithResult(update, ConsistencyLevel.ANY, id + ",", ByteBufferUtil.getArray(LIST_TYPE.decompose(singletonList(id)))))); } @@ -2671,7 +2683,7 @@ public void testTransactionCasSimulatorLite() throws Exception f.get(); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE pk = 1;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE pk = 1;\n" + "COMMIT TRANSACTION"; Object[][] result = coordinator.execute(check, ConsistencyLevel.ALL); @@ -2690,15 +2702,15 @@ public void testTransactionCasSimulatorLite() throws Exception @Test public void testSerialReadDescending() throws Throwable { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='" + transactionalMode + "'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='" + transactionalMode + "'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); for (int i = 1; i <= 10; i++) - coordinator.execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); } ); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java index ac7ab99f2627..452d9dc53a5c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -36,12 +36,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.local.cfk.CommandsForKey; import accord.impl.SimpleProgressLog; import accord.local.Node; import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.Status; +import accord.local.cfk.CommandsForKey; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -51,6 +51,7 @@ import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IIsolatedExecutor; import org.apache.cassandra.gms.FailureDetector; @@ -147,13 +148,15 @@ protected Logger logger() public static void setupClass() throws Throwable { CassandraRelevantProperties.ACCORD_AGENT_CLASS.setString(BarrierRecordingAgent.class.getName()); -// setupCluster(opt -> opt.withConfig(conf -> conf.with(Feature.NETWORK, Feature.GOSSIP)), 3); - setupCluster(opt -> opt, 3); + setupCluster(opt -> opt.withConfig(conf -> conf.with(Feature.NETWORK, Feature.GOSSIP)), 3); +// setupCluster(opt -> opt, 3); } @After public void tearDown() { + for (IInvokableInstance instance : SHARED_CLUSTER) + instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = false); SHARED_CLUSTER.filters().reset(); } @@ -248,16 +251,16 @@ private static TxnId awaitLocalApplyOnKey(PartitionKey key) @Test public void txnRepairTest() throws Throwable { - SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='full' AND fast_path={'size':2};", KEYSPACE, tableName)); + SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='full' AND fast_path={'size':2};", KEYSPACE, accordTableName)); final String keyspace = KEYSPACE; - final String table = tableName; + final String table = accordTableName; SHARED_CLUSTER.filters().allVerbs().to(3).drop(); awaitEndpointDown(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); executeWithRetry(SHARED_CLUSTER, format("BEGIN TRANSACTION\n" + "INSERT INTO %s (k, v) VALUES (1, 1);\n" + - "COMMIT TRANSACTION", qualifiedTableName)); + "COMMIT TRANSACTION", qualifiedAccordTableName)); SHARED_CLUSTER.get(1, 2).forEach(instance -> instance.runOnInstance(() -> { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); @@ -291,7 +294,7 @@ public void txnRepairTest() throws Throwable }); SHARED_CLUSTER.filters().reset(); awaitEndpointUp(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); - SHARED_CLUSTER.get(1).nodetool("repair", KEYSPACE); + nodetool(SHARED_CLUSTER.get(1), "repair", KEYSPACE); SHARED_CLUSTER.forEach(instance -> { instance.runOnInstance(() -> { @@ -307,9 +310,9 @@ public void txnRepairTest() throws Throwable private void testSingleNodeWrite(TransactionalMode mode) { - SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='%s';", KEYSPACE, tableName, mode)); + SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='%s';", KEYSPACE, accordTableName, mode)); final String keyspace = KEYSPACE; - final String table = tableName; + final String table = accordTableName; SHARED_CLUSTER.get(3).runOnInstance(() -> { QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (k, v) VALUES (1, 2);", keyspace, table)); @@ -344,7 +347,7 @@ private void testSingleNodeWrite(TransactionalMode mode) agent().reset(); })); - SHARED_CLUSTER.get(1).nodetool("repair", KEYSPACE); + nodetool(SHARED_CLUSTER.get(1), "repair", KEYSPACE); SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> { Assert.assertFalse( agent().executedBarriers().isEmpty()); ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); @@ -382,9 +385,9 @@ public void fullRepairTest() @Test public void onlyAccordTest() { - SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='full' AND fast_path={'size':2};", KEYSPACE, tableName)); + SHARED_CLUSTER.schemaChange(format("CREATE TABLE %s.%s (k int primary key, v int) WITH transactional_mode='full' AND fast_path={'size':2};", KEYSPACE, accordTableName)); final String keyspace = KEYSPACE; - final String table = tableName; + final String table = accordTableName; SHARED_CLUSTER.filters().allVerbs().to(3).drop(); awaitEndpointDown(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); @@ -392,7 +395,7 @@ public void onlyAccordTest() executeWithRetry(SHARED_CLUSTER, format("BEGIN TRANSACTION\n" + "INSERT INTO %s (k, v) VALUES (1, 1);\n" + - "COMMIT TRANSACTION", qualifiedTableName)); + "COMMIT TRANSACTION", qualifiedAccordTableName)); SHARED_CLUSTER.get(1, 2).forEach(instance -> instance.runOnInstance(() -> { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); @@ -403,7 +406,7 @@ public void onlyAccordTest() SHARED_CLUSTER.filters().reset(); awaitEndpointUp(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); - SHARED_CLUSTER.get(1).nodetool("repair", "--accord-only", KEYSPACE); + nodetool(SHARED_CLUSTER.get(1), "repair", "--accord-only", KEYSPACE); SHARED_CLUSTER.forEach(instance -> { logger().info("checking instance {}", instance.broadcastAddress()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java index e269e4e27a21..0ba67894d2c7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java @@ -59,10 +59,10 @@ public void testRecovery() throws Exception IMessageFilters.Filter lostCommit = cluster.filters().verbs(Verb.ACCORD_COMMIT_REQ.id).to(2).drop(); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + + " LET row1 = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0);\n" + " SELECT row1.v;\n" + " IF row1 IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 1);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; // row1.v shouldn't have existed when the txn's SELECT was executed @@ -73,24 +73,24 @@ public void testRecovery() throws Exception // Querying again should trigger recovery... query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + + " LET row1 = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0);\n" + " SELECT row1.v;\n" + " IF row1.v = 1 THEN\n" + - " UPDATE " + qualifiedTableName + " SET v=2 WHERE k = 0 AND c = 0;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v=2 WHERE k = 0 AND c = 0;\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 1 }, query); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] {0, 0, 2}, check, 0, 0); query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT v FROM " + qualifiedTableName + " WHERE k=0 AND c=0);\n" + + " LET row1 = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k=0 AND c=0);\n" + " SELECT row1.v;\n" + " IF row1 IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 3);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 3);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 2 }, query); @@ -113,16 +113,16 @@ public void testLostCommitReadTriggersFallbackRead() throws Exception })).drop(); String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM " + qualifiedTableName + " WHERE k = 0 AND c = 0);\n" + + " LET row1 = (SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 0 AND c = 0);\n" + " SELECT row1.v;\n" + " IF row1 IS NULL THEN\n" + - " INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, 0, 1);\n" + + " INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, 0, 1);\n" + " END IF\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { null }, query); String check = "BEGIN TRANSACTION\n" + - " SELECT * FROM " + qualifiedTableName + " WHERE k = ? AND c = ?;\n" + + " SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ? AND c = ?;\n" + "COMMIT TRANSACTION"; assertRowEqualsWithPreemptedRetry(cluster, new Object[] { 0, 0, 1 }, check, 0, 0); }); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java index acef419889a8..1b8bfe898500 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java @@ -48,15 +48,15 @@ public static void setupClass() throws IOException @Test public void testSerialReadDescending() throws Throwable { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='full'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='full'", cluster -> { ICoordinator coordinator = cluster.coordinator(1); for (int i = 1; i <= 10; i++) - coordinator.execute("INSERT INTO " + qualifiedTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); - assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedTableName + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); + assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 4", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80), AssertUtils.row(7, 70)); } ); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index 187f08a11ccc..c5aa059a4cf4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -64,7 +64,7 @@ public static void setUp() throws IOException @Test public void testLoad() throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, v int, PRIMARY KEY(k)) WITH transactional_mode = 'full'", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, v int, PRIMARY KEY(k)) WITH transactional_mode = 'full'", cluster -> { final ConcurrentHashMap verbs = new ConcurrentHashMap<>(); @@ -86,7 +86,7 @@ public boolean matches(int i, int i1, IMessage iMessage) final float readChance = 0.33f; long nextRepairAt = repairInterval; for (int i = 1; i <= keyCount; i++) - coordinator.execute("INSERT INTO " + qualifiedTableName + " (k, v) VALUES (0, 0) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (k, v) VALUES (0, 0) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i); Random random = new Random(); // CopyOnWriteArrayList exceptions = new CopyOnWriteArrayList<>(); @@ -109,7 +109,7 @@ public boolean matches(int i, int i1, IMessage iMessage) inFlight.release(); if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); // else exceptions.add(fail); - }, "SELECT * FROM " + qualifiedTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, random.nextInt(keyCount)); + }, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, random.nextInt(keyCount)); } else { @@ -117,7 +117,7 @@ public boolean matches(int i, int i1, IMessage iMessage) inFlight.release(); if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); // else exceptions.add(fail); - }, "UPDATE " + qualifiedTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, random.nextInt(keyCount)); + }, "UPDATE " + qualifiedAccordTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, random.nextInt(keyCount)); } } @@ -125,7 +125,7 @@ public boolean matches(int i, int i1, IMessage iMessage) { nextRepairAt += repairInterval; System.out.println("repairing..."); - cluster.coordinator(1).instance().nodetool("repair", qualifiedTableName); + cluster.coordinator(1).instance().nodetool("repair", qualifiedAccordTableName); } final Date date = new Date(); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java index 0e9a47f71c2c..f6fdba672ffe 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -73,16 +73,16 @@ public static void setupClass() throws IOException String writeCql() { return "BEGIN TRANSACTION\n" + - " LET val = (SELECT v FROM " + qualifiedTableName + " WHERE k=? AND c=?);\n" + + " LET val = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k=? AND c=?);\n" + " SELECT val.v;\n" + - " UPDATE " + qualifiedTableName + " SET v = v + 1 WHERE k=? AND c=?;\n" + + " UPDATE " + qualifiedAccordTableName + " SET v = v + 1 WHERE k=? AND c=?;\n" + "COMMIT TRANSACTION"; } String readCql() { return "BEGIN TRANSACTION\n" + - " LET val = (SELECT v FROM " + qualifiedTableName + " WHERE k=? AND c=?);\n" + + " LET val = (SELECT v FROM " + qualifiedAccordTableName + " WHERE k=? AND c=?);\n" + " SELECT val.v;\n" + "COMMIT TRANSACTION"; } @@ -93,7 +93,7 @@ String readCql() public void beforeTest() { SHARED_CLUSTER.filters().reset(); - SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH " + TransactionalMode.full.asCqlParam()); + SHARED_CLUSTER.schemaChange("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY (k, c)) WITH " + TransactionalMode.full.asCqlParam()); } @Test diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java new file mode 100644 index 000000000000..e5865ef9d310 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java @@ -0,0 +1,771 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +import com.google.common.base.Stopwatch; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListenableFutureTask; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.api.RoutingKey; +import accord.messages.PreAccept; +import accord.primitives.PartialKeyRoute; +import accord.primitives.PartialRoute; +import accord.primitives.Routable.Domain; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.Util; +import org.apache.cassandra.batchlog.BatchlogManager; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.Config.PaxosVariant; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.api.IInstance; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.IMessage; +import org.apache.cassandra.distributed.api.IMessageSink; +import org.apache.cassandra.distributed.api.QueryResults; +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.impl.Instance; +import org.apache.cassandra.distributed.impl.TestChangeListener; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.exceptions.CoordinatorBehindException; +import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.hints.HintsService; +import org.apache.cassandra.metrics.AccordClientRequestMetrics; +import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; +import org.apache.cassandra.metrics.HintsServiceMetrics; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Promise; +import org.eclipse.jetty.util.ConcurrentHashSet; + +import static java.lang.String.format; +import static org.apache.cassandra.Util.expectException; +import static org.apache.cassandra.Util.spinAssertEquals; +import static org.apache.cassandra.config.CassandraRelevantProperties.HINT_DISPATCH_INTERVAL_MS; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL; +import static org.apache.cassandra.distributed.shared.ClusterUtils.getNextEpoch; +import static org.apache.cassandra.distributed.shared.ClusterUtils.pauseAfterEnacting; +import static org.apache.cassandra.distributed.shared.ClusterUtils.pauseBeforeEnacting; +import static org.apache.cassandra.distributed.shared.ClusterUtils.unpauseEnactment; +import static org.apache.cassandra.distributed.test.accord.AccordMigrationRaceTestBase.Scenario.BATCHLOG_FAILED_ROUTING_THEN_HINT; +import static org.apache.cassandra.distributed.test.accord.AccordMigrationRaceTestBase.Scenario.BATCHLOG_FAILED_TIMEOUT_THEN_HINT; +import static org.apache.cassandra.distributed.test.accord.AccordMigrationRaceTestBase.Scenario.BATCHLOG_SUCCESSFUL_ROUTING; +import static org.apache.cassandra.distributed.test.accord.AccordMigrationRaceTestBase.Scenario.HINT; +import static org.apache.cassandra.distributed.test.accord.AccordMigrationRaceTestBase.Scenario.MUTATION; +import static org.apache.cassandra.distributed.util.QueryResultUtil.assertThat; +import static org.apache.cassandra.exceptions.RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; +import static org.apache.cassandra.utils.Throwables.runUnchecked; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/* + * Test that non-transactional write operations such as regular mutations, batch log, and hints + * all detect when a migration is in progress, and then retry on the correct system. + */ +public abstract class AccordMigrationRaceTestBase extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordMigrationRaceTestBase.class); + + private static final int CLUSTERING_VALUE = 1; + + private static final String TABLE_FMT = "CREATE TABLE %s (id int, c int, v int, PRIMARY KEY ((id), c));"; + + public static final int PKEY_ACCORD = 3; + public static final int PKEY_NORMAL = 0; + + private static IPartitioner partitioner; + + private static Token minToken; + + private static Token maxToken; + + private static Token midToken; + + private static Token upperMidToken; + + private static Token lowerMidToken; + + private static ICoordinator coordinator; + + private final static TestMessageSink messageSink = new TestMessageSink(); + private static class TestMessageSink implements IMessageSink + { + private final Queue> messages = new ConcurrentLinkedQueue<>(); + private final Set blackholed = new ConcurrentHashSet<>(); + + public void reset() + { + messages.clear(); + blackholed.clear(); + } + + @Override + public void accept(InetSocketAddress to, IMessage message) { + messages.offer(Pair.create(to,message)); + IInstance i = SHARED_CLUSTER.get(to); + if (blackholed.contains(to) || blackholed.contains(message.from())) + return; + if (i != null) + i.receiveMessage(message); + } + } + + enum Scenario + { + // Apply the mutation from the coordinator directly without going through hinting + MUTATION(false, false, false, false, false), + // Hint from the initial mutation coordination + HINT(true, false, true, false, true), + // Apply the mutation from the batchlog directly + BATCHLOG_SUCCESSFUL_ROUTING(false, true, true, true, false), + // Have the batchlog use hints to apply the mutation after failing to route, migrating back from Accord this is a timeout because you can't get Accord to fail at routing + // it either executes correctly in the old epoch or times out waiting for the new one to arrive + BATCHLOG_FAILED_ROUTING_THEN_HINT(false, true, true, true, true), + // Have the batchlog use hints to apply the mutation after a timeout + BATCHLOG_FAILED_TIMEOUT_THEN_HINT(false, true, true, true, true), + ; + + final boolean initiallyEnableHints; + final boolean initiallyEnableBatchlogReplay; + final boolean initiallyBlockTestKeyspaceMutations; + final boolean passesThroughBatchlog; + final boolean deliversViaHint; + + Scenario(boolean initiallyEnableHints, boolean initiallyEnableBatchlogReplay, boolean initiallyBlockTestKeyspaceMutations, boolean passesThroughBatchlog, boolean deliversViaHint) + { + this.initiallyEnableHints = initiallyEnableHints; + this.initiallyEnableBatchlogReplay = initiallyEnableBatchlogReplay; + this.initiallyBlockTestKeyspaceMutations = initiallyBlockTestKeyspaceMutations; + this.passesThroughBatchlog = passesThroughBatchlog; + this.deliversViaHint = deliversViaHint; + } + } + + private final boolean migrateAwayFromAccord; + + protected AccordMigrationRaceTestBase() + { + this.migrateAwayFromAccord = migratingAwayFromAccord(); + } + + protected abstract boolean migratingAwayFromAccord(); + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws IOException + { + HINT_DISPATCH_INTERVAL_MS.setLong(100); + ServerTestUtils.daemonInitialization(); + // Otherwise repair complains if you don't specify a keyspace + CassandraRelevantProperties.SYSTEM_TRACES_DEFAULT_RF.setInt(3); + AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("paxos_variant", PaxosVariant.v2.name()) + .set("write_request_timeout", "2s") + .set("accord.range_migration", "explicit")), 3); + partitioner = FBUtilities.newPartitioner(SHARED_CLUSTER.get(1).callsOnInstance(() -> DatabaseDescriptor.getPartitioner().getClass().getSimpleName()).call()); + StorageService.instance.setPartitionerUnsafe(partitioner); + ServerTestUtils.prepareServerNoRegister(); + minToken = partitioner.getMinimumToken(); + maxToken = partitioner.getMaximumTokenForSplitting(); + midToken = partitioner.midpoint(minToken, maxToken); + upperMidToken = partitioner.midpoint(midToken, maxToken); + lowerMidToken = partitioner.midpoint(minToken, midToken); + coordinator = SHARED_CLUSTER.coordinator(1); + SHARED_CLUSTER.setMessageSink(messageSink); + } + + @AfterClass + public static void tearDownClass() + { + StorageService.instance.resetPartitionerUnsafe(); + } + + @After + public void tearDown() throws Exception + { + messageSink.reset(); + forEach(() -> { + BatchlogManager.instance.resumeReplay(); + HintsService.instance.deleteAllHintsUnsafe(); + HintsService.instance.resumeDispatch(); + }); + SHARED_CLUSTER.forEach(ClusterUtils::clearAndUnpause); + super.tearDown(); + // Reset migration state + forEach(() -> { + ConsensusRequestRouter.resetInstance(); + ConsensusKeyMigrationState.reset(); + }); + truncateSystemTables(); + } + + private ListenableFuture alterTableTransactionalModeAsync(TransactionalMode mode) + { + ListenableFutureTask task = ListenableFutureTask.create(() -> { + coordinator.execute(format("ALTER TABLE %s WITH %s", qualifiedAccordTableName, mode.asCqlParam()), ALL); + }, null); + Thread asyncThread = new Thread(task, "Alter table transaction mode " + mode); + asyncThread.setDaemon(true); + asyncThread.start(); + return task; + } + + @Test + public void testSplitAndRetryNonSerialUnloggedBatchTwoTablesOnePkey() throws Throwable + { + testSplitAndRetryMutationCoordination(twoTableBatchInsert(false, PKEY_ACCORD, PKEY_ACCORD, 1), validateTwoTable(PKEY_ACCORD)); + } + + @Test + public void testSplitAndRetryNonSerialUnloggedBatchTwoTablesOnePkeyHinting() throws Throwable + { + // Accord doesn't hint if a write times out + if (!migrateAwayFromAccord) + testSplitAndRetryHintDelivery(twoTableBatchInsert(false, PKEY_ACCORD, PKEY_ACCORD, 1), validateTwoTable(PKEY_ACCORD)); + } + + @Test + public void testSplitAndRetryNonSerialUnloggedBatchTwoTablesTwoPkey() throws Throwable + { + testSplitAndRetryMutationCoordination(twoTableBatchInsert(false, PKEY_ACCORD, PKEY_NORMAL, 1), validateTwoTable(PKEY_NORMAL)); + } + + @Test + public void testSplitAndRetryNonSerialUnloggedBatchTwoTablesHinting() throws Throwable + { + // Accord doesn't hint if a write times out + if (!migrateAwayFromAccord) + testSplitAndRetryHintDelivery(twoTableBatchInsert(false, PKEY_ACCORD, PKEY_NORMAL, 1), validateTwoTable(PKEY_NORMAL)); + } + + @Test + public void testSplitAndRetryNonSerialUnloggedBatchSingleTable() throws Throwable + { + testSplitAndRetryMutationCoordination(singleTableBatchInsert(false, PKEY_ACCORD, PKEY_NORMAL, 1), this::validateSingleTable); + } + + @Test + public void testSplitAndRetryNonSerialUnloggedBatchSingleTableHinting() throws Throwable + { + // Accord doesn't hint if a write times out + if (!migrateAwayFromAccord) + testSplitAndRetryHintDelivery(singleTableBatchInsert(false, PKEY_ACCORD, PKEY_NORMAL, 1), this::validateSingleTable); + } + + /* + * This doesn't really test much since on top of testSplitAndRetryNonSerialUnloggedBatchTwoTablesOnePkey since it is + * a single table & key and will be converted to an unlogged batch + */ + @Test + public void testSplitAndRetryNonSerialLoggedBatchTwoTablesOnePkey() throws Throwable + { + testSplitAndRetryMutationCoordination(twoTableBatchInsert(true, PKEY_ACCORD, PKEY_ACCORD, 1), validateTwoTable(PKEY_ACCORD)); + } + + @Test + public void testSplitAndRetryNonSerialLoggedBatchTwoTablesTwoPkey() throws Throwable + { + testSplitAndRetryMutationCoordination(twoTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), validateTwoTable(PKEY_NORMAL)); + } + + @Test + public void testSplitAndRetryNonSerialLoggedBatchTwoTablesTwoPkeyDeliverViaBatchLog() throws Throwable + { + testSplitAndRetryBatchlogDelivery(twoTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), validateTwoTable(PKEY_NORMAL)); + } + + @Test + public void testSplitAndRetryNonSerialLoggedBatchTwoTablesTwoPkeyHintedViaBatchLogTimeout() throws Throwable + { + testSplitAndRetryHintDeliveryAfterBatchlogTimeout(twoTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), validateTwoTable(PKEY_NORMAL)); + } + + @Test + public void testSplitAndRetryNonSerialLoggedBatchTwoTablesTwoPkeyHintedViaBatchLogRoutingFailure() throws Throwable + { + testSplitAndRetryHintDeliveryAfterBatchlogRoutingFailure(twoTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), validateTwoTable(PKEY_NORMAL)); + } + + /* + * Test that a logged batch writing to a migrating table and a non-migrating table can + */ + @Test + public void testSplitAndRetryNonSerialLoggedBatchSingleTable() throws Throwable + { + testSplitAndRetryBatchlogDelivery(singleTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), this::validateSingleTable); + } + + @Test + public void testSplitAndRetryNonSerialLoggedBatchSingleTableDeliverViaBatchLog() throws Throwable + { + testSplitAndRetryBatchlogDelivery(singleTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), this::validateSingleTable); + } + + @Test + public void testSplitAndRetryNonSerialLoggedBatchSingleTableHintedViaBatchLogTimeout() throws Throwable + { + testSplitAndRetryHintDeliveryAfterBatchlogTimeout(singleTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), this::validateSingleTable); + } + + @Test + public void testSplitAndRetryNonSerialLoggedBatchSingleTableHintedViaBatchLogRoutingFailure() throws Throwable + { + testSplitAndRetryHintDeliveryAfterBatchlogRoutingFailure(singleTableBatchInsert(true, PKEY_ACCORD, PKEY_NORMAL, 1), this::validateSingleTable); + } + + private void testSplitAndRetryMutationCoordination(String batchCQL, Consumer validation) throws Throwable + { + testSplitAndRetry(batchCQL, validation, MUTATION); + } + + private void testSplitAndRetryBatchlogDelivery(String batchCQL, Consumer validation) throws Throwable + { + testSplitAndRetry(batchCQL, validation, BATCHLOG_SUCCESSFUL_ROUTING); + } + + private void testSplitAndRetryHintDeliveryAfterBatchlogTimeout(String batchCQL, Consumer validation) throws Throwable + { + testSplitAndRetry(batchCQL, validation, BATCHLOG_FAILED_TIMEOUT_THEN_HINT); + } + + private void testSplitAndRetryHintDeliveryAfterBatchlogRoutingFailure(String batchCQL, Consumer validation) throws Throwable + { + testSplitAndRetry(batchCQL, validation, BATCHLOG_FAILED_ROUTING_THEN_HINT); + } + + private void testSplitAndRetryHintDelivery(String batchCQL, Consumer validation) throws Throwable + { + testSplitAndRetry(batchCQL, validation, HINT); + } + + private void validateSingleTable(Cluster cluster) + { + SimpleQueryResult expected = QueryResults.builder() + .columns("id", "c", "v") + .row(PKEY_NORMAL, 1, 1) + .row(PKEY_ACCORD, 1, 1) + .build(); + cluster.forEach(instance -> { + assertThat(instance.executeInternalWithResult("SELECT * FROM " + qualifiedAccordTableName)).isEqualTo(expected); + }); + } + + private Consumer validateTwoTable(int secondPkey) + { + return cluster -> { + SimpleQueryResult expectedAccord = QueryResults.builder() + .columns("id", "c", "v") + .row(PKEY_ACCORD, 1, 1) + .build(); + cluster.forEach(instance -> assertThat(instance.executeInternalWithResult("SELECT * FROM " + qualifiedAccordTableName)).isEqualTo(expectedAccord)); + + SimpleQueryResult expectedNormal = QueryResults.builder() + .columns("id", "c", "v") + .row(secondPkey, 1, 1) + .build(); + cluster.forEach(instance -> assertThat(instance.executeInternalWithResult("SELECT * FROM " + qualifiedRegularTableName)).isEqualTo(expectedNormal)); + }; + } + + /* + * Test if the coordinator is behind that the request can be re-split and routed to the correct systems + * without surfacing an error + */ + private void testSplitAndRetry(String batchCQL, Consumer validation, Scenario scenario) throws Throwable + { + test(createTables(TABLE_FMT, qualifiedRegularTableName, qualifiedAccordTableName), + cluster -> { + // Only enable these when testing it works from a specific instance + forEach(() -> BatchlogManager.instance.pauseReplay()); + forEach(() -> HintsService.instance.pauseDispatch()); + + // Node 3 is always the out of sync node + IInvokableInstance outOfSyncInstance = setUpOutOfSyncNode(cluster); + + // Force the batchlog Accord txn to run after this write txn in the new epoch where it + // will trigger RetryDifferentSystem + if (scenario == BATCHLOG_FAILED_ROUTING_THEN_HINT && migrateAwayFromAccord) + writeAccordRowViaAccord(); + + // Need to be able to block writing to the test keyspace forcing batchlog replay + // without also failing writes to the batch log + if (scenario.initiallyBlockTestKeyspaceMutations) + cluster.filters().outbound().messagesMatching((from, to, message) -> { + if (message.verb() == Verb.MUTATION_REQ.id) + { + String keyspace = cluster.get(to).callsOnInstance(() -> ((Message)Instance.deserializeMessage(message)).payload.getKeyspaceName()).call(); + if (keyspace.equals(KEYSPACE)) + return true; + } + if (message.verb() == Verb.ACCORD_PRE_ACCEPT_REQ.id && !migrateAwayFromAccord) + { + boolean drop = cluster.get(to).callsOnInstance(() -> { + PreAccept preAccept = (PreAccept)Instance.deserializeMessage(message).payload; + PartialRoute route = preAccept.scope; + if (route.domain() == Domain.Key) + for (RoutingKey key : (PartialKeyRoute)route) + { + AccordRoutingKey routingKey = (AccordRoutingKey)key; + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(routingKey.table()); + if (cfs.getKeyspaceName().equals(KEYSPACE)) + return true; + } + return false; + }).call(); + if (drop) + return true; + } + return false; + }).drop(); + + forEach(() -> BatchlogManager.instance.pauseReplay()); + + // If testing batch log delivery the coordinator needs to be a node other than the node that is behind on + // topology updates so that the batch log writes (and thus replay) can be done on the node that is out of sync + int coordinatorIndex = scenario.initiallyEnableBatchlogReplay ? 2 : 3; + IInvokableInstance instance = cluster.get(coordinatorIndex); + ICoordinator coordinator = instance.coordinator(); + int startRetryCount = getRetryOnDifferentSystemCount(coordinatorIndex); + // If testing routing at mutation coordination then Node 1 and 2 will both rejected the mutation because it is in a migrating range + int startRejectedCount = getMutationsRejectedOnWrongSystemCount(); + logger.info("Executing batch insert"); + Future resultFuture = coordinator.asyncExecuteWithResult(batchCQL, ALL); + + // Testing either batch log delivery or hint delivery via batchlog + if (scenario.initiallyBlockTestKeyspaceMutations) + { + // Expect initial write failure + expectException(() -> { + try + { + return resultFuture.get(); + } + catch (ExecutionException e) + { + throw (Exception) e.getCause(); + } + }, WriteTimeoutException.class); + } + + if (scenario.passesThroughBatchlog) + { + // At this stage we want the batch log to fail because it misrouted the queries to the wrong system + // not because it timed out not getting a response. We only did that with mutations as a quick + // way to populate the batch log. Could almost as easily have constructed the mutation and put it + // in the batch log directly + if (scenario == Scenario.BATCHLOG_FAILED_ROUTING_THEN_HINT || scenario == BATCHLOG_SUCCESSFUL_ROUTING) + cluster.filters().reset(); + + // We only want the batch log to have access to the correct topology if we are testing its + // ability to handle misrouted things + if (scenario == Scenario.BATCHLOG_SUCCESSFUL_ROUTING) + unpauseEnactment(outOfSyncInstance); + + // Unfortunately the batch won't be replayed until some time has passed because the starting time + // for replay is the current time - timeout + // Don't wait here for the batchlog if we need to spin on the creation of the Accord transaction + // and then unpause to test Accord routing failure + boolean unpauseAfterBatchLogCreatesTransaction = migrateAwayFromAccord && scenario == BATCHLOG_FAILED_ROUTING_THEN_HINT; + if (!unpauseAfterBatchLogCreatesTransaction) + Thread.sleep(BatchlogManager.BATCHLOG_REPLAY_TIMEOUT + DatabaseDescriptor.getWriteRpcTimeout(TimeUnit.MILLISECONDS)); + messageSink.reset(); + + // Force batch log delivery (or hint delivery) on the node that was out of sync, but should be in sync once we unpause + // This demonstrates it can split the mutation correctly or forward it to hinting if it fails + outOfSyncInstance.runOnInstance(() -> runUnchecked(() -> { + // We don't want hints for any reason that might apply the mutation and make the test look like it succeeded + assertTrue(HintsService.instance.isDispatchPaused()); + // The failed write will have written hints + HintsService.instance.deleteAllHintsUnsafe(); + assertFalse(hasPendingHints()); + BatchlogManager.instance.resumeReplay(); + + // Unpausing needs to be done async because it waits for the batch log replay + Promise unpaused = new AsyncPromise<>(); + if (unpauseAfterBatchLogCreatesTransaction) + { + logger.info("Creating thread to unpause after batchlog creates Accord transaction"); + new Thread(() -> + { + try + { + // Unpause so it can route incorrectly instead of timing out waiting to fetch the epoch, need the transaction to be created first + // otherwise it will just be routed straight to non-Accord. + logger.info("Spinning waiting on a transaction"); + Util.spinUntilTrue(() -> !((AccordService)AccordService.instance()).node().coordinating().isEmpty(), 20); + logger.info("Foudn transaction, unpausing"); + TestChangeListener.instance.unpause(); + unpaused.trySuccess(null); + } + catch (Throwable t) + { + unpaused.tryFailure(t); + } + }).start(); + } + else + { + // Force replay so mosts tests don't have to wait + BatchlogManager.instance.forceBatchlogReplay(); + unpaused.trySuccess(null); + } + // Fetch errors + unpaused.get(); + // Ensure the batch log did or didn't create pending hints depending on the test scenario + spinAssertEquals(scenario == BATCHLOG_FAILED_TIMEOUT_THEN_HINT || scenario == BATCHLOG_FAILED_ROUTING_THEN_HINT, () -> hasPendingHints(), 20); + })); + } + + // Mutation successfully applied from the coordinator after retrying scenario + if (scenario == MUTATION) + { + // Don't want to mistakenly have hints applying the mutation + forEach(() -> assertTrue(HintsService.instance.isDispatchPaused())); + // Check for the error differently depending on what system should be seeing an error + if (migrateAwayFromAccord) + { + // Accord will block until we unpause enactment so to test the routing we wait until the transaction + // has started so the epoch it is created in is the old one + Util.spinUntilTrue(() -> outOfSyncInstance.callOnInstance(() -> !((AccordService)AccordService.instance()).node().coordinating().isEmpty()), 20); + logger.info("Accord node is now coordinating something"); + try + { + validation.accept(cluster); + throw new AssertionError("Expected validation to fail"); + } + catch (AssertionError e) + { + //ignored + } + } + else + { + Stopwatch sw = Stopwatch.createStarted(); + spinAssertEquals(startRejectedCount + 2, 10, () -> getMutationsRejectedOnWrongSystemCount() - startRejectedCount); + logger.info("Took {}ms to get mutations rejected on wrong system", sw.elapsed(TimeUnit.MILLISECONDS)); + } + + logger.info("Unpausing out of sync instance"); + // Testing regular mutation coordination retry loop let coordinator get up to date and retry + unpauseEnactment(outOfSyncInstance); + + try + { + resultFuture.get(); + } + catch (ExecutionException e) + { + // This is expected when inverting the migration + if (migrateAwayFromAccord && e.getCause() instanceof CoordinatorBehindException) + throw e; + throw e; + } + + if (!migrateAwayFromAccord) + { + int endRetryCount = getRetryOnDifferentSystemCount(coordinatorIndex); + int endRejectedCount = getMutationsRejectedOnWrongSystemCount(); + assertEquals(1, endRetryCount - startRetryCount); + // Expect only two nodes to reject since they enacted the new epoch + assertEquals(2, endRejectedCount - startRejectedCount); + } + } + + // Anything related to making sure hints are delivered goes here + if (scenario.deliversViaHint) + { + // Don't want to mistakenly have hints applying the mutation before we enable it on just one instance + forEach(() -> assertTrue(HintsService.instance.isDispatchPaused())); + // The filters wouldn't have been reset yet if they were needed to make the batchlog or original mutation time out + // Need to reset so Hints can use Accord txns + cluster.filters().reset(); + long startingAccordTimeouts = outOfSyncInstance.callOnInstance(() -> ClientRequestsMetricsHolder.accordWriteMetrics.timeouts.getCount()); + long startingAccordPreempted = outOfSyncInstance.callOnInstance(() -> ClientRequestsMetricsHolder.accordWriteMetrics.preempted.getCount()); + long startingAccordMigrationRejects = outOfSyncInstance.callOnInstance(() -> ClientRequestsMetricsHolder.accordWriteMetrics.accordMigrationRejects.getCount()); + long startingHintTimeouts = outOfSyncInstance.callOnInstance(() -> HintsServiceMetrics.hintsTimedOut.getCount()); + outOfSyncInstance.runOnInstance(() -> HintsService.instance.resumeDispatch()); + // The initial hinting attempt should fail, unless it's a batchlog routing failure in which + // case the coordinator has already caught up so the hint will succeed on the first try + // Can only really have this case for BATCHLOG_FAILED_TIMEOUT_THEN_HINT becuase Accord timeouts don't + // write hints so there is nothing to test + if (migrateAwayFromAccord && scenario == BATCHLOG_FAILED_TIMEOUT_THEN_HINT) + { + Callable test = () -> outOfSyncInstance.callOnInstance(() -> { + logger.info("startingAccordTimeouts {}, startingAccordPreempts {}, startingAccordMigrationRejects {}, startingHintTimeouts {}, accord timeouts {}, accordPreempts {}, accordMigrationRejects {}, hint timeouts {}", startingAccordTimeouts, startingAccordPreempted, startingAccordMigrationRejects, startingHintTimeouts, ClientRequestsMetricsHolder.accordWriteMetrics.timeouts.getCount(), ClientRequestsMetricsHolder.accordWriteMetrics.preempted.getCount(), ClientRequestsMetricsHolder.accordWriteMetrics.accordMigrationRejects.getCount(), HintsServiceMetrics.hintsTimedOut.getCount()); + AccordClientRequestMetrics accordMetrics = ClientRequestsMetricsHolder.accordWriteMetrics; + return accordMetrics.timeouts.getCount() >= (startingAccordTimeouts + 1) && HintsServiceMetrics.hintsTimedOut.getCount() >= (startingHintTimeouts + 1); + }); + Util.spinUntilTrue(test, 40); + } + else if (!migrateAwayFromAccord) + { + // Expect two retry on different system responses when migrating from Paxos to Accord, one from each + // node that knows it is on the wrong system + Util.spinUntilTrue(() -> messageSink.messages.stream().filter(p -> { + if (p.right.verb() != Verb.FAILURE_RSP.id) + return false; + if (!p.left.equals(outOfSyncInstance.broadcastAddress())) + return false; + RequestFailureReason reason = ((RequestFailure) Instance.deserializeMessage(p.right).payload).reason; + if (reason == RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM) + return true; + return false; + }).count() == 2, 20); + } + // After this hints should deliver and the final validation should succeed + // if we don't unpause enactment + unpauseEnactment(outOfSyncInstance); + } + + // Accord commit is async and might take a while, but the data should end up as expected + Util.spinUntilSuccess(() -> validation.accept(cluster)); + }); + } + + /* + * Set up 3 to be behind and unaware of the migration while 1 and 2 are aware + */ + private IInvokableInstance setUpOutOfSyncNode(Cluster cluster) throws Throwable + { + IInvokableInstance i1 = cluster.get(1); + IInvokableInstance i2 = cluster.get(2); + IInvokableInstance i3 = cluster.get(3); + alterTableTransactionalMode(TransactionalMode.full); + Epoch nextEpoch = getNextEpoch(i1); + // Node 3 will coordinate the query and not be aware that the migration has begun + Callable pausedBeforeEnacting = pauseBeforeEnacting(i3, nextEpoch); + // In batch log delivery cases i2 will be the coordinator and we need to be sure that it has enacted the latest epoch + Callable i2PausedAfterEnacting = pauseAfterEnacting(i2, nextEpoch); + + ListenableFuture result = nodetoolAsync(coordinator, "consensus_admin", "begin-migration", "-st", midToken.toString(), "-et", maxToken.toString(), "-tp", "accord", KEYSPACE, accordTableName); + + if (migrateAwayFromAccord) + { + pausedBeforeEnacting.call(); + i2PausedAfterEnacting.call(); + unpauseEnactment(i2); + unpauseEnactment(i3); + result.get(); + long migratingEpoch = nextEpoch.getEpoch(); + Util.spinUntilTrue(() -> cluster.stream().allMatch(instance -> instance.callOnInstance(() -> ClusterMetadata.current().epoch.equals(Epoch.create(migratingEpoch)))), 10); + nextEpoch = getNextEpoch(i1); + pausedBeforeEnacting = pauseBeforeEnacting(i3, nextEpoch); + i2PausedAfterEnacting = pauseAfterEnacting(i2, nextEpoch); + // In the reverse direction doing the alter automatically reverses the migratin without a need to call begin migration on any ranges + result = alterTableTransactionalModeAsync(TransactionalMode.off); + } + + // Wait for everyone to get to where they are supposed to be + try + { + pausedBeforeEnacting.call(); + } + catch (Throwable t) + { + if (result.isDone()) + { + try + { + result.get(); + } + catch (ExecutionException e) + { + t.addSuppressed(e); + throw t; + } + } + throw t; + } + i2PausedAfterEnacting.call(); + // Unpause on 1 and 2 where we want them aware of the migration + unpauseEnactment(i1); + unpauseEnactment(i2); + // nodetool should be able to complete now + result.get(); + return i3; + } + + private String twoTableBatchInsert(boolean logged, int pkey1, int pkey2, int value) + { + return batch(logged, + insertCQL(qualifiedAccordTableName, pkey1, value), + insertCQL(qualifiedRegularTableName, pkey2, value)); + } + + private String singleTableBatchInsert(boolean logged, int pkey1, int pkey2, int value) + { + return batch(logged, + insertCQL(qualifiedAccordTableName, pkey1, value), + insertCQL(qualifiedAccordTableName, pkey2, value)); + } + + private static String insertCQL(String qualifiedTableName, int pkey, int value) + { + return format("INSERT INTO %s ( id, c, v ) VALUES ( %d, %d, %d )", qualifiedTableName, pkey, CLUSTERING_VALUE, value); + } + + // Prevents the creation of transactions in an older epoch because later writes need to order after earlier + private void writeAccordRowViaAccord() + { + logger.info("Initiating Accord row write"); + SHARED_CLUSTER.coordinator(1).execute(insertCQL(qualifiedAccordTableName, PKEY_ACCORD, 99), ConsistencyLevel.QUORUM); + logger.info("Finished Accord row write"); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index 2eea1f5a392c..73c15d99cee5 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -52,14 +51,16 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.Mutation.SimpleBuilder; import org.apache.cassandra.db.SimpleBuilders.PartitionUpdateBuilder; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IInvokableInstance; -import org.apache.cassandra.distributed.api.NodeToolResult; import org.apache.cassandra.distributed.api.Row; import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.gms.EndpointState; @@ -97,8 +98,8 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; import static java.util.Collections.emptyList; +import static org.apache.cassandra.Util.dk; import static org.apache.cassandra.Util.spinUntilSuccess; -import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.db.SystemKeyspace.CONSENSUS_MIGRATION_STATE; import static org.apache.cassandra.db.SystemKeyspace.PAXOS; import static org.apache.cassandra.dht.Range.normalize; @@ -160,7 +161,7 @@ public static void setupClass() throws IOException ServerTestUtils.daemonInitialization(); // Otherwise repair complains if you don't specify a keyspace CassandraRelevantProperties.SYSTEM_TRACES_DEFAULT_RF.setInt(3); - AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("paxos_variant", PaxosVariant.v2.name()) + AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.with(Feature.NETWORK).set("paxos_variant", PaxosVariant.v2.name()) .set("accord.range_migration", "explicit")), 3); partitioner = FBUtilities.newPartitioner(SHARED_CLUSTER.get(1).callsOnInstance(() -> DatabaseDescriptor.getPartitioner().getClass().getSimpleName()).call()); StorageService.instance.setPartitionerUnsafe(partitioner); @@ -192,19 +193,6 @@ public void tearDown() throws Exception SHARED_CLUSTER.coordinators().forEach(coordinator -> coordinator.execute(format("TRUNCATE TABLE %s.%s", SYSTEM_KEYSPACE_NAME, PAXOS), ALL)); } - private static String nodetool(ICoordinator coordinator, String... commandAndArgs) - { - NodeToolResult nodetoolResult = coordinator.instance().nodetoolResult(commandAndArgs); - if (!nodetoolResult.getStdout().isEmpty()) - System.out.println(nodetoolResult.getStdout()); - if (!nodetoolResult.getStderr().isEmpty()) - System.err.println(nodetoolResult.getStderr()); - if (nodetoolResult.getError() != null) - fail("Failed nodetool " + Arrays.asList(commandAndArgs), nodetoolResult.getError()); - // TODO why does standard out end up in stderr in nodetool? - return nodetoolResult.getStdout(); - } - private static int getKeyBetweenTokens(Token left, Token right) { return getKeysBetweenTokens(left, right).next(); @@ -398,10 +386,10 @@ private static void validateKeyMigrations(List> expectedM @Test public void testPaxosToAccordCAS() throws Exception { - test(format(TABLE_FMT, qualifiedTableName), + test(format(TABLE_FMT, qualifiedAccordTableName), cluster -> { List> expectedKeyMigrations = new ArrayList<>(); - String table = tableName; + String table = accordTableName; UUID tableUUID = cluster.get(1).callOnInstance(() -> ColumnFamilyStore.getIfExists(KEYSPACE, table).getTableId().asUUID()); cluster.forEach(node -> node.runOnInstance(() -> { TableMetadata tbl = Schema.instance.getTableMetadata(KEYSPACE, table); @@ -409,7 +397,7 @@ public void testPaxosToAccordCAS() throws Exception Assert.assertEquals(TransactionalMigrationFromMode.none, tbl.params.transactionalMigrationFrom); })); - cluster.schemaChange(format("ALTER TABLE %s.%s WITH transactional_mode='%s'", KEYSPACE, tableName, TransactionalMode.full)); + cluster.schemaChange(format("ALTER TABLE %s.%s WITH transactional_mode='%s'", KEYSPACE, accordTableName, TransactionalMode.full)); cluster.forEach(node -> node.runOnInstance(() -> { TableMetadata tbl = Schema.instance.getTableMetadata(KEYSPACE, table); @@ -417,11 +405,11 @@ public void testPaxosToAccordCAS() throws Exception Assert.assertEquals(TransactionalMigrationFromMode.off, tbl.params.transactionalMigrationFrom); })); - String casCQL = format(CAS_FMT, qualifiedTableName, CLUSTERING_VALUE); + String casCQL = format(CAS_FMT, qualifiedAccordTableName, CLUSTERING_VALUE); Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); Consumer runCasApplies = key -> assertRowEquals(cluster, new Object[]{true}, casCQL, key); Consumer runCasOnSecondNode = key -> assertEquals( "[applied]", cluster.coordinator(2).executeWithResult(casCQL, ANY, key).names().get(0)); - String tableName = qualifiedTableName.split("\\.")[1]; + String tableName = qualifiedAccordTableName.split("\\.")[1]; int migratingKey = getKeyBetweenTokens(midToken, maxToken); int notMigratingKey = getKeyBetweenTokens(minToken, midToken); Range migratingRange = new Range(midToken, maxToken); @@ -493,8 +481,15 @@ public void testPaxosToAccordCAS() throws Exception // Update inserted row so the condition can apply, if the condition check doesn't apply // then it won't get to propose/accept migratingKey = testingKeys.next(); - String query = "UPDATE " + qualifiedTableName + " SET v = 42 WHERE id = ? AND c = ?"; - Consumer makeCASApply = key -> cluster.forEach(instance -> instance.runOnInstance(() -> executeInternal(query, key, CLUSTERING_VALUE))); + String keyspace = KEYSPACE; + Integer clusteringValue = CLUSTERING_VALUE; + String mutationTableName = accordTableName; + Consumer makeCASApply = key -> cluster.forEach(instance -> instance.runOnInstance(() -> { + SimpleBuilder mutationBuilder = Mutation.simpleBuilder(keyspace, dk(key)).allowPotentialTransactionConflicts(); + mutationBuilder.update(mutationTableName).row(clusteringValue).add("v", 42); + Mutation m = mutationBuilder.build(); + m.applyUnsafe(); + })); makeCASApply.accept(migratingKey); // This will force the request to run on Paxos up to Accept @@ -552,13 +547,13 @@ public void testPaxosToAccordCAS() throws Exception @Test public void testPaxosToAccordSerialRead() throws Exception { - test(format(TABLE_FMT, qualifiedTableName), + test(format(TABLE_FMT, qualifiedAccordTableName), cluster -> { - String table = tableName; + String table = accordTableName; UUID tableUUID = cluster.get(1).callOnInstance(() -> ColumnFamilyStore.getIfExists(KEYSPACE, table).getTableId().asUUID()); List> expectedKeyMigrations = new ArrayList<>(); - cluster.schemaChange(format("ALTER TABLE %s.%s WITH transactional_mode='%s'", KEYSPACE, tableName, TransactionalMode.full)); - String readCQL = format("SELECT * FROM %s WHERE id = ? and c = %s", qualifiedTableName, CLUSTERING_VALUE); + cluster.schemaChange(format("ALTER TABLE %s.%s WITH transactional_mode='%s'", KEYSPACE, accordTableName, TransactionalMode.full)); + String readCQL = format("SELECT * FROM %s WHERE id = ? and c = %s", qualifiedAccordTableName, CLUSTERING_VALUE); Function runRead = key -> cluster.coordinator(1).execute(readCQL, SERIAL, key); Range migratingRange = new Range<>(new LongToken(Long.MIN_VALUE + 1), new LongToken(Long.MIN_VALUE)); List> migratingRanges = ImmutableList.of(migratingRange); @@ -566,15 +561,15 @@ public void testPaxosToAccordSerialRead() throws Exception assertTargetAccordRead(runRead, 1, key, expectedKeyMigrations, 0, 1, 0, 0, 0); // Mark wrap around range as migrating - nodetool(coordinator, "consensus_admin", "begin-migration", "-st", String.valueOf(Long.MIN_VALUE + 1), "-et", String.valueOf(Long.MIN_VALUE), "-tp", "accord", KEYSPACE, tableName); - assertMigrationState(tableName, ConsensusMigrationTarget.accord, emptyList(), migratingRanges, 1); + nodetool(coordinator, "consensus_admin", "begin-migration", "-st", String.valueOf(Long.MIN_VALUE + 1), "-et", String.valueOf(Long.MIN_VALUE), "-tp", "accord", KEYSPACE, accordTableName); + assertMigrationState(accordTableName, ConsensusMigrationTarget.accord, emptyList(), migratingRanges, 1); // Should run directly on accord, migrate the key, and perform a quorum read from Accord, Paxos repair will run prepare once addExpectedMigratedKey(expectedKeyMigrations, key, tableUUID); assertTargetAccordRead(runRead, 1, key, expectedKeyMigrations, 1, 1, 1, 0, 0); key++; // Should run up to accept with both nodes refusing to accept - savePromisedAndCommittedPaxosProposal(tableName, key); + savePromisedAndCommittedPaxosProposal(accordTableName, key); cluster.get(1).runOnInstance(() -> ConsensusRequestRouter.setInstance(new PaxosToAccordMigrationNotHappeningUpToBegin())); addExpectedMigratedKey(expectedKeyMigrations, key, tableUUID); assertTargetAccordRead(runRead, 1, key, expectedKeyMigrations, 1, 2, 1, 0, 1); @@ -582,11 +577,6 @@ public void testPaxosToAccordSerialRead() throws Exception }); } - private void alterTableTransactionalMode(TransactionalMode mode) - { - SHARED_CLUSTER.schemaChange(format("ALTER TABLE %s WITH %s", qualifiedTableName, mode.asCqlParam())); - } - private void assertTransactionalModes(String keyspace, String table, TransactionalMode mode, TransactionalMigrationFromMode migration) { forEach(() -> { @@ -598,17 +588,17 @@ private void assertTransactionalModes(String keyspace, String table, Transaction private void assertTransactionalModes(TransactionalMode mode, TransactionalMigrationFromMode migration) { - assertTransactionalModes(KEYSPACE, tableName, mode, migration); + assertTransactionalModes(KEYSPACE, accordTableName, mode, migration); } @Test public void testAccordToPaxos() throws Exception { - test(format(TABLE_FMT, qualifiedTableName), + test(format(TABLE_FMT, qualifiedAccordTableName), cluster -> { - String casCQL = format(CAS_FMT, qualifiedTableName, CLUSTERING_VALUE); + String casCQL = format(CAS_FMT, qualifiedAccordTableName, CLUSTERING_VALUE); Consumer runCasNoApply = key -> assertRowEquals(cluster, new Object[]{false}, casCQL, key); - String tableName = qualifiedTableName.split("\\.")[1]; + String tableName = qualifiedAccordTableName.split("\\.")[1]; UUID tableUUID = cluster.get(1).callOnInstance(() -> ColumnFamilyStore.getIfExists(KEYSPACE, tableName).getTableId().asUUID()); alterTableTransactionalMode(TransactionalMode.mixed_reads); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 4b0a90b17362..cfec7f44e92d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -24,17 +24,17 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; -import accord.coordinate.Invalidated; import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; import com.google.common.primitives.Ints; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableMetadata; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -42,15 +42,25 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.api.RoutingKey; +import accord.coordinate.Invalidated; import accord.impl.SimpleProgressLog; +import accord.messages.PreAccept; +import accord.primitives.PartialKeyRoute; +import accord.primitives.PartialRoute; +import accord.primitives.Routable.Domain; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; import net.bytebuddy.implementation.bind.annotation.SuperCall; import net.bytebuddy.implementation.bind.annotation.This; +import org.apache.cassandra.batchlog.BatchlogManager; import org.apache.cassandra.cql3.statements.ModificationStatement; import org.apache.cassandra.cql3.statements.TransactionStatement; import org.apache.cassandra.cql3.transactions.ReferenceValue; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.Cluster.Builder; @@ -60,24 +70,37 @@ import org.apache.cassandra.distributed.api.IIsolatedExecutor.SerializableRunnable; import org.apache.cassandra.distributed.api.QueryResults; import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.impl.Instance; import org.apache.cassandra.distributed.shared.AssertUtils; import org.apache.cassandra.distributed.shared.Metrics; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.distributed.util.QueryResultUtil; +import org.apache.cassandra.hints.HintsService; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FailingConsumer; +import static java.lang.String.format; import static net.bytebuddy.matcher.ElementMatchers.named; +import static org.apache.cassandra.db.SystemKeyspace.CONSENSUS_MIGRATION_STATE; +import static org.apache.cassandra.db.SystemKeyspace.PAXOS; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL; +import static org.apache.cassandra.schema.SchemaConstants.SYSTEM_KEYSPACE_NAME; import static org.junit.Assert.assertArrayEquals; public abstract class AccordTestBase extends TestBaseImpl @@ -89,8 +112,10 @@ public abstract class AccordTestBase extends TestBaseImpl protected static Cluster SHARED_CLUSTER; - protected String tableName; - protected String qualifiedTableName; + protected String accordTableName; + protected String qualifiedAccordTableName; + protected String regularTableName; + protected String qualifiedRegularTableName; public static void setupCluster(Function options, int nodes) throws IOException { @@ -107,8 +132,10 @@ public static void teardown() @Before public void setup() { - tableName = "tbl" + COUNTER.getAndIncrement(); - qualifiedTableName = KEYSPACE + '.' + tableName; + accordTableName = "accordtbl" + COUNTER.getAndIncrement(); + qualifiedAccordTableName = KEYSPACE + '.' + accordTableName; + regularTableName = "regulartbl" + COUNTER.getAndIncrement(); + qualifiedRegularTableName = KEYSPACE + '.' + regularTableName; } @After @@ -135,6 +162,14 @@ protected void test(String tableDDL, FailingConsumer fn) throws Excepti test(Collections.singletonList(tableDDL), fn); } + protected List createTables(String tableFormat, String... qualifiedTables) + { + ImmutableList.Builder builder = ImmutableList.builder(); + for (String qualifiedTable : qualifiedTables) + builder.add(format(tableFormat, qualifiedTable)); + return builder.build(); + } + public static void ensureTableIsAccordManaged(Cluster cluster, String ksname, String tableName) { cluster.get(1).runOnInstance(() -> { @@ -165,7 +200,7 @@ protected void test(List ddls, FailingConsumer fn) throws Excep protected void test(FailingConsumer fn) throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", fn); + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", fn); } protected static ConsensusMigrationState getMigrationStateSnapshot(IInvokableInstance instance) throws IOException @@ -198,6 +233,19 @@ protected static int getCasWriteCount(int coordinatorIndex) return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.Latency.CASWrite")); } + protected static int getRetryOnDifferentSystemCount(int coordinatorIndex) + { + return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.ClientRequest.RetryDifferentSystem.Write")); + } + + protected int getMutationsRejectedOnWrongSystemCount() + { + long sum = 0; + for (IInvokableInstance instance : SHARED_CLUSTER) + sum += instance.metrics().getCounter("org.apache.cassandra.metrics.Table.MutationsRejectedOnWrongSystem." + qualifiedAccordTableName); + return Ints.checkedCast(sum); + } + protected static int getCasPrepareCount(int coordinatorIndex) { return Ints.checkedCast(getMetrics(coordinatorIndex).getCounter("org.apache.cassandra.metrics.keyspace.CasPrepareLatency.distributed_test_keyspace")); @@ -279,7 +327,7 @@ private static Cluster createCluster(int nodes, Function optio // disable vnode for now, but should enable before trunk Cluster.Builder builder = Cluster.build(nodes) .withoutVNodes() - .withConfig(c -> c.with(Feature.NETWORK, Feature.GOSSIP).set("write_request_timeout", "10s") + .withConfig(c -> c.with(Feature.GOSSIP).set("write_request_timeout", "10s") .set("transaction_timeout", "15s") .set("transaction_timeout", "15s")) .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install); @@ -464,4 +512,86 @@ public static void install(ClassLoader classLoader, Integer num) } protected abstract Logger logger(); + + protected void alterTableTransactionalMode(TransactionalMode mode) + { + SHARED_CLUSTER.schemaChange(format("ALTER TABLE %s WITH %s", qualifiedAccordTableName, mode.asCqlParam())); + } + + protected static void pauseHints() + { + forEach(() -> HintsService.instance.pauseDispatch()); + } + + protected static void deleteAllHints() + { + forEach(() -> HintsService.instance.deleteAllHintsUnsafe()); + } + + protected static void pauseBatchlog() + { + forEach(() -> BatchlogManager.instance.pauseReplay()); + } + + protected static void unpauseHints() + { + forEach(() -> HintsService.instance.resumeDispatch()); + } + + protected static void unpauseBatchlog() + { + forEach(() -> BatchlogManager.instance.resumeReplay()); + } + + protected static void blockMutationAndPreAccept(Cluster cluster) + { + cluster.filters().outbound().messagesMatching((from, to, message) -> { + if (message.verb() == Verb.MUTATION_REQ.id) + { + String keyspace = cluster.get(to).callsOnInstance(() -> ((Message) Instance.deserializeMessage(message)).payload.getKeyspaceName()).call(); + if (keyspace.equals(KEYSPACE)) + return true; + } + if (message.verb() == Verb.ACCORD_PRE_ACCEPT_REQ.id) + { + boolean drop = cluster.get(to).callsOnInstance(() -> { + PreAccept preAccept = (PreAccept)Instance.deserializeMessage(message).payload; + PartialRoute route = preAccept.scope; + if (route.domain() == Domain.Key) + for (RoutingKey key : (PartialKeyRoute)route) + { + AccordRoutingKey routingKey = (AccordRoutingKey)key; + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(routingKey.table()); + if (cfs.getKeyspaceName().equals(KEYSPACE)) + return true; + } + return false; + }).call(); + if (drop) + return true; + } + return false; + }).drop(); + } + + protected static void truncateSystemTables() + { + SHARED_CLUSTER.coordinator(1).execute("TRUNCATE " + SYSTEM_KEYSPACE_NAME + "." + SystemKeyspace.BATCHES, ALL); + SHARED_CLUSTER.coordinator(1).execute(format("TRUNCATE TABLE %s.%s", SYSTEM_KEYSPACE_NAME, CONSENSUS_MIGRATION_STATE), ALL); + SHARED_CLUSTER.coordinator(1).execute(format("TRUNCATE TABLE %s.%s", SYSTEM_KEYSPACE_NAME, PAXOS), ALL); + } + + protected static Stream hostIds() + { + return Stream.concat(ClusterMetadata.current().directory.peerIds() + .stream() + .map(ClusterMetadata.current().directory::hostId), + Stream.of(HintsService.RETRY_ON_DIFFERENT_SYSTEM_UUID)); + } + + protected static boolean hasPendingHints() + { + return hostIds().map(HintsService.instance::getTotalHintsSize) + .anyMatch(size -> size > 0); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTimestampPreservationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTimestampPreservationTest.java new file mode 100644 index 000000000000..353c591c6d19 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTimestampPreservationTest.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import com.google.common.collect.ImmutableList; +import org.junit.After; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.SimpleBuilders.MutationBuilder; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.locator.ReplicaPlans; +import org.apache.cassandra.service.StorageProxy; +import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; + +import static java.lang.String.format; +import static org.apache.cassandra.Util.spinAssertEquals; +import static org.apache.cassandra.config.CassandraRelevantProperties.HINT_DISPATCH_INTERVAL_MS; +import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +/* + * Test that non-transactional updates have their timestamps preserved when written through Accord so that + * `USING TIMESTAMP` continues to work and so that hints and batch log retry attempts are inserted with their + * original timestamp and not a later Accord timestamp which could cause data resurrection. + */ +public class AccordTimestampPreservationTest extends AccordTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordTimestampPreservationTest.class); + + private static final int CLUSTERING_VALUE = 1; + + private static final String NORMAL_TABLE_FMT = "CREATE TABLE %s (id int, c int, v int, PRIMARY KEY ((id), c))"; + + private static final String ACCORD_TABLE_FMT = NORMAL_TABLE_FMT + " WITH transactional_mode='full'"; + + private static ICoordinator coordinator; + + private static final String expectedResult = "[[42]]"; + + private static final int PKEY1 = 77; + private static final int PKEY2 = 78; + private static final int VALUE = 66; + + private static final long TIMESTAMP = 42; + + @Override + protected Logger logger() + { + return logger; + } + + @BeforeClass + public static void setupClass() throws IOException + { + HINT_DISPATCH_INTERVAL_MS.setLong(100); + ServerTestUtils.daemonInitialization(); + // Otherwise repair complains if you don't specify a keyspace + CassandraRelevantProperties.SYSTEM_TRACES_DEFAULT_RF.setInt(3); + AccordTestBase.setupCluster(builder -> builder.appendConfig(config -> config.set("write_request_timeout", "2s")), 3); + ServerTestUtils.prepareServerNoRegister(); + coordinator = SHARED_CLUSTER.coordinator(1); + } + + @After + public void tearDown() throws Exception + { + unpauseBatchlog(); + deleteAllHints(); + unpauseHints(); + super.tearDown(); + // Reset migration state + forEach(() -> { + ConsensusRequestRouter.resetInstance(); + ConsensusKeyMigrationState.reset(); + }); + truncateSystemTables(); + } + + @Test + public void testMutationPreservesTimestamp() throws Exception + { + test(createTables(ACCORD_TABLE_FMT, qualifiedAccordTableName), cluster -> { + long startCount = getAccordCoordinateCount(); + coordinator.executeWithResult(insertCQL(qualifiedAccordTableName, PKEY1, VALUE), ALL); + assertEquals(startCount + 1, getAccordWriteCount()); + int id = 1; + for (IInvokableInstance instance : cluster) + { + logger.info("Checking instance " + id); + id++; + spinAssertEquals(expectedResult, () -> instance.executeInternalWithResult(checkCQL()).toString(), 20); + } + }); + } + + @Test + public void testBatchlogPreservesTimestamp() throws Exception + { + test(ImmutableList.of(format(NORMAL_TABLE_FMT, qualifiedRegularTableName), format(ACCORD_TABLE_FMT, qualifiedAccordTableName)), cluster -> { + pauseHints(); + blockMutationAndPreAccept(cluster); + try + { + // Insert must span both Accord and non-Accord ranges or tables otherwise it bypasses the batchlog entirely + coordinator.executeWithResult(batchInsert(true, PKEY1, PKEY2, VALUE), ALL); + fail("Should have thrown WTE"); + } + catch (Throwable t) + { + assertEquals(t.getClass().getName(), WriteTimeoutException.class.getName()); + } + cluster.filters().reset(); + + int id = 1; + for (IInvokableInstance instance : cluster) + { + logger.info("Checking instance " + id); + id++; + spinAssertEquals(expectedResult, () -> instance.executeInternalWithResult(checkCQL()).toString(), 20); + } + }); + } + + @Test + public void testHintsPreservesTimestamp() throws Exception + { + test(createTables(ACCORD_TABLE_FMT, qualifiedAccordTableName), cluster -> { + String keyspace = KEYSPACE; + int pkey1 = PKEY1; + long timestamp = TIMESTAMP; + int clustering = CLUSTERING_VALUE; + String tableName = accordTableName; + cluster.get(1).runOnInstance(() -> { + ByteBuffer keyBuf = Int32Type.instance.fromString(Integer.toString(pkey1)); + DecoratedKey dk = DatabaseDescriptor.getPartitioner().decorateKey(keyBuf); + MutationBuilder mutationBuilder = new MutationBuilder(KEYSPACE, dk); + mutationBuilder.timestamp(timestamp); + mutationBuilder.update(tableName).row(clustering).add("v", VALUE); + Mutation m = mutationBuilder.build(); + ReplicaPlan.ForWrite plan = ReplicaPlans.forWrite(Keyspace.open(keyspace), ConsistencyLevel.ALL, dk.getToken(), ReplicaPlans.writeAll); + for (Replica replica : plan.live().withoutSelf()) + StorageProxy.submitHint(m, replica, null); + }); + for (int i = 2; i <= 3; i++) + { + int instance = i; + spinAssertEquals(expectedResult, () -> cluster.get(instance).executeInternalWithResult(checkCQL()).toString(), 20); + } + }); + } + + private String batchInsert(boolean logged, int pkey1, int pkey2, int value) + { + return batch(logged, + insertCQL(qualifiedAccordTableName, pkey1, value), + insertCQL(qualifiedRegularTableName, pkey2, value)); + } + + private String insertCQL(String table, int pkey, int value) + { + return insertCQL(table, pkey, value, false); + } + + private String insertCQL(String table, int pkey, int value, boolean cas) + { + return format("INSERT INTO %s ( id, c, v ) VALUES ( %d, %d, %d )%s USING TIMESTAMP %d", table, pkey, CLUSTERING_VALUE, value, cas ? " IF NOT EXISTS" : "", TIMESTAMP); + } + + private String checkCQL() + { + return format("SELECT WRITETIME(v) from %s WHERE id = %d", qualifiedAccordTableName, PKEY1); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationFromAccordRaceTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationFromAccordRaceTest.java new file mode 100644 index 000000000000..985bdc7dd9bc --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationFromAccordRaceTest.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +public class MigrationFromAccordRaceTest extends AccordMigrationRaceTestBase +{ + protected boolean migratingAwayFromAccord() + { + return true; + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordRaceTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordRaceTest.java new file mode 100644 index 000000000000..aa00d9564ba6 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordRaceTest.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +public class MigrationToAccordRaceTest extends AccordMigrationRaceTestBase +{ + protected boolean migratingAwayFromAccord() + { + return false; + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java index 0fc9bff174b8..d834919cb843 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java @@ -142,6 +142,7 @@ public void testHintsServiceMetrics() throws Exception assertThat(countHintsSucceeded(node)).isEqualTo(0); assertThat(countHintsFailed(node)).isEqualTo(0); assertThat(countHintsTimedOut(node)).isEqualTo(0); + assertThat(countHintsRetryDifferentSystem(node)).isEqualTo(0); assertThat(countGlobalDelays(node)).isEqualTo(0); cluster.forEach(target -> assertThat(countEndpointDelays(node, target)).isEqualTo(0)); } @@ -180,6 +181,12 @@ private static Long countHintsTimedOut(IInvokableInstance node) return node.callOnInstance(() -> HintsServiceMetrics.hintsTimedOut.getCount()); } + @SuppressWarnings("Convert2MethodRef") + private static Long countHintsRetryDifferentSystem(IInvokableInstance node) + { + return node.callOnInstance(() -> HintsServiceMetrics.hintsRetryDifferentSystem.getCount()); + } + private static Long countGlobalDelays(IInvokableInstance node) { return getHistogramCount(node, "org.apache.cassandra.metrics.HintsService.Hint_delays"); diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java index 431eb7e454c8..3dfbe1116bd0 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/ShortPaxosSimulationTest.java @@ -105,11 +105,11 @@ public void simulationTest() throws IOException public void casOnAccordSimulationTest() throws IOException { PaxosSimulationRunner.main(new String[] { "run", - "--lwt-strategy", "migration", + "--lwt-strategy", "mixed_reads", "-n", "3...6", "-t", "1000", "--cluster-action-limit", "0", - "--consensus-action-limit", "-1", + "--consensus-action-limit", "0", "--consensus-actions", "ACCORD_MIGRATE", "-c", "10", "-s", "30"}); diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 74cf51de9210..7b4ab79a9f79 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -110,11 +110,11 @@ import org.apache.cassandra.dht.RandomPartitioner.BigIntegerToken; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.index.internal.CassandraIndex; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.gms.ApplicationState; import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.gms.VersionedValue; +import org.apache.cassandra.index.internal.CassandraIndex; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.SSTableId; import org.apache.cassandra.io.sstable.SSTableLoader; @@ -152,10 +152,10 @@ import org.apache.cassandra.utils.FilterFactory; import org.apache.cassandra.utils.OutputHandler; import org.awaitility.Awaitility; +import org.awaitility.core.ThrowingRunnable; import org.hamcrest.Matcher; import org.mockito.Mockito; import org.mockito.internal.stubbing.defaultanswers.ForwardsInvocations; -import org.awaitility.core.ThrowingRunnable; import static com.google.common.base.Preconditions.checkState; import static org.hamcrest.MatcherAssert.assertThat; @@ -179,6 +179,11 @@ public static DecoratedKey dk(String key) return testPartitioner().decorateKey(ByteBufferUtil.bytes(key)); } + public static DecoratedKey dk(int key) + { + return dk(String.valueOf(key), Int32Type.instance); + } + public static DecoratedKey dk(String key, AbstractType type) { return testPartitioner().decorateKey(type.fromString(key)); @@ -375,7 +380,8 @@ public static void expectException(Callable callable, Class exception) } catch (Throwable e) { - assert e.getClass().equals(exception) : e.getClass().getName() + " is not " + exception.getName(); + // Use name because in-jvm dtests will have different instances of the class + assert e.getClass().getName().equals(exception.getName()) : e.getClass().getName() + " is not " + exception.getName(); thrown = true; } @@ -752,6 +758,21 @@ public static void spinAssertEquals(String message, T expected, long timeout .untilAsserted(() -> assertThat(message, call.call(), equalTo(expected))); } + public static void spinUntilTrue(Callable test, long timeoutInSeconds) + { + spinUntilTrue(test, timeoutInSeconds, TimeUnit.SECONDS); + } + + public static void spinUntilTrue(Callable test, long timeout, TimeUnit unit) + { + Awaitility.await() + .pollInterval(Duration.ofMillis(100)) + .pollDelay(0, TimeUnit.MILLISECONDS) + .atMost(timeout, unit) + .ignoreExceptions() + .untilAsserted(() -> assertThat(test.call(), equalTo(true))); + } + public static void spinUntilSuccess(ThrowingRunnable runnable) { spinUntilSuccess(runnable, 10); diff --git a/test/unit/org/apache/cassandra/hints/HintsServiceTest.java b/test/unit/org/apache/cassandra/hints/HintsServiceTest.java index 1e12d3911ebb..eb94473c2923 100644 --- a/test/unit/org/apache/cassandra/hints/HintsServiceTest.java +++ b/test/unit/org/apache/cassandra/hints/HintsServiceTest.java @@ -20,6 +20,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; import javax.annotation.Nullable; import com.google.common.util.concurrent.Futures; @@ -32,6 +33,12 @@ import com.datastax.driver.core.utils.MoreFutures; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.IMutation; +import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.metrics.HintsServiceMetrics; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.MockMessagingService; @@ -40,11 +47,24 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; - +import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.IAccordService.AsyncTxnResult; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.MockFailureDetector; + +import static org.apache.cassandra.Util.spinAssertEquals; +import static org.apache.cassandra.config.CassandraRelevantProperties.HINT_DISPATCH_INTERVAL_MS; import static org.apache.cassandra.hints.HintsTestUtil.sendHintsAndResponses; +import static org.apache.cassandra.hints.HintsTestUtil.sendHintsWithRetryDifferentSystemUUID; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.notNull; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; public class HintsServiceTest { @@ -63,12 +83,15 @@ public static void defineSchema() KeyspaceParams.simple(1), SchemaLoader.standardCFMD(KEYSPACE, TABLE)); metadata = Schema.instance.getTableMetadata(KEYSPACE, TABLE); + HINT_DISPATCH_INTERVAL_MS.setLong(100); + DatabaseDescriptor.setHintsFlushPeriodInMS(100); } @After public void cleanup() { MockMessagingService.cleanup(); + ConsensusMigrationMutationHelper.resetInstanceForTest(); } @Before @@ -173,4 +196,57 @@ public void testPageSeek() throws InterruptedException, ExecutionException assertTrue(dispatchOffset != null); assertTrue(((ChecksummedDataInput.Position) dispatchOffset).sourcePosition > 0); } + + /* + * Make sure that if hints from the batchlog end up needing to be executed without Accord + * that they are turned into + */ + @Test + public void testHintsNeedingRehinting() throws Throwable + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(metadata.id); + long startWrites = cfs.metric.writeLatency.latency.getCount(); + HintsService.instance = spy(HintsService.instance); + AtomicInteger accordTxnCount = new AtomicInteger(); + ConsensusMigrationMutationHelper.replaceInstanceForTest( + new ConsensusMigrationMutationHelper() + { + int count = 0; + + @Override + public SplitMutation splitMutationIntoAccordAndNormal(T mutation, ClusterMetadata cm) + { + if (count > 2) + return super.splitMutationIntoAccordAndNormal(mutation, cm); + + SplitMutation split; + if (count % 2 == 0) + split = new SplitMutation(mutation, null); + else + split = new SplitMutation<>(null, mutation); + count++; + return split; + } + + @Override + public AsyncTxnResult mutateWithAccordAsync(ClusterMetadata cm, Mutation mutation, @Nullable ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + accordTxnCount.incrementAndGet(); + AsyncTxnResult asyncTxnResult = new AsyncTxnResult(AccordTestUtils.txnId(42, 43, 44)); + asyncTxnResult.setSuccess(new TxnData()); + return asyncTxnResult; + } + }); + sendHintsWithRetryDifferentSystemUUID(metadata); + // Two should be Accord transactions + spinAssertEquals(2, accordTxnCount::get, 10); + Thread.sleep(1000); + // An attempt should be made to write to all replicas + verify(HintsService.instance, times(1)).writeForAllReplicas(notNull()); + // And it should be written locally + spinAssertEquals(startWrites + 1L, cfs.metric.writeLatency.latency::getCount, 10); + + // Hints that are rehinted are treated as succeeding immediately for the ACCORD_HINT_ENDPOINT + assertEquals(3, HintsServiceMetrics.getDelayCount(HintsServiceMetrics.ACCORD_HINT_ENDPOINT)); + } } diff --git a/test/unit/org/apache/cassandra/hints/HintsTestUtil.java b/test/unit/org/apache/cassandra/hints/HintsTestUtil.java index 8ab65998c4ae..b3f3250481fa 100644 --- a/test/unit/org/apache/cassandra/hints/HintsTestUtil.java +++ b/test/unit/org/apache/cassandra/hints/HintsTestUtil.java @@ -32,12 +32,12 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.Clock; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; import static org.apache.cassandra.Util.dk; import static org.apache.cassandra.net.MockMessagingService.verb; import static org.apache.cassandra.net.Verb.HINT_REQ; import static org.apache.cassandra.net.Verb.HINT_RSP; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; final class HintsTestUtil { @@ -88,4 +88,20 @@ static MockMessagingSpy sendHintsAndResponses(TableMetadata metadata, int noOfHi } return spy; } + + static void sendHintsWithRetryDifferentSystemUUID(TableMetadata metadata) + { + // create and write three hints, two that should be routed to Accord, and one should need rehinting since + // it doesn't end up routed to Accord + UUID hostId = HintsService.RETRY_ON_DIFFERENT_SYSTEM_UUID; + for (int i = 0; i < 3; i++) + { + long now = Clock.Global.currentTimeMillis(); + DecoratedKey dkey = dk(String.valueOf(i)); + PartitionUpdate.SimpleBuilder builder = PartitionUpdate.simpleBuilder(metadata, dkey).timestamp(now); + builder.row("column0").add("val", "value0"); + Hint hint = Hint.create(builder.buildAsMutation(), now); + HintsService.instance.write(hostId, hint); + } + } } diff --git a/test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java b/test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java index 59c03fc3c7b2..b9876e0eb8eb 100644 --- a/test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java +++ b/test/unit/org/apache/cassandra/schema/TransactionalConfigSchemaTest.java @@ -92,4 +92,15 @@ public void incompleteMigrationFailure() table, TransactionalMode.off, TransactionalMigrationFromMode.none); assertTransactionalMode(table, TransactionalMode.off, TransactionalMigrationFromMode.none); } + + @Test + public void alterCanSkipMigration() + { + String table = "alter_skips_migration_table"; + process("CREATE TABLE ks.%s (k int primary key, v int)", table); + assertTransactionalMode(table, TransactionalMode.off, TransactionalMigrationFromMode.none); + + process("ALTER TABLE ks.%s WITH transactional_mode='%s' AND transactional_migration_from='%s'", table, TransactionalMode.full, TransactionalMigrationFromMode.none); + assertTransactionalMode(table, TransactionalMode.full, TransactionalMigrationFromMode.none); + } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java index 901ed572cd31..2711b20336dc 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordReadRepairTest.java @@ -60,7 +60,7 @@ public static void setupClass() throws IOException @Test public void testSerialReadRepair() throws Exception { - testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.SERIAL), + testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.SERIAL), new Object[][] {{1, 1, 1, 1}}); } @@ -68,7 +68,7 @@ public void testSerialReadRepair() throws Exception public void testCASFailedConditionReadRepair() throws Exception { // Even if the condition fails to apply the data checked when applying the condition should be repaired - testReadRepair(cluster -> cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v1) VALUES (1, 1, 99) IF NOT EXISTS;", ConsistencyLevel.SERIAL), + testReadRepair(cluster -> cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v1) VALUES (1, 1, 99) IF NOT EXISTS;", ConsistencyLevel.SERIAL), new Object[][] {{false, 1, 1, 1, 1}}); } @@ -76,7 +76,7 @@ public void testCASFailedConditionReadRepair() throws Exception public void testCASReadRepair() throws Exception { // If the condition applies the read repair should preserve the existing timestamp - testReadRepair(cluster -> cluster.coordinator(1).execute("UPDATE " + qualifiedTableName + " SET v2 = 99 WHERE k = 1 and c = 1 IF EXISTS;", ConsistencyLevel.SERIAL), + testReadRepair(cluster -> cluster.coordinator(1).execute("UPDATE " + qualifiedAccordTableName + " SET v2 = 99 WHERE k = 1 and c = 1 IF EXISTS;", ConsistencyLevel.SERIAL), new Object[][] {{Boolean.TRUE}}); } @@ -88,20 +88,20 @@ public void testCASReadRepair() throws Exception public void testNonSerialReadRepair() throws Exception { for (ConsistencyLevel cl : ImmutableList.of(ConsistencyLevel.QUORUM)) - testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", cl), + testReadRepair(cluster -> cluster.coordinator(1).execute("SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1;", cl), new Object[][] {{1, 1, 1, 1}}); } void testReadRepair(Function accordTxn, Object[][] expected) throws Exception { - test("CREATE TABLE " + qualifiedTableName + " (k int, c int, v1 int, v2 int, PRIMARY KEY ((k), c)) WITH transactional_mode='unsafe_writes';", + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v1 int, v2 int, PRIMARY KEY ((k), c)) WITH transactional_mode='unsafe_writes';", cluster -> { Filter mutationFilter = cluster.filters().verbs(Verb.MUTATION_REQ.id).to(2).drop().on(); cluster.filters().verbs(Verb.HINT_REQ.id, Verb.HINT_RSP.id).drop().on(); - cluster.coordinator(1).execute("INSERT INTO " + qualifiedTableName + " (k, c, v1, v2) VALUES (1, 1, 1, 1) USING TIMESTAMP 42;", ConsistencyLevel.ONE); + cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v1, v2) VALUES (1, 1, 1, 1) USING TIMESTAMP 42;", ConsistencyLevel.ONE); mutationFilter.off(); Filter blockNodeOneReads = cluster.filters().verbs(Verb.READ_REQ.id).to(1).drop().on(); - assertThat(cluster.coordinator(2).executeWithResult("SELECT * FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) + assertThat(cluster.coordinator(2).executeWithResult("SELECT * FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) .isEmpty(); blockNodeOneReads.off(); // Should perform read repair @@ -109,7 +109,7 @@ void testReadRepair(Function accordTxn, Object[][] expected assertRows(result, expected); blockNodeOneReads.on(); // Side effect of the read repair should be visible now - assertThat(cluster.coordinator(2).executeWithResult("SELECT k, c, v1, WRITETIME(v1) FROM " + qualifiedTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) + assertThat(cluster.coordinator(2).executeWithResult("SELECT k, c, v1, WRITETIME(v1) FROM " + qualifiedAccordTableName + " WHERE k = 1 AND c = 1;", ConsistencyLevel.ONE)) .isEqualTo(1, 1, 1, 42L); }); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java index ea88d119778d..3abdcf808040 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java @@ -22,13 +22,16 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; -import java.util.function.LongSupplier; +import java.util.function.Supplier; import org.junit.Test; import accord.coordinate.Exhausted; import accord.coordinate.Preempted; import accord.coordinate.Timeout; +import accord.impl.IntKey; +import accord.primitives.Ranges; +import accord.primitives.Seekables; import accord.primitives.TxnId; import org.apache.cassandra.utils.Blocking; import org.assertj.core.api.Condition; @@ -47,12 +50,12 @@ public class AccordServiceTest public void retryExpectedFailures() throws InterruptedException { Blocking blocking = Mockito.mock(Blocking.class); - class Task implements LongSupplier + class Task implements Supplier { private int attempts = 0; @Override - public long getAsLong() + public Seekables get() { switch (attempts) { @@ -75,12 +78,12 @@ public long getAsLong() attempts++; throw AccordService.newBarrierExhausted(TxnId.NONE, true); default: - return 42; + return Ranges.of(IntKey.range(1, 2)); } } } Task failing = new Task(); - assertThat(doWithRetries(blocking, failing, Integer.MAX_VALUE, 100, 1000)).isEqualTo(42); + assertThat(doWithRetries(blocking, failing, Integer.MAX_VALUE, 100, 1000)).isEqualTo(Ranges.of(IntKey.range(1,2))); verify(blocking).sleep(100); verify(blocking).sleep(200); verify(blocking).sleep(400); @@ -100,10 +103,10 @@ public void retryThrowsTimeout() timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, true);}); Collections.shuffle(timeoutFailures, rs.asJdkRandom()); Iterator it = timeoutFailures.iterator(); - LongSupplier failing = () -> { + Supplier failing = () -> { if (!it.hasNext()) throw new IllegalStateException("Called too many times"); it.next().run(); // this throws... - return 42; + return Ranges.EMPTY; }; assertThatThrownBy(() -> doWithRetries(blocking, failing, timeoutFailures.size(), 100, 1000)).is(new Condition<>(AccordService::isTimeout, "timeout")); assertThat(it).isExhausted(); @@ -123,10 +126,10 @@ public void retryThrowsNonTimeout() timeoutFailures.add(() -> {throw new Exhausted(null, null);}); Collections.shuffle(timeoutFailures, rs.asJdkRandom()); Iterator it = timeoutFailures.iterator(); - LongSupplier failing = () -> { + Supplier failing = () -> { if (!it.hasNext()) throw new IllegalStateException("Called too many times"); it.next().run(); // this throws... - return 42; + return Ranges.EMPTY; }; assertThatThrownBy(() -> doWithRetries(blocking, failing, timeoutFailures.size(), 100, 1000)).isInstanceOf(Exhausted.class); assertThat(it).isExhausted(); @@ -172,10 +175,10 @@ public void run() } } Iterator it = failures.iterator(); - LongSupplier failing = () -> { + Supplier failing = () -> { if (!it.hasNext()) throw new IllegalStateException("Called too many times"); it.next().run(); // this throws... - return 42; + return Ranges.EMPTY; }; Blocking blocking = Mockito.mock(Blocking.class); assertThatThrownBy(() -> doWithRetries(blocking, failing, failures.size(), 100, 1000)).isInstanceOf(isError ? AssertionError.class : NullPointerException.class); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index ffd0d311867a..999ae875766e 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -27,9 +27,9 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.LongSupplier; +import java.util.function.ToLongFunction; import java.util.stream.Collectors; import java.util.stream.IntStream; - import javax.annotation.Nullable; import com.google.common.collect.Sets; @@ -97,6 +97,7 @@ import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.Condition; @@ -330,7 +331,7 @@ public static Txn createWriteTxn(int key) public static Txn createTxn(Txn.Kind kind, Seekables seekables) { - return AGENT.emptyTxn(kind, seekables); + return new Txn.InMemory(kind, seekables, TxnRead.EMPTY, TxnQuery.NONE, null); } public static Ranges fullRange(Txn txn) @@ -371,15 +372,16 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); TokenRange range = TokenRange.fullRange(metadata.id); Node.Id node = new Id(1); - Topology topology = new Topology(1, new Shard(range, new SortedArrayList<>(new Id[] { node }), Sets.newHashSet(node), Collections.emptySet())); NodeTimeService time = new NodeTimeService() { + private ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); + @Override public Id id() { return node;} @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } @Override - public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } + public long elapsed(TimeUnit timeUnit) { return elapsed.applyAsLong(timeUnit); } }; SingleEpochRanges holder = new SingleEpochRanges(Ranges.of(range)); @@ -397,12 +399,14 @@ public static AccordCommandStore createAccordCommandStore( { NodeTimeService time = new NodeTimeService() { + private ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); + @Override public Id id() { return node;} @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } @Override - public long unix(TimeUnit timeUnit) { return NodeTimeService.unixWrapper(TimeUnit.MICROSECONDS, this::now).applyAsLong(timeUnit); } + public long elapsed(TimeUnit timeUnit) { return elapsed.applyAsLong(timeUnit); } }; diff --git a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java index ec0eb60121dd..6bc85b6f1eeb 100644 --- a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java +++ b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java @@ -45,14 +45,17 @@ import com.google.common.collect.Sets; import org.junit.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.ConfigurationService; import accord.api.ConfigurationService.EpochReady; +import accord.api.Scheduler; +import accord.config.LocalConfig; import accord.impl.SizeOfIntersectionSorter; +import accord.impl.TestAgent; import accord.local.Node; +import accord.local.NodeTimeService; import accord.primitives.Ranges; import accord.topology.Topology; import accord.topology.TopologyManager; @@ -596,7 +599,8 @@ private class Instance this.id = node; this.token = token; this.epoch = epoch; - this.topology = new TopologyManager(SizeOfIntersectionSorter.SUPPLIER, id); + // TODO (review): Should there be a real scheduler here? Is it possible to adapt the Scheduler interface to scheduler used in this test? + this.topology = new TopologyManager(SizeOfIntersectionSorter.SUPPLIER, new TestAgent.RethrowAgent(), id, Scheduler.NEVER_RUN_SCHEDULED, NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MILLISECONDS, globalExecutor::currentTimeMillis), LocalConfig.DEFAULT); AccordConfigurationService.DiskStateManager instance = MockDiskStateManager.instance; config = new AccordConfigurationService(node, messagingService, failureDetector, instance, scheduler); config.registerListener(new ConfigurationService.Listener() diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 83db8accef0b..dfcfbebdd485 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -108,7 +108,7 @@ public SimulatedAccordCommandStore(RandomSource rs) this.nodeId = AccordTopology.tcmIdToAccord(ClusterMetadata.currentNullable().myNodeId()); this.timeService = new NodeTimeService() { - private final ToLongFunction unixWrapper = NodeTimeService.unixWrapper(TimeUnit.NANOSECONDS, this::now); + private final ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.NANOSECONDS, this::now); @Override public Node.Id id() @@ -129,9 +129,9 @@ public long now() } @Override - public long unix(TimeUnit unit) + public long elapsed(TimeUnit unit) { - return unixWrapper.applyAsLong(unit); + return elapsed.applyAsLong(unit); } @Override From a7c2bcafcd898e3c59b4617bd858442741ed8e09 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 23 Aug 2024 09:49:54 -0700 Subject: [PATCH 128/340] CommandsForRanges does not support slice which cause over returned data being sent patch by David Capwell; reviewed by Alex Petrov for CASSANDRA-19857 --- .../accord/AccordSafeCommandStore.java | 10 +- .../accord/AccordSafeCommandsForRanges.java | 2 +- .../service/accord/CommandsForRanges.java | 32 ++++- .../accord/CommandsForRangesLoader.java | 10 +- .../service/accord/async/AsyncOperation.java | 2 +- .../service/accord/CommandsForRangesTest.java | 129 ++++++++++++++++++ .../accord/SimulatedAccordCommandStore.java | 6 + .../SimulatedAccordCommandStoreTestBase.java | 42 +----- .../service/accord/SimulatedDepsTest.java | 102 ++------------ .../accord/SimulatedMultiKeyAndRangeTest.java | 16 +-- ...ulatedRandomKeysWithRangeConflictTest.java | 31 +---- .../accord/async/AsyncOperationTest.java | 4 +- .../serializers/DepsSerializerTest.java | 21 ++- 13 files changed, 226 insertions(+), 181 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index a5215eadea08..2b60bfcd514b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -179,6 +179,7 @@ public RangesForEpoch ranges() @Override public void registerHistoricalTransactions(Deps deps) { + if (deps.isEmpty()) return; // used in places such as accord.local.CommandStore.fetchMajorityDeps // We find a set of dependencies for a range then update CommandsFor to know about them Ranges allRanges = ranges.all(); @@ -221,28 +222,27 @@ private O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunct { if (commandsForRanges == null) return accumulate; + CommandsForRanges cfr = commandsForRanges.current().slice(slice); switch (keysOrRanges.domain()) { case Key: { AbstractKeys keys = (AbstractKeys) keysOrRanges.slice(slice, Routables.Slice.Minimal); - if (!commandsForRanges.ranges().intersects(keys)) + if (!cfr.ranges.intersects(keys)) return accumulate; - accumulate = map.apply(commandsForRanges.current(), accumulate); } break; case Range: { AbstractRanges ranges = (AbstractRanges) keysOrRanges.slice(slice, Routables.Slice.Minimal); - if (!commandsForRanges.ranges().intersects(ranges)) + if (!cfr.ranges.intersects(ranges)) return accumulate; - accumulate = map.apply(commandsForRanges.current(), accumulate); } break; default: throw new AssertionError("Unknown domain: " + keysOrRanges.domain()); } - return accumulate; + return map.apply(cfr, accumulate); } private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java index 848df1d27031..1a90c0a70089 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java @@ -49,7 +49,7 @@ public void preExecute() Pair> pair = AsyncChains.getUnchecked(chain); pair.left.close(); pair.left.get().entrySet().forEach(e -> pair.right.put(e.getKey(), e.getValue())); - original = new CommandsForRanges(key, pair.right); + original = CommandsForRanges.create(key, pair.right); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index 720013762a69..4da915448c20 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -26,6 +26,8 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import com.google.common.annotations.VisibleForTesting; + import accord.impl.CommandsSummary; import accord.local.SafeCommandStore.CommandFunction; import accord.local.SafeCommandStore.TestDep; @@ -44,16 +46,28 @@ import static accord.local.SafeCommandStore.TestStatus.ANY_STATUS; import static accord.local.Status.Stable; import static accord.local.Status.Truncated; +import static accord.primitives.Routables.Slice.Minimal; public class CommandsForRanges implements CommandsSummary { - private final Ranges ranges; + public final Ranges ranges; private final NavigableMap map; - public CommandsForRanges(Ranges ranges, NavigableMap map) + private CommandsForRanges(Ranges ranges, NavigableMap map) { this.ranges = ranges; - this.map = (NavigableMap) (NavigableMap) map; + this.map = map; + } + + public static CommandsForRanges create(Ranges ranges, NavigableMap map) + { + return new CommandsForRanges(ranges, (NavigableMap) (NavigableMap) map); + } + + @VisibleForTesting + public int size() + { + return map.size(); } @Override @@ -160,4 +174,16 @@ private T mapReduce(@Nonnull Timestamp testTimestamp, @Nullable TxnId te return accumulate; } + + public CommandsForRanges slice(Ranges slice) + { + Ranges ranges = this.ranges.slice(slice, Minimal); + NavigableMap copy = new TreeMap<>(); + for (Map.Entry e : map.entrySet()) + { + if (!e.getValue().ranges.intersects(slice)) continue; + copy.put(e.getKey(), e.getValue().slice(slice)); + } + return new CommandsForRanges(ranges, copy); + } } diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index f90d57b32e4b..0b4b357128be 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -31,6 +31,7 @@ import java.util.function.BiFunction; import javax.annotation.Nullable; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import accord.local.Command; @@ -41,6 +42,7 @@ import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routable; +import accord.primitives.Routables; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -256,7 +258,8 @@ public static class Summary public final Ranges ranges; public final List depsIds; - private Summary(TxnId txnId, @Nullable Timestamp executeAt, SaveStatus saveStatus, Ranges ranges, List depsIds) + @VisibleForTesting + Summary(TxnId txnId, @Nullable Timestamp executeAt, SaveStatus saveStatus, Ranges ranges, List depsIds) { this.txnId = txnId; this.executeAt = executeAt; @@ -265,6 +268,11 @@ private Summary(TxnId txnId, @Nullable Timestamp executeAt, SaveStatus saveStatu this.depsIds = depsIds; } + public Summary slice(Ranges slice) + { + return new Summary(txnId, executeAt, saveStatus, ranges.slice(slice, Routables.Slice.Minimal), depsIds); + } + @Override public String toString() { diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 7a39b0393f1d..34d80250a249 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -304,7 +304,7 @@ public void run() } catch (Throwable t) { - logger.error(String.format("Operation %s failed", this), t); + logger.error("Operation {} failed", this, t); fail(t); } finally diff --git a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java new file mode 100644 index 000000000000..5993d1d1338c --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.TreeMap; + +import org.junit.Test; + +import accord.api.RoutingKey; +import accord.impl.IntKey; +import accord.local.SaveStatus; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.TxnId; +import accord.utils.AccordGens; +import accord.utils.Gen; +import accord.utils.Gens; + +import static accord.utils.Property.qt; +import static org.assertj.core.api.Assertions.assertThat; + +public class CommandsForRangesTest +{ + private static final AccordGens.RangeFactory ROUTING_RANGE_FACTORY = (i, a, b) -> IntKey.range(a, b); + private static final Gen.IntGen SMALL_INTS = Gens.ints().between(1, 10); + private static final Gen RANGES_GEN = AccordGens.ranges(SMALL_INTS, AccordGens.intRoutingKey(), ROUTING_RANGE_FACTORY); + private static final Gen TXN_ID_GEN = AccordGens.txnIds(); + private static final Gen CFK_GEN = rs -> { + Ranges ranges = RANGES_GEN.next(rs); + int numTxn = 10; + TreeMap map = new TreeMap<>(); + for (int i = 0; i < numTxn; i++) + { + TxnId id = TXN_ID_GEN.next(rs); + map.put(id, new CommandsForRangesLoader.Summary(id, id, SaveStatus.ReadyToExecute, ranges, Collections.emptyList())); + } + return CommandsForRanges.create(ranges, map); + }; + private static final IntKey.Routing MIN = IntKey.routing(Integer.MIN_VALUE); + private static final IntKey.Routing MAX = IntKey.routing(Integer.MAX_VALUE); + + @Test + public void sliceEmptyWhenOutside() + { + qt().check(rs -> { + CommandsForRanges cfr = CFK_GEN.next(rs); + + for (Range range : allOutside(cfr.ranges)) + { + Ranges slice = Ranges.single(range); + CommandsForRanges subset = cfr.slice(slice); + assertThat(subset.ranges).isEmpty(); + assertThat(subset.size()).isEqualTo(0); + } + }); + } + + @Test + public void sliceSameNoop() + { + qt().check(rs -> { + CommandsForRanges cfr = CFK_GEN.next(rs); + CommandsForRanges subset = cfr.slice(cfr.ranges); + assertThat(subset.ranges).isEqualTo(cfr.ranges); + assertThat(subset.size()).isEqualTo(cfr.size()); + }); + } + + private static List allOutside(Ranges ranges) + { + if (ranges.isEmpty()) return Collections.emptyList(); + List matches = new ArrayList<>(); + { + Range first = ranges.get(0); + if (!first.start().equals(MIN)) + { + int start = Integer.MIN_VALUE; + int end = key(first.start()); + matches.add(IntKey.range(start, end)); + } + } + if (ranges.size() > 1) + { + { + Range last = ranges.get(ranges.size() - 1); + if (!last.end().equals(MAX)) + { + int start = key(last.end()); + int end = Integer.MAX_VALUE; + matches.add(IntKey.range(start, end)); + } + } + for (int i = 1; i < ranges.size(); i++) + { + Range previous = ranges.get(i - 1); + Range next = ranges.get(i - 1); + int start = key(previous.end()); + int end = key(next.start()); + if (start < end) + matches.add(IntKey.range(start, end)); + } + } + return matches; + } + + private static int key(RoutingKey key) + { + return ((IntKey.Routing) key).key; + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index dfcfbebdd485..6e947051b39f 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -46,6 +46,7 @@ import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.RoutableKey; +import accord.primitives.Routables; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -200,6 +201,11 @@ public void onEvict(AccordCachingState state) shouldCompact = boolSource(rs.fork()); } + public Ranges slice(Ranges ranges) + { + return ranges.slice(topology.ranges(), Routables.Slice.Minimal); + } + private static BooleanSupplier boolSource(RandomSource rs) { var gen = Gens.bools().mixedDistribution().next(rs); diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java index 2c313566df65..1c05c0a0ad98 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java @@ -133,30 +133,6 @@ public void init() ServerTestUtils.markCMS(); } - protected static void safeBlock(List> asyncs) throws InterruptedException, ExecutionException - { - int counter = 0; - for (var chain : asyncs) - { - Assertions.assertThat(chain.isDone()) - .describedAs("The %dth async task is blocked!", counter++) - .isTrue(); - AsyncChains.getBlocking(chain); - } - } - - protected static void safeBlock(List> asyncs, List details) throws InterruptedException, ExecutionException - { - int counter = 0; - for (var chain : asyncs) - { - Assertions.assertThat(chain.isDone()) - .describedAs("The %dth async task %s is blocked!", counter, details.get(counter++)) - .isTrue(); - AsyncChains.getBlocking(chain); - } - } - protected static TokenRange fullRange(TableId id) { return new TokenRange(AccordRoutingKey.SentinelKey.min(id), AccordRoutingKey.SentinelKey.max(id)); @@ -174,25 +150,19 @@ protected static AccordRoutingKey.TokenKey tokenKey(TableId id, long token) protected static Map> keyConflicts(List list, Keys keys) { + if (list.isEmpty()) return Collections.emptyMap(); Map> kc = Maps.newHashMapWithExpectedSize(keys.size()); for (Key key : keys) - { - if (list.isEmpty()) - continue; kc.put(key, list); - } return kc; } protected static Map> rangeConflicts(List list, Ranges ranges) { + if (list.isEmpty()) return Collections.emptyMap(); Map> kc = Maps.newHashMapWithExpectedSize(ranges.size()); for (Range range : ranges) - { - if (list.isEmpty()) - continue; kc.put(range, list); - } return kc; } @@ -217,14 +187,6 @@ protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, return pair.left; } - protected static Pair> assertDepsMessageAsync(SimulatedAccordCommandStore instance, - DepsMessage messageType, - Txn txn, FullRoute route, - Map> keyConflicts) - { - return assertDepsMessageAsync(instance, messageType, txn, route, keyConflicts, Collections.emptyMap()); - } - protected static Pair> assertDepsMessageAsync(SimulatedAccordCommandStore instance, DepsMessage messageType, Txn txn, FullRoute route, diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java index 66485cf8d622..9fda5cd16f45 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.Map; -import org.junit.Ignore; import org.junit.Test; import accord.api.Key; @@ -38,7 +37,6 @@ import accord.primitives.Ranges; import accord.primitives.Txn; import accord.primitives.TxnId; -import accord.utils.async.AsyncResult; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.schema.TableMetadata; @@ -47,7 +45,6 @@ import static accord.utils.Property.qt; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; -@Ignore // TODO (required): This class relies on removed ExecutionOrder for correctness, and needs to be adjusted public class SimulatedDepsTest extends SimulatedAccordCommandStoreTestBase { @Test @@ -66,33 +63,17 @@ public void keyConflicts() try (var instance = new SimulatedAccordCommandStore(rs)) { List conflicts = new ArrayList<>(numSamples); - boolean concurrent = rs.nextBoolean(); - List> asyncs = !concurrent ? null : new ArrayList<>(numSamples); for (int i = 0; i < numSamples; i++) { instance.maybeCacheEvict(keys, Ranges.EMPTY); - if (concurrent) - { - var pair = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, keys)); - conflicts.add(pair.left); - asyncs.add(pair.right); - } - else - { - conflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, keys))); - } - } - if (concurrent) - { - instance.processAll(); - safeBlock(asyncs); + conflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, keys))); } } }); } @Test - public void concurrentRangePartialKeyMatch() + public void rangePartialKeyMatch() { var tbl = reverseTokenTbl; int numSamples = 250; @@ -104,6 +85,7 @@ public void concurrentRangePartialKeyMatch() { long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); Ranges partialRange = Ranges.of(tokenRange(tbl.id, token - 1, token)); + Ranges partialRangeSliced = instance.slice(partialRange); long outOfRangeToken = token - 10; if (outOfRangeToken == Long.MIN_VALUE) // if this wraps around that is fine, just can't be min outOfRangeToken++; @@ -121,38 +103,24 @@ public void concurrentRangePartialKeyMatch() Keys conflictingKeys = (Keys) conflictingKeyTxn.keys(); FullRoute conflictingRoute = conflictingKeys.toRoute(conflictingKeys.get(0).toUnseekable()); - FullRangeRoute rangeRoute = partialRange.toRoute(keys.get(0).toUnseekable()); + FullRangeRoute rangeRoute = partialRange.toRoute(key.toUnseekable()); Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, partialRange); List keyConflicts = new ArrayList<>(numSamples); List outOfRangeKeyConflicts = new ArrayList<>(numSamples); List rangeConflicts = new ArrayList<>(numSamples); - List> asyncs = new ArrayList<>(numSamples * 2 + numSamples * numConflictKeyTxns); - List asyncIds = new ArrayList<>(numSamples * 2 + numSamples * numConflictKeyTxns); for (int i = 0; i < numSamples; i++) { instance.maybeCacheEvict((Keys) keyTxn.keys(), partialRange); for (int j = 0; j < numConflictKeyTxns; j++) - { - var p = instance.enqueuePreAccept(conflictingKeyTxn, conflictingRoute); - outOfRangeKeyConflicts.add(p.left); - asyncs.add(p.right); - asyncIds.add(p.left); - } + outOfRangeKeyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), conflictingKeyTxn, conflictingRoute, Map.of(outOfRangeKey, outOfRangeKeyConflicts))); - var k = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts, outOfRangeKey, outOfRangeKeyConflicts), Collections.emptyMap()); - keyConflicts.add(k.left); - outOfRangeKeyConflicts.add(k.left); - asyncs.add(k.right); - asyncIds.add(k.left); + TxnId id = assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts, outOfRangeKey, outOfRangeKeyConflicts)); + keyConflicts.add(id); + outOfRangeKeyConflicts.add(id); - var r = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, Map.of(key, keyConflicts), rangeConflicts(rangeConflicts, partialRange)); - rangeConflicts.add(r.left); - asyncs.add(r.right); - asyncIds.add(r.left); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, Map.of(key, keyConflicts), rangeConflicts(rangeConflicts, partialRangeSliced))); } - instance.processAll(); - safeBlock(asyncs, asyncIds); } }); } @@ -183,30 +151,11 @@ public void simpleRangeConflicts() List keyConflicts = new ArrayList<>(numSamples); List rangeConflicts = new ArrayList<>(numSamples); - boolean concurrent = rs.nextBoolean(); - List> asyncs = !concurrent ? null : new ArrayList<>(numSamples * 2); for (int i = 0; i < numSamples; i++) { instance.maybeCacheEvict(keys, ranges); - if (concurrent) - { - var k = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys)); - keyConflicts.add(k.left); - asyncs.add(k.right); - var r = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts(rangeConflicts, ranges)); - rangeConflicts.add(r.left); - asyncs.add(r.right); - } - else - { - keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); - rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts(rangeConflicts, ranges))); - } - } - if (concurrent) - { - instance.processAll(); - safeBlock(asyncs); + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts(rangeConflicts, instance.slice(ranges)))); } } }); @@ -218,7 +167,7 @@ public void expandingRangeConflicts() var tbl = reverseTokenTbl; int numSamples = 100; - qt().withSeed(6484101342775432632L).withExamples(10).check(rs -> { + qt().withExamples(10).check(rs -> { AccordKeyspace.unsafeClear(); try (var instance = new SimulatedAccordCommandStore(rs)) { @@ -231,9 +180,6 @@ public void expandingRangeConflicts() List keyConflicts = new ArrayList<>(numSamples); Map> rangeConflicts = new HashMap<>(); - boolean concurrent = rs.nextBoolean(); - List> asyncs = !concurrent ? null : new ArrayList<>(numSamples); - List info = !concurrent ? null : new ArrayList<>(numSamples); for (int i = 0; i < numSamples; i++) { Ranges partialRange = Ranges.of(tokenRange(tbl.id, token - i - 1, token + i)); @@ -242,23 +188,8 @@ public void expandingRangeConflicts() try { instance.maybeCacheEvict(keys, partialRange); - if (concurrent) - { - var pair = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys)); - info.add(pair.left); - keyConflicts.add(pair.left); - asyncs.add(pair.right); - - pair = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts); - info.add(pair.left); - rangeConflicts.put(partialRange.get(0), Collections.singletonList(pair.left)); - asyncs.add(pair.right); - } - else - { - keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); - rangeConflicts.put(partialRange.get(0), Collections.singletonList(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts))); - } + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); + rangeConflicts.put(partialRange.get(0), Collections.singletonList(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts))); } catch (Throwable t) { @@ -267,11 +198,6 @@ public void expandingRangeConflicts() throw t; } } - if (concurrent) - { - instance.processAll(); - safeBlock(asyncs, info); - } } }); } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java index c345a2686626..11497133b3cd 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java @@ -29,7 +29,6 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -import org.junit.Ignore; import org.junit.Test; import accord.api.Key; @@ -44,7 +43,6 @@ import accord.primitives.TxnId; import accord.utils.Gen; import accord.utils.Gens; -import accord.utils.async.AsyncResult; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.RTree; import org.apache.cassandra.utils.RangeTree; @@ -53,7 +51,6 @@ import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; -@Ignore // TODO (required): This class relies on removed ExecutionOrder for correctness, and needs to be adjusted public class SimulatedMultiKeyAndRangeTest extends SimulatedAccordCommandStoreTestBase { @Test @@ -78,7 +75,6 @@ public void test() Gen msgGen = msgDistribution.next(rs); Map> keyConflicts = new HashMap<>(); RangeTree rangeConflicts = RTree.create(RangeTreeRangeAccessor.instance); - List> asyncs = new ArrayList<>(numSamples); Gen.IntGen keyCountGen = keyDistribution.next(rs); Gen.IntGen rangeCountGen = rangeDistribution.next(rs); @@ -106,9 +102,8 @@ public void test() Map> expectedConflicts = new HashMap<>(); keys.forEach(k -> expectedConflicts.put(k, keyConflicts.computeIfAbsent(k, ignore -> new ArrayList<>()))); - var p = assertDepsMessageAsync(instance, msgGen.next(rs), txn, route, expectedConflicts, Collections.emptyMap()); - keys.forEach(k -> keyConflicts.get(k).add(p.left)); - asyncs.add(p.right); + TxnId id = assertDepsMessage(instance, msgGen.next(rs), txn, route, expectedConflicts, Collections.emptyMap()); + keys.forEach(k -> keyConflicts.get(k).add(id)); } break; case Range: @@ -151,17 +146,14 @@ public void test() l.clear(); l.addAll(sortedDedup); }); - var p = assertDepsMessageAsync(instance, msgGen.next(rs), txn, route, expectedKeyConflicts, expectedRangeConflicts); - asyncs.add(p.right); - ranges.forEach(r -> rangeConflicts.add(r, p.left)); + TxnId id = assertDepsMessage(instance, msgGen.next(rs), txn, route, expectedKeyConflicts, expectedRangeConflicts); + ranges.forEach(r -> rangeConflicts.add(r, id)); } break; default: throw new AssertionError(); } } - instance.processAll(); - safeBlock(asyncs); } }); } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java index b3df25bdfb80..31880d3297ec 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java @@ -25,7 +25,6 @@ import java.util.List; import java.util.Map; -import org.junit.Ignore; import org.junit.Test; import accord.api.Key; @@ -35,14 +34,12 @@ import accord.primitives.Ranges; import accord.primitives.Txn; import accord.primitives.TxnId; -import accord.utils.async.AsyncResult; import org.apache.cassandra.service.accord.api.PartitionKey; import static accord.utils.Property.qt; import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; -@Ignore // TODO (required): This class relies on removed ExecutionOrder for correctness, and needs to be adjusted public class SimulatedRandomKeysWithRangeConflictTest extends SimulatedAccordCommandStoreTestBase { @Test @@ -60,8 +57,6 @@ public void keysAllOverConflictingWithRange() { Map> keyConflicts = new HashMap<>(); List rangeConflicts = new ArrayList<>(numSamples); - boolean concurrent = rs.nextBoolean(); - List> asyncs = !concurrent ? null : new ArrayList<>(numSamples * 2); for (int i = 0; i < numSamples; i++) { long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); @@ -73,27 +68,11 @@ public void keysAllOverConflictingWithRange() instance.maybeCacheEvict((Keys) keyTxn.keys(), wholeRange); - if (concurrent) - { - var k = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); - keyConflicts.get(key).add(k.left); - asyncs.add(k.right); - - var r = assertDepsMessageAsync(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts, rangeConflicts(rangeConflicts, wholeRange)); - rangeConflicts.add(r.left); - asyncs.add(r.right); - } - else - { - var k = assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); - keyConflicts.get(key).add(k); - rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts, rangeConflicts(rangeConflicts, wholeRange))); - } - } - if (concurrent) - { - instance.processAll(); - safeBlock(asyncs); + // the full range is (-Inf, +Inf] but the store could be [(-Inf, Number], (Number, +Inf]], so need to slice to the store to get a matching range + Ranges wholeRangeSlicedShard = instance.slice(wholeRange); + var k = assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); + keyConflicts.get(key).add(k); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts, rangeConflicts(rangeConflicts, wholeRangeSlicedShard))); } } }); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index ffdb46eaafde..11ed33b42fab 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -350,7 +350,7 @@ public void loadFail() Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); qt().withPure(false) - .withSeed(-3537445084098883509L).withExamples(50) + .withExamples(50) .forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)) .check((rs, ids) -> { before(); // truncate tables @@ -413,7 +413,7 @@ public void consumerFails() Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); AtomicInteger counter = new AtomicInteger(); - qt().withPure(false).withSeed(3131884991952253478L).withExamples(100).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { + qt().withPure(false).withExamples(100).forAll(Gens.random(), Gens.lists(txnIdGen).ofSizeBetween(1, 10)).check((rs, ids) -> { logger.info("Test #{}", counter.incrementAndGet()); before(); // truncate tables diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java index 4ee49b24b975..4238f8a6871e 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java @@ -24,13 +24,17 @@ import accord.primitives.Deps; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.LocalPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.IVersionedSerializers; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaProvider; +import org.apache.cassandra.utils.AbstractTypeGenerators; import org.apache.cassandra.utils.AccordGenerators; import org.mockito.Mockito; @@ -51,8 +55,8 @@ public class DepsSerializerTest public void serde() { DataOutputBuffer buffer = new DataOutputBuffer(); - qt().withSeed(-4368731546033726179L).check(rs -> { - IPartitioner partitioner = AccordGenerators.partitioner().next(rs); + qt().check(rs -> { + IPartitioner partitioner = AccordGenerators.partitioner().map(DepsSerializerTest::normalize).next(rs); Schema.instance = Mockito.mock(SchemaProvider.class); DatabaseDescriptor.setPartitionerUnsafe(partitioner); Mockito.when(Schema.instance.getExistingTablePartitioner(Mockito.any())).thenReturn(partitioner); @@ -61,4 +65,17 @@ public void serde() IVersionedSerializers.testSerde(buffer, DepsSerializer.deps, deps, version.value); }); } + + private static IPartitioner normalize(IPartitioner partitioner) + { + // serializers require tokens to fit within 1 << 16, but that makes the test flakey when LocalPartitioner with a nested type is found... + if (!(partitioner instanceof LocalPartitioner)) return partitioner; + if (!shouldSimplify(partitioner.getTokenValidator())) return partitioner; + return new LocalPartitioner(Int32Type.instance); + } + + private static boolean shouldSimplify(AbstractType type) + { + return AbstractTypeGenerators.contains(type, t -> t.isCollection()); + } } \ No newline at end of file From c16297f862ce1ef365dacd9ef1a3759712efbf75 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Fri, 19 Jul 2024 18:01:16 +0200 Subject: [PATCH 129/340] Add an ability to reconstruct arbitrary epoch state from the log to TCM Patch by Alex Petrov; reviewed by Marcus Eriksson for CASSANDRA-19790. --- .../apache/cassandra/metrics/TCMMetrics.java | 2 + src/java/org/apache/cassandra/net/Verb.java | 3 + .../DistributedMetadataLogKeyspace.java | 35 +++++ .../cassandra/tcm/AbstractLocalProcessor.java | 2 +- .../tcm/AtomicLongBackedProcessor.java | 10 ++ .../cassandra/tcm/ClusterMetadataService.java | 5 + .../cassandra/tcm/PaxosBackedProcessor.java | 5 + .../org/apache/cassandra/tcm/Processor.java | 4 + .../cassandra/tcm/ReconstructLogState.java | 86 +++++++++++++ .../apache/cassandra/tcm/RemoteProcessor.java | 24 ++++ .../tcm/StubClusterMetadataService.java | 6 + .../apache/cassandra/tcm/log/LocalLog.java | 7 +- .../apache/cassandra/tcm/log/LogReader.java | 49 +++++++ .../apache/cassandra/tcm/log/LogStorage.java | 12 ++ .../tcm/migration/GossipProcessor.java | 6 + .../test/log/CoordinatorPathTestBase.java | 5 + .../test/log/ReconstructEpochTest.java | 121 ++++++++++++++++++ .../distributed/test/log/TestProcessor.java | 6 + .../cassandra/tcm/log/LocalLogTest.java | 5 + 19 files changed, 391 insertions(+), 2 deletions(-) create mode 100644 src/java/org/apache/cassandra/tcm/ReconstructLogState.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java diff --git a/src/java/org/apache/cassandra/metrics/TCMMetrics.java b/src/java/org/apache/cassandra/metrics/TCMMetrics.java index 01061b725a1d..909280c5771e 100644 --- a/src/java/org/apache/cassandra/metrics/TCMMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TCMMetrics.java @@ -66,6 +66,7 @@ public class TCMMetrics public final Meter coordinatorBehindSchema; public final Meter coordinatorBehindPlacements; public final Gauge epochAwareDebounceTrackerSize; + public final Meter reconstructLogStateCall; private TCMMetrics() { @@ -127,6 +128,7 @@ private TCMMetrics() coordinatorBehindSchema = Metrics.meter(factory.createMetricName("CoordinatorBehindSchema")); coordinatorBehindPlacements = Metrics.meter(factory.createMetricName("CoordinatorBehindPlacements")); + reconstructLogStateCall = Metrics.meter(factory.createMetricName("ReconstructLogStateCall")); } public void recordCommitFailureLatency(long latency, TimeUnit timeUnit, boolean isRejection) diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index ff2cb4ef7a01..e6c3264c4893 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -130,6 +130,7 @@ import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.FetchCMSLog; import org.apache.cassandra.tcm.FetchPeerLog; +import org.apache.cassandra.tcm.ReconstructLogState; import org.apache.cassandra.tcm.migration.CMSInitializationResponse; import org.apache.cassandra.tcm.migration.Election; import org.apache.cassandra.tcm.migration.CMSInitializationRequest; @@ -300,6 +301,8 @@ public enum Verb TCM_DISCOVER_REQ (813, P0, rpcTimeout, INTERNAL_METADATA, () -> NoPayload.serializer, () -> Discovery.instance.requestHandler, TCM_DISCOVER_RSP ), TCM_FETCH_PEER_LOG_RSP (818, P0, rpcTimeout, FETCH_LOG, MessageSerializers::logStateSerializer, RESPONSE_HANDLER ), TCM_FETCH_PEER_LOG_REQ (819, P0, rpcTimeout, FETCH_LOG, () -> FetchPeerLog.serializer, () -> FetchPeerLog.Handler.instance, TCM_FETCH_PEER_LOG_RSP ), + TCM_RECONSTRUCT_EPOCH_RSP (820, P0, rpcTimeout, FETCH_LOG, MessageSerializers::logStateSerializer, () -> ResponseVerbHandler.instance ), + TCM_RECONSTRUCT_EPOCH_REQ (821, P0, rpcTimeout, FETCH_LOG, () -> ReconstructLogState.serializer, () -> ReconstructLogState.Handler.instance, TCM_FETCH_PEER_LOG_RSP ), INITIATE_DATA_MOVEMENTS_RSP (814, P1, rpcTimeout, MISC, () -> NoPayload.serializer, RESPONSE_HANDLER ), INITIATE_DATA_MOVEMENTS_REQ (815, P1, rpcTimeout, MISC, () -> DataMovement.serializer, () -> DataMovementVerbHandler.instance, INITIATE_DATA_MOVEMENTS_RSP ), diff --git a/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java b/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java index 1e4a41c9ff1d..087fa51b8c33 100644 --- a/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java +++ b/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java @@ -163,6 +163,20 @@ public static LogState getLogState(Epoch since, boolean consistentFetch) return (consistentFetch ? serialLogReader : localLogReader).getLogState(since); } + /** + * Reconstructs the log state by returning a _consistent_ base snapshot of a start epoch, and + * a list of transformations between start and end. + * + * TODO: this is a rather expensive operation, and should be use sparingly. If we decide we need to + * rely on reconstructing arbitrary epochs during normal operation, we need to add a caching mechanism + * here. One more alternative is to keep a lazily-initialized AccordTopology table on CMS nodes for a + * number of recent epochs, and keep a node-local cache of this table on other nodes. + */ + public static LogState getLogState(Epoch start, Epoch end) + { + return serialLogReader.getLogState(start, end); + } + public static class DistributedTableLogReader implements LogReader { private final ConsistencyLevel consistencyLevel; @@ -200,6 +214,27 @@ public EntryHolder getEntries(Epoch since) throws IOException return entryHolder; } + public EntryHolder getEntries(Epoch since, Epoch until) throws IOException + { + // during gossip upgrade we have epoch = Long.MIN_VALUE + 1 (and the reverse partitioner doesn't support negative keys) + since = since.isBefore(Epoch.EMPTY) ? Epoch.EMPTY : since; + // note that we want all entries with epoch >= since - but since we use a reverse partitioner, we actually + // want all entries where the token is less than token(since) + UntypedResultSet resultSet = execute(String.format("SELECT epoch, kind, transformation, entry_id FROM %s.%s WHERE token(epoch) <= token(?) AND token(epoch) >= token(?)", + SchemaConstants.METADATA_KEYSPACE_NAME, TABLE_NAME), + consistencyLevel, since.getEpoch(), until.getEpoch()); + EntryHolder entryHolder = new EntryHolder(since); + for (UntypedResultSet.Row row : resultSet) + { + long entryId = row.getLong("entry_id"); + Epoch epoch = Epoch.create(row.getLong("epoch")); + Transformation.Kind kind = Transformation.Kind.fromId(row.getInt("kind")); + Transformation transform = kind.fromVersionedBytes(row.getBlob("transformation")); + entryHolder.add(new Entry(new Entry.Id(entryId), epoch, transform)); + } + return entryHolder; + } + @Override public MetadataSnapshots snapshots() { diff --git a/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java b/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java index da4e2e53ba88..6d126becc8c8 100644 --- a/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java +++ b/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java @@ -191,7 +191,7 @@ private LogState toLogState(Epoch lastKnown) // We can use local log here since we always call this method only if local log is up-to-date: // in case of a successful commit, we apply against latest metadata locally before committing, // and in case of a rejection, we fetch latest entries to verify linearizability. - logState = log.getCommittedEntries(lastKnown); + logState = log.getLocalEntries(lastKnown); } return logState; diff --git a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java index df00253433ba..41efdb245b15 100644 --- a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java @@ -76,6 +76,11 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retry) return log.waitForHighestConsecutive(); } + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + return log.getLocalEntries(lowEpoch); + } + public static class InMemoryStorage implements LogStorage { private final List entries; @@ -130,6 +135,11 @@ public synchronized EntryHolder getEntries(Epoch since) { throw new IllegalStateException("We have overridden all callers of this method, it should never be called"); } + + public EntryHolder getEntries(Epoch since, Epoch until) + { + throw new IllegalStateException("We have overridden all callers of this method, it should never be called"); + } } public static class InMemoryMetadataSnapshots implements MetadataSnapshots diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java index 3e3d8389ae2f..b9063adf5fc8 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java @@ -902,6 +902,11 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy return delegate().fetchLogAndWait(waitFor, retryPolicy); } + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + return delegate().reconstruct(lowEpoch, highEpoch, retryPolicy); + } + public String toString() { return "SwitchableProcessor{" + diff --git a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java index 45b5945cbc5c..dbaac24041a0 100644 --- a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java @@ -167,6 +167,11 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy throw new ReadTimeoutException(ConsistencyLevel.QUORUM, blockFor - collected.size(), blockFor, false); } + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + return DistributedMetadataLogKeyspace.getLogState(lowEpoch, highEpoch); + } + private static T unwrap(Promise promise) { if (!promise.isDone() || !promise.isSuccess()) diff --git a/src/java/org/apache/cassandra/tcm/Processor.java b/src/java/org/apache/cassandra/tcm/Processor.java index fdb4cf23bb4f..3d29b43375a0 100644 --- a/src/java/org/apache/cassandra/tcm/Processor.java +++ b/src/java/org/apache/cassandra/tcm/Processor.java @@ -24,6 +24,7 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.tcm.log.Entry; +import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.utils.Clock; public interface Processor @@ -100,5 +101,8 @@ default ClusterMetadata fetchLogAndWait(Epoch waitFor) Retry.Deadline.after(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS), new Retry.Jitter(TCMMetrics.instance.fetchLogRetries))); } + ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy); + + LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy); } diff --git a/src/java/org/apache/cassandra/tcm/ReconstructLogState.java b/src/java/org/apache/cassandra/tcm/ReconstructLogState.java new file mode 100644 index 000000000000..f6a60f070a04 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/ReconstructLogState.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm; + +import java.io.IOException; + +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.metrics.TCMMetrics; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.DistributedMetadataLogKeyspace; +import org.apache.cassandra.tcm.log.LogState; +import org.apache.cassandra.utils.FBUtilities; + +public class ReconstructLogState +{ + public static final Serializer serializer = new Serializer(); + + public final Epoch lowerBound; + public final Epoch higherBound; + + public ReconstructLogState(Epoch lowerBound, Epoch higherBound) + { + this.lowerBound = lowerBound; + this.higherBound = higherBound; + } + + static class Serializer implements IVersionedSerializer + { + + public void serialize(ReconstructLogState t, DataOutputPlus out, int version) throws IOException + { + Epoch.serializer.serialize(t.lowerBound, out); + Epoch.serializer.serialize(t.higherBound, out); + } + + public ReconstructLogState deserialize(DataInputPlus in, int version) throws IOException + { + Epoch lowerBound = Epoch.serializer.deserialize(in); + Epoch higherBound = Epoch.serializer.deserialize(in); + return new ReconstructLogState(lowerBound, higherBound); + } + + public long serializedSize(ReconstructLogState t, int version) + { + return Epoch.serializer.serializedSize(t.lowerBound) + + Epoch.serializer.serializedSize(t.higherBound); + } + } + + public static class Handler implements IVerbHandler + { + public static final Handler instance = new Handler(); + + public void doVerb(Message message) throws IOException + { + TCMMetrics.instance.reconstructLogStateCall.mark(); + ReconstructLogState request = message.payload; + + if (!ClusterMetadataService.instance().isCurrentMember(FBUtilities.getBroadcastAddressAndPort())) + throw new NotCMSException("This node is not in the CMS, can't generate a consistent log fetch response to " + message.from()); + + LogState result = DistributedMetadataLogKeyspace.getLogState(request.lowerBound, request.higherBound); + MessagingService.instance().send(message.responseWith(result), message.from()); + } + } +} diff --git a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java index ed10512a8894..635be54cf9e2 100644 --- a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java +++ b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java @@ -151,6 +151,30 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } } + @Override + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + try + { + Promise request = new AsyncPromise<>(); + List candidates = new ArrayList<>(log.metadata().fullCMSMembers()); + sendWithCallbackAsync(request, + Verb.TCM_RECONSTRUCT_EPOCH_REQ, + new ReconstructLogState(lowEpoch, highEpoch), + new CandidateIterator(candidates), + new Retry.Backoff(TCMMetrics.instance.fetchLogRetries)); + return request.get(retryPolicy.remainingNanos(), TimeUnit.NANOSECONDS); + } + catch (InterruptedException e) + { + throw new RuntimeException("Can not reconstruct during shutdown", e); + } + catch (ExecutionException | TimeoutException e) + { + throw new RuntimeException("Could not reconstruct", e); + } + } + public static ClusterMetadata fetchLogAndWait(CandidateIterator candidateIterator, LocalLog log) { try diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index d3318ecb743f..8a69f9acc464 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -33,6 +33,7 @@ import org.apache.cassandra.tcm.Commit.Replicator; import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LocalLog; +import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.ownership.DataPlacements; import org.apache.cassandra.tcm.ownership.PlacementProvider; @@ -145,6 +146,11 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy { throw new UnsupportedOperationException(); } + + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + throw new UnsupportedOperationException(); + } } diff --git a/src/java/org/apache/cassandra/tcm/log/LocalLog.java b/src/java/org/apache/cassandra/tcm/log/LocalLog.java index 6e54e4657fa5..a84f0920e0e3 100644 --- a/src/java/org/apache/cassandra/tcm/log/LocalLog.java +++ b/src/java/org/apache/cassandra/tcm/log/LocalLog.java @@ -351,11 +351,16 @@ public Optional highestPending() } } - public LogState getCommittedEntries(Epoch since) + public LogState getLocalEntries(Epoch since) { return storage.getLogState(since, false); } + public LogState getLocalEntries(Epoch since, Epoch until) + { + return storage.getLogState(since, until); + } + public ClusterMetadata waitForHighestConsecutive() { runOnce(); diff --git a/src/java/org/apache/cassandra/tcm/log/LogReader.java b/src/java/org/apache/cassandra/tcm/log/LogReader.java index 688f1c76531b..b8d62e75876a 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogReader.java +++ b/src/java/org/apache/cassandra/tcm/log/LogReader.java @@ -27,6 +27,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Ordering; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.MetadataSnapshots; @@ -37,6 +38,7 @@ public interface LogReader * Gets all entries where epoch >= since - could be empty if since is a later epoch than the current highest seen */ EntryHolder getEntries(Epoch since) throws IOException; + EntryHolder getEntries(Epoch since, Epoch until) throws IOException; MetadataSnapshots snapshots(); /** @@ -117,6 +119,53 @@ else if (!allowSnapshots) } } + default LogState getLogState(Epoch start, Epoch end) + { + try + { + ClusterMetadata closestSnapshot = snapshots().getSnapshotBefore(start); + + // Snapshot could not be found, fetch enough epochs to reconstruct the start metadata + if (closestSnapshot == null) + { + closestSnapshot = new ClusterMetadata(DatabaseDescriptor.getPartitioner()); + ImmutableList.Builder entries = new ImmutableList.Builder<>(); + EntryHolder entryHolder = getEntries(Epoch.EMPTY, end); + for (Entry entry : entryHolder.entries) + { + if (entry.epoch.isAfter(start)) + entries.add(entry); + else + closestSnapshot = entry.transform.execute(closestSnapshot).success().metadata; + } + return new LogState(closestSnapshot, entries.build()); + } + else if (closestSnapshot.epoch.isBefore(start)) + { + ImmutableList.Builder entries = new ImmutableList.Builder<>(); + EntryHolder entryHolder = getEntries(closestSnapshot.epoch, end); + for (Entry entry : entryHolder.entries) + { + if (entry.epoch.isAfter(start)) + entries.add(entry); + else + closestSnapshot = entry.transform.execute(closestSnapshot).success().metadata; + } + return new LogState(closestSnapshot, entries.build()); + } + else + { + assert closestSnapshot.epoch.isEqualOrAfter(start) : String.format("Got %s, but requested snapshot of %s", closestSnapshot.epoch, start); + EntryHolder entryHolder = getEntries(closestSnapshot.epoch.nextEpoch(), end); + return new LogState(closestSnapshot, ImmutableList.copyOf(entryHolder.entries)); + } + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + class EntryHolder { SortedSet entries; diff --git a/src/java/org/apache/cassandra/tcm/log/LogStorage.java b/src/java/org/apache/cassandra/tcm/log/LogStorage.java index 3d5e681b168c..7772d7d07e70 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogStorage.java +++ b/src/java/org/apache/cassandra/tcm/log/LogStorage.java @@ -56,6 +56,12 @@ public LogState getLogState(Epoch startEpoch, boolean allowSnapshots) return LogState.EMPTY; } + @Override + public LogState getLogState(Epoch start, Epoch end) + { + return LogState.EMPTY; + } + @Override public LogState getPersistedLogState() { @@ -68,6 +74,12 @@ public EntryHolder getEntries(Epoch since) return null; } + @Override + public EntryHolder getEntries(Epoch since, Epoch until) + { + return null; + } + @Override public MetadataSnapshots snapshots() { diff --git a/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java b/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java index 0cb654f19a49..6c02318f4806 100644 --- a/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java +++ b/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java @@ -25,6 +25,7 @@ import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.log.LogState; public class GossipProcessor implements Processor { @@ -39,4 +40,9 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy { return ClusterMetadata.current(); } + + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + throw new IllegalStateException("Can't reconstruct log state when running in gossip mode. Enable the ClusterMetadataService with `nodetool addtocms`."); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java index cfa0ec4be7ca..2bf1047b5587 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java @@ -752,6 +752,11 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy log.append(logState); return log.waitForHighestConsecutive(); } + + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + return log.getLocalEntries(lowEpoch, highEpoch); + } }, (a,b) -> {}, false); diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java new file mode 100644 index 000000000000..5166ae89981a --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.log; + +import java.util.Iterator; +import java.util.concurrent.TimeUnit; + +import org.junit.Assert; +import org.junit.Test; + +import com.codahale.metrics.Meter; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.metrics.TCMMetrics; +import org.apache.cassandra.schema.DistributedMetadataLogKeyspace; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.Retry; +import org.apache.cassandra.tcm.log.Entry; +import org.apache.cassandra.tcm.log.LogState; +import org.apache.cassandra.utils.Clock; + +public class ReconstructEpochTest extends TestBaseImpl +{ + @Test + public void logReaderTest() throws Exception + { + try (Cluster cluster = init(builder().withNodes(2).start())) + { + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (id int primary key)")); + for (int i = 0; i < 30; i++) + { + if (i > 0 && i % 5 == 0) + cluster.get(1).runOnInstance(() -> ClusterMetadataService.instance().triggerSnapshot()); + cluster.schemaChange(withKeyspace("ALTER TABLE %s.tbl WITH comment = '" + i + "'")); + } + + cluster.get(1).runOnInstance(() -> { + for (int[] cfg : new int[][]{ new int[]{ 6, 9 }, + new int[]{ 2, 20 }, + new int[]{ 5, 5 }, + new int[]{ 15, 20 }}) + { + int start = cfg[0]; + int end = cfg[1]; + LogState logState = DistributedMetadataLogKeyspace.getLogState(Epoch.create(start), Epoch.create(end)); + Assert.assertEquals(start, logState.baseState.epoch.getEpoch()); + Iterator iter = logState.entries.iterator(); + for (int i = start + 1; i <= end; i++) + Assert.assertEquals(i, iter.next().epoch.getEpoch()); + } + }); + + + cluster.get(2).runOnInstance(() -> { + for (int[] cfg : new int[][]{ new int[]{ 6, 9 }, + new int[]{ 2, 20 }, + new int[]{ 5, 5 }, + new int[]{ 15, 20 }}) + { + int start = cfg[0]; + int end = cfg[1]; + LogState logState = ClusterMetadataService.instance() + .processor() + .reconstruct(Epoch.create(start), + Epoch.create(end), + unsafeRetryIndefinitely()); + + Assert.assertEquals(start, logState.baseState.epoch.getEpoch()); + Iterator iter = logState.entries.iterator(); + for (int i = start + 1; i <= end; i++) + Assert.assertEquals(i, iter.next().epoch.getEpoch()); + } + }); + } + } + + private static Retry.Deadline unsafeRetryIndefinitely() + { + long timeoutNanos = DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS); + Meter retryMeter = TCMMetrics.instance.commitRetries; + return new Retry.Deadline(Clock.Global.nanoTime() + timeoutNanos, + new Retry.Jitter(retryMeter)) + { + @Override + public boolean reachedMax() + { + return false; + } + + @Override + public long remainingNanos() + { + return timeoutNanos; + } + + public String toString() + { + return String.format("RetryIndefinitely{tries=%d}", currentTries()); + } + }; + } + +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java b/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java index f5fabfb4acd0..6f359af057c9 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java @@ -32,6 +32,7 @@ import org.apache.cassandra.tcm.Retry; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.log.Entry; +import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.utils.concurrent.WaitQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,6 +70,11 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy return delegate.fetchLogAndWait(waitFor, retryPolicy); } + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + return delegate.reconstruct(lowEpoch, highEpoch, retryPolicy); + } + protected void waitIfPaused() { if (isPaused()) diff --git a/test/unit/org/apache/cassandra/tcm/log/LocalLogTest.java b/test/unit/org/apache/cassandra/tcm/log/LocalLogTest.java index fbdafb131d63..20bb5c79b06c 100644 --- a/test/unit/org/apache/cassandra/tcm/log/LocalLogTest.java +++ b/test/unit/org/apache/cassandra/tcm/log/LocalLogTest.java @@ -145,6 +145,11 @@ public EntryHolder getEntries(Epoch since) throws IOException return new EntryHolder(since); } + public EntryHolder getEntries(Epoch since, Epoch until) throws IOException + { + return new EntryHolder(since); + } + @Override public MetadataSnapshots snapshots() { From 66c50d796a5dfc87e4d089818904f386e504ba80 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 26 Aug 2024 12:54:54 +0200 Subject: [PATCH 130/340] Revert acccord module to absolute path --- .gitmodules | 2 +- src/java/org/apache/cassandra/tcm/Retry.java | 30 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 60a9510e7ad5..616dacf610a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "modules/accord"] path = modules/accord - url = ../cassandra-accord.git + url = https://github.com/apache/cassandra-accord.git branch = trunk diff --git a/src/java/org/apache/cassandra/tcm/Retry.java b/src/java/org/apache/cassandra/tcm/Retry.java index 3277531444a6..bf2e0fbf2be1 100644 --- a/src/java/org/apache/cassandra/tcm/Retry.java +++ b/src/java/org/apache/cassandra/tcm/Retry.java @@ -27,6 +27,7 @@ import org.apache.cassandra.utils.Clock; import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly; +import static org.apache.cassandra.tcm.Retry.Jitter.MAX_JITTER_MS; public abstract class Retry { @@ -160,6 +161,35 @@ public static Deadline after(long timeoutNanos, Retry delegate) return new Deadline(Clock.Global.nanoTime() + timeoutNanos, delegate); } + /** + * Since we are using message expiration for communicating timeouts to CMS nodes, we have to be careful not + * to overflow the long, since messaging is using only 32 bits for deadlines. To achieve that, we are + * giving `timeoutNanos` every time we retry, but will retry indefinitely. + */ + public static Deadline retryIndefinitely(long timeoutNanos, Meter retryMeter) + { + return new Deadline(Clock.Global.nanoTime() + timeoutNanos, + new Retry.Jitter(Integer.MAX_VALUE, MAX_JITTER_MS, new Random(), retryMeter)) + { + @Override + public boolean reachedMax() + { + return false; + } + + @Override + public long remainingNanos() + { + return timeoutNanos; + } + + public String toString() + { + return String.format("RetryIndefinitely{tries=%d}", currentTries()); + } + }; + } + @Override public boolean reachedMax() { From 788caeeef30dbfa8494422926b8df97409b983b6 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 26 Aug 2024 12:51:29 +0200 Subject: [PATCH 131/340] Switch to infinite loop executor instead of a while-loop thread. Patch by Alex Petrov; reviewed by David Capwell for CASSANDRA-19864 --- .../service/accord/AccordJournal.java | 142 ++++++++++-------- 1 file changed, 83 insertions(+), 59 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 1e84338f5fe3..09bee3464112 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -52,6 +52,8 @@ import accord.utils.Invariants; import org.agrona.collections.Long2ObjectHashMap; import org.agrona.collections.LongArrayList; +import org.apache.cassandra.concurrent.InfiniteLoopExecutor; +import org.apache.cassandra.concurrent.Interruptible; import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; @@ -105,6 +107,9 @@ import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; +import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MAXIMAL_REQ; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MINIMAL_REQ; import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; @@ -165,7 +170,7 @@ public void shutdown() { Invariants.checkState(status == Status.STARTED); status = Status.TERMINATING; - delayedRequestProcessor.runOnce(); + delayedRequestProcessor.shutdown(); journal.shutdown(); status = Status.TERMINATED; } @@ -625,12 +630,18 @@ private static int msVersion(int version) * Handling topology changes / epoch shift */ - private final class DelayedRequestProcessor extends Thread + private class DelayedRequestProcessor implements Interruptible.Task { private final ManyToOneConcurrentLinkedQueue delayedRequests = new ManyToOneConcurrentLinkedQueue<>(); private final LongArrayList waitForEpochs = new LongArrayList(); private final Long2ObjectHashMap> byEpoch = new Long2ObjectHashMap<>(); private final AtomicReference signal = new AtomicReference<>(Condition.newOneTimeCondition()); + private volatile Interruptible executor; + + public void start() + { + executor = executorFactory().infiniteLoop("AccordJournal-delayed-request-processor", this::run, SAFE, InfiniteLoopExecutor.Daemon.NON_DAEMON, InfiniteLoopExecutor.Interrupts.SYNCHRONIZED); + } private void delay(RequestContext requestContext) { @@ -643,81 +654,94 @@ private void runOnce() signal.get().signal(); } - public void run() + @Override + public void run(Interruptible.State state) { - while (!Thread.currentThread().isInterrupted() && isRunnable(status)) + if (state != NORMAL || Thread.currentThread().isInterrupted() || !isRunnable(status)) + return; + + try { - try + Condition signal = Condition.newOneTimeCondition(); + this.signal.set(signal); + // First, poll delayed requests, put them into by epoch + while (!delayedRequests.isEmpty()) { - Condition signal = Condition.newOneTimeCondition(); - this.signal.set(signal); - // First, poll delayed requests, put them into by epoch - while (!delayedRequests.isEmpty()) + RequestContext context = delayedRequests.poll(); + long waitForEpoch = context.waitForEpoch; + + List l = byEpoch.computeIfAbsent(waitForEpoch, (ignore) -> new ArrayList<>()); + if (l.isEmpty()) + waitForEpochs.pushLong(waitForEpoch); + l.add(context); + BiConsumer withEpochCallback = new BiConsumer<>() { - RequestContext context = delayedRequests.poll(); - long waitForEpoch = context.waitForEpoch; - - List l = byEpoch.computeIfAbsent(waitForEpoch, (ignore) -> new ArrayList<>()); - if (l.isEmpty()) - waitForEpochs.pushLong(waitForEpoch); - l.add(context); - BiConsumer withEpochCallback = new BiConsumer<>() + @Override + public void accept(Void unused, Throwable withEpochFailure) { - @Override - public void accept(Void unused, Throwable withEpochFailure) + if (withEpochFailure != null) { - if (withEpochFailure != null) + // Nothing to do but keep waiting + if (withEpochFailure instanceof Timeout) { - // Nothing to do but keep waiting - if (withEpochFailure instanceof Timeout) - { - node.withEpoch(waitForEpoch, this); - return; - } - else - throw new RuntimeException(withEpochFailure); + node.withEpoch(waitForEpoch, this); + return; } - runOnce(); + else + throw new RuntimeException(withEpochFailure); } - }; - node.withEpoch(waitForEpoch, withEpochCallback); - } + runOnce(); + } + }; + node.withEpoch(waitForEpoch, withEpochCallback); + } - // Next, process all delayed epochs - for (int i = 0; i < waitForEpochs.size(); i++) + // Next, process all delayed epochs + for (int i = 0; i < waitForEpochs.size(); i++) + { + long epoch = waitForEpochs.getLong(i); + if (node.topology().hasEpoch(epoch)) { - long epoch = waitForEpochs.getLong(i); - if (node.topology().hasEpoch(epoch)) + List requests = byEpoch.remove(epoch); + assert requests != null : String.format("%s %s (%d)", byEpoch, waitForEpochs, epoch); + for (RequestContext request : requests) { - List requests = byEpoch.remove(epoch); - assert requests != null : String.format("%s %s (%d)", byEpoch, waitForEpochs, epoch); - for (RequestContext request : requests) + try { - try - { - request.process(node, endpointMapper); - } - catch (Throwable t) - { - logger.error(String.format("Caught an exception while processing a delayed request %s", request), t); - } + request.process(node, endpointMapper); + } + catch (Throwable t) + { + logger.error("Caught an exception while processing a delayed request {}", request, t); } } } + } - waitForEpochs.removeIfLong(epoch -> !byEpoch.containsKey(epoch)); + waitForEpochs.removeIfLong(epoch -> !byEpoch.containsKey(epoch)); - signal.await(); - } - catch (InterruptedException e) - { - logger.info("Delayed request processor thread interrupted. Shutting down."); - return; - } - catch (Throwable t) - { - logger.error("Caught an exception in delayed processor", t); - } + signal.await(); + } + catch (InterruptedException e) + { + logger.info("Delayed request processor thread interrupted. Shutting down."); + } + catch (Throwable t) + { + logger.error("Caught an exception in delayed processor", t); + } + } + + private void shutdown() + { + executor.shutdown(); + try + { + executor.awaitTermination(1, TimeUnit.MINUTES); + } + catch (InterruptedException e) + { + throw new RuntimeException(e); } } } From b757cb873083176ab7c16a5a620f7e2bce71202e Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 22 Jul 2024 10:11:42 +0200 Subject: [PATCH 132/340] Add size to the segment index for safer journal reads Patch by Alex Petrov; reviewed by Marcus Eriksson for CASSANDRA-19871 --- modules/accord | 2 +- .../cassandra/journal/ActiveSegment.java | 22 ++-- .../org/apache/cassandra/journal/Flusher.java | 10 +- .../cassandra/journal/InMemoryIndex.java | 58 +++++---- .../org/apache/cassandra/journal/Index.java | 50 +++++++- .../org/apache/cassandra/journal/Journal.java | 35 +++-- .../apache/cassandra/journal/OnDiskIndex.java | 115 +++++++++++++---- .../org/apache/cassandra/journal/Params.java | 5 + .../org/apache/cassandra/journal/Segment.java | 20 +-- .../cassandra/journal/SegmentWriter.java | 5 +- .../cassandra/journal/StaticSegment.java | 4 +- .../accord/AccordIncrementalRepairTest.java | 3 + .../apache/cassandra/journal/IndexTest.java | 120 ++++++++++++------ 13 files changed, 318 insertions(+), 131 deletions(-) diff --git a/modules/accord b/modules/accord index 129a4862df43..178952b41f05 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 129a4862df43fdc5893687922a77bb0288f8cb83 +Subproject commit 178952b41f05bfa307aef03dcc013e37fb6230b4 diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index ebbd672b8057..a815d231997d 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -125,9 +125,9 @@ StaticSegment asStatic() * Expects the caller to acquire the ref to the segment and the record to exist. */ @Override - boolean read(int offset, EntrySerializer.EntryHolder into) + boolean read(int offset, int size, EntrySerializer.EntryHolder into) { - ByteBuffer duplicate = buffer.duplicate().position(offset).limit(buffer.capacity()); + ByteBuffer duplicate = buffer.duplicate().position(offset).limit(offset + size); try { EntrySerializer.read(into, keySupport, duplicate, descriptor.userVersion); @@ -394,7 +394,7 @@ Allocation allocate(int entrySize, Set hosts) opGroup.close(); return null; } - return new Allocation(opGroup, buffer.duplicate().position(position).limit(position + totalSize)); + return new Allocation(opGroup, buffer.duplicate().position(position).limit(position + totalSize), totalSize); } catch (Throwable t) { @@ -431,13 +431,15 @@ final class Allocation { private final OpOrder.Group appendOp; private final ByteBuffer buffer; - private final int position; + private final int start; + private final int length; - Allocation(OpOrder.Group appendOp, ByteBuffer buffer) + Allocation(OpOrder.Group appendOp, ByteBuffer buffer, int length) { this.appendOp = appendOp; this.buffer = buffer; - this.position = buffer.position(); + this.start = buffer.position(); + this.length = length; } RecordPointer write(K id, ByteBuffer record, Set hosts) @@ -445,9 +447,9 @@ RecordPointer write(K id, ByteBuffer record, Set hosts) try (BufferedDataOutputStreamPlus out = new DataOutputBufferFixed(buffer)) { EntrySerializer.write(id, record, hosts, keySupport, out, descriptor.userVersion); - index.update(id, position); + index.update(id, start, length); metadata.update(hosts); - return new RecordPointer(descriptor.timestamp, position); + return new RecordPointer(descriptor.timestamp, start); } catch (IOException e) { @@ -465,7 +467,7 @@ void writeInternal(K id, ByteBuffer record, Set hosts) try (BufferedDataOutputStreamPlus out = new DataOutputBufferFixed(buffer)) { EntrySerializer.write(id, record, hosts, keySupport, out, descriptor.userVersion); - index.update(id, position); + index.update(id, start, length); metadata.update(hosts); } catch (IOException e) @@ -482,7 +484,7 @@ void awaitFlush(Timer waitingOnFlush) { try (Timer.Context ignored = waitingOnFlush.time()) { - waitForFlush(position); + waitForFlush(start); } } } diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index 52cf89f2fca1..a0aa4ef11730 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -100,11 +100,15 @@ void start() flushExecutor = executorFactory().infiniteLoop(flushExecutorName, new FlushRunnable(preciseTime), SAFE, NON_DAEMON, SYNCHRONIZED); } - void shutdown() + void shutdown() throws InterruptedException { flushExecutor.shutdown(); + flushExecutor.awaitTermination(1, MINUTES); if (fsyncExecutor != null) - fsyncExecutor.shutdown(); + { + fsyncExecutor.shutdownNow(); // `now` to interrupt potentially parked runnable + fsyncExecutor.awaitTermination(1, MINUTES); + } } @Simulate(with={MONITORS,GLOBAL_CLOCK,LOCK_SUPPORT}) @@ -525,4 +529,4 @@ public interface Callbacks void onFlushFailed(Throwable cause); } -} \ No newline at end of file +} diff --git a/src/java/org/apache/cassandra/journal/InMemoryIndex.java b/src/java/org/apache/cassandra/journal/InMemoryIndex.java index 1ff4a28d7a1b..5417bfea408c 100644 --- a/src/java/org/apache/cassandra/journal/InMemoryIndex.java +++ b/src/java/org/apache/cassandra/journal/InMemoryIndex.java @@ -34,9 +34,9 @@ */ final class InMemoryIndex extends Index { - private static final int[] EMPTY = new int[0]; + private static final long[] EMPTY = new long[0]; - private final NavigableMap index; + private final NavigableMap index; // CSLM#lastKey() can be costly, so track lastId separately; // TODO: this could easily be premature and misguided; @@ -48,29 +48,31 @@ static InMemoryIndex create(KeySupport keySupport) return new InMemoryIndex<>(keySupport, new ConcurrentSkipListMap<>(keySupport)); } - private InMemoryIndex(KeySupport keySupport, NavigableMap index) + private InMemoryIndex(KeySupport keySupport, NavigableMap index) { super(keySupport); this.index = index; this.lastId = new AtomicReference<>(); } - public void update(K id, int offset) + public void update(K id, int offset, int size) { - index.merge(id, new int[] { offset }, (current, value) -> - { - int idx = Arrays.binarySearch(current, offset); - if (idx >= 0) // repeat update() call; shouldn't occur, but we might as well allow this NOOP - return current; - - /* Merge the new offset with existing values */ - int pos = -idx - 1; - int[] merged = new int[current.length + 1]; - System.arraycopy(current, 0, merged, 0, pos); - merged[pos] = offset; - System.arraycopy(current, pos, merged, pos + 1, current.length - pos); - return merged; - }); + long currentOffsetAndSize = composeOffsetAndSize(offset, size); + index.merge(id, new long[] { currentOffsetAndSize }, + (current, value) -> + { + int idx = Arrays.binarySearch(current, currentOffsetAndSize); + if (idx >= 0) // repeat update() call; shouldn't occur, but we might as well allow this NOOP + return current; + + /* Merge the new offset with existing values */ + int pos = -idx - 1; + long[] merged = new long[current.length + 1]; + System.arraycopy(current, 0, merged, 0, pos); + merged[pos] = currentOffsetAndSize; + System.arraycopy(current, pos, merged, pos + 1, current.length - pos); + return merged; + }); lastId.accumulateAndGet(id, (current, update) -> (null == current || keySupport.compare(current, update) < 0) ? update : current); } @@ -90,20 +92,20 @@ public K lastId() } @Override - public int[] lookUp(K id) + public long[] lookUp(K id) { return mayContainId(id) ? index.getOrDefault(id, EMPTY) : EMPTY; } @Override - public int lookUpFirst(K id) + public long lookUpFirst(K id) { - int[] offests = lookUp(id); - return offests.length == 0 ? -1 : offests[0]; + long[] offsets = lookUp(id); + return offsets.length == 0 ? -1 : offsets[0]; } @Override - int[] lookUpAll(K id) + long[] lookUpAll(K id) { return lookUp(id); } @@ -128,10 +130,18 @@ public void persist(Descriptor descriptor) static InMemoryIndex rebuild(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) { InMemoryIndex index = new InMemoryIndex<>(keySupport, new TreeMap<>(keySupport)); + try (StaticSegment.SequentialReader reader = StaticSegment.reader(descriptor, keySupport, fsyncedLimit)) { + int last = -1; while (reader.advance()) - index.update(reader.id(), reader.offset()); + { + int current = reader.offset(); + if (last >= 0) + index.update(reader.id(), last, current); + last = current; + } + } return index; } diff --git a/src/java/org/apache/cassandra/journal/Index.java b/src/java/org/apache/cassandra/journal/Index.java index cd2b69f2e426..f42a42d5edda 100644 --- a/src/java/org/apache/cassandra/journal/Index.java +++ b/src/java/org/apache/cassandra/journal/Index.java @@ -41,7 +41,7 @@ abstract class Index implements Closeable * * @return the found offsets into the segment, if any; can be empty */ - abstract int[] lookUp(K id); + abstract long[] lookUp(K id); /** * Look up offsets by id. It's possible, due to retries, for a segment @@ -50,8 +50,9 @@ abstract class Index implements Closeable * * @return the first offset into the segment, or -1 is none were found */ - abstract int lookUpFirst(K id); - abstract int[] lookUpAll(K id); + abstract long lookUpFirst(K id); + + abstract long[] lookUpAll(K id); /** * @return the first (smallest) id in the index @@ -83,4 +84,47 @@ boolean mayContainIds(Iterable ids) { return any(ids, this::mayContainId); } + + interface IndexIterator + { + boolean hasNext(); + K currentKey(); + int currentOffset(); + int currentSize(); + void next(); + } + + /** + * Helper methods + */ + + public static int readOffset(long record) + { + return (int) (0xffffffffL & (record >> 32)); + } + + public static long writeOffset(long record, int offset) + { + record &= 0x00000000ffffffffL; //unset all higher bits + record |= ((long) offset) << 32; + return record; + } + + public static int readSize(long record) + { + return (int) (0xffffffffL & record); + } + + public static long writeSize(long record, int size) + { + record &= 0xffffffff00000000L; // unset all lower bits + record |= (long) size; + return record; + } + + public static long composeOffsetAndSize(int offset, int size) + { + return writeSize(writeOffset(0, offset), size); + } + } diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 37fd1fa92bee..db86106d42c0 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -244,13 +244,22 @@ public boolean isTerminated() public void shutdown() { - allocator.shutdown(); - //compactor.stop(); - //invalidator.stop(); - flusher.shutdown(); - closer.shutdown(); - closeAllSegments(); - metrics.deregister(); + try + { + allocator.shutdown(); + allocator.awaitTermination(1, TimeUnit.MINUTES); + //compactor.stop(); + //invalidator.stop(); + flusher.shutdown(); + closer.shutdown(); + closer.awaitTermination(1, TimeUnit.MINUTES); + closeAllSegments(); + metrics.deregister(); + } + catch (InterruptedException e) + { + logger.error("Could not shutdown journal", e); + } } @Override @@ -274,7 +283,7 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted * * @return deserialized record if present, null otherwise */ - public V read(long segmentTimestamp, int offset) + public V read(long segmentTimestamp, int offset, int size) { try (ReferencedSegment referenced = selectAndReference(segmentTimestamp)) { @@ -283,7 +292,7 @@ public V read(long segmentTimestamp, int offset) return null; EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - segment.read(offset, holder); + segment.read(offset, size, holder); try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) { @@ -383,11 +392,13 @@ public V readFirstMatching(K id, Predicate condition) { for (Segment segment : segments.all()) { - int[] offsets = segment.index().lookUp(id); - for (int offset : offsets) + long[] offsets = segment.index().lookUp(id); + for (long offsetAndSize : offsets) { + int offset = Index.readOffset(offsetAndSize); + int size = Index.readSize(offsetAndSize); holder.clear(); - if (segment.read(offset, holder)) + if (segment.read(offset, size, holder)) { try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) { diff --git a/src/java/org/apache/cassandra/journal/OnDiskIndex.java b/src/java/org/apache/cassandra/journal/OnDiskIndex.java index 4cbb3d4e5772..ba769d6163b5 100644 --- a/src/java/org/apache/cassandra/journal/OnDiskIndex.java +++ b/src/java/org/apache/cassandra/journal/OnDiskIndex.java @@ -28,6 +28,7 @@ import javax.annotation.Nullable; +import accord.utils.Invariants; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; @@ -36,6 +37,7 @@ import static org.apache.cassandra.journal.Journal.validateCRC; import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; +import static org.apache.cassandra.utils.FBUtilities.updateChecksumLong; /** * An on-disk (memory-mapped) index for a completed flushed segment. @@ -44,10 +46,10 @@ */ final class OnDiskIndex extends Index { - private static final int[] EMPTY = new int[0]; + private static final long[] EMPTY = new long[0]; private static final int FILE_PREFIX_SIZE = 4 + 4; // count of entries, CRC - private static final int VALUE_SIZE = 4; // int offset + private static final int VALUE_SIZE = Long.BYTES; // int offset + int size private final int KEY_SIZE; private final int ENTRY_SIZE; @@ -146,7 +148,7 @@ void validate() throws IOException } static void write( - NavigableMap entries, KeySupport keySupport, DataOutputPlus out, int userVersion) throws IOException + NavigableMap entries, KeySupport keySupport, DataOutputPlus out, int userVersion) throws IOException { CRC32 crc = Crc.crc32(); @@ -158,16 +160,25 @@ static void write( updateChecksumInt(crc, size); out.writeInt((int) crc.getValue()); - for (Map.Entry entry : entries.entrySet()) + for (Map.Entry entry : entries.entrySet()) { - for (int offset : entry.getValue()) + long prev = -1; + for (long offsetAndSize : entry.getValue()) { K key = entry.getKey(); keySupport.serialize(key, out, userVersion); keySupport.updateChecksum(crc, key, userVersion); - out.writeInt(offset); - updateChecksumInt(crc, offset); + if (prev != -1) + { + long tmp = prev; + Invariants.checkState(readOffset(offsetAndSize) > readOffset(prev), + () -> String.format("Offsets should be strictly monotonic, but found %d following %d", + readOffset(offsetAndSize), readOffset(tmp))); + } + out.writeLong(offsetAndSize); + updateChecksumLong(crc, offsetAndSize); + prev = offsetAndSize; } } @@ -189,7 +200,7 @@ public K lastId() } @Override - public int[] lookUp(K id) + public long[] lookUp(K id) { if (!mayContainId(id)) return EMPTY; @@ -198,7 +209,7 @@ public int[] lookUp(K id) if (keyIndex < 0) return EMPTY; - int[] offsets = new int[] { offsetAtIndex(keyIndex) }; + long[] records = new long[] { recordAtIndex(keyIndex) }; /* * Duplicate entries are possible within one segment (but should be rare). @@ -207,27 +218,27 @@ public int[] lookUp(K id) for (int i = keyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) { - int length = offsets.length; - offsets = Arrays.copyOf(offsets, length + 1); - offsets[length] = offsetAtIndex(i); + int length = records.length; + records = Arrays.copyOf(records, length + 1); + records[length] = recordAtIndex(i); } for (int i = keyIndex + 1; i < entryCount && id.equals(keyAtIndex(i)); i++) { - int length = offsets.length; - offsets = Arrays.copyOf(offsets, length + 1); - offsets[length] = offsetAtIndex(i); + int length = records.length; + records = Arrays.copyOf(records, length + 1); + records[length] = recordAtIndex(i); } - Arrays.sort(offsets); - return offsets; + Arrays.sort(records); + return records; } @Override - public int lookUpFirst(K id) + public long lookUpFirst(K id) { if (!mayContainId(id)) - return -1; + return -1L; int keyIndex = binarySearch(id); @@ -238,14 +249,14 @@ public int lookUpFirst(K id) for (int i = keyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) keyIndex = i; - return keyIndex < 0 ? -1 : offsetAtIndex(keyIndex); + return keyIndex < 0 ? -1 : recordAtIndex(keyIndex); } @Override - public int[] lookUpAll(K id) + public long[] lookUpAll(K id) { if (!mayContainId(id)) - return new int[0]; + return new long[0]; int start = binarySearch(id); int firstKeyIndex = start; @@ -254,31 +265,81 @@ public int[] lookUpAll(K id) firstKeyIndex = i; if (firstKeyIndex < 0) - return new int[0]; + return new long[0]; int lastKeyIndex = start; for (int i = lastKeyIndex + 1; i < entryCount && id.equals(keyAtIndex(i)); i++) lastKeyIndex = i; - int[] all = new int[lastKeyIndex - firstKeyIndex + 1]; + long[] all = new long[lastKeyIndex - firstKeyIndex + 1]; int idx = firstKeyIndex; for (int i = 0; i < all.length; i++) { - all[i] = offsetAtIndex(idx); + all[i] = recordAtIndex(idx); idx++; } return all; } + public IndexIterator iterator() + { + return new IndexIteratorImpl(); + } + + private class IndexIteratorImpl implements IndexIterator + { + int currentIdx; + K currentKey; + int currentOffset; + int currentSize; + + IndexIteratorImpl() + { + currentIdx = -1; + } + + @Override + public boolean hasNext() + { + return currentIdx < (entryCount - 1); + } + + @Override + public K currentKey() + { + return currentKey; + } + + @Override + public int currentOffset() + { + return currentOffset; + } + + @Override + public int currentSize() + { + return currentSize; + } + + public void next() + { + currentIdx++; + currentKey = keyAtIndex(currentIdx); + long record = recordAtIndex(currentIdx); + currentOffset = Index.readOffset(record); + currentSize = Index.readSize(record); + } + } private K keyAtIndex(int index) { return keySupport.deserialize(buffer, FILE_PREFIX_SIZE + index * ENTRY_SIZE, descriptor.userVersion); } - private int offsetAtIndex(int index) + private long recordAtIndex(int index) { - return buffer.getInt(FILE_PREFIX_SIZE + index * ENTRY_SIZE + KEY_SIZE); + return buffer.getLong(FILE_PREFIX_SIZE + index * ENTRY_SIZE + KEY_SIZE); } /* diff --git a/src/java/org/apache/cassandra/journal/Params.java b/src/java/org/apache/cassandra/journal/Params.java index 46b382ea278c..17e719ce5d7f 100644 --- a/src/java/org/apache/cassandra/journal/Params.java +++ b/src/java/org/apache/cassandra/journal/Params.java @@ -43,6 +43,11 @@ enum FailurePolicy { STOP, STOP_JOURNAL, IGNORE, DIE } */ int flushPeriodMillis(); + default int flushPeriodNanos() + { + return flushPeriodMillis() * 1_000_000; + } + /** * @return milliseconds to block writes for while waiting for a slow disk flush to complete * when in {@link FlushMode#PERIODIC} mode diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java index 6700cb144579..e548c521286b 100644 --- a/src/java/org/apache/cassandra/journal/Segment.java +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -58,12 +58,14 @@ abstract class Segment implements Closeable, RefCounted> boolean readFirst(K id, RecordConsumer consumer) { - int offset = index().lookUpFirst(id); - if (offset == -1) + long offsetAndSize = index().lookUpFirst(id); + if (offsetAndSize == -1) return false; EntrySerializer.EntryHolder into = new EntrySerializer.EntryHolder<>(); - if (read(offset, into)) + int offset = Index.readOffset(offsetAndSize); + int size = Index.readSize(offset); + if (read(offset, size, into)) { Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); consumer.accept(descriptor.timestamp, offset, id, into.value, into.hosts, descriptor.userVersion); @@ -74,8 +76,8 @@ boolean readFirst(K id, RecordConsumer consumer) boolean readFirst(K id, EntrySerializer.EntryHolder into) { - int offset = index().lookUpFirst(id); - if (offset == -1 || !read(offset, into)) + long offsetAndSize = index().lookUpFirst(id); + if (offsetAndSize == -1 || !read(Index.readOffset(offsetAndSize), Index.readSize(offsetAndSize), into)) return false; Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); return true; @@ -83,14 +85,16 @@ boolean readFirst(K id, EntrySerializer.EntryHolder into) void readAll(K id, EntrySerializer.EntryHolder into, Runnable onEntry) { - int[] all = index().lookUpAll(id); + long[] all = index().lookUpAll(id); for (int i = 0; i < all.length; i++) { - Invariants.checkState(read(all[i], into), "Read should always return true"); + int offset = Index.readOffset(all[i]); + int size = Index.readSize(all[i]); + Invariants.checkState(read(offset, size, into), "Read should always return true"); onEntry.run(); } } - abstract boolean read(int offset, EntrySerializer.EntryHolder into); + abstract boolean read(int offset, int size, EntrySerializer.EntryHolder into); } diff --git a/src/java/org/apache/cassandra/journal/SegmentWriter.java b/src/java/org/apache/cassandra/journal/SegmentWriter.java index b8436aed6688..09797c363ed6 100644 --- a/src/java/org/apache/cassandra/journal/SegmentWriter.java +++ b/src/java/org/apache/cassandra/journal/SegmentWriter.java @@ -70,10 +70,9 @@ int write(K key, ByteBuffer record, Set hosts) int position = position(); try { - index.update(key, position); - metadata.update(hosts); - EntrySerializer.write(key, record, hosts, keySupport, trackedOut, descriptor.userVersion); + index.update(key, position, position() - position); + metadata.update(hosts); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index a25fab04836f..f63701771ca9 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -205,9 +205,9 @@ StaticSegment asStatic() * Expects the record to have been written at this offset, but potentially not flushed and lost. */ @Override - boolean read(int offset, EntrySerializer.EntryHolder into) + boolean read(int offset, int size, EntrySerializer.EntryHolder into) { - ByteBuffer duplicate = buffer.duplicate().position(offset); + ByteBuffer duplicate = buffer.duplicate().position(offset).limit(offset + size); try (DataInputBuffer in = new DataInputBuffer(duplicate, false)) { return EntrySerializer.tryRead(into, keySupport, duplicate, in, syncedOffsets.syncedOffset(), descriptor.userVersion); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java index 452d9dc53a5c..69d2622830e3 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -248,6 +248,9 @@ private static TxnId awaitLocalApplyOnKey(PartitionKey key) return txnId; } + // TODO (required): After conversation with Ariel: it's a known issue that I am not sure we need to fix now. + // The problem is that we don't flush after Accord repair, but before data repair when running incremental + // repair so it doesn't see the repaired sstables it is checking for. @Test public void txnRepairTest() throws Throwable { diff --git a/test/unit/org/apache/cassandra/journal/IndexTest.java b/test/unit/org/apache/cassandra/journal/IndexTest.java index 8301ea988e31..5e7314a33860 100644 --- a/test/unit/org/apache/cassandra/journal/IndexTest.java +++ b/test/unit/org/apache/cassandra/journal/IndexTest.java @@ -19,21 +19,28 @@ import java.io.IOException; import java.nio.file.Files; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Random; +import java.util.TreeMap; import java.util.stream.Collectors; import com.google.common.collect.Maps; +import org.junit.Assert; import org.junit.Test; import org.agrona.collections.IntHashSet; import org.apache.cassandra.io.util.File; import org.apache.cassandra.utils.Generators; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; import org.quicktheories.core.Gen; import org.quicktheories.impl.Constraint; +import static org.apache.cassandra.journal.Index.composeOffsetAndSize; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertArrayEquals; @@ -44,7 +51,7 @@ public class IndexTest { - private static final int[] EMPTY = new int[0]; + private static final long[] EMPTY = new long[0]; @Test public void testInMemoryIndexBasics() @@ -70,17 +77,18 @@ public void testInMemoryIndexBasics() int val32 = 3200; int val33 = 3300; - index.update(key1, val11); - index.update(key2, val21); - index.update(key2, val22); - index.update(key3, val31); - index.update(key3, val32); - index.update(key3, val33); + index.update(key1, val11, 1); + index.update(key2, val21, 2); + index.update(key2, val22, 3); + index.update(key3, val31, 4); + index.update(key3, val32, 5); + index.update(key3, val33, 6); assertArrayEquals(EMPTY, index.lookUp(key0)); - assertArrayEquals(new int[] { val11 }, index.lookUp(key1)); - assertArrayEquals(new int[] { val21, val22 }, index.lookUp(key2)); - assertArrayEquals(new int[] { val31, val32, val33 }, index.lookUp(key3)); + + assertArrayEquals(new long[] { composeOffsetAndSize(val11, 1) }, index.lookUp(key1)); + assertArrayEquals(new long[] { composeOffsetAndSize(val21, 2), composeOffsetAndSize(val22, 3) }, index.lookUp(key2)); + assertArrayEquals(new long[] { composeOffsetAndSize(val31, 4), composeOffsetAndSize(val32, 5), composeOffsetAndSize(val33, 6) }, index.lookUp(key3)); assertArrayEquals(EMPTY, index.lookUp(key4)); assertEquals(key1, index.firstId()); @@ -111,12 +119,12 @@ public void testInMemoryIndexPersists() throws IOException int val32 = 3200; int val33 = 3300; - inMemory.update(key1, val11); - inMemory.update(key2, val21); - inMemory.update(key2, val22); - inMemory.update(key3, val31); - inMemory.update(key3, val32); - inMemory.update(key3, val33); + inMemory.update(key1, val11, 1); + inMemory.update(key2, val21, 2); + inMemory.update(key2, val22, 3); + inMemory.update(key3, val31, 4); + inMemory.update(key3, val32, 5); + inMemory.update(key3, val33, 6); File directory = new File(Files.createTempDirectory(null)); directory.deleteOnExit(); @@ -126,9 +134,9 @@ public void testInMemoryIndexPersists() throws IOException try (OnDiskIndex onDisk = OnDiskIndex.open(descriptor, TimeUUIDKeySupport.INSTANCE)) { assertArrayEquals(EMPTY, onDisk.lookUp(key0)); - assertArrayEquals(new int[] { val11 }, onDisk.lookUp(key1)); - assertArrayEquals(new int[] { val21, val22 }, onDisk.lookUp(key2)); - assertArrayEquals(new int[] { val31, val32, val33 }, onDisk.lookUp(key3)); + assertArrayEquals(new long[] { composeOffsetAndSize(val11, 1) }, onDisk.lookUp(key1)); + assertArrayEquals(new long[] { composeOffsetAndSize(val21, 2), composeOffsetAndSize(val22, 3) }, onDisk.lookUp(key2)); + assertArrayEquals(new long[] { composeOffsetAndSize(val31, 4), composeOffsetAndSize(val32, 5), composeOffsetAndSize(val33, 6) }, onDisk.lookUp(key3)); assertArrayEquals(EMPTY, onDisk.lookUp(key4)); assertEquals(key1, onDisk.firstId()); @@ -149,34 +157,34 @@ public void prop() throws IOException Constraint valueSizeConstraint = Constraint.between(0, 10); Constraint positionConstraint = Constraint.between(0, Integer.MAX_VALUE); Gen keyGen = Generators.timeUUID(); - Gen valueGen = rs -> { - int[] array = new int[(int) rs.next(valueSizeConstraint)]; + Gen valueGen = rs -> { + long[] array = new long[(int) rs.next(valueSizeConstraint)]; IntHashSet uniq = new IntHashSet(); for (int i = 0; i < array.length; i++) { - int value = (int) rs.next(positionConstraint); - while (!uniq.add(value)) - value = (int) rs.next(positionConstraint); - array[i] = value; + int offset = (int) rs.next(positionConstraint); + while (!uniq.add(offset)) + offset = (int) rs.next(positionConstraint); + array[i] = Index.composeOffsetAndSize(offset, (int) rs.next(positionConstraint)); } return array; }; - Gen> gen = rs -> { + Gen> gen = rs -> { int size = (int) rs.next(sizeConstraint); - Map map = Maps.newHashMapWithExpectedSize(size); + Map map = Maps.newHashMapWithExpectedSize(size); for (int i = 0; i < size; i++) { TimeUUID key = keyGen.generate(rs); while (map.containsKey(key)) key = keyGen.generate(rs); - int[] value = valueGen.generate(rs); + long[] value = valueGen.generate(rs); map.put(key, value); } return map; }; gen = gen.describedAs(map -> { StringBuilder sb = new StringBuilder(); - for (Map.Entry entry : map.entrySet()) + for (Map.Entry entry : map.entrySet()) sb.append('\n').append(entry.getKey()).append('\t').append(Arrays.toString(entry.getValue())); return sb.toString(); }); @@ -185,19 +193,19 @@ public void prop() throws IOException qt().forAll(gen).checkAssert(map -> test(directory, map)); } - private static void test(File directory, Map map) + private static void test(File directory, Map map) { InMemoryIndex inMemory = InMemoryIndex.create(TimeUUIDKeySupport.INSTANCE); - for (Map.Entry e : map.entrySet()) + for (Map.Entry e : map.entrySet()) { TimeUUID key = e.getKey(); assertThat(inMemory.lookUp(key)).isEmpty(); - int[] value = e.getValue(); + long[] value = e.getValue(); if (value.length == 0) continue; - for (int i : value) - inMemory.update(key, i); + for (long i : value) + inMemory.update(key, Index.readOffset(i), Index.readSize(i)); Arrays.sort(value); } assertIndex(map, inMemory); @@ -208,10 +216,29 @@ private static void test(File directory, Map map) try (OnDiskIndex onDisk = OnDiskIndex.open(descriptor, TimeUUIDKeySupport.INSTANCE)) { assertIndex(map, onDisk); + + List> sortedEntries = new ArrayList<>(); + for (Map.Entry entry : new TreeMap<>(map).entrySet()) + { + for (long l : entry.getValue()) + sortedEntries.add(Pair.create(entry.getKey(), l)); + } + + Index.IndexIterator iter = onDisk.iterator(); + Iterator> expectedIter = sortedEntries.iterator(); + while (iter.hasNext()) + { + iter.next(); + Pair expected = expectedIter.next(); + Assert.assertEquals(iter.currentKey(), expected.left); + Assert.assertEquals(iter.currentSize(), Index.readSize(expected.right)); + Assert.assertEquals(iter.currentOffset(), Index.readOffset(expected.right)); + } } } - private static void assertIndex(Map expected, Index actual) + + private static void assertIndex(Map expected, Index actual) { List keys = expected.entrySet() .stream() @@ -231,11 +258,11 @@ private static void assertIndex(Map expected, Index a assertThat(actual.lastId()).describedAs("Index %s had wrong lastId", actual).isEqualTo(keys.get(keys.size() - 1)); } - for (Map.Entry e : expected.entrySet()) + for (Map.Entry e : expected.entrySet()) { TimeUUID key = e.getKey(); - int[] value = e.getValue(); - int[] read = actual.lookUp(key); + long[] value = e.getValue(); + long[] read = actual.lookUp(key); if (value.length == 0) { @@ -248,4 +275,21 @@ private static void assertIndex(Map expected, Index a } } } + + @Test + public void testHelperMethods() + { + Random r = new Random(); + for (int i = 0; i < 1000000; i++) + { + long record = 0; + int size = Math.abs(r.nextInt()); + record = Index.writeSize(record, size); + int offset = Math.abs(r.nextInt()); + record = Index.writeOffset(record, offset); + assertEquals(size, Index.readSize(record)); + assertEquals(offset, Index.readOffset(record)); + assertEquals(record, composeOffsetAndSize(offset, size)); + } + } } \ No newline at end of file From 26d517f4ff54d550358c702532af503044ba0f6a Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 29 Aug 2024 13:34:45 -0700 Subject: [PATCH 133/340] txns that update a static row when the desired row doesn't exist leads to an error patch by David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-19855 --- .../service/accord/txn/TxnWrite.java | 2 +- .../test/accord/AccordCQLTestBase.java | 22 +++++++++++++++++++ .../test/accord/AccordTestBase.java | 6 +++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index 7d6e67ad8419..f7081927eb65 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -263,7 +263,7 @@ public Update complete(AccordUpdateParameters parameters) if (!staticRow.isEmpty()) updateBuilder.add(staticRow); - Row existing = !baseUpdate.isEmpty() ? Iterables.getOnlyElement(baseUpdate) : null; + Row existing = baseUpdate.hasRows() ? Iterables.getOnlyElement(baseUpdate) : null; Row row = applyUpdates(existing, referenceOps.regulars, key, referenceOps.clustering, up, data); if (row != null) updateBuilder.add(row); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java index 7ede0b3cdd80..c0956391b22c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java @@ -59,6 +59,7 @@ import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FailingConsumer; import org.assertj.core.api.Assertions; import static java.util.Collections.singletonList; @@ -91,6 +92,27 @@ public static void setupClass() throws IOException SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); } + @Override + protected void test(FailingConsumer fn) throws Exception + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, primary key (k, c)) WITH " + transactionalMode.asCqlParam(), fn); + } + + @Test + public void testNonExistingKeyWithStaticUpdate() throws Exception + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH " + transactionalMode.asCqlParam(), cluster -> { + for (int i = 0; i < 10; i++) + cluster.coordinator(1).execute(wrapInTxn("UPDATE " + qualifiedAccordTableName + " SET v += ?, s=? WHERE k=? AND c=?"), ConsistencyLevel.ANY, 1, i, 0, i); + + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(wrapInTxn("SELECT * FROM " + qualifiedAccordTableName + " WHERE k=? LIMIT 1"), ConsistencyLevel.ANY, 0); + AssertUtils.assertRows(result, QueryResults.builder() + .columns("k", "c", "s", "v") + .row(0, null, 9, null) + .build()); + }); + } + @Test public void testMultiPartitionReturn() throws Exception { diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index cfec7f44e92d..f30ef762cc9c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -439,12 +439,14 @@ private static boolean isIdempotent(Cluster cluster, String cql) }); } - private static String wrapInTxn(String statement) + protected static String wrapInTxn(String statement) { if (!statement.trim().toUpperCase().startsWith("BEGIN TRANSACTION")) { statement = statement.trim(); - statement = Arrays.stream(statement.split("\\n")).collect(Collectors.joining("\n ", "BEGIN TRANSACTION\n ", "\nCOMMIT TRANSACTION")); + statement = Arrays.stream(statement.split("\\n")) + .map(line -> line.trim().endsWith(";") ? line : line + ';') + .collect(Collectors.joining("\n ", "BEGIN TRANSACTION\n ", "\nCOMMIT TRANSACTION")); } return statement; } From 325e48ac39c47641da8879829c1fd9712c3a42f0 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Wed, 28 Aug 2024 17:18:36 +0200 Subject: [PATCH 134/340] Switch to streaming serialization of SavedCommand Patch by Alex Petrov; reviewed by David Capwell for CASSANDRA-19865 Co-authored-by: dcapwell --- modules/accord | 2 +- .../org/apache/cassandra/journal/Journal.java | 44 +- .../service/accord/AccordCommandStore.java | 6 +- .../service/accord/AccordJournal.java | 360 ++------ .../service/accord/AccordKeyspace.java | 3 + .../service/accord/AccordObjectSizes.java | 2 +- .../service/accord/AccordSafeCommand.java | 2 +- .../cassandra/service/accord/IJournal.java | 8 +- .../cassandra/service/accord/JournalKey.java | 37 +- .../service/accord/SavedCommand.java | 833 ++++++++---------- .../service/accord/async/AsyncOperation.java | 27 +- .../serializers/WaitingOnSerializer.java | 4 +- .../CompactionAccordIteratorsTest.java | 4 +- .../accord/AccordJournalOrderTest.java | 22 +- .../service/accord/AccordJournalTest.java | 3 +- .../service/accord/AccordTestUtils.java | 14 +- .../cassandra/service/accord/MockJournal.java | 324 ++++++- .../service/accord/SavedCommandTest.java | 144 +++ .../accord/async/AsyncOperationTest.java | 15 + .../CommandsForKeySerializerTest.java | 4 +- .../serializers/WaitingOnSerializerTest.java | 2 +- .../cassandra/utils/AccordGenerators.java | 172 +++- 22 files changed, 1159 insertions(+), 873 deletions(-) create mode 100644 test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java diff --git a/modules/accord b/modules/accord index 178952b41f05..449b2b4d0bf4 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 178952b41f05bfa307aef03dcc013e37fb6230b4 +Subproject commit 449b2b4d0bf4bb44d55a3c57f712a4d5a15e7220 diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index db86106d42c0..c092363af79d 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -47,11 +47,13 @@ import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.PathUtils; import org.apache.cassandra.journal.Segments.ReferencedSegment; import org.apache.cassandra.journal.Segments.ReferencedSegments; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.SavedCommand; import org.apache.cassandra.utils.Crc; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.Simulate; @@ -343,11 +345,16 @@ public V readFirst(K id) return null; } - // TODO: This should be improved with new index that should take better care of handling multiple items public List readAll(K id) { - EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); List res = new ArrayList<>(2); + readAll(id, (in, userVersion) -> res.add(valueSerializer.deserialize(id, in, userVersion))); + return res; + } + + public void readAll(K id, Reader reader) + { + EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); try (ReferencedSegments segments = selectAndReference(id)) { for (Segment segment : segments.all()) @@ -357,7 +364,7 @@ public List readAll(K id) { Invariants.checkState(Objects.equals(holder.key, id), "%s != %s", holder.key, id); - res.add(valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion)); + reader.read(in, segment.descriptor.userVersion); holder.clear(); } catch (IOException e) @@ -368,7 +375,6 @@ public List readAll(K id) }); } } - return res; } /** @@ -504,11 +510,28 @@ public void blockingWrite(K id, V record, Set hosts) * @param hosts hosts expected to invalidate the record */ public RecordPointer asyncWrite(K id, V record, Set hosts) + { + return asyncWrite(id, new SavedCommand.Writer<>() + { + public void write(DataOutputPlus out, int userVersion) throws IOException + { + valueSerializer.serialize(id, record, out, params.userVersion()); + } + + public K key() + { + return id; + } + }, + hosts); + } + + public RecordPointer asyncWrite(K id, Writer writer, Set hosts) { RecordPointer recordPointer; try (DataOutputBuffer dob = DataOutputBuffer.scratchBuffer.get()) { - valueSerializer.serialize(id, record, dob, params.userVersion()); + writer.write(dob, params.userVersion()); ActiveSegment.Allocation alloc = allocate(dob.getLength(), hosts); recordPointer = alloc.write(id, dob.unsafeGetBufferAndFlip(), hosts); flusher.asyncFlush(alloc); @@ -521,6 +544,7 @@ public RecordPointer asyncWrite(K id, V record, Set hosts) return recordPointer; } + private ActiveSegment.Allocation allocate(int entrySize, Set hosts) { ActiveSegment segment = currentSegment; @@ -942,4 +966,14 @@ public void truncateForTesting() advanceSegment(null); segments.set(Segments.none()); } + + public interface Writer + { + void write(DataOutputPlus out, int userVersion) throws IOException; + } + + public interface Reader + { + void read(DataInputBuffer in, int userVersion) throws IOException; + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 929a1695d781..39370a1cfc2a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -351,7 +351,9 @@ public Runnable appendToKeyspace(Command before, Command after) @VisibleForTesting public void appendToLog(Command before, Command after, Runnable runnable) { - journal.appendCommand(id, Collections.singletonList(SavedCommand.SavedDiff.diff(before, after)), null, runnable); + journal.appendCommand(id, + Collections.singletonList(SavedCommand.diffWriter(before, after)), + null, runnable); } boolean validateCommand(TxnId txnId, Command evicting) @@ -574,7 +576,7 @@ protected void setRedundantBefore(RedundantBefore newRedundantBefore) public NavigableMap bootstrapBeganAt() { return super.bootstrapBeganAt(); } public NavigableMap safeToRead() { return super.safeToRead(); } - public void appendCommands(List commands, List sanityCheck, Runnable onFlush) + public void appendCommands(List> commands, List sanityCheck, Runnable onFlush) { journal.appendCommand(id, commands, sanityCheck, onFlush); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 09bee3464112..eda7155a3880 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -21,9 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -31,23 +29,15 @@ import java.util.function.BiConsumer; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableMap; -import com.google.common.primitives.Ints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.coordinate.Timeout; import accord.local.Command; import accord.local.Node; -import accord.messages.AbstractEpochRequest; -import accord.messages.Commit; import accord.messages.LocalRequest; -import accord.messages.Message; -import accord.messages.MessageType; import accord.messages.ReplyContext; import accord.messages.Request; -import accord.messages.TxnRequest; -import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; import org.agrona.collections.Long2ObjectHashMap; @@ -57,7 +47,6 @@ import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; @@ -69,52 +58,12 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.ResponseContext; import org.apache.cassandra.net.Verb; -import org.apache.cassandra.service.accord.interop.AccordInteropApply; -import org.apache.cassandra.service.accord.interop.AccordInteropCommit; -import org.apache.cassandra.service.accord.serializers.AcceptSerializers; -import org.apache.cassandra.service.accord.serializers.ApplySerializers; -import org.apache.cassandra.service.accord.serializers.BeginInvalidationSerializers; -import org.apache.cassandra.service.accord.serializers.CommitSerializers; -import org.apache.cassandra.service.accord.serializers.EnumSerializer; -import org.apache.cassandra.service.accord.serializers.FetchSerializers; -import org.apache.cassandra.service.accord.serializers.InformDurableSerializers; -import org.apache.cassandra.service.accord.serializers.InformOfTxnIdSerializers; -import org.apache.cassandra.service.accord.serializers.PreacceptSerializers; -import org.apache.cassandra.service.accord.serializers.RecoverySerializers; -import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.concurrent.Condition; -import static accord.messages.MessageType.ACCEPT_INVALIDATE_REQ; -import static accord.messages.MessageType.ACCEPT_REQ; -import static accord.messages.MessageType.APPLY_MAXIMAL_REQ; -import static accord.messages.MessageType.APPLY_MINIMAL_REQ; -import static accord.messages.MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ; -import static accord.messages.MessageType.BEGIN_INVALIDATE_REQ; -import static accord.messages.MessageType.BEGIN_RECOVER_REQ; -import static accord.messages.MessageType.COMMIT_INVALIDATE_REQ; -import static accord.messages.MessageType.COMMIT_MAXIMAL_REQ; -import static accord.messages.MessageType.COMMIT_SLOW_PATH_REQ; -import static accord.messages.MessageType.INFORM_DURABLE_REQ; -import static accord.messages.MessageType.INFORM_OF_TXN_REQ; -import static accord.messages.MessageType.PRE_ACCEPT_REQ; -import static accord.messages.MessageType.PROPAGATE_APPLY_MSG; -import static accord.messages.MessageType.PROPAGATE_OTHER_MSG; -import static accord.messages.MessageType.PROPAGATE_PRE_ACCEPT_MSG; -import static accord.messages.MessageType.PROPAGATE_STABLE_MSG; -import static accord.messages.MessageType.SET_GLOBALLY_DURABLE_REQ; -import static accord.messages.MessageType.SET_SHARD_DURABLE_REQ; -import static accord.messages.MessageType.STABLE_FAST_PATH_REQ; -import static accord.messages.MessageType.STABLE_MAXIMAL_REQ; -import static accord.messages.MessageType.STABLE_SLOW_PATH_REQ; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; -import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MAXIMAL_REQ; -import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_APPLY_MINIMAL_REQ; -import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MAXIMAL_REQ; -import static org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType.INTEROP_COMMIT_MINIMAL_REQ; -import static org.apache.cassandra.service.accord.serializers.ReadDataSerializers.applyThenWaitUntilApplied; public class AccordJournal implements IJournal, Shutdownable { @@ -128,7 +77,7 @@ public class AccordJournal implements IJournal, Shutdownable private static final Set SENTINEL_HOSTS = Collections.singleton(0); - static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[23]); + static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[22]); public final Journal journal; private final AccordEndpointMapper endpointMapper; @@ -144,7 +93,25 @@ enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } public AccordJournal(AccordEndpointMapper endpointMapper, Params params) { File directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); - this.journal = new Journal<>("AccordJournal", directory, params, JournalKey.SUPPORT, RECORD_SERIALIZER); + this.journal = new Journal<>("AccordJournal", directory, params, JournalKey.SUPPORT, + // In Accord, we are using streaming serialization, i.e. Reader/Writer interfaces instead of materializing objects + new ValueSerializer() + { + public int serializedSize(JournalKey key, Object value, int userVersion) + { + throw new UnsupportedOperationException(); + } + + public void serialize(JournalKey key, Object value, DataOutputPlus out, int userVersion) throws IOException + { + throw new UnsupportedOperationException(); + } + + public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException + { + throw new UnsupportedOperationException(); + } + }); this.endpointMapper = endpointMapper; } @@ -224,26 +191,33 @@ public void processLocalRequest(LocalRequest request, BiConsumer diffs = loadDiffs(commandStoreId, txnId); - if (diffs.isEmpty()) - return null; - return SavedCommand.reconstructFromDiff(diffs); + try + { + return loadDiffs(commandStoreId, txnId).construct(); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } } @VisibleForTesting - public List loadDiffs(int commandStoreId, Timestamp txnId) + public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) { - return (List)(List) journal.readAll(new JournalKey(txnId, Type.SAVED_COMMAND, commandStoreId)); + SavedCommand.Builder builder = new SavedCommand.Builder(); + journal.readAll(new JournalKey(txnId, commandStoreId), + builder::deserializeNext); + return builder; } @Override - public void appendCommand(int commandStoreId, List outcomes, List sanityCheck, Runnable onFlush) + public void appendCommand(int commandStoreId, List> outcomes, List sanityCheck, Runnable onFlush) { RecordPointer pointer = null; - for (int i = 0; i < outcomes.size(); i++) + for (SavedCommand.Writer outcome : outcomes) { - SavedCommand.SavedDiff outcome = outcomes.get(i); - JournalKey key = new JournalKey(outcome.txnId, Type.SAVED_COMMAND, commandStoreId); + JournalKey key = new JournalKey(outcome.key(), commandStoreId); pointer = journal.asyncWrite(key, outcome, SENTINEL_HOSTS); } @@ -273,14 +247,23 @@ public void closeCurrentSegmentForTesting() public void sanityCheck(int commandStoreId, Command orig) { - List diffs = loadDiffs(commandStoreId, orig.txnId()); - // We can only use strict equality if we supply result. - Command reconstructed = SavedCommand.reconstructFromDiff(diffs, orig.result()); - Invariants.checkState(orig.equals(reconstructed), - "\n" + - "Original: %s\n" + - "Reconstructed: %s\n" + - "Diffs: %s", orig, reconstructed, diffs); + try + { + SavedCommand.Builder diffs = loadDiffs(commandStoreId, orig.txnId()); + diffs.forceResult(orig.result()); + // We can only use strict equality if we supply result. + Command reconstructed = diffs.construct(); + Invariants.checkState(orig.equals(reconstructed), + "\n" + + "Original: %s\n" + + "Reconstructed: %s\n" + + "Diffs: %s", orig, reconstructed, diffs); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } } /* @@ -391,241 +374,6 @@ public long expiresAtNanos() } } - /* - * Records ser/de in the Journal - */ - - private static final ValueSerializer RECORD_SERIALIZER = new ValueSerializer<>() - { - @Override - public int serializedSize(JournalKey key, Object record, int userVersion) - { - return Ints.checkedCast(key.type.serializedSize(key, record, userVersion)); - } - - @Override - public void serialize(JournalKey key, Object record, DataOutputPlus out, int userVersion) throws IOException - { - key.type.serialize(key, record, out, userVersion); - } - - @Override - public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException - { - return key.type.deserialize(key, in, userVersion); - } - }; - - /* Adapts vanilla message serializers to journal-expected signatures; converts user version to MS version */ - static final class MessageSerializer implements ValueSerializer - { - final IVersionedSerializer wrapped; - - private MessageSerializer(IVersionedSerializer wrapped) - { - this.wrapped = wrapped; - } - - static MessageSerializer wrap(IVersionedSerializer wrapped) - { - return new MessageSerializer(wrapped); - } - - @Override - public int serializedSize(JournalKey key, Object message, int userVersion) - { - return Ints.checkedCast(wrapped.serializedSize((Message) message, msVersion(userVersion))); - } - - @Override - public void serialize(JournalKey key, Object message, DataOutputPlus out, int userVersion) throws IOException - { - wrapped.serialize((Message) message, out, msVersion(userVersion)); - } - - @Override - public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException - { - return wrapped.deserialize(in, msVersion(userVersion)); - } - } - - @FunctionalInterface - interface TxnIdProvider - { - TxnId txnId(Message message); - } - - private static final TxnIdProvider EPOCH = msg -> ((AbstractEpochRequest) msg).txnId; - private static final TxnIdProvider TXN = msg -> ((TxnRequest) msg).txnId; - private static final TxnIdProvider LOCAL = msg -> ((LocalRequest) msg).primaryTxnId(); - private static final TxnIdProvider INVL = msg -> ((Commit.Invalidate) msg).primaryTxnId(); - - /** - * Accord Message type - consequently the kind of persisted record. - *

      - * Note: {@link EnumSerializer} is intentionally not being reused here, for two reasons: - * 1. This is an internal enum, fully under our control, not part of an external library - * 2. It's persisted in the record key, so has the additional constraint of being fixed size and - * shouldn't be using varint encoding - */ - public enum Type implements ValueSerializer - { - /* Auxiliary journal records */ - SAVED_COMMAND (1, SavedCommand.serializer), - - /* Accord protocol requests */ - PRE_ACCEPT (64, PRE_ACCEPT_REQ, PreacceptSerializers.request, TXN ), - ACCEPT (65, ACCEPT_REQ, AcceptSerializers.request, TXN ), - ACCEPT_INVALIDATE (66, ACCEPT_INVALIDATE_REQ, AcceptSerializers.invalidate, EPOCH), - COMMIT_SLOW_PATH (67, COMMIT_SLOW_PATH_REQ, CommitSerializers.request, TXN ), - COMMIT_MAXIMAL (68, COMMIT_MAXIMAL_REQ, CommitSerializers.request, TXN ), - STABLE_FAST_PATH (87, STABLE_FAST_PATH_REQ, CommitSerializers.request, TXN ), - STABLE_SLOW_PATH (88, STABLE_SLOW_PATH_REQ, CommitSerializers.request, TXN ), - STABLE_MAXIMAL (89, STABLE_MAXIMAL_REQ, CommitSerializers.request, TXN ), - COMMIT_INVALIDATE (69, COMMIT_INVALIDATE_REQ, CommitSerializers.invalidate, INVL ), - APPLY_MINIMAL (70, APPLY_MINIMAL_REQ, ApplySerializers.request, TXN ), - APPLY_MAXIMAL (71, APPLY_MAXIMAL_REQ, ApplySerializers.request, TXN ), - APPLY_THEN_WAIT_UNTIL_APPLIED (72, APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, applyThenWaitUntilApplied, EPOCH), - - BEGIN_RECOVER (73, BEGIN_RECOVER_REQ, RecoverySerializers.request, TXN ), - BEGIN_INVALIDATE (74, BEGIN_INVALIDATE_REQ, BeginInvalidationSerializers.request, EPOCH), - INFORM_OF_TXN (75, INFORM_OF_TXN_REQ, InformOfTxnIdSerializers.request, EPOCH), - INFORM_DURABLE (76, INFORM_DURABLE_REQ, InformDurableSerializers.request, TXN ), - SET_SHARD_DURABLE (77, SET_SHARD_DURABLE_REQ, SetDurableSerializers.shardDurable, EPOCH), - SET_GLOBALLY_DURABLE (78, SET_GLOBALLY_DURABLE_REQ, SetDurableSerializers.globallyDurable, EPOCH), - - /* Accord local messages */ - PROPAGATE_PRE_ACCEPT (79, PROPAGATE_PRE_ACCEPT_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_STABLE (80, PROPAGATE_STABLE_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_APPLY (81, PROPAGATE_APPLY_MSG, FetchSerializers.propagate, LOCAL), - PROPAGATE_OTHER (82, PROPAGATE_OTHER_MSG, FetchSerializers.propagate, LOCAL), - - /* C* interop messages */ - INTEROP_COMMIT (83, INTEROP_COMMIT_MINIMAL_REQ, STABLE_FAST_PATH_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_COMMIT_MAXIMAL (84, INTEROP_COMMIT_MAXIMAL_REQ, STABLE_MAXIMAL_REQ, AccordInteropCommit.serializer, TXN), - INTEROP_APPLY_MINIMAL (85, INTEROP_APPLY_MINIMAL_REQ, APPLY_MINIMAL_REQ, AccordInteropApply.serializer, TXN), - INTEROP_APPLY_MAXIMAL (86, INTEROP_APPLY_MAXIMAL_REQ, APPLY_MAXIMAL_REQ, AccordInteropApply.serializer, TXN), - ; - - final int id; - - /** - * An incoming message of a given type from Accord's perspective might have multiple - * concrete implementations some of which are supplied by the Cassandra integration. - * The incoming type specifies the handling for writing out a message to the journal. - */ - final MessageType incomingType; - - /** - * The outgoing type is the type that will be returned to Accord; must be a subclass of the incoming type. - *

      - * This type will always be from accord.messages.MessageType and never from the extended types in the integration. - */ - final MessageType outgoingType; - - final TxnIdProvider txnIdProvider; - final ValueSerializer serializer; - - Type(int id, ValueSerializer serializer) - { - this(id, null, null, serializer, null); - } - - - Type(int id, MessageType incomingType, MessageType outgoingType, IVersionedSerializer serializer, TxnIdProvider txnIdProvider) - { - //noinspection unchecked - this(id, incomingType, outgoingType, MessageSerializer.wrap((IVersionedSerializer) serializer), txnIdProvider); - } - - Type(int id, MessageType type, IVersionedSerializer serializer, TxnIdProvider txnIdProvider) - { - //noinspection unchecked - this(id, type, type, MessageSerializer.wrap((IVersionedSerializer) serializer), txnIdProvider); - } - - Type(int id, MessageType incomingType, MessageType outgoingType, ValueSerializer serializer, TxnIdProvider txnIdProvider) - { - if (id < 0) - throw new IllegalArgumentException("Negative Type id " + id); - if (id > Byte.MAX_VALUE) - throw new IllegalArgumentException("Type id doesn't fit in a single byte: " + id); - - this.id = id; - this.incomingType = incomingType; - this.outgoingType = outgoingType; - //noinspection unchecked - this.serializer = (ValueSerializer) serializer; - this.txnIdProvider = txnIdProvider; - } - - private static final Type[] idToTypeMapping; - - static - { - Type[] types = values(); - - int maxId = -1; - for (Type type : types) - maxId = Math.max(type.id, maxId); - - Type[] idToType = new Type[maxId + 1]; - for (Type type : types) - { - if (null != idToType[type.id]) - throw new IllegalStateException("Duplicate Type id " + type.id); - idToType[type.id] = type; - } - idToTypeMapping = idToType; - - Map msgTypeToType = new HashMap<>(); - for (Type type : types) - { - if (null != type.incomingType && null != msgTypeToType.put(type.incomingType, type)) - throw new IllegalStateException("Duplicate MessageType " + type.incomingType); - } - ImmutableMap.copyOf(msgTypeToType); - } - - static Type fromId(int id) - { - if (id < 0 || id >= idToTypeMapping.length) - throw new IllegalArgumentException("Out or range Type id " + id); - Type type = idToTypeMapping[id]; - if (null == type) - throw new IllegalArgumentException("Unknown Type id " + id); - return type; - } - - @Override - public int serializedSize(JournalKey key, Object record, int userVersion) - { - return serializer.serializedSize(key, record, userVersion); - } - - @Override - public void serialize(JournalKey key, Object record, DataOutputPlus out, int userVersion) throws IOException - { - serializer.serialize(key, record, out, userVersion); - } - - @Override - public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException - { - return serializer.deserialize(key, in, userVersion); - } - } - - private static int msVersion(int version) - { - switch (version) - { - default: throw new IllegalArgumentException(); - case 1: return MessagingService.VERSION_51; - } - } - /* * Handling topology changes / epoch shift */ diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index eab58afefb1a..08fb1ce4c9b1 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -765,6 +765,9 @@ public static Mutation getCommandMutation(AccordCommandStore commandStore, Accor public static Mutation getCommandMutation(int storeId, Command original, Command command, long timestampMicros) { + if (command.saveStatus() == SaveStatus.Uninitialised) + return null; + try { Invariants.checkArgument(original != command); diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 46e0d8c15810..4262749991bb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -299,7 +299,7 @@ private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(attrs(false, true), EMPTY_TXNID, null));; final static long ACCEPTED = measure(Command.SerializerSupport.accepted(attrs(true, false), SaveStatus.Accepted, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO)); final static long COMMITTED = measure(Command.SerializerSupport.committed(attrs(true, true), SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, null)); - final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.EMPTY, EMPTY_WRITES, EMPTY_RESULT)); + final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.empty(EMPTY_TXNID.domain()), EMPTY_WRITES, EMPTY_RESULT)); final static long TRUNCATED = measure(Command.SerializerSupport.truncatedApply(attrs(false, false), SaveStatus.TruncatedApply, EMPTY_TXNID, null, null)); final static long INVALIDATED = measure(Command.SerializerSupport.invalidated(EMPTY_TXNID, null)); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index 0cafdfec0868..28220cbf471d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -127,7 +127,7 @@ public Command original() return original; } - public SavedCommand.SavedDiff diff() + public SavedCommand.Writer diff() { return SavedCommand.diff(original, current); } diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java index eb0f627c1c39..8e44ad2aacf2 100644 --- a/src/java/org/apache/cassandra/service/accord/IJournal.java +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -29,9 +29,9 @@ public interface IJournal /** * Append outcomes to the log. - * - * Returns whether an async flush was requested. If it returns false, all commands are guaranteed to be flushed by that time. - * If it returns false, onFlush runnable will run whenever flush is done. */ - void appendCommand(int commandStoreId, List command, List sanityCheck, Runnable onFlush); + void appendCommand(int commandStoreId, + List> command, + List sanityCheck, + Runnable onFlush); } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index 04688e1f2a64..aa0c7473965e 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -30,7 +30,6 @@ import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.utils.ByteArrayUtil; -import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; import static org.apache.cassandra.db.TypeSizes.SHORT_SIZE; @@ -38,19 +37,18 @@ public final class JournalKey { final Timestamp timestamp; - final AccordJournal.Type type; // TODO (desired): do we even need type here anymore? + // TODO: command store id _before_ timestamp final int commandStoreId; - JournalKey(Timestamp timestamp, AccordJournal.Type type) + JournalKey(Timestamp timestamp) { - this(timestamp, type, -1); + this(timestamp, -1); } - JournalKey(Timestamp timestamp, AccordJournal.Type type, int commandStoreId) + JournalKey(Timestamp timestamp, int commandStoreId) { - if (timestamp == null) throw new NullPointerException("Null timestamp for type " + type); + if (timestamp == null) throw new NullPointerException("Null timestamp"); this.timestamp = timestamp; - this.type = type; this.commandStoreId = commandStoreId; } @@ -67,8 +65,7 @@ public final class JournalKey private static final int HLC_OFFSET = 0; private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; private static final int NODE_OFFSET = EPOCH_AND_FLAGS_OFFSET + LONG_SIZE; - private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; - private static final int CS_ID_OFFSET = TYPE_OFFSET + BYTE_SIZE; + private static final int CS_ID_OFFSET = NODE_OFFSET + INT_SIZE; @Override public int serializedSize(int userVersion) @@ -77,7 +74,6 @@ public int serializedSize(int userVersion) + 6 // timestamp.epoch() + 2 // timestamp.flags() + INT_SIZE // timestamp.node - + BYTE_SIZE // type + SHORT_SIZE; // commandStoreId } @@ -85,33 +81,29 @@ public int serializedSize(int userVersion) public void serialize(JournalKey key, DataOutputPlus out, int userVersion) throws IOException { serializeTimestamp(key.timestamp, out); - out.writeByte(key.type.id); out.writeShort(key.commandStoreId); } private void serialize(JournalKey key, byte[] out) { serializeTimestamp(key.timestamp, out); - out[20] = (byte) (key.type.id & 0xFF); - ByteArrayUtil.putShort(out, 21, (short) key.commandStoreId); + ByteArrayUtil.putShort(out, 20, (short) key.commandStoreId); } @Override public JournalKey deserialize(DataInputPlus in, int userVersion) throws IOException { Timestamp timestamp = deserializeTimestamp(in); - int type = in.readByte(); int commandStoreId = in.readShort(); - return new JournalKey(timestamp, AccordJournal.Type.fromId(type), commandStoreId); + return new JournalKey(timestamp, commandStoreId); } @Override public JournalKey deserialize(ByteBuffer buffer, int position, int userVersion) { Timestamp timestamp = deserializeTimestamp(buffer, position); - int type = buffer.get(position + TYPE_OFFSET); int commandStoreId = buffer.getShort(position + CS_ID_OFFSET); - return new JournalKey(timestamp, AccordJournal.Type.fromId(type), commandStoreId); + return new JournalKey(timestamp, commandStoreId); } private void serializeTimestamp(Timestamp timestamp, DataOutputPlus out) throws IOException @@ -158,10 +150,6 @@ public int compareWithKeyAt(JournalKey k, ByteBuffer buffer, int position, int u int cmp = compareWithTimestampAt(k.timestamp, buffer, position); if (cmp != 0) return cmp; - byte type = buffer.get(position + TYPE_OFFSET); - cmp = Byte.compare((byte) k.type.id, type); - if (cmp != 0) return cmp; - short commandStoreId = buffer.getShort(position + CS_ID_OFFSET); cmp = Short.compare((byte) k.commandStoreId, commandStoreId); return cmp; @@ -186,7 +174,6 @@ private int compareWithTimestampAt(Timestamp timestamp, ByteBuffer buffer, int p public int compare(JournalKey k1, JournalKey k2) { int cmp = compare(k1.timestamp, k2.timestamp); - if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); if (cmp == 0) cmp = Short.compare((short) k1.commandStoreId, (short) k2.commandStoreId); return cmp; } @@ -225,22 +212,20 @@ public boolean equals(Object other) boolean equals(JournalKey other) { - return this.type == other.type && - this.timestamp.equals(other.timestamp) && + return this.timestamp.equals(other.timestamp) && this.commandStoreId == other.commandStoreId; } @Override public int hashCode() { - return Objects.hash(timestamp, type, commandStoreId); + return Objects.hash(timestamp, commandStoreId); } public String toString() { return "Key{" + "timestamp=" + timestamp + - ", type=" + type + ", commandStoreId=" + commandStoreId + '}'; } diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index 282773c6d981..bb1fe6da7383 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -20,9 +20,10 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.List; import java.util.function.Function; +import javax.annotation.Nullable; + import com.google.common.annotations.VisibleForTesting; import accord.api.Result; @@ -39,27 +40,25 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; -import accord.utils.Invariants; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.journal.ValueSerializer; +import org.apache.cassandra.journal.Journal; import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.DepsSerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.utils.Throwables; -import static org.apache.cassandra.db.TypeSizes.SHORT_SIZE; +import static accord.utils.Invariants.illegalState; public class SavedCommand { - public static final ValueSerializer serializer = new SavedCommandSerializer(); - // This enum is order-dependent - private enum HasFields + public enum Fields { TXN_ID, EXECUTE_AT, + EXECUTES_AT_LEAST, SAVE_STATUS, DURABILITY, ACCEPTED, @@ -73,536 +72,474 @@ private enum HasFields LISTENERS } - public final TxnId txnId; + public interface Writer extends Journal.Writer + { + void write(DataOutputPlus out, int userVersion) throws IOException; + K key(); + } - public final Timestamp executeAt; - public final SaveStatus saveStatus; - public final Status.Durability durability; + public static class DiffWriter implements Writer + { + private final Command before; + private final Command after; + private final TxnId txnId; - public final Ballot acceptedOrCommitted; - public final Ballot promised; + public DiffWriter(Command before, Command after) + { + this(after.txnId(), before, after); + } - public final Route route; - public final PartialTxn partialTxn; - public final PartialDeps partialDeps; - public final Seekables additionalKeysOrRanges; + public DiffWriter(TxnId txnId, Command before, Command after) + { + this.txnId = txnId; + this.before = before; + this.after = after; + } - public final Writes writes; - public final Listeners.Immutable listeners; + @VisibleForTesting + public Command before() + { + return before; + } - public SavedCommand(TxnId txnId, - Timestamp executeAt, - SaveStatus saveStatus, - Status.Durability durability, + @VisibleForTesting + public Command after() + { + return after; + } - Ballot acceptedOrCommitted, - Ballot promised, + public void write(DataOutputPlus out, int userVersion) throws IOException + { + serialize(before, after, out, userVersion); + } - Route route, - PartialTxn partialTxn, - PartialDeps partialDeps, - Seekables additionalKeysOrRanges, + public TxnId key() + { + return txnId; + } + } - Writes writes, - Listeners.Immutable listeners) + @Nullable + public static Writer diff(Command original, Command current) { - this.txnId = txnId; - this.executeAt = executeAt; - this.saveStatus = saveStatus; - this.durability = durability; - - this.acceptedOrCommitted = acceptedOrCommitted; - this.promised = promised; + if (original == current + || current == null + || current.saveStatus() == SaveStatus.Uninitialised) + return null; + return new SavedCommand.DiffWriter(original, current); + } - this.route = route; - this.partialTxn = partialTxn; - this.partialDeps = partialDeps; - this.additionalKeysOrRanges = additionalKeysOrRanges; - this.writes = writes; - this.listeners = listeners; + public static Writer diffWriter(Command before, Command after) + { + return new DiffWriter(before, after); } - public static SavedDiff diff(Command before, Command after) + public static void serialize(Command before, Command after, DataOutputPlus out, int userVersion) throws IOException { - if (before == after) - return null; + int flags = getFlags(before, after); + + out.writeInt(flags); + + // We encode all changed fields unless their value is null + if (getFieldChanged(Fields.TXN_ID, flags) && after.txnId() != null) + CommandSerializers.txnId.serialize(after.txnId(), out, userVersion); + if (getFieldChanged(Fields.EXECUTE_AT, flags) && after.executeAt() != null) + CommandSerializers.timestamp.serialize(after.executeAt(), out, userVersion); + // TODO (desired): check if this can fold into executeAt + if (getFieldChanged(Fields.EXECUTES_AT_LEAST, flags) && after.executesAtLeast() != null) + CommandSerializers.timestamp.serialize(after.executesAtLeast(), out, userVersion); + if (getFieldChanged(Fields.SAVE_STATUS, flags)) + out.writeInt(after.saveStatus().ordinal()); + if (getFieldChanged(Fields.DURABILITY, flags) && after.durability() != null) + out.writeInt(after.durability().ordinal()); + + if (getFieldChanged(Fields.ACCEPTED, flags) && after.acceptedOrCommitted() != null) + CommandSerializers.ballot.serialize(after.acceptedOrCommitted(), out, userVersion); + if (getFieldChanged(Fields.PROMISED, flags) && after.promised() != null) + CommandSerializers.ballot.serialize(after.promised(), out, userVersion); + + if (getFieldChanged(Fields.ROUTE, flags) && after.route() != null) + AccordKeyspace.LocalVersionedSerializers.route.serialize(after.route(), out); // TODO (required): user version + if (getFieldChanged(Fields.PARTIAL_TXN, flags) && after.partialTxn() != null) + CommandSerializers.partialTxn.serialize(after.partialTxn(), out, userVersion); + if (getFieldChanged(Fields.PARTIAL_DEPS, flags) && after.partialDeps() != null) + DepsSerializer.partialDeps.serialize(after.partialDeps(), out, userVersion); + if (getFieldChanged(Fields.ADDITIONAL_KEYS, flags) && after.additionalKeysOrRanges() != null) + KeySerializers.seekables.serialize(after.additionalKeysOrRanges(), out, userVersion); - // TODO: we do not need to save `waitingOn` _every_ time. Command.WaitingOn waitingOn = getWaitingOn(after); - return new SavedDiff(after.txnId(), - ifNotEqual(before, after, Command::executeAt, true), - ifNotEqual(before, after, Command::saveStatus, false), - ifNotEqual(before, after, Command::durability, false), - - ifNotEqual(before, after, Command::acceptedOrCommitted, false), - ifNotEqual(before, after, Command::promised, false), - - ifNotEqual(before, after, Command::route, true), - ifNotEqual(before, after, Command::partialTxn, false), - ifNotEqual(before, after, Command::partialDeps, false), - ifNotEqual(before, after, Command::additionalKeysOrRanges, false), - - waitingOn, - ifNotEqual(before, after, Command::writes, false), - ifNotEqual(before, after, Command::durableListeners, true)); - } + if (getFieldChanged(Fields.WAITING_ON, flags) && waitingOn != null) + { + long size = WaitingOnSerializer.serializedSize(waitingOn); + ByteBuffer serialized = WaitingOnSerializer.serialize(after.txnId(), waitingOn); + out.writeInt((int) size); + out.write(serialized); + } - static Command reconstructFromDiff(List diffs) - { - return reconstructFromDiff(diffs, CommandSerializers.APPLIED); + if (getFieldChanged(Fields.WRITES, flags) && after.writes() != null) + CommandSerializers.writes.serialize(after.writes(), out, userVersion); + + if (getFieldChanged(Fields.LISTENERS, flags) && after.durableListeners() != null) + { + out.writeByte(after.durableListeners().size()); + for (Command.DurableAndIdempotentListener listener : after.durableListeners()) + AccordKeyspace.LocalVersionedSerializers.listeners.serialize(listener, out); + } } - /** - * @param result is exposed because we are _not_ persisting result, since during loading or replay - * we do not expect we will have to send a result to the client, and data results - * can potentially contain a large number of entries, so it's best if they are not - * written into the log. - */ @VisibleForTesting - static Command reconstructFromDiff(List diffs, Result result) + static int getFlags(Command before, Command after) { - TxnId txnId = null; + int flags = 0; - Timestamp executeAt = null; - SaveStatus saveStatus = null; - Status.Durability durability = null; + flags = collectFlags(before, after, Command::txnId, true, Fields.TXN_ID, flags); + flags = collectFlags(before, after, Command::executeAt, true, Fields.EXECUTE_AT, flags); + flags = collectFlags(before, after, Command::executesAtLeast, true, Fields.EXECUTES_AT_LEAST, flags); + flags = collectFlags(before, after, Command::saveStatus, false, Fields.SAVE_STATUS, flags); + flags = collectFlags(before, after, Command::durability, false, Fields.DURABILITY, flags); - Ballot acceptedOrCommitted = Ballot.ZERO; - Ballot promised = null; + flags = collectFlags(before, after, Command::acceptedOrCommitted, false, Fields.ACCEPTED, flags); + flags = collectFlags(before, after, Command::promised, false, Fields.PROMISED, flags); - Route route = null; - PartialTxn partialTxn = null; - PartialDeps partialDeps = null; - Seekables additionalKeysOrRanges = null; + flags = collectFlags(before, after, Command::route, true, Fields.ROUTE, flags); + flags = collectFlags(before, after, Command::partialTxn, false, Fields.PARTIAL_TXN, flags); + flags = collectFlags(before, after, Command::partialDeps, false, Fields.PARTIAL_DEPS, flags); + flags = collectFlags(before, after, Command::additionalKeysOrRanges, false, Fields.ADDITIONAL_KEYS, flags); - WaitingOnProvider waitingOnProvider = null; - Writes writes = null; - Listeners.Immutable listeners = null; + flags = collectFlags(before, after, SavedCommand::getWaitingOn, false, Fields.WAITING_ON, flags); - for (LoadedDiff diff : diffs) - { - if (diff.txnId != null) - txnId = diff.txnId; - if (diff.executeAt != null) - executeAt = diff.executeAt; - if (diff.saveStatus != null) - saveStatus = diff.saveStatus; - if (diff.durability != null) - durability = diff.durability; - - if (diff.acceptedOrCommitted != null) - acceptedOrCommitted = diff.acceptedOrCommitted; - if (diff.promised != null) - promised = diff.promised; - - if (diff.route != null) - route = diff.route; - if (diff.partialTxn != null) - partialTxn = diff.partialTxn; - if (diff.partialDeps != null) - partialDeps = diff.partialDeps; - if (diff.additionalKeysOrRanges != null) - additionalKeysOrRanges = diff.additionalKeysOrRanges; - - if (diff.waitingOn != null) - waitingOnProvider = diff.waitingOn; - if (diff.writes != null) - writes = diff.writes; - if (diff.listeners != null) - listeners = diff.listeners; - } + flags = collectFlags(before, after, Command::writes, false, Fields.WRITES, flags); + flags = collectFlags(before, after, c -> c.durableListeners().isEmpty() ? null : c.durableListeners(), true, Fields.LISTENERS, flags); - CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); - if (partialTxn != null) - attrs.partialTxn(partialTxn); - if (durability != null) - attrs.durability(durability); - if (route != null) - attrs.route(route); - if (partialDeps != null && - (saveStatus.known.deps != Status.KnownDeps.NoDeps && - saveStatus.known.deps != Status.KnownDeps.DepsErased && - saveStatus.known.deps != Status.KnownDeps.DepsUnknown)) - attrs.partialDeps(partialDeps); - if (additionalKeysOrRanges != null) - attrs.additionalKeysOrRanges(additionalKeysOrRanges); - if (listeners != null && !listeners.isEmpty()) - attrs.setListeners(listeners); - - Command.WaitingOn waitingOn = null; - if (waitingOnProvider != null) - waitingOn = waitingOnProvider.provide(txnId, partialDeps); - - Invariants.checkState(saveStatus != null, - "Save status is null after applying %s", diffs); - switch (saveStatus.status) - { - case NotDefined: - return saveStatus == SaveStatus.Uninitialised ? Command.NotDefined.uninitialised(attrs.txnId()) - : Command.NotDefined.notDefined(attrs, promised); - case PreAccepted: - return Command.PreAccepted.preAccepted(attrs, executeAt, promised); - case AcceptedInvalidate: - case Accepted: - case PreCommitted: - return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); - case Committed: - case Stable: - return Command.Committed.committed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn); - case PreApplied: - case Applied: - return Command.Executed.executed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn, writes, result); - case Truncated: - case Invalidated: - default: - throw new IllegalStateException(); - } + return flags; + } + + static Command.WaitingOn getWaitingOn(Command command) + { + if (command instanceof Command.Committed) + return command.asCommitted().waitingOn(); + + return null; } - // TODO (required): this convert function was added only because AsyncOperationTest was failing without it; maybe after switching to loading from the log we can just pass l and r directly or remove != null checks. - private static VAL ifNotEqual(OBJ lo, OBJ ro, Function convert, boolean allowClassMismatch) + private static int collectFlags(OBJ lo, OBJ ro, Function convert, boolean allowClassMismatch, Fields field, int oldFlags) { VAL l = null; VAL r = null; if (lo != null) l = convert.apply(lo); if (ro != null) r = convert.apply(ro); + if (r == null) + oldFlags = setFieldIsNull(field, oldFlags); + if (l == r) - return null; + return oldFlags; // no change + if (l == null || r == null) - return r; + return setFieldChanged(field, oldFlags); + assert allowClassMismatch || l.getClass() == r.getClass() : String.format("%s != %s", l.getClass(), r.getClass()); if (l.equals(r)) - return null; + return oldFlags; // no change - return r; + return setFieldChanged(field, oldFlags); } - static Command.WaitingOn getWaitingOn(Command command) + private static int setFieldChanged(Fields field, int oldFlags) { - if (command instanceof Command.Committed) - return command.asCommitted().waitingOn(); - - return null; + return oldFlags | (1 << (field.ordinal() + Short.SIZE)); } - public static class SavedDiff extends SavedCommand + @VisibleForTesting + static boolean getFieldChanged(Fields field, int oldFlags) { - public final Command.WaitingOn waitingOn; + return (oldFlags & (1 << (field.ordinal() + Short.SIZE))) != 0; + } - public SavedDiff(TxnId txnId, - Timestamp executeAt, - SaveStatus saveStatus, - Status.Durability durability, + @VisibleForTesting + static boolean getFieldIsNull(Fields field, int oldFlags) + { + return (oldFlags & (1 << field.ordinal())) != 0; + } - Ballot acceptedOrCommitted, - Ballot promised, + private static int setFieldIsNull(Fields field, int oldFlags) + { + return oldFlags | (1 << field.ordinal()); + } - Route route, - PartialTxn partialTxn, - PartialDeps partialDeps, - Seekables additionalKeysOrRanges, - Command.WaitingOn waitingOn, - Writes writes, - Listeners.Immutable listeners) - { - super(txnId, executeAt, saveStatus, durability, acceptedOrCommitted, promised, route, partialTxn, partialDeps, additionalKeysOrRanges, writes, listeners); - this.waitingOn = waitingOn; - } + public static class Builder + { + TxnId txnId = null; - @Override - public String toString() - { - return "SavedDiff{" + - " txnId=" + txnId + - ", executeAt=" + executeAt + - ", saveStatus=" + saveStatus + - ", durability=" + durability + - ", acceptedOrCommitted=" + acceptedOrCommitted + - ", promised=" + promised + - ", route=" + route + - ", partialTxn=" + partialTxn + - ", partialDeps=" + partialDeps + - ", writes=" + writes + - ", waitingOn=" + waitingOn + - '}'; - } - } + Timestamp executeAt = null; + Timestamp executeAtLeast = null; + SaveStatus saveStatus = null; + Status.Durability durability = null; - public static class LoadedDiff extends SavedCommand - { - public final WaitingOnProvider waitingOn; + Ballot acceptedOrCommitted = Ballot.ZERO; + Ballot promised = null; - public LoadedDiff(TxnId txnId, - Timestamp executeAt, - SaveStatus saveStatus, - Status.Durability durability, + Route route = null; + PartialTxn partialTxn = null; + PartialDeps partialDeps = null; + Seekables additionalKeysOrRanges = null; - Ballot acceptedOrCommitted, - Ballot promised, + SavedCommand.WaitingOnProvider waitingOn = (txn, deps) -> null; + Writes writes = null; + Listeners.Immutable listeners = null; + Result result = CommandSerializers.APPLIED; - Route route, - PartialTxn partialTxn, - PartialDeps partialDeps, - Seekables additionalKeysOrRanges, + boolean nextCalled = false; + int count = 0; - WaitingOnProvider waitingOn, - Writes writes, - Listeners.Immutable listeners) + public int count() { - super(txnId, executeAt, saveStatus, durability, acceptedOrCommitted, promised, route, partialTxn, partialDeps, additionalKeysOrRanges, writes, listeners); - this.waitingOn = waitingOn; + return count; } - public String toString() - { - return "LoadedDiff{" + - "waitingOn=" + waitingOn + - '}'; - } - } - - final static class SavedCommandSerializer implements ValueSerializer - { - @Override - public int serializedSize(JournalKey key, Object value, int userVersion) + @SuppressWarnings({ "rawtypes", "unchecked" }) + public void deserializeNext(DataInputPlus in, int userVersion) throws IOException { - SavedDiff diff = (SavedDiff) value; - long size = 0; - size += SHORT_SIZE; // flags - - if (diff.txnId != null) - size += CommandSerializers.txnId.serializedSize(diff.txnId, userVersion); - if (diff.executeAt != null) - size += CommandSerializers.timestamp.serializedSize(diff.executeAt, userVersion); - if (diff.saveStatus != null) - size += Integer.BYTES; - if (diff.durability != null) - size += Integer.BYTES; - - if (diff.acceptedOrCommitted != null) - size += CommandSerializers.ballot.serializedSize(diff.acceptedOrCommitted, userVersion); - if (diff.promised != null) - size += CommandSerializers.ballot.serializedSize(diff.promised, userVersion); - - if (diff.route != null) - size += AccordKeyspace.LocalVersionedSerializers.route.serializedSize(diff.route); - if (diff.partialTxn != null) - CommandSerializers.partialTxn.serializedSize(diff.partialTxn, userVersion); - if (diff.partialDeps != null) - DepsSerializer.partialDeps.serializedSize(diff.partialDeps, userVersion); - if (diff.additionalKeysOrRanges != null) - KeySerializers.seekables.serializedSize(diff.additionalKeysOrRanges, userVersion); - - if (diff.waitingOn != null) + nextCalled = true; + count++; + + final int flags = in.readInt(); + + if (getFieldChanged(Fields.TXN_ID, flags)) { - size += Integer.BYTES; - size += WaitingOnSerializer.serializedSize(diff.waitingOn); + if (getFieldIsNull(Fields.TXN_ID, flags)) + txnId = null; + else + txnId = CommandSerializers.txnId.deserialize(in, userVersion); } - if (diff.writes != null) - CommandSerializers.writes.serializedSize(diff.writes, userVersion); + if (getFieldChanged(Fields.EXECUTE_AT, flags)) + { + if (getFieldIsNull(Fields.EXECUTE_AT, flags)) + executeAt = null; + else + executeAt = CommandSerializers.timestamp.deserialize(in, userVersion); + } - if (diff.listeners != null && !diff.listeners.isEmpty()) + if (getFieldChanged(Fields.EXECUTES_AT_LEAST, flags)) { - size += Byte.BYTES; - for (Command.DurableAndIdempotentListener listener : diff.listeners) - size += AccordKeyspace.LocalVersionedSerializers.listeners.serializedSize(listener); + if (getFieldIsNull(Fields.EXECUTES_AT_LEAST, flags)) + executeAtLeast = null; + else + executeAtLeast = CommandSerializers.timestamp.deserialize(in, userVersion); } - return (int) size; - } - @Override - public void serialize(JournalKey key, Object value, DataOutputPlus out, int userVersion) throws IOException - { - SavedDiff diff = (SavedDiff) value; - int flags = getFlags(diff); - - out.writeShort(flags); - - if (diff.txnId != null) - CommandSerializers.txnId.serialize(diff.txnId, out, userVersion); - if (diff.executeAt != null) - CommandSerializers.timestamp.serialize(diff.executeAt, out, userVersion); - if (diff.saveStatus != null) - out.writeInt(diff.saveStatus.ordinal()); - if (diff.durability != null) - out.writeInt(diff.durability.ordinal()); - - if (diff.acceptedOrCommitted != null) - CommandSerializers.ballot.serialize(diff.acceptedOrCommitted, out, userVersion); - if (diff.promised != null) - CommandSerializers.ballot.serialize(diff.promised, out, userVersion); - - if (diff.route != null) - AccordKeyspace.LocalVersionedSerializers.route.serialize(diff.route, out); // TODO (required): user version - if (diff.partialTxn != null) - CommandSerializers.partialTxn.serialize(diff.partialTxn, out, userVersion); - if (diff.partialDeps != null) - DepsSerializer.partialDeps.serialize(diff.partialDeps, out, userVersion); - if (diff.additionalKeysOrRanges != null) - KeySerializers.seekables.serialize(diff.additionalKeysOrRanges, out, userVersion); - - if (diff.waitingOn != null) + if (getFieldChanged(Fields.SAVE_STATUS, flags)) { - long size = WaitingOnSerializer.serializedSize(diff.waitingOn); - ByteBuffer serialized = WaitingOnSerializer.serialize(diff.txnId, diff.waitingOn); - out.writeInt((int) size); - out.write(serialized); + if (getFieldIsNull(Fields.SAVE_STATUS, flags)) + saveStatus = null; + else + saveStatus = SaveStatus.values()[in.readInt()]; + } + if (getFieldChanged(Fields.DURABILITY, flags)) + { + if (getFieldIsNull(Fields.DURABILITY, flags)) + durability = null; + else + durability = Status.Durability.values()[in.readInt()]; } - if (diff.writes != null) - CommandSerializers.writes.serialize(diff.writes, out, userVersion); + if (getFieldChanged(Fields.ACCEPTED, flags)) + { + if (getFieldIsNull(Fields.ACCEPTED, flags)) + acceptedOrCommitted = null; + else + acceptedOrCommitted = CommandSerializers.ballot.deserialize(in, userVersion); + } - if (diff.listeners != null && !diff.listeners.isEmpty()) + if (getFieldChanged(Fields.PROMISED, flags)) { - out.writeByte(diff.listeners.size()); - for (Command.DurableAndIdempotentListener listener : diff.listeners) - AccordKeyspace.LocalVersionedSerializers.listeners.serialize(listener, out); + if (getFieldIsNull(Fields.PROMISED, flags)) + promised = null; + else + promised = CommandSerializers.ballot.deserialize(in, userVersion); } - } + if (getFieldChanged(Fields.ROUTE, flags)) + { + if (getFieldIsNull(Fields.ROUTE, flags)) + route = null; + else + route = AccordKeyspace.LocalVersionedSerializers.route.deserialize(in); + } - private static int getFlags(SavedDiff diff) - { - int flags = 0; - - if (diff.txnId != null) - flags = setBit(flags, HasFields.TXN_ID.ordinal()); - if (diff.executeAt != null) - flags = setBit(flags, HasFields.EXECUTE_AT.ordinal()); - if (diff.saveStatus != null) - flags = setBit(flags, HasFields.SAVE_STATUS.ordinal()); - if (diff.durability != null) - flags = setBit(flags, HasFields.DURABILITY.ordinal()); - - if (diff.acceptedOrCommitted != null) - flags = setBit(flags, HasFields.ACCEPTED.ordinal()); - if (diff.promised != null) - flags = setBit(flags, HasFields.PROMISED.ordinal()); - - if (diff.route != null) - flags = setBit(flags, HasFields.ROUTE.ordinal()); - if (diff.partialTxn != null) - flags = setBit(flags, HasFields.PARTIAL_TXN.ordinal()); - if (diff.partialDeps != null) - flags = setBit(flags, HasFields.PARTIAL_DEPS.ordinal()); - if (diff.additionalKeysOrRanges != null) - flags = setBit(flags, HasFields.ADDITIONAL_KEYS.ordinal()); - - if (diff.waitingOn != null) - flags = setBit(flags, HasFields.WAITING_ON.ordinal()); - if (diff.writes != null) - flags = setBit(flags, HasFields.WRITES.ordinal()); - if (diff.listeners != null && !diff.listeners.isEmpty()) - flags = setBit(flags, HasFields.LISTENERS.ordinal()); - return flags; - } + if (getFieldChanged(Fields.PARTIAL_TXN, flags)) + { + if (getFieldIsNull(Fields.PARTIAL_TXN, flags)) + partialTxn = null; + else + partialTxn = CommandSerializers.partialTxn.deserialize(in, userVersion); + } - @Override - public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException - { - int flags = in.readShort(); - - TxnId txnId = null; - Timestamp executedAt = null; - SaveStatus saveStatus = null; - Status.Durability durability = null; - - Ballot acceptedOrCommitted = null; - Ballot promised = null; - Route route = null; - - PartialTxn partialTxn = null; - PartialDeps partialDeps = null; - Seekables additionalKeysOrRanges = null; - - WaitingOnProvider waitingOn = (txn, deps) -> null; - Writes writes = null; - Listeners.Immutable listeners = null; - - if (isSet(flags, HasFields.TXN_ID.ordinal())) - txnId = CommandSerializers.txnId.deserialize(in, userVersion); - if (isSet(flags, HasFields.EXECUTE_AT.ordinal())) - executedAt = CommandSerializers.timestamp.deserialize(in, userVersion); - if (isSet(flags, HasFields.SAVE_STATUS.ordinal())) - saveStatus = SaveStatus.values()[in.readInt()]; - if (isSet(flags, HasFields.DURABILITY.ordinal())) - durability = Status.Durability.values()[in.readInt()]; - - if (isSet(flags, HasFields.ACCEPTED.ordinal())) - acceptedOrCommitted = CommandSerializers.ballot.deserialize(in, userVersion); - if (isSet(flags, HasFields.PROMISED.ordinal())) - promised = CommandSerializers.ballot.deserialize(in, userVersion); - - if (isSet(flags, HasFields.ROUTE.ordinal())) - route = AccordKeyspace.LocalVersionedSerializers.route.deserialize(in); - if (isSet(flags, HasFields.PARTIAL_TXN.ordinal())) - partialTxn = CommandSerializers.partialTxn.deserialize(in, userVersion); - if (isSet(flags, HasFields.PARTIAL_DEPS.ordinal())) - partialDeps = DepsSerializer.partialDeps.deserialize(in, userVersion); - if (isSet(flags, HasFields.ADDITIONAL_KEYS.ordinal())) - additionalKeysOrRanges = KeySerializers.seekables.deserialize(in, userVersion); - - if (isSet(flags, HasFields.WAITING_ON.ordinal())) + if (getFieldChanged(Fields.PARTIAL_DEPS, flags)) { - int size = in.readInt(); - byte[] bytes = new byte[size]; - in.readFully(bytes); - ByteBuffer buffer = ByteBuffer.wrap(bytes); - waitingOn = (localTxnId, deps) -> { - try - { - return WaitingOnSerializer.deserialize(localTxnId, deps.keyDeps.keys(), deps.rangeDeps, deps.directKeyDeps, buffer); - } - catch (IOException e) - { - throw Throwables.unchecked(e); - } - }; + if (getFieldIsNull(Fields.PARTIAL_DEPS, flags)) + partialDeps = null; + else + partialDeps = DepsSerializer.partialDeps.deserialize(in, userVersion); } - if (isSet(flags, HasFields.WRITES.ordinal())) - writes = CommandSerializers.writes.deserialize(in, userVersion); - if (isSet(flags, HasFields.LISTENERS.ordinal())) + if (getFieldChanged(Fields.ADDITIONAL_KEYS, flags)) { - Listeners builder = Listeners.Immutable.EMPTY.mutable(); - int cnt = in.readByte(); - for (int i = 0; i < cnt; i++) - builder.add(AccordKeyspace.LocalVersionedSerializers.listeners.deserialize(in)); - listeners = new Listeners.Immutable(builder); + if (getFieldIsNull(Fields.ADDITIONAL_KEYS, flags)) + additionalKeysOrRanges = null; + else + additionalKeysOrRanges = KeySerializers.seekables.deserialize(in, userVersion); } - return new LoadedDiff(txnId, - executedAt, - saveStatus, - durability, + if (getFieldChanged(Fields.WAITING_ON, flags)) + { + if (getFieldIsNull(Fields.WAITING_ON, flags)) + { + waitingOn = null; + } + else + { + int size = in.readInt(); + byte[] bytes = new byte[size]; + in.readFully(bytes); + ByteBuffer buffer = ByteBuffer.wrap(bytes); + waitingOn = (localTxnId, deps) -> { + try + { + return WaitingOnSerializer.deserialize(localTxnId, deps.keyDeps.keys(), deps.rangeDeps, deps.directKeyDeps, buffer); + } + catch (IOException e) + { + throw Throwables.unchecked(e); + } + }; + } + } - acceptedOrCommitted, - promised, + if (getFieldChanged(Fields.WRITES, flags)) + { + if (getFieldIsNull(Fields.WRITES, flags)) + writes = null; + else + writes = CommandSerializers.writes.deserialize(in, userVersion); + } - route, - partialTxn, - partialDeps, - additionalKeysOrRanges, + if (getFieldChanged(Fields.LISTENERS, flags)) + { + if (getFieldIsNull(Fields.LISTENERS, flags)) + { + listeners = null; + } + else + { + Listeners builder = Listeners.Immutable.EMPTY.mutable(); + int cnt = in.readByte(); + for (int i = 0; i < cnt; i++) + builder.add(AccordKeyspace.LocalVersionedSerializers.listeners.deserialize(in)); + listeners = new Listeners.Immutable(builder); + } + } + } - waitingOn, - writes, - listeners); + public void forceResult(Result newValue) + { + this.result = newValue; } - } - static int setBit(int value, int bit) - { - return value | (1 << bit); - } + public Command construct() throws IOException + { + if (!nextCalled) + return null; + + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + if (partialTxn != null) + attrs.partialTxn(partialTxn); + if (durability != null) + attrs.durability(durability); + if (route != null) + attrs.route(route); + if (partialDeps != null && + (saveStatus.known.deps != Status.KnownDeps.NoDeps && + saveStatus.known.deps != Status.KnownDeps.DepsErased && + saveStatus.known.deps != Status.KnownDeps.DepsUnknown)) + attrs.partialDeps(partialDeps); + if (additionalKeysOrRanges != null) + attrs.additionalKeysOrRanges(additionalKeysOrRanges); + if (listeners != null && !listeners.isEmpty()) + attrs.setListeners(listeners); + + Command.WaitingOn waitingOn = null; + if (this.waitingOn != null) + waitingOn = this.waitingOn.provide(txnId, partialDeps); + + switch (saveStatus.status) + { + case NotDefined: + return saveStatus == SaveStatus.Uninitialised ? Command.NotDefined.uninitialised(attrs.txnId()) + : Command.NotDefined.notDefined(attrs, promised); + case PreAccepted: + return Command.PreAccepted.preAccepted(attrs, executeAt, promised); + case AcceptedInvalidate: + case Accepted: + case PreCommitted: + return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); + case Committed: + case Stable: + return Command.Committed.committed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn); + case PreApplied: + case Applied: + return Command.Executed.executed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn, writes, result); + case Truncated: + case Invalidated: + return truncated(attrs, saveStatus, executeAt, executeAtLeast, writes, result); + default: + throw new IllegalStateException(); + } + } - static boolean isSet(int value, int bit) - { - return (value & (1 << bit)) != 0; + private static Command.Truncated truncated(CommonAttributes.Mutable attrs, SaveStatus status, Timestamp executeAt, Timestamp executesAtLeast, Writes writes, Result result) + { + switch (status) + { + default: + throw illegalState("Unhandled SaveStatus: " + status); + case TruncatedApplyWithOutcome: + case TruncatedApplyWithDeps: + case TruncatedApply: + if (attrs.txnId().kind().awaitsOnlyDeps()) + return Command.Truncated.truncatedApply(attrs, status, executeAt, writes, result, executesAtLeast); + return Command.Truncated.truncatedApply(attrs, status, executeAt, writes, result, null); + case ErasedOrInvalidOrVestigial: + return Command.Truncated.erasedOrInvalidOrVestigial(attrs.txnId(), attrs.durability(), attrs.route()); + case Erased: + return Command.Truncated.erased(attrs.txnId(), attrs.durability(), attrs.route()); + case Invalidated: + return Command.Truncated.invalidated(attrs.txnId(), attrs.durableListeners()); + } + } + + public String toString() + { + return "Diff {" + + "txnId=" + txnId + + ", executeAt=" + executeAt + + ", saveStatus=" + saveStatus + + ", durability=" + durability + + ", acceptedOrCommitted=" + acceptedOrCommitted + + ", promised=" + promised + + ", route=" + route + + ", partialTxn=" + partialTxn + + ", partialDeps=" + partialDeps + + ", additionalKeysOrRanges=" + additionalKeysOrRanges + + ", waitingOn=" + waitingOn + + ", writes=" + writes + + ", listeners=" + listeners + + '}'; + } } public interface WaitingOnProvider diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 34d80250a249..0b984446dc27 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -251,34 +251,31 @@ protected boolean runInternal(boolean loadOnly) result = apply(safeStore); // TODO (required): currently, we are not very efficient about ensuring that we persist the absolute minimum amount of state. Improve that. - List diffs = null; + List> diffs = null; for (AccordSafeCommand commandState : context.commands.values()) { - SavedCommand.SavedDiff diff = commandState.diff(); - if (diff != null) + SavedCommand.Writer diff = commandState.diff(); + if (diff == null) + continue; + if (diffs == null) + diffs = new ArrayList<>(context.commands.size()); + diffs.add(diff); + if (CassandraRelevantProperties.DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED.getBoolean()) { - if (diffs == null) - diffs = new ArrayList<>(context.commands.size()); - diffs.add(diff); - if (CassandraRelevantProperties.DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED.getBoolean()) - { - if (sanityCheck == null) - sanityCheck = new ArrayList<>(context.commands.size()); - sanityCheck.add(commandState.current()); - } + if (sanityCheck == null) + sanityCheck = new ArrayList<>(context.commands.size()); + sanityCheck.add(commandState.current()); } } commandStore.completeOperation(safeStore); context.releaseResources(commandStore); + state(COMPLETING); if (diffs != null) { - state(COMPLETING); this.commandStore.appendCommands(diffs, sanityCheck, () -> finish(result, null)); return false; } - - state(COMPLETING); case COMPLETING: finish(result, null); case FINISHED: diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index 9bd2e0082a65..fab09f235423 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -92,7 +92,7 @@ public static WaitingOn deserialize(TxnId txnId, Keys keys, RangeDeps directRang int a = VIntCoding.readUnsignedVInt32(in, position); position += TypeSizes.sizeofUnsignedVInt(a); int b = VIntCoding.readUnsignedVInt32(in, position); - position += TypeSizes.sizeofUnsignedVInt(a); + position += TypeSizes.sizeofUnsignedVInt(b); ImmutableBitSet waitingOn = deserialize(position, waitingOnLength, in); ImmutableBitSet appliedOrInvalidated = null; if (txnId.domain() == Routable.Domain.Range) @@ -110,7 +110,7 @@ private static ImmutableBitSet deserialize(int position, int length, ByteBuffer for (int i = 0 ; i < length ; ++i) { bits[i] = in.getLong(position); - position += 8; + position += Long.BYTES; } return ImmutableBitSet.SerializationSupport.construct(bits); } diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 3e1f75f62aa3..a955a9c315dc 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -118,7 +118,7 @@ public class CompactionAccordIteratorsTest private static final TxnId LT_TXN_ID = AccordTestUtils.txnId(EPOCH, HLC_START, NODE); private static final TxnId TXN_ID = AccordTestUtils.txnId(EPOCH, LT_TXN_ID.hlc() + 1, NODE); private static final TxnId SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 1, NODE, Kind.Read); - private static final TxnId RANGE_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 1, NODE, Kind.Read, Routable.Domain.Range); + private static final TxnId RANGE_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 2, NODE, Kind.Read, Routable.Domain.Range); private static final TxnId GT_TXN_ID = SECOND_TXN_ID; // For CommandsForKey where we test with two commands private static final TxnId[] TXN_IDS = new TxnId[]{ TXN_ID, SECOND_TXN_ID }; @@ -380,10 +380,10 @@ Consumer> expectAccordCommandsNoChange() }; } - private static RedundantBefore redundantBefore(TxnId txnId) { Ranges ranges = AccordTestUtils.fullRange(AccordTestUtils.keys(table, 42)); + txnId = txnId.as(Kind.Read, Routable.Domain.Range); return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, txnId, LT_TXN_ID); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java index 0f2b9d395be2..2c6dc9221c19 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java @@ -20,7 +20,6 @@ import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Random; @@ -75,22 +74,10 @@ public void simpleKeyTest() for (int i = 0; i < 10_000; i++) { TxnId txnId = randomSource.nextBoolean() ? id1 : id2; - JournalKey key = new JournalKey(txnId, AccordJournal.Type.SAVED_COMMAND, randomSource.nextInt(5)); + JournalKey key = new JournalKey(txnId, randomSource.nextInt(5)); res.compute(key, (k, prev) -> prev == null ? 1 : prev + 1); accordJournal.appendCommand(key.commandStoreId, - Collections.singletonList(new SavedCommand.SavedDiff(txnId, - AccordGens.timestamps().next(randomSource), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null)), + Collections.singletonList(new SavedCommand.DiffWriter(txnId, null, null)), null, () -> {}); } @@ -98,8 +85,9 @@ public void simpleKeyTest() Runnable check = () -> { for (JournalKey key : res.keySet()) { - List diffs = accordJournal.loadDiffs(key.commandStoreId, key.timestamp); - Assert.assertEquals(diffs.size(), res.get(key).intValue()); + SavedCommand.Builder diffs = accordJournal.loadDiffs(key.commandStoreId, (TxnId) key.timestamp); + Assert.assertEquals(String.format("%d != %d for key %s", diffs.count(), res.get(key).intValue(), key), + diffs.count(), res.get(key).intValue()); } }; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java index 375fffa8fecb..75a07196e220 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java @@ -122,7 +122,6 @@ private static ByteBuffer toBuffer(JournalKey k) private Gen keyGen() { Gen txnIdGen = AccordGens.txnIds(); - Gen typeGen = Gens.enums().all(AccordJournal.Type.class); - return rs -> new JournalKey(txnIdGen.next(rs), typeGen.next(rs)); + return rs -> new JournalKey(txnIdGen.next(rs)); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 999ae875766e..bfbf4e509594 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -152,7 +152,7 @@ public static Command stable(TxnId txnId, PartialTxn txn, Timestamp executeAt) executeAt, Ballot.ZERO, Ballot.ZERO, - Command.WaitingOn.EMPTY); + Command.WaitingOn.empty(txnId.domain())); } private static FullRoute route(PartialTxn txn) @@ -517,12 +517,10 @@ public static void appendCommandsBlocking(AccordCommandStore commandStore, Comma public static void appendCommandsBlocking(AccordCommandStore commandStore, Command before, Command after) { - SavedCommand.SavedDiff diff = SavedCommand.diff(before, after); - if (diff != null) - { - Condition condition = Condition.newOneTimeCondition(); - commandStore.appendCommands(Collections.singletonList(diff), null, condition::signal); - condition.awaitUninterruptibly(30, TimeUnit.SECONDS); - } + SavedCommand.Writer diff = SavedCommand.diff(before, after); + if (diff == null) return; + Condition condition = Condition.newOneTimeCondition(); + commandStore.appendCommands(Collections.singletonList(diff), null, condition::signal); + condition.awaitUninterruptibly(30, TimeUnit.SECONDS); } } diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index d09ff6eefff0..8f3192afa5b0 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -18,52 +18,324 @@ package org.apache.cassandra.service.accord; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; +import java.util.function.Function; +import com.google.common.annotations.VisibleForTesting; + +import accord.api.Result; import accord.local.Command; +import accord.local.CommonAttributes; +import accord.local.Listeners; +import accord.local.SaveStatus; +import accord.local.Status; +import accord.primitives.Ballot; +import accord.primitives.PartialDeps; +import accord.primitives.PartialTxn; +import accord.primitives.Route; +import accord.primitives.Seekables; +import accord.primitives.Timestamp; import accord.primitives.TxnId; -import org.apache.cassandra.service.accord.AccordJournal.Type; +import accord.primitives.Writes; +import accord.utils.Invariants; +import org.apache.cassandra.service.accord.serializers.CommandSerializers; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; public class MockJournal implements IJournal { - private final Map> commands = new HashMap<>(); + private final Map> commands = new HashMap<>(); @Override public Command loadCommand(int commandStoreId, TxnId txnId) { - Type type = Type.SAVED_COMMAND; - JournalKey key = new JournalKey(txnId, type, commandStoreId); - List saved = commands.get(key); + JournalKey key = new JournalKey(txnId, commandStoreId); + List saved = commands.get(key); if (saved == null) return null; - return SavedCommand.reconstructFromDiff(new ArrayList<>(saved)); + return reconstructFromDiff(new ArrayList<>(saved)); } @Override - public void appendCommand(int commandStoreId, List diffs, List sanityCheck, Runnable onFlush) + public void appendCommand(int commandStoreId, List> diffs, List sanityCheck, Runnable onFlush) { - Type type = Type.SAVED_COMMAND; - for (SavedCommand.SavedDiff diff : diffs) + for (SavedCommand.Writer diff : diffs) { - JournalKey key = new JournalKey(diff.txnId, type, commandStoreId); + SavedCommand.DiffWriter writer = (SavedCommand.DiffWriter) diff; + + JournalKey key = new JournalKey(diff.key(), commandStoreId); commands.computeIfAbsent(key, (ignore_) -> new ArrayList<>()) - .add(new SavedCommand.LoadedDiff(diff.txnId, - diff.executeAt, - diff.saveStatus, - diff.durability, - diff.acceptedOrCommitted, - diff.promised, - diff.route, - diff.partialTxn, - diff.partialDeps, - diff.additionalKeysOrRanges, - (i1, i2) -> diff.waitingOn, - diff.writes, - diff.listeners)); + .add(diff(writer.before(), writer.after())); } onFlush.run(); } + + /** + * Emulating journal behaviour + */ + + public static LoadedDiff diff(Command before, Command after) + { + if (before == after) + return null; + + // TODO: we do not need to save `waitingOn` _every_ time. + Command.WaitingOn waitingOn = getWaitingOn(after); + return new LoadedDiff(after.txnId(), + ifNotEqual(before, after, Command::executeAt, true), + ifNotEqual(before, after, Command::saveStatus, false), + ifNotEqual(before, after, Command::durability, false), + + ifNotEqual(before, after, Command::acceptedOrCommitted, false), + ifNotEqual(before, after, Command::promised, false), + + ifNotEqual(before, after, Command::route, true), + ifNotEqual(before, after, Command::partialTxn, false), + ifNotEqual(before, after, Command::partialDeps, false), + ifNotEqual(before, after, Command::additionalKeysOrRanges, false), + + new NewValue<>((k, deps) -> waitingOn), + ifNotEqual(before, after, Command::writes, false), + ifNotEqual(before, after, Command::durableListeners, true)); + } + + static Command reconstructFromDiff(List diffs) + { + return reconstructFromDiff(diffs, CommandSerializers.APPLIED); + } + + /** + * @param result is exposed because we are _not_ persisting result, since during loading or replay + * we do not expect we will have to send a result to the client, and data results + * can potentially contain a large number of entries, so it's best if they are not + * written into the log. + */ + @VisibleForTesting + static Command reconstructFromDiff(List diffs, Result result) + { + TxnId txnId = null; + + Timestamp executeAt = null; + SaveStatus saveStatus = null; + Status.Durability durability = null; + + Ballot acceptedOrCommitted = Ballot.ZERO; + Ballot promised = null; + + Route route = null; + PartialTxn partialTxn = null; + PartialDeps partialDeps = null; + Seekables additionalKeysOrRanges = null; + + SavedCommand.WaitingOnProvider waitingOnProvider = null; + Writes writes = null; + Listeners.Immutable listeners = null; + + for (LoadedDiff diff : diffs) + { + if (diff.txnId != null) + txnId = diff.txnId; + if (diff.executeAt != null) + executeAt = diff.executeAt.get(); + if (diff.saveStatus != null) + saveStatus = diff.saveStatus.get(); + if (diff.durability != null) + durability = diff.durability.get(); + + if (diff.acceptedOrCommitted != null) + acceptedOrCommitted = diff.acceptedOrCommitted.get(); + if (diff.promised != null) + promised = diff.promised.get(); + + if (diff.route != null) + route = diff.route.get(); + if (diff.partialTxn != null) + partialTxn = diff.partialTxn.get(); + if (diff.partialDeps != null) + partialDeps = diff.partialDeps.get(); + if (diff.additionalKeysOrRanges != null) + additionalKeysOrRanges = diff.additionalKeysOrRanges.get(); + + if (diff.waitingOn != null) + waitingOnProvider = diff.waitingOn.get(); + if (diff.writes != null) + writes = diff.writes.get(); + if (diff.listeners != null) + listeners = diff.listeners.get(); + } + + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); + if (partialTxn != null) + attrs.partialTxn(partialTxn); + if (durability != null) + attrs.durability(durability); + if (route != null) + attrs.route(route); + if (partialDeps != null && + (saveStatus.known.deps != Status.KnownDeps.NoDeps && + saveStatus.known.deps != Status.KnownDeps.DepsErased && + saveStatus.known.deps != Status.KnownDeps.DepsUnknown)) + attrs.partialDeps(partialDeps); + if (additionalKeysOrRanges != null) + attrs.additionalKeysOrRanges(additionalKeysOrRanges); + if (listeners != null && !listeners.isEmpty()) + attrs.setListeners(listeners); + + Command.WaitingOn waitingOn = null; + if (waitingOnProvider != null) + waitingOn = waitingOnProvider.provide(txnId, partialDeps); + + Invariants.checkState(saveStatus != null, + "Save status is null after applying %s", diffs); + switch (saveStatus.status) + { + case NotDefined: + return saveStatus == SaveStatus.Uninitialised ? Command.NotDefined.uninitialised(attrs.txnId()) + : Command.NotDefined.notDefined(attrs, promised); + case PreAccepted: + return Command.PreAccepted.preAccepted(attrs, executeAt, promised); + case AcceptedInvalidate: + case Accepted: + case PreCommitted: + return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); + case Committed: + case Stable: + return Command.Committed.committed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn); + case PreApplied: + case Applied: + return Command.Executed.executed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn, writes, result); + case Truncated: + case Invalidated: + default: + throw new IllegalStateException(); + } + } + + // TODO (required): this convert function was added only because AsyncOperationTest was failing without it; + // maybe after switching to loading from the log we can just pass l and r directly or remove != null checks. + private static NewValue ifNotEqual(OBJ lo, OBJ ro, Function convert, boolean allowClassMismatch) + { + VAL l = null; + VAL r = null; + if (lo != null) l = convert.apply(lo); + if (ro != null) r = convert.apply(ro); + + if (l == r) + return null; // null here means there was no change + + if (l == null || r == null) + return NewValue.of(r); + + assert allowClassMismatch || l.getClass() == r.getClass() : String.format("%s != %s", l.getClass(), r.getClass()); + + if (l.equals(r)) + return null; + + return NewValue.of(r); + } + + + public static class NewValue + { + final T value; + + private NewValue(T value) + { + this.value = value; + } + + public boolean isNull() + { + return value == null; + } + + public T get() + { + return value; + } + + public static NewValue of(T value) + { + return new NewValue<>(value); + } + + public String toString() + { + return "" + value; + } + } + + static Command.WaitingOn getWaitingOn(Command command) + { + if (command instanceof Command.Committed) + return command.asCommitted().waitingOn(); + + return null; + } + + public static class LoadedDiff extends SavedCommand + { + public final TxnId txnId; + + public final NewValue executeAt; + public final NewValue saveStatus; + public final NewValue durability; + + public final NewValue acceptedOrCommitted; + public final NewValue promised; + + public final NewValue> route; + public final NewValue partialTxn; + public final NewValue partialDeps; + public final NewValue> additionalKeysOrRanges; + + public final NewValue writes; + public final NewValue> listeners; + public final NewValue waitingOn; + + public LoadedDiff(TxnId txnId, + NewValue executeAt, + NewValue saveStatus, + NewValue durability, + + NewValue acceptedOrCommitted, + NewValue promised, + + NewValue> route, + NewValue partialTxn, + NewValue partialDeps, + NewValue> additionalKeysOrRanges, + + NewValue waitingOn, + NewValue writes, + NewValue> listeners) + { + this.txnId = txnId; + this.executeAt = executeAt; + this.saveStatus = saveStatus; + this.durability = durability; + + this.acceptedOrCommitted = acceptedOrCommitted; + this.promised = promised; + + this.route = route; + this.partialTxn = partialTxn; + this.partialDeps = partialDeps; + this.additionalKeysOrRanges = additionalKeysOrRanges; + + this.writes = writes; + this.listeners = listeners; + + this.waitingOn = waitingOn; + } + + public String toString() + { + return "LoadedDiff{" + + "waitingOn=" + waitingOn + + '}'; + } + } } diff --git a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java new file mode 100644 index 000000000000..5135bde35f8c --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.EnumSet; +import java.util.Set; + +import com.google.common.collect.Sets; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.Command; +import accord.local.SaveStatus; +import accord.primitives.TxnId; +import accord.utils.Gen; +import accord.utils.LazyToString; +import accord.utils.ReflectionUtils; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.SavedCommand.Fields; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.utils.AccordGenerators; +import org.assertj.core.api.SoftAssertions; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.SavedCommand.getFlags; + +public class SavedCommandTest +{ + private static final EnumSet ALL = EnumSet.allOf(Fields.class); + + @BeforeClass + public static void beforeClass() throws Throwable + { + SchemaLoader.prepareServer(); + SchemaLoader.createKeyspace("ks", KeyspaceParams.simple(1), + parse("CREATE TABLE tbl (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'", "ks")); + TableMetadata tbl = Schema.instance.getTableMetadata("ks", "tbl"); + Assert.assertEquals(TransactionalMode.full, tbl.params.transactionalMode); + StorageService.instance.initServer(); + } + + @Test + public void allNull() + { + int flags = getFlags(null, null); + assertMissing(flags, ALL); + } + + @Test + public void simpleNullChangeCheck() + { + int flags = getFlags(null, Command.NotDefined.uninitialised(TxnId.NONE)); + EnumSet has = EnumSet.of(Fields.TXN_ID, Fields.SAVE_STATUS, Fields.DURABILITY, Fields.PROMISED, + Fields.ACCEPTED /* this is Zero... which kinda means null... */); + Set missing = Sets.difference(ALL, has); + assertHas(flags, has); + assertMissing(flags, missing); + } + + @Test + public void serde() + { + Gen gen = AccordGenerators.commandsBuilder(); + try (DataOutputBuffer out = new DataOutputBuffer()) + { + qt().forAll(gen).check(cmdBuilder -> { + int userVersion = 1; //TODO (maintance): where can we fetch all supported versions? + SoftAssertions checks = new SoftAssertions(); + for (SaveStatus saveStatus : SaveStatus.values()) + { + if (saveStatus == SaveStatus.TruncatedApplyWithDeps) continue; + out.clear(); + Command orig = cmdBuilder.build(saveStatus); + SavedCommand.serialize(null, orig, out, userVersion); + SavedCommand.Builder builder = new SavedCommand.Builder(); + builder.deserializeNext(new DataInputBuffer(out.unsafeGetBufferAndFlip(), false), userVersion); + // We are not persisting the result, so force it for strict equality + builder.forceResult(orig.result()); + + Command reconstructed = builder.construct(); + + checks.assertThat(reconstructed) + .describedAs("lhs=expected\nrhs=actual\n%s", new LazyToString(() -> ReflectionUtils.recursiveEquals(orig, reconstructed).toString())) + .isEqualTo(orig); + } + checks.assertAll(); + }); + } + } + + private void assertHas(int flags, Set missing) + { + SoftAssertions checks = new SoftAssertions(); + for (Fields field : missing) + { + checks.assertThat(SavedCommand.getFieldChanged(field, flags)) + .describedAs("field %s changed", field). + isTrue(); + checks.assertThat(SavedCommand.getFieldIsNull(field, flags)) + .describedAs("field %s not null", field) + .isFalse(); + } + checks.assertAll(); + } + + private void assertMissing(int flags, Set missing) + { + SoftAssertions checks = new SoftAssertions(); + for (Fields field : missing) + { + checks.assertThat(SavedCommand.getFieldChanged(field, flags)) + .describedAs("field %s changed", field) + .isFalse(); + checks.assertThat(SavedCommand.getFieldIsNull(field, flags)) + .describedAs("field %s not null", field) + .isTrue(); + } + checks.assertAll(); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 11ed33b42fab..f80e036b5061 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -143,6 +143,21 @@ public void optionalCommandTest() throws Throwable Assert.assertTrue(result.isEmpty()); } + @Test + public void touchUnknownTxn() throws Throwable + { + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + TxnId txnId = txnId(1, clock.incrementAndGet(), 1); + + getUninterruptibly(commandStore.execute(contextFor(txnId), safe -> { + SafeCommand command = safe.get(txnId, txnId, safe.ranges().currentRanges()); + Assert.assertNotNull(command); + })); + + UntypedResultSet result = AccordKeyspace.loadCommandRow(commandStore, txnId); + Assert.assertTrue(result.isEmpty()); + } + @Test public void optionalCommandsForKeyTest() throws Throwable { diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 81462c0e6552..c2a2b32af159 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -166,12 +166,12 @@ Command toCommand() case Stable: case ReadyToExecute: - return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, ballot, ballot, Command.WaitingOn.EMPTY); + return Command.SerializerSupport.committed(attributes(), saveStatus, executeAt, ballot, ballot, Command.WaitingOn.empty(txnId.domain())); case PreApplied: case Applying: case Applied: - return Command.SerializerSupport.executed(attributes(), saveStatus, executeAt, ballot, ballot, Command.WaitingOn.EMPTY, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); + return Command.SerializerSupport.executed(attributes(), saveStatus, executeAt, ballot, ballot, Command.WaitingOn.empty(txnId.domain()), new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); case TruncatedApplyWithDeps: case TruncatedApply: diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java index f83a1099773d..ce5aabba9fd7 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java @@ -73,7 +73,7 @@ private static Gen waitingOnGen() Gen sets = Gens.enums().all(WaitingOnSets.class); return rs -> { Deps deps = depsGen.next(rs); - if (deps.isEmpty()) return Command.WaitingOn.EMPTY; + if (deps.isEmpty()) return Command.WaitingOn.empty(Routable.Domain.Key); int txnIdCount = deps.rangeDeps.txnIdCount() + deps.directKeyDeps.txnIdCount(); int keyCount = deps.keyDeps.keys().size(); int[] selected = Gens.arrays(Gens.ints().between(0, txnIdCount + keyCount - 1)).unique().ofSizeBetween(0, txnIdCount + keyCount).next(rs); diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index f587fd85af30..19c21f321576 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -20,6 +20,7 @@ import java.math.BigInteger; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -27,22 +28,30 @@ import java.util.stream.Stream; import accord.local.Command; +import accord.local.CommonAttributes; +import accord.local.Listeners; import accord.local.RedundantBefore; +import accord.local.SaveStatus; +import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.KeyDeps; +import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Range; import accord.primitives.RangeDeps; import accord.primitives.Ranges; import accord.primitives.Routable; +import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Writes; import accord.utils.AccordGens; import accord.utils.Gen; import accord.utils.Gens; import accord.utils.RandomSource; +import accord.utils.TriFunction; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.dht.AccordSplitter; import org.apache.cassandra.dht.IPartitioner; @@ -52,8 +61,11 @@ import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnWrite; import org.quicktheories.impl.JavaRandom; +import static accord.local.Status.Durability.NotDurable; import static org.apache.cassandra.service.accord.AccordTestUtils.TABLE_ID1; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; @@ -71,7 +83,7 @@ public static Gen partitioner() } private enum SupportedCommandTypes - {notDefined, preaccepted, committed} + {notDefined, preaccepted, committed, stable} public static Gen commands() { @@ -81,13 +93,18 @@ public static Gen commands() //TODO goes against fuzz testing, and also limits to a very specific table existing... // There is a branch that can generate random transactions, so maybe look into that? PartialTxn txn = createPartialTxn(0); - FullRoute route = txn.keys().toRoute(txn.keys().get(0).someIntersectingRoutingKey(null)); return rs -> { TxnId id = ids.next(rs); - Timestamp executeAt = id; + TxnId executeAt = id; if (rs.nextBoolean()) executeAt = ids.next(rs); + if (executeAt.compareTo(id) < 0) + { + TxnId tmp = id; + id = executeAt; + executeAt = tmp; + } SupportedCommandTypes targetType = supportedTypes.next(rs); switch (targetType) { @@ -97,12 +114,159 @@ public static Gen commands() return AccordTestUtils.Commands.preaccepted(id, txn, executeAt); case committed: return AccordTestUtils.Commands.committed(id, txn, executeAt); + case stable: + return AccordTestUtils.Commands.stable(id, txn, executeAt); default: throw new UnsupportedOperationException("Unexpected type: " + targetType); } }; } + public enum RecoveryStatus { None, Started, Complete } + + public static Gen commandsBuilder() + { + return commandsBuilder(AccordGens.txnIds(), Gens.bools().all(), Gens.enums().all(RecoveryStatus.class), (rs, txnId, txn) -> AccordGens.depsFor(txnId, txn).next(rs)); + } + + public static Gen commandsBuilder(Gen txnIdGen, Gen fastPath, Gen recover, TriFunction depsGen) + { + return rs -> { + TxnId txnId = txnIdGen.next(rs); + Txn txn = AccordTestUtils.createTxn(0, 0); + Deps deps = depsGen.apply(rs, txnId, txn); + Timestamp executeAt = fastPath.next(rs) ? txnId + : AccordGens.timestamps(AccordGens.epochs(txnId.epoch()), + AccordGens.hlcs(txnId.hlc()), + AccordGens.flags(), + RandomSource::nextInt).next(rs); + Ranges slice = AccordTestUtils.fullRange(txn); + PartialTxn partialTxn = txn.slice(slice, true); //TODO (correctness): find the case where includeQuery=false and replicate + PartialDeps partialDeps = deps.intersecting(slice); + Ballot promised; + Ballot accepted; + switch (recover.next(rs)) + { + case None: + { + promised = Ballot.ZERO; + accepted = Ballot.ZERO; + } + break; + case Started: + { + promised = AccordGens.ballot(AccordGens.epochs(executeAt.epoch()), + AccordGens.hlcs(executeAt.hlc()), + AccordGens.flags(), + RandomSource::nextInt).next(rs); + accepted = Ballot.ZERO; + } + break; + case Complete: + { + promised = accepted = AccordGens.ballot(AccordGens.epochs(executeAt.epoch()), + AccordGens.hlcs(executeAt.hlc()), + AccordGens.flags(), + RandomSource::nextInt).next(rs); + } + break; + default: + throw new UnsupportedOperationException(); + } + + Command.WaitingOn waitingOn = Command.WaitingOn.none(txnId.domain(), deps); + return new CommandBuilder(txnId, txn, executeAt, partialTxn, partialDeps, promised, accepted, waitingOn); + }; + } + + public static class CommandBuilder + { + public final TxnId txnId; + public final FullRoute route; + public final Seekables keysOrRanges; + private final Timestamp executeAt; + private final PartialTxn partialTxn; + private final PartialDeps partialDeps; + private final Ballot promised, accepted; + private final Command.WaitingOn waitingOn; + + public CommandBuilder(TxnId txnId, Txn txn, Timestamp executeAt, PartialTxn partialTxn, PartialDeps partialDeps, Ballot promised, Ballot accepted, Command.WaitingOn waitingOn) + { + this.txnId = txnId; + this.executeAt = executeAt; + this.partialTxn = partialTxn; + this.partialDeps = partialDeps; + this.promised = promised; + this.accepted = accepted; + this.waitingOn = waitingOn; + this.route = txn.keys().toRoute(txn.keys().get(0).someIntersectingRoutingKey(null)); + this.keysOrRanges = txn.keys(); + } + + private CommonAttributes attributes(SaveStatus saveStatus) + { + CommonAttributes.Mutable mutable = new CommonAttributes.Mutable(txnId); + if (saveStatus.known.isDefinitionKnown()) + mutable.partialTxn(partialTxn); + if (saveStatus.known.deps.hasProposedOrDecidedDeps()) + mutable.partialDeps(partialDeps); + + mutable.route(route); + mutable.durability(NotDurable); + + return mutable; + } + + public Command build(SaveStatus saveStatus) + { + switch (saveStatus) + { + default: throw new AssertionError("Unhandled saveStatus: " + saveStatus); + case TruncatedApplyWithDeps: + throw new IllegalArgumentException("TruncatedApplyWithDeps is not a valid state for a Command to be in, its for FetchData"); + case Uninitialised: + case NotDefined: + return Command.SerializerSupport.notDefined(attributes(saveStatus), Ballot.ZERO); + case PreAccepted: + return Command.SerializerSupport.preaccepted(attributes(saveStatus), executeAt, Ballot.ZERO); + case Accepted: + case AcceptedInvalidate: + case AcceptedWithDefinition: + case AcceptedInvalidateWithDefinition: + case PreCommittedWithDefinition: + case PreCommittedWithDefinitionAndAcceptedDeps: + case PreCommittedWithAcceptedDeps: + case PreCommitted: + return Command.SerializerSupport.accepted(attributes(saveStatus), saveStatus, executeAt, promised, accepted); + + case Committed: + return Command.SerializerSupport.committed(attributes(saveStatus), saveStatus, executeAt, promised, accepted, null); + + case Stable: + case ReadyToExecute: + return Command.SerializerSupport.committed(attributes(saveStatus), saveStatus, executeAt, promised, accepted, waitingOn); + + case PreApplied: + case Applying: + case Applied: + return Command.SerializerSupport.executed(attributes(saveStatus), saveStatus, executeAt, promised, accepted, waitingOn, new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(Collections.emptyList(), true)), new TxnData()); + + case TruncatedApply: + if (txnId.kind().awaitsOnlyDeps()) return Command.SerializerSupport.truncatedApply(attributes(saveStatus), saveStatus, executeAt, null, null, txnId); + else return Command.SerializerSupport.truncatedApply(attributes(saveStatus), saveStatus, executeAt, null, null); + + case TruncatedApplyWithOutcome: + if (txnId.kind().awaitsOnlyDeps()) return Command.SerializerSupport.truncatedApply(attributes(saveStatus), saveStatus, executeAt, new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(Collections.emptyList(), true)), new TxnData(), txnId); + else return Command.SerializerSupport.truncatedApply(attributes(saveStatus), saveStatus, executeAt, new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(Collections.emptyList(), true)), new TxnData()); + + case Erased: + case ErasedOrInvalidOrVestigial: + case Invalidated: + return Command.SerializerSupport.invalidated(txnId, Listeners.Immutable.EMPTY); + } + } + } + public static Gen keys() { return keys(fromQT(CassandraGenerators.TABLE_ID_GEN), @@ -186,7 +350,7 @@ public static Gen ranges(Gen> tableIdGen, Gen while (offset.compareTo(size) < 0) { BigInteger end = offset.add(update); - TokenRange r = (TokenRange) splitter.subRange(range, offset, end); + TokenRange r = splitter.subRange(range, offset, end); for (TableId id : tables) { ranges.add(r.withTable(id)); From 2b01b5fa79369179cb264ddcb4b3013a878efa04 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Tue, 16 Jul 2024 17:40:49 -0500 Subject: [PATCH 135/340] Command to Exclude Replicas from Durability Status Coordination patch by Caleb Rackliffe; reviewed by David Capwell, Sam Tunnicliffe, and Benedict Elliott Smith for CASSANDRA-19321 --- modules/accord | 2 +- .../cassandra/service/CassandraDaemon.java | 2 + .../service/accord/AccordOperations.java | 76 +++++++++ .../service/accord/AccordOperationsMBean.java | 31 ++++ .../service/accord/AccordStaleReplicas.java | 137 ++++++++++++++++ .../service/accord/AccordTopology.java | 25 ++- .../serializers/TopologySerializers.java | 5 +- .../apache/cassandra/tcm/ClusterMetadata.java | 80 ++++++++-- .../apache/cassandra/tcm/MetadataKeys.java | 2 + .../tcm/StubClusterMetadataService.java | 4 +- .../apache/cassandra/tcm/Transformation.java | 4 + .../tcm/compatibility/GossipHelper.java | 10 +- .../cassandra/tcm/serialization/Version.java | 1 + .../transformations/AccordMarkRejoining.java | 123 ++++++++++++++ .../tcm/transformations/AccordMarkStale.java | 151 ++++++++++++++++++ .../org/apache/cassandra/tools/NodeProbe.java | 10 ++ .../org/apache/cassandra/tools/NodeTool.java | 7 + .../cassandra/tools/nodetool/AccordAdmin.java | 69 ++++++++ .../mock/nodetool/InternalNodeProbe.java | 2 + .../test/accord/AccordNodetoolTest.java | 99 ++++++++++++ .../test/log/ClusterMetadataTestHelper.java | 10 +- .../cassandra/locator/MetaStrategyTest.java | 10 +- .../accord/AccordFastPathCoordinatorTest.java | 17 +- .../accord/AccordStaleReplicasTest.java | 54 +++++++ .../ClusterMetadataTransformationTest.java | 2 + .../AsymmetricMetadataSerializers.java | 42 +++++ .../AccordMarkRejoiningTest.java | 56 +++++++ .../transformations/AccordMarkStaleTest.java | 56 +++++++ 28 files changed, 1047 insertions(+), 40 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordOperations.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordOperationsMBean.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/AccordMarkRejoining.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/AccordMarkStale.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/AccordAdmin.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordNodetoolTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java create mode 100644 test/unit/org/apache/cassandra/tcm/serialization/AsymmetricMetadataSerializers.java create mode 100644 test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java create mode 100644 test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java diff --git a/modules/accord b/modules/accord index 449b2b4d0bf4..81c02769f9ad 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 449b2b4d0bf4bb44d55a3c57f712a4d5a15e7220 +Subproject commit 81c02769f9ad73ef3aba0675c2217fc74b8a4a4c diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index 57f38e93e9a0..45740f19169a 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -78,6 +78,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.security.ThreadAwareSecurityManager; +import org.apache.cassandra.service.accord.AccordOperations; import org.apache.cassandra.service.paxos.PaxosState; import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.streaming.StreamManager; @@ -269,6 +270,7 @@ protected void setup() Startup.initialize(DatabaseDescriptor.getSeeds()); disableAutoCompaction(Schema.instance.distributedKeyspaces().names()); CMSOperations.initJmx(); + AccordOperations.initJmx(); if (ClusterMetadata.current().myNodeId() != null) RegistrationStatus.instance.onRegistration(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordOperations.java b/src/java/org/apache/cassandra/service/accord/AccordOperations.java new file mode 100644 index 000000000000..e7820919e170 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordOperations.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.transformations.AccordMarkStale; +import org.apache.cassandra.tcm.transformations.AccordMarkRejoining; +import org.apache.cassandra.utils.MBeanWrapper; + +public class AccordOperations implements AccordOperationsMBean +{ + public static final String MBEAN_OBJECT_NAME = "org.apache.cassandra.service.accord:type=AccordOperations"; + public static final AccordOperations instance = new AccordOperations(ClusterMetadataService.instance()); + + private final ClusterMetadataService cms; + + public static void initJmx() + { + MBeanWrapper.instance.registerMBean(instance, MBEAN_OBJECT_NAME); + } + + private AccordOperations(ClusterMetadataService cms) + { + this.cms = cms; + } + + @Override + public Map describe() + { + Map info = new HashMap<>(); + ClusterMetadata metadata = ClusterMetadata.current(); + + info.put("EPOCH", Long.toString(metadata.epoch.getEpoch())); + String staleReplicas = metadata.accordStaleReplicas.ids().stream().sorted().map(Object::toString).collect(Collectors.joining(",")); + info.put("STALE_REPLICAS", staleReplicas); + return info; + } + + @Override + public void accordMarkStale(List nodeIdStrings) + { + Set nodeIds = nodeIdStrings.stream().map(NodeId::fromString).collect(Collectors.toSet()); + cms.commit(new AccordMarkStale(nodeIds)); + } + + @Override + public void accordMarkRejoining(List nodeIdStrings) + { + Set nodeIds = nodeIdStrings.stream().map(NodeId::fromString).collect(Collectors.toSet()); + cms.commit(new AccordMarkRejoining(nodeIds)); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordOperationsMBean.java b/src/java/org/apache/cassandra/service/accord/AccordOperationsMBean.java new file mode 100644 index 000000000000..e0b0884733c3 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordOperationsMBean.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.List; +import java.util.Map; + +public interface AccordOperationsMBean +{ + Map describe(); + + void accordMarkStale(List nodeIds); + + void accordMarkRejoining(List nodeIds); +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java b/src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java new file mode 100644 index 000000000000..2502fa1a6324 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; + +import javax.annotation.concurrent.Immutable; + +import com.google.common.collect.ImmutableSet; + +import accord.local.Node; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.serializers.TopologySerializers; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.MetadataValue; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.utils.CollectionSerializers; + +@Immutable +public class AccordStaleReplicas implements MetadataValue +{ + public static final AccordStaleReplicas EMPTY = new AccordStaleReplicas(ImmutableSet.of(), Epoch.EMPTY); + + private final Set staleIds; + private final Epoch lastModified; + + AccordStaleReplicas(Set staleIds, Epoch lastModified) + { + this.staleIds = staleIds; + this.lastModified = lastModified; + } + + @Override + public AccordStaleReplicas withLastModified(Epoch epoch) + { + return new AccordStaleReplicas(staleIds, epoch); + } + + @Override + public Epoch lastModified() + { + return lastModified; + } + + public AccordStaleReplicas withNodeIds(Set ids) + { + ImmutableSet.Builder builder = new ImmutableSet.Builder<>(); + Set newIds = builder.addAll(staleIds).addAll(ids).build(); + return new AccordStaleReplicas(newIds, lastModified); + } + + public AccordStaleReplicas without(Set ids) + { + ImmutableSet.Builder builder = new ImmutableSet.Builder<>(); + + for (Node.Id staleId : staleIds) + if (!ids.contains(staleId)) + builder.add(staleId); + + return new AccordStaleReplicas(builder.build(), lastModified); + } + + public boolean contains(Node.Id nodeId) + { + return staleIds.contains(nodeId); + } + + public Set ids() + { + return staleIds; + } + + @Override + public String toString() + { + return "AccordStaleReplicas{staleIds=" + staleIds + ", lastModified=" + lastModified + '}'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordStaleReplicas that = (AccordStaleReplicas) o; + return Objects.equals(staleIds, that.staleIds) && Objects.equals(lastModified, that.lastModified); + } + + @Override + public int hashCode() + { + return Objects.hash(staleIds, lastModified); + } + + public static final MetadataSerializer serializer = new MetadataSerializer<>() + { + @Override + public void serialize(AccordStaleReplicas replicas, DataOutputPlus out, Version version) throws IOException + { + CollectionSerializers.serializeCollection(replicas.staleIds, out, version, TopologySerializers.nodeId); + Epoch.serializer.serialize(replicas.lastModified, out, version); + } + + @Override + public AccordStaleReplicas deserialize(DataInputPlus in, Version version) throws IOException + { + return new AccordStaleReplicas(CollectionSerializers.deserializeSet(in, version, TopologySerializers.nodeId), + Epoch.serializer.deserialize(in, version)); + } + + @Override + public long serializedSize(AccordStaleReplicas replicas, Version version) + { + return CollectionSerializers.serializedCollectionSize(replicas.staleIds, version, TopologySerializers.nodeId) + + Epoch.serializer.serializedSize(replicas.lastModified, version); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopology.java b/src/java/org/apache/cassandra/service/accord/AccordTopology.java index 7bbfc0c250ce..45f3b3fd76dd 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopology.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopology.java @@ -78,7 +78,7 @@ private Shard createOrReuse(accord.primitives.Range range, SortedArrayList range; @@ -139,7 +139,7 @@ private static KeyspaceShard forRange(KeyspaceMetadata keyspace, Range ra return new KeyspaceShard(keyspace, range, nodes, pending); } - public static List forKeyspace(KeyspaceMetadata keyspace, DataPlacements placements, Directory directory, ShardLookup lookup) + public static List forKeyspace(KeyspaceMetadata keyspace, DataPlacements placements, Directory directory) { ReplicationParams replication = keyspace.params.replication; DataPlacement placement = placements.get(replication); @@ -154,6 +154,16 @@ public static List forKeyspace(KeyspaceMetadata keyspace, DataPla } return shards; } + + public List nodes() + { + return nodes; + } + + public Range range() + { + return range; + } } static TokenRange minRange(TableId table, Token token) @@ -219,7 +229,9 @@ private static Map createDCMap(Directory directory) return builder.build(); } - public static Topology createAccordTopology(Epoch epoch, DistributedSchema schema, DataPlacements placements, Directory directory, AccordFastPath accordFastPath, ShardLookup lookup) + public static Topology createAccordTopology(Epoch epoch, DistributedSchema schema, DataPlacements placements, + Directory directory, AccordFastPath accordFastPath, ShardLookup lookup, + AccordStaleReplicas staleReplicas) { List shards = new ArrayList<>(); Set unavailable = accordFastPath.unavailableIds(); @@ -230,17 +242,18 @@ public static Topology createAccordTopology(Epoch epoch, DistributedSchema schem List tables = keyspace.tables.stream().filter(TableMetadata::requiresAccordSupport).collect(Collectors.toList()); if (tables.isEmpty()) continue; - List ksShards = KeyspaceShard.forKeyspace(keyspace, placements, directory, lookup); + List ksShards = KeyspaceShard.forKeyspace(keyspace, placements, directory); tables.forEach(table -> ksShards.forEach(shard -> shards.add(shard.createForTable(table, unavailable, dcMap, lookup)))); } shards.sort((a, b) -> a.range.compare(b.range)); - return new Topology(epoch.getEpoch(), shards.toArray(new Shard[0])); + + return new Topology(epoch.getEpoch(), staleReplicas.ids(), shards.toArray(new Shard[0])); } public static Topology createAccordTopology(ClusterMetadata metadata, ShardLookup lookup) { - return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, metadata.accordFastPath, lookup); + return createAccordTopology(metadata.epoch, metadata.schema, metadata.placements, metadata.directory, metadata.accordFastPath, lookup, metadata.accordStaleReplicas); } public static Topology createAccordTopology(ClusterMetadata metadata, Topology current) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java index 782ecbf5ed5d..73708c125fa9 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java @@ -159,6 +159,7 @@ public void serialize(Topology topology, DataOutputPlus out, int version) throws { out.writeLong(topology.epoch()); ArraySerializers.serializeArray(topology.unsafeGetShards(), out, version, shard); + CollectionSerializers.serializeCollection(topology.staleIds(), out, version, TopologySerializers.nodeId); } @Override @@ -166,7 +167,8 @@ public Topology deserialize(DataInputPlus in, int version) throws IOException { long epoch = in.readLong(); Shard[] shards = ArraySerializers.deserializeArray(in, version, shard, Shard[]::new); - return new Topology(epoch, shards); + Set staleIds = CollectionSerializers.deserializeSet(in, version, TopologySerializers.nodeId); + return new Topology(epoch, staleIds, shards); } @Override @@ -175,6 +177,7 @@ public long serializedSize(Topology topology, int version) long size = 0; size += TypeSizes.LONG_SIZE; // epoch size += ArraySerializers.serializedArraySize(topology.unsafeGetShards(), version, shard); + size += CollectionSerializers.serializedCollectionSize(topology.staleIds(), version, TopologySerializers.nodeId); return size; } }; diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 95c3e1ba2cb2..9c497e2d1524 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -57,6 +58,8 @@ import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordStaleReplicas; +import org.apache.cassandra.service.accord.AccordTopology; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.consensus.migration.TableMigrationState; import org.apache.cassandra.tcm.extensions.ExtensionKey; @@ -100,6 +103,7 @@ public class ClusterMetadata public final InProgressSequences inProgressSequences; public final ConsensusMigrationState consensusMigrationState; public final ImmutableMap, ExtensionValue> extensions; + public final AccordStaleReplicas accordStaleReplicas; // This isn't serialized as part of ClusterMetadata it's really just a view over the Directory. public final Locator locator; @@ -135,7 +139,8 @@ public ClusterMetadata(IPartitioner partitioner, Directory directory, Distribute LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusMigrationState.EMPTY, - ImmutableMap.of()); + ImmutableMap.of(), + AccordStaleReplicas.EMPTY); } public ClusterMetadata(Epoch epoch, @@ -148,7 +153,8 @@ public ClusterMetadata(Epoch epoch, LockedRanges lockedRanges, InProgressSequences inProgressSequences, ConsensusMigrationState consensusMigrationState, - Map, ExtensionValue> extensions) + Map, ExtensionValue> extensions, + AccordStaleReplicas accordStaleReplicas) { this(EMPTY_METADATA_IDENTIFIER, epoch, @@ -161,7 +167,8 @@ public ClusterMetadata(Epoch epoch, lockedRanges, inProgressSequences, consensusMigrationState, - extensions); + extensions, + accordStaleReplicas); } private ClusterMetadata(int metadataIdentifier, @@ -175,7 +182,8 @@ private ClusterMetadata(int metadataIdentifier, LockedRanges lockedRanges, InProgressSequences inProgressSequences, ConsensusMigrationState consensusMigrationState, - Map, ExtensionValue> extensions) + Map, ExtensionValue> extensions, + AccordStaleReplicas accordStaleReplicas) { // TODO: token map is a feature of the specific placement strategy, and so may not be a relevant component of // ClusterMetadata in the long term. We need to consider how the actual components of metadata can be evolved @@ -194,16 +202,17 @@ private ClusterMetadata(int metadataIdentifier, this.consensusMigrationState = consensusMigrationState; this.extensions = ImmutableMap.copyOf(extensions); this.locator = Locator.usingDirectory(directory); + this.accordStaleReplicas = accordStaleReplicas; } public ClusterMetadata withDirectory(Directory directory) { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions, accordStaleReplicas); } public ClusterMetadata withPlacements(DataPlacements placements) { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions, accordStaleReplicas); } public Set fullCMSMembers() @@ -259,7 +268,8 @@ public ClusterMetadata forceEpoch(Epoch epoch) capLastModified(lockedRanges, epoch), capLastModified(inProgressSequences, epoch), capLastModified(consensusMigrationState, epoch), - capLastModified(extensions, epoch)); + capLastModified(extensions, epoch), + capLastModified(accordStaleReplicas, epoch)); } public ClusterMetadata initializeClusterIdentifier(int clusterIdentifier) @@ -281,7 +291,8 @@ public ClusterMetadata initializeClusterIdentifier(int clusterIdentifier) lockedRanges, inProgressSequences, consensusMigrationState, - extensions); + extensions, + accordStaleReplicas); } private static Map, ExtensionValue> capLastModified(Map, ExtensionValue> original, Epoch maxEpoch) @@ -410,6 +421,7 @@ public static class Transformer private ConsensusMigrationState consensusMigrationState; private final Map, ExtensionValue> extensions; private final Set modifiedKeys; + private AccordStaleReplicas accordStaleReplicas; private Transformer(ClusterMetadata metadata, Epoch epoch) { @@ -426,6 +438,7 @@ private Transformer(ClusterMetadata metadata, Epoch epoch) this.consensusMigrationState = metadata.consensusMigrationState; extensions = new HashMap<>(metadata.extensions); modifiedKeys = new HashSet<>(); + accordStaleReplicas = metadata.accordStaleReplicas; } public Epoch epoch() @@ -456,6 +469,11 @@ public Transformer unregister(NodeId nodeId) directory = directory.withoutRackAndDC(nodeId).without(nodeId); if (!tokenMap.tokens(nodeId).isEmpty()) tokenMap = tokenMap.unassignTokens(nodeId); + + Node.Id accordId = AccordTopology.tcmIdToAccord(nodeId); + if (accordStaleReplicas.contains(accordId)) + accordStaleReplicas = accordStaleReplicas.without(Collections.singleton(accordId)); + return this; } @@ -523,6 +541,11 @@ public Transformer replaced(NodeId replaced, NodeId replacement) directory = directory.without(replaced) .withRackAndDC(replacement) .withNodeState(replacement, NodeState.JOINED); + + Node.Id accordId = AccordTopology.tcmIdToAccord(replaced); + if (accordStaleReplicas.contains(accordId)) + accordStaleReplicas = accordStaleReplicas.without(Collections.singleton(accordId)); + return this; } @@ -551,6 +574,18 @@ public Transformer withFastPathStatusSince(Node.Id node, AccordFastPath.Status s accordFastPath = accordFastPath.withNodeStatusSince(node, status, updateTimeMillis, updateDelayMillis); return this; } + + public Transformer markStaleReplicas(Set ids) + { + accordStaleReplicas = accordStaleReplicas.withNodeIds(ids); + return this; + } + + public Transformer unmarkStaleReplicas(Set ids) + { + accordStaleReplicas = accordStaleReplicas.without(ids); + return this; + } public Transformer with(LockedRanges lockedRanges) { @@ -677,6 +712,12 @@ public Transformed build() modifiedKeys.add(MetadataKeys.ACCORD_FAST_PATH); accordFastPath = accordFastPath.withLastModified(epoch); } + + if (accordStaleReplicas != base.accordStaleReplicas) + { + modifiedKeys.add(MetadataKeys.ACCORD_STALE_REPLICAS); + accordStaleReplicas = accordStaleReplicas.withLastModified(epoch); + } if (lockedRanges != base.lockedRanges) { @@ -712,7 +753,8 @@ public Transformed build() lockedRanges, inProgressSequences, consensusMigrationState, - extensions), + extensions, + accordStaleReplicas), ImmutableSet.copyOf(modifiedKeys)); } @@ -729,7 +771,8 @@ public ClusterMetadata buildForGossipMode() lockedRanges, inProgressSequences, consensusMigrationState, - extensions); + extensions, + accordStaleReplicas); } @Override @@ -861,6 +904,7 @@ public boolean equals(Object o) lockedRanges.equals(that.lockedRanges) && inProgressSequences.equals(that.inProgressSequences) && consensusMigrationState.equals(that.consensusMigrationState) && + accordStaleReplicas.equals(that.accordStaleReplicas) && extensions.equals(that.extensions); } @@ -909,7 +953,7 @@ public void dumpDiff(ClusterMetadata other) @Override public int hashCode() { - return Objects.hash(epoch, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions); + return Objects.hash(epoch, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, accordStaleReplicas, extensions); } public static ClusterMetadata current() @@ -990,7 +1034,9 @@ public void serialize(ClusterMetadata metadata, DataOutputPlus out, Version vers { AccordFastPath.serializer.serialize(metadata.accordFastPath, out, version); ConsensusMigrationState.serializer.serialize(metadata.consensusMigrationState, out, version); + AccordStaleReplicas.serializer.serialize(metadata.accordStaleReplicas, out, version); } + LockedRanges.serializer.serialize(metadata.lockedRanges, out, version); InProgressSequences.serializer.serialize(metadata.inProgressSequences, out, version); out.writeInt(metadata.extensions.size()); @@ -1027,18 +1073,24 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE Directory dir = Directory.serializer.deserialize(in, version); TokenMap tokenMap = TokenMap.serializer.deserialize(in, version); DataPlacements placements = DataPlacements.serializer.deserialize(in, version); + AccordFastPath accordFastPath; ConsensusMigrationState consensusMigrationState; + AccordStaleReplicas staleReplicas; + if (version.isAtLeast(V2)) { accordFastPath = AccordFastPath.serializer.deserialize(in, version); consensusMigrationState = ConsensusMigrationState.serializer.deserialize(in, version); + staleReplicas = AccordStaleReplicas.serializer.deserialize(in, version); } else { accordFastPath = AccordFastPath.EMPTY; consensusMigrationState = ConsensusMigrationState.EMPTY; + staleReplicas = AccordStaleReplicas.EMPTY; } + LockedRanges lockedRanges = LockedRanges.serializer.deserialize(in, version); InProgressSequences ips = InProgressSequences.serializer.deserialize(in, version); int items = in.readInt(); @@ -1061,7 +1113,8 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE lockedRanges, ips, consensusMigrationState, - extensions); + extensions, + staleReplicas); } @Override @@ -1085,7 +1138,8 @@ public long serializedSize(ClusterMetadata metadata, Version version) if (version.isAtLeast(V2)) { size += AccordFastPath.serializer.serializedSize(metadata.accordFastPath, version) + - ConsensusMigrationState.serializer.serializedSize(metadata.consensusMigrationState, version); + ConsensusMigrationState.serializer.serializedSize(metadata.consensusMigrationState, version) + + AccordStaleReplicas.serializer.serializedSize(metadata.accordStaleReplicas, version); } size += LockedRanges.serializer.serializedSize(metadata.lockedRanges, version) + diff --git a/src/java/org/apache/cassandra/tcm/MetadataKeys.java b/src/java/org/apache/cassandra/tcm/MetadataKeys.java index 68306ce313dc..0aed60581e0a 100644 --- a/src/java/org/apache/cassandra/tcm/MetadataKeys.java +++ b/src/java/org/apache/cassandra/tcm/MetadataKeys.java @@ -40,6 +40,7 @@ public class MetadataKeys public static final MetadataKey TOKEN_MAP = make(CORE_NS, "ownership", "token_map"); public static final MetadataKey DATA_PLACEMENTS = make(CORE_NS, "ownership", "data_placements"); public static final MetadataKey ACCORD_FAST_PATH = make(CORE_NS, "ownership", "accord_fast_path"); + public static final MetadataKey ACCORD_STALE_REPLICAS = make(CORE_NS, "ownership", "accord_stale_replicas"); public static final MetadataKey LOCKED_RANGES = make(CORE_NS, "sequences", "locked_ranges"); public static final MetadataKey IN_PROGRESS_SEQUENCES = make(CORE_NS, "sequences", "in_progress"); public static final MetadataKey CONSENSUS_MIGRATION_STATE = make(CORE_NS, "consensus", "migration_state"); @@ -49,6 +50,7 @@ public class MetadataKeys TOKEN_MAP, DATA_PLACEMENTS, ACCORD_FAST_PATH, + ACCORD_STALE_REPLICAS, LOCKED_RANGES, IN_PROGRESS_SEQUENCES, CONSENSUS_MIGRATION_STATE); diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index 8a69f9acc464..16cf324799a4 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -29,6 +29,7 @@ import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.service.accord.AccordStaleReplicas; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.Commit.Replicator; import org.apache.cassandra.tcm.log.Entry; @@ -184,7 +185,8 @@ public StubClusterMetadataService build() LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusMigrationState.EMPTY, - ImmutableMap.of()); + ImmutableMap.of(), + AccordStaleReplicas.EMPTY); } return new StubClusterMetadataService(new UniformRangePlacement(), snapshots != null ? snapshots : MetadataSnapshots.NO_OP, diff --git a/src/java/org/apache/cassandra/tcm/Transformation.java b/src/java/org/apache/cassandra/tcm/Transformation.java index 1ccdb683e9e2..b8ce1cbc9a5d 100644 --- a/src/java/org/apache/cassandra/tcm/Transformation.java +++ b/src/java/org/apache/cassandra/tcm/Transformation.java @@ -38,6 +38,8 @@ import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; import org.apache.cassandra.tcm.serialization.VerboseMetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.tcm.transformations.AccordMarkRejoining; +import org.apache.cassandra.tcm.transformations.AccordMarkStale; import org.apache.cassandra.tcm.transformations.AlterSchema; import org.apache.cassandra.tcm.transformations.AlterTopology; import org.apache.cassandra.tcm.transformations.Assassinate; @@ -242,6 +244,8 @@ enum Kind BEGIN_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(37, () -> BeginConsensusMigrationForTableAndRange.serializer), MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(38, () -> MaybeFinishConsensusMigrationForTableAndRange.serializer), + ACCORD_MARK_STALE(39, () -> AccordMarkStale.serializer), + ACCORD_MARK_REJOINING(40, () -> AccordMarkRejoining.serializer), ; private final Supplier> serializer; diff --git a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java index e82572f53b02..f569a9d1cbc5 100644 --- a/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java +++ b/src/java/org/apache/cassandra/tcm/compatibility/GossipHelper.java @@ -54,6 +54,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SchemaKeyspace; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordStaleReplicas; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; @@ -300,7 +301,8 @@ public static ClusterMetadata emptyWithSchemaFromSystemTables(Set allKno LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusMigrationState.EMPTY, - Collections.emptyMap()); + Collections.emptyMap(), + AccordStaleReplicas.EMPTY); } public static ClusterMetadata fromEndpointStates(DistributedSchema schema, Map epStates) @@ -389,7 +391,8 @@ public static ClusterMetadata fromEndpointStates(Map epstates) diff --git a/src/java/org/apache/cassandra/tcm/serialization/Version.java b/src/java/org/apache/cassandra/tcm/serialization/Version.java index da99e726a0fb..245ef32bbc46 100644 --- a/src/java/org/apache/cassandra/tcm/serialization/Version.java +++ b/src/java/org/apache/cassandra/tcm/serialization/Version.java @@ -37,6 +37,7 @@ public enum Version * - Added version to PlacementForRange serializer * - Serialize MemtableParams when serializing TableParams * - Added AccordFastPath + * - Added AccordStaleReplicas */ V2(2), /** diff --git a/src/java/org/apache/cassandra/tcm/transformations/AccordMarkRejoining.java b/src/java/org/apache/cassandra/tcm/transformations/AccordMarkRejoining.java new file mode 100644 index 000000000000..402d05cf50a0 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/AccordMarkRejoining.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.Node; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.AccordTopology; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.utils.CollectionSerializers; + +import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; + +public class AccordMarkRejoining implements Transformation +{ + private static final Logger logger = LoggerFactory.getLogger(AccordMarkRejoining.class); + + private final Set ids; + + public AccordMarkRejoining(Set ids) + { + this.ids = ids; + } + + @Override + public Kind kind() + { + return Kind.ACCORD_MARK_REJOINING; + } + + @Override + public Result execute(ClusterMetadata prev) + { + for (NodeId id : ids) + if (!prev.directory.peerIds().contains(id)) + return new Rejected(INVALID, String.format("Can not unmark node %s as it is not present in the directory.", id)); + + Set accordIds = ids.stream().map(AccordTopology::tcmIdToAccord).collect(Collectors.toSet()); + + for (Node.Id id : accordIds) + if (!prev.accordStaleReplicas.contains(id)) + return new Rejected(INVALID, String.format("Can not unmark node %s as it is not stale.", id)); + + logger.info("Unmarking " + ids + ". They will now participate in durability status coordination..."); + ClusterMetadata.Transformer next = prev.transformer().unmarkStaleReplicas(accordIds); + return Transformation.success(next, LockedRanges.AffectedRanges.EMPTY); + } + + @Override + public String toString() + { + return "AccordMarkRejoining{ids=" + ids + '}'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordMarkRejoining that = (AccordMarkRejoining) o; + return Objects.equals(ids, that.ids); + } + + @Override + public int hashCode() + { + return Objects.hash(ids); + } + + public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer<>() + { + @Override + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + assert t instanceof AccordMarkRejoining; + AccordMarkRejoining mark = (AccordMarkRejoining) t; + CollectionSerializers.serializeCollection(mark.ids, out, version, NodeId.serializer); + } + + @Override + public AccordMarkRejoining deserialize(DataInputPlus in, Version version) throws IOException + { + return new AccordMarkRejoining(CollectionSerializers.deserializeSet(in, version, NodeId.serializer)); + } + + @Override + public long serializedSize(Transformation t, Version version) + { + assert t instanceof AccordMarkRejoining; + AccordMarkRejoining mark = (AccordMarkRejoining) t; + return CollectionSerializers.serializedCollectionSize(mark.ids, version, NodeId.serializer); + } + }; +} diff --git a/src/java/org/apache/cassandra/tcm/transformations/AccordMarkStale.java b/src/java/org/apache/cassandra/tcm/transformations/AccordMarkStale.java new file mode 100644 index 000000000000..261e0a4eb256 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/AccordMarkStale.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.Node; +import accord.topology.Shard; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.accord.AccordTopology; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.utils.CollectionSerializers; + +import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; + +public class AccordMarkStale implements Transformation +{ + private static final Logger logger = LoggerFactory.getLogger(AccordMarkStale.class); + + private final Set ids; + + public AccordMarkStale(Set ids) + { + this.ids = ids; + } + + @Override + public Kind kind() + { + return Kind.ACCORD_MARK_STALE; + } + + @Override + public Result execute(ClusterMetadata prev) + { + for (NodeId id : ids) + if (!prev.directory.peerIds().contains(id)) + return new Rejected(INVALID, String.format("Can not mark node %s stale as it is not present in the directory.", id)); + + Set accordIds = ids.stream().map(AccordTopology::tcmIdToAccord).collect(Collectors.toSet()); + + for (Node.Id id : accordIds) + if (prev.accordStaleReplicas.contains(id)) + return new Rejected(INVALID, String.format("Can not mark node %s stale as it already is.", id)); + + for (KeyspaceMetadata keyspace : prev.schema.getKeyspaces().without(SchemaConstants.REPLICATED_SYSTEM_KEYSPACE_NAMES)) + { + List shards = AccordTopology.KeyspaceShard.forKeyspace(keyspace, prev.placements, prev.directory); + + for (AccordTopology.KeyspaceShard shard : shards) + { + // We're trying to mark a node in this shard stale... + if (!Collections.disjoint(shard.nodes(), accordIds)) + { + int quorumSize = Shard.slowPathQuorumSize(shard.nodes().size()); + Set nonStaleNodes = new HashSet<>(shard.nodes()); + nonStaleNodes.removeAll(accordIds); + nonStaleNodes.removeAll(prev.accordStaleReplicas.ids()); + + // ...but reject the transformation if this would bring us below quorum. + if (nonStaleNodes.size() < quorumSize) + return new Rejected(INVALID, String.format("Can not mark nodes %s stale as that would leave fewer than a quorum of nodes active for range %s in keyspace '%s'.", + accordIds, shard.range(), keyspace.name)); + } + } + } + + logger.info("Marking " + ids + " stale. They will no longer participate in durability status coordination..."); + ClusterMetadata.Transformer next = prev.transformer().markStaleReplicas(accordIds); + return Transformation.success(next, LockedRanges.AffectedRanges.EMPTY); + } + + @Override + public String toString() + { + return "AccordMarkStale{ids=" + ids + '}'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AccordMarkStale that = (AccordMarkStale) o; + return Objects.equals(ids, that.ids); + } + + @Override + public int hashCode() + { + return Objects.hash(ids); + } + + public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer<>() + { + @Override + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + assert t instanceof AccordMarkStale; + AccordMarkStale mark = (AccordMarkStale) t; + CollectionSerializers.serializeCollection(mark.ids, out, version, NodeId.serializer); + } + + @Override + public AccordMarkStale deserialize(DataInputPlus in, Version version) throws IOException + { + return new AccordMarkStale(CollectionSerializers.deserializeSet(in, version, NodeId.serializer)); + } + + @Override + public long serializedSize(Transformation t, Version version) + { + assert t instanceof AccordMarkStale; + AccordMarkStale mark = (AccordMarkStale) t; + return CollectionSerializers.serializedCollectionSize(mark.ids, version, NodeId.serializer); + } + }; +} diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 9c8fd5039133..e733667b1e81 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -120,6 +120,8 @@ import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.StorageProxyMBean; import org.apache.cassandra.service.StorageServiceMBean; +import org.apache.cassandra.service.accord.AccordOperations; +import org.apache.cassandra.service.accord.AccordOperationsMBean; import org.apache.cassandra.streaming.StreamManagerMBean; import org.apache.cassandra.streaming.StreamState; import org.apache.cassandra.streaming.management.StreamStateCompositeData; @@ -154,6 +156,7 @@ public class NodeProbe implements AutoCloseable protected StorageServiceMBean ssProxy; protected SnapshotManagerMBean snapshotProxy; protected CMSOperationsMBean cmsProxy; + protected AccordOperationsMBean accordProxy; protected GossiperMBean gossProxy; protected MemoryMXBean memProxy; protected GCInspectorMXBean gcProxy; @@ -270,6 +273,8 @@ protected void connect() throws IOException snapshotProxy = JMX.newMBeanProxy(mbeanServerConn, name, SnapshotManagerMBean.class); name = new ObjectName(CMSOperations.MBEAN_OBJECT_NAME); cmsProxy = JMX.newMBeanProxy(mbeanServerConn, name, CMSOperationsMBean.class); + name = new ObjectName(AccordOperations.MBEAN_OBJECT_NAME); + accordProxy = JMX.newMBeanProxy(mbeanServerConn, name, AccordOperationsMBean.class); name = new ObjectName(MessagingService.MBEAN_NAME); msProxy = JMX.newMBeanProxy(mbeanServerConn, name, MessagingServiceMBean.class); name = new ObjectName(StreamManagerMBean.OBJECT_NAME); @@ -1296,6 +1301,11 @@ public CMSOperationsMBean getCMSOperationsProxy() return cmsProxy; } + public AccordOperationsMBean getAccordOperationsProxy() + { + return accordProxy; + } + public GossiperMBean getGossProxy() { return gossProxy; diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 00f2e66dfa45..d82173558695 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -278,6 +278,13 @@ public int execute(String... args) .withCommands(ConsensusMigrationAdmin.ListCmd.class) .withCommands(ConsensusMigrationAdmin.FinishMigration.class); + builder.withGroup("accord") + .withDescription("Manage the operation of Accord") + .withDefaultCommand(AccordAdmin.Describe.class) + .withCommand(AccordAdmin.Describe.class) + .withCommand(AccordAdmin.MarkStale.class) + .withCommand(AccordAdmin.MarkRejoining.class); + Cli parser = builder.build(); int status = 0; diff --git a/src/java/org/apache/cassandra/tools/nodetool/AccordAdmin.java b/src/java/org/apache/cassandra/tools/nodetool/AccordAdmin.java new file mode 100644 index 000000000000..ff7e88ca9f61 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/AccordAdmin.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.util.List; +import java.util.Map; + +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool; + +public abstract class AccordAdmin extends NodeTool.NodeToolCmd +{ + @Command(name = "describe", description = "Describe current cluster metadata relating to Accord") + public static class Describe extends NodeTool.NodeToolCmd + { + @Override + protected void execute(NodeProbe probe) + { + Map info = probe.getAccordOperationsProxy().describe(); + output.out.printf("Accord Service:%n"); + output.out.printf("Epoch: %s%n", info.get("EPOCH")); + output.out.printf("Stale Replicas: %s%n", info.get("STALE_REPLICAS")); + } + } + + @Command(name = "mark_stale", description = "Mark a replica as being stale and no longer able to participate in durability status coordination") + public static class MarkStale extends AccordAdmin + { + @Arguments(required = true, description = "One or more node IDs to mark stale", usage = "+") + public List nodeIds; + + @Override + protected void execute(NodeProbe probe) + { + probe.getAccordOperationsProxy().accordMarkStale(nodeIds); + } + } + + @Command(name = "mark_rejoining", description = "Mark a stale replica as being allowed to participate in durability status coordination again") + public static class MarkRejoining extends AccordAdmin + { + @Arguments(required = true, description = "One or more node IDs to mark no longer stale", usage = "+") + public List nodeIds; + + @Override + protected void execute(NodeProbe probe) + { + probe.getAccordOperationsProxy().accordMarkRejoining(nodeIds); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java b/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java index bf6a0879cedd..1ab4f843b7fb 100644 --- a/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java +++ b/test/distributed/org/apache/cassandra/distributed/mock/nodetool/InternalNodeProbe.java @@ -46,6 +46,7 @@ import org.apache.cassandra.service.GCInspector; import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordOperations; import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.streaming.StreamManager; import org.apache.cassandra.tcm.CMSOperations; @@ -74,6 +75,7 @@ protected void connect() ssProxy = StorageService.instance; snapshotProxy = SnapshotManager.instance; cmsProxy = CMSOperations.instance; + accordProxy = AccordOperations.instance; msProxy = MessagingService.instance(); streamProxy = StreamManager.instance; compactionProxy = CompactionManager.instance; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordNodetoolTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordNodetoolTest.java new file mode 100644 index 000000000000..9af94b783dd1 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordNodetoolTest.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import com.google.common.collect.ImmutableSet; +import org.junit.Test; + +import accord.local.Node; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.test.TestBaseImpl; + + +import static org.junit.Assert.assertEquals; +import static org.apache.cassandra.distributed.shared.ClusterUtils.getNodeId; + +public class AccordNodetoolTest extends TestBaseImpl +{ + @Test + public void testMarkSingleNode() throws Throwable + { + try (Cluster cluster = init(builder().withNodes(3).withConfig((config) -> config.with(Feature.NETWORK, Feature.GOSSIP)).start())) + { + cluster.get(1).nodetoolResult("accord", "mark_stale", "1").asserts().success(); + cluster.get(1).runOnInstance(() -> assertEquals(ImmutableSet.of(new Node.Id(1)), ClusterMetadata.current().accordStaleReplicas.ids())); + cluster.get(1).nodetoolResult("accord", "describe").asserts().stdoutContains("Stale Replicas: 1"); + + // Reject the operation if the target node is already stale: + cluster.get(1).nodetoolResult("accord", "mark_stale", "1").asserts().failure().errorContains("it already is"); + + // Reject the operation if marking the node stale brings us below a quorum of non-stale nodes: + cluster.get(1).nodetoolResult("accord", "mark_stale", "2").asserts().failure().errorContains("that would leave fewer than a quorum"); + + // Reject the operation if the target node doesn't exist: + cluster.get(1).nodetoolResult("accord", "mark_stale", "4").asserts().failure().errorContains("not present in the directory"); + + cluster.get(1).nodetoolResult("accord", "mark_rejoining", "1").asserts().success(); + cluster.get(1).runOnInstance(() -> assertEquals(Collections.emptySet(), ClusterMetadata.current().accordStaleReplicas.ids())); + + cluster.get(1).nodetoolResult("accord", "mark_rejoining", "1").asserts().failure().errorContains("it is not stale"); + cluster.get(1).nodetoolResult("accord", "mark_rejoining", "4").asserts().failure().errorContains("not present in the directory"); + } + } + + @Test + public void testMarkMultipleNodes() throws Throwable + { + try (Cluster cluster = init(builder().withNodes(5).withConfig((config) -> config.with(Feature.NETWORK, Feature.GOSSIP)).start())) + { + // Reject the operation if marking the node stale brings us below a quorum of non-stale nodes: + cluster.get(1).nodetoolResult("accord", "mark_stale", "1", "2", "3").asserts().failure().errorContains("that would leave fewer than a quorum"); + + cluster.get(1).nodetoolResult("accord", "mark_stale", "1", "2").asserts().success(); + cluster.get(1).runOnInstance(() -> assertEquals(ImmutableSet.of(new Node.Id(1), new Node.Id(2)), ClusterMetadata.current().accordStaleReplicas.ids())); + cluster.get(1).nodetoolResult("accord", "describe").asserts().stdoutContains("Stale Replicas: 1,2"); + + // Reject the operation if a target node is already stale: + cluster.get(1).nodetoolResult("accord", "mark_stale", "1", "2").asserts().failure().errorContains("it already is"); + + // Reject the operation if a target node doesn't exist: + cluster.get(1).nodetoolResult("accord", "mark_stale", "4", "6").asserts().failure().errorContains("not present in the directory"); + + Map nodeIdToNode = new HashMap<>(); + for (int i = 1; i <= 5; i++) + nodeIdToNode.put(getNodeId(cluster.get(i)).id(), i); + + // Remove the second stale node, and ensure the set of stale replicas is updated: + cluster.get(nodeIdToNode.get(2)).shutdown().get(); + cluster.get(1).nodetoolResult("removenode", "2", "--force").asserts().success(); + cluster.get(1).nodetoolResult("cms", "unregister", "2").asserts().success(); + cluster.get(1).runOnInstance(() -> assertEquals(ImmutableSet.of(new Node.Id(1)), ClusterMetadata.current().accordStaleReplicas.ids())); + + cluster.get(1).nodetoolResult("accord", "mark_rejoining", "1", "3").asserts().failure().errorContains("it is not stale"); + cluster.get(1).nodetoolResult("accord", "mark_rejoining", "1", "6").asserts().failure().errorContains("not present in the directory"); + cluster.get(1).runOnInstance(() -> assertEquals(ImmutableSet.of(new Node.Id(1)), ClusterMetadata.current().accordStaleReplicas.ids())); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java index 1481e2a3be1b..69d4001bd135 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java @@ -58,6 +58,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SchemaTransformation; import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.accord.AccordStaleReplicas; import org.apache.cassandra.tcm.AtomicLongBackedProcessor; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -158,7 +159,8 @@ public static ClusterMetadata minimalForTesting(Epoch epoch, IPartitioner partit LockedRanges.EMPTY, InProgressSequences.EMPTY, null, - ImmutableMap.of()); + ImmutableMap.of(), + AccordStaleReplicas.EMPTY); } public static ClusterMetadata minimalForTesting(IPartitioner partitioner) @@ -173,7 +175,8 @@ public static ClusterMetadata minimalForTesting(IPartitioner partitioner) null, null, null, - ImmutableMap.of()); + ImmutableMap.of(), + AccordStaleReplicas.EMPTY); } public static ClusterMetadata minimalForTesting(Keyspaces keyspaces) @@ -188,7 +191,8 @@ public static ClusterMetadata minimalForTesting(Keyspaces keyspaces) null, null, null, - ImmutableMap.of()); + ImmutableMap.of(), + AccordStaleReplicas.EMPTY); } public static ClusterMetadataService syncInstanceForTest() diff --git a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java index 2b22b0be3b64..35ab27d426b9 100644 --- a/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java +++ b/test/unit/org/apache/cassandra/locator/MetaStrategyTest.java @@ -26,14 +26,15 @@ import java.util.Set; import com.google.common.collect.ImmutableMap; -import org.apache.cassandra.service.accord.AccordFastPath; -import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.junit.Assert; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.service.accord.AccordStaleReplicas; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.Directory; @@ -93,7 +94,8 @@ public static ClusterMetadata metadata(NodeConfiguration... configurations) LockedRanges.EMPTY, InProgressSequences.EMPTY, ConsensusMigrationState.EMPTY, - ImmutableMap.of()); + ImmutableMap.of(), + AccordStaleReplicas.EMPTY); } @Test @@ -159,4 +161,4 @@ public static Location location(String dc, String rack) { return new Location(dc, rack); } -} \ No newline at end of file +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java index 143f63a26c42..4bec25b4d8fa 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordFastPathCoordinatorTest.java @@ -18,6 +18,15 @@ package org.apache.cassandra.service.accord; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + import accord.local.Node; import accord.topology.Shard; import accord.topology.Topology; @@ -28,14 +37,6 @@ import org.apache.cassandra.schema.*; import org.apache.cassandra.service.accord.AccordFastPath.Status; import org.apache.cassandra.tcm.ClusterMetadata; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.concurrent.TimeUnit; import static org.apache.cassandra.service.accord.AccordTestUtils.*; diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java new file mode 100644 index 000000000000..83eced9b6628 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.HashSet; +import java.util.Set; + +import org.junit.Test; + +import accord.local.Node; +import accord.utils.AccordGens; +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializers; +import org.apache.cassandra.tcm.serialization.Version; + +import static accord.utils.Property.qt; + +public class AccordStaleReplicasTest +{ + @Test + public void serde() + { + try (DataOutputBuffer buffer = new DataOutputBuffer()) + { + Gen> nodesGen = Gens.lists(AccordGens.nodes()).unique().ofSizeBetween(0, 9).map(nodes -> new HashSet<>(nodes)); + Gen epochGen = AccordGens.epochs().map(Epoch::create); + + qt().check(rs -> { + Epoch epoch = epochGen.next(rs); + Set nodes = nodesGen.next(rs); + AsymmetricMetadataSerializers.testSerde(buffer, AccordStaleReplicas.serializer, new AccordStaleReplicas(nodes, epoch), Version.V2); + }); + } + } +} diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java index 68f02de123c8..1219502e9867 100644 --- a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java @@ -301,6 +301,8 @@ else if (key == ACCORD_FAST_PATH) return metadata.accordFastPath; else if (key == CONSENSUS_MIGRATION_STATE) return metadata.consensusMigrationState; + else if (key == ACCORD_STALE_REPLICAS) + return metadata.accordStaleReplicas; throw new IllegalArgumentException("Unknown metadata key " + key); } diff --git a/test/unit/org/apache/cassandra/tcm/serialization/AsymmetricMetadataSerializers.java b/test/unit/org/apache/cassandra/tcm/serialization/AsymmetricMetadataSerializers.java new file mode 100644 index 000000000000..bd3cd4547e0e --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/serialization/AsymmetricMetadataSerializers.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.serialization; + +import java.io.IOException; + +import org.assertj.core.api.Assertions; + +import accord.utils.LazyToString; +import accord.utils.ReflectionUtils; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; + +public class AsymmetricMetadataSerializers +{ + public static void testSerde(DataOutputBuffer output, AsymmetricMetadataSerializer serializer, In input, Version version) throws IOException + { + output.clear(); + long expectedSize = serializer.serializedSize(input, version); + serializer.serialize(input, output, version); + Assertions.assertThat(output.getLength()).describedAs("The serialized size and bytes written do not match").isEqualTo(expectedSize); + DataInputBuffer in = new DataInputBuffer(output.unsafeGetBufferAndFlip(), false); + Out read = serializer.deserialize(in, version); + Assertions.assertThat(read).describedAs("The deserialized output does not match the serialized input; difference %s", new LazyToString(() -> ReflectionUtils.recursiveEquals(read, input).toString())).isEqualTo(input); + } +} diff --git a/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java new file mode 100644 index 000000000000..032c8dd1ec7a --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.Collections; + +import com.google.common.collect.ImmutableSet; +import org.junit.Test; + +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializers; +import org.apache.cassandra.tcm.serialization.Version; + +public class AccordMarkRejoiningTest +{ + @Test + public void shouldSerializeEmpty() throws IOException + { + DataOutputBuffer buffer = new DataOutputBuffer(); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, new AccordMarkRejoining(Collections.emptySet()), Version.V2); + } + + @Test + public void shouldSerializeSingleton() throws IOException + { + DataOutputBuffer buffer = new DataOutputBuffer(); + AccordMarkRejoining markStale = new AccordMarkRejoining(Collections.singleton(NodeId.fromString("1"))); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, markStale, Version.V2); + } + + @Test + public void shouldSerializeMulti() throws IOException + { + DataOutputBuffer buffer = new DataOutputBuffer(); + AccordMarkRejoining markStale = new AccordMarkRejoining(ImmutableSet.of(NodeId.fromString("1"), NodeId.fromString("2"))); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, markStale, Version.V2); + } +} diff --git a/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java new file mode 100644 index 000000000000..d794b3a2a9b5 --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.Collections; + +import com.google.common.collect.ImmutableSet; +import org.junit.Test; + +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializers; +import org.apache.cassandra.tcm.serialization.Version; + +public class AccordMarkStaleTest +{ + @Test + public void shouldSerializeEmpty() throws IOException + { + DataOutputBuffer buffer = new DataOutputBuffer(); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, new AccordMarkStale(Collections.emptySet()), Version.V2); + } + + @Test + public void shouldSerializeSingleton() throws IOException + { + DataOutputBuffer buffer = new DataOutputBuffer(); + AccordMarkStale markStale = new AccordMarkStale(Collections.singleton(NodeId.fromString("1"))); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, markStale, Version.V2); + } + + @Test + public void shouldSerializeMulti() throws IOException + { + DataOutputBuffer buffer = new DataOutputBuffer(); + AccordMarkStale markStale = new AccordMarkStale(ImmutableSet.of(NodeId.fromString("1"), NodeId.fromString("2"))); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, markStale, Version.V2); + } +} From a21c0a75aa35d9c0fe945e0d232590a5329db9fd Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 16 Aug 2024 09:53:00 -0700 Subject: [PATCH 136/340] Add a table to inspect the current state of a txn patch by David Capwell; reviewed by Benedict Elliott Smith for CASSANDRA-19838 --- modules/accord | 2 +- .../db/virtual/AccordVirtualTables.java | 118 ++++++++- .../cassandra/db/virtual/VirtualKeyspace.java | 13 +- .../cassandra/db/virtual/VirtualTable.java | 8 + .../service/accord/AccordService.java | 206 +++++++++++++--- .../accord/CommandStoreTxnBlockedGraph.java | 127 ++++++++++ .../service/accord/IAccordService.java | 3 + .../exceptions/ReadExhaustedException.java | 13 +- .../org/apache/cassandra/cql3/CQLTester.java | 196 ++++++++++++++- .../db/virtual/AccordVirtualTablesTest.java | 227 ++++++++++++++++++ .../service/accord/AccordServiceTest.java | 19 +- .../service/accord/AccordTestUtils.java | 5 + 12 files changed, 878 insertions(+), 59 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java create mode 100644 test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java diff --git a/modules/accord b/modules/accord index 81c02769f9ad..e2ccee4f51fe 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 81c02769f9ad73ef3aba0675c2217fc74b8a4a4c +Subproject commit e2ccee4f51fe4c7c7f3ea8911897135ed7e37114 diff --git a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java index 1b2e041c16e9..b75d081d1d47 100644 --- a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java +++ b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java @@ -18,29 +18,39 @@ package org.apache.cassandra.db.virtual; +import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.local.CommandStores; +import accord.local.Status; import accord.primitives.TxnId; +import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.FieldIdentifier; import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.InvalidRequestException; @@ -51,12 +61,15 @@ import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.CommandStoreTxnBlockedGraph; +import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.consensus.migration.TableMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.Clock; import static com.google.common.collect.ImmutableList.toImmutableList; +import static org.apache.cassandra.utils.ByteBufferUtil.bytes; public class AccordVirtualTables { @@ -70,7 +83,8 @@ public static Collection getAll(String keyspace) return List.of( new CommandStoreCache(keyspace), new MigrationState(keyspace), - new CoordinationStatus(keyspace) + new CoordinationStatus(keyspace), + new TxnBlockedByTable(keyspace) ); } @@ -246,6 +260,108 @@ public DataSet data() } } + public static class TxnBlockedByTable extends AbstractVirtualTable + { + enum Reason { Self, Txn, Key } + private final UserType partitionKeyType; + + protected TxnBlockedByTable(String keyspace) + { + super(TableMetadata.builder(keyspace, "txn_blocked_by") + .kind(TableMetadata.Kind.VIRTUAL) + .addPartitionKeyColumn("txn_id", UTF8Type.instance) + .addClusteringColumn("store_id", Int32Type.instance) + .addClusteringColumn("depth", Int32Type.instance) + .addClusteringColumn("blocked_by", UTF8Type.instance) + .addClusteringColumn("reason", UTF8Type.instance) + .addRegularColumn("save_status", UTF8Type.instance) + .addRegularColumn("execute_at", UTF8Type.instance) + .addRegularColumn("key", pkType(keyspace)) + .build()); + partitionKeyType = pkType(keyspace); + } + + private static UserType pkType(String keyspace) + { + return new UserType(keyspace, bytes("partition_key"), + Arrays.asList(FieldIdentifier.forQuoted("table"), FieldIdentifier.forQuoted("token")), + Arrays.asList(UTF8Type.instance, UTF8Type.instance), false); + } + + private ByteBuffer pk(PartitionKey pk) + { + TableMetadata tm = Schema.instance.getTableMetadata(pk.table()); + return partitionKeyType.pack(UTF8Type.instance.decompose(tm.toString()), + UTF8Type.instance.decompose(pk.token().toString())); + } + + @Override + public Iterable userTypes() + { + return Arrays.asList(partitionKeyType); + } + + @Override + public DataSet data(DecoratedKey partitionKey) + { + TxnId id = TxnId.parse(UTF8Type.instance.compose(partitionKey.getKey())); + List shards = AccordService.instance().debugTxnBlockedGraph(id); + + SimpleDataSet ds = new SimpleDataSet(metadata()); + for (CommandStoreTxnBlockedGraph shard : shards) + { + Set processed = new HashSet<>(); + process(ds, shard, processed, id, 0, id, Reason.Self, null); + // everything was processed right? + if (!shard.txns.isEmpty() && !shard.txns.keySet().containsAll(processed)) + throw new IllegalStateException("Skipped txns: " + Sets.difference(shard.txns.keySet(), processed)); + } + + return ds; + } + + private void process(SimpleDataSet ds, CommandStoreTxnBlockedGraph shard, Set processed, TxnId userTxn, int depth, TxnId txnId, Reason reason, Runnable onDone) + { + if (!processed.add(txnId)) + throw new IllegalStateException("Double processed " + txnId); + CommandStoreTxnBlockedGraph.TxnState txn = shard.txns.get(txnId); + if (txn == null) + { + Invariants.checkState(reason == Reason.Self, "Txn %s unknown for reason %s", txnId, reason); + return; + } + // was it applied? If so ignore it + if (reason != Reason.Self && txn.saveStatus.hasBeen(Status.Applied)) + return; + ds.row(userTxn.toString(), shard.storeId, depth, reason == Reason.Self ? "" : txn.txnId.toString(), reason.name()); + ds.column("save_status", txn.saveStatus.name()); + if (txn.executeAt != null) + ds.column("execute_at", txn.executeAt.toString()); + if (onDone != null) + onDone.run(); + if (txn.isBlocked()) + { + for (TxnId blockedBy : txn.blockedBy) + { + if (processed.contains(blockedBy)) continue; // already listed + process(ds, shard, processed, userTxn, depth + 1, blockedBy, Reason.Txn, null); + } + for (PartitionKey blockedBy : txn.blockedByKey) + { + TxnId blocking = shard.keys.get(blockedBy); + if (processed.contains(blocking)) continue; // already listed + process(ds, shard, processed, userTxn, depth + 1, blocking, Reason.Key, () -> ds.column("key", pk(blockedBy))); + } + } + } + + @Override + public DataSet data() + { + throw new InvalidRequestException("Must select a single txn_id"); + } + } + private static TableMetadata parse(String keyspace, String comment, String query) { return CreateTableStatement.parse(query, keyspace) diff --git a/src/java/org/apache/cassandra/db/virtual/VirtualKeyspace.java b/src/java/org/apache/cassandra/db/virtual/VirtualKeyspace.java index 044c11476bb1..3a5440efd17f 100644 --- a/src/java/org/apache/cassandra/db/virtual/VirtualKeyspace.java +++ b/src/java/org/apache/cassandra/db/virtual/VirtualKeyspace.java @@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; +import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Tables; @@ -50,7 +51,17 @@ public VirtualKeyspace(String name, Collection tables) if (!duplicates.isEmpty()) throw new IllegalArgumentException(String.format("Duplicate table names in virtual keyspace %s: %s", name, duplicates)); - metadata = KeyspaceMetadata.virtual(name, Tables.of(Iterables.transform(tables, VirtualTable::metadata))); + KeyspaceMetadata metadata = KeyspaceMetadata.virtual(name, Tables.of(Iterables.transform(tables, VirtualTable::metadata))); + for (VirtualTable t : tables) + { + for (UserType udt : t.userTypes()) + { + if (metadata.types.getNullable(udt.name) != null) + throw new IllegalStateException("UDT " + udt.getNameAsString() + " already exists"); + metadata = metadata.withUpdatedUserType(udt); + } + } + this.metadata = metadata; } public String name() diff --git a/src/java/org/apache/cassandra/db/virtual/VirtualTable.java b/src/java/org/apache/cassandra/db/virtual/VirtualTable.java index 770cb139830f..6d6829a56f29 100644 --- a/src/java/org/apache/cassandra/db/virtual/VirtualTable.java +++ b/src/java/org/apache/cassandra/db/virtual/VirtualTable.java @@ -17,11 +17,14 @@ */ package org.apache.cassandra.db.virtual; +import java.util.Collections; + import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.filter.ClusteringIndexFilter; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.schema.TableMetadata; @@ -90,4 +93,9 @@ default boolean allowFilteringImplicitly() { return true; } + + default Iterable userTypes() + { + return Collections.emptyList(); + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index b3e03cb8c854..6db8610d4735 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -18,7 +18,9 @@ package org.apache.cassandra.service.accord; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -27,6 +29,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.BiFunction; +import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -37,7 +40,6 @@ import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; import com.google.common.base.Throwables; -import com.google.common.collect.ImmutableMap; import com.google.common.primitives.Ints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,13 +59,20 @@ import accord.impl.CoordinateDurabilityScheduling; import accord.impl.SimpleProgressLog; import accord.impl.SizeOfIntersectionSorter; +import accord.local.Command; +import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.DurableBefore; +import accord.local.KeyHistory; import accord.local.Node; import accord.local.Node.Id; import accord.local.NodeTimeService; +import accord.local.PreLoadContext; import accord.local.RedundantBefore; +import accord.local.SaveStatus; import accord.local.ShardDistributor.EvenSplit; +import accord.local.Status; +import accord.local.cfk.CommandsForKey; import accord.messages.LocalRequest; import accord.messages.Request; import accord.primitives.Keys; @@ -148,10 +157,8 @@ public class AccordService implements IAccordService, Shutdownable { private static final Logger logger = LoggerFactory.getLogger(AccordService.class); - private enum State { INIT, STARTED, SHUTDOWN} + private enum State {INIT, STARTED, SHUTDOWN} - public static final AccordClientRequestMetrics readMetrics = new AccordClientRequestMetrics("AccordRead"); - public static final AccordClientRequestMetrics writeMetrics = new AccordClientRequestMetrics("AccordWrite"); private static final Future BOOTSTRAP_SUCCESS = ImmediateFuture.success(null); private final Node node; @@ -263,6 +270,12 @@ public CompactionInfo getCompactionInfo() { return new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), DurableBefore.EMPTY); } + + @Override + public List debugTxnBlockedGraph(TxnId txnId) + { + return Collections.emptyList(); + } }; private static volatile IAccordService instance = null; @@ -380,7 +393,7 @@ public synchronized void startup() return; journal.start(node); configService.start(); - ClusterMetadataService.instance().log().addListener(configService); + fastPathCoordinator.start(); ClusterMetadataService.instance().log().addListener(fastPathCoordinator); durabilityScheduling.setGlobalCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordGlobalDurabilityCycle(SECONDS)), SECONDS); @@ -400,7 +413,7 @@ public IVerbHandler verbHandler() private > Seekables barrier(@Nonnull S keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, BiFunction>> syncPoint) { Stopwatch sw = Stopwatch.createStarted(); - keysOrRanges = (S)intersectionWithAccordManagedRanges(keysOrRanges); + keysOrRanges = (S) intersectionWithAccordManagedRanges(keysOrRanges); // It's possible none of them were Accord managed and we aren't going to treat that as an error if (keysOrRanges.isEmpty()) { @@ -409,11 +422,9 @@ public IVerbHandler verbHandler() } AccordClientRequestMetrics metrics = isForWrite ? accordWriteMetrics : accordReadMetrics; - TxnId txnId = null; try { logger.debug("Starting barrier key: {} epoch: {} barrierType: {} isForWrite {}", keysOrRanges, epoch, barrierType, isForWrite); - txnId = node.nextTxnId(Kind.SyncPoint, keysOrRanges.domain()); AsyncResult asyncResult = syncPoint == null ? Barrier.barrier(node, keysOrRanges, epoch, barrierType) : Barrier.barrier(node, keysOrRanges, epoch, barrierType, syncPoint); @@ -430,21 +441,24 @@ public IVerbHandler verbHandler() Throwable cause = Throwables.getRootCause(e); if (cause instanceof Timeout) { + TxnId txnId = ((Timeout) cause).txnId(); metrics.timeouts.mark(); - throw newBarrierTimeout(txnId, barrierType.global); + throw newBarrierTimeout(txnId, barrierType, isForWrite, keysOrRanges); } if (cause instanceof Preempted) { + TxnId txnId = ((Preempted) cause).txnId(); //TODO need to improve // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match - throw newBarrierPreempted(txnId, barrierType.global); + throw newBarrierPreempted(txnId, barrierType, isForWrite, keysOrRanges); } if (cause instanceof Exhausted) { + TxnId txnId = ((Exhausted) cause).txnId(); // this case happens when a non-timeout exception is seen, and we are unable to move forward metrics.failures.mark(); - throw newBarrierExhausted(txnId, barrierType.global); + throw newBarrierExhausted(txnId, barrierType, isForWrite, keysOrRanges); } // unknown error metrics.failures.mark(); @@ -458,7 +472,7 @@ public IVerbHandler verbHandler() catch (TimeoutException e) { metrics.timeouts.mark(); - throw newBarrierTimeout(txnId, barrierType.global); + throw newBarrierTimeout(null, barrierType, isForWrite, keysOrRanges); } finally { @@ -486,16 +500,16 @@ public Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher. return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, repairSyncPoint(allNodes)); } - private static > Seekables intersectionWithAccordManagedRanges(Seekables keysOrRanges) + private static > Seekables intersectionWithAccordManagedRanges(Seekables keysOrRanges) { TableId tableId = null; for (Seekable seekable : keysOrRanges) { TableId newTableId; if (keysOrRanges.domain() == Key) - newTableId = ((PartitionKey)seekable).table(); + newTableId = ((PartitionKey) seekable).table(); else if (keysOrRanges.domain() == Range) - newTableId = ((TokenRange)seekable).table(); + newTableId = ((TokenRange) seekable).table(); else throw new IllegalStateException("Unexpected domain " + keysOrRanges.domain()); @@ -534,22 +548,21 @@ else if (!tableId.equals(newTableId)) } @VisibleForTesting - static ReadTimeoutException newBarrierTimeout(TxnId txnId, boolean global) + static ReadTimeoutException newBarrierTimeout(TxnId txnId, BarrierType barrierType, boolean isForWrite, Seekables keysOrRanges) { - return new ReadTimeoutException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); + return new ReadTimeoutException(barrierType.global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, String.format("Timeout waiting on barrier %s / %s / %s; impacted ranges %s", txnId, barrierType, isForWrite ? "write" : "not write", keysOrRanges)); } @VisibleForTesting - static ReadTimeoutException newBarrierPreempted(TxnId txnId, boolean global) + static ReadTimeoutException newBarrierPreempted(TxnId txnId, BarrierType barrierType, boolean isForWrite, Seekables keysOrRanges) { - return new ReadPreemptedException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, txnId.toString()); + return new ReadPreemptedException(barrierType.global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, String.format("Preempted waiting on barrier %s / %s / %s; impacted ranges %s", txnId, barrierType, isForWrite ? "write" : "not write", keysOrRanges)); } @VisibleForTesting - static ReadExhaustedException newBarrierExhausted(TxnId txnId, boolean global) + static ReadExhaustedException newBarrierExhausted(TxnId txnId, BarrierType barrierType, boolean isForWrite, Seekables keysOrRanges) { - //TODO (usability): not being able to show the txn is a bad UX, this becomes harder to trace back in logs - return new ReadExhaustedException(global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, ImmutableMap.of()); + return new ReadExhaustedException(barrierType.global ? ConsistencyLevel.ANY : ConsistencyLevel.QUORUM, 0, 0, false, String.format("Exhausted (too many failures from peers) waiting on barrier %s / %s / %s; impacted ranges %s", txnId, barrierType, isForWrite ? "write" : "not write", keysOrRanges)); } @VisibleForTesting @@ -668,12 +681,12 @@ public TopologyManager topology() Throwable cause = failure != null ? Throwables.getRootCause(failure) : null; if (success != null) { - if (((TxnResult)success).kind() == TxnResult.Kind.retry_new_protocol) + if (((TxnResult) success).kind() == TxnResult.Kind.retry_new_protocol) { metrics.retryDifferentSystem.mark(); Tracing.trace("Got retry different system error from Accord, will retry"); } - asyncTxnResult.trySuccess((TxnResult)success); + asyncTxnResult.trySuccess((TxnResult) success); return; } @@ -727,7 +740,7 @@ public TxnResult getTxnResult(AsyncTxnResult asyncTxnResult, boolean isWrite, @N throw (RequestTimeoutException) cause; } else if (cause instanceof RuntimeException) - throw (RuntimeException)cause; + throw (RuntimeException) cause; else throw new RuntimeException(cause); } @@ -756,7 +769,7 @@ private static RequestTimeoutException newTimeout(TxnId txnId, boolean isWrite, if (consistencyLevel == null) consistencyLevel = ConsistencyLevel.ANY; return isWrite ? new WriteTimeoutException(WriteType.CAS, consistencyLevel, 0, 0, txnId.toString()) - : new ReadTimeoutException(consistencyLevel, 0, 0, false, txnId.toString()); + : new ReadTimeoutException(consistencyLevel, 0, 0, false, txnId.toString()); } private static RuntimeException newPreempted(TxnId txnId, boolean isWrite, ConsistencyLevel consistencyLevel) @@ -764,7 +777,7 @@ private static RuntimeException newPreempted(TxnId txnId, boolean isWrite, Consi if (consistencyLevel == null) consistencyLevel = ConsistencyLevel.ANY; return isWrite ? new WritePreemptedException(WriteType.CAS, consistencyLevel, 0, 0, txnId.toString()) - : new ReadPreemptedException(consistencyLevel, 0, 0, false, txnId.toString()); + : new ReadPreemptedException(consistencyLevel, 0, 0, false, txnId.toString()); } @Override @@ -835,6 +848,143 @@ public Id nodeId() return node.id(); } + @Override + public List debugTxnBlockedGraph(TxnId txnId) + { + AsyncChain> states = loadDebug(txnId); + try + { + return AsyncChains.getBlocking(states); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e.getCause()); + } + } + + public AsyncChain> loadDebug(TxnId original) + { + CommandStores commandStores = node.commandStores(); + if (commandStores.count() == 0) + return AsyncChains.success(Collections.emptyList()); + int[] ids = commandStores.ids(); + List> chains = new ArrayList<>(ids.length); + for (int id : ids) + chains.add(loadDebug(original, commandStores.forId(id))); + return AsyncChains.all(chains); + } + + private AsyncChain loadDebug(TxnId txnId, CommandStore store) + { + CommandStoreTxnBlockedGraph.Builder state = new CommandStoreTxnBlockedGraph.Builder(store.id()); + return populate(state, store, txnId).map(ignore -> state.build()); + } + + private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, CommandStore store, TxnId txnId) + { + AsyncChain> submit = store.submit(PreLoadContext.contextFor(txnId), in -> { + AsyncChain chain = populate(state, (AccordSafeCommandStore) in, txnId); + return chain == null ? AsyncChains.success(null) : chain; + }); + return submit.flatMap(Function.identity()); + } + + private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, CommandStore commandStore, PartitionKey blockedBy, TxnId txnId, Timestamp executeAt) + { + AsyncChain> submit = commandStore.submit(PreLoadContext.contextFor(txnId, Keys.of(blockedBy), KeyHistory.COMMANDS), in -> { + AsyncChain chain = populate(state, (AccordSafeCommandStore) in, blockedBy, txnId, executeAt); + return chain == null ? AsyncChains.success(null) : chain; + }); + return submit.flatMap(Function.identity()); + } + + @Nullable + private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, AccordSafeCommandStore safeStore, TxnId txnId) + { + AccordSafeCommand safeCommand = safeStore.getIfLoaded(txnId); + Invariants.nonNull(safeCommand, "Txn %s is not in the cache", txnId); + if (safeCommand.current() == null || safeCommand.current().saveStatus() == SaveStatus.Uninitialised) + return null; + CommandStoreTxnBlockedGraph.TxnState cmdTxnState = populate(state, safeCommand.current()); + if (cmdTxnState.notBlocked()) + return null; + //TODO (safety): check depth + List> chains = new ArrayList<>(); + for (TxnId blockedBy : cmdTxnState.blockedBy) + { + if (state.knows(blockedBy)) continue; + // need to fetch the state + if (safeStore.getIfLoaded(blockedBy) != null) + { + AsyncChain chain = populate(state, safeStore, blockedBy); + if (chain != null) + chains.add(chain); + } + else + { + // go fetch it + chains.add(populate(state, safeStore.commandStore(), blockedBy)); + } + } + for (PartitionKey blockedBy : cmdTxnState.blockedByKey) + { + if (state.keys.containsKey(blockedBy)) continue; + if (safeStore.getCommandsForKeyIfLoaded(blockedBy) != null) + { + AsyncChain chain = populate(state, safeStore, blockedBy, txnId, safeCommand.current().executeAt()); + if (chain != null) + chains.add(chain); + } + else + { + // go fetch it + chains.add(populate(state, safeStore.commandStore(), blockedBy, txnId, safeCommand.current().executeAt())); + } + } + if (chains.isEmpty()) + return null; + return AsyncChains.all(chains).map(ignore -> null); + } + + private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, AccordSafeCommandStore safeStore, PartitionKey pk, TxnId txnId, Timestamp executeAt) + { + AccordSafeCommandsForKey commandsForKey = safeStore.getCommandsForKeyIfLoaded(pk); + TxnId blocking = commandsForKey.current().blockedOnTxnId(txnId, executeAt); + if (blocking instanceof CommandsForKey.TxnInfo) + blocking = ((CommandsForKey.TxnInfo) blocking).plainTxnId(); + state.keys.put(pk, blocking); + if (state.txns.containsKey(blocking)) return null; + if (safeStore.getIfLoaded(blocking) != null) return populate(state, safeStore, blocking); + return populate(state, safeStore.commandStore(), blocking); + } + + private static CommandStoreTxnBlockedGraph.TxnState populate(CommandStoreTxnBlockedGraph.Builder state, Command cmd) + { + CommandStoreTxnBlockedGraph.Builder.TxnBuilder cmdTxnState = state.txn(cmd.txnId(), cmd.executeAt(), cmd.saveStatus()); + if (!cmd.hasBeen(Status.Applied) && cmd.isCommitted()) + { + // check blocking state + Command.WaitingOn waitingOn = cmd.asCommitted().waitingOn(); + waitingOn.waitingOn.reverseForEach(null, null, null, null, (i1, i2, i3, i4, i) -> { + if (i < waitingOn.txnIdCount()) + { + // blocked on txn + cmdTxnState.blockedBy.add(waitingOn.txnId(i)); + } + else + { + // blocked on key + cmdTxnState.blockedByKey.add((PartitionKey) waitingOn.keys.get(i - waitingOn.txnIdCount())); + } + }); + } + return cmdTxnState.build(); + } + public Node node() { return node; @@ -923,7 +1073,7 @@ public AccordConfigurationService configurationService() public CompactionInfo getCompactionInfo() { Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); - Int2ObjectHashMapranges = new Int2ObjectHashMap<>(); + Int2ObjectHashMap ranges = new Int2ObjectHashMap<>(); AtomicReference durableBefore = new AtomicReference<>(DurableBefore.EMPTY); AsyncChains.getBlockingAndRethrow(node.commandStores().forEach(safeStore -> { synchronized (redundantBefores) diff --git a/src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java b/src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java new file mode 100644 index 000000000000..c7d0147add86 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import accord.local.SaveStatus; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.service.accord.api.PartitionKey; + +public class CommandStoreTxnBlockedGraph +{ + public final int storeId; + public final Map txns; + public final Map keys; + + public CommandStoreTxnBlockedGraph(Builder builder) + { + storeId = builder.storeId; + txns = ImmutableMap.copyOf(builder.txns); + keys = ImmutableMap.copyOf(builder.keys); + } + + public static class TxnState + { + public final TxnId txnId; + public final Timestamp executeAt; + public final SaveStatus saveStatus; + public final List blockedBy; + public final Set blockedByKey; + + public TxnState(Builder.TxnBuilder builder) + { + txnId = builder.txnId; + executeAt = builder.executeAt; + saveStatus = builder.saveStatus; + blockedBy = ImmutableList.copyOf(builder.blockedBy); + blockedByKey = ImmutableSet.copyOf(builder.blockedByKey); + } + + public boolean isBlocked() + { + return !notBlocked(); + } + + public boolean notBlocked() + { + return blockedBy.isEmpty() && blockedByKey.isEmpty(); + } + } + + public static class Builder + { + final int storeId; + final Map txns = new LinkedHashMap<>(); + final Map keys = new LinkedHashMap<>(); + + public Builder(int storeId) + { + this.storeId = storeId; + } + + boolean knows(TxnId id) + { + return txns.containsKey(id); + } + + public CommandStoreTxnBlockedGraph build() + { + return new CommandStoreTxnBlockedGraph(this); + } + + public TxnBuilder txn(TxnId txnId, Timestamp executeAt, SaveStatus saveStatus) + { + return new TxnBuilder(txnId, executeAt, saveStatus); + } + + public class TxnBuilder + { + final TxnId txnId; + final Timestamp executeAt; + final SaveStatus saveStatus; + List blockedBy = new ArrayList<>(); + Set blockedByKey = new LinkedHashSet<>(); + + public TxnBuilder(TxnId txnId, Timestamp executeAt, SaveStatus saveStatus) + { + this.txnId = txnId; + this.executeAt = executeAt; + this.saveStatus = saveStatus; + } + + public TxnState build() + { + TxnState state = new TxnState(this); + txns.put(txnId, state); + return state; + } + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 4a8c0be6bb42..e01dcc313778 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -55,6 +55,7 @@ import static com.google.common.base.Preconditions.checkNotNull; + public interface IAccordService { Set SUPPORTED_COMMIT_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.LOCAL_ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); @@ -144,4 +145,6 @@ public CompactionInfo(Int2ObjectHashMap redundantBefores, Int2O CompactionInfo getCompactionInfo(); default Id nodeId() { throw new UnsupportedOperationException(); } + + List debugTxnBlockedGraph(TxnId txnId); } diff --git a/src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java b/src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java index 4ebfc8fdb095..c9fc1bd14b14 100644 --- a/src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java +++ b/src/java/org/apache/cassandra/service/accord/exceptions/ReadExhaustedException.java @@ -18,22 +18,15 @@ package org.apache.cassandra.service.accord.exceptions; -import java.util.Map; +import com.google.common.collect.ImmutableMap; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.exceptions.ReadFailureException; -import org.apache.cassandra.exceptions.RequestFailureReason; -import org.apache.cassandra.locator.InetAddressAndPort; public class ReadExhaustedException extends ReadFailureException { - public ReadExhaustedException(ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent, Map failureReasonByEndpoint) + public ReadExhaustedException(ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent, String msg) { - super(consistency, received, blockFor, dataPresent, failureReasonByEndpoint); - } - - protected ReadExhaustedException(String msg, ConsistencyLevel consistency, int received, int blockFor, boolean dataPresent, Map failureReasonByEndpoint) - { - super(msg, consistency, received, blockFor, dataPresent, failureReasonByEndpoint); + super(msg, consistency, received, blockFor, dataPresent, ImmutableMap.of()); } } diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index f53955b93a1d..c47b244d0578 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -68,6 +68,7 @@ import com.google.common.collect.Iterables; import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.virtual.SystemViewsKeyspace; import org.assertj.core.api.Assertions; import org.awaitility.Awaitility; import org.apache.commons.lang3.ArrayUtils; @@ -519,6 +520,11 @@ protected static void addMetricsKeyspace() VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(VIRTUAL_METRICS, createMetricsKeyspaceTables())); } + protected static void addVirtualKeyspace() + { + VirtualKeyspaceRegistry.instance.register(SystemViewsKeyspace.instance); + } + protected void resetSchema() throws Throwable { for (TableMetadata table : SchemaKeyspace.metadata().tables) @@ -2062,6 +2068,133 @@ protected void assertRowCountNet(ResultSet r1, int expectedCount) Assert.assertEquals(String.format("expected %d rows but received %d", expectedCount, actualRowCount), expectedCount, actualRowCount); } + public abstract static class CellValidator + { + public abstract ByteBuffer expected(); + public abstract boolean equals(ByteBuffer bb); + + @Override + public boolean equals(Object obj) + { + if (obj instanceof ByteBuffer) + return equals((ByteBuffer) obj); + return false; + } + + public abstract String describe(); + } + + protected static CellValidator any() + { + return new CellValidator() + { + @Override + public ByteBuffer expected() + { + return ByteBufferUtil.EMPTY_BYTE_BUFFER; + } + + @Override + public boolean equals(ByteBuffer bb) + { + return true; + } + + @Override + public String describe() + { + return "any"; + } + }; + } + + protected static CellValidator anyNonNull() + { + return new CellValidator() + { + @Override + public ByteBuffer expected() + { + return ByteBufferUtil.EMPTY_BYTE_BUFFER; + } + + @Override + public boolean equals(ByteBuffer bb) + { + return !(bb == null || !bb.hasRemaining()); + } + + @Override + public String describe() + { + return "any non-null"; + } + }; + } + + protected static CellValidator anyInt() + { + return new CellValidator() + { + @Override + public ByteBuffer expected() + { + return ByteBufferUtil.bytes(0); + } + + @Override + public boolean equals(ByteBuffer bb) + { + if (bb == null) return false; + Int32Type.instance.validate(bb); + return bb.hasRemaining(); + } + + @Override + public String describe() + { + return "any non-null int"; + } + }; + } + + protected static CellValidator anyOf(String... values) + { + return anyOf(UTF8Type.instance, values); + } + + protected static CellValidator anyOf(AbstractType type, T... values) + { + assert values.length > 0; + ByteBuffer[] bbs = new ByteBuffer[values.length]; + for (int i = 0; i < values.length; i++) + bbs[i] = type.decompose(values[i]); + return new CellValidator() + { + @Override + public ByteBuffer expected() + { + return bbs[0]; + } + + @Override + public boolean equals(ByteBuffer bb) + { + for (int i = 0; i < bbs.length; i++) + { + if (Objects.equal(bbs[i], bb)) return true; + } + return false; + } + + @Override + public String describe() + { + return formatValue(bbs[0], type); + } + }; + } + public static void assertRows(UntypedResultSet result, Object[]... rows) { if (result == null) @@ -2085,24 +2218,22 @@ public static void assertRows(UntypedResultSet result, Object[]... rows) for (int j = 0; j < meta.size(); j++) { ColumnSpecification column = meta.get(j); - ByteBuffer expectedByteValue = makeByteBuffer(expected == null ? null : expected[j], column.type); + CellValidator cellValidator = makeCellValidator(expected == null ? null : expected[j], column.type); ByteBuffer actualValue = actual.getBytes(column.name.toString()); - if (expectedByteValue != null) - expectedByteValue = expectedByteValue.duplicate(); - if (!Objects.equal(expectedByteValue, actualValue)) + if (!((cellValidator == null && actualValue == null) || (cellValidator != null && cellValidator.equals(actualValue)))) { Object actualValueDecoded = actualValue == null ? null : column.type.getSerializer().deserialize(actualValue); if (!Objects.equal(expected != null ? expected[j] : null, actualValueDecoded)) { - if (isEmptyContainerNull(column.type, expectedByteValue, actualValue)) + if (isEmptyContainerNull(column.type, cellValidator != null ? cellValidator.expected() : null, actualValue)) continue; error.append(String.format("Invalid value for row %d column %d (%s of type %s), expected <%s> but got <%s>", i, j, column.name, column.type.asCQL3Type(), - formatValue(expectedByteValue != null ? expectedByteValue.duplicate() : null, column.type), + cellValidator != null ? cellValidator.describe() : "null", formatValue(actualValue, column.type))).append("\n"); } } @@ -2126,14 +2257,30 @@ public static void assertRows(UntypedResultSet result, Object[]... rows) ByteBuffer actualValue = actual.getBytes(column.name.toString()); str.append(String.format("%s=%s ", column.name, formatValue(actualValue, column.type))); } - logger.info("Extra row num {}: {}", i, str.toString()); + logger.info("Extra row num {}: {}", i, str); } - Assert.fail(String.format("Got more rows than expected. Expected %d but got %d.", rows.length, i)); + Assert.fail(String.format("Got more rows than expected. Expected %d but got %d.\nExpected: %s\nActual: %s", rows.length, i, toString(rows), result.toStringUnsafe())); } Assert.assertTrue(String.format("Got %s rows than expected. Expected %d but got %d", rows.length>i ? "less" : "more", rows.length, i), i == rows.length); } + private static String toString(Object o) + { + if (o == null) + return "null"; + if (o instanceof CellValidator) + return ((CellValidator) o).describe(); + if (o instanceof Object[]) + return toString((Object[]) o); + return o.toString(); + } + + private static String toString(Object[] array) + { + return Stream.of(array).map(CQLTester::toString).collect(Collectors.joining(", ", "[", "]")); + } + /** * Like assertRows(), but ignores the ordering of rows. */ @@ -2740,11 +2887,42 @@ public static ByteBuffer makeByteBuffer(Object value, AbstractType type) return ((TupleValue)value).toByteBuffer(); if (value instanceof ByteBuffer) - return (ByteBuffer)value; + return ((ByteBuffer)value); return type.decomposeUntyped(serializeTuples(value)); } + public static CellValidator makeCellValidator(Object value, AbstractType type) + { + if (value == null) + return null; + if (value instanceof CellValidator) + return (CellValidator) value; + + ByteBuffer byteBuffer = makeByteBuffer(value, type); + return new CellValidator() + { + @Override + public ByteBuffer expected() + { + return byteBuffer; + } + + @Override + public boolean equals(ByteBuffer bb) + { + if (bb == null) return false; + return byteBuffer.equals(bb); + } + + @Override + public String describe() + { + return formatValue(byteBuffer, type); + } + }; + } + private static String formatValue(ByteBuffer bb, AbstractType type) { if (bb == null) diff --git a/test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java b/test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java new file mode 100644 index 000000000000..59eae7ab0c1a --- /dev/null +++ b/test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.virtual; + +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.function.BiPredicate; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.local.SaveStatus; +import accord.messages.TxnRequest; +import accord.primitives.Routable; +import accord.primitives.Txn; +import accord.primitives.TxnId; +import accord.utils.async.AsyncChains; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.OptionaldPositiveInt; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.concurrent.Condition; +import org.awaitility.Awaitility; + +import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; + +public class AccordVirtualTablesTest extends CQLTester +{ + private static final Logger logger = LoggerFactory.getLogger(AccordVirtualTablesTest.class); + + private static final String QUERY_TXN_BLOCKED_BY = "SELECT * FROM system_views.txn_blocked_by WHERE txn_id=?"; + private static final String QUERY_TXN_STATUS = "SELECT save_status FROM system_views.txn_blocked_by WHERE txn_id=? LIMIT 1"; + + @BeforeClass + public static void setUpClass() + { + daemonInitialization(); + DatabaseDescriptor.getAccord().shard_count = new OptionaldPositiveInt(1); + + CQLTester.setUpClass(); + + AccordService.startup(ClusterMetadata.current().myNodeId()); + addVirtualKeyspace(); + requireNetwork(); + } + + @Test + public void unknownIsEmpty() + { + createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode = 'full'"); + assertRows(execute(QUERY_TXN_BLOCKED_BY, TxnId.NONE.toString())); + } + + @Test + public void completedTxn() throws ExecutionException, InterruptedException + { + String tableName = createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode = 'full'"); + var accord = accord(); + TxnId id = accord.node().nextTxnId(Txn.Kind.Write, Routable.Domain.Key); + Txn txn = createTxn(wrapInTxn(String.format("INSERT INTO %s.%s(k, c, v) VALUES (?, ?, ?)", KEYSPACE, tableName)), 0, 0, 0); + AsyncChains.getBlocking(accord.node().coordinate(id, txn)); + + assertRows(execute(QUERY_TXN_BLOCKED_BY, id.toString()), + row(id.toString(), anyInt(), 0, "", "Self", any(), null, anyOf(SaveStatus.Applying.name(), SaveStatus.Applied.name()))); + } + + @Test + public void inflight() throws ExecutionException, InterruptedException + { + AccordMsgFilter filter = new AccordMsgFilter(); + MessagingService.instance().outboundSink.add(filter); + try + { + String tableName = createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode = 'full'"); + var accord = accord(); + TxnId id = accord.node().nextTxnId(Txn.Kind.Write, Routable.Domain.Key); + Txn txn = createTxn(wrapInTxn(String.format("INSERT INTO %s.%s(k, c, v) VALUES (?, ?, ?)", KEYSPACE, tableName)), 0, 0, 0); + accord.node().coordinate(id, txn); + + filter.preAccept.awaitThrowUncheckedOnInterrupt(); + + assertRows(execute(QUERY_TXN_BLOCKED_BY, id.toString()), + row(id.toString(), anyInt(), 0, "", "Self", any(), null, anyOf(SaveStatus.PreAccepted.name(), SaveStatus.ReadyToExecute.name()))); + + filter.apply.awaitThrowUncheckedOnInterrupt(); + assertRows(execute(QUERY_TXN_BLOCKED_BY, id.toString()), + row(id.toString(), anyInt(), 0, "", "Self", any(), null, SaveStatus.ReadyToExecute.name())); + } + finally + { + MessagingService.instance().outboundSink.remove(filter); + } + } + + @Test + public void blocked() throws ExecutionException, InterruptedException + { + AccordMsgFilter filter = new AccordMsgFilter(); + MessagingService.instance().outboundSink.add(filter); + try + { + String tableName = createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c)) WITH transactional_mode = 'full'"); + var accord = accord(); + TxnId first = accord.node().nextTxnId(Txn.Kind.Write, Routable.Domain.Key); + accord.node().coordinate(first, createTxn(wrapInTxn(String.format("INSERT INTO %s.%s(k, c, v) VALUES (?, ?, ?)", KEYSPACE, tableName)), 0, 0, 0)); + + filter.preAccept.awaitThrowUncheckedOnInterrupt(); + assertRows(execute(QUERY_TXN_BLOCKED_BY, first.toString()), + row(first.toString(), anyInt(), 0, "", "Self", any(), null, anyOf(SaveStatus.PreAccepted.name(), SaveStatus.ReadyToExecute.name()))); + + filter.apply.awaitThrowUncheckedOnInterrupt(); + assertRows(execute(QUERY_TXN_BLOCKED_BY, first.toString()), + row(first.toString(), anyInt(), 0, "", "Self", anyNonNull(), null, SaveStatus.ReadyToExecute.name())); + + filter.reset(); + + TxnId second = accord.node().nextTxnId(Txn.Kind.Write, Routable.Domain.Key); + accord.node().coordinate(second, createTxn(wrapInTxn(String.format("INSERT INTO %s.%s(k, c, v) VALUES (?, ?, ?)", KEYSPACE, tableName)), 0, 0, 0)); + + filter.commit.awaitThrowUncheckedOnInterrupt(); + + Awaitility.await("waiting on key").atMost(1, TimeUnit.MINUTES) + .until(() -> { + UntypedResultSet rs = execute(QUERY_TXN_BLOCKED_BY, second.toString()); + return rs.size() == 2; + }); + assertRows(execute(QUERY_TXN_BLOCKED_BY, second.toString()), + row(second.toString(), anyInt(), 0, "", "Self", anyNonNull(), null, SaveStatus.Stable.name()), + row(second.toString(), anyInt(), 1, first.toString(), "Key", anyNonNull(), anyNonNull(), SaveStatus.ReadyToExecute.name())); + } + finally + { + MessagingService.instance().outboundSink.remove(filter); + } + } + + private static AccordService accord() + { + return (AccordService) AccordService.instance(); + } + + private static class AccordMsgFilter implements BiPredicate, InetAddressAndPort> + { + volatile Condition preAccept = Condition.newOneTimeCondition(); + volatile Condition commit = Condition.newOneTimeCondition(); + volatile Condition apply = Condition.newOneTimeCondition(); + + void reset() + { + preAccept = Condition.newOneTimeCondition(); + commit = Condition.newOneTimeCondition(); + apply = Condition.newOneTimeCondition(); + } + + ConcurrentMap> txnToVerbs = new ConcurrentHashMap<>(); + + @Override + public boolean test(Message msg, InetAddressAndPort to) + { + if (!msg.verb().name().startsWith("ACCORD_")) + return true; + TxnId txnId = null; + if (msg.payload instanceof TxnRequest) + { + txnId = ((TxnRequest) msg.payload).txnId; + } + Set seen = null; + if (txnId != null) + { + seen = txnToVerbs.computeIfAbsent(txnId, ignore -> new ConcurrentSkipListSet<>()); + seen.add(msg.verb()); + } + switch (msg.verb()) + { + case ACCORD_APPLY_REQ: + case ACCORD_APPLY_AND_WAIT_REQ: + apply.signalAll(); + case ACCORD_BEGIN_RECOVER_REQ: + return false; + case ACCORD_PRE_ACCEPT_RSP: + preAccept.signalAll(); + return true; + case ACCORD_COMMIT_REQ: + commit.signalAll(); + return true; + case ACCORD_PRE_ACCEPT_REQ: + case ACCORD_CHECK_STATUS_REQ: + case ACCORD_CHECK_STATUS_RSP: + case ACCORD_READ_RSP: + return true; + default: + // many code paths don't log the error... + UnsupportedOperationException e = new UnsupportedOperationException(msg.verb().name()); + logger.error("Unexpected verb {}", msg.verb(), e); + throw e; + } + } + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java index 3abdcf808040..f6918422e926 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java @@ -26,6 +26,7 @@ import org.junit.Test; +import accord.api.BarrierType; import accord.coordinate.Exhausted; import accord.coordinate.Preempted; import accord.coordinate.Timeout; @@ -64,19 +65,19 @@ public Seekables get() throw new Timeout(null, null); case 1: attempts++; - throw AccordService.newBarrierTimeout(TxnId.NONE, true); + throw AccordService.newBarrierTimeout(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY); case 2: attempts++; throw new Preempted(null, null); case 3: attempts++; - throw AccordService.newBarrierPreempted(TxnId.NONE, true); + throw AccordService.newBarrierPreempted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY); case 4: attempts++; throw new Exhausted(null, null); case 5: attempts++; - throw AccordService.newBarrierExhausted(TxnId.NONE, true); + throw AccordService.newBarrierExhausted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY); default: return Ranges.of(IntKey.range(1, 2)); } @@ -98,9 +99,9 @@ public void retryThrowsTimeout() qt().check(rs -> { List timeoutFailures = new ArrayList<>(4); timeoutFailures.add(() -> {throw new Timeout(null, null);}); - timeoutFailures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, true);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); timeoutFailures.add(() -> {throw new Preempted(null, null);}); - timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, true);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); Collections.shuffle(timeoutFailures, rs.asJdkRandom()); Iterator it = timeoutFailures.iterator(); Supplier failing = () -> { @@ -120,9 +121,9 @@ public void retryThrowsNonTimeout() qt().check(rs -> { List timeoutFailures = new ArrayList<>(5); timeoutFailures.add(() -> {throw new Timeout(null, null);}); - timeoutFailures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, true);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); timeoutFailures.add(() -> {throw new Preempted(null, null);}); - timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, true);}); + timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); timeoutFailures.add(() -> {throw new Exhausted(null, null);}); Collections.shuffle(timeoutFailures, rs.asJdkRandom()); Iterator it = timeoutFailures.iterator(); @@ -158,9 +159,9 @@ public void run() qt().check(rs -> { List failures = new ArrayList<>(6); failures.add(() -> {throw new Timeout(null, null);}); - failures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, true);}); + failures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); failures.add(() -> {throw new Preempted(null, null);}); - failures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, true);}); + failures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); failures.add(() -> {throw new Exhausted(null, null);}); boolean isError = rs.nextBoolean(); failures.add(new Unexpected(isError)); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index bfbf4e509594..54a311a67b91 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -287,6 +287,11 @@ public static Txn createTxn(String query) return createTxn(query, QueryOptions.DEFAULT); } + public static Txn createTxn(String query, Object... binds) + { + return createTxn(query, Arrays.asList(binds)); + } + public static Txn createTxn(String query, List binds) { TransactionStatement statement = parse(query); From f1db115e73f7050e82b84a4f642a065c7030414e Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 30 Aug 2024 09:06:39 -0700 Subject: [PATCH 137/340] Create a fuzz test that randomizes topology changes, cluster actions, and CQL operations patch by David Capwell; reviewed by Alex Petrov for CASSANDRA-19847 --- modules/accord | 2 +- .../index/accord/RouteIndexTest.java | 129 +++++++----------- .../cassandra/io/util/ChecksumedDataTest.java | 5 +- .../service/accord/EpochSyncTest.java | 107 ++++++--------- .../utils/StatefulRangeTreeTest.java | 98 ++++--------- 5 files changed, 127 insertions(+), 214 deletions(-) diff --git a/modules/accord b/modules/accord index e2ccee4f51fe..a171322f417c 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit e2ccee4f51fe4c7c7f3ea8911897135ed7e37114 +Subproject commit a171322f417c117733ca5b514d03a5202b1ac202 diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java index 17de14e4158e..edc7f2c517a8 100644 --- a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -22,7 +22,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -49,7 +48,6 @@ import accord.utils.Gen; import accord.utils.Gens; import accord.utils.Property.Command; -import accord.utils.Property.Commands; import accord.utils.Property.UnitCommand; import accord.utils.RandomSource; import org.agrona.collections.Int2ObjectHashMap; @@ -73,6 +71,7 @@ import org.apache.cassandra.utils.RangeTree; import org.assertj.core.api.Assertions; +import static accord.utils.Property.commands; import static accord.utils.Property.stateful; public class RouteIndexTest extends CQLTester.InMemory @@ -109,85 +108,61 @@ public void test() cfs().disableAutoCompaction(); // let the test control compaction //TODO (coverage): include with the ability to mark ranges as durable for compaction cleanup AccordService.unsafeSetNoop(); // disable accord service since compaction touches it. It would be nice to include this for cleanup support.... - stateful().withExamples(50).check(new Commands() - { - @Override - public Gen genInitialState() - { - return rs -> new State(rs); - } - - @Override - public ColumnFamilyStore createSut(State state) - { - return cfs(); - } + stateful().withExamples(50).check(commands(() -> State::new, i -> cfs()) + .destroySut(sut -> sut.truncateBlocking()) + .add(FLUSH) + .add(COMPACT) + .add((rs, state) -> { + int storeId = rs.nextInt(0, state.numStores); + Domain domain = state.domainGen.next(rs); + TxnId txnId = state.nextTxnId(domain); + Route route = createRoute(state, rs, domain, rs.nextInt(1, 20)); + return new InsertTxn(storeId, txnId, SaveStatus.PreAccepted, Durability.NotDurable, route); + }) + .add((rs, state) -> new RangeSearch(rs.nextInt(0, state.numStores), state.rangeGen.next(rs))) + .addIf(state -> !state.storeToTableToRangesToTxns.isEmpty(), RouteIndexTest::rangeSearch) + .build()); + } - @Override - public Gen> commands(State state) + private static RangeSearch rangeSearch(RandomSource rs, State state) + { + int storeId = rs.pickUnorderedSet(state.storeToTableToRangesToTxns.keySet()); + var tables = state.storeToTableToRangesToTxns.get(storeId); + TableId tableId = rs.pickUnorderedSet(tables.keySet()); + var ranges = tables.get(tableId); + TreeSet distinctRanges = ranges.stream().map(Map.Entry::getKey).collect(Collectors.toCollection(() -> new TreeSet<>(TokenRange::compareTo))); + TokenRange range; + if (distinctRanges.size() == 1) + { + range = Iterables.getFirst(distinctRanges, null); + } + else + { + switch (rs.nextInt(0, 2)) { - Map>, Integer> possible = new LinkedHashMap<>(); - possible.put(ignore -> FLUSH, 1); - possible.put(ignore -> COMPACT, 1); - possible.put(rs -> { - int storeId = rs.nextInt(0, state.numStores); - Domain domain = state.domainGen.next(rs); - TxnId txnId = state.nextTxnId(domain); - Route route = createRoute(state, rs, domain, rs.nextInt(1, 20)); - return new InsertTxn(storeId, txnId, SaveStatus.PreAccepted, Durability.NotDurable, route); - }, 10); - possible.put(rs -> new RangeSearch(rs.nextInt(0, state.numStores), state.rangeGen.next(rs)), 1); - if (!state.storeToTableToRangesToTxns.isEmpty()) + case 0: // perfect match + range = rs.pickOrderedSet(distinctRanges); + break; + case 1: // mutli-match { - possible.put(rs -> { - int storeId = rs.pickUnorderedSet(state.storeToTableToRangesToTxns.keySet()); - var tables = state.storeToTableToRangesToTxns.get(storeId); - TableId tableId = rs.pickUnorderedSet(tables.keySet()); - var ranges = tables.get(tableId); - TreeSet distinctRanges = ranges.stream().map(Map.Entry::getKey).collect(Collectors.toCollection(() -> new TreeSet<>(TokenRange::compareTo))); - TokenRange range; - if (distinctRanges.size() == 1) - { - range = Iterables.getFirst(distinctRanges, null); - } - else - { - switch (rs.nextInt(0, 2)) - { - case 0: // perfect match - range = rs.pickOrderedSet(distinctRanges); - break; - case 1: // mutli-match - { - TokenRange a = rs.pickOrderedSet(distinctRanges); - TokenRange b = rs.pickOrderedSet(distinctRanges); - while (a.equals(b)) - b = rs.pickOrderedSet(distinctRanges); - if (b.compareTo(a) < 0) - { - TokenRange tmp = a; - a = b; - b = tmp; - } - range = new TokenRange((AccordRoutingKey) a.start(), (AccordRoutingKey) b.end()); - } - break; - default: - throw new AssertionError(); - } - } - return new RangeSearch(storeId, range); - }, 5); + TokenRange a = rs.pickOrderedSet(distinctRanges); + TokenRange b = rs.pickOrderedSet(distinctRanges); + while (a.equals(b)) + b = rs.pickOrderedSet(distinctRanges); + if (b.compareTo(a) < 0) + { + TokenRange tmp = a; + a = b; + b = tmp; + } + range = new TokenRange((AccordRoutingKey) a.start(), (AccordRoutingKey) b.end()); } - return Gens.oneOf(possible); + break; + default: + throw new AssertionError(); } - - @Override - public void destroySut(ColumnFamilyStore sut) - { - cfs().truncateBlocking(); - } - }); + } + return new RangeSearch(storeId, range); } private static ColumnFamilyStore cfs() @@ -343,7 +318,7 @@ public String toString() } } - private class RangeSearch implements Command> + private static class RangeSearch implements Command> { private final int storeId; private final TokenRange range; diff --git a/test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java b/test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java index d7b6754a2d44..5389c79bb7d5 100644 --- a/test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java +++ b/test/unit/org/apache/cassandra/io/util/ChecksumedDataTest.java @@ -24,6 +24,8 @@ import java.util.zip.CRC32C; import java.util.zip.Checksum; +import javax.annotation.Nullable; + import org.junit.Test; import accord.utils.Gen; @@ -149,8 +151,9 @@ public List> createSut(ChecksumedDataOutputPlus check } @Override - public void destroySut(List> sut) throws Throwable + public void destroySut(List> sut, @Nullable Throwable t) throws Throwable { + if (t != null) return; ChecksumedDataInputPlus in = new ChecksumedDataInputPlus(new DataInputBuffer(out.unsafeGetBufferAndFlip(), false), CHECKSUM_SUPPLIER); for (StatefulChecksumCommand cmd : sut) { diff --git a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java index 6bc85b6f1eeb..101ca38a7364 100644 --- a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java +++ b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java @@ -28,7 +28,6 @@ import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.NavigableSet; @@ -60,10 +59,7 @@ import accord.topology.Topology; import accord.topology.TopologyManager; import accord.utils.Gen; -import accord.utils.Gens; import accord.utils.Invariants; -import accord.utils.Property.Command; -import accord.utils.Property.Commands; import accord.utils.Property.UnitCommand; import accord.utils.RandomSource; import accord.utils.async.AsyncChain; @@ -109,6 +105,7 @@ import org.apache.cassandra.utils.Pair; import org.assertj.core.api.Assertions; +import static accord.utils.Property.commands; import static accord.utils.Property.stateful; public class EpochSyncTest @@ -126,72 +123,48 @@ public class EpochSyncTest @Test public void test() { - stateful().withExamples(50).check(new Commands() - { - @Override - public Gen genInitialState() - { - return Cluster::new; - } - - @Override - public Void createSut(Cluster Cluster) - { - return null; - } + stateful().withExamples(50).check(commands(() -> Cluster::new) + .destroyState(cluster -> { + cluster.processAll(); + cluster.validate(true); + }) + .addIf(cluster -> cluster.alive().size() <= cluster.maxNodes, EpochSyncTest::addNode) + .addIf(cluster -> cluster.alive().size() > cluster.minNodes, EpochSyncTest::removeNode) + .addIf(cluster -> cluster.hasWork(), EpochSyncTest::processSome) + .add(rs -> new SimpleCommand("Validate", c -> c.validate(false))) + .add((rs, cluster) -> new SimpleCommand("Bump Epoch " + (cluster.current.epoch.getEpoch() + 1), Cluster::bumpEpoch)) + .build()); + } - @Override - public Gen> commands(Cluster cluster) - { - List alive = cluster.alive(); - Map>, Integer> possible = new LinkedHashMap<>(); - if (alive.size() < cluster.maxNodes) - { - // add node - possible.put(rs -> { - Node.Id id = new Node.Id(++cluster.nodeCounter); - long token = cluster.tokenGen.nextLong(rs); - while (cluster.tokens.contains(token)) - token = cluster.tokenGen.nextLong(rs); - long epoch = cluster.current.epoch.getEpoch() + 1; - long finalToken = token; - return new SimpleCommand("Add Node " + id + "; token=" + token + ", epoch=" + epoch, - c -> c.addNode(id, finalToken)); - }, 5); - } - if (alive.size() > cluster.minNodes) - { - possible.put(rs -> { - Node.Id pick = rs.pick(alive); - long token = cluster.instances.get(pick).token; - long epoch = cluster.current.epoch.getEpoch() + 1; - return new SimpleCommand("Remove Node " + pick + "; token=" + token + "; epoch=" + epoch, c -> c.removeNode(pick)); - }, 3); - } - if (cluster.hasWork()) - { - possible.put(rs -> new SimpleCommand("Process Some", - c -> {//noinspection StatementWithEmptyBody - for (int i = 0, attempts = rs.nextInt(1, 100); i < attempts && c.processOne(); i++) - { - } - }), 10); - } + private static SimpleCommand addNode(RandomSource rs, Cluster cluster) + { + Node.Id id = new Node.Id(++cluster.nodeCounter); + long token = cluster.tokenGen.nextLong(rs); + while (cluster.tokens.contains(token)) + token = cluster.tokenGen.nextLong(rs); + long epoch = cluster.current.epoch.getEpoch() + 1; + long finalToken = token; + return new SimpleCommand("Add Node " + id + "; token=" + token + ", epoch=" + epoch, + c -> c.addNode(id, finalToken)); + } - possible.put(rs -> new SimpleCommand("Validate", - c -> c.validate(false)), 1); - possible.put(rs -> new SimpleCommand("Bump Epoch " + (cluster.current.epoch.getEpoch() + 1), - Cluster::bumpEpoch), 10); - return Gens.oneOf(possible); - } + private static SimpleCommand removeNode(RandomSource rs, Cluster cluster) + { + List alive = cluster.alive(); + Node.Id pick = rs.pick(alive); + long token = cluster.instances.get(pick).token; + long epoch = cluster.current.epoch.getEpoch() + 1; + return new SimpleCommand("Remove Node " + pick + "; token=" + token + "; epoch=" + epoch, c -> c.removeNode(pick)); + } - @Override - public void destroyState(Cluster cluster) - { - cluster.processAll(); - cluster.validate(true); - } - }); + private static SimpleCommand processSome(RandomSource rs) + { + return new SimpleCommand("Process Some", + c -> {//noinspection StatementWithEmptyBody + for (int i = 0, attempts = rs.nextInt(1, 100); i < attempts && c.processOne(); i++) + { + } + }); } private static class SimpleCommand implements UnitCommand diff --git a/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java b/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java index e3e471b550ca..139f172dfb23 100644 --- a/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java +++ b/test/unit/org/apache/cassandra/utils/StatefulRangeTreeTest.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.TreeSet; @@ -35,12 +34,12 @@ import accord.utils.Gen; import accord.utils.Gens; import accord.utils.Property.Command; -import accord.utils.Property.Commands; import accord.utils.Property.UnitCommand; import accord.utils.RandomSource; import org.apache.cassandra.service.accord.RangeTreeRangeAccessor; import org.assertj.core.api.Assertions; +import static accord.utils.Property.commands; import static accord.utils.Property.stateful; public class StatefulRangeTreeTest @@ -61,7 +60,7 @@ public class StatefulRangeTreeTest /** * Stateful test for RTree. - * + *

      * This test is very similar to {@link RangeTreeTest#test} but is fully mutable, so can not * use the immutable search trees (else rebuidling becomes a large cost). Both tests should exist as they use different * models, which helps build confidence that the RTree does the correct thing; that test also covers start and end @@ -70,57 +69,25 @@ public class StatefulRangeTreeTest @Test public void test() { - stateful().check(new Commands() - { - @Override - public Gen genInitialState() - { - return rs -> { - Gen rangeGen = rangeGen(rs); - int numChildren = NUM_CHILDREN_GEN.nextInt(rs); - int sizeTarget = SIZE_TARGET_DISTRIBUTION.next(rs).filter(s -> s > numChildren).nextInt(rs); - int createWeight = rs.nextInt(1, 100); - int updateWeight = rs.nextInt(1, 20); - int deleteWeight = rs.nextInt(1, 20); - int clearWeight = rs.nextInt(0, 2); // either disabled or enabled with weight=1 - int readWeight = rs.nextInt(1, 20); - return new State(sizeTarget, numChildren, - TOKEN_DISTRIBUTION.next(rs), rangeGen, - createWeight, updateWeight, deleteWeight, clearWeight, readWeight); - }; - } - - @Override - public Sut createSut(State state) - { - return new Sut(state.sizeTarget, state.numChildren); - } - - @Override - public Gen> commands(State state) - { - Map>, Integer> possible = new LinkedHashMap<>(); - possible.put(rs -> new Create(state.newRange(rs), SMALL_INT_GEN.nextInt(rs)), state.createWeight); - possible.put(rs -> new Read(state.newRange(rs)), state.readWeight); - possible.put(rs -> new KeyRead(IntKey.routing(state.tokenGen.nextInt(rs))), state.readWeight); - possible.put(rs -> new RangeRead(state.rangeGen.next(rs)), state.readWeight); - possible.put(ignore -> Iterate.instance, state.readWeight); - possible.put(ignore -> Clear.instance, state.clearWeight); - if (!state.uniqRanges.isEmpty()) - { - possible.put(rs -> new Read(rs.pickOrderedSet(state.uniqRanges)), state.readWeight); - possible.put(rs -> { - Range range = rs.pickOrderedSet(state.uniqRanges); - int token = rs.nextInt(((IntKey.Routing) range.start()).key, ((IntKey.Routing) range.end()).key) + 1; - return new KeyRead(IntKey.routing(token)); - }, state.readWeight); - possible.put(rs -> new RangeRead(rs.pickOrderedSet(state.uniqRanges)), state.readWeight); - possible.put(rs -> new Update(rs.pickOrderedSet(state.uniqRanges), SMALL_INT_GEN.nextInt(rs)), state.updateWeight); - possible.put(rs -> new Delete(rs.pickOrderedSet(state.uniqRanges)), state.deleteWeight); - } - return Gens.oneOf(possible); - } - }); + stateful().check(commands(() -> State::new, state -> new Sut(state.sizeTarget, state.numChildren)) + .add((rs, state) -> new Create(state.newRange(rs), SMALL_INT_GEN.nextInt(rs))) + .add((rs, state) -> new Read(state.newRange(rs))) + .add((rs, state) -> new KeyRead(IntKey.routing(state.tokenGen.nextInt(rs)))) + .add((rs, state) -> new RangeRead(state.rangeGen.next(rs))) + .add(Iterate.instance) + .add(Clear.instance) + .addAllIf(state -> !state.uniqRanges.isEmpty(), + b -> b.add((rs, state) -> new Read(rs.pickOrderedSet(state.uniqRanges))) + .add((rs, state) -> { + Range range = rs.pickOrderedSet(state.uniqRanges); + int token = rs.nextInt(((IntKey.Routing) range.start()).key, ((IntKey.Routing) range.end()).key) + 1; + return new KeyRead(IntKey.routing(token)); + }) + .add((rs, state) -> new RangeRead(rs.pickOrderedSet(state.uniqRanges))) + .add((rs, state) -> new Update(rs.pickOrderedSet(state.uniqRanges), SMALL_INT_GEN.nextInt(rs))) + .add((rs, state) -> new Delete(rs.pickOrderedSet(state.uniqRanges))) + ) + .build()); } private static Gen rangeGen(RandomSource rand) @@ -401,6 +368,7 @@ public String detailed(State state) static class Iterate extends AbstractRead> { static final Iterate instance = new Iterate(); + public Iterate() { super(COMPARATOR); @@ -432,27 +400,21 @@ private static class State private final int sizeTarget, numChildren; private final Gen.IntGen tokenGen; private final Gen rangeGen; - private final int createWeight, updateWeight, deleteWeight, clearWeight, readWeight; - private State(int sizeTarget, int numChildren, - Gen.IntGen tokenGen, Gen rangeGen, - int createWeight, int updateWeight, int deleteWeight, int clearWeight, int readWeight) + private State(RandomSource rs) { - this.sizeTarget = sizeTarget; - this.numChildren = numChildren; - this.tokenGen = tokenGen; - this.rangeGen = rangeGen; - this.createWeight = createWeight; - this.updateWeight = updateWeight; - this.deleteWeight = deleteWeight; - this.clearWeight = clearWeight; - this.readWeight = readWeight; + this.numChildren = NUM_CHILDREN_GEN.nextInt(rs); + this.sizeTarget = SIZE_TARGET_DISTRIBUTION.next(rs).filter(s -> s > numChildren).nextInt(rs); + this.tokenGen = TOKEN_DISTRIBUTION.next(rs); + this.rangeGen = rangeGen(rs); } public Range newRange(RandomSource rs) { Range range; - while ((uniqRanges.contains(range = rangeGen.next(rs)))) {} + while ((uniqRanges.contains(range = rangeGen.next(rs)))) + { + } return range; } From 00ebcdc9f20d6e459cd277cfa42312a2be4dfc17 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 26 Aug 2024 20:24:41 +0200 Subject: [PATCH 138/340] Fix a problem with static segments being opened with an empty offset after switch unless active segments offset file was closed Patch by Alex Petrov; reviewed by Aleksey Yeschenko for CASSANDRA-19867 --- src/java/org/apache/cassandra/journal/ActiveSegment.java | 1 + src/java/org/apache/cassandra/journal/SyncedOffsets.java | 1 + 2 files changed, 2 insertions(+) diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index a815d231997d..1bee25a96fed 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -183,6 +183,7 @@ void persistComponents() { index.persist(descriptor); metadata.persist(descriptor); + syncedOffsets.fsync(); SyncUtil.trySyncDir(descriptor.directory); } diff --git a/src/java/org/apache/cassandra/journal/SyncedOffsets.java b/src/java/org/apache/cassandra/journal/SyncedOffsets.java index cd05e6f8ac6c..8a89f72ad767 100644 --- a/src/java/org/apache/cassandra/journal/SyncedOffsets.java +++ b/src/java/org/apache/cassandra/journal/SyncedOffsets.java @@ -135,6 +135,7 @@ public void mark(int offset, boolean fsync) { output.writeInt(offset); output.writeInt((int) crc.getValue()); + output.flush(); } catch (IOException e) { From 1866b8ec73c222b241172e154c84e7e8b9888a61 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 9 Sep 2024 15:05:32 +0200 Subject: [PATCH 139/340] Fix Journal segment allocation/switch race condition Patch by Alex Petrov; reviewed by Aleksey Yeschenko for CASSANDRA-19866 --- src/java/org/apache/cassandra/journal/Journal.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index c092363af79d..50a3058ec9ee 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -198,6 +198,10 @@ public Journal(String name, public boolean isFlushed(RecordPointer recordPointer) { + Segment current = currentSegment; + if (current.descriptor.timestamp == recordPointer.segment) + return current.isFlushed(recordPointer.position); + return segments.get().isFlushed(recordPointer); } From 9ebce5d6df0ba3aa93ad168f42002152c9c4a2a1 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 22 Aug 2024 21:12:48 +0100 Subject: [PATCH 140/340] Redesign progress mechanisms to be memory efficient, use fewer messages and to resolve dependency chains promptly. The SimpleProgressLog had a number of problems: 1. It polled for progress with no attempt to determine whether progress could realistically be made, so: - as the number of pending transactions grew, the proportion of useful work dropped (as many would be unable to make progress without earlier transactions completing) - each transaction in the chain could recover only on average 1/2 poll interval behind the last transaction to complete 2. It requested full transaction state from every replica on each attempt 3. It maintained a lot of in-memory state 4. Polling happened en-masse, allowing for little per-transaction control We also separately maintained fairly expensive per-command listener state that negatively affected our command loading and caching. The new DefaultProgressLog makes use of several new features: LocalListeners, RemoteListeners, Timers and Await messages. - LocalListeners provide a memory-efficient collection for managing each CommandStore<80><99>s transaction listeners, with dedicated record keeping for inter-transaction relationships. - RemoteListeners provide a mechanism for request/response pairs that may be separated by longer than the normal Cassandra message timeout, and require minimal state on sender and recipient. This permits replicas to cheaply update their local state machine as soon as distributed information becomes available. The DefaultProgressLog tracks each transaction with separate timers to handle per-transaction scheduling, backoff etc, and a succinct state machine. To reduce overhead correspondence is preferentially limited to a handful of replicas, and limited to the home shard where appropriate. patch by Benedict; reviewed by Ariel Weisberg for CASSANDRA-19870 --- conf/cassandra.yaml | 4 +- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 2 +- .../db/compaction/CompactionIterator.java | 4 + .../cassandra/metrics/AccordMetrics.java | 8 +- src/java/org/apache/cassandra/net/Verb.java | 21 ++- .../service/accord/AccordCachingState.java | 34 ----- .../service/accord/AccordCommandStore.java | 11 +- .../service/accord/AccordCommandStores.java | 11 +- .../service/accord/AccordConfiguration.java | 10 +- .../service/accord/AccordJournal.java | 37 ----- .../service/accord/AccordKeyspace.java | 43 +++--- .../service/accord/AccordMessageSink.java | 11 +- .../service/accord/AccordObjectSizes.java | 14 +- .../service/accord/AccordSafeCommand.java | 23 ---- .../service/accord/AccordService.java | 34 ++--- .../service/accord/AccordStateCache.java | 17 +-- .../service/accord/AccordSyncPropagator.java | 8 +- .../accord/CommandsForRangesLoader.java | 2 +- .../service/accord/SavedCommand.java | 32 +---- .../service/accord/api/AccordAgent.java | 83 +++++++++++ .../service/accord/api/AccordScheduler.java | 6 + .../accord/interop/AccordInteropApply.java | 35 ++--- .../accord/interop/AccordInteropCommit.java | 6 +- .../interop/AccordInteropExecution.java | 4 +- .../accord/interop/AccordInteropPersist.java | 4 +- .../interop/AccordInteropReadRepair.java | 2 +- .../accord/serializers/AcceptSerializers.java | 6 +- .../accord/serializers/ApplySerializers.java | 9 +- .../accord/serializers/AwaitSerializer.java | 105 ++++++++++++++ .../BeginInvalidationSerializers.java | 13 +- ...ers.java => CalculateDepsSerializers.java} | 28 ++-- .../serializers/CommandsForKeySerializer.java | 33 +++-- .../accord/serializers/CommitSerializers.java | 8 +- .../accord/serializers/FetchSerializers.java | 102 -------------- .../GetEphmrlReadDepsSerializers.java | 4 +- .../GetMaxConflictSerializers.java | 4 +- .../serializers/InformDurableSerializers.java | 10 +- .../InformHomeDurableSerializers.java | 60 -------- .../serializers/InformOfTxnIdSerializers.java | 53 ------- .../serializers/ListenerSerializers.java | 113 --------------- .../serializers/PreacceptSerializers.java | 10 +- .../serializers/RecoverySerializers.java | 4 +- .../serializers/TxnRequestSerializer.java | 23 ++-- .../serializers/WaitOnCommitSerializer.java | 77 ----------- .../cassandra/triggers/TriggerExecutor.java | 5 - test/conf/cassandra.yaml | 1 - .../cassandra/distributed/impl/Instance.java | 2 +- .../distributed/impl/InstanceConfig.java | 2 +- .../distributed/test/QueriesTableTest.java | 4 +- .../accord/AccordIncrementalRepairTest.java | 6 +- .../test/accord/AccordIntegrationTest.java | 4 +- .../accord/AccordMigrationRaceTestBase.java | 4 +- .../test/accord/AccordProgressLogTest.java | 130 ++++++++++++++++++ .../test/accord/AccordTestBase.java | 8 +- .../CompactionAccordIteratorsTest.java | 23 ++-- .../cassandra/dht/AccordSplitterTest.java | 4 +- .../service/accord/AccordAgentTest.java | 70 ++++++++++ .../accord/AccordCommandStoreTest.java | 1 - .../service/accord/AccordCommandTest.java | 13 +- .../service/accord/AccordMessageSinkTest.java | 13 -- .../service/accord/AccordServiceTest.java | 6 +- .../service/accord/AccordTestUtils.java | 38 ++--- .../service/accord/EpochSyncTest.java | 6 +- .../cassandra/service/accord/MockJournal.java | 14 +- .../accord/SimulatedAccordCommandStore.java | 12 +- .../accord/async/AsyncOperationTest.java | 16 +-- .../CommandsForKeySerializerTest.java | 16 +-- .../cassandra/utils/AccordGenerators.java | 3 +- 69 files changed, 670 insertions(+), 861 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java rename src/java/org/apache/cassandra/service/accord/serializers/{GetDepsSerializers.java => CalculateDepsSerializers.java} (62%) delete mode 100644 src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java delete mode 100644 src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java delete mode 100644 src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java delete mode 100644 src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordAgentTest.java diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 4341422dbde7..8832243f6e5f 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2659,8 +2659,8 @@ storage_compatibility_mode: NONE # # The number of Accord shards on this node; -1 means use the number of cores # shard_count: -1 # -# # Progress log scheduling delay -# progress_log_schedule_delay: 1s +# # Recover delay: the time between a transaction being initiated and a remote replica being willing to interrupt it to complete it +# recover_delay: 1s # # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s diff --git a/modules/accord b/modules/accord index a171322f417c..fb3efe9b8a87 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit a171322f417c117733ca5b514d03a5202b1ac202 +Subproject commit fb3efe9b8a87f0a182545791a2b0563690d52d00 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 2e0d614957af..b4d25d66890e 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -30,7 +30,7 @@ public class AccordSpec public volatile OptionaldPositiveInt shard_count = OptionaldPositiveInt.UNDEFINED; - public volatile DurationSpec.IntMillisecondsBound progress_log_schedule_delay = new DurationSpec.IntMillisecondsBound(100); + public volatile DurationSpec.IntMillisecondsBound recover_delay = new DurationSpec.IntMillisecondsBound(1000); /** * When a barrier transaction is requested how many times to repeat attempting the barrier before giving up diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 54d30a64453b..924a960571b9 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -96,6 +96,7 @@ import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.cassandra.config.Config.PaxosStatePurging.legacy; import static org.apache.cassandra.config.DatabaseDescriptor.paxosStatePurging; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.invalidated; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.maybeDropTruncatedCommandColumns; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.truncatedApply; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeysAccessor; @@ -849,6 +850,9 @@ protected Row applyToRow(Row row) // We can still encounter sliced command state just because compaction inputs are random return BTreeRow.emptyDeletedRow(row.clustering(), new Row.Deletion(DeletionTime.build(row.primaryKeyLivenessInfo().timestamp(), nowInSec), false)); + case INVALIDATE: + return invalidated(cleanup.appliesIfNot, row, nowInSec); + case TRUNCATE_WITH_OUTCOME: case TRUNCATE: if (saveStatus.compareTo(cleanup.appliesIfNot) >= 0) diff --git a/src/java/org/apache/cassandra/metrics/AccordMetrics.java b/src/java/org/apache/cassandra/metrics/AccordMetrics.java index 765fe154eedb..c9d8e2968730 100644 --- a/src/java/org/apache/cassandra/metrics/AccordMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordMetrics.java @@ -206,7 +206,7 @@ else if (txnId.isRead()) @Override public void onStable(Command cmd) { - long now = AccordService.uniqueNow(); + long now = AccordService.now(); AccordMetrics metrics = forTransaction(cmd.txnId()); if (metrics != null) { @@ -218,7 +218,7 @@ public void onStable(Command cmd) @Override public void onExecuted(Command cmd) { - long now = AccordService.uniqueNow(); + long now = AccordService.now(); AccordMetrics metrics = forTransaction(cmd.txnId()); if (metrics != null) { @@ -232,7 +232,7 @@ public void onExecuted(Command cmd) @Override public void onApplied(Command cmd, long applyStartTimestamp) { - long now = AccordService.uniqueNow(); + long now = AccordService.now(); AccordMetrics metrics = forTransaction(cmd.txnId()); if (metrics != null) { @@ -270,7 +270,7 @@ public void onRecover(TxnId txnId, Timestamp recoveryTimestamp) AccordMetrics metrics = forTransaction(txnId); if (metrics != null) { - long now = AccordService.uniqueNow(); + long now = AccordService.now(); metrics.recoveryDuration.update(now - recoveryTimestamp.hlc(), MICROSECONDS); metrics.recoveryDelay.update(recoveryTimestamp.hlc() - txnId.hlc(), MICROSECONDS); diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index e6c3264c4893..e2d3696a62e4 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -92,18 +92,16 @@ import org.apache.cassandra.service.accord.serializers.CommitSerializers; import org.apache.cassandra.service.accord.serializers.EnumSerializer; import org.apache.cassandra.service.accord.serializers.FetchSerializers; -import org.apache.cassandra.service.accord.serializers.GetDepsSerializers; +import org.apache.cassandra.service.accord.serializers.CalculateDepsSerializers; import org.apache.cassandra.service.accord.serializers.GetEphmrlReadDepsSerializers; import org.apache.cassandra.service.accord.serializers.GetMaxConflictSerializers; import org.apache.cassandra.service.accord.serializers.InformDurableSerializers; -import org.apache.cassandra.service.accord.serializers.InformHomeDurableSerializers; -import org.apache.cassandra.service.accord.serializers.InformOfTxnIdSerializers; import org.apache.cassandra.service.accord.serializers.PreacceptSerializers; import org.apache.cassandra.service.accord.serializers.QueryDurableBeforeSerializers; import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; import org.apache.cassandra.service.accord.serializers.RecoverySerializers; import org.apache.cassandra.service.accord.serializers.SetDurableSerializers; -import org.apache.cassandra.service.accord.serializers.WaitOnCommitSerializer; +import org.apache.cassandra.service.accord.serializers.AwaitSerializer; import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState; import org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.ConsensusKeyMigrationFinished; import org.apache.cassandra.service.paxos.Commit; @@ -321,21 +319,20 @@ public enum Verb ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, AccordService::verbHandlerOrNoop ), ACCORD_APPLY_RSP (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), - ACCORD_APPLY_REQ (130, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), + ACCORD_APPLY_REQ (130, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), ACCORD_BEGIN_RECOVER_RSP (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), ACCORD_BEGIN_RECOVER_REQ (132, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_RECOVER_RSP ), ACCORD_BEGIN_INVALIDATE_RSP (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), ACCORD_BEGIN_INVALIDATE_REQ (134, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_INVALIDATE_RSP ), - ACCORD_WAIT_ON_COMMIT_RSP (136, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.reply, RESPONSE_HANDLER ), - ACCORD_WAIT_ON_COMMIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> WaitOnCommitSerializer.request, AccordService::verbHandlerOrNoop, ACCORD_WAIT_ON_COMMIT_RSP ), - ACCORD_WAIT_UNTIL_APPLIED_REQ (137, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitUntilApplied, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), - ACCORD_INFORM_OF_TXN_REQ (138, P2, writeTimeout, IMMEDIATE, () -> InformOfTxnIdSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_INFORM_HOME_DURABLE_REQ (139, P2, writeTimeout, IMMEDIATE, () -> InformHomeDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_AWAIT_RSP (136, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.syncReply, RESPONSE_HANDLER ), + ACCORD_AWAIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.request, AccordService::verbHandlerOrNoop, ACCORD_AWAIT_RSP ), + ACCORD_AWAIT_ASYNC_RSP_REQ (137, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.asyncReply, AccordService::verbHandlerOrNoop ), + ACCORD_WAIT_UNTIL_APPLIED_REQ (138, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitUntilApplied, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), ACCORD_CHECK_STATUS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), - ACCORD_GET_DEPS_RSP (143, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_GET_DEPS_REQ (144, P2, writeTimeout, IMMEDIATE, () -> GetDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_DEPS_RSP ), + ACCORD_CALCULATE_DEPS_RSP (143, P2, writeTimeout, IMMEDIATE, () -> CalculateDepsSerializers.reply, RESPONSE_HANDLER ), + ACCORD_CALCULATE_DEPS_REQ (144, P2, writeTimeout, IMMEDIATE, () -> CalculateDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CALCULATE_DEPS_RSP), ACCORD_GET_EPHMRL_READ_DEPS_RSP (161, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index 304befe83a52..35ceec450dfe 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -25,8 +25,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; -import accord.local.Command.TransientListener; -import accord.local.Listeners; import accord.utils.IntrusiveLinkedListNode; import accord.utils.Invariants; import accord.utils.async.AsyncChain; @@ -69,11 +67,6 @@ static Factory defaultFactory() final byte index; private boolean shouldUpdateSize; - /** - * Transient listeners aren't meant to survive process restart, but must survive cache eviction. - */ - private Listeners transientListeners; - AccordCachingState(K key, int index) { this.key = key; @@ -157,33 +150,6 @@ public Status status() return complete().status(); } - public void addListener(TransientListener listener) - { - if (transientListeners == null) - transientListeners = new Listeners<>(); - transientListeners.add(listener); - } - - public boolean removeListener(TransientListener listener) - { - return transientListeners != null && transientListeners.remove(listener); - } - - public void listeners(Listeners listeners) - { - transientListeners = listeners; - } - - public Listeners listeners() - { - return transientListeners == null ? Listeners.EMPTY : transientListeners; - } - - public boolean hasListeners() - { - return !listeners().isEmpty(); - } - State complete() { return state.isCompleteable() ? state(state.complete()) : state; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 39370a1cfc2a..c1ce231da91e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -39,6 +39,7 @@ import accord.api.Agent; import accord.api.DataStore; +import accord.api.LocalListeners; import accord.api.ProgressLog; import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; @@ -114,11 +115,12 @@ public AccordCommandStore(int id, Agent agent, DataStore dataStore, ProgressLog.Factory progressLogFactory, + LocalListeners.Factory listenerFactory, EpochUpdateHolder epochUpdateHolder, IJournal journal, AccordStateCacheMetrics cacheMetrics) { - this(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder, journal, Stage.READ.executor(), Stage.MUTATION.executor(), cacheMetrics); + this(id, time, agent, dataStore, progressLogFactory, listenerFactory, epochUpdateHolder, journal, Stage.READ.executor(), Stage.MUTATION.executor(), cacheMetrics); } private static void registerJfrListener(int id, AccordStateCache.Instance instance, String name) @@ -194,13 +196,14 @@ public AccordCommandStore(int id, Agent agent, DataStore dataStore, ProgressLog.Factory progressLogFactory, + LocalListeners.Factory listenerFactory, EpochUpdateHolder epochUpdateHolder, IJournal journal, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor, AccordStateCacheMetrics cacheMetrics) { - super(id, time, agent, dataStore, progressLogFactory, epochUpdateHolder); + super(id, time, agent, dataStore, progressLogFactory, listenerFactory, epochUpdateHolder); this.journal = journal; loggingId = String.format("[%s]", id); executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); @@ -257,8 +260,8 @@ public AccordCommandStore(int id, static Factory factory(AccordJournal journal, AccordStateCacheMetrics cacheMetrics) { - return (id, time, agent, dataStore, progressLogFactory, rangesForEpoch) -> - new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, rangesForEpoch, journal, cacheMetrics); + return (id, time, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch) -> + new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch, journal, cacheMetrics); } public CommandsForRangesLoader diskCommandsForRanges() diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 7328a31de6f9..60cbcf84ffb8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -22,6 +22,7 @@ import accord.api.Agent; import accord.api.ConfigurationService.EpochReady; import accord.api.DataStore; +import accord.api.LocalListeners; import accord.api.ProgressLog; import accord.local.CommandStores; import accord.local.Node; @@ -45,17 +46,19 @@ public class AccordCommandStores extends CommandStores implements CacheSize private long cacheSize; AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, RandomSource random, - ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, AccordJournal journal) + ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, LocalListeners.Factory listenerFactory, + AccordJournal journal) { - super(time, agent, store, random, shardDistributor, progressLogFactory, AccordCommandStore.factory(journal, new AccordStateCacheMetrics(ACCORD_STATE_CACHE))); + super(time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory, + AccordCommandStore.factory(journal, new AccordStateCacheMetrics(ACCORD_STATE_CACHE))); setCapacity(DatabaseDescriptor.getAccordCacheSizeInMiB() << 20); this.cacheSizeMetrics = new CacheSizeMetrics(ACCORD_STATE_CACHE, this); } static Factory factory(AccordJournal journal) { - return (time, agent, store, random, shardDistributor, progressLogFactory) -> - new AccordCommandStores(time, agent, store, random, shardDistributor, progressLogFactory, journal); + return (time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory) -> + new AccordCommandStores(time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory, journal); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java b/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java index d87e6c96cd90..a0da2d99af71 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfiguration.java @@ -18,9 +18,7 @@ package org.apache.cassandra.service.accord; -import java.time.Duration; - -import accord.config.LocalConfig; +import accord.api.LocalConfig; import org.apache.cassandra.config.Config; // TODO (expected): should this be merged with AccordSpec? @@ -32,10 +30,4 @@ public AccordConfiguration(Config config) { this.config = config; } - - @Override - public Duration getProgressLogScheduleDelay() - { - return config.accord.progress_log_schedule_delay.toDuration(); - } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index eda7155a3880..932d31356a17 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -35,7 +35,6 @@ import accord.coordinate.Timeout; import accord.local.Command; import accord.local.Node; -import accord.messages.LocalRequest; import accord.messages.ReplyContext; import accord.messages.Request; import accord.primitives.TxnId; @@ -175,19 +174,6 @@ public void processRemoteRequest(Request request, ResponseContext context) delayedRequestProcessor.delay(requestContext); } - /** - * Accord protocol messages originating from local node, e.g. Propagate. - */ - @SuppressWarnings("rawtypes, unchecked") - public void processLocalRequest(LocalRequest request, BiConsumer callback) - { - LocalRequestContext requestContext = LocalRequestContext.create(request, callback); - if (node.topology().hasEpoch(request.waitForEpoch())) - request.process(node, requestContext.callback); - else - delayedRequestProcessor.delay(requestContext); - } - @Override public Command loadCommand(int commandStoreId, TxnId txnId) { @@ -281,29 +267,6 @@ static abstract class RequestContext implements ReplyContext public abstract void process(Node node, AccordEndpointMapper endpointMapper); } - private static class LocalRequestContext extends RequestContext - { - private final BiConsumer callback; - private final LocalRequest request; - - LocalRequestContext(long waitForEpoch, LocalRequest request, BiConsumer callback) - { - super(waitForEpoch); - this.callback = callback; - this.request = request; - } - - public void process(Node node, AccordEndpointMapper endpointMapper) - { - request.process(node, callback); - } - - static LocalRequestContext create(LocalRequest request, BiConsumer callback) - { - return new LocalRequestContext<>(request.waitForEpoch(), request, callback); - } - } - /** * Barebones response context not holding a reference to the entire message */ diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 08fb1ce4c9b1..84b096b71685 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -53,7 +53,6 @@ import accord.local.Command; import accord.local.CommandStore; import accord.local.DurableBefore; -import accord.local.Listeners; import accord.local.Node; import accord.local.RedundantBefore; import accord.local.SaveStatus; @@ -146,7 +145,6 @@ import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; -import org.apache.cassandra.service.accord.serializers.ListenerSerializers; import org.apache.cassandra.service.accord.serializers.TopologySerializers; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock.Global; @@ -250,7 +248,6 @@ static TokenType valueOf(Token token) public static class LocalVersionedSerializers { static final LocalVersionedSerializer> route = localSerializer(KeySerializers.route); - static final LocalVersionedSerializer listeners = localSerializer(ListenerSerializers.listener); static final LocalVersionedSerializer topology = localSerializer(TopologySerializers.topology); static final LocalVersionedSerializer> rejectBefore = localSerializer(CommandStoreSerializers.rejectBefore); static final LocalVersionedSerializer durableBefore = localSerializer(CommandStoreSerializers.durableBefore); @@ -284,6 +281,7 @@ public static class CommandsColumns public static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); public static final ColumnMetadata[] TRUNCATE_FIELDS = new ColumnMetadata[] { durability, execute_at, route, status }; + public static final ColumnMetadata[] INVALIDATE_FIELDS = new ColumnMetadata[] { status }; static { @@ -368,6 +366,31 @@ private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSave return newLeaf; } + public static Row invalidated(SaveStatus newSaveStatus, Row row, long nowInSec) + { + long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); + long newTimestamp = oldTimestamp + 1; + + Object[] newLeaf = invalidatedLeaf(newTimestamp, newSaveStatus); + + // Including a deletion allows future compactions to drop data before it gets to the purger + // but it is pretty optional because maybeDropTruncatedCommandColumns will drop the extra columns + // regardless + Row.Deletion deletion = new Row.Deletion(DeletionTime.build(oldTimestamp, nowInSec), false); + return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), deletion, newLeaf); + } + + private static Object[] invalidatedLeaf(long newTimestamp, SaveStatus newSaveStatus) + { + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(INVALIDATE_FIELDS.length); + int colIndex = 0; + // Status always needs to use the new timestamp since we are replacing the existing value + // All the other columns are being retained unmodified with at most updated timestamps to accomdate deletion + //noinspection UnusedAssignment + newLeaf[colIndex++] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); + return newLeaf; + } + public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, Durability durability, Cell durabilityCell, Cell executeAtCell, Cell routeCell, boolean withOutcome) { checkArgument(durabilityCell.column() == CommandsColumns.durability); @@ -624,7 +647,7 @@ public Row withoutRedundantCommands(PartitionKey key, Row row, RedundantBefore.E if (current == null) return null; - CommandsForKey updated = current.withRedundantBeforeAtLeast(redundantBefore); + CommandsForKey updated = current.withRedundantBeforeAtLeast(redundantBefore.shardRedundantBefore()); if (current == updated) return row; @@ -1138,18 +1161,6 @@ public static Route deserializeRouteOrNull(Cell cell) } } - private static Listeners.Immutable deserializeListeners(UntypedResultSet.Row row) throws IOException - { - Set serialized = row.getSet("listeners", BytesType.instance); - if (serialized == null || serialized.isEmpty()) - return Listeners.Immutable.EMPTY; - - Listeners result = new Listeners<>(); - for (ByteBuffer bytes : serialized) - result.add(deserialize(bytes, LocalVersionedSerializers.listeners)); - return new Listeners.Immutable(result); - } - public static PartitionKey deserializeKey(ByteBuffer buffer) { List split = KEY_TYPE.unpack(buffer, ByteBufferAccessor.instance); diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 5a514219e35c..bf6c5ae06f19 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -122,8 +122,8 @@ private VerbMapping() builder.put(MessageType.ACCEPT_REQ, Verb.ACCORD_ACCEPT_REQ); builder.put(MessageType.ACCEPT_RSP, Verb.ACCORD_ACCEPT_RSP); builder.put(MessageType.ACCEPT_INVALIDATE_REQ, Verb.ACCORD_ACCEPT_INVALIDATE_REQ); - builder.put(MessageType.GET_DEPS_REQ, Verb.ACCORD_GET_DEPS_REQ); - builder.put(MessageType.GET_DEPS_RSP, Verb.ACCORD_GET_DEPS_RSP); + builder.put(MessageType.CALCULATE_DEPS_REQ, Verb.ACCORD_CALCULATE_DEPS_REQ); + builder.put(MessageType.CALCULATE_DEPS_RSP, Verb.ACCORD_CALCULATE_DEPS_RSP); builder.put(MessageType.GET_EPHEMERAL_READ_DEPS_REQ, Verb.ACCORD_GET_EPHMRL_READ_DEPS_REQ); builder.put(MessageType.GET_EPHEMERAL_READ_DEPS_RSP, Verb.ACCORD_GET_EPHMRL_READ_DEPS_RSP); builder.put(MessageType.GET_MAX_CONFLICT_REQ, Verb.ACCORD_GET_MAX_CONFLICT_REQ); @@ -144,13 +144,12 @@ private VerbMapping() builder.put(MessageType.BEGIN_RECOVER_RSP, Verb.ACCORD_BEGIN_RECOVER_RSP); builder.put(MessageType.BEGIN_INVALIDATE_REQ, Verb.ACCORD_BEGIN_INVALIDATE_REQ); builder.put(MessageType.BEGIN_INVALIDATE_RSP, Verb.ACCORD_BEGIN_INVALIDATE_RSP); - builder.put(MessageType.WAIT_ON_COMMIT_REQ, Verb.ACCORD_WAIT_ON_COMMIT_REQ); - builder.put(MessageType.WAIT_ON_COMMIT_RSP, Verb.ACCORD_WAIT_ON_COMMIT_RSP); + builder.put(MessageType.AWAIT_REQ, Verb.ACCORD_AWAIT_REQ); + builder.put(MessageType.AWAIT_RSP, Verb.ACCORD_AWAIT_RSP); + builder.put(MessageType.ASYNC_AWAIT_COMPLETE_REQ, Verb.ACCORD_AWAIT_ASYNC_RSP_REQ); builder.put(MessageType.WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_WAIT_UNTIL_APPLIED_REQ); builder.put(MessageType.APPLY_THEN_WAIT_UNTIL_APPLIED_REQ, Verb.ACCORD_APPLY_AND_WAIT_REQ); - builder.put(MessageType.INFORM_OF_TXN_REQ, Verb.ACCORD_INFORM_OF_TXN_REQ); builder.put(MessageType.INFORM_DURABLE_REQ, Verb.ACCORD_INFORM_DURABLE_REQ); - builder.put(MessageType.INFORM_HOME_DURABLE_REQ, Verb.ACCORD_INFORM_HOME_DURABLE_REQ); builder.put(MessageType.CHECK_STATUS_REQ, Verb.ACCORD_CHECK_STATUS_REQ); builder.put(MessageType.CHECK_STATUS_RSP, Verb.ACCORD_CHECK_STATUS_RSP); builder.put(MessageType.FETCH_DATA_REQ, Verb.ACCORD_FETCH_DATA_REQ); diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 4262749991bb..53fd58155182 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -69,7 +69,7 @@ import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.utils.ObjectSizes; -import static accord.local.cfk.CommandsForKey.NO_TXNIDS; +import static accord.primitives.TxnId.NO_TXNIDS; import static org.apache.cassandra.utils.ObjectSizes.measure; public class AccordObjectSizes @@ -266,14 +266,6 @@ public static long results(Result result) return ((TxnResult) result).estimatedSizeOnHeap(); } - private static final long EMPTY_COMMAND_LISTENER = measure(new Command.ProxyListener(null)); - public static long listener(Command.DurableAndIdempotentListener listener) - { - if (listener instanceof Command.ProxyListener) - return EMPTY_COMMAND_LISTENER + timestamp(((Command.ProxyListener) listener).txnId()); - throw new IllegalArgumentException("Unhandled listener type: " + listener.getClass()); - } - private static class CommandEmptySizes { private final static TokenKey EMPTY_KEY = new TokenKey(EMPTY_ID, null); @@ -301,7 +293,7 @@ private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) final static long COMMITTED = measure(Command.SerializerSupport.committed(attrs(true, true), SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, null)); final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.empty(EMPTY_TXNID.domain()), EMPTY_WRITES, EMPTY_RESULT)); final static long TRUNCATED = measure(Command.SerializerSupport.truncatedApply(attrs(false, false), SaveStatus.TruncatedApply, EMPTY_TXNID, null, null)); - final static long INVALIDATED = measure(Command.SerializerSupport.invalidated(EMPTY_TXNID, null)); + final static long INVALIDATED = measure(Command.SerializerSupport.invalidated(EMPTY_TXNID)); private static long emptySize(Command command) { @@ -343,8 +335,6 @@ public static long command(Command command) long size = CommandEmptySizes.emptySize(command); size += sizeNullable(command.route(), AccordObjectSizes::route); size += sizeNullable(command.promised(), AccordObjectSizes::timestamp); - for (Command.DurableAndIdempotentListener listener : command.durableListeners()) - size += listener(listener); size += sizeNullable(command.executeAt(), AccordObjectSizes::timestamp); size += sizeNullable(command.partialTxn(), AccordObjectSizes::txn); size += sizeNullable(command.partialDeps(), AccordObjectSizes::dependencies); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index 28220cbf471d..43bc8d54b90b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -24,8 +24,6 @@ import com.google.common.annotations.VisibleForTesting; import accord.local.Command; -import accord.local.Command.TransientListener; -import accord.local.Listeners; import accord.local.SafeCommand; import accord.primitives.TxnId; import accord.utils.Invariants; @@ -152,27 +150,6 @@ public boolean invalidated() return invalidated; } - @Override - public void addListener(TransientListener listener) - { - checkNotInvalidated(); - global.addListener(listener); - } - - @Override - public boolean removeListener(TransientListener listener) - { - checkNotInvalidated(); - return global.removeListener(listener); - } - - @Override - public Listeners transientListeners() - { - checkNotInvalidated(); - return global.listeners(); - } - public static Function, AccordSafeCommand> safeRefFactory() { return Invariants.testParanoia(LINEAR, LINEAR, HIGH) ? DebugAccordSafeCommand::new : AccordSafeCommand::new; diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 6db8610d4735..43bceacd590d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -27,7 +27,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.Supplier; @@ -37,7 +36,6 @@ import javax.annotation.concurrent.GuardedBy; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; import com.google.common.base.Throwables; import com.google.common.primitives.Ints; @@ -45,8 +43,8 @@ import org.slf4j.LoggerFactory; import accord.api.BarrierType; +import accord.api.LocalConfig; import accord.api.Result; -import accord.config.LocalConfig; import accord.coordinate.Barrier; import accord.coordinate.CoordinateSyncPoint; import accord.coordinate.CoordinationFailed; @@ -57,10 +55,13 @@ import accord.coordinate.TopologyMismatch; import accord.impl.AbstractConfigurationService; import accord.impl.CoordinateDurabilityScheduling; -import accord.impl.SimpleProgressLog; +import accord.impl.DefaultLocalListeners; +import accord.impl.DefaultRemoteListeners; +import accord.impl.DefaultRequestTimeouts; import accord.impl.SizeOfIntersectionSorter; import accord.local.Command; import accord.local.CommandStore; +import accord.impl.progresslog.DefaultProgressLogs; import accord.local.CommandStores; import accord.local.DurableBefore; import accord.local.KeyHistory; @@ -73,7 +74,6 @@ import accord.local.ShardDistributor.EvenSplit; import accord.local.Status; import accord.local.cfk.CommandsForKey; -import accord.messages.LocalRequest; import accord.messages.Request; import accord.primitives.Keys; import accord.primitives.Ranges; @@ -340,18 +340,11 @@ public static IAccordService instance() return i; } - public static long uniqueNow() + public static long now() { - // TODO (now, correctness): This is not unique it's just currentTimeMillis as microseconds return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); } - public static long unix(TimeUnit timeUnit) - { - Preconditions.checkArgument(timeUnit != TimeUnit.NANOSECONDS, "Nanoseconds since the epoch doesn't fit in a long"); - return timeUnit.convert(Clock.Global.currentTimeMillis(), TimeUnit.MILLISECONDS); - } - private AccordService(Id localId) { Invariants.checkState(localId != null, "static localId must be set before instantiating AccordService"); @@ -366,9 +359,8 @@ private AccordService(Id localId) this.journal = new AccordJournal(configService, DatabaseDescriptor.getAccord().journal); this.node = new Node(localId, messageSink, - this::handleLocalRequest, configService, - AccordService::uniqueNow, + AccordService::now, NodeTimeService.elapsedWrapperFromMonotonicSource(NANOSECONDS, Clock.Global::nanoTime), () -> dataStore, new KeyspaceSplitter(new EvenSplit<>(DatabaseDescriptor.getAccordShardCount(), getPartitioner().accordSplitter())), @@ -377,7 +369,10 @@ private AccordService(Id localId) scheduler, CompositeTopologySorter.create(SizeOfIntersectionSorter.SUPPLIER, new AccordTopologySorter.Supplier(configService, DatabaseDescriptor.getNodeProximity())), - SimpleProgressLog::new, + DefaultRemoteListeners::new, + DefaultRequestTimeouts::new, + DefaultProgressLogs::new, + DefaultLocalListeners.Factory::new, AccordCommandStores.factory(journal), new AccordInteropFactory(agent, configService), configuration); @@ -756,13 +751,6 @@ else if (cause instanceof RuntimeException) } } - private void handleLocalRequest(LocalRequest request, BiConsumer callback, Node node) - { - // currently, we only create LocalRequests that have side effects and need to be persisted - Invariants.checkState(request.type().hasSideEffects()); - journal.processLocalRequest(request, callback); - } - private static RequestTimeoutException newTimeout(TxnId txnId, boolean isWrite, ConsistencyLevel consistencyLevel) { // Client protocol doesn't handle null consistency level so use ANY diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 5fa5c6a941c3..42d4a4d2438e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -216,18 +216,11 @@ private void evict(AccordCachingState node) if (node.status() == LOADED && VALIDATE_LOAD_ON_EVICT) instance.validateLoadEvicted(node); - if (!node.hasListeners()) - { - AccordCachingState self = instances.get(node.index).cache.remove(node.key()); - Invariants.checkState(self.references == 0); - checkState(self == node, "Leaked node detected; was attempting to remove %s but cache had %s", node, self); - if (instance.listeners != null) - instance.listeners.forEach(l -> l.onEvict((AccordCachingState) node)); - } - else - { - node.markEvicted(); // keep the node in the cache to prevent transient listeners from being GCd - } + AccordCachingState self = instances.get(node.index).cache.remove(node.key()); + Invariants.checkState(self.references == 0); + checkState(self == node, "Leaked node detected; was attempting to remove %s but cache had %s", node, self); + if (instance.listeners != null) + instance.listeners.forEach(l -> l.onEvict((AccordCachingState) node)); } public ImmutableStats stats() diff --git a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java index 2c9626718d80..6f1e479f91a9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java @@ -113,7 +113,7 @@ Notification closed(Ranges addClosed) if (closed.containsAll(addClosed)) return null; - addClosed = addClosed.subtract(closed); + addClosed = addClosed.without(closed); closed = closed.with(addClosed); return new Notification(epoch, Collections.emptySet(), addClosed, Ranges.EMPTY); } @@ -123,7 +123,7 @@ Notification redundant(Ranges addRedundant) if (redundant.containsAll(addRedundant)) return null; - addRedundant = addRedundant.subtract(redundant); + addRedundant = addRedundant.without(redundant); redundant = redundant.with(addRedundant); return new Notification(epoch, Collections.emptySet(), Ranges.EMPTY, addRedundant); } @@ -140,8 +140,8 @@ boolean ack(Notification notification) if (notification.syncComplete.containsAll(syncComplete)) syncComplete = ImmutableSet.of(); else syncComplete = ImmutableSet.copyOf(Iterables.filter(syncComplete, v -> !notification.syncComplete.contains(v))); } - closed = closed.subtract(notification.closed); - redundant = redundant.subtract(notification.redundant); + closed = closed.without(notification.closed); + redundant = redundant.without(notification.redundant); return syncComplete.isEmpty() && closed.isEmpty() && redundant.isEmpty(); } diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index 0b4b357128be..c6eb33cc6363 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -233,7 +233,7 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable Durable accum.add(new TokenRange((AccordRoutingKey) start, (AccordRoutingKey) end)); return accum; }, new ArrayList(), ignore -> false).toArray(Range[]::new)); - Ranges newRanges = ranges.subtract(durableAlready); + Ranges newRanges = ranges.without(durableAlready); if (newRanges.isEmpty()) return null; diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index bb1fe6da7383..090e4da75544 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -29,7 +29,6 @@ import accord.api.Result; import accord.local.Command; import accord.local.CommonAttributes; -import accord.local.Listeners; import accord.local.SaveStatus; import accord.local.Status; import accord.primitives.Ballot; @@ -69,7 +68,6 @@ public enum Fields ADDITIONAL_KEYS, WAITING_ON, WRITES, - LISTENERS } public interface Writer extends Journal.Writer @@ -179,13 +177,6 @@ public static void serialize(Command before, Command after, DataOutputPlus out, if (getFieldChanged(Fields.WRITES, flags) && after.writes() != null) CommandSerializers.writes.serialize(after.writes(), out, userVersion); - - if (getFieldChanged(Fields.LISTENERS, flags) && after.durableListeners() != null) - { - out.writeByte(after.durableListeners().size()); - for (Command.DurableAndIdempotentListener listener : after.durableListeners()) - AccordKeyspace.LocalVersionedSerializers.listeners.serialize(listener, out); - } } @VisibleForTesting @@ -210,7 +201,6 @@ static int getFlags(Command before, Command after) flags = collectFlags(before, after, SavedCommand::getWaitingOn, false, Fields.WAITING_ON, flags); flags = collectFlags(before, after, Command::writes, false, Fields.WRITES, flags); - flags = collectFlags(before, after, c -> c.durableListeners().isEmpty() ? null : c.durableListeners(), true, Fields.LISTENERS, flags); return flags; } @@ -289,7 +279,6 @@ public static class Builder SavedCommand.WaitingOnProvider waitingOn = (txn, deps) -> null; Writes writes = null; - Listeners.Immutable listeners = null; Result result = CommandSerializers.APPLIED; boolean nextCalled = false; @@ -427,22 +416,6 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio else writes = CommandSerializers.writes.deserialize(in, userVersion); } - - if (getFieldChanged(Fields.LISTENERS, flags)) - { - if (getFieldIsNull(Fields.LISTENERS, flags)) - { - listeners = null; - } - else - { - Listeners builder = Listeners.Immutable.EMPTY.mutable(); - int cnt = in.readByte(); - for (int i = 0; i < cnt; i++) - builder.add(AccordKeyspace.LocalVersionedSerializers.listeners.deserialize(in)); - listeners = new Listeners.Immutable(builder); - } - } } public void forceResult(Result newValue) @@ -469,8 +442,6 @@ public Command construct() throws IOException attrs.partialDeps(partialDeps); if (additionalKeysOrRanges != null) attrs.additionalKeysOrRanges(additionalKeysOrRanges); - if (listeners != null && !listeners.isEmpty()) - attrs.setListeners(listeners); Command.WaitingOn waitingOn = null; if (this.waitingOn != null) @@ -518,7 +489,7 @@ private static Command.Truncated truncated(CommonAttributes.Mutable attrs, SaveS case Erased: return Command.Truncated.erased(attrs.txnId(), attrs.durability(), attrs.route()); case Invalidated: - return Command.Truncated.invalidated(attrs.txnId(), attrs.durableListeners()); + return Command.Truncated.invalidated(attrs.txnId()); } } @@ -537,7 +508,6 @@ public String toString() ", additionalKeysOrRanges=" + additionalKeysOrRanges + ", waitingOn=" + waitingOn + ", writes=" + writes + - ", listeners=" + listeners + '}'; } } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 7caeead9b1fc..441277e7574d 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -21,29 +21,45 @@ import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.Agent; import accord.api.EventsListener; +import accord.api.ProgressLog.BlockedUntil; import accord.api.Result; +import accord.api.RoutingKey; import accord.local.Command; import accord.local.Node; +import accord.local.SafeCommand; +import accord.local.SafeCommandStore; +import accord.messages.ReplyContext; import accord.primitives.Ranges; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; import accord.primitives.TxnId; +import accord.topology.Shard; +import accord.utils.DefaultRandom; +import accord.utils.Invariants; +import accord.utils.RandomSource; +import accord.utils.SortedList; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.metrics.AccordMetrics; +import org.apache.cassandra.net.ResponseContext; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.JVMStabilityInspector; import static accord.primitives.Routable.Domain.Key; import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getReadRpcTimeout; import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.maybeSaveAccordKeyMigrationLocally; @@ -55,6 +71,7 @@ public class AccordAgent implements Agent // TODO (required): this should be configurable and have exponential back-off, escaping to operator input past a certain number of retries private long retryBootstrapDelayMicros = SECONDS.toMicros(1L); + private final RandomSource random = new DefaultRandom(); public void setRetryBootstrapDelay(long delay, TimeUnit units) { @@ -147,4 +164,70 @@ public EventsListener metricsEventsListener() { return AccordMetrics.Listener.instance; } + + @Override + public long replyTimeout(ReplyContext replyContext, TimeUnit units) + { + return Math.max(1, units.convert(((ResponseContext)replyContext).expiresAtNanos() - Clock.Global.nanoTime(), NANOSECONDS)); + } + + @Override + public long attemptCoordinationDelay(Node node, SafeCommandStore safeStore, TxnId txnId, TimeUnit units, int retryCount) + { + SafeCommand safeCommand = safeStore.ifInitialised(txnId); + Invariants.nonNull(safeCommand); + + Command command = safeCommand.current(); + Invariants.nonNull(command); + + Timestamp mostRecentAttempt = Timestamp.max(command.txnId(), command.promised()); + RoutingKey homeKey = command.route().homeKey(); + Shard shard = node.topology().forEpochIfKnown(homeKey, command.txnId().epoch()); + + // TODO (expected): make this a configurable calculation on normal request latencies (like ContentionStrategy) + long oneSecond = SECONDS.toMicros(1L); + long startTime = mostRecentAttempt.hlc() + DatabaseDescriptor.getAccord().recover_delay.to(MICROSECONDS) + + (retryCount == 0 ? 0 : random.nextLong(oneSecond << Math.min(retryCount, 4))); + + startTime = nonClashingStartTime(startTime, shard == null ? null : shard.nodes, node.id(), oneSecond, random); + long nowMicros = MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + return units.convert(Math.max(1, startTime - nowMicros), MICROSECONDS); + } + + @VisibleForTesting + public static long nonClashingStartTime(long startTime, SortedList nodes, Node.Id id, long granularity, RandomSource random) + { + long perSecondStartTime; + if (nodes != null) + { + int position = nodes.indexOf(id); + perSecondStartTime = position * (SECONDS.toMicros(1) / nodes.size()); + } + else + { + // we've raced with topology update, this should be rare so just pick a random start time + perSecondStartTime = random.nextLong(granularity); + } + + // TODO (expected): make this a configurable calculation on normal request latencies (like ContentionStrategy) + long subSecondRemainder = startTime % granularity; + long newStartTime = startTime - subSecondRemainder + perSecondStartTime; + if (newStartTime < startTime) + newStartTime += granularity; + return newStartTime; + } + + @Override + public long seekProgressDelay(Node node, SafeCommandStore safeStore, TxnId txnId, int retryCount, BlockedUntil blockedUntil, TimeUnit units) + { + // TODO (required): make this configurable and dependent upon normal request latencies, and perhaps offset from txnId.hlc() + return units.convert((1L << Math.min(retryCount, 4)), SECONDS); + } + + @Override + public long retryAwaitTimeout(Node node, SafeCommandStore safeStore, TxnId txnId, int retryCount, BlockedUntil retrying, TimeUnit units) + { + // TODO (expected): integrate with contention backoff + return units.convert((1L << Math.min(retryCount, 4)), SECONDS); + } } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java index df83a92932f0..a616cf05d49c 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java @@ -46,6 +46,12 @@ public void cancel() { future.cancel(false); } + + @Override + public boolean isDone() + { + return future.isDone(); + } } @Override diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java index 49c06e811c72..8e3758a381ea 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java @@ -21,10 +21,10 @@ import java.util.BitSet; import javax.annotation.Nullable; +import accord.api.LocalListeners; import accord.api.Result; import accord.local.Command; import accord.local.Node.Id; -import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.local.Status; @@ -34,8 +34,8 @@ import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -47,9 +47,9 @@ import org.apache.cassandra.service.accord.AccordMessageSink.AccordMessageType; import org.apache.cassandra.service.accord.serializers.ApplySerializers.ApplySerializer; import org.apache.cassandra.service.accord.txn.AccordUpdate; +import org.jctools.queues.MpscChunkedArrayQueue; import static accord.utils.Invariants.checkState; -import static accord.utils.MapReduceConsume.forEach; import static com.google.common.base.Preconditions.checkArgument; /** @@ -58,7 +58,7 @@ * // and these all are a bit copy pasta in terms of managing things like waiting on, obsoletion, cancellation/listeners, insufficient etc. and it would be less fragile * // in the long run to not duplicate these kind of difficult to get right mechanism and have a single pluggable framework to request each specific behavior */ -public class AccordInteropApply extends Apply implements Command.TransientListener +public class AccordInteropApply extends Apply implements LocalListeners.ComplexListener { public static final Apply.Factory FACTORY = new Apply.Factory() { @@ -77,7 +77,7 @@ public Apply create(Kind kind, Id to, Topologies participates, TxnId txnId, Full public static final IVersionedSerializer serializer = new ApplySerializer() { @Override - protected AccordInteropApply deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) + protected AccordInteropApply deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { return new AccordInteropApply(kind, txnId, scope, waitForEpoch, keys, executeAt, deps, txn, fullRoute, writes, result); } @@ -85,8 +85,9 @@ protected AccordInteropApply deserializeApply(TxnId txnId, PartialRoute scope transient BitSet waitingOn; transient int waitingOnCount; + final MpscChunkedArrayQueue listeners = new MpscChunkedArrayQueue<>(4, 1 << 30); - private AccordInteropApply(Kind kind, TxnId txnId, PartialRoute route, long waitForEpoch, Seekables keys, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) + private AccordInteropApply(Kind kind, TxnId txnId, Route route, long waitForEpoch, Seekables keys, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { super(kind, txnId, route, waitForEpoch, keys, executeAt, deps, txn, fullRoute, writes, result); } @@ -137,7 +138,7 @@ public ApplyReply apply(SafeCommandStore safeStore) waitingOn.set(safeStore.commandStore().id()); ++waitingOnCount; } - safeCommand.addListener(this); + listeners.add(safeStore.register(txnId, this)); break; case Applied: @@ -190,11 +191,7 @@ else if (failure != null) private void cancel() { - node.commandStores().mapReduceConsume(this, waitingOn.stream(), forEach(safeStore -> { - SafeCommand safeCommand = safeStore.ifInitialised(txnId); - if (safeCommand != null) - safeCommand.removeListener(this); - }, node.agent())); + listeners.drain(LocalListeners.Registered::cancel); } @Override @@ -234,7 +231,7 @@ public String toString() } @Override - public void onChange(SafeCommandStore safeStore, SafeCommand safeCommand) + public boolean notify(SafeCommandStore safeStore, SafeCommand safeCommand) { Command command = safeCommand.current(); @@ -248,20 +245,14 @@ public void onChange(SafeCommandStore safeStore, SafeCommand safeCommand) case PreCommitted: case Committed: case PreApplied: - return; + return true; case Applied: case Invalidated: case Truncated: } - if (safeCommand.removeListener(this)) - ack(); - } - - @Override - public PreLoadContext listenerPreLoadContext(TxnId caller) - { - return PreLoadContext.contextFor(txnId); + ack(); + return false; } } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java index cd13b99f93fc..d1a26d22b4d0 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java @@ -29,8 +29,8 @@ import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -46,13 +46,13 @@ public class AccordInteropCommit extends Commit public static final IVersionedSerializer serializer = new CommitSerializer(AccordInteropRead.class, AccordInteropRead.requestSerializer) { @Override - protected AccordInteropCommit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected AccordInteropCommit deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, read); } }; - public AccordInteropCommit(Kind kind, TxnId txnId, PartialRoute scope, long waitForEpoch, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) + public AccordInteropCommit(Kind kind, TxnId txnId, Route scope, long waitForEpoch, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) { super(kind, txnId, scope, waitForEpoch, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, readData); } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index 5cff4f818466..7d0a4dadd003 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -34,6 +34,7 @@ import accord.api.Agent; import accord.api.Data; import accord.api.Result; +import accord.coordinate.CoordinationAdapter; import accord.local.AgentExecutor; import accord.local.CommandStore; import accord.local.Node; @@ -93,7 +94,6 @@ import org.apache.cassandra.transport.Dispatcher; import static accord.coordinate.CoordinationAdapter.Factory.Step.Continue; -import static accord.coordinate.CoordinationAdapter.Invoke.persist; import static accord.utils.Invariants.checkArgument; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordWriteMetrics; @@ -345,7 +345,7 @@ public void start() CommandStore cs = node.commandStores().select(route.homeKey()); result.beginAsResult().withExecutor(cs).begin((data, failure) -> { if (failure == null) - persist(node.coordinationAdapter(txnId, Continue), node, executes, route, txnId, txn, executeAt, deps, txn.execute(txnId, executeAt, data), txn.result(txnId, executeAt, data), callback); + ((CoordinationAdapter)node.coordinationAdapter(txnId, Continue)).persist(node, executes, route, txnId, txn, executeAt, deps, txn.execute(txnId, executeAt, data), txn.result(txnId, executeAt, data), callback); else callback.accept(null, failure); }); diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java index 857d082a2cf7..a0fd42229fd7 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java @@ -22,7 +22,7 @@ import accord.api.Result; import accord.coordinate.Persist; -import accord.coordinate.tracking.AppliedTracker; +import accord.coordinate.tracking.AllTracker; import accord.coordinate.tracking.QuorumTracker; import accord.coordinate.tracking.RequestStatus; import accord.coordinate.tracking.ResponseTracker; @@ -110,7 +110,7 @@ public void registerClientCallback(Result result, BiConsumer noop_data_serializer = new IVersionedSerializer() + private static final IVersionedSerializer noop_data_serializer = new IVersionedSerializer<>() { @Override public void serialize(Data t, DataOutputPlus out, int version) throws IOException {} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java index 8377991354dc..99c7f820ccf1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java @@ -22,7 +22,7 @@ import accord.messages.Accept; import accord.messages.Accept.AcceptReply; -import accord.primitives.PartialRoute; +import accord.primitives.Route; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -47,9 +47,9 @@ public void serializeBody(Accept accept, DataOutputPlus out, int version) throws } @Override - public Accept deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + public Accept deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { - return create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, + return create(txnId, scope, waitForEpoch, minEpoch, CommandSerializers.ballot.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), KeySerializers.seekables.deserialize(in, version), diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index 102ffb57b923..8370d59b80dc 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -24,8 +24,8 @@ import accord.messages.Apply; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -58,7 +58,6 @@ public long serializedSize(Apply.Kind t, int version) } }; -// public static final IVersionedSerializer request = new TxnRequestSerializer() public abstract static class ApplySerializer extends TxnRequestSerializer { @Override @@ -73,11 +72,11 @@ public void serializeBody(A apply, DataOutputPlus out, int version) throws IOExc CommandSerializers.writes.serialize(apply.writes, out, version); } - protected abstract A deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, + protected abstract A deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result); @Override - public A deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public A deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException { return deserializeApply(txnId, scope, waitForEpoch, kind.deserialize(in, version), @@ -106,7 +105,7 @@ public long serializedBodySize(A apply, int version) public static final IVersionedSerializer request = new ApplySerializer() { @Override - protected Apply deserializeApply(TxnId txnId, PartialRoute scope, long waitForEpoch, Apply.Kind kind, Seekables keys, + protected Apply deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result) { return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, kind, keys, executeAt, deps, txn, fullRoute, writes, result); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java new file mode 100644 index 000000000000..f18b40f47327 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; + +import accord.api.ProgressLog.BlockedUntil; +import accord.local.SaveStatus; +import accord.messages.Await; +import accord.messages.Await.AsyncAwaitComplete; +import accord.messages.Await.AwaitOk; +import accord.primitives.Participants; +import accord.primitives.Route; +import accord.primitives.TxnId; +import accord.utils.Invariants; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.vint.VIntCoding; + +public class AwaitSerializer +{ + public static final IVersionedSerializer request = new IVersionedSerializer<>() + { + @Override + public void serialize(Await await, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(await.txnId, out, version); + KeySerializers.participants.serialize(await.scope, out, version); + out.writeByte(await.blockedUntil.ordinal()); + out.writeUnsignedVInt32(await.callbackId + 1); + Invariants.checkState(await.callbackId >= -1); + } + + @Override + public Await deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Participants scope = KeySerializers.participants.deserialize(in, version); + BlockedUntil blockedUntil = BlockedUntil.forOrdinal(in.readByte()); + int callbackId = in.readUnsignedVInt32() - 1; + Invariants.checkState(callbackId >= -1); + return Await.SerializerSupport.create(txnId, scope, blockedUntil, callbackId); + } + + @Override + public long serializedSize(Await await, int version) + { + return CommandSerializers.txnId.serializedSize(await.txnId, version) + + KeySerializers.participants.serializedSize(await.scope, version) + + TypeSizes.BYTE_SIZE + + VIntCoding.computeUnsignedVIntSize(await.callbackId + 1); + } + }; + + public static final IVersionedSerializer syncReply = new EnumSerializer<>(AwaitOk.class); + + public static final IVersionedSerializer asyncReply = new IVersionedSerializer<>() + { + @Override + public void serialize(AsyncAwaitComplete ok, DataOutputPlus out, int version) throws IOException + { + CommandSerializers.txnId.serialize(ok.txnId, out, version); + KeySerializers.route.serialize(ok.route, out, version); + out.writeByte(ok.newStatus.ordinal()); + out.writeUnsignedVInt32(ok.callbackId); + } + + @Override + public AsyncAwaitComplete deserialize(DataInputPlus in, int version) throws IOException + { + TxnId txnId = CommandSerializers.txnId.deserialize(in, version); + Route scope = KeySerializers.route.deserialize(in, version); + SaveStatus newStatus = SaveStatus.forOrdinal(in.readByte()); + int callbackId = in.readUnsignedVInt32(); + return new AsyncAwaitComplete(txnId, scope, newStatus, callbackId); + } + + @Override + public long serializedSize(AsyncAwaitComplete ok, int version) + { + return CommandSerializers.txnId.serializedSize(ok.txnId, version) + + KeySerializers.route.serializedSize(ok.route, version) + + TypeSizes.BYTE_SIZE + + VIntCoding.computeVIntSize(ok.callbackId); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java index 54c130ac2baa..94390bd905e8 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java @@ -21,7 +21,7 @@ import java.io.IOException; import accord.api.RoutingKey; -import accord.local.Status; +import accord.local.SaveStatus; import accord.messages.BeginInvalidation; import accord.messages.BeginInvalidation.InvalidateReply; import accord.primitives.Ballot; @@ -67,7 +67,8 @@ public void serialize(InvalidateReply reply, DataOutputPlus out, int version) th { CommandSerializers.nullableBallot.serialize(reply.supersededBy, out, version); CommandSerializers.ballot.serialize(reply.accepted, out, version); - CommandSerializers.status.serialize(reply.status, out, version); + CommandSerializers.saveStatus.serialize(reply.maxStatus, out, version); + CommandSerializers.saveStatus.serialize(reply.maxKnowledgeStatus, out, version); out.writeBoolean(reply.acceptedFastPath); KeySerializers.nullableRoute.serialize(reply.route, out, version); KeySerializers.nullableRoutingKey.serialize(reply.homeKey, out, version); @@ -78,11 +79,12 @@ public InvalidateReply deserialize(DataInputPlus in, int version) throws IOExcep { Ballot supersededBy = CommandSerializers.nullableBallot.deserialize(in, version); Ballot accepted = CommandSerializers.ballot.deserialize(in, version); - Status status = CommandSerializers.status.deserialize(in, version); + SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in, version); + SaveStatus maxKnowledgeStatus = CommandSerializers.saveStatus.deserialize(in, version); boolean acceptedFastPath = in.readBoolean(); Route route = KeySerializers.nullableRoute.deserialize(in, version); RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); - return new InvalidateReply(supersededBy, accepted, status, acceptedFastPath, route, homeKey); + return new InvalidateReply(supersededBy, accepted, maxStatus, maxKnowledgeStatus, acceptedFastPath, route, homeKey); } @Override @@ -90,7 +92,8 @@ public long serializedSize(InvalidateReply reply, int version) { return CommandSerializers.nullableBallot.serializedSize(reply.supersededBy, version) + CommandSerializers.ballot.serializedSize(reply.accepted, version) - + CommandSerializers.status.serializedSize(reply.status, version) + + CommandSerializers.saveStatus.serializedSize(reply.maxStatus, version) + + CommandSerializers.saveStatus.serializedSize(reply.maxKnowledgeStatus, version) + TypeSizes.BOOL_SIZE + KeySerializers.nullableRoute.serializedSize(reply.route, version) + KeySerializers.nullableRoutingKey.serializedSize(reply.homeKey, version); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CalculateDepsSerializers.java similarity index 62% rename from src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java rename to src/java/org/apache/cassandra/service/accord/serializers/CalculateDepsSerializers.java index 37f9302ae36c..e10425a32135 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/GetDepsSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CalculateDepsSerializers.java @@ -20,9 +20,9 @@ import java.io.IOException; -import accord.messages.GetDeps; -import accord.messages.GetDeps.GetDepsOk; -import accord.primitives.PartialRoute; +import accord.messages.CalculateDeps; +import accord.messages.CalculateDeps.CalculateDepsOk; +import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -30,49 +30,49 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -public class GetDepsSerializers +public class CalculateDepsSerializers { - public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() + public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() { @Override - public void serializeBody(GetDeps msg, DataOutputPlus out, int version) throws IOException + public void serializeBody(CalculateDeps msg, DataOutputPlus out, int version) throws IOException { KeySerializers.seekables.serialize(msg.keys, out, version); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); } @Override - public GetDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + public CalculateDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { Seekables keys = KeySerializers.seekables.deserialize(in, version); Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); - return GetDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, keys, executeAt); + return CalculateDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, keys, executeAt); } @Override - public long serializedBodySize(GetDeps msg, int version) + public long serializedBodySize(CalculateDeps msg, int version) { return KeySerializers.seekables.serializedSize(msg.keys, version) + CommandSerializers.timestamp.serializedSize(msg.executeAt, version); } }; - public static final IVersionedSerializer reply = new IVersionedSerializer() + public static final IVersionedSerializer reply = new IVersionedSerializer() { @Override - public void serialize(GetDepsOk reply, DataOutputPlus out, int version) throws IOException + public void serialize(CalculateDepsOk reply, DataOutputPlus out, int version) throws IOException { DepsSerializer.partialDeps.serialize(reply.deps, out, version); } @Override - public GetDepsOk deserialize(DataInputPlus in, int version) throws IOException + public CalculateDepsOk deserialize(DataInputPlus in, int version) throws IOException { - return new GetDepsOk(DepsSerializer.partialDeps.deserialize(in, version)); + return new CalculateDepsOk(DepsSerializer.partialDeps.deserialize(in, version)); } @Override - public long serializedSize(GetDepsOk reply, int version) + public long serializedSize(CalculateDepsOk reply, int version) { return DepsSerializer.partialDeps.serializedSize(reply.deps, version); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index 369fe57d8e60..4dc9ed1805e9 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -41,7 +41,7 @@ import org.apache.cassandra.utils.vint.VIntCoding; import static accord.local.cfk.CommandsForKey.NO_PENDING_UNMANAGED; -import static accord.local.cfk.CommandsForKey.NO_TXNIDS; +import static accord.primitives.TxnId.NO_TXNIDS; import static accord.primitives.Txn.Kind.Read; import static accord.primitives.Txn.Kind.SyncPoint; import static accord.primitives.Txn.Kind.Write; @@ -117,19 +117,21 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) if (commandCount == 0) return ByteBuffer.allocate(1); - int[] nodeIds = cachedInts().getInts(Math.min(64, commandCount)); + int[] nodeIds = cachedInts().getInts(Math.min(64, Math.max(4, commandCount))); try { // first compute the unique Node Ids and some basic characteristics of the data, such as // whether we have any missing transactions to encode, any executeAt that are not equal to their TxnId // and whether there are any non-standard flag bits to encode boolean hasNonStandardFlags = false; - int nodeIdCount = 0, missingIdCount = 0, executeAtCount = 0, ballotCount = 0; + int nodeIdCount, missingIdCount = 0, executeAtCount = 0, ballotCount = 0; int bitsPerExecuteAtEpoch = 0, bitsPerExecuteAtFlags = 0, bitsPerExecuteAtHlc = 1; // to permit us to use full 64 bits and encode in 5 bits we force at least one hlc bit { + nodeIds[0] = cfk.redundantBefore().node.id; + nodeIdCount = 1; for (int i = 0 ; i < commandCount ; ++i) { - if (nodeIdCount + 2 >= nodeIds.length) + if (nodeIdCount + 3 >= nodeIds.length) { nodeIdCount = compact(nodeIds); if (nodeIdCount > nodeIds.length/2 || nodeIdCount + 2 >= nodeIds.length) @@ -176,10 +178,10 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) int maxHeaderBits = minHeaderBits; int totalBytes = 0; - long prevEpoch = cfk.get(0).epoch(); - long prevHlc = cfk.get(0).hlc(); int prunedBeforeIndex = cfk.prunedBefore().equals(TxnId.NONE) ? -1 : cfk.indexOf(cfk.prunedBefore()); + long prevEpoch = cfk.redundantBefore().epoch(); + long prevHlc = cfk.redundantBefore().hlc(); int[] bytesHistogram = cachedInts().getInts(12); Arrays.fill(bytesHistogram, 0); for (int i = 0 ; i < commandCount ; ++i) @@ -277,10 +279,12 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) cachedInts().forceDiscard(bytesHistogram); - prevEpoch = cfk.get(0).epoch(); - prevHlc = cfk.get(0).hlc(); + prevEpoch = cfk.redundantBefore().epoch(); + prevHlc = cfk.redundantBefore().hlc(); totalBytes += TypeSizes.sizeofUnsignedVInt(prevEpoch); totalBytes += TypeSizes.sizeofUnsignedVInt(prevHlc); + totalBytes += TypeSizes.sizeofUnsignedVInt(cfk.redundantBefore().flags()); + totalBytes += TypeSizes.sizeofUnsignedVInt(Arrays.binarySearch(nodeIds, 0, nodeIdCount, cfk.redundantBefore().node.id)); totalBytes += TypeSizes.sizeofUnsignedVInt(prunedBeforeIndex + 1); int bitsPerBallotEpoch = 0, bitsPerBallotHlc = 1, bitsPerBallotFlags = 0; @@ -348,8 +352,11 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) VIntCoding.writeUnsignedVInt32(nodeIds[i] - nodeIds[i-1], out); out.putShort((short)flags); + VIntCoding.writeUnsignedVInt(prevEpoch, out); VIntCoding.writeUnsignedVInt(prevHlc, out); + VIntCoding.writeUnsignedVInt32(cfk.redundantBefore().flags(), out); + VIntCoding.writeUnsignedVInt32(Arrays.binarySearch(nodeIds, 0, nodeIdCount, cfk.redundantBefore().node.id), out); VIntCoding.writeUnsignedVInt32(prunedBeforeIndex + 1, out); int executeAtMask = executeAtCount > 0 ? 1 : 0; @@ -372,7 +379,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) bits |= hasExecuteAt << bitIndex; bitIndex += statusHasInfo & executeAtMask; - long hasMissingIds = info.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)info).missing != CommandsForKey.NO_TXNIDS ? 1 : 0; + long hasMissingIds = info.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)info).missing != NO_TXNIDS ? 1 : 0; bits |= hasMissingIds << bitIndex; bitIndex += statusHasInfo & missingDepsMask; @@ -639,6 +646,12 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) long prevEpoch = VIntCoding.readUnsignedVInt(in); long prevHlc = VIntCoding.readUnsignedVInt(in); + TxnId redundantBefore; + { + int flags = VIntCoding.readUnsignedVInt32(in); + Node.Id node = nodeIds[VIntCoding.readUnsignedVInt32(in)]; + redundantBefore = TxnId.fromValues(prevEpoch, prevHlc, flags, node); + } int prunedBeforeIndex = VIntCoding.readUnsignedVInt32(in) - 1; for (int i = 0 ; i < commandCount ; ++i) @@ -879,7 +892,7 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) } cachedTxnIds().forceDiscard(txnIds, commandCount); - return CommandsForKey.SerializerSupport.create(key, txns, unmanageds, prunedBeforeIndex == -1 ? TxnId.NONE : txns[prunedBeforeIndex]); + return CommandsForKey.SerializerSupport.create(key, txns, unmanageds, redundantBefore, prunedBeforeIndex == -1 ? TxnId.NONE : txns[prunedBeforeIndex]); } private static int getHlcBytes(int lookup, int index) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index 629da34f7716..9e439233ae3e 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -26,8 +26,8 @@ import accord.primitives.Ballot; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -68,13 +68,13 @@ public void serializeBody(C msg, DataOutputPlus out, int version) throws IOExcep serializeNullable(msg.readData, out, version, read); } - protected abstract C deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, + protected abstract C deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read); @Override - public C deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public C deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException { Commit.Kind kind = CommitSerializers.kind.deserialize(in, version); Ballot ballot = CommandSerializers.ballot.deserialize(in, version); @@ -104,7 +104,7 @@ public long serializedBodySize(C msg, int version) public static final IVersionedSerializer request = new CommitSerializer(ReadData.class, ReadDataSerializers.readData) { @Override - protected Commit deserializeCommit(TxnId txnId, PartialRoute scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected Commit deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, kind, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, read); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index 4512776154fb..9389bcc51b5c 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -21,25 +21,11 @@ import java.io.IOException; import accord.api.Data; -import accord.api.Result; -import accord.api.RoutingKey; import accord.impl.AbstractFetchCoordinator.FetchRequest; import accord.impl.AbstractFetchCoordinator.FetchResponse; -import accord.local.SaveStatus; -import accord.local.Status.Durability; -import accord.local.Status.Known; -import accord.messages.CheckStatus; -import accord.messages.Propagate; import accord.messages.ReadData.CommitOrReadNack; import accord.messages.ReadData.ReadReply; -import accord.primitives.Ballot; -import accord.primitives.PartialDeps; -import accord.primitives.PartialTxn; import accord.primitives.Ranges; -import accord.primitives.Route; -import accord.primitives.Timestamp; -import accord.primitives.TxnId; -import accord.primitives.Writes; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -134,92 +120,4 @@ public long serializedSize(ReadReply reply, int version) } }; - public static final IVersionedSerializer propagate = new IVersionedSerializer() - { - @Override - public void serialize(Propagate p, DataOutputPlus out, int version) throws IOException - { - CommandSerializers.txnId.serialize(p.txnId, out, version); - KeySerializers.route.serialize(p.route, out, version); - CommandSerializers.saveStatus.serialize(p.maxKnowledgeSaveStatus, out, version); - CommandSerializers.saveStatus.serialize(p.maxSaveStatus, out, version); - CommandSerializers.ballot.serialize(p.ballot, out, version); - CommandSerializers.durability.serialize(p.durability, out, version); - KeySerializers.nullableRoutingKey.serialize(p.homeKey, out, version); - KeySerializers.nullableRoutingKey.serialize(p.progressKey, out, version); - CommandSerializers.known.serialize(p.achieved, out, version); - CheckStatusSerializers.foundKnownMap.serialize(p.known, out, version); - out.writeBoolean(p.isShardTruncated); - CommandSerializers.nullablePartialTxn.serialize(p.partialTxn, out, version); - DepsSerializer.nullablePartialDeps.serialize(p.stableDeps, out, version); - out.writeLong(p.toEpoch); - CommandSerializers.nullableTimestamp.serialize(p.committedExecuteAt, out, version); - CommandSerializers.nullableWrites.serialize(p.writes, out, version); - } - - @Override - public Propagate deserialize(DataInputPlus in, int version) throws IOException - { - TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Route route = KeySerializers.route.deserialize(in, version); - SaveStatus maxKnowledgeSaveStatus = CommandSerializers.saveStatus.deserialize(in, version); - SaveStatus maxSaveStatus = CommandSerializers.saveStatus.deserialize(in, version); - Ballot ballot = CommandSerializers.ballot.deserialize(in, version); - Durability durability = CommandSerializers.durability.deserialize(in, version); - RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); - RoutingKey progressKey = KeySerializers.nullableRoutingKey.deserialize(in, version); - Known achieved = CommandSerializers.known.deserialize(in, version); - CheckStatus.FoundKnownMap known = CheckStatusSerializers.foundKnownMap.deserialize(in, version); - boolean isTruncated = in.readBoolean(); - PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); - PartialDeps committedDeps = DepsSerializer.nullablePartialDeps.deserialize(in, version); - long toEpoch = in.readLong(); - Timestamp committedExecuteAt = CommandSerializers.nullableTimestamp.deserialize(in, version); - Writes writes = CommandSerializers.nullableWrites.deserialize(in, version); - - Result result = null; - if (achieved.outcome.isOrWasApply()) - result = CommandSerializers.APPLIED; - - return Propagate.SerializerSupport.create(txnId, - route, - maxKnowledgeSaveStatus, - maxSaveStatus, - ballot, - durability, - homeKey, - progressKey, - achieved, - known, - isTruncated, - partialTxn, - committedDeps, - toEpoch, - committedExecuteAt, - writes, - result); - } - - @Override - public long serializedSize(Propagate p, int version) - { - return CommandSerializers.txnId.serializedSize(p.txnId, version) - + KeySerializers.route.serializedSize(p.route, version) - + CommandSerializers.saveStatus.serializedSize(p.maxKnowledgeSaveStatus, version) - + CommandSerializers.saveStatus.serializedSize(p.maxSaveStatus, version) - + CommandSerializers.ballot.serializedSize(p.ballot, version) - + CommandSerializers.durability.serializedSize(p.durability, version) - + KeySerializers.nullableRoutingKey.serializedSize(p.homeKey, version) - + KeySerializers.nullableRoutingKey.serializedSize(p.progressKey, version) - + CommandSerializers.known.serializedSize(p.achieved, version) - + CheckStatusSerializers.foundKnownMap.serializedSize(p.known, version) - + TypeSizes.BOOL_SIZE - + CommandSerializers.nullablePartialTxn.serializedSize(p.partialTxn, version) - + DepsSerializer.nullablePartialDeps.serializedSize(p.stableDeps, version) - + TypeSizes.sizeof(p.toEpoch) - + CommandSerializers.nullableTimestamp.serializedSize(p.committedExecuteAt, version) - + CommandSerializers.nullableWrites.serializedSize(p.writes, version) - ; - } - }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java index 44d202d716b5..7fe67842de5f 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java @@ -23,7 +23,7 @@ import accord.messages.GetEphemeralReadDeps; import accord.messages.GetEphemeralReadDeps.GetEphemeralReadDepsOk; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; +import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; @@ -43,7 +43,7 @@ public void serializeBody(GetEphemeralReadDeps msg, DataOutputPlus out, int vers } @Override - public GetEphemeralReadDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + public GetEphemeralReadDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { Seekables keys = KeySerializers.seekables.deserialize(in, version); long executionEpoch = in.readUnsignedVInt(); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java index d50e6993b9c5..ad8af3ba88d5 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java @@ -22,7 +22,7 @@ import accord.messages.GetMaxConflict; import accord.messages.GetMaxConflict.GetMaxConflictOk; -import accord.primitives.PartialRoute; +import accord.primitives.Route; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -43,7 +43,7 @@ public void serializeBody(GetMaxConflict msg, DataOutputPlus out, int version) t } @Override - public GetMaxConflict deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + public GetMaxConflict deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { Seekables keys = KeySerializers.seekables.deserialize(in, version); long executionEpoch = in.readUnsignedVInt(); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java index 66c649eb9216..d23e6c99b92e 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java @@ -22,7 +22,7 @@ import accord.local.Status; import accord.messages.InformDurable; -import accord.primitives.PartialRoute; +import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; @@ -36,14 +36,14 @@ public class InformDurableSerializers @Override public void serializeBody(InformDurable msg, DataOutputPlus out, int version) throws IOException { - CommandSerializers.timestamp.serialize(msg.executeAt, out, version); + CommandSerializers.nullableTimestamp.serialize(msg.executeAt, out, version); CommandSerializers.durability.serialize(msg.durability, out, version); } @Override - public InformDurable deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public InformDurable deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException { - Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); + Timestamp executeAt = CommandSerializers.nullableTimestamp.deserialize(in, version); Status.Durability durability = CommandSerializers.durability.deserialize(in, version); return InformDurable.SerializationSupport.create(txnId, scope, waitForEpoch, executeAt, durability); } @@ -51,7 +51,7 @@ public InformDurable deserializeBody(DataInputPlus in, int version, TxnId txnId, @Override public long serializedBodySize(InformDurable msg, int version) { - return CommandSerializers.timestamp.serializedSize(msg.executeAt, version) + return CommandSerializers.nullableTimestamp.serializedSize(msg.executeAt, version) + CommandSerializers.durability.serializedSize(msg.durability, version); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java deleted file mode 100644 index c4f215557715..000000000000 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformHomeDurableSerializers.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.serializers; - -import java.io.IOException; - -import accord.messages.InformHomeDurable; -import org.apache.cassandra.io.IVersionedSerializer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; - -public class InformHomeDurableSerializers -{ - public static final IVersionedSerializer request = new IVersionedSerializer<>() - { - @Override - public void serialize(InformHomeDurable inform, DataOutputPlus out, int version) throws IOException - { - CommandSerializers.txnId.serialize(inform.txnId, out, version); - KeySerializers.route.serialize(inform.route, out, version); - CommandSerializers.timestamp.serialize(inform.executeAt, out, version); - CommandSerializers.durability.serialize(inform.durability, out, version); - } - - @Override - public InformHomeDurable deserialize(DataInputPlus in, int version) throws IOException - { - return new InformHomeDurable(CommandSerializers.txnId.deserialize(in, version), - KeySerializers.route.deserialize(in, version), - CommandSerializers.timestamp.deserialize(in, version), - CommandSerializers.durability.deserialize(in, version)); - } - - @Override - public long serializedSize(InformHomeDurable inform, int version) - { - return CommandSerializers.txnId.serializedSize(inform.txnId, version) - + KeySerializers.route.serializedSize(inform.route, version) - + CommandSerializers.timestamp.serializedSize(inform.executeAt, version) - + CommandSerializers.durability.serializedSize(inform.durability, version); - } - - }; -} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java deleted file mode 100644 index c6f2098a16b1..000000000000 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformOfTxnIdSerializers.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.serializers; - -import java.io.IOException; - -import accord.messages.InformOfTxnId; -import org.apache.cassandra.io.IVersionedSerializer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; - -public class InformOfTxnIdSerializers -{ - public static final IVersionedSerializer request = new IVersionedSerializer() - { - @Override - public void serialize(InformOfTxnId inform, DataOutputPlus out, int version) throws IOException - { - CommandSerializers.txnId.serialize(inform.txnId, out, version); - KeySerializers.route.serialize(inform.someRoute, out, version); - } - - @Override - public InformOfTxnId deserialize(DataInputPlus in, int version) throws IOException - { - return new InformOfTxnId(CommandSerializers.txnId.deserialize(in, version), - KeySerializers.route.deserialize(in, version)); - } - - @Override - public long serializedSize(InformOfTxnId inform, int version) - { - return CommandSerializers.txnId.serializedSize(inform.txnId, version) - + KeySerializers.route.serializedSize(inform.someRoute, version); - } - }; -} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java deleted file mode 100644 index 150c5af3940b..000000000000 --- a/src/java/org/apache/cassandra/service/accord/serializers/ListenerSerializers.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.serializers; - -import java.io.IOException; - -import accord.local.Command; -import org.apache.cassandra.db.TypeSizes; -import org.apache.cassandra.io.IVersionedSerializer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; - -public class ListenerSerializers -{ - public enum Kind - { - COMMAND; - - private static Kind of(Command.DurableAndIdempotentListener listener) - { - if (listener instanceof Command.ProxyListener) - return COMMAND; - - throw new IllegalArgumentException("Unsupported listener type: " + listener.getClass().getName()); - } - } - - - private static final IVersionedSerializer commandListener = new IVersionedSerializer() - { - @Override - public void serialize(Command.ProxyListener listener, DataOutputPlus out, int version) throws IOException - { - CommandSerializers.txnId.serialize(listener.txnId(), out, version); - } - - @Override - public Command.ProxyListener deserialize(DataInputPlus in, int version) throws IOException - { - return new Command.ProxyListener(CommandSerializers.txnId.deserialize(in, version)); - } - - @Override - public long serializedSize(Command.ProxyListener listener, int version) - { - return CommandSerializers.txnId.serializedSize(listener.txnId(), version); - } - }; - - public static final IVersionedSerializer listener = new IVersionedSerializer() - { - @Override - public void serialize(Command.DurableAndIdempotentListener listener, DataOutputPlus out, int version) throws IOException - { - Kind kind = Kind.of(listener); - out.write(kind.ordinal()); - switch (kind) - { - case COMMAND: - commandListener.serialize((Command.ProxyListener) listener, out, version); - break; - default: - throw new IllegalArgumentException(); - } - } - - @Override - public Command.DurableAndIdempotentListener deserialize(DataInputPlus in, int version) throws IOException - { - Kind kind = Kind.values()[in.readByte()]; - switch (kind) - { - case COMMAND: - return commandListener.deserialize(in, version); - default: - throw new IllegalArgumentException(); - } - } - - @Override - public long serializedSize(Command.DurableAndIdempotentListener listener, int version) - { - Kind kind = Kind.of(listener); - long size = TypeSizes.BYTE_SIZE; - switch (kind) - { - case COMMAND: - size += commandListener.serializedSize((Command.ProxyListener) listener, version); - break; - default: - throw new IllegalArgumentException(); - } - - return size; - } - }; -} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java index c12e92e18b94..89f4abdec113 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java @@ -25,8 +25,8 @@ import accord.messages.PreAccept.PreAcceptOk; import accord.messages.PreAccept.PreAcceptReply; import accord.primitives.FullRoute; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -49,16 +49,16 @@ public void serializeBody(PreAccept msg, DataOutputPlus out, int version) throws { CommandSerializers.partialTxn.serialize(msg.partialTxn, out, version); serializeNullable(msg.route, out, version, KeySerializers.fullRoute); - out.writeUnsignedVInt(msg.maxEpoch - msg.minUnsyncedEpoch); + out.writeUnsignedVInt(msg.maxEpoch - msg.minEpoch); } @Override - public PreAccept deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException + public PreAccept deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { PartialTxn partialTxn = CommandSerializers.partialTxn.deserialize(in, version); @Nullable FullRoute fullRoute = deserializeNullable(in, version, KeySerializers.fullRoute); long maxEpoch = in.readUnsignedVInt() + minEpoch; - return PreAccept.SerializerSupport.create(txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey, + return PreAccept.SerializerSupport.create(txnId, scope, waitForEpoch, minEpoch, maxEpoch, partialTxn, fullRoute); } @@ -67,7 +67,7 @@ public long serializedBodySize(PreAccept msg, int version) { return CommandSerializers.partialTxn.serializedSize(msg.partialTxn, version) + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) - + TypeSizes.sizeofUnsignedVInt(msg.maxEpoch - msg.minUnsyncedEpoch); + + TypeSizes.sizeofUnsignedVInt(msg.maxEpoch - msg.minEpoch); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index 346d1c8bdf48..5caab8fb2b83 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -33,8 +33,8 @@ import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.LatestDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; @@ -60,7 +60,7 @@ public void serializeBody(BeginRecovery recover, DataOutputPlus out, int version } @Override - public BeginRecovery deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public BeginRecovery deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException { PartialTxn partialTxn = CommandSerializers.partialTxn.deserialize(in, version); Ballot ballot = CommandSerializers.ballot.deserialize(in, version); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java index c9da12801e41..6dc5f9e56494 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java @@ -21,7 +21,7 @@ import java.io.IOException; import accord.messages.TxnRequest; -import accord.primitives.PartialRoute; +import accord.primitives.Route; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -33,7 +33,7 @@ public abstract class TxnRequestSerializer> implements I void serializeHeader(T msg, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(msg.txnId, out, version); - KeySerializers.partialRoute.serialize(msg.scope, out, version); + KeySerializers.route.serialize(msg.scope, out, version); out.writeUnsignedVInt(msg.waitForEpoch); } @@ -46,13 +46,13 @@ public final void serialize(T msg, DataOutputPlus out, int version) throws IOExc serializeBody(msg, out, version); } - public abstract T deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException; + public abstract T deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException; @Override public final T deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - PartialRoute scope = KeySerializers.partialRoute.deserialize(in, version); + Route scope = KeySerializers.route.deserialize(in, version); // TODO: there should be a base epoch long waitForEpoch = in.readUnsignedVInt(); return deserializeBody(in, version, txnId, scope, waitForEpoch); @@ -61,7 +61,7 @@ public final T deserialize(DataInputPlus in, int version) throws IOException long serializedHeaderSize(T msg, int version) { return CommandSerializers.txnId.serializedSize(msg.txnId, version) - + KeySerializers.partialRoute.serializedSize(msg.scope(), version) + + + KeySerializers.route.serializedSize(msg.scope(), version) + TypeSizes.sizeofUnsignedVInt(msg.waitForEpoch); } @@ -79,26 +79,23 @@ public static abstract class WithUnsyncedSerializer scope, long waitForEpoch, long minEpoch, boolean doNotComputeProgressKey) throws IOException; + public abstract T deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException; @Override - public final T deserializeBody(DataInputPlus in, int version, TxnId txnId, PartialRoute scope, long waitForEpoch) throws IOException + public final T deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException { long minEpoch = in.readUnsignedVInt(); - boolean doNotComputeProgressKey = in.readBoolean(); - return deserializeBody(in, version, txnId, scope, waitForEpoch, minEpoch, doNotComputeProgressKey); + return deserializeBody(in, version, txnId, scope, waitForEpoch, minEpoch); } @Override long serializedHeaderSize(T msg, int version) { long size = super.serializedHeaderSize(msg, version); - size += TypeSizes.sizeofUnsignedVInt(msg.minUnsyncedEpoch); - size += TypeSizes.BOOL_SIZE; + size += TypeSizes.sizeofUnsignedVInt(msg.minEpoch); return size; } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java deleted file mode 100644 index 821b8d9a3025..000000000000 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitOnCommitSerializer.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord.serializers; - -import java.io.IOException; - -import accord.messages.WaitOnCommit; -import accord.messages.WaitOnCommit.WaitOnCommitOk; -import accord.primitives.Participants; -import accord.primitives.TxnId; -import org.apache.cassandra.io.IVersionedSerializer; -import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputPlus; - -public class WaitOnCommitSerializer -{ - public static final IVersionedSerializer request = new IVersionedSerializer() - { - @Override - public void serialize(WaitOnCommit wait, DataOutputPlus out, int version) throws IOException - { - CommandSerializers.txnId.serialize(wait.txnId, out, version); - KeySerializers.participants.serialize(wait.scope, out, version); - } - - @Override - public WaitOnCommit deserialize(DataInputPlus in, int version) throws IOException - { - TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Participants scope = KeySerializers.participants.deserialize(in, version); - return WaitOnCommit.SerializerSupport.create(txnId, scope); - } - - @Override - public long serializedSize(WaitOnCommit wait, int version) - { - return CommandSerializers.txnId.serializedSize(wait.txnId, version) - + KeySerializers.participants.serializedSize(wait.scope, version); - } - }; - - public static final IVersionedSerializer reply = new IVersionedSerializer() - { - @Override - public void serialize(WaitOnCommitOk ok, DataOutputPlus out, int version) throws IOException - { - } - - @Override - public WaitOnCommitOk deserialize(DataInputPlus in, int version) throws IOException - { - return WaitOnCommitOk.INSTANCE; - } - - @Override - public long serializedSize(WaitOnCommitOk ok, int version) - { - return 0; - } - }; -} diff --git a/src/java/org/apache/cassandra/triggers/TriggerExecutor.java b/src/java/org/apache/cassandra/triggers/TriggerExecutor.java index 335131d805be..c3fe269559e6 100644 --- a/src/java/org/apache/cassandra/triggers/TriggerExecutor.java +++ b/src/java/org/apache/cassandra/triggers/TriggerExecutor.java @@ -19,11 +19,6 @@ package org.apache.cassandra.triggers; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; import java.util.*; import java.util.concurrent.TimeUnit; diff --git a/test/conf/cassandra.yaml b/test/conf/cassandra.yaml index e78416c883cd..84d9478d406b 100644 --- a/test/conf/cassandra.yaml +++ b/test/conf/cassandra.yaml @@ -119,4 +119,3 @@ accord: enabled: true journal_directory: build/test/cassandra/accord_journal shard_count: 4 - progress_log_schedule_delay: 1s diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index 43e4749ba4a1..59d46c52efa8 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -447,7 +447,7 @@ public static IMessage serializeMessage(InetAddressAndPort from, InetAddressAndP byte[] bytes = out.toByteArray(); if (messageOut.serializedSize(toVersion) != bytes.length) throw new AssertionError(String.format("Message serializedSize(%s) does not match what was written with serialize(out, %s) for verb %s and serializer %s; " + - "expected %s, actual %s ", toVersion, toVersion, messageOut.verb(), Message.serializer.getClass(), + "expected %s, actual %s ", toVersion, toVersion, messageOut.verb(), messageOut.verb().serializer().getClass(), messageOut.serializedSize(toVersion), bytes.length)); return new MessageImpl(messageOut.verb().id, bytes, messageOut.id(), toVersion, messageOut.expiresAtNanos(), fromCassandraInetAddressAndPort(from)); } diff --git a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java index 3c6d2caf6ceb..81deb3e2fe03 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/InstanceConfig.java @@ -100,7 +100,7 @@ private InstanceConfig(int num, .set("accord.enabled", accord.enabled) .set("accord.journal_directory", accord.journal_directory) .set("accord.shard_count", accord.shard_count.toString()) - .set("accord.progress_log_schedule_delay", accord.progress_log_schedule_delay.toString()) + .set("accord.recover_delay", accord.recover_delay.toString()) .set("partitioner", "org.apache.cassandra.dht.Murmur3Partitioner") .set("start_native_transport", true) .set("concurrent_writes", 2) diff --git a/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java b/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java index 528e5ad00c6a..3ccdad799744 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/QueriesTableTest.java @@ -22,6 +22,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.CyclicBarrier; +import accord.impl.progresslog.DefaultProgressLogs; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; @@ -31,7 +32,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import accord.impl.SimpleProgressLog; import com.datastax.driver.core.Session; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Mutation; @@ -177,7 +177,7 @@ public void shouldExposeTransaction() throws Throwable // Disable recovery to make sure only one local read occurs: for (IInvokableInstance instance : SHARED_CLUSTER) - instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = true); + instance.runOnInstance(() -> DefaultProgressLogs.unsafePauseForTesting(true)); String update = "BEGIN TRANSACTION\n" + " LET row1 = (SELECT * FROM " + KEYSPACE + ".accord_tbl WHERE k = 0);\n" + diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java index 69d2622830e3..f295ce6e1d82 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -36,7 +36,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.impl.SimpleProgressLog; +import accord.impl.progresslog.DefaultProgressLogs; import accord.local.Node; import accord.local.PreLoadContext; import accord.local.SafeCommand; @@ -156,7 +156,7 @@ public static void setupClass() throws Throwable public void tearDown() { for (IInvokableInstance instance : SHARED_CLUSTER) - instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = false); + instance.runOnInstance(() -> DefaultProgressLogs.unsafePauseForTesting(false)); SHARED_CLUSTER.filters().reset(); } @@ -292,7 +292,7 @@ public void txnRepairTest() throws Throwable // heal partition and wait for node 1 to see node 3 again for (IInvokableInstance instance : SHARED_CLUSTER) instance.runOnInstance(() -> { - SimpleProgressLog.PAUSE_FOR_TEST = true; + DefaultProgressLogs.unsafePauseForTesting(true); Assert.assertTrue(agent().executedBarriers().isEmpty()); }); SHARED_CLUSTER.filters().reset(); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java index 0ba67894d2c7..572ffaae492d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIntegrationTest.java @@ -26,7 +26,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.impl.SimpleProgressLog; +import accord.impl.progresslog.DefaultProgressLogs; import accord.messages.Commit; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IMessageFilters; @@ -131,6 +131,6 @@ public void testLostCommitReadTriggersFallbackRead() throws Exception private void pauseSimpleProgressLog() { for (IInvokableInstance instance : SHARED_CLUSTER) - instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = true); + instance.runOnInstance(() -> DefaultProgressLogs.unsafePauseForTesting(true)); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java index e5865ef9d310..1be3fd686dc2 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java @@ -42,8 +42,8 @@ import accord.api.RoutingKey; import accord.messages.PreAccept; import accord.primitives.PartialKeyRoute; -import accord.primitives.PartialRoute; import accord.primitives.Routable.Domain; +import accord.primitives.Route; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.Util; import org.apache.cassandra.batchlog.BatchlogManager; @@ -458,7 +458,7 @@ private void testSplitAndRetry(String batchCQL, Consumer validation, Sc { boolean drop = cluster.get(to).callsOnInstance(() -> { PreAccept preAccept = (PreAccept)Instance.deserializeMessage(message).payload; - PartialRoute route = preAccept.scope; + Route route = preAccept.scope; if (route.domain() == Domain.Key) for (RoutingKey key : (PartialKeyRoute)route) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java new file mode 100644 index 000000000000..b69f6a6b48f5 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IMessageFilters; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.consensus.TransactionalMode; + +public class AccordProgressLogTest extends TestBaseImpl +{ + private static final Logger logger = LoggerFactory.getLogger(AccordProgressLogTest.class); + + @Test + public void testRecoveryTimeWindow() throws Throwable + { + try (Cluster cluster = init(Cluster.build(3) + .withoutVNodes() + .withConfig(c -> c.with(Feature.NETWORK).set("accord.enabled", "true")) + .start())) + { + cluster.schemaChange("CREATE KEYSPACE ks WITH replication={'class':'SimpleStrategy', 'replication_factor': 3}"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key (k, c)) WITH " + TransactionalMode.full.asCqlParam()); + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM ks.tbl WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + + IMessageFilters.Filter dropCommit = cluster.filters().outbound().from(1).verbs(Verb.ACCORD_COMMIT_REQ.id).drop(); + AtomicLong recoveryStartedAt = new AtomicLong(); + Semaphore waitForRecovery = new Semaphore(0); + IMessageFilters.Filter recovery = cluster.filters().outbound().messagesMatching((from, to, message) -> { + if (message.verb() == Verb.ACCORD_BEGIN_RECOVER_RSP.id) + { + recoveryStartedAt.compareAndSet(0, System.nanoTime()); + waitForRecovery.release(); + } + return false; + }).drop(); + + long coordinationStartedAt = System.nanoTime(); + boolean failed = false; + try + { + cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY); + } + catch (Throwable e) + { + failed = true; + } + Assert.assertTrue(failed); + + waitForRecovery.acquire(); + long timeDeltaMillis = TimeUnit.NANOSECONDS.toMillis(recoveryStartedAt.get() - coordinationStartedAt); + Assert.assertTrue("Recovery started in " + timeDeltaMillis + "ms", timeDeltaMillis >= 1000); + Assert.assertTrue("Recovery started in " + timeDeltaMillis + "ms", timeDeltaMillis <= 3000); + } + } + + @Test + public void testFetchTimeWindow() throws Throwable + { + try (Cluster cluster = init(Cluster.build(3) + .withoutVNodes() + .withConfig(c -> c.with(Feature.NETWORK).set("accord.enabled", "true")) + .start())) + { + cluster.schemaChange("CREATE KEYSPACE ks WITH replication={'class':'SimpleStrategy', 'replication_factor': 3}"); + cluster.schemaChange("CREATE TABLE ks.tbl (k int, c int, v int, primary key (k, c)) WITH " + TransactionalMode.full.asCqlParam()); + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM ks.tbl WHERE k=0 AND c=0;\n" + + "COMMIT TRANSACTION"; + + IMessageFilters.Filter dropApply = cluster.filters().outbound().from(1).verbs(Verb.ACCORD_APPLY_REQ.id).drop(); + AtomicLong fetchStartedAt = new AtomicLong(); + Semaphore waitForFetch = new Semaphore(0); + IMessageFilters.Filter fetch = cluster.filters().outbound().messagesMatching((from, to, message) -> { + if (message.verb() == Verb.ACCORD_AWAIT_REQ.id) + { + fetchStartedAt.compareAndSet(0, System.nanoTime()); + waitForFetch.release(); + } + return false; + }).drop(); + + long coordinationStartedAt = System.nanoTime(); + try + { + cluster.coordinator(1).executeWithResult(query, ConsistencyLevel.ANY); + } + catch (Throwable e) + { + } + + waitForFetch.acquire(); + logger.info("Coordinated at {}", coordinationStartedAt); + logger.info("Awaited at {}", fetchStartedAt.get()); + long timeDeltaMillis = TimeUnit.NANOSECONDS.toMillis(fetchStartedAt.get() - coordinationStartedAt); + Assert.assertTrue("Fetch started in " + timeDeltaMillis + "ms", timeDeltaMillis >= 100); + Assert.assertTrue("Fetch started in " + timeDeltaMillis + "ms", timeDeltaMillis <= 2000); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index f30ef762cc9c..c60c064e6888 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -44,11 +44,11 @@ import accord.api.RoutingKey; import accord.coordinate.Invalidated; -import accord.impl.SimpleProgressLog; +import accord.impl.progresslog.DefaultProgressLogs; import accord.messages.PreAccept; import accord.primitives.PartialKeyRoute; -import accord.primitives.PartialRoute; import accord.primitives.Routable.Domain; +import accord.primitives.Route; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; @@ -142,7 +142,7 @@ public void setup() public void tearDown() throws Exception { for (IInvokableInstance instance : SHARED_CLUSTER) - instance.runOnInstance(() -> SimpleProgressLog.PAUSE_FOR_TEST = false); + instance.runOnInstance(() -> DefaultProgressLogs.unsafePauseForTesting(false)); } protected static void assertRowSerial(Cluster cluster, String query, int k, int c, int v, int s) @@ -558,7 +558,7 @@ protected static void blockMutationAndPreAccept(Cluster cluster) { boolean drop = cluster.get(to).callsOnInstance(() -> { PreAccept preAccept = (PreAccept)Instance.deserializeMessage(message).payload; - PartialRoute route = preAccept.scope; + Route route = preAccept.scope; if (route.domain() == Domain.Key) for (RoutingKey key : (PartialKeyRoute)route) { diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index a955a9c315dc..e69bffb8e17e 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -32,8 +32,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; -import accord.primitives.Routable; import accord.local.CommandStores; +import accord.primitives.Route; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.service.accord.*; @@ -58,7 +58,6 @@ import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.Seekable; @@ -95,6 +94,7 @@ import static accord.impl.TimestampsForKey.NO_LAST_EXECUTED_HLC; import static accord.local.KeyHistory.COMMANDS; import static accord.local.PreLoadContext.contextFor; +import static accord.primitives.Routable.Domain.Range; import static accord.utils.async.AsyncChains.getUninterruptibly; import static org.apache.cassandra.Util.spinAssertEquals; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; @@ -118,7 +118,7 @@ public class CompactionAccordIteratorsTest private static final TxnId LT_TXN_ID = AccordTestUtils.txnId(EPOCH, HLC_START, NODE); private static final TxnId TXN_ID = AccordTestUtils.txnId(EPOCH, LT_TXN_ID.hlc() + 1, NODE); private static final TxnId SECOND_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 1, NODE, Kind.Read); - private static final TxnId RANGE_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 2, NODE, Kind.Read, Routable.Domain.Range); + private static final TxnId RANGE_TXN_ID = AccordTestUtils.txnId(EPOCH, TXN_ID.hlc() + 2, NODE, Kind.Read, Range); private static final TxnId GT_TXN_ID = SECOND_TXN_ID; // For CommandsForKey where we test with two commands private static final TxnId[] TXN_IDS = new TxnId[]{ TXN_ID, SECOND_TXN_ID }; @@ -383,8 +383,8 @@ Consumer> expectAccordCommandsNoChange() private static RedundantBefore redundantBefore(TxnId txnId) { Ranges ranges = AccordTestUtils.fullRange(AccordTestUtils.keys(table, 42)); - txnId = txnId.as(Kind.Read, Routable.Domain.Range); - return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, txnId, LT_TXN_ID); + txnId = txnId.as(Kind.Read, Range); + return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, txnId, LT_TXN_ID.as(Range)); } enum DurableBeforeType @@ -468,25 +468,22 @@ private void testWithCommandStoreInternal(TestWithCommandStore test, boolean add Txn txn = txnId.kind().isWrite() ? writeTxn : readTxn; PartialDeps partialDeps = Deps.NONE.intersecting(AccordTestUtils.fullRange(txn)); PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); - PartialRoute partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); + Route partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - CheckedCommands.preaccept(safe, txnId, partialTxn, route, null, appendDiffToKeyspace(commandStore)); + CheckedCommands.preaccept(safe, txnId, partialTxn, route, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, txnId, partialDeps, appendDiffToKeyspace(commandStore)); + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), txnId, partialDeps, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, txnId, partialDeps, appendDiffToKeyspace(commandStore)); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, partialTxn, txnId, partialDeps, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { Pair result = AccordTestUtils.processTxnResultDirect(safe, txnId, partialTxn, txnId); - CheckedCommands.apply(safe, txnId, route, null, txnId, partialDeps, partialTxn, result.left, result.right, appendDiffToKeyspace(commandStore)); - }).beginAsResult()); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - safe.get(txnId, txnId, route).addListener(new Command.ProxyListener(RANGE_TXN_ID)); // add a junk listener just to test it in compaction + CheckedCommands.apply(safe, txnId, route, txnId, partialDeps, partialTxn, result.left, result.right, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); // The apply chain is asychronous, so it is easiest to just spin until it is applied diff --git a/test/unit/org/apache/cassandra/dht/AccordSplitterTest.java b/test/unit/org/apache/cassandra/dht/AccordSplitterTest.java index 2ddec3906fd1..532e0efe4d58 100644 --- a/test/unit/org/apache/cassandra/dht/AccordSplitterTest.java +++ b/test/unit/org/apache/cassandra/dht/AccordSplitterTest.java @@ -98,7 +98,7 @@ public void split() Assertions.assertThat(ranges).describedAs("num splits not as expected for partitioner %s", partitioner).hasSizeBetween(numSplits, numSplits + 1); Ranges split = Ranges.of(ranges.toArray(new Range[0])).mergeTouching(); - Ranges missing = Ranges.of(range).subtract(split); + Ranges missing = Ranges.of(range).without(split); Assertions.assertThat(missing).isEmpty(); testEventSplit(partitioner, range, rs, numSplits); @@ -115,7 +115,7 @@ private static void testEventSplit(IPartitioner partitioner, Range range, Random Assertions.assertThat(ranges).describedAs("num splits not as expected for partitioner %s", partitioner).hasSize(numSplits); Ranges split = ranges.stream().reduce(Ranges.EMPTY, Ranges::with).mergeTouching(); - Ranges missing = topLevel.subtract(split); + Ranges missing = topLevel.without(split); Assertions.assertThat(missing).isEmpty(); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordAgentTest.java b/test/unit/org/apache/cassandra/service/accord/AccordAgentTest.java new file mode 100644 index 000000000000..bc5f51cbcc8e --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/AccordAgentTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Arrays; +import java.util.concurrent.TimeUnit; + +import org.junit.Test; + +import accord.local.Node; +import accord.utils.RandomTestRunner; +import accord.utils.SortedArrays.SortedArrayList; +import org.apache.cassandra.service.accord.api.AccordAgent; + +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.junit.Assert.assertTrue; + +public class AccordAgentTest +{ + @Test + public void testNonClashingStartTimes() + { + RandomTestRunner.test().check(rnd -> { + SortedArrayList nodes; { + Node.Id[] ids = new Node.Id[rnd.nextInt(4, 16)]; + for (int i = 0 ; i < ids.length ; ++i) + ids[i] = new Node.Id(i); + nodes = new SortedArrayList<>(ids); + } + + long[] startTimes = new long[nodes.size()]; + long oneSecond = SECONDS.toMicros(1); + long targetDelta = oneSecond / nodes.size(); + for (int i = 0 ; i < 10000 ; ++i) + { + long startTime = rnd.nextLong(1, TimeUnit.DAYS.toMicros(100L)); + for (int j = 0 ; j < startTimes.length ; ++j) + { + long nonClashingStartTime = AccordAgent.nonClashingStartTime(startTime, nodes, nodes.get(j), oneSecond, rnd); + assertTrue(nonClashingStartTime >= startTime); + startTimes[j] = nonClashingStartTime; + } + + Arrays.sort(startTimes); + for (int j = 1 ; j < startTimes.length ; ++j) + { + long actualDelta = startTimes[j] - startTimes[j - 1]; + assertTrue(Math.abs(targetDelta - actualDelta) <= startTimes.length); + } + } + }); + } + +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index dde88ca46827..036e513ae3fc 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -135,7 +135,6 @@ public void commandLoadSave() throws Throwable SimpleBitSet waitingOnApply = new SimpleBitSet(3); waitingOnApply.set(1); Command.WaitingOn waitingOn = new Command.WaitingOn(dependencies.keyDeps.keys(), dependencies.rangeDeps, dependencies.directKeyDeps, new ImmutableBitSet(waitingOnApply), new ImmutableBitSet(2)); - attrs.addListener(new Command.ProxyListener(oldTxnId1)); Pair result = AccordTestUtils.processTxnResult(commandStore, txnId, txn, executeAt); Command expected = Command.SerializerSupport.executed(attrs, SaveStatus.Applied, executeAt, promised, accepted, diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 70a66f2be4fa..8f01b8f6859b 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -41,7 +41,6 @@ import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; import accord.primitives.Route; import accord.primitives.Timestamp; @@ -100,9 +99,9 @@ public void basicCycleTest() throws Throwable Key key = (Key)txn.keys().get(0); RoutingKey homeKey = key.toUnseekable(); FullRoute fullRoute = txn.keys().toRoute(homeKey); - PartialRoute route = fullRoute.slice(fullRange(txn)); + Route route = fullRoute.slice(fullRange(txn)); PartialTxn partialTxn = txn.intersecting(route, true); - PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, false, 1, partialTxn, fullRoute); + PreAccept preAccept = PreAccept.SerializerSupport.create(txnId, route, 1, 1, 1, partialTxn, fullRoute); // Check preaccept getUninterruptibly(commandStore.execute(preAccept, safeStore -> { @@ -141,7 +140,7 @@ public void basicCycleTest() throws Throwable builder.add(key, txnId2); deps = builder.build(); } - Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, false, Ballot.ZERO, executeAt, partialTxn.keys(), deps); + Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, Ballot.ZERO, executeAt, partialTxn.keys(), deps); getUninterruptibly(commandStore.execute(accept, safeStore -> { Command before = safeStore.ifInitialised(txnId).current(); @@ -192,9 +191,9 @@ public void computeDeps() throws Throwable Key key = (Key)txn.keys().get(0); RoutingKey homeKey = key.toUnseekable(); FullRoute fullRoute = txn.keys().toRoute(homeKey); - PartialRoute route = fullRoute.slice(fullRange(txn)); + Route route = fullRoute.slice(fullRange(txn)); PartialTxn partialTxn = txn.intersecting(route, true); - PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, false, 1, partialTxn, fullRoute); + PreAccept preAccept1 = PreAccept.SerializerSupport.create(txnId1, route, 1, 1, 1, partialTxn, fullRoute); getUninterruptibly(commandStore.execute(preAccept1, safeStore -> { persistDiff(commandStore, safeStore, txnId1, route, () -> { @@ -204,7 +203,7 @@ public void computeDeps() throws Throwable // second preaccept should identify txnId1 as a dependency TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); - PreAccept preAccept2 = PreAccept.SerializerSupport.create(txnId2, route, 1, 1, false, 1, partialTxn, fullRoute); + PreAccept preAccept2 = PreAccept.SerializerSupport.create(txnId2, route, 1, 1, 1, partialTxn, fullRoute); getUninterruptibly(commandStore.execute(preAccept2, safeStore -> { persistDiff(commandStore, safeStore, txnId2, route, () -> { PreAccept.PreAcceptReply reply = preAccept2.apply(safeStore); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index 1ece95f16477..7cc2d081dc35 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -32,12 +32,9 @@ import accord.impl.AbstractFetchCoordinator; import accord.impl.IntKey; import accord.local.Node; -import accord.messages.InformOfTxnId; -import accord.messages.MessageType; import accord.messages.ReadTxnData; import accord.messages.Reply; import accord.messages.Request; -import accord.messages.SimpleReply; import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; @@ -72,16 +69,6 @@ public static void setup() ClusterMetadataService.initializeForClients(); } - @Test - public void informOfTxn() - { - // There was an issue where the reply was the wrong verb - // see CASSANDRA-18375 - InformOfTxnId request = Mockito.mock(InformOfTxnId.class); - Mockito.when(request.type()).thenReturn(MessageType.INFORM_OF_TXN_REQ); - checkRequestReplies(request, SimpleReply.Ok); - } - @Test public void bootstrapRead() { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java index f6918422e926..afdea318830d 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordServiceTest.java @@ -74,7 +74,7 @@ public Seekables get() throw AccordService.newBarrierPreempted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY); case 4: attempts++; - throw new Exhausted(null, null); + throw new Exhausted(null, null, null); case 5: attempts++; throw AccordService.newBarrierExhausted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY); @@ -124,7 +124,7 @@ public void retryThrowsNonTimeout() timeoutFailures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); timeoutFailures.add(() -> {throw new Preempted(null, null);}); timeoutFailures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); - timeoutFailures.add(() -> {throw new Exhausted(null, null);}); + timeoutFailures.add(() -> {throw new Exhausted(null, null, null);}); Collections.shuffle(timeoutFailures, rs.asJdkRandom()); Iterator it = timeoutFailures.iterator(); Supplier failing = () -> { @@ -162,7 +162,7 @@ public void run() failures.add(() -> {throw AccordService.newBarrierTimeout(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); failures.add(() -> {throw new Preempted(null, null);}); failures.add(() -> {throw AccordService.newBarrierPreempted(TxnId.NONE, BarrierType.local, true, Ranges.EMPTY);}); - failures.add(() -> {throw new Exhausted(null, null);}); + failures.add(() -> {throw new Exhausted(null, null, null);}); boolean isError = rs.nextBoolean(); failures.add(new Unexpected(isError)); Collections.shuffle(failures, rs.asJdkRandom()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 54a311a67b91..a2fd50d5f937 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -30,10 +30,13 @@ import java.util.function.ToLongFunction; import java.util.stream.Collectors; import java.util.stream.IntStream; -import javax.annotation.Nullable; import com.google.common.collect.Sets; +import accord.api.LocalListeners; +import accord.api.ProgressLog.NoOpProgressLog; +import accord.api.RemoteListeners; +import accord.impl.DefaultLocalListeners; import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; @@ -41,7 +44,6 @@ import org.junit.Assert; import accord.api.Data; -import accord.api.ProgressLog; import accord.api.Result; import accord.api.RoutingKey; import accord.impl.InMemoryCommandStore; @@ -56,17 +58,14 @@ import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.local.SaveStatus; -import accord.local.SaveStatus.LocalExecution; import accord.primitives.Ballot; import accord.primitives.FullKeyRoute; import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; -import accord.primitives.Participants; import accord.primitives.Ranges; import accord.primitives.Routable; -import accord.primitives.Route; import accord.primitives.Seekable; import accord.primitives.Seekables; import accord.primitives.Timestamp; @@ -202,21 +201,6 @@ public static void testLoad(ManualExecutor executor, AccordSafeState blockedOnRoute, Participants blockedOnParticipants) {} - @Override public void waiting(TxnId blockedBy, LocalExecution blockedUntil, @Nullable Route blockedOnRoute, @Nullable Participants blockedOnParticipants) {} - }; - public static TxnId txnId(long epoch, long hlc, int node) { return txnId(epoch, hlc, node, Txn.Kind.Write); @@ -390,11 +374,8 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS }; SingleEpochRanges holder = new SingleEpochRanges(Ranges.of(range)); - InMemoryCommandStore.Synchronized result = new InMemoryCommandStore.Synchronized(0, - time, - new AccordAgent(), - null, - cs -> null, holder); + InMemoryCommandStore.Synchronized result = new InMemoryCommandStore.Synchronized(0, time, new AccordAgent(), + null, null, cs -> null, holder); holder.set(result); return result; } @@ -425,7 +406,12 @@ public static AccordCommandStore createAccordCommandStore( time, new AccordAgent(), null, - cs -> NOOP_PROGRESS_LOG, + cs -> new NoOpProgressLog(), + cs -> new DefaultLocalListeners(new RemoteListeners.NoOpRemoteListeners(), new DefaultLocalListeners.NotifySink() + { + @Override public void notify(SafeCommandStore safeStore, SafeCommand safeCommand, TxnId listener) {} + @Override public boolean notify(SafeCommandStore safeStore, SafeCommand safeCommand, LocalListeners.ComplexListener listener) { return false; } + }), holder, journal, loadExecutor, diff --git a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java index 101ca38a7364..55aa527e9465 100644 --- a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java +++ b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java @@ -50,7 +50,7 @@ import accord.api.ConfigurationService; import accord.api.ConfigurationService.EpochReady; import accord.api.Scheduler; -import accord.config.LocalConfig; +import accord.api.LocalConfig; import accord.impl.SizeOfIntersectionSorter; import accord.impl.TestAgent; import accord.local.Node; @@ -352,7 +352,7 @@ void validate(boolean isDone) Assertions.assertThat(tm.hasEpoch(epoch)).describedAs("node%s does not have epoch %d", id, epoch).isTrue(); Ranges ranges = tm.globalForEpoch(epoch).ranges().mergeTouching(); Ranges actual = tm.syncComplete(epoch).mergeTouching(); - Assertions.assertThat(actual).describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s", id, epoch, ranges.subtract(actual)).isEqualTo(ranges); + Assertions.assertThat(actual).describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s", id, epoch, ranges.without(actual)).isEqualTo(ranges); } else { @@ -369,7 +369,7 @@ void validate(boolean isDone) if (!ranges.equals(actual) && tm.minEpoch() != epoch && !ranges.equals(tm.syncComplete(epoch - 1).mergeTouching())) continue; Assertions.assertThat(actual) - .describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s; peers=%s; previous epochs %s", id, epoch, ranges.subtract(actual), topology.nodes(), + .describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s; peers=%s; previous epochs %s", id, epoch, ranges.without(actual), topology.nodes(), LongStream.range(inst.epoch.getEpoch(), epoch + 1).mapToObj(e -> e + " -> " + conf.getEpochSnapshot(e).syncStatus + "(synced=" + globalSynced(e) + "): " + tm.syncComplete(e)).collect(Collectors.joining("\n"))) .isEqualTo(ranges); } diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index 8f3192afa5b0..a20ee61a0807 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -26,7 +26,6 @@ import accord.api.Result; import accord.local.Command; import accord.local.CommonAttributes; -import accord.local.Listeners; import accord.local.SaveStatus; import accord.local.Status; import accord.primitives.Ballot; @@ -97,8 +96,7 @@ public static LoadedDiff diff(Command before, Command after) ifNotEqual(before, after, Command::additionalKeysOrRanges, false), new NewValue<>((k, deps) -> waitingOn), - ifNotEqual(before, after, Command::writes, false), - ifNotEqual(before, after, Command::durableListeners, true)); + ifNotEqual(before, after, Command::writes, false)); } static Command reconstructFromDiff(List diffs) @@ -131,7 +129,6 @@ static Command reconstructFromDiff(List diffs, Result result) SavedCommand.WaitingOnProvider waitingOnProvider = null; Writes writes = null; - Listeners.Immutable listeners = null; for (LoadedDiff diff : diffs) { @@ -162,8 +159,6 @@ static Command reconstructFromDiff(List diffs, Result result) waitingOnProvider = diff.waitingOn.get(); if (diff.writes != null) writes = diff.writes.get(); - if (diff.listeners != null) - listeners = diff.listeners.get(); } CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); @@ -180,8 +175,6 @@ static Command reconstructFromDiff(List diffs, Result result) attrs.partialDeps(partialDeps); if (additionalKeysOrRanges != null) attrs.additionalKeysOrRanges(additionalKeysOrRanges); - if (listeners != null && !listeners.isEmpty()) - attrs.setListeners(listeners); Command.WaitingOn waitingOn = null; if (waitingOnProvider != null) @@ -292,7 +285,6 @@ public static class LoadedDiff extends SavedCommand public final NewValue> additionalKeysOrRanges; public final NewValue writes; - public final NewValue> listeners; public final NewValue waitingOn; public LoadedDiff(TxnId txnId, @@ -309,8 +301,7 @@ public LoadedDiff(TxnId txnId, NewValue> additionalKeysOrRanges, NewValue waitingOn, - NewValue writes, - NewValue> listeners) + NewValue writes) { this.txnId = txnId; this.executeAt = executeAt; @@ -326,7 +317,6 @@ public LoadedDiff(TxnId txnId, this.additionalKeysOrRanges = additionalKeysOrRanges; this.writes = writes; - this.listeners = listeners; this.waitingOn = waitingOn; } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 6e947051b39f..cb8929bf37af 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -28,6 +28,10 @@ import java.util.function.Predicate; import java.util.function.ToLongFunction; +import accord.api.LocalListeners; +import accord.api.ProgressLog; +import accord.api.RemoteListeners; +import accord.impl.DefaultLocalListeners; import accord.impl.SizeOfIntersectionSorter; import accord.impl.TestAgent; import accord.local.Command; @@ -36,6 +40,7 @@ import accord.local.Node; import accord.local.NodeTimeService; import accord.local.PreLoadContext; +import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.messages.BeginRecovery; import accord.messages.PreAccept; @@ -164,7 +169,12 @@ public void onUncaughtException(Throwable t) } }, null, - ignore -> AccordTestUtils.NOOP_PROGRESS_LOG, + ignore -> new ProgressLog.NoOpProgressLog(), + cs -> new DefaultLocalListeners(new RemoteListeners.NoOpRemoteListeners(), new DefaultLocalListeners.NotifySink() + { + @Override public void notify(SafeCommandStore safeStore, SafeCommand safeCommand, TxnId listener) {} + @Override public boolean notify(SafeCommandStore safeStore, SafeCommand safeCommand, LocalListeners.ComplexListener listener) { return false; } + }), updateHolder, journal, new AccordStateCacheMetrics("test")); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index f80e036b5061..4d4424e1b2d7 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -27,6 +27,7 @@ import java.util.function.BiConsumer; import java.util.function.Consumer; +import accord.primitives.Route; import accord.utils.DefaultRandom; import com.google.common.collect.Iterables; import com.google.common.collect.Maps; @@ -52,7 +53,6 @@ import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.PartialDeps; -import accord.primitives.PartialRoute; import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.Timestamp; @@ -212,8 +212,8 @@ private static Command createStableUsingFastLifeCycle(AccordCommandStore command try { Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { - CheckedCommands.preaccept(safe, txnId, partialTxn, route, null, appendDiffToLog(commandStore)); - CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); + CheckedCommands.preaccept(safe, txnId, partialTxn, route, appendDiffToLog(commandStore)); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); return safe.ifInitialised(txnId).current(); }).beginAsResult()); @@ -253,16 +253,16 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); FullRoute route = partialTxn.keys().toRoute(routingKey); Ranges ranges = AccordTestUtils.fullRange(partialTxn.keys()); - PartialRoute partialRoute = route.slice(ranges); + Route partialRoute = route.slice(ranges); PartialDeps deps = PartialDeps.builder(ranges).build(); try { Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { - CheckedCommands.preaccept(safe, txnId, partialTxn, route, null, appendDiffToLog(commandStore)); - CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), null, executeAt, deps, appendDiffToLog(commandStore)); - CheckedCommands.commit(safe, SaveStatus.Committed, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); - CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, null, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); + CheckedCommands.preaccept(safe, txnId, partialTxn, route, appendDiffToLog(commandStore)); + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), executeAt, deps, appendDiffToLog(commandStore)); + CheckedCommands.commit(safe, SaveStatus.Committed, Ballot.ZERO, txnId, route, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); + CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); return safe.ifInitialised(txnId).current(); }).beginAsResult()); diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index c2a2b32af159..93d66eb1ec3e 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -46,7 +46,6 @@ import accord.local.cfk.CommandsForKey.Unmanaged; import accord.local.CommonAttributes; import accord.local.CommonAttributes.Mutable; -import accord.local.Listeners; import accord.local.Node; import accord.local.SaveStatus; import accord.local.Status; @@ -151,8 +150,9 @@ Command toCommand() return Command.SerializerSupport.notDefined(attributes(), Ballot.ZERO); case PreAccepted: return Command.SerializerSupport.preaccepted(attributes(), executeAt, Ballot.ZERO); - case Accepted: case AcceptedInvalidate: + return Command.SerializerSupport.acceptedInvalidateWithoutDefinition(attributes(), ballot, ballot); + case Accepted: case AcceptedWithDefinition: case AcceptedInvalidateWithDefinition: case PreCommittedWithDefinition: @@ -185,7 +185,7 @@ Command toCommand() case Erased: case ErasedOrInvalidOrVestigial: case Invalidated: - return Command.SerializerSupport.invalidated(txnId, Listeners.Immutable.EMPTY); + return Command.SerializerSupport.invalidated(txnId); } } @@ -356,7 +356,7 @@ private static Function timestampSupplier(Se @Test public void serde() { - testOne(-669467611022826851L); + testOne(-8928257345122888710L); Random random = new Random(); for (int i = 0 ; i < 10000 ; ++i) { @@ -508,7 +508,7 @@ public void test() for (int i = 0; i < info.length; i++) { InternalStatus status = rs.pick(InternalStatus.values()); - info[i] = TxnInfo.create(ids[i], status, ids[i], CommandsForKey.NO_TXNIDS, Ballot.ZERO); + info[i] = TxnInfo.create(ids[i], status, ids[i], TxnId.NO_TXNIDS, Ballot.ZERO); } Gen pendingGen = Gens.enums().allMixedDistribution(Unmanaged.Pending.class).next(rs); @@ -537,7 +537,7 @@ public void test() } else unmanaged = CommandsForKey.NO_PENDING_UNMANAGED; - CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, unmanaged, TxnId.NONE); + CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, unmanaged, TxnId.NONE, TxnId.NONE); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); @@ -553,8 +553,8 @@ public void thereAndBackAgain() PartitionKey pk = new PartitionKey(TableId.fromString("1b255f4d-ef25-40a6-0000-000000000009"), key); TxnId txnId = TxnId.fromValues(11,34052499,2,1); CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, - new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED_OR_ACCEPTED_INVALIDATE, txnId, CommandsForKey.NO_TXNIDS, Ballot.ZERO) }, - CommandsForKey.NO_PENDING_UNMANAGED, TxnId.NONE); + new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED_OR_ACCEPTED_INVALIDATE, txnId, TxnId.NO_TXNIDS, Ballot.ZERO) }, + CommandsForKey.NO_PENDING_UNMANAGED, TxnId.NONE, TxnId.NONE); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index 19c21f321576..f8d08185a390 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -29,7 +29,6 @@ import accord.local.Command; import accord.local.CommonAttributes; -import accord.local.Listeners; import accord.local.RedundantBefore; import accord.local.SaveStatus; import accord.primitives.Ballot; @@ -262,7 +261,7 @@ public Command build(SaveStatus saveStatus) case Erased: case ErasedOrInvalidOrVestigial: case Invalidated: - return Command.SerializerSupport.invalidated(txnId, Listeners.Immutable.EMPTY); + return Command.SerializerSupport.invalidated(txnId); } } } From ef5f793dab49d89e4eebfb19a05419fe3174216f Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Thu, 22 Aug 2024 18:48:03 +0200 Subject: [PATCH 141/340] Journal segment compaction Patch by Alex Petrov and Aleksey Yeschenko, reviewed by Aleksey Yeschenko and Alex Petrov for CASSANDRA-19876 --- .../apache/cassandra/config/AccordSpec.java | 14 ++ .../cassandra/journal/ActiveSegment.java | 1 + .../apache/cassandra/journal/Compactor.java | 102 ++++++++ .../apache/cassandra/journal/Descriptor.java | 13 +- .../cassandra/journal/EntrySerializer.java | 49 ++-- .../cassandra/journal/InMemoryIndex.java | 4 +- .../org/apache/cassandra/journal/Index.java | 13 +- .../org/apache/cassandra/journal/Journal.java | 164 ++++++------- .../apache/cassandra/journal/Metadata.java | 2 +- .../apache/cassandra/journal/OnDiskIndex.java | 63 ++--- .../org/apache/cassandra/journal/Params.java | 4 + .../cassandra/journal/RecordConsumer.java | 1 + .../org/apache/cassandra/journal/Segment.java | 11 +- .../cassandra/journal/SegmentCompactor.java | 34 +++ .../apache/cassandra/journal/Segments.java | 42 ++-- .../cassandra/journal/StaticSegment.java | 204 ++++++++++++---- .../service/accord/AccordJournal.java | 29 ++- .../service/accord/AccordJournalTable.java | 227 ++++++++++++++++++ .../service/accord/AccordKeyspace.java | 21 +- .../accord/AccordSegmentCompactor.java | 119 +++++++++ .../service/accord/IAccordService.java | 3 + .../cassandra/service/accord/JournalKey.java | 6 +- .../service/accord/SavedCommand.java | 137 +++++++++-- ...java => AccordJournalIntegrationTest.java} | 2 +- .../journal/AccordJournalCompactionTest.java | 137 +++++++++++ .../test/AccordJournalSimulationTest.java | 4 +- .../apache/cassandra/journal/IndexTest.java | 12 +- .../apache/cassandra/journal/JournalTest.java | 36 ++- .../apache/cassandra/journal/SegmentTest.java | 10 +- .../apache/cassandra/journal/TestParams.java | 12 + .../service/accord/AccordTestUtils.java | 2 +- .../cassandra/utils/AccordGenerators.java | 4 +- .../cassandra/utils/ConfigGenBuilderTest.java | 2 + 33 files changed, 1194 insertions(+), 290 deletions(-) create mode 100644 src/java/org/apache/cassandra/journal/Compactor.java create mode 100644 src/java/org/apache/cassandra/journal/SegmentCompactor.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordJournalTable.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java rename test/distributed/org/apache/cassandra/distributed/test/accord/{AccordJournalTest.java => AccordJournalIntegrationTest.java} (98%) create mode 100644 test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index b4d25d66890e..102ae68b67c7 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -28,6 +28,8 @@ public class AccordSpec public volatile String journal_directory; + public volatile boolean enable_journal_compaction = true; + public volatile OptionaldPositiveInt shard_count = OptionaldPositiveInt.UNDEFINED; public volatile DurationSpec.IntMillisecondsBound recover_delay = new DurationSpec.IntMillisecondsBound(1000); @@ -101,6 +103,18 @@ public FlushMode flushMode() return flushMode; } + @Override + public boolean enableCompaction() + { + return DatabaseDescriptor.getAccord().enable_journal_compaction; + } + + @Override + public int compactionPeriodMillis() + { + return 60_000; + } + @JsonIgnore @Override public int flushPeriodMillis() diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index 1bee25a96fed..1fd99054909c 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -197,6 +197,7 @@ private void discard() descriptor.fileFor(Component.SYNCED_OFFSETS).deleteIfExists(); } + @Override void release() { selfRef.release(); diff --git a/src/java/org/apache/cassandra/journal/Compactor.java b/src/java/org/apache/cassandra/journal/Compactor.java new file mode 100644 index 000000000000..846dd62ba8f8 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/Compactor.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.concurrent.Shutdownable; + +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; + +final class Compactor implements Runnable, Shutdownable +{ + private final Journal journal; + private final SegmentCompactor segmentCompactor; + private final ScheduledExecutorPlus executor; + + Compactor(Journal journal, SegmentCompactor segmentCompactor) + { + this.executor = executorFactory().scheduled(false, journal.name + "-compactor"); + this.journal = journal; + this.segmentCompactor = segmentCompactor; + } + + void start() + { + if (journal.params.enableCompaction()) + { + executor.scheduleWithFixedDelay(this, + journal.params.compactionPeriodMillis(), + journal.params.compactionPeriodMillis(), + TimeUnit.MILLISECONDS); + } + } + + @Override + public void run() + { + Set> toCompact = new HashSet<>(); + journal.segments().selectStatic(toCompact); + if (toCompact.size() < 2) + return; + + try + { + Collection> newSegments = segmentCompactor.compact(toCompact, journal.keySupport); + for (StaticSegment segment : newSegments) + toCompact.remove(segment); + + journal.replaceCompactedSegments(toCompact, newSegments); + for (StaticSegment segment : toCompact) + segment.discard(); + } + catch (IOException e) + { + throw new RuntimeException("Could not compact segments: " + toCompact); + } + } + + @Override + public boolean isTerminated() + { + return executor.isTerminated(); + } + + @Override + public void shutdown() + { + executor.shutdown(); + } + + @Override + public Object shutdownNow() + { + return executor.shutdownNow(); + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit units) throws InterruptedException + { + return executor.awaitTermination(timeout, units); + } +} diff --git a/src/java/org/apache/cassandra/journal/Descriptor.java b/src/java/org/apache/cassandra/journal/Descriptor.java index 176a12e10917..cea68c353e14 100644 --- a/src/java/org/apache/cassandra/journal/Descriptor.java +++ b/src/java/org/apache/cassandra/journal/Descriptor.java @@ -66,20 +66,20 @@ public final class Descriptor implements Comparable static final int CURRENT_JOURNAL_VERSION = JOURNAL_VERSION_1; final File directory; - final long timestamp; - final int generation; + public final long timestamp; + public final int generation; /** * Serialization version for journal components; bumped as journal * implementation evolves over time. */ - final int journalVersion; + public final int journalVersion; /** * Serialization version for user content - specifically journal keys * and journal values; bumped when user logic evolves. */ - final int userVersion; + public final int userVersion; Descriptor(File directory, long timestamp, int generation, int journalVersion, int userVersion) { @@ -114,11 +114,6 @@ static Descriptor fromFile(File file) return fromName(file.parent(), file.name()); } - Descriptor withIncrementedGeneration() - { - return new Descriptor(directory, timestamp, generation + 1, journalVersion, userVersion); - } - File fileFor(Component component) { return new File(directory, formatFileName(component)); diff --git a/src/java/org/apache/cassandra/journal/EntrySerializer.java b/src/java/org/apache/cassandra/journal/EntrySerializer.java index a2a61cfce371..2a707e7d73e2 100644 --- a/src/java/org/apache/cassandra/journal/EntrySerializer.java +++ b/src/java/org/apache/cassandra/journal/EntrySerializer.java @@ -35,7 +35,7 @@ import static org.apache.cassandra.utils.FBUtilities.updateChecksumInt; import static org.apache.cassandra.utils.FBUtilities.updateChecksumShort; -final class EntrySerializer +public final class EntrySerializer { static void write(K key, ByteBuffer record, @@ -73,14 +73,14 @@ static void write(K key, static void read(EntryHolder into, KeySupport keySupport, - ByteBuffer buffer, + ByteBuffer from, int userVersion) throws IOException { CRC32 crc = Crc.crc32(); into.clear(); - try (DataInputBuffer in = new DataInputBuffer(buffer, false)) + try (DataInputBuffer in = new DataInputBuffer(from, false)) { K key = keySupport.deserialize(in, userVersion); keySupport.updateChecksum(crc, key, userVersion); @@ -101,9 +101,11 @@ static void read(EntryHolder into, into.hosts.add(hostId); } + // TODO: try to avoid allocating another buffer here ByteBuffer entry = ByteBufferUtil.read(in, entrySize); updateChecksum(crc, entry); into.value = entry; + into.userVersion = userVersion; validateCRC(crc, in.readInt()); } @@ -111,7 +113,7 @@ static void read(EntryHolder into, static boolean tryRead(EntryHolder into, KeySupport keySupport, - ByteBuffer buffer, + ByteBuffer from, DataInputBuffer in, int syncedOffset, int userVersion) @@ -121,11 +123,11 @@ static boolean tryRead(EntryHolder into, into.clear(); int fixedSize = EntrySerializer.fixedEntrySize(keySupport, userVersion); - if (buffer.remaining() < fixedSize) - return handleReadException(new EOFException(), buffer.limit(), syncedOffset); + if (from.remaining() < fixedSize) + return handleReadException(new EOFException(), from.limit(), syncedOffset); - updateChecksum(crc, buffer, buffer.position(), fixedSize - TypeSizes.INT_SIZE); - int fixedCrc = buffer.getInt(buffer.position() + fixedSize - TypeSizes.INT_SIZE); + updateChecksum(crc, from, from.position(), fixedSize - TypeSizes.INT_SIZE); + int fixedCrc = from.getInt(from.position() + fixedSize - TypeSizes.INT_SIZE); try { @@ -133,7 +135,7 @@ static boolean tryRead(EntryHolder into, } catch (IOException e) { - return handleReadException(e, buffer.position() + fixedSize, syncedOffset); + return handleReadException(e, from.position() + fixedSize, syncedOffset); } int hostCount, recordSize; @@ -150,11 +152,11 @@ static boolean tryRead(EntryHolder into, } int variableSize = EntrySerializer.variableEntrySize(hostCount, recordSize); - if (buffer.remaining() < variableSize) - return handleReadException(new EOFException(), buffer.limit(), syncedOffset); + if (from.remaining() < variableSize) + return handleReadException(new EOFException(), from.limit(), syncedOffset); - updateChecksum(crc, buffer, buffer.position(), variableSize - TypeSizes.INT_SIZE); - int variableCrc = buffer.getInt(buffer.position() + variableSize - TypeSizes.INT_SIZE); + updateChecksum(crc, from, from.position(), variableSize - TypeSizes.INT_SIZE); + int variableCrc = from.getInt(from.position() + variableSize - TypeSizes.INT_SIZE); try { @@ -162,7 +164,7 @@ static boolean tryRead(EntryHolder into, } catch (IOException e) { - return handleReadException(e, buffer.position() + variableSize, syncedOffset); + return handleReadException(e, from.position() + variableSize, syncedOffset); } for (int i = 0; i < hostCount; i++) @@ -179,9 +181,10 @@ static boolean tryRead(EntryHolder into, throw new AssertionError(); // can't happen } - into.value = buffer.duplicate() - .position(buffer.position() - recordSize) - .limit(buffer.position()); + into.value = from.duplicate() + .position(from.position() - recordSize) + .limit(from.position()); + into.userVersion = userVersion; in.skipBytesFully(TypeSizes.INT_SIZE); return true; @@ -210,13 +213,15 @@ static int variableEntrySize(int hostCount, int recordSize) + TypeSizes.INT_SIZE; // CRC } - static final class EntryHolder + public static final class EntryHolder { - K key; - ByteBuffer value; - IntHashSet hosts = new IntHashSet(); + public K key; + public ByteBuffer value; + public IntHashSet hosts = new IntHashSet(); - void clear() + public int userVersion; + + public void clear() { key = null; value = null; diff --git a/src/java/org/apache/cassandra/journal/InMemoryIndex.java b/src/java/org/apache/cassandra/journal/InMemoryIndex.java index 5417bfea408c..2c71d8c4ffd6 100644 --- a/src/java/org/apache/cassandra/journal/InMemoryIndex.java +++ b/src/java/org/apache/cassandra/journal/InMemoryIndex.java @@ -131,14 +131,14 @@ static InMemoryIndex rebuild(Descriptor descriptor, KeySupport keySupp { InMemoryIndex index = new InMemoryIndex<>(keySupport, new TreeMap<>(keySupport)); - try (StaticSegment.SequentialReader reader = StaticSegment.reader(descriptor, keySupport, fsyncedLimit)) + try (StaticSegment.SequentialReader reader = StaticSegment.sequentialReader(descriptor, keySupport, fsyncedLimit)) { int last = -1; while (reader.advance()) { int current = reader.offset(); if (last >= 0) - index.update(reader.id(), last, current); + index.update(reader.key(), last, current); last = current; } diff --git a/src/java/org/apache/cassandra/journal/Index.java b/src/java/org/apache/cassandra/journal/Index.java index f42a42d5edda..bf6ab5d0c11e 100644 --- a/src/java/org/apache/cassandra/journal/Index.java +++ b/src/java/org/apache/cassandra/journal/Index.java @@ -22,6 +22,7 @@ import org.apache.cassandra.utils.Closeable; import static com.google.common.collect.Iterables.any; + /** * Mapping of client supplied ids to in-segment offsets */ @@ -85,15 +86,6 @@ boolean mayContainIds(Iterable ids) return any(ids, this::mayContainId); } - interface IndexIterator - { - boolean hasNext(); - K currentKey(); - int currentOffset(); - int currentSize(); - void next(); - } - /** * Helper methods */ @@ -118,7 +110,7 @@ public static int readSize(long record) public static long writeSize(long record, int size) { record &= 0xffffffff00000000L; // unset all lower bits - record |= (long) size; + record |= size; return record; } @@ -126,5 +118,4 @@ public static long composeOffsetAndSize(int offset, int size) { return writeSize(writeOffset(0, offset), size); } - } diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 50a3058ec9ee..0baf8e5af323 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -18,12 +18,11 @@ package org.apache.cassandra.journal; import java.io.IOException; +import java.nio.channels.ClosedByInterruptException; import java.nio.file.FileStore; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -53,7 +52,6 @@ import org.apache.cassandra.journal.Segments.ReferencedSegment; import org.apache.cassandra.journal.Segments.ReferencedSegments; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.SavedCommand; import org.apache.cassandra.utils.Crc; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.Simulate; @@ -100,8 +98,7 @@ public class Journal implements Shutdownable final Metrics metrics; final Flusher flusher; - //final Invalidator invalidator; - //final Compactor compactor; + final Compactor compactor; volatile long replayLimit; final AtomicLong nextSegmentId = new AtomicLong(); @@ -120,7 +117,6 @@ public class Journal implements Shutdownable private final FlusherCallbacks flusherCallbacks; SequentialExecutorPlus closer; - //private final Set invalidations = Collections.newSetFromMap(new ConcurrentHashMap<>()); private class FlusherCallbacks implements Flusher.Callbacks { @@ -180,7 +176,8 @@ public Journal(String name, File directory, Params params, KeySupport keySupport, - ValueSerializer valueSerializer) + ValueSerializer valueSerializer, + SegmentCompactor segmentCompactor) { this.name = name; this.directory = directory; @@ -192,8 +189,7 @@ public Journal(String name, this.metrics = new Metrics<>(name); this.flusherCallbacks = new FlusherCallbacks(); this.flusher = new Flusher<>(this, flusherCallbacks); - //this.invalidator = new Invalidator<>(this); - //this.compactor = new Compactor<>(this); + this.compactor = new Compactor<>(this, segmentCompactor); } public boolean isFlushed(RecordPointer recordPointer) @@ -229,8 +225,13 @@ public void start() allocator = executorFactory().infiniteLoop(name + "-allocator", new AllocateRunnable(), SAFE, NON_DAEMON, SYNCHRONIZED); advanceSegment(null); flusher.start(); - //invalidator.start(); - //compactor.start(); + compactor.start(); + } + + @VisibleForTesting + void runCompactorForTesting() + { + compactor.run(); } /** @@ -254,8 +255,8 @@ public void shutdown() { allocator.shutdown(); allocator.awaitTermination(1, TimeUnit.MINUTES); - //compactor.stop(); - //invalidator.stop(); + compactor.shutdown(); + compactor.awaitTermination(1, TimeUnit.MINUTES); flusher.shutdown(); closer.shutdown(); closer.awaitTermination(1, TimeUnit.MINUTES); @@ -284,34 +285,6 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted return r; } - /** - * Read an entry by its address (segment timestamp + offest) - * - * @return deserialized record if present, null otherwise - */ - public V read(long segmentTimestamp, int offset, int size) - { - try (ReferencedSegment referenced = selectAndReference(segmentTimestamp)) - { - Segment segment = referenced.segment(); - if (null == segment) - return null; - - EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - segment.read(offset, size, holder); - - try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) - { - return valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion); - } - catch (IOException e) - { - // can only throw if serializer is buggy - throw new RuntimeException(e); - } - } - } - /** * Looks up a record by the provided id. *

      @@ -324,19 +297,20 @@ public V read(long segmentTimestamp, int offset, int size) * @param id user-provided record id, expected to roughly correlate with time and go up * @return deserialized record if found, null otherwise */ + @SuppressWarnings("unused") public V readFirst(K id) { EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); try (ReferencedSegments segments = selectAndReference(id)) { - for (Segment segment : segments.all()) + for (Segment segment : segments.allSorted()) { if (segment.readFirst(id, holder)) { try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) { - return valueSerializer.deserialize(holder.key, in, segment.descriptor.userVersion); + return valueSerializer.deserialize(holder.key, in, holder.userVersion); } catch (IOException e) { @@ -349,36 +323,34 @@ public V readFirst(K id) return null; } - public List readAll(K id) - { - List res = new ArrayList<>(2); - readAll(id, (in, userVersion) -> res.add(valueSerializer.deserialize(id, in, userVersion))); - return res; - } - - public void readAll(K id, Reader reader) + public void readAll(K id, RecordConsumer consumer) { EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); try (ReferencedSegments segments = selectAndReference(id)) { - for (Segment segment : segments.all()) + consumer.init(); + + for (Segment segment : segments.allSorted()) + segment.readAll(id, holder, consumer); + } + } + + @SuppressWarnings("unused") + public List readAll(K id) + { + List res = new ArrayList<>(2); + readAll(id, (segment, position, key, buffer, hosts, userVersion) -> { + try (DataInputBuffer in = new DataInputBuffer(buffer, false)) { - segment.readAll(id, holder, () -> { - try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) - { - Invariants.checkState(Objects.equals(holder.key, id), - "%s != %s", holder.key, id); - reader.read(in, segment.descriptor.userVersion); - holder.clear(); - } - catch (IOException e) - { - // can only throw if serializer is buggy - throw new RuntimeException(e); - } - }); + res.add(valueSerializer.deserialize(key, in, userVersion)); } - } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + }); + return res; } /** @@ -394,6 +366,7 @@ public void readAll(K id, Reader reader) * @param condition predicate to test the record against * @return deserialized record if found, null otherwise */ + @SuppressWarnings("unused") public V readFirstMatching(K id, Predicate condition) { EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); @@ -441,6 +414,7 @@ public V readFirstMatching(K id, Predicate condition) * @param consumer function to consume the raw record (bytes and invalidation set) if found * @return true if the record was found, false otherwise */ + @SuppressWarnings("unused") public boolean readFirst(K id, RecordConsumer consumer) { try (ReferencedSegments segments = selectAndReference(id)) @@ -457,6 +431,7 @@ public boolean readFirst(K id, RecordConsumer consumer) * * @return subset of ids to test that have been found in the journal */ + @SuppressWarnings("unused") public Set test(Set test) { Set present = new ObjectHashSet<>(test.size() + 1, 0.9f); @@ -515,19 +490,7 @@ public void blockingWrite(K id, V record, Set hosts) */ public RecordPointer asyncWrite(K id, V record, Set hosts) { - return asyncWrite(id, new SavedCommand.Writer<>() - { - public void write(DataOutputPlus out, int userVersion) throws IOException - { - valueSerializer.serialize(id, record, out, params.userVersion()); - } - - public K key() - { - return id; - } - }, - hosts); + return asyncWrite(id, (out, userVersion) -> valueSerializer.serialize(id, record, out, userVersion), hosts); } public RecordPointer asyncWrite(K id, Writer writer, Set hosts) @@ -548,7 +511,6 @@ public RecordPointer asyncWrite(K id, Writer writer, Set hosts) return recordPointer; } - private ActiveSegment.Allocation allocate(int entrySize, Set hosts) { ActiveSegment segment = currentSegment; @@ -658,6 +620,11 @@ private void runNormal() throws InterruptedException Thread.yield(); } } + catch (JournalWriteError e) + { + if (!(e.getCause() instanceof ClosedByInterruptException)) + throw e; + } catch (Throwable t) { if (!handleError("Failed allocating journal segments", t)) @@ -727,26 +694,38 @@ private void closeAllSegments() } /** - * Select segments that could potentially have any entry with the specified ids and + * Select segments that could potentially have any entry with the specified id and * attempt to grab references to them all. * * @return a subset of segments with references to them */ - ReferencedSegments selectAndReference(Iterable ids) + ReferencedSegments selectAndReference(K id) { while (true) { - ReferencedSegments referenced = segments().selectAndReference(ids); + ReferencedSegments referenced = segments().selectAndReference(s -> s.index().mayContainId(id)); if (null != referenced) return referenced; } } - ReferencedSegments selectAndReference(K id) + /** + * Select segments that could potentially have any entry with the specified ids and + * attempt to grab references to them all. + * + * @return a subset of segments with references to them + */ + ReferencedSegments selectAndReference(Iterable ids) { - return selectAndReference(Collections.singleton(id)); + while (true) + { + ReferencedSegments referenced = segments().selectAndReference(s -> s.index().mayContainIds(ids)); + if (null != referenced) + return referenced; + } } + @SuppressWarnings("unused") ReferencedSegment selectAndReference(long segmentTimestamp) { while (true) @@ -757,7 +736,7 @@ ReferencedSegment selectAndReference(long segmentTimestamp) } } - private Segments segments() + Segments segments() { return segments.get(); } @@ -784,9 +763,9 @@ private void replaceCompletedSegment(ActiveSegment activeSegment, StaticSe swapSegments(current -> current.withCompletedSegment(activeSegment, staticSegment)); } - private void replaceCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) + void replaceCompactedSegments(Collection> oldSegments, Collection> compactedSegments) { - swapSegments(current -> current.withCompactedSegment(oldSegment, newSegment)); + swapSegments(current -> current.withCompactedSegments(oldSegments, compactedSegments)); } void selectSegmentToFlush(Collection> into) @@ -836,7 +815,7 @@ else if (timestamp > currentSegmentTimestamp) if (segment == null) throw new IllegalArgumentException("Request the active segment " + timestamp + " but this segment does not exist"); if (!segment.isActive()) - throw new IllegalArgumentException("Request the active segment " + timestamp + " but this segment is not active"); + throw new IllegalArgumentException(String.format("Request the active segment %d but this segment is not active: %s", timestamp, segment)); return segment.asActive(); } } @@ -975,9 +954,4 @@ public interface Writer { void write(DataOutputPlus out, int userVersion) throws IOException; } - - public interface Reader - { - void read(DataInputBuffer in, int userVersion) throws IOException; - } } diff --git a/src/java/org/apache/cassandra/journal/Metadata.java b/src/java/org/apache/cassandra/journal/Metadata.java index bc521cc83c4c..e8224ca64e50 100644 --- a/src/java/org/apache/cassandra/journal/Metadata.java +++ b/src/java/org/apache/cassandra/journal/Metadata.java @@ -191,7 +191,7 @@ static Metadata rebuild(Descriptor descriptor, KeySupport keySupport, int Int2IntHashMap recordsPerHost = new Int2IntHashMap(Integer.MIN_VALUE); int recordsCount = 0; - try (StaticSegment.SequentialReader reader = StaticSegment.reader(descriptor, keySupport, fsyncedLimit)) + try (StaticSegment.SequentialReader reader = StaticSegment.sequentialReader(descriptor, keySupport, fsyncedLimit)) { while (reader.advance()) { diff --git a/src/java/org/apache/cassandra/journal/OnDiskIndex.java b/src/java/org/apache/cassandra/journal/OnDiskIndex.java index ba769d6163b5..fe2c2713b99f 100644 --- a/src/java/org/apache/cassandra/journal/OnDiskIndex.java +++ b/src/java/org/apache/cassandra/journal/OnDiskIndex.java @@ -256,7 +256,7 @@ public long lookUpFirst(K id) public long[] lookUpAll(K id) { if (!mayContainId(id)) - return new long[0]; + return EMPTY; int start = binarySearch(id); int firstKeyIndex = start; @@ -265,7 +265,7 @@ public long[] lookUpAll(K id) firstKeyIndex = i; if (firstKeyIndex < 0) - return new long[0]; + return EMPTY; int lastKeyIndex = start; @@ -282,56 +282,61 @@ public long[] lookUpAll(K id) return all; } - public IndexIterator iterator() + IndexReader reader() { - return new IndexIteratorImpl(); + return new IndexReader(); } - private class IndexIteratorImpl implements IndexIterator + public class IndexReader { - int currentIdx; - K currentKey; - int currentOffset; - int currentSize; + int idx; + K key; + int offset; + int size; - IndexIteratorImpl() + IndexReader() { - currentIdx = -1; + idx = -1; } - @Override - public boolean hasNext() + public K key() { - return currentIdx < (entryCount - 1); + ensureAdvanced(); + return key; } - @Override - public K currentKey() + public int offset() { - return currentKey; + ensureAdvanced(); + return offset; } - @Override - public int currentOffset() + public int recordSize() { - return currentOffset; + ensureAdvanced(); + return size; } - @Override - public int currentSize() + public boolean advance() { - return currentSize; + if (idx >= entryCount - 1) + return false; + + idx++; + key = keyAtIndex(idx); + long record = recordAtIndex(idx); + offset = Index.readOffset(record); + size = Index.readSize(record); + return true; } - public void next() + private void ensureAdvanced() { - currentIdx++; - currentKey = keyAtIndex(currentIdx); - long record = recordAtIndex(currentIdx); - currentOffset = Index.readOffset(record); - currentSize = Index.readSize(record); + if (idx < 0) + throw new IllegalStateException("Must call advance() before accessing entry content"); } } + private K keyAtIndex(int index) { return keySupport.deserialize(buffer, FILE_PREFIX_SIZE + index * ENTRY_SIZE, descriptor.userVersion); diff --git a/src/java/org/apache/cassandra/journal/Params.java b/src/java/org/apache/cassandra/journal/Params.java index 17e719ce5d7f..56bacce1d9e2 100644 --- a/src/java/org/apache/cassandra/journal/Params.java +++ b/src/java/org/apache/cassandra/journal/Params.java @@ -38,6 +38,10 @@ enum FailurePolicy { STOP, STOP_JOURNAL, IGNORE, DIE } */ FlushMode flushMode(); + boolean enableCompaction(); + + int compactionPeriodMillis(); + /** * @return milliseconds between journal flushes */ diff --git a/src/java/org/apache/cassandra/journal/RecordConsumer.java b/src/java/org/apache/cassandra/journal/RecordConsumer.java index e16194001dd2..3403cd0f2301 100644 --- a/src/java/org/apache/cassandra/journal/RecordConsumer.java +++ b/src/java/org/apache/cassandra/journal/RecordConsumer.java @@ -24,5 +24,6 @@ @FunctionalInterface public interface RecordConsumer { + default void init() {} void accept(long segment, int position, K key, ByteBuffer buffer, IntHashSet hosts, int userVersion); } diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java index e548c521286b..0da59118b701 100644 --- a/src/java/org/apache/cassandra/journal/Segment.java +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -24,7 +24,7 @@ import org.apache.cassandra.utils.*; import org.apache.cassandra.utils.concurrent.RefCounted; -abstract class Segment implements Closeable, RefCounted> +public abstract class Segment implements Closeable, RefCounted> { final File file; final Descriptor descriptor; @@ -64,7 +64,7 @@ boolean readFirst(K id, RecordConsumer consumer) EntrySerializer.EntryHolder into = new EntrySerializer.EntryHolder<>(); int offset = Index.readOffset(offsetAndSize); - int size = Index.readSize(offset); + int size = Index.readSize(offsetAndSize); if (read(offset, size, into)) { Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); @@ -83,18 +83,19 @@ boolean readFirst(K id, EntrySerializer.EntryHolder into) return true; } - void readAll(K id, EntrySerializer.EntryHolder into, Runnable onEntry) + void readAll(K id, EntrySerializer.EntryHolder into, RecordConsumer onEntry) { long[] all = index().lookUpAll(id); - for (int i = 0; i < all.length; i++) { int offset = Index.readOffset(all[i]); int size = Index.readSize(all[i]); Invariants.checkState(read(offset, size, into), "Read should always return true"); - onEntry.run(); + onEntry.accept(descriptor.timestamp, offset, into.key, into.value, into.hosts, into.userVersion); } } abstract boolean read(int offset, int size, EntrySerializer.EntryHolder into); + + abstract void release(); } diff --git a/src/java/org/apache/cassandra/journal/SegmentCompactor.java b/src/java/org/apache/cassandra/journal/SegmentCompactor.java new file mode 100644 index 000000000000..5c95b539fcf3 --- /dev/null +++ b/src/java/org/apache/cassandra/journal/SegmentCompactor.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.util.Collection; + +public interface SegmentCompactor +{ + SegmentCompactor NOOP = (SegmentCompactor) (segments, keySupport) -> segments; + + static SegmentCompactor noop() + { + //noinspection unchecked + return (SegmentCompactor) NOOP; + } + + Collection> compact(Collection> segments, KeySupport keySupport) throws IOException; +} diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java index 18dfc3bdaf9c..a779aebf23fd 100644 --- a/src/java/org/apache/cassandra/journal/Segments.java +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -17,7 +17,11 @@ */ package org.apache.cassandra.journal; +import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; +import java.util.List; +import java.util.function.Predicate; import accord.utils.Invariants; import org.agrona.collections.Long2ObjectHashMap; @@ -68,27 +72,37 @@ Segments withCompletedSegment(ActiveSegment activeSegment, StaticSeg return new Segments<>(newSegments); } - Segments withCompactedSegment(StaticSegment oldSegment, StaticSegment newSegment) + Segments withCompactedSegments(Collection> oldSegments, Collection> compactedSegments) { - Invariants.checkArgument(oldSegment.descriptor.timestamp == newSegment.descriptor.timestamp); - Invariants.checkArgument(oldSegment.descriptor.generation < newSegment.descriptor.generation); Long2ObjectHashMap> newSegments = new Long2ObjectHashMap<>(segments); - Segment oldValue = newSegments.put(newSegment.descriptor.timestamp, newSegment); - Invariants.checkState(oldValue == oldSegment); + for (StaticSegment oldSegment : oldSegments) + { + Segment oldValue = newSegments.remove(oldSegment.descriptor.timestamp); + Invariants.checkState(oldValue == oldSegment); + } + + for (StaticSegment compactedSegment : compactedSegments) + { + Segment oldValue = newSegments.put(compactedSegment.descriptor.timestamp, compactedSegment); + Invariants.checkState(oldValue == null); + } + return new Segments<>(newSegments); } - Segments withoutInvalidatedSegment(StaticSegment staticSegment) + Iterable> all() { - Long2ObjectHashMap> newSegments = new Long2ObjectHashMap<>(segments); - if (!newSegments.remove(staticSegment.descriptor.timestamp, staticSegment)) - throw new IllegalStateException(); - return new Segments<>(newSegments); + return this.segments.values(); } - Iterable> all() + /** + * Returns segments in timestamp order. Will allocate and sort the segment collection. + */ + List> allSorted() { - return segments.values(); + List> segments = new ArrayList<>(this.segments.values()); + segments.sort(Comparator.comparing(s -> s.descriptor)); + return segments; } void selectActive(long maxTimestamp, Collection> into) @@ -136,12 +150,12 @@ void selectStatic(Collection> into) * @return a subset of segments with references to them, or {@code null} if failed to grab the refs */ @SuppressWarnings("resource") - ReferencedSegments selectAndReference(Iterable ids) + ReferencedSegments selectAndReference(Predicate> test) { Long2ObjectHashMap> selectedSegments = null; for (Segment segment : segments.values()) { - if (segment.index().mayContainIds(ids)) + if (test.test(segment)) { if (null == selectedSegments) selectedSegments = newMap(10); diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index f63701771ca9..c7ac7ce4103b 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -23,7 +23,10 @@ import java.nio.channels.FileChannel; import java.nio.file.NoSuchFileException; import java.nio.file.StandardOpenOption; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.locks.LockSupport; import org.agrona.collections.IntHashSet; import org.apache.cassandra.io.util.DataInputBuffer; @@ -38,7 +41,7 @@ * Can be compacted with input from {@code PersistedInvalidations} into a new smaller segment, * with invalidated entries removed. */ -final class StaticSegment extends Segment +public final class StaticSegment extends Segment { final FileChannel channel; @@ -111,7 +114,6 @@ static StaticSegment open(Descriptor descriptor, KeySupport keyS } } - @SuppressWarnings("resource") private static StaticSegment internalOpen( Descriptor descriptor, SyncedOffsets syncedOffsets, OnDiskIndex index, Metadata metadata, KeySupport keySupport) throws IOException @@ -125,7 +127,43 @@ private static StaticSegment internalOpen( @Override public void close() { - selfRef.release(); + try + { + channel.close(); + } + catch (IOException e) + { + throw new RuntimeException("Could not close static segment " + descriptor, e); + } + + release(); + } + + /** + * Waits until this segment is unreferenced, closes it, and deltes all files associated with it. + */ + void discard() + { + // TODO: consider moving deletion logic to Tidier instead of busy-looping here + waitUntilUnreferenced(); + close(); + for (Component component : Component.values()) + { + File file = descriptor.fileFor(component); + if (file.exists()) + file.delete(); + } + } + + public void waitUntilUnreferenced() + { + while (true) + { + if (selfRef.globalCount() == 1) + return; + + LockSupport.parkNanos(100); + } } @Override @@ -140,6 +178,18 @@ public Ref> ref() return selfRef.ref(); } + @Override + void release() + { + selfRef.release(); + } + + @Override + public String toString() + { + return "StaticSegment{" + descriptor + '}'; + } + private static final class Tidier implements Tidy { private final Descriptor descriptor; @@ -223,57 +273,38 @@ boolean read(int offset, int size, EntrySerializer.EntryHolder into) */ void forEachRecord(RecordConsumer consumer) { - try (SequentialReader reader = reader(descriptor, keySupport, syncedOffsets.syncedOffset())) + try (SequentialReader reader = sequentialReader(descriptor, keySupport, syncedOffsets.syncedOffset())) { while (reader.advance()) { - consumer.accept(descriptor.timestamp, reader.offset(), reader.id(), reader.record(), reader.hosts(), descriptor.userVersion); + consumer.accept(descriptor.timestamp, reader.offset(), reader.key(), reader.record(), reader.hosts(), descriptor.userVersion); } } } /* - * Sequential reading (replay and components rebuild) + * Sequential and in-key order reading (replay and components rebuild) */ - static SequentialReader reader(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) - { - return SequentialReader.open(descriptor, keySupport, fsyncedLimit); - } - - /** - * A sequential data segment reader to use for journal replay and rebuilding - * missing auxilirary components (index and metadata). - *

      - * Unexpected EOF and CRC mismatches in synced portions of segments are treated - * strictly, throwing {@link JournalReadError}. Errors encountered in unsynced portions - * of segments are treated as segment EOF. - */ - static final class SequentialReader implements Closeable + static abstract class Reader implements Closeable { - private final Descriptor descriptor; - private final KeySupport keySupport; - private final int fsyncedLimit; // exclusive + enum State { RESET, ADVANCED, EOF } - private final File file; - private final FileChannel channel; - private final MappedByteBuffer buffer; - private final DataInputBuffer in; + public final Descriptor descriptor; + protected final KeySupport keySupport; - private int offset = -1; - private final EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - private State state = State.RESET; + protected final File file; + protected final FileChannel channel; + protected final MappedByteBuffer buffer; - static SequentialReader open(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) - { - return new SequentialReader<>(descriptor, keySupport, fsyncedLimit); - } + protected final EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); + protected int offset = -1; + protected State state = State.RESET; - SequentialReader(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + Reader(Descriptor descriptor, KeySupport keySupport) { this.descriptor = descriptor; this.keySupport = keySupport; - this.fsyncedLimit = fsyncedLimit; file = descriptor.fileFor(Component.DATA); try @@ -289,7 +320,6 @@ static SequentialReader open(Descriptor descriptor, KeySupport keySupp { throw new JournalReadError(descriptor, file, e); } - in = new DataInputBuffer(buffer, false); } @Override @@ -299,37 +329,72 @@ public void close() FileUtils.clean(buffer); } - int offset() + public abstract boolean advance(); + + public int offset() { ensureHasAdvanced(); return offset; } - K id() + public K key() { ensureHasAdvanced(); return holder.key; } - IntHashSet hosts() + public IntHashSet hosts() { ensureHasAdvanced(); return holder.hosts; } - ByteBuffer record() + public ByteBuffer record() { ensureHasAdvanced(); return holder.value; } - private void ensureHasAdvanced() + protected void ensureHasAdvanced() { if (state != State.ADVANCED) throw new IllegalStateException("Must call advance() before accessing entry content"); } - boolean advance() + protected boolean eof() + { + state = State.EOF; + return false; + } + } + + static SequentialReader sequentialReader(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + return new SequentialReader<>(descriptor, keySupport, fsyncedLimit); + } + + /** + * A sequential data segment reader to use for journal replay and rebuilding + * missing auxilirary components (index and metadata). + *

      + * Unexpected EOF and CRC mismatches in synced portions of segments are treated + * strictly, throwing {@link JournalReadError}. Errors encountered in unsynced portions + * of segments are treated as segment EOF. + */ + static final class SequentialReader extends Reader + { + private final int fsyncedLimit; // exclusive + private final DataInputBuffer in; + + SequentialReader(Descriptor descriptor, KeySupport keySupport, int fsyncedLimit) + { + super(descriptor, keySupport); + this.fsyncedLimit = fsyncedLimit; + in = new DataInputBuffer(buffer, false); + } + + @Override + public boolean advance() { if (state == State.EOF) return false; @@ -361,13 +426,56 @@ private void reset() holder.clear(); state = State.RESET; } + } + + public StaticSegment.KeyOrderReader keyOrderReader() + { + return new StaticSegment.KeyOrderReader<>(descriptor, keySupport, index.reader()); + } + + public static final class KeyOrderReader extends Reader implements Comparable> + { + private final OnDiskIndex.IndexReader indexReader; + + KeyOrderReader(Descriptor descriptor, KeySupport keySupport, OnDiskIndex.IndexReader indexReader) + { + super(descriptor, keySupport); + this.indexReader = indexReader; + } - private boolean eof() + @Override + public boolean advance() { - state = State.EOF; - return false; + if (!indexReader.advance()) + return eof(); + + offset = indexReader.offset(); + + buffer.limit(offset + indexReader.recordSize()) + .position(offset); + try + { + EntrySerializer.read(holder, keySupport, buffer, descriptor.userVersion); + } + catch (IOException e) + { + throw new JournalReadError(descriptor, file, e); + } + + state = State.ADVANCED; + return true; } - enum State { RESET, ADVANCED, EOF } + @Override + public int compareTo(KeyOrderReader that) + { + this.ensureHasAdvanced(); + that.ensureHasAdvanced(); + + int cmp = keySupport.compare(this.key(), that.key()); + return cmp != 0 + ? cmp + : this.descriptor.compareTo(that.descriptor); + } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 932d31356a17..80ff9739cee0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Set; @@ -78,7 +77,8 @@ public class AccordJournal implements IJournal, Shutdownable static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[22]); - public final Journal journal; + private final Journal journal; + private final AccordJournalTable journalTable; private final AccordEndpointMapper endpointMapper; private final DelayedRequestProcessor delayedRequestProcessor = new DelayedRequestProcessor(); @@ -94,24 +94,26 @@ public AccordJournal(AccordEndpointMapper endpointMapper, Params params) File directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); this.journal = new Journal<>("AccordJournal", directory, params, JournalKey.SUPPORT, // In Accord, we are using streaming serialization, i.e. Reader/Writer interfaces instead of materializing objects - new ValueSerializer() + new ValueSerializer<>() { public int serializedSize(JournalKey key, Object value, int userVersion) { throw new UnsupportedOperationException(); } - public void serialize(JournalKey key, Object value, DataOutputPlus out, int userVersion) throws IOException + public void serialize(JournalKey key, Object value, DataOutputPlus out, int userVersion) { throw new UnsupportedOperationException(); } - public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throws IOException + public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) { throw new UnsupportedOperationException(); } - }); + }, + new AccordSegmentCompactor<>()); this.endpointMapper = endpointMapper; + this.journalTable = new AccordJournalTable<>(journal, JournalKey.SUPPORT, params.userVersion()); } public AccordJournal start(Node node) @@ -153,7 +155,7 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted { try { - ExecutorUtils.awaitTermination(timeout, units, Arrays.asList(journal)); + ExecutorUtils.awaitTermination(timeout, units, Collections.singletonList(journal)); return true; } catch (TimeoutException e) @@ -191,9 +193,9 @@ public Command loadCommand(int commandStoreId, TxnId txnId) @VisibleForTesting public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) { + JournalKey key = new JournalKey(txnId, commandStoreId); SavedCommand.Builder builder = new SavedCommand.Builder(); - journal.readAll(new JournalKey(txnId, commandStoreId), - builder::deserializeNext); + journalTable.readAll(key, (ignore, in, userVersion) -> builder.deserializeNext(in, userVersion)); return builder; } @@ -240,7 +242,7 @@ public void sanityCheck(int commandStoreId, Command orig) // We can only use strict equality if we supply result. Command reconstructed = diffs.construct(); Invariants.checkState(orig.equals(reconstructed), - "\n" + + '\n' + "Original: %s\n" + "Reconstructed: %s\n" + "Diffs: %s", orig, reconstructed, diffs); @@ -255,6 +257,7 @@ public void sanityCheck(int commandStoreId, Command orig) /* * Context necessary to process log records */ + static abstract class RequestContext implements ReplyContext { final long waitForEpoch; @@ -351,7 +354,7 @@ private class DelayedRequestProcessor implements Interruptible.Task public void start() { - executor = executorFactory().infiniteLoop("AccordJournal-delayed-request-processor", this::run, SAFE, InfiniteLoopExecutor.Daemon.NON_DAEMON, InfiniteLoopExecutor.Interrupts.SYNCHRONIZED); + executor = executorFactory().infiniteLoop("AccordJournal-delayed-request-processor", this, SAFE, InfiniteLoopExecutor.Daemon.NON_DAEMON, InfiniteLoopExecutor.Interrupts.SYNCHRONIZED); } private void delay(RequestContext requestContext) @@ -457,9 +460,9 @@ private void shutdown() } } - public boolean isRunnable(Status status) + private boolean isRunnable(Status status) { - return status != Status.TERMINATING && status != status.TERMINATED; + return status != Status.TERMINATING && status != Status.TERMINATED; } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java new file mode 100644 index 000000000000..642f49e417ce --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import accord.utils.Invariants; +import org.agrona.collections.IntHashSet; +import org.agrona.collections.LongHashSet; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.ColumnFamilyStore.RefViewFragment; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.Slices; +import org.apache.cassandra.db.StorageHook; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.lifecycle.SSTableSet; +import org.apache.cassandra.db.lifecycle.View; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.db.rows.UnfilteredRowIterators; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.journal.EntrySerializer.EntryHolder; +import org.apache.cassandra.journal.Journal; +import org.apache.cassandra.journal.KeySupport; +import org.apache.cassandra.journal.RecordConsumer; +import org.apache.cassandra.schema.ColumnMetadata; + +import static org.apache.cassandra.io.sstable.SSTableReadsListener.NOOP_LISTENER; + +public class AccordJournalTable +{ + private static final IntHashSet SENTINEL_HOSTS = new IntHashSet(); + + private final Journal journal; + private final ColumnFamilyStore cfs; + + private final ColumnMetadata recordColumn; + private final ColumnMetadata versionColumn; + + private final KeySupport keySupport; + private final int accordJournalVersion; + + public AccordJournalTable(Journal journal, KeySupport keySupport, int accordJournalVersion) + { + this.journal = journal; + this.cfs = Keyspace.open(AccordKeyspace.metadata().name).getColumnFamilyStore(AccordKeyspace.JOURNAL); + this.recordColumn = cfs.metadata().getColumn(ColumnIdentifier.getInterned("record", false)); + this.versionColumn = cfs.metadata().getColumn(ColumnIdentifier.getInterned("user_version", false)); + this.keySupport = keySupport; + this.accordJournalVersion = accordJournalVersion; + } + + public interface Reader + { + void read(K key, DataInputPlus input, int userVersion) throws IOException; + } + + private abstract class AbstractRecordConsumer implements RecordConsumer + { + protected final Reader reader; + + AbstractRecordConsumer(Reader reader) + { + this.reader = reader; + } + + @Override + public void accept(long segment, int position, K key, ByteBuffer buffer, IntHashSet hosts, int userVersion) + { + try (DataInputBuffer in = new DataInputBuffer(buffer, false)) + { + reader.read(key, in, userVersion); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + } + } + + private class TableRecordConsumer extends AbstractRecordConsumer + { + protected LongHashSet visited = null; + + TableRecordConsumer(Reader reader) + { + super(reader); + } + + void visit(long segment) + { + if (visited == null) + visited = new LongHashSet(); + visited.add(segment); + } + + boolean visited(long segment) + { + return visited != null && visited.contains(segment); + } + + @Override + public void accept(long segment, int position, K key, ByteBuffer buffer, IntHashSet hosts, int userVersion) + { + visit(segment); + super.accept(segment, position, key, buffer, hosts, userVersion); + } + } + + private class JournalAndTableRecordConsumer extends AbstractRecordConsumer + { + private final K key; + private final TableRecordConsumer tableRecordConsumer; + + JournalAndTableRecordConsumer(K key, Reader reader) + { + super(reader); + this.key = key; + this.tableRecordConsumer = new TableRecordConsumer(reader); + } + + @Override + public void init() + { + readAllFromTable(key, tableRecordConsumer); + } + + @Override + public void accept(long segment, int position, K key, ByteBuffer buffer, IntHashSet hosts, int userVersion) + { + if (!tableRecordConsumer.visited(segment)) + super.accept(segment, position, key, buffer, hosts, userVersion); + } + } + + /** + * Perform a read from Journal table, followed by the reads from all journal segments. + *

      + * When reading from journal segments, skip descriptors that were read from the table. + */ + public void readAll(K key, Reader reader) + { + journal.readAll(key, new JournalAndTableRecordConsumer(key, reader)); + } + + private void readAllFromTable(K key, TableRecordConsumer onEntry) + { + DecoratedKey pk = makePartitionKey(cfs, key, keySupport, accordJournalVersion); + + try (RefViewFragment view = cfs.selectAndReference(View.select(SSTableSet.LIVE, pk))) + { + if (view.sstables.isEmpty()) + return; + + List iters = new ArrayList<>(view.sstables.size()); + for (SSTableReader sstable : view.sstables) + if (sstable.mayContainAssumingKeyIsInRange(pk)) + iters.add(StorageHook.instance.makeRowIterator(cfs, sstable, pk, Slices.ALL, ColumnFilter.all(cfs.metadata()), false, NOOP_LISTENER)); + + if (!iters.isEmpty()) + { + EntryHolder into = new EntryHolder<>(); + try (UnfilteredRowIterator iter = UnfilteredRowIterators.merge(iters)) + { + while (iter.hasNext()) readRow(key, iter.next(), into, onEntry); + } + } + } + } + + public static DecoratedKey makePartitionKey(ColumnFamilyStore cfs, K key, KeySupport keySupport, int version) + { + try (DataOutputBuffer out = new DataOutputBuffer(keySupport.serializedSize(version))) + { + keySupport.serialize(key, out, version); + return cfs.decorateKey(out.buffer(false)); + } + catch (IOException e) + { + // can only throw if (key) serializer is buggy + throw new RuntimeException("Could not serialize key " + key + ", this shouldn't be possible", e); + } + } + + private void readRow(K key, Unfiltered unfiltered, EntryHolder into, RecordConsumer onEntry) + { + Invariants.checkState(unfiltered.isRow()); + Row row = (Row) unfiltered; + + long descriptor = LongType.instance.compose(ByteBuffer.wrap((byte[]) row.clustering().get(0))); + int position = Int32Type.instance.compose(ByteBuffer.wrap((byte[]) row.clustering().get(1))); + + into.key = key; + into.value = row.getCell(recordColumn).buffer(); + into.hosts = SENTINEL_HOSTS; + into.userVersion = Int32Type.instance.compose(row.getCell(versionColumn).buffer()); + + onEntry.accept(descriptor, position, into.key, into.value, into.hosts, into.userVersion); + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 84b096b71685..fd4771e3b01e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -170,6 +170,7 @@ public class AccordKeyspace { private static final Logger logger = LoggerFactory.getLogger(AccordKeyspace.class); + public static final String JOURNAL = "journal"; public static final String COMMANDS = "commands"; public static final String TIMESTAMPS_FOR_KEY = "timestamps_for_key"; public static final String COMMANDS_FOR_KEY = "commands_for_key"; @@ -179,7 +180,8 @@ public class AccordKeyspace public static final Set TABLE_NAMES = ImmutableSet.of(COMMANDS, TIMESTAMPS_FOR_KEY, COMMANDS_FOR_KEY, TOPOLOGIES, EPOCH_METADATA, - COMMAND_STORE_METADATA); + COMMAND_STORE_METADATA, + JOURNAL); private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); private static final String TIMESTAMP_TUPLE = TIMESTAMP_TYPE.asCQL3Type().toString(); @@ -224,6 +226,21 @@ static TokenType valueOf(Token token) } } + public static final TableMetadata Journal = + parse(JOURNAL, + "accord journal", + "CREATE TABLE %s (" + + "key blob," + + "descriptor bigint," + + "offset int," + + "user_version int," + + "record blob," + + "PRIMARY KEY(key, descriptor, offset)" + + ')') + .partitioner(new LocalPartitioner(BytesType.instance)) + .build(); + + // TODO: store timestamps as blobs (confirm there are no negative numbers, or offset) public static final TableMetadata Commands = parse(COMMANDS, @@ -718,7 +735,7 @@ public static KeyspaceMetadata metadata() public static Tables tables() { - return Tables.of(Commands, TimestampsForKeys, CommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata); + return Tables.of(Commands, TimestampsForKeys, CommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata, Journal); } private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException diff --git a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java new file mode 100644 index 000000000000..e3f10cb64444 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord; + +import java.util.Collection; +import java.util.Collections; +import java.util.PriorityQueue; + +import com.google.common.base.Throwables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.utils.Invariants; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.SerializationHeader; +import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.rows.EncodingStats; +import org.apache.cassandra.io.sstable.Descriptor; +import org.apache.cassandra.io.sstable.SSTableTxnWriter; +import org.apache.cassandra.journal.KeySupport; +import org.apache.cassandra.journal.SegmentCompactor; +import org.apache.cassandra.journal.StaticSegment; +import org.apache.cassandra.journal.StaticSegment.KeyOrderReader; + +/** + * Segment compactor: takes static segments and compacts them into a single SSTable. + */ +public class AccordSegmentCompactor implements SegmentCompactor +{ + private static final Logger logger = LoggerFactory.getLogger(AccordSegmentCompactor.class); + + @Override + public Collection> compact(Collection> segments, KeySupport keySupport) + { + Invariants.checkState(segments.size() >= 2, () -> String.format("Can only compact 2 or more segments, but got %d", segments.size())); + logger.info("Compacting {} static segments: {}", segments.size(), segments); + + PriorityQueue> readers = new PriorityQueue<>(); + for (StaticSegment segment : segments) + { + KeyOrderReader reader = segment.keyOrderReader(); + if (reader.advance()) + readers.add(reader); + } + + // nothing to compact (all segments empty, should never happen, but it is theoretically possible?) - exit early + // TODO: investigate how this comes to be, check if there is a cleanup issue + if (readers.isEmpty()) + return segments; + + ColumnFamilyStore cfs = Keyspace.open(AccordKeyspace.metadata().name).getColumnFamilyStore(AccordKeyspace.JOURNAL); + Descriptor descriptor = cfs.newSSTableDescriptor(cfs.getDirectories().getDirectoryForNewSSTables()); + SerializationHeader header = new SerializationHeader(true, cfs.metadata(), cfs.metadata().regularAndStaticColumns(), EncodingStats.NO_STATS); + + try (SSTableTxnWriter writer = SSTableTxnWriter.create(cfs, descriptor, 0, 0, null, false, header)) + { + K key = null; + PartitionUpdate.SimpleBuilder partitionBuilder = null; + + try + { + KeyOrderReader reader; + while ((reader = readers.poll()) != null) + { + if (!reader.key().equals(key)) // first ever - or new - key + { + if (partitionBuilder != null) // append previous partition if any + writer.append(partitionBuilder.build().unfilteredIterator()); + + key = reader.key(); + partitionBuilder = PartitionUpdate.simpleBuilder( + AccordKeyspace.Journal, AccordJournalTable.makePartitionKey(cfs, key, keySupport, reader.descriptor.userVersion) + ); + } + + boolean advanced; + do + { + partitionBuilder.row(reader.descriptor.timestamp, reader.offset()) + .add("record", reader.record()) + .add("user_version", reader.descriptor.userVersion); + } + while ((advanced = reader.advance()) && reader.key().equals(key)); + + if (advanced) readers.offer(reader); // there is more to this reader, but not with this key + } + + //noinspection DataFlowIssue + writer.append(partitionBuilder.build().unfilteredIterator()); // append the last partition + } + catch (Throwable t) + { + Throwable accumulate = writer.abort(t); + Throwables.throwIfUnchecked(accumulate); + throw new RuntimeException(accumulate); + } + + cfs.addSSTables(writer.finish(true)); + return Collections.emptyList(); + } + } +} + diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index e01dcc313778..2daa31e27228 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -22,6 +22,7 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -127,6 +128,8 @@ public AsyncTxnResult(@Nonnull TxnId txnId) class CompactionInfo { + static final Supplier NO_OP = () -> new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), DurableBefore.EMPTY); + public final Int2ObjectHashMap redundantBefores; public final Int2ObjectHashMap ranges; public final DurableBefore durableBefore; diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index aa0c7473965e..c31c33788213 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -36,9 +36,9 @@ public final class JournalKey { - final Timestamp timestamp; + public final Timestamp timestamp; // TODO: command store id _before_ timestamp - final int commandStoreId; + public final int commandStoreId; JournalKey(Timestamp timestamp) { @@ -60,7 +60,7 @@ public final class JournalKey * when ordering timestamps. This is done for more precise elimination of candidate * segments by min/max record key in segment. */ - static final KeySupport SUPPORT = new KeySupport<>() + public static final KeySupport SUPPORT = new KeySupport<>() { private static final int HLC_OFFSET = 0; private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index 090e4da75544..bef75a70ef0d 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -259,30 +259,128 @@ private static int setFieldIsNull(Fields field, int oldFlags) return oldFlags | (1 << field.ordinal()); } - public static class Builder { - TxnId txnId = null; + TxnId txnId; + + Timestamp executeAt; + Timestamp executeAtLeast; + SaveStatus saveStatus; + Status.Durability durability; + + Ballot acceptedOrCommitted; + Ballot promised; + + Route route; + PartialTxn partialTxn; + PartialDeps partialDeps; + Seekables additionalKeysOrRanges; + + SavedCommand.WaitingOnProvider waitingOn; + Writes writes; + Result result; + + boolean nextCalled; + int count; + + public Builder() + { + clear(); + } + + public TxnId txnId() + { + return txnId; + } + + public Timestamp executeAt() + { + return executeAt; + } + + public SaveStatus saveStatus() + { + return saveStatus; + } + + public Status.Durability durability() + { + return durability; + } + + public Ballot acceptedOrCommitted() + { + return acceptedOrCommitted; + } + + public Ballot promised() + { + return promised; + } - Timestamp executeAt = null; - Timestamp executeAtLeast = null; - SaveStatus saveStatus = null; - Status.Durability durability = null; + public Route route() + { + return route; + } + + public PartialTxn partialTxn() + { + return partialTxn; + } + + public PartialDeps partialDeps() + { + return partialDeps; + } + + public Seekables additionalKeysOrRanges() + { + return additionalKeysOrRanges; + } + + public SavedCommand.WaitingOnProvider waitingOn() + { + return waitingOn; + } - Ballot acceptedOrCommitted = Ballot.ZERO; - Ballot promised = null; + public Writes writes() + { + return writes; + } + + public Result result() + { + return result; + } + + public void clear() + { + txnId = null; - Route route = null; - PartialTxn partialTxn = null; - PartialDeps partialDeps = null; - Seekables additionalKeysOrRanges = null; + executeAt = null; + saveStatus = null; + durability = null; - SavedCommand.WaitingOnProvider waitingOn = (txn, deps) -> null; - Writes writes = null; - Result result = CommandSerializers.APPLIED; + acceptedOrCommitted = Ballot.ZERO; + promised = null; - boolean nextCalled = false; - int count = 0; + route = null; + partialTxn = null; + partialDeps = null; + additionalKeysOrRanges = null; + + waitingOn = (txn, deps) -> null; + writes = null; + result = CommandSerializers.APPLIED; + + nextCalled = false; + count = 0; + } + + public boolean isEmpty() + { + return !nextCalled; + } public int count() { @@ -455,6 +553,11 @@ public Command construct() throws IOException case PreAccepted: return Command.PreAccepted.preAccepted(attrs, executeAt, promised); case AcceptedInvalidate: + if (saveStatus == SaveStatus.AcceptedInvalidateWithDefinition) + return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); + else + return Command.AcceptedInvalidateWithoutDefinition.acceptedInvalidate(attrs, promised, acceptedOrCommitted); + case Accepted: case PreCommitted: return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java similarity index 98% rename from test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalTest.java rename to test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java index 7f7dfb076746..80d48b6091b6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java @@ -31,7 +31,7 @@ import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.utils.concurrent.CountDownLatch; -public class AccordJournalTest extends TestBaseImpl +public class AccordJournalIntegrationTest extends TestBaseImpl { @Test public void saveLoadSanityCheck() throws Throwable diff --git a/test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java new file mode 100644 index 000000000000..a2161c438661 --- /dev/null +++ b/test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.journal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.service.accord.AccordJournalTable; +import org.apache.cassandra.service.accord.AccordSegmentCompactor; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.TimeUUID; + +import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; + +public class AccordJournalCompactionTest +{ + private static final Set SENTINEL_HOSTS = Collections.singleton(0); + + @BeforeClass + public static void setUp() + { + DatabaseDescriptor.daemonInitialization(); + ServerTestUtils.prepareServer(); + } + + @Test + public void segmentMergeTest() throws IOException + { + File directory = new File(Files.createTempDirectory(null)); + directory.deleteOnExit(); + + Journal journal = journal(directory); + AccordJournalTable journalTable = new AccordJournalTable<>(journal, journal.keySupport, journal.params.userVersion()); + journal.start(); + + Map> uuids = new HashMap<>(); + + int count = 0; + for (int i = 0; i < 1024 * 5; i++) + { + TimeUUID uuid = nextTimeUUID(); + for (long j = 0; j < 5; j++) + { + ByteBuffer buf = ByteBuffer.allocate(1024); + for (int k = 0; k < 1024; k++) + buf.put((byte) count); + count++; + buf.rewind(); + uuids.computeIfAbsent(uuid, (k) -> new ArrayList<>()) + .add(buf); + journal.asyncWrite(uuid, buf, SENTINEL_HOSTS); + } + } + + journal.closeCurrentSegmentForTesting(); + Runnable checkAll = () -> { + for (Map.Entry> e : uuids.entrySet()) + { + List expected = e.getValue(); + + List actual = new ArrayList<>(); + journalTable.readAll(e.getKey(), (key, in, userVersion) -> actual.add(journal.valueSerializer.deserialize(key, in, userVersion))); + Assert.assertEquals(actual.size(), expected.size()); + for (int i = 0; i < actual.size(); i++) + { + if (!actual.get(i).equals(expected.get(i))) + { + StringBuilder sb = new StringBuilder(); + sb.append("Actual:\n"); + for (ByteBuffer bb : actual) + sb.append(ByteBufferUtil.bytesToHex(bb)).append('\n'); + sb.append("Expected:\n"); + for (ByteBuffer bb : expected) + sb.append(ByteBufferUtil.bytesToHex(bb)).append('\n'); + throw new AssertionError(sb.toString()); + } + } + } + }; + + checkAll.run(); + journal.runCompactorForTesting(); + checkAll.run(); + journal.shutdown(); + } + + private static Journal journal(File directory) + { + return new Journal<>("TestJournal", directory, + new TestParams() { + @Override + public int segmentSize() + { + return 1024 * 1024; + } + + @Override + public boolean enableCompaction() + { + return false; + } + }, + TimeUUIDKeySupport.INSTANCE, + JournalTest.ByteBufferSerializer.INSTANCE, + new AccordSegmentCompactor<>()); + } +} diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 5a3b23076eec..8ff893864511 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -38,6 +38,7 @@ import org.apache.cassandra.journal.Journal; import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.journal.RecordPointer; +import org.apache.cassandra.journal.SegmentCompactor; import org.apache.cassandra.journal.ValueSerializer; import org.junit.Assert; @@ -78,7 +79,8 @@ public void simpleRWTest() new File("/journal"), new AccordSpec.JournalSpec(), new IdentityKeySerializer(), - new IdentityValueSerializer()); + new IdentityValueSerializer(), + SegmentCompactor.noop()); }), () -> check()); } diff --git a/test/unit/org/apache/cassandra/journal/IndexTest.java b/test/unit/org/apache/cassandra/journal/IndexTest.java index 5e7314a33860..8f1046f2c319 100644 --- a/test/unit/org/apache/cassandra/journal/IndexTest.java +++ b/test/unit/org/apache/cassandra/journal/IndexTest.java @@ -224,20 +224,18 @@ private static void test(File directory, Map map) sortedEntries.add(Pair.create(entry.getKey(), l)); } - Index.IndexIterator iter = onDisk.iterator(); + OnDiskIndex.IndexReader iter = onDisk.reader(); Iterator> expectedIter = sortedEntries.iterator(); - while (iter.hasNext()) + while (iter.advance()) { - iter.next(); Pair expected = expectedIter.next(); - Assert.assertEquals(iter.currentKey(), expected.left); - Assert.assertEquals(iter.currentSize(), Index.readSize(expected.right)); - Assert.assertEquals(iter.currentOffset(), Index.readOffset(expected.right)); + Assert.assertEquals(iter.key(), expected.left); + Assert.assertEquals(iter.recordSize(), Index.readSize(expected.right)); + Assert.assertEquals(iter.offset(), Index.readOffset(expected.right)); } } } - private static void assertIndex(Map expected, Index actual) { List keys = expected.entrySet() diff --git a/test/unit/org/apache/cassandra/journal/JournalTest.java b/test/unit/org/apache/cassandra/journal/JournalTest.java index 241e465ba89b..bab37ca1504f 100644 --- a/test/unit/org/apache/cassandra/journal/JournalTest.java +++ b/test/unit/org/apache/cassandra/journal/JournalTest.java @@ -18,27 +18,33 @@ package org.apache.cassandra.journal; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.util.Collections; +import java.util.Set; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; import org.apache.cassandra.utils.TimeUUID; -import static org.junit.Assert.assertEquals; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; +import static org.junit.Assert.assertEquals; public class JournalTest { + private static final Set SENTINEL_HOSTS = Collections.singleton(0); + @BeforeClass public static void setUp() { DatabaseDescriptor.daemonInitialization(); + ServerTestUtils.prepareServer(); } @Test @@ -48,7 +54,8 @@ public void testSimpleReadWrite() throws IOException directory.deleteRecursiveOnExit(); Journal journal = - new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE, SegmentCompactor.noop()); + journal.start(); @@ -69,7 +76,7 @@ public void testSimpleReadWrite() throws IOException journal.shutdown(); - journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE); + journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE, SegmentCompactor.noop()); journal.start(); assertEquals(1L, (long) journal.readFirst(id1)); @@ -80,6 +87,29 @@ public void testSimpleReadWrite() throws IOException journal.shutdown(); } + static class ByteBufferSerializer implements ValueSerializer + { + static final ByteBufferSerializer INSTANCE = new ByteBufferSerializer(); + + public int serializedSize(TimeUUID key, ByteBuffer value, int userVersion) + { + return Integer.BYTES + value.capacity(); + } + + public void serialize(TimeUUID key, ByteBuffer value, DataOutputPlus out, int userVersion) throws IOException + { + out.writeInt(value.capacity()); + out.write(value); + } + + public ByteBuffer deserialize(TimeUUID key, DataInputPlus in, int userVersion) throws IOException + { + byte[] bytes = new byte[in.readInt()]; + in.readFully(bytes); + return ByteBuffer.wrap(bytes); + } + } + static class LongSerializer implements ValueSerializer { static final LongSerializer INSTANCE = new LongSerializer(); diff --git a/test/unit/org/apache/cassandra/journal/SegmentTest.java b/test/unit/org/apache/cassandra/journal/SegmentTest.java index 2e59d701cb75..573ba4c9e059 100644 --- a/test/unit/org/apache/cassandra/journal/SegmentTest.java +++ b/test/unit/org/apache/cassandra/journal/SegmentTest.java @@ -203,26 +203,26 @@ public void testReadClosedSegmentSequentially() throws IOException activeSegment.close(); - StaticSegment.SequentialReader reader = StaticSegment.reader(descriptor, TimeUUIDKeySupport.INSTANCE, 0); + StaticSegment.SequentialReader reader = StaticSegment.sequentialReader(descriptor, TimeUUIDKeySupport.INSTANCE, 0); // read all 4 entries sequentially and compare with originals assertTrue(reader.advance()); - assertEquals(id1, reader.id()); + assertEquals(id1, reader.key()); assertEquals(hosts1, reader.hosts()); assertEquals(record1, reader.record()); assertTrue(reader.advance()); - assertEquals(id2, reader.id()); + assertEquals(id2, reader.key()); assertEquals(hosts2, reader.hosts()); assertEquals(record2, reader.record()); assertTrue(reader.advance()); - assertEquals(id3, reader.id()); + assertEquals(id3, reader.key()); assertEquals(hosts3, reader.hosts()); assertEquals(record3, reader.record()); assertTrue(reader.advance()); - assertEquals(id4, reader.id()); + assertEquals(id4, reader.key()); assertEquals(hosts4, reader.hosts()); assertEquals(record4, reader.record()); diff --git a/test/unit/org/apache/cassandra/journal/TestParams.java b/test/unit/org/apache/cassandra/journal/TestParams.java index 9a9254ce9ba7..5773c4763abe 100644 --- a/test/unit/org/apache/cassandra/journal/TestParams.java +++ b/test/unit/org/apache/cassandra/journal/TestParams.java @@ -41,6 +41,18 @@ public FlushMode flushMode() return FlushMode.GROUP; } + @Override + public boolean enableCompaction() + { + return false; + } + + @Override + public int compactionPeriodMillis() + { + return 60_000; + } + @Override public int flushPeriodMillis() { diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index a2fd50d5f937..80a7b4176918 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -398,7 +398,7 @@ public static AccordCommandStore createAccordCommandStore( if (new File(DatabaseDescriptor.getAccordJournalDirectory()).exists()) ServerTestUtils.cleanupDirectory(DatabaseDescriptor.getAccordJournalDirectory()); - AccordJournal journal = new AccordJournal(null, new AccordSpec.JournalSpec()); + AccordJournal journal = new AccordJournal(SimpleAccordEndpointMapper.INSTANCE, new AccordSpec.JournalSpec()); journal.start(null); SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index f8d08185a390..e827bb33d655 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -228,8 +228,10 @@ public Command build(SaveStatus saveStatus) return Command.SerializerSupport.notDefined(attributes(saveStatus), Ballot.ZERO); case PreAccepted: return Command.SerializerSupport.preaccepted(attributes(saveStatus), executeAt, Ballot.ZERO); - case Accepted: case AcceptedInvalidate: + return Command.AcceptedInvalidateWithoutDefinition.acceptedInvalidate(attributes(saveStatus), promised, Ballot.ZERO); + + case Accepted: case AcceptedWithDefinition: case AcceptedInvalidateWithDefinition: case PreCommittedWithDefinition: diff --git a/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java b/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java index 9b5280025068..7b1091d4c507 100644 --- a/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java +++ b/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java @@ -32,12 +32,14 @@ import org.apache.cassandra.locator.SimpleSeedProvider; import static accord.utilsfork.Property.qt; +import static org.apache.cassandra.config.CassandraRelevantProperties.STORAGE_DIR; public class ConfigGenBuilderTest { static { File.unsafeSetFilesystem(Jimfs.newFileSystem("testing")); + STORAGE_DIR.setString("/data"); } private static final Gen> GEN = new ConfigGenBuilder().build(); From 058640be4e2b9c6c277e1148ab2870da1dcfebb3 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Mon, 26 Aug 2024 15:56:22 -0700 Subject: [PATCH 142/340] CEP-15 (C*) increase message timeouts for range barrier messages Patch by Blake Eggleston; Reviewed by Ariel Weisberg for CASSANDRA-19926 --- .../service/accord/AccordMessageSink.java | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index bf6c5ae06f19..eda0f75b874e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -26,10 +26,13 @@ import java.util.Map; import java.util.Set; +import accord.messages.*; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.utils.Clock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,11 +40,6 @@ import accord.api.MessageSink; import accord.local.AgentExecutor; import accord.local.Node; -import accord.messages.Callback; -import accord.messages.MessageType; -import accord.messages.Reply; -import accord.messages.ReplyContext; -import accord.messages.Request; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.Message; @@ -227,12 +225,33 @@ public void send(Node.Id to, Request request) messaging.send(message, endpoint); } + private static boolean isRangeBarrier(Request request) + { + if (!(request instanceof TxnRequest)) + return false; + + TxnRequest txnRequest = (TxnRequest) request; + if (!txnRequest.txnId.kind().isSyncPoint()) + return false; + + return txnRequest.txnId.domain().isRange(); + } + @Override public void send(Node.Id to, Request request, AgentExecutor executor, Callback callback) { Verb verb = getVerb(request); Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); - Message message = Message.out(verb, request); + Message message; + if (isRangeBarrier(request)) + { + long nowNanos = Clock.Global.nanoTime(); + message = Message.out(verb, request, nowNanos + DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos()); + } + else + { + message = Message.out(verb, request); + } InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); logger.trace("Sending {} {} to {}", verb, message.payload, endpoint); messaging.sendWithCallback(message, endpoint, new AccordCallback<>(executor, (Callback) callback, endpointMapper)); From 0f51ee407bbcc710f2619c8f36bdd1c12329e619 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Tue, 13 Aug 2024 15:24:10 -0700 Subject: [PATCH 143/340] CEP-15: C* - Early repair failures hang nodetool Patch by Blake Eggleston; Reviewed by Ariel Weisberg for CASSANDRA-19834 --- .../cassandra/service/StorageService.java | 10 +-- .../service/StorageServiceMBean.java | 8 ++- .../migration/ConsensusTableMigration.java | 28 +++++---- .../org/apache/cassandra/tools/NodeProbe.java | 32 ++++++---- .../apache/cassandra/tools/RepairRunner.java | 63 +++++++++++++++++-- .../nodetool/ConsensusMigrationAdmin.java | 50 +++++++++++---- 6 files changed, 142 insertions(+), 49 deletions(-) diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index ef4068e992e1..b5de6eae4230 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -169,6 +169,7 @@ import org.apache.cassandra.schema.TableMetadataRef; import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import org.apache.cassandra.service.disk.usage.DiskUsageBroadcaster; import org.apache.cassandra.service.paxos.Paxos; import org.apache.cassandra.service.paxos.PaxosCommit; @@ -1686,12 +1687,13 @@ public void migrateConsensusProtocol(@Nonnull String targetProtocol, } @Override - public List finishConsensusMigration(@Nonnull String keyspace, - @Nullable List maybeTableNames, - @Nullable String maybeRangesStr) + public Integer finishConsensusMigration(@Nonnull String keyspace, + @Nullable List maybeTableNames, + @Nullable String maybeRangesStr, + @Nonnull ConsensusMigrationTarget target) { checkArgument(!keyspace.equals(SchemaConstants.METADATA_KEYSPACE_NAME)); - return finishMigrationToConsensusProtocol(keyspace, Optional.ofNullable(maybeTableNames), Optional.ofNullable(maybeRangesStr)); + return finishMigrationToConsensusProtocol(keyspace, Optional.ofNullable(maybeTableNames), Optional.ofNullable(maybeRangesStr), target); } @Override diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index c58205898d88..d95ea0f77952 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -36,6 +36,7 @@ import org.apache.cassandra.db.ColumnFamilyStoreMBean; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import org.apache.cassandra.utils.BreaksJMX; public interface StorageServiceMBean extends NotificationEmitter @@ -1147,9 +1148,10 @@ void migrateConsensusProtocol(@Nonnull String targetProtocol, @Nullable List maybeTableNames, @Nullable String maybeRangesStr); - List finishConsensusMigration(@Nonnull String keyspace, - @Nullable List maybeTableNames, - @Nullable String maybeRangesStr); + Integer finishConsensusMigration(@Nonnull String keyspace, + @Nullable List maybeTableNames, + @Nullable String maybeRangesStr, + @Nonnull ConsensusMigrationTarget target); String listConsensusMigrations(@Nullable Set keyspaceNames, @Nullable Set tableNames, @Nonnull String format); diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java index 62d449d5d656..05e351f13d1b 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusTableMigration.java @@ -181,11 +181,13 @@ public static void startMigrationToConsensusProtocol(@Nonnull String targetProto ClusterMetadataService.instance().commit(new BeginConsensusMigrationForTableAndRange(targetProtocol, ranges, tableIds)); } - public static List finishMigrationToConsensusProtocol(@Nonnull String keyspace, + public static Integer finishMigrationToConsensusProtocol(@Nonnull String keyspace, @Nonnull Optional> maybeTables, - @Nonnull Optional maybeRangesStr) + @Nonnull Optional maybeRangesStr, + ConsensusMigrationTarget target) { checkArgument(!maybeTables.isPresent() || !maybeTables.get().isEmpty(), "Must provide at least 1 table if Optional is not empty"); + checkNotNull(target); Optional>> localKeyspaceRanges = Optional.of(ImmutableList.copyOf(StorageService.instance.getLocalReplicas(keyspace).onlyFull().ranges())); List> ranges = maybeRangesToRanges(maybeRangesStr, localKeyspaceRanges); @@ -215,17 +217,17 @@ public static List finishMigrationToConsensusProtocol(@Nonnull String k tableMigrationStates.add(tms); }); - List migratingToAccord = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.accord).collect(toImmutableList()); - List migratingToPaxos = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.paxos).collect(toImmutableList());; - - Integer accordRepairCmd = finishMigrationToAccord(keyspace, migratingToAccord, ranges); - Integer paxosRepairCmd = finishMigrationToPaxos(keyspace, migratingToPaxos, ranges); - List result = new ArrayList<>(); - if (accordRepairCmd != null) - result.add(accordRepairCmd); - if (paxosRepairCmd != null) - result.add(paxosRepairCmd); - return result; + switch (target) + { + case accord: + List migratingToAccord = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.accord).collect(toImmutableList()); + return finishMigrationToAccord(keyspace, migratingToAccord, ranges); + case paxos: + List migratingToPaxos = tableMigrationStates.stream().filter(tms -> tms.targetProtocol == ConsensusMigrationTarget.paxos).collect(toImmutableList());; + return finishMigrationToPaxos(keyspace, migratingToPaxos, ranges); + default: + throw new IllegalArgumentException("Unsupported target: " + target); + } } private interface MigrationFinisher diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index e733667b1e81..37b7ce627ae2 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -30,6 +30,7 @@ import java.rmi.server.RMISocketFactory; import java.util.AbstractMap; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -114,7 +115,6 @@ import org.apache.cassandra.service.CacheService; import org.apache.cassandra.service.CacheServiceMBean; import org.apache.cassandra.service.snapshot.SnapshotManagerMBean; -import org.apache.cassandra.tcm.CMSOperationsMBean; import org.apache.cassandra.service.GCInspector; import org.apache.cassandra.service.GCInspectorMXBean; import org.apache.cassandra.service.StorageProxy; @@ -126,6 +126,8 @@ import org.apache.cassandra.streaming.StreamState; import org.apache.cassandra.streaming.management.StreamStateCompositeData; import org.apache.cassandra.tcm.CMSOperations; +import org.apache.cassandra.tcm.CMSOperationsMBean; +import org.apache.cassandra.tools.RepairRunner.RepairCmd; import org.apache.cassandra.tools.nodetool.GetTimeout; import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; import org.apache.cassandra.utils.NativeLibrary; @@ -525,18 +527,28 @@ public String getKeyspaceReplicationInfo(String keyspaceName) public void repairAsync(final PrintStream out, final String keyspace, Map options) throws IOException { - blockOnAsyncRepair(out, keyspace, ssProxy.repairAsync(keyspace, options)); + startAndBlockOnAsyncRepairs(out, Collections.singleton(new RepairCmd(keyspace) + { + @Override + public Integer start() + { + return ssProxy.repairAsync(keyspace, options); + } + })); } - public void blockOnAsyncRepair(final PrintStream out, final String keyspace, Integer cmd) throws IOException + public void startAndBlockOnAsyncRepairs(final PrintStream out, Collection cmds) throws IOException { - RepairRunner runner = new RepairRunner(out, ssProxy, keyspace, cmd); + List runners = new ArrayList<>(cmds.size()); + for (RepairCmd cmd : cmds) + runners.add(new RepairRunner(out, jmxc, ssProxy, cmd)); + try { - if (jmxc != null) - jmxc.addConnectionNotificationListener(runner, null, null); - ssProxy.addNotificationListener(runner, null, null); - runner.run(); + runners.forEach(RepairRunner::start); + + for (RepairRunner runner : runners) + runner.run(); } catch (Exception e) { @@ -546,9 +558,7 @@ public void blockOnAsyncRepair(final PrintStream out, final String keyspace, Int { try { - ssProxy.removeNotificationListener(runner); - if (jmxc != null) - jmxc.removeConnectionNotificationListener(runner); + runners.forEach(RepairRunner::close); } catch (Throwable e) { diff --git a/src/java/org/apache/cassandra/tools/RepairRunner.java b/src/java/org/apache/cassandra/tools/RepairRunner.java index 3a4f77ccd2d7..8451de291820 100644 --- a/src/java/org/apache/cassandra/tools/RepairRunner.java +++ b/src/java/org/apache/cassandra/tools/RepairRunner.java @@ -22,6 +22,9 @@ import java.text.SimpleDateFormat; import java.util.List; +import javax.management.ListenerNotFoundException; +import javax.management.remote.JMXConnector; + import org.apache.cassandra.service.ActiveRepairService.ParentRepairStatus; import org.apache.cassandra.service.StorageServiceMBean; import org.apache.cassandra.utils.concurrent.Condition; @@ -42,30 +45,78 @@ public class RepairRunner extends JMXNotificationProgressListener { + public static abstract class RepairCmd + { + private final String keyspace; + + public RepairCmd(String keyspace) + { + this.keyspace = keyspace; + } + + + + public abstract Integer start(); + } private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); private final PrintStream out; + private final JMXConnector jmxc; private final StorageServiceMBean ssProxy; - private final String keyspace; private final Condition condition = newOneTimeCondition(); + private final RepairCmd repairCmd; private Integer cmd; private volatile Exception error; - public RepairRunner(PrintStream out, StorageServiceMBean ssProxy, String keyspace, Integer cmd) + public RepairRunner(PrintStream out, JMXConnector jmxc, StorageServiceMBean ssProxy, RepairCmd repairCmd) { this.out = out; + this.jmxc = jmxc; this.ssProxy = ssProxy; - this.keyspace = keyspace; - this.cmd = cmd; + this.repairCmd = repairCmd; + } + + public void start() + { + if (jmxc != null) + jmxc.addConnectionNotificationListener(this, null, null); + ssProxy.addNotificationListener(this, null, null); + this.cmd = repairCmd.start(); + } + + public void close() + { + try + { + ssProxy.removeNotificationListener(this); + } + catch (ListenerNotFoundException e) + { + // noop - there may be double removes with error handling + } + if (jmxc != null) + { + try + { + jmxc.removeConnectionNotificationListener(this); + } + catch (ListenerNotFoundException e) + { + // noop - there may be double removes with error handling + } + } } public void run() throws Exception { + if (cmd == null) + return; + if (cmd <= 0) { // repairAsync can only return 0 for replication factor 1. - String message = String.format("Replication factor is 1. No repair is needed for keyspace '%s'", keyspace); + String message = String.format("Replication factor is 1. No repair is needed for keyspace '%s'", repairCmd.keyspace); printMessage(message); } else @@ -117,7 +168,7 @@ public void handleConnectionFailed(long timestamp, String message) { error = new IOException(String.format("[%s] JMX connection closed. You should check server log for repair status of keyspace %s" + "(Subsequent keyspaces are not going to be repaired).", - format.format(timestamp), keyspace)); + format.format(timestamp), repairCmd.keyspace)); condition.signalAll(); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java index 9878034f88ce..e7e1fd9cfcf1 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java +++ b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java @@ -27,8 +27,10 @@ import io.airlift.airline.Arguments; import io.airlift.airline.Command; import io.airlift.airline.Option; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import org.apache.cassandra.tools.NodeProbe; import org.apache.cassandra.tools.NodeTool; +import org.apache.cassandra.tools.RepairRunner.RepairCmd; import static com.google.common.base.Preconditions.checkArgument; import static java.util.Collections.singleton; @@ -96,26 +98,50 @@ public static class FinishMigration extends ConsensusMigrationAdmin @Arguments(usage = "[ ...]", description = "The keyspace followed by one or many tables") private List schemaArgs = new ArrayList<>(); + private static class FinishMigrationRepairCommand extends RepairCmd + { + private final NodeProbe probe; + private final String keyspace; + private final List maybeTableNames; + private final String maybeRangesStr; + private final ConsensusMigrationTarget target; + + public FinishMigrationRepairCommand(NodeProbe probe, String keyspace, List maybeTableNames, String maybeRangesStr, ConsensusMigrationTarget target) + { + super(keyspace); + this.probe = probe; + this.keyspace = keyspace; + this.maybeTableNames = maybeTableNames; + this.maybeRangesStr = maybeRangesStr; + this.target = target; + } + + @Override + public Integer start() + { + return probe.getStorageService().finishConsensusMigration(keyspace, maybeTableNames, maybeRangesStr, target); + } + } + protected void execute(NodeProbe probe) { checkArgument((endToken != null) == (startToken != null), "Start and end token must be specified together"); String maybeRangesStr = startToken != null ? startToken + ":" + endToken : null; List keyspaceNames = parseOptionalKeyspace(schemaArgs, probe, KeyspaceSet.ACCORD_MANAGED); List maybeTableNames = schemaArgs.size() > 1 ? schemaArgs.subList(1, schemaArgs.size()) : null; + List repairCmds = new ArrayList<>(keyspaceNames.size() * 2); for (String keyspace : keyspaceNames) { - List commands = probe.getStorageService().finishConsensusMigration(keyspace, maybeTableNames, maybeRangesStr); - for (Integer command : commands) - { - try - { - probe.blockOnAsyncRepair(probe.output().out, keyspace, command); - } - catch (IOException e) - { - throw new RuntimeException("Error occurred attempting to finish migration for keyspace " + keyspace + " tables " + maybeTableNames + " and ranges " + maybeRangesStr, e); - } - } + repairCmds.add(new FinishMigrationRepairCommand(probe, keyspace, maybeTableNames, maybeRangesStr, ConsensusMigrationTarget.paxos)); + repairCmds.add(new FinishMigrationRepairCommand(probe, keyspace, maybeTableNames, maybeRangesStr, ConsensusMigrationTarget.accord)); + } + try + { + probe.startAndBlockOnAsyncRepairs(probe.output().out, repairCmds); + } + catch (IOException e) + { + throw new RuntimeException("Error occurred attempting to finish migration for keyspace(s) " + keyspaceNames + " tables " + maybeTableNames + " and ranges " + maybeRangesStr, e); } probe.output().out.printf("Finished consensus migration range (%s) of keyspaces %s and tables %s%n", maybeRangesStr, keyspaceNames, maybeTableNames); } From f87c22cbbd7dcd013e96b4cd1220139c60e7b289 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 17 Sep 2024 09:15:22 -0700 Subject: [PATCH 144/340] Rebase fixup: SerializationsTest needed to recreate the service.SyncComplete.bin file --- .../serialization/5.1/service.SyncComplete.bin | Bin 256 -> 258 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/data/serialization/5.1/service.SyncComplete.bin b/test/data/serialization/5.1/service.SyncComplete.bin index b5f3633e7b69f3ec86fafaa921547ef4f08d5724..5e27e345fdd589b2af7accc3b3b70c3b6b8c7b78 100644 GIT binary patch delta 77 zcmZo*YGRs@ZFfWJ!i7?P!S_uK+#GCeD_=7(Ft7qK2&8}rMoa~aUqBK-z{M~j1fm!K DbQ=>W delta 75 zcmZo-YG9g>ZBx@2$P%0CC$+@t`uzxzyHyMf46Hy50x3YuSg({*8pY+EF+=A}(h14+ R2|%GQAW0zLnh*j}3IG-d5#9g* From 6265ce5187509b597d5dd2b562737df6da8b86b9 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 17 Sep 2024 09:35:32 -0700 Subject: [PATCH 145/340] Ninja fix: RandomSource.asJdkRandom did not provide a seed. This should have been fixed in CASSANDRA-19847 as it was fixed on Cassadnra trunk --- modules/accord | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/accord b/modules/accord index fb3efe9b8a87..486cd4bc15d3 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit fb3efe9b8a87f0a182545791a2b0563690d52d00 +Subproject commit 486cd4bc15d33500b7b896f9e4691a38d946b679 From 8f7532bc1080a6bd0ccf131dda9e15e1b8c51e2b Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 17 Sep 2024 20:00:51 -0700 Subject: [PATCH 146/340] Rebase fixup: Accord should follow the pattern and use requestTime.computeDeadline like the rest of the code, and Accord timeout MUST be less than user timeout Rebase fixup: when a local keyspace is being open but it isnt present return null so error msg can be provided Rebase fixup: improved metrics error msg when the exception doesnt match what is expected Rebase improvement: when we see a timeout or preempt use the new vtable to show the status cross the cluster Rebase improvement: Cluster.checkForThreadLeaks now groups similar stack traces to make the output less dense --- .../org/apache/cassandra/config/Config.java | 2 +- .../org/apache/cassandra/schema/Schema.java | 5 +- .../service/accord/AccordService.java | 2 +- .../distributed/impl/AbstractCluster.java | 18 ++++-- .../test/accord/AccordMetricsTest.java | 12 ++-- .../test/accord/AccordTestBase.java | 63 ++++++++++++++++--- 6 files changed, 80 insertions(+), 22 deletions(-) diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 8163e1d36623..06c9a5477f11 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -175,7 +175,7 @@ public static Set splitCommaDelimited(String src) public volatile DurationSpec.LongMillisecondsBound stream_transfer_task_timeout = new DurationSpec.LongMillisecondsBound("12h"); - public volatile DurationSpec.LongMillisecondsBound transaction_timeout = new DurationSpec.LongMillisecondsBound("30s"); + public volatile DurationSpec.LongMillisecondsBound transaction_timeout = new DurationSpec.LongMillisecondsBound("10s"); public volatile DurationSpec.LongMillisecondsBound cms_await_timeout = new DurationSpec.LongMillisecondsBound("120000ms"); public volatile int cms_default_max_retries = 10; diff --git a/src/java/org/apache/cassandra/schema/Schema.java b/src/java/org/apache/cassandra/schema/Schema.java index 24d76239d7b8..31a76f33b130 100644 --- a/src/java/org/apache/cassandra/schema/Schema.java +++ b/src/java/org/apache/cassandra/schema/Schema.java @@ -141,7 +141,10 @@ public Keyspace getKeyspaceInstance(String keyspaceName) if (SchemaConstants.isVirtualSystemKeyspace(keyspaceName)) return null; else if (SchemaConstants.isLocalSystemKeyspace(keyspaceName)) - return localKeyspaceInstances.get(keyspaceName).get(); + { + Supplier supplier = localKeyspaceInstances.get(keyspaceName); + return supplier == null ? null : supplier.get(); + } else return ClusterMetadata.current().schema.getKeyspace(keyspaceName); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 43bceacd590d..9026de81a929 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -719,7 +719,7 @@ public TxnResult getTxnResult(AsyncTxnResult asyncTxnResult, boolean isWrite, @N AccordClientRequestMetrics metrics = isWrite ? accordWriteMetrics : accordReadMetrics; try { - long deadlineNanos = requestTime.startedAtNanos() + DatabaseDescriptor.getTransactionTimeout(NANOSECONDS); + long deadlineNanos = requestTime.computeDeadline(DatabaseDescriptor.getTransactionTimeout(NANOSECONDS)); TxnResult result = asyncTxnResult.get(deadlineNanos - nanoTime(), NANOSECONDS); return result; } diff --git a/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java b/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java index a36c26c4f1cb..13a7dae53531 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/AbstractCluster.java @@ -29,6 +29,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -55,6 +56,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.LinkedHashMultimap; import org.junit.Assume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1161,19 +1163,23 @@ private IllegalStateException checkForThreadLeaks() //This is an alternate version of the thread leak check that just checks to see if any threads are still alive // with the context classloader. Map allThreads = Thread.getAllStackTraces(); - StringBuilder sb = new StringBuilder(); + var groupByStacktrace = LinkedHashMultimap., String>create(); for (Map.Entry e : allThreads.entrySet()) { if (!(e.getKey().getContextClassLoader() instanceof InstanceClassLoader)) continue; e.getKey().setContextClassLoader(null); - sb.append(e.getKey().getName()).append(":\n"); - for (StackTraceElement s : e.getValue()) + groupByStacktrace.put(Arrays.asList(e.getValue()), e.getKey().getName()); + } + if (groupByStacktrace.isEmpty()) return null; + StringBuilder sb = new StringBuilder(); + for (Map.Entry, Collection> e : groupByStacktrace.asMap().entrySet()) + { + sb.append("Threads: ").append(e.getValue()).append(":\n"); + for (StackTraceElement s : e.getKey()) sb.append("\t").append(s).append("\n"); } - return sb.length() > 0 - ? new IllegalStateException("Unterminated threads detected; active threads:\n" + sb) - : null; + return new IllegalStateException("Unterminated threads detected; active threads:\n" + sb); } public List tokens() diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java index f6fdba672ffe..7a0c2cdfb96e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMetricsTest.java @@ -23,8 +23,6 @@ import java.util.Map; import java.util.function.Function; -import com.google.common.base.Throwables; - import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -45,6 +43,8 @@ import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.utils.AssertionUtils; +import org.assertj.core.api.Assertions; import org.assertj.core.data.Offset; import static org.assertj.core.api.Assertions.assertThat; @@ -133,7 +133,7 @@ public void testPreemptionMetrics() } catch (RuntimeException ex) { - assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(WritePreemptedException.class.getName()); + Assertions.assertThat(ex).is(AssertionUtils.rootCauseIs(WritePreemptedException.class)); } assertCoordinatorMetrics(0, "rw", 1, 0, 1, 0, 0); @@ -151,7 +151,7 @@ public void testPreemptionMetrics() } catch (RuntimeException ex) { - assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(ReadPreemptedException.class.getName()); + Assertions.assertThat(ex).is(AssertionUtils.rootCauseIs(ReadPreemptedException.class)); } assertCoordinatorMetrics(0, "ro", 1, 0, 1, 0, 0); @@ -176,7 +176,7 @@ public void testTimeoutMetrics() } catch (RuntimeException ex) { - assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(ReadTimeoutException.class.getName()); + Assertions.assertThat(ex).is(AssertionUtils.rootCauseIs(ReadTimeoutException.class)); } assertCoordinatorMetrics(0, "ro", 0, 0, 0, 1, 0); @@ -194,7 +194,7 @@ public void testTimeoutMetrics() } catch (RuntimeException ex) { - assertThat(Throwables.getCausalChain(ex).stream().map(t -> t.getClass().getName())).contains(WriteTimeoutException.class.getName()); + Assertions.assertThat(ex).is(AssertionUtils.rootCauseIs(WriteTimeoutException.class)); } assertCoordinatorMetrics(0, "rw", 0, 0, 0, 1, 0); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index c60c064e6888..6180a5fc540d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -49,6 +49,7 @@ import accord.primitives.PartialKeyRoute; import accord.primitives.Routable.Domain; import accord.primitives.Route; +import accord.primitives.TxnId; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; @@ -75,6 +76,8 @@ import org.apache.cassandra.distributed.shared.Metrics; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.distributed.util.QueryResultUtil; +import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.exceptions.WriteTimeoutException; import org.apache.cassandra.hints.HintsService; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; @@ -101,6 +104,7 @@ import static org.apache.cassandra.db.SystemKeyspace.PAXOS; import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL; import static org.apache.cassandra.schema.SchemaConstants.SYSTEM_KEYSPACE_NAME; +import static org.apache.cassandra.schema.SchemaConstants.VIRTUAL_VIEWS; import static org.junit.Assert.assertArrayEquals; public abstract class AccordTestBase extends TestBaseImpl @@ -326,11 +330,12 @@ private static Cluster createCluster(int nodes, Function optio // need to up the timeout else tests get flaky // disable vnode for now, but should enable before trunk Cluster.Builder builder = Cluster.build(nodes) - .withoutVNodes() - .withConfig(c -> c.with(Feature.GOSSIP).set("write_request_timeout", "10s") - .set("transaction_timeout", "15s") - .set("transaction_timeout", "15s")) - .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install); + .withoutVNodes() + .withConfig(c -> c.with(Feature.GOSSIP) + .set("write_request_timeout", "10s") + .set("transaction_timeout", "15s") + .set("native_transport_timeout", "30s")) + .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install); builder = options.apply(builder); return init(builder.start()); } @@ -395,7 +400,7 @@ private static SimpleQueryResult assertRowWithPreemptedRetry(Cluster cluster, Si return result; } - private static boolean hasRootCause(RuntimeException ex, Class klass) + private static boolean hasRootCause(Throwable ex, Class klass) { return AssertionUtils.rootCauseIs(klass).matches(ex); @@ -414,11 +419,55 @@ private static SimpleQueryResult executeWithRetry0(int count, Cluster cluster, S logger.warn("[Retry attempt={}] Preempted failure for\n{}", count, check); return executeWithRetry0(count + 1, cluster, check, boundValues); } - + TxnId txnId = maybeExtractId(ex); + if (txnId != null) + { + // query the cluster to find its status... + String cql = String.format("SELECT * FROM %s.txn_blocked_by WHERE txn_id=?", VIRTUAL_VIEWS); + StringBuilder sb = new StringBuilder(); + sb.append("Txn ").append(txnId).append(" timed out...\n"); + for (IInvokableInstance inst : cluster) + { + if (inst.isShutdown()) + { + sb.append(inst).append(": is down\n"); + continue; + } + sb.append(inst).append(":\n"); + SimpleQueryResult result = inst.executeInternalWithResult(cql, txnId.toString()); + if (!result.names().isEmpty()) + sb.append(result.names()).append('\n'); + while (result.hasNext()) + { + var row = result.next(); + sb.append(Arrays.asList(row.toObjectArray())).append('\n'); + } + } + throw new AssertionError(sb.toString(), ex.getCause()); + } throw ex; } } + private static TxnId maybeExtractId(Throwable ex) + { + if (hasRootCause(ex, ReadPreemptedException.class) + || hasRootCause(ex, WritePreemptedException.class) + || hasRootCause(ex, ReadTimeoutException.class) + || hasRootCause(ex, WriteTimeoutException.class)) + { + try + { + return TxnId.parse(ex.getMessage()); + } + catch (Throwable t) + { + // ignore + } + } + return null; + } + public static SimpleQueryResult executeWithRetry(Cluster cluster, String check, Object... boundValues) { check = wrapInTxn(check); From dd1230f2a22680def7f42a982a09507e53ffbc26 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Fri, 20 Sep 2024 13:12:20 -0700 Subject: [PATCH 147/340] CEP-15 (C*): Read accord repair cfk keys from sstable index. Patch by Blake Eggleston; Reviewed by David Capwell for CASSANDRA-19920 --- .../db/marshal/AbstractCompositeType.java | 5 + .../dht/LocalCompositePrefixPartitioner.java | 341 +++++++++++++++++ .../cassandra/dht/LocalPartitioner.java | 2 +- .../cassandra/io/sstable/KeyIterator.java | 35 +- .../io/sstable/format/SSTableReader.java | 4 +- .../io/sstable/format/big/BigTableReader.java | 27 +- .../io/sstable/format/bti/BtiTableReader.java | 20 + .../cassandra/service/StorageService.java | 4 +- .../service/StorageServiceMBean.java | 3 +- .../service/accord/AccordKeyspace.java | 350 +++++++----------- .../service/accord/repair/AccordRepair.java | 3 +- .../nodetool/ConsensusMigrationAdmin.java | 2 +- .../apache/cassandra/utils/MergeIterator.java | 17 + .../format/ForwardingSSTableReader.java | 6 + .../db/marshal/AbstractTypeTest.java | 9 + .../cassandra/dht/IPartitionerTest.java | 57 +++ .../LocalCompositePrefixPartitionerTest.java | 115 ++++++ .../service/accord/AccordKeyspaceTest.java | 24 +- .../cassandra/utils/AccordGenerators.java | 2 +- .../cassandra/utils/CassandraGenerators.java | 28 +- .../utils/CassandraGeneratorsTest.java | 44 +++ 21 files changed, 836 insertions(+), 262 deletions(-) create mode 100644 src/java/org/apache/cassandra/dht/LocalCompositePrefixPartitioner.java create mode 100644 test/unit/org/apache/cassandra/dht/LocalCompositePrefixPartitionerTest.java create mode 100644 test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java diff --git a/src/java/org/apache/cassandra/db/marshal/AbstractCompositeType.java b/src/java/org/apache/cassandra/db/marshal/AbstractCompositeType.java index 737954d09bdb..60bbdd31ecec 100644 --- a/src/java/org/apache/cassandra/db/marshal/AbstractCompositeType.java +++ b/src/java/org/apache/cassandra/db/marshal/AbstractCompositeType.java @@ -98,6 +98,11 @@ public int compareCustom(VL left, ValueAccessor accessorL, VR right ++i; } + return compareCustomRemainder(left, accessorL, offsetL, right, accessorR, offsetR); + } + + protected int compareCustomRemainder(VL left, ValueAccessor accessorL, int offsetL, VR right, ValueAccessor accessorR, int offsetR) + { if (accessorL.isEmptyFromOffset(left, offsetL)) return accessorR.sizeFromOffset(right, offsetR) == 0 ? 0 : -1; diff --git a/src/java/org/apache/cassandra/dht/LocalCompositePrefixPartitioner.java b/src/java/org/apache/cassandra/dht/LocalCompositePrefixPartitioner.java new file mode 100644 index 000000000000..c0a056e043fc --- /dev/null +++ b/src/java/org/apache/cassandra/dht/LocalCompositePrefixPartitioner.java @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.dht; + +import accord.utils.Invariants; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.cassandra.db.*; +import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.lifecycle.View; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.CompositeType; +import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.db.memtable.Memtable; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.io.sstable.SSTableReadsListener; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.AbstractIterator; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.CloseableIterator; +import org.apache.cassandra.utils.MergeIterator; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * Local partitioner that supports doing range scans of composite primary keys using composite prefixes using the iterator + * methods it provides. This is neccessary for correctly handling exclusive start and inclusive end prefixes, since + * these won't work as intended given normal byte/component comparisons + */ +public class LocalCompositePrefixPartitioner extends LocalPartitioner +{ + /** + * Composite type that only compares + */ + private static class PrefixCompositeType extends CompositeType + { + public PrefixCompositeType(List> types) + { + super(types); + } + + @Override + protected int compareCustomRemainder(VL left, ValueAccessor accessorL, int offsetL, VR right, ValueAccessor accessorR, int offsetR) + { + return 0; + } + } + + public abstract class AbstractCompositePrefixToken extends LocalToken + { + public AbstractCompositePrefixToken(ByteBuffer token) + { + super(token); + } + + @Override + public int compareTo(Token o) + { + Invariants.checkArgument(o instanceof AbstractCompositePrefixToken); + AbstractCompositePrefixToken that = (AbstractCompositePrefixToken) o; + CompositeType comparator = comparatorForPrefixLength(Math.min(this.prefixSize(), that.prefixSize())); + return comparator.compare(this.token, that.token); + } + + @Override + public int hashCode() + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof AbstractCompositePrefixToken)) + return false; + return compareTo((AbstractCompositePrefixToken) obj) == 0; + } + + @Override + public ByteSource asComparableBytes(ByteComparable.Version version) + { + return comparatorForPrefixLength(prefixSize()).asComparableBytes(ByteBufferAccessor.instance, token, version); + } + + ByteBuffer token() + { + return token; + } + + abstract int prefixSize(); + } + + public class FullToken extends AbstractCompositePrefixToken + { + + public FullToken(ByteBuffer token) + { + super(token); + } + + @Override + int prefixSize() + { + return prefixComparators.size(); + } + } + + public class PrefixToken extends AbstractCompositePrefixToken + { + final int prefixSize; + public PrefixToken(ByteBuffer token, int prefixSize) + { + super(token); + Invariants.checkArgument(prefixSize > 0); + this.prefixSize = prefixSize; + } + + @Override + int prefixSize() + { + return prefixSize; + } + } + + private final Token.TokenFactory tokenFactory = new Token.TokenFactory() + { + @Override + public Token fromComparableBytes(ByteSource.Peekable comparableBytes, ByteComparable.Version version) + { + ByteBuffer tokenData = comparator.fromComparableBytes(ByteBufferAccessor.instance, comparableBytes, version); + return new FullToken(tokenData); + } + + @Override + public ByteBuffer toByteArray(Token token) + { + return ((FullToken)token).token(); + } + + @Override + public Token fromByteArray(ByteBuffer bytes) + { + return new FullToken(bytes); + } + + @Override + public String toString(Token token) + { + return comparator.getString(((FullToken)token).token()); + } + + @Override + public void validate(String token) + { + comparator.validate(comparator.fromString(token)); + } + + @Override + public Token fromString(String string) + { + return new FullToken(comparator.fromString(string)); + } + }; + + private final List prefixComparators; + + public LocalCompositePrefixPartitioner(CompositeType comparator) + { + super(comparator); + ArrayList comparators = new ArrayList<>(comparator.subTypes().size()); + comparators.add(comparator); + + List> subtypes = comparator.subTypes(); + subtypes = subtypes.subList(0, subtypes.size() - 1); + while (!subtypes.isEmpty()) + { + comparators.add(new PrefixCompositeType(subtypes)); + subtypes = subtypes.subList(0, subtypes.size() - 1); + } + + prefixComparators = ImmutableList.copyOf(Lists.reverse(comparators)); + } + + + @SuppressWarnings("rawtypes") + public LocalCompositePrefixPartitioner(AbstractType... types) + { + this(CompositeType.getInstance(types)); + } + + private CompositeType comparatorForPrefixLength(int size) + { + return prefixComparators.get(size - 1); + } + + public ByteBuffer createPrefixKey(Object... values) + { + return comparatorForPrefixLength(values.length).decompose(values); + } + + public AbstractCompositePrefixToken createPrefixToken(Object... values) + { + ByteBuffer key = createPrefixKey(values); + return values.length == prefixComparators.size() ? new FullToken(key) : new PrefixToken(key, values.length); + } + + public DecoratedKey decoratedKey(Object... values) + { + Invariants.checkArgument(values.length == prefixComparators.size()); + ByteBuffer key = createPrefixKey(values); + return decorateKey(key); + } + + + @Override + public LocalToken getToken(ByteBuffer key) + { + return new FullToken(key); + } + + @Override + public LocalToken getMinimumToken() + { + return new FullToken(ByteBufferUtil.EMPTY_BYTE_BUFFER); + } + + @Override + public Token.TokenFactory getTokenFactory() + { + return tokenFactory; + } + + + /** + * Returns a DecoratedKey iterator for the given range. Skips reading data files for sstable formats with a partition index file + */ + private static CloseableIterator keyIterator(Memtable memtable, AbstractBounds range) + { + + AbstractBounds memtableRange = range.withNewRight(memtable.metadata().partitioner.getMinimumToken().maxKeyBound()); + DataRange dataRange = new DataRange(memtableRange, new ClusteringIndexSliceFilter(Slices.ALL, false)); + UnfilteredPartitionIterator iter = memtable.partitionIterator(ColumnFilter.NONE, dataRange, SSTableReadsListener.NOOP_LISTENER); + + int rangeStartCmpMin = range.isStartInclusive() ? 0 : 1; + int rangeEndCmpMax = range.isEndInclusive() ? 0 : -1; + + return new AbstractIterator<>() + { + @Override + protected DecoratedKey computeNext() + { + while (iter.hasNext()) + { + DecoratedKey key = iter.next().partitionKey(); + if (key.compareTo(range.left) < rangeStartCmpMin) + continue; + + if (key.compareTo(range.right) > rangeEndCmpMax) + break; + + return key; + } + return endOfData(); + } + + @Override + public void close() + { + iter.close(); + } + }; + } + + public static CloseableIterator keyIterator(TableMetadata metadata, AbstractBounds range) throws IOException + { + ColumnFamilyStore cfs = Keyspace.openAndGetStore(metadata); + ColumnFamilyStore.ViewFragment view = cfs.select(View.selectLive(range)); + + List> closeableIterators = new ArrayList<>(); + List> iterators = new ArrayList<>(); + + try + { + for (Memtable memtable : view.memtables) + { + CloseableIterator iter = keyIterator(memtable, range); + iterators.add(iter); + closeableIterators.add(iter); + } + + for (SSTableReader sstable : view.sstables) + { + CloseableIterator iter = sstable.keyIterator(range); + iterators.add(iter); + closeableIterators.add(iter); + } + } + catch (Throwable e) + { + for (CloseableIterator iter: closeableIterators) + { + try + { + iter.close(); + } + catch (Throwable e2) + { + e.addSuppressed(e2); + } + } + throw e; + } + + return MergeIterator.get(iterators, DecoratedKey::compareTo, new MergeIterator.Reducer.Trivial<>()); + } +} diff --git a/src/java/org/apache/cassandra/dht/LocalPartitioner.java b/src/java/org/apache/cassandra/dht/LocalPartitioner.java index c2886fd53986..0a1ede356b22 100644 --- a/src/java/org/apache/cassandra/dht/LocalPartitioner.java +++ b/src/java/org/apache/cassandra/dht/LocalPartitioner.java @@ -40,7 +40,7 @@ public class LocalPartitioner implements IPartitioner { private static final long EMPTY_SIZE = ObjectSizes.measure(new LocalPartitioner(null).new LocalToken()); - final AbstractType comparator; // package-private to avoid access workarounds in embedded LocalToken. + protected final AbstractType comparator; public LocalPartitioner(AbstractType comparator) { diff --git a/src/java/org/apache/cassandra/io/sstable/KeyIterator.java b/src/java/org/apache/cassandra/io/sstable/KeyIterator.java index dbe501f36e7e..c8c1709503d9 100644 --- a/src/java/org/apache/cassandra/io/sstable/KeyIterator.java +++ b/src/java/org/apache/cassandra/io/sstable/KeyIterator.java @@ -21,12 +21,15 @@ import java.util.concurrent.locks.ReadWriteLock; import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.utils.AbstractIterator; import org.apache.cassandra.utils.CloseableIterator; public class KeyIterator extends AbstractIterator implements CloseableIterator { + private final AbstractBounds bounds; private final IPartitioner partitioner; private final KeyReader it; private final ReadWriteLock fileAccessLock; @@ -34,8 +37,9 @@ public class KeyIterator extends AbstractIterator implements Close private boolean initialized = false; - public KeyIterator(KeyReader it, IPartitioner partitioner, long totalBytes, ReadWriteLock fileAccessLock) + public KeyIterator(AbstractBounds bounds, KeyReader it, IPartitioner partitioner, long totalBytes, ReadWriteLock fileAccessLock) { + this.bounds = bounds; this.it = it; this.partitioner = partitioner; this.totalBytes = totalBytes; @@ -48,19 +52,26 @@ protected DecoratedKey computeNext() fileAccessLock.readLock().lock(); try { - if (!initialized) + while (true) { - initialized = true; - return it.isExhausted() - ? endOfData() - : partitioner.decorateKey(it.key()); - } - else - { - return it.advance() - ? partitioner.decorateKey(it.key()) - : endOfData(); + if (!initialized) + { + initialized = true; + if (it.isExhausted()) + break; + } + else if (!it.advance()) + break; + + DecoratedKey key = partitioner.decorateKey(it.key()); + if (bounds == null || bounds.contains(key)) + return key; + + if (key.compareTo(bounds.right) >= 0) + break; } + + return endOfData(); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java index ff488694dc82..3e011f831415 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java @@ -940,9 +940,11 @@ public UnfilteredRowIterator simpleIterator(FileDataInput file, DecoratedKey key */ public KeyIterator keyIterator() throws IOException { - return new KeyIterator(keyReader(), getPartitioner(), uncompressedLength(), new ReentrantReadWriteLock()); + return new KeyIterator(null, keyReader(), getPartitioner(), uncompressedLength(), new ReentrantReadWriteLock()); } + public abstract KeyIterator keyIterator(AbstractBounds range) throws IOException; + /** * Finds and returns the first key beyond a given token in this SSTable or null if no such key exists. */ diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java index 0864a64cee4c..f339bd9f7a7b 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java @@ -25,11 +25,15 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.locks.ReentrantReadWriteLock; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.io.sstable.*; +import org.apache.cassandra.utils.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,16 +51,6 @@ import org.apache.cassandra.db.rows.UnfilteredRowIterators; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.sstable.AbstractRowIndexEntry; -import org.apache.cassandra.io.sstable.CorruptSSTableException; -import org.apache.cassandra.io.sstable.Descriptor; -import org.apache.cassandra.io.sstable.Downsampling; -import org.apache.cassandra.io.sstable.ISSTableScanner; -import org.apache.cassandra.io.sstable.IVerifier; -import org.apache.cassandra.io.sstable.IndexInfo; -import org.apache.cassandra.io.sstable.KeyReader; -import org.apache.cassandra.io.sstable.SSTable; -import org.apache.cassandra.io.sstable.SSTableReadsListener; import org.apache.cassandra.io.sstable.SSTableReadsListener.SelectionReason; import org.apache.cassandra.io.sstable.SSTableReadsListener.SkippingReason; import org.apache.cassandra.io.sstable.format.SSTableReader; @@ -72,9 +66,6 @@ import org.apache.cassandra.io.util.FileHandle; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.io.util.RandomAccessReader; -import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.IFilter; -import org.apache.cassandra.utils.OutputHandler; import static org.apache.cassandra.utils.concurrent.SharedCloseable.sharedCopyOrNull; @@ -155,6 +146,16 @@ public KeyReader keyReader() throws IOException return BigTableKeyReader.create(ifile, rowIndexEntrySerializer); } + @Override + public KeyIterator keyIterator(AbstractBounds range) throws IOException + { + + RandomAccessReader ifileReader = ifile.createReader(); + ifileReader.seek(getIndexScanPosition(range.left)); + BigTableKeyReader keyReader = BigTableKeyReader.create(ifileReader, rowIndexEntrySerializer); + return new KeyIterator(range, keyReader, getPartitioner(), uncompressedLength(), new ReentrantReadWriteLock()); + } + @Override public KeyReader keyReader(PartitionPosition key) throws IOException { diff --git a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java index 9807e0255f00..791bcb9d18ae 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.concurrent.locks.ReentrantReadWriteLock; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -45,6 +46,7 @@ import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.IVerifier; +import org.apache.cassandra.io.sstable.KeyIterator; import org.apache.cassandra.io.sstable.KeyReader; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableReadsListener; @@ -474,6 +476,24 @@ public UnfilteredPartitionIterator partitionIterator(ColumnFilter columnFilter, return BtiTableScanner.getScanner(this, columnFilter, dataRange, listener); } + @Override + public KeyIterator keyIterator(AbstractBounds range) + { + PartitionIterator iter; + try + { + iter = PartitionIterator.create(partitionIndex, metadata().partitioner, rowIndexFile, dfile, + range.left, bounds.inclusiveLeft() ? -1 : 0, + null, 0, descriptor.version); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + + return new KeyIterator(range, iter, metadata().partitioner, uncompressedLength(), new ReentrantReadWriteLock()); + } + @Override public IVerifier getVerifier(ColumnFamilyStore cfs, OutputHandler outputHandler, boolean isOffline, IVerifier.Options options) { diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index b5de6eae4230..60d6c329ee52 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -1690,10 +1690,10 @@ public void migrateConsensusProtocol(@Nonnull String targetProtocol, public Integer finishConsensusMigration(@Nonnull String keyspace, @Nullable List maybeTableNames, @Nullable String maybeRangesStr, - @Nonnull ConsensusMigrationTarget target) + @Nonnull String target) { checkArgument(!keyspace.equals(SchemaConstants.METADATA_KEYSPACE_NAME)); - return finishMigrationToConsensusProtocol(keyspace, Optional.ofNullable(maybeTableNames), Optional.ofNullable(maybeRangesStr), target); + return finishMigrationToConsensusProtocol(keyspace, Optional.ofNullable(maybeTableNames), Optional.ofNullable(maybeRangesStr), ConsensusMigrationTarget.valueOf(target)); } @Override diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index d95ea0f77952..deb0b249d33c 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -36,7 +36,6 @@ import org.apache.cassandra.db.ColumnFamilyStoreMBean; import org.apache.cassandra.exceptions.ConfigurationException; -import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import org.apache.cassandra.utils.BreaksJMX; public interface StorageServiceMBean extends NotificationEmitter @@ -1151,7 +1150,7 @@ void migrateConsensusProtocol(@Nonnull String targetProtocol, Integer finishConsensusMigration(@Nonnull String keyspace, @Nullable List maybeTableNames, @Nullable String maybeRangesStr, - @Nonnull ConsensusMigrationTarget target); + @Nonnull String target); String listConsensusMigrations(@Nullable Set keyspaceNames, @Nullable Set tableNames, @Nonnull String format); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index fd4771e3b01e..e51017996e84 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -30,7 +29,6 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.Executor; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -43,12 +41,10 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.Key; -import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; @@ -58,17 +54,15 @@ import accord.local.SaveStatus; import accord.local.Status; import accord.local.Status.Durability; +import accord.local.cfk.CommandsForKey; import accord.primitives.Ranges; -import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.Timestamp; -import accord.primitives.Txn; import accord.primitives.TxnId; import accord.topology.Topology; import accord.utils.Invariants; import accord.utils.ReducingRangeMap; import accord.utils.async.Observable; -import org.apache.cassandra.concurrent.DebuggableTask; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.QueryOptions; @@ -86,9 +80,11 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.LivenessInfo; import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.SinglePartitionReadCommand; +import org.apache.cassandra.db.WriteContext; import org.apache.cassandra.db.filter.ClusteringIndexFilter; import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter; import org.apache.cassandra.db.filter.ColumnFilter; @@ -111,10 +107,16 @@ import org.apache.cassandra.db.rows.Row.Deletion; import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.transform.FilteredPartitions; +import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.dht.Bounds; import org.apache.cassandra.dht.ByteOrderedPartitioner; +import org.apache.cassandra.dht.ExcludingBounds; import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.IncludingExcludingBounds; +import org.apache.cassandra.dht.LocalCompositePrefixPartitioner; import org.apache.cassandra.dht.LocalPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.accord.RouteIndex; import org.apache.cassandra.io.IVersionedSerializer; @@ -148,9 +150,11 @@ import org.apache.cassandra.service.accord.serializers.TopologySerializers; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock.Global; +import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.btree.BTreeSet; import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.concurrent.OpOrder; import static accord.utils.Invariants.checkArgument; import static accord.utils.Invariants.checkState; @@ -188,15 +192,23 @@ public class AccordKeyspace private static final TupleType KEY_TYPE = new TupleType(Arrays.asList(UUIDType.instance, BytesType.instance)); private static final String KEY_TUPLE = KEY_TYPE.asCQL3Type().toString(); - // shared LocalPartitioner for all *_for_key Accord tables with (store_id, key_token, key) partition key - private static final LocalPartitioner FOR_KEYS_LOCAL_PARTITIONER = - new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE)); - private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexNamesFilter(BTreeSet.of(new ClusteringComparator(), Clustering.EMPTY), false); //TODO (now, performance): should this be partitioner rather than TableId? As of this patch distributed tables should only have 1 partitioner... private static final ConcurrentMap TABLE_SERIALIZERS = new ConcurrentHashMap<>(); + private static AccordRoutingKeyByteSource.Serializer getRoutingKeySerializer(AccordRoutingKey key) + { + return TABLE_SERIALIZERS.computeIfAbsent(key.table(), ignore -> { + IPartitioner partitioner; + if (key.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.TOKEN) + partitioner = key.asTokenKey().token().getPartitioner(); + else + partitioner = SchemaHolder.schema.getTablePartitioner(key.table()); + return AccordRoutingKeyByteSource.variableLength(partitioner); + }); + } + // Schema needs all system keyspace, and this is a system keyspace! So can not touch schema in init private static class SchemaHolder { @@ -475,14 +487,13 @@ public static Row maybeDropTruncatedCommandColumns(Row row, Cell durabilityCe + format("last_write_timestamp %s, ", TIMESTAMP_TUPLE) + "PRIMARY KEY((store_id, key_token, key))" + ')') - .partitioner(FOR_KEYS_LOCAL_PARTITIONER) + .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) .build(); public static class TimestampsForKeyColumns { static final ClusteringComparator keyComparator = TimestampsForKeys.partitionKeyAsClusteringComparator(); static final CompositeType partitionKeyType = (CompositeType) TimestampsForKeys.partitionKeyType; - static final ColumnFilter allColumns = ColumnFilter.all(TimestampsForKeys); static final ColumnMetadata store_id = getColumn(TimestampsForKeys, "store_id"); static final ColumnMetadata key_token = getColumn(TimestampsForKeys, "key_token"); static final ColumnMetadata key = getColumn(TimestampsForKeys, "key"); @@ -579,22 +590,24 @@ public static Row truncateTimestampsForKeyRow(long nowInSec, Row row, Cell lastE } } + private static final LocalCompositePrefixPartitioner CFKPartitioner = new LocalCompositePrefixPartitioner(Int32Type.instance, UUIDType.instance, BytesType.instance, BytesType.instance); private static final TableMetadata CommandsForKeys = commandsForKeysTable(COMMANDS_FOR_KEY); private static TableMetadata commandsForKeysTable(String tableName) { return parse(tableName, - "accord commands per key", - "CREATE TABLE %s (" - + "store_id int, " - + "key_token blob, " // can't use "token" as this is restricted word in CQL - + format("key %s, ", KEY_TUPLE) - + "data blob, " - + "PRIMARY KEY((store_id, key_token, key))" - + ')' - + " WITH compression = {'class':'NoopCompressor'};") - .partitioner(FOR_KEYS_LOCAL_PARTITIONER) - .build(); + "accord commands per key", + "CREATE TABLE %s (" + + "store_id int, " + + "table_id uuid, " + + "key_token blob, " // can't use "token" as this is restricted word in CQL + + "key blob, " + + "data blob, " + + "PRIMARY KEY((store_id, table_id, key_token, key))" + + ')' + + " WITH compression = {'class':'NoopCompressor'};") + .partitioner(CFKPartitioner) + .build(); } public static class CommandsForKeyAccessor @@ -604,6 +617,7 @@ public static class CommandsForKeyAccessor final CompositeType partitionKeyType; final ColumnFilter allColumns; final ColumnMetadata store_id; + final ColumnMetadata table_id; final ColumnMetadata key_token; final ColumnMetadata key; final ColumnMetadata data; @@ -617,6 +631,7 @@ public CommandsForKeyAccessor(TableMetadata table) this.partitionKeyType = (CompositeType) table.partitionKeyType; this.allColumns = ColumnFilter.all(table); this.store_id = getColumn(table, "store_id"); + this.table_id = getColumn(table, "table_id"); this.key_token = getColumn(table, "key_token"); this.key = getColumn(table, "key"); this.data = getColumn(table, "data"); @@ -633,6 +648,11 @@ public int getStoreId(ByteBuffer[] partitionKeyComponents) return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); } + public TableId getTableId(ByteBuffer[] partitionKeyComponents) + { + return TableId.fromUUID(UUIDType.instance.compose(partitionKeyComponents[table_id.position()])); + } + public PartitionKey getKey(DecoratedKey key) { return getKey(splitPartitionKey(key)); @@ -640,7 +660,12 @@ public PartitionKey getKey(DecoratedKey key) public PartitionKey getKey(ByteBuffer[] partitionKeyComponents) { - return deserializeKey(partitionKeyComponents[key.position()]); + TableId tableId = TableId.fromUUID(UUIDSerializer.instance.deserialize(partitionKeyComponents[table_id.position()])); + ByteBuffer keyBytes = partitionKeyComponents[key.position()]; + IPartitioner partitioner = SchemaHolder.schema.getTablePartitioner(tableId); + if (partitioner == null) + throw new IllegalStateException("Table with id " + tableId + " could not be found; was it deleted?"); + return new PartitionKey(tableId, partitioner.decorateKey(keyBytes)); } public CommandsForKey getCommandsForKey(PartitionKey key, Row row) @@ -652,6 +677,13 @@ public CommandsForKey getCommandsForKey(PartitionKey key, Row row) return CommandsForKeySerializer.fromBytes(key, cell.buffer()); } + @VisibleForTesting + public ByteBuffer serializeKeyNoTable(AccordRoutingKey key) + { + byte[] bytes = getRoutingKeySerializer(key).serializeNoTable(key); + return ByteBuffer.wrap(bytes); + } + // TODO (expected): garbage-free filtering, reusing encoding public Row withoutRedundantCommands(PartitionKey key, Row row, RedundantBefore.Entry redundantBefore) { @@ -674,6 +706,19 @@ public Row withoutRedundantCommands(PartitionKey key, Row row, RedundantBefore.E ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(updated); return BTreeRow.singleCellRow(Clustering.EMPTY, BufferCell.live(data, cell.timestamp(), buffer)); } + + public LocalCompositePrefixPartitioner.AbstractCompositePrefixToken getPrefixToken(int commandStore, AccordRoutingKey key) + { + if (key.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.TOKEN) + { + ByteBuffer tokenBytes = ByteBuffer.wrap(getRoutingKeySerializer(key).serializeNoTable(key)); + return CFKPartitioner.createPrefixToken(commandStore, key.table().asUUID(), tokenBytes); + } + else + { + return CFKPartitioner.createPrefixToken(commandStore, key.table().asUUID()); + } + } } public static final CommandsForKeyAccessor CommandsForKeysAccessor = new CommandsForKeyAccessor(CommandsForKeys); @@ -940,201 +985,70 @@ public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId t txnId.msb, txnId.lsb, txnId.node.id); } - private static abstract class TableWalk implements Runnable, DebuggableTask + /** + * Calculates token bounds based on key prefixes. + */ + public static void findAllKeysBetween(int commandStore, + AccordRoutingKey start, boolean startInclusive, + AccordRoutingKey end, boolean endInclusive, + Observable callback) { - private final long creationTimeNanos = Global.nanoTime(); - private final Executor executor; - private final Observable callback; - private long startTimeNanos = -1; - private int numQueries = 0; - private UntypedResultSet.Row lastSeen = null; - private TableWalk(Executor executor, Observable callback) - { - this.executor = executor; - this.callback = callback; - } + Token startToken = CommandsForKeysAccessor.getPrefixToken(commandStore, start); + Token endToken = CommandsForKeysAccessor.getPrefixToken(commandStore, end); - protected abstract UntypedResultSet query(UntypedResultSet.Row lastSeen); + if (start instanceof AccordRoutingKey.SentinelKey) + startInclusive = true; + if (end instanceof AccordRoutingKey.SentinelKey) + endInclusive = true; - public final void schedule() - { - executor.execute(this); - } + PartitionPosition startPosition = startInclusive ? startToken.minKeyBound() : startToken.maxKeyBound(); + PartitionPosition endPosition = endInclusive ? endToken.maxKeyBound() : endToken.minKeyBound(); + AbstractBounds bounds; + if (startInclusive && endInclusive) + bounds = new Bounds<>(startPosition, endPosition); + else if (endInclusive) + bounds = new Range<>(startPosition, endPosition); + else if (startInclusive) + bounds = new IncludingExcludingBounds<>(startPosition, endPosition); + else + bounds = new ExcludingBounds<>(startPosition, endPosition); - @Override - public final void run() - { - try + Stage.READ.executor().submit(() -> { + ColumnFamilyStore baseCfs = Keyspace.openAndGetStore(CommandsForKeys); + try (OpOrder.Group baseOp = baseCfs.readOrdering.start(); + WriteContext writeContext = baseCfs.keyspace.getWriteHandler().createContextForRead(); + CloseableIterator iter = LocalCompositePrefixPartitioner.keyIterator(CommandsForKeys, bounds)) { - if (startTimeNanos == -1) - startTimeNanos = Global.nanoTime(); - numQueries++; - UntypedResultSet result = query(lastSeen); - if (result.isEmpty()) + // Need the second try to handle callback errors vs read errors. + // Callback will see the read errors, but if the callback fails the outer try will see those errors + try { + while (iter.hasNext()) + { + PartitionKey pk = CommandsForKeysAccessor.getKey(iter.next()); + callback.onNext(pk); + } callback.onCompleted(); - return; } - UntypedResultSet.Row lastRow = null; - for (UntypedResultSet.Row row : result) + catch (Exception e) { - callback.onNext(row); - lastRow = row; + callback.onError(e); } - lastSeen = lastRow; - schedule(); } - catch (Throwable t) - { - callback.onError(t); - } - } - - @Override - public long creationTimeNanos() - { - return creationTimeNanos; - } - - @Override - public long startTimeNanos() - { - return startTimeNanos; - } - - @Override - public String description() - { - return format("Table Walker for %s; queries = %d", getClass().getSimpleName(), numQueries); - } - } - - private static String selection(TableMetadata metadata, Set requiredColumns, Set forIteration) - { - StringBuilder selection = new StringBuilder(); - if (requiredColumns.isEmpty()) - selection.append("*"); - else - { - Sets.SetView other = Sets.difference(requiredColumns, forIteration); - for (String name : other) - { - ColumnMetadata meta = metadata.getColumn(new ColumnIdentifier(name, true)); - if (meta == null) - throw new IllegalArgumentException("Unknown column: " + name); - } - List names = new ArrayList<>(forIteration.size() + other.size()); - names.addAll(forIteration); - names.addAll(other); - // this sort is to make sure the CQL is determanistic - Collections.sort(names); - for (int i = 0; i < names.size(); i++) - { - if (i > 0) - selection.append(", "); - selection.append(names.get(i)); - } - } - return selection.toString(); - } - - private static class WalkCommandsForDomain extends TableWalk - { - private static final Set COLUMNS_FOR_ITERATION = ImmutableSet.of("txn_id", "store_id", "domain"); - private final String cql; - private final int storeId, domain; - - private WalkCommandsForDomain(int commandStore, Routable.Domain domain, Set requiredColumns, Executor executor, Observable callback) - { - super(executor, callback); - this.storeId = commandStore; - this.domain = domain.ordinal(); - cql = format("SELECT %s " + - "FROM %s " + - "WHERE store_id = ? " + - " AND domain = ? " + - " AND token(store_id, domain, txn_id) > token(?, ?, (?, ?, ?)) " + - "ALLOW FILTERING", selection(Commands, requiredColumns, COLUMNS_FOR_ITERATION), Commands); - } - - @Override - protected UntypedResultSet query(UntypedResultSet.Row lastSeen) - { - TxnId lastTxnId = lastSeen == null ? - new TxnId(0, 0, Txn.Kind.Read, Routable.Domain.Key, Node.Id.NONE) - : deserializeTxnId(lastSeen); - return executeInternal(cql, storeId, domain, storeId, domain, lastTxnId.msb, lastTxnId.lsb, lastTxnId.node.id); - } - } - - public static void findAllKeysBetween(int commandStore, - AccordRoutingKey start, boolean startInclusive, - AccordRoutingKey end, boolean endInclusive, - Observable callback) - { - //TODO (optimize) : CQL doesn't look smart enough to only walk Index.db, and ends up walking the Data.db file for each row in the partitions found (for frequent keys, this cost adds up) - // it would be possible to find all SSTables that "could" intersect this range, then have a merge iterator over the Index.db (filtered to the range; index stores partition liveness)... - KeysBetween work = new KeysBetween(commandStore, - AccordKeyspace.serializeRoutingKey(start), startInclusive, - AccordKeyspace.serializeRoutingKey(end), endInclusive, - ImmutableSet.of("key"), - Stage.READ.executor(), Observable.distinct(callback).map(AccordKeyspace::deserializeKey)); - work.schedule(); - } - - private static class KeysBetween extends TableWalk - { - private static final Set COLUMNS_FOR_ITERATION = ImmutableSet.of("store_id", "key_token"); - - private final int storeId; - private final ByteBuffer start, end; - private final String cqlFirst; - private final String cqlContinue; - - private KeysBetween(int storeId, - ByteBuffer start, boolean startInclusive, - ByteBuffer end, boolean endInclusive, - Set requiredColumns, - Executor executor, Observable callback) - { - super(executor, callback); - this.storeId = storeId; - this.start = start; - this.end = end; - - String selection = selection(CommandsForKeys, requiredColumns, COLUMNS_FOR_ITERATION); - this.cqlFirst = format("SELECT DISTINCT %s\n" + - "FROM %s\n" + - "WHERE store_id = ?\n" + - (startInclusive ? " AND key_token >= ?\n" : " AND key_token > ?\n") + - (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + - "ALLOW FILTERING", - selection, CommandsForKeys); - this.cqlContinue = format("SELECT DISTINCT %s\n" + - "FROM %s\n" + - "WHERE store_id = ?\n" + - " AND key_token > ?\n" + - " AND key > ?\n" + - (endInclusive ? " AND key_token <= ?\n" : " AND key_token < ?\n") + - "ALLOW FILTERING", - selection, CommandsForKeys); - } - - @Override - protected UntypedResultSet query(UntypedResultSet.Row lastSeen) - { - if (lastSeen == null) - { - return executeInternal(cqlFirst, storeId, start, end); - } - else + catch (IOException e) { - ByteBuffer previousToken = lastSeen.getBytes("key_token"); - ByteBuffer previousKey = lastSeen.getBytes("key"); - return executeInternal(cqlContinue, storeId, previousToken, previousKey, end); + try + { + callback.onError(e); + } + catch (Throwable t) + { + e.addSuppressed(t); + } + throw new RuntimeException(e); } - } + }); } public static TxnId deserializeTxnId(UntypedResultSet.Row row) @@ -1272,26 +1186,24 @@ public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore com private static DecoratedKey makeKey(CommandsForKeyAccessor accessor, int storeId, PartitionKey key) { ByteBuffer pk = accessor.keyComparator.make(storeId, + UUIDSerializer.instance.serialize(key.table().asUUID()), serializeRoutingKey(key.toUnseekable()), - serializeKey(key)).serializeAsPartitionKey(); + key.partitionKey().getKey()).serializeAsPartitionKey(); return accessor.table.partitioner.decorateKey(pk); } @VisibleForTesting public static ByteBuffer serializeRoutingKey(AccordRoutingKey routingKey) { - AccordRoutingKeyByteSource.Serializer serializer = TABLE_SERIALIZERS.computeIfAbsent(routingKey.table(), ignore -> { - IPartitioner partitioner; - if (routingKey.kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.TOKEN) - partitioner = routingKey.asTokenKey().token().getPartitioner(); - else - partitioner = SchemaHolder.schema.getTablePartitioner(routingKey.table()); - return AccordRoutingKeyByteSource.variableLength(partitioner); - }); - byte[] bytes = serializer.serialize(routingKey); + byte[] bytes = getRoutingKeySerializer(routingKey).serialize(routingKey); return ByteBuffer.wrap(bytes); } + public static ByteBuffer serializeRoutingKeyNoTable(AccordRoutingKey key) + { + return CommandsForKeysAccessor.serializeKeyNoTable(key); + } + private static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, PartitionKey key, CommandsForKey commandsForKey, long timestampMicros) { ByteBuffer bytes = CommandsForKeySerializer.toBytesWithoutKey(commandsForKey); @@ -1475,7 +1387,7 @@ public static EpochDiskState loadEpochDiskState() /** * Update the disk state for this epoch, if it's higher than the one we have one disk. - * + *

      * This is meant to be called before any update involving the new epoch, not after. This way if the update * fails, we can detect and cleanup. If we updated disk state after an update and it failed, we could "forget" * about (now acked) topology updates after a restart. @@ -1624,7 +1536,6 @@ public static void loadEpoch(long epoch, TopologyLoadConsumer consumer) throws I Ranges redundant = row.has("redundant") ? blobMapToRanges(row.getMap("redundant", BytesType.instance, BytesType.instance)) : Ranges.EMPTY; consumer.load(epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant); - } public static EpochDiskState loadTopologies(TopologyLoadConsumer consumer) @@ -1710,8 +1621,8 @@ public static Future updateSafeToRead(CommandStore commandStore, NavigableMap public interface CommandStoreMetadataConsumer { void accept(ReducingRangeMap rejectBefore, DurableBefore durableBefore, RedundantBefore redundantBefore, NavigableMap bootstrapBeganAt, NavigableMap safeToRead); - } + public static void loadCommandStoreMetadata(int id, CommandStoreMetadataConsumer consumer) { UntypedResultSet result = executeOnceInternal(format("SELECT * FROM %s.%s WHERE store_id=?", ACCORD_KEYSPACE_NAME, COMMAND_STORE_METADATA), id); @@ -1758,5 +1669,4 @@ public static void unsafeClear() TABLE_SERIALIZERS.clear(); SchemaHolder.schema = Schema.instance; } - } diff --git a/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java index 18fb045475df..51497792cd43 100644 --- a/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java +++ b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java @@ -124,10 +124,11 @@ private List repairRange(TokenRange range) throws Throw List repairedRanges = new ArrayList<>(); int rangeStepUpdateInterval = ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL.getInt(); RoutingKey remainingStart = range.start(); + // TODO (expected): repair ranges should have a configurable lower limit of split size so already small repairs aren't broken up into excessively tiny ones BigInteger rangeSize = splitter.sizeOf(range); if (rangeStep == null) { - BigInteger divide = splitter.divide(rangeSize, 1000); + BigInteger divide = splitter.divide(rangeSize, 10000); rangeStep = divide.equals(BigInteger.ZERO) ? rangeSize : BigInteger.ONE.max(divide); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java index e7e1fd9cfcf1..7eea5a021b83 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java +++ b/src/java/org/apache/cassandra/tools/nodetool/ConsensusMigrationAdmin.java @@ -119,7 +119,7 @@ public FinishMigrationRepairCommand(NodeProbe probe, String keyspace, List { + public static class Trivial extends Reducer + { + private T reduced = null; + + @Override + public boolean trivialReduceIsTrivial() { return true; } + + @Override + public void reduce(int idx, T current) { reduced = current; } + + @Override + protected T getReduced() { return reduced; } + + @Override + protected void onKeyChange() { reduced = null; } + } + /** * @return true if Out is the same as In for the case of a single source iterator */ diff --git a/test/distributed/org/apache/cassandra/io/sstable/format/ForwardingSSTableReader.java b/test/distributed/org/apache/cassandra/io/sstable/format/ForwardingSSTableReader.java index 60168f65196c..952663c50e14 100644 --- a/test/distributed/org/apache/cassandra/io/sstable/format/ForwardingSSTableReader.java +++ b/test/distributed/org/apache/cassandra/io/sstable/format/ForwardingSSTableReader.java @@ -254,6 +254,12 @@ public KeyIterator keyIterator() throws IOException return delegate.keyIterator(); } + @Override + public KeyIterator keyIterator(AbstractBounds range) throws IOException + { + return delegate.keyIterator(range); + } + @Override public DecoratedKey firstKeyBeyond(PartitionPosition token) { diff --git a/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java b/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java index 630c322fce3a..348559e4fd3b 100644 --- a/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/AbstractTypeTest.java @@ -233,6 +233,8 @@ public void allTypesCovered() continue; if (isTestType(klass)) continue; + if (isPrefixCompositeType(klass)) + continue; String name = klass.getCanonicalName(); if (name == null) name = klass.getName(); @@ -259,6 +261,13 @@ private boolean isTestType(Class klass) return "test".equals(new File(src.getLocation().getPath()).name()); } + @SuppressWarnings("rawtypes") + private boolean isPrefixCompositeType(Class klass) + { + String name = klass.getCanonicalName(); + return name.contains("PrefixCompositeType"); + } + @Test public void isConstrainedTest() { diff --git a/test/unit/org/apache/cassandra/dht/IPartitionerTest.java b/test/unit/org/apache/cassandra/dht/IPartitionerTest.java index 5e46f09ed6e0..e531e25da5b0 100644 --- a/test/unit/org/apache/cassandra/dht/IPartitionerTest.java +++ b/test/unit/org/apache/cassandra/dht/IPartitionerTest.java @@ -18,21 +18,78 @@ package org.apache.cassandra.dht; +import java.lang.reflect.Modifier; +import java.security.CodeSource; +import java.security.ProtectionDomain; import java.util.Objects; +import java.util.Set; +import com.google.common.collect.Sets; import org.junit.Test; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.utils.AbstractTypeGenerators; import org.apache.cassandra.utils.AccordGenerators; import org.apache.cassandra.utils.CassandraGenerators; import org.apache.cassandra.utils.bytecomparable.ByteComparable; import org.apache.cassandra.utils.bytecomparable.ByteSource; import org.assertj.core.api.Assertions; +import org.reflections.Reflections; +import org.reflections.scanners.Scanners; +import org.reflections.util.ConfigurationBuilder; import static accord.utils.Property.qt; public class IPartitionerTest { + //TODO (now, maintaince): this is copied from AbstractTypeTest + private static final Reflections reflections = new Reflections(new ConfigurationBuilder() + .forPackage("org.apache.cassandra") + .setScanners(Scanners.SubTypes) + .setExpandSuperTypes(true) + .setParallel(true)); + + @Test + public void allCovered() + { + Set> subTypes = reflections.getSubTypesOf(IPartitioner.class); + Set> coverage = CassandraGenerators.knownPartitioners(); + StringBuilder sb = new StringBuilder(); + for (Class klass : Sets.difference(subTypes, coverage)) + { + if (Modifier.isAbstract(klass.getModifiers())) + continue; + if (isTestType(klass)) + continue; + if (ReversedLongLocalPartitioner.class.equals(klass)) + continue; + String name = klass.getCanonicalName(); + if (name == null) + name = klass.getName(); + sb.append(name).append('\n'); + } + if (sb.length() > 0) + throw new AssertionError("Uncovered types:\n" + sb); + } + + private boolean isTestType(Class klass) + { + String name = klass.getCanonicalName(); + if (name == null) + name = klass.getName(); + if (name == null) + name = klass.toString(); + if (name.contains("Test")) + return true; + if (name.equals(LengthPartitioner.class.getCanonicalName())) + return true; + ProtectionDomain domain = klass.getProtectionDomain(); + if (domain == null) return false; + CodeSource src = domain.getCodeSource(); + if (src == null) return false; + return "test".equals(new File(src.getLocation().getPath()).name()); + } + @Test public void byteCompareSerde() { diff --git a/test/unit/org/apache/cassandra/dht/LocalCompositePrefixPartitionerTest.java b/test/unit/org/apache/cassandra/dht/LocalCompositePrefixPartitionerTest.java new file mode 100644 index 000000000000..565e5a59fd65 --- /dev/null +++ b/test/unit/org/apache/cassandra/dht/LocalCompositePrefixPartitionerTest.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.dht; + +import com.google.common.collect.Lists; +import org.apache.cassandra.CassandraTestBase; +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.BytesType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.TableMetadata; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import static java.lang.String.format; +import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; +import static org.apache.cassandra.utils.ByteBufferUtil.hexToBytes; + +public class LocalCompositePrefixPartitionerTest extends CassandraTestBase +{ + private static final String KEYSPACE = "ks"; + @BeforeClass + public static void setupClass() + { + SchemaLoader.prepareServer(); + } + + private static TableMetadata.Builder parse(String keyspace, String name, String cql) + { + return CreateTableStatement.parse(format(cql, name), KEYSPACE) + .gcGraceSeconds((int) TimeUnit.DAYS.toSeconds(90)); + } + + private static LocalCompositePrefixPartitioner partitioner(AbstractType... types) + { + return new LocalCompositePrefixPartitioner(types); + } + + private static void assertKeysMatch(Iterable expected, Iterator actual) + { + List expectedList = Lists.newArrayList(expected); + List actualList = Lists.newArrayList(actual); + Assert.assertEquals(expectedList, actualList); + } + + @Test + public void keyIteratorTest() throws Throwable + { + String keyspaceName = "ks"; + String tableName = "tbl"; + LocalCompositePrefixPartitioner partitioner = partitioner(Int32Type.instance, BytesType.instance, Int32Type.instance); + TableMetadata metadata = parse(keyspaceName, tableName, + "CREATE TABLE %s (" + + "p1 int," + + "p2 blob," + + "p3 int," + + "v int," + + "PRIMARY KEY ((p1, p2, p3))" + + ")").partitioner(partitioner).build(); + SchemaLoader.createKeyspace(keyspaceName, KeyspaceParams.local(), metadata); + + executeOnceInternal(String.format("INSERT INTO %s.%s (p1, p2, p3, v) VALUES (1, 0x00, 5, 0)", keyspaceName, tableName)); + executeOnceInternal(String.format("INSERT INTO %s.%s (p1, p2, p3, v) VALUES (1, 0x0000, 5, 0)", keyspaceName, tableName)); + executeOnceInternal(String.format("INSERT INTO %s.%s (p1, p2, p3, v) VALUES (2, 0x00, 5, 0)", keyspaceName, tableName)); + executeOnceInternal(String.format("INSERT INTO %s.%s (p1, p2, p3, v) VALUES (2, 0x0100, 5, 0)", keyspaceName, tableName)); + executeOnceInternal(String.format("INSERT INTO %s.%s (p1, p2, p3, v) VALUES (2, 0x02, 5, 0)", keyspaceName, tableName)); + executeOnceInternal(String.format("INSERT INTO %s.%s (p1, p2, p3, v) VALUES (2, 0x02, 6, 0)", keyspaceName, tableName)); + + Token startToken = partitioner.createPrefixToken(1, hexToBytes("0000")); + Token endToken1 = partitioner.createPrefixToken(2, hexToBytes("0100")); + Token endToken2 = partitioner.createPrefixToken(2, hexToBytes("02")); + + + assertKeysMatch(List.of(partitioner.decoratedKey(2, hexToBytes("00"), 5), + partitioner.decoratedKey(2, hexToBytes("0100"), 5) + ), partitioner.keyIterator(metadata, new Range<>(startToken.maxKeyBound(), endToken1.maxKeyBound()))); + + assertKeysMatch(List.of(partitioner.decoratedKey(1, hexToBytes("0000"), 5), + partitioner.decoratedKey(2, hexToBytes("00"), 5), + partitioner.decoratedKey(2, hexToBytes("0100"), 5), + partitioner.decoratedKey(2, hexToBytes("02"), 5), + partitioner.decoratedKey(2, hexToBytes("02"), 6) + ), partitioner.keyIterator(metadata, new Bounds<>(startToken.minKeyBound(), endToken2.maxKeyBound()))); + + assertKeysMatch(List.of(partitioner.decoratedKey(1, hexToBytes("0000"), 5), + partitioner.decoratedKey(2, hexToBytes("00"), 5), + partitioner.decoratedKey(2, hexToBytes("0100"), 5) + ), partitioner.keyIterator(metadata, new IncludingExcludingBounds<>(startToken.minKeyBound(), endToken2.minKeyBound()))); + + } +} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 717cda1de385..02d823cfd453 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -60,6 +60,7 @@ import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.LocalPartitioner; +import org.apache.cassandra.dht.ReversedLongLocalPartitioner; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.schema.MemtableParams; import org.apache.cassandra.schema.Schema; @@ -183,8 +184,13 @@ public void findOverlappingKeys() { TableId tableId = rs.pickOrderedSet(tables.navigableKeySet()); IPartitioner partitioner = tables.get(tableId); - ByteBuffer data = !(partitioner instanceof LocalPartitioner) ? Int32Type.instance.decompose(rs.nextInt()) - : fromQT(getTypeSupport(partitioner.getTokenValidator()).bytesGen()).next(rs); + ByteBuffer data; + if (partitioner instanceof ReversedLongLocalPartitioner) + data = fromQT(CassandraGenerators.reversedLongLocalKeys()).next(rs); + else if (partitioner instanceof LocalPartitioner) + data = fromQT(getTypeSupport(partitioner.getTokenValidator()).bytesGen()).next(rs); + else + data = Int32Type.instance.decompose(rs.nextInt()); PartitionKey key = new PartitionKey(tableId, tables.get(tableId).decorateKey(data)); if (keys.add(key)) { @@ -200,8 +206,8 @@ public void findOverlappingKeys() // The memtable will allow the write, but it will be dropped when writing to the SSTable... //TODO (now, correctness): since we store the user token + user key, if a key is close to the PK limits then we could tip over and loose our CFK // new Mutation(AccordKeyspace.getCommandsForKeyPartitionUpdate(store, pk, 42, ByteBufferUtil.EMPTY_BYTE_BUFFER)).apply(); - execute("INSERT INTO system_accord.commands_for_key (store_id, key_token, key) VALUES (?, ?, ?)", - store, AccordKeyspace.serializeRoutingKey(pk.toUnseekable()), AccordKeyspace.serializeKey(pk)); + execute("INSERT INTO system_accord.commands_for_key (store_id, table_id, key_token, key) VALUES (?, ?, ?, ?)", + store, pk.table().asUUID(), AccordKeyspace.serializeRoutingKeyNoTable(pk.toUnseekable()), pk.partitionKey().getKey()); } catch (IllegalArgumentException | InvalidRequestException e) { @@ -235,17 +241,21 @@ public void findOverlappingKeys() for (var e : storesToKeys.entrySet()) { int store = e.getKey(); - expectedCqlStoresToKeys.put(store, new TreeSet<>(e.getValue().stream().map(p -> AccordKeyspace.serializeRoutingKey(p.toUnseekable())).collect(Collectors.toList()))); + SortedSet keys = e.getValue(); + if (keys.isEmpty()) + continue; + expectedCqlStoresToKeys.put(store, new TreeSet<>(keys.stream().map(p -> AccordKeyspace.serializeRoutingKeyNoTable(p.toUnseekable())).collect(Collectors.toList()))); } // make sure no data loss... when this test was written sstable had all the rows but the sstable didn't... this // is mostly a santity check to detect that case early - var resultSet = execute("SELECT store_id, key_token FROM system_accord.commands_for_key ALLOW FILTERING"); + var resultSet = execute("SELECT store_id, table_id, key_token FROM system_accord.commands_for_key ALLOW FILTERING"); TreeMap> cqlStoresToKeys = new TreeMap<>(); for (var row : resultSet) { int storeId = row.getInt("store_id"); ByteBuffer bb = row.getBytes("key_token"); + // FIXME: include table_id cqlStoresToKeys.computeIfAbsent(storeId, ignore -> new TreeSet<>()).add(bb); } Assertions.assertThat(cqlStoresToKeys).isEqualTo(expectedCqlStoresToKeys); @@ -255,6 +265,8 @@ public void findOverlappingKeys() { int store = rs.pickOrderedSet(storesToKeys.navigableKeySet()); var keysForStore = new ArrayList<>(storesToKeys.get(store)); + if (keysForStore.isEmpty()) + continue; int offset; int offsetEnd; diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index e827bb33d655..cf23494d8463 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -70,7 +70,7 @@ public class AccordGenerators { - private static final Gen PARTITIONER_GEN = fromQT(CassandraGenerators.partitioners()); + private static final Gen PARTITIONER_GEN = fromQT(CassandraGenerators.nonLocalPartitioners()); private AccordGenerators() { diff --git a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java index 1fc46ff2096d..a114c4605e30 100644 --- a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java +++ b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java @@ -78,6 +78,7 @@ import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.dht.ByteOrderedPartitioner; import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.LocalCompositePrefixPartitioner; import org.apache.cassandra.dht.LocalPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.OrderPreservingPartitioner; @@ -1303,6 +1304,16 @@ public static Gen localPartitioner() return AbstractTypeGenerators.safeTypeGen().map(LocalPartitioner::new); } + public static Gen localCompositePrefixPartitioner() + { + return AbstractTypeGenerators.safeTypeGen().map(type -> { + if (type instanceof CompositeType) + return new LocalCompositePrefixPartitioner((CompositeType) type); + else + return new LocalCompositePrefixPartitioner(type); + }); + } + public static Gen localPartitionerToken() { var lpGen = localPartitioner(); @@ -1313,6 +1324,16 @@ public static Gen localPartitionerToken() }; } + public static Gen localCompositePrefixPartitionerToken() + { + var lpGen = localCompositePrefixPartitioner(); + return rs -> { + var lp = lpGen.generate(rs); + var bytes = AbstractTypeGenerators.getTypeSupport(lp.getTokenValidator()).bytesGen(); + return lp.getToken(bytes.generate(rs)); + }; + } + public static Gen reversedLongLocalToken() { Constraint range = Constraint.between(0, Long.MAX_VALUE); @@ -1348,7 +1369,8 @@ private enum SupportedPartitioners ByteOrdered(ByteOrderedPartitioner.class, ignore -> ByteOrderedPartitioner.instance), Random(RandomPartitioner.class, ignore -> RandomPartitioner.instance), Local(LocalPartitioner.class, localPartitioner()), - OrderPreserving(OrderPreservingPartitioner.class, ignore -> OrderPreservingPartitioner.instance); + OrderPreserving(OrderPreservingPartitioner.class, ignore -> OrderPreservingPartitioner.instance), + LocalCompositePrefix(LocalCompositePrefixPartitioner.class, localCompositePrefixPartitioner()); private final Class clazz; private final Gen partitioner; @@ -1388,7 +1410,8 @@ public static Gen partitioners() public static Gen nonLocalPartitioners() { return SourceDSL.arbitrary().enumValues(SupportedPartitioners.class) - .assuming(p -> p != SupportedPartitioners.Local) + .assuming(p -> p != SupportedPartitioners.Local && + p != SupportedPartitioners.LocalCompositePrefix) .flatMap(SupportedPartitioners::partitioner); } @@ -1420,6 +1443,7 @@ public static Gen token(IPartitioner partitioner) if (partitioner instanceof Murmur3Partitioner) return murmurToken(); if (partitioner instanceof ByteOrderedPartitioner) return byteOrderToken(); if (partitioner instanceof RandomPartitioner) return randomPartitionerToken(); + if (partitioner instanceof LocalCompositePrefixPartitioner) return localCompositePrefixPartitionerToken(); if (partitioner instanceof LocalPartitioner) return localPartitionerToken((LocalPartitioner) partitioner); if (partitioner instanceof OrderPreservingPartitioner) return orderPreservingToken(); throw new UnsupportedOperationException("Unsupported partitioner: " + partitioner.getClass()); diff --git a/test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java b/test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java new file mode 100644 index 000000000000..0e2c60a34da1 --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import org.junit.Test; + +import accord.utilsfork.Gens; +import org.assertj.core.api.Assertions; + +import static accord.utilsfork.Property.qt; +import static org.apache.cassandra.utils.Generators.toGen; + +public class CassandraGeneratorsTest +{ + @Test + public void partitionerToToken() + { + qt().forAll(Gens.random(), toGen(CassandraGenerators.partitioners())) + .check((rs, p) -> Assertions.assertThat(toGen(CassandraGenerators.token(p)).next(rs)).isNotNull()); + } + + @Test + public void partitionerKeys() + { + qt().forAll(Gens.random(), toGen(CassandraGenerators.partitioners())) + .check((rs, p) -> Assertions.assertThat(toGen(CassandraGenerators.decoratedKeys(i -> p)).next(rs)).isNotNull()); + } +} From fbfb633bda1211d8b64f46d4c03ce26e7526ce39 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Mon, 9 Sep 2024 14:14:40 -0700 Subject: [PATCH 148/340] CEP-15 (C*) - misc accord perf improvements Patch by Blake Eggleston; Reviewed by David Capwell for CASSANDRA-19940 Changes: Increase accord repair range splitting Streamline table metadata fetching - removes some unnecessary abstraction from the table metadata lookup path Remote unnecessary set building when building lists of overlapping keys Add separate recover delay for repair and increase default recover delay --- .../org/apache/cassandra/config/AccordSpec.java | 16 +++++++++++++++- .../org/apache/cassandra/schema/Schema.java | 11 ++++++++--- .../service/accord/api/AccordAgent.java | 2 +- .../service/accord/async/AsyncLoader.java | 17 +++++++++-------- .../service/accord/repair/AccordRepair.java | 1 + .../accord/AccordIncrementalRepairTest.java | 2 +- .../test/accord/AccordProgressLogTest.java | 4 +++- 7 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 102ae68b67c7..39fefbf187e3 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -18,10 +18,15 @@ package org.apache.cassandra.config; +import accord.primitives.Routable; +import accord.primitives.Txn; +import accord.primitives.TxnId; import com.fasterxml.jackson.annotation.JsonIgnore; import org.apache.cassandra.journal.Params; import org.apache.cassandra.service.consensus.TransactionalMode; +import java.util.concurrent.TimeUnit; + public class AccordSpec { public volatile boolean enabled = false; @@ -32,7 +37,16 @@ public class AccordSpec public volatile OptionaldPositiveInt shard_count = OptionaldPositiveInt.UNDEFINED; - public volatile DurationSpec.IntMillisecondsBound recover_delay = new DurationSpec.IntMillisecondsBound(1000); + // TODO (expected): we should be able to support lower recover delays, at least for txns + public volatile DurationSpec.IntMillisecondsBound recover_delay = new DurationSpec.IntMillisecondsBound(5000); + public volatile DurationSpec.IntMillisecondsBound range_sync_recover_delay = new DurationSpec.IntMillisecondsBound(10000); + + public long recoveryDelayFor(TxnId txnId, TimeUnit unit) + { + if (txnId.kind() == Txn.Kind.SyncPoint && txnId.domain() == Routable.Domain.Range) + return range_sync_recover_delay.to(unit); + return recover_delay.to(unit); + } /** * When a barrier transaction is requested how many times to repeat attempting the barrier before giving up diff --git a/src/java/org/apache/cassandra/schema/Schema.java b/src/java/org/apache/cassandra/schema/Schema.java index 31a76f33b130..fcfae3f1515d 100644 --- a/src/java/org/apache/cassandra/schema/Schema.java +++ b/src/java/org/apache/cassandra/schema/Schema.java @@ -279,10 +279,15 @@ public TableMetadata getTableMetadata(String keyspace, String table) @Override public TableMetadata getTableMetadata(TableId id) { - return ObjectUtils.getFirstNonNull(() -> localKeyspaces.getTableOrViewNullable(id), - () -> distributedKeyspaces().getTableOrViewNullable(id), - () -> VirtualKeyspaceRegistry.instance.getTableMetadataNullable(id)); + TableMetadata metadata = localKeyspaces.getTableOrViewNullable(id); + if (metadata != null) + return metadata; + metadata = distributedKeyspaces().getTableOrViewNullable(id); + if (metadata != null) + return metadata; + + return VirtualKeyspaceRegistry.instance.getTableMetadataNullable(id); } public TableMetadata getTableMetadata(Descriptor descriptor) diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 441277e7574d..bf351c323fda 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -186,7 +186,7 @@ public long attemptCoordinationDelay(Node node, SafeCommandStore safeStore, TxnI // TODO (expected): make this a configurable calculation on normal request latencies (like ContentionStrategy) long oneSecond = SECONDS.toMicros(1L); - long startTime = mostRecentAttempt.hlc() + DatabaseDescriptor.getAccord().recover_delay.to(MICROSECONDS) + long startTime = mostRecentAttempt.hlc() + DatabaseDescriptor.getAccord().recoveryDelayFor(txnId, MICROSECONDS) + (retryCount == 0 ? 0 : random.nextLong(oneSecond << Math.min(retryCount, 4))); startTime = nonClashingStartTime(startTime, shard == null ? null : shard.nodes, node.id(), oneSecond, random); diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index c6c508ebc57d..2b1cfa55ccf0 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -27,6 +27,7 @@ import accord.utils.async.AsyncResult; import accord.utils.async.Observable; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.cassandra.service.accord.*; @@ -202,30 +203,30 @@ public void onAdd(AccordCachingState state) return AsyncChains.all(root); } - private AsyncChain> findOverlappingKeys(Ranges ranges) + private AsyncChain> findOverlappingKeys(Ranges ranges) { if (ranges.isEmpty()) { // During topology changes some shards may be included with empty ranges - return AsyncChains.success(Collections.emptySet()); + return AsyncChains.success(Collections.emptyList()); } - List>> chains = new ArrayList<>(ranges.size()); + List>> chains = new ArrayList<>(ranges.size()); for (Range range : ranges) chains.add(findOverlappingKeys(range)); - return AsyncChains.reduce(chains, (a, b) -> ImmutableSet.builder().addAll(a).addAll(b).build()); + return AsyncChains.reduce(chains, (a, b) -> ImmutableList.builderWithExpectedSize(a.size() + b.size()).addAll(a).addAll(b).build()); } - private AsyncChain> findOverlappingKeys(Range range) + private AsyncChain> findOverlappingKeys(Range range) { // save to a variable as java gets confused when `.map` is called on the result of asChain - AsyncChain> map = Observable.asChain(callback -> + AsyncChain> map = Observable.asChain(callback -> AccordKeyspace.findAllKeysBetween(commandStore.id(), (AccordRoutingKey) range.start(), range.startInclusive(), (AccordRoutingKey) range.end(), range.endInclusive(), callback), - Collectors.toSet()); - return map.map(s -> ImmutableSet.builder().addAll(s).build()); + Collectors.toList()); + return map.map(ImmutableList::copyOf); } @VisibleForTesting diff --git a/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java index 51497792cd43..02780219d233 100644 --- a/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java +++ b/src/java/org/apache/cassandra/service/accord/repair/AccordRepair.java @@ -125,6 +125,7 @@ private List repairRange(TokenRange range) throws Throw int rangeStepUpdateInterval = ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL.getInt(); RoutingKey remainingStart = range.start(); // TODO (expected): repair ranges should have a configurable lower limit of split size so already small repairs aren't broken up into excessively tiny ones + // TODO (expected): we should support lower range divisions for accord repair BigInteger rangeSize = splitter.sizeOf(range); if (rangeStep == null) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java index f295ce6e1d82..e30706a47ef4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -148,7 +148,7 @@ protected Logger logger() public static void setupClass() throws Throwable { CassandraRelevantProperties.ACCORD_AGENT_CLASS.setString(BarrierRecordingAgent.class.getName()); - setupCluster(opt -> opt.withConfig(conf -> conf.with(Feature.NETWORK, Feature.GOSSIP)), 3); + setupCluster(opt -> opt.withConfig(conf -> conf.with(Feature.NETWORK, Feature.GOSSIP).set("accord.recover_delay", "1s")), 3); // setupCluster(opt -> opt, 3); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java index b69f6a6b48f5..e326f49d2543 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordProgressLogTest.java @@ -44,7 +44,9 @@ public void testRecoveryTimeWindow() throws Throwable { try (Cluster cluster = init(Cluster.build(3) .withoutVNodes() - .withConfig(c -> c.with(Feature.NETWORK).set("accord.enabled", "true")) + .withConfig(c -> c.with(Feature.NETWORK) + .set("accord.enabled", "true") + .set("accord.recover_delay", "1s")) .start())) { cluster.schemaChange("CREATE KEYSPACE ks WITH replication={'class':'SimpleStrategy', 'replication_factor': 3}"); From 1ed52038ce350b37c563b9176fab995dcbef32b0 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Tue, 18 Jun 2024 13:55:38 -0700 Subject: [PATCH 149/340] This commits contains the following two patches in order to reduce the amount of conflicts resolution necessary for future rebasing: (Accord): C* stores table in Range which will cause ranges to be removed from Accord when DROP TABLE is performed patch by David Capwell, Sam Tunnicliffe; reviewed by Sam Tunnicliffe for CASSANDRA-18675 CEP-15: (Accord) sequence EpochReady.coordinating to allow syncComplete to be learned from newer epochs patch by David Capwell; reviewed by Alex Petrov, Blake Eggleston for CASSANDRA-19769 --- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 9 + .../config/CassandraRelevantProperties.java | 4 + .../cql3/statements/TransactionStatement.java | 6 + .../schema/AlterSchemaStatement.java | 7 +- .../schema/AlterTableStatement.java | 3 + .../schema/CreateViewStatement.java | 5 + .../schema/DropKeyspaceStatement.java | 29 +- .../statements/schema/DropTableStatement.java | 27 + .../index/sai/utils/IndexTermType.java | 2 +- src/java/org/apache/cassandra/net/Verb.java | 3 + .../cassandra/schema/KeyspaceParams.java | 8 +- .../cassandra/schema/ReplicationParams.java | 5 + .../apache/cassandra/schema/TableParams.java | 57 +- .../accord/AccordConfigurationService.java | 64 +- .../accord/AccordSafeCommandStore.java | 9 + .../service/accord/AccordService.java | 446 ++++++- .../service/accord/AccordStaleReplicas.java | 2 +- .../service/accord/AccordTopology.java | 7 +- .../service/accord/FetchMinEpoch.java | 209 +++ .../service/accord/IAccordService.java | 13 + .../cassandra/service/accord/TokenRange.java | 35 +- .../service/accord/api/AccordAgent.java | 12 + .../InheritKeyspaceFastPathStrategy.java | 2 +- .../ParameterizedFastPathStrategy.java | 8 +- .../fastpath/SimpleFastPathStrategy.java | 2 +- .../migration/ConsensusMigrationState.java | 3 + .../TransactionalMigrationFromMode.java | 10 +- .../apache/cassandra/tcm/CMSOperations.java | 17 + .../cassandra/tcm/CMSOperationsMBean.java | 2 + .../apache/cassandra/tcm/ClusterMetadata.java | 24 +- .../apache/cassandra/tcm/MetadataKeys.java | 39 +- .../cassandra/tcm/MultiStepOperation.java | 4 +- .../org/apache/cassandra/tcm/Processor.java | 20 + .../org/apache/cassandra/tcm/Startup.java | 16 +- .../tcm/StubClusterMetadataService.java | 11 +- .../apache/cassandra/tcm/Transformation.java | 4 + .../tcm/sequences/DropAccordTable.java | 324 +++++ .../tcm/sequences/InProgressSequences.java | 18 +- .../cassandra/tcm/serialization/Version.java | 41 +- .../tcm/transformations/AlterSchema.java | 2 +- .../FinishDropAccordTable.java | 143 ++ .../PrepareDropAccordTable.java | 115 ++ .../org/apache/cassandra/tools/NodeTool.java | 3 +- .../cassandra/tools/nodetool/CMSAdmin.java | 12 + .../distributed/impl/Coordinator.java | 2 +- .../distributed/shared/ClusterUtils.java | 60 + .../distributed/test/TestBaseImpl.java | 23 + .../test/accord/AccordCQLTestBase.java | 1 + .../test/accord/AccordDropKeyspaceTest.java | 63 + .../test/accord/AccordDropTableBase.java | 159 +++ .../test/accord/AccordDropTableTest.java | 63 + .../accord/AccordHostReplacementTest.java | 105 ++ .../test/accord/AccordTestBase.java | 67 +- .../test/accord/NewSchemaTest.java | 22 +- .../test/cql3/CasMultiNodeTableWalkBase.java | 4 +- .../test/cql3/MultiNodeTableWalkBase.java | 2 +- .../MultiNodeTableWalkWithReadRepairTest.java | 2 +- ...ltiNodeTableWalkWithoutReadRepairTest.java | 2 +- .../test/cql3/MultiNodeTokenConflictTest.java | 4 +- .../cql3/PaxosV1MultiNodeTableWalkTest.java | 2 +- .../cql3/PaxosV2MultiNodeTableWalkTest.java | 2 +- .../test/cql3/SingleNodeTableWalkTest.java | 15 +- .../cql3/SingleNodeTokenConflictTest.java | 18 +- .../test/cql3/StatefulASTBase.java | 21 +- .../test/log/ClusterMetadataTestHelper.java | 32 +- .../fuzz/snapshots/SnapshotsTest.java | 8 +- .../topology/AccordTopologyMixupTest.java | 321 +++++ .../fuzz/topology/HarryTopologyMixupTest.java | 228 ---- .../fuzz/topology/TopologyMixupTestBase.java | 495 ++----- .../apache/cassandra/harry/SchemaSpec.java | 29 + .../harry/execution/DataTracker.java | 2 +- .../harry/gen/EntropyRandomSource.java} | 46 +- .../cassandra/harry/gen/Generators.java | 13 +- .../harry/gen/InvertibleGenerator.java | 2 +- .../harry/gen/RandomSourceEntropySource.java} | 57 +- .../harry/model/ASTSingleTableModel.java | 5 +- .../harry/test/SimpleBijectionTest.java | 2 +- .../simulator/test/HarrySimulatorTest.java | 2 +- test/unit/accord/utilsfork/Gen.java | 237 ---- test/unit/accord/utilsfork/Gens.java | 1152 ----------------- test/unit/accord/utilsfork/Invariants.java | 339 ----- test/unit/accord/utilsfork/Property.java | 1075 --------------- test/unit/accord/utilsfork/RandomSource.java | 429 ------ test/unit/accord/utilsfork/SeedProvider.java | 51 - .../accord/utilsfork/async/TimeoutUtils.java | 70 - test/unit/accord/utilsfork/random/Picker.java | 118 -- .../concurrent/SimulatedExecutorFactory.java | 4 +- .../config/DatabaseDescriptorRefTest.java | 1 + ...WithColumnCqlConstraintValidationTest.java | 2 +- ...mnOctetLengthConstraintValidationTest.java | 2 +- .../org/apache/cassandra/cql3/CQLTester.java | 167 ++- .../cql3/PreparedStatementsTest.java | 6 + .../cassandra/cql3/RandomSchemaTest.java | 51 +- .../cassandra/cql3/ast/ExpressionTest.java | 6 +- .../org/apache/cassandra/cql3/ast/Select.java | 20 +- .../cql3/conditions/ColumnConditionTest.java | 8 +- .../cassandra/gms/VersionedValueTest.java | 4 +- .../sai/cql/AbstractSimpleEqTestBase.java | 6 +- .../index/sai/cql/AllTypesSimpleEqTest.java | 4 +- .../io/util/CompressedChunkReaderTest.java | 6 +- .../cassandra/net/MessageDeliveryTest.java | 4 +- .../net/SimulatedMessageDelivery.java | 4 +- .../ConcurrentIrWithPreviewFuzzTest.java | 6 +- .../cassandra/repair/FailedAckTest.java | 6 +- .../repair/FailingRepairFuzzTest.java | 6 +- .../apache/cassandra/repair/FuzzTestBase.java | 8 +- .../cassandra/repair/HappyPathFuzzTest.java | 6 +- .../cassandra/repair/SlowMessageFuzzTest.java | 6 +- .../cassandra/schema/MemtableParamsTest.java | 4 +- .../cassandra/schema/TableParamsTest.java | 52 + .../service/accord/AccordKeyspaceTest.java | 2 +- .../accord/AccordStaleReplicasTest.java | 2 +- .../accord/AccordSyncPropagatorTest.java | 9 +- .../service/accord/EpochSyncTest.java | 464 ++++--- .../service/accord/FetchMinEpochTest.java | 284 ++++ .../accord/SimulatedAccordCommandStore.java | 2 +- .../accord/async/AsyncOperationTest.java | 2 +- .../AccordRoutingKeyByteSourceTest.java | 2 +- .../serializers/DepsSerializerTest.java | 20 +- .../tcm/ClusterMetadataMetadataKeyTest.java | 101 ++ .../tcm/ClusterMetadataSerializerTest.java | 83 ++ .../ClusterMetadataTransformationTest.java | 4 +- .../tcm/ValidatingClusterMetadataService.java | 107 ++ .../tcm/log/LogListenerNotificationTest.java | 2 +- .../tcm/sequences/DropAccordTableTest.java | 223 ++++ .../AccordMarkRejoiningTest.java | 6 +- .../transformations/AccordMarkStaleTest.java | 6 +- .../cassandra/transport/CBUtilTest.java | 4 +- .../utils/AbstractTypeGeneratorsTest.java | 53 + .../cassandra/utils/CassandraGenerators.java | 220 +++- .../utils/CassandraGeneratorsTest.java | 40 +- .../cassandra/utils/ConfigGenBuilder.java | 6 +- .../cassandra/utils/ConfigGenBuilderTest.java | 4 +- .../apache/cassandra/utils/Generators.java | 8 +- .../cassandra/utils/SimulatedMiniCluster.java | 620 +++++++++ 136 files changed, 4988 insertions(+), 4809 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/FetchMinEpoch.java create mode 100644 src/java/org/apache/cassandra/tcm/sequences/DropAccordTable.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/FinishDropAccordTable.java create mode 100644 src/java/org/apache/cassandra/tcm/transformations/PrepareDropAccordTable.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropKeyspaceTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/accord/AccordHostReplacementTest.java create mode 100644 test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java delete mode 100644 test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java rename test/{unit/accord/utilsfork/WrappedRandomSource.java => harry/main/org/apache/cassandra/harry/gen/EntropyRandomSource.java} (60%) rename test/{unit/accord/utilsfork/DefaultRandom.java => harry/main/org/apache/cassandra/harry/gen/RandomSourceEntropySource.java} (68%) delete mode 100644 test/unit/accord/utilsfork/Gen.java delete mode 100644 test/unit/accord/utilsfork/Gens.java delete mode 100644 test/unit/accord/utilsfork/Invariants.java delete mode 100644 test/unit/accord/utilsfork/Property.java delete mode 100644 test/unit/accord/utilsfork/RandomSource.java delete mode 100644 test/unit/accord/utilsfork/SeedProvider.java delete mode 100644 test/unit/accord/utilsfork/async/TimeoutUtils.java delete mode 100644 test/unit/accord/utilsfork/random/Picker.java create mode 100644 test/unit/org/apache/cassandra/schema/TableParamsTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/FetchMinEpochTest.java create mode 100644 test/unit/org/apache/cassandra/tcm/ClusterMetadataMetadataKeyTest.java create mode 100644 test/unit/org/apache/cassandra/tcm/ClusterMetadataSerializerTest.java create mode 100644 test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java create mode 100644 test/unit/org/apache/cassandra/tcm/sequences/DropAccordTableTest.java create mode 100644 test/unit/org/apache/cassandra/utils/AbstractTypeGeneratorsTest.java create mode 100644 test/unit/org/apache/cassandra/utils/SimulatedMiniCluster.java diff --git a/modules/accord b/modules/accord index 486cd4bc15d3..4844e64945b7 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 486cd4bc15d33500b7b896f9e4691a38d946b679 +Subproject commit 4844e64945b720c802dce11d811e25665f9da826 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 39fefbf187e3..0350e587c629 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -90,6 +90,15 @@ public enum TransactionalRangeMigration public boolean ephemeralReadEnabled = false; public boolean state_cache_listener_jfr_enabled = true; public final JournalSpec journal = new JournalSpec(); + public final MinEpochRetrySpec minEpochSyncRetry = new MinEpochRetrySpec(); + + public static class MinEpochRetrySpec extends RetrySpec + { + public MinEpochRetrySpec() + { + maxAttempts = new MaxAttempt(3); + } + } public static class JournalSpec implements Params { diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 10cf8d6b1045..bb7dc7c55186 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -25,6 +25,7 @@ import com.google.common.primitives.Ints; +import accord.utils.Invariants; import org.apache.cassandra.db.virtual.LogMessagesTable; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.FBUtilities; @@ -38,6 +39,9 @@ public enum CassandraRelevantProperties { ACCORD_AGENT_CLASS("cassandra.test.accord.agent"), + ACCORD_KEY_PARANOIA_COSTFACTOR(Invariants.KEY_PARANOIA_COSTFACTOR), + ACCORD_KEY_PARANOIA_CPU(Invariants.KEY_PARANOIA_CPU), + ACCORD_KEY_PARANOIA_MEMORY(Invariants.KEY_PARANOIA_MEMORY), ACCORD_REPAIR_RANGE_STEP_UPDATE_INTERVAL("cassandra.accord.repair.range_step_update_interval", "100"), ACQUIRE_RETRY_SECONDS("cassandra.acquire_retry_seconds", "60"), ACQUIRE_SLEEP_MS("cassandra.acquire_sleep_ms", "1000"), diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index 950eb8a6dad4..1320325c3858 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -102,6 +102,7 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions; %s statement %s"; public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps; %s statement %s"; public static final String TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE = "Accord transactions are disabled on table (See transactional_mode in table options); %s statement %s"; + public static final String TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE = "Accord transactions are disabled on table (table is being dropped); %s statement %s"; public static final String NO_COUNTERS_IN_TXNS_MESSAGE = "Counter columns cannot be accessed within a transaction; %s statement %s"; public static final String EMPTY_TRANSACTION_MESSAGE = "Transaction contains no reads or writes"; public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; @@ -532,6 +533,8 @@ public CQLStatement prepare(ClientState state) if (!prepared.table.isAccordEnabled()) throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", prepared.source); + if (prepared.table.params.pendingDrop) + throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE, "SELECT", prepared.source); if (prepared.table.isCounter()) throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); @@ -552,6 +555,8 @@ public CQLStatement prepare(ClientState state) if (!prepared.table.isAccordEnabled()) throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", prepared.source); + if (prepared.table.params.pendingDrop) + throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE, "SELECT", prepared.source); if (prepared.table.isCounter()) throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); @@ -578,6 +583,7 @@ public CQLStatement prepare(ClientState state) ModificationStatement prepared = parsed.prepare(state, bindVariables); checkTrue(prepared.metadata().isAccordEnabled(), TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, prepared.type, prepared.source); + checkFalse(prepared.metadata().params.pendingDrop, TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE, prepared.type, prepared.source); checkFalse(prepared.hasConditions(), NO_CONDITIONS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); checkFalse(prepared.isTimestampSet(), NO_TIMESTAMPS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java index 115a6a3374f1..3901e65cfb86 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java @@ -181,7 +181,7 @@ public ResultMessage execute(QueryState state) // cluster, as config can be heterogenous falling back to safe defaults may occur on some nodes. ClusterMetadata metadata = ClusterMetadata.current(); apply(metadata); - ClusterMetadata result = Schema.instance.submit(this); + ClusterMetadata result = commit(metadata); KeyspacesDiff diff = Keyspaces.diff(metadata.schema.getKeyspaces(), result.schema.getKeyspaces()); clientWarnings(diff).forEach(ClientWarn.instance::warn); @@ -206,6 +206,11 @@ public ResultMessage execute(QueryState state) return new ResultMessage.SchemaChange(schemaChangeEvent(diff)); } + protected ClusterMetadata commit(ClusterMetadata metadata) + { + return Schema.instance.submit(this); + } + private void validateKeyspaceName() { if (!SchemaConstants.isValidName(keyspaceName)) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java index a7abcbada0e0..f8a24b072444 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java @@ -124,6 +124,9 @@ public Keyspaces apply(ClusterMetadata metadata) return schema; } + if (table.params.pendingDrop) + throw ire("Cannot use ALTER TABLE on a table that is being dropped."); + if (table.isView()) throw ire("Cannot use ALTER TABLE on a materialized view; use ALTER MATERIALIZED VIEW instead"); diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CreateViewStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CreateViewStatement.java index 40cbee967b60..e68e0c003ddb 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CreateViewStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CreateViewStatement.java @@ -173,6 +173,11 @@ public Keyspaces apply(ClusterMetadata metadata) viewName, tableName); } + if (table.params.pendingDrop) + throw ire("Cannot create materialized view '%s' for base table " + + "'%s' as it is being dropped.", + viewName, tableName); + /* * Process SELECT clause */ diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/DropKeyspaceStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/DropKeyspaceStatement.java index e074da54a33b..e7159cda8d41 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/DropKeyspaceStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/DropKeyspaceStatement.java @@ -17,13 +17,18 @@ */ package org.apache.cassandra.cql3.statements.schema; +import java.util.List; +import java.util.stream.Collectors; + import org.apache.cassandra.audit.AuditLogContext; import org.apache.cassandra.audit.AuditLogEntryType; import org.apache.cassandra.auth.Permission; import org.apache.cassandra.cql3.CQLStatement; import org.apache.cassandra.db.guardrails.Guardrails; +import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.transport.Event.SchemaChange; @@ -45,8 +50,30 @@ public Keyspaces apply(ClusterMetadata metadata) Guardrails.dropKeyspaceEnabled.ensureEnabled(state); Keyspaces schema = metadata.schema.getKeyspaces(); - if (schema.containsKeyspace(keyspaceName)) + KeyspaceMetadata keyspace = schema.getNullable(keyspaceName); + if (keyspace != null) + { + // check that no accord tables in the keyspace are currently in the process of being dropped + List pendingDrop = keyspace.tables.stream() + .filter(t -> t.params.pendingDrop) + .collect(Collectors.toList()); + if (!pendingDrop.isEmpty()) + throw ire("Cannot drop keyspace '%s' as it contains accord tables which are currently being dropped. " + + "Please wait for those operations to complete before dropping the keyspace. (%s)", + keyspaceName, pendingDrop.stream() + .map(Object::toString) + .collect(Collectors.joining(","))); + + List accordTables = keyspace.tables.stream() + .filter(TableMetadata::isAccordEnabled) + .collect(Collectors.toList()); + if (!accordTables.isEmpty()) + throw ire("Cannot drop keyspace '%s' as it contains accord tables. (%s)", + keyspaceName, accordTables.stream() + .map(Object::toString) + .collect(Collectors.joining(","))); return schema.without(keyspaceName); + } if (ifExists) return schema; diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/DropTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/DropTableStatement.java index 56848a8c2275..a008b4548823 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/DropTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/DropTableStatement.java @@ -27,6 +27,10 @@ import org.apache.cassandra.schema.Keyspaces.KeyspacesDiff; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.sequences.DropAccordTable.TableReference; +import org.apache.cassandra.tcm.sequences.InProgressSequences; +import org.apache.cassandra.tcm.transformations.PrepareDropAccordTable; import org.apache.cassandra.transport.Event.SchemaChange; import org.apache.cassandra.transport.Event.SchemaChange.Change; import org.apache.cassandra.transport.Event.SchemaChange.Target; @@ -48,6 +52,26 @@ public DropTableStatement(String keyspaceName, String tableName, boolean ifExist this.ifExists = ifExists; } + @Override + protected ClusterMetadata commit(ClusterMetadata metadata) + { + KeyspaceMetadata keyspace = metadata.schema.getKeyspaces().getNullable(keyspaceName); + TableMetadata table = null == keyspace + ? null + : keyspace.getTableOrViewNullable(tableName); + if (table == null // this can happen when ifExists=true... since its already been validated can skip + || !table.isAccordEnabled()) + return super.commit(metadata); + + // Multi-Step Operation + // 1) mark the table as pending delete + // 2) await for Accord to finish transactions + // 3) drop table + TableReference ref = TableReference.from(table); + ClusterMetadataService.instance().commit(new PrepareDropAccordTable(ref)); + return InProgressSequences.finishInProgressSequences(ref); + } + public Keyspaces apply(ClusterMetadata metadata) { Guardrails.dropTruncateTableEnabled.ensureEnabled(state); @@ -70,6 +94,9 @@ public Keyspaces apply(ClusterMetadata metadata) if (table.isView()) throw ire("Cannot use DROP TABLE on a materialized view. Please use DROP MATERIALIZED VIEW instead."); + if (table.isAccordEnabled() && table.params.pendingDrop) + throw ire("Table '%s.%s' is already being dropped", keyspaceName, tableName); + Iterable views = keyspace.views.forTable(table.id); if (!isEmpty(views)) { diff --git a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java index 2f07f76ad44f..279f7441e479 100644 --- a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java +++ b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java @@ -788,7 +788,7 @@ private AbstractType collectionCellValueType(AbstractType type, IndexTarge return CompositeType.getInstance(collection.nameComparator(), collection.valueComparator()); } default: - throw new IllegalArgumentException("Unsupported collection type: " + collection.kind); + throw new IllegalArgumentException("Unsupported collection type: " + collection.kind + "; index type: " + indexType.name()); } } diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index e2d3696a62e4..42bce20c8c75 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -81,6 +81,7 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordSyncPropagator; import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; +import org.apache.cassandra.service.accord.FetchMinEpoch; import org.apache.cassandra.service.accord.interop.AccordInteropApply; import org.apache.cassandra.service.accord.interop.AccordInteropCommit; import org.apache.cassandra.service.accord.interop.AccordInteropRead; @@ -356,6 +357,8 @@ public enum Verb ACCORD_INTEROP_READ_REPAIR_RSP (157, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.replySerializer, RESPONSE_HANDLER), ACCORD_INTEROP_READ_REPAIR_REQ (158, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.requestSerializer, AccordService::verbHandlerOrNoop, ACCORD_INTEROP_READ_REPAIR_RSP), ACCORD_INTEROP_APPLY_REQ (160, P2, writeTimeout, IMMEDIATE, () -> AccordInteropApply.serializer, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP), + ACCORD_FETCH_MIN_EPOCH_RSP (166, P2, writeTimeout, IMMEDIATE, () -> FetchMinEpoch.Response.serializer, RESPONSE_HANDLER), + ACCORD_FETCH_MIN_EPOCH_REQ (165, P2, writeTimeout, IMMEDIATE, () -> FetchMinEpoch.serializer, () -> FetchMinEpoch.handler, ACCORD_FETCH_MIN_EPOCH_RSP), // generic failure response FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailure.serializer, RESPONSE_HANDLER ), diff --git a/src/java/org/apache/cassandra/schema/KeyspaceParams.java b/src/java/org/apache/cassandra/schema/KeyspaceParams.java index fe05b10b5d55..09afed84ba89 100644 --- a/src/java/org/apache/cassandra/schema/KeyspaceParams.java +++ b/src/java/org/apache/cassandra/schema/KeyspaceParams.java @@ -33,7 +33,7 @@ import org.apache.cassandra.tcm.serialization.MetadataSerializer; import org.apache.cassandra.tcm.serialization.Version; -import static org.apache.cassandra.tcm.serialization.Version.V2; +import static org.apache.cassandra.tcm.serialization.Version.MIN_ACCORD_VERSION; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** @@ -158,7 +158,7 @@ public void serialize(KeyspaceParams t, DataOutputPlus out, Version version) thr { ReplicationParams.serializer.serialize(t.replication, out, version); out.writeBoolean(t.durableWrites); - if (version.isAtLeast(V2)) + if (version.isAtLeast(MIN_ACCORD_VERSION)) FastPathStrategy.serializer.serialize(t.fastPath, out, version); } @@ -166,7 +166,7 @@ public KeyspaceParams deserialize(DataInputPlus in, Version version) throws IOEx { ReplicationParams params = ReplicationParams.serializer.deserialize(in, version); boolean durableWrites = in.readBoolean(); - FastPathStrategy fastPath = version.isAtLeast(V2) + FastPathStrategy fastPath = version.isAtLeast(MIN_ACCORD_VERSION) ? FastPathStrategy.serializer.deserialize(in, version) : FastPathStrategy.simple(); return new KeyspaceParams(durableWrites, params, fastPath); @@ -176,7 +176,7 @@ public long serializedSize(KeyspaceParams t, Version version) { return ReplicationParams.serializer.serializedSize(t.replication, version) + TypeSizes.sizeof(t.durableWrites) + - (version.isAtLeast(V2) ? FastPathStrategy.serializer.serializedSize(t.fastPath, version) : 0); + (version.isAtLeast(MIN_ACCORD_VERSION) ? FastPathStrategy.serializer.serializedSize(t.fastPath, version) : 0); } } } diff --git a/src/java/org/apache/cassandra/schema/ReplicationParams.java b/src/java/org/apache/cassandra/schema/ReplicationParams.java index da44292e0d63..40a92c803fe6 100644 --- a/src/java/org/apache/cassandra/schema/ReplicationParams.java +++ b/src/java/org/apache/cassandra/schema/ReplicationParams.java @@ -70,6 +70,11 @@ public static ReplicationParams fromStrategy(AbstractReplicationStrategy strateg return new ReplicationParams(strategy.getClass(), strategy.configOptions); } + public static ReplicationParams copy(AbstractReplicationStrategy strategy) + { + return new ReplicationParams(strategy.getClass(), strategy.configOptions); + } + public static ReplicationParams local() { return new ReplicationParams(LocalStrategy.class, ImmutableMap.of()); diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 8cdc41f6871d..569fc27b1c23 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -75,7 +75,8 @@ public enum Option READ_REPAIR, FAST_PATH, TRANSACTIONAL_MODE, - TRANSACTIONAL_MIGRATION_FROM; + TRANSACTIONAL_MIGRATION_FROM, + PENDING_DROP; @Override public String toString() @@ -106,6 +107,7 @@ public String toString() public final FastPathStrategy fastPath; public final TransactionalMode transactionalMode; public final TransactionalMigrationFromMode transactionalMigrationFrom; + public final boolean pendingDrop; private TableParams(Builder builder) { @@ -133,6 +135,7 @@ private TableParams(Builder builder) fastPath = builder.fastPath; transactionalMode = builder.transactionalMode != null ? builder.transactionalMode : TransactionalMode.off; transactionalMigrationFrom = builder.transactionalMigrationFrom; + pendingDrop = builder.pendingDrop; } public static Builder builder() @@ -163,7 +166,8 @@ public static Builder builder(TableParams params) .readRepair(params.readRepair) .fastPath(params.fastPath) .transactionalMode(params.transactionalMode) - .transactionalMigrationFrom(params.transactionalMigrationFrom); + .transactionalMigrationFrom(params.transactionalMigrationFrom) + .pendingDrop(params.pendingDrop); } public Builder unbuild() @@ -257,7 +261,8 @@ public boolean equals(Object o) && readRepair == p.readRepair && fastPath.equals(fastPath) && transactionalMode == p.transactionalMode - && transactionalMigrationFrom == p.transactionalMigrationFrom; + && transactionalMigrationFrom == p.transactionalMigrationFrom + && pendingDrop == p.pendingDrop; } @Override @@ -284,7 +289,8 @@ public int hashCode() readRepair, fastPath, transactionalMode, - transactionalMigrationFrom); + transactionalMigrationFrom, + pendingDrop); } @Override @@ -314,6 +320,7 @@ public String toString() .add(Option.FAST_PATH.toString(), fastPath) .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) + .add(PENDING_DROP.toString(), pendingDrop) .toString(); } @@ -401,6 +408,7 @@ public static final class Builder private FastPathStrategy fastPath = FastPathStrategy.inheritKeyspace(); private TransactionalMode transactionalMode = TransactionalMode.off; public TransactionalMigrationFromMode transactionalMigrationFrom = TransactionalMigrationFromMode.none; + public boolean pendingDrop = false; public Builder() { @@ -542,6 +550,12 @@ public Builder extensions(Map val) extensions = ImmutableMap.copyOf(val); return this; } + + public Builder pendingDrop(boolean pendingDrop) + { + this.pendingDrop = pendingDrop; + return this; + } } public static class Serializer implements MetadataSerializer @@ -559,10 +573,7 @@ public void serialize(TableParams t, DataOutputPlus out, Version version) throws out.writeUTF(t.speculativeRetry.toString()); out.writeUTF(t.additionalWritePolicy.toString()); if (version.isAtLeast(Version.V2)) - { out.writeUTF(t.memtable.configurationKey()); - FastPathStrategy.serializer.serialize(t.fastPath, out, version); - } serializeMap(t.caching.asMap(), out); serializeMap(t.compaction.asMap(), out); serializeMap(t.compression.asMap(), out); @@ -573,8 +584,13 @@ public void serialize(TableParams t, DataOutputPlus out, Version version) throws { out.writeBoolean(t.allowAutoSnapshot); out.writeBoolean(t.incrementalBackups); + } + if (version.isAtLeast(Version.MIN_ACCORD_VERSION)) + { + FastPathStrategy.serializer.serialize(t.fastPath, out, version); out.writeInt(t.transactionalMode.ordinal()); out.writeInt(t.transactionalMigrationFrom.ordinal()); + out.writeBoolean(t.pendingDrop); } } @@ -592,7 +608,6 @@ public TableParams deserialize(DataInputPlus in, Version version) throws IOExcep .speculativeRetry(SpeculativeRetryPolicy.fromString(in.readUTF())) .additionalWritePolicy(SpeculativeRetryPolicy.fromString(in.readUTF())) .memtable(version.isAtLeast(Version.V2) ? MemtableParams.get(in.readUTF()) : MemtableParams.DEFAULT) - .fastPath(version.isAtLeast(Version.V2) ? FastPathStrategy.serializer.deserialize(in, version) : FastPathStrategy.simple()) .caching(CachingParams.fromMap(deserializeMap(in))) .compaction(CompactionParams.fromMap(deserializeMap(in))) .compression(CompressionParams.fromMap(deserializeMap(in))) @@ -600,15 +615,20 @@ public TableParams deserialize(DataInputPlus in, Version version) throws IOExcep .cdc(in.readBoolean()) .readRepair(ReadRepairStrategy.fromString(in.readUTF())) .allowAutoSnapshot(!version.isAtLeast(Version.V4) || in.readBoolean()) - .incrementalBackups(!version.isAtLeast(Version.V4) || in.readBoolean()) - .transactionalMode(version.isAtLeast(Version.V4) ? TransactionalMode.fromOrdinal(in.readInt()) : TransactionalMode.off) - .transactionalMigrationFrom(version.isAtLeast(Version.V4) ? TransactionalMigrationFromMode.fromOrdinal(in.readInt()) : TransactionalMigrationFromMode.off); + .incrementalBackups(!version.isAtLeast(Version.V4) || in.readBoolean()); + if (version.isAtLeast(Version.MIN_ACCORD_VERSION)) + { + builder.fastPath(FastPathStrategy.serializer.deserialize(in, version)) + .transactionalMode(TransactionalMode.fromOrdinal(in.readInt())) + .transactionalMigrationFrom(TransactionalMigrationFromMode.fromOrdinal(in.readInt())) + .pendingDrop(in.readBoolean()); + } return builder.build(); } public long serializedSize(TableParams t, Version version) { - return sizeof(t.comment) + + long size = sizeof(t.comment) + sizeof(t.bloomFilterFpChance) + sizeof(t.crcCheckChance) + sizeof(t.gcGraceSeconds) + @@ -619,7 +639,6 @@ public long serializedSize(TableParams t, Version version) sizeof(t.speculativeRetry.toString()) + sizeof(t.additionalWritePolicy.toString()) + (version.isAtLeast(Version.V2) ? sizeof(t.memtable.configurationKey()) : 0) + - (version.isAtLeast(Version.V2) ? FastPathStrategy.serializer.serializedSize(t.fastPath, version) : 0) + serializedSizeMap(t.caching.asMap()) + serializedSizeMap(t.compaction.asMap()) + serializedSizeMap(t.compression.asMap()) + @@ -627,9 +646,15 @@ public long serializedSize(TableParams t, Version version) sizeof(t.cdc) + sizeof(t.readRepair.name()) + (version.isAtLeast(Version.V4) ? sizeof(t.allowAutoSnapshot) : 0) + - (version.isAtLeast(Version.V4) ? sizeof(t.incrementalBackups) : 0) + - (version.isAtLeast(Version.V4) ? sizeof(t.transactionalMode.ordinal()) : 0) + - (version.isAtLeast(Version.V4) ? sizeof(t.transactionalMigrationFrom.ordinal()) : 0); + (version.isAtLeast(Version.V4) ? sizeof(t.incrementalBackups) : 0); + if (version.isAtLeast(Version.MIN_ACCORD_VERSION)) + { + size += FastPathStrategy.serializer.serializedSize(t.fastPath, version) + + sizeof(t.transactionalMode.ordinal()) + + sizeof(t.transactionalMigrationFrom.ordinal()) + + sizeof(t.pendingDrop); + } + return size; } private void serializeMap(Map map, DataOutputPlus out) throws IOException diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 1e6cb1d76919..e0d1d84020ab 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -19,8 +19,10 @@ package org.apache.cassandra.service.accord; import java.util.Objects; +import java.util.OptionalLong; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -30,6 +32,7 @@ import accord.impl.AbstractConfigurationService; import accord.local.Node; import accord.primitives.Ranges; +import accord.topology.Shard; import accord.topology.Topology; import accord.utils.Invariants; import accord.utils.async.AsyncResult; @@ -207,12 +210,18 @@ protected EpochHistory createEpochHistory() return new EpochHistory(); } + @VisibleForTesting public synchronized void start() + { + start(ignore -> {}); + } + + public synchronized void start(Consumer callback) { Invariants.checkState(state == State.INITIALIZED, "Expected state to be INITIALIZED but was %s", state); state = State.LOADING; - updateMapping(ClusterMetadata.current()); EndpointMapping snapshot = mapping; + //TODO (restart): if there are topologies loaded then there is likely failures if reporting is needed, as mapping is not setup yet diskState = diskStateManager.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { if (topology != null) reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); @@ -230,6 +239,7 @@ public synchronized void start() receiveRedundant(redundant, epoch); })); state = State.STARTED; + callback.accept(diskState.isEmpty() ? OptionalLong.empty() : OptionalLong.of(diskState.maxEpoch)); ClusterMetadataService.instance().log().addListener(this); } @@ -293,27 +303,54 @@ synchronized void updateMapping(ClusterMetadata metadata) private void reportMetadata(ClusterMetadata metadata) { - Stage.MISC.submit(() -> { - synchronized (AccordConfigurationService.this) + Stage.MISC.submit(() -> reportMetadataInternal(metadata)); + } + + synchronized void reportMetadataInternal(ClusterMetadata metadata) + { + updateMapping(metadata); + Topology topology = AccordTopology.createAccordTopology(metadata); + if (Invariants.isParanoid()) + { + for (Node.Id node : topology.nodes()) { - updateMapping(metadata); - Topology topology = AccordTopology.createAccordTopology(metadata); - Topology current = isEmpty() ? Topology.EMPTY : currentTopology(); - reportTopology(topology); - Sets.SetView removedNodes = Sets.difference(current.nodes(), topology.nodes()); - if (!removedNodes.isEmpty()) - onNodesRemoved(topology.epoch(), removedNodes); + if (mapping.mappedEndpointOrNull(node) == null) + throw new IllegalStateException("Epoch " + topology.epoch() + " has node " + node + " but mapping does not!"); } - }); + } + Topology current = isEmpty() ? Topology.EMPTY : currentTopology(); + reportTopology(topology); + // for all nodes removed, or pending removal, mark them as removed so we don't wait on their replies + Sets.SetView removedNodes = Sets.difference(current.nodes(), topology.nodes()); + if (!removedNodes.isEmpty()) + { + onNodesRemoved(topology.epoch(), removedNodes); + for (Node.Id node : removedNodes) + { + if (shareShard(current, node, localId)) + AccordService.instance().tryMarkRemoved(current, node); + } + } + } + + private static boolean shareShard(Topology current, Node.Id target, Node.Id self) + { + for (Shard shard : current.shards()) + { + if (!shard.contains(target)) continue; + if (shard.contains(self)) return true; + } + return false; } - private synchronized void onNodesRemoved(long epoch, Set removed) + public synchronized void onNodesRemoved(long epoch, Set removed) { + if (removed.isEmpty()) return; syncPropagator.onNodesRemoved(removed); for (long oldEpoch : nonCompletedEpochsBefore(epoch)) { for (Node.Id node : removed) - receiveRemoteSyncComplete(node, oldEpoch); + receiveRemoteSyncCompletePreListenerNotify(node, oldEpoch); } listeners.forEach(l -> l.onRemoveNodes(epoch, removed)); } @@ -350,7 +387,6 @@ void maybeReportMetadata(ClusterMetadata metadata) @Override public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean fromSnapshot) { - maybeReportMetadata(prev); maybeReportMetadata(next); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 2b60bfcd514b..8dabbc7bb3f9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -20,9 +20,12 @@ import java.util.Map; import java.util.NavigableMap; +import java.util.Set; import java.util.function.BiFunction; import javax.annotation.Nullable; +import com.google.common.annotations.VisibleForTesting; + import accord.api.Agent; import accord.api.DataStore; import accord.api.Key; @@ -79,6 +82,12 @@ public static AccordSafeCommandStore create(PreLoadContext preLoadContext, return new AccordSafeCommandStore(preLoadContext, commands, timestampsForKey, commandsForKey, commandsForRanges, commandStore); } + @VisibleForTesting + public Set commandsForKeysKeys() + { + return commandsForKeys.keySet(); + } + @Override protected AccordSafeCommand getCommandInternal(TxnId txnId) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 9026de81a929..a90ca24351e3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -18,10 +18,15 @@ package org.apache.cassandra.service.accord; +import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; @@ -38,79 +43,102 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; import com.google.common.base.Throwables; +import com.google.common.collect.Sets; import com.google.common.primitives.Ints; + +import accord.coordinate.Barrier; +import accord.coordinate.CoordinateSyncPoint; +import accord.coordinate.Exhausted; +import accord.coordinate.FailureAccumulator; +import accord.coordinate.Invalidated; +import accord.coordinate.TopologyMismatch; +import accord.impl.CoordinateDurabilityScheduling; +import accord.local.Command; +import accord.local.PreLoadContext; +import accord.primitives.Ranges; +import accord.primitives.SyncPoint; +import accord.topology.Topology; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.cql3.statements.RequestValidations; +import org.apache.cassandra.exceptions.RequestExecutionException; +import org.apache.cassandra.repair.SharedContext; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.service.accord.exceptions.ReadExhaustedException; +import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.ownership.DataPlacement; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.BarrierType; import accord.api.LocalConfig; import accord.api.Result; -import accord.coordinate.Barrier; -import accord.coordinate.CoordinateSyncPoint; import accord.coordinate.CoordinationFailed; -import accord.coordinate.Exhausted; -import accord.coordinate.FailureAccumulator; import accord.coordinate.Preempted; import accord.coordinate.Timeout; -import accord.coordinate.TopologyMismatch; +import accord.api.RoutingKey; +import accord.coordinate.ExecuteSyncPoint; +import accord.coordinate.tracking.AllTracker; +import accord.coordinate.tracking.RequestStatus; import accord.impl.AbstractConfigurationService; -import accord.impl.CoordinateDurabilityScheduling; import accord.impl.DefaultLocalListeners; import accord.impl.DefaultRemoteListeners; import accord.impl.DefaultRequestTimeouts; import accord.impl.SizeOfIntersectionSorter; -import accord.local.Command; -import accord.local.CommandStore; import accord.impl.progresslog.DefaultProgressLogs; +import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.DurableBefore; import accord.local.KeyHistory; import accord.local.Node; import accord.local.Node.Id; import accord.local.NodeTimeService; -import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.SaveStatus; import accord.local.ShardDistributor.EvenSplit; import accord.local.Status; import accord.local.cfk.CommandsForKey; +import accord.messages.Callback; +import accord.messages.ReadData; import accord.messages.Request; +import accord.messages.WaitUntilApplied; import accord.primitives.Keys; -import accord.primitives.Ranges; import accord.primitives.Seekable; import accord.primitives.Seekables; -import accord.primitives.SyncPoint; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; import accord.primitives.TxnId; +import accord.topology.Topologies; import accord.topology.TopologyManager; import accord.utils.DefaultRandom; import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.concurrent.Shutdownable; -import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.statements.RequestValidations; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; +import org.apache.cassandra.dht.AccordSplitter; import org.apache.cassandra.exceptions.ReadTimeoutException; -import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; -import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; @@ -118,20 +146,15 @@ import org.apache.cassandra.service.accord.api.AccordTopologySorter; import org.apache.cassandra.service.accord.api.CompositeTopologySorter; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.exceptions.ReadExhaustedException; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; -import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; -import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.TableMigrationState; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.tracing.Tracing; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Blocking; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; @@ -276,6 +299,19 @@ public List debugTxnBlockedGraph(TxnId txnId) { return Collections.emptyList(); } + + @Nullable + @Override + public Long minEpoch(Collection ranges) + { + return null; + } + + @Override + public void tryMarkRemoved(Topology topology, Id node) + { + + } }; private static volatile IAccordService instance = null; @@ -350,6 +386,7 @@ private AccordService(Id localId) Invariants.checkState(localId != null, "static localId must be set before instantiating AccordService"); logger.info("Starting accord with nodeId {}", localId); AccordAgent agent = FBUtilities.construct(CassandraRelevantProperties.ACCORD_AGENT_CLASS.getString(AccordAgent.class.getName()), "AccordAgent"); + agent.setNodeId(localId); this.configService = new AccordConfigurationService(localId); this.fastPathCoordinator = AccordFastPathCoordinator.create(localId, configService); this.messageSink = new AccordMessageSink(agent, configService); @@ -387,10 +424,51 @@ public synchronized void startup() if (state != State.INIT) return; journal.start(node); - configService.start(); + ClusterMetadataService cms = ClusterMetadataService.instance(); + class Ref { List historic = Collections.emptyList();} + Ref ref = new Ref(); + configService.start((optMaxEpoch -> { + // when max epoch isn't know, this means the node started for the first time; check cluster's min epoch + // when max epoch is known, then there is no reason to discover min epoch (we already did it) + if (optMaxEpoch.isPresent()) return; + List historic = ref.historic = discoverHistoric(node, cms); + for (ClusterMetadata m : historic) + configService.reportMetadataInternal(m); + })); + ClusterMetadata current = cms.metadata(); + if (!ref.historic.isEmpty()) + { + List historic = ref.historic; + long lastHistoric = ref.historic.get(historic.size() - 1).epoch.getEpoch(); + if (lastHistoric + 1 < current.epoch.getEpoch()) + { + // new epochs added while loading... load the deltas + for (ClusterMetadata metadata : tcmLoadRange(lastHistoric + 1, current.epoch.getEpoch())) + { + historic.add(metadata); + configService.reportMetadataInternal(metadata); + } + } + + // sync doesn't happen when this node isn't in the epoch + //TODO (now, correctness): sync points use "closed" and not "syncComplete", so need to call TM.epochRedundant or TM.onEpochClosed + // epochRedundant implies all txn that have been proposed for this epoch have been executed "globally" - we don't have this knowlege + // epochClosed implies no "new" txn can be proposed + for (ClusterMetadata m : historic) + { + Topology t = AccordTopology.createAccordTopology(m); + long epoch = t.epoch(); + for (Id id : t.nodes()) + node.onRemoteSyncComplete(id, epoch); + //TODO (correctness): is this true? + node.onEpochClosed(t.ranges(), t.epoch()); + node.onEpochRedundant(t.ranges(), t.epoch()); + } + } + configService.reportMetadataInternal(current); fastPathCoordinator.start(); - ClusterMetadataService.instance().log().addListener(fastPathCoordinator); + cms.log().addListener(fastPathCoordinator); durabilityScheduling.setGlobalCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordGlobalDurabilityCycle(SECONDS)), SECONDS); durabilityScheduling.setShardCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityCycle(SECONDS)), SECONDS); durabilityScheduling.setTxnIdLag(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityTxnIdLag(SECONDS)), TimeUnit.SECONDS); @@ -399,6 +477,74 @@ public synchronized void startup() state = State.STARTED; } + private List discoverHistoric(Node node, ClusterMetadataService cms) + { + ClusterMetadata current = cms.metadata(); + Topology topology = AccordTopology.createAccordTopology(current); + Ranges localRanges = topology.rangesForNode(node.id()); + if (!localRanges.isEmpty()) // already joined, nothing to see here + return Collections.emptyList(); + + Map> peers = new HashMap<>(); + for (KeyspaceMetadata keyspace : current.schema.getKeyspaces()) + { + List tables = keyspace.tables.stream().filter(TableMetadata::requiresAccordSupport).collect(Collectors.toList()); + if (tables.isEmpty()) + continue; + DataPlacement placement = current.placements.get(keyspace.params.replication); + DataPlacement whenSettled = current.writePlacementAllSettled(keyspace); + Sets.SetView alive = Sets.intersection(whenSettled.writes.byEndpoint().keySet(), placement.writes.byEndpoint().keySet()); + InetAddressAndPort self = FBUtilities.getBroadcastAddressAndPort(); + whenSettled.writes.forEach((range, group) -> { + if (group.endpoints().contains(self)) + { + for (InetAddressAndPort peer : group.endpoints()) + { + if (!alive.contains(peer)) continue; + for (TableMetadata table : tables) + peers.computeIfAbsent(peer, i -> new HashSet<>()).add(AccordTopology.fullRange(table.id)); + } + } + }); + } + if (peers.isEmpty()) + return Collections.emptyList(); + + Long minEpoch = findMinEpoch(SharedContext.Global.instance, peers); + if (minEpoch == null) + return Collections.emptyList(); + return tcmLoadRange(minEpoch, current.epoch.getEpoch()); + } + + private static List tcmLoadRange(long min, long max) + { + List afterLoad = ClusterMetadataService.instance().processor().reconstructFull(Epoch.create(min - 1), Epoch.create(max)); + while (!afterLoad.isEmpty() && afterLoad.get(0).epoch.getEpoch() < min) + afterLoad.remove(0); + assert !afterLoad.isEmpty() : String.format("TCM was unable to return the needed epochs: %d -> %d", min, max); + assert afterLoad.get(0).epoch.getEpoch() == min : String.format("Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); + assert afterLoad.get(afterLoad.size() - 1).epoch.getEpoch() == max : String.format("Unexpected epoch: expected %d but given %d", max, afterLoad.get(afterLoad.size() - 1).epoch.getEpoch()); + return afterLoad; + } + + @VisibleForTesting + static Long findMinEpoch(SharedContext context, Map> peers) + { + try + { + return FetchMinEpoch.fetch(context, peers).get(); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e.getCause()); + } + } + @Override public IVerbHandler verbHandler() { @@ -437,12 +583,14 @@ public IVerbHandler verbHandler() if (cause instanceof Timeout) { TxnId txnId = ((Timeout) cause).txnId(); + ((AccordAgent) node.agent()).onFailedBarrier(txnId, keysOrRanges, cause); metrics.timeouts.mark(); throw newBarrierTimeout(txnId, barrierType, isForWrite, keysOrRanges); } if (cause instanceof Preempted) { TxnId txnId = ((Preempted) cause).txnId(); + ((AccordAgent) node.agent()).onFailedBarrier(txnId, keysOrRanges, cause); //TODO need to improve // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. // Protocol also doesn't have a way to denote "unknown" outcome, so using a timeout as the closest match @@ -451,6 +599,7 @@ public IVerbHandler verbHandler() if (cause instanceof Exhausted) { TxnId txnId = ((Exhausted) cause).txnId(); + ((AccordAgent) node.agent()).onFailedBarrier(txnId, keysOrRanges, cause); // this case happens when a non-timeout exception is seen, and we are unable to move forward metrics.failures.mark(); throw newBarrierExhausted(txnId, barrierType, isForWrite, keysOrRanges); @@ -973,6 +1122,30 @@ private static CommandStoreTxnBlockedGraph.TxnState populate(CommandStoreTxnBloc return cmdTxnState.build(); } + @Nullable + @Override + public Long minEpoch(Collection ranges) + { + return node.topology().minEpoch(); + } + + @Override + public void tryMarkRemoved(Topology topology, Id target) + { + if (node.commandStores().count() == 0) return; // when starting up stores can be empty, so ignore + Ranges ranges = topology.rangesForNode(target); + if (ranges.isEmpty()) return; + tryMarkRemoved(ranges, 0).begin(node().agent()); + } + + private AsyncChain> tryMarkRemoved(Ranges ranges, int attempt) + { + return CoordinateSyncPoint.exclusive(node, ranges) + .recover(t -> + //TODO (operability): make this configurable / monitorable? + attempt <= 3 && t instanceof Invalidated || t instanceof Preempted || t instanceof Timeout ? tryMarkRemoved(ranges, attempt + 1) : null); + } + public Node node() { return node; @@ -1073,4 +1246,227 @@ public CompactionInfo getCompactionInfo() })); return new CompactionInfo(redundantBefores, ranges, durableBefore.get()); } + + @Override + public void awaitTableDrop(TableId id) + { + // Need to make sure no existing txn are still being processed for this table... this is only used by DROP TABLE so NEW txn are expected to be blocked, so just need to "wait" for existing ones to complete + Topology topology = node.topology().current(); + List ranges = topology.reduce(new ArrayList<>(), + s -> ((TokenRange) s.range).table().equals(id), + (accum, s) -> { + accum.add((TokenRange) s.range); + return accum; + }); + if (ranges.isEmpty()) return; // nothing to see here + + ColumnFamilyStore cfs = Schema.instance.getColumnFamilyStoreInstance(id); + Invariants.checkState(cfs != null, "Unable to find table %s", id); + BigInteger targetSplitSize = BigInteger.valueOf(Math.max(1, cfs.estimateKeys() / 1_000_000)); + + List> syncs = new ArrayList<>(ranges.size()); + for (TokenRange range : ranges) + syncs.add(awaitTableDrop(cfs, range, targetSplitSize)); + AsyncChain all = AsyncChains.allOf(syncs); + try + { + AsyncChains.getBlocking(all); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); + } + } + + private AsyncChain awaitTableDrop(ColumnFamilyStore cfs, TokenRange range, BigInteger targetSplitSize) + { + List splits = split(cfs, range, targetSplitSize); + List> syncs = new ArrayList<>(splits.size()); + for (TokenRange tr : splits) + syncs.add(awaitTableDropSubRange(tr)); + return AsyncChains.allOf(syncs); + } + + private List split(ColumnFamilyStore cfs, TokenRange range, BigInteger targetSplitSize) + { + if (targetSplitSize.equals(BigInteger.ONE)) return Collections.singletonList(range); + + AccordSplitter splitter = cfs.getPartitioner().accordSplitter().apply(Ranges.single(range)); + RoutingKey remainingStart = range.start(); + + BigInteger rangeSize = splitter.sizeOf(range); + BigInteger divide = splitter.divide(rangeSize, targetSplitSize); + BigInteger rangeStep = divide.equals(BigInteger.ZERO) ? rangeSize : BigInteger.ONE.max(divide); + BigInteger offset = BigInteger.ZERO; + List result = new ArrayList<>(); + + while (splitter.compare(offset, rangeSize) < 0) + { + BigInteger remaining = rangeSize.subtract(offset); + BigInteger length = remaining.min(rangeStep); + + TokenRange next = splitter.subRange(range, offset, splitter.add(offset, length)); + result.add(next); + remainingStart = next.end(); + offset = offset.add(length); + } + + if (!remainingStart.equals(range.end())) + result.add(range.newRange(remainingStart, range.end())); + assert result.get(0).start().equals(range.start()) : String.format("Starting range %s does not have the same start as %s", result.get(0), range); + assert result.get(result.size() - 1).end().equals(range.end()) : String.format("Ending range %s does not have the same end as %s", result.get(result.size() - 1), range); + return result; + } + + private AsyncChain awaitTableDropSubRange(TokenRange range) + { + return awaitTableDropSubRange(Ranges.single(range), 0); + } + + private AsyncChain awaitTableDropSubRange(Ranges ranges, int attempt) + { + return exclusiveSyncPoint(ranges, attempt) + .flatMap(s -> s == null ? AsyncChains.success(null) : Await.coordinate(node, s)); + } + + private AsyncChain> exclusiveSyncPoint(Ranges ranges, int attempt) + { + //TODO (on merge): CASSANDRA-19769 has the same logic... should this be refactored? Would make it nice so we could split the range on retries? + return CoordinateSyncPoint.exclusive(node, ranges) + .recover(t -> { + //TODO (operability): make this configurable / monitorable? + if (attempt > 3) return null; + switch (shouldRetry(t)) + { + case SUCCESS: + return AsyncChains.success(null); + case RETRY: + return exclusiveSyncPoint(ranges, attempt + 1); + case FAIL: + return null; + default: + throw new UnsupportedOperationException(); + } + }); + } + + private enum RetryDecission { SUCCESS, RETRY, FAIL } + private static RetryDecission shouldRetry(Throwable t) + { + if (t.getClass() == ExecuteSyncPoint.SyncPointErased.class) + return RetryDecission.SUCCESS; + if (t instanceof Invalidated || t instanceof Preempted || t instanceof Timeout) + return RetryDecission.RETRY; + return RetryDecission.FAIL; + } + + // TODO (duplication): this is 95% of accord.coordinate.CoordinateShardDurable + // we already report all this information to EpochState; would be better to use that + // Taken from ListStore... + private static class Await extends AsyncResults.SettableResult> implements Callback + { + private final Node node; + private final AllTracker tracker; + private final SyncPoint exclusiveSyncPoint; + + private Await(Node node, SyncPoint exclusiveSyncPoint) + { + Topologies topologies = node.topology().forEpoch(exclusiveSyncPoint.keysOrRanges, exclusiveSyncPoint.sourceEpoch()); + this.node = node; + this.tracker = new AllTracker(topologies); + this.exclusiveSyncPoint = exclusiveSyncPoint; + } + + public static AsyncChain coordinate(Node node, SyncPoint sp) + { + return node.withEpoch(sp.sourceEpoch(), () -> { + Await coordinate = new Await(node, sp); + coordinate.start(); + AsyncChain chain = coordinate.map(i -> null); + return chain.recover(t -> { + switch (shouldRetry(t)) + { + case SUCCESS: return AsyncChains.success(null); + case RETRY: return coordinate(node, sp); + case FAIL: return null; + default: throw new UnsupportedOperationException(); + } + }); + }); + } + + private void start() + { + node.send(tracker.nodes(), to -> new WaitUntilApplied(to, tracker.topologies(), exclusiveSyncPoint.syncId, exclusiveSyncPoint.keysOrRanges, exclusiveSyncPoint.syncId.epoch()), this); + } + @Override + public void onSuccess(Node.Id from, ReadData.ReadReply reply) + { + if (!reply.isOk()) + { + ReadData.CommitOrReadNack nack = (ReadData.CommitOrReadNack) reply; + switch (nack) + { + default: throw new AssertionError("Unhandled: " + reply); + + case Insufficient: + CoordinateSyncPoint.sendApply(node, from, exclusiveSyncPoint); + return; + case Rejected: + tryFailure(new RuntimeException(nack.name())); + case Redundant: + tryFailure(new ExecuteSyncPoint.SyncPointErased()); + return; + case Invalid: + tryFailure(new Invalidated(exclusiveSyncPoint.syncId, exclusiveSyncPoint.homeKey)); + return; + } + } + else + { + if (tracker.recordSuccess(from) == RequestStatus.Success) + { + node.configService().reportEpochRedundant(exclusiveSyncPoint.keysOrRanges, exclusiveSyncPoint.syncId.epoch()); + trySuccess(exclusiveSyncPoint); + } + } + } + + private Throwable cause; + + @Override + public void onFailure(Node.Id from, Throwable failure) + { + synchronized (this) + { + if (cause == null) cause = failure; + else + { + try + { + cause.addSuppressed(failure); + } + catch (Throwable t) + { + // can not always add suppress + node.agent().onUncaughtException(failure); + } + } + failure = cause; + } + if (tracker.recordFailure(from) == RequestStatus.Failed) + tryFailure(failure); + } + + @Override + public void onCallbackFailure(Node.Id from, Throwable failure) + { + tryFailure(failure); + } + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java b/src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java index 2502fa1a6324..f729759fe6b9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStaleReplicas.java @@ -44,7 +44,7 @@ public class AccordStaleReplicas implements MetadataValue private final Set staleIds; private final Epoch lastModified; - AccordStaleReplicas(Set staleIds, Epoch lastModified) + public AccordStaleReplicas(Set staleIds, Epoch lastModified) { this.staleIds = staleIds; this.lastModified = lastModified; diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopology.java b/src/java/org/apache/cassandra/service/accord/AccordTopology.java index 45f3b3fd76dd..eb8f660b0af4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopology.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopology.java @@ -65,16 +65,17 @@ public static Node.Id tcmIdToAccord(NodeId nodeId) private static class ShardLookup extends HashMap { - private Shard createOrReuse(accord.primitives.Range range, SortedArrayList nodes, Set fastPathElectorate, Set joining) + private Shard createOrReuse(boolean pendingRemoval, accord.primitives.Range range, SortedArrayList nodes, Set fastPathElectorate, Set joining) { Shard prev = get(range); if (prev != null + && prev.pendingRemoval == pendingRemoval && Objects.equals(prev.nodes, nodes) && Objects.equals(prev.fastPathElectorate, fastPathElectorate) && Objects.equals(prev.joining, joining)) return prev; - return new Shard(range, nodes, fastPathElectorate, joining); + return new Shard(range, nodes, fastPathElectorate, joining, pendingRemoval); } } @@ -109,7 +110,7 @@ Shard createForTable(TableMetadata metadata, Set unavailable, Map fastPath = strategyFor(metadata).calculateFastPath(nodes, unavailable, dcMap); - return lookup.createOrReuse(tokenRange, nodes, fastPath, pending); + return lookup.createOrReuse(metadata.params.pendingDrop, tokenRange, nodes, fastPath, pending); } private static KeyspaceShard forRange(KeyspaceMetadata keyspace, Range range, Directory directory, VersionedEndpoints.ForRange reads, VersionedEndpoints.ForRange writes) diff --git a/src/java/org/apache/cassandra/service/accord/FetchMinEpoch.java b/src/java/org/apache/cassandra/service/accord/FetchMinEpoch.java new file mode 100644 index 000000000000..0e40afc6c12d --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/FetchMinEpoch.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Iterators; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.repair.SharedContext; +import org.apache.cassandra.utils.Backoff; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; + +public class FetchMinEpoch +{ + public static final IVersionedSerializer serializer = new IVersionedSerializer<>() + { + + @Override + public void serialize(FetchMinEpoch t, DataOutputPlus out, int version) throws IOException + { + out.writeUnsignedVInt32(t.ranges.size()); + for (TokenRange range : t.ranges) + TokenRange.serializer.serialize(range, out, version); + } + + @Override + public FetchMinEpoch deserialize(DataInputPlus in, int version) throws IOException + { + int size = in.readUnsignedVInt32(); + List ranges = new ArrayList<>(size); + for (int i = 0; i < size; i++) + ranges.add(TokenRange.serializer.deserialize(in, version)); + return new FetchMinEpoch(ranges); + } + + @Override + public long serializedSize(FetchMinEpoch t, int version) + { + long size = TypeSizes.sizeofUnsignedVInt(t.ranges.size()); + for (TokenRange range : t.ranges) + size += TokenRange.serializer.serializedSize(range, version); + return size; + } + }; + public static final IVerbHandler handler = new IVerbHandler() + { + @Override + public void doVerb(Message message) throws IOException + { + Long epoch = AccordService.instance().minEpoch(message.payload.ranges); + MessagingService.instance().respond(new Response(epoch), message); + } + }; + public final Collection ranges; + + public FetchMinEpoch(Collection ranges) + { + this.ranges = ranges; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FetchMinEpoch that = (FetchMinEpoch) o; + return Objects.equals(ranges, that.ranges); + } + + @Override + public int hashCode() + { + return Objects.hash(ranges); + } + + @Override + public String toString() + { + return "FetchMinEpoch{" + + "ranges=" + ranges + + '}'; + } + + public static Future fetch(SharedContext context, Map> peers) + { + List> accum = new ArrayList<>(peers.size()); + for (Map.Entry> e : peers.entrySet()) + accum.add(fetch(context, e.getKey(), e.getValue())); + return FutureCombiner.successfulOf(accum).map(ls -> { + Long min = null; + for (Long l : ls) + { + if (l == null) continue; + if (min == null) min = l; + else min = Math.min(min, l); + } + return min; + }); + } + + @VisibleForTesting + static Future fetch(SharedContext context, InetAddressAndPort to, Set value) + { + FetchMinEpoch req = new FetchMinEpoch(value); + Backoff backoff = Backoff.fromConfig(context, DatabaseDescriptor.getAccord().minEpochSyncRetry); + return context.messaging().sendWithRetries(backoff, context.optionalTasks()::schedule, + Verb.ACCORD_FETCH_MIN_EPOCH_REQ, req, + Iterators.cycle(to), + (i1, i2, i3) -> true, + (i1, i2, i3, i4) -> null) + .map(m -> m.payload.minEpoch); + } + + public static class Response + { + public static final IVersionedSerializer serializer = new IVersionedSerializer() + { + @Override + public void serialize(Response t, DataOutputPlus out, int version) throws IOException + { + out.writeBoolean(t.minEpoch != null); + if (t.minEpoch != null) + out.writeUnsignedVInt(t.minEpoch); + } + + @Override + public Response deserialize(DataInputPlus in, int version) throws IOException + { + boolean notNull = in.readBoolean(); + return new Response(notNull ? in.readUnsignedVInt() : null); + } + + @Override + public long serializedSize(Response t, int version) + { + int size = TypeSizes.BOOL_SIZE; + if (t.minEpoch != null) + size += TypeSizes.sizeofUnsignedVInt(t.minEpoch); + return size; + } + }; + @Nullable + public final Long minEpoch; + + public Response(@Nullable Long minEpoch) + { + this.minEpoch = minEpoch; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Response response = (Response) o; + return Objects.equals(minEpoch, response.minEpoch); + } + + @Override + public int hashCode() + { + return Objects.hash(minEpoch); + } + + @Override + public String toString() + { + return "Response{" + + "minEpoch=" + minEpoch + + '}'; + } + } +} diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 2daa31e27228..a27a29f10496 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -31,6 +31,7 @@ import accord.api.BarrierType; import accord.local.CommandStores; import accord.local.DurableBefore; +import accord.local.Node; import accord.local.Node.Id; import accord.local.RedundantBefore; import accord.messages.Request; @@ -38,6 +39,7 @@ import accord.primitives.Seekables; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.topology.Topology; import accord.topology.TopologyManager; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.db.ColumnFamilyStore; @@ -47,6 +49,7 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordScheduler; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.tcm.Epoch; @@ -54,6 +57,8 @@ import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; +import java.util.Collection; + import static com.google.common.base.Preconditions.checkNotNull; @@ -150,4 +155,12 @@ public CompactionInfo(Int2ObjectHashMap redundantBefores, Int2O default Id nodeId() { throw new UnsupportedOperationException(); } List debugTxnBlockedGraph(TxnId txnId); + @Nullable + Long minEpoch(Collection ranges); + + void tryMarkRemoved(Topology topology, Node.Id node); + default void awaitTableDrop(TableId id) + { + + } } diff --git a/src/java/org/apache/cassandra/service/accord/TokenRange.java b/src/java/org/apache/cassandra/service/accord/TokenRange.java index be3eaa80ca09..aed027097970 100644 --- a/src/java/org/apache/cassandra/service/accord/TokenRange.java +++ b/src/java/org/apache/cassandra/service/accord/TokenRange.java @@ -48,13 +48,30 @@ public TokenRange(AccordRoutingKey start, AccordRoutingKey end) public TableId table() { - return ((AccordRoutingKey) start()).table(); + return start().table(); + } + + @Override + public AccordRoutingKey start() + { + return (AccordRoutingKey) super.start(); + } + + @Override + public AccordRoutingKey end() + { + return (AccordRoutingKey) super.end(); + } + + public boolean isFullRange() + { + return start().kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL && end().kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.SENTINEL; } @VisibleForTesting public Range withTable(TableId table) { - return new TokenRange(((AccordRoutingKey) start()).withTable(table), ((AccordRoutingKey) end()).withTable(table)); + return new TokenRange(start().withTable(table), end().withTable(table)); } public static TokenRange fullRange(TableId table) @@ -80,20 +97,20 @@ public RoutingKey someIntersectingRoutingKey(Ranges ranges) public org.apache.cassandra.dht.Range toKeyspaceRange () { IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); - AccordRoutingKey start = (AccordRoutingKey) start(); - AccordRoutingKey end = (AccordRoutingKey) end(); + AccordRoutingKey start = start(); + AccordRoutingKey end = end(); Token left = start instanceof SentinelKey ? partitioner.getMinimumToken() : start.token(); Token right = end instanceof SentinelKey ? partitioner.getMinimumToken() : end.token(); return new org.apache.cassandra.dht.Range<>(left, right); } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final IVersionedSerializer serializer = new IVersionedSerializer<>() { @Override public void serialize(TokenRange range, DataOutputPlus out, int version) throws IOException { - AccordRoutingKey.serializer.serialize((AccordRoutingKey) range.start(), out, version); - AccordRoutingKey.serializer.serialize((AccordRoutingKey) range.end(), out, version); + AccordRoutingKey.serializer.serialize(range.start(), out, version); + AccordRoutingKey.serializer.serialize(range.end(), out, version); } @Override @@ -106,8 +123,8 @@ public TokenRange deserialize(DataInputPlus in, int version) throws IOException @Override public long serializedSize(TokenRange range, int version) { - return AccordRoutingKey.serializer.serializedSize((AccordRoutingKey) range.start(), version) - + AccordRoutingKey.serializer.serializedSize((AccordRoutingKey) range.end(), version); + return AccordRoutingKey.serializer.serializedSize(range.start(), version) + + AccordRoutingKey.serializer.serializedSize(range.end(), version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index bf351c323fda..23a35208535e 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -69,10 +69,17 @@ public class AccordAgent implements Agent { private static final Logger logger = LoggerFactory.getLogger(AccordAgent.class); + protected Node.Id self; + // TODO (required): this should be configurable and have exponential back-off, escaping to operator input past a certain number of retries private long retryBootstrapDelayMicros = SECONDS.toMicros(1L); private final RandomSource random = new DefaultRandom(); + public void setNodeId(Node.Id id) + { + self = id; + } + public void setRetryBootstrapDelay(long delay, TimeUnit units) { retryBootstrapDelayMicros = units.toMicros(delay); @@ -93,6 +100,11 @@ public void onInconsistentTimestamp(Command command, Timestamp prev, Timestamp n throw error; } + public void onFailedBarrier(TxnId id, Seekables keysOrRanges, Throwable cause) + { + + } + @Override public void onFailedBootstrap(String phase, Ranges ranges, Runnable retry, Throwable failure) { diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java index 2ffc3c7cbf8c..39fae55d929b 100644 --- a/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java +++ b/src/java/org/apache/cassandra/service/accord/fastpath/InheritKeyspaceFastPathStrategy.java @@ -28,7 +28,7 @@ public class InheritKeyspaceFastPathStrategy implements FastPathStrategy { - static final FastPathStrategy instance = new InheritKeyspaceFastPathStrategy(); + public static final FastPathStrategy instance = new InheritKeyspaceFastPathStrategy(); private static final Map SCHEMA_PARAMS = ImmutableMap.of(Kind.KEY, Kind.INHERIT_KEYSPACE.name()); diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java index 10828202b68a..13eebb8ce816 100644 --- a/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java +++ b/src/java/org/apache/cassandra/service/accord/fastpath/ParameterizedFastPathStrategy.java @@ -50,8 +50,8 @@ public class ParameterizedFastPathStrategy implements FastPathStrategy { - static final String SIZE = "size"; - static final String DCS = "dcs"; + public static final String SIZE = "size"; + public static final String DCS = "dcs"; private static final Joiner DC_JOINER = Joiner.on(','); private static final Pattern COMMA_SEPARATOR = Pattern.compile(","); private static final Pattern COLON_SEPARATOR = Pattern.compile(":"); @@ -64,7 +64,7 @@ static class WeightedDc implements Comparable public void serialize(WeightedDc dc, DataOutputPlus out, Version version) throws IOException { out.writeUTF(dc.name); - out.writeUnsignedVInt(dc.weight); + out.writeUnsignedVInt32(dc.weight); out.writeBoolean(dc.autoWeight); } @@ -237,7 +237,7 @@ private static ConfigurationException cfe(String fmt, Object... args) return new ConfigurationException(String.format(fmt, args)); } - static ParameterizedFastPathStrategy fromMap(Map map) + public static ParameterizedFastPathStrategy fromMap(Map map) { if (!map.containsKey(SIZE)) throw cfe("fast_path must be set to 'keyspace' or 'default' or a map defining '%s' and optionally '%s'", SIZE, DCS); diff --git a/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java b/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java index 37d51b6c8248..8a278faaa291 100644 --- a/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java +++ b/src/java/org/apache/cassandra/service/accord/fastpath/SimpleFastPathStrategy.java @@ -31,7 +31,7 @@ public class SimpleFastPathStrategy implements FastPathStrategy { - static final SimpleFastPathStrategy instance = new SimpleFastPathStrategy(); + public static final SimpleFastPathStrategy instance = new SimpleFastPathStrategy(); private static final Map SCHEMA_PARAMS = ImmutableMap.of(Kind.KEY, Kind.SIMPLE.name()); diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java index fa6b146d77c2..731aabb73507 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java @@ -34,6 +34,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -204,6 +205,8 @@ public ConsensusMigrationState withRangesRepairedAtEpoch(TableMetadata metadata, public ConsensusMigrationState withMigrationsRemovedFor(Set removed) { + if (tableStates.isEmpty() || Sets.intersection(tableStates.keySet(), removed).isEmpty()) + return this; ImmutableMap.Builder updated = ImmutableMap.builder(); putUnchanged(tableStates, updated, removed); return new ConsensusMigrationState(lastModified, updated.build()); diff --git a/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java b/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java index 8cbef514d810..517e8fd87268 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java @@ -19,7 +19,8 @@ package org.apache.cassandra.service.consensus.migration; import org.apache.cassandra.service.consensus.TransactionalMode; -import org.apache.cassandra.utils.LocalizeString; + +import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** * This tracks the state of a migration either from Paxos -> Accord, Accord [interop mode a] -> Accord [interop mode b] or Accord -> Paxos. @@ -64,7 +65,7 @@ public static TransactionalMigrationFromMode fromOrdinal(int ordinal) public static TransactionalMigrationFromMode fromString(String name) { - return valueOf(LocalizeString.toLowerCaseLocalized(name)); + return valueOf(toLowerCaseLocalized(name)); } public boolean migratingFromAccord() @@ -81,4 +82,9 @@ public boolean isMigrating() { return this != none; } + + public String asCqlParam() + { + return String.format("transactional_migration_from = '%s'", toLowerCaseLocalized(this.name())); + } } diff --git a/src/java/org/apache/cassandra/tcm/CMSOperations.java b/src/java/org/apache/cassandra/tcm/CMSOperations.java index 5b21acd1428e..a0917584d925 100644 --- a/src/java/org/apache/cassandra/tcm/CMSOperations.java +++ b/src/java/org/apache/cassandra/tcm/CMSOperations.java @@ -34,6 +34,7 @@ import org.apache.cassandra.db.virtual.ClusterMetadataDirectoryTable; import org.apache.cassandra.db.virtual.ClusterMetadataLogTable; import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.membership.NodeState; import org.apache.cassandra.tcm.membership.NodeVersion; @@ -42,6 +43,7 @@ import org.apache.cassandra.tcm.sequences.InProgressSequences; import org.apache.cassandra.tcm.sequences.ReconfigureCMS; import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.tcm.sequences.DropAccordTable; import org.apache.cassandra.tcm.transformations.Unregister; import org.apache.cassandra.tcm.transformations.cms.AdvanceCMSReconfiguration; import org.apache.cassandra.utils.FBUtilities; @@ -288,4 +290,19 @@ private Map> convertToStringValues(Map nodeIds); public Map> dumpDirectory(boolean includeTokens); public Map> dumpLog(long startEpoch, long endEpoch); + + public void resumeDropAccordTable(String tableId); } diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index 9c497e2d1524..321a69b7d3fe 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -82,7 +82,7 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static org.apache.cassandra.config.CassandraRelevantProperties.LINE_SEPARATOR; import static org.apache.cassandra.db.TypeSizes.sizeof; -import static org.apache.cassandra.tcm.serialization.Version.V2; +import static org.apache.cassandra.tcm.serialization.Version.MIN_ACCORD_VERSION; public class ClusterMetadata { @@ -205,16 +205,6 @@ private ClusterMetadata(int metadataIdentifier, this.accordStaleReplicas = accordStaleReplicas; } - public ClusterMetadata withDirectory(Directory directory) - { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions, accordStaleReplicas); - } - - public ClusterMetadata withPlacements(DataPlacements placements) - { - return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions, accordStaleReplicas); - } - public Set fullCMSMembers() { if (fullCMSEndpoints == null) @@ -632,7 +622,7 @@ public Transformer with(ConsensusMigrationState consensusMigrationState) public Transformer with(ExtensionKey key, ExtensionValue obj) { - if (MetadataKeys.CORE_METADATA.contains(key)) + if (MetadataKeys.CORE_METADATA.containsKey(key)) throw new IllegalArgumentException("Core cluster metadata objects should be addressed directly, " + "not using the associated MetadataKey"); @@ -655,7 +645,7 @@ public Transformer withIfAbsent(ExtensionKey key, ExtensionValue obj) public Transformer without(ExtensionKey key) { - if (MetadataKeys.CORE_METADATA.contains(key)) + if (MetadataKeys.CORE_METADATA.containsKey(key)) throw new IllegalArgumentException("Core cluster metadata objects should be addressed directly, " + "not using the associated MetadataKey"); if (extensions.remove(key) != null) @@ -754,7 +744,7 @@ public Transformed build() inProgressSequences, consensusMigrationState, extensions, - accordStaleReplicas), + accordStaleReplicas), ImmutableSet.copyOf(modifiedKeys)); } @@ -1030,7 +1020,7 @@ public void serialize(ClusterMetadata metadata, DataOutputPlus out, Version vers Directory.serializer.serialize(metadata.directory, out, version); TokenMap.serializer.serialize(metadata.tokenMap, out, version); DataPlacements.serializer.serialize(metadata.placements, out, version); - if (version.isAtLeast(V2)) + if (version.isAtLeast(MIN_ACCORD_VERSION)) { AccordFastPath.serializer.serialize(metadata.accordFastPath, out, version); ConsensusMigrationState.serializer.serialize(metadata.consensusMigrationState, out, version); @@ -1078,7 +1068,7 @@ public ClusterMetadata deserialize(DataInputPlus in, Version version) throws IOE ConsensusMigrationState consensusMigrationState; AccordStaleReplicas staleReplicas; - if (version.isAtLeast(V2)) + if (version.isAtLeast(MIN_ACCORD_VERSION)) { accordFastPath = AccordFastPath.serializer.deserialize(in, version); consensusMigrationState = ConsensusMigrationState.serializer.deserialize(in, version); @@ -1135,7 +1125,7 @@ public long serializedSize(ClusterMetadata metadata, Version version) TokenMap.serializer.serializedSize(metadata.tokenMap, version) + DataPlacements.serializer.serializedSize(metadata.placements, version); - if (version.isAtLeast(V2)) + if (version.isAtLeast(MIN_ACCORD_VERSION)) { size += AccordFastPath.serializer.serializedSize(metadata.accordFastPath, version) + ConsensusMigrationState.serializer.serializedSize(metadata.consensusMigrationState, version) + diff --git a/src/java/org/apache/cassandra/tcm/MetadataKeys.java b/src/java/org/apache/cassandra/tcm/MetadataKeys.java index 0aed60581e0a..0be621b20189 100644 --- a/src/java/org/apache/cassandra/tcm/MetadataKeys.java +++ b/src/java/org/apache/cassandra/tcm/MetadataKeys.java @@ -24,6 +24,7 @@ import java.util.Set; import java.util.function.Function; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.cassandra.tcm.extensions.ExtensionKey; @@ -45,15 +46,18 @@ public class MetadataKeys public static final MetadataKey IN_PROGRESS_SEQUENCES = make(CORE_NS, "sequences", "in_progress"); public static final MetadataKey CONSENSUS_MIGRATION_STATE = make(CORE_NS, "consensus", "migration_state"); - public static final ImmutableSet CORE_METADATA = ImmutableSet.of(SCHEMA, - NODE_DIRECTORY, - TOKEN_MAP, - DATA_PLACEMENTS, - ACCORD_FAST_PATH, - ACCORD_STALE_REPLICAS, - LOCKED_RANGES, - IN_PROGRESS_SEQUENCES, - CONSENSUS_MIGRATION_STATE); + public static final ImmutableMap>> CORE_METADATA + = ImmutableMap.>>builder() + .put(SCHEMA, cm -> cm.schema) + .put(NODE_DIRECTORY, cm -> cm.directory) + .put(TOKEN_MAP, cm -> cm.tokenMap) + .put(DATA_PLACEMENTS, cm -> cm.placements) + .put(LOCKED_RANGES, cm -> cm.lockedRanges) + .put(IN_PROGRESS_SEQUENCES, cm -> cm.inProgressSequences) + .put(ACCORD_FAST_PATH, cm -> cm.accordFastPath) + .put(ACCORD_STALE_REPLICAS, cm -> cm.accordStaleReplicas) + .put(CONSENSUS_MIGRATION_STATE, cm -> cm.consensusMigrationState) + .build(); public static MetadataKey make(String...parts) { @@ -67,6 +71,15 @@ public static MetadataKey make(String...parts) return new MetadataKey(b.toString()); } + public static MetadataValue extract(ClusterMetadata cm, MetadataKey key) + { + if (CORE_METADATA.containsKey(key)) + return CORE_METADATA.get(key).apply(cm); + if (!(key instanceof ExtensionKey)) + throw new IllegalArgumentException("Unknown key: " + key); + return cm.extensions.get(key); + } + public static ImmutableSet diffKeys(ClusterMetadata before, ClusterMetadata after) { ImmutableSet.Builder builder = new ImmutableSet.Builder<>(); @@ -76,12 +89,8 @@ public static ImmutableSet diffKeys(ClusterMetadata before, Cluster private static void diffKeys(ClusterMetadata before, ClusterMetadata after, ImmutableSet.Builder builder) { - checkKey(before, after, builder, cm -> cm.schema, MetadataKeys.SCHEMA); - checkKey(before, after, builder, cm -> cm.directory, MetadataKeys.NODE_DIRECTORY); - checkKey(before, after, builder, cm -> cm.tokenMap, MetadataKeys.TOKEN_MAP); - checkKey(before, after, builder, cm -> cm.placements, MetadataKeys.DATA_PLACEMENTS); - checkKey(before, after, builder, cm -> cm.lockedRanges, MetadataKeys.LOCKED_RANGES); - checkKey(before, after, builder, cm -> cm.inProgressSequences, MetadataKeys.IN_PROGRESS_SEQUENCES); + for (Map.Entry>> e : CORE_METADATA.entrySet()) + checkKey(before, after, builder, e.getValue(), e.getKey()); Set> added = new HashSet<>(after.extensions.keySet()); for (Map.Entry, ExtensionValue> entry : before.extensions.entrySet()) diff --git a/src/java/org/apache/cassandra/tcm/MultiStepOperation.java b/src/java/org/apache/cassandra/tcm/MultiStepOperation.java index d447974f85d5..3d42772518bb 100644 --- a/src/java/org/apache/cassandra/tcm/MultiStepOperation.java +++ b/src/java/org/apache/cassandra/tcm/MultiStepOperation.java @@ -33,6 +33,7 @@ import org.apache.cassandra.tcm.sequences.UnbootstrapAndLeave; import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.sequences.DropAccordTable; /** * Represents a multi-step process performed in order to transition the cluster to some state. @@ -67,7 +68,8 @@ public enum Kind LEAVE(UnbootstrapAndLeave.serializer), REMOVE(UnbootstrapAndLeave.serializer), - RECONFIGURE_CMS(ReconfigureCMS.serializer) + RECONFIGURE_CMS(ReconfigureCMS.serializer), + DROP_ACCORD_TABLE(DropAccordTable.serializer), ; public final AsymmetricMetadataSerializer, ? extends MultiStepOperation> serializer; diff --git a/src/java/org/apache/cassandra/tcm/Processor.java b/src/java/org/apache/cassandra/tcm/Processor.java index 3d29b43375a0..46fce6aeab34 100644 --- a/src/java/org/apache/cassandra/tcm/Processor.java +++ b/src/java/org/apache/cassandra/tcm/Processor.java @@ -18,6 +18,8 @@ package org.apache.cassandra.tcm; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; import com.codahale.metrics.Meter; @@ -27,6 +29,8 @@ import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.utils.Clock; +import static java.util.concurrent.TimeUnit.NANOSECONDS; + public interface Processor { /** @@ -105,4 +109,20 @@ default ClusterMetadata fetchLogAndWait(Epoch waitFor) ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy); LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy); + + default List reconstructFull(Epoch lowEpoch, Epoch highEpoch) + { + LogState logState = reconstruct(lowEpoch, highEpoch, Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(NANOSECONDS), + TCMMetrics.instance.commitRetries)); + List cms = new ArrayList<>(logState.entries.size()); + ClusterMetadata accum = logState.baseState; + for (Entry entry : logState.entries) + { + Transformation.Result res = entry.transform.execute(accum); + assert res.isSuccess() : res.toString(); + accum = res.success().metadata; + cms.add(accum); + } + return cms; + } } diff --git a/src/java/org/apache/cassandra/tcm/Startup.java b/src/java/org/apache/cassandra/tcm/Startup.java index 7acfe643c6e3..537a505eb66e 100644 --- a/src/java/org/apache/cassandra/tcm/Startup.java +++ b/src/java/org/apache/cassandra/tcm/Startup.java @@ -416,13 +416,21 @@ public static void startup(Supplier initialTransformation, boole { ClusterMetadata metadata = ClusterMetadata.current(); NodeId self = metadata.myNodeId(); - AccordService.startup(self); // finish in-progress sequences first InProgressSequences.finishInProgressSequences(self, true); metadata = ClusterMetadata.current(); - switch (metadata.directory.peerState(self)) + NodeState startingstate = metadata.directory.peerState(self); + switch (startingstate) + { + case REGISTERED: + case LEFT: + break; + default: + AccordService.startup(self); + } + switch (startingstate) { case REGISTERED: case LEFT: @@ -430,6 +438,10 @@ public static void startup(Supplier initialTransformation, boole ReconfigureCMS.maybeReconfigureCMS(metadata, DatabaseDescriptor.getReplaceAddress()); ClusterMetadataService.instance().commit(initialTransformation.get()); + // When Accord starts up it needs to check for any historic epochs that it needs to know about (in order + // to handle pending transactions), in order to know what nodes to check with it needs to know what the + // settled placement is (so it knows what peers to reach out to). + AccordService.startup(self); InProgressSequences.finishInProgressSequences(self, true); // potentially finish the MSO committed above metadata = ClusterMetadata.current(); diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index 16cf324799a4..fc89ec79d9d1 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -74,7 +74,7 @@ public static StubClusterMetadataService forTesting(ClusterMetadata metadata) private ClusterMetadata metadata; - private StubClusterMetadataService(ClusterMetadata initial) + protected StubClusterMetadataService(ClusterMetadata initial) { super(new UniformRangePlacement(), MetadataSnapshots.NO_OP, @@ -105,15 +105,20 @@ private StubClusterMetadataService(PlacementProvider placement, @Override public T1 commit(Transformation transform, CommitSuccessHandler onSuccess, CommitFailureHandler onFailure) { - Transformation.Result result = transform.execute(metadata); + Transformation.Result result = execute(transform); if (result.isSuccess()) { - metadata = result.success().metadata; + setMetadata(result.success().metadata); return onSuccess.accept(result.success().metadata); } return onFailure.accept(result.rejected().code, result.rejected().reason); } + protected Transformation.Result execute(Transformation transform) + { + return transform.execute(metadata()); + } + @Override public ClusterMetadata fetchLogFromCMS(Epoch awaitAtLeast) { diff --git a/src/java/org/apache/cassandra/tcm/Transformation.java b/src/java/org/apache/cassandra/tcm/Transformation.java index b8ce1cbc9a5d..928ce59aca12 100644 --- a/src/java/org/apache/cassandra/tcm/Transformation.java +++ b/src/java/org/apache/cassandra/tcm/Transformation.java @@ -46,8 +46,10 @@ import org.apache.cassandra.tcm.transformations.BeginConsensusMigrationForTableAndRange; import org.apache.cassandra.tcm.transformations.CancelInProgressSequence; import org.apache.cassandra.tcm.transformations.CustomTransformation; +import org.apache.cassandra.tcm.transformations.FinishDropAccordTable; import org.apache.cassandra.tcm.transformations.ForceSnapshot; import org.apache.cassandra.tcm.transformations.MaybeFinishConsensusMigrationForTableAndRange; +import org.apache.cassandra.tcm.transformations.PrepareDropAccordTable; import org.apache.cassandra.tcm.transformations.PrepareJoin; import org.apache.cassandra.tcm.transformations.PrepareLeave; import org.apache.cassandra.tcm.transformations.PrepareMove; @@ -246,6 +248,8 @@ enum Kind MAYBE_FINISH_CONSENSUS_MIGRATION_FOR_TABLE_AND_RANGE(38, () -> MaybeFinishConsensusMigrationForTableAndRange.serializer), ACCORD_MARK_STALE(39, () -> AccordMarkStale.serializer), ACCORD_MARK_REJOINING(40, () -> AccordMarkRejoining.serializer), + PREPARE_DROP_ACCORD_TABLE(41, () -> PrepareDropAccordTable.serializer), + FINISH_DROP_ACCORD_TABLE(42, () -> FinishDropAccordTable.serializer), ; private final Supplier> serializer; diff --git a/src/java/org/apache/cassandra/tcm/sequences/DropAccordTable.java b/src/java/org/apache/cassandra/tcm/sequences/DropAccordTable.java new file mode 100644 index 000000000000..46047ef49422 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/sequences/DropAccordTable.java @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.sequences; + +import java.io.IOException; +import java.time.Duration; +import java.util.Objects; +import java.util.concurrent.ExecutionException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.MultiStepOperation; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.tcm.transformations.FinishDropAccordTable; +import org.apache.cassandra.utils.JVMStabilityInspector; + +import static org.apache.cassandra.tcm.Transformation.Kind.FINISH_DROP_ACCORD_TABLE; +import static org.apache.cassandra.tcm.sequences.SequenceState.continuable; +import static org.apache.cassandra.tcm.sequences.SequenceState.error; +import static org.apache.cassandra.tcm.sequences.SequenceState.halted; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +/** + * A slightly atypical implementation as it consists of only a single step. To perform the drop of an + * Accord table, we first commit a PrepareDropAccordTable transformation. Upon enactement, that + * marks the table as pending drop, which blocks any new transactions from being started. It also + * instantiates an instance of this operation and adds it to the set of in progress operations. + * + * The intention is to introduce a barrier which blocks until the Accord service acknowledges that + * it was learned of the epoch in which the table was marked for deletion and that all prior transactions + * are completed. Once this is complete, we can proceed to actually drop the table. The transformation + * which performs that schema modification also removes this MSO from ClusterMetadata's in-flight set. + * This obviates the need to 'advance' this MSO in the way that other implementations with more steps do. + * + */ +public class DropAccordTable extends MultiStepOperation +{ + private static final Logger logger = LoggerFactory.getLogger(DropAccordTable.class); + + public static final Serializer serializer = new Serializer(); + + public final TableReference table; + + public static DropAccordTable newSequence(TableReference table, Epoch preparedAt) + { + return new DropAccordTable(table, preparedAt); + } + + /** + * Used by factory method for external callers and by the serializer. + * We don't need to include the serialized FinishDropAccordTable step in the serialization + * of the MSO itself because they have no parameters other than the table reference and so + * we can just construct a new one when we execute it + */ + private DropAccordTable(TableReference table, Epoch latestModification) + { + super(0, latestModification); + this.table = table; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DropAccordTable that = (DropAccordTable) o; + return latestModification.equals(that.latestModification) + && table.equals(that.table); + } + + @Override + public int hashCode() + { + return Objects.hash(latestModification, table); + } + + @Override + public Kind kind() + { + return Kind.DROP_ACCORD_TABLE; + } + + @Override + protected SequenceKey sequenceKey() + { + return table; + } + + @Override + public MetadataSerializer keySerializer() + { + return TableReference.serializer; + } + + @Override + public Transformation.Kind nextStep() + { + return FINISH_DROP_ACCORD_TABLE; + } + + @Override + public SequenceState executeNext() + { + try + { + SequenceState failure = awaitSafeFromAccord(); + if (failure != null) return failure; + } + catch (Throwable t) + { + JVMStabilityInspector.inspectThrowable(t); + logger.warn("Exception while waiting for Accord service to notify all table txns are complete", t); + // this is actually continuable as we can simply retry + return continuable(); + } + try + { + // Now we're satisfied that all Accord txns have finished for the table, + // go ahead and actually drop it + ClusterMetadataService.instance().commit(new FinishDropAccordTable(table)); + return continuable(); + } + catch (Throwable t) + { + JVMStabilityInspector.inspectThrowable(t); + logger.warn("Exception committing finish_drop_accord_table. " + + "Accord service has acknowledged the operation but table remains present in schema", t); + return halted(); + } + } + + private SequenceState awaitSafeFromAccord() throws ExecutionException, InterruptedException + { + // make sure that Accord sees the current epoch, which must necessarily follow the + // one which marked the table as pending drop + ClusterMetadata metadata = ClusterMetadata.current(); + // just for the sake of paranoia, assert that the table is actually marked as being dropped + if (!verifyTableMarked(metadata.schema.getKeyspaces())) + return error(new IllegalStateException(String.format("Table %s is in an invalid state to be dropped", table))); + + long startNanos = nanoTime(); + AccordService.instance().epochReady(metadata.epoch).get(); + long epochEndNanos = nanoTime(); + + // As of this writing this logic is based off ExclusiveSyncPoints which is a bit heavy weight for what is needed, this could cause timeouts for clusters that have a lot of data. + // There are retries baked into this call, but trying to handle timeouts more broadly is put on hold as there is active work to define a EpochSyncPoint that should be far cheaper + // which would avoid the timeout issues + // NOTE: ExclusiveSyncPoint must find all keys in the range, then make sure nothing is blocking them... this causes a lot of IO. EpochSyncPoint just needs to validate that the last txn processed is in the newer epoch, this can work with in-memory state. + AccordService.instance().awaitTableDrop(table.id); + long awaitEndNanos = nanoTime(); + logger.info("Wait for Accord to see the drop table was success. " + + "Took {} to wait for Accord to learn about the change, then {} to process everything", + Duration.ofNanos(epochEndNanos - startNanos), Duration.ofNanos(awaitEndNanos - epochEndNanos)); + return null; + } + + private boolean verifyTableMarked(Keyspaces keyspaces) + { + TableMetadata tm = keyspaces.getTableOrViewNullable(table.id); + if (tm == null) + { + logger.warn("Unable to drop accord table {}, table not found", table); + return false; + } + + if (!tm.params.pendingDrop) + { + logger.warn("Unexpected state, table {} was not marked pending drop", table); + return false; + } + + return true; + } + + @Override + public Transformation.Result applyTo(ClusterMetadata metadata) + { + // note: that this will apply the finish drop transformation to the supplied metadata. It's + // not used to actually execute the MSO, but to determine what the metadata state will/would + // be if it were executed. + return new FinishDropAccordTable(table).execute(metadata); + } + + @Override + public DropAccordTable advance(Epoch epoch) + { + // note: this isn't really used by this MSO impl as it consists of a single step so there's nothing + // to advance. An action of the single step is to remove the MSO from the set of in progress sequences + return new DropAccordTable(this.table, epoch); + } + + @Override + public ProgressBarrier barrier() + { + return ProgressBarrier.immediate(); + } + + public static class TableReference implements SequenceKey, Comparable + { + public static final Serializer serializer = new Serializer(); + + public final TableId id; + + public TableReference(TableId id) + { + this.id = id; + } + + public static TableReference from(TableMetadata metadata) + { + return new TableReference(metadata.id); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TableReference that = (TableReference) o; + return id.equals(that.id); + } + + @Override + public int hashCode() + { + return Objects.hash(id); + } + + @Override + public int compareTo(TableReference o) + { + return id.compareTo(o.id); + } + + @Override + public String toString() + { + return "TableReference{id=" + id + '}'; + } + + public static class Serializer implements MetadataSerializer + { + @Override + public void serialize(TableReference t, DataOutputPlus out, Version version) throws IOException + { + t.id.serialize(out); + } + + @Override + public TableReference deserialize(DataInputPlus in, Version version) throws IOException + { + TableId id = TableId.deserialize(in); + return new TableReference(id); + } + + @Override + public long serializedSize(TableReference t, Version version) + { + return t.id.serializedSize(); + } + } + } + + public static class Serializer implements AsymmetricMetadataSerializer, DropAccordTable> + { + @Override + public void serialize(MultiStepOperation t, DataOutputPlus out, Version version) throws IOException + { + DropAccordTable plan = (DropAccordTable) t; + Epoch.serializer.serialize(plan.latestModification, out, version); + // This type of sequence only has a single step so no need to include the index in serde. + // Similarly, the only parameter to that single step (FinishDropAccordTable) is the table + // reference, so that's all we really need to include in the serialization. + TableReference.serializer.serialize(plan.table, out, version); + } + + @Override + public DropAccordTable deserialize(DataInputPlus in, Version version) throws IOException + { + Epoch lastModified = Epoch.serializer.deserialize(in, version); + TableReference table = TableReference.serializer.deserialize(in, version); + return new DropAccordTable(table, lastModified); + } + + @Override + public long serializedSize(MultiStepOperation t, Version version) + { + DropAccordTable plan = (DropAccordTable) t; + long size = 0; + size += Epoch.serializer.serializedSize(plan.latestModification, version); + size += TableReference.serializer.serializedSize(plan.table, version); + return size; + } + } +} diff --git a/src/java/org/apache/cassandra/tcm/sequences/InProgressSequences.java b/src/java/org/apache/cassandra/tcm/sequences/InProgressSequences.java index 735a7f693571..840c2505f423 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/InProgressSequences.java +++ b/src/java/org/apache/cassandra/tcm/sequences/InProgressSequences.java @@ -27,6 +27,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -59,27 +60,27 @@ private InProgressSequences(Epoch lastModified, ImmutableMap sequence = metadata.inProgressSequences.get(sequenceKey); if (sequence == null) - break; + return metadata; if (onlyStartupSafeSequences && !sequence.finishDuringStartup()) - break; + return metadata; if (isLeave(sequence)) StorageService.instance.maybeInitializeServices(); if (resume(sequence)) metadata = ClusterMetadata.current(); else - return; + return metadata; } } @@ -225,6 +226,11 @@ public Iterator> iterator() return state.values().iterator(); } + public ImmutableSet keys() + { + return state.keySet(); + } + public static class Serializer implements MetadataSerializer { public void serialize(InProgressSequences t, DataOutputPlus out, Version version) throws IOException diff --git a/src/java/org/apache/cassandra/tcm/serialization/Version.java b/src/java/org/apache/cassandra/tcm/serialization/Version.java index 245ef32bbc46..f0b56c99176e 100644 --- a/src/java/org/apache/cassandra/tcm/serialization/Version.java +++ b/src/java/org/apache/cassandra/tcm/serialization/Version.java @@ -18,7 +18,10 @@ package org.apache.cassandra.tcm.serialization; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.cassandra.tcm.ClusterMetadata; @@ -36,30 +39,33 @@ public enum Version /** * - Added version to PlacementForRange serializer * - Serialize MemtableParams when serializing TableParams - * - Added AccordFastPath - * - Added AccordStaleReplicas */ V2(2), + /** - * - down nodes serialized in PrepareCMSReconfiguration + * - Added AccordFastPath + * - Added ConsensusMigrationState + * - Added AccordStaleReplicas + * - TableParam now has pendingDrop (accord table drop is multistep) */ V3(3), - /** - * - Serialize allowAutoSnapshot and incrementalBackups when serializing TableParams - */ + + // Padding V4(4), - /** - * - AlterSchema includes execution timestamp - * - PreInitialize includes datacenter (affects local serialization on first CMS node only) - */ V5(5), + V6(6), /** - * CEP-42 - Constraints framework. New version due to modifications in table metadata serialization. + * - Accord */ - V6(6), + V7(7), UNKNOWN(Integer.MAX_VALUE); + /** + * The version that Accord was added to TCM. + */ + public static final Version MIN_ACCORD_VERSION = V3; + private static Map values = new HashMap<>(); static { @@ -113,4 +119,15 @@ public static Version fromInt(int i) throw new IllegalArgumentException("Unsupported metadata version (" + i + ")"); } + + public List greaterThanOrEqual() + { + Version[] all = Version.values(); + if (ordinal() == all.length - 1) + return Collections.singletonList(this); + List values = new ArrayList<>(all.length - ordinal()); + for (int i = ordinal(); i < all.length; i++) + values.add(all[i]); + return values; + } } diff --git a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java index 3fe49e63bb23..ba72930fb23c 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java +++ b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java @@ -261,7 +261,7 @@ private static Map> groupByReplication( return byReplication; } - private Transformer maybeUpdateConsensusMigrationState(ConsensusMigrationState prev, Transformer next, ImmutableList altered, Keyspaces dropped) + public static Transformer maybeUpdateConsensusMigrationState(ConsensusMigrationState prev, Transformer next, ImmutableList altered, Keyspaces dropped) { ConsensusMigrationState migrationState = prev; diff --git a/src/java/org/apache/cassandra/tcm/transformations/FinishDropAccordTable.java b/src/java/org/apache/cassandra/tcm/transformations/FinishDropAccordTable.java new file mode 100644 index 000000000000..2019adb8ad39 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/FinishDropAccordTable.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.sequences.DropAccordTable.TableReference; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import static org.apache.cassandra.tcm.Transformation.Kind.FINISH_DROP_ACCORD_TABLE; + +/** + * Dropping an Accord table is a three-step process. + *

        + *
      1. Mark the table as pending drop
      2. + *
      3. Await all in-flight txns to finish
      4. + *
      5. Drop the table from schema (this step)
      6. + *
      + *

      + * Hypothetically it is possible that after {1} has been committed, but before {3} is executed + * interleaving metadata changes occur. These could include dropping the table's keyspace, or + * modifying the transactional mode of the table to make it a non-accord table. Validation + * exists to prevent these schema changes from being committed while the drop is in-flight. + * However, if something like this did happen, by the time we come to execute this transformation, + * there's nothing really to do other than return success (as the table has indeed already been dropped). + */ +public class FinishDropAccordTable implements Transformation +{ + private static final Logger logger = LoggerFactory.getLogger(FinishDropAccordTable.class); + + public static final Serializer serializer = new Serializer(); + public final TableReference tableRef; + + public FinishDropAccordTable(TableReference tableRef) + { + this.tableRef = tableRef; + } + + @Override + public Kind kind() + { + return FINISH_DROP_ACCORD_TABLE; + } + + @Override + public Result execute(ClusterMetadata prev) + { + // In every case we remove the operation to drop this table from the set of in-flight sequences + ClusterMetadata.Transformer proposed = prev.transformer() + .with(prev.inProgressSequences.without(tableRef)); + + Keyspaces keyspaces = prev.schema.getKeyspaces(); + TableMetadata table = keyspaces.getTableOrViewNullable(tableRef.id); + // Table was already dropped + if (table == null) + { + logger.warn("Table {} was dropped while drop accord table sequence was in flight", tableRef); + return Transformation.success(proposed, LockedRanges.AffectedRanges.EMPTY); + } + KeyspaceMetadata keyspace = keyspaces.getNullable(table.keyspace); + + // Actually drop the table + Keyspaces withoutTable = keyspaces.withAddedOrUpdated(keyspace.withSwapped(keyspace.tables.without(table))); + + Keyspaces.KeyspacesDiff diff = Keyspaces.diff(prev.schema.getKeyspaces(), withoutTable); + + proposed = AlterSchema.maybeUpdateConsensusMigrationState(prev.consensusMigrationState, proposed, diff.altered, Keyspaces.NONE); + + proposed = proposed.with(new DistributedSchema(withoutTable)); + return Transformation.success(proposed, LockedRanges.AffectedRanges.EMPTY); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (!(o instanceof FinishDropAccordTable)) return false; + + FinishDropAccordTable that = (FinishDropAccordTable) o; + + return Objects.equals(tableRef, that.tableRef); + } + + @Override + public int hashCode() + { + return Objects.hash(tableRef); + } + + public static class Serializer implements AsymmetricMetadataSerializer + { + @Override + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + FinishDropAccordTable plan = (FinishDropAccordTable) t; + TableReference.serializer.serialize(plan.tableRef, out, version); + } + + @Override + public FinishDropAccordTable deserialize(DataInputPlus in, Version version) throws IOException + { + TableReference table = TableReference.serializer.deserialize(in, version); + return new FinishDropAccordTable(table); + } + + @Override + public long serializedSize(Transformation t, Version version) + { + FinishDropAccordTable plan = (FinishDropAccordTable) t; + return TableReference.serializer.serializedSize(plan.tableRef, version); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/tcm/transformations/PrepareDropAccordTable.java b/src/java/org/apache/cassandra/tcm/transformations/PrepareDropAccordTable.java new file mode 100644 index 000000000000..2c960a0bd3a0 --- /dev/null +++ b/src/java/org/apache/cassandra/tcm/transformations/PrepareDropAccordTable.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.transformations; + +import java.io.IOException; +import java.util.Objects; + +import org.apache.cassandra.exceptions.ExceptionCode; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.sequences.DropAccordTable; +import org.apache.cassandra.tcm.sequences.DropAccordTable.TableReference; +import org.apache.cassandra.tcm.sequences.LockedRanges; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +public class PrepareDropAccordTable implements Transformation +{ + public static final Serializer serializer = new Serializer(); + + public final TableReference tableRef; + + public PrepareDropAccordTable(TableReference tableRef) + { + this.tableRef = tableRef; + } + + @Override + public Kind kind() + { + return Kind.PREPARE_DROP_ACCORD_TABLE; + } + + @Override + public Result execute(ClusterMetadata prev) + { + TableMetadata metadata = prev.schema.getKeyspaces().getTableOrViewNullable(tableRef.id); + if (metadata == null) + return new Rejected(ExceptionCode.INVALID, "Table " + tableRef + " is not known"); + if (!metadata.isAccordEnabled()) + return new Rejected(ExceptionCode.INVALID, "Table " + metadata + " is not an Accord table and should be dropped normally"); + if (metadata.params.pendingDrop) + return new Rejected(ExceptionCode.INVALID, "Table " + metadata + " is in the process of being dropped"); + + KeyspaceMetadata ks = prev.schema.getKeyspaceMetadata(metadata.keyspace); + metadata = metadata.unbuild().params(metadata.params.unbuild().pendingDrop(true).build()).build(); + ks = ks.withSwapped(ks.tables.withSwapped(metadata)); + + DropAccordTable operation = DropAccordTable.newSequence(tableRef, prev.nextEpoch()); + ClusterMetadata.Transformer proposed = prev.transformer() + .with(new DistributedSchema(prev.schema.getKeyspaces().withAddedOrUpdated(ks))) + .with(prev.inProgressSequences.with(tableRef, operation)); + return Transformation.success(proposed, LockedRanges.AffectedRanges.EMPTY); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PrepareDropAccordTable that = (PrepareDropAccordTable) o; + return tableRef.equals(that.tableRef); + } + + @Override + public int hashCode() + { + return Objects.hash(tableRef); + } + + public static class Serializer implements AsymmetricMetadataSerializer + { + @Override + public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException + { + PrepareDropAccordTable plan = (PrepareDropAccordTable) t; + TableReference.serializer.serialize(plan.tableRef, out, version); + } + + @Override + public PrepareDropAccordTable deserialize(DataInputPlus in, Version version) throws IOException + { + TableReference table = TableReference.serializer.deserialize(in, version); + return new PrepareDropAccordTable(table); + } + + @Override + public long serializedSize(Transformation t, Version version) + { + PrepareDropAccordTable plan = (PrepareDropAccordTable) t; + return TableReference.serializer.serializedSize(plan.tableRef, version); + } + } +} diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index d82173558695..2823bf112b03 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -269,7 +269,8 @@ public int execute(String... args) .withCommand(CMSAdmin.Unregister.class) .withCommand(CMSAdmin.AbortInitialization.class) .withCommand(CMSAdmin.DumpDirectory.class) - .withCommand(CMSAdmin.DumpLog.class); + .withCommand(CMSAdmin.DumpLog.class) + .withCommand(CMSAdmin.ResumeDropAccordTable.class); builder.withGroup("consensus_admin") .withDescription("List and mark ranges as migrating between consensus protocols") diff --git a/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java b/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java index 7f54fdd9be0c..84d23dc3f82a 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java +++ b/src/java/org/apache/cassandra/tools/nodetool/CMSAdmin.java @@ -257,4 +257,16 @@ private static int keywidth(Map> map) assert !map.isEmpty(); return map.entrySet().iterator().next().getValue().keySet().stream().max(Comparator.comparingInt(String::length)).get().length() + 1; } + + @Command(name = "resumedropaccordtable", description = "Resume a drop accord table operation which has stalled") + public static class ResumeDropAccordTable extends NodeTool.NodeToolCmd + { + @Arguments(usage = "[tableId]", description = "Table id of the table being dropped") + private String tableId; + @Override + public void execute(NodeProbe probe) + { + probe.getCMSOperationsProxy().resumeDropAccordTable(tableId); + } + } } diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java index 5053735336f0..50ffe781f518 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Coordinator.java @@ -29,7 +29,7 @@ import com.google.common.collect.Iterators; -import accord.utilsfork.Invariants; +import accord.utils.Invariants; import org.apache.cassandra.cql3.CQLStatement; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; diff --git a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java index 79daae158361..0cbb1f7cad6d 100644 --- a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java +++ b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java @@ -25,11 +25,13 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; @@ -52,8 +54,18 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.primitives.TxnId; import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.ICluster; +import org.apache.cassandra.distributed.api.IInstance; +import org.apache.cassandra.distributed.api.IInstanceConfig; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.IMessageFilters; +import org.apache.cassandra.distributed.api.NodeToolResult; +import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.distributed.impl.AbstractCluster; import org.apache.cassandra.distributed.impl.InstanceConfig; import org.apache.cassandra.distributed.impl.TestChangeListener; @@ -67,6 +79,8 @@ import org.apache.cassandra.net.RequestCallback; import org.apache.cassandra.net.Verb; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -85,6 +99,7 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.REPLACE_ADDRESS_FIRST_BOOT; import static org.apache.cassandra.config.CassandraRelevantProperties.RING_DELAY; import static org.apache.cassandra.distributed.impl.TestEndpointCache.toCassandraInetAddressAndPort; +import static org.apache.cassandra.schema.SchemaConstants.VIRTUAL_VIEWS; import static org.assertj.core.api.Assertions.assertThat; /** @@ -1555,5 +1570,50 @@ public static void assertModeJoined(IInvokableInstance inst) .describedAs("Unexpected StorageService operation mode") .isEqualTo(StorageService.Mode.NORMAL); } + + public static LinkedHashMap queryTxnState(AbstractCluster cluster, TxnId txnId, int... nodes) + { + String cql = String.format("SELECT * FROM %s.txn_blocked_by WHERE txn_id=?", VIRTUAL_VIEWS); + LinkedHashMap map = new LinkedHashMap<>(); + Iterable it = nodes.length == 0 ? cluster::iterator : cluster.get(nodes); + for (T i : it) + { + if (i.isShutdown()) + continue; + SimpleQueryResult result = i.executeInternalWithResult(cql, txnId.toString()); + map.put(i.toString(), result); + } + return map; + } + + public static String queryTxnStateAsString(AbstractCluster cluster, TxnId txnId, int... nodes) + { + StringBuilder sb = new StringBuilder(); + queryTxnStateAsString(sb, cluster, txnId, nodes); + return sb.toString(); + } + + public static void queryTxnStateAsString(StringBuilder sb, AbstractCluster cluster, TxnId txnId, int... nodes) + { + LinkedHashMap map = queryTxnState(cluster, txnId, nodes); + for (var e : map.entrySet()) + { + sb.append(e.getKey()).append(":\n"); + SimpleQueryResult result = e.getValue(); + if (!result.names().isEmpty()) + sb.append(result.names()).append('\n'); + while (result.hasNext()) + { + var row = result.next(); + sb.append(Arrays.asList(row.toObjectArray())).append('\n'); + } + } + } + + public static TableId tableId(Cluster cluster, String ks, String table) + { + String str = cluster.getFirstRunningInstance().callOnInstance(() -> Schema.instance.getKeyspaceInstance(ks).getColumnFamilyStore(table).getTableId().toString()); + return TableId.fromUUID(UUID.fromString(str)); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java index 7eadb7e0b470..14ac84b4045e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/test/TestBaseImpl.java @@ -293,4 +293,27 @@ public static ListenableFuture nodetoolAsync(ICoordinator coordinator, S asyncThread.start(); return task; } + + /** + * @see org.apache.cassandra.cql3.CQLTester#wrapInTxn(String...) + */ + protected static String wrapInTxn(String... stmts) + { + return wrapInTxn(Arrays.asList(stmts)); + } + + protected static String wrapInTxn(List stmts) + { + StringBuilder sb = new StringBuilder(); + sb.append("BEGIN TRANSACTION\n"); + for (String stmt : stmts) + { + sb.append('\t').append(stmt); + if (!stmt.endsWith(";")) + sb.append(';'); + sb.append('\n'); + } + sb.append("COMMIT TRANSACTION"); + return sb.toString(); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java index c0956391b22c..824498b229c0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java @@ -389,6 +389,7 @@ private void checkUpdateStatic(Cluster cluster, String update, int key, String e private void assertResultsFromAccordMatches(Cluster cluster, String accordRead, String simpleRead, int key) { + accordRead = wrapInTxn(accordRead); Object[][] simpleReadResult; if (transactionalMode.ignoresSuppliedConsistencyLevel) // With accord non-SERIAL write strategy the commit CL is effectively ANY so we need to read at SERIAL diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropKeyspaceTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropKeyspaceTest.java new file mode 100644 index 000000000000..353980f55f07 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropKeyspaceTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; + +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.schema.TableId; + +public class AccordDropKeyspaceTest extends AccordDropTableBase +{ + @Test + public void dropKeyspace() throws IOException + { + int examples = 5; + int steps = 5; + try (Cluster cluster = Cluster.build(3) + .withoutVNodes() + .withConfig(c -> c.with(Feature.values()) + .set("auto_snapshot", false)) + .start()) + { + fixDistributedSchemas(cluster); + for (int i = 0; i < examples; i++) + { + int j = 0; + try + { + addChaos(cluster, i); + init(cluster); + TableId id = createTable(cluster); + for (j = 0; j < steps; j++) + doTxn(cluster, j); + dropKeyspace(cluster); + validateAccord(cluster, id); + } + catch (Throwable t) + { + throw new AssertionError("Error at example " + i + ", " + j, t); + } + } + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java new file mode 100644 index 000000000000..732fe303cb7a --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.util.UUID; + +import com.google.common.base.Throwables; + +import accord.api.Key; +import accord.local.CommandStores; +import accord.local.KeyHistory; +import accord.local.PreLoadContext; +import accord.local.cfk.CommandsForKey; +import accord.primitives.Ranges; +import accord.primitives.TxnId; +import accord.utils.async.AsyncChains; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.ICoordinator; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.net.Verb; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordSafeCommandStore; +import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.TokenRange; +import org.assertj.core.api.Assertions; + +import static org.apache.cassandra.service.accord.AccordTestUtils.wrapInTxn; + +public class AccordDropTableBase extends TestBaseImpl +{ + protected static void addChaos(Cluster cluster, int example) + { + cluster.filters().reset(); + cluster.filters().verbs(Verb.ACCORD_APPLY_REQ.id).from(1).to(3).drop(); + } + + protected static void doTxn(Cluster cluster, int step) + { + int stepId = step % 3; + int partitionId = step % 10; + int coordinatorId = (step % 2) + 1; // avoid node3 as it can't get applies from node1, so leads to user errors + ICoordinator coordinator = cluster.coordinator(coordinatorId); + switch (stepId) + { + case 0: // insert + retry(3, () -> coordinator.executeWithResult(wrapInTxn(withKeyspace("INSERT INTO %s.tbl(pk, v) VALUES (?, ?);")), ConsistencyLevel.ANY, partitionId, step)); + break; + case 1: // insert + read + retry(3, () -> coordinator.executeWithResult(wrapInTxn(withKeyspace("UPDATE %s.tbl SET v+=1 WHERE pk=?;")), ConsistencyLevel.ANY, partitionId)); + break; + case 2: // read + retry(3, () -> coordinator.executeWithResult(wrapInTxn(withKeyspace("SELECT * FROM %s.tbl WHERE pk=?")), ConsistencyLevel.ANY, partitionId)); + break; + default: + throw new UnsupportedOperationException(); + } + } + + protected static void retry(int maxAttempts, Runnable fn) + { + for (int i = 0; i < maxAttempts; i++) + { + try + { + fn.run(); + } + catch (Throwable t) + { + if (i == (maxAttempts - 1)) + throw t; + } + } + } + + protected static TableId createTable(Cluster cluster) + { + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl(pk int PRIMARY KEY, v int) WITH transactional_mode='full'")); + return ClusterUtils.tableId(cluster, KEYSPACE, "tbl"); + } + + protected void dropKeyspace(Cluster cluster) + { + // drop keyspace should be rejected as there is an accord table... so validate that is true then do both + try + { + cluster.schemaChange(withKeyspace("DROP KEYSPACE %s")); + } + catch (Throwable t) + { + Assertions.assertThat(Throwables.getRootCause(t)) + .hasMessage("Cannot drop keyspace 'distributed_test_keyspace' as it contains accord tables. (distributed_test_keyspace.tbl)"); + } + + // now do it for real + dropTable(cluster); + cluster.schemaChange(withKeyspace("DROP KEYSPACE %s")); + } + + protected static void dropTable(Cluster cluster) + { + cluster.schemaChange(withKeyspace("DROP TABLE %s.tbl")); + } + + protected static void validateAccord(Cluster cluster, TableId id) + { + String s = id.toString(); + for (IInvokableInstance inst : cluster) + { + inst.runOnInstance(() -> { + TableId tableId = TableId.fromUUID(UUID.fromString(s)); + AccordService accord = (AccordService) AccordService.instance(); + PreLoadContext ctx = PreLoadContext.contextFor(Ranges.single(TokenRange.fullRange(tableId)), KeyHistory.COMMANDS); + CommandStores stores = accord.node().commandStores(); + for (int storeId : stores.ids()) + { + AccordCommandStore store = (AccordCommandStore) stores.forId(storeId); + AsyncChains.getUnchecked(store.submit(ctx, input -> { + AccordSafeCommandStore safe = (AccordSafeCommandStore) input; + for (Key key : safe.commandsForKeysKeys()) + { + AccordSafeCommandsForKey safeCFK = safe.maybeCommandsForKey(key); + if (safeCFK == null) // we read and found a key, but its null at load time... so ignore it + continue; + CommandsForKey cfk = safeCFK.current(); + CommandsForKey.TxnInfo minUndecided = cfk.minUndecided(); + if (minUndecided != null) + throw new AssertionError("Undecided txn: " + minUndecided); + TxnId next = cfk.nextWaitingToApply(); + if (next != null) + throw new AssertionError("Unapplied txn: " + next); + } + return null; + })); + } + }); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableTest.java new file mode 100644 index 000000000000..b13bcb05ce8e --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; + +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.schema.TableId; + +public class AccordDropTableTest extends AccordDropTableBase +{ + @Test + public void dropTable() throws IOException + { + int examples = 5; + int steps = 5; + try (Cluster cluster = Cluster.build(3) + .withoutVNodes() + .withConfig(c -> c.with(Feature.values()) + .set("auto_snapshot", false)) + .start()) + { + fixDistributedSchemas(cluster); + init(cluster); + for (int i = 0; i < examples; i++) + { + int j = 0; + try + { + addChaos(cluster, i); + TableId id = createTable(cluster); + for (j = 0; j < steps; j++) + doTxn(cluster, j); + dropTable(cluster); + validateAccord(cluster, id); + } + catch (Throwable t) + { + throw new AssertionError("Error at example " + i + ", " + j, t); + } + } + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordHostReplacementTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordHostReplacementTest.java new file mode 100644 index 000000000000..0105ad7eae4c --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordHostReplacementTest.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.accord; + +import java.io.IOException; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.harry.SchemaSpec; +import org.apache.cassandra.harry.dsl.HistoryBuilder; +import org.apache.cassandra.harry.dsl.ReplayingHistoryBuilder; +import org.apache.cassandra.harry.execution.RingAwareInJvmDTestVisitExecutor; +import org.apache.cassandra.harry.gen.Generator; +import org.apache.cassandra.harry.gen.Generators; +import org.apache.cassandra.harry.gen.SchemaGenerators; +import org.apache.cassandra.harry.model.TokenPlacementModel; +import org.apache.cassandra.service.consensus.TransactionalMode; + +import static org.apache.cassandra.distributed.shared.ClusterUtils.stopUnchecked; +import static org.apache.cassandra.distributed.shared.ClusterUtils.waitForCMSToQuiesce; +import static org.apache.cassandra.harry.checker.TestHelper.withRandom; + +public class AccordHostReplacementTest extends TestBaseImpl +{ + private static final Generator transactionalModeGen = Generators.pick(Stream.of(TransactionalMode.values()).filter(t -> t.accordIsEnabled).collect(Collectors.toList())); + + @Test + public void hostReplace() throws IOException + { + // start 3 node cluster, then do a host replacement of one of the nodes + Cluster.Builder clusterBuilder = Cluster.build(3) + .withConfig(c -> c.with(Feature.values()) + .set("accord.command_store_shard_count", "1") + .set("write_request_timeout", "10s") + .set("read_request_timeout", "10s") + .set("accord.queue_shard_count", "1") + ); + TokenSupplier tokenRing = TokenSupplier.evenlyDistributedTokens(3, clusterBuilder.getTokenCount()); + int nodeToReplace = 2; + clusterBuilder = clusterBuilder.withTokenSupplier((TokenSupplier) node -> tokenRing.tokens(node == 4 ? nodeToReplace : node)); + try (Cluster cluster = clusterBuilder.start()) + { + fixDistributedSchemas(cluster); + init(cluster); + + withRandom(rng -> { + Generator schemaGen = SchemaGenerators.schemaSpecGen(KEYSPACE, "host_replace", 1000, + SchemaSpec.optionsBuilder().withTransactionalMode(transactionalModeGen.generate(rng))); + SchemaSpec schema = schemaGen.generate(rng); + Generators.TrackingGenerator pkGen = Generators.tracking(Generators.int32(0, Math.min(schema.valueGenerators.pkPopulation(), 1000))); + + HistoryBuilder history = historyBuilder(schema, cluster); + waitForCMSToQuiesce(cluster, cluster.get(1)); + + for (int i = 0; i < 1000; i++) + history.insert(pkGen.generate(rng)); + for (int pk : pkGen.generated()) + history.selectPartition(pk); + + history.custom(() -> { + stopUnchecked(cluster.get(nodeToReplace)); + ClusterUtils.replaceHostAndStart(cluster, cluster.get(nodeToReplace)); + }, "Replace"); + + for (int pk : pkGen.generated()) + history.selectPartition(pk); + }); + } + } + + private static HistoryBuilder historyBuilder(SchemaSpec schema, Cluster cluster) + { + HistoryBuilder history = new ReplayingHistoryBuilder(schema.valueGenerators, + hb -> RingAwareInJvmDTestVisitExecutor.builder() + .replicationFactor(new TokenPlacementModel.SimpleReplicationFactor(3)) + .consistencyLevel(ConsistencyLevel.ALL) + .build(schema, hb, cluster)); + history.customThrowing(() -> cluster.schemaChange(schema.compile()), "Setup"); + return history; + } +} \ No newline at end of file diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 6180a5fc540d..318b922d2924 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -35,6 +35,12 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import com.google.common.primitives.Ints; + +import org.apache.cassandra.cql3.CQLStatement; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableMetadata; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -84,11 +90,9 @@ import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.Verb; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; import org.apache.cassandra.service.consensus.TransactionalMode; @@ -104,7 +108,6 @@ import static org.apache.cassandra.db.SystemKeyspace.PAXOS; import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL; import static org.apache.cassandra.schema.SchemaConstants.SYSTEM_KEYSPACE_NAME; -import static org.apache.cassandra.schema.SchemaConstants.VIRTUAL_VIEWS; import static org.junit.Assert.assertArrayEquals; public abstract class AccordTestBase extends TestBaseImpl @@ -406,43 +409,27 @@ private static boolean hasRootCause(Throwable ex, Class { - TransactionStatement stmt = AccordTestUtils.parse(cql); - return isIdempotent(stmt); + return inst.callOnInstance(() -> { + CQLStatement.Raw parsed = QueryProcessor.parseStatement(cql); + if (parsed instanceof TransactionStatement.Parsed) + { + TransactionStatement stmt = (TransactionStatement) parsed.prepare(ClientState.forInternalCalls()); + return isIdempotent(stmt); + } + else if (parsed instanceof ModificationStatement.Parsed) + { + ModificationStatement stmt = (ModificationStatement) parsed.prepare(ClientState.forInternalCalls()); + return isIdempotent(stmt); + } + else + { + throw new IllegalArgumentException("Unexpected type: " + parsed.getClass()); + } }); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java index 82c9e2f806f5..4a6fab41fe39 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/NewSchemaTest.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.List; import org.junit.BeforeClass; @@ -27,8 +28,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.cql3.ast.Select; +import org.apache.cassandra.cql3.ast.Txn; import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.service.accord.AccordService; +import org.assertj.core.api.Assertions; import static java.util.function.UnaryOperator.identity; @@ -54,25 +58,29 @@ public void test() for (int i = 0; i < 20; i++) { String ks = "ks" + i; - String table = ks + ".tbl" + i; + String tableName = "tbl" + i; + String table = ks + "." + tableName; SHARED_CLUSTER.schemaChange("CREATE KEYSPACE " + ks + " WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 1}"); SHARED_CLUSTER.schemaChange(String.format("CREATE TABLE %s (pk blob primary key) WITH transactional_mode='full'", table)); SHARED_CLUSTER.forEach(node -> node.runOnInstance(() -> AccordService.instance().setCacheSize(0))); List keys = tokensToKeys(tokens()); - read(table, keys).exec(); + read(ks, tableName, keys).exec(); } } - private static Query read(String table, List keys) + private static Query read(String ks, String table, List keys) { assert !keys.isEmpty(); - StringBuilder sb = new StringBuilder(); + Txn.Builder builder = new Txn.Builder(); for (int i = 0; i < keys.size(); i++) - sb.append("let row").append(i).append(" = (select * from ").append(table).append(" where pk = ?);\n"); - sb.append("SELECT row0.pk;"); - return new Query(sb.toString(), keys.toArray()); + builder.addLet("row" + i, new Select.Builder().wildcard().table(ks, table).value("pk", keys.get(i))); + builder.addReturnReferences("row0.pk"); + Txn txn = builder.build(); + ByteBuffer[] binds = txn.bindsEncoded(); + Assertions.assertThat(Arrays.asList(binds)).isEqualTo(keys); + return new Query(txn.toCQL(), binds); } private static class Query diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java index ee2be216f0c8..31d1aab31f15 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/CasMultiNodeTableWalkBase.java @@ -18,8 +18,8 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utilsfork.Gen; -import accord.utilsfork.RandomSource; +import accord.utils.Gen; +import accord.utils.RandomSource; import org.apache.cassandra.config.Config; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.CasCondition; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java index 5de56c923074..d6c01834737f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java @@ -20,7 +20,7 @@ import java.io.IOException; -import accord.utilsfork.RandomSource; +import accord.utils.RandomSource; import net.bytebuddy.ByteBuddy; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java index 7d1b7ab71d83..7727e3a76ab3 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java @@ -20,7 +20,7 @@ import org.junit.Ignore; -import accord.utilsfork.Property; +import accord.utils.Property; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java index f0c3d57ec4c0..5a0ce66ccca9 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utilsfork.Property; +import accord.utils.Property; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java index 64d6b91ea33c..969b0756432b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java @@ -20,8 +20,8 @@ import java.io.IOException; -import accord.utilsfork.Property; -import accord.utilsfork.RandomSource; +import accord.utils.Property; +import accord.utils.RandomSource; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInstanceConfig; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java index 1d8a5919f170..0cf333d2ab84 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV1MultiNodeTableWalkTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utilsfork.Property; +import accord.utils.Property; import org.apache.cassandra.config.Config; import org.apache.cassandra.distributed.Cluster; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java index d8d8bcb1bd1b..fa098edaacbc 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/PaxosV2MultiNodeTableWalkTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.distributed.test.cql3; -import accord.utilsfork.Property; +import accord.utils.Property; import org.apache.cassandra.config.Config; import org.apache.cassandra.distributed.Cluster; diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java index 5bffefb11186..60b1090dc1c9 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java @@ -38,10 +38,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; -import accord.utilsfork.Property; -import accord.utilsfork.RandomSource; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Property; +import accord.utils.RandomSource; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.Bind; import org.apache.cassandra.cql3.ast.Conditional; @@ -72,8 +72,8 @@ import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.ImmutableUniqueList; -import static accord.utilsfork.Property.commands; -import static accord.utilsfork.Property.stateful; +import static accord.utils.Property.commands; +import static accord.utils.Property.stateful; import static org.apache.cassandra.utils.AbstractTypeGenerators.getTypeSupport; import static org.apache.cassandra.utils.Generators.toGen; @@ -371,7 +371,8 @@ public void test() throws IOException .addIf(State::allowNonPartitionMultiColumnQuery, this::multiColumnQuery) .addIf(State::allowPartitionQuery, this::partitionRestrictedQuery) .destroyState(State::close) - .onSuccess(onSuccess(logger)) + // TODO: add back when accord-core Property supports it +// .onSuccess(onSuccess(logger)) .build()); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java index df276143972c..3a7930ee4269 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java @@ -37,10 +37,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; -import accord.utilsfork.Property; -import accord.utilsfork.RandomSource; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Property; +import accord.utils.RandomSource; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.Conditional.Where.Inequality; @@ -66,8 +66,8 @@ import org.apache.cassandra.utils.ImmutableUniqueList; import org.quicktheories.generators.SourceDSL; -import static accord.utilsfork.Property.commands; -import static accord.utilsfork.Property.stateful; +import static accord.utils.Property.commands; +import static accord.utils.Property.stateful; import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; import static org.apache.cassandra.utils.Generators.toGen; @@ -267,7 +267,8 @@ public void test() throws IOException .addIf(State::hasEnoughMemtable, StatefulASTBase::flushTable) .addIf(State::hasEnoughSSTables, StatefulASTBase::compactTable) .destroyState(State::close) - .onSuccess(onSuccess(logger)) + // TODO: add back when accord-core Property supports it + //.onSuccess(onSuccess(logger)) .build()); } } @@ -402,7 +403,8 @@ private LinkedHashSet randomPks(RandomSource rs) LinkedHashSet pks = new LinkedHashSet<>(); for (int i = 0; i < numPks; i++) { - ByteBuffer value = rs.pickOrderedSet(available); + // TODO: add back when accord-core Property supports it + ByteBuffer value = null;//rs.pickOrderedSet(available); pks.add(value); available.remove(value); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java index 90a538566772..423bfd4477e4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java @@ -35,12 +35,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; -import org.slf4j.Logger; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; -import accord.utilsfork.Property; -import accord.utilsfork.RandomSource; + +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Property; +import accord.utils.RandomSource; import com.datastax.driver.core.ColumnDefinitions; import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; @@ -86,7 +86,7 @@ import org.apache.cassandra.utils.Generators; import org.quicktheories.generators.SourceDSL; -import static accord.utilsfork.Property.multistep; +import static accord.utils.Property.multistep; import static org.apache.cassandra.distributed.test.JavaDriverUtils.toDriverCL; import static org.apache.cassandra.utils.AbstractTypeGenerators.overridePrimitiveTypeSupport; import static org.apache.cassandra.utils.AbstractTypeGenerators.stringComparator; @@ -177,10 +177,11 @@ protected static Cluster createCluster(int nodeCount, Consumer return cluster; } - protected Property.StatefulSuccess onSuccess(Logger logger) - { - return (state, sut, history) -> logger.info("Successful for the following:\nState {}\nHistory:\n{}", state, Property.formatList("\t\t", history)); - } + // TODO: add back when accord-core Property supports it +// protected Property.StatefulSuccess onSuccess(Logger logger) +// { +// return (state, sut, history) -> logger.info("Successful for the following:\nState {}\nHistory:\n{}", state, Property.formatList("\t\t", history)); +// } protected static Property.Command flushTable(RandomSource rs, S state) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java index 69d4001bd135..b606a94fc4a0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ClusterMetadataTestHelper.java @@ -59,6 +59,7 @@ import org.apache.cassandra.schema.SchemaTransformation; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.AccordStaleReplicas; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.AtomicLongBackedProcessor; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; @@ -158,41 +159,22 @@ public static ClusterMetadata minimalForTesting(Epoch epoch, IPartitioner partit AccordFastPath.EMPTY, LockedRanges.EMPTY, InProgressSequences.EMPTY, - null, + ConsensusMigrationState.EMPTY, ImmutableMap.of(), AccordStaleReplicas.EMPTY); } public static ClusterMetadata minimalForTesting(IPartitioner partitioner) { - return new ClusterMetadata(Epoch.EMPTY, - partitioner, - null, - null, - null, - DataPlacements.empty(), - AccordFastPath.EMPTY, - null, - null, - null, - ImmutableMap.of(), - AccordStaleReplicas.EMPTY); + return minimalForTesting(Epoch.EMPTY, partitioner); } public static ClusterMetadata minimalForTesting(Keyspaces keyspaces) { - return new ClusterMetadata(Epoch.EMPTY, - Murmur3Partitioner.instance, - new DistributedSchema(keyspaces), - null, - null, - DataPlacements.empty(), - AccordFastPath.EMPTY, - null, - null, - null, - ImmutableMap.of(), - AccordStaleReplicas.EMPTY); + return minimalForTesting(Murmur3Partitioner.instance).transformer() + .with(new DistributedSchema(keyspaces)) + .build() + .metadata.forceEpoch(Epoch.EMPTY); } public static ClusterMetadataService syncInstanceForTest() diff --git a/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java b/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java index 233f49976d48..2145a0ac6af3 100644 --- a/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java +++ b/test/distributed/org/apache/cassandra/fuzz/snapshots/SnapshotsTest.java @@ -41,8 +41,8 @@ import com.google.common.util.concurrent.Uninterruptibles; import org.junit.Test; -import accord.utilsfork.Property.StateOnlyCommand; -import accord.utilsfork.RandomSource; +import accord.utils.Property.StateOnlyCommand; +import accord.utils.RandomSource; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.IInvokableInstance; @@ -65,8 +65,8 @@ import org.quicktheories.generators.SourceDSL; import org.quicktheories.impl.JavaRandom; -import static accord.utilsfork.Property.commands; -import static accord.utilsfork.Property.stateful; +import static accord.utils.Property.commands; +import static accord.utils.Property.stateful; import static com.google.common.collect.Sets.difference; import static java.lang.String.format; import static java.util.UUID.randomUUID; diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java b/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java new file mode 100644 index 000000000000..b07747969628 --- /dev/null +++ b/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.fuzz.topology; + +import java.nio.ByteBuffer; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ExecutionException; +import java.util.function.BiFunction; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.coordinate.Exhausted; +import accord.coordinate.Preempted; +import accord.coordinate.Timeout; +import accord.local.Node; +import accord.primitives.Ranges; +import accord.primitives.Seekables; +import accord.primitives.TxnId; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Invariants; +import accord.utils.Property; +import accord.utils.Property.Command; +import accord.utils.RandomSource; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.Config; +import org.apache.cassandra.cql3.ast.Mutation; +import org.apache.cassandra.cql3.ast.Statement; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IInstanceConfig; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.SimpleQueryResult; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.distributed.test.accord.AccordTestBase; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.AccordAgent; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.utils.ASTGenerators; +import org.apache.cassandra.utils.CassandraGenerators; +import org.apache.cassandra.utils.Isolated; +import org.apache.cassandra.utils.Shared; + +import static org.apache.cassandra.utils.AccordGenerators.fromQT; + +public class AccordTopologyMixupTest extends TopologyMixupTestBase +{ + private static final Logger logger = LoggerFactory.getLogger(AccordTopologyMixupTest.class); + + static + { + CassandraRelevantProperties.ACCORD_AGENT_CLASS.setString(InterceptAgent.class.getName()); + // enable most expensive debugging checks + CassandraRelevantProperties.ACCORD_KEY_PARANOIA_CPU.setString(Invariants.Paranoia.QUADRATIC.name()); + CassandraRelevantProperties.ACCORD_KEY_PARANOIA_MEMORY.setString(Invariants.Paranoia.QUADRATIC.name()); + CassandraRelevantProperties.ACCORD_KEY_PARANOIA_COSTFACTOR.setString(Invariants.ParanoiaCostFactor.HIGH.name()); + } + + private static final List TRANSACTIONAL_MODES = Stream.of(TransactionalMode.values()).filter(t -> t.accordIsEnabled).collect(Collectors.toList()); + + @Override + protected Gen> stateGen() + { + return AccordState::new; + } + + @Override + protected void preCheck(Property.StatefulBuilder builder) + { + // if a failing seed is detected, populate here + // Example: builder.withSeed(42L); + } + + private static Spec createSchemaSpec(RandomSource rs, Cluster cluster) + { + TransactionalMode mode = rs.pick(TRANSACTIONAL_MODES); + boolean enableMigration = allowsMigration(mode) && rs.nextBoolean(); + TableMetadata metadata = fromQT(new CassandraGenerators.TableMetadataBuilder() + .withKeyspaceName(KEYSPACE) + .withTableKinds(TableMetadata.Kind.REGULAR) + .withKnownMemtables() + //TODO (coverage): include "fast_path = 'keyspace'" override + .withTransactionalMode(enableMigration ? TransactionalMode.off : mode) + .withoutEmpty() + .build()) + .next(rs); + maybeCreateUDTs(cluster, metadata); + String schemaCQL = metadata.toCqlString(false, false, false); + logger.info("Creating test table:\n{}", schemaCQL); + cluster.schemaChange(schemaCQL); + if (enableMigration) + { + cluster.schemaChange("ALTER TABLE " + metadata + " WITH " + mode.asCqlParam()); + cluster.get(1).nodetoolResult("consensus_admin", "begin-migration", "--target-protocol", "accord", metadata.keyspace, metadata.name).asserts().success(); + } + return new Spec(mode, enableMigration, metadata); + } + + private static BiFunction, Command, Void, ?>> cqlOperations(Spec spec) + { + Gen select = (Gen) (Gen) fromQT(new ASTGenerators.SelectGenBuilder(spec.metadata).withLimit1().build()); + Gen mutation = (Gen) (Gen) fromQT(new ASTGenerators.MutationGenBuilder(spec.metadata).withoutTimestamp().build()); + Gen txn = (Gen) (Gen) fromQT(new ASTGenerators.TxnGenBuilder(spec.metadata).build()); + Map, Integer> operations = new LinkedHashMap<>(); + operations.put(select, 1); + operations.put(mutation, 1); + operations.put(txn, 1); + Gen statementGen = Gens.oneOf(operations); + return (rs, state) -> cqlOperation(rs, state, statementGen); + } + + private static Command, Void, ?> cqlOperation(RandomSource rs, State state, Gen statementGen) + { + Statement stmt = statementGen.next(rs); + String cql; + //TODO (usability): are there any transaction_modes that actually need simple mutations/select to be wrapped in a BEGIN TRANSACTION? If not then this logica can be simplified + if (stmt.kind() == Statement.Kind.TXN || stmt.kind() == Statement.Kind.MUTATION && ((Mutation) stmt).isCas()) + cql = stmt.toCQL(); + else cql = wrapInTxn(stmt.toCQL()); + IInvokableInstance node = state.cluster.get(rs.pickInt(state.topologyHistory.up())); + return new Property.SimpleCommand<>(node + ": " + stmt.kind() + "; epoch=" + state.currentEpoch.get(), s2 -> executeTxn(s2.cluster, node, cql, stmt.bindsEncoded())); + } + + private static boolean allowsMigration(TransactionalMode mode) + { + switch (mode) + { + case unsafe_writes: + case mixed_reads: + case full: + return true; + default: + return false; + } + } + + private static SimpleQueryResult executeTxn(Cluster cluster, IInvokableInstance node, String stmt, ByteBuffer[] binds) + { + if (!AccordTestBase.isIdempotent(node, stmt)) + { + // won't be able to retry... + return node.coordinator().executeWithResult(stmt, ConsistencyLevel.ANY, (Object[]) binds); + } + return AccordTestBase.executeWithRetry(cluster, node, stmt, (Object[]) binds); + } + + private static void maybeCreateUDTs(Cluster cluster, TableMetadata metadata) + { + CassandraGenerators.visitUDTs(metadata, next -> { + String cql = next.toCqlString(false, false, false); + logger.warn("Creating UDT {}", cql); + cluster.schemaChange(cql); + }); + } + + public static class Spec implements TopologyMixupTestBase.SchemaSpec + { + private final TransactionalMode mode; + private final boolean enableMigration; + private final TableMetadata metadata; + + public Spec(TransactionalMode mode, boolean enableMigration, TableMetadata metadata) + { + this.mode = mode; + this.enableMigration = enableMigration; + this.metadata = metadata; + } + + @Override + public String name() + { + return metadata.name; + } + + @Override + public String keyspaceName() + { + return metadata.keyspace; + } + } + + private static class AccordState extends State implements SharedState.Listener + { + private final List onError = new CopyOnWriteArrayList<>(); + + public AccordState(RandomSource rs) + { + super(rs, AccordTopologyMixupTest::createSchemaSpec, AccordTopologyMixupTest::cqlOperations); + + SharedState.listeners.add(this); + } + + @Override + protected void onConfigure(IInstanceConfig c) + { + c.set("accord.shard_count", 1) + .set("paxos_variant", Config.PaxosVariant.v2.name()); + } + + @Override + protected void onStartupComplete(long tcmEpoch) + { + cluster.forEach(i -> { + if (i.isShutdown()) return; + i.runOnInstance(() -> { + try + { + AccordService.instance().epochReady(Epoch.create(tcmEpoch)).get(); + } + catch (InterruptedException | ExecutionException e) + { + throw new RuntimeException(e); + } + }); + }); + } + + @Override + public void debugTxn(@Nullable Node.Id exclude, String type, TxnId txnId) + { + onError.add(new Runnable() + { + @Override + public void run() + { + // this runs in the main thread, so is actually thread safe + int[] up = topologyHistory.up(); + logger.error("{} failed with txn id {}; global debug summary:\n{}", type, txnId, ClusterUtils.queryTxnStateAsString(cluster, txnId, up)); + onError.remove(this); + } + }); + } + + @Override + public void close() throws Exception + { + for (Runnable r : onError) + { + try + { + r.run(); + } + catch (Throwable t) + { + // TODO (correctness): how to handle? + logger.error("Unhandled error in onError listeners", t); + } + } + onError.clear(); + SharedState.listeners.remove(this); + super.close(); + } + } + + @Shared + public static class SharedState + { + public interface Listener + { + void debugTxn(Node.Id node, String type, TxnId txnId); + } + + public static final CopyOnWriteArrayList listeners = new CopyOnWriteArrayList<>(); + + public static void debugTxn(@Nullable Integer node, String type, String id) + { + Node.Id nodeId = node == null ? null : new Node.Id(node); + TxnId txnId = TxnId.parse(id); + listeners.forEach(l -> l.debugTxn(nodeId, type, txnId)); + } + } + + @Isolated + public static class InterceptAgent extends AccordAgent + { + @Override + public void onFailedBarrier(TxnId id, Seekables keysOrRanges, Throwable cause) + { + if (cause instanceof Timeout || cause instanceof Preempted) + { + SharedState.debugTxn(null, "Repair Barrier", id.toString()); + } + } + + @Override + public void onFailedBootstrap(String phase, Ranges ranges, Runnable retry, Throwable failure) + { + if (failure instanceof Exhausted) + { + Exhausted e = (Exhausted) failure; + SharedState.debugTxn(self.id, "Bootstrap#" + phase, e.txnId().toString()); + } + super.onFailedBootstrap(phase, ranges, retry, failure); + } + } +} diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java b/test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java deleted file mode 100644 index 73f59c2d4309..000000000000 --- a/test/distributed/org/apache/cassandra/fuzz/topology/HarryTopologyMixupTest.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.fuzz.topology; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.BiFunction; -import java.util.function.Consumer; -import java.util.function.Function; -import javax.annotation.Nullable; - -import com.google.common.base.Throwables; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import accord.utilsfork.Gen; -import accord.utilsfork.Property; -import accord.utilsfork.Property.Command; -import accord.utilsfork.Property.PreCheckResult; -import accord.utilsfork.Property.SimpleCommand; -import accord.utilsfork.RandomSource; -import org.apache.cassandra.distributed.Cluster; -import org.apache.cassandra.distributed.api.IInstanceConfig; -import org.apache.cassandra.exceptions.RequestTimeoutException; -import org.apache.cassandra.harry.SchemaSpec; -import org.apache.cassandra.harry.dsl.HistoryBuilder; -import org.apache.cassandra.harry.dsl.ReplayingHistoryBuilder; -import org.apache.cassandra.harry.execution.InJvmDTestVisitExecutor; -import org.apache.cassandra.harry.gen.EntropySource; -import org.apache.cassandra.harry.gen.Generator; -import org.apache.cassandra.harry.gen.Generators; -import org.apache.cassandra.harry.gen.SchemaGenerators; -import org.apache.cassandra.harry.gen.rng.JdkRandomEntropySource; -import org.apache.cassandra.utils.AssertionUtils; -import org.assertj.core.api.Condition; - -import static org.apache.cassandra.distributed.shared.ClusterUtils.waitForCMSToQuiesce; - -public class HarryTopologyMixupTest extends TopologyMixupTestBase -{ - protected static final Condition TIMEOUT_CHECKER = AssertionUtils.isInstanceof(RequestTimeoutException.class); - private static final Logger logger = LoggerFactory.getLogger(HarryTopologyMixupTest.class); - - public HarryTopologyMixupTest() - { - } - - @Override - protected Gen> stateGen() - { - return HarryState::new; - } - - @Override - protected void preCheck(Property.StatefulBuilder builder) - { - // if a failing seed is detected, populate here - // Example: builder.withSeed(42L); - } - - @Override - protected void destroyState(State state, @Nullable Throwable cause) - { - if (cause != null) return; - if (((HarryState) state).numInserts > 0) - { - for (Integer pkIdx : state.schema.pkGen.generated()) - state.schema.harry.selectPartition(pkIdx); - } - } - - private static BiFunction createSchemaSpec() - { - return (rs, cluster) -> { - EntropySource rng = new JdkRandomEntropySource(rs.nextLong()); - Generator schemaGen = SchemaGenerators.schemaSpecGen("harry", "table", 1000);; - SchemaSpec schema = schemaGen.generate(rng); - - HistoryBuilder harry = new ReplayingHistoryBuilder(schema.valueGenerators, - hb -> { - InJvmDTestVisitExecutor.Builder builder = InJvmDTestVisitExecutor.builder(); - return builder.nodeSelector(new InJvmDTestVisitExecutor.NodeSelector() - { - private final AtomicLong cnt = new AtomicLong(); - - @Override - public int select(long lts) - { - for (int i = 0; i < 42; i++) - { - int selected = (int) (cnt.getAndIncrement() % cluster.size() + 1); - if (!cluster.get(selected).isShutdown()) - return selected; - } - throw new IllegalStateException("Unable to find an alive instance"); - } - }) - .retryPolicy(t -> { - t = Throwables.getRootCause(t); - if (!TIMEOUT_CHECKER.matches(t)) - return false; - return false; - }) - .build(schema, hb, cluster); - }); - cluster.schemaChange(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3};", schema.keyspace)); - cluster.schemaChange(schema.compile()); - waitForCMSToQuiesce(cluster, cluster.get(1)); - return new Spec(harry, schema); - }; - } - - private static class HarryCommand extends SimpleCommand> - { - HarryCommand(Function, String> name, Consumer> fn) - { - super(name, fn); - } - - @Override - public PreCheckResult checkPreconditions(State state) - { - int clusterSize = state.topologyHistory.up().length; - return clusterSize >= 3 ? PreCheckResult.Ok : PreCheckResult.Ignore; - } - } - - private static CommandGen cqlOperations(Spec spec) - { - Command, Void, ?> insert = new HarryCommand(state -> "Harry Insert" + state.commandNamePostfix(), state -> { - spec.harry.insert(); - ((HarryState) state).numInserts++; - }); - return (rs, state) -> { - HarryState harryState = (HarryState) state; - TopologyHistory history = state.topologyHistory; - // if any topology change happened, then always validate all - if (harryState.generation != history.generation()) - { - harryState.generation = history.generation(); - return validateAll(state); - } - if ((harryState.numInserts > 0 && rs.decide(0.2))) // 20% of the time do reads - return validateAll(state); - return insert; - }; - } - - private static Command, Void, ?> validateAll(State state) - { - Spec spec = state.schema; - List, Void, ?>> reads = new ArrayList<>(); - - for (Integer pkIdx : spec.pkGen.generated()) - { - long pd = spec.harry.valueGenerators().pkGen().descriptorAt(pkIdx); - reads.add(new HarryCommand(s -> String.format("Harry Validate pd=%d%s", pd, state.commandNamePostfix()), s -> spec.harry.selectPartition(pkIdx))); - } - reads.add(new HarryCommand(s -> "Reset Harry Write State" + state.commandNamePostfix(), s -> ((HarryState) s).numInserts = 0)); - return Property.multistep(reads); - } - - public static class Spec implements Schema - { - private final Generators.TrackingGenerator pkGen; - private final HistoryBuilder harry; - private final SchemaSpec schema; - - public Spec(HistoryBuilder harry, SchemaSpec schema) - { - this.harry = harry; - this.schema = schema; - this.pkGen = Generators.tracking(Generators.int32(0, schema.valueGenerators.pkPopulation())); - } - - @Override - public String table() - { - return schema.table; - } - - @Override - public String keyspace() - { - return schema.keyspace; - } - - @Override - public String createSchema() - { - return schema.compile(); - } - } - - public class HarryState extends State - { - private long generation; - private int numInserts = 0; - - public HarryState(RandomSource rs) - { - super(rs, createSchemaSpec(), HarryTopologyMixupTest::cqlOperations); - } - - @Override - protected void onConfigure(IInstanceConfig config) - { - config.set("metadata_snapshot_frequency", 5); - } - } -} \ No newline at end of file diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java index 74988fb61890..a0275188479b 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java @@ -18,7 +18,6 @@ package org.apache.cassandra.fuzz.topology; -import javax.annotation.Nullable; import java.io.IOException; import java.io.UncheckedIOException; import java.net.InetSocketAddress; @@ -26,6 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Comparator; import java.util.EnumSet; import java.util.HashSet; import java.util.LinkedHashMap; @@ -33,76 +33,64 @@ import java.util.Map; import java.util.NoSuchElementException; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicLong; -import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; -import java.util.stream.Stream; +import javax.annotation.Nullable; -import com.google.common.base.Throwables; -import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import org.agrona.collections.Int2ObjectHashMap; -import org.agrona.collections.IntArrayList; -import org.agrona.collections.IntHashSet; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; -import accord.utilsfork.Invariants; -import accord.utilsfork.Property; -import accord.utilsfork.Property.Command; -import accord.utilsfork.Property.SimpleCommand; -import accord.utilsfork.RandomSource; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Invariants; +import accord.utils.Property; +import accord.utils.Property.Command; +import accord.utils.Property.SimpleCommand; +import accord.utils.RandomSource; +import org.agrona.collections.Int2ObjectHashMap; +import org.agrona.collections.IntArrayList; +import org.agrona.collections.IntHashSet; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.YamlConfigurationLoader; import org.apache.cassandra.distributed.Cluster; -import org.apache.cassandra.distributed.Constants; import org.apache.cassandra.distributed.api.Feature; -import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.NodeToolResult; -import org.apache.cassandra.distributed.api.Row; -import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.distributed.api.TokenSupplier; import org.apache.cassandra.distributed.impl.INodeProvisionStrategy; import org.apache.cassandra.distributed.impl.InstanceConfig; import org.apache.cassandra.distributed.shared.ClusterUtils; import org.apache.cassandra.distributed.test.TestBaseImpl; -import org.apache.cassandra.harry.model.TokenPlacementModelHelper; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; -import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; -import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.tcm.Retry; +import org.apache.cassandra.tcm.log.Entry; +import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.utils.ConfigGenBuilder; -import org.apache.cassandra.utils.Retry; - -import static accord.utilsfork.Property.commands; -import static accord.utilsfork.Property.ignoreCommand; -import static accord.utilsfork.Property.multistep; -import static accord.utilsfork.Property.stateful; -import static org.apache.cassandra.harry.model.TokenPlacementModel.Range; -import static org.apache.cassandra.harry.model.TokenPlacementModel.Replica; -import static org.apache.cassandra.harry.model.TokenPlacementModel.ReplicatedRanges; -import static org.apache.cassandra.harry.model.TokenPlacementModel.ReplicationFactor; -import static org.apache.cassandra.harry.model.TokenPlacementModel.SimpleReplicationFactor; + +import static accord.utils.Property.commands; +import static accord.utils.Property.ignoreCommand; +import static accord.utils.Property.multistep; +import static accord.utils.Property.stateful; +import static java.util.concurrent.TimeUnit.NANOSECONDS; /** * These tests can create many instances, so mac users may need to run the following to avoid address bind failures *

      * {@code for id in $(seq 0 15); do sudo ifconfig lo0 alias "127.0.0.$id"; done;} */ -public abstract class TopologyMixupTestBase extends TestBaseImpl +public abstract class TopologyMixupTestBase extends TestBaseImpl { private static final Logger logger = LoggerFactory.getLogger(TopologyMixupTestBase.class); @@ -116,9 +104,9 @@ private enum TopologyChange AddNode, RemoveNode, HostReplace, - StopNode, - StartNode, //TODO (coverage): add the following states once supported +// StopNode, +// StartNode, // MoveToken //TODO (coverage): node migrate to another rack or dc (unsupported on trunk as of this writing, but planned work for TCM) // MoveNodeToNewRack, @@ -139,95 +127,14 @@ private enum RemoveType // common commands private Command, Void, ?> repairCommand(int toCoordinate) { - return new SimpleCommand<>(state -> "nodetool repair " + state.schema.keyspace() + ' ' + state.schema.table() + " from node" + toCoordinate + state.commandNamePostfix(), - state -> state.cluster.get(toCoordinate).nodetoolResult("repair", state.schema.keyspace(), state.schema.table(), "--force").asserts().success()); - } - - private static Command, Void, ?> repairCommand(int toCoordinate, String ks, String... tables) { - return new SimpleCommand<>(state -> "nodetool repair " + ks + (tables.length == 0 ? "" : " " + Arrays.asList(tables)) + " from node" + toCoordinate + state.commandNamePostfix(), - state -> { - if (tables.length == 0) { - state.cluster.get(toCoordinate).nodetoolResult("repair", ks, "--force").asserts().success(); - return; - } - List args = new ArrayList<>(3 + tables.length); - args.add("repair"); - args.add(ks); - args.addAll(Arrays.asList(tables)); - args.add("--force"); - state.cluster.get(toCoordinate).nodetoolResult(args.toArray(String[]::new)).asserts().success(); - }); + return new SimpleCommand<>(state -> "nodetool repair " + state.schemaSpec.keyspaceName() + ' ' + state.schemaSpec.name() + " from node" + toCoordinate + state.commandNamePostfix(), + state -> state.cluster.get(toCoordinate).nodetoolResult("repair", state.schemaSpec.keyspaceName(), state.schemaSpec.name()).asserts().success()); } private Command, Void, ?> waitForCMSToQuiesce() { - return new Property.StateOnlyCommand<>() - { - private Epoch maxEpoch = null; - @Override - public String detailed(State state) - { - if (maxEpoch == null) - maxEpoch = ClusterUtils.maxEpoch(state.cluster, state.topologyHistory.up()); - return "Waiting for CMS to Quiesce on epoch " + maxEpoch.getEpoch() + state.commandNamePostfix(); - } - - @Override - public void applyUnit(State state) - { - Invariants.nonNull(maxEpoch, "detailed was not called before calling apply"); - ClusterUtils.waitForCMSToQuiesce(state.cluster, maxEpoch, true); - } - }; - } - - private Command, Void, ?> waitForGossipToSettle() - { - return new SimpleCommand<>(state -> "Waiting for Ring to Settle" + state.commandNamePostfix(), - state -> { - int[] up = state.topologyHistory.up(); - for (int node : up) - { - IInvokableInstance instance = state.cluster.get(node); - ClusterUtils.awaitRingJoin(state.cluster, up, instance); - } - }); - } - - private Command, Void, ?> waitAllNodesInPeers() - { - return new SimpleCommand<>(state -> "Waiting for all alive nodes to be in peers" + state.commandNamePostfix(), - state -> { - int[] up = state.topologyHistory.up(); - for (int node : up) - { - IInvokableInstance instance = state.cluster.get(node); - ClusterUtils.awaitInPeers(state.cluster, up, instance); - } - }); - } - - private Command, Void, ?> stopInstance(RandomSource rs, State state) - { - int toStop = rs.pickInt(state.upAndSafe()); - return stopInstance(toStop, "Normal Stop"); - } - - private Command, Void, ?> startInstance(RandomSource rs, State state) - { - int toStop = rs.pickInt(state.topologyHistory.down()); - return startInstance(toStop); - } - - private Command, Void, ?> startInstance(int toStart) - { - return new SimpleCommand<>(state -> "Start Node" + toStart + state.commandNamePostfix(), - state -> { - IInvokableInstance inst = state.cluster.get(toStart); - TopologyHistory.Node node = state.topologyHistory.node(toStart); - inst.startup(); - node.up(); - }); + return new SimpleCommand<>(state -> "Waiting for CMS to Quiesce" + state.commandNamePostfix(), + state -> ClusterUtils.waitForCMSToQuiesce(state.cluster, state.cmsGroup)); } private Command, Void, ?> stopInstance(int toRemove, String why) @@ -248,14 +155,13 @@ public void applyUnit(State state) TopologyHistory.Node n = state.topologyHistory.addNode(); IInvokableInstance newInstance = ClusterUtils.addInstance(state.cluster, n.dc, n.rack, c -> c.set("auto_bootstrap", true)); newInstance.startup(state.cluster); - ClusterUtils.assertModeJoined(newInstance); n.up(); }); } private Command, Void, ?> removeNodeDecommission(RandomSource rs, State state) { - int toRemove = rs.pickInt(state.upAndSafe()); + int toRemove = rs.pickInt(state.topologyHistory.up()); return new SimpleCommand<>("nodetool decommission node" + toRemove + state.commandNamePostfix(), s2 -> { IInvokableInstance inst = s2.cluster.get(toRemove); TopologyHistory.Node node = s2.topologyHistory.node(toRemove); @@ -269,7 +175,7 @@ public void applyUnit(State state) private Command, Void, ?> removeNode(RandomSource rs, State state) { int[] up = state.topologyHistory.up(); - int toRemove = rs.pickInt(state.upAndSafe()); + int toRemove = rs.pickInt(up); int toCoordinate; { int picked; @@ -294,7 +200,14 @@ public void applyUnit(State state) private Command, Void, ?> removeNodeAssassinate(RandomSource rs, State state) { - int toRemove = rs.pickInt(state.upAndSafe()); + //TODO (correctness): assassinate CMS member isn't allowed + IntHashSet up = asSet(state.topologyHistory.up()); + IntHashSet cmsGroup = asSet(state.cmsGroup); + Sets.SetView upAndNotInCMS = Sets.difference(up, cmsGroup); + if (upAndNotInCMS.isEmpty()) throw new AssertionError("Every node is a CMS member"); + List allowed = new ArrayList<>(upAndNotInCMS); + allowed.sort(Comparator.naturalOrder()); + int toRemove = rs.pick(allowed); int toCoordinate; { int[] upInt = state.topologyHistory.up(); @@ -338,17 +251,19 @@ public void applyUnit(State state) private Command, Void, ?> hostReplace(RandomSource rs, State state) { - int nodeToReplace = rs.pickInt(state.upAndSafe()); + int nodeToReplace = rs.pickInt(state.topologyHistory.up()); IInvokableInstance toReplace = state.cluster.get(nodeToReplace); TopologyHistory.Node adding = state.topologyHistory.replace(nodeToReplace); TopologyHistory.Node removing = state.topologyHistory.nodes.get(nodeToReplace); - return multistep(stopInstance(nodeToReplace, "HostReplace; Node" + adding.id), + return multistep(new SimpleCommand<>("Stop Node" + nodeToReplace + " for HostReplace; Node" + adding.id + state.commandNamePostfix(), s2 -> { + ClusterUtils.stopUnchecked(toReplace); + removing.down(); + }), new SimpleCommand<>("Host Replace Node" + nodeToReplace + "; Node" + adding.id + state.commandNamePostfix(), s2 -> { logger.info("node{} starting host replacement; epoch={}", adding.id, HackSerialization.tcmEpochAndSync(s2.cluster.getFirstRunningInstance())); removing.status = TopologyHistory.Node.Status.BeingReplaced; IInvokableInstance inst = ClusterUtils.replaceHostAndStart(s2.cluster, toReplace); - ClusterUtils.assertModeJoined(inst); s2.topologyHistory.replaced(removing, adding); long epoch = HackSerialization.tcmEpoch(inst); s2.currentEpoch.set(epoch); @@ -374,7 +289,7 @@ protected void destroyState(State state, @Nullable Throwable cause) throws Th @Test public void test() { - Property.StatefulBuilder statefulBuilder = stateful().withSteps(20).withStepTimeout(Duration.ofMinutes(3)).withExamples(1); + Property.StatefulBuilder statefulBuilder = stateful().withSteps(20).withStepTimeout(Duration.ofMinutes(2)).withExamples(1); preCheck(statefulBuilder); statefulBuilder.check(commands(this::stateGen) .preCommands(state -> state.preActions.forEach(Runnable::run)) @@ -391,12 +306,6 @@ public void test() TopologyMixupTestBase.this.destroyState(state, cause); } }) - .commandsTransformer((state, gen) -> { - for (BiFunction, Gen, Void, ?>>, Gen, Void, ?>>> fn : state.commandsTransformers) - gen = fn.apply(state, gen); - return gen; - }) - .onSuccess((state, sut, history) -> logger.info("Successful for the following:\nState {}\nHistory:\n{}", state, Property.formatList("\t\t", history))) .build()); } @@ -405,32 +314,18 @@ private EnumSet possibleTopologyChanges(State state) EnumSet possibleTopologyChanges = EnumSet.noneOf(TopologyChange.class); // up or down is logically more correct, but since this runs sequentially and after the topology changes are complete, we don't have downed nodes at this point // so up is enough to know the topology size - int up = state.topologyHistory.up().length; - int down = state.topologyHistory.down().length; - int[] upAndSafe = state.upAndSafe(); - int total = up + down; - if (total < state.topologyHistory.maxNodes) + int size = state.topologyHistory.up().length; + if (size < state.topologyHistory.maxNodes) possibleTopologyChanges.add(TopologyChange.AddNode); - if (upAndSafe.length > 0) + if (size > state.topologyHistory.quorum()) { - // can't remove the node if all nodes are CMS nodes - if (!Sets.difference(asSet(upAndSafe), asSet(state.cmsGroup)).isEmpty()) + if (size > TARGET_RF) possibleTopologyChanges.add(TopologyChange.RemoveNode); possibleTopologyChanges.add(TopologyChange.HostReplace); - possibleTopologyChanges.add(TopologyChange.StopNode); } - if (down > 0) - possibleTopologyChanges.add(TopologyChange.StartNode); return possibleTopologyChanges; } - private Command, Void, ?> awaitClusterStable() - { - return multistep(waitForCMSToQuiesce(), - waitForGossipToSettle(), - waitAllNodesInPeers()); - } - private Gen, Void, ?>> topologyCommand(State state, EnumSet possibleTopologyChanges) { Map, Void, ?>>, Integer> possible = new LinkedHashMap<>(); @@ -439,19 +334,13 @@ private EnumSet possibleTopologyChanges(State state) switch (task) { case AddNode: - possible.put(ignore -> multistep(addNode(), awaitClusterStable()), 1); + possible.put(ignore -> multistep(addNode(), waitForCMSToQuiesce()), 1); break; case RemoveNode: - possible.put(rs -> multistep(removeNodeRandomizedDispatch(rs, state), awaitClusterStable()), 1); + possible.put(rs -> multistep(removeNodeRandomizedDispatch(rs, state), waitForCMSToQuiesce()), 1); break; case HostReplace: - possible.put(rs -> multistep(hostReplace(rs, state), awaitClusterStable()), 1); - break; - case StartNode: - possible.put(rs -> startInstance(rs, state), 1); - break; - case StopNode: - possible.put(rs -> stopInstance(rs, state), 1); + possible.put(rs -> multistep(hostReplace(rs, state), waitForCMSToQuiesce()), 1); break; default: throw new UnsupportedOperationException(task.name()); @@ -468,66 +357,28 @@ private static IntHashSet asSet(int[] array) return set; } - public interface Schema + public interface SchemaSpec { - String table(); - String keyspace(); - String createSchema(); - } + String name(); - protected interface CommandGen - { - Command, Void, ?> apply(RandomSource rs, State state); + String keyspaceName(); } - private static class LoggingCommand extends Property.ForwardingCommand - { - private static final Logger logger = LoggerFactory.getLogger(LoggingCommand.class); - - private LoggingCommand(Command delegate) - { - super(delegate); - } - - @Override - public Result apply(State s) throws Throwable - { - String name = detailed(s); - long startNanos = Clock.Global.nanoTime(); - try - { - logger.info("Starting command: {}", name); - Result o = super.apply(s); - logger.info("Command {} was success after {}", name, Duration.ofNanos(Clock.Global.nanoTime() - startNanos)); - return o; - } - catch (Throwable t) - { - logger.warn("Command {} failed after {}: {}", name, Duration.ofNanos(Clock.Global.nanoTime() - startNanos), t.toString()); // don't want stack trace, just type/msg - throw t; - } - } - } - - protected static class State implements AutoCloseable + protected static class State implements AutoCloseable { final TopologyHistory topologyHistory; final Cluster cluster; - final S schema; - final List, Gen, Void, ?>>, Gen, Void, ?>>>> commandsTransformers = new ArrayList<>(); + final S schemaSpec; final List preActions = new CopyOnWriteArrayList<>(); final AtomicLong currentEpoch = new AtomicLong(); - final CommandGen statementGen; + final BiFunction, Command, Void, ?>> statementGen; final Gen removeTypeGen; private final Map yamlConfigOverrides; int[] cmsGroup = new int[0]; - private ReplicationFactor rf; - private final RingModel ring = new RingModel(); - public State(RandomSource rs, BiFunction schemaSpecGen, Function> cqlOperationsGen) + public State(RandomSource rs, BiFunction schemaSpecGen, Function, Command, Void, ?>>> cqlOperationsGen) { this.topologyHistory = new TopologyHistory(rs.fork(), 2, 4); - rf = new SimpleReplicationFactor(2); try { @@ -536,18 +387,7 @@ public State(RandomSource rs, BiFunction schemaSpecGen .withTokenSupplier(topologyHistory) .withConfig(c -> { c.with(Feature.values()) - .set("write_request_timeout", "10s") - .set("read_request_timeout", "10s") - .set("range_request_timeout", "20s") - .set("request_timeout", "20s") - .set("native_transport_timeout", "30s") - // bound startup to some value larger than the task timeout, this is to allow the - // tests to stop blocking when a startup issue is detected. The main reason for - // this is that startup blocks forever, waiting for accord and streaming to - // complete... but if there are bugs at these layers then the startup will never - // exit, blocking the JVM from giving the needed information (logs/seed) to debug. - .set(Constants.KEY_DTEST_STARTUP_TIMEOUT, "4m") - .set(Constants.KEY_DTEST_API_STARTUP_FAILURE_AS_SHUTDOWN, false); + .set("write_request_timeout", "10s"); //TODO (maintenance): where to put this? Anything touching ConfigGenBuilder with jvm-dtest needs this... ((InstanceConfig) c).remove("commitlog_sync_period_in_ms"); for (Map.Entry e : yamlConfigOverrides.entrySet()) @@ -585,25 +425,6 @@ public String ipAddress(int nodeNum) { throw new UncheckedIOException(e); } - cluster.setUncaughtExceptionsFilter((node, t) -> { - // api is "ignore" so false means include, - var rootCause = Throwables.getRootCause(t); - if (rootCause.getMessage() != null) - { - if (rootCause.getMessage().startsWith("Queried for epoch") && rootCause.getMessage().contains("but could not catch up. Current epoch:")) - return true; - if (rootCause.getMessage().startsWith("Operation timed out")) - { - // is this due to TCM fetching epochs? PaxosBackedProcessor.getLogState is costly and more likely to timeout... so ignore those - Optional match = Stream.of(rootCause.getStackTrace()) - .filter(s -> s.getClassName().equals("org.apache.cassandra.tcm.PaxosBackedProcessor") && s.getMethodName().equals("getLogState")) - .findFirst(); - if (match.isPresent()) - return true; - } - } - return false; - }); fixDistributedSchemas(cluster); init(cluster, TARGET_RF); // fix TCM @@ -612,71 +433,31 @@ public String ipAddress(int nodeNum) result.asserts().success(); logger.info("CMS reconfigure: {}", result.getStdout()); } - commandsTransformers.add(new BiFunction, Gen, Void, ?>>, Gen, Void, ?>>>() { + preActions.add(new Runnable() + { // in order to remove this action, an anonymous class is needed so "this" works, lambda "this" is the parent class @Override - public Gen, Void, ?>> apply(State state, Gen, Void, ?>> commandGen) { - if (topologyHistory.up().length < TARGET_RF) - return commandGen; - SimpleCommand> reconfig = new SimpleCommand<>("nodetool cms reconfigure " + TARGET_RF, ignore -> { + public void run() + { + if (topologyHistory.up().length == TARGET_RF) + { NodeToolResult result = cluster.get(1).nodetoolResult("cms", "reconfigure", Integer.toString(TARGET_RF)); result.asserts().success(); logger.info("CMS reconfigure: {}", result.getStdout()); - }); - SimpleCommand> fixDistributedSchemas = new SimpleCommand<>("Set system distributed keyspaces to RF=" + TARGET_RF, ignore -> - fixDistributedSchemas(cluster)); - SimpleCommand> fixTestKeyspace = new SimpleCommand<>("Set " + KEYSPACE + " keyspace to RF=" + TARGET_RF, s -> { - cluster.schemaChange("ALTER KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': " + TARGET_RF + "}"); - rf = new SimpleReplicationFactor(TARGET_RF); - }); - var self = this; - return rs -> { - Command, Void, ?> next = commandGen.next(rs); - if (next.checkPreconditions(state) == Property.PreCheckResult.Ignore) - return next; - commandsTransformers.remove(self); - int[] up = state.topologyHistory.up(); - List, Void, ?>> commands = new ArrayList<>(); - commands.add(fixDistributedSchemas); - for (String ks : Arrays.asList("system_auth", "system_traces")) - { - int coordinator = rs.pickInt(up); - commands.add(repairCommand(coordinator, ks)); - } - commands.add(fixTestKeyspace); - { - int coordinator = rs.pickInt(up); - commands.add(repairCommand(coordinator, KEYSPACE)); - } - commands.add(reconfig); - commands.add(next); - return multistep(commands); - }; + preActions.remove(this); + } } }); - commandsTransformers.add((state, commandGen) -> rs2 -> { - Command, Void, ?> c = commandGen.next(rs2); - if (!(c instanceof Property.MultistepCommand)) - return new LoggingCommand<>(c); - Property.MultistepCommand, Void> multistep = (Property.MultistepCommand, Void>) c; - List, Void, ?>> subcommands = new ArrayList<>(); - for (var sub : multistep) - subcommands.add(new LoggingCommand<>(sub)); - return multistep(subcommands); - }); preActions.add(() -> { int[] up = topologyHistory.up(); // use the most recent node just in case the cluster isn't in-sync IInvokableInstance node = cluster.get(up[up.length - 1]); cmsGroup = HackSerialization.cmsGroup(node); currentEpoch.set(HackSerialization.tcmEpoch(node)); - - ring.rebuild(cluster.coordinator(up[0]), rf, up); - // ring must know about the up nodes }); preActions.add(() -> cluster.checkAndResetUncaughtExceptions()); - this.schema = schemaSpecGen.apply(rs, cluster); - statementGen = cqlOperationsGen.apply(schema); + this.schemaSpec = schemaSpecGen.apply(rs, cluster); + statementGen = cqlOperationsGen.apply(schemaSpec); removeTypeGen = REMOVE_TYPE_DISTRIBUTION.next(rs); @@ -697,38 +478,7 @@ protected void onConfigure(IInstanceConfig config) protected String commandNamePostfix() { - return "; epoch=" + currentEpoch.get() + ", cms=" + Arrays.toString(cmsGroup) + ", up=" + Arrays.toString(topologyHistory.up()) + ", down=" + Arrays.toString(topologyHistory.down()); - } - - public int[] upAndSafe() - { - IntHashSet up = asSet(topologyHistory.up()); - int quorum = topologyHistory.quorum(); - // find what ranges are able to handle 1 node loss - Set safeRanges = new HashSet<>(); - ring.rangesToReplicas((range, replicas) -> { - IntHashSet alive = new IntHashSet(); - for (int peer : replicas) - { - if (up.contains(peer)) - alive.add(peer); - } - if (quorum < alive.size()) - safeRanges.add(range); - }); - - // filter nodes where 100% of their ranges are "safe" - IntArrayList safeNodes = new IntArrayList(); - for (int id : up) - { - List ranges = ring.ranges(id); - if (ranges.stream().allMatch(safeRanges::contains)) - safeNodes.add(id); - } - - int[] upAndSafe = safeNodes.toIntArray(); - Arrays.sort(upAndSafe); - return upAndSafe; + return "; epoch=" + currentEpoch.get() + ", cms=" + Arrays.toString(cmsGroup); } @Override @@ -736,8 +486,6 @@ public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Yaml Config:\n").append(YamlConfigurationLoader.toYaml(this.yamlConfigOverrides)); - String cql = schema.createSchema(); - sb.append("\n-- Setup Schema\n").append(cql); sb.append("\nTopology:\n").append(topologyHistory); sb.append("\nCMS Voting Group: ").append(Arrays.toString(cmsGroup)); if (epochHistory != null) @@ -750,28 +498,14 @@ public String toString() @Override public void close() throws Exception { - var cmsNodesUp = Sets.intersection(asSet(cmsGroup), asSet(topologyHistory.up())); - int cmsNode = Iterables.getFirst(cmsNodesUp, null); - try - { - SimpleQueryResult qr = Retry.retryWithBackoffBlocking(5, () -> cluster.get(cmsNode).executeInternalWithResult("SELECT epoch, kind, transformation FROM system_views.cluster_metadata_log")); - TableBuilder builder = new TableBuilder(" | "); - builder.add(qr.names()); - while (qr.hasNext()) - { - Row next = qr.next(); - builder.add(Stream.of(next.toObjectArray()) - .map(Objects::toString) - .map(s -> s.length() > 100 ? s.substring(0, 100) + "..." : s) - .collect(Collectors.toList())); - } - epochHistory = "Epochs:\n" + builder; - } - catch (Throwable t) - { - logger.warn("Unable to fetch epoch history on node{}", cmsNode, t); - } - logger.info("Shutting down clusters"); + epochHistory = cluster.get(cmsGroup[0]).callOnInstance(() -> { + LogState all = ClusterMetadataService.instance().processor().reconstruct(Epoch.EMPTY, Epoch.create(Long.MAX_VALUE), Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(NANOSECONDS), + TCMMetrics.instance.commitRetries)); + StringBuilder sb = new StringBuilder("Epochs:"); + for (Entry e : all.entries) + sb.append("\n").append(e.epoch.getEpoch()).append(": ").append(e.transform); + return sb.toString(); + }); cluster.close(); } } @@ -834,21 +568,11 @@ public Collection tokens(int i) } public int[] up() - { - return nodes(Node.Status.Up); - } - - public int[] down() - { - return nodes(Node.Status.Down); - } - - private int[] nodes(Node.Status target) { IntArrayList up = new IntArrayList(nodes.size(), -1); for (Map.Entry n : nodes.entrySet()) { - if (n.getValue().status == target) + if (n.getValue().status == Node.Status.Up) up.add(n.getKey()); } int[] ints = up.toIntArray(); @@ -988,63 +712,4 @@ private static int addressToNodeId(InetAddressAndPort addressAndPort) return Integer.parseInt(parts[3]); } } - - private static class RingModel - { - ReplicatedRanges ring = null; - Int2ObjectHashMap idToReplica = null; - - private void rebuild(ICoordinator coordinator, ReplicationFactor rf, int[] up) - { - ring = TokenPlacementModelHelper.getRing(coordinator, rf); - - Int2ObjectHashMap idToReplica = new Int2ObjectHashMap<>(); - for (Map.Entry> e : ring.asMap().entrySet()) - { - for (var replica : e.getValue()) - idToReplica.put(toNodeId(replica), replica); - } - this.idToReplica = idToReplica; - - IntHashSet upSet = asSet(up); - if (!idToReplica.keySet().containsAll(upSet)) - { - int coordinatorNode = coordinator.instance().config().num(); - Sets.SetView diff = Sets.difference(upSet, idToReplica.keySet()); - throw new AssertionError("Unable to find nodes " + diff + " in the ring on node" + coordinatorNode); - } - } - - private static int toNodeId(Replica replica) - { - //TODO (fix test api): NodeId is in the API but is always null. Cheapest way to get the id is to assume the address has it - // same issue with address... - // /127.0.0.2 - String harryId = replica.node().id(); - int index = harryId.lastIndexOf('.'); - int peer = Integer.parseInt(harryId.substring(index + 1)); - return peer; - } - - List ranges(int node) - { - Replica replica = idToReplica.get(node); - if (replica == null) - throw new AssertionError("Unknown node" + node); - List ranges = ring.ranges(replica); - if (ranges == null) - throw new AssertionError("node" + node + " some how does not have ranges..."); - return ranges; - } - - private void rangesToReplicas(BiConsumer fn) - { - for (Map.Entry> e : ring.asMap().entrySet()) - { - int[] replicas = e.getValue().stream().mapToInt(RingModel::toNodeId).toArray(); - Arrays.sort(replicas); - fn.accept(e.getKey(), replicas); - } - } - } } diff --git a/test/harry/main/org/apache/cassandra/harry/SchemaSpec.java b/test/harry/main/org/apache/cassandra/harry/SchemaSpec.java index dbfa6e5f53b6..b103d82e37b4 100644 --- a/test/harry/main/org/apache/cassandra/harry/SchemaSpec.java +++ b/test/harry/main/org/apache/cassandra/harry/SchemaSpec.java @@ -30,6 +30,7 @@ import org.apache.cassandra.harry.gen.Generators; import org.apache.cassandra.harry.gen.ValueGenerators; import org.apache.cassandra.harry.util.IteratorsUtil; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.ByteArrayUtil; import static org.apache.cassandra.harry.gen.InvertibleGenerator.MAX_ENTROPY; @@ -177,6 +178,15 @@ public String compile() shouldAppendAnd = true; } + if (options.transactionalMode() != null) + { + appendWith.run(); + if (shouldAppendAnd) + sb.append(" AND"); + sb.append(" ").append(options.transactionalMode().asCqlParam()); + shouldAppendAnd = true; + } + if (options.disableReadRepair()) { appendWith.run(); @@ -339,6 +349,7 @@ public int hashCode() public interface Options { + TransactionalMode transactionalMode(); boolean addWriteTimestamps(); boolean disableReadRepair(); String compactionStrategy(); @@ -354,6 +365,7 @@ public static OptionsBuilder optionsBuilder() public static class OptionsBuilder implements Options { + private TransactionalMode transactionalMode = null; private boolean addWriteTimestamps = true; private boolean disableReadRepair = false; private String compactionStrategy = null; @@ -365,6 +377,23 @@ private OptionsBuilder() { } + public Options build() + { + return this; + } + + public OptionsBuilder withTransactionalMode(TransactionalMode mode) + { + this.transactionalMode = mode; + return this; + } + + @Override + public TransactionalMode transactionalMode() + { + return transactionalMode; + } + public OptionsBuilder addWriteTimestamps(boolean newValue) { this.addWriteTimestamps = newValue; diff --git a/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java b/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java index df4d44cbc92b..37b40f5acc4c 100644 --- a/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java +++ b/test/harry/main/org/apache/cassandra/harry/execution/DataTracker.java @@ -27,7 +27,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicLong; -import accord.utilsfork.Invariants; +import accord.utils.Invariants; import org.apache.cassandra.harry.op.Visit; import org.apache.cassandra.harry.op.Operations; import org.apache.cassandra.harry.model.Model; diff --git a/test/unit/accord/utilsfork/WrappedRandomSource.java b/test/harry/main/org/apache/cassandra/harry/gen/EntropyRandomSource.java similarity index 60% rename from test/unit/accord/utilsfork/WrappedRandomSource.java rename to test/harry/main/org/apache/cassandra/harry/gen/EntropyRandomSource.java index 39e899cb1df5..63f62ab7e22e 100644 --- a/test/unit/accord/utilsfork/WrappedRandomSource.java +++ b/test/harry/main/org/apache/cassandra/harry/gen/EntropyRandomSource.java @@ -16,82 +16,74 @@ * limitations under the License. */ -package accord.utilsfork; +package org.apache.cassandra.harry.gen; -import java.util.Random; +import accord.utils.RandomSource; -class WrappedRandomSource implements accord.utilsfork.RandomSource +public class EntropyRandomSource implements RandomSource { - private final Random random; + private final EntropySource delegate; - WrappedRandomSource(Random random) + public EntropyRandomSource(EntropySource delegate) { - this.random = random; - } - - @Override - public Random asJdkRandom() - { - return random; + this.delegate = delegate; } @Override public void nextBytes(byte[] bytes) { - random.nextBytes(bytes); + for (int i = 0, len = bytes.length; i < len; ) + for (int rnd = nextInt(), + n = Math.min(len - i, Integer.SIZE/Byte.SIZE); + n-- > 0; rnd >>= Byte.SIZE) + bytes[i++] = (byte)rnd; } @Override public boolean nextBoolean() { - return random.nextBoolean(); + return delegate.nextBoolean(); } @Override public int nextInt() { - return random.nextInt(); - } - - @Override - public int nextInt(int maxExclusive) - { - return random.nextInt(maxExclusive); + return delegate.nextInt(); } @Override public long nextLong() { - return random.nextLong(); + return ((long) nextInt() << 32) + nextInt(); } @Override public float nextFloat() { - return random.nextFloat(); + return delegate.nextFloat(); } @Override public double nextDouble() { - return random.nextDouble(); + throw new UnsupportedOperationException("TODO: Implement"); } @Override public double nextGaussian() { - return random.nextGaussian(); + throw new UnsupportedOperationException("TODO: Implement"); } @Override public void setSeed(long seed) { - random.setSeed(seed); + delegate.seed(seed); } @Override public RandomSource fork() { - return new WrappedRandomSource(new Random(nextLong())); + return new EntropyRandomSource(delegate.derive()); } } diff --git a/test/harry/main/org/apache/cassandra/harry/gen/Generators.java b/test/harry/main/org/apache/cassandra/harry/gen/Generators.java index 1f39b0cd2f25..e3391099b413 100644 --- a/test/harry/main/org/apache/cassandra/harry/gen/Generators.java +++ b/test/harry/main/org/apache/cassandra/harry/gen/Generators.java @@ -31,7 +31,8 @@ import java.util.UUID; import java.util.function.Supplier; -import accord.utilsfork.Invariants; +import accord.utils.Gen; +import accord.utils.Invariants; import org.apache.cassandra.harry.util.BitSet; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.utils.TimeUUID; @@ -460,4 +461,14 @@ public static Generator constant(Supplier constant) { return (random) -> constant.get(); } + + public static Gen toAccord(Generator gen) + { + return rng -> gen.generate(new RandomSourceEntropySource(rng)); + } + + public static Generator toHarry(Gen gen) + { + return rng -> gen.next(new EntropyRandomSource(rng)); + } } diff --git a/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java b/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java index 50a42f67baab..ae8f4a8f716a 100644 --- a/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java +++ b/test/harry/main/org/apache/cassandra/harry/gen/InvertibleGenerator.java @@ -27,7 +27,7 @@ import java.util.Map; import java.util.stream.Collectors; -import accord.utilsfork.Invariants; +import accord.utils.Invariants; import org.agrona.collections.IntHashSet; import org.apache.cassandra.harry.ColumnSpec; import org.apache.cassandra.harry.MagicConstants; diff --git a/test/unit/accord/utilsfork/DefaultRandom.java b/test/harry/main/org/apache/cassandra/harry/gen/RandomSourceEntropySource.java similarity index 68% rename from test/unit/accord/utilsfork/DefaultRandom.java rename to test/harry/main/org/apache/cassandra/harry/gen/RandomSourceEntropySource.java index 49f9085569a6..b1aed196db0a 100644 --- a/test/unit/accord/utilsfork/DefaultRandom.java +++ b/test/harry/main/org/apache/cassandra/harry/gen/RandomSourceEntropySource.java @@ -16,33 +16,35 @@ * limitations under the License. */ -package accord.utilsfork; +package org.apache.cassandra.harry.gen; -import java.util.Random; +import accord.utils.RandomSource; -public class DefaultRandom implements RandomSource +public class RandomSourceEntropySource implements EntropySource { - private final Random delegate; - public DefaultRandom() + private final RandomSource delegate; + + public RandomSourceEntropySource(RandomSource delegate) { - this.delegate = new Random(); + this.delegate = delegate; } - public DefaultRandom(long seed) + @Override + public long next() { - this.delegate = new Random(seed); + return delegate.nextLong(); } @Override - public void nextBytes(byte[] bytes) + public void seed(long seed) { - delegate.nextBytes(bytes); + delegate.setSeed(seed); } @Override - public boolean nextBoolean() + public EntropySource derive() { - return delegate.nextBoolean(); + return new RandomSourceEntropySource(delegate.fork()); } @Override @@ -52,43 +54,32 @@ public int nextInt() } @Override - public long nextLong() + public int nextInt(int max) { - return delegate.nextLong(); + return delegate.nextInt(max); } @Override - public float nextFloat() + public int nextInt(int min, int max) { - return delegate.nextFloat(); + return delegate.nextInt(min, max); } @Override - public double nextDouble() - { - return delegate.nextDouble(); - } - - @Override - public double nextGaussian() + public float nextFloat() { - return delegate.nextGaussian(); + return delegate.nextFloat(); } @Override - public void setSeed(long seed) + public double nextDouble() { - delegate.setSeed(seed); - } - - @Override - public DefaultRandom fork() { - return new DefaultRandom(nextLong()); + return delegate.nextDouble(); } @Override - public Random asJdkRandom() + public boolean nextBoolean() { - return delegate; + return delegate.nextBoolean(); } } diff --git a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java index 0382f7987225..e1df90e0c269 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java +++ b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java @@ -45,12 +45,13 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import accord.utilsfork.Invariants; + +import accord.utils.Invariants; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.AssignmentOperator; import org.apache.cassandra.cql3.ast.CasCondition; -import org.apache.cassandra.cql3.ast.Conditional; import org.apache.cassandra.cql3.ast.Conditional.Where.Inequality; +import org.apache.cassandra.cql3.ast.Conditional; import org.apache.cassandra.cql3.ast.Element; import org.apache.cassandra.cql3.ast.Expression; import org.apache.cassandra.cql3.ast.ExpressionEvaluator; diff --git a/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java b/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java index cc6531e132e6..fde20aa363f3 100644 --- a/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java +++ b/test/harry/main/org/apache/cassandra/harry/test/SimpleBijectionTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.harry.test; -import accord.utilsfork.Invariants; +import accord.utils.Invariants; import org.apache.cassandra.harry.ColumnSpec; import org.apache.cassandra.harry.SchemaSpec; import org.apache.cassandra.harry.dsl.HistoryBuilder; diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java index 872f6611df1a..b870c214bb7f 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java @@ -41,7 +41,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utilsfork.Invariants; +import accord.utils.Invariants; import io.airlift.airline.Command; import io.airlift.airline.HelpOption; import io.airlift.airline.Option; diff --git a/test/unit/accord/utilsfork/Gen.java b/test/unit/accord/utilsfork/Gen.java deleted file mode 100644 index e9468cb24e89..000000000000 --- a/test/unit/accord/utilsfork/Gen.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork; - -import java.util.function.BiFunction; -import java.util.function.Function; -import java.util.function.IntPredicate; -import java.util.function.IntSupplier; -import java.util.function.IntUnaryOperator; -import java.util.function.LongPredicate; -import java.util.function.LongSupplier; -import java.util.function.LongUnaryOperator; -import java.util.function.Predicate; -import java.util.function.Supplier; -import java.util.function.ToIntFunction; -import java.util.function.ToLongFunction; -import java.util.stream.IntStream; -import java.util.stream.LongStream; -import java.util.stream.Stream; - -public interface Gen { - /** - * For cases where method handles isn't able to detect the proper type, this method acts as a cast - * to inform the compiler of the desired type. - */ - static Gen of(Gen fn) - { - return fn; - } - - A next(accord.utilsfork.RandomSource random); - - default Gen map(Function fn) - { - return r -> fn.apply(this.next(r)); - } - - default Gen map(BiFunction fn) - { - return r -> fn.apply(r, this.next(r)); - } - - default IntGen mapToInt(ToIntFunction fn) - { - return r -> fn.applyAsInt(next(r)); - } - - default LongGen mapToLong(ToLongFunction fn) - { - return r -> fn.applyAsLong(next(r)); - } - - default Gen flatMap(Function> mapper) - { - return rs -> mapper.apply(this.next(rs)).next(rs); - } - - default Gen flatMap(BiFunction> mapper) - { - return rs -> mapper.apply(rs, this.next(rs)).next(rs); - } - - default Gen filter(Predicate fn) - { - Gen self = this; - return r -> { - A value; - do { - value = self.next(r); - } - while (!fn.test(value)); - return value; - }; - } - - default Gen filter(int maxAttempts, A defaultValue, Predicate fn) - { - Invariants.checkArgument(maxAttempts > 0, "Max attempts must be positive; given %d", maxAttempts); - Gen self = this; - return r -> { - for (int i = 0; i < maxAttempts; i++) - { - A v = self.next(r); - if (fn.test(v)) - return v; - - } - return defaultValue; - }; - } - - default Supplier asSupplier(accord.utilsfork.RandomSource rs) - { - return () -> next(rs); - } - - default Stream asStream(accord.utilsfork.RandomSource rs) - { - return Stream.generate(() -> next(rs)); - } - - interface Int2IntMapFunction - { - int applyAsInt(accord.utilsfork.RandomSource rs, int value); - } - - interface Int2LongMapFunction - { - long applyAsLong(accord.utilsfork.RandomSource rs, int value); - } - - interface Long2LongMapFunction - { - long applyAsLong(accord.utilsfork.RandomSource rs, long value); - } - - interface IntGen extends Gen - { - int nextInt(accord.utilsfork.RandomSource random); - - @Override - default Integer next(accord.utilsfork.RandomSource random) - { - return nextInt(random); - } - - default IntGen mapAsInt(IntUnaryOperator fn) - { - return r -> fn.applyAsInt(nextInt(r)); - } - - default IntGen mapAsInt(Int2IntMapFunction fn) - { - return r -> fn.applyAsInt(r, nextInt(r)); - } - - default LongGen mapAsLong(Int2LongMapFunction fn) - { - return r -> fn.applyAsLong(r, nextInt(r)); - } - - default Gen.IntGen filterAsInt(IntPredicate fn) - { - return rs -> { - int value; - do - { - value = nextInt(rs); - } - while (!fn.test(value)); - return value; - }; - } - - @Override - default Gen.IntGen filter(Predicate fn) - { - return filterAsInt(i -> fn.test(i)); - } - - default IntSupplier asIntSupplier(accord.utilsfork.RandomSource rs) - { - return () -> nextInt(rs); - } - - default IntStream asIntStream(accord.utilsfork.RandomSource rs) - { - return IntStream.generate(() -> nextInt(rs)); - } - } - - interface LongGen extends Gen - { - long nextLong(accord.utilsfork.RandomSource random); - - @Override - default Long next(accord.utilsfork.RandomSource random) - { - return nextLong(random); - } - - default LongGen mapAsLong(LongUnaryOperator fn) - { - return r -> fn.applyAsLong(nextLong(r)); - } - - default LongGen mapAsLong(Long2LongMapFunction fn) - { - return r -> fn.applyAsLong(r, nextLong(r)); - } - - default Gen.LongGen filterAsLong(LongPredicate fn) - { - return rs -> { - long value; - do - { - value = nextLong(rs); - } - while (!fn.test(value)); - return value; - }; - } - - @Override - default Gen.LongGen filter(Predicate fn) - { - return filterAsLong(i -> fn.test(i)); - } - - default LongSupplier asLongSupplier(accord.utilsfork.RandomSource rs) - { - return () -> nextLong(rs); - } - - default LongStream asLongStream(RandomSource rs) - { - return LongStream.generate(() -> nextLong(rs)); - } - } -} diff --git a/test/unit/accord/utilsfork/Gens.java b/test/unit/accord/utilsfork/Gens.java deleted file mode 100644 index 72eccbf232a1..000000000000 --- a/test/unit/accord/utilsfork/Gens.java +++ /dev/null @@ -1,1152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork; - -import java.lang.reflect.Array; -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.EnumMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.NavigableSet; -import java.util.Objects; -import java.util.Set; -import java.util.function.BooleanSupplier; -import java.util.function.Function; -import java.util.function.Supplier; -import java.util.stream.IntStream; -import java.util.stream.LongStream; -import java.util.stream.Stream; - -import com.google.common.collect.Iterables; - -import accord.utilsfork.random.Picker; - -public class Gens { - private Gens() { - } - - public static Gen flatten(Gen> gen) - { - return rs -> gen.next(rs).next(rs); - } - - public static Gen constant(T constant) - { - return ignore -> constant; - } - - public static Gen constant(Supplier constant) - { - return ignore -> constant.get(); - } - - public static Gen oneOf(Gen... gens) - { - switch (gens.length) - { - case 0: throw new IllegalArgumentException("Unable to select oneOf an empty list"); - case 1: return (Gen) gens[0]; - } - return oneOf(Arrays.asList(gens)); - } - - public static Gen oneOf(List> gens) - { - switch (gens.size()) - { - case 0: throw new IllegalArgumentException("Unable to select oneOf an empty list"); - case 1: return (Gen) gens.get(0); - } - return rs -> rs.pick(gens).next(rs); - } - - public static Gen oneOf(Map, Integer> values) - { - Gen> gen = pick(values); - return rs -> gen.next(rs).next(rs); - } - - public static OneOfBuilder oneOf() - { - return new OneOfBuilder<>(); - } - - public static class OneOfBuilder - { - private final Map, Integer> weighted = new LinkedHashMap<>(); - private final Set> unweighted = new LinkedHashSet<>(); - private Gen.IntGen unknownWeightGen = Gens.ints().between(1, 10); - - public OneOfBuilder add(Gen gen) - { - unweighted.add(gen); - return this; - } - - public OneOfBuilder add(int weight, Gen gen) - { - weighted.put(gen, weight); - return this; - } - - public OneOfBuilder unknownWeights(Gen.IntGen gen) - { - this.unknownWeightGen = gen; - return this; - } - - public Gen> buildWithDynamicWeights() - { - if (unweighted.isEmpty()) - { - Gen gen = build(); - return i -> gen; - } - return rs -> { - Map, Integer> commands = new LinkedHashMap<>(); - commands.putAll(weighted); - for (var gen : unweighted) - commands.put(gen, unknownWeightGen.nextInt(rs)); - var top = pick(commands); - return rs2 -> top.next(rs2).next(rs2); - }; - } - - public Gen build() - { - Map, Integer> commands = new LinkedHashMap<>(); - commands.putAll(weighted); - for (var gen : unweighted) - commands.put(gen, 1); - var top = pick(commands); - return rs -> top.next(rs).next(rs); - } - } - - public static Gen.IntGen pickInt(int... ts) - { - return rs -> ts[rs.nextInt(0, ts.length)]; - } - - public static Gen pick(T... ts) - { - return pick(Arrays.asList(ts)); - } - - public static Gen pick(List ts) - { - Gen.IntGen offset = ints().between(0, ts.size() - 1); - return rs -> ts.get(offset.nextInt(rs)); - } - - public static > Gen pick(Set set) - { - List list = new ArrayList<>(set); - // Non-ordered sets may have different iteration order on different environments, which would make a seed produce different histories! - // To avoid such a problem, make sure to apply a deterministic function (sort). - if (!(set instanceof NavigableSet)) - list.sort(Comparator.naturalOrder()); - return pick(list); - } - - public static Gen pick(Map values) - { - if (values == null || values.isEmpty()) - throw new IllegalArgumentException("values is empty"); - // if 2 values have the same weight we need some way to tie-break, but that isn't always possible... - // this method relies on the map having some order and will reject any map that doesn't define a deterministic order - if (!(values instanceof EnumMap || values instanceof LinkedHashMap)) - throw new IllegalArgumentException("pick(Map) requires a map with deterministic iteration; given " + values.getClass()); - if (values.size() == 1) - return constant(Objects.requireNonNull(Iterables.getFirst(values.keySet(), null))); - double totalWeight = values.values().stream().mapToDouble(Integer::intValue).sum(); - List> list = new ArrayList<>(values.size()); - Iterator> it = values.entrySet().iterator(); - for (int i = 0; it.hasNext(); i++) - { - Map.Entry e = it.next(); - list.add(new Weight<>(e.getKey(), e.getValue(), i)); - } - Collections.sort(list); - return rs -> { - double value = rs.nextDouble() * totalWeight; - for (Weight w : list) - { - value -= w.weight; - if (value <= 0) - return w.value; - } - return list.get(list.size() - 1).value; - }; - } - - public static Gen.IntGen pickZipf(int[] array) - { - if (array == null || array.length == 0) - throw new IllegalArgumentException("Empty array given"); - if (array.length == 1) - return ignore -> array[0]; - BigDecimal[] weights = new BigDecimal[array.length]; - BigDecimal base = BigDecimal.valueOf(Math.pow(2, array.length)); - weights[0] = base; - for (int i = 1; i < array.length; i++) - weights[i] = base.divide(BigDecimal.valueOf(i + 1), RoundingMode.UP); - BigDecimal totalWeights = Stream.of(weights).reduce(BigDecimal.ZERO, BigDecimal::add); - - return rs -> { - BigDecimal value = BigDecimal.valueOf(rs.nextDouble()).multiply(totalWeights); - for (int i = 0; i < weights.length; i++) - { - value = value.subtract(weights[i]); - if (value.compareTo(BigDecimal.ZERO) <= 0) - return array[i]; - } - return array[array.length - 1]; - }; - } - - public static Gen.LongGen pickZipf(long[] array) - { - if (array == null || array.length == 0) - throw new IllegalArgumentException("Empty array given"); - if (array.length == 1) - return ignore -> array[0]; - BigDecimal[] weights = new BigDecimal[array.length]; - BigDecimal base = BigDecimal.valueOf(Math.pow(2, array.length)); - weights[0] = base; - for (int i = 1; i < array.length; i++) - weights[i] = base.divide(BigDecimal.valueOf(i + 1), RoundingMode.UP); - BigDecimal totalWeights = Stream.of(weights).reduce(BigDecimal.ZERO, BigDecimal::add); - - return rs -> { - BigDecimal value = BigDecimal.valueOf(rs.nextDouble()).multiply(totalWeights); - for (int i = 0; i < weights.length; i++) - { - value = value.subtract(weights[i]); - if (value.compareTo(BigDecimal.ZERO) <= 0) - return array[i]; - } - return array[array.length - 1]; - }; - } - - public static Gen pickZipf(T... array) - { - return pickZipf(Arrays.asList(array)); - } - - public static Gen pickZipf(List array) - { - if (array == null || array.isEmpty()) - throw new IllegalArgumentException("Empty array given"); - if (array.size() == 1) - return ignore -> array.get(0); - BigDecimal[] weights = new BigDecimal[array.size()]; - BigDecimal base = BigDecimal.valueOf(Math.pow(2, array.size())); - weights[0] = base; - for (int i = 1; i < array.size(); i++) - weights[i] = base.divide(BigDecimal.valueOf(i + 1), RoundingMode.UP); - BigDecimal totalWeights = Stream.of(weights).reduce(BigDecimal.ZERO, BigDecimal::add); - - return rs -> { - BigDecimal value = BigDecimal.valueOf(rs.nextDouble()).multiply(totalWeights); - for (int i = 0; i < weights.length; i++) - { - value = value.subtract(weights[i]); - if (value.compareTo(BigDecimal.ZERO) <= 0) - return array.get(i); - } - return array.get(array.size() - 1); - }; - } - - public static Gen randomWeights(int[] array) - { - return rs -> { - float[] weights = Picker.randomWeights(rs, array.length); - return r -> array[index(r, weights)]; - }; - } - - public static Gen randomWeights(long[] array) - { - return rs -> { - float[] weights = Picker.randomWeights(rs, array.length); - return r -> array[index(r, weights)]; - }; - } - - public static Gen> randomWeights(T[] array) - { - return rs -> { - float[] weights = Picker.randomWeights(rs, array.length); - return r -> array[index(r, weights)]; - }; - } - - public static Gen> randomWeights(List array) - { - return rs -> { - float[] weights = Picker.randomWeights(rs, array.size()); - return r -> array.get(index(r, weights)); - }; - } - - private static int index(RandomSource rs, float[] weights) - { - int i = Arrays.binarySearch(weights, rs.nextFloat()); - if (i < 0) i = -1 - i; - return i; - } - - public static Gen mixedDistribution(int minInclusive, int maxExclusive, int numBuckets) - { - int domainSize = (maxExclusive - minInclusive); - if (domainSize < 0) - throw new IllegalArgumentException("Range is too large; min=" + minInclusive + ", max=" + maxExclusive); - if (numBuckets <= 0 || numBuckets > domainSize) - throw new IllegalArgumentException("Num buckets must be between 1 and " + domainSize + "; given " + numBuckets); - int[] bucket, indexes; - bucket = new int[numBuckets]; - int delta = domainSize / numBuckets; - for (int i = 0; i < numBuckets; i++) - bucket[i] = minInclusive + i * delta; - indexes = IntStream.range(0, bucket.length).toArray(); - Gen indexDistro = mixedDistribution(indexes); - return rs -> { - Gen.IntGen indexGen = indexDistro.next(rs); - switch (rs.nextInt(0, 2)) - { - case 0: // uniform - { - return r -> { - int idx = indexGen.next(rs); - int start = bucket[idx]; - int end = idx == bucket.length - 1 ? maxExclusive : bucket[idx + 1]; - return r.nextInt(start, end); - }; - } - case 1: // median biased - { - int medians[] = new int[bucket.length]; - for (int i = 0; i < medians.length; i++) - { - int start = bucket[i]; - int end = i == bucket.length - 1 ? maxExclusive : bucket[i + 1]; - medians[i] = rs.nextInt(start, end); - } - return r -> { - int idx = indexGen.next(rs); - int start = bucket[idx]; - int end = idx == bucket.length - 1 ? maxExclusive : bucket[idx + 1]; - int median = medians[idx]; - return r.nextBiasedInt(start, median, end); - }; - } - default: - throw new AssertionError(); - } - }; - } - - public static Gen mixedDistribution(int minInclusive, int maxExclusive) - { - int domainSize = (maxExclusive - minInclusive + 1); - if (domainSize < 0) - throw new IllegalArgumentException("Range is too large; min=" + minInclusive + ", max=" + maxExclusive); - int[] array, indexes; - if (domainSize > 200) // randomly selected - { - int numBuckets = 10; - int delta = domainSize / numBuckets; - array = new int[numBuckets]; - for (int i = 0; i < numBuckets; i++) - array[i] = minInclusive + i * delta; - indexes = IntStream.range(0, array.length).toArray(); - } - else - { - array = IntStream.range(minInclusive, maxExclusive).toArray(); - indexes = null; - } - return rs -> { - switch (rs.nextInt(0, 4)) - { - case 0: // uniform - return r -> r.nextInt(minInclusive, maxExclusive); - case 1: // median biased - int median = rs.nextInt(minInclusive, maxExclusive); - return r -> r.nextBiasedInt(minInclusive, median, maxExclusive); - case 2: // zipf - if (indexes == null) - return Gens.pickZipf(rs.nextBoolean() ? reverseAndCopy(array) : array); - return Gens.pickZipf(rs.nextBoolean() ? reverseAndCopy(indexes) : indexes).mapAsInt((r, index) -> { - int start = array[index]; - int end = index == array.length - 1 ? maxExclusive : array[index + 1]; - return r.nextInt(start, end); - }); - case 3: // random weight - if (indexes == null) - return randomWeights(array).next(rs); - return randomWeights(indexes).next(rs).mapAsInt((r, index) -> { - int start = array[index]; - int end = index == array.length - 1 ? maxExclusive : array[index + 1]; - return r.nextInt(start, end); - }); - default: - throw new AssertionError(); - } - }; - } - - private static int[] reverseAndCopy(int[] array) - { - array = Arrays.copyOf(array, array.length); - for (int i = 0, mid = array.length / 2, j = array.length - 1; i < mid; i++, j--) - { - int tmp = array[i]; - array[i] = array[j]; - array[j] = tmp; - } - return array; - } - - public static Gen mixedDistribution(long minInclusive, long maxExclusive) - { - long domainSize = (maxExclusive - minInclusive + 1); - if (domainSize < 0) - throw new IllegalArgumentException("Range is too large; min=" + minInclusive + ", max=" + maxExclusive); - long[] array; - int[] indexes; - if (domainSize > 200) // randomly selected - { - int numBuckets = 10; - long delta = domainSize / numBuckets; - array = new long[numBuckets]; - for (int i = 0; i < numBuckets; i++) - array[i] = minInclusive + i * delta; - indexes = IntStream.range(0, array.length).toArray(); - } - else - { - array = LongStream.range(minInclusive, maxExclusive).toArray(); - indexes = null; - } - return rs -> { - switch (rs.nextInt(0, 4)) - { - case 0: // uniform - return r -> r.nextLong(minInclusive, maxExclusive); - case 1: // median biased - long median = rs.nextLong(minInclusive, maxExclusive); - return r -> r.nextBiasedLong(minInclusive, median, maxExclusive); - case 2: // zipf - if (indexes == null) - return Gens.pickZipf(rs.nextBoolean() ? reverseAndCopy(array) : array); - return Gens.pickZipf(rs.nextBoolean() ? reverseAndCopy(indexes) : indexes).mapAsLong((r, index) -> { - long start = array[index]; - long end = index == array.length - 1 ? maxExclusive : array[index + 1]; - return r.nextLong(start, end); - }); - case 3: // random weight - if (indexes == null) - return randomWeights(array).next(rs); - return randomWeights(indexes).next(rs).mapAsLong((r, index) -> { - long start = array[index]; - long end = index == array.length - 1 ? maxExclusive : array[index + 1]; - return r.nextLong(start, end); - }); - default: - throw new AssertionError(); - } - }; - } - - private static long[] reverseAndCopy(long[] array) - { - array = Arrays.copyOf(array, array.length); - for (int i = 0, mid = array.length / 2, j = array.length - 1; i < mid; i++, j--) - { - long tmp = array[i]; - array[i] = array[j]; - array[j] = tmp; - } - return array; - } - - public static Gen> mixedDistribution(T... list) - { - return mixedDistribution(Arrays.asList(list)); - } - - public static Gen> mixedDistribution(List list) - { - return rs -> { - switch (rs.nextInt(0, 4)) - { - case 0: // uniform - return r -> list.get(rs.nextInt(0, list.size())); - case 1: // median biased - int median = rs.nextInt(0, list.size()); - return r -> list.get(r.nextBiasedInt(0, median, list.size())); - case 2: // zipf - List array = list; - if (rs.nextBoolean()) - { - array = new ArrayList<>(list); - Collections.reverse(array); - } - return pickZipf(array); - case 3: // random weight - return randomWeights(list).next(rs); - default: - throw new AssertionError(); - } - }; - } - - public static Gen mixedDistribution(int[] list) - { - return rs -> { - switch (rs.nextInt(0, 4)) - { - case 0: // uniform - return r -> list[rs.nextInt(0, list.length)]; - case 1: // median biased - int median = rs.nextInt(0, list.length); - return r -> list[r.nextBiasedInt(0, median, list.length)]; - case 2: // zipf - int[] array = list; - if (rs.nextBoolean()) - { - array = Arrays.copyOf(array, array.length); - reverse(array); - } - return pickZipf(array); - case 3: // random weight - return randomWeights(list).next(rs); - default: - throw new AssertionError(); - } - }; - } - - /** - * This is a change from accord as that uses {@link accord.utils.Utils#reverse}, which doesn't exist in this forward port. - * - * To avoid adding another class and merge conflicts to cep-15-accord, this method was inlined - */ - private static void reverse(int[] array) - { - for (int i = 0; i < array.length / 2; i++) - { - int tmp = array[i]; - array[i] = array[array.length- 1 - i]; - array[array.length - 1 - i] = tmp; - } - } - - public static Gen charArray(Gen.IntGen sizes, char[] domain) - { - return charArray(sizes, domain, (a, b) -> true); - } - - public interface IntCharBiPredicate - { - boolean test(int a, char b); - } - - public static Gen charArray(Gen.IntGen sizes, char[] domain, IntCharBiPredicate fn) - { - Gen.IntGen indexGen = ints().between(0, domain.length - 1); - return rs -> { - int size = sizes.nextInt(rs); - char[] is = new char[size]; - for (int i = 0; i != size; i++) - { - char c; - do - { - c = domain[indexGen.nextInt(rs)]; - } - while (!fn.test(i, c)); - is[i] = c; - } - return is; - }; - } - - public static Gen random() { - return r -> r; - } - - public static BooleanDSL bools() - { - return new BooleanDSL(); - } - - public static IntDSL ints() - { - return new IntDSL(); - } - - public static LongDSL longs() { - return new LongDSL(); - } - - public static ListDSL lists(Gen fn) { - return new ListDSL<>(fn); - } - - public static ArrayDSL arrays(Class type, Gen fn) { - return new ArrayDSL<>(type, fn); - } - - public static IntArrayDSL arrays(Gen.IntGen fn) { - return new IntArrayDSL(fn); - } - - public static LongArrayDSL arrays(Gen.LongGen fn) { - return new LongArrayDSL(fn); - } - - public static EnumDSL enums() - { - return new EnumDSL(); - } - - public static StringDSL strings() - { - return new StringDSL(); - } - - public static BooleanSupplier supplier(Gen gen, accord.utilsfork.RandomSource rs) - { - return () -> gen.next(rs); - } - - public static class BooleanDSL - { - public Gen all() - { - return accord.utilsfork.RandomSource::nextBoolean; - } - - public Gen biasedRepeatingRuns(double ratio, int maxRuns) - { - accord.utilsfork.Invariants.checkArgument(ratio > 0 && ratio <= 1, "Expected %d to be larger than 0 and <= 1", ratio); - double lower = ratio * .8; - double upper = ratio * 1.2; - return new Gen() { - // run represents how many consecutaive true values should be returned; -1 implies no active "run" exists - private int run = -1; - private long falseCount = 0, trueCount = 0; - @Override - public Boolean next(accord.utilsfork.RandomSource rs) - { - if (run != -1) - { - run--; - trueCount++; - return true; - } - double currentRatio = trueCount / (double) (falseCount + trueCount); - if (currentRatio < lower) - { - // not enough true - trueCount++; - return true; - } - if (currentRatio > upper) - { - // not enough false - falseCount++; - return false; - } - if (rs.decide(ratio)) - { - run = rs.nextInt(maxRuns); - run--; - trueCount++; - return true; - } - falseCount++; - return false; - } - }; - } - - public Gen> mixedDistribution() - { - return rs -> { - int selection = rs.nextInt(0, 4); - switch (selection) - { - case 0: // uniform 50/50 - return r -> r.nextBoolean(); - case 1: // variable frequency - var freq = rs.nextFloat(); - return r -> r.decide(freq); - case 2: // fixed result - boolean result = rs.nextBoolean(); - return ignore -> result; - case 3: // biased repeating runs - return biasedRepeatingRuns(rs.nextDouble(), rs.nextInt(1, 100)); - default: - throw new IllegalStateException("Unexpected int for bool selection: " + selection); - } - }; - } - } - - public static class IntDSL - { - public Gen.IntGen of(int value) - { - return r -> value; - } - - public Gen.IntGen all() - { - return accord.utilsfork.RandomSource::nextInt; - } - - public Gen.IntGen between(int min, int max) - { - accord.utilsfork.Invariants.checkArgument(max >= min, "max (%d) < min (%d)", max, min); - if (min == max) - return of(min); - // since bounds is exclusive, if max == max_value unable to do +1 to include... so will return a gen - // that does not include - if (max == Integer.MAX_VALUE) - return r -> r.nextInt(min, max); - return r -> r.nextInt(min, max + 1); - } - - public Gen mixedDistribution(int minInclusive, int maxExclusive) - { - return Gens.mixedDistribution(minInclusive, maxExclusive); - } - - public Gen mixedDistribution(int minInclusive, int maxExclusive, int numBuckets) - { - return Gens.mixedDistribution(minInclusive, maxExclusive, numBuckets); - } - } - - public static class LongDSL { - public Gen.LongGen of(long value) - { - return r -> value; - } - - public Gen.LongGen all() { - return accord.utilsfork.RandomSource::nextLong; - } - - public Gen.LongGen between(long min, long max) { - Invariants.checkArgument(max >= min); - if (min == max) - return of(min); - // since bounds is exclusive, if max == max_value unable to do +1 to include... so will return a gen - // that does not include - if (max == Long.MAX_VALUE) - return r -> r.nextLong(min, max); - return r -> r.nextLong(min, max + 1); - } - } - - public static class EnumDSL - { - public > Gen all(Class klass) - { - return pick(klass.getEnumConstants()); - } - - public > Gen> allMixedDistribution(Class klass) - { - return mixedDistribution(klass.getEnumConstants()); - } - - public > Gen allWithWeights(Class klass, int... weights) - { - T[] constants = klass.getEnumConstants(); - if (constants.length != weights.length) - throw new IllegalArgumentException(String.format("Total number of weights (%s) does not match the enum (%s)", Arrays.toString(weights), Arrays.toString(constants))); - Map values = new EnumMap<>(klass); - for (int i = 0; i < constants.length; i++) - values.put(constants[i], weights[i]); - return pick(values); - } - } - - public static class StringDSL - { - public Gen of(Gen.IntGen sizes, char[] domain) - { - // note, map is overloaded so String::new is ambugious to javac, so need a lambda here - return charArray(sizes, domain).map(c -> new String(c)); - } - - public SizeBuilder of(char[] domain) - { - return new SizeBuilder<>(sizes -> of(sizes, domain)); - } - - public Gen of(Gen.IntGen sizes, char[] domain, IntCharBiPredicate fn) - { - // note, map is overloaded so String::new is ambugious to javac, so need a lambda here - return charArray(sizes, domain, fn).map(c -> new String(c)); - } - - public SizeBuilder of(char[] domain, IntCharBiPredicate fn) - { - return new SizeBuilder<>(sizes -> of(sizes, domain, fn)); - } - - public Gen all(Gen.IntGen sizes) - { - return betweenCodePoints(sizes, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT); - } - - public SizeBuilder all() - { - return new SizeBuilder<>(this::all); - } - - public Gen ascii(Gen.IntGen sizes) - { - return betweenCodePoints(sizes, 0, 127); - } - - public SizeBuilder ascii() - { - return new SizeBuilder<>(this::ascii); - } - - public Gen betweenCodePoints(Gen.IntGen sizes, int min, int max) - { - Gen.IntGen codePointGen = ints().between(min, max).filter(Character::isDefined); - return rs -> { - int[] array = new int[sizes.nextInt(rs)]; - for (int i = 0; i < array.length; i++) - array[i] = codePointGen.nextInt(rs); - return new String(array, 0, array.length); - }; - } - - public SizeBuilder betweenCodePoints(int min, int max) - { - return new SizeBuilder<>(sizes -> betweenCodePoints(sizes, min, max)); - } - } - - public static class SizeBuilder - { - private final Function> fn; - - public SizeBuilder(Function> fn) - { - this.fn = fn; - } - - public Gen ofLength(int fixed) - { - return ofLengthBetween(fixed, fixed); - } - - public Gen ofLengthBetween(int min, int max) - { - return fn.apply(ints().between(min, max)); - } - } - - public static class ListDSL implements BaseSequenceDSL, List> { - private final Gen fn; - - public ListDSL(Gen fn) { - this.fn = Objects.requireNonNull(fn); - } - - @Override - public ListDSL unique() - { - return new ListDSL<>(new GenReset<>(fn, false)); - } - - public ListDSL uniqueBestEffort() - { - return new ListDSL<>(new GenReset<>(fn, true)); - } - - @Override - public Gen> ofSizeBetween(int minSize, int maxSize) { - Gen.IntGen sizeGen = ints().between(minSize, maxSize); - return r -> - { - Reset.tryReset(fn); - int size = sizeGen.nextInt(r); - List list = new ArrayList<>(size); - for (int i = 0; i < size; i++) - { - try - { - list.add(fn.next(r)); - } - catch (IgnoreGenResult e) - { - // ignore - } - } - return list; - }; - } - } - - public static class ArrayDSL implements BaseSequenceDSL, T[]> { - private final Class type; - private final Gen fn; - - public ArrayDSL(Class type, Gen fn) { - this.type = Objects.requireNonNull(type); - this.fn = Objects.requireNonNull(fn); - } - - @Override - public ArrayDSL unique() - { - return new ArrayDSL<>(type, new GenReset<>(fn, false)); - } - - public ArrayDSL uniqueBestEffort() - { - return new ArrayDSL<>(type, new GenReset<>(fn, true)); - } - - @Override - public Gen ofSizeBetween(int minSize, int maxSize) { - Gen.IntGen sizeGen = ints().between(minSize, maxSize); - return r -> - { - Reset.tryReset(fn); - int size = sizeGen.nextInt(r); - T[] list = (T[]) Array.newInstance(type, size); - for (int i = 0; i < size; i++) - list[i] = fn.next(r); - return list; - }; - } - } - - public static class IntArrayDSL implements BaseSequenceDSL { - private final Gen.IntGen fn; - - public IntArrayDSL(Gen.IntGen fn) { - this.fn = Objects.requireNonNull(fn); - } - - @Override - public IntArrayDSL unique() - { - return new IntArrayDSL(new IntGenReset(fn)); - } - - @Override - public Gen ofSizeBetween(int minSize, int maxSize) { - Gen.IntGen sizeGen = ints().between(minSize, maxSize); - return r -> - { - int size = sizeGen.nextInt(r); - int[] list = new int[size]; - for (int i = 0; i < size; i++) - list[i] = fn.nextInt(r); - return list; - }; - } - } - - public static class LongArrayDSL implements BaseSequenceDSL { - private final Gen.LongGen fn; - - public LongArrayDSL(Gen.LongGen fn) { - this.fn = Objects.requireNonNull(fn); - } - - @Override - public LongArrayDSL unique() - { - return new LongArrayDSL(new LongGenReset(fn)); - } - - @Override - public Gen ofSizeBetween(int minSize, int maxSize) { - Gen.IntGen sizeGen = ints().between(minSize, maxSize); - return r -> - { - int size = sizeGen.nextInt(r); - long[] list = new long[size]; - for (int i = 0; i < size; i++) - list[i] = fn.nextLong(r); - return list; - }; - } - } - - public interface BaseSequenceDSL, B> - { - A unique(); - - Gen ofSizeBetween(int min, int max); - - default Gen ofSize(int size) { - return ofSizeBetween(size, size); - } - } - - protected interface Reset { - static void tryReset(Object o) - { - if (o instanceof Reset) - ((Reset) o).reset(); - } - - void reset(); - } - - private static final class IgnoreGenResult extends RuntimeException - { - private static final IgnoreGenResult INSTANCE = new IgnoreGenResult(); - private IgnoreGenResult() - { - super(null, null, false, false); - } - } - - private static class GenReset implements Gen, Reset - { - private final Set seen = new HashSet<>(); - private final Gen fn; - private final boolean bestEffort; - - private GenReset(Gen fn, boolean bestEffort) - { - this.fn = fn; - this.bestEffort = bestEffort; - } - - @Override - public T next(accord.utilsfork.RandomSource random) - { - if (!bestEffort) - { - T value; - // 10k attempts - for (int i = 0; i < 10_000; i++) - { - if (seen.add((value = fn.next(random)))) - return value; - } - - throw new IllegalArgumentException("Could not generate a unique value after 10k attempts"); - } - else - { - T value = null; - int i; - for (i = 0; i < 42 && !seen.add((value = fn.next(random))); i++) {} - if (i == 42) throw IgnoreGenResult.INSTANCE; - return value; - } - } - - @Override - public void reset() - { - seen.clear(); - } - } - - private static class IntGenReset implements Gen.IntGen, Reset - { - private final GenReset base; - - private IntGenReset(IntGen fn) - { - this.base = new GenReset<>(fn, false); - } - @Override - public int nextInt(accord.utilsfork.RandomSource random) { - return base.next(random); - } - - @Override - public void reset() { - base.reset(); - } - } - - private static class LongGenReset implements Gen.LongGen, Reset - { - private final GenReset base; - - private LongGenReset(LongGen fn) - { - this.base = new GenReset<>(fn, false); - } - @Override - public long nextLong(RandomSource random) { - return base.next(random); - } - - @Override - public void reset() { - base.reset(); - } - } - - private static class Weight implements Comparable> - { - private final T value; - private final double weight; - private final int index; - - private Weight(T value, double weight, int index) { - this.value = value; - this.weight = weight; - this.index = index; - } - - @Override - public int compareTo(Weight o) { - int rc = Double.compare(weight, o.weight); - if (rc == 0) - rc = Integer.compare(index, o.index); - return rc; - } - } -} diff --git a/test/unit/accord/utilsfork/Invariants.java b/test/unit/accord/utilsfork/Invariants.java deleted file mode 100644 index 6028b69078ac..000000000000 --- a/test/unit/accord/utilsfork/Invariants.java +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork; - -import net.nicoulaj.compilecommand.annotations.Inline; - -import javax.annotation.Nullable; -import java.util.function.Predicate; -import java.util.function.Supplier; - -import static java.lang.String.format; - -public class Invariants -{ - private static final boolean PARANOID = true; - private static final boolean DEBUG = true; - - public static boolean isParanoid() - { - return PARANOID; - } - public static boolean debug() - { - return DEBUG; - } - - public static IllegalStateException createIllegalState(String msg) - { - return new IllegalStateException(msg); - } - - public static IllegalStateException illegalState(String msg) - { - throw createIllegalState(msg); - } - - private static void illegalState() - { - illegalState(null); - } - - private static void illegalArgument(String msg) - { - throw new IllegalArgumentException(msg); - } - - - private static void illegalArgument() - { - illegalArgument(null); - } - - public static T2 checkType(T1 cast) - { - return (T2)cast; - } - - public static T2 checkType(Class to, T1 cast) - { - if (cast != null && !to.isInstance(cast)) - illegalState(); - return (T2)cast; - } - - public static T2 checkType(Class to, T1 cast, String msg) - { - if (cast != null && !to.isInstance(cast)) - illegalState(msg); - return (T2)cast; - } - - public static void paranoid(boolean condition) - { - if (PARANOID && !condition) - illegalState(); - } - - public static void checkState(boolean condition) - { - if (!condition) - illegalState(); - } - - public static void checkState(boolean condition, Supplier msg) - { - if (!condition) - throw illegalState(msg.get()); - } - - public static void checkState(boolean condition, String msg) - { - if (!condition) - illegalState(msg); - } - - public static void checkState(boolean condition, String fmt, int p1) - { - if (!condition) - illegalState(format(fmt, p1)); - } - - public static void checkState(boolean condition, String fmt, int p1, int p2) - { - if (!condition) - illegalState(format(fmt, p1, p2)); - } - - public static void checkState(boolean condition, String fmt, long p1) - { - if (!condition) - illegalState(format(fmt, p1)); - } - - public static void checkState(boolean condition, String fmt, long p1, long p2) - { - if (!condition) - illegalState(format(fmt, p1, p2)); - } - - public static void checkState(boolean condition, String fmt, @Nullable Object p1) - { - if (!condition) - illegalState(format(fmt, p1)); - } - - public static void checkState(boolean condition, String fmt, @Nullable Object p1, @Nullable Object p2) - { - if (!condition) - illegalState(format(fmt, p1, p2)); - } - - public static void checkState(boolean condition, String fmt, Object... args) - { - if (!condition) - illegalState(format(fmt, args)); - } - - public static T nonNull(T param) - { - if (param == null) - throw new NullPointerException(); - return param; - } - - public static T nonNull(T param, String fmt, Object... args) - { - if (param == null) - throw new NullPointerException(format(fmt, args)); - return param; - } - - public static int isNatural(int input) - { - if (input < 0) - illegalState(); - return input; - } - - public static long isNatural(long input) - { - if (input < 0) - illegalState(); - return input; - } - - public static void checkArgument(boolean condition) - { - if (!condition) - illegalArgument(); - } - - public static void checkArgument(boolean condition, String msg) - { - if (!condition) - illegalArgument(msg); - } - - public static void checkArgument(boolean condition, String fmt, int p1) - { - if (!condition) - illegalArgument(format(fmt, p1)); - } - - public static void checkArgument(boolean condition, String fmt, int p1, int p2) - { - if (!condition) - illegalArgument(format(fmt, p1, p2)); - } - - public static void checkArgument(boolean condition, String fmt, long p1) - { - if (!condition) - illegalArgument(format(fmt, p1)); - } - - public static void checkArgument(boolean condition, String fmt, long p1, long p2) - { - if (!condition) - illegalArgument(format(fmt, p1, p2)); - } - - public static void checkArgument(boolean condition, String fmt, @Nullable Object p1) - { - if (!condition) - illegalArgument(format(fmt, p1)); - } - - public static void checkArgument(boolean condition, String fmt, @Nullable Object p1, @Nullable Object p2) - { - if (!condition) - illegalArgument(format(fmt, p1, p2)); - } - - public static void checkArgument(boolean condition, String fmt, Object... args) - { - if (!condition) - illegalArgument(format(fmt, args)); - } - - public static T checkArgument(T param, boolean condition) - { - if (!condition) - illegalArgument(); - return param; - } - - public static T checkArgument(T param, boolean condition, String msg) - { - if (!condition) - illegalArgument(msg); - return param; - } - - public static T checkArgument(T param, boolean condition, String fmt, int p1) - { - if (!condition) - illegalArgument(format(fmt, p1)); - return param; - } - - public static T checkArgument(T param, boolean condition, String fmt, int p1, int p2) - { - if (!condition) - illegalArgument(format(fmt, p1, p2)); - return param; - } - - public static T checkArgument(T param, boolean condition, String fmt, long p1) - { - if (!condition) - illegalArgument(format(fmt, p1)); - return param; - } - - public static T checkArgument(T param, boolean condition, String fmt, long p1, long p2) - { - if (!condition) - illegalArgument(format(fmt, p1, p2)); - return param; - } - - public static T checkArgument(T param, boolean condition, String fmt, @Nullable Object p1) - { - if (!condition) - illegalArgument(format(fmt, p1)); - return param; - } - - public static T checkArgument(T param, boolean condition, String fmt, @Nullable Object p1, @Nullable Object p2) - { - if (!condition) - illegalArgument(format(fmt, p1, p2)); - return param; - } - - public static T checkArgument(T param, boolean condition, String fmt, Object... args) - { - if (!condition) - illegalArgument(format(fmt, args)); - return param; - } - - @Inline - public static T checkArgument(T param, Predicate condition) - { - if (!condition.test(param)) - illegalArgument(); - return param; - } - - @Inline - public static T checkArgument(T param, Predicate condition, String msg) - { - if (!condition.test(param)) - illegalArgument(msg); - return param; - } - - public static O cast(Object o, Class klass) - { - try - { - return klass.cast(o); - } - catch (ClassCastException e) - { - throw new IllegalArgumentException(format("Unable to cast %s to %s", o, klass.getName())); - } - } - - public static void checkIndexInBounds(int realLength, int offset, int length) - { - if (realLength == 0 || length == 0) - throw new IndexOutOfBoundsException("Unable to access offset " + offset + "; empty"); - if (offset < 0) - throw new IndexOutOfBoundsException("Offset " + offset + " must not be negative"); - if (length < 0) - throw new IndexOutOfBoundsException("Length " + length + " must not be negative"); - int endOffset = offset + length; - if (endOffset > realLength) - throw new IndexOutOfBoundsException(String.format("Offset %d, length = %d; real length was %d", offset, length, realLength)); - } -} diff --git a/test/unit/accord/utilsfork/Property.java b/test/unit/accord/utilsfork/Property.java deleted file mode 100644 index fbf1f4c7c575..000000000000 --- a/test/unit/accord/utilsfork/Property.java +++ /dev/null @@ -1,1075 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork; - -import accord.utilsfork.async.TimeoutUtils; - -import java.time.Duration; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import java.util.function.BiFunction; -import java.util.function.Consumer; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.function.Supplier; -import java.util.stream.Collectors; -import javax.annotation.Nullable; - -import org.agrona.collections.LongArrayList; - -public class Property -{ - public static abstract class Common> - { - protected long seed = SeedProvider.instance.nextSeed(); - protected int examples = 1000; - - protected boolean pure = true; - @Nullable - protected Duration timeout = null; - - protected Common() { - } - - protected Common(Common other) { - this.seed = other.seed; - this.examples = other.examples; - this.pure = other.pure; - this.timeout = other.timeout; - } - - public T withSeed(long seed) - { - this.seed = seed; - return (T) this; - } - - public T withExamples(int examples) - { - if (examples <= 0) - throw new IllegalArgumentException("Examples must be positive"); - this.examples = examples; - return (T) this; - } - - public T withPure(boolean pure) - { - this.pure = pure; - return (T) this; - } - - public T withTimeout(Duration timeout) - { - this.timeout = timeout; - this.pure = false; - return (T) this; - } - - protected void checkWithTimeout(Runnable fn) - { - try - { - TimeoutUtils.runBlocking(timeout, "property with timeout", fn::run); - } - catch (ExecutionException e) - { - throw new PropertyError(propertyError(this, e.getCause())); - } - catch (InterruptedException e) - { - throw new PropertyError(propertyError(this, e)); - } - catch (TimeoutException e) - { - TimeoutException override = new TimeoutException("property test did not complete within " + this.timeout); - override.setStackTrace(new StackTraceElement[0]); - throw new PropertyError(propertyError(this, override)); - } - } - } - - public static class ForBuilder extends Common - { - public void check(FailingConsumer fn) - { - forAll(accord.utilsfork.Gens.random()).check(fn); - } - - public SingleBuilder forAll(accord.utilsfork.Gen gen) - { - return new SingleBuilder<>(gen, this); - } - - public DoubleBuilder forAll(accord.utilsfork.Gen a, accord.utilsfork.Gen b) - { - return new DoubleBuilder<>(a, b, this); - } - - public TrippleBuilder forAll(accord.utilsfork.Gen a, accord.utilsfork.Gen b, accord.utilsfork.Gen c) - { - return new TrippleBuilder<>(a, b, c, this); - } - } - - private static Object normalizeValue(Object value) - { - if (value == null) - return null; - // one day java arrays will have a useful toString... one day... - if (value.getClass().isArray()) - { - Class subType = value.getClass().getComponentType(); - if (!subType.isPrimitive()) - return Arrays.asList((Object[]) value); - if (Byte.TYPE == subType) - return Arrays.toString((byte[]) value); - if (Character.TYPE == subType) - return Arrays.toString((char[]) value); - if (Short.TYPE == subType) - return Arrays.toString((short[]) value); - if (Integer.TYPE == subType) - return Arrays.toString((int[]) value); - if (Long.TYPE == subType) - return Arrays.toString((long[]) value); - if (Float.TYPE == subType) - return Arrays.toString((float[]) value); - if (Double.TYPE == subType) - return Arrays.toString((double[]) value); - } - try - { - String result = value.toString(); - if (result != null && result.length() > 100 && value instanceof Collection) - result = ((Collection) value).stream().map(o -> "\n\t " + o).collect(Collectors.joining(",", "[", "]")); - return result; - } - catch (Throwable t) - { - return "Object.toString failed: " + t.getClass().getCanonicalName() + ": " + t.getMessage(); - } - } - - private static StringBuilder propertyErrorCommon(Common input, Throwable cause) - { - StringBuilder sb = new StringBuilder(); - // return "Seed=" + seed + "\nExamples=" + examples; - sb.append("Property error detected:\nSeed = ").append(input.seed).append('\n'); - sb.append("Examples = ").append(input.examples).append('\n'); - sb.append("Pure = ").append(input.pure).append('\n'); - if (cause != null) - { - String msg = cause.getMessage(); - sb.append("Error: "); - // to improve readability, if a newline is detected move the error msg to the next line - if (msg != null && msg.contains("\n")) - msg = "\n\t" + msg.replace("\n", "\n\t"); - if (msg == null) - msg = cause.getClass().getCanonicalName(); - sb.append(msg).append('\n'); - } - return sb; - } - - private static String propertyError(Common input, Throwable cause, Object... values) - { - StringBuilder sb = propertyErrorCommon(input, cause); - if (values != null) - { - sb.append("Values:\n"); - for (int i = 0; i < values.length; i++) - sb.append('\t').append(i).append(" = ").append(normalizeValue(values[i])).append(": ").append(values[i] == null ? "unknown type" : values[i].getClass().getCanonicalName()).append('\n'); - } - return sb.toString(); - } - - private static String statefulPropertyError(StatefulBuilder input, Throwable cause, Object state, List history) - { - StringBuilder sb = propertyErrorCommon(input, cause); - sb.append("Steps: ").append(input.steps).append('\n'); - sb.append("Values:\n"); - String stateStr = state == null ? null : state.toString().replace("\n", "\n\t\t"); - sb.append("\tState: ").append(stateStr).append(": ").append(state == null ? "unknown type" : state.getClass().getCanonicalName()).append('\n'); - sb.append("\tHistory:").append('\n'); - addList(sb, "\t\t", history); - return sb.toString(); - } - - private static void addList(StringBuilder sb, String prefix, List list) - { - int idx = 0; - for (var event : list) - sb.append(prefix).append(++idx).append(": ").append(event).append('\n'); - } - - public static String formatList(String prefix, List list) - { - StringBuilder sb = new StringBuilder(); - addList(sb, prefix, list); - return sb.toString(); - } - - public interface FailingConsumer - { - void accept(A value) throws Exception; - } - - public static class SingleBuilder extends Common> - { - private final accord.utilsfork.Gen gen; - - private SingleBuilder(accord.utilsfork.Gen gen, Common other) { - super(other); - this.gen = Objects.requireNonNull(gen); - } - - public void check(FailingConsumer fn) - { - if (timeout != null) - { - checkWithTimeout(() -> checkInternal(fn)); - return; - } - checkInternal(fn); - } - - private void checkInternal(FailingConsumer fn) - { - accord.utilsfork.RandomSource random = new accord.utilsfork.DefaultRandom(seed); - for (int i = 0; i < examples; i++) - { - T value = null; - try - { - checkInterrupted(); - fn.accept(value = gen.next(random)); - } - catch (Throwable t) - { - throw new PropertyError(propertyError(this, t, value), t); - } - if (pure) - { - seed = random.nextLong(); - random.setSeed(seed); - } - } - } - } - - public interface FailingBiConsumer - { - void accept(A a, B b) throws Exception; - } - - public static class DoubleBuilder extends Common> - { - private final accord.utilsfork.Gen aGen; - private final accord.utilsfork.Gen bGen; - - private DoubleBuilder(accord.utilsfork.Gen aGen, accord.utilsfork.Gen bGen, Common other) { - super(other); - this.aGen = Objects.requireNonNull(aGen); - this.bGen = Objects.requireNonNull(bGen); - } - - public void check(FailingBiConsumer fn) - { - if (timeout != null) - { - checkWithTimeout(() -> checkInternal(fn)); - return; - } - checkInternal(fn); - } - - private void checkInternal(FailingBiConsumer fn) - { - accord.utilsfork.RandomSource random = new accord.utilsfork.DefaultRandom(seed); - for (int i = 0; i < examples; i++) - { - A a = null; - B b = null; - try - { - checkInterrupted(); - fn.accept(a = aGen.next(random), b = bGen.next(random)); - } - catch (Throwable t) - { - throw new PropertyError(propertyError(this, t, a, b), t); - } - if (pure) - { - seed = random.nextLong(); - random.setSeed(seed); - } - } - } - } - - public interface FailingTriConsumer - { - void accept(A a, B b, C c) throws Exception; - } - - public static class TrippleBuilder extends Common> - { - private final accord.utilsfork.Gen as; - private final accord.utilsfork.Gen bs; - private final accord.utilsfork.Gen cs; - - public TrippleBuilder(accord.utilsfork.Gen as, accord.utilsfork.Gen bs, accord.utilsfork.Gen cs, Common other) - { - super(other); - this.as = as; - this.bs = bs; - this.cs = cs; - } - - public void check(FailingTriConsumer fn) - { - if (timeout != null) - { - checkWithTimeout(() -> checkInternal(fn)); - return; - } - checkInternal(fn); - } - - private void checkInternal(FailingTriConsumer fn) - { - accord.utilsfork.RandomSource random = new accord.utilsfork.DefaultRandom(seed); - for (int i = 0; i < examples; i++) - { - A a = null; - B b = null; - C c = null; - try - { - checkInterrupted(); - fn.accept(a = as.next(random), b = bs.next(random), c = cs.next(random)); - } - catch (Throwable t) - { - throw new PropertyError(propertyError(this, t, a, b, c), t); - } - if (pure) - { - seed = random.nextLong(); - random.setSeed(seed); - } - } - } - } - - private static void checkInterrupted() throws InterruptedException - { - if (Thread.currentThread().isInterrupted()) - throw new InterruptedException(); - } - - public static class PropertyError extends AssertionError - { - public PropertyError(String message, Throwable cause) - { - super(message, cause); - } - - public PropertyError(String message) - { - super(message); - } - } - - public static ForBuilder qt() - { - return new ForBuilder(); - } - - public static StatefulBuilder stateful() - { - return new StatefulBuilder(); - } - - public static class StatefulBuilder extends Common - { - protected int steps = 1000; - @Nullable - protected Duration stepTimeout = null; - - public StatefulBuilder() - { - examples = 500; - } - - public StatefulBuilder withSteps(int steps) - { - this.steps = steps; - return this; - } - - public StatefulBuilder withStepTimeout(Duration duration) - { - stepTimeout = duration; - return this; - } - - @SuppressWarnings("rawtypes") - public void check(Commands commands) - { - accord.utilsfork.RandomSource rs = new DefaultRandom(seed); - for (int i = 0; i < examples; i++) - { - State state = null; - List history = new ArrayList<>(steps); - LongArrayList historyTiming = stepTimeout == null ? null : new LongArrayList(); - try - { - checkInterrupted(); - - state = commands.genInitialState().next(rs); - SystemUnderTest sut = commands.createSut(state); - - try - { - for (int j = 0; j < steps; j++) - { - accord.utilsfork.Gen> cmdGen = commands.commands(state); - Command cmd = cmdGen.next(rs); - for (int a = 0; cmd.checkPreconditions(state) != PreCheckResult.Ok && a < 42; a++) - { - if (a == 41) - throw new IllegalArgumentException("Unable to find next command"); - cmd = cmdGen.next(rs); - } - if (cmd instanceof MultistepCommand) - { - for (Command sub : ((MultistepCommand) cmd)) - { - history.add(sub.detailed(state)); - process(sub, state, sut, history.size(), historyTiming); - } - } - else - { - history.add(cmd.detailed(state)); - process(cmd, state, sut, history.size(), historyTiming); - } - } - commands.destroySut(sut, null); - commands.destroyState(state, null); - commands.onSuccess(state, sut, maybeRewriteHistory(history, historyTiming)); - } - catch (Throwable t) - { - try - { - commands.destroySut(sut, t); - commands.destroyState(state, t); - } - catch (Throwable t2) - { - t.addSuppressed(t2); - } - throw t; - } - } - catch (Throwable t) - { - - throw new PropertyError(statefulPropertyError(this, t, state, maybeRewriteHistory(history, historyTiming)), t); - } - if (pure) - { - seed = rs.nextLong(); - rs.setSeed(seed); - } - } - } - - private static List maybeRewriteHistory(List history, @Nullable LongArrayList historyTiming) - { - if (historyTiming == null) return history; - List newHistory = new ArrayList<>(history.size()); - for (int i = 0; i < history.size(); i++) - { - String step = history.get(i); - long timeNanos = historyTiming.getLong(i); - newHistory.add(step + ";\tDuration " + Duration.ofNanos(timeNanos)); - } - return newHistory; - } - - private void process(Command cmd, State state, SystemUnderTest sut, int id, @Nullable LongArrayList stepTiming) throws Throwable - { - if (stepTimeout == null) - { - cmd.process(state, sut); - return; - } - long startNanos = System.nanoTime(); - try - { - TimeoutUtils.runBlocking(stepTimeout, "Stateful Step " + id + ": " + cmd.detailed(state), () -> cmd.process(state, sut)); - } - finally - { - stepTiming.add(System.nanoTime() - startNanos); - } - } - } - - public enum PreCheckResult { Ok, Ignore } - public interface Command - { - default PreCheckResult checkPreconditions(State state) {return PreCheckResult.Ok;} - Result apply(State state) throws Throwable; - Result run(SystemUnderTest sut) throws Throwable; - default void checkPostconditions(State state, Result expected, - SystemUnderTest sut, Result actual) throws Throwable {} - default String detailed(State state) {return this.toString();} - default void process(State state, SystemUnderTest sut) throws Throwable - { - checkPostconditions(state, apply(state), - sut, run(sut)); - } - } - - public static class ForwardingCommand implements Command - { - private final Command delegate; - - public ForwardingCommand(Command delegate) - { - this.delegate = delegate; - } - - protected Command delegate() - { - return delegate; - } - - @Override - public PreCheckResult checkPreconditions(State state) - { - return delegate().checkPreconditions(state); - } - - @Override - public Result apply(State state) throws Throwable - { - return delegate().apply(state); - } - - @Override - public Result run(SystemUnderTest sut) throws Throwable - { - return delegate().run(sut); - } - - @Override - public void checkPostconditions(State state, Result expected, SystemUnderTest sut, Result actual) throws Throwable - { - delegate().checkPostconditions(state, expected, sut, actual); - } - - @Override - public String detailed(State state) - { - return delegate().detailed(state); - } - - @Override - public void process(State state, SystemUnderTest sut) throws Throwable - { - // don't call delegate here else the process function calls the delegate and not this class - Command.super.process(state, sut); - } - } - - public static MultistepCommand multistep(Command... cmds) - { - return multistep(Arrays.asList(cmds)); - } - - public static MultistepCommand multistep(List> cmds) - { - List> result = new ArrayList<>(cmds.size()); - for (Command c : cmds) - { - if (c instanceof MultistepCommand) result.addAll(flatten((MultistepCommand) c)); - else result.add(c); - } - return result::iterator; - } - - private static Collection> flatten(MultistepCommand mc) - { - List> result = new ArrayList<>(); - for (Command c : mc) - { - if (c instanceof MultistepCommand) result.addAll(flatten((MultistepCommand) c)); - else result.add(c); - } - return result; - } - - public interface MultistepCommand extends Command, Iterable> - { - @Override - default PreCheckResult checkPreconditions(State state) - { - for (Command cmd : this) - { - PreCheckResult result = cmd.checkPreconditions(state); - if (result != PreCheckResult.Ok) return result; - } - return PreCheckResult.Ok; - } - - @Override - default Object apply(State state) throws Throwable - { - throw new UnsupportedOperationException(); - } - - @Override - default Object run(SystemUnderTest sut) throws Throwable - { - throw new UnsupportedOperationException(); - } - - @Override - default void checkPostconditions(State state, Object expected, SystemUnderTest sut, Object actual) throws Throwable - { - throw new UnsupportedOperationException(); - } - - @Override - default String detailed(State state) - { - throw new UnsupportedOperationException(); - } - - @Override - default void process(State state, SystemUnderTest sut) throws Throwable - { - throw new UnsupportedOperationException(); - } - } - - public static Command ignoreCommand() - { - return new Command<>() - { - @Override - public PreCheckResult checkPreconditions(State state) - { - return PreCheckResult.Ignore; - } - - @Override - public Result apply(State state) throws Throwable - { - throw new UnsupportedOperationException(); - } - - @Override - public Result run(SystemUnderTest sut) throws Throwable - { - throw new UnsupportedOperationException(); - } - - @Override - public String detailed(State state) - { - throw new UnsupportedOperationException(); - } - }; - } - - public interface UnitCommand extends Command - { - void applyUnit(State state) throws Throwable; - void runUnit(SystemUnderTest sut) throws Throwable; - - @Override - default Void apply(State state) throws Throwable - { - applyUnit(state); - return null; - } - - @Override - default Void run(SystemUnderTest sut) throws Throwable - { - runUnit(sut); - return null; - } - } - - public interface StateOnlyCommand extends UnitCommand - { - @Override - default void runUnit(Void sut) throws Throwable {} - } - - public static class SimpleCommand implements StateOnlyCommand - { - private final Function name; - private final Consumer fn; - - public SimpleCommand(String name, Consumer fn) - { - this.name = ignore -> name; - this.fn = fn; - } - - public SimpleCommand(Function name, Consumer fn) - { - this.name = name; - this.fn = fn; - } - - @Override - public String detailed(State state) - { - return name.apply(state); - } - - @Override - public void applyUnit(State state) - { - fn.accept(state); - } - } - - public interface Commands - { - accord.utilsfork.Gen genInitialState() throws Throwable; - SystemUnderTest createSut(State state) throws Throwable; - default void onSuccess(State state, SystemUnderTest sut, List history) throws Throwable {} - default void destroyState(State state, @Nullable Throwable cause) throws Throwable {} - default void destroySut(SystemUnderTest sut, @Nullable Throwable cause) throws Throwable {} - accord.utilsfork.Gen> commands(State state) throws Throwable; - } - - public static CommandsBuilder commands(Supplier> stateGen, Function sutFactory) - { - return new CommandsBuilder<>(stateGen, sutFactory); - } - - public static CommandsBuilder commands(Supplier> stateGen) - { - return new CommandsBuilder<>(stateGen, ignore -> null); - } - - public interface StatefulSuccess - { - void apply(State state, SystemUnderTest sut, List history) throws Throwable; - } - - public static class CommandsBuilder - { - public interface Setup - { - Command setup(accord.utilsfork.RandomSource rs, State state); - } - private final Supplier> stateGen; - private final Function sutFactory; - private final Map, Integer> knownWeights = new LinkedHashMap<>(); - @Nullable - private Set> unknownWeights = null; - @Nullable - private Map, List>> conditionalCommands = null; - private accord.utilsfork.Gen.IntGen unknownWeightGen = accord.utilsfork.Gens.ints().between(1, 10); - @Nullable - private FailingConsumer preCommands = null; - @Nullable - private FailingBiConsumer destroyState = null; - @Nullable - private FailingBiConsumer destroySut = null; - @Nullable - private BiFunction>, accord.utilsfork.Gen>> commandsTransformer = null; - private final List> onSuccess = new ArrayList<>(); - - public CommandsBuilder(Supplier> stateGen, Function sutFactory) - { - this.stateGen = stateGen; - this.sutFactory = sutFactory; - } - - public CommandsBuilder preCommands(FailingConsumer preCommands) - { - this.preCommands = preCommands; - return this; - } - - public CommandsBuilder destroyState(FailingConsumer destroyState) - { - return destroyState((success, failure) -> { - if (failure == null) - destroyState.accept(success); - }); - } - - public CommandsBuilder destroyState(FailingBiConsumer destroyState) - { - this.destroyState = destroyState; - return this; - } - - public CommandsBuilder destroySut(FailingConsumer destroySut) - { - return destroySut((success, failure) -> { - if (failure == null) - destroySut.accept(success); - }); - } - - public CommandsBuilder destroySut(FailingBiConsumer destroySut) - { - this.destroySut = destroySut; - return this; - } - - public CommandsBuilder add(int weight, Command cmd) - { - return add(weight, (i1, i2) -> cmd); - } - - public CommandsBuilder add(int weight, accord.utilsfork.Gen> cmd) - { - return add(weight, (rs, state) -> cmd.next(rs)); - } - - public CommandsBuilder add(int weight, Setup cmd) - { - knownWeights.put(cmd, weight); - return this; - } - - public CommandsBuilder add(Command cmd) - { - return add((i1, i2) -> cmd); - } - - public CommandsBuilder add(accord.utilsfork.Gen> cmd) - { - return add((rs, state) -> cmd.next(rs)); - } - - public CommandsBuilder add(Setup cmd) - { - if (unknownWeights == null) - unknownWeights = new LinkedHashSet<>(); - unknownWeights.add(cmd); - return this; - } - - public CommandsBuilder addIf(Predicate predicate, accord.utilsfork.Gen> cmd) - { - return addIf(predicate, (rs, state) -> cmd.next(rs)); - } - - public CommandsBuilder addIf(Predicate predicate, Command cmd) - { - return addIf(predicate, (rs, state) -> cmd); - } - - public CommandsBuilder addIf(Predicate predicate, Setup cmd) - { - if (conditionalCommands == null) - conditionalCommands = new LinkedHashMap<>(); - conditionalCommands.computeIfAbsent(predicate, i -> new ArrayList<>()).add(cmd); - return this; - } - - public CommandsBuilder addAllIf(Predicate predicate, Consumer> sub) - { - sub.accept(new IfBuilder<>() - { - @Override - public IfBuilder add(Setup cmd) - { - CommandsBuilder.this.addIf(predicate, cmd); - return this; - } - - @Override - public IfBuilder addIf(Predicate nextPredicate, Setup cmd) { - CommandsBuilder.this.addIf(predicate.and(nextPredicate), cmd); - return this; - } - }); - return this; - } - - public interface IfBuilder - { - IfBuilder add(Setup cmd); - IfBuilder addIf(Predicate predicate, Setup cmd); - } - - public CommandsBuilder unknownWeight(accord.utilsfork.Gen.IntGen unknownWeightGen) - { - this.unknownWeightGen = Objects.requireNonNull(unknownWeightGen); - return this; - } - - public CommandsBuilder commandsTransformer(BiFunction>, accord.utilsfork.Gen>> commandsTransformer) - { - this.commandsTransformer = commandsTransformer; - return this; - } - - public CommandsBuilder onSuccess(StatefulSuccess fn) - { - onSuccess.add(fn); - return this; - } - - public Commands build() - { - accord.utilsfork.Gen> commandsGen; - if (unknownWeights == null && conditionalCommands == null) - { - commandsGen = accord.utilsfork.Gens.pick(new LinkedHashMap<>(knownWeights)); - } - else - { - class DynamicWeightsGen implements accord.utilsfork.Gen>, accord.utilsfork.Gens.Reset - { - LinkedHashMap, Integer> weights; - LinkedHashMap, Integer> conditionalWeights; - accord.utilsfork.Gen> nonConditional; - @Override - public Setup next(RandomSource rs) - { - if (weights == null) - { - // create random weights - weights = new LinkedHashMap<>(knownWeights); - if (unknownWeights != null) - { - for (Setup s : unknownWeights) - weights.put(s, unknownWeightGen.nextInt(rs)); - } - nonConditional = accord.utilsfork.Gens.pick(weights); - if (conditionalCommands != null) - { - conditionalWeights = new LinkedHashMap<>(); - for (List> commands : conditionalCommands.values()) - { - for (Setup c : commands) - conditionalWeights.put(c, unknownWeightGen.nextInt(rs)); - } - } - } - if (conditionalWeights == null) return nonConditional.next(rs); - return (r, s) -> { - // need to figure out what conditions apply... - LinkedHashMap, Integer> clone = new LinkedHashMap<>(weights); - for (Map.Entry, List>> e : conditionalCommands.entrySet()) - { - if (e.getKey().test(s)) - e.getValue().forEach(c -> clone.put(c, conditionalWeights.get(c))); - } - Setup select = accord.utilsfork.Gens.pick(clone).next(r); - return select.setup(r, s); - }; - } - - @Override - public void reset() - { - weights = null; - nonConditional = null; - conditionalWeights = null; - } - } - commandsGen = new DynamicWeightsGen(); - } - return new Commands<>() - { - @Override - public accord.utilsfork.Gen genInitialState() throws Throwable - { - return stateGen.get(); - } - - @Override - public SystemUnderTest createSut(State state) throws Throwable - { - return sutFactory.apply(state); - } - - @Override - public accord.utilsfork.Gen> commands(State state) throws Throwable - { - if (preCommands != null) - preCommands.accept(state); - accord.utilsfork.Gen> map = commandsGen.map((rs, setup) -> setup.setup(rs, state)); - return commandsTransformer == null ? map : commandsTransformer.apply(state, map); - } - - @Override - public void destroyState(State state, @Nullable Throwable cause) throws Throwable - { - accord.utilsfork.Gens.Reset.tryReset(commandsGen); - if (destroyState != null) - destroyState.accept(state, cause); - } - - @Override - public void destroySut(SystemUnderTest sut, @Nullable Throwable cause) throws Throwable - { - if (destroySut != null) - destroySut.accept(sut, cause); - } - - @Override - public void onSuccess(State state, SystemUnderTest sut, List history) throws Throwable - { - for (var fn : onSuccess) - fn.apply(state, sut, history); - } - }; - } - - public interface FailingConsumer - { - void accept(T value) throws Throwable; - } - - public interface FailingBiConsumer - { - void accept(A a, B b) throws Throwable; - } - } -} diff --git a/test/unit/accord/utilsfork/RandomSource.java b/test/unit/accord/utilsfork/RandomSource.java deleted file mode 100644 index b3e37087092f..000000000000 --- a/test/unit/accord/utilsfork/RandomSource.java +++ /dev/null @@ -1,429 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork; - -import java.util.ArrayList; -import java.util.Comparator; -import java.util.EnumSet; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.SortedSet; -import java.util.function.BooleanSupplier; -import java.util.function.IntSupplier; -import java.util.function.LongSupplier; -import java.util.function.Supplier; - -import com.google.common.collect.Iterables; - -import accord.utilsfork.random.Picker; - -// TODO (expected): merge with C* RandomSource -public interface RandomSource -{ - static RandomSource wrap(Random random) - { - return new accord.utilsfork.WrappedRandomSource(random); - } - //TODO (maintaince): once the rebase is over remove this... - static RandomSource wrap(accord.utils.RandomSource rs) - { - return new WrappedRandomSource(rs.asJdkRandom()); - } - - void nextBytes(byte[] bytes); - - boolean nextBoolean(); - default BooleanSupplier uniformBools() { return this::nextBoolean; } - default BooleanSupplier biasedUniformBools(float chance) { return () -> decide(chance); } - default Supplier biasedUniformBoolsSupplier(float minChance) - { - return () -> { - float chance = minChance + (1 - minChance)*nextFloat(); - return () -> decide(chance); - }; - } - - /** - * Returns true with a probability of {@code chance}. This is logically the same as - *

      {@code nextFloat() < chance}
      - * - * @param chance cumulative probability in range [0..1] - */ - default boolean decide(float chance) - { - return nextFloat() < chance; - } - - /** - * Returns true with a probability of {@code chance}. This is logically the same as - *
      {@code nextDouble() < chance}
      - * - * @param chance cumulative probability in range [0..1] - */ - default boolean decide(double chance) - { - return nextDouble() < chance; - } - - int nextInt(); - default int nextInt(int maxExclusive) { return nextInt(0, maxExclusive); } - default int nextInt(int minInclusive, int maxExclusive) - { - // this is diff behavior than ThreadLocalRandom, which returns nextInt - if (minInclusive >= maxExclusive) - throw new IllegalArgumentException(String.format("Min (%s) should be less than max (%d).", minInclusive, maxExclusive)); - - int result = nextInt(); - int delta = maxExclusive - minInclusive; - int mask = delta - 1; - if ((delta & mask) == 0) // power of two - result = (result & mask) + minInclusive; - else if (delta > 0) - { - // reject over-represented candidates - for (int u = result >>> 1; // ensure nonnegative - u + mask - (result = u % delta) < 0; // rejection check - u = nextInt() >>> 1) // retry - ; - result += minInclusive; - } - else - { - // range not representable as int - while (result < minInclusive || result >= maxExclusive) - result = nextInt(); - } - return result; - } - default int nextBiasedInt(int minInclusive, int median, int maxExclusive) - { - checkBiasedUniform(minInclusive, median, maxExclusive); - - int range = Math.max(maxExclusive - median, median - minInclusive) * 2; - int next = nextInt(range) - range/2; - next += median; - return next >= median ? next < maxExclusive ? next : nextInt(median, maxExclusive) - : next >= minInclusive ? next : minInclusive == median ? median : nextInt(minInclusive, median); - } - - default IntSupplier uniformInts(int minInclusive, int maxExclusive) { return () -> nextInt(minInclusive, maxExclusive); } - default IntSupplier biasedUniformInts(int minInclusive, int median, int maxExclusive) - { - checkBiasedUniform(minInclusive, median, maxExclusive); - return () -> nextBiasedInt(minInclusive, median, maxExclusive); - } - default Supplier biasedUniformIntsSupplier(int absoluteMinInclusive, int absoluteMaxExclusive, int minMedian, int maxMedian, int minRange, int maxRange) - { - return biasedUniformIntsSupplier(absoluteMinInclusive, absoluteMaxExclusive, minMedian, (minMedian+maxMedian)/2, maxMedian, minRange, (minRange+maxRange)/2, maxRange); - } - default Supplier biasedUniformIntsSupplier(int absoluteMinInclusive, int absoluteMaxExclusive, int minMedian, int medianMedian, int maxMedian, int minRange, int medianRange, int maxRange) - { - checkBiasedUniform(minMedian, medianMedian, maxMedian); - checkBiasedUniform(minRange, medianRange, maxRange); - if (minMedian < absoluteMinInclusive) - throw new IllegalArgumentException(String.format("absoluteMin (%s) should be less than or equal to minMedian (%s)", absoluteMinInclusive, minMedian)); - if (maxMedian > absoluteMaxExclusive) - throw new IllegalArgumentException(String.format("absoluteMax (%s) should be greater than or equal to maxMedian (%s)", absoluteMaxExclusive, maxMedian)); - if (minRange < 1) - throw new IllegalArgumentException(String.format("minRange (%s) should be greater than or equal to 1", minRange)); - return () -> { - int median = nextBiasedInt(minMedian, medianMedian, maxMedian); - int minInclusive = Math.max(absoluteMinInclusive, median - nextBiasedInt(minRange, medianRange, maxRange)/2); - int maxExclusive = Math.min(absoluteMaxExclusive, median + (nextBiasedInt(minRange, medianRange, maxRange)+1)/2); - return biasedUniformInts(minInclusive, median, maxExclusive); - }; - } - - long nextLong(); - default long nextLong(long maxExclusive) { return nextLong(0, maxExclusive); } - default long nextLong(long minInclusive, long maxExclusive) - { - // this is diff behavior than ThreadLocalRandom, which returns nextLong - if (minInclusive >= maxExclusive) - throw new IllegalArgumentException(String.format("Min (%s) should be less than max (%d).", minInclusive, maxExclusive)); - - long result = nextLong(); - long delta = maxExclusive - minInclusive; - long mask = delta - 1; - if ((delta & mask) == 0L) // power of two - result = (result & mask) + minInclusive; - else if (delta > 0L) - { - // reject over-represented candidates - for (long u = result >>> 1; // ensure nonnegative - u + mask - (result = u % delta) < 0L; // rejection check - u = nextLong() >>> 1) // retry - ; - result += minInclusive; - } - else - { - // range not representable as long - while (result < minInclusive || result >= maxExclusive) - result = nextLong(); - } - return result; - } - default long nextBiasedLong(long minInclusive, long median, long maxExclusive) - { - checkBiasedUniform(minInclusive, median, maxExclusive); - - long range = Math.max(maxExclusive - median, median - minInclusive) * 2; - long next = nextLong(range) - range/2; - next += median; - return next >= median ? next < maxExclusive ? next : nextLong(median, maxExclusive) - : next >= minInclusive ? next : minInclusive == median ? median : nextLong(minInclusive, median); - } - - default LongSupplier uniformLongs(long minInclusive, long maxExclusive) { return () -> nextLong(minInclusive, maxExclusive); } - default LongSupplier biasedUniformLongs(long minInclusive, long median, long maxExclusive) - { - checkBiasedUniform(minInclusive, median, maxExclusive); - return () -> nextBiasedLong(minInclusive, median, maxExclusive); - } - default Supplier biasedUniformLongsSupplier(long absoluteMinInclusive, long absoluteMaxExclusive, long minMedian, long maxMedian, long minRange, long maxRange) - { - return biasedUniformLongsSupplier(absoluteMinInclusive, absoluteMaxExclusive, minMedian, (minMedian+maxMedian)/2, maxRange, minRange, (minRange+maxRange)/2, maxRange); - } - default Supplier biasedUniformLongsSupplier(long absoluteMinInclusive, long absoluteMaxExclusive, long minMedian, long medianMedian, long maxMedian, long minRange, long medianRange, long maxRange) - { - checkBiasedUniform(minMedian, medianMedian, maxMedian); - checkBiasedUniform(minRange, medianRange, maxRange); - if (minMedian < absoluteMinInclusive) - throw new IllegalArgumentException(String.format("absoluteMin (%s) should be less than or equal to minMedian (%s)", absoluteMinInclusive, minMedian)); - if (maxMedian > absoluteMaxExclusive) - throw new IllegalArgumentException(String.format("absoluteMax (%s) should be greater than or equal to maxMedian (%s)", absoluteMaxExclusive, maxMedian)); - if (minRange < 1) - throw new IllegalArgumentException(String.format("minRange (%s) should be greater than or equal to 1", minRange)); - return () -> { - long median = nextBiasedLong(minMedian, medianMedian, maxMedian); - long minInclusive = Math.max(absoluteMinInclusive, median - nextBiasedLong(minRange, medianRange, maxRange)/2); - long maxExclusive = Math.min(absoluteMaxExclusive, median + (1+nextBiasedLong(minRange, medianRange, maxRange))/2); - return biasedUniformLongs(minInclusive, median, maxExclusive); - }; - } - - static void checkBiasedUniform(long minInclusive, long median, long maxExclusive) - { - if (minInclusive > median) - throw new IllegalArgumentException(String.format("Min (%s) should be equal to or less than median (%d).", minInclusive, median)); - if (median >= maxExclusive) - throw new IllegalArgumentException(String.format("Median (%s) should be less than max (%d).", median, maxExclusive)); - } - - float nextFloat(); - - double nextDouble(); - default double nextDouble(double maxExclusive) { return nextDouble(0, maxExclusive); } - default double nextDouble(double minInclusive, double maxExclusive) - { - if (minInclusive >= maxExclusive) - throw new IllegalArgumentException(String.format("Min (%s) should be less than max (%d).", minInclusive, maxExclusive)); - - double result = nextDouble(); - result = result * (maxExclusive - minInclusive) + minInclusive; - if (result >= maxExclusive) // correct for rounding - result = Double.longBitsToDouble(Double.doubleToLongBits(maxExclusive) - 1); - return result; - } - - double nextGaussian(); - - default int pickInt(int first, int second, int... rest) - { - int offset = nextInt(0, rest.length + 2); - switch (offset) - { - case 0: return first; - case 1: return second; - default: return rest[offset - 2]; - } - } - - default int pickInt(int[] array) - { - return pickInt(array, 0, array.length); - } - - default int pickInt(int[] array, int offset, int length) - { - accord.utilsfork.Invariants.checkIndexInBounds(array.length, offset, length); - if (length == 1) - return array[offset]; - return array[nextInt(offset, offset + length)]; - } - - default long pickLong(long first, long second, long... rest) - { - int offset = nextInt(0, rest.length + 2); - switch (offset) - { - case 0: return first; - case 1: return second; - default: return rest[offset - 2]; - } - } - - default long pickLong(long[] array) - { - return pickLong(array, 0, array.length); - } - - default long pickLong(long[] array, int offset, int length) - { - accord.utilsfork.Invariants.checkIndexInBounds(array.length, offset, length); - if (length == 1) - return array[offset]; - return array[nextInt(offset, offset + length)]; - } - - default T pickOrderedSet(SortedSet set) - { - int offset = nextInt(0, set.size()); - return Iterables.get(set, offset); - } - - default T pickOrderedSet(LinkedHashSet set) - { - int offset = nextInt(0, set.size()); - return Iterables.get(set, offset); - } - - default > T pickOrderedSet(EnumSet set) - { - int offset = nextInt(0, set.size()); - return Iterables.get(set, offset); - } - - default > T pickUnorderedSet(Set set) - { - if (set instanceof SortedSet) - return pickOrderedSet((SortedSet) set); - List values = new ArrayList<>(set); - // Non-ordered sets may have different iteration order on different environments, which would make a seed produce different histories! - // To avoid such a problem, make sure to apply a deterministic function (sort). - values.sort(Comparator.naturalOrder()); - return pick(values); - } - - default T pick(T first, T second, T... rest) - { - int offset = nextInt(0, rest.length + 2); - switch (offset) - { - case 0: return first; - case 1: return second; - default: return rest[offset - 2]; - } - } - - default T pick(T[] array) - { - return array[nextInt(array.length)]; - } - - default T pick(List values) - { - return pick(values, 0, values.size()); - } - - default T pick(List values, int offset, int length) - { - Invariants.checkIndexInBounds(values.size(), offset, length); - if (length == 1) - return values.get(offset); - return values.get(nextInt(offset, offset + length)); - } - - default Supplier randomWeightedPicker(T[] objects) { return Picker.WeightedObjectPicker.randomWeighted(this, objects); } - default Supplier randomWeightedPicker(T[] objects, float[] bias) { return Picker.WeightedObjectPicker.randomWeighted(this, objects, bias); } - default Supplier weightedPicker(T[] objects, float[] proportionalWeights) { return Picker.WeightedObjectPicker.weighted(this, objects, proportionalWeights); } - - void setSeed(long seed); - RandomSource fork(); - - default long reset() - { - long seed = nextLong(); - setSeed(seed); - return seed; - } - - default Random asJdkRandom() - { - return new Random(nextLong()) - { - @Override - public void setSeed(long seed) - { - RandomSource.this.setSeed(seed); - } - - @Override - public void nextBytes(byte[] bytes) - { - RandomSource.this.nextBytes(bytes); - } - - @Override - public int nextInt() - { - return RandomSource.this.nextInt(); - } - - @Override - public int nextInt(int bound) - { - return RandomSource.this.nextInt(bound); - } - - @Override - public long nextLong() - { - return RandomSource.this.nextLong(); - } - - @Override - public boolean nextBoolean() - { - return RandomSource.this.nextBoolean(); - } - - @Override - public float nextFloat() - { - return RandomSource.this.nextFloat(); - } - - @Override - public double nextDouble() - { - return RandomSource.this.nextDouble(); - } - - @Override - public double nextGaussian() - { - return RandomSource.this.nextGaussian(); - } - }; - } -} diff --git a/test/unit/accord/utilsfork/SeedProvider.java b/test/unit/accord/utilsfork/SeedProvider.java deleted file mode 100644 index ded732f42f2b..000000000000 --- a/test/unit/accord/utilsfork/SeedProvider.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork; - -import java.util.concurrent.atomic.AtomicLong; - -/** - * Utility class for creating seeds. This class mostly matches the semantics of {@link java.util.Random} but makes the logic work - * for any random source. This class should be used in replacement of most seed methods, and should always replace {@link java.util.concurrent.ThreadLocalRandom} - * as that randomness will have a bias twords the same seed after a restart (if you rerun randomized tests by restarting - * the JVM you will run with the same seed over and over again). - */ -public class SeedProvider -{ - public static final SeedProvider instance = new SeedProvider(); - private final AtomicLong seedUniquifier = new AtomicLong(8682522807148012L); - - private long seedUniquifier() - { - // L'Ecuyer, "Tables of Linear Congruential Generators of - // Different Sizes and Good Lattice Structure", 1999 - for (; ; ) - { - long current = seedUniquifier.get(); - long next = current * 1181783497276652981L; - if (seedUniquifier.compareAndSet(current, next)) - return next; - } - } - - public long nextSeed() - { - return seedUniquifier() ^ System.nanoTime(); - } -} diff --git a/test/unit/accord/utilsfork/async/TimeoutUtils.java b/test/unit/accord/utilsfork/async/TimeoutUtils.java deleted file mode 100644 index 2008918ac1db..000000000000 --- a/test/unit/accord/utilsfork/async/TimeoutUtils.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork.async; - -import java.time.Duration; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import org.apache.cassandra.utils.concurrent.AsyncPromise; - -public class TimeoutUtils -{ - public interface FailingRunnable - { - void run() throws Throwable; - } - - public static void runBlocking(Duration timeout, String threadName, FailingRunnable fn) throws ExecutionException, InterruptedException, TimeoutException - { - // MAINTENANCE: Once the accord branch merges to trunk this can be dropped and will be AsyncChain again, but since this is forked into C* (that doesn't have AsyncChain) need to use Futures -// AsyncResult.Settable promise = AsyncResults.settable(); - AsyncPromise promise = new AsyncPromise<>(); - Thread t = new Thread(() -> { - try - { - fn.run(); - promise.setSuccess(null); - } - catch (Throwable e) - { - promise.setFailure(e); - } - }); - t.setName(threadName); - t.setDaemon(true); - t.start(); - try - { -// AsyncChains.getBlocking(promise, timeout.toNanos(), TimeUnit.NANOSECONDS); - promise.get(timeout.toNanos(), TimeUnit.NANOSECONDS); - } - catch (InterruptedException e) - { - t.interrupt(); - throw e; - } - catch (TimeoutException e) - { - t.interrupt(); - throw e; - } - } -} diff --git a/test/unit/accord/utilsfork/random/Picker.java b/test/unit/accord/utilsfork/random/Picker.java deleted file mode 100644 index f9584a5f98cf..000000000000 --- a/test/unit/accord/utilsfork/random/Picker.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package accord.utilsfork.random; - -import java.util.Arrays; -import java.util.function.Supplier; - -import accord.utils.Invariants; -import accord.utilsfork.RandomSource; - -public class Picker -{ - public static float[] randomWeights(RandomSource random, int length) - { - float[] weights = new float[length - 1]; - float sum = 0; - for (int i = 0 ; i < weights.length ; ++i) - weights[i] = sum += random.nextFloat(); - sum += random.nextFloat(); - for (int i = 0 ; i < weights.length ; ++i) - weights[i] /= sum; - return weights; - } - - static abstract class Weighted - { - final RandomSource random; - final float[] weights; - - public Weighted(RandomSource random, float[] weights) - { - this.random = random; - this.weights = weights; - } - - - static float[] randomWeights(RandomSource random, float[] bias) - { - float[] weights = new float[bias.length - 1]; - float sum = 0; - for (int i = 0 ; i < weights.length ; ++i) - weights[i] = sum += random.nextFloat() * bias[i]; - sum += random.nextFloat() * bias[weights.length]; - for (int i = 0 ; i < weights.length ; ++i) - weights[i] /= sum; - return weights; - } - - static float[] normaliseWeights(float[] input) - { - float[] output = new float[input.length - 1]; - float sum = 0; - for (int i = 0 ; i < output.length ; ++i) - output[i] = sum += input[i]; - sum += input[output.length]; - for (int i = 0 ; i < output.length ; ++i) - output[i] /= sum; - return output; - } - - int pickIndex() - { - int i = Arrays.binarySearch(weights, random.nextFloat()); - if (i < 0) i = -1 - i; - return i; - } - } - - public static class WeightedObjectPicker extends Weighted implements Supplier - { - final T[] values; - - private WeightedObjectPicker(RandomSource random, T[] values, float[] weights) - { - super(random, weights); - this.values = values; - } - - @Override - public T get() - { - return values[pickIndex()]; - } - - public static WeightedObjectPicker randomWeighted(RandomSource random, T[] values) - { - return new WeightedObjectPicker<>(random, values, Picker.randomWeights(random, values.length)); - } - - public static WeightedObjectPicker randomWeighted(RandomSource random, T[] values, float[] bias) - { - Invariants.checkArgument(values.length == bias.length); - return new WeightedObjectPicker<>(random, values, randomWeights(random, bias)); - } - - public static WeightedObjectPicker weighted(RandomSource random, T[] values, float[] proportionalWeights) - { - Invariants.checkArgument(values.length == proportionalWeights.length); - return new WeightedObjectPicker<>(random, values, normaliseWeights(proportionalWeights)); - } - } -} diff --git a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java index 839488875c77..c7a03dc79cd6 100644 --- a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java +++ b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java @@ -39,8 +39,8 @@ import javax.annotation.Nullable; -import accord.utilsfork.Gens; -import accord.utilsfork.RandomSource; +import accord.utils.Gens; +import accord.utils.RandomSource; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.concurrent.Future; diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 697214f76a4f..058871776c8b 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -79,6 +79,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.auth.IRoleManager", "org.apache.cassandra.config.AccordSpec", "org.apache.cassandra.config.AccordSpec$JournalSpec", + "org.apache.cassandra.config.AccordSpec$MinEpochRetrySpec", "org.apache.cassandra.config.AccordSpec$TransactionalRangeMigration", "org.apache.cassandra.config.CassandraRelevantProperties", "org.apache.cassandra.config.CassandraRelevantProperties$PropertyConverter", diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java index 4c9ec9fdc1f5..857ec85f408b 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java @@ -33,7 +33,7 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; import static org.quicktheories.generators.SourceDSL.doubles; import static org.quicktheories.generators.SourceDSL.integers; diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java index 9d0816e6440e..6f9260f022ec 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java @@ -29,7 +29,7 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.utils.Generators; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.quicktheories.generators.SourceDSL.integers; diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index c47b244d0578..c9e0ced9791a 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -67,8 +67,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; -import org.apache.cassandra.db.virtual.SystemViewsKeyspace; import org.assertj.core.api.Assertions; import org.awaitility.Awaitility; import org.apache.commons.lang3.ArrayUtils; @@ -85,10 +83,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utilsfork.DefaultRandom; -import accord.utilsfork.Gen; -import accord.utilsfork.Property; -import accord.utilsfork.RandomSource; +import accord.utils.DefaultRandom; +import accord.utils.Gen; +import accord.utils.Property; +import accord.utils.RandomSource; import com.codahale.metrics.Gauge; import com.datastax.driver.core.CloseFuture; import com.datastax.driver.core.Cluster; @@ -131,6 +129,7 @@ import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.BooleanType; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ByteType; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.db.marshal.CollectionType; @@ -152,6 +151,7 @@ import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.db.marshal.VectorType; +import org.apache.cassandra.db.virtual.SystemViewsKeyspace; import org.apache.cassandra.db.virtual.VirtualKeyspace; import org.apache.cassandra.db.virtual.VirtualKeyspaceRegistry; import org.apache.cassandra.db.virtual.VirtualSchemaKeyspace; @@ -191,13 +191,17 @@ import org.apache.cassandra.transport.TlsTestUtils; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.CassandraGenerators; import org.apache.cassandra.utils.ConfigGenBuilder; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.Generators; import org.apache.cassandra.utils.JMXServerUtils; import org.apache.cassandra.utils.LazyToString; import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; +import static org.apache.cassandra.utils.CassandraGenerators.regularKeyspace; +import static org.apache.cassandra.utils.CassandraGenerators.regularTable; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -302,6 +306,7 @@ public static final ProtocolVersion getDefaultVersion() private List keyspaces = new ArrayList<>(); private List tables = new ArrayList<>(); + private List indexes = new ArrayList<>(); private List views = new ArrayList<>(); private List types = new ArrayList<>(); private List functions = new ArrayList<>(); @@ -385,6 +390,7 @@ private static void checkProtocolVersion() public static void prepareServer() { ServerTestUtils.prepareServer(); + AccordStateCache.validateLoadOnEvict(true); } public static void cleanup() @@ -442,7 +448,6 @@ public static void setUpClass() // Once per-JVM is enough prepareServer(); - AccordStateCache.validateLoadOnEvict(true); } protected static void prePrepareServer() @@ -508,6 +513,7 @@ public void afterTest() throws Throwable keyspaces = null; tables = null; + indexes = null; views = null; types = null; functions = null; @@ -525,6 +531,27 @@ protected static void addVirtualKeyspace() VirtualKeyspaceRegistry.instance.register(SystemViewsKeyspace.instance); } + protected void clearSchema() + { + ServerTestUtils.resetCMS(); + keyspaces.clear(); + tables.clear(); + indexes.clear();; + views.clear(); + types.clear(); + functions.clear(); + aggregates.clear(); + } + + protected void clearState() + { + clearSchema(); + usePrepared = USE_PREPARED_VALUES; + reusePrepared = REUSE_PREPARED; + + seqNumber.set(0); + } + protected void resetSchema() throws Throwable { for (TableMetadata table : SchemaKeyspace.metadata().tables) @@ -2224,7 +2251,10 @@ public static void assertRows(UntypedResultSet result, Object[]... rows) if (!((cellValidator == null && actualValue == null) || (cellValidator != null && cellValidator.equals(actualValue)))) { Object actualValueDecoded = actualValue == null ? null : column.type.getSerializer().deserialize(actualValue); - if (!Objects.equal(expected != null ? expected[j] : null, actualValueDecoded)) + Object expectedValueDecoded = expected != null ? expected[j] : null; + if (expectedValueDecoded instanceof ByteBuffer && !(actualValueDecoded instanceof ByteBuffer)) + expectedValueDecoded = column.type.getSerializer().deserialize(((ByteBuffer) expectedValueDecoded).duplicate()); + if (!Objects.equal(expectedValueDecoded, actualValueDecoded)) { if (isEmptyContainerNull(column.type, cellValidator != null ? cellValidator.expected() : null, actualValue)) continue; @@ -2654,6 +2684,116 @@ protected void assertUnauthorizedQuery(String errorMessage, String query, Object values); } + protected CassandraGenerators.KeyspaceMetadataBuilder createKeyspaceMetadataBuilder() + { + return regularKeyspace() + .withName(createKeyspaceName()) + .withReplication(new CassandraGenerators.AbstractReplicationStrategyBuilder() + .withUserAllowed() + .withDatacenters("datacenter1") + .withRf(1)); + } + + protected KeyspaceMetadata createKeyspace(RandomSource rs) + { + KeyspaceMetadata metadata = Generators.toGen(createKeyspaceMetadataBuilder().build()).next(rs); + String fullQuery = metadata.toCqlString(false, false, false); + logger.info(fullQuery); + schemaChange(fullQuery); + return metadata; + } + + protected CassandraGenerators.TableMetadataBuilder createTableMetadataBuilder() + { + String ks = currentKeyspace(); + if (ks == null) + ks = KEYSPACE; + return createTableMetadataBuilder(ks); + } + + protected CassandraGenerators.TableMetadataBuilder createTableMetadataBuilder(String ks) + { + return regularTable() + .withKeyspaceName(ks) + .withSimpleColumnNames(); + } + + protected TableMetadata createTable(RandomSource rs) + { + TableMetadata metadata = Generators.toGen(createTableMetadataBuilder().build()).next(rs); + maybeCreateUDTs(metadata); + String fullQuery = metadata.toCqlString(false, false, false); + logger.info(fullQuery); + schemaChange(fullQuery); + return metadata; + } + + protected TableMetadata createTable(RandomSource rs, String keyspace) + { + TableMetadata metadata = Generators.toGen(createTableMetadataBuilder(keyspace).build()).next(rs); + maybeCreateUDTs(metadata); + String fullQuery = metadata.toCqlString(false, false, false); + logger.info(fullQuery); + schemaChange(fullQuery); + return Schema.instance.getTableMetadata(keyspace, metadata.name); + } + + protected void maybeCreateUDTs(TableMetadata metadata) + { + CassandraGenerators.visitUDTs(metadata, next -> { + String cql = next.toCqlString(false, false, true); + logger.warn("Creating UDT {}", cql); + schemaChange(cql); + }); + } + +// protected String createIndexName() +// { +// String name = createSchemaElementName(SchemaElement.SchemaElementType.INDEX, null); +// indexes.add(name); +// return name; +// } +// +// protected UntypedResultSet execute(org.apache.cassandra.cql3.ast.Statement stmt) +// { +// return executeFormattedQuery(stmt.toCQL(), stmt.bindsEncoded()); +// } +// +// protected ResultSet executeNet(ProtocolVersion protocolVersion, org.apache.cassandra.cql3.ast.Statement stmt) +// { +// return sessionNet(protocolVersion).execute(stmt.toCQL(), stmt.bindsEncoded()); +// } +// +// protected Mutation nonTransactionMutation(RandomSource rs, TableMetadata metadata) +// { +// return Generators.toGen(new ASTGenerators.MutationGenBuilder(metadata).withoutTransaction().build()).next(rs); +// } + +// protected Select select(Mutation mutation) +// { +// // select * from table where +// return new Select(Collections.emptyList(), +// Optional.of(new TableReference(mutation.table.keyspace, mutation.table.name)), +// where(mutation.primaryKeys()), +// Optional.empty(), +// Optional.empty()); +// } + +// private Optional where(Map keys) +// { +// if (keys.isEmpty()) +// throw new IllegalArgumentException("Unable to create a where clause from empty keys"); +// Conditional.Builder builder = new Conditional.Builder(); +// for (Map.Entry e : keys.entrySet()) +// builder.where(Where.Inequalities.EQUAL, e.getKey(), e.getValue()); +// return Optional.of(builder.build()); +// } + +// protected Object[][] rows(Mutation mutation) +// { +// return mutation.kind == Mutation.Kind.DELETE ? new Object[0][] : new Object[][]{row(mutation.toRowEncoded())}; +// } + @FunctionalInterface public interface CheckedFunction { @@ -2933,8 +3073,15 @@ private static String formatValue(ByteBuffer bb, AbstractType type) // CollectionType override getString() to use hexToBytes. We can't change that // without breaking SSTable2json, but the serializer for collection have the // right getString so using it directly instead. - TypeSerializer ser = type.getSerializer(); - return ser.toString(ser.deserialize(bb)); + try + { + TypeSerializer ser = type.getSerializer(); + return ser.toString(ser.deserialize(bb)); + } + catch (Throwable t) + { + return "TypeSerializer.toString failed for type " + type.asCQL3Type() + ": " + t.getMessage(); + } } try diff --git a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java index 70e49bc7fec2..c6559ee8513e 100644 --- a/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java +++ b/test/unit/org/apache/cassandra/cql3/PreparedStatementsTest.java @@ -218,6 +218,7 @@ public void testInvalidatePreparedStatementsOnDrop() session.execute(preparedBatch.bind(2, 2, "value2")); session.execute(preparedTxn.bind(3, 3, "value3")); + sessionSchemaUpdate(session, dropTableStatement); // since this is an accord table, need to drop the table before the keyspace sessionSchemaUpdate(session, dropKsStatement); sessionSchemaUpdate(session, createKsStatement); sessionSchemaUpdate(session, createTableStatement); @@ -229,6 +230,7 @@ public void testInvalidatePreparedStatementsOnDrop() session.execute(prepared.bind(1, 1, "value")); session.execute(preparedBatch.bind(2, 2, "value2")); session.execute(preparedTxn.bind(3, 3, "value3")); + sessionSchemaUpdate(session, dropTableStatement); // since this is an accord table, need to drop the table before the keyspace sessionSchemaUpdate(session, dropKsStatement); } @@ -249,6 +251,7 @@ private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boo Session session = sessionNet(version); String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (a int PRIMARY KEY, b int, c int) WITH transactional_mode='unsafe';"; String alterTableStatement = "ALTER TABLE " + KEYSPACE + ".qp_cleanup ADD d int;"; + String dropTableStatement = "DROP TABLE IF EXISTS " + KEYSPACE + ".qp_cleanup;"; sessionSchemaUpdate(session, dropKsStatement); sessionSchemaUpdate(session, createKsStatement); @@ -315,6 +318,7 @@ private void testInvalidatePreparedStatementOnAlter(ProtocolVersion version, boo } } + sessionSchemaUpdate(session, dropTableStatement); sessionSchemaUpdate(session, dropKsStatement); } @@ -335,6 +339,7 @@ private void testInvalidatePreparedStatementOnAlterUnchangedMetadata(ProtocolVer Session session = sessionNet(version); String createTableStatement = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + ".qp_cleanup (a int PRIMARY KEY, b int, c int) WITH transactional_mode='unsafe';"; String alterTableStatement = "ALTER TABLE " + KEYSPACE + ".qp_cleanup ADD d int;"; + String dropTableStatement = "DROP TABLE IF EXISTS " + KEYSPACE + ".qp_cleanup;"; sessionSchemaUpdate(session, dropKsStatement); sessionSchemaUpdate(session, createKsStatement); @@ -382,6 +387,7 @@ private void testInvalidatePreparedStatementOnAlterUnchangedMetadata(ProtocolVer Assertions.assertThat(columnNames(rs)).containsExactlyInAnyOrder("a", "b", "c"); } + sessionSchemaUpdate(session, dropTableStatement); sessionSchemaUpdate(session, dropKsStatement); } diff --git a/test/unit/org/apache/cassandra/cql3/RandomSchemaTest.java b/test/unit/org/apache/cassandra/cql3/RandomSchemaTest.java index 00abdc9ca166..075fbf8e91df 100644 --- a/test/unit/org/apache/cassandra/cql3/RandomSchemaTest.java +++ b/test/unit/org/apache/cassandra/cql3/RandomSchemaTest.java @@ -20,27 +20,18 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; -import java.util.Deque; -import java.util.HashSet; import java.util.Iterator; import java.util.List; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeMap; import java.util.stream.Collectors; import com.google.common.collect.ImmutableList; import org.junit.Assert; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.io.sstable.format.SSTableFormat; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; @@ -65,8 +56,6 @@ public class RandomSchemaTest extends CQLTester.InMemory { - private static final Logger logger = LoggerFactory.getLogger(RandomSchemaTest.class); - static { // make sure blob is always the same @@ -80,20 +69,17 @@ public void test() { // in accord branch there is a much cleaner api for this pattern... Gen domainGen = SourceDSL.integers().between(1, 100).map(i -> i < 2 ? AbstractTypeGenerators.ValueDomain.NULL : i < 4 ? AbstractTypeGenerators.ValueDomain.EMPTY_BYTES : AbstractTypeGenerators.ValueDomain.NORMAL); - // make sure ordering is determanstic, else repeatability breaks - NavigableMap> formats = new TreeMap<>(DatabaseDescriptor.getSSTableFormats()); - Gen> ssTableFormatGen = SourceDSL.arbitrary().pick(new ArrayList<>(formats.values())); + + Gen> sstableFormatGen = CassandraGenerators.sstableFormat(); qt().checkAssert(random -> { resetSchema(); // TODO : when table level override of sstable format is allowed, migrate to that - SSTableFormat sstableFormat = ssTableFormatGen.generate(random); - DatabaseDescriptor.setSelectedSSTableFormat(sstableFormat); + DatabaseDescriptor.setSelectedSSTableFormat(sstableFormatGen.generate(random)); Gen udtName = Generators.unique(IDENTIFIER_GEN); TypeGenBuilder withoutUnsafeEquality = AbstractTypeGenerators.withoutUnsafeEquality() - .withUserTypeKeyspace(KEYSPACE) .withUDTNames(udtName); TableMetadata metadata = new TableMetadataBuilder() .withKeyspaceName(KEYSPACE) @@ -101,7 +87,6 @@ public void test() .withKnownMemtables() .withDefaultTypeGen(AbstractTypeGenerators.builder() .withoutEmpty() - .withUserTypeKeyspace(KEYSPACE) .withMaxDepth(2) .withDefaultSetKey(withoutUnsafeEquality) .withoutTypeKinds(AbstractTypeGenerators.TypeKind.COUNTER) @@ -187,36 +172,6 @@ private void serde(ClusterMetadata metadata, TableMetadata tableMetadata) throws } } - private void maybeCreateUDTs(TableMetadata metadata) - { - Set udts = CassandraGenerators.extractUDTs(metadata); - if (!udts.isEmpty()) - { - Deque pending = new ArrayDeque<>(udts); - Set created = new HashSet<>(); - while (!pending.isEmpty()) - { - UserType next = pending.poll(); - Set subTypes = AbstractTypeGenerators.extractUDTs(next); - subTypes.remove(next); // it includes self - if (subTypes.isEmpty() || subTypes.stream().allMatch(t -> created.contains(t.name))) - { - String cql = next.toCqlString(true, false, false); - logger.warn("Creating UDT {}", cql); - schemaChange(cql); - created.add(next.name); - } - else - { - logger.warn("Unable to create UDT {}; following sub-types still not created: {}", - next.getCqlTypeName(), - subTypes.stream().filter(t -> !created.contains(t.name)).collect(Collectors.toSet())); - pending.add(next); - } - } - } - } - private static int primaryColumnCount(TableMetadata metadata) { return metadata.partitionKeyColumns().size() + metadata.clusteringColumns().size(); diff --git a/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java b/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java index fe4f73efe086..a2805e85690c 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java +++ b/test/unit/org/apache/cassandra/cql3/ast/ExpressionTest.java @@ -20,14 +20,14 @@ import org.junit.Test; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.cql3.ast.Conditional.And; import org.apache.cassandra.cql3.ast.Conditional.Where; import org.apache.cassandra.db.marshal.Int32Type; import org.assertj.core.api.Assertions; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class ExpressionTest { diff --git a/test/unit/org/apache/cassandra/cql3/ast/Select.java b/test/unit/org/apache/cassandra/cql3/ast/Select.java index 10d98dee6305..eea9ded9ad0b 100644 --- a/test/unit/org/apache/cassandra/cql3/ast/Select.java +++ b/test/unit/org/apache/cassandra/cql3/ast/Select.java @@ -170,11 +170,11 @@ public void toCQL(StringBuilder sb, CQLFormatter formatter) public Stream stream() { List es = new ArrayList<>(selections.size() - + (source.isPresent() ? 1 : 0) - + (where.isPresent() ? 1 : 0) - + (orderBy.isPresent() ? 1 : 0) - + (perPartitionLimit.isPresent() ? 1 : 0) - + (limit.isPresent() ? 1 : 0)); + + (source.isPresent() ? 1 : 0) + + (where.isPresent() ? 1 : 0) + + (orderBy.isPresent() ? 1 : 0) + + (perPartitionLimit.isPresent() ? 1 : 0) + + (limit.isPresent() ? 1 : 0)); es.addAll(selections); if (source.isPresent()) es.add(source.get()); @@ -445,11 +445,11 @@ public T limit(int limit) public Select build() { return new Select((selections == null || selections.isEmpty()) ? Collections.emptyList() : ImmutableList.copyOf(selections), - source, - where.isEmpty() ? Optional.empty() : Optional.of(where.build()), - orderBy.isEmpty() ? Optional.empty() : Optional.of(orderBy.build()), - perPartitionLimit, limit, - allowFiltering); + source, + where.isEmpty() ? Optional.empty() : Optional.of(where.build()), + orderBy.isEmpty() ? Optional.empty() : Optional.of(orderBy.build()), + perPartitionLimit, limit, + allowFiltering); } } diff --git a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java index 4d72bae35c3e..368687b5afb1 100644 --- a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java +++ b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java @@ -20,9 +20,9 @@ import java.nio.ByteBuffer; import java.util.*; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; -import accord.utilsfork.RandomSource; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; import org.apache.cassandra.cql3.terms.*; import org.junit.Assert; import org.junit.Test; @@ -58,7 +58,7 @@ import org.mockito.Mockito; import org.quicktheories.generators.SourceDSL; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; diff --git a/test/unit/org/apache/cassandra/gms/VersionedValueTest.java b/test/unit/org/apache/cassandra/gms/VersionedValueTest.java index 42728040ecf4..af0177e3573d 100644 --- a/test/unit/org/apache/cassandra/gms/VersionedValueTest.java +++ b/test/unit/org/apache/cassandra/gms/VersionedValueTest.java @@ -20,7 +20,7 @@ import org.junit.Test; -import accord.utilsfork.Gen; +import accord.utils.Gen; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializers; import org.apache.cassandra.io.util.DataOutputBuffer; @@ -28,7 +28,7 @@ import org.apache.cassandra.utils.CassandraGenerators; import org.apache.cassandra.utils.Generators; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class VersionedValueTest { diff --git a/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java index 1663fbe5c3d8..fae644f3899f 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java @@ -24,8 +24,8 @@ import java.util.TreeMap; import javax.annotation.Nullable; -import accord.utilsfork.Gen; -import accord.utilsfork.Property; +import accord.utils.Gen; +import accord.utils.Property; import org.agrona.collections.IntArrayList; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.UntypedResultSet; @@ -33,7 +33,7 @@ import org.apache.cassandra.index.sai.SAITester; import org.assertj.core.api.Assertions; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public abstract class AbstractSimpleEqTestBase extends SAITester { diff --git a/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java index 9cca64d3d575..91bff0de97bd 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java @@ -28,8 +28,8 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.cql3.CQL3Type; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.DecimalType; diff --git a/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java b/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java index e1a42552d1fb..af4b458fecc4 100644 --- a/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java +++ b/test/unit/org/apache/cassandra/io/util/CompressedChunkReaderTest.java @@ -18,8 +18,8 @@ package org.apache.cassandra.io.util; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ClusteringComparator; import org.apache.cassandra.io.compress.CompressedSequentialWriter; @@ -36,7 +36,7 @@ import java.nio.file.Files; import java.util.concurrent.atomic.AtomicInteger; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class CompressedChunkReaderTest { diff --git a/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java b/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java index e8fcf286a62e..9d2fa5de21e2 100644 --- a/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java +++ b/test/unit/org/apache/cassandra/net/MessageDeliveryTest.java @@ -30,7 +30,7 @@ import org.junit.Assert; import org.junit.Test; -import accord.utilsfork.RandomSource; +import accord.utils.RandomSource; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.concurrent.SimulatedExecutorFactory; import org.apache.cassandra.config.DatabaseDescriptor; @@ -46,7 +46,7 @@ import org.apache.cassandra.utils.Backoff; import org.mockito.Mockito; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; import static org.assertj.core.api.Assertions.assertThat; public class MessageDeliveryTest diff --git a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java index dd9472a91189..2f5014abd8a8 100644 --- a/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java +++ b/test/unit/org/apache/cassandra/net/SimulatedMessageDelivery.java @@ -29,8 +29,8 @@ import java.util.function.LongSupplier; import javax.annotation.Nullable; -import accord.utilsfork.Gens; -import accord.utilsfork.RandomSource; +import accord.utils.Gens; +import accord.utils.RandomSource; import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.locator.InetAddressAndPort; diff --git a/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java b/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java index e4791f5a54f9..4eba7f0e36d5 100644 --- a/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/ConcurrentIrWithPreviewFuzzTest.java @@ -24,8 +24,8 @@ import org.junit.Test; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.db.ColumnFamilyStore; @@ -35,7 +35,7 @@ import org.apache.cassandra.utils.FailingBiConsumer; import org.assertj.core.api.Assertions; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class ConcurrentIrWithPreviewFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/FailedAckTest.java b/test/unit/org/apache/cassandra/repair/FailedAckTest.java index f96bf3732db9..c77a812f92ef 100644 --- a/test/unit/org/apache/cassandra/repair/FailedAckTest.java +++ b/test/unit/org/apache/cassandra/repair/FailedAckTest.java @@ -23,8 +23,8 @@ import org.junit.Test; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.db.compaction.ICompactionManager; @@ -39,7 +39,7 @@ import org.assertj.core.api.Assertions; import org.mockito.Mockito; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class FailedAckTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java index 7e58fd13b57e..ccce173d4486 100644 --- a/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/FailingRepairFuzzTest.java @@ -29,8 +29,8 @@ import com.google.common.collect.ImmutableList; import org.junit.Test; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.locator.InetAddressAndPort; @@ -42,7 +42,7 @@ import org.assertj.core.api.AbstractStringAssert; import org.assertj.core.api.Assertions; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class FailingRepairFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 1665ef80c6b2..ad814f0032d3 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -53,10 +53,10 @@ import org.junit.BeforeClass; -import accord.utilsfork.DefaultRandom; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; -import accord.utilsfork.RandomSource; +import accord.utils.DefaultRandom; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; import org.agrona.collections.LongHashSet; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.concurrent.ExecutorBuilder; diff --git a/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java b/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java index 980942193114..f8e570b5c103 100644 --- a/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/HappyPathFuzzTest.java @@ -28,14 +28,14 @@ import org.junit.Test; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.agrona.collections.LongArrayList; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.utils.Closeable; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class HappyPathFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java b/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java index 6160532e8fe9..03c151ec683e 100644 --- a/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java +++ b/test/unit/org/apache/cassandra/repair/SlowMessageFuzzTest.java @@ -23,13 +23,13 @@ import org.junit.Test; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.RetrySpec; import org.apache.cassandra.utils.Closeable; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class SlowMessageFuzzTest extends FuzzTestBase { diff --git a/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java b/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java index acd0e0f8f822..e34d299a1505 100644 --- a/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java +++ b/test/unit/org/apache/cassandra/schema/MemtableParamsTest.java @@ -21,7 +21,7 @@ import java.util.LinkedHashMap; import java.util.Map; -import accord.utilsfork.Gen; +import accord.utils.Gen; import com.google.common.collect.ImmutableMap; import org.apache.cassandra.config.Config; import org.apache.cassandra.utils.ConfigGenBuilder; @@ -33,7 +33,7 @@ import org.apache.cassandra.db.memtable.SkipListMemtableFactory; import org.apache.cassandra.exceptions.ConfigurationException; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; import static org.apache.cassandra.config.YamlConfigurationLoader.fromMap; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; diff --git a/test/unit/org/apache/cassandra/schema/TableParamsTest.java b/test/unit/org/apache/cassandra/schema/TableParamsTest.java new file mode 100644 index 000000000000..e8bf30fe0aaa --- /dev/null +++ b/test/unit/org/apache/cassandra/schema/TableParamsTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.schema; + +import org.junit.Test; + +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializers; +import org.apache.cassandra.utils.CassandraGenerators.TableParamsBuilder; +import org.apache.cassandra.utils.FailingConsumer; +import org.quicktheories.core.Gen; + +import static org.quicktheories.QuickTheory.qt; + + +public class TableParamsTest +{ + @Test + public void serdeLatest() + { + DataOutputBuffer output = new DataOutputBuffer(); + qt().forAll(tableParams()).checkAssert(FailingConsumer.orFail(params -> { + AsymmetricMetadataSerializers.testSerde(output, TableParams.serializer, params, NodeVersion.CURRENT_METADATA_VERSION); + })); + } + + private static Gen tableParams() + { + return new TableParamsBuilder() + .withKnownMemtables() + .withTransactionalMode() + .withFastPathStrategy() + .build(); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 02d823cfd453..3939371ad93b 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -135,7 +135,7 @@ public void serde() public void findOverlappingKeys() { var tableIdGen = fromQT(CassandraGenerators.TABLE_ID_GEN); - var partitionGen = fromQT(CassandraGenerators.partitioners()); + var partitionGen = fromQT(CassandraGenerators.partitioners()).map(CassandraGenerators::simplify); var sstableFormats = DatabaseDescriptor.getSSTableFormats(); List sstableFormatNames = new ArrayList<>(sstableFormats.keySet()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java index 83eced9b6628..bce9a3fb1447 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStaleReplicasTest.java @@ -47,7 +47,7 @@ public void serde() qt().check(rs -> { Epoch epoch = epochGen.next(rs); Set nodes = nodesGen.next(rs); - AsymmetricMetadataSerializers.testSerde(buffer, AccordStaleReplicas.serializer, new AccordStaleReplicas(nodes, epoch), Version.V2); + AsymmetricMetadataSerializers.testSerde(buffer, AccordStaleReplicas.serializer, new AccordStaleReplicas(nodes, epoch), Version.MIN_ACCORD_VERSION); }); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index 12d5c75c01de..5969770e2e18 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -58,7 +58,6 @@ import org.apache.cassandra.concurrent.AdaptingScheduledExecutorPlus; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.gms.EndpointState; import org.apache.cassandra.gms.Gossiper; @@ -70,8 +69,8 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.RequestCallback; -import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tcm.StubClusterMetadataService; +import org.apache.cassandra.tcm.ValidatingClusterMetadataService; +import org.apache.cassandra.tcm.serialization.Version; import org.apache.cassandra.utils.AccordGenerators; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.concurrent.Future; @@ -86,9 +85,7 @@ public class AccordSyncPropagatorTest public static void setup() throws NoSuchFieldException, IllegalAccessException { DatabaseDescriptor.daemonInitialization(); - DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); - ClusterMetadataService.unsetInstance(); - ClusterMetadataService.setInstance(StubClusterMetadataService.forTesting()); + ValidatingClusterMetadataService.createAndRegister(Version.MIN_ACCORD_VERSION); } @Test diff --git a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java index 55aa527e9465..f5faef62c93f 100644 --- a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java +++ b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java @@ -20,7 +20,6 @@ import java.net.UnknownHostException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -28,6 +27,7 @@ import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableSet; @@ -38,7 +38,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; -import java.util.function.Consumer; +import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.LongStream; @@ -49,8 +49,8 @@ import accord.api.ConfigurationService; import accord.api.ConfigurationService.EpochReady; -import accord.api.Scheduler; import accord.api.LocalConfig; +import accord.api.Scheduler; import accord.impl.SizeOfIntersectionSorter; import accord.impl.TestAgent; import accord.local.Node; @@ -60,7 +60,7 @@ import accord.topology.TopologyManager; import accord.utils.Gen; import accord.utils.Invariants; -import accord.utils.Property.UnitCommand; +import accord.utils.Property.SimpleCommand; import accord.utils.RandomSource; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; @@ -74,17 +74,14 @@ import org.apache.cassandra.gms.IFailureDetectionEventListener; import org.apache.cassandra.gms.IFailureDetector; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.Replica; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.SimulatedMessageDelivery; import org.apache.cassandra.net.SimulatedMessageDelivery.Action; import org.apache.cassandra.net.Verb; -import org.apache.cassandra.schema.DistributedMetadataLogKeyspace; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableParams; @@ -94,13 +91,19 @@ import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.MultiStepOperation; import org.apache.cassandra.tcm.StubClusterMetadataService; -import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.ValidatingClusterMetadataService; import org.apache.cassandra.tcm.membership.Location; import org.apache.cassandra.tcm.membership.NodeAddresses; import org.apache.cassandra.tcm.membership.NodeId; -import org.apache.cassandra.tcm.ownership.DataPlacement; -import org.apache.cassandra.tcm.ownership.DataPlacements; +import org.apache.cassandra.tcm.membership.NodeState; +import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.UniformRangePlacement; +import org.apache.cassandra.tcm.sequences.LeaveStreams; +import org.apache.cassandra.tcm.transformations.PrepareJoin; +import org.apache.cassandra.tcm.transformations.PrepareLeave; import org.apache.cassandra.utils.ByteArrayUtil; import org.apache.cassandra.utils.Pair; import org.assertj.core.api.Assertions; @@ -123,85 +126,75 @@ public class EpochSyncTest @Test public void test() { - stateful().withExamples(50).check(commands(() -> Cluster::new) - .destroyState(cluster -> { - cluster.processAll(); - cluster.validate(true); - }) - .addIf(cluster -> cluster.alive().size() <= cluster.maxNodes, EpochSyncTest::addNode) - .addIf(cluster -> cluster.alive().size() > cluster.minNodes, EpochSyncTest::removeNode) - .addIf(cluster -> cluster.hasWork(), EpochSyncTest::processSome) - .add(rs -> new SimpleCommand("Validate", c -> c.validate(false))) - .add((rs, cluster) -> new SimpleCommand("Bump Epoch " + (cluster.current.epoch.getEpoch() + 1), Cluster::bumpEpoch)) - .build()); + stateful().withExamples(50).withSteps(500).check(commands(() -> Cluster::new) + .destroyState(cluster -> { + finishPendingWork(cluster); + cluster.processAll(); + cluster.validate(true); + }) + .addAllIf(Cluster::hasPendingWork, b -> + b.addIf(c -> !c.status(s -> s == Cluster.Status.Registered).isEmpty(), (rs, state) -> { + long epoch = state.cms.metadata().epoch.getEpoch() + 1; + Node.Id pick = rs.pick(state.status(s -> s == Cluster.Status.Registered)); + return new SimpleCommand<>(pick + " Start Joining; epoch=" + epoch, + c -> c.increment(pick)); + }) + .addIf(c -> !c.cms.metadata().inProgressSequences.isEmpty(), + (rs, state) -> new SimpleCommand<>("Next Epoch Step; epoch=" + (state.cms.metadata().epoch.getEpoch() + 1), + Cluster::incrementInProgressSequences)) + ) + .addAllIf(Cluster::hasNoPendingWork, b -> + b.addIf(cluster -> cluster.joined().size() <= cluster.maxNodes, EpochSyncTest::addNode) + .addIf(cluster -> cluster.joined().size() > cluster.minNodes, EpochSyncTest::removeNode) + ) + .addIf(Cluster::hasWork, EpochSyncTest::processSome) + .add(rs -> new SimpleCommand<>("Validate", c -> c.validate(false))) + .add((rs, cluster) -> new SimpleCommand<>("Bump Epoch " + (cluster.cms.metadata().epoch.getEpoch() + 1), Cluster::bumpEpoch)) + .build()); + } + + private static void finishPendingWork(Cluster cluster) + { + List registered = cluster.status(s -> s == Cluster.Status.Registered); + if (!registered.isEmpty()) + registered.forEach(cluster::increment); + while (!cluster.cms.metadata().inProgressSequences.isEmpty()) + cluster.incrementInProgressSequences(); } - private static SimpleCommand addNode(RandomSource rs, Cluster cluster) + private static SimpleCommand addNode(RandomSource rs, Cluster cluster) { Node.Id id = new Node.Id(++cluster.nodeCounter); long token = cluster.tokenGen.nextLong(rs); while (cluster.tokens.contains(token)) token = cluster.tokenGen.nextLong(rs); - long epoch = cluster.current.epoch.getEpoch() + 1; + long epoch = cluster.cms.metadata().epoch.getEpoch() + 1; long finalToken = token; - return new SimpleCommand("Add Node " + id + "; token=" + token + ", epoch=" + epoch, - c -> c.addNode(id, finalToken)); + return new SimpleCommand<>("Start Node " + id + "; token=" + token + ", epoch=" + epoch, + c -> c.registerNode(id, finalToken)); } - private static SimpleCommand removeNode(RandomSource rs, Cluster cluster) + private static SimpleCommand removeNode(RandomSource rs, Cluster cluster) { - List alive = cluster.alive(); + List alive = cluster.joined(); Node.Id pick = rs.pick(alive); long token = cluster.instances.get(pick).token; - long epoch = cluster.current.epoch.getEpoch() + 1; - return new SimpleCommand("Remove Node " + pick + "; token=" + token + "; epoch=" + epoch, c -> c.removeNode(pick)); + long epoch = cluster.cms.metadata().epoch.getEpoch() + 1; + return new SimpleCommand<>("Remove Node " + pick + "; token=" + token + "; epoch=" + epoch, c -> c.removeNode(pick)); } - private static SimpleCommand processSome(RandomSource rs) - { - return new SimpleCommand("Process Some", - c -> {//noinspection StatementWithEmptyBody - for (int i = 0, attempts = rs.nextInt(1, 100); i < attempts && c.processOne(); i++) - { - } - }); - } - - private static class SimpleCommand implements UnitCommand - { - private final String name; - private final Consumer fn; - - private SimpleCommand(String name, Consumer fn) - { - this.name = name; - this.fn = fn; - } - - @Override - public String detailed(Cluster Cluster) - { - return name; - } - - @Override - public void applyUnit(Cluster Cluster) - { - fn.accept(Cluster); - } - - @Override - public void runUnit(Void Void) - { - - } + private static SimpleCommand processSome(RandomSource rs) { + return new SimpleCommand<>("Process Some", + c -> {//noinspection StatementWithEmptyBody + for (int i = 0, attempts = rs.nextInt(1, 100); i < attempts && c.processOne(); i++) { + } + }); } private static class Cluster { private static final int rf = 2; private static final ReplicationParams replication_params = ReplicationParams.simple(rf); - private static final ReplicationParams meta = ReplicationParams.simpleMeta(1, Collections.singleton("dc1")); private final RandomSource rs; private final int minNodes, maxNodes; @@ -213,16 +206,13 @@ private static class Cluster private final SimulatedExecutorFactory globalExecutor; private final ScheduledExecutorPlus scheduler; private int nodeCounter = 0; - private ClusterMetadata current = new ClusterMetadata(Murmur3Partitioner.instance, Directory.EMPTY, - new DistributedSchema(Keyspaces.of( - DistributedMetadataLogKeyspace.initialMetadata(Collections.singleton("dc1")), - KeyspaceMetadata.create("test", KeyspaceParams.simple(rf), Tables.of(TableMetadata.minimal("test", "tb1").unbuild().params(TableParams.builder().transactionalMode(TransactionalMode.full).build()).build()))))); + private final ValidatingClusterMetadataService cms = ValidatingClusterMetadataService.createAndRegister(NodeVersion.CURRENT_METADATA_VERSION); private final IFailureDetector fd = new IFailureDetector() { @Override public boolean isAlive(InetAddressAndPort ep) { - return !removed.contains(nodeId(ep)); + return instances.get(nodeId(ep)).status != Status.Removed; } @Override @@ -262,47 +252,16 @@ public void unregisterFailureDetectionEventListener(IFailureDetectionEventListen } }; - private static InetAddressAndPort address(Node.Id id) - { - try - { - return InetAddressAndPort.getByAddress(ByteArrayUtil.bytes(id.id)); - } - catch (UnknownHostException e) - { - throw new AssertionError("Unable to create address for id " + id, e); - } - } - - public enum EpochTracker { topologyManager, accordSyncPropagator, configurationService} - - Set globalSynced(long epoch) - { - return alive().stream() - .filter(n -> instances.get(n).epoch.getEpoch() <= epoch) - .map(n -> instances.get(n).synced(epoch)) - .reduce(EnumSet.allOf(EpochTracker.class), Sets::intersection); - } - - boolean allSynced(long epoch) - { - Set done = globalSynced(epoch); - return done.contains(EpochTracker.topologyManager); - } - - private static Node.Id nodeId(InetAddressAndPort address) - { - return new Node.Id(ByteArrayUtil.getInt(address.addressBytes)); - } - public Cluster(RandomSource rs) { + // add the test keyspace + createTestKeyspaceAndTable(); this.rs = rs; this.minNodes = 3; this.maxNodes = 10; this.tokenGen = rs2 -> rs2.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); - this.globalExecutor = new SimulatedExecutorFactory(accord.utilsfork.RandomSource.wrap(rs.asJdkRandom()), failures::add); + this.globalExecutor = new SimulatedExecutorFactory(rs, failures::add); this.scheduler = globalExecutor.scheduled("ignored"); Stage.MISC.unsafeSetExecutor(scheduler); @@ -321,7 +280,7 @@ public Cluster(RandomSource rs) else { // add partition - List alive = alive(); + List alive = notRemoved(); InetAddressAndPort a = address(rs.pick(alive)); InetAddressAndPort b = address(rs.pick(alive)); while (a.equals(b)) @@ -331,15 +290,110 @@ public Cluster(RandomSource rs) }, 1, 1, TimeUnit.MINUTES); } + private static InetAddressAndPort address(Node.Id id) + { + try + { + return InetAddressAndPort.getByAddress(ByteArrayUtil.bytes(id.id)); + } + catch (UnknownHostException e) + { + throw new AssertionError("Unable to create address for id " + id, e); + } + } + + private boolean hasPendingWork() + { + return !status(s -> s == Cluster.Status.Registered).isEmpty() + || !cms.metadata().inProgressSequences.isEmpty(); + } + + private boolean hasNoPendingWork() + { + return !hasPendingWork(); + } + + private Transformation.Success process(Transformation transformation) + { + Transformation.Result result = transformation.execute(cms.metadata()); + if (result.isRejected()) + throw new IllegalStateException("Unable to make TCM transition: " + result.rejected()); + return result.success(); + } + + private Transformation.Success process(MultiStepOperation transformation) + { + Transformation.Result result = transformation.applyTo(cms.metadata()); + if (result.isRejected()) + throw new IllegalStateException("Unable to make TCM transition"); + return result.success(); + } + + public void incrementInProgressSequences() + { + if (cms.metadata().inProgressSequences.isEmpty()) + throw new IllegalStateException("Attempted to bump epoch when nothing was pending"); + Iterator> it = cms.metadata().inProgressSequences.iterator(); + Invariants.checkState(it.hasNext()); + notify(process(it.next()).metadata); + } + + private static boolean left(ClusterMetadata metadata, Node.Id id) + { + return metadata.directory.peerState(new NodeId(id.id)) == NodeState.LEFT; + } + + private static boolean joined(ClusterMetadata metadata, Node.Id id) + { + NodeAddresses address = metadata.directory.getNodeAddresses(new NodeId(id.id)); + return metadata.placements.get(replication_params).reads.byEndpoint().keySet().contains(address.broadcastAddress); + } + + public enum EpochTracker { topologyManager, accordSyncPropagator, configurationService} + + Set globalSynced(long epoch) + { + return notRemoved().stream() + .filter(n -> instances.get(n).epoch.getEpoch() <= epoch) + .map(n -> instances.get(n).synced(epoch)) + .reduce(EnumSet.allOf(EpochTracker.class), Sets::intersection); + } + + boolean allSynced(long epoch) + { + Set done = globalSynced(epoch); + return done.contains(EpochTracker.topologyManager); + } + + private static Node.Id nodeId(InetAddressAndPort address) + { + return new Node.Id(ByteArrayUtil.getInt(address.addressBytes)); + } + + private void createTestKeyspaceAndTable() + { + ClusterMetadata current = cms.metadata(); + Tables tables = Tables.of(TableMetadata.minimal("test", "tb1").unbuild() + .partitioner(Murmur3Partitioner.instance) + .params(TableParams.builder().transactionalMode(TransactionalMode.full).build()) + .build()); + KeyspaceMetadata ks = KeyspaceMetadata.create("test", KeyspaceParams.simple(rf), tables); + + cms.setMetadata(current.transformer() + .with(new DistributedSchema(current.schema.getKeyspaces().with(ks))) + .build() + .metadata); + } + void validate(boolean isDone) { - for (Node.Id id : alive()) + for (Node.Id id : notRemoved()) { Instance inst = instances.get(id); if (removed.contains(id)) continue; // ignore removed nodes AccordConfigurationService conf = inst.config; TopologyManager tm = inst.topology; - for (long epoch = inst.epoch.getEpoch(); epoch <= current.epoch.getEpoch(); epoch++) + for (long epoch = inst.epoch.getEpoch(); epoch <= cms.metadata().epoch.getEpoch(); epoch++) { // validate config EpochSnapshot snapshot = conf.getEpochSnapshot(epoch); @@ -352,7 +406,9 @@ void validate(boolean isDone) Assertions.assertThat(tm.hasEpoch(epoch)).describedAs("node%s does not have epoch %d", id, epoch).isTrue(); Ranges ranges = tm.globalForEpoch(epoch).ranges().mergeTouching(); Ranges actual = tm.syncComplete(epoch).mergeTouching(); - Assertions.assertThat(actual).describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s", id, epoch, ranges.without(actual)).isEqualTo(ranges); + Assertions.assertThat(actual) + .describedAs("node%s does not have all expected sync ranges for epoch %d; missing %s", id, epoch, ranges.without(actual)) + .isEqualTo(ranges); } else { @@ -379,13 +435,32 @@ void validate(boolean isDone) String displayTopology() { - List alive = alive(); - List> withToken = new ArrayList<>(alive.size()); - for (Node.Id n : alive) - withToken.add(Pair.create(n, instances.get(n).token)); - withToken.sort(Comparator.comparing(a -> a.right)); + class Hold { + final Cluster.Status status; + final long token; + + Hold(Status status, long token) + { + this.status = status; + this.token = token; + } + + @Override + public String toString() + { + return status + "\t" + (status == Status.Registered ? "?" : Long.toString(token)); + } + } + List notRemoved = notRemoved(); + List> list = new ArrayList<>(notRemoved.size()); + for (Node.Id n : notRemoved) + { + Instance instance = instances.get(n); + list.add(Pair.create(n, new Hold(instance.status, instance.token))); + } + list.sort(Comparator.comparing(a -> a.right.token)); StringBuilder sb = new StringBuilder(); - for (var p : withToken) + for (var p : list) sb.append(p.left).append('\t').append(p.right).append('\n'); return sb.toString(); } @@ -427,7 +502,24 @@ public void checkFailures() throw error; } - List alive() + List joined() + { + return status(s -> s == Status.Joined); + } + + List status(Predicate fn) + { + List ids = new ArrayList<>(instances.size()); + for (Instance i : instances.values()) + { + if (fn.test(i.status)) + ids.add(i.id); + } + ids.sort(Comparator.naturalOrder()); + return ids; + } + + List notRemoved() { ArrayList ids = new ArrayList<>(Sets.difference(instances.keySet(), removed)); ids.sort(Comparator.naturalOrder()); @@ -459,73 +551,74 @@ private SimulatedMessageDelivery createMessaging(Node.Id id) return rs.nextBoolean() ? Action.DELIVER_WITH_FAILURE : Action.FAILURE; return Action.DELIVER; }, - SimulatedMessageDelivery.randomDelay(accord.utilsfork.RandomSource.wrap(rs.asJdkRandom())), + SimulatedMessageDelivery.randomDelay(rs.fork()), (to, msg) -> instances.get(nodeId(to)).reciver.recieve(msg), - (action, to, msg) -> logger.warn("{} message {}", action, msg), + (action, to, msg) -> logger.trace("{} message {}", action, msg), scheduler::schedule, failures::add); } - void addNode(Node.Id id, long token) + void registerNode(Node.Id id, long token) { Invariants.checkState(!tokens.contains(token), "Attempted to add token %d for node %s but token is already taken", token, id); - Epoch epoch = Epoch.create(current.epoch.getEpoch() + 1); + Invariants.checkState(!instances.containsKey(id), "Attempted to add node %s; but already exists", id); - Instance instance = new Instance(id, token, epoch, createMessaging(id), fd); + ClusterMetadata.Transformer builder = cms.metadata().transformer(); + + Instance instance = new Instance(id, token, builder.epoch(), createMessaging(id), fd); instances.put(id, instance); tokens.add(token); - current = current.forceEpoch(epoch) - .withPlacements(DataPlacements.builder(2) - .with(meta, DataPlacement.empty()) - .with(replication_params, rebuildPlacements(epoch)) - .build()) - .withDirectory(current.directory.with(new NodeAddresses(address(id)), new Location("dc1", "r1"))); - notify(current); + builder.register(new NodeAddresses(address(id)), new Location("dc1", "r1"), NodeVersion.CURRENT); + notify(builder.build().metadata); } - void removeNode(Node.Id pick) + void increment(Node.Id pick) { Instance inst = Objects.requireNonNull(instances.get(pick), "Unknown id " + pick); - Invariants.checkState(!removed.contains(pick), "Can not remove node twice; node " + pick); - tokens.remove(inst.token); - removed.add(pick); - inst.stop(); - current = current.forceEpoch(Epoch.create(current.epoch.getEpoch() + 1)) - .withDirectory(current.directory.without(new NodeId(pick.id))); - current = current.withPlacements(DataPlacements.builder(2) - .with(meta, DataPlacement.empty()) - .with(replication_params, rebuildPlacements(current.epoch)) - .build()); - notify(current); + switch (inst.status) + { + case Init: + case Joined: + case Removed: + throw new IllegalStateException("Unexpected status: " + inst.status); + case Registered: + inst.status = Status.Joining; + PrepareJoin task = new PrepareJoin(new NodeId(pick.id), Collections.singleton(new LongToken(inst.token)), new UniformRangePlacement(), true, false); + notify(process(task).metadata); + break; + default: + throw new UnsupportedOperationException("Unknown status: " + inst.status); + } } - private DataPlacement rebuildPlacements(Epoch epoch) + void removeNode(Node.Id pick) { - DataPlacement.Builder builder = DataPlacement.builder(); - for (Node.Id inst : alive()) - for (Replica replica : instances.get(inst).replica()) - builder.withReadReplica(epoch, replica).withWriteReplica(epoch, replica); - return builder.build(); + Instance inst = Objects.requireNonNull(instances.get(pick), "Unknown id " + pick); + Invariants.checkState(!removed.contains(pick), "Can not remove node twice; node " + pick); + removed.add(pick); + inst.status = Status.Leaving; + PrepareLeave prepareLeave = new PrepareLeave(new NodeId(pick.id), false, new UniformRangePlacement(), LeaveStreams.Kind.REMOVENODE); + notify(process(prepareLeave).metadata); } void bumpEpoch() { - current = current.forceEpoch(Epoch.create(current.epoch.getEpoch() + 1)); - notify(current); + notify(cms.metadata().forceEpoch(Epoch.create(cms.metadata().epoch.getEpoch() + 1))); } private void notify(ClusterMetadata current) { - Ranges ranges = AccordTopology.createAccordTopology(current).ranges().mergeTouching(); - if (!current.directory.isEmpty()) + Topology t = AccordTopology.createAccordTopology(current); + Ranges ranges = t.ranges().mergeTouching(); + if (!current.placements.get(replication_params).reads.isEmpty()) Assertions.assertThat(ranges).hasSize(1); - ((StubClusterMetadataService) ClusterMetadataService.instance()).setMetadata(current); - for (Node.Id id : alive()) + cms.setMetadata(current); + for (Node.Id id : status(s -> s != Status.Removed)) { Instance inst = instances.get(id); - inst.maybeStart(); + inst.maybeTransition(current, t); inst.config.maybeReportMetadata(current); } } @@ -555,7 +648,7 @@ protected void start(BiConsumer callback) }; } - private enum Status { Init, Started} + private enum Status { Init, Registered, Joining, Joined, Leaving, Removed} private class Instance { private final Node.Id id; @@ -634,12 +727,47 @@ public void onEpochRedundant(Ranges ranges, long epoch) this.reciver = messagingService.receiver(new SimulatedMessageDelivery.SimpleVerbHandler(handlers)); } - void maybeStart() + @Override + public String toString() + { + return "Instance{" + + "id=" + id + + ", token=" + token + + ", epoch=" + epoch + + ", status=" + status + + '}'; + } + + void maybeTransition(ClusterMetadata current, Topology t) { - if (status == Status.Init) + switch (status) { - start(); - status = Status.Started; + case Init: + Invariants.checkState(!t.nodes().contains(id), "Node was in Init state but present in the Topology!"); + Invariants.checkState(current.directory.peerId(address(id)) != null, "Node exists but not in TCM"); + start(); + status = Status.Registered; + break; + case Registered: + Invariants.checkState(!t.nodes().contains(id), "Node was in Init state but present in the Topology!"); + Invariants.checkState(current.directory.peerId(address(id)) != null, "Node exists but not in TCM"); + if (current.placements.get(replication_params).writes.byEndpoint().keySet().contains(address(id))) + status = Status.Joining; + break; + case Joining: + Invariants.checkState(current.directory.peerId(address(id)) != null, "Node exists but not in TCM"); + if (joined(current, id)) + status = Status.Joined; + case Removed: + case Joined: + // nothing to do + break; + case Leaving: + if (left(current, id)) + stop(); + break; + default: + throw new UnsupportedOperationException("Unknown status: " + status); } } @@ -653,20 +781,6 @@ TopologyManager topology() return topology; } - Collection replica() - { - InetAddressAndPort address = Cluster.address(id); - SortedSet lessThan = tokens.headSet(token); - if (lessThan.isEmpty()) - { - // wrap around - return Arrays.asList(new Replica(address, new LongToken(Long.MIN_VALUE), new LongToken(token), true), - new Replica(address, new LongToken(tokens.last()), new LongToken(Long.MIN_VALUE), true)); - } - - return Collections.singletonList(new Replica(address, new LongToken(lessThan.last()), new LongToken(token), true)); - } - Set synced(long epoch) { if (epoch < this.epoch.getEpoch()) throw new IllegalArgumentException("Asked for epoch before this instance existed"); @@ -683,6 +797,8 @@ Set synced(long epoch) void stop() { + status = Status.Removed; + tokens.remove(token); messaging.stop(); } } diff --git a/test/unit/org/apache/cassandra/service/accord/FetchMinEpochTest.java b/test/unit/org/apache/cassandra/service/accord/FetchMinEpochTest.java new file mode 100644 index 000000000000..53319a7ec2f7 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/FetchMinEpochTest.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import com.google.common.collect.ImmutableMap; +import org.junit.Assert; +import org.junit.Test; + +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.RetrySpec; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.IVersionedSerializers; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.net.SimulatedMessageDelivery.Action; +import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; +import org.apache.cassandra.utils.SimulatedMiniCluster; +import org.apache.cassandra.utils.SimulatedMiniCluster.Node; +import org.apache.cassandra.utils.concurrent.Future; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.net.MessagingService.Version.VERSION_51; +import static org.apache.cassandra.utils.AccordGenerators.fromQT; +import static org.assertj.core.api.Assertions.assertThat; + +public class FetchMinEpochTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + private static final Gen> ACTION_DISTRIBUTION = Gens.enums().allMixedDistribution(Action.class); + private static final List SUPPORTED = Stream.of(MessagingService.Version.values()).filter(v -> v.compareTo(VERSION_51) >= 0).collect(Collectors.toList()); + + private static void boundedRetries(int retries) + { + DatabaseDescriptor.getAccord().minEpochSyncRetry.maxAttempts = new RetrySpec.MaxAttempt(retries); + } + + @Test + public void requestSerde() + { + DataOutputBuffer output = new DataOutputBuffer(); + Gen gen = fromQT(CassandraGenerators.partitioners()) + .map(CassandraGenerators::simplify) + .flatMap(partitioner -> + Gens.lists(AccordGenerators.range(partitioner) + .map(r -> (TokenRange) r)) + .ofSizeBetween(0, 10) + .map(FetchMinEpoch::new)); + qt().forAll(gen).check(req -> { + maybeSetPartitioner(req); + for (MessagingService.Version version : SUPPORTED) + IVersionedSerializers.testSerde(output, FetchMinEpoch.serializer, req, version.value); + }); + } + + @Test + public void responseSerde() + { + Gen all = Gens.longs().all(); + Gen nulls = ignore -> null; + Gen domain = rs -> rs.nextBoolean() ? nulls.next(rs) : all.next(rs); + DataOutputBuffer output = new DataOutputBuffer(); + qt().forAll(domain.map(FetchMinEpoch.Response::new)).check(rsp -> { + for (MessagingService.Version version : SUPPORTED) + IVersionedSerializers.testSerde(output, FetchMinEpoch.Response.serializer, rsp, version.value); + }); + } + + @Test + public void fetchOneNodeAlwaysFails() + { + int expectedMaxAttempts = 3; + boundedRetries(expectedMaxAttempts); + qt().check(rs -> { + SimulatedMiniCluster cluster = new SimulatedMiniCluster.Builder(rs, node -> msg -> {throw new IllegalStateException();}).build(); + Node from = cluster.createNodeAndJoin(); + Node to = cluster.createNodeAndJoin(); + + Future f = FetchMinEpoch.fetch(from, to.broadcastAddressAndPort(), Collections.emptySet()); + assertThat(f).isNotDone(); + cluster.processAll(); + assertThat(f).isDone(); + MessageDelivery.MaxRetriesException maxRetries = getMaxRetriesException(f); + Assertions.assertThat(maxRetries.attempts).isEqualTo(expectedMaxAttempts); + }); + } + + @Test + public void fetchOneNode() + { + int maxRetries = 42; + boundedRetries(maxRetries); + qt().check(rs -> { + long epoch = rs.nextLong(0, Long.MAX_VALUE); + SimulatedMiniCluster cluster = new SimulatedMiniCluster.Builder(rs, node -> msg -> node.messaging().respond(new FetchMinEpoch.Response(epoch), msg)).build(); + Node from = cluster.createNodeAndJoin(); + { + Supplier safeActionGen = actionGen(rs, maxRetries); + from.messagingActions((self, msg, to) -> safeActionGen.get()); + } + Node to = cluster.createNodeAndJoin(); + + Future f = FetchMinEpoch.fetch(from, to.broadcastAddressAndPort(), Collections.emptySet()); + assertThat(f).isNotDone(); + cluster.processAll(); + assertThat(f).isDone(); + assertThat(f.get()).isEqualTo(epoch); + }); + } + + @Test + public void fetchManyNodesAllNodesFail() + { + int expectedMaxAttempts = 3; + boundedRetries(expectedMaxAttempts); + qt().check(rs -> { + SimulatedMiniCluster cluster = new SimulatedMiniCluster.Builder(rs, node -> msg -> {throw new IllegalStateException();}).build(); + + Node from = cluster.createNodeAndJoin(); + Node to1 = cluster.createNodeAndJoin(); + Node to2 = cluster.createNodeAndJoin(); + Node to3 = cluster.createNodeAndJoin(); + Node to4 = cluster.createNodeAndJoin(); + + Future f = FetchMinEpoch.fetch(from, ImmutableMap.of(to1.broadcastAddressAndPort(), Collections.emptySet(), + to2.broadcastAddressAndPort(), Collections.emptySet(), + to3.broadcastAddressAndPort(), Collections.emptySet(), + to4.broadcastAddressAndPort(), Collections.emptySet())); + assertThat(f).isNotDone(); + cluster.processAll(); + assertThat(f).isDone(); + assertThat(f.get()).isNull(); + }); + } + + @Test + public void fetchManyNodes() + { + boundedRetries(Integer.MAX_VALUE); // networking should be unbounded, but the actions should be bounded + int maxRetries = 3; + qt().check(rs -> { + Map nodeToEpoch = new HashMap<>(); + Long min = null; + for (int i = 2; i < 6; i++) + { + Long epoch = rs.nextBoolean() ? null : rs.nextLong(); + nodeToEpoch.put(i, epoch); + if (min == null) min = epoch; + else if (epoch != null) min = Math.min(min, epoch); + } + + SimulatedMiniCluster cluster = new SimulatedMiniCluster.Builder(rs, node -> msg -> node.messaging().respond(new FetchMinEpoch.Response(nodeToEpoch.get(node.id().id())), msg)).build(); + + Node from = cluster.createNodeAndJoin(); + Node to1 = cluster.createNodeAndJoin(); + Node to2 = cluster.createNodeAndJoin(); + Node to3 = cluster.createNodeAndJoin(); + Node to4 = cluster.createNodeAndJoin(); + Map> nodeToActions = ImmutableMap.of(to1.broadcastAddressAndPort(), actionGen(rs, maxRetries), + to2.broadcastAddressAndPort(), actionGen(rs, maxRetries), + to3.broadcastAddressAndPort(), actionGen(rs, maxRetries), + to4.broadcastAddressAndPort(), actionGen(rs, maxRetries)); + from.messagingActions((self, msg, to) -> nodeToActions.get(to).get()); + + Future f = FetchMinEpoch.fetch(from, ImmutableMap.of(to1.broadcastAddressAndPort(), Collections.emptySet(), + to2.broadcastAddressAndPort(), Collections.emptySet(), + to3.broadcastAddressAndPort(), Collections.emptySet(), + to4.broadcastAddressAndPort(), Collections.emptySet())); + assertThat(f).isNotDone(); + cluster.processAll(); + assertThat(f).isDone(); + assertThat(f.get()).isEqualTo(min); + }); + } + + private static Supplier actionGen(RandomSource rs, int maxRetries) + { + RandomSource actionSource = rs.fork(); + Gen actionGen = ACTION_DISTRIBUTION.next(actionSource); + // it is very possible that DELIVER is very rare, which will cause the test to run for a long time and could fail in CI, + // when a long history of non-DELIVER is seen, start to force DELIVER to bound the amount of processing in the test + Gen safeActionGen = new Gen<>() + { + private int notDelivers = 0; + @Override + public Action next(RandomSource rng) + { + if (notDelivers > maxRetries - 1) + return Action.DELIVER; + Action action = actionGen.next(rng); + if (action == Action.DELIVER) notDelivers = 0; + else notDelivers++; + return action; + } + }; + return safeActionGen.asSupplier(actionSource); + } + + private static void maybeSetPartitioner(FetchMinEpoch req) + { + IPartitioner partitioner = null; + for (TokenRange r : req.ranges) + { + IPartitioner rangePartitioner = null; + if (r.start().kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.TOKEN) + rangePartitioner = r.start().token().getPartitioner(); + if (rangePartitioner == null && r.end().kindOfRoutingKey() == AccordRoutingKey.RoutingKeyKind.TOKEN) + rangePartitioner = r.end().token().getPartitioner(); + if (rangePartitioner == null) + continue; + if (partitioner == null) + { + partitioner = rangePartitioner; + } + else + { + Assertions.assertThat(rangePartitioner).isEqualTo(partitioner); + } + } + if (partitioner != null) + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + } + + private static MessageDelivery.MaxRetriesException getMaxRetriesException(Future f) throws InterruptedException, ExecutionException + { + MessageDelivery.MaxRetriesException maxRetries; + try + { + f.get(); + Assert.fail("Future should have failed"); + throw new AssertionError("Unreachable"); + } + catch (ExecutionException e) + { + if (e.getCause() instanceof MessageDelivery.MaxRetriesException) + { + maxRetries = (MessageDelivery.MaxRetriesException) e.getCause(); + } + else + { + throw e; + } + } + return maxRetries; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index cb8929bf37af..68eadd0d1b7c 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -102,7 +102,7 @@ public class SimulatedAccordCommandStore implements AutoCloseable public SimulatedAccordCommandStore(RandomSource rs) { - globalExecutor = new SimulatedExecutorFactory(accord.utilsfork.RandomSource.wrap(rs).fork(), fromQT(Generators.TIMESTAMP_GEN.map(java.sql.Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs), failures::add); + globalExecutor = new SimulatedExecutorFactory(rs.fork(), fromQT(Generators.TIMESTAMP_GEN.map(java.sql.Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs), failures::add); this.unorderedScheduled = globalExecutor.scheduled("ignored"); ExecutorFactory.Global.unsafeSet(globalExecutor); Stage.READ.unsafeSetExecutor(unorderedScheduled); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 4d4424e1b2d7..aca870245406 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -303,7 +303,7 @@ private static void assertFutureState(AccordStateCache.Instance { + qt().forAll(fromQT(CassandraGenerators.partitioners().map(CassandraGenerators::simplify).flatMap(CassandraGenerators::token))).check(token -> { var serializer = AccordRoutingKeyByteSource.create(token.getPartitioner()); byte[] min = ByteSourceInverse.readBytes(serializer.minAsComparableBytes()); byte[] max = ByteSourceInverse.readBytes(serializer.maxAsComparableBytes()); diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java index 4238f8a6871e..5283a8b6ea75 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/DepsSerializerTest.java @@ -24,18 +24,15 @@ import accord.primitives.Deps; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.db.marshal.AbstractType; -import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.dht.LocalPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.IVersionedSerializers; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaProvider; -import org.apache.cassandra.utils.AbstractTypeGenerators; import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; import org.mockito.Mockito; import static accord.utils.Property.qt; @@ -56,7 +53,7 @@ public void serde() { DataOutputBuffer buffer = new DataOutputBuffer(); qt().check(rs -> { - IPartitioner partitioner = AccordGenerators.partitioner().map(DepsSerializerTest::normalize).next(rs); + IPartitioner partitioner = AccordGenerators.partitioner().map(CassandraGenerators::simplify).next(rs); Schema.instance = Mockito.mock(SchemaProvider.class); DatabaseDescriptor.setPartitionerUnsafe(partitioner); Mockito.when(Schema.instance.getExistingTablePartitioner(Mockito.any())).thenReturn(partitioner); @@ -65,17 +62,4 @@ public void serde() IVersionedSerializers.testSerde(buffer, DepsSerializer.deps, deps, version.value); }); } - - private static IPartitioner normalize(IPartitioner partitioner) - { - // serializers require tokens to fit within 1 << 16, but that makes the test flakey when LocalPartitioner with a nested type is found... - if (!(partitioner instanceof LocalPartitioner)) return partitioner; - if (!shouldSimplify(partitioner.getTokenValidator())) return partitioner; - return new LocalPartitioner(Int32Type.instance); - } - - private static boolean shouldSimplify(AbstractType type) - { - return AbstractTypeGenerators.contains(type, t -> t.isCollection()); - } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataMetadataKeyTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataMetadataKeyTest.java new file mode 100644 index 000000000000..291be79bf438 --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataMetadataKeyTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.utils.FBUtilities; +import org.assertj.core.api.Assertions; + +/** + * This test is to make sure that the fields of {@link ClusterMetadata} have a matching {@link MetadataKey} and the + * utility functions linking key to field are maintained. + * + * If this test is failing it likely means a new field was added to {@link ClusterMetadata} and {@link MetadataKeys} was + * not updated to know about it. + */ +public class ClusterMetadataMetadataKeyTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + } + + private static final Map NAME_TO_KEY; + + static + { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (Field field : MetadataKeys.class.getDeclaredFields()) + { + if (field.getType() == MetadataKey.class + && Modifier.isStatic(field.getModifiers()) + && Modifier.isPublic(field.getModifiers())) + builder.put(field.getName(), field); + } + NAME_TO_KEY = builder.build(); + } + + @Test + public void metadataKeyExists() throws IllegalAccessException + { + ClusterMetadata empty = new ClusterMetadata(Murmur3Partitioner.instance); + // Theese are fields that should not have MetadataKeys and should be ignored. + Set exclude = ImmutableSet.of("metadataIdentifier", + "epoch", + "partitioner", + "extensions", + "locator"); + // Mapping of ClusterMetadata field names to MetadataKey name; mapping is only needed if the names don't match. + Map mapping = ImmutableMap.of("directory", "node_directory", + "placements", "data_placements"); + for (Field field : ClusterMetadata.class.getDeclaredFields()) + { + if (Modifier.isStatic(field.getModifiers()) + || !Modifier.isPublic(field.getModifiers()) + || !Modifier.isFinal(field.getModifiers())) + continue; + String name = field.getName(); + if (exclude.contains(name)) continue; + if (mapping.containsKey(name)) + name = mapping.get(name); + String snakeName = FBUtilities.camelToSnake(name).toUpperCase(Locale.ROOT); + Assertions.assertThat(NAME_TO_KEY.keySet()) + .describedAs("Unable to locate MetadataKey for %s", snakeName) + .contains(snakeName); + MetadataKey expectedKey = (MetadataKey) NAME_TO_KEY.get(snakeName).get(null); + if (!MetadataKeys.CORE_METADATA.containsKey(expectedKey)) + throw new IllegalStateException("MetadataKeys.CORE_METADATA is missing key " + expectedKey + " for field " + name); + + Assertions.assertThat(field.get(empty)) + .describedAs("Extraction function does not seem to match the field %s and key %s", name, snakeName) + .isSameAs(MetadataKeys.CORE_METADATA.get(expectedKey).apply(empty)); + } + } +} diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataSerializerTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataSerializerTest.java new file mode 100644 index 000000000000..1117b4ab8858 --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataSerializerTest.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm; + +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.service.accord.AccordStaleReplicas; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializers; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.utils.CassandraGenerators.ClusterMetadataBuilder; +import org.assertj.core.api.Assertions; +import org.quicktheories.core.Gen; + +import static org.apache.cassandra.utils.FailingConsumer.orFail; +import static org.quicktheories.QuickTheory.qt; + +public class ClusterMetadataSerializerTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + } + + @Test + public void serdeLatest() + { + DataOutputBuffer output = new DataOutputBuffer(); + qt().forAll(new ClusterMetadataBuilder().build()).checkAssert(orFail(cm -> { + AsymmetricMetadataSerializers.testSerde(output, ClusterMetadata.serializer, cm, NodeVersion.CURRENT_METADATA_VERSION); + })); + } + + @Test + public void serdeWithoutAccord() + { + DataOutputBuffer output = new DataOutputBuffer(); + Gen gen = new ClusterMetadataBuilder().build().assuming(cm -> { + if (!cm.consensusMigrationState.equals(ConsensusMigrationState.EMPTY)) + return true; + if (!cm.accordStaleReplicas.equals(AccordStaleReplicas.EMPTY)) + return true; + if (!cm.accordFastPath.equals(AccordFastPath.EMPTY)) + return true; + return false; + }); + qt().forAll(gen).checkAssert(orFail(cm -> { + output.clear(); + Version version = Version.V2; // this is the version before accord + long expectedSize = ClusterMetadata.serializer.serializedSize(cm, version); + ClusterMetadata.serializer.serialize(cm, output, version); + Assertions.assertThat(output.getLength()).describedAs("The serialized size and bytes written do not match").isEqualTo(expectedSize); + DataInputBuffer in = new DataInputBuffer(output.unsafeGetBufferAndFlip(), false); + ClusterMetadata read = ClusterMetadata.serializer.deserialize(in, version); + Assertions.assertThat(read).isNotEqualTo(cm); + + Assertions.assertThat(read.consensusMigrationState).isEqualTo(ConsensusMigrationState.EMPTY); + Assertions.assertThat(read.accordStaleReplicas).isEqualTo(AccordStaleReplicas.EMPTY); + Assertions.assertThat(read.accordFastPath).isEqualTo(AccordFastPath.EMPTY); + })); + } +} diff --git a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java index 1219502e9867..cf8bdcb79c16 100644 --- a/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java +++ b/test/unit/org/apache/cassandra/tcm/ClusterMetadataTransformationTest.java @@ -267,7 +267,7 @@ private static void assertModifications(Transformed transformed, MetadataKey... // anything modified by in this transformation, and therefore included in the modified keys, // should have the same epoch as the CM itself. Anything not modified now must have a strictly // earlier epoch - for (MetadataKey key : Iterables.concat(MetadataKeys.CORE_METADATA, transformed.metadata.extensions.keySet())) + for (MetadataKey key : Iterables.concat(MetadataKeys.CORE_METADATA.keySet(), transformed.metadata.extensions.keySet())) { MetadataValue value = valueFor(key, transformed.metadata); if (transformed.modifiedKeys.contains(key)) @@ -279,7 +279,7 @@ private static void assertModifications(Transformed transformed, MetadataKey... private static MetadataValue valueFor(MetadataKey key, ClusterMetadata metadata) { - if (!MetadataKeys.CORE_METADATA.contains(key)) + if (!MetadataKeys.CORE_METADATA.containsKey(key)) { assert key instanceof ExtensionKey; return metadata.extensions.get((ExtensionKey)key); diff --git a/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java new file mode 100644 index 000000000000..fd37346aa07c --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm; + +import java.io.IOException; +import java.util.List; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; +import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializers; +import org.apache.cassandra.tcm.serialization.Version; +import org.assertj.core.api.Assertions; + +public class ValidatingClusterMetadataService extends StubClusterMetadataService +{ + private final List supportedVersions; + + private ValidatingClusterMetadataService(List supportedVersions) + { + super(new ClusterMetadata(safeGetPartitioner())); + this.supportedVersions = supportedVersions; + } + + public static ValidatingClusterMetadataService createAndRegister(Version minVersion) + { + return createAndRegister(minVersion.greaterThanOrEqual()); + } + + public static ValidatingClusterMetadataService createAndRegister(List supportedVersions) + { + ValidatingClusterMetadataService cms = new ValidatingClusterMetadataService(supportedVersions); + + ClusterMetadataService.unsetInstance(); + ClusterMetadataService.setInstance(cms); + return cms; + } + + private static IPartitioner safeGetPartitioner() + { + IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); + return partitioner == null ? Murmur3Partitioner.instance : partitioner; + } + + private void testSerde(AsymmetricMetadataSerializer serializer, In input) + { + for (Version version : supportedVersions) + { + try (DataOutputBuffer buffer = DataOutputBuffer.scratchBuffer.get()) + { + AsymmetricMetadataSerializers.testSerde(buffer, serializer, input, version); + } + catch (IOException e) + { + throw new AssertionError(String.format("Serde error for version=%s; input=%s", version, input), e); + } + } + } + + @Override + protected Transformation.Result execute(Transformation transform) + { + Transformation.Result result = super.execute(transform); + if (result.isSuccess()) + { + Transformation.Success success = result.success(); + Assertions.assertThat(success.affectedMetadata) + .describedAs("Affected Metadata keys do not match") + .isEqualTo(MetadataKeys.diffKeys(metadata(), success.metadata)); + } + return result; + } + + @Override + public T1 commit(Transformation transform, CommitSuccessHandler onSuccess, CommitFailureHandler onFailure) + { + testSerde(transform.kind().serializer(), transform); + return super.commit(transform, onSuccess, onFailure); + } + + @Override + public void setMetadata(ClusterMetadata metadata) + { + if (!metadata.epoch.equals(metadata().epoch.nextEpoch())) + throw new AssertionError("Epochs were not sequential: expected " + metadata().epoch.nextEpoch() + " but given " + metadata.epoch); + testSerde(ClusterMetadata.serializer, metadata); + super.setMetadata(metadata); + } +} diff --git a/test/unit/org/apache/cassandra/tcm/log/LogListenerNotificationTest.java b/test/unit/org/apache/cassandra/tcm/log/LogListenerNotificationTest.java index 59aafa37e0d2..55df4e868e4a 100644 --- a/test/unit/org/apache/cassandra/tcm/log/LogListenerNotificationTest.java +++ b/test/unit/org/apache/cassandra/tcm/log/LogListenerNotificationTest.java @@ -111,7 +111,7 @@ static ClusterMetadata cm() static Set affectedMetadata(Random random) { - List src = new ArrayList<>(CORE_METADATA); + List src = new ArrayList<>(CORE_METADATA.keySet()); int required = random.nextInt(src.size()); Set keys = new HashSet<>(); while (keys.size() < required) diff --git a/test/unit/org/apache/cassandra/tcm/sequences/DropAccordTableTest.java b/test/unit/org/apache/cassandra/tcm/sequences/DropAccordTableTest.java new file mode 100644 index 000000000000..8b183688092d --- /dev/null +++ b/test/unit/org/apache/cassandra/tcm/sequences/DropAccordTableTest.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tcm.sequences; + +import java.util.TreeSet; +import java.util.stream.Stream; + +import org.junit.Test; + +import accord.utils.Gen; +import accord.utils.Property; +import accord.utils.Property.Command; +import accord.utils.RandomSource; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.locator.MetaStrategy; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.Keyspaces; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.Types; +import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.MultiStepOperation; +import org.apache.cassandra.tcm.StubClusterMetadataService; +import org.apache.cassandra.tcm.ValidatingClusterMetadataService; +import org.apache.cassandra.tcm.serialization.Version; +import org.apache.cassandra.tcm.transformations.PrepareDropAccordTable; +import org.apache.cassandra.tcm.sequences.DropAccordTable.TableReference; +import org.apache.cassandra.utils.AbstractTypeGenerators; +import org.apache.cassandra.utils.CassandraGenerators; +import org.apache.cassandra.utils.CassandraGenerators.TableMetadataBuilder; +import org.apache.cassandra.utils.Generators; +import org.assertj.core.api.Assertions; +import org.quicktheories.generators.SourceDSL; + +import static accord.utils.Property.commands; +import static accord.utils.Property.qt; +import static accord.utils.Property.stateful; +import static org.apache.cassandra.utils.CassandraGenerators.TABLE_ID_GEN; + +public class DropAccordTableTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + } + + private static final TransactionalMode[] ACCORD_ENABLED_MODES = Stream.of(TransactionalMode.values()) + .filter(t -> t.accordIsEnabled) + .toArray(TransactionalMode[]::new); + + private static final Gen TABLE_GEN = Generators.toGen(defaultTableMetadataBuilder().build()); + + private static TableMetadataBuilder defaultTableMetadataBuilder() + { + return new TableMetadataBuilder() + .withUseCounter(false) + .withPartitioner(Murmur3Partitioner.instance) + .withTransactionalMode(SourceDSL.arbitrary().pick(ACCORD_ENABLED_MODES)); + } + + @Test + public void e2e() + { + qt().check(rs -> { + ValidatingClusterMetadataService cms = createCMS(); + TableMetadata metadata = TABLE_GEN.next(rs); + addTable(cms, metadata); // hack this table into the schema... + + TableReference table = TableReference.from(metadata); + + cms.commit(new PrepareDropAccordTable(table)); + + // This is only here because "applyTo" is not touched without it... + for (KeyspaceMetadata ks : cms.metadata().schema.getKeyspaces()) + cms.metadata().writePlacementAllSettled(ks); + + Assertions.assertThat(cms.metadata().inProgressSequences.isEmpty()).isFalse(); + InProgressSequences.finishInProgressSequences(table); + Assertions.assertThat(cms.metadata().inProgressSequences.isEmpty()).isTrue(); + + // table is dropped + Assertions.assertThat(cms.metadata().schema.getTableMetadata(metadata.id)).isNull(); + }); + } + + @Test + public void multi() + { + stateful().withExamples(50).withSteps(500).check(commands(() -> State::new) + .destroyState(DropAccordTableTest::validate) + .add(DropAccordTableTest::addTable) + .addIf(s -> !s.aliveTables.isEmpty(), DropAccordTableTest::dropTable) + .addIf(s -> !s.cms.metadata().inProgressSequences.isEmpty(), DropAccordTableTest::inProgressSequences) + .build()); + } + + private static void validate(State state) + { + while (!state.cms.metadata().inProgressSequences.isEmpty()) + { + for (MultiStepOperation opt : state.cms.metadata().inProgressSequences) + InProgressSequences.resume(opt); + } + // all tables are dropped, unless they were never dropped + Keyspaces keyspaces = state.cms.metadata().schema.getKeyspaces(); + for (KeyspaceMetadata k : keyspaces) + { + if (k.tables.size() == 0) continue; + if (k.replicationStrategy instanceof MetaStrategy) continue; + for (TableMetadata t : k.tables) + { + Assertions.assertThat(t.params.pendingDrop).isFalse(); + Assertions.assertThat(state.aliveTables).contains(t.id); + } + } + } + + private static Command addTable(RandomSource rs, State state) + { + TableMetadata metadata = Generators.toGen(defaultTableMetadataBuilder() + .withKeyspaceName(CassandraGenerators.KEYSPACE_NAME_GEN.assuming(name -> !state.cms.metadata().schema.getKeyspaces().containsKeyspace(name))) + .withTableId(TABLE_ID_GEN.assuming(id -> state.cms.metadata().schema.getTableMetadata(id) == null)) + // other tests better cover serialization so can speed up tests by only doing primitive types + .withDefaultTypeGen(CassandraGenerators.TableMetadataBuilder.defaultTypeGen().withTypeKinds(AbstractTypeGenerators.TypeKind.PRIMITIVE)) + .build()) + .next(rs); + return new Property.SimpleCommand<>("Add Table " + metadata, s2 -> { + addTable(s2.cms, metadata); + s2.aliveTables.add(metadata.id); + }); + } + + private static Command dropTable(RandomSource rs, State state) + { + TableId id = rs.pickOrderedSet(state.aliveTables); + TableMetadata metadata = state.cms.metadata().schema.getTableMetadata(id); + return new Property.SimpleCommand<>("Drop Table " + metadata, s2 -> { + TableReference table = TableReference.from(metadata); + + s2.cms.commit(new PrepareDropAccordTable(table)); + s2.aliveTables.remove(id); + }); + } + + private static Command inProgressSequences(RandomSource rs, State state) + { + ClusterMetadata current = state.cms.metadata(); + TreeSet pending = new TreeSet<>(); + for (MultiStepOperation opt : current.inProgressSequences) + { + if (!(opt instanceof DropAccordTable)) throw new AssertionError("Only DropAccordTable should exist in this test; found " + opt); + pending.add(((DropAccordTable) opt).table); + } + TableReference ref = rs.pickOrderedSet(pending); + MultiStepOperation seq = current.inProgressSequences.get(ref); + Assertions.assertThat(seq).isNotNull(); + return new Property.SimpleCommand<>("Progress for " + ref + ": " + seq.nextStep(), s2 -> InProgressSequences.resume(seq)); + } + + public static class State + { + private final StubClusterMetadataService cms; + private final TreeSet aliveTables = new TreeSet<>(); + + public State(RandomSource rs) + { + // With validation enabled the test runtime is dominated by serialization checks, so enable them rarely + // just so tests do run with them, but the whole test runtime isn't serde testing. + if (rs.decide(0.01)) + { + cms = ValidatingClusterMetadataService.createAndRegister(Version.MIN_ACCORD_VERSION); + } + else + { + cms = StubClusterMetadataService.forTesting(new ClusterMetadata(Murmur3Partitioner.instance)); + ClusterMetadataService.unsetInstance(); + ClusterMetadataService.setInstance(cms); + } + } + } + + private static ValidatingClusterMetadataService createCMS() + { + return ValidatingClusterMetadataService.createAndRegister(Version.MIN_ACCORD_VERSION); + } + + private static void addTable(StubClusterMetadataService cms, TableMetadata table) + { + class Ref { Types types;} + // first mock out a keyspace + ClusterMetadata prev = cms.metadata(); + KeyspaceMetadata schema = KeyspaceMetadata.create(table.keyspace, KeyspaceParams.simple(3)); + Ref ref = new Ref(); + ref.types = schema.types; + CassandraGenerators.visitUDTs(table, udt -> ref.types = ref.types.with(udt.unfreeze())); + schema = schema.withSwapped(ref.types); + schema = schema.withSwapped(schema.tables.with(table)); + Keyspaces keyspaces = prev.schema.getKeyspaces().withAddedOrUpdated(schema); + ClusterMetadata metadata = prev.transformer().with(new DistributedSchema(keyspaces)).build().metadata; + cms.setMetadata(metadata); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java index 032c8dd1ec7a..32999a8aae6b 100644 --- a/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java +++ b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkRejoiningTest.java @@ -35,7 +35,7 @@ public class AccordMarkRejoiningTest public void shouldSerializeEmpty() throws IOException { DataOutputBuffer buffer = new DataOutputBuffer(); - AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, new AccordMarkRejoining(Collections.emptySet()), Version.V2); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, new AccordMarkRejoining(Collections.emptySet()), Version.MIN_ACCORD_VERSION); } @Test @@ -43,7 +43,7 @@ public void shouldSerializeSingleton() throws IOException { DataOutputBuffer buffer = new DataOutputBuffer(); AccordMarkRejoining markStale = new AccordMarkRejoining(Collections.singleton(NodeId.fromString("1"))); - AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, markStale, Version.V2); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, markStale, Version.MIN_ACCORD_VERSION); } @Test @@ -51,6 +51,6 @@ public void shouldSerializeMulti() throws IOException { DataOutputBuffer buffer = new DataOutputBuffer(); AccordMarkRejoining markStale = new AccordMarkRejoining(ImmutableSet.of(NodeId.fromString("1"), NodeId.fromString("2"))); - AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, markStale, Version.V2); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkRejoining.serializer, markStale, Version.MIN_ACCORD_VERSION); } } diff --git a/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java index d794b3a2a9b5..baa4936a0732 100644 --- a/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java +++ b/test/unit/org/apache/cassandra/tcm/transformations/AccordMarkStaleTest.java @@ -35,7 +35,7 @@ public class AccordMarkStaleTest public void shouldSerializeEmpty() throws IOException { DataOutputBuffer buffer = new DataOutputBuffer(); - AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, new AccordMarkStale(Collections.emptySet()), Version.V2); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, new AccordMarkStale(Collections.emptySet()), Version.MIN_ACCORD_VERSION); } @Test @@ -43,7 +43,7 @@ public void shouldSerializeSingleton() throws IOException { DataOutputBuffer buffer = new DataOutputBuffer(); AccordMarkStale markStale = new AccordMarkStale(Collections.singleton(NodeId.fromString("1"))); - AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, markStale, Version.V2); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, markStale, Version.MIN_ACCORD_VERSION); } @Test @@ -51,6 +51,6 @@ public void shouldSerializeMulti() throws IOException { DataOutputBuffer buffer = new DataOutputBuffer(); AccordMarkStale markStale = new AccordMarkStale(ImmutableSet.of(NodeId.fromString("1"), NodeId.fromString("2"))); - AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, markStale, Version.V2); + AsymmetricMetadataSerializers.testSerde(buffer, AccordMarkStale.serializer, markStale, Version.MIN_ACCORD_VERSION); } } diff --git a/test/unit/org/apache/cassandra/transport/CBUtilTest.java b/test/unit/org/apache/cassandra/transport/CBUtilTest.java index 3ce860307855..4409655d334c 100644 --- a/test/unit/org/apache/cassandra/transport/CBUtilTest.java +++ b/test/unit/org/apache/cassandra/transport/CBUtilTest.java @@ -22,13 +22,13 @@ import org.junit.Assert; import org.junit.Test; -import accord.utilsfork.Gens; +import accord.utils.Gens; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.PooledByteBufAllocator; import org.assertj.core.api.Assertions; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; public class CBUtilTest { diff --git a/test/unit/org/apache/cassandra/utils/AbstractTypeGeneratorsTest.java b/test/unit/org/apache/cassandra/utils/AbstractTypeGeneratorsTest.java new file mode 100644 index 000000000000..2cd5470a6b54 --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/AbstractTypeGeneratorsTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.util.ArrayList; + +import org.junit.Test; + +import accord.utils.LazyToString; +import org.apache.cassandra.db.marshal.AbstractType; +import org.assertj.core.api.Assertions; +import org.quicktheories.core.Gen; +import org.quicktheories.generators.SourceDSL; + +import static org.quicktheories.QuickTheory.qt; + +public class AbstractTypeGeneratorsTest +{ + @Test + public void withoutPrimitive() + { + Gen> primitiveGen = SourceDSL.arbitrary().pick(new ArrayList<>(AbstractTypeGenerators.primitiveTypes())); + qt().forAll(r -> r).checkAssert(rs -> { + AbstractType primitiveType = primitiveGen.generate(rs); + Gen> gen = AbstractTypeGenerators.builder().withoutPrimitive(primitiveType).build(); + for (int i = 0; i < 1000; i++) + { + AbstractType type = gen.generate(rs); + Assertions.assertThat(AbstractTypeGenerators.contains(type, primitiveType)) + .describedAs("Expected type %s not to be found in %s", primitiveType.asCQL3Type(), new LazyToString(() -> AbstractTypeGenerators.typeTree(type))) + .isFalse(); + if (type.subTypes().isEmpty()) + break; // not worth checking this type again... + } + }); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java index a114c4605e30..469a4868078d 100644 --- a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java +++ b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java @@ -38,6 +38,7 @@ import java.util.Set; import java.util.TreeMap; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -49,9 +50,19 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; +import org.apache.cassandra.schema.*; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; +import org.apache.cassandra.tcm.extensions.ExtensionKey; +import org.apache.cassandra.tcm.extensions.ExtensionValue; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.ownership.DataPlacements; +import org.apache.cassandra.tcm.ownership.TokenMap; +import org.apache.cassandra.tcm.sequences.InProgressSequences; +import org.apache.cassandra.tcm.sequences.LockedRanges; import org.apache.commons.lang3.builder.MultilineRecursiveToStringStyle; import org.apache.commons.lang3.builder.ReflectionToStringBuilder; +import accord.local.Node; import org.apache.cassandra.config.DataStorageSpec; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.ColumnIdentifier; @@ -78,7 +89,6 @@ import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.dht.ByteOrderedPartitioner; import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.dht.LocalCompositePrefixPartitioner; import org.apache.cassandra.dht.LocalPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.OrderPreservingPartitioner; @@ -105,22 +115,13 @@ import org.apache.cassandra.net.NoPayload; import org.apache.cassandra.net.PingRequest; import org.apache.cassandra.net.Verb; -import org.apache.cassandra.schema.CachingParams; -import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.CompactionParams; -import org.apache.cassandra.schema.CompressionParams; -import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.schema.KeyspaceParams; -import org.apache.cassandra.schema.MemtableParams; -import org.apache.cassandra.schema.ReplicationParams; -import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.schema.TableParams; -import org.apache.cassandra.schema.Tables; -import org.apache.cassandra.schema.Types; -import org.apache.cassandra.schema.UserFunctions; -import org.apache.cassandra.schema.Views; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; +import org.apache.cassandra.service.accord.AccordFastPath; +import org.apache.cassandra.service.accord.AccordStaleReplicas; +import org.apache.cassandra.service.accord.fastpath.InheritKeyspaceFastPathStrategy; +import org.apache.cassandra.service.accord.fastpath.ParameterizedFastPathStrategy; +import org.apache.cassandra.service.accord.fastpath.SimpleFastPathStrategy; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.AbstractTypeGenerators.TypeGenBuilder; @@ -158,7 +159,7 @@ public final class CassandraGenerators return InetAddressAndPort.getByAddressOverrideDefaults(address, NETWORK_PORT_GEN.generate(rnd)); }; - public static final Gen TABLE_ID_GEN = Generators.UUID_RANDOM_GEN.map(TableId::fromUUID); + public static final Gen TABLE_ID_GEN = Generate.booleans().flatMap(uuid -> uuid ? Generators.UUID_RANDOM_GEN.map(TableId::fromUUID) : Generate.longRange(Long.MIN_VALUE, Long.MAX_VALUE).map(TableId::fromLong)); private static final Gen TABLE_KIND_GEN = SourceDSL.arbitrary().pick(TableMetadata.Kind.REGULAR, TableMetadata.Kind.INDEX, TableMetadata.Kind.VIRTUAL); public static final Gen TABLE_METADATA_GEN = gen(rnd -> createTableMetadata(IDENTIFIER_GEN.generate(rnd), rnd)).describedAs(CassandraGenerators::toStringRecursive); @@ -748,6 +749,10 @@ public static class TableParamsBuilder private Gen compactionParamsGen = null; @Nullable private Gen compressionParamsGen = null; + @Nullable + private Gen transactionalMode = null; + @Nullable + private Gen fastPathStrategy = null; public TableParamsBuilder withKnownMemtables() { @@ -776,6 +781,81 @@ public TableParamsBuilder withCompression() return this; } + public TableParamsBuilder withTransactionalMode(Gen transactionalMode) + { + this.transactionalMode = transactionalMode; + return this; + } + + public TableParamsBuilder withTransactionalMode() + { + return withTransactionalMode(SourceDSL.arbitrary().enumValues(TransactionalMode.class)); + } + + public TableParamsBuilder withTransactionalMode(TransactionalMode transactionalMode) + { + return withTransactionalMode(SourceDSL.arbitrary().constant(transactionalMode)); + } + + public TableParamsBuilder withFastPathStrategy() + { + fastPathStrategy = rnd -> { + FastPathStrategy.Kind kind = SourceDSL.arbitrary().enumValues(FastPathStrategy.Kind.class).generate(rnd); + switch (kind) + { + case SIMPLE: + return SimpleFastPathStrategy.instance; + case INHERIT_KEYSPACE: + return InheritKeyspaceFastPathStrategy.instance; + case PARAMETERIZED: + { + Map map = new HashMap<>(); + int size = SourceDSL.integers().between(1, Integer.MAX_VALUE).generate(rnd); + map.put(ParameterizedFastPathStrategy.SIZE, Integer.toString(size)); + Set names = new HashSet<>(); + Gen nameGen = SourceDSL.strings().allPossible().ofLengthBetween(1, 10) + // If : is in the name then the parser will fail; we have validation to disalow this + .map(s -> s.replace(":", "_")) + // Names are used for DCs and those are seperated by , + .map(s -> s.replace(",", "_")) + .assuming(s -> !s.trim().isEmpty()); + int numNames = SourceDSL.integers().between(1, 10).generate(rnd); + for (int i = 0; i < numNames; i++) + { + while (!names.add(nameGen.generate(rnd))) + { + } + } + List sortedNames = new ArrayList<>(names); + sortedNames.sort(Comparator.naturalOrder()); + List dcs = new ArrayList<>(names.size()); + boolean auto = SourceDSL.booleans().all().generate(rnd); + if (auto) + { + dcs.addAll(sortedNames); + } + else + { + for (String name : sortedNames) + { + int weight = SourceDSL.integers().between(0, 10).generate(rnd); + dcs.add(name + ":" + weight); + } + } + // str: dcFormat(,dcFormat)* + // dcFormat: name | weight + // weight: int: >= 0 + // note: can't mix auto and user defined weight; need one or the other. Names must be unique + map.put(ParameterizedFastPathStrategy.DCS, String.join(",", dcs)); + return ParameterizedFastPathStrategy.fromMap(map); + } + default: + throw new UnsupportedOperationException(kind.name()); + } + }; + return this; + } + public Gen build() { return rnd -> { @@ -788,6 +868,10 @@ public Gen build() params.compaction(compactionParamsGen.generate(rnd)); if (compressionParamsGen != null) params.compression(compressionParamsGen.generate(rnd)); + if (transactionalMode != null) + params.transactionalMode(transactionalMode.generate(rnd)); + if (fastPathStrategy != null) + params.fastPath(fastPathStrategy.generate(rnd)); return params.build(); }; } @@ -864,6 +948,18 @@ public TableMetadataBuilder withUseCounter(Gen useCounter) return this; } + public TableMetadataBuilder withTransactionalMode(Gen transactionalMode) + { + paramsBuilder.withTransactionalMode(transactionalMode); + return this; + } + + public TableMetadataBuilder withTransactionalMode(TransactionalMode transactionalMode) + { + paramsBuilder.withTransactionalMode(transactionalMode); + return this; + } + public TableMetadataBuilder withKnownMemtables() { paramsBuilder.withKnownMemtables(); @@ -1081,6 +1177,11 @@ public TableMetadata build(RandomnessSource rnd) } } + public static Gen columnMetadataGen() + { + return columnMetadataGen(SourceDSL.arbitrary().enumValues(ColumnMetadata.Kind.class), AbstractTypeGenerators.typeGen()); + } + public static Gen columnMetadataGen(Gen kindGen, Gen> typeGen) { Gen ksNameGen = CassandraGenerators.KEYSPACE_NAME_GEN; @@ -1304,16 +1405,6 @@ public static Gen localPartitioner() return AbstractTypeGenerators.safeTypeGen().map(LocalPartitioner::new); } - public static Gen localCompositePrefixPartitioner() - { - return AbstractTypeGenerators.safeTypeGen().map(type -> { - if (type instanceof CompositeType) - return new LocalCompositePrefixPartitioner((CompositeType) type); - else - return new LocalCompositePrefixPartitioner(type); - }); - } - public static Gen localPartitionerToken() { var lpGen = localPartitioner(); @@ -1324,16 +1415,6 @@ public static Gen localPartitionerToken() }; } - public static Gen localCompositePrefixPartitionerToken() - { - var lpGen = localCompositePrefixPartitioner(); - return rs -> { - var lp = lpGen.generate(rs); - var bytes = AbstractTypeGenerators.getTypeSupport(lp.getTokenValidator()).bytesGen(); - return lp.getToken(bytes.generate(rs)); - }; - } - public static Gen reversedLongLocalToken() { Constraint range = Constraint.between(0, Long.MAX_VALUE); @@ -1369,8 +1450,7 @@ private enum SupportedPartitioners ByteOrdered(ByteOrderedPartitioner.class, ignore -> ByteOrderedPartitioner.instance), Random(RandomPartitioner.class, ignore -> RandomPartitioner.instance), Local(LocalPartitioner.class, localPartitioner()), - OrderPreserving(OrderPreservingPartitioner.class, ignore -> OrderPreservingPartitioner.instance), - LocalCompositePrefix(LocalCompositePrefixPartitioner.class, localCompositePrefixPartitioner()); + OrderPreserving(OrderPreservingPartitioner.class, ignore -> OrderPreservingPartitioner.instance); private final Class clazz; private final Gen partitioner; @@ -1410,8 +1490,7 @@ public static Gen partitioners() public static Gen nonLocalPartitioners() { return SourceDSL.arbitrary().enumValues(SupportedPartitioners.class) - .assuming(p -> p != SupportedPartitioners.Local && - p != SupportedPartitioners.LocalCompositePrefix) + .assuming(p -> p != SupportedPartitioners.Local) .flatMap(SupportedPartitioners::partitioner); } @@ -1443,7 +1522,6 @@ public static Gen token(IPartitioner partitioner) if (partitioner instanceof Murmur3Partitioner) return murmurToken(); if (partitioner instanceof ByteOrderedPartitioner) return byteOrderToken(); if (partitioner instanceof RandomPartitioner) return randomPartitionerToken(); - if (partitioner instanceof LocalCompositePrefixPartitioner) return localCompositePrefixPartitionerToken(); if (partitioner instanceof LocalPartitioner) return localPartitionerToken((LocalPartitioner) partitioner); if (partitioner instanceof OrderPreservingPartitioner) return orderPreservingToken(); throw new UnsupportedOperationException("Unsupported partitioner: " + partitioner.getClass()); @@ -1757,4 +1835,62 @@ public static Gen epochs() return Epoch.create(SourceDSL.longs().between(2, Long.MAX_VALUE).generate(rnd)); }; } + + public static Gen accordNodeId() + { + return SourceDSL.integers().between(0, Integer.MAX_VALUE).map(Node.Id::new); + } + + public static Gen accordStaleReplicas() + { + Gen> staleIdsGen = Generators.set(accordNodeId(), SourceDSL.integers().between(0, 10)); + Gen epochGen = epochs(); + return rnd -> new AccordStaleReplicas(staleIdsGen.generate(rnd), epochGen.generate(rnd)); + } + + public static Gen accordFastPath() + { + Gen> nodesGen = Generators.uniqueList(accordNodeId(), SourceDSL.integers().between(0, 10)); + Gen statusGen = SourceDSL.arbitrary().enumValues(AccordFastPath.Status.class); + Gen updateTimeMillis = TIMESTAMP_NANOS.map(TimeUnit.NANOSECONDS::toMillis); + Gen updateDelayMillis = SourceDSL.longs().between(0, TimeUnit.HOURS.toMillis(2)); + return rnd -> { + AccordFastPath accum = AccordFastPath.EMPTY; + for (Node.Id node : nodesGen.generate(rnd)) + { + AccordFastPath.Status status = statusGen.generate(rnd); + // can't add a NORMAL node that doesn't exist, it must be ab-NORMAL first... + if (status == AccordFastPath.Status.NORMAL) + accum = accum.withNodeStatusSince(node, AccordFastPath.Status.UNAVAILABLE, 0, 0); + accum = accum.withNodeStatusSince(node, status, updateTimeMillis.generate(rnd), updateDelayMillis.generate(rnd)); + } + return accum; + }; + } + + public static class ClusterMetadataBuilder + { + private Gen epochGen = epochs(); + private Gen partitionerGen = nonLocalPartitioners(); + private Gen accordStaleReplicasGen = accordStaleReplicas(); + private Gen accordFastPathGen = accordFastPath(); + public Gen build() + { + return rnd -> { + Epoch epoch = epochGen.generate(rnd); + IPartitioner partitioner = partitionerGen.generate(rnd); + Directory directory = Directory.EMPTY; + DistributedSchema schema = DistributedSchema.first(directory.knownDatacenters()); + TokenMap tokenMap = new TokenMap(partitioner); + DataPlacements placements = DataPlacements.EMPTY; + AccordFastPath accordFastPath = accordFastPathGen.generate(rnd); + LockedRanges lockedRanges = LockedRanges.EMPTY; + InProgressSequences inProgressSequences = InProgressSequences.EMPTY; + ConsensusMigrationState consensusMigrationState = ConsensusMigrationState.EMPTY; + Map, ExtensionValue> extensions = ImmutableMap.of(); + AccordStaleReplicas accordStaleReplicas = accordStaleReplicasGen.generate(rnd); + return new ClusterMetadata(epoch, partitioner, schema, directory, tokenMap, placements, accordFastPath, lockedRanges, inProgressSequences, consensusMigrationState, extensions, accordStaleReplicas); + }; + } + } } diff --git a/test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java b/test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java index 0e2c60a34da1..1c1f0eead180 100644 --- a/test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java +++ b/test/unit/org/apache/cassandra/utils/CassandraGeneratorsTest.java @@ -18,20 +18,36 @@ package org.apache.cassandra.utils; -import org.junit.Test; +import java.util.Arrays; +import java.util.List; -import accord.utilsfork.Gens; import org.assertj.core.api.Assertions; +import org.junit.Test; + +import accord.utils.Gens; +import accord.utils.LazyToString; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.CounterColumnType; +import org.apache.cassandra.db.marshal.DecimalType; +import org.apache.cassandra.db.marshal.DurationType; +import org.apache.cassandra.db.marshal.EmptyType; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.utils.CassandraGenerators.TableMetadataBuilder; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; import static org.apache.cassandra.utils.Generators.toGen; public class CassandraGeneratorsTest { + private static final List> NOT_ALLOWED_IN_PRIMARY_KEY = Arrays.asList(EmptyType.instance, + DurationType.instance, + DecimalType.instance, + CounterColumnType.instance); + @Test public void partitionerToToken() { - qt().forAll(Gens.random(), toGen(CassandraGenerators.partitioners())) + qt().forAll(Gens.random(), toGen(CassandraGenerators.partitioners().map(CassandraGenerators::simplify))) .check((rs, p) -> Assertions.assertThat(toGen(CassandraGenerators.token(p)).next(rs)).isNotNull()); } @@ -41,4 +57,20 @@ public void partitionerKeys() qt().forAll(Gens.random(), toGen(CassandraGenerators.partitioners())) .check((rs, p) -> Assertions.assertThat(toGen(CassandraGenerators.decoratedKeys(i -> p)).next(rs)).isNotNull()); } + + @Test + public void primaryKeysNoUnsafeTypes() + { + qt().forAll(toGen(new TableMetadataBuilder().build())).check(table -> { + for (ColumnMetadata pk : table.primaryKeyColumns()) + { + for (AbstractType t : NOT_ALLOWED_IN_PRIMARY_KEY) + { + Assertions.assertThat(AbstractTypeGenerators.contains(pk.type, t)) + .describedAs("Expected type %s not to be found in %s", t.asCQL3Type(), new LazyToString(() -> AbstractTypeGenerators.typeTree(pk.type))) + .isFalse(); + } + } + }); + } } diff --git a/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java b/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java index dcc54f5157f3..c103a8dc6d8b 100644 --- a/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java +++ b/test/unit/org/apache/cassandra/utils/ConfigGenBuilder.java @@ -25,9 +25,9 @@ import com.google.common.collect.ImmutableMap; -import accord.utilsfork.Gen; -import accord.utilsfork.Gens; -import accord.utilsfork.RandomSource; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.dht.IPartitioner; diff --git a/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java b/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java index 7b1091d4c507..7bf1d12fd8ca 100644 --- a/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java +++ b/test/unit/org/apache/cassandra/utils/ConfigGenBuilderTest.java @@ -23,7 +23,7 @@ import com.google.common.jimfs.Jimfs; import org.junit.Test; -import accord.utilsfork.Gen; +import accord.utils.Gen; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.ParameterizedClass; @@ -31,7 +31,7 @@ import org.apache.cassandra.io.util.File; import org.apache.cassandra.locator.SimpleSeedProvider; -import static accord.utilsfork.Property.qt; +import static accord.utils.Property.qt; import static org.apache.cassandra.config.CassandraRelevantProperties.STORAGE_DIR; public class ConfigGenBuilderTest diff --git a/test/unit/org/apache/cassandra/utils/Generators.java b/test/unit/org/apache/cassandra/utils/Generators.java index 2acedf174bd2..6bb7f56a8d5c 100644 --- a/test/unit/org/apache/cassandra/utils/Generators.java +++ b/test/unit/org/apache/cassandra/utils/Generators.java @@ -42,8 +42,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.utilsfork.DefaultRandom; -import accord.utilsfork.RandomSource; +import accord.utils.DefaultRandom; +import accord.utils.RandomSource; import org.apache.cassandra.cql3.ReservedKeywords; import org.quicktheories.core.Gen; import org.quicktheories.core.RandomnessSource; @@ -613,7 +613,7 @@ public static Gen> forwardRanges(int min, int max) .map(end -> Range.closed(start, end))); } - public static accord.utilsfork.Gen toGen(org.quicktheories.core.Gen qt) + public static accord.utils.Gen toGen(org.quicktheories.core.Gen qt) { return rs -> { JavaRandom r = new JavaRandom(rs.asJdkRandom()); @@ -621,7 +621,7 @@ public static accord.utilsfork.Gen toGen(org.quicktheories.core.Gen qt }; } - public static org.quicktheories.core.Gen fromGen(accord.utilsfork.Gen accord) + public static org.quicktheories.core.Gen fromGen(accord.utils.Gen accord) { return rnd -> { RandomSource rs = new DefaultRandom(rnd.next(Constraint.none())); diff --git a/test/unit/org/apache/cassandra/utils/SimulatedMiniCluster.java b/test/unit/org/apache/cassandra/utils/SimulatedMiniCluster.java new file mode 100644 index 000000000000..c60a6330691c --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/SimulatedMiniCluster.java @@ -0,0 +1,620 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Random; +import java.util.TreeSet; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Supplier; +import javax.annotation.Nullable; + +import com.google.common.collect.Iterables; + +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.Invariants; +import accord.utils.RandomSource; +import org.apache.cassandra.concurrent.ExecutorFactory; +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.concurrent.SequentialExecutorPlus; +import org.apache.cassandra.concurrent.SimulatedExecutorFactory; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.compaction.ICompactionManager; +import org.apache.cassandra.db.repair.CassandraTableRepairManager; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.gms.ApplicationState; +import org.apache.cassandra.gms.EndpointState; +import org.apache.cassandra.gms.HeartBeatState; +import org.apache.cassandra.gms.IEndpointStateChangeSubscriber; +import org.apache.cassandra.gms.IFailureDetector; +import org.apache.cassandra.gms.IGossiper; +import org.apache.cassandra.gms.VersionedValue; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Locator; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.MessageDelivery; +import org.apache.cassandra.net.SimulatedMessageDelivery; +import org.apache.cassandra.net.SimulatedMessageDelivery.ActionSupplier; +import org.apache.cassandra.repair.IValidationManager; +import org.apache.cassandra.repair.SharedContext; +import org.apache.cassandra.repair.StreamExecutor; +import org.apache.cassandra.repair.TableRepairManager; +import org.apache.cassandra.repair.ValidationManager; +import org.apache.cassandra.service.ActiveRepairService; +import org.apache.cassandra.service.paxos.cleanup.PaxosRepairState; +import org.apache.cassandra.streaming.StreamEventHandler; +import org.apache.cassandra.streaming.StreamState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.MultiStepOperation; +import org.apache.cassandra.tcm.StubClusterMetadataService; +import org.apache.cassandra.tcm.Transformation; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.UniformRangePlacement; +import org.apache.cassandra.tcm.transformations.PrepareJoin; +import org.mockito.Mockito; + +import static org.apache.cassandra.utils.AccordGenerators.fromQT; + +public class SimulatedMiniCluster +{ + private final RandomSource rs; + private final Function> verbHandlerFactory; + private final SimulatedExecutorFactory executorFactory; + private final SequentialExecutorPlus orderedExecutor; + private final ScheduledExecutorPlus unorderedScheduled; + private final IFailureDetector failureDetector = Mockito.mock(IFailureDetector.class); + private final Locator locator = Mockito.mock(Locator.class); + private final MBeanWrapper mbean = Mockito.mock(MBeanWrapper.class); + private final SimulatedGossip gossiper = new SimulatedGossip(); + private final List failures = new ArrayList<>(); + private final IPartitioner partitioner; + private final Map> dcsToRacks; + private final List dcs; + private final int tokensPerInstance; + private final Gen tokenGen; + private ClusterMetadata current; + private final Map nodes = new LinkedHashMap<>(); + private final TreeSet knownTokens = new TreeSet<>(); // includes bootstraping nodes tokens (aka tokens not in the ring) + + private SimulatedMiniCluster(Builder builder) + { + this.rs = builder.rs; + this.verbHandlerFactory = builder.verbHandlerFactory; + this.executorFactory = new SimulatedExecutorFactory(rs, failures::add); + this.orderedExecutor = executorFactory.configureSequential("ignore").build(); + this.unorderedScheduled = executorFactory.scheduled("ignored"); + this.partitioner = fromQT(CassandraGenerators.nonLocalPartitioners()).next(rs); + this.dcsToRacks = createDcRackDetails(rs); + this.dcs = new ArrayList<>(dcsToRacks.keySet()); + dcs.sort(Comparator.naturalOrder()); + this.tokensPerInstance = rs.nextBoolean() ? 1 : 4; + this.tokenGen = fromQT(CassandraGenerators.token(partitioner)).filter(t -> !knownTokens.contains(t)); + // setup Directory with known dcs + this.current = new ClusterMetadata(partitioner); + ClusterMetadataService.unsetInstance(); + ClusterMetadataService.setInstance(StubClusterMetadataService.forTesting(current)); + } + + public Node node(int id) + { + return node(new NodeId(id)); + } + + public Node node(NodeId id) + { + Node node = nodes.get(id); + if (node == null) + throw new AssertionError("Unable to find node for id " + id); + return node; + } + + public Node node(InetAddressAndPort address) + { + //TODO (performance): don't walk, keep index? + for (Node node : nodes.values()) + { + if (node.broadcastAddressAndPort.equals(address)) + return node; + } + throw new AssertionError("Unable to find node for address " + address); + } + + private Collection nextUnknownTokens() + { + if (tokensPerInstance == 1) return Collections.singleton(tokenGen.next(rs)); + return Gens.lists(tokenGen).unique().ofSize(tokensPerInstance).next(rs); + } + + public Node createNode() + { + if (nodes.isEmpty()) + return createFirstNode(); + + NodeId id = new NodeId(nodes.size() + 1); + UUID hostId = id.toUUID(); + Collection tokens = nextUnknownTokens(); + String dc = rs.pick(dcs); + String rack = rs.pick(dcsToRacks.get(dc)); + Node node = new Node(id, hostId, address(id), tokens, dc, rack); + register(node); + return node; + } + + public Node createNodeAndJoin() + { + if (nodes.isEmpty()) + return createFirstNode(); + + NodeId id = new NodeId(nodes.size() + 1); + UUID hostId = id.toUUID(); + Collection tokens = nextUnknownTokens(); + String dc = rs.pick(dcs); + String rack = rs.pick(dcsToRacks.get(dc)); + Node node = new Node(id, hostId, address(id), tokens, dc, rack); + registerAndJoin(node); + return node; + } + + private Node createFirstNode() + { + NodeId id = new NodeId(nodes.size() + 1); + UUID hostId = id.toUUID(); + Collection tokens = nextUnknownTokens(); + String dc = dcs.get(0); + String rack = dcsToRacks.get(dc).get(0); + Node node = new Node(id, hostId, address(id), tokens, dc, rack); + registerAndJoin(node); + return node; + } + + private void registerAndJoin(Node node) + { + register(node); + prepareJoin(node.id); + while (!current.inProgressSequences.isEmpty()) + bumpInProgress(); + } + + private void register(Node node) + { + nodes.put(node.id, node); + knownTokens.addAll(node.tokens); + registerWithSnitch(node); + registerWithGossip(node); + registerWithCMS(node); + } + + private void registerWithCMS(Node node) + { + if (node.id.id() == 1) + { + // rebuild metadata from scratch + Directory directory = Directory.EMPTY.with(new NodeAddresses(node.hostId, node.broadcastAddressAndPort, node.broadcastAddressAndPort, node.broadcastAddressAndPort), new Location(node.dc, node.rack)); + notifyMetadataChange(new ClusterMetadata(partitioner, directory)); + } + else + { + notifyMetadataChange(current.transformer().register(new NodeAddresses(node.hostId, node.broadcastAddressAndPort, node.broadcastAddressAndPort, node.broadcastAddressAndPort), + new Location(node.dc, node.rack), + NodeVersion.CURRENT) + .build().metadata); + } + } + + private void prepareJoin(NodeId id) + { + Node node = nodes.get(id); + if (node == null) + throw new IllegalArgumentException("Unknown " + id); + PrepareJoin task = new PrepareJoin(id, new HashSet<>(node.tokens), new UniformRangePlacement(), true, false); + notifyMetadataChange(process(task).metadata); + } + + private void bumpInProgress() + { + if (current.inProgressSequences.isEmpty()) + throw new IllegalStateException("Attempted to bump epoch when nothing was pending"); + Iterator> it = current.inProgressSequences.iterator(); + Invariants.checkState(it.hasNext()); + notifyMetadataChange(process(it.next()).metadata); + } + + protected void notifyMetadataChange(ClusterMetadata current) + { + this.current = current; + ((StubClusterMetadataService) ClusterMetadataService.instance()).setMetadata(current); + } + + private Transformation.Success process(Transformation transformation) + { + Transformation.Result result = transformation.execute(current); + if (result.isRejected()) + throw new IllegalStateException("Unable to make TCM transition"); + return result.success(); + } + + private Transformation.Success process(MultiStepOperation transformation) + { + Transformation.Result result = transformation.applyTo(current); + if (result.isRejected()) + throw new IllegalStateException("Unable to make TCM transition"); + return result.success(); + } + + private static InetAddressAndPort address(NodeId id) + { + try + { + return InetAddressAndPort.getByAddress(ByteArrayUtil.bytes(id.id())); + } + catch (UnknownHostException e) + { + throw new AssertionError("Unable to create address for id " + id, e); + } + } + + private static Map> createDcRackDetails(RandomSource rs) + { + int numDCs = rs.nextInt(1, 4); + Map> map = new LinkedHashMap<>(); + for (int i = 0; i < numDCs; i++) + { + String name = "DC" + (i + 1); + int numRacks = rs.nextInt(1, 10); + List racks = Gens.lists(Gens.strings().ascii().ofLength(5).map(s -> "R" + s)).unique().ofSize(numRacks).next(rs); + racks.sort(Comparator.naturalOrder()); + map.put(name, racks); + } + return map; + } + + public boolean hasWork() + { + return executorFactory.hasWork(); + } + + public boolean processAny() + { + return executorFactory.processAny(); + } + + public boolean processOne() + { + return executorFactory.processOne(); + } + + public void processAll() + { + executorFactory.processAll();; + } + + public void simulateStages(Stage... stages) + { + for (Stage stage : stages) + { + switch (stage) + { + case GOSSIP: + case ANTI_ENTROPY: + case MIGRATION: + case MISC: + case TRACING: + case FETCH_LOG: + stage.unsafeSetExecutor(orderedExecutor); + break; + default: + stage.unsafeSetExecutor(unorderedScheduled); + } + } + } + + private void registerWithSnitch(Node node) + { + Mockito.when(locator.location(Mockito.eq(node.broadcastAddressAndPort))).thenReturn(new Location(node.dc, node.rack)); + } + + private void registerWithGossip(Node node) + { + VersionedValue.VersionedValueFactory valueFactory = node.valueFactory; + EndpointState state = new EndpointState(new HeartBeatState(42, 42)); + state.addApplicationState(ApplicationState.STATUS, valueFactory.normal(node.tokens)); + state.addApplicationState(ApplicationState.STATUS_WITH_PORT, valueFactory.normal(node.tokens)); + state.addApplicationState(ApplicationState.HOST_ID, valueFactory.hostId(node.hostId)); + state.addApplicationState(ApplicationState.TOKENS, valueFactory.tokens(node.tokens)); + state.addApplicationState(ApplicationState.DC, valueFactory.datacenter(node.dc)); + state.addApplicationState(ApplicationState.RACK, valueFactory.rack(node.rack)); + state.addApplicationState(ApplicationState.RELEASE_VERSION, valueFactory.releaseVersion()); + + gossiper.endpoints.put(node.broadcastAddressAndPort, state); + } + + public static class Builder + { + private final RandomSource rs; + private final Function> verbHandlerFactory; + + public Builder(RandomSource rs, Function> verbHandlerFactory) + { + this.rs = rs; + this.verbHandlerFactory = verbHandlerFactory; + } + + public SimulatedMiniCluster build() + { + return new SimulatedMiniCluster(this); + } + } + + private enum NodeStatus { Init, Registered, Joining, Joined, Leaving, Removed} + + public class Node implements SharedContext + { + private final ICompactionManager compactionManager = Mockito.mock(ICompactionManager.class); + private final NodeId id; + private final UUID hostId; + private final InetAddressAndPort broadcastAddressAndPort; + private final Collection tokens; + private final String dc, rack; + private final VersionedValue.VersionedValueFactory valueFactory; + private final SimulatedMessageDelivery messaging; + private final SimulatedMessageDelivery.SimulatedMessageReceiver receiver; + private final ActiveRepairService activeRepairService; + private final PaxosRepairState paxosRepairState; + private final IValidationManager validationManager; + private final StreamExecutor streamExecutor; + private NodeStatus status = NodeStatus.Init; + private ActionSupplier messagingActions = (self, msg, to) -> SimulatedMessageDelivery.Action.DELIVER; + + public Node(NodeId id, UUID hostId, InetAddressAndPort broadcastAddressAndPort, Collection tokens, String dc, String rack) + { + this.id = id; + this.hostId = hostId; + this.broadcastAddressAndPort = broadcastAddressAndPort; + this.tokens = tokens; + this.dc = dc; + this.rack = rack; + + IPartitioner partitioner = Iterables.getFirst(tokens, null).getPartitioner(); + this.valueFactory = new VersionedValue.VersionedValueFactory(partitioner); + this.messaging = new SimulatedMessageDelivery(broadcastAddressAndPort, + messagingActions::get, + SimulatedMessageDelivery.randomDelay(rs), + (to, msg) -> unorderedScheduled.submit(() -> node(to).receiver.recieve(msg)), + (action, to, msg) -> {}, + unorderedScheduled::schedule, + failures::add); + this.activeRepairService = new ActiveRepairService(this); + this.paxosRepairState = new PaxosRepairState(this); + this.validationManager = (cfs, validator) -> unorderedScheduled.submit(() -> { + try + { + ValidationManager.doValidation(cfs, validator); + } + catch (Throwable e) + { + validator.fail(e); + } + }); + this.streamExecutor = plan -> { + long delayNanos = rs.nextLong(TimeUnit.SECONDS.toNanos(5), TimeUnit.MINUTES.toNanos(10)); + unorderedScheduled.schedule(() -> { + StreamState success = new StreamState(plan.planId(), plan.streamOperation(), Collections.emptySet()); + for (StreamEventHandler handler : plan.handlers()) + handler.onSuccess(success); + }, delayNanos, TimeUnit.NANOSECONDS); + return null; + }; + + // setup last as "this" is leaking, so make sure all final fields are defined first + this.receiver = messaging.receiver(verbHandlerFactory.apply(this)); + } + + public NodeId id() + { + return id; + } + + public UUID hostId() + { + return hostId; + } + + public void messagingActions(ActionSupplier messagingActions) + { + this.messagingActions = Objects.requireNonNull(messagingActions); + } + + @Override + public InetAddressAndPort broadcastAddressAndPort() + { + return broadcastAddressAndPort; + } + + @Override + public Supplier random() + { + return () -> rs.fork().asJdkRandom(); + } + + @Override + public Clock clock() + { + return executorFactory; + } + + @Override + public ExecutorFactory executorFactory() + { + return executorFactory; + } + + @Override + public MBeanWrapper mbean() + { + return mbean; + } + + @Override + public ScheduledExecutorPlus optionalTasks() + { + return unorderedScheduled; + } + + @Override + public ScheduledExecutorPlus nonPeriodicTasks() + { + return unorderedScheduled; + } + + @Override + public ScheduledExecutorPlus scheduledTasks() + { + return unorderedScheduled; + } + + + @Override + public IFailureDetector failureDetector() + { + return failureDetector; + } + + @Override + public Locator locator() + { + return locator; + } + + @Override + public IGossiper gossiper() + { + return gossiper; + } + + @Override + public MessageDelivery messaging() + { + return messaging; + } + + @Override + public ActiveRepairService repair() + { + return activeRepairService; + } + + @Override + public PaxosRepairState paxosRepairState() + { + return paxosRepairState; + } + + @Override + public ICompactionManager compactionManager() + { + return compactionManager; + } + + @Override + public IValidationManager validationManager() + { + return validationManager; + } + + @Override + public TableRepairManager repairManager(ColumnFamilyStore store) + { + return new CassandraTableRepairManager(store, this) + { + @Override + public void snapshot(String name, Collection> ranges, boolean force) + { + // no-op + } + }; + } + + @Override + public StreamExecutor streamExecutor() + { + return streamExecutor; + } + } + + private class SimulatedGossip implements IGossiper + { + private final Map endpoints = new HashMap<>(); + + @Override + public void register(IEndpointStateChangeSubscriber subscriber) + { + + } + + @Override + public void unregister(IEndpointStateChangeSubscriber subscriber) + { + + } + + @Nullable + @Override + public EndpointState getEndpointStateForEndpoint(InetAddressAndPort ep) + { + return endpoints.get(ep); + } + + @Override + public void notifyFailureDetector(Map remoteEpStateMap) + { + + } + + @Override + public void applyStateLocally(Map epStateMap) + { + // If we were testing paxos this would be wrong... + // CASSANDRA-18917 added support for simulating Gossip, but gossip issues were found so couldn't merge that patch... + // For the paxos repair, since we don't care about paxos messages, this is ok to no-op for now, but if paxos cleanup + // ever was to be tested this logic would need to be implemented + } + } +} From 3ef74f594847a0417ff12d1b136b965c6d19e191 Mon Sep 17 00:00:00 2001 From: Aleksey Yeschenko Date: Sat, 28 Sep 2024 19:25:23 +0100 Subject: [PATCH 150/340] Change MaxConflicts to use a BTree under the hood patch by Aleksey Yeschenko; reviewed by Benedict Elliott Smith for CASSANDRA-19952 --- modules/accord | 2 +- .../cassandra/service/accord/api/AccordAgent.java | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/modules/accord b/modules/accord index 4844e64945b7..593e042535d6 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 4844e64945b720c802dce11d811e25665f9da826 +Subproject commit 593e042535d60e773cfa5f7c4b6a63e2fb6e5b30 diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 23a35208535e..11d7dd01cfc3 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -161,6 +161,19 @@ public int cfkPruneInterval() return 32; } + // TODO (expected): we probably want additional configuration here + @Override + public long maxConflictsHlcPruneDelta() + { + return SECONDS.toMicros(1); + } + + @Override + public long maxConflictsPruneInterval() + { + return 100; + } + /** * Create an empty transaction that Accord can use for its internal transactions. This is not suitable * for tests since it skips validation done by regular transactions. From 1c7c311a2d5196a00d5ddfe267c4b44e5d92b082 Mon Sep 17 00:00:00 2001 From: Blake Eggleston Date: Sat, 28 Sep 2024 13:30:45 -0700 Subject: [PATCH 151/340] ninja: handle serialization of truncated accept responses --- .../service/accord/serializers/AcceptSerializers.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java index 99c7f820ccf1..11733d3cc234 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java @@ -118,6 +118,10 @@ public void serialize(AcceptReply reply, DataOutputPlus out, int version) throws case RejectedBallot: out.writeByte(4); CommandSerializers.ballot.serialize(reply.supersededBy, out, version); + break; + case Truncated: + out.writeByte(5); + break; } } @@ -136,6 +140,8 @@ public AcceptReply deserialize(DataInputPlus in, int version) throws IOException return AcceptReply.REDUNDANT; case 4: return new AcceptReply(CommandSerializers.ballot.deserialize(in, version)); + case 5: + return AcceptReply.TRUNCATED; } } @@ -151,6 +157,7 @@ public long serializedSize(AcceptReply reply, int version) size += DepsSerializer.partialDeps.serializedSize(reply.deps, version); break; case Redundant: + case Truncated: break; case RejectedBallot: size += CommandSerializers.ballot.serializedSize(reply.supersededBy, version); From 1c269348a4522e2b188fa06b8382908aefee5239 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Thu, 12 Sep 2024 13:45:10 +0200 Subject: [PATCH 152/340] Implement Journal replay on startup: * reconstruct CFK, TFK, progressLog * migrate CommandStore collection state from Accord table to the log * make memtable writes non-durable; reconstruct memtable state from Writes Patch by Alex Petrov and Benedict Elliott Smith; reviewed by Benedict Elliott Smith and Alex Petrov for CASSANDRA-19869 --- modules/accord | 2 +- .../org/apache/cassandra/cql3/Relation.java | 1 - .../cassandra/db/ColumnFamilyStore.java | 1 + .../db/compaction/CompactionIterator.java | 63 ++- .../cassandra/db/memtable/Memtable.java | 6 + .../db/memtable/ShardedSkipListMemtable.java | 20 + .../db/memtable/SkipListMemtable.java | 10 + .../db/virtual/AccordVirtualTables.java | 8 +- src/java/org/apache/cassandra/dht/Token.java | 28 +- .../index/accord/RangeMemoryIndex.java | 8 +- .../cassandra/index/accord/RouteIndex.java | 8 +- .../index/accord/RoutesSearcher.java | 6 +- .../io/LocalVersionedSerializer.java | 5 + .../apache/cassandra/journal/Compactor.java | 6 +- .../org/apache/cassandra/journal/Journal.java | 94 +++- .../cassandra/journal/SegmentCompactor.java | 4 +- .../cassandra/journal/ValueSerializer.java | 3 - .../org/apache/cassandra/schema/TableId.java | 2 +- .../cassandra/service/StorageService.java | 4 + .../service/accord/AccordCommandStore.java | 333 ++++++++++--- .../accord/AccordConfigurationService.java | 20 +- .../service/accord/AccordDataStore.java | 138 ++++++ .../accord/AccordFetchCoordinator.java | 2 +- .../service/accord/AccordJournal.java | 461 +++++++---------- .../service/accord/AccordJournalTable.java | 198 +++++++- .../accord/AccordJournalValueSerializers.java | 383 +++++++++++++++ .../service/accord/AccordKeyspace.java | 462 +++++++----------- .../service/accord/AccordObjectSizes.java | 22 +- .../service/accord/AccordSafeCommand.java | 2 +- .../accord/AccordSafeCommandStore.java | 179 ++++--- .../accord/AccordSafeCommandsForKey.java | 10 +- .../accord/AccordSafeTimestampsForKey.java | 12 +- .../accord/AccordSegmentCompactor.java | 77 ++- .../service/accord/AccordService.java | 100 ++-- .../service/accord/AccordStateCache.java | 19 + .../service/accord/AccordTopology.java | 50 +- .../service/accord/AccordVerbHandler.java | 10 +- .../accord/CommandStoreTxnBlockedGraph.java | 12 +- .../service/accord/CommandsForRanges.java | 6 +- .../accord/CommandsForRangesLoader.java | 4 +- .../service/accord/IAccordService.java | 14 +- .../cassandra/service/accord/IJournal.java | 26 +- .../cassandra/service/accord/JournalKey.java | 107 +++- .../service/accord/SavedCommand.java | 196 +++++--- .../service/accord/api/AccordAgent.java | 29 +- .../service/accord/api/AccordRoutingKey.java | 32 ++ .../service/accord/async/AsyncLoader.java | 36 +- .../service/accord/async/AsyncOperation.java | 50 +- .../accord/interop/AccordInteropAdapter.java | 4 +- .../accord/interop/AccordInteropApply.java | 23 +- .../accord/interop/AccordInteropCommit.java | 11 +- .../accord/interop/AccordInteropRead.java | 4 +- .../interop/AccordInteropReadRepair.java | 2 +- .../accord/repair/RepairSyncPointAdapter.java | 18 +- .../accord/serializers/AcceptSerializers.java | 44 +- .../AccordRoutingKeyByteSource.java | 66 ++- .../accord/serializers/ApplySerializers.java | 14 +- .../accord/serializers/AwaitSerializer.java | 7 +- .../BeginInvalidationSerializers.java | 15 +- .../serializers/CalculateDepsSerializers.java | 16 +- .../serializers/CheckStatusSerializers.java | 84 ++-- .../serializers/CommandSerializers.java | 128 +++-- .../serializers/CommandStoreSerializers.java | 51 +- .../serializers/CommandsForKeySerializer.java | 203 +++++--- .../accord/serializers/CommitSerializers.java | 38 +- .../accord/serializers/DepsSerializer.java | 47 +- .../GetEphmrlReadDepsSerializers.java | 16 +- .../GetMaxConflictSerializers.java | 8 +- .../IVersionedWithKeysSerializer.java | 54 +- .../serializers/InformDurableSerializers.java | 2 +- .../accord/serializers/KeySerializers.java | 3 + .../serializers/PreacceptSerializers.java | 18 +- .../serializers/ReadDataSerializers.java | 7 +- .../serializers/RecoverySerializers.java | 24 +- .../serializers/SetDurableSerializers.java | 20 +- .../serializers/TopologySerializers.java | 4 +- .../serializers/WaitingOnSerializer.java | 4 +- .../service/accord/txn/TxnWrite.java | 7 +- .../cassandra/tcm/membership/Directory.java | 5 +- .../apache/cassandra/utils/FBUtilities.java | 16 +- .../utils/btree/AbstractBTreeMap.java | 5 +- .../cassandra/utils/vint/VIntCoding.java | 5 + .../test/accord/AccordBootstrapTest.java | 2 +- .../test/accord/AccordDropTableBase.java | 6 +- .../accord/AccordIncrementalRepairTest.java | 60 ++- .../accord/AccordJournalIntegrationTest.java | 48 +- .../test/accord/AccordTestBase.java | 3 +- .../journal/AccordJournalCompactionTest.java | 137 ------ .../accord/AccordJournalCompactionTest.java | 186 +++++++ .../test/AccordJournalSimulationTest.java | 8 +- .../cql3/conditions/ColumnConditionTest.java | 5 - .../CompactionAccordIteratorsTest.java | 38 +- .../db/virtual/AccordVirtualTablesTest.java | 2 +- .../index/accord/AccordIndexStressTest.java | 13 +- .../index/accord/RouteIndexTest.java | 17 +- .../apache/cassandra/journal/JournalTest.java | 27 - .../cassandra/service/StorageServiceTest.java | 1 + .../accord/AccordCommandStoreTest.java | 16 +- .../service/accord/AccordCommandTest.java | 23 +- .../accord/AccordJournalOrderTest.java | 8 +- .../service/accord/AccordJournalTest.java | 22 +- .../service/accord/AccordKeyspaceTest.java | 37 +- .../accord/AccordSyncPropagatorTest.java | 4 +- .../service/accord/AccordTestUtils.java | 38 +- .../service/accord/CommandsForRangesTest.java | 2 +- .../cassandra/service/accord/MockJournal.java | 149 ++++-- .../service/accord/SavedCommandTest.java | 6 +- ...SimpleSimulatedAccordCommandStoreTest.java | 6 +- .../accord/SimulatedAccordCommandStore.java | 20 +- .../SimulatedAccordCommandStoreTestBase.java | 36 +- .../service/accord/SimulatedDepsTest.java | 31 +- .../accord/SimulatedMultiKeyAndRangeTest.java | 12 +- ...ulatedRandomKeysWithRangeConflictTest.java | 13 +- .../service/accord/async/AsyncLoaderTest.java | 69 ++- .../accord/async/AsyncOperationTest.java | 45 +- .../async/SimulatedAsyncOperationTest.java | 17 +- .../CheckStatusSerializersTest.java | 12 +- .../CommandsForKeySerializerTest.java | 41 +- .../cassandra/utils/AccordGenerators.java | 101 +++- 119 files changed, 3493 insertions(+), 2010 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java delete mode 100644 test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java create mode 100644 test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java diff --git a/modules/accord b/modules/accord index 593e042535d6..2a7aceb96cb1 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 593e042535d60e773cfa5f7c4b6a63e2fb6e5b30 +Subproject commit 2a7aceb96cb1e03bcfe150403b9d245b1d2562f9 diff --git a/src/java/org/apache/cassandra/cql3/Relation.java b/src/java/org/apache/cassandra/cql3/Relation.java index 9fbd3adf68e9..31d6f771ce6d 100644 --- a/src/java/org/apache/cassandra/cql3/Relation.java +++ b/src/java/org/apache/cassandra/cql3/Relation.java @@ -35,7 +35,6 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import static org.apache.cassandra.cql3.statements.RequestValidations.*; -import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; /** * The parsed version of a {@code SimpleRestriction} as outputed by the CQL parser. diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 0fcdb192fd16..e81c0d7be8e6 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -237,6 +237,7 @@ public enum FlushReason SCHEMA_CHANGE, OWNED_RANGES_CHANGE, ACCORD, + ACCORD_TXN_GC, UNIT_TESTS // explicitly requested flush needed for a test } diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 924a960571b9..f9b4b9e8ce06 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -35,9 +35,10 @@ import accord.local.CommandStores.RangesForEpoch; import accord.local.DurableBefore; import accord.local.RedundantBefore; -import accord.local.SaveStatus; -import accord.local.Status.Durability; -import accord.primitives.Route; +import accord.local.StoreParticipants; +import accord.primitives.SaveStatus; +import accord.primitives.Status; +import accord.primitives.Status.Durability; import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.agrona.collections.Int2ObjectHashMap; @@ -85,19 +86,19 @@ import org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyRows; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.paxos.PaxosRepairHistory; import org.apache.cassandra.service.paxos.uncommitted.PaxosRows; import org.apache.cassandra.utils.TimeUUID; import static accord.local.Cleanup.TRUNCATE_WITH_OUTCOME; -import static accord.local.Cleanup.shouldCleanup; +import static accord.local.Cleanup.shouldCleanupPartial; import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.cassandra.config.Config.PaxosStatePurging.legacy; import static org.apache.cassandra.config.DatabaseDescriptor.paxosStatePurging; -import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.invalidated; -import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.maybeDropTruncatedCommandColumns; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.expungePartial; +import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.saveStatusOnly; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandRows.truncatedApply; import static org.apache.cassandra.service.accord.AccordKeyspace.CommandsForKeysAccessor; import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_executed_micros; @@ -105,7 +106,7 @@ import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyColumns.last_write_timestamp; import static org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyRows.truncateTimestampsForKeyRow; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeDurabilityOrNull; -import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeRouteOrNull; +import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeParticipantsOrNull; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeSaveStatusOrNull; import static org.apache.cassandra.service.accord.AccordKeyspace.deserializeTimestampOrNull; @@ -816,33 +817,31 @@ protected Row applyToRow(Row row) if (redundantBefore == null) return row; - // When commands end up being sliced by compaction we need this to discard tombstones and slices - // without enough information to run the rest of the cleanup logic - if (Cleanup.isSafeToCleanup(durableBefore, txnId, ranges.get(storeId).allAt(txnId.epoch()))) - return null; - Cell durabilityCell = row.getCell(CommandsColumns.durability); Durability durability = deserializeDurabilityOrNull(durabilityCell); Cell executeAtCell = row.getCell(CommandsColumns.execute_at); - Timestamp executeAt = deserializeTimestampOrNull(executeAtCell); - Cell routeCell = row.getCell(CommandsColumns.route); - Route route = deserializeRouteOrNull(routeCell); + Cell participantsCell = row.getCell(CommandsColumns.participants); + StoreParticipants participants = deserializeParticipantsOrNull(participantsCell); Cell statusCell = row.getCell(CommandsColumns.status); SaveStatus saveStatus = deserializeSaveStatusOrNull(statusCell); - // With a sliced row we might not have enough columns to determine what to do so output the - // the row unmodified and we will try again later once it merges with the rest of the command state - // or is dropped by `durableBefore.min(txnId) == Universal` - if (executeAt == null || durability == null || saveStatus == null || route == null) + if (saveStatus == null) return row; - Cleanup cleanup = shouldCleanup(txnId, saveStatus.status, - durability, executeAt, route, - redundantBefore, durableBefore, - false); + if (saveStatus.is(Status.Invalidated)) + return saveStatusOnly(saveStatus, row, nowInSec); + + Cleanup cleanup = shouldCleanupPartial(txnId, saveStatus, durability, participants, + redundantBefore, durableBefore); switch (cleanup) { default: throw new AssertionError(String.format("Unexpected cleanup task: %s", cleanup)); + case EXPUNGE: + return null; + + case EXPUNGE_PARTIAL: + return expungePartial(row, durabilityCell, executeAtCell, participantsCell); + case ERASE: // Emit a tombstone so if this is slicing the command and making it not possible to determine if it // can be truncated later it can still be dropped via the tombstone. @@ -850,24 +849,20 @@ protected Row applyToRow(Row row) // We can still encounter sliced command state just because compaction inputs are random return BTreeRow.emptyDeletedRow(row.clustering(), new Row.Deletion(DeletionTime.build(row.primaryKeyLivenessInfo().timestamp(), nowInSec), false)); + case VESTIGIAL: case INVALIDATE: - return invalidated(cleanup.appliesIfNot, row, nowInSec); + return saveStatusOnly(cleanup.appliesIfNot, row, nowInSec); case TRUNCATE_WITH_OUTCOME: case TRUNCATE: - if (saveStatus.compareTo(cleanup.appliesIfNot) >= 0) - return maybeDropTruncatedCommandColumns(row, durabilityCell, executeAtCell, routeCell, statusCell); - return truncatedApply(cleanup.appliesIfNot, - row, nowInSec, durability, durabilityCell, executeAtCell, routeCell, cleanup == TRUNCATE_WITH_OUTCOME); + return truncatedApply(cleanup.appliesIfNot, row, nowInSec, durability, durabilityCell, executeAtCell, participantsCell, cleanup == TRUNCATE_WITH_OUTCOME); case NO: + // TODO (required): when we port this to journal, make sure to expunge extra fields beyond those we need to retain return row; } } - - - @Override protected Row applyToStatic(Row row) { @@ -880,7 +875,7 @@ class AccordTimestampsForKeyPurger extends AbstractPurger { final Int2ObjectHashMap redundantBefores; int storeId; - PartitionKey partitionKey; + TokenKey partitionKey; AccordTimestampsForKeyPurger(Supplier accordService) { @@ -955,7 +950,7 @@ class AccordCommandsForKeyPurger extends AbstractPurger final CommandsForKeyAccessor accessor; final Int2ObjectHashMap redundantBefores; int storeId; - PartitionKey partitionKey; + TokenKey partitionKey; AccordCommandsForKeyPurger(CommandsForKeyAccessor accessor, Supplier accordService) { diff --git a/src/java/org/apache/cassandra/db/memtable/Memtable.java b/src/java/org/apache/cassandra/db/memtable/Memtable.java index f722ec20a907..b34a617ef4a7 100644 --- a/src/java/org/apache/cassandra/db/memtable/Memtable.java +++ b/src/java/org/apache/cassandra/db/memtable/Memtable.java @@ -30,6 +30,7 @@ import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.EncodingStats; import org.apache.cassandra.db.rows.UnfilteredSource; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.transactions.UpdateTransaction; import org.apache.cassandra.io.sstable.format.SSTableWriter; import org.apache.cassandra.schema.TableMetadata; @@ -441,4 +442,9 @@ public LastCommitLogPosition(CommitLogPosition copy) super(copy.segmentId, copy.position); } } + + default Token lastToken() + { + throw new UnsupportedOperationException("lastToken is not supported"); + } } diff --git a/src/java/org/apache/cassandra/db/memtable/ShardedSkipListMemtable.java b/src/java/org/apache/cassandra/db/memtable/ShardedSkipListMemtable.java index 92cdbbad9fe0..eb4a44ebd228 100644 --- a/src/java/org/apache/cassandra/db/memtable/ShardedSkipListMemtable.java +++ b/src/java/org/apache/cassandra/db/memtable/ShardedSkipListMemtable.java @@ -50,6 +50,7 @@ import org.apache.cassandra.dht.Bounds; import org.apache.cassandra.dht.IncludingExcludingBounds; import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.transactions.UpdateTransaction; import org.apache.cassandra.io.sstable.SSTableReadsListener; import org.apache.cassandra.schema.TableMetadata; @@ -112,6 +113,25 @@ public boolean isClean() return true; } + @Override + public Token lastToken() + { + Token lastToken = null; + for (MemtableShard shard : shards) + { + Iterator ppIterator = shard.partitions.descendingKeySet().iterator(); + if (ppIterator.hasNext()) + { + Token token = ppIterator.next().getToken(); + if (lastToken == null) + lastToken = token; + else if (lastToken.compareTo(token) < 0) + lastToken = token; + } + } + return lastToken; + } + /** * Should only be called by ColumnFamilyStore.apply via Keyspace.apply, which supplies the appropriate * OpOrdering. diff --git a/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java b/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java index 8871b03bd69f..3f6fbcbd5200 100644 --- a/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java +++ b/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java @@ -50,6 +50,7 @@ import org.apache.cassandra.dht.Bounds; import org.apache.cassandra.dht.IncludingExcludingBounds; import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.index.transactions.UpdateTransaction; import org.apache.cassandra.io.sstable.SSTableReadsListener; import org.apache.cassandra.schema.TableMetadata; @@ -97,6 +98,15 @@ public boolean isClean() return partitions.isEmpty(); } + @Override + public Token lastToken() + { + Iterator iterator = partitions.keySet().iterator(); + if (iterator.hasNext()) + return iterator.next().getToken(); + return null; + } + /** * Should only be called by ColumnFamilyStore.apply via Keyspace.apply, which supplies the appropriate * OpOrdering. diff --git a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java index b75d081d1d47..0918d73a5dab 100644 --- a/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java +++ b/src/java/org/apache/cassandra/db/virtual/AccordVirtualTables.java @@ -37,7 +37,7 @@ import org.slf4j.LoggerFactory; import accord.local.CommandStores; -import accord.local.Status; +import accord.primitives.Status; import accord.primitives.TxnId; import accord.utils.Invariants; import accord.utils.async.AsyncChain; @@ -62,7 +62,7 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordStateCache; import org.apache.cassandra.service.accord.CommandStoreTxnBlockedGraph; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.consensus.migration.TableMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; @@ -288,7 +288,7 @@ private static UserType pkType(String keyspace) Arrays.asList(UTF8Type.instance, UTF8Type.instance), false); } - private ByteBuffer pk(PartitionKey pk) + private ByteBuffer pk(TokenKey pk) { TableMetadata tm = Schema.instance.getTableMetadata(pk.table()); return partitionKeyType.pack(UTF8Type.instance.decompose(tm.toString()), @@ -346,7 +346,7 @@ private void process(SimpleDataSet ds, CommandStoreTxnBlockedGraph shard, Set, Serializable { @@ -95,7 +96,7 @@ public void serialize(Token token, DataOutputPlus out) throws IOException out.write(toByteArray(token)); } - public void serialize(Token token, ByteBuffer out) throws IOException + public void serialize(Token token, ByteBuffer out) { out.put(toByteArray(token)); } @@ -202,6 +203,26 @@ public void serialize(Token token, DataOutputPlus out, int version) throws IOExc p.getTokenFactory().serialize(token, out); } + public void serialize(Token token, ByteBuffer out) + { + IPartitioner p = token.getPartitioner(); + if (logPartitioner && serializePartitioners.add(p.getClass())) + logger.debug("Serializing token with partitioner " + p); + if (!p.isFixedLength()) + VIntCoding.writeUnsignedVInt32(p.getTokenFactory().byteSize(token), out); + p.getTokenFactory().serialize(token, out); + } + + public Token deserialize(ByteBuffer in, IPartitioner p) + { + int size = p.isFixedLength() ? p.getMaxTokenSize() : VIntCoding.readUnsignedVInt32(in); + if (logPartitioner && deserializePartitioners.add(p.getClass())) + logger.debug("Deserializing token with partitioner " + p); + byte[] bytes = new byte[size]; + in.get(bytes); + return p.getTokenFactory().fromByteArray(ByteBuffer.wrap(bytes)); + } + public Token deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException { int size = p.isFixedLength() ? p.getMaxTokenSize() : in.readUnsignedVInt32(); @@ -213,6 +234,11 @@ public Token deserialize(DataInputPlus in, IPartitioner p, int version) throws I } public long serializedSize(Token object, int version) + { + return serializedSize(object); + } + + public long serializedSize(Token object) { IPartitioner p = object.getPartitioner(); int byteSize = p.getTokenFactory().byteSize(object); diff --git a/src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java b/src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java index 5581708171ba..804f0926434e 100644 --- a/src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java +++ b/src/java/org/apache/cassandra/index/accord/RangeMemoryIndex.java @@ -105,7 +105,7 @@ public synchronized long add(DecoratedKey key, Clustering clustering, ByteBuf Route route; try { - route = AccordKeyspace.deserializeRouteOrNull(value); + route = AccordKeyspace.deserializeParticipantsRouteOnlyOrNull(value); } catch (IOException e) { @@ -118,7 +118,7 @@ public synchronized long add(DecoratedKey key, Clustering clustering, ByteBuf public synchronized long add(DecoratedKey key, Route route) { - if (route.domain() != Routable.Domain.Range) + if (route == null || route.domain() != Routable.Domain.Range) return 0; long sum = 0; for (Unseekable keyOrRange : route) @@ -146,7 +146,7 @@ protected long add(DecoratedKey key, Unseekable keyOrRange) return TableId.EMPTY_SIZE + range.unsharedHeapSize(); } - public NavigableSet search(int storeId, TableId tableId, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive) + public synchronized NavigableSet search(int storeId, TableId tableId, byte[] start, boolean startInclusive, byte[] end, boolean endInclusive) { RangeTree rangesToPks = map.get(new Group(storeId, tableId)); if (rangesToPks == null || rangesToPks.isEmpty()) @@ -172,7 +172,7 @@ public synchronized boolean isEmpty() return map.isEmpty(); } - public Segment write(IndexDescriptor id) throws IOException + public synchronized Segment write(IndexDescriptor id) throws IOException { if (map.isEmpty()) throw new AssertionError("Unable to write empty index"); diff --git a/src/java/org/apache/cassandra/index/accord/RouteIndex.java b/src/java/org/apache/cassandra/index/accord/RouteIndex.java index 8dcaf2067c8a..2f5026a8a364 100644 --- a/src/java/org/apache/cassandra/index/accord/RouteIndex.java +++ b/src/java/org/apache/cassandra/index/accord/RouteIndex.java @@ -117,11 +117,11 @@ public RouteIndex(ColumnFamilyStore baseCfs, IndexMetadata indexMetadata) TableMetadata tableMetadata = baseCfs.metadata(); Pair target = TargetParser.parse(tableMetadata, indexMetadata); - if (!AccordKeyspace.CommandsColumns.route.name.equals(target.left.name)) - throw new IllegalArgumentException("Attempted to index the wrong column; needed " + AccordKeyspace.CommandsColumns.route.name + " but given " + target.left.name); + if (!AccordKeyspace.CommandsColumns.participants.name.equals(target.left.name)) + throw new IllegalArgumentException("Attempted to index the wrong column; needed " + AccordKeyspace.CommandsColumns.participants.name + " but given " + target.left.name); if (target.right != IndexTarget.Type.VALUES) - throw new IllegalArgumentException("Attempted to index " + AccordKeyspace.CommandsColumns.route.name + " with index type " + target.right + "; only " + IndexTarget.Type.VALUES + " is supported"); + throw new IllegalArgumentException("Attempted to index " + AccordKeyspace.CommandsColumns.participants.name + " with index type " + target.right + "; only " + IndexTarget.Type.VALUES + " is supported"); this.baseCfs = baseCfs; this.indexMetadata = indexMetadata; @@ -383,7 +383,7 @@ public Searcher searcherFor(ReadCommand command) Integer storeId = null; for (RowFilter.Expression e : expressions) { - if (e.column() == AccordKeyspace.CommandsColumns.route) + if (e.column() == AccordKeyspace.CommandsColumns.participants) { switch (e.operator()) { diff --git a/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java b/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java index 1ade3b8a4a98..7975b95c7b6a 100644 --- a/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java +++ b/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java @@ -48,7 +48,7 @@ public class RoutesSearcher { private final ColumnFamilyStore cfs = Keyspace.open("system_accord").getColumnFamilyStore("commands"); private final Index index = cfs.indexManager.getIndexByName("route");; - private final ColumnMetadata route = AccordKeyspace.CommandsColumns.route; + private final ColumnMetadata participants = AccordKeyspace.CommandsColumns.participants; private final ColumnMetadata store_id = AccordKeyspace.CommandsColumns.store_id; private final ColumnMetadata txn_id = AccordKeyspace.CommandsColumns.txn_id; private final ColumnFilter columnFilter = ColumnFilter.selectionBuilder().add(store_id).add(txn_id).build(); @@ -58,8 +58,8 @@ public class RoutesSearcher private CloseableIterator searchKeysAccord(int store, AccordRoutingKey start, AccordRoutingKey end) { RowFilter rowFilter = RowFilter.create(false); - rowFilter.add(route, Operator.GT, OrderedRouteSerializer.serializeRoutingKey(start)); - rowFilter.add(route, Operator.LTE, OrderedRouteSerializer.serializeRoutingKey(end)); + rowFilter.add(participants, Operator.GT, OrderedRouteSerializer.serializeRoutingKey(start)); + rowFilter.add(participants, Operator.LTE, OrderedRouteSerializer.serializeRoutingKey(end)); rowFilter.add(store_id, Operator.EQ, Int32Type.instance.decompose(store)); PartitionRangeReadCommand cmd = PartitionRangeReadCommand.create(cfs.metadata(), diff --git a/src/java/org/apache/cassandra/io/LocalVersionedSerializer.java b/src/java/org/apache/cassandra/io/LocalVersionedSerializer.java index 739f00784675..cc1cdb29532b 100644 --- a/src/java/org/apache/cassandra/io/LocalVersionedSerializer.java +++ b/src/java/org/apache/cassandra/io/LocalVersionedSerializer.java @@ -48,6 +48,11 @@ public LocalVersionedSerializer(V currentVers this.serializer = Objects.requireNonNull(serializer); } + public MessageVersionProvider deserializeVersion(DataInputPlus in) throws IOException + { + return versionSerializer.deserialize(in, currentVersion.messageVersion()); + } + public IVersionedSerializer serializer() { return serializer; diff --git a/src/java/org/apache/cassandra/journal/Compactor.java b/src/java/org/apache/cassandra/journal/Compactor.java index 846dd62ba8f8..a4638266fec9 100644 --- a/src/java/org/apache/cassandra/journal/Compactor.java +++ b/src/java/org/apache/cassandra/journal/Compactor.java @@ -62,7 +62,11 @@ public void run() try { - Collection> newSegments = segmentCompactor.compact(toCompact, journal.keySupport); + Collection> newSegments = segmentCompactor.compact(toCompact); + // No-op compaction + if (newSegments == null) + return; + for (StaticSegment segment : newSegments) toCompact.remove(segment); diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 0baf8e5af323..624250d60583 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -17,12 +17,14 @@ */ package org.apache.cassandra.journal; +import java.io.Closeable; import java.io.IOException; import java.nio.channels.ClosedByInterruptException; import java.nio.file.FileStore; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.PriorityQueue; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -59,7 +61,6 @@ import org.jctools.queues.MpscUnboundedArrayQueue; import static java.lang.String.format; -import static java.util.Comparator.comparing; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; @@ -203,7 +204,8 @@ public boolean isFlushed(RecordPointer recordPointer) public void onFlush(RecordPointer recordPointer, Runnable runnable) { - flusherCallbacks.submit(recordPointer, runnable); + if (isFlushed(recordPointer)) runnable.run(); + else flusherCallbacks.submit(recordPointer, runnable); } public void start() @@ -229,7 +231,7 @@ public void start() } @VisibleForTesting - void runCompactorForTesting() + public void runCompactorForTesting() { compactor.run(); } @@ -254,6 +256,7 @@ public void shutdown() try { allocator.shutdown(); + wakeAllocator(); // Wake allocator to force it into shutdown allocator.awaitTermination(1, TimeUnit.MINUTES); compactor.shutdown(); compactor.awaitTermination(1, TimeUnit.MINUTES); @@ -329,7 +332,6 @@ public void readAll(K id, RecordConsumer consumer) try (ReferencedSegments segments = selectAndReference(id)) { consumer.init(); - for (Segment segment : segments.allSorted()) segment.readAll(id, holder, consumer); } @@ -736,6 +738,17 @@ ReferencedSegment selectAndReference(long segmentTimestamp) } } + @SuppressWarnings("unused") + ReferencedSegments selectAndReference(Predicate> selector) + { + while (true) + { + ReferencedSegments referenced = segments().selectAndReference(selector); + if (null != referenced) + return referenced; + } + } + Segments segments() { return segments.get(); @@ -854,24 +867,6 @@ void closeActiveSegmentAndOpenAsStatic(ActiveSegment activeSegment) closer.execute(new CloseActiveSegmentRunnable(activeSegment)); } - /* - * Replay logic - */ - - /** - * Iterate over and invoke the supplied callback on every record, - * with segments iterated in segment timestamp order. Only visits - * finished, on-disk segments. - */ - public void replayStaticSegments(RecordConsumer consumer) - { - List> staticSegments = new ArrayList<>(); - segments().selectStatic(staticSegments); - staticSegments.sort(comparing(s -> s.descriptor)); - for (StaticSegment segment : staticSegments) - segment.forEachRecord(consumer); - } - @VisibleForTesting public void closeCurrentSegmentForTesting() { @@ -954,4 +949,59 @@ public interface Writer { void write(DataOutputPlus out, int userVersion) throws IOException; } + + public StaticSegmentIterator staticSegmentIterator() + { + return new StaticSegmentIterator(); + } + + /** + * Static segment iterator iterates all _static_ segments in _key_ order. + */ + public class StaticSegmentIterator implements Closeable + { + private final PriorityQueue> readers; + private final ReferencedSegments segments; + + private StaticSegmentIterator() + { + this.segments = selectAndReference(Segment::isStatic); + this.readers = new PriorityQueue<>((o1, o2) -> keySupport.compare(o1.key(), o2.key())); + for (Segment segment : this.segments.all()) + { + StaticSegment staticSegment = (StaticSegment)segment; + StaticSegment.KeyOrderReader reader = staticSegment.keyOrderReader(); + if (reader.advance()) + this.readers.add(reader); + } + } + + public K key() + { + StaticSegment.KeyOrderReader reader = readers.peek(); + if (reader == null) + return null; + return reader.key(); + } + + public void readAllForKey(K key, RecordConsumer reader) + { + while (true) + { + StaticSegment.KeyOrderReader next = readers.peek(); + if (next == null || !next.key().equals(key)) + break; + Invariants.checkState(next == readers.poll()); + + reader.accept(next.descriptor.timestamp, next.offset, next.key(), next.record(), next.hosts(), next.descriptor.userVersion); + if (next.advance()) + readers.add(next); + } + } + + public void close() + { + segments.close(); + } + } } diff --git a/src/java/org/apache/cassandra/journal/SegmentCompactor.java b/src/java/org/apache/cassandra/journal/SegmentCompactor.java index 5c95b539fcf3..7b84ea82e12d 100644 --- a/src/java/org/apache/cassandra/journal/SegmentCompactor.java +++ b/src/java/org/apache/cassandra/journal/SegmentCompactor.java @@ -22,7 +22,7 @@ public interface SegmentCompactor { - SegmentCompactor NOOP = (SegmentCompactor) (segments, keySupport) -> segments; + SegmentCompactor NOOP = (SegmentCompactor) (segments) -> segments; static SegmentCompactor noop() { @@ -30,5 +30,5 @@ static SegmentCompactor noop() return (SegmentCompactor) NOOP; } - Collection> compact(Collection> segments, KeySupport keySupport) throws IOException; + Collection> compact(Collection> segments) throws IOException; } diff --git a/src/java/org/apache/cassandra/journal/ValueSerializer.java b/src/java/org/apache/cassandra/journal/ValueSerializer.java index 610770ca66d9..69690d39b28a 100644 --- a/src/java/org/apache/cassandra/journal/ValueSerializer.java +++ b/src/java/org/apache/cassandra/journal/ValueSerializer.java @@ -24,9 +24,6 @@ public interface ValueSerializer { - // TODO (required): this is completely unused in Journal - int serializedSize(K key, V value, int userVersion); - void serialize(K key, V value, DataOutputPlus out, int userVersion) throws IOException; /** diff --git a/src/java/org/apache/cassandra/schema/TableId.java b/src/java/org/apache/cassandra/schema/TableId.java index def82d1c722a..ac486f71d985 100644 --- a/src/java/org/apache/cassandra/schema/TableId.java +++ b/src/java/org/apache/cassandra/schema/TableId.java @@ -190,7 +190,7 @@ public static TableId deserialize(DataInput in) throws IOException return new TableId(new UUID(in.readLong(), in.readLong())); } - public static TableId deserialize(V src, ValueAccessor accessor, int offset) throws IOException + public static TableId deserialize(V src, ValueAccessor accessor, int offset) { return new TableId(new UUID(accessor.getLong(src, offset), accessor.getLong(src, offset + TypeSizes.LONG_SIZE))); } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 60d6c329ee52..98da35158b96 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -168,6 +168,7 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableMetadataRef; import org.apache.cassandra.schema.ViewMetadata; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import org.apache.cassandra.service.disk.usage.DiskUsageBroadcaster; @@ -3794,6 +3795,9 @@ protected synchronized void drain(boolean isFinalShutdown) throws IOException, I logger.debug(msg); transientMode = Optional.of(Mode.DRAINING); + if (DatabaseDescriptor.getAccordTransactionsEnabled()) + AccordService.instance().shutdownAndWait(1, MINUTES); + try { /* not clear this is reasonable time, but propagated from prior embedded behaviour */ diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index c1ce231da91e..ca654ddaae3f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.accord; -import java.util.Collections; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; @@ -32,7 +31,6 @@ import java.util.function.Function; import javax.annotation.Nullable; -import accord.api.Key; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,22 +39,31 @@ import accord.api.DataStore; import accord.api.LocalListeners; import accord.api.ProgressLog; +import accord.api.RoutingKey; import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; +import accord.local.Cleanup; import accord.local.Command; import accord.local.CommandStore; +import accord.local.CommandStores; +import accord.local.Commands; import accord.local.DurableBefore; +import accord.local.KeyHistory; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.RedundantBefore; +import accord.local.SafeCommand; import accord.local.SafeCommandStore; -import accord.primitives.Keys; +import accord.primitives.Deps; +import accord.primitives.Participants; +import accord.primitives.Range; import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.RoutableKey; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; -import accord.utils.ReducingRangeMap; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.cache.CacheSize; @@ -66,12 +73,21 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Mutation; import org.apache.cassandra.metrics.AccordStateCacheMetrics; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.service.accord.events.CacheEvents; import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Promise; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static accord.primitives.SaveStatus.Applying; +import static accord.primitives.Status.Committed; +import static accord.primitives.Status.Invalidated; +import static accord.primitives.Status.PreApplied; +import static accord.primitives.Status.Stable; +import static accord.primitives.Status.Truncated; +import static accord.utils.Invariants.checkState; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; public class AccordCommandStore extends CommandStore implements CacheSize @@ -103,8 +119,8 @@ private static long getThreadId(ExecutorService executor) private final ExecutorService executor; private final AccordStateCache stateCache; private final AccordStateCache.Instance commandCache; - private final AccordStateCache.Instance timestampsForKeyCache; - private final AccordStateCache.Instance commandsForKeyCache; + private final AccordStateCache.Instance timestampsForKeyCache; + private final AccordStateCache.Instance commandsForKeyCache; private AsyncOperation currentOperation = null; private AccordSafeCommandStore current = null; private long lastSystemTimestampMicros = Long.MIN_VALUE; @@ -120,7 +136,17 @@ public AccordCommandStore(int id, IJournal journal, AccordStateCacheMetrics cacheMetrics) { - this(id, time, agent, dataStore, progressLogFactory, listenerFactory, epochUpdateHolder, journal, Stage.READ.executor(), Stage.MUTATION.executor(), cacheMetrics); + this(id, + time, + agent, + dataStore, + progressLogFactory, + listenerFactory, + epochUpdateHolder, + journal, + Stage.READ.executor(), + Stage.MUTATION.executor(), + cacheMetrics); } private static void registerJfrListener(int id, AccordStateCache.Instance instance, String name) @@ -219,7 +245,7 @@ public AccordCommandStore(int id, AccordObjectSizes::command); registerJfrListener(id, commandCache, "Command"); timestampsForKeyCache = - stateCache.instance(Key.class, + stateCache.instance(RoutingKey.class, AccordSafeTimestampsForKey.class, AccordSafeTimestampsForKey::new, this::loadTimestampsForKey, @@ -228,7 +254,7 @@ public AccordCommandStore(int id, AccordObjectSizes::timestampsForKey); registerJfrListener(id, timestampsForKeyCache, "TimestampsForKey"); commandsForKeyCache = - stateCache.instance(Key.class, + stateCache.instance(RoutingKey.class, AccordSafeCommandsForKey.class, AccordSafeCommandsForKey::new, this::loadCommandsForKey, @@ -240,20 +266,12 @@ public AccordCommandStore(int id, this.commandsForRangesLoader = new CommandsForRangesLoader(this); - AccordKeyspace.loadCommandStoreMetadata(id, ((rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead) -> { - executor.submit(() -> { - if (rejectBefore != null) - super.setRejectBefore(rejectBefore); - if (durableBefore != null) - super.setDurableBefore(durableBefore); - if (redundantBefore != null) - super.setRedundantBefore(redundantBefore); - if (bootstrapBeganAt != null) - super.setBootstrapBeganAt(bootstrapBeganAt); - if (safeToRead != null) - super.setSafeToRead(safeToRead); - }); - })); + loadRedundantBefore(journal.loadRedundantBefore(id())); + loadDurableBefore(journal.loadDurableBefore(id())); + loadBootstrapBeganAt(journal.loadBootstrapBeganAt(id())); + loadSafeToRead(journal.loadSafeToRead(id())); + loadRangesForEpoch(journal.loadRangesForEpoch(id())); + loadHistoricalTransactions(journal.loadHistoricalTransactions(id())); executor.execute(() -> CommandStore.register(this)); } @@ -269,6 +287,12 @@ public CommandsForRangesLoader diskCommandsForRanges() return commandsForRangesLoader; } + public void markShardDurable(SafeCommandStore safeStore, TxnId globalSyncId, Ranges ranges) + { + store.snapshot(ranges, globalSyncId); + super.markShardDurable(safeStore, globalSyncId, ranges); + } + @Override public boolean inStore() { @@ -304,14 +328,14 @@ public long weightedSize() public void checkInStoreThread() { - Invariants.checkState(inStore()); + checkState(inStore()); } public void checkNotInStoreThread() { if (!CHECK_THREADS) return; - Invariants.checkState(!inStore()); + checkState(!inStore()); } public ExecutorService executor() @@ -324,12 +348,12 @@ public AccordStateCache.Instance commandCache return commandCache; } - public AccordStateCache.Instance timestampsForKeyCache() + public AccordStateCache.Instance timestampsForKeyCache() { return timestampsForKeyCache; } - public AccordStateCache.Instance commandsForKeyCache() + public AccordStateCache.Instance commandsForKeyCache() { return commandsForKeyCache; } @@ -338,7 +362,7 @@ public AccordStateCache.Instance @VisibleForTesting public Runnable appendToKeyspace(Command before, Command after) { - if (after.keysOrRanges() != null && after.keysOrRanges() instanceof Keys) + if (after.txnId().is(Routable.Domain.Key)) return null; Mutation mutation = AccordKeyspace.getCommandMutation(this.id, before, after, nextSystemTimestampMicros()); @@ -350,13 +374,17 @@ public Runnable appendToKeyspace(Command before, Command after) return null; } + public void persistFieldUpdates(AccordSafeCommandStore.FieldUpdates fieldUpdates, Runnable onFlush) + { + journal.persistStoreState(id, fieldUpdates, onFlush); + } + + @Nullable @VisibleForTesting - public void appendToLog(Command before, Command after, Runnable runnable) + public void appendToLog(Command before, Command after, Runnable onFlush) { - journal.appendCommand(id, - Collections.singletonList(SavedCommand.diffWriter(before, after)), - null, runnable); + journal.appendCommand(id, SavedCommand.diff(before, after), onFlush); } boolean validateCommand(TxnId txnId, Command evicting) @@ -368,23 +396,29 @@ boolean validateCommand(TxnId txnId, Command evicting) return Objects.equals(evicting, reloaded); } + @VisibleForTesting + public void sanityCheckCommand(Command command) + { + ((AccordJournal) journal).sanityCheck(id, command); + } + boolean validateTimestampsForKey(RoutableKey key, TimestampsForKey evicting) { if (!Invariants.isParanoid()) return true; - TimestampsForKey reloaded = AccordKeyspace.unsafeLoadTimestampsForKey(this, (PartitionKey) key); + TimestampsForKey reloaded = AccordKeyspace.unsafeLoadTimestampsForKey(this, (TokenKey) key); return Objects.equals(evicting, reloaded); } TimestampsForKey loadTimestampsForKey(RoutableKey key) { - return AccordKeyspace.loadTimestampsForKey(this, (PartitionKey) key); + return AccordKeyspace.loadTimestampsForKey(this, (TokenKey) key); } CommandsForKey loadCommandsForKey(RoutableKey key) { - return AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); + return AccordKeyspace.loadCommandsForKey(this, (TokenKey) key); } boolean validateCommandsForKey(RoutableKey key, CommandsForKey evicting) @@ -392,7 +426,7 @@ boolean validateCommandsForKey(RoutableKey key, CommandsForKey evicting) if (!Invariants.isParanoid()) return true; - CommandsForKey reloaded = AccordKeyspace.loadCommandsForKey(this, (PartitionKey) key); + CommandsForKey reloaded = AccordKeyspace.loadCommandsForKey(this, (TokenKey) key); return Objects.equals(evicting, reloaded); } @@ -424,19 +458,19 @@ public void unsafeClearCache() public void setCurrentOperation(AsyncOperation operation) { - Invariants.checkState(currentOperation == null); + checkState(currentOperation == null); currentOperation = operation; } public AsyncOperation getContext() { - Invariants.checkState(currentOperation != null); + checkState(currentOperation != null); return currentOperation; } public void unsetCurrentOperation(AsyncOperation operation) { - Invariants.checkState(currentOperation == operation); + checkState(currentOperation == operation); currentOperation = null; } @@ -496,11 +530,11 @@ public void executeBlocking(Runnable runnable) public AccordSafeCommandStore beginOperation(PreLoadContext preLoadContext, Map commands, - NavigableMap timestampsForKeys, - NavigableMap commandsForKeys, + NavigableMap timestampsForKeys, + NavigableMap commandsForKeys, @Nullable AccordSafeCommandsForRanges commandsForRanges) { - Invariants.checkState(current == null); + checkState(current == null); commands.values().forEach(AccordSafeState::preExecute); commandsForKeys.values().forEach(AccordSafeState::preExecute); timestampsForKeys.values().forEach(AccordSafeState::preExecute); @@ -518,7 +552,7 @@ public boolean hasSafeStore() public void completeOperation(AccordSafeCommandStore store) { - Invariants.checkState(current == store); + checkState(current == store); try { current.postExecute(); @@ -538,55 +572,210 @@ public void abortCurrentOperation() public void shutdown() { executor.shutdown(); + try + { + executor.awaitTermination(20, TimeUnit.SECONDS); + } + catch (InterruptedException t) + { + throw new RuntimeException("Could not shut down command store " + this); + } } - protected void setRejectBefore(ReducingRangeMap newRejectBefore) + public void registerHistoricalTransactions(Deps deps, SafeCommandStore safeStore) { - super.setRejectBefore(newRejectBefore); - // TODO (required, correctness): rework to persist via journal once available, this can lose updates in some edge cases - AccordKeyspace.updateRejectBefore(this, newRejectBefore); + if (deps.isEmpty()) return; + + CommandStores.RangesForEpoch ranges = safeStore.ranges(); + // used in places such as accord.local.CommandStore.fetchMajorityDeps + // We find a set of dependencies for a range then update CommandsFor to know about them + Ranges allRanges = safeStore.ranges().all(); + deps.keyDeps.keys().forEach(allRanges, key -> { + // TODO (now): batch register to minimise GC + deps.keyDeps.forEach(key, (txnId, txnIdx) -> { + // TODO (desired, efficiency): this can be made more efficient by batching by epoch + if (ranges.coordinates(txnId).contains(key)) + return; // already coordinates, no need to replicate + if (!ranges.allBefore(txnId.epoch()).contains(key)) + return; + + safeStore.get(key).registerHistorical(safeStore, txnId); + }); + }); + for (int i = 0; i < deps.rangeDeps.rangeCount(); i++) + { + Range range = deps.rangeDeps.range(i); + if (!allRanges.intersects(range)) + continue; + deps.rangeDeps.forEach(range, txnId -> { + // TODO (desired, efficiency): this can be made more efficient by batching by epoch + if (ranges.coordinates(txnId).intersects(range)) + return; // already coordinates, no need to replicate + if (!ranges.allBefore(txnId.epoch()).intersects(range)) + return; + + diskCommandsForRanges().mergeHistoricalTransaction(txnId, Ranges.single(range).slice(allRanges), Ranges::with); + }); + } } - protected void setBootstrapBeganAt(NavigableMap newBootstrapBeganAt) + public NavigableMap safeToRead() { return super.safeToRead(); } + + public void appendCommands(List diffs, Runnable onFlush) { - super.setBootstrapBeganAt(newBootstrapBeganAt); - // TODO (required, correctness): rework to persist via journal once available, this can lose updates in some edge cases - AccordKeyspace.updateBootstrapBeganAt(this, newBootstrapBeganAt); + for (int i = 0; i < diffs.size(); i++) + { + boolean isLast = i == diffs.size() - 1; + SavedCommand.DiffWriter writer = diffs.get(i); + journal.appendCommand(id, writer, isLast ? onFlush : null); + } } - protected void setSafeToRead(NavigableMap newSafeToRead) + @VisibleForTesting + public Command loadCommand(TxnId txnId) { - super.setSafeToRead(newSafeToRead); - // TODO (required, correctness): rework to persist via journal once available, this can lose updates in some edge cases - AccordKeyspace.updateSafeToRead(this, newSafeToRead); + return journal.loadCommand(id, txnId); } - @Override - public void setDurableBefore(DurableBefore newDurableBefore) + public interface Loader { - super.setDurableBefore(newDurableBefore); - AccordKeyspace.updateDurableBefore(this, newDurableBefore); + Promise load(Command next); + Promise apply(Command next); } - @Override - protected void setRedundantBefore(RedundantBefore newRedundantBefore) + public Loader loader() { - super.setRedundantBefore(newRedundantBefore); - // TODO (required): this needs to be synchronous, or at least needs to take effect before we rely upon it - AccordKeyspace.updateRedundantBefore(this, newRedundantBefore); + return new Loader() + { + private PreLoadContext context(Command command, KeyHistory keyHistory) + { + TxnId txnId = command.txnId(); + Participants keys = null; + List deps = null; + if (CommandsForKey.manages(txnId)) + keys = command.hasBeen(Committed) ? command.participants().hasTouched() : command.participants().touches(); + else if (!CommandsForKey.managesExecution(txnId) && command.hasBeen(Status.Stable) && !command.hasBeen(Status.Truncated)) + keys = command.asCommitted().waitingOn.keys; + + if (command.partialDeps() != null) + deps = command.partialDeps().txnIds(); + + if (keys != null) + { + if (deps != null) + return PreLoadContext.contextFor(txnId, deps, keys, keyHistory); + + return PreLoadContext.contextFor(txnId, keys, keyHistory); + } + + return PreLoadContext.contextFor(txnId); + } + + public Promise load(Command command) + { + TxnId txnId = command.txnId(); + + AsyncPromise future = new AsyncPromise<>(); + execute(context(command, KeyHistory.COMMANDS), + safeStore -> { + Command local = command; + if (local.status() != Truncated && local.status() != Invalidated) + { + Cleanup cleanup = Cleanup.shouldCleanup(AccordCommandStore.this, local, local.participants()); + switch (cleanup) + { + case NO: + break; + case INVALIDATE: + case TRUNCATE_WITH_OUTCOME: + case TRUNCATE: + case ERASE: + local = Commands.purge(local, local.participants(), cleanup); + } + } + + local = safeStore.unsafeGet(txnId).update(safeStore, local); + if (local.status() == Truncated) + safeStore.progressLog().clear(local.txnId()); + }) + .begin((unused, throwable) -> { + if (throwable != null) + future.setFailure(throwable); + else + future.setSuccess(null); + }); + return future; + } + + public Promise apply(Command command) + { + TxnId txnId = command.txnId(); + + AsyncPromise future = new AsyncPromise<>(); + PreLoadContext context = context(command, KeyHistory.TIMESTAMPS); + execute(context, + safeStore -> { + SafeCommand safeCommand = safeStore.unsafeGet(txnId); + Command local = safeCommand.current(); + if (local.is(Stable) || local.is(PreApplied)) + Commands.maybeExecute(safeStore, safeCommand, local, true, true); + else if (local.saveStatus().compareTo(Applying) >= 0 && !local.hasBeen(Truncated)) + Commands.applyWrites(safeStore, context, local).begin(agent); + }) + .begin((unused, throwable) -> { + if (throwable != null) + future.setFailure(throwable); + else + future.setSuccess(null); + }); + return future; + } + }; } - public NavigableMap bootstrapBeganAt() { return super.bootstrapBeganAt(); } - public NavigableMap safeToRead() { return super.safeToRead(); } + /** + * Replay/state reloading + */ - public void appendCommands(List> commands, List sanityCheck, Runnable onFlush) + void loadRedundantBefore(RedundantBefore redundantBefore) { - journal.appendCommand(id, commands, sanityCheck, onFlush); + if (redundantBefore != null) + unsafeSetRedundantBefore(redundantBefore); } - @VisibleForTesting - public Command loadCommand(TxnId txnId) + void loadDurableBefore(DurableBefore durableBefore) { - return journal.loadCommand(id, txnId); + if (durableBefore != null) + unsafeSetDurableBefore(durableBefore); + } + + void loadBootstrapBeganAt(NavigableMap bootstrapBeganAt) + { + if (bootstrapBeganAt != null) + unsafeSetBootstrapBeganAt(bootstrapBeganAt); + } + + void loadSafeToRead(NavigableMap safeToRead) + { + if (safeToRead != null) + unsafeSetSafeToRead(safeToRead); + } + + void loadRangesForEpoch(CommandStores.RangesForEpoch.Snapshot rangesForEpoch) + { + if (rangesForEpoch != null) + unsafeSetRangesForEpoch(new CommandStores.RangesForEpoch(rangesForEpoch.epochs, rangesForEpoch.ranges, this)); + } + + void loadHistoricalTransactions(List deps) + { + if (deps != null) + { + execute(PreLoadContext.empty(), + safeStore -> { + for (Deps dep : deps) + registerHistoricalTransactions(dep, safeStore); + }); + } } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index e0d1d84020ab..09a04140c9e2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -29,6 +29,9 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Sets; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.impl.AbstractConfigurationService; import accord.local.Node; import accord.primitives.Ranges; @@ -50,7 +53,10 @@ import org.apache.cassandra.service.accord.AccordKeyspace.EpochDiskState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.listeners.ChangeListener; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Simulate; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; @@ -61,6 +67,8 @@ @Simulate(with=MONITORS) public class AccordConfigurationService extends AbstractConfigurationService implements ChangeListener, AccordEndpointMapper, AccordSyncPropagator.Listener, Shutdownable { + private static final Logger logger = LoggerFactory.getLogger(AccordConfigurationService.class); + private final AccordSyncPropagator syncPropagator; private final DiskStateManager diskStateManager; @@ -393,8 +401,16 @@ public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean @Override protected void fetchTopologyInternal(long epoch) { - // TODO: need a non-blocking way to inform CMS of an unknown epoch -// ClusterMetadataService.instance().maybeCatchup(Epoch.create(epoch)); + ClusterMetadata metadata = ClusterMetadata.current(); + if (metadata.directory.peerIds().size() < 2) + return; // just let CMS handle it when it's ready + + // TODO (desired): randomise + NodeId first = metadata.directory.peerIds().first(); + InetAddressAndPort peer = metadata.directory.getNodeAddresses(first).broadcastAddress; + if (FBUtilities.getBroadcastAddressAndPort().equals(peer)) + peer = metadata.directory.getNodeAddresses(metadata.directory.peerIds().higher(first)).broadcastAddress;; + ClusterMetadataService.instance().fetchLogFromPeerOrCMSAsync(metadata, peer, Epoch.create(epoch)); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordDataStore.java b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java index fee2d633a152..185e1068718c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordDataStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java @@ -18,11 +18,41 @@ package org.apache.cassandra.service.accord; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + import accord.api.DataStore; import accord.local.Node; import accord.local.SafeCommandStore; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.SyncPoint; +import accord.primitives.TxnId; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; +import org.agrona.collections.Object2ObjectHashMap; +import org.apache.cassandra.concurrent.ScheduledExecutors; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DataRange; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.commitlog.CommitLogPosition; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.lifecycle.View; +import org.apache.cassandra.db.memtable.Memtable; +import org.apache.cassandra.db.memtable.TrieMemtable; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.sstable.SSTableReadsListener; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.concurrent.FutureCombiner; + +import static accord.utils.Invariants.checkState; +import static org.apache.cassandra.db.ColumnFamilyStore.FlushReason.ACCORD_TXN_GC; public class AccordDataStore implements DataStore { @@ -33,4 +63,112 @@ public FetchResult fetch(Node node, SafeCommandStore safeStore, Ranges ranges, S coordinator.start(); return coordinator.result(); } + + static class SnapshotBounds + { + final List> ranges = new ArrayList<>(); + CommitLogPosition position; + } + + @Override + public AsyncResult snapshot(Ranges ranges, TxnId before) // TODO: does this have to go to journal, too? + { + AsyncResults.SettableResult result = new AsyncResults.SettableResult<>(); + // TODO: maintain a list of Accord tables, perhaps in ClusterMetadata? + ClusterMetadata metadata = ClusterMetadata.current(); + Object2ObjectHashMap tables = new Object2ObjectHashMap<>(); + for (Range range : ranges) + { + tables.computeIfAbsent(((TokenRange)range).table(), ignore -> new SnapshotBounds()) + .ranges.add(((TokenRange) range).toKeyspaceRange()); + } + + for (Map.Entry e : tables.entrySet()) + { + TableMetadata tableMetadata = metadata.schema.getTableMetadata(e.getKey()); + if (!tableMetadata.isAccordEnabled()) + continue; + + ColumnFamilyStore cfs = Keyspace.openAndGetStoreIfExists(tableMetadata); + // TODO (required): when we can safely map TxnId.hlc() -> local timestamp, consult Memtable timestamps + e.getValue().position = cfs.getCurrentMemtable().getCommitLogLowerBound(); + } + + ScheduledExecutors.scheduledTasks.schedule(() -> { + List> futures = new ArrayList<>(); + for (Map.Entry e : tables.entrySet()) + { + TableMetadata tableMetadata = metadata.schema.getTableMetadata(e.getKey()); + SnapshotBounds bounds = e.getValue(); + ColumnFamilyStore cfs = Keyspace.openAndGetStoreIfExists(tableMetadata); + View view = cfs.getTracker().getView(); + for (Memtable memtable : view.getAllMemtables()) + { + if (memtable.getCommitLogLowerBound().compareTo(bounds.position) > 0) continue; + if (!intersects(cfs, memtable, bounds.ranges)) continue; + + futures.add(cfs.forceFlush(ACCORD_TXN_GC)); + break; + } + } + + FutureCombiner.allOf(futures).addCallback((objects, throwable) -> { + if (throwable != null) + result.setFailure(throwable); + else + result.setSuccess(null); + }); + }, 5L, TimeUnit.MINUTES); + + return result; + } + + private boolean intersects(ColumnFamilyStore cfs, Memtable memtable, List> tableRanges) + { + boolean intersects = false; + // TrieMemtable doesn't support reverse iteration so can't find the last token + if (memtable instanceof TrieMemtable) + intersects = true; + else + { + Token firstToken = null; + try (UnfilteredPartitionIterator iterator = memtable.partitionIterator(ColumnFilter.all(cfs.metadata()), DataRange.allData(cfs.getPartitioner()), SSTableReadsListener.NOOP_LISTENER)) + { + if (iterator.hasNext()) + firstToken = iterator.next().partitionKey().getToken(); + } + Token lastToken = memtable.lastToken(); + + if (firstToken != null) + { + checkState(lastToken != null); + if (firstToken.equals(lastToken)) + { + for (org.apache.cassandra.dht.Range tableRange : tableRanges) + { + if (tableRange.contains(firstToken)) + { + intersects = true; + break; + } + } + } + else + { + checkState(firstToken.compareTo(lastToken) < 0); + org.apache.cassandra.dht.Range memtableRange = new org.apache.cassandra.dht.Range<>(firstToken, lastToken); + for (org.apache.cassandra.dht.Range tableRange : tableRanges) + { + if (tableRange.intersects(memtableRange)) + { + intersects = true; + break; + } + } + } + } + } + + return intersects; + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java index e4ad2aa5591e..61cb7a4e2810 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java @@ -292,7 +292,7 @@ public AsyncChain read(Seekable key, SafeCommandStore commandStore, Timest public Read slice(Ranges ranges) { return new StreamingRead(to, this.ranges.slice(ranges)); } @Override - public Read intersecting(Participants participants) { return new StreamingRead(to, this.ranges.intersecting(ranges)); } + public Read intersecting(Participants participants) { return new StreamingRead(to, this.ranges.slice(ranges)); } @Override public Read merge(Read other) { throw new UnsupportedOperationException(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 80ff9739cee0..f497b591b260 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -20,31 +20,33 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.List; +import java.util.NavigableMap; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BiConsumer; +import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.coordinate.Timeout; import accord.local.Command; +import accord.local.CommandStores; +import accord.local.CommandStores.RangesForEpoch; +import accord.local.DurableBefore; import accord.local.Node; -import accord.messages.ReplyContext; -import accord.messages.Request; +import accord.local.RedundantBefore; +import accord.primitives.SaveStatus; +import accord.primitives.Deps; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; -import org.agrona.collections.Long2ObjectHashMap; -import org.agrona.collections.LongArrayList; -import org.apache.cassandra.concurrent.InfiniteLoopExecutor; -import org.apache.cassandra.concurrent.Interruptible; -import org.apache.cassandra.concurrent.ManyToOneConcurrentLinkedQueue; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; @@ -52,19 +54,20 @@ import org.apache.cassandra.journal.Params; import org.apache.cassandra.journal.RecordPointer; import org.apache.cassandra.journal.ValueSerializer; -import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.net.ResponseContext; -import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; import org.apache.cassandra.utils.ExecutorUtils; -import org.apache.cassandra.utils.concurrent.Condition; -import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; -import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; +import static accord.primitives.Status.Truncated; +import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; +import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; public class AccordJournal implements IJournal, Shutdownable { + + private final AtomicBoolean isReplay = new AtomicBoolean(false); + static { // make noise early if we forget to update our version mappings @@ -75,13 +78,10 @@ public class AccordJournal implements IJournal, Shutdownable private static final Set SENTINEL_HOSTS = Collections.singleton(0); - static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[22]); + static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[23]); private final Journal journal; private final AccordJournalTable journalTable; - private final AccordEndpointMapper endpointMapper; - - private final DelayedRequestProcessor delayedRequestProcessor = new DelayedRequestProcessor(); Node node; @@ -89,30 +89,26 @@ enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } private volatile Status status = Status.INITIALIZED; @VisibleForTesting - public AccordJournal(AccordEndpointMapper endpointMapper, Params params) + public AccordJournal(Params params) { File directory = new File(DatabaseDescriptor.getAccordJournalDirectory()); this.journal = new Journal<>("AccordJournal", directory, params, JournalKey.SUPPORT, // In Accord, we are using streaming serialization, i.e. Reader/Writer interfaces instead of materializing objects new ValueSerializer<>() { - public int serializedSize(JournalKey key, Object value, int userVersion) - { - throw new UnsupportedOperationException(); - } - + @Override public void serialize(JournalKey key, Object value, DataOutputPlus out, int userVersion) { throw new UnsupportedOperationException(); } + @Override public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) { throw new UnsupportedOperationException(); } }, - new AccordSegmentCompactor<>()); - this.endpointMapper = endpointMapper; + new AccordSegmentCompactor<>(JournalKey.SUPPORT, params.userVersion())); this.journalTable = new AccordJournalTable<>(journal, JournalKey.SUPPORT, params.userVersion()); } @@ -122,7 +118,6 @@ public AccordJournal start(Node node) this.node = node; status = Status.STARTING; journal.start(); - delayedRequestProcessor.start(); status = Status.STARTED; return this; } @@ -138,7 +133,6 @@ public void shutdown() { Invariants.checkState(status == Status.STARTED); status = Status.TERMINATING; - delayedRequestProcessor.shutdown(); journal.shutdown(); status = Status.TERMINATED; } @@ -164,310 +158,223 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted } } - /** - * Accord protocol messages originating from remote nodes. - */ - public void processRemoteRequest(Request request, ResponseContext context) - { - RemoteRequestContext requestContext = RemoteRequestContext.forLive(request, context); - if (node.topology().hasEpoch(request.waitForEpoch())) - requestContext.process(node, endpointMapper); - else - delayedRequestProcessor.delay(requestContext); - } - @Override public Command loadCommand(int commandStoreId, TxnId txnId) { - try - { - return loadDiffs(commandStoreId, txnId).construct(); - } - catch (IOException e) - { - // can only throw if serializer is buggy - throw new RuntimeException(e); - } + return loadDiffs(commandStoreId, txnId).construct(); } @VisibleForTesting - public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) + public RedundantBefore loadRedundantBefore(int store) { - JournalKey key = new JournalKey(txnId, commandStoreId); - SavedCommand.Builder builder = new SavedCommand.Builder(); - journalTable.readAll(key, (ignore, in, userVersion) -> builder.deserializeNext(in, userVersion)); - return builder; + RedundantBeforeAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.REDUNDANT_BEFORE, store)); + return accumulator.get(); } @Override - public void appendCommand(int commandStoreId, List> outcomes, List sanityCheck, Runnable onFlush) + public DurableBefore loadDurableBefore(int store) { - RecordPointer pointer = null; - for (SavedCommand.Writer outcome : outcomes) - { - JournalKey key = new JournalKey(outcome.key(), commandStoreId); - pointer = journal.asyncWrite(key, outcome, SENTINEL_HOSTS); - } - - // If we need to perform sanity check, we can only rely on blocking flushes. Otherwise, we may see into the future. - if (sanityCheck != null) - { - Condition condition = Condition.newOneTimeCondition(); - journal.onFlush(pointer, condition::signal); - condition.awaitUninterruptibly(); - - for (Command check : sanityCheck) - sanityCheck(commandStoreId, check); - - onFlush.run(); - } - else - { - journal.onFlush(pointer, onFlush); - } + DurableBeforeAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.DURABLE_BEFORE, store)); + return accumulator.get(); } - @VisibleForTesting - public void closeCurrentSegmentForTesting() + @Override + public NavigableMap loadBootstrapBeganAt(int store) { - journal.closeCurrentSegmentForTesting(); + IdentityAccumulator> accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.BOOTSTRAP_BEGAN_AT, store)); + return accumulator.get(); } - public void sanityCheck(int commandStoreId, Command orig) + @Override + public NavigableMap loadSafeToRead(int store) { - try - { - SavedCommand.Builder diffs = loadDiffs(commandStoreId, orig.txnId()); - diffs.forceResult(orig.result()); - // We can only use strict equality if we supply result. - Command reconstructed = diffs.construct(); - Invariants.checkState(orig.equals(reconstructed), - '\n' + - "Original: %s\n" + - "Reconstructed: %s\n" + - "Diffs: %s", orig, reconstructed, diffs); - } - catch (IOException e) - { - // can only throw if serializer is buggy - throw new RuntimeException(e); - } + IdentityAccumulator> accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.SAFE_TO_READ, store)); + return accumulator.get(); } - /* - * Context necessary to process log records - */ - - static abstract class RequestContext implements ReplyContext + @Override + public CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int store) { - final long waitForEpoch; - - RequestContext(long waitForEpoch) - { - this.waitForEpoch = waitForEpoch; - } - - public abstract void process(Node node, AccordEndpointMapper endpointMapper); + IdentityAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store)); + return accumulator.get(); } - /** - * Barebones response context not holding a reference to the entire message - */ - private abstract static class RemoteRequestContext extends RequestContext implements ResponseContext + @Override + public List loadHistoricalTransactions(int store) { - private final Request request; - - RemoteRequestContext(long waitForEpoch, Request request) - { - super(waitForEpoch); - this.request = request; - } + HistoricalTransactionsAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store)); + return accumulator.get(); + } - static LiveRemoteRequestContext forLive(Request request, ResponseContext context) + @Override + public void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onFlush) + { + if (value == null || isReplay.get()) { - return new LiveRemoteRequestContext(request, context.id(), context.from(), context.verb(), context.expiresAtNanos()); + if (onFlush != null) + onFlush.run(); + return; } - @Override - public void process(Node node, AccordEndpointMapper endpointMapper) - { - this.request.process(node, endpointMapper.mappedId(from()), this); - } + // TODO: use same API for commands as for the other states? + JournalKey key = new JournalKey(value.key(), JournalKey.Type.COMMAND_DIFF, store); + RecordPointer pointer = journal.asyncWrite(key, value, SENTINEL_HOSTS); + if (onFlush != null) + journal.onFlush(pointer, onFlush); + } - @Override public abstract long id(); - @Override public abstract InetAddressAndPort from(); - @Override public abstract Verb verb(); - @Override public abstract long expiresAtNanos(); + @Override + public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fieldUpdates, Runnable onFlush) + { + RecordPointer pointer = null; + // TODO: avoid allocating keys + if (fieldUpdates.redundantBefore != null) + pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.REDUNDANT_BEFORE, store), fieldUpdates.redundantBefore); + if (fieldUpdates.durableBefore != null) + pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.DURABLE_BEFORE, store), fieldUpdates.durableBefore); + if (fieldUpdates.bootstrapBeganAt != null) + pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.BOOTSTRAP_BEGAN_AT, store), fieldUpdates.bootstrapBeganAt); + if (fieldUpdates.safeToRead != null) + pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.SAFE_TO_READ, store), fieldUpdates.safeToRead); + if (fieldUpdates.rangesForEpoch != null) + pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store), fieldUpdates.rangesForEpoch); + if (fieldUpdates.historicalTransactions != null) + pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store), fieldUpdates.historicalTransactions); + + if (onFlush == null) + return; + + if (pointer != null) + journal.onFlush(pointer, onFlush); + else + onFlush.run(); } - // TODO: avoid distinguishing between live and non live - private static class LiveRemoteRequestContext extends RemoteRequestContext + @VisibleForTesting + public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) { - private final long id; - private final InetAddressAndPort from; - private final Verb verb; - private final long expiresAtNanos; + JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, commandStoreId); + SavedCommand.Builder builder = new SavedCommand.Builder(); + journalTable.readAll(key, builder::deserializeNext); + return builder; + } - LiveRemoteRequestContext(Request request, long id, InetAddressAndPort from, Verb verb, long expiresAtNanos) - { - super(request.waitForEpoch(), request); - this.id = id; - this.from = from; - this.verb = verb; - this.expiresAtNanos = expiresAtNanos; - } + private BUILDER readAll(JournalKey key) + { + BUILDER builder = (BUILDER) key.type.serializer.mergerFor(key); + // TODO: this can be further improved to avoid allocating lambdas + AccordJournalValueSerializers.FlyweightSerializer serializer = (AccordJournalValueSerializers.FlyweightSerializer) key.type.serializer; + journalTable.readAll(key, (in, userVersion) -> serializer.deserialize(key, builder, in, userVersion)); + return builder; + } + private RecordPointer appendInternal(JournalKey key, Object write) + { + AccordJournalValueSerializers.FlyweightSerializer serializer = (AccordJournalValueSerializers.FlyweightSerializer) key.type.serializer; + return journal.asyncWrite(key, (out, userVersion) -> serializer.serialize(key, write, out, userVersion), SENTINEL_HOSTS); + } - @Override - public long id() - { - return id; - } - @Override - public InetAddressAndPort from() - { - return from; - } - @Override - public Verb verb() - { - return verb; - } - @Override - public long expiresAtNanos() - { - return expiresAtNanos; - } + @VisibleForTesting + public void closeCurrentSegmentForTesting() + { + journal.closeCurrentSegmentForTesting(); } - /* - * Handling topology changes / epoch shift - */ + public void sanityCheck(int commandStoreId, Command orig) + { + SavedCommand.Builder diffs = loadDiffs(commandStoreId, orig.txnId()); + diffs.forceResult(orig.result()); + // We can only use strict equality if we supply result. + Command reconstructed = diffs.construct(); + Invariants.checkState(orig.equals(reconstructed), + '\n' + + "Original: %s\n" + + "Reconstructed: %s\n" + + "Diffs: %s", orig, reconstructed, diffs); + } - private class DelayedRequestProcessor implements Interruptible.Task + @VisibleForTesting + public void truncateForTesting() { - private final ManyToOneConcurrentLinkedQueue delayedRequests = new ManyToOneConcurrentLinkedQueue<>(); - private final LongArrayList waitForEpochs = new LongArrayList(); - private final Long2ObjectHashMap> byEpoch = new Long2ObjectHashMap<>(); - private final AtomicReference signal = new AtomicReference<>(Condition.newOneTimeCondition()); - private volatile Interruptible executor; + journal.truncateForTesting(); + } - public void start() - { - executor = executorFactory().infiniteLoop("AccordJournal-delayed-request-processor", this, SAFE, InfiniteLoopExecutor.Daemon.NON_DAEMON, InfiniteLoopExecutor.Interrupts.SYNCHRONIZED); - } + @VisibleForTesting + public void runCompactorForTesting() + { + journal.runCompactorForTesting(); + } - private void delay(RequestContext requestContext) + public void replay() + { + // TODO: optimize replay memory footprint + class ToApply { - delayedRequests.add(requestContext); - runOnce(); - } + final JournalKey key; + final Command command; - private void runOnce() - { - signal.get().signal(); + ToApply(JournalKey key, Command command) + { + this.key = key; + this.command = command; + } } - @Override - public void run(Interruptible.State state) + List toApply = new ArrayList<>(); + try (AccordJournalTable.KeyOrderIterator iter = journalTable.readAll()) { - if (state != NORMAL || Thread.currentThread().isInterrupted() || !isRunnable(status)) - return; + isReplay.set(true); - try + JournalKey key = null; + SavedCommand.Builder builder = new SavedCommand.Builder(); + while ((key = iter.key()) != null) { - Condition signal = Condition.newOneTimeCondition(); - this.signal.set(signal); - // First, poll delayed requests, put them into by epoch - while (!delayedRequests.isEmpty()) + builder.clear(); + if (key.type != JournalKey.Type.COMMAND_DIFF) { - RequestContext context = delayedRequests.poll(); - long waitForEpoch = context.waitForEpoch; - - List l = byEpoch.computeIfAbsent(waitForEpoch, (ignore) -> new ArrayList<>()); - if (l.isEmpty()) - waitForEpochs.pushLong(waitForEpoch); - l.add(context); - BiConsumer withEpochCallback = new BiConsumer<>() - { - @Override - public void accept(Void unused, Throwable withEpochFailure) - { - if (withEpochFailure != null) - { - // Nothing to do but keep waiting - if (withEpochFailure instanceof Timeout) - { - node.withEpoch(waitForEpoch, this); - return; - } - else - throw new RuntimeException(withEpochFailure); - } - runOnce(); - } - }; - node.withEpoch(waitForEpoch, withEpochCallback); + // TODO (required): add "skip" for the key to avoid getting stuck + iter.readAllForKey(key, (segment, position, key1, buffer, hosts, userVersion) -> {}); + continue; } - // Next, process all delayed epochs - for (int i = 0; i < waitForEpochs.size(); i++) - { - long epoch = waitForEpochs.getLong(i); - if (node.topology().hasEpoch(epoch)) + JournalKey finalKey = key; + iter.readAllForKey(key, (segment, position, local, buffer, hosts, userVersion) -> { + Invariants.checkState(finalKey.equals(local)); + try (DataInputBuffer in = new DataInputBuffer(buffer, false)) { - List requests = byEpoch.remove(epoch); - assert requests != null : String.format("%s %s (%d)", byEpoch, waitForEpochs, epoch); - for (RequestContext request : requests) - { - try - { - request.process(node, endpointMapper); - } - catch (Throwable t) - { - logger.error("Caught an exception while processing a delayed request {}", request, t); - } - } + builder.deserializeNext(in, userVersion); } - } - - waitForEpochs.removeIfLong(epoch -> !byEpoch.containsKey(epoch)); + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + }); - signal.await(); - } - catch (InterruptedException e) - { - logger.info("Delayed request processor thread interrupted. Shutting down."); + if (builder.nextCalled) + { + Command command = builder.construct(); + AccordCommandStore commandStore = (AccordCommandStore) node.commandStores().forId(key.commandStoreId); + commandStore.loader().load(command).get(); + if (command.saveStatus().compareTo(SaveStatus.Stable) >= 0 && !command.hasBeen(Truncated)) + toApply.add(new ToApply(key, command)); + } } - catch (Throwable t) + + toApply.sort(Comparator.comparing(v -> v.command.executeAt())); + for (ToApply apply : toApply) { - logger.error("Caught an exception in delayed processor", t); + AccordCommandStore commandStore = (AccordCommandStore) node.commandStores().forId(apply.key.commandStoreId); + commandStore.loader().apply(apply.command); } } - - private void shutdown() + catch (Throwable t) { - executor.shutdown(); - try - { - executor.awaitTermination(1, TimeUnit.MINUTES); - } - catch (InterruptedException e) - { - throw new RuntimeException(e); - } + throw new RuntimeException("Can not replay journal.", t); + } + finally + { + isReplay.set(false); } - } - - private boolean isRunnable(Status status) - { - return status != Status.TERMINATING && status != Status.TERMINATED; - } - - @VisibleForTesting - public void truncateForTesting() - { - journal.truncateForTesting(); } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java index 642f49e417ce..ef3ac9eff3a9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.service.accord; +import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -29,6 +30,7 @@ import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ColumnFamilyStore.RefViewFragment; import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.EmptyIterators; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Slices; import org.apache.cassandra.db.StorageHook; @@ -37,10 +39,13 @@ import org.apache.cassandra.db.lifecycle.View; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.Unfiltered; import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.db.rows.UnfilteredRowIterators; +import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; @@ -76,16 +81,16 @@ public AccordJournalTable(Journal journal, KeySupport keySupport, int a this.accordJournalVersion = accordJournalVersion; } - public interface Reader + public interface Reader { - void read(K key, DataInputPlus input, int userVersion) throws IOException; + void read(DataInputPlus input, int userVersion) throws IOException; } private abstract class AbstractRecordConsumer implements RecordConsumer { - protected final Reader reader; + protected final Reader reader; - AbstractRecordConsumer(Reader reader) + AbstractRecordConsumer(Reader reader) { this.reader = reader; } @@ -93,15 +98,7 @@ private abstract class AbstractRecordConsumer implements RecordConsumer @Override public void accept(long segment, int position, K key, ByteBuffer buffer, IntHashSet hosts, int userVersion) { - try (DataInputBuffer in = new DataInputBuffer(buffer, false)) - { - reader.read(key, in, userVersion); - } - catch (IOException e) - { - // can only throw if serializer is buggy - throw new RuntimeException(e); - } + readBuffer(buffer, reader, userVersion); } } @@ -109,7 +106,7 @@ private class TableRecordConsumer extends AbstractRecordConsumer { protected LongHashSet visited = null; - TableRecordConsumer(Reader reader) + TableRecordConsumer(Reader reader) { super(reader); } @@ -139,7 +136,7 @@ private class JournalAndTableRecordConsumer extends AbstractRecordConsumer private final K key; private final TableRecordConsumer tableRecordConsumer; - JournalAndTableRecordConsumer(K key, Reader reader) + JournalAndTableRecordConsumer(K key, Reader reader) { super(reader); this.key = key; @@ -165,7 +162,7 @@ public void accept(long segment, int position, K key, ByteBuffer buffer, IntHash *

      A strategy for making retry timing decisions for operations. + * The strategy is defined by four factors:

      + * + *

      The first three represent time periods, and may be defined dynamically based on a simple calculation over:

        + *
      • {@code pX()} recent experienced latency distribution for successful operations, + * e.g. {@code p50(rw)} the maximum of read and write median latencies, + * {@code p999(r)} the 99.9th percentile of read latencies + *
      • {@code attempts} the number of failed attempts made by the operation so far + *
      • {@code constant} a user provided floating point constant + *
      + * + *

      Their calculation may take any of these forms + *

    1. constant {@code $constant$[mu]s} + *
    2. dynamic constant {@code pX() * constant} + *
    3. dynamic linear {@code pX() * constant * attempts} + *
    4. dynamic exponential {@code pX() * constant ^ attempts} + * + *

      Furthermore, the dynamic calculations can be bounded with a min/max, like so: + * {@code min[mu]s <= dynamic expr <= max[mu]s} + * + * e.g. + *

    5. {@code 10ms <= p50(rw)*0.66} + *
    6. {@code 10ms <= p95(rw)*1.8^attempts <= 100ms} + *
    7. {@code 5ms <= p50(rw)*0.5} + * + *

      These calculations are put together to construct a range from which we draw a random number. + * The period we wait for {@code X} will be drawn so that {@code min <= X < max}. + * + *

      With the constraint that {@code max} must be {@code spread} greater than {@code min}, + * but no greater than its expression-defined maximum. {@code max} will be increased up until + * this point, after which {@code min} will be decreased until this gap is imposed. + * + *

      The {@link #waitRandomizer} property specifies the manner in which a random value is drawn from the range. + * It is defined using one of the following specifiers: + *

    8. uniform + *
    9. exp($power$) or exponential($power$) + *
    10. qexp($power$) or qexponential($power$) or quantizedexponential($power$) + * + * The uniform specifier is self-explanatory, selecting all values in the range with equal probability. + * The exponential specifier draws values towards the end of the range with higher probability, raising + * a floating point number in the range [0..1.0) to the power provided, and translating the resulting value + * to a uniform value in the range. + * The quantized exponential specifier partitions the range into {@code attempts} buckets, then applies the pure + * exponential approach to draw values from [0..attempts), before drawing a uniform value from the corresponding bucket + */ +public class RetryStrategy +{ + private static final Pattern RANDOMIZER = Pattern.compile( + "uniform|exp(onential)?[(](?[0-9.]+)[)]|q(uantized)?exp(onential)?[(](?[0-9.]+)[)]"); + + final static WaitRandomizerFactory randomizers = new WaitRandomizerFactory(){}; + + protected interface WaitRandomizer + { + long wait(long min, long max, int attempts); + } + + interface WaitRandomizerFactory + { + default LongBinaryOperator uniformLongSupplier() { return (min, max) -> ThreadLocalRandom.current().nextLong(min, max); } // DO NOT USE METHOD HANDLES (want to fetch afresh each time) + default DoubleSupplier uniformDoubleSupplier() { return () -> ThreadLocalRandom.current().nextDouble(); } + + default WaitRandomizer uniform() { return new Uniform(uniformLongSupplier()); } + default WaitRandomizer exponential(double power) { return new Exponential(uniformLongSupplier(), uniformDoubleSupplier(), power); } + default WaitRandomizer quantizedExponential(double power) { return new QuantizedExponential(uniformLongSupplier(), uniformDoubleSupplier(), power); } + + static class Uniform implements WaitRandomizer + { + final LongBinaryOperator uniformLong; + + public Uniform(LongBinaryOperator uniformLong) + { + this.uniformLong = uniformLong; + } + + @Override + public long wait(long min, long max, int attempts) + { + return uniformLong.applyAsLong(min, max); + } + } + + static abstract class AbstractExponential implements WaitRandomizer + { + final LongBinaryOperator uniformLong; + final DoubleSupplier uniformDouble; + final double power; + + public AbstractExponential(LongBinaryOperator uniformLong, DoubleSupplier uniformDouble, double power) + { + this.uniformLong = uniformLong; + this.uniformDouble = uniformDouble; + this.power = power; + } + } + + static class Exponential extends AbstractExponential + { + public Exponential(LongBinaryOperator uniformLong, DoubleSupplier uniformDouble, double power) + { + super(uniformLong, uniformDouble, power); + } + + @Override + public long wait(long min, long max, int attempts) + { + if (attempts == 1) + return uniformLong.applyAsLong(min, max); + + double p = uniformDouble.getAsDouble(); + long delta = max - min; + delta *= Math.pow(p, power); + return max - delta; + } + } + + static class QuantizedExponential extends AbstractExponential + { + public QuantizedExponential(LongBinaryOperator uniformLong, DoubleSupplier uniformDouble, double power) + { + super(uniformLong, uniformDouble, power); + } + + @Override + public long wait(long min, long max, int attempts) + { + long quanta = (max - min) / attempts; + if (attempts == 1 || quanta == 0) + return uniformLong.applyAsLong(min, max); + + double p = uniformDouble.getAsDouble(); + int base = (int) (attempts * Math.pow(p, power)); + return max - ThreadLocalRandom.current().nextLong(quanta * base, quanta * (base + 1)); + } + } + } + + public final WaitRandomizer waitRandomizer; + public final Wait min, max, spread; + + public RetryStrategy(String waitRandomizer, String min, String max, String spread, LatencySourceFactory latencies) + { + this.waitRandomizer = parseWaitRandomizer(waitRandomizer); + this.min = parseBound(min, true, latencies); + this.max = parseBound(max, false, latencies); + this.spread = parseBound(spread, true, latencies); + } + + protected RetryStrategy(WaitRandomizer waitRandomizer, Wait min, Wait max, Wait spread) + { + this.waitRandomizer = waitRandomizer; + this.min = min; + this.max = max; + this.spread = spread; + } + + protected Wait parseBound(String spec, boolean isMin, LatencySourceFactory latencies) + { + long defaultMaxMicros = DatabaseDescriptor.getRpcTimeout(MICROSECONDS); + return parseWait(spec, 0, defaultMaxMicros, isMin ? 0 : defaultMaxMicros, latencies); + } + + protected long computeWaitUntil(int attempts) + { + long wait = computeWait(attempts); + return nanoTime() + MICROSECONDS.toNanos(wait); + } + + protected long computeWait(int attempts) + { + long minWaitMicros = min.get(attempts); + long maxWaitMicros = max.get(attempts); + long spreadMicros = spread.get(attempts); + + if (minWaitMicros + spreadMicros > maxWaitMicros) + { + maxWaitMicros = minWaitMicros + spreadMicros; + if (maxWaitMicros > this.max.max) + { + maxWaitMicros = this.max.max; + minWaitMicros = max(this.min.min, min(this.min.max, maxWaitMicros - spreadMicros)); + } + } + + return waitRandomizer.wait(minWaitMicros, maxWaitMicros, attempts); + } + + public static class ParsedStrategy + { + public final String waitRandomizer, min, max, spread; + public final RetryStrategy strategy; + + protected ParsedStrategy(String waitRandomizer, String min, String max, String spread, RetryStrategy strategy) + { + this.waitRandomizer = waitRandomizer; + this.min = min; + this.max = max; + this.spread = spread; + this.strategy = strategy; + } + + public String toString() + { + return "min=" + min + ",max=" + max + ",spread=" + spread + ",random=" + waitRandomizer; + } + } + + @VisibleForTesting + public static ParsedStrategy parseStrategy(String spec, LatencySourceFactory latencies, ParsedStrategy defaultStrategy) + { + String[] args = spec.split(","); + String waitRandomizer = find(args, "random"); + String min = find(args, "min"); + String max = find(args, "max"); + String spread = find(args, "spread"); + if (spread == null) + spread = find(args, "delta"); + + if (waitRandomizer == null) waitRandomizer = defaultStrategy.waitRandomizer; + if (min == null) min = defaultStrategy.min; + if (max == null) max = defaultStrategy.max; + if (spread == null) spread = defaultStrategy.spread; + + RetryStrategy strategy = new RetryStrategy(waitRandomizer, min, max, spread, latencies); + return new ParsedStrategy(waitRandomizer, min, max, spread, strategy); + } + + protected static String find(String[] args, String param) + { + return stream(args).filter(s -> s.startsWith(param + '=')) + .map(s -> s.substring(param.length() + 1)) + .findFirst().orElse(null); + } + + static WaitRandomizer parseWaitRandomizer(String input) + { + return parseWaitRandomizer(input, randomizers); + } + + static WaitRandomizer parseWaitRandomizer(String input, WaitRandomizerFactory randomizers) + { + Matcher m = RANDOMIZER.matcher(input); + if (!m.matches()) + throw new IllegalArgumentException(input + " does not match" + RANDOMIZER); + + String exp; + exp = m.group("exp"); + if (exp != null) + return randomizers.exponential(Double.parseDouble(exp)); + exp = m.group("qexp"); + if (exp != null) + return randomizers.quantizedExponential(Double.parseDouble(exp)); + return randomizers.uniform(); + } +} diff --git a/src/java/org/apache/cassandra/service/TimeoutStrategy.java b/src/java/org/apache/cassandra/service/TimeoutStrategy.java new file mode 100644 index 000000000000..8cbc67698f81 --- /dev/null +++ b/src/java/org/apache/cassandra/service/TimeoutStrategy.java @@ -0,0 +1,368 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.Snapshot; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.metrics.ClientRequestMetrics; +import org.apache.cassandra.utils.NoSpamLogger; + +import static java.lang.Double.parseDouble; +import static java.lang.Integer.parseInt; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Math.pow; +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +/** + *

      A strategy for making timeout decisions for operations. This is a simplified single-value version of + * the RetryStrategy + * + *

      This represent a computed time period, that may be defined dynamically based on a simple calculation over:

        + *
      • {@code pX()} recent experienced latency distribution for successful operations, + * e.g. {@code p50(rw)} the maximum of read and write median latencies, + * {@code p999(r)} the 99.9th percentile of read latencies + *
      • {@code attempts} the number of failed attempts made by the operation so far + *
      • {@code constant} a user provided floating point constant + *
      + * + *

      The calculation may take any of these forms + *

    11. constant {@code $constant$[mu]s} + *
    12. dynamic constant {@code pX() * constant} + *
    13. dynamic linear {@code pX() * constant * attempts} + *
    14. dynamic exponential {@code pX() * constant ^ attempts} + * + *

      Furthermore, the dynamic calculations can be bounded with a min/max, like so: + * {@code min[mu]s <= dynamic expr <= max[mu]s} + * + * e.g. + *

    15. {@code 10ms <= p50(rw)*0.66} + *
    16. {@code 10ms <= p95(rw)*1.8^attempts <= 100ms} + *
    17. {@code 5ms <= p50(rw)*0.5} + * + * TODO (expected): permit simple constant addition (e.g. p50+5ms) + * TODO (required): track separate stats per-DC as inputs to these decisions + */ +public class TimeoutStrategy +{ + private static final Logger logger = LoggerFactory.getLogger(TimeoutStrategy.class); + + static final Pattern BOUND = Pattern.compile( + "(?0|[0-9]+[mu]s)" + + "|((?0|[0-9]+[mu]s) *<= *)?" + + "(p(?[0-9]+)(\\((?r|w|rw|wr)\\))?|(?0|[0-9]+[mu]s))" + + "\\s*([*]\\s*(?[0-9.]+)?\\s*(?[*^]\\s*attempts)?)?" + + "( *<= *(?0|[0-9]+[mu]s))?"); + static final Pattern TIME = Pattern.compile( + "0|([0-9]+)ms|([0-9]+)us"); + + // Factories can be useful for testing purposes, to supply custom implementations of selectors and modifiers. + final static LatencySupplierFactory selectors = new LatencySupplierFactory(){}; + final static LatencyModifierFactory modifiers = new LatencyModifierFactory(){}; + + interface LatencyModifierFactory + { + default LatencyModifier identity() { return (l, a) -> l; } + default LatencyModifier multiply(double constant) { return (l, a) -> saturatedCast(l * constant); } + default LatencyModifier multiplyByAttempts(double multiply) { return (l, a) -> saturatedCast(l * multiply * a); } + default LatencyModifier multiplyByAttemptsExp(double base) { return (l, a) -> saturatedCast(l * pow(base, a)); } + } + + interface LatencySupplier + { + long get(); + } + + public interface LatencySource + { + long get(double percentile); + } + + interface LatencySupplierFactory + { + default LatencySupplier constant(long latency) { return () -> latency; } + default LatencySupplier percentile(double percentile, LatencySource latencies) { return () -> latencies.get(percentile); } + } + + public interface LatencySourceFactory + { + LatencySource source(String params); + + static LatencySourceFactory rw(ClientRequestMetrics reads, ClientRequestMetrics writes) + { + return new ReadWriteLatencySourceFactory(reads, writes); + } + + static LatencySourceFactory of(ClientRequestMetrics latencies) + { + LatencySource source = new TimeLimitedLatencySupplier(latencies.latency::getSnapshot, 10, SECONDS); + return ignore -> source; + } + } + + public static class ReadWriteLatencySourceFactory implements LatencySourceFactory + { + final LatencySource reads, writes; + + public ReadWriteLatencySourceFactory(ClientRequestMetrics reads, ClientRequestMetrics writes) + { + this(reads.latency::getSnapshot, writes.latency::getSnapshot); + } + + public ReadWriteLatencySourceFactory(Supplier reads, Supplier writes) + { + this.reads = new TimeLimitedLatencySupplier(reads, 10, SECONDS); + this.writes = new TimeLimitedLatencySupplier(writes, 10, SECONDS); + } + + @Override + public LatencySource source(String rw) + { + if (rw.length() == 2) + return percentile -> Math.max(reads.get(percentile), writes.get(percentile)); + else if ("r".equals(rw)) + return reads; + else + return writes; + } + } + + interface LatencyModifier + { + long modify(long latency, int attempts); + } + + static class SnapshotAndTime + { + final long validUntil; + final Snapshot snapshot; + + SnapshotAndTime(long validUntil, Snapshot snapshot) + { + this.validUntil = validUntil; + this.snapshot = snapshot; + } + } + + static class TimeLimitedLatencySupplier extends AtomicReference implements LatencySource + { + final Supplier snapshotSupplier; + final long validForNanos; + + TimeLimitedLatencySupplier(Supplier snapshotSupplier, long time, TimeUnit units) + { + this.snapshotSupplier = snapshotSupplier; + this.validForNanos = units.toNanos(time); + } + + private Snapshot getSnapshot() + { + long now = nanoTime(); + + SnapshotAndTime cur = get(); + if (cur != null && cur.validUntil > now) + return cur.snapshot; + + Snapshot newSnapshot = snapshotSupplier.get(); + SnapshotAndTime next = new SnapshotAndTime(now + validForNanos, newSnapshot); + if (compareAndSet(cur, next)) + return next.snapshot; + + return accumulateAndGet(next, (a, b) -> a.validUntil > b.validUntil ? a : b).snapshot; + } + + @Override + public long get(double percentile) + { + return (long)getSnapshot().getValue(percentile); + } + } + + public static class Wait + { + final long min, max, onFailure; + final LatencyModifier modifier; + final LatencySupplier supplier; + + Wait(long min, long max, long onFailure, LatencyModifier modifier, LatencySupplier supplier) + { + Preconditions.checkArgument(min<=max, "min (%s) must be less than or equal to max (%s)", min, max); + this.min = min; + this.max = max; + this.onFailure = onFailure; + this.modifier = modifier; + this.supplier = supplier; + } + + long get(int attempts) + { + try + { + long base = supplier.get(); + return max(min, min(max, modifier.modify(base, attempts))); + } + catch (Throwable t) + { + NoSpamLogger.getLogger(logger, 1L, MINUTES).info("", t); + return onFailure; + } + } + + public String toString() + { + return "Bound{" + + "min=" + min + + ", max=" + max + + ", onFailure=" + onFailure + + ", modifier=" + modifier + + ", supplier=" + supplier + + '}'; + } + } + + final Wait wait; + + public TimeoutStrategy(String spec, LatencySourceFactory latencies) + { + this.wait = parseWait(spec, latencies); + } + + public long computeWait(int attempts, TimeUnit units) + { + return units.convert(wait.get(attempts), MICROSECONDS); + } + + public long computeWaitUntil(int attempts) + { + long nanos = computeWait(attempts, NANOSECONDS); + return nanoTime() + nanos; + } + + protected Wait parseWait(String spec, LatencySourceFactory latencies) + { + long defaultMicros = DatabaseDescriptor.getRpcTimeout(MICROSECONDS); + return parseWait(spec, 0, defaultMicros, defaultMicros, latencies); + } + + private static LatencySupplier parseLatencySupplier(Matcher m, LatencySupplierFactory selectors, LatencySourceFactory latenciesFactory) + { + String perc = m.group("perc"); + if (perc == null) + return selectors.constant(parseInMicros(m.group("constbase"))); + + LatencySource latencies = latenciesFactory.source(m.group("rw")); + double percentile = parseDouble("0." + perc); + return selectors.percentile(percentile, latencies); + } + + private static LatencyModifier parseLatencyModifier(Matcher m, LatencyModifierFactory modifiers) + { + String mod = m.group("mod"); + if (mod == null) + return modifiers.identity(); + + double modifier = parseDouble(mod); + + String modkind = m.group("modkind"); + if (modkind == null) + return modifiers.multiply(modifier); + + if (modkind.startsWith("*")) + return modifiers.multiplyByAttempts(modifier); + else if (modkind.startsWith("^")) + return modifiers.multiplyByAttemptsExp(modifier); + else + throw new IllegalArgumentException("Unrecognised attempt modifier: " + modkind); + } + + static long saturatedCast(double v) + { + if (v > Long.MAX_VALUE) + return Long.MAX_VALUE; + return (long) v; + } + + public static Wait parseWait(String input, long defaultMin, long defaultMax, long onFailure, LatencySourceFactory latencies) + { + return parseWait(input, defaultMin, defaultMax, onFailure, latencies, selectors, modifiers); + } + + @VisibleForTesting + public static Wait parseWait(String input, long defaultMinMicros, long defaultMaxMicros, long onFailure, LatencySourceFactory latencies, LatencySupplierFactory selectors, LatencyModifierFactory modifiers) + { + Matcher m = BOUND.matcher(input); + if (!m.matches()) + throw new IllegalArgumentException(input + " does not match " + BOUND); + + String maybeConst = m.group("const"); + if (maybeConst != null) + { + long v = parseInMicros(maybeConst); + return new Wait(v, v, v, modifiers.identity(), selectors.constant(v)); + } + + long min = parseInMicros(m.group("min"), defaultMinMicros); + long max = parseInMicros(m.group("max"), defaultMaxMicros); + return new Wait(min, max, onFailure, parseLatencyModifier(m, modifiers), parseLatencySupplier(m, selectors, latencies)); + } + + private static long parseInMicros(String input, long orElse) + { + if (input == null) + return orElse; + + return parseInMicros(input); + } + + private static long parseInMicros(String input) + { + Matcher m = TIME.matcher(input); + if (!m.matches()) + throw new IllegalArgumentException(input + " does not match " + TIME); + + String text; + if (null != (text = m.group(1))) + return parseInt(text) * 1000; + else if (null != (text = m.group(2))) + return parseInt(text); + else + return 0; + } + + private static String orElse(Supplier get, String orElse) + { + String result = get.get(); + return result != null ? result : orElse; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 455154b4ad5a..bba5a70826f9 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -80,11 +80,8 @@ import org.apache.cassandra.utils.concurrent.Promise; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; -import static accord.primitives.SaveStatus.Applying; import static accord.primitives.Status.Committed; import static accord.primitives.Status.Invalidated; -import static accord.primitives.Status.PreApplied; -import static accord.primitives.Status.Stable; import static accord.primitives.Status.Truncated; import static accord.utils.Invariants.checkState; @@ -310,7 +307,6 @@ public void persistFieldUpdates(AccordSafeCommandStore.FieldUpdates fieldUpdates journal.persistStoreState(id, fieldUpdates, onFlush); } - @Nullable @VisibleForTesting public void appendToLog(Command before, Command after, Runnable onFlush) @@ -626,10 +622,7 @@ public Promise apply(Command command) safeStore -> { SafeCommand safeCommand = safeStore.unsafeGet(txnId); Command local = safeCommand.current(); - if (local.is(Stable) || local.is(PreApplied)) - Commands.maybeExecute(safeStore, safeCommand, local, true, true); - else if (local.saveStatus().compareTo(Applying) >= 0 && !local.hasBeen(Truncated)) - Commands.applyWrites(safeStore, context, local).begin(agent); + Commands.maybeExecute(safeStore, safeCommand, local, true, true); }) .begin((unused, throwable) -> { if (throwable != null) diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 38bd9f9101d2..26b868e5d132 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.NavigableMap; import java.util.Set; @@ -32,6 +31,7 @@ import org.slf4j.LoggerFactory; import accord.impl.ErasedSafeCommand; +import accord.impl.TimestampsForKey; import accord.local.Cleanup; import accord.local.Command; import accord.local.CommandStores; @@ -113,7 +113,7 @@ public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) throw new UnsupportedOperationException(); } }, - new AccordSegmentCompactor<>(JournalKey.SUPPORT, params.userVersion())); + new AccordSegmentCompactor<>(params.userVersion())); this.journalTable = new AccordJournalTable<>(journal, JournalKey.SUPPORT, params.userVersion()); this.params = params; } @@ -151,7 +151,7 @@ public boolean isTerminated() @Override public void shutdown() { - Invariants.checkState(status == Status.REPLAY || status == Status.STARTED); + Invariants.checkState(status == Status.REPLAY || status == Status.STARTED, "%s", status); status = Status.TERMINATING; journal.shutdown(); status = Status.TERMINATED; @@ -357,23 +357,10 @@ public void runCompactorForTesting() public void replay() { logger.info("Starting journal replay."); + TimestampsForKey.unsafeSetReplay(true); CommandsForKey.disableLinearizabilityViolationsReporting(); AccordKeyspace.truncateAllCaches(); - // TODO (expected): optimize replay memory footprint - class ToApply - { - final JournalKey key; - final Command command; - - ToApply(JournalKey key, Command command) - { - this.key = key; - this.command = command; - } - } - - List toApply = new ArrayList<>(); try (AccordJournalTable.KeyOrderIterator iter = journalTable.readAll()) { JournalKey key; @@ -406,23 +393,17 @@ class ToApply { Command command = builder.construct(); AccordCommandStore commandStore = (AccordCommandStore) node.commandStores().forId(key.commandStoreId); - commandStore.loader().load(command).get(); + AccordCommandStore.Loader loader = commandStore.loader(); + loader.load(command).get(); if (command.saveStatus().compareTo(SaveStatus.Stable) >= 0 && !command.hasBeen(Truncated)) - toApply.add(new ToApply(key, command)); + loader.apply(command); } } - toApply.sort(Comparator.comparing(v -> v.command.executeAt())); - for (ToApply apply : toApply) - { - AccordCommandStore commandStore = (AccordCommandStore) node.commandStores().forId(apply.key.commandStoreId); - logger.info("Apply {}", apply.command); - commandStore.loader().apply(apply.command); - } - logger.info("Waiting for command stores to quiesce."); ((AccordCommandStores)node.commandStores()).waitForQuiescense(); CommandsForKey.enableLinearizabilityViolationsReporting(); + TimestampsForKey.unsafeSetReplay(false); logger.info("Finished journal replay."); status = Status.STARTED; } @@ -488,4 +469,9 @@ public void checkAllCommands() } } } + + public void unsafeSetStarted() + { + status = Status.STARTED; + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java index 5935a910b5f8..5212b575ea74 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java @@ -49,7 +49,6 @@ import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; -import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.journal.EntrySerializer.EntryHolder; import org.apache.cassandra.journal.Journal; import org.apache.cassandra.journal.KeySupport; @@ -58,7 +57,7 @@ import static org.apache.cassandra.io.sstable.SSTableReadsListener.NOOP_LISTENER; -public class AccordJournalTable +public class AccordJournalTable { private static final IntHashSet SENTINEL_HOSTS = new IntHashSet(); @@ -170,8 +169,7 @@ public void readAll(K key, Reader reader) private void readAllFromTable(K key, TableRecordConsumer onEntry) { - DecoratedKey pk = makePartitionKey(cfs, key, keySupport, accordJournalVersion); - + DecoratedKey pk = AccordKeyspace.JournalColumns.decorate(key); try (RefViewFragment view = cfs.selectAndReference(View.select(SSTableSet.LIVE, pk))) { if (view.sstables.isEmpty()) @@ -209,20 +207,6 @@ private void readRow(K key, Unfiltered unfiltered, EntryHolder into, RecordCo onEntry.accept(descriptor, position, into.key, into.value, into.hosts, into.userVersion); } - public static DecoratedKey makePartitionKey(ColumnFamilyStore cfs, K key, KeySupport keySupport, int version) - { - try (DataOutputBuffer out = new DataOutputBuffer(keySupport.serializedSize(version))) - { - keySupport.serialize(key, out, version); - return cfs.decorateKey(out.buffer(false)); - } - catch (IOException e) - { - // can only throw if (key) serializer is buggy - throw new RuntimeException("Could not serialize key " + key + ", this shouldn't be possible", e); - } - } - @SuppressWarnings("resource") // Auto-closeable iterator will release related resources public KeyOrderIterator readAll() { @@ -249,7 +233,7 @@ private TableIterator() : UnfilteredPartitionIterators.merge(scanners, UnfilteredPartitionIterators.MergeListener.NOOP); } - public K key() + public JournalKey key() { if (partition == null) { @@ -259,7 +243,7 @@ public K key() return null; } - return keySupport.deserialize(partition.partitionKey().getKey(), 0, accordJournalVersion); + return AccordKeyspace.JournalColumns.getJournalKey(partition.partitionKey()); } protected void readAllForKey(K key, RecordConsumer recordConsumer) @@ -318,7 +302,8 @@ private JournalAndTableKeyIterator() @Override public K key() { - K tableKey = tableIterator.key(); + // TODO (expected): fix generics mismatch here + K tableKey = (K)tableIterator.key(); K journalKey = staticSegmentIterator.key(); if (tableKey == null) return journalKey; @@ -331,7 +316,7 @@ public K key() @Override public void readAllForKey(K key, RecordConsumer reader) { - K tableKey = tableIterator.key(); + K tableKey = (K)tableIterator.key(); K journalKey = staticSegmentIterator.key(); if (journalKey != null && keySupport.compare(journalKey, key) == 0) staticSegmentIterator.readAllForKey(key, (segment, position, key1, buffer, hosts, userVersion) -> { diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 4a17c437941d..7a4760b7f633 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -83,6 +83,7 @@ import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.marshal.ByteArrayAccessor; import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.ByteType; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.db.marshal.CompositeType; import org.apache.cassandra.db.marshal.Int32Type; @@ -230,12 +231,14 @@ static TokenType valueOf(Token token) parse(JOURNAL, "accord journal", "CREATE TABLE %s (" - + "key blob," + + "store_id int," + + "type tinyint," + + "id blob," + "descriptor bigint," + "offset int," + "user_version int," + "record blob," - + "PRIMARY KEY(key, descriptor, offset)" + + "PRIMARY KEY((store_id, type, id), descriptor, offset)" + ") WITH CLUSTERING ORDER BY (descriptor DESC, offset DESC) WITH compression = {'class':'NoopCompressor'};") .partitioner(new LocalPartitioner(BytesType.instance)) .build(); @@ -1350,6 +1353,68 @@ public int hashCode() } } + public static class JournalColumns + { + static final ClusteringComparator keyComparator = Journal.partitionKeyAsClusteringComparator(); + static final CompositeType partitionKeyType = (CompositeType) Journal.partitionKeyType; + public static final ColumnMetadata store_id = getColumn(Journal, "store_id"); + public static final ColumnMetadata type = getColumn(Journal, "type"); + public static final ColumnMetadata id = getColumn(Journal, "id"); + public static final ColumnMetadata record = getColumn(Journal, "record"); + + public static DecoratedKey decorate(JournalKey key) + { + ByteBuffer id = ByteBuffer.allocate(CommandSerializers.txnId.serializedSize()); + CommandSerializers.txnId.serialize(key.id, id); + id.flip(); + ByteBuffer pk = keyComparator.make(key.commandStoreId, (byte)key.type.id, id).serializeAsPartitionKey(); + Invariants.checkState(getTxnId(splitPartitionKey(pk)).equals(key.id)); + return Journal.partitioner.decorateKey(pk); + } + + public static ByteBuffer[] splitPartitionKey(DecoratedKey key) + { + return JournalColumns.partitionKeyType.split(key.getKey()); + } + + public static ByteBuffer[] splitPartitionKey(ByteBuffer key) + { + return JournalColumns.partitionKeyType.split(key); + } + + public static int getStoreId(DecoratedKey pk) + { + return getStoreId(splitPartitionKey(pk)); + } + + public static int getStoreId(ByteBuffer[] partitionKeyComponents) + { + return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); + } + + public static JournalKey.Type getType(ByteBuffer[] partitionKeyComponents) + { + return JournalKey.Type.fromId(ByteType.instance.compose(partitionKeyComponents[type.position()])); + } + + public static TxnId getTxnId(DecoratedKey key) + { + return getTxnId(splitPartitionKey(key)); + } + + public static TxnId getTxnId(ByteBuffer[] partitionKeyComponents) + { + ByteBuffer buffer = partitionKeyComponents[id.position()]; + return CommandSerializers.txnId.deserialize(buffer, buffer.position()); + } + + public static JournalKey getJournalKey(DecoratedKey key) + { + ByteBuffer[] parts = splitPartitionKey(key); + return new JournalKey(getTxnId(parts), getType(parts), getStoreId(parts)); + } + } + private static EpochDiskState saveEpochDiskState(EpochDiskState diskState) { String cql = "INSERT INTO " + ACCORD_KEYSPACE_NAME + '.' + EPOCH_METADATA + ' ' + diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index eda0f75b874e..968068481dca 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -26,12 +26,18 @@ import java.util.Map; import java.util.Set; +import accord.impl.RequestCallbacks; import accord.messages.*; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; + +import org.apache.cassandra.config.AccordSpec; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; +import org.apache.cassandra.service.TimeoutStrategy; +import org.apache.cassandra.service.TimeoutStrategy.LatencySourceFactory; import org.apache.cassandra.utils.Clock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,6 +56,7 @@ import org.apache.cassandra.net.Verb; import static accord.messages.MessageType.Kind.REMOTE; +import static java.util.concurrent.TimeUnit.NANOSECONDS; public class AccordMessageSink implements MessageSink { @@ -201,17 +208,28 @@ private static Verb getVerb(Request request) private final Agent agent; private final MessageDelivery messaging; private final AccordEndpointMapper endpointMapper; + private final RequestCallbacks callbacks; + // TODO (required): make hot property + private TimeoutStrategy slowPreaccept, slowRead; - public AccordMessageSink(Agent agent, MessageDelivery messaging, AccordEndpointMapper endpointMapper) + public AccordMessageSink(Agent agent, MessageDelivery messaging, AccordEndpointMapper endpointMapper, RequestCallbacks callbacks) { + AccordSpec config = DatabaseDescriptor.getAccord(); + if (config != null) + { + // TODO (expected): introduce better metrics, esp. for preaccept, but also to disambiguate DC latencies + slowPreaccept = new TimeoutStrategy(config.slowPreAccept, LatencySourceFactory.of(ClientRequestsMetricsHolder.accordReadMetrics)); + slowRead = new TimeoutStrategy(config.slowRead, LatencySourceFactory.of(ClientRequestsMetricsHolder.accordReadMetrics)); + } this.agent = agent; this.messaging = messaging; this.endpointMapper = endpointMapper; + this.callbacks = callbacks; } - public AccordMessageSink(Agent agent, AccordConfigurationService endpointMapper) + public AccordMessageSink(Agent agent, AccordConfigurationService endpointMapper, RequestCallbacks callbacks) { - this(agent, MessagingService.instance(), endpointMapper); + this(agent, MessagingService.instance(), endpointMapper, callbacks); } @Override @@ -237,24 +255,42 @@ private static boolean isRangeBarrier(Request request) return txnRequest.txnId.domain().isRange(); } + // TODO (expected): permit bulk send to save esp. on callback registration (and combine records) @Override public void send(Node.Id to, Request request, AgentExecutor executor, Callback callback) { Verb verb = getVerb(request); Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); - Message message; - if (isRangeBarrier(request)) - { - long nowNanos = Clock.Global.nanoTime(); - message = Message.out(verb, request, nowNanos + DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos()); - } - else + long nowNanos = Clock.Global.nanoTime(); + long expiresAtNanos; + if (isRangeBarrier(request)) expiresAtNanos = nowNanos + DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(); + else expiresAtNanos = nowNanos + verb.expiresAfterNanos(); + long delayedAtNanos = Long.MAX_VALUE; + switch (verb) { - message = Message.out(verb, request); + case ACCORD_COMMIT_REQ: + if (((Commit)request).readData == null) + break; + + case ACCORD_READ_REQ: + if (slowRead == null || isRangeBarrier(request)) + break; + + case ACCORD_CHECK_STATUS_REQ: + delayedAtNanos = nowNanos + slowRead.computeWait(1, NANOSECONDS); + break; + + case ACCORD_PRE_ACCEPT_REQ: + if (slowPreaccept == null || isRangeBarrier(request)) + break; + delayedAtNanos = nowNanos + slowPreaccept.computeWait(1, NANOSECONDS); } + + Message message = Message.out(verb, request, expiresAtNanos); InetAddressAndPort endpoint = endpointMapper.mappedEndpoint(to); logger.trace("Sending {} {} to {}", verb, message.payload, endpoint); - messaging.sendWithCallback(message, endpoint, new AccordCallback<>(executor, (Callback) callback, endpointMapper)); + callbacks.registerAt(message.id(), executor, callback, to, nowNanos, delayedAtNanos, expiresAtNanos, NANOSECONDS); + messaging.send(message, endpoint); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java new file mode 100644 index 000000000000..f5a06f9be816 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service.accord; + +import accord.coordinate.Timeout; +import accord.impl.RequestCallbacks; +import accord.local.Node; +import accord.messages.Reply; +import org.apache.cassandra.exceptions.RequestFailure; +import org.apache.cassandra.exceptions.RequestFailureReason; +import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.tracing.Tracing; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; + +class AccordResponseVerbHandler implements IVerbHandler +{ + private final RequestCallbacks callbacks; + private final AccordEndpointMapper endpointMapper; + + AccordResponseVerbHandler(RequestCallbacks callbacks, AccordEndpointMapper endpointMapper) + { + this.callbacks = callbacks; + this.endpointMapper = endpointMapper; + } + + @Override + public void doVerb(Message message) + { + Node.Id from = endpointMapper.mappedId(message.from()); + if (message.isFailureResponse()) + { + Tracing.trace("Processing failure response from {}", message.from()); + callbacks.onFailure(message.id(), from, convertFailureMessage((RequestFailure) message.payload)); + } + else + { + Tracing.trace("Processing response from {}", message.from()); + boolean remove = !(message.payload instanceof Reply) || ((Reply) message.payload).isFinal(); + RequestCallbacks.CallbackEntry cbe = callbacks.onSuccess(message.id(), from, message.payload, remove); + if (cbe == null) + return; + + long latencyNanos = approxTime.now() - cbe.registeredAt(NANOSECONDS); + MessagingService.instance().latencySubscribers.add(message.from(), latencyNanos, NANOSECONDS); + } + } + + private static Throwable convertFailureMessage(RequestFailure failure) + { + return failure.reason == RequestFailureReason.TIMEOUT ? + new Timeout(null, null) : + new RuntimeException(failure.failure); + } + +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 624fcc378bba..f97f8336b7cb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -35,7 +35,7 @@ import accord.impl.CommandsSummary; import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; -import accord.local.NodeTimeService; +import accord.local.NodeCommandStoreService; import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.cfk.CommandsForKey; @@ -179,7 +179,7 @@ public ProgressLog progressLog() } @Override - public NodeTimeService time() + public NodeCommandStoreService node() { // TODO: safe command store should not have arbitrary time return commandStore.node(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java index c6fba012a562..db72c9f93271 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java @@ -36,7 +36,6 @@ import org.apache.cassandra.io.sstable.SSTableTxnWriter; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; -import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.journal.SegmentCompactor; import org.apache.cassandra.journal.StaticSegment; import org.apache.cassandra.journal.StaticSegment.KeyOrderReader; @@ -49,12 +48,10 @@ public class AccordSegmentCompactor implements SegmentCompactor keySupport; - public AccordSegmentCompactor(KeySupport keySupport, int userVersion) + public AccordSegmentCompactor(int userVersion) { this.userVersion = userVersion; - this.keySupport = keySupport; } @Override @@ -148,7 +145,7 @@ private void maybeWritePartition(ColumnFamilyStore cfs, SSTableTxnWriter writer, { if (builder != null) { - SimpleBuilder partitionBuilder = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, AccordJournalTable.makePartitionKey(cfs, key, keySupport, userVersion)); + SimpleBuilder partitionBuilder = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, AccordKeyspace.JournalColumns.decorate(key)); try (DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get()) { serializer.reserialize(key, builder, out, userVersion); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 7acabd776c1d..ae45577e14b8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -69,7 +69,7 @@ import accord.impl.CoordinateDurabilityScheduling; import accord.impl.DefaultLocalListeners; import accord.impl.DefaultRemoteListeners; -import accord.impl.DefaultRequestTimeouts; +import accord.impl.RequestCallbacks; import accord.impl.SizeOfIntersectionSorter; import accord.impl.progresslog.DefaultProgressLogs; import accord.local.Command; @@ -80,13 +80,13 @@ import accord.local.KeyHistory; import accord.local.Node; import accord.local.Node.Id; -import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.ShardDistributor.EvenSplit; import accord.local.cfk.CommandsForKey; import accord.messages.Callback; import accord.messages.ReadData; +import accord.messages.Reply; import accord.messages.Request; import accord.messages.WaitUntilApplied; import accord.primitives.FullRoute; @@ -142,6 +142,7 @@ import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.AccordScheduler; +import org.apache.cassandra.service.accord.api.AccordTimeService; import org.apache.cassandra.service.accord.api.AccordTopologySorter; import org.apache.cassandra.service.accord.api.CompositeTopologySorter; import org.apache.cassandra.service.accord.api.PartitionKey; @@ -161,7 +162,6 @@ import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Blocking; -import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.concurrent.AsyncPromise; @@ -200,6 +200,7 @@ private enum State {INIT, STARTED, SHUTDOWN} private final AccordJournal journal; private final CoordinateDurabilityScheduling durabilityScheduling; private final AccordVerbHandler requestHandler; + private final AccordResponseVerbHandler responseHandler; private final LocalConfig configuration; @GuardedBy("this") @@ -208,7 +209,13 @@ private enum State {INIT, STARTED, SHUTDOWN} private static final IAccordService NOOP_SERVICE = new IAccordService() { @Override - public IVerbHandler verbHandler() + public IVerbHandler requestHandler() + { + return null; + } + + @Override + public IVerbHandler responseHandler() { return null; } @@ -346,10 +353,16 @@ public static boolean isSetup() return instance != null; } - public static IVerbHandler verbHandlerOrNoop() + public static IVerbHandler requestHandlerOrNoop() + { + if (!isSetup()) return ignore -> {}; + return instance().requestHandler(); + } + + public static IVerbHandler responseHandlerOrNoop() { if (!isSetup()) return ignore -> {}; - return instance().verbHandler(); + return instance().responseHandler(); } public synchronized static void startup(NodeId tcmId) @@ -395,20 +408,17 @@ public static IAccordService instance() return i; } - public static long now() - { - return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); - } - private AccordService(Id localId) { Invariants.checkState(localId != null, "static localId must be set before instantiating AccordService"); logger.info("Starting accord with nodeId {}", localId); AccordAgent agent = FBUtilities.construct(CassandraRelevantProperties.ACCORD_AGENT_CLASS.getString(AccordAgent.class.getName()), "AccordAgent"); agent.setNodeId(localId); + AccordTimeService time = new AccordTimeService(); + final RequestCallbacks callbacks = new RequestCallbacks(time); this.configService = new AccordConfigurationService(localId); this.fastPathCoordinator = AccordFastPathCoordinator.create(localId, configService); - this.messageSink = new AccordMessageSink(agent, configService); + this.messageSink = new AccordMessageSink(agent, configService, callbacks); this.scheduler = new AccordScheduler(); this.dataStore = new AccordDataStore(); this.configuration = new AccordConfiguration(DatabaseDescriptor.getRawConfig()); @@ -416,8 +426,7 @@ private AccordService(Id localId) this.node = new Node(localId, messageSink, configService, - AccordService::now, - NodeTimeService.elapsedWrapperFromMonotonicSource(NANOSECONDS, Clock.Global::nanoTime), + time, () -> dataStore, new KeyspaceSplitter(new EvenSplit<>(DatabaseDescriptor.getAccordShardCount(), getPartitioner().accordSplitter())), agent, @@ -426,7 +435,7 @@ private AccordService(Id localId) CompositeTopologySorter.create(SizeOfIntersectionSorter.SUPPLIER, new AccordTopologySorter.Supplier(configService, DatabaseDescriptor.getNodeProximity())), DefaultRemoteListeners::new, - DefaultRequestTimeouts::new, + ignore -> callbacks, DefaultProgressLogs::new, DefaultLocalListeners.Factory::new, AccordCommandStores.factory(journal), @@ -436,6 +445,7 @@ private AccordService(Id localId) this.nodeShutdown = toShutdownable(node); this.durabilityScheduling = new CoordinateDurabilityScheduling(node); this.requestHandler = new AccordVerbHandler<>(node, configService); + this.responseHandler = new AccordResponseVerbHandler<>(callbacks, configService); } @Override @@ -568,11 +578,17 @@ static Long findMinEpoch(SharedContext context, Map verbHandler() + public IVerbHandler requestHandler() { return requestHandler; } + @Override + public IVerbHandler responseHandler() + { + return responseHandler; + } + private Seekables barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, BiFunction, AsyncSyncPoint> syncPoint) { Stopwatch sw = Stopwatch.createStarted(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 8439a05f2eae..019b888deda8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -291,7 +291,6 @@ public class Instance> implements CacheSiz private final BiFunction validateFunction; private final ToLongFunction heapEstimator; private long bytesCached; -// private int itemsCached; @VisibleForTesting final CacheAccessMetrics instanceMetrics; @@ -382,7 +381,6 @@ public S acquireIfExists(K key) return safeRefFactory.apply(acquireExisting(node, false)); } - public void maybeLoad(K key, V initial) { AccordCachingState node = (AccordCachingState) cache.get(key); diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 34c7b26bd95d..59fc056b90dd 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -60,7 +60,7 @@ public void doVerb(Message message) throws IOException Node.Id fromNodeId = endpointMapper.mappedId(message.from()); long waitForEpoch = request.waitForEpoch(); - if (node.topology().hasEpoch(waitForEpoch)) + if (node.topology().hasAtLeastEpoch(waitForEpoch)) request.process(node, fromNodeId, message); else node.withEpoch(waitForEpoch, (ignored, withEpochFailure) -> { diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index 6b564b7573da..02e492f3c6fc 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -35,11 +35,11 @@ import accord.local.KeyHistory; import accord.local.RedundantBefore; import accord.primitives.PartialDeps; +import accord.primitives.Routable.Domain; import accord.primitives.SaveStatus; import accord.primitives.Status; import accord.primitives.Range; import accord.primitives.Ranges; -import accord.primitives.Routable; import accord.primitives.Routables; import accord.primitives.Seekables; import accord.primitives.Timestamp; @@ -54,16 +54,34 @@ import static accord.primitives.Txn.Kind.ExclusiveSyncPoint; -public class CommandsForRangesLoader +public class CommandsForRangesLoader implements AccordStateCache.Listener { private final RoutesSearcher searcher = new RoutesSearcher(); //TODO (now, durability): find solution for this... private final NavigableMap historicalTransaction = new TreeMap<>(); private final AccordCommandStore store; + private final ObjectHashSet cachedRangeTxns = new ObjectHashSet<>(); public CommandsForRangesLoader(AccordCommandStore store) { this.store = store; + store.commandCache().register(this); + } + + @Override + public void onAdd(AccordCachingState state) + { + TxnId txnId = state.key(); + if (txnId.is(Domain.Range)) + cachedRangeTxns.add(txnId); + } + + @Override + public void onEvict(AccordCachingState state) + { + TxnId txnId = state.key(); + if (txnId.is(Domain.Range)) + cachedRangeTxns.remove(txnId); } public AsyncResult>> get(@Nullable TxnId primaryTxnId, KeyHistory keyHistory, Ranges ranges) @@ -136,7 +154,7 @@ public NavigableMap get() @Override public void onAdd(AccordCachingState n) { - if (n.key().domain() != Routable.Domain.Range) + if (n.key().domain() != Domain.Range) return; if (n.key().compareTo(minTxnId) < 0 || n.key().compareTo(maxTxnId) >= 0) @@ -197,7 +215,8 @@ public void close() private Watcher fromCache(@Nullable TxnId findAsDep, Ranges ranges, TxnId minTxnId, Timestamp maxTxnId, RedundantBefore redundantBefore) { Watcher watcher = new Watcher(ranges, findAsDep, minTxnId, maxTxnId, redundantBefore); - store.commandCache().stream().forEach(watcher::onAdd); + for (TxnId rangeTxnId : cachedRangeTxns) + watcher.onAdd(store.commandCache().getUnsafe(rangeTxnId)); store.commandCache().register(watcher); return watcher; } @@ -234,11 +253,12 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable TxnId f return null; Seekables> keysOrRanges = cmd.partialTxn().keys(); - if (keysOrRanges.domain() != Routable.Domain.Range) + if (keysOrRanges.domain() != Domain.Range) throw new AssertionError(String.format("Txn keys are not range for %s", cmd.partialTxn())); Ranges ranges = (Ranges) keysOrRanges; - if (!ranges.intersects(cacheRanges)) + ranges = ranges.slice(cacheRanges, Routables.Slice.Minimal); + if (ranges.isEmpty()) return null; if (redundantBefore != null) diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index e5e2d125f1cc..bba67a91607d 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -34,6 +34,7 @@ import accord.local.Node; import accord.local.Node.Id; import accord.local.RedundantBefore; +import accord.messages.Reply; import accord.messages.Request; import accord.primitives.Ranges; import accord.primitives.Seekables; @@ -68,7 +69,8 @@ public interface IAccordService Set SUPPORTED_COMMIT_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.LOCAL_ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); Set SUPPORTED_READ_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL); - IVerbHandler verbHandler(); + IVerbHandler requestHandler(); + IVerbHandler responseHandler(); Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException; diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index b8b09ef236b8..99e068ca26f4 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -71,13 +71,13 @@ public JournalKey(TxnId id, Type type, int commandStoreId) public static final class JournalKeySupport implements KeySupport { - private static final int MSB_OFFSET = 0; + private static final int CS_ID_OFFSET = 0; + private static final int TYPE_OFFSET = INT_SIZE; + private static final int MSB_OFFSET = TYPE_OFFSET + BYTE_SIZE; private static final int LSB_OFFSET = MSB_OFFSET + LONG_SIZE; private static final int NODE_OFFSET = LSB_OFFSET + LONG_SIZE; - private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; - private static final int CS_ID_OFFSET = TYPE_OFFSET + BYTE_SIZE; // TODO (required): revisit commandStoreId - this can go arbitrarily high so may want to use vint - public static final int TOTAL_SIZE = CS_ID_OFFSET + INT_SIZE; + public static final int TOTAL_SIZE = NODE_OFFSET + INT_SIZE; @Override public int serializedSize(int userVersion) @@ -88,33 +88,33 @@ public int serializedSize(int userVersion) @Override public void serialize(JournalKey key, DataOutputPlus out, int userVersion) throws IOException { - serializeTxnId(key.id, out); - out.writeByte(key.type.id); out.writeInt(key.commandStoreId); + out.writeByte(key.type.id); + serializeTxnId(key.id, out); } private void serialize(JournalKey key, byte[] out) { - serializeTxnId(key.id, out); - out[TYPE_OFFSET] = (byte) (key.type.id & 0xFF); ByteArrayUtil.putInt(out, CS_ID_OFFSET, key.commandStoreId); + out[TYPE_OFFSET] = (byte) (key.type.id & 0xFF); + serializeTxnId(key.id, out); } @Override public JournalKey deserialize(DataInputPlus in, int userVersion) throws IOException { - TxnId txnId = deserializeTxnId(in); - int type = in.readByte(); int commandStoreId = in.readInt(); + int type = in.readByte(); + TxnId txnId = deserializeTxnId(in); return new JournalKey(txnId, Type.fromId(type), commandStoreId); } @Override public JournalKey deserialize(ByteBuffer buffer, int position, int userVersion) { - TxnId txnId = deserializeTxnId(buffer, position); - int type = buffer.get(position + TYPE_OFFSET); int commandStoreId = buffer.getInt(position + CS_ID_OFFSET); + int type = buffer.get(position + TYPE_OFFSET); + TxnId txnId = deserializeTxnId(buffer, position); return new JournalKey(txnId, Type.fromId(type), commandStoreId); } @@ -159,15 +159,15 @@ public void updateChecksum(Checksum crc, JournalKey key, int userVersion) @Override public int compareWithKeyAt(JournalKey k, ByteBuffer buffer, int position, int userVersion) { - int cmp = compareWithTxnIdAt(k.id, buffer, position); + int commandStoreId = buffer.getInt(position + CS_ID_OFFSET); + int cmp = Integer.compare(k.commandStoreId, commandStoreId); if (cmp != 0) return cmp; byte type = buffer.get(position + TYPE_OFFSET); cmp = Byte.compare((byte) k.type.id, type); if (cmp != 0) return cmp; - int commandStoreId = buffer.getInt(position + CS_ID_OFFSET); - cmp = Integer.compare(k.commandStoreId, commandStoreId); + cmp = compareWithTxnIdAt(k.id, buffer, position); return cmp; } @@ -189,9 +189,9 @@ private int compareWithTxnIdAt(TxnId txnId, ByteBuffer buffer, int position) @Override public int compare(JournalKey k1, JournalKey k2) { - int cmp = k1.id.compareTo(k2.id); + int cmp = Integer.compare(k1.commandStoreId, k2.commandStoreId); if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); - if (cmp == 0) cmp = Integer.compare(k1.commandStoreId, k2.commandStoreId); + if (cmp == 0) cmp = k1.id.compareTo(k2.id); return cmp; } }; diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index a0cd86bb5b68..5a0ae5d8ef56 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -35,6 +35,7 @@ import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Route; import accord.primitives.SaveStatus; import accord.primitives.Status; import accord.primitives.Timestamp; @@ -57,24 +58,25 @@ import static accord.primitives.Known.KnownDeps.NoDeps; import static accord.primitives.Status.Durability.NotDurable; import static accord.utils.Invariants.illegalState; +import static org.apache.cassandra.service.accord.SavedCommand.Fields.PARTICIPANTS; public class SavedCommand { // This enum is order-dependent public enum Fields { + PARTICIPANTS, // stored first so we can index it + SAVE_STATUS, + PARTIAL_DEPS, EXECUTE_AT, EXECUTES_AT_LEAST, - SAVE_STATUS, DURABILITY, ACCEPTED, PROMISED, - PARTICIPANTS, - PARTIAL_TXN, - PARTIAL_DEPS, WAITING_ON, + PARTIAL_TXN, WRITES, - CLEANUP + CLEANUP, ; public static final Fields[] FIELDS = values(); @@ -233,7 +235,7 @@ static int getFlags(Command before, Command after) flags = collectFlags(before, after, Command::acceptedOrCommitted, false, Fields.ACCEPTED, flags); flags = collectFlags(before, after, Command::promised, false, Fields.PROMISED, flags); - flags = collectFlags(before, after, Command::participants, true, Fields.PARTICIPANTS, flags); + flags = collectFlags(before, after, Command::participants, true, PARTICIPANTS, flags); flags = collectFlags(before, after, Command::partialTxn, false, Fields.PARTIAL_TXN, flags); flags = collectFlags(before, after, Command::partialDeps, false, Fields.PARTIAL_DEPS, flags); @@ -540,7 +542,7 @@ public Builder expungePartial(Cleanup cleanup, SaveStatus saveStatus, boolean in } if (participants != null) { - builder.flags = setFieldChanged(Fields.PARTICIPANTS, builder.flags); + builder.flags = setFieldChanged(PARTICIPANTS, builder.flags); builder.participants = participants; } if (includeOutcome && builder.writes != null) @@ -579,6 +581,16 @@ public ByteBuffer asByteBuffer(int userVersion) throws IOException } } + public static Route deserializeRouteOrNull(DataInputPlus in, int userVersion) throws IOException + { + int flags = in.readInt(); + + if (!getFieldChanged(PARTICIPANTS, flags) || getFieldIsNull(PARTICIPANTS, flags)) + return null; + + return CommandSerializers.participants.deserializeRouteOnly(in, userVersion); + } + public void serialize(DataOutputPlus out, int userVersion) throws IOException { out.writeInt(flags); diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordTimeService.java b/src/java/org/apache/cassandra/service/accord/api/AccordTimeService.java new file mode 100644 index 000000000000..2f13983ea8df --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/api/AccordTimeService.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.api; + +import java.util.concurrent.TimeUnit; + +import accord.local.TimeService; +import org.apache.cassandra.utils.Clock; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +public class AccordTimeService implements TimeService +{ + @Override + public long now() + { + return nowMicros(); + } + + public static long nowMicros() + { + return TimeUnit.MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + } + + @Override + public long elapsed(TimeUnit unit) + { + return unit.convert(nanoTime(), NANOSECONDS); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java b/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java index bb70132c045b..a9f004b3503c 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordTopologySorter.java @@ -28,6 +28,10 @@ import accord.topology.Topologies; import accord.topology.Topology; import accord.utils.SortedList; +import org.apache.cassandra.gms.ApplicationState; +import org.apache.cassandra.gms.EndpointState; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.gms.VersionedValue; import org.apache.cassandra.locator.DynamicEndpointSnitch; import org.apache.cassandra.locator.Endpoint; import org.apache.cassandra.locator.InetAddressAndPort; @@ -71,8 +75,8 @@ private AccordTopologySorter create(SortedList nodes) } private final AccordEndpointMapper mapper; - private final Comparator comparator; + private AccordTopologySorter(AccordEndpointMapper mapper, Comparator comparator) { this.mapper = mapper; @@ -95,6 +99,27 @@ public int compare(Node.Id node1, Node.Id node2, ShardSelection shards) return comparator.compare(() -> mapper.mappedEndpoint(node1), () -> mapper.mappedEndpoint(node2)); } + @Override + public boolean isFaulty(Node.Id node) + { + InetAddressAndPort ep = mapper.mappedEndpointOrNull(node); + if (ep == null) + return true; + + EndpointState epState = Gossiper.instance.getEndpointStateForEndpoint(ep); + if (epState == null) + return true; + + if (!epState.isAlive()) + return true; + + VersionedValue event = epState.getApplicationState(ApplicationState.SEVERITY); + if (event == null) + return false; + + return Double.parseDouble(event.value) == 0.0; + } + private static class EndpointTuple implements Endpoint { final InetAddressAndPort endpoint; diff --git a/src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java b/src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java index 3886cde12d9b..597e4aad8667 100644 --- a/src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java +++ b/src/java/org/apache/cassandra/service/accord/api/CompositeTopologySorter.java @@ -81,4 +81,15 @@ public int compare(Node.Id node1, Node.Id node2, ShardSelection shards) } return 0; } + + @Override + public boolean isFaulty(Node.Id node) + { + for (int i = 0; i < delegates.length; i++) + { + if (delegates[i].isFaulty(node)) + return true; + } + return false; + } } diff --git a/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java b/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java index 58c9f4b65ff3..767e57fd9f7d 100644 --- a/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java +++ b/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java @@ -61,7 +61,7 @@ public RepairSyncPointAdapter(Collection requiredResponses) public void execute(Node node, Topologies all, FullRoute route, ExecutePath path, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer, Throwable> callback) { RequiredResponseTracker tracker = new RequiredResponseTracker(requiredResponses, all); - ExecuteSyncPoint.ExecuteBlocking execute = new ExecuteSyncPoint.ExecuteBlocking<>(node, new SyncPoint(txnId, deps, (FullRoute) route), tracker, executeAt); + ExecuteSyncPoint.ExecuteBlocking execute = new ExecuteSyncPoint.ExecuteBlocking<>(node, new SyncPoint<>(txnId, deps, (FullRoute) route), tracker, executeAt); execute.addCallback(callback); execute.start(); } diff --git a/src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java b/src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java index 130e91496902..ac2651dcd33c 100644 --- a/src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java +++ b/src/java/org/apache/cassandra/service/accord/repair/RequiredResponseTracker.java @@ -21,9 +21,9 @@ import java.util.HashSet; import java.util.Set; -import accord.coordinate.tracking.AbstractSimpleTracker; import accord.coordinate.tracking.RequestStatus; import accord.coordinate.tracking.ShardTracker; +import accord.coordinate.tracking.SimpleTracker; import accord.local.Node; import accord.topology.Shard; import accord.topology.Topologies; @@ -32,7 +32,7 @@ import static accord.coordinate.tracking.AbstractTracker.ShardOutcomes.NoChange; import static accord.coordinate.tracking.AbstractTracker.ShardOutcomes.Success; -public class RequiredResponseTracker extends AbstractSimpleTracker +public class RequiredResponseTracker extends SimpleTracker { public static class RequiredResponseShardTracker extends ShardTracker { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 6ef51b957d3b..c537434d9569 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -145,7 +145,7 @@ public long serializedSize(ReadTxnData read, int version) } }; - private static final ReadDataSerializer readEphemeralTxnData = new ReadDataSerializer() + public static final ReadDataSerializer readEphemeralTxnData = new ReadDataSerializer<>() { @Override public void serialize(ReadEphemeralTxnData read, DataOutputPlus out, int version) throws IOException diff --git a/src/java/org/apache/cassandra/service/paxos/ContentionStrategy.java b/src/java/org/apache/cassandra/service/paxos/ContentionStrategy.java index 59ee5505123e..e513bd1a176d 100644 --- a/src/java/org/apache/cassandra/service/paxos/ContentionStrategy.java +++ b/src/java/org/apache/cassandra/service/paxos/ContentionStrategy.java @@ -19,345 +19,81 @@ package org.apache.cassandra.service.paxos; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; -import com.codahale.metrics.Snapshot; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.RetryStrategy; +import org.apache.cassandra.service.TimeoutStrategy; +import org.apache.cassandra.service.TimeoutStrategy.LatencySourceFactory; +import org.apache.cassandra.service.TimeoutStrategy.ReadWriteLatencySourceFactory; +import org.apache.cassandra.service.TimeoutStrategy.Wait; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.NoSpamLogger; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.DoubleSupplier; -import java.util.function.LongBinaryOperator; import java.util.function.Supplier; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import static java.lang.Double.parseDouble; -import static java.lang.Integer.parseInt; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import static java.lang.Math.*; -import static java.util.Arrays.stream; import static java.util.concurrent.TimeUnit.*; import static org.apache.cassandra.config.DatabaseDescriptor.*; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.casWriteMetrics; -import static org.apache.cassandra.utils.Clock.Global.nanoTime; import static org.apache.cassandra.utils.Clock.waitUntil; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** - *

      A strategy for making back-off decisions for Paxos operations that fail to make progress because of other paxos operations. - * The strategy is defined by four factors:

        - *
      • {@link #min} - *
      • {@link #max} - *
      • {@link #minDelta} - *
      • {@link #waitRandomizer} - *
      - * - *

      The first three represent time periods, and may be defined dynamically based on a simple calculation over:

        - *
      • {@code pX()} recent experienced latency distribution for successful operations, - * e.g. {@code p50(rw)} the maximum of read and write median latencies, - * {@code p999(r)} the 99.9th percentile of read latencies - *
      • {@code attempts} the number of failed attempts made by the operation so far - *
      • {@code constant} a user provided floating point constant - *
      - * - *

      Their calculation may take any of these forms - *

    18. constant {@code $constant$[mu]s} - *
    19. dynamic constant {@code pX() * constant} - *
    20. dynamic linear {@code pX() * constant * attempts} - *
    21. dynamic exponential {@code pX() * constant ^ attempts} - * - *

      Furthermore, the dynamic calculations can be bounded with a min/max, like so: - * {@code min[mu]s <= dynamic expr <= max[mu]s} - * - * e.g. - *

    22. {@code 10ms <= p50(rw)*0.66} - *
    23. {@code 10ms <= p95(rw)*1.8^attempts <= 100ms} - *
    24. {@code 5ms <= p50(rw)*0.5} - * - *

      These calculations are put together to construct a range from which we draw a random number. - * The period we wait for {@code X} will be drawn so that {@code min <= X < max}. - * - *

      With the constraint that {@code max} must be {@code minDelta} greater than {@code min}, - * but no greater than its expression-defined maximum. {@code max} will be increased up until - * this point, after which {@code min} will be decreased until this gap is imposed. - * - *

      The {@link #waitRandomizer} property specifies the manner in which a random value is drawn from the range. - * It is defined using one of the following specifiers: - *

    25. uniform - *
    26. exp($power$) or exponential($power$) - *
    27. qexp($power$) or qexponential($power$) or quantizedexponential($power$) - * - * The uniform specifier is self-explanatory, selecting all values in the range with equal probability. - * The exponential specifier draws values towards the end of the range with higher probability, raising - * a floating point number in the range [0..1.0) to the power provided, and translating the resulting value - * to a uniform value in the range. - * The quantized exponential specifier partitions the range into {@code attempts} buckets, then applies the pure - * exponential approach to draw values from [0..attempts), before drawing a uniform value from the corresponding bucket - * - *

      Finally, there is also a {@link #traceAfterAttempts} property that permits initiating tracing of operations - * that experience a certain minimum number of failed paxos rounds due to contention. A setting of 0 or 1 will initiate - * a trace session after the first failed ballot. + * See {@link RetryStrategy} */ -public class ContentionStrategy +public class ContentionStrategy extends RetryStrategy { - private static final Logger logger = LoggerFactory.getLogger(ContentionStrategy.class); - - private static final Pattern BOUND = Pattern.compile( - "(?0|[0-9]+[mu]s)" + - "|((?0|[0-9]+[mu]s) *<= *)?" + - "(p(?[0-9]+)\\((?r|w|rw|wr)\\)|(?0|[0-9]+[mu]s))" + - "\\s*([*]\\s*(?[0-9.]+)?\\s*(?[*^]\\s*attempts)?)?" + - "( *<= *(?0|[0-9]+[mu]s))?"); - private static final Pattern TIME = Pattern.compile( - "0|([0-9]+)ms|([0-9]+)us"); - private static final Pattern RANDOMIZER = Pattern.compile( - "uniform|exp(onential)?[(](?[0-9.]+)[)]|q(uantized)?exp(onential)?[(](?[0-9.]+)[)]"); - private static final String DEFAULT_WAIT_RANDOMIZER = "qexp(1.5)"; // at least 0ms, and at least 66% of median latency - private static final String DEFAULT_MIN = "0 <= p50(rw)*0.66"; // at least 0ms, and at least 66% of median latency - private static final String DEFAULT_MAX = "10ms <= p95(rw)*1.8^attempts <= 100ms"; // p95 latency with exponential back-off at rate of 1.8^attempts - private static final String DEFAULT_MIN_DELTA = "5ms <= p50(rw)*0.5"; // at least 5ms, and at least 50% of median latency + private static final Logger logger = LoggerFactory.getLogger(RetryStrategy.class); - private static volatile ContentionStrategy current; - - // Factories can be useful for testing purposes, to supply custom implementations of selectors and modifiers. - final static LatencySelectorFactory selectors = new LatencySelectorFactory(){}; - final static LatencyModifierFactory modifiers = new LatencyModifierFactory(){}; - final static WaitRandomizerFactory randomizers = new WaitRandomizerFactory(){}; + private static final String DEFAULT_WAIT_RANDOMIZER = "uniform"; + private static final String DEFAULT_MIN = "0"; + private static final String DEFAULT_MAX = "100ms"; + private static final String DEFAULT_SPREAD = "100ms"; + private static final LatencySourceFactory LATENCIES = new ReadWriteLatencySourceFactory(casReadMetrics, casWriteMetrics); + private static volatile ContentionStrategy current; + private static volatile ParsedStrategy currentParsed; + private static final RetryStrategy.ParsedStrategy defaultStrategy; static { - current = new ContentionStrategy(defaultWaitRandomizer(), defaultMinWait(), defaultMaxWait(), defaultMinDelta(), Integer.MAX_VALUE); - } + defaultStrategy = new ParsedStrategy(DEFAULT_WAIT_RANDOMIZER, DEFAULT_MIN, DEFAULT_MAX, DEFAULT_SPREAD, Integer.MAX_VALUE, + new ContentionStrategy(DEFAULT_WAIT_RANDOMIZER, DEFAULT_MIN, DEFAULT_MAX, DEFAULT_SPREAD, Integer.MAX_VALUE)); - static interface LatencyModifierFactory - { - default LatencyModifier identity() { return (l, a) -> l; } - default LatencyModifier multiply(double constant) { return (l, a) -> saturatedCast(l * constant); } - default LatencyModifier multiplyByAttempts(double multiply) { return (l, a) -> saturatedCast(l * multiply * a); } - default LatencyModifier multiplyByAttemptsExp(double base) { return (l, a) -> saturatedCast(l * pow(base, a)); } - } - - static interface LatencySupplier - { - abstract long get(double percentile); - } - - static interface LatencySelector - { - abstract long select(LatencySupplier readLatencyHistogram, LatencySupplier writeLatencyHistogram); - } - - static interface LatencySelectorFactory - { - default LatencySelector constant(long latency) { return (read, write) -> latency; } - default LatencySelector read(double percentile) { return (read, write) -> read.get(percentile); } - default LatencySelector write(double percentile) { return (read, write) -> write.get(percentile); } - default LatencySelector maxReadWrite(double percentile) { return (read, write) -> max(read.get(percentile), write.get(percentile)); } - } + String waitRandomizer = orElse(DatabaseDescriptor::getPaxosContentionWaitRandomizer, DEFAULT_WAIT_RANDOMIZER); + String min = orElse(DatabaseDescriptor::getPaxosContentionMinWait, DEFAULT_MIN); + String max = orElse(DatabaseDescriptor::getPaxosContentionMaxWait, DEFAULT_MAX); + String spread = orElse(DatabaseDescriptor::getPaxosContentionMinDelta, DEFAULT_SPREAD); - static interface LatencyModifier - { - long modify(long latency, int attempts); - } - - static interface WaitRandomizer - { - abstract long wait(long min, long max, int attempts); + current = new ContentionStrategy(waitRandomizer, min, max, spread, Integer.MAX_VALUE); + currentParsed = new ParsedStrategy(waitRandomizer, min, max, spread, Integer.MAX_VALUE, current); } - static interface WaitRandomizerFactory - { - default LongBinaryOperator uniformLongSupplier() { return (min, max) -> ThreadLocalRandom.current().nextLong(min, max); } // DO NOT USE METHOD HANDLES (want to fetch afresh each time) - default DoubleSupplier uniformDoubleSupplier() { return () -> ThreadLocalRandom.current().nextDouble(); } - - default WaitRandomizer uniform() { return new Uniform(uniformLongSupplier()); } - default WaitRandomizer exponential(double power) { return new Exponential(uniformLongSupplier(), uniformDoubleSupplier(), power); } - default WaitRandomizer quantizedExponential(double power) { return new QuantizedExponential(uniformLongSupplier(), uniformDoubleSupplier(), power); } - - static class Uniform implements WaitRandomizer - { - final LongBinaryOperator uniformLong; - - public Uniform(LongBinaryOperator uniformLong) - { - this.uniformLong = uniformLong; - } - - @Override - public long wait(long min, long max, int attempts) - { - return uniformLong.applyAsLong(min, max); - } - } - - static abstract class AbstractExponential implements WaitRandomizer - { - final LongBinaryOperator uniformLong; - final DoubleSupplier uniformDouble; - final double power; - - public AbstractExponential(LongBinaryOperator uniformLong, DoubleSupplier uniformDouble, double power) - { - this.uniformLong = uniformLong; - this.uniformDouble = uniformDouble; - this.power = power; - } - } - - static class Exponential extends AbstractExponential - { - public Exponential(LongBinaryOperator uniformLong, DoubleSupplier uniformDouble, double power) - { - super(uniformLong, uniformDouble, power); - } - - @Override - public long wait(long min, long max, int attempts) - { - if (attempts == 1) - return uniformLong.applyAsLong(min, max); - - double p = uniformDouble.getAsDouble(); - long delta = max - min; - delta *= Math.pow(p, power); - return max - delta; - } - } - - static class QuantizedExponential extends AbstractExponential - { - public QuantizedExponential(LongBinaryOperator uniformLong, DoubleSupplier uniformDouble, double power) - { - super(uniformLong, uniformDouble, power); - } - - @Override - public long wait(long min, long max, int attempts) - { - long quanta = (max - min) / attempts; - if (attempts == 1 || quanta == 0) - return uniformLong.applyAsLong(min, max); - - double p = uniformDouble.getAsDouble(); - int base = (int) (attempts * Math.pow(p, power)); - return max - ThreadLocalRandom.current().nextLong(quanta * base, quanta * (base + 1)); - } - } - } - - static class SnapshotAndTime - { - final long validUntil; - final Snapshot snapshot; - - SnapshotAndTime(long validUntil, Snapshot snapshot) - { - this.validUntil = validUntil; - this.snapshot = snapshot; - } - } + final int traceAfterAttempts; - static class TimeLimitedLatencySupplier extends AtomicReference implements LatencySupplier + public ContentionStrategy(String waitRandomizer, String min, String max, String spread, int traceAfterAttempts) { - final Supplier snapshotSupplier; - final long validForNanos; - - TimeLimitedLatencySupplier(Supplier snapshotSupplier, long time, TimeUnit units) - { - this.snapshotSupplier = snapshotSupplier; - this.validForNanos = units.toNanos(time); - } - - private Snapshot getSnapshot() - { - long now = nanoTime(); - - SnapshotAndTime cur = get(); - if (cur != null && cur.validUntil > now) - return cur.snapshot; - - Snapshot newSnapshot = snapshotSupplier.get(); - SnapshotAndTime next = new SnapshotAndTime(now + validForNanos, newSnapshot); - if (compareAndSet(cur, next)) - return next.snapshot; - - return accumulateAndGet(next, (a, b) -> a.validUntil > b.validUntil ? a : b).snapshot; - } - - @Override - public long get(double percentile) - { - return (long)getSnapshot().getValue(percentile); - } + super(waitRandomizer, min, max, spread, LATENCIES); + this.traceAfterAttempts = traceAfterAttempts; } - static class Bound + public ContentionStrategy(WaitRandomizer waitRandomizer, Wait min, Wait max, Wait spread, int traceAfterAttempts) { - final long min, max, onFailure; - final LatencyModifier modifier; - final LatencySelector selector; - final LatencySupplier reads, writes; - - Bound(long min, long max, long onFailure, LatencyModifier modifier, LatencySelector selector) - { - Preconditions.checkArgument(min<=max, "min (%s) must be less than or equal to max (%s)", min, max); - this.min = min; - this.max = max; - this.onFailure = onFailure; - this.modifier = modifier; - this.selector = selector; - this.reads = new TimeLimitedLatencySupplier(casReadMetrics.latency::getSnapshot, 10L, SECONDS); - this.writes = new TimeLimitedLatencySupplier(casWriteMetrics.latency::getSnapshot, 10L, SECONDS); - } - - long get(int attempts) - { - try - { - long base = selector.select(reads, writes); - return max(min, min(max, modifier.modify(base, attempts))); - } - catch (Throwable t) - { - NoSpamLogger.getLogger(logger, 1L, MINUTES).info("", t); - return onFailure; - } - } - - public String toString() - { - return "Bound{" + - "min=" + min + - ", max=" + max + - ", onFailure=" + onFailure + - ", modifier=" + modifier + - ", selector=" + selector + - '}'; - } + super(waitRandomizer, min, max, spread); + this.traceAfterAttempts = traceAfterAttempts; } - final WaitRandomizer waitRandomizer; - final Bound min, max, minDelta; - final int traceAfterAttempts; - - public ContentionStrategy(String waitRandomizer, String min, String max, String minDelta, int traceAfterAttempts) + @Override + protected Wait parseBound(String spec, boolean isMin, LatencySourceFactory latencies) { - this.waitRandomizer = parseWaitRandomizer(waitRandomizer); - this.min = parseBound(min, true); - this.max = parseBound(max, false); - this.minDelta = parseBound(minDelta, true); - this.traceAfterAttempts = traceAfterAttempts; + return TimeoutStrategy.parseWait(spec, 0, maxQueryTimeoutMicros(), isMin ? 0 : maxQueryTimeoutMicros(), latencies); } public enum Type @@ -395,25 +131,10 @@ long computeWaitUntilForContention(int attempts, TableMetadata table, DecoratedK Tracing.instance.getSessionId()); } - long minWaitMicros = min.get(attempts); - long maxWaitMicros = max.get(attempts); - long minDeltaMicros = minDelta.get(attempts); - - if (minWaitMicros + minDeltaMicros > maxWaitMicros) - { - maxWaitMicros = minWaitMicros + minDeltaMicros; - if (maxWaitMicros > this.max.max) - { - maxWaitMicros = this.max.max; - minWaitMicros = max(this.min.min, min(this.min.max, maxWaitMicros - minDeltaMicros)); - } - } - - long wait = waitRandomizer.wait(minWaitMicros, maxWaitMicros, attempts); - return nanoTime() + MICROSECONDS.toNanos(wait); + return super.computeWaitUntil(attempts); } - boolean doWaitForContention(long deadline, int attempts, TableMetadata table, DecoratedKey partitionKey, ConsistencyLevel consistency, Type type) + public boolean doWaitForContention(long deadline, int attempts, TableMetadata table, DecoratedKey partitionKey, ConsistencyLevel consistency, Type type) { long until = computeWaitUntilForContention(attempts, table, partitionKey, consistency, type); if (until >= deadline) @@ -441,201 +162,52 @@ static long waitUntilForContention(int attempts, TableMetadata table, DecoratedK return current.computeWaitUntilForContention(attempts, table, partitionKey, consistency, type); } - static class ParsedStrategy + public static class ParsedStrategy extends RetryStrategy.ParsedStrategy { - final String waitRandomizer, min, max, minDelta; - final ContentionStrategy strategy; + public final int trace; + public final ContentionStrategy strategy; - ParsedStrategy(String waitRandomizer, String min, String max, String minDelta, ContentionStrategy strategy) + ParsedStrategy(String waitRandomizer, String min, String max, String minDelta, int trace, ContentionStrategy strategy) { - this.waitRandomizer = waitRandomizer; - this.min = min; - this.max = max; - this.minDelta = minDelta; + super(waitRandomizer, min, max, minDelta, strategy); + this.trace = trace; this.strategy = strategy; } + + @Override + public String toString() + { + return super.toString() + (trace == Integer.MAX_VALUE ? "" : ",trace=" + current.traceAfterAttempts); + } } @VisibleForTesting - static ParsedStrategy parseStrategy(String spec) + public static ParsedStrategy parseStrategy(String spec) { + RetryStrategy.ParsedStrategy parsed = RetryStrategy.parseStrategy(spec, LATENCIES, defaultStrategy); String[] args = spec.split(","); - String waitRandomizer = find(args, "random"); - String min = find(args, "min"); - String max = find(args, "max"); - String minDelta = find(args, "delta"); - String trace = find(args, "trace"); - if (waitRandomizer == null) waitRandomizer = defaultWaitRandomizer(); - if (min == null) min = defaultMinWait(); - if (max == null) max = defaultMaxWait(); - if (minDelta == null) minDelta = defaultMinDelta(); + String trace = find(args, "trace"); int traceAfterAttempts = trace == null ? current.traceAfterAttempts: Integer.parseInt(trace); - ContentionStrategy strategy = new ContentionStrategy(waitRandomizer, min, max, minDelta, traceAfterAttempts); - return new ParsedStrategy(waitRandomizer, min, max, minDelta, strategy); + ContentionStrategy strategy = new ContentionStrategy(parsed.strategy.waitRandomizer, parsed.strategy.min, parsed.strategy.max, parsed.strategy.spread, traceAfterAttempts); + return new ParsedStrategy(parsed.waitRandomizer, parsed.min, parsed.max, parsed.spread, traceAfterAttempts, strategy); } - - public static void setStrategy(String spec) + public static synchronized void setStrategy(String spec) { ParsedStrategy parsed = parseStrategy(spec); + currentParsed = parsed; current = parsed.strategy; setPaxosContentionWaitRandomizer(parsed.waitRandomizer); setPaxosContentionMinWait(parsed.min); setPaxosContentionMaxWait(parsed.max); - setPaxosContentionMinDelta(parsed.minDelta); + setPaxosContentionMinDelta(parsed.spread); } public static String getStrategySpec() { - return "min=" + defaultMinWait() - + ",max=" + defaultMaxWait() - + ",delta=" + defaultMinDelta() - + ",random=" + defaultWaitRandomizer() - + ",trace=" + current.traceAfterAttempts; - } - - private static String find(String[] args, String param) - { - return stream(args).filter(s -> s.startsWith(param + '=')) - .map(s -> s.substring(param.length() + 1)) - .findFirst().orElse(null); - } - - private static LatencySelector parseLatencySelector(Matcher m, LatencySelectorFactory selectors) - { - String perc = m.group("perc"); - if (perc == null) - return selectors.constant(parseInMicros(m.group("constbase"))); - - double percentile = parseDouble("0." + perc); - String rw = m.group("rw"); - if (rw.length() == 2) - return selectors.maxReadWrite(percentile); - else if ("r".equals(rw)) - return selectors.read(percentile); - else - return selectors.write(percentile); - } - - private static LatencyModifier parseLatencyModifier(Matcher m, LatencyModifierFactory modifiers) - { - String mod = m.group("mod"); - if (mod == null) - return modifiers.identity(); - - double modifier = parseDouble(mod); - - String modkind = m.group("modkind"); - if (modkind == null) - return modifiers.multiply(modifier); - - if (modkind.startsWith("*")) - return modifiers.multiplyByAttempts(modifier); - else if (modkind.startsWith("^")) - return modifiers.multiplyByAttemptsExp(modifier); - else - throw new IllegalArgumentException("Unrecognised attempt modifier: " + modkind); - } - - static long saturatedCast(double v) - { - if (v > Long.MAX_VALUE) - return Long.MAX_VALUE; - return (long) v; - } - - static WaitRandomizer parseWaitRandomizer(String input) - { - return parseWaitRandomizer(input, randomizers); - } - - static WaitRandomizer parseWaitRandomizer(String input, WaitRandomizerFactory randomizers) - { - Matcher m = RANDOMIZER.matcher(input); - if (!m.matches()) - throw new IllegalArgumentException(input + " does not match" + RANDOMIZER); - - String exp; - exp = m.group("exp"); - if (exp != null) - return randomizers.exponential(Double.parseDouble(exp)); - exp = m.group("qexp"); - if (exp != null) - return randomizers.quantizedExponential(Double.parseDouble(exp)); - return randomizers.uniform(); - } - - static Bound parseBound(String input, boolean isMin) - { - return parseBound(input, isMin, selectors, modifiers); - } - - @VisibleForTesting - static Bound parseBound(String input, boolean isMin, LatencySelectorFactory selectors, LatencyModifierFactory modifiers) - { - Matcher m = BOUND.matcher(input); - if (!m.matches()) - throw new IllegalArgumentException(input + " does not match " + BOUND); - - String maybeConst = m.group("const"); - if (maybeConst != null) - { - long v = parseInMicros(maybeConst); - return new Bound(v, v, v, modifiers.identity(), selectors.constant(v)); - } - - long min = parseInMicros(m.group("min"), 0); - long max = parseInMicros(m.group("max"), maxQueryTimeoutMicros() / 2); - return new Bound(min, max, isMin ? min : max, parseLatencyModifier(m, modifiers), parseLatencySelector(m, selectors)); - } - - private static long parseInMicros(String input, long orElse) - { - if (input == null) - return orElse; - - return parseInMicros(input); - } - - private static long parseInMicros(String input) - { - Matcher m = TIME.matcher(input); - if (!m.matches()) - throw new IllegalArgumentException(input + " does not match " + TIME); - - String text; - if (null != (text = m.group(1))) - return parseInt(text) * 1000; - else if (null != (text = m.group(2))) - return parseInt(text); - else - return 0; - } - - @VisibleForTesting - static String defaultWaitRandomizer() - { - return orElse(DatabaseDescriptor::getPaxosContentionWaitRandomizer, DEFAULT_WAIT_RANDOMIZER); - } - - @VisibleForTesting - static String defaultMinWait() - { - return orElse(DatabaseDescriptor::getPaxosContentionMinWait, DEFAULT_MIN); - } - - @VisibleForTesting - static String defaultMaxWait() - { - return orElse(DatabaseDescriptor::getPaxosContentionMaxWait, DEFAULT_MAX); - } - - @VisibleForTesting - static String defaultMinDelta() - { - return orElse(DatabaseDescriptor::getPaxosContentionMinDelta, DEFAULT_MIN_DELTA); + return currentParsed.toString(); } @VisibleForTesting diff --git a/test/distributed/org/apache/cassandra/distributed/test/ForBenchmarks.java b/test/distributed/org/apache/cassandra/distributed/test/ForBenchmarks.java new file mode 100644 index 000000000000..fc4dc57a7b82 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/ForBenchmarks.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.Feature; + +import java.io.IOException; + +public class ForBenchmarks extends TestBaseImpl { + public static void main(String[] args) throws IOException, InterruptedException { + try (Cluster cluster = Cluster.build(3) + .withConfig(c -> c.with(Feature.values())) + .start()) { + cluster.get(1).nodetoolResult("cms", "reconfigure", "3").asserts().success(); + + Thread.currentThread().join(); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index d9315cf2c7c6..d750e8684474 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -27,6 +27,9 @@ import java.util.Map; import java.util.Random; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -61,7 +64,8 @@ public class AccordLoadTest extends AccordTestBase public static void setUp() throws IOException { CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(MILLISECONDS.toSeconds(currentTimeMillis()))); - AccordTestBase.setupCluster(builder -> builder, 2); + AccordTestBase.setupCluster(builder -> builder, 3); +// AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config.with(Feature.values())), 3); } @Ignore @@ -71,151 +75,187 @@ public void testLoad() throws Exception test("CREATE TABLE " + qualifiedAccordTableName + " (k int, v int, PRIMARY KEY(k)) WITH transactional_mode = 'full'", cluster -> { - final ConcurrentHashMap verbs = new ConcurrentHashMap<>(); - cluster.filters().outbound().messagesMatching(new IMessageFilters.Matcher() + try { - @Override - public boolean matches(int i, int i1, IMessage iMessage) + + final ConcurrentHashMap verbs = new ConcurrentHashMap<>(); + cluster.filters().outbound().messagesMatching(new IMessageFilters.Matcher() { - verbs.computeIfAbsent(Verb.fromId(iMessage.verb()), ignore -> new AtomicInteger()).incrementAndGet(); - return false; - } - }).drop(); - - cluster.forEach(i -> i.runOnInstance(() -> { - ((AccordService) AccordService.instance()).journal().compactor().updateCompactionPeriod(1, SECONDS); -// ((AccordSpec.JournalSpec)((AccordService) AccordService.instance()).journal().configuration()).segmentSize = 128 << 10; - })); - - ICoordinator coordinator = cluster.coordinator(1); - final int repairInterval = 3000; - final int compactionInterval = 3000; - final int flushInterval = 1000; - final int batchSizeLimit = 1000; - final long batchTime = TimeUnit.SECONDS.toNanos(10); - final int concurrency = 100; - final int ratePerSecond = 1000; - final int keyCount = 1000000; - final float readChance = 0.33f; - long nextRepairAt = repairInterval; - long nextCompactionAt = compactionInterval; - long nextFlushAt = flushInterval; - final BitSet initialised = new BitSet(); - - Random random = new Random(); -// CopyOnWriteArrayList exceptions = new CopyOnWriteArrayList<>(); - final Semaphore inFlight = new Semaphore(concurrency); - final RateLimiter rateLimiter = RateLimiter.create(ratePerSecond); -// long testStart = System.nanoTime(); -// while (NANOSECONDS.toMinutes(System.nanoTime() - testStart) < 10 && exceptions.size() < 10000) - while (true) - { - final EstimatedHistogram histogram = new EstimatedHistogram(200); - long batchStart = System.nanoTime(); - long batchEnd = batchStart + batchTime; - int batchSize = 0; - while (batchSize < batchSizeLimit) + @Override + public boolean matches(int i, int i1, IMessage iMessage) + { + verbs.computeIfAbsent(Verb.fromId(iMessage.verb()), ignore -> new AtomicInteger()).incrementAndGet(); + return false; + } + }).drop(); + + ICoordinator coordinator = cluster.coordinator(1); + final int repairInterval = Integer.MAX_VALUE; + // final int repairInterval = 3000; + final int compactionInterval = Integer.MAX_VALUE; + // final int compactionInterval = 3000; + final int flushInterval = Integer.MAX_VALUE; + // final int flushInterval = 1000; + final int compactionPeriodSeconds = -1; + final int restartInterval = 150_000_000; + final int batchSizeLimit = 1000; + final long batchTime = TimeUnit.SECONDS.toNanos(10); + final int concurrency = 100; + final int ratePerSecond = 1000; + final int keyCount = 1000000; + final float readChance = 0.33f; + long nextRepairAt = repairInterval; + long nextCompactionAt = compactionInterval; + long nextFlushAt = flushInterval; + long nextRestartAt = restartInterval; + final ExecutorService restartExecutor = Executors.newSingleThreadExecutor(); + final BitSet initialised = new BitSet(); + + cluster.get(1).nodetoolResult("cms", "reconfigure", "3").asserts().success(); + cluster.forEach(i -> i.runOnInstance(() -> { + if (compactionPeriodSeconds > 0) + ((AccordService) AccordService.instance()).journal().compactor().updateCompactionPeriod(1, SECONDS); + // ((AccordSpec.JournalSpec)((AccordService) AccordService.instance()).journal().configuration()).segmentSize = 128 << 10; + })); + + Random random = new Random(); + // CopyOnWriteArrayList exceptions = new CopyOnWriteArrayList<>(); + final Semaphore inFlight = new Semaphore(concurrency); + final RateLimiter rateLimiter = RateLimiter.create(ratePerSecond); + // long testStart = System.nanoTime(); + // while (NANOSECONDS.toMinutes(System.nanoTime() - testStart) < 10 && exceptions.size() < 10000) + while (true) { - inFlight.acquire(); - rateLimiter.acquire(); - long commandStart = System.nanoTime(); - int k = random.nextInt(keyCount); - if (random.nextFloat() < readChance) + final EstimatedHistogram histogram = new EstimatedHistogram(200); + long batchStart = System.nanoTime(); + long batchEnd = batchStart + batchTime; + int batchSize = 0; + while (batchSize < batchSizeLimit) { - coordinator.executeWithResult((success, fail) -> { - inFlight.release(); - if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); - // else exceptions.add(fail); - }, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, k); + inFlight.acquire(); + rateLimiter.acquire(); + long commandStart = System.nanoTime(); + int k = random.nextInt(keyCount); + if (random.nextFloat() < readChance) + { + coordinator.executeWithResult((success, fail) -> { + inFlight.release(); + if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); + // else exceptions.add(fail); + }, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, k); + } + else if (initialised.get(k)) + { + coordinator.executeWithResult((success, fail) -> { + inFlight.release(); + if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); + // else exceptions.add(fail); + }, "UPDATE " + qualifiedAccordTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, k); + } + else + { + initialised.set(k); + coordinator.executeWithResult((success, fail) -> { + inFlight.release(); + if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); + // else exceptions.add(fail); + }, "UPDATE " + qualifiedAccordTableName + " SET v = 0 WHERE k = ? IF NOT EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, k); + } + batchSize++; + if (System.nanoTime() >= batchEnd) + break; } - else if (initialised.get(k)) + + if ((nextRepairAt -= batchSize) <= 0) { - coordinator.executeWithResult((success, fail) -> { - inFlight.release(); - if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); - // else exceptions.add(fail); - }, "UPDATE " + qualifiedAccordTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, k); + nextRepairAt += repairInterval; + System.out.println("repairing..."); + cluster.coordinator(1).instance().nodetool("repair", qualifiedAccordTableName); } - else + + if ((nextCompactionAt -= batchSize) <= 0) { - initialised.set(k); - coordinator.executeWithResult((success, fail) -> { - inFlight.release(); - if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); - // else exceptions.add(fail); - }, "UPDATE " + qualifiedAccordTableName + " SET v = 0 WHERE k = ? IF NOT EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, k); + nextCompactionAt += compactionInterval; + System.out.println("compacting accord..."); + cluster.forEach(i -> { + i.nodetool("compact", "system_accord.journal"); + i.runOnInstance(() -> { + ((AccordService) AccordService.instance()).journal().checkAllCommands(); + }); + }); } - batchSize++; - if (System.nanoTime() >= batchEnd) - break; - } - if ((nextRepairAt -= batchSize) <= 0) - { - nextRepairAt += repairInterval; - System.out.println("repairing..."); - cluster.coordinator(1).instance().nodetool("repair", qualifiedAccordTableName); - } - - if ((nextCompactionAt -= batchSize) <= 0) - { - nextCompactionAt += compactionInterval; - System.out.println("compacting accord..."); - cluster.forEach(i -> { - i.nodetool("compact", "system_accord.journal"); - i.runOnInstance(() -> { + if ((nextFlushAt -= batchSize) <= 0) + { + nextFlushAt += flushInterval; + System.out.println("flushing journal..."); + cluster.forEach(i -> i.runOnInstance(() -> { + ((AccordService) AccordService.instance()).journal().closeCurrentSegmentForTestingIfNonEmpty(); ((AccordService) AccordService.instance()).journal().checkAllCommands(); - }); - }); + })); + } - } + if ((nextRestartAt -= batchSize) <= 0) + { + nextRestartAt += restartInterval; + int nodeIdx = random.nextInt(cluster.size()); - if ((nextFlushAt -= batchSize) <= 0) - { - nextFlushAt += flushInterval; - System.out.println("flushing journal..."); - cluster.forEach(i -> i.runOnInstance(() -> { - ((AccordService) AccordService.instance()).journal().closeCurrentSegmentForTestingIfNonEmpty(); - ((AccordService) AccordService.instance()).journal().checkAllCommands(); - })); - } + restartExecutor.submit(() -> { + System.out.printf("restarting node %d...\n", nodeIdx); + try + { + cluster.get(nodeIdx).shutdown().get(); + cluster.get(nodeIdx).startup(); + return null; + } + catch (InterruptedException | ExecutionException e) + { + throw new RuntimeException(e); + } + }); + } - final Date date = new Date(); - System.out.printf("%tT rate: %.2f/s (%d total)\n", date, (((float)batchSizeLimit * 1000) / NANOSECONDS.toMillis(System.nanoTime() - batchStart)), batchSize); - System.out.printf("%tT percentiles: %d %d %d %d\n", date, histogram.percentile(.25)/1000, histogram.percentile(.5)/1000, histogram.percentile(.75)/1000, histogram.percentile(1)/1000); + final Date date = new Date(); + System.out.printf("%tT rate: %.2f/s (%d total)\n", date, (((float)batchSizeLimit * 1000) / NANOSECONDS.toMillis(System.nanoTime() - batchStart)), batchSize); + System.out.printf("%tT percentiles: %d %d %d %d\n", date, histogram.percentile(.25)/1000, histogram.percentile(.5)/1000, histogram.percentile(.75)/1000, histogram.percentile(1)/1000); - class VerbCount - { - final Verb verb; - final int count; + class VerbCount + { + final Verb verb; + final int count; - VerbCount(Verb verb, int count) + VerbCount(Verb verb, int count) + { + this.verb = verb; + this.count = count; + } + } + List verbCounts = new ArrayList<>(); + for (Map.Entry e : verbs.entrySet()) { - this.verb = verb; - this.count = count; + int count = e.getValue().getAndSet(0); + if (count != 0) verbCounts.add(new VerbCount(e.getKey(), count)); } - } - List verbCounts = new ArrayList<>(); - for (Map.Entry e : verbs.entrySet()) - { - int count = e.getValue().getAndSet(0); - if (count != 0) verbCounts.add(new VerbCount(e.getKey(), count)); - } - verbCounts.sort(Comparator.comparing(v -> -v.count)); + verbCounts.sort(Comparator.comparing(v -> -v.count)); - StringBuilder verbSummary = new StringBuilder(); - for (VerbCount vs : verbCounts) - { + StringBuilder verbSummary = new StringBuilder(); + for (VerbCount vs : verbCounts) { - if (verbSummary.length() > 0) - verbSummary.append(", "); - verbSummary.append(vs.verb); - verbSummary.append(": "); - verbSummary.append(vs.count); + { + if (verbSummary.length() > 0) + verbSummary.append(", "); + verbSummary.append(vs.verb); + verbSummary.append(": "); + verbSummary.append(vs.count); + } } + System.out.printf("%tT verbs: %s\n", date, verbSummary); } - System.out.printf("%tT verbs: %s\n", date, verbSummary); - } + } + catch (Throwable t) + { + t.printStackTrace(); + } } ); } diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java index 1e69174d3085..3367c7261106 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -117,6 +117,7 @@ public boolean enableCompaction() try { journal.start(null); + journal.unsafeSetStarted(); Timestamp timestamp = Timestamp.NONE; RandomSource rs = new DefaultRandom(1); diff --git a/test/unit/org/apache/cassandra/journal/IndexTest.java b/test/unit/org/apache/cassandra/journal/IndexTest.java index ce5ed00ebe15..9b6a69505e05 100644 --- a/test/unit/org/apache/cassandra/journal/IndexTest.java +++ b/test/unit/org/apache/cassandra/journal/IndexTest.java @@ -136,8 +136,8 @@ public void testInMemoryIndexPersists() throws IOException { assertArrayEquals(EMPTY, onDisk.lookUp(key0)); assertArrayEquals(new long[] { composeOffsetAndSize(val11, 1) }, onDisk.lookUp(key1)); - assertArrayEquals(new long[] { composeOffsetAndSize(val21, 2), composeOffsetAndSize(val22, 3) }, onDisk.lookUp(key2)); - assertArrayEquals(new long[] { composeOffsetAndSize(val31, 4), composeOffsetAndSize(val32, 5), composeOffsetAndSize(val33, 6) }, onDisk.lookUp(key3)); + assertArrayEquals(new long[] { composeOffsetAndSize(val22, 3), composeOffsetAndSize(val21, 2) }, onDisk.lookUp(key2)); + assertArrayEquals(new long[] { composeOffsetAndSize(val33, 6), composeOffsetAndSize(val32, 5), composeOffsetAndSize(val31, 4) }, onDisk.lookUp(key3)); assertArrayEquals(EMPTY, onDisk.lookUp(key4)); assertEquals(key1, onDisk.firstId()); diff --git a/test/unit/org/apache/cassandra/service/RetryStrategyTest.java b/test/unit/org/apache/cassandra/service/RetryStrategyTest.java new file mode 100644 index 000000000000..e0fed7475cbf --- /dev/null +++ b/test/unit/org/apache/cassandra/service/RetryStrategyTest.java @@ -0,0 +1,482 @@ +///* +// * Licensed to the Apache Software Foundation (ASF) under one +// * or more contributor license agreements. See the NOTICE file +// * distributed with this work for additional information +// * regarding copyright ownership. The ASF licenses this file +// * to you under the Apache License, Version 2.0 (the +// * "License"); you may not use this file except in compliance +// * with the License. You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// */ +//package org.apache.cassandra.service; +// +//import java.util.List; +//import java.util.Random; +//import java.util.concurrent.ThreadLocalRandom; +//import java.util.concurrent.TimeUnit; +//import java.util.concurrent.atomic.AtomicReference; +//import java.util.function.BiFunction; +//import java.util.function.Consumer; +//import java.util.function.DoubleSupplier; +//import java.util.function.LongBinaryOperator; +// +//import com.google.common.collect.ImmutableList; +//import org.junit.Assert; +//import org.junit.Test; +// +//import org.slf4j.Logger; +//import org.slf4j.LoggerFactory; +// +//import net.nicoulaj.compilecommand.annotations.Inline; +//import org.apache.cassandra.config.DatabaseDescriptor; +//import org.apache.cassandra.service.TimeoutStrategy.LatencyModifier; +//import org.apache.cassandra.service.TimeoutStrategy.LatencyModifierFactory; +//import org.apache.cassandra.service.TimeoutStrategy.LatencySource; +//import org.apache.cassandra.service.TimeoutStrategy.LatencySupplierFactory; +//import org.apache.cassandra.service.TimeoutStrategy.LatencySupplier; +//import org.apache.cassandra.service.TimeoutStrategy.Wait; +//import org.apache.cassandra.service.paxos.ContentionStrategy; +// +//import static org.apache.cassandra.service.RetryStrategy.*; +//import static org.apache.cassandra.service.RetryStrategy.WaitRandomizerFactory.*; +//import static org.apache.cassandra.service.RetryStrategyTest.WaitRandomizerType.*; +//import static org.apache.cassandra.service.TimeoutStrategy.modifiers; +//import static org.apache.cassandra.service.TimeoutStrategy.parseWait; +//import static org.apache.cassandra.service.TimeoutStrategy.selectors; +// +//public class RetryStrategyTest +//{ +// private static final Logger logger = LoggerFactory.getLogger(RetryStrategyTest.class); +// +// static +// { +// DatabaseDescriptor.daemonInitialization(); +// } +// +// private static final long MAX = DatabaseDescriptor.getRpcTimeout(TimeUnit.MICROSECONDS); +// +// private static final String DEFAULT_WAIT_RANDOMIZER = "qexp(1.5)"; // at least 0ms, and at least 66% of median latency +// private static final String DEFAULT_MIN = "0 <= p50(rw)*0.66"; // at least 0ms, and at least 66% of median latency +// private static final String DEFAULT_MAX = "10ms <= p95(rw)*1.8^attempts <= 100ms"; // p95 latency with exponential back-off at rate of 1.8^attempts +// private static final String DEFAULT_SPREAD = "5ms <= p50(rw)*0.5"; // at least 5ms, and at least 50% of median latency +// +// private static final WaitRandomizerParseValidator DEFAULT_WAIT_RANDOMIZER_VALIDATOR = new WaitRandomizerParseValidator(DEFAULT_WAIT_RANDOMIZER, QEXP, 1.5); +// private static final WaitParseValidator DEFAULT_MIN_VALIDATOR = new WaitParseValidator(DEFAULT_MIN, true, assertWait(0, MAX, 0, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0f).getClass(), 0.66)); +// private static final WaitParseValidator DEFAULT_MAX_VALIDATOR = new WaitParseValidator(DEFAULT_MAX, false, assertWait(10000, 100000, 100000, selectors.maxReadWrite(0f).getClass(), 0.95, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 1.8)); +// private static final WaitParseValidator DEFAULT_MIN_DELTA_VALIDATOR = new WaitParseValidator(DEFAULT_SPREAD, true, assertWait(5000, MAX, 5000, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0f).getClass(), 0.5)); +// private static final RetryStrategy.ParsedStrategy DEFAULT = new RetryStrategy.ParsedStrategy(DEFAULT_WAIT_RANDOMIZER, DEFAULT_MIN, DEFAULT_MAX, DEFAULT_SPREAD, +// new RetryStrategy(DEFAULT_WAIT_RANDOMIZER, DEFAULT_MIN, DEFAULT_MAX, DEFAULT_SPREAD)); +// +// private static List VALIDATE = ImmutableList.of( +// new WaitParseValidator("p95(rw)", false, assertWait(0, MAX, MAX, selectors.maxReadWrite(0f).getClass(), 0.95, 0, modifiers.identity().getClass(), 1)), +// new WaitParseValidator("5ms<=p50(rw)*0.66", false, assertWait(5000, MAX, MAX, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0).getClass(), 0.66)), +// new WaitParseValidator("5us <= p50(r)*1.66*attempts", true, assertWait(5, MAX, 5, selectors.read(0f).getClass(), 0.50, 0, modifiers.multiplyByAttempts(0f).getClass(), 1.66)), +// new WaitParseValidator("0<=p50(w)*0.66^attempts", true, assertWait(0, MAX, 0, selectors.write(0f).getClass(), 0.50, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 0.66)), +// new WaitParseValidator("125us", true, assertWait(125, 125, 125, selectors.constant(0).getClass(), 0.0f, 125, modifiers.identity().getClass(), 1)), +// new WaitParseValidator("5us <= p95(r)*1.8^attempts <= 100us", true, assertWait(5, 100, 5, selectors.read(0f).getClass(), 0.95, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 1.8)), +// DEFAULT_MIN_VALIDATOR, DEFAULT_MAX_VALIDATOR, DEFAULT_MIN_DELTA_VALIDATOR +// ); +// +// private static List VALIDATE_RANDOMIZER = ImmutableList.of( +// new WaitRandomizerParseValidator("quantizedexponential(0.5)", QEXP, 0.5), +// new WaitRandomizerParseValidator("exponential(2.5)", EXP, 2.5), +// new WaitRandomizerParseValidator("exp(10)", EXP, 10), +// new WaitRandomizerParseValidator("uniform", UNIFORM, 0), +// DEFAULT_WAIT_RANDOMIZER_VALIDATOR +// ); +// +// static class WaitParseValidator +// { +// final String spec; +// final boolean isMin; +// final Consumer validator; +// +// WaitParseValidator(String spec, boolean isMin, Consumer validator) +// { +// this.spec = spec; +// this.isMin = isMin; +// this.validator = validator; +// } +// +// void validate(Wait Wait) +// { +// validator.accept(Wait); +// } +// } +// +// enum WaitRandomizerType +// { +// UNIFORM(Uniform.class, (p, f) -> f.uniform()), +// EXP(Exponential.class, (p, f) -> f.exponential(p)), +// QEXP(QuantizedExponential.class, (p, f) -> f.quantizedExponential(p)); +// +// final Class clazz; +// final BiFunction getter; +// +// WaitRandomizerType(Class clazz, BiFunction getter) +// { +// this.clazz = clazz; +// this.getter = getter; +// } +// } +// +// static class WaitRandomizerParseValidator +// { +// final String spec; +// final WaitRandomizerType type; +// final double power; +// +// WaitRandomizerParseValidator(String spec, WaitRandomizerType type, double power) +// { +// this.spec = spec; +// this.type = type; +// this.power = power; +// } +// +// void validate(WaitRandomizer randomizer) +// { +// Assert.assertSame(type.clazz, randomizer.getClass()); +// if (AbstractExponential.class.isAssignableFrom(type.clazz)) +// Assert.assertEquals(power, ((AbstractExponential) randomizer).power, 0.00001); +// } +// } +// +// private static class WaitRandomizerOutputValidator +// { +// static void validate(WaitRandomizerType type, long seed, int trials, int samplesPerTrial) +// { +// Random random = new Random(seed); +// WaitRandomizer randomizer = type.getter.apply(2d, new WaitRandomizerFactory() +// { +// @Override public LongBinaryOperator uniformLongSupplier() { return (min, max) -> min + random.nextInt((int) (max - min)); } +// @Override public DoubleSupplier uniformDoubleSupplier() { return random::nextDouble; } +// }); +// +// for (int i = 0 ; i < trials ; ++i) +// { +// int min = random.nextInt(1 << 20); +// int max = min + 1024 + random.nextInt(1 << 20); +// double minMean = minMean(type, min, max); +// double maxMean = maxMean(type, min, max); +// double sampleMean = sampleMean(samplesPerTrial, min, max, randomizer); +// Assert.assertTrue(minMean <= sampleMean); +// Assert.assertTrue(maxMean >= sampleMean); +// } +// } +// +// private static double minMean(WaitRandomizerType type, int min, int max) +// { +// switch (type) +// { +// case UNIFORM: return min + (max - min) * (4d/10); +// case EXP: case QEXP: return min + (max - min) * (6d/10); +// default: throw new IllegalStateException(); +// } +// } +// +// private static double maxMean(WaitRandomizerType type, int min, int max) +// { +// switch (type) +// { +// case UNIFORM: return min + (max - min) * (6d/10); +// case EXP: case QEXP: return min + (max - min) * (8d/10); +// default: throw new IllegalStateException(); +// } +// } +// +// private static double sampleMean(int samples, int min, int max, WaitRandomizer randomizer) +// { +// double sum = 0; +// int attempts = 1; +// for (int i = 0 ; i < samples ; ++i) +// { +// long wait = randomizer.wait(min, max, attempts = (attempts & 15) + 1); +// Assert.assertTrue(wait >= min); +// Assert.assertTrue(wait <= max); +// sum += wait; +// } +// double mean = sum / samples; +// Assert.assertTrue(mean >= min); +// Assert.assertTrue(mean <= max); +// return mean; +// } +// } +// +// private static Consumer assertWait( +// long min, long max, long onFailure, +// Class selectorClass, +// double selectorPercentile, +// long selectorConst, +// Class modifierClass, +// double modifierVal +// ) +// { +// return Wait -> { +// Assert.assertEquals(min, Wait.min); +// Assert.assertEquals(max, Wait.max); +// Assert.assertEquals(onFailure, Wait.onFailure); +// Assert.assertSame(selectorClass, Wait.selector.getClass()); +// if (selectorClass == selectors.constant(0).getClass()) +// { +// LatencySupplier fail = v -> { throw new UnsupportedOperationException(); }; +// Assert.assertEquals(selectorConst, Wait.selector.select(fail, fail)); +// } +// else +// { +// AtomicReference percentile = new AtomicReference<>(); +// LatencySource set = v -> { percentile.set(v); return 0; }; +// Wait.selector.select(set, set); +// Assert.assertNotNull(percentile.get()); +// Assert.assertEquals(selectorPercentile, percentile.get(), 0.00001); +// } +// Assert.assertSame(modifierClass, Wait.modifier.getClass()); +// Assert.assertEquals(1000000L * modifierVal, Wait.modifier.modify(1000000, 1), 0.00001); +// }; +// } +// +// private static void assertParseFailure(String spec) +// { +// +// try +// { +// Wait Wait = parseWait(spec, 0, 0, 0); +// Assert.fail("expected parse failure, but got " + Wait); +// } +// catch (IllegalArgumentException e) +// { +// // expected +// } +// } +// +// @Test +// public void strategyParseTest() +// { +// for (WaitParseValidator min : VALIDATE.stream().filter(v -> v.isMin).toArray(WaitParseValidator[]::new)) +// { +// for (WaitParseValidator max : VALIDATE.stream().filter(v -> !v.isMin).toArray(WaitParseValidator[]::new)) +// { +// for (WaitParseValidator minDelta : VALIDATE.stream().filter(v -> v.isMin).toArray(WaitParseValidator[]::new)) +// { +// for (WaitRandomizerParseValidator random : VALIDATE_RANDOMIZER) +// { +// { +// ParsedStrategy parsed = parseStrategy("min=" + min.spec + ",max=" + max.spec + ",delta=" + minDelta.spec + ",random=" + random.spec, DEFAULT); +// Assert.assertEquals(parsed.min, min.spec); +// min.validate(parsed.strategy.min); +// Assert.assertEquals(parsed.max, max.spec); +// max.validate(parsed.strategy.max); +// Assert.assertEquals(parsed.spread, minDelta.spec); +// minDelta.validate(parsed.strategy.spread); +// Assert.assertEquals(parsed.waitRandomizer, random.spec); +// random.validate(parsed.strategy.waitRandomizer); +// } +// ParsedStrategy parsed = parseStrategy("random=" + random.spec, DEFAULT); +// Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); +// DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); +// Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); +// DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); +// Assert.assertEquals(parsed.spread, DEFAULT_MIN_DELTA_VALIDATOR.spec); +// DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.spread); +// Assert.assertEquals(parsed.waitRandomizer, random.spec); +// random.validate(parsed.strategy.waitRandomizer); +// } +// ParsedStrategy parsed = parseStrategy("delta=" + minDelta.spec, DEFAULT); +// Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); +// DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); +// Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); +// DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); +// Assert.assertEquals(parsed.spread, minDelta.spec); +// minDelta.validate(parsed.strategy.spread); +// } +// ParsedStrategy parsed = parseStrategy("max=" + max.spec, DEFAULT); +// Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); +// DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); +// Assert.assertEquals(parsed.max, max.spec); +// max.validate(parsed.strategy.max); +// Assert.assertEquals(parsed.spread, DEFAULT_MIN_DELTA_VALIDATOR.spec); +// DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.spread); +// } +// ParsedStrategy parsed = parseStrategy("min=" + min.spec, DEFAULT); +// Assert.assertEquals(parsed.min, min.spec); +// min.validate(parsed.strategy.min); +// Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); +// DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); +// Assert.assertEquals(parsed.spread, DEFAULT_MIN_DELTA_VALIDATOR.spec); +// DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.spread); +// } +// } +// +// @Test +// public void testParseRoundTrip() +// { +// LatencySupplierFactory selectorFactory = new LatencySupplierFactory() +// { +// LatencySupplierFactory delegate = TimeoutStrategy.selectors; +// public LatencySelector constant(long latency) { return selector(delegate.constant(latency), String.format("%dms", latency)); } +// public LatencySelector read(double percentile) { return selector(delegate.read(percentile), String.format("p%d(r)", (int) (percentile * 100))); } +// public LatencySelector write(double percentile) { return selector(delegate.write(percentile), String.format("p%d(w)", (int) (percentile * 100))); } +// public LatencySelector maxReadWrite(double percentile) { return selector(delegate.maxReadWrite(percentile), String.format("p%d(rw)", (int) percentile * 100)); } +// +// private LatencySelector selector(LatencySelector selector, String str) { +// return new LatencySelector() +// { +// public long select(LatencySupplier read, LatencySupplier write) +// { +// return selector.select(read, write); +// } +// +// public String toString() +// { +// return str; +// } +// }; +// } +// }; +// +// LatencyModifierFactory modifierFactory = new LatencyModifierFactory() +// { +// LatencyModifierFactory delegate = modifiers; +// public LatencyModifier identity() { return modifier(delegate.identity(), ""); } +// public LatencyModifier multiply(double constant) { return modifier(delegate.multiply(constant), String.format(" * %.2f", constant)); } +// public LatencyModifier multiplyByAttempts(double multiply) { return modifier(delegate.multiplyByAttempts(multiply), String.format(" * %.2f * attempts", multiply)); } +// public LatencyModifier multiplyByAttemptsExp(double base) { return modifier(delegate.multiplyByAttemptsExp(base), String.format(" * %.2f ^ attempts", base)); } +// +// private LatencyModifier modifier(LatencyModifier modifier, String str) { +// return new LatencyModifier() +// { +// @Inline +// public long modify(long latency, int attempts) +// { +// return modifier.modify(latency, attempts); +// } +// +// public String toString() +// { +// return str; +// } +// }; +// } +// }; +// +// LatencyModifier[] latencyModifiers = new LatencyModifier[]{ +// modifierFactory.multiply(0.5), +// modifierFactory.multiplyByAttempts(0.5), +// modifierFactory.multiplyByAttemptsExp(0.5) +// }; +// +// LatencySelector[] latencySelectors = new LatencySelector[]{ +// selectorFactory.read(0.5), +// selectorFactory.write(0.5), +// selectorFactory.maxReadWrite(0.99) +// }; +// +// for (boolean min : new boolean[] { true, false}) +// { +// String left = min ? "10ms <= " : ""; +// for (boolean max : new boolean[] { true, false}) +// { +// String right = max ? " <= 10ms" : ""; +// +// for (LatencySelector selector : latencySelectors) +// { +// for (LatencyModifier modifier : latencyModifiers) +// { +// String mid = String.format("%s%s", selector, modifier); +// String input = left + mid + right; +// Wait Wait = parseWait(input, 0, MAX, MAX, selectorFactory, modifierFactory); +// Assert.assertTrue(String.format("Wait: %d" , Wait.min), !min || Wait.min == 10000); +// Assert.assertTrue(String.format("Wait: %d" , Wait.max), !max || Wait.max == 10000); +// Assert.assertEquals(selector.toString(), Wait.selector.toString()); +// Assert.assertEquals(modifier.toString(), Wait.modifier.toString()); +// } +// } +// } +// } +// } +// +// @Test +// public void WaitParseTest() +// { +// VALIDATE.forEach(v -> v.validate(parseWait(v.spec, 0, MAX, v.isMin ? 0 : MAX))); +// } +// +// @Test +// public void waitRandomizerParseTest() +// { +// VALIDATE_RANDOMIZER.forEach(v -> v.validate(parseWaitRandomizer(v.spec))); +// } +// +// @Test +// public void waitRandomizerSampleTest() +// { +// waitRandomizerSampleTest(2); +// } +// +// private void waitRandomizerSampleTest(int count) +// { +// while (count-- > 0) +// { +// long seed = ThreadLocalRandom.current().nextLong(); +// logger.info("Seed {}", seed); +// for (WaitRandomizerType type : WaitRandomizerType.values()) +// { +// WaitRandomizerOutputValidator.validate(type, seed, 100, 1000000); +// } +// } +// } +// +// @Test +// public void WaitParseFailureTest() +// { +// assertParseFailure("10ms <= p95(r) <= 5ms"); +// assertParseFailure("10 <= p95(r)"); +// assertParseFailure("10 <= 20 <= 30"); +// assertParseFailure("p95(r) < 5"); +// assertParseFailure("p95(x)"); +// assertParseFailure("p95()"); +// assertParseFailure("p95"); +// assertParseFailure("p50(rw)+0.66"); +// } +// +// @Test +// public void testBackoffTime() +// { +// RetryStrategy strategy = parseStrategy("min=0ms,max=100ms,random=uniform", DEFAULT).strategy; +// double total = 0; +// int count = 100000; +// for (int i = 0 ; i < count ; ++i) +// { +// long now = System.nanoTime(); +// long waitUntil = strategy.computeWaitUntil(1); +// long waitLength = Math.max(waitUntil - now, 0); +// total += waitLength; +// } +// Assert.assertTrue(Math.abs(TimeUnit.MILLISECONDS.toNanos(50) - (total / count)) < TimeUnit.MILLISECONDS.toNanos(1L)); +// } +// +// @Test +// public void testBackoffTimeElapsed() +// { +// ContentionStrategy strategy = ContentionStrategy.parseStrategy("min=0ms,max=10ms,random=uniform").strategy; +// double total = 0; +// int count = 1000; +// for (int i = 0 ; i < count ; ++i) +// { +// long start = System.nanoTime(); +// strategy.doWaitForContention(Long.MAX_VALUE, 1, null, null, null, null); +// long end = System.nanoTime(); +// total += end - start; +// } +// // make sure we have slept at least 4ms on average, given a mean wait time of 5ms +// double avg = total / count; +// double nanos = avg - TimeUnit.MILLISECONDS.toNanos(4); +// Assert.assertTrue(nanos > 0); +// } +//} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java index 7cc2d081dc35..491f193faca3 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordMessageSinkTest.java @@ -21,9 +21,13 @@ import org.junit.BeforeClass; import org.junit.Test; +import accord.api.TopologySorter; +import accord.api.TopologySorter.StaticSorter; +import accord.impl.RequestCallbacks; import accord.messages.ReadData; import accord.messages.ReadData.CommitOrReadNack; import accord.topology.TopologyUtils; +import org.apache.cassandra.service.accord.api.AccordTimeService; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -56,10 +60,10 @@ public class AccordMessageSinkTest private static final Node.Id node = new Node.Id(1); private static final AccordEndpointMapper mapping = SimpleAccordEndpointMapper.INSTANCE; private static final Topology topology = TopologyUtils.initialTopology(new Node.Id[] { node}, Ranges.of(IntKey.range(0, 100)), 1); - private static final Topologies topologies = new Topologies.Single((a, b, ignore) -> 0, topology); + private static final Topologies topologies = new Topologies.Single((TopologySorter) (StaticSorter)(a, b, ignore) -> 0, topology); private static final MessageDelivery messaging = Mockito.mock(MessageDelivery.class); - private static final AccordMessageSink sink = new AccordMessageSink(Mockito.mock(Agent.class), messaging, mapping); + private static final AccordMessageSink sink = new AccordMessageSink(Mockito.mock(Agent.class), messaging, mapping, new RequestCallbacks(new AccordTimeService())); @BeforeClass public static void setup() diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index c75690f2e2cc..a40200a6a62e 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -50,7 +50,7 @@ import accord.local.Node; import accord.local.Node.Id; import accord.local.NodeCommandStoreService; -import accord.local.NodeTimeService; +import accord.local.TimeService; import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; @@ -368,12 +368,14 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS Node.Id node = new Id(1); NodeCommandStoreService time = new NodeCommandStoreService() { - private ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); + private ToLongFunction elapsed = TimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); @Override public Id id() { return node;} @Override public DurableBefore durableBefore() { return DurableBefore.EMPTY; } + @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } + @Override public Timestamp uniqueNow() { return uniqueNow(Timestamp.NONE); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } @Override public long elapsed(TimeUnit timeUnit) { return elapsed.applyAsLong(timeUnit); } }; @@ -390,12 +392,13 @@ public static AccordCommandStore createAccordCommandStore( { NodeCommandStoreService time = new NodeCommandStoreService() { - private ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); + private ToLongFunction elapsed = TimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); @Override public DurableBefore durableBefore() { return DurableBefore.EMPTY; } @Override public Id id() { return node;} @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } + @Override public Timestamp uniqueNow() { return uniqueNow(Timestamp.NONE); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } @Override public long elapsed(TimeUnit timeUnit) { return elapsed.applyAsLong(timeUnit); } diff --git a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java index f5faef62c93f..8344c09dc62f 100644 --- a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java +++ b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java @@ -54,7 +54,7 @@ import accord.impl.SizeOfIntersectionSorter; import accord.impl.TestAgent; import accord.local.Node; -import accord.local.NodeTimeService; +import accord.local.TimeService; import accord.primitives.Ranges; import accord.topology.Topology; import accord.topology.TopologyManager; @@ -666,7 +666,7 @@ private class Instance this.token = token; this.epoch = epoch; // TODO (review): Should there be a real scheduler here? Is it possible to adapt the Scheduler interface to scheduler used in this test? - this.topology = new TopologyManager(SizeOfIntersectionSorter.SUPPLIER, new TestAgent.RethrowAgent(), id, Scheduler.NEVER_RUN_SCHEDULED, NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MILLISECONDS, globalExecutor::currentTimeMillis), LocalConfig.DEFAULT); + this.topology = new TopologyManager(SizeOfIntersectionSorter.SUPPLIER, new TestAgent.RethrowAgent(), id, Scheduler.NEVER_RUN_SCHEDULED, TimeService.ofNonMonotonic(globalExecutor::currentTimeMillis, TimeUnit.MILLISECONDS), LocalConfig.DEFAULT); AccordConfigurationService.DiskStateManager instance = MockDiskStateManager.instance; config = new AccordConfigurationService(node, messagingService, failureDetector, instance, scheduler); config.registerListener(new ConfigurationService.Listener() diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 72581b0cf211..7da1a51e13de 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -41,7 +41,7 @@ import accord.local.DurableBefore; import accord.local.Node; import accord.local.NodeCommandStoreService; -import accord.local.NodeTimeService; +import accord.local.TimeService; import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; @@ -118,10 +118,16 @@ public SimulatedAccordCommandStore(RandomSource rs) this.nodeId = AccordTopology.tcmIdToAccord(ClusterMetadata.currentNullable().myNodeId()); this.storeService = new NodeCommandStoreService() { - private final ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.NANOSECONDS, this::now); + private final ToLongFunction elapsed = TimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.NANOSECONDS, this::now); @Override public DurableBefore durableBefore() { return DurableBefore.EMPTY; } + @Override + public Timestamp uniqueNow() + { + return uniqueNow(Timestamp.NONE); + } + @Override public Node.Id id() { diff --git a/test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java b/test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java index 022543bb3a8e..f2900a3595c6 100644 --- a/test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/repair/RequiredResponseTrackerTest.java @@ -27,6 +27,7 @@ import org.junit.Test; import accord.api.TopologySorter; +import accord.api.TopologySorter.StaticSorter; import accord.coordinate.tracking.RequestStatus; import accord.local.Node; import accord.topology.Topologies; @@ -58,7 +59,7 @@ public class RequiredResponseTrackerTest private static final Location LOCATION = new Location("DC1", "RACK1"); private static final List> RANGES = ImmutableList.of(range(-100, 0), range(0, 100), range(100, -100)); - private static final TopologySorter TOPOLOGY_SORTER = (node1, node2, shards) -> node1.compareTo(node2); + private static final TopologySorter TOPOLOGY_SORTER = (StaticSorter)(node1, node2, shards) -> node1.compareTo(node2); @BeforeClass public static void beforeClass() throws Throwable diff --git a/test/unit/org/apache/cassandra/service/paxos/ContentionStrategyTest.java b/test/unit/org/apache/cassandra/service/paxos/ContentionStrategyTest.java deleted file mode 100644 index 8b67c425b1d1..000000000000 --- a/test/unit/org/apache/cassandra/service/paxos/ContentionStrategyTest.java +++ /dev/null @@ -1,466 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.cassandra.service.paxos; - -import java.util.List; -import java.util.Random; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BiFunction; -import java.util.function.Consumer; -import java.util.function.DoubleSupplier; -import java.util.function.LongBinaryOperator; - -import com.google.common.collect.ImmutableList; -import org.junit.Assert; -import org.junit.Test; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import net.nicoulaj.compilecommand.annotations.Inline; -import org.apache.cassandra.config.DatabaseDescriptor; - -import static org.apache.cassandra.service.paxos.ContentionStrategy.*; -import static org.apache.cassandra.service.paxos.ContentionStrategy.WaitRandomizerFactory.*; -import static org.apache.cassandra.service.paxos.ContentionStrategyTest.WaitRandomizerType.*; - -public class ContentionStrategyTest -{ - private static final Logger logger = LoggerFactory.getLogger(ContentionStrategyTest.class); - - static - { - DatabaseDescriptor.daemonInitialization(); - } - - private static final long MAX = maxQueryTimeoutMicros()/2; - - private static final WaitParseValidator DEFAULT_WAIT_RANDOMIZER_VALIDATOR = new WaitParseValidator(defaultWaitRandomizer(), QEXP, 1.5); - private static final BoundParseValidator DEFAULT_MIN_VALIDATOR = new BoundParseValidator(defaultMinWait(), true, assertBound(0, MAX, 0, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0f).getClass(), 0.66)); - private static final BoundParseValidator DEFAULT_MAX_VALIDATOR = new BoundParseValidator(defaultMaxWait(), false, assertBound(10000, 100000, 100000, selectors.maxReadWrite(0f).getClass(), 0.95, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 1.8)); - private static final BoundParseValidator DEFAULT_MIN_DELTA_VALIDATOR = new BoundParseValidator(defaultMinDelta(), true, assertBound(5000, MAX, 5000, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0f).getClass(), 0.5)); - - private static List VALIDATE = ImmutableList.of( - new BoundParseValidator("p95(rw)", false, assertBound(0, MAX, MAX, selectors.maxReadWrite(0f).getClass(), 0.95, 0, modifiers.identity().getClass(), 1)), - new BoundParseValidator("5ms<=p50(rw)*0.66", false, assertBound(5000, MAX, MAX, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0).getClass(), 0.66)), - new BoundParseValidator("5us <= p50(r)*1.66*attempts", true, assertBound(5, MAX, 5, selectors.read(0f).getClass(), 0.50, 0, modifiers.multiplyByAttempts(0f).getClass(), 1.66)), - new BoundParseValidator("0<=p50(w)*0.66^attempts", true, assertBound(0, MAX, 0, selectors.write(0f).getClass(), 0.50, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 0.66)), - new BoundParseValidator("125us", true, assertBound(125, 125, 125, selectors.constant(0).getClass(), 0.0f, 125, modifiers.identity().getClass(), 1)), - new BoundParseValidator("5us <= p95(r)*1.8^attempts <= 100us", true, assertBound(5, 100, 5, selectors.read(0f).getClass(), 0.95, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 1.8)), - DEFAULT_MIN_VALIDATOR, DEFAULT_MAX_VALIDATOR, DEFAULT_MIN_DELTA_VALIDATOR - ); - - private static List VALIDATE_RANDOMIZER = ImmutableList.of( - new WaitParseValidator("quantizedexponential(0.5)", QEXP, 0.5), - new WaitParseValidator("exponential(2.5)", EXP, 2.5), - new WaitParseValidator("exp(10)", EXP, 10), - new WaitParseValidator("uniform", UNIFORM, 0), - DEFAULT_WAIT_RANDOMIZER_VALIDATOR - ); - - static class BoundParseValidator - { - final String spec; - final boolean isMin; - final Consumer validator; - - BoundParseValidator(String spec, boolean isMin, Consumer validator) - { - this.spec = spec; - this.isMin = isMin; - this.validator = validator; - } - - void validate(Bound bound) - { - validator.accept(bound); - } - } - - enum WaitRandomizerType - { - UNIFORM(Uniform.class, (p, f) -> f.uniform()), - EXP(Exponential.class, (p, f) -> f.exponential(p)), - QEXP(QuantizedExponential.class, (p, f) -> f.quantizedExponential(p)); - - final Class clazz; - final BiFunction getter; - - WaitRandomizerType(Class clazz, BiFunction getter) - { - this.clazz = clazz; - this.getter = getter; - } - } - - - static class WaitParseValidator - { - final String spec; - final WaitRandomizerType type; - final double power; - - WaitParseValidator(String spec, WaitRandomizerType type, double power) - { - this.spec = spec; - this.type = type; - this.power = power; - } - - void validate(WaitRandomizer randomizer) - { - Assert.assertSame(type.clazz, randomizer.getClass()); - if (AbstractExponential.class.isAssignableFrom(type.clazz)) - Assert.assertEquals(power, ((AbstractExponential) randomizer).power, 0.00001); - } - } - - private static class WaitRandomizerOutputValidator - { - static void validate(WaitRandomizerType type, long seed, int trials, int samplesPerTrial) - { - Random random = new Random(seed); - WaitRandomizer randomizer = type.getter.apply(2d, new WaitRandomizerFactory() - { - @Override public LongBinaryOperator uniformLongSupplier() { return (min, max) -> min + random.nextInt((int) (max - min)); } - @Override public DoubleSupplier uniformDoubleSupplier() { return random::nextDouble; } - }); - - for (int i = 0 ; i < trials ; ++i) - { - int min = random.nextInt(1 << 20); - int max = min + 1024 + random.nextInt(1 << 20); - double minMean = minMean(type, min, max); - double maxMean = maxMean(type, min, max); - double sampleMean = sampleMean(samplesPerTrial, min, max, randomizer); - Assert.assertTrue(minMean <= sampleMean); - Assert.assertTrue(maxMean >= sampleMean); - } - } - - private static double minMean(WaitRandomizerType type, int min, int max) - { - switch (type) - { - case UNIFORM: return min + (max - min) * (4d/10); - case EXP: case QEXP: return min + (max - min) * (6d/10); - default: throw new IllegalStateException(); - } - } - - private static double maxMean(WaitRandomizerType type, int min, int max) - { - switch (type) - { - case UNIFORM: return min + (max - min) * (6d/10); - case EXP: case QEXP: return min + (max - min) * (8d/10); - default: throw new IllegalStateException(); - } - } - - private static double sampleMean(int samples, int min, int max, WaitRandomizer randomizer) - { - double sum = 0; - int attempts = 1; - for (int i = 0 ; i < samples ; ++i) - { - long wait = randomizer.wait(min, max, attempts = (attempts & 15) + 1); - Assert.assertTrue(wait >= min); - Assert.assertTrue(wait <= max); - sum += wait; - } - double mean = sum / samples; - Assert.assertTrue(mean >= min); - Assert.assertTrue(mean <= max); - return mean; - } - } - - private static Consumer assertBound( - long min, long max, long onFailure, - Class selectorClass, - double selectorPercentile, - long selectorConst, - Class modifierClass, - double modifierVal - ) - { - return bound -> { - Assert.assertEquals(min, bound.min); - Assert.assertEquals(max, bound.max); - Assert.assertEquals(onFailure, bound.onFailure); - Assert.assertSame(selectorClass, bound.selector.getClass()); - if (selectorClass == selectors.constant(0).getClass()) - { - LatencySupplier fail = v -> { throw new UnsupportedOperationException(); }; - Assert.assertEquals(selectorConst, bound.selector.select(fail, fail)); - } - else - { - AtomicReference percentile = new AtomicReference<>(); - LatencySupplier set = v -> { percentile.set(v); return 0; }; - bound.selector.select(set, set); - Assert.assertNotNull(percentile.get()); - Assert.assertEquals(selectorPercentile, percentile.get(), 0.00001); - } - Assert.assertSame(modifierClass, bound.modifier.getClass()); - Assert.assertEquals(1000000L * modifierVal, bound.modifier.modify(1000000, 1), 0.00001); - }; - } - - private static void assertParseFailure(String spec) - { - - try - { - Bound bound = parseBound(spec, false); - Assert.fail("expected parse failure, but got " + bound); - } - catch (IllegalArgumentException e) - { - // expected - } - } - - @Test - public void strategyParseTest() - { - for (BoundParseValidator min : VALIDATE.stream().filter(v -> v.isMin).toArray(BoundParseValidator[]::new)) - { - for (BoundParseValidator max : VALIDATE.stream().filter(v -> !v.isMin).toArray(BoundParseValidator[]::new)) - { - for (BoundParseValidator minDelta : VALIDATE.stream().filter(v -> v.isMin).toArray(BoundParseValidator[]::new)) - { - for (WaitParseValidator random : VALIDATE_RANDOMIZER) - { - { - ParsedStrategy parsed = parseStrategy("min=" + min.spec + ",max=" + max.spec + ",delta=" + minDelta.spec + ",random=" + random.spec); - Assert.assertEquals(parsed.min, min.spec); - min.validate(parsed.strategy.min); - Assert.assertEquals(parsed.max, max.spec); - max.validate(parsed.strategy.max); - Assert.assertEquals(parsed.minDelta, minDelta.spec); - minDelta.validate(parsed.strategy.minDelta); - Assert.assertEquals(parsed.waitRandomizer, random.spec); - random.validate(parsed.strategy.waitRandomizer); - } - ParsedStrategy parsed = parseStrategy("random=" + random.spec); - Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); - DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); - Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); - DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); - Assert.assertEquals(parsed.minDelta, DEFAULT_MIN_DELTA_VALIDATOR.spec); - DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.minDelta); - Assert.assertEquals(parsed.waitRandomizer, random.spec); - random.validate(parsed.strategy.waitRandomizer); - } - ParsedStrategy parsed = parseStrategy("delta=" + minDelta.spec); - Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); - DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); - Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); - DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); - Assert.assertEquals(parsed.minDelta, minDelta.spec); - minDelta.validate(parsed.strategy.minDelta); - } - ParsedStrategy parsed = parseStrategy("max=" + max.spec); - Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); - DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); - Assert.assertEquals(parsed.max, max.spec); - max.validate(parsed.strategy.max); - Assert.assertEquals(parsed.minDelta, DEFAULT_MIN_DELTA_VALIDATOR.spec); - DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.minDelta); - } - ParsedStrategy parsed = parseStrategy("min=" + min.spec); - Assert.assertEquals(parsed.min, min.spec); - min.validate(parsed.strategy.min); - Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); - DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); - Assert.assertEquals(parsed.minDelta, DEFAULT_MIN_DELTA_VALIDATOR.spec); - DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.minDelta); - } - } - - @Test - public void testParseRoundTrip() - { - LatencySelectorFactory selectorFactory = new LatencySelectorFactory() - { - LatencySelectorFactory delegate = ContentionStrategy.selectors; - public LatencySelector constant(long latency) { return selector(delegate.constant(latency), String.format("%dms", latency)); } - public LatencySelector read(double percentile) { return selector(delegate.read(percentile), String.format("p%d(r)", (int) (percentile * 100))); } - public LatencySelector write(double percentile) { return selector(delegate.write(percentile), String.format("p%d(w)", (int) (percentile * 100))); } - public LatencySelector maxReadWrite(double percentile) { return selector(delegate.maxReadWrite(percentile), String.format("p%d(rw)", (int) percentile * 100)); } - - private LatencySelector selector(LatencySelector selector, String str) { - return new LatencySelector() - { - public long select(LatencySupplier read, LatencySupplier write) - { - return selector.select(read, write); - } - - public String toString() - { - return str; - } - }; - } - }; - - LatencyModifierFactory modifierFactory = new LatencyModifierFactory() - { - LatencyModifierFactory delegate = ContentionStrategy.modifiers; - public LatencyModifier identity() { return modifier(delegate.identity(), ""); } - public LatencyModifier multiply(double constant) { return modifier(delegate.multiply(constant), String.format(" * %.2f", constant)); } - public LatencyModifier multiplyByAttempts(double multiply) { return modifier(delegate.multiplyByAttempts(multiply), String.format(" * %.2f * attempts", multiply)); } - public LatencyModifier multiplyByAttemptsExp(double base) { return modifier(delegate.multiplyByAttemptsExp(base), String.format(" * %.2f ^ attempts", base)); } - - private LatencyModifier modifier(LatencyModifier modifier, String str) { - return new LatencyModifier() - { - @Inline - public long modify(long latency, int attempts) - { - return modifier.modify(latency, attempts); - } - - public String toString() - { - return str; - } - }; - } - }; - - LatencyModifier[] latencyModifiers = new LatencyModifier[]{ - modifierFactory.multiply(0.5), - modifierFactory.multiplyByAttempts(0.5), - modifierFactory.multiplyByAttemptsExp(0.5) - }; - - LatencySelector[] latencySelectors = new LatencySelector[]{ - selectorFactory.read(0.5), - selectorFactory.write(0.5), - selectorFactory.maxReadWrite(0.99) - }; - - for (boolean min : new boolean[] { true, false}) - { - String left = min ? "10ms <= " : ""; - for (boolean max : new boolean[] { true, false}) - { - String right = max ? " <= 10ms" : ""; - - for (LatencySelector selector : latencySelectors) - { - for (LatencyModifier modifier : latencyModifiers) - { - String mid = String.format("%s%s", selector, modifier); - String input = left + mid + right; - Bound bound = parseBound(input, false, selectorFactory, modifierFactory); - Assert.assertTrue(String.format("Bound: %d" , bound.min), !min || bound.min == 10000); - Assert.assertTrue(String.format("Bound: %d" , bound.max), !max || bound.max == 10000); - Assert.assertEquals(selector.toString(), bound.selector.toString()); - Assert.assertEquals(modifier.toString(), bound.modifier.toString()); - } - } - } - } - } - - @Test - public void boundParseTest() - { - VALIDATE.forEach(v -> v.validate(parseBound(v.spec, v.isMin))); - } - - @Test - public void waitRandomizerParseTest() - { - VALIDATE_RANDOMIZER.forEach(v -> v.validate(parseWaitRandomizer(v.spec))); - } - - @Test - public void waitRandomizerSampleTest() - { - waitRandomizerSampleTest(2); - } - - private void waitRandomizerSampleTest(int count) - { - while (count-- > 0) - { - long seed = ThreadLocalRandom.current().nextLong(); - logger.info("Seed {}", seed); - for (WaitRandomizerType type : WaitRandomizerType.values()) - { - WaitRandomizerOutputValidator.validate(type, seed, 100, 1000000); - } - } - } - - @Test - public void boundParseFailureTest() - { - assertParseFailure("10ms <= p95(r) <= 5ms"); - assertParseFailure("10 <= p95(r)"); - assertParseFailure("10 <= 20 <= 30"); - assertParseFailure("p95(r) < 5"); - assertParseFailure("p95(x)"); - assertParseFailure("p95()"); - assertParseFailure("p95"); - assertParseFailure("p50(rw)+0.66"); - } - - @Test - public void testBackoffTime() - { - ContentionStrategy strategy = parseStrategy("min=0ms,max=100ms,random=uniform").strategy; - double total = 0; - int count = 100000; - for (int i = 0 ; i < count ; ++i) - { - long now = System.nanoTime(); - long waitUntil = strategy.computeWaitUntilForContention(1, null, null, null, null); - long waitLength = Math.max(waitUntil - now, 0); - total += waitLength; - } - Assert.assertTrue(Math.abs(TimeUnit.MILLISECONDS.toNanos(50) - (total / count)) < TimeUnit.MILLISECONDS.toNanos(1L)); - } - - @Test - public void testBackoffTimeElapsed() - { - ContentionStrategy strategy = parseStrategy("min=0ms,max=10ms,random=uniform").strategy; - double total = 0; - int count = 1000; - for (int i = 0 ; i < count ; ++i) - { - long start = System.nanoTime(); - strategy.doWaitForContention(Long.MAX_VALUE, 1, null, null, null, null); - long end = System.nanoTime(); - total += end - start; - } - // make sure we have slept at least 4ms on average, given a mean wait time of 5ms - double avg = total / count; - double nanos = avg - TimeUnit.MILLISECONDS.toNanos(4); - Assert.assertTrue(nanos > 0); - } -} From aa6ebc140f20482ed4c5fe871594798f12db2f0c Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 10 Oct 2024 10:19:50 +0100 Subject: [PATCH 164/340] Halve cache memory consumption by not retaining 'original' to diff; dedup RoutingKey tableId; avoid calculating rejectsFastPath in more cases; delay retry of fetchMajorityDeps; fix SetShardDurable marking shards durable --- modules/accord | 2 +- .../org/apache/cassandra/schema/TableId.java | 6 +++ .../service/accord/AccordCachingState.java | 23 ++++------ .../service/accord/AccordCommandStore.java | 14 +++--- .../service/accord/AccordKeyspace.java | 44 +++++++++++++------ .../accord/AccordSafeCommandStore.java | 12 ++--- .../service/accord/AccordStateCache.java | 10 ++--- .../service/accord/api/AccordRoutingKey.java | 6 +-- .../service/accord/api/PartitionKey.java | 4 +- .../service/accord/async/AsyncOperation.java | 22 +++++----- .../CompactionAccordIteratorsTest.java | 2 +- .../service/accord/AccordStateCacheTest.java | 18 ++++---- .../service/accord/async/AsyncLoaderTest.java | 16 +++---- 13 files changed, 96 insertions(+), 83 deletions(-) diff --git a/modules/accord b/modules/accord index 841e139bc8a9..8bce46bee749 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 841e139bc8a974ac674ce8eae847bd52255ca544 +Subproject commit 8bce46bee7497262a8c16c6b779c08558968604f diff --git a/src/java/org/apache/cassandra/schema/TableId.java b/src/java/org/apache/cassandra/schema/TableId.java index 302d7db6bf13..03fd3dc49028 100644 --- a/src/java/org/apache/cassandra/schema/TableId.java +++ b/src/java/org/apache/cassandra/schema/TableId.java @@ -200,6 +200,12 @@ public static TableId deserialize(V src, ValueAccessor accessor, int offs return new TableId(new UUID(accessor.getLong(src, offset), accessor.getLong(src, offset + TypeSizes.LONG_SIZE))); } + public TableId tryIntern() + { + TableMetadata metadata = Schema.instance.getTableMetadata(this); + return metadata == null ? this : metadata.id; + } + @Override public int compareTo(TableId o) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index 50a48be1fb79..b8bb61e00c3a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.accord; import java.util.concurrent.Callable; -import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.ToLongFunction; @@ -208,10 +207,10 @@ public void set(V value) * has completed, the state save will have either completed or failed. */ @VisibleForTesting - public void save(ExecutorPlus executor, BiFunction saveFunction) + public void save(ExecutorPlus executor, Function saveFunction) { @SuppressWarnings("unchecked") - State savingOrLoaded = state.save((BiFunction) saveFunction); + State savingOrLoaded = state.save((Function) saveFunction); if (savingOrLoaded.status() == SAVING) executor.submit(savingOrLoaded.saving()); state(savingOrLoaded); @@ -319,7 +318,7 @@ default State set(V value) throw illegalState(this, "set(value)"); } - default State save(BiFunction saveFunction) + default State save(Function saveFunction) { throw illegalState(this, "save(saveFunction)"); } @@ -447,7 +446,7 @@ public V get() @Override public State set(V value) { - return value == original ? this : new Modified<>(original, value); + return value == original ? this : new Modified<>(value); } @Override @@ -499,12 +498,10 @@ public Evicted evict() static class Modified implements State { - final V original; V current; - Modified(V original, V current) + Modified(V current) { - this.original = original; this.current = current; } @@ -523,17 +520,14 @@ public V get() @Override public State set(V value) { - if (value == original) // change reverted - return new Loaded<>(original); - current = value; return this; } @Override - public State save(BiFunction saveFunction) + public State save(Function saveFunction) { - Runnable runnable = saveFunction.apply(original, current); + Runnable runnable = saveFunction.apply(current); if (null == runnable) // null mutation -> null Runnable -> no change on disk return new Loaded<>(current); else @@ -543,8 +537,7 @@ public State save(BiFunction saveFunction) @Override public long estimateOnHeapSize(ToLongFunction estimateFunction) { - return (null == original ? 0 : estimateFunction.applyAsLong(original)) - + (null == current ? 0 : estimateFunction.applyAsLong(current)); + return (null == current ? 0 : estimateFunction.applyAsLong(current)); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index bba5a70826f9..a7a89698dd6e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -288,12 +288,12 @@ public AccordStateCache.Instance commands, - NavigableMap timestampsForKeys, - NavigableMap commandsForKeys, + Map timestampsForKeys, + Map commandsForKeys, @Nullable AccordSafeCommandsForRanges commandsForRanges) { checkState(current == null); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 7a4760b7f633..cf36e148e936 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -805,6 +805,18 @@ private static void addCellIfModified(ColumnMetadata column, Function void addCell(ColumnMetadata column, Function get, SerializeFunction serialize, Row.Builder builder, long timestampMicros, int nowInSeconds, C current) throws IOException + { + V newValue = get.apply(current); + if (newValue == null) builder.addCell(tombstone(column, timestampMicros, nowInSeconds)); + else builder.addCell(live(column, timestampMicros, serialize.apply(newValue))); + } + + private static void addCell(ColumnMetadata column, Function get, LocalVersionedSerializer serializer, Row.Builder builder, long timestampMicros, int nowInSeconds, C command) throws IOException + { + addCell(column, get, v -> serializeOrNull(v, serializer), builder, timestampMicros, nowInSeconds, command); + } + private static void addCellIfModified(ColumnMetadata column, Function get, LocalVersionedSerializer serializer, Row.Builder builder, long timestampMicros, int nowInSeconds, C original, C command) throws IOException { addCellIfModified(column, get, v -> serializeOrNull(v, serializer), builder, timestampMicros, nowInSeconds, original, command); @@ -817,29 +829,34 @@ private static > void addEnumCellIfModified addCellIfModified(column, get, v -> accessor.valueOf(v.ordinal()), builder, timestampMicros, nowInSeconds, original, command); } + private static > void addEnumCell(ColumnMetadata column, Function get, Row.Builder builder, long timestampMicros, int nowInSeconds, C command) throws IOException + { + // TODO: convert to byte arrays + ValueAccessor accessor = ByteBufferAccessor.instance; + addCell(column, get, v -> accessor.valueOf(v.ordinal()), builder, timestampMicros, nowInSeconds, command); + } + public static Mutation getCommandMutation(AccordCommandStore commandStore, AccordSafeCommand liveCommand, long timestampMicros) { - return getCommandMutation(commandStore.id(), liveCommand.original(), liveCommand.current(), timestampMicros); + return getCommandMutation(commandStore.id(), liveCommand.current(), timestampMicros); } - public static Mutation getCommandMutation(int storeId, Command original, Command command, long timestampMicros) + public static Mutation getCommandMutation(int storeId, Command command, long timestampMicros) { if (command.saveStatus() == SaveStatus.Uninitialised) return null; try { - Invariants.checkArgument(original != command); - Row.Builder builder = BTreeRow.unsortedBuilder(); builder.newRow(Clustering.EMPTY); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); builder.addPrimaryKeyLivenessInfo(LivenessInfo.create(timestampMicros, nowInSeconds)); - addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.participants, Command::participants, LocalVersionedSerializers.participants, builder, timestampMicros, nowInSeconds, original, command); - addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); + addEnumCell(CommandsColumns.durability, Command::durability, builder, timestampMicros, nowInSeconds, command); + addCell(CommandsColumns.participants, Command::participants, LocalVersionedSerializers.participants, builder, timestampMicros, nowInSeconds, command); + addEnumCell(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, command); + addCell(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, command); Row row = builder.build(); if (row.columnCount() == 0) @@ -1080,11 +1097,10 @@ public static TokenKey deserializeTokenKeySeparateTable(TableId tableId, ByteBuf return (TokenKey) AccordRoutingKeyByteSource.Serializer.fromComparableBytes(ByteBufferAccessor.instance, tokenBytes, tableId, currentVersion, null); } - public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey original, TimestampsForKey current, long timestampMicros) + public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey current, long timestampMicros) { try { - Invariants.checkArgument(original != current); // TODO: convert to byte arrays ValueAccessor accessor = ByteBufferAccessor.instance; @@ -1093,9 +1109,9 @@ public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); LivenessInfo livenessInfo = LivenessInfo.create(timestampMicros, nowInSeconds); builder.addPrimaryKeyLivenessInfo(livenessInfo); - addCellIfModified(TimestampsForKeyColumns.last_executed_timestamp, TimestampsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); - addCellIfModified(TimestampsForKeyColumns.last_executed_micros, TimestampsForKey::rawLastExecutedHlc, accessor::valueOf, builder, timestampMicros, nowInSeconds, original, current); - addCellIfModified(TimestampsForKeyColumns.last_write_timestamp, TimestampsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, current); + addCell(TimestampsForKeyColumns.last_executed_timestamp, TimestampsForKey::lastExecutedTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, current); + addCell(TimestampsForKeyColumns.last_executed_micros, TimestampsForKey::rawLastExecutedHlc, accessor::valueOf, builder, timestampMicros, nowInSeconds, current); + addCell(TimestampsForKeyColumns.last_write_timestamp, TimestampsForKey::lastWriteTimestamp, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, current); Row row = builder.build(); if (row.columnCount() == 0) @@ -1113,7 +1129,7 @@ public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey public static Mutation getTimestampsForKeyMutation(AccordCommandStore commandStore, AccordSafeTimestampsForKey liveTimestamps, long timestampMicros) { - return getTimestampsForKeyMutation(commandStore.id(), liveTimestamps.original(), liveTimestamps.current(), timestampMicros); + return getTimestampsForKeyMutation(commandStore.id(), liveTimestamps.current(), timestampMicros); } public static UntypedResultSet loadTimestampsForKeyRow(CommandStore commandStore, TokenKey key) diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index f97f8336b7cb..4025049d394d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -53,8 +53,8 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore { private final Map commands; - private final NavigableMap commandsForKeys; - private final NavigableMap timestampsForKeys; + private final Map commandsForKeys; + private final Map timestampsForKeys; private final @Nullable AccordSafeCommandsForRanges commandsForRanges; private final AccordCommandStore commandStore; private RangesForEpoch ranges; @@ -62,8 +62,8 @@ public class AccordSafeCommandStore extends AbstractSafeCommandStore commands, - NavigableMap timestampsForKey, - NavigableMap commandsForKey, + Map timestampsForKey, + Map commandsForKey, @Nullable AccordSafeCommandsForRanges commandsForRanges, AccordCommandStore commandStore) { @@ -80,8 +80,8 @@ private AccordSafeCommandStore(PreLoadContext context, public static AccordSafeCommandStore create(PreLoadContext preLoadContext, Map commands, - NavigableMap timestampsForKey, - NavigableMap commandsForKey, + Map timestampsForKey, + Map commandsForKey, @Nullable AccordSafeCommandsForRanges commandsForRanges, AccordCommandStore commandStore) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 019b888deda8..ccbc90fdb288 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -240,7 +240,7 @@ public > Instance instance( Class valClass, Function, S> safeRefFactory, Function loadFunction, - BiFunction saveFunction, + Function saveFunction, BiFunction validateFunction, ToLongFunction heapEstimator, AccordCachingState.Factory nodeFactory) @@ -262,7 +262,7 @@ public > Instance instance( Class valClass, Function, S> safeRefFactory, Function loadFunction, - BiFunction saveFunction, + Function saveFunction, BiFunction validateFunction, ToLongFunction heapEstimator) { @@ -287,7 +287,7 @@ public class Instance> implements CacheSiz private final Class keyClass; private final Function, S> safeRefFactory; private Function loadFunction; - private BiFunction saveFunction; + private Function saveFunction; private final BiFunction validateFunction; private final ToLongFunction heapEstimator; private long bytesCached; @@ -303,7 +303,7 @@ public Instance( int index, Class keyClass, Function, S> safeRefFactory, Function loadFunction, - BiFunction saveFunction, + Function saveFunction, BiFunction validateFunction, ToLongFunction heapEstimator, AccordCachingState.Factory nodeFactory) @@ -643,7 +643,7 @@ public void unsafeSetLoadFunction(Function loadFunction) } @VisibleForTesting - public void unsafeSetSaveFunction(BiFunction saveFunction) + public void unsafeSetSaveFunction(Function saveFunction) { this.saveFunction = saveFunction; } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index 6d8d2b818453..deec2b21abb9 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -185,7 +185,7 @@ public void skip(DataInputPlus in, int version) throws IOException @Override public SentinelKey deserialize(DataInputPlus in, int version) throws IOException { - TableId table = TableId.deserialize(in); + TableId table = TableId.deserialize(in).tryIntern(); boolean isMin = in.readBoolean(); return new SentinelKey(table, isMin); } @@ -287,14 +287,14 @@ public void skip(DataInputPlus in, int version) throws IOException @Override public TokenKey deserialize(DataInputPlus in, int version) throws IOException { - TableId table = TableId.deserialize(in); + TableId table = TableId.deserialize(in).tryIntern(); Token token = Token.compactSerializer.deserialize(in, getPartitioner(), version); return new TokenKey(table, token); } public TokenKey fromBytes(ByteBuffer bytes, IPartitioner partitioner) { - TableId tableId = TableId.deserialize(bytes, ByteBufferAccessor.instance, 0); + TableId tableId = TableId.deserialize(bytes, ByteBufferAccessor.instance, 0).tryIntern(); bytes.position(tableId.serializedSize()); Token token = Token.compactSerializer.deserialize(bytes, partitioner); return new TokenKey(tableId, token); diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index fc78fe669262..aaa1264ea04e 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -154,7 +154,7 @@ public void skip(DataInputPlus in, int version) throws IOException @Override public PartitionKey deserialize(DataInputPlus in, int version) throws IOException { - TableId tableId = TableId.deserialize(in); + TableId tableId = TableId.deserialize(in).tryIntern(); IPartitioner partitioner = Schema.instance.getExistingTablePartitioner(tableId); DecoratedKey key = partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in)); return new PartitionKey(tableId, key); @@ -162,7 +162,7 @@ public PartitionKey deserialize(DataInputPlus in, int version) throws IOExceptio public PartitionKey deserialize(V src, ValueAccessor accessor, int offset) throws IOException { - TableId tableId = TableId.deserialize(src, accessor, offset); + TableId tableId = TableId.deserialize(src, accessor, offset).tryIntern(); offset += tableId.serializedSize(); TableMetadata metadata = Schema.instance.getTableMetadata(tableId); int numBytes = accessor.getShort(src, offset); diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index cc5bee6d32ca..463350adf8d2 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -18,9 +18,7 @@ package org.apache.cassandra.service.accord.async; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.TreeMap; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; @@ -39,13 +37,13 @@ import accord.primitives.Unseekables; import accord.utils.Invariants; import accord.utils.async.AsyncChains; +import org.agrona.collections.Object2ObjectHashMap; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordSafeCommand; import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.AccordSafeCommandsForKey; import org.apache.cassandra.service.accord.AccordSafeCommandsForRanges; -import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import org.apache.cassandra.service.accord.SavedCommand; import org.apache.cassandra.utils.concurrent.Condition; @@ -71,28 +69,28 @@ private static class LoggingProps static class Context { - final HashMap commands = new HashMap<>(); - final TreeMap timestampsForKey = new TreeMap<>(); - final TreeMap commandsForKey = new TreeMap<>(); + final Object2ObjectHashMap commands = new Object2ObjectHashMap<>(); + final Object2ObjectHashMap timestampsForKey = new Object2ObjectHashMap<>(); + final Object2ObjectHashMap commandsForKey = new Object2ObjectHashMap<>(); @Nullable AccordSafeCommandsForRanges commandsForRanges = null; void releaseResources(AccordCommandStore commandStore) { // TODO (expected): we should destructively iterate to avoid invoking second time in fail; or else read and set to null - commands.values().forEach(commandStore.commandCache()::release); + commands.forEach((k, v) -> commandStore.commandCache().release(v)); commands.clear(); - timestampsForKey.values().forEach(commandStore.timestampsForKeyCache()::release); + timestampsForKey.forEach((k, v) -> commandStore.timestampsForKeyCache().release(v)); timestampsForKey.clear(); - commandsForKey.values().forEach(commandStore.commandsForKeyCache()::release); + commandsForKey.forEach((k, v) -> commandStore.commandsForKeyCache().release(v)); commandsForKey.clear(); } void revertChanges() { - commands.values().forEach(AccordSafeState::revert); - timestampsForKey.values().forEach(AccordSafeState::revert); - commandsForKey.values().forEach(AccordSafeState::revert); + commands.forEach((k, v) -> v.revert()); + timestampsForKey.forEach((k, v) -> v.revert()); + commandsForKey.forEach((k, v) -> v.revert()); if (commandsForRanges != null) commandsForRanges.revert(); } diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index a329be58640c..ca59f202b780 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -522,7 +522,7 @@ private void testWithCommandStoreInternal(TestWithCommandStore test, boolean add private static BiConsumer appendDiffToKeyspace(AccordCommandStore commandStore) { return (before, after) -> { - AccordKeyspace.getCommandMutation(commandStore.id(), before, after, commandStore.nextSystemTimestampMicros()).applyUnsafe(); + AccordKeyspace.getCommandMutation(commandStore.id(), after, commandStore.nextSystemTimestampMicros()).applyUnsafe(); }; } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index d82f14305477..6f10d977b211 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -183,7 +183,7 @@ public void testAcquisitionAndRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = instance.acquire("1"); @@ -215,9 +215,9 @@ public void testCachingMetricsWithTwoInstances() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance stringInstance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true,String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true,String::length); AccordStateCache.Instance intInstance = - cache.instance(Integer.class, SafeInt.class, SafeInt::new, key -> key, (original, current) -> null, (k, v) -> true,ignored -> Integer.BYTES); + cache.instance(Integer.class, SafeInt.class, SafeInt::new, key -> key, (current) -> null, (k, v) -> true,ignored -> Integer.BYTES); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = stringInstance.acquire("1"); @@ -255,7 +255,7 @@ public void testRotation() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 5, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[3]; @@ -295,7 +295,7 @@ public void testEvictionOnAcquire() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 5, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; @@ -341,7 +341,7 @@ public void testEvictionOnRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 4, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString[] items = new SafeString[5]; @@ -380,7 +380,7 @@ public void testMultiAcquireRelease() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, DEFAULT_NODE_SIZE * 4, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString1 = instance.acquire("0"); @@ -411,7 +411,7 @@ public void evictionBlockedOnSaving() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, nodeSize(1) * 3 + nodeSize(3), cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString item = instance.acquire(Integer.toString(0)); @@ -450,7 +450,7 @@ public void testUpdates() ManualExecutor executor = new ManualExecutor(); AccordStateCache cache = new AccordStateCache(executor, executor, 500, cacheMetrics); AccordStateCache.Instance instance = - cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (original, current) -> null, (k, v) -> true, String::length); + cache.instance(String.class, SafeString.class, SafeString::new, key -> key, (current) -> null, (k, v) -> true, String::length); assertCacheState(cache, 0, 0, 0); SafeString safeString = instance.acquire("1"); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 680777bde8ed..8c893c4a21dd 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -18,7 +18,7 @@ package org.apache.cassandra.service.accord.async; -import java.util.TreeMap; +import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -155,7 +155,7 @@ public void loadTest() timestamps.preExecute(); timestamps.initialize(); - AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, timestamps.current(), commandStore.nextSystemTimestampMicros()).apply(); + AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), timestamps.current(), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), RoutingKeys.of(key), TIMESTAMPS); @@ -203,7 +203,7 @@ public void partialLoadTest() testLoad(executor, safeCommand, notDefined(txnId, txn)); commandCache.release(safeCommand); - AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, new TimestampsForKey(key), commandStore.nextSystemTimestampMicros()).apply(); + AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), new TimestampsForKey(key), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), RoutingKeys.of(key), TIMESTAMPS); @@ -353,7 +353,7 @@ public void inProgressCommandSaveTest() // acquire / release commandCache.unsafeSetLoadFunction(id -> notDefined(id, txn)); - commandCache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); + commandCache.unsafeSetSaveFunction((after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); AccordSafeCommand safeCommand = commandCache.acquire(txnId); testLoad(executor, safeCommand, notDefined(txnId, txn)); @@ -361,7 +361,7 @@ public void inProgressCommandSaveTest() commandCache.release(safeCommand); Assert.assertEquals(AccordCachingState.Status.MODIFIED, commandCache.getUnsafe(txnId).status()); - commandCache.getUnsafe(txnId).save(executor, (before, after) -> () -> {}); + commandCache.getUnsafe(txnId).save(executor, (after) -> () -> {}); Assert.assertEquals(AccordCachingState.Status.SAVING, commandCache.getUnsafe(txnId).status()); // since the command is still saving, the loader shouldn't be able to acquire a reference @@ -402,7 +402,7 @@ public void inProgressTFKSaveTest() inProgressCFKSaveTest(TIMESTAMPS, AccordCommandStore::timestampsForKeyCache, context -> context.timestampsForKey, TimestampsForKey::new, (tfk, c) -> new TimestampsForKey(tfk.key(), c.executeAt(), c.executeAt().hlc(), c.txnId(), c.executeAt())); } - private , C extends AccordStateCache.Instance> void inProgressCFKSaveTest(KeyHistory history, Function getter, Function> inContext, Function initialiser, BiFunction update) + private , C extends AccordStateCache.Instance> void inProgressCFKSaveTest(KeyHistory history, Function getter, Function> inContext, Function initialiser, BiFunction update) { AtomicLong clock = new AtomicLong(0); ManualExecutor executor = new ManualExecutor(); @@ -410,7 +410,7 @@ private , C extends AccordStateCa createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); C cache = getter.apply(commandStore); - cache.unsafeSetSaveFunction((before, after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); + cache.unsafeSetSaveFunction((after) -> () -> { throw new AssertionError("nodes expected to be saved manually"); }); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); @@ -424,7 +424,7 @@ private , C extends AccordStateCa cache.release(safe); Assert.assertEquals(AccordCachingState.Status.MODIFIED, cache.getUnsafe(key).status()); - cache.getUnsafe(key).save(executor, (before, after) -> () -> {}); + cache.getUnsafe(key).save(executor, (after) -> () -> {}); Assert.assertEquals(AccordCachingState.Status.SAVING, cache.getUnsafe(key).status()); // since the command is still saving, the loader shouldn't be able to acquire a reference From 25b1bd74b8c7fe59a65a3074f2e9e3e6745811eb Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 10 Oct 2024 12:28:48 +0100 Subject: [PATCH 165/340] Enable and test purging --- modules/accord | 2 +- .../org/apache/cassandra/config/AccordSpec.java | 1 + .../cassandra/config/DatabaseDescriptor.java | 5 +++++ .../service/accord/AccordDataStore.java | 6 ++++-- .../distributed/test/accord/AccordLoadTest.java | 16 +++++++++------- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/modules/accord b/modules/accord index 8bce46bee749..8f11c206ca17 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8bce46bee7497262a8c16c6b779c08558968604f +Subproject commit 8f11c206ca178dffbe67a4b3d7e23288e9545d3a diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 451bfeaa5495..383af67d9242 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -67,6 +67,7 @@ public long recoveryDelayFor(TxnId txnId, TimeUnit unit) public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound(5); + public volatile DurationSpec.IntSecondsBound gc_delay = new DurationSpec.IntSecondsBound(300); public volatile DurationSpec.IntSecondsBound schedule_durability_frequency = new DurationSpec.IntSecondsBound(120); public volatile DurationSpec.IntSecondsBound durability_txnid_lag = new DurationSpec.IntSecondsBound(10); public volatile DurationSpec.IntSecondsBound shard_durability_cycle = new DurationSpec.IntSecondsBound(5, TimeUnit.MINUTES); diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 9a2c680b47f2..1bd8f0f33561 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5338,6 +5338,11 @@ public static void setAccordFastPathUpdateDelaySeconds(long seconds) conf.accord.fast_path_update_delay = new DurationSpec.IntSecondsBound(seconds); } + public static long getAccordGCDelay(TimeUnit unit) + { + return conf.accord.gc_delay.to(unit); + } + public static long getAccordScheduleDurabilityFrequency(TimeUnit unit) { return conf.accord.schedule_durability_frequency.to(unit); diff --git a/src/java/org/apache/cassandra/service/accord/AccordDataStore.java b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java index 185e1068718c..19c30b0f38c8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordDataStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordDataStore.java @@ -34,6 +34,7 @@ import accord.utils.async.AsyncResults; import org.agrona.collections.Object2ObjectHashMap; import org.apache.cassandra.concurrent.ScheduledExecutors; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.Keyspace; @@ -91,7 +92,8 @@ public AsyncResult snapshot(Ranges ranges, TxnId before) // TODO: does thi ColumnFamilyStore cfs = Keyspace.openAndGetStoreIfExists(tableMetadata); // TODO (required): when we can safely map TxnId.hlc() -> local timestamp, consult Memtable timestamps - e.getValue().position = cfs.getCurrentMemtable().getCommitLogLowerBound(); + Memtable memtable = cfs.getCurrentMemtable(); + e.getValue().position = memtable.getCommitLogLowerBound(); } ScheduledExecutors.scheduledTasks.schedule(() -> { @@ -118,7 +120,7 @@ public AsyncResult snapshot(Ranges ranges, TxnId before) // TODO: does thi else result.setSuccess(null); }); - }, 5L, TimeUnit.MINUTES); + }, DatabaseDescriptor.getAccordGCDelay(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); return result; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index d750e8684474..7cdf4ae1c526 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -64,8 +64,10 @@ public class AccordLoadTest extends AccordTestBase public static void setUp() throws IOException { CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(MILLISECONDS.toSeconds(currentTimeMillis()))); - AccordTestBase.setupCluster(builder -> builder, 3); -// AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config.with(Feature.values())), 3); +// AccordTestBase.setupCluster(builder -> builder, 3); + AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config + .set("accord.schedule_durability_frequency", "5s") + .set("accord.gc_delay", "5s")), 3); } @Ignore @@ -92,11 +94,11 @@ public boolean matches(int i, int i1, IMessage iMessage) ICoordinator coordinator = cluster.coordinator(1); final int repairInterval = Integer.MAX_VALUE; // final int repairInterval = 3000; - final int compactionInterval = Integer.MAX_VALUE; - // final int compactionInterval = 3000; - final int flushInterval = Integer.MAX_VALUE; - // final int flushInterval = 1000; - final int compactionPeriodSeconds = -1; +// final int compactionInterval = Integer.MAX_VALUE; + final int compactionInterval = 3000; +// final int flushInterval = Integer.MAX_VALUE; + final int flushInterval = 1000; + final int compactionPeriodSeconds = 1; final int restartInterval = 150_000_000; final int batchSizeLimit = 1000; final long batchTime = TimeUnit.SECONDS.toNanos(10); From b8f3b745870de4ffe0e5654ff58e5165cebb7ca4 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 10 Oct 2024 13:39:12 +0100 Subject: [PATCH 166/340] Journal diff serialization: validateFlags and WaitingOn size --- modules/accord | 2 +- .../service/accord/SavedCommand.java | 41 ++++++++++--------- .../serializers/WaitingOnSerializer.java | 4 +- .../test/accord/AccordLoadTest.java | 1 + .../serializers/WaitingOnSerializerTest.java | 4 +- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/modules/accord b/modules/accord index 8f11c206ca17..8d1204045f1e 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8f11c206ca178dffbe67a4b3d7e23288e9545d3a +Subproject commit 8d1204045f1e1d4f0c3e09d1e2fa47837e55f819 diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index 5a0ae5d8ef56..56206fcdde85 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -147,22 +147,22 @@ public static DiffWriter diff(Command original, Command current) return new SavedCommand.DiffWriter(original, current); } - // TODO (required): this is very inefficient + // TODO (required): calculate flags once private static boolean anyFieldChanged(Command before, Command after) { - int flags = getFlags(before, after); - for (Fields field : Fields.values()) - { - if (getFieldChanged(field, flags)) - return true; - } + int flags = validateFlags(getFlags(before, after)); + return (flags >>> 16) != 0; + } - return false; - } + private static int validateFlags(int flags) + { + Invariants.checkState(0 == (~(flags >>> 16) & (flags & 0xffff))); + return flags; + } public static void serialize(Command before, Command after, DataOutputPlus out, int userVersion) throws IOException { - int flags = getFlags(before, after); + int flags = validateFlags(getFlags(before, after)); out.writeInt(flags); int iterable = toIterableSetFields(flags); @@ -207,7 +207,8 @@ public static void serialize(Command before, Command after, DataOutputPlus out, case WAITING_ON: Command.WaitingOn waitingOn = getWaitingOn(after); long size = WaitingOnSerializer.serializedSize(waitingOn); - ByteBuffer serialized = WaitingOnSerializer.serialize(after.txnId(), waitingOn); + ByteBuffer serialized = WaitingOnSerializer.serialize(waitingOn); + Invariants.checkState(serialized.remaining() == size); out.writeInt((int) size); out.write(serialized); break; @@ -254,28 +255,28 @@ static Command.WaitingOn getWaitingOn(Command command) return null; } - private static int collectFlags(OBJ lo, OBJ ro, Function convert, boolean allowClassMismatch, Fields field, int oldFlags) + private static int collectFlags(OBJ lo, OBJ ro, Function convert, boolean allowClassMismatch, Fields field, int flags) { VAL l = null; VAL r = null; if (lo != null) l = convert.apply(lo); if (ro != null) r = convert.apply(ro); - if (r == null) - oldFlags = setFieldIsNull(field, oldFlags); - if (l == r) - return oldFlags; // no change + return flags; // no change + + if (r == null) + flags = setFieldIsNull(field, flags); if (l == null || r == null) - return setFieldChanged(field, oldFlags); + return setFieldChanged(field, flags); assert allowClassMismatch || l.getClass() == r.getClass() : String.format("%s != %s", l.getClass(), r.getClass()); if (l.equals(r)) - return oldFlags; // no change + return flags; // no change - return setFieldChanged(field, oldFlags); + return setFieldChanged(field, flags); } private static int setFieldChanged(Fields field, int oldFlags) @@ -593,7 +594,7 @@ public static Route deserializeRouteOrNull(DataInputPlus in, int userVersion) public void serialize(DataOutputPlus out, int userVersion) throws IOException { - out.writeInt(flags); + out.writeInt(validateFlags(flags)); int iterable = toIterableSetFields(flags); while (iterable != 0) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index 021f01ec753a..79511355141b 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -57,13 +57,13 @@ public static long serializedSize(int length, SimpleBitSet write) return (long) TypeSizes.LONG_SIZE * length; } - public static ByteBuffer serialize(TxnId txnId, WaitingOn waitingOn) throws IOException + public static ByteBuffer serialize(WaitingOn waitingOn) throws IOException { int keyCount = waitingOn.keys.size(); int txnIdCount = waitingOn.txnIdCount(); int waitingOnLength = (txnIdCount + keyCount + 63) / 64; int appliedOrInvalidatedLength = 0; - if (txnId.domain() == Routable.Domain.Range) + if (waitingOn.appliedOrInvalidated != null) appliedOrInvalidatedLength = (txnIdCount + 63) / 64; ByteBuffer out = ByteBuffer.allocate(TypeSizes.sizeofUnsignedVInt(keyCount) + TypeSizes.sizeofUnsignedVInt(txnIdCount) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index 7cdf4ae1c526..4c249a1a280d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -67,6 +67,7 @@ public static void setUp() throws IOException // AccordTestBase.setupCluster(builder -> builder, 3); AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config .set("accord.schedule_durability_frequency", "5s") + .set("accord.ephemeral_read_enabled", "true") .set("accord.gc_delay", "5s")), 3); } diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java index ce5aabba9fd7..1f869d22f9e6 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java @@ -55,12 +55,12 @@ public void serde() TxnId txnId = TxnId.NONE; if (waitingOn.appliedOrInvalidated != null) txnId = new TxnId(txnId.epoch(), txnId.hlc(), txnId.kind(), Routable.Domain.Range, txnId.node); long expectedSize = WaitingOnSerializer.serializedSize(waitingOn); - ByteBuffer bb = WaitingOnSerializer.serialize(txnId, waitingOn); + ByteBuffer bb = WaitingOnSerializer.serialize(waitingOn); Assertions.assertThat(bb.remaining()).isEqualTo(expectedSize); Command.WaitingOn read = WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, bb); Assertions.assertThat(read) .isEqualTo(waitingOn) - .isEqualTo(WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, WaitingOnSerializer.serialize(txnId, waitingOn))); + .isEqualTo(WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, WaitingOnSerializer.serialize(waitingOn))); }); } From 17340110322983d6360663d2e84b8f04d3b80743 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 10 Oct 2024 21:36:00 +0100 Subject: [PATCH 167/340] Fix truncatedApply deserialization --- .../apache/cassandra/service/accord/AccordObjectSizes.java | 2 +- .../org/apache/cassandra/service/accord/SavedCommand.java | 7 +++++-- .../service/accord/serializers/WaitingOnSerializer.java | 6 ++++-- .../accord/serializers/WaitingOnSerializerTest.java | 6 +++--- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index d9351f8daea7..3c55c06cf2e7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -352,7 +352,7 @@ public static long command(Command command) return size; Command.Committed committed = command.asCommitted(); - size += WaitingOnSerializer.serializedSize(committed.waitingOn); + size += WaitingOnSerializer.serializedSize(committed.txnId(), committed.waitingOn); return size; } diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index 56206fcdde85..be2f9c6289f4 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -56,6 +56,7 @@ import static accord.primitives.Known.KnownDeps.DepsErased; import static accord.primitives.Known.KnownDeps.DepsUnknown; import static accord.primitives.Known.KnownDeps.NoDeps; +import static accord.primitives.SaveStatus.TruncatedApplyWithOutcome; import static accord.primitives.Status.Durability.NotDurable; import static accord.utils.Invariants.illegalState; import static org.apache.cassandra.service.accord.SavedCommand.Fields.PARTICIPANTS; @@ -206,8 +207,8 @@ public static void serialize(Command before, Command after, DataOutputPlus out, break; case WAITING_ON: Command.WaitingOn waitingOn = getWaitingOn(after); - long size = WaitingOnSerializer.serializedSize(waitingOn); - ByteBuffer serialized = WaitingOnSerializer.serialize(waitingOn); + long size = WaitingOnSerializer.serializedSize(after.txnId(), waitingOn); + ByteBuffer serialized = WaitingOnSerializer.serialize(after.txnId(), waitingOn); Invariants.checkState(serialized.remaining() == size); out.writeInt((int) size); out.write(serialized); @@ -852,6 +853,8 @@ private static Command.Truncated truncated(CommonAttributes.Mutable attrs, SaveS case TruncatedApplyWithOutcome: case TruncatedApplyWithDeps: case TruncatedApply: + if (status != TruncatedApplyWithOutcome) + result = null; if (attrs.txnId().kind().awaitsOnlyDeps()) return Command.Truncated.truncatedApply(attrs, status, executeAt, writes, result, executesAtLeast); return Command.Truncated.truncatedApply(attrs, status, executeAt, writes, result, null); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index 79511355141b..4c474d26c796 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -35,8 +35,9 @@ public class WaitingOnSerializer { - public static long serializedSize(WaitingOn waitingOn) + public static long serializedSize(TxnId txnId, WaitingOn waitingOn) { + Invariants.checkState(txnId.is(Routable.Domain.Key) == (waitingOn.appliedOrInvalidated == null)); int keyCount = waitingOn.keys.size(); int txnIdCount = waitingOn.txnIdCount(); int waitingOnLength = (txnIdCount + keyCount + 63) / 64; @@ -57,8 +58,9 @@ public static long serializedSize(int length, SimpleBitSet write) return (long) TypeSizes.LONG_SIZE * length; } - public static ByteBuffer serialize(WaitingOn waitingOn) throws IOException + public static ByteBuffer serialize(TxnId txnId, WaitingOn waitingOn) throws IOException { + Invariants.checkState(txnId.is(Routable.Domain.Key) == (waitingOn.appliedOrInvalidated == null)); int keyCount = waitingOn.keys.size(); int txnIdCount = waitingOn.txnIdCount(); int waitingOnLength = (txnIdCount + keyCount + 63) / 64; diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java index 1f869d22f9e6..2820760c0719 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/WaitingOnSerializerTest.java @@ -54,13 +54,13 @@ public void serde() qt().forAll(waitingOnGen()).check(waitingOn -> { TxnId txnId = TxnId.NONE; if (waitingOn.appliedOrInvalidated != null) txnId = new TxnId(txnId.epoch(), txnId.hlc(), txnId.kind(), Routable.Domain.Range, txnId.node); - long expectedSize = WaitingOnSerializer.serializedSize(waitingOn); - ByteBuffer bb = WaitingOnSerializer.serialize(waitingOn); + long expectedSize = WaitingOnSerializer.serializedSize(txnId, waitingOn); + ByteBuffer bb = WaitingOnSerializer.serialize(txnId, waitingOn); Assertions.assertThat(bb.remaining()).isEqualTo(expectedSize); Command.WaitingOn read = WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, bb); Assertions.assertThat(read) .isEqualTo(waitingOn) - .isEqualTo(WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, WaitingOnSerializer.serialize(waitingOn))); + .isEqualTo(WaitingOnSerializer.deserialize(txnId, waitingOn.keys, waitingOn.directRangeDeps, waitingOn.directKeyDeps, WaitingOnSerializer.serialize(txnId, waitingOn))); }); } From 0769af2c2efa312cfec0eadcdd502bbddd288f06 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 10 Oct 2024 22:51:31 -0700 Subject: [PATCH 168/340] Ninja: fast path now updates after 60s rather than 5s. Added toString to ReconfigureAccordFastPath so the TCM logs/table gives the debug info needed --- src/java/org/apache/cassandra/config/AccordSpec.java | 2 +- .../transformations/ReconfigureAccordFastPath.java | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 383af67d9242..832e844ac7b4 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -65,7 +65,7 @@ public long recoveryDelayFor(TxnId txnId, TimeUnit unit) public DurationSpec.IntMillisecondsBound range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); - public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound(5); + public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound("60s"); public volatile DurationSpec.IntSecondsBound gc_delay = new DurationSpec.IntSecondsBound(300); public volatile DurationSpec.IntSecondsBound schedule_durability_frequency = new DurationSpec.IntSecondsBound(120); diff --git a/src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java b/src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java index 628f4482f1ab..32478c0d7645 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java +++ b/src/java/org/apache/cassandra/tcm/transformations/ReconfigureAccordFastPath.java @@ -66,6 +66,17 @@ public Result execute(ClusterMetadata metadata) } } + @Override + public String toString() + { + return "ReconfigureAccordFastPath{" + + "node=" + node + + ", status=" + status + + ", updateTimeMillis=" + updateTimeMillis + + ", updateDelayMillis=" + updateDelayMillis + + '}'; + } + public static final AsymmetricMetadataSerializer serializer = new AsymmetricMetadataSerializer() { public void serialize(Transformation t, DataOutputPlus out, Version version) throws IOException From c4e0f3cf8f83eee883db79f111ec97ed8c143440 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Oct 2024 09:45:45 +0100 Subject: [PATCH 169/340] Follow-up to: Do not contact faulty replicas, and support reporting slow replies for preaccept/read. Do not wait for stale or left nodes for durability. --- modules/accord | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/accord b/modules/accord index 8d1204045f1e..eedd13ac4b7b 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8d1204045f1e1d4f0c3e09d1e2fa47837e55f819 +Subproject commit eedd13ac4b7b9f61d574badb4bfc47611a838739 From 9b6467d81e64a30f762ff1b39d24fd6e7cc05914 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Oct 2024 09:47:17 +0100 Subject: [PATCH 170/340] Ninja: fast path now updates after 3600s rather than 60s. --- src/java/org/apache/cassandra/config/AccordSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 832e844ac7b4..3efcac6566c8 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -65,7 +65,7 @@ public long recoveryDelayFor(TxnId txnId, TimeUnit unit) public DurationSpec.IntMillisecondsBound range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); - public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound("60s"); + public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound("3600s"); public volatile DurationSpec.IntSecondsBound gc_delay = new DurationSpec.IntSecondsBound(300); public volatile DurationSpec.IntSecondsBound schedule_durability_frequency = new DurationSpec.IntSecondsBound(120); From aeaa07b000c40291b2b138f2eec130a33cd4f2e5 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Oct 2024 12:10:19 +0100 Subject: [PATCH 171/340] increase timeout for CalculateDepsReq --- .../cassandra/service/accord/AccordMessageSink.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 968068481dca..756de92e3f48 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -261,11 +261,15 @@ public void send(Node.Id to, Request request, AgentExecutor executor, Callback c { Verb verb = getVerb(request); Preconditions.checkNotNull(verb, "Verb is null for type %s", request.type()); + long nowNanos = Clock.Global.nanoTime(); - long expiresAtNanos; - if (isRangeBarrier(request)) expiresAtNanos = nowNanos + DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(); - else expiresAtNanos = nowNanos + verb.expiresAfterNanos(); long delayedAtNanos = Long.MAX_VALUE; + long expiresAtNanos; + if (isRangeBarrier(request) || verb == Verb.ACCORD_CALCULATE_DEPS_REQ) + expiresAtNanos = nowNanos + DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(); + else + expiresAtNanos = nowNanos + verb.expiresAfterNanos(); + switch (verb) { case ACCORD_COMMIT_REQ: From 570d65249f2fee1ca17f67f9147d5c5d7c2d586c Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Oct 2024 14:51:08 +0100 Subject: [PATCH 172/340] disable TableId interning --- .../org/apache/cassandra/service/accord/AccordService.java | 2 +- .../cassandra/service/accord/CommandsForRangesLoader.java | 4 ++-- .../cassandra/service/accord/api/AccordRoutingKey.java | 6 +++--- .../apache/cassandra/service/accord/api/PartitionKey.java | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index ae45577e14b8..95188cddd4c8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -890,7 +890,7 @@ public TopologyManager topology() asyncTxnResult.tryFailure(newTimeout(txnId, txn.isWrite(), consistencyLevel)); return; } - if (cause instanceof Preempted) + if (cause instanceof Preempted || cause instanceof Invalidated) { metrics.preempted.mark(); //TODO need to improve diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index 02e492f3c6fc..dca807e171bd 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -35,13 +35,13 @@ import accord.local.KeyHistory; import accord.local.RedundantBefore; import accord.primitives.PartialDeps; +import accord.primitives.Participants; import accord.primitives.Routable.Domain; import accord.primitives.SaveStatus; import accord.primitives.Status; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routables; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.async.AsyncChains; @@ -252,7 +252,7 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable TxnId f if (cmd.partialTxn() == null) return null; - Seekables> keysOrRanges = cmd.partialTxn().keys(); + Participants keysOrRanges = cmd.participants().touches(); if (keysOrRanges.domain() != Domain.Range) throw new AssertionError(String.format("Txn keys are not range for %s", cmd.partialTxn())); Ranges ranges = (Ranges) keysOrRanges; diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index deec2b21abb9..6d8d2b818453 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -185,7 +185,7 @@ public void skip(DataInputPlus in, int version) throws IOException @Override public SentinelKey deserialize(DataInputPlus in, int version) throws IOException { - TableId table = TableId.deserialize(in).tryIntern(); + TableId table = TableId.deserialize(in); boolean isMin = in.readBoolean(); return new SentinelKey(table, isMin); } @@ -287,14 +287,14 @@ public void skip(DataInputPlus in, int version) throws IOException @Override public TokenKey deserialize(DataInputPlus in, int version) throws IOException { - TableId table = TableId.deserialize(in).tryIntern(); + TableId table = TableId.deserialize(in); Token token = Token.compactSerializer.deserialize(in, getPartitioner(), version); return new TokenKey(table, token); } public TokenKey fromBytes(ByteBuffer bytes, IPartitioner partitioner) { - TableId tableId = TableId.deserialize(bytes, ByteBufferAccessor.instance, 0).tryIntern(); + TableId tableId = TableId.deserialize(bytes, ByteBufferAccessor.instance, 0); bytes.position(tableId.serializedSize()); Token token = Token.compactSerializer.deserialize(bytes, partitioner); return new TokenKey(tableId, token); diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index aaa1264ea04e..fc78fe669262 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -154,7 +154,7 @@ public void skip(DataInputPlus in, int version) throws IOException @Override public PartitionKey deserialize(DataInputPlus in, int version) throws IOException { - TableId tableId = TableId.deserialize(in).tryIntern(); + TableId tableId = TableId.deserialize(in); IPartitioner partitioner = Schema.instance.getExistingTablePartitioner(tableId); DecoratedKey key = partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in)); return new PartitionKey(tableId, key); @@ -162,7 +162,7 @@ public PartitionKey deserialize(DataInputPlus in, int version) throws IOExceptio public PartitionKey deserialize(V src, ValueAccessor accessor, int offset) throws IOException { - TableId tableId = TableId.deserialize(src, accessor, offset).tryIntern(); + TableId tableId = TableId.deserialize(src, accessor, offset); offset += tableId.serializedSize(); TableMetadata metadata = Schema.instance.getTableMetadata(tableId); int numBytes = accessor.getShort(src, offset); From 91def312841123088562f5c3e59bb1b003772be3 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Oct 2024 15:35:32 +0100 Subject: [PATCH 173/340] do not schedule additional durability attempts while some in flight; plus minor performance improvements --- modules/accord | 2 +- src/java/org/apache/cassandra/db/DecoratedKey.java | 2 +- src/java/org/apache/cassandra/dht/IPartitioner.java | 10 ++++++++++ .../org/apache/cassandra/dht/LocalPartitioner.java | 5 +++++ .../io/sstable/format/big/BigTableReader.java | 9 ++++++++- .../service/accord/CommandsForRangesLoader.java | 5 ++--- 6 files changed, 27 insertions(+), 6 deletions(-) diff --git a/modules/accord b/modules/accord index eedd13ac4b7b..dd04c81214be 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit eedd13ac4b7b9f61d574badb4bfc47611a838739 +Subproject commit dd04c81214be9213cf503025bf6ed4cbf118be00 diff --git a/src/java/org/apache/cassandra/db/DecoratedKey.java b/src/java/org/apache/cassandra/db/DecoratedKey.java index 03d6374112a4..7dd709f08855 100644 --- a/src/java/org/apache/cassandra/db/DecoratedKey.java +++ b/src/java/org/apache/cassandra/db/DecoratedKey.java @@ -97,7 +97,7 @@ public static int compareTo(IPartitioner partitioner, ByteBuffer key, PartitionP return -position.compareTo(partitioner.decorateKey(key)); DecoratedKey otherKey = (DecoratedKey) position; - int cmp = partitioner.getToken(key).compareTo(otherKey.getToken()); + int cmp = partitioner.compareToken(key, otherKey.getToken()); return cmp == 0 ? ByteBufferUtil.compareUnsigned(key, otherKey.getKey()) : cmp; } diff --git a/src/java/org/apache/cassandra/dht/IPartitioner.java b/src/java/org/apache/cassandra/dht/IPartitioner.java index 0c4ae9dbb384..8e30ba65006c 100644 --- a/src/java/org/apache/cassandra/dht/IPartitioner.java +++ b/src/java/org/apache/cassandra/dht/IPartitioner.java @@ -91,6 +91,16 @@ default boolean supportsSplitting() */ public Token getToken(ByteBuffer key); + /** + * @return a Token that can be used to route a given key + * (This is NOT a method to create a Token from its string representation; + * for that, use TokenFactory.fromString.) + */ + default int compareToken(ByteBuffer key, Token token) + { + return getToken(key).compareTo(token); + } + /** * @return a randomly generated token */ diff --git a/src/java/org/apache/cassandra/dht/LocalPartitioner.java b/src/java/org/apache/cassandra/dht/LocalPartitioner.java index 0a1ede356b22..2f060c6b6783 100644 --- a/src/java/org/apache/cassandra/dht/LocalPartitioner.java +++ b/src/java/org/apache/cassandra/dht/LocalPartitioner.java @@ -72,6 +72,11 @@ public LocalToken getToken(ByteBuffer key) return new LocalToken(key); } + public int compareToken(ByteBuffer key, Token token) + { + return comparator.compare(key, ((LocalToken)token).token); + } + public LocalToken getRandomToken() { throw new UnsupportedOperationException(); diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java index f339bd9f7a7b..d44bd7f71429 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java @@ -311,6 +311,7 @@ public RowIndexEntry getRowIndexEntry(PartitionPosition key, // of the next interval). int i = 0; String path = null; + ByteBuffer indexKey = null; try (FileDataInput in = ifile.createReader(sampledPosition)) { path = in.getPath(); @@ -318,7 +319,13 @@ public RowIndexEntry getRowIndexEntry(PartitionPosition key, { i++; - ByteBuffer indexKey = ByteBufferUtil.readWithShortLength(in); + int length = in.readUnsignedShort(); + if (indexKey == null || indexKey.capacity() < length) + indexKey = ByteBuffer.allocate(length); + + in.readFully(indexKey.array(), 0, length); + indexKey.position(0); + indexKey.limit(length); boolean opSatisfied; // did we find an appropriate position for the op requested boolean exactMatch; // is the current position an exact match for the key, suitable for caching diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index dca807e171bd..45f69e67f1d7 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -35,7 +35,6 @@ import accord.local.KeyHistory; import accord.local.RedundantBefore; import accord.primitives.PartialDeps; -import accord.primitives.Participants; import accord.primitives.Routable.Domain; import accord.primitives.SaveStatus; import accord.primitives.Status; @@ -252,7 +251,7 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable TxnId f if (cmd.partialTxn() == null) return null; - Participants keysOrRanges = cmd.participants().touches(); + Ranges keysOrRanges = cmd.participants().touches().toRanges(); if (keysOrRanges.domain() != Domain.Range) throw new AssertionError(String.format("Txn keys are not range for %s", cmd.partialTxn())); Ranges ranges = (Ranges) keysOrRanges; @@ -276,7 +275,7 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable TxnId f } PartialDeps partialDeps = cmd.partialDeps(); - boolean hasAsDep = findAsDep != null && partialDeps.rangeDeps.intersects(findAsDep, ranges); + boolean hasAsDep = findAsDep != null && partialDeps != null && partialDeps.rangeDeps.intersects(findAsDep, ranges); return new Summary(cmd.txnId(), cmd.executeAt(), saveStatus, ranges, findAsDep, hasAsDep); } From 22df42c7bec23b77de670ce1773e76424225ac00 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Oct 2024 16:27:33 +0100 Subject: [PATCH 174/340] Store historical transactions per epoch update durability scheduling and majority deps fetching do not deserialize deps in CommandsForRangesLoader unless required AccordJournalPurger should use shouldCleanupPartial load historical transactions when loading topology --- modules/accord | 2 +- .../apache/cassandra/concurrent/Stage.java | 33 +++---- .../apache/cassandra/config/AccordSpec.java | 22 ++--- .../cassandra/config/DatabaseDescriptor.java | 28 +++++- .../org/apache/cassandra/journal/Journal.java | 1 + .../org/apache/cassandra/net/Message.java | 39 ++++++-- .../org/apache/cassandra/schema/TableId.java | 17 +++- .../cassandra/service/CassandraDaemon.java | 3 +- .../service/accord/AccordCachingState.java | 7 +- .../service/accord/AccordCommandStore.java | 78 +++++++++++----- .../service/accord/AccordCommandStores.java | 18 ---- .../accord/AccordConfigurationService.java | 8 +- .../accord/AccordFastPathCoordinator.java | 2 +- .../service/accord/AccordJournal.java | 33 +++++-- .../accord/AccordJournalValueSerializers.java | 28 ++++-- .../service/accord/AccordMessageSink.java | 30 ++++--- .../accord/AccordResponseVerbHandler.java | 16 ++++ .../accord/AccordSafeCommandStore.java | 23 ++++- .../service/accord/AccordService.java | 7 +- .../service/accord/AccordStateCache.java | 36 +++++--- .../service/accord/AccordVerbHandler.java | 9 +- .../accord/CommandsForRangesLoader.java | 77 +++++++++++++--- .../cassandra/service/accord/IJournal.java | 5 +- .../cassandra/service/accord/JournalKey.java | 9 +- .../service/accord/SavedCommand.java | 90 ++++++++++++++++--- .../service/accord/api/AccordRoutingKey.java | 4 +- .../service/accord/api/PartitionKey.java | 7 +- .../service/accord/async/AsyncLoader.java | 80 ++++++++++++----- .../service/accord/async/AsyncOperation.java | 11 ++- .../apache/cassandra/utils/NoSpamLogger.java | 15 +++- .../cassandra/utils/memory/BufferPool.java | 5 +- .../test/accord/AccordLoadTest.java | 5 +- .../accord/AccordJournalCompactionTest.java | 11 ++- .../config/YamlConfigurationLoaderTest.java | 6 +- .../service/accord/EpochSyncTest.java | 9 +- .../cassandra/service/accord/MockJournal.java | 19 +++- .../service/accord/SavedCommandTest.java | 4 +- 37 files changed, 595 insertions(+), 202 deletions(-) diff --git a/modules/accord b/modules/accord index dd04c81214be..d914ee69816e 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit dd04c81214be9213cf503025bf6ed4cbf118be00 +Subproject commit d914ee69816ebfdf88b2120ff1d8e0bc16edecbc diff --git a/src/java/org/apache/cassandra/concurrent/Stage.java b/src/java/org/apache/cassandra/concurrent/Stage.java index 135b5d078eac..910ac40a5b7e 100644 --- a/src/java/org/apache/cassandra/concurrent/Stage.java +++ b/src/java/org/apache/cassandra/concurrent/Stage.java @@ -43,22 +43,23 @@ public enum Stage { - READ (false, "ReadStage", "request", DatabaseDescriptor::getConcurrentReaders, DatabaseDescriptor::setConcurrentReaders, Stage::multiThreadedLowSignalStage), - MUTATION (true, "MutationStage", "request", DatabaseDescriptor::getConcurrentWriters, DatabaseDescriptor::setConcurrentWriters, Stage::multiThreadedLowSignalStage), - COUNTER_MUTATION (true, "CounterMutationStage", "request", DatabaseDescriptor::getConcurrentCounterWriters, DatabaseDescriptor::setConcurrentCounterWriters, Stage::multiThreadedLowSignalStage), - VIEW_MUTATION (true, "ViewMutationStage", "request", DatabaseDescriptor::getConcurrentViewWriters, DatabaseDescriptor::setConcurrentViewWriters, Stage::multiThreadedLowSignalStage), - ACCORD_MIGRATION (false, "AccordMigrationReadStage", "request", DatabaseDescriptor::getConcurrentAccordOps, DatabaseDescriptor::setConcurrentAccordOps, Stage::multiThreadedLowSignalStage), - GOSSIP (true, "GossipStage", "internal", () -> 1, null, Stage::singleThreadedStage), - REQUEST_RESPONSE (false, "RequestResponseStage", "request", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedLowSignalStage), - ANTI_ENTROPY (false, "AntiEntropyStage", "internal", () -> 1, null, Stage::singleThreadedStage), - MIGRATION (false, "MigrationStage", "internal", () -> 1, null, Stage::migrationStage), - MISC (false, "MiscStage", "internal", () -> 1, null, Stage::singleThreadedStage), - TRACING (false, "TracingStage", "internal", () -> 1, null, Stage::tracingStage), - INTERNAL_RESPONSE (false, "InternalResponseStage", "internal", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedStage), - IMMEDIATE (false, "ImmediateStage", "internal", () -> 0, null, Stage::immediateExecutor), - PAXOS_REPAIR (false, "PaxosRepairStage", "internal", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedStage), - INTERNAL_METADATA (false, "InternalMetadataStage", "internal", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedStage), - FETCH_LOG (false, "MetadataFetchLogStage", "internal", () -> 1, null, Stage::singleThreadedStage) + READ (false, "ReadStage", "request", DatabaseDescriptor::getConcurrentReaders, DatabaseDescriptor::setConcurrentReaders, Stage::multiThreadedLowSignalStage), + MUTATION (true, "MutationStage", "request", DatabaseDescriptor::getConcurrentWriters, DatabaseDescriptor::setConcurrentWriters, Stage::multiThreadedLowSignalStage), + COUNTER_MUTATION (true, "CounterMutationStage", "request", DatabaseDescriptor::getConcurrentCounterWriters, DatabaseDescriptor::setConcurrentCounterWriters, Stage::multiThreadedLowSignalStage), + VIEW_MUTATION (true, "ViewMutationStage", "request", DatabaseDescriptor::getConcurrentViewWriters, DatabaseDescriptor::setConcurrentViewWriters, Stage::multiThreadedLowSignalStage), + ACCORD_MIGRATION (false, "AccordMigrationStage", "request", DatabaseDescriptor::getConcurrentAccordOps, DatabaseDescriptor::setConcurrentAccordOps, Stage::multiThreadedLowSignalStage), + GOSSIP (true, "GossipStage", "internal", () -> 1, null, Stage::singleThreadedStage), + REQUEST_RESPONSE (false, "RequestResponseStage", "request", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedLowSignalStage), + ANTI_ENTROPY (false, "AntiEntropyStage", "internal", () -> 1, null, Stage::singleThreadedStage), + MIGRATION (false, "MigrationStage", "internal", () -> 1, null, Stage::migrationStage), + MISC (false, "MiscStage", "internal", () -> 1, null, Stage::singleThreadedStage), + TRACING (false, "TracingStage", "internal", () -> 1, null, Stage::tracingStage), + INTERNAL_RESPONSE (false, "InternalResponseStage", "internal", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedStage), + IMMEDIATE (false, "ImmediateStage", "internal", () -> 0, null, Stage::immediateExecutor), + PAXOS_REPAIR (false, "PaxosRepairStage", "internal", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedStage), + INTERNAL_METADATA (false, "InternalMetadataStage", "internal", FBUtilities::getAvailableProcessors, null, Stage::multiThreadedStage), + FETCH_LOG (false, "MetadataFetchLogStage", "internal", () -> 1, null, Stage::singleThreadedStage), + ACCORD_RANGE_LOADER(false, "AccordRangeLoader", "internal", () -> 4, null, Stage::multiThreadedStage), ; public final String jmxName; private final Supplier executorSupplier; diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 3efcac6566c8..74504326abe2 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -18,14 +18,14 @@ package org.apache.cassandra.config; -import accord.primitives.Routable; -import accord.primitives.Txn; +import java.util.concurrent.TimeUnit; + import accord.primitives.TxnId; import com.fasterxml.jackson.annotation.JsonIgnore; import org.apache.cassandra.journal.Params; import org.apache.cassandra.service.consensus.TransactionalMode; -import java.util.concurrent.TimeUnit; +import static accord.primitives.Routable.Domain.Range; public class AccordSpec { @@ -39,13 +39,13 @@ public class AccordSpec // TODO (expected): we should be able to support lower recover delays, at least for txns public volatile DurationSpec.IntMillisecondsBound recover_delay = new DurationSpec.IntMillisecondsBound(5000); - public volatile DurationSpec.IntMillisecondsBound range_sync_recover_delay = new DurationSpec.IntMillisecondsBound(10000); + public volatile DurationSpec.IntMillisecondsBound range_sync_recover_delay = new DurationSpec.IntMillisecondsBound("5m"); public String slowPreAccept = "30ms <= p50*2 <= 100ms"; public String slowRead = "30ms <= p50*2 <= 100ms"; public long recoveryDelayFor(TxnId txnId, TimeUnit unit) { - if (txnId.kind() == Txn.Kind.SyncPoint && txnId.domain() == Routable.Domain.Range) + if (txnId.isSyncPoint() && txnId.is(Range)) return range_sync_recover_delay.to(unit); return recover_delay.to(unit); } @@ -65,13 +65,15 @@ public long recoveryDelayFor(TxnId txnId, TimeUnit unit) public DurationSpec.IntMillisecondsBound range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); - public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound("3600s"); + public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound("60m"); - public volatile DurationSpec.IntSecondsBound gc_delay = new DurationSpec.IntSecondsBound(300); - public volatile DurationSpec.IntSecondsBound schedule_durability_frequency = new DurationSpec.IntSecondsBound(120); - public volatile DurationSpec.IntSecondsBound durability_txnid_lag = new DurationSpec.IntSecondsBound(10); - public volatile DurationSpec.IntSecondsBound shard_durability_cycle = new DurationSpec.IntSecondsBound(5, TimeUnit.MINUTES); + public volatile DurationSpec.IntSecondsBound gc_delay = new DurationSpec.IntSecondsBound("5m"); + public volatile int shard_durability_target_splits = 128; + public volatile DurationSpec.IntSecondsBound durability_txnid_lag = new DurationSpec.IntSecondsBound(5); + public volatile DurationSpec.IntSecondsBound shard_durability_cycle = new DurationSpec.IntSecondsBound(15, TimeUnit.MINUTES); public volatile DurationSpec.IntSecondsBound global_durability_cycle = new DurationSpec.IntSecondsBound(10, TimeUnit.MINUTES); + public volatile DurationSpec.IntSecondsBound default_durability_retry_delay = new DurationSpec.IntSecondsBound(10, TimeUnit.SECONDS); + public volatile DurationSpec.IntSecondsBound max_durability_retry_delay = new DurationSpec.IntSecondsBound(10, TimeUnit.MINUTES); public enum TransactionalRangeMigration { diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 1bd8f0f33561..05f15148d822 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5343,14 +5343,14 @@ public static long getAccordGCDelay(TimeUnit unit) return conf.accord.gc_delay.to(unit); } - public static long getAccordScheduleDurabilityFrequency(TimeUnit unit) + public static int getAccordShardDurabilityTargetSplits() { - return conf.accord.schedule_durability_frequency.to(unit); + return conf.accord.shard_durability_target_splits; } - public static void setAccordScheduleDurabilityFrequencySeconds(long seconds) + public static void setAccordShardDurabilityTargetSplits(int number) { - conf.accord.schedule_durability_frequency = new DurationSpec.IntSecondsBound(seconds); + conf.accord.shard_durability_target_splits = number; } public static long getAccordScheduleDurabilityTxnIdLag(TimeUnit unit) @@ -5373,6 +5373,26 @@ public static void setAccordGlobalDurabilityCycleSeconds(long seconds) conf.accord.global_durability_cycle = new DurationSpec.IntSecondsBound(seconds); } + public static long getAccordDefaultDurabilityRetryDelay(TimeUnit unit) + { + return conf.accord.default_durability_retry_delay.to(unit); + } + + public static void setAccordDefaultDurabilityRetryDelaySeconds(long seconds) + { + conf.accord.default_durability_retry_delay = new DurationSpec.IntSecondsBound(seconds); + } + + public static long getAccordMaxDurabilityRetryDelay(TimeUnit unit) + { + return conf.accord.max_durability_retry_delay.to(unit); + } + + public static void setAccordMaxDurabilityRetryDelaySeconds(long seconds) + { + conf.accord.max_durability_retry_delay = new DurationSpec.IntSecondsBound(seconds); + } + public static long getAccordShardDurabilityCycle(TimeUnit unit) { return conf.accord.shard_durability_cycle.to(unit); diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 5501146d8d97..34aff492ea45 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -130,6 +130,7 @@ private class FlusherCallbacks implements Flusher.Callbacks @Override public void onFlush(long segment, int position) { + // TODO (required): this seems to be a big source of allocations waitingFor.drain(drained::add); List remaining = new ArrayList<>(); for (WaitingFor wait : drained) diff --git a/src/java/org/apache/cassandra/net/Message.java b/src/java/org/apache/cassandra/net/Message.java index 77edd1d0a247..4bf114972bb7 100644 --- a/src/java/org/apache/cassandra/net/Message.java +++ b/src/java/org/apache/cassandra/net/Message.java @@ -91,6 +91,12 @@ public class Message implements ResponseContext this.payloadSerializer = verb().serializer(); } + /** Whether the message has crossed the node boundary, that is whether it originated from another node. */ + public boolean isCrossNode() + { + return !from().equals(getBroadcastAddressAndPort()); + } + /** Sender of the message. */ @Override public InetAddressAndPort from() @@ -98,12 +104,6 @@ public InetAddressAndPort from() return header.from; } - /** Whether the message has crossed the node boundary, that is whether it originated from another node. */ - public boolean isCrossNode() - { - return !from().equals(getBroadcastAddressAndPort()); - } - /** * id of the request/message. In 4.0+ can be shared between multiple messages of the same logical request, * whilst in versions above a new id would be allocated for each message sent. @@ -520,7 +520,7 @@ public String toString() * Split into a separate object to allow partial message deserialization without wasting work and allocation * afterwards, if the entire message is necessary and available. */ - public static class Header + public static class Header implements ResponseContext { public final long id; public final Epoch epoch; @@ -607,6 +607,31 @@ InetAddressAndPort respondTo() return respondTo; } + /** Sender of the message. */ + @Override + public InetAddressAndPort from() + { + return from; + } + + @Override + public long id() + { + return id; + } + + @Override + public Verb verb() + { + return verb; + } + + @Override + public long expiresAtNanos() + { + return expiresAtNanos; + } + @Nullable public TimeUUID traceSession() { diff --git a/src/java/org/apache/cassandra/schema/TableId.java b/src/java/org/apache/cassandra/schema/TableId.java index 03fd3dc49028..5cdaf1c2b6bf 100644 --- a/src/java/org/apache/cassandra/schema/TableId.java +++ b/src/java/org/apache/cassandra/schema/TableId.java @@ -22,9 +22,12 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; +import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.commons.lang3.ArrayUtils; @@ -53,6 +56,8 @@ public class TableId implements Comparable public static final long MAGIC = 1956074401491665062L; public static final long EMPTY_SIZE = ObjectSizes.measureDeep(new UUID(0, 0)); + private static final ConcurrentHashMap internCache = new ConcurrentHashMap<>(); + private final UUID id; private TableId(UUID id) @@ -200,10 +205,10 @@ public static TableId deserialize(V src, ValueAccessor accessor, int offs return new TableId(new UUID(accessor.getLong(src, offset), accessor.getLong(src, offset + TypeSizes.LONG_SIZE))); } - public TableId tryIntern() + public TableId intern() { - TableMetadata metadata = Schema.instance.getTableMetadata(this); - return metadata == null ? this : metadata.id; + TableId interned = internCache.putIfAbsent(this, this); + return interned == null ? this : interned; } @Override @@ -253,4 +258,10 @@ public long serializedSize(TableId t, Version version) return t.serializedSize(); } }; + + public static void scheduleCachePruning() + { + ScheduledExecutors.scheduledFastTasks.scheduleSelfRecurring(internCache::clear, 1, TimeUnit.HOURS); + } + } diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index 45740f19169a..7f628f893113 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -77,6 +77,7 @@ import org.apache.cassandra.net.StartupClusterConnectivityChecker; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.security.ThreadAwareSecurityManager; import org.apache.cassandra.service.accord.AccordOperations; import org.apache.cassandra.service.paxos.PaxosState; @@ -422,7 +423,7 @@ protected void setup() logger.info("Prewarming of auth caches is disabled"); PaxosState.startAutoRepairs(); - + TableId.scheduleCachePruning(); completeSetup(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index b8bb61e00c3a..52d6b7c09019 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -547,7 +547,12 @@ static class Saving extends RunnableResult implements State Saving(V current, Runnable saveRunnable) { - super(() -> { saveRunnable.run(); return null; }); + this(current, () -> { saveRunnable.run(); return null; }); + } + + Saving(V current, Callable saveCallable) + { + super(saveCallable); this.current = current; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index a7a89698dd6e..3157fb8355d2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -28,6 +28,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.IntFunction; @@ -38,6 +39,7 @@ import org.slf4j.LoggerFactory; import accord.api.Agent; +import accord.api.ConfigurationService; import accord.api.DataStore; import accord.api.LocalListeners; import accord.api.ProgressLog; @@ -49,6 +51,7 @@ import accord.local.CommandStores; import accord.local.Commands; import accord.local.KeyHistory; +import accord.local.Node; import accord.local.NodeCommandStoreService; import accord.local.PreLoadContext; import accord.local.RedundantBefore; @@ -67,23 +70,30 @@ import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.SequentialExecutorPlus; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Mutation; +import org.apache.cassandra.service.accord.SavedCommand.MinimalCommand; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.service.accord.events.CacheEvents; import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Promise; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static accord.api.ConfigurationService.EpochReady.DONE; +import static accord.local.KeyHistory.COMMANDS; import static accord.primitives.Status.Committed; import static accord.primitives.Status.Invalidated; import static accord.primitives.Status.Truncated; import static accord.utils.Invariants.checkState; +import static org.apache.cassandra.service.accord.SavedCommand.Load.MINIMAL; public class AccordCommandStore extends CommandStore { @@ -218,7 +228,6 @@ public AccordCommandStore(int id, loadBootstrapBeganAt(journal.loadBootstrapBeganAt(id())); loadSafeToRead(journal.loadSafeToRead(id())); loadRangesForEpoch(journal.loadRangesForEpoch(id())); - loadHistoricalTransactions(journal.loadHistoricalTransactions(id())); executor.execute(() -> CommandStore.register(this)); } @@ -488,7 +497,41 @@ public void shutdown() { } - public void registerHistoricalTransactions(Deps deps, SafeCommandStore safeStore) + protected ConfigurationService.EpochReady syncInternal(Node node, Ranges ranges, long epoch, boolean isLoad) + { + if (!isLoad) + return super.syncInternal(node, ranges, epoch, false); + + List> loaded = journal.loadHistoricalTransactions(epoch, id); + // synchronously load and register historical, so we don't have unlimited numbers of epochs in flight + for (Pair pair : loaded) + { + cancelFetch(pair.left, epoch); + try + { + logger.info("Restoring sync'd deps for {} at epoch {}", pair.left, epoch); + AsyncChains.getBlocking(submit(PreLoadContext.contextFor(null, pair.right.keyDeps.keys(), COMMANDS), safeStore -> { + registerHistoricalTransactions(pair.left, pair.right, safeStore); + return null; + }).beginAsResult(), 5L, TimeUnit.MINUTES); + } + catch (InterruptedException | TimeoutException | ExecutionException e) + { + throw new RuntimeException(e); + } + ranges = ranges.without(Ranges.of(pair.left)); + } + + if (ranges.isEmpty()) + { + AsyncResult done = AsyncResults.success(null); + return new ConfigurationService.EpochReady(epoch, DONE, done, done, done); + } + + return super.syncInternal(node, ranges, epoch, false); + } + + public void registerHistoricalTransactions(Range range, Deps deps, SafeCommandStore safeStore) { if (deps.isEmpty()) return; @@ -509,19 +552,19 @@ public void registerHistoricalTransactions(Deps deps, SafeCommandStore safeStore }); for (int i = 0; i < deps.rangeDeps.rangeCount(); i++) { - Range range = deps.rangeDeps.range(i); - if (!allRanges.intersects(range)) + Range r = deps.rangeDeps.range(i); + if (!allRanges.intersects(r)) continue; - deps.rangeDeps.forEach(range, txnId -> { + deps.rangeDeps.forEach(r, txnId -> { // TODO (desired, efficiency): this can be made more efficient by batching by epoch - if (ranges.coordinates(txnId).intersects(range)) + if (ranges.coordinates(txnId).intersects(r)) return; // already coordinates, no need to replicate - if (!ranges.allBefore(txnId.epoch()).intersects(range)) + if (!ranges.allBefore(txnId.epoch()).intersects(r)) return; // TODO (required): this is potentially not safe - it should not be persisted until we save in journal // but, preferable to retire historical transactions as a concept entirely, and rely on ExclusiveSyncPoints instead - diskCommandsForRanges().mergeHistoricalTransaction(txnId, Ranges.single(range).slice(allRanges), Ranges::with); + diskCommandsForRanges().mergeHistoricalTransaction(txnId, Ranges.single(r).slice(allRanges), Ranges::with); }); } } @@ -542,6 +585,11 @@ public Command loadCommand(TxnId txnId) return journal.loadCommand(id, txnId, unsafeGetRedundantBefore(), durableBefore()); } + public MinimalCommand loadMinimal(TxnId txnId) + { + return journal.loadMinimal(id, txnId, MINIMAL, unsafeGetRedundantBefore(), durableBefore()); + } + public interface Loader { Promise load(Command next); @@ -581,7 +629,7 @@ public Promise load(Command command) TxnId txnId = command.txnId(); AsyncPromise future = new AsyncPromise<>(); - execute(context(command, KeyHistory.COMMANDS), + execute(context(command, COMMANDS), safeStore -> { Command local = command; if (local.status() != Truncated && local.status() != Invalidated) @@ -663,18 +711,6 @@ void loadRangesForEpoch(CommandStores.RangesForEpoch.Snapshot rangesForEpoch) unsafeSetRangesForEpoch(new CommandStores.RangesForEpoch(rangesForEpoch.epochs, rangesForEpoch.ranges, this)); } - void loadHistoricalTransactions(List deps) - { - if (deps != null) - { - execute(PreLoadContext.empty(), - safeStore -> { - for (Deps dep : deps) - registerHistoricalTransactions(dep, safeStore); - }); - } - } - public static class CommandStoreExecutor implements CacheSize { final AccordStateCache stateCache; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 620bad8b1699..af0bebdb4eb2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -22,10 +22,8 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import java.util.function.Supplier; import accord.api.Agent; -import accord.api.ConfigurationService.EpochReady; import accord.api.DataStore; import accord.api.LocalListeners; import accord.api.ProgressLog; @@ -143,22 +141,6 @@ synchronized void refreshCacheSizes() executor.execute(() -> executor.setCapacity(perExecutor)); } - @Override - public synchronized Supplier updateTopology(Node node, Topology newTopology, boolean startSync) - { - Supplier start = super.updateTopology(node, newTopology, startSync); - return () -> { - EpochReady ready = start.get(); - ready.metadata.addCallback(() -> { - synchronized (this) - { - refreshCacheSizes(); - } - }); - return ready; - }; - } - public void waitForQuiescense() { boolean hadPending; diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 0a2a192aa196..4f6f3d16662f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -234,7 +234,8 @@ public synchronized void start(Consumer callback) AtomicReference previousRef = new AtomicReference<>(null); diskState = diskStateManager.loadTopologies(((epoch, metadata, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { updateMapping(metadata); - reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); + reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED, true); + Topology previous = previousRef.get(); if (previous != null) { @@ -326,6 +327,11 @@ private void reportMetadata(ClusterMetadata metadata) } synchronized void reportMetadataInternal(ClusterMetadata metadata) + { + reportMetadataInternal(metadata, false); + } + + synchronized void reportMetadataInternal(ClusterMetadata metadata, boolean isLoad) { updateMapping(metadata); Topology topology = AccordTopology.createAccordTopology(metadata); diff --git a/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java index 74a9603a39df..a794b91abbb4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordFastPathCoordinator.java @@ -332,7 +332,7 @@ synchronized boolean isPeer(Node.Id node) } @Override - public AsyncResult onTopologyUpdate(Topology topology, boolean startSync) + public AsyncResult onTopologyUpdate(Topology topology, boolean isLoad, boolean startSync) { updatePeers(topology); return SUCCESS; diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 26b868e5d132..330c78e32cf3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -41,6 +41,7 @@ import accord.local.RedundantBefore; import accord.local.cfk.CommandsForKey; import accord.primitives.Deps; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.SaveStatus; import accord.primitives.Timestamp; @@ -65,11 +66,13 @@ import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; import org.apache.cassandra.service.accord.JournalKey.JournalKeySupport; import org.apache.cassandra.utils.ExecutorUtils; +import org.apache.cassandra.utils.Pair; import static accord.primitives.SaveStatus.ErasedOrVestigial; import static accord.primitives.Status.Truncated; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; +import static org.apache.cassandra.service.accord.JournalKey.keyForHistoricalTransactions; public class AccordJournal implements IJournal, Shutdownable { @@ -193,6 +196,21 @@ public Command loadCommand(int commandStoreId, TxnId txnId, RedundantBefore redu return builder.construct(); } + @Override + public SavedCommand.MinimalCommand loadMinimal(int commandStoreId, TxnId txnId, SavedCommand.Load load, RedundantBefore redundantBefore, DurableBefore durableBefore) + { + SavedCommand.Builder builder = loadDiffs(commandStoreId, txnId, load); + Cleanup cleanup = builder.shouldCleanup(redundantBefore, durableBefore); + switch (cleanup) + { + case EXPUNGE_PARTIAL: + case EXPUNGE: + case ERASE: + return null; + } + return builder.asMinimal(); + } + @VisibleForTesting public RedundantBefore loadRedundantBefore(int store) { @@ -222,9 +240,9 @@ public CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int store) } @Override - public List loadHistoricalTransactions(int store) + public List> loadHistoricalTransactions(long epoch, int store) { - HistoricalTransactionsAccumulator accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store)); + HistoricalTransactionsAccumulator accumulator = readAll(keyForHistoricalTransactions(epoch, store)); return accumulator.get(); } @@ -287,7 +305,7 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie if (fieldUpdates.newRangesForEpoch != null) pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store), fieldUpdates.newRangesForEpoch); if (fieldUpdates.addHistoricalTransactions != null) - pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store), fieldUpdates.addHistoricalTransactions); + pointer = appendInternal(JournalKey.keyForHistoricalTransactions(fieldUpdates.addHistoricalTransactions.epoch, store), Pair.create(fieldUpdates.addHistoricalTransactions.range, fieldUpdates.addHistoricalTransactions.deps)); if (onFlush == null) return; @@ -299,14 +317,19 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie } @VisibleForTesting - public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) + public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId, SavedCommand.Load load) { JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, commandStoreId); - SavedCommand.Builder builder = new SavedCommand.Builder(txnId); + SavedCommand.Builder builder = new SavedCommand.Builder(txnId, load); journalTable.readAll(key, builder::deserializeNext); return builder; } + public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) + { + return loadDiffs(commandStoreId, txnId, SavedCommand.Load.ALL); + } + private BUILDER readAll(JournalKey key) { BUILDER builder = (BUILDER) key.type.serializer.mergerFor(key); diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java index 60a1ef4f5817..f232b9213e9f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java @@ -28,6 +28,7 @@ import accord.local.DurableBefore; import accord.local.RedundantBefore; import accord.primitives.Deps; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -36,8 +37,10 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.utils.Pair; import static accord.local.CommandStores.RangesForEpoch; +import static org.apache.cassandra.service.accord.SavedCommand.Load.ALL; import static org.apache.cassandra.service.accord.serializers.DepsSerializer.deps; // TODO (required): test with large collection values, and perhaps split out some fields if they have a tendency to grow larger @@ -63,7 +66,7 @@ public static class CommandDiffSerializer @Override public SavedCommand.Builder mergerFor(JournalKey journalKey) { - return new SavedCommand.Builder(journalKey.id); + return new SavedCommand.Builder(journalKey.id, ALL); } @Override @@ -338,7 +341,7 @@ public void deserialize(JournalKey key, IdentityAccumulator, Deps> + public static class HistoricalTransactionsAccumulator extends Accumulator>, Pair> { public HistoricalTransactionsAccumulator() { @@ -346,14 +349,14 @@ public HistoricalTransactionsAccumulator() } @Override - protected List accumulate(List oldValue, Deps deps) + protected List> accumulate(List> oldValue, Pair deps) { accumulated.add(deps); // we can keep it mutable return accumulated; } } - public static class HistoricalTransactionsSerializer implements FlyweightSerializer + public static class HistoricalTransactionsSerializer implements FlyweightSerializer, HistoricalTransactionsAccumulator> { @Override public HistoricalTransactionsAccumulator mergerFor(JournalKey key) @@ -362,18 +365,22 @@ public HistoricalTransactionsAccumulator mergerFor(JournalKey key) } @Override - public void serialize(JournalKey key, Deps from, DataOutputPlus out, int userVersion) throws IOException + public void serialize(JournalKey key, Pair from, DataOutputPlus out, int userVersion) throws IOException { out.writeUnsignedVInt32(1); - deps.serialize(from, out, messagingVersion); + TokenRange.serializer.serialize((TokenRange) from.left, out, messagingVersion); + deps.serialize(from.right, out, messagingVersion); } @Override public void reserialize(JournalKey key, HistoricalTransactionsAccumulator from, DataOutputPlus out, int userVersion) throws IOException { out.writeUnsignedVInt32(from.get().size()); - for (Deps d : from.get()) - deps.serialize(d, out, messagingVersion); + for (Pair d : from.get()) + { + TokenRange.serializer.serialize((TokenRange) d.left, out, messagingVersion); + deps.serialize(d.right, out, messagingVersion); + } } @Override @@ -381,7 +388,10 @@ public void deserialize(JournalKey key, HistoricalTransactionsAccumulator into, { int count = in.readUnsignedVInt32(); for (int i = 0; i < count; i++) - into.update(deps.deserialize(in, messagingVersion)); + { + Range range = TokenRange.serializer.deserialize(in, messagingVersion); + into.update(Pair.create(range, deps.deserialize(in, messagingVersion))); + } } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java index 756de92e3f48..e42b08b993bc 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java +++ b/src/java/org/apache/cassandra/service/accord/AccordMessageSink.java @@ -26,36 +26,42 @@ import java.util.Map; import java.util.Set; -import accord.impl.RequestCallbacks; -import accord.messages.*; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; - -import org.apache.cassandra.config.AccordSpec; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; -import org.apache.cassandra.service.TimeoutStrategy; -import org.apache.cassandra.service.TimeoutStrategy.LatencySourceFactory; -import org.apache.cassandra.utils.Clock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.Agent; import accord.api.MessageSink; +import accord.impl.RequestCallbacks; import accord.local.AgentExecutor; import accord.local.Node; +import accord.messages.Callback; +import accord.messages.Commit; +import accord.messages.MessageType; +import accord.messages.Reply; +import accord.messages.ReplyContext; +import accord.messages.Request; +import accord.messages.TxnRequest; +import org.apache.cassandra.config.AccordSpec; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessageFlag; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.ResponseContext; import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.TimeoutStrategy; +import org.apache.cassandra.service.TimeoutStrategy.LatencySourceFactory; +import org.apache.cassandra.utils.Clock; import static accord.messages.MessageType.Kind.REMOTE; +import static accord.primitives.Routable.Domain.Range; import static java.util.concurrent.TimeUnit.NANOSECONDS; public class AccordMessageSink implements MessageSink @@ -249,10 +255,10 @@ private static boolean isRangeBarrier(Request request) return false; TxnRequest txnRequest = (TxnRequest) request; - if (!txnRequest.txnId.kind().isSyncPoint()) + if (!txnRequest.txnId.isSyncPoint()) return false; - return txnRequest.txnId.domain().isRange(); + return txnRequest.txnId.is(Range); } // TODO (expected): permit bulk send to save esp. on callback registration (and combine records) @@ -265,7 +271,7 @@ public void send(Node.Id to, Request request, AgentExecutor executor, Callback c long nowNanos = Clock.Global.nanoTime(); long delayedAtNanos = Long.MAX_VALUE; long expiresAtNanos; - if (isRangeBarrier(request) || verb == Verb.ACCORD_CALCULATE_DEPS_REQ) + if (isRangeBarrier(request)) expiresAtNanos = nowNanos + DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(); else expiresAtNanos = nowNanos + verb.expiresAfterNanos(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java index f5a06f9be816..6ee810fb04f0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java @@ -17,6 +17,11 @@ */ package org.apache.cassandra.service.accord; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.coordinate.Timeout; import accord.impl.RequestCallbacks; import accord.local.Node; @@ -27,12 +32,17 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.tracing.Tracing; +import org.apache.cassandra.utils.NoSpamLogger; +import org.apache.cassandra.utils.NoSpamLogger.NoSpamLogStatement; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.utils.MonotonicClock.Global.approxTime; class AccordResponseVerbHandler implements IVerbHandler { + private static final Logger logger = LoggerFactory.getLogger(AccordResponseVerbHandler.class); + private static final NoSpamLogStatement dropping = NoSpamLogger.getStatement(logger, "Dropping response {} from {}", 1L, TimeUnit.SECONDS); + private final RequestCallbacks callbacks; private final AccordEndpointMapper endpointMapper; @@ -45,6 +55,12 @@ class AccordResponseVerbHandler implements IVerbHandler @Override public void doVerb(Message message) { + if (!((AccordService)AccordService.instance()).shouldAcceptMessages()) + { + dropping.debug(message.verb(), message.from()); + return; + } + Node.Id from = endpointMapper.mappedId(message.from()); if (message.isFailureResponse()) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 4025049d394d..c260a013c638 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -42,6 +42,7 @@ import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; import accord.primitives.Deps; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routables; import accord.primitives.Timestamp; @@ -336,12 +337,12 @@ public RedundantBefore redundantBefore() } @Override - protected void registerHistoricalTransactions(Deps deps) + public void registerHistoricalTransactions(long epoch, Range range, Deps deps) { - ensureFieldUpdates().addHistoricalTransactions = deps; + ensureFieldUpdates().addHistoricalTransactions = new HistoricalTransactions(epoch, range, deps); // TODO (required): it is potentially unsafe to propagate this synchronously, as if we fail to write to the journal we may be in an inconsistent state // however, we can and should retire the concept of historical transactions in favour of ExclusiveSyncPoints ensuring their deps are known - super.registerHistoricalTransactions(deps); + super.registerHistoricalTransactions(epoch, range, deps); } private FieldUpdates ensureFieldUpdates() @@ -379,6 +380,20 @@ public static class FieldUpdates public NavigableMap newBootstrapBeganAt; public NavigableMap newSafeToRead; public RangesForEpoch.Snapshot newRangesForEpoch; - public Deps addHistoricalTransactions; + public HistoricalTransactions addHistoricalTransactions; + } + + public static class HistoricalTransactions + { + public final long epoch; + public final Range range; + public final Deps deps; + + public HistoricalTransactions(long epoch, Range range, Deps deps) + { + this.epoch = epoch; + this.range = range; + this.deps = deps; + } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 95188cddd4c8..dcb6fdf70d65 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -463,7 +463,7 @@ class Ref { List historic = Collections.emptyList();} ? tcmLoadRange(optMaxEpoch.getAsLong(), Long.MAX_VALUE) : discoverHistoric(node, cms); for (ClusterMetadata m : historic) - configService.reportMetadataInternal(m); + configService.reportMetadataInternal(m, true); })); ClusterMetadata current = cms.metadata(); if (!ref.historic.isEmpty()) @@ -499,10 +499,12 @@ class Ref { List historic = Collections.emptyList();} fastPathCoordinator.start(); cms.log().addListener(fastPathCoordinator); + durabilityScheduling.setDefaultRetryDelay(Ints.checkedCast(DatabaseDescriptor.getAccordDefaultDurabilityRetryDelay(SECONDS)), SECONDS); + durabilityScheduling.setMaxRetryDelay(Ints.checkedCast(DatabaseDescriptor.getAccordMaxDurabilityRetryDelay(SECONDS)), SECONDS); + durabilityScheduling.setTargetShardSplits(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityTargetSplits())); durabilityScheduling.setGlobalCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordGlobalDurabilityCycle(SECONDS)), SECONDS); durabilityScheduling.setShardCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityCycle(SECONDS)), SECONDS); durabilityScheduling.setTxnIdLag(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityTxnIdLag(SECONDS)), TimeUnit.SECONDS); - durabilityScheduling.setFrequency(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityFrequency(SECONDS)), SECONDS); durabilityScheduling.start(); state = State.STARTED; } @@ -1267,6 +1269,7 @@ public Object shutdownNow() @Override public boolean awaitTermination(long timeout, TimeUnit units) { + // TODO (required): expose awaitTermination in Node // node doesn't offer return true; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index ccbc90fdb288..4c3f068976c4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -28,6 +28,8 @@ import java.util.function.ToLongFunction; import java.util.stream.Stream; +import javax.annotation.Nullable; + import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -365,7 +367,7 @@ public S acquireOrInitialize(K key, Function valueFactory) listeners.forEach(l -> l.onAdd(finalNode)); } } - AccordCachingState acquired = acquireExisting(node, true); + AccordCachingState acquired = acquireExisting(node, true, null); Invariants.checkState(acquired != null, "%s could not be acquired", node); return safeRefFactory.apply(acquired); } @@ -378,7 +380,7 @@ public S acquireIfExists(K key) if (node == null) return null; - return safeRefFactory.apply(acquireExisting(node, false)); + return safeRefFactory.apply(acquireExisting(node, false, null)); } public void maybeLoad(K key, V initial) @@ -401,37 +403,49 @@ public void maybeLoad(K key, V initial) public S acquire(K key) { - AccordCachingState node = acquire(key, false); - return safeRefFactory.apply(node); + return acquire(key, null); } public S acquireIfLoaded(K key) { - AccordCachingState node = acquire(key, true); + return acquireIfLoaded(key, null); + } + + public S acquire(K key, @Nullable ExecutorPlus loadExecutor) + { + AccordCachingState node = acquire(key, false, loadExecutor); + return safeRefFactory.apply(node); + } + + public S acquireIfLoaded(K key, @Nullable ExecutorPlus loadExecutor) + { + AccordCachingState node = acquire(key, true, loadExecutor); if (node == null) return null; return safeRefFactory.apply(node); } - private AccordCachingState acquire(K key, boolean onlyIfLoaded) + private AccordCachingState acquire(K key, boolean onlyIfLoaded, @Nullable ExecutorPlus loadExecutor) { incrementCacheQueries(); @SuppressWarnings("unchecked") AccordCachingState node = (AccordCachingState) cache.get(key); return node == null - ? acquireAbsent(key, onlyIfLoaded) - : acquireExisting(node, onlyIfLoaded); + ? acquireAbsent(key, onlyIfLoaded, loadExecutor) + : acquireExisting(node, onlyIfLoaded, loadExecutor); } /* * Can only return a LOADING Node (or null) */ - private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded) + private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded, @Nullable ExecutorPlus loadExecutor) { incrementCacheMisses(); if (onlyIfLoaded) return null; AccordCachingState node = nodeFactory.create(key, index); + if (loadExecutor == null) + loadExecutor = AccordStateCache.this.loadExecutor; node.load(loadExecutor, loadFunction); node.references++; @@ -448,7 +462,7 @@ private AccordCachingState acquireAbsent(K key, boolean onlyIfLoaded) /* * Can't return EVICTED or INITIALIZED */ - private AccordCachingState acquireExisting(AccordCachingState node, boolean onlyIfLoaded) + private AccordCachingState acquireExisting(AccordCachingState node, boolean onlyIfLoaded, @Nullable ExecutorPlus loadExecutor) { Status status = node.status(); // status() completes @@ -462,6 +476,8 @@ private AccordCachingState acquireExisting(AccordCachingState node, if (node.references == 0) { + if (loadExecutor == null) + loadExecutor = AccordStateCache.this.loadExecutor; if (status == FAILED_TO_LOAD || status == EVICTED) node.reset().load(loadExecutor, loadFunction); diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 59fc056b90dd..067257ce2d16 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -18,6 +18,7 @@ package org.apache.cassandra.service.accord; import java.io.IOException; +import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,10 +27,12 @@ import accord.messages.Request; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; +import org.apache.cassandra.utils.NoSpamLogger; public class AccordVerbHandler implements IVerbHandler { private static final Logger logger = LoggerFactory.getLogger(AccordVerbHandler.class); + private static final NoSpamLogger.NoSpamLogStatement dropping = NoSpamLogger.getStatement(logger, "Dropping message {} from {}", 1L, TimeUnit.SECONDS); private final Node node; private final AccordEndpointMapper endpointMapper; @@ -45,7 +48,7 @@ public void doVerb(Message message) throws IOException { if (!((AccordService)AccordService.instance()).shouldAcceptMessages()) { - logger.debug("Dropping message {} from {}", message.verb(), message.from()); + dropping.debug(message.verb(), message.from()); return; } @@ -61,12 +64,12 @@ public void doVerb(Message message) throws IOException long waitForEpoch = request.waitForEpoch(); if (node.topology().hasAtLeastEpoch(waitForEpoch)) - request.process(node, fromNodeId, message); + request.process(node, fromNodeId, message.header); else node.withEpoch(waitForEpoch, (ignored, withEpochFailure) -> { if (withEpochFailure != null) throw new RuntimeException("Timed out waiting for epoch when processing message from " + fromNodeId + " to " + node + " message " + message, withEpochFailure); - request.process(node, fromNodeId, message); + request.process(node, fromNodeId, message.header); }); } } diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index 45f69e67f1d7..f4162d02b895 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.accord; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Map; @@ -60,6 +59,7 @@ public class CommandsForRangesLoader implements AccordStateCache.Listener historicalTransaction = new TreeMap<>(); private final AccordCommandStore store; private final ObjectHashSet cachedRangeTxns = new ObjectHashSet<>(); + // TODO (required): make this configurable, or perhaps backed by READ stage with concurrency limit public CommandsForRangesLoader(AccordCommandStore store) { @@ -91,7 +91,7 @@ public AsyncResult>> get(@Nullable Tx TxnId findAsDep = primaryTxnId != null && keyHistory == KeyHistory.RECOVERY ? primaryTxnId : null; Watcher watcher = fromCache(findAsDep, ranges, minTxnId, maxTxnId, redundantBefore); ImmutableMap before = ImmutableMap.copyOf(watcher.get()); - return AsyncChains.ofCallable(Stage.READ.executor(), () -> get(ranges, before, findAsDep, minTxnId, maxTxnId, redundantBefore)) + return AsyncChains.ofCallable(Stage.ACCORD_RANGE_LOADER.executor(), () -> get(ranges, before, findAsDep, minTxnId, maxTxnId, redundantBefore)) .map(map -> Pair.create(watcher, map), store) .beginAsResult(); } @@ -229,13 +229,27 @@ private NavigableMap load(Ranges ranges, Map cac { if (cacheHits.containsKey(txnId)) continue; - Command cmd = store.loadCommand(txnId); - if (cmd == null) - continue; // unknown command - Summary summary = create(cmd, ranges, findAsDep, redundantBefore); - if (summary == null) - continue; - map.put(txnId, summary); + if (findAsDep == null) + { + SavedCommand.MinimalCommand cmd = store.loadMinimal(txnId); + if (cmd == null) + continue; // unknown command + Summary summary = create(cmd, ranges, redundantBefore); + if (summary == null) + continue; + map.put(txnId, summary); + + } + else + { + Command cmd = store.loadCommand(txnId); + if (cmd == null) + continue; // unknown command + Summary summary = create(cmd, ranges, findAsDep, redundantBefore); + if (summary == null) + continue; + map.put(txnId, summary); + } } return map; } @@ -262,13 +276,11 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable TxnId f if (redundantBefore != null) { - Ranges durableAlready = Ranges.of(redundantBefore.foldlWithBounds(ranges, (e, accum, start, end) -> { + Ranges newRanges = redundantBefore.foldlWithBounds(ranges, (e, accum, start, end) -> { if (e.gcBefore.compareTo(cmd.txnId()) < 0) return accum; - accum.add(new TokenRange((AccordRoutingKey) start, (AccordRoutingKey) end)); - return accum; - }, new ArrayList(), ignore -> false).toArray(Range[]::new)); - Ranges newRanges = ranges.without(durableAlready); + return accum.without(Ranges.of(new TokenRange((AccordRoutingKey) start, (AccordRoutingKey) end))); + }, ranges, ignore -> false); if (newRanges.isEmpty()) return null; @@ -279,6 +291,43 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable TxnId f return new Summary(cmd.txnId(), cmd.executeAt(), saveStatus, ranges, findAsDep, hasAsDep); } + private static Summary create(SavedCommand.MinimalCommand cmd, Ranges cacheRanges, @Nullable RedundantBefore redundantBefore) + { + //TODO (required, correctness): C* did Invalidated, accord-core did Erased... what is correct? + SaveStatus saveStatus = cmd.saveStatus; + if (saveStatus == null + || saveStatus == SaveStatus.Invalidated + || saveStatus == SaveStatus.Erased + || !saveStatus.hasBeen(Status.PreAccepted)) + return null; + + if (cmd.participants == null) + return null; + + Ranges keysOrRanges = cmd.participants.touches().toRanges(); + if (keysOrRanges.domain() != Domain.Range) + throw new AssertionError(String.format("Txn keys are not range for %s", cmd.participants)); + Ranges ranges = (Ranges) keysOrRanges; + + ranges = ranges.slice(cacheRanges, Routables.Slice.Minimal); + if (ranges.isEmpty()) + return null; + + if (redundantBefore != null) + { + Ranges newRanges = redundantBefore.foldlWithBounds(ranges, (e, accum, start, end) -> { + if (e.gcBefore.compareTo(cmd.txnId) < 0) + return accum; + return accum.without(Ranges.of(new TokenRange((AccordRoutingKey) start, (AccordRoutingKey) end))); + }, ranges, ignore -> false); + + if (newRanges.isEmpty()) + return null; + } + + return new Summary(cmd.txnId, cmd.executeAt, saveStatus, ranges, null, false); + } + public void mergeHistoricalTransaction(TxnId txnId, Ranges ranges, BiFunction remappingFunction) { historicalTransaction.merge(txnId, ranges, remappingFunction); diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java index 7de9d68602d5..635e5e2d1581 100644 --- a/src/java/org/apache/cassandra/service/accord/IJournal.java +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -26,20 +26,23 @@ import accord.local.DurableBefore; import accord.local.RedundantBefore; import accord.primitives.Deps; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.PersistentField.Persister; +import org.apache.cassandra.utils.Pair; public interface IJournal { Command loadCommand(int commandStoreId, TxnId txnId, RedundantBefore redundantBefore, DurableBefore durableBefore); + SavedCommand.MinimalCommand loadMinimal(int commandStoreId, TxnId txnId, SavedCommand.Load load, RedundantBefore redundantBefore, DurableBefore durableBefore); RedundantBefore loadRedundantBefore(int commandStoreId); NavigableMap loadBootstrapBeganAt(int commandStoreId); NavigableMap loadSafeToRead(int commandStoreId); CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int commandStoreId); - List loadHistoricalTransactions(int store); + List> loadHistoricalTransactions(long epoch, int store); void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onFlush); Persister durableBeforePersister(); diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index 99e068ca26f4..8266b3ac64ff 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -23,8 +23,11 @@ import java.util.Objects; import java.util.zip.Checksum; +import accord.local.Node; import accord.local.Node.Id; +import accord.primitives.Routable; import accord.primitives.Timestamp; +import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.io.util.DataInputPlus; @@ -277,5 +280,9 @@ static Type fromId(int id) } } - + public static JournalKey keyForHistoricalTransactions(long epoch, int store) + { + TxnId txnId = new TxnId(epoch, 0l, Txn.Kind.LocalOnly, Routable.Domain.Range, Node.Id.NONE); + return new JournalKey(txnId, JournalKey.Type.HISTORICAL_TRANSACTIONS, store); + } } diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index be2f9c6289f4..0bd21ef05660 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -59,7 +59,12 @@ import static accord.primitives.SaveStatus.TruncatedApplyWithOutcome; import static accord.primitives.Status.Durability.NotDurable; import static accord.utils.Invariants.illegalState; +import static org.apache.cassandra.service.accord.SavedCommand.Fields.DURABILITY; +import static org.apache.cassandra.service.accord.SavedCommand.Fields.EXECUTE_AT; import static org.apache.cassandra.service.accord.SavedCommand.Fields.PARTICIPANTS; +import static org.apache.cassandra.service.accord.SavedCommand.Fields.SAVE_STATUS; +import static org.apache.cassandra.service.accord.SavedCommand.Fields.WRITES; +import static org.apache.cassandra.service.accord.SavedCommand.Load.ALL; public class SavedCommand { @@ -231,8 +236,8 @@ static int getFlags(Command before, Command after) flags = collectFlags(before, after, Command::executeAt, true, Fields.EXECUTE_AT, flags); flags = collectFlags(before, after, Command::executesAtLeast, true, Fields.EXECUTES_AT_LEAST, flags); - flags = collectFlags(before, after, Command::saveStatus, false, Fields.SAVE_STATUS, flags); - flags = collectFlags(before, after, Command::durability, false, Fields.DURABILITY, flags); + flags = collectFlags(before, after, Command::saveStatus, false, SAVE_STATUS, flags); + flags = collectFlags(before, after, Command::durability, false, DURABILITY, flags); flags = collectFlags(before, after, Command::acceptedOrCommitted, false, Fields.ACCEPTED, flags); flags = collectFlags(before, after, Command::promised, false, Fields.PROMISED, flags); @@ -243,7 +248,7 @@ static int getFlags(Command before, Command after) flags = collectFlags(before, after, SavedCommand::getWaitingOn, false, Fields.WAITING_ON, flags); - flags = collectFlags(before, after, Command::writes, false, Fields.WRITES, flags); + flags = collectFlags(before, after, Command::writes, false, WRITES, flags); return flags; } @@ -323,8 +328,51 @@ private static int unsetFieldIsNull(Fields field, int oldFlags) return oldFlags & ~(1 << field.ordinal()); } + public enum Load + { + ALL(0), + PURGEABLE(SAVE_STATUS, PARTICIPANTS, DURABILITY, EXECUTE_AT, WRITES), + MINIMAL(SAVE_STATUS, PARTICIPANTS, EXECUTE_AT); + + final int mask; + + Load(int mask) + { + this.mask = mask; + } + + Load(Fields ... fields) + { + int mask = -1; + for (Fields field : fields) + mask &= ~(1<< field.ordinal()); + this.mask = mask; + } + } + + public static class MinimalCommand + { + public final TxnId txnId; + public final SaveStatus saveStatus; + public final StoreParticipants participants; + public final Status.Durability durability; + public final Timestamp executeAt; + public final Writes writes; + + public MinimalCommand(TxnId txnId, SaveStatus saveStatus, StoreParticipants participants, Status.Durability durability, Timestamp executeAt, Writes writes) + { + this.txnId = txnId; + this.saveStatus = saveStatus; + this.participants = participants; + this.durability = durability; + this.executeAt = executeAt; + this.writes = writes; + } + } + public static class Builder { + final int mask; int flags; TxnId txnId; @@ -350,13 +398,25 @@ public static class Builder boolean nextCalled; int count; - public Builder(TxnId txnId) + public Builder(TxnId txnId, Load load) { + this.mask = load.mask; init(txnId); } + public Builder(TxnId txnId) + { + this(txnId, ALL); + } + + public Builder(Load load) + { + this.mask = load.mask; + } + public Builder() { + this(ALL); } public TxnId txnId() @@ -484,7 +544,7 @@ public Cleanup shouldCleanup(RedundantBefore redundantBefore, DurableBefore dura if (saveStatus == null || participants == null) return Cleanup.NO; - Cleanup cleanup = Cleanup.shouldCleanup(txnId, saveStatus, durability, participants, redundantBefore, durableBefore); + Cleanup cleanup = Cleanup.shouldCleanupPartial(txnId, saveStatus, durability, participants, redundantBefore, durableBefore); if (this.cleanup != null && this.cleanup.compareTo(cleanup) > 0) cleanup = this.cleanup; return cleanup; @@ -522,13 +582,13 @@ public Builder maybeCleanup(Cleanup cleanup) public Builder expungePartial(Cleanup cleanup, SaveStatus saveStatus, boolean includeOutcome) { Invariants.checkState(txnId != null); - Builder builder = new Builder(txnId); + Builder builder = new Builder(txnId, ALL); builder.count++; builder.nextCalled = true; Invariants.checkState(saveStatus != null); - builder.flags = setFieldChanged(Fields.SAVE_STATUS, builder.flags); + builder.flags = setFieldChanged(SAVE_STATUS, builder.flags); builder.saveStatus = saveStatus; builder.flags = setFieldChanged(Fields.CLEANUP, builder.flags); builder.cleanup = cleanup; @@ -539,7 +599,7 @@ public Builder expungePartial(Cleanup cleanup, SaveStatus saveStatus, boolean in } if (durability != null) { - builder.flags = setFieldChanged(Fields.DURABILITY, builder.flags); + builder.flags = setFieldChanged(DURABILITY, builder.flags); builder.durability = durability; } if (participants != null) @@ -549,7 +609,7 @@ public Builder expungePartial(Cleanup cleanup, SaveStatus saveStatus, boolean in } if (includeOutcome && builder.writes != null) { - builder.flags = setFieldChanged(Fields.WRITES, builder.flags); + builder.flags = setFieldChanged(WRITES, builder.flags); builder.writes = writes; } @@ -559,7 +619,7 @@ public Builder expungePartial(Cleanup cleanup, SaveStatus saveStatus, boolean in public Builder saveStatusOnly() { Invariants.checkState(txnId != null); - Builder builder = new Builder(txnId); + Builder builder = new Builder(txnId, ALL); builder.count++; builder.nextCalled = true; @@ -567,7 +627,7 @@ public Builder saveStatusOnly() // TODO: these accesses can be abstracted away if (saveStatus != null) { - builder.flags = setFieldChanged(Fields.SAVE_STATUS, builder.flags); + builder.flags = setFieldChanged(SAVE_STATUS, builder.flags); builder.saveStatus = saveStatus; } @@ -583,6 +643,11 @@ public ByteBuffer asByteBuffer(int userVersion) throws IOException } } + public MinimalCommand asMinimal() + { + return new MinimalCommand(txnId, saveStatus, participants, durability, executeAt, writes); + } + public static Route deserializeRouteOrNull(DataInputPlus in, int userVersion) throws IOException { int flags = in.readInt(); @@ -595,6 +660,7 @@ public static Route deserializeRouteOrNull(DataInputPlus in, int userVersion) public void serialize(DataOutputPlus out, int userVersion) throws IOException { + Invariants.checkState(mask == 0); out.writeInt(validateFlags(flags)); int iterable = toIterableSetFields(flags); @@ -665,7 +731,7 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio while (iterable != 0) { Fields field = nextSetField(iterable); - if (getFieldChanged(field, this.flags)) + if (getFieldChanged(field, this.flags) || getFieldIsNull(field, mask)) { if (!getFieldIsNull(field, flags)) skip(field, in, userVersion); diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index 6d8d2b818453..68ab84888543 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -287,14 +287,14 @@ public void skip(DataInputPlus in, int version) throws IOException @Override public TokenKey deserialize(DataInputPlus in, int version) throws IOException { - TableId table = TableId.deserialize(in); + TableId table = TableId.deserialize(in).intern(); Token token = Token.compactSerializer.deserialize(in, getPartitioner(), version); return new TokenKey(table, token); } public TokenKey fromBytes(ByteBuffer bytes, IPartitioner partitioner) { - TableId tableId = TableId.deserialize(bytes, ByteBufferAccessor.instance, 0); + TableId tableId = TableId.deserialize(bytes, ByteBufferAccessor.instance, 0).intern(); bytes.position(tableId.serializedSize()); Token token = Token.compactSerializer.deserialize(bytes, partitioner); return new TokenKey(tableId, token); diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index fc78fe669262..a01ae3828439 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -146,15 +146,14 @@ public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int @Override public void skip(DataInputPlus in, int version) throws IOException { - TableId tableId = TableId.deserialize(in); - IPartitioner partitioner = Schema.instance.getExistingTablePartitioner(tableId); + in.skipBytesFully(TableId.staticSerializedSize()); ByteBufferUtil.skipShortLength(in); } @Override public PartitionKey deserialize(DataInputPlus in, int version) throws IOException { - TableId tableId = TableId.deserialize(in); + TableId tableId = TableId.deserialize(in).intern(); IPartitioner partitioner = Schema.instance.getExistingTablePartitioner(tableId); DecoratedKey key = partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in)); return new PartitionKey(tableId, key); @@ -162,7 +161,7 @@ public PartitionKey deserialize(DataInputPlus in, int version) throws IOExceptio public PartitionKey deserialize(V src, ValueAccessor accessor, int offset) throws IOException { - TableId tableId = TableId.deserialize(src, accessor, offset); + TableId tableId = TableId.deserialize(src, accessor, offset).intern(); offset += tableId.serializedSize(); TableMetadata metadata = Schema.instance.getTableMetadata(tableId); int numBytes = accessor.getShort(src, offset); diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 71758981a646..7f6b30fb7c66 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -17,34 +17,53 @@ */ package org.apache.cassandra.service.accord.async; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.api.RoutingKey; -import accord.local.cfk.CommandsForKey; import accord.local.KeyHistory; import accord.local.PreLoadContext; -import accord.primitives.*; +import accord.local.cfk.CommandsForKey; +import accord.primitives.AbstractKeys; +import accord.primitives.AbstractRanges; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.TxnId; +import accord.primitives.Unseekables; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import accord.utils.async.Observable; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import org.apache.cassandra.service.accord.*; +import org.apache.cassandra.concurrent.ExecutorPlus; +import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.service.accord.AccordCachingState; +import org.apache.cassandra.service.accord.AccordCommandStore; +import org.apache.cassandra.service.accord.AccordKeyspace; +import org.apache.cassandra.service.accord.AccordSafeCommandsForRanges; +import org.apache.cassandra.service.accord.AccordSafeState; +import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.CommandsForRangesLoader; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.utils.NoSpamLogger; import org.apache.cassandra.utils.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.function.BiConsumer; -import java.util.stream.Collectors; -import javax.annotation.Nullable; - public class AsyncLoader { private static final Logger logger = LoggerFactory.getLogger(AsyncLoader.class); @@ -89,7 +108,16 @@ private static > void referenceAndAssemble AccordStateCache.Instance cache, List> listenChains) { - S safeRef = cache.acquire(key); + referenceAndAssembleReadsForKey(key, context, cache, listenChains, null); + } + + private static > void referenceAndAssembleReadsForKey(K key, + Map context, + AccordStateCache.Instance cache, + List> listenChains, + @Nullable ExecutorPlus loadExecutor) + { + S safeRef = cache.acquire(key, loadExecutor); if (context.putIfAbsent(key, safeRef) != null) { noSpamLogger.warn("Context {} contained key {} more than once", context, key); @@ -120,16 +148,24 @@ private static > void referenceAndAssemble private void referenceAndAssembleReadsForKey(RoutingKey key, AsyncOperation.Context context, List> listenChains) + { + referenceAndAssembleReadsForKey(key, context, listenChains, null); + } + + private void referenceAndAssembleReadsForKey(RoutingKey key, + AsyncOperation.Context context, + List> listenChains, + @Nullable ExecutorPlus loadExecutor) { // recovery operations also need the deps data for their preaccept logic switch (keyHistory) { case TIMESTAMPS: - referenceAndAssembleReadsForKey(key, context.timestampsForKey, commandStore.timestampsForKeyCache(), listenChains); + referenceAndAssembleReadsForKey(key, context.timestampsForKey, commandStore.timestampsForKeyCache(), listenChains, loadExecutor); break; case COMMANDS: case RECOVERY: - referenceAndAssembleReadsForKey(key, context.commandsForKey, commandStore.commandsForKeyCache(), listenChains); + referenceAndAssembleReadsForKey(key, context.commandsForKey, commandStore.commandsForKeyCache(), listenChains, loadExecutor); case NONE: break; default: throw new IllegalArgumentException("Unhandled keyhistory: " + keyHistory); @@ -168,11 +204,15 @@ private AsyncResult referenceAndDispatchReads(@Nullable TxnId primaryTxnId, A private AsyncChain referenceAndDispatchReadsForRange(@Nullable TxnId primaryTxnId, AsyncOperation.Context context) { + if (keyHistory == KeyHistory.NONE) + return AsyncChains.success(null); + Ranges ranges = ((AbstractRanges) keysOrRanges).toRanges(); List> root = new ArrayList<>(ranges.size() + 1); class Watcher implements AccordStateCache.Listener { + // TODO (required): streams prohibited in hot path private final Set cached = commandStore.commandsForKeyCache().stream() .map(n -> (TokenKey) n.key()) .filter(ranges::contains) @@ -196,7 +236,7 @@ public void onAdd(AccordCachingState state) return AsyncChains.success(null); Set set = ImmutableSet.builder().addAll(watcher.cached).addAll(keys).build(); List> chains = new ArrayList<>(); - set.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); + set.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains, Stage.ACCORD_RANGE_LOADER.executor())); return chains.isEmpty() ? AsyncChains.success(null) : AsyncChains.reduce(chains, (a, b) -> null); }, commandStore)); diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 463350adf8d2..51dbb0a6c22d 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -37,6 +37,7 @@ import accord.primitives.Unseekables; import accord.utils.Invariants; import accord.utils.async.AsyncChains; +import accord.utils.async.Cancellable; import org.agrona.collections.Object2ObjectHashMap; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.service.accord.AccordCommandStore; @@ -57,7 +58,7 @@ import static org.apache.cassandra.service.accord.async.AsyncOperation.State.PREPARING; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.RUNNING; -public abstract class AsyncOperation extends AsyncChains.Head implements Runnable, Function +public abstract class AsyncOperation extends AsyncChains.Head implements Runnable, Function, Cancellable { private static final Logger logger = LoggerFactory.getLogger(AsyncOperation.class); @@ -360,12 +361,18 @@ private boolean preRun() } @Override - public void start(BiConsumer callback) + public Cancellable start(BiConsumer callback) { Invariants.checkState(this.callback == null); this.callback = callback; if (!commandStore.inStore() || preRun()) commandStore.executor().execute(this); + return this; + } + + @Override + public void cancel() + { } static class ForFunction extends AsyncOperation diff --git a/src/java/org/apache/cassandra/utils/NoSpamLogger.java b/src/java/org/apache/cassandra/utils/NoSpamLogger.java index e16caa930f85..94c5e7d103d8 100644 --- a/src/java/org/apache/cassandra/utils/NoSpamLogger.java +++ b/src/java/org/apache/cassandra/utils/NoSpamLogger.java @@ -46,7 +46,7 @@ public class NoSpamLogger */ public enum Level { - INFO, WARN, ERROR + DEBUG, INFO, WARN, ERROR } @VisibleForTesting @@ -99,6 +99,9 @@ private boolean logNoCheck(Level l, Object... objects) { switch (l) { + case DEBUG: + wrapped.debug(statement, objects); + break; case INFO: wrapped.info(statement, objects); break; @@ -114,6 +117,16 @@ private boolean logNoCheck(Level l, Object... objects) return true; } + public boolean debug(long nowNanos, Object... objects) + { + return NoSpamLogStatement.this.log(Level.DEBUG, nowNanos, objects); + } + + public boolean debug(Object... objects) + { + return NoSpamLogStatement.this.debug(CLOCK.nanoTime(), objects); + } + public boolean info(long nowNanos, Object... objects) { return NoSpamLogStatement.this.log(Level.INFO, nowNanos, objects); diff --git a/src/java/org/apache/cassandra/utils/memory/BufferPool.java b/src/java/org/apache/cassandra/utils/memory/BufferPool.java index e46b0e4d692a..90c95228f9fe 100644 --- a/src/java/org/apache/cassandra/utils/memory/BufferPool.java +++ b/src/java/org/apache/cassandra/utils/memory/BufferPool.java @@ -130,6 +130,7 @@ public class BufferPool public static final int TINY_CHUNK_SIZE = NORMAL_ALLOCATION_UNIT; public static final int TINY_ALLOCATION_UNIT = TINY_CHUNK_SIZE / 64; public static final int TINY_ALLOCATION_LIMIT = TINY_CHUNK_SIZE / 2; + private static final boolean REF_TRACE_ENABLED = Ref.TRACE_ENABLED; private static final Logger logger = LoggerFactory.getLogger(BufferPool.class); private static final NoSpamLogger noSpamLogger = NoSpamLogger.getLogger(logger, 15L, TimeUnit.MINUTES); @@ -1330,7 +1331,7 @@ static Chunk getParentChunk(ByteBuffer buffer) void setAttachment(ByteBuffer buffer) { - if (Ref.TRACE_ENABLED) + if (REF_TRACE_ENABLED) MemoryUtil.setAttachment(buffer, new DirectBufferRef<>(this, null)); else MemoryUtil.setAttachment(buffer, this); @@ -1342,7 +1343,7 @@ boolean releaseAttachment(ByteBuffer buffer) if (attachment == null) return false; - if (Ref.TRACE_ENABLED) + if (REF_TRACE_ENABLED) ((DirectBufferRef) attachment).release(); return true; diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index 4c249a1a280d..2dd741f5b23e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -66,7 +66,8 @@ public static void setUp() throws IOException CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(MILLISECONDS.toSeconds(currentTimeMillis()))); // AccordTestBase.setupCluster(builder -> builder, 3); AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config - .set("accord.schedule_durability_frequency", "5s") + .set("accord.shard_durability_target_splits", "64") + .set("accord.shard_durability_cycle", "5m") .set("accord.ephemeral_read_enabled", "true") .set("accord.gc_delay", "5s")), 3); } @@ -105,7 +106,7 @@ public boolean matches(int i, int i1, IMessage iMessage) final long batchTime = TimeUnit.SECONDS.toNanos(10); final int concurrency = 100; final int ratePerSecond = 1000; - final int keyCount = 1000000; + final int keyCount = 1_000_000; final float readChance = 0.33f; long nextRepairAt = repairInterval; long nextCompactionAt = compactionInterval; diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java index 3367c7261106..578c6a84b9c7 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -34,6 +34,7 @@ import accord.local.RedundantBefore; import accord.primitives.Deps; import accord.primitives.KeyDeps; +import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -52,6 +53,7 @@ import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.Pair; import static accord.local.CommandStores.RangesForEpoch; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; @@ -98,6 +100,7 @@ public void segmentMergeTest() throws InterruptedException Gen durableBeforeGen = AccordGenerators.durableBeforeGen(DatabaseDescriptor.getPartitioner()); Gen> safeToReadGen = AccordGenerators.safeToReadGen(DatabaseDescriptor.getPartitioner()); Gen rangesForEpochGen = AccordGenerators.rangesForEpoch(DatabaseDescriptor.getPartitioner()); + Gen rangeGen = AccordGenerators.range(DatabaseDescriptor.getPartitioner()); Gen historicalTransactionsGen = depsGen(); AccordJournal journal = new AccordJournal(new TestParams() @@ -134,7 +137,7 @@ public boolean enableCompaction() // updates.newRedundantBefore = redundantBefore = RedundantBefore.merge(redundantBefore, updates.addRedundantBefore); updates.newSafeToRead = safeToReadGen.next(rs); updates.newRangesForEpoch = rangesForEpochGen.next(rs); - updates.addHistoricalTransactions = historicalTransactionsGen.next(rs); + updates.addHistoricalTransactions = new AccordSafeCommandStore.HistoricalTransactions(0l, rangeGen.next(rs), historicalTransactionsGen.next(rs)); journal.durableBeforePersister().persist(addDurableBefore, null); journal.persistStoreState(1, updates, null); @@ -147,7 +150,7 @@ public boolean enableCompaction() safeToReadAtAccumulator = updates.newSafeToRead; if (updates.newRangesForEpoch != null) rangesForEpochAccumulator = updates.newRangesForEpoch; - historicalTransactionsAccumulator.update(updates.addHistoricalTransactions); + historicalTransactionsAccumulator.update(Pair.create(updates.addHistoricalTransactions.range, updates.addHistoricalTransactions.deps)); if (i % 100 == 0) journal.closeCurrentSegmentForTestingIfNonEmpty(); @@ -160,9 +163,9 @@ public boolean enableCompaction() Assert.assertEquals(bootstrapBeganAtAccumulator, journal.loadBootstrapBeganAt(1)); Assert.assertEquals(safeToReadAtAccumulator, journal.loadSafeToRead(1)); Assert.assertEquals(rangesForEpochAccumulator, journal.loadRangesForEpoch(1)); - List historical = historicalTransactionsAccumulator.get(); + List> historical = historicalTransactionsAccumulator.get(); Collections.reverse(historical); - Assert.assertEquals(historical, journal.loadHistoricalTransactions(1)); + Assert.assertEquals(historical, journal.loadHistoricalTransactions(0l, 1)); } finally { diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 0bdc561818c3..6a4546f43500 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -507,13 +507,15 @@ public void testBackwardCompatibilityOfAuthenticatorPropertyAsString() throws IO public void testAccordConfig() { Map accordSpec = ImmutableMap.of("fast_path_update_delay", "60s", - "schedule_durability_frequency", "60s", + "default_durability_retry_delay", "60s", + "max_durability_retry_delay", "60s", "durability_txnid_lag", "60s", "shard_durability_cycle", "60s", "global_durability_cycle", "60s"); AccordSpec spec = from("accord", accordSpec).accord; assertThat(spec.fast_path_update_delay.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); - assertThat(spec.schedule_durability_frequency.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); + assertThat(spec.default_durability_retry_delay.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); + assertThat(spec.max_durability_retry_delay.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); assertThat(spec.durability_txnid_lag.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); assertThat(spec.shard_durability_cycle.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); assertThat(spec.global_durability_cycle.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); diff --git a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java index 8344c09dc62f..b252d13f760a 100644 --- a/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java +++ b/test/unit/org/apache/cassandra/service/accord/EpochSyncTest.java @@ -36,6 +36,7 @@ import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.Callable; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.function.Predicate; @@ -65,6 +66,7 @@ import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; +import accord.utils.async.Cancellable; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; import org.apache.cassandra.concurrent.SimulatedExecutorFactory; import org.apache.cassandra.concurrent.Stage; @@ -629,9 +631,9 @@ private AsyncChain schedule(long time, TimeUnit unit, Callable task) return new AsyncChains.Head<>() { @Override - protected void start(BiConsumer callback) + protected Cancellable start(BiConsumer callback) { - scheduler.schedule(() -> { + Future future = scheduler.schedule(() -> { T value; try { @@ -644,6 +646,7 @@ protected void start(BiConsumer callback) } callback.accept(value, null); }, time, unit); + return () -> future.cancel(true); } }; } @@ -672,7 +675,7 @@ private class Instance config.registerListener(new ConfigurationService.Listener() { @Override - public AsyncResult onTopologyUpdate(Topology topology, boolean startSync) + public AsyncResult onTopologyUpdate(Topology topology, boolean isLoad, boolean startSync) { // EpochReady ready = EpochReady.done(topology.epoch()); AsyncResult metadata = schedule(rs.nextInt(1, 10), TimeUnit.SECONDS, (Callable) () -> null).beginAsResult(); diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index 64caa97216d3..ef0e264def85 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -36,6 +36,7 @@ import accord.local.RedundantBefore; import accord.local.StoreParticipants; import accord.primitives.Known; +import accord.primitives.Range; import accord.primitives.SaveStatus; import accord.primitives.Status; import accord.primitives.Ballot; @@ -54,7 +55,10 @@ import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; +import org.apache.cassandra.service.accord.SavedCommand.Load; +import org.apache.cassandra.service.accord.SavedCommand.MinimalCommand; import org.apache.cassandra.service.accord.serializers.CommandSerializers; +import org.apache.cassandra.utils.Pair; public class MockJournal implements IJournal { @@ -81,6 +85,17 @@ public Command loadCommand(int store, TxnId txnId, RedundantBefore redundantBefo return reconstructFromDiff(new ArrayList<>(saved)); } + @Override + public MinimalCommand loadMinimal(int store, TxnId txnId, Load load, RedundantBefore redundantBefore, DurableBefore durableBefore) + { + JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, store); + List saved = commands.get(key); + if (saved == null) + return null; + Command command = reconstructFromDiff(new ArrayList<>(saved)); + return new MinimalCommand(command.txnId(), command.saveStatus(), command.participants(), command.durability(), command.executeAt(), command.writes()); + } + @Override public RedundantBefore loadRedundantBefore(int store) { @@ -126,7 +141,7 @@ public CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int store) } @Override - public List loadHistoricalTransactions(int store) + public List> loadHistoricalTransactions(long epoch, int store) { return fieldUpdates(store).historicalTransactionsAccumulator.get(); } @@ -163,7 +178,7 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie if (fieldUpdates.newRangesForEpoch != null) updates.rangesForEpochAccumulator.update(fieldUpdates.newRangesForEpoch); if (fieldUpdates.addHistoricalTransactions != null) - updates.historicalTransactionsAccumulator.update(fieldUpdates.addHistoricalTransactions); + updates.historicalTransactionsAccumulator.update(Pair.create(fieldUpdates.addHistoricalTransactions.range, fieldUpdates.addHistoricalTransactions.deps)); onFlush.run(); } diff --git a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java index 99963c3af013..3cfc1c91c590 100644 --- a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java @@ -40,12 +40,14 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.SavedCommand.Fields; +import org.apache.cassandra.service.accord.SavedCommand.Load; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.AccordGenerators; import org.assertj.core.api.SoftAssertions; import static accord.utils.Property.qt; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; +import static org.apache.cassandra.service.accord.SavedCommand.Load.ALL; import static org.apache.cassandra.service.accord.SavedCommand.getFlags; public class SavedCommandTest @@ -96,7 +98,7 @@ public void serde() out.clear(); Command orig = cmdBuilder.build(saveStatus); SavedCommand.serialize(null, orig, out, userVersion); - SavedCommand.Builder builder = new SavedCommand.Builder(orig.txnId()); + SavedCommand.Builder builder = new SavedCommand.Builder(orig.txnId(), Load.ALL); builder.deserializeNext(new DataInputBuffer(out.unsafeGetBufferAndFlip(), false), userVersion); // We are not persisting the result, so force it for strict equality builder.forceResult(orig.result()); From 042a6e97694534743ea8a3040e3bbe1b2a9867a8 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Sat, 5 Oct 2024 11:37:36 +0200 Subject: [PATCH 175/340] Add bounce to load test --- .../service/accord/AccordCommandStores.java | 22 ++++++++++++++----- .../service/accord/AccordJournal.java | 2 +- .../service/accord/AccordService.java | 12 +++++----- .../apache/cassandra/utils/ExecutorUtils.java | 11 +++++++--- .../test/accord/AccordLoadTest.java | 2 ++ 5 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index af0bebdb4eb2..d51223243d14 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -36,6 +36,7 @@ import accord.topology.Topology; import accord.utils.RandomSource; import org.apache.cassandra.cache.CacheSize; +import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.metrics.AccordStateCacheMetrics; @@ -46,6 +47,10 @@ import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.concurrent.Stage.ACCORD_MIGRATION; +import static org.apache.cassandra.concurrent.Stage.ACCORD_RANGE_LOADER; +import static org.apache.cassandra.concurrent.Stage.MUTATION; +import static org.apache.cassandra.concurrent.Stage.READ; public class AccordCommandStores extends CommandStores implements CacheSize { @@ -146,17 +151,24 @@ public void waitForQuiescense() boolean hadPending; try { + List executors = new ArrayList<>(); + for (CommandStoreExecutor executor : this.executors) + executors.add(executor.delegate); + + executors.add(READ.executor()); + executors.add(MUTATION.executor()); + executors.add(ACCORD_MIGRATION.executor()); + executors.add(ACCORD_RANGE_LOADER.executor()); + do { hadPending = false; List> futures = new ArrayList<>(); - for (CommandStoreExecutor executor : executors) + for (ExecutorPlus executor : executors) { - if (executor.hasTasks()) - { - futures.add(executor.submit(() -> {})); + if (!hadPending && (executor.getPendingTaskCount() > 0 || executor.getActiveTaskCount() > 0)) hadPending = true; - } + futures.add(executor.submit(() -> {})); } for (Future future : futures) future.get(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 330c78e32cf3..a575da3f8bf0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -419,7 +419,7 @@ public void replay() AccordCommandStore.Loader loader = commandStore.loader(); loader.load(command).get(); if (command.saveStatus().compareTo(SaveStatus.Stable) >= 0 && !command.hasBeen(Truncated)) - loader.apply(command); + loader.apply(command).get(); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index dcb6fdf70d65..4fe985105980 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -186,7 +186,7 @@ public class AccordService implements IAccordService, Shutdownable { private static final Logger logger = LoggerFactory.getLogger(AccordService.class); - private enum State {INIT, STARTED, SHUTDOWN} + private enum State {INIT, STARTED, SHUTTING_DOWN, SHUTDOWN} private static final Future BOOTSTRAP_SUCCESS = ImmediateFuture.success(null); @@ -399,6 +399,7 @@ public boolean shouldAcceptMessages() { return state == State.STARTED && journal.started(); } + public static IAccordService instance() { if (!DatabaseDescriptor.getAccordTransactionsEnabled()) @@ -987,7 +988,8 @@ public synchronized void shutdown() { if (state != State.STARTED) return; - ExecutorUtils.shutdownSequentiallyAndWait(shutdownableSubsystems(), 1, TimeUnit.MINUTES); + state = State.SHUTTING_DOWN; + shutdownAndWait(1, TimeUnit.MINUTES); state = State.SHUTDOWN; } @@ -1019,10 +1021,10 @@ private List shutdownableSubsystems() @VisibleForTesting @Override - public void shutdownAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException + public void shutdownAndWait(long timeout, TimeUnit unit) { - shutdown(); - ExecutorUtils.shutdownAndWait(timeout, unit, this); + if (!ExecutorUtils.shutdownSequentiallyAndWait(shutdownableSubsystems(), timeout, unit)) + logger.error("One or more subsystems did not shut down cleanly."); } @Override diff --git a/src/java/org/apache/cassandra/utils/ExecutorUtils.java b/src/java/org/apache/cassandra/utils/ExecutorUtils.java index 83fc72530a9c..b37c8ac9627c 100644 --- a/src/java/org/apache/cassandra/utils/ExecutorUtils.java +++ b/src/java/org/apache/cassandra/utils/ExecutorUtils.java @@ -79,10 +79,11 @@ else if (executor != null) } } - public static void shutdownSequentiallyAndWait(Iterable executors, long timeout, TimeUnit unit) + public static boolean shutdownSequentiallyAndWait(Iterable executors, long timeout, TimeUnit unit) { long deadline = nanoTime() + unit.toNanos(timeout); + boolean shutdown = true; for (Object executor : executors) { try @@ -90,12 +91,14 @@ public static void shutdownSequentiallyAndWait(Iterable executors, long timeo if (executor instanceof ExecutorService) { ((ExecutorService) executor).shutdown(); - ((ExecutorService) executor).awaitTermination(Math.max(0, deadline - nanoTime()), NANOSECONDS); + if (!((ExecutorService) executor).awaitTermination(Math.max(0, deadline - nanoTime()), NANOSECONDS)) + shutdown = false; } else if (executor instanceof Shutdownable) { ((Shutdownable) executor).shutdown(); - ((Shutdownable) executor).awaitTermination(Math.max(0, deadline - nanoTime()), NANOSECONDS); + if (!((Shutdownable) executor).awaitTermination(Math.max(0, deadline - nanoTime()), NANOSECONDS)) + shutdown = false; } else throw new IllegalArgumentException(executor.toString()); @@ -105,6 +108,8 @@ else if (executor instanceof Shutdownable) throw new IllegalStateException("Caught interrupt while shutting down " + executor); } } + + return shutdown; } public static void shutdown(ExecutorService ... executors) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index 2dd741f5b23e..8f2dabe624e1 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -43,6 +43,7 @@ import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IMessage; import org.apache.cassandra.distributed.api.IMessageFilters; @@ -66,6 +67,7 @@ public static void setUp() throws IOException CassandraRelevantProperties.SIMULATOR_STARTED.setString(Long.toString(MILLISECONDS.toSeconds(currentTimeMillis()))); // AccordTestBase.setupCluster(builder -> builder, 3); AccordTestBase.setupCluster(builder -> builder.withConfig(config -> config + .with(Feature.NETWORK, Feature.GOSSIP) .set("accord.shard_durability_target_splits", "64") .set("accord.shard_durability_cycle", "5m") .set("accord.ephemeral_read_enabled", "true") From a51bed1c775d8dfe42a5a689d3f984a3985f21ed Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Tue, 24 Sep 2024 16:03:34 -0400 Subject: [PATCH 176/340] Accord should not block partition restricted index queries Patch by Ariel Weisberg; Reviewed by David Capwell for CASSANDRA-19955 Non-serial single partition reads on Accord Patch by Ariel Weisberg; Reviewed by Benedict Elliott Smith for CASSANDRA-19951 --- .../cql3/statements/CQL3CasRequest.java | 14 +- .../cql3/statements/TransactionStatement.java | 21 +- .../schema/AlterTableStatement.java | 15 +- .../db/SinglePartitionReadCommand.java | 26 +++ .../apache/cassandra/db/filter/RowFilter.java | 7 + .../cassandra/schema/TableMetadata.java | 4 + .../apache/cassandra/service/CASRequest.java | 3 +- .../cassandra/service/StorageProxy.java | 127 +++++++++--- .../accord/AccordConfigurationService.java | 1 - .../service/accord/IAccordService.java | 11 +- .../cassandra/service/accord/txn/TxnData.java | 11 + .../service/accord/txn/TxnDataName.java | 5 + .../service/accord/txn/TxnNamedRead.java | 9 +- .../cassandra/service/accord/txn/TxnRead.java | 13 +- .../service/consensus/TransactionalMode.java | 54 +++-- .../ConsensusMigrationMutationHelper.java | 3 + .../migration/ConsensusMigrationState.java | 23 ++- .../TransactionalMigrationFromMode.java | 5 + .../tcm/transformations/AlterSchema.java | 16 +- .../test/ShortReadProtectionTest.java | 49 ++--- .../test/accord/AccordCQLTestBase.java | 189 +++++++++++++++++- .../accord/AccordInteroperabilityTest.java | 76 ++++++- .../test/accord/AccordTestBase.java | 1 + .../fuzz/sai/AccordMultiNodeSAITest.java | 35 ++++ .../fuzz/sai/AccordSingleNodeSAITest.java | 27 +++ .../cassandra/fuzz/sai/MultiNodeSAITest.java | 2 +- .../fuzz/sai/MultiNodeSAITestBase.java | 28 ++- .../cassandra/fuzz/sai/SingleNodeSAITest.java | 2 +- .../fuzz/sai/SingleNodeSAITestBase.java | 7 +- .../config/DatabaseDescriptorRefTest.java | 1 + .../statements/TransactionStatementTest.java | 77 +------ .../validation/entities/TupleTypeTest.java | 2 +- 32 files changed, 667 insertions(+), 197 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/fuzz/sai/AccordMultiNodeSAITest.java create mode 100644 test/distributed/org/apache/cassandra/fuzz/sai/AccordSingleNodeSAITest.java diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index 41967b619b86..5dfdce4969ff 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -17,9 +17,6 @@ */ package org.apache.cassandra.cql3.statements; -import org.apache.cassandra.db.marshal.TimeUUIDType; -import org.apache.cassandra.index.IndexRegistry; -import org.apache.cassandra.schema.TableMetadata; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -52,11 +49,15 @@ import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.filter.DataLimits; import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.marshal.TimeUUIDType; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.db.partitions.Partition; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.index.IndexRegistry; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.service.CASRequest; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.txn.TxnCondition; @@ -69,6 +70,7 @@ import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.service.paxos.Ballot; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.TimeUUID; @@ -78,6 +80,7 @@ import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult.casResult; import static org.apache.cassandra.service.accord.txn.TxnDataName.Kind.CAS_READ; import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.getTableMetadata; /** @@ -495,13 +498,14 @@ public String toString() } @Override - public Txn toAccordTxn(ConsistencyLevel consistencyLevel, ConsistencyLevel commitConsistencyLevel, ClientState clientState, long nowInSecs) + public Txn toAccordTxn(ClusterMetadata cm, ConsistencyLevel consistencyLevel, ConsistencyLevel commitConsistencyLevel, ClientState clientState, long nowInSecs) { SinglePartitionReadCommand readCommand = readCommand(nowInSecs); Update update = createUpdate(clientState, commitConsistencyLevel); // If the write strategy is sending all writes through Accord there is no need to use the supplied consistency // level since Accord will manage reading safely - consistencyLevel = metadata.params.transactionalMode.readCLForStrategy(consistencyLevel); + TableParams tableParams = getTableMetadata(cm, metadata.id).params; + consistencyLevel = tableParams.transactionalMode.readCLForStrategy(tableParams.transactionalMigrationFrom, consistencyLevel, cm, metadata.id, readCommand.partitionKey().getToken()); TxnRead read = TxnRead.createCasRead(readCommand, consistencyLevel); // In a CAS requesting only one key is supported and writes // can't be dependent on any data that is read (only conditions) diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index 1320325c3858..eee992ef7502 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -62,6 +62,7 @@ import org.apache.cassandra.db.SinglePartitionReadQuery; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.partitions.FilteredPartition; +import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.ClientState; @@ -79,8 +80,8 @@ import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.accord.txn.TxnUpdate; import org.apache.cassandra.service.accord.txn.TxnWrite; -import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.transport.messages.ResultMessage; import org.apache.cassandra.utils.FBUtilities; @@ -98,6 +99,7 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, private static final Logger logger = LoggerFactory.getLogger(TransactionStatement.class); public static final String DUPLICATE_TUPLE_NAME_MESSAGE = "The name '%s' has already been used by a LET assignment."; + public static final String INCOMPLETE_PARTITION_KEY_SELECT_MESSAGE = "SELECT must specify either all partition key elements. Partition key elements must be always specified with equality operators; %s %s"; public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; %s %s"; public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions; %s statement %s"; public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps; %s statement %s"; @@ -108,6 +110,7 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; public static final String TRANSACTIONS_DISABLED_MESSAGE = "Accord transactions are disabled. (See accord.enabled in cassandra.yaml)"; public static final String ILLEGAL_RANGE_QUERY_MESSAGE = "Range queries are not allowed for reads within a transaction; %s %s"; + public static final String UNSUPPORTED_MIGRATION = "Transaction Statement is unsupported when migrating away from Accord or before migration to Accord is complete for a range"; static class NamedSelect { @@ -384,14 +387,11 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. for (NamedSelect assignment : assignments) checkFalse(isSelectingMultipleClusterings(assignment.select, options), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment", assignment.select.source); - if (returningSelect != null) - checkFalse(isSelectingMultipleClusterings(returningSelect.select, options), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "returning SELECT", returningSelect.select.source); - Txn txn = createTxn(state.getClientState(), options); TxnResult txnResult = AccordService.instance().coordinate(txn, options.getConsistency(), requestTime); if (txnResult.kind() == retry_new_protocol) - throw new IllegalStateException("Transaction statement should never be required to switch consensus protocols"); + throw new InvalidRequestException(UNSUPPORTED_MIGRATION); TxnData data = (TxnData)txnResult; if (returningSelect != null) @@ -561,7 +561,7 @@ public CQLStatement prepare(ClientState state) throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); returningSelect = new NamedSelect(TxnDataName.returning(), prepared); - checkAtMostOneRowSpecified(returningSelect.select, "returning select"); + checkAtMostOnePartitionSpecified(returningSelect.select, "returning select"); } List returningReferences = null; @@ -601,6 +601,15 @@ public CQLStatement prepare(ClientState state) return new TransactionStatement(preparedAssignments, returningSelect, returningReferences, preparedUpdates, preparedConditions, bindVariables); } + /** + * Do not use this method in execution!!! It is only allowed during prepare because it outputs a query raw text. + * We don't want it print it for a user who provided an identifier of someone's else prepared statement. + */ + private static void checkAtMostOnePartitionSpecified(SelectStatement select, String name) + { + checkTrue(select.getRestrictions().hasPartitionKeyRestrictions(), INCOMPLETE_PARTITION_KEY_SELECT_MESSAGE, name, select.source); + } + /** * Do not use this method in execution!!! It is only allowed during prepare because it outputs a query raw text. * We don't want it print it for a user who provided an identifier of someone's else prepared statement. diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java index f8a24b072444..2475e1dcaac5 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java @@ -67,6 +67,7 @@ import org.apache.cassandra.schema.ViewMetadata; import org.apache.cassandra.schema.Views; import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; import org.apache.cassandra.tcm.ClusterMetadata; @@ -88,6 +89,9 @@ public abstract class AlterTableStatement extends AlterSchemaStatement { private static final Logger logger = LoggerFactory.getLogger(AlterTableStatement.class); + public static final String ACCORD_COUNTER_TABLES_UNSUPPORTED = "Counters are not supported with Accord for table %s.%s"; + public static final String ACCORD_COUNTER_COLUMN_UNSUPPORTED = "Cannot add a counter column to Accord table %s.%s with transactional mode %s and transactional migration from %s"; + protected final String tableName; private final boolean ifExists; protected ClientState state; @@ -330,6 +334,9 @@ private void addColumn(KeyspaceMetadata keyspace, return; } + if (type.isCounter() && (table.params.transactionalMode.accordIsEnabled || table.params.transactionalMigrationFrom.migratingFromAccord())) + throw ire(format(ACCORD_COUNTER_COLUMN_UNSUPPORTED, keyspaceName, tableName, table.params.transactionalMode, table.params.transactionalMigrationFrom)); + if (table.isCompactTable()) throw ire("Cannot add new column to a COMPACT STORAGE table"); @@ -591,7 +598,7 @@ public void validate(ClientState state) validateDefaultTimeToLive(attrs.asNewTableParams()); } - private TableParams validateAndUpdateTransactionalMigration(TableParams prev, TableParams next) + private TableParams validateAndUpdateTransactionalMigration(boolean isCounter, TableParams prev, TableParams next) { if (next.transactionalMode.accordIsEnabled && SchemaConstants.isSystemKeyspace(keyspaceName)) throw ire("Cannot enable accord on system tables (%s.%s)", keyspaceName, tableName); @@ -601,6 +608,10 @@ private TableParams validateAndUpdateTransactionalMigration(TableParams prev, Ta boolean explicitlySetMigrationFrom = attrs.hasOption(Option.TRANSACTIONAL_MIGRATION_FROM); // set table to migrating TransactionalMigrationFromMode newMigrateFrom = TransactionalMigrationFromMode.fromMode(prev.transactionalMode, next.transactionalMode); + + if (isCounter && (next.transactionalMode != TransactionalMode.off || newMigrateFrom != TransactionalMigrationFromMode.none || next.transactionalMigrationFrom != TransactionalMigrationFromMode.none)) + throw ire(format(ACCORD_COUNTER_TABLES_UNSUPPORTED, keyspaceName, tableName)); + boolean forceMigrationChange = modeChange && explicitlySetMigrationFrom && next.transactionalMigrationFrom != newMigrateFrom; if (modeChange && next.transactionalMode.accordIsEnabled && !DatabaseDescriptor.getAccordTransactionsEnabled()) @@ -655,7 +666,7 @@ public KeyspaceMetadata apply(Epoch epoch, KeyspaceMetadata keyspace, TableMetad if (!params.compression.isEnabled()) Guardrails.uncompressedTablesEnabled.ensureEnabled(state); - params = validateAndUpdateTransactionalMigration(table.params, params); + params = validateAndUpdateTransactionalMigration(table.isCounter(), table.params, params); return keyspace.withSwapped(keyspace.tables.withSwapped(table.withSwapped(params))); } diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index 921d90a47b9e..cdff02aa4f81 100644 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@ -1273,6 +1273,32 @@ public boolean isRangeRequest() return false; } + /* + * The execution method does not need to perform reconciliation so the read command + * should execute in a mannager suited to not needing reconciliation. Such as when + * executing transactionally at a single replica and doing an index scan where the index + * scan should not return extra rows and expect post filtering at the coordinator. + */ + public SinglePartitionReadCommand withoutReconciliation() + { + if (rowFilter().isEmpty()) + return this; + return create(serializedAtEpoch(), + isDigestQuery(), + digestVersion(), + acceptsTransient(), + allowsOutOfRangeReads(), + metadata(), + nowInSec(), + columnFilter(), + rowFilter().withoutReconciliation(), + limits(), + partitionKey(), + clusteringIndexFilter(), + indexQueryPlan(), + isTrackingWarnings()); + } + /** * Groups multiple single partition read commands. */ diff --git a/src/java/org/apache/cassandra/db/filter/RowFilter.java b/src/java/org/apache/cassandra/db/filter/RowFilter.java index 037f077a9127..1303a33feae6 100644 --- a/src/java/org/apache/cassandra/db/filter/RowFilter.java +++ b/src/java/org/apache/cassandra/db/filter/RowFilter.java @@ -396,6 +396,13 @@ public RowFilter without(ColumnMetadata column, Operator op, ByteBuffer value) return withNewExpressions(newExpressions); } + public RowFilter withoutReconciliation() + { + if (needsReconciliation) + return new RowFilter(expressions, false); + return this; + } + public boolean hasNonKeyExpression() { for (Expression e : expressions) diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index 6d663c94151f..c6a6f687f299 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -72,6 +72,7 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.service.reads.SpeculativeRetryPolicy; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.serialization.UDTAndFunctionsAwareMetadataSerializer; @@ -81,6 +82,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.github.jamm.Unmetered; +import static accord.utils.Invariants.checkState; import static com.google.common.collect.Iterables.any; import static com.google.common.collect.Iterables.transform; import static java.lang.String.format; @@ -616,6 +618,8 @@ public void validate() throw new InvalidRequestException(e.getMessage(), e); } } + + checkState((params.transactionalMode == TransactionalMode.off && params.transactionalMigrationFrom == TransactionalMigrationFromMode.none) || !isCounter(), "Counters are not supported with Accord for table " + this); } /** diff --git a/src/java/org/apache/cassandra/service/CASRequest.java b/src/java/org/apache/cassandra/service/CASRequest.java index fb78daa2a597..9a0592f4cd2e 100644 --- a/src/java/org/apache/cassandra/service/CASRequest.java +++ b/src/java/org/apache/cassandra/service/CASRequest.java @@ -25,6 +25,7 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.paxos.Ballot; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.transport.Dispatcher; import static org.apache.cassandra.service.StorageProxy.ConsensusAttemptResult; @@ -53,7 +54,7 @@ public interface CASRequest */ PartitionUpdate makeUpdates(FilteredPartition current, ClientState clientState, Ballot ballot) throws InvalidRequestException; - Txn toAccordTxn(ConsistencyLevel consistencyLevel, ConsistencyLevel commitConsistencyLevel, ClientState clientState, long nowInSecs); + Txn toAccordTxn(ClusterMetadata cm, ConsistencyLevel consistencyLevel, ConsistencyLevel commitConsistencyLevel, ClientState clientState, long nowInSecs); ConsensusAttemptResult toCasResult(TxnResult txnResult); } diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index 240c58f8950c..cfbbd96e63d0 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -37,6 +37,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; +import java.util.function.IntPredicate; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -49,7 +50,6 @@ import org.slf4j.LoggerFactory; import accord.primitives.Txn; -import accord.utils.Invariants; import org.apache.cassandra.batchlog.Batch; import org.apache.cassandra.batchlog.BatchlogManager; import org.apache.cassandra.concurrent.DebuggableTask.RunnableDebuggableTask; @@ -128,10 +128,12 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.service.accord.IAccordService.AsyncTxnResult; import org.apache.cassandra.service.accord.txn.TxnData; +import org.apache.cassandra.service.accord.txn.TxnDataName; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnResult; @@ -139,6 +141,7 @@ import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.SplitConsumer; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.SplitMutations; import org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.service.paxos.Ballot; import org.apache.cassandra.service.paxos.Commit; import org.apache.cassandra.service.paxos.ContentionStrategy; @@ -172,6 +175,7 @@ import static accord.primitives.Txn.Kind.EphemeralRead; import static accord.primitives.Txn.Kind.Read; import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.Iterables.concat; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; @@ -200,6 +204,7 @@ import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.mutateWithAccordAsync; import static org.apache.cassandra.service.consensus.migration.ConsensusMigrationMutationHelper.splitMutationsIntoAccordAndNormal; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.ConsensusRoutingDecision; +import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.getTableMetadata; import static org.apache.cassandra.service.paxos.Ballot.Flag.GLOBAL; import static org.apache.cassandra.service.paxos.Ballot.Flag.LOCAL; import static org.apache.cassandra.service.paxos.BallotGenerator.Global.nextBallot; @@ -347,7 +352,6 @@ public static RowIterator cas(String keyspaceName, Dispatcher.RequestTime requestTime) throws UnavailableException, IsBootstrappingException, RequestFailureException, RequestTimeoutException, InvalidRequestException, CasWriteUnknownResultException { - TableMetadata metadata = Schema.instance.validateTable(keyspaceName, cfName); if (DatabaseDescriptor.getPartitionDenylistEnabled() && DatabaseDescriptor.getDenylistWritesEnabled() && !partitionDenylist.isKeyPermitted(keyspaceName, cfName, key.getKey())) { denylistMetrics.incrementWritesRejected(); @@ -358,6 +362,8 @@ public static RowIterator cas(String keyspaceName, ConsensusAttemptResult lastAttemptResult; do { + ClusterMetadata cm = ClusterMetadata.current(); + TableMetadata metadata = Schema.instance.validateTable(keyspaceName, cfName); ConsensusRoutingDecision decision = consensusRouting(metadata, key, consistencyForPaxos, requestTime, true); switch (decision) { @@ -380,10 +386,11 @@ public static RowIterator cas(String keyspaceName, requestTime); break; case accord: - Txn txn = request.toAccordTxn(consistencyForPaxos, - consistencyForCommit, - clientState, - nowInSeconds); + Txn txn = request.toAccordTxn(cm, + consistencyForPaxos, + consistencyForCommit, + clientState, + nowInSeconds); IAccordService accordService = AccordService.instance(); TxnResult txnResult = accordService.coordinate(txn, consistencyForPaxos, @@ -2138,6 +2145,7 @@ private static PartitionIterator readWithConsensus(SinglePartitionReadCommand.Gr ConsensusAttemptResult lastResult; do { + ClusterMetadata cm = ClusterMetadata.current(); SinglePartitionReadCommand command = group.queries.get(0); ConsensusRoutingDecision decision = consensusRouting(group.metadata(), command.partitionKey(), consistencyLevel, requestTime, false); switch (decision) @@ -2149,7 +2157,7 @@ private static PartitionIterator readWithConsensus(SinglePartitionReadCommand.Gr lastResult = legacyReadWithPaxos(group, consistencyLevel, requestTime); break; case accord: - lastResult = readWithAccord(group, consistencyLevel, requestTime); + lastResult = readWithAccord(cm, group, consistencyLevel, requestTime); break; default: throw new IllegalStateException("Unsupported consensus " + decision); @@ -2158,28 +2166,79 @@ private static PartitionIterator readWithConsensus(SinglePartitionReadCommand.Gr return lastResult.serialReadResult; } - private static ConsensusAttemptResult readWithAccord(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + private static ConsistencyLevel consistencyLevelForAccordRead(ClusterMetadata cm, SinglePartitionReadCommand.Group group, @Nullable ConsistencyLevel consistencyLevel) { - if (group.queries.size() > 1) - throw new InvalidRequestException("SERIAL/LOCAL_SERIAL consistency may only be requested for one partition at a time"); - SinglePartitionReadCommand readCommand = group.queries.get(0); + // Null means no specific consistency behavior is required from Accord, it's functionally similar to + // reading at ONE if you are reading data that wasn't written via Accord + if (consistencyLevel == null) + return null; + + TableId tableId = group.queries.get(0).metadata().id; + TableParams tableParams = getTableMetadata(cm, tableId).params; + TransactionalMode mode = tableParams.transactionalMode; + TransactionalMigrationFromMode migrationFromMode = tableParams.transactionalMigrationFrom; + for (SinglePartitionReadCommand command : group.queries) + { + // readCLForStrategy should return either null or the supplied consistency level + // in which case we will read everything at that CL since Accord doesn't support per table + // read consistency + ConsistencyLevel commitCL = mode.readCLForStrategy(migrationFromMode, consistencyLevel, cm, tableId, command.partitionKey().getToken()); + if (commitCL != null) + return commitCL; + } + return null; + } + + private static ConsensusAttemptResult readWithAccord(ClusterMetadata cm, SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) + { + if (consistencyLevel != null && !IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS.contains(consistencyLevel)) + throw new InvalidRequestException(consistencyLevel + " is not supported by Accord"); // If the non-SERIAL write strategy is sending all writes through Accord there is no need to use the supplied consistency // level since Accord will manage reading safely TransactionalMode transactionalMode = group.metadata().params.transactionalMode; - consistencyLevel = transactionalMode.readCLForStrategy(consistencyLevel); - TxnRead read = TxnRead.createSerialRead(readCommand, consistencyLevel); - Invariants.checkState(read.keys().size() == 1, "Ephemeral reads are only strict-serializable for single partition reads"); - Txn txn = new Txn.InMemory(transactionalMode == TransactionalMode.full && DatabaseDescriptor.getAccordEphemeralReadEnabledEnabled() ? EphemeralRead : Read, read.keys(), read, TxnQuery.ALL, null); - IAccordService accordService = AccordService.instance(); - TxnResult txnResult = accordService.coordinate(txn, consistencyLevel, requestTime); + consistencyLevel = consistencyLevelForAccordRead(cm, group, consistencyLevel); + TxnRead read = TxnRead.createSerialRead(group.queries, consistencyLevel); + Txn.Kind kind = Read; + if (transactionalMode == TransactionalMode.full && DatabaseDescriptor.getAccordEphemeralReadEnabledEnabled() && group.queries.size() == 1) + kind = EphemeralRead; + Txn txn = new Txn.InMemory(kind, read.keys(), read, TxnQuery.ALL, null); + AsyncTxnResult asyncTxnResult = AccordService.instance().coordinateAsync(txn, consistencyLevel, requestTime); + return getConsensusAttemptResultFromAsyncTxnResult(asyncTxnResult, group.queries.size(), index -> group.queries.get(index).isReversed(), consistencyLevel, requestTime); + } + + /* + * Used for both the SERIAL and non-SERIAL read path into Accord + */ + public static ConsensusAttemptResult getConsensusAttemptResultFromAsyncTxnResult(AsyncTxnResult asyncTxnResult, int numQueries, IntPredicate isQueryReversed, ConsistencyLevel cl, Dispatcher.RequestTime requestTime) + { + TxnResult txnResult = AccordService.instance().getTxnResult(asyncTxnResult, false, cl, requestTime); + // TODO (required): Converge on a single approach to RETRY_NEW_PROTOCOL, this works for now because reads don't support it anyways if (txnResult.kind() == retry_new_protocol) return RETRY_NEW_PROTOCOL; - TxnData data = (TxnData)txnResult; - FilteredPartition partition = data.get(TxnRead.SERIAL_READ); - if (partition != null) - return serialReadResult(PartitionIterators.singletonIterator(partition.rowIterator(readCommand.isReversed()))); - else + TxnData data = (TxnData) txnResult; + + if (data.isEmpty()) + { return serialReadResult(EmptyIterators.partition()); + } + else if (data.size() == 1) + { + FilteredPartition value = data.values().iterator().next(); + return serialReadResult(PartitionIterators.singletonIterator(value.rowIterator(isQueryReversed.test(0)))); + } + else + { + // TODO (review): 95% sure this isn't actually needed and the consumer is going consume these by DecoratedKey not iteration order, but the non-transactional path does preserve the order of the iterators + List partitionIterators = new ArrayList<>(numQueries); + for (int i = 0; i < numQueries; i++) + partitionIterators.add(null); + for (Map.Entry e : data.entrySet()) + { + int queryIndex = Integer.valueOf(e.getKey().part(0)); + partitionIterators.set(queryIndex, PartitionIterators.singletonIterator(e.getValue().rowIterator(isQueryReversed.test(queryIndex)))); + } + return serialReadResult(partitionIterators.size() == 1 ? partitionIterators.get(0) : PartitionIterators.concat(partitionIterators)); + } } private static ConsensusAttemptResult legacyReadWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) @@ -2284,7 +2343,29 @@ public static PartitionIterator readRegular(SinglePartitionReadCommand.Group gro long start = nanoTime(); try { - PartitionIterator result = fetchRows(group.queries, consistencyLevel, coordinator, requestTime); + ClusterMetadata cm = ClusterMetadata.current(); + TableId tableId = group.queries.get(0).metadata().id; + // Returns null for local tables + TableMetadata tableMetadata = getTableMetadata(cm, tableId); + if (tableMetadata == null) + tableMetadata = Schema.instance.localKeyspaces().getTableOrViewNullable(tableId); + TableParams tableParams = tableMetadata.params; + + TransactionalMode transactionalMode = tableParams.transactionalMode; +// TransactionalMigrationFromMode transactionalMigrationFromMode = tableParams.transactionalMigrationFrom; + // TODO (required): Tests would fail with this and we need to add live migration support anyways so for now allow it +// if (transactionalMigrationFromMode != TransactionalMigrationFromMode.none) +// throw new UnsupportedOperationException("Live migration is not supported, can't safely read when migrating from " + transactionalMigrationFromMode + " to " + transactionalMode); + + PartitionIterator result; + if (transactionalMode.readsThroughAccord && coordinator.isEventuallyConsistent()) + { + ConsensusAttemptResult consensusAttemptResult = readWithAccord(cm, group, consistencyLevel, requestTime); + checkState(!consensusAttemptResult.shouldRetryOnNewConsensusProtocol, "Live migration is not supported with non-SERIAL reads yet"); + result = consensusAttemptResult.serialReadResult; + } + else + result = fetchRows(group.queries, consistencyLevel, coordinator, requestTime); // Note that the only difference between the command in a group must be the partition key on which // they applied. boolean enforceStrictLiveness = group.queries.get(0).metadata().enforceStrictLiveness(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 4f6f3d16662f..08461249c36d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -29,7 +29,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Sets; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index bba67a91607d..4d3505f626ea 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -18,16 +18,15 @@ package org.apache.cassandra.service.accord; +import java.util.Collection; +import java.util.EnumSet; import java.util.List; -import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import com.google.common.collect.ImmutableSet; - import accord.api.BarrierType; import accord.local.CommandStores.RangesForEpoch; import accord.local.DurableBefore; @@ -59,15 +58,13 @@ import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Future; -import java.util.Collection; - import static com.google.common.base.Preconditions.checkNotNull; public interface IAccordService { - Set SUPPORTED_COMMIT_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.LOCAL_ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); - Set SUPPORTED_READ_CONSISTENCY_LEVELS = ImmutableSet.of(ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL); + EnumSet SUPPORTED_COMMIT_CONSISTENCY_LEVELS = EnumSet.of(ConsistencyLevel.ANY, ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); + EnumSet SUPPORTED_READ_CONSISTENCY_LEVELS = EnumSet.of(ConsistencyLevel.ONE, ConsistencyLevel.QUORUM, ConsistencyLevel.SERIAL, ConsistencyLevel.ALL); IVerbHandler requestHandler(); IVerbHandler responseHandler(); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnData.java b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java index 9c2ae88f838c..6ca4fc508357 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnData.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnData.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord.txn; import java.io.IOException; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -80,6 +81,16 @@ public Set> entrySet() return data.entrySet(); } + public Collection values() + { + return data.values(); + } + + public int size() + { + return data.size(); + } + public boolean isEmpty() { return data.isEmpty(); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java b/src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java index 4f3edbff0b59..562547385754 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnDataName.java @@ -144,6 +144,11 @@ public List getParts() return Collections.unmodifiableList(Arrays.asList(parts)); } + public String part(int index) + { + return parts[index]; + } + public DecoratedKey getDecoratedKey(TableMetadata metadata) { checkKind(Kind.AUTO_READ); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java index 4787e2105b18..d2c92583afcc 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -33,12 +33,12 @@ import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.DebuggableTask; import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.db.partitions.PartitionIterator; -import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -119,7 +119,7 @@ public PartitionKey key() return key; } - public AsyncChain read(Timestamp executeAt) + public AsyncChain read(ConsistencyLevel consistencyLevel, Timestamp executeAt) { SinglePartitionReadCommand command = (SinglePartitionReadCommand) get(); // TODO (required, safety): before release, double check reasoning that this is safe @@ -129,6 +129,8 @@ public AsyncChain read(Timestamp executeAt) // this simply looks like the transaction witnessed TTL'd data and the data then expired // immediately after the transaction executed, and this simplifies things a great deal int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(executeAt.hlc()); + if (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ONE) + command = command.withoutReconciliation(); return performLocalRead(command, nowInSeconds); } @@ -144,8 +146,7 @@ private AsyncChain performLocalRead(SinglePartitionReadCommand command, in SinglePartitionReadCommand read = command.withNowInSec(nowInSeconds); try (ReadExecutionController controller = read.executionController(); - UnfilteredPartitionIterator partition = read.executeLocally(controller); - PartitionIterator iterator = UnfilteredPartitionIterators.filter(partition, read.nowInSec())) + PartitionIterator iterator = UnfilteredPartitionIterators.filter(read.executeLocally(controller), read.nowInSec())) { TxnData result = new TxnData(); if (iterator.hasNext()) diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index 694c3f225a30..f9d09409fb2c 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -36,10 +36,10 @@ import accord.primitives.Ranges; import accord.primitives.Seekable; import accord.primitives.Timestamp; -import org.apache.cassandra.db.SinglePartitionReadCommand; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -100,10 +100,13 @@ public static TxnRead createTxnRead(@Nonnull List items, @Nonnull return new TxnRead(items, txnKeys, consistencyLevel); } - public static TxnRead createSerialRead(SinglePartitionReadCommand readCommand, ConsistencyLevel consistencyLevel) + public static TxnRead createSerialRead(List readCommands, ConsistencyLevel consistencyLevel) { - TxnNamedRead read = new TxnNamedRead(SERIAL_READ, readCommand); - return new TxnRead(ImmutableList.of(read), Keys.of(read.key()), consistencyLevel); + List reads = new ArrayList<>(readCommands.size()); + for (int i = 0; i < readCommands.size(); i++) + reads.add(new TxnNamedRead(TxnDataName.user(String.valueOf(i)), readCommands.get(i))); + Keys keys = Keys.of(reads, TxnNamedRead::key); + return new TxnRead(reads, keys, consistencyLevel); } public static TxnRead createCasRead(SinglePartitionReadCommand readCommand, ConsistencyLevel consistencyLevel) @@ -197,7 +200,7 @@ public Read merge(Read read) public AsyncChain read(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store) { List> results = new ArrayList<>(); - forEachWithKey((PartitionKey) key, read -> results.add(read.read(executeAt))); + forEachWithKey((PartitionKey) key, read -> results.add(read.read(cassandraConsistencyLevel, executeAt))); if (results.isEmpty()) // Result type must match everywhere diff --git a/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java b/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java index 25524404ec79..9a50c4da7cad 100644 --- a/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java +++ b/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java @@ -19,9 +19,17 @@ package org.apache.cassandra.service.consensus; import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.consensus.migration.TableMigrationState; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.LocalizeString; +import static com.google.common.base.Preconditions.checkState; + /* * Configure the transactional behavior of a table. Enables accord on a table and defines how it mixes with non-serial writes * @@ -58,53 +66,54 @@ public enum TransactionalMode { // Running on Paxos V1 or V2 with Accord disabled - off(false, false, false, false), + off(false, false, false, false, false), /* * Execute writes through Cassandra via StorageProxy's normal write path. This can lead Accord to compute * multiple outcomes for a transaction that depends on data written by non-SERIAL writes. */ - unsafe(true, false, false, false), + unsafe(true, false, false, false, false), /* * Allow mixing of non-SERIAL writes and Accord, but still force BRR through Accord. * This mode makes it safe to perform non-SERIAL or SERIAL reads of Accord data, but unsafe * to write data that Accord may attempt to read. */ - unsafe_writes(true, false, false, true), + unsafe_writes(true, false, false, false, true), /* * Execute writes through Accord skipping StorageProxy's normal write path, but commit * writes at the provided consistency level so they can be read via non-SERIAL consistency levels. * This mode makes it safe to read/write data that Accord will read/write. */ - mixed_reads(true, false, true, true), + mixed_reads(true, false, true, false, true), /* * Execute writes through Accord skipping StorageProxy's normal write path. Ignores the provided consistency level * which makes Accord commit writes at ANY similar to Paxos with commit consistency level ANY. */ - full(true, true, true, true); + full(true, true, true, true, true); public final boolean accordIsEnabled; - public final boolean ignoresSuppliedConsistencyLevel; + public final boolean ignoresSuppleidCommitCL; public final boolean writesThroughAccord; - + public final boolean readsThroughAccord; public final boolean blockingReadRepairThroughAccord; private final String cqlParam; - TransactionalMode(boolean accordIsEnabled, boolean ignoresSuppliedConsistencyLevel, boolean writesThroughAccord, boolean blockingReadRepairThroughAccord) + TransactionalMode(boolean accordIsEnabled, boolean ignoresSuppleidCommitCL, boolean writesThroughAccord, boolean readsThroughAccord, boolean blockingReadRepairThroughAccord) { this.accordIsEnabled = accordIsEnabled; - this.ignoresSuppliedConsistencyLevel = ignoresSuppliedConsistencyLevel; + this.ignoresSuppleidCommitCL = ignoresSuppleidCommitCL; this.writesThroughAccord = writesThroughAccord; + this.readsThroughAccord = readsThroughAccord; this.blockingReadRepairThroughAccord = blockingReadRepairThroughAccord; this.cqlParam = String.format("transactional_mode = '%s'", LocalizeString.toLowerCaseLocalized(this.name())); } public ConsistencyLevel commitCLForStrategy(ConsistencyLevel consistencyLevel) { - if (ignoresSuppliedConsistencyLevel) + if (ignoresSuppleidCommitCL) return null; if (!IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(consistencyLevel)) @@ -113,14 +122,25 @@ public ConsistencyLevel commitCLForStrategy(ConsistencyLevel consistencyLevel) return consistencyLevel; } - // TODO (required): This won't work for migration directly from none to full because there is no safe system to read from - // during the first phase (repair). Accord won't read correctly beacuse it won't honor the CL and miss non-transactional writes that haven't been repaired and non-transactional - // reads will miss all the writes being routed through Accord since they occur asynchronously. Something has to give here where either writes routed through are Accord are synchronous at CL - // or reads are routed through Accord and read at quorum as long as the range has not completed the first phase (repair). - public ConsistencyLevel readCLForStrategy(ConsistencyLevel consistencyLevel) + private boolean ignoresSuppliedReadCL() { - if (ignoresSuppliedConsistencyLevel) - return null; + return writesThroughAccord && blockingReadRepairThroughAccord; + } + + public ConsistencyLevel readCLForStrategy(TransactionalMigrationFromMode fromMode, ConsistencyLevel consistencyLevel, ClusterMetadata cm, TableId tableId, Token token) + { + if (ignoresSuppliedReadCL()) + { + TableMigrationState tms = cm.consensusMigrationState.tableStates.get(tableId); + checkState(tms != null || fromMode == TransactionalMigrationFromMode.none); + + // Only ignore the supplied consistency level if the token is not migrating + // otherwise honor it because we might read through Accord for non-SERIAL reads before repair is run + // this is OK to do because BRR still works and Accord isn't computing a write so recovery + // determinism isn't an issue + if (tms == null || Range.isInNormalizedRanges(token, tms.migratedRanges)) + return null; + } if (!IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS.contains(consistencyLevel)) throw new UnsupportedOperationException("Consistency level " + consistencyLevel + " is unsupported with Accord for read, supported are ONE, QUORUM, and SERIAL"); diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java index 32997bfb0c3b..b3d742209029 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java @@ -42,6 +42,7 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; @@ -235,6 +236,8 @@ public AsyncTxnResult mutateWithAccordAsync(ClusterMetadata cm, Mutation mutatio public static AsyncTxnResult mutateWithAccordAsync(ClusterMetadata cm, Collection mutations, @Nullable ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { + if (consistencyLevel != null && !IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(consistencyLevel)) + throw new InvalidRequestException(consistencyLevel + " is not supported by Accord"); int fragmentIndex = 0; List fragments = new ArrayList<>(mutations.size()); List partitionKeys = new ArrayList<>(mutations.size()); diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java index 731aabb73507..b2d1195b1a8a 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationState.java @@ -50,6 +50,7 @@ import org.apache.cassandra.utils.PojoToString; import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; import static org.apache.cassandra.utils.CollectionSerializers.deserializeMap; import static org.apache.cassandra.utils.CollectionSerializers.newHashMap; import static org.apache.cassandra.utils.CollectionSerializers.serializeMap; @@ -136,16 +137,20 @@ public ConsensusMigrationState withReversedMigrations(Map current, ImmutableMap.Builder next, TableMetadata metadata, List> ranges, boolean overwrite) { - TableMigrationState tableState; + TableMigrationState tableState = current.get(metadata.id); + checkState(tableState != null || overwrite, "Can't begin migrating a table without first altering the schema to set transactional mode"); + TransactionalMigrationFromMode migrationFromMode = metadata.params.transactionalMigrationFrom; ConsensusMigrationTarget target = ConsensusMigrationTarget.fromTransactionalMode(metadata.params.transactionalMode); - if (!overwrite && current.containsKey(metadata.id)) - { - tableState = current.get(metadata.id).withRangesMigrating(ranges, target); - } + checkState(migrationFromMode != null && migrationFromMode != TransactionalMigrationFromMode.none, "Table transactional migration from can't be null or none"); + + Map>> migratingRangesByEpoch = ImmutableMap.of(); + if (!ranges.isEmpty()) + ImmutableMap.of(Epoch.EMPTY, ranges); + + if (overwrite) + tableState = new TableMigrationState(metadata.keyspace, metadata.name, metadata.id, target, ImmutableSet.of(), migratingRangesByEpoch); else - { - tableState = new TableMigrationState(metadata.keyspace, metadata.name, metadata.id, target, ImmutableSet.of(), ImmutableMap.of(Epoch.EMPTY, ranges)); - } + tableState = tableState.withRangesMigrating(ranges, target); next.put(metadata.id, tableState); } @@ -238,7 +243,7 @@ public void validateAgainstSchema(DistributedSchema schema) { tableStates.forEach((id, migrationState) -> { TableMetadata metadata = schema.getTableMetadata(id); - Preconditions.checkState(ConsensusMigrationTarget.fromTransactionalMode(metadata.params.transactionalMode).equals(migrationState.targetProtocol)); + checkState(ConsensusMigrationTarget.fromTransactionalMode(metadata.params.transactionalMode).equals(migrationState.targetProtocol)); }); } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java b/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java index 517e8fd87268..5b7bc0a87bc1 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/TransactionalMigrationFromMode.java @@ -78,6 +78,11 @@ public boolean writesThroughAccord() return from != null && from.writesThroughAccord; } + public boolean readsThroughAccord() + { + return from != null && from.writesThroughAccord; + } + public boolean isMigrating() { return this != none; diff --git a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java index ba72930fb23c..09c5b68bb5f1 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java +++ b/src/java/org/apache/cassandra/tcm/transformations/AlterSchema.java @@ -30,13 +30,14 @@ import java.util.stream.Stream; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; import com.google.common.collect.Streams; -import org.apache.cassandra.config.AccordSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.cql3.statements.schema.AlterSchemaStatement; +import org.apache.cassandra.config.AccordSpec; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.statements.schema.AlterSchemaStatement; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.AlreadyExistsException; @@ -70,8 +71,9 @@ import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.vint.VIntCoding; -import static org.apache.cassandra.cql3.statements.schema.AlterSchemaStatement.NO_EXECUTION_TIMESTAMP; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static org.apache.cassandra.cql3.statements.schema.AlterSchemaStatement.NO_EXECUTION_TIMESTAMP; import static org.apache.cassandra.exceptions.ExceptionCode.ALREADY_EXISTS; import static org.apache.cassandra.exceptions.ExceptionCode.CONFIG_ERROR; import static org.apache.cassandra.exceptions.ExceptionCode.INVALID; @@ -298,6 +300,9 @@ public static Transformer maybeUpdateConsensusMigrationState(ConsensusMigrationS .map(alt -> alt.after) .collect(Collectors.toUnmodifiableSet()); + Set startedAndReversed = Sets.intersection(started.stream().map(TableMetadata::id).collect(Collectors.toSet()), reversals.keySet()); + checkState(startedAndReversed.isEmpty(), "Set of tables starting migration and reversing migration should not intersect"); + if (!started.isEmpty()) { List> ranges; @@ -314,8 +319,9 @@ public static Transformer maybeUpdateConsensusMigrationState(ConsensusMigrationS break; } - if (!ranges.isEmpty()) - migrationState = migrationState.withRangesMigrating(started, ranges, true); + // Always create the migration state even if nothing is currently migrating, the empty state + // signals that a migration is in progress with no migrating ranges and corresponds to transactionalMigrationFrom != none + migrationState = migrationState.withRangesMigrating(started, ranges, true); } migrationState = migrationState.withReversedMigrations(reversals, next.epoch()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java index 4d2ae2df7ad3..c9ab6b30cc54 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ShortReadProtectionTest.java @@ -120,12 +120,7 @@ public static void teardownCluster() @Before public void setupTester() { - tester = new Tester(readConsistencyLevel, flush, paging); - } - - private String transactionalModeCQL() - { - return " WITH transactional_mode='" + transactionalMode + '\''; + tester = new Tester(readConsistencyLevel, flush, paging, transactionalMode); } @After @@ -144,7 +139,7 @@ public void teardownTester() @Test public void testSkinnyTableWithoutLiveRows() { - tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)") .allNodes("INSERT INTO %s (id) VALUES (0) USING TIMESTAMP 0") .toNode1("DELETE FROM %s WHERE id = 0") .assertRows("SELECT DISTINCT id FROM %s WHERE id = 0") @@ -161,7 +156,7 @@ public void testSkinnyTableWithoutLiveRows() @Test public void testSkinnyTableWithLiveRows() { - tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)") .allNodes(0, 10, i -> format("INSERT INTO %%s (id) VALUES (%d) USING TIMESTAMP 0", i)) // order is 5,1,8,0,2,4,7,6,9,3 .toNode1("DELETE FROM %s WHERE id IN (1, 0, 4, 6, 3)") // delete every other row .assertRows("SELECT DISTINCT token(id), id FROM %s", @@ -178,7 +173,7 @@ public void testSkinnyTableWithLiveRows() @Test public void testSkinnyTableWithComplementaryDeletions() { - tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (id int PRIMARY KEY)") .allNodes(0, 10, i -> format("INSERT INTO %%s (id) VALUES (%d) USING TIMESTAMP 0", i)) // order is 5,1,8,0,2,4,7,6,9,3 .toNode1("DELETE FROM %s WHERE id IN (5, 8, 2, 7, 9)") // delete every other row .toNode2("DELETE FROM %s WHERE id IN (1, 0, 4, 6)") // delete every other row but the last one @@ -196,7 +191,7 @@ public void testSkinnyTableWithComplementaryDeletions() @Test public void testMultipleMissedRows() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") .allNodes(0, 4, i -> format("INSERT INTO %%s (pk, ck) VALUES (0, %d) USING TIMESTAMP 0", i)) .toNode1("DELETE FROM %s WHERE pk = 0 AND ck IN (1, 2, 3)", "INSERT INTO %s (pk, ck) VALUES (0, 5)") @@ -215,7 +210,7 @@ public void testMultipleMissedRows() @Test public void testAscendingOrder() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") .allNodes(1, 10, i -> format("INSERT INTO %%s (k, c, v) VALUES (0, %d, %d) USING TIMESTAMP 0", i, i * 10)) .toNode1("DELETE FROM %s WHERE k=0 AND c=1") .toNode2("DELETE FROM %s WHERE k=0 AND c=2") @@ -237,7 +232,7 @@ public void testAscendingOrder() @Test public void testDescendingOrder() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") .allNodes(1, 10, i -> format("INSERT INTO %%s (k, c, v) VALUES (0, %d, %d) USING TIMESTAMP 0", i, i * 10)) .toNode1("DELETE FROM %s WHERE k=0 AND c=7") .toNode2("DELETE FROM %s WHERE k=0 AND c=8") @@ -260,7 +255,7 @@ public void testDescendingOrder() @Test public void testDeletePartition() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") .allNodes("INSERT INTO %s (k, c, v) VALUES (0, 1, 10) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0") @@ -273,7 +268,7 @@ public void testDeletePartition() @Test public void testDeletePartitionWithStatic() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))") .allNodes("INSERT INTO %s (k, c, v, s) VALUES (0, 1, 10, 100) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0") @@ -286,7 +281,7 @@ public void testDeletePartitionWithStatic() @Test public void testDeleteClustering() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY(k, c))") .allNodes("INSERT INTO %s (k, c, v) VALUES (0, 1, 10) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0 AND c=1") @@ -301,7 +296,7 @@ public void testDeleteClustering() @Test public void testDeleteClusteringWithStatic() { - tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (k int, c int, v int, s int STATIC, PRIMARY KEY(k, c))") .allNodes("INSERT INTO %s (k, c, v, s) VALUES (0, 1, 10, 100) USING TIMESTAMP 0", "INSERT INTO %s (k, c, v) VALUES (0, 2, 20) USING TIMESTAMP 0") .toNode2("DELETE FROM %s WHERE k=0 AND c=1") @@ -318,7 +313,7 @@ public void testDeleteClusteringWithStatic() @Test public void testGroupByRegularRow() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") .toNode1("INSERT INTO %s (pk, ck) VALUES (1, 1) USING TIMESTAMP 0", "DELETE FROM %s WHERE pk=0 AND ck=0", "INSERT INTO %s (pk, ck) VALUES (2, 2) USING TIMESTAMP 0") @@ -341,7 +336,7 @@ public void testGroupByRegularRow() @Test public void testGroupByStaticRow() { - tester.createTable("CREATE TABLE %s (pk int, ck int, s int static, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (pk int, ck int, s int static, PRIMARY KEY (pk, ck))") .toNode1("INSERT INTO %s (pk, s) VALUES (1, 1) USING TIMESTAMP 0", "INSERT INTO %s (pk, s) VALUES (0, null)", "INSERT INTO %s (pk, s) VALUES (2, 2) USING TIMESTAMP 0") @@ -364,7 +359,7 @@ public void testGroupByStaticRow() @Test public void testSkipEarlyTermination() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") .toNode1("INSERT INTO %s (pk, ck) VALUES (0, 0)") .toNode2("DELETE FROM %s WHERE pk = 0 AND ck IN (1, 2)") .assertRows("SELECT DISTINCT pk FROM %s", row(0)); @@ -381,7 +376,7 @@ public void testSkipEarlyTermination() @Test public void testSkipEarlyTerminationRows() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") .toNode1("INSERT INTO %s (pk, ck) VALUES (0, 0) USING TIMESTAMP 0", "INSERT INTO %s (pk, ck) VALUES (0, 1) USING TIMESTAMP 0", "INSERT INTO %s (pk, ck) VALUES (2, 0) USING TIMESTAMP 0", @@ -405,7 +400,7 @@ public void testSkipEarlyTerminationRows() @Test public void testSkipEarlyTerminationPartitions() { - tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))" + transactionalModeCQL()) + tester.createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck))") .toNode1("INSERT INTO %s (pk, ck) VALUES (0, 0) USING TIMESTAMP 0", "INSERT INTO %s (pk, ck) VALUES (0, 1) USING TIMESTAMP 0", "DELETE FROM %s USING TIMESTAMP 42 WHERE pk = 2 AND ck IN (0, 1)") @@ -432,16 +427,18 @@ private static class Tester private final boolean flush, paging; private final String table; private final String qualifiedTableName; + private final TransactionalMode transactionalMode; private boolean flushed = false; - private Tester(ConsistencyLevel readConsistencyLevel, boolean flush, boolean paging) + private Tester(ConsistencyLevel readConsistencyLevel, boolean flush, boolean paging, TransactionalMode transactionalMode) { this.readConsistencyLevel = readConsistencyLevel; this.flush = flush; this.paging = paging; this.table = "t_" + seqNumber.getAndIncrement(); qualifiedTableName = KEYSPACE + '.' + table; + this.transactionalMode = transactionalMode; assert readConsistencyLevel == ALL || readConsistencyLevel == QUORUM || readConsistencyLevel == SERIAL : "Only ALL and QUORUM consistency levels are supported"; @@ -449,7 +446,13 @@ private Tester(ConsistencyLevel readConsistencyLevel, boolean flush, boolean pag private Tester createTable(String query) { - cluster.schemaChange(format(query) + " AND read_repair='NONE'"); + cluster.schemaChange(format(query) + " WITH read_repair='NONE'"); + if (transactionalMode != TransactionalMode.off) + { + // For test purposes we create the table and require migration otherwise Accord + // won't bother to do interop reads with short read protection + cluster.schemaChange(format("ALTER TABLE %s WITH transactional_mode='" + transactionalMode + "\' AND transactional_migration_from = \'off\'")); + } return this; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java index 824498b229c0..bbf9f30b6ea4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordCQLTestBase.java @@ -55,19 +55,25 @@ import org.apache.cassandra.distributed.api.QueryResults; import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.distributed.shared.AssertUtils; +import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.consensus.TransactionalMode; +import org.apache.cassandra.service.consensus.migration.TransactionalMigrationFromMode; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FailingConsumer; import org.assertj.core.api.Assertions; +import static java.lang.String.format; import static java.util.Collections.singletonList; import static org.apache.cassandra.cql3.CQLTester.row; +import static org.apache.cassandra.cql3.statements.schema.AlterTableStatement.ACCORD_COUNTER_COLUMN_UNSUPPORTED; +import static org.apache.cassandra.cql3.statements.schema.AlterTableStatement.ACCORD_COUNTER_TABLES_UNSUPPORTED; import static org.apache.cassandra.distributed.util.QueryResultUtil.assertThat; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; public abstract class AccordCQLTestBase extends AccordTestBase { @@ -92,12 +98,189 @@ public static void setupClass() throws IOException SHARED_CLUSTER.schemaChange("CREATE TYPE " + KEYSPACE + ".person (height int, age int)"); } + @Test + public void testCounterCreateTableTransactionalModeFails() throws Exception + { + try + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v counter, primary key (k, c)) WITH " + transactionalMode.asCqlParam(), cluster -> {}); + fail("Expected exception"); + } + catch (Throwable t) + { + assertEquals(IllegalStateException.class.getName(), t.getClass().getName()); + assertEquals(format(ACCORD_COUNTER_TABLES_UNSUPPORTED, KEYSPACE, accordTableName), t.getMessage()); + } + } + + @Test + public void testCounterCreateTableTransactionalMigrationFromModeFails() throws Exception + { + try + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v counter, primary key (k, c)) WITH transactional_migration_from = '" + transactionalMode.name() + "'", cluster -> {}); + fail("Expected exception"); + } + catch (Throwable t) + { + assertEquals(IllegalStateException.class.getName(), t.getClass().getName()); + assertEquals(format(ACCORD_COUNTER_TABLES_UNSUPPORTED, KEYSPACE, accordTableName), t.getMessage()); + } + } + + @Test + public void testCounterAlterTableTransactionalModeFails() throws Exception + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v counter, primary key (k, c))", cluster -> { + try + { + cluster.coordinator(1).execute("ALTER TABLE " + qualifiedAccordTableName + " WITH transactional_mode = '" + transactionalMode.name() + "';", ConsistencyLevel.ALL); + fail("Expected exception"); + } + catch (Throwable t) + { + assertEquals(InvalidRequestException.class.getName(), t.getClass().getName()); + assertEquals(format(ACCORD_COUNTER_TABLES_UNSUPPORTED, KEYSPACE, accordTableName), t.getMessage()); + } + }); + } + + @Test + public void testCounterAlterTableTransactionalMigrationFromModeFails() throws Exception + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v counter, primary key (k, c))", cluster -> { + try + { + cluster.coordinator(1).execute("ALTER TABLE " + qualifiedAccordTableName + " WITH transactional_migration_from = '" + transactionalMode.name() + "';", ConsistencyLevel.ALL); + fail("Expected exception"); + } + catch (Throwable t) + { + assertEquals(InvalidRequestException.class.getName(), t.getClass().getName()); + assertEquals(format(ACCORD_COUNTER_TABLES_UNSUPPORTED, KEYSPACE, accordTableName), t.getMessage()); + } + }); + } + + @Test + public void testCounterAddColumnFailsWithAccord() throws Exception + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH " + transactionalMode.asCqlParam(), cluster -> { + try + { + cluster.coordinator(1).execute("ALTER TABLE " + qualifiedAccordTableName + " ADD (v2 counter);", ConsistencyLevel.ALL); + fail("Expected exception"); + } + catch (Throwable t) + { + assertEquals(InvalidRequestException.class.getName(), t.getClass().getName()); + assertEquals(format(ACCORD_COUNTER_COLUMN_UNSUPPORTED, KEYSPACE, accordTableName, transactionalMode, TransactionalMigrationFromMode.none), t.getMessage()); + } + }); + } + + @Test + public void testCounterAddColumnFailsWithMigration() throws Exception + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, s int static, v int, primary key (k, c)) WITH " + transactionalMode.asCqlParam(), cluster -> { + try + { + cluster.coordinator(1).execute("ALTER TABLE " + qualifiedAccordTableName + " WITH transactional_mode = '" + TransactionalMode.off + "';", ConsistencyLevel.ALL); + cluster.coordinator(1).execute("ALTER TABLE " + qualifiedAccordTableName + " ADD (v2 counter);", ConsistencyLevel.ALL); + fail("Expected exception"); + } + catch (Throwable t) + { + assertEquals(InvalidRequestException.class.getName(), t.getClass().getName()); + assertEquals(format(ACCORD_COUNTER_COLUMN_UNSUPPORTED, KEYSPACE, accordTableName, TransactionalMode.off, transactionalMode), t.getMessage()); + } + }); + } + @Override protected void test(FailingConsumer fn) throws Exception { test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, primary key (k, c)) WITH " + transactionalMode.asCqlParam(), fn); } + @Test + public void testPartitionMultiRowReturn() throws Exception + { + test(cluster -> { + for (int i = 0; i < 3; i++) + cluster.coordinator(1).execute(wrapInTxn("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?)"), ConsistencyLevel.ALL, 42, 43 + i, 44 + i); + + String txn = "BEGIN TRANSACTION " + + "SELECT * " + + "FROM " + qualifiedAccordTableName + " " + + "WHERE k = 42;" + + "COMMIT TRANSACTION;"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(txn, ConsistencyLevel.SERIAL); + assertThat(result).hasSize(3) + .contains(42, 43, 44) + .contains(42, 44, 45) + .contains(42, 45, 46); + }); + } + + @Test + public void testSaiMultiRowReturn() throws Exception + { + test(cluster -> { + cluster.schemaChange("CREATE INDEX ON " + qualifiedAccordTableName + "(v) USING 'sai';"); + for (int i = 0; i < 3; i++) + cluster.coordinator(1).execute(wrapInTxn("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?)"), ConsistencyLevel.ALL, 42, 43 + i, 44 + i); + + String txn = "BEGIN TRANSACTION " + + "SELECT * " + + "FROM " + qualifiedAccordTableName + " " + + "WHERE k = 42 AND v = 45;" + + "COMMIT TRANSACTION;"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(txn, ConsistencyLevel.SERIAL); + assertThat(result).hasSize(1) + .contains(42, 44, 45); + }); + } + + // This fails and it is expected, mostly just here as documentation until it is fixed + @Test + public void testSasiMultiRowReturn() throws Exception + { + test(cluster -> { + cluster.schemaChange("CREATE INDEX ON " + qualifiedAccordTableName + "(v) USING 'org.apache.cassandra.index.sasi.SASIIndex';"); + for (int i = 0; i < 3; i++) + cluster.coordinator(1).execute(wrapInTxn("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?)"), ConsistencyLevel.ALL, 42, 43 + i, 44 + i); + + String txn = "BEGIN TRANSACTION " + + "SELECT * " + + "FROM " + qualifiedAccordTableName + " " + + "WHERE k = 42 AND v = 45;" + + "COMMIT TRANSACTION;"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(txn, ConsistencyLevel.SERIAL); + assertThat(result).hasSize(1) + .contains(42, 44, 45); + }); + } + + @Test + public void testLegacy2iMultiRowReturn() throws Exception + { + test(cluster -> { + cluster.schemaChange("CREATE INDEX ON " + qualifiedAccordTableName + "(v);"); + for (int i = 0; i < 3; i++) + cluster.coordinator(1).execute(wrapInTxn("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, ?, ?)"), ConsistencyLevel.ALL, 42, 43 + i, 44 + i); + + String txn = "BEGIN TRANSACTION " + + "SELECT * " + + "FROM " + qualifiedAccordTableName + " " + + "WHERE k = 42 AND v = 45;" + + "COMMIT TRANSACTION;"; + SimpleQueryResult result = cluster.coordinator(1).executeWithResult(txn, ConsistencyLevel.SERIAL); + assertThat(result).hasSize(1) + .contains(42, 44, 45); + }); + } + @Test public void testNonExistingKeyWithStaticUpdate() throws Exception { @@ -204,8 +387,8 @@ public void testMultipleShards() throws Exception cluster.get(1).runOnInstance(() -> { StringBuilder sb = new StringBuilder("BEGIN TRANSACTION\n"); for (int i = 0; i < keyStrings.size() - 1; i++) - sb.append(String.format("LET row%d = (SELECT * FROM %s WHERE k=%s AND c=0);\n", i, currentTable, keyStrings.get(i))); - sb.append(String.format("SELECT * FROM %s WHERE k=%s AND c=0;\n", currentTable, keyStrings.get(keyStrings.size() - 1))); + sb.append(format("LET row%d = (SELECT * FROM %s WHERE k=%s AND c=0);\n", i, currentTable, keyStrings.get(i))); + sb.append(format("SELECT * FROM %s WHERE k=%s AND c=0;\n", currentTable, keyStrings.get(keyStrings.size() - 1))); sb.append("COMMIT TRANSACTION"); Unseekables routables = AccordTestUtils.createTxn(sb.toString()).keys().toParticipants(); @@ -391,7 +574,7 @@ private void assertResultsFromAccordMatches(Cluster cluster, String accordRead, { accordRead = wrapInTxn(accordRead); Object[][] simpleReadResult; - if (transactionalMode.ignoresSuppliedConsistencyLevel) + if (transactionalMode.ignoresSuppleidCommitCL) // With accord non-SERIAL write strategy the commit CL is effectively ANY so we need to read at SERIAL simpleReadResult = cluster.coordinator(1).execute(simpleRead, ConsistencyLevel.SERIAL, key); else diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java index 1b8bfe898500..fcd9b1dabebb 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordInteroperabilityTest.java @@ -19,15 +19,21 @@ package org.apache.cassandra.distributed.test.accord; import java.io.IOException; +import java.util.function.Function; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.shared.AssertUtils; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.service.accord.IAccordService; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; public class AccordInteroperabilityTest extends AccordTestBase { @@ -52,7 +58,7 @@ public void testSerialReadDescending() throws Throwable cluster -> { ICoordinator coordinator = cluster.coordinator(1); for (int i = 1; i <= 10; i++) - coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i, i * 10); + coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (0, ?, ?) USING TIMESTAMP 0;", org.apache.cassandra.distributed.api.ConsistencyLevel.ALL, i, i * 10); assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 1", AssertUtils.row(10, 100)); assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 2", AssertUtils.row(10, 100), AssertUtils.row(9, 90)); assertRowSerial(cluster, "SELECT c, v FROM " + qualifiedAccordTableName + " WHERE k=0 ORDER BY c DESC LIMIT 3", AssertUtils.row(10, 100), AssertUtils.row(9, 90), AssertUtils.row(8, 80)); @@ -60,4 +66,70 @@ public void testSerialReadDescending() throws Throwable } ); } + + private static Object[][] assertTargetAccordRead(Function query, int coordinatorIndex, int key, int expectedAccordReadCount) + { + int startingReadCount = getAccordReadCount(coordinatorIndex); + Object[][] result = query.apply(key); + assertEquals("Accord reads", expectedAccordReadCount, getAccordReadCount(coordinatorIndex) - startingReadCount); + return result; + } + + private static Object[][] assertTargetAccordWrite(Function query, int coordinatorIndex, int key, int expectedAccordWriteCount) + { + int startingWriteCount = getAccordWriteCount(coordinatorIndex); + Object[][] result = query.apply(key); + assertEquals("Accord writes", expectedAccordWriteCount, getAccordWriteCount(coordinatorIndex) - startingWriteCount); + return result; + } + + @Test + public void testNonSerialReadIsThroughAccordFull() throws Throwable + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='full'", + cluster -> { + for (ConsistencyLevel cl : ConsistencyLevel.values()) + { + try + { + if (cl == ConsistencyLevel.ANY || cl == ConsistencyLevel.NODE_LOCAL) + continue; + assertTargetAccordRead(key -> cluster.coordinator(1).execute("SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?", org.apache.cassandra.distributed.api.ConsistencyLevel.valueOf(cl.name()), key), 1, 1, 1); + if (!IAccordService.SUPPORTED_READ_CONSISTENCY_LEVELS.contains(cl)) + fail("Unsupported consistency level succeeded"); + + } + catch (Throwable t) + { + assertEquals(InvalidRequestException.class.getName(), t.getClass().getName()); + assertEquals(cl + " is not supported by Accord", t.getMessage()); + } + } + }); + } + + @Test + public void testNonSerialWriteIsThroughAccordFull() throws Throwable + { + test("CREATE TABLE " + qualifiedAccordTableName + " (k int, c int, v int, PRIMARY KEY(k, c)) WITH transactional_mode='full'", + cluster -> { + for (ConsistencyLevel cl : ConsistencyLevel.values()) + { + try + { + assertTargetAccordWrite(key -> cluster.coordinator(1).execute("INSERT INTO " + qualifiedAccordTableName + " (k, c, v) VALUES (?, 43, 44)", org.apache.cassandra.distributed.api.ConsistencyLevel.valueOf(cl.name()), key), 1, 1, 1); + if (!IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(cl)) + fail("Unsupported consistency level succeeded"); + } + catch (Throwable t) + { + assertEquals(InvalidRequestException.class.getName(), t.getClass().getName()); + if (cl == ConsistencyLevel.SERIAL || cl == ConsistencyLevel.LOCAL_SERIAL) + assertEquals("You must use conditional updates for serializable writes", t.getMessage()); + else + assertEquals(cl + " is not supported by Accord", t.getMessage()); + } + } + }); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 92e81c73f378..b3e2407af28d 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -335,6 +335,7 @@ private static Cluster createCluster(int nodes, Function optio Cluster.Builder builder = Cluster.build(nodes) .withoutVNodes() .withConfig(c -> c.with(Feature.GOSSIP) + .set("sasi_indexes_enabled", "true") .set("write_request_timeout", "10s") .set("transaction_timeout", "15s") .set("native_transport_timeout", "30s") diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/AccordMultiNodeSAITest.java b/test/distributed/org/apache/cassandra/fuzz/sai/AccordMultiNodeSAITest.java new file mode 100644 index 000000000000..a69f83b530e3 --- /dev/null +++ b/test/distributed/org/apache/cassandra/fuzz/sai/AccordMultiNodeSAITest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.fuzz.sai; + +import org.junit.BeforeClass; + +public class AccordMultiNodeSAITest extends MultiNodeSAITestBase +{ + @BeforeClass + public static void before() throws Throwable + { + MultiNodeSAITestBase.before(true); + } + + public AccordMultiNodeSAITest() + { + super(true); + } +} diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/AccordSingleNodeSAITest.java b/test/distributed/org/apache/cassandra/fuzz/sai/AccordSingleNodeSAITest.java new file mode 100644 index 000000000000..40164cdcf747 --- /dev/null +++ b/test/distributed/org/apache/cassandra/fuzz/sai/AccordSingleNodeSAITest.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.fuzz.sai; + +public class AccordSingleNodeSAITest extends SingleNodeSAITestBase +{ + public AccordSingleNodeSAITest() + { + super(true); + } +} diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITest.java b/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITest.java index 9ca536921d92..b4774f4c813f 100644 --- a/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITest.java +++ b/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITest.java @@ -22,6 +22,6 @@ public class MultiNodeSAITest extends MultiNodeSAITestBase { public MultiNodeSAITest() { - super(); + super(false); } } \ No newline at end of file diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITestBase.java b/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITestBase.java index 860e779970f0..addf0788f156 100644 --- a/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITestBase.java +++ b/test/distributed/org/apache/cassandra/fuzz/sai/MultiNodeSAITestBase.java @@ -29,24 +29,30 @@ public abstract class MultiNodeSAITestBase extends SingleNodeSAITestBase { - public MultiNodeSAITestBase() + public MultiNodeSAITestBase(boolean withAccord) { - super(); + super(withAccord); } @BeforeClass public static void before() throws Throwable + { + before(false); + } + + @BeforeClass + public static void before(boolean withAccord) throws Throwable { cluster = Cluster.build() - .withNodes(2) - // At lower fetch sizes, queries w/ hundreds or thousands of matches can take a very long time. - .withConfig(defaultConfig().andThen(c -> c.set("range_request_timeout", "180s") - .set("read_request_timeout", "180s") - .set("write_request_timeout", "180s") - .set("native_transport_timeout", "180s") - .set("slow_query_log_timeout", "180s") - .with(GOSSIP).with(NETWORK))) - .createWithoutStarting(); + .withNodes(2) + // At lower fetch sizes, queries w/ hundreds or thousands of matches can take a very long time. + .withConfig(defaultConfig().andThen(c -> c.set("range_request_timeout", "180s") + .set("read_request_timeout", "180s") + .set("write_request_timeout", "180s") + .set("native_transport_timeout", "180s") + .set("slow_query_log_timeout", "180s") + .with(GOSSIP).with(NETWORK))) + .createWithoutStarting(); cluster.startup(); cluster = init(cluster); } diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITest.java b/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITest.java index 37a9b1c184b9..84ad2f013b93 100644 --- a/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITest.java +++ b/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITest.java @@ -22,6 +22,6 @@ public class SingleNodeSAITest extends SingleNodeSAITestBase { public SingleNodeSAITest() { - super(); + super(false); } } diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITestBase.java b/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITestBase.java index b915d1748e17..98d924b09bc5 100644 --- a/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITestBase.java +++ b/test/distributed/org/apache/cassandra/fuzz/sai/SingleNodeSAITestBase.java @@ -78,7 +78,12 @@ public abstract class SingleNodeSAITestBase extends TestBaseImpl protected static final Logger logger = LoggerFactory.getLogger(SingleNodeSAITest.class); protected static Cluster cluster; - protected SingleNodeSAITestBase() {} + protected boolean withAccord; + + public SingleNodeSAITestBase(boolean withAccord) + { + this.withAccord = withAccord; + } @BeforeClass public static void before() throws Throwable diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 058871776c8b..2aa0fe2c8365 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -191,6 +191,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.db.guardrails.Values$Config", "org.apache.cassandra.db.rows.UnfilteredSource", "org.apache.cassandra.dht.IPartitioner", + "org.apache.cassandra.dht.RingPosition", "org.apache.cassandra.distributed.api.IInstance", "org.apache.cassandra.distributed.api.IInvokableInstance", "org.apache.cassandra.distributed.api.IIsolatedExecutor", diff --git a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java index 6da221862fc8..da7d9213e8b4 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java @@ -18,8 +18,6 @@ package org.apache.cassandra.cql3.statements; -import org.apache.cassandra.transport.Dispatcher; -import org.assertj.core.api.Assertions; import org.junit.BeforeClass; import org.junit.Test; @@ -32,14 +30,16 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.transport.messages.ResultMessage; +import org.assertj.core.api.Assertions; import static org.apache.cassandra.cql3.statements.TransactionStatement.DUPLICATE_TUPLE_NAME_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.EMPTY_TRANSACTION_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.ILLEGAL_RANGE_QUERY_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PARTITION_KEY_SELECT_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_CONDITIONS_IN_UPDATES_MESSAGE; -import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_COUNTERS_IN_TXNS_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_TIMESTAMPS_IN_UPDATES_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.SELECT_REFS_NEED_COLUMN_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE; @@ -58,7 +58,6 @@ public class TransactionStatementTest private static final TableId TABLE4_ID = TableId.fromString("00000000-0000-0000-0000-000000000004"); private static final TableId TABLE5_ID = TableId.fromString("00000000-0000-0000-0000-000000000005"); private static final TableId TABLE6_ID = TableId.fromString("00000000-0000-0000-0000-000000000006"); - private static final TableId TABLE7_ID = TableId.fromString("00000000-0000-0000-0000-000000000007"); @BeforeClass public static void beforeClass() throws Exception @@ -70,45 +69,7 @@ public static void beforeClass() throws Exception parse("CREATE TABLE tbl3 (k int PRIMARY KEY, \"with spaces\" int, \"with\"\"quote\" int, \"MiXeD_CaSe\" int) WITH transactional_mode = 'full'", "ks").id(TABLE3_ID), parse("CREATE TABLE tbl4 (k int PRIMARY KEY, int_list list) WITH transactional_mode = 'full'", "ks").id(TABLE4_ID), parse("CREATE TABLE tbl5 (k int PRIMARY KEY, v int) WITH transactional_mode = 'full'", "ks").id(TABLE5_ID), - parse("CREATE TABLE tbl6 (k int PRIMARY KEY, c counter) WITH transactional_mode = 'full'", "ks").id(TABLE6_ID), - parse("CREATE TABLE tbl7 (k int PRIMARY KEY, v int) WITH transactional_mode = 'off'", "ks").id(TABLE7_ID)); - } - - @Test - public void shouldRejectCounterMutation() - { - String query = "BEGIN TRANSACTION\n" + - " UPDATE ks.tbl6 SET c += 100 WHERE k = 0;\n" + - "COMMIT TRANSACTION"; - - Assertions.assertThatThrownBy(() -> prepare(query)) - .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(NO_COUNTERS_IN_TXNS_MESSAGE, "UPDATE", "at [2:5]")); - } - - @Test - public void shouldRejectCounterReadInLet() - { - String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM ks.tbl6 WHERE k=0);\n" + - " SELECT row1.c;\n" + - "COMMIT TRANSACTION"; - - Assertions.assertThatThrownBy(() -> prepare(query)) - .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", "at [2:15]")); - } - - @Test - public void shouldRejectCounterReadInSelect() - { - String query = "BEGIN TRANSACTION\n" + - " SELECT * FROM ks.tbl6 WHERE k=0;\n" + - "COMMIT TRANSACTION"; - - Assertions.assertThatThrownBy(() -> prepare(query)) - .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", "at [2:3]")); + parse("CREATE TABLE tbl6 (k int PRIMARY KEY, v int) WITH transactional_mode = 'off'", "ks").id(TABLE6_ID)); } @Test @@ -236,28 +197,6 @@ public void shouldRejectIncompletePrimaryKeyInLet() .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment row1", "at [2:15]")); } - @Test - public void shouldRejectIllegalLimitInSelect() - { - String select = "SELECT * FROM ks.tbl1 WHERE k = 1 LIMIT 2"; - String query = "BEGIN TRANSACTION\n" + select + ";\nCOMMIT TRANSACTION"; - - Assertions.assertThatThrownBy(() -> prepare(query)) - .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "returning select", "at [2:1]")); - } - - @Test - public void shouldRejectIncompletePrimaryKeyInSelect() - { - String select = "SELECT * FROM ks.tbl1 WHERE k = 1"; - String query = "BEGIN TRANSACTION\n" + select + ";\nCOMMIT TRANSACTION"; - - Assertions.assertThatThrownBy(() -> prepare(query)) - .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "returning select", "at [2:1]")); - } - @Test public void shouldRejectUpdateWithCondition() { @@ -386,7 +325,7 @@ public void shouldRejectNormalSelectWithIncompletePartitionKey() Assertions.assertThatThrownBy(() -> prepare(query)) .isInstanceOf(InvalidRequestException.class) - .hasMessageContaining(String.format(ILLEGAL_RANGE_QUERY_MESSAGE, "returning select", "at [2:1]")); + .hasMessageContaining(String.format(INCOMPLETE_PARTITION_KEY_SELECT_MESSAGE, "returning select", "at [2:1]")); } @Test @@ -407,7 +346,7 @@ public void shouldRejectLetSelectWithIncompletePartitionKey() public void shouldRejectLetSelectOnNonTransactionalTable() { String query = "BEGIN TRANSACTION\n" + - " LET row1 = (SELECT * FROM ks.tbl7 WHERE k = 0);\n" + + " LET row1 = (SELECT * FROM ks.tbl6 WHERE k = 0);\n" + " INSERT INTO ks.tbl5 (k, v) VALUES (1, 2);\n" + "COMMIT TRANSACTION;"; @@ -420,7 +359,7 @@ public void shouldRejectLetSelectOnNonTransactionalTable() public void shouldRejectSelectOnNonTransactionalTable() { String query = "BEGIN TRANSACTION\n" + - " SELECT * FROM ks.tbl7 WHERE k = 0;\n" + + " SELECT * FROM ks.tbl6 WHERE k = 0;\n" + "COMMIT TRANSACTION;"; Assertions.assertThatThrownBy(() -> prepare(query)) @@ -432,7 +371,7 @@ public void shouldRejectSelectOnNonTransactionalTable() public void shouldRejectUpdateOnNonTransactionalTable() { String query = "BEGIN TRANSACTION\n" + - " INSERT INTO ks.tbl7 (k, v) VALUES (1, 2);\n" + + " INSERT INTO ks.tbl6 (k, v) VALUES (1, 2);\n" + "COMMIT TRANSACTION;"; Assertions.assertThatThrownBy(() -> prepare(query)) diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/TupleTypeTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/TupleTypeTest.java index 90c9778df41d..2a7bb8e74a13 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/TupleTypeTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/TupleTypeTest.java @@ -174,7 +174,7 @@ public void testTupleWithUnsetValues() throws Throwable createIndex("CREATE INDEX tuple_index ON %s (t)"); // select using unset - assertInvalidMessage("Invalid unset value for tuple field number 0", "SELECT * FROM %s WHERE k = ? and t = (?,?,?)", unset(), unset(), unset(), unset()); + assertInvalidMessage("Invalid unset value for tuple field number 0", "SELECT * FROM %s WHERE k = ? and t = (?,?,?)", 42, unset(), unset(), unset()); } /** From d1af5627053377dcfd87320720130b1dea9d2feb Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 21 Oct 2024 13:51:46 +0200 Subject: [PATCH 177/340] Shut down scheduler with "now" Fix NPE in MockJournal on null onFlush Fix SavedCommandTest. After the serialization change that serializes "changed" before "is null", null flag can no be written. --- .../DistributedMetadataLogKeyspace.java | 4 +-- .../service/accord/AccordService.java | 9 ++++- .../service/accord/api/AccordScheduler.java | 6 +++- .../cassandra/tcm/AbstractLocalProcessor.java | 6 +--- .../tcm/AtomicLongBackedProcessor.java | 14 ++++---- .../cassandra/tcm/ClusterMetadataService.java | 16 ++++++--- src/java/org/apache/cassandra/tcm/Epoch.java | 1 + .../org/apache/cassandra/tcm/FetchCMSLog.java | 22 ++++++++---- .../apache/cassandra/tcm/FetchPeerLog.java | 9 +++-- .../cassandra/tcm/PaxosBackedProcessor.java | 11 ++++-- .../apache/cassandra/tcm/PeerLogFetcher.java | 2 +- .../org/apache/cassandra/tcm/Processor.java | 35 +++++++++++++------ .../cassandra/tcm/ReconstructLogState.java | 30 +++++++++++++--- .../apache/cassandra/tcm/RemoteProcessor.java | 15 +++++--- .../tcm/StubClusterMetadataService.java | 9 ++++- .../apache/cassandra/tcm/log/LocalLog.java | 4 +-- .../apache/cassandra/tcm/log/LogReader.java | 4 +-- .../apache/cassandra/tcm/log/LogStorage.java | 2 +- .../tcm/migration/GossipProcessor.java | 9 ++++- .../distributed/test/CASTestBase.java | 3 ++ .../distributed/test/PaxosRepair2Test.java | 1 - .../test/log/CoordinatorPathTestBase.java | 13 +++++-- .../test/log/ReconstructEpochTest.java | 9 ++--- .../distributed/test/log/TestProcessor.java | 11 ++++-- .../fuzz/topology/TopologyMixupTestBase.java | 7 ++-- .../cassandra/service/accord/MockJournal.java | 3 +- .../service/accord/SavedCommandTest.java | 8 +++-- .../tcm/ValidatingClusterMetadataService.java | 8 ++++- 28 files changed, 196 insertions(+), 75 deletions(-) diff --git a/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java b/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java index 087fa51b8c33..edc9afe17f9c 100644 --- a/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java +++ b/src/java/org/apache/cassandra/schema/DistributedMetadataLogKeyspace.java @@ -172,9 +172,9 @@ public static LogState getLogState(Epoch since, boolean consistentFetch) * here. One more alternative is to keep a lazily-initialized AccordTopology table on CMS nodes for a * number of recent epochs, and keep a node-local cache of this table on other nodes. */ - public static LogState getLogState(Epoch start, Epoch end) + public static LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot) { - return serialLogReader.getLogState(start, end); + return serialLogReader.getLogState(start, end, includeSnapshot); } public static class DistributedTableLogReader implements LogReader diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 4fe985105980..85f491f0f4f6 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -127,6 +127,7 @@ import org.apache.cassandra.journal.Params; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.AccordClientRequestMetrics; +import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; @@ -157,6 +158,7 @@ import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; +import org.apache.cassandra.tcm.Retry; import org.apache.cassandra.tcm.membership.NodeId; import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tracing.Tracing; @@ -551,7 +553,12 @@ private List discoverHistoric(Node node, ClusterMetadataService public static List tcmLoadRange(long min, long max) { - List afterLoad = ClusterMetadataService.instance().processor().reconstructFull(Epoch.create(min), Epoch.create(max)); + List afterLoad = ClusterMetadataService.instance() + .processor() + .reconstruct(Epoch.create(min), Epoch.create(max), + Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS), + TCMMetrics.instance.fetchLogRetries)); + if (Invariants.isParanoid()) Invariants.checkState(afterLoad.get(0).epoch.getEpoch() == min, "Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); while (!afterLoad.isEmpty() && afterLoad.get(0).epoch.getEpoch() < min) diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java index a616cf05d49c..50720952a4f5 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java @@ -86,7 +86,11 @@ public boolean isTerminated() @Override public void shutdown() { - scheduledExecutor.shutdown(); + for (Runnable c : shutdownNow()) + { + if (c instanceof java.util.concurrent.Future) + ((java.util.concurrent.Future) c).cancel(false); + } } @Override diff --git a/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java b/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java index 6d126becc8c8..e5c58ef3adc7 100644 --- a/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java +++ b/src/java/org/apache/cassandra/tcm/AbstractLocalProcessor.java @@ -172,7 +172,6 @@ private Transformation.Result executeStrictly(ClusterMetadata metadata, Transfor } } - private LogState toLogState(Transformation.Success success, Entry.Id entryId, Epoch lastKnown, Transformation transform) { if (lastKnown == null || lastKnown.isDirectlyBefore(success.metadata.epoch)) @@ -197,9 +196,6 @@ private LogState toLogState(Epoch lastKnown) return logState; } - - @Override public abstract ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy); protected abstract boolean tryCommitOne(Entry.Id entryId, Transformation transform, Epoch previousEpoch, Epoch nextEpoch); - -} \ No newline at end of file +} diff --git a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java index 9a43c3eee992..1bf81b60489f 100644 --- a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java @@ -81,7 +81,13 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retry) } @Override - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return getLogState(start, end, includeSnapshot, retryPolicy); + } + + @Override + public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) { try { @@ -130,11 +136,7 @@ public synchronized void append(Entry entry) @Override public synchronized LogState getLogState(Epoch startEpoch) { - ImmutableList.Builder builder = ImmutableList.builder(); - ClusterMetadata latest = metadataSnapshots.getLatestSnapshot(); - Epoch actualSince = latest != null && latest.epoch.isAfter(startEpoch) ? latest.epoch : startEpoch; - entries.stream().filter(e -> e.epoch.isAfter(actualSince)).forEach(builder::add); - return new LogState(latest, builder.build()); + return getLogState(startEpoch, Epoch.MAX); } @Override diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java index 8195c7955109..933c02bd494b 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java @@ -168,15 +168,15 @@ public static State state(ClusterMetadata metadata) { log = logSpec.sync().withStorage(new AtomicLongBackedProcessor.InMemoryStorage()).createLog(); localProcessor = wrapProcessor.apply(new AtomicLongBackedProcessor(log, logSpec.isReset())); - fetchLogHandler = new FetchCMSLog.Handler((e, ignored) -> logSpec.storage().getLogState(e)); } else { log = logSpec.async().createLog(); localProcessor = wrapProcessor.apply(new PaxosBackedProcessor(log)); - fetchLogHandler = new FetchCMSLog.Handler(); } + fetchLogHandler = new FetchCMSLog.Handler(); + Commit.Replicator replicator = CassandraRelevantProperties.TCM_USE_NO_OP_REPLICATOR.getBoolean() ? Commit.Replicator.NO_OP : new Commit.DefaultReplicator(() -> log.metadata().directory); @@ -825,6 +825,7 @@ public boolean commitsPaused() { return commitsPaused.get(); } + /** * Switchable implementation that allow us to go between local and remote implementation whenever we need it. * When the node becomes a member of CMS, it switches back to being a regular member of a cluster, and all @@ -902,9 +903,16 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy return delegate().fetchLogAndWait(waitFor, retryPolicy); } - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + @Override + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return delegate().getLocalState(start, end, includeSnapshot, retryPolicy); + } + + @Override + public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { - return delegate().reconstruct(lowEpoch, highEpoch, retryPolicy); + return delegate().getLogState(start, end, includeSnapshot, retryPolicy); } public String toString() diff --git a/src/java/org/apache/cassandra/tcm/Epoch.java b/src/java/org/apache/cassandra/tcm/Epoch.java index d15030e3ec9a..eeeefac35961 100644 --- a/src/java/org/apache/cassandra/tcm/Epoch.java +++ b/src/java/org/apache/cassandra/tcm/Epoch.java @@ -57,6 +57,7 @@ public long serializedSize(Epoch t, int version) }; public static final Epoch FIRST = new Epoch(1); + public static final Epoch MAX = new Epoch(Long.MAX_VALUE); public static final Epoch EMPTY = new Epoch(0); public static final Epoch UPGRADE_STARTUP = new Epoch(Long.MIN_VALUE); public static final Epoch UPGRADE_GOSSIP = new Epoch(Long.MIN_VALUE + 1); diff --git a/src/java/org/apache/cassandra/tcm/FetchCMSLog.java b/src/java/org/apache/cassandra/tcm/FetchCMSLog.java index 38ef550ba587..3878a9c4cb38 100644 --- a/src/java/org/apache/cassandra/tcm/FetchCMSLog.java +++ b/src/java/org/apache/cassandra/tcm/FetchCMSLog.java @@ -19,11 +19,13 @@ package org.apache.cassandra.tcm; import java.io.IOException; -import java.util.function.BiFunction; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -32,7 +34,6 @@ import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.schema.DistributedMetadataLogKeyspace; import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.utils.FBUtilities; @@ -89,16 +90,16 @@ static class Handler implements IVerbHandler * to node-local (which only relevant in cases of CMS expansions/shrinks, and can only be requested by the * CMS node that collects the highest epoch from the quorum of peers). */ - private final BiFunction logStateSupplier; + private final Supplier processor; public Handler() { - this(DistributedMetadataLogKeyspace::getLogState); + this(() -> ClusterMetadataService.instance().processor()); } - public Handler(BiFunction logStateSupplier) + public Handler(Supplier processor) { - this.logStateSupplier = logStateSupplier; + this.processor = processor; } public void doVerb(Message message) throws IOException @@ -114,7 +115,14 @@ public void doVerb(Message message) throws IOException // If both we and the other node believe it should be caught up with a linearizable read boolean consistentFetch = request.consistentFetch && !ClusterMetadataService.instance().isCurrentMember(message.from()); - LogState delta = logStateSupplier.apply(message.payload.lowerBound, consistentFetch); + Retry.Deadline retry = Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS), + TCMMetrics.instance.fetchLogRetries); + LogState delta; + if (consistentFetch) + delta = processor.get().getLogState(message.payload.lowerBound, Epoch.MAX, false, retry); + else + delta = processor.get().getLocalState(message.payload.lowerBound, Epoch.MAX, false, retry); + TCMMetrics.instance.cmsLogEntriesServed(message.payload.lowerBound, delta.latestEpoch()); logger.info("Responding to {}({}) with log delta: {}", message.from(), request, delta); MessagingService.instance().send(message.responseWith(delta), message.from()); diff --git a/src/java/org/apache/cassandra/tcm/FetchPeerLog.java b/src/java/org/apache/cassandra/tcm/FetchPeerLog.java index 1347dcf049ee..1e79d6cb7c8a 100644 --- a/src/java/org/apache/cassandra/tcm/FetchPeerLog.java +++ b/src/java/org/apache/cassandra/tcm/FetchPeerLog.java @@ -19,10 +19,12 @@ package org.apache.cassandra.tcm; import java.io.IOException; +import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -31,7 +33,6 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.tcm.log.LogState; -import org.apache.cassandra.tcm.log.LogStorage; public class FetchPeerLog { @@ -82,7 +83,11 @@ public void doVerb(Message message) throws IOException ClusterMetadata metadata = ClusterMetadata.current(); logger.info("Received peer log fetch request {} from {}: start = {}, current = {}", request, message.from(), message.payload.start, metadata.epoch); - LogState delta = LogStorage.SystemKeyspace.getLogState(message.payload.start); + LogState delta = ClusterMetadataService.instance() + .processor() + .getLocalState(message.payload.start, Epoch.MAX, false, + Retry.Deadline.after(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS), + new Retry.Jitter(TCMMetrics.instance.fetchLogRetries))); TCMMetrics.instance.peerLogEntriesServed(message.payload.start, delta.latestEpoch()); logger.info("Responding with log delta: {}", delta); MessagingService.instance().send(message.responseWith(delta), message.from()); diff --git a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java index dbaac24041a0..dcdca627dbc6 100644 --- a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java @@ -167,9 +167,16 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy throw new ReadTimeoutException(ConsistencyLevel.QUORUM, blockFor - collected.size(), blockFor, false); } - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + @Override + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return log.storage().getLogState(start, end, includeSnapshot); + } + + @Override + public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { - return DistributedMetadataLogKeyspace.getLogState(lowEpoch, highEpoch); + return DistributedMetadataLogKeyspace.getLogState(start, end, includeSnapshot); } private static T unwrap(Promise promise) diff --git a/src/java/org/apache/cassandra/tcm/PeerLogFetcher.java b/src/java/org/apache/cassandra/tcm/PeerLogFetcher.java index 3564ab93f70f..7192551c6898 100644 --- a/src/java/org/apache/cassandra/tcm/PeerLogFetcher.java +++ b/src/java/org/apache/cassandra/tcm/PeerLogFetcher.java @@ -108,7 +108,7 @@ private Future fetchLogEntriesAndWaitInternal(InetAddressAndPor } else { - throw new IllegalStateException(String.format("Queried for epoch %s, but could not catch up", awaitAtleast)); + throw new IllegalStateException(String.format("Queried for epoch %s, but could not catch up. Current epoch: %s", awaitAtleast, fetched.epoch)); } }); diff --git a/src/java/org/apache/cassandra/tcm/Processor.java b/src/java/org/apache/cassandra/tcm/Processor.java index 168b7f9c786b..2791e014ab0c 100644 --- a/src/java/org/apache/cassandra/tcm/Processor.java +++ b/src/java/org/apache/cassandra/tcm/Processor.java @@ -24,14 +24,14 @@ import java.util.concurrent.TimeUnit; import com.codahale.metrics.Meter; + +import accord.utils.Invariants; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.utils.Clock; -import static java.util.concurrent.TimeUnit.NANOSECONDS; - public interface Processor { /** @@ -109,23 +109,36 @@ default ClusterMetadata fetchLogAndWait(Epoch waitFor) ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy); - LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy); + /** + * Queries node's _local_ state. It is not guaranteed to be contiguous, but can be used for restoring CMS state/ + */ + LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy); - default List reconstructFull(Epoch lowEpoch, Epoch highEpoch) + /** + * Queries global log state. + */ + LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy); + + /** + * Reconstructs + */ + default List reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) { - LogState logState = reconstruct(lowEpoch, highEpoch, Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(NANOSECONDS), - TCMMetrics.instance.commitRetries)); + LogState logState = getLogState(lowEpoch, highEpoch, true, retryPolicy); if (logState.isEmpty()) return Collections.emptyList(); List cms = new ArrayList<>(logState.entries.size()); - ClusterMetadata accum = logState.baseState; - cms.add(accum); + + ClusterMetadata acc = logState.baseState; + cms.add(acc); for (Entry entry : logState.entries) { - Transformation.Result res = entry.transform.execute(accum); + Invariants.checkState(entry.epoch.isDirectlyAfter(acc.epoch), "%s should have been directly after %s", entry.epoch, acc.epoch); + Transformation.Result res = entry.transform.execute(acc); assert res.isSuccess() : res.toString(); - accum = res.success().metadata; - cms.add(accum); + acc = res.success().metadata; + cms.add(acc); } return cms; } + } diff --git a/src/java/org/apache/cassandra/tcm/ReconstructLogState.java b/src/java/org/apache/cassandra/tcm/ReconstructLogState.java index f6a60f070a04..c8930853ad99 100644 --- a/src/java/org/apache/cassandra/tcm/ReconstructLogState.java +++ b/src/java/org/apache/cassandra/tcm/ReconstructLogState.java @@ -19,7 +19,11 @@ package org.apache.cassandra.tcm; import java.io.IOException; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -27,7 +31,6 @@ import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.schema.DistributedMetadataLogKeyspace; import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.utils.FBUtilities; @@ -37,11 +40,13 @@ public class ReconstructLogState public final Epoch lowerBound; public final Epoch higherBound; + public final boolean includeSnapshot; - public ReconstructLogState(Epoch lowerBound, Epoch higherBound) + public ReconstructLogState(Epoch lowerBound, Epoch higherBound, boolean includeSnapshot) { this.lowerBound = lowerBound; this.higherBound = higherBound; + this.includeSnapshot = includeSnapshot; } static class Serializer implements IVersionedSerializer @@ -51,19 +56,21 @@ public void serialize(ReconstructLogState t, DataOutputPlus out, int version) th { Epoch.serializer.serialize(t.lowerBound, out); Epoch.serializer.serialize(t.higherBound, out); + out.writeBoolean(t.includeSnapshot); } public ReconstructLogState deserialize(DataInputPlus in, int version) throws IOException { Epoch lowerBound = Epoch.serializer.deserialize(in); Epoch higherBound = Epoch.serializer.deserialize(in); - return new ReconstructLogState(lowerBound, higherBound); + return new ReconstructLogState(lowerBound, higherBound, in.readBoolean()); } public long serializedSize(ReconstructLogState t, int version) { return Epoch.serializer.serializedSize(t.lowerBound) + - Epoch.serializer.serializedSize(t.higherBound); + Epoch.serializer.serializedSize(t.higherBound) + + TypeSizes.BOOL_SIZE; } } @@ -71,6 +78,16 @@ public static class Handler implements IVerbHandler { public static final Handler instance = new Handler(); + private final Supplier processor; + + public Handler() + { + this(() -> ClusterMetadataService.instance().processor()); + } + public Handler(Supplier processor) + { + this.processor = processor; + } public void doVerb(Message message) throws IOException { TCMMetrics.instance.reconstructLogStateCall.mark(); @@ -79,7 +96,10 @@ public void doVerb(Message message) throws IOException if (!ClusterMetadataService.instance().isCurrentMember(FBUtilities.getBroadcastAddressAndPort())) throw new NotCMSException("This node is not in the CMS, can't generate a consistent log fetch response to " + message.from()); - LogState result = DistributedMetadataLogKeyspace.getLogState(request.lowerBound, request.higherBound); + LogState result = processor.get().getLogState(request.lowerBound, request.higherBound, request.includeSnapshot, + Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS), + TCMMetrics.instance.fetchLogRetries)); + MessagingService.instance().send(message.responseWith(result), message.from()); } } diff --git a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java index 635be54cf9e2..e9417adfec10 100644 --- a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java +++ b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java @@ -152,7 +152,13 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return log.getLocalEntries(start, end, includeSnapshot); + } + + @Override + public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) { try { @@ -160,9 +166,9 @@ public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retr List candidates = new ArrayList<>(log.metadata().fullCMSMembers()); sendWithCallbackAsync(request, Verb.TCM_RECONSTRUCT_EPOCH_REQ, - new ReconstructLogState(lowEpoch, highEpoch), + new ReconstructLogState(lowEpoch, highEpoch, includeSnapshot), new CandidateIterator(candidates), - new Retry.Backoff(TCMMetrics.instance.fetchLogRetries)); + retryPolicy); return request.get(retryPolicy.remainingNanos(), TimeUnit.NANOSECONDS); } catch (InterruptedException e) @@ -187,8 +193,7 @@ public static ClusterMetadata fetchLogAndWait(CandidateIterator candidateIterato } } - private static Future fetchLogAndWaitInternal(CandidateIterator candidates, - LocalLog log) + private static Future fetchLogAndWaitInternal(CandidateIterator candidates, LocalLog log) { try (Timer.Context ctx = TCMMetrics.instance.fetchCMSLogLatency.time()) { diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index fc89ec79d9d1..30e7f52e0be6 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -153,7 +153,14 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy throw new UnsupportedOperationException(); } - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + @Override + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + throw new UnsupportedOperationException(); + } + + @Override + public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { throw new UnsupportedOperationException(); } diff --git a/src/java/org/apache/cassandra/tcm/log/LocalLog.java b/src/java/org/apache/cassandra/tcm/log/LocalLog.java index 32f775096bfd..01b6c22016f1 100644 --- a/src/java/org/apache/cassandra/tcm/log/LocalLog.java +++ b/src/java/org/apache/cassandra/tcm/log/LocalLog.java @@ -361,9 +361,9 @@ public LogState getLocalEntries(Epoch since) return storage.getLogState(since, false); } - public LogState getLocalEntries(Epoch since, Epoch until) + public LogState getLocalEntries(Epoch since, Epoch until, boolean includeSnapshot) { - return storage.getLogState(since, until); + return storage.getLogState(since, until, includeSnapshot); } public ClusterMetadata waitForHighestConsecutive() diff --git a/src/java/org/apache/cassandra/tcm/log/LogReader.java b/src/java/org/apache/cassandra/tcm/log/LogReader.java index b1e7ab326419..effc4d756145 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogReader.java +++ b/src/java/org/apache/cassandra/tcm/log/LogReader.java @@ -120,7 +120,7 @@ else if (!allowSnapshots) } } - default LogState getLogState(Epoch start, Epoch end) + default LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot) { try { @@ -136,7 +136,7 @@ default LogState getLogState(Epoch start, Epoch end) { if (entry.epoch.isAfter(start)) entries.add(entry); - else + else if (includeSnapshot) closestSnapshot = entry.transform.execute(closestSnapshot).success().metadata; } return new LogState(closestSnapshot, entries.build()); diff --git a/src/java/org/apache/cassandra/tcm/log/LogStorage.java b/src/java/org/apache/cassandra/tcm/log/LogStorage.java index 7772d7d07e70..e739a8aae799 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogStorage.java +++ b/src/java/org/apache/cassandra/tcm/log/LogStorage.java @@ -57,7 +57,7 @@ public LogState getLogState(Epoch startEpoch, boolean allowSnapshots) } @Override - public LogState getLogState(Epoch start, Epoch end) + public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot) { return LogState.EMPTY; } diff --git a/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java b/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java index 6c02318f4806..36baa59eb307 100644 --- a/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java +++ b/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java @@ -41,7 +41,14 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy return ClusterMetadata.current(); } - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + @Override + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + throw new IllegalStateException("Can't reconstruct log state when running in gossip mode. Enable the ClusterMetadataService with `nodetool addtocms`."); + } + + @Override + public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { throw new IllegalStateException("Can't reconstruct log state when running in gossip mode. Enable the ClusterMetadataService with `nodetool addtocms`."); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/CASTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/CASTestBase.java index 58b47cc9b79e..1c441ded3ef7 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/CASTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/CASTestBase.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.junit.Assert; @@ -185,6 +186,8 @@ public static void addToRing(boolean bootstrapping, IInstance peer) public static void assertVisibleInRing(IInstance peer) { InetAddressAndPort endpoint = InetAddressAndPort.getByAddress(peer.broadcastAddress()); + long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(30); + while (System.nanoTime() < deadline && !Gossiper.instance.isAlive(endpoint)); Assert.assertTrue(Gossiper.instance.isAlive(endpoint)); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java b/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java index 4fb99e62a4be..175f70c7973f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java +++ b/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java @@ -194,7 +194,6 @@ public void paxosRepairPreventsStaleReproposal() throws Throwable Ballot staleBallot = Paxos.newBallot(Ballot.none(), org.apache.cassandra.db.ConsistencyLevel.SERIAL); try (Cluster cluster = init(Cluster.create(3, cfg -> cfg .set("paxos_variant", "v2") - .set("accord.enabled", false) // this test monkeys with TCM which can cause confussion for Accord while it fetches epochs... .set("paxos_purge_grace_period", "0s") .set("truncate_request_timeout_in_ms", 1000L))) ) diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java index 458dbe13ba98..42cd999eb774 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java @@ -734,6 +734,7 @@ public void init() log, new Processor() { + @Override public Commit.Result commit(Entry.Id entryId, Transformation event, Epoch lastKnown, Retry.Deadline retryPolicy) { if (lastKnown == null) @@ -747,6 +748,7 @@ public Commit.Result commit(Entry.Id entryId, Transformation event, Epoch lastKn return result; } + @Override public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy) { Epoch since = log.waitForHighestConsecutive().epoch; @@ -755,9 +757,16 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy return log.waitForHighestConsecutive(); } - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + @Override + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { - return log.getLocalEntries(lowEpoch, highEpoch); + return getLogState(start, end, includeSnapshot, retryPolicy); + } + + @Override + public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return log.getLocalEntries(start, end, includeSnapshot); } }, (a,b) -> {}, diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java index 5166ae89981a..a38050a8952f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/ReconstructEpochTest.java @@ -56,11 +56,11 @@ public void logReaderTest() throws Exception for (int[] cfg : new int[][]{ new int[]{ 6, 9 }, new int[]{ 2, 20 }, new int[]{ 5, 5 }, - new int[]{ 15, 20 }}) + new int[]{ 15, 20 } }) { int start = cfg[0]; int end = cfg[1]; - LogState logState = DistributedMetadataLogKeyspace.getLogState(Epoch.create(start), Epoch.create(end)); + LogState logState = DistributedMetadataLogKeyspace.getLogState(Epoch.create(start), Epoch.create(end), true); Assert.assertEquals(start, logState.baseState.epoch.getEpoch()); Iterator iter = logState.entries.iterator(); for (int i = start + 1; i <= end; i++) @@ -73,14 +73,15 @@ public void logReaderTest() throws Exception for (int[] cfg : new int[][]{ new int[]{ 6, 9 }, new int[]{ 2, 20 }, new int[]{ 5, 5 }, - new int[]{ 15, 20 }}) + new int[]{ 15, 20 } }) { int start = cfg[0]; int end = cfg[1]; LogState logState = ClusterMetadataService.instance() .processor() - .reconstruct(Epoch.create(start), + .getLogState(Epoch.create(start), Epoch.create(end), + true, unsafeRetryIndefinitely()); Assert.assertEquals(start, logState.baseState.epoch.getEpoch()); diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java b/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java index 6f359af057c9..6ee5e975eaa0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java @@ -70,9 +70,16 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy return delegate.fetchLogAndWait(waitFor, retryPolicy); } - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + @Override + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return delegate.getLocalState(start, end, includeSnapshot, retryPolicy); + } + + @Override + public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { - return delegate.reconstruct(lowEpoch, highEpoch, retryPolicy); + return delegate.getLogState(start, end, includeSnapshot, retryPolicy); } protected void waitIfPaused() diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java index 57a1efa5e98b..9e3f6ee82f63 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java @@ -530,8 +530,11 @@ public String toString() public void close() throws Exception { epochHistory = cluster.get(cmsGroup[0]).callOnInstance(() -> { - LogState all = ClusterMetadataService.instance().processor().reconstruct(Epoch.EMPTY, Epoch.create(Long.MAX_VALUE), Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(NANOSECONDS), - TCMMetrics.instance.commitRetries)); + LogState all = ClusterMetadataService.instance() + .processor() + .getLogState(Epoch.EMPTY, Epoch.create(Long.MAX_VALUE), false, + Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(NANOSECONDS), + TCMMetrics.instance.commitRetries)); StringBuilder sb = new StringBuilder("Epochs:"); for (Entry e : all.entries) sb.append("\n").append(e.epoch.getEpoch()).append(": ").append(e.transform); diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index ef0e264def85..b7b2fecb5762 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -180,7 +180,8 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie if (fieldUpdates.addHistoricalTransactions != null) updates.historicalTransactionsAccumulator.update(Pair.create(fieldUpdates.addHistoricalTransactions.range, fieldUpdates.addHistoricalTransactions.deps)); - onFlush.run(); + if (onFlush != null) + onFlush.run(); } /** diff --git a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java index 3cfc1c91c590..0108cd9cac96 100644 --- a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java @@ -89,8 +89,9 @@ public void serde() Gen gen = AccordGenerators.commandsBuilder(); try (DataOutputBuffer out = new DataOutputBuffer()) { - qt().forAll(gen).withSeed(3447978952908153749L).check(cmdBuilder -> { - int userVersion = 1; //TODO (maintance): where can we fetch all supported versions? + qt().forAll(gen) + .check(cmdBuilder -> { + int userVersion = 1; //TODO (maintenance): where can we fetch all supported versions? SoftAssertions checks = new SoftAssertions(); for (SaveStatus saveStatus : SaveStatus.values()) { @@ -138,9 +139,10 @@ private void assertMissing(int flags, Set missing) checks.assertThat(SavedCommand.getFieldChanged(field, flags)) .describedAs("field %s changed", field) .isFalse(); + // Is null flag can not be set on a field that has not changed checks.assertThat(SavedCommand.getFieldIsNull(field, flags)) .describedAs("field %s not null", field) - .isTrue(); + .isFalse(); } checks.assertAll(); } diff --git a/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java index 128fdeca7b71..0d7bbf7f8e2f 100644 --- a/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java +++ b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java @@ -132,7 +132,13 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return getLogState(lowEpoch, highEpoch, includeSnapshot, retryPolicy); + } + + @Override + public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) { if (!epochs.containsKey(lowEpoch)) throw new AssertionError("Unknown epoch: " + lowEpoch); From 5afe99b7cae96f3cee61496466a243b655108ee0 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 21 Oct 2024 19:59:06 -0700 Subject: [PATCH 178/340] Accord metrics are isolated which cause existing coordination metrics to be empty, should also populate there as well patch by David Capwell; reviewed by Benedict Elliott Smith for CASSANDRA-20017 --- .../cql3/statements/TransactionStatement.java | 117 ++++++++---------- .../service/accord/AccordService.java | 34 ++++- .../CoordinatorReadLatencyMetricTest.java | 78 +++++++++++- 3 files changed, 156 insertions(+), 73 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index eee992ef7502..4f567cb95fc0 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -38,8 +38,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import accord.api.Key; import accord.primitives.Keys; @@ -96,8 +94,6 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, CQLStatement.ReturningCQLStatement { - private static final Logger logger = LoggerFactory.getLogger(TransactionStatement.class); - public static final String DUPLICATE_TUPLE_NAME_MESSAGE = "The name '%s' has already been used by a LET assignment."; public static final String INCOMPLETE_PARTITION_KEY_SELECT_MESSAGE = "SELECT must specify either all partition key elements. Partition key elements must be always specified with equality operators; %s %s"; public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; %s %s"; @@ -381,82 +377,73 @@ public ResultMessage execute(QueryState state, QueryOptions options, Dispatcher. { checkTrue(DatabaseDescriptor.getAccordTransactionsEnabled(), TRANSACTIONS_DISABLED_MESSAGE); - try - { - // check again since now we have query options; note that statements are quaranted to be single partition reads at this point - for (NamedSelect assignment : assignments) - checkFalse(isSelectingMultipleClusterings(assignment.select, options), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment", assignment.select.source); + // check again since now we have query options; note that statements are quaranted to be single partition reads at this point + for (NamedSelect assignment : assignments) + checkFalse(isSelectingMultipleClusterings(assignment.select, options), INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE, "LET assignment", assignment.select.source); - Txn txn = createTxn(state.getClientState(), options); + Txn txn = createTxn(state.getClientState(), options); - TxnResult txnResult = AccordService.instance().coordinate(txn, options.getConsistency(), requestTime); - if (txnResult.kind() == retry_new_protocol) - throw new InvalidRequestException(UNSUPPORTED_MIGRATION); - TxnData data = (TxnData)txnResult; + TxnResult txnResult = AccordService.instance().coordinate(txn, options.getConsistency(), requestTime); + if (txnResult.kind() == retry_new_protocol) + throw new InvalidRequestException(UNSUPPORTED_MIGRATION); + TxnData data = (TxnData)txnResult; - if (returningSelect != null) + if (returningSelect != null) + { + @SuppressWarnings("unchecked") + SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) returningSelect.select.getQuery(options, 0); + Selection.Selectors selectors = returningSelect.select.getSelection().newSelectors(options); + ResultSetBuilder result = new ResultSetBuilder(resultMetadata, selectors, false); + if (selectQuery.queries.size() == 1) + { + FilteredPartition partition = data.get(TxnDataName.returning()); + boolean reversed = selectQuery.queries.get(0).isReversed(); + if (partition != null) + returningSelect.select.processPartition(partition.rowIterator(reversed), options, result, FBUtilities.nowInSeconds()); + } + else { - @SuppressWarnings("unchecked") - SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) returningSelect.select.getQuery(options, 0); - Selection.Selectors selectors = returningSelect.select.getSelection().newSelectors(options); - ResultSetBuilder result = new ResultSetBuilder(resultMetadata, selectors, false); - if (selectQuery.queries.size() == 1) + long nowInSec = FBUtilities.nowInSeconds(); + for (int i = 0; i < selectQuery.queries.size(); i++) { - FilteredPartition partition = data.get(TxnDataName.returning()); - boolean reversed = selectQuery.queries.get(0).isReversed(); + FilteredPartition partition = data.get(TxnDataName.returning(i)); + boolean reversed = selectQuery.queries.get(i).isReversed(); if (partition != null) - returningSelect.select.processPartition(partition.rowIterator(reversed), options, result, FBUtilities.nowInSeconds()); - } - else - { - long nowInSec = FBUtilities.nowInSeconds(); - for (int i = 0; i < selectQuery.queries.size(); i++) - { - FilteredPartition partition = data.get(TxnDataName.returning(i)); - boolean reversed = selectQuery.queries.get(i).isReversed(); - if (partition != null) - returningSelect.select.processPartition(partition.rowIterator(reversed), options, result, nowInSec); - } + returningSelect.select.processPartition(partition.rowIterator(reversed), options, result, nowInSec); } - return new ResultMessage.Rows(result.build()); } + return new ResultMessage.Rows(result.build()); + } - if (returningReferences != null) - { - List> resultType = new ArrayList<>(returningReferences.size()); - List columns = new ArrayList<>(returningReferences.size()); - - for (RowDataReference reference : returningReferences) - { - ColumnMetadata forMetadata = reference.toResultMetadata(); - resultType.add(forMetadata.type); - columns.add(reference.column()); - } + if (returningReferences != null) + { + List> resultType = new ArrayList<>(returningReferences.size()); + List columns = new ArrayList<>(returningReferences.size()); - ResultSetBuilder result = new ResultSetBuilder(resultMetadata, Selection.noopSelector(), false); - result.newRow(options.getProtocolVersion(), null, null, columns); + for (RowDataReference reference : returningReferences) + { + ColumnMetadata forMetadata = reference.toResultMetadata(); + resultType.add(forMetadata.type); + columns.add(reference.column()); + } - for (int i = 0; i < returningReferences.size(); i++) - { - RowDataReference reference = returningReferences.get(i); - TxnReference txnReference = reference.toTxnReference(options); - ByteBuffer buffer = txnReference.toByteBuffer(data, resultType.get(i)); - result.add(buffer); - } + ResultSetBuilder result = new ResultSetBuilder(resultMetadata, Selection.noopSelector(), false); + result.newRow(options.getProtocolVersion(), null, null, columns); - return new ResultMessage.Rows(result.build()); + for (int i = 0; i < returningReferences.size(); i++) + { + RowDataReference reference = returningReferences.get(i); + TxnReference txnReference = reference.toTxnReference(options); + ByteBuffer buffer = txnReference.toByteBuffer(data, resultType.get(i)); + result.add(buffer); } - // In the case of a write-only transaction, just return and empty result. - // TODO: This could be modified to return an indication of whether a condition (if present) succeeds. - return new ResultMessage.Void(); - } - catch (Throwable t) - { - //TODO remove before merge to trunk - logger.error("Unexpected error with transaction: {}", t.toString()); - throw t; + return new ResultMessage.Rows(result.build()); } + + // In the case of a write-only transaction, just return and empty result. + // TODO: This could be modified to return an indication of whether a condition (if present) succeeds. + return new ResultMessage.Void(); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 85f491f0f4f6..167428a759be 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -128,6 +128,8 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.metrics.TCMMetrics; +import org.apache.cassandra.metrics.ClientRequestMetrics; +import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; @@ -874,12 +876,24 @@ public TopologyManager topology() public @Nonnull AsyncTxnResult coordinateAsync(Txn txn, ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { TxnId txnId = node.nextTxnId(txn.kind(), txn.keys().domain()); - AccordClientRequestMetrics metrics = txn.isWrite() ? accordWriteMetrics : accordReadMetrics; + ClientRequestMetrics sharedMetrics; + AccordClientRequestMetrics metrics; + if (txn.isWrite()) + { + sharedMetrics = ClientRequestsMetricsHolder.writeMetrics; + metrics = accordWriteMetrics; + } + else + { + sharedMetrics = ClientRequestsMetricsHolder.readMetrics; + metrics = accordReadMetrics; + } metrics.keySize.update(txn.keys().size()); AsyncResult asyncResult = node.coordinate(txnId, txn); AsyncTxnResult asyncTxnResult = new AsyncTxnResult(txnId); asyncResult.addCallback((success, failure) -> { long durationNanos = nanoTime() - requestTime.startedAtNanos(); + sharedMetrics.addNano(durationNanos); metrics.addNano(durationNanos); Throwable cause = failure != null ? Throwables.getRootCause(failure) : null; if (success != null) @@ -902,6 +916,7 @@ public TopologyManager topology() } if (cause instanceof Preempted || cause instanceof Invalidated) { + sharedMetrics.timeouts.mark(); metrics.preempted.mark(); //TODO need to improve // Coordinator "could" query the accord state to see whats going on but that doesn't exist yet. @@ -909,6 +924,7 @@ public TopologyManager topology() asyncTxnResult.tryFailure(newPreempted(txnId, txn.isWrite(), consistencyLevel)); return; } + sharedMetrics.failures.mark(); if (cause instanceof TopologyMismatch) { metrics.topologyMismatches.mark(); @@ -924,7 +940,18 @@ public TopologyManager topology() @Override public TxnResult getTxnResult(AsyncTxnResult asyncTxnResult, boolean isWrite, @Nullable ConsistencyLevel consistencyLevel, Dispatcher.RequestTime requestTime) { - AccordClientRequestMetrics metrics = isWrite ? accordWriteMetrics : accordReadMetrics; + ClientRequestMetrics sharedMetrics; + AccordClientRequestMetrics metrics; + if (isWrite) + { + sharedMetrics = ClientRequestsMetricsHolder.writeMetrics; + metrics = accordWriteMetrics; + } + else + { + sharedMetrics = ClientRequestsMetricsHolder.readMetrics; + metrics = accordReadMetrics; + } try { long deadlineNanos = requestTime.computeDeadline(DatabaseDescriptor.getTransactionTimeout(NANOSECONDS)); @@ -939,6 +966,7 @@ public TxnResult getTxnResult(AsyncTxnResult asyncTxnResult, boolean isWrite, @N { // Mark here instead of in coordinate async since this is where the request timeout actually occurs metrics.timeouts.mark(); + sharedMetrics.timeouts.mark(); cause.addSuppressed(e); throw (RequestTimeoutException) cause; } @@ -950,11 +978,13 @@ else if (cause instanceof RuntimeException) catch (InterruptedException e) { metrics.failures.mark(); + sharedMetrics.failures.mark(); throw new UncheckedInterruptedException(e); } catch (TimeoutException e) { metrics.timeouts.mark(); + sharedMetrics.timeouts.mark(); throw newTimeout(asyncTxnResult.txnId, isWrite, consistencyLevel); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/metrics/CoordinatorReadLatencyMetricTest.java b/test/distributed/org/apache/cassandra/distributed/test/metrics/CoordinatorReadLatencyMetricTest.java index ab3de57cb66b..31de169030a5 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/metrics/CoordinatorReadLatencyMetricTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/metrics/CoordinatorReadLatencyMetricTest.java @@ -18,23 +18,67 @@ package org.apache.cassandra.distributed.test.metrics; +import java.io.IOException; import java.util.concurrent.TimeUnit; +import java.util.function.LongSupplier; import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.assertj.core.api.Assertions; import org.junit.Test; import org.apache.cassandra.config.Config; +import org.apache.cassandra.cql3.ast.Conditional; +import org.apache.cassandra.cql3.ast.Select; +import org.apache.cassandra.cql3.ast.Txn; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.metrics.ClientRequestsMetricsHolder; +import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.paxos.Paxos; import static org.junit.Assert.assertTrue; public class CoordinatorReadLatencyMetricTest extends TestBaseImpl { + @Test + public void singleRowTest() throws IOException + { + try (Cluster cluster = init(builder().withNodes(1).start())) + { + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck int, v int, PRIMARY KEY (pk, ck))")); + for (int i = 0; i < 100; i++) + cluster.coordinator(1).execute(withKeyspace("insert into %s.tbl (pk, ck ,v) values (0, ?, 1)"), ConsistencyLevel.ALL, i); + + var select = Select.builder() + //TODO (now, correctness, coverage): count(v) breaks accord as we get mutliple rows rather than the count of rows... +// .withSelection(FunctionCall.count("v")) + .table(KEYSPACE, "tbl") + .value("pk", 0) + .where("ck", Conditional.Where.Inequality.LESS_THAN, 42) + .limit(1) + .build(); + + verifyTableLatency(cluster, 1, () -> verifyLatencyMetrics(cluster, select.toCQL(), ConsistencyLevel.QUORUM)); + cluster.get(1).runOnInstance(() -> Paxos.setPaxosVariant(Config.PaxosVariant.v1)); + verifyTableLatency(cluster, 1, () -> verifyLatencyMetrics(cluster, select.toCQL(), ConsistencyLevel.SERIAL)); + cluster.get(1).runOnInstance(() -> Paxos.setPaxosVariant(Config.PaxosVariant.v2)); + verifyTableLatency(cluster, 1, () -> verifyLatencyMetrics(cluster, select.toCQL(), ConsistencyLevel.SERIAL)); + + cluster.schemaChange(withKeyspace("ALTER TABLE %s.tbl WITH " + TransactionalMode.full.asCqlParam())); + verifyTableLatency(cluster, 1, () -> verifyLatencyMetrics(cluster, Txn.wrap(select).toCQL(), ConsistencyLevel.QUORUM)); + + var let = Txn.builder() + .addLet("a", select) + .addReturnReferences("a.v") + .build(); + verifyTableLatency(cluster, 1, () -> verifyLatencyMetrics(cluster, let.toCQL(), ConsistencyLevel.QUORUM)); + } + } + @Test public void internalPagingWithAggregateTest() throws Throwable { @@ -91,16 +135,26 @@ public void multiplePartitionKeyInClauseTest() throws Throwable } } - private void verifyLatencyMetricsWhenPaging(Cluster cluster, - int pagesize, - int expectedQueries, - String query, - ConsistencyLevel consistencyLevel) + private static void verifyLatencyMetricsWhenPaging(Cluster cluster, + int pagesize, + int expectedQueries, + String query, + ConsistencyLevel consistencyLevel) + { + verifyLatencyMetrics(cluster, expectedQueries, () -> cluster.coordinator(1).executeWithPaging(query, consistencyLevel, pagesize)); + } + + private static void verifyLatencyMetrics(Cluster cluster, String query, ConsistencyLevel consistencyLevel) + { + verifyLatencyMetrics(cluster, 1, () -> cluster.coordinator(1).execute(query, consistencyLevel)); + } + + private static void verifyLatencyMetrics(Cluster cluster, int expectedQueries, Runnable query) { long countBefore = cluster.get(1).callOnInstance(() -> ClientRequestsMetricsHolder.readMetrics.latency.getCount()); long totalLatencyBefore = cluster.get(1).callOnInstance(() -> ClientRequestsMetricsHolder.readMetrics.totalLatency.getCount()); long startTime = System.nanoTime(); - cluster.coordinator(1).executeWithPaging(query, consistencyLevel, pagesize); + query.run(); long elapsedTime = System.nanoTime() - startTime; long countAfter = cluster.get(1).callOnInstance(() -> ClientRequestsMetricsHolder.readMetrics.latency.getCount()); long totalLatencyAfter = cluster.get(1).callOnInstance(() -> ClientRequestsMetricsHolder.readMetrics.totalLatency.getCount()); @@ -113,4 +167,16 @@ private void verifyLatencyMetricsWhenPaging(Cluster cluster, totalLatencyRecorded <= elapsedTime); } + private static void verifyTableLatency(Cluster cluster, int expectedQueries, Runnable query) + { + IInvokableInstance inst = cluster.get(1); + LongSupplier tableMetric = () -> inst.callOnInstance(() -> Keyspace.open("distributed_test_keyspace").getColumnFamilyStore("tbl").getMetrics().readLatency.latency.getCount()); + + long tableBefore = tableMetric.getAsLong(); + query.run(); + long tableAfter = tableMetric.getAsLong(); + + Assertions.assertThat(tableAfter - tableBefore).isEqualTo(expectedQueries); + } + } From 1f80f99b710bd7a0809b06215f654367ff3cd09f Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 23 Oct 2024 09:56:01 -0700 Subject: [PATCH 179/340] Accord should block currently unsafe operations patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-20020 --- .../cql3/statements/SelectStatement.java | 7 +- .../cql3/statements/TransactionStatement.java | 49 ++++-- .../statements/TransactionStatementTest.java | 153 +++++++++++++++++- 3 files changed, 192 insertions(+), 17 deletions(-) diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java index 4b104e0a79de..45f2499af7c7 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java @@ -422,6 +422,11 @@ public AggregationSpecification getAggregationSpec(QueryOptions options) return aggregationSpecFactory == null ? null : aggregationSpecFactory.newInstance(options); } + public boolean hasAggregation() + { + return aggregationSpecFactory != null; + } + public ReadQuery getQuery(QueryOptions options, long nowInSec) throws RequestValidationException { Selectors selectors = selection.newSelectors(options); @@ -1229,7 +1234,7 @@ public static class RawStatement extends QualifiedStatement public final Term.Raw limit; public final Term.Raw perPartitionLimit; private ClientState state; - private final StatementSource source; + public final StatementSource source; public RawStatement(QualifiedName cfName, Parameters parameters, diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index 4f567cb95fc0..3b5bf6697631 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -58,6 +58,7 @@ import org.apache.cassandra.cql3.transactions.SelectReferenceSource; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.db.SinglePartitionReadQuery; +import org.apache.cassandra.db.filter.DataLimits; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.partitions.FilteredPartition; import org.apache.cassandra.exceptions.InvalidRequestException; @@ -99,14 +100,19 @@ public class TransactionStatement implements CQLStatement.CompositeCQLStatement, public static final String INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE = "SELECT must specify either all primary key elements or all partition key elements and LIMIT 1. In both cases partition key elements must be always specified with equality operators; %s %s"; public static final String NO_CONDITIONS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify their own conditions; %s statement %s"; public static final String NO_TIMESTAMPS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom timestamps; %s statement %s"; + public static final String NO_TTLS_IN_UPDATES_MESSAGE = "Updates within transactions may not specify custom ttls; %s statement %s"; public static final String TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE = "Accord transactions are disabled on table (See transactional_mode in table options); %s statement %s"; public static final String TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE = "Accord transactions are disabled on table (table is being dropped); %s statement %s"; public static final String NO_COUNTERS_IN_TXNS_MESSAGE = "Counter columns cannot be accessed within a transaction; %s statement %s"; + public static final String NO_AGGREGATION_IN_TXNS_MESSAGE = "No aggregation functions allowed within a transaction; %s statement %s"; + public static final String NO_ORDER_BY_IN_TXNS_MESSAGE = "No ORDER BY clause allowed within a transaction; %s statement %s"; + public static final String NO_GROUP_BY_IN_TXNS_MESSAGE = "No GROUP BY clause allowed within a transaction; %s statement %s"; public static final String EMPTY_TRANSACTION_MESSAGE = "Transaction contains no reads or writes"; public static final String SELECT_REFS_NEED_COLUMN_MESSAGE = "SELECT references must specify a column."; public static final String TRANSACTIONS_DISABLED_MESSAGE = "Accord transactions are disabled. (See accord.enabled in cassandra.yaml)"; public static final String ILLEGAL_RANGE_QUERY_MESSAGE = "Range queries are not allowed for reads within a transaction; %s %s"; public static final String UNSUPPORTED_MIGRATION = "Transaction Statement is unsupported when migrating away from Accord or before migration to Accord is complete for a range"; + public static final String NO_PARTITION_IN_CLAUSE_WITH_LIMIT = "Partition key is present in IN clause and there is a LIMIT... this is currently not supported; %s statement %s"; static class NamedSelect { @@ -465,6 +471,29 @@ public boolean eligibleAsPreparedStatement() return false; } + private static void validate(SelectStatement.RawStatement select) + { + if (select.parameters.orderings != null && !select.parameters.orderings.isEmpty()) + throw invalidRequest(NO_ORDER_BY_IN_TXNS_MESSAGE, "SELECT", select.source); + if (select.parameters.groups != null && !select.parameters.groups.isEmpty()) + throw invalidRequest(NO_GROUP_BY_IN_TXNS_MESSAGE, "SELECT", select.source); + } + + private static void validate(SelectStatement prepared) + { + if (!prepared.table.isAccordEnabled()) + throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", prepared.source); + if (prepared.table.params.pendingDrop) + throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE, "SELECT", prepared.source); + if (prepared.table.isCounter()) + throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); + if (prepared.hasAggregation()) + throw invalidRequest(NO_AGGREGATION_IN_TXNS_MESSAGE, "SELECT", prepared.source); + + if (prepared.getRestrictions().keyIsInRelation()) + checkTrue(prepared.getLimit(null) == DataLimits.NO_LIMIT, NO_PARTITION_IN_CLAUSE_WITH_LIMIT, "SELECT", prepared.source); + } + public static class Parsed extends QualifiedStatement.Composite { private final List assignments; @@ -515,15 +544,10 @@ public CQLStatement prepare(ClientState state) checkNotNull(select.parameters.refName, "Assignments must be named"); TxnDataName name = TxnDataName.user(select.parameters.refName); checkTrue(selectNames.add(name), DUPLICATE_TUPLE_NAME_MESSAGE, name.name()); + validate(select); SelectStatement prepared = select.prepare(bindVariables); - - if (!prepared.table.isAccordEnabled()) - throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", prepared.source); - if (prepared.table.params.pendingDrop) - throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE, "SELECT", prepared.source); - if (prepared.table.isCounter()) - throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); + validate(prepared); NamedSelect namedSelect = new NamedSelect(name, prepared); checkAtMostOneRowSpecified(namedSelect.select, "LET assignment " + name.name()); @@ -538,15 +562,9 @@ public CQLStatement prepare(ClientState state) NamedSelect returningSelect = null; if (select != null) { + validate(select); SelectStatement prepared = select.prepare(bindVariables); - - if (!prepared.table.isAccordEnabled()) - throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE, "SELECT", prepared.source); - if (prepared.table.params.pendingDrop) - throw invalidRequest(TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE, "SELECT", prepared.source); - if (prepared.table.isCounter()) - throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, "SELECT", prepared.source); - + validate(prepared); returningSelect = new NamedSelect(TxnDataName.returning(), prepared); checkAtMostOnePartitionSpecified(returningSelect.select, "returning select"); } @@ -573,6 +591,7 @@ public CQLStatement prepare(ClientState state) checkFalse(prepared.metadata().params.pendingDrop, TRANSACTIONS_DISABLED_ON_TABLE_BEING_DROPPED_MESSAGE, prepared.type, prepared.source); checkFalse(prepared.hasConditions(), NO_CONDITIONS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); checkFalse(prepared.isTimestampSet(), NO_TIMESTAMPS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); + checkFalse(prepared.attrs.isTimeToLiveSet(), NO_TTLS_IN_UPDATES_MESSAGE, prepared.type, prepared.source); if (prepared.metadata().isCounter()) throw invalidRequest(NO_COUNTERS_IN_TXNS_MESSAGE, prepared.type, prepared.source); diff --git a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java index da7d9213e8b4..2e3371e5429e 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/TransactionStatementTest.java @@ -18,16 +18,20 @@ package org.apache.cassandra.cql3.statements; +import org.apache.cassandra.cql3.ast.*; import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.cql3.CQLStatement; import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.ast.Conditional.Is; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.exceptions.SyntaxException; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.transport.Dispatcher; @@ -39,8 +43,13 @@ import static org.apache.cassandra.cql3.statements.TransactionStatement.ILLEGAL_RANGE_QUERY_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PARTITION_KEY_SELECT_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.INCOMPLETE_PRIMARY_KEY_SELECT_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_AGGREGATION_IN_TXNS_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_CONDITIONS_IN_UPDATES_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_GROUP_BY_IN_TXNS_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_ORDER_BY_IN_TXNS_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_PARTITION_IN_CLAUSE_WITH_LIMIT; import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_TIMESTAMPS_IN_UPDATES_MESSAGE; +import static org.apache.cassandra.cql3.statements.TransactionStatement.NO_TTLS_IN_UPDATES_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.SELECT_REFS_NEED_COLUMN_MESSAGE; import static org.apache.cassandra.cql3.statements.TransactionStatement.TRANSACTIONS_DISABLED_ON_TABLE_MESSAGE; import static org.apache.cassandra.cql3.statements.UpdateStatement.CANNOT_SET_KEY_WITH_REFERENCE_MESSAGE; @@ -58,6 +67,7 @@ public class TransactionStatementTest private static final TableId TABLE4_ID = TableId.fromString("00000000-0000-0000-0000-000000000004"); private static final TableId TABLE5_ID = TableId.fromString("00000000-0000-0000-0000-000000000005"); private static final TableId TABLE6_ID = TableId.fromString("00000000-0000-0000-0000-000000000006"); + private static final TableId TABLE7_ID = TableId.fromString("00000000-0000-0000-0000-000000000007"); @BeforeClass public static void beforeClass() throws Exception @@ -69,7 +79,18 @@ public static void beforeClass() throws Exception parse("CREATE TABLE tbl3 (k int PRIMARY KEY, \"with spaces\" int, \"with\"\"quote\" int, \"MiXeD_CaSe\" int) WITH transactional_mode = 'full'", "ks").id(TABLE3_ID), parse("CREATE TABLE tbl4 (k int PRIMARY KEY, int_list list) WITH transactional_mode = 'full'", "ks").id(TABLE4_ID), parse("CREATE TABLE tbl5 (k int PRIMARY KEY, v int) WITH transactional_mode = 'full'", "ks").id(TABLE5_ID), - parse("CREATE TABLE tbl6 (k int PRIMARY KEY, v int) WITH transactional_mode = 'off'", "ks").id(TABLE6_ID)); + parse("CREATE TABLE tbl6 (k int PRIMARY KEY, v int) WITH transactional_mode = 'off'", "ks").id(TABLE6_ID), + parse("CREATE TABLE tbl7 (k int PRIMARY KEY, v vector) WITH transactional_mode = 'full'", "ks").id(TABLE7_ID)); + } + + private static TableMetadata tbl(int num) + { + return Keyspace.open("ks").getColumnFamilyStore("tbl" + num).metadata(); + } + + private static TableMetadata tbl5() + { + return tbl(5); } @Test @@ -342,6 +363,136 @@ public void shouldRejectLetSelectWithIncompletePartitionKey() .hasMessageContaining(String.format(ILLEGAL_RANGE_QUERY_MESSAGE, "LET assignment row1", "at [2:15]")); } + @Test + public void shouldRejectTTL() + { + for (Mutation.Kind kind : Mutation.Kind.values()) + { + if (kind == Mutation.Kind.DELETE) continue; // deletes don't support TTL + Mutation mutation; + switch (kind) + { + case INSERT: + mutation = Mutation.insert(tbl5()) + .value("k", 1) + .value("v", 2) + .ttl(42) + .build(); + break; + case UPDATE: + mutation = Mutation.update(tbl5()) + .value("k", 1) + .set("v", 2) + .ttl(42) + .build(); + break; + default: + throw new UnsupportedOperationException(kind.name()); + } + String query = Txn.wrap(mutation).toCQL(); + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_TTLS_IN_UPDATES_MESSAGE, kind.name(), "at")); + + var txn = Txn.builder() + .addLet("a", Select.builder() + .table(tbl5()) + .value("k", 1) + .build()) + .addIf(new Is("a", Is.Kind.Null), mutation) + .build(); + Assertions.assertThatThrownBy(() -> prepare(txn.toCQL())) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_TTLS_IN_UPDATES_MESSAGE, kind.name(), "at")); + } + } + + @Test + public void shouldRejectAggFunctions() + { + var select = Select.builder() + .selection(FunctionCall.count("v")) + .table(tbl5()) + .value("k",0) + .build(); + + Assertions.assertThatThrownBy(() -> prepare(Txn.wrap(select).toCQL())) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_AGGREGATION_IN_TXNS_MESSAGE, "SELECT", "at")); + + var txn = Txn.builder() + .addLet("a", select) + .addReturnReferences("a.count") + .build(); + + Assertions.assertThatThrownBy(() -> prepare(txn.toCQL())) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_AGGREGATION_IN_TXNS_MESSAGE, "SELECT", "at")); + } + + @Test + public void shouldRejectOrderBy() + { + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM ks.tbl7 WHERE k=0 ORDER BY v ANN OF [42] LIMIT 1;" + + "COMMIT TRANSACTION;"; + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_ORDER_BY_IN_TXNS_MESSAGE, "SELECT", "at")); + + // The below code is left commented out as a reminder to think about this case... As of this writing ORDER BY does not parse in a LET clause... if that is ever fixed we should block it right away! +// String query2 = "BEGIN TRANSACTION\n" + +// " LET a = (SELECT * FROM ks.tbl7 WHERE k=0 ORDER BY v ANN OF [42] LIMIT 1;)" + +// " SELECT a.v" + +// "COMMIT TRANSACTION;"; +// Assertions.assertThatThrownBy(() -> prepare(query2)) +// .isInstanceOf(InvalidRequestException.class) +// .hasMessageContaining(String.format(NO_ORDER_BY_IN_TXNS_MESSAGE, "SELECT", "at")); + } + + @Test + public void shouldRejectGroupBy() + { + String query = "BEGIN TRANSACTION\n" + + " SELECT * FROM ks.tbl1 WHERE k=0 GROUP BY c LIMIT 1;" + + "COMMIT TRANSACTION;"; + Assertions.assertThatThrownBy(() -> prepare(query)) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_GROUP_BY_IN_TXNS_MESSAGE, "SELECT", "at")); + + // The below code is left commented out as a reminder to think about this case... As of this writing GROUP BY does not parse in a LET clause... if that is ever fixed we should block it right away! +// String query2 = "BEGIN TRANSACTION\n" + +// " LET a = (SELECT * FROM ks.tbl1 WHERE k=0 GROUP BY c LIMIT 1;)" + +// " SELECT a.v" + +// "COMMIT TRANSACTION;"; +// Assertions.assertThatThrownBy(() -> prepare(query2)) +// .isInstanceOf(InvalidRequestException.class) +// .hasMessageContaining(String.format(NO_GROUP_BY_IN_TXNS_MESSAGE, "SELECT", "at")); + } + + @Test + public void shouldRejectInClauseInLet() + { + // this is blocked not because this isn't safe, but that the logic to handle this is currently in the read coordinator, which Accord doesn't call. + // So rather than return bad results to users, IN w/ LIMIT is blocked... until we can fix + Select select = Select.builder() + .table(tbl(1)) + .in("k", 0, 1) + .limit(Literal.of(1)) + .build(); + + Assertions.assertThatThrownBy(() -> prepare(Txn.wrap(select).toCQL())) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_PARTITION_IN_CLAUSE_WITH_LIMIT, "SELECT", "at")); + + Assertions.assertThatThrownBy(() -> prepare(Txn.builder() + .addLet("a", select) + .addReturnReferences("a.k") + .build().toCQL())) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining(String.format(NO_PARTITION_IN_CLAUSE_WITH_LIMIT, "SELECT", "at")); + } + @Test public void shouldRejectLetSelectOnNonTransactionalTable() { From e94d86b1e5cd0ec73ac3bfdae75782e12cae840f Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 2 Oct 2024 14:54:29 -0700 Subject: [PATCH 180/340] TopologyMixupTestBase does not fix replication factor for Keyspaces after reaching rf=3 patch by David Capwell; reviewed by Alex Petrov for CASSANDRA-19975 --- modules/accord | 2 +- src/java/org/apache/cassandra/schema/DistributedSchema.java | 6 ++++++ .../BeginConsensusMigrationForTableAndRange.java | 3 +-- .../MaybeFinishConsensusMigrationForTableAndRange.java | 3 +-- .../cassandra/fuzz/topology/AccordTopologyMixupTest.java | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/modules/accord b/modules/accord index d914ee69816e..25f23ffec439 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit d914ee69816ebfdf88b2120ff1d8e0bc16edecbc +Subproject commit 25f23ffec439a921387ca249908798b9cc7d4620 diff --git a/src/java/org/apache/cassandra/schema/DistributedSchema.java b/src/java/org/apache/cassandra/schema/DistributedSchema.java index e0658739c292..b13cfc7db3f6 100644 --- a/src/java/org/apache/cassandra/schema/DistributedSchema.java +++ b/src/java/org/apache/cassandra/schema/DistributedSchema.java @@ -140,6 +140,12 @@ public TableMetadata getTableMetadata(TableId id) return tables.get(id); } + public TableMetadata getTableMetadata(String keyspace, String cf) + { + KeyspaceMetadata ks = keyspaces.getNullable(keyspace); + return ks == null ? null : ks.tables.getNullable(cf); + } + public static DistributedSchema fromSystemTables(Keyspaces keyspaces, Set knownDatacenters) { if (!keyspaces.containsKeyspace(SchemaConstants.METADATA_KEYSPACE_NAME)) diff --git a/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java index e74a18c81f52..59379852cc19 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java +++ b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java @@ -29,7 +29,6 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; @@ -83,7 +82,7 @@ public Kind kind() public Result execute(ClusterMetadata prev) { Transformer transformer = prev.transformer(); - Collection metadata = tables.stream().map(Schema.instance::getTableMetadata).collect(Collectors.toList()); + Collection metadata = tables.stream().map(prev.schema::getTableMetadata).collect(Collectors.toList()); ConsensusMigrationState consensusMigrationState = prev.consensusMigrationState.withRangesMigrating(metadata, ranges, false); return Transformation.success(transformer.with(consensusMigrationState), LockedRanges.AffectedRanges.EMPTY); } diff --git a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java index 15bfdda65c92..16ff97e6745c 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java +++ b/src/java/org/apache/cassandra/tcm/transformations/MaybeFinishConsensusMigrationForTableAndRange.java @@ -33,7 +33,6 @@ import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Keyspaces; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.schema.TableParams; @@ -134,7 +133,7 @@ public Result execute(@Nonnull ClusterMetadata metadata) logger.info("Completed repair eligibiliy '{}' paxos repaired ranges {}, accord repaired ranges {}", repairType, paxosRepairedRanges, accordBarrieredRanges); checkNotNull(metadata, "clusterMetadata should not be null"); String ksAndCF = keyspace + "." + cf; - TableMetadata tbm = Schema.instance.getTableMetadata(keyspace, cf); + TableMetadata tbm = metadata.schema.getTableMetadata(keyspace, cf); if (tbm == null) return new Rejected(INVALID, format("Table %s is not currently performing consensus migration", ksAndCF)); diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java b/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java index b07747969628..fd9dbd4bc395 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java @@ -124,7 +124,7 @@ private static Spec createSchemaSpec(RandomSource rs, Cluster cluster) private static BiFunction, Command, Void, ?>> cqlOperations(Spec spec) { Gen select = (Gen) (Gen) fromQT(new ASTGenerators.SelectGenBuilder(spec.metadata).withLimit1().build()); - Gen mutation = (Gen) (Gen) fromQT(new ASTGenerators.MutationGenBuilder(spec.metadata).withoutTimestamp().build()); + Gen mutation = (Gen) (Gen) fromQT(new ASTGenerators.MutationGenBuilder(spec.metadata).withoutTimestamp().withoutTtl().build()); Gen txn = (Gen) (Gen) fromQT(new ASTGenerators.TxnGenBuilder(spec.metadata).build()); Map, Integer> operations = new LinkedHashMap<>(); operations.put(select, 1); From 4e9110e88180f3aae6c46582c4f15b2cc425bc38 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Wed, 23 Oct 2024 14:31:12 +0200 Subject: [PATCH 181/340] Check for splittable ranges Patch by Alex Petrov; reviewed by Ariel Weisberg for CASSANDRA-20032 Accord Deps tests have incorrect range semantics patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-20029 --- modules/accord | 2 +- .../apache/cassandra/dht/AccordSplitter.java | 7 ++ .../service/accord/AccordCommandStore.java | 15 ++- .../tcm/AtomicLongBackedProcessor.java | 8 +- .../apache/cassandra/tcm/log/LogReader.java | 17 ++-- .../apache/cassandra/tcm/log/LogState.java | 5 + ...ginConsensusMigrationForTableAndRange.java | 2 +- .../test/log/SystemKeyspaceStorageTest.java | 13 ++- .../accord/SimulatedAccordCommandStore.java | 11 ++- .../SimulatedAccordCommandStoreTestBase.java | 95 ++++++++++++++++++- .../service/accord/SimulatedDepsTest.java | 16 ++-- .../accord/SimulatedMultiKeyAndRangeTest.java | 32 +------ ...ulatedRandomKeysWithRangeConflictTest.java | 26 ++--- .../CommandsForKeySerializerTest.java | 28 ++++-- 14 files changed, 194 insertions(+), 83 deletions(-) diff --git a/modules/accord b/modules/accord index 25f23ffec439..a63cac24a219 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 25f23ffec439a921387ca249908798b9cc7d4620 +Subproject commit a63cac24a2198a5893874cdf72946073854a8d4d diff --git a/src/java/org/apache/cassandra/dht/AccordSplitter.java b/src/java/org/apache/cassandra/dht/AccordSplitter.java index 467ac2a1055a..916e4796794c 100644 --- a/src/java/org/apache/cassandra/dht/AccordSplitter.java +++ b/src/java/org/apache/cassandra/dht/AccordSplitter.java @@ -21,6 +21,7 @@ import java.math.BigInteger; import accord.local.ShardDistributor; +import accord.primitives.Range; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordRoutingKey; @@ -60,6 +61,12 @@ public TokenRange subRange(accord.primitives.Range range, BigInteger startOffset endOffset.compareTo(sizeOfRange) >= 0 ? endBound : new TokenKey(tableId, tokenForValue(start.add(endOffset)))); } + @Override + public boolean splittable(Range range, int numSplits) + { + return sizeOf(range).compareTo(BigInteger.valueOf(numSplits)) >= 0; + } + @Override public BigInteger zero() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 3157fb8355d2..c46c953a610e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -89,6 +89,7 @@ import static accord.api.ConfigurationService.EpochReady.DONE; import static accord.local.KeyHistory.COMMANDS; +import static accord.primitives.SaveStatus.Applying; import static accord.primitives.Status.Committed; import static accord.primitives.Status.Invalidated; import static accord.primitives.Status.Truncated; @@ -102,6 +103,7 @@ public class AccordCommandStore extends CommandStore public final String loggingId; private final IJournal journal; + private final CommandStoreExecutor executor; private final AccordStateCache.Instance commandCache; private final AccordStateCache.Instance timestampsForKeyCache; @@ -295,6 +297,13 @@ public AccordStateCache.Instance apply(Command command) safeStore -> { SafeCommand safeCommand = safeStore.unsafeGet(txnId); Command local = safeCommand.current(); - Commands.maybeExecute(safeStore, safeCommand, local, true, true); + if (local.hasBeen(Truncated)) + return; + + if (local.saveStatus().compareTo(Applying) >= 0) Commands.applyWrites(safeStore, context, local).begin(agent); + else Commands.maybeExecute(safeStore, safeCommand, local, true, true); }) .begin((unused, throwable) -> { if (throwable != null) diff --git a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java index 1bf81b60489f..55f4d964063b 100644 --- a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java @@ -32,6 +32,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.utils.Invariants; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LocalLog; @@ -177,7 +178,7 @@ public synchronized EntryHolder getEntries(Epoch since, Epoch until) public LogState getLogState(Epoch start, Epoch end) { EntryHolder state = getEntries(Epoch.EMPTY); - ClusterMetadata metadata = new ClusterMetadata(DatabaseDescriptor.getPartitioner());; + ClusterMetadata metadata = new ClusterMetadata(DatabaseDescriptor.getPartitioner()); Iterator iter = state.iterator(); ImmutableList.Builder rest = new ImmutableList.Builder<>(); while (iter.hasNext()) @@ -186,8 +187,11 @@ public LogState getLogState(Epoch start, Epoch end) if (current.epoch.isAfter(end)) break; if (current.epoch.isEqualOrBefore(start)) + { + Invariants.checkState(current.epoch.isDirectlyAfter(metadata.epoch)); metadata = current.transform.execute(metadata).success().metadata; - else + } + else if (current.epoch.isAfter(start)) rest.add(current); } diff --git a/src/java/org/apache/cassandra/tcm/log/LogReader.java b/src/java/org/apache/cassandra/tcm/log/LogReader.java index effc4d756145..7f2b80602a22 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogReader.java +++ b/src/java/org/apache/cassandra/tcm/log/LogReader.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Ordering; +import accord.utils.Invariants; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; @@ -124,12 +125,15 @@ default LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot) { try { - ClusterMetadata closestSnapshot = snapshots().getSnapshotBefore(start); + ClusterMetadata closestSnapshot = null; + if (includeSnapshot) + closestSnapshot = snapshots().getSnapshotBefore(start); // Snapshot could not be found, fetch enough epochs to reconstruct the start metadata if (closestSnapshot == null) { - closestSnapshot = new ClusterMetadata(DatabaseDescriptor.getPartitioner()); + if (includeSnapshot) + closestSnapshot = new ClusterMetadata(DatabaseDescriptor.getPartitioner()); ImmutableList.Builder entries = new ImmutableList.Builder<>(); EntryHolder entryHolder = getEntries(Epoch.EMPTY, end); for (Entry entry : entryHolder.entries) @@ -144,20 +148,21 @@ else if (includeSnapshot) else if (closestSnapshot.epoch.isBefore(start)) { ImmutableList.Builder entries = new ImmutableList.Builder<>(); - EntryHolder entryHolder = getEntries(closestSnapshot.epoch, end); + EntryHolder entryHolder = getEntries(closestSnapshot.epoch.nextEpoch(), end); for (Entry entry : entryHolder.entries) { if (entry.epoch.isAfter(start)) entries.add(entry); - else + else if (includeSnapshot) closestSnapshot = entry.transform.execute(closestSnapshot).success().metadata; } return new LogState(closestSnapshot, entries.build()); } else { - assert closestSnapshot.epoch.isEqualOrAfter(start) : String.format("Got %s, but requested snapshot of %s", closestSnapshot.epoch, start); - EntryHolder entryHolder = getEntries(closestSnapshot.epoch.nextEpoch(), end); + Invariants.checkState(closestSnapshot.epoch.isEqualOrAfter(start), + "Got %s, but requested snapshot of %s", closestSnapshot.epoch, start); + EntryHolder entryHolder = getEntries(closestSnapshot.epoch, end); return new LogState(closestSnapshot, ImmutableList.copyOf(entryHolder.entries)); } } diff --git a/src/java/org/apache/cassandra/tcm/log/LogState.java b/src/java/org/apache/cassandra/tcm/log/LogState.java index 03294e9ffb77..fcb1a05a51d7 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogState.java +++ b/src/java/org/apache/cassandra/tcm/log/LogState.java @@ -27,6 +27,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.utils.Invariants; import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.SystemKeyspace; @@ -72,6 +73,10 @@ public static IVersionedSerializer messageSerializer(Version version) // Uses Replication rather than an just a list of entries primarily to avoid duplicating the existing serializer public LogState(ClusterMetadata baseState, ImmutableList entries) { + Invariants.checkState(baseState == null || + entries.isEmpty() || + entries.get(0).epoch.isDirectlyAfter(baseState.epoch), + "Base state: %s, first entry: %s", baseState == null ? null : baseState.epoch, entries.isEmpty() ? null : entries.get(0).epoch); this.baseState = baseState; this.entries = entries; } diff --git a/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java index 59379852cc19..be7e9a763f55 100644 --- a/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java +++ b/src/java/org/apache/cassandra/tcm/transformations/BeginConsensusMigrationForTableAndRange.java @@ -31,9 +31,9 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.service.consensus.migration.ConsensusMigrationTarget; import org.apache.cassandra.service.consensus.migration.ConsensusTableMigration; -import org.apache.cassandra.service.consensus.migration.ConsensusMigrationState; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Transformation; import org.apache.cassandra.tcm.sequences.LockedRanges; diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/SystemKeyspaceStorageTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/SystemKeyspaceStorageTest.java index 2e131bcf497b..28ef23bf6271 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/SystemKeyspaceStorageTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/SystemKeyspaceStorageTest.java @@ -98,14 +98,13 @@ public void testLogStateQuery() throws Throwable cluster.get(1).runOnInstance(() -> deleteSnapshot(toRemoveSnapshot.getEpoch())); } } - Epoch latestSnapshot = remainingSnapshots.get(remainingSnapshots.size() - 1); Epoch lastEpoch = allEpochs.stream().max(Comparator.naturalOrder()).get(); repeat(10, () -> { repeat(100, () -> { Epoch since = allEpochs.get(rng.nextInt(allEpochs.size())); - for (boolean consistentReplay : new boolean[]{ true, false }) + for (boolean consistentFetch : new boolean[]{ true, false }) { - LogState logState = simulatedCluster.node(2).requestResponse(new FetchCMSLog(since, consistentReplay)); + LogState logState = simulatedCluster.node(2).requestResponse(new FetchCMSLog(since, consistentFetch)); // if we return a snapshot it is always the most recent one // we don't return a snapshot if there is only 1 snapshot after `since` Epoch start = since; @@ -119,12 +118,16 @@ public void testLogStateQuery() throws Throwable } else { - assertEquals(latestSnapshot, logState.baseState.epoch); + assertEquals(since, logState.baseState.epoch); start = logState.baseState.epoch; if (logState.entries.isEmpty()) // no entries, snapshot should have the same epoch as since assertEquals(since, start); else // first epoch in entries should be snapshot epoch + 1 + { + if (!start.nextEpoch().equals(logState.entries.get(0).epoch)) + System.out.println(1); assertEquals(start.nextEpoch(), logState.entries.get(0).epoch); + } } for (Entry entry : logState.entries) @@ -174,7 +177,7 @@ public static void repeat(int num, ExecUtil.ThrowingSerializableRunnable r) } catch (Throwable throwable) { - throw new AssertionError(throwable); + throw new AssertionError(String.format("Failed on %dth/%d repetition", i, num), throwable); } } } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 7da1a51e13de..f96c3b438e51 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -109,8 +109,8 @@ public SimulatedAccordCommandStore(RandomSource rs) globalExecutor = new SimulatedExecutorFactory(rs.fork(), fromQT(Generators.TIMESTAMP_GEN.map(java.sql.Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs), failures::add); this.unorderedScheduled = globalExecutor.scheduled("ignored"); ExecutorFactory.Global.unsafeSet(globalExecutor); - Stage.READ.unsafeSetExecutor(unorderedScheduled); - Stage.MUTATION.unsafeSetExecutor(unorderedScheduled); + for (Stage stage : Arrays.asList(Stage.READ, Stage.MUTATION, Stage.ACCORD_RANGE_LOADER)) + stage.unsafeSetExecutor(unorderedScheduled); for (Stage stage : Arrays.asList(Stage.MISC, Stage.ACCORD_MIGRATION, Stage.READ, Stage.MUTATION)) stage.unsafeSetExecutor(globalExecutor.configureSequential("ignore").build()); @@ -216,11 +216,16 @@ public void onEvict(AccordCachingState state) this.topology = AccordTopology.createAccordTopology(ClusterMetadata.current()); this.topologies = new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology); var rangesForEpoch = new CommandStores.RangesForEpoch(topology.epoch(), topology.ranges(), store); + store.unsafeSetRangesForEpoch(rangesForEpoch); updateHolder.add(topology.epoch(), rangesForEpoch, topology.ranges()); updateHolder.updateGlobal(topology.ranges()); shouldEvict = boolSource(rs.fork()); - shouldFlush = boolSource(rs.fork()); + { + // tests used to take 1m but after many changes in accord they now take many minutes and its due to flush... so lower the frequency of flushing + var fork = rs.fork(); + shouldFlush = () -> fork.decide(.01); + } shouldCompact = boolSource(rs.fork()); } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java index 74ee74a4d124..05f4dffe94c0 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TreeSet; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -44,7 +46,9 @@ import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routable; +import accord.primitives.Routables; import accord.primitives.RoutingKeys; +import accord.primitives.Seekables; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.primitives.Unseekables; @@ -67,6 +71,8 @@ import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.utils.RTree; +import org.apache.cassandra.utils.RangeTree; import org.assertj.core.api.Assertions; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; @@ -168,6 +174,17 @@ protected static Map> rangeConflicts(List list, Ranges return kc; } + protected static void assertDepsMessage(SimulatedAccordCommandStore instance, + DepsMessage messageType, + Txn txn, FullRoute route, + DepsModel model) throws ExecutionException, InterruptedException + { + TxnId id = assertDepsMessage(instance, messageType, txn, route, + model.keyConflicts(txn.keys()), + model.rangeConflicts(txn.keys())); + model.register(id, txn); + } + protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, DepsMessage messageType, Txn txn, FullRoute route, @@ -294,7 +311,6 @@ protected static void assertDeps(TxnId txnId, Deps deps, else { List actualRanges = IntStream.range(0, deps.rangeDeps.rangeCount()).mapToObj(deps.rangeDeps::range).collect(Collectors.toList()); -// Assertions.assertThat(deps.rangeDeps.rangeCount()).describedAs("Txn %s Expected ranges size; %s", txnId, deps.rangeDeps).isEqualTo(rangeConflicts.size()); Assertions.assertThat(Ranges.of(actualRanges.toArray(Range[]::new))) .describedAs("Txn %s had different ranges than expected", txnId) .isEqualTo(Ranges.of(rangeConflicts.keySet().toArray(Range[]::new))); @@ -380,4 +396,81 @@ protected static Gen>> randomTxn(Gen dom } }; } + + public static class DepsModel + { + private final Map> keyConflicts = new HashMap<>(); + private final RangeTree rangeConflicts = RTree.create(RangeTreeRangeAccessor.instance); + private final Ranges storeRanges; + + public DepsModel(Ranges storeRanges) + { + this.storeRanges = storeRanges; + } + + public Map> keyConflicts(Seekables keysOrRanges) + { + keysOrRanges = keysOrRanges.slice(storeRanges, Routables.Slice.Minimal); + switch (keysOrRanges.domain()) + { + case Key: + { + Keys keys = (Keys) keysOrRanges; + Map> expectedConflicts = new HashMap<>(); + keys.forEach(k -> expectedConflicts.put(k.toUnseekable(), keyConflicts.getOrDefault(k.toUnseekable(), Collections.emptyList()))); + return expectedConflicts; + } + case Range: + { + Ranges ranges = (Ranges) keysOrRanges; + return keyConflicts.entrySet().stream() + .filter(e -> ranges.contains(e.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + default: + throw new UnsupportedOperationException(); + } + } + + public Map> rangeConflicts(Seekables keysOrRanges) + { + // there is a patch pending to add range support for keys... that isn't here yet so not handled + if (keysOrRanges.domain() != Routable.Domain.Range) + return Collections.emptyMap(); + keysOrRanges = keysOrRanges.slice(storeRanges, Routables.Slice.Minimal); + + Ranges ranges = (Ranges) keysOrRanges; + Map> conflicts = new HashMap<>(); + ranges.forEach(r -> rangeConflicts.search(r, e -> { + for (Range range : Ranges.single(e.getKey()).slice(ranges, Routables.Slice.Minimal)) + conflicts.computeIfAbsent(range, ignore -> new ArrayList<>()).add(e.getValue()); + })); + // need to dedup/sort txns + conflicts.values().forEach(l -> { + var sortedDedup = new ArrayList<>(new TreeSet<>(l)); + l.clear(); + l.addAll(sortedDedup); + }); + return conflicts; + } + + public void register(TxnId txnId, Txn txn) + { + for (var s : txn.keys()) + { + switch (s.domain()) + { + case Key: + keyConflicts.computeIfAbsent(s.asKey().toUnseekable(), i -> new ArrayList<>()).add(txnId); + break; + case Range: + rangeConflicts.add(s.asRange(), txnId); + break; + default: + throw new UnsupportedOperationException(); + } + } + } + } } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java index 961a1dfdb3a2..1d9935084c32 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java @@ -186,13 +186,12 @@ public void simpleRangeConflicts() FullRangeRoute rangeRoute = ranges.toRoute(pk.toUnseekable()); Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, ranges); - List keyConflicts = new ArrayList<>(numSamples); - List rangeConflicts = new ArrayList<>(numSamples); + DepsModel model = new DepsModel(instance.store.unsafeRangesForEpoch().currentRanges()); for (int i = 0; i < numSamples; i++) { instance.maybeCacheEvict(keyRoute, ranges); - keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keyRoute))); - rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keyRoute), rangeConflicts(rangeConflicts, ranges))); + assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, model); + assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, model); } } }); @@ -259,21 +258,18 @@ public void overlappingRangeConflicts() Range left = tokenRange(tbl.id, token - 10, token + 5); Range right = tokenRange(tbl.id, token - 5, token + 10); - List keyConflicts = new ArrayList<>(numSamples); - Map> rangeConflicts = new HashMap<>(); - rangeConflicts.put(left, new ArrayList<>()); - rangeConflicts.put(right, new ArrayList<>()); + DepsModel model = new DepsModel(instance.store.unsafeRangesForEpoch().currentRanges()); for (int i = 0; i < numSamples; i++) { Ranges partialRange = Ranges.of(rs.nextBoolean() ? left : right); try { instance.maybeCacheEvict(keyRoute, partialRange); - keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keyRoute))); + assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, model); FullRangeRoute rangeRoute = partialRange.toRoute(pk.toUnseekable()); Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, partialRange); - rangeConflicts.get(partialRange.get(0)).add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keyRoute), rangeConflicts)); + assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, model); } catch (Throwable t) { diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java index feaddeff8c4e..16a4d8306568 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java @@ -19,11 +19,8 @@ package org.apache.cassandra.service.accord; import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; @@ -40,12 +37,9 @@ import accord.primitives.Ranges; import accord.primitives.Routable.Domain; import accord.primitives.Txn; -import accord.primitives.TxnId; import accord.utils.Gen; import accord.utils.Gens; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.utils.RTree; -import org.apache.cassandra.utils.RangeTree; import static accord.utils.Property.qt; import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; @@ -73,12 +67,12 @@ public void test() Gen.LongGen tokenGen = tokenDistribution.next(rs); Gen domainGen = domainDistribution.next(rs); Gen msgGen = msgDistribution.next(rs); - Map> keyConflicts = new HashMap<>(); - RangeTree rangeConflicts = RTree.create(RangeTreeRangeAccessor.instance); Gen.IntGen keyCountGen = keyDistribution.next(rs); Gen.IntGen rangeCountGen = rangeDistribution.next(rs); + DepsModel model = new DepsModel(instance.store.unsafeRangesForEpoch().currentRanges()); + for (int i = 0; i < numSamples; i++) { switch (domainGen.next(rs)) @@ -99,11 +93,7 @@ public void test() Txn txn = createTxn(wrapInTxn(inserts), binds); FullRoute route = keys.toRoute(keys.get(0).toUnseekable()); - Map> expectedConflicts = new HashMap<>(); - route.forEach(k -> expectedConflicts.put(k, keyConflicts.computeIfAbsent(k, ignore -> new ArrayList<>()))); - - TxnId id = assertDepsMessage(instance, msgGen.next(rs), txn, route, expectedConflicts, Collections.emptyMap()); - route.forEach(k -> keyConflicts.get(k).add(id)); + assertDepsMessage(instance, msgGen.next(rs), txn, route, model); } break; case Range: @@ -133,21 +123,7 @@ public void test() FullRangeRoute route = ranges.toRoute(ranges.get(0).end()); Txn txn = createTxn(Txn.Kind.ExclusiveSyncPoint, ranges); - Map> expectedKeyConflicts = keyConflicts.entrySet().stream() - .filter(e -> ranges.contains(e.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - Map> expectedRangeConflicts = new HashMap<>(); - ranges.forEach(r -> - rangeConflicts.search(r, e -> - expectedRangeConflicts.computeIfAbsent(e.getKey(), ignore -> new ArrayList<>()).add(e.getValue()))); - // need to dedup/sort txns - expectedRangeConflicts.values().forEach(l -> { - var sortedDedup = new ArrayList<>(new TreeSet<>(l)); - l.clear(); - l.addAll(sortedDedup); - }); - TxnId id = assertDepsMessage(instance, msgGen.next(rs), txn, route, expectedKeyConflicts, expectedRangeConflicts); - ranges.forEach(r -> rangeConflicts.add(r, id)); + assertDepsMessage(instance, msgGen.next(rs), txn, route, model); } break; default: diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java index 30f0f0aa7fa2..d1edbb445523 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java @@ -25,21 +25,13 @@ import accord.primitives.Ranges; import accord.primitives.RoutingKeys; import accord.primitives.Txn; -import accord.primitives.TxnId; import accord.utils.Property; import accord.utils.RandomSource; -import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.utils.FailingConsumer; import org.junit.Test; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; import static accord.utils.Property.commands; import static accord.utils.Property.stateful; @@ -51,16 +43,14 @@ public class SimulatedRandomKeysWithRangeConflictTest extends SimulatedAccordCom private static Property.SimpleCommand insertKey(RandomSource rs, State state) { long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); - RoutingKey key = new TokenKey(state.tbl.id, new LongToken(token)); Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + state.tbl + "(pk, value) VALUES (?, ?)"), - Arrays.asList(keyForToken(token), 42)); + Arrays.asList(keyForToken(token), 42)); Keys keys = (Keys) keyTxn.keys(); FullRoute keyRoute = keys.toRoute(keys.get(0).toUnseekable()); return new Property.SimpleCommand<>("Write Txn: " + keys, FailingConsumer.orFail(s -> { s.instance.maybeCacheEvict(keyRoute, s.wholeRange); - var k = assertDepsMessage(s.instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, s.keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); - s.keyConflicts.get(key).add(k); + assertDepsMessage(s.instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, s.model); })); } @@ -68,7 +58,7 @@ private static Property.SimpleCommand insertRange(RandomSource rs, State { return new Property.SimpleCommand<>("Range Txn: " + state.wholeRange, FailingConsumer.orFail(s -> { s.instance.maybeCacheEvict(RoutingKeys.EMPTY, s.wholeRange); - s.rangeConflicts.add(assertDepsMessage(s.instance, rs.pick(DepsMessage.values()), s.rangeTxn, s.rangeRoute, s.keyConflicts, rangeConflicts(s.rangeConflicts, s.wholeRange))); + assertDepsMessage(s.instance, rs.pick(DepsMessage.values()), s.rangeTxn, s.rangeRoute, s.model); })); } @@ -77,27 +67,27 @@ private static Property.SimpleCommand insertRange(RandomSource rs, State public void keysAllOverConflictingWithRange() { stateful().withSteps(State.steps).check(commands(() -> State::new) - .add(SimulatedRandomKeysWithRangeConflictTest::insertKey) - .add(SimulatedRandomKeysWithRangeConflictTest::insertRange) - .build()); + .add(SimulatedRandomKeysWithRangeConflictTest::insertKey) + .add(SimulatedRandomKeysWithRangeConflictTest::insertRange) + .build()); } public static class State { static final int steps = 300; final SimulatedAccordCommandStore instance; - final Map> keyConflicts = new HashMap<>(); - final List rangeConflicts = new ArrayList<>(steps); final TableMetadata tbl = reverseTokenTbl; final Ranges wholeRange = Ranges.of(fullRange(tbl.id)); final FullRangeRoute rangeRoute = wholeRange.toRoute(wholeRange.get(0).end()); final Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, wholeRange); + final DepsModel model; public State(RandomSource rs) { AccordKeyspace.unsafeClear(); this.instance = new SimulatedAccordCommandStore(rs); + this.model = new DepsModel(instance.store.unsafeRangesForEpoch().currentRanges()); } @Override diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index ae11f3fa5980..052736200c89 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -34,29 +34,31 @@ import java.util.function.Supplier; import org.apache.commons.lang3.ArrayUtils; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import accord.api.Key; import accord.api.RoutingKey; +import accord.local.Command; +import accord.local.CommonAttributes; +import accord.local.CommonAttributes.Mutable; +import accord.local.Node; import accord.local.StoreParticipants; import accord.local.cfk.CommandsForKey; import accord.local.cfk.CommandsForKey.InternalStatus; -import accord.local.Command; import accord.local.cfk.CommandsForKey.TxnInfo; import accord.local.cfk.CommandsForKey.Unmanaged; -import accord.local.CommonAttributes; -import accord.local.CommonAttributes.Mutable; -import accord.local.Node; -import accord.primitives.SaveStatus; -import accord.primitives.Status; import accord.primitives.Ballot; import accord.primitives.KeyDeps; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.RangeDeps; import accord.primitives.Routable; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -82,9 +84,9 @@ import org.apache.cassandra.utils.CassandraGenerators; import static accord.local.cfk.CommandsForKey.NO_BOUNDS_INFO; -import static accord.primitives.Status.Durability.NotDurable; import static accord.primitives.Known.KnownExecuteAt.ExecuteAtErased; import static accord.primitives.Known.KnownExecuteAt.ExecuteAtUnknown; +import static accord.primitives.Status.Durability.NotDurable; import static accord.utils.Property.qt; import static accord.utils.SortedArrays.Search.FAST; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; @@ -103,6 +105,18 @@ public static void beforeClass() throws Throwable StorageService.instance.initServer(); } + @Before + public void before() throws Throwable + { + CommandsForKey.disableLinearizabilityViolationsReporting(); + } + + @After + public void after() throws Throwable + { + CommandsForKey.enableLinearizabilityViolationsReporting(); + } + static class Cmd { final TxnId txnId; From 5a12875b3a7a5c7c6b152a8fc19e1b2b2b6c46e8 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Oct 2024 23:09:22 +0100 Subject: [PATCH 182/340] Use ExclusiveSyncPoints to join a new topology For correctness, the dependencies we adopt on joining a new topology must exclude the possibility of respondents accepting additional transactions with a lower TxnId, so proxying on the existing `ExclusiveSyncPoint` mechanisms is logical for the time-being. This patch removes the `FetchMajorityDeps` logic in favour of simply waiting for a suitable `ExclusiveSyncPoint` to be proposed. patch by Benedict, reviewed by Alex Petrov for CASSANDRA-20056 --- modules/accord | 2 +- .../service/accord/AccordCommandStore.java | 95 +++++-------------- .../service/accord/AccordJournal.java | 14 --- .../accord/AccordJournalValueSerializers.java | 60 ------------ .../accord/AccordSafeCommandStore.java | 26 ----- .../service/accord/AccordService.java | 17 ++-- .../accord/CommandsForRangesLoader.java | 27 ++++-- .../cassandra/service/accord/IJournal.java | 5 - .../cassandra/service/accord/JournalKey.java | 11 --- .../service/accord/api/AccordScheduler.java | 7 ++ .../accord/AccordJournalCompactionTest.java | 10 -- .../cassandra/service/accord/MockJournal.java | 13 --- .../service/accord/SavedCommandTest.java | 1 - 13 files changed, 60 insertions(+), 228 deletions(-) diff --git a/modules/accord b/modules/accord index a63cac24a219..d77bdd1a4cd9 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit a63cac24a2198a5893874cdf72946073854a8d4d +Subproject commit d77bdd1a4cd96120868279b665e0abe4ab509a80 diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index c46c953a610e..c7ee8aa6cd3b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -28,7 +28,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.IntFunction; @@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory; import accord.api.Agent; -import accord.api.ConfigurationService; import accord.api.DataStore; import accord.api.LocalListeners; import accord.api.ProgressLog; @@ -51,16 +49,14 @@ import accord.local.CommandStores; import accord.local.Commands; import accord.local.KeyHistory; -import accord.local.Node; import accord.local.NodeCommandStoreService; import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.local.cfk.CommandsForKey; -import accord.primitives.Deps; import accord.primitives.Participants; -import accord.primitives.Range; +import accord.primitives.RangeDeps; import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.RoutableKey; @@ -70,8 +66,6 @@ import accord.utils.Invariants; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; -import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.SequentialExecutorPlus; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -82,12 +76,10 @@ import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.service.accord.events.CacheEvents; import org.apache.cassandra.utils.Clock; -import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.concurrent.AsyncPromise; import org.apache.cassandra.utils.concurrent.Promise; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; -import static accord.api.ConfigurationService.EpochReady.DONE; import static accord.local.KeyHistory.COMMANDS; import static accord.primitives.SaveStatus.Applying; import static accord.primitives.Status.Committed; @@ -117,7 +109,7 @@ private static void registerJfrListener(int id, AccordStateCache.Instance { if (!DatabaseDescriptor.getAccordStateCacheListenerJFREnabled()) return; - instance.register(new AccordStateCache.Listener() { + instance.register(new AccordStateCache.Listener<>() { private final IdentityHashMap, CacheEvents.Evict> pendingEvicts = new IdentityHashMap<>(); @Override @@ -249,6 +241,7 @@ public void markShardDurable(SafeCommandStore safeStore, TxnId globalSyncId, Ran { store.snapshot(ranges, globalSyncId); super.markShardDurable(safeStore, globalSyncId, ranges); + commandsForRangesLoader.gcBefore(globalSyncId, ranges); } @Override @@ -506,75 +499,37 @@ public void shutdown() { } - protected ConfigurationService.EpochReady syncInternal(Node node, Ranges ranges, long epoch, boolean isLoad) + public void registerTransitive(SafeCommandStore safeStore, RangeDeps rangeDeps) { - if (!isLoad) - return super.syncInternal(node, ranges, epoch, false); - - List> loaded = journal.loadHistoricalTransactions(epoch, id); - // synchronously load and register historical, so we don't have unlimited numbers of epochs in flight - for (Pair pair : loaded) - { - cancelFetch(pair.left, epoch); - try - { - logger.info("Restoring sync'd deps for {} at epoch {}", pair.left, epoch); - AsyncChains.getBlocking(submit(PreLoadContext.contextFor(null, pair.right.keyDeps.keys(), COMMANDS), safeStore -> { - registerHistoricalTransactions(pair.left, pair.right, safeStore); - return null; - }).beginAsResult(), 5L, TimeUnit.MINUTES); - } - catch (InterruptedException | TimeoutException | ExecutionException e) - { - throw new RuntimeException(e); - } - ranges = ranges.without(Ranges.of(pair.left)); - } - - if (ranges.isEmpty()) - { - AsyncResult done = AsyncResults.success(null); - return new ConfigurationService.EpochReady(epoch, DONE, done, done, done); - } - - return super.syncInternal(node, ranges, epoch, false); - } - - public void registerHistoricalTransactions(Range range, Deps deps, SafeCommandStore safeStore) - { - if (deps.isEmpty()) return; + if (rangeDeps.isEmpty()) + return; + RedundantBefore redundantBefore = unsafeGetRedundantBefore(); CommandStores.RangesForEpoch ranges = safeStore.ranges(); // used in places such as accord.local.CommandStore.fetchMajorityDeps // We find a set of dependencies for a range then update CommandsFor to know about them Ranges allRanges = safeStore.ranges().all(); - deps.keyDeps.keys().forEach(allRanges, key -> { - // TODO (desired): batch register to minimise GC - deps.keyDeps.forEach(key, (txnId, txnIdx) -> { - if (ranges.coordinates(txnId).contains(key)) - return; // already coordinates, no need to replicate - if (!ranges.allBefore(txnId.epoch()).contains(key)) - return; - - safeStore.get(key).registerHistorical(safeStore, txnId); - }); - }); - for (int i = 0; i < deps.rangeDeps.rangeCount(); i++) + Ranges coordinateRanges = Ranges.EMPTY; + long coordinateEpoch = -1; + for (int i = 0; i < rangeDeps.txnIdCount(); i++) { - Range r = deps.rangeDeps.range(i); - if (!allRanges.intersects(r)) + TxnId txnId = rangeDeps.txnId(i); + AccordCachingState state = commandCache.getUnsafe(txnId); + if (state != null && state.isLoaded() && state.get() != null && state.get().known().isDefinitionKnown()) continue; - deps.rangeDeps.forEach(r, txnId -> { - // TODO (desired, efficiency): this can be made more efficient by batching by epoch - if (ranges.coordinates(txnId).intersects(r)) - return; // already coordinates, no need to replicate - if (!ranges.allBefore(txnId.epoch()).intersects(r)) - return; - // TODO (required): this is potentially not safe - it should not be persisted until we save in journal - // but, preferable to retire historical transactions as a concept entirely, and rely on ExclusiveSyncPoints instead - diskCommandsForRanges().mergeHistoricalTransaction(txnId, Ranges.single(r).slice(allRanges), Ranges::with); - }); + Ranges addRanges = rangeDeps.ranges(i).slice(allRanges); + if (addRanges.isEmpty()) continue; + + if (coordinateEpoch != txnId.epoch()) + { + coordinateEpoch = txnId.epoch(); + coordinateRanges = ranges.allAt(txnId.epoch()); + } + if (addRanges.intersects(coordinateRanges)) continue; + addRanges = redundantBefore.removeShardRedundant(txnId, txnId, addRanges); + if (addRanges.isEmpty()) continue; + diskCommandsForRanges().mergeTransitive(txnId, addRanges, Ranges::with); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index a575da3f8bf0..aa79a492a94c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -40,8 +40,6 @@ import accord.local.Node; import accord.local.RedundantBefore; import accord.local.cfk.CommandsForKey; -import accord.primitives.Deps; -import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.SaveStatus; import accord.primitives.Timestamp; @@ -62,17 +60,14 @@ import org.apache.cassandra.journal.RecordPointer; import org.apache.cassandra.journal.ValueSerializer; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; import org.apache.cassandra.service.accord.JournalKey.JournalKeySupport; import org.apache.cassandra.utils.ExecutorUtils; -import org.apache.cassandra.utils.Pair; import static accord.primitives.SaveStatus.ErasedOrVestigial; import static accord.primitives.Status.Truncated; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; -import static org.apache.cassandra.service.accord.JournalKey.keyForHistoricalTransactions; public class AccordJournal implements IJournal, Shutdownable { @@ -239,13 +234,6 @@ public CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int store) return accumulator.get(); } - @Override - public List> loadHistoricalTransactions(long epoch, int store) - { - HistoricalTransactionsAccumulator accumulator = readAll(keyForHistoricalTransactions(epoch, store)); - return accumulator.get(); - } - @Override public void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onFlush) { @@ -304,8 +292,6 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.SAFE_TO_READ, store), fieldUpdates.newSafeToRead); if (fieldUpdates.newRangesForEpoch != null) pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store), fieldUpdates.newRangesForEpoch); - if (fieldUpdates.addHistoricalTransactions != null) - pointer = appendInternal(JournalKey.keyForHistoricalTransactions(fieldUpdates.addHistoricalTransactions.epoch, store), Pair.create(fieldUpdates.addHistoricalTransactions.range, fieldUpdates.addHistoricalTransactions.deps)); if (onFlush == null) return; diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java index f232b9213e9f..c6a2a46bf4eb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java @@ -19,16 +19,12 @@ package org.apache.cassandra.service.accord; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.NavigableMap; import com.google.common.collect.ImmutableSortedMap; import accord.local.DurableBefore; import accord.local.RedundantBefore; -import accord.primitives.Deps; -import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; @@ -37,11 +33,9 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; import org.apache.cassandra.service.accord.serializers.KeySerializers; -import org.apache.cassandra.utils.Pair; import static accord.local.CommandStores.RangesForEpoch; import static org.apache.cassandra.service.accord.SavedCommand.Load.ALL; -import static org.apache.cassandra.service.accord.serializers.DepsSerializer.deps; // TODO (required): test with large collection values, and perhaps split out some fields if they have a tendency to grow larger // TODO (required): alert on metadata size @@ -340,58 +334,4 @@ public void deserialize(JournalKey key, IdentityAccumulator>, Pair> - { - public HistoricalTransactionsAccumulator() - { - super(new ArrayList<>()); - } - - @Override - protected List> accumulate(List> oldValue, Pair deps) - { - accumulated.add(deps); // we can keep it mutable - return accumulated; - } - } - - public static class HistoricalTransactionsSerializer implements FlyweightSerializer, HistoricalTransactionsAccumulator> - { - @Override - public HistoricalTransactionsAccumulator mergerFor(JournalKey key) - { - return new HistoricalTransactionsAccumulator(); - } - - @Override - public void serialize(JournalKey key, Pair from, DataOutputPlus out, int userVersion) throws IOException - { - out.writeUnsignedVInt32(1); - TokenRange.serializer.serialize((TokenRange) from.left, out, messagingVersion); - deps.serialize(from.right, out, messagingVersion); - } - - @Override - public void reserialize(JournalKey key, HistoricalTransactionsAccumulator from, DataOutputPlus out, int userVersion) throws IOException - { - out.writeUnsignedVInt32(from.get().size()); - for (Pair d : from.get()) - { - TokenRange.serializer.serialize((TokenRange) d.left, out, messagingVersion); - deps.serialize(d.right, out, messagingVersion); - } - } - - @Override - public void deserialize(JournalKey key, HistoricalTransactionsAccumulator into, DataInputPlus in, int userVersion) throws IOException - { - int count = in.readUnsignedVInt32(); - for (int i = 0; i < count; i++) - { - Range range = TokenRange.serializer.deserialize(in, messagingVersion); - into.update(Pair.create(range, deps.deserialize(in, messagingVersion))); - } - } - } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index c260a013c638..eef12d6a7e59 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -41,8 +41,6 @@ import accord.local.cfk.CommandsForKey; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; -import accord.primitives.Deps; -import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routables; import accord.primitives.Timestamp; @@ -336,15 +334,6 @@ public RedundantBefore redundantBefore() return super.redundantBefore(); } - @Override - public void registerHistoricalTransactions(long epoch, Range range, Deps deps) - { - ensureFieldUpdates().addHistoricalTransactions = new HistoricalTransactions(epoch, range, deps); - // TODO (required): it is potentially unsafe to propagate this synchronously, as if we fail to write to the journal we may be in an inconsistent state - // however, we can and should retire the concept of historical transactions in favour of ExclusiveSyncPoints ensuring their deps are known - super.registerHistoricalTransactions(epoch, range, deps); - } - private FieldUpdates ensureFieldUpdates() { if (fieldUpdates == null) fieldUpdates = new FieldUpdates(); @@ -380,20 +369,5 @@ public static class FieldUpdates public NavigableMap newBootstrapBeganAt; public NavigableMap newSafeToRead; public RangesForEpoch.Snapshot newRangesForEpoch; - public HistoricalTransactions addHistoricalTransactions; - } - - public static class HistoricalTransactions - { - public final long epoch; - public final Range range; - public final Deps deps; - - public HistoricalTransactions(long epoch, Range range, Deps deps) - { - this.epoch = epoch; - this.range = range; - this.deps = deps; - } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 167428a759be..36710717d0c2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -66,7 +66,6 @@ import accord.coordinate.tracking.AllTracker; import accord.coordinate.tracking.RequestStatus; import accord.impl.AbstractConfigurationService; -import accord.impl.CoordinateDurabilityScheduling; import accord.impl.DefaultLocalListeners; import accord.impl.DefaultRemoteListeners; import accord.impl.RequestCallbacks; @@ -202,7 +201,6 @@ private enum State {INIT, STARTED, SHUTTING_DOWN, SHUTDOWN} private final AccordScheduler scheduler; private final AccordDataStore dataStore; private final AccordJournal journal; - private final CoordinateDurabilityScheduling durabilityScheduling; private final AccordVerbHandler requestHandler; private final AccordResponseVerbHandler responseHandler; private final LocalConfig configuration; @@ -448,7 +446,6 @@ private AccordService(Id localId) journal.durableBeforePersister(), configuration); this.nodeShutdown = toShutdownable(node); - this.durabilityScheduling = new CoordinateDurabilityScheduling(node); this.requestHandler = new AccordVerbHandler<>(node, configService); this.responseHandler = new AccordResponseVerbHandler<>(callbacks, configService); } @@ -504,13 +501,13 @@ class Ref { List historic = Collections.emptyList();} fastPathCoordinator.start(); cms.log().addListener(fastPathCoordinator); - durabilityScheduling.setDefaultRetryDelay(Ints.checkedCast(DatabaseDescriptor.getAccordDefaultDurabilityRetryDelay(SECONDS)), SECONDS); - durabilityScheduling.setMaxRetryDelay(Ints.checkedCast(DatabaseDescriptor.getAccordMaxDurabilityRetryDelay(SECONDS)), SECONDS); - durabilityScheduling.setTargetShardSplits(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityTargetSplits())); - durabilityScheduling.setGlobalCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordGlobalDurabilityCycle(SECONDS)), SECONDS); - durabilityScheduling.setShardCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityCycle(SECONDS)), SECONDS); - durabilityScheduling.setTxnIdLag(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityTxnIdLag(SECONDS)), TimeUnit.SECONDS); - durabilityScheduling.start(); + node.durabilityScheduling().setDefaultRetryDelay(Ints.checkedCast(DatabaseDescriptor.getAccordDefaultDurabilityRetryDelay(SECONDS)), SECONDS); + node.durabilityScheduling().setMaxRetryDelay(Ints.checkedCast(DatabaseDescriptor.getAccordMaxDurabilityRetryDelay(SECONDS)), SECONDS); + node.durabilityScheduling().setTargetShardSplits(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityTargetSplits())); + node.durabilityScheduling().setGlobalCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordGlobalDurabilityCycle(SECONDS)), SECONDS); + node.durabilityScheduling().setShardCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityCycle(SECONDS)), SECONDS); + node.durabilityScheduling().setTxnIdLag(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityTxnIdLag(SECONDS)), TimeUnit.SECONDS); + node.durabilityScheduling().start(); state = State.STARTED; } diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index f4162d02b895..c338e1116be1 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -20,6 +20,7 @@ import java.util.Collection; import java.util.Collections; +import java.util.Iterator; import java.util.Map; import java.util.NavigableMap; import java.util.Set; @@ -55,9 +56,8 @@ public class CommandsForRangesLoader implements AccordStateCache.Listener { private final RoutesSearcher searcher = new RoutesSearcher(); - //TODO (now, durability): find solution for this... - private final NavigableMap historicalTransaction = new TreeMap<>(); private final AccordCommandStore store; + private final NavigableMap transitive = new TreeMap<>(); private final ObjectHashSet cachedRangeTxns = new ObjectHashSet<>(); // TODO (required): make this configurable, or perhaps backed by READ stage with concurrency limit @@ -86,7 +86,7 @@ public void onEvict(AccordCachingState state) public AsyncResult>> get(@Nullable TxnId primaryTxnId, KeyHistory keyHistory, Ranges ranges) { RedundantBefore redundantBefore = store.unsafeGetRedundantBefore(); - TxnId minTxnId = redundantBefore.minGcBefore(ranges); + TxnId minTxnId = redundantBefore.min(ranges, e -> e.gcBefore); Timestamp maxTxnId = primaryTxnId == null || keyHistory == KeyHistory.RECOVERY || !primaryTxnId.is(ExclusiveSyncPoint) ? Timestamp.MAX : primaryTxnId; TxnId findAsDep = primaryTxnId != null && keyHistory == KeyHistory.RECOVERY ? primaryTxnId : null; Watcher watcher = fromCache(findAsDep, ranges, minTxnId, maxTxnId, redundantBefore); @@ -110,11 +110,11 @@ private Collection intersects(Range range, TxnId minTxnId, Timestamp maxT { assert range instanceof TokenRange : "Require TokenRange but given " + range.getClass(); Set intersects = searcher.intersects(store.id(), (TokenRange) range, minTxnId, maxTxnId); - if (!historicalTransaction.isEmpty()) + if (!transitive.isEmpty()) { if (intersects.isEmpty()) intersects = new ObjectHashSet<>(); - for (Map.Entry e : historicalTransaction.tailMap(minTxnId, true).entrySet()) + for (Map.Entry e : transitive.tailMap(minTxnId, true).entrySet()) { if (e.getValue().intersects(range)) intersects.add(e.getKey()); @@ -328,9 +328,22 @@ private static Summary create(SavedCommand.MinimalCommand cmd, Ranges cacheRange return new Summary(cmd.txnId, cmd.executeAt, saveStatus, ranges, null, false); } - public void mergeHistoricalTransaction(TxnId txnId, Ranges ranges, BiFunction remappingFunction) + public void mergeTransitive(TxnId txnId, Ranges ranges, BiFunction remappingFunction) { - historicalTransaction.merge(txnId, ranges, remappingFunction); + transitive.merge(txnId, ranges, remappingFunction); + } + + public void gcBefore(TxnId gcBefore, Ranges ranges) + { + Iterator> iterator = transitive.headMap(gcBefore).entrySet().iterator(); + while (iterator.hasNext()) + { + Map.Entry e = iterator.next(); + Ranges newRanges = e.getValue().without(ranges); + if (newRanges.isEmpty()) + iterator.remove(); + e.setValue(newRanges); + } } public static class Summary diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java index 635e5e2d1581..849173c9b5e7 100644 --- a/src/java/org/apache/cassandra/service/accord/IJournal.java +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -18,20 +18,16 @@ package org.apache.cassandra.service.accord; -import java.util.List; import java.util.NavigableMap; import accord.local.Command; import accord.local.CommandStores; import accord.local.DurableBefore; import accord.local.RedundantBefore; -import accord.primitives.Deps; -import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.PersistentField.Persister; -import org.apache.cassandra.utils.Pair; public interface IJournal { @@ -42,7 +38,6 @@ public interface IJournal NavigableMap loadBootstrapBeganAt(int commandStoreId); NavigableMap loadSafeToRead(int commandStoreId); CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int commandStoreId); - List> loadHistoricalTransactions(long epoch, int store); void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onFlush); Persister durableBeforePersister(); diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index 8266b3ac64ff..4292387ee712 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -23,11 +23,8 @@ import java.util.Objects; import java.util.zip.Checksum; -import accord.local.Node; import accord.local.Node.Id; -import accord.primitives.Routable; import accord.primitives.Timestamp; -import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.io.util.DataInputPlus; @@ -37,7 +34,6 @@ import org.apache.cassandra.service.accord.AccordJournalValueSerializers.CommandDiffSerializer; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeSerializer; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.FlyweightSerializer; -import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsSerializer; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeSerializer; import org.apache.cassandra.utils.ByteArrayUtil; @@ -237,7 +233,6 @@ public enum Type SAFE_TO_READ (3, new SafeToReadSerializer()), BOOTSTRAP_BEGAN_AT (4, new BootstrapBeganAtSerializer()), RANGES_FOR_EPOCH (5, new RangesForEpochSerializer()), - HISTORICAL_TRANSACTIONS (6, new HistoricalTransactionsSerializer()) ; public final int id; @@ -279,10 +274,4 @@ static Type fromId(int id) return type; } } - - public static JournalKey keyForHistoricalTransactions(long epoch, int store) - { - TxnId txnId = new TxnId(epoch, 0l, Txn.Kind.LocalOnly, Routable.Domain.Range, Node.Id.NONE); - return new JournalKey(txnId, JournalKey.Type.HISTORICAL_TRANSACTIONS, store); - } } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java index 50720952a4f5..dec0cbb22590 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordScheduler.java @@ -68,6 +68,13 @@ public Scheduled once(Runnable run, long delay, TimeUnit units) return new ScheduledFutureWrapper(future); } + @Override + public Scheduled selfRecurring(Runnable run, long delay, TimeUnit units) + { + ScheduledFuture future = scheduledExecutor.scheduleSelfRecurring(run, delay, units); + return new ScheduledFutureWrapper(future); + } + @Override public void now(Runnable task) { diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java index 578c6a84b9c7..ac17ddad15b6 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -19,8 +19,6 @@ package org.apache.cassandra.service.accord; import java.nio.file.Files; -import java.util.Collections; -import java.util.List; import java.util.NavigableMap; import java.util.concurrent.atomic.AtomicInteger; @@ -51,9 +49,7 @@ import org.apache.cassandra.journal.TestParams; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.utils.AccordGenerators; -import org.apache.cassandra.utils.Pair; import static accord.local.CommandStores.RangesForEpoch; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; @@ -94,7 +90,6 @@ public void segmentMergeTest() throws InterruptedException NavigableMap safeToReadAtAccumulator = ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY); NavigableMap bootstrapBeganAtAccumulator = ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY); RangesForEpoch.Snapshot rangesForEpochAccumulator = null; - HistoricalTransactionsAccumulator historicalTransactionsAccumulator = new HistoricalTransactionsAccumulator(); Gen redundantBeforeGen = AccordGenerators.redundantBefore(DatabaseDescriptor.getPartitioner()); Gen durableBeforeGen = AccordGenerators.durableBeforeGen(DatabaseDescriptor.getPartitioner()); @@ -137,7 +132,6 @@ public boolean enableCompaction() // updates.newRedundantBefore = redundantBefore = RedundantBefore.merge(redundantBefore, updates.addRedundantBefore); updates.newSafeToRead = safeToReadGen.next(rs); updates.newRangesForEpoch = rangesForEpochGen.next(rs); - updates.addHistoricalTransactions = new AccordSafeCommandStore.HistoricalTransactions(0l, rangeGen.next(rs), historicalTransactionsGen.next(rs)); journal.durableBeforePersister().persist(addDurableBefore, null); journal.persistStoreState(1, updates, null); @@ -150,7 +144,6 @@ public boolean enableCompaction() safeToReadAtAccumulator = updates.newSafeToRead; if (updates.newRangesForEpoch != null) rangesForEpochAccumulator = updates.newRangesForEpoch; - historicalTransactionsAccumulator.update(Pair.create(updates.addHistoricalTransactions.range, updates.addHistoricalTransactions.deps)); if (i % 100 == 0) journal.closeCurrentSegmentForTestingIfNonEmpty(); @@ -163,9 +156,6 @@ public boolean enableCompaction() Assert.assertEquals(bootstrapBeganAtAccumulator, journal.loadBootstrapBeganAt(1)); Assert.assertEquals(safeToReadAtAccumulator, journal.loadSafeToRead(1)); Assert.assertEquals(rangesForEpochAccumulator, journal.loadRangesForEpoch(1)); - List> historical = historicalTransactionsAccumulator.get(); - Collections.reverse(historical); - Assert.assertEquals(historical, journal.loadHistoricalTransactions(0l, 1)); } finally { diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index b7b2fecb5762..7d7c49153e97 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -36,11 +36,9 @@ import accord.local.RedundantBefore; import accord.local.StoreParticipants; import accord.primitives.Known; -import accord.primitives.Range; import accord.primitives.SaveStatus; import accord.primitives.Status; import accord.primitives.Ballot; -import accord.primitives.Deps; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Ranges; @@ -52,13 +50,11 @@ import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; -import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; import org.apache.cassandra.service.accord.SavedCommand.Load; import org.apache.cassandra.service.accord.SavedCommand.MinimalCommand; import org.apache.cassandra.service.accord.serializers.CommandSerializers; -import org.apache.cassandra.utils.Pair; public class MockJournal implements IJournal { @@ -70,7 +66,6 @@ private static class FieldUpdates final IdentityAccumulator> bootstrapBeganAtAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY)); final IdentityAccumulator> safeToReadAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY)); final IdentityAccumulator rangesForEpochAccumulator = new IdentityAccumulator<>(null); - final HistoricalTransactionsAccumulator historicalTransactionsAccumulator = new HistoricalTransactionsAccumulator(); } final DurableBeforeAccumulator durableBeforeAccumulator = new DurableBeforeAccumulator(); @@ -140,12 +135,6 @@ public CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int store) return fieldUpdates(store).rangesForEpochAccumulator.get(); } - @Override - public List> loadHistoricalTransactions(long epoch, int store) - { - return fieldUpdates(store).historicalTransactionsAccumulator.get(); - } - @Override public void appendCommand(int store, SavedCommand.DiffWriter diff, Runnable onFlush) { @@ -177,8 +166,6 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie updates.safeToReadAccumulator.update(fieldUpdates.newSafeToRead); if (fieldUpdates.newRangesForEpoch != null) updates.rangesForEpochAccumulator.update(fieldUpdates.newRangesForEpoch); - if (fieldUpdates.addHistoricalTransactions != null) - updates.historicalTransactionsAccumulator.update(Pair.create(fieldUpdates.addHistoricalTransactions.range, fieldUpdates.addHistoricalTransactions.deps)); if (onFlush != null) onFlush.run(); diff --git a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java index 0108cd9cac96..0e2c57bb97a9 100644 --- a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java @@ -47,7 +47,6 @@ import static accord.utils.Property.qt; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; -import static org.apache.cassandra.service.accord.SavedCommand.Load.ALL; import static org.apache.cassandra.service.accord.SavedCommand.getFlags; public class SavedCommandTest From 9fe1a977b5291af51a78c4543929450e0d788faf Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 28 Oct 2024 09:54:05 -0700 Subject: [PATCH 183/340] Get Harry working on top of Accord and fix various issues found by TopologyMixupTestBase patch by David Capwell; reviewed by Alex Petrov, David Capwell for CASSANDRA-20054 --- modules/accord | 2 +- .../org/apache/cassandra/config/Config.java | 1 + .../cassandra/config/DatabaseDescriptor.java | 9 +- .../service/accord/AccordFastPath.java | 11 +- .../service/accord/AccordService.java | 35 +- .../tcm/AtomicLongBackedProcessor.java | 14 +- .../cassandra/tcm/ClusterMetadataService.java | 4 +- .../org/apache/cassandra/tcm/FetchCMSLog.java | 2 +- .../apache/cassandra/tcm/FetchPeerLog.java | 6 +- .../cassandra/tcm/PaxosBackedProcessor.java | 19 +- .../org/apache/cassandra/tcm/Processor.java | 2 +- .../apache/cassandra/tcm/RemoteProcessor.java | 4 +- src/java/org/apache/cassandra/tcm/Retry.java | 71 ++- .../tcm/StubClusterMetadataService.java | 2 +- .../apache/cassandra/tcm/log/LogReader.java | 3 +- .../tcm/migration/GossipProcessor.java | 2 +- .../org/apache/cassandra/utils/Backoff.java | 5 + .../cassandra/utils/concurrent/Ref.java | 2 + .../cassandra/distributed/impl/Instance.java | 40 +- .../distributed/shared/ClusterUtils.java | 22 +- .../test/HungBootstrapDoesNotHangTest.java | 139 +++++ .../test/log/CoordinatorPathTestBase.java | 6 +- .../distributed/test/log/TestProcessor.java | 4 +- .../cassandra/harry/dsl/HistoryBuilder.java | 8 +- .../cassandra/service/RetryStrategyTest.java | 482 ------------------ .../service/accord/AccordStateCacheTest.java | 4 +- .../tcm/ValidatingClusterMetadataService.java | 16 +- .../tcm/log/DistributedLogStateTest.java | 4 +- .../tcm/log/LocalStorageLogStateTest.java | 4 +- .../cassandra/tcm/log/LogStateTestBase.java | 125 ++++- 30 files changed, 493 insertions(+), 555 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/HungBootstrapDoesNotHangTest.java delete mode 100644 test/unit/org/apache/cassandra/service/RetryStrategyTest.java diff --git a/modules/accord b/modules/accord index d77bdd1a4cd9..4ec8d262a750 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit d77bdd1a4cd96120868279b665e0abe4ab509a80 +Subproject commit 4ec8d262a750a76744b7f6991b711f85fa41a89a diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 06c9a5477f11..c33c9bef24c1 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -180,6 +180,7 @@ public static Set splitCommaDelimited(String src) public volatile DurationSpec.LongMillisecondsBound cms_await_timeout = new DurationSpec.LongMillisecondsBound("120000ms"); public volatile int cms_default_max_retries = 10; public volatile DurationSpec.IntMillisecondsBound cms_default_retry_backoff = new DurationSpec.IntMillisecondsBound("50ms"); + public volatile DurationSpec.IntMillisecondsBound cms_default_max_retry_backoff = new DurationSpec.IntMillisecondsBound("1s"); public volatile int epoch_aware_debounce_inflight_tracker_max_size = 100; /** * How often we should snapshot the cluster metadata. diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 05f15148d822..4f6cebbd52d7 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -3661,7 +3661,7 @@ public static boolean paxoTopologyRepairStrictEachQuorum() public static AccordSpec getAccord() { - return conf.accord; + return conf == null ? null : conf.accord; } public static AccordSpec.TransactionalRangeMigration getTransactionalRangeMigration() @@ -5730,11 +5730,16 @@ public static void setCmsDefaultRetryMaxTries(int value) conf.cms_default_max_retries = value; } - public static DurationSpec getDefaultRetryBackoff() + public static DurationSpec.IntMillisecondsBound getDefaultRetryBackoff() { return conf.cms_default_retry_backoff; } + public static DurationSpec.IntMillisecondsBound getDefaultMaxRetryBackoff() + { + return conf.cms_default_max_retry_backoff; + } + public static DurationSpec getCmsAwaitTimeout() { return conf.cms_await_timeout; diff --git a/src/java/org/apache/cassandra/service/accord/AccordFastPath.java b/src/java/org/apache/cassandra/service/accord/AccordFastPath.java index 3c45241c2f12..ac04488d131d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordFastPath.java +++ b/src/java/org/apache/cassandra/service/accord/AccordFastPath.java @@ -122,6 +122,15 @@ public int hashCode() return Objects.hash(status, updated); } + @Override + public String toString() + { + return "NodeInfo{" + + "status=" + status + + ", updated=" + updated + + '}'; + } + private static final MetadataSerializer serializer = new MetadataSerializer() { @Override @@ -194,7 +203,7 @@ public AccordFastPath withNodeStatusSince(Node.Id node, Status status, long upda } if (!canUpdateNodeTo(current, status, updateTimeMillis, updateDelayMillis)) - throw new InvalidRequestException(String.format("cannot transition %s to %s at %s", node, status, updateTimeMillis)); + throw new InvalidRequestException(String.format("cannot transition %s to %s at %s; current %s", node, status, updateTimeMillis, current)); ImmutableMap.Builder builder = ImmutableMap.builder(); builder.put(node, new NodeInfo(status, updateTimeMillis)); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 36710717d0c2..5433b091457c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord; import java.math.BigInteger; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -165,6 +166,7 @@ import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Blocking; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ExecutorUtils; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.concurrent.AsyncPromise; @@ -552,11 +554,7 @@ private List discoverHistoric(Node node, ClusterMetadataService public static List tcmLoadRange(long min, long max) { - List afterLoad = ClusterMetadataService.instance() - .processor() - .reconstruct(Epoch.create(min), Epoch.create(max), - Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS), - TCMMetrics.instance.fetchLogRetries)); + List afterLoad = reconstruct(min, max); if (Invariants.isParanoid()) Invariants.checkState(afterLoad.get(0).epoch.getEpoch() == min, "Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); @@ -568,6 +566,15 @@ public static List tcmLoadRange(long min, long max) return afterLoad; } + private static List reconstruct(long min, long max) + { + Epoch start = Epoch.create(min); + Epoch end = Epoch.create(max); + Retry.Deadline deadline = Retry.Deadline.wrap(new Retry.ExponentialBackoff(TCMMetrics.instance.fetchLogRetries)); + return ClusterMetadataService.instance().processor() + .reconstruct(start, end, deadline); + } + @VisibleForTesting static Long findMinEpoch(SharedContext context, Map> peers) { @@ -1222,15 +1229,27 @@ public void tryMarkRemoved(Topology topology, Id target) if (node.commandStores().count() == 0) return; // when starting up stores can be empty, so ignore Ranges ranges = topology.rangesForNode(target); if (ranges.isEmpty()) return; - tryMarkRemoved(ranges, 0).begin(node().agent()); + long startNanos = Clock.Global.nanoTime(); + exclusiveSyncPointWithRetries(ranges, 0) + .begin((s, f) -> { + if (f != null) + { + logger.warn("Unable to mark the ranges for {} as durable after node left; took {}", target, Duration.ofNanos(Clock.Global.nanoTime() - startNanos), f); + node.agent().onUncaughtException(f); + } + else + { + logger.info("Marked {} ranges as durable after node left; took {}", target, Duration.ofNanos(Clock.Global.nanoTime() - startNanos)); + } + }); } - private AsyncChain> tryMarkRemoved(Ranges ranges, int attempt) + private AsyncChain> exclusiveSyncPointWithRetries(Ranges ranges, int attempt) { return CoordinateSyncPoint.exclusiveSyncPoint(node, ranges) .recover(t -> //TODO (operability): make this configurable / monitorable? - attempt <= 3 && t instanceof Invalidated || t instanceof Preempted || t instanceof Timeout ? tryMarkRemoved(ranges, attempt + 1) : null); + attempt <= 3 && t instanceof Invalidated || t instanceof Preempted || t instanceof Timeout ? exclusiveSyncPointWithRetries(ranges, attempt + 1) : null); } public Node node() diff --git a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java index 55f4d964063b..7c5747003a6f 100644 --- a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java @@ -82,13 +82,7 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retry) } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) - { - return getLogState(start, end, includeSnapshot, retryPolicy); - } - - @Override - public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot) { try { @@ -114,6 +108,12 @@ public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnap } } + @Override + public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return getLocalState(lowEpoch, highEpoch, includeSnapshot); + } + public static class InMemoryStorage implements LogStorage { private final List entries; diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java index 933c02bd494b..a7498df1eafd 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java @@ -904,9 +904,9 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot) { - return delegate().getLocalState(start, end, includeSnapshot, retryPolicy); + return delegate().getLocalState(start, end, includeSnapshot); } @Override diff --git a/src/java/org/apache/cassandra/tcm/FetchCMSLog.java b/src/java/org/apache/cassandra/tcm/FetchCMSLog.java index 3878a9c4cb38..943c3b08fc49 100644 --- a/src/java/org/apache/cassandra/tcm/FetchCMSLog.java +++ b/src/java/org/apache/cassandra/tcm/FetchCMSLog.java @@ -121,7 +121,7 @@ public void doVerb(Message message) throws IOException if (consistentFetch) delta = processor.get().getLogState(message.payload.lowerBound, Epoch.MAX, false, retry); else - delta = processor.get().getLocalState(message.payload.lowerBound, Epoch.MAX, false, retry); + delta = processor.get().getLocalState(message.payload.lowerBound, Epoch.MAX, false); TCMMetrics.instance.cmsLogEntriesServed(message.payload.lowerBound, delta.latestEpoch()); logger.info("Responding to {}({}) with log delta: {}", message.from(), request, delta); diff --git a/src/java/org/apache/cassandra/tcm/FetchPeerLog.java b/src/java/org/apache/cassandra/tcm/FetchPeerLog.java index 1e79d6cb7c8a..ab55dcf8f0dc 100644 --- a/src/java/org/apache/cassandra/tcm/FetchPeerLog.java +++ b/src/java/org/apache/cassandra/tcm/FetchPeerLog.java @@ -19,12 +19,10 @@ package org.apache.cassandra.tcm; import java.io.IOException; -import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -85,9 +83,7 @@ public void doVerb(Message message) throws IOException logger.info("Received peer log fetch request {} from {}: start = {}, current = {}", request, message.from(), message.payload.start, metadata.epoch); LogState delta = ClusterMetadataService.instance() .processor() - .getLocalState(message.payload.start, Epoch.MAX, false, - Retry.Deadline.after(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS), - new Retry.Jitter(TCMMetrics.instance.fetchLogRetries))); + .getLocalState(message.payload.start, Epoch.MAX, false); TCMMetrics.instance.peerLogEntriesServed(message.payload.start, delta.latestEpoch()); logger.info("Responding with log delta: {}", delta); MessagingService.instance().send(message.responseWith(delta), message.from()); diff --git a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java index dcdca627dbc6..e7ad50d9de53 100644 --- a/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/PaxosBackedProcessor.java @@ -136,7 +136,7 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy while (iter.hasNext()) { FetchLogRequest request = iter.next(); - if (request.condition.awaitUninterruptibly(Math.max(0, nextTimeout - Clock.Global.nanoTime()), TimeUnit.NANOSECONDS) && + if (request.condition.awaitThrowUncheckedOnInterrupt(Math.max(0, nextTimeout - Clock.Global.nanoTime()), TimeUnit.NANOSECONDS) && request.condition.isSuccess()) { collected.add(request.to.endpoint()); @@ -168,7 +168,7 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot) { return log.storage().getLogState(start, end, includeSnapshot); } @@ -176,7 +176,20 @@ public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, R @Override public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { - return DistributedMetadataLogKeyspace.getLogState(start, end, includeSnapshot); + while (!retryPolicy.reachedMax()) + { + if (Thread.currentThread().isInterrupted()) + throw new RuntimeException("Can not reconstruct during shutdown", new InterruptedException()); + try + { + return DistributedMetadataLogKeyspace.getLogState(start, end, includeSnapshot); + } + catch (RuntimeException e) // honestly best to only retry timeouts, but everything gets wrapped in a RuntimeException... + { + retryPolicy.maybeSleep(); + } + } + throw new RuntimeException(String.format("Could not reconstruct range %d, %d", start.getEpoch(), end.getEpoch()), new TimeoutException()); } private static T unwrap(Promise promise) diff --git a/src/java/org/apache/cassandra/tcm/Processor.java b/src/java/org/apache/cassandra/tcm/Processor.java index 2791e014ab0c..b370a7c27eca 100644 --- a/src/java/org/apache/cassandra/tcm/Processor.java +++ b/src/java/org/apache/cassandra/tcm/Processor.java @@ -112,7 +112,7 @@ default ClusterMetadata fetchLogAndWait(Epoch waitFor) /** * Queries node's _local_ state. It is not guaranteed to be contiguous, but can be used for restoring CMS state/ */ - LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy); + LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot); /** * Queries global log state. diff --git a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java index e9417adfec10..b0ee46b2c4d9 100644 --- a/src/java/org/apache/cassandra/tcm/RemoteProcessor.java +++ b/src/java/org/apache/cassandra/tcm/RemoteProcessor.java @@ -152,7 +152,7 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot) { return log.getLocalEntries(start, end, includeSnapshot); } @@ -177,7 +177,7 @@ public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnap } catch (ExecutionException | TimeoutException e) { - throw new RuntimeException("Could not reconstruct", e); + throw new RuntimeException(String.format("Could not reconstruct range %d, %d", lowEpoch.getEpoch(), highEpoch.getEpoch()), e); } } diff --git a/src/java/org/apache/cassandra/tcm/Retry.java b/src/java/org/apache/cassandra/tcm/Retry.java index bf2e0fbf2be1..b0ab619195a5 100644 --- a/src/java/org/apache/cassandra/tcm/Retry.java +++ b/src/java/org/apache/cassandra/tcm/Retry.java @@ -21,17 +21,20 @@ import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; +import java.util.function.DoubleSupplier; import com.codahale.metrics.Meter; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.utils.Clock; import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly; -import static org.apache.cassandra.tcm.Retry.Jitter.MAX_JITTER_MS; public abstract class Retry { protected static final int MAX_TRIES = DatabaseDescriptor.getCmsDefaultRetryMaxTries(); + private static final int DEFAULT_BACKOFF_MS = DatabaseDescriptor.getDefaultRetryBackoff().toMilliseconds(); + private static final int DEFAULT_MAX_BACKOFF_MS = DatabaseDescriptor.getDefaultMaxRetryBackoff().toMilliseconds(); + protected final int maxTries; protected int tries; protected Meter retryMeter; @@ -71,15 +74,16 @@ public long computeSleepFor() protected abstract long sleepFor(); + protected abstract long maxWait(); + public static class Jitter extends Retry { - public static final int MAX_JITTER_MS = Math.toIntExact(DatabaseDescriptor.getDefaultRetryBackoff().to(TimeUnit.MILLISECONDS)); private final Random random; private final int maxJitterMs; public Jitter(Meter retryMeter) { - this(MAX_TRIES, MAX_JITTER_MS, new Random(), retryMeter); + this(MAX_TRIES, DEFAULT_BACKOFF_MS, new Random(), retryMeter); } private Jitter(int maxTries, int maxJitterMs, Random random, Meter retryMeter) @@ -95,6 +99,12 @@ public long sleepFor() return random.nextInt(actualBackoff); } + @Override + protected long maxWait() + { + return maxJitterMs; + } + @Override public String toString() { @@ -108,12 +118,11 @@ public String toString() public static class Backoff extends Retry { - private static final int RETRY_BACKOFF_MS = Math.toIntExact(DatabaseDescriptor.getDefaultRetryBackoff().to(TimeUnit.MILLISECONDS)); protected final int backoffMs; public Backoff(Meter retryMeter) { - this(MAX_TRIES, RETRY_BACKOFF_MS, retryMeter); + this(MAX_TRIES, DEFAULT_BACKOFF_MS, retryMeter); } public Backoff(int maxTries, int backoffMs, Meter retryMeter) @@ -127,6 +136,12 @@ public long sleepFor() return (long) tries * backoffMs; } + @Override + protected long maxWait() + { + return backoffMs; + } + @Override public String toString() { @@ -138,6 +153,38 @@ public String toString() } } + public static class ExponentialBackoff extends Retry + { + private final long baseSleepTimeMillis; + private final long maxSleepMillis; + private final DoubleSupplier randomSource; + + public ExponentialBackoff(int maxAttempts, long baseSleepTimeMillis, long maxSleepMillis, DoubleSupplier randomSource, Meter retryMeter) + { + super(maxAttempts, retryMeter); + this.baseSleepTimeMillis = baseSleepTimeMillis; + this.maxSleepMillis = maxSleepMillis; + this.randomSource = randomSource; + } + + public ExponentialBackoff(Meter retryMeter) + { + this(MAX_TRIES, DEFAULT_BACKOFF_MS, DEFAULT_MAX_BACKOFF_MS, ThreadLocalRandom.current()::nextDouble, retryMeter); + } + + @Override + protected long sleepFor() + { + return org.apache.cassandra.utils.Backoff.ExponentialBackoff.computeWaitTime(tries, baseSleepTimeMillis, maxSleepMillis, randomSource); + } + + @Override + protected long maxWait() + { + return maxSleepMillis; + } + } + public static class Deadline extends Retry { public final long deadlineNanos; @@ -169,7 +216,7 @@ public static Deadline after(long timeoutNanos, Retry delegate) public static Deadline retryIndefinitely(long timeoutNanos, Meter retryMeter) { return new Deadline(Clock.Global.nanoTime() + timeoutNanos, - new Retry.Jitter(Integer.MAX_VALUE, MAX_JITTER_MS, new Random(), retryMeter)) + new Retry.Jitter(Integer.MAX_VALUE, DEFAULT_BACKOFF_MS, new Random(), retryMeter)) { @Override public boolean reachedMax() @@ -190,6 +237,12 @@ public String toString() }; } + public static Deadline wrap(Retry delegate) + { + long deadlineMillis = delegate.maxTries * delegate.maxWait(); + return new Deadline(Clock.Global.nanoTime() + TimeUnit.MILLISECONDS.toNanos(deadlineMillis), delegate); + } + @Override public boolean reachedMax() { @@ -213,6 +266,12 @@ public long sleepFor() return delegate.sleepFor(); } + @Override + protected long maxWait() + { + return deadlineNanos; + } + public String toString() { return String.format("Deadline{remainingMs=%d, tries=%d/%d}", TimeUnit.NANOSECONDS.toMillis(remainingNanos()), currentTries(), delegate.maxTries); diff --git a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java index 30e7f52e0be6..ffacb786270a 100644 --- a/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/StubClusterMetadataService.java @@ -154,7 +154,7 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot) { throw new UnsupportedOperationException(); } diff --git a/src/java/org/apache/cassandra/tcm/log/LogReader.java b/src/java/org/apache/cassandra/tcm/log/LogReader.java index 7f2b80602a22..92b57a057553 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogReader.java +++ b/src/java/org/apache/cassandra/tcm/log/LogReader.java @@ -148,7 +148,8 @@ else if (includeSnapshot) else if (closestSnapshot.epoch.isBefore(start)) { ImmutableList.Builder entries = new ImmutableList.Builder<>(); - EntryHolder entryHolder = getEntries(closestSnapshot.epoch.nextEpoch(), end); + // start is exclusive, so use the closest snapshot + EntryHolder entryHolder = getEntries(closestSnapshot.epoch, end); for (Entry entry : entryHolder.entries) { if (entry.epoch.isAfter(start)) diff --git a/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java b/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java index 36baa59eb307..be853d89e79f 100644 --- a/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java +++ b/src/java/org/apache/cassandra/tcm/migration/GossipProcessor.java @@ -42,7 +42,7 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot) { throw new IllegalStateException("Can't reconstruct log state when running in gossip mode. Enable the ClusterMetadataService with `nodetool addtocms`."); } diff --git a/src/java/org/apache/cassandra/utils/Backoff.java b/src/java/org/apache/cassandra/utils/Backoff.java index 7974dbf346fb..79a68fdbc1fd 100644 --- a/src/java/org/apache/cassandra/utils/Backoff.java +++ b/src/java/org/apache/cassandra/utils/Backoff.java @@ -113,6 +113,11 @@ public boolean mayRetry(int attempt) @Override public long computeWaitTime(int retryCount) + { + return computeWaitTime(retryCount, baseSleepTimeMillis, maxSleepMillis, randomSource); + } + + public static long computeWaitTime(int retryCount, long baseSleepTimeMillis, long maxSleepMillis, DoubleSupplier randomSource) { long baseTimeMillis = baseSleepTimeMillis * (1L << retryCount); // it's possible that this overflows, so fall back to max; diff --git a/src/java/org/apache/cassandra/utils/concurrent/Ref.java b/src/java/org/apache/cassandra/utils/concurrent/Ref.java index 911c1db8819e..6f0836b3e3b8 100644 --- a/src/java/org/apache/cassandra/utils/concurrent/Ref.java +++ b/src/java/org/apache/cassandra/utils/concurrent/Ref.java @@ -601,6 +601,8 @@ void traverse(final RefCounted.Tidy rootObject) InProgressVisit inProgress = null; while (inProgress != null || !path.isEmpty()) { + if (Thread.currentThread().isInterrupted()) + throw new UncheckedInterruptedException(new InterruptedException()); //If necessary fetch the next object to start tracing if (inProgress == null) inProgress = path.pollLast(); diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java index 59d46c52efa8..281b95c0b037 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java @@ -42,6 +42,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Stream; +import javax.annotation.Nullable; import javax.management.ListenerNotFoundException; import javax.management.Notification; import javax.management.NotificationListener; @@ -66,6 +67,7 @@ import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.config.YamlConfigurationLoader; import org.apache.cassandra.cql3.CQLStatement; import org.apache.cassandra.cql3.QueryHandler; @@ -162,6 +164,7 @@ import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.Throwables; import org.apache.cassandra.utils.concurrent.Ref; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import org.apache.cassandra.utils.logging.LoggingSupportFactory; import org.apache.cassandra.utils.memory.BufferPools; import org.apache.cassandra.utils.progress.jmx.JMXBroadcastExecutor; @@ -612,6 +615,18 @@ public ExecutorPlus executorFor(int verbId) return Verb.fromId(verbId).stage.executor(); } + @Nullable + private DurationSpec startupTimeout() + { + Object c = config.get(Constants.KEY_DTEST_STARTUP_TIMEOUT); + if (c == null) return null; + if (c instanceof String) + return new DurationSpec.LongNanosecondsBound((String) c); + if (c instanceof Number) + return new DurationSpec.LongNanosecondsBound(((Number) c).longValue()); + throw new IllegalArgumentException("Key " + Constants.KEY_DTEST_STARTUP_TIMEOUT + " only allowed to have string/number values, but given " + c + ": " + c.getClass()); + } + @Override public void startup(ICluster cluster) { @@ -621,7 +636,7 @@ public void startup(ICluster cluster) // commit to extend the functionality of the @Shared annotation to app classes. assert startedAt.compareAndSet(0L, System.nanoTime()) : String.format("startedAt on instance %d expected to be 0, but was %d", config().num(), startedAt.get()); - sync(() -> { + Future result = async(() -> { inInstancelogger = LoggerFactory.getLogger(Instance.class); try { @@ -651,7 +666,28 @@ public void startup(ICluster cluster) throw (RuntimeException) t; throw new RuntimeException(t); } - }).run(); + }).call(); + DurationSpec timeout = startupTimeout(); + if (timeout == null) + { + waitOn(result); + } + else + { + try + { + result.get(timeout.quantity(), timeout.unit()); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + throw new UncheckedInterruptedException(e); + } + catch (TimeoutException | ExecutionException e) + { + throw new RuntimeException(e); + } + } initialized = true; } diff --git a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java index 0cbb1f7cad6d..acf6112c7599 100644 --- a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java +++ b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java @@ -34,6 +34,7 @@ import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.BiConsumer; @@ -48,7 +49,6 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.Futures; -import org.apache.cassandra.distributed.api.*; import org.assertj.core.api.Assertions; import org.junit.Assert; import org.slf4j.Logger; @@ -65,6 +65,7 @@ import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.api.IMessageFilters; import org.apache.cassandra.distributed.api.NodeToolResult; +import org.apache.cassandra.distributed.api.Row; import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.distributed.impl.AbstractCluster; import org.apache.cassandra.distributed.impl.InstanceConfig; @@ -82,6 +83,7 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Commit; @@ -1615,5 +1617,21 @@ public static TableId tableId(Cluster cluster, String ks, String table) String str = cluster.getFirstRunningInstance().callOnInstance(() -> Schema.instance.getKeyspaceInstance(ks).getColumnFamilyStore(table).getTableId().toString()); return TableId.fromUUID(UUID.fromString(str)); } -} + public static void awaitAccordEpochReady(Cluster cluster, long epoch) + { + cluster.forEach(i -> { + if (i.isShutdown()) return; + i.runOnInstance(() -> { + try + { + AccordService.instance().epochReady(Epoch.create(epoch)).get(); + } + catch (InterruptedException | ExecutionException e) + { + throw new RuntimeException(e); + } + }); + }); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/HungBootstrapDoesNotHangTest.java b/test/distributed/org/apache/cassandra/distributed/test/HungBootstrapDoesNotHangTest.java new file mode 100644 index 000000000000..474c555ab491 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/HungBootstrapDoesNotHangTest.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test; + +import java.io.IOException; +import java.util.Collection; +import java.util.concurrent.ForkJoinPool; + +import org.junit.Test; + +import net.bytebuddy.ByteBuddy; +import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; +import net.bytebuddy.implementation.MethodDelegation; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.Constants; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ownership.MovementMap; +import org.apache.cassandra.tcm.sequences.BootstrapAndJoin; +import org.apache.cassandra.utils.Shared; +import org.apache.cassandra.utils.concurrent.CountDownLatch; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import org.assertj.core.api.Assertions; + +import static net.bytebuddy.matcher.ElementMatchers.named; +import static net.bytebuddy.matcher.ElementMatchers.takesArguments; + +/** + * When bootstrap hangs it can hang forever, but this can be a problem in CI as the test reports as "timeout" and all logs and history is lost. This test makes sure that JVM-DTest instances do shutdown properly even in this case + */ +public class HungBootstrapDoesNotHangTest extends TestBaseImpl +{ + @Test + public void test() throws IOException + { + TokenSupplier tokenSupplier = TokenSupplier.evenlyDistributedTokens(2); + try (Cluster cluster = Cluster.build(1) + .withTokenSupplier(tokenSupplier) + .withConfig(c -> c.set("auto_bootstrap", true).with(Feature.values())) + .withInstanceInitializer(BBHelper::install) + .createWithoutStarting()) + { + cluster.get(1).startup(cluster); // should work fine + IInvokableInstance node2 = ClusterUtils.addInstance(cluster, c -> c.set(Constants.KEY_DTEST_STARTUP_TIMEOUT, "1m") + .set(Constants.KEY_DTEST_API_STARTUP_FAILURE_AS_SHUTDOWN, false)); + ForkJoinPool.commonPool().execute(() -> { + node2.startup(); // should hang and never reach the next line + State.notBlocked(); + }); + State.awaitBlocked(); + + Assertions.assertThat(State.wasBlocked()).describedAs("node2 was supposed to get blocked by ByteBuddy but didnt").isEqualTo(true); + + // node1 is up, node2 is blocked in bootstrap... now let the cluster close + } + } + + @Shared + public static class State + { + private static final CountDownLatch blocked = CountDownLatch.newCountDownLatch(1); + private static volatile boolean wasBlocked = true; + + public static void blocked() + { + blocked.decrement(); + } + + public static void notBlocked() + { + wasBlocked = false; + blocked(); + } + + public static void awaitBlocked() + { + blocked.awaitThrowUncheckedOnInterrupt(); + } + + public static boolean wasBlocked() + { + return wasBlocked; + } + } + + public static class BBHelper + { + public static void install(ClassLoader cl, int id) + { + if (id != 2) return; + new ByteBuddy().rebase(BootstrapAndJoin.class) + .method(named("bootstrap").and(takesArguments(6))) + .intercept(MethodDelegation.to(BBHelper.class)) + .make() + .load(cl, ClassLoadingStrategy.Default.INJECTION); + } + + public static boolean bootstrap(final Collection tokens, + long bootstrapTimeoutMillis, + ClusterMetadata metadata, + InetAddressAndPort beingReplaced, + MovementMap movements, + MovementMap strictMovements) + { + try + { + State.blocked(); + Thread.currentThread().join(); + return false; + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + } + + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java index 42cd999eb774..95533778479c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java @@ -758,15 +758,15 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot) { - return getLogState(start, end, includeSnapshot, retryPolicy); + return log.getLocalEntries(start, end, includeSnapshot); } @Override public LogState getLogState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) { - return log.getLocalEntries(start, end, includeSnapshot); + return getLocalState(start, end, includeSnapshot); } }, (a,b) -> {}, diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java b/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java index 6ee5e975eaa0..2b2906bc07b6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/TestProcessor.java @@ -71,9 +71,9 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch start, Epoch end, boolean includeSnapshot) { - return delegate.getLocalState(start, end, includeSnapshot, retryPolicy); + return delegate.getLocalState(start, end, includeSnapshot); } @Override diff --git a/test/harry/main/org/apache/cassandra/harry/dsl/HistoryBuilder.java b/test/harry/main/org/apache/cassandra/harry/dsl/HistoryBuilder.java index c4565a0a74c6..8766ff7fd0b4 100644 --- a/test/harry/main/org/apache/cassandra/harry/dsl/HistoryBuilder.java +++ b/test/harry/main/org/apache/cassandra/harry/dsl/HistoryBuilder.java @@ -18,13 +18,7 @@ package org.apache.cassandra.harry.dsl; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; import org.apache.cassandra.harry.ColumnSpec; diff --git a/test/unit/org/apache/cassandra/service/RetryStrategyTest.java b/test/unit/org/apache/cassandra/service/RetryStrategyTest.java deleted file mode 100644 index e0fed7475cbf..000000000000 --- a/test/unit/org/apache/cassandra/service/RetryStrategyTest.java +++ /dev/null @@ -1,482 +0,0 @@ -///* -// * Licensed to the Apache Software Foundation (ASF) under one -// * or more contributor license agreements. See the NOTICE file -// * distributed with this work for additional information -// * regarding copyright ownership. The ASF licenses this file -// * to you under the Apache License, Version 2.0 (the -// * "License"); you may not use this file except in compliance -// * with the License. You may obtain a copy of the License at -// * -// * http://www.apache.org/licenses/LICENSE-2.0 -// * -// * Unless required by applicable law or agreed to in writing, software -// * distributed under the License is distributed on an "AS IS" BASIS, -// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// * See the License for the specific language governing permissions and -// * limitations under the License. -// */ -//package org.apache.cassandra.service; -// -//import java.util.List; -//import java.util.Random; -//import java.util.concurrent.ThreadLocalRandom; -//import java.util.concurrent.TimeUnit; -//import java.util.concurrent.atomic.AtomicReference; -//import java.util.function.BiFunction; -//import java.util.function.Consumer; -//import java.util.function.DoubleSupplier; -//import java.util.function.LongBinaryOperator; -// -//import com.google.common.collect.ImmutableList; -//import org.junit.Assert; -//import org.junit.Test; -// -//import org.slf4j.Logger; -//import org.slf4j.LoggerFactory; -// -//import net.nicoulaj.compilecommand.annotations.Inline; -//import org.apache.cassandra.config.DatabaseDescriptor; -//import org.apache.cassandra.service.TimeoutStrategy.LatencyModifier; -//import org.apache.cassandra.service.TimeoutStrategy.LatencyModifierFactory; -//import org.apache.cassandra.service.TimeoutStrategy.LatencySource; -//import org.apache.cassandra.service.TimeoutStrategy.LatencySupplierFactory; -//import org.apache.cassandra.service.TimeoutStrategy.LatencySupplier; -//import org.apache.cassandra.service.TimeoutStrategy.Wait; -//import org.apache.cassandra.service.paxos.ContentionStrategy; -// -//import static org.apache.cassandra.service.RetryStrategy.*; -//import static org.apache.cassandra.service.RetryStrategy.WaitRandomizerFactory.*; -//import static org.apache.cassandra.service.RetryStrategyTest.WaitRandomizerType.*; -//import static org.apache.cassandra.service.TimeoutStrategy.modifiers; -//import static org.apache.cassandra.service.TimeoutStrategy.parseWait; -//import static org.apache.cassandra.service.TimeoutStrategy.selectors; -// -//public class RetryStrategyTest -//{ -// private static final Logger logger = LoggerFactory.getLogger(RetryStrategyTest.class); -// -// static -// { -// DatabaseDescriptor.daemonInitialization(); -// } -// -// private static final long MAX = DatabaseDescriptor.getRpcTimeout(TimeUnit.MICROSECONDS); -// -// private static final String DEFAULT_WAIT_RANDOMIZER = "qexp(1.5)"; // at least 0ms, and at least 66% of median latency -// private static final String DEFAULT_MIN = "0 <= p50(rw)*0.66"; // at least 0ms, and at least 66% of median latency -// private static final String DEFAULT_MAX = "10ms <= p95(rw)*1.8^attempts <= 100ms"; // p95 latency with exponential back-off at rate of 1.8^attempts -// private static final String DEFAULT_SPREAD = "5ms <= p50(rw)*0.5"; // at least 5ms, and at least 50% of median latency -// -// private static final WaitRandomizerParseValidator DEFAULT_WAIT_RANDOMIZER_VALIDATOR = new WaitRandomizerParseValidator(DEFAULT_WAIT_RANDOMIZER, QEXP, 1.5); -// private static final WaitParseValidator DEFAULT_MIN_VALIDATOR = new WaitParseValidator(DEFAULT_MIN, true, assertWait(0, MAX, 0, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0f).getClass(), 0.66)); -// private static final WaitParseValidator DEFAULT_MAX_VALIDATOR = new WaitParseValidator(DEFAULT_MAX, false, assertWait(10000, 100000, 100000, selectors.maxReadWrite(0f).getClass(), 0.95, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 1.8)); -// private static final WaitParseValidator DEFAULT_MIN_DELTA_VALIDATOR = new WaitParseValidator(DEFAULT_SPREAD, true, assertWait(5000, MAX, 5000, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0f).getClass(), 0.5)); -// private static final RetryStrategy.ParsedStrategy DEFAULT = new RetryStrategy.ParsedStrategy(DEFAULT_WAIT_RANDOMIZER, DEFAULT_MIN, DEFAULT_MAX, DEFAULT_SPREAD, -// new RetryStrategy(DEFAULT_WAIT_RANDOMIZER, DEFAULT_MIN, DEFAULT_MAX, DEFAULT_SPREAD)); -// -// private static List VALIDATE = ImmutableList.of( -// new WaitParseValidator("p95(rw)", false, assertWait(0, MAX, MAX, selectors.maxReadWrite(0f).getClass(), 0.95, 0, modifiers.identity().getClass(), 1)), -// new WaitParseValidator("5ms<=p50(rw)*0.66", false, assertWait(5000, MAX, MAX, selectors.maxReadWrite(0f).getClass(), 0.50, 0, modifiers.multiply(0).getClass(), 0.66)), -// new WaitParseValidator("5us <= p50(r)*1.66*attempts", true, assertWait(5, MAX, 5, selectors.read(0f).getClass(), 0.50, 0, modifiers.multiplyByAttempts(0f).getClass(), 1.66)), -// new WaitParseValidator("0<=p50(w)*0.66^attempts", true, assertWait(0, MAX, 0, selectors.write(0f).getClass(), 0.50, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 0.66)), -// new WaitParseValidator("125us", true, assertWait(125, 125, 125, selectors.constant(0).getClass(), 0.0f, 125, modifiers.identity().getClass(), 1)), -// new WaitParseValidator("5us <= p95(r)*1.8^attempts <= 100us", true, assertWait(5, 100, 5, selectors.read(0f).getClass(), 0.95, 0, modifiers.multiplyByAttemptsExp(0f).getClass(), 1.8)), -// DEFAULT_MIN_VALIDATOR, DEFAULT_MAX_VALIDATOR, DEFAULT_MIN_DELTA_VALIDATOR -// ); -// -// private static List VALIDATE_RANDOMIZER = ImmutableList.of( -// new WaitRandomizerParseValidator("quantizedexponential(0.5)", QEXP, 0.5), -// new WaitRandomizerParseValidator("exponential(2.5)", EXP, 2.5), -// new WaitRandomizerParseValidator("exp(10)", EXP, 10), -// new WaitRandomizerParseValidator("uniform", UNIFORM, 0), -// DEFAULT_WAIT_RANDOMIZER_VALIDATOR -// ); -// -// static class WaitParseValidator -// { -// final String spec; -// final boolean isMin; -// final Consumer validator; -// -// WaitParseValidator(String spec, boolean isMin, Consumer validator) -// { -// this.spec = spec; -// this.isMin = isMin; -// this.validator = validator; -// } -// -// void validate(Wait Wait) -// { -// validator.accept(Wait); -// } -// } -// -// enum WaitRandomizerType -// { -// UNIFORM(Uniform.class, (p, f) -> f.uniform()), -// EXP(Exponential.class, (p, f) -> f.exponential(p)), -// QEXP(QuantizedExponential.class, (p, f) -> f.quantizedExponential(p)); -// -// final Class clazz; -// final BiFunction getter; -// -// WaitRandomizerType(Class clazz, BiFunction getter) -// { -// this.clazz = clazz; -// this.getter = getter; -// } -// } -// -// static class WaitRandomizerParseValidator -// { -// final String spec; -// final WaitRandomizerType type; -// final double power; -// -// WaitRandomizerParseValidator(String spec, WaitRandomizerType type, double power) -// { -// this.spec = spec; -// this.type = type; -// this.power = power; -// } -// -// void validate(WaitRandomizer randomizer) -// { -// Assert.assertSame(type.clazz, randomizer.getClass()); -// if (AbstractExponential.class.isAssignableFrom(type.clazz)) -// Assert.assertEquals(power, ((AbstractExponential) randomizer).power, 0.00001); -// } -// } -// -// private static class WaitRandomizerOutputValidator -// { -// static void validate(WaitRandomizerType type, long seed, int trials, int samplesPerTrial) -// { -// Random random = new Random(seed); -// WaitRandomizer randomizer = type.getter.apply(2d, new WaitRandomizerFactory() -// { -// @Override public LongBinaryOperator uniformLongSupplier() { return (min, max) -> min + random.nextInt((int) (max - min)); } -// @Override public DoubleSupplier uniformDoubleSupplier() { return random::nextDouble; } -// }); -// -// for (int i = 0 ; i < trials ; ++i) -// { -// int min = random.nextInt(1 << 20); -// int max = min + 1024 + random.nextInt(1 << 20); -// double minMean = minMean(type, min, max); -// double maxMean = maxMean(type, min, max); -// double sampleMean = sampleMean(samplesPerTrial, min, max, randomizer); -// Assert.assertTrue(minMean <= sampleMean); -// Assert.assertTrue(maxMean >= sampleMean); -// } -// } -// -// private static double minMean(WaitRandomizerType type, int min, int max) -// { -// switch (type) -// { -// case UNIFORM: return min + (max - min) * (4d/10); -// case EXP: case QEXP: return min + (max - min) * (6d/10); -// default: throw new IllegalStateException(); -// } -// } -// -// private static double maxMean(WaitRandomizerType type, int min, int max) -// { -// switch (type) -// { -// case UNIFORM: return min + (max - min) * (6d/10); -// case EXP: case QEXP: return min + (max - min) * (8d/10); -// default: throw new IllegalStateException(); -// } -// } -// -// private static double sampleMean(int samples, int min, int max, WaitRandomizer randomizer) -// { -// double sum = 0; -// int attempts = 1; -// for (int i = 0 ; i < samples ; ++i) -// { -// long wait = randomizer.wait(min, max, attempts = (attempts & 15) + 1); -// Assert.assertTrue(wait >= min); -// Assert.assertTrue(wait <= max); -// sum += wait; -// } -// double mean = sum / samples; -// Assert.assertTrue(mean >= min); -// Assert.assertTrue(mean <= max); -// return mean; -// } -// } -// -// private static Consumer assertWait( -// long min, long max, long onFailure, -// Class selectorClass, -// double selectorPercentile, -// long selectorConst, -// Class modifierClass, -// double modifierVal -// ) -// { -// return Wait -> { -// Assert.assertEquals(min, Wait.min); -// Assert.assertEquals(max, Wait.max); -// Assert.assertEquals(onFailure, Wait.onFailure); -// Assert.assertSame(selectorClass, Wait.selector.getClass()); -// if (selectorClass == selectors.constant(0).getClass()) -// { -// LatencySupplier fail = v -> { throw new UnsupportedOperationException(); }; -// Assert.assertEquals(selectorConst, Wait.selector.select(fail, fail)); -// } -// else -// { -// AtomicReference percentile = new AtomicReference<>(); -// LatencySource set = v -> { percentile.set(v); return 0; }; -// Wait.selector.select(set, set); -// Assert.assertNotNull(percentile.get()); -// Assert.assertEquals(selectorPercentile, percentile.get(), 0.00001); -// } -// Assert.assertSame(modifierClass, Wait.modifier.getClass()); -// Assert.assertEquals(1000000L * modifierVal, Wait.modifier.modify(1000000, 1), 0.00001); -// }; -// } -// -// private static void assertParseFailure(String spec) -// { -// -// try -// { -// Wait Wait = parseWait(spec, 0, 0, 0); -// Assert.fail("expected parse failure, but got " + Wait); -// } -// catch (IllegalArgumentException e) -// { -// // expected -// } -// } -// -// @Test -// public void strategyParseTest() -// { -// for (WaitParseValidator min : VALIDATE.stream().filter(v -> v.isMin).toArray(WaitParseValidator[]::new)) -// { -// for (WaitParseValidator max : VALIDATE.stream().filter(v -> !v.isMin).toArray(WaitParseValidator[]::new)) -// { -// for (WaitParseValidator minDelta : VALIDATE.stream().filter(v -> v.isMin).toArray(WaitParseValidator[]::new)) -// { -// for (WaitRandomizerParseValidator random : VALIDATE_RANDOMIZER) -// { -// { -// ParsedStrategy parsed = parseStrategy("min=" + min.spec + ",max=" + max.spec + ",delta=" + minDelta.spec + ",random=" + random.spec, DEFAULT); -// Assert.assertEquals(parsed.min, min.spec); -// min.validate(parsed.strategy.min); -// Assert.assertEquals(parsed.max, max.spec); -// max.validate(parsed.strategy.max); -// Assert.assertEquals(parsed.spread, minDelta.spec); -// minDelta.validate(parsed.strategy.spread); -// Assert.assertEquals(parsed.waitRandomizer, random.spec); -// random.validate(parsed.strategy.waitRandomizer); -// } -// ParsedStrategy parsed = parseStrategy("random=" + random.spec, DEFAULT); -// Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); -// DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); -// Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); -// DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); -// Assert.assertEquals(parsed.spread, DEFAULT_MIN_DELTA_VALIDATOR.spec); -// DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.spread); -// Assert.assertEquals(parsed.waitRandomizer, random.spec); -// random.validate(parsed.strategy.waitRandomizer); -// } -// ParsedStrategy parsed = parseStrategy("delta=" + minDelta.spec, DEFAULT); -// Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); -// DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); -// Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); -// DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); -// Assert.assertEquals(parsed.spread, minDelta.spec); -// minDelta.validate(parsed.strategy.spread); -// } -// ParsedStrategy parsed = parseStrategy("max=" + max.spec, DEFAULT); -// Assert.assertEquals(parsed.min, DEFAULT_MIN_VALIDATOR.spec); -// DEFAULT_MIN_VALIDATOR.validate(parsed.strategy.min); -// Assert.assertEquals(parsed.max, max.spec); -// max.validate(parsed.strategy.max); -// Assert.assertEquals(parsed.spread, DEFAULT_MIN_DELTA_VALIDATOR.spec); -// DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.spread); -// } -// ParsedStrategy parsed = parseStrategy("min=" + min.spec, DEFAULT); -// Assert.assertEquals(parsed.min, min.spec); -// min.validate(parsed.strategy.min); -// Assert.assertEquals(parsed.max, DEFAULT_MAX_VALIDATOR.spec); -// DEFAULT_MAX_VALIDATOR.validate(parsed.strategy.max); -// Assert.assertEquals(parsed.spread, DEFAULT_MIN_DELTA_VALIDATOR.spec); -// DEFAULT_MIN_DELTA_VALIDATOR.validate(parsed.strategy.spread); -// } -// } -// -// @Test -// public void testParseRoundTrip() -// { -// LatencySupplierFactory selectorFactory = new LatencySupplierFactory() -// { -// LatencySupplierFactory delegate = TimeoutStrategy.selectors; -// public LatencySelector constant(long latency) { return selector(delegate.constant(latency), String.format("%dms", latency)); } -// public LatencySelector read(double percentile) { return selector(delegate.read(percentile), String.format("p%d(r)", (int) (percentile * 100))); } -// public LatencySelector write(double percentile) { return selector(delegate.write(percentile), String.format("p%d(w)", (int) (percentile * 100))); } -// public LatencySelector maxReadWrite(double percentile) { return selector(delegate.maxReadWrite(percentile), String.format("p%d(rw)", (int) percentile * 100)); } -// -// private LatencySelector selector(LatencySelector selector, String str) { -// return new LatencySelector() -// { -// public long select(LatencySupplier read, LatencySupplier write) -// { -// return selector.select(read, write); -// } -// -// public String toString() -// { -// return str; -// } -// }; -// } -// }; -// -// LatencyModifierFactory modifierFactory = new LatencyModifierFactory() -// { -// LatencyModifierFactory delegate = modifiers; -// public LatencyModifier identity() { return modifier(delegate.identity(), ""); } -// public LatencyModifier multiply(double constant) { return modifier(delegate.multiply(constant), String.format(" * %.2f", constant)); } -// public LatencyModifier multiplyByAttempts(double multiply) { return modifier(delegate.multiplyByAttempts(multiply), String.format(" * %.2f * attempts", multiply)); } -// public LatencyModifier multiplyByAttemptsExp(double base) { return modifier(delegate.multiplyByAttemptsExp(base), String.format(" * %.2f ^ attempts", base)); } -// -// private LatencyModifier modifier(LatencyModifier modifier, String str) { -// return new LatencyModifier() -// { -// @Inline -// public long modify(long latency, int attempts) -// { -// return modifier.modify(latency, attempts); -// } -// -// public String toString() -// { -// return str; -// } -// }; -// } -// }; -// -// LatencyModifier[] latencyModifiers = new LatencyModifier[]{ -// modifierFactory.multiply(0.5), -// modifierFactory.multiplyByAttempts(0.5), -// modifierFactory.multiplyByAttemptsExp(0.5) -// }; -// -// LatencySelector[] latencySelectors = new LatencySelector[]{ -// selectorFactory.read(0.5), -// selectorFactory.write(0.5), -// selectorFactory.maxReadWrite(0.99) -// }; -// -// for (boolean min : new boolean[] { true, false}) -// { -// String left = min ? "10ms <= " : ""; -// for (boolean max : new boolean[] { true, false}) -// { -// String right = max ? " <= 10ms" : ""; -// -// for (LatencySelector selector : latencySelectors) -// { -// for (LatencyModifier modifier : latencyModifiers) -// { -// String mid = String.format("%s%s", selector, modifier); -// String input = left + mid + right; -// Wait Wait = parseWait(input, 0, MAX, MAX, selectorFactory, modifierFactory); -// Assert.assertTrue(String.format("Wait: %d" , Wait.min), !min || Wait.min == 10000); -// Assert.assertTrue(String.format("Wait: %d" , Wait.max), !max || Wait.max == 10000); -// Assert.assertEquals(selector.toString(), Wait.selector.toString()); -// Assert.assertEquals(modifier.toString(), Wait.modifier.toString()); -// } -// } -// } -// } -// } -// -// @Test -// public void WaitParseTest() -// { -// VALIDATE.forEach(v -> v.validate(parseWait(v.spec, 0, MAX, v.isMin ? 0 : MAX))); -// } -// -// @Test -// public void waitRandomizerParseTest() -// { -// VALIDATE_RANDOMIZER.forEach(v -> v.validate(parseWaitRandomizer(v.spec))); -// } -// -// @Test -// public void waitRandomizerSampleTest() -// { -// waitRandomizerSampleTest(2); -// } -// -// private void waitRandomizerSampleTest(int count) -// { -// while (count-- > 0) -// { -// long seed = ThreadLocalRandom.current().nextLong(); -// logger.info("Seed {}", seed); -// for (WaitRandomizerType type : WaitRandomizerType.values()) -// { -// WaitRandomizerOutputValidator.validate(type, seed, 100, 1000000); -// } -// } -// } -// -// @Test -// public void WaitParseFailureTest() -// { -// assertParseFailure("10ms <= p95(r) <= 5ms"); -// assertParseFailure("10 <= p95(r)"); -// assertParseFailure("10 <= 20 <= 30"); -// assertParseFailure("p95(r) < 5"); -// assertParseFailure("p95(x)"); -// assertParseFailure("p95()"); -// assertParseFailure("p95"); -// assertParseFailure("p50(rw)+0.66"); -// } -// -// @Test -// public void testBackoffTime() -// { -// RetryStrategy strategy = parseStrategy("min=0ms,max=100ms,random=uniform", DEFAULT).strategy; -// double total = 0; -// int count = 100000; -// for (int i = 0 ; i < count ; ++i) -// { -// long now = System.nanoTime(); -// long waitUntil = strategy.computeWaitUntil(1); -// long waitLength = Math.max(waitUntil - now, 0); -// total += waitLength; -// } -// Assert.assertTrue(Math.abs(TimeUnit.MILLISECONDS.toNanos(50) - (total / count)) < TimeUnit.MILLISECONDS.toNanos(1L)); -// } -// -// @Test -// public void testBackoffTimeElapsed() -// { -// ContentionStrategy strategy = ContentionStrategy.parseStrategy("min=0ms,max=10ms,random=uniform").strategy; -// double total = 0; -// int count = 1000; -// for (int i = 0 ; i < count ; ++i) -// { -// long start = System.nanoTime(); -// strategy.doWaitForContention(Long.MAX_VALUE, 1, null, null, null, null); -// long end = System.nanoTime(); -// total += end - start; -// } -// // make sure we have slept at least 4ms on average, given a mean wait time of 5ms -// double avg = total / count; -// double nanos = avg - TimeUnit.MILLISECONDS.toNanos(4); -// Assert.assertTrue(nanos > 0); -// } -//} diff --git a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java index 6f10d977b211..50dd331e8ffd 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordStateCacheTest.java @@ -428,7 +428,7 @@ public void evictionBlockedOnSaving() instance.release(item); } - assertCacheState(cache, 0, 4, nodeSize(1) * 3 + nodeSize(3)); + assertCacheState(cache, 0, 4, nodeSize(1) * 3 + nodeSize(2)); assertCacheMetrics(cache.metrics, 0, 4, 4); assertCacheMetrics(instance.instanceMetrics, 0, 4, 4); @@ -464,7 +464,7 @@ public void testUpdates() safeString.set("11"); instance.release(safeString); - assertCacheState(cache, 0, 1, nodeSize(3)); + assertCacheState(cache, 0, 1, nodeSize(2)); Assert.assertSame(safeString.global, cache.head()); Assert.assertSame(safeString.global, cache.tail()); diff --git a/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java index 0d7bbf7f8e2f..0f9ddf0f06e2 100644 --- a/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java +++ b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java @@ -132,13 +132,7 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy } @Override - public LogState getLocalState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) - { - return getLogState(lowEpoch, highEpoch, includeSnapshot, retryPolicy); - } - - @Override - public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) + public LogState getLocalState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot) { if (!epochs.containsKey(lowEpoch)) throw new AssertionError("Unknown epoch: " + lowEpoch); @@ -147,7 +141,13 @@ public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnap int id = 0; for (ClusterMetadata cm : epochs.subMap(lowEpoch, false, highEpoch, true).values()) entries.add(new Entry(new Entry.Id(id++), cm.epoch, new MockTransformer(cm))); - return new LogState(base, entries.build()); + return new LogState(includeSnapshot ? base : null, entries.build()); + } + + @Override + public LogState getLogState(Epoch lowEpoch, Epoch highEpoch, boolean includeSnapshot, Retry.Deadline retryPolicy) + { + return getLocalState(lowEpoch, highEpoch, includeSnapshot); } }; } diff --git a/test/unit/org/apache/cassandra/tcm/log/DistributedLogStateTest.java b/test/unit/org/apache/cassandra/tcm/log/DistributedLogStateTest.java index 37cee7237347..656ee5551097 100644 --- a/test/unit/org/apache/cassandra/tcm/log/DistributedLogStateTest.java +++ b/test/unit/org/apache/cassandra/tcm/log/DistributedLogStateTest.java @@ -102,9 +102,9 @@ public void snapshotMetadata() } @Override - public LogState getLogState(Epoch since) + public LogReader reader() { - return reader.getLogState(since); + return reader; } @Override diff --git a/test/unit/org/apache/cassandra/tcm/log/LocalStorageLogStateTest.java b/test/unit/org/apache/cassandra/tcm/log/LocalStorageLogStateTest.java index 5bc6ec0fa831..196c69ed7ec8 100644 --- a/test/unit/org/apache/cassandra/tcm/log/LocalStorageLogStateTest.java +++ b/test/unit/org/apache/cassandra/tcm/log/LocalStorageLogStateTest.java @@ -90,9 +90,9 @@ public void snapshotMetadata() throws IOException } @Override - public LogState getLogState(Epoch since) + public LogReader reader() { - return storage.getLogState(since); + return storage; } @Override diff --git a/test/unit/org/apache/cassandra/tcm/log/LogStateTestBase.java b/test/unit/org/apache/cassandra/tcm/log/LogStateTestBase.java index c7df0141fbbb..5342930da03f 100644 --- a/test/unit/org/apache/cassandra/tcm/log/LogStateTestBase.java +++ b/test/unit/org/apache/cassandra/tcm/log/LogStateTestBase.java @@ -20,18 +20,25 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Objects; +import java.util.stream.Stream; import org.junit.Before; import org.junit.Test; +import accord.utils.Gen; +import accord.utils.Gens; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.MetadataSnapshots; import org.apache.cassandra.tcm.sequences.SequencesUtils; +import org.assertj.core.api.Assertions; +import static accord.utils.Property.qt; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @@ -44,13 +51,42 @@ public abstract class LogStateTestBase static int EXTRA_ENTRIES = 2; static Epoch CURRENT_EPOCH = Epoch.create((NUM_SNAPSHOTS * SNAPSHOT_FREQUENCY) + EXTRA_ENTRIES); static Epoch LATEST_SNAPSHOT_EPOCH = Epoch.create(NUM_SNAPSHOTS * SNAPSHOT_FREQUENCY); + private static final Gen.LongGen EPOCH_GEN = rs -> rs.nextLong(0, CURRENT_EPOCH.getEpoch()) + 1; + private static final Gen BETWEEN_GEN = rs -> { + long a = EPOCH_GEN.nextLong(rs); + long b = EPOCH_GEN.nextLong(rs); + while (b == a) + b = EPOCH_GEN.nextLong(rs); + if (b < a) + { + long tmp = a; + a = b; + b = tmp; + } + return new Between(Epoch.create(a), Epoch.create(b)); + }; + private static final Gen SNAPSHOTS_GEN = Gens.oneOf() + .add(i -> MetadataSnapshots.NO_OP) + .add(i -> throwing()) + .add(rs -> rs.nextBoolean() ? withCorruptSnapshots(LATEST_SNAPSHOT_EPOCH) : withAvailableSnapshots(LATEST_SNAPSHOT_EPOCH)) + .add(rs -> { + Epoch[] queriedEpochs = new Epoch[NUM_SNAPSHOTS]; + for (int i = 0; i < NUM_SNAPSHOTS; i++) + queriedEpochs[i] = SequencesUtils.epoch((NUM_SNAPSHOTS - i) * SNAPSHOT_FREQUENCY); + return rs.nextBoolean() ? withCorruptSnapshots(queriedEpochs) : withAvailableSnapshots(queriedEpochs); + }) + .build(); interface LogStateSUT { void cleanup() throws IOException; void insertRegularEntry() throws IOException; void snapshotMetadata() throws IOException; - LogState getLogState(Epoch since); + LogReader reader(); + default LogState getLogState(Epoch since) + { + return reader().getLogState(since); + } // just for manually checking the test data void dumpTables() throws IOException; @@ -113,6 +149,11 @@ public List listSnapshotsSince(Epoch epoch) return list; } + @Override + public String toString() + { + return (corrupt ? "Corrupted" : "") + "Snapshots{" + Arrays.toString(Stream.of(expected).mapToLong(e -> e.getEpoch()).toArray()) + '}'; + } }; static MetadataSnapshots withCorruptSnapshots(Epoch ... expected) @@ -135,6 +176,12 @@ public ClusterMetadata getSnapshot(Epoch epoch) fail("Did not expect to request a snapshot"); return null; } + + @Override + public String toString() + { + return "Throwing"; + } }; } @@ -244,6 +291,47 @@ public void sinceArbitraryEpochWithMultipleCorruptSnapshots() assertEntries(state.entries, since.nextEpoch(), CURRENT_EPOCH); } + @Test + public void getLogStateBetween() + { + qt().forAll(SNAPSHOTS_GEN, BETWEEN_GEN).check((snapshots, between) -> { + LogStateSUT sut = getSystemUnderTest(snapshots); + LogState state = sut.reader().getLogState(between.start, between.end, true); + Assertions.assertThat(state.entries).describedAs("with and without snapshot should have the same entries").isEqualTo(sut.reader().getLogState(between.start, between.end, false).entries); + Assertions.assertThat(state.baseState.epoch).isEqualTo(between.start); + + List entries = state.entries; + Assertions.assertThat(entries.size()).isEqualTo(between.end.getEpoch() - between.start.getEpoch()); + + long expected = between.start.nextEpoch().getEpoch(); + for (Entry e : entries) + { + long actual = e.epoch.getEpoch(); + Assertions.assertThat(actual).describedAs("Unexpected epoch").isEqualTo(expected); + expected++; + } + }); + } + + @Test + public void getEntriesBetween() + { + qt().forAll(SNAPSHOTS_GEN, BETWEEN_GEN).check((snapshots, between) -> { + LogStateSUT sut = getSystemUnderTest(snapshots); + LogReader.EntryHolder entries = sut.reader().getEntries(between.start, between.end); + Assertions.assertThat(entries.since).isEqualTo(between.start); + Assertions.assertThat(entries.entries.size()).isEqualTo(between.end.getEpoch() - between.start.getEpoch()); + + long expected = between.start.nextEpoch().getEpoch(); + for (Entry e : entries.entries) + { + long actual = e.epoch.getEpoch(); + Assertions.assertThat(actual).describedAs("Unexpected epoch").isEqualTo(expected); + expected++; + } + }); + } + private void assertEntries(List entries, Epoch min, Epoch max) { int idx = 0; @@ -255,4 +343,39 @@ private void assertEntries(List entries, Epoch min, Epoch max) } assertEquals(idx, entries.size()); } + + private static class Between + { + private final Epoch start, end; + + private Between(Epoch start, Epoch end) + { + this.start = start; + this.end = end; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Between between = (Between) o; + return start.equals(between.start) && end.equals(between.end); + } + + @Override + public int hashCode() + { + return Objects.hash(start, end); + } + + @Override + public String toString() + { + return "Between{" + + "start=" + start.getEpoch() + + ", end=" + end.getEpoch() + + '}'; + } + } } From 6c0ad476ed9347a14d63abfcf6fef39dd64d91d5 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Thu, 24 Oct 2024 12:16:19 -0400 Subject: [PATCH 184/340] Miscellaneous migration test fixes Patch by Ariel Weisberg; Reviewed by David Capwell for CASSANDRA-20060 --- build.xml | 26 + conf/cassandra.yaml | 5 +- modules/accord | 2 +- .../concurrent/InfiniteLoopExecutor.java | 6 +- .../apache/cassandra/concurrent/Stage.java | 3 +- .../apache/cassandra/config/AccordSpec.java | 133 +- .../org/apache/cassandra/config/Config.java | 1 - .../cassandra/config/DatabaseDescriptor.java | 178 ++- .../db/compaction/CompactionIterator.java | 13 +- .../cassandra/db/marshal/AbstractType.java | 35 + .../cassandra/db/marshal/CompositeType.java | 67 +- .../db/memtable/AbstractMemtable.java | 9 + .../cassandra/db/memtable/Memtable.java | 11 +- .../db/memtable/ShardedSkipListMemtable.java | 12 +- .../db/memtable/SkipListMemtable.java | 9 +- .../cassandra/db/memtable/TrieMemtable.java | 2 +- .../db/virtual/AccordVirtualTables.java | 48 +- .../cassandra/dht/LocalPartitioner.java | 1 - .../apache/cassandra/hints/HintsBuffer.java | 14 +- .../apache/cassandra/hints/HintsService.java | 8 + .../index/accord/RoutesSearcher.java | 16 +- .../cassandra/io/util/DataOutputBuffer.java | 2 +- .../cassandra/journal/ActiveSegment.java | 212 +-- .../apache/cassandra/journal/Compactor.java | 4 +- .../apache/cassandra/journal/Component.java | 8 +- .../cassandra/journal/EntrySerializer.java | 217 ++- .../org/apache/cassandra/journal/Flusher.java | 224 +-- .../cassandra/journal/InMemoryIndex.java | 5 +- .../org/apache/cassandra/journal/Index.java | 12 +- .../org/apache/cassandra/journal/Journal.java | 162 +-- .../apache/cassandra/journal/KeySupport.java | 2 + .../apache/cassandra/journal/Metadata.java | 21 +- .../org/apache/cassandra/journal/Params.java | 17 +- .../cassandra/journal/RecordPointer.java | 12 + .../org/apache/cassandra/journal/Segment.java | 53 +- .../cassandra/journal/SegmentWriter.java | 114 -- .../apache/cassandra/journal/Segments.java | 100 +- .../cassandra/journal/StaticSegment.java | 100 +- .../cassandra/journal/SyncedOffsets.java | 256 ---- ...heMetrics.java => AccordCacheMetrics.java} | 4 +- .../cassandra/metrics/AccordMetrics.java | 1 + .../org/apache/cassandra/schema/TableId.java | 52 +- .../cassandra/service/StorageProxy.java | 4 +- .../cassandra/service/accord/AccordCache.java | 1290 +++++++++++++++++ .../service/accord/AccordCacheEntry.java | 664 +++++++++ .../service/accord/AccordCachingState.java | 651 --------- .../service/accord/AccordCommandStore.java | 499 +++---- .../service/accord/AccordCommandStores.java | 139 +- .../service/accord/AccordDataStore.java | 7 +- .../service/accord/AccordExecutor.java | 1025 +++++++++++++ .../AccordExecutorAbstractLockLoop.java | 267 ++++ .../AccordExecutorAbstractSemiSyncSubmit.java | 64 + .../accord/AccordExecutorAsyncSubmit.java | 101 ++ .../accord/AccordExecutorInfiniteLoops.java | 101 ++ .../accord/AccordExecutorSemiSyncSubmit.java | 115 ++ .../service/accord/AccordExecutorSimple.java | 160 ++ .../accord/AccordExecutorSyncSubmit.java | 121 ++ .../accord/AccordFetchCoordinator.java | 2 +- .../service/accord/AccordJournal.java | 23 +- .../accord/AccordJournalValueSerializers.java | 4 +- .../service/accord/AccordKeyspace.java | 200 ++- .../service/accord/AccordMessageSink.java | 2 +- .../service/accord/AccordObjectSizes.java | 1 + .../service/accord/AccordSafeCommand.java | 24 +- .../accord/AccordSafeCommandStore.java | 262 +++- .../accord/AccordSafeCommandsForKey.java | 10 +- .../accord/AccordSafeCommandsForRanges.java | 85 -- .../service/accord/AccordSafeState.java | 17 +- .../accord/AccordSafeTimestampsForKey.java | 10 +- .../accord/AccordSegmentCompactor.java | 3 +- .../service/accord/AccordService.java | 54 +- .../service/accord/AccordStateCache.java | 787 ---------- .../cassandra/service/accord/AccordTask.java | 1114 ++++++++++++++ .../service/accord/CommandsForRanges.java | 4 +- .../accord/CommandsForRangesLoader.java | 420 +++--- .../service/accord/IAccordService.java | 4 + .../cassandra/service/accord/IJournal.java | 2 +- .../cassandra/service/accord/JournalKey.java | 32 + .../service/accord/SavedCommand.java | 73 +- .../service/accord/api/AccordAgent.java | 22 +- .../service/accord/api/AccordRoutableKey.java | 3 +- .../service/accord/api/AccordRoutingKey.java | 9 +- .../service/accord/api/AccordScheduler.java | 2 +- .../service/accord/async/AsyncLoader.java | 324 ----- .../service/accord/async/AsyncOperation.java | 423 ------ .../service/accord/events/CacheEvents.java | 2 +- .../accord/interop/AccordInteropApply.java | 58 +- .../serializers/CommandStoreSerializers.java | 11 +- .../serializers/ReadDataSerializers.java | 12 +- .../service/accord/txn/AbstractKeySorted.java | 9 +- .../accord/txn/AbstractSerialized.java | 12 +- .../cassandra/service/accord/txn/TxnRead.java | 6 + .../service/accord/txn/TxnWrite.java | 10 +- .../cassandra/utils/ByteBufferUtil.java | 2 +- .../org/apache/cassandra/utils/UUIDGen.java | 11 +- .../utils/btree/AbstractBTreeMap.java | 22 +- .../cassandra/utils/btree/BTreeBiMap.java | 30 +- .../cassandra/utils/btree/BTreeMap.java | 27 +- .../concurrent/ConcurrentLinkedStack.java | 68 + .../utils/concurrent/IntrusiveStack.java | 18 +- .../utils/concurrent/LockWithAsyncSignal.java | 258 ++++ .../cassandra/utils/concurrent/Ref.java | 5 + .../accord/AccordExecutorBurnTest.java | 91 ++ .../cassandra-jmx-disabled-sslconfig.yaml | 6 + test/conf/cassandra-jmx-pem-sslconfig.yaml | 6 + ...andra-jmx-sslconfig-with-passwordfile.yaml | 6 + test/conf/cassandra-jmx-sslconfig.yaml | 6 + test/conf/cassandra.yaml | 11 +- .../apache/cassandra/distributed/Cluster.java | 5 + .../distributed/impl/AbstractCluster.java | 74 +- .../cassandra/distributed/impl/Instance.java | 23 +- .../distributed/impl/InstanceConfig.java | 6 +- .../distributed/test/TestBaseImpl.java | 4 +- .../test/accord/AccordBootstrapTest.java | 15 +- .../test/accord/AccordDropTableBase.java | 4 +- .../accord/AccordHostReplacementTest.java | 11 +- .../accord/AccordIncrementalRepairTest.java | 6 +- .../test/accord/AccordIntegrationTest.java | 14 +- .../accord/AccordJournalIntegrationTest.java | 1 + .../test/accord/AccordLoadTest.java | 341 +++-- .../test/accord/AccordMetricsTest.java | 39 +- .../accord/AccordMigrationRaceTestBase.java | 6 +- .../test/accord/AccordMigrationTest.java | 56 +- .../test/accord/AccordProgressLogTest.java | 24 +- .../test/accord/AccordTestBase.java | 6 +- .../test/log/BootWithMetadataTest.java | 3 +- .../topology/AccordTopologyMixupTest.java | 3 +- .../accord/AccordJournalCompactionTest.java | 3 +- .../harry/harry2/RectangleToSquares.java | 87 ++ .../simulator/asm/InterceptClasses.java | 4 +- .../simulator/ClusterSimulation.java | 4 +- .../test/AccordJournalSimulationTest.java | 39 +- .../org/apache/cassandra/ServerTestUtils.java | 4 +- .../concurrent/ForwardingExecutorPlus.java | 2 +- .../config/DatabaseDescriptorRefTest.java | 2 + .../config/YamlConfigurationLoaderTest.java | 8 +- .../org/apache/cassandra/cql3/CQLTester.java | 4 +- .../cassandra/db/ColumnFamilyStoreTest.java | 2 +- .../CompactionAccordIteratorsTest.java | 28 +- .../db/virtual/AccordVirtualTablesTest.java | 6 +- .../index/accord/AccordIndexStressTest.java | 4 +- .../index/accord/RouteIndexTest.java | 5 +- .../apache/cassandra/journal/SegmentTest.java | 11 +- .../cassandra/journal/SyncedOffsetsTest.java | 70 - .../apache/cassandra/journal/TestParams.java | 14 +- .../cassandra/journal/TimeUUIDKeySupport.java | 15 + .../service/accord/AccordCacheEntryTest.java | 117 ++ ...ateCacheTest.java => AccordCacheTest.java} | 287 ++-- .../accord/AccordCachingStateTest.java | 185 --- .../accord/AccordCommandStoreTest.java | 8 +- .../service/accord/AccordCommandTest.java | 6 +- .../accord/AccordJournalOrderTest.java | 12 +- .../service/accord/AccordKeyspaceTest.java | 9 +- .../service/accord/AccordMessageSinkTest.java | 3 +- ...OperationTest.java => AccordTaskTest.java} | 226 ++- .../service/accord/AccordTestUtils.java | 79 +- .../service/accord/CommandsForRangesTest.java | 8 +- .../service/accord/EpochSyncTest.java | 6 +- .../cassandra/service/accord/MockJournal.java | 4 +- .../service/accord/SavedCommandTest.java | 2 +- .../accord/SimulatedAccordCommandStore.java | 183 ++- .../SimulatedAccordCommandStore.java.orig | 419 ++++++ ...Test.java => SimulatedAccordTaskTest.java} | 105 +- .../service/accord/SimulatedDepsTest.java | 4 +- .../accord/SimulatedMultiKeyAndRangeTest.java | 2 +- ...ulatedRandomKeysWithRangeConflictTest.java | 3 +- .../service/accord/async/AsyncLoaderTest.java | 455 ------ .../cassandra/utils/AccordGenerators.java | 3 +- 168 files changed, 9218 insertions(+), 5849 deletions(-) delete mode 100644 src/java/org/apache/cassandra/journal/SegmentWriter.java delete mode 100644 src/java/org/apache/cassandra/journal/SyncedOffsets.java rename src/java/org/apache/cassandra/metrics/{AccordStateCacheMetrics.java => AccordCacheMetrics.java} (94%) create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCache.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordCacheEntry.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordCachingState.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutor.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutorAbstractLockLoop.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutorAbstractSemiSyncSubmit.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutorAsyncSubmit.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutorInfiniteLoops.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutorSemiSyncSubmit.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutorSimple.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordExecutorSyncSubmit.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForRanges.java delete mode 100644 src/java/org/apache/cassandra/service/accord/AccordStateCache.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordTask.java delete mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java delete mode 100644 src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java create mode 100644 src/java/org/apache/cassandra/utils/concurrent/ConcurrentLinkedStack.java create mode 100644 src/java/org/apache/cassandra/utils/concurrent/LockWithAsyncSignal.java create mode 100644 test/burn/org/apache/cassandra/service/accord/AccordExecutorBurnTest.java create mode 100644 test/harry/main/org/apache/cassandra/harry/harry2/RectangleToSquares.java delete mode 100644 test/unit/org/apache/cassandra/journal/SyncedOffsetsTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/AccordCacheEntryTest.java rename test/unit/org/apache/cassandra/service/accord/{AccordStateCacheTest.java => AccordCacheTest.java} (54%) delete mode 100644 test/unit/org/apache/cassandra/service/accord/AccordCachingStateTest.java rename test/unit/org/apache/cassandra/service/accord/{async/AsyncOperationTest.java => AccordTaskTest.java} (70%) create mode 100644 test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java.orig rename test/unit/org/apache/cassandra/service/accord/{async/SimulatedAsyncOperationTest.java => SimulatedAccordTaskTest.java} (71%) delete mode 100644 test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java diff --git a/build.xml b/build.xml index e1905a930491..3fe92490d432 100644 --- a/build.xml +++ b/build.xml @@ -1040,6 +1040,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + From 68aea4b15ffbc513f0dc6f99a37bed9a69e93a34 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Tue, 8 Apr 2025 14:15:17 +0200 Subject: [PATCH 263/340] Fix simulator after rebase * avoid running python3-dependent tasks when running simulator tasks * fix a problem with simulated snitch rebase * add a distinction between short-lived daemon threads and infinite loop ones for cases when we need to simulate user-implemented infinite loops Patch by Alex Petrov; reviewed by Benedict Elliott Smith for CASSANDRA-20542 --- .../cassandra/concurrent/ExecutorFactory.java | 36 +++++++---- .../concurrent/InfiniteLoopExecutor.java | 22 +++---- .../AbstractCommitLogSegmentManager.java | 2 +- .../commitlog/AbstractCommitLogService.java | 2 +- .../org/apache/cassandra/journal/Flusher.java | 2 +- .../org/apache/cassandra/journal/Journal.java | 2 +- .../service/accord/AccordExecutorLoops.java | 5 +- .../apache/cassandra/tcm/log/LocalLog.java | 2 +- .../simulator/ClusterSimulation.java | 3 +- .../paxos/AccordSimulationRunner.java | 7 +++ .../systems/InterceptingExecutorFactory.java | 19 ++++-- .../simulator/systems/SimulatedSnitch.java | 59 +++++++------------ .../test/AccordHarrySimulationTest.java | 6 ++ .../simulator/test/HarrySimulatorTest.java | 17 ++++-- .../simulator/test/HarryValidatingQuery.java | 43 +++++++++++--- .../concurrent/ForwardingExecutorFactory.java | 8 +-- .../concurrent/InfiniteLoopExecutorTest.java | 2 +- .../concurrent/SimulatedExecutorFactory.java | 4 +- .../apache/cassandra/repair/FuzzTestBase.java | 8 +-- 19 files changed, 149 insertions(+), 100 deletions(-) diff --git a/src/java/org/apache/cassandra/concurrent/ExecutorFactory.java b/src/java/org/apache/cassandra/concurrent/ExecutorFactory.java index ec3b9c370be1..0b72961e13cc 100644 --- a/src/java/org/apache/cassandra/concurrent/ExecutorFactory.java +++ b/src/java/org/apache/cassandra/concurrent/ExecutorFactory.java @@ -18,15 +18,15 @@ package org.apache.cassandra.concurrent; -import org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon; import org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts; import org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.Shared; -import static java.lang.Thread.*; +import static java.lang.Thread.NORM_PRIORITY; +import static java.lang.Thread.UncaughtExceptionHandler; import static org.apache.cassandra.concurrent.ExecutorFactory.SimulatorSemantics.NORMAL; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.DAEMON; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.UNSYNCHRONIZED; import static org.apache.cassandra.concurrent.NamedThreadFactory.createThread; import static org.apache.cassandra.concurrent.NamedThreadFactory.setupThread; @@ -78,6 +78,17 @@ public enum SimulatorSemantics NORMAL, DISCARD } + /// Simulator Tag specifies the nature of the created thread: + /// - JOB threads are short-lived and simulation treats them as sub tasks of the task that creates them, + /// so that the strictly ordered property of the simulator ensures the thread terminates before the next + /// task of its parent is scheduled. + /// - DAEMON threads are treated as background tasks, and are neither linked to their parent task or the Work phase that creates them. + /// - INFINITE_LOOP threads detach from their parent task as they are expected to run forever, but unlike DAEMON threads must have + /// no active work for a given Work phase to complete. + public enum SimulatorThreadTag { JOB, DAEMON, INFINITE_LOOP } + + public enum SystemThreadTag { DAEMON, NON_DAEMON } + /** * @return a factory that configures executors that propagate {@link ExecutorLocals} to the executing thread */ @@ -124,10 +135,11 @@ public enum SimulatorSemantics * Create and start a new thread to execute {@code runnable} * @param name the name of the thread * @param runnable the task to execute - * @param daemon flag to indicate whether the thread should be a daemon or not + * @param systemTag flag to indicate whether the loop thread should be a daemon thread or not + * @param simulatorTag flag to indicate the nature of the specific thread to help simulate it * @return the new thread */ - Thread startThread(String name, Runnable runnable, Daemon daemon); + Thread startThread(String name, Runnable runnable, SystemThreadTag systemTag, SimulatorThreadTag simulatorTag); /** * Create and start a new thread to execute {@code runnable}; this thread will be a daemon thread. @@ -137,7 +149,7 @@ public enum SimulatorSemantics */ default Thread startThread(String name, Runnable runnable) { - return startThread(name, runnable, DAEMON); + return startThread(name, runnable, DAEMON, SimulatorThreadTag.JOB); } /** @@ -148,14 +160,14 @@ default Thread startThread(String name, Runnable runnable) * @param name the name of the thread used to invoke the task repeatedly * @param task the task to execute repeatedly * @param simulatorSafe flag indicating if the loop thread can be intercepted / rescheduled during cluster simulation - * @param daemon flag to indicate whether the loop thread should be a daemon thread or not + * @param systemTag flag to indicate whether the loop thread should be a daemon thread or not * @param interrupts flag to indicate whether to synchronize interrupts of the task execution thread * using the task's monitor this can be used to prevent interruption while performing * IO operations which forbid interrupted threads. * See: {@link org.apache.cassandra.db.commitlog.AbstractCommitLogSegmentManager#start} * @return the new thread */ - Interruptible infiniteLoop(String name, Interruptible.Task task, SimulatorSafe simulatorSafe, Daemon daemon, Interrupts interrupts); + Interruptible infiniteLoop(String name, Interruptible.Task task, SimulatorSafe simulatorSafe, SystemThreadTag systemTag, Interrupts interrupts); /** * Create and start a new InfiniteLoopExecutor to repeatedly invoke {@code runnable}. @@ -291,9 +303,9 @@ public ScheduledExecutorPlus scheduled(boolean executeOnShutdown, String name, i } @Override - public Thread startThread(String name, Runnable runnable, Daemon daemon) + public Thread startThread(String name, Runnable runnable, SystemThreadTag systemTag, SimulatorThreadTag simulatorTag) { - Thread thread = setupThread(createThread(threadGroup, runnable, name, daemon == DAEMON), + Thread thread = setupThread(createThread(threadGroup, runnable, name, systemTag == DAEMON), Thread.NORM_PRIORITY, contextClassLoader, uncaughtExceptionHandler); @@ -302,9 +314,9 @@ public Thread startThread(String name, Runnable runnable, Daemon daemon) } @Override - public Interruptible infiniteLoop(String name, Interruptible.Task task, SimulatorSafe simulatorSafe, Daemon daemon, Interrupts interrupts) + public Interruptible infiniteLoop(String name, Interruptible.Task task, SimulatorSafe simulatorSafe, SystemThreadTag systemTag, Interrupts interrupts) { - return new InfiniteLoopExecutor(this, name, task, daemon, interrupts); + return new InfiniteLoopExecutor(this, name, task, systemTag, interrupts); } @Override diff --git a/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java b/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java index c9487e416458..9d8701cb88e4 100644 --- a/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java +++ b/src/java/org/apache/cassandra/concurrent/InfiniteLoopExecutor.java @@ -28,6 +28,8 @@ import java.util.function.BiFunction; import java.util.function.Consumer; +import org.apache.cassandra.concurrent.ExecutorFactory.SimulatorThreadTag; +import org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag; import org.apache.cassandra.utils.Shared; import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; @@ -52,14 +54,6 @@ public enum InternalState { SHUTTING_DOWN_NOW, TERMINATED } @Shared(scope = Shared.Scope.SIMULATION) public enum SimulatorSafe { SAFE, UNSAFE } - /** - * Does this loop always block on some external work provision that is going to be simulator-controlled, or does - * it loop periodically? If the latter, it may prevent simulation making progress between phases, and should be - * marked as a DAEMON process. - */ - @Shared(scope = Shared.Scope.SIMULATION) - public enum Daemon { DAEMON, NON_DAEMON } - @Shared(scope = Shared.Scope.SIMULATION) public enum Interrupts { SYNCHRONIZED, UNSYNCHRONIZED } @@ -70,20 +64,20 @@ public enum Interrupts { SYNCHRONIZED, UNSYNCHRONIZED } private final Consumer interruptHandler; private final Condition isTerminated = newOneTimeCondition(); - public InfiniteLoopExecutor(String name, Task task, Daemon daemon) + public InfiniteLoopExecutor(String name, Task task, SystemThreadTag systemTag) { - this(ExecutorFactory.Global.executorFactory(), name, task, daemon, UNSYNCHRONIZED); + this(ExecutorFactory.Global.executorFactory(), name, task, systemTag, UNSYNCHRONIZED); } - public InfiniteLoopExecutor(ExecutorFactory factory, String name, Task task, Daemon daemon) + public InfiniteLoopExecutor(ExecutorFactory factory, String name, Task task, SystemThreadTag systemTag) { - this(factory, name, task, daemon, UNSYNCHRONIZED); + this(factory, name, task, systemTag, UNSYNCHRONIZED); } - public InfiniteLoopExecutor(ExecutorFactory factory, String name, Task task, Daemon daemon, Interrupts interrupts) + public InfiniteLoopExecutor(ExecutorFactory factory, String name, Task task, SystemThreadTag systemTag, Interrupts interrupts) { this.task = task; - this.thread = factory.startThread(name, this::loop, daemon); + this.thread = factory.startThread(name, this::loop, systemTag, SimulatorThreadTag.INFINITE_LOOP); this.interruptHandler = interrupts == SYNCHRONIZED ? interruptHandler(task) : Thread::interrupt; diff --git a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogSegmentManager.java b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogSegmentManager.java index dcd791caf306..33489688818b 100644 --- a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogSegmentManager.java +++ b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogSegmentManager.java @@ -56,7 +56,7 @@ import org.apache.cassandra.utils.concurrent.WaitQueue; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.db.commitlog.CommitLogSegment.Allocation; diff --git a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java index cd3eb56105d6..7bba9c49110d 100644 --- a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java +++ b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java @@ -38,7 +38,7 @@ import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index cd8970949733..d48d80171bc3 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -39,7 +39,7 @@ import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 98150f1cee7e..e70e93b9c6e7 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -64,7 +64,7 @@ import static java.lang.String.format; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.SYNCHRONIZED; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.concurrent.Interruptible.State.NORMAL; diff --git a/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java b/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java index 0e8aab8c8af4..d26e6e5abe4f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java +++ b/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java @@ -24,11 +24,14 @@ import java.util.function.IntFunction; import accord.utils.Invariants; + import io.netty.util.collection.LongObjectHashMap; import org.apache.cassandra.service.accord.AccordExecutor.Mode; import org.apache.cassandra.utils.concurrent.Condition; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.concurrent.ExecutorFactory.SimulatorThreadTag.INFINITE_LOOP; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.NON_DAEMON; import static org.apache.cassandra.service.accord.AccordExecutor.Mode.RUN_WITH_LOCK; import static org.apache.cassandra.utils.Clock.Global.nanoTime; @@ -46,7 +49,7 @@ public AccordExecutorLoops(Mode mode, int threads, IntFunction name, Fun loops = new LongObjectHashMap<>(threads); for (int i = 0; i < threads; ++i) { - Thread thread = executorFactory().startThread(name.apply(i), wrap(loopFactory.apply(mode))); + Thread thread = executorFactory().startThread(name.apply(i), wrap(loopFactory.apply(mode)), NON_DAEMON, INFINITE_LOOP); Thread conflict = loops.putIfAbsent(thread.getId(), thread); Invariants.require(conflict == null || !conflict.isAlive(), "Allocated two threads with the same threadId!"); } diff --git a/src/java/org/apache/cassandra/tcm/log/LocalLog.java b/src/java/org/apache/cassandra/tcm/log/LocalLog.java index 01b6c22016f1..a0f50d16ad8d 100644 --- a/src/java/org/apache/cassandra/tcm/log/LocalLog.java +++ b/src/java/org/apache/cassandra/tcm/log/LocalLog.java @@ -70,7 +70,7 @@ import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.WaitQueue; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.NON_DAEMON; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.NON_DAEMON; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts.UNSYNCHRONIZED; import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe.SAFE; import static org.apache.cassandra.tcm.Epoch.EMPTY; diff --git a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java index 22ce8581ddab..ed21d7079d50 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/ClusterSimulation.java @@ -784,7 +784,8 @@ public ClusterSimulation(RandomSource random, long seed, int uniqueNum, .set("failure_detector", SimulatedFailureDetector.Instance.class.getName()) .set("commitlog_compression", new ParameterizedClass(LZ4Compressor.class.getName(), emptyMap())) .set("commitlog_sync", "batch") - .set("accord.journal.flush_mode", "BATCH"); + .set("accord.journal.flush_mode", "BATCH") + .set("accord.command_store_shard_count", "4"); // TODO: Add remove() to IInstanceConfig if (config instanceof InstanceConfig) { diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java index b0cd80cf30ac..e782848ea494 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/AccordSimulationRunner.java @@ -23,14 +23,20 @@ import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import io.airlift.airline.Cli; import io.airlift.airline.Command; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.simulator.SimulationRunner; +import org.apache.cassandra.simulator.SimulatorUtils; import org.apache.cassandra.utils.StorageCompatibilityMode; public class AccordSimulationRunner extends SimulationRunner { + private static Logger logger = LoggerFactory.getLogger(AccordSimulationRunner.class); + @BeforeClass public static void beforeAll() { @@ -86,6 +92,7 @@ public static class Help extends HelpCommand {} */ public static void main(String[] args) throws IOException { + SimulatorUtils.verifyAndlogSimulatorArgs(logger, args); AccordClusterSimulation.Builder builder = new AccordClusterSimulation.Builder(); builder.unique(uniqueNum.getAndIncrement()); diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingExecutorFactory.java b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingExecutorFactory.java index c7f4dce8b85a..aa22e3ef8683 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingExecutorFactory.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/InterceptingExecutorFactory.java @@ -30,13 +30,13 @@ import com.google.common.annotations.VisibleForTesting; +import accord.utils.UnhandledEnum; import io.netty.util.concurrent.FastThreadLocal; import org.apache.cassandra.concurrent.ExecutorBuilder; import org.apache.cassandra.concurrent.ExecutorBuilderFactory; import org.apache.cassandra.concurrent.ExecutorFactory; import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.concurrent.InfiniteLoopExecutor; -import org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon; import org.apache.cassandra.concurrent.InfiniteLoopExecutor.Interrupts; import org.apache.cassandra.concurrent.InfiniteLoopExecutor.SimulatorSafe; import org.apache.cassandra.concurrent.Interruptible.Task; @@ -69,6 +69,8 @@ import org.apache.cassandra.utils.concurrent.RunnableFuture; import static org.apache.cassandra.simulator.systems.SimulatedAction.Kind.INFINITE_LOOP; +import static org.apache.cassandra.simulator.systems.SimulatedAction.Kind.SCHEDULED_DAEMON; +import static org.apache.cassandra.simulator.systems.SimulatedAction.Kind.THREAD; public class InterceptingExecutorFactory implements ExecutorFactory, Closeable { @@ -327,9 +329,18 @@ public ExecutorPlus pooled(String name, int threads) return configurePooled(name, threads).build(); } - public Thread startThread(String name, Runnable runnable, Daemon daemon) + public Thread startThread(String name, Runnable runnable, SystemThreadTag systemTag, SimulatorThreadTag simulatorTag) { - return simulatedExecution.intercept().start(SimulatedAction.Kind.THREAD, factory(name)::newThread, runnable); + SimulatedAction.Kind kind; + switch (simulatorTag) + { + default: throw UnhandledEnum.unknown(simulatorTag); + case INFINITE_LOOP: kind = INFINITE_LOOP; break; + case JOB: kind = THREAD; break; + case DAEMON: kind = SCHEDULED_DAEMON; break; + } + + return simulatedExecution.intercept().start(kind, factory(name)::newThread, runnable); } @VisibleForTesting @@ -341,7 +352,7 @@ public InterceptedExecution.InterceptedThreadStart startParked(String name, Runn } @Override - public Interruptible infiniteLoop(String name, Task task, SimulatorSafe simulatorSafe, Daemon daemon, Interrupts interrupts) + public Interruptible infiniteLoop(String name, Task task, SimulatorSafe simulatorSafe, SystemThreadTag systemTag, Interrupts interrupts) { if (simulatorSafe != SimulatorSafe.SAFE) { diff --git a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java index 7692e0ae5c46..55495562982e 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java +++ b/test/simulator/main/org/apache/cassandra/simulator/systems/SimulatedSnitch.java @@ -28,49 +28,18 @@ import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.IInstanceConfig; -import org.apache.cassandra.locator.*; +import org.apache.cassandra.locator.Endpoint; +import org.apache.cassandra.locator.IEndpointSnitch; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.locator.ReplicaCollection; import org.apache.cassandra.simulator.cluster.NodeLookup; import org.apache.cassandra.utils.Sortable; public class SimulatedSnitch extends NodeLookup { - private static class SimulatedProximity implements NodeProximity - { - @Override - public > C sortedByProximity(InetAddressAndPort address, C addresses) - { - return addresses.sorted(Comparator.comparingInt(SimulatedSnitch::asInt)); - } - - @Override - public int compareEndpoints(InetAddressAndPort target, Replica r1, Replica r2) - { - return Comparator.comparingInt(SimulatedSnitch::asInt).compare(r1, r2); - } - - @Override - public boolean isWorthMergingForRangeQuery(ReplicaCollection merged, ReplicaCollection l1, ReplicaCollection l2) - { - return false; - } - - @Override - public boolean supportCompareByEndpoint() - { - return true; - } - - @Override - public > Comparator endpointComparator(InetAddressAndPort address, C addresses) - { - return Comparator.comparingInt(SimulatedSnitch::asInt); - } - } - public static class Instance implements IEndpointSnitch { - private final NodeProximity proximity = new SimulatedProximity(); - private static volatile Function LOOKUP_DC; public String getRack(InetAddressAndPort endpoint) @@ -85,12 +54,12 @@ public String getDatacenter(InetAddressAndPort endpoint) public > C sortedByProximity(InetAddressAndPort address, C addresses) { - return proximity.sortedByProximity(address, addresses); + return addresses.sorted(Comparator.comparingInt(SimulatedSnitch::asInt)); } public int compareEndpoints(InetAddressAndPort target, Replica r1, Replica r2) { - return proximity.compareEndpoints(target, r1, r2); + return Comparator.comparingInt(SimulatedSnitch::asInt).compare(r1, r2); } public void gossiperStarting() @@ -99,13 +68,25 @@ public void gossiperStarting() public boolean isWorthMergingForRangeQuery(ReplicaCollection merged, ReplicaCollection l1, ReplicaCollection l2) { - return proximity.isWorthMergingForRangeQuery(merged, l1, l2); + return false; } public static void setup(Function lookupDc) { LOOKUP_DC = lookupDc; } + + @Override + public boolean supportCompareByEndpoint() + { + return true; + } + + @Override + public > Comparator endpointComparator(InetAddressAndPort address, C addresses) + { + return Comparator.comparingInt(SimulatedSnitch::asInt); + } } final int[] numInDcs; diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordHarrySimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordHarrySimulationTest.java index 8506fb615754..c4dc9f10fc88 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordHarrySimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordHarrySimulationTest.java @@ -24,6 +24,7 @@ import java.util.Map; import java.util.Set; +import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.harry.SchemaSpec; import org.apache.cassandra.harry.gen.Generator; import org.apache.cassandra.harry.gen.SchemaGenerators; @@ -62,6 +63,11 @@ else if (somewhatLossy.contains(verb)) return schedulers; } + protected ConsistencyLevel validateQueryConsistency() + { + return ConsistencyLevel.QUORUM; + } + public Generator schemaSpecGen(String keyspace, String prefix) { return SchemaGenerators.schemaSpecGen(keyspace, prefix, 1000, SchemaSpec.optionsBuilder().withTransactionalMode(TransactionalMode.full)); diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java index 14073943d700..9bc9b6e759bb 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/HarrySimulatorTest.java @@ -288,7 +288,7 @@ protected void testInternal() throws Exception work.add(interleave("Start generating", HarrySimulatorTest.generateWrites(rowsPerPhase, simulation, cl))); work.add(work("Validate all data locally", - lazy(() -> validateAllLocal(simulation, simulation.nodeState.ring, rf)))); + lazy(() -> validateAllLocal(simulation, simulation.nodeState.ring, rf, validateQueryConsistency(), simulation.rng)))); return arr(work.toArray(new ActionSchedule.Work[0])); }, @@ -339,8 +339,8 @@ protected void testInternal() throws Exception run(() -> simulation.nodeState.decommission(node)))); work.add(work("Check node state", assertNodeState(simulation.simulated, simulation.cluster, node, NodeState.LEFT))); } - work.add(work("Validate data locally", - lazy(() -> validateAllLocal(simulation, simulation.nodeState.ring, rf)))); + work.add(work("Validate data with " + validateQueryConsistency(), + lazy(() -> validateAllLocal(simulation, simulation.nodeState.ring, rf, validateQueryConsistency(), simulation.rng)))); boolean tmp = shouldBootstrap; work.add(work("Output message", run(() -> logger.warn("Finished {} of {} and data validation!\n", tmp ? "bootstrap" : "decommission", node)))); @@ -846,7 +846,7 @@ public SimpleQueryResult call() * Given you have used `generate` methods to generate data with Harry, you can use this method to check whether all * data has been propagated everywhere it should be, be it via streaming, read repairs, or regular writes. */ - public static Action validateAllLocal(HarrySimulation simulation, List owernship, TokenPlacementModel.ReplicationFactor rf) + public static Action validateAllLocal(HarrySimulation simulation, List owernship, TokenPlacementModel.ReplicationFactor rf, ConsistencyLevel consistencyLevel, EntropySource rng) { return new Actions.LambdaAction("Validate", Action.Modifiers.RELIABLE_NO_TIMEOUTS, () -> { @@ -861,12 +861,19 @@ public static Action validateAllLocal(HarrySimulation simulation, List visitedPds(HarrySimulation simulation) { Set pds = new HashSet<>(); diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/HarryValidatingQuery.java b/test/simulator/test/org/apache/cassandra/simulator/test/HarryValidatingQuery.java index 72cd85de3ba4..3e34bf69b10f 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/HarryValidatingQuery.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/HarryValidatingQuery.java @@ -25,7 +25,9 @@ import accord.utils.Invariants; import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInstance; +import org.apache.cassandra.harry.gen.EntropySource; import org.apache.cassandra.harry.op.Visit; import org.apache.cassandra.harry.op.Operations; import org.apache.cassandra.harry.execution.CompiledStatement; @@ -39,6 +41,8 @@ import org.apache.cassandra.simulator.systems.InterceptingExecutor; import org.apache.cassandra.simulator.systems.SimulatedAction; +import static org.apache.cassandra.simulator.SimulatorUtils.failWithOOM; + public class HarryValidatingQuery extends SimulatedAction { private static final Logger logger = LoggerFactory.getLogger(HarryValidatingQuery.class); @@ -51,13 +55,17 @@ public class HarryValidatingQuery extends SimulatedAction private final HarrySimulatorTest.HarrySimulation simulation; private final Visit visit; private final QueryBuildingVisitExecutor queryBuilder; + private final ConsistencyLevel consistencyLevel; + private final EntropySource rng; public HarryValidatingQuery(HarrySimulatorTest.HarrySimulation simulation, Cluster cluster, TokenPlacementModel.ReplicationFactor rf, List owernship, Visit visit, - QueryBuildingVisitExecutor queryBuilder) + QueryBuildingVisitExecutor queryBuilder, + ConsistencyLevel consistencyLevel, + EntropySource rng) { super(visit, Modifiers.RELIABLE_NO_TIMEOUTS, Modifiers.RELIABLE_NO_TIMEOUTS, null, simulation.simulated); this.rf = rf; @@ -67,7 +75,8 @@ public HarryValidatingQuery(HarrySimulatorTest.HarrySimulation simulation, this.visit = visit; this.queryBuilder = queryBuilder; this.simulation = simulation; - + this.consistencyLevel = consistencyLevel; + this.rng = rng; } protected InterceptedExecution task() @@ -78,14 +87,25 @@ public void run() { try { - TokenPlacementModel.ReplicatedRanges ring = rf.replicate(owernship); - Invariants.require(visit.operations.length == 1); - Invariants.require(visit.operations[0] instanceof Operations.SelectStatement); - Operations.SelectStatement select = (Operations.SelectStatement) visit.operations[0]; - for (TokenPlacementModel.Replica replica : ring.replicasFor(token(select.pd))) + if (consistencyLevel == ConsistencyLevel.NODE_LOCAL) + { + TokenPlacementModel.ReplicatedRanges ring = rf.replicate(owernship); + Invariants.require(visit.operations.length == 1); + Invariants.require(visit.operations[0] instanceof Operations.SelectStatement); + Operations.SelectStatement select = (Operations.SelectStatement) visit.operations[0]; + for (TokenPlacementModel.Replica replica : ring.replicasFor(token(select.pd))) + { + CompiledStatement compiled = queryBuilder.compile(visit); + Object[][] objects = executeNodeLocal(compiled.cql(), replica.node(), compiled.bindings()); + List actualRows = InJvmDTestVisitExecutor.rowsToResultSet(simulation.schema, select, objects); + simulation.model.validate(select, actualRows); + } + } + else { + Operations.SelectStatement select = (Operations.SelectStatement) visit.operations[0]; CompiledStatement compiled = queryBuilder.compile(visit); - Object[][] objects = executeNodeLocal(compiled.cql(), replica.node(), compiled.bindings()); + Object[][] objects = execute(compiled.cql(), rng.nextInt(cluster.size()) + 1, compiled.bindings()); List actualRows = InJvmDTestVisitExecutor.rowsToResultSet(simulation.schema, select, objects); simulation.model.validate(select, actualRows); } @@ -93,6 +113,7 @@ public void run() catch (Throwable t) { logger.error("Caught an exception while validating", t); + failWithOOM(); throw t; } } @@ -113,4 +134,10 @@ protected Object[][] executeNodeLocal(String statement, TokenPlacementModel.Node .get(); return instance.executeInternal(statement, bindings); } + + protected Object[][] execute(String statement, int id, Object... bindings) + { + IInstance instance = cluster.get(id); + return instance.coordinator().execute(statement, consistencyLevel, bindings); + } } diff --git a/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorFactory.java b/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorFactory.java index b6bae2f1a4d1..37b77f3dcf11 100644 --- a/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorFactory.java +++ b/test/unit/org/apache/cassandra/concurrent/ForwardingExecutorFactory.java @@ -117,15 +117,15 @@ public ScheduledExecutorPlus scheduled(boolean executeOnShutdown, String name, i } @Override - public Thread startThread(String name, Runnable runnable, InfiniteLoopExecutor.Daemon daemon) + public Thread startThread(String name, Runnable runnable, SystemThreadTag systemTag, SimulatorThreadTag simulatorTag) { - return delegate().startThread(name, runnable, daemon); + return delegate().startThread(name, runnable, systemTag, simulatorTag); } @Override - public Interruptible infiniteLoop(String name, Interruptible.Task task, InfiniteLoopExecutor.SimulatorSafe simulatorSafe, InfiniteLoopExecutor.Daemon daemon, InfiniteLoopExecutor.Interrupts interrupts) + public Interruptible infiniteLoop(String name, Interruptible.Task task, InfiniteLoopExecutor.SimulatorSafe simulatorSafe, SystemThreadTag systemTag, InfiniteLoopExecutor.Interrupts interrupts) { - return delegate().infiniteLoop(name, task, simulatorSafe, daemon, interrupts); + return delegate().infiniteLoop(name, task, simulatorSafe, systemTag, interrupts); } @Override diff --git a/test/unit/org/apache/cassandra/concurrent/InfiniteLoopExecutorTest.java b/test/unit/org/apache/cassandra/concurrent/InfiniteLoopExecutorTest.java index 9ec702dd5010..29dc7ec8ac8d 100644 --- a/test/unit/org/apache/cassandra/concurrent/InfiniteLoopExecutorTest.java +++ b/test/unit/org/apache/cassandra/concurrent/InfiniteLoopExecutorTest.java @@ -30,7 +30,7 @@ import org.junit.Assert; import org.junit.Test; -import static org.apache.cassandra.concurrent.InfiniteLoopExecutor.Daemon.DAEMON; +import static org.apache.cassandra.concurrent.ExecutorFactory.SystemThreadTag.DAEMON; public class InfiniteLoopExecutorTest { diff --git a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java index c7a03dc79cd6..e4ae9282e73d 100644 --- a/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java +++ b/test/unit/org/apache/cassandra/concurrent/SimulatedExecutorFactory.java @@ -204,7 +204,7 @@ public ScheduledExecutorPlus scheduled(boolean executeOnShutdown, String name, i } @Override - public Thread startThread(String name, Runnable runnable, InfiniteLoopExecutor.Daemon daemon) + public Thread startThread(String name, Runnable runnable, SystemThreadTag systemTag, SimulatorThreadTag simulatorTag) { throw new UnsupportedOperationException("Thread can't be simualted"); } @@ -213,7 +213,7 @@ public Thread startThread(String name, Runnable runnable, InfiniteLoopExecutor.D public Interruptible infiniteLoop(String name, Interruptible.Task task, InfiniteLoopExecutor.SimulatorSafe simulatorSafe, - InfiniteLoopExecutor.Daemon daemon, + SystemThreadTag systemTag, InfiniteLoopExecutor.Interrupts interrupts) { var delegate = new UnorderedScheduledExecutorService(); diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index ad814f0032d3..8ffd96795b41 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -243,16 +243,16 @@ private boolean shouldMock() } @Override - public Thread startThread(String name, Runnable runnable, InfiniteLoopExecutor.Daemon daemon) + public Thread startThread(String name, Runnable runnable, SystemThreadTag systemTag, SimulatorThreadTag simulatorTag) { if (shouldMock()) return new Thread(); - return delegate.startThread(name, runnable, daemon); + return delegate.startThread(name, runnable, systemTag, simulatorTag); } @Override - public Interruptible infiniteLoop(String name, Interruptible.Task task, InfiniteLoopExecutor.SimulatorSafe simulatorSafe, InfiniteLoopExecutor.Daemon daemon, InfiniteLoopExecutor.Interrupts interrupts) + public Interruptible infiniteLoop(String name, Interruptible.Task task, InfiniteLoopExecutor.SimulatorSafe simulatorSafe, SystemThreadTag systemTag, InfiniteLoopExecutor.Interrupts interrupts) { - return delegate.infiniteLoop(name, task, simulatorSafe, daemon, interrupts); + return delegate.infiniteLoop(name, task, simulatorSafe, systemTag, interrupts); } @Override From 19048ef570a1ef10bbcfe7434a93a973f31f59db Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Mon, 7 Apr 2025 12:46:32 +0100 Subject: [PATCH 264/340] Improve Journal table key serialization Also improve: - TxnId serialization - StoreParticipants serialization - compareUnsigned Node.Id for consistency with serialized TxnId patch by Benedict; reviewed by Alex Petrov for CASSANDRA-20546 --- modules/accord | 2 +- .../concurrent/ExecutionFailure.java | 5 + .../db/marshal/ByteBufferAccessor.java | 8 +- .../cassandra/db/marshal/ValueAccessor.java | 34 +- .../index/accord/RouteIndexFormat.java | 2 +- .../cassandra/index/accord/SSTableIndex.java | 2 +- .../io/AsymmetricUnversionedSerializer.java | 5 + src/java/org/apache/cassandra/net/Verb.java | 7 +- .../AbstractAccordSegmentCompactor.java | 2 +- .../service/accord/AccordCommandStores.java | 2 +- .../service/accord/AccordDataStore.java | 4 +- .../service/accord/AccordExecutorLoops.java | 7 +- .../service/accord/AccordJournal.java | 10 +- .../service/accord/AccordKeyspace.java | 71 +-- .../service/accord/AccordObjectSizes.java | 23 +- .../accord/AccordSafeCommandStore.java | 1 - .../service/accord/AccordService.java | 2 +- .../service/accord/AccordSyncPropagator.java | 1 - .../service/accord/AccordVerbHandler.java | 3 +- .../service/accord/api/AccordAgent.java | 7 +- .../service/accord/api/AccordRoutableKey.java | 2 +- .../service/accord/api/PartitionKey.java | 29 +- .../service/accord/api/TokenKey.java | 4 - .../interop/AccordInteropExecution.java | 6 +- .../interop/AccordInteropReadRepair.java | 1 - .../accord/journal/AccordTopologyUpdate.java | 2 +- .../BeginInvalidationSerializers.java | 36 +- .../serializers/CommandSerializers.java | 512 ++++++++++++----- .../accord/serializers/FetchSerializers.java | 6 +- .../IVersionedWithKeysSerializer.java | 245 ++++---- .../serializers/InformDurableSerializers.java | 6 +- .../accord/serializers/KeySerializers.java | 532 +++++++++++++----- .../accord/serializers/ResultSerializers.java | 2 +- .../cassandra/utils/vint/VIntCoding.java | 21 + .../cql3/validation/entities/VectorsTest.java | 2 - .../CheckpointIntervalArrayIndexTest.java | 2 +- .../org/apache/cassandra/io/Serializers.java | 8 +- .../serializers/CommandSerializersTest.java | 61 +- .../serializers/KeySerializersTest.java | 173 +++++- 39 files changed, 1269 insertions(+), 579 deletions(-) diff --git a/modules/accord b/modules/accord index fc14a154fd51..134df57677bb 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit fc14a154fd514d4ab40b37508fb9497f786835e0 +Subproject commit 134df57677bbd5092994923a4dc2f15cd1d033d1 diff --git a/src/java/org/apache/cassandra/concurrent/ExecutionFailure.java b/src/java/org/apache/cassandra/concurrent/ExecutionFailure.java index 27ab885e234e..dc1262b3c1b2 100644 --- a/src/java/org/apache/cassandra/concurrent/ExecutionFailure.java +++ b/src/java/org/apache/cassandra/concurrent/ExecutionFailure.java @@ -19,6 +19,7 @@ package org.apache.cassandra.concurrent; import java.util.concurrent.Callable; +import java.util.concurrent.CancellationException; import java.util.concurrent.Future; import org.apache.cassandra.concurrent.DebuggableTask.RunnableDebuggableTask; @@ -26,6 +27,7 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.db.compaction.CompactionInterruptedException; +import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.utils.Closeable; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.WithResources; @@ -49,6 +51,9 @@ public static void handle(Throwable t) { try { + if (t instanceof RequestTimeoutException || t instanceof CancellationException) + return; + if (t instanceof CompactionInterruptedException) { // TODO: should we check to see there aren't nested CompactionInterruptedException? diff --git a/src/java/org/apache/cassandra/db/marshal/ByteBufferAccessor.java b/src/java/org/apache/cassandra/db/marshal/ByteBufferAccessor.java index edc28b58e2b1..381bac4971b4 100644 --- a/src/java/org/apache/cassandra/db/marshal/ByteBufferAccessor.java +++ b/src/java/org/apache/cassandra/db/marshal/ByteBufferAccessor.java @@ -318,13 +318,13 @@ public int putFloat(ByteBuffer dst, int offset, float value) @Override public int putLeastSignificantBytes(ByteBuffer dst, int offset, long register, int bytes) { - if (dst.remaining() < Long.BYTES) + int pos = dst.position() + offset; + if (dst.limit() - pos < Long.BYTES) { return ValueAccessor.putLeastSignificantBytes(this, dst, offset, register, bytes); } else { - int pos = dst.position() + offset; dst.putLong(pos, register << (64 - (bytes * 8))); } return bytes; @@ -333,13 +333,13 @@ public int putLeastSignificantBytes(ByteBuffer dst, int offset, long register, i @Override public long getLeastSignificantBytes(ByteBuffer dst, int offset, int bytes) { - if (dst.remaining() < Long.BYTES) + int pos = dst.position() + offset; + if (dst.limit() - pos < Long.BYTES) { return ValueAccessor.getLeastSignificantBytes(this, dst, offset, bytes); } else { - int pos = dst.position() + offset; return dst.getLong(pos) >>> (64 - (bytes * 8)); } } diff --git a/src/java/org/apache/cassandra/db/marshal/ValueAccessor.java b/src/java/org/apache/cassandra/db/marshal/ValueAccessor.java index 362063d683b9..4916c7326223 100644 --- a/src/java/org/apache/cassandra/db/marshal/ValueAccessor.java +++ b/src/java/org/apache/cassandra/db/marshal/ValueAccessor.java @@ -525,23 +525,23 @@ public static int putLeastSignificantBytes(ValueAccessor accessor, V dst, break; case 3: accessor.putShort(dst, offset, (short)(register >>> 8)); - accessor.putByte(dst, offset, (byte)register); + accessor.putByte(dst, offset + 2, (byte)register); break; case 4: accessor.putInt(dst, offset, (int)register); break; case 5: accessor.putInt(dst, offset, (int)(register >>> 8)); - accessor.putByte(dst, offset, (byte)register); + accessor.putByte(dst, offset + 4, (byte)register); break; case 6: accessor.putInt(dst, offset, (int)(register >>> 16)); - accessor.putShort(dst, offset, (short)register); + accessor.putShort(dst, offset + 4, (short)register); break; case 7: accessor.putInt(dst, offset, (int)(register >>> 24)); - accessor.putShort(dst, offset, (short)(register >> 8)); - accessor.putByte(dst, offset, (byte)register); + accessor.putShort(dst, offset + 4, (short)(register >> 8)); + accessor.putByte(dst, offset + 6, (byte)register); break; case 8: accessor.putLong(dst, offset, register); @@ -557,23 +557,23 @@ public static long getLeastSignificantBytes(ValueAccessor accessor, V dst switch (bytes) { case 0: return 0; - case 1: return accessor.getByte(dst, offset); - case 2: return accessor.getShort(dst, offset); + case 1: return accessor.getByte(dst, offset) & 0xffL; + case 2: return accessor.getShort(dst, offset) & 0xffffL; case 3: - return ((long)accessor.getShort(dst, offset) << 8) - | (long)accessor.getByte(dst, offset + 2); + return ((accessor.getShort(dst, offset) & 0xffffL) << 8) + | (accessor.getByte(dst, offset + 2) & 0xffL); case 4: - return accessor.getInt(dst, offset); + return accessor.getInt(dst, offset) & 0xffffffffL; case 5: - return ((long)accessor.getInt(dst, offset) << 8) - | (long)accessor.getByte(dst, offset + 4); + return ((accessor.getInt(dst, offset) & 0xffffffffL) << 8) + | (accessor.getByte(dst, offset + 4) & 0xffL); case 6: - return ((long)accessor.getInt(dst, offset) << 16) - | (long)accessor.getShort(dst, offset + 4); + return ((accessor.getInt(dst, offset) & 0xffffffffL) << 16) + | (accessor.getShort(dst, offset + 4) & 0xffffL); case 7: - return ((long)accessor.getInt(dst, offset) << 24) - | ((long)accessor.getShort(dst, offset + 4) << 8) - | (long)accessor.getByte(dst, offset + 6); + return ((accessor.getInt(dst, offset) & 0xffffffffL) << 24) + | ((accessor.getShort(dst, offset + 4) & 0xffffL) << 8) + | (accessor.getByte(dst, offset + 6) & 0xffL); case 8: return accessor.getLong(dst, offset); default: throw new IllegalArgumentException(); } diff --git a/src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java b/src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java index b92bf69e8562..56be6b9eed9d 100644 --- a/src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java +++ b/src/java/org/apache/cassandra/index/accord/RouteIndexFormat.java @@ -249,7 +249,7 @@ public void abort(Throwable accumulator, boolean fromIndex) } } - static List readSegements(Map index) throws IOException + static List readSegments(Map index) throws IOException { List segments = new ArrayList<>(); diff --git a/src/java/org/apache/cassandra/index/accord/SSTableIndex.java b/src/java/org/apache/cassandra/index/accord/SSTableIndex.java index c6085b88272b..03c669f4bd38 100644 --- a/src/java/org/apache/cassandra/index/accord/SSTableIndex.java +++ b/src/java/org/apache/cassandra/index/accord/SSTableIndex.java @@ -74,7 +74,7 @@ public static SSTableIndex create(IndexDescriptor id) throws IOException Map files = new EnumMap<>(IndexComponent.class); for (IndexComponent c : id.getLiveComponents()) files.put(c, new FileHandle.Builder(id.fileFor(c)).mmapped(true).complete()); - List segments = RouteIndexFormat.readSegements(files); + List segments = RouteIndexFormat.readSegments(files); files.remove(IndexComponent.SEGMENT).close(); files.remove(IndexComponent.METADATA).close(); Cleanup cleanup = new Cleanup(files); diff --git a/src/java/org/apache/cassandra/io/AsymmetricUnversionedSerializer.java b/src/java/org/apache/cassandra/io/AsymmetricUnversionedSerializer.java index eae92e087e20..570741903f38 100644 --- a/src/java/org/apache/cassandra/io/AsymmetricUnversionedSerializer.java +++ b/src/java/org/apache/cassandra/io/AsymmetricUnversionedSerializer.java @@ -42,6 +42,11 @@ default ByteBuffer serialize(In t) throws IOException } } + default void skip(DataInputPlus in) throws IOException + { + deserialize(in); + } + default ByteBuffer serializeUnchecked(In t) { try diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 6d3e6f95db10..e9258b5f9a78 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -338,7 +338,7 @@ public enum Verb ACCORD_AWAIT_ASYNC_RSP_REQ (139, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(AwaitSerializers.asyncReply), AccordService::requestHandlerOrNoop ), ACCORD_WAIT_UNTIL_APPLIED_REQ (140, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(ReadDataSerializers.waitUntilApplied), AccordService::requestHandlerOrNoop, ACCORD_READ_RSP ), ACCORD_RECOVER_AWAIT_RSP (141, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(AwaitSerializers.recoverReply), AccordService::responseHandlerOrNoop ), - ACCORD_RECOVER_AWAIT_REQ (142, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(AwaitSerializers.recoverRequest), AccordService::requestHandlerOrNoop, ACCORD_RECOVER_AWAIT_RSP), + ACCORD_RECOVER_AWAIT_REQ (142, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(AwaitSerializers.recoverRequest), AccordService::requestHandlerOrNoop, ACCORD_RECOVER_AWAIT_RSP), ACCORD_INFORM_DURABLE_REQ (143, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(InformDurableSerializers.request), AccordService::requestHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_CHECK_STATUS_RSP (144, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(CheckStatusSerializers.reply), AccordService::responseHandlerOrNoop ), ACCORD_CHECK_STATUS_REQ (145, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(CheckStatusSerializers.request), AccordService::requestHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), @@ -350,15 +350,14 @@ public enum Verb ACCORD_GET_LATEST_DEPS_REQ (151, P2, readTimeout, IMMEDIATE, () -> accordEmbedded(LatestDepsSerializers.request), AccordService::requestHandlerOrNoop, ACCORD_GET_LATEST_DEPS_RSP), ACCORD_GET_MAX_CONFLICT_RSP (152, P2, readTimeout, IMMEDIATE, () -> accordEmbedded(GetMaxConflictSerializers.reply), AccordService::responseHandlerOrNoop ), ACCORD_GET_MAX_CONFLICT_REQ (153, P2, readTimeout, IMMEDIATE, () -> accordEmbedded(GetMaxConflictSerializers.request), AccordService::requestHandlerOrNoop, ACCORD_GET_MAX_CONFLICT_RSP), - ACCORD_GET_DURABLE_BEFORE_RSP (154, P2, readTimeout, IMMEDIATE, () -> accordEmbedded(GetDurableBeforeSerializers.reply), AccordService::responseHandlerOrNoop ), - ACCORD_GET_DURABLE_BEFORE_REQ (155, P2, readTimeout, IMMEDIATE, () -> accordEmbedded(GetDurableBeforeSerializers.request), AccordService::requestHandlerOrNoop, ACCORD_GET_DURABLE_BEFORE_RSP ), + ACCORD_GET_DURABLE_BEFORE_RSP (154, P2, readTimeout, MISC, () -> accordEmbedded(GetDurableBeforeSerializers.reply), AccordService::responseHandlerOrNoop ), + ACCORD_GET_DURABLE_BEFORE_REQ (155, P2, readTimeout, MISC, () -> accordEmbedded(GetDurableBeforeSerializers.request), AccordService::requestHandlerOrNoop, ACCORD_GET_DURABLE_BEFORE_RSP ), ACCORD_SET_SHARD_DURABLE_REQ (156, P2, rpcTimeout, MISC, () -> accordEmbedded(SetDurableSerializers.shardDurable), AccordService::requestHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_SET_GLOBALLY_DURABLE_REQ (157, P2, rpcTimeout, MISC, () -> accordEmbedded(SetDurableSerializers.globallyDurable),AccordService::requestHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_SYNC_NOTIFY_RSP (158, P2, writeTimeout, MISC, () -> accordEmbedded(EnumSerializer.simpleReply), RESPONSE_HANDLER), ACCORD_SYNC_NOTIFY_REQ (159, P2, writeTimeout, MISC, () -> accordEmbedded(Notification.serializer), () -> AccordSyncPropagator.verbHandler, ACCORD_SYNC_NOTIFY_RSP ), - CONSENSUS_KEY_MIGRATION (160, P1, writeTimeout, MISC, () -> accordEmbedded(ConsensusKeyMigrationFinished.serializer),() -> ConsensusKeyMigrationState.consensusKeyMigrationFinishedHandler), ACCORD_INTEROP_READ_RSP (161, P2, writeTimeout, IMMEDIATE, () -> accordEmbedded(AccordInteropRead.replySerializer), AccordService::responseHandlerOrNoop), diff --git a/src/java/org/apache/cassandra/service/accord/AbstractAccordSegmentCompactor.java b/src/java/org/apache/cassandra/service/accord/AbstractAccordSegmentCompactor.java index 668736c84260..18f3de0633ba 100644 --- a/src/java/org/apache/cassandra/service/accord/AbstractAccordSegmentCompactor.java +++ b/src/java/org/apache/cassandra/service/accord/AbstractAccordSegmentCompactor.java @@ -106,7 +106,7 @@ public Collection> compact(Collection snapshot(Ranges ranges, TxnId before) // TODO: does this have to go to journal, too? + public AsyncResult snapshot(Ranges ranges, TxnId before) { AsyncResults.SettableResult result = new AsyncResults.SettableResult<>(); - // TODO: maintain a list of Accord tables, perhaps in ClusterMetadata? + // TODO (desired): maintain a list of Accord tables, perhaps in ClusterMetadata? ClusterMetadata metadata = ClusterMetadata.current(); Object2ObjectHashMap tables = new Object2ObjectHashMap<>(); for (Range range : ranges) diff --git a/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java b/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java index d26e6e5abe4f..c5442c644da4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java +++ b/src/java/org/apache/cassandra/service/accord/AccordExecutorLoops.java @@ -24,8 +24,7 @@ import java.util.function.IntFunction; import accord.utils.Invariants; - -import io.netty.util.collection.LongObjectHashMap; +import org.agrona.collections.Long2ObjectHashMap; import org.apache.cassandra.service.accord.AccordExecutor.Mode; import org.apache.cassandra.utils.concurrent.Condition; @@ -37,7 +36,7 @@ class AccordExecutorLoops { - private final LongObjectHashMap loops; + private final Long2ObjectHashMap loops; private final AtomicInteger running = new AtomicInteger(); private final Condition terminated = Condition.newOneTimeCondition(); @@ -46,7 +45,7 @@ public AccordExecutorLoops(Mode mode, int threads, IntFunction name, Fun { Invariants.require(mode == RUN_WITH_LOCK ? threads == 1 : threads >= 1); running.addAndGet(threads); - loops = new LongObjectHashMap<>(threads); + loops = new Long2ObjectHashMap<>(threads, 0.65f); for (int i = 0; i < threads; ++i) { Thread thread = executorFactory().startThread(name.apply(i), wrap(loopFactory.apply(mode)), NON_DAEMON, INFINITE_LOOP); diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 7a29afb06194..66bb8bdffa2c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -384,7 +384,7 @@ public Builder load(int commandStoreId, TxnId txnId) private BUILDER readAll(JournalKey key) { BUILDER builder = (BUILDER) key.type.serializer.mergerFor(); - // TODO: this can be further improved to avoid allocating lambdas + // TODO (expected): this can be further improved to avoid allocating lambdas AccordJournalValueSerializers.FlyweightSerializer serializer = (AccordJournalValueSerializers.FlyweightSerializer) key.type.serializer; // TODO (expected): for those where we store an image, read only the first entry we find in DESC order journalTable.readAll(key, (in, userVersion) -> serializer.deserialize(key, builder, in, userVersion)); @@ -572,7 +572,7 @@ private static void serialize(Command command, int flags, DataOutputPlus out, Ve CommandSerializers.ballot.serialize(command.promised(), out); break; case PARTICIPANTS: - CommandSerializers.participants.serialize(command.participants(), out, userVersion); + CommandSerializers.participants.serialize(command.participants(), out); break; case PARTIAL_TXN: CommandSerializers.partialTxn.serialize(command.partialTxn(), out, userVersion); @@ -705,7 +705,7 @@ private void serialize(int flags, DataOutputPlus out, Version userVersion) throw break; case PARTICIPANTS: Invariants.require(participants != null); - CommandSerializers.participants.serialize(participants, out, userVersion); + CommandSerializers.participants.serialize(participants, out); break; case PARTIAL_TXN: Invariants.require(partialTxn != null); @@ -783,7 +783,7 @@ private void deserialize(Field field, DataInputPlus in, Version userVersion) thr promised = CommandSerializers.ballot.deserialize(in); break; case PARTICIPANTS: - participants = CommandSerializers.participants.deserialize(in, userVersion); + participants = CommandSerializers.participants.deserialize(in); break; case PARTIAL_TXN: partialTxn = CommandSerializers.partialTxn.deserialize(in, userVersion); @@ -832,7 +832,7 @@ private static void skip(TxnId txnId, Field field, DataInputPlus in, Version use CommandSerializers.ballot.skip(in); break; case PARTICIPANTS: - CommandSerializers.participants.deserialize(in, userVersion); + CommandSerializers.participants.deserialize(in); break; case PARTIAL_TXN: CommandSerializers.partialTxn.deserialize(in, userVersion); diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index efe9f39b25b3..e87250fa5217 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -63,9 +63,7 @@ import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.lifecycle.View; import org.apache.cassandra.db.marshal.ByteBufferAccessor; -import org.apache.cassandra.db.marshal.ByteType; import org.apache.cassandra.db.marshal.BytesType; -import org.apache.cassandra.db.marshal.CompositeType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.db.marshal.TupleType; @@ -113,6 +111,7 @@ import org.apache.cassandra.utils.MergeIterator; import org.apache.cassandra.utils.btree.BTreeSet; import org.apache.cassandra.utils.concurrent.OpOrder; +import org.apache.cassandra.utils.vint.VIntCoding; import static java.lang.String.format; import static java.util.Collections.emptyMap; @@ -140,14 +139,12 @@ public static TableMetadata journalMetadata(String tableName, boolean index) TableMetadata.Builder builder = parse(tableName, "accord journal", "CREATE TABLE %s (" - + "store_id int," - + "type tinyint," - + "id blob," + + "key blob," + "descriptor bigint," + "offset int," + "user_version int," + "record blob," - + "PRIMARY KEY((store_id, type, id), descriptor, offset)" + + "PRIMARY KEY((key), descriptor, offset)" + ") WITH CLUSTERING ORDER BY (descriptor DESC, offset DESC)" + " WITH compression = {'class':'NoopCompressor'};") .compaction(CompactionParams.lcs(emptyMap())) @@ -417,9 +414,7 @@ else if (startInclusive) */ private static CloseableIterator keyIterator(Memtable memtable, AbstractBounds range) { - // TODO (required): why are we replacing the right bound with max bound? - AbstractBounds memtableRange = range.withNewRight(memtable.metadata().partitioner.getMinimumToken().maxKeyBound()); - DataRange dataRange = new DataRange(memtableRange, new ClusteringIndexSliceFilter(Slices.ALL, false)); + DataRange dataRange = new DataRange(range, new ClusteringIndexSliceFilter(Slices.ALL, false)); UnfilteredPartitionIterator iter = memtable.partitionIterator(ColumnFilter.NONE, dataRange, SSTableReadsListener.NOOP_LISTENER); int rangeStartCmpMin = range.isStartInclusive() ? 0 : 1; @@ -537,7 +532,7 @@ private static ByteBuffer cellValue(Cell cell) return cell.accessor().toBuffer(cell.value()); } - // TODO: convert to byte array + // TODO (desired): convert to byte array private static ByteBuffer cellValue(Row row, ColumnMetadata column) { Cell cell = row.getCell(column); @@ -546,60 +541,38 @@ private static ByteBuffer cellValue(Row row, ColumnMetadata column) public static class JournalColumns { - static final ClusteringComparator keyComparator = Journal.partitionKeyAsClusteringComparator(); - static final CompositeType partitionKeyType = (CompositeType) Journal.partitionKeyType; - public static final ColumnMetadata store_id = getColumn(Journal, "store_id"); - public static final ColumnMetadata type = getColumn(Journal, "type"); - public static final ColumnMetadata id = getColumn(Journal, "id"); + public static final ColumnMetadata key = getColumn(Journal, "key"); public static final ColumnMetadata record = getColumn(Journal, "record"); public static final ColumnMetadata user_version = getColumn(Journal, "user_version"); public static final RegularAndStaticColumns regular = new RegularAndStaticColumns(Columns.NONE, Columns.from(Arrays.asList(record, user_version))); public static DecoratedKey decorate(JournalKey key) { - ByteBuffer id = ByteBuffer.allocate(CommandSerializers.txnId.serializedSize()); - CommandSerializers.txnId.serialize(key.id, id); - id.flip(); - ByteBuffer pk = keyComparator.make(key.commandStoreId, (byte)key.type.id, id).serializeAsPartitionKey(); - Invariants.require(getTxnId(splitPartitionKey(pk)).equals(key.id)); + int commandStoreIdBytes = VIntCoding.computeUnsignedVIntSize(key.commandStoreId); + int length = commandStoreIdBytes + 1; + if (key.type == JournalKey.Type.COMMAND_DIFF) + length += CommandSerializers.txnId.serializedSize(key.id); + ByteBuffer pk = ByteBuffer.allocate(length); + ByteBufferAccessor.instance.putUnsignedVInt32(pk, 0, key.commandStoreId); + pk.put(commandStoreIdBytes, (byte)key.type.id); + if (key.type == JournalKey.Type.COMMAND_DIFF) + CommandSerializers.txnId.serializeComparable(key.id, pk, ByteBufferAccessor.instance, commandStoreIdBytes + 1); return Journal.partitioner.decorateKey(pk); } - public static ByteBuffer[] splitPartitionKey(DecoratedKey key) - { - return JournalColumns.partitionKeyType.split(key.getKey()); - } - - public static ByteBuffer[] splitPartitionKey(ByteBuffer key) - { - return JournalColumns.partitionKeyType.split(key); - } - public static int getStoreId(DecoratedKey pk) { - return getStoreId(splitPartitionKey(pk)); - } - - public static int getStoreId(ByteBuffer[] partitionKeyComponents) - { - return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); - } - - public static JournalKey.Type getType(ByteBuffer[] partitionKeyComponents) - { - return JournalKey.Type.fromId(ByteType.instance.compose(partitionKeyComponents[type.position()])); - } - - public static TxnId getTxnId(ByteBuffer[] partitionKeyComponents) - { - ByteBuffer buffer = partitionKeyComponents[id.position()]; - return CommandSerializers.txnId.deserialize(buffer, buffer.position()); + return VIntCoding.readUnsignedVInt32(pk.getKey(), 0); } public static JournalKey getJournalKey(DecoratedKey key) { - ByteBuffer[] parts = splitPartitionKey(key); - return new JournalKey(getTxnId(parts), getType(parts), getStoreId(parts)); + ByteBuffer bb = key.getKey(); + int storeId = ByteBufferAccessor.instance.getUnsignedVInt32(bb, 0); + int offset = VIntCoding.readLengthOfVInt(bb, 0); + JournalKey.Type type = JournalKey.Type.fromId(bb.get(offset)); + TxnId txnId = type != JournalKey.Type.COMMAND_DIFF ? TxnId.NONE : CommandSerializers.txnId.deserializeComparable(bb, ByteBufferAccessor.instance, offset + 1); + return new JournalKey(txnId, type, storeId); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 54846f5e8ffb..5a5e09d4f69b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -58,6 +58,7 @@ import accord.primitives.Unseekables; import accord.primitives.Writes; import accord.utils.ImmutableBitSet; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.TokenKey; @@ -116,9 +117,12 @@ public static long seekable(Seekable seekable) public static long ranges(Ranges ranges) { long size = EMPTY_RANGES_SIZE; - size += ObjectSizes.sizeOfReferenceArray(ranges.size()); - // TODO: many ranges are fixed size, can compute by multiplication - for (int i = 0, mi = ranges.size() ; i < mi ; i++) + int numberOfRanges = ranges.size(); + size += ObjectSizes.sizeOfReferenceArray(numberOfRanges); + if (numberOfRanges > 1 && DatabaseDescriptor.getPartitioner().isFixedLength()) + return size + numberOfRanges * range(ranges.get(0)); + + for (int i = 0 ; i < numberOfRanges ; i++) size += range(ranges.get(i)); return size; } @@ -145,9 +149,12 @@ public static long seekables(Seekables seekables) private static long routingKeysOnly(AbstractKeys keys) { - // TODO: many routing keys are fixed size, can compute by multiplication - long size = ObjectSizes.sizeOfReferenceArray(keys.size()); - for (int i=0, mi=keys.size(); i 1 && DatabaseDescriptor.getPartitioner().isFixedLength()) + return size + numberOfKeys * key(keys.get(0)); + + for (int i=0 ; i < numberOfKeys; i++) size += key(keys.get(i)); return size; } @@ -163,7 +170,7 @@ public static long fullKeyRoute(FullKeyRoute route) { return EMPTY_FULL_KEY_ROUTE_SIZE + routingKeysOnly(route) - + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error + + key(route.homeKey()); // TODO (desired): we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } private static final long EMPTY_PARTIAL_KEY_ROUTE_KEYS_SIZE = measure(new PartialKeyRoute(new TokenKey(null, null), new RoutingKey[0])); @@ -187,7 +194,7 @@ public static long fullRangeRoute(FullRangeRoute route) { return EMPTY_FULL_RANGE_ROUTE_SIZE + ranges(route) - + key(route.homeKey()); // TODO: we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error + + key(route.homeKey()); // TODO (desired): we will probably dedup homeKey, serializer dependent, but perhaps this is an acceptable error } private static final long EMPTY_PARTIAL_RANGE_ROUTE_KEYS_SIZE = measure(new PartialRangeRoute(new TokenKey(null, null), new Range[0])); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 89f3214dc791..5ace8976c801 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -196,7 +196,6 @@ public ProgressLog progressLog() @Override public NodeCommandStoreService node() { - // TODO: safe command store should not have arbitrary time return commandStore.node(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index d23e0c5598c5..bed4b1396df4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -781,7 +781,7 @@ private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder sta CommandStoreTxnBlockedGraph.TxnState cmdTxnState = populate(state, safeCommand.current()); if (cmdTxnState.notBlocked()) return null; - //TODO (safety): check depth + //TODO (expected): check depth List> chains = new ArrayList<>(); for (TxnId blockedBy : cmdTxnState.blockedBy) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java index 5e857d618e70..66ae868231cb 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSyncPropagator.java @@ -313,7 +313,6 @@ public void onResponse(Message msg) { Invariants.require(msg.payload == SimpleReply.Ok, "Unexpected message: %s", msg); Set completedEpochs = new HashSet<>(); - // TODO review is it a good idea to call the listener while not holding the `AccordSyncPropagator` lock? synchronized (AccordSyncPropagator.this) { pending.ack(to, notification); diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 12fd39a135d8..9562aea86b7e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -57,8 +57,7 @@ public void doVerb(Message message) throws IOException T request = message.payload; /* - * TODO (desired): messages without side-effects don't go through the journal, - * and as such are retained on heap until the node catches up to waitForEpoch, + * TODO (desired): messages are retained on heap until the node catches up to waitForEpoch, * which can be problematic in absense of proper Accord<->Messaging backpressure */ Node.Id fromNodeId = endpointMapper.mappedId(message.from()); diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 7c2cdf972cdf..3bcb271381b4 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -18,6 +18,7 @@ package org.apache.cassandra.service.accord.api; +import java.util.concurrent.CancellationException; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; @@ -59,6 +60,7 @@ import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.metrics.AccordMetrics; import org.apache.cassandra.net.ResponseContext; import org.apache.cassandra.service.accord.AccordService; @@ -114,13 +116,12 @@ public void setNodeId(Node.Id id) @Override public void onRecover(Node node, Result success, Throwable fail) { - // TODO: this } @Override public void onInconsistentTimestamp(Command command, Timestamp prev, Timestamp next) { - // TODO: this + // TODO (expected): better reporting AssertionError error = new AssertionError("Inconsistent execution timestamp detected for txnId " + command.txnId() + ": " + prev + " != " + next); onUncaughtException(error); throw error; @@ -142,6 +143,8 @@ public void onStale(Timestamp staleSince, Ranges ranges) @Override public void onUncaughtException(Throwable t) { + if (t instanceof RequestTimeoutException || t instanceof CancellationException) + return; logger.error("Uncaught accord exception", t); JVMStabilityInspector.uncaughtException(Thread.currentThread(), t); } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java index 8a2d5c1ce7d2..25d409bfde46 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java @@ -56,7 +56,7 @@ public interface AccordSearchableKeySerializer extends AccordKeySerializer static final int PREFIX_MASK = 0xF0; static final int SUFFIX_MASK = 0x0F; - final TableId table; // TODO (desired): use an id (TrM) + final TableId table; // TODO (desired): use a long id (TrM) protected AccordRoutableKey(TableId table) { diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index 2e47d0678fdf..2c0619fd9ea9 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -21,14 +21,10 @@ import java.io.IOException; import java.nio.ByteBuffer; -import com.google.common.base.Preconditions; - import accord.api.Key; -import accord.primitives.Routable; import accord.utils.Invariants; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.SinglePartitionReadCommand; -import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.db.partitions.Partition; @@ -39,6 +35,7 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.ObjectSizes; +import org.apache.cassandra.utils.vint.VIntCoding; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; @@ -115,23 +112,16 @@ public String suffix() return partitionKey().toString(); } - // TODO: callers to this method are not correctly handling ranges - public static PartitionKey toPartitionKey(Routable routable) - { - return (PartitionKey) routable; - } - public static final Serializer serializer = new Serializer(); public static class Serializer implements AccordKeySerializer { - // TODO: add vint to value accessor and use vints private Serializer() {} @Override public void serialize(PartitionKey key, DataOutputPlus out) throws IOException { key.table().serializeCompact(out); - ByteBufferUtil.writeWithShortLength(key.partitionKey().getKey(), out); + ByteBufferUtil.writeWithVIntLength(key.partitionKey().getKey(), out); } public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int offset) @@ -140,9 +130,8 @@ public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int position += key.table().serializeCompact(dst, accessor, position); ByteBuffer bytes = key.partitionKey().getKey(); Invariants.require(key.partitionKey().getPartitioner() == getPartitioner()); - int numBytes = ByteBufferAccessor.instance.size(bytes); - Preconditions.checkState(numBytes <= Short.MAX_VALUE); - position += accessor.putShort(dst, position, (short) numBytes); + int numBytes = bytes.remaining(); + position += accessor.putUnsignedVInt32(dst, position, numBytes); position += accessor.copyByteBufferTo(bytes, 0, dst, position, numBytes); return position - offset; @@ -152,14 +141,14 @@ public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int public void skip(DataInputPlus in) throws IOException { TableId.skipCompact(in); - ByteBufferUtil.skipShortLength(in); + ByteBufferUtil.skipWithVIntLength(in); } @Override public PartitionKey deserialize(DataInputPlus in) throws IOException { TableId tableId = TableId.deserializeCompact(in).intern(); - DecoratedKey key = getPartitioner().decorateKey(ByteBufferUtil.readWithShortLength(in)); + DecoratedKey key = getPartitioner().decorateKey(ByteBufferUtil.readWithVIntLength(in)); return new PartitionKey(tableId, key); } @@ -167,8 +156,8 @@ public PartitionKey deserialize(V src, ValueAccessor accessor, int offset { TableId tableId = TableId.deserializeCompact(src, accessor, offset).intern(); offset += tableId.serializedCompactSize(); - int numBytes = accessor.getShort(src, offset); - offset += TypeSizes.SHORT_SIZE; + int numBytes = accessor.getUnsignedVInt32(src, offset); + offset += VIntCoding.readLengthOfVInt(src, accessor, offset); ByteBuffer bytes = ByteBuffer.allocate(numBytes); accessor.copyTo(src, offset, bytes, ByteBufferAccessor.instance, 0, numBytes); DecoratedKey key = getPartitioner().decorateKey(bytes); @@ -178,7 +167,7 @@ public PartitionKey deserialize(V src, ValueAccessor accessor, int offset @Override public long serializedSize(PartitionKey key) { - return key.table().serializedCompactSize() + ByteBufferUtil.serializedSizeWithShortLength(key.partitionKey().getKey()); + return key.table().serializedCompactSize() + ByteBufferUtil.serializedSizeWithVIntLength(key.partitionKey().getKey()); } } } diff --git a/src/java/org/apache/cassandra/service/accord/api/TokenKey.java b/src/java/org/apache/cassandra/service/accord/api/TokenKey.java index 24aa2ac9bce6..7511cb70f14b 100644 --- a/src/java/org/apache/cassandra/service/accord/api/TokenKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/TokenKey.java @@ -182,15 +182,11 @@ public RoutingKey toUnseekable() public boolean isMin() { - //TODO (review): some code paths don't care if before/after are used, but some are not fully correct (range.isFullRange) -// return sentinel == MIN_TABLE_SENTINEL; return (sentinel & PREFIX_MASK) == (MIN_TABLE_SENTINEL & PREFIX_MASK); } public boolean isMax() { - //TODO (review): some code paths don't care if before/after are used, but some are not fully correct (range.isFullRange) -// return sentinel == MAX_TABLE_SENTINEL; return (sentinel & PREFIX_MASK) == (MAX_TABLE_SENTINEL & PREFIX_MASK); } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index 1bdc29d012b6..3fa21940879f 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -228,9 +228,9 @@ public EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata doNotUse, public void sendReadCommand(Message message, InetAddressAndPort to, RequestCallback callback) { Node.Id id = endpointMapper.mappedId(to); - // TODO (nicetohave): It would be better to use the re-use the command from the transaction but it's fragile - // to try and figure out exactly what changed for things like read repair and short read protection - // Also this read scope doesn't reflect the contents of this particular read and is larger than it needs to be + // TODO (desired): It would be better to use the re-use the command from the transaction but it's fragile + // to try and figure out exactly what changed for things like read repair and short read protection + // Also this read scope doesn't reflect the contents of this particular read and is larger than it needs to be // TODO (required): understand interop and whether StableFastPath is appropriate AccordInteropStableThenRead commit = new AccordInteropStableThenRead(id, allTopologies, txnId, Kind.StableFastPath, executeAt, txn, deps, route, message.payload); node.send(id, commit, executor, new AccordInteropRead.ReadCallback(id, to, message, callback, this)); diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java index f74b6625c22a..995d93b6b064 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java @@ -129,7 +129,6 @@ public AccordInteropReadRepair(Node.Id to, Topologies topologies, TxnId txnId, P public AccordInteropReadRepair(TxnId txnId, Participants scope, long executeAtEpoch, Mutation mutation) { - // TODO (review): remove followup read - Is there anything left to be done for this or can I remove it? super(txnId, scope, executeAtEpoch); this.mutation = mutation; } diff --git a/src/java/org/apache/cassandra/service/accord/journal/AccordTopologyUpdate.java b/src/java/org/apache/cassandra/service/accord/journal/AccordTopologyUpdate.java index ff4a6e6ecbe8..a01580363f79 100644 --- a/src/java/org/apache/cassandra/service/accord/journal/AccordTopologyUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/journal/AccordTopologyUpdate.java @@ -111,7 +111,7 @@ public void serialize(Journal.TopologyUpdate from, DataOutputPlus out) throws IO out.writeUnsignedVInt32(e.getKey()); RangesForEpochSerializer.instance.serialize(e.getValue(), out); } - //TODO (performance): local to what? Rather than serializing local we can serialize the node its relative too? that why when we deserialize we do globa.forNode(node) + //TODO (desired): local to what? Rather than serializing local we can serialize the node its relative too? that why when we deserialize we do globa.forNode(node) // this also decreases the size as we don't have redundent shards TopologySerializers.topology.serialize(from.local, out); TopologySerializers.topology.serialize(from.global, out); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java index 6ec96618a10e..25a40a6a418c 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java @@ -27,7 +27,6 @@ import accord.primitives.Participants; import accord.primitives.Route; import accord.primitives.SaveStatus; -import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.UnversionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -63,6 +62,11 @@ public long serializedSize(BeginInvalidation begin) public static final UnversionedSerializer reply = new UnversionedSerializer<>() { + private static final int ACCEPTED_FAST_PATH = 0x1; + private static final int HAS_TRUNCATED = 0x2; + private static final int HAS_ROUTE = 0x4; + private static final int HAS_HOME_KEY = 0x8; + @Override public void serialize(InvalidateReply reply, DataOutputPlus out) throws IOException { @@ -70,24 +74,28 @@ public void serialize(InvalidateReply reply, DataOutputPlus out) throws IOExcept CommandSerializers.ballot.serialize(reply.accepted, out); CommandSerializers.saveStatus.serialize(reply.maxStatus, out); CommandSerializers.saveStatus.serialize(reply.maxKnowledgeStatus, out); - out.writeBoolean(reply.acceptedFastPath); - KeySerializers.nullableParticipants.serialize(reply.truncated, out); - KeySerializers.nullableRoute.serialize(reply.route, out); - KeySerializers.nullableRoutingKey.serialize(reply.homeKey, out); + int flags = (reply.acceptedFastPath ? ACCEPTED_FAST_PATH : 0) + | (reply.truncated != null ? HAS_TRUNCATED : 0) + | (reply.route != null ? HAS_ROUTE : 0) + | (reply.homeKey != null && reply.route == null ? HAS_HOME_KEY : 0); + out.writeByte(flags); + if (reply.truncated != null) KeySerializers.participants.serialize(reply.truncated, out); + if (reply.route != null) KeySerializers.route.serialize(reply.route, out); + else if (reply.homeKey != null) KeySerializers.routingKey.serialize(reply.homeKey, out); } @Override public InvalidateReply deserialize(DataInputPlus in) throws IOException { - // TODO (expected): use headers instead of nullable+bool serializers Ballot supersededBy = CommandSerializers.ballot.deserialize(in); Ballot accepted = CommandSerializers.ballot.deserialize(in); SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in); SaveStatus maxKnowledgeStatus = CommandSerializers.saveStatus.deserialize(in); - boolean acceptedFastPath = in.readBoolean(); - Participants truncated = KeySerializers.nullableParticipants.deserialize(in); - Route route = KeySerializers.nullableRoute.deserialize(in); - RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in); + byte flags = in.readByte(); + boolean acceptedFastPath = (flags & ACCEPTED_FAST_PATH) != 0; + Participants truncated = (flags & HAS_TRUNCATED) != 0 ? KeySerializers.participants.deserialize(in) : null; + Route route = (flags & HAS_ROUTE) != 0 ? KeySerializers.route.deserialize(in) : null; + RoutingKey homeKey = (flags & HAS_HOME_KEY) != 0 ? KeySerializers.routingKey.deserialize(in) : route != null ? route.homeKey() : null; return new InvalidateReply(supersededBy, accepted, maxStatus, maxKnowledgeStatus, acceptedFastPath, truncated, route, homeKey); } @@ -98,10 +106,10 @@ public long serializedSize(InvalidateReply reply) + CommandSerializers.ballot.serializedSize(reply.accepted) + CommandSerializers.saveStatus.serializedSize(reply.maxStatus) + CommandSerializers.saveStatus.serializedSize(reply.maxKnowledgeStatus) - + TypeSizes.BOOL_SIZE - + KeySerializers.nullableParticipants.serializedSize(reply.truncated) - + KeySerializers.nullableRoute.serializedSize(reply.route) - + KeySerializers.nullableRoutingKey.serializedSize(reply.homeKey); + + 1 + + (reply.truncated != null ? KeySerializers.participants.serializedSize(reply.truncated) : 0) + + (reply.route != null ? KeySerializers.route.serializedSize(reply.route) : 0) + + (reply.homeKey != null && reply.route == null ? KeySerializers.routingKey.serializedSize(reply.homeKey) : 0); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index ec3fa6e109ef..a231b2125156 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -22,7 +22,6 @@ import java.nio.ByteBuffer; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import accord.api.Query; import accord.api.Read; @@ -36,6 +35,7 @@ import accord.primitives.Known.KnownDeps; import accord.primitives.PartialTxn; import accord.primitives.Participants; +import accord.primitives.Routable; import accord.primitives.Route; import accord.primitives.SaveStatus; import accord.primitives.Seekables; @@ -45,9 +45,11 @@ import accord.primitives.TimestampWithUniqueHlc; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Unseekables; import accord.primitives.Writes; import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.io.UnversionedSerializer; import org.apache.cassandra.io.VersionedSerializer; @@ -67,9 +69,8 @@ private CommandSerializers() { } - public static final TimestampSerializer txnId = new TimestampSerializer<>(TxnId::fromBits); - public static final TimestampSerializer timestamp = new TimestampSerializer<>(Timestamp::fromBits); - public static final UnversionedSerializer nullableTimestamp = NullableSerializer.wrap(timestamp); + public static final VariableWidthTimestampSerializer txnId = new VariableWidthTimestampSerializer<>(TxnId::fromValues); + public static final VariableWidthTimestampSerializer timestamp = new VariableWidthTimestampSerializer<>(Timestamp::fromValues); public static final BallotSerializer ballot = new BallotSerializer(); // permits null public static final UnversionedSerializer kind = EncodeAsVInt32.of(Txn.Kind.class); public static final StoreParticipantsSerializer participants = new StoreParticipantsSerializer(); @@ -314,95 +315,202 @@ private static int flags(Timestamp executeAt, boolean nullable) } } - // TODO (expected): optimise using subset serializers, or perhaps simply with some deduping key serializer - public static class StoreParticipantsSerializer implements IVersionedSerializer + public static class StoreParticipantsSerializer implements UnversionedSerializer { static final int HAS_ROUTE = 0x1; - static final int HAS_TOUCHED_EQUALS_ROUTE = 0x2; - static final int TOUCHES_EQUALS_HAS_TOUCHED = 0x4; - static final int OWNS_EQUALS_TOUCHES = 0x8; - static final int EXECUTES_IS_NULL = 0x10; - static final int EXECUTES_IS_OWNS = 0x20; - static final int WAITSON_IS_OWNS = 0x40; + static final int ROUTE_EQUALS_SUPERSET = 0x2; + static final int HAS_TOUCHED_EQUALS_SUPERSET = 0x4; + static final int TOUCHES_EQUALS_HAS_TOUCHED = 0x8; + static final int OWNS_EQUALS_TOUCHES = 0x10; + static final int EXECUTES_IS_NULL = 0x20; + static final int EXECUTES_IS_OWNS = 0x40; + static final int WAITSON_IS_OWNS = 0x80; @Override - public void serialize(StoreParticipants t, DataOutputPlus out, Version version) throws IOException - { - boolean hasRoute = t.route() != null; - boolean hasTouchedEqualsRoute = t.route() == t.hasTouched(); - boolean touchesEqualsHasTouched = t.touches() == t.hasTouched(); - boolean ownsEqualsTouches = t.owns() == t.touches(); - boolean executesIsNull = t.executes() == null; - boolean executesIsOwns = !executesIsNull && t.executes() == t.owns(); - boolean waitsOnIsOwns = !executesIsNull && t.waitsOn() == t.owns(); + public void serialize(StoreParticipants t, DataOutputPlus out) throws IOException + { + Participants hasTouched = t.hasTouched(); + Route route = t.route(); + Participants owns = t.owns(); + Participants executes = t.executes(); + Participants touches = t.touches(); + boolean hasRoute = route != null; + boolean touchesEqualsHasTouched = touches == hasTouched; + boolean ownsEqualsTouches = owns == touches; + boolean executesIsNull = executes == null; + boolean executesIsOwns = !executesIsNull && executes == owns; + boolean waitsOnIsOwns = !executesIsNull && t.waitsOn() == owns; + boolean encodeSubsets = hasTouched.domain() == Routable.Domain.Key; + Participants superset = !hasRoute ? hasTouched : encodeSubsets ? route.with((Participants)hasTouched) : route; + boolean routeEqualsSuperset = route == superset; + boolean hasTouchedEqualsSuperset = hasTouched == superset; out.writeByte((hasRoute ? HAS_ROUTE : 0) - | (hasTouchedEqualsRoute ? HAS_TOUCHED_EQUALS_ROUTE : 0) + | (routeEqualsSuperset ? ROUTE_EQUALS_SUPERSET : 0) + | (hasTouchedEqualsSuperset ? HAS_TOUCHED_EQUALS_SUPERSET : 0) | (touchesEqualsHasTouched ? TOUCHES_EQUALS_HAS_TOUCHED : 0) | (ownsEqualsTouches ? OWNS_EQUALS_TOUCHES : 0) | (executesIsNull ? EXECUTES_IS_NULL : 0) | (executesIsOwns ? EXECUTES_IS_OWNS : 0) | (waitsOnIsOwns ? WAITSON_IS_OWNS : 0) ); - if (hasRoute) KeySerializers.route.serialize(t.route(), out); - if (!hasTouchedEqualsRoute) KeySerializers.participants.serialize(t.hasTouched(), out); - if (!touchesEqualsHasTouched) KeySerializers.participants.serialize(t.touches(), out); - if (!ownsEqualsTouches) KeySerializers.participants.serialize(t.owns(), out); - if (!executesIsNull && !executesIsOwns) KeySerializers.participants.serialize(t.executes(), out); - if (!executesIsNull && !waitsOnIsOwns) KeySerializers.participants.serialize(t.waitsOn(), out); + + KeySerializers.participants.serialize(superset, out); + if (encodeSubsets) + { + if (hasRoute && !routeEqualsSuperset) KeySerializers.route.serializeSubset(route, superset, out); + if (!hasTouchedEqualsSuperset) KeySerializers.participants.serializeSubset(hasTouched, superset, out); + if (!touchesEqualsHasTouched) KeySerializers.participants.serializeSubset(touches, superset, out); + if (!ownsEqualsTouches) KeySerializers.participants.serializeSubset(owns, superset, out); + if (!executesIsNull && !executesIsOwns) KeySerializers.participants.serializeSubset(executes, superset, out); + if (!executesIsNull && !waitsOnIsOwns) KeySerializers.participants.serializeSubset(t.waitsOn(), superset, out); + } + else + { + if (hasRoute && !routeEqualsSuperset) KeySerializers.route.serialize(route, out); + if (!hasTouchedEqualsSuperset) KeySerializers.participants.serialize(hasTouched, out); + if (!touchesEqualsHasTouched) KeySerializers.participants.serialize(touches, out); + if (!ownsEqualsTouches) KeySerializers.participants.serialize(owns, out); + if (!executesIsNull && !executesIsOwns) KeySerializers.participants.serialize(executes, out); + if (!executesIsNull && !waitsOnIsOwns) KeySerializers.participants.serialize(t.waitsOn(), out); + } } - public void skip(DataInputPlus in, Version version) throws IOException + public void skip(DataInputPlus in) throws IOException { int flags = in.readByte(); - if (0 != (flags & HAS_ROUTE)) KeySerializers.route.skip(in); - if (0 == (flags & HAS_TOUCHED_EQUALS_ROUTE)) KeySerializers.participants.skip(in); - if (0 == (flags & TOUCHES_EQUALS_HAS_TOUCHED)) KeySerializers.participants.skip(in); - if (0 == (flags & OWNS_EQUALS_TOUCHES)) KeySerializers.participants.skip(in); - if (0 == (flags & (EXECUTES_IS_OWNS | EXECUTES_IS_NULL))) KeySerializers.participants.skip(in); - if (0 == (flags & (WAITSON_IS_OWNS | EXECUTES_IS_NULL))) KeySerializers.participants.skip(in); + Unseekables.UnseekablesKind kind = KeySerializers.participants.readKind(in); + int supersetCount = KeySerializers.participants.countAndSkip(kind, in); + boolean skipSubset = kind.domain() == Routable.Domain.Key; + if (skipSubset) + { + if (0 != (flags & HAS_ROUTE) && 0 == (flags & ROUTE_EQUALS_SUPERSET)) KeySerializers.route.skipSubset(supersetCount, in); + if (0 == (flags & HAS_TOUCHED_EQUALS_SUPERSET)) KeySerializers.participants.skipSubset(supersetCount, in); + if (0 == (flags & TOUCHES_EQUALS_HAS_TOUCHED)) KeySerializers.participants.skipSubset(supersetCount, in); + if (0 == (flags & OWNS_EQUALS_TOUCHES)) KeySerializers.participants.skipSubset(supersetCount, in); + if (0 == (flags & (EXECUTES_IS_OWNS | EXECUTES_IS_NULL))) KeySerializers.participants.skipSubset(supersetCount, in); + if (0 == (flags & (WAITSON_IS_OWNS | EXECUTES_IS_NULL))) KeySerializers.participants.skipSubset(supersetCount, in); + } + else + { + if (0 != (flags & HAS_ROUTE) && 0 == (flags & ROUTE_EQUALS_SUPERSET)) KeySerializers.route.skip(in); + if (0 == (flags & HAS_TOUCHED_EQUALS_SUPERSET)) KeySerializers.participants.skip(in); + if (0 == (flags & TOUCHES_EQUALS_HAS_TOUCHED)) KeySerializers.participants.skip(in); + if (0 == (flags & OWNS_EQUALS_TOUCHES)) KeySerializers.participants.skip(in); + if (0 == (flags & (EXECUTES_IS_OWNS | EXECUTES_IS_NULL))) KeySerializers.participants.skip(in); + if (0 == (flags & (WAITSON_IS_OWNS | EXECUTES_IS_NULL))) KeySerializers.participants.skip(in); + } } @Override - public StoreParticipants deserialize(DataInputPlus in, Version version) throws IOException + public StoreParticipants deserialize(DataInputPlus in) throws IOException { int flags = in.readByte(); - Route route = 0 == (flags & HAS_ROUTE) ? null : KeySerializers.route.deserialize(in); - Participants hasTouched = 0 != (flags & HAS_TOUCHED_EQUALS_ROUTE) ? route : KeySerializers.participants.deserialize(in); - Participants touches = 0 != (flags & TOUCHES_EQUALS_HAS_TOUCHED) ? hasTouched : KeySerializers.participants.deserialize(in); - Participants owns = 0 != (flags & OWNS_EQUALS_TOUCHES) ? touches : KeySerializers.participants.deserialize(in); - Participants executes = 0 != (flags & EXECUTES_IS_NULL) ? null : 0 != (flags & EXECUTES_IS_OWNS) ? owns : KeySerializers.participants.deserialize(in); - Participants waitsOn = 0 != (flags & EXECUTES_IS_NULL) ? null : 0 != (flags & WAITSON_IS_OWNS) ? owns : KeySerializers.participants.deserialize(in); - return StoreParticipants.create(route, owns, executes, waitsOn, touches, hasTouched); + Participants superset = KeySerializers.participants.deserialize(in); + boolean decodeSubset = superset.domain() == Routable.Domain.Key; + if (decodeSubset) + { + Route route = 0 == (flags & HAS_ROUTE) ? null : 0 != (flags & ROUTE_EQUALS_SUPERSET) ? (Route)superset : KeySerializers.route.deserializeSubset(superset, in); + Participants hasTouched = 0 != (flags & HAS_TOUCHED_EQUALS_SUPERSET) ? superset : KeySerializers.participants.deserializeSubset(superset, in); + Participants touches = 0 != (flags & TOUCHES_EQUALS_HAS_TOUCHED) ? hasTouched : KeySerializers.participants.deserializeSubset(superset, in); + Participants owns = 0 != (flags & OWNS_EQUALS_TOUCHES) ? touches : KeySerializers.participants.deserializeSubset(superset, in); + Participants executes = 0 != (flags & EXECUTES_IS_NULL) ? null : 0 != (flags & EXECUTES_IS_OWNS) ? owns : KeySerializers.participants.deserializeSubset(superset, in); + Participants waitsOn = 0 != (flags & EXECUTES_IS_NULL) ? null : 0 != (flags & WAITSON_IS_OWNS) ? owns : KeySerializers.participants.deserializeSubset(superset, in); + return StoreParticipants.create(route, owns, executes, waitsOn, touches, hasTouched); + } + else + { + Route route = 0 == (flags & HAS_ROUTE) ? null : 0 != (flags & ROUTE_EQUALS_SUPERSET) ? (Route)superset : KeySerializers.route.deserialize(in); + Participants hasTouched = 0 != (flags & HAS_TOUCHED_EQUALS_SUPERSET) ? superset : KeySerializers.participants.deserialize(in); + Participants touches = 0 != (flags & TOUCHES_EQUALS_HAS_TOUCHED) ? hasTouched : KeySerializers.participants.deserialize(in); + Participants owns = 0 != (flags & OWNS_EQUALS_TOUCHES) ? touches : KeySerializers.participants.deserialize(in); + Participants executes = 0 != (flags & EXECUTES_IS_NULL) ? null : 0 != (flags & EXECUTES_IS_OWNS) ? owns : KeySerializers.participants.deserialize(in); + Participants waitsOn = 0 != (flags & EXECUTES_IS_NULL) ? null : 0 != (flags & WAITSON_IS_OWNS) ? owns : KeySerializers.participants.deserialize(in); + return StoreParticipants.create(route, owns, executes, waitsOn, touches, hasTouched); + } } @Override - public long serializedSize(StoreParticipants t, Version version) - { - boolean hasRoute = t.route() != null; - boolean hasTouchedEqualsRoute = t.route() == t.hasTouched(); - boolean touchesEqualsHasTouched = t.touches() == t.hasTouched(); - boolean ownsEqualsTouches = t.owns() == t.touches(); - boolean executesIsNotNullAndNotOwns = t.executes() != null && t.owns() != t.executes(); - long size = 1; - if (hasRoute) size += KeySerializers.route.serializedSize(t.route()); - if (!hasTouchedEqualsRoute) size += KeySerializers.participants.serializedSize(t.hasTouched()); - if (!touchesEqualsHasTouched) size += KeySerializers.participants.serializedSize(t.touches()); - if (!ownsEqualsTouches) size += KeySerializers.participants.serializedSize(t.owns()); - if (executesIsNotNullAndNotOwns) size += KeySerializers.participants.serializedSize(t.executes()); + public long serializedSize(StoreParticipants t) + { + Participants hasTouched = t.hasTouched(); + Route route = t.route(); + Participants owns = t.owns(); + Participants executes = t.executes(); + Participants touches = t.touches(); + boolean hasRoute = route != null; + boolean touchesEqualsHasTouched = touches == hasTouched; + boolean ownsEqualsTouches = owns == touches; + boolean executesIsNull = executes == null; + boolean executesIsOwns = !executesIsNull && executes == owns; + boolean waitsOnIsOwns = !executesIsNull && t.waitsOn() == owns; + boolean encodeSubsets = hasTouched.domain() == Routable.Domain.Key; + Participants superset = !hasRoute ? hasTouched : encodeSubsets ? route.with((Participants)hasTouched) : route; + boolean routeEqualsSuperset = route == superset; + boolean hasTouchedEqualsSuperset = hasTouched == superset; + long size = 1 + KeySerializers.participants.serializedSize(superset); + if (encodeSubsets) + { + if (hasRoute && !routeEqualsSuperset) size += KeySerializers.route.serializedSubsetSize(route, superset); + if (!hasTouchedEqualsSuperset) size += KeySerializers.participants.serializedSubsetSize(hasTouched, superset); + if (!touchesEqualsHasTouched) size += KeySerializers.participants.serializedSubsetSize(touches, superset); + if (!ownsEqualsTouches) size += KeySerializers.participants.serializedSubsetSize(owns, superset); + if (!executesIsNull && !executesIsOwns) size += KeySerializers.participants.serializedSubsetSize(executes, superset); + if (!executesIsNull && !waitsOnIsOwns) size += KeySerializers.participants.serializedSubsetSize(t.waitsOn(), superset); + } + else + { + if (hasRoute && !routeEqualsSuperset) size += KeySerializers.route.serializedSize(route); + if (!hasTouchedEqualsSuperset) size += KeySerializers.participants.serializedSize(hasTouched); + if (!touchesEqualsHasTouched) size += KeySerializers.participants.serializedSize(touches); + if (!ownsEqualsTouches) size += KeySerializers.participants.serializedSize(owns); + if (!executesIsNull && !executesIsOwns) size += KeySerializers.participants.serializedSize(executes); + if (!executesIsNull && !waitsOnIsOwns) size += KeySerializers.participants.serializedSize(t.waitsOn()); + } return size; } } - public static class TimestampSerializer implements UnversionedSerializer + public static class VariableWidthTimestampSerializer implements UnversionedSerializer { + private static final int NODE_SHIFT = 0; + private static final int NODE_MASK = 0x3; + private static final int NODE_MIN_LENGTH = 1; + private static final int FLAGS_SHIFT = NODE_SHIFT + Integer.bitCount(NODE_MASK); + private static final int FLAGS_MASK = 0x1; + private static final int FLAGS_MIN_LENGTH = 1; + private static final int HLC_SHIFT = FLAGS_SHIFT + Integer.bitCount(FLAGS_MASK); + private static final int HLC_MASK = 0x3; + private static final int HLC_MIN_LENGTH = 5; + private static final int EPOCH_SHIFT = HLC_SHIFT + Integer.bitCount(HLC_MASK); + private static final int EPOCH_MASK = 0x3; + private static final int EPOCH_MIN_LENGTH = 3; + static final byte NULL_BYTE = (byte) 0x80; + static + { + Invariants.require(EPOCH_MASK << EPOCH_SHIFT >= 0); + } + interface Factory { - T create(long msb, long lsb, Node.Id node); + T create(long epoch, long hlc, int flags, Node.Id node); } - private final TimestampSerializer.Factory factory; + private final VariableWidthTimestampSerializer.Factory factory; + + T decodeSpecial(int encodingFlags) + { + Invariants.require(encodingFlags == NULL_BYTE); + return null; + } - private TimestampSerializer(TimestampSerializer.Factory factory) + byte encodeSpecial(T value) + { + if (value != null) + return 0; + return NULL_BYTE; + } + + private VariableWidthTimestampSerializer(VariableWidthTimestampSerializer.Factory factory) { this.factory = factory; } @@ -410,121 +518,271 @@ private TimestampSerializer(TimestampSerializer.Factory factory) @Override public void serialize(T ts, DataOutputPlus out) throws IOException { - out.writeLong(ts.msb); - out.writeLong(ts.lsb); - TopologySerializers.nodeId.serialize(ts.node, out); + { + byte specialByte = encodeSpecial(ts); + if (specialByte != 0) + { + Invariants.require(specialByte < 0); + out.writeByte(specialByte); + return; + } + } + long epoch = ts.epoch(); + long hlc = ts.hlc(); + int flags = ts.flags(); + int epochLength = length(epoch, EPOCH_MIN_LENGTH); + int hlcLength = length(hlc, HLC_MIN_LENGTH); + int flagsLength = length(flags, FLAGS_MIN_LENGTH); + int nodeLength = length(ts.node.id, NODE_MIN_LENGTH); + int encodingFlags = encodeLength(epochLength, EPOCH_SHIFT, EPOCH_MIN_LENGTH, EPOCH_MASK) + | encodeLength(hlcLength, HLC_SHIFT, HLC_MIN_LENGTH, HLC_MASK) + | encodeLength(flagsLength, FLAGS_SHIFT, FLAGS_MIN_LENGTH, FLAGS_MASK) + | encodeLength(nodeLength, NODE_SHIFT, NODE_MIN_LENGTH, NODE_MASK); + out.writeByte(encodingFlags); + out.writeLeastSignificantBytes(epoch, epochLength); + out.writeLeastSignificantBytes(hlc, hlcLength); + out.writeLeastSignificantBytes(flags, flagsLength); + out.writeLeastSignificantBytes(ts.node.id, nodeLength); + } + + // exactly the same fundamental format as serialize(), only we interleave the length bits with the values, maintaining ordering + public int serializeComparable(T ts, V dst, ValueAccessor accessor, int offset) + { + int position = offset; + Invariants.require(encodeSpecial(ts) == 0); + long epoch = ts.epoch(); + long hlc = ts.hlc(); + int flags = ts.flags(); + int epochLength = length(epoch, EPOCH_MIN_LENGTH); + int hlcLength = length(hlc, HLC_MIN_LENGTH); + int flagsLength = length(flags, FLAGS_MIN_LENGTH); + int nodeLength = length(ts.node.id, NODE_MIN_LENGTH); + + long pack = packLength(epochLength, epochLength * 8, EPOCH_MIN_LENGTH, EPOCH_MASK); + pack |= epoch; + pack <<= 5; + pack |= packLength(hlcLength, 3, HLC_MIN_LENGTH, HLC_MASK); + pack |= hlc >>> ((hlcLength*8)-3); + accessor.putLeastSignificantBytes(dst, position, pack, epochLength + 1); + position += epochLength + 1; + + hlc <<= 3; + hlc |= packLength(flagsLength, 2, FLAGS_MIN_LENGTH, FLAGS_MASK); + hlc |= flags >>> ((flagsLength * 8) - 2); + accessor.putLeastSignificantBytes(dst, position, hlc, hlcLength); + position += hlcLength; + + pack = (long)flags << (2 + nodeLength * 8); + pack |= packLength(nodeLength, nodeLength * 8, NODE_MIN_LENGTH, NODE_MASK); + pack |= ts.node.id & 0xffffffffL; + accessor.putLeastSignificantBytes(dst, position, pack, flagsLength + nodeLength); + position += flagsLength + nodeLength; + return position - offset; } public int serialize(T ts, V dst, ValueAccessor accessor, int offset) { + { + byte specialByte = encodeSpecial(ts); + if (specialByte != 0) + { + Invariants.require(specialByte < 0); + accessor.putByte(dst, offset, specialByte); + return 1; + } + } + + long epoch = ts.epoch(); + long hlc = ts.hlc(); + int flags = ts.flags(); + int epochLength = length(epoch, EPOCH_MIN_LENGTH); + int hlcLength = length(hlc, HLC_MIN_LENGTH); + int flagsLength = length(flags, FLAGS_MIN_LENGTH); + int nodeLength = length(ts.node.id, NODE_MIN_LENGTH); + int encodingFlags = encodeLength(epochLength, EPOCH_SHIFT, EPOCH_MIN_LENGTH, EPOCH_MASK) + | encodeLength(hlcLength, HLC_SHIFT, HLC_MIN_LENGTH, HLC_MASK) + | encodeLength(flagsLength, FLAGS_SHIFT, FLAGS_MIN_LENGTH, FLAGS_MASK) + | encodeLength(nodeLength, NODE_SHIFT, NODE_MIN_LENGTH, NODE_MASK); + int position = offset; - position += accessor.putLong(dst, position, ts.msb); - position += accessor.putLong(dst, position, ts.lsb); - position += TopologySerializers.nodeId.serialize(ts.node, dst, accessor, position); - int size = position - offset; - Preconditions.checkState(size == serializedSize()); - return size; + position += accessor.putByte(dst, position, (byte)encodingFlags); + position += accessor.putLeastSignificantBytes(dst, position, epoch, epochLength); + position += accessor.putLeastSignificantBytes(dst, position, hlc, hlcLength); + position += accessor.putLeastSignificantBytes(dst, position, flags, flagsLength); + position += accessor.putLeastSignificantBytes(dst, position, ts.node.id, nodeLength); + return position - offset; + } + + public ByteBuffer serialize(T ts) + { + int size = Math.toIntExact(serializedSize(ts)); + ByteBuffer result = ByteBuffer.allocate(size); + serialize(ts, result, ByteBufferAccessor.instance, 0); + return result; } public void serialize(T ts, ByteBuffer out) { - out.putLong(ts.msb); - out.putLong(ts.lsb); - TopologySerializers.nodeId.serialize(ts.node, out); + int position = out.position(); + position += serialize(ts, out, ByteBufferAccessor.instance, 0); + out.position(position); } public void skip(DataInputPlus in) throws IOException { - in.skipBytesFully(serializedSize()); + int encodingFlags = in.readByte(); + if (encodingFlags < 0) + return; + int epochLength = decodeLength(encodingFlags, EPOCH_SHIFT, EPOCH_MIN_LENGTH, EPOCH_MASK); + int hlcLength = decodeLength(encodingFlags, HLC_SHIFT, HLC_MIN_LENGTH, HLC_MASK); + int flagsLength = decodeLength(encodingFlags, FLAGS_SHIFT, FLAGS_MIN_LENGTH, FLAGS_MASK); + int nodeLength = decodeLength(encodingFlags, NODE_SHIFT, NODE_MIN_LENGTH, NODE_MASK); + in.skipBytesFully(epochLength + hlcLength + flagsLength + nodeLength); } @Override public T deserialize(DataInputPlus in) throws IOException { - return factory.create(in.readLong(), - in.readLong(), - TopologySerializers.nodeId.deserialize(in)); + int encodingFlags = in.readByte(); + if (encodingFlags < 0) + return decodeSpecial(encodingFlags); + int epochLength = decodeLength(encodingFlags, EPOCH_SHIFT, EPOCH_MIN_LENGTH, EPOCH_MASK); + int hlcLength = decodeLength(encodingFlags, HLC_SHIFT, HLC_MIN_LENGTH, HLC_MASK); + int flagsLength = decodeLength(encodingFlags, FLAGS_SHIFT, FLAGS_MIN_LENGTH, FLAGS_MASK); + int nodeLength = decodeLength(encodingFlags, NODE_SHIFT, NODE_MIN_LENGTH, NODE_MASK); + long epoch = in.readLeastSignificantBytes(epochLength); + long hlc = in.readLeastSignificantBytes(hlcLength); + int flags = Math.toIntExact(in.readLeastSignificantBytes(flagsLength)); + int nodeId = (int)in.readLeastSignificantBytes(nodeLength); + return factory.create(epoch, hlc, flags, new Node.Id(nodeId)); } public T deserialize(V src, ValueAccessor accessor, int offset) { - long msb = accessor.getLong(src, offset); - offset += TypeSizes.LONG_SIZE; - long lsb = accessor.getLong(src, offset); - offset += TypeSizes.LONG_SIZE; - Node.Id node = TopologySerializers.nodeId.deserialize(src, accessor, offset); - return factory.create(msb, lsb, node); + int encodingFlags = accessor.getByte(src, offset); + if (encodingFlags < 0) + return decodeSpecial(encodingFlags); + ++offset; + int epochLength = decodeLength(encodingFlags, EPOCH_SHIFT, EPOCH_MIN_LENGTH, EPOCH_MASK); + int hlcLength = decodeLength(encodingFlags, HLC_SHIFT, HLC_MIN_LENGTH, HLC_MASK); + int flagsLength = decodeLength(encodingFlags, FLAGS_SHIFT, FLAGS_MIN_LENGTH, FLAGS_MASK); + int nodeLength = decodeLength(encodingFlags, NODE_SHIFT, NODE_MIN_LENGTH, NODE_MASK); + long epoch = accessor.getLeastSignificantBytes(src, offset, epochLength); + offset += epochLength; + long hlc = accessor.getLeastSignificantBytes(src, offset, hlcLength); + offset += hlcLength; + int flags = Math.toIntExact(accessor.getLeastSignificantBytes(src, offset, flagsLength)); + offset += flagsLength; + int nodeId = (int)accessor.getLeastSignificantBytes(src, offset, nodeLength); + return factory.create(epoch, hlc, flags, new Node.Id(nodeId)); } public T deserialize(ByteBuffer buffer, int position) { - long msb = buffer.getLong(position); - position += TypeSizes.LONG_SIZE; - long lsb = buffer.getLong(position); - position += TypeSizes.LONG_SIZE; - Node.Id node = TopologySerializers.nodeId.deserialize(buffer, position); - return factory.create(msb, lsb, node); + return deserialize(buffer, ByteBufferAccessor.instance, position); + } + + public T deserialize(ByteBuffer buffer) + { + return deserialize(buffer, ByteBufferAccessor.instance, 0); + } + + // exactly the same fundamental format as deserialize(), only we interleave the length bits with the values, maintaining ordering + public T deserializeComparable(V src, ValueAccessor accessor, int offset) + { + int b = accessor.getByte(src, offset++); + int epochLength = decodeLength(b, 5, EPOCH_MIN_LENGTH, EPOCH_MASK); + long bits64 = accessor.getLeastSignificantBytes(src, offset, epochLength); + offset += epochLength; + long epoch = (b&0x1fL) << (epochLength*8 - 5); + epoch |= bits64 >>> 5; + + int hlcLength = decodeLength((int)bits64, 3, HLC_MIN_LENGTH, HLC_MASK); + long hlc = (bits64 & 0x7L) << (hlcLength*8 - 3); + bits64 = accessor.getLeastSignificantBytes(src, offset, hlcLength); + offset += hlcLength; + hlc |= bits64 >>> 3; + + int flagsLength = decodeLength((int)bits64, 2, FLAGS_MIN_LENGTH, FLAGS_MASK); + int flags = ((int)bits64 & 0x3) << (flagsLength*8-2); + int bits32 = (int) accessor.getLeastSignificantBytes(src, offset, flagsLength); + offset += flagsLength; + flags |= bits32 >>> 2; + + int nodeLength = decodeLength(bits32, 0, NODE_MIN_LENGTH, NODE_MASK); + int node = (int) accessor.getLeastSignificantBytes(src, offset, nodeLength); + return factory.create(epoch, hlc, flags, new Node.Id(node)); } @Override public long serializedSize(T ts) { - return serializedSize(); + if (encodeSpecial(ts) != 0) + return 1; + int epochLength = length(ts.epoch(), EPOCH_MIN_LENGTH); + int hlcLength = length(ts.hlc(), HLC_MIN_LENGTH); + int flagsLength = length(ts.flags(), FLAGS_MIN_LENGTH); + int nodeLength = length(ts.node.id, NODE_MIN_LENGTH); + return 1 + epochLength + hlcLength + flagsLength + nodeLength; } - public int serializedSize() + private static int length(long value, int minLength) { - return Math.toIntExact(TypeSizes.LONG_SIZE + // ts.msb - TypeSizes.LONG_SIZE + // ts.lsb - TopologySerializers.nodeId.serializedSize(null)); // ts.node + int length = ((64 + 7) - Long.numberOfLeadingZeros(value))/8; + return Math.max(length, minLength); } - } - public static class BallotSerializer implements UnversionedSerializer - { - final TimestampSerializer wrapped = new TimestampSerializer<>(Ballot::fromBits); + private static int length(int value, int minLength) + { + int length = ((32 + 7) - Integer.numberOfLeadingZeros(value))/8; + return Math.max(length, minLength); + } - @Override - public void serialize(Ballot t, DataOutputPlus out) throws IOException + private static int encodeLength(int length, int shift, int minLength, int mask) { - if (t == null || t.equals(Ballot.ZERO) || t.equals(Ballot.MAX)) - { - out.writeByte(t == null ? 1 : t.equals(Ballot.ZERO) ? 2 : 3); - } - else - { - out.writeByte(0); - wrapped.serialize(t, out); - } + int encoded = length - minLength; + Invariants.require(encoded <= mask); + return encoded << shift; } - @Override - public Ballot deserialize(DataInputPlus in) throws IOException + private static long packLength(int length, int shift, int minLength, int mask) { - int flags = in.readByte(); - switch (flags) - { - default: throw new IOException("Corrupted input: expected [0..3], received: " + flags); - case 0: return wrapped.deserialize(in); - case 1: return null; - case 2: return Ballot.ZERO; - case 3: return Ballot.MAX; - } + int encoded = length - minLength; + Invariants.require(encoded <= mask); + return (long)encoded << shift; } - public void skip(DataInputPlus in) throws IOException + private static int decodeLength(int encodingFlags, int shift, int minLength, int mask) { - int flags = in.readByte(); - if (flags == 0) - wrapped.skip(in); + return minLength + ((encodingFlags >>> shift) & mask); + } + } + + public static class BallotSerializer extends VariableWidthTimestampSerializer + { + private static final byte ZERO_BYTE = (byte) 0x81; + private static final byte MAX_BYTE = (byte) 0x82; + private BallotSerializer() + { + super(Ballot::fromValues); } @Override - public long serializedSize(Ballot t) + byte encodeSpecial(Ballot value) { - if (t == null || t.equals(Ballot.ZERO) || t.equals(Ballot.MAX)) - return 1; - return 1 + wrapped.serializedSize(); + if (value == null) return NULL_BYTE; + if (value == Ballot.ZERO) return ZERO_BYTE; + if (value == Ballot.MAX) return MAX_BYTE; + return 0; + } + + @Override + Ballot decodeSpecial(int specialByte) + { + if (specialByte == NULL_BYTE) return null; + if (specialByte == ZERO_BYTE) return Ballot.ZERO; + if (specialByte == MAX_BYTE) return Ballot.MAX; + throw new IllegalArgumentException("Unexpected specialByte: " + specialByte); } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java index 35b189673590..7d45a6006de3 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/FetchSerializers.java @@ -92,7 +92,7 @@ public void serialize(ReadReply reply, DataOutputPlus out) throws IOException FetchResponse response = (FetchResponse) reply; serializeNullable(response.unavailable, out, KeySerializers.ranges); serializeNullable(response.data, out, streamDataSerializer); - CommandSerializers.nullableTimestamp.serialize(response.safeToReadAfter, out); + CommandSerializers.timestamp.serialize(response.safeToReadAfter, out); } @Override @@ -104,7 +104,7 @@ public ReadReply deserialize(DataInputPlus in) throws IOException return new FetchResponse(deserializeNullable(in, KeySerializers.ranges), deserializeNullable(in, streamDataSerializer), - CommandSerializers.nullableTimestamp.deserialize(in)); + CommandSerializers.timestamp.deserialize(in)); } @Override @@ -117,7 +117,7 @@ public long serializedSize(ReadReply reply) return TypeSizes.BYTE_SIZE + serializedNullableSize(response.unavailable, KeySerializers.ranges) + serializedNullableSize(response.data, streamDataSerializer) - + CommandSerializers.nullableTimestamp.serializedSize(response.safeToReadAfter); + + CommandSerializers.timestamp.serializedSize(response.safeToReadAfter); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java index 61f7a02dc9f0..eb2cded874a9 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java @@ -19,6 +19,8 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import java.util.function.BiFunction; +import java.util.function.IntFunction; import accord.api.RoutingKey; import accord.primitives.AbstractKeys; @@ -26,10 +28,11 @@ import accord.primitives.AbstractUnseekableKeys; import accord.primitives.Range; import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.RoutableKey; import accord.primitives.Routables; import accord.primitives.RoutingKeys; -import accord.primitives.Unseekables; +import accord.utils.UnhandledEnum; import net.nicoulaj.compilecommand.annotations.DontInline; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.util.DataInputPlus; @@ -69,62 +72,13 @@ public interface IVersionedWithKeysSerializer, T> extends */ long serializedSize(K keys, T t, Version version); - final class NullableWithKeysSerializer, T> implements IVersionedWithKeysSerializer - { - final IVersionedWithKeysSerializer wrapped; - public NullableWithKeysSerializer(IVersionedWithKeysSerializer wrapped) - { - this.wrapped = wrapped; - } - - @Override - public void serialize(T t, DataOutputPlus out, Version version) throws IOException - { - out.writeByte(t == null ? 0 : 1); - if (t != null) wrapped.serialize(t, out, version); - } - - @Override - public T deserialize(DataInputPlus in, Version version) throws IOException - { - if (in.readByte() == 0) return null; - return wrapped.deserialize(in, version); - } - - @Override - public long serializedSize(T t, Version version) - { - return t == null ? 1 : 1 + wrapped.serializedSize(t, version); - } - - @Override - public void serialize(K keys, T t, DataOutputPlus out, Version version) throws IOException - { - out.writeByte(t == null ? 0 : 1); - if (t != null) wrapped.serialize(keys, t, out, version); - } - - @Override - public T deserialize(K keys, DataInputPlus in, Version version) throws IOException - { - if (in.readByte() == 0) return null; - return wrapped.deserialize(keys, in, version); - } - - @Override - public long serializedSize(K keys, T t, Version version) - { - return t == null ? 1 : 1 + wrapped.serializedSize(keys, t, version); - } - } - abstract class AbstractWithKeysSerializer { /** * If both ends have a pre-shared superset of the columns we are serializing, we can send them much * more efficiently. Both ends must provide the identically same set of columns. */ - protected void serializeSubset(Routables serialize, Routables superset, DataOutputPlus out) throws IOException + protected void serializeSubsetInternal(Routables serialize, Routables superset, DataOutputPlus out) throws IOException { /** * We weight this towards small sets, and sets where the majority of items are present, since @@ -148,7 +102,7 @@ else if (supersetCount < 64) { switch (serialize.domain()) { - default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + default: throw UnhandledEnum.unknown(serialize.domain()); case Key: out.writeUnsignedVInt(encodeBitmap((AbstractUnseekableKeys)serialize, (AbstractUnseekableKeys)superset, supersetCount)); break; @@ -161,7 +115,7 @@ else if (supersetCount < 64) { switch (serialize.domain()) { - default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + default: throw UnhandledEnum.unknown(serialize.domain()); case Key: serializeLargeSubset((AbstractUnseekableKeys)serialize, serializeCount, (AbstractUnseekableKeys)superset, supersetCount, out); break; @@ -172,7 +126,7 @@ else if (supersetCount < 64) } } - public long serializedSubsetSize(Routables serialize, Routables superset) + public long serializedSubsetSizeInternal(Routables serialize, Routables superset) { int columnCount = serialize.size(); int supersetCount = superset.size(); @@ -184,7 +138,7 @@ else if (supersetCount < 64) { switch (serialize.domain()) { - default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + default: throw UnhandledEnum.unknown(serialize.domain()); case Key: return TypeSizes.sizeofUnsignedVInt(encodeBitmap((AbstractUnseekableKeys)serialize, (AbstractUnseekableKeys)superset, supersetCount)); case Range: @@ -195,7 +149,7 @@ else if (supersetCount < 64) { switch (serialize.domain()) { - default: throw new AssertionError("Unhandled domain: " + serialize.domain()); + default: throw UnhandledEnum.unknown(serialize.domain()); case Key: return serializeLargeSubsetSize((AbstractUnseekableKeys)serialize, columnCount, (AbstractUnseekableKeys)superset, supersetCount); case Range: @@ -204,55 +158,6 @@ else if (supersetCount < 64) } } - public Unseekables deserializeSubset(Unseekables superset, DataInputPlus in) throws IOException - { - long encoded = in.readUnsignedVInt(); - int supersetCount = superset.size(); - if (encoded == 0L) - { - return superset; - } - else if (supersetCount >= 64) - { - return deserializeLargeSubset(in, superset, supersetCount, (int) encoded); - } - else - { - encoded ^= -1L >>> (64 - supersetCount); - int deserializeCount = Long.bitCount(encoded); - switch (superset.domain()) - { - default: throw new AssertionError("Unhandled domain: " + superset.domain()); - case Key: - { - AbstractUnseekableKeys keys = (AbstractUnseekableKeys) superset; - RoutingKey[] out = new RoutingKey[deserializeCount]; - int count = 0; - while (encoded != 0) - { - long lowestBit = Long.lowestOneBit(encoded); - out[count++] = keys.get(Long.numberOfTrailingZeros(lowestBit)); - encoded ^= lowestBit; - } - return RoutingKeys.ofSortedUnique(out); - } - case Range: - { - AbstractRanges ranges = (AbstractRanges)superset; - Range[] out = new Range[deserializeCount]; - int count = 0; - while (encoded != 0) - { - long lowestBit = Long.lowestOneBit(encoded); - out[count++] = ranges.get(Long.numberOfTrailingZeros(lowestBit)); - encoded ^= lowestBit; - } - return Ranges.ofSortedAndDeoverlapped(out); - } - } - } - } - // encodes a 1 bit for every *missing* column, on the assumption presence is more common, // and because this is consistent with encoding 0 to represent all present private static long encodeBitmap(AbstractKeys serialize, AbstractKeys superset, int supersetCount) @@ -323,42 +228,110 @@ private void serializeLargeSubset(AbstractRanges serialize, int serializeCount, } } + public Routables deserializeSubsetInternal(Routables superset, DataInputPlus in) throws IOException + { + switch (superset.domain()) + { + default: throw UnhandledEnum.unknown(superset.domain()); + case Key: return deserializeRoutingKeySubset((AbstractUnseekableKeys) superset, in, (ks, s) -> ks == null ? s : RoutingKeys.of(ks)); + case Range: return deserializeRangeSubset((AbstractRanges) superset, in, (rs, s) -> rs == null ? s : Ranges.of(rs)); + } + } + + public void skipSubsetInternal(int supersetCount, DataInputPlus in) throws IOException + { + long encoded = in.readUnsignedVInt(); + if (supersetCount <= 64) + return; + + int deserializeCount = supersetCount - (int)encoded; + int count = 0; + while (count < deserializeCount) + { + count += in.readUnsignedVInt32(); + in.readUnsignedVInt32(); + } + } + + public T deserializeRoutingKeySubset(S superset, DataInputPlus in, BiFunction result) throws IOException + { + long encoded = in.readUnsignedVInt(); + int supersetCount = superset.size(); + if (encoded == 0L) + return result.apply(null, superset); + else if (supersetCount >= 64) + return result.apply(deserializeLargeRoutingKeySubset(in, superset, supersetCount, (int) encoded), superset); + else + return result.apply(deserializeSmallRoutingKeySubset(encoded, superset, supersetCount), superset); + } + + public T deserializeRangeSubset(S superset, DataInputPlus in, BiFunction result) throws IOException + { + long encoded = in.readUnsignedVInt(); + int supersetCount = superset.size(); + if (encoded == 0L) + return result.apply(null, superset); + else if (supersetCount >= 64) + return result.apply(deserializeLargeRangeSubset(in, superset, supersetCount, (int) encoded), superset); + else + return result.apply(deserializeSmallRangeSubsetArray(encoded, superset, supersetCount), superset); + } + + private RoutingKey[] deserializeSmallRoutingKeySubset(long encoded, AbstractUnseekableKeys superset, int supersetCount) + { + return deserializeSmallSubsetArray(encoded, superset, supersetCount, RoutingKey[]::new); + } + + private Range[] deserializeSmallRangeSubsetArray(long encoded, AbstractRanges superset, int supersetCount) + { + return deserializeSmallSubsetArray(encoded, superset, supersetCount, Range[]::new); + } + + private R[] deserializeSmallSubsetArray(long encoded, Routables superset, int supersetCount, IntFunction allocator) + { + encoded ^= -1L >>> (64 - supersetCount); + int deserializeCount = Long.bitCount(encoded); + R[] out = allocator.apply(deserializeCount); + int count = 0; + while (encoded != 0) + { + long lowestBit = Long.lowestOneBit(encoded); + out[count++] = superset.get(Long.numberOfTrailingZeros(lowestBit)); + encoded ^= lowestBit; + } + return out; + } + @DontInline - private Unseekables deserializeLargeSubset(DataInputPlus in, Unseekables superset, int supersetCount, int delta) throws IOException + private RoutingKey[] deserializeLargeRoutingKeySubset(DataInputPlus in, AbstractUnseekableKeys superset, int supersetCount, int delta) throws IOException { int deserializeCount = supersetCount - delta; - switch (superset.domain()) + RoutingKey[] out = new RoutingKey[deserializeCount]; + int supersetIndex = 0; + int count = 0; + while (count < deserializeCount) { - default: throw new AssertionError("Unhandled domain: " + superset.domain()); - case Key: - { - AbstractUnseekableKeys keys = (AbstractUnseekableKeys) superset; - RoutingKey[] out = new RoutingKey[deserializeCount]; - int supersetIndex = 0; - int count = 0; - while (count < deserializeCount) - { - int takeCount = in.readUnsignedVInt32(); - while (takeCount-- > 0) out[count++] = keys.get(supersetIndex++); - supersetIndex += in.readUnsignedVInt32(); - } - return RoutingKeys.ofSortedUnique(out); - } - case Range: - { - AbstractRanges ranges = (AbstractRanges)superset; - Range[] out = new Range[deserializeCount]; - int supersetIndex = 0; - int count = 0; - while (count < deserializeCount) - { - int takeCount = in.readUnsignedVInt32(); - while (takeCount-- > 0) out[count++] = ranges.get(supersetIndex++); - supersetIndex += in.readUnsignedVInt32(); - } - return Ranges.ofSortedAndDeoverlapped(out); - } + int takeCount = in.readUnsignedVInt32(); + while (takeCount-- > 0) out[count++] = superset.get(supersetIndex++); + supersetIndex += in.readUnsignedVInt32(); + } + return out; + } + + @DontInline + private Range[] deserializeLargeRangeSubset(DataInputPlus in, AbstractRanges superset, int supersetCount, int delta) throws IOException + { + int deserializeCount = supersetCount - delta; + Range[] out = new Range[deserializeCount]; + int supersetIndex = 0; + int count = 0; + while (count < deserializeCount) + { + int takeCount = in.readUnsignedVInt32(); + while (takeCount-- > 0) out[count++] = superset.get(supersetIndex++); + supersetIndex += in.readUnsignedVInt32(); } + return out; } @DontInline diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java index c800974e882d..a7d181e70558 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java @@ -38,7 +38,7 @@ public void serializeBody(InformDurable msg, DataOutputPlus out, Version version { out.writeVInt(msg.minEpoch - msg.waitForEpoch); out.writeVInt(msg.maxEpoch - msg.waitForEpoch); - CommandSerializers.nullableTimestamp.serialize(msg.executeAt, out); + CommandSerializers.timestamp.serialize(msg.executeAt, out); CommandSerializers.durability.serialize(msg.durability, out); } @@ -47,7 +47,7 @@ public InformDurable deserializeBody(DataInputPlus in, Version version, TxnId tx { long minEpoch = waitForEpoch + in.readVInt(); long maxEpoch = waitForEpoch + in.readVInt(); - Timestamp executeAt = CommandSerializers.nullableTimestamp.deserialize(in); + Timestamp executeAt = CommandSerializers.timestamp.deserialize(in); Status.Durability durability = CommandSerializers.durability.deserialize(in); return InformDurable.SerializationSupport.create(txnId, scope, executeAt, minEpoch, waitForEpoch, maxEpoch, durability); } @@ -57,7 +57,7 @@ public long serializedBodySize(InformDurable msg, Version version) { return TypeSizes.sizeofVInt(msg.minEpoch - msg.waitForEpoch) + TypeSizes.sizeofVInt(msg.maxEpoch - msg.waitForEpoch) - + CommandSerializers.nullableTimestamp.serializedSize(msg.executeAt) + + CommandSerializers.timestamp.serializedSize(msg.executeAt) + CommandSerializers.durability.serializedSize(msg.durability); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index b8c3ceefd281..0c96f24207f5 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -19,11 +19,7 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.EnumSet; -import java.util.Map; import java.util.Objects; -import java.util.TreeMap; import java.util.function.Function; import java.util.function.IntFunction; @@ -33,15 +29,18 @@ import accord.api.RoutingKey; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; +import accord.primitives.AbstractUnseekableKeys; import accord.primitives.FullKeyRoute; import accord.primitives.FullRangeRoute; import accord.primitives.FullRoute; +import accord.primitives.KeyRoute; import accord.primitives.Keys; import accord.primitives.PartialKeyRoute; import accord.primitives.PartialRangeRoute; import accord.primitives.PartialRoute; import accord.primitives.Participants; import accord.primitives.Range; +import accord.primitives.RangeRoute; import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.RoutableKey; @@ -53,6 +52,8 @@ import accord.primitives.Unseekables; import accord.primitives.Unseekables.UnseekablesKind; import accord.utils.Invariants; +import accord.utils.TinyEnumSet; +import accord.utils.UnhandledEnum; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.UnversionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -69,14 +70,14 @@ public class KeySerializers { public static final AccordKeySerializer key; - public static final UnversionedSerializer routingKey; + public static final AccordSearchableKeySerializer routingKey; public static final UnversionedSerializer nullableRoutingKey; - public static final AbstractSearchableKeysSerializer routingKeys; + public static final AbstractSearchableRoutingKeysSerializer routingKeys; public static final UnversionedSerializer keys; - public static final AbstractSearchableKeysSerializer partialKeyRoute; - public static final AbstractSearchableKeysSerializer fullKeyRoute; + public static final AbstractSearchableRoutingKeysSerializer partialKeyRoute; + public static final AbstractSearchableRoutingKeysSerializer fullKeyRoute; public static final UnversionedSerializer range; public static final AbstractRangesSerializer ranges; @@ -130,11 +131,11 @@ public static class Impl final AccordSearchableKeySerializer routingKey; final UnversionedSerializer nullableRoutingKey; - final AbstractSearchableKeysSerializer routingKeys; + final AbstractSearchableRoutingKeysSerializer routingKeys; final UnversionedSerializer keys; - final AbstractSearchableKeysSerializer partialKeyRoute; - final AbstractSearchableKeysSerializer fullKeyRoute; + final AbstractSearchableRoutingKeysSerializer partialKeyRoute; + final AbstractSearchableRoutingKeysSerializer fullKeyRoute; final UnversionedSerializer range; final AbstractRangesSerializer ranges; @@ -168,7 +169,7 @@ public Impl(AccordKeySerializer key, this.range = range; this.nullableRoutingKey = NullableSerializer.wrap(routingKey); - this.routingKeys = new AbstractSearchableKeysSerializer<>(routingKey, RoutingKey[]::new) + this.routingKeys = new AbstractSearchableRoutingKeysSerializer<>(routingKey) { @Override RoutingKeys deserialize(DataInputPlus in, RoutingKey[] keys) { @@ -184,53 +185,25 @@ public Impl(AccordKeySerializer key, } }; - this.partialKeyRoute = new AbstractSearchableKeysSerializer<>(routingKey, RoutingKey[]::new) + this.partialKeyRoute = new AbstractKeyRouteSerializer<>(routingKey) { - @Override PartialKeyRoute deserialize(DataInputPlus in, RoutingKey[] keys) throws IOException - { - RoutingKey homeKey = routingKey.deserialize(in); - return PartialKeyRoute.SerializationSupport.create(homeKey, keys); - } - - @Override - public void serialize(PartialKeyRoute route, DataOutputPlus out) throws IOException - { - super.serialize(route, out); - routingKey.serialize(route.homeKey, out); - } - @Override - public long serializedSize(PartialKeyRoute routables) + PartialKeyRoute construct(RoutingKey homeKey, RoutingKey[] keys) { - return super.serializedSize(routables) - + routingKey.serializedSize(routables.homeKey); + return PartialKeyRoute.SerializationSupport.create(homeKey, keys); } }; - this.fullKeyRoute = new AbstractSearchableKeysSerializer<>(routingKey, RoutingKey[]::new) + this.fullKeyRoute = new AbstractKeyRouteSerializer<>(routingKey) { - @Override FullKeyRoute deserialize(DataInputPlus in, RoutingKey[] keys) throws IOException - { - RoutingKey homeKey = routingKey.deserialize(in); - return FullKeyRoute.SerializationSupport.create(homeKey, keys); - } - @Override - public void serialize(FullKeyRoute route, DataOutputPlus out) throws IOException + FullKeyRoute construct(RoutingKey homeKey, RoutingKey[] keys) { - super.serialize(route, out); - routingKey.serialize(route.homeKey, out); - } - - @Override - public long serializedSize(FullKeyRoute routables) - { - return super.serializedSize(routables) - + routingKey.serializedSize(routables.homeKey); + return FullKeyRoute.SerializationSupport.create(homeKey, keys); } }; - this.ranges = new AbstractRangesSerializer<>(routingKey) + this.ranges = new AbstractRangesSerializer<>() { @Override public Ranges deserialize(DataInputPlus in, Range[] ranges) @@ -239,64 +212,35 @@ public Ranges deserialize(DataInputPlus in, Range[] ranges) } }; - - this.partialRangeRoute = new AbstractRangesSerializer<>(routingKey) + this.partialRangeRoute = new AbstractRangeRouteSerializer<>() { - @Override PartialRangeRoute deserialize(DataInputPlus in, Range[] rs) throws IOException - { - RoutingKey homeKey = routingKey.deserialize(in); - return PartialRangeRoute.SerializationSupport.create(homeKey, rs); - } - - @Override - public void serialize(PartialRangeRoute route, DataOutputPlus out) throws IOException - { - super.serialize(route, out); - routingKey.serialize(route.homeKey, out); - } - @Override - public long serializedSize(PartialRangeRoute rs) + PartialRangeRoute construct(RoutingKey homeKey, Range[] rs) { - return super.serializedSize(rs) - + routingKey.serializedSize(rs.homeKey); + return PartialRangeRoute.SerializationSupport.create(homeKey, rs); } }; - this.fullRangeRoute = new AbstractRangesSerializer<>(routingKey) + this.fullRangeRoute = new AbstractRangeRouteSerializer<>() { - @Override FullRangeRoute deserialize(DataInputPlus in, Range[] Ranges) throws IOException - { - RoutingKey homeKey = routingKey.deserialize(in); - return FullRangeRoute.SerializationSupport.create(homeKey, Ranges); - } - - @Override - public void serialize(FullRangeRoute route, DataOutputPlus out) throws IOException - { - super.serialize(route, out); - routingKey.serialize(route.homeKey, out); - } - @Override - public long serializedSize(FullRangeRoute ranges) + FullRangeRoute construct(RoutingKey homeKey, Range[] Ranges) { - return super.serializedSize(ranges) - + routingKey.serializedSize(ranges.homeKey()); + return FullRangeRoute.SerializationSupport.create(homeKey, Ranges); } }; - Function, AbstractRoutablesSerializer> factory = (a) -> new AbstractRoutablesSerializer<>(a, routingKeys, partialKeyRoute, fullKeyRoute, ranges, partialRangeRoute, fullRangeRoute); + Function, AbstractRoutablesSerializer> factory = (a) -> new AbstractRoutablesSerializer<>(a, routingKeys, partialKeyRoute, fullKeyRoute, ranges, partialRangeRoute, fullRangeRoute); - this.route = (AbstractRoutablesSerializer>) factory.apply(EnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.FullKeyRoute, UnseekablesKind.PartialRangeRoute, UnseekablesKind.FullRangeRoute)); + this.route = (AbstractRoutablesSerializer>) factory.apply(TinyEnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.FullKeyRoute, UnseekablesKind.PartialRangeRoute, UnseekablesKind.FullRangeRoute)); this.nullableRoute = NullableSerializer.wrap(route); - this.partialRoute = (AbstractRoutablesSerializer>) factory.apply(EnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.PartialRangeRoute)); - this.fullRoute = (AbstractRoutablesSerializer>) factory.apply(EnumSet.of(UnseekablesKind.FullKeyRoute, UnseekablesKind.FullRangeRoute)); + this.partialRoute = (AbstractRoutablesSerializer>) factory.apply(TinyEnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.PartialRangeRoute)); + this.fullRoute = (AbstractRoutablesSerializer>) factory.apply(TinyEnumSet.of(UnseekablesKind.FullKeyRoute, UnseekablesKind.FullRangeRoute)); this.nullableFullRoute = NullableSerializer.wrap(fullRoute); - this.unseekables = (AbstractRoutablesSerializer>) factory.apply(EnumSet.allOf(UnseekablesKind.class)); - this.participants = (AbstractRoutablesSerializer>) factory.apply(EnumSet.allOf(UnseekablesKind.class)); + this.unseekables = (AbstractRoutablesSerializer>) factory.apply(TinyEnumSet.allOf(UnseekablesKind.class)); + this.participants = (AbstractRoutablesSerializer>) factory.apply(TinyEnumSet.allOf(UnseekablesKind.class)); this.nullableParticipants = NullableSerializer.wrap(participants); this.seekables = new AbstractSeekablesSerializer(keys, ranges); @@ -305,18 +249,18 @@ public long serializedSize(FullRangeRoute ranges) public static class AbstractRoutablesSerializer> implements UnversionedSerializer { - final EnumSet permitted; - final AbstractSearchableKeysSerializer routingKeys; - final AbstractSearchableKeysSerializer partialKeyRoute; - final AbstractSearchableKeysSerializer fullKeyRoute; + final TinyEnumSet permitted; + final AbstractSearchableRoutingKeysSerializer routingKeys; + final AbstractSearchableRoutingKeysSerializer partialKeyRoute; + final AbstractSearchableRoutingKeysSerializer fullKeyRoute; final AbstractRangesSerializer ranges; final AbstractRangesSerializer partialRangeRoute; final AbstractRangesSerializer fullRangeRoute; - protected AbstractRoutablesSerializer(EnumSet permitted, - AbstractSearchableKeysSerializer routingKeys, - AbstractSearchableKeysSerializer partialKeyRoute, - AbstractSearchableKeysSerializer fullKeyRoute, + protected AbstractRoutablesSerializer(TinyEnumSet permitted, + AbstractSearchableRoutingKeysSerializer routingKeys, + AbstractSearchableRoutingKeysSerializer partialKeyRoute, + AbstractSearchableRoutingKeysSerializer fullKeyRoute, AbstractRangesSerializer ranges, AbstractRangesSerializer partialRangeRoute, AbstractRangesSerializer fullRangeRoute) @@ -334,8 +278,7 @@ protected AbstractRoutablesSerializer(EnumSet permitted, public void serialize(RS t, DataOutputPlus out) throws IOException { UnseekablesKind kind = t.kind(); - if (!permitted.contains(kind)) - throw new IllegalArgumentException(); + Invariants.requireArgument(permitted.contains(kind)); switch (kind) { @@ -367,6 +310,41 @@ public void serialize(RS t, DataOutputPlus out) throws IOException } } + public void serializeSubset(RS t, Unseekables superset, DataOutputPlus out) throws IOException + { + UnseekablesKind kind = t.kind(); + Invariants.requireArgument(permitted.contains(kind)); + + switch (kind) + { + default: throw new AssertionError(); + case RoutingKeys: + out.writeByte(1); + routingKeys.serializeSubset((RoutingKeys)t, superset, out); + break; + case PartialKeyRoute: + out.writeByte(2); + partialKeyRoute.serializeSubset((PartialKeyRoute)t, superset, out); + break; + case FullKeyRoute: + out.writeByte(3); + fullKeyRoute.serializeSubset((FullKeyRoute)t, superset, out); + break; + case RoutingRanges: + out.writeByte(4); + ranges.serializeSubset((Ranges)t, superset, out); + break; + case PartialRangeRoute: + out.writeByte(5); + partialRangeRoute.serializeSubset((PartialRangeRoute)t, superset, out); + break; + case FullRangeRoute: + out.writeByte(6); + fullRangeRoute.serializeSubset((FullRangeRoute)t, superset, out); + break; + } + } + @Override public RS deserialize(DataInputPlus in) throws IOException { @@ -375,7 +353,7 @@ public RS deserialize(DataInputPlus in) throws IOException RS result; switch (b) { - default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4 or 5; received " + b); + default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4, 5 or 6; received " + b); case 1: kind = UnseekablesKind.RoutingKeys; result = (RS)routingKeys.deserialize(in); break; case 2: kind = UnseekablesKind.PartialKeyRoute; result = (RS)partialKeyRoute.deserialize(in); break; case 3: kind = UnseekablesKind.FullKeyRoute; result = (RS)fullKeyRoute.deserialize(in); break; @@ -387,18 +365,92 @@ public RS deserialize(DataInputPlus in) throws IOException return result; } + public RS deserializeSubset(Unseekables superset, DataInputPlus in) throws IOException + { + byte b = in.readByte(); + UnseekablesKind kind; + RS result; + switch (b) + { + default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4 or 5; received " + b); + case 1: kind = UnseekablesKind.RoutingKeys; result = (RS)routingKeys.deserializeSubset((AbstractUnseekableKeys) superset, in); break; + case 2: kind = UnseekablesKind.PartialKeyRoute; result = (RS)partialKeyRoute.deserializeSubset((AbstractUnseekableKeys) superset, in); break; + case 3: kind = UnseekablesKind.FullKeyRoute; result = (RS)fullKeyRoute.deserializeSubset((AbstractUnseekableKeys) superset, in); break; + case 4: kind = UnseekablesKind.RoutingRanges; result = (RS)ranges.deserializeSubset((AbstractRanges) superset, in); break; + case 5: kind = UnseekablesKind.PartialRangeRoute; result = (RS)partialRangeRoute.deserializeSubset((AbstractRanges) superset, in); break; + case 6: kind = UnseekablesKind.FullRangeRoute; result = (RS)fullRangeRoute.deserializeSubset((AbstractRanges) superset, in); break; + } + Invariants.require(permitted.contains(kind)); + return result; + } + public void skip(DataInputPlus in) throws IOException + { + countAndSkip(in); + } + + public void skip(UnseekablesKind kind, DataInputPlus in) throws IOException + { + countAndSkip(kind, in); + } + + // return number of elements skipped + public int countAndSkip(DataInputPlus in) throws IOException + { + byte b = in.readByte(); + switch (b) + { + default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4 or 5; received " + b); + case 1: return routingKeys.countAndSkip(in); + case 2: return partialKeyRoute.countAndSkip(in); + case 3: return fullKeyRoute.countAndSkip(in); + case 4: return ranges.countAndSkip(in); + case 5: return partialRangeRoute.countAndSkip(in); + case 6: return fullRangeRoute.countAndSkip(in); + } + } + + public int countAndSkip(UnseekablesKind kind, DataInputPlus in) throws IOException + { + switch (kind) + { + default: throw UnhandledEnum.unknown(kind); + case RoutingKeys: return routingKeys.countAndSkip(in); + case PartialKeyRoute: return partialKeyRoute.countAndSkip(in); + case FullKeyRoute: return fullKeyRoute.countAndSkip(in); + case RoutingRanges: return ranges.countAndSkip(in); + case PartialRangeRoute: return partialRangeRoute.countAndSkip(in); + case FullRangeRoute: return fullRangeRoute.countAndSkip(in); + } + } + + public Unseekables.UnseekablesKind readKind(DataInputPlus in) throws IOException { byte b = in.readByte(); switch (b) { default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4 or 5; received " + b); - case 1: routingKeys.skip(in); break; - case 2: partialKeyRoute.skip(in); break; - case 3: fullKeyRoute.skip(in); break; - case 4: ranges.skip(in); break; - case 5: partialRangeRoute.skip(in); break; - case 6: fullRangeRoute.skip(in); break; + case 1: return UnseekablesKind.RoutingKeys; + case 2: return UnseekablesKind.PartialKeyRoute; + case 3: return UnseekablesKind.FullKeyRoute; + case 4: return UnseekablesKind.RoutingRanges; + case 5: return UnseekablesKind.PartialRangeRoute; + case 6: return UnseekablesKind.FullRangeRoute; + } + } + + public void skipSubset(int supersetCount, DataInputPlus in) throws IOException + { + byte b = in.readByte(); + switch (b) + { + default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4 or 5; received " + b); + case 1: routingKeys.skipSubset(supersetCount, in); break; + case 2: partialKeyRoute.skipSubset(supersetCount, in); break; + case 3: fullKeyRoute.skipSubset(supersetCount, in); break; + case 4: ranges.skipSubset(supersetCount, in); break; + case 5: partialRangeRoute.skipSubset(supersetCount, in); break; + case 6: fullRangeRoute.skipSubset(supersetCount, in); break; } } @@ -422,6 +474,26 @@ public long serializedSize(RS t) return 1 + fullRangeRoute.serializedSize((FullRangeRoute)t); } } + + public long serializedSubsetSize(RS t, Unseekables superset) + { + switch (t.kind()) + { + default: throw new AssertionError(); + case RoutingKeys: + return 1 + routingKeys.serializedSubsetSize((RoutingKeys)t, superset); + case PartialKeyRoute: + return 1 + partialKeyRoute.serializedSubsetSize((PartialKeyRoute)t, superset); + case FullKeyRoute: + return 1 + fullKeyRoute.serializedSubsetSize((FullKeyRoute)t, superset); + case RoutingRanges: + return 1 + ranges.serializedSubsetSize((Ranges)t, superset); + case PartialRangeRoute: + return 1 + partialRangeRoute.serializedSubsetSize((PartialRangeRoute)t, superset); + case FullRangeRoute: + return 1 + fullRangeRoute.serializedSubsetSize((FullRangeRoute)t, superset); + } + } } public static final UnversionedSerializer seekable = new UnversionedSerializer<>() @@ -574,30 +646,28 @@ public long serializedSize(KS keys) // this serializer is designed to permits using the collection in its serialized form with minimal in-memory state. // it also saves some memory by avoiding duplicating prefixes (which happens to also assist faster lookups) - public abstract static class AbstractSearchableSerializer> implements UnversionedSerializer + public abstract static class AbstractSearchableSerializer> extends IVersionedWithKeysSerializer.AbstractWithKeysSerializer implements UnversionedSerializer { - final AccordSearchableKeySerializer keySerializer; final IntFunction allocate; - public AbstractSearchableSerializer(AccordSearchableKeySerializer keySerializer, IntFunction allocate) + public AbstractSearchableSerializer(IntFunction allocate) { - this.keySerializer = keySerializer; this.allocate = allocate; } private int serializedSizeOfPrefix(Object prefix) { - return keySerializer.serializedSizeOfPrefix(prefix); + return routingKey.serializedSizeOfPrefix(prefix); } private void serializePrefix(Object prefix, DataOutputPlus out) throws IOException { - keySerializer.serializePrefix(prefix, out); + routingKey.serializePrefix(prefix, out); } private Object deserializePrefix(DataInputPlus in) throws IOException { - return keySerializer.deserializePrefix(in); + return routingKey.deserializePrefix(in); } // if we store Ranges, we have twice as many indexes @@ -651,6 +721,11 @@ public long serializedSize(RS routables) return size; } + public long serializedSubsetSize(RS keys, Routables superset) + { + return serializedSubsetSizeInternal(keys, superset); + } + @Override public void serialize(RS keys, DataOutputPlus out) throws IOException { @@ -696,12 +771,24 @@ private void serializeKeysWithoutPrefix(RS keys, int start, int end, DataOutputP serializeWithoutPrefixOrLength(keys.get(i), out); } + public void serializeSubset(RS keys, Routables superset, DataOutputPlus out) throws IOException + { + serializeSubsetInternal(keys, superset, out); + } + public void skip(DataInputPlus in) throws IOException + { + countAndSkip(in); + } + + // return number of elements skipped + public int countAndSkip(DataInputPlus in) throws IOException { int remaining = in.readUnsignedVInt32(); if (remaining == 0) - return; + return 0; + int total = 0; while (remaining > 0) { int count = remaining - in.readUnsignedVInt32(); @@ -718,7 +805,14 @@ public void skip(DataInputPlus in) throws IOException int end = in.readInt(); in.skipBytesFully(end); } + total += count; } + return total; + } + + public void skipSubset(int supersetCount, DataInputPlus in) throws IOException + { + skipSubsetInternal(supersetCount, in); } @Override @@ -769,17 +863,17 @@ public RS deserialize(DataInputPlus in) throws IOException // this serializer is designed to permits using the collection in its serialized form with minimal in-memory state. // it also saves some memory by avoiding duplicating prefixes (which happens to also assist faster lookups) - public abstract static class AbstractSearchableKeysSerializer> extends AbstractSearchableSerializer implements UnversionedSerializer + public abstract static class AbstractSearchableRoutingKeysSerializer extends AbstractSearchableSerializer implements UnversionedSerializer { - public AbstractSearchableKeysSerializer(AccordSearchableKeySerializer keySerializer, IntFunction allocate) + public AbstractSearchableRoutingKeysSerializer(AccordSearchableKeySerializer serializer) { - super(keySerializer, allocate); + super(RoutingKey[]::new); } @Override final int fixedKeyLengthForPrefix(Object prefix) { - return keySerializer.fixedKeyLengthForPrefix(prefix); + return routingKey.fixedKeyLengthForPrefix(prefix); } @Override @@ -789,15 +883,15 @@ final int recordCountToLengthCount(int recordCount) } @Override - final int serializedSizeWithoutPrefix(K routable) + final int serializedSizeWithoutPrefix(RoutingKey routable) { - return keySerializer.serializedSizeWithoutPrefix(routable); + return routingKey.serializedSizeWithoutPrefix(routable); } @Override - final void serializeWithoutPrefixOrLength(K routable, DataOutputPlus out) throws IOException + final void serializeWithoutPrefixOrLength(RoutingKey routable, DataOutputPlus out) throws IOException { - keySerializer.serializeWithoutPrefixOrLength(routable, out); + routingKey.serializeWithoutPrefixOrLength(routable, out); } @Override @@ -812,29 +906,117 @@ final void serializeOffsets(KS keys, int startIndex, int endIndex, DataOutputPlu } @Override - final K deserializeWithPrefix(Object prefix, int length, DataInputPlus in) throws IOException + final RoutingKey deserializeWithPrefix(Object prefix, int length, DataInputPlus in) throws IOException + { + return routingKey.deserializeWithPrefix(prefix, length, in); + } + + @Override + final RoutingKey deserializeWithPrefix(Object prefix, int lengthIndex, int[] lengths, DataInputPlus in) throws IOException + { + return routingKey.deserializeWithPrefix(prefix, lengths[lengthIndex], in); + } + + public KS deserializeSubset(AbstractUnseekableKeys superset, DataInputPlus in) throws IOException + { + RoutingKey[] keys = deserializeRoutingKeySubset(superset, in, (ks, s) -> ks == null ? s.unsafeKeys() : ks); + return deserialize(in, keys); + } + } + + public abstract static class AbstractKeyRouteSerializer extends AbstractSearchableRoutingKeysSerializer + { + public AbstractKeyRouteSerializer(AccordSearchableKeySerializer serializer) + { + super(serializer); + } + + abstract KS construct(RoutingKey homeKey, RoutingKey[] keys); + + @Override + KS deserialize(DataInputPlus in, RoutingKey[] keys) throws IOException + { + int i = in.readUnsignedVInt32(); + RoutingKey homeKey = i == 0 ? routingKey.deserialize(in) : keys[i - 1]; + return construct(homeKey, keys); + } + + @Override + public int countAndSkip(DataInputPlus in) throws IOException + { + int count = super.countAndSkip(in); + completeSkip(in); + return count; + } + + @Override + public void skipSubset(int supersetCount, DataInputPlus in) throws IOException + { + skipSubsetInternal(supersetCount, in); + completeSkip(in); + } + + @Override + public void serialize(KS route, DataOutputPlus out) throws IOException + { + super.serialize(route, out); + completeSerialize(route, out); + } + + @Override + public void serializeSubset(KS route, Routables superset, DataOutputPlus out) throws IOException + { + super.serializeSubset(route, superset, out); + completeSerialize(route, out); + } + + @Override + public long serializedSize(KS route) { - return keySerializer.deserializeWithPrefix(prefix, length, in); + return super.serializedSize(route) + + completeSerializedSize(route); } @Override - final K deserializeWithPrefix(Object prefix, int lengthIndex, int[] lengths, DataInputPlus in) throws IOException + public long serializedSubsetSize(KS route, Routables superset) { - return keySerializer.deserializeWithPrefix(prefix, lengths[lengthIndex], in); + return super.serializedSubsetSize(route, superset) + + completeSerializedSize(route); + } + + private void completeSerialize(KS route, DataOutputPlus out) throws IOException + { + int i = route.indexOf(route.homeKey()); + out.writeUnsignedVInt32(Math.max(0, 1 + i)); + if (i < 0) routingKey.serialize(route.homeKey, out); + } + + private void completeSkip(DataInputPlus in) throws IOException + { + int i = in.readUnsignedVInt32(); + if (i == 0) routingKey.skip(in); + } + + private long completeSerializedSize(KS route) + { + int i = route.indexOf(route.homeKey()); + long size = TypeSizes.sizeofUnsignedVInt(Math.max(0, 1 + i)); + if (i < 0) size += routingKey.serializedSize(route.homeKey); + return size; } } public abstract static class AbstractRangesSerializer extends AbstractSearchableSerializer implements UnversionedSerializer { - public AbstractRangesSerializer(AccordSearchableKeySerializer keySerializer) + public AbstractRangesSerializer() { - super(keySerializer, Range[]::new); + super(Range[]::new); } @Override int fixedKeyLengthForPrefix(Object prefix) { - return keySerializer.fixedKeyLengthForPrefix(prefix) * 2; + return routingKey.fixedKeyLengthForPrefix(prefix) * 2; } @Override @@ -846,15 +1028,15 @@ int recordCountToLengthCount(int recordCount) @Override final int serializedSizeWithoutPrefix(Range range) { - return keySerializer.serializedSizeWithoutPrefix(range.start()) - + keySerializer.serializedSizeWithoutPrefix(range.end()); + return routingKey.serializedSizeWithoutPrefix(range.start()) + + routingKey.serializedSizeWithoutPrefix(range.end()); } @Override final void serializeWithoutPrefixOrLength(Range key, DataOutputPlus out) throws IOException { - keySerializer.serializeWithoutPrefixOrLength(key.start(), out); - keySerializer.serializeWithoutPrefixOrLength(key.end(), out); + routingKey.serializeWithoutPrefixOrLength(key.start(), out); + routingKey.serializeWithoutPrefixOrLength(key.end(), out); } @Override @@ -864,9 +1046,9 @@ final void serializeOffsets(RS ranges, int startIndex, int endIndex, DataOutputP for (int i = startIndex; i < endIndex; ++i) { Range r = ranges.get(i); - endOffset += keySerializer.serializedSizeWithoutPrefix(r.start()); + endOffset += routingKey.serializedSizeWithoutPrefix(r.start()); out.writeInt(endOffset); - endOffset += keySerializer.serializedSizeWithoutPrefix(r.end()); + endOffset += routingKey.serializedSizeWithoutPrefix(r.end()); out.writeInt(endOffset); } } @@ -874,40 +1056,84 @@ final void serializeOffsets(RS ranges, int startIndex, int endIndex, DataOutputP @Override final Range deserializeWithPrefix(Object prefix, int length, DataInputPlus in) throws IOException { - RoutingKey start = keySerializer.deserializeWithPrefix(prefix, length/2, in); - RoutingKey end = keySerializer.deserializeWithPrefix(prefix, length/2, in); + RoutingKey start = routingKey.deserializeWithPrefix(prefix, length/2, in); + RoutingKey end = routingKey.deserializeWithPrefix(prefix, length/2, in); return start.rangeFactory().newRange(start, end); } @Override final Range deserializeWithPrefix(Object prefix, int lengthIndex, int[] lengths, DataInputPlus in) throws IOException { - RoutingKey start = keySerializer.deserializeWithPrefix(prefix, lengths[lengthIndex * 2], in); - RoutingKey end = keySerializer.deserializeWithPrefix(prefix, lengths[lengthIndex * 2 + 1], in); + RoutingKey start = routingKey.deserializeWithPrefix(prefix, lengths[lengthIndex * 2], in); + RoutingKey end = routingKey.deserializeWithPrefix(prefix, lengths[lengthIndex * 2 + 1], in); return start.rangeFactory().newRange(start, end); } - } - public static Map rangesToBlobMap(Ranges ranges) - { - TreeMap result = new TreeMap<>(); - for (Range range : ranges) + public RS deserializeSubset(AbstractRanges superset, DataInputPlus in) throws IOException { - result.put(TokenKey.serializer.serialize((TokenKey) range.start()), - TokenKey.serializer.serialize((TokenKey) range.end())); + Range[] ranges = deserializeRangeSubset(superset, in, (rs, s) -> rs == null ? s.unsafeRanges() : rs); + return deserialize(in, ranges); } - return result; } - public static Ranges blobMapToRanges(Map blobMap) + public abstract static class AbstractRangeRouteSerializer extends AbstractRangesSerializer { - int i = 0; - Range[] ranges = new Range[blobMap.size()]; - for (Map.Entry e : blobMap.entrySet()) + public AbstractRangeRouteSerializer() + { + super(); + } + + abstract RS construct(RoutingKey homeKey, Range[] ranges); + + @Override + RS deserialize(DataInputPlus in, Range[] ranges) throws IOException { - ranges[i++] = TokenRange.create(TokenKey.serializer.deserialize(e.getKey()), - TokenKey.serializer.deserialize(e.getValue())); + RoutingKey homeKey = routingKey.deserialize(in); + return construct(homeKey, ranges); + } + + @Override + public int countAndSkip(DataInputPlus in) throws IOException + { + int count = super.countAndSkip(in); + routingKey.skip(in); + return count; + } + + @Override + public void skipSubset(int supersetCount, DataInputPlus in) throws IOException + { + super.skipSubset(supersetCount, in); + routingKey.skip(in); + } + + @Override + public void serialize(RS route, DataOutputPlus out) throws IOException + { + super.serialize(route, out); + routingKey.serialize(route.homeKey, out); + } + + @Override + public void serializeSubset(RS route, Routables superset, DataOutputPlus out) throws IOException + { + super.serializeSubset(route, superset, out); + routingKey.serialize(route.homeKey, out); + } + + @Override + public long serializedSize(RS route) + { + return super.serializedSize(route) + + routingKey.serializedSize(route.homeKey); + } + + @Override + public long serializedSubsetSize(RS route, Routables superset) + { + return super.serializedSubsetSize(route, superset) + routingKey.serializedSize(route.homeKey); } - return Ranges.of(ranges); } + + } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java index 0101efa04a1d..0e3413905fb1 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java @@ -26,7 +26,7 @@ public class ResultSerializers { - // TODO (expected): this is meant to encode e.g. whether the transaction's condition met or not + // TODO (expected): this is meant to encode e.g. whether the transaction's condition met or not for clients to later query public static final Result APPLIED = new Result() { @Override diff --git a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java index 9a0e2c5cb76e..a444f4147bc3 100644 --- a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java +++ b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java @@ -308,6 +308,17 @@ public static long getUnsignedVInt(V input, ValueAccessor accessor, int r return retval; } + public static int readLengthOfVInt(V input, ValueAccessor accessor, int position) + { + byte firstByte = accessor.getByte(input, position); + if (firstByte >= 0) + return 1; + + int extraBytes = accord.utils.VIntCoding.numberOfExtraBytesToRead(firstByte); + return 1 + extraBytes; + } + + /** * Computes size of an unsigned vint that starts at readerIndex of the provided ByteBuf. * @@ -380,6 +391,16 @@ public static int readUnsignedVInt32(ByteBuffer input, int position) return checkedCast(readUnsignedVInt(input, position)); } + public static int readLengthOfVInt(ByteBuffer in, int position) + { + byte firstByte = in.get(position); + if (firstByte >= 0) + return 1; + + int extraBytes = numberOfExtraBytesToRead(firstByte); + return 1 + extraBytes; + } + // & this with the first byte to give the value part for a given extraBytesToRead encoded in the byte public static int firstByteValueMask(int extraBytesToRead) { diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/VectorsTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/VectorsTest.java index 7154d0295ec7..2d77428dae59 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/VectorsTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/VectorsTest.java @@ -25,8 +25,6 @@ import org.apache.cassandra.cql3.CQLTester; -import static org.apache.cassandra.ServerTestUtils.daemonInitialization; - public class VectorsTest extends CQLTester { @BeforeClass diff --git a/test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java b/test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java index c9fe2e906071..70506bb722b9 100644 --- a/test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/CheckpointIntervalArrayIndexTest.java @@ -396,7 +396,7 @@ private Searcher index(int bytesPerKey, int bytesPerValue, List files = new EnumMap<>(IndexComponent.class); for (IndexComponent c : descriptor.getLiveComponents()) files.put(c, new FileHandle.Builder(descriptor.fileFor(c)).mmapped(true).complete()); - List segments = RouteIndexFormat.readSegements(files); + List segments = RouteIndexFormat.readSegments(files); files.remove(IndexComponent.SEGMENT).close(); files.remove(IndexComponent.METADATA).close(); diff --git a/test/unit/org/apache/cassandra/io/Serializers.java b/test/unit/org/apache/cassandra/io/Serializers.java index d911e07aed49..16b56b9c394c 100644 --- a/test/unit/org/apache/cassandra/io/Serializers.java +++ b/test/unit/org/apache/cassandra/io/Serializers.java @@ -19,6 +19,7 @@ package org.apache.cassandra.io; import java.io.IOException; +import java.nio.ByteBuffer; import accord.utils.LazyToString; import accord.utils.ReflectionUtils; @@ -37,9 +38,14 @@ public static void testSerde(DataOutputBuffer output, AsymmetricUnversionedS long expectedSize = serializer.serializedSize(input); serializer.serialize(input, output); Assertions.assertThat(output.getLength()).describedAs("The serialized size and bytes written do not match").isEqualTo(expectedSize); - DataInputBuffer in = new DataInputBuffer(output.unsafeGetBufferAndFlip(), false); + ByteBuffer buffer = output.unsafeGetBufferAndFlip(); + DataInputBuffer in = new DataInputBuffer(buffer, false); T read = serializer.deserialize(in); Assertions.assertThat(read).describedAs("The deserialized output does not match the serialized input; difference %s", new LazyToString(() -> ReflectionUtils.recursiveEquals(read, input).toString())).isEqualTo(input); + Assertions.assertThat(buffer.remaining()).describedAs("deserialize did not consume all the serialized input").isEqualTo(0); + buffer.flip(); + serializer.skip(in); + Assertions.assertThat(buffer.remaining()).describedAs("skip did not consume all the serialized input").isEqualTo(0); } public static void testSerde(AsymmetricUnversionedSerializer serializer, T input) throws IOException diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java index 18fee68f5fb6..c4635cdde61b 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandSerializersTest.java @@ -19,21 +19,28 @@ package org.apache.cassandra.service.accord.serializers; import java.io.IOException; +import java.nio.ByteBuffer; import org.junit.BeforeClass; import org.junit.Test; +import accord.local.Node; import accord.primitives.PartialTxn; import accord.primitives.Ranges; +import accord.primitives.Timestamp; import accord.primitives.Txn; +import accord.primitives.TxnId; import accord.utils.AccordGens; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.io.Serializers; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.accord.AccordTestUtils; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.FastByteOperations; +import org.assertj.core.api.Assertions; import static accord.utils.Property.qt; import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; @@ -69,6 +76,58 @@ public void txnSerializer() throws IOException public void txnIdSerde() { DataOutputBuffer output = new DataOutputBuffer(); - qt().forAll(AccordGens.txnIds()).check(txnId -> Serializers.testSerde(output, CommandSerializers.txnId, txnId)); + qt().forAll(AccordGens.txnIds()).check(txnId -> { + Serializers.testSerde(output, CommandSerializers.txnId, txnId); + ByteBuffer tmp = output.buffer(); + tmp.clear(); + CommandSerializers.txnId.serialize(txnId, tmp); + tmp.flip(); + TxnId rt = CommandSerializers.txnId.deserialize(tmp); + Assertions.assertThat(rt).isEqualTo(txnId); + }); + } + + @Test + public void txnIdComparable() + { + qt().forAll(AccordGens.txnIds(), AccordGens.txnIds()).check(CommandSerializersTest::testComparable); + qt().forAll(AccordGens.txnIds()).check((a) -> { + ByteBuffer abb = ByteBuffer.allocate((int) CommandSerializers.txnId.serializedSize(a)); + CommandSerializers.txnId.serializeComparable(a, abb, ByteBufferAccessor.instance, 0); + if (a.epoch() < Timestamp.MAX_EPOCH) + testComparable(a, TxnId.fromValues(a.epoch() + 1, a.hlc(), a.flags(), a.node)); + if (a.epoch() > 0) + testComparable(a, TxnId.fromValues(a.epoch() - 1, a.hlc(), a.flags(), a.node)); + if (a.hlc() < Timestamp.MAX.hlc()) + testComparable(a, TxnId.fromValues(a.epoch(), a.hlc() + 1, a.flags(), a.node)); + if (a.hlc() > 0) + testComparable(a, TxnId.fromValues(a.epoch(), a.hlc() - 1, a.flags(), a.node)); + if (a.flags() < Timestamp.MAX.flags()) + testComparable(a, TxnId.fromValues(a.epoch(), a.hlc(), a.flags() + 1, a.node)); + if (a.flags() != 0) + testComparable(a, TxnId.fromValues(a.epoch(), a.hlc(), a.flags() - 1, a.node)); + if (a.node.id > 0) + testComparable(a, TxnId.fromValues(a.epoch(), a.hlc(), a.flags(), new Node.Id(a.node.id - 1))); + if (a.node.id < Integer.MAX_VALUE) + testComparable(a, TxnId.fromValues(a.epoch(), a.hlc(), a.flags(), new Node.Id(a.node.id + 1))); + }); + } + + private static void testComparable(TxnId a, TxnId b) + { + ByteBuffer abb = ByteBuffer.allocate((int) CommandSerializers.txnId.serializedSize(a)); + CommandSerializers.txnId.serializeComparable(a, abb, ByteBufferAccessor.instance, 0); + TxnId art = CommandSerializers.txnId.deserializeComparable(abb, ByteBufferAccessor.instance, 0); + Assertions.assertThat(art).isEqualTo(a); + testComparable(abb, a, b); + } + + private static void testComparable(ByteBuffer abb, TxnId a, TxnId b) + { + ByteBuffer bbb = ByteBuffer.allocate((int) CommandSerializers.txnId.serializedSize(b)); + CommandSerializers.txnId.serializeComparable(b, bbb, ByteBufferAccessor.instance, 0); + Assertions.assertThat(FastByteOperations.compareUnsigned(abb, bbb)).isEqualTo(a.compareTo(b)); + TxnId brt = CommandSerializers.txnId.deserializeComparable(bbb, ByteBufferAccessor.instance, 0); + Assertions.assertThat(brt).isEqualTo(b); } } diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java index 5b1d5ca42e98..0c964e5d1266 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java @@ -18,18 +18,47 @@ package org.apache.cassandra.service.accord.serializers; +import java.io.IOException; +import java.util.Arrays; + import org.junit.Test; +import accord.api.RoutingKey; +import accord.local.StoreParticipants; +import accord.primitives.AbstractRanges; +import accord.primitives.AbstractUnseekableKeys; +import accord.primitives.FullKeyRoute; +import accord.primitives.FullRangeRoute; +import accord.primitives.KeyRoute; +import accord.primitives.PartialKeyRoute; +import accord.primitives.PartialRangeRoute; +import accord.primitives.Participants; +import accord.primitives.Range; +import accord.primitives.RangeRoute; import accord.primitives.Ranges; +import accord.primitives.Route; +import accord.primitives.RoutingKeys; +import accord.primitives.Unseekable; import accord.utils.Gen; +import accord.utils.RandomSource; +import accord.utils.RandomTestRunner; +import accord.utils.UnhandledEnum; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.Serializers; import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.TokenRange; +import org.apache.cassandra.service.accord.api.TokenKey; import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.CassandraGenerators; import static accord.utils.Property.qt; +import static org.apache.cassandra.utils.AccordGenerators.fromQT; import static org.apache.cassandra.utils.AccordGenerators.maybeUpdatePartitioner; +import static org.apache.cassandra.utils.AccordGenerators.partitioner; public class KeySerializersTest { @@ -50,9 +79,149 @@ public void ranges() }); } + @Test + public void storeParticipants() + { + DataOutputBuffer output = new DataOutputBuffer(); + for (int i = 0 ; i < 10000 ; ++i) + { + RandomTestRunner.test().check(rs -> testTwo(rs, output)); + } + } + + private void testTwo(RandomSource rs, DataOutputBuffer output) + { + IPartitioner partitioner = partitioner().next(rs); + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + testOne(rs, output, keyRoute(partitioner, rs)); + testOne(rs, output, rangeRoute(partitioner, rs)); + } + + private void testOne(RandomSource rs, DataOutputBuffer output, Participants superset) + { + Route route = null; + if (rs.nextBoolean()) superset = ((Route)superset).participantsOnly(); + else route = (Route)subset(rs, superset, false); + Participants hasTouched = subset(rs, superset, true); + Participants touches = subset(rs, hasTouched, true); + Participants owns = subset(rs, touches, true); + Participants executes = rs.nextBoolean() ? subset(rs, owns, true) : null; + Participants waitsOn = executes != null ? subset(rs, executes, true) : null; + StoreParticipants participants = StoreParticipants.create(route, owns, executes, waitsOn, touches, hasTouched); + try + { + Serializers.testSerde(output, CommandSerializers.participants, participants); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + private static KeyRoute keyRoute(IPartitioner partitioner, RandomSource rs) + { + TableId tableId = fromQT(CassandraGenerators.TABLE_ID_GEN).next(rs); + Gen tokenGen = fromQT(CassandraGenerators.token(partitioner)); + Gen keyGen = AccordGenerators.routingKeyGen(ignore -> tableId, tokenGen, partitioner); + RoutingKey[] ks = new RoutingKey[rs.nextInt(1, 10)]; + for (int i = 0 ; i < ks.length ; ++i) + ks[i] = keyGen.next(rs); + Arrays.sort(ks); + int count = 1; + for (int i = 1 ; i < ks.length ; ++i) + { + if (!ks[count - 1].equals(ks[i])) + ks[count++] = ks[i]; + } + if (count != ks.length) + ks = Arrays.copyOf(ks, count); + + float f = rs.nextFloat(); + if (f < 0.66f) + { + int homeKey = rs.nextInt(ks.length); + return f < 0.33f ? new FullKeyRoute(ks[homeKey], ks) : new PartialKeyRoute(ks[homeKey], ks); + } + return new PartialKeyRoute(keyGen.next(rs), ks); + } + + private static RangeRoute rangeRoute(IPartitioner partitioner, RandomSource rs) + { + TableId tableId = fromQT(CassandraGenerators.TABLE_ID_GEN).next(rs); + Gen tokenGen = fromQT(CassandraGenerators.token(partitioner)); + Gen keyGen = AccordGenerators.routingKeyGen(ignore -> tableId, tokenGen, partitioner); + TokenKey[] ks = new TokenKey[rs.nextInt(1, 10) * 2]; + for (int i = 0 ; i < ks.length ; ++i) + ks[i] = keyGen.next(rs); + Arrays.sort(ks); + int count = 1; + for (int i = 1 ; i < ks.length ; ++i) + { + if (!ks[count - 1].equals(ks[i])) + ks[count++] = ks[i]; + } + Range[] ranges = new Range[count / 2]; + for (int i = 0 ; i < ranges.length ; ++i) + ranges[i] = TokenRange.create(ks[i*2], ks[i*2+1]); + + float f = rs.nextFloat(); + if (ranges.length > 0 && f < 0.66f) + { + RoutingKey homeKey = rs.nextBoolean() ? ks[rs.nextInt(ranges.length * 2)] : ranges[rs.nextInt(ranges.length)].someIntersectingRoutingKey(null); + return f < 0.33f ? new FullRangeRoute(homeKey, ranges) : new PartialRangeRoute(homeKey, ranges); + } + return new PartialRangeRoute(keyGen.next(rs), ranges); + } + + private static Participants subset(RandomSource rs, Participants superset, boolean changeType) + { + if (rs.nextBoolean()) + return changeType && superset instanceof Route && rs.nextBoolean() ? ((Route)superset).participantsOnly() : superset; + + int count = superset.isEmpty() ? 0 : rs.nextInt(superset.size()); + Participants subset = selectSubset(rs, count, superset); + if (superset instanceof Route && (!changeType || rs.nextBoolean())) + return superset.intersecting(subset); + return subset; + } + + private static Participants selectSubset(RandomSource rs, int count, Participants superset) + { + switch (superset.domain()) + { + default: throw UnhandledEnum.unknown(superset.domain()); + case Key: + { + AbstractUnseekableKeys in = (AbstractUnseekableKeys) superset; + RoutingKey[] out = new RoutingKey[count]; + int j = 0; + for (int i = 0 ; i < out.length ; ++i) + { + j += count == (in.size() - j) ? 0 : rs.nextInt(0, in.size() - j); + out[i] = in.get(j); + } + return (Participants) RoutingKeys.of(out); + } + + case Range: + { + AbstractRanges in = (AbstractRanges) superset; + Range[] out = new Range[count]; + int j = 0; + for (int i = 0 ; i < out.length ; ++i) + { + j += count == (in.size() - j) ? 0 : rs.nextInt(0, in.size() - j); + out[i] = in.get(j); + } + return (Participants) Ranges.of(out); + } + + } + + } + private static Gen rangesGen() { - return AccordGenerators.partitioner() - .flatMap(AccordGenerators::rangesSplitOrArbitrary); + return partitioner().flatMap(AccordGenerators::rangesSplitOrArbitrary); } } \ No newline at end of file From dc841f54815b77aa6dcbb70d7f168d0a40f0fb7c Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 11 Apr 2025 12:13:01 -0700 Subject: [PATCH 265/340] Fix RouteIndexTest when run with -latest.yaml patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-20549 --- test/unit/org/apache/cassandra/cql3/CQLTester.java | 9 +++++++-- .../apache/cassandra/index/accord/RouteIndexTest.java | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index df2c370d9bc1..91d86d2c811b 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -3556,12 +3556,17 @@ public static void setUpClass() } protected static void prePrepareServer() + { + setupFileSystem(); + + CQLTester.prePrepareServer(); + } + + protected static void setupFileSystem() { fs = FileSystems.newGlobalInMemoryFileSystem(); CassandraRelevantProperties.IGNORE_MISSING_NATIVE_FILE_HINTS.setBoolean(true); FileSystems.maybeCreateTmp(); - - CQLTester.prePrepareServer(); } @Before diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java index 3e808b7e11e7..011998e6ac11 100644 --- a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -130,6 +130,8 @@ public static void setUpClass() // this flag disables that flush CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true); + setupFileSystem(); + DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setAccordTransactionsEnabled(true); // disable journal compaction so the test can control when it happens @@ -137,7 +139,7 @@ public static void setUpClass() DatabaseDescriptor.setIncrementalBackupsEnabled(false); DatabaseDescriptor.setAutoSnapshot(false); - CQLTester.InMemory.prePrepareServer(); + CQLTester.prePrepareServer(); // Journal will async release segment references and close files... this adds possible race condition issues with truncate // so make these steps happen inline. From 6c6082b5d0b1f51cd27234a09216b3af3fba17f1 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 14 Apr 2025 09:33:24 -0700 Subject: [PATCH 266/340] Accord: switch back to 1g heap to keep CI stable patch by David Capwell; reviewed by Benedict Elliott Smith for CASSANDRA-20553 --- build.xml | 2 +- .../org/apache/cassandra/index/accord/RouteIndexTest.java | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/build.xml b/build.xml index 9c0564337a1c..ef1fdd661dd5 100644 --- a/build.xml +++ b/build.xml @@ -1314,7 +1314,7 @@ - + diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java index 011998e6ac11..963ee402c8ee 100644 --- a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -111,7 +111,7 @@ import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; -public class RouteIndexTest extends CQLTester.InMemory +public class RouteIndexTest extends CQLTester { private static final Node.Id NODE = new Node.Id(42); private static final int MIN_TOKEN = 0; @@ -130,8 +130,6 @@ public static void setUpClass() // this flag disables that flush CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true); - setupFileSystem(); - DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setAccordTransactionsEnabled(true); // disable journal compaction so the test can control when it happens From 71aedb285181583b57509a117d6596cadfb41c5a Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 14 Apr 2025 13:59:09 -0700 Subject: [PATCH 267/340] Cleanup Accord diff to get it ready for merge patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-20548 --- .../cassandra/config/DatabaseDescriptor.java | 2 +- .../cql3/restrictions/SimpleRestriction.java | 3 +- .../cassandra/db/marshal/AbstractType.java | 35 ---------- .../cassandra/db/marshal/CompositeType.java | 64 ------------------- .../db/marshal/MultiElementType.java | 10 --- .../cassandra/db/marshal/TupleType.java | 6 +- .../apache/cassandra/db/marshal/UserType.java | 2 +- .../db/transform/FilteredPartitions.java | 1 - .../repair/RepairMessageVerbHandler.java | 7 +- .../cassandra/repair/SharedContext.java | 6 ++ .../cassandra/schema/ColumnMetadata.java | 6 +- .../org/apache/cassandra/schema/TableId.java | 26 ++------ .../cassandra/service/CassandraDaemon.java | 2 - .../service/accord/api/PartitionKey.java | 4 +- .../service/accord/txn/TxnReference.java | 3 +- .../service/paxos/PaxosRequestCallback.java | 9 ++- .../cleanup/PaxosStartPrepareCleanup.java | 4 +- .../cassandra/tcm/ClusterMetadataService.java | 36 ++++++++++- .../cassandra/tcm/membership/NodeVersion.java | 2 +- .../cassandra/tcm/serialization/Version.java | 26 +++++--- .../org/apache/cassandra/tools/FieldUtil.java | 31 ++------- 21 files changed, 93 insertions(+), 192 deletions(-) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index a1b71849511b..0439dbb00c5f 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5333,7 +5333,7 @@ public static long getAccordRepairTimeoutNanos() public static boolean getAccordTransactionsEnabled() { - return conf.accord.enabled; + return conf == null ? false : conf.accord.enabled; } public static void setAccordTransactionsEnabled(boolean b) diff --git a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java index 9a31c09ec114..8592fbbb7b17 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java @@ -33,7 +33,6 @@ import org.apache.cassandra.cql3.terms.Term; import org.apache.cassandra.cql3.terms.Terms; import org.apache.cassandra.db.filter.RowFilter; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.index.Index; import org.apache.cassandra.index.IndexRegistry; @@ -403,7 +402,7 @@ else if (isIN()) private static ByteBuffer multiInputOperatorValues(ColumnMetadata column, List values) { - return ListType.getInstance(column.type, false).pack(values, ByteBufferAccessor.instance); + return ListType.getInstance(column.type, false).pack(values); } @Override diff --git a/src/java/org/apache/cassandra/db/marshal/AbstractType.java b/src/java/org/apache/cassandra/db/marshal/AbstractType.java index 3317b41978ba..42190e0c2e84 100644 --- a/src/java/org/apache/cassandra/db/marshal/AbstractType.java +++ b/src/java/org/apache/cassandra/db/marshal/AbstractType.java @@ -21,7 +21,6 @@ import java.lang.reflect.Method; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -31,7 +30,6 @@ import javax.annotation.Nullable; -import io.netty.util.concurrent.FastThreadLocal; import org.apache.cassandra.cql3.AssignmentTestable; import org.apache.cassandra.cql3.CQL3Type; import org.apache.cassandra.cql3.ColumnSpecification; @@ -741,39 +739,6 @@ public ByteSource asComparableBytes(ValueAccessor accessor, V value, Byte throw new UnsupportedOperationException(getClass().getSimpleName() + " does not implement asComparableBytes"); } - protected static final FastThreadLocal tmpFlattenBuffer = new FastThreadLocal<>(); - public static byte[] flattenByteSource(ByteSource source) - { - byte[] tmpBytes = tmpFlattenBuffer.get(); - byte[] bytes = tmpBytes; - if (bytes == null) bytes = new byte[16]; - int c = 0; - while (true) - { - int b = source.next(); - if (b == ByteSource.END_OF_STREAM) - break; - - if (c == bytes.length) - bytes = Arrays.copyOf(bytes, c * 2); - - bytes[c++] = (byte)b; - } - - byte[] result = Arrays.copyOf(bytes, c); - if (bytes != tmpBytes) tmpFlattenBuffer.set(bytes); - return result; - } - - public byte[] asFlatComparableBytes(ValueAccessor accessor, V value, ByteComparable.Version version) - { - ByteSource source = asComparableBytes(accessor, value, version); - if (source == null) - return null; - - return flattenByteSource(source); - } - public final ByteSource asComparableBytes(ByteBuffer byteBuffer, ByteComparable.Version version) { return asComparableBytes(ByteBufferAccessor.instance, byteBuffer, version); diff --git a/src/java/org/apache/cassandra/db/marshal/CompositeType.java b/src/java/org/apache/cassandra/db/marshal/CompositeType.java index 78ef0694efb3..c555d928fbad 100644 --- a/src/java/org/apache/cassandra/db/marshal/CompositeType.java +++ b/src/java/org/apache/cassandra/db/marshal/CompositeType.java @@ -29,7 +29,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import accord.utils.Invariants; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; import org.apache.cassandra.serializers.BytesSerializer; @@ -40,15 +39,9 @@ import org.apache.cassandra.utils.bytecomparable.ByteSource; import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse; -import static accord.utils.Invariants.Paranoia.CONSTANT; -import static accord.utils.Invariants.Paranoia.LINEAR; -import static accord.utils.Invariants.ParanoiaCostFactor.LOW; import static com.google.common.collect.Iterables.any; import static com.google.common.collect.Iterables.transform; import static org.apache.cassandra.utils.bytecomparable.ByteSource.END_OF_STREAM; -import static org.apache.cassandra.utils.bytecomparable.ByteSource.NEXT_COMPONENT; -import static org.apache.cassandra.utils.bytecomparable.ByteSource.NEXT_COMPONENT_NULL; -import static org.apache.cassandra.utils.bytecomparable.ByteSource.TERMINATOR; /* * The encoding of a CompositeType column name should be: @@ -261,63 +254,6 @@ public ByteSource asComparableBytes(ValueAccessor accessor, V data, Versi return ByteSource.withTerminatorMaybeLegacy(version, END_OF_STREAM, srcs); } - @Override - public byte[] asFlatComparableBytes(ValueAccessor accessor, V data, Version version) - { - if (data == null || accessor.isEmpty(data)) - return null; - - byte[] tmpBytes = tmpFlattenBuffer.get(); - byte[] bytes = tmpBytes; - if (bytes == null) bytes = new byte[16]; - - int c = 0; - int length = accessor.size(data); - - // statics go first - boolean isStatic = readIsStaticInternal(data, accessor); - int offset = startingOffsetInternal(isStatic); - bytes[c++] = (byte) (isStatic ? NEXT_COMPONENT_NULL : NEXT_COMPONENT); - bytes[c++] = (byte) (NEXT_COMPONENT); - - int i = 0; - byte lastEoc = 0; - while (offset < length) - { - // Only the end-of-component byte of the last component of this composite can be non-zero, so the - // component before can't have a non-zero end-of-component byte. - assert lastEoc == 0 : lastEoc; - - int componentLength = accessor.getUnsignedShort(data, offset); - offset += 2; - ByteSource tmp = types.get(i).asComparableBytes(accessor, accessor.slice(data, offset, componentLength), version); - while (true) - { - int b = tmp.next(); - if (b == END_OF_STREAM) break; - - if (c == bytes.length) bytes = Arrays.copyOf(bytes, c * 2); - bytes[c++] = (byte)b; - } - offset += componentLength; - lastEoc = accessor.getByte(data, offset); - offset += 1; - if (c == bytes.length) bytes = Arrays.copyOf(bytes, c * 2); - bytes[c++] = (byte) NEXT_COMPONENT; - bytes[c++] = (byte) (lastEoc & 0xFF ^ 0x80); // end-of-component also takes part in comparison as signed byte - bytes[c++] = (byte) (offset < length ? NEXT_COMPONENT : version == Version.LEGACY ? END_OF_STREAM : TERMINATOR); - ++i; - } - - byte[] result = Arrays.copyOf(bytes, c); - if (bytes != tmpBytes) tmpFlattenBuffer.set(bytes); - byte[] test = super.asFlatComparableBytes(accessor, data, version); - if (Invariants.isParanoid() && Invariants.testParanoia(LINEAR, CONSTANT, LOW)) Invariants.require(Arrays.equals(test, result)); - V roundtrip = fromComparableBytes(accessor, ByteSource.peekable(ByteSource.of(result, version)), version); - Invariants.require(accessor.compare(data, roundtrip, accessor) == 0); - return result; - } - @Override public V fromComparableBytes(ValueAccessor accessor, ByteSource.Peekable comparableBytes, Version version) { diff --git a/src/java/org/apache/cassandra/db/marshal/MultiElementType.java b/src/java/org/apache/cassandra/db/marshal/MultiElementType.java index 4519a4341960..99ce762d6abc 100644 --- a/src/java/org/apache/cassandra/db/marshal/MultiElementType.java +++ b/src/java/org/apache/cassandra/db/marshal/MultiElementType.java @@ -57,16 +57,6 @@ public ByteBuffer pack(List elements) return pack(elements, ByteBufferAccessor.instance); } - public final ByteBuffer packBuffer(List elements) - { - return pack(elements, ByteBufferAccessor.instance); - } - - public final byte[] packArray(List elements) - { - return pack(elements, ByteArrayAccessor.instance); - } - /** * Returns the serialized representation of the elements composing the specified value. * diff --git a/src/java/org/apache/cassandra/db/marshal/TupleType.java b/src/java/org/apache/cassandra/db/marshal/TupleType.java index ede083ab39f6..47301b6e97a7 100644 --- a/src/java/org/apache/cassandra/db/marshal/TupleType.java +++ b/src/java/org/apache/cassandra/db/marshal/TupleType.java @@ -383,7 +383,7 @@ public V pack(List elements, ValueAccessor accessor) public ByteBuffer pack(ByteBuffer... components) { - return pack(Arrays.asList(components), ByteBufferAccessor.instance); + return pack(Arrays.asList(components)); } @Override @@ -467,7 +467,7 @@ public ByteBuffer fromString(String source) fields.add(type.fromString(fieldString)); } } - return pack(fields, ByteBufferAccessor.instance); + return pack(fields); } @Override @@ -608,7 +608,7 @@ public ByteBuffer getMaskedValue() for (AbstractType type : types) buffers.add(type.getMaskedValue()); - return serializer.serialize(pack(buffers, ByteBufferAccessor.instance)); + return serializer.serialize(pack(buffers)); } @Override diff --git a/src/java/org/apache/cassandra/db/marshal/UserType.java b/src/java/org/apache/cassandra/db/marshal/UserType.java index d20da2c0ab39..15ab78e82a2a 100644 --- a/src/java/org/apache/cassandra/db/marshal/UserType.java +++ b/src/java/org/apache/cassandra/db/marshal/UserType.java @@ -215,7 +215,7 @@ public ByteBuffer serializeForNativeProtocol(Iterator> cells) while (components.size() < size()) components.add(null); - return pack(components, ByteBufferAccessor.instance); + return pack(components); } public void validateCell(Cell cell) throws MarshalException diff --git a/src/java/org/apache/cassandra/db/transform/FilteredPartitions.java b/src/java/org/apache/cassandra/db/transform/FilteredPartitions.java index d8bdbcefd3f2..3486d275e813 100644 --- a/src/java/org/apache/cassandra/db/transform/FilteredPartitions.java +++ b/src/java/org/apache/cassandra/db/transform/FilteredPartitions.java @@ -25,7 +25,6 @@ import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.db.rows.RowIterator; -// TODO (review): Why was this final? public class FilteredPartitions extends BasePartitions> implements PartitionIterator { // wrap basic iterator for transformation diff --git a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java index 5f69da549d2d..72252f56ab2d 100644 --- a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java +++ b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java @@ -22,6 +22,7 @@ import java.util.function.BiFunction; import java.util.function.Function; +import org.apache.cassandra.config.DatabaseDescriptor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,7 +48,6 @@ import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.PreviewKind; -import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.TimeUUID; @@ -93,9 +93,12 @@ private PreviewKind previewKind(TimeUUID sessionID) throws NoSuchRepairSessionEx return prs != null ? prs.previewKind : PreviewKind.NONE; } + @Override public void doVerb(final Message message) { - ClusterMetadataService.instance().fetchLogFromCMS(message.epoch()); + if (DatabaseDescriptor.getAccordTransactionsEnabled() + && ctx.cms().maybeFetchLogFromPeerOrCMSAsync(ctx.messaging(), message, () -> doVerb(message))) + return; // TODO add cancel/interrupt message RepairJobDesc desc = message.payload.desc; try diff --git a/src/java/org/apache/cassandra/repair/SharedContext.java b/src/java/org/apache/cassandra/repair/SharedContext.java index 3fdb15c9203a..790fe94886b9 100644 --- a/src/java/org/apache/cassandra/repair/SharedContext.java +++ b/src/java/org/apache/cassandra/repair/SharedContext.java @@ -39,6 +39,7 @@ import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.service.paxos.cleanup.PaxosRepairState; import org.apache.cassandra.streaming.StreamPlan; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MBeanWrapper; @@ -88,6 +89,11 @@ default Supplier timeUUID() return TimeUUID.Generator::nextTimeUUID; } + default ClusterMetadataService cms() + { + return ClusterMetadataService.instance(); + } + class Global implements SharedContext { public static final Global instance = new Global(); diff --git a/src/java/org/apache/cassandra/schema/ColumnMetadata.java b/src/java/org/apache/cassandra/schema/ColumnMetadata.java index 75a92e1dc76b..fc8cd984c9af 100644 --- a/src/java/org/apache/cassandra/schema/ColumnMetadata.java +++ b/src/java/org/apache/cassandra/schema/ColumnMetadata.java @@ -735,7 +735,7 @@ public void serialize(ColumnMetadata t, DataOutputPlus out, Version version) thr if (hasConstraints) ColumnConstraints.serializer.serialize(t.columnConstraints, out, version); } - if (version.isAtLeast(Version.V3)) + if (version.isAtLeast(Version.V7)) out.writeInt(t.uniqueId); } @@ -761,7 +761,7 @@ public ColumnMetadata deserialize(DataInputPlus in, Types types, UserFunctions f else constraints = ColumnConstraints.NO_OP; int uniqueId = NO_UNIQUE_ID; - if (version.isAtLeast(Version.V3)) + if (version.isAtLeast(Version.V7)) uniqueId = in.readInt(); return new ColumnMetadata(ksName, tableName, new ColumnIdentifier(nameBB, name), type, uniqueId, position, kind, mask, constraints); } @@ -786,7 +786,7 @@ public long serializedSize(ColumnMetadata t, Version version) BOOL_SIZE + ((t.mask == null) ? 0 : ColumnMask.serializer.serializedSize(t.mask, version)) + constraintsSize + - (version.isAtLeast(Version.V3) ? 4 : 0); + (version.isAtLeast(Version.V7) ? 4 : 0); } } } diff --git a/src/java/org/apache/cassandra/schema/TableId.java b/src/java/org/apache/cassandra/schema/TableId.java index 8b4b0c9913bd..4c69326bc7f1 100644 --- a/src/java/org/apache/cassandra/schema/TableId.java +++ b/src/java/org/apache/cassandra/schema/TableId.java @@ -22,14 +22,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; import java.util.function.LongUnaryOperator; import javax.annotation.Nullable; - -import org.agrona.collections.Hashing; -import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.commons.lang3.ArrayUtils; @@ -58,8 +53,7 @@ public final class TableId implements Comparable { public static final long MAGIC = 1956074401491665062L; public static final long EMPTY_SIZE = ObjectSizes.measureDeep(new UUID(0, 0)); - - private static final ConcurrentHashMap internCache = new ConcurrentHashMap<>(); + private static final int MAGIC_BYTE = (int) ((flipSign(MAGIC) >>> 56) & 0xf0); final long msb, lsb; @@ -170,7 +164,8 @@ public UUID asUUID() @Override public int hashCode() { - return Hashing.hash(msb ^ lsb); + long hilo = msb ^ lsb; + return ((int)(hilo >> 32)) ^ (int) hilo; } @Override @@ -185,7 +180,7 @@ public final boolean equals(Object o) @Override public String toString() { - return new UUID(msb, lsb).toString(); + return asUUID().toString(); } public void serialize(DataOutput out) throws IOException @@ -217,8 +212,6 @@ public final int serializedSize() return 16; } - private static final int MAGIC_BYTE = (int) ((flipSign(MAGIC) >>> 56) & 0xf0); - public void serializeCompact(DataOutputPlus out) throws IOException { serializeCompact(out, Long.compare(msb, MAGIC), msb, lsb); @@ -381,12 +374,6 @@ private static TableId deserializeCompact(V src, ValueAccessor accessor, else return new TableId(MAGIC, transform.applyAsLong(accessor.getLeastSignificantBytes(src, offset, b & 0x0f))); } - public TableId intern() - { - TableId interned = internCache.putIfAbsent(this, this); - return interned == null ? this : interned; - } - @Override public int compareTo(TableId that) { @@ -435,9 +422,4 @@ public long serializedSize(TableId t, Version version) return t.serializedSize(); } }; - - public static void scheduleCachePruning() - { - ScheduledExecutors.scheduledFastTasks.scheduleSelfRecurring(internCache::clear, 1, TimeUnit.HOURS); - } } diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index d52392b31884..d3c787d2e2db 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -78,7 +78,6 @@ import org.apache.cassandra.net.StartupClusterConnectivityChecker; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.TableId; import org.apache.cassandra.security.ThreadAwareSecurityManager; import org.apache.cassandra.service.accord.AccordOperations; import org.apache.cassandra.service.paxos.PaxosState; @@ -424,7 +423,6 @@ protected void setup() logger.info("Prewarming of auth caches is disabled"); PaxosState.startAutoRepairs(); - TableId.scheduleCachePruning(); completeSetup(); } diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index 2c0619fd9ea9..c98ff3e29ab5 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -147,14 +147,14 @@ public void skip(DataInputPlus in) throws IOException @Override public PartitionKey deserialize(DataInputPlus in) throws IOException { - TableId tableId = TableId.deserializeCompact(in).intern(); + TableId tableId = TableId.deserializeCompact(in); DecoratedKey key = getPartitioner().decorateKey(ByteBufferUtil.readWithVIntLength(in)); return new PartitionKey(tableId, key); } public PartitionKey deserialize(V src, ValueAccessor accessor, int offset) throws IOException { - TableId tableId = TableId.deserializeCompact(src, accessor, offset).intern(); + TableId tableId = TableId.deserializeCompact(src, accessor, offset); offset += tableId.serializedCompactSize(); int numBytes = accessor.getUnsignedVInt32(src, offset); offset += VIntCoding.readLengthOfVInt(src, accessor, offset); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java index 52017beba7d3..ff8ec54d11ad 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java @@ -24,7 +24,6 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.AbstractType; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.CollectionType; import org.apache.cassandra.db.marshal.CompositeType; import org.apache.cassandra.db.marshal.SetType; @@ -194,7 +193,7 @@ public ByteBuffer getFrozenFieldValue(Cell udt) { UserType userType = (UserType) column.type; int field = ByteBufferUtil.getUnsignedShort(path.get(0), 0); - return userType.unpack(udt.buffer(), ByteBufferAccessor.instance).get(field); + return userType.unpack(udt.buffer()).get(field); } public AbstractType getFieldSelectionType() diff --git a/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java b/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java index fce825fa3b84..06509edfc89d 100644 --- a/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java +++ b/src/java/org/apache/cassandra/service/paxos/PaxosRequestCallback.java @@ -20,6 +20,7 @@ import java.util.function.BiFunction; +import org.apache.cassandra.config.DatabaseDescriptor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +31,7 @@ import org.apache.cassandra.net.Message; import org.apache.cassandra.service.FailureRecordingCallback; import org.apache.cassandra.tcm.ClusterMetadataService; +import org.apache.cassandra.utils.TriFunction; import static org.apache.cassandra.exceptions.RequestFailure.TIMEOUT; import static org.apache.cassandra.exceptions.RequestFailure.UNKNOWN; @@ -45,7 +47,8 @@ public abstract class PaxosRequestCallback extends FailureRecordingCallback message) { - ClusterMetadataService.instance().fetchLogFromCMS(message.epoch()); + if (DatabaseDescriptor.getAccordTransactionsEnabled()) + ClusterMetadataService.instance().fetchLogFromPeerOrCMS(message.from(), message.epoch()); onResponse(message.payload, message.from()); } @@ -71,10 +74,6 @@ protected void executeOnSelf(I parameter, BiFunction { - D apply(A var1, B var2, C var3); - } - protected void executeOnSelf(I parameter1, J parameter2, TriFunction execute) { T response; diff --git a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java index 21fcb90da49b..71f02d374a5b 100644 --- a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java +++ b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosStartPrepareCleanup.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Set; +import org.apache.cassandra.config.DatabaseDescriptor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -185,7 +186,8 @@ public long serializedSize(Request request, int version) public static IVerbHandler createVerbHandler(SharedContext ctx) { return in -> { - ClusterMetadataService.instance().fetchLogFromCMS(in.epoch()); + if (DatabaseDescriptor.getAccordTransactionsEnabled()) + ClusterMetadataService.instance().fetchLogFromPeerOrCMS(in.from(), in.epoch()); ColumnFamilyStore table = Schema.instance.getColumnFamilyStoreInstance(in.payload.tableId); // Note: pre-5.1 we would use gossip state included in the request payload to update topology // prior to cleanup. Topology is no longer derived from gossip state, so this has been removed. diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java index a9fff2dcdef5..15b21e1aa7d7 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java @@ -41,6 +41,7 @@ import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.exceptions.ExceptionCode; +import org.apache.cassandra.exceptions.RequestFailure; import org.apache.cassandra.exceptions.StartupException; import org.apache.cassandra.gms.FailureDetector; import org.apache.cassandra.io.util.FileInputStreamPlus; @@ -48,6 +49,8 @@ import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.TCMMetrics; import org.apache.cassandra.net.IVerbHandler; +import org.apache.cassandra.net.Message; +import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.tcm.listeners.SchemaListener; @@ -739,7 +742,7 @@ public Future fetchLogFromPeerOrCMSAsync(ClusterMetadata metada ScheduledExecutors.optionalTasks.submit(() -> { try { - future.setSuccess(ClusterMetadataService.instance().fetchLogFromPeerOrCMS(metadata, from, awaitAtLeast)); + future.setSuccess(fetchLogFromPeerOrCMS(metadata, from, awaitAtLeast)); } catch (Throwable t) { @@ -751,6 +754,19 @@ public Future fetchLogFromPeerOrCMSAsync(ClusterMetadata metada return future; } + public boolean maybeFetchLogFromPeerOrCMSAsync(MessageDelivery messaging, Message message, Runnable onFetchSuccess) + { + ClusterMetadata metadata = metadata(); + if (metadata.epoch.isEqualOrAfter(metadata.epoch)) + return false; + Future f = fetchLogFromPeerOrCMSAsync(metadata, message.from(), message.epoch()); + f.addCallback((success, failure) -> { + if (failure != null) messaging.respondWithFailure(RequestFailure.UNKNOWN, message); + else message.verb().stage.execute(onFetchSuccess); + }); + return true; + } + /** * Combines {@link #fetchLogFromPeer} with {@link #fetchLogFromCMS} to synchronously fetch and apply log entries * up to the requested epoch. The supplied peer will be contacted first and if after doing so, the current local @@ -787,6 +803,24 @@ public ClusterMetadata fetchLogFromPeerOrCMS(ClusterMetadata metadata, InetAddre return metadata; } + /** + * Combines {@link #fetchLogFromPeer} with {@link #fetchLogFromCMS} to synchronously fetch and apply log entries + * up to the requested epoch. The supplied peer will be contacted first and if after doing so, the current local + * metadata is not caught up to at least the required epoch, a further request is made to the CMS. + * The returned ClusterMetadata is guaranteed to have been published, though it may have also been superceded by + * further updates. + * If the requested epoch is not reached even after fetching from the CMS, an IllegalStateException is thrown. + * @param from Initial peer to contact. Usually this is the sender of a message containing the requested epoch, + * which means it can be assumed that this peer (if available) can supply any missing log entries. + * @param awaitAtLeast The requested epoch. + * @return A published ClusterMetadata with all entries up to (at least) the requested epoch enacted. + * @throws IllegalStateException if the requested epoch could not be reached, even after falling back to CMS catchup + */ + public ClusterMetadata fetchLogFromPeerOrCMS(InetAddressAndPort from, Epoch awaitAtLeast) + { + return fetchLogFromPeerOrCMS(metadata(), from, awaitAtLeast); + } + public ClusterMetadata awaitAtLeast(Epoch epoch) throws InterruptedException, TimeoutException { return log.awaitAtLeast(epoch); diff --git a/src/java/org/apache/cassandra/tcm/membership/NodeVersion.java b/src/java/org/apache/cassandra/tcm/membership/NodeVersion.java index bc1bcc707e3d..64d1cc71c6f5 100644 --- a/src/java/org/apache/cassandra/tcm/membership/NodeVersion.java +++ b/src/java/org/apache/cassandra/tcm/membership/NodeVersion.java @@ -34,7 +34,7 @@ public class NodeVersion implements Comparable { public static final Serializer serializer = new Serializer(); - public static final Version CURRENT_METADATA_VERSION = Version.V6; + public static final Version CURRENT_METADATA_VERSION = Version.V7; public static final NodeVersion CURRENT = new NodeVersion(new CassandraVersion(FBUtilities.getReleaseVersionString()), CURRENT_METADATA_VERSION); private static final CassandraVersion SINCE_VERSION = CassandraVersion.CASSANDRA_5_1; diff --git a/src/java/org/apache/cassandra/tcm/serialization/Version.java b/src/java/org/apache/cassandra/tcm/serialization/Version.java index a10a4ed80752..955c6996e0c0 100644 --- a/src/java/org/apache/cassandra/tcm/serialization/Version.java +++ b/src/java/org/apache/cassandra/tcm/serialization/Version.java @@ -41,22 +41,30 @@ public enum Version * - Serialize MemtableParams when serializing TableParams */ V2(2), - /** - * - Added AccordFastPath - * - Added ConsensusMigrationState - * - Added AccordStaleReplicas - * - TableParam now has pendingDrop (accord table drop is multistep) + * - down nodes serialized in PrepareCMSReconfiguration */ V3(3), - - // Padding + /** + * - Serialize allowAutoSnapshot and incrementalBackups when serializing TableParams + */ V4(4), + /** + * - AlterSchema includes execution timestamp + * - PreInitialize includes datacenter (affects local serialization on first CMS node only) + */ V5(5), + /** + * - CEP-42 - Constraints framework. New version due to modifications in table metadata serialization. + */ V6(6), /** - * - Accord * - Track nodes removed + * - Column Metadata now stores a unique id + * - Added AccordFastPath + * - Added ConsensusMigrationState + * - Added AccordStaleReplicas + * - TableParam now has pendingDrop (accord table drop is multistep) */ V7(7), @@ -65,7 +73,7 @@ public enum Version /** * The version that Accord was added to TCM. */ - public static final Version MIN_ACCORD_VERSION = V3; + public static final Version MIN_ACCORD_VERSION = V7; private static Map values = new HashMap<>(); static diff --git a/src/java/org/apache/cassandra/tools/FieldUtil.java b/src/java/org/apache/cassandra/tools/FieldUtil.java index 413552624144..6c61e2b3f70a 100644 --- a/src/java/org/apache/cassandra/tools/FieldUtil.java +++ b/src/java/org/apache/cassandra/tools/FieldUtil.java @@ -19,9 +19,10 @@ package org.apache.cassandra.tools; import java.lang.reflect.Field; -import java.lang.reflect.Method; import java.lang.reflect.Modifier; +import org.apache.cassandra.utils.ReflectionUtils; + public class FieldUtil { public static void setInstanceUnsafe(Class klass, Object v, String fieldName) @@ -38,32 +39,12 @@ public static void setInstanceUnsafe(Class klass, Object v, String fieldName) private static void setInstanceUnsafeThrowing(Class klass, Object v, String fieldName) throws Throwable { - Field field = klass.getDeclaredField(fieldName); + Field field = ReflectionUtils.getField(klass, fieldName); field.setAccessible(true); - try - { - Field modifiers = Field.class.getDeclaredField("modifiers"); - modifiers.setAccessible(true); - modifiers.setInt(field, field.getModifiers() & ~Modifier.FINAL); - } - catch (NoSuchFieldException t) - { - // jdk17 fallback - Method getDeclaredFields0 = Class.class.getDeclaredMethod("getDeclaredFields0", boolean.class); - getDeclaredFields0.setAccessible(true); - Field[] fields = (Field[]) getDeclaredFields0.invoke(Field.class, false); - - for (Field f : fields) - { - if ("modifiers".equals(f.getName())) - { - f.setAccessible(true); - f.setInt(field, field.getModifiers() & ~Modifier.FINAL); - break; - } - } - } + Field modifiers = ReflectionUtils.getModifiersField(); + modifiers.setAccessible(true); + modifiers.setInt(field, field.getModifiers() & ~Modifier.FINAL); field.set(null, v); } From d3439c671acb88aea7814aff503d68c3f6ea3797 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Mon, 14 Apr 2025 17:25:53 -0500 Subject: [PATCH 268/340] ninja: remove accord_demo.txt and simulator.sh --- accord_demo.txt | 14 -------- simulator.sh | 88 ------------------------------------------------- 2 files changed, 102 deletions(-) delete mode 100644 accord_demo.txt delete mode 100755 simulator.sh diff --git a/accord_demo.txt b/accord_demo.txt deleted file mode 100644 index 63b7d21201d8..000000000000 --- a/accord_demo.txt +++ /dev/null @@ -1,14 +0,0 @@ -ccm create accord-cql-poc -n 3 -ccm start - -bin/cqlsh -e "CREATE KEYSPACE ks WITH replication={'class':'SimpleStrategy', 'replication_factor':3};" -bin/cqlsh -e "CREATE TABLE ks.tbl1 (k int PRIMARY KEY, v int) WITH transactional_mode = 'full';" -bin/cqlsh -e "CREATE TABLE ks.tbl2 (k int PRIMARY KEY, v int) WITH transactional_mode = 'full';" - -BEGIN TRANSACTION - LET row1 = (SELECT * FROM ks.tbl1 WHERE k = 1); - SELECT row1.v; - IF row1 IS NULL THEN - INSERT INTO ks.tbl2 (k, v) VALUES (1, 2); - END IF -COMMIT TRANSACTION; diff --git a/simulator.sh b/simulator.sh deleted file mode 100755 index 516405e974c2..000000000000 --- a/simulator.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -#ant jar simulator-jars - -DIR=`pwd` -JVM_OPTS="$JVM_OPTS -Dcassandra.config=file://$DIR/test/conf/cassandra.yaml" -JVM_OPTS="$JVM_OPTS -Dlogback.configurationFile=file://$DIR/test/conf/logback-simulator.xml" -JVM_OPTS="$JVM_OPTS -Dcassandra.logdir=$DIR/build/test/logs" -#JVM_OPTS="$JVM_OPTS -Djava.library.path=$DIR/lib/sigar-bin" -JVM_OPTS="$JVM_OPTS -Dlegacy-sstable-root=$DIR/test/data/legacy-sstables" -JVM_OPTS="$JVM_OPTS -Dinvalid-legacy-sstable-root=$DIR/test/data/invalid-legacy-sstables" -JVM_OPTS="$JVM_OPTS -Dcassandra.ring_delay_ms=1000" -JVM_OPTS="$JVM_OPTS -Dcassandra.skip_sync=true" -JVM_OPTS="$JVM_OPTS -ea" -JVM_OPTS="$JVM_OPTS -XX:MaxMetaspaceSize=1G" -JVM_OPTS="$JVM_OPTS -XX:SoftRefLRUPolicyMSPerMB=0" -JVM_OPTS="$JVM_OPTS -Dcassandra.strict.runtime.checks=true" -JVM_OPTS="$JVM_OPTS -javaagent:$DIR/build/test/lib/jars/simulator-asm.jar" -JVM_OPTS="$JVM_OPTS -Xbootclasspath/a:$DIR/build/test/lib/jars/simulator-bootstrap.jar" -JVM_OPTS="$JVM_OPTS -XX:ActiveProcessorCount=4" -JVM_OPTS="$JVM_OPTS -XX:-TieredCompilation" -JVM_OPTS="$JVM_OPTS -XX:Tier4CompileThreshold=1000" -JVM_OPTS="$JVM_OPTS -XX:ReservedCodeCacheSize=256M" -JVM_OPTS="$JVM_OPTS -Xmx8G" -JVM_OPTS="$JVM_OPTS -Dcassandra.test.simulator.determinismcheck=strict" -JVM_OPTS="$JVM_OPTS -Dcassandra.debugrefcount=false" -JVM_OPTS="$JVM_OPTS -Dcassandra.skip_sync=true" -JVM_OPTS="$JVM_OPTS -Dcassandra.tolerate_sstable_size=true" -JVM_OPTS="$JVM_OPTS -Dcassandra.test.simulator.debug=true" -JVM_OPTS="$JVM_OPTS -Dcassandra.test.simulator.determinismcheck=strict" -echo $JVM_OPTS - -CLASSPATH="$DIR"/build/test/classes -for dir in "$DIR"/build/classes/*; do - CLASSPATH="$CLASSPATH:$dir" -done - -for jar in "$DIR"/lib/*.jar; do - CLASSPATH="$CLASSPATH:$jar" -done -for jar in "$DIR"/build/*.jar; do - if [[ $jar != *"logback-classic"* ]]; then - CLASSPATH="$CLASSPATH:$jar" - fi -done -for jar in "$DIR"/build/lib/jars/*.jar; do - if [[ $jar != *"logback-classic"* ]]; then - CLASSPATH="$CLASSPATH:$jar" - fi -done -for jar in "$DIR"/build/test/lib/jars/*.jar; do - if [[ $jar != *"logback-classic"* ]]; then - CLASSPATH="$CLASSPATH:$jar" - fi -done - -CLASS="org.apache.cassandra.simulator.paxos.AccordSimulationRunner" -OPTS="run -n 3..6 -t 1000 --cluster-action-limit -1 -c 2 -s 30" - -echo "java -cp <...> $CLASS $OPTS $@" - -while true -do - echo "" - java -cp $CLASSPATH $JVM_OPTS $CLASS $OPTS $@ - status=$? - if [ $status -ne 0 ] ; then - exit $status - fi - -done From 4cdbb5cf2a8dc555ebdac4eaad504078d7e255bd Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 14 Apr 2025 16:24:09 -0700 Subject: [PATCH 269/340] Accord: Test fixes patch by Alex Petrov; reviewed by Benedict Elliott Smith, David Capwell for CASSANDRA-20552 --- src/java/org/apache/cassandra/net/Verb.java | 4 +-- .../service/accord/AccordService.java | 3 +- .../service/reads/AbstractReadExecutor.java | 6 ++-- .../test/log/FetchLogFromPeers2Test.java | 33 +++++++++---------- 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index e9258b5f9a78..5253124ffee2 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -293,7 +293,7 @@ public enum Verb // transactional cluster metadata TCM_COMMIT_RSP (801, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::commitResultSerializer, RESPONSE_HANDLER ), TCM_COMMIT_REQ (802, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::commitSerializer, () -> commitRequestHandler(), TCM_COMMIT_RSP ), - TCM_FETCH_CMS_LOG_RSP (803, P0, rpcTimeout, FETCH_METADATA, MessageSerializers::logStateSerializer, RESPONSE_HANDLER ), + TCM_FETCH_CMS_LOG_RSP (803, P0, shortTimeout, FETCH_METADATA, MessageSerializers::logStateSerializer, RESPONSE_HANDLER ), TCM_FETCH_CMS_LOG_REQ (804, P0, rpcTimeout, FETCH_METADATA, () -> FetchCMSLog.serializer, () -> fetchLogRequestHandler(), TCM_FETCH_CMS_LOG_RSP ), TCM_REPLICATION (805, P0, rpcTimeout, INTERNAL_METADATA, MessageSerializers::logStateSerializer, () -> replicationHandler() ), TCM_NOTIFY_RSP (806, P0, rpcTimeout, INTERNAL_METADATA, () -> Epoch.messageSerializer, RESPONSE_HANDLER ), @@ -304,7 +304,7 @@ public enum Verb TCM_ABORT_MIG (811, P0, rpcTimeout, INTERNAL_METADATA, () -> CMSInitializationRequest.Initiator.serializer,() -> Election.instance.abortHandler, TCM_INIT_MIG_RSP ), TCM_DISCOVER_RSP (812, P0, rpcTimeout, INTERNAL_METADATA, () -> Discovery.serializer, RESPONSE_HANDLER ), TCM_DISCOVER_REQ (813, P0, rpcTimeout, INTERNAL_METADATA, () -> NoPayload.serializer, () -> Discovery.instance.requestHandler, TCM_DISCOVER_RSP ), - TCM_FETCH_PEER_LOG_RSP (818, P0, rpcTimeout, FETCH_METADATA, MessageSerializers::logStateSerializer, RESPONSE_HANDLER ), + TCM_FETCH_PEER_LOG_RSP (818, P0, shortTimeout, FETCH_METADATA, MessageSerializers::logStateSerializer, RESPONSE_HANDLER ), TCM_FETCH_PEER_LOG_REQ (819, P0, rpcTimeout, FETCH_METADATA, () -> FetchPeerLog.serializer, () -> FetchPeerLog.Handler.instance, TCM_FETCH_PEER_LOG_RSP ), TCM_RECONSTRUCT_EPOCH_RSP (820, P0, rpcTimeout, FETCH_METADATA, MessageSerializers::logStateSerializer, () -> ResponseVerbHandler.instance ), TCM_RECONSTRUCT_EPOCH_REQ (821, P0, rpcTimeout, FETCH_METADATA, () -> ReconstructLogState.serializer, () -> ReconstructLogState.Handler.instance, TCM_FETCH_PEER_LOG_RSP ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index bed4b1396df4..d2682950afd4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -399,9 +399,10 @@ else if (images.isEmpty()) // First boot, single-node cluster int waitSeconds = 5; while (true) { + Epoch await = Epoch.max(Epoch.create(configService.currentEpoch()), metadata.epoch); try { - epochReady(metadata.epoch).get(waitSeconds, SECONDS); + epochReady(await).get(waitSeconds, SECONDS); break; } catch (TimeoutException e) diff --git a/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java b/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java index ae76c02d38b1..1774164fb964 100644 --- a/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java +++ b/src/java/org/apache/cassandra/service/reads/AbstractReadExecutor.java @@ -216,6 +216,9 @@ public static AbstractReadExecutor getReadExecutor(ClusterMetadata metadata, if (retry.equals(NeverSpeculativeRetryPolicy.INSTANCE) || consistencyLevel == ConsistencyLevel.EACH_QUORUM) return new NeverSpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime, false); + if (retry.equals(AlwaysSpeculativeRetryPolicy.INSTANCE)) + return new AlwaysSpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime); + // There are simply no extra replicas to speculate. // Handle this separately so it can record failed attempts to speculate due to lack of replicas if (replicaPlan.contacts().size() == replicaPlan.readCandidates().size()) @@ -223,9 +226,6 @@ public static AbstractReadExecutor getReadExecutor(ClusterMetadata metadata, boolean recordFailedSpeculation = consistencyLevel != ConsistencyLevel.ALL; return new NeverSpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime, recordFailedSpeculation); } - - if (retry.equals(AlwaysSpeculativeRetryPolicy.INSTANCE)) - return new AlwaysSpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime); else // PERCENTILE or CUSTOM. return new SpeculatingReadExecutor(coordinator, cfs, command, replicaPlan, requestTime); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java index 783583a4fb1f..d42c3796836f 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java @@ -19,7 +19,6 @@ package org.apache.cassandra.distributed.test.log; import java.util.UUID; -import java.util.concurrent.ExecutionException; import org.junit.Test; @@ -31,35 +30,39 @@ import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; -import static org.apache.cassandra.distributed.test.log.FetchLogFromPeersTest.*; +import static org.apache.cassandra.distributed.test.log.FetchLogFromPeersTest.ClusterState; +import static org.apache.cassandra.distributed.test.log.FetchLogFromPeersTest.Operation; +import static org.apache.cassandra.distributed.test.log.FetchLogFromPeersTest.coordinator; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class FetchLogFromPeers2Test extends TestBaseImpl { @Test - public void testSchema() throws Exception + public void testSchema() throws Throwable { - try (Cluster cluster = init(builder().withNodes(3) - .start())) + try (Cluster cluster = init(builder().withNodes(3).start())) { - cluster.schemaChange(withKeyspace("alter keyspace %s with replication = {'class':'SimpleStrategy', 'replication_factor':3}")); - cluster.schemaChange(withKeyspace("create table %s.tbl (id int primary key)")); - cluster.schemaChange(withKeyspace("create table %s.tbl2 (id int primary key)")); + cluster.schemaChange(withKeyspace("alter keyspace %s with replication = {'class':'SimpleStrategy', 'replication_factor':3} ")); + cluster.schemaChange(withKeyspace("create table %s.tbl (id int primary key) WITH speculative_retry = 'ALWAYS';")); for (ClusterState clusterState : ClusterState.values()) + { for (Operation operation : Operation.values()) { + cluster.filters().inbound().from(1, 2).to(1, 2).drop(); setupSchemaBehind(cluster); + cluster.filters().inbound().to(1).to(1).drop(); runQuery(cluster, clusterState, operation); + cluster.filters().reset(); } + } + } } - public void runQuery(Cluster cluster, ClusterState clusterState, Operation operation) throws ExecutionException, InterruptedException + public void runQuery(Cluster cluster, ClusterState clusterState, Operation operation) throws Throwable { - cluster.get(1).shutdown().get(); - // node2 is behind String query; switch (operation) @@ -78,7 +81,7 @@ public void runQuery(Cluster cluster, ClusterState clusterState, Operation opera long metricsBefore = cluster.get(2).callOnInstance(() -> TCMMetrics.instance.fetchedPeerLogEntries.getCount()); if (clusterState == ClusterState.COORDINATOR_BEHIND) { - long [] coordinatorBehindMetricsBefore = new long[cluster.size()]; + long[] coordinatorBehindMetricsBefore = new long[cluster.size()]; try { for (int i = 1; i <= cluster.size(); i++) @@ -102,20 +105,15 @@ public void runQuery(Cluster cluster, ClusterState clusterState, Operation opera } } assertTrue("Metric CoordinatorBehindSchema should have been bumped for at least one replica", metricBumped); - } cluster.coordinator(coordinator).execute(withKeyspace(query), ConsistencyLevel.QUORUM); assertTrue(cluster.get(2).logs().grep(mark, "Fetching log from /127.0.0.3:7012").getResult().size() > 0); long metricsAfter = cluster.get(2).callOnInstance(() -> TCMMetrics.instance.fetchedPeerLogEntries.getCount()); assertTrue(metricsAfter > metricsBefore); - - cluster.get(1).startup(); } public void setupSchemaBehind(Cluster cluster) { - cluster.filters().reset(); - cluster.filters().inbound().from(1).to(2).drop(); long epochBefore = cluster.get(3).callOnInstance(() -> ClusterMetadata.current().epoch.getEpoch()); cluster.coordinator(1).execute(withKeyspace("alter table %s.tbl with comment='test " + UUID.randomUUID() + "'"), ConsistencyLevel.ONE); cluster.get(3).runOnInstance(() -> { @@ -128,6 +126,5 @@ public void setupSchemaBehind(Cluster cluster) throw new RuntimeException(e); } }); - cluster.filters().reset(); } } From 91f29a1f866c15a522ece99d92ad44cb9d502c7d Mon Sep 17 00:00:00 2001 From: David Capwell Date: Thu, 17 Apr 2025 14:57:01 -0700 Subject: [PATCH 270/340] Accord: Hopefully last rebase cleanup patch by David Capwell; reviewed by Ariel Weisberg for CASSANDRA-20568 --- .../cassandra/exceptions/RequestFailure.java | 6 ++-- .../exceptions/RequestFailureReason.java | 25 +++++++++---- .../cassandra/service/StorageService.java | 3 +- .../accord/MigrationToAccordReadRaceTest.java | 8 +---- .../AccordInteropMultiNodeTableWalkBase.java | 8 ----- .../fuzz/sai/AccordFullMultiNodeSAITest.java | 3 ++ .../sai/AccordInteropMultiNodeSAITest.java | 3 ++ .../topology/AccordTopologyMixupTest.java | 2 +- .../cassandra/dht/BootStrapperTest.java | 36 ++++++++++--------- .../exceptions/RequestFailureReasonTest.java | 4 +-- .../apache/cassandra/utils/ASTGenerators.java | 1 + 11 files changed, 53 insertions(+), 46 deletions(-) diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailure.java b/src/java/org/apache/cassandra/exceptions/RequestFailure.java index 9f6d0575ce30..b9bba7fc7061 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailure.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailure.java @@ -43,7 +43,6 @@ public class RequestFailure { public static final RequestFailure UNKNOWN = new RequestFailure(RequestFailureReason.UNKNOWN); - public static final RequestFailure ACCORD_DISABLED = new RequestFailure(RequestFailureReason.ACCORD_DISABLED); public static final RequestFailure READ_TOO_MANY_TOMBSTONES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_TOMBSTONES); public static final RequestFailure TIMEOUT = new RequestFailure(RequestFailureReason.TIMEOUT); public static final RequestFailure INCOMPATIBLE_SCHEMA = new RequestFailure(RequestFailureReason.INCOMPATIBLE_SCHEMA); @@ -55,7 +54,7 @@ public class RequestFailure public static final RequestFailure COORDINATOR_BEHIND = new RequestFailure(RequestFailureReason.COORDINATOR_BEHIND); public static final RequestFailure READ_TOO_MANY_INDEXES = new RequestFailure(RequestFailureReason.READ_TOO_MANY_INDEXES); public static final RequestFailure RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM = new RequestFailure(RequestFailureReason.RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM); - public static final RequestFailure BOOTING = new RequestFailure(RequestFailureReason.BOOTING); + public static final RequestFailure INDEX_BUILD_IN_PROGRESS = new RequestFailure(RequestFailureReason.INDEX_BUILD_IN_PROGRESS); static { @@ -135,7 +134,6 @@ public static RequestFailure forReason(RequestFailureReason reason) { default: throw new IllegalStateException("Unhandled request failure reason " + reason); case UNKNOWN: return UNKNOWN; - case ACCORD_DISABLED: return ACCORD_DISABLED; case READ_TOO_MANY_TOMBSTONES: return READ_TOO_MANY_TOMBSTONES; case TIMEOUT: return TIMEOUT; case INCOMPATIBLE_SCHEMA: return INCOMPATIBLE_SCHEMA; @@ -146,8 +144,8 @@ public static RequestFailure forReason(RequestFailureReason reason) case INDEX_NOT_AVAILABLE: return INDEX_NOT_AVAILABLE; case COORDINATOR_BEHIND: return COORDINATOR_BEHIND; case READ_TOO_MANY_INDEXES: return READ_TOO_MANY_INDEXES; + case INDEX_BUILD_IN_PROGRESS: return INDEX_BUILD_IN_PROGRESS; case RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM: return RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM; - case BOOTING: return BOOTING; } } diff --git a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java index 38b921eb6a41..bafab71752c3 100644 --- a/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java +++ b/src/java/org/apache/cassandra/exceptions/RequestFailureReason.java @@ -18,9 +18,12 @@ package org.apache.cassandra.exceptions; import java.io.IOException; +import java.util.EnumSet; import java.util.HashMap; import java.util.Map; +import com.google.common.collect.Sets; + import org.apache.cassandra.db.filter.TombstoneOverwhelmingException; import org.apache.cassandra.index.IndexBuildInProgressException; import org.apache.cassandra.index.IndexNotAvailableException; @@ -47,11 +50,9 @@ public enum RequestFailureReason NOT_CMS (8), INVALID_ROUTING (9), COORDINATOR_BEHIND (10), + RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM (11), // The following codes have been ported from an external fork, where they were offset explicitly to avoid conflicts. INDEX_BUILD_IN_PROGRESS (503), - RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM (504), - BOOTING (505), - ACCORD_DISABLED (506) ; static @@ -71,10 +72,11 @@ public enum RequestFailureReason private static final Map codeToReasonMap = new HashMap<>(); private static final Map, RequestFailureReason> exceptionToReasonMap = new HashMap<>(); - private static final int REASONS_WITHOUT_EXCEPTIONS = 3; // UNKNOWN, NODE_DOWN, and READ_TOO_MANY_INDEXES static { + EnumSet withoutExceptions = EnumSet.of(UNKNOWN, NODE_DOWN, READ_TOO_MANY_INDEXES); + Sets.SetView withExceptions = Sets.difference(EnumSet.allOf(RequestFailureReason.class), withoutExceptions); RequestFailureReason[] reasons = values(); for (RequestFailureReason reason : reasons) @@ -92,9 +94,20 @@ public enum RequestFailureReason exceptionToReasonMap.put(InvalidRoutingException.class, INVALID_ROUTING); exceptionToReasonMap.put(CoordinatorBehindException.class, COORDINATOR_BEHIND); exceptionToReasonMap.put(IndexBuildInProgressException.class, INDEX_BUILD_IN_PROGRESS); + exceptionToReasonMap.put(RetryOnDifferentSystemException.class, RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM); - if (exceptionToReasonMap.size() != reasons.length - REASONS_WITHOUT_EXCEPTIONS) - throw new RuntimeException("A new RequestFailureReasons was probably added and you may need to update the exceptionToReasonMap"); + if (exceptionToReasonMap.size() != reasons.length - withoutExceptions.size()) + { + EnumSet actual = EnumSet.copyOf(exceptionToReasonMap.values()); + Sets.SetView missing = Sets.difference(withExceptions, actual); + Sets.SetView added = Sets.difference(actual, withExceptions); + StringBuilder sb = new StringBuilder(); + if (!missing.isEmpty()) + sb.append("Expected the following RequestFailureReason, but were missing: ").append(missing).append('\n'); + if (!added.isEmpty()) + sb.append("Unexpected RequestFailureReason found: ").append(added); + throw new AssertionError(sb.toString()); + } } public static RequestFailureReason fromCode(int code) diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 90e2f598d251..c97b2ec69523 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -4015,12 +4015,13 @@ synchronized void checkServiceAllowedToStart(String service) // Never ever do this at home. Used by tests. @VisibleForTesting - public void setPartitionerUnsafe(IPartitioner newPartitioner) + public IPartitioner setPartitionerUnsafe(IPartitioner newPartitioner) { checkNotNull(newPartitioner, "newPartitioner is null"); checkState(originalPartitioner == null, "Already changed the partitioner without resetting"); originalPartitioner = DatabaseDescriptor.setPartitionerUnsafe(newPartitioner); valueFactory = new VersionedValue.VersionedValueFactory(newPartitioner); + return originalPartitioner; } @VisibleForTesting diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordReadRaceTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordReadRaceTest.java index 3c47fa5871e0..4faa91ebceb6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordReadRaceTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/MigrationToAccordReadRaceTest.java @@ -20,6 +20,7 @@ import org.junit.Ignore; +@Ignore("Flakey") public class MigrationToAccordReadRaceTest extends AccordMigrationReadRaceTestBase { @Override @@ -27,11 +28,4 @@ protected boolean migratingAwayFromAccord() { return false; } - - @Ignore - @Override - public void testBounds() throws Throwable - { - super.testBounds(); - } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/AccordInteropMultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/AccordInteropMultiNodeTableWalkBase.java index 332b2e2b21ee..6e4c1ad1bda6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/AccordInteropMultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/AccordInteropMultiNodeTableWalkBase.java @@ -23,7 +23,6 @@ import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; -import org.apache.cassandra.distributed.api.IInstanceConfig; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; @@ -69,13 +68,6 @@ static void addUncaughtExceptionsFilter(Cluster cluster) } } - @Override - protected void clusterConfig(IInstanceConfig c) - { - super.clusterConfig(c); - c.set("transaction_timeout", "180s"); - } - @Override protected TableMetadata defineTable(RandomSource rs, String ks) { diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/AccordFullMultiNodeSAITest.java b/test/distributed/org/apache/cassandra/fuzz/sai/AccordFullMultiNodeSAITest.java index f2a94901a659..23b52833af31 100644 --- a/test/distributed/org/apache/cassandra/fuzz/sai/AccordFullMultiNodeSAITest.java +++ b/test/distributed/org/apache/cassandra/fuzz/sai/AccordFullMultiNodeSAITest.java @@ -18,11 +18,14 @@ package org.apache.cassandra.fuzz.sai; +import org.junit.Ignore; + import org.apache.cassandra.harry.SchemaSpec; import org.apache.cassandra.harry.gen.Generator; import org.apache.cassandra.harry.gen.SchemaGenerators; import org.apache.cassandra.service.consensus.TransactionalMode; +@Ignore("CASSANDRA-20567: Repair is failing due to missing SAI index files when using zero copy streaming") public class AccordFullMultiNodeSAITest extends MultiNodeSAITestBase { public AccordFullMultiNodeSAITest() diff --git a/test/distributed/org/apache/cassandra/fuzz/sai/AccordInteropMultiNodeSAITest.java b/test/distributed/org/apache/cassandra/fuzz/sai/AccordInteropMultiNodeSAITest.java index 6d507ecd5f21..ba937c0f919c 100644 --- a/test/distributed/org/apache/cassandra/fuzz/sai/AccordInteropMultiNodeSAITest.java +++ b/test/distributed/org/apache/cassandra/fuzz/sai/AccordInteropMultiNodeSAITest.java @@ -18,11 +18,14 @@ package org.apache.cassandra.fuzz.sai; +import org.junit.Ignore; + import org.apache.cassandra.harry.SchemaSpec; import org.apache.cassandra.harry.gen.Generator; import org.apache.cassandra.harry.gen.SchemaGenerators; import org.apache.cassandra.service.consensus.TransactionalMode; +@Ignore("CASSANDRA-20567: Repair is failing due to missing SAI index files when using zero copy streaming") public class AccordInteropMultiNodeSAITest extends MultiNodeSAITestBase { public AccordInteropMultiNodeSAITest() diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java b/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java index 5fbb93efca37..8d6bc0277331 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/AccordTopologyMixupTest.java @@ -148,7 +148,7 @@ private static Spec createSchemaSpec(RandomSource rs, Cluster cluster) private static CommandGen cqlOperations(Spec spec) { Gen select = (Gen) (Gen) fromQT(new ASTGenerators.SelectGenBuilder(spec.metadata).withLimit1().build()); - Gen mutation = (Gen) (Gen) fromQT(new ASTGenerators.MutationGenBuilder(spec.metadata).withoutTimestamp().withoutTtl().build()); + Gen mutation = (Gen) (Gen) fromQT(new ASTGenerators.MutationGenBuilder(spec.metadata).withoutTimestamp().withoutTtl().withAllowUpdateMultipleClusteringKeys(false).build()); Gen txn = (Gen) (Gen) fromQT(new ASTGenerators.TxnGenBuilder(spec.metadata).build()); Map, Integer> operations = new LinkedHashMap<>(); operations.put(select, 1); diff --git a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java index 9739d3ed5970..c139acbe3cd2 100644 --- a/test/unit/org/apache/cassandra/dht/BootStrapperTest.java +++ b/test/unit/org/apache/cassandra/dht/BootStrapperTest.java @@ -33,10 +33,8 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import org.junit.runner.RunWith; -import org.apache.cassandra.CassandraTestBase; -import org.apache.cassandra.CassandraTestBase.PrepareServerNoRegister; -import org.apache.cassandra.CassandraTestBase.UseMurmur3Partitioner; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -50,6 +48,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.StorageService; import org.apache.cassandra.streaming.StreamOperation; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.membership.NodeId; @@ -58,15 +57,16 @@ import org.apache.cassandra.utils.Pair; import org.jboss.byteman.contrib.bmunit.BMRule; import org.jboss.byteman.contrib.bmunit.BMRules; +import org.jboss.byteman.contrib.bmunit.BMUnitRunner; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -@UseMurmur3Partitioner -@PrepareServerNoRegister -public class BootStrapperTest extends CassandraTestBase +@RunWith(BMUnitRunner.class) +public class BootStrapperTest { + static IPartitioner oldPartitioner; static Predicate originalAlivePredicate = RangeStreamer.ALIVE_PREDICATE; public static AtomicBoolean nonOptimizationHit = new AtomicBoolean(false); public static AtomicBoolean optimizationHit = new AtomicBoolean(false); @@ -88,6 +88,9 @@ public boolean isAlive(InetAddressAndPort ep) @BeforeClass public static void setup() throws ConfigurationException { + DatabaseDescriptor.daemonInitialization(); + oldPartitioner = StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); + ServerTestUtils.prepareServerNoRegister(); SchemaLoader.startGossiper(); SchemaLoader.schemaDefinition("BootStrapperTest"); RangeStreamer.ALIVE_PREDICATE = Predicates.alwaysTrue(); @@ -97,6 +100,7 @@ public static void setup() throws ConfigurationException @AfterClass public static void tearDown() { + DatabaseDescriptor.setPartitionerUnsafe(oldPartitioner); RangeStreamer.ALIVE_PREDICATE = originalAlivePredicate; } @@ -204,16 +208,16 @@ private RangeStreamer getRangeStreamer() throws UnknownHostException } return new RangeStreamer(metadata, - StreamOperation.BOOTSTRAP, - true, - DatabaseDescriptor.getNodeProximity(), - new StreamStateStore(), - mockFailureDetector, - false, - 1, - movements.left, - movements.right, - true); + StreamOperation.BOOTSTRAP, + true, + DatabaseDescriptor.getNodeProximity(), + new StreamStateStore(), + mockFailureDetector, + false, + 1, + movements.left, + movements.right, + true); } private boolean includesWraparound(Collection> toFetch) diff --git a/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java b/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java index 9162a87e8568..4be82d491c73 100644 --- a/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java +++ b/test/unit/org/apache/cassandra/exceptions/RequestFailureReasonTest.java @@ -40,10 +40,8 @@ public class RequestFailureReasonTest { 8, "NOT_CMS" }, { 9, "INVALID_ROUTING" }, { 10, "COORDINATOR_BEHIND" }, + { 11, "RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM" }, { 503, "INDEX_BUILD_IN_PROGRESS" }, - { 504, "RETRY_ON_DIFFERENT_TRANSACTION_SYSTEM" }, - { 505, "BOOTING" }, - { 506, "ACCORD_DISABLED" } }; @Test diff --git a/test/unit/org/apache/cassandra/utils/ASTGenerators.java b/test/unit/org/apache/cassandra/utils/ASTGenerators.java index 1279f60cb7fe..3e1a228db1ac 100644 --- a/test/unit/org/apache/cassandra/utils/ASTGenerators.java +++ b/test/unit/org/apache/cassandra/utils/ASTGenerators.java @@ -1004,6 +1004,7 @@ public Gen build() .withoutCas() .withoutTimestamp() .withoutTtl() + .withAllowUpdateMultipleClusteringKeys(false) .withReferences(new ArrayList<>(builder.allowedReferences())); if (!allowReferences) mutationBuilder.withReferences(Collections.emptyList()); From 348ffb0ba09f10893e8dedfbd69c950fb129ec53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tibor=20R=C3=A9p=C3=A1si?= Date: Wed, 19 Feb 2025 13:59:36 +0100 Subject: [PATCH 271/340] allow grant permission on virtual keyspaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by Tibor Répási; reviewed by Francisco Guerrero, Maxwell Guo for CASSANDRA-20171 --- CHANGES.txt | 1 + .../org/apache/cassandra/auth/DataResource.java | 4 +++- .../apache/cassandra/auth/GrantAndRevokeTest.java | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 67672cd0c51f..4332b7555575 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * Avoid computing prepared statement size for unprepared batches (CASSANDRA-20556) * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) * Update OWASP dependency checker to version 12.1.0 (CASSANDRA-20501) diff --git a/src/java/org/apache/cassandra/auth/DataResource.java b/src/java/org/apache/cassandra/auth/DataResource.java index c3f5b3210eae..32120df97a10 100644 --- a/src/java/org/apache/cassandra/auth/DataResource.java +++ b/src/java/org/apache/cassandra/auth/DataResource.java @@ -24,6 +24,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; /** * The primary type of resource in Cassandra. @@ -209,7 +210,8 @@ public boolean exists() case ROOT: return true; case KEYSPACE: - return Schema.instance.getKeyspaces().contains(keyspace); + return SchemaConstants.isVirtualSystemKeyspace(keyspace) || + Schema.instance.getKeyspaces().contains(keyspace); case TABLE: return Schema.instance.getTableMetadata(keyspace, table) != null; } diff --git a/test/unit/org/apache/cassandra/auth/GrantAndRevokeTest.java b/test/unit/org/apache/cassandra/auth/GrantAndRevokeTest.java index 3572fafb118a..c2bdc90857aa 100644 --- a/test/unit/org/apache/cassandra/auth/GrantAndRevokeTest.java +++ b/test/unit/org/apache/cassandra/auth/GrantAndRevokeTest.java @@ -40,6 +40,7 @@ import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.utils.Pair; +import org.apache.cassandra.service.CassandraDaemon; import static java.lang.String.format; import static org.apache.cassandra.schema.SchemaConstants.LOCAL_SYSTEM_KEYSPACE_NAMES; @@ -66,6 +67,7 @@ public static void setUpClass() DatabaseDescriptor.getRoleManager().setup(); DatabaseDescriptor.getAuthenticator().setup(); DatabaseDescriptor.getAuthorizer().setup(); + CassandraDaemon.getInstanceForTesting().setupVirtualKeyspaces(); } @After @@ -302,6 +304,18 @@ public void testGrantOnAllKeyspaces() throws Throwable } + @Test + public void testGrantOnVirtualKeyspaces() throws Throwable + { + Session superuser = session(SUPERUSER); + superuser.execute(String.format("CREATE ROLE %s WITH LOGIN = TRUE AND password='%s'", user, pass)); + + superuser.execute(String.format("GRANT SELECT PERMISSION ON KEYSPACE system_virtual_schema TO %s", user)); + superuser.execute(String.format("GRANT SELECT PERMISSION ON KEYSPACE system_views TO %s", user)); + superuser.execute(String.format("REVOKE SELECT PERMISSION ON KEYSPACE system_virtual_schema FROM %s", user)); + superuser.execute(String.format("REVOKE SELECT PERMISSION ON KEYSPACE system_views FROM %s", user)); + } + private void maybeReadSystemTables(Session session, boolean isSuper) throws Throwable { Set readableKeyspaces = new HashSet<>(Arrays.asList(SchemaConstants.SCHEMA_KEYSPACE_NAME, SchemaConstants.TRACE_KEYSPACE_NAME)); From f327b63db09a907206749a3c88aba38a4554e548 Mon Sep 17 00:00:00 2001 From: Branimir Lambov Date: Tue, 19 Nov 2024 12:41:41 +0200 Subject: [PATCH 272/340] Introduce SSTableSimpleScanner for compaction This removes the usage of index files during compaction and simplifies and improves the performance of compaction. patch by Branimir Lambov; reviewed by Sylvain Lebresne for CASSANDRA-20092 --- CHANGES.txt | 2 + .../apache/cassandra/cache/KeyCacheKey.java | 12 +- .../db/compaction/CompactionIterator.java | 2 +- .../io/compress/CompressionMetadata.java | 8 + .../io/sstable/SSTableIdentityIterator.java | 62 ++++++ .../io/sstable/format/SSTableReader.java | 142 +++++++++++-- .../io/sstable/format/SSTableScanner.java | 16 -- .../sstable/format/SSTableSimpleScanner.java | 195 +++++++++++++++++ .../io/sstable/format/big/BigTableReader.java | 37 ---- .../sstable/format/big/BigTableScanner.java | 21 -- .../io/sstable/format/bti/BtiTableReader.java | 23 -- .../sstable/format/bti/BtiTableScanner.java | 21 -- .../cassandra/tools/SSTablePartitions.java | 4 +- .../SSTableCorruptionDetectionTest.java | 33 +++ .../io/sstable/SSTableReaderTest.java | 198 ++++++++++++++++++ .../io/sstable/SSTableScannerTest.java | 125 +++++++++-- 16 files changed, 741 insertions(+), 160 deletions(-) create mode 100644 src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java diff --git a/CHANGES.txt b/CHANGES.txt index 4af9fb7b63aa..2e5aeed0b512 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,7 @@ 5.0.5 * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) + * Fix marking an SSTable as suspected and BufferPool leakage in case of a corrupted SSTable read during a compaction (CASSANDRA-20396) + * Introduce SSTableSimpleScanner for compaction (CASSANDRA-20092) * Avoid purging deletions in RowFilter when reconciliation is required (CASSANDRA-20541) * Fixed multiple single-node SAI query bugs relating to static columns (CASSANDRA-20338) * Upgrade com.datastax.cassandra:cassandra-driver-core:3.11.5 to org.apache.cassandra:cassandra-driver-core:3.12.1 (CASSANDRA-17231) diff --git a/src/java/org/apache/cassandra/cache/KeyCacheKey.java b/src/java/org/apache/cassandra/cache/KeyCacheKey.java index ac6f1f969311..1a722b05e6c4 100644 --- a/src/java/org/apache/cassandra/cache/KeyCacheKey.java +++ b/src/java/org/apache/cassandra/cache/KeyCacheKey.java @@ -21,7 +21,6 @@ import java.util.Arrays; import java.util.Objects; -import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.ByteBufferUtil; @@ -31,9 +30,7 @@ public class KeyCacheKey extends CacheKey { public final Descriptor desc; - private static final long EMPTY_SIZE = ObjectSizes.measure(new KeyCacheKey(TableMetadata.builder("ks", "tab") - .addPartitionKeyColumn("pk", UTF8Type.instance) - .build(), null, ByteBufferUtil.EMPTY_BYTE_BUFFER)); + private static final long EMPTY_SIZE = ObjectSizes.measure(new KeyCacheKey()); // keeping an array instead of a ByteBuffer lowers the overhead of the key cache working set, // without extra copies on lookup since client-provided key ByteBuffers will be array-backed already @@ -47,6 +44,13 @@ public KeyCacheKey(TableMetadata tableMetadata, Descriptor desc, ByteBuffer key) assert this.key != null; } + private KeyCacheKey() // Only for EMPTY_SIZE + { + super(null, null); + this.desc = null; + this.key = null; + } + public String toString() { return String.format("KeyCacheKey(%s, %s)", desc, ByteBufferUtil.bytesToHex(ByteBuffer.wrap(key))); diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 589cf39c77a4..00e3dee5af2a 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -290,7 +290,7 @@ private void updateBytesRead() { long n = 0; for (ISSTableScanner scanner : scanners) - n += scanner.getCurrentPosition(); + n += scanner.getBytesScanned(); bytesRead = n; } diff --git a/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java b/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java index 96b4ce841825..d5f5f05655e9 100644 --- a/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java +++ b/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java @@ -539,6 +539,14 @@ public String toString() { return String.format("Chunk", offset, length); } + + /** + * @return the end of the chunk in the file, including the checksum + */ + public long chunkEnd() + { + return offset + length + 4; + } } static class ChunkSerializer implements IVersionedSerializer diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java index cc201b4125c5..072a364af32c 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java @@ -46,6 +46,7 @@ public class SSTableIdentityIterator implements Comparable getPositionsForRanges(Collection> ranges) { @@ -753,27 +753,110 @@ public List getPositionsForRanges(Collection range : Range.normalize(ranges)) { assert !range.isWrapAround() || range.right.isMinimum(); - // truncate the range so it at most covers the sstable AbstractBounds bounds = Range.makeRowRange(range); - PartitionPosition leftBound = bounds.left.compareTo(first) > 0 ? bounds.left : first.getToken().minKeyBound(); - PartitionPosition rightBound = bounds.right.isMinimum() ? last.getToken().maxKeyBound() : bounds.right; + PartitionPositionBounds pb = getPositionsForBounds(bounds); + if (pb != null) + positions.add(pb); + } + return positions; + } - if (leftBound.compareTo(last) > 0 || rightBound.compareTo(first) < 0) - continue; + /** + * Get a list of data positions in this SSTable that correspond to the given list of bounds. This method will remove + * non-covered intervals, but will not correct order or overlap in the supplied list, e.g. if bounds overlap, the + * result will be sections of the data file that repeat the same positions. + * + * @return A sorted list of [offset,end) pairs corresponding to the given boundsList in the datafile for this + * SSTable. + */ + public List getPositionsForBoundsIterator(Iterator> boundsList) + { + // use the index to determine a minimal section for each range + List positions = new ArrayList<>(); + while (boundsList.hasNext()) + { + AbstractBounds bounds = boundsList.next(); + PartitionPositionBounds pb = getPositionsForBounds(bounds); + if (pb != null) + positions.add(pb); + } + return positions; + } - long left = getPosition(leftBound, Operator.GT); - long right = (rightBound.compareTo(last) > 0) - ? uncompressedLength() - : getPosition(rightBound, Operator.GT); + /** + * Determine the data positions in this SSTable that cover the given bounds. + * + * @return An [offset,end) pair that cover the given bounds in the datafile for this SSTable, or null if the range + * is not covered by the sstable or is empty. + */ + public PartitionPositionBounds getPositionsForBounds(AbstractBounds bounds) + { + long left = getPosition(bounds.left, bounds.inclusiveLeft() ? Operator.GE : Operator.GT); + // Note: getPosition will apply a moved start if the sstable is in MOVED_START state. + if (left < 0) // empty range + return null; - if (left == right) - // empty range - continue; + long right = bounds.right.isMinimum() ? -1 + : getPosition(bounds.right, bounds.inclusiveRight() ? Operator.GT + : Operator.GE); + if (right < 0) // right is beyond end + right = uncompressedLength(); // this should also be correct for EARLY readers + + if (left >= right) // empty range + return null; + + return new PartitionPositionBounds(left, right); + } + + /** + * Return an [offset,end) pair that covers the whole file. This could be null if the sstable's moved start has + * made the sstable effectively empty. + */ + public PartitionPositionBounds getPositionsForFullRange() + { + if (openReason != OpenReason.MOVED_START) + return new PartitionPositionBounds(0, uncompressedLength()); + else + { + // query a full range, so that the required adjustments can be applied + PartitionPosition minToken = getPartitioner().getMinimumToken().minKeyBound(); + return getPositionsForBounds(new Range<>(minToken, minToken)); + } + } - assert left < right : String.format("Range=%s openReason=%s first=%s last=%s left=%d right=%d", range, openReason, first, last, left, right); - positions.add(new PartitionPositionBounds(left, right)); + /** + * Calculate a total on-disk (compressed) size for the given partition positions. For uncompressed files this is + * equal to the sum of the size of the covered ranges. For compressed files this is the sum of the size of the + * chunks that contain the requested ranges and may be significantly bigger than the size of the requested ranges. + * + * @param positionBounds a list of [offset,end) pairs that specify the relevant sections of the data file; this must + * be non-overlapping and in ascending order. + */ + public long onDiskSizeForPartitionPositions(Collection positionBounds) + { + long total = 0; + if (!compression) + { + for (PartitionPositionBounds position : positionBounds) + total += position.upperPosition - position.lowerPosition; } - return positions; + else + { + final CompressionMetadata compressionMetadata = getCompressionMetadata(); + long lastEnd = 0; + for (PartitionPositionBounds position : positionBounds) + { + // The end of the chunk that contains the last required byte from the range. + long upperChunkEnd = compressionMetadata.chunkFor(position.upperPosition - 1).chunkEnd(); + // The start of the chunk that contains the first required byte from the range. + long lowerChunkStart = compressionMetadata.chunkFor(position.lowerPosition).offset; + if (lowerChunkStart < lastEnd) // if regions include the same chunk, count it only once + lowerChunkStart = lastEnd; + total += upperChunkEnd - lowerChunkStart; + lastEnd = upperChunkEnd; + } + } + return total; } /** @@ -965,11 +1048,18 @@ public ISSTableScanner getScanner(Range range) } /** - * Direct I/O SSTableScanner over the entirety of the sstable.. + * Direct I/O SSTableScanner over the entirety of the sstable. * * @return A Scanner over the full content of the SSTable. */ - public abstract ISSTableScanner getScanner(); + public ISSTableScanner getScanner() + { + PartitionPositionBounds fullRange = getPositionsForFullRange(); + if (fullRange != null) + return new SSTableSimpleScanner(this, Collections.singletonList(fullRange)); + else + return new SSTableSimpleScanner(this, Collections.emptyList()); + } /** * Direct I/O SSTableScanner over a defined collection of ranges of tokens. @@ -977,15 +1067,25 @@ public ISSTableScanner getScanner(Range range) * @param ranges the range of keys to cover * @return A Scanner for seeking over the rows of the SSTable. */ - public abstract ISSTableScanner getScanner(Collection> ranges); + public ISSTableScanner getScanner(Collection> ranges) + { + if (ranges != null) + return new SSTableSimpleScanner(this, getPositionsForRanges(ranges)); + else + return getScanner(); + } /** * Direct I/O SSTableScanner over an iterator of bounds. * - * @param rangeIterator the keys to cover + * @param boundsIterator the keys to cover * @return A Scanner for seeking over the rows of the SSTable. */ - public abstract ISSTableScanner getScanner(Iterator> rangeIterator); + public ISSTableScanner getScanner(Iterator> boundsIterator) + { + return new SSTableSimpleScanner(this, getPositionsForBoundsIterator(boundsIterator)); + } + /** * Create a {@link FileDataInput} for the data file of the sstable represented by this reader. This method returns diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java index 5136a06bca46..28035a85da0b 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableScanner.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Set; @@ -35,9 +34,7 @@ import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.AbstractBounds.Boundary; -import org.apache.cassandra.dht.Bounds; import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.AbstractRowIndexEntry; import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.sstable.ISSTableScanner; @@ -86,14 +83,6 @@ protected SSTableScanner(S sstable, this.listener = listener; } - protected static List> makeBounds(SSTableReader sstable, Collection> tokenRanges) - { - List> boundsList = new ArrayList<>(tokenRanges.size()); - for (Range range : Range.normalize(tokenRanges)) - addRange(sstable, Range.makeRowRange(range), boundsList); - return boundsList; - } - protected static List> makeBounds(SSTableReader sstable, DataRange dataRange) { List> boundsList = new ArrayList<>(2); @@ -101,11 +90,6 @@ protected static List> makeBounds(SSTableReade return boundsList; } - protected static AbstractBounds fullRange(SSTableReader sstable) - { - return new Bounds<>(sstable.getFirst(), sstable.getLast()); - } - private static void addRange(SSTableReader sstable, AbstractBounds requested, List> boundsList) { if (requested instanceof Range && ((Range) requested).isWrapAround()) diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java new file mode 100644 index 000000000000..1789aff72ca5 --- /dev/null +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.io.sstable.format; + +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; + +import com.google.common.collect.ImmutableSet; + +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; +import org.apache.cassandra.io.sstable.CorruptSSTableException; +import org.apache.cassandra.io.sstable.ISSTableScanner; +import org.apache.cassandra.io.sstable.SSTableIdentityIterator; +import org.apache.cassandra.io.util.RandomAccessReader; +import org.apache.cassandra.schema.TableMetadata; + +import static org.apache.cassandra.io.sstable.format.SSTableReader.PartitionPositionBounds; + +/// Simple SSTable scanner that reads sequentially through an SSTable without using the index. +/// +/// This is a significant improvement for the performance of compaction over using the full-blown DataRange-capable +/// [SSTableScanner] and enables correct calculation of data sizes to process. +public class SSTableSimpleScanner +implements ISSTableScanner +{ + private final AtomicBoolean isClosed = new AtomicBoolean(false); + private final RandomAccessReader dfile; + private final SSTableReader sstable; + + private final Iterator rangeIterator; + + private long bytesScannedInPreviousRanges; + + private final long sizeInBytes; + private final long compressedSizeInBytes; + + private long currentEndPosition; + private long currentStartPosition; + + private SSTableIdentityIterator currentIterator; + private DecoratedKey lastKey; + + /// Create a new simple scanner over the given sstables and the given ranges of uncompressed positions. + /// Each range must start and end on a partition boundary, and, to satisfy the contract of [ISSTableScanner], the + /// ranges must be non-overlapping and in ascending order. This scanner will throw an [IllegalArgumentException] if + /// the latter is not true. + /// + /// The ranges can be constructed by [SSTableReader#getPositionsForRanges] and similar methods as done by the + /// various [SSTableReader#getScanner] variations. + public SSTableSimpleScanner(SSTableReader sstable, + Collection boundsList) + { + assert sstable != null; + + this.dfile = sstable.openDataReaderForScan(); + this.sstable = sstable; + this.sizeInBytes = boundsList.stream().mapToLong(ppb -> ppb.upperPosition - ppb.lowerPosition).sum(); + this.compressedSizeInBytes = sstable.compression ? sstable.onDiskSizeForPartitionPositions(boundsList) : sizeInBytes; + this.rangeIterator = boundsList.iterator(); + this.currentEndPosition = 0; + this.currentStartPosition = 0; + this.bytesScannedInPreviousRanges = 0; + this.currentIterator = null; + this.lastKey = null; + } + + public void close() + { + if (isClosed.compareAndSet(false, true)) + { + // ensure we report what we have actually processed + bytesScannedInPreviousRanges += dfile.getFilePointer() - currentStartPosition; + dfile.close(); + // close() may change the file pointer, update so that the difference is 0 when reported by getBytesScanned() + currentStartPosition = dfile.getFilePointer(); + } + } + + @Override + public long getLengthInBytes() + { + return sizeInBytes; + } + + + public long getCompressedLengthInBytes() + { + return compressedSizeInBytes; + } + + @Override + public long getCurrentPosition() + { + return dfile.getFilePointer(); + } + + public long getBytesScanned() + { + return bytesScannedInPreviousRanges + dfile.getFilePointer() - currentStartPosition; + } + + @Override + public Set getBackingSSTables() + { + return ImmutableSet.of(sstable); + } + + public TableMetadata metadata() + { + return sstable.metadata(); + } + + public boolean hasNext() + { + if (currentIterator != null) + { + currentIterator.close(); // Ensure that the iterator cannot be used further. No op if already closed. + + // Row iterator must be exhausted to advance to next partition + currentIterator.exhaust(); + currentIterator = null; + } + + if (dfile.getFilePointer() < currentEndPosition) + return true; + + return advanceRange(); + } + + boolean advanceRange() + { + if (!rangeIterator.hasNext()) + return false; + + bytesScannedInPreviousRanges += currentEndPosition - currentStartPosition; + + PartitionPositionBounds nextRange = rangeIterator.next(); + if (currentEndPosition > nextRange.lowerPosition) + throw new IllegalArgumentException("Ranges supplied to SSTableSimpleScanner must be non-overlapping and in ascending order."); + + currentEndPosition = nextRange.upperPosition; + currentStartPosition = nextRange.lowerPosition; + dfile.seek(currentStartPosition); + return true; + } + + public UnfilteredRowIterator next() + { + if (!hasNext()) + throw new NoSuchElementException(); + + currentIterator = SSTableIdentityIterator.create(sstable, dfile, false); + DecoratedKey currentKey = currentIterator.partitionKey(); + if (lastKey != null && lastKey.compareTo(currentKey) >= 0) + { + sstable.markSuspect(); + throw new CorruptSSTableException(new IllegalStateException(String.format("Invalid key order: current %s <= previous %s", + currentKey, + lastKey)), + sstable.getFilename()); + } + lastKey = currentKey; + return currentIterator; + } + + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() + { + return String.format("%s(sstable=%s)", getClass().getSimpleName(), sstable); + } +} diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java index b58dbc532eaf..692cadf34df4 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java @@ -21,7 +21,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -46,7 +45,6 @@ import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.db.rows.UnfilteredRowIteratorWithLowerBound; import org.apache.cassandra.db.rows.UnfilteredRowIterators; -import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.AbstractRowIndexEntry; @@ -157,41 +155,6 @@ public KeyReader keyReader() throws IOException return BigTableKeyReader.create(ifile, rowIndexEntrySerializer); } - /** - * Direct I/O SSTableScanner over an iterator of bounds. - * - * @param boundsIterator the keys to cover - * @return A Scanner for seeking over the rows of the SSTable. - */ - public ISSTableScanner getScanner(Iterator> boundsIterator) - { - return BigTableScanner.getScanner(this, boundsIterator); - } - - /** - * Direct I/O SSTableScanner over the full sstable. - * - * @return A Scanner for reading the full SSTable. - */ - public ISSTableScanner getScanner() - { - return BigTableScanner.getScanner(this); - } - - /** - * Direct I/O SSTableScanner over a defined collection of ranges of tokens. - * - * @param ranges the range of keys to cover - * @return A Scanner for seeking over the rows of the SSTable. - */ - public ISSTableScanner getScanner(Collection> ranges) - { - if (ranges != null) - return BigTableScanner.getScanner(this, ranges); - else - return getScanner(); - } - /** * Finds and returns the first key beyond a given token in this SSTable or null if no such key exists. */ diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java index 887d99784665..83243529c4c9 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java @@ -18,11 +18,8 @@ package org.apache.cassandra.io.sstable.format.big; import java.io.IOException; -import java.util.Collection; import java.util.Iterator; -import com.google.common.collect.Iterators; - import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; @@ -30,8 +27,6 @@ import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.dht.AbstractBounds; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.SSTable; @@ -50,12 +45,6 @@ public class BigTableScanner extends SSTableScanner> tokenRanges) - { - return getScanner(sstable, makeBounds(sstable, tokenRanges).iterator()); - } - - public static ISSTableScanner getScanner(BigTableReader sstable, Iterator> rangeIterator) - { - return new BigTableScanner(sstable, ColumnFilter.all(sstable.metadata()), null, rangeIterator, SSTableReadsListener.NOOP_LISTENER); - } - private BigTableScanner(BigTableReader sstable, ColumnFilter columns, DataRange dataRange, diff --git a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java index c5571e7fbbe3..9a65be1137bb 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableReader.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Iterator; import java.util.List; import com.google.common.annotations.VisibleForTesting; @@ -45,7 +44,6 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.sstable.Descriptor; -import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.IVerifier; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableReadsListener; @@ -382,27 +380,6 @@ public UnfilteredRowIterator rowIterator(FileDataInput dataFileInput, return new SSTableIterator(this, dataFileInput, key, indexEntry, slices, selectedColumns, rowIndexFile); } - @Override - public ISSTableScanner getScanner() - { - return BtiTableScanner.getScanner(this); - } - - @Override - public ISSTableScanner getScanner(Collection> ranges) - { - if (ranges != null) - return BtiTableScanner.getScanner(this, ranges); - else - return getScanner(); - } - - @Override - public ISSTableScanner getScanner(Iterator> rangeIterator) - { - return BtiTableScanner.getScanner(this, rangeIterator); - } - @VisibleForTesting @Override public BtiTableReader cloneAndReplace(IFilter filter) diff --git a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableScanner.java b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableScanner.java index a9f862c68b50..4507ccf7f5e4 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/bti/BtiTableScanner.java @@ -19,11 +19,8 @@ import java.io.Closeable; import java.io.IOException; -import java.util.Collection; import java.util.Iterator; -import com.google.common.collect.Iterators; - import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; @@ -31,20 +28,12 @@ import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.dht.AbstractBounds; -import org.apache.cassandra.dht.Range; -import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.SSTableReadsListener; import org.apache.cassandra.io.sstable.format.SSTableScanner; import org.apache.cassandra.io.util.FileUtils; public class BtiTableScanner extends SSTableScanner { - // Full scan of the sstables - public static BtiTableScanner getScanner(BtiTableReader sstable) - { - return getScanner(sstable, Iterators.singletonIterator(fullRange(sstable))); - } - public static BtiTableScanner getScanner(BtiTableReader sstable, ColumnFilter columns, DataRange dataRange, @@ -53,16 +42,6 @@ public static BtiTableScanner getScanner(BtiTableReader sstable, return new BtiTableScanner(sstable, columns, dataRange, makeBounds(sstable, dataRange).iterator(), listener); } - public static BtiTableScanner getScanner(BtiTableReader sstable, Collection> tokenRanges) - { - return getScanner(sstable, makeBounds(sstable, tokenRanges).iterator()); - } - - public static BtiTableScanner getScanner(BtiTableReader sstable, Iterator> rangeIterator) - { - return new BtiTableScanner(sstable, ColumnFilter.all(sstable.metadata()), null, rangeIterator, SSTableReadsListener.NOOP_LISTENER); - } - private BtiTableScanner(BtiTableReader sstable, ColumnFilter columns, DataRange dataRange, diff --git a/src/java/org/apache/cassandra/tools/SSTablePartitions.java b/src/java/org/apache/cassandra/tools/SSTablePartitions.java index 2181346271bb..b435994bbaff 100644 --- a/src/java/org/apache/cassandra/tools/SSTablePartitions.java +++ b/src/java/org/apache/cassandra/tools/SSTablePartitions.java @@ -369,13 +369,15 @@ private static void processSSTable(String[] keys, { while (scanner.hasNext()) { + // hasNext() positions us on the next partition, next() has to advance to read its header. + long startOfPartition = scanner.getCurrentPosition(); try (UnfilteredRowIterator partition = scanner.next()) { ByteBuffer key = partition.partitionKey().getKey(); boolean isExcluded = excludedKeys.contains(metadata.partitionKeyType.getString(key)); PartitionStats partitionStats = new PartitionStats(key, - scanner.getCurrentPosition(), + startOfPartition, partition.partitionLevelDeletion().isLive()); // Consume the partition to populate the stats. diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java b/test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java index 21ac51ee865d..c631d5d09979 100644 --- a/test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java @@ -37,6 +37,7 @@ import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.Slices; @@ -156,6 +157,12 @@ public void testSSTableScanner() throws Throwable bruteForceCorruptionTest(ssTableReader, sstableScanner()); } + @Test + public void testSSTableSimpleScanner() throws Throwable + { + bruteForceCorruptionTest(ssTableReader, sstableSimpleScanner()); + } + private void bruteForceCorruptionTest(SSTableReader ssTableReader, Consumer walker) throws Throwable { FileChannel fc = new File(ssTableReader.getFilename()).newReadWriteChannel(); @@ -193,6 +200,32 @@ private void bruteForceCorruptionTest(SSTableReader ssTableReader, Consumer sstableScanner() + { + return (SSTableReader sstable) -> { + try (var scanner = sstable.partitionIterator(ColumnFilter.NONE, DataRange.allData(sstable.getPartitioner()), SSTableReadsListener.NOOP_LISTENER)) + { + while (scanner.hasNext()) + { + try (UnfilteredRowIterator rowIter = scanner.next()) + { + if (rowIter.hasNext()) + { + Unfiltered unfiltered = rowIter.next(); + if (unfiltered.isRow()) + { + Row row = (Row) unfiltered; + assertEquals(2, row.clustering().size()); + // no-op read + } + } + } + + } + } + }; + } + + private Consumer sstableSimpleScanner() { return (SSTableReader sstable) -> { try (ISSTableScanner scanner = sstable.getScanner()) diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableReaderTest.java b/test/unit/org/apache/cassandra/io/sstable/SSTableReaderTest.java index 9cb7ca3d14f5..f17301e064bd 100644 --- a/test/unit/org/apache/cassandra/io/sstable/SSTableReaderTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/SSTableReaderTest.java @@ -30,8 +30,11 @@ import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; import org.junit.Assume; import org.junit.BeforeClass; @@ -89,6 +92,7 @@ import static java.lang.String.format; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; +import static org.apache.cassandra.db.ColumnFamilyStore.FlushReason.UNIT_TESTS; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -178,6 +182,200 @@ public void testGetPositionsForRanges() } } + @Test + public void testOnDiskSizeForRanges() + { + ColumnFamilyStore store = discardSSTables(KEYSPACE1, CF_STANDARD2); + partitioner = store.getPartitioner(); + int count = 1000; + + // insert data and compact to a single sstable + for (int j = 0; j < count; j++) + { + new RowUpdateBuilder(store.metadata(), 15000, k0(j)) + .clustering("0") + .add("val", ByteBufferUtil.EMPTY_BYTE_BUFFER) + .build() + .applyUnsafe(); + } + store.forceBlockingFlush(UNIT_TESTS); + store.forceMajorCompaction(); + + SSTableReader sstable = store.getLiveSSTables().iterator().next(); + + // Non-compression-dependent checks + // Check several ways of going through the whole file + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t(cut(k0(0), 1)), t0(count - 1))))); + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(sstable.getPartitioner().getMinimumToken(), + sstable.getPartitioner().getMinimumToken())))); + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(sstable.getPartitioner().getMinimumToken(), + sstable.getLast().getToken())))); + + // Split at exact match + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, ImmutableList.of(new Range<>(t(cut(k0(0), 1)), t0(347)), + new Range<>(t0(347), t0(count - 1))))); + + // Split at different prefixes pointing to the same position + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, ImmutableList.of(new Range<>(t(cut(k0(0), 1)), t(cut(k0(600), 2))), + new Range<>(t(cut(k0(600), 1)), t0(count - 1))))); + + // Size one row + double oneRowSize = sstable.uncompressedLength() * 1.0 / count; + System.out.println("One row size: " + oneRowSize); + + if (!sstable.compression) + { + double delta = 0.9; + + // Ranges are end-inclusive, indexes are adjusted by one here to account for that. + assertEquals((52 - 38), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t0(37), t0(51)))) / oneRowSize, + delta); + + // Try non-matching positions (inexact indexes are not adjusted for the count). + assertEquals((34 - 30), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t(cut(k0(30), 1)), + t0(33)))) / oneRowSize, + delta); + + assertEquals((700 - 554), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t0(553), + t(cut(k0(700), 2))))) / oneRowSize, + delta); + + assertEquals((500 - 30), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t(cut(k0(30), 1)), + t(cut(k0(500), 2))))) / oneRowSize, + delta); + + // Try a list + List> ranges = ImmutableList.of(new Range<>(t0(37), t0(51)), + new Range<>(t0(71), t(cut(k0(100), 2))), + new Range<>(t(cut(k0(230), 1)), t0(243)), + new Range<>(t(cut(k0(260), 1)), t(cut(k0(300), 2))), + new Range<>(t0(373), t0(382)), + new Range<>(t0(382), t0(385)), + new Range<>(t(cut(k0(400), 2)), t(cut(k0(400), 1))), // empty range + new Range<>(t0(563), t(cut(k0(600), 2))), // touching ranges + new Range<>(t(cut(k0(600), 1)), t0(621)) + ); + assertEquals((52 - 38 + 100 - 72 + 244 - 230 + 300 - 260 + 383 - 374 + 386 - 383 + 400 - 400 + 622 - 564), + onDiskSizeForRanges(sstable, ranges) / oneRowSize, + delta); + + // Check going through the whole file + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t(cut(k0(0), 1)), t0(count - 1))))); + + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, ImmutableList.of(new Range<>(t(cut(k0(0), 1)), t0(347)), + new Range<>(t0(347), t0(count - 1))))); + + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, ImmutableList.of(new Range<>(t(cut(k0(0), 1)), t(cut(k0(600), 2))), + new Range<>(t(cut(k0(600), 1)), t0(count - 1))))); + } + else + { + // It's much harder to test with compression. + + // Check first three rows have the same size (they must be in the same chunk) + final long row0size = onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t(cut(k0(0), 1)), t0(0)))); + assertEquals(row0size, onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t0(0), t0(1))))); + assertEquals(row0size, onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t0(1), t0(2))))); + + // As well as the first three rows together + assertEquals(row0size, onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(t(cut(k0(0), 1)), t0(2))))); + + // And also when we query for them in separate ranges + assertEquals(row0size, onDiskSizeForRanges(sstable, ImmutableList.of(new Range<>(t(cut(k0(0), 1)), t0(0)), + new Range<>(t0(0), t0(1))))); + assertEquals(row0size, onDiskSizeForRanges(sstable, ImmutableList.of(new Range<>(t(cut(k0(0), 1)), t0(0)), + new Range<>(t0(1), t0(2))))); + assertEquals(row0size, onDiskSizeForRanges(sstable, ImmutableList.of(new Range<>(t(cut(k0(0), 1)), t0(0)), + new Range<>(t0(0), t0(1)), + new Range<>(t0(1), t0(2))))); + + // Finally, check that if we query for every second row we get the total size of the file. + assertEquals(sstable.onDiskLength(), + onDiskSizeForRanges(sstable, IntStream.range(0, count) + .filter(i -> i % 2 != 0) + .mapToObj(i -> new Range<>(t0(i), t0(i + 1))) + .collect(Collectors.toList()))); + } + } + + + @Test + public void testOnDiskSizeCompressedBoundaries() + { + ColumnFamilyStore store = discardSSTables(KEYSPACE1, CF_COMPRESSED); + partitioner = store.getPartitioner(); + int count = 100000; + // Use a longish string to let a key align with a chunk boundary + ByteBuffer dataBuf = ByteBufferUtil.bytes(String.format("%43d", 123)); + + // insert data and compact to a single sstable + for (int j = 0; j < count; j++) + { + new RowUpdateBuilder(store.metadata(), 15000, k0(j)) + .clustering("0") + .add("val", dataBuf) + .build() + .applyUnsafe(); + } + store.forceBlockingFlush(UNIT_TESTS); + store.forceMajorCompaction(); + + SSTableReader sstable = store.getLiveSSTables().iterator().next(); + + int chunkLength = sstable.getCompressionMetadata().chunkLength(); + System.out.println("Chunk length: " + chunkLength); + int[] alignedKeys = IntStream.range(0, count).filter(i -> (sstable.getPosition(dk0(i), SSTableReader.Operator.EQ) & (chunkLength - 1)) == 0).toArray(); + assertTrue("Test needs an aligned key, try changing the length of dataBuf", alignedKeys.length > 1); + for (int k : alignedKeys) + assertEquals("Coverage must not include chunk starting at end position", + sstable.getCompressionMetadata().chunkFor(sstable.getPosition(dk0(k), SSTableReader.Operator.EQ)).offset, + onDiskSizeForRanges(sstable, Collections.singleton(new Range<>(partitioner.getMinimumToken(), t0(k - 1))))); // inclusive end + } + + + long onDiskSizeForRanges(SSTableReader sstable, Collection> ranges) + { + return sstable.onDiskSizeForPartitionPositions(sstable.getPositionsForRanges(ranges)); + } + + private Token t(String key) + { + return partitioner.getToken(ByteBufferUtil.bytes(key)); + } + + private String k0(int k) + { + return String.format("%08d", k); + } + + private Token t0(int k) + { + return t(k0(k)); + } + + private DecoratedKey dk0(int k) + { + return partitioner.decorateKey(ByteBufferUtil.bytes(k0(k))); + } + + private String cut(String s, int n) + { + return s.substring(0, s.length() - n); + } + + @Test public void testSpannedIndexPositions() throws IOException { diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java b/test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java index 17b8a6cbb2af..73195b0617ab 100644 --- a/test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java @@ -21,10 +21,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.function.Consumer; +import java.util.function.Function; import com.google.common.collect.Iterables; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -40,6 +43,7 @@ import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; +import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.ByteOrderedPartitioner; @@ -49,6 +53,7 @@ import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.ByteBufferUtil; +import org.hamcrest.Matchers; import static org.apache.cassandra.dht.AbstractBounds.isEmpty; import static org.junit.Assert.assertEquals; @@ -180,6 +185,12 @@ private static void insertRowWithKey(TableMetadata metadata, int key) } private static void assertScanMatches(SSTableReader sstable, int scanStart, int scanEnd, int ... boundaries) + { + assertScanMatchesUsingScanner(sstable, scanStart, scanEnd, boundaries); + assertScanMatchesUsingSimple(sstable, scanStart, scanEnd, boundaries); + } + + private static void assertScanMatchesUsingScanner(SSTableReader sstable, int scanStart, int scanEnd, int ... boundaries) { assert boundaries.length % 2 == 0; for (DataRange range : dataRanges(sstable.metadata(), scanStart, scanEnd)) @@ -200,6 +211,28 @@ private static void assertScanMatches(SSTableReader sstable, int scanStart, int } } + private static void assertScanMatchesUsingSimple(SSTableReader sstable, int scanStart, int scanEnd, int ... boundaries) + { + assert boundaries.length % 2 == 0; + for (DataRange range : dataRanges(sstable.metadata(), scanStart, scanEnd)) + { + if (range.isWrapAround() && !range.keyRange().right.isMinimum()) // getScanner on AbstractBounds does not handle wraparounds + continue; + + try(UnfilteredPartitionIterator scanner = sstable.getScanner(Collections.singleton(range.keyRange()).iterator())) + { + for (int b = 0; b < boundaries.length; b += 2) + for (int i = boundaries[b]; i <= boundaries[b + 1]; i++) + assertEquals(toKey(i), new String(scanner.next().partitionKey().getKey().array())); + assertFalse(scanner.hasNext()); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + } + } + private static void assertScanEmpty(SSTableReader sstable, int scanStart, int scanEnd) { assertScanMatches(sstable, scanStart, scanEnd); @@ -547,7 +580,30 @@ public void testSingleKeyMultipleRanges() throws IOException assertScanContainsRanges(scanner, 205, 205); } - private static void testRequestNextRowIteratorWithoutConsumingPrevious(Consumer consumer) + private static void testRequestNextRowIteratorWithoutConsumingPrevious(Function makeScanner, + Consumer requestNext, + String messagePattern) + { + final SSTableReader sstable = prepareSmallSSTable(); + + try (UnfilteredPartitionIterator scanner = makeScanner.apply(sstable); + UnfilteredRowIterator currentRowIterator = scanner.next()) + { + assertTrue(currentRowIterator.hasNext()); + try + { + requestNext.accept(scanner); + currentRowIterator.next(); + fail("Should have thrown IllegalStateException"); + } + catch (IllegalStateException e) + { + Assert.assertThat(e.getMessage(), Matchers.matchesPattern(messagePattern)); + } + } + } + + private static SSTableReader prepareSmallSSTable() { Keyspace keyspace = Keyspace.open(KEYSPACE); ColumnFamilyStore store = keyspace.getColumnFamilyStore(TABLE); @@ -557,38 +613,77 @@ private static void testRequestNextRowIteratorWithoutConsumingPrevious(Consumer< store.disableAutoCompaction(); insertRowWithKey(store.metadata(), 0); + insertRowWithKey(store.metadata(), 3); store.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); assertEquals(1, store.getLiveSSTables().size()); SSTableReader sstable = store.getLiveSSTables().iterator().next(); + return sstable; + } - try (ISSTableScanner scanner = sstable.getScanner(); - UnfilteredRowIterator currentRowIterator = scanner.next()) + @Test + public void testSimpleHasNextRowIteratorWithoutConsumingPrevious() + { + testRequestNextRowIteratorWithoutConsumingPrevious(SSTableReader::getScanner, + UnfilteredPartitionIterator::hasNext, + "Iterator used after closing."); + } + + @Test + public void testSimpleNextRowIteratorWithoutConsumingPrevious() + { + testRequestNextRowIteratorWithoutConsumingPrevious(SSTableReader::getScanner, + UnfilteredPartitionIterator::next, + "Iterator used after closing."); + } + + @Test + public void testHasNextRowIteratorWithoutConsumingPrevious() + { + testRequestNextRowIteratorWithoutConsumingPrevious(r -> r.partitionIterator(ColumnFilter.NONE, DataRange.allData(r.getPartitioner()), SSTableReadsListener.NOOP_LISTENER), + UnfilteredPartitionIterator::hasNext, + ".*UnfilteredRowIterator.*must be closed.*"); + } + + @Test + public void testNextRowIteratorWithoutConsumingPrevious() + { + testRequestNextRowIteratorWithoutConsumingPrevious(r -> r.partitionIterator(ColumnFilter.NONE, DataRange.allData(r.getPartitioner()), SSTableReadsListener.NOOP_LISTENER), + UnfilteredPartitionIterator::next, + ".*UnfilteredRowIterator.*must be closed.*"); + } + + private static void testRequestNextRowIteratorAfterClosingPrevious(Function makeScanner) + { + final SSTableReader sstable = prepareSmallSSTable(); + + try (UnfilteredPartitionIterator scanner = makeScanner.apply(sstable)) { - assertTrue(currentRowIterator.hasNext()); - try + try (UnfilteredRowIterator p = scanner.next()) { - consumer.accept(scanner); - fail("Should have thrown IllegalStateException"); + assertEquals(toKey(0), new String(p.partitionKey().getKey().array())); + // do not read it, but close it } - catch (IllegalStateException e) + + try (UnfilteredRowIterator p = scanner.next()) { - assertEquals("The UnfilteredRowIterator returned by the last call to next() was initialized: " + - "it must be closed before calling hasNext() or next() again.", - e.getMessage()); + assertEquals(toKey(3), new String(p.partitionKey().getKey().array())); + assertTrue(p.hasNext()); + assertTrue(p.next() instanceof Row); } } } + @Test - public void testHasNextRowIteratorWithoutConsumingPrevious() + public void testSimpleRequestNextRowIteratorAfterClosingPreviouss() { - testRequestNextRowIteratorWithoutConsumingPrevious(ISSTableScanner::hasNext); + testRequestNextRowIteratorAfterClosingPrevious(SSTableReader::getScanner); } @Test - public void testNextRowIteratorWithoutConsumingPrevious() + public void testRequestNextRowIteratorAfterClosingPrevious() { - testRequestNextRowIteratorWithoutConsumingPrevious(ISSTableScanner::next); + testRequestNextRowIteratorAfterClosingPrevious(r -> r.partitionIterator(ColumnFilter.NONE, DataRange.allData(r.getPartitioner()), SSTableReadsListener.NOOP_LISTENER)); } } From 17cb89208c804680ffd4445d6a826171a67edb79 Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Fri, 7 Mar 2025 23:24:22 +0300 Subject: [PATCH 273/340] Fix marking an SSTable as suspected and BufferPool leakage in case of a corrupted SSTable read during a compaction Patch by Dmitry Konstantinov; reviewed by Branimir Lambov for CASSANDRA-20396 --- .../apache/cassandra/cache/ChunkCache.java | 12 +++++- .../io/sstable/SSTableIdentityIterator.java | 30 +++++++++++++ .../sstable/format/SSTableSimpleScanner.java | 42 ++++++++++++++----- 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/src/java/org/apache/cassandra/cache/ChunkCache.java b/src/java/org/apache/cassandra/cache/ChunkCache.java index e7e50296accf..5fb0bc6a2f90 100644 --- a/src/java/org/apache/cassandra/cache/ChunkCache.java +++ b/src/java/org/apache/cassandra/cache/ChunkCache.java @@ -162,8 +162,16 @@ public Buffer load(Key key) { ByteBuffer buffer = bufferPool.get(key.file.chunkSize(), key.file.preferredBufferType()); assert buffer != null; - key.file.readChunk(key.position, buffer); - return new Buffer(buffer, key.position); + try + { + key.file.readChunk(key.position, buffer); + return new Buffer(buffer, key.position); + } + catch (Throwable t) + { + bufferPool.put(buffer); + throw t; + } } @Override diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java index 072a364af32c..d5a1ae8bccc5 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java @@ -76,6 +76,11 @@ public static SSTableIdentityIterator create(SSTableReader sstable, RandomAccess sstable.markSuspect(); throw new CorruptSSTableException(e, file.getPath()); } + catch (CorruptSSTableException e) // to ensure that we marked the sstable as suspected if CorruptSSTableException is thrown from lower levels + { + sstable.markSuspect(); + throw e; + } } public static SSTableIdentityIterator create(SSTableReader sstable, FileDataInput dfile, long dataPosition, DecoratedKey key, boolean tombstoneOnly) @@ -99,6 +104,11 @@ public static SSTableIdentityIterator create(SSTableReader sstable, FileDataInpu sstable.markSuspect(); throw new CorruptSSTableException(e, dfile.getPath()); } + catch (CorruptSSTableException e) // to ensure that we marked the sstable as suspected if CorruptSSTableException is thrown from lower levels + { + sstable.markSuspect(); + throw e; + } } public static SSTableIdentityIterator create(SSTableReader sstable, FileDataInput dfile, boolean tombstoneOnly) @@ -121,6 +131,11 @@ public static SSTableIdentityIterator create(SSTableReader sstable, FileDataInpu sstable.markSuspect(); throw new CorruptSSTableException(e, dfile.getPath()); } + catch (CorruptSSTableException e) // to ensure that we marked the sstable as suspected if CorruptSSTableException is thrown from lower levels + { + sstable.markSuspect(); + throw e; + } } public TableMetadata metadata() @@ -164,6 +179,11 @@ public boolean hasNext() sstable.markSuspect(); throw new CorruptSSTableException(e, filename); } + catch (CorruptSSTableException e) // to ensure that we marked the sstable as suspected if CorruptSSTableException is thrown from lower levels + { + sstable.markSuspect(); + throw e; + } catch (IOError e) { if (e.getCause() instanceof IOException) @@ -192,6 +212,11 @@ public Unfiltered next() sstable.markSuspect(); throw new CorruptSSTableException(e, filename); } + catch (CorruptSSTableException e) // to ensure that we marked the sstable as suspected if CorruptSSTableException is thrown from lower levels + { + sstable.markSuspect(); + throw e; + } catch (IOError e) { if (e.getCause() instanceof IOException) @@ -240,6 +265,11 @@ public void exhaust() sstable.markSuspect(); throw new CorruptSSTableException(e, filename); } + catch (CorruptSSTableException e) // to ensure that we marked the sstable as suspected if CorruptSSTableException is thrown from lower levels + { + sstable.markSuspect(); + throw e; + } catch (IOError e) { if (e.getCause() instanceof IOException) diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java index 1789aff72ca5..a649fbea4c33 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java @@ -17,6 +17,8 @@ */ package org.apache.cassandra.io.sstable.format; +import java.io.IOError; +import java.io.IOException; import java.util.Collection; import java.util.Iterator; import java.util.NoSuchElementException; @@ -148,19 +150,39 @@ public boolean hasNext() boolean advanceRange() { - if (!rangeIterator.hasNext()) - return false; + try + { + if (!rangeIterator.hasNext()) + return false; - bytesScannedInPreviousRanges += currentEndPosition - currentStartPosition; + bytesScannedInPreviousRanges += currentEndPosition - currentStartPosition; - PartitionPositionBounds nextRange = rangeIterator.next(); - if (currentEndPosition > nextRange.lowerPosition) - throw new IllegalArgumentException("Ranges supplied to SSTableSimpleScanner must be non-overlapping and in ascending order."); + PartitionPositionBounds nextRange = rangeIterator.next(); + if (currentEndPosition > nextRange.lowerPosition) + throw new IllegalArgumentException("Ranges supplied to SSTableSimpleScanner must be non-overlapping and in ascending order."); - currentEndPosition = nextRange.upperPosition; - currentStartPosition = nextRange.lowerPosition; - dfile.seek(currentStartPosition); - return true; + currentEndPosition = nextRange.upperPosition; + currentStartPosition = nextRange.lowerPosition; + dfile.seek(currentStartPosition); + return true; + } + catch (CorruptSSTableException e) + { + sstable.markSuspect(); + throw e; + } + catch (IOError e) + { + if (e.getCause() instanceof IOException) + { + sstable.markSuspect(); + throw new CorruptSSTableException((Exception)e.getCause(), sstable.getFilename()); + } + else + { + throw e; + } + } } public UnfilteredRowIterator next() From 91bc34fbf86f65d8872c9610fdb7b63561190894 Mon Sep 17 00:00:00 2001 From: Naren Sreedhara Date: Thu, 1 Aug 2024 10:43:46 -0400 Subject: [PATCH 274/340] Added tab-ahead support for new built-in functions patch by Brad Schoening, Naren Sreedhara; reviewed by Stefan Miklosovic, Bernardo Botella Corbi for CASSANDRA-19631 --- CHANGES.txt | 1 + .../cassandra/examples/CQL/to_date.cql | 1 + .../pages/developing/cql/functions.adoc | 11 ++- pylib/cqlshlib/cql3handling.py | 77 +++++++++++++++++-- pylib/cqlshlib/cqlshmain.py | 2 +- pylib/cqlshlib/test/test_cqlsh_completion.py | 65 +++++++++++++--- 6 files changed, 136 insertions(+), 21 deletions(-) create mode 100644 doc/modules/cassandra/examples/CQL/to_date.cql diff --git a/CHANGES.txt b/CHANGES.txt index 5a60651cfc22..764246421ab8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Add autocompletion in CQLSH for built-in functions (CASSANDRA-19631) * Introduce metadata serialization version V4 (CASSANDRA-19970) * Allow CMS reconfiguration to work around DOWN nodes (CASSANDRA-19943) * Make TableParams.Serializer set allowAutoSnapshots and incrementalBackups (CASSANDRA-19954) diff --git a/doc/modules/cassandra/examples/CQL/to_date.cql b/doc/modules/cassandra/examples/CQL/to_date.cql new file mode 100644 index 000000000000..160dcaab6726 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/to_date.cql @@ -0,0 +1 @@ +SELECT id, to_date(create_ts) FROM myTable diff --git a/doc/modules/cassandra/pages/developing/cql/functions.adoc b/doc/modules/cassandra/pages/developing/cql/functions.adoc index 75786de271a3..d74beb1446fc 100644 --- a/doc/modules/cassandra/pages/developing/cql/functions.adoc +++ b/doc/modules/cassandra/pages/developing/cql/functions.adoc @@ -184,12 +184,12 @@ time where the function is invoked: |=== |Function name |Output type -| `current_timestamp` | `timestamp` - | `current_date` | `date` | `current_time` | `time` +| `current_timestamp` | `timestamp` + | `current_timeuuid` | `timeUUID` |=== @@ -223,6 +223,13 @@ A number of functions are provided to convert a `timeuuid`, a `timestamp` or a ` | `to_unix_timestamp` | `date` | Converts the `date` argument into a `bigInt` raw value |=== +For example, a timestamp can be converted to a date with the following: + +[source,cql] +---- +include::cassandra:example$CQL/to_date.cql[] +---- + ==== Blob conversion functions A number of functions are provided to convert the native types into diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py index 457582f3d379..101c39ca6cde 100644 --- a/pylib/cqlshlib/cql3handling.py +++ b/pylib/cqlshlib/cql3handling.py @@ -602,7 +602,7 @@ def cf_prop_val_mapender_completer(ctxt, cass): @completer_for('tokenDefinition', 'token') def token_word_completer(ctxt, cass): - return ['token('] + return ['TOKEN'] @completer_for('simpleStorageType', 'typename') @@ -741,12 +741,13 @@ def working_on_keyspace(ctxt): ; ::= ( "AND" )* ; - ::= [rel_lhs]= ( "[" "]" )? ( "=" | "<" | ">" | "<=" | ">=" | "!=" | ( "NOT" )? "CONTAINS" ( "KEY" )? ) + ::= [rel_lhs]= ( "[" "]" )? ( "=" | "<" | ">" | "<=" | ">=" | "!=" | ( "NOT" )? "CONTAINS" ( "KEY" )? ) ( | ) | token="TOKEN" "(" [rel_tokname]= ( "," [rel_tokname]= )* ")" ("=" | "<" | ">" | "<=" | ">=") | [rel_lhs]= (( "NOT" )? "IN" ) "(" ( "," )* ")" | [rel_lhs]= "BETWEEN" "AND" + | ; ::= "DISTINCT"? ("AS" )? ("," ("AS" )?)* | "*" @@ -755,14 +756,20 @@ def working_on_keyspace(ctxt): ; ::= [colname]= ( "[" ( ( ".." "]" )? | ".." ) )? | - | "WRITETIME" "(" [colname]= ")" - | "MAXWRITETIME" "(" [colname]= ")" - | "TTL" "(" [colname]= ")" - | "COUNT" "(" star=( "*" | "1" ) ")" | "CAST" "(" "AS" ")" + | "TTL" "(" [colname]= ")" + | "TOKEN" "(" [colname]= ")" + | + | + | + | + | + | + | | | ; + ::= "(" ( ( "," )* )? ")" ; ::= [ordercol]= ( "ASC" | "DESC" )? @@ -775,6 +782,60 @@ def working_on_keyspace(ctxt): ::= [groupcol]= | ; + + ::= "COUNT" "(" star=( "*" | "1" ) ")" + | "AVG" "(" [colname]= ")" + | "MIN" "(" [colname]= ")" + | "MAX" "(" [colname]= ")" + | "SUM" "(" [colname]= ")" + ; + + ::= "ABS" "(" [colname]= ")" + | "EXP" "(" [colname]= ")" + | "LOG" "(" [colname]= ")" + | "LOG10" "(" [colname]= ")" + | "ROUND" "(" [colname]= ")" + ; + + ::= "MAP_KEYS" "(" [colname]= ")" + | "MAP_VALUES" "(" [colname]= ")" + | "COLLECTION_AVG" "(" [colname]= ")" + | "COLLECTION_COUNT" "(" [colname]= ")" + | "COLLECTION_MIN" "(" [colname]= ")" + | "COLLECTION_MAX" "(" [colname]= ")" + | "COLLECTION_SUM" "(" [colname]= ")" + ; + + ::= "CURRENT_DATE()" + | "CURRENT_TIME()" + | "CURRENT_TIMESTAMP()" + | "CURRENT_TIMEUUID()" + ; + + ::= "MASK_DEFAULT" "(" [colname]= ")" + | "MASK_HASH" "(" [colname]= ")" + | "MASK_INNER" "(" [colname]= "," "," ")" + | "MASK_NULL" "(" [colname]= ")" + | "MASK_REPLACE" "(" [colname]= "," ")" + | "MASK_OUTER" "(" [colname]= "," "," ")" + ; + + ::= "TO_DATE" "(" [colname]= ")" + | "TO_TIMESTAMP" "(" [colname]= ")" + | "TO_UNIX_TIMESTAMP" "(" [colname]= ")" + ; + + ::= "MAX_TIMEUUID" "(" [colname]= ")" + | "MIN_TIMEUUID" "(" [colname]= ")" + ; + + ::= "MAX_WRITETIME" "(" [colname]= ")" + | "MIN_WRITETIME" "(" [colname]= ")" + | "WRITETIME" "(" [colname]= ")" + ; + ::= | + ; + ''' @@ -867,7 +928,7 @@ def select_group_column_completer(ctxt, cass): @completer_for('relation', 'token') def relation_token_word_completer(ctxt, cass): - return ['TOKEN('] + return ['TOKEN'] @completer_for('relation', 'rel_tokname') @@ -1001,7 +1062,7 @@ def insert_option_completer(ctxt, cass): @completer_for('updateStatement', 'updateopt') def update_option_completer(ctxt, cass): - opts = set('TIMESTAMP TTL'.split()) + opts = {'TIMESTAMP', 'TTL'} for opt in ctxt.get_binding('updateopt', ()): opts.discard(opt.split()[0]) return opts diff --git a/pylib/cqlshlib/cqlshmain.py b/pylib/cqlshlib/cqlshmain.py index 2cac58ef22c4..2bc79f3c377b 100755 --- a/pylib/cqlshlib/cqlshmain.py +++ b/pylib/cqlshlib/cqlshmain.py @@ -383,7 +383,7 @@ def check_build_versions(self): baseversion = baseversion[0:extra] if baseversion != build_version: print("WARNING: cqlsh was built against {}, but this server is {}. All features may not work!" - .format(build_version, baseversion)) # ToDo: use file=sys.stderr) + .format(build_version, baseversion), file=sys.stderr) @property def batch_mode(self): diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py b/pylib/cqlshlib/test/test_cqlsh_completion.py index 112474e7c712..53ab1908ed45 100644 --- a/pylib/cqlshlib/test/test_cqlsh_completion.py +++ b/pylib/cqlshlib/test/test_cqlsh_completion.py @@ -114,7 +114,8 @@ def _get_completions(self, inputstring, split_completed_lines=True): def _trycompletions_inner(self, inputstring, immediate='', choices=(), other_choices_ok=False, - split_completed_lines=True): + split_completed_lines=True, + ignore_system_keyspaces=False): """ Test tab completion in cqlsh. Enters in the text in inputstring, then simulates a tab keypress to see what is immediately completed (this @@ -132,17 +133,22 @@ def _trycompletions_inner(self, inputstring, immediate='', choices=(), self.assertEqual(completed, immediate, msg=msg) return + if ignore_system_keyspaces: + completed = list(filter(lambda s: not s.startswith('system'), completed)) + if other_choices_ok: self.assertEqual(set(choices), completed.intersection(choices)) else: self.assertEqual(set(choices), set(completed)) def trycompletions(self, inputstring, immediate='', choices=(), - other_choices_ok=False, split_completed_lines=True): + other_choices_ok=False, split_completed_lines=True, + ignore_system_keyspaces=False): try: self._trycompletions_inner(inputstring, immediate, choices, other_choices_ok=other_choices_ok, - split_completed_lines=split_completed_lines) + split_completed_lines=split_completed_lines, + ignore_system_keyspaces=ignore_system_keyspaces) finally: try: self.cqlsh.send(CTRL_C) # cancel any current line @@ -175,7 +181,43 @@ def test_complete_in_uuid(self): pass def test_complete_in_select(self): - pass + self.trycompletions('SELECT ', + choices=('*', '', + '-', '', '', '', '', + '', '', '', + 'ABS', 'AVG', 'CAST', 'COUNT', 'DISTINCT', + 'EXP', 'JSON', 'LOG', 'LOG10', + 'MAP_KEYS', 'MAP_VALUES', + 'MIN', 'MAX', + 'MIN_WRITETIME', 'MAX_WRITETIME', + 'ROUND', 'SUM', 'TOKEN', + 'TO_DATE', 'TO_TIMESTAMP', 'TO_UNIX_TIMESTAMP', + 'TTL', 'WRITETIME', + 'COLLECTION_AVG', 'COLLECTION_COUNT', 'COLLECTION_MAX', + 'COLLECTION_MIN', 'COLLECTION_SUM', + 'MASK_DEFAULT', 'MASK_HASH', 'MASK_INNER', 'MASK_NULL', + 'MASK_OUTER', 'MASK_REPLACE', + '[', '{', 'false', 'true', 'NULL' + ), + other_choices_ok=True + ) + + def test_complete_in_select_where(self): + self.trycompletions('SELECT * FROM system.peers WHERE ', + choices=('', '', 'peer', 'CURRENT_DATE()', 'CURRENT_TIME()', + 'CURRENT_TIMEUUID()', 'CURRENT_TIMESTAMP()', 'TOKEN', + 'MIN_TIMEUUID', 'MAX_TIMEUUID') + ) + + def test_complete_in_select_where_equal(self): + self.trycompletions('SELECT * FROM system.peers WHERE rack = ', + choices=('-', '', '', '', '', + '', '', '', + '[', '{', 'false', 'true', 'NULL', + 'TOKEN', 'MIN_TIMEUUID', 'MAX_TIMEUUID', + 'CURRENT_DATE()', 'CURRENT_TIME()', 'CURRENT_TIMEUUID()', 'CURRENT_TIMESTAMP()' + ) + ) def test_complete_in_insert(self): self.trycompletions('INSERT INTO ', @@ -376,7 +418,8 @@ def test_complete_in_update(self): self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs'", choices=[',', 'WHERE']) self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs' WHERE ", - choices=['TOKEN(', 'lonelykey']) + choices=['CURRENT_DATE()', 'CURRENT_TIME()', 'CURRENT_TIMESTAMP()', + 'CURRENT_TIMEUUID()', 'TOKEN', 'MIN_TIMEUUID', 'MAX_TIMEUUID', 'lonelykey']) self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs' WHERE lonel", immediate='ykey ') @@ -385,7 +428,8 @@ def test_complete_in_update(self): self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs' WHERE lonelykey = 0.0 ", choices=['AND', 'IF', ';']) self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs' WHERE lonelykey = 0.0 AND ", - choices=['TOKEN(', 'lonelykey']) + choices=['CURRENT_DATE()', 'CURRENT_TIME()', 'CURRENT_TIMESTAMP()', + 'CURRENT_TIMEUUID()', 'TOKEN', 'MIN_TIMEUUID', 'MAX_TIMEUUID', 'lonelykey']) self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs' WHERE TOKEN(lonelykey ", choices=[',', ')']) @@ -397,7 +441,7 @@ def test_complete_in_update(self): choices=['EXISTS', '', '']) self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs' WHERE TOKEN(lonelykey) <= TOKEN(13) IF EXISTS ", - choices=['>=', '!=', '<=', 'IN','[', ';', '=', '<', '>', '.', 'CONTAINS']) + choices=['>=', '!=', '<=', 'IN', '[', ';', '=', '<', '>', '.', 'CONTAINS']) self.trycompletions("UPDATE empty_table SET lonelycol = 'eggs' WHERE TOKEN(lonelykey) <= TOKEN(13) IF lonelykey ", choices=['>=', '!=', '<=', 'IN', '=', '<', '>', 'CONTAINS']) @@ -461,10 +505,11 @@ def test_complete_in_delete(self): self.trycompletions('DELETE FROM twenty_rows_composite_table USING TIMESTAMP 0 ', immediate='WHERE ') self.trycompletions('DELETE FROM twenty_rows_composite_table USING TIMESTAMP 0 WHERE ', - choices=['a', 'b', 'TOKEN(']) + choices=['a', 'b', 'CURRENT_DATE()', 'CURRENT_TIME()', 'CURRENT_TIMESTAMP()', + 'CURRENT_TIMEUUID()', 'MAX_TIMEUUID', 'MIN_TIMEUUID', 'TOKEN']) self.trycompletions('DELETE FROM twenty_rows_composite_table USING TIMESTAMP 0 WHERE a ', - choices=['<=', '>=', 'BETWEEN', 'CONTAINS', 'IN', 'NOT' , '[', '=', '<', '>', '!=']) + choices=['<=', '>=', 'BETWEEN', 'CONTAINS', 'IN', 'NOT', '[', '=', '<', '>', '!=']) self.trycompletions('DELETE FROM twenty_rows_composite_table USING TIMESTAMP 0 WHERE TOKEN(', immediate='a ') @@ -476,7 +521,7 @@ def test_complete_in_delete(self): choices=['>=', '<=', '=', '<', '>']) self.trycompletions('DELETE FROM twenty_rows_composite_table USING TIMESTAMP 0 WHERE TOKEN(a) >= ', choices=['false', 'true', '', - 'token(', '-', '', 'TOKEN', + '-', '', 'TOKEN', '', '', '{', '[', 'NULL', '', '', '']) From 95c6fd481a39cc2a41e78c662e1c9dafea64ee8b Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Wed, 2 Apr 2025 11:00:22 +0100 Subject: [PATCH 275/340] Serialization improvements - Share TableMetadatas and PartitionKey for PartialTxn serialization Also: - TxnReference et al should reference uniqueId, and avoid serializing ksName/cfName - Don't double count shared keys when estimating size on heap patch by Benedict; reviewed by David Capwell for CASSANDRA-20578 --- modules/accord | 2 +- .../cassandra/cql3/ColumnsExpression.java | 20 +- .../org/apache/cassandra/cql3/Operations.java | 17 +- .../org/apache/cassandra/cql3/Ordering.java | 18 +- .../cql3/conditions/ColumnCondition.java | 59 +-- .../cql3/statements/BatchStatement.java | 2 +- .../cql3/statements/CQL3CasRequest.java | 32 +- .../cql3/statements/DeleteStatement.java | 2 +- .../statements/ModificationStatement.java | 42 +- .../cql3/statements/TransactionStatement.java | 124 +++--- .../cql3/statements/UpdateStatement.java | 8 +- .../cql3/transactions/ReferenceOperation.java | 12 +- .../cql3/transactions/RowDataReference.java | 20 +- .../transactions/SelectReferenceSource.java | 6 + .../db/PartitionRangeReadCommand.java | 5 + .../org/apache/cassandra/db/ReadCommand.java | 121 +++-- .../db/SinglePartitionReadCommand.java | 45 ++ .../db/partitions/PartitionUpdate.java | 40 ++ .../rows/UnfilteredRowIteratorSerializer.java | 22 +- ...ricParameterisedUnversionedSerializer.java | 78 ++++ ...etricParameterisedVersionedSerializer.java | 104 +++++ .../ParameterisedUnversionedSerializer.java | 23 + .../io/ParameterisedVersionedSerializer.java | 23 + .../cassandra/schema/TableMetadata.java | 14 + .../cassandra/service/StorageProxy.java | 16 +- .../accord/AccordFetchCoordinator.java | 27 +- .../service/accord/AccordObjectSizes.java | 5 +- .../service/accord/AccordSerializers.java | 51 +-- .../service/accord/AccordService.java | 4 +- .../service/accord/api/AccordAgent.java | 3 +- .../service/accord/api/AccordRoutableKey.java | 1 + .../interop/AccordInteropExecution.java | 43 +- .../serializers/AbstractSortedCollector.java | 137 ++++++ .../serializers/CommandSerializers.java | 123 +++--- .../IVersionedWithKeysSerializer.java | 280 +++++------- .../accord/serializers/KeySerializers.java | 6 +- .../accord/serializers/TableMetadatas.java | 415 ++++++++++++++++++ .../serializers/TableMetadatasAndKeys.java | 253 +++++++++++ .../accord/serializers/TxnSerializer.java | 25 ++ .../service/accord/txn/AbstractKeySorted.java | 7 +- .../accord/txn/AbstractSerialized.java | 96 +--- .../service/accord/txn/AccordUpdate.java | 25 +- .../service/accord/txn/TxnCondition.java | 186 +++++--- .../service/accord/txn/TxnNamedRead.java | 104 +++-- .../service/accord/txn/TxnQuery.java | 4 +- .../cassandra/service/accord/txn/TxnRead.java | 79 ++-- .../service/accord/txn/TxnReference.java | 59 ++- .../accord/txn/TxnReferenceOperation.java | 39 +- .../accord/txn/TxnReferenceOperations.java | 44 +- .../service/accord/txn/TxnReferenceValue.java | 55 ++- .../service/accord/txn/TxnUpdate.java | 113 ++--- .../service/accord/txn/TxnWrite.java | 206 +++++---- .../accord/txn/UnrecoverableRepairUpdate.java | 7 +- .../ConsensusMigrationMutationHelper.java | 28 +- .../reads/repair/BlockingReadRepair.java | 9 +- .../cassandra/utils/ArraySerializers.java | 25 ++ .../utils/CollectionSerializers.java | 87 ++++ .../cassandra/utils/FastByteOperations.java | 4 +- .../service/accord/AccordJournalBurnTest.java | 3 +- .../accord/BurnTestKeySerializers.java | 66 +-- .../cql3/conditions/ColumnConditionTest.java | 44 +- .../ClusteringColumnRestrictionsTest.java | 8 +- .../org/apache/cassandra/io/Serializers.java | 30 ++ .../service/accord/AccordTestUtils.java | 13 +- .../CommandsForKeySerializerTest.java | 2 +- .../IVersionedWithKeysSerializerTest.java | 160 +++++++ .../serializers/KeySerializersTest.java | 2 +- .../TableMetadatasAndKeysTest.java | 163 +++++++ .../serializers/TableMetadatasTest.java | 153 +++++++ .../accord/txn/AbstractKeySortedTest.java | 6 - .../service/accord/txn/AccordUpdateTest.java | 6 +- .../service/accord/txn/TxnConditionTest.java | 22 +- .../cassandra/utils/AccordGenerators.java | 78 +++- .../cassandra/utils/CassandraGenerators.java | 3 +- 74 files changed, 3157 insertions(+), 1007 deletions(-) create mode 100644 src/java/org/apache/cassandra/io/AsymmetricParameterisedUnversionedSerializer.java create mode 100644 src/java/org/apache/cassandra/io/AsymmetricParameterisedVersionedSerializer.java create mode 100644 src/java/org/apache/cassandra/io/ParameterisedUnversionedSerializer.java create mode 100644 src/java/org/apache/cassandra/io/ParameterisedVersionedSerializer.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/AbstractSortedCollector.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/TableMetadatas.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeys.java create mode 100644 src/java/org/apache/cassandra/service/accord/serializers/TxnSerializer.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializerTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeysTest.java create mode 100644 test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasTest.java diff --git a/modules/accord b/modules/accord index 134df57677bb..ba151600b1f8 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 134df57677bbd5092994923a4dc2f15cd1d033d1 +Subproject commit ba151600b1f8f6a493f585810ac14fe35371c762 diff --git a/src/java/org/apache/cassandra/cql3/ColumnsExpression.java b/src/java/org/apache/cassandra/cql3/ColumnsExpression.java index 78b0b8e9cd5b..b078fe5196ae 100644 --- a/src/java/org/apache/cassandra/cql3/ColumnsExpression.java +++ b/src/java/org/apache/cassandra/cql3/ColumnsExpression.java @@ -252,18 +252,21 @@ String toCQLString(List identifiers, ElementExpression.Raw raw */ private final List columns; + private final TableMetadata table; + /** * The element if this is an ELEMENT expression, {@code null} otherwise. * Like UDT field or collection element. */ private final ElementExpression element; //Only relevant for ELEMENT kind - ColumnsExpression(Kind kind, AbstractType type, List columns, ElementExpression element) + ColumnsExpression(Kind kind, AbstractType type, List columns, TableMetadata table, ElementExpression element) { assert kind != Kind.ELEMENT || element != null: "Element expression must have an element"; this.kind = kind; this.type = type; this.columns = columns; + this.table = table; this.element = element; // This could be null for kinds that don't use it } @@ -281,9 +284,9 @@ public AbstractType type() * @param column the column * @return an expression for a single column. */ - public static ColumnsExpression singleColumn(ColumnMetadata column) + public static ColumnsExpression singleColumn(ColumnMetadata column, TableMetadata table) { - return new ColumnsExpression(Kind.SINGLE_COLUMN, column.type, ImmutableList.of(column), null); + return new ColumnsExpression(Kind.SINGLE_COLUMN, column.type, ImmutableList.of(column), table, null); } /** @@ -292,10 +295,10 @@ public static ColumnsExpression singleColumn(ColumnMetadata column) * @return an expression for multi-columns. */ @VisibleForTesting - public static ColumnsExpression multiColumns(List columns) + public static ColumnsExpression multiColumns(List columns, TableMetadata table) { AbstractType type = new TupleType(ColumnMetadata.types(columns)); - return new ColumnsExpression(Kind.MULTI_COLUMN, type, ImmutableList.copyOf(columns),null); + return new ColumnsExpression(Kind.MULTI_COLUMN, type, ImmutableList.copyOf(columns), table,null); } /** @@ -307,6 +310,11 @@ public ColumnMetadata firstColumn() return columns().get(0); } + public TableMetadata table() + { + return table; + } + /** * Returns the last column metadata. * @return the last column metadata. @@ -565,7 +573,7 @@ public ColumnsExpression prepare(TableMetadata table) AbstractType type = kind.type(table, columns, elementExpression); - return new ColumnsExpression(kind, type, columns, elementExpression); + return new ColumnsExpression(kind, type, columns, table, elementExpression); } /** diff --git a/src/java/org/apache/cassandra/cql3/Operations.java b/src/java/org/apache/cassandra/cql3/Operations.java index 305d2baa8922..f0dbd91f22e1 100644 --- a/src/java/org/apache/cassandra/cql3/Operations.java +++ b/src/java/org/apache/cassandra/cql3/Operations.java @@ -28,6 +28,7 @@ import org.apache.cassandra.cql3.statements.StatementType; import org.apache.cassandra.cql3.transactions.ReferenceOperation; import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.TableMetadata; /** * A set of Operations. @@ -63,7 +64,7 @@ public Operations(StatementType type, boolean isForTxn) this.isForTxn = isForTxn; } - private Operations(Operations other) + private Operations(Operations other, TableMetadata tableMetadata) { Preconditions.checkState(!other.isForTxn, "Unable to migrate from txn to txn"); Preconditions.checkState(other.regularSubstitutions.isEmpty() && other.staticSubstitutions.isEmpty(), "Transaction substitutions are defined for a non-transaction operations! regular=%s, static=%s", other.regularSubstitutions, other.staticSubstitutions); @@ -71,12 +72,12 @@ private Operations(Operations other) type = other.type; isForTxn = true; for (Operation opt : other) - add(opt); + add(opt, tableMetadata); } - public Operations forTxn() + public Operations forTxn(TableMetadata tableMetadata) { - return new Operations(this); + return new Operations(this, tableMetadata); } /** @@ -122,13 +123,15 @@ public List staticOperations() /** * Adds the specified Operation to this set of operations. - * @param operation the operation to add + * + * @param operation the operation to add + * @param tableMetadata */ - public void add(Operation operation) + public void add(Operation operation, TableMetadata tableMetadata) { if (isForTxn && (operation.requiresRead() || operation.requiresTimestamp())) { - add(operation.column, ReferenceOperation.create(operation)); + add(operation.column, ReferenceOperation.create(operation, tableMetadata)); return; } if (operation.column.isStatic()) diff --git a/src/java/org/apache/cassandra/cql3/Ordering.java b/src/java/org/apache/cassandra/cql3/Ordering.java index f05245a2f70f..f8485edfbdd5 100644 --- a/src/java/org/apache/cassandra/cql3/Ordering.java +++ b/src/java/org/apache/cassandra/cql3/Ordering.java @@ -46,10 +46,12 @@ public Ordering(Expression expression, Direction direction) public static abstract class Expression { protected final ColumnMetadata columnMetadata; + protected final TableMetadata tableMetadata; - public Expression(ColumnMetadata columnMetadata) + public Expression(ColumnMetadata columnMetadata, TableMetadata tableMetadata) { this.columnMetadata = columnMetadata; + this.tableMetadata = tableMetadata; } public boolean hasNonClusteredOrdering() @@ -73,9 +75,9 @@ public ColumnMetadata getColumn() */ public static class SingleColumn extends Expression { - public SingleColumn(ColumnMetadata columnMetadata) + public SingleColumn(ColumnMetadata columnMetadata, TableMetadata tableMetadata) { - super(columnMetadata); + super(columnMetadata, tableMetadata); } } @@ -86,9 +88,9 @@ public static class Ann extends Expression { final Term vectorValue; - public Ann(ColumnMetadata columnMetadata, Term vectorValue) + public Ann(ColumnMetadata columnMetadata, TableMetadata tableMetadata, Term vectorValue) { - super(columnMetadata); + super(columnMetadata, tableMetadata); this.vectorValue = vectorValue; } @@ -101,7 +103,7 @@ public boolean hasNonClusteredOrdering() @Override public SingleRestriction toRestriction() { - return new SimpleRestriction(ColumnsExpression.singleColumn(columnMetadata), + return new SimpleRestriction(ColumnsExpression.singleColumn(columnMetadata, tableMetadata), Operator.ANN, Terms.of(vectorValue)); } @@ -153,7 +155,7 @@ public static class SingleColumn implements Expression @Override public Ordering.Expression bind(TableMetadata table, VariableSpecifications boundNames) { - return new Ordering.SingleColumn(table.getExistingColumn(column)); + return new Ordering.SingleColumn(table.getExistingColumn(column), table); } } @@ -174,7 +176,7 @@ public Ordering.Expression bind(TableMetadata table, VariableSpecifications boun ColumnMetadata column = table.getExistingColumn(columnId); Term value = vectorValue.prepare(table.keyspace, column); value.collectMarkerSpecification(boundNames); - return new Ordering.Ann(column, value); + return new Ordering.Ann(column, table, value); } } } diff --git a/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java b/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java index b3813d4f2007..1f9ba51862a4 100644 --- a/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java +++ b/src/java/org/apache/cassandra/cql3/conditions/ColumnCondition.java @@ -43,11 +43,12 @@ import org.apache.cassandra.db.rows.ColumnData; import org.apache.cassandra.db.rows.ComplexColumnData; import org.apache.cassandra.db.rows.Row; -import org.apache.cassandra.io.UnversionedSerializer; +import org.apache.cassandra.io.ParameterisedUnversionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.utils.ByteBufferUtil; import static org.apache.cassandra.cql3.statements.RequestValidations.*; @@ -121,21 +122,23 @@ public ColumnCondition.Bound bind(QueryOptions options) private Bound bindSingleColumn(QueryOptions options) { ColumnMetadata column = columnsExpression.firstColumn(); + TableMetadata table = columnsExpression.table(); if (column.type.isMultiCell()) - return new MultiCellBound(column, operator, toValue(column.type, bindAndGetTerms(options))); + return new MultiCellBound(column, table, operator, toValue(column.type, bindAndGetTerms(options))); - return new SimpleBound(column, operator, toValue(column.type, bindAndGetTerms(options))); + return new SimpleBound(column, table, operator, toValue(column.type, bindAndGetTerms(options))); } private ColumnCondition.Bound bindElement(QueryOptions options) { ColumnMetadata column = columnsExpression.firstColumn(); + TableMetadata table = columnsExpression.table(); ByteBuffer keyOrIndex = columnsExpression.element(options); if (column.type.isCollection()) { checkNotNull(keyOrIndex, "Invalid null value for %s element access", column.type instanceof MapType ? "map" : "list"); } - return new ElementOrFieldAccessBound(column, keyOrIndex, operator, toValue(columnsExpression.type(), bindAndGetTerms(options))); + return new ElementOrFieldAccessBound(column, table, keyOrIndex, operator, toValue(columnsExpression.type(), bindAndGetTerms(options))); } private ByteBuffer toValue(AbstractType type, List values) @@ -181,7 +184,7 @@ public String toCQLString() public interface BoundSerializer { default void serialize(T bound, DataOutputPlus out) throws IOException {} - Bound deserialize(DataInputPlus in, ColumnMetadata column, Operator operator, ByteBuffer value) throws IOException; + Bound deserialize(DataInputPlus in, ColumnMetadata column, TableMetadata table, Operator operator, ByteBuffer value) throws IOException; default long serializedSize(T condition) { return 0; } } @@ -216,12 +219,14 @@ public static BoundKind valueOf(int id) public static abstract class Bound { public final ColumnMetadata column; + public final TableMetadata table; public final Operator operator; public final ByteBuffer value; - protected Bound(ColumnMetadata column, Operator operator, ByteBuffer value) + protected Bound(ColumnMetadata column, TableMetadata table, Operator operator, ByteBuffer value) { this.column = column; + this.table = table; this.operator = operator; this.value = value; } @@ -233,11 +238,12 @@ protected Bound(ColumnMetadata column, Operator operator, ByteBuffer value) public abstract BoundKind kind(); - public static final UnversionedSerializer serializer = new UnversionedSerializer<>() { + public static final ParameterisedUnversionedSerializer serializer = new ParameterisedUnversionedSerializer<>() { @Override - public void serialize(Bound bound, DataOutputPlus out) throws IOException + public void serialize(Bound bound, TableMetadatas tables, DataOutputPlus out) throws IOException { - columnMetadataSerializer.serialize(bound.column, out); + tables.serialize(bound.table, out); + columnMetadataSerializer.serialize(bound.column, bound.table, out); bound.operator.writeToUnsignedVInt(out); nullableByteBufferSerializer.serialize(bound.value, out); ColumnCondition.BoundKind kind = bound.kind(); @@ -246,20 +252,22 @@ public void serialize(Bound bound, DataOutputPlus out) throws IOException } @Override - public Bound deserialize(DataInputPlus in) throws IOException + public Bound deserialize(TableMetadatas tables, DataInputPlus in) throws IOException { - ColumnMetadata column = columnMetadataSerializer.deserialize(in); + TableMetadata table = tables.deserialize(in); + ColumnMetadata column = columnMetadataSerializer.deserialize(table, in); Operator operator = Operator.readFromUnsignedVInt(in); ByteBuffer value = nullableByteBufferSerializer.deserialize(in); ColumnCondition.BoundKind boundKind = ColumnCondition.BoundKind.valueOf(in.readUnsignedVInt32()); - return boundKind.serializer.deserialize(in, column, operator, value); + return boundKind.serializer.deserialize(in, column, table, operator, value); } @Override - public long serializedSize(Bound bound) + public long serializedSize(Bound bound, TableMetadatas tables) { ColumnCondition.BoundKind kind = bound.kind(); - return columnMetadataSerializer.serializedSize(bound.column) + return tables.serializedSize(bound.table) + + columnMetadataSerializer.serializedSize(bound.column, bound.table) + bound.operator.sizeAsUnsignedVInt() + nullableByteBufferSerializer.serializedSize(bound.value) + sizeofUnsignedVInt(kind.ordinal()) @@ -273,11 +281,11 @@ public long serializedSize(Bound bound) */ public static class SimpleBound extends Bound { - private static final BoundSerializer serializer = (in, column, operator, value) -> new SimpleBound(column, operator, value); + private static final BoundSerializer serializer = (in, column, table, operator, value) -> new SimpleBound(column, table, operator, value); - public SimpleBound(ColumnMetadata column, Operator operator, ByteBuffer value) + public SimpleBound(ColumnMetadata column, TableMetadata table, Operator operator, ByteBuffer value) { - super(column, operator, value); + super(column, table, operator, value); } @Override @@ -321,9 +329,9 @@ public int hashCode() public static class SimpleClusteringBound extends SimpleBound { - public SimpleClusteringBound(ColumnMetadata column, Operator operator, ByteBuffer value) + public SimpleClusteringBound(ColumnMetadata column, TableMetadata table, Operator operator, ByteBuffer value) { - super(column, operator, value); + super(column, table, operator, value); assert column.isClusteringColumn() : String.format("Column must be a clustering column, but given %s", column); } @@ -348,10 +356,10 @@ public void serialize(ElementOrFieldAccessBound bound, DataOutputPlus out) throw } @Override - public Bound deserialize(DataInputPlus in, ColumnMetadata column, Operator operator, ByteBuffer value) throws IOException + public Bound deserialize(DataInputPlus in, ColumnMetadata column, TableMetadata table, Operator operator, ByteBuffer value) throws IOException { ByteBuffer keyOrIndex = nullableByteBufferSerializer.deserialize(in); - return new ElementOrFieldAccessBound(column, keyOrIndex, operator, value); + return new ElementOrFieldAccessBound(column, table, keyOrIndex, operator, value); } @Override @@ -372,11 +380,12 @@ public long serializedSize(ElementOrFieldAccessBound condition) public ElementOrFieldAccessBound(ColumnMetadata column, + TableMetadata table, ByteBuffer keyOrIndex, Operator operator, ByteBuffer value) { - super(column, operator, value); + super(column, table, operator, value); this.elementType = ((MultiElementType) column.type).elementType(keyOrIndex); this.keyOrIndex = keyOrIndex; } @@ -425,11 +434,11 @@ public int hashCode() */ public static final class MultiCellBound extends Bound { - private static final BoundSerializer serializer = (in, column, operator, value) -> new MultiCellBound(column, operator, value); + private static final BoundSerializer serializer = (in, column, table, operator, value) -> new MultiCellBound(column, table, operator, value); - public MultiCellBound(ColumnMetadata column, Operator operator, ByteBuffer value) + public MultiCellBound(ColumnMetadata column, TableMetadata table, Operator operator, ByteBuffer value) { - super(column, operator, value); + super(column, table, operator, value); assert column.type.isMultiCell() : String.format("Unexpected type: %s", column.type); } diff --git a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java index d12f9e8922d6..68b2885e2951 100644 --- a/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/BatchStatement.java @@ -344,7 +344,7 @@ public List getMutations(ClientState state, } QueryOptions statementOptions = options.forStatement(i); long timestamp = attrs.getTimestamp(batchTimestamp, statementOptions); - statement.addUpdates(collector, partitionKeys.get(i), state, statementOptions, local, timestamp, nowInSeconds, requestTime, false); + statement.addUpdates(collector, partitionKeys.get(i), state, statementOptions, local, timestamp, nowInSeconds, requestTime); } if (tablesWithZeroGcGs != null) diff --git a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java index 03a9876efa50..816fc9a814f9 100644 --- a/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java +++ b/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java @@ -32,6 +32,7 @@ import org.slf4j.LoggerFactory; import accord.api.Update; +import accord.primitives.Keys; import accord.primitives.Txn; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.UpdateParameters; @@ -60,6 +61,9 @@ import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.service.CASRequest; import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnDataKeyValue; @@ -424,7 +428,7 @@ public String toCQL() public TxnCondition asTxnCondition() { - TxnReference txnReference = new TxnReference(txnDataName(CAS_READ), null); + TxnReference txnReference = new TxnReference(txnDataName(CAS_READ), null, null); return new TxnCondition.Exists(txnReference, TxnCondition.Kind.IS_NULL); } } @@ -449,7 +453,7 @@ public String toCQL() public TxnCondition asTxnCondition() { - TxnReference txnReference = new TxnReference(txnDataName(CAS_READ), null); + TxnReference txnReference = new TxnReference(txnDataName(CAS_READ), null, null); return new TxnCondition.Exists(txnReference, TxnCondition.Kind.IS_NOT_NULL); } } @@ -499,27 +503,30 @@ public String toString() public Txn toAccordTxn(ClusterMetadata cm, ConsistencyLevel consistencyLevel, ConsistencyLevel commitConsistencyLevel, ClientState clientState, long nowInSecs) { SinglePartitionReadCommand readCommand = readCommand(nowInSecs); - Update update = createUpdate(cm, clientState, commitConsistencyLevel); + TableMetadata metadata = getTableMetadata(cm, this.metadata.id); + TableMetadatas.Complete tables = TableMetadatas.of(metadata); + TableMetadatasAndKeys tablesAndKeys = new TableMetadatasAndKeys(tables, Keys.of(new PartitionKey(metadata.id, readCommand.partitionKey()))); + Update update = createUpdate(cm, tables, clientState, commitConsistencyLevel); // If the write strategy is sending all writes through Accord there is no need to use the supplied consistency // level since Accord will manage reading safely - TableParams tableParams = getTableMetadata(cm, metadata.id).params; + TableParams tableParams = tables.getMetadata(metadata.id).params; consistencyLevel = tableParams.transactionalMode.readCLForMode(tableParams.transactionalMigrationFrom, consistencyLevel, cm, metadata.id, readCommand.partitionKey().getToken()); - TxnRead read = TxnRead.createCasRead(readCommand, consistencyLevel); + TxnRead read = TxnRead.createCasRead(readCommand, consistencyLevel, tablesAndKeys); // In a CAS requesting only one key is supported and writes // can't be dependent on any data that is read (only conditions) // so the only relevant keys are the read key - return new Txn.InMemory(read.keys(), read, TxnQuery.CONDITION, update); + return new Txn.InMemory(read.keys(), read, TxnQuery.CONDITION, update, tablesAndKeys); } - private Update createUpdate(ClusterMetadata cm, ClientState clientState, ConsistencyLevel commitConsistencyLevel) + private Update createUpdate(ClusterMetadata cm, TableMetadatas.Complete tables, ClientState clientState, ConsistencyLevel commitConsistencyLevel) { // Potentially ignore commit consistency level if TransactionalMode is full // since it is safe to match what non-SERIAL writes do - TableMetadata tableMetadata = getTableMetadata(cm, metadata.id); + TableMetadata tableMetadata = tables.getMetadata(metadata.id); TableParams tableParams = tableMetadata.params; - commitConsistencyLevel = tableParams.transactionalMode.commitCLForMode(tableParams.transactionalMigrationFrom, commitConsistencyLevel, cm, metadata.id, key.getToken()); + commitConsistencyLevel = tableParams.transactionalMode.commitCLForMode(tableParams.transactionalMigrationFrom, commitConsistencyLevel, cm, tableMetadata.id, key.getToken()); // CAS requires using the new txn timestamp to correctly linearize some kinds of updates - return new TxnUpdate(createWriteFragments(clientState), createCondition(), commitConsistencyLevel, false); + return new TxnUpdate(tables, createWriteFragments(clientState), createCondition(), commitConsistencyLevel, false); } private TxnCondition createCondition() @@ -538,6 +545,7 @@ private TxnCondition createCondition() private List createWriteFragments(ClientState state) { + PartitionKey partitionKey = new PartitionKey(metadata.id, key); List fragments = new ArrayList<>(); int idx = 0; for (RowUpdate update : updates) @@ -547,14 +555,14 @@ private List createWriteFragments(ClientState state) // see CASSANDRA-18337 ModificationStatement modification = update.stmt.forTxn(); QueryOptions options = update.options; - TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options); + TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options, partitionKey); fragments.add(fragment); } for (RangeDeletion rangeDeletion : rangeDeletions) { ModificationStatement modification = rangeDeletion.stmt; QueryOptions options = rangeDeletion.options; - TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options); + TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx++, state, options, partitionKey); fragments.add(fragment); } return fragments; diff --git a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java index f2bbec1458d6..e34477dcc8cb 100644 --- a/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/DeleteStatement.java @@ -178,7 +178,7 @@ protected ModificationStatement prepareInternal(ClientState state, Operation op = deletion.prepare(metadata.keyspace, def, metadata); op.collectMarkerSpecification(bindVariables); - operations.add(op); + operations.add(op, metadata); } StatementRestrictions restrictions = newRestrictions(state, diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index cecf839e8be0..a41b8dd4c402 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -111,6 +111,8 @@ import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.StorageProxy; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys.KeyCollector; import org.apache.cassandra.service.accord.txn.TxnReferenceOperation; import org.apache.cassandra.service.accord.txn.TxnReferenceOperations; import org.apache.cassandra.service.accord.txn.TxnWrite; @@ -642,8 +644,8 @@ private ResultMessage executeWithoutCondition(QueryState queryState, QueryOption false, options.getTimestamp(queryState), options.getNowInSeconds(queryState), - requestTime, - false); + requestTime + ); if (!mutations.isEmpty()) { StorageProxy.mutateWithTriggers(mutations, cl, false, requestTime); @@ -808,7 +810,7 @@ public ResultMessage executeInternalWithoutCondition(QueryState queryState, Quer { long timestamp = options.getTimestamp(queryState); long nowInSeconds = options.getNowInSeconds(queryState); - for (IMutation mutation : getMutations(queryState.getClientState(), options, true, timestamp, nowInSeconds, requestTime, false)) + for (IMutation mutation : getMutations(queryState.getClientState(), options, true, timestamp, nowInSeconds, requestTime)) mutation.apply(); return null; } @@ -857,19 +859,18 @@ static RowIterator casInternal(ClientState state, CQL3CasRequest request, long t * @return list of the mutations */ public List getMutations(ClientState state, - QueryOptions options, - boolean local, - long timestamp, - long nowInSeconds, - Dispatcher.RequestTime requestTime, - boolean constructingAccordBaseUpdate) + QueryOptions options, + boolean local, + long timestamp, + long nowInSeconds, + Dispatcher.RequestTime requestTime) { List keys = buildPartitionKeyNames(options, state); if (keys.size() == 1) { SingleTableSinglePartitionUpdatesCollector collector = new SingleTableSinglePartitionUpdatesCollector(metadata, updatedColumns); - addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime, constructingAccordBaseUpdate); + addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime); // local means this is test or internal things that are bypassing distributed system modification/checks return collector.toMutations(state, local ? PotentialTxnConflicts.ALLOW : PotentialTxnConflicts.DISALLOW); } @@ -877,7 +878,7 @@ public List getMutations(ClientState state, { HashMultiset perPartitionKeyCounts = HashMultiset.create(keys); SingleTableUpdatesCollector collector = new SingleTableUpdatesCollector(metadata, updatedColumns, perPartitionKeyCounts); - addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime, constructingAccordBaseUpdate); + addUpdates(collector, keys, state, options, local, timestamp, nowInSeconds, requestTime); // local means this is test or internal things that are bypassing distributed system modification/checks return collector.toMutations(state, local ? PotentialTxnConflicts.ALLOW : PotentialTxnConflicts.DISALLOW); } @@ -885,7 +886,7 @@ public List getMutations(ClientState state, public PartitionUpdate getTxnUpdate(ClientState state, QueryOptions options) { - List mutations = getMutations(state, options, false, 0, 0, new Dispatcher.RequestTime(0, 0), true); + List mutations = getMutations(state, options, false, 0, 0, new Dispatcher.RequestTime(0, 0)); // TODO: Temporary fix for CASSANDRA-20079 if (mutations.isEmpty()) return PartitionUpdate.emptyUpdate(metadata, metadata.partitioner.decorateKey(ByteBufferUtil.EMPTY_BYTE_BUFFER)); @@ -923,7 +924,7 @@ public ModificationStatement forTxn() { migrated = txnStmt; if (migrated == null) - txnStmt = migrated = withOperations(operations.forTxn()); + txnStmt = migrated = withOperations(operations.forTxn(metadata)); } } return migrated; @@ -937,11 +938,18 @@ public List getSubstitutions() return operations.allSubstitutions(); } - public TxnWrite.Fragment getTxnWriteFragment(int index, ClientState state, QueryOptions options) + public TxnWrite.Fragment getTxnWriteFragment(int index, ClientState state, QueryOptions options, PartitionKey partitionKey) { PartitionUpdate baseUpdate = getTxnUpdate(state, options); TxnReferenceOperations referenceOps = getTxnReferenceOps(options, state); - return new TxnWrite.Fragment(index, baseUpdate, referenceOps); + return new TxnWrite.Fragment(partitionKey, index, baseUpdate, referenceOps); + } + + public TxnWrite.Fragment getTxnWriteFragment(int index, ClientState state, QueryOptions options, KeyCollector keyCollector) + { + PartitionUpdate baseUpdate = getTxnUpdate(state, options); + TxnReferenceOperations referenceOps = getTxnReferenceOps(options, state); + return new TxnWrite.Fragment(keyCollector.collect(baseUpdate.metadata(), baseUpdate.partitionKey()), index, baseUpdate, referenceOps); } final void addUpdates(UpdatesCollector collector, @@ -951,8 +959,7 @@ final void addUpdates(UpdatesCollector collector, boolean local, long timestamp, long nowInSeconds, - Dispatcher.RequestTime requestTime, - boolean constructingAccordBaseUpdate) + Dispatcher.RequestTime requestTime) { if (hasSlices()) { @@ -984,7 +991,6 @@ final void addUpdates(UpdatesCollector collector, else for (Slice slice : slices) addUpdateForKey(updateBuilder, slice, params); - } } else diff --git a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java index f61c394954b5..f27c49cfcba6 100644 --- a/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java @@ -27,8 +27,6 @@ import java.util.Map; import java.util.Set; import java.util.SortedSet; -import java.util.TreeSet; -import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; @@ -65,14 +63,14 @@ import org.apache.cassandra.dht.Token; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableParams; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.service.accord.AccordService; -import org.apache.cassandra.service.accord.api.AccordRoutableKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnData; @@ -104,7 +102,6 @@ import static org.apache.cassandra.service.accord.txn.TxnData.txnDataName; import static org.apache.cassandra.service.accord.txn.TxnRead.createTxnRead; import static org.apache.cassandra.service.accord.txn.TxnResult.Kind.retry_new_protocol; -import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.getTableMetadata; import static org.apache.cassandra.service.consensus.migration.ConsensusRequestRouter.shouldReadEphemerally; public class TransactionStatement implements CQLStatement.CompositeCQLStatement, CQLStatement.ReturningCQLStatement @@ -236,7 +233,7 @@ public ResultSet.ResultMetadata getResultMetadata() return resultMetadata; } - TxnNamedRead createNamedRead(NamedSelect namedSelect, QueryOptions options, ClientState state) + TxnNamedRead createNamedRead(NamedSelect namedSelect, QueryOptions options, TableMetadatasAndKeys.KeyCollector keyCollector) { SelectStatement select = namedSelect.select; // We reject reads from both LET and SELECT that do not specify a single row. @@ -246,10 +243,11 @@ TxnNamedRead createNamedRead(NamedSelect namedSelect, QueryOptions options, Clie if (selectQuery.queries.size() != 1) throw new IllegalArgumentException("Within a transaction, SELECT statements must select a single partition; found " + selectQuery.queries.size() + " partitions"); - return new TxnNamedRead(namedSelect.name, Iterables.getOnlyElement(selectQuery.queries)); + SinglePartitionReadCommand command = Iterables.getOnlyElement(selectQuery.queries); + return new TxnNamedRead(namedSelect.name, keyCollector.collect(command.metadata(), command.partitionKey()), command, keyCollector.tables); } - List createNamedReads(NamedSelect namedSelect, QueryOptions options, ClientState state) + List createNamedReads(NamedSelect namedSelect, QueryOptions options, TableMetadatasAndKeys.KeyCollector keyCollector) { SelectStatement select = namedSelect.select; // We reject reads from both LET and SELECT that do not specify a single row. @@ -257,32 +255,33 @@ List createNamedReads(NamedSelect namedSelect, QueryOptions option SinglePartitionReadQuery.Group selectQuery = (SinglePartitionReadQuery.Group) select.getQuery(options, 0); if (selectQuery.queries.size() == 1) - return Collections.singletonList(new TxnNamedRead(namedSelect.name, Iterables.getOnlyElement(selectQuery.queries))); + return Collections.singletonList(new TxnNamedRead(namedSelect.name, keyCollector.collect(select.table, selectQuery.queries.get(0).partitionKey()), selectQuery.queries.get(0), keyCollector.tables)); List list = new ArrayList<>(selectQuery.queries.size()); for (int i = 0; i < selectQuery.queries.size(); i++) - list.add(new TxnNamedRead(txnDataName(RETURNING, i), selectQuery.queries.get(i))); + { + SinglePartitionReadCommand readCommand = selectQuery.queries.get(i); + list.add(new TxnNamedRead(txnDataName(RETURNING, i), keyCollector.collect(readCommand.metadata(), readCommand.partitionKey()), readCommand, keyCollector.tables)); + } return list; } - private List createNamedReads(QueryOptions options, ClientState state, @Nullable Int2ObjectHashMap autoReads, Consumer keyConsumer) + private List createNamedReads(QueryOptions options, @Nullable Int2ObjectHashMap autoReads, TableMetadatasAndKeys.KeyCollector keyCollector) { List reads = new ArrayList<>(assignments.size() + 1); for (NamedSelect select : assignments) { - TxnNamedRead read = createNamedRead(select, options, state); - keyConsumer.accept((Key)read.key()); - minEpoch = Math.max(minEpoch, read.command().metadata().epoch.getEpoch()); + TxnNamedRead read = createNamedRead(select, options, keyCollector); + minEpoch = Math.max(minEpoch, select.select.table.epoch.getEpoch()); reads.add(read); } if (returningSelect != null) { - for (TxnNamedRead read : createNamedReads(returningSelect, options, state)) + for (TxnNamedRead read : createNamedReads(returningSelect, options, keyCollector)) { - keyConsumer.accept((Key)read.key()); - minEpoch = Math.max(minEpoch, read.command().metadata().epoch.getEpoch()); + minEpoch = Math.max(minEpoch, returningSelect.select.table.epoch.getEpoch()); reads.add(read); } } @@ -291,8 +290,7 @@ private List createNamedReads(QueryOptions options, ClientState st { for (NamedSelect select : autoReads.values()) { - TxnNamedRead read = createNamedRead(select, options, state); - keyConsumer.accept((Key)read.key()); + TxnNamedRead read = createNamedRead(select, options, keyCollector); reads.add(read); } } @@ -315,14 +313,43 @@ TxnCondition createCondition(QueryOptions options) return new TxnCondition.BooleanGroup(TxnCondition.Kind.AND, result); } - List createWriteFragments(ClientState state, QueryOptions options, Map autoReads, Set keys) + TableMetadatas.Complete collectTables() + { + TableMetadatas.Collector collector = new TableMetadatas.Collector(); + if (updates != null) + { + for (ModificationStatement modification : updates) + collector.add(modification.metadata); + } + if (assignments != null) + { + for (NamedSelect select : assignments) + collector.add(select.select.table); + } + if (returningSelect != null) + { + collector.add(returningSelect.select.table); + } + if (returningReferences != null) + { + for (RowDataReference ref : returningReferences) + collector.add(ref.table()); + } + return collector.build(); + } + + private Keys toKeys(SortedSet keySet) + { + return new Keys(keySet); + } + + List createWriteFragments(ClientState state, QueryOptions options, Map autoReads, TableMetadatasAndKeys.KeyCollector keyCollector) { List fragments = new ArrayList<>(updates.size()); int idx = 0; for (ModificationStatement modification : updates) { - TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx, state, options); - keys.add(fragment.key); + TxnWrite.Fragment fragment = modification.getTxnWriteFragment(idx, state, options, keyCollector); minEpoch = Math.max(minEpoch, fragment.baseUpdate.metadata().epoch.getEpoch()); fragments.add(fragment); @@ -339,20 +366,7 @@ List createWriteFragments(ClientState state, QueryOptions opt return fragments; } - AccordUpdate createUpdate(ClusterMetadata cm, ClientState state, QueryOptions options, Map autoReads, Set keys) - { - checkArgument(keys.isEmpty(), "Construct update before reads so the key set can be used to determine commit consistency level"); - List writeFragments = createWriteFragments(state, options, autoReads, keys); - ConsistencyLevel commitCL = consistencyLevelForAccordCommit(cm, keys, options.getConsistency()); - return new TxnUpdate(writeFragments, createCondition(options), commitCL, false); - } - - Keys toKeys(SortedSet keySet) - { - return new Keys(keySet); - } - - private ConsistencyLevel consistencyLevelForAccordRead(ClusterMetadata cm, Set keys, @Nullable ConsistencyLevel consistencyLevel) + private ConsistencyLevel consistencyLevelForAccordRead(ClusterMetadata cm, TableMetadatas.Complete tables, Keys keys, @Nullable ConsistencyLevel consistencyLevel) { // Write transactions are read/write so it creates a read and ends up needing a consistency level // which is fine to leave null @@ -369,14 +383,14 @@ private ConsistencyLevel consistencyLevelForAccordRead(ClusterMetadata cm, Set keys, @Nullable ConsistencyLevel consistencyLevel) + private static ConsistencyLevel consistencyLevelForAccordCommit(ClusterMetadata cm, TableMetadatas.Complete tables, TableMetadatasAndKeys.KeyCollector keys, @Nullable ConsistencyLevel consistencyLevel) { checkArgument(!keys.isEmpty(), "keys should not be empty"); // Null means no specific consistency behavior is required from Accord, it's functionally similar to ANY @@ -405,14 +419,14 @@ private static ConsistencyLevel consistencyLevelForAccordCommit(ClusterMetadata // commitCLForMode should return either null or the supplied consistency level // in which case we will commit everything at that CL since Accord doesn't support per table // commit consistency - ConsistencyLevel commitCL = consistencyLevelForAccordCommit(cm, key, consistencyLevel); + ConsistencyLevel commitCL = consistencyLevelForAccordCommit(cm, tables, key, consistencyLevel); if (commitCL != null) return commitCL; } return null; } - private static ConsistencyLevel consistencyLevelForAccordCommit(ClusterMetadata cm, Key key, @Nullable ConsistencyLevel consistencyLevel) + private static ConsistencyLevel consistencyLevelForAccordCommit(ClusterMetadata cm, TableMetadatas.Complete tables, Key key, @Nullable ConsistencyLevel consistencyLevel) { // Null means no specific consistency behavior is required from Accord, it's functionally similar to ANY // if you aren't reading the result back via Accord @@ -422,7 +436,7 @@ private static ConsistencyLevel consistencyLevelForAccordCommit(ClusterMetadata PartitionKey pk = (PartitionKey)key; TableId tableId = pk.table(); Token token = pk.token(); - TableParams tableParams = getTableMetadata(cm, tableId).params; + TableParams tableParams = tables.getMetadata(tableId).params; TransactionalMode mode = tableParams.transactionalMode; TransactionalMigrationFromMode migrationFromMode = tableParams.transactionalMigrationFrom; // commitCLForMode should return either null or the supplied consistency level @@ -434,26 +448,30 @@ private static ConsistencyLevel consistencyLevelForAccordCommit(ClusterMetadata @VisibleForTesting public Txn createTxn(ClientState state, QueryOptions options) { - SortedSet keySet = new TreeSet<>(); ClusterMetadata cm = ClusterMetadata.current(); + TableMetadatas.Complete tables = collectTables(); + TableMetadatasAndKeys.KeyCollector keyCollector = new TableMetadatasAndKeys.KeyCollector(tables); if (updates.isEmpty()) { // TODO: Test case around this... Preconditions.checkState(conditions.isEmpty(), "No condition should exist without updates present"); - List reads = createNamedReads(options, state, null, keySet::add); - Keys txnKeys = toKeys(keySet); - TxnRead read = createTxnRead(reads, consistencyLevelForAccordRead(cm, keySet, options.getSerialConsistency()), Domain.Key); - Txn.Kind kind = shouldReadEphemerally(txnKeys, Schema.instance.getTableMetadata(((AccordRoutableKey) txnKeys.get(0)).table()).params, Read); - return new Txn.InMemory(kind, txnKeys, read, TxnQuery.ALL, null); + List reads = createNamedReads(options, null, keyCollector); + Keys keys = keyCollector.build(); + TxnRead read = createTxnRead(tables, reads, consistencyLevelForAccordRead(cm, tables, keys, options.getSerialConsistency()), Domain.Key); + Txn.Kind kind = shouldReadEphemerally(keys, tables.getMetadata((TableId)keys.get(0).prefix()).params, Read); + return new Txn.InMemory(kind, keys, read, TxnQuery.ALL, null, new TableMetadatasAndKeys(tables, keys)); } else { Int2ObjectHashMap autoReads = new Int2ObjectHashMap<>(); - AccordUpdate update = createUpdate(cm, state, options, autoReads, keySet); - List reads = createNamedReads(options, state, autoReads, keySet::add); - TxnRead read = createTxnRead(reads, null, Domain.Key); - return new Txn.InMemory(toKeys(keySet), read, TxnQuery.ALL, update); + List writeFragments = createWriteFragments(state, options, autoReads, keyCollector); + ConsistencyLevel commitCL = consistencyLevelForAccordCommit(cm, tables, keyCollector, options.getConsistency()); + List reads = createNamedReads(options, autoReads, keyCollector); + Keys keys = keyCollector.build(); + AccordUpdate update = new TxnUpdate(tables, writeFragments, createCondition(options), commitCL, false); + TxnRead read = createTxnRead(tables, reads, null, Domain.Key); + return new Txn.InMemory(keys, read, TxnQuery.ALL, update, new TableMetadatasAndKeys(tables, keys)); } } diff --git a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java index 0eaf58a9bbb3..c48483d0ba0c 100644 --- a/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/UpdateStatement.java @@ -212,14 +212,14 @@ else if (value instanceof ReferenceValue.Raw) { ReferenceValue.Raw raw = (ReferenceValue.Raw) value; ReferenceValue referenceValue = raw.prepare(def, bindVariables); - ReferenceOperation operation = new ReferenceOperation(def, TxnReferenceOperation.Kind.setterFor(def), null, null, referenceValue); + ReferenceOperation operation = new ReferenceOperation(def, metadata, TxnReferenceOperation.Kind.setterFor(def), null, null, referenceValue); operations.add(def, operation); } else { Operation operation = new Operation.SetValue(value).prepare(metadata, def, !conditions.isEmpty()); operation.collectMarkerSpecification(bindVariables); - operations.add(operation); + operations.add(operation, metadata); } } @@ -293,7 +293,7 @@ protected ModificationStatement prepareInternal(ClientState state, { Operation operation = new Operation.SetValue(raw).prepare(metadata, def, !conditions.isEmpty()); operation.collectMarkerSpecification(bindVariables); - operations.add(operation); + operations.add(operation, metadata); } } @@ -414,7 +414,7 @@ protected ModificationStatement prepareInternal(ClientState state, checkFalse(def.isPrimaryKeyColumn(), UPDATING_PRIMARY_KEY_MESSAGE, def.name); Operation operation = entry.right.prepare(metadata, def, !conditions.isEmpty() || isForTxn); operation.collectMarkerSpecification(bindVariables); - operations.add(operation); + operations.add(operation, metadata); } Preconditions.checkState(updates.referenceOps.isEmpty() || isForTxn); diff --git a/src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java b/src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java index ecf7d8cae795..2b4b6f999f0f 100644 --- a/src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java +++ b/src/java/org/apache/cassandra/cql3/transactions/ReferenceOperation.java @@ -45,14 +45,16 @@ public class ReferenceOperation { private final ColumnMetadata receiver; + private final TableMetadata table; private final TxnReferenceOperation.Kind kind; private final FieldIdentifier field; private final Term key; private final ReferenceValue value; - public ReferenceOperation(ColumnMetadata receiver, TxnReferenceOperation.Kind kind, Term key, FieldIdentifier field, ReferenceValue value) + public ReferenceOperation(ColumnMetadata receiver, TableMetadata table, TxnReferenceOperation.Kind kind, Term key, FieldIdentifier field, ReferenceValue value) { this.receiver = receiver; + this.table = table; this.kind = kind; this.key = key; this.field = field; @@ -64,7 +66,7 @@ public ReferenceOperation(ColumnMetadata receiver, TxnReferenceOperation.Kind ki * within a transaction. When the language sees an Operation using a reference one is created already, but for cases * that needs to defer execution (such as when {@link Operation#requiresRead()} is true), this method can be used. */ - public static ReferenceOperation create(Operation operation) + public static ReferenceOperation create(Operation operation, TableMetadata table) { TxnReferenceOperation.Kind kind = TxnReferenceOperation.Kind.from(operation); ColumnMetadata receiver = operation.column; @@ -73,7 +75,7 @@ public static ReferenceOperation create(Operation operation) ReferenceValue value = new ReferenceValue.Constant(operation.term()); Term key = extractKeyOrIndex(operation); FieldIdentifier field = extractField(operation); - return new ReferenceOperation(receiver, kind, key, field, value); + return new ReferenceOperation(receiver, table, kind, key, field, value); } public TxnReferenceOperation.Kind getKind() @@ -100,7 +102,7 @@ public boolean requiresRead() public TxnReferenceOperation bindAndGet(QueryOptions options) { return new TxnReferenceOperation(kind, - receiver, + receiver, table, key != null ? key.bindAndGet(options) : null, field != null ? field.bytes : null, value.bindAndGet(options)); @@ -155,7 +157,7 @@ public ReferenceOperation prepare(TableMetadata metadata, VariableSpecifications } } - return new ReferenceOperation(receiver, kind, key, field, value.prepare(valueReceiver, bindVariables)); + return new ReferenceOperation(receiver, metadata, kind, key, field, value.prepare(valueReceiver, bindVariables)); } } diff --git a/src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java b/src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java index 6295e4457887..038bd459633d 100644 --- a/src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java +++ b/src/java/org/apache/cassandra/cql3/transactions/RowDataReference.java @@ -46,6 +46,7 @@ import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.txn.TxnReference; import org.apache.cassandra.utils.ByteBufferUtil; @@ -59,16 +60,18 @@ public class RowDataReference extends Term.NonTerminal private final String selectName; private final int txnDataName; private final ColumnMetadata column; + private final TableMetadata table; private final Term elementPath; private final CellPath fieldPath; - public RowDataReference(String selectName, int txnDataName, ColumnMetadata column, Term elementPath, CellPath fieldPath) + public RowDataReference(String selectName, int txnDataName, ColumnMetadata column, TableMetadata table, Term elementPath, CellPath fieldPath) { Preconditions.checkArgument(elementPath == null || fieldPath == null, "Cannot specify both element and field paths"); this.selectName = selectName; this.txnDataName = txnDataName; this.column = column; + this.table = table; this.elementPath = elementPath; this.fieldPath = fieldPath; } @@ -183,7 +186,8 @@ public TxnReference toTxnReference(QueryOptions options) { Preconditions.checkState(elementPath == null || column.isComplex() || column.type.isFrozenCollection()); Preconditions.checkState(fieldPath == null || column.isComplex() || column.type.isUDT()); - return new TxnReference(txnDataName, column, bindCellPath(options)); + + return new TxnReference(txnDataName, table, column, bindCellPath(options)); } public ColumnIdentifier getFullyQualifiedName() @@ -199,6 +203,11 @@ public ColumnMetadata column() return column; } + public TableMetadata table() + { + return table; + } + public static class Raw extends Term.Raw { private final Selectable.RawIdentifier tuple; @@ -209,6 +218,7 @@ public static class Raw extends Term.Raw private int tupleName; private ColumnMetadata column; + private TableMetadata table; private Term elementPath = null; private CellPath fieldPath = null; @@ -269,6 +279,7 @@ public void resolveReference(Map sources, Map serializer = new Serializer(); + public static final Serializer serializer = new Serializer(); public enum PotentialTxnConflicts { @@ -185,14 +188,16 @@ public abstract ReadCommand deserialize(DataInputPlus in, protected enum Kind { - SINGLE_PARTITION (SinglePartitionReadCommand.selectionDeserializer), - PARTITION_RANGE (PartitionRangeReadCommand.selectionDeserializer); + SINGLE_PARTITION (SinglePartitionReadCommand.selectionDeserializer, SinglePartitionReadCommand.accordSelectionDeserializer), + PARTITION_RANGE (PartitionRangeReadCommand.selectionDeserializer, ignore -> PartitionRangeReadCommand.selectionDeserializer); private final SelectionDeserializer selectionDeserializer; + private final Function accordSelectionDeserializer; - Kind(SelectionDeserializer selectionDeserializer) + Kind(SelectionDeserializer selectionDeserializer, Function accordSelectionDeserializer) { this.selectionDeserializer = selectionDeserializer; + this.accordSelectionDeserializer = accordSelectionDeserializer; } } @@ -232,6 +237,7 @@ public static ReadCommand getCommand() } protected abstract void serializeSelection(DataOutputPlus out, int version) throws IOException; + protected abstract void serializeSelectionWithoutKey(DataOutputPlus out, int version) throws IOException; protected abstract long selectionSerializedSize(int version); public abstract boolean isLimitedToOnePartition(); @@ -1348,22 +1354,20 @@ private static PotentialTxnConflicts potentialTxnConflicts(int flags) return (flags & ALLOWS_POTENTIAL_TXN_CONFLICTS) != 0 ? PotentialTxnConflicts.ALLOW : PotentialTxnConflicts.DISALLOW; } - public void serialize(ReadCommand command, DataOutputPlus out, int version) throws IOException + private void serializeHeader(ReadCommand command, DataOutputPlus out, int version) throws IOException { out.writeByte(command.kind.ordinal()); out.writeByte( - digestFlag(command.isDigestQuery()) - | indexFlag(null != command.indexQueryPlan()) - | acceptsTransientFlag(command.acceptsTransient()) - | needsReconciliationFlag(command.rowFilter().needsReconciliation()) - | potentialTxnConflicts(command.potentialTxnConflicts) + digestFlag(command.isDigestQuery()) + | indexFlag(null != command.indexQueryPlan()) + | acceptsTransientFlag(command.acceptsTransient()) + | needsReconciliationFlag(command.rowFilter().needsReconciliation()) + | potentialTxnConflicts(command.potentialTxnConflicts) ); - if (command.isDigestQuery()) - out.writeUnsignedVInt32(command.digestVersion()); - command.metadata().id.serialize(out); - if (version >= MessagingService.VERSION_51) - Epoch.serializer.serialize(command.serializedAtEpoch, out); - out.writeInt(version >= MessagingService.VERSION_50 ? CassandraUInt.fromLong(command.nowInSec()) : (int) command.nowInSec()); + } + + private void serializeFiltersAndLimits(ReadCommand command, DataOutputPlus out, int version) throws IOException + { ColumnFilter.serializer.serialize(command.columnFilter(), out, version); RowFilter.serializer.serialize(command.rowFilter(), out, version); DataLimits.serializer.serialize(command.limits(), out, version, command.metadata().comparator); @@ -1372,17 +1376,57 @@ public void serialize(ReadCommand command, DataOutputPlus out, int version) thro // from the index name. if (null != command.indexQueryPlan) IndexMetadata.serializer.serialize(command.indexQueryPlan.getFirst().getIndexMetadata(), out, version); + } + public void serialize(ReadCommand command, DataOutputPlus out, int version) throws IOException + { + serializeHeader(command, out, version); + if (command.isDigestQuery()) + out.writeUnsignedVInt32(command.digestVersion()); + command.metadata().id.serialize(out); + if (version >= MessagingService.VERSION_51) + Epoch.serializer.serialize(command.serializedAtEpoch, out); + out.writeInt(version >= MessagingService.VERSION_50 ? CassandraUInt.fromLong(command.nowInSec()) : (int) command.nowInSec()); + serializeFiltersAndLimits(command, out, version); command.serializeSelection(out, version); } - public ReadCommand deserialize(DataInputPlus in, int version) throws IOException + public void serializeForAccord(ReadCommand command, TableMetadatas tables, DataOutputPlus out, int version) throws IOException + { + Invariants.require(!command.isDigestQuery); + serializeHeader(command, out, version); + tables.serialize(command.metadata(), out); + serializeFiltersAndLimits(command, out, version); + command.serializeSelectionWithoutKey(out, version); + } + + private ReadCommand deserialize(SelectionDeserializer deserializer, int flags, Epoch schemaVersion, int digestVersion, long nowInSec, TableMetadata tableMetadata, DataInputPlus in, int version) throws IOException { - Kind kind = Kind.values()[in.readByte()]; - int flags = in.readByte(); boolean isDigest = isDigest(flags); boolean acceptsTransient = acceptsTransient(flags); PotentialTxnConflicts potentialTxnConflicts = potentialTxnConflicts(flags); + boolean hasIndex = hasIndex(flags); + boolean needsReconciliation = needsReconciliation(flags); + + ColumnFilter columnFilter = ColumnFilter.serializer.deserialize(in, version, tableMetadata); + RowFilter rowFilter = RowFilter.serializer.deserialize(in, version, tableMetadata, needsReconciliation); + DataLimits limits = DataLimits.serializer.deserialize(in, version, tableMetadata); + Index.QueryPlan indexQueryPlan = null; + if (hasIndex) + { + IndexMetadata index = deserializeIndexMetadata(in, version, tableMetadata); + Index.Group indexGroup = Keyspace.openAndGetStore(tableMetadata).indexManager.getIndexGroup(index); + if (indexGroup != null) + indexQueryPlan = indexGroup.queryPlanFor(rowFilter); + } + + return deserializer.deserialize(in, version, schemaVersion, isDigest, digestVersion, acceptsTransient, potentialTxnConflicts, tableMetadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan); + } + + public ReadCommand deserialize(DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.values()[in.readByte()]; + int flags = in.readByte(); // Shouldn't happen or it's a user error (see comment above) but // better complain loudly than doing the wrong thing. if (isForThrift(flags)) @@ -1391,9 +1435,7 @@ public ReadCommand deserialize(DataInputPlus in, int version) throws IOException + "which is unsupported. Make sure to stop using thrift before " + "upgrading to 4.0"); - boolean hasIndex = hasIndex(flags); - int digestVersion = isDigest ? (int)in.readUnsignedVInt() : 0; - boolean needsReconciliation = needsReconciliation(flags); + int digestVersion = isDigest(flags) ? in.readUnsignedVInt32() : 0; TableId tableId = TableId.deserialize(in); Epoch schemaVersion = Epoch.EMPTY; @@ -1416,19 +1458,19 @@ public ReadCommand deserialize(DataInputPlus in, int version) throws IOException throw e; } long nowInSec = version >= MessagingService.VERSION_50 ? CassandraUInt.toLong(in.readInt()) : in.readInt(); - ColumnFilter columnFilter = ColumnFilter.serializer.deserialize(in, version, tableMetadata); - RowFilter rowFilter = RowFilter.serializer.deserialize(in, version, tableMetadata, needsReconciliation); - DataLimits limits = DataLimits.serializer.deserialize(in, version, tableMetadata); - Index.QueryPlan indexQueryPlan = null; - if (hasIndex) - { - IndexMetadata index = deserializeIndexMetadata(in, version, tableMetadata); - Index.Group indexGroup = Keyspace.openAndGetStore(tableMetadata).indexManager.getIndexGroup(index); - if (indexGroup != null) - indexQueryPlan = indexGroup.queryPlanFor(rowFilter); - } + return deserialize(kind.selectionDeserializer, flags, schemaVersion, digestVersion, nowInSec, tableMetadata, in, version); + } + + public ReadCommand deserializeForAccord(Seekable key, TableMetadatas tables, DataInputPlus in, int version) throws IOException + { + Kind kind = Kind.values()[in.readByte()]; + int flags = in.readByte(); + if (isDigest(flags) || isForThrift(flags) || acceptsTransient(flags)) + throw new IllegalStateException("Received an Accord command with a digest/thrift/transient flag set."); + + TableMetadata tableMetadata = tables.deserialize(in); - return kind.selectionDeserializer.deserialize(in, version, schemaVersion, isDigest, digestVersion, acceptsTransient, potentialTxnConflicts, tableMetadata, nowInSec, columnFilter, rowFilter, limits, indexQueryPlan); + return deserialize(kind.accordSelectionDeserializer.apply(key), flags, tableMetadata.epoch, 0, 0, tableMetadata, in, version); } private IndexMetadata deserializeIndexMetadata(DataInputPlus in, int version, TableMetadata metadata) throws IOException @@ -1461,5 +1503,16 @@ public long serializedSize(ReadCommand command, int version) + command.selectionSerializedSize(version) + command.indexSerializedSize(version); } + + public long serializedSizeForAccord(ReadCommand command, TableMetadatas tables, int version) + { + return 2 // kind + flags + + tables.serializedSize(command.metadata()) + + ColumnFilter.serializer.serializedSize(command.columnFilter(), version) + + RowFilter.serializer.serializedSize(command.rowFilter(), version) + + DataLimits.serializer.serializedSize(command.limits(), version, command.metadata().comparator) + + command.selectionSerializedSize(version) + + command.indexSerializedSize(version); + } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index 0b4d25a925cd..ec3b383bb91b 100644 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@ -26,11 +26,14 @@ import java.util.NavigableSet; import java.util.TreeSet; import java.util.concurrent.TimeUnit; +import java.util.function.Function; import java.util.stream.Collectors; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Sets; +import accord.primitives.Seekable; +import accord.primitives.Seekables; import org.apache.cassandra.cache.IRowCacheEntry; import org.apache.cassandra.cache.RowCacheKey; import org.apache.cassandra.cache.RowCacheSentinel; @@ -77,6 +80,7 @@ import org.apache.cassandra.service.CacheService; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.StorageProxy; +import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; @@ -89,6 +93,7 @@ public class SinglePartitionReadCommand extends ReadCommand implements SinglePartitionReadQuery { protected static final SelectionDeserializer selectionDeserializer = new Deserializer(); + protected static final Function accordSelectionDeserializer = AccordDeserializer::new; protected final DecoratedKey partitionKey; protected final ClusteringIndexFilter clusteringIndexFilter; @@ -1277,12 +1282,23 @@ protected void serializeSelection(DataOutputPlus out, int version) throws IOExce ClusteringIndexFilter.serializer.serialize(clusteringIndexFilter(), out, version); } + protected void serializeSelectionWithoutKey(DataOutputPlus out, int version) throws IOException + { + ClusteringIndexFilter.serializer.serialize(clusteringIndexFilter(), out, version); + } + protected long selectionSerializedSize(int version) { return metadata().partitionKeyType.writtenLength(partitionKey().getKey()) + ClusteringIndexFilter.serializer.serializedSize(clusteringIndexFilter(), version); } + protected long selectionSerializedSize(Seekables seekables, int version) + { + return metadata().partitionKeyType.writtenLength(partitionKey().getKey()) + + ClusteringIndexFilter.serializer.serializedSize(clusteringIndexFilter(), version); + } + public boolean isLimitedToOnePartition() { return true; @@ -1411,6 +1427,35 @@ public ReadCommand deserialize(DataInputPlus in, } } + private static class AccordDeserializer extends SelectionDeserializer + { + final DecoratedKey key; + + private AccordDeserializer(Seekable seekable) + { + this.key = ((PartitionKey)seekable).partitionKey(); + } + + public ReadCommand deserialize(DataInputPlus in, + int version, + Epoch serializedAtEpoch, + boolean isDigest, + int digestVersion, + boolean acceptsTransient, + PotentialTxnConflicts potentialTxnConflicts, + TableMetadata metadata, + long nowInSec, + ColumnFilter columnFilter, + RowFilter rowFilter, + DataLimits limits, + Index.QueryPlan indexQueryPlan) + throws IOException + { + ClusteringIndexFilter filter = ClusteringIndexFilter.serializer.deserialize(in, version, metadata); + return SinglePartitionReadCommand.create(serializedAtEpoch, isDigest, digestVersion, acceptsTransient, potentialTxnConflicts, metadata, nowInSec, columnFilter, rowFilter, limits, key, filter, indexQueryPlan, false); + } + } + /** * {@code SSTableReaderListener} used to collect metrics about SSTable read access. */ diff --git a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java index 011629576f00..2b776e31c098 100644 --- a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java +++ b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java @@ -44,6 +44,7 @@ import org.apache.cassandra.db.Mutation; import org.apache.cassandra.db.RangeTombstone; import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.SerializationHeader; import org.apache.cassandra.db.SimpleBuilders; import org.apache.cassandra.db.Slices; import org.apache.cassandra.db.filter.ColumnFilter; @@ -77,12 +78,15 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.btree.BTree; import org.apache.cassandra.utils.btree.UpdateFunction; import org.apache.cassandra.utils.vint.VIntCoding; +import static org.apache.cassandra.db.SerializationHeader.StableHeaderSerializer.STABLE; import static org.apache.cassandra.db.rows.UnfilteredRowIteratorSerializer.IS_EMPTY; /** @@ -810,6 +814,17 @@ public void serialize(PartitionUpdate update, DataOutputPlus out, int version) t } } + public void serializeWithoutKey(PartitionUpdate update, TableMetadatas tables, DataOutputPlus out, int version) throws IOException + { + try (UnfilteredRowIterator iter = update.unfilteredIterator()) + { + tables.serialize(update.metadata, out); + Epoch.serializer.serialize(update.metadata.epoch, out); + SerializationHeader header = new SerializationHeader(false, update.metadata, iter.columns(), iter.stats()); + UnfilteredRowIteratorSerializer.serializer.serializeWithoutKey(iter, header, out, version, update.rowCount(), STABLE, null); + } + } + public PartitionUpdate deserialize(DataInputPlus in, int version, DeserializationHelper.Flag flag) throws IOException { TableId tableId = TableId.deserialize(in); @@ -833,6 +848,19 @@ public PartitionUpdate deserialize(DataInputPlus in, int version, Deserializatio throw e; } UnfilteredRowIteratorSerializer.Header header = UnfilteredRowIteratorSerializer.serializer.deserializeHeader(tableMetadata, null, in, version, flag); + return deserialize(header, remoteVersion, tableMetadata, in, version, flag); + } + + public PartitionUpdate deserialize(PartitionKey key, TableMetadatas tables, DataInputPlus in, int version, DeserializationHelper.Flag flag) throws IOException + { + TableMetadata tableMetadata = tables.deserialize(in); + Epoch remoteVersion = Epoch.serializer.deserialize(in); + UnfilteredRowIteratorSerializer.Header header = UnfilteredRowIteratorSerializer.serializer.deserializeHeaderWithoutKey(tableMetadata, key.partitionKey(), in, version, flag, STABLE, null); + return deserialize(header, remoteVersion, tableMetadata, in, version, flag); + } + + private PartitionUpdate deserialize(UnfilteredRowIteratorSerializer.Header header, Epoch remoteVersion, TableMetadata tableMetadata, DataInputPlus in, int version, DeserializationHelper.Flag flag) throws IOException + { if (header.isEmpty) return emptyUpdate(tableMetadata, header.key); @@ -895,6 +923,18 @@ public long serializedSize(PartitionUpdate update, int version) + UnfilteredRowIteratorSerializer.serializer.serializedSize(iter, null, version, update.rowCount()); } } + + public long serializedSizeWithoutKey(PartitionUpdate update, TableMetadatas tables, int version) + { + try (UnfilteredRowIterator iter = update.unfilteredIterator()) + { + long size = tables.serializedSize(update.metadata); + size += Epoch.serializer.serializedSize(update.metadata.epoch); + + SerializationHeader header = new SerializationHeader(false, update.metadata, iter.columns(), iter.stats()); + return size + UnfilteredRowIteratorSerializer.serializer.serializedSizeWithoutKey(iter, header, version, update.rowCount(), STABLE, null); + } + } } /** diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java index ed1b6171bb13..46ca484834fb 100644 --- a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java +++ b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java @@ -124,7 +124,11 @@ private

      void serialize(UnfilteredRowIterator iterator, SerializationHeader h assert !header.isForSSTable(); ByteBufferUtil.writeWithVIntLength(iterator.partitionKey().getKey(), out); + serializeWithoutKey(iterator, header, out, version, rowEstimate, serializer, param); + } + public

      void serializeWithoutKey(UnfilteredRowIterator iterator, SerializationHeader header, DataOutputPlus out, int version, int rowEstimate, ParameterizedSerializer

      serializer, P param) throws IOException + { int flags = 0; if (iterator.isReverseOrder()) flags |= IS_REVERSED; @@ -179,16 +183,21 @@ public

      long serializedSize(UnfilteredRowIterator iterator, int version, int iterator.columns(), iterator.stats()); - SerializationHelper helper = new SerializationHelper(header); - assert rowEstimate >= 0; - long size = ByteBufferUtil.serializedSizeWithVIntLength(iterator.partitionKey().getKey()) - + 1; // flags + long size = ByteBufferUtil.serializedSizeWithVIntLength(iterator.partitionKey().getKey()); + return size + serializedSizeWithoutKey(iterator, header, version, rowEstimate, serializer, param); + } + // Please note that this consume the iterator, and as such should not be called unless we have a simple way to + // recreate an iterator for both serialize and serializedSize, which is mostly only PartitionUpdate/ArrayBackedCachedPartition. + public

      long serializedSizeWithoutKey(UnfilteredRowIterator iterator, SerializationHeader header, int version, int rowEstimate, ParameterizedSerializer

      serializer, P param) + { + long size = 1; // flags if (iterator.isEmpty()) return size; + SerializationHelper helper = new SerializationHelper(header); DeletionTime partitionDeletion = iterator.partitionLevelDeletion(); Row staticRow = iterator.staticRow(); boolean hasStatic = staticRow != Rows.EMPTY_STATIC_ROW; @@ -219,6 +228,11 @@ public Header deserializeHeader(TableMetadata metadata, ColumnFilter selection, public

      Header deserializeHeader(TableMetadata metadata, DataInputPlus in, int version, DeserializationHelper.Flag flag, ParameterizedSerializer

      serializer, P param) throws IOException { DecoratedKey key = metadata.partitioner.decorateKey(ByteBufferUtil.readWithVIntLength(in)); + return deserializeHeaderWithoutKey(metadata, key, in, version, flag, serializer, param); + } + + public

      Header deserializeHeaderWithoutKey(TableMetadata metadata, DecoratedKey key, DataInputPlus in, int version, DeserializationHelper.Flag flag, ParameterizedSerializer

      serializer, P param) throws IOException + { int flags = in.readUnsignedByte(); boolean isReversed = (flags & IS_REVERSED) != 0; if ((flags & IS_EMPTY) != 0) diff --git a/src/java/org/apache/cassandra/io/AsymmetricParameterisedUnversionedSerializer.java b/src/java/org/apache/cassandra/io/AsymmetricParameterisedUnversionedSerializer.java new file mode 100644 index 000000000000..69b1bd2e0cb8 --- /dev/null +++ b/src/java/org/apache/cassandra/io/AsymmetricParameterisedUnversionedSerializer.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; + +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; + +public interface AsymmetricParameterisedUnversionedSerializer +{ + void serialize(In t, P p, DataOutputPlus out) throws IOException; + default ByteBuffer serialize(In t, P p) throws IOException + { + int size = Math.toIntExact(serializedSize(t, p)); + try (DataOutputBuffer buffer = new DataOutputBuffer(size)) + { + serialize(t, p, buffer); + ByteBuffer bb = buffer.buffer(); + assert size == bb.remaining() : String.format("Expected to write %d but wrote %d", size, bb.remaining()); + return bb; + } + } + + default ByteBuffer serializeUnchecked(In t, P p) + { + try + { + return serialize(t, p); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + Out deserialize(P p, DataInputPlus in) throws IOException; + default Out deserialize(P p, ByteBuffer buffer) throws IOException + { + try (DataInputBuffer in = new DataInputBuffer(buffer, true)) + { + return deserialize(p, in); + } + } + + default Out deserializeUnchecked(P p, ByteBuffer buffer) + { + try + { + return deserialize(p, buffer); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + long serializedSize(In t, P p); +} diff --git a/src/java/org/apache/cassandra/io/AsymmetricParameterisedVersionedSerializer.java b/src/java/org/apache/cassandra/io/AsymmetricParameterisedVersionedSerializer.java new file mode 100644 index 000000000000..a1a295f83d89 --- /dev/null +++ b/src/java/org/apache/cassandra/io/AsymmetricParameterisedVersionedSerializer.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; + +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; + +public interface AsymmetricParameterisedVersionedSerializer +{ + void serialize(In t, P p, DataOutputPlus out, Version version) throws IOException; + + default ByteBuffer serialize(In t, P p, Version version) throws IOException + { + int size = Math.toIntExact(serializedSize(t, p, version)); + try (DataOutputBuffer buffer = new DataOutputBuffer(size)) + { + serialize(t, p, buffer, version); + ByteBuffer bb = buffer.buffer(); + assert size == bb.remaining() : String.format("Expected to write %d but wrote %d", size, bb.remaining()); + return bb; + } + } + + default ByteBuffer serializeUnchecked(In t, P p, Version version) + { + try + { + return serialize(t, p, version); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + Out deserialize(P p, DataInputPlus in, Version version) throws IOException; + + default Out deserialize(P p, ByteBuffer buffer, Version version) throws IOException + { + try (DataInputBuffer in = new DataInputBuffer(buffer, true)) + { + return deserialize(p, in, version); + } + } + + default Out deserializeUnchecked(P p, ByteBuffer buffer, Version version) + { + try + { + return deserialize(p, buffer, version); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + long serializedSize(In t, P p, Version version); + + static AsymmetricParameterisedVersionedSerializer from(AsymmetricParameterisedUnversionedSerializer delegate) + { + return new AsymmetricParameterisedVersionedSerializer<>() + { + @Override + public void serialize(In t, P p, DataOutputPlus out, Version version) throws IOException + { + delegate.serialize(t, p, out); + } + + @Override + public Out deserialize(P p, DataInputPlus in, Version version) throws IOException + { + return delegate.deserialize(p, in); + } + + @Override + public long serializedSize(In t, P p, Version version) + { + return delegate.serializedSize(t, p); + } + }; + } +} diff --git a/src/java/org/apache/cassandra/io/ParameterisedUnversionedSerializer.java b/src/java/org/apache/cassandra/io/ParameterisedUnversionedSerializer.java new file mode 100644 index 000000000000..b93b8aacfc8b --- /dev/null +++ b/src/java/org/apache/cassandra/io/ParameterisedUnversionedSerializer.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io; + +public interface ParameterisedUnversionedSerializer extends AsymmetricParameterisedUnversionedSerializer +{ +} diff --git a/src/java/org/apache/cassandra/io/ParameterisedVersionedSerializer.java b/src/java/org/apache/cassandra/io/ParameterisedVersionedSerializer.java new file mode 100644 index 000000000000..3393c8141c7c --- /dev/null +++ b/src/java/org/apache/cassandra/io/ParameterisedVersionedSerializer.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io; + +public interface ParameterisedVersionedSerializer extends AsymmetricParameterisedVersionedSerializer +{ +} diff --git a/src/java/org/apache/cassandra/schema/TableMetadata.java b/src/java/org/apache/cassandra/schema/TableMetadata.java index 2c3b1f435d11..c2909dcb612f 100644 --- a/src/java/org/apache/cassandra/schema/TableMetadata.java +++ b/src/java/org/apache/cassandra/schema/TableMetadata.java @@ -35,6 +35,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; @@ -770,6 +771,19 @@ public static TableMetadata minimal(String keyspace, String name) .build(); } + /** + * There is a couple of places in the code where we need a TableMetadata object and don't have one readily available + * and know that only the keyspace and name matter. This creates such "fake" metadata. Use only if you know what + * you're doing. + */ + @VisibleForTesting + public static TableMetadata minimal(String keyspace, String name, TableId tableId) + { + return TableMetadata.builder(keyspace, name, tableId) + .addPartitionKeyColumn("key", BytesType.instance) + .build(); + } + public TableMetadata updateIndexTableMetadata(TableParams baseTableParams) { TableParams.Builder builder = baseTableParams.unbuild().gcGraceSeconds(0); diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java index e56073222940..4d0857b40c47 100644 --- a/src/java/org/apache/cassandra/service/StorageProxy.java +++ b/src/java/org/apache/cassandra/service/StorageProxy.java @@ -138,6 +138,8 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.service.accord.IAccordService.IAccordResult; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnDataKeyValue; import org.apache.cassandra.service.accord.txn.TxnDataValue; @@ -2211,9 +2213,11 @@ public static IAccordResult readWithAccord(ClusterMetadata cm, Partit TableMetadata tableMetadata = getTableMetadata(cm, command.metadata().id); TableParams tableParams = tableMetadata.params; consistencyLevel = tableParams.transactionalMode.readCLForMode(tableParams.transactionalMigrationFrom, consistencyLevel, cm, tableMetadata.id, command.dataRange().keyRange()); - TxnRead read = TxnRead.createRangeRead(command, range, consistencyLevel); + TableMetadatas tables = TableMetadatas.of(tableMetadata); + TxnRead read = TxnRead.createRangeRead(tables, command, range, consistencyLevel); Txn.Kind kind = shouldReadEphemerally(read.keys(), tableParams, Read); - Txn txn = new Txn.InMemory(kind, read.keys(), read, TxnQuery.RANGE_QUERY, null); + TableMetadatasAndKeys tablesAndKeys = new TableMetadatasAndKeys(tables, read.keys()); + Txn txn = new Txn.InMemory(kind, read.keys(), read, TxnQuery.RANGE_QUERY, null, tablesAndKeys); IAccordService accordService = AccordService.instance(); return accordService.coordinateAsync(tableMetadata.epoch.getEpoch(), txn, consistencyLevel, requestTime); } @@ -2226,11 +2230,13 @@ private static IAccordResult readWithAccordAsync(ClusterMetadata cm, // If the non-SERIAL write strategy is sending all writes through Accord there is no need to use the supplied consistency // level since Accord will manage reading safely TableMetadata tableMetadata = getTableMetadata(cm, group.metadata().id); + TableMetadatas tables = TableMetadatas.of(tableMetadata); TableParams tableParams = tableMetadata.params; - consistencyLevel = consistencyLevelForAccordRead(cm, group.queries.get(0).metadata().id, group, consistencyLevel); - TxnRead read = TxnRead.createSerialRead(group.queries, consistencyLevel); + TableMetadatasAndKeys.KeyCollector keyCollector = new TableMetadatasAndKeys.KeyCollector(tables); + consistencyLevel = consistencyLevelForAccordRead(cm, tableMetadata.id, group, consistencyLevel); + TxnRead read = TxnRead.createSerialRead(group.queries, consistencyLevel, keyCollector); Txn.Kind kind = shouldReadEphemerally(read.keys(), tableParams, Read); - Txn txn = new Txn.InMemory(kind, read.keys(), read, TxnQuery.ALL, null); + Txn txn = new Txn.InMemory(kind, read.keys(), read, TxnQuery.ALL, null, keyCollector.buildTablesAndKeys()); return AccordService.instance().coordinateAsync(tableMetadata.epoch.getEpoch(), txn, consistencyLevel, requestTime); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java index d413c75c7986..1ceedc461227 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java +++ b/src/java/org/apache/cassandra/service/accord/AccordFetchCoordinator.java @@ -50,8 +50,8 @@ import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.UnversionedSerializer; -import org.apache.cassandra.io.VersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.InetAddressAndPort; @@ -61,6 +61,7 @@ import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.streaming.StreamCoordinator; @@ -70,7 +71,6 @@ import org.apache.cassandra.streaming.StreamResultFuture; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.utils.CastingSerializer; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.TimeUUID; @@ -214,24 +214,24 @@ private void maybeListen() public static class StreamingRead implements Read { - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer() { @Override - public void serialize(StreamingRead read, DataOutputPlus out, Version version) throws IOException + public void serialize(StreamingRead read, TableMetadatasAndKeys seekables, DataOutputPlus out, Version version) throws IOException { InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serialize(read.to, out, version.messageVersion()); KeySerializers.ranges.serialize(read.ranges, out); } @Override - public StreamingRead deserialize(DataInputPlus in, Version version) throws IOException + public StreamingRead deserialize(TableMetadatasAndKeys seekables, DataInputPlus in, Version version) throws IOException { return new StreamingRead(InetAddressAndPort.Serializer.inetAddressAndPortSerializer.deserialize(in, version.messageVersion()), KeySerializers.ranges.deserialize(in)); } @Override - public long serializedSize(StreamingRead read, Version version) + public long serializedSize(StreamingRead read, TableMetadatasAndKeys seekables, Version version) { return InetAddressAndPort.Serializer.inetAddressAndPortSerializer.serializedSize(read.to, version.messageVersion()) + KeySerializers.ranges.serializedSize(read.ranges); @@ -305,8 +305,7 @@ public AsyncChain read(Seekable key, SafeCommandStore commandStore, Timest public static class StreamingTxn { - private static final VersionedSerializer read = CastingSerializer.create(StreamingRead.class, - StreamingRead.serializer); + private static final ParameterisedVersionedSerializer read = (ParameterisedVersionedSerializer)StreamingRead.serializer; private static final UnversionedSerializer query = new UnversionedSerializer<>() { @@ -330,22 +329,22 @@ public long serializedSize(Query t) } }; - private static final IVersionedSerializer update = new IVersionedSerializer() + private static final ParameterisedVersionedSerializer update = new ParameterisedVersionedSerializer<>() { @Override - public void serialize(Update t, DataOutputPlus out, Version version) + public void serialize(Update t, TableMetadatasAndKeys seekables, DataOutputPlus out, Version version) { Invariants.requireArgument(t == null); } @Override - public Update deserialize(DataInputPlus in, Version version) + public Update deserialize(TableMetadatasAndKeys seekables, DataInputPlus in, Version version) { return null; } @Override - public long serializedSize(Update t, Version version) + public long serializedSize(Update t, TableMetadatasAndKeys seekables, Version version) { Invariants.requireArgument(t == null); return 0; @@ -353,7 +352,7 @@ public long serializedSize(Update t, Version version) }; // TODO (desired): this could be serialized as an InetAddressAndPort and Ranges if we had a special case PartialTxn implementation - public static final IVersionedSerializer serializer = new CommandSerializers.PartialTxnSerializer(read, query, update); + public static final IVersionedSerializer serializer = new CommandSerializers.PartialTxnSerializer(read, query, update, TableMetadatasAndKeys.serializer); } private final Map streams = new HashMap<>(); @@ -392,7 +391,7 @@ protected void onDone(Ranges success, Throwable failure) protected PartialTxn rangeReadTxn(Ranges ranges) { StreamingRead read = new StreamingRead(FBUtilities.getBroadcastAddressAndPort(), ranges); - return new PartialTxn.InMemory(Txn.Kind.Read, ranges, read, noopQuery, null); + return new PartialTxn.InMemory(Txn.Kind.Read, ranges, read, noopQuery, null, TableMetadatasAndKeys.none(Routable.Domain.Range)); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 5a5e09d4f69b..c3b55718c59c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -64,6 +64,7 @@ import org.apache.cassandra.service.accord.api.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.ResultSerializers; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnQuery; @@ -219,7 +220,7 @@ public static long route(Unseekables unseekables) } } - private static final long EMPTY_TXN = measure(new PartialTxn.InMemory(null, null, null, null, null)); + private static final long EMPTY_TXN = measure(new PartialTxn.InMemory(null, null, null, null, null, TableMetadatasAndKeys.none(Domain.Key))); public static long txn(PartialTxn txn) { long size = EMPTY_TXN; @@ -314,7 +315,7 @@ private static ICommand attrs(boolean hasDeps, boolean hasTxn, boolean executes) builder.partialDeps(PartialDeps.NONE); if (hasTxn) - builder.partialTxn(new PartialTxn.InMemory(null, null, null, null, null)); + builder.partialTxn(new PartialTxn.InMemory(null, null, null, null, null, TableMetadatasAndKeys.none(Domain.Key))); if (executes) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordSerializers.java index ee6fb4ee656e..2bae9c8a0c84 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSerializers.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import accord.utils.VIntCoding; import org.apache.cassandra.cql3.terms.MultiElements; import org.apache.cassandra.cql3.terms.Term; import org.apache.cassandra.db.ArrayClustering; @@ -33,10 +34,9 @@ import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.marshal.ValueAccessor; -import org.apache.cassandra.db.partitions.PartitionUpdate; -import org.apache.cassandra.db.rows.DeserializationHelper; import org.apache.cassandra.io.AsymmetricVersionedSerializer; import org.apache.cassandra.io.EmbeddedAsymmetricVersionedSerializer; +import org.apache.cassandra.io.ParameterisedUnversionedSerializer; import org.apache.cassandra.io.UnversionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -46,7 +46,6 @@ import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; import org.apache.cassandra.service.accord.serializers.Version; -import org.apache.cassandra.utils.ByteBufferUtil; import static org.apache.cassandra.db.TypeSizes.sizeof; import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; @@ -75,58 +74,28 @@ else if (collectionType.kind == MAP) throw new UnsupportedOperationException("Unsupported collection type: " + type); } - public static final IVersionedSerializer partitionUpdateSerializer = new IVersionedSerializer() + public static final ParameterisedUnversionedSerializer columnMetadataSerializer = new ParameterisedUnversionedSerializer<>() { @Override - public void serialize(PartitionUpdate upd, DataOutputPlus out, Version version) throws IOException + public void serialize(ColumnMetadata column, TableMetadata table, DataOutputPlus out) throws IOException { - PartitionUpdate.serializer.serialize(upd, out, version.messageVersion()); + out.writeUnsignedVInt32(column.uniqueId); } @Override - public PartitionUpdate deserialize(DataInputPlus in, Version version) throws IOException + public ColumnMetadata deserialize(TableMetadata table, DataInputPlus in) throws IOException { - return PartitionUpdate.serializer.deserialize(in, version.messageVersion(), DeserializationHelper.Flag.FROM_REMOTE); + return table.getColumnById(in.readUnsignedVInt32()); } @Override - public long serializedSize(PartitionUpdate upd, Version version) + public long serializedSize(ColumnMetadata column, TableMetadata table) { - return PartitionUpdate.serializer.serializedSize(upd, version.messageVersion()); + return VIntCoding.sizeOfUnsignedVInt(column.uniqueId); } }; - public static final UnversionedSerializer columnMetadataSerializer = new UnversionedSerializer() - { - @Override - public void serialize(ColumnMetadata column, DataOutputPlus out) throws IOException - { - out.writeUTF(column.ksName); - out.writeUTF(column.cfName); - ByteBufferUtil.writeWithShortLength(column.name.bytes, out); - } - - @Override - public ColumnMetadata deserialize(DataInputPlus in) throws IOException - { - String keyspace = in.readUTF(); - String table = in.readUTF(); - ByteBuffer name = ByteBufferUtil.readWithShortLength(in); - return Schema.instance.getColumnMetadata(keyspace, table, name); - } - - @Override - public long serializedSize(ColumnMetadata column) - { - long size = 0; - size += sizeof(column.ksName); - size += sizeof(column.cfName); - size += ByteBufferUtil.serializedSizeWithShortLength(column.name.bytes); - return size; - } - }; - - public static final IVersionedSerializer tableMetadataSerializer = new IVersionedSerializer() + public static final IVersionedSerializer tableMetadataSerializer = new IVersionedSerializer<>() { @Override public void serialize(TableMetadata metadata, DataOutputPlus out, Version version) throws IOException diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index d2682950afd4..acb4c65e8b60 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -110,6 +110,8 @@ import org.apache.cassandra.service.accord.api.TokenKey; import org.apache.cassandra.service.accord.api.TokenKey.KeyspaceSplitter; import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnResult; @@ -524,7 +526,7 @@ private AsyncChain syncInternal(Timestamp minBound, Keys keys, DurabilityS { TxnId txnId = node.nextTxnId(minBound, Write, Key, cardinality(keys)); FullRoute route = node.computeRoute(txnId, keys); - Txn txn = new Txn.InMemory(Write, keys, TxnRead.createNoOpRead(keys), TxnQuery.NONE, TxnUpdate.empty()); + Txn txn = new Txn.InMemory(Write, keys, TxnRead.createNoOpRead(keys), TxnQuery.NONE, TxnUpdate.empty(), new TableMetadatasAndKeys(TableMetadatas.none(), keys)); return CoordinateTransaction.coordinate(node, route, txnId, txn) .map(ignore -> (Void)null).beginAsResult(); } diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 3bcb271381b4..949f7bbf8602 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -64,6 +64,7 @@ import org.apache.cassandra.metrics.AccordMetrics; import org.apache.cassandra.net.ResponseContext; import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.utils.Clock; @@ -204,7 +205,7 @@ public long maxConflictsPruneInterval() @Override public Txn emptySystemTxn(Kind kind, Routable.Domain domain) { - return new Txn.InMemory(kind, domain == Key ? Keys.EMPTY : Ranges.EMPTY, TxnRead.empty(domain), TxnQuery.UNSAFE_EMPTY, null); + return new Txn.InMemory(kind, (domain == Key ? Keys.EMPTY : Ranges.EMPTY), TxnRead.empty(domain), TxnQuery.UNSAFE_EMPTY, null, TableMetadatasAndKeys.none(domain)); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java index 25d409bfde46..d70cf2beea5d 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java @@ -114,6 +114,7 @@ public final int compareAsRoutingKey(@Nonnull AccordRoutableKey that) public final int compareTo(AccordRoutableKey that) { + if (this == that) return 0; int c = compareAsRoutingKey(that); if (c != 0) return c; diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index 3fa21940879f..2d82cf174982 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -27,9 +27,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import accord.api.Agent; import accord.api.Data; import accord.api.Result; @@ -40,17 +37,19 @@ import accord.local.Node.Id; import accord.messages.Commit; import accord.messages.Commit.Kind; +import accord.primitives.AbstractRanges; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; +import accord.primitives.Keys; import accord.primitives.Participants; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.topology.Shard; import accord.topology.Topologies; import accord.topology.Topology; +import accord.utils.UnhandledEnum; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.concurrent.Stage; @@ -79,8 +78,8 @@ import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.TokenKey; -import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.interop.AccordInteropReadCallback.MaximalCommitSender; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnDataKeyValue; @@ -112,8 +111,6 @@ */ public class AccordInteropExecution implements ReadCoordinator, MaximalCommitSender { - private static final Logger logger = LoggerFactory.getLogger(AccordInteropExecution.class); - static class InteropExecutor implements AgentExecutor { private final AccordAgent agent; @@ -249,32 +246,33 @@ public void sendReadRepairMutation(Message message, InetAddressAndPort private List> readChains(Dispatcher.RequestTime requestTime) { - TxnRead read = (TxnRead) txn.read(); - Seekables keys = txn.read().keys(); - switch (keys.domain()) + switch (txnId.domain()) { case Key: - return keyReadChains(read, keys, requestTime); + return keyReadChains((Txn.InMemory)txn, requestTime); case Range: - return rangeReadChains(read, keys, requestTime); + return rangeReadChains((Txn.InMemory)txn, requestTime); default: - throw new IllegalStateException("Unhandled domain " + keys.domain()); + throw UnhandledEnum.unknown(txnId.domain()); } } - private List> keyReadChains(TxnRead read, Seekables keys, Dispatcher.RequestTime requestTime) + private List> keyReadChains(Txn.InMemory txn, Dispatcher.RequestTime requestTime) { + TxnRead read = (TxnRead) txn.read(); + Keys keys = (Keys) read.keys(); + TableMetadatasAndKeys tablesAndKeys = (TableMetadatasAndKeys) txn.implementationDefined; ClusterMetadata cm = ClusterMetadata.current(); List> results = new ArrayList<>(); keys.forEach(key -> { - read.forEachWithKey((PartitionKey) key, fragment -> { - SinglePartitionReadCommand command = (SinglePartitionReadCommand) fragment.command(); + read.forEachWithKey(key, fragment -> { + SinglePartitionReadCommand command = (SinglePartitionReadCommand) fragment.command(tablesAndKeys.tables); // This should only rarely occur when coordinators start a transaction in a migrating range // because they haven't yet updated their cluster metadata. // It would be harmless to do the read, because it will be rejected in `TxnQuery` anyways, // but it's faster to skip the read - AccordClientRequestMetrics metrics = txn.kind().isWrite() ? accordWriteMetrics : accordReadMetrics; + AccordClientRequestMetrics metrics = txnId.isWrite() ? accordWriteMetrics : accordReadMetrics; // TODO (required): This doesn't use the metadata from the correct epoch if (!ConsensusRequestRouter.instance.isKeyManagedByAccordForReadAndWrite(cm, command.metadata().id, command.partitionKey())) { @@ -308,12 +306,15 @@ private List> keyReadChains(TxnRead read, Seekables keys, return results; } - private List> rangeReadChains(TxnRead read, Seekables keys, Dispatcher.RequestTime requestTime) + private List> rangeReadChains(Txn.InMemory txn, Dispatcher.RequestTime requestTime) { + TxnRead read = (TxnRead) txn.read(); + AbstractRanges ranges = (AbstractRanges) read.keys(); + TableMetadatasAndKeys tablesAndKeys = (TableMetadatasAndKeys) txn.implementationDefined; List> results = new ArrayList<>(); - keys.forEach(key -> { - read.forEachWithKey(key, fragment -> { - PartitionRangeReadCommand command = ((PartitionRangeReadCommand) fragment.command()).withTxnReadName(fragment.txnDataName()); + ranges.forEach(range -> { + read.forEachWithKey(range, fragment -> { + PartitionRangeReadCommand command = ((PartitionRangeReadCommand) fragment.command(tablesAndKeys.tables)).withTxnReadName(fragment.txnDataName()); // TODO (required): To make migration work we need to validate that the range is all on Accord diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AbstractSortedCollector.java b/src/java/org/apache/cassandra/service/accord/serializers/AbstractSortedCollector.java new file mode 100644 index 000000000000..d979dab3fa97 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/AbstractSortedCollector.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.util.AbstractList; +import java.util.Arrays; +import java.util.Comparator; + +import org.apache.cassandra.utils.BulkIterator; +import org.apache.cassandra.utils.btree.BTree; +import org.apache.cassandra.utils.btree.UpdateFunction; + +import static accord.utils.ArrayBuffers.cachedAny; + +public abstract class AbstractSortedCollector extends AbstractList +{ + private static final int BTREE_THRESHOLD = 16; + + Object buffer; + int count = 0; + + abstract Comparator comparator(); + abstract C empty(); + abstract C of(T one); + abstract C copy(Object[] array, int count); + abstract C copyBtree(Object[] btree, int count); + + public AbstractSortedCollector() + { + } + + public boolean add(T add) + { + return add == collect(add); + } + + protected T collect(T add) + { + if (count == 0) + { + buffer = add; + count = 1; + return add; + } + if (count == 1) + { + if (add.equals(buffer)) + return (T)buffer; + Object[] newBuffer = cachedAny().get(8); + boolean addIsLower = comparator().compare(add, buffer) < 0; + newBuffer[0] = addIsLower ? add : buffer; + newBuffer[1] = addIsLower ? buffer : add; + buffer = newBuffer; + count = 2; + return add; + } + Object[] buffer = (Object[]) this.buffer; + if (count < BTREE_THRESHOLD) + { + int i = Arrays.binarySearch(buffer, 0, count, add, comparator()); + if (i >= 0) + return (T) buffer[i]; + i = -1 - i; + if (count == buffer.length) + this.buffer = buffer = cachedAny().resize(buffer, count, count + 1); + System.arraycopy(buffer, i, buffer, i + 1, count - i); + buffer[i] = add; + if (++count == BTREE_THRESHOLD) + { + Object[] btree = BTree.build(BulkIterator.of(buffer), count, UpdateFunction.noOp()); + cachedAny().forceDiscard(buffer, count); + this.buffer = btree; + } + return add; + } + Object existing = BTree.find(buffer, comparator(), add); + if (existing != null) + return (T)existing; + this.buffer = BTree.update(buffer, BTree.singleton(add), comparator()); + ++count; + return add; + } + + public C build() + { + if (count == 0) + { + return empty(); + } + else if (count == 1) + { + return of((T)buffer); + } + else if (count < BTREE_THRESHOLD) + { + C result = copy((Object[])buffer, count); + cachedAny().forceDiscard((Object[])buffer, count); + return result; + } + else + { + return copyBtree((Object[])buffer, count); + } + } + + @Override + public T get(int index) + { + if (index < 0 || index >= count) throw new IndexOutOfBoundsException(); + if (count == 1) return (T) buffer; + if (count < BTREE_THRESHOLD) + return (T) ((Object[])buffer)[index]; + return BTree.findByIndex((Object[])buffer, index); + } + + @Override + public int size() + { + return count; + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index a231b2125156..1057afe13e09 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -51,16 +51,15 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.db.marshal.ValueAccessor; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.UnversionedSerializer; import org.apache.cassandra.io.VersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.serializers.IVersionedWithKeysSerializer.AbstractWithKeysSerializer; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnWrite; -import org.apache.cassandra.utils.CastingSerializer; import org.apache.cassandra.utils.NullableSerializer; public class CommandSerializers @@ -786,79 +785,73 @@ Ballot decodeSpecial(int specialByte) } } - public static class PartialTxnSerializer extends AbstractWithKeysSerializer + public static class PartialTxnSerializer implements IVersionedSerializer { - private final VersionedSerializer readSerializer; + private final ParameterisedVersionedSerializer readSerializer; private final UnversionedSerializer querySerializer; - private final VersionedSerializer updateSerializer; + private final ParameterisedVersionedSerializer updateSerializer; + private final UnversionedSerializer tablesAndKeysSerializer; - public PartialTxnSerializer(VersionedSerializer readSerializer, + public PartialTxnSerializer(ParameterisedVersionedSerializer readSerializer, UnversionedSerializer querySerializer, - VersionedSerializer updateSerializer) + ParameterisedVersionedSerializer updateSerializer, + UnversionedSerializer tablesAndKeysSerializer) { this.readSerializer = readSerializer; this.querySerializer = querySerializer; this.updateSerializer = updateSerializer; + this.tablesAndKeysSerializer = tablesAndKeysSerializer; } @Override public void serialize(PartialTxn txn, DataOutputPlus out, Version version) throws IOException { - KeySerializers.seekables.serialize(txn.keys(), out); - serializeWithoutKeys(txn, out, version); - } - - @Override - public PartialTxn deserialize(DataInputPlus in, Version version) throws IOException - { - Seekables keys = KeySerializers.seekables.deserialize(in); - return deserializeWithoutKeys(keys, in, version); - } - - @Override - public long serializedSize(PartialTxn txn, Version version) - { - long size = KeySerializers.seekables.serializedSize(txn.keys()); - size += serializedSizeWithoutKeys(txn, version); - return size; - } - - private void serializeWithoutKeys(PartialTxn txn, DataOutputPlus out, Version version) throws IOException - { + PartialTxn.InMemory cast = (PartialTxn.InMemory)txn; CommandSerializers.kind.serialize(txn.kind(), out); - readSerializer.serialize(txn.read(), out, version); + TableMetadatasAndKeys tablesAndKeys = (TableMetadatasAndKeys) cast.implementationDefined; + if (tablesAndKeys != null) tablesAndKeysSerializer.serialize(tablesAndKeys, out); + else KeySerializers.seekables.serialize(txn.keys(), out); + readSerializer.serialize(txn.read(), tablesAndKeys, out, version); querySerializer.serialize(txn.query(), out); out.writeBoolean(txn.update() != null); if (txn.update() != null) - updateSerializer.serialize(txn.update(), out, version); + updateSerializer.serialize(txn.update(), tablesAndKeys, out, version); } - private PartialTxn deserializeWithoutKeys(Seekables keys, DataInputPlus in, Version version) throws IOException + @Override + public PartialTxn deserialize(DataInputPlus in, Version version) throws IOException { Txn.Kind kind = CommandSerializers.kind.deserialize(in); - Read read = readSerializer.deserialize(in, version); + TableMetadatasAndKeys tablesAndKeys = tablesAndKeysSerializer.deserialize(in); + Seekables keys = tablesAndKeys != null ? tablesAndKeys.keys : KeySerializers.seekables.deserialize(in); + Read read = readSerializer.deserialize(tablesAndKeys, in, version); Query query = querySerializer.deserialize(in); - Update update = in.readBoolean() ? updateSerializer.deserialize(in, version) : null; - return new PartialTxn.InMemory(kind, keys, read, query, update); + Update update = in.readBoolean() ? updateSerializer.deserialize(tablesAndKeys, in, version) : null; + return new PartialTxn.InMemory(kind, keys, read, query, update, tablesAndKeys); } - private long serializedSizeWithoutKeys(PartialTxn txn, Version version) + @Override + public long serializedSize(PartialTxn txn, Version version) { long size = CommandSerializers.kind.serializedSize(txn.kind()); - size += readSerializer.serializedSize(txn.read(), version); + TableMetadatasAndKeys tablesAndKeys = (TableMetadatasAndKeys) ((PartialTxn.InMemory)txn).implementationDefined; + if (tablesAndKeys != null) size += tablesAndKeysSerializer.serializedSize(tablesAndKeys); + else size += KeySerializers.seekables.serializedSize(txn.keys()); + size += readSerializer.serializedSize(txn.read(), tablesAndKeys, version); size += querySerializer.serializedSize(txn.query()); size += TypeSizes.sizeof(txn.update() != null); if (txn.update() != null) - size += updateSerializer.serializedSize(txn.update(), version); + size += updateSerializer.serializedSize(txn.update(), tablesAndKeys, version); return size; } } - public static final VersionedSerializer read; + public static final ParameterisedVersionedSerializer read; public static final UnversionedSerializer query; - public static final VersionedSerializer update; - public static final VersionedSerializer write; + public static final ParameterisedVersionedSerializer update; + public static final ParameterisedVersionedSerializer write; + public static final UnversionedSerializer tablesAndKeys; public static final VersionedSerializer partialTxn; public static final VersionedSerializer nullablePartialTxn; @@ -871,6 +864,7 @@ private long serializedSizeWithoutKeys(PartialTxn txn, Version version) query = querySerializers.query; update = querySerializers.update; write = querySerializers.write; + tablesAndKeys = querySerializers.tablesAndKeys; partialTxn = querySerializers.partialTxn; nullablePartialTxn = querySerializers.nullablePartialTxn; @@ -879,33 +873,37 @@ private long serializedSizeWithoutKeys(PartialTxn txn, Version version) @VisibleForTesting public static class QuerySerializers { - public final VersionedSerializer read; + public final ParameterisedVersionedSerializer read; public final UnversionedSerializer query; - public final VersionedSerializer update; - public final VersionedSerializer write; + public final ParameterisedVersionedSerializer update; + public final ParameterisedVersionedSerializer write; + public final UnversionedSerializer tablesAndKeys; public final VersionedSerializer partialTxn; public final VersionedSerializer nullablePartialTxn; private QuerySerializers() { - this(CastingSerializer.create(TxnRead.class, TxnRead.serializer), - CastingSerializer.create(TxnQuery.class, TxnQuery.serializer), - CastingSerializer.create(AccordUpdate.class, AccordUpdate.serializer), - CastingSerializer.create(TxnWrite.class, TxnWrite.serializer)); + this((ParameterisedVersionedSerializer) TxnRead.serializer, + (UnversionedSerializer) TxnQuery.serializer, + (ParameterisedVersionedSerializer) AccordUpdate.serializer, + (ParameterisedVersionedSerializer) TxnWrite.serializer, + TableMetadatasAndKeys.serializer); } - public QuerySerializers(VersionedSerializer read, + public QuerySerializers(ParameterisedVersionedSerializer read, UnversionedSerializer query, - VersionedSerializer update, - VersionedSerializer write) + ParameterisedVersionedSerializer update, + ParameterisedVersionedSerializer write, + UnversionedSerializer tablesAndKeys) { this.read = read; this.query = query; this.update = update; this.write = write; + this.tablesAndKeys = tablesAndKeys; - this.partialTxn = new PartialTxnSerializer(read, query, update); + this.partialTxn = new PartialTxnSerializer(read, query, update, tablesAndKeys); this.nullablePartialTxn = NullableSerializer.wrap(partialTxn); } } @@ -922,20 +920,23 @@ public void serialize(Writes writes, DataOutputPlus out, Version version) throws txnId.serialize(writes.txnId, out); ExecuteAtSerializer.serialize(writes.txnId, writes.executeAt, out); KeySerializers.seekables.serialize(writes.keys, out); - boolean hasWrites = writes.write != null; - out.writeBoolean(hasWrites); - - if (hasWrites) - CommandSerializers.write.serialize(writes.write, out, version); + boolean hasWrite = writes.write != null; + out.writeBoolean(hasWrite); + if (hasWrite) + CommandSerializers.write.serialize(writes.write, writes.keys, out, version); } @Override public Writes deserialize(DataInputPlus in, Version version) throws IOException { TxnId id = txnId.deserialize(in); - return new Writes(id, ExecuteAtSerializer.deserialize(id, in), - KeySerializers.seekables.deserialize(in), - in.readBoolean() ? CommandSerializers.write.deserialize(in, version) : null); + Timestamp executeAt = ExecuteAtSerializer.deserialize(id, in); + Seekables seekables = KeySerializers.seekables.deserialize(in); + boolean hasWrite = in.readBoolean(); + Write write = null; + if (hasWrite) + write = CommandSerializers.write.deserialize(seekables, in, version); + return new Writes(id, executeAt, seekables, write); } @Override @@ -943,11 +944,11 @@ public long serializedSize(Writes writes, Version version) { long size = txnId.serializedSize(writes.txnId); size += ExecuteAtSerializer.serializedSize(writes.txnId, writes.executeAt); - size += KeySerializers.seekables.serializedSize(writes.keys); boolean hasWrites = writes.write != null; + size += KeySerializers.seekables.serializedSize(writes.keys); size += TypeSizes.sizeof(hasWrites); if (hasWrites) - size += CommandSerializers.write.serializedSize(writes.write, version); + size += CommandSerializers.write.serializedSize(writes.write, writes.keys, version); return size; } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java index eb2cded874a9..d9b9f18005b7 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java @@ -22,10 +22,12 @@ import java.util.function.BiFunction; import java.util.function.IntFunction; +import accord.api.Key; import accord.api.RoutingKey; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; import accord.primitives.AbstractUnseekableKeys; +import accord.primitives.Keys; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routable; @@ -34,6 +36,7 @@ import accord.primitives.RoutingKeys; import accord.utils.UnhandledEnum; import net.nicoulaj.compilecommand.annotations.DontInline; +import net.nicoulaj.compilecommand.annotations.Inline; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -100,10 +103,13 @@ protected void serializeSubsetInternal(Routables serialize, Routables supe } else if (supersetCount < 64) { - switch (serialize.domain()) + switch (serialize.domainKind()) { - default: throw UnhandledEnum.unknown(serialize.domain()); - case Key: + default: throw UnhandledEnum.unknown(serialize.domainKind()); + case SeekableKey: + out.writeUnsignedVInt(encodeBitmap((Keys)serialize, (Keys)superset, supersetCount)); + break; + case UnseekableKey: out.writeUnsignedVInt(encodeBitmap((AbstractUnseekableKeys)serialize, (AbstractUnseekableKeys)superset, supersetCount)); break; case Range: @@ -113,10 +119,13 @@ else if (supersetCount < 64) } else { - switch (serialize.domain()) + switch (serialize.domainKind()) { - default: throw UnhandledEnum.unknown(serialize.domain()); - case Key: + default: throw UnhandledEnum.unknown(serialize.domainKind()); + case SeekableKey: + serializeLargeSubset((Keys)serialize, serializeCount, (Keys)superset, supersetCount, out); + break; + case UnseekableKey: serializeLargeSubset((AbstractUnseekableKeys)serialize, serializeCount, (AbstractUnseekableKeys)superset, supersetCount, out); break; case Range: @@ -126,7 +135,7 @@ else if (supersetCount < 64) } } - public long serializedSubsetSizeInternal(Routables serialize, Routables superset) + protected long serializedSubsetSizeInternal(Routables serialize, Routables superset) { int columnCount = serialize.size(); int supersetCount = superset.size(); @@ -136,10 +145,12 @@ public long serializedSubsetSizeInternal(Routables serialize, Routables su } else if (supersetCount < 64) { - switch (serialize.domain()) + switch (serialize.domainKind()) { - default: throw UnhandledEnum.unknown(serialize.domain()); - case Key: + default: throw UnhandledEnum.unknown(serialize.domainKind()); + case SeekableKey: + return TypeSizes.sizeofUnsignedVInt(encodeBitmap((Keys)serialize, (Keys)superset, supersetCount)); + case UnseekableKey: return TypeSizes.sizeofUnsignedVInt(encodeBitmap((AbstractUnseekableKeys)serialize, (AbstractUnseekableKeys)superset, supersetCount)); case Range: return TypeSizes.sizeofUnsignedVInt(encodeBitmap((AbstractRanges)serialize, (AbstractRanges)superset, supersetCount)); @@ -147,10 +158,12 @@ else if (supersetCount < 64) } else { - switch (serialize.domain()) + switch (serialize.domainKind()) { - default: throw UnhandledEnum.unknown(serialize.domain()); - case Key: + default: throw UnhandledEnum.unknown(serialize.domainKind()); + case SeekableKey: + return serializeLargeSubsetSize((Keys)serialize, columnCount, (Keys)superset, supersetCount); + case UnseekableKey: return serializeLargeSubsetSize((AbstractUnseekableKeys)serialize, columnCount, (AbstractUnseekableKeys)superset, supersetCount); case Range: return serializeLargeSubsetSize((AbstractRanges)serialize, columnCount, (AbstractRanges)superset, supersetCount); @@ -158,6 +171,38 @@ else if (supersetCount < 64) } } + @DontInline + private > long serializeLargeSubsetSize(R serialize, int serializeCount, R superset, int supersetCount) + { + long size = TypeSizes.sizeofUnsignedVInt(supersetCount - serializeCount); + if (serializeCount == 0) return size; + int prevSupersetIndex = 0; + int supersetIndex = 0; + int take = 0; + for (int i = 0; i < serializeCount; i++) + { + int offset = supersetIndex + take; + int nextIndex = superset.findNext(offset, serialize.get(i), FAST); + if (nextIndex == offset) + { + take++; + continue; + } + if (take != 0) // since this is dealing with subsets, the only time take=0 is when i=0 and the first superset offset isn't included + { + size += TypeSizes.sizeofUnsignedVInt(take); + size += TypeSizes.sizeofUnsignedVInt(supersetIndex - prevSupersetIndex); + prevSupersetIndex = supersetIndex; + } + + supersetIndex = nextIndex; + take = 1; + } + size += TypeSizes.sizeofUnsignedVInt(take); + size += TypeSizes.sizeofUnsignedVInt(supersetIndex - prevSupersetIndex); + return size; + } + // encodes a 1 bit for every *missing* column, on the assumption presence is more common, // and because this is consistent with encoding 0 to represent all present private static long encodeBitmap(AbstractKeys serialize, AbstractKeys superset, int supersetCount) @@ -181,117 +226,84 @@ private static long encodeBitmap(AbstractRanges serialize, AbstractRanges supers } @DontInline - private void serializeLargeSubset(AbstractKeys serialize, int serializeCount, AbstractKeys superset, int supersetCount, DataOutputPlus out) throws IOException + private > void serializeLargeSubset(R serialize, int serializeCount, + R superset, int supersetCount, + DataOutputPlus out) throws IOException { out.writeUnsignedVInt32(supersetCount - serializeCount); - int serializeIndex = 0, supersetIndex = 0; - while (serializeIndex < serializeCount) + if (serializeCount == 0) return; + int prevSupersetIndex = 0; + int supersetIndex = 0; + int take = 0; + for (int i = 0; i < serializeCount; i++) { - int prevSupersetIndex = supersetIndex; - int nextSupersetIndex; - do + int offset = supersetIndex + take; + int nextIndex = superset.findNext(offset, serialize.get(i), FAST); + if (nextIndex == offset) { - nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); - if (supersetIndex + 1 != nextSupersetIndex) - break; - supersetIndex++; + take++; + continue; } - while (serializeIndex < serializeCount); - - out.writeUnsignedVInt32(supersetIndex - prevSupersetIndex); - out.writeUnsignedVInt32(nextSupersetIndex - supersetIndex); - supersetIndex = nextSupersetIndex; - } - } - - @DontInline - private void serializeLargeSubset(AbstractRanges serialize, int serializeCount, AbstractRanges superset, int supersetCount, DataOutputPlus out) throws IOException - { - out.writeUnsignedVInt32(supersetCount - serializeCount); - int serializeIndex = 0, supersetIndex = 0; - while (serializeIndex < serializeCount) - { - int prevSupersetIndex = supersetIndex; - int nextSupersetIndex; - do + if (take != 0) { - nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); - if (supersetIndex + 1 != nextSupersetIndex) - break; - supersetIndex++; + out.writeUnsignedVInt32(take); + out.writeUnsignedVInt32(supersetIndex - prevSupersetIndex); + prevSupersetIndex = supersetIndex; } - while (serializeIndex < serializeCount); - out.writeUnsignedVInt32(supersetIndex - prevSupersetIndex); - out.writeUnsignedVInt32(nextSupersetIndex - supersetIndex); - supersetIndex = nextSupersetIndex; + supersetIndex = nextIndex; + take = 1; } + out.writeUnsignedVInt32(take); + out.writeUnsignedVInt32(supersetIndex - prevSupersetIndex); } public Routables deserializeSubsetInternal(Routables superset, DataInputPlus in) throws IOException { - switch (superset.domain()) - { - default: throw UnhandledEnum.unknown(superset.domain()); - case Key: return deserializeRoutingKeySubset((AbstractUnseekableKeys) superset, in, (ks, s) -> ks == null ? s : RoutingKeys.of(ks)); - case Range: return deserializeRangeSubset((AbstractRanges) superset, in, (rs, s) -> rs == null ? s : Ranges.of(rs)); - } - } - - public void skipSubsetInternal(int supersetCount, DataInputPlus in) throws IOException - { - long encoded = in.readUnsignedVInt(); - if (supersetCount <= 64) - return; - - int deserializeCount = supersetCount - (int)encoded; - int count = 0; - while (count < deserializeCount) + switch (superset.domainKind()) { - count += in.readUnsignedVInt32(); - in.readUnsignedVInt32(); + default: throw UnhandledEnum.unknown(superset.domainKind()); + case SeekableKey: return deserializeSubset((Keys) superset, in, (ks, s) -> ks == null ? s : Keys.of(ks), Key[]::new); + case UnseekableKey: return deserializeSubset((AbstractUnseekableKeys) superset, in, (ks, s) -> ks == null ? s : RoutingKeys.of(ks), RoutingKey[]::new); + case Range: return deserializeSubset((AbstractRanges) superset, in, (rs, s) -> rs == null ? s : Ranges.of(rs), Range[]::new); } } - public T deserializeRoutingKeySubset(S superset, DataInputPlus in, BiFunction result) throws IOException - { - long encoded = in.readUnsignedVInt(); - int supersetCount = superset.size(); - if (encoded == 0L) - return result.apply(null, superset); - else if (supersetCount >= 64) - return result.apply(deserializeLargeRoutingKeySubset(in, superset, supersetCount, (int) encoded), superset); - else - return result.apply(deserializeSmallRoutingKeySubset(encoded, superset, supersetCount), superset); - } - - public T deserializeRangeSubset(S superset, DataInputPlus in, BiFunction result) throws IOException + public , T> T deserializeSubset(R superset, DataInputPlus in, BiFunction result, IntFunction allocator) throws IOException { long encoded = in.readUnsignedVInt(); int supersetCount = superset.size(); if (encoded == 0L) return result.apply(null, superset); else if (supersetCount >= 64) - return result.apply(deserializeLargeRangeSubset(in, superset, supersetCount, (int) encoded), superset); + return result.apply(deserializeLargeSubset(in, superset, supersetCount, (int) encoded, allocator), superset); else - return result.apply(deserializeSmallRangeSubsetArray(encoded, superset, supersetCount), superset); - } - - private RoutingKey[] deserializeSmallRoutingKeySubset(long encoded, AbstractUnseekableKeys superset, int supersetCount) - { - return deserializeSmallSubsetArray(encoded, superset, supersetCount, RoutingKey[]::new); + return result.apply(deserializeSmallSubsetArray(encoded, superset, supersetCount, allocator), superset); } - private Range[] deserializeSmallRangeSubsetArray(long encoded, AbstractRanges superset, int supersetCount) + @Inline + private T[] deserializeLargeSubset(DataInputPlus in, Routables superset, int supersetCount, int delta, IntFunction allocator) throws IOException { - return deserializeSmallSubsetArray(encoded, superset, supersetCount, Range[]::new); + int deserializeCount = supersetCount - delta; + T[] out = allocator.apply(deserializeCount); + int count = 0; + int prevSupersetIndex = 0; + while (count < deserializeCount) + { + int take = in.readUnsignedVInt32(); + int supersetIndex = in.readUnsignedVInt32() + prevSupersetIndex; + prevSupersetIndex = supersetIndex; + for (int i = 0; i < take; i++) + out[count++] = superset.get(supersetIndex + i); + } + return out; } - private R[] deserializeSmallSubsetArray(long encoded, Routables superset, int supersetCount, IntFunction allocator) + private K[] deserializeSmallSubsetArray(long encoded, Routables superset, int supersetCount, IntFunction allocator) { encoded ^= -1L >>> (64 - supersetCount); int deserializeCount = Long.bitCount(encoded); - R[] out = allocator.apply(deserializeCount); + K[] out = allocator.apply(deserializeCount); int count = 0; while (encoded != 0) { @@ -302,86 +314,20 @@ private R[] deserializeSmallSubsetArray(long encoded, Routa return out; } - @DontInline - private RoutingKey[] deserializeLargeRoutingKeySubset(DataInputPlus in, AbstractUnseekableKeys superset, int supersetCount, int delta) throws IOException - { - int deserializeCount = supersetCount - delta; - RoutingKey[] out = new RoutingKey[deserializeCount]; - int supersetIndex = 0; - int count = 0; - while (count < deserializeCount) - { - int takeCount = in.readUnsignedVInt32(); - while (takeCount-- > 0) out[count++] = superset.get(supersetIndex++); - supersetIndex += in.readUnsignedVInt32(); - } - return out; - } - - @DontInline - private Range[] deserializeLargeRangeSubset(DataInputPlus in, AbstractRanges superset, int supersetCount, int delta) throws IOException + public void skipSubsetInternal(int supersetCount, DataInputPlus in) throws IOException { - int deserializeCount = supersetCount - delta; - Range[] out = new Range[deserializeCount]; - int supersetIndex = 0; + long encoded = in.readUnsignedVInt(); + if (encoded == 0 || supersetCount < 64) return; + // large + int deserializeCount = supersetCount - ((int) encoded); int count = 0; while (count < deserializeCount) { - int takeCount = in.readUnsignedVInt32(); - while (takeCount-- > 0) out[count++] = superset.get(supersetIndex++); - supersetIndex += in.readUnsignedVInt32(); - } - return out; - } - - @DontInline - private long serializeLargeSubsetSize(AbstractKeys serialize, int serializeCount, AbstractKeys superset, int supersetCount) - { - long size = TypeSizes.sizeofUnsignedVInt(supersetCount - serializeCount); - int serializeIndex = 0, supersetIndex = 0; - while (serializeIndex < serializeCount) - { - int prevSupersetIndex = supersetIndex; - int nextSupersetIndex; - do - { - nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); - if (supersetIndex + 1 != nextSupersetIndex) - break; - supersetIndex++; - } - while (serializeIndex < serializeCount); - - size += TypeSizes.sizeofUnsignedVInt(supersetIndex - prevSupersetIndex); - size += TypeSizes.sizeofUnsignedVInt(nextSupersetIndex - supersetIndex); - supersetIndex = nextSupersetIndex; - } - return size; - } - - @DontInline - private long serializeLargeSubsetSize(AbstractRanges serialize, int serializeCount, AbstractRanges superset, int supersetCount) - { - long size = TypeSizes.sizeofUnsignedVInt(supersetCount - serializeCount); - int serializeIndex = 0, supersetIndex = 0; - while (serializeIndex < serializeCount) - { - int prevSupersetIndex = supersetIndex; - int nextSupersetIndex; - do - { - nextSupersetIndex = superset.findNext(supersetIndex, serialize.get(serializeIndex++), FAST); - if (supersetIndex + 1 != nextSupersetIndex) - break; - supersetIndex++; - } - while (serializeIndex < serializeCount); - - size += TypeSizes.sizeofUnsignedVInt(supersetIndex - prevSupersetIndex); - size += TypeSizes.sizeofUnsignedVInt(nextSupersetIndex - supersetIndex); - supersetIndex = nextSupersetIndex; + int take = in.readUnsignedVInt32(); + in.readUnsignedVInt32(); + for (int i = 0; i < take; i++) + count++; } - return size; } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index 0c96f24207f5..3ab41a74df57 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -919,7 +919,7 @@ final RoutingKey deserializeWithPrefix(Object prefix, int lengthIndex, int[] len public KS deserializeSubset(AbstractUnseekableKeys superset, DataInputPlus in) throws IOException { - RoutingKey[] keys = deserializeRoutingKeySubset(superset, in, (ks, s) -> ks == null ? s.unsafeKeys() : ks); + RoutingKey[] keys = deserializeSubset(superset, in, (ks, s) -> ks == null ? s.unsafeKeys() : ks, RoutingKey[]::new); return deserialize(in, keys); } } @@ -1071,7 +1071,7 @@ final Range deserializeWithPrefix(Object prefix, int lengthIndex, int[] lengths, public RS deserializeSubset(AbstractRanges superset, DataInputPlus in) throws IOException { - Range[] ranges = deserializeRangeSubset(superset, in, (rs, s) -> rs == null ? s.unsafeRanges() : rs); + Range[] ranges = deserializeSubset(superset, in, (rs, s) -> rs == null ? s.unsafeRanges() : rs, Range[]::new); return deserialize(in, ranges); } } @@ -1134,6 +1134,4 @@ public long serializedSubsetSize(RS route, Routables superset) return super.serializedSubsetSize(route, superset) + routingKey.serializedSize(route.homeKey); } } - - } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TableMetadatas.java b/src/java/org/apache/cassandra/service/accord/serializers/TableMetadatas.java new file mode 100644 index 000000000000..6e24758961e8 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/TableMetadatas.java @@ -0,0 +1,415 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.AbstractList; +import java.util.Arrays; +import java.util.Comparator; + +import accord.utils.Invariants; +import accord.utils.SortedArrays; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.exceptions.UnknownTableException; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.btree.BTree; + +import static accord.utils.SortedArrays.Search.FAST; + +public abstract class TableMetadatas extends AbstractList +{ + private static final Comparator comparingId = Comparator.comparing(v -> ((TableMetadata) v).id); + public static class Collector extends AbstractSortedCollector + { + @Override + Comparator comparator() + { + return comparingId; + } + + @Override + Complete empty() + { + return TableMetadatas.none(); + } + + @Override + Complete of(TableMetadata one) + { + return TableMetadatas.of(one); + } + + @Override + Complete copy(Object[] array, int count) + { + TableMetadata[] result = new TableMetadata[count]; + System.arraycopy(array, 0, result, 0, count); + return TableMetadatas.ofSortedUnique(result); + } + + @Override + Complete copyBtree(Object[] btree, int count) + { + TableMetadata[] result = new TableMetadata[count]; + int i = 0; + for (TableMetadata v : BTree.iterable(btree)) + result[i++] = v; + return TableMetadatas.ofSortedUnique(result); + } + } + + public abstract int indexOf(TableMetadata find); + public abstract int indexOf(TableId find); + public abstract TableId get(TableId tableId); + + public abstract void serialize(TableMetadata table, DataOutputPlus out) throws IOException; + public abstract TableMetadata deserialize(DataInputPlus in) throws IOException; + public abstract long serializedSize(TableMetadata table); + + public abstract void serializeSelf(DataOutputPlus out) throws IOException; + public abstract long serializedSelfSize(); + + public static Complete none() + { + return Multi.NONE; + } + + public static Complete of(TableMetadata metadata) + { + return new One(metadata); + } + + public static Complete ofSortedUnique(TableMetadata ... metadatas) + { + if (metadatas.length == 0) + return none(); + if (metadatas.length == 1) + return new One(metadatas[0]); + Invariants.requireStrictlyOrdered(comparingId, metadatas); + return new Multi(metadatas); + } + + public static abstract class Complete extends TableMetadatas + { + public abstract TableMetadata getMetadata(TableId tableId); + } + + static class One extends Complete + { + final TableMetadata table; + + One(TableMetadata table) + { + this.table = table; + } + + @Override + public TableId get(int index) + { + Invariants.require(index == 0); + return table.id; + } + + @Override + public int size() + { + return 1; + } + + @Override + public int indexOf(TableMetadata find) + { + int c = find.id == table.id ? 0 : find.id.compareTo(table.id); + if (c == 0) return 0; + else if (c < 0) return -1; + else return -2; + } + + @Override + public void serialize(TableMetadata table, DataOutputPlus out) throws IOException + { + } + + @Override + public void serializeSelf(DataOutputPlus out) throws IOException + { + out.writeUnsignedVInt32(1); + table.id.serializeCompactComparable(out); + } + + @Override + public TableMetadata deserialize(DataInputPlus in) throws IOException + { + return table; + } + + @Override + public long serializedSize(TableMetadata table) + { + return 0; + } + + @Override + public long serializedSelfSize() + { + return TypeSizes.sizeofUnsignedVInt(1) + table.id.serializedCompactComparableSize(); + } + + @Override + public int indexOf(TableId tableId) + { + if (tableId.equals(table.id)) + return 0; + return -1; + } + + @Override + public TableId get(TableId tableId) + { + if (tableId.equals(table.id)) + return table.id; + return null; + } + + @Override + public TableMetadata getMetadata(TableId tableId) + { + if (tableId.equals(table.id)) + return table; + return null; + } + } + + static class Multi extends Complete + { + static final Complete NONE = new Multi(); + + final TableMetadata[] tables; + + Multi(TableMetadata ... tables) + { + this.tables = tables; + } + + @Override + public TableId get(int index) + { + return tables[index].id; + } + + @Override + public int size() + { + return tables.length; + } + + @Override + public int indexOf(TableMetadata find) + { + return Arrays.binarySearch(tables, find, comparingId); + } + + @Override + public int indexOf(TableId find) + { + return SortedArrays.binarySearch(tables, 0, tables.length, find, (id, metadata) -> id.compareTo(metadata.id), FAST); + } + + @Override + public void serialize(TableMetadata table, DataOutputPlus out) throws IOException + { + int i = indexOf(table); + if (i < 0) + throw new IllegalStateException("TableMetadata for " + table + " not found in " + this); + out.writeUnsignedVInt32(i); + } + + @Override + public void serializeSelf(DataOutputPlus out) throws IOException + { + out.writeUnsignedVInt32(tables.length); + for (TableMetadata table : tables) + table.id.serializeCompactComparable(out); + } + + @Override + public TableMetadata deserialize(DataInputPlus in) throws IOException + { + return tables[in.readUnsignedVInt32()]; + } + + @Override + public long serializedSize(TableMetadata table) + { + int i = indexOf(table); + if (i < 0) + throw new IllegalStateException("TableMetadata for " + table + " not found in " + this); + return TypeSizes.sizeofUnsignedVInt(indexOf(table)); + } + + @Override + public long serializedSelfSize() + { + long size = TypeSizes.sizeofUnsignedVInt(tables.length); + for (TableMetadata table : tables) + size += table.id.serializedCompactComparableSize(); + return size; + } + + @Override + public TableId get(TableId tableId) + { + int i = indexOf(tableId); + return i >= 0 ? tables[i].id : null; + } + + @Override + public TableMetadata getMetadata(TableId tableId) + { + int i = indexOf(tableId); + return i >= 0 ? tables[i] : null; + } + } + + static class WithUnknown extends TableMetadatas + { + final TableId[] ids; + final TableMetadata[] metadatas; + + WithUnknown(TableId[] ids, TableMetadata[] metadatas) + { + this.ids = ids; + this.metadatas = metadatas; + } + + @Override + public TableId get(int index) + { + return ids[index]; + } + + @Override + public int size() + { + return ids.length; + } + + @Override + public int indexOf(TableMetadata find) + { + return indexOf(find.id); + } + + @Override + public int indexOf(TableId find) + { + return Arrays.binarySearch(ids, find); + } + + @Override + public void serialize(TableMetadata table, DataOutputPlus out) throws IOException + { + int i = indexOf(table); + if (i < 0) + throw new IllegalStateException("TableMetadata for " + table + " not found in " + this); + out.writeUnsignedVInt32(i); + } + + @Override + public void serializeSelf(DataOutputPlus out) throws IOException + { + out.writeUnsignedVInt32(ids.length); + for (TableId id : ids) + id.serializeCompactComparable(out); + } + + @Override + public TableMetadata deserialize(DataInputPlus in) throws IOException + { + int index = in.readUnsignedVInt32(); + TableMetadata metadata = metadatas[index]; + if (metadata == null) + throw new UnknownTableException("Unknown table", ids[index]); + return metadata; + } + + @Override + public long serializedSize(TableMetadata table) + { + int i = indexOf(table); + if (i < 0) + throw new IllegalStateException("TableMetadata for " + table + " not found in " + this); + return TypeSizes.sizeofUnsignedVInt(indexOf(table)); + } + + @Override + public long serializedSelfSize() + { + long size = TypeSizes.sizeofUnsignedVInt(ids.length); + for (TableId id : ids) + size += id.serializedCompactComparableSize(); + return size; + } + + @Override + public TableId get(TableId tableId) + { + int index = indexOf(tableId); + return get(index); + } + } + + public static TableMetadatas deserializeSelf(DataInputPlus in) throws IOException + { + int count = in.readUnsignedVInt32(); + if (count == 0) + return none(); + if (count == 1) + { + TableId id = TableId.deserializeCompactComparable(in); + TableMetadata metadata = Schema.instance.getTableMetadata(id); + if (metadata == null) + return new WithUnknown(new TableId[] { id}, new TableMetadata[] { null }); + return new One(metadata); + } + TableId[] ids = null; + TableMetadata[] metadatas = new TableMetadata[count]; + int i; + for (i = 0 ; i < count ; ++i) + { + TableId id = TableId.deserializeCompactComparable(in); + TableMetadata metadata = Schema.instance.getTableMetadata(id); + metadatas[i] = metadata; + if (ids != null) ids[i] = id; + else if (metadata == null) + { + ids = new TableId[count]; + for (int j = 0 ; j < i ; ++j) + ids[j] = metadatas[j].id; + } + } + if (ids == null) + return new Multi(metadatas); + return new WithUnknown(ids, metadatas); + } +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeys.java b/src/java/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeys.java new file mode 100644 index 000000000000..293264bfa511 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeys.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.Comparator; + +import accord.api.Key; +import accord.api.Sliceable; +import accord.primitives.Keys; +import accord.primitives.Participants; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Seekable; +import accord.primitives.Seekables; +import accord.utils.Invariants; +import accord.utils.VIntCoding; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.io.UnversionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas.Multi; +import org.apache.cassandra.utils.btree.BTreeSet; + +import static accord.primitives.Routable.Domain.Range; +import static accord.primitives.Routables.Slice.Minimal; + +public class TableMetadatasAndKeys extends IVersionedWithKeysSerializer.AbstractWithKeysSerializer implements Sliceable +{ + public static class KeyCollector extends AbstractSortedCollector + { + private static final Comparator comparator = Comparator.comparing(v -> ((PartitionKey) v)); + + public final TableMetadatas tables; + + public KeyCollector(TableMetadatas tables) + { + this.tables = tables; + } + + public TableMetadatasAndKeys buildTablesAndKeys() + { + return new TableMetadatasAndKeys(tables, build()); + } + + @Override + Comparator comparator() + { + return comparator; + } + + public PartitionKey collect(TableMetadata table, DecoratedKey key) + { + TableId tableId = tables.get(table.id); + if (count == 1) + { + PartitionKey one = (PartitionKey) buffer; + if (one.prefix() == table && one.partitionKey().equals(key)) + return one; + } + return collect(new PartitionKey(tableId, key)); + } + + @Override + Keys empty() + { + return Keys.EMPTY; + } + + @Override + Keys of(PartitionKey one) + { + return Keys.of(one); + } + + @Override + Keys copy(Object[] array, int count) + { + Key[] result = new Key[count]; + System.arraycopy(array, 0, result, 0, count); + return Keys.ofSortedUnique(result); + } + + @Override + Keys copyBtree(Object[] btree, int count) + { + return Keys.ofSortedUnique(new BTreeSet<>(btree, comparator())); + } + } + + private static final TableMetadatasAndKeys NO_KEYS = new TableMetadatasAndKeys(Multi.NONE, Keys.EMPTY); + private static final TableMetadatasAndKeys NO_RANGES = new TableMetadatasAndKeys(Multi.NONE, Ranges.EMPTY); + + public static TableMetadatasAndKeys none(Routable.Domain domain) + { + return domain.isKey() ? NO_KEYS : NO_RANGES; + } + + public final TableMetadatas tables; + public final Seekables keys; + + public TableMetadatasAndKeys(TableMetadatas tables, Seekables keys) + { + this.tables = tables; + this.keys = keys; + } + + public void serializeKeys(Keys keys, DataOutputPlus out) throws IOException + { + serializeSubsetInternal(keys, this.keys, out); + } + + public Keys deserializeKeys(DataInputPlus in) throws IOException + { + return (Keys)deserializeSubsetInternal(this.keys, in); + } + + public void serializeSeekable(Seekable seekable, DataOutputPlus out) throws IOException + { + int index = keys.indexOf(seekable); + if (index >= 0) out.writeUnsignedVInt32(1 + index); + else + { + Invariants.require(seekable.domain() == Range); + out.writeUnsignedVInt32(0); + KeySerializers.seekable.serialize(seekable, out); + } + } + + public void serializeKey(PartitionKey key, DataOutputPlus out) throws IOException + { + int index = keys.indexOf(key); + Invariants.require(index >= 0); + out.writeUnsignedVInt32(index); + } + + public Seekable deserializeSeekable(DataInputPlus in) throws IOException + { + int offset = in.readUnsignedVInt32(); + Seekable key; + if (offset > 0) key = (Seekable) keys.get(offset - 1); + else key = KeySerializers.seekable.deserialize(in); + return key; + } + + public PartitionKey deserializeKey(DataInputPlus in) throws IOException + { + int offset = in.readUnsignedVInt32(); + return (PartitionKey) keys.get(offset); + } + + public long serializedKeysSize(Keys keys) + { + return serializedSubsetSizeInternal(keys, this.keys); + } + + public long serializedSeekableSize(Seekable seekable) + { + int i = keys.indexOf(seekable); + Invariants.require(i >= 0 || seekable.domain() == Range); + return VIntCoding.sizeOfUnsignedVInt(1 + i); + } + + public long serializedKeySize(PartitionKey key) + { + int i = keys.indexOf(key); + Invariants.require(i >= 0); + return VIntCoding.sizeOfUnsignedVInt(i); + } + + public TableMetadatasAndKeys slice(Ranges ranges) + { + return new TableMetadatasAndKeys(tables, keys.slice(ranges, Minimal)); + } + + @Override + public TableMetadatasAndKeys intersecting(Participants participants) + { + return new TableMetadatasAndKeys(tables, keys.intersecting(participants, Minimal)); + } + + @Override + public TableMetadatasAndKeys merge(TableMetadatasAndKeys merge) + { + Invariants.require(tables.equals(merge.tables)); + return new TableMetadatasAndKeys(tables, keys.with(merge.keys)); + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TableMetadatasAndKeys that = (TableMetadatasAndKeys) o; + return tables.equals(that.tables) && keys.equals(that.keys); + } + + @Override + public int hashCode() + { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() + { + return "{tables=" + tables + ",keys=" + keys + '}'; + } + + public static final UnversionedSerializer serializer = new UnversionedSerializer<>() + { + @Override + public void serialize(TableMetadatasAndKeys tablesAndKeys, DataOutputPlus out) throws IOException + { + tablesAndKeys.tables.serializeSelf(out); + KeySerializers.seekables.serialize(tablesAndKeys.keys, out); + } + + @Override + public TableMetadatasAndKeys deserialize(DataInputPlus in) throws IOException + { + TableMetadatas tables = TableMetadatas.deserializeSelf(in); + Seekables keys = KeySerializers.seekables.deserialize(in); + return new TableMetadatasAndKeys(tables, keys); + } + + @Override + public long serializedSize(TableMetadatasAndKeys tablesAndKeys) + { + return tablesAndKeys.tables.serializedSelfSize() + + KeySerializers.seekables.serializedSize(tablesAndKeys.keys); + } + }; +} diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TxnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/TxnSerializer.java new file mode 100644 index 000000000000..cd3819350c16 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/serializers/TxnSerializer.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import org.apache.cassandra.io.ParameterisedVersionedSerializer; + +public interface TxnSerializer extends ParameterisedVersionedSerializer +{ +} diff --git a/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java b/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java index 3cc2f6fc2275..ac605b33ab08 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java +++ b/src/java/org/apache/cassandra/service/accord/txn/AbstractKeySorted.java @@ -131,6 +131,11 @@ public Seekables keys() return itemKeys; } + public T get(int index) + { + return items[index]; + } + /** * Compare the non-key component of items (since this class handles sorting by key) */ @@ -139,8 +144,6 @@ public Seekables keys() abstract Seekable getKey(T item); abstract T[] newArray(int size); - abstract Domain domain(); - public int compareKey(T left, T right) { int cmp = ((PartitionKey)getKey(left)).compareTo(((PartitionKey)getKey(right))); diff --git a/src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java b/src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java index 3f8a49cf345c..ac172a221bbb 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java +++ b/src/java/org/apache/cassandra/service/accord/txn/AbstractSerialized.java @@ -25,82 +25,54 @@ import javax.annotation.concurrent.NotThreadSafe; import accord.utils.Invariants; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; import org.apache.cassandra.service.accord.serializers.Version; -import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.ObjectSizes; /** * Item that is serialized by default */ @NotThreadSafe -public abstract class AbstractSerialized +public abstract class AbstractSerialized { - private static final long EMPTY = ObjectSizes.measure(new AbstractSerialized(null, null) { - @Override - protected IVersionedSerializer serializer() - { - throw new AssertionError(); - } - }); - public final Version version; - private @Nullable final ByteBuffer bytes; + private @Nullable final ByteBuffer latestVersionBytes; private transient @Nullable T memoized = null; - public AbstractSerialized(@Nullable ByteBuffer bytes, Version version) + protected AbstractSerialized(@Nullable ByteBuffer latestVersionBytes) { - this.version = version; - this.bytes = bytes; - } - - public AbstractSerialized(T value) - { - this.version = Version.LATEST; - this.bytes = serializer().serializeUnchecked(Invariants.nonNull(value), version); - this.memoized = value; - } - - public long estimatedSizeOnHeap() - { - return EMPTY + ByteBufferUtil.estimatedSizeOnHeap(bytes); + this.latestVersionBytes = latestVersionBytes; } @Override public boolean equals(Object o) { if (this == o) return true; - if (o == null || !(o instanceof AbstractSerialized)) return false; - - AbstractSerialized that = (AbstractSerialized) o; + if (o == null || (o.getClass() != getClass())) return false; - return Objects.equals(bytes, that.bytes); + AbstractSerialized that = (AbstractSerialized) o; + return Objects.equals(latestVersionBytes, that.latestVersionBytes); } @Override public int hashCode() { - return bytes != null ? bytes.hashCode() : 0; + return latestVersionBytes != null ? latestVersionBytes.hashCode() : 0; } - @Override - public String toString() - { - return Objects.toString(get()); - } - - protected abstract IVersionedSerializer serializer(); + public abstract long estimatedSizeOnHeap(); + protected abstract ByteBuffer serialize(T value, P param, Version version); + protected abstract ByteBuffer reserialize(ByteBuffer bytes, P param, Version srcVersion, Version trgVersion); + protected abstract T deserialize(P param, ByteBuffer bytes, Version version); protected boolean isNull() { - return bytes == null; + return latestVersionBytes == null; } @Nullable - protected T get() + protected T deserialize(P param) { T result = memoized; - if (result == null && bytes != null) - memoized = result = serializer().deserializeUnchecked(bytes, version); + if (result == null && latestVersionBytes != null) + memoized = result = deserialize(param, latestVersionBytes, Version.LATEST); return result; } @@ -112,39 +84,15 @@ public void unmemoize() @Nullable protected ByteBuffer unsafeBytes() { - return bytes; + return latestVersionBytes; } @Nonnull - protected ByteBuffer bytes(Version target) - { - Invariants.nonNull(bytes); - if (version == target) - return bytes; - return serializer().serializeUnchecked(get(), target); - } - - public static AbstractSerialized of(IVersionedSerializer serializer, T value) - { - return new AbstractSerialized(value) - { - @Override - protected IVersionedSerializer serializer() - { - return serializer; - } - }; - } - - public static AbstractSerialized fromBytes(IVersionedSerializer serializer, ByteBuffer bytes, Version version) + protected ByteBuffer bytes(P param, Version target) { - return new AbstractSerialized(bytes, version) - { - @Override - protected IVersionedSerializer serializer() - { - return serializer; - } - }; + Invariants.nonNull(latestVersionBytes); + if (Version.LATEST == target) + return latestVersionBytes; + return reserialize(latestVersionBytes, param, Version.LATEST, target); } } diff --git a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java index 6a04485ab05f..3a9d1bbe7b31 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdate.java @@ -26,7 +26,8 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; +import org.apache.cassandra.service.accord.serializers.TxnSerializer; import org.apache.cassandra.service.accord.serializers.Version; public abstract class AccordUpdate implements Update @@ -77,14 +78,8 @@ public boolean checkCondition(Data data) public abstract long estimatedSizeOnHeap(); - public interface AccordUpdateSerializer extends IVersionedSerializer + public interface AccordUpdateSerializer extends TxnSerializer { - @Override - void serialize(T update, DataOutputPlus out, Version version) throws IOException; - @Override - T deserialize(DataInputPlus in, Version version) throws IOException; - @Override - long serializedSize(T update, Version version); } private static AccordUpdateSerializer serializerFor(AccordUpdate toSerialize) @@ -105,26 +100,26 @@ private static AccordUpdateSerializer serializerFor(Kind kind) } } - public static final AccordUpdateSerializer serializer = new AccordUpdateSerializer() + public static final AccordUpdateSerializer serializer = new AccordUpdateSerializer<>() { @Override - public void serialize(AccordUpdate update, DataOutputPlus out, Version version) throws IOException + public void serialize(AccordUpdate update, TableMetadatasAndKeys tablesAndKeys, DataOutputPlus out, Version version) throws IOException { out.writeByte(update.kind().val); - serializerFor(update).serialize(update, out, version); + serializerFor(update).serialize(update, tablesAndKeys, out, version); } @Override - public AccordUpdate deserialize(DataInputPlus in, Version version) throws IOException + public AccordUpdate deserialize(TableMetadatasAndKeys tablesAndKeys, DataInputPlus in, Version version) throws IOException { Kind kind = Kind.valueOf(in.readByte()); - return serializerFor(kind).deserialize(in, version); + return (AccordUpdate) serializerFor(kind).deserialize(tablesAndKeys, in, version); } @Override - public long serializedSize(AccordUpdate update, Version version) + public long serializedSize(AccordUpdate update, TableMetadatasAndKeys tablesAndKeys, Version version) { - return 1 + serializerFor(update).serializedSize(update, version); + return 1 + serializerFor(update).serializedSize(update, tablesAndKeys, version); } }; } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java b/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java index 93431abac1b9..881a3321621d 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnCondition.java @@ -31,6 +31,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import accord.utils.Invariants; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.cql3.conditions.ColumnCondition; import org.apache.cassandra.cql3.conditions.ColumnCondition.Bound; @@ -43,13 +44,16 @@ import org.apache.cassandra.db.rows.ColumnData; import org.apache.cassandra.db.rows.ComplexColumnData; import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.ObjectSizes; import static com.google.common.base.Preconditions.checkNotNull; import static org.apache.cassandra.service.accord.AccordSerializers.clusteringSerializer; @@ -63,11 +67,50 @@ public abstract class TxnCondition { + public static class SerializedTxnCondition extends AbstractSerialized + { + private static final long EMPTY_SIZE = ObjectSizes.measure(new SerializedTxnCondition(null)); + + protected SerializedTxnCondition(@Nullable ByteBuffer latestVersionBytes) + { + super(latestVersionBytes); + } + + protected SerializedTxnCondition(TxnCondition condition, TableMetadatas param) + { + this(serializer.serializeUnchecked(condition, param, Version.LATEST)); + } + + @Override + public long estimatedSizeOnHeap() + { + return EMPTY_SIZE + ObjectSizes.sizeOnHeapOf(unsafeBytes()); + } + + @Override + protected ByteBuffer serialize(TxnCondition value, TableMetadatas param, Version version) + { + return serializer.serializeUnchecked(value, param, version); + } + + @Override + protected ByteBuffer reserialize(ByteBuffer bytes, TableMetadatas param, Version srcVersion, Version trgVersion) + { + return bytes; + } + + @Override + protected TxnCondition deserialize(TableMetadatas param, ByteBuffer bytes, Version version) + { + return serializer.deserializeUnchecked(param, bytes, version); + } + } + private interface ConditionSerializer { - void serialize(T condition, DataOutputPlus out, Version version) throws IOException; - T deserialize(DataInputPlus in, Version version, Kind kind) throws IOException; - long serializedSize(T condition, Version version); + void serialize(T condition, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException; + T deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) throws IOException; + long serializedSize(T condition, TableMetadatas tables, Version version); } public enum Kind @@ -140,6 +183,8 @@ public boolean equals(Object o) return kind == condition.kind; } + public abstract void collect(TableMetadatas.Collector collector); + @Override public int hashCode() { @@ -168,20 +213,25 @@ public String toString() return kind.toString(); } + @Override + public void collect(TableMetadatas.Collector collector) + { + } + @Override public boolean applies(TxnData data) { return true; } - private static final ConditionSerializer serializer = new ConditionSerializer() + private static final ConditionSerializer serializer = new ConditionSerializer<>() { @Override - public void serialize(None condition, DataOutputPlus out, Version version) {} + public void serialize(None condition, TableMetadatas tables, DataOutputPlus out, Version version) {} @Override - public None deserialize(DataInputPlus in, Version version, Kind kind) { return instance; } + public None deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) { return instance; } @Override - public long serializedSize(None condition, Version version) { return 0; } + public long serializedSize(None condition, TableMetadatas tables, Version version) { return 0; } }; } @@ -213,6 +263,14 @@ public boolean equals(Object o) return reference.equals(exists.reference); } + @Override + public void collect(TableMetadatas.Collector collector) + { + TableMetadata table = reference.table(); + if (table != null) + collector.add(table); + } + @Override public int hashCode() { @@ -295,21 +353,21 @@ else if (reference.isFieldSelection()) private static final ConditionSerializer serializer = new ConditionSerializer() { @Override - public void serialize(Exists condition, DataOutputPlus out, Version version) throws IOException + public void serialize(Exists condition, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { - TxnReference.serializer.serialize(condition.reference, out, version); + TxnReference.serializer.serialize(condition.reference, tables, out, version); } @Override - public Exists deserialize(DataInputPlus in, Version version, Kind kind) throws IOException + public Exists deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) throws IOException { - return new Exists(TxnReference.serializer.deserialize(in, version), kind); + return new Exists(TxnReference.serializer.deserialize(tables, in, version), kind); } @Override - public long serializedSize(Exists condition, Version version) + public long serializedSize(Exists condition, TableMetadatas tables, Version version) { - return TxnReference.serializer.serializedSize(condition.reference, version); + return TxnReference.serializer.serializedSize(condition.reference, tables, version); } }; } @@ -331,6 +389,17 @@ public ColumnConditionsAdapter(Clustering clustering, Collection bound this.clustering = clustering; } + @Override + public void collect(TableMetadatas.Collector collector) + { + for (Bound bound : bounds) + { + TableMetadata table = bound.table; + if (table != null) + collector.add(table); + } + } + @Override public boolean applies(@Nonnull TxnData data) { @@ -348,25 +417,25 @@ public boolean applies(@Nonnull TxnData data) private static final ConditionSerializer serializer = new ConditionSerializer() { @Override - public void serialize(ColumnConditionsAdapter condition, DataOutputPlus out, Version version) throws IOException + public void serialize(ColumnConditionsAdapter condition, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { clusteringSerializer.serialize(condition.clustering, out); - serializeCollection(condition.bounds, out, Bound.serializer); + serializeCollection(condition.bounds, tables, out, Bound.serializer); } @Override - public ColumnConditionsAdapter deserialize(DataInputPlus in, Version version, Kind ignored) throws IOException + public ColumnConditionsAdapter deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind ignored) throws IOException { Clustering clustering = clusteringSerializer.deserialize(in); - List bounds = deserializeList(in, Bound.serializer); + List bounds = deserializeList(tables, in, Bound.serializer); return new ColumnConditionsAdapter(clustering, bounds); } @Override - public long serializedSize(ColumnConditionsAdapter condition, Version version) + public long serializedSize(ColumnConditionsAdapter condition, TableMetadatas tables, Version version) { return clusteringSerializer.serializedSize(condition.clustering) - + serializedCollectionSize(condition.bounds, Bound.serializer); + + serializedCollectionSize(condition.bounds, tables, Bound.serializer); } }; } @@ -384,8 +453,8 @@ public static class Value extends TxnCondition public Value(TxnReference reference, Kind kind, ByteBuffer value, ProtocolVersion version) { super(kind); - Preconditions.checkArgument(KINDS.contains(kind), "Kind " + kind + " cannot be used with a value condition"); - Preconditions.checkArgument(reference.selectsColumn(), "Reference " + reference + " does not select a column"); + Invariants.requireArgument(KINDS.contains(kind), "Kind " + kind + " cannot be used with a value condition"); + Invariants.requireArgument(reference.selectsColumn(), "Reference " + reference + " does not select a column"); this.reference = reference; this.value = value; this.version = version; @@ -401,6 +470,14 @@ public boolean equals(Object o) return reference.equals(value1.reference) && value.equals(value1.value); } + @Override + public void collect(TableMetadatas.Collector collector) + { + TableMetadata table = reference.table(); + if (table != null) + collector.add(table); + } + @Override public int hashCode() { @@ -416,10 +493,11 @@ public String toString() private Bound getBounds(TxnData data) { ColumnMetadata column = reference.column(); + TableMetadata table = reference.table(); if (column.isPartitionKey()) { ByteBuffer bb = reference.getPartitionKey(data); - return new ColumnCondition.SimpleBound(column, kind.operator, value) + return new ColumnCondition.SimpleBound(column, table, kind.operator, value) { @Override protected ByteBuffer rowValue(Row row) @@ -429,26 +507,26 @@ protected ByteBuffer rowValue(Row row) }; } else if (column.isClusteringColumn()) - return new ColumnCondition.SimpleClusteringBound(column, kind.operator, value); + return new ColumnCondition.SimpleClusteringBound(column, table, kind.operator, value); AbstractType type = column.type; if (type.isCollection()) { if (reference.selectsPath()) - return new ColumnCondition.ElementOrFieldAccessBound(column, reference.path().get(0), kind.operator, value); + return new ColumnCondition.ElementOrFieldAccessBound(column, table, reference.path().get(0), kind.operator, value); if (type.isMultiCell()) - return new ColumnCondition.MultiCellBound(column, kind.operator, value); + return new ColumnCondition.MultiCellBound(column, table, kind.operator, value); } else if (type.isUDT()) { if (reference.isFieldSelection()) { UserType ut = (UserType) type; - return new ColumnCondition.ElementOrFieldAccessBound(column, ut.fieldName(reference.path()).bytes, kind.operator, value); + return new ColumnCondition.ElementOrFieldAccessBound(column, table, ut.fieldName(reference.path()).bytes, kind.operator, value); } if (type.isMultiCell()) - return new ColumnCondition.MultiCellBound(column, kind.operator, value); + return new ColumnCondition.MultiCellBound(column, table, kind.operator, value); } - return new ColumnCondition.SimpleBound(column, kind.operator, value); + return new ColumnCondition.SimpleBound(column, table, kind.operator, value); } @Override @@ -460,27 +538,27 @@ public boolean applies(TxnData data) private static final ConditionSerializer serializer = new ConditionSerializer<>() { @Override - public void serialize(Value condition, DataOutputPlus out, Version version) throws IOException + public void serialize(Value condition, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { - TxnReference.serializer.serialize(condition.reference, out, version); + TxnReference.serializer.serialize(condition.reference, tables, out, version); ByteBufferUtil.writeWithVIntLength(condition.value, out); out.writeUTF(condition.version.name()); } @Override - public Value deserialize(DataInputPlus in, Version version, Kind kind) throws IOException + public Value deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) throws IOException { - TxnReference reference = TxnReference.serializer.deserialize(in, version); + TxnReference reference = TxnReference.serializer.deserialize(tables, in, version); ByteBuffer value = ByteBufferUtil.readWithVIntLength(in); ProtocolVersion protocolVersion = ProtocolVersion.valueOf(in.readUTF()); return new Value(reference, kind, value, protocolVersion); } @Override - public long serializedSize(Value condition, Version version) + public long serializedSize(Value condition, TableMetadatas tables, Version version) { long size = 0; - size += TxnReference.serializer.serializedSize(condition.reference, version); + size += TxnReference.serializer.serializedSize(condition.reference, tables, version); size += ByteBufferUtil.serializedSizeWithVIntLength(condition.value); size += TypeSizes.sizeof(condition.version.name()); return size; @@ -517,6 +595,13 @@ public boolean equals(Object o) return Objects.equals(conditions, that.conditions); } + @Override + public void collect(TableMetadatas.Collector collector) + { + for (TxnCondition condition : conditions) + condition.collect(collector); + } + @Override public int hashCode() { @@ -537,51 +622,52 @@ public boolean applies(TxnData data) } } - private static final ConditionSerializer serializer = new ConditionSerializer() + private static final ConditionSerializer serializer = new ConditionSerializer<>() { @Override - public void serialize(BooleanGroup condition, DataOutputPlus out, Version version) throws IOException + public void serialize(BooleanGroup condition, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { - serializeList(condition.conditions, out, version, TxnCondition.serializer); + serializeList(condition.conditions, tables, out, version, TxnCondition.serializer); } @Override - public BooleanGroup deserialize(DataInputPlus in, Version version, Kind kind) throws IOException + public BooleanGroup deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) throws IOException { - return new BooleanGroup(kind, deserializeList(in, version, TxnCondition.serializer)); + return new BooleanGroup(kind, deserializeList(tables, in, version, TxnCondition.serializer)); } @Override - public long serializedSize(BooleanGroup condition, Version version) + public long serializedSize(BooleanGroup condition, TableMetadatas tables, Version version) { - return serializedListSize(condition.conditions, version, TxnCondition.serializer); + return serializedListSize(condition.conditions, tables, version, TxnCondition.serializer); } }; } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @SuppressWarnings("unchecked") @Override - public void serialize(TxnCondition condition, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnCondition condition, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { out.writeUnsignedVInt32(condition.kind.ordinal()); - condition.kind.serializer().serialize(condition, out, version); + condition.kind.serializer().serialize(condition, tables, out, version); } @Override - public TxnCondition deserialize(DataInputPlus in, Version version) throws IOException + public TxnCondition deserialize(TableMetadatas tables, DataInputPlus in, Version version) throws IOException { Kind kind = Kind.values()[in.readUnsignedVInt32()]; - return kind.serializer().deserialize(in, version, kind); + return kind.serializer().deserialize(tables, in, version, kind); } @SuppressWarnings("unchecked") @Override - public long serializedSize(TxnCondition condition, Version version) + public long serializedSize(TxnCondition condition, TableMetadatas tables, Version version) { - return TypeSizes.sizeofUnsignedVInt(condition.kind.ordinal()) - + condition.kind.serializer().serializedSize(condition, version); + long size = TypeSizes.sizeofUnsignedVInt(condition.kind.ordinal()); + size += condition.kind.serializer().serializedSize(condition, tables, version); + return size; } }; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java index a5d6d9e7ff6a..2f84c849e691 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -19,6 +19,7 @@ package org.apache.cassandra.service.accord.txn; import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.util.Objects; import java.util.concurrent.Callable; @@ -53,15 +54,17 @@ import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Token; import org.apache.cassandra.dht.Token.KeyBound; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; +import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.TableId; -import org.apache.cassandra.service.accord.AccordObjectSizes; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; -import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.service.accord.txn.TxnData.TxnDataNameKind; import org.apache.cassandra.utils.ByteBufferUtil; @@ -70,38 +73,39 @@ import org.apache.cassandra.utils.ObjectSizes; import static com.google.common.base.Preconditions.checkState; +import static org.apache.cassandra.io.util.DataOutputBuffer.scratchBuffer; import static org.apache.cassandra.utils.ByteBufferUtil.readWithVIntLength; import static org.apache.cassandra.utils.ByteBufferUtil.serializedSizeWithVIntLength; import static org.apache.cassandra.utils.ByteBufferUtil.writeWithVIntLength; -public class TxnNamedRead extends AbstractSerialized +public class TxnNamedRead extends AbstractSerialized { @SuppressWarnings("unused") private static final Logger logger = LoggerFactory.getLogger(TxnNamedRead.class); - private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnNamedRead(0, null, null, Version.LATEST)); + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnNamedRead(0, null, null)); private final int name; private final Seekable key; - public TxnNamedRead(int name, SinglePartitionReadCommand value) + public TxnNamedRead(int name, PartitionKey key, SinglePartitionReadCommand value, TableMetadatas tables) { - super(value); + super(serializeInternal(value, tables, Version.LATEST)); this.name = name; - this.key = new PartitionKey(value.metadata().id, value.partitionKey()); + this.key = key; } - public TxnNamedRead(int name, AbstractBounds range, PartitionRangeReadCommand value) + public TxnNamedRead(int name, AbstractBounds range, PartitionRangeReadCommand value, TableMetadatas tables) { - super(value); + super(serializeInternal(value, tables, Version.LATEST)); TableId tableId = value.metadata().id; this.name = name; this.key = boundsAsAccordRange(range, tableId); } - public TxnNamedRead(int name, Seekable key, ByteBuffer bytes, Version version) + TxnNamedRead(int name, Seekable key, ByteBuffer bytes) { - super(bytes, version); + super(bytes); this.name = name; this.key = key; } @@ -145,15 +149,47 @@ else if (inclusiveRight && !endIsMinKeyBound) public long estimatedSizeOnHeap() { long size = EMPTY_SIZE; - size += AccordObjectSizes.seekable(key); + // we don't measure the key, as this is shared size += (unsafeBytes() != null ? ByteBufferUtil.estimatedSizeOnHeap(unsafeBytes()) : 0); return size; } @Override - protected IVersionedSerializer serializer() + protected ByteBuffer serialize(ReadCommand value, TableMetadatas param, Version version) { - return readCommandSerializer; + return serializeInternal(value, param, version); + } + + private static ByteBuffer serializeInternal(ReadCommand value, TableMetadatas param, Version version) + { + try (DataOutputBuffer buffer = scratchBuffer.get()) + { + ReadCommand.serializer.serializeForAccord(value, param, buffer, version.messageVersion()); + return buffer.asNewBuffer(); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + @Override + protected ByteBuffer reserialize(ByteBuffer buffer, TableMetadatas param, Version srcVersion, Version trgVersion) + { + return buffer; + } + + @Override + protected ReadCommand deserialize(TableMetadatas param, ByteBuffer bytes, Version version) + { + try (DataInputBuffer buffer = new DataInputBuffer(bytes, true)) + { + return ReadCommand.serializer.deserializeForAccord(key, param, buffer, version.messageVersion()); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } } @Override @@ -175,7 +211,7 @@ public int hashCode() @Override public String toString() { - return "TxnNamedRead{name='" + name + '\'' + ", keys=" + key + ", update=" + get() + '}'; + return "TxnNamedRead{name='" + name + '\'' + ", keys=" + key + '}'; } public int txnDataName() @@ -193,9 +229,9 @@ public static long nowInSeconds(Timestamp executeAt) return TimeUnit.MICROSECONDS.toSeconds(executeAt.hlc()); } - public AsyncChain read(ConsistencyLevel consistencyLevel, Seekable key, Timestamp executeAt) + public AsyncChain read(TableMetadatas tables, ConsistencyLevel consistencyLevel, Seekable key, Timestamp executeAt) { - ReadCommand command = get(); + ReadCommand command = deserialize(tables); if (command == null) return AsyncResults.success(TxnData.NOOP_DATA); @@ -226,7 +262,7 @@ public TxnNamedRead slice(Range range) return this; Invariants.require(((Range)key).contains(range)); - return new TxnNamedRead(txnDataName(), range, unsafeBytes(), version); + return new TxnNamedRead(txnDataName(), range, unsafeBytes()); } public TxnNamedRead merge(TxnNamedRead with) @@ -241,7 +277,7 @@ public TxnNamedRead merge(TxnNamedRead with) RoutingKey start = Comparables.min(thisRange.start(), thatRange.start()); RoutingKey end = Comparables.max(thisRange.end(), thatRange.end()); Range range = thisRange.newRange(start, end); - return new TxnNamedRead(txnDataName(), range, unsafeBytes(), version); + return new TxnNamedRead(txnDataName(), range, unsafeBytes()); } public static boolean readsWithoutReconciliation(ConsistencyLevel consistencyLevel) @@ -251,9 +287,9 @@ public static boolean readsWithoutReconciliation(ConsistencyLevel consistencyLev } - public ReadCommand command() + public ReadCommand command(TableMetadatas tables) { - return get(); + return deserialize(tables); } private AsyncChain performLocalKeyRead(SinglePartitionReadCommand read) @@ -430,17 +466,17 @@ public String description() ); } - static final IVersionedSerializer serializer = new IVersionedSerializer<>() + static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @Override - public void serialize(TxnNamedRead read, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnNamedRead read, TableMetadatasAndKeys tablesAndKeys, DataOutputPlus out, Version version) throws IOException { out.writeInt(read.name); - KeySerializers.seekable.serialize(read.key, out); + tablesAndKeys.serializeSeekable(read.key, out); if (!read.isNull()) { out.write(0); - writeWithVIntLength(read.bytes(version), out); + writeWithVIntLength(read.bytes(tablesAndKeys.tables, version), out); } else { @@ -449,26 +485,26 @@ public void serialize(TxnNamedRead read, DataOutputPlus out, Version version) th } @Override - public TxnNamedRead deserialize(DataInputPlus in, Version version) throws IOException + public TxnNamedRead deserialize(TableMetadatasAndKeys tablesAndKeys, DataInputPlus in, Version version) throws IOException { int name = in.readInt(); - Seekable key = KeySerializers.seekable.deserialize(in); + Seekable key = tablesAndKeys.deserializeSeekable(in); ByteBuffer bytes = in.readByte() == 1 ? null : readWithVIntLength(in); - return new TxnNamedRead(name, key, bytes, version); + if (version != Version.LATEST) + bytes = serializeUnchecked(deserializeUnchecked(tablesAndKeys, bytes, version), tablesAndKeys, Version.LATEST); + return new TxnNamedRead(name, key, bytes); } @Override - public long serializedSize(TxnNamedRead read, Version version) + public long serializedSize(TxnNamedRead read, TableMetadatasAndKeys tablesAndKeys, Version version) { long size = 0; size += TypeSizes.sizeof(read.name); - size += KeySerializers.seekable.serializedSize(read.key); + size += tablesAndKeys.serializedSeekableSize(read.key); size += TypeSizes.BYTE_SIZE; // is null if (!read.isNull()) - size += serializedSizeWithVIntLength(read.bytes(version)); + size += serializedSizeWithVIntLength(read.bytes(tablesAndKeys.tables, version)); return size; } }; - - static final IVersionedSerializer readCommandSerializer = IVersionedSerializer.fromMessaging(ReadCommand.serializer); } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java index fe487cc7d69b..c78401ffa2f3 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java @@ -126,7 +126,7 @@ public Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, else if (txnData.isEmpty()) { TxnRead txnKeyRead = (TxnRead)read; - SinglePartitionReadCommand command = (SinglePartitionReadCommand) txnKeyRead.iterator().next().get(); + SinglePartitionReadCommand command = (SinglePartitionReadCommand) txnKeyRead.deserialize(0); // For CAS must return a non-empty result to indicate error even if there was no partition found return TxnData.of(txnDataName(CAS_READ), new TxnDataKeyValue(EmptyIterators.row(command.metadata(), command.partitionKey(), command.isReversed()))); } @@ -183,7 +183,7 @@ public Result doCompute(TxnId txnId, Timestamp executeAt, Seekables keys, private Result concat(TxnData data, Read read) { TxnRead txnRead = (TxnRead) read; - PartitionRangeReadCommand command = (PartitionRangeReadCommand) txnRead.iterator().next().get(); + PartitionRangeReadCommand command = (PartitionRangeReadCommand) txnRead.deserialize(0); TxnDataRangeValue value = (TxnDataRangeValue) data.get(txnDataName(TxnDataNameKind.USER)); Supplier source = value.toPartitionIterator(command.isReversed()); // Because the query was split across multiple command stores the pushed down limit won't be sufficient diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java index e35267fb28e3..d19c241614bf 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java @@ -47,12 +47,15 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.db.PartitionRangeReadCommand; +import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.SinglePartitionReadCommand; import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.api.PartitionKey; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.ObjectSizes; @@ -74,8 +77,8 @@ public class TxnRead extends AbstractKeySorted implements Read { - private static final TxnRead EMPTY_KEY = new TxnRead(Domain.Key); - private static final TxnRead EMPTY_RANGE = new TxnRead(Domain.Range); + private static final TxnRead EMPTY_KEY = new TxnRead(TableMetadatas.none(), Domain.Key); + private static final TxnRead EMPTY_RANGE = new TxnRead(TableMetadatas.none(), Domain.Range); private static final long EMPTY_SIZE = ObjectSizes.measure(EMPTY_KEY); private static final Comparator TXN_NAMED_READ_KEY_COMPARATOR = Comparator.comparing(a -> ((PartitionKey) a.key())); private static final byte TYPE_EMPTY_KEY = 0; @@ -95,6 +98,7 @@ public static TxnRead empty(Domain domain) } } + final TableMetadatas tables; // Cassandra's consistency level used by Accord to safely read data written outside of Accord @Nullable private final ConsistencyLevel cassandraConsistencyLevel; @@ -102,16 +106,18 @@ public static TxnRead empty(Domain domain) // Specifies the domain in case the TxnRead is empty and it can't be inferred private final Domain domain; - private TxnRead(Domain domain) + private TxnRead(TableMetadatas tables, Domain domain) { super(new TxnNamedRead[0], domain); + this.tables = tables; this.domain = domain; this.cassandraConsistencyLevel = null; } - private TxnRead(@Nonnull TxnNamedRead[] items, @Nullable ConsistencyLevel cassandraConsistencyLevel) + private TxnRead(TableMetadatas tables, @Nonnull TxnNamedRead[] items, @Nullable ConsistencyLevel cassandraConsistencyLevel) { super(items, items[0].key().domain()); + this.tables = tables; checkArgument(cassandraConsistencyLevel == null || SUPPORTED_READ_CONSISTENCY_LEVELS.contains(cassandraConsistencyLevel), "Unsupported consistency level for read: %s", cassandraConsistencyLevel); this.cassandraConsistencyLevel = cassandraConsistencyLevel; this.domain = items[0].key().domain(); @@ -121,9 +127,10 @@ private TxnRead(@Nonnull TxnNamedRead[] items, @Nullable ConsistencyLevel cassan Invariants.require(domain == Domain.Key || ((Ranges)keys()).mergeTouching() == keys()); } - private TxnRead(@Nonnull List items, @Nullable ConsistencyLevel cassandraConsistencyLevel) + private TxnRead(TableMetadatas tables, @Nonnull List items, @Nullable ConsistencyLevel cassandraConsistencyLevel) { super(items, items.get(0).key().domain()); + this.tables = tables; checkArgument(cassandraConsistencyLevel == null || SUPPORTED_READ_CONSISTENCY_LEVELS.contains(cassandraConsistencyLevel), "Unsupported consistency level for read: %s", cassandraConsistencyLevel); this.cassandraConsistencyLevel = cassandraConsistencyLevel; this.domain = items.get(0).key().domain(); @@ -137,27 +144,30 @@ private static void sortReads(List reads) reads.sort(TXN_NAMED_READ_KEY_COMPARATOR); } - public static TxnRead createTxnRead(@Nonnull List items, @Nullable ConsistencyLevel consistencyLevel, Domain domain) + public static TxnRead createTxnRead(TableMetadatas tables, @Nonnull List items, @Nullable ConsistencyLevel consistencyLevel, Domain domain) { if (items.isEmpty()) return empty(domain); sortReads(items); - return new TxnRead(items, consistencyLevel); + return new TxnRead(tables, items, consistencyLevel); } - public static TxnRead createSerialRead(List readCommands, ConsistencyLevel consistencyLevel) + public static TxnRead createSerialRead(List readCommands, ConsistencyLevel consistencyLevel, TableMetadatasAndKeys.KeyCollector keyCollector) { List reads = new ArrayList<>(readCommands.size()); for (int i = 0; i < readCommands.size(); i++) - reads.add(new TxnNamedRead(txnDataName(USER, i), readCommands.get(i))); + { + SinglePartitionReadCommand readCommand = readCommands.get(i); + reads.add(new TxnNamedRead(txnDataName(USER, i), keyCollector.collect(readCommand.metadata(), readCommand.partitionKey()), readCommand, keyCollector.tables)); + } sortReads(reads); - return new TxnRead(reads, consistencyLevel); + return new TxnRead(keyCollector.tables, reads, consistencyLevel); } - public static TxnRead createCasRead(SinglePartitionReadCommand readCommand, ConsistencyLevel consistencyLevel) + public static TxnRead createCasRead(SinglePartitionReadCommand readCommand, ConsistencyLevel consistencyLevel, TableMetadatasAndKeys tablesAndKeys) { - TxnNamedRead read = new TxnNamedRead(txnDataName(CAS_READ), readCommand); - return new TxnRead(ImmutableList.of(read), consistencyLevel); + TxnNamedRead read = new TxnNamedRead(txnDataName(CAS_READ), (PartitionKey) tablesAndKeys.keys.get(0), readCommand, tablesAndKeys.tables); + return new TxnRead(tablesAndKeys.tables, ImmutableList.of(read), consistencyLevel); } // A read that declares it will read from keys but doesn't actually read any data so dependent transactions will @@ -166,13 +176,13 @@ public static TxnRead createNoOpRead(Keys keys) { List reads = new ArrayList<>(keys.size()); for (int i = 0; i < keys.size(); i++) - reads.add(new TxnNamedRead(txnDataName(USER, i), keys.get(i), null, Version.LATEST)); - return new TxnRead(reads, null); + reads.add(new TxnNamedRead(txnDataName(USER, i), keys.get(i), null)); + return new TxnRead(TableMetadatas.none(), reads, null); } - public static TxnRead createRangeRead(PartitionRangeReadCommand command, AbstractBounds range, ConsistencyLevel consistencyLevel) + public static TxnRead createRangeRead(TableMetadatas tables, PartitionRangeReadCommand command, AbstractBounds range, ConsistencyLevel consistencyLevel) { - return new TxnRead(ImmutableList.of(new TxnNamedRead(txnDataName(USER), range, command)), consistencyLevel); + return new TxnRead(tables, ImmutableList.of(new TxnNamedRead(txnDataName(USER), range, command, tables)), consistencyLevel); } public long estimatedSizeOnHeap() @@ -189,6 +199,11 @@ int compareNonKeyFields(TxnNamedRead left, TxnNamedRead right) return Integer.compare(left.txnDataName(), right.txnDataName()); } + ReadCommand deserialize(int i) + { + return get(i).deserialize(tables); + } + @Override Seekable getKey(TxnNamedRead read) { @@ -207,12 +222,6 @@ TxnNamedRead[] newArray(int size) return itemKeys; } - @Override - public Domain domain() - { - return domain; - } - public ConsistencyLevel cassandraConsistencyLevel() { return cassandraConsistencyLevel; @@ -280,7 +289,7 @@ private Read select(Seekables select) throw new UnhandledEnum(select.domain()); } - return createTxnRead(reads, cassandraConsistencyLevel, select.domain()); + return createTxnRead(tables, reads, cassandraConsistencyLevel, select.domain()); } @Override @@ -361,7 +370,7 @@ else if (c < 0) break; } } - return createTxnRead(reads, cassandraConsistencyLevel, that.domain); + return createTxnRead(tables, reads, cassandraConsistencyLevel, that.domain); } public void unmemoize() @@ -379,7 +388,7 @@ public AsyncChain read(Seekable key, SafeCommandStore safeStore, Timestamp checkState(cm.epoch.getEpoch() >= executeAt.epoch(), "TCM epoch %d is < executeAt epoch %d", cm.epoch.getEpoch(), executeAt.epoch()); List> results = new ArrayList<>(); - forEachWithKey(key, read -> results.add(read.read(cassandraConsistencyLevel, key, executeAt))); + forEachWithKey(key, read -> results.add(read.read(tables, cassandraConsistencyLevel, key, executeAt))); if (results.isEmpty()) // Result type must match everywhere @@ -391,15 +400,15 @@ public AsyncChain read(Seekable key, SafeCommandStore safeStore, Timestamp return AsyncChains.reduce(results, Data::merge); } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @Override - public void serialize(TxnRead read, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnRead read, TableMetadatasAndKeys tablesAndKeys, DataOutputPlus out, Version version) throws IOException { if (read.items.length > 0) { out.write(TYPE_NOT_EMPTY); - serializeArray(read.items, out, version, TxnNamedRead.serializer); + serializeArray(read.items, tablesAndKeys, out, version, TxnNamedRead.serializer); serializeNullable(read.cassandraConsistencyLevel, out, consistencyLevelSerializer); } else @@ -409,7 +418,7 @@ public void serialize(TxnRead read, DataOutputPlus out, Version version) throws } @Override - public TxnRead deserialize(DataInputPlus in, Version version) throws IOException + public TxnRead deserialize(TableMetadatasAndKeys tablesAndKeys, DataInputPlus in, Version version) throws IOException { byte type = in.readByte(); switch (type) @@ -421,19 +430,19 @@ public TxnRead deserialize(DataInputPlus in, Version version) throws IOException case TYPE_EMPTY_RANGE: return EMPTY_RANGE; case TYPE_NOT_EMPTY: - TxnNamedRead[] items = deserializeArray(in, version, TxnNamedRead.serializer, TxnNamedRead[]::new); + TxnNamedRead[] items = deserializeArray(tablesAndKeys, in, version, TxnNamedRead.serializer, TxnNamedRead[]::new); ConsistencyLevel consistencyLevel = deserializeNullable(in, consistencyLevelSerializer); - return new TxnRead(items, consistencyLevel); + return new TxnRead(tablesAndKeys.tables, items, consistencyLevel); } } @Override - public long serializedSize(TxnRead read, Version version) + public long serializedSize(TxnRead read, TableMetadatasAndKeys tablesAndKeys, Version version) { long size = 1; // type if (read.items.length > 0) { - size += serializedArraySize(read.items, version, TxnNamedRead.serializer); + size += serializedArraySize(read.items, tablesAndKeys, version, TxnNamedRead.serializer); size += serializedNullableSize(read.cassandraConsistencyLevel, consistencyLevelSerializer); } return size; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java index ff8ec54d11ad..59a1c5afb6d3 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReference.java @@ -22,6 +22,7 @@ import java.nio.ByteBuffer; import java.util.Objects; +import accord.utils.VIntCoding; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.CollectionType; @@ -34,10 +35,12 @@ import org.apache.cassandra.db.rows.ColumnData; import org.apache.cassandra.db.rows.ComplexColumnData; import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.utils.ByteBufferUtil; @@ -47,19 +50,21 @@ public class TxnReference { private final int tuple; + private final TableMetadata table; private final ColumnMetadata column; private final CellPath path; - public TxnReference(int tuple, ColumnMetadata column, CellPath path) + public TxnReference(int tuple, TableMetadata table, ColumnMetadata column, CellPath path) { this.tuple = tuple; + this.table = table; this.column = column; this.path = path; } - public TxnReference(int tuple, ColumnMetadata column) + public TxnReference(int tuple, ColumnMetadata column, TableMetadata table) { - this(tuple, column, null); + this(tuple, table, column, null); } @Override @@ -92,7 +97,17 @@ public ColumnMetadata column() { return column; } - + + public TableMetadata table() + { + return table; + } + + public void collect(TableMetadatas.Collector collector) + { + collector.add(table); + } + public CellPath path() { return path; @@ -287,37 +302,49 @@ private boolean selectsFrozenUDTField() return selectsPath() && column.type.isUDT() && !column.type.isMultiCell(); } - static final IVersionedSerializer serializer = new IVersionedSerializer() + static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @Override - public void serialize(TxnReference reference, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnReference reference, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { - out.writeInt(reference.tuple); + out.writeUnsignedVInt32(reference.tuple); out.writeBoolean(reference.column != null); if (reference.column != null) - columnMetadataSerializer.serialize(reference.column, out); + { + tables.serialize(reference.table, out); + columnMetadataSerializer.serialize(reference.column, reference.table, out); + } out.writeBoolean(reference.path != null); if (reference.path != null) CollectionType.cellPathSerializer.serialize(reference.path, out); } @Override - public TxnReference deserialize(DataInputPlus in, Version version) throws IOException + public TxnReference deserialize(TableMetadatas tables, DataInputPlus in, Version version) throws IOException { - int name = in.readInt(); - ColumnMetadata column = in.readBoolean() ? columnMetadataSerializer.deserialize(in) : null; + int name = in.readUnsignedVInt32(); + TableMetadata table = null; + ColumnMetadata column = null; + if (in.readBoolean()) + { + table = tables.deserialize(in); + column = columnMetadataSerializer.deserialize(table, in); + } CellPath path = in.readBoolean() ? CollectionType.cellPathSerializer.deserialize(in) : null; - return new TxnReference(name, column, path); + return new TxnReference(name, table, column, path); } @Override - public long serializedSize(TxnReference reference, Version version) + public long serializedSize(TxnReference reference, TableMetadatas tables, Version version) { long size = 0; - size += TypeSizes.INT_SIZE; + size += VIntCoding.sizeOfUnsignedVInt(reference.tuple); size += TypeSizes.BOOL_SIZE; if (reference.column != null) - size += columnMetadataSerializer.serializedSize(reference.column); + { + size += tables.serializedSize(reference.table); + size += columnMetadataSerializer.serializedSize(reference.column, reference.table); + } size += TypeSizes.BOOL_SIZE; if (reference.path != null) size += CollectionType.cellPathSerializer.serializedSize(reference.path); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java index 397c4deada45..21c22b5a57d1 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperation.java @@ -43,11 +43,13 @@ import org.apache.cassandra.db.marshal.TupleType; import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.db.rows.CellPath; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.AccordSerializers; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.utils.ByteBufferUtil; @@ -153,15 +155,17 @@ public Operation toOperation(ColumnMetadata column, Term keyOrIndex, FieldIdenti private final Kind kind; private final ColumnMetadata receiver; + private final TableMetadata table; private final ByteBuffer key; private final ByteBuffer field; private final TxnReferenceValue value; private final AbstractType valueType; - public TxnReferenceOperation(Kind kind, ColumnMetadata receiver, ByteBuffer key, ByteBuffer field, TxnReferenceValue value) + public TxnReferenceOperation(Kind kind, ColumnMetadata receiver, TableMetadata table, ByteBuffer key, ByteBuffer field, TxnReferenceValue value) { this.kind = kind; this.receiver = receiver; + this.table = table; this.key = key; this.field = field; @@ -204,6 +208,12 @@ public boolean equals(Object o) && Objects.equals(value, that.value); } + public void collect(TableMetadatas.Collector collector) + { + collector.add(table); + value.collect(collector); + } + @Override public int hashCode() { @@ -255,14 +265,15 @@ else if (receivingType.isTuple()) return new Constants.Value(bytes); } - static final IVersionedSerializer serializer = new IVersionedSerializer() + static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @Override - public void serialize(TxnReferenceOperation operation, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnReferenceOperation operation, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { out.writeByte(operation.kind.id); - columnMetadataSerializer.serialize(operation.receiver, out); - TxnReferenceValue.serializer.serialize(operation.value, out, version); + tables.serialize(operation.table, out); + columnMetadataSerializer.serialize(operation.receiver, operation.table, out); + TxnReferenceValue.serializer.serialize(operation.value, tables, out, version); out.writeBoolean(operation.key != null); if (operation.key != null) @@ -274,22 +285,24 @@ public void serialize(TxnReferenceOperation operation, DataOutputPlus out, Versi } @Override - public TxnReferenceOperation deserialize(DataInputPlus in, Version version) throws IOException + public TxnReferenceOperation deserialize(TableMetadatas tables, DataInputPlus in, Version version) throws IOException { Kind kind = Kind.from(in.readByte()); - ColumnMetadata receiver = columnMetadataSerializer.deserialize(in); - TxnReferenceValue value = TxnReferenceValue.serializer.deserialize(in, version); + TableMetadata table = tables.deserialize(in); + ColumnMetadata receiver = columnMetadataSerializer.deserialize(table, in); + TxnReferenceValue value = TxnReferenceValue.serializer.deserialize(tables, in, version); ByteBuffer key = in.readBoolean() ? ByteBufferUtil.readWithVIntLength(in) : null; ByteBuffer field = in.readBoolean() ? ByteBufferUtil.readWithVIntLength(in) : null; - return new TxnReferenceOperation(kind, receiver, key, field, value); + return new TxnReferenceOperation(kind, receiver, table, key, field, value); } @Override - public long serializedSize(TxnReferenceOperation operation, Version version) + public long serializedSize(TxnReferenceOperation operation, TableMetadatas tables, Version version) { long size = Byte.BYTES; - size += columnMetadataSerializer.serializedSize(operation.receiver); - size += TxnReferenceValue.serializer.serializedSize(operation.value, version); + size += tables.serializedSize(operation.table); + size += columnMetadataSerializer.serializedSize(operation.receiver, operation.table); + size += TxnReferenceValue.serializer.serializedSize(operation.value, tables, version); if (operation.key != null) size += ByteBufferUtil.serializedSizeWithVIntLength(operation.key); diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java index b326ba61db15..679106be1172 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceOperations.java @@ -27,16 +27,16 @@ import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.service.accord.serializers.Version; import static org.apache.cassandra.utils.CollectionSerializers.deserializeList; import static org.apache.cassandra.utils.CollectionSerializers.serializeList; import static org.apache.cassandra.utils.CollectionSerializers.serializedListSize; -import static org.apache.cassandra.service.accord.AccordSerializers.tableMetadataSerializer; public class TxnReferenceOperations { @@ -87,47 +87,59 @@ public boolean isEmpty() return regulars.isEmpty() && statics.isEmpty(); } - static final IVersionedSerializer serializer = new IVersionedSerializer() + static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @Override - public void serialize(TxnReferenceOperations operations, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnReferenceOperations operations, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { out.writeBoolean(!operations.isEmpty()); if (operations.isEmpty()) return; - tableMetadataSerializer.serialize(operations.metadata, out, version); + + tables.serialize(operations.metadata, out); out.writeBoolean(operations.clustering != null); if (operations.clustering != null) Clustering.serializer.serialize(operations.clustering, out, version.messageVersion(), operations.metadata.comparator.subtypes()); - serializeList(operations.regulars, out, version, TxnReferenceOperation.serializer); - serializeList(operations.statics, out, version, TxnReferenceOperation.serializer); - + serializeList(operations.regulars, tables, out, version, TxnReferenceOperation.serializer); + serializeList(operations.statics, tables, out, version, TxnReferenceOperation.serializer); } @Override - public TxnReferenceOperations deserialize(DataInputPlus in, Version version) throws IOException + public TxnReferenceOperations deserialize(TableMetadatas tables, DataInputPlus in, Version version) throws IOException { if (!in.readBoolean()) return TxnReferenceOperations.empty(); - TableMetadata metadata = tableMetadataSerializer.deserialize(in, version); + + TableMetadata metadata = tables.deserialize(in); Clustering clustering = in.readBoolean() ? Clustering.serializer.deserialize(in, version.messageVersion(), metadata.comparator.subtypes()) : null; - return new TxnReferenceOperations(metadata, clustering, deserializeList(in, version, TxnReferenceOperation.serializer), - deserializeList(in, version, TxnReferenceOperation.serializer)); + return new TxnReferenceOperations(metadata, clustering, deserializeList(tables, in, version, TxnReferenceOperation.serializer), + deserializeList(tables, in, version, TxnReferenceOperation.serializer)); } @Override - public long serializedSize(TxnReferenceOperations operations, Version version) + public long serializedSize(TxnReferenceOperations operations, TableMetadatas tables, Version version) { long size = TypeSizes.BOOL_SIZE; if (operations.isEmpty()) return size; - size += tableMetadataSerializer.serializedSize(operations.metadata, version); + size += tables.serializedSize(operations.metadata); size += TypeSizes.BOOL_SIZE; if (operations.clustering != null) size += Clustering.serializer.serializedSize(operations.clustering, version.messageVersion(), operations.metadata.comparator.subtypes()); - size += serializedListSize(operations.regulars, version, TxnReferenceOperation.serializer); - size += serializedListSize(operations.statics, version, TxnReferenceOperation.serializer); + size += serializedListSize(operations.regulars, tables, version, TxnReferenceOperation.serializer); + size += serializedListSize(operations.statics, tables, version, TxnReferenceOperation.serializer); return size; } + + private TableMetadatas tables(TxnReferenceOperations operations) + { + TableMetadatas.Collector collector = new TableMetadatas.Collector(); + collector.add(operations.metadata); + for (TxnReferenceOperation op : operations.regulars) + op.collect(collector); + for (TxnReferenceOperation op : operations.statics) + op.collect(collector); + return collector.build(); + } }; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java index e778adf4e9b8..7dbcea1c9372 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnReferenceValue.java @@ -24,9 +24,10 @@ import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.utils.ByteBufferUtil; @@ -34,9 +35,9 @@ public abstract class TxnReferenceValue { private interface Serializer { - void serialize(T t, DataOutputPlus out, Version version) throws IOException; - T deserialize(DataInputPlus in, Version version, Kind kind) throws IOException; - long serializedSize(T t, Version version); + void serialize(T t, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException; + T deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) throws IOException; + long serializedSize(T t, TableMetadatas tables, Version version); } enum Kind @@ -55,6 +56,7 @@ enum Kind protected abstract Kind kind(); abstract ByteBuffer compute(TxnData data, AbstractType receiver); + abstract void collect(TableMetadatas.Collector collector); public static class Constant extends TxnReferenceValue { @@ -103,22 +105,27 @@ public ByteBuffer compute(TxnData data, AbstractType receiver) return value; } + @Override + void collect(TableMetadatas.Collector collector) + { + } + private static final Serializer serializer = new Serializer() { @Override - public void serialize(Constant constant, DataOutputPlus out, Version version) throws IOException + public void serialize(Constant constant, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { ByteBufferUtil.writeWithVIntLength(constant.value, out); } @Override - public Constant deserialize(DataInputPlus in, Version version, Kind kind) throws IOException + public Constant deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) throws IOException { return new Constant(ByteBufferUtil.readWithVIntLength(in)); } @Override - public long serializedSize(Constant constant, Version version) + public long serializedSize(Constant constant, TableMetadatas tables, Version version) { return ByteBufferUtil.serializedSizeWithVIntLength(constant.value); } @@ -167,50 +174,56 @@ public ByteBuffer compute(TxnData data, AbstractType receiver) return reference.toByteBuffer(data, receiver); } - private static final Serializer serializer = new Serializer() + @Override + void collect(TableMetadatas.Collector collector) + { + reference.collect(collector); + } + + private static final Serializer serializer = new Serializer<>() { @Override - public void serialize(Substitution substitution, DataOutputPlus out, Version version) throws IOException + public void serialize(Substitution substitution, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { - TxnReference.serializer.serialize(substitution.reference, out, version); + TxnReference.serializer.serialize(substitution.reference, tables, out, version); } @Override - public Substitution deserialize(DataInputPlus in, Version version, Kind kind) throws IOException + public Substitution deserialize(TableMetadatas tables, DataInputPlus in, Version version, Kind kind) throws IOException { - return new Substitution(TxnReference.serializer.deserialize(in, version)); + return new Substitution(TxnReference.serializer.deserialize(tables, in, version)); } @Override - public long serializedSize(Substitution substitution, Version version) + public long serializedSize(Substitution substitution, TableMetadatas tables, Version version) { - return TxnReference.serializer.serializedSize(substitution.reference, version); + return TxnReference.serializer.serializedSize(substitution.reference, tables, version); } }; } - static final IVersionedSerializer serializer = new IVersionedSerializer() + static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @SuppressWarnings("unchecked") @Override - public void serialize(TxnReferenceValue value, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnReferenceValue value, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { out.writeUnsignedVInt32(value.kind().ordinal()); - value.kind().serializer.serialize(value, out, version); + value.kind().serializer.serialize(value, tables, out, version); } @Override - public TxnReferenceValue deserialize(DataInputPlus in, Version version) throws IOException + public TxnReferenceValue deserialize(TableMetadatas tables, DataInputPlus in, Version version) throws IOException { Kind kind = Kind.values()[in.readUnsignedVInt32()]; - return kind.serializer.deserialize(in, version, kind); + return kind.serializer.deserialize(tables, in, version, kind); } @SuppressWarnings("unchecked") @Override - public long serializedSize(TxnReferenceValue value, Version version) + public long serializedSize(TxnReferenceValue value, TableMetadatas tables, Version version) { - return TypeSizes.sizeofUnsignedVInt(value.kind().ordinal()) + value.kind().serializer.serializedSize(value, version); + return TypeSizes.sizeofUnsignedVInt(value.kind().ordinal()) + value.kind().serializer.serializedSize(value, tables, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index be9e8b562296..8f3baf374e49 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -25,17 +25,16 @@ import java.util.Collections; import java.util.List; import java.util.Objects; -import java.util.function.Function; import javax.annotation.Nullable; import accord.api.Data; -import accord.api.Key; import accord.api.Update; import accord.primitives.Keys; import accord.primitives.Participants; import accord.primitives.Ranges; import accord.primitives.RoutableKey; import accord.primitives.Timestamp; +import accord.utils.Invariants; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.TypeSizes; @@ -45,9 +44,12 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.AccordObjectSizes; import org.apache.cassandra.service.accord.IAccordService; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; -import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.serializers.Version; +import org.apache.cassandra.service.accord.txn.TxnCondition.SerializedTxnCondition; +import org.apache.cassandra.service.accord.txn.TxnWrite.Fragment; import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.ByteBufferUtil; @@ -69,12 +71,13 @@ public class TxnUpdate extends AccordUpdate { - private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnUpdate(null, new ByteBuffer[0], null, null, false)); + private static final long EMPTY_SIZE = ObjectSizes.measure(new TxnUpdate(TableMetadatas.none(), null, new ByteBuffer[0], null, null, false)); private static final int FLAG_PRESERVE_TIMESTAMPS = 0x1; + final TableMetadatas tables; private final Keys keys; private final ByteBuffer[] fragments; - private final AbstractSerialized condition; + private final AbstractSerialized condition; @Nullable private final ConsistencyLevel cassandraCommitCL; @@ -87,23 +90,26 @@ public class TxnUpdate extends AccordUpdate // Memoize computation of condition private Boolean conditionResult; - public TxnUpdate(List fragments, TxnCondition condition, @Nullable ConsistencyLevel cassandraCommitCL, boolean preserveTimestamps) + public TxnUpdate(TableMetadatas tables, List fragments, TxnCondition condition, @Nullable ConsistencyLevel cassandraCommitCL, boolean preserveTimestamps) { requireArgument(cassandraCommitCL == null || IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(cassandraCommitCL)); - // TODO: Figure out a way to shove keys into TxnCondition, and have it implement slice/merge. + this.tables = tables; this.keys = Keys.of(fragments, fragment -> fragment.key); - fragments.sort(TxnWrite.Fragment::compareKeys); - //TODO (correctness): this node could be on version N while the peers are on N-1, which would have issues as the peers wouldn't know about N yet. - // Can not eagerly serialize until we know the "correct" version, else we need a way to fallback on mismatch. - this.fragments = toSerializedValuesArray(keys, fragments, fragment -> fragment.key, Version.LATEST, TxnWrite.Fragment.serializer); - this.condition = AbstractSerialized.of(TxnCondition.serializer, condition); + fragments.sort(Fragment::compareKeys); + // TODO (required): this node could be on version N while the peers are on N-1, which would have issues as the peers wouldn't know about N yet. + // Can not eagerly serialize until we know the "correct" version, else we need a way to fallback on mismatch. + this.fragments = toSerializedValuesArray(keys, fragments, tables, Version.LATEST); + // TODO (desired): slice TxnCondition, or pick a single shard to persist it + this.condition = new SerializedTxnCondition(condition, tables); this.condition.unmemoize(); + this.condition.deserialize(tables); this.cassandraCommitCL = cassandraCommitCL; this.preserveTimestamps = preserveTimestamps; } - private TxnUpdate(Keys keys, ByteBuffer[] fragments, AbstractSerialized condition, ConsistencyLevel cassandraCommitCL, boolean preserveTimestamps) + private TxnUpdate(TableMetadatas tables, Keys keys, ByteBuffer[] fragments, AbstractSerialized condition, ConsistencyLevel cassandraCommitCL, boolean preserveTimestamps) { + this.tables = tables; this.keys = keys; this.fragments = fragments; this.condition = condition; @@ -113,7 +119,7 @@ private TxnUpdate(Keys keys, ByteBuffer[] fragments, AbstractSerialized + public static class Update extends AbstractSerialized { - private static final long EMPTY_SIZE = ObjectSizes.measure(new Update(null, 0, ByteBufferUtil.EMPTY_BYTE_BUFFER, Version.LATEST)); + private static final long EMPTY_SIZE = ObjectSizes.measure(new Update(null, 0, ByteBufferUtil.EMPTY_BYTE_BUFFER)); public final PartitionKey key; public final int index; - public Update(PartitionKey key, int index, PartitionUpdate update) + public Update(PartitionKey key, int index, PartitionUpdate update, TableMetadatas tables) { - super(update); - this.key = key; - this.index = index; + this(key, index, serializeInternal(update, tables, Version.LATEST)); } - private Update(PartitionKey key, int index, ByteBuffer bytes, Version version) + private Update(PartitionKey key, int index, ByteBuffer latestVersionBytes) { - super(bytes, version); + super(latestVersionBytes); this.key = key; this.index = index; } @@ -103,9 +106,8 @@ private Update(PartitionKey key, int index, ByteBuffer bytes, Version version) @Override public long estimatedSizeOnHeap() { - return EMPTY_SIZE - + AccordObjectSizes.key(key) - + ByteBufferUtil.estimatedSizeOnHeap(unsafeBytes()); + // we don't measure the key, as this is shared + return EMPTY_SIZE + ByteBufferUtil.estimatedSizeOnHeap(unsafeBytes()); } @Override @@ -130,51 +132,94 @@ public String toString() return "Complete{" + "key=" + key + ", index=" + index + - ", update=" + get() + '}'; } - public AsyncChain write(boolean preserveTimestamps, long timestamp) + public AsyncChain write(TableMetadatas tables, boolean preserveTimestamps, long timestamp) { - PartitionUpdate update = get(); + PartitionUpdate update = deserialize(tables); if (!preserveTimestamps) - update = new PartitionUpdate.Builder(get(), 0).updateAllTimestamp(timestamp).build(); + update = new PartitionUpdate.Builder(update, 0).updateAllTimestamp(timestamp).build(); Mutation mutation = new Mutation(update, PotentialTxnConflicts.ALLOW); return AsyncChains.ofRunnable(Stage.MUTATION.executor(), mutation::applyUnsafe); } @Override - protected IVersionedSerializer serializer() + protected ByteBuffer serialize(PartitionUpdate value, TableMetadatas tables, Version version) + { + return serializeInternal(value, tables, version); + } + + @Override + protected ByteBuffer reserialize(ByteBuffer bytes, TableMetadatas param, Version srcVersion, Version trgVersion) { - return partitionUpdateSerializer; + return bytes; + } + + @Override + protected PartitionUpdate deserialize(TableMetadatas tables, ByteBuffer bytes, Version version) + { + return deserialize(key, tables, bytes, version); + } + + private static ByteBuffer serializeInternal(PartitionUpdate value, TableMetadatas tables, Version version) + { + try (DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get()) + { + PartitionUpdate.serializer.serializeWithoutKey(value, tables, out, version.messageVersion()); + return out.asNewBuffer(); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } + } + + private static PartitionUpdate deserialize(PartitionKey key, TableMetadatas tables, ByteBuffer bytes, Version version) + { + try (DataInputBuffer in = new DataInputBuffer(bytes, true)) + { + return PartitionUpdate.serializer.deserialize(key, tables, in, version.messageVersion(), FROM_REMOTE); + } + catch (IOException e) + { + throw new UncheckedIOException(e); + } } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer() { @Override - public void serialize(Update write, DataOutputPlus out, Version version) throws IOException + public void serialize(Update write, TableMetadatasAndKeys tablesAndKeys, DataOutputPlus out, Version version) throws IOException { - PartitionKey.serializer.serialize(write.key, out); + tablesAndKeys.serializeKey(write.key, out); out.writeInt(write.index); - ByteBufferUtil.writeWithVIntLength(write.bytes(version), out); + ByteBufferUtil.writeWithVIntLength(write.bytes(tablesAndKeys.tables, version), out); + } + + ByteBuffer reserialize(ByteBuffer buffer, TableMetadatasAndKeys tablesAndKeys, Version srcVersion, Version trgVersion) + { + return buffer; } @Override - public Update deserialize(DataInputPlus in, Version version) throws IOException + public Update deserialize(TableMetadatasAndKeys tablesAndKeys, DataInputPlus in, Version version) throws IOException { - PartitionKey key = PartitionKey.serializer.deserialize(in); + PartitionKey key = tablesAndKeys.deserializeKey(in); int index = in.readInt(); ByteBuffer bytes = ByteBufferUtil.readWithVIntLength(in); - return new Update(key, index, bytes, version); + if (version != Version.LATEST) + bytes = reserialize(bytes, tablesAndKeys, version, Version.LATEST); + return new Update(key, index, bytes); } @Override - public long serializedSize(Update write, Version version) + public long serializedSize(Update write, TableMetadatasAndKeys tablesAndKeys, Version version) { long size = 0; - size += PartitionKey.serializer.serializedSize(write.key); + size += tablesAndKeys.serializedKeySize(write.key); size += TypeSizes.INT_SIZE; - size += ByteBufferUtil.serializedSizeWithVIntLength(write.bytes(version)); + size += ByteBufferUtil.serializedSizeWithVIntLength(write.bytes(tablesAndKeys.tables, version)); return size; } }; @@ -198,11 +243,6 @@ public Fragment(PartitionKey key, int index, PartitionUpdate baseUpdate, TxnRefe this.referenceOps = referenceOps; } - public Fragment(int index, PartitionUpdate baseUpdate, TxnReferenceOperations referenceOps) - { - this(PartitionKey.of(baseUpdate), index, baseUpdate, referenceOps); - } - public static int compareKeys(Fragment left, Fragment right) { return left.key.compareTo(right.key); @@ -234,15 +274,15 @@ public boolean isComplete() return referenceOps.isEmpty(); } - public Update toUpdate() + public Update toUpdate(TableMetadatas tables) { - return new Update(key, index, baseUpdate); + return new Update(key, index, baseUpdate, tables); } - public Update complete(AccordUpdateParameters parameters) + public Update complete(AccordUpdateParameters parameters, TableMetadatas tables) { if (isComplete()) - return toUpdate(); + return toUpdate(tables); DecoratedKey key = baseUpdate.partitionKey(); PartitionUpdate.Builder updateBuilder = new PartitionUpdate.Builder(baseUpdate.metadata(), @@ -263,7 +303,7 @@ public Update complete(AccordUpdateParameters parameters) if (row != null) updateBuilder.add(row); - return new Update(this.key, index, updateBuilder.build()); + return new Update(this.key, index, updateBuilder.build(), tables); } private static Columns columns(Columns current, List referenceOps) @@ -271,9 +311,20 @@ private static Columns columns(Columns current, List refe if (referenceOps.isEmpty()) return current; - Set combined = new HashSet<>(current); - referenceOps.forEach(op -> combined.add(op.receiver())); - return Columns.from(combined); + Set missing = null; + for (int i = 0, mi = referenceOps.size() ; i < mi ; ++i) + { + ColumnMetadata cm = referenceOps.get(i).receiver(); + if (!current.contains(cm)) + { + if (missing == null) + missing = new HashSet<>(); + missing.add(cm); + } + } + if (missing == null) + return current; + return current.mergeTo(Columns.from(missing)); } private static RegularAndStaticColumns columns(PartitionUpdate update, TxnReferenceOperations referenceOps) @@ -301,51 +352,50 @@ private static Row applyUpdates(Row existing, List operat return up.buildRow(); } - static final IVersionedSerializer serializer = new IVersionedSerializer<>() + static final FragmentSerializer serializer = new FragmentSerializer(); + static class FragmentSerializer { - @Override - public void serialize(Fragment fragment, DataOutputPlus out, Version version) throws IOException + public void serialize(Fragment fragment, TableMetadatas tables, DataOutputPlus out, Version version) throws IOException { - PartitionKey.serializer.serialize(fragment.key, out); out.writeUnsignedVInt32(fragment.index); - partitionUpdateSerializer.serialize(fragment.baseUpdate, out, version); - TxnReferenceOperations.serializer.serialize(fragment.referenceOps, out, version); + PartitionUpdate.serializer.serializeWithoutKey(fragment.baseUpdate, tables, out, version.messageVersion()); + TxnReferenceOperations.serializer.serialize(fragment.referenceOps, tables, out, version); } - @Override - public Fragment deserialize(DataInputPlus in, Version version) throws IOException + public Fragment deserialize(PartitionKey key, TableMetadatas tables, DataInputPlus in, Version version) throws IOException { - PartitionKey key = PartitionKey.serializer.deserialize(in); int idx = in.readUnsignedVInt32(); - PartitionUpdate baseUpdate = partitionUpdateSerializer.deserialize(in, version); - TxnReferenceOperations referenceOps = TxnReferenceOperations.serializer.deserialize(in, version); + // TODO (required): why FROM_REMOTE? + PartitionUpdate baseUpdate = PartitionUpdate.serializer.deserialize(key, tables, in, version.messageVersion(), FROM_REMOTE); + TxnReferenceOperations referenceOps = TxnReferenceOperations.serializer.deserialize(tables, in, version); return new Fragment(key, idx, baseUpdate, referenceOps); } - @Override - public long serializedSize(Fragment fragment, Version version) + public long serializedSize(Fragment fragment, TableMetadatas tables, Version version) { long size = 0; - size += PartitionKey.serializer.serializedSize(fragment.key); size += TypeSizes.sizeofUnsignedVInt(fragment.index); - size += partitionUpdateSerializer.serializedSize(fragment.baseUpdate, version); - size += TxnReferenceOperations.serializer.serializedSize(fragment.referenceOps, version); + size += PartitionUpdate.serializer.serializedSizeWithoutKey(fragment.baseUpdate, tables, version.messageVersion()); + size += TxnReferenceOperations.serializer.serializedSize(fragment.referenceOps, tables, version); return size; } - }; + } } + public final TableMetadatas tables; private final boolean isConditionMet; - private TxnWrite(Update[] items, boolean isConditionMet) + private TxnWrite(TableMetadatas tables, Update[] items, boolean isConditionMet) { super(items, Domain.Key); + this.tables = tables; this.isConditionMet = isConditionMet; } - public TxnWrite(List items, boolean isConditionMet) + public TxnWrite(TableMetadatas tables, List items, boolean isConditionMet) { super(items, Domain.Key); + this.tables = tables; this.isConditionMet = isConditionMet; } @@ -361,12 +411,6 @@ Seekable getKey(Update item) return item.key; } - @Override - Domain domain() - { - return Domain.Key; - } - @Override Update[] newArray(int size) { @@ -397,12 +441,12 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, TxnId tx boolean preserveTimestamps = txnUpdate.preserveTimestamps(); // Apply updates not specified fully by the client but built from fragments completed by data from reads. // This occurs, for example, when an UPDATE statement uses a value assigned by a LET statement. - forEachWithKey((PartitionKey) key, write -> results.add(write.write(preserveTimestamps, timestamp))); + forEachWithKey(key, write -> results.add(write.write(tables, preserveTimestamps, timestamp))); // Apply updates that are fully specified by the client and not reliant on data from reads. // ex. INSERT INTO tbl (a, b, c) VALUES (1, 2, 3) // These updates are persisted only in TxnUpdate and not in TxnWrite to avoid duplication. List updates = txnUpdate.completeUpdatesForKey((RoutableKey) key); - updates.forEach(write -> results.add(write.write(preserveTimestamps, timestamp))); + updates.forEach(write -> results.add(write.write(tables, preserveTimestamps, timestamp))); } if (results.isEmpty()) @@ -411,7 +455,7 @@ public AsyncChain apply(Seekable key, SafeCommandStore safeStore, TxnId tx if (results.size() == 1) return results.get(0).flatMap(o -> Writes.SUCCESS); - return AsyncChains.allOf(results).flatMap(objects -> Writes.SUCCESS); + return AsyncChains.reduce(results, (i1, i2) -> null, (Void)null).flatMap(ignore -> Writes.SUCCESS); } public long estimatedSizeOnHeap() @@ -422,26 +466,30 @@ public long estimatedSizeOnHeap() return size; } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final ParameterisedVersionedSerializer serializer = new ParameterisedVersionedSerializer<>() { @Override - public void serialize(TxnWrite write, DataOutputPlus out, Version version) throws IOException + public void serialize(TxnWrite write, Seekables keys, DataOutputPlus out, Version version) throws IOException { + write.tables.serializeSelf(out); BooleanSerializer.serializer.serialize(write.isConditionMet, out); - serializeArray(write.items, out, version, Update.serializer); + serializeArray(write.items, new TableMetadatasAndKeys(write.tables, keys), out, version, Update.serializer); } @Override - public TxnWrite deserialize(DataInputPlus in, Version version) throws IOException + public TxnWrite deserialize(Seekables keys, DataInputPlus in, Version version) throws IOException { + TableMetadatas tables = TableMetadatas.deserializeSelf(in); boolean isConditionMet = BooleanSerializer.serializer.deserialize(in); - return new TxnWrite(deserializeArray(in, version, Update.serializer, Update[]::new), isConditionMet); + return new TxnWrite(tables, deserializeArray(new TableMetadatasAndKeys(tables, keys), in, version, Update.serializer, Update[]::new), isConditionMet); } @Override - public long serializedSize(TxnWrite write, Version version) + public long serializedSize(TxnWrite write, Seekables keys, Version version) { - return BooleanSerializer.serializer.serializedSize(write.isConditionMet) + serializedArraySize(write.items, version, Update.serializer); + return write.tables.serializedSelfSize() + + BooleanSerializer.serializer.serializedSize(write.isConditionMet) + + serializedArraySize(write.items, new TableMetadatasAndKeys(write.tables, keys), version, Update.serializer); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java index b1193179b9a6..7167a637ed2c 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/UnrecoverableRepairUpdate.java @@ -40,6 +40,7 @@ import org.apache.cassandra.locator.Endpoints; import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaPlan; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.service.reads.repair.BlockingReadRepair; @@ -182,18 +183,18 @@ public void runBRR(ReadCoordinator readCoordinator) public static final AccordUpdateSerializer serializer = new AccordUpdateSerializer<>() { @Override - public void serialize(UnrecoverableRepairUpdate update, DataOutputPlus out, Version version) + public void serialize(UnrecoverableRepairUpdate update, TableMetadatasAndKeys tablesAndKeys, DataOutputPlus out, Version version) { } @Override - public UnrecoverableRepairUpdate deserialize(DataInputPlus in, Version version) + public UnrecoverableRepairUpdate deserialize(TableMetadatasAndKeys tablesAndKeys, DataInputPlus in, Version version) { return null; } @Override - public long serializedSize(UnrecoverableRepairUpdate update, Version version) + public long serializedSize(UnrecoverableRepairUpdate update, TableMetadatasAndKeys tablesAndKeys, Version version) { return 0; } diff --git a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java index 362bf917d3dd..b3291994db30 100644 --- a/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java +++ b/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java @@ -32,7 +32,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.primitives.Keys; import accord.primitives.Routable.Domain; import accord.primitives.Txn; import org.apache.cassandra.db.ColumnFamilyStore; @@ -53,6 +52,8 @@ import org.apache.cassandra.service.accord.IAccordService; import org.apache.cassandra.service.accord.IAccordService.IAccordResult; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.TxnCondition; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; @@ -242,24 +243,37 @@ public static IAccordResult mutateWithAccordAsync(ClusterMetadata cm, { if (consistencyLevel != null && !IAccordService.SUPPORTED_COMMIT_CONSISTENCY_LEVELS.contains(consistencyLevel)) throw new InvalidRequestException(consistencyLevel + " is not supported by Accord"); + + TableMetadatas tables; + { + TableMetadatas.Collector tableCollector = new TableMetadatas.Collector(); + for (IMutation mutation : mutations) + { + for (TableId tableId : mutation.getTableIds()) + tableCollector.add(cm.schema.getTableMetadata(tableId)); + } + tables = tableCollector.build(); + } + + TableMetadatasAndKeys.KeyCollector keyCollector = new TableMetadatasAndKeys.KeyCollector(tables); + int fragmentIndex = 0; List fragments = new ArrayList<>(mutations.size()); - List partitionKeys = new ArrayList<>(mutations.size()); long minEpoch = Epoch.EMPTY.getEpoch(); for (IMutation mutation : mutations) { for (PartitionUpdate update : mutation.getPartitionUpdates()) { - PartitionKey pk = PartitionKey.of(update); - partitionKeys.add(pk); + PartitionKey pk = keyCollector.collect(update.metadata(), update.partitionKey()); minEpoch = Math.max(minEpoch, update.metadata().epoch.getEpoch()); - fragments.add(new TxnWrite.Fragment(PartitionKey.of(update), fragmentIndex++, update, TxnReferenceOperations.empty())); + fragments.add(new TxnWrite.Fragment(pk, fragmentIndex++, update, TxnReferenceOperations.empty())); } } // Potentially ignore commit consistency level if the TransactionalMode specifies full ConsistencyLevel clForCommit = consistencyLevelForCommit(cm, mutations, consistencyLevel); - TxnUpdate update = new TxnUpdate(fragments, TxnCondition.none(), clForCommit, true); - Txn.InMemory txn = new Txn.InMemory(Keys.of(partitionKeys), TxnRead.empty(Domain.Key), TxnQuery.NONE, update); + TableMetadatasAndKeys tablesAndKeys = new TableMetadatasAndKeys(tables, keyCollector.build()); + TxnUpdate update = new TxnUpdate(tables, fragments, TxnCondition.none(), clForCommit, true); + Txn.InMemory txn = new Txn.InMemory(tablesAndKeys.keys, TxnRead.empty(Domain.Key), TxnQuery.NONE, update, tablesAndKeys); return AccordService.instance().coordinateAsync(minEpoch, txn, clForCommit, requestTime); } diff --git a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java index 9e82583aafae..ee66574b016e 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java +++ b/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java @@ -48,6 +48,8 @@ import org.apache.cassandra.metrics.ReadRepairMetrics; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; import org.apache.cassandra.service.accord.txn.TxnResult; @@ -221,10 +223,10 @@ private void repairViaAccordTransaction(DecoratedKey dk, Map checkState(coordinator.isEventuallyConsistent(), "Should only repair transactionally for an eventually consistent read coordinator"); ReadRepairMetrics.repairedBlockingViaAccord.mark(); PartitionKey partitionKey = new PartitionKey(command.metadata().id, dk); - Keys key = Keys.of(partitionKey); + Keys keys = Keys.of(partitionKey); // This is going create a new BlockingReadRepair inside an Accord transaction which will go down // the !isEventuallyConsistent path and apply the repairs through Accord command stores using AccordInteropExecution - UnrecoverableRepairUpdate repairUpdate = new UnrecoverableRepairUpdate(AccordService.instance().nodeId(), this, key, dk, accordMutations, writePlan); + UnrecoverableRepairUpdate repairUpdate = new UnrecoverableRepairUpdate(AccordService.instance().nodeId(), this, keys, dk, accordMutations, writePlan); /* * The motivation for using a read to apply read repair is that we want to apply the writes in the execute phase @@ -242,7 +244,8 @@ private void repairViaAccordTransaction(DecoratedKey dk, Map * since overlapping non-transactional writes with transactional reads will never be deterministic, but it combines * the two things into the same mechanism and we can't tell the origin of the writes needing read repair anyways. */ - Txn txn = new Txn.InMemory(Txn.Kind.Read, key, TxnRead.createNoOpRead(key), TxnQuery.NONE, repairUpdate); + TableMetadatasAndKeys tablesAndKeys = new TableMetadatasAndKeys(TableMetadatas.of(command.metadata()), keys); + Txn txn = new Txn.InMemory(Txn.Kind.Read, keys, TxnRead.createNoOpRead(keys), TxnQuery.NONE, repairUpdate, tablesAndKeys); Future repairFuture = Stage.ACCORD_MIGRATION.submit(() -> AccordService.instance().coordinate(command.metadata().epoch.getEpoch(), txn, ConsistencyLevel.ANY, requestTime)); repairs.add(new PendingPartitionRepair() diff --git a/src/java/org/apache/cassandra/utils/ArraySerializers.java b/src/java/org/apache/cassandra/utils/ArraySerializers.java index 5f40dac7ef70..cad27d68fe1d 100644 --- a/src/java/org/apache/cassandra/utils/ArraySerializers.java +++ b/src/java/org/apache/cassandra/utils/ArraySerializers.java @@ -23,6 +23,7 @@ import org.apache.cassandra.io.AsymmetricVersionedSerializer; import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.UnversionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -52,6 +53,13 @@ public static void serializeArray(T[] items, DataOutputPlus out, Ve serializer.serialize(item, out, version); } + public static void serializeArray(T[] items, P p, DataOutputPlus out, Version version, ParameterisedVersionedSerializer serializer) throws IOException + { + out.writeUnsignedVInt32(items.length); + for (T item : items) + serializer.serialize(item, p, out, version); + } + public static T[] deserializeArray(DataInputPlus in, UnversionedSerializer serializer, IntFunction arrayFactory) throws IOException { int size = in.readUnsignedVInt32(); @@ -79,6 +87,15 @@ public static T[] deserializeArray(DataInputPlus in, Version versio return items; } + public static T[] deserializeArray(P p, DataInputPlus in, Version version, ParameterisedVersionedSerializer serializer, IntFunction arrayFactory) throws IOException + { + int size = in.readUnsignedVInt32(); + T[] items = arrayFactory.apply(size); + for (int i = 0; i < size; i++) + items[i] = serializer.deserialize(p, in, version); + return items; + } + public static long serializedArraySize(T[] array, UnversionedSerializer serializer) { long size = sizeofUnsignedVInt(array.length); @@ -102,4 +119,12 @@ public static long serializedArraySize(T[] array, Version version, size += serializer.serializedSize(item, version); return size; } + + public static long serializedArraySize(T[] array, P p, Version version, ParameterisedVersionedSerializer serializer) + { + long size = sizeofUnsignedVInt(array.length); + for (T item : array) + size += serializer.serializedSize(item, p, version); + return size; + } } diff --git a/src/java/org/apache/cassandra/utils/CollectionSerializers.java b/src/java/org/apache/cassandra/utils/CollectionSerializers.java index 2b61377bb874..81173bdf1e4c 100644 --- a/src/java/org/apache/cassandra/utils/CollectionSerializers.java +++ b/src/java/org/apache/cassandra/utils/CollectionSerializers.java @@ -34,8 +34,12 @@ import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.IPartitionerDependentSerializer; +import org.apache.cassandra.io.AsymmetricParameterisedUnversionedSerializer; +import org.apache.cassandra.io.AsymmetricParameterisedVersionedSerializer; import org.apache.cassandra.io.AsymmetricVersionedSerializer; import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.ParameterisedUnversionedSerializer; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.UnversionedSerializer; import org.apache.cassandra.io.VersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -68,6 +72,20 @@ public static void serializeCollection(Collection values, DataOu valueSerializer.serialize(value, out, version); } + public static void serializeCollection(Collection values, P p, DataOutputPlus out, Version version, AsymmetricParameterisedVersionedSerializer valueSerializer) throws IOException + { + out.writeUnsignedVInt32(values.size()); + for (V value : values) + valueSerializer.serialize(value, p, out, version); + } + + public static void serializeCollection(Collection values, P p, DataOutputPlus out, AsymmetricParameterisedUnversionedSerializer valueSerializer) throws IOException + { + out.writeUnsignedVInt32(values.size()); + for (V value : values) + valueSerializer.serialize(value, p, out); + } + public static void serializeCollection(Collection values, DataOutputPlus out, Version version, MetadataSerializer valueSerializer) throws IOException { out.writeUnsignedVInt32(values.size()); @@ -90,6 +108,22 @@ public static > void serializeList(L values, DataOutputPlus valueSerializer.serialize(values.get(i), out); } + public static > void serializeList(L values, P p, DataOutputPlus out, ParameterisedUnversionedSerializer valueSerializer) throws IOException + { + int size = values.size(); + out.writeUnsignedVInt32(size); + for (int i = 0 ; i < size ; ++i) + valueSerializer.serialize(values.get(i), p, out); + } + + public static > void serializeList(L values, P p, DataOutputPlus out, Version version, ParameterisedVersionedSerializer valueSerializer) throws IOException + { + int size = values.size(); + out.writeUnsignedVInt32(size); + for (int i = 0 ; i < size ; ++i) + valueSerializer.serialize(values.get(i), p, out, version); + } + public static > void serializeList(L values, DataOutputPlus out, int version, IVersionedSerializer valueSerializer) throws IOException { int size = values.size(); @@ -188,6 +222,16 @@ public static List deserializeList(DataInputPlus in, Version ver return deserializeCollection(in, version, serializer, newArrayList()); } + public static List deserializeList(P p, DataInputPlus in, Version version, AsymmetricParameterisedVersionedSerializer serializer) throws IOException + { + return deserializeCollection(p, in, version, serializer, newArrayList()); + } + + public static List deserializeList(P p, DataInputPlus in, AsymmetricParameterisedUnversionedSerializer serializer) throws IOException + { + return deserializeCollection(p, in, serializer, newArrayList()); + } + public static List deserializeList(DataInputPlus in, Version version, MetadataSerializer serializer) throws IOException { return deserializeCollection(in, version, serializer, newArrayList()); @@ -340,6 +384,22 @@ public static long serializedCollectionSize(Collection values, V return size; } + public static long serializedCollectionSize(P p, Collection values, Version version, AsymmetricParameterisedVersionedSerializer valueSerializer) + { + long size = sizeofUnsignedVInt(values.size()); + for (V value : values) + size += valueSerializer.serializedSize(value, p, version); + return size; + } + + public static long serializedCollectionSize(Collection values, P p, AsymmetricParameterisedUnversionedSerializer valueSerializer) + { + long size = sizeofUnsignedVInt(values.size()); + for (V value : values) + size += valueSerializer.serializedSize(value, p); + return size; + } + public static long serializedCollectionSize(Collection values, Version version, MetadataSerializer valueSerializer) { long size = sizeofUnsignedVInt(values.size()); @@ -375,6 +435,15 @@ public static , Version> long serializedListSize(L values, return size; } + public static , Version> long serializedListSize(L values, P p, Version version, AsymmetricParameterisedVersionedSerializer valueSerializer) + { + int items = values.size(); + long size = sizeofUnsignedVInt(items); + for (int i = 0 ; i < items ; ++i) + size += valueSerializer.serializedSize(values.get(i), p, version); + return size; + } + public static > long serializedListSize(L values, Version version, MetadataSerializer valueSerializer) { int items = values.size(); @@ -493,6 +562,24 @@ private static , Version> C deserializeCollec return result; } + private static , Version> C deserializeCollection(P p, DataInputPlus in, Version version, AsymmetricParameterisedVersionedSerializer serializer, IntFunction factory) throws IOException + { + int size = in.readUnsignedVInt32(); + C result = factory.apply(size); + while (size-- > 0) + result.add(serializer.deserialize(p, in, version)); + return result; + } + + private static , Version> C deserializeCollection(P p, DataInputPlus in, AsymmetricParameterisedUnversionedSerializer serializer, IntFunction factory) throws IOException + { + int size = in.readUnsignedVInt32(); + C result = factory.apply(size); + while (size-- > 0) + result.add(serializer.deserialize(p, in)); + return result; + } + private static > C deserializeCollection(DataInputPlus in, Version version, MetadataSerializer serializer, IntFunction factory) throws IOException { int size = in.readUnsignedVInt32(); diff --git a/src/java/org/apache/cassandra/utils/FastByteOperations.java b/src/java/org/apache/cassandra/utils/FastByteOperations.java index 2a86712951d2..358d4993ddd8 100644 --- a/src/java/org/apache/cassandra/utils/FastByteOperations.java +++ b/src/java/org/apache/cassandra/utils/FastByteOperations.java @@ -396,9 +396,9 @@ public static int compareTo(Object buffer1, long memoryOffset1, int length1, if (lw != rw) { if (BIG_ENDIAN) - return UnsignedLongs.compare(lw, rw); + return Long.compareUnsigned(lw, rw); - return UnsignedLongs.compare(Long.reverseBytes(lw), Long.reverseBytes(rw)); + return Long.compareUnsigned(Long.reverseBytes(lw), Long.reverseBytes(rw)); } } diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java index 8586a0ba08da..14d1587c5bd6 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java @@ -98,7 +98,8 @@ public static void setUp() throws Throwable FieldUtil.transferFields(new CommandSerializers.QuerySerializers(BurnTestKeySerializers.read, BurnTestKeySerializers.query, BurnTestKeySerializers.update, - BurnTestKeySerializers.write), + BurnTestKeySerializers.write, + BurnTestKeySerializers.tablesAndKeys), CommandSerializers.class); FieldUtil.transferFields(new DepsSerializers.Impl(BurnTestKeySerializers.range), diff --git a/test/distributed/org/apache/cassandra/service/accord/BurnTestKeySerializers.java b/test/distributed/org/apache/cassandra/service/accord/BurnTestKeySerializers.java index 7e732566e026..b11191219f40 100644 --- a/test/distributed/org/apache/cassandra/service/accord/BurnTestKeySerializers.java +++ b/test/distributed/org/apache/cassandra/service/accord/BurnTestKeySerializers.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.accord; - import java.io.IOException; import java.util.Map; import java.util.function.Function; @@ -41,16 +40,15 @@ import accord.primitives.Range; import accord.primitives.Seekables; import accord.primitives.TxnId; -import org.apache.cassandra.io.EmbeddedAsymmetricVersionedSerializer; +import org.apache.cassandra.io.ParameterisedVersionedSerializer; import org.apache.cassandra.io.UnversionedSerializer; -import org.apache.cassandra.io.VersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.api.AccordRoutableKey; import org.apache.cassandra.service.accord.api.AccordRoutableKey.AccordSearchableKeySerializer; import org.apache.cassandra.service.accord.serializers.CommandSerializers; -import org.apache.cassandra.service.accord.serializers.IVersionedSerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.serializers.TopologySerializers; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.utils.CastingSerializer; @@ -240,10 +238,10 @@ public long serializedSize(PrefixedIntHashKey.Range t) } }; - public static final VersionedSerializer read = CastingSerializer.create(ListRead.class, new IVersionedSerializer<>() + public static final ParameterisedVersionedSerializer read = (ParameterisedVersionedSerializer) new ParameterisedVersionedSerializer() { @Override - public void serialize(ListRead t, DataOutputPlus out, Version version) throws IOException + public void serialize(ListRead t, TableMetadatasAndKeys seekables, DataOutputPlus out, Version version) throws IOException { out.writeBoolean(t.isEphemeralRead); KeySerializers.seekables.serialize(t.userReadKeys, out); @@ -251,7 +249,7 @@ public void serialize(ListRead t, DataOutputPlus out, Version version) throws IO } @Override - public ListRead deserialize(DataInputPlus in, Version version) throws IOException + public ListRead deserialize(TableMetadatasAndKeys seekables, DataInputPlus in, Version version) throws IOException { boolean isEphemeralRead = in.readBoolean(); Seekables userReadKeys = KeySerializers.seekables.deserialize(in); @@ -260,11 +258,11 @@ public ListRead deserialize(DataInputPlus in, Version version) throws IOExceptio } @Override - public long serializedSize(ListRead t, Version version) + public long serializedSize(ListRead t, TableMetadatasAndKeys seekables, Version version) { throw new RuntimeException("not implemented"); } - }); + }; public static final UnversionedSerializer query = CastingSerializer.create(ListQuery.class, new UnversionedSerializer<>() { @@ -305,9 +303,9 @@ public long serializedSize(ListQuery t) } }); - public static final VersionedSerializer update = CastingSerializer.create(ListUpdate.class, new IVersionedSerializer<>() + public static final ParameterisedVersionedSerializer update = (ParameterisedVersionedSerializer) new ParameterisedVersionedSerializer() { - public void serialize(ListUpdate t, DataOutputPlus out, Version version) throws IOException + public void serialize(ListUpdate t, TableMetadatasAndKeys seekables, DataOutputPlus out, Version version) throws IOException { out.writeInt(t.size()); for (Map.Entry e : t.entrySet()) @@ -317,7 +315,7 @@ public void serialize(ListUpdate t, DataOutputPlus out, Version version) throws } } - public ListUpdate deserialize(DataInputPlus in, Version version) throws IOException + public ListUpdate deserialize(TableMetadatasAndKeys seekables, DataInputPlus in, Version version) throws IOException { int size = in.readInt(); ListUpdate listUpdate = new ListUpdate(Function.identity()); @@ -330,15 +328,15 @@ public ListUpdate deserialize(DataInputPlus in, Version version) throws IOExcept return listUpdate; } - public long serializedSize(ListUpdate t, Version version) + public long serializedSize(ListUpdate t, TableMetadatasAndKeys seekables, Version version) { throw new RuntimeException("not implemented"); } - }); + }; - public static final VersionedSerializer write = CastingSerializer.create(ListWrite.class, new IVersionedSerializer<>() + public static final ParameterisedVersionedSerializer write = (ParameterisedVersionedSerializer) new ParameterisedVersionedSerializer() { - public void serialize(ListWrite t, DataOutputPlus out, Version version) throws IOException + public void serialize(ListWrite t, Seekables seekables, DataOutputPlus out, Version version) throws IOException { out.writeInt(t.size()); for (Map.Entry e : t.entrySet()) @@ -350,7 +348,7 @@ public void serialize(ListWrite t, DataOutputPlus out, Version version) throws I } } - public ListWrite deserialize(DataInputPlus in, Version version) throws IOException + public ListWrite deserialize(Seekables seekables, DataInputPlus in, Version version) throws IOException { int size = in.readInt(); ListWrite write = new ListWrite(Function.identity()); @@ -366,16 +364,34 @@ public ListWrite deserialize(DataInputPlus in, Version version) throws IOExcepti return write; } - public long serializedSize(ListWrite t, Version version) + public long serializedSize(ListWrite t, Seekables seekables, Version version) { throw new RuntimeException("not implemented"); } - }); + }; - public static final UnversionedSerializer result = CastingSerializer.create(ListResult.class, new UnversionedSerializer<>() + public static final UnversionedSerializer tablesAndKeys = new UnversionedSerializer<>() { - private final EmbeddedAsymmetricVersionedSerializer unversionedUpdate = AccordSerializers.embedded(Version.LATEST, update); + @Override + public void serialize(TableMetadatasAndKeys t, DataOutputPlus out) throws IOException + { + } + @Override + public TableMetadatasAndKeys deserialize(DataInputPlus in) throws IOException + { + return null; + } + + @Override + public long serializedSize(TableMetadatasAndKeys t) + { + return 0; + } + }; + + public static final UnversionedSerializer result = CastingSerializer.create(ListResult.class, new UnversionedSerializer<>() + { public void serialize(ListResult t, DataOutputPlus out) throws IOException { TopologySerializers.nodeId.serialize(t.client, out); @@ -395,7 +411,7 @@ public void serialize(ListResult t, DataOutputPlus out) throws IOException out.writeInt(t.update == null ? 0 : 1); if (t.update != null) - unversionedUpdate.serialize(t.update, out); + update.serialize(t.update, null, out, Version.LATEST); out.writeInt(t.status.ordinal()); } @@ -417,11 +433,11 @@ public ListResult deserialize(DataInputPlus in) throws IOException } read[i] = v; } - ListUpdate update = null; + ListUpdate upd = null; if (in.readInt() != 0) - update = (ListUpdate) unversionedUpdate.deserialize(in); + upd = (ListUpdate) update.deserialize(null, in, Version.LATEST); ListResult.Status status = ListResult.Status.values()[in.readInt()]; - return new ListResult(status, client, requestId, txnId, readKeys, responseKeys, read, update); + return new ListResult(status, client, requestId, txnId, readKeys, responseKeys, read, upd); } public long serializedSize(ListResult t) diff --git a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java index 510cc622cbaf..ea19dd381d87 100644 --- a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java +++ b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java @@ -61,16 +61,16 @@ import org.apache.cassandra.io.Serializers; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.schema.ColumnMetadata; -import org.apache.cassandra.schema.Schema; -import org.apache.cassandra.schema.SchemaProvider; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.utils.AbstractTypeGenerators; import org.apache.cassandra.utils.AbstractTypeGenerators.TypeKind; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.CassandraGenerators; import org.apache.cassandra.utils.Generators; +import org.apache.cassandra.utils.Pair; import org.apache.cassandra.utils.TimeUUID; import org.assertj.core.api.Assertions; -import org.mockito.Mockito; import org.quicktheories.generators.SourceDSL; import static accord.utils.Property.qt; @@ -184,7 +184,7 @@ private static Row newRow(ColumnMetadata definition, Map private static boolean appliesSimpleCondition(ByteBuffer rowValue, Operator op, ByteBuffer conditionValue) { ColumnMetadata definition = ColumnMetadata.regularColumn("ks", "cf", "c", Int32Type.instance, ColumnMetadata.NO_UNIQUE_ID); - ColumnsExpression column = ColumnsExpression.singleColumn(definition); + ColumnsExpression column = ColumnsExpression.singleColumn(definition, null); Terms terms = Terms.of(new Constants.Value(conditionValue)); ColumnCondition condition = new ColumnCondition(column, op, terms); ColumnCondition.Bound bound = condition.bind(QueryOptions.DEFAULT); @@ -195,7 +195,7 @@ private static boolean appliesListCondition(List rowValue, Operator { ListType type = ListType.getInstance(Int32Type.instance, true); ColumnMetadata definition = ColumnMetadata.regularColumn("ks", "cf", "c", type, ColumnMetadata.NO_UNIQUE_ID); - ColumnsExpression column = ColumnsExpression.singleColumn(definition); + ColumnsExpression column = ColumnsExpression.singleColumn(definition, null); Term term = conditionValue == null ? Constants.NULL_VALUE : new MultiElements.Value(type, conditionValue); ColumnCondition condition = new ColumnCondition(column, op, Terms.of(term)); ColumnCondition.Bound bound = condition.bind(QueryOptions.DEFAULT); @@ -205,7 +205,7 @@ private static boolean appliesListCondition(List rowValue, Operator private static boolean conditionContainsApplies(List rowValue, Operator op, ByteBuffer conditionValue) { ColumnMetadata definition = ColumnMetadata.regularColumn("ks", "cf", "c", ListType.getInstance(Int32Type.instance, true), ColumnMetadata.NO_UNIQUE_ID); - ColumnsExpression column = ColumnsExpression.singleColumn(definition); + ColumnsExpression column = ColumnsExpression.singleColumn(definition, null); Terms terms = Terms.of(new Constants.Value(conditionValue)); ColumnCondition condition = new ColumnCondition(column, op, terms); ColumnCondition.Bound bound = condition.bind(QueryOptions.DEFAULT); @@ -215,7 +215,7 @@ private static boolean conditionContainsApplies(List rowValue, Opera private static boolean conditionContainsApplies(Map rowValue, Operator op, ByteBuffer conditionValue) { ColumnMetadata definition = ColumnMetadata.regularColumn("ks", "cf", "c", MapType.getInstance(Int32Type.instance, Int32Type.instance, true), ColumnMetadata.NO_UNIQUE_ID); - ColumnsExpression column = ColumnsExpression.singleColumn(definition); + ColumnsExpression column = ColumnsExpression.singleColumn(definition, null); Terms terms = Terms.of(new Constants.Value(conditionValue)); ColumnCondition condition = new ColumnCondition(column, op, terms); ColumnCondition.Bound bound = condition.bind(QueryOptions.DEFAULT); @@ -226,7 +226,7 @@ private static boolean appliesSetCondition(SortedSet rowValue, Opera { SetType type = SetType.getInstance(Int32Type.instance, true); ColumnMetadata definition = ColumnMetadata.regularColumn("ks", "cf", "c", type, ColumnMetadata.NO_UNIQUE_ID); - ColumnsExpression column = ColumnsExpression.singleColumn(definition); + ColumnsExpression column = ColumnsExpression.singleColumn(definition, null); Term term = conditionValue == null ? Constants.NULL_VALUE : new MultiElements.Value(type, new ArrayList<>(conditionValue)); ColumnCondition condition = new ColumnCondition(column, op, Terms.of(term)); ColumnCondition.Bound bound = condition.bind(QueryOptions.DEFAULT); @@ -236,7 +236,7 @@ private static boolean appliesSetCondition(SortedSet rowValue, Opera private static boolean conditionContainsApplies(SortedSet rowValue, Operator op, ByteBuffer conditionValue) { ColumnMetadata definition = ColumnMetadata.regularColumn("ks", "cf", "c", SetType.getInstance(Int32Type.instance, true), ColumnMetadata.NO_UNIQUE_ID); - ColumnsExpression column = ColumnsExpression.singleColumn(definition); + ColumnsExpression column = ColumnsExpression.singleColumn(definition, null); Terms terms = Terms.of(new Constants.Value(conditionValue)); ColumnCondition condition = new ColumnCondition(column, op, terms); @@ -255,7 +255,7 @@ private boolean conditionUDTApplies(ByteBuffer rowValue, Operator op, ByteBuffer private boolean conditionUDTApplies(UserType ut, ByteBuffer rowValue, Operator op, ByteBuffer conditionValue) { ColumnMetadata column = ColumnMetadata.regularColumn(KEYSPACE, "tbl", "c", ut, ColumnMetadata.NO_UNIQUE_ID); - ColumnCondition.ElementOrFieldAccessBound bounds = new ColumnCondition.ElementOrFieldAccessBound(column, UDT_FIELD_A.bytes, op, conditionValue); + ColumnCondition.ElementOrFieldAccessBound bounds = new ColumnCondition.ElementOrFieldAccessBound(column, null, UDT_FIELD_A.bytes, op, conditionValue); Row row; if (ut.isMultiCell()) { @@ -304,7 +304,7 @@ private static boolean appliesMapCondition(Map rowValue, } term = new MultiElements.Value(type, value); } - ColumnsExpression column = ColumnsExpression.singleColumn(definition); + ColumnsExpression column = ColumnsExpression.singleColumn(definition, null); ColumnCondition condition = new ColumnCondition(column, op, Terms.of(term)); ColumnCondition.Bound bound = condition.bind(QueryOptions.DEFAULT); return bound.appliesTo(newRow(definition, rowValue)); @@ -905,9 +905,8 @@ public void serde() { DataOutputBuffer out = new DataOutputBuffer(); qt().forAll(boundGen()).check(bounds -> { - Schema.instance = Mockito.mock(SchemaProvider.class); - Mockito.when(Schema.instance.getColumnMetadata(Mockito.eq(bounds.column.ksName), Mockito.eq(bounds.column.cfName), Mockito.eq(bounds.column.name.bytes))).thenReturn(bounds.column); - Serializers.testSerde(out, ColumnCondition.Bound.serializer, bounds); + TableMetadatas tables = TableMetadatas.of(bounds.table); + Serializers.testSerde(out, ColumnCondition.Bound.serializer, bounds, tables); }); } @@ -925,9 +924,14 @@ private static org.quicktheories.core.Gen selectColumnKinds return SourceDSL.arbitrary().enumValues(ColumnMetadata.Kind.class); } - private static ColumnMetadata createColumnMetadata(RandomSource rs, ColumnCondition.BoundKind kind) + private static Pair createColumnMetadata(RandomSource rs, ColumnCondition.BoundKind kind) { - return columnMetadataGen(kind).next(rs); + ColumnMetadata cm = columnMetadataGen(kind).next(rs); + TableMetadata.Builder tmb = TableMetadata.builder(cm.ksName, cm.cfName).addColumn(cm); + tmb.addPartitionKeyColumn("", Int32Type.instance); + TableMetadata tm = tmb.build(); + cm = tm.getColumn(cm.name); + return Pair.create(cm, tm); } private static org.quicktheories.core.Gen> selectTypes(ColumnCondition.BoundKind kind) @@ -960,18 +964,18 @@ public static Gen boundGen() return rs -> { ColumnCondition.BoundKind kind = kindGen.next(rs); - ColumnMetadata metadata = createColumnMetadata(rs, kind); + Pair column = createColumnMetadata(rs, kind); Operator operator = operatorGen.next(rs); ByteBuffer value = valueGen.next(rs); switch (kind) { // A condition on a single non-collection column. - case Simple: return new ColumnCondition.SimpleBound(metadata, operator, value); + case Simple: return new ColumnCondition.SimpleBound(column.left, column.right, operator, value); // A condition on a multicell column. // assert column.type.isMultiCell(); - case MultiCell: return new ColumnCondition.MultiCellBound(metadata, operator, value); + case MultiCell: return new ColumnCondition.MultiCellBound(column.left, column.right, operator, value); // The map key, list index or UDT fieldname. - case ElementOrFieldAccess: return new ColumnCondition.ElementOrFieldAccessBound(metadata, Generators.toGen(AbstractTypeGenerators.elementAccess(metadata.type).bytesGen()).next(rs), operator, value); + case ElementOrFieldAccess: return new ColumnCondition.ElementOrFieldAccessBound(column.left, column.right, Generators.toGen(AbstractTypeGenerators.elementAccess(column.left.type).bytesGen()).next(rs), operator, value); default: throw new UnsupportedOperationException(kind.name()); } }; diff --git a/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java b/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java index a8fda344bc30..3229e8ecd785 100644 --- a/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java +++ b/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java @@ -1644,7 +1644,7 @@ private static TableMetadata newTableMetadata(Sort... sorts) private static Restriction newSingleRestriction(TableMetadata tableMetadata, int index, Operator operator, ByteBuffer... values) { ColumnMetadata column = getClusteringColumnDefinition(tableMetadata, index); - return new SimpleRestriction(ColumnsExpression.singleColumn(column), operator, toTerms(values)); + return new SimpleRestriction(ColumnsExpression.singleColumn(column, tableMetadata), operator, toTerms(values)); } /** @@ -1666,7 +1666,7 @@ private static Restriction newMultiEq(TableMetadata tableMetadata, int firstInde types.add(column.type); } TupleType tupleType = new TupleType(types); - return new SimpleRestriction(ColumnsExpression.multiColumns(columns), + return new SimpleRestriction(ColumnsExpression.multiColumns(columns, tableMetadata), Operator.EQ, Terms.of(new MultiElements.Value(tupleType, asList(values)))); } @@ -1699,7 +1699,7 @@ private static Restriction newMultiIN(TableMetadata tableMetadata, int firstInde { terms.add(new MultiElements.Value(tupleType, values[i])); } - return new SimpleRestriction(ColumnsExpression.multiColumns(columns), Operator.IN, Terms.of(terms)); + return new SimpleRestriction(ColumnsExpression.multiColumns(columns, tableMetadata), Operator.IN, Terms.of(terms)); } /** @@ -1734,7 +1734,7 @@ private static Restriction newMultiSlice(TableMetadata tableMetadata, int firstI types.add(column.type); } TupleType type = new TupleType(types); - return new SimpleRestriction(ColumnsExpression.multiColumns(columns), + return new SimpleRestriction(ColumnsExpression.multiColumns(columns, tableMetadata), operator, Terms.of(new MultiElements.Value(type, asList(values)))); } diff --git a/test/unit/org/apache/cassandra/io/Serializers.java b/test/unit/org/apache/cassandra/io/Serializers.java index 16b56b9c394c..829d51c395f5 100644 --- a/test/unit/org/apache/cassandra/io/Serializers.java +++ b/test/unit/org/apache/cassandra/io/Serializers.java @@ -48,6 +48,28 @@ public static void testSerde(DataOutputBuffer output, AsymmetricUnversionedS Assertions.assertThat(buffer.remaining()).describedAs("skip did not consume all the serialized input").isEqualTo(0); } + public static void testSerde(DataOutputBuffer output, ParameterisedUnversionedSerializer serializer, T input, P p) throws IOException + { + output.clear(); + long expectedSize = serializer.serializedSize(input, p); + serializer.serialize(input, p, output); + Assertions.assertThat(output.getLength()).describedAs("The serialized size and bytes written do not match").isEqualTo(expectedSize); + DataInputBuffer in = new DataInputBuffer(output.unsafeGetBufferAndFlip(), false); + T read = serializer.deserialize(p, in); + Assertions.assertThat(read).describedAs("The deserialized output does not match the serialized input; difference %s", new LazyToString(() -> ReflectionUtils.recursiveEquals(read, input).toString())).isEqualTo(input); + } + + public static void testSerde(DataOutputBuffer output, ParameterisedVersionedSerializer serializer, T input, P p, Version version) throws IOException + { + output.clear(); + long expectedSize = serializer.serializedSize(input, p, version); + serializer.serialize(input, p, output, version); + Assertions.assertThat(output.getLength()).describedAs("The serialized size and bytes written do not match").isEqualTo(expectedSize); + DataInputBuffer in = new DataInputBuffer(output.unsafeGetBufferAndFlip(), false); + T read = serializer.deserialize(p, in, version); + Assertions.assertThat(read).describedAs("The deserialized output does not match the serialized input; difference %s", new LazyToString(() -> ReflectionUtils.recursiveEquals(read, input).toString())).isEqualTo(input); + } + public static void testSerde(AsymmetricUnversionedSerializer serializer, T input) throws IOException { try (DataOutputBuffer output = new DataOutputBuffer(Math.toIntExact(serializer.serializedSize(input)))) @@ -93,4 +115,12 @@ public static void testSerde(AsymmetricVersionedSerializer void testSerde(ParameterisedVersionedSerializer serializer, T input, P param, Version version) throws IOException + { + try (DataOutputBuffer output = new DataOutputBuffer(Math.toIntExact(serializer.serializedSize(input, param, version)))) + { + testSerde(output, serializer, input, param, version); + } + } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 1bb8614ca4cc..397bc8fc28fd 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -98,6 +98,8 @@ import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; @@ -302,7 +304,10 @@ public static Txn createWriteTxn(int key) public static Txn createTxn(Txn.Kind kind, Seekables seekables) { - return new Txn.InMemory(kind, seekables, TxnRead.empty(seekables.domain()), TxnQuery.NONE, null); + TableMetadatas.Collector tables = new TableMetadatas.Collector(); + for (Seekable seekable : seekables) + tables.add(TableMetadata.minimal("", "", (TableId)seekable.prefix())); + return new Txn.InMemory(kind, seekables, TxnRead.empty(seekables.domain()), TxnQuery.NONE, null, new TableMetadatasAndKeys(tables.build(), seekables)); } public static Ranges fullRange(Txn txn) @@ -319,8 +324,10 @@ public static Ranges fullRange(Seekables keys) public static PartialTxn createPartialTxn(int key) { Txn txn = createTxn(key, key); - Ranges ranges = fullRange(txn); - return new PartialTxn.InMemory(txn.kind(), txn.keys(), txn.read(), txn.query(), txn.update()); + TableMetadatas.Collector tables = new TableMetadatas.Collector(); + for (Seekable seekable : txn.keys()) + tables.add(TableMetadata.minimal("", "", (TableId)seekable.prefix())); + return new PartialTxn.InMemory(txn.kind(), txn.keys(), txn.read(), txn.query(), txn.update(), new TableMetadatasAndKeys(tables.build(), txn.keys())); } public static AccordCommandStore createAccordCommandStore( diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 01299f4e586e..90241b747560 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -201,7 +201,7 @@ ICommand.Builder builder() if (saveStatus.known.outcome() == Known.Outcome.Apply) { if (txnId.is(Kind.Write)) - builder.writes(new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true))); + builder.writes(new Writes(txnId, executeAt, txn.keys(), new TxnWrite(TableMetadatas.none(), Collections.emptyList(), true))); builder.result(new TxnData()); } return builder; diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializerTest.java new file mode 100644 index 000000000000..f06f3c15f138 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializerTest.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.stream.Collectors; + +import org.junit.Test; + +import accord.api.Key; +import accord.api.RoutingKey; +import accord.primitives.Keys; +import accord.primitives.Range; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Routables; +import accord.primitives.RoutingKeys; +import accord.utils.Gen; +import accord.utils.Gens; +import accord.utils.RandomSource; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.Serializers; +import org.apache.cassandra.io.UnversionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.AccordGenerators; + +import static accord.utils.Property.qt; + +public class IVersionedWithKeysSerializerTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + @Test + public void test() + { + @SuppressWarnings({ "resource", "IOResourceOpenedButNotSafelyClosed" }) DataOutputBuffer output = new DataOutputBuffer(); + qt().forAll(Gens.random(), routables()).check((rs, superset) -> { + var serializer = serializer(superset); + Serializers.testSerde(output, serializer, superset); + if (superset.isEmpty()) return; + // find subsets + Gen> gen = subset(superset); + for (int i = 0; i < 100; i++) + Serializers.testSerde(output, serializer, gen.next(rs)); + }); + } + + private static Gen> routables() + { + Gen partitionerGen = AccordGenerators.partitioner(); + Gen routableKindGen = Gens.enums().all(Routable.Kind.class); + return rs -> { + IPartitioner partitioner = partitionerGen.next(rs); + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + switch (routableKindGen.next(rs)) + { + case SeekableKey: return seekablekeysSuperset(rs, partitioner); + case UnseekableKey: return unseekablekeysSuperset(rs, partitioner); + case Range: return rangesSuperset(rs, partitioner); + default: throw new UnsupportedOperationException(); + } + }; + } + + static Keys seekablekeysSuperset(RandomSource rs, IPartitioner partitioner) + { + return Keys.of(Gens.lists(AccordGenerators.keys(partitioner)).unique().ofSizeBetween(0, 100).next(rs)); + } + + private static RoutingKeys unseekablekeysSuperset(RandomSource rs, IPartitioner partitioner) + { + return RoutingKeys.of(Gens.arrays(RoutingKey.class, (Gen) (Gen) AccordGenerators.routingKeysGen(partitioner)).unique().ofSizeBetween(0, 100).next(rs)); + } + + static Ranges rangesSuperset(RandomSource rs, IPartitioner partitioner) + { + return AccordGenerators.rangesSplitOrArbitrary(partitioner, Gens.ints().between(0, 100)).next(rs); + } + + private static Gen> subset(Routables superset) + { + switch (superset.domainKind()) + { + case SeekableKey: return seekablekeysSubset((Keys) superset); + case UnseekableKey: return unseekablekeysSubset((RoutingKeys) superset); + case Range: return rangesSubset((Ranges) superset); + default: throw new UnsupportedOperationException(); + } + } + + private static Gen> seekablekeysSubset(Keys superset) + { + return Gens.select(superset.stream().collect(Collectors.toList())).map(l -> Keys.of(l.toArray(Key[]::new))); + } + + private static Gen> unseekablekeysSubset(RoutingKeys superset) + { + return Gens.select(superset.stream().collect(Collectors.toList())).map(l -> RoutingKeys.of(l.toArray(RoutingKey[]::new))); + } + + private static Gen> rangesSubset(Ranges superset) + { + return Gens.select(superset.stream().collect(Collectors.toList())).map(l -> Ranges.of(l.toArray(Range[]::new))); + } + + private static UnversionedSerializer> serializer(Routables superset) + { + class S extends IVersionedWithKeysSerializer.AbstractWithKeysSerializer implements UnversionedSerializer> + { + @Override + public void serialize(Routables t, DataOutputPlus out) throws IOException + { + serializeSubsetInternal(t, superset, out); + } + + @Override + public Routables deserialize(DataInputPlus in) throws IOException + { + return deserializeSubsetInternal(superset, in); + } + + @Override + public long serializedSize(Routables t) + { + return serializedSubsetSizeInternal(t, superset); + } + + @Override + public void skip(DataInputPlus in) throws IOException + { + skipSubsetInternal(superset.size(), in); + } + } + return new S(); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java index 0c964e5d1266..6dc45af8bb8c 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/KeySerializersTest.java @@ -222,6 +222,6 @@ private static Participants selectSubset(RandomSource private static Gen rangesGen() { - return partitioner().flatMap(AccordGenerators::rangesSplitOrArbitrary); + return partitioner().flatMap(p -> AccordGenerators.rangesSplitOrArbitrary(p)); } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeysTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeysTest.java new file mode 100644 index 000000000000..14a41cb4bb58 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasAndKeysTest.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; + +import org.junit.Test; + +import accord.primitives.Keys; +import accord.primitives.Ranges; +import accord.primitives.Routable; +import accord.primitives.Seekable; +import accord.primitives.Seekables; +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.Serializers; +import org.apache.cassandra.io.UnversionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.AccordGenerators; + +import static accord.utils.Property.qt; +import static org.apache.cassandra.service.accord.serializers.TableMetadatasTest.buildSchema; +import static org.apache.cassandra.service.accord.serializers.TableMetadatasTest.toMetadatas; + +public class TableMetadatasAndKeysTest +{ + static + { + DatabaseDescriptor.clientInitialization(); + DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); + } + + private static final Gen partitionerGen = AccordGenerators.partitioner(); + + @Test + public void test() + { + Gen domainGen = Gens.enums().all(Routable.Domain.class); + @SuppressWarnings({ "resource", "IOResourceOpenedButNotSafelyClosed" }) DataOutputBuffer output = new DataOutputBuffer(); + qt().forAll(Gens.random(), TableMetadatasTest.tables().filter(m -> !m.isEmpty())).check((rs, tables) -> { + TableMetadatas metadatas = toMetadatas(tables); + Schema.instance = buildSchema(tables); + + Seekables keysOrRanges; + switch (domainGen.next(rs)) + { + case Key: + keysOrRanges = createKeys(tables).next(rs); + break; + case Range: + keysOrRanges = createRanges(tables).next(rs); + break; + default: throw new UnsupportedOperationException(); + } + + TableMetadatasAndKeys tablesAndKeys = new TableMetadatasAndKeys(metadatas, keysOrRanges); + + Serializers.testSerde(output, TableMetadatasAndKeys.serializer, tablesAndKeys); + var serializer = serializer(tablesAndKeys); + var partitionSerializer = partitionKeySerializer(tablesAndKeys); + for (Seekable s : keysOrRanges) + { + Serializers.testSerde(output, serializer, s); + if (s instanceof PartitionKey) + Serializers.testSerde(output, partitionSerializer, (PartitionKey) s); + } + }); + } + + private static Gen createKeys(LinkedHashMap tables) + { + return rs -> { + IPartitioner partitioner = partitionerGen.next(rs); + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + Gen keyGen = AccordGenerators.keys(partitioner, new ArrayList<>(tables.keySet())); + return Keys.of(Gens.lists(keyGen).unique().ofSizeBetween(1, 100).next(rs)); + }; + } + + private static Gen createRanges(LinkedHashMap tables) + { + return rs -> { + var partitioner = partitionerGen.next(rs); + DatabaseDescriptor.setPartitionerUnsafe(partitioner); + return AccordGenerators.rangesSplitOrArbitrary(partitioner, Gens.ints().between(1, 100), Gens.constant(new ArrayList<>(tables.keySet()))).next(rs); + }; + } + + private static UnversionedSerializer serializer(TableMetadatasAndKeys tableAndKeys) + { + return new UnversionedSerializer<>() + { + @Override + public void serialize(Seekable t, DataOutputPlus out) throws IOException + { + tableAndKeys.serializeSeekable(t, out); + } + + @Override + public Seekable deserialize(DataInputPlus in) throws IOException + { + return tableAndKeys.deserializeSeekable(in); + } + + @Override + public long serializedSize(Seekable t) + { + return tableAndKeys.serializedSeekableSize(t); + } + }; + } + + public static UnversionedSerializer partitionKeySerializer(TableMetadatasAndKeys tableAndKeys) + { + return new UnversionedSerializer<>() + { + @Override + public void serialize(PartitionKey t, DataOutputPlus out) throws IOException + { + tableAndKeys.serializeKey(t, out); + } + + @Override + public PartitionKey deserialize(DataInputPlus in) throws IOException + { + return tableAndKeys.deserializeKey(in); + } + + @Override + public long serializedSize(PartitionKey t) + { + return tableAndKeys.serializedKeySize(t); + } + }; + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasTest.java new file mode 100644 index 000000000000..e89f49bffa89 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/accord/serializers/TableMetadatasTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord.serializers; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.junit.Test; + +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.exceptions.UnknownTableException; +import org.apache.cassandra.io.Serializers; +import org.apache.cassandra.io.UnversionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaProvider; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.CassandraGenerators; +import org.apache.cassandra.utils.Generators; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import static accord.utils.Property.qt; + +public class TableMetadatasTest +{ + @Test + public void test() + { + @SuppressWarnings({ "resource", "IOResourceOpenedButNotSafelyClosed" }) DataOutputBuffer output = new DataOutputBuffer(); + qt().forAll(tables()).check(tables -> { + TableMetadatas metadatas = toMetadatas(tables); + Schema.instance = buildSchema(tables); + Serializers.testSerde(output, SelfSerializer.instance, metadatas); + + UnversionedSerializer serializer = tableSerializer(metadatas); + for (var metadata : tables.values()) + Serializers.testSerde(output, serializer, metadata); + }); + } + + static SchemaProvider buildSchema(Map tables) throws UnknownTableException + { + SchemaProvider schema = Mockito.mock(SchemaProvider.class); + Mockito.when(schema.getTableMetadata(Mockito.any())).thenAnswer(new Answer() + { + @Override + public TableMetadata answer(InvocationOnMock invocationOnMock) throws Throwable + { + TableId id = invocationOnMock.getArgument(0); + var metadata = tables.get(id); + if (metadata == null) throw new UnknownTableException("Unknown table " + id, id); + return metadata; + } + }); + return schema; + } + + static TableMetadatas toMetadatas(Map map) + { + TableMetadatas.Collector collector = new TableMetadatas.Collector(); + map.values().forEach(collector::add); + return collector.build(); + } + + static Gen> tables() + { + Gen idGen = Generators.toGen(CassandraGenerators.TABLE_ID_GEN); + return rs -> { + TableId[] ids = Gens.arrays(TableId.class, idGen).unique().ofSizeBetween(0, 100).next(rs); + LinkedHashMap map = new LinkedHashMap<>(); + for (int i = 0; i < ids.length; i++) + map.put(ids[i], forId(ids[i])); + return map; + }; + } + + private static TableMetadata forId(TableId id) + { + TableMetadata metadata = TableMetadata.minimal("ks", "tbl", id); + if (!metadata.id().equals(id)) throw new AssertionError("Unexpected table id: " + metadata.id() + "; expected " + id); + return metadata; + } + + private static UnversionedSerializer tableSerializer(TableMetadatas metadatas) + { + return new UnversionedSerializer<>() + { + @Override + public void serialize(TableMetadata t, DataOutputPlus out) throws IOException + { + metadatas.serialize(t, out); + } + + @Override + public TableMetadata deserialize(DataInputPlus in) throws IOException + { + return metadatas.deserialize(in); + } + + @Override + public long serializedSize(TableMetadata t) + { + return metadatas.serializedSize(t); + } + }; + } + + private enum SelfSerializer implements UnversionedSerializer + { + instance; + + @Override + public void serialize(TableMetadatas t, DataOutputPlus out) throws IOException + { + t.serializeSelf(out); + } + + @Override + public TableMetadatas deserialize(DataInputPlus in) throws IOException + { + return TableMetadatas.deserializeSelf(in); + } + + @Override + public long serializedSize(TableMetadatas t) + { + return t.serializedSelfSize(); + } + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java b/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java index cdb16793b744..2c2b89193adf 100644 --- a/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java +++ b/test/unit/org/apache/cassandra/service/accord/txn/AbstractKeySortedTest.java @@ -106,12 +106,6 @@ Seekable getKey(Item item) return item.key; } - @Override - Domain domain() - { - return Domain.Key; - } - @Override Item[] newArray(int size) { diff --git a/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java b/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java index 1ae22aaed394..15df8a781bde 100644 --- a/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java +++ b/test/unit/org/apache/cassandra/service/accord/txn/AccordUpdateTest.java @@ -28,6 +28,7 @@ import org.apache.cassandra.io.Serializers; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.serializers.TableMetadatasAndKeys; import org.apache.cassandra.service.accord.serializers.Version; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; @@ -47,8 +48,9 @@ public static void setupClass() public void predicateSerializer() throws IOException { Txn txn = AccordTestUtils.createTxn(0, 0); - AccordUpdate update = (AccordUpdate) txn.update(); + TxnUpdate update = (TxnUpdate) txn.update(); + TableMetadatasAndKeys tablesAndKeys = new TableMetadatasAndKeys(update.tables, update.keys()); for (Version version : Version.V1.greaterThanOrEqual()) - Serializers.testSerde(AccordUpdate.serializer, update, version); + Serializers.testSerde(AccordUpdate.serializer, update, tablesAndKeys, version); } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/txn/TxnConditionTest.java b/test/unit/org/apache/cassandra/service/accord/txn/TxnConditionTest.java index 5c77379411bc..1cb1199f7f1e 100644 --- a/test/unit/org/apache/cassandra/service/accord/txn/TxnConditionTest.java +++ b/test/unit/org/apache/cassandra/service/accord/txn/TxnConditionTest.java @@ -32,12 +32,16 @@ import org.apache.cassandra.cql3.conditions.ColumnCondition; import org.apache.cassandra.cql3.conditions.ColumnConditionTest; import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.io.Serializers; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.MockSchema; import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.service.accord.serializers.Version; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.ByteBufferUtil; @@ -68,8 +72,17 @@ public class TxnConditionTest }); private static Gen BYTES_GEN = Generators.toGen(Generators.directAndHeapBytes(0, 10)); private static Gen TXN_REF_GEN = rs -> { - return rs.nextBoolean() ? new TxnReference(rs.nextInt(0, Integer.MAX_VALUE), COLUM_METADATA_GEN.next(rs)) - : new TxnReference(rs.nextInt(0, Integer.MAX_VALUE), COLUM_METADATA_GEN.next(rs), CellPath.create(BYTES_GEN.next(rs))); + { + ColumnMetadata cm = COLUM_METADATA_GEN.next(rs); + TableMetadata.Builder builder = TableMetadata.builder("", "", TableId.generate()) + .addColumn(cm); + if (!cm.isPartitionKey()) + builder.addPartitionKeyColumn(cm.name.toString().equals("_") ? "__" : "_", Int32Type.instance); + TableMetadata tm = builder.build(); + cm = tm.getColumn(cm.name); + return rs.nextBoolean() ? new TxnReference(rs.nextInt(0, Integer.MAX_VALUE), cm, tm) + : new TxnReference(rs.nextInt(0, Integer.MAX_VALUE), tm, cm, CellPath.create(BYTES_GEN.next(rs))); + } }; private static Gen> CLUSTERING_GEN = Generators.toGen(CassandraGenerators.CLUSTERING_GEN); private static Gen BOUND_GEN = ColumnConditionTest.boundGen().map(b -> { @@ -82,8 +95,11 @@ public void serde() { DataOutputBuffer output = new DataOutputBuffer(); qt().forAll(txnConditionGen()).check(condition -> { + TableMetadatas.Collector collector = new TableMetadatas.Collector(); + condition.collect(collector); + TableMetadatas tables = collector.build(); for (Version version : Version.V1.greaterThanOrEqual()) - Serializers.testSerde(output, TxnCondition.serializer, condition, version); + Serializers.testSerde(output, TxnCondition.serializer, condition, tables, version); SCHEMA.clear(); }); } diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index 03c230d9c1df..455ed25854e4 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -80,6 +80,7 @@ import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.serializers.TableMetadatas; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.quicktheories.impl.JavaRandom; @@ -100,6 +101,7 @@ public class AccordGenerators { private static final Gen PARTITIONER_GEN = fromQT(CassandraGenerators.nonLocalPartitioners()); + private static final Gen TABLE_ID_GEN = fromQT(CassandraGenerators.TABLE_ID_GEN); private AccordGenerators() { @@ -265,7 +267,7 @@ private ICommand attributes(SaveStatus saveStatus) if (saveStatus.hasBeen(Status.PreApplied) && !saveStatus.hasBeen(Status.Truncated)) { if (txnId.is(Write)) - builder.writes(new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(Collections.emptyList(), true))); + builder.writes(new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(TableMetadatas.none(), Collections.emptyList(), true))); builder.result(new TxnData()); } return builder; @@ -320,8 +322,8 @@ public Command build(SaveStatus saveStatus) else return Truncated.truncated(command, saveStatus, executeAt, null, null, null, null); case TruncatedApplyWithOutcome: - if (txnId.kind().awaitsOnlyDeps()) return Truncated.truncated(command, saveStatus, executeAt, command.partialDeps(), txnId.is(Write) ? new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(Collections.emptyList(), true)) : null, new TxnData(), txnId); - else return Truncated.truncated(command, saveStatus, executeAt, command.partialDeps(), txnId.is(Write) ? new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(Collections.emptyList(), true)) : null, new TxnData(), null); + if (txnId.kind().awaitsOnlyDeps()) return Truncated.truncated(command, saveStatus, executeAt, command.partialDeps(), txnId.is(Write) ? new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(TableMetadatas.none(), Collections.emptyList(), true)) : null, new TxnData(), txnId); + else return Truncated.truncated(command, saveStatus, executeAt, command.partialDeps(), txnId.is(Write) ? new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(TableMetadatas.none(), Collections.emptyList(), true)) : null, new TxnData(), null); case Erased: case Vestigial: @@ -333,13 +335,20 @@ public Command build(SaveStatus saveStatus) public static Gen keys() { - return keys(fromQT(CassandraGenerators.TABLE_ID_GEN), + return keys(TABLE_ID_GEN, fromQT(CassandraGenerators.decoratedKeys())); } public static Gen keys(IPartitioner partitioner) { - return keys(fromQT(CassandraGenerators.TABLE_ID_GEN), + return keys(TABLE_ID_GEN, + fromQT(CassandraGenerators.decoratedKeys(ignore -> partitioner))); + } + + public static Gen keys(IPartitioner partitioner, List tables) + { + //TODO (correctness): fix Gens.pick to not fail with lists of size 1 + return keys(tables.size() == 1 ? Gens.constant(tables.get(0)) : Gens.pick(tables), fromQT(CassandraGenerators.decoratedKeys(ignore -> partitioner))); } @@ -350,7 +359,7 @@ public static Gen keys(Gen tableIdGen, Gen public static Gen routingKeysGen(IPartitioner partitioner) { - return routingKeyGen(fromQT(CassandraGenerators.TABLE_ID_GEN), + return routingKeyGen(TABLE_ID_GEN, fromQT(CassandraGenerators.token(partitioner)), partitioner); } @@ -398,12 +407,17 @@ public static Gen allowBeforeAndAfter(Gen gen) public static Gen range() { - return partitioner().flatMap(partitioner -> range(fromQT(CassandraGenerators.TABLE_ID_GEN), fromQT(CassandraGenerators.token(partitioner)), partitioner)); + return partitioner().flatMap(partitioner -> range(TABLE_ID_GEN, fromQT(CassandraGenerators.token(partitioner)), partitioner)); } public static Gen range(IPartitioner partitioner) { - return range(fromQT(CassandraGenerators.TABLE_ID_GEN), fromQT(CassandraGenerators.token(partitioner)), partitioner); + return range(TABLE_ID_GEN, fromQT(CassandraGenerators.token(partitioner)), partitioner); + } + + public static Gen range(IPartitioner partitioner, Gen tables) + { + return range(tables, fromQT(CassandraGenerators.token(partitioner)), partitioner); } public static Gen range(Gen tables, Gen tokenGen, IPartitioner partitioner) @@ -430,16 +444,23 @@ private static boolean same(TokenKey a, TokenKey b) public static Gen ranges() { // javac couldn't pick the right constructor with HashSet::new, so had to create new lambda... - return ranges(Gens.lists(fromQT(CassandraGenerators.TABLE_ID_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), partitioner()); + return ranges(Gens.lists(TABLE_ID_GEN).unique().ofSizeBetween(1, 10), partitioner()); } - public static Gen ranges(Gen> tableIdGen, Gen partitionerGen) + public static Gen ranges(Gen> tableIdGen, Gen partitionerGen) + { + Gen.IntGen splitsGen = Gens.ints().between(10, 99); + return ranges(tableIdGen, partitionerGen, splitsGen); + } + + public static Gen ranges(Gen> tableIdGen, Gen partitionerGen, Gen.IntGen splitsGen) { return rs -> { - Set tables = tableIdGen.next(rs); + List tables = tableIdGen.next(rs); IPartitioner partitioner = partitionerGen.next(rs); List ranges = new ArrayList<>(); - int numSplits = rs.nextInt(10, 100); + int numSplits = splitsGen.nextInt(rs); + if (numSplits == 0) return Ranges.EMPTY; TokenRange range = TokenRange.create(TokenKey.min(TABLE_ID1, partitioner), TokenKey.max(TABLE_ID1, partitioner)); AccordSplitter splitter = partitioner.accordSplitter().apply(Ranges.of(range)); BigInteger size = splitter.sizeOf(range); @@ -461,19 +482,34 @@ public static Gen ranges(Gen> tableIdGen, Gen public static Gen ranges(IPartitioner partitioner) { - return ranges(Gens.lists(fromQT(CassandraGenerators.TABLE_ID_GEN)).unique().ofSizeBetween(1, 10).map(l -> new HashSet<>(l)), ignore -> partitioner); + return ranges(Gens.lists(TABLE_ID_GEN).unique().ofSizeBetween(1, 10), ignore -> partitioner); + } + + public static Gen ranges(IPartitioner partitioner, Gen.IntGen splitsGen) + { + return ranges(Gens.lists(TABLE_ID_GEN).unique().ofSizeBetween(1, 10), ignore -> partitioner, splitsGen); } public static Gen ranges(TableId tableId, IPartitioner partitioner) { - Set tables = Collections.singleton(tableId); + List tables = Collections.singletonList(tableId); return ranges(i -> tables, i -> partitioner); } public static Gen rangesArbitrary(IPartitioner partitioner) { - Gen rangeGen = range(partitioner); Gen.IntGen sizeGen = Gens.ints().between(0, 10); + return rangesArbitrary(partitioner, sizeGen); + } + + public static Gen rangesArbitrary(IPartitioner partitioner, Gen.IntGen sizeGen) + { + return rangesArbitrary(partitioner, TABLE_ID_GEN, sizeGen); + } + + public static Gen rangesArbitrary(IPartitioner partitioner, Gen tables, Gen.IntGen sizeGen) + { + Gen rangeGen = range(partitioner, tables); return rs -> { int targetSize = sizeGen.nextInt(rs); List ranges = new ArrayList<>(targetSize); @@ -490,6 +526,18 @@ public static Gen rangesSplitOrArbitrary(IPartitioner partitioner) return rs -> rs.nextBoolean() ? split.next(rs) : arbitrary.next(rs); } + public static Gen rangesSplitOrArbitrary(IPartitioner partitioner, Gen.IntGen sizeGen) + { + return rangesSplitOrArbitrary(partitioner, sizeGen, Gens.lists(TABLE_ID_GEN).unique().ofSizeBetween(1, 10)); + } + + public static Gen rangesSplitOrArbitrary(IPartitioner partitioner, Gen.IntGen sizeGen, Gen> tableIdGen) + { + Gen split = ranges(tableIdGen, i -> partitioner, sizeGen); + Gen arbitrary = rangesArbitrary(partitioner, tableIdGen.map((rs, l) -> rs.pick(l)), sizeGen); + return rs -> rs.nextBoolean() ? split.next(rs) : arbitrary.next(rs); + } + public static Gen keyDepsGen(IPartitioner partitioner) { return AccordGens.keyDeps(AccordGenerators.routingKeysGen(partitioner)); diff --git a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java index ea34b72cb09f..33ebdb6c3a23 100644 --- a/test/unit/org/apache/cassandra/utils/CassandraGenerators.java +++ b/test/unit/org/apache/cassandra/utils/CassandraGenerators.java @@ -1198,6 +1198,7 @@ public static Gen columnMetadataGen(Gen kin { Gen ksNameGen = CassandraGenerators.KEYSPACE_NAME_GEN; Gen tableNameGen = IDENTIFIER_GEN; + return rs -> { String ks = ksNameGen.generate(rs); String table = tableNameGen.generate(rs); @@ -1224,7 +1225,7 @@ private static ColumnMetadata createColumnDefinition(String ks, String table, // empty type is also not supported, so filter out case PARTITION_KEY: case CLUSTERING: - typeGen = Generators.filter(typeGen, t -> t != EmptyType.instance).map(AbstractType::freeze); + typeGen = Generators.filter(typeGen, t -> t != EmptyType.instance && t != CounterColumnType.instance).map(AbstractType::freeze); break; } if (kind == ColumnMetadata.Kind.CLUSTERING) From d4d858d3822c85e6b4b71b1004d8ba6c63fa5134 Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Tue, 22 Apr 2025 14:31:22 -0600 Subject: [PATCH 276/340] Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. patch by Jon Meredith; reviewed by Marcus Eriksson for CASSANDRA-20561 --- CHANGES.txt | 1 + .../org/apache/cassandra/db/LivenessInfo.java | 11 ++- .../org/apache/cassandra/db/rows/Cells.java | 10 +++ .../org/apache/cassandra/db/CellTest.java | 85 ++++++++++++++++++- .../apache/cassandra/db/LivenessInfoTest.java | 5 ++ .../apache/cassandra/db/rows/RowsTest.java | 54 ++++++++++++ 6 files changed, 163 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4332b7555575..ddd87100adb2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * Avoid computing prepared statement size for unprepared batches (CASSANDRA-20556) * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) diff --git a/src/java/org/apache/cassandra/db/LivenessInfo.java b/src/java/org/apache/cassandra/db/LivenessInfo.java index f3e6daabad80..2857b0b3814a 100644 --- a/src/java/org/apache/cassandra/db/LivenessInfo.java +++ b/src/java/org/apache/cassandra/db/LivenessInfo.java @@ -193,7 +193,10 @@ public int dataSize() * supersedes, ie. tombstone supersedes. * * If timestamps are the same and both of them are expired livenessInfo(Ideally it shouldn't happen), - * greater localDeletionTime wins. + * greater localDeletionTime wins. If the localDeletion times are the same, prefer the + * lower TTL to make the merge deterministic (it is likely that the row has been rewritten with + * USING TTL/TIMESTAMP with an updated TTL that computes to the same local deletion time -- perhaps + * from rerunning a process to migrate user data between clusters or tables). * * @param other * the {@code LivenessInfo} to compare this info to. @@ -207,7 +210,11 @@ public boolean supersedes(LivenessInfo other) if (isExpired() ^ other.isExpired()) return isExpired(); if (isExpiring() == other.isExpiring()) - return localExpirationTime() > other.localExpirationTime(); + { + return localExpirationTime() > other.localExpirationTime() || + (localExpirationTime() == other.localExpirationTime() && ttl() < other.ttl()); + } + return isExpiring(); } diff --git a/src/java/org/apache/cassandra/db/rows/Cells.java b/src/java/org/apache/cassandra/db/rows/Cells.java index 59f1d3f7fd8b..ce774080eec5 100644 --- a/src/java/org/apache/cassandra/db/rows/Cells.java +++ b/src/java/org/apache/cassandra/db/rows/Cells.java @@ -113,6 +113,16 @@ private static Cell resolveRegular(Cell left, Cell right) // would otherwise always win (unless it had an empty value), until it expired and was translated to a tombstone if (leftLocalDeletionTime != rightLocalDeletionTime) return leftLocalDeletionTime > rightLocalDeletionTime ? left : right; + + // Both cells are either tombstones or expiring at the same timestamp. If expiring and the + // TTLs differ, write the lower one -- the write is probably from a more recent + // UPDATE USING TTL AND TIMESTAMP, so select the most recent one to be deterministic and be + // closest to client intent. + if (!leftIsTombstone && left.ttl() != right.ttl()) + { + assert !rightIsTombstone; + return left.ttl() < right.ttl() ? left : right; + } } return compareValues(left, right) >= 0 ? left : right; diff --git a/test/unit/org/apache/cassandra/db/CellTest.java b/test/unit/org/apache/cassandra/db/CellTest.java index d4dec05e3bff..a7c334bb794b 100644 --- a/test/unit/org/apache/cassandra/db/CellTest.java +++ b/test/unit/org/apache/cassandra/db/CellTest.java @@ -41,7 +41,8 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; -import static java.util.Arrays.*; +import static java.util.Arrays.asList; +import static org.junit.Assert.assertEquals; public class CellTest { @@ -62,6 +63,8 @@ public class CellTest .addRegularColumn("v", IntegerType.instance) .addRegularColumn("m", MapType.getInstance(IntegerType.instance, IntegerType.instance, true)) .build(); + public static final ByteBuffer TEST_VALUE = ByteBufferUtil.bytes("a"); + @BeforeClass public static void defineSchema() throws ConfigurationException @@ -253,6 +256,86 @@ public void testExpiringCellReconile() Assert.assertEquals(-1, testExpiring("val", "b", 2, 1, null, "a", null, 1)); } + + public static void assertCellsEqual(Cell cellA, Cell cellB) + { + assertEquals(cellA.timestamp(), cellB.timestamp()); + assertEquals(cellA.ttl(), cellB.ttl()); + assertEquals(cellA.localDeletionTime(), cellB.localDeletionTime()); + assertEquals(cellA.buffer(), cellB.buffer()); + } + + static void checkCommutes(ColumnMetadata cmd, long timestamp, long tsDiff, int ttl, int ttlDiff, int nowInSeconds, int nowDiff) + { + long timestampA = timestamp; + long timestampB = timestampA + tsDiff; + int ttlA = ttl; + int ttlB = ttl + ttlDiff; + int nowInSecsA = nowInSeconds; + int nowInSecsB = nowInSecsA + nowDiff; + if (nowInSecsA < 0 || nowInSecsB < 0) + return; + + Cell cellA = ttlA == 0 ? BufferCell.tombstone(cmd, timestampA, nowInSecsA) : + ttlA < 0 ? BufferCell.live(cmd, timestampA, TEST_VALUE) : + BufferCell.expiring(cmd, timestampA, ttlA, nowInSecsA, TEST_VALUE); + Cell cellB = ttlB == 0 ? BufferCell.tombstone(cmd, timestampB, nowInSecsB) : + ttlB < 0 ? BufferCell.live(cmd, timestampB, TEST_VALUE) : + BufferCell.expiring(cmd, timestampB, ttlB, nowInSecsB, TEST_VALUE); + + Cell cellAB = Cells.reconcile(cellA, cellB); + Cell cellBA = Cells.reconcile(cellB, cellA); + + assertCellsEqual(cellAB, cellBA); + } + + @Test + public void checkSameValueDifferentLivenessCommutes() + { + ColumnMetadata cmd = fakeColumn("c", UTF8Type.instance); + long[] tsDiffs = new long[] {0L, + 1L, // microsecond + 1000L, // millisecond + 1000000L, // second + 60000000L}; // minute + int[] ttls = new int[] { -1, 0, 1, 3600, 24 * 3600, 7 * 24 * 3600, 60 * 24 * 3600, 366 * 24 * 3600 }; + int[] ttlDiffs = new int[] { 0, 1, 60, 3600, 24 * 3600, 7 * 24 * 3600, 60 * 24 * 3600, 366 * 24 * 3600 }; + + int nowInSeconds = FBUtilities.nowInSeconds(); + long timestamp = FBUtilities.timestampMicros(); + + for (long tsDiff: tsDiffs) + { + for (int ttl: ttls) + { + for (int ttlDiff : ttlDiffs) + { + for (Integer nowDiff : ttlDiffs) + checkCommutes(cmd, timestamp, tsDiff, ttl, ttlDiff, nowInSeconds, nowDiff); + } + } + } + } + + // Checks that reconciling a cell with a smaller TTL reconcile commutatively + // Similar to rewriting data retrieved with SELECT v, TTL(v), WRITETIMESTAMP(v) with + // INSERT SET v=? USING TTL ? AND TIMESTAMP ? + @Test + public void rewriteCellWithSmallerTTL() + { + ColumnMetadata cmd = fakeColumn("c", UTF8Type.instance); + int[] nowDiffs = new int[] { 0, 1, 60, 3600, 24 * 3600, 7 * 24 * 3600, 60 * 24 * 3600, 366 * 24 * 3600 }; + long timestamp = FBUtilities.timestampMicros(); + int nowInSeconds = FBUtilities.nowInSeconds(); + int ttl = 3600; + + for (Integer nowDiff : nowDiffs) + { + checkCommutes(cmd, timestamp, 0L, ttl, -nowDiff, nowInSeconds, nowDiff); + } + } + + class SimplePurger implements DeletionPurger { private final int gcBefore; diff --git a/test/unit/org/apache/cassandra/db/LivenessInfoTest.java b/test/unit/org/apache/cassandra/db/LivenessInfoTest.java index 557870672e54..0482423f9d77 100644 --- a/test/unit/org/apache/cassandra/db/LivenessInfoTest.java +++ b/test/unit/org/apache/cassandra/db/LivenessInfoTest.java @@ -75,6 +75,11 @@ public void testSupersedes() first = LivenessInfo.withExpirationTime(100, LivenessInfo.EXPIRED_LIVENESS_TTL, nowInSeconds + 1); second = LivenessInfo.withExpirationTime(100, LivenessInfo.EXPIRED_LIVENESS_TTL, nowInSeconds); assertSupersedes(first, second); + + // rewritten expiring with the same expiration time and a lower TTL, take the lower TTL as likely to be more recent + first = LivenessInfo.withExpirationTime(100, 4, nowInSeconds); + second = LivenessInfo.withExpirationTime(100, 5, nowInSeconds); + assertSupersedes(first, second); } @Test diff --git a/test/unit/org/apache/cassandra/db/rows/RowsTest.java b/test/unit/org/apache/cassandra/db/rows/RowsTest.java index cfeebfd2b1f0..47e42b254240 100644 --- a/test/unit/org/apache/cassandra/db/rows/RowsTest.java +++ b/test/unit/org/apache/cassandra/db/rows/RowsTest.java @@ -45,6 +45,8 @@ import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; +import static org.apache.cassandra.db.CellTest.assertCellsEqual; + public class RowsTest { private static final String KEYSPACE = "rows_test"; @@ -520,6 +522,58 @@ public void mergeRowDeletionSupercedesLiveness() Assert.assertEquals(0, merged.columns().size()); } + + public static BufferCell expiringWithExpirationTime(ColumnMetadata column, long timestamp, int ttl, int localDeletionTime, ByteBuffer value) + { + return expiringWithExpirationTime(column, timestamp, ttl, localDeletionTime, value, null); + } + + public static BufferCell expiringWithExpirationTime(ColumnMetadata column, long timestamp, int ttl, int localDeletionTime, ByteBuffer value, CellPath path) + { + assert ttl != Cell.NO_TTL; + return new BufferCell(column, timestamp, ttl, localDeletionTime, value, path); + } + + @Test + public void mergeRowsWithSameExpiryDifferentTTLCommutesLiveness() + { + int now1 = FBUtilities.nowInSeconds(); + long ts1 = secondToTs(now1); + int ldt = now1 + 1000; + + Row.Builder r1Builder = BTreeRow.unsortedBuilder(); + r1Builder.newRow(c1); + LivenessInfo originalLiveness = LivenessInfo.withExpirationTime(ts1, 100, ldt); + r1Builder.addPrimaryKeyLivenessInfo(originalLiveness); + + Row.Builder r2Builder = BTreeRow.unsortedBuilder(); + r2Builder.newRow(c1); + LivenessInfo loweredTTL = LivenessInfo.withExpirationTime(ts1, 50, ldt); + r2Builder.addPrimaryKeyLivenessInfo(loweredTTL); + + Cell r2v = expiringWithExpirationTime(v, ts1, 75, ldt, BB1); + Cell r2m2 = expiringWithExpirationTime(m, ts1, 50, ldt, BB1, CellPath.create(BB2)); + Cell r2m3 = expiringWithExpirationTime(m, ts1, 75, ldt, BB2, CellPath.create(BB3)); + Cell r2m4 = expiringWithExpirationTime(m, ts1, 100, ldt, BB3, CellPath.create(BB4)); + List> expectedCells = Lists.newArrayList(r2v, r2m2, r2m3, r2m4); + + expectedCells.forEach(r1Builder::addCell); + expectedCells.forEach(r2Builder::addCell); + + Row r1 = r1Builder.build(); + Row r2 = r2Builder.build(); + + Row r1r2 = Rows.merge(r1, r2); + Row r2r1 = Rows.merge(r2, r1); + + DiffListener mergedListener = new DiffListener(); + Rows.diff(mergedListener, r1r2, r2r1); + + mergedListener.liveness.forEach(pair -> Assert.assertEquals(pair.merged, pair.original)); + mergedListener.cells.forEach(pair -> assertCellsEqual(pair.merged, pair.original)); + } + + // Creates a dummy cell for a (regular) column for the provided name and without a cellPath. private static Cell liveCell(ColumnMetadata name) { From bcea8f5815dbea3b1b4c6cba7bc0fdd97ed5bdc0 Mon Sep 17 00:00:00 2001 From: maulin-vasavada Date: Fri, 7 Mar 2025 16:02:00 -0800 Subject: [PATCH 277/340] Refactor EncryptionOptions to use client / server encryption options builders patch by Maulin Vasavada; reviewed by Maxwell Guo, Stefan Miklosovic for CASSANDRA-20404 --- .../auth/MutualTlsAuthenticator.java | 2 +- .../auth/MutualTlsInternodeAuthenticator.java | 2 +- ...lTlsWithPasswordFallbackAuthenticator.java | 2 +- .../org/apache/cassandra/config/Config.java | 2 +- .../cassandra/config/DatabaseDescriptor.java | 14 +- .../cassandra/config/EncryptionOptions.java | 902 ++++++++---------- .../cassandra/config/JMXServerOptions.java | 11 +- .../net/InboundConnectionInitiator.java | 4 +- .../net/InboundConnectionSettings.java | 2 +- .../net/OutboundConnectionInitiator.java | 8 +- .../security/AbstractSslContextFactory.java | 14 +- .../security/ISslContextFactory.java | 4 +- .../apache/cassandra/security/SSLFactory.java | 12 +- .../apache/cassandra/tools/BulkLoader.java | 2 +- .../apache/cassandra/tools/LoaderOptions.java | 39 +- .../apache/cassandra/transport/Client.java | 6 +- .../cassandra/transport/SimpleClient.java | 18 +- .../utils/jmx/AbstractJmxSocketFactory.java | 2 +- .../transport/SimpleClientBurnTest.java | 4 +- .../transport/SimpleClientPerfTest.java | 8 +- .../distributed/impl/IsolatedJmx.java | 32 +- .../test/AbstractEncryptionOptionsImpl.java | 15 +- .../apache/cassandra/auth/AuthConfigTest.java | 7 +- .../auth/MutualTlsAuthenticatorTest.java | 14 +- .../MutualTlsInternodeAuthenticatorTest.java | 13 +- ...WithPasswordFallbackAuthenticatorTest.java | 6 +- .../auth/jmx/JMXAuthJMXServerOptionsTest.java | 2 +- .../config/EncryptionOptionsEqualityTest.java | 114 ++- .../config/EncryptionOptionsTest.java | 124 +-- .../org/apache/cassandra/cql3/CQLTester.java | 18 +- .../db/virtual/SettingsTableTest.java | 30 +- .../apache/cassandra/net/ConnectionTest.java | 10 +- .../apache/cassandra/net/HandshakeTest.java | 35 +- .../cassandra/net/MessagingServiceTest.java | 51 +- .../DefaultSslContextFactoryTest.java | 47 +- .../security/DummySslContextFactoryImpl.java | 4 +- .../FileBasedSslContextFactoryTest.java | 48 +- .../PEMBasedSslContextFactoryTest.java | 36 +- .../cassandra/security/SSLFactoryTest.java | 189 ++-- .../cassandra/service/ClientWarningsTest.java | 6 +- .../service/NativeTransportServiceTest.java | 18 +- .../service/ProtocolBetaVersionTest.java | 4 +- .../cassandra/tools/LoaderOptionsTest.java | 30 + .../transport/EarlyAuthenticationTest.java | 23 +- .../transport/MessagePayloadTest.java | 2 +- .../SimpleClientSslContextFactory.java | 8 +- .../cassandra/transport/TlsTestUtils.java | 13 +- .../stress/settings/SettingsTransport.java | 31 +- .../stress/settings/StressSettings.java | 2 +- .../stress/util/JavaDriverClient.java | 12 +- 50 files changed, 993 insertions(+), 1009 deletions(-) diff --git a/src/java/org/apache/cassandra/auth/MutualTlsAuthenticator.java b/src/java/org/apache/cassandra/auth/MutualTlsAuthenticator.java index 0337291a9dc2..7c5a6654f72f 100644 --- a/src/java/org/apache/cassandra/auth/MutualTlsAuthenticator.java +++ b/src/java/org/apache/cassandra/auth/MutualTlsAuthenticator.java @@ -44,7 +44,7 @@ import org.apache.cassandra.utils.NoSpamLogger; import static org.apache.cassandra.auth.IAuthenticator.AuthenticationMode.MTLS; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; /** * Performs mTLS authentication for client connections by extracting identities from client certificate diff --git a/src/java/org/apache/cassandra/auth/MutualTlsInternodeAuthenticator.java b/src/java/org/apache/cassandra/auth/MutualTlsInternodeAuthenticator.java index c1fcbd6eabf0..91500cbeb2f2 100644 --- a/src/java/org/apache/cassandra/auth/MutualTlsInternodeAuthenticator.java +++ b/src/java/org/apache/cassandra/auth/MutualTlsInternodeAuthenticator.java @@ -49,7 +49,7 @@ import org.apache.cassandra.metrics.MutualTlsMetrics; import org.apache.cassandra.utils.NoSpamLogger; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; /** * Performs mTLS authentication for internode connections by extracting identities from the certificates of incoming diff --git a/src/java/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticator.java b/src/java/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticator.java index 219353d76a1f..f994399f2161 100644 --- a/src/java/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticator.java +++ b/src/java/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticator.java @@ -81,7 +81,7 @@ public SaslNegotiator newSaslNegotiator(InetAddress clientAddress, Certificate[] public void validateConfiguration() throws ConfigurationException { Config config = DatabaseDescriptor.getRawConfig(); - if (config.client_encryption_options.getClientAuth() == EncryptionOptions.ClientAuth.NOT_REQUIRED) + if (config.client_encryption_options.getClientAuth() == EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED) { String msg = "MutualTlsWithPasswordFallbackAuthenticator requires client_encryption_options.require_client_auth to be optional/true"; throw new ConfigurationException(msg); diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 65179fcd7e91..1a3da1033fbc 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -454,7 +454,7 @@ public static class SSTableConfig public String failure_detector = "FailureDetector"; public EncryptionOptions.ServerEncryptionOptions server_encryption_options = new EncryptionOptions.ServerEncryptionOptions(); - public EncryptionOptions client_encryption_options = new EncryptionOptions(); + public EncryptionOptions.ClientEncryptionOptions client_encryption_options = new EncryptionOptions.ClientEncryptionOptions(); public JMXServerOptions jmx_server_options; diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 0439dbb00c5f..616735289741 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -156,7 +156,7 @@ import static org.apache.cassandra.config.DataRateSpec.DataRateUnit.BYTES_PER_SECOND; import static org.apache.cassandra.config.DataRateSpec.DataRateUnit.MEBIBYTES_PER_SECOND; import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.MEBIBYTES; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.db.ConsistencyLevel.ALL; import static org.apache.cassandra.db.ConsistencyLevel.EACH_QUORUM; import static org.apache.cassandra.db.ConsistencyLevel.LOCAL_QUORUM; @@ -1088,10 +1088,10 @@ else if (conf.commitlog_segment_size.toKibibytes() < 2 * conf.max_mutation_size. } else if (JMXServerOptions.isEnabledBySystemProperties()) { - throw new ConfigurationException("Configure either jmx_server_options in cassandra.yaml and comment out " + - "configure_jmx function call in cassandra-env.sh or keep cassandra-env.sh " + - "to call configure_jmx function but you have to keep jmx_server_options " + - "in cassandra.yaml commented out."); + throw new ConfigurationException("Configure either jmx_server_options in cassandra.yaml and comment out " + + "configure_jmx function call in cassandra-env.sh or keep cassandra-env.sh " + + "to call configure_jmx function but you have to keep jmx_server_options " + + "in cassandra.yaml commented out."); } conf.jmx_server_options.jmx_encryption_options.applyConfig(); @@ -3973,7 +3973,7 @@ public static void setInternodeMessagingEncyptionOptions(EncryptionOptions.Serve conf.server_encryption_options = encryptionOptions; } - public static EncryptionOptions getNativeProtocolEncryptionOptions() + public static EncryptionOptions.ClientEncryptionOptions getNativeProtocolEncryptionOptions() { return conf.client_encryption_options; } @@ -3984,7 +3984,7 @@ public static JMXServerOptions getJmxServerOptions() } @VisibleForTesting - public static void updateNativeProtocolEncryptionOptions(Function update) + public static void updateNativeProtocolEncryptionOptions(Function update) { conf.client_encryption_options = update.apply(conf.client_encryption_options); } diff --git a/src/java/org/apache/cassandra/config/EncryptionOptions.java b/src/java/org/apache/cassandra/config/EncryptionOptions.java index 07f78b9a5eea..5f94c14bcbb7 100644 --- a/src/java/org/apache/cassandra/config/EncryptionOptions.java +++ b/src/java/org/apache/cassandra/config/EncryptionOptions.java @@ -17,7 +17,6 @@ */ package org.apache.cassandra.config; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -46,10 +45,8 @@ * Examples of such options are: supported cipher-suites, ssl protocol with version, accepted protocols, end-point * verification, require client-auth/cert etc. */ -public class EncryptionOptions +public abstract class EncryptionOptions> { - Logger logger = LoggerFactory.getLogger(EncryptionOptions.class); - public enum TlsEncryptionPolicy { UNENCRYPTED("unencrypted"), @@ -69,39 +66,50 @@ public String description() } } - public enum ClientAuth + public enum ConfigKey { - REQUIRED("true"), - NOT_REQUIRED("false"), - OPTIONAL("optional"); - private final String value; - private static final Map VALUES = new HashMap<>(); - static + KEYSTORE("keystore"), + KEYSTORE_PASSWORD("keystore_password"), + KEYSTORE_PASSWORD_FILE("keystore_password_file"), + OUTBOUND_KEYSTORE("outbound_keystore"), + OUTBOUND_KEYSTORE_PASSWORD("outbound_keystore_password"), + OUTBOUND_KEYSTORE_PASSWORD_FILE("outbound_keystore_password_file"), + TRUSTSTORE("truststore"), + TRUSTSTORE_PASSWORD("truststore_password"), + TRUSTSTORE_PASSWORD_FILE("truststore_password_file"), + CIPHER_SUITES("cipher_suites"), + PROTOCOL("protocol"), + ACCEPTED_PROTOCOLS("accepted_protocols"), + ALGORITHM("algorithm"), + STORE_TYPE("store_type"), + REQUIRE_CLIENT_AUTH("require_client_auth"), + REQUIRE_ENDPOINT_VERIFICATION("require_endpoint_verification"), + ENABLED("enabled"), + OPTIONAL("optional"), + MAX_CERTIFICATE_VALIDITY_PERIOD("max_certificate_validity_period"), + CERTIFICATE_VALIDITY_WARN_THRESHOLD("certificate_validity_warn_threshold"); + + final String keyName; + + ConfigKey(String keyName) { - for (ClientAuth clientAuth : ClientAuth.values()) - { - VALUES.put(clientAuth.value, clientAuth); - VALUES.put(toLowerCaseLocalized(clientAuth.name()), clientAuth); - } + this.keyName = keyName; } - ClientAuth(String value) + public String toString() { - this.value = value; + return keyName; } - public static ClientAuth from(String value) + static Set asSet() { - if (VALUES.containsKey(toLowerCaseLocalized(value))) + Set valueSet = new HashSet<>(); + ConfigKey[] values = values(); + for (ConfigKey key : values) { - return VALUES.get(toLowerCaseLocalized(value)); + valueSet.add(toLowerCaseLocalized(key.toString())); } - throw new ConfigurationException(value + " is not a valid ClientAuth option"); - } - - public String value() - { - return value; + return valueSet; } } @@ -153,53 +161,6 @@ public String value() */ public transient ISslContextFactory sslContextFactoryInstance; - public enum ConfigKey - { - KEYSTORE("keystore"), - KEYSTORE_PASSWORD("keystore_password"), - KEYSTORE_PASSWORD_FILE("keystore_password_file"), - OUTBOUND_KEYSTORE("outbound_keystore"), - OUTBOUND_KEYSTORE_PASSWORD("outbound_keystore_password"), - OUTBOUND_KEYSTORE_PASSWORD_FILE("outbound_keystore_password_file"), - TRUSTSTORE("truststore"), - TRUSTSTORE_PASSWORD("truststore_password"), - TRUSTSTORE_PASSWORD_FILE("truststore_password_file"), - CIPHER_SUITES("cipher_suites"), - PROTOCOL("protocol"), - ACCEPTED_PROTOCOLS("accepted_protocols"), - ALGORITHM("algorithm"), - STORE_TYPE("store_type"), - REQUIRE_CLIENT_AUTH("require_client_auth"), - REQUIRE_ENDPOINT_VERIFICATION("require_endpoint_verification"), - ENABLED("enabled"), - OPTIONAL("optional"), - MAX_CERTIFICATE_VALIDITY_PERIOD("max_certificate_validity_period"), - CERTIFICATE_VALIDITY_WARN_THRESHOLD("certificate_validity_warn_threshold"); - - final String keyName; - - ConfigKey(String keyName) - { - this.keyName = keyName; - } - - public String toString() - { - return keyName; - } - - static Set asSet() - { - Set valueSet = new HashSet<>(); - ConfigKey[] values = values(); - for (ConfigKey key : values) - { - valueSet.add(toLowerCaseLocalized(key.toString())); - } - return valueSet; - } - } - public EncryptionOptions() { ssl_context_factory = new ParameterizedClass("org.apache.cassandra.security.DefaultSslContextFactory", @@ -252,7 +213,7 @@ public EncryptionOptions(ParameterizedClass ssl_context_factory, this.certificate_validity_warn_threshold = certificate_validity_warn_threshold; } - public EncryptionOptions(EncryptionOptions options) + public EncryptionOptions(EncryptionOptions options) { ssl_context_factory = options.ssl_context_factory; keystore = options.keystore; @@ -280,7 +241,7 @@ public EncryptionOptions(EncryptionOptions options) * * It also initializes the ISslContextFactory's instance */ - public EncryptionOptions applyConfig() + public T applyConfig() { ensureConfigNotApplied(); @@ -304,7 +265,7 @@ else if (sslContextFactoryInstance.hasKeystore()) // Otherwise if there's no keystore, not possible to establish an optional secure connection isOptional = false; } - return this; + return (T) this; } /** @@ -312,22 +273,22 @@ else if (sslContextFactoryInstance.hasKeystore()) * as the constructor for its implementation. * * @throws IllegalArgumentException in case any pre-defined key, as per {@link ConfigKey}, for the encryption - * options is duplicated in the parameterized keys. + * options is duplicated in the parameterized keys. */ - private void prepareSslContextFactoryParameterizedKeys(Map sslContextFactoryParameters) + private void prepareSslContextFactoryParameterizedKeys(Map sslContextFactoryParameters) { if (ssl_context_factory.parameters != null) { Set configKeys = ConfigKey.asSet(); for (Map.Entry entry : ssl_context_factory.parameters.entrySet()) { - if(configKeys.contains(toLowerCaseLocalized(entry.getKey()))) + if (configKeys.contains(toLowerCaseLocalized(entry.getKey()))) { - throw new IllegalArgumentException("SslContextFactory "+ssl_context_factory.class_name+" should " + - "configure '"+entry.getKey()+"' as encryption_options instead of" + + throw new IllegalArgumentException("SslContextFactory " + ssl_context_factory.class_name + " should " + + "configure '" + entry.getKey() + "' as encryption_options instead of" + " parameterized keys"); } - sslContextFactoryParameters.put(entry.getKey(),entry.getValue()); + sslContextFactoryParameters.put(entry.getKey(), entry.getValue()); } } } @@ -375,7 +336,8 @@ private void initializeSslContextFactory() protected static void putSslContextFactoryParameter(Map existingParameters, ConfigKey configKey, Object value) { - if (value != null) { + if (value != null) + { existingParameters.put(configKey.toString(), value); } } @@ -480,9 +442,9 @@ public List getAcceptedProtocols() return sslContextFactoryInstance == null ? null : sslContextFactoryInstance.getAcceptedProtocols(); } - public ClientAuth getClientAuth() + public ClientEncryptionOptions.ClientAuth getClientAuth() { - return this.require_client_auth == null ? ClientAuth.NOT_REQUIRED : ClientAuth.from(this.require_client_auth); + return this.require_client_auth == null ? ClientEncryptionOptions.ClientAuth.NOT_REQUIRED : ClientEncryptionOptions.ClientAuth.from(this.require_client_auth); } public String[] acceptedProtocolsArray() @@ -517,162 +479,6 @@ else if (getEnabled()) } } - public EncryptionOptions withSslContextFactory(ParameterizedClass sslContextFactoryClass) - { - return new EncryptionOptions(sslContextFactoryClass, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withKeyStore(String keystore) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withKeyStorePassword(String keystore_password) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withKeyStorePasswordFile(String keystore_password_file) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withTrustStore(String truststore) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withTrustStorePassword(String truststore_password) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withTrustStorePasswordFile(String truststore_password_file) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withCipherSuites(List cipher_suites) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withCipherSuites(String... cipher_suites) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, ImmutableList.copyOf(cipher_suites), protocol, - accepted_protocols, algorithm, store_type, require_client_auth, - require_endpoint_verification, enabled, optional, max_certificate_validity_period, - max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withProtocol(String protocol) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - - public EncryptionOptions withAcceptedProtocols(List accepted_protocols) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols == null ? null : - ImmutableList.copyOf(accepted_protocols), - algorithm, store_type, require_client_auth, require_endpoint_verification, - enabled, optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - - public EncryptionOptions withAlgorithm(String algorithm) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withStoreType(String store_type) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withRequireClientAuth(ClientAuth require_client_auth) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth.value, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withRequireEndpointVerification(boolean require_endpoint_verification) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withEnabled(boolean enabled) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withOptional(Boolean optional) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, max_certificate_validity_period).applyConfig(); - } - - public EncryptionOptions withMaxCertificateValidityPeriod(DurationSpec.IntMinutesBound maxCertificateValidityPeriod) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, maxCertificateValidityPeriod, certificate_validity_warn_threshold).applyConfig(); - } - - public EncryptionOptions withCertificateValidityWarnThreshold(DurationSpec.IntMinutesBound certificateValidityWarnThreshold) - { - return new EncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, - truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, - store_type, require_client_auth, require_endpoint_verification, enabled, - optional, max_certificate_validity_period, certificateValidityWarnThreshold).applyConfig(); - } - /** * The method is being mainly used to cache SslContexts therefore, we only consider * fields that would make a difference when the TrustStore or KeyStore files are updated @@ -685,7 +491,7 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) return false; - EncryptionOptions opt = (EncryptionOptions)o; + EncryptionOptions opt = (EncryptionOptions) o; return enabled == opt.enabled && optional == opt.optional && require_client_auth.equals(opt.require_client_auth) && @@ -731,8 +537,258 @@ public int hashCode() return result; } - public static class ServerEncryptionOptions extends EncryptionOptions + public static abstract class Builder> { + ParameterizedClass ssl_context_factory; + String keystore; + String keystore_password; + String keystore_password_file; + String truststore; + String truststore_password; + String truststore_password_file; + List cipher_suites; + String protocol; + List accepted_protocols; + String algorithm; + String store_type; + String require_client_auth; + boolean require_endpoint_verification; + DurationSpec.IntMinutesBound max_certificate_validity_period; + DurationSpec.IntMinutesBound certificate_validity_warn_threshold; + Boolean enabled; + Boolean optional; + Boolean isEnabled; + Boolean isOptional; + + public Builder(EncryptionOptions options) + { + ssl_context_factory = options.ssl_context_factory; + keystore = options.keystore; + keystore_password = options.keystore_password; + keystore_password_file = options.keystore_password_file; + truststore = options.truststore; + truststore_password = options.truststore_password; + truststore_password_file = options.truststore_password_file; + cipher_suites = options.cipher_suites; + protocol = options.protocol; + accepted_protocols = options.accepted_protocols; + algorithm = options.algorithm; + store_type = options.store_type; + require_client_auth = options.require_client_auth; + require_endpoint_verification = options.require_endpoint_verification; + enabled = options.enabled; + optional = options.optional; + max_certificate_validity_period = options.max_certificate_validity_period; + certificate_validity_warn_threshold = options.certificate_validity_warn_threshold; + } + + public Builder withSslContextFactory(ParameterizedClass sslContextFactoryClass) + { + this.ssl_context_factory = sslContextFactoryClass; + return this; + } + + public Builder withKeyStore(String keystore) + { + this.keystore = keystore; + return this; + } + + public Builder withKeyStorePassword(String keystore_password) + { + this.keystore_password = keystore_password; + return this; + } + + public Builder withKeyStorePasswordFile(String keystore_password_file) + { + this.keystore_password_file = keystore_password_file; + return this; + } + + public Builder withTrustStore(String truststore) + { + this.truststore = truststore; + return this; + } + + public Builder withTrustStorePassword(String truststore_password) + { + this.truststore_password = truststore_password; + return this; + } + + public Builder withTrustStorePasswordFile(String truststore_password_file) + { + this.truststore_password_file = truststore_password_file; + return this; + } + + public Builder withCipherSuites(List cipher_suites) + { + this.cipher_suites = cipher_suites; + return this; + } + + public Builder withCipherSuites(String... cipher_suites) + { + this.cipher_suites = ImmutableList.copyOf(cipher_suites); + return this; + } + + public Builder withProtocol(String protocol) + { + this.protocol = protocol; + return this; + } + + public Builder withAcceptedProtocols(List accepted_protocols) + { + this.accepted_protocols = accepted_protocols == null ? null : + ImmutableList.copyOf(accepted_protocols); + return this; + } + + public Builder withAlgorithm(String algorithm) + { + this.algorithm = algorithm; + return this; + } + + public Builder withStoreType(String store_type) + { + this.store_type = store_type; + return this; + } + + public Builder withRequireClientAuth(ClientEncryptionOptions.ClientAuth require_client_auth) + { + this.require_client_auth = require_client_auth.value; + return this; + } + + public Builder withRequireEndpointVerification(boolean require_endpoint_verification) + { + this.require_endpoint_verification = require_endpoint_verification; + return this; + } + + public Builder withEnabled(boolean enabled) + { + this.enabled = enabled; + return this; + } + + public Builder withOptional(Boolean optional) + { + this.optional = optional; + return this; + } + + public Builder withMaxCertificateValidityPeriod(DurationSpec.IntMinutesBound maxCertificateValidityPeriod) + { + this.max_certificate_validity_period = maxCertificateValidityPeriod; + return this; + } + + public Builder withCertificateValidityWarnThreshold(DurationSpec.IntMinutesBound certificateValidityWarnThreshold) + { + this.certificate_validity_warn_threshold = certificateValidityWarnThreshold; + return this; + } + + public abstract T build(); + } + + public static class ClientEncryptionOptions extends EncryptionOptions + { + public ClientEncryptionOptions() + { + } + + public ClientEncryptionOptions(ParameterizedClass ssl_context_factory, + String keystore, String keystore_password, String keystore_password_file, + String truststore, String truststore_password, String truststore_password_file, + List cipher_suites, String protocol, List accepted_protocols, + String algorithm, String store_type, String require_client_auth, + boolean require_endpoint_verification, Boolean enabled, Boolean optional, + DurationSpec.IntMinutesBound max_certificate_validity_period, + DurationSpec.IntMinutesBound certificate_validity_warn_threshold) + { + super(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, truststore_password, + truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, store_type, require_client_auth, + require_endpoint_verification, enabled, optional, max_certificate_validity_period, certificate_validity_warn_threshold); + } + + public ClientEncryptionOptions(ClientEncryptionOptions options) + { + super(options); + } + + public enum ClientAuth + { + REQUIRED("true"), + NOT_REQUIRED("false"), + OPTIONAL("optional"); + private final String value; + private static final Map VALUES = new HashMap<>(); + + static + { + for (ClientAuth clientAuth : ClientAuth.values()) + { + VALUES.put(clientAuth.value, clientAuth); + VALUES.put(toLowerCaseLocalized(clientAuth.name()), clientAuth); + } + } + + ClientAuth(String value) + { + this.value = value; + } + + public static ClientAuth from(String value) + { + if (VALUES.containsKey(toLowerCaseLocalized(value))) + { + return VALUES.get(toLowerCaseLocalized(value)); + } + throw new ConfigurationException(value + " is not a valid ClientAuth option"); + } + + public String value() + { + return value; + } + } + + public static class Builder extends EncryptionOptions.Builder + { + public Builder() + { + this(new ClientEncryptionOptions()); + } + + public Builder(ClientEncryptionOptions options) + { + super(options); + } + + @Override + public ClientEncryptionOptions build() + { + return new ClientEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, truststore, + truststore_password, truststore_password_file, cipher_suites, protocol, accepted_protocols, algorithm, + store_type, require_client_auth, require_endpoint_verification, enabled, + optional, max_certificate_validity_period, certificate_validity_warn_threshold).applyConfig(); + } + } + } + + public static class ServerEncryptionOptions extends EncryptionOptions + { + private static final Logger logger = LoggerFactory.getLogger(ServerEncryptionOptions.class); + public enum InternodeEncryption { all, none, dc, rack @@ -788,6 +844,73 @@ public ServerEncryptionOptions(ServerEncryptionOptions options) this.outbound_keystore_password_file = options.outbound_keystore_password_file; } + public static class Builder extends EncryptionOptions.Builder + { + private InternodeEncryption internode_encryption; + private boolean legacy_ssl_storage_port_enabled; + private String outbound_keystore; + private String outbound_keystore_password; + private String outbound_keystore_password_file; + + public Builder() + { + this(new ServerEncryptionOptions()); + } + + public Builder(ServerEncryptionOptions options) + { + super(options); + this.internode_encryption = options.internode_encryption; + this.legacy_ssl_storage_port_enabled = options.legacy_ssl_storage_port_enabled; + this.outbound_keystore = options.outbound_keystore; + this.outbound_keystore_password = options.outbound_keystore_password; + this.outbound_keystore_password_file = options.outbound_keystore_password_file; + } + + public Builder withInternodeEncryption(InternodeEncryption internode_encryption) + { + this.internode_encryption = internode_encryption; + return this; + } + + public Builder withLegacySslStoragePort(boolean enable_legacy_ssl_storage_port) + { + this.legacy_ssl_storage_port_enabled = enable_legacy_ssl_storage_port; + return this; + } + + public Builder withOutboundKeystore(String outboundKeystore) + { + this.outbound_keystore = outboundKeystore; + return this; + } + + public Builder withOutboundKeystorePassword(String outboundKeystorePassword) + { + this.outbound_keystore_password = outboundKeystorePassword; + return this; + } + + public Builder withOutboundKeystorePasswordFile(String outboundKeystorePasswordFile) + { + this.outbound_keystore_password_file = outboundKeystorePasswordFile; + return this; + } + + @Override + public ServerEncryptionOptions build() + { + return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, + outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, + truststore, truststore_password, truststore_password_file, + cipher_suites, protocol, accepted_protocols, + algorithm, store_type, require_client_auth, + require_endpoint_verification, optional, internode_encryption, + legacy_ssl_storage_port_enabled, max_certificate_validity_period, + certificate_validity_warn_threshold).applyConfig(); + } + } + @Override protected void fillSslContextParams(Map sslContextFactoryParameters) { @@ -798,7 +921,7 @@ protected void fillSslContextParams(Map sslContextFactoryParamet } @Override - public EncryptionOptions applyConfig() + public ServerEncryptionOptions applyConfig() { return applyConfigInternal(); } @@ -814,13 +937,13 @@ private ServerEncryptionOptions applyConfigInternal() logger.warn("Setting server_encryption_options.enabled has no effect, use internode_encryption"); } - if (getClientAuth() != ClientAuth.NOT_REQUIRED && (internode_encryption == InternodeEncryption.rack || internode_encryption == InternodeEncryption.dc)) + if (getClientAuth() != ClientEncryptionOptions.ClientAuth.NOT_REQUIRED && (internode_encryption == InternodeEncryption.rack || internode_encryption == InternodeEncryption.dc)) { logger.warn("Setting require_client_auth is incompatible with 'rack' and 'dc' internode_encryption values." - + " It is possible for an internode connection to pretend to be in the same rack/dc by spoofing" - + " its broadcast address in the handshake and bypass authentication. To ensure that mutual TLS" - + " authentication is not bypassed, please set internode_encryption to 'all'. Continuing with" - + " insecure configuration."); + + " It is possible for an internode connection to pretend to be in the same rack/dc by spoofing" + + " its broadcast address in the handshake and bypass authentication. To ensure that mutual TLS" + + " authentication is not bypassed, please set internode_encryption to 'all'. Continuing with" + + " insecure configuration."); } // regardless of the optional flag, if the internode encryption is set to rack or dc @@ -910,284 +1033,5 @@ public int hashCode() result += 31 * (outbound_keystore_password_file == null ? 0 : outbound_keystore_password_file.hashCode()); return result; } - - @Override - public ServerEncryptionOptions withSslContextFactory(ParameterizedClass sslContextFactoryClass) - { - return new ServerEncryptionOptions(sslContextFactoryClass, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withKeyStore(String keystore) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withKeyStorePassword(String keystore_password) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withKeyStorePasswordFile(String keystore_password_file) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withTrustStore(String truststore) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withTrustStorePassword(String truststore_password) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withTrustStorePasswordFile(String truststore_password_file) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withCipherSuites(List cipher_suites) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withCipherSuites(String... cipher_suites) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - Arrays.asList(cipher_suites), protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withProtocol(String protocol) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withAcceptedProtocols(List accepted_protocols) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withAlgorithm(String algorithm) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withStoreType(String store_type) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withRequireClientAuth(ClientAuth require_client_auth) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth.value, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - @Override - public ServerEncryptionOptions withRequireEndpointVerification(boolean require_endpoint_verification) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - public ServerEncryptionOptions withOptional(boolean optional) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - public ServerEncryptionOptions withInternodeEncryption(InternodeEncryption internode_encryption) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - public ServerEncryptionOptions withLegacySslStoragePort(boolean enable_legacy_ssl_storage_port) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - enable_legacy_ssl_storage_port, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - public ServerEncryptionOptions withOutboundKeystore(String outboundKeystore) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outboundKeystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - public ServerEncryptionOptions withOutboundKeystorePassword(String outboundKeystorePassword) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outboundKeystorePassword, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - - public ServerEncryptionOptions withOutboundKeystorePasswordFile(String outboundKeystorePasswordFile) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outboundKeystorePasswordFile, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, max_certificate_validity_period, - max_certificate_validity_period).applyConfigInternal(); - } - @Override - public ServerEncryptionOptions withMaxCertificateValidityPeriod(DurationSpec.IntMinutesBound maxCertificateValidityPeriod) - { - return new ServerEncryptionOptions(ssl_context_factory, keystore, keystore_password, keystore_password_file, - outbound_keystore, outbound_keystore_password, outbound_keystore_password_file, - truststore, truststore_password, truststore_password_file, - cipher_suites, protocol, accepted_protocols, - algorithm, store_type, require_client_auth, - require_endpoint_verification, optional, internode_encryption, - legacy_ssl_storage_port_enabled, maxCertificateValidityPeriod, - certificate_validity_warn_threshold).applyConfigInternal(); - } } } diff --git a/src/java/org/apache/cassandra/config/JMXServerOptions.java b/src/java/org/apache/cassandra/config/JMXServerOptions.java index 705ab02bb898..ab80307e5460 100644 --- a/src/java/org/apache/cassandra/config/JMXServerOptions.java +++ b/src/java/org/apache/cassandra/config/JMXServerOptions.java @@ -52,7 +52,7 @@ public class JMXServerOptions public final Boolean authenticate; // ssl options - public final EncryptionOptions jmx_encryption_options; + public final EncryptionOptions.ClientEncryptionOptions jmx_encryption_options; // options for using Cassandra's own authentication mechanisms public final String login_config_name; @@ -71,11 +71,11 @@ public class JMXServerOptions public JMXServerOptions() { this(true, false, 7199, 0, false, - new EncryptionOptions(), null, null, null, + new EncryptionOptions.ClientEncryptionOptions(), null, null, null, null, null); } - public static JMXServerOptions create(boolean enabled, boolean local, int jmxPort, EncryptionOptions options) + public static JMXServerOptions create(boolean enabled, boolean local, int jmxPort, EncryptionOptions.ClientEncryptionOptions options) { return new JMXServerOptions(enabled, !local, jmxPort, 0, false, options, null, null, null, @@ -95,7 +95,7 @@ public JMXServerOptions(Boolean enabled, int jmxPort, int rmiPort, Boolean authenticate, - EncryptionOptions jmx_encryption_options, + EncryptionOptions.ClientEncryptionOptions jmx_encryption_options, String loginConfigName, String loginConfigFile, String passwordFile, @@ -198,7 +198,8 @@ public static JMXServerOptions createParsingSystemProperties() // in the `cassandra.yaml`. Since the JMX SSL Config can also leverage it as per CASSANDRA-18508, password file // support is not added to the JMX SSL configuration via the system properties. Hence, `null` is used as // the password file arguments for the keystore and the truststore while constructing the encryption options here. - EncryptionOptions encryptionOptions = new EncryptionOptions(new ParameterizedClass("org.apache.cassandra.security.DefaultSslContextFactory", new HashMap<>()), + + EncryptionOptions.ClientEncryptionOptions encryptionOptions = new EncryptionOptions.ClientEncryptionOptions(new ParameterizedClass("org.apache.cassandra.security.DefaultSslContextFactory", new HashMap<>()), keystore, keystorePassword, null, diff --git a/src/java/org/apache/cassandra/net/InboundConnectionInitiator.java b/src/java/org/apache/cassandra/net/InboundConnectionInitiator.java index fd5710e29773..c4cbc50a2b2b 100644 --- a/src/java/org/apache/cassandra/net/InboundConnectionInitiator.java +++ b/src/java/org/apache/cassandra/net/InboundConnectionInitiator.java @@ -67,7 +67,7 @@ import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.apache.cassandra.auth.IInternodeAuthenticator.InternodeConnectionDirection.INBOUND; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.net.InternodeConnectionUtils.DISCARD_HANDLER_NAME; import static org.apache.cassandra.net.InternodeConnectionUtils.SSL_FACTORY_CONTEXT_DESCRIPTION; import static org.apache.cassandra.net.InternodeConnectionUtils.SSL_HANDLER_NAME; @@ -530,7 +530,7 @@ void setupMessagingPipeline(InetAddressAndPort from, int useMessagingVersion, in private static SslHandler getSslHandler(String description, Channel channel, EncryptionOptions.ServerEncryptionOptions encryptionOptions) throws IOException { - final EncryptionOptions.ClientAuth verifyPeerCertificate = REQUIRED; + final EncryptionOptions.ClientEncryptionOptions.ClientAuth verifyPeerCertificate = REQUIRED; SslContext sslContext = SSLFactory.getOrCreateSslContext(encryptionOptions, verifyPeerCertificate, ISslContextFactory.SocketType.SERVER, SSL_FACTORY_CONTEXT_DESCRIPTION); diff --git a/src/java/org/apache/cassandra/net/InboundConnectionSettings.java b/src/java/org/apache/cassandra/net/InboundConnectionSettings.java index 448da62cbb8c..4a9db7ca8112 100644 --- a/src/java/org/apache/cassandra/net/InboundConnectionSettings.java +++ b/src/java/org/apache/cassandra/net/InboundConnectionSettings.java @@ -146,7 +146,7 @@ public InboundConnectionSettings withLegacySslStoragePortDefaults() ServerEncryptionOptions encryption = this.encryption; if (encryption == null) encryption = DatabaseDescriptor.getInternodeMessagingEncyptionOptions(); - encryption = encryption.withOptional(false).withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all); + encryption = new ServerEncryptionOptions.Builder(encryption).withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all).withOptional(false).build(); return this.withBindAddress(bindAddress.withPort(DatabaseDescriptor.getSSLStoragePort())) .withEncryption(encryption) diff --git a/src/java/org/apache/cassandra/net/OutboundConnectionInitiator.java b/src/java/org/apache/cassandra/net/OutboundConnectionInitiator.java index 27e55f6105bf..2bddc174f2e8 100644 --- a/src/java/org/apache/cassandra/net/OutboundConnectionInitiator.java +++ b/src/java/org/apache/cassandra/net/OutboundConnectionInitiator.java @@ -66,9 +66,9 @@ import static java.util.concurrent.TimeUnit.*; import static org.apache.cassandra.auth.IInternodeAuthenticator.InternodeConnectionDirection.OUTBOUND; import static org.apache.cassandra.auth.IInternodeAuthenticator.InternodeConnectionDirection.OUTBOUND_PRECONNECT; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.OPTIONAL; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.OPTIONAL; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.net.InternodeConnectionUtils.DISCARD_HANDLER_NAME; import static org.apache.cassandra.net.InternodeConnectionUtils.SSL_FACTORY_CONTEXT_DESCRIPTION; import static org.apache.cassandra.net.InternodeConnectionUtils.SSL_HANDLER_NAME; @@ -245,7 +245,7 @@ public void initChannel(SocketChannel channel) throws Exception private SslContext getSslContext(SslFallbackConnectionType connectionType) throws IOException { - EncryptionOptions.ClientAuth requireClientAuth = NOT_REQUIRED; + EncryptionOptions.ClientEncryptionOptions.ClientAuth requireClientAuth = NOT_REQUIRED; if (connectionType == SslFallbackConnectionType.MTLS ) { requireClientAuth = REQUIRED; diff --git a/src/java/org/apache/cassandra/security/AbstractSslContextFactory.java b/src/java/org/apache/cassandra/security/AbstractSslContextFactory.java index c2a7fdb97510..0fc8f94352d8 100644 --- a/src/java/org/apache/cassandra/security/AbstractSslContextFactory.java +++ b/src/java/org/apache/cassandra/security/AbstractSslContextFactory.java @@ -37,8 +37,8 @@ import io.netty.handler.ssl.SslProvider; import org.apache.cassandra.config.EncryptionOptions; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_TCACTIVE_OPENSSL; @@ -68,7 +68,7 @@ abstract public class AbstractSslContextFactory implements ISslContextFactory protected final List accepted_protocols; protected final String algorithm; protected final String store_type; - protected final EncryptionOptions.ClientAuth clientAuth; + protected final EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth; protected final boolean require_endpoint_verification; /* ServerEncryptionOptions does not use the enabled flag at all instead using the existing @@ -105,7 +105,7 @@ protected AbstractSslContextFactory(Map parameters) accepted_protocols = getStringList("accepted_protocols"); algorithm = getString("algorithm"); store_type = getString("store_type", "JKS"); - clientAuth = parameters.get("require_client_auth") == null ? NOT_REQUIRED : EncryptionOptions.ClientAuth.from(getString("require_client_auth")); + clientAuth = parameters.get("require_client_auth") == null ? NOT_REQUIRED : EncryptionOptions.ClientEncryptionOptions.ClientAuth.from(getString("require_client_auth")); require_endpoint_verification = getBoolean("require_endpoint_verification", false); enabled = getBoolean("enabled"); optional = getBoolean("optional"); @@ -158,7 +158,7 @@ public SSLContext createJSSESslContext(boolean verifyPeerCertificate) throws SSL } @Override - public SSLContext createJSSESslContext(EncryptionOptions.ClientAuth clientAuth) throws SSLException + public SSLContext createJSSESslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth) throws SSLException { TrustManager[] trustManagers = null; if (clientAuth != NOT_REQUIRED) @@ -186,7 +186,7 @@ public SslContext createNettySslContext(boolean verifyPeerCertificate, SocketTyp } @Override - public SslContext createNettySslContext(EncryptionOptions.ClientAuth clientAuth, SocketType socketType, + public SslContext createNettySslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, SocketType socketType, CipherSuiteFilter cipherFilter) throws SSLException { /* @@ -291,7 +291,7 @@ protected SslProvider getSslProvider() */ abstract protected KeyManagerFactory buildOutboundKeyManagerFactory() throws SSLException; - private ClientAuth toNettyClientAuth(EncryptionOptions.ClientAuth clientAuth) + private ClientAuth toNettyClientAuth(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth) { switch (clientAuth) { diff --git a/src/java/org/apache/cassandra/security/ISslContextFactory.java b/src/java/org/apache/cassandra/security/ISslContextFactory.java index 1db5f579b16d..2f8b53d16694 100644 --- a/src/java/org/apache/cassandra/security/ISslContextFactory.java +++ b/src/java/org/apache/cassandra/security/ISslContextFactory.java @@ -72,7 +72,7 @@ public interface ISslContextFactory * @return JSSE's {@link SSLContext} * @throws SSLException in case the Ssl Context creation fails for some reason */ - default SSLContext createJSSESslContext(EncryptionOptions.ClientAuth clientAuth) throws SSLException + default SSLContext createJSSESslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth) throws SSLException { switch (clientAuth) { @@ -112,7 +112,7 @@ SslContext createNettySslContext(boolean verifyPeerCertificate, SocketType socke * @return Netty's {@link SslContext} * @throws SSLException in case the Ssl Context creation fails for some reason */ - default SslContext createNettySslContext(EncryptionOptions.ClientAuth clientAuth, SocketType socketType, + default SslContext createNettySslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, SocketType socketType, CipherSuiteFilter cipherFilter) throws SSLException { switch (clientAuth) diff --git a/src/java/org/apache/cassandra/security/SSLFactory.java b/src/java/org/apache/cassandra/security/SSLFactory.java index a9b4be9d5c39..65a6dfa24f1a 100644 --- a/src/java/org/apache/cassandra/security/SSLFactory.java +++ b/src/java/org/apache/cassandra/security/SSLFactory.java @@ -47,7 +47,7 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_TCACTIVE_OPENSSL; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; /** @@ -125,7 +125,7 @@ public static List tlsInstanceProtocolSubstitution() /** * Create a JSSE {@link SSLContext}. */ - public static SSLContext createSSLContext(EncryptionOptions options, EncryptionOptions.ClientAuth clientAuth) throws IOException + public static SSLContext createSSLContext(EncryptionOptions options, EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth) throws IOException { return options.sslContextFactoryInstance.createJSSESslContext(clientAuth); } @@ -133,7 +133,7 @@ public static SSLContext createSSLContext(EncryptionOptions options, EncryptionO /** * get a netty {@link SslContext} instance */ - public static SslContext getOrCreateSslContext(EncryptionOptions options, EncryptionOptions.ClientAuth clientAuth, + public static SslContext getOrCreateSslContext(EncryptionOptions options, EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, SocketType socketType, String contextDescription) throws IOException { @@ -157,7 +157,7 @@ public static SslContext getOrCreateSslContext(EncryptionOptions options, Encryp /** * Create a Netty {@link SslContext} */ - static SslContext createNettySslContext(EncryptionOptions options, EncryptionOptions.ClientAuth clientAuth, + static SslContext createNettySslContext(EncryptionOptions options, EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, SocketType socketType) throws IOException { return createNettySslContext(options, clientAuth, socketType, @@ -167,7 +167,7 @@ static SslContext createNettySslContext(EncryptionOptions options, EncryptionOpt /** * Create a Netty {@link SslContext} with a supplied cipherFilter */ - static SslContext createNettySslContext(EncryptionOptions options, EncryptionOptions.ClientAuth clientAuth, + static SslContext createNettySslContext(EncryptionOptions options, EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, SocketType socketType, CipherSuiteFilter cipherFilter) throws IOException { return options.sslContextFactoryInstance.createNettySslContext(clientAuth, socketType, @@ -356,7 +356,7 @@ private static boolean filterOutSSLv2Hello(String string) return !string.equals("SSLv2Hello"); } - public static void validateSslContext(String contextDescription, EncryptionOptions options, EncryptionOptions.ClientAuth clientAuth, boolean logProtocolAndCiphers) throws IOException + public static void validateSslContext(String contextDescription, EncryptionOptions options, EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, boolean logProtocolAndCiphers) throws IOException { if (options != null && options.tlsEncryptionPolicy() != EncryptionOptions.TlsEncryptionPolicy.UNENCRYPTED) { diff --git a/src/java/org/apache/cassandra/tools/BulkLoader.java b/src/java/org/apache/cassandra/tools/BulkLoader.java index ecd6a5d31b70..92ccbb96be7c 100644 --- a/src/java/org/apache/cassandra/tools/BulkLoader.java +++ b/src/java/org/apache/cassandra/tools/BulkLoader.java @@ -48,7 +48,7 @@ import org.apache.cassandra.utils.NativeSSTableLoaderClient; import org.apache.cassandra.utils.OutputHandler; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.utils.Clock.Global.nanoTime; public class BulkLoader diff --git a/src/java/org/apache/cassandra/tools/LoaderOptions.java b/src/java/org/apache/cassandra/tools/LoaderOptions.java index c3d2072ff437..a68fb012eb0b 100644 --- a/src/java/org/apache/cassandra/tools/LoaderOptions.java +++ b/src/java/org/apache/cassandra/tools/LoaderOptions.java @@ -52,7 +52,7 @@ import org.apache.cassandra.tools.BulkLoader.CmdLineOptions; import static org.apache.cassandra.config.DataRateSpec.DataRateUnit.MEBIBYTES_PER_SECOND; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; public class LoaderOptions { @@ -121,7 +121,7 @@ public class LoaderOptions public final int entireSSTableInterDcThrottleMebibytes; public final int storagePort; public final int sslStoragePort; - public final EncryptionOptions clientEncOptions; + public final EncryptionOptions.ClientEncryptionOptions clientEncOptions; public final int connectionsPerHost; public final EncryptionOptions.ServerEncryptionOptions serverEncOptions; public final Set hosts; @@ -172,9 +172,11 @@ static class Builder int storagePort; int sslStoragePort; - EncryptionOptions clientEncOptions = new EncryptionOptions(); + EncryptionOptions.ClientEncryptionOptions clientEncOptions = new EncryptionOptions.ClientEncryptionOptions(); + EncryptionOptions.ClientEncryptionOptions.Builder clientEncOptionsBuilder = new EncryptionOptions.ClientEncryptionOptions.Builder(clientEncOptions); int connectionsPerHost = 1; EncryptionOptions.ServerEncryptionOptions serverEncOptions = new EncryptionOptions.ServerEncryptionOptions(); + EncryptionOptions.ServerEncryptionOptions.Builder serverEncOptionsBuilder = new EncryptionOptions.ServerEncryptionOptions.Builder(serverEncOptions); Set hostsArg = new HashSet<>(); Set ignoresArg = new HashSet<>(); Set hosts = new HashSet<>(); @@ -333,9 +335,10 @@ public Builder sslStoragePort(int sslStoragePort) return this; } - public Builder encOptions(EncryptionOptions encOptions) + public Builder encOptions(EncryptionOptions.ClientEncryptionOptions encOptions) { this.clientEncOptions = encOptions; + this.clientEncOptionsBuilder = new EncryptionOptions.ClientEncryptionOptions.Builder(encOptions); return this; } @@ -348,6 +351,7 @@ public Builder connectionsPerHost(int connectionsPerHost) public Builder serverEncOptions(EncryptionOptions.ServerEncryptionOptions serverEncOptions) { this.serverEncOptions = serverEncOptions; + this.serverEncOptionsBuilder = new EncryptionOptions.ServerEncryptionOptions.Builder(serverEncOptions); return this; } @@ -551,9 +555,10 @@ public Builder parseArgs(String cmdArgs[]) "which is able to handle encrypted communication too."); // Copy the encryption options and apply the config so that argument parsing can accesss isEnabled. - clientEncOptions = config.client_encryption_options.applyConfig(); - serverEncOptions = config.server_encryption_options; - serverEncOptions.applyConfig(); + clientEncOptionsBuilder = new EncryptionOptions.ClientEncryptionOptions.Builder(config.client_encryption_options); + clientEncOptions = clientEncOptionsBuilder.build(); + serverEncOptionsBuilder = new EncryptionOptions.ServerEncryptionOptions.Builder(config.server_encryption_options); + serverEncOptions = serverEncOptionsBuilder.build(); if (cmd.hasOption(NATIVE_PORT_OPTION)) nativePort = Integer.parseInt(cmd.getOptionValue(NATIVE_PORT_OPTION)); @@ -625,51 +630,53 @@ public Builder parseArgs(String cmdArgs[]) if (cmd.hasOption(SSL_TRUSTSTORE) || cmd.hasOption(SSL_TRUSTSTORE_PW) || cmd.hasOption(SSL_KEYSTORE) || cmd.hasOption(SSL_KEYSTORE_PW)) { - clientEncOptions = clientEncOptions.withEnabled(true); + clientEncOptionsBuilder.withEnabled(true); } if (cmd.hasOption(SSL_TRUSTSTORE)) { - clientEncOptions = clientEncOptions.withTrustStore(cmd.getOptionValue(SSL_TRUSTSTORE)); + clientEncOptionsBuilder.withTrustStore(cmd.getOptionValue(SSL_TRUSTSTORE)); } if (cmd.hasOption(SSL_TRUSTSTORE_PW)) { - clientEncOptions = clientEncOptions.withTrustStorePassword(cmd.getOptionValue(SSL_TRUSTSTORE_PW)); + clientEncOptionsBuilder.withTrustStorePassword(cmd.getOptionValue(SSL_TRUSTSTORE_PW)); } if (cmd.hasOption(SSL_KEYSTORE)) { // if a keystore was provided, lets assume we'll need to use - clientEncOptions = clientEncOptions.withKeyStore(cmd.getOptionValue(SSL_KEYSTORE)) + clientEncOptionsBuilder.withKeyStore(cmd.getOptionValue(SSL_KEYSTORE)) .withRequireClientAuth(REQUIRED); } if (cmd.hasOption(SSL_KEYSTORE_PW)) { - clientEncOptions = clientEncOptions.withKeyStorePassword(cmd.getOptionValue(SSL_KEYSTORE_PW)); + clientEncOptionsBuilder.withKeyStorePassword(cmd.getOptionValue(SSL_KEYSTORE_PW)); } if (cmd.hasOption(SSL_PROTOCOL)) { - clientEncOptions = clientEncOptions.withProtocol(cmd.getOptionValue(SSL_PROTOCOL)); + clientEncOptionsBuilder.withProtocol(cmd.getOptionValue(SSL_PROTOCOL)); } if (cmd.hasOption(SSL_ALGORITHM)) { - clientEncOptions = clientEncOptions.withAlgorithm(cmd.getOptionValue(SSL_ALGORITHM)); + clientEncOptionsBuilder.withAlgorithm(cmd.getOptionValue(SSL_ALGORITHM)); } if (cmd.hasOption(SSL_STORE_TYPE)) { - clientEncOptions = clientEncOptions.withStoreType(cmd.getOptionValue(SSL_STORE_TYPE)); + clientEncOptionsBuilder.withStoreType(cmd.getOptionValue(SSL_STORE_TYPE)); } if (cmd.hasOption(SSL_CIPHER_SUITES)) { - clientEncOptions = clientEncOptions.withCipherSuites(cmd.getOptionValue(SSL_CIPHER_SUITES).split(",")); + clientEncOptionsBuilder.withCipherSuites(cmd.getOptionValue(SSL_CIPHER_SUITES).split(",")); } + clientEncOptions = clientEncOptionsBuilder.build(); + if (cmd.hasOption(TARGET_KEYSPACE)) { targetKeyspace = cmd.getOptionValue(TARGET_KEYSPACE); diff --git a/src/java/org/apache/cassandra/transport/Client.java b/src/java/org/apache/cassandra/transport/Client.java index 96fea832a857..6c60f6508c16 100644 --- a/src/java/org/apache/cassandra/transport/Client.java +++ b/src/java/org/apache/cassandra/transport/Client.java @@ -45,9 +45,9 @@ public class Client extends SimpleClient { private final SimpleEventHandler eventHandler = new SimpleEventHandler(); - public Client(String host, int port, ProtocolVersion version, EncryptionOptions encryptionOptions) + public Client(String host, int port, ProtocolVersion version, EncryptionOptions.ClientEncryptionOptions encryptionOptions) { - super(host, port, version, version.isBeta(), new EncryptionOptions(encryptionOptions).applyConfig()); + super(host, port, version, version.isBeta(), encryptionOptions.applyConfig()); setEventHandler(eventHandler); } @@ -260,7 +260,7 @@ public static void main(String[] args) throws Exception int port = Integer.parseInt(args[1]); ProtocolVersion version = args.length == 3 ? ProtocolVersion.decode(Integer.parseInt(args[2]), DatabaseDescriptor.getNativeTransportAllowOlderProtocols()) : ProtocolVersion.CURRENT; - EncryptionOptions encryptionOptions = new EncryptionOptions().applyConfig(); + EncryptionOptions.ClientEncryptionOptions encryptionOptions = new EncryptionOptions.ClientEncryptionOptions().applyConfig(); System.out.println("CQL binary protocol console " + host + "@" + port + " using native protocol version " + version); try (Client client = new Client(host, port, version, encryptionOptions)) diff --git a/src/java/org/apache/cassandra/transport/SimpleClient.java b/src/java/org/apache/cassandra/transport/SimpleClient.java index f86b128be0ff..dea3535757f6 100644 --- a/src/java/org/apache/cassandra/transport/SimpleClient.java +++ b/src/java/org/apache/cassandra/transport/SimpleClient.java @@ -74,7 +74,7 @@ public class SimpleClient implements Closeable public final String host; public final int port; - private final EncryptionOptions encryptionOptions; + private final EncryptionOptions.ClientEncryptionOptions encryptionOptions; private final int largeMessageThreshold; protected final ResponseHandler responseHandler = new ResponseHandler(); @@ -92,7 +92,7 @@ public static class Builder { private final String host; private final int port; - private EncryptionOptions encryptionOptions = new EncryptionOptions(); + private EncryptionOptions.ClientEncryptionOptions encryptionOptions = new EncryptionOptions.ClientEncryptionOptions(); private ProtocolVersion version = ProtocolVersion.CURRENT; private boolean useBeta = false; private int largeMessageThreshold = FrameEncoder.Payload.MAX_SIZE; @@ -103,7 +103,7 @@ private Builder(String host, int port) this.port = port; } - public Builder encryption(EncryptionOptions options) + public Builder encryption(EncryptionOptions.ClientEncryptionOptions options) { this.encryptionOptions = options; return this; @@ -149,22 +149,22 @@ private SimpleClient(Builder builder) this.largeMessageThreshold = builder.largeMessageThreshold; } - public SimpleClient(String host, int port, ProtocolVersion version, EncryptionOptions encryptionOptions) + public SimpleClient(String host, int port, ProtocolVersion version, EncryptionOptions.ClientEncryptionOptions encryptionOptions) { this(host, port, version, false, encryptionOptions); } - public SimpleClient(String host, int port, EncryptionOptions encryptionOptions) + public SimpleClient(String host, int port, EncryptionOptions.ClientEncryptionOptions encryptionOptions) { this(host, port, ProtocolVersion.CURRENT, encryptionOptions); } public SimpleClient(String host, int port, ProtocolVersion version) { - this(host, port, version, new EncryptionOptions()); + this(host, port, version, new EncryptionOptions.ClientEncryptionOptions()); } - public SimpleClient(String host, int port, ProtocolVersion version, boolean useBeta, EncryptionOptions encryptionOptions) + public SimpleClient(String host, int port, ProtocolVersion version, boolean useBeta, EncryptionOptions.ClientEncryptionOptions encryptionOptions) { this.host = host; this.port = port; @@ -172,7 +172,7 @@ public SimpleClient(String host, int port, ProtocolVersion version, boolean useB throw new IllegalArgumentException(String.format("Beta version of server used (%s), but USE_BETA flag is not set", version)); this.version = version; - this.encryptionOptions = new EncryptionOptions(encryptionOptions).applyConfig(); + this.encryptionOptions = encryptionOptions.applyConfig(); this.largeMessageThreshold = FrameEncoder.Payload.MAX_SIZE - Math.max(FrameEncoderCrc.HEADER_AND_TRAILER_LENGTH, FrameEncoderLZ4.HEADER_AND_TRAILER_LENGTH); @@ -180,7 +180,7 @@ public SimpleClient(String host, int port, ProtocolVersion version, boolean useB public SimpleClient(String host, int port) { - this(host, port, new EncryptionOptions()); + this(host, port, new EncryptionOptions.ClientEncryptionOptions()); } public SimpleClient connect(boolean useCompression) throws IOException diff --git a/src/java/org/apache/cassandra/utils/jmx/AbstractJmxSocketFactory.java b/src/java/org/apache/cassandra/utils/jmx/AbstractJmxSocketFactory.java index 3990c64ee7d4..ef68f524861e 100644 --- a/src/java/org/apache/cassandra/utils/jmx/AbstractJmxSocketFactory.java +++ b/src/java/org/apache/cassandra/utils/jmx/AbstractJmxSocketFactory.java @@ -82,7 +82,7 @@ public Map configure(InetAddress serverAddress, JMXServerOptions.setJmxSystemProperties(jmxEncryptionOptions.getAcceptedProtocols(), jmxEncryptionOptions.getCipherSuites()); logger.info("Enabling JMX SSL using jmx_encryption_options"); - boolean requireClientAuth = jmxEncryptionOptions.getClientAuth() == EncryptionOptions.ClientAuth.REQUIRED; + boolean requireClientAuth = jmxEncryptionOptions.getClientAuth() == EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; String[] ciphers = jmxEncryptionOptions.cipherSuitesArray(); String[] protocols = jmxEncryptionOptions.acceptedProtocolsArray(); SSLContext sslContext = jmxEncryptionOptions.sslContextFactoryInstance.createJSSESslContext(jmxEncryptionOptions.getClientAuth()); diff --git a/test/burn/org/apache/cassandra/transport/SimpleClientBurnTest.java b/test/burn/org/apache/cassandra/transport/SimpleClientBurnTest.java index ed3406d8bc04..3247051197ae 100644 --- a/test/burn/org/apache/cassandra/transport/SimpleClientBurnTest.java +++ b/test/burn/org/apache/cassandra/transport/SimpleClientBurnTest.java @@ -138,11 +138,11 @@ public int encodedSize(QueryMessage queryMessage, ProtocolVersion version) Arrays.asList( () -> new SimpleClient(address.getHostAddress(), port, ProtocolVersion.V5, true, - new EncryptionOptions()) + new EncryptionOptions.ClientEncryptionOptions()) .connect(false), () -> new SimpleClient(address.getHostAddress(), port, ProtocolVersion.V4, false, - new EncryptionOptions()) + new EncryptionOptions.ClientEncryptionOptions()) .connect(false) ); diff --git a/test/burn/org/apache/cassandra/transport/SimpleClientPerfTest.java b/test/burn/org/apache/cassandra/transport/SimpleClientPerfTest.java index 4417b7cb8957..ca5adfff02da 100644 --- a/test/burn/org/apache/cassandra/transport/SimpleClientPerfTest.java +++ b/test/burn/org/apache/cassandra/transport/SimpleClientPerfTest.java @@ -102,7 +102,7 @@ public void measureSmall() throws Throwable new SizeCaps(10, 20, 5, 10), () -> new SimpleClient(address.getHostAddress(), port, version, true, - new EncryptionOptions()) + new EncryptionOptions.ClientEncryptionOptions()) .connect(false), version); } @@ -114,7 +114,7 @@ public void measureSmallWithCompression() throws Throwable new SizeCaps(10, 20, 5, 10), () -> new SimpleClient(address.getHostAddress(), port, version, true, - new EncryptionOptions()) + new EncryptionOptions.ClientEncryptionOptions()) .connect(true), version); } @@ -126,7 +126,7 @@ public void measureLarge() throws Throwable new SizeCaps(1000, 2000, 5, 150), () -> new SimpleClient(address.getHostAddress(), port, version, true, - new EncryptionOptions()) + new EncryptionOptions.ClientEncryptionOptions()) .connect(false), version); } @@ -138,7 +138,7 @@ public void measureLargeWithCompression() throws Throwable new SizeCaps(1000, 2000, 5, 150), () -> new SimpleClient(address.getHostAddress(), port, version, true, - new EncryptionOptions()) + new EncryptionOptions.ClientEncryptionOptions()) .connect(true), version); } diff --git a/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java b/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java index 59e9e8ca04de..7ad57a52b6b4 100644 --- a/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java +++ b/test/distributed/org/apache/cassandra/distributed/impl/IsolatedJmx.java @@ -94,7 +94,7 @@ public void startJmx() // CASSANDRA-18508: Sensitive JMX SSL configuration options can be easily exposed Map jmxServerOptionsMap = (Map) config.getParams().get("jmx_server_options"); - EncryptionOptions jmxEncryptionOptions; + EncryptionOptions.ClientEncryptionOptions jmxEncryptionOptions; if (jmxServerOptionsMap == null) { JMXServerOptions parsingSystemProperties = JMXServerOptions.createParsingSystemProperties(); @@ -175,7 +175,7 @@ public void setupMBeanWrapper() * @return EncryptionOptions built object */ @SuppressWarnings("unchecked") - private EncryptionOptions getJmxEncryptionOptions(Map jmxServerOptionsMap) + private EncryptionOptions.ClientEncryptionOptions getJmxEncryptionOptions(Map jmxServerOptionsMap) { if (jmxServerOptionsMap == null) return null; @@ -186,34 +186,34 @@ private EncryptionOptions getJmxEncryptionOptions(Map jmxServerO { return null; } - EncryptionOptions jmxEncryptionOptions = new EncryptionOptions(); + EncryptionOptions.ClientEncryptionOptions.Builder jmxEncryptionOptionsBuilder = new EncryptionOptions.ClientEncryptionOptions.Builder(); String[] cipherSuitesArray = (String[]) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.CIPHER_SUITES.toString()); if (cipherSuitesArray != null) { - jmxEncryptionOptions = jmxEncryptionOptions.withCipherSuites(cipherSuitesArray); + jmxEncryptionOptionsBuilder.withCipherSuites(cipherSuitesArray); } List acceptedProtocols = (List) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.ACCEPTED_PROTOCOLS.toString()); if (acceptedProtocols != null) { - jmxEncryptionOptions = jmxEncryptionOptions.withAcceptedProtocols(acceptedProtocols); + jmxEncryptionOptionsBuilder.withAcceptedProtocols(acceptedProtocols); } Boolean requireClientAuthValue = (Boolean) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.REQUIRE_CLIENT_AUTH.toString()); - EncryptionOptions.ClientAuth requireClientAuth = requireClientAuthValue == null ? - EncryptionOptions.ClientAuth.NOT_REQUIRED : - EncryptionOptions.ClientAuth.from(String.valueOf(requireClientAuthValue)); + EncryptionOptions.ClientEncryptionOptions.ClientAuth requireClientAuth = requireClientAuthValue == null ? + EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED : + EncryptionOptions.ClientEncryptionOptions.ClientAuth.from(String.valueOf(requireClientAuthValue)); Object enabledOption = encryptionOptionsMap.get(EncryptionOptions.ConfigKey.ENABLED.toString()); boolean enabled = enabledOption != null ? (Boolean) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.ENABLED.toString()) : false; //CASSANDRA-18508 NOTE - We do not populate sslContextFactory configuration here for tests, it could be enhanced - jmxEncryptionOptions = jmxEncryptionOptions - .withKeyStore((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.KEYSTORE.toString())) - .withKeyStorePassword((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.KEYSTORE_PASSWORD.toString())) - .withTrustStore((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.TRUSTSTORE.toString())) - .withTrustStorePassword((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.TRUSTSTORE_PASSWORD.toString())) - .withRequireClientAuth(requireClientAuth) - .withEnabled(enabled); - return jmxEncryptionOptions; + return jmxEncryptionOptionsBuilder + .withKeyStore((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.KEYSTORE.toString())) + .withKeyStorePassword((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.KEYSTORE_PASSWORD.toString())) + .withTrustStore((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.TRUSTSTORE.toString())) + .withTrustStorePassword((String) encryptionOptionsMap.get(EncryptionOptions.ConfigKey.TRUSTSTORE_PASSWORD.toString())) + .withRequireClientAuth(requireClientAuth) + .withEnabled(enabled) + .build(); } private void waitForJmxAvailability(Map env) diff --git a/test/distributed/org/apache/cassandra/distributed/test/AbstractEncryptionOptionsImpl.java b/test/distributed/org/apache/cassandra/distributed/test/AbstractEncryptionOptionsImpl.java index 25e9e64b6326..7c3b91127ae0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/AbstractEncryptionOptionsImpl.java +++ b/test/distributed/org/apache/cassandra/distributed/test/AbstractEncryptionOptionsImpl.java @@ -56,7 +56,7 @@ import org.apache.cassandra.security.SSLFactory; import static java.util.concurrent.TimeUnit.SECONDS; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.distributed.test.AbstractEncryptionOptionsImpl.ConnectResult.CONNECTING; import static org.apache.cassandra.distributed.test.AbstractEncryptionOptionsImpl.ConnectResult.UNINITIALIZED; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; @@ -118,10 +118,13 @@ public class TlsConnection final int port; final List acceptedProtocols; final List cipherSuites; - final EncryptionOptions encryptionOptions = new EncryptionOptions() - .withEnabled(true) - .withKeyStore(validKeyStorePath).withKeyStorePassword(validKeyStorePassword) - .withTrustStore(validTrustStorePath).withTrustStorePassword(validTrustStorePassword); + final EncryptionOptions.ClientEncryptionOptions encryptionOptions = new EncryptionOptions.ClientEncryptionOptions.Builder() + .withEnabled(true) + .withKeyStore(validKeyStorePath) + .withKeyStorePassword(validKeyStorePassword) + .withTrustStore(validTrustStorePath) + .withTrustStorePassword(validTrustStorePassword) + .build(); private Throwable lastThrowable; private String lastProtocol; private String lastCipher; @@ -202,7 +205,7 @@ ConnectResult connect() throws Throwable setProtocolAndCipher(null, null); SslContext sslContext = SSLFactory.getOrCreateSslContext( - encryptionOptions.withAcceptedProtocols(acceptedProtocols).withCipherSuites(cipherSuites), + new EncryptionOptions.ClientEncryptionOptions.Builder(encryptionOptions).withAcceptedProtocols(acceptedProtocols).withCipherSuites(cipherSuites).build(), REQUIRED, ISslContextFactory.SocketType.CLIENT, "test"); EventLoopGroup workerGroup = new NioEventLoopGroup(); diff --git a/test/unit/org/apache/cassandra/auth/AuthConfigTest.java b/test/unit/org/apache/cassandra/auth/AuthConfigTest.java index b9bde913be05..580c48eb9f8f 100644 --- a/test/unit/org/apache/cassandra/auth/AuthConfigTest.java +++ b/test/unit/org/apache/cassandra/auth/AuthConfigTest.java @@ -30,6 +30,7 @@ import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.utils.MBeanWrapper; @@ -52,8 +53,10 @@ public void testNewInstanceForMutualTlsInternodeAuthenticator() throws IOExcepti Config config = load("cassandra-mtls.yaml"); config.internode_authenticator.class_name = "org.apache.cassandra.auth.MutualTlsInternodeAuthenticator"; config.internode_authenticator.parameters = Collections.singletonMap("validator_class_name", "org.apache.cassandra.auth.SpiffeCertificateValidator"); - config.server_encryption_options = config.server_encryption_options.withOutboundKeystore("test/conf/cassandra_ssl_test_outbound.keystore") - .withOutboundKeystorePassword("cassandra"); + config.server_encryption_options = new Builder(config.server_encryption_options) + .withOutboundKeystore("test/conf/cassandra_ssl_test_outbound.keystore") + .withOutboundKeystorePassword("cassandra") + .build(); DatabaseDescriptor.setConfig(config); MutualTlsInternodeAuthenticator authenticator = ParameterizedClass.newInstance(config.internode_authenticator, Arrays.asList("", "org.apache.cassandra.auth.")); diff --git a/test/unit/org/apache/cassandra/auth/MutualTlsAuthenticatorTest.java b/test/unit/org/apache/cassandra/auth/MutualTlsAuthenticatorTest.java index 14b31a3c87bc..88e44cf0ab5b 100644 --- a/test/unit/org/apache/cassandra/auth/MutualTlsAuthenticatorTest.java +++ b/test/unit/org/apache/cassandra/auth/MutualTlsAuthenticatorTest.java @@ -48,7 +48,7 @@ import static org.apache.cassandra.auth.AuthTestUtils.getMockInetAddress; import static org.apache.cassandra.auth.AuthTestUtils.initializeIdentityRolesTable; import static org.apache.cassandra.auth.AuthTestUtils.loadCertificateChain; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -79,8 +79,10 @@ public static void setup() StorageService.instance.initServer(); ((CassandraRoleManager)DatabaseDescriptor.getRoleManager()).loadIdentityStatement(); final Config config = DatabaseDescriptor.getRawConfig(); - config.client_encryption_options = config.client_encryption_options.withEnabled(true) - .withRequireClientAuth(REQUIRED); + config.client_encryption_options = new EncryptionOptions.ClientEncryptionOptions.Builder(config.client_encryption_options) + .withEnabled(true) + .withRequireClientAuth(REQUIRED) + .build(); } @After @@ -183,8 +185,10 @@ public void testValidateConfiguration() " & client_encryption_options.require_client_auth to be true"; MutualTlsAuthenticator mutualTlsAuthenticator = createAndInitializeMtlsAuthenticator(); - config.client_encryption_options = config.client_encryption_options.withEnabled(true) - .withRequireClientAuth(EncryptionOptions.ClientAuth.NOT_REQUIRED); + config.client_encryption_options = new EncryptionOptions.ClientEncryptionOptions.Builder(config.client_encryption_options) + .withEnabled(true) + .withRequireClientAuth(EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED) + .build(); expectedException.expect(ConfigurationException.class); expectedException.expectMessage(msg); mutualTlsAuthenticator.validateConfiguration(); diff --git a/test/unit/org/apache/cassandra/auth/MutualTlsInternodeAuthenticatorTest.java b/test/unit/org/apache/cassandra/auth/MutualTlsInternodeAuthenticatorTest.java index 8fdd23a44411..94fe66133bef 100644 --- a/test/unit/org/apache/cassandra/auth/MutualTlsInternodeAuthenticatorTest.java +++ b/test/unit/org/apache/cassandra/auth/MutualTlsInternodeAuthenticatorTest.java @@ -39,6 +39,7 @@ import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.transport.TlsTestUtils; @@ -81,8 +82,10 @@ public static void initialize() public void before() { Config config = DatabaseDescriptor.getRawConfig(); - config.server_encryption_options = config.server_encryption_options.withOutboundKeystore("test/conf/cassandra_ssl_test_outbound.keystore") - .withOutboundKeystorePassword("cassandra"); + config.server_encryption_options = new Builder(config.server_encryption_options) + .withOutboundKeystore("test/conf/cassandra_ssl_test_outbound.keystore") + .withOutboundKeystorePassword("cassandra") + .build(); } String getValidatorClass() @@ -164,8 +167,10 @@ public void testNoValidatorClassNameInConfig() public void testNoIdentitiesInKeystore() { Config config = DatabaseDescriptor.getRawConfig(); - config.server_encryption_options = config.server_encryption_options.withOutboundKeystore(TlsTestUtils.SERVER_KEYSTORE_PATH) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD); + config.server_encryption_options = new Builder(config.server_encryption_options) + .withOutboundKeystore(TlsTestUtils.SERVER_KEYSTORE_PATH) + .withOutboundKeystorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .build(); expectedException.expect(ConfigurationException.class); expectedException.expectMessage(String.format("No identity was extracted from the outbound keystore '%s'", TlsTestUtils.SERVER_KEYSTORE_PATH)); new MutualTlsInternodeAuthenticator(getParams()); diff --git a/test/unit/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticatorTest.java b/test/unit/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticatorTest.java index 95f7c133eab8..f0c642846b3b 100644 --- a/test/unit/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticatorTest.java +++ b/test/unit/org/apache/cassandra/auth/MutualTlsWithPasswordFallbackAuthenticatorTest.java @@ -50,8 +50,10 @@ public static void initialize() DatabaseDescriptor.daemonInitialization(); SchemaLoader.loadSchema(); Config config = DatabaseDescriptor.getRawConfig(); - config.client_encryption_options = config.client_encryption_options.withEnabled(true) - .withRequireClientAuth(EncryptionOptions.ClientAuth.OPTIONAL); + config.client_encryption_options = new EncryptionOptions.ClientEncryptionOptions.Builder(config.client_encryption_options) + .withEnabled(true) + .withRequireClientAuth(EncryptionOptions.ClientEncryptionOptions.ClientAuth.OPTIONAL) + .build(); Map parameters = Collections.singletonMap("validator_class_name", "org.apache.cassandra.auth.SpiffeCertificateValidator"); fallbackAuthenticator = new MutualTlsWithPasswordFallbackAuthenticator(parameters); fallbackAuthenticator.setup(); diff --git a/test/unit/org/apache/cassandra/auth/jmx/JMXAuthJMXServerOptionsTest.java b/test/unit/org/apache/cassandra/auth/jmx/JMXAuthJMXServerOptionsTest.java index cb83a61eda17..98749ca314ab 100644 --- a/test/unit/org/apache/cassandra/auth/jmx/JMXAuthJMXServerOptionsTest.java +++ b/test/unit/org/apache/cassandra/auth/jmx/JMXAuthJMXServerOptionsTest.java @@ -42,7 +42,7 @@ private static JMXServerOptions getJMXServerOptions() throws Exception String config = Paths.get(ClassLoader.getSystemResource("auth/cassandra-test-jaas.conf").toURI()).toString(); return new JMXServerOptions(true, false, 9999, 0, true, - new EncryptionOptions(), "TestLogin", config, null, null, + new EncryptionOptions.ClientEncryptionOptions(), "TestLogin", config, null, null, NoSuperUserAuthorizationProxy.class.getName()); } } diff --git a/test/unit/org/apache/cassandra/config/EncryptionOptionsEqualityTest.java b/test/unit/org/apache/cassandra/config/EncryptionOptionsEqualityTest.java index 5e6d26b8173e..b2770c23618c 100644 --- a/test/unit/org/apache/cassandra/config/EncryptionOptionsEqualityTest.java +++ b/test/unit/org/apache/cassandra/config/EncryptionOptionsEqualityTest.java @@ -27,8 +27,8 @@ import org.apache.cassandra.security.DummySslContextFactoryImpl; import org.apache.cassandra.transport.TlsTestUtils; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; @@ -40,23 +40,25 @@ public class EncryptionOptionsEqualityTest { private EncryptionOptions.ServerEncryptionOptions createServerEncryptionOptions() { - return new EncryptionOptions.ServerEncryptionOptions() + EncryptionOptions.ServerEncryptionOptions.Builder serverEncryptionOptionsBuilder = new EncryptionOptions.ServerEncryptionOptions.Builder(); + return serverEncryptionOptionsBuilder + .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) + .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) .withStoreType("JKS") .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) - .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); } @Test public void testKeystoreOptions() { - EncryptionOptions encryptionOptions1 = - new EncryptionOptions() + EncryptionOptions.ServerEncryptionOptions encryptionOptions1 = + new EncryptionOptions.ServerEncryptionOptions.Builder() .withStoreType("JKS") .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) @@ -64,10 +66,11 @@ public void testKeystoreOptions() { .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); - EncryptionOptions encryptionOptions2 = - new EncryptionOptions() + EncryptionOptions.ServerEncryptionOptions encryptionOptions2 = + new EncryptionOptions.ServerEncryptionOptions.Builder() .withStoreType("JKS") .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) @@ -75,7 +78,8 @@ public void testKeystoreOptions() { .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); assertEquals(encryptionOptions1, encryptionOptions2); assertEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); @@ -83,8 +87,8 @@ public void testKeystoreOptions() { @Test public void testKeystoreOptionsWithPasswordFile() { - EncryptionOptions encryptionOptions1 = - new EncryptionOptions() + EncryptionOptions.ServerEncryptionOptions encryptionOptions1 = + new EncryptionOptions.ServerEncryptionOptions.Builder() .withStoreType("JKS") .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePasswordFile(TlsTestUtils.SERVER_KEYSTORE_PASSWORD_FILE) @@ -92,10 +96,11 @@ public void testKeystoreOptionsWithPasswordFile() { .withTrustStorePasswordFile(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD_FILE) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); - EncryptionOptions encryptionOptions2 = - new EncryptionOptions() + EncryptionOptions.ServerEncryptionOptions encryptionOptions2 = + new EncryptionOptions.ServerEncryptionOptions.Builder() .withStoreType("JKS") .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePasswordFile(TlsTestUtils.SERVER_KEYSTORE_PASSWORD_FILE) @@ -103,7 +108,8 @@ public void testKeystoreOptionsWithPasswordFile() { .withTrustStorePasswordFile(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD_FILE) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); assertEquals(encryptionOptions1, encryptionOptions2); assertEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); @@ -115,15 +121,17 @@ public void testMismatchForKeystoreOptionsWithPasswordFile() EncryptionOptions.ServerEncryptionOptions encryptionOptions1 = createServerEncryptionOptions(); EncryptionOptions.ServerEncryptionOptions encryptionOptions2 = createServerEncryptionOptions(); - encryptionOptions1 = encryptionOptions1 + encryptionOptions1 = new EncryptionOptions.ServerEncryptionOptions.Builder(encryptionOptions1) .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePassword(null) - .withKeyStorePasswordFile(TlsTestUtils.SERVER_KEYSTORE_PASSWORD_FILE); + .withKeyStorePasswordFile(TlsTestUtils.SERVER_KEYSTORE_PASSWORD_FILE) + .build(); - encryptionOptions2 = encryptionOptions2 + encryptionOptions2 = new EncryptionOptions.ServerEncryptionOptions.Builder(encryptionOptions2) .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePassword(null) - .withKeyStorePasswordFile(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD_FILE); + .withKeyStorePasswordFile(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD_FILE) + .build(); assertNotEquals(encryptionOptions1, encryptionOptions2); assertNotEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); @@ -135,22 +143,24 @@ public void testSameCustomSslContextFactoryImplementation() { Map parameters1 = new HashMap<>(); parameters1.put("key1", "value1"); parameters1.put("key2", "value2"); - EncryptionOptions encryptionOptions1 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions1 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters1)) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); Map parameters2 = new HashMap<>(); parameters2.put("key1", "value1"); parameters2.put("key2", "value2"); - EncryptionOptions encryptionOptions2 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions2 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters2)) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); assertEquals(encryptionOptions1, encryptionOptions2); assertEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); @@ -162,22 +172,24 @@ public void testDifferentCustomSslContextFactoryImplementations() { Map parameters1 = new HashMap<>(); parameters1.put("key1", "value1"); parameters1.put("key2", "value2"); - EncryptionOptions encryptionOptions1 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions1 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters1)) .withProtocol("TLSv1.1") .withRequireClientAuth(NOT_REQUIRED) - .withRequireEndpointVerification(true); + .withRequireEndpointVerification(true) + .build(); Map parameters2 = new HashMap<>(); parameters2.put("key1", "value1"); parameters2.put("key2", "value2"); - EncryptionOptions encryptionOptions2 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions2 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DefaultSslContextFactory.class.getName(), parameters2)) .withProtocol("TLSv1.1") .withRequireClientAuth(NOT_REQUIRED) - .withRequireEndpointVerification(true); + .withRequireEndpointVerification(true) + .build(); assertNotEquals(encryptionOptions1, encryptionOptions2); assertNotEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); @@ -189,18 +201,20 @@ public void testDifferentCustomSslContextFactoryParameters() { Map parameters1 = new HashMap<>(); parameters1.put("key1", "value11"); parameters1.put("key2", "value12"); - EncryptionOptions encryptionOptions1 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions1 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters1)) - .withProtocol("TLSv1.1"); + .withProtocol("TLSv1.1") + .build(); Map parameters2 = new HashMap<>(); parameters2.put("key1", "value21"); parameters2.put("key2", "value22"); - EncryptionOptions encryptionOptions2 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions2 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters2)) - .withProtocol("TLSv1.1"); + .withProtocol("TLSv1.1") + .build(); assertNotEquals(encryptionOptions1, encryptionOptions2); assertNotEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); @@ -222,13 +236,15 @@ public void testServerEncryptionOptionsMismatchForOutboundKeystore() EncryptionOptions.ServerEncryptionOptions encryptionOptions1 = createServerEncryptionOptions(); EncryptionOptions.ServerEncryptionOptions encryptionOptions2 = createServerEncryptionOptions(); - encryptionOptions1 = encryptionOptions1 + encryptionOptions1 = new EncryptionOptions.ServerEncryptionOptions.Builder(encryptionOptions1) .withOutboundKeystore("test/conf/cassandra_outbound1.keystore") - .withOutboundKeystorePassword("cassandra1"); + .withOutboundKeystorePassword("cassandra1") + .build(); - encryptionOptions2 = encryptionOptions2 + encryptionOptions2 = new EncryptionOptions.ServerEncryptionOptions.Builder(encryptionOptions2) .withOutboundKeystore("test/conf/cassandra_outbound2.keystore") - .withOutboundKeystorePassword("cassandra2"); + .withOutboundKeystorePassword("cassandra2") + .build(); assertNotEquals(encryptionOptions1, encryptionOptions2); assertNotEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); @@ -240,13 +256,15 @@ public void testServerEncryptionOptionsMismatchForInboundKeystore() EncryptionOptions.ServerEncryptionOptions encryptionOptions1 = createServerEncryptionOptions(); EncryptionOptions.ServerEncryptionOptions encryptionOptions2 = createServerEncryptionOptions(); - encryptionOptions1 = encryptionOptions1 + encryptionOptions1 = new EncryptionOptions.ServerEncryptionOptions.Builder(encryptionOptions1) .withKeyStore("test/conf/cassandra1.keystore") - .withKeyStorePassword("cassandra1"); + .withKeyStorePassword("cassandra1") + .build(); - encryptionOptions2 = encryptionOptions2 + encryptionOptions2 = new EncryptionOptions.ServerEncryptionOptions.Builder(encryptionOptions2) .withKeyStore("test/conf/cassandra2.keystore") - .withKeyStorePassword("cassandra2"); + .withKeyStorePassword("cassandra2") + .build(); assertNotEquals(encryptionOptions1, encryptionOptions2); assertNotEquals(encryptionOptions1.hashCode(), encryptionOptions2.hashCode()); diff --git a/test/unit/org/apache/cassandra/config/EncryptionOptionsTest.java b/test/unit/org/apache/cassandra/config/EncryptionOptionsTest.java index 5ef08eb060a0..cec982d0c52b 100644 --- a/test/unit/org/apache/cassandra/config/EncryptionOptionsTest.java +++ b/test/unit/org/apache/cassandra/config/EncryptionOptionsTest.java @@ -23,11 +23,11 @@ import java.util.Map; import com.google.common.collect.ImmutableMap; -import org.apache.cassandra.io.util.File; import org.junit.Assert; import org.junit.Test; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.io.util.File; import org.assertj.core.api.Assertions; import org.yaml.snakeyaml.constructor.ConstructorException; @@ -46,11 +46,11 @@ public class EncryptionOptionsTest { static class EncryptionOptionsTestCase { - final EncryptionOptions encryptionOptions; + final EncryptionOptions.ClientEncryptionOptions encryptionOptions; final EncryptionOptions.TlsEncryptionPolicy expected; final String description; - public EncryptionOptionsTestCase(EncryptionOptions encryptionOptions, EncryptionOptions.TlsEncryptionPolicy expected, String description) + public EncryptionOptionsTestCase(EncryptionOptions.ClientEncryptionOptions encryptionOptions, EncryptionOptions.TlsEncryptionPolicy expected, String description) { this.encryptionOptions = encryptionOptions; this.expected = expected; @@ -59,25 +59,25 @@ public EncryptionOptionsTestCase(EncryptionOptions encryptionOptions, Encryption public static EncryptionOptionsTestCase of(Boolean optional, String keystorePath, Boolean enabled, EncryptionOptions.TlsEncryptionPolicy expected) { - return new EncryptionOptionsTestCase(new EncryptionOptions(new ParameterizedClass("org.apache.cassandra.security.DefaultSslContextFactory", - new HashMap<>()), - keystorePath, "dummypass", null, - "dummytruststore", "dummypass", null, - Collections.emptyList(), null, null, null, "JKS", "false", false, enabled, optional, null, null) + return new EncryptionOptionsTestCase(new EncryptionOptions.ClientEncryptionOptions(new ParameterizedClass("org.apache.cassandra.security.DefaultSslContextFactory", + new HashMap<>()), + keystorePath, "dummypass", null, + "dummytruststore", "dummypass", null, + Collections.emptyList(), null, null, null, "JKS", "false", false, enabled, optional, null, null) .applyConfig(), expected, String.format("optional=%s keystore=%s enabled=%s", optional, keystorePath, enabled)); } public static EncryptionOptionsTestCase of(Boolean optional, String keystorePath, Boolean enabled, - Map customSslContextFactoryParams, + Map customSslContextFactoryParams, EncryptionOptions.TlsEncryptionPolicy expected) { - return new EncryptionOptionsTestCase(new EncryptionOptions(new ParameterizedClass("org.apache.cassandra.security.DefaultSslContextFactory", - customSslContextFactoryParams), - keystorePath, "dummypass", null, - "dummytruststore", "dummypass", null, - Collections.emptyList(), null, null, null, "JKS", "false", false, enabled, optional, null, null) + return new EncryptionOptionsTestCase(new EncryptionOptions.ClientEncryptionOptions(new ParameterizedClass("org.apache.cassandra.security.DefaultSslContextFactory", + customSslContextFactoryParams), + keystorePath, "dummypass", null, + "dummytruststore", "dummypass", null, + Collections.emptyList(), null, null, null, "JKS", "false", false, enabled, optional, null, null) .applyConfig(), expected, String.format("optional=%s keystore=%s enabled=%s", optional, keystorePath, enabled)); @@ -87,15 +87,15 @@ public static EncryptionOptionsTestCase of(Boolean optional, String keystorePath static final String absentKeystore = "test/conf/missing-keystore-is-not-here"; static final String presentKeystore = "test/conf/keystore.jks"; final EncryptionOptionsTestCase[] encryptionOptionTestCases = { - // Optional Keystore Enabled Expected - EncryptionOptionsTestCase.of(null, absentKeystore, false, UNENCRYPTED), - EncryptionOptionsTestCase.of(null, absentKeystore, true, ENCRYPTED), - EncryptionOptionsTestCase.of(null, presentKeystore, false, OPTIONAL), - EncryptionOptionsTestCase.of(null, presentKeystore, true, ENCRYPTED), - EncryptionOptionsTestCase.of(false, absentKeystore, false, UNENCRYPTED), - EncryptionOptionsTestCase.of(false, absentKeystore, true, ENCRYPTED), - EncryptionOptionsTestCase.of(true, presentKeystore, false, OPTIONAL), - EncryptionOptionsTestCase.of(true, presentKeystore, true, OPTIONAL) + // Optional Keystore Enabled Expected + EncryptionOptionsTestCase.of(null, absentKeystore, false, UNENCRYPTED), + EncryptionOptionsTestCase.of(null, absentKeystore, true, ENCRYPTED), + EncryptionOptionsTestCase.of(null, presentKeystore, false, OPTIONAL), + EncryptionOptionsTestCase.of(null, presentKeystore, true, ENCRYPTED), + EncryptionOptionsTestCase.of(false, absentKeystore, false, UNENCRYPTED), + EncryptionOptionsTestCase.of(false, absentKeystore, true, ENCRYPTED), + EncryptionOptionsTestCase.of(true, presentKeystore, false, OPTIONAL), + EncryptionOptionsTestCase.of(true, presentKeystore, true, OPTIONAL) }; @Test @@ -111,11 +111,11 @@ public void testEncryptionOptionPolicy() static class ServerEncryptionOptionsTestCase { - final EncryptionOptions encryptionOptions; + final EncryptionOptions.ServerEncryptionOptions encryptionOptions; final EncryptionOptions.TlsEncryptionPolicy expected; final String description; - public ServerEncryptionOptionsTestCase(EncryptionOptions encryptionOptions, EncryptionOptions.TlsEncryptionPolicy expected, String description) + public ServerEncryptionOptionsTestCase(EncryptionOptions.ServerEncryptionOptions encryptionOptions, EncryptionOptions.TlsEncryptionPolicy expected, String description) { this.encryptionOptions = encryptionOptions; this.expected = expected; @@ -131,10 +131,10 @@ public static ServerEncryptionOptionsTestCase of(Boolean optional, String keysto keystorePath, "dummypass", null, keystorePath, "dummypass", null, "dummytruststore", "dummypass", null, - Collections.emptyList(), null, null, null, "JKS", "false", false, optional, internodeEncryption, false, null, null) + Collections.emptyList(), null, null, null, "JKS", "false", false, optional, internodeEncryption, false, null, null) .applyConfig(), - expected, - String.format("optional=%s keystore=%s internode=%s", optional, keystorePath, internodeEncryption)); + expected, + String.format("optional=%s keystore=%s internode=%s", optional, keystorePath, internodeEncryption)); } } @@ -143,8 +143,8 @@ public void isEnabledServer() { Map yaml = ImmutableMap.of( "server_encryption_options", ImmutableMap.of( - "isEnabled", false - ) + "isEnabled", false + ) ); Assertions.assertThatThrownBy(() -> YamlConfigurationLoader.fromMap(yaml, Config.class)) @@ -157,8 +157,8 @@ public void isOptionalServer() { Map yaml = ImmutableMap.of( "server_encryption_options", ImmutableMap.of( - "isOptional", false - ) + "isOptional", false + ) ); Assertions.assertThatThrownBy(() -> YamlConfigurationLoader.fromMap(yaml, Config.class)) @@ -171,11 +171,11 @@ public void testMaxCertificateValidityPeriod() { Map yaml = ImmutableMap.of( "server_encryption_options", ImmutableMap.of( - "max_certificate_validity_period", "2d" - ), + "max_certificate_validity_period", "2d" + ), "client_encryption_options", ImmutableMap.of( - "max_certificate_validity_period", "10d" - ) + "max_certificate_validity_period", "10d" + ) ); Config config = YamlConfigurationLoader.fromMap(yaml, Config.class); @@ -188,8 +188,8 @@ public void testFailsToParseInvalidMaxCertificateValidityPeriodValue() { Map yaml = ImmutableMap.of( "server_encryption_options", ImmutableMap.of( - "max_certificate_validity_period", "not-a-valid-input" - ) + "max_certificate_validity_period", "not-a-valid-input" + ) ); Assertions.assertThatThrownBy(() -> YamlConfigurationLoader.fromMap(yaml, Config.class)) @@ -202,8 +202,8 @@ public void testFailsToParseNegativeMaxCertificateValidityPeriod() { Map yaml = ImmutableMap.of( "server_encryption_options", ImmutableMap.of( - "max_certificate_validity_period", "-2d" - ) + "max_certificate_validity_period", "-2d" + ) ); Assertions.assertThatThrownBy(() -> YamlConfigurationLoader.fromMap(yaml, Config.class)) @@ -213,26 +213,26 @@ public void testFailsToParseNegativeMaxCertificateValidityPeriod() final ServerEncryptionOptionsTestCase[] serverEncryptionOptionTestCases = { - // Optional Keystore Internode Expected - ServerEncryptionOptionsTestCase.of(null, absentKeystore, none, UNENCRYPTED), - ServerEncryptionOptionsTestCase.of(null, absentKeystore, rack, OPTIONAL), - ServerEncryptionOptionsTestCase.of(null, absentKeystore, dc, OPTIONAL), - ServerEncryptionOptionsTestCase.of(null, absentKeystore, all, ENCRYPTED), - - ServerEncryptionOptionsTestCase.of(null, presentKeystore, none, OPTIONAL), - ServerEncryptionOptionsTestCase.of(null, presentKeystore, rack, OPTIONAL), - ServerEncryptionOptionsTestCase.of(null, absentKeystore, dc, OPTIONAL), - ServerEncryptionOptionsTestCase.of(null, absentKeystore, all, ENCRYPTED), - - ServerEncryptionOptionsTestCase.of(false, absentKeystore, none, UNENCRYPTED), - ServerEncryptionOptionsTestCase.of(false, absentKeystore, rack, OPTIONAL), - ServerEncryptionOptionsTestCase.of(false, absentKeystore, dc, OPTIONAL), - ServerEncryptionOptionsTestCase.of(false, absentKeystore, all, ENCRYPTED), - - ServerEncryptionOptionsTestCase.of(true, presentKeystore, none, OPTIONAL), - ServerEncryptionOptionsTestCase.of(true, presentKeystore, rack, OPTIONAL), - ServerEncryptionOptionsTestCase.of(true, absentKeystore, dc, OPTIONAL), - ServerEncryptionOptionsTestCase.of(true, absentKeystore, all, OPTIONAL), + // Optional Keystore Internode Expected + ServerEncryptionOptionsTestCase.of(null, absentKeystore, none, UNENCRYPTED), + ServerEncryptionOptionsTestCase.of(null, absentKeystore, rack, OPTIONAL), + ServerEncryptionOptionsTestCase.of(null, absentKeystore, dc, OPTIONAL), + ServerEncryptionOptionsTestCase.of(null, absentKeystore, all, ENCRYPTED), + + ServerEncryptionOptionsTestCase.of(null, presentKeystore, none, OPTIONAL), + ServerEncryptionOptionsTestCase.of(null, presentKeystore, rack, OPTIONAL), + ServerEncryptionOptionsTestCase.of(null, absentKeystore, dc, OPTIONAL), + ServerEncryptionOptionsTestCase.of(null, absentKeystore, all, ENCRYPTED), + + ServerEncryptionOptionsTestCase.of(false, absentKeystore, none, UNENCRYPTED), + ServerEncryptionOptionsTestCase.of(false, absentKeystore, rack, OPTIONAL), + ServerEncryptionOptionsTestCase.of(false, absentKeystore, dc, OPTIONAL), + ServerEncryptionOptionsTestCase.of(false, absentKeystore, all, ENCRYPTED), + + ServerEncryptionOptionsTestCase.of(true, presentKeystore, none, OPTIONAL), + ServerEncryptionOptionsTestCase.of(true, presentKeystore, rack, OPTIONAL), + ServerEncryptionOptionsTestCase.of(true, absentKeystore, dc, OPTIONAL), + ServerEncryptionOptionsTestCase.of(true, absentKeystore, all, OPTIONAL), }; @Test @@ -246,12 +246,12 @@ public void testServerEncryptionOptionPolicy() } } - @Test(expected = IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testMisplacedConfigKey() { Map customSslContextFactoryParams = new HashMap<>(); - for(EncryptionOptions.ConfigKey configKey: EncryptionOptions.ConfigKey.values()) + for (EncryptionOptions.ConfigKey configKey : EncryptionOptions.ConfigKey.values()) { customSslContextFactoryParams.put(configKey.toString(), "my-custom-value"); } diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index 91d86d2c811b..05c64f956e18 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -663,13 +663,15 @@ public void shouldUseClientCertificate(boolean useClientCert) public static void requireNativeProtocolClientEncryption() { DatabaseDescriptor.updateNativeProtocolEncryptionOptions((encryptionOptions) -> - encryptionOptions.withEnabled(true) - .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) - .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) - .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) - .withRequireEndpointVerification(false) - .withRequireClientAuth(EncryptionOptions.ClientAuth.OPTIONAL)); + new EncryptionOptions.ClientEncryptionOptions.Builder(encryptionOptions) + .withEnabled(true) + .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) + .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) + .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) + .withRequireEndpointVerification(false) + .withRequireClientAuth(EncryptionOptions.ClientEncryptionOptions.ClientAuth.OPTIONAL) + .build()); } /** @@ -1691,7 +1693,7 @@ protected Cluster getCluster(ProtocolVersion protocolVersion) protected SimpleClient newSimpleClient(ProtocolVersion version) throws IOException { - return new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, version.isBeta(), new EncryptionOptions().applyConfig()) + return new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, version.isBeta(), new EncryptionOptions.ClientEncryptionOptions()) .connect(false, false); } diff --git a/test/unit/org/apache/cassandra/db/virtual/SettingsTableTest.java b/test/unit/org/apache/cassandra/db/virtual/SettingsTableTest.java index 71b9172da798..78fb0d47d6ec 100644 --- a/test/unit/org/apache/cassandra/db/virtual/SettingsTableTest.java +++ b/test/unit/org/apache/cassandra/db/virtual/SettingsTableTest.java @@ -31,6 +31,7 @@ import com.datastax.driver.core.Row; import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.InternodeEncryption; import org.apache.cassandra.config.JMXServerOptions; import org.apache.cassandra.config.ParameterizedClass; @@ -38,7 +39,7 @@ import org.apache.cassandra.security.SSLFactory; import org.yaml.snakeyaml.introspector.Property; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; public class SettingsTableTest extends CQLTester { @@ -176,52 +177,53 @@ public void testEncryptionOverride() throws Throwable List expectedNames = SettingsTable.PROPERTIES.keySet().stream().filter(n -> n.startsWith("server_encryption")).collect(Collectors.toList()); Assert.assertEquals(expectedNames.size(), executeNet(all).all().size()); + Builder serverEncryptionOptionsBuilder = new Builder(config.server_encryption_options); check(pre + "algorithm", null); - config.server_encryption_options = config.server_encryption_options.withAlgorithm("SUPERSSL"); + config.server_encryption_options = serverEncryptionOptionsBuilder.withAlgorithm("SUPERSSL").build(); check(pre + "algorithm", "SUPERSSL"); check(pre + "cipher_suites", null); - config.server_encryption_options = config.server_encryption_options.withCipherSuites("c1", "c2"); + config.server_encryption_options = serverEncryptionOptionsBuilder.withCipherSuites("c1", "c2").build(); check(pre + "cipher_suites", "[c1, c2]"); // name doesn't match yaml check(pre + "protocol", null); - config.server_encryption_options = config.server_encryption_options.withProtocol("TLSv5"); + config.server_encryption_options = serverEncryptionOptionsBuilder.withProtocol("TLSv5").build(); check(pre + "protocol", "[TLSv5]"); - config.server_encryption_options = config.server_encryption_options.withProtocol("TLS"); + config.server_encryption_options = serverEncryptionOptionsBuilder.withProtocol("TLS").build(); check(pre + "protocol", SSLFactory.tlsInstanceProtocolSubstitution().toString()); - config.server_encryption_options = config.server_encryption_options.withProtocol("TLS"); - config.server_encryption_options = config.server_encryption_options.withAcceptedProtocols(ImmutableList.of("TLSv1.2","TLSv1.1")); + config.server_encryption_options = serverEncryptionOptionsBuilder.withProtocol("TLS").build(); + config.server_encryption_options = serverEncryptionOptionsBuilder.withAcceptedProtocols(ImmutableList.of("TLSv1.2","TLSv1.1")).build(); check(pre + "protocol", "[TLSv1.2, TLSv1.1]"); - config.server_encryption_options = config.server_encryption_options.withProtocol("TLSv2"); - config.server_encryption_options = config.server_encryption_options.withAcceptedProtocols(ImmutableList.of("TLSv1.2","TLSv1.1")); + config.server_encryption_options = serverEncryptionOptionsBuilder.withProtocol("TLSv2").build(); + config.server_encryption_options = serverEncryptionOptionsBuilder.withAcceptedProtocols(ImmutableList.of("TLSv1.2","TLSv1.1")).build(); check(pre + "protocol", "[TLSv1.2, TLSv1.1, TLSv2]"); // protocol goes after the explicit accept list if non-TLS check(pre + "optional", "false"); - config.server_encryption_options = config.server_encryption_options.withOptional(true); + config.server_encryption_options = serverEncryptionOptionsBuilder.withOptional(true).build(); check(pre + "optional", "true"); // name doesn't match yaml check(pre + "client_auth", "false"); - config.server_encryption_options = config.server_encryption_options.withRequireClientAuth(REQUIRED); + config.server_encryption_options = serverEncryptionOptionsBuilder.withRequireClientAuth(REQUIRED).build(); check(pre + "client_auth", "true"); // name doesn't match yaml check(pre + "endpoint_verification", "false"); - config.server_encryption_options = config.server_encryption_options.withRequireEndpointVerification(true); + config.server_encryption_options = serverEncryptionOptionsBuilder.withRequireEndpointVerification(true).build(); check(pre + "endpoint_verification", "true"); check(pre + "internode_encryption", "none"); - config.server_encryption_options = config.server_encryption_options.withInternodeEncryption(InternodeEncryption.all); + config.server_encryption_options = serverEncryptionOptionsBuilder.withInternodeEncryption(InternodeEncryption.all).build(); check(pre + "internode_encryption", "all"); check(pre + "enabled", "true"); // name doesn't match yaml check(pre + "legacy_ssl_storage_port", "false"); - config.server_encryption_options = config.server_encryption_options.withLegacySslStoragePort(true); + config.server_encryption_options = serverEncryptionOptionsBuilder.withLegacySslStoragePort(true).build(); check(pre + "legacy_ssl_storage_port", "true"); } diff --git a/test/unit/org/apache/cassandra/net/ConnectionTest.java b/test/unit/org/apache/cassandra/net/ConnectionTest.java index 70bb0c8046e3..233d067d1215 100644 --- a/test/unit/org/apache/cassandra/net/ConnectionTest.java +++ b/test/unit/org/apache/cassandra/net/ConnectionTest.java @@ -57,6 +57,7 @@ import io.netty.channel.ChannelPromise; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.EncryptionOptions; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.RequestFailure; @@ -72,7 +73,7 @@ import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.SECONDS; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; import static org.apache.cassandra.net.MessagingService.VERSION_40; import static org.apache.cassandra.net.NoPayload.noPayload; import static org.apache.cassandra.net.MessagingService.current_version; @@ -176,17 +177,18 @@ Settings override(Settings settings) } } - static final EncryptionOptions.ServerEncryptionOptions encryptionOptions = - new EncryptionOptions.ServerEncryptionOptions() + static final EncryptionOptions.Builder encryptionOptionsBuilder = + new Builder() .withLegacySslStoragePort(true) - .withOptional(true) .withInternodeEncryption(EncryptionOptions.ServerEncryptionOptions.InternodeEncryption.all) + .withOptional(true) .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) .withRequireClientAuth(NOT_REQUIRED) .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA"); + static final EncryptionOptions.ServerEncryptionOptions encryptionOptions = encryptionOptionsBuilder.build(); static final List> MODIFIERS = ImmutableList.of( settings -> settings.outbound(outbound -> outbound.withEncryption(encryptionOptions)) diff --git a/test/unit/org/apache/cassandra/net/HandshakeTest.java b/test/unit/org/apache/cassandra/net/HandshakeTest.java index c84643497d5d..2806bac86d18 100644 --- a/test/unit/org/apache/cassandra/net/HandshakeTest.java +++ b/test/unit/org/apache/cassandra/net/HandshakeTest.java @@ -39,6 +39,7 @@ import io.netty.util.concurrent.Future; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; @@ -49,9 +50,9 @@ import org.apache.cassandra.transport.TlsTestUtils; import org.apache.cassandra.utils.concurrent.AsyncPromise; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.net.ConnectionType.SMALL_MESSAGES; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.net.MessagingService.current_version; import static org.apache.cassandra.net.MessagingService.minimum_version; import static org.apache.cassandra.net.OutboundConnectionInitiator.Result; @@ -279,26 +280,28 @@ public void testOutboundConnectionDoesntFallbackWhenErrorIsNotSSLRelated() throw private ServerEncryptionOptions getServerEncryptionOptions(SslFallbackConnectionType sslConnectionType, boolean optional) { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions().withOptional(optional) - .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) - .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) - .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) - .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) - .withSslContextFactory((new ParameterizedClass(DefaultSslContextFactory.class.getName(), - new HashMap<>()))); + Builder serverEncryptionOptionsBuilder = new Builder(); + + serverEncryptionOptionsBuilder.withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) + .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) + .withOptional(optional) + .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) + .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH).withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) + .withSslContextFactory((new ParameterizedClass(DefaultSslContextFactory.class.getName(), + new HashMap<>()))); + if (sslConnectionType == SslFallbackConnectionType.MTLS) { - serverEncryptionOptions = serverEncryptionOptions.withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) - .withRequireClientAuth(REQUIRED); + serverEncryptionOptionsBuilder.withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withRequireClientAuth(REQUIRED); } else if (sslConnectionType == SslFallbackConnectionType.SSL) { - serverEncryptionOptions = serverEncryptionOptions.withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) - .withRequireClientAuth(NOT_REQUIRED); + serverEncryptionOptionsBuilder.withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withRequireClientAuth(NOT_REQUIRED); } - return serverEncryptionOptions; + return serverEncryptionOptionsBuilder.build(); } private InboundSockets getInboundSocket(ServerEncryptionOptions serverEncryptionOptions) diff --git a/test/unit/org/apache/cassandra/net/MessagingServiceTest.java b/test/unit/org/apache/cassandra/net/MessagingServiceTest.java index 95c72f4bc0b3..45746d5e6e06 100644 --- a/test/unit/org/apache/cassandra/net/MessagingServiceTest.java +++ b/test/unit/org/apache/cassandra/net/MessagingServiceTest.java @@ -50,6 +50,7 @@ import org.apache.cassandra.auth.IInternodeAuthenticator; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.distributed.test.log.ClusterMetadataTestHelper; import org.apache.cassandra.exceptions.ConfigurationException; @@ -257,8 +258,7 @@ private static void addDCLatency(long sentAt, long nowTime) public void testFailedOutboundInternodeAuth() throws Exception { // Listen on serverside for connections - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none).build(); DatabaseDescriptor.setInternodeAuthenticator(REJECT_OUTBOUND_AUTHENTICATOR); InetAddress listenAddress = FBUtilities.getJustLocalAddress(); @@ -293,8 +293,7 @@ public void testFailedOutboundInternodeAuth() throws Exception @Test public void testFailedInboundInternodeAuth() throws IOException, InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none).build(); DatabaseDescriptor.setInternodeAuthenticator(ALLOW_NOTHING_AUTHENTICATOR); InetAddress listenAddress = FBUtilities.getJustLocalAddress(); @@ -348,56 +347,54 @@ public void testFailedInboundInternodeAuth() throws IOException, InterruptedExce @Test public void listenPlainConnection() throws InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none).build(); listen(serverEncryptionOptions, false); } @Test public void listenPlainConnectionWithBroadcastAddr() throws InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.none).build(); listen(serverEncryptionOptions, true); } @Test public void listenRequiredSecureConnection() throws InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withOptional(false) - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) - .withLegacySslStoragePort(false); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withLegacySslStoragePort(false) + .withOptional(false) + .build(); listen(serverEncryptionOptions, false); } @Test public void listenRequiredSecureConnectionWithBroadcastAddr() throws InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withOptional(false) - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) - .withLegacySslStoragePort(false); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withLegacySslStoragePort(false) + .withOptional(false) + .build(); listen(serverEncryptionOptions, true); } @Test public void listenRequiredSecureConnectionWithLegacyPort() throws InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) - .withOptional(false) - .withLegacySslStoragePort(true); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withLegacySslStoragePort(true) + .withOptional(false) + .build(); listen(serverEncryptionOptions, false); } @Test public void listenRequiredSecureConnectionWithBroadcastAddrAndLegacyPort() throws InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) - .withOptional(false) - .withLegacySslStoragePort(true); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withLegacySslStoragePort(true) + .withOptional(false) + .build(); listen(serverEncryptionOptions, true); } @@ -406,8 +403,7 @@ public void listenOptionalSecureConnection() throws InterruptedException { for (int i = 0; i < 500; i++) // test used to be flaky, so run in a loop to make sure stable (see CASSANDRA-17033) { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withOptional(true); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withOptional(true).build(); listen(serverEncryptionOptions, false); } } @@ -415,8 +411,7 @@ public void listenOptionalSecureConnection() throws InterruptedException @Test public void listenOptionalSecureConnectionWithBroadcastAddr() throws InterruptedException { - ServerEncryptionOptions serverEncryptionOptions = new ServerEncryptionOptions() - .withOptional(true); + ServerEncryptionOptions serverEncryptionOptions = new Builder().withOptional(true).build(); listen(serverEncryptionOptions, true); } diff --git a/test/unit/org/apache/cassandra/security/DefaultSslContextFactoryTest.java b/test/unit/org/apache/cassandra/security/DefaultSslContextFactoryTest.java index fa3eb7c845ee..5dc6d7e39bdc 100644 --- a/test/unit/org/apache/cassandra/security/DefaultSslContextFactoryTest.java +++ b/test/unit/org/apache/cassandra/security/DefaultSslContextFactoryTest.java @@ -33,17 +33,17 @@ import io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslProvider; import org.apache.cassandra.config.EncryptionOptions; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.transport.TlsTestUtils; import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_TCACTIVE_OPENSSL; - -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; public class DefaultSslContextFactoryTest { - private Map commonConfig = new HashMap<>(); + private Map commonConfig = new HashMap<>(); @Before public void setup() @@ -54,7 +54,7 @@ public void setup() commonConfig.put("cipher_suites", Arrays.asList("TLS_RSA_WITH_AES_128_CBC_SHA")); } - private void addKeystoreOptions(Map config) + private void addKeystoreOptions(Map config) { config.put("keystore", TlsTestUtils.SERVER_KEYSTORE_PATH); config.put("keystore_password", TlsTestUtils.SERVER_KEYSTORE_PASSWORD); @@ -69,14 +69,18 @@ private void addOutboundKeystoreOptions(Map config) @Test public void getSslContextOpenSSL() throws IOException { - EncryptionOptions.ServerEncryptionOptions options = new EncryptionOptions.ServerEncryptionOptions().withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) - .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) - .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) - .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) - .withRequireClientAuth(NOT_REQUIRED) - .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA"); + EncryptionOptions.ServerEncryptionOptions.Builder builder = new Builder(); + EncryptionOptions.ServerEncryptionOptions options = builder + .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) + .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) + .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) + .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) + .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) + .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withRequireClientAuth(NOT_REQUIRED) + .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") + .build(); + SslContext sslContext = SSLFactory.getOrCreateSslContext(options, REQUIRED, ISslContextFactory.SocketType.CLIENT, "test"); Assert.assertNotNull(sslContext); if (OpenSsl.isAvailable()) @@ -88,7 +92,7 @@ public void getSslContextOpenSSL() throws IOException @Test(expected = IOException.class) public void buildTrustManagerFactoryWithInvalidTruststoreFile() throws IOException { - Map config = new HashMap<>(); + Map config = new HashMap<>(); config.putAll(commonConfig); config.put("truststore", "/this/is/probably/not/a/file/on/your/test/machine"); @@ -100,7 +104,7 @@ public void buildTrustManagerFactoryWithInvalidTruststoreFile() throws IOExcepti @Test(expected = IOException.class) public void buildTrustManagerFactoryWithBadPassword() throws IOException { - Map config = new HashMap<>(); + Map config = new HashMap<>(); config.putAll(commonConfig); config.put("truststore_password", "HomeOfBadPasswords"); @@ -112,7 +116,7 @@ public void buildTrustManagerFactoryWithBadPassword() throws IOException @Test public void buildTrustManagerFactoryHappyPath() throws IOException { - Map config = new HashMap<>(); + Map config = new HashMap<>(); config.putAll(commonConfig); DefaultSslContextFactory defaultSslContextFactoryImpl = new DefaultSslContextFactory(config); @@ -124,7 +128,7 @@ public void buildTrustManagerFactoryHappyPath() throws IOException @Test(expected = IOException.class) public void buildKeyManagerFactoryWithInvalidKeystoreFile() throws IOException { - Map config = new HashMap<>(); + Map config = new HashMap<>(); config.putAll(commonConfig); config.put("keystore", "/this/is/probably/not/a/file/on/your/test/machine"); config.put("keystore_password", "ThisWontMatter"); @@ -137,7 +141,7 @@ public void buildKeyManagerFactoryWithInvalidKeystoreFile() throws IOException @Test(expected = IOException.class) public void buildKeyManagerFactoryWithBadPassword() throws IOException { - Map config = new HashMap<>(); + Map config = new HashMap<>(); config.putAll(commonConfig); addKeystoreOptions(config); config.put("keystore_password", "HomeOfBadPasswords"); @@ -149,7 +153,7 @@ public void buildKeyManagerFactoryWithBadPassword() throws IOException @Test public void buildKeyManagerFactoryHappyPath() throws IOException { - Map config = new HashMap<>(); + Map config = new HashMap<>(); config.putAll(commonConfig); DefaultSslContextFactory defaultSslContextFactoryImpl = new DefaultSslContextFactory(config); @@ -222,12 +226,13 @@ public void buildOutboundKeyManagerFactoryHappyPath() throws IOException } @Test - public void testDisableOpenSslForInJvmDtests() { + public void testDisableOpenSslForInJvmDtests() + { // The configuration name below is hard-coded intentionally to make sure we don't break the contract without // changing the documentation appropriately try (WithProperties properties = new WithProperties().set(DISABLE_TCACTIVE_OPENSSL, true)) { - Map config = new HashMap<>(); + Map config = new HashMap<>(); config.putAll(commonConfig); DefaultSslContextFactory defaultSslContextFactoryImpl = new DefaultSslContextFactory(config); diff --git a/test/unit/org/apache/cassandra/security/DummySslContextFactoryImpl.java b/test/unit/org/apache/cassandra/security/DummySslContextFactoryImpl.java index ca4f4e86f06e..12649cfb0cc6 100644 --- a/test/unit/org/apache/cassandra/security/DummySslContextFactoryImpl.java +++ b/test/unit/org/apache/cassandra/security/DummySslContextFactoryImpl.java @@ -44,7 +44,7 @@ public SSLContext createJSSESslContext(boolean verifyPeerCertificate) throws SSL } @Override - public SSLContext createJSSESslContext(EncryptionOptions.ClientAuth clientAuth) throws SSLException + public SSLContext createJSSESslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth) throws SSLException { return null; } @@ -56,7 +56,7 @@ public SslContext createNettySslContext(boolean verifyPeerCertificate, SocketTyp } @Override - public SslContext createNettySslContext(EncryptionOptions.ClientAuth clientAuth, SocketType socketType, + public SslContext createNettySslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, SocketType socketType, CipherSuiteFilter cipherFilter) throws SSLException { return null; diff --git a/test/unit/org/apache/cassandra/security/FileBasedSslContextFactoryTest.java b/test/unit/org/apache/cassandra/security/FileBasedSslContextFactoryTest.java index d6d936ba0e6c..cc6c05af5d25 100644 --- a/test/unit/org/apache/cassandra/security/FileBasedSslContextFactoryTest.java +++ b/test/unit/org/apache/cassandra/security/FileBasedSslContextFactoryTest.java @@ -36,11 +36,12 @@ import static org.apache.cassandra.config.CassandraRelevantProperties.CASSANDRA_CONFIG; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; public class FileBasedSslContextFactoryTest { private EncryptionOptions.ServerEncryptionOptions encryptionOptions; + private EncryptionOptions.ServerEncryptionOptions.Builder encryptionOptionsBuilder; static WithProperties properties; @@ -60,7 +61,10 @@ public static void tearDownDatabaseDescriptor() @Before public void setup() { - encryptionOptions = new EncryptionOptions.ServerEncryptionOptions() + encryptionOptionsBuilder = new EncryptionOptions.ServerEncryptionOptions.Builder(); + encryptionOptions = encryptionOptionsBuilder + .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) + .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) .withSslContextFactory(new ParameterizedClass(TestFileBasedSSLContextFactory.class.getName(), new HashMap<>())) .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) @@ -69,8 +73,7 @@ public void setup() .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD); + .build(); } @Test @@ -95,11 +98,12 @@ public void testHappyPath() throws SSLException @Test public void testEmptyKeystorePasswords() throws SSLException { - EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptions + EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptionsBuilder + .withOutboundKeystorePassword("") + .withOutboundKeystore("test/conf/cassandra_ssl_test_nopassword.keystore") .withKeyStorePassword("") .withKeyStore("test/conf/cassandra_ssl_test_nopassword.keystore") - .withOutboundKeystorePassword("") - .withOutboundKeystore("test/conf/cassandra_ssl_test_nopassword.keystore"); + .build(); Assert.assertEquals("org.apache.cassandra.security.FileBasedSslContextFactoryTest$TestFileBasedSSLContextFactory", localEncryptionOptions.ssl_context_factory.class_name); @@ -118,13 +122,14 @@ public void testKeystorePasswordFile() throws SSLException { // Here we only override password configuration and specify password_file configuration since keystore paths // are already loaded in the `encryptionOptions` - EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptions - .withKeyStorePassword(null) - .withKeyStorePasswordFile(TlsTestUtils.SERVER_KEYSTORE_PASSWORD_FILE) + EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptionsBuilder .withOutboundKeystorePassword(null) .withOutboundKeystorePasswordFile(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD_FILE) + .withKeyStorePassword(null) + .withKeyStorePasswordFile(TlsTestUtils.SERVER_KEYSTORE_PASSWORD_FILE) .withTrustStorePassword(null) - .withTrustStorePasswordFile(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD_FILE); + .withTrustStorePasswordFile(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD_FILE) + .build(); Assert.assertEquals("org.apache.cassandra.security.FileBasedSslContextFactoryTest$TestFileBasedSSLContextFactory", localEncryptionOptions.ssl_context_factory.class_name); @@ -144,13 +149,14 @@ public void testBadKeystorePasswordFile() throws SSLException { // Here we only override password configuration and specify password_file configuration since keystore paths // are already loaded in the `encryptionOptions` - encryptionOptions - .withKeyStorePassword(null) - .withKeyStorePasswordFile("/path/to/non-existance-password-file") + encryptionOptionsBuilder .withOutboundKeystorePassword(null) .withOutboundKeystorePasswordFile("/path/to/non-existance-password-file") + .withKeyStorePassword(null) + .withKeyStorePasswordFile("/path/to/non-existance-password-file") .withTrustStorePassword(null) - .withTrustStorePasswordFile("/path/to/non-existance-password-file"); + .withTrustStorePasswordFile("/path/to/non-existance-password-file") + .build(); } /** @@ -159,7 +165,9 @@ public void testBadKeystorePasswordFile() throws SSLException @Test(expected = IllegalArgumentException.class) public void testNullKeystorePasswordDisallowed() throws SSLException { - EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptions.withKeyStorePassword(null); + EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptionsBuilder + .withKeyStorePassword(null) + .build(); Assert.assertEquals("org.apache.cassandra.security.FileBasedSslContextFactoryTest$TestFileBasedSSLContextFactory", localEncryptionOptions.ssl_context_factory.class_name); @@ -187,7 +195,9 @@ public void testNullKeystorePasswordDisallowed() throws SSLException @Test public void testOnlyEmptyOutboundKeystorePassword() throws SSLException { - EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptions.withOutboundKeystorePassword(null); + EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptionsBuilder + .withOutboundKeystorePassword(null) + .build(); Assert.assertEquals("org.apache.cassandra.security.FileBasedSslContextFactoryTest$TestFileBasedSSLContextFactory", localEncryptionOptions.ssl_context_factory.class_name); @@ -203,7 +213,9 @@ public void testOnlyEmptyOutboundKeystorePassword() throws SSLException @Test public void testEmptyTruststorePassword() throws SSLException { - EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptions.withTrustStorePassword(null); + EncryptionOptions.ServerEncryptionOptions localEncryptionOptions = encryptionOptionsBuilder + .withTrustStorePassword(null) + .build(); Assert.assertEquals("org.apache.cassandra.security.FileBasedSslContextFactoryTest$TestFileBasedSSLContextFactory", localEncryptionOptions.ssl_context_factory.class_name); Assert.assertNotNull("keystore_password must not be null", localEncryptionOptions.keystore_password); diff --git a/test/unit/org/apache/cassandra/security/PEMBasedSslContextFactoryTest.java b/test/unit/org/apache/cassandra/security/PEMBasedSslContextFactoryTest.java index 9781e9d08a71..18d323286d3e 100644 --- a/test/unit/org/apache/cassandra/security/PEMBasedSslContextFactoryTest.java +++ b/test/unit/org/apache/cassandra/security/PEMBasedSslContextFactoryTest.java @@ -39,8 +39,8 @@ import org.apache.cassandra.transport.TlsTestUtils; import static org.apache.cassandra.config.CassandraRelevantProperties.DISABLE_TCACTIVE_OPENSSL; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.apache.cassandra.security.PEMBasedSslContextFactory.ConfigKey.ENCODED_CERTIFICATES; import static org.apache.cassandra.security.PEMBasedSslContextFactory.ConfigKey.ENCODED_KEY; import static org.apache.cassandra.security.PEMBasedSslContextFactory.ConfigKey.KEY_PASSWORD; @@ -215,12 +215,14 @@ public void getSslContextOpenSSL() throws IOException { ParameterizedClass sslContextFactory = new ParameterizedClass(PEMBasedSslContextFactory.class.getSimpleName() , new HashMap<>()); - EncryptionOptions options = new EncryptionOptions().withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PEM_PATH) - .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) - .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withRequireClientAuth(NOT_REQUIRED) - .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") - .withSslContextFactory(sslContextFactory); + EncryptionOptions.ClientEncryptionOptions options = new EncryptionOptions.ClientEncryptionOptions.Builder() + .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PEM_PATH) + .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) + .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withRequireClientAuth(NOT_REQUIRED) + .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") + .withSslContextFactory(sslContextFactory) + .build(); SslContext sslContext = SSLFactory.getOrCreateSslContext(options, REQUIRED, ISslContextFactory.SocketType.SERVER, "test"); Assert.assertNotNull(sslContext); if (OpenSsl.isAvailable()) @@ -234,14 +236,16 @@ public void getSslContextOpenSSLOutboundKeystore() throws IOException { ParameterizedClass sslContextFactory = new ParameterizedClass(PEMBasedSslContextFactory.class.getSimpleName() , new HashMap<>()); - EncryptionOptions.ServerEncryptionOptions options = new EncryptionOptions.ServerEncryptionOptions().withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PEM_PATH) - .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) - .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withOutboundKeystore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withRequireClientAuth(NOT_REQUIRED) - .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") - .withSslContextFactory(sslContextFactory); + EncryptionOptions.ServerEncryptionOptions options = + new EncryptionOptions.ServerEncryptionOptions.Builder().withOutboundKeystore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) + .withOutboundKeystorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PEM_PATH) + .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) + .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withRequireClientAuth(NOT_REQUIRED) + .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") + .withSslContextFactory(sslContextFactory) + .build(); SslContext sslContext = SSLFactory.getOrCreateSslContext(options, REQUIRED, ISslContextFactory.SocketType.CLIENT, "test"); Assert.assertNotNull(sslContext); if (OpenSsl.isAvailable()) diff --git a/test/unit/org/apache/cassandra/security/SSLFactoryTest.java b/test/unit/org/apache/cassandra/security/SSLFactoryTest.java index ba46588686bc..7ac95173376c 100644 --- a/test/unit/org/apache/cassandra/security/SSLFactoryTest.java +++ b/test/unit/org/apache/cassandra/security/SSLFactoryTest.java @@ -1,21 +1,21 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.cassandra.security; import java.io.FileInputStream; @@ -46,18 +46,20 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.EncryptionOptions; import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions; +import org.apache.cassandra.config.EncryptionOptions.ServerEncryptionOptions.Builder; import org.apache.cassandra.config.ParameterizedClass; import org.apache.cassandra.io.util.File; import org.apache.cassandra.transport.TlsTestUtils; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; public class SSLFactoryTest { static final SelfSignedCertificate ssc; + static { DatabaseDescriptor.daemonInitialization(); @@ -77,33 +79,41 @@ public class SSLFactoryTest public void setup() { SSLFactory.clearSslContextCache(); - encryptionOptions = new ServerEncryptionOptions() - .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) - .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) - .withRequireClientAuth(NOT_REQUIRED) - .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") - .withSslContextFactory(new ParameterizedClass(TestFileBasedSSLContextFactory.class.getName(), - new HashMap<>())); + encryptionOptions = new Builder().withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PATH) + .withTrustStorePassword(TlsTestUtils.SERVER_TRUSTSTORE_PASSWORD) + .withRequireClientAuth(NOT_REQUIRED) + .withCipherSuites("TLS_RSA_WITH_AES_128_CBC_SHA") + .withSslContextFactory(new ParameterizedClass(TestFileBasedSSLContextFactory.class.getName(), + new HashMap<>())) + .build(); } - private ServerEncryptionOptions addKeystoreOptions(ServerEncryptionOptions options) + private Builder addKeystoreOptions(ServerEncryptionOptions options) { - return options.withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH) - .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD); + EncryptionOptions.ServerEncryptionOptions.Builder builder = new EncryptionOptions.ServerEncryptionOptions.Builder(options); + + builder.withOutboundKeystorePassword(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PASSWORD) + .withOutboundKeystore(TlsTestUtils.SERVER_OUTBOUND_KEYSTORE_PATH) + .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH); + + return builder; } - private ServerEncryptionOptions addPEMKeystoreOptions(ServerEncryptionOptions options) + private Builder addPEMKeystoreOptions(ServerEncryptionOptions options) { ParameterizedClass sslContextFactoryClass = new ParameterizedClass("org.apache.cassandra.security.PEMBasedSslContextFactory", new HashMap<>()); - return options.withSslContextFactory(sslContextFactoryClass) - .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) - .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withOutboundKeystore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) - .withOutboundKeystorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) - .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PEM_PATH); + EncryptionOptions.ServerEncryptionOptions.Builder builder = new EncryptionOptions.ServerEncryptionOptions.Builder(options); + + builder.withOutboundKeystore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) + .withOutboundKeystorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withSslContextFactory(sslContextFactoryClass) + .withKeyStore(TlsTestUtils.SERVER_KEYSTORE_PATH_PEM) + .withKeyStorePassword(TlsTestUtils.SERVER_KEYSTORE_PASSWORD) + .withTrustStore(TlsTestUtils.SERVER_TRUSTSTORE_PEM_PATH); + + return builder; } @Test @@ -111,9 +121,13 @@ public void testSslContextReload_HappyPath() throws IOException, InterruptedExce { try { - ServerEncryptionOptions options = addKeystoreOptions(encryptionOptions) - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all); - ServerEncryptionOptions legacyOptions = options.withOptional(false).withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all); + Builder optionsBuilder = addKeystoreOptions(encryptionOptions) + .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all); + ServerEncryptionOptions options = optionsBuilder.build(); + ServerEncryptionOptions legacyOptions = optionsBuilder + .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withOptional(false) + .build(); options.sslContextFactoryInstance.initHotReloading(); legacyOptions.sslContextFactoryInstance.initHotReloading(); @@ -146,8 +160,9 @@ public void testSslContextReload_HappyPath() throws IOException, InterruptedExce public void testServerSocketShouldUseKeystore() throws IOException, CertificateException, KeyStoreException, NoSuchAlgorithmException, NoSuchFieldException, IllegalAccessException, ClassNotFoundException, NoSuchMethodException, InvocationTargetException { ServerEncryptionOptions options = addKeystoreOptions(encryptionOptions) - .withOutboundKeystore("dummyKeystore") - .withOutboundKeystorePassword("dummyPassword"); + .withOutboundKeystore("dummyKeystore") + .withOutboundKeystorePassword("dummyPassword") + .build(); // Server socket type should create a keystore with keystore & keystore password final OpenSslServerContext context = (OpenSslServerContext) SSLFactory.createNettySslContext(options, REQUIRED, ISslContextFactory.SocketType.SERVER); @@ -163,8 +178,9 @@ public void testServerSocketShouldUseKeystore() throws IOException, CertificateE public void testClientSocketShouldUseOutboundKeystore() throws IOException, CertificateException, KeyStoreException, NoSuchAlgorithmException, NoSuchFieldException, ClassNotFoundException, InvocationTargetException, IllegalAccessException, NoSuchMethodException { ServerEncryptionOptions options = addKeystoreOptions(encryptionOptions) - .withKeyStore("dummyKeystore") - .withKeyStorePassword("dummyPassword"); + .withKeyStore("dummyKeystore") + .withKeyStorePassword("dummyPassword") + .build(); // Client socket type should create a keystore with outbound Keystore & outbound password final OpenSslClientContext context = (OpenSslClientContext) SSLFactory.createNettySslContext(options, REQUIRED, ISslContextFactory.SocketType.CLIENT); @@ -181,10 +197,14 @@ public void testPEMSslContextReload_HappyPath() throws IOException { try { - ServerEncryptionOptions options = addPEMKeystoreOptions(encryptionOptions) - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.dc); + Builder optionsBuilder = addPEMKeystoreOptions(encryptionOptions); + ServerEncryptionOptions options = optionsBuilder.withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.dc) + .build(); // emulate InboundSockets and share the cert but with different options, no extra hot reloading init - ServerEncryptionOptions legacyOptions = options.withOptional(false).withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all); + ServerEncryptionOptions legacyOptions = optionsBuilder + .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withOptional(false) + .build(); options.sslContextFactoryInstance.initHotReloading(); legacyOptions.sslContextFactoryInstance.initHotReloading(); @@ -217,8 +237,9 @@ public void testPEMSslContextReload_HappyPath() throws IOException public void testSslFactorySslInit_BadPassword_ThrowsException() throws IOException { ServerEncryptionOptions options = addKeystoreOptions(encryptionOptions) - .withKeyStorePassword("bad password") - .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all); + .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withKeyStorePassword("bad password") + .build(); SSLFactory.validateSslContext("testSslFactorySslInit_BadPassword_ThrowsException", options, NOT_REQUIRED, true); } @@ -228,13 +249,17 @@ public void testSslFactoryHotReload_BadPassword_DoesNotClearExistingSslContext() { try { - ServerEncryptionOptions options = addKeystoreOptions(encryptionOptions); + Builder optionsBuilder = addKeystoreOptions(encryptionOptions); + ServerEncryptionOptions options = optionsBuilder.build(); // emulate InboundSockets and share the cert but with different options, no extra hot reloading init - ServerEncryptionOptions legacyOptions = options.withOptional(false).withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all); + ServerEncryptionOptions legacyOptions = optionsBuilder + .withInternodeEncryption(ServerEncryptionOptions.InternodeEncryption.all) + .withOptional(false) + .build(); File testKeystoreFile = new File(options.keystore + ".test"); FileUtils.copyFile(new File(options.keystore).toJavaIOFile(), testKeystoreFile.toJavaIOFile()); - options = options.withKeyStore(testKeystoreFile.path()); + options = new Builder(options).withKeyStore(testKeystoreFile.path()).build(); SSLFactory.initHotReloading(options, options, true); // deliberately not initializing with legacyOptions to match InboundSockets.addBindings @@ -261,11 +286,12 @@ public void testSslFactoryHotReload_CorruptOrNonExistentFile_DoesNotClearExistin { try { - ServerEncryptionOptions options = addKeystoreOptions(encryptionOptions); + Builder optionsBuilder = addKeystoreOptions(encryptionOptions); + ServerEncryptionOptions options = optionsBuilder.build(); File testKeystoreFile = new File(options.keystore + ".test"); FileUtils.copyFile(new File(options.keystore).toJavaIOFile(), testKeystoreFile.toJavaIOFile()); - options = options.withKeyStore(testKeystoreFile.path()); + options = optionsBuilder.withKeyStore(testKeystoreFile.path()).build(); SSLFactory.initHotReloading(options, options, true); @@ -294,8 +320,10 @@ public void testSslFactoryHotReload_CorruptOrNonExistentFile_DoesNotClearExistin @Test public void getSslContext_ParamChanges() throws IOException { - ServerEncryptionOptions options = addKeystoreOptions(encryptionOptions) - .withCipherSuites("TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"); + Builder optionsBuilder = addKeystoreOptions(encryptionOptions); + ServerEncryptionOptions options = optionsBuilder + .withCipherSuites("TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256") + .build(); SslContext ctx1 = SSLFactory.getOrCreateSslContext(options, REQUIRED, ISslContextFactory.SocketType.SERVER, "test"); @@ -303,7 +331,7 @@ public void getSslContext_ParamChanges() throws IOException Assert.assertTrue(ctx1.isServer()); Assert.assertEquals(ctx1.cipherSuites(), options.cipher_suites); - options = options.withCipherSuites("TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"); + options = optionsBuilder.withCipherSuites("TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256").build(); SslContext ctx2 = SSLFactory.getOrCreateSslContext(options, REQUIRED, ISslContextFactory.SocketType.CLIENT, "test"); @@ -313,30 +341,33 @@ public void getSslContext_ParamChanges() throws IOException } @Test - public void testCacheKeyEqualityForCustomSslContextFactory() { + public void testCacheKeyEqualityForCustomSslContextFactory() + { - Map parameters1 = new HashMap<>(); + Map parameters1 = new HashMap<>(); parameters1.put("key1", "value1"); parameters1.put("key2", "value2"); - EncryptionOptions encryptionOptions1 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions1 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters1)) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); SSLFactory.CacheKey cacheKey1 = new SSLFactory.CacheKey(encryptionOptions1, ISslContextFactory.SocketType.SERVER, "test" ); - Map parameters2 = new HashMap<>(); + Map parameters2 = new HashMap<>(); parameters2.put("key1", "value1"); parameters2.put("key2", "value2"); - EncryptionOptions encryptionOptions2 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions2 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters2)) .withProtocol("TLSv1.1") .withRequireClientAuth(REQUIRED) - .withRequireEndpointVerification(false); + .withRequireEndpointVerification(false) + .build(); SSLFactory.CacheKey cacheKey2 = new SSLFactory.CacheKey(encryptionOptions2, ISslContextFactory.SocketType.SERVER, "test" ); @@ -345,26 +376,29 @@ public void testCacheKeyEqualityForCustomSslContextFactory() { } @Test - public void testCacheKeyInequalityForCustomSslContextFactory() { + public void testCacheKeyInequalityForCustomSslContextFactory() + { - Map parameters1 = new HashMap<>(); + Map parameters1 = new HashMap<>(); parameters1.put("key1", "value11"); parameters1.put("key2", "value12"); - EncryptionOptions encryptionOptions1 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions1 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters1)) - .withProtocol("TLSv1.1"); + .withProtocol("TLSv1.1") + .build(); SSLFactory.CacheKey cacheKey1 = new SSLFactory.CacheKey(encryptionOptions1, ISslContextFactory.SocketType.SERVER, "test" ); - Map parameters2 = new HashMap<>(); + Map parameters2 = new HashMap<>(); parameters2.put("key1", "value21"); parameters2.put("key2", "value22"); - EncryptionOptions encryptionOptions2 = - new EncryptionOptions() + EncryptionOptions.ClientEncryptionOptions encryptionOptions2 = + new EncryptionOptions.ClientEncryptionOptions.Builder() .withSslContextFactory(new ParameterizedClass(DummySslContextFactoryImpl.class.getName(), parameters2)) - .withProtocol("TLSv1.1"); + .withProtocol("TLSv1.1") + .build(); SSLFactory.CacheKey cacheKey2 = new SSLFactory.CacheKey(encryptionOptions2, ISslContextFactory.SocketType.SERVER, "test" ); @@ -372,7 +406,8 @@ public void testCacheKeyInequalityForCustomSslContextFactory() { Assert.assertNotEquals(cacheKey1, cacheKey2); } - public static class TestFileBasedSSLContextFactory extends FileBasedSslContextFactory { + public static class TestFileBasedSSLContextFactory extends FileBasedSslContextFactory + { public TestFileBasedSSLContextFactory(Map parameters) { super(parameters); diff --git a/test/unit/org/apache/cassandra/service/ClientWarningsTest.java b/test/unit/org/apache/cassandra/service/ClientWarningsTest.java index cf6d6ed40032..fb9f594801b3 100644 --- a/test/unit/org/apache/cassandra/service/ClientWarningsTest.java +++ b/test/unit/org/apache/cassandra/service/ClientWarningsTest.java @@ -70,7 +70,7 @@ public void testUnloggedBatch() throws Exception createTable("CREATE TABLE %s (pk int PRIMARY KEY, v text)"); // v4 and higher - try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, true, new EncryptionOptions())) + try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, true, new EncryptionOptions.ClientEncryptionOptions())) { client.connect(false); @@ -90,7 +90,7 @@ public void testLargeBatch() throws Exception createTable("CREATE TABLE %s (pk int PRIMARY KEY, v text)"); // v4 and higher - try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, true, new EncryptionOptions())) + try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, true, new EncryptionOptions.ClientEncryptionOptions())) { client.connect(false); @@ -112,7 +112,7 @@ public void testTombstoneWarning() throws Exception final int iterations = 10000; createTable("CREATE TABLE %s (pk int, ck int, v int, PRIMARY KEY (pk, ck))"); - try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, true, new EncryptionOptions())) + try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, version, true, new EncryptionOptions.ClientEncryptionOptions())) { client.connect(false); diff --git a/test/unit/org/apache/cassandra/service/NativeTransportServiceTest.java b/test/unit/org/apache/cassandra/service/NativeTransportServiceTest.java index 645d5b8f8bca..960289940062 100644 --- a/test/unit/org/apache/cassandra/service/NativeTransportServiceTest.java +++ b/test/unit/org/apache/cassandra/service/NativeTransportServiceTest.java @@ -36,7 +36,7 @@ public class NativeTransportServiceTest { - static EncryptionOptions defaultOptions; + static EncryptionOptions.ClientEncryptionOptions defaultOptions; @BeforeClass public static void setupDD() @@ -48,7 +48,7 @@ public static void setupDD() @After public void resetConfig() { - DatabaseDescriptor.updateNativeProtocolEncryptionOptions(update -> new EncryptionOptions(defaultOptions).applyConfig()); + DatabaseDescriptor.updateNativeProtocolEncryptionOptions(update -> new EncryptionOptions.ClientEncryptionOptions.Builder(defaultOptions).build()); } @Test @@ -127,8 +127,11 @@ public void testPlainDefaultPort() public void testSSLOnly() { // default ssl settings: client encryption enabled and default native transport port used for ssl only - DatabaseDescriptor.updateNativeProtocolEncryptionOptions(options -> options.withEnabled(true) - .withOptional(false)); + DatabaseDescriptor.updateNativeProtocolEncryptionOptions(options -> + new EncryptionOptions.ClientEncryptionOptions.Builder(options) + .withEnabled(true) + .withOptional(false) + .build()); withService((NativeTransportService service) -> { @@ -144,8 +147,11 @@ public void testSSLOnly() public void testSSLOptional() { // default ssl settings: client encryption enabled and default native transport port used for optional ssl - DatabaseDescriptor.updateNativeProtocolEncryptionOptions(options -> options.withEnabled(true) - .withOptional(true)); + DatabaseDescriptor.updateNativeProtocolEncryptionOptions(options -> + new EncryptionOptions.ClientEncryptionOptions.Builder(options) + .withEnabled(true) + .withOptional(true) + .build()); withService((NativeTransportService service) -> { diff --git a/test/unit/org/apache/cassandra/service/ProtocolBetaVersionTest.java b/test/unit/org/apache/cassandra/service/ProtocolBetaVersionTest.java index a5b32bfa1fa7..22324f178b03 100644 --- a/test/unit/org/apache/cassandra/service/ProtocolBetaVersionTest.java +++ b/test/unit/org/apache/cassandra/service/ProtocolBetaVersionTest.java @@ -68,7 +68,7 @@ public void testProtocolBetaVersion() throws Exception createTable("CREATE TABLE %s (pk int PRIMARY KEY, v int)"); assertTrue(betaVersion.isBeta()); // change to another beta version or remove test if no beta version - try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, betaVersion, true, new EncryptionOptions())) + try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, betaVersion, true, new EncryptionOptions.ClientEncryptionOptions())) { client.connect(false); for (int i = 0; i < 10; i++) @@ -103,7 +103,7 @@ public void unforcedProtocolVersionTest() throws Exception } assertTrue(betaVersion.isBeta()); // change to another beta version or remove test if no beta version - try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, betaVersion, false, new EncryptionOptions())) + try (SimpleClient client = new SimpleClient(nativeAddr.getHostAddress(), nativePort, betaVersion, false, new EncryptionOptions.ClientEncryptionOptions())) { client.connect(false); fail("Exception should have been thrown"); diff --git a/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java b/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java index 4fc23256770a..b203af1b4357 100644 --- a/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java +++ b/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java @@ -28,6 +28,7 @@ import org.junit.Test; import org.apache.cassandra.io.util.File; +import org.apache.cassandra.transport.TlsTestUtils; import static org.apache.cassandra.tools.OfflineToolUtils.sstableDirName; import static org.junit.Assert.assertEquals; @@ -85,6 +86,35 @@ public void testEncryptionSettings() throws Exception assertEquals("test.jks", options.clientEncOptions.keystore); } + /** + * Tests for client_encryption_options override from the command line. + */ + @Test + public void testEncryptionSettingsOverride() throws Exception + { + // Default Cassandra config + File config = new File(Paths.get(".", "test", "conf", "cassandra-mtls.yaml").normalize()); + String[] args = { "-d", "127.9.9.1", "-f", config.absolutePath(), + "-ts", "test.jks", "-tspw", "truststorePass1", + "-ks", "test.jks", "-kspw", "testdata1", + "--ssl-ciphers", "TLS_RSA_WITH_AES_256_CBC_SHA", + "--ssl-alg", "SunX509", "--store-type", "JKS", "--ssl-protocol", "TLS", + sstableDirName("legacy_sstables", "legacy_ma_simple") }; + LoaderOptions options = LoaderOptions.builder().parseArgs(args).build(); + // Below two lines validating server encryption options is to verify that we are loading config from the yaml + assertEquals(TlsTestUtils.SERVER_KEYSTORE_PATH, options.serverEncOptions.keystore); + assertEquals(TlsTestUtils.SERVER_KEYSTORE_PASSWORD, options.serverEncOptions.keystore_password); + // Below asserts validate the overrides for the client encryption options from the command line + // Since the values are provided by (and local to) this test, they are hardcoded + assertEquals("JKS", options.clientEncOptions.store_type); + assertEquals("test.jks", options.clientEncOptions.truststore); + assertEquals("truststorePass1", options.clientEncOptions.truststore_password); + assertEquals("test.jks", options.clientEncOptions.keystore); + assertEquals("testdata1", options.clientEncOptions.keystore_password); + assertEquals("TLS_RSA_WITH_AES_256_CBC_SHA", options.clientEncOptions.cipherSuitesArray()[0]); + assertEquals("SunX509", options.clientEncOptions.algorithm); + } + @Test public void testThrottleDefaultSettings() { diff --git a/test/unit/org/apache/cassandra/transport/EarlyAuthenticationTest.java b/test/unit/org/apache/cassandra/transport/EarlyAuthenticationTest.java index ed98140b9b5e..a581086c7645 100644 --- a/test/unit/org/apache/cassandra/transport/EarlyAuthenticationTest.java +++ b/test/unit/org/apache/cassandra/transport/EarlyAuthenticationTest.java @@ -84,23 +84,23 @@ public void initNetwork() throws IOException, TimeoutException }); } - private EncryptionOptions clientEncryptionOptions(boolean presentClientCertificate) + private EncryptionOptions.ClientEncryptionOptions clientEncryptionOptions(boolean presentClientCertificate) { - EncryptionOptions encryptionOptions = new EncryptionOptions() - .withEnabled(true) - .withRequireClientAuth(EncryptionOptions.ClientAuth.OPTIONAL) - .withTrustStore(TlsTestUtils.CLIENT_TRUSTSTORE_PATH) - .withTrustStorePassword(TlsTestUtils.CLIENT_TRUSTSTORE_PASSWORD) - .withSslContextFactory(new ParameterizedClass(SimpleClientSslContextFactory.class.getName())); + EncryptionOptions.ClientEncryptionOptions.Builder builder = new EncryptionOptions.ClientEncryptionOptions.Builder(); + builder.withEnabled(true) + .withRequireClientAuth(EncryptionOptions.ClientEncryptionOptions.ClientAuth.OPTIONAL) + .withTrustStore(TlsTestUtils.CLIENT_TRUSTSTORE_PATH) + .withTrustStorePassword(TlsTestUtils.CLIENT_TRUSTSTORE_PASSWORD) + .withSslContextFactory(new ParameterizedClass(SimpleClientSslContextFactory.class.getName())); if (presentClientCertificate) { - encryptionOptions = encryptionOptions.withKeyStore(TlsTestUtils.CLIENT_SPIFFE_KEYSTORE_PATH) - .withStoreType("JKS") - .withKeyStorePassword(TlsTestUtils.CLIENT_SPIFFE_KEYSTORE_PASSWORD); + builder.withKeyStore(TlsTestUtils.CLIENT_SPIFFE_KEYSTORE_PATH) + .withStoreType("JKS") + .withKeyStorePassword(TlsTestUtils.CLIENT_SPIFFE_KEYSTORE_PASSWORD); } - return new EncryptionOptions(encryptionOptions); + return new EncryptionOptions.ClientEncryptionOptions(builder.build()); } @Test @@ -180,6 +180,5 @@ public Consumer expectAuthenticationError(final String expecte } }; } - } diff --git a/test/unit/org/apache/cassandra/transport/MessagePayloadTest.java b/test/unit/org/apache/cassandra/transport/MessagePayloadTest.java index a50174bd8702..42c6cbd021ba 100644 --- a/test/unit/org/apache/cassandra/transport/MessagePayloadTest.java +++ b/test/unit/org/apache/cassandra/transport/MessagePayloadTest.java @@ -127,7 +127,7 @@ public void testMessagePayloadBeta() throws Throwable nativePort, ProtocolVersion.V5, true, - new EncryptionOptions()); + new EncryptionOptions.ClientEncryptionOptions()); try { client.connect(false); diff --git a/test/unit/org/apache/cassandra/transport/SimpleClientSslContextFactory.java b/test/unit/org/apache/cassandra/transport/SimpleClientSslContextFactory.java index 1a6871716a53..468c9cbc17b3 100644 --- a/test/unit/org/apache/cassandra/transport/SimpleClientSslContextFactory.java +++ b/test/unit/org/apache/cassandra/transport/SimpleClientSslContextFactory.java @@ -30,12 +30,12 @@ import org.apache.cassandra.config.EncryptionOptions; import org.apache.cassandra.security.FileBasedSslContextFactory; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.NOT_REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.NOT_REQUIRED; /** * A custom implementation of {@link FileBasedSslContextFactory} to be used by tests utilizing {@link SimpleClient}. *

      - * Provides a subtly different implementation of {@link #createNettySslContext(EncryptionOptions.ClientAuth, SocketType, CipherSuiteFilter)} + * Provides a subtly different implementation of {@link #createNettySslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth, SocketType, CipherSuiteFilter)} * that only configures an {@link SslContext} for clients and most importantly only configures a key manager if an * outbound keystore is configured, where the existing implementation always does this. This is useful for tests * that try to create a client that uses encryption but does not provide a certificate. @@ -49,7 +49,7 @@ public SimpleClientSslContextFactory(Map parameters) } @Override - public SSLContext createJSSESslContext(EncryptionOptions.ClientAuth clientAuth) throws SSLException + public SSLContext createJSSESslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth) throws SSLException { TrustManager[] trustManagers = null; if (clientAuth != NOT_REQUIRED) @@ -76,7 +76,7 @@ public SSLContext createJSSESslContext(EncryptionOptions.ClientAuth clientAuth) } @Override - public SslContext createNettySslContext(EncryptionOptions.ClientAuth clientAuth, SocketType socketType, + public SslContext createNettySslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth clientAuth, SocketType socketType, CipherSuiteFilter cipherFilter) throws SSLException { SslContextBuilder builder = SslContextBuilder.forClient(); diff --git a/test/unit/org/apache/cassandra/transport/TlsTestUtils.java b/test/unit/org/apache/cassandra/transport/TlsTestUtils.java index 30a6054127ca..76faec85b1eb 100644 --- a/test/unit/org/apache/cassandra/transport/TlsTestUtils.java +++ b/test/unit/org/apache/cassandra/transport/TlsTestUtils.java @@ -86,17 +86,18 @@ public class TlsTestUtils public static String CLIENT_TRUSTSTORE_PATH = "test/conf/cassandra_ssl_test.truststore"; public static String CLIENT_TRUSTSTORE_PASSWORD = "cassandra"; - public static EncryptionOptions getClientEncryptionOptions() + public static EncryptionOptions.ClientEncryptionOptions getClientEncryptionOptions() { - return new EncryptionOptions(new EncryptionOptions() + return new EncryptionOptions.ClientEncryptionOptions(new EncryptionOptions.ClientEncryptionOptions.Builder() .withEnabled(true) - .withRequireClientAuth(EncryptionOptions.ClientAuth.OPTIONAL) + .withRequireClientAuth(EncryptionOptions.ClientEncryptionOptions.ClientAuth.OPTIONAL) .withOptional(true) .withKeyStore(SERVER_KEYSTORE_PATH) .withKeyStorePassword(SERVER_KEYSTORE_PASSWORD) .withTrustStore(SERVER_TRUSTSTORE_PATH) .withTrustStorePassword(SERVER_TRUSTSTORE_PASSWORD) - .withRequireEndpointVerification(false)); + .withRequireEndpointVerification(false) + .build()); } public static void configureWithMutualTlsWithPasswordFallbackAuthenticator(Config config) @@ -129,7 +130,7 @@ public static SSLOptions getSSLOptions(boolean provideClientCert) throws SSLExce { return RemoteEndpointAwareJdkSSLOptions.builder() .withSSLContext(getClientSslContextFactory(provideClientCert) - .createJSSESslContext(EncryptionOptions.ClientAuth.OPTIONAL)) + .createJSSESslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth.OPTIONAL)) .build(); } @@ -139,7 +140,7 @@ public static SSLOptions getSSLOptions(Path keystorePath, Path truststorePath) t { return RemoteEndpointAwareJdkSSLOptions.builder() .withSSLContext(getClientSslContextFactory(keystorePath, truststorePath) - .createJSSESslContext(EncryptionOptions.ClientAuth.OPTIONAL)) + .createJSSESslContext(EncryptionOptions.ClientEncryptionOptions.ClientAuth.OPTIONAL)) .build(); } catch (SSLException e) diff --git a/tools/stress/src/org/apache/cassandra/stress/settings/SettingsTransport.java b/tools/stress/src/org/apache/cassandra/stress/settings/SettingsTransport.java index cf629998225b..ccdf0a53b207 100644 --- a/tools/stress/src/org/apache/cassandra/stress/settings/SettingsTransport.java +++ b/tools/stress/src/org/apache/cassandra/stress/settings/SettingsTransport.java @@ -44,31 +44,32 @@ public SettingsTransport(TOptions options, SettingsCredentials credentials) this.credentials = credentials; } - public EncryptionOptions getEncryptionOptions() + public EncryptionOptions.ClientEncryptionOptions getEncryptionOptions() { - EncryptionOptions encOptions = new EncryptionOptions().applyConfig(); + EncryptionOptions.ClientEncryptionOptions encOptions = new EncryptionOptions.ClientEncryptionOptions().applyConfig(); if (options.trustStore.present()) { - encOptions = encOptions - .withEnabled(true) - .withTrustStore(options.trustStore.value()) - .withTrustStorePassword(options.trustStorePw.setByUser() ? options.trustStorePw.value() : credentials.transportTruststorePassword) - .withAlgorithm(options.alg.value()) - .withProtocol(options.protocol.value()) - .withCipherSuites(options.ciphers.value().split(",")); + EncryptionOptions.Builder encOptionsBuilder = new EncryptionOptions.ClientEncryptionOptions.Builder(encOptions) + .withEnabled(true) + .withTrustStore(options.trustStore.value()) + .withTrustStorePassword(options.trustStorePw.setByUser() ? options.trustStorePw.value() : credentials.transportTruststorePassword) + .withAlgorithm(options.alg.value()) + .withProtocol(options.protocol.value()) + .withCipherSuites(options.ciphers.value().split(",")); + if (options.keyStore.present()) { - encOptions = encOptions - .withKeyStore(options.keyStore.value()) - .withKeyStorePassword(options.keyStorePw.setByUser() ? options.keyStorePw.value() : credentials.transportKeystorePassword); + encOptionsBuilder.withKeyStore(options.keyStore.value()) + .withKeyStorePassword(options.keyStorePw.setByUser() ? options.keyStorePw.value() : credentials.transportKeystorePassword); } else { // mandatory for SSLFactory.createSSLContext(), see CASSANDRA-9325 - encOptions = encOptions - .withKeyStore(encOptions.truststore) - .withKeyStorePassword(encOptions.truststore_password != null ? encOptions.truststore_password : credentials.transportTruststorePassword); + encOptionsBuilder.withKeyStore(encOptions.truststore) + .withKeyStorePassword(encOptions.truststore_password != null ? encOptions.truststore_password : credentials.transportTruststorePassword); } + + encOptions = encOptionsBuilder.build(); } return encOptions; } diff --git a/tools/stress/src/org/apache/cassandra/stress/settings/StressSettings.java b/tools/stress/src/org/apache/cassandra/stress/settings/StressSettings.java index 6aea048b4cc0..f81c4a9787c9 100644 --- a/tools/stress/src/org/apache/cassandra/stress/settings/StressSettings.java +++ b/tools/stress/src/org/apache/cassandra/stress/settings/StressSettings.java @@ -139,7 +139,7 @@ public JavaDriverClient getJavaDriverClient(String keyspace) if (client != null) return client; - EncryptionOptions encOptions = transport.getEncryptionOptions(); + EncryptionOptions.ClientEncryptionOptions encOptions = transport.getEncryptionOptions(); JavaDriverClient c = new JavaDriverClient(this, node.nodes, port.nativePort, encOptions); c.connect(mode.compression()); if (keyspace != null) diff --git a/tools/stress/src/org/apache/cassandra/stress/util/JavaDriverClient.java b/tools/stress/src/org/apache/cassandra/stress/util/JavaDriverClient.java index 3d72828daf73..f2a92fe5aa7d 100644 --- a/tools/stress/src/org/apache/cassandra/stress/util/JavaDriverClient.java +++ b/tools/stress/src/org/apache/cassandra/stress/util/JavaDriverClient.java @@ -41,7 +41,7 @@ import org.apache.cassandra.security.SSLFactory; import org.apache.cassandra.stress.settings.StressSettings; -import static org.apache.cassandra.config.EncryptionOptions.ClientAuth.REQUIRED; +import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; public class JavaDriverClient { @@ -60,7 +60,7 @@ public class JavaDriverClient public final int connectionsPerHost; private final ProtocolVersion protocolVersion; - private final EncryptionOptions encryptionOptions; + private final EncryptionOptions.ClientEncryptionOptions encryptionOptions; private Cluster cluster; private Session session; private final LoadBalancingPolicy loadBalancingPolicy; @@ -69,15 +69,15 @@ public class JavaDriverClient public JavaDriverClient(StressSettings settings, String host, int port) { - this(settings, Collections.singletonList(host), port, new EncryptionOptions()); + this(settings, Collections.singletonList(host), port, new EncryptionOptions.ClientEncryptionOptions()); } public JavaDriverClient(StressSettings settings, List hosts, int port) { - this(settings, hosts, port, new EncryptionOptions()); + this(settings, hosts, port, new EncryptionOptions.ClientEncryptionOptions()); } - public JavaDriverClient(StressSettings settings, List hosts, int port, EncryptionOptions encryptionOptions) + public JavaDriverClient(StressSettings settings, List hosts, int port, EncryptionOptions.ClientEncryptionOptions encryptionOptions) { this.protocolVersion = settings.mode.protocolVersion; this.hosts = hosts; @@ -85,7 +85,7 @@ public JavaDriverClient(StressSettings settings, List hosts, int port, E this.username = settings.mode.username; this.password = settings.mode.password; this.authProvider = settings.mode.authProvider; - this.encryptionOptions = new EncryptionOptions(encryptionOptions).applyConfig(); + this.encryptionOptions = new EncryptionOptions.ClientEncryptionOptions(encryptionOptions).applyConfig(); this.loadBalancingPolicy = loadBalancingPolicy(settings); this.connectionsPerHost = settings.mode.connectionsPerHost == null ? 8 : settings.mode.connectionsPerHost; From 8fcf309dad871a373b41cb26c8e0b714c8c874ef Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Wed, 9 Apr 2025 13:35:08 +0200 Subject: [PATCH 278/340] Implement appender of slow queries to system_views.slow_queries table patch by Stefan Miklosovic; reviewed by Dmitry Konstantinov, Bernardo Botella for CASSANDRA-13001 --- CHANGES.txt | 1 + conf/cassandra.yaml | 4 + conf/logback.xml | 12 + .../configuration/cass_logback_xml_file.adoc | 68 +++++- .../config/CassandraRelevantProperties.java | 3 + .../cassandra/db/AbstractReadQuery.java | 12 + .../cassandra/db/monitoring/Monitorable.java | 74 ++++++ .../db/monitoring/MonitoringTask.java | 217 ++++++++++++++++-- .../virtual/AbstractLoggerVirtualTable.java | 111 +++++++++ .../db/virtual/LogMessagesTable.java | 112 ++------- .../db/virtual/SlowQueriesTable.java | 195 ++++++++++++++++ .../db/virtual/SystemViewsKeyspace.java | 1 + .../cassandra/service/CassandraDaemon.java | 16 +- .../logging/AbstractVirtualTableAppender.java | 144 ++++++++++++ .../utils/logging/LogbackLoggingSupport.java | 28 ++- .../utils/logging/LoggingSupport.java | 8 +- .../utils/logging/SlowQueriesAppender.java | 45 ++++ .../utils/logging/VirtualTableAppender.java | 92 +------- ...logback-dtest_with_slow_query_appender.xml | 63 +++++ ...dtest_with_slow_query_appender_invalid.xml | 73 ++++++ .../test/AbstractVirtualLogsTableTest.java | 34 +++ .../test/SlowQueriesAppenderTest.java | 73 ++++++ .../distributed/test/SlowQueryDeserTest.java | 66 ++++++ .../test/VirtualTableLogsTest.java | 44 ++-- .../AbstractLoggerVirtualTableTest.java | 169 ++++++++++++++ .../db/virtual/LogMessagesTableTest.java | 144 +++--------- .../db/virtual/SlowQueriesTableTest.java | 145 ++++++++++++ 27 files changed, 1601 insertions(+), 353 deletions(-) create mode 100644 src/java/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTable.java create mode 100644 src/java/org/apache/cassandra/db/virtual/SlowQueriesTable.java create mode 100644 src/java/org/apache/cassandra/utils/logging/AbstractVirtualTableAppender.java create mode 100644 src/java/org/apache/cassandra/utils/logging/SlowQueriesAppender.java create mode 100644 test/conf/logback-dtest_with_slow_query_appender.xml create mode 100644 test/conf/logback-dtest_with_slow_query_appender_invalid.xml create mode 100644 test/distributed/org/apache/cassandra/distributed/test/AbstractVirtualLogsTableTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/SlowQueriesAppenderTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/SlowQueryDeserTest.java create mode 100644 test/unit/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTableTest.java create mode 100644 test/unit/org/apache/cassandra/db/virtual/SlowQueriesTableTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 5b7cb6469ca7..1d86f3078943 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Implement appender of slow queries to system_views.slow_queries table (CASSANDRA-13001) * Add autocompletion in CQLSH for built-in functions (CASSANDRA-19631) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * General Purpose Transactions (Accord) [CEP-15] (CASSANDRA-17092) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index fe80fda1645d..0c68afe500bc 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -1396,6 +1396,10 @@ request_timeout: 10000ms # How long before a node logs slow queries. Select queries that take longer than # this timeout to execute, will generate an aggregated log message, so that slow queries # can be identified. Set this value to zero to disable slow query logging. +# +# It is possible to log slow queries into system_views.slow_queries virtual table. +# Consult logback.xml to uncomment specific appender and logger to enable this functionality. +# # Min unit: ms slow_query_log_timeout: 500ms diff --git a/conf/logback.xml b/conf/logback.xml index 102cf06352a4..a8dabf656448 100644 --- a/conf/logback.xml +++ b/conf/logback.xml @@ -119,6 +119,18 @@ appender reference in the root level section below. --> + + + + + + + + + + diff --git a/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc b/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc index a62dfe91a7be..b6e4d5f54583 100644 --- a/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc +++ b/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc @@ -80,7 +80,7 @@ Specify the format of the message. Part of the rolling policy. %-5level [%thread] %date\{ISO8601} %F:%L - %msg%n -=== Logging to Cassandra virtual table +=== Logging system logs to Cassandra virtual table It is possible to configure logback.xml in such a way that logs would appear in `system_views.system_log` table. This is achieved by appender implemented in class `VirtualTableAppender` which is called `CQLLOG` in the @@ -101,6 +101,72 @@ each message will occupy memory. The appender to virtual table is commented out by default so logging to virtual table is not active. +=== Logging slow queries to Cassandra virual table + +It is possible to log slow queries into `system_views.slow_queries` table. A query is evaluated to be slow +if it takes more than `slow_query_log_timeout` in `cassandra.yaml`. + +To log messages to `system_views.slow_queries` you need to: + +1. uncomment `SLOW_QUERIES_APPENDER` log appender +2. uncomment `appender-ref` pointing to `SLOW_QUERIES_APPENDER` in `slow_queries` logger: + +The respective configuration in `logback.xml` looks like this: + +[source,XML] +---- + + + + + + + + +---- + +By default, slow queries will be logged to `debug.log`. By uncommenting virtual table appender, it will be +logged to `debug.log` as well as to `system_views.slow_queries`. If you want to log it to `system_views.slow_queries` only, you need to comment out `DEBUGLOG` `appender-ref` in `slow_queries` logger declaration. + +If you want to log slow queries to a dedicated log file (which is e.g. rotated), that is also possible +by pointing `slow_queries` logger to a respective file appender of a given reference, similar to `DEBUGLOG` where all logs go by default. + +The structure of a table looks like this: + +[source,cql] +---- +cassandra@cqlsh> DESCRIBE system_views.slow_queries ; + +/* +Warning: Table system_views.slow_queries is a virtual table and cannot be recreated with CQL. +Structure, for reference: +VIRTUAL TABLE system_views.slow_queries ( + keyspace_name text, + table_name text, + timestamp timestamp, + query text, + avg_ms bigint, + cross_node boolean, + max_ms bigint, + min_ms bigint, + times_reported int, + PRIMARY KEY (keyspace_name, table_name, timestamp, query) +) WITH CLUSTERING ORDER BY (table_name ASC, timestamp ASC, query ASC) + AND comment = 'Slow queries'; +---- + +By having slow queries in a virtual table, an operator can check if there are slow queries for some table, see if +some queries violate some time threshold etc. The rows in this table are same data as one would get in `debug.log`, they +are just way more convenient to parse and query. + +`system_views.slow_queries` table is limited on number of rows it can hold, by default 10 000, configurable by `cassandra.virtual.slow_queries.max.rows` system property. If this table is full, the oldest entry is removed and the newest is inserted. This virtual table can be truncated by CQL and deletion on partition key (`keyspace_name` column) is allowed. + +A reader noticed that by placing custom appender implementation of `SLOW_QUERIES_APPENDER` appender on a class path and referencing it in `logback.xml`, it is possible to log slow queries wherever we have an appender for it. + === Contents of default `logback.xml` [source,XML] diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 7459fecb8c34..e2957ce95f4f 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -27,6 +27,7 @@ import accord.utils.Invariants; import org.apache.cassandra.db.virtual.LogMessagesTable; +import org.apache.cassandra.db.virtual.SlowQueriesTable; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.StorageCompatibilityMode; @@ -363,6 +364,8 @@ public enum CassandraRelevantProperties LOG4J2_DISABLE_JMX_LEGACY("log4j2.disable.jmx"), LOG4J_SHUTDOWN_HOOK_ENABLED("log4j.shutdownHookEnabled"), LOGBACK_CONFIGURATION_FILE("logback.configurationFile"), + /** Maximum number of rows in system_views.slow_queries */ + LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS("cassandra.virtual.slow_queries.max.rows", convertToString(SlowQueriesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS)), /** Maximum number of rows in system_views.logs table */ LOGS_VIRTUAL_TABLE_MAX_ROWS("cassandra.virtual.logs.max.rows", convertToString(LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS)), /** diff --git a/src/java/org/apache/cassandra/db/AbstractReadQuery.java b/src/java/org/apache/cassandra/db/AbstractReadQuery.java index 448069cfca10..2e72c7ec4fc5 100644 --- a/src/java/org/apache/cassandra/db/AbstractReadQuery.java +++ b/src/java/org/apache/cassandra/db/AbstractReadQuery.java @@ -118,4 +118,16 @@ public String toCQLString() } protected abstract void appendCQLWhereClause(StringBuilder sb); + + @Override + public String monitoredOnKeyspace() + { + return metadata().keyspace; + } + + @Override + public String monitoredOnTable() + { + return metadata().name; + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/monitoring/Monitorable.java b/src/java/org/apache/cassandra/db/monitoring/Monitorable.java index 10bd10438aa5..4288a667de3f 100644 --- a/src/java/org/apache/cassandra/db/monitoring/Monitorable.java +++ b/src/java/org/apache/cassandra/db/monitoring/Monitorable.java @@ -20,6 +20,8 @@ public interface Monitorable { + Monitorable NO_OP = new NoOp(); + String name(); long creationTimeNanos(); long timeoutNanos(); @@ -33,4 +35,76 @@ public interface Monitorable boolean abort(); boolean complete(); + + default String monitoredOnKeyspace() { return null; }; + default String monitoredOnTable() { return null; }; + + class NoOp implements Monitorable + { + @Override + public String name() + { + return null; + } + + @Override + public long creationTimeNanos() + { + return 0; + } + + @Override + public long timeoutNanos() + { + return 0; + } + + @Override + public long slowTimeoutNanos() + { + return 0; + } + + @Override + public boolean isInProgress() + { + return false; + } + + @Override + public boolean isAborted() + { + return false; + } + + @Override + public boolean isCompleted() + { + return false; + } + + @Override + public boolean isSlow() + { + return false; + } + + @Override + public boolean isCrossNode() + { + return false; + } + + @Override + public boolean abort() + { + return false; + } + + @Override + public boolean complete() + { + return false; + } + } } diff --git a/src/java/org/apache/cassandra/db/monitoring/MonitoringTask.java b/src/java/org/apache/cassandra/db/monitoring/MonitoringTask.java index 243569910b8a..4d6d995c77bf 100644 --- a/src/java/org/apache/cassandra/db/monitoring/MonitoringTask.java +++ b/src/java/org/apache/cassandra/db/monitoring/MonitoringTask.java @@ -19,6 +19,7 @@ package org.apache.cassandra.db.monitoring; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -32,9 +33,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.core.type.TypeReference; import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.utils.Clock; +import org.apache.cassandra.utils.JsonUtils; import org.apache.cassandra.utils.NoSpamLogger; +import org.apache.cassandra.utils.logging.LoggingSupport; +import org.apache.cassandra.utils.logging.LoggingSupportFactory; +import org.apache.cassandra.utils.logging.SlowQueriesAppender; import static java.util.concurrent.TimeUnit.NANOSECONDS; import static org.apache.cassandra.config.CassandraRelevantProperties.MONITORING_MAX_OPERATIONS; @@ -47,8 +58,9 @@ * We also log timed out operations, see CASSANDRA-7392. * Since CASSANDRA-12403 we also log queries that were slow. */ -class MonitoringTask +public class MonitoringTask { + private static final String SLOW_OPERATIONS_LOGGER_NAME = "slow_queries"; private static final String LINE_SEPARATOR = CassandraRelevantProperties.LINE_SEPARATOR.getString(); private static final Logger logger = LoggerFactory.getLogger(MonitoringTask.class); private static final NoSpamLogger noSpamLogger = NoSpamLogger.getLogger(logger, 5L, TimeUnit.MINUTES); @@ -70,6 +82,8 @@ class MonitoringTask private final ScheduledFuture reportingTask; private final OperationsQueue failedOperationsQueue; private final OperationsQueue slowOperationsQueue; + private Logger slowOperationsLogger = logger; + private boolean slowOperationsLoggedToVirtualTable; private long approxLastLogTimeNanos; @@ -97,6 +111,15 @@ private MonitoringTask(int reportIntervalMillis, int maxOperations) reportIntervalMillis, reportIntervalMillis, TimeUnit.MILLISECONDS); + + LoggingSupport support = LoggingSupportFactory.getLoggingSupport(); + if (support.getLogger(SLOW_OPERATIONS_LOGGER_NAME).isPresent()) + { + if (support.getAppender(SlowQueriesAppender.class, SlowQueriesAppender.APPENDER_NAME).isPresent()) + slowOperationsLoggedToVirtualTable = true; + + slowOperationsLogger = LoggerFactory.getLogger(SLOW_OPERATIONS_LOGGER_NAME); + } } public void cancel() @@ -169,14 +192,30 @@ boolean logSlowOperations(long approxCurrentTimeNanos) if (!slowOperations.isEmpty()) { long approxElapsedNanos = approxCurrentTimeNanos - approxLastLogTimeNanos; - noSpamLogger.info("Some operations were slow, details available at debug level (debug.log)"); + noSpamLogger.info("Some operations were slow, details available at debug level (debug.log) or " + + "system_views.slow_queries virtual table (when enabled)."); + + if (slowOperationsLogger.isDebugEnabled()) + { + if (slowOperationsLoggedToVirtualTable) + { + // This is the crux of the patch for appending to vtable. + // Because we can send only Strings to debug method (or objects, on which toString() + // would be eventually called), we need to log a string in such a way that we can + // get Operation object(s) back "on the other side" when dealing with vtables and custom appenders + // as appenders work with LoggingEvent where message is just a string. + // It would be very hard / tricky / error-prone to parse customly crafted log message + // which appears in logs when no vtable appender is used. + slowOperationsLogger.debug(Operation.serialize(slowOperations.getOperations())); + } + else + slowOperationsLogger.debug("{} operations were slow in the last {} msecs:{}{}", + slowOperations.num(), + NANOSECONDS.toMillis(approxElapsedNanos), + LINE_SEPARATOR, + slowOperations.getLogMessage()); + } - if (logger.isDebugEnabled()) - logger.debug("{} operations were slow in the last {} msecs:{}{}", - slowOperations.num(), - NANOSECONDS.toMillis(approxElapsedNanos), - LINE_SEPARATOR, - slowOperations.getLogMessage()); return true; } return false; @@ -274,6 +313,12 @@ public long num() return operations.size() + numDropped; } + private Collection getOperations() + { + return operations.values(); + } + + @JsonIgnore String getLogMessage() { if (isEmpty()) @@ -307,9 +352,16 @@ private static void addOperation(StringBuilder ret, Operation operation) * same name (CQL query text) is reported and store the average, min and max * times. */ - protected abstract static class Operation + @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "id") + @JsonSubTypes({ @JsonSubTypes.Type(value = SlowOperation.class) }) + @VisibleForTesting + public abstract static class Operation { + @JsonProperty + String id = getClass().getName(); + /** The operation that was reported as slow or timed out */ + @JsonIgnore final Monitorable operation; /** The number of times the operation was reported */ @@ -319,24 +371,50 @@ protected abstract static class Operation long totalTimeNanos; /** The maximum time spent by this operation */ - long maxTime; + long maxTimeNanos; /** The minimum time spent by this operation */ - long minTime; + long minTimeNanos; /** The name of the operation, i.e. the SELECT query CQL, * this is set lazily as it takes time to build the query CQL */ private String name; + /** + * creation time of this Operation object, in ms, + * this is different from operation's creationTimeNanos + * which does not follow wall clock and is useless for + * reporting purposes e.g. in virtual tables + */ + private final long timestampMs; + + // optional keyspace and table this operation acts on + // used upon deserialization + private String keyspace; + private String table; + private boolean crossNode; + Operation(Monitorable operation, long failedAtNanos) { this.operation = operation; numTimesReported = 1; totalTimeNanos = failedAtNanos - operation.creationTimeNanos(); - minTime = totalTimeNanos; - maxTime = totalTimeNanos; + minTimeNanos = totalTimeNanos; + maxTimeNanos = totalTimeNanos; + timestampMs = Clock.Global.currentTimeMillis() - (Clock.Global.nanoTime() - operation.creationTimeNanos()) / 1_000_000; + } + + void add(Operation operation) + { + numTimesReported++; + totalTimeNanos += operation.totalTimeNanos; + maxTimeNanos = Math.max(maxTimeNanos, operation.maxTimeNanos); + minTimeNanos = Math.min(minTimeNanos, operation.minTimeNanos); } + public abstract String getLogMessage(); + + @JsonProperty public String name() { if (name == null) @@ -344,15 +422,96 @@ public String name() return name; } - void add(Operation operation) + @JsonProperty + public String keyspace() { - numTimesReported++; - totalTimeNanos += operation.totalTimeNanos; - maxTime = Math.max(maxTime, operation.maxTime); - minTime = Math.min(minTime, operation.minTime); + if (operation != null) + { + String monitored = operation.monitoredOnKeyspace(); + if (monitored != null) + return monitored; + } + return keyspace; } - public abstract String getLogMessage(); + public void setKeyspace(String keyspace) + { + this.keyspace = keyspace; + } + + public void setTable(String table) + { + this.table = table; + } + + @JsonProperty + public String table() + { + if (operation != null) + { + String monitored = operation.monitoredOnTable(); + if (monitored != null) + return monitored; + } + return table; + } + + @JsonProperty + public boolean isCrossNode() + { + if (operation != null) + return operation.isCrossNode(); + + return crossNode; + } + + @JsonProperty + public int numTimesReported() + { + return numTimesReported; + } + + @JsonProperty + public long totalTimeNanos() + { + return totalTimeNanos; + } + + @JsonProperty + public long maxTimeNanos() + { + return maxTimeNanos; + } + + @JsonProperty + public long minTimeNanos() + { + return minTimeNanos; + } + + @JsonIgnore + public long averageTime() + { + return totalTimeNanos / numTimesReported; + } + + @JsonProperty + public long timestampMs() + { + return timestampMs; + } + + public static String serialize(Collection operations) + { + return JsonUtils.writeAsJsonString(operations); + } + + private static final TypeReference> TYPE_REFERENCE = new TypeReference<>() {}; + + public static List deserialize(String message) throws Throwable + { + return JsonUtils.JSON_OBJECT_MAPPER.readValue(message, TYPE_REFERENCE); + } } /** @@ -378,8 +537,8 @@ public String getLogMessage() name(), numTimesReported, NANOSECONDS.toMillis(totalTimeNanos / numTimesReported), - NANOSECONDS.toMillis(minTime), - NANOSECONDS.toMillis(maxTime), + NANOSECONDS.toMillis(minTimeNanos), + NANOSECONDS.toMillis(maxTimeNanos), NANOSECONDS.toMillis(operation.timeoutNanos()), operation.isCrossNode() ? "msec/cross-node" : "msec"); } @@ -388,13 +547,21 @@ public String getLogMessage() /** * An operation (query) that was reported as slow. */ - private final static class SlowOperation extends Operation + @VisibleForTesting + public final static class SlowOperation extends Operation { - SlowOperation(Monitorable operation, long failedAt) + // purely for deserialization purposes + public SlowOperation() + { + this(Monitorable.NO_OP, 0); + } + + public SlowOperation(Monitorable operation, long failedAt) { super(operation, failedAt); } + @JsonIgnore public String getLogMessage() { if (numTimesReported == 1) @@ -408,8 +575,8 @@ public String getLogMessage() name(), numTimesReported, NANOSECONDS.toMillis(totalTimeNanos/ numTimesReported), - NANOSECONDS.toMillis(minTime), - NANOSECONDS.toMillis(maxTime), + NANOSECONDS.toMillis(minTimeNanos), + NANOSECONDS.toMillis(maxTimeNanos), NANOSECONDS.toMillis(operation.slowTimeoutNanos()), operation.isCrossNode() ? "msec/cross-node" : "msec"); } diff --git a/src/java/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTable.java b/src/java/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTable.java new file mode 100644 index 000000000000..008d5d432a2b --- /dev/null +++ b/src/java/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTable.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.virtual; + +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import ch.qos.logback.classic.spi.LoggingEvent; +import org.apache.cassandra.schema.TableMetadata; + +/** + * This table is inherently limited on number of rows it can hold. + * + * @param type parameter saying what object is stored in internal bounded list for query purposes + */ +public abstract class AbstractLoggerVirtualTable extends AbstractMutableVirtualTable +{ + private static final Logger logger = LoggerFactory.getLogger(AbstractLoggerVirtualTable.class); + + // please be sure operations on this structure are thread-safe + protected final List buffer; + + @VisibleForTesting + protected static int resolveBufferSize(int wantedSize, int max, int defaultSize) + { + return (wantedSize < 1 || wantedSize > max) ? defaultSize : wantedSize; + } + + protected AbstractLoggerVirtualTable(TableMetadata metadata, int maxSize) + { + super(metadata); + this.buffer = BoundedLinkedList.create(maxSize); + logger.debug("capacity of virtual table {} is set to be at most {} rows", metadata().toString(), maxSize); + } + + public void add(LoggingEvent event) + { + List messages = getMessages(event); + if (messages != null) + { + // specifically calling buffer.add to reach BoundedLinkedList's add + // instead of linked list's addAll + for (U message : messages) + buffer.add(message); + } + } + + public abstract List getMessages(LoggingEvent event); + + @Override + public void truncate() + { + synchronized (buffer) + { + buffer.clear(); + } + } + + @Override + public boolean allowFilteringImplicitly() + { + return false; + } + + private static final class BoundedLinkedList extends LinkedList + { + private final int maxSize; + + public static List create(int size) + { + return Collections.synchronizedList(new BoundedLinkedList<>(size)); + } + + private BoundedLinkedList(int maxSize) + { + this.maxSize = maxSize; + } + + @Override + public synchronized boolean add(T t) + { + if (size() == maxSize) + removeLast(); + + addFirst(t); + + return true; + } + } +} diff --git a/src/java/org/apache/cassandra/db/virtual/LogMessagesTable.java b/src/java/org/apache/cassandra/db/virtual/LogMessagesTable.java index 5903ac2ab5f3..87978e3fd966 100644 --- a/src/java/org/apache/cassandra/db/virtual/LogMessagesTable.java +++ b/src/java/org/apache/cassandra/db/virtual/LogMessagesTable.java @@ -18,15 +18,11 @@ package org.apache.cassandra.db.virtual; -import java.util.Collections; import java.util.Date; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import ch.qos.logback.classic.spi.LoggingEvent; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -50,11 +46,8 @@ * @see CASSANDRA-18238 * @see org.apache.cassandra.utils.logging.VirtualTableAppender */ -public final class LogMessagesTable extends AbstractMutableVirtualTable +public final class LogMessagesTable extends AbstractLoggerVirtualTable { - private static final Logger logger = LoggerFactory.getLogger(LogMessagesTable.class); - - public static final int LOGS_VIRTUAL_TABLE_MIN_ROWS = 1000; public static final int LOGS_VIRTUAL_TABLE_DEFAULT_ROWS = 50_000; public static final int LOGS_VIRTUAL_TABLE_MAX_ROWS = 100_000; @@ -67,11 +60,11 @@ public final class LogMessagesTable extends AbstractMutableVirtualTable public static final String LEVEL_COLUMN_NAME = "level"; public static final String MESSAGE_COLUMN_NAME = "message"; - private final List buffer; - LogMessagesTable(String keyspace) { - this(keyspace, resolveBufferSize()); + this(keyspace, resolveBufferSize(CassandraRelevantProperties.LOGS_VIRTUAL_TABLE_MAX_ROWS.getInt(), + LOGS_VIRTUAL_TABLE_MAX_ROWS, + LOGS_VIRTUAL_TABLE_DEFAULT_ROWS)); } @VisibleForTesting @@ -85,10 +78,14 @@ public final class LogMessagesTable extends AbstractMutableVirtualTable .addClusteringColumn(ORDER_IN_MILLISECOND_COLUMN_NAME, Int32Type.instance) .addRegularColumn(LOGGER_COLUMN_NAME, UTF8Type.instance) .addRegularColumn(LEVEL_COLUMN_NAME, UTF8Type.instance) - .addRegularColumn(MESSAGE_COLUMN_NAME, UTF8Type.instance).build()); + .addRegularColumn(MESSAGE_COLUMN_NAME, UTF8Type.instance).build(), + size); + } - logger.debug("capacity of virtual table {} is set to be at most {} rows", metadata().toString(), size); - buffer = BoundedLinkedList.create(size); + @Override + public List getMessages(LoggingEvent event) + { + return List.of(event); } @Override @@ -103,12 +100,12 @@ public DataSet data() int index = 0; - Iterator iterator = buffer.listIterator(); + Iterator iterator = buffer.listIterator(); while (iterator.hasNext()) { - LogMessage log = iterator.next(); + LoggingEvent log = iterator.next(); - milliSecondsOfCurrentLog = log.timestamp; + milliSecondsOfCurrentLog = log.getTimeStamp(); if (milliSecondsOfPreviousLog == milliSecondsOfCurrentLog) ++index; else @@ -116,86 +113,13 @@ public DataSet data() milliSecondsOfPreviousLog = milliSecondsOfCurrentLog; - result.row(new Date(log.timestamp), index) - .column(LOGGER_COLUMN_NAME, log.logger) - .column(LEVEL_COLUMN_NAME, log.level) - .column(MESSAGE_COLUMN_NAME, log.message); + result.row(new Date(milliSecondsOfCurrentLog), index) + .column(LOGGER_COLUMN_NAME, log.getLoggerName()) + .column(LEVEL_COLUMN_NAME, log.getLevel().toString()) + .column(MESSAGE_COLUMN_NAME, log.getFormattedMessage()); } } return result; } - - public void add(LoggingEvent event) - { - buffer.add(new LogMessage(event)); - } - - @Override - public void truncate() - { - buffer.clear(); - } - - @Override - public boolean allowFilteringImplicitly() - { - return false; - } - - @VisibleForTesting - static int resolveBufferSize() - { - int size = CassandraRelevantProperties.LOGS_VIRTUAL_TABLE_MAX_ROWS.getInt(); - return (size < LOGS_VIRTUAL_TABLE_MIN_ROWS || size > LOGS_VIRTUAL_TABLE_MAX_ROWS) - ? LOGS_VIRTUAL_TABLE_DEFAULT_ROWS : size; - } - - @VisibleForTesting - public static class LogMessage - { - public final long timestamp; - public final String logger; - public final String level; - public final String message; - - public LogMessage(LoggingEvent event) - { - this(event.getTimeStamp(), event.getLoggerName(), event.getLevel().toString(), event.getFormattedMessage()); - } - - public LogMessage(long timestamp, String logger, String level, String message) - { - this.timestamp = timestamp; - this.logger = logger; - this.level = level; - this.message = message; - } - } - - private static final class BoundedLinkedList extends LinkedList - { - private final int maxSize; - - public static List create(int size) - { - return Collections.synchronizedList(new BoundedLinkedList<>(size)); - } - - private BoundedLinkedList(int maxSize) - { - this.maxSize = maxSize; - } - - @Override - public boolean add(T t) - { - if (size() == maxSize) - removeLast(); - - addFirst(t); - - return true; - } - } } diff --git a/src/java/org/apache/cassandra/db/virtual/SlowQueriesTable.java b/src/java/org/apache/cassandra/db/virtual/SlowQueriesTable.java new file mode 100644 index 000000000000..0d392d0ce27e --- /dev/null +++ b/src/java/org/apache/cassandra/db/virtual/SlowQueriesTable.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.virtual; + +import java.util.ArrayList; +import java.util.Date; +import java.util.Iterator; +import java.util.List; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import ch.qos.logback.classic.spi.LoggingEvent; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.BooleanType; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.db.marshal.TimestampType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.monitoring.MonitoringTask.Operation; +import org.apache.cassandra.dht.LocalPartitioner; +import org.apache.cassandra.schema.TableMetadata; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; + +public class SlowQueriesTable extends AbstractLoggerVirtualTable +{ + private static final Logger logger = LoggerFactory.getLogger(SlowQueriesTable.class); + + public static final int LOGS_VIRTUAL_TABLE_DEFAULT_ROWS = 10_000; + public static final int LOGS_VIRTUAL_TABLE_MAX_ROWS = 100_000; + + public static final String TABLE_NAME = "slow_queries"; + private static final String TABLE_COMMENT = "Slow queries"; + + public static final String KEYSPACE_COLUMN_NAME = "keyspace_name"; + public static final String TABLE_COLUMN_NAME = "table_name"; + public static final String TIMESTAMP_COLUMN_NAME = "timestamp"; + public static final String QUERY_COLUMN_NAME = "query"; + public static final String MINIMUM_TIME_COLUMN_NAME = "min_ms"; + public static final String MAXIMUM_TIME_COLUMN_NAME = "max_ms"; + public static final String AVERAGE_TIME_COLUMN_NAME = "avg_ms"; + public static final String TIMES_REPORTED_COLUMN_NAME = "times_reported"; + public static final String CROSS_NODE_COLUMN_NAME = "cross_node"; + + SlowQueriesTable(String keyspace) + { + this(keyspace, resolveBufferSize(CassandraRelevantProperties.LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS.getInt(), + LOGS_VIRTUAL_TABLE_MAX_ROWS, + LOGS_VIRTUAL_TABLE_DEFAULT_ROWS)); + } + + @VisibleForTesting + SlowQueriesTable(String keyspace, int size) + { + super(TableMetadata.builder(keyspace, TABLE_NAME) + .comment(TABLE_COMMENT) + .kind(TableMetadata.Kind.VIRTUAL) + .partitioner(new LocalPartitioner(UTF8Type.instance)) + .addPartitionKeyColumn(KEYSPACE_COLUMN_NAME, UTF8Type.instance) + .addClusteringColumn(TABLE_COLUMN_NAME, UTF8Type.instance) + .addClusteringColumn(TIMESTAMP_COLUMN_NAME, TimestampType.instance) + // We are adding query as a clustering column for uniqueness, + // In theory, it might happen that two monitoring operations + // would be emitted for same keyspace, same table at the exact same time + // (in milliseconds). That means that one operation would "shadow" + // another one because primary key would be same for both. + // To make it truly unique, we include query among clustering keys + // as well. If queries were same, then they would be also reported so + // (it would be reflected in "times_reported" column) + .addClusteringColumn(QUERY_COLUMN_NAME, UTF8Type.instance) + .addRegularColumn(MINIMUM_TIME_COLUMN_NAME, LongType.instance) + .addRegularColumn(MAXIMUM_TIME_COLUMN_NAME, LongType.instance) + .addRegularColumn(AVERAGE_TIME_COLUMN_NAME, LongType.instance) + .addRegularColumn(TIMES_REPORTED_COLUMN_NAME, Int32Type.instance) + .addRegularColumn(CROSS_NODE_COLUMN_NAME, BooleanType.instance) + .build(), + size); + } + + @Override + protected void applyPartitionDeletion(ColumnValues partitionKey) + { + String keyspace = partitionKey.value(0); + + synchronized (buffer) + { + buffer.removeIf(o -> o.keyspace().equals(keyspace)); + } + } + + @Override + public DataSet data() + { + SimpleDataSet result = new SimpleDataSet(metadata(), DecoratedKey.comparator.reversed()); + + synchronized (buffer) + { + Iterator iterator = buffer.listIterator(); + while (iterator.hasNext()) + { + Operation operation = iterator.next(); + + result.row(operation.keyspace(), operation.table(), new Date(operation.timestampMs()), operation.name()) + .column(MINIMUM_TIME_COLUMN_NAME, NANOSECONDS.toMillis(operation.minTimeNanos())) + .column(MAXIMUM_TIME_COLUMN_NAME, NANOSECONDS.toMillis(operation.maxTimeNanos())) + .column(AVERAGE_TIME_COLUMN_NAME, NANOSECONDS.toMillis(operation.averageTime())) + .column(TIMES_REPORTED_COLUMN_NAME, operation.numTimesReported()) + .column(CROSS_NODE_COLUMN_NAME, operation.isCrossNode()); + } + } + + return result; + } + + @Override + public List getMessages(LoggingEvent event) + { + try + { + List qualified = new ArrayList<>(); + for (Operation operation : Operation.deserialize(event.getMessage())) + { + + // in (improbable) case there is an operation which does not have + // keyspace / table on it, we just skip this from processing + // as we would have nothing to show for partition key and clustering column + if (operation.keyspace() == null || operation.table() == null) + continue; + + // if cf of an operation is present, take keyspace and table name from it + // instead of having new string instances per operation which might + // take relatively a lot of additional space unnecessarily + Keyspace keyspace = Keyspace.openIfExists(operation.keyspace()); + String keyspaceName; + String tableName; + if (keyspace != null) + { + keyspaceName = keyspace.getName(); + try + { + ColumnFamilyStore table = keyspace.getColumnFamilyStore(operation.table()); + tableName = table.getTableName(); + } + catch (IllegalArgumentException ex) + { + tableName = operation.table(); + } + } + else + { + keyspaceName = operation.keyspace(); + tableName = operation.table(); + } + + operation.setKeyspace(keyspaceName); + operation.setTable(tableName); + qualified.add(operation); + } + + return qualified; + } + catch (Throwable t) + { + logger.trace("Unable to generate list of slow queries", t); + return null; + } + } + + @Override + public boolean allowFilteringImplicitly() + { + return true; + } +} diff --git a/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java b/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java index 3ca8f728a8b3..28c6dc8fef40 100644 --- a/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java +++ b/src/java/org/apache/cassandra/db/virtual/SystemViewsKeyspace.java @@ -61,6 +61,7 @@ private SystemViewsKeyspace() .add(new GossipInfoTable(VIRTUAL_VIEWS)) .add(new QueriesTable(VIRTUAL_VIEWS)) .add(new LogMessagesTable(VIRTUAL_VIEWS)) + .add(new SlowQueriesTable(VIRTUAL_VIEWS)) .add(new SnapshotsTable(VIRTUAL_VIEWS)) .add(new PeersTable(VIRTUAL_VIEWS)) .add(new LocalTable(VIRTUAL_VIEWS)) diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index d3c787d2e2db..171ec47e1e68 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -59,6 +59,8 @@ import org.apache.cassandra.db.SystemKeyspaceMigrator41; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.db.virtual.AccordDebugKeyspace; +import org.apache.cassandra.db.virtual.LogMessagesTable; +import org.apache.cassandra.db.virtual.SlowQueriesTable; import org.apache.cassandra.db.virtual.SystemViewsKeyspace; import org.apache.cassandra.db.virtual.VirtualKeyspace; import org.apache.cassandra.db.virtual.VirtualKeyspaceRegistry; @@ -94,6 +96,7 @@ import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.FutureCombiner; import org.apache.cassandra.utils.logging.LoggingSupportFactory; +import org.apache.cassandra.utils.logging.SlowQueriesAppender; import org.apache.cassandra.utils.logging.VirtualTableAppender; import static java.util.concurrent.TimeUnit.NANOSECONDS; @@ -436,7 +439,6 @@ public void runStartupChecks() { exitOrFail(e.returnCode, e.getMessage(), e.getCause()); } - } /** @@ -555,11 +557,17 @@ public void setupVirtualKeyspaces() if (DatabaseDescriptor.getAccord().enable_virtual_debug_only_keyspace) VirtualKeyspaceRegistry.instance.register(AccordDebugKeyspace.instance); - // flush log messages to system_views.system_logs virtual table as there were messages already logged - // before that virtual table was instantiated + // Flush log messages to system_views.system_logs virtual table as there were messages already logged + // before that virtual table was instantiated. + // In general, there is no need to do same treatment for slow queries as by the time queries are processed + // the logging framework if fully setup already but for the sake of it and to be sure, just do it as well. LoggingSupportFactory.getLoggingSupport() .getAppender(VirtualTableAppender.class, VirtualTableAppender.APPENDER_NAME) - .ifPresent(appender -> ((VirtualTableAppender) appender).flushBuffer()); + .ifPresent(appender -> appender.flushBuffer(LogMessagesTable.class, LogMessagesTable.TABLE_NAME)); + + LoggingSupportFactory.getLoggingSupport() + .getAppender(SlowQueriesAppender.class, SlowQueriesAppender.APPENDER_NAME) + .ifPresent(appender -> appender.flushBuffer(SlowQueriesTable.class, SlowQueriesTable.TABLE_NAME)); } public synchronized void initializeClientTransports() diff --git a/src/java/org/apache/cassandra/utils/logging/AbstractVirtualTableAppender.java b/src/java/org/apache/cassandra/utils/logging/AbstractVirtualTableAppender.java new file mode 100644 index 000000000000..7becbc13fcd5 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/logging/AbstractVirtualTableAppender.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.logging; + +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; + +import ch.qos.logback.classic.spi.LoggingEvent; +import ch.qos.logback.core.AppenderBase; +import org.apache.cassandra.db.virtual.AbstractLoggerVirtualTable; +import org.apache.cassandra.db.virtual.SlowQueriesTable; +import org.apache.cassandra.db.virtual.VirtualKeyspace; +import org.apache.cassandra.db.virtual.VirtualKeyspaceRegistry; +import org.apache.cassandra.db.virtual.VirtualTable; + +import static org.apache.cassandra.schema.SchemaConstants.VIRTUAL_VIEWS; + +public abstract class AbstractVirtualTableAppender extends AppenderBase +{ + private final int defaultRows; + + protected AbstractVirtualTableAppender(int defaultRows) + { + this.defaultRows = defaultRows; + } + + // for holding messages until virtual registry contains logs virtual table + // as it takes some time during startup of a node to initialise virtual tables but messages are + // logged already + protected final List messageBuffer = new LinkedList<>(); + + protected T getVirtualTable(Class vtableClass, String tableName) + { + VirtualKeyspace keyspace = VirtualKeyspaceRegistry.instance.getKeyspaceNullable(VIRTUAL_VIEWS); + + if (keyspace == null) + return null; + + Optional virtualTable = keyspace.tables() + .stream() + .filter(vt -> vt.name().equals(tableName)) + .findFirst(); + + if (virtualTable.isEmpty()) + return null; + + VirtualTable vt = virtualTable.get(); + + if (!vt.getClass().equals(vtableClass)) + throw new IllegalStateException(String.format("Virtual table %s.%s is not backed by an instance of %s but by %s", + VIRTUAL_VIEWS, + tableName, + vtableClass.getName(), + vt.getClass().getName())); + + return (T) vt; + } + + /** + * This method adds an event to virtual table, when present. + * When vtable is null, we will attempt to find it among registered ones. Then not found, we add it to internal + * buffer for later processing. This might happen e.g. for logging tables when log events + * were appended via logging framework sooner than registration of virtual tables was done so after they are registered, + * they would miss logging events happened before being so. + * + * @param vtable vtable to append to + * @param event event to append to + * @param tableName table name of virtual table to append to + * @return vtable or when null, found vtable + */ + protected AbstractLoggerVirtualTable appendToVirtualTable(AbstractLoggerVirtualTable vtable, LoggingEvent event, String tableName) + { + AbstractLoggerVirtualTable foundVtable; + if (vtable == null) + { + foundVtable = getVirtualTable(SlowQueriesTable.class, tableName); + if (foundVtable == null) + addToBuffer(event); + else + foundVtable.add(event); + } + else + { + foundVtable = vtable; + vtable.add(event); + } + + return foundVtable; + } + + @Override + public void stop() + { + synchronized (messageBuffer) + { + messageBuffer.clear(); + super.stop(); + } + } + + /** + * Flushes all log entries which were appended before virtual table was registered. + * + * @see org.apache.cassandra.service.CassandraDaemon#setupVirtualKeyspaces + */ + public void flushBuffer(Class> vtableClass, String tableName) + { + synchronized (messageBuffer) + { + Optional.ofNullable(getVirtualTable(vtableClass, tableName)).ifPresent(vtable -> { + messageBuffer.forEach(vtable::add); + messageBuffer.clear(); + }); + } + } + + protected void addToBuffer(LoggingEvent eventObject) + { + synchronized (messageBuffer) + { + // we restrict how many logging events we can put into buffer, + // so we are not growing without any bound when things go south + if (messageBuffer.size() < defaultRows) + messageBuffer.add(eventObject); + } + } +} diff --git a/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java b/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java index e710d44dd1dc..f32963b73a59 100644 --- a/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java +++ b/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java @@ -60,7 +60,8 @@ public class LogbackLoggingSupport implements LoggingSupport @Override public void onStartup() { - checkOnlyOneVirtualTableAppender(); + checkOnlyOneVirtualTableAppender(VirtualTableAppender.class); + checkOnlyOneVirtualTableAppender(SlowQueriesAppender.class); // The default logback configuration in conf/logback.xml allows reloading the // configuration when the configuration file has changed (every 60 seconds by default). @@ -138,7 +139,20 @@ public Map getLoggingLevels() } @Override - public Optional> getAppender(Class appenderClass, String name) + public Optional getLogger(String loggerName) + { + LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory(); + for (Logger logBackLogger : lc.getLoggerList()) + { + if (logBackLogger.getName().equals(loggerName)) + return Optional.of(logBackLogger); + } + + return Optional.empty(); + } + + @Override + public > Optional getAppender(Class appenderClass, String appenderName) { LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory(); for (Logger logBackLogger : lc.getLoggerList()) @@ -146,15 +160,15 @@ public Optional> getAppender(Class appenderClass, String name) for (Iterator> iterator = logBackLogger.iteratorForAppenders(); iterator.hasNext();) { Appender appender = iterator.next(); - if (appender.getClass() == appenderClass && appender.getName().equals(name)) - return Optional.of(appender); + if (appender.getClass() == appenderClass && appender.getName().equals(appenderName)) + return Optional.of(appenderClass.cast(appender)); } } return Optional.empty(); } - private void checkOnlyOneVirtualTableAppender() + private void checkOnlyOneVirtualTableAppender(Class appenderClass) { int count = 0; LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory(); @@ -164,7 +178,7 @@ private void checkOnlyOneVirtualTableAppender() for (Iterator> iterator = logBackLogger.iteratorForAppenders(); iterator.hasNext();) { Appender appender = iterator.next(); - if (appender instanceof VirtualTableAppender) + if (appenderClass.isAssignableFrom(appender.getClass())) { virtualAppenderNames.add(appender.getName()); count += 1; @@ -174,7 +188,7 @@ private void checkOnlyOneVirtualTableAppender() if (count > 1) throw new IllegalStateException(String.format("There are multiple appenders of class %s of names %s. There is only one appender of such class allowed.", - VirtualTableAppender.class.getName(), String.join(",", virtualAppenderNames))); + appenderClass.getName(), String.join(",", virtualAppenderNames))); } private boolean hasAppenders(Logger logBackLogger) diff --git a/src/java/org/apache/cassandra/utils/logging/LoggingSupport.java b/src/java/org/apache/cassandra/utils/logging/LoggingSupport.java index 35e11975f922..00b40cb966de 100644 --- a/src/java/org/apache/cassandra/utils/logging/LoggingSupport.java +++ b/src/java/org/apache/cassandra/utils/logging/LoggingSupport.java @@ -21,6 +21,7 @@ import java.util.Map; import java.util.Optional; +import ch.qos.logback.classic.Logger; import ch.qos.logback.core.Appender; /** @@ -53,7 +54,12 @@ default void onShutdown() {} */ Map getLoggingLevels(); - default Optional> getAppender(Class appenderClass, String appenderName) + default > Optional getAppender(Class appenderClass, String appenderName) + { + return Optional.empty(); + } + + default Optional getLogger(String loggerName) { return Optional.empty(); } diff --git a/src/java/org/apache/cassandra/utils/logging/SlowQueriesAppender.java b/src/java/org/apache/cassandra/utils/logging/SlowQueriesAppender.java new file mode 100644 index 000000000000..4af2e383077b --- /dev/null +++ b/src/java/org/apache/cassandra/utils/logging/SlowQueriesAppender.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.logging; + +import ch.qos.logback.classic.spi.LoggingEvent; +import org.apache.cassandra.db.virtual.AbstractLoggerVirtualTable; +import org.apache.cassandra.db.virtual.SlowQueriesTable; + +public final class SlowQueriesAppender extends AbstractVirtualTableAppender +{ + public static final String APPENDER_NAME = "SLOW_QUERIES_APPENDER"; + + private AbstractLoggerVirtualTable slowQueries; + + public SlowQueriesAppender() + { + super(SlowQueriesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS); + } + + @Override + protected void append(LoggingEvent eventObject) + { + // slowQueries will be null as long as virtual tables + // are not registered, and we already try to put queries there. + // As soon as vtable is registered (as part of node's startup / initialisation), + // slow queries will never be null again + slowQueries = appendToVirtualTable(slowQueries, eventObject, SlowQueriesTable.TABLE_NAME); + } +} diff --git a/src/java/org/apache/cassandra/utils/logging/VirtualTableAppender.java b/src/java/org/apache/cassandra/utils/logging/VirtualTableAppender.java index 2820b2936f4a..03a142004afd 100644 --- a/src/java/org/apache/cassandra/utils/logging/VirtualTableAppender.java +++ b/src/java/org/apache/cassandra/utils/logging/VirtualTableAppender.java @@ -18,111 +18,35 @@ package org.apache.cassandra.utils.logging; -import java.util.LinkedList; -import java.util.List; -import java.util.Optional; import java.util.Set; import com.google.common.collect.ImmutableSet; import ch.qos.logback.classic.spi.LoggingEvent; -import ch.qos.logback.core.AppenderBase; import org.apache.cassandra.audit.FileAuditLogger; +import org.apache.cassandra.db.virtual.AbstractLoggerVirtualTable; import org.apache.cassandra.db.virtual.LogMessagesTable; -import org.apache.cassandra.db.virtual.VirtualKeyspace; -import org.apache.cassandra.db.virtual.VirtualKeyspaceRegistry; -import org.apache.cassandra.db.virtual.VirtualTable; - -import static org.apache.cassandra.db.virtual.LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS; -import static org.apache.cassandra.db.virtual.LogMessagesTable.TABLE_NAME; -import static org.apache.cassandra.schema.SchemaConstants.VIRTUAL_VIEWS; /** * Appends Cassandra logs to virtual table system_views.system_logs */ -public final class VirtualTableAppender extends AppenderBase +public final class VirtualTableAppender extends AbstractVirtualTableAppender { public static final String APPENDER_NAME = "CQLLOG"; private static final Set forbiddenLoggers = ImmutableSet.of(FileAuditLogger.class.getName()); - private LogMessagesTable logs; - - // for holding messages until virtual registry contains logs virtual table - // as it takes some time during startup of a node to initialise virtual tables but messages are - // logged already - private final List messageBuffer = new LinkedList<>(); + private AbstractLoggerVirtualTable logs; - @Override - protected void append(LoggingEvent eventObject) + public VirtualTableAppender() { - if (!forbiddenLoggers.contains(eventObject.getLoggerName())) - { - if (logs == null) - { - logs = getVirtualTable(); - if (logs == null) - addToBuffer(eventObject); - else - logs.add(eventObject); - } - else - logs.add(eventObject); - } + super(LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS); } @Override - public void stop() - { - messageBuffer.clear(); - super.stop(); - } - - /** - * Flushes all logs which were appended before virtual table was registered. - * - * @see org.apache.cassandra.service.CassandraDaemon#setupVirtualKeyspaces - */ - public void flushBuffer() - { - Optional.ofNullable(getVirtualTable()).ifPresent(vtable -> { - messageBuffer.forEach(vtable::add); - messageBuffer.clear(); - }); - } - - private LogMessagesTable getVirtualTable() - { - VirtualKeyspace keyspace = VirtualKeyspaceRegistry.instance.getKeyspaceNullable(VIRTUAL_VIEWS); - - if (keyspace == null) - return null; - - Optional logsTable = keyspace.tables() - .stream() - .filter(vt -> vt.name().equals(TABLE_NAME)) - .findFirst(); - - if (!logsTable.isPresent()) - return null; - - VirtualTable vt = logsTable.get(); - - if (!(vt instanceof LogMessagesTable)) - throw new IllegalStateException(String.format("Virtual table %s.%s is not backed by an instance of %s but by %s", - VIRTUAL_VIEWS, - TABLE_NAME, - LogMessagesTable.class.getName(), - vt.getClass().getName())); - - return (LogMessagesTable) vt; - } - - private void addToBuffer(LoggingEvent eventObject) + protected void append(LoggingEvent eventObject) { - // we restrict how many logging events we can put into buffer, - // so we are not growing without any bound when things go south - if (messageBuffer.size() < LOGS_VIRTUAL_TABLE_DEFAULT_ROWS) - messageBuffer.add(eventObject); + if (!forbiddenLoggers.contains(eventObject.getLoggerName())) + logs = appendToVirtualTable(logs, eventObject, LogMessagesTable.TABLE_NAME); } } diff --git a/test/conf/logback-dtest_with_slow_query_appender.xml b/test/conf/logback-dtest_with_slow_query_appender.xml new file mode 100644 index 000000000000..1b6ed7511f32 --- /dev/null +++ b/test/conf/logback-dtest_with_slow_query_appender.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log + + %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + + true + + + + + %-5level %date{HH:mm:ss,SSS} %msg%n + + + WARN + + + + + + %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + + + DEBUG + + + + + + + + + + + + + + + diff --git a/test/conf/logback-dtest_with_slow_query_appender_invalid.xml b/test/conf/logback-dtest_with_slow_query_appender_invalid.xml new file mode 100644 index 000000000000..a2252dd23a79 --- /dev/null +++ b/test/conf/logback-dtest_with_slow_query_appender_invalid.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log + + %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + + true + + + + + %-5level %date{HH:mm:ss,SSS} %msg%n + + + WARN + + + + + + %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + + + DEBUG + + + + + + + + + INFO + + + + + + + + + + + + + + + + diff --git a/test/distributed/org/apache/cassandra/distributed/test/AbstractVirtualLogsTableTest.java b/test/distributed/org/apache/cassandra/distributed/test/AbstractVirtualLogsTableTest.java new file mode 100644 index 000000000000..c8bb32fe7280 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/AbstractVirtualLogsTableTest.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test; + +import org.junit.Ignore; + +import static java.lang.String.format; + +@Ignore +public abstract class AbstractVirtualLogsTableTest extends TestBaseImpl +{ + public String query(String template) + { + return format(template, getTableName()); + } + + public abstract String getTableName(); +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/SlowQueriesAppenderTest.java b/test/distributed/org/apache/cassandra/distributed/test/SlowQueriesAppenderTest.java new file mode 100644 index 000000000000..d8b6b3f00f40 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/SlowQueriesAppenderTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test; + +import org.junit.Test; + +import org.apache.cassandra.db.virtual.SlowQueriesTable; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.Constants; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.shared.WithProperties; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.utils.logging.SlowQueriesAppender; + +import static java.lang.String.format; +import static org.apache.cassandra.config.CassandraRelevantProperties.LOGBACK_CONFIGURATION_FILE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +/** + * It is inherently tricky / flaky to make some queries to be slow so we just test + * the invalid configuration otherwise the table as such is tested in {@link org.apache.cassandra.db.virtual.SlowQueriesTableTest}. + */ +public class SlowQueriesAppenderTest extends AbstractVirtualLogsTableTest +{ + @Test + public void testMultipleAppendersFailToStartNode() throws Throwable + { + LOGBACK_CONFIGURATION_FILE.setString("test/conf/logback-dtest_with_slow_query_appender_invalid.xml"); + + // NOTE: Because cluster startup is expected to fail in this case, and can leave things in a weird state + // for the next state, create without starting, and set failure as shutdown to false, + // so the try-with-resources can close instances properly. + try (WithProperties properties = new WithProperties().set(LOGBACK_CONFIGURATION_FILE, "test/conf/logback-dtest_with_slow_query_appender_invalid.xml"); + Cluster cluster = Cluster.build(1) + .withConfig(c -> c.with(Feature.values()) + .set(Constants.KEY_DTEST_API_STARTUP_FAILURE_AS_SHUTDOWN, false)) + .createWithoutStarting()) + { + cluster.startup(); + fail("Node should not start as there is supposed to be invalid logback configuration file."); + } + catch (IllegalStateException ex) + { + assertEquals(format("There are multiple appenders of class %s " + + "of names SLOW_QUERIES_APPENDER,SLOW_QUERIES_APPENDER_2. There is only one appender of such class allowed.", + SlowQueriesAppender.class.getName()), + ex.getMessage()); + } + } + + @Override + public String getTableName() + { + return format("%s.%s", SchemaConstants.VIRTUAL_VIEWS, SlowQueriesTable.TABLE_NAME); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/SlowQueryDeserTest.java b/test/distributed/org/apache/cassandra/distributed/test/SlowQueryDeserTest.java new file mode 100644 index 000000000000..ae5bcc966cf1 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/SlowQueryDeserTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test; + +import java.util.Collection; +import java.util.List; + +import org.junit.Test; + +import org.apache.cassandra.db.monitoring.MonitorableImpl; +import org.apache.cassandra.db.monitoring.MonitoringTask; +import org.apache.cassandra.db.monitoring.MonitoringTask.SlowOperation; +import org.apache.cassandra.utils.Clock; + +public class SlowQueryDeserTest +{ + @Test + public void testSlowQueryDeser() throws Throwable + { + SlowOperation slowOperation = new SlowOperation(new MonitorableImpl() + { + @Override + public String name() + { + return String.format("select * from %s.%s where id = 5", monitoredOnKeyspace(), monitoredOnTable()); + } + + @Override + public String monitoredOnKeyspace() + { + return "ks"; + } + + @Override + public String monitoredOnTable() + { + return "tb"; + } + + @Override + public boolean isCrossNode() + { + return true; + } + }, Clock.Global.currentTimeMillis()); + + String serialize = MonitoringTask.Operation.serialize(List.of(slowOperation)); + Collection deserialize = MonitoringTask.Operation.deserialize(serialize); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/VirtualTableLogsTest.java b/test/distributed/org/apache/cassandra/distributed/test/VirtualTableLogsTest.java index 71ef4dbe7899..bf9f58123e83 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/VirtualTableLogsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/VirtualTableLogsTest.java @@ -25,7 +25,6 @@ import ch.qos.logback.classic.Level; import org.apache.cassandra.db.virtual.LogMessagesTable; -import org.apache.cassandra.db.virtual.LogMessagesTable.LogMessage; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.Constants; import org.apache.cassandra.distributed.api.Feature; @@ -47,8 +46,14 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -public class VirtualTableLogsTest extends TestBaseImpl +public class VirtualTableLogsTest extends AbstractVirtualLogsTableTest { + @Override + public String getTableName() + { + return format("%s.%s", SchemaConstants.VIRTUAL_VIEWS, LogMessagesTable.TABLE_NAME); + } + @Test public void testVTableOutput() throws Throwable { @@ -56,9 +61,9 @@ public void testVTableOutput() throws Throwable Cluster cluster = Cluster.build(1) .withConfig(c -> c.with(Feature.values())) .start(); - ) + ) { - List rows = getRows(cluster); + List rows = getRows(cluster); assertFalse(rows.isEmpty()); rows.forEach(message -> assertTrue(Level.toLevel(message.level).isGreaterOrEqual(Level.INFO))); @@ -91,39 +96,36 @@ public void testMultipleAppendersFailToStartNode() throws Throwable } } - private List getRows(Cluster cluster) + private List getRows(Cluster cluster) { SimpleQueryResult simpleQueryResult = cluster.coordinator(1).executeWithResult(query("select * from %s"), ONE); - List rows = new ArrayList<>(); + List rows = new ArrayList<>(); simpleQueryResult.forEachRemaining(row -> { long timestamp = row.getTimestamp(TIMESTAMP_COLUMN_NAME).getTime(); String logger = row.getString(LOGGER_COLUMN_NAME); String level = row.getString(LEVEL_COLUMN_NAME); String message = row.getString(MESSAGE_COLUMN_NAME); int order = row.getInteger(ORDER_IN_MILLISECOND_COLUMN_NAME); - TestingLogMessage logMessage = new TestingLogMessage(timestamp, logger, level, message, order); + LogMessage logMessage = new LogMessage(timestamp, logger, level, message, order); rows.add(logMessage); }); return rows; } - private String query(String template) - { - return format(template, getTableName()); - } - - private String getTableName() - { - return format("%s.%s", SchemaConstants.VIRTUAL_VIEWS, LogMessagesTable.TABLE_NAME); - } - - private static class TestingLogMessage extends LogMessage + private static class LogMessage { - private int order; + public final long timestamp; + public final String logger; + public final String level; + public final String message; + public final int order; - public TestingLogMessage(long timestamp, String logger, String level, String message, int order) + public LogMessage(long timestamp, String logger, String level, String message, int order) { - super(timestamp, logger, level, message); + this.timestamp = timestamp; + this.logger = logger; + this.level = level; + this.message = message; this.order = order; } } diff --git a/test/unit/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTableTest.java b/test/unit/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTableTest.java new file mode 100644 index 000000000000..40a926fda73c --- /dev/null +++ b/test/unit/org/apache/cassandra/db/virtual/AbstractLoggerVirtualTableTest.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.virtual; + +import java.time.Instant; +import java.util.Date; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import org.junit.Ignore; +import org.junit.Test; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.spi.LoggingEvent; +import com.datastax.driver.core.Row; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.DataRange; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +@Ignore +public abstract class AbstractLoggerVirtualTableTest extends CQLTester +{ + protected final String keyspace = createKeyspaceName(); + + protected AbstractLoggerVirtualTable table; + + @Test + public void testTruncate() + { + registerTable(); + + int numberOfRows = 100; + List loggingEvents = getLoggingEvents(numberOfRows); + loggingEvents.forEach(table::add); + + execute(query("truncate %s")); + + assertTrue(executeNet(query("select timestamp from %s")).all().isEmpty()); + } + + @Test + public void testEmpty() throws Throwable + { + registerTable(); + assertEmpty(execute(query("select * from %s"))); + } + + @Test + public void testInsert() + { + registerTable(); + + int numberOfRows = 1000; + List loggingEvents = getLoggingEvents(numberOfRows); + loggingEvents.forEach(table::add); + + assertEquals(numberOfRows, execute(query("select * from %s")).size()); + } + + @Test + public void testLimitedCapacity() + { + registerTable(100); + + int numberOfRows = 1000; + List loggingEvents = getLoggingEvents(numberOfRows); + loggingEvents.forEach(table::add); + + // even we inserted 1000 rows, only 100 are present as its capacity is bounded + assertEquals(100, numberOfPartitions()); + + // the first record in the table will be the last one which we inserted + LoggingEvent firstEvent = loggingEvents.get(999); + assertRowsNet(executeNet(query("select timestamp from %s limit 1")), + new Object[]{ new Date(firstEvent.getTimeStamp()) }); + + // the last record in the table will be 900th we inserted + List all = executeNet(query("select timestamp from %s")).all(); + assertEquals(100, all.size()); + Row row = all.get(all.size() - 1); + Date timestamp = row.getTimestamp(0); + assertEquals(loggingEvents.get(900).getTimeStamp(), timestamp.getTime()); + } + + protected abstract void registerTable(int maxSize); + + protected abstract void registerTable(); + + protected void registerVirtualTable(AbstractLoggerVirtualTable table) + { + this.table = table; + VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(table.metadata.keyspace, ImmutableList.of(this.table))); + } + + protected String query(String query) + { + return String.format(query, table.toString()); + } + + protected List getLoggingEvents(int size) + { + return getLoggingEvents(size, Instant.now(), 1); + } + + protected List getLoggingEvents(int size, Instant firstTimestamp, int logsInMillisecond) + { + List logs = new LinkedList<>(); + int partitions = size / logsInMillisecond; + + for (int i = 0; i < partitions; i++) + { + firstTimestamp = firstTimestamp.plusSeconds(i); + + for (int j = 0; j < logsInMillisecond; j++) + logs.add(getLoggingEvent(firstTimestamp.toEpochMilli())); + } + + return logs; + } + + protected int numberOfPartitions() + { + AbstractVirtualTable.DataSet data = table.data(); + Iterator partitions = data.getPartitions(DataRange.allData(table.metadata.partitioner)); + int numberOfPartitions = 0; + + while (partitions.hasNext()) + { + partitions.next(); + numberOfPartitions += 1; + } + + return numberOfPartitions; + } + + protected LoggingEvent getLoggingEvent(long timestamp) + { + LoggingEvent event = new LoggingEvent(); + event.setLevel(Level.INFO); + event.setMessage(getMessage(timestamp)); + event.setLoggerName(AbstractLoggerVirtualTableTest.class.getName()); + event.setThreadName(Thread.currentThread().getName()); + event.setTimeStamp(timestamp); + + return event; + } + + protected abstract String getMessage(long timestamp); +} diff --git a/test/unit/org/apache/cassandra/db/virtual/LogMessagesTableTest.java b/test/unit/org/apache/cassandra/db/virtual/LogMessagesTableTest.java index dd32058533db..7025e8ad4cb9 100644 --- a/test/unit/org/apache/cassandra/db/virtual/LogMessagesTableTest.java +++ b/test/unit/org/apache/cassandra/db/virtual/LogMessagesTableTest.java @@ -20,69 +20,34 @@ import java.time.Instant; import java.util.Date; -import java.util.Iterator; -import java.util.LinkedList; import java.util.List; -import com.google.common.collect.ImmutableList; import org.junit.Test; -import ch.qos.logback.classic.Level; import ch.qos.logback.classic.spi.LoggingEvent; import com.datastax.driver.core.Row; -import org.apache.cassandra.cql3.CQLTester; -import org.apache.cassandra.db.DataRange; -import org.apache.cassandra.db.marshal.TimestampType; -import org.apache.cassandra.db.virtual.AbstractVirtualTable.DataSet; -import org.apache.cassandra.db.virtual.AbstractVirtualTable.Partition; -import org.apache.cassandra.dht.LocalPartitioner; import static org.apache.cassandra.config.CassandraRelevantProperties.LOGS_VIRTUAL_TABLE_MAX_ROWS; +import static org.apache.cassandra.db.virtual.LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -public class LogMessagesTableTest extends CQLTester +public class LogMessagesTableTest extends AbstractLoggerVirtualTableTest { - private String keyspace = createKeyspaceName(); - private LogMessagesTable table; - - @Test - public void testTruncate() throws Throwable - { - registerVirtualTable(); - - int numberOfRows = 100; - List loggingEvents = getLoggingEvents(numberOfRows); - loggingEvents.forEach(table::add); - - execute(query("truncate %s")); - - assertTrue(executeNet(query("select timestamp from %s")).all().isEmpty()); - } - - @Test - public void empty() throws Throwable - { - registerVirtualTable(); - assertEmpty(execute(query("select * from %s"))); - } - @Test - public void testInsert() + public void testMultipleLogsInSameMillisecond() { - registerVirtualTable(); - - int numberOfRows = 1000; - List loggingEvents = getLoggingEvents(numberOfRows); + registerTable(); + List loggingEvents = getLoggingEvents(10, Instant.now(), 5); loggingEvents.forEach(table::add); - assertEquals(numberOfRows, numberOfPartitions()); + // 2 partitions, 5 rows in each + assertEquals(2, numberOfPartitions()); } @Test - public void testLimitedCapacity() throws Throwable + public void testLimitedCapacity() { - registerVirtualTable(100); + registerTable(100); int numberOfRows = 1000; List loggingEvents = getLoggingEvents(numberOfRows); @@ -94,7 +59,7 @@ public void testLimitedCapacity() throws Throwable // the first record in the table will be the last one which we inserted LoggingEvent firstEvent = loggingEvents.get(999); assertRowsNet(executeNet(query("select timestamp from %s limit 1")), - new Object[] { new Date(firstEvent.getTimeStamp()) }); + new Object[]{ new Date(firstEvent.getTimeStamp()) }); // the last record in the table will be 900th we inserted List all = executeNet(query("select timestamp from %s")).all(); @@ -104,100 +69,47 @@ public void testLimitedCapacity() throws Throwable assertEquals(loggingEvents.get(900).getTimeStamp(), timestamp.getTime()); } - @Test - public void testMultipleLogsInSameMillisecond() - { - registerVirtualTable(10); - List loggingEvents = getLoggingEvents(10, Instant.now(), 5); - loggingEvents.forEach(table::add); - - // 2 partitions, 5 rows in each - assertEquals(2, numberOfPartitions()); - } - @Test public void testResolvingBufferSize() { LOGS_VIRTUAL_TABLE_MAX_ROWS.setInt(-1); - assertEquals(LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, LogMessagesTable.resolveBufferSize()); + assertEquals(LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, resolveBufferSize()); LOGS_VIRTUAL_TABLE_MAX_ROWS.setInt(0); - assertEquals(LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, LogMessagesTable.resolveBufferSize()); + assertEquals(LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, resolveBufferSize()); LOGS_VIRTUAL_TABLE_MAX_ROWS.setInt(1000001); - assertEquals(LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, LogMessagesTable.resolveBufferSize()); + assertEquals(LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, resolveBufferSize()); LOGS_VIRTUAL_TABLE_MAX_ROWS.setInt(999); - assertEquals(LogMessagesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, LogMessagesTable.resolveBufferSize()); + assertEquals(999, resolveBufferSize()); LOGS_VIRTUAL_TABLE_MAX_ROWS.setInt(50001); - assertEquals(50001, LogMessagesTable.resolveBufferSize()); + assertEquals(50001, resolveBufferSize()); } - private void registerVirtualTable() + private int resolveBufferSize() { - registerVirtualTable(LogMessagesTable.LOGS_VIRTUAL_TABLE_MIN_ROWS); + return AbstractLoggerVirtualTable.resolveBufferSize(LOGS_VIRTUAL_TABLE_MAX_ROWS.getInt(), + LogMessagesTable.LOGS_VIRTUAL_TABLE_MAX_ROWS, + LOGS_VIRTUAL_TABLE_DEFAULT_ROWS); } - private void registerVirtualTable(int size) + @Override + protected void registerTable(int maxSize) { - table = new LogMessagesTable(keyspace, size); - VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(keyspace, ImmutableList.of(table))); + registerVirtualTable(new LogMessagesTable(keyspace, maxSize)); } - private int numberOfPartitions() + @Override + protected void registerTable() { - DataSet data = table.data(); - - Iterator partitions = data.getPartitions(DataRange.allData(new LocalPartitioner(TimestampType.instance))); - - int numberOfPartitions = 0; - - while (partitions.hasNext()) - { - partitions.next(); - numberOfPartitions += 1; - } - - return numberOfPartitions; - } - - private String query(String query) - { - return String.format(query, table.toString()); - } - - private List getLoggingEvents(int size) - { - return getLoggingEvents(size, Instant.now(), 1); - } - - private List getLoggingEvents(int size, Instant firstTimestamp, int logsInMillisecond) - { - List logs = new LinkedList<>(); - int partitions = size / logsInMillisecond; - - for (int i = 0; i < partitions; i++) - { - long timestamp = firstTimestamp.toEpochMilli(); - firstTimestamp = firstTimestamp.plusSeconds(1); - - for (int j = 0; j < logsInMillisecond; j++) - logs.add(getLoggingEvent(timestamp)); - } - - return logs; + registerTable(1000); } - private LoggingEvent getLoggingEvent(long timestamp) + @Override + protected String getMessage(long timestamp) { - LoggingEvent event = new LoggingEvent(); - event.setLevel(Level.INFO); - event.setMessage("message " + timestamp); - event.setLoggerName("logger " + timestamp); - event.setThreadName(Thread.currentThread().getName()); - event.setTimeStamp(timestamp); - - return event; + return "message " + timestamp; } } diff --git a/test/unit/org/apache/cassandra/db/virtual/SlowQueriesTableTest.java b/test/unit/org/apache/cassandra/db/virtual/SlowQueriesTableTest.java new file mode 100644 index 000000000000..1c261e7cc95b --- /dev/null +++ b/test/unit/org/apache/cassandra/db/virtual/SlowQueriesTableTest.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.virtual; + +import java.util.List; +import java.util.Random; + +import org.junit.Test; + +import ch.qos.logback.classic.spi.LoggingEvent; +import org.apache.cassandra.db.monitoring.MonitorableImpl; +import org.apache.cassandra.db.monitoring.MonitoringTask; +import org.apache.cassandra.db.monitoring.MonitoringTask.Operation; +import org.apache.cassandra.utils.Generators; +import org.quicktheories.impl.JavaRandom; + +import static org.apache.cassandra.config.CassandraRelevantProperties.LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS; +import static org.apache.cassandra.db.virtual.SlowQueriesTable.LOGS_VIRTUAL_TABLE_DEFAULT_ROWS; +import static org.apache.cassandra.db.virtual.SlowQueriesTable.LOGS_VIRTUAL_TABLE_MAX_ROWS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class SlowQueriesTableTest extends AbstractLoggerVirtualTableTest +{ + private final Random random = new Random(); + private final JavaRandom javaRandom = new JavaRandom(random); + + @Override + protected void registerTable(int maxSize) + { + registerVirtualTable(new SlowQueriesTable(keyspace, maxSize)); + } + + @Override + protected void registerTable() + { + registerTable(1000); + } + + @Test + public void testLimitedCapacity() + { + registerTable(100); + + int numberOfRows = 1000; + List loggingEvents = getLoggingEvents(numberOfRows); + assertEquals(1000, loggingEvents.size()); + loggingEvents.forEach(table::add); + + // even we inserted 1000 rows, only 100 are present as its capacity is bounded + assertEquals(100, executeNet(query("select * from %s")).all().size()); + } + + @Test + public void testDelete() + { + registerTable(); + + int numberOfRows = 100; + List loggingEvents = getLoggingEvents(numberOfRows); + loggingEvents.forEach(table::add); + + Operation operation = table.buffer.get(0); + + assertEquals(100, executeNet(query("select * from %s")).all().size()); + execute(query("delete from %s where keyspace_name = '" + operation.keyspace() + '\'')); + assertTrue(executeNet(query("select * from %s")).all().size() < 100); + } + + @Test + public void testResolvingBufferSize() + { + LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS.setInt(-1); + assertEquals(LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, resolveBufferSize()); + + LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS.setInt(0); + assertEquals(LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, resolveBufferSize()); + + LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS.setInt(1000001); + assertEquals(LOGS_VIRTUAL_TABLE_DEFAULT_ROWS, resolveBufferSize()); + + LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS.setInt(999); + assertEquals(999, resolveBufferSize()); + + LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS.setInt(50001); + assertEquals(50001, resolveBufferSize()); + } + + private int resolveBufferSize() + { + return AbstractLoggerVirtualTable.resolveBufferSize(LOGS_SLOW_QUERIES_VIRTUAL_TABLE_MAX_ROWS.getInt(), + LOGS_VIRTUAL_TABLE_MAX_ROWS, + LOGS_VIRTUAL_TABLE_DEFAULT_ROWS); + } + + + @Override + protected String getMessage(long timestamp) + { + MonitoringTask.SlowOperation slowOperation = new MonitoringTask.SlowOperation(new MonitorableImpl() + { + @Override + public String name() + { + return Generators.SYMBOL_GEN.generate(javaRandom); + } + + @Override + public String monitoredOnKeyspace() + { + return Generators.SYMBOL_GEN.generate(javaRandom); + } + + @Override + public String monitoredOnTable() + { + return Generators.SYMBOL_GEN.generate(javaRandom); + } + + @Override + public boolean isCrossNode() + { + return random.nextBoolean(); + } + }, timestamp); + + return Operation.serialize(List.of(slowOperation)); + } +} From 6753fb49dcba6af6cccc02e62a5d425704d45b20 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Wed, 23 Apr 2025 11:04:36 -0500 Subject: [PATCH 279/340] Automated Repair Inside Cassandra for CEP-37 patch by Jaydeepkumar Chovatia; reviewed by Andy Tolbert, Chris Lohfink, Francisco Guerrero, Kristijonas Zalys for CASSANDRA-19918 --- CHANGES.txt | 4 +- NEWS.txt | 5 + conf/cassandra.yaml | 166 +++ conf/cassandra_latest.yaml | 164 +++ doc/modules/cassandra/nav.adoc | 3 +- .../pages/managing/operating/auto_repair.adoc | 456 +++++++ .../pages/managing/operating/index.adoc | 3 +- .../pages/managing/operating/metrics.adoc | 72 +- .../pages/managing/operating/repair.adoc | 19 +- .../org/apache/cassandra/config/Config.java | 11 + .../cassandra/config/DatabaseDescriptor.java | 37 + .../cassandra/cql3/UntypedResultSet.java | 8 + .../statements/ModificationStatement.java | 2 +- .../statements/schema/TableAttributes.java | 4 + .../db/compaction/CompactionManager.java | 4 +- .../db/streaming/CassandraStreamReceiver.java | 9 +- .../cassandra/locator/InetAddressAndPort.java | 54 + .../cassandra/metrics/AutoRepairMetrics.java | 223 ++++ .../metrics/AutoRepairMetricsManager.java | 37 + .../metrics/CassandraMetricsRegistry.java | 53 +- .../cassandra/metrics/KeyspaceMetrics.java | 10 + .../cassandra/metrics/TableMetrics.java | 39 +- .../cassandra/repair/PreviewRepairTask.java | 18 +- .../repair/RepairMessageVerbHandler.java | 7 + .../cassandra/repair/ValidationManager.java | 5 + .../repair/autorepair/AutoRepair.java | 568 ++++++++ .../repair/autorepair/AutoRepairConfig.java | 599 +++++++++ .../repair/autorepair/AutoRepairState.java | 326 +++++ .../repair/autorepair/AutoRepairUtils.java | 1189 +++++++++++++++++ .../FixedSplitTokenRangeSplitter.java | 156 +++ .../IAutoRepairTokenRangeSplitter.java | 71 + .../autorepair/KeyspaceRepairAssignments.java | 53 + .../repair/autorepair/KeyspaceRepairPlan.java | 71 + .../autorepair/PrioritizedRepairPlan.java | 160 +++ .../repair/autorepair/RepairAssignment.java | 84 ++ .../autorepair/RepairAssignmentIterator.java | 84 ++ .../autorepair/RepairTokenRangeSplitter.java | 949 +++++++++++++ .../repair/consistent/SyncStatSummary.java | 22 +- .../cassandra/schema/AutoRepairParams.java | 189 +++ .../cassandra/schema/SchemaKeyspace.java | 18 +- .../schema/SystemDistributedKeyspace.java | 34 +- .../apache/cassandra/schema/TableParams.java | 34 +- .../service/ActiveRepairService.java | 32 + .../service/ActiveRepairServiceMBean.java | 4 + .../cassandra/service/AutoRepairService.java | 326 +++++ .../service/AutoRepairServiceMBean.java | 77 ++ .../cassandra/service/CassandraDaemon.java | 2 + .../cassandra/service/StorageService.java | 79 +- .../service/StorageServiceMBean.java | 5 + .../tcm/sequences/BootstrapAndJoin.java | 4 + .../tcm/sequences/BootstrapAndReplace.java | 4 + .../tcm/sequences/ReplaceSameAddress.java | 4 + .../org/apache/cassandra/tools/NodeProbe.java | 141 ++ .../org/apache/cassandra/tools/NodeTool.java | 4 + .../tools/nodetool/AutoRepairStatus.java | 79 ++ .../tools/nodetool/GetAutoRepairConfig.java | 45 + .../tools/nodetool/SSTableRepairedSet.java | 108 ++ .../tools/nodetool/SetAutoRepairConfig.java | 178 +++ ...allelReplicaRepairAcrossSchedulesTest.java | 129 ++ .../test/repair/AutoRepairSchedulerTest.java | 196 +++ test/unit/org/apache/cassandra/Util.java | 13 + .../config/DatabaseDescriptorRefTest.java | 11 + .../config/YamlConfigurationLoaderTest.java | 28 +- .../org/apache/cassandra/cql3/CQLTester.java | 6 +- .../statements/DescribeStatementTest.java | 149 ++- .../CassandraStreamReceiverTest.java | 153 +++ .../AutoRepairConfigRepairTypeTest.java | 60 + .../autorepair/AutoRepairConfigTest.java | 509 +++++++ .../autorepair/AutoRepairKeyspaceTest.java | 59 + .../autorepair/AutoRepairMetricsTest.java | 92 ++ .../AutoRepairParameterizedTest.java | 903 +++++++++++++ .../AutoRepairStateFactoryTest.java | 65 + .../autorepair/AutoRepairStateTest.java | 319 +++++ .../AutoRepairTablePropertyTest.java | 85 ++ .../repair/autorepair/AutoRepairTest.java | 164 +++ .../autorepair/AutoRepairUtilsTest.java | 491 +++++++ .../FixedSplitTokenRangeSplitterHelper.java | 201 +++ ...edSplitTokenRangeSplitterNoVNodesTest.java | 82 ++ ...ixedSplitTokenRangeSplitterVNodesTest.java | 82 ++ .../autorepair/PrioritizedRepairPlanTest.java | 164 +++ .../RepairTokenRangeSplitterTest.java | 465 +++++++ .../autorepair/SSTableRepairedAtTest.java | 175 +++ .../service/ActiveRepairServiceTest.java | 110 +- .../service/AutoRepairServiceBasicTest.java | 151 +++ .../AutoRepairServiceRepairTypeTest.java | 88 ++ .../service/AutoRepairServiceSetterTest.java | 153 +++ .../cassandra/tools/JMXStandardsTest.java | 7 + .../tools/nodetool/AutoRepairStatusTest.java | 108 ++ .../nodetool/SSTableRepairedSetTest.java | 132 ++ .../nodetool/SetAutoRepairConfigTest.java | 318 +++++ 90 files changed, 12637 insertions(+), 143 deletions(-) create mode 100644 doc/modules/cassandra/pages/managing/operating/auto_repair.adoc create mode 100644 src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java create mode 100644 src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java create mode 100644 src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java create mode 100644 src/java/org/apache/cassandra/schema/AutoRepairParams.java create mode 100644 src/java/org/apache/cassandra/service/AutoRepairService.java create mode 100644 src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java create mode 100644 src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java create mode 100644 test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java create mode 100644 test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java create mode 100644 test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java create mode 100644 test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java create mode 100644 test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 1d86f3078943..a0b5da22c5d4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,5 @@ 5.1 - * Implement appender of slow queries to system_views.slow_queries table (CASSANDRA-13001) - * Add autocompletion in CQLSH for built-in functions (CASSANDRA-19631) + * Automated Repair Inside Cassandra [CEP-37] (CASSANDRA-19918) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * General Purpose Transactions (Accord) [CEP-15] (CASSANDRA-17092) * Improve performance when getting writePlacementsAllSettled from ClusterMetadata (CASSANDRA-20526) @@ -237,7 +236,6 @@ Merged from 4.1: * Enforce CQL message size limit on multiframe messages (CASSANDRA-20052) * Fix race condition in DecayingEstimatedHistogramReservoir during rescale (CASSANDRA-19365) Merged from 4.0: - * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) * Avoid computing prepared statement size for unprepared batches (CASSANDRA-20556) * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) * Update OWASP dependency checker to version 12.1.0 (CASSANDRA-20501) diff --git a/NEWS.txt b/NEWS.txt index b35a3c02745b..2026fb09a762 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -72,6 +72,11 @@ using the provided 'sstableupgrade' tool. New features ------------ [The following is a placeholder, to be revised asap] + - CEP-37 Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This + significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit + and manage repairs. See + https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution for more + details on the motivation and design. - CEP-21 Transactional Cluster Metadata introduces a distributed log for linearizing modifications to cluster metadata. In the first instance, this encompasses cluster membership, token ownership and schema metadata. See https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-21%3A+Transactional+Cluster+Metadata for more detail on diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 0c68afe500bc..88b0c7271806 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2188,6 +2188,13 @@ report_unconfirmed_repaired_data_mismatches: false # Materialized views are considered experimental and are not recommended for production use. materialized_views_enabled: false +# Specify whether Materialized View mutations are replayed through the write path on streaming, e.g. repair. +# When enabled, Materialized View data streamed to the destination node will be written into commit log first. When setting to false, +# the streamed Materialized View data is written into SSTables just the same as normal streaming. The default is true. +# If this is set to false, streaming will be considerably faster however it's possible that, in extreme situations +# (losing > quorum # nodes in a replica set), you may have data in your SSTables that never makes it to the Materialized View. +# materialized_views_on_repair_enabled: true + # Enables SASI index creation on this node. # SASI indexes are considered experimental and are not recommended for production use. sasi_indexes_enabled: false @@ -2671,3 +2678,162 @@ storage_compatibility_mode: NONE # # # how quickly the fast path is reconfigured when nodes go up/down # fast_path_update_delay: 5s + +# Prevents preparing a repair session or beginning a repair streaming session if pending compactions is over +# the given value. Defaults to disabled. +# reject_repair_compaction_threshold: 1024 + +# At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during +# incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. +# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) +# then set the ratio to 0.0 +# incremental_repair_disk_headroom_reject_ratio: 0.2; + +# Configuration for Auto Repair Scheduler. +# +# This feature is disabled by default. +# +# See: https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html for an overview of this +# feature. +# +# auto_repair: +# # Enable/Disable the auto-repair scheduler. +# # If set to false, the scheduler thread will not be started. +# # If set to true, the repair scheduler thread will be created. The thread will +# # check for secondary configuration available for each repair type (full, incremental, +# # and preview_repaired), and based on that, it will schedule repairs. +# enabled: true +# repair_type_overrides: +# full: +# # Enable/Disable full auto-repair +# enabled: true +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, +# # such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one +# # round on all nodes in less than this duration, it will not start a new repair round on a given node until +# # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce +# # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than +# # gc_grace_seconds to avoid potential data resurrection. +# min_repair_interval: 24h +# token_range_splitter: +# # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges +# # for repair assignments. +# # +# # Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter, +# # FixedTokenRangeSplitter}. +# # +# # - RepairTokenRangeSplitter (default) attempts to intelligently split ranges based on data size and partition +# # count. +# # - FixedTokenRangeSplitter splits into fixed ranges based on the 'number_of_subranges' option. +# # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# +# # Optional parameters can be specified in the form of: +# # parameters: +# # param_key1: param_value1 +# parameters: +# # The target and maximum amount of compressed bytes that should be included in a repair assignment. +# # This scopes the amount of work involved in a repair and includes the data covering the range being +# # repaired. +# bytes_per_assignment: 50GiB +# # The maximum number of bytes to cover in an individual schedule. This serves as +# # a mechanism to throttle the work done in each repair cycle. You may reduce this +# # value if the impact of repairs is causing too much load on the cluster or increase it +# # if writes outpace the amount of data being repaired. Alternatively, adjust the +# # min_repair_interval. +# # This is set to a large value for full repair to attempt to repair all data per repair schedule. +# max_bytes_per_schedule: 100000GiB +# incremental: +# enabled: false +# # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair +# # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, +# # so a more frequent schedule such as 1h is recommended. +# # NOTE: Please consult +# # https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html#enabling-ir +# # for guidance on enabling incremental repair on ane exiting cluster. +# min_repair_interval: 24h +# token_range_splitter: +# parameters: +# # Configured to attempt repairing 50GiB of compressed data per repair. +# # This throttles the amount of incremental repair and anticompaction done per schedule after incremental +# # repairs are turned on. +# bytes_per_assignment: 50GiB +# # Restricts the maximum number of bytes to cover in an individual schedule to the configured +# # max_bytes_per_schedule value (defaults to 100GiB for incremental). +# # Consider increasing this value if more data is written than this limit within the min_repair_interval. +# max_bytes_per_schedule: 100GiB +# preview_repaired: +# # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired +# # data set. +# enabled: false +# min_repair_interval: 24h +# token_range_splitter: +# parameters: +# bytes_per_assignment: 50GiB +# max_bytes_per_schedule: 100000GiB +# # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule +# # repairs. +# repair_check_interval: 5m +# # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming +# # the node by scheduling too many repair tasks in a short period of time. +# repair_task_min_duration: 5s +# # The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata +# # for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. +# history_clear_delete_hosts_buffer_interval: 2h +# # NOTE: Each of the below settings can be overridden per repair type under repair_type_overrides +# global_settings: +# # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired +# # individually. +# repair_by_keyspace: true +# # Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool +# # repair. +# number_of_repair_threads: 1 +# # Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. +# parallel_repair_count: 3 +# # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value +# # is used. +# parallel_repair_percentage: 3 +# # Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. +# # Defaults to false, as running repairs concurrently on replicas can increase load and also cause anticompaction +# # conflicts while running incremental repair. +# allow_parallel_replica_repair: false +# # An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) +# # are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will +# # prevent starting incremental repairs for this node. Defaults to true and is only evaluated when +# # allow_parallel_replica_repair is false. +# allow_parallel_replica_repair_across_schedules: true +# # Repairs materialized views if true. +# materialized_view_repair_enabled: false +# # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. +# initial_scheduler_delay: 5m +# # Timeout for retrying stuck repair sessions. +# repair_session_timeout: 3h +# # Force immediate repair on new nodes after they join the ring. +# force_repair_new_node: false +# # Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good +# # tables. +# sstable_upper_threshold: 10000 +# # Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the +# # next table. +# table_max_repair_time: 6h +# # Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify data +# # centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded +# # data centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to +# # not run repair schedule in certain data centers. +# ignore_dcs: [] +# # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults +# # to true. General advice is to keep this true. +# repair_primary_token_range_only: true +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. +# repair_retry_backoff: 30s +# token_range_splitter: +# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. +# class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# parameters: +# # Maximum number of partitions to include in a repair assignment. Used to reduce number of partitions +# # present in merkle tree leaf nodes to avoid overstreaming. +# partitions_per_assignment: 1048576 +# # Maximum number of tables to include in a repair assignment. This reduces the number of repairs, +# # especially in keyspaces with many tables. The splitter avoids batching tables together if they +# # exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. +# max_tables_per_assignment: 64 diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 9c86beeea829..6dfc89a975b9 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2047,6 +2047,13 @@ report_unconfirmed_repaired_data_mismatches: false # Materialized views are considered experimental and are not recommended for production use. materialized_views_enabled: false +# Specify whether Materialized View mutations are replayed through the write path on streaming, e.g. repair. +# When enabled, Materialized View data streamed to the destination node will be written into commit log first. When setting to false, +# the streamed Materialized View data is written into SSTables just the same as normal streaming. The default is true. +# If this is set to false, streaming will be considerably faster however it's possible that, in extreme situations +# (losing > quorum # nodes in a replica set), you may have data in your SSTables that never makes it to the Materialized View. +# materialized_views_on_repair_enabled: true + # Enables SASI index creation on this node. # SASI indexes are considered experimental and are not recommended for production use. sasi_indexes_enabled: false @@ -2362,3 +2369,160 @@ default_secondary_index_enabled: true # compatibility mode would no longer toggle behaviors as when it was running in the UPGRADING mode. # storage_compatibility_mode: NONE + +# Prevents preparing a repair session or beginning a repair streaming session if pending compactions is over +# the given value. Defaults to disabled. +# reject_repair_compaction_threshold: 1024 + +# At least 20% of disk must be unused to run incremental repair. It is useful to avoid disks filling up during +# incremental repair as anti-compaction during incremental repair may contribute to additional space temporarily. +# if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) +# then set the ratio to 0.0 +# incremental_repair_disk_headroom_reject_ratio: 0.2; + +# Configuration for Auto Repair Scheduler. +# +# This feature is disabled by default. +# +# See: https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html for an overview of this +# feature. +# +# auto_repair: +# # Enable/Disable the auto-repair scheduler. +# # If set to false, the scheduler thread will not be started. +# # If set to true, the repair scheduler thread will be created. The thread will +# # check for secondary configuration available for each repair type (full, incremental, +# # and preview_repaired), and based on that, it will schedule repairs. +# enabled: true +# repair_type_overrides: +# full: +# # Enable/Disable full auto-repair +# enabled: true +# # Minimum duration between repairing the same node again. This is useful for tiny clusters, +# # such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one +# # round on all nodes in less than this duration, it will not start a new repair round on a given node until +# # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce +# # the impact of repairs, however note that one should attempt to run repairs at a smaller interval than +# # gc_grace_seconds to avoid potential data resurrection. +# min_repair_interval: 24h +# token_range_splitter: +# # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges +# # for repair assignments. +# # +# # Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter, +# # FixedTokenRangeSplitter}. +# # +# # - RepairTokenRangeSplitter (default) attempts to intelligently split ranges based on data size and partition +# # count. +# # - FixedTokenRangeSplitter splits into fixed ranges based on the 'number_of_subranges' option. +# # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# +# # Optional parameters can be specified in the form of: +# # parameters: +# # param_key1: param_value1 +# parameters: +# # The target and maximum amount of compressed bytes that should be included in a repair assignment. +# # This scopes the amount of work involved in a repair and includes the data covering the range being +# # repaired. +# bytes_per_assignment: 50GiB +# # The maximum number of bytes to cover in an individual schedule. This serves as +# # a mechanism to throttle the work done in each repair cycle. You may reduce this +# # value if the impact of repairs is causing too much load on the cluster or increase it +# # if writes outpace the amount of data being repaired. Alternatively, adjust the +# # min_repair_interval. +# # This is set to a large value for full repair to attempt to repair all data per repair schedule. +# max_bytes_per_schedule: 100000GiB +# incremental: +# # Enable incremental repair by default for new clusters. +# enabled: true +# # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair +# # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data, +# # so a more frequent schedule such as 1h is recommended. +# min_repair_interval: 1h +# token_range_splitter: +# parameters: +# # Configured to attempt repairing 50GiB of compressed data per repair. +# # This throttles the amount of incremental repair and anticompaction done per schedule after incremental +# # repairs are turned on. +# bytes_per_assignment: 50GiB +# # Restricts the maximum number of bytes to cover in an individual schedule to the configured +# # max_bytes_per_schedule value (defaults to 100GiB for incremental). +# # Consider increasing this value if more data is written than this limit within the min_repair_interval. +# max_bytes_per_schedule: 100GiB +# preview_repaired: +# # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired +# # data set. +# enabled: false +# min_repair_interval: 24h +# token_range_splitter: +# parameters: +# bytes_per_assignment: 50GiB +# max_bytes_per_schedule: 100000GiB +# # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule +# # repairs. +# repair_check_interval: 5m +# # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming +# # the node by scheduling too many repair tasks in a short period of time. +# repair_task_min_duration: 5s +# # The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata +# # for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. +# history_clear_delete_hosts_buffer_interval: 2h +# # NOTE: Each of the below settings can be overridden per repair type under repair_type_overrides +# global_settings: +# # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired +# # individually. +# repair_by_keyspace: true +# # Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool +# # repair. +# number_of_repair_threads: 1 +# # Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. +# parallel_repair_count: 3 +# # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value +# # is used. +# parallel_repair_percentage: 3 +# # Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. +# # Defaults to false, as running repairs concurrently on replicas can increase load and also cause anticompaction +# # conflicts while running incremental repair. +# allow_parallel_replica_repair: false +# # An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) +# # are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will +# # prevent starting incremental repairs for this node. Defaults to true and is only evaluated when +# # allow_parallel_replica_repair is false. +# allow_parallel_replica_repair_across_schedules: true +# # Repairs materialized views if true. +# materialized_view_repair_enabled: false +# # Delay before starting repairs after a node restarts to avoid repairs starting immediately after a restart. +# initial_scheduler_delay: 5m +# # Timeout for retrying stuck repair sessions. +# repair_session_timeout: 3h +# # Force immediate repair on new nodes after they join the ring. +# force_repair_new_node: false +# # Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good +# # tables. +# sstable_upper_threshold: 10000 +# # Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the +# # next table. +# table_max_repair_time: 6h +# # Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify data +# # centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded +# # data centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to +# # not run repair schedule in certain data centers. +# ignore_dcs: [] +# # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults +# # to true. General advice is to keep this true. +# repair_primary_token_range_only: true +# # Maximum number of retries for a repair session. +# repair_max_retries: 3 +# # Backoff time before retrying a repair session. +# repair_retry_backoff: 30s +# token_range_splitter: +# # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter. +# class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter +# parameters: +# # Maximum number of partitions to include in a repair assignment. Used to reduce number of partitions +# # present in merkle tree leaf nodes to avoid overstreaming. +# partitions_per_assignment: 1048576 +# # Maximum number of tables to include in a repair assignment. This reduces the number of repairs, +# # especially in keyspaces with many tables. The splitter avoids batching tables together if they +# # exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment. +# max_tables_per_assignment: 64 diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc index dd9dd1054df9..311bfb16c585 100644 --- a/doc/modules/cassandra/nav.adoc +++ b/doc/modules/cassandra/nav.adoc @@ -99,6 +99,7 @@ ***** xref:cassandra:managing/operating/fqllogging.adoc[Full query logging] **** xref:cassandra:managing/operating/metrics.adoc[Monitoring metrics] **** xref:cassandra:managing/operating/repair.adoc[Repair] +**** xref:cassandra:managing/operating/auto_repair.adoc[Auto Repair] **** xref:cassandra:managing/operating/read_repair.adoc[Read repair] **** xref:cassandra:managing/operating/security.adoc[Security] **** xref:cassandra:managing/operating/snitch.adoc[Snitches] @@ -126,4 +127,4 @@ *** xref:reference/static.adoc[Static columns] *** xref:reference/vector-data-type.adoc[Vector data type] -** xref:integrating/plugins/index.adoc[] \ No newline at end of file +** xref:integrating/plugins/index.adoc[] diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc new file mode 100644 index 000000000000..3928d0afccef --- /dev/null +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -0,0 +1,456 @@ += Auto Repair +:navtitle: Auto Repair +:description: Auto Repair concepts - How it works, how to configure it, and more. +:keywords: CEP-37, Repair, Incremental, Preview + +Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This +significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit and +manage repairs. + +At a high level, a dedicated thread pool is assigned to the repair scheduler. The repair scheduler in Cassandra +maintains a new replicated table, `system_distributed.auto_repair_history`, which stores the repair history for all +nodes, including details such as the last repair time. The scheduler selects the node(s) to begin repairs and +orchestrates the process to ensure that every table and its token ranges are repaired. + +The algorithm can run repairs simultaneously on multiple nodes and splits token ranges into subranges, with necessary +retries to handle transient failures. Automatic repair starts as soon as a Cassandra cluster is launched, similar to +compaction, and if configured appropriately, does not require human intervention. + +The scheduler currently supports Full, Incremental, and Preview repair types with the following features. New repair +types, such as Paxos repair or other future repair mechanisms, can be integrated with minimal development effort! + + +== Features +- Capability to run repairs on multiple nodes simultaneously. +- A default implementation and an interface to override the dataset being repaired per session. +- Extendable token split algorithms with two implementations readily available: +. Splits token ranges by placing a cap on the size of data repaired in one session and a maximum cap at the schedule +level using xref:#repair-token-range-splitter[RepairTokenRangeSplitter] (default). +. Splits tokens evenly based on the specified number of splits using +xref:#fixed-split-token-range-splitter[FixedSplitTokenRangeSplitter]. +- A new xref:#table-configuration[CQL table property] (`auto_repair`) offering: +. The ability to disable specific repair types at the table level, allowing the scheduler to skip one or more tables. +. Configuring repair priorities for certain tables to prioritize them over others. +- Dynamic enablement or disablement of the scheduler for each repair type. +- Configurable settings tailored to each repair job. +- Rich configuration options for each repair type (e.g., Full, Incremental, or Preview repairs). +- Comprehensive observability features that allow operators to configure alarms as needed. + +== Considerations + +Before enabling Auto Repair, please consult the xref:managing/operating/repair.adoc[Repair] guide to establish a base +understanding of repairs. + +=== Full Repair + +Full Repairs operate over all data in the token range being repaired. It is therefore important to run full repair +with a longer schedule and with smaller assignments. + +=== Incremental Repair + +When enabled from the inception of a cluster, incremental repairs operate over unrepaired data and should finish +quickly when run more frequently. + +Once incremental repair has been run, SSTables will be separated between data that have been incrementally repaired +and data that have not. Therefore, it is important to continually run incremental repair once it has been enabled so +newly written data can be compacted together with previously repaired data, allowing overwritten and expired data to +be eventually purged. + +Running incremental repair more frequently keeps the unrepaired set smaller and thus causes repairs to operate over +a smaller set of data, so a shorter `min_repair_interval` such as `1h` is recommended for new clusters. + +==== Enabling Incremental Repair on existing clusters with a large amount of data +[#enabling-ir] +One should be careful when enabling incremental repair on a cluster for the first time. While +xref:#repair-token-range-splitter[RepairTokenRangeSplitter] includes a default configuration to attempt to gracefully +migrate to incremental repair over time, failure to take proper precaution could overwhelm the cluster with +xref:managing/operating/compaction/overview.adoc#types-of-compaction[anticompactions]. + +No matter how one goes about enabling and running incremental repair, it is recommended to run a cycle of full repairs +for the entire cluster as pre-flight step to running incremental repair. This will put the cluster into a more +consistent state which will reduce the amount of streaming between replicas when incremental repair initially runs. + +If you do not have strong data consistency requirements, one may consider using +xref:managing/tools/sstable/sstablerepairedset.adoc[nodetool sstablerepairedset] to mark all SSTables as repaired +before enabling incremental repair scheduling using Auto Repair. This will reduce the burden of initially running +incremental repair because all existing data will be considered as repaired, so subsequent incremental repairs will +only run against new data. + +If you do have strong data consistency requirements, then one must treat all data as initially unrepaired and run +incremental repair against it. Consult +xref:#incremental-repair-defaults[RepairTokenRangeSplitter's Incremental repair defaults]. + +In particular one should be mindful of the xref:managing/operating/compaction/overview.adoc[compaction strategy] +you use for your tables and how it might impact incremental repair before running incremental repair for the first +time: + +- *Large SSTables*: When using xref:managing/operating/compaction/stcs.adoc[SizeTieredCompactionStrategy] or any + compaction strategy which can create large SSTables including many partitions the amount of + xref:managing/operating/compaction/overview.adoc#types-of-compaction[anticompaction] that might be required could be + excessive. Using a small `bytes_per_assignment` might contribute to repeated anticompactions over the same + unrepaired data. +- *Partitions overlapping many SSTables*: If partitions overlap between many SSTables, the amount of SSTables included + in a repair might be large. Therefore it is important to consider that many SSTables may be included in a repair + session and must all be anticompacted. xref:managing/operating/compaction/lcs.adoc[LeveledCompactionStrategy] is less + susceptible to this issue as it prevents overlapping of partitions within levels outside of L0, but if SSTables + start accumulating in L0 between incremental repairs, the cost of anticompaction will increase. + xref:managing/operating/compaction/ucs#sharding[UnifiedCompactionStrategy's sharding] can also be used to avoid + partitions overlapping SSTables. + +The xref:#repair-token-range-splitter[token_range_splitter] configuration for incremental repair includes a default +configuration that attempts to conservatively migrate 100GiB of compressed data every day per node. Depending on +requirements, data set and capability of a cluster's hardware, one may consider tuning these values to be more +aggressive or conservative. + +=== Previewing Repaired Data + +The `preview_repaired` repair type executes repairs over the repaired data set to detect possible data inconsistencies. + +Inconsistencies in the repaired data set should not happen in practice and could indicate a possible bug in incremental +repair. + +Running preview repairs is useful when considering using the +xref:cassandra:managing/operating/compaction/tombstones.adoc#deletion[only_purge_repaired_tombstones] table compaction +option to prevent data from possibly being resurrected when inconsistent replicas are missing tombstones from deletes. + +When enabled, the `BytesPreviewedDesynchronized` and `TokenRangesPreviewedDesynchronized` +xref:cassandra:managing/operating/metrics.adoc#table-metrics[table metrics] can be used to detect inconsistencies in the +repaired data set. + +== Configuring Auto Repair in cassandra.yaml + +Configuration for Auto Repair is managed in the `cassandra.yaml` file by the `auto_repair` property. + +A rich set of configuration exists for configuring Auto Repair with sensible defaults. However, the expectation +is that some tuning might be needed particulary when it comes to tuning how often repair should run +(`min_repair_interval`) and how repair assignments as created (`token_range_splitter`). + +The following is a practical example of an auto_repair configuration that one might use. + +[source, yaml] +---- +auto_repair: + enabled: true + repair_type_overrides: + full: + enabled: true + min_repair_interval: 5d + incremental: + enabled: true + min_repair_interval: 1h + token_range_splitter: + parameters: + bytes_per_assignment: 50GiB + max_bytes_per_schedule: 100GiB + preview_repaired: + enabled: true + min_repair_interval: 1d + global_settings: + repair_by_keyspace: true + parallel_repair_count: 1 +---- + + +=== Top level settings +The following settings are defined at the top level of the configuration file and apply universally across all +repair types. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Enable/Disable the auto-repair scheduler. If set to false, the scheduler thread will not be started. +If set to true, the repair scheduler thread will be created. The thread will check for secondary configuration available +for each repair type (full, incremental, and preview_repaired), and based on that, it will schedule repairs. +| repair_check_interval | 5m | Time interval between successive checks to see if ongoing repairs are complete or if it +is time to schedule repairs. +| repair_max_retries | 3 | Maximum number of retries for a repair session. +| history_clear_delete_hosts_buffer_interval | 2h | The scheduler needs to adjust its order when nodes leave the ring. +Deleted hosts are tracked in metadata for a specified duration to ensure they are indeed removed before adjustments +are made to the schedule. +|=== + + +=== Repair level settings +The following settings can be configured globally using `global_settings` or tailored individually for each repair +type by using `repair_type_overrides`. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| enabled | false | Whether the given repair types should be enabled +| min_repair_interval | 24h | Minimum duration between repairing the same node again. This is useful for tiny clusters, +such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one round on all +nodes in less than this duration, it will not start a new repair round on a given node until this much time has +passed since the last repair completed. Consider increasing to a larger value to reduce the impact of repairs, +however note that one should attempt to run repairs at a smaller interval than gc_grace_seconds to +avoid xref:cassandra:managing/operating/compaction/tombstones.adoc#zombies[data resurrection]. +| token_range_splitter.class_name | org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter | Implementation of +IAutoRepairTokenRangeSplitter to use; responsible for splitting token ranges for repair assignments. Out of the box, +Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter,FixedTokenRangeSplitter}. +| repair_by_keyspace | true | If true, attempts to group tables in the same keyspace into one repair; otherwise, +each table is repaired individually. +| number_of_repair_threads | 1 | Number of threads to use for each repair job scheduled by the scheduler. Similar to +the -j option in nodetool repair. +| parallel_repair_count | 3 | Number of nodes running repair in parallel. If `parallel_repair_percentage` is set, the +larger value is used. +| parallel_repair_percentage | 3 | Percentage of nodes in the cluster running repair in parallel. If +`parallel_repair_count is set`, the larger value is used. +| allow_parallel_replica_repair | false | Whether to allow a node to take its turn running repair while one or more of +its replicas are running repair. Defaults to false, as running repairs concurrently on replicas can increase load and +also cause anticompaction conflicts while running incremental repair. +| allow_parallel_replica_repair_across_schedules | true | An addition to allow_parallel_repair that also blocks repairs +when replicas (including this node itself) are repairing in any schedule. +For example, if a replica is executing full repairs, a value of false will prevent starting incremental repairs for this +node. Defaults to true and is only evaluated when allow_parallel_replica_repair is false. +| materialized_view_repair_enabled | false | Repairs materialized views if true. +| initial_scheduler_delay | 5m | Delay before starting repairs after a node restarts to avoid repairs starting +immediately after a restart. +| repair_session_timeout | 3h | Timeout for retrying stuck repair sessions. +| force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. +| sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. +| table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. If exceeded, the repair +proceeds to the next table. +| ignore_dcs | [] | Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify +data centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded data +centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to not run repair +schedule in certain data centers. +| repair_primary_token_range_only | true | Repair only the primary ranges owned by a node. Equivalent to the -pr option +in nodetool repair. General advice is to keep this true. +| repair_retry_backoff | 30s | Backoff time before retrying a repair session. +| repair_task_min_duration | 5s | Minimum duration for the execution of a single repair task. This prevents the +scheduler from overwhelming the node by scheduling too many repair tasks in a short period of time. +|=== + +=== `RepairTokenRangeSplitter` configuration +[#repair-token-range-splitter] + +`RepairTokenRangeSplitter` is the default implementation of `IAutoRepairTokenRangeSplitter` that attempts to create +token range assignments meeting the following goals: + +- *Create smaller, consistent repair times*: Long repairs, such as those lasting 15 hours, can be problematic. If a +node fails 14 hours into the repair, the entire process must be restarted. The goal is to reduce the impact of +disturbances or failures. However, making the repairs too short can lead to overhead from repair orchestration becoming +the main bottleneck. + +- *Minimize the impact on hosts*: Repairs should not heavily affect the host systems. For incremental repairs, this +might involve anti-compaction work. In full repairs, streaming large amounts of data—especially with wide partitions +can lead to issues with disk usage and higher compaction costs. + +- *Reduce overstreaming*: The Merkle tree, which represents data within each partition and range, has a maximum size. +If a repair covers too many partitions, the tree’s leaves represent larger data ranges. Even a small change in a leaf +can trigger excessive data streaming, making the process inefficient. + +- *Reduce number of repairs*: If there are many small tables, it's beneficial to batch these tables together under a +single parent repair. This prevents the repair overhead from becoming a bottleneck, especially when dealing with +hundreds of tables. Running individual repairs for each table can significantly impact performance and efficiency. + +To achieve these goals, this implementation inspects SSTable metadata to estimate the bytes and number of partitions +within a range and splits it accordingly to bound the size of the token ranges used for repair assignments. + +==== Parameter defaults + +The following `parameters` include the same defaults for all repair types. + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| partitions_per_assignment | 1048576 | Maximum number of partitions to include in a repair +assignment. Used to reduce number of partitions present in merkle tree leaf nodes to avoid overstreaming. +| max_tables_per_assignment | 64 | Maximum number of tables to include in a repair assignment. +This reduces the number of repairs, especially in keyspaces with many tables. The splitter avoids batching tables +together if they exceed other configuration parameters like `bytes_per_assignment` or `partitions_per_assignment`. +|=== + +==== Full & Preview Repaired repair defaults + +The following `parameters` defaults are established for both `full` and `preview_repaired` repair scheduling: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| bytes_per_assignment | 50GiB | The target and maximum amount of *compressed* bytes that should be included in a +repair assignment. *Note*: For full and preview_repaired, only the portion of an SSTable that covers the ranges +being repaired are accounted for in this calculation. +| max_bytes_per_schedule | 100000GiB | The maximum number of bytes to cover in an individual +schedule. This serves as a mechanism to throttle the work done in each repair cycle. You may reduce this value if the +impact of repairs is causing too much load on the cluster or increase it if writes outpace the amount of data being +repaired. Alternatively, adjust the `min_repair_interval`. This is set to a large value for full repair to attempt to +repair all data per repair schedule. +|=== + +==== Incremental repair defaults + +The following `parameters` defaults are established for `incremental` repair scheduling: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| bytes_per_assignment | 50GiB | The target and maximum amount of *compressed* bytes that should be +included in a repair assignment. *Note*: For incremental repair, the *entire size* of *unrepaired* SSTables +including ranges being repaired are accounted for in this calculation. This is to account for the anticompaction +work required to split the candidate data to repair from the data that won't be repaired. +| max_bytes_per_schedule | 100GiB | The maximum number of bytes to cover in an individual schedule. +Consider increasing if more data is written than this limit within the `min_repair_interval`. +|=== + +=== `FixedSplitTokenRangeSplitter` configuration +[#fixed-split-token-range-splitter] + +`FixedSplitTokenRangeSplitter` is a more simple implementation of `IAutoRepairTokenRangeSplitter` that creates repair +assignments by splitting a node's token ranges into an even number of splits. + +The following `parameters` apply for `FixedSplitTokenRangeSplitter` configuration: + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| number_of_subranges | 32 | Number of evenly split subranges to create for each node that repair runs for. +If vnodes are configured using `num_tokens`, attempts to evenly subdivide subranges by each range. For example, for +`num_tokens: 16` and `number_of_subranges: 32`, 2 (32/16) repair assignments will be created for each token range. At +least one repair assignment will be created for each token range. +|=== + +=== Other cassandra.yaml Considerations + +==== Enable `reject_repair_compaction_threshold` + +When enabling auto_repair, it is advisable to configure the top level `reject_repair_compaction_threshold` +configuration in cassandra.yaml as a backpressure mechanism to reject new repairs on instances that have many +pending compactions. + +==== Tune `incremental_repair_disk_headroom_reject_ratio` + +By default, incremental repairs will be rejected if less than 20% of disk is available. If one wishes to be +conservative this top level configuration could be increased to a larger value to prevent filling your data directories. + +== Table configuration + +If Auto Repair is enabled in cassandra.yaml, the `auto_repair` property may be optionally configured at the table +level, e.g.: + +[source,cql] +---- +ALTER TABLE cycling.cyclist_races +WITH auto_repair = {'incremental_enabled': 'false', 'priority': '0'}; +---- + +[cols=",,",options="header",] +|=== +| Name | Default | Description +| priority | 0 | Indicates the priority at which this table should be given when issuing repairs. The higher the number +the more priority will be given to repair the table (e.g. 3 will be repaired before 2). When `repair_by_keyspace` is +set to `true` tables sharing the same priority may be grouped in the same repair assignment. +| full_enabled | true | Whether full repair is enabled for this table. If full.enabled is not true in cassandra.yaml +this will not be evaluated. +| incremental_enabled | true | Whether incremental repair is enabled for this table. If incremental.enabled is not +true in cassandra.yaml this will not be evaluated. +| preview_repaired_enabled | true | Whether preview repair is enabled for this table. If preview_repaired.enabled is +not true in cassandra.yaml this will not be evaluated. +|=== + +== Nodetool Configuration +=== nodetool getautorepairconfig + +Retrieves the runtime configuration of Auto Repair for the targeted node. + +[source,none] +---- +$> nodetool getautorepairconfig +repair scheduler configuration: + repair_check_interval: 5m + repair_max_retries: 3 + history_clear_delete_hosts_buffer_interval: 2h +configuration for repair_type: full + enabled: true + min_repair_interval: 24h + repair_by_keyspace: true + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + materialized_view_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + repair_retry_backoff: 30s + repair_task_min_duration: 5s + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 50GiB + token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100000GiB +configuration for repair_type: incremental + enabled: true + min_repair_interval: 1h + repair_by_keyspace: true + number_of_repair_threads: 1 + sstable_upper_threshold: 10000 + table_max_repair_time: 6h + ignore_dcs: [] + repair_primary_token_range_only: true + parallel_repair_count: 3 + parallel_repair_percentage: 3 + materialized_view_repair_enabled: false + initial_scheduler_delay: 5m + repair_session_timeout: 3h + force_repair_new_node: false + repair_retry_backoff: 30s + repair_task_min_duration: 5s + token_range_splitter: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter + token_range_splitter.bytes_per_assignment: 50GiB + token_range_splitter.partitions_per_assignment: 1048576 + token_range_splitter.max_tables_per_assignment: 64 + token_range_splitter.max_bytes_per_schedule: 100GiB +configuration for repair_type: preview_repaired + enabled: false +---- + +=== nodetool autorepairstatus + +Provides currently running Auto Repair status. + +[source,none] +---- +$> nodetool autorepairstatus -t incremental +Active Repairs +425cea55-09aa-46e0-8911-9f37a4424574 + + +$> nodetool autorepairstatus -t full +Active Repairs +NONE + +---- + +=== nodetool setautorepairconfig + +Dynamic configuration changes can be made by using `setautorepairconfig`. Note that this only applies on the node being +targeted and these changes are not retained when a node is bounced. + +The following disables the `incremental` repair schedule: + +[source,none] +---- +$> nodetool setautorepairconfig -t incremental enabled false +---- + +The following adjusts the `min_repair_interval` option to `5d` specifically for the `full` repair schedule: + +[source,none] +---- +$> nodetool setautorepairconfig -t full min_repair_interval 5d +---- + +The following configures the `bytes_per_assignment` parameter for `incremental` repair's `token_range_splitter` to +`10GiB`: + +[source,none] +---- +$> nodetool setautorepairconfig -t incremental token_range_splitter.bytes_per_assignment 10GiB +---- + +==== More details +https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution[CEP-37] diff --git a/doc/modules/cassandra/pages/managing/operating/index.adoc b/doc/modules/cassandra/pages/managing/operating/index.adoc index 39dd508c4593..492af4dfec3b 100644 --- a/doc/modules/cassandra/pages/managing/operating/index.adoc +++ b/doc/modules/cassandra/pages/managing/operating/index.adoc @@ -14,7 +14,8 @@ * xref:cassandra:managing/operating/metrics.adoc[Monitoring metrics] * xref:cassandra:managing/operating/repair.adoc[Repair] * xref:cassandra:managing/operating/read_repair.adoc[Read repair] +* xref:cassandra:managing/operating/auto_repair.adoc[Auto Repair] * xref:cassandra:managing/operating/security.adoc[Security] * xref:cassandra:managing/operating/topo_changes.adoc[Topology changes] * xref:cassandra:managing/operating/transientreplication.adoc[Transient replication] -* xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] \ No newline at end of file +* xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] diff --git a/doc/modules/cassandra/pages/managing/operating/metrics.adoc b/doc/modules/cassandra/pages/managing/operating/metrics.adoc index 4f3d66652c24..6fba0f811711 100644 --- a/doc/modules/cassandra/pages/managing/operating/metrics.adoc +++ b/doc/modules/cassandra/pages/managing/operating/metrics.adoc @@ -249,12 +249,19 @@ during validation. |PartitionsValidated |Histogram |Histogram over the number of partitions read during validation. -|BytesAnticompacted |Counter |How many bytes we anticompacted. +|BytesAnticompacted |Meter |How many bytes we anticompacted. -|BytesMutatedAnticompaction |Counter |How many bytes we avoided +|BytesMutatedAnticompaction |Meter |How many bytes we avoided anticompacting because the sstable was fully contained in the repaired range. +|BytesPreviewed |Meter |Estimated number of bytes that were scanned for local replica during preview repair + +|BytesPreviewedDesynchronized |Meter |Number of desynchronized bytes that were detected among all replicas during preview repair + +|TokenRangesPreviewedDesynchronized |Meter |Number of token ranges among all replicas where desynchronization was found +during preview repair. These ranges would need to be streamed during subsequent repair. + |MutatedAnticompactionGauge |Gauge |Ratio of bytes mutated vs total bytes repaired. |=== @@ -1081,6 +1088,67 @@ partitions processed per logged batch partitions processed per unlogged batch |=== +== Automated Repair Metrics + +Metrics specifc to automated repair. + +Reported name format: + +*Metric Name*:: +`org.apache.cassandra.metrics.AutoRepair.` +*JMX MBean*:: +`org.apache.cassandra.metrics:type=AutoRepair name= repairType=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|RepairsInProgress |Gauge |Repair is in progress +on the node + +|NodeRepairTimeInSec |Gauge |Time taken to repair +the node in seconds + +|ClusterRepairTimeInSec |Gauge |Time taken to repair +the entire Cassandra cluster in seconds + +|LongestUnrepairedSec |Gauge |Time since the last repair +ran on the node in seconds + +|RepairStartLagSec|Gauge |If a repair has not run within min_repair_interval, how long past this value since +repairs last completed. Useful for determining if repairs are behind schedule. + +|SucceededTokenRangesCount |Gauge |Number of token ranges successfully repaired on the node + +|FailedTokenRangesCount |Gauge |Number of token ranges failed to repair on the node + +|SkippedTokenRangesCount |Gauge |Number of token ranges skipped +on the node + +|SkippedTablesCount |Gauge |Number of tables skipped +on the node + +|TotalMVTablesConsideredForRepair |Gauge |Number of materialized +views considered on the node + +|TotalDisabledRepairTables |Gauge |Number of tables on which +the automated repair has been disabled on the node + +|RepairTurnMyTurn |Counter |Represents the node's turn to repair + +|RepairTurnMyTurnDueToPriority |Counter |Represents the node's turn to repair +due to priority set in the automated repair + +|RepairDelayedByReplica |Counter |Represents occurrences of a node's turn being +delayed because a replica was currently taking its turn. Only relevant if +`allow_parallel_replica_repair` is false. + +|RepairDelayedBySchedule |Counter |Represents occurrences of a node's turn being +delayed because it was already being repaired in another schedule. Only relevant +if `allow_parallel_replica_repair_across_schedules` is false. + +|=== + + == JVM Metrics JVM metrics such as memory and garbage collection statistics can either diff --git a/doc/modules/cassandra/pages/managing/operating/repair.adoc b/doc/modules/cassandra/pages/managing/operating/repair.adoc index 1823a6d4ef95..d7eaba171125 100644 --- a/doc/modules/cassandra/pages/managing/operating/repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/repair.adoc @@ -29,10 +29,21 @@ for syncing up missed writes, but it doesn't protect against things like disk corruption, data loss by operator error, or bugs in Cassandra. For this reason, full repairs should still be run occasionally. -== Usage and Best Practices +== Automated Repair Scheduling -Since repair can result in a lot of disk and network io, it's not run -automatically by Cassandra. It is run by the operator via nodetool. +Since repair can result in a lot of disk and network io, it has +traditionally not been run automatically by Cassandra. + +In the latest version of Cassandra, a new feature called +xref:managing/operating/auto_repair.adoc[auto repair] was introduced to +allow Cassandra to submit and manage repairs automatically on a schedule. + +The introduction of this feature does not interfere with existing repair +functionality enabled via nodetool. + +== Submitting Repairs Using Nodetool + +Repairs can also be run by the operator via nodetool. Incremental repair is the default and is run with the following command: @@ -63,7 +74,7 @@ nodetool repair [options] ---- -The repair command repairs token ranges only on the node being repaired; it does not repair the whole cluster. +The repair command repairs token ranges only on the node being repaired; it does not repair the whole cluster. By default, repair operates on all token ranges replicated by the node on which repair is run, causing duplicate work when running it on every node. Avoid duplicate work by using the `-pr` flag to repair only the "primary" ranges on a node. Do a full cluster repair by running the `nodetool repair -pr` command on each node in each datacenter in the cluster, until all of the nodes and datacenters are repaired. diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 1a3da1033fbc..a22b40fa27ee 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -32,6 +32,7 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,6 +43,7 @@ import org.apache.cassandra.index.internal.CassandraIndex; import org.apache.cassandra.io.compress.BufferType; import org.apache.cassandra.io.sstable.format.big.BigFormat; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.StartupChecks.StartupCheckType; import org.apache.cassandra.utils.StorageCompatibilityMode; @@ -368,6 +370,10 @@ public MemtableOptions() // The number of executors to use for building secondary indexes public volatile int concurrent_index_builders = 2; + // at least 20% of disk must be unused to run incremental repair + // if you want to disable this feature (the recommendation is not to, but if you want to disable it for whatever reason) then set the ratio to 0.0 + public volatile double incremental_repair_disk_headroom_reject_ratio = 0.2; + /** * @deprecated retry support removed on CASSANDRA-10992 */ @@ -624,6 +630,10 @@ public static class SSTableConfig @Replaces(oldName = "enable_materialized_views", converter = Converters.IDENTITY, deprecated = true) public boolean materialized_views_enabled = false; + // When true, materialized views data in SSTable go through commit logs during internodes streaming, e.g. repair + // When false, it behaves the same as normal streaming. + public volatile boolean materialized_views_on_repair_enabled = true; + @Replaces(oldName = "enable_transient_replication", converter = Converters.IDENTITY, deprecated = true) public boolean transient_replication_enabled = false; @@ -991,6 +1001,7 @@ public static void setClientMode(boolean clientMode) public volatile boolean password_validator_reconfiguration_enabled = true; public volatile CustomGuardrailConfig password_validator = new CustomGuardrailConfig(); + public volatile AutoRepairConfig auto_repair = new AutoRepairConfig(); /** * The variants of paxos implementation and semantics supported by Cassandra. diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 616735289741..82a26602eb05 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -61,6 +61,7 @@ import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import com.google.common.util.concurrent.RateLimiter; + import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -113,6 +114,7 @@ import org.apache.cassandra.locator.ReconnectableSnitchHelper; import org.apache.cassandra.locator.SeedProvider; import org.apache.cassandra.locator.SnitchAdapter; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.security.AbstractCryptoProvider; import org.apache.cassandra.security.EncryptionContext; import org.apache.cassandra.security.JREProvider; @@ -4565,6 +4567,16 @@ public static void setMaterializedViewsEnabled(boolean enableMaterializedViews) conf.materialized_views_enabled = enableMaterializedViews; } + public static boolean isMaterializedViewsOnRepairEnabled() + { + return conf.materialized_views_on_repair_enabled; + } + + public static void setMaterializedViewsOnRepairEnabled(boolean val) + { + conf.materialized_views_on_repair_enabled = val; + } + public static boolean getSASIIndexesEnabled() { return conf.sasi_indexes_enabled; @@ -5899,4 +5911,29 @@ public static boolean getAccordEphemeralReadEnabledEnabled() { return conf.accord.ephemeralReadEnabled; } + + public static AutoRepairConfig getAutoRepairConfig() + { + return conf.auto_repair; + } + + public static double getIncrementalRepairDiskHeadroomRejectRatio() + { + return conf.incremental_repair_disk_headroom_reject_ratio; + } + + public static void setIncrementalRepairDiskHeadroomRejectRatio(double value) + { + if (value < 0.0 || value > 1.0) + { + throw new IllegalArgumentException("Value must be >= 0 and <= 1 for incremental_repair_disk_headroom_reject_ratio"); + } + conf.incremental_repair_disk_headroom_reject_ratio = value; + } + + @VisibleForTesting + public static void setPartitioner(String name) + { + partitioner = FBUtilities.newPartitioner(name); + } } diff --git a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java index 6d2848dad2d2..f70c1211e969 100644 --- a/src/java/org/apache/cassandra/cql3/UntypedResultSet.java +++ b/src/java/org/apache/cassandra/cql3/UntypedResultSet.java @@ -382,6 +382,14 @@ public long getLong(String column) return LongType.instance.compose(data.get(column)); } + // this function will return the default value if the row doesn't have that column or the column data is null + // This function is used to avoid the nullpointerexception + public long getLong(String column, long ifNull) + { + ByteBuffer bytes = data.get(column); + return bytes == null ? ifNull : LongType.instance.compose(bytes); + } + public Set getSet(String column, AbstractType type) { ByteBuffer raw = data.get(column); diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index a41b8dd4c402..90671c1bd6c9 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -143,7 +143,7 @@ public abstract class ModificationStatement implements CQLStatement.SingleKeyspa public static final String CUSTOM_EXPRESSIONS_NOT_ALLOWED = "Custom index expressions cannot be used in WHERE clauses for UPDATE or DELETE statements"; - private static final ColumnIdentifier CAS_RESULT_COLUMN = new ColumnIdentifier("[applied]", false); + public static final ColumnIdentifier CAS_RESULT_COLUMN = new ColumnIdentifier("[applied]", false); protected final StatementType type; diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java index 2e643e5472f7..9ec04f502bc6 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/TableAttributes.java @@ -27,6 +27,7 @@ import org.apache.cassandra.cql3.statements.PropertyDefinitions; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; +import org.apache.cassandra.schema.AutoRepairParams; import org.apache.cassandra.schema.CachingParams; import org.apache.cassandra.schema.CompactionParams; import org.apache.cassandra.schema.CompressionParams; @@ -196,6 +197,9 @@ private TableParams build(TableParams.Builder builder) if (hasOption(Option.TRANSACTIONAL_MIGRATION_FROM)) builder.transactionalMigrationFrom(TransactionalMigrationFromMode.fromString(getString(Option.TRANSACTIONAL_MIGRATION_FROM))); + if (hasOption(Option.AUTO_REPAIR)) + builder.automatedRepair(AutoRepairParams.fromMap(getMap(Option.AUTO_REPAIR))); + return builder.build(); } diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java index 7e052cd8c412..a36d0fc49b07 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionManager.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionManager.java @@ -1015,7 +1015,7 @@ private static void mutateFullyContainedSSTables(ColumnFamilyStore cfs, Set fullyContainedSSTables = findSSTablesToAnticompact(sstableIterator, normalizedRanges, sessionID); - cfs.metric.bytesMutatedAnticompaction.inc(SSTableReader.getTotalBytes(fullyContainedSSTables)); + cfs.metric.bytesMutatedAnticompaction.mark(SSTableReader.getTotalBytes(fullyContainedSSTables)); cfs.getCompactionStrategyManager().mutateRepaired(fullyContainedSSTables, UNREPAIRED_SSTABLE, sessionID, isTransient); // since we're just re-writing the sstable metdata for the fully contained sstables, we don't want // them obsoleted when the anti-compaction is complete. So they're removed from the transaction here @@ -1862,7 +1862,7 @@ private void doAntiCompaction(ColumnFamilyStore cfs, // repairedAt values for these, we still avoid anti-compacting already repaired sstables, as we currently don't // make use of any actual repairedAt value and splitting up sstables just for that is not worth it at this point. Set unrepairedSSTables = sstables.stream().filter((s) -> !s.isRepaired()).collect(Collectors.toSet()); - cfs.metric.bytesAnticompacted.inc(SSTableReader.getTotalBytes(unrepairedSSTables)); + cfs.metric.bytesAnticompacted.mark(SSTableReader.getTotalBytes(unrepairedSSTables)); Collection> groupedSSTables = cfs.getCompactionStrategyManager().groupSSTablesForAntiCompaction(unrepairedSSTables); // iterate over sstables to check if the full / transient / unrepaired ranges intersect them. diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java index 36baaacef5ea..61f64ce3d0af 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReceiver.java @@ -23,8 +23,10 @@ import java.util.List; import java.util.Set; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -190,7 +192,7 @@ private boolean hasCDC(ColumnFamilyStore cfs) return cfs.metadata().params.cdc; } - // returns true iif it is a cdc table and cdc on repair is enabled. + // returns true if it is a cdc table and cdc on repair is enabled. private boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) { return DatabaseDescriptor.isCDCOnRepairEnabled() && hasCDC(cfs); @@ -205,11 +207,12 @@ private boolean cdcRequiresWriteCommitLog(ColumnFamilyStore cfs) * For CDC-enabled tables and write path for CDC is enabled, we want to ensure that the mutations are * run through the CommitLog, so they can be archived by the CDC process on discard. */ - private boolean requiresWritePath(ColumnFamilyStore cfs) + @VisibleForTesting + boolean requiresWritePath(ColumnFamilyStore cfs) { return cdcRequiresWriteCommitLog(cfs) || cfs.streamToMemtable() - || (session.streamOperation().requiresViewBuild() && hasViews(cfs)); + || (session.streamOperation().requiresViewBuild() && hasViews(cfs) && DatabaseDescriptor.isMaterializedViewsOnRepairEnabled()); } private void sendThroughWritePath(ColumnFamilyStore cfs, Collection readers) diff --git a/src/java/org/apache/cassandra/locator/InetAddressAndPort.java b/src/java/org/apache/cassandra/locator/InetAddressAndPort.java index 50f3368b2001..e2520659b810 100644 --- a/src/java/org/apache/cassandra/locator/InetAddressAndPort.java +++ b/src/java/org/apache/cassandra/locator/InetAddressAndPort.java @@ -25,6 +25,9 @@ import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; import java.util.regex.Pattern; import java.util.List; import java.util.stream.Collectors; @@ -32,8 +35,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; import com.google.common.net.HostAndPort; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -59,6 +66,7 @@ public final class InetAddressAndPort extends InetSocketAddress implements Comparable, Serializable { private static final long serialVersionUID = 0; + private static final Logger logger = LoggerFactory.getLogger(InetAddressAndPort.class); //Store these here to avoid requiring DatabaseDescriptor to be loaded. DatabaseDescriptor will set //these when it loads the config. A lot of unit tests won't end up loading DatabaseDescriptor. @@ -323,6 +331,52 @@ public static void initializeDefaultPort(int port) defaultPort = port; } + public static List stringify(Iterable endpoints) + { + return stringify(endpoints, true); + } + + public static List stringify(Iterable endpoints, boolean withPort) + { + List stringEndpoints = new ArrayList<>(); + for (InetAddressAndPort ep : endpoints) + { + stringEndpoints.add(ep.getHostAddress(withPort)); + } + return stringEndpoints; + } + + /** + * Parses a comma-separated list of hosts to a set of {@link InetAddressAndPort} + * + * @param value the comma-separated list of hosts to parse + * @param failOnError whether to fail when encountering an invalid hostname + * @return the set of parsed {@link InetAddressAndPort} + */ + public static Set parseHosts(String value, boolean failOnError) + { + Set hosts = new HashSet<>(); + for (String host : Splitter.on(',').split(value)) + { + try + { + hosts.add(InetAddressAndPort.getByName(host)); + } + catch (UnknownHostException e) + { + if (failOnError) + { + throw new IllegalArgumentException("Failed to parse host: " + host, e); + } + else + { + logger.warn("Invalid ip address {} from input={}", host, value); + } + } + } + return hosts; + } + static int getDefaultPort() { return defaultPort; diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java new file mode 100644 index 000000000000..3ef24a9eec1b --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetrics.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.metrics; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.google.common.annotations.VisibleForTesting; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.service.AutoRepairService; + +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; +import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; + +/** + * Metrics related to AutoRepair. + */ +public class AutoRepairMetrics +{ + public static final String TYPE_NAME = "autorepair"; + public final Gauge repairsInProgress; + public final Gauge nodeRepairTimeInSec; + public final Gauge clusterRepairTimeInSec; + public final Gauge longestUnrepairedSec; + public final Gauge repairStartLagSec; + public final Gauge succeededTokenRangesCount; + public final Gauge failedTokenRangesCount; + public final Gauge skippedTokenRangesCount; + public final Gauge skippedTablesCount; + public final Gauge totalMVTablesConsideredForRepair; + public final Gauge totalDisabledRepairTables; + public Counter repairTurnMyTurn; + public Counter repairTurnMyTurnDueToPriority; + public Counter repairTurnMyTurnForceRepair; + public Counter repairDelayedByReplica; + public Counter repairDelayedBySchedule; + + private final RepairType repairType; + + private volatile int repairStartLagSecVal; + + public AutoRepairMetrics(RepairType repairType) + { + this.repairType = repairType; + AutoRepairMetricsFactory factory = new AutoRepairMetricsFactory(repairType); + + repairsInProgress = Metrics.register(factory.createMetricName("RepairsInProgress"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).isRepairInProgress() ? 1 : 0; + } + }); + + nodeRepairTimeInSec = Metrics.register(factory.createMetricName("NodeRepairTimeInSec"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getNodeRepairTimeInSec(); + } + }); + + clusterRepairTimeInSec = Metrics.register(factory.createMetricName("ClusterRepairTimeInSec"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getClusterRepairTimeInSec(); + } + }); + + skippedTokenRangesCount = Metrics.register(factory.createMetricName("SkippedTokenRangesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getSkippedTokenRangesCount(); + } + }); + + skippedTablesCount = Metrics.register(factory.createMetricName("SkippedTablesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getSkippedTablesCount(); + } + }); + + longestUnrepairedSec = Metrics.register(factory.createMetricName("LongestUnrepairedSec"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getLongestUnrepairedSec(); + } + }); + + repairStartLagSec = Metrics.register(factory.createMetricName("RepairStartLagSec"), new Gauge() + { + public Integer getValue() + { + return repairStartLagSecVal; + } + }); + + succeededTokenRangesCount = Metrics.register(factory.createMetricName("SucceededTokenRangesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getSucceededTokenRangesCount(); + } + }); + + failedTokenRangesCount = Metrics.register(factory.createMetricName("FailedTokenRangesCount"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getFailedTokenRangesCount(); + } + }); + + repairTurnMyTurn = Metrics.counter(factory.createMetricName("RepairTurnMyTurn")); + repairTurnMyTurnDueToPriority = Metrics.counter(factory.createMetricName("RepairTurnMyTurnDueToPriority")); + repairTurnMyTurnForceRepair = Metrics.counter(factory.createMetricName("RepairTurnMyTurnForceRepair")); + + repairDelayedByReplica = Metrics.counter(factory.createMetricName("RepairDelayedByReplica")); + repairDelayedBySchedule = Metrics.counter(factory.createMetricName("RepairDelayedBySchedule")); + + totalMVTablesConsideredForRepair = Metrics.register(factory.createMetricName("TotalMVTablesConsideredForRepair"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getTotalMVTablesConsideredForRepair(); + } + }); + + totalDisabledRepairTables = Metrics.register(factory.createMetricName("TotalDisabledRepairTables"), new Gauge() + { + public Integer getValue() + { + return AutoRepair.instance.getRepairState(repairType).getTotalDisabledTablesRepairCount(); + } + }); + } + + public void recordTurn(AutoRepairUtils.RepairTurn turn) + { + switch (turn) + { + case MY_TURN: + repairTurnMyTurn.inc(); + break; + case MY_TURN_FORCE_REPAIR: + repairTurnMyTurnForceRepair.inc(); + break; + case MY_TURN_DUE_TO_PRIORITY: + repairTurnMyTurnDueToPriority.inc(); + break; + default: + throw new RuntimeException(String.format("Unrecoginized turn: %s", turn.name())); + } + this.repairStartLagSecVal = 0; + } + + /** + * Record perceived lag in scheduling repair. + *

      + * Takes the current time and subtracts it from the given last repair finish time. It then compares the difference + * with the min repair interval for this repair type, and if that value is greater than 0, records it. + */ + public void recordRepairStartLag(long lastFinishTimeInMs) + { + long now = AutoRepair.instance.currentTimeMs(); + long deltaFinish = now - lastFinishTimeInMs; + long deltaMinRepairInterval = deltaFinish - AutoRepairService.instance + .getAutoRepairConfig().getRepairMinInterval(repairType) + .toMilliseconds(); + this.repairStartLagSecVal = deltaMinRepairInterval > 0 ? (int) MILLISECONDS.toSeconds(deltaMinRepairInterval) : 0; + } + + @VisibleForTesting + protected static class AutoRepairMetricsFactory implements MetricNameFactory + { + private static final String TYPE = "AutoRepair"; + @VisibleForTesting + protected final String repairType; + + protected AutoRepairMetricsFactory(RepairType repairType) + { + this.repairType = toLowerCaseLocalized(repairType.toString()); + } + + @Override + public CassandraMetricsRegistry.MetricName createMetricName(String metricName) + { + StringBuilder mbeanName = new StringBuilder(); + mbeanName.append(DefaultNameFactory.GROUP_NAME).append(':'); + mbeanName.append("type=").append(TYPE); + mbeanName.append(",name=").append(metricName); + mbeanName.append(",repairType=").append(repairType); + + StringBuilder scope = new StringBuilder(); + scope.append("repairType=").append(repairType); + + return new CassandraMetricsRegistry.MetricName(DefaultNameFactory.GROUP_NAME, toLowerCaseLocalized(TYPE), + metricName, scope.toString(), mbeanName.toString()); + } + } +} diff --git a/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java new file mode 100644 index 000000000000..e97ce34e5a73 --- /dev/null +++ b/src/java/org/apache/cassandra/metrics/AutoRepairMetricsManager.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.metrics; + +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * AutoRepair metrics manager holding all the auto-repair related metrics. + */ +public class AutoRepairMetricsManager +{ + private static final Map metrics = new ConcurrentHashMap<>(); + + public static AutoRepairMetrics getMetrics(RepairType repairType) + { + return metrics.computeIfAbsent(repairType, k -> new AutoRepairMetrics(repairType)); + } +} diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index b11474875916..58540f903fde 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -154,6 +154,7 @@ public class CassandraMetricsRegistry extends MetricRegistry .add(ThreadPoolMetrics.TYPE_NAME) .add(TrieMemtableMetricsView.TYPE_NAME) .add(UnweightedCacheMetrics.TYPE_NAME) + .add(AutoRepairMetrics.TYPE_NAME) .build(); } @@ -304,9 +305,14 @@ public Counter counter(MetricName... name) } public Meter meter(MetricName... name) + { + return meter(false, name); + } + + public Meter meter(boolean gaugeCompatible, MetricName... name) { Meter meter = super.meter(name[0].getMetricName()); - Stream.of(name).forEach(n -> register(n, meter)); + Stream.of(name).forEach(n -> register(gaugeCompatible, n, meter)); return meter; } @@ -373,6 +379,11 @@ public static SnapshottingReservoir createReservoir(TimeUnit durationUnit) } public T register(MetricName name, T metric) + { + return register(false, name, metric); + } + + public T register(boolean gaugeCompatible, MetricName name, T metric) { if (metric instanceof MetricSet) throw new IllegalArgumentException("MetricSet registration using MetricName is not supported"); @@ -380,7 +391,7 @@ public T register(MetricName name, T metric) try { verifyUnknownMetric(name); - registerMBean(metric, name.getMBeanName(), MBeanWrapper.instance); + registerMBean(metric, name.getMBeanName(), MBeanWrapper.instance, gaugeCompatible); return super.register(name.getMetricName(), metric); } catch (IllegalArgumentException e) @@ -494,7 +505,7 @@ public interface MetricNameResolver @Nullable String resolve(String fullName); } - private void registerMBean(Metric metric, ObjectName name, MBeanWrapper mBeanServer) + public void registerMBean(Metric metric, ObjectName name, MBeanWrapper mBeanServer, boolean gaugeCompatible) { AbstractBean mbean; @@ -507,7 +518,18 @@ else if (metric instanceof Histogram) else if (metric instanceof Timer) mbean = new JmxTimer((Timer) metric, name, TimeUnit.SECONDS, DEFAULT_TIMER_UNIT); else if (metric instanceof Metered) - mbean = new JmxMeter((Metered) metric, name, TimeUnit.SECONDS); + { + // If a gauge compatible meter is requested, create a special implementation which + // also yields a 'Value' attribute for backwards compatibility. + if (gaugeCompatible) + { + mbean = new JmxMeterGaugeCompatible((Metered) metric, name, TimeUnit.SECONDS); + } + else + { + mbean = new JmxMeter((Metered) metric, name, TimeUnit.SECONDS); + } + } else throw new IllegalArgumentException("Unknown metric type: " + metric.getClass()); @@ -819,6 +841,29 @@ private String calculateRateUnit(TimeUnit unit) } } + public interface JmxMeterGaugeCompatibleMBean extends JmxMeterMBean, JmxGaugeMBean {} + + /** + * An implementation of {@link JmxMeter} that is compatible with {@link JmxGaugeMBean} in that it also + * implements {@link JmxGaugeMBean}. This is useful for metrics that were migrated from {@link JmxGauge} + * to {@link JmxMeter} like {@link TableMetrics#bytesAnticompacted} and + * {@link TableMetrics#bytesMutatedAnticompaction}. + */ + private static class JmxMeterGaugeCompatible extends JmxMeter implements JmxMeterGaugeCompatibleMBean + { + + private JmxMeterGaugeCompatible(Metered metric, ObjectName objectName, TimeUnit rateUnit) + { + super(metric, objectName, rateUnit); + } + + @Override + public Object getValue() + { + return getCount(); + } + } + /** * Exports a timer as a JMX MBean, check corresponding {@link org.apache.cassandra.db.virtual.model.TimerMetricRow} * for the same functionality for virtual tables. diff --git a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java index 209c5a7a8ede..e205230bd2ec 100644 --- a/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/KeyspaceMetrics.java @@ -190,6 +190,11 @@ public class KeyspaceMetrics public final Meter tooManySSTableIndexesReadWarnings; public final Meter tooManySSTableIndexesReadAborts; + public final Meter bytesAnticompacted; + public final Meter bytesMutatedAnticompaction; + public final Meter bytesPreviewed; + public final Meter tokenRangesPreviewedDesynchronized; + public final Meter bytesPreviewedDesynchronized; public final LatencyMetrics viewSSTableIntervalTree; @@ -309,6 +314,11 @@ public KeyspaceMetrics(final Keyspace ks) outOfRangeTokenPaxosRequests = createKeyspaceCounter("PaxosOutOfRangeToken"); viewSSTableIntervalTree = createLatencyMetrics("ViewSSTableIntervalTree"); + bytesAnticompacted = createKeyspaceMeter("BytesAnticompacted"); + bytesMutatedAnticompaction = createKeyspaceMeter("BytesMutatedAnticompaction"); + bytesPreviewed = createKeyspaceMeter("BytesPreviewed"); + tokenRangesPreviewedDesynchronized = createKeyspaceMeter("TokenRangesPreviewedDesynchronized"); + bytesPreviewedDesynchronized = createKeyspaceMeter("BytesPreviewedDesynchronized"); } /** diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java index 719c8af81164..2622b76daa4a 100644 --- a/src/java/org/apache/cassandra/metrics/TableMetrics.java +++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java @@ -219,9 +219,15 @@ public class TableMetrics /** number of partitions read creating merkle trees */ public final TableHistogram partitionsValidated; /** number of bytes read while doing anticompaction */ - public final Counter bytesAnticompacted; + public final TableMeter bytesAnticompacted; /** number of bytes where the whole sstable was contained in a repairing range so that we only mutated the repair status */ - public final Counter bytesMutatedAnticompaction; + public final TableMeter bytesMutatedAnticompaction; + /** number of bytes that were scanned during preview repair */ + public final TableMeter bytesPreviewed; + /** number of desynchronized token ranges that were detected during preview repair */ + public final TableMeter tokenRangesPreviewedDesynchronized; + /** number of desynchronized bytes that were detected during preview repair */ + public final TableMeter bytesPreviewedDesynchronized; /** ratio of how much we anticompact vs how much we could mutate the repair status*/ public final Gauge mutatedAnticompactionGauge; @@ -831,12 +837,15 @@ public Long getValue() bytesValidated = createTableHistogram("BytesValidated", cfs.keyspace.metric.bytesValidated, false); partitionsValidated = createTableHistogram("PartitionsValidated", cfs.keyspace.metric.partitionsValidated, false); - bytesAnticompacted = createTableCounter("BytesAnticompacted"); - bytesMutatedAnticompaction = createTableCounter("BytesMutatedAnticompaction"); + bytesAnticompacted = createTableMeter("BytesAnticompacted", cfs.keyspace.metric.bytesAnticompacted, true); + bytesMutatedAnticompaction = createTableMeter("BytesMutatedAnticompaction", cfs.keyspace.metric.bytesMutatedAnticompaction, true); + bytesPreviewed = createTableMeter("BytesPreviewed", cfs.keyspace.metric.bytesPreviewed); + tokenRangesPreviewedDesynchronized = createTableMeter("TokenRangesPreviewedDesynchronized", cfs.keyspace.metric.tokenRangesPreviewedDesynchronized); + bytesPreviewedDesynchronized = createTableMeter("BytesPreviewedDesynchronized", cfs.keyspace.metric.bytesPreviewedDesynchronized); mutatedAnticompactionGauge = createTableGauge("MutatedAnticompactionGauge", () -> { - double bytesMutated = bytesMutatedAnticompaction.getCount(); - double bytesAnticomp = bytesAnticompacted.getCount(); + double bytesMutated = bytesMutatedAnticompaction.table.getCount(); + double bytesAnticomp = bytesAnticompacted.table.getCount(); if (bytesAnticomp + bytesMutated > 0) return bytesMutated / (bytesAnticomp + bytesMutated); return 0.0; @@ -1103,16 +1112,21 @@ protected SnapshottingTimer createTableTimer(String name) protected TableMeter createTableMeter(String name, Meter keyspaceMeter) { - return createTableMeter(name, name, keyspaceMeter); + return createTableMeter(name, keyspaceMeter, false); } - protected TableMeter createTableMeter(String name, String alias, Meter keyspaceMeter) + protected TableMeter createTableMeter(String name, Meter keyspaceMeter, boolean globalMeterGaugeCompatible) + { + return createTableMeter(name, name, keyspaceMeter, globalMeterGaugeCompatible); + } + + protected TableMeter createTableMeter(String name, String alias, Meter keyspaceMeter, boolean globalMeterGaugeCompatible) { Meter meter = Metrics.meter(factory.createMetricName(name), aliasFactory.createMetricName(alias)); register(name, alias, meter); return new TableMeter(meter, keyspaceMeter, - Metrics.meter(GLOBAL_FACTORY.createMetricName(name), + Metrics.meter(globalMeterGaugeCompatible, GLOBAL_FACTORY.createMetricName(name), GLOBAL_ALIAS_FACTORY.createMetricName(alias))); } @@ -1170,10 +1184,15 @@ private TableMeter(Meter table, Meter keyspace, Meter global) } public void mark() + { + mark(1L); + } + + public void mark(long val) { for (Meter meter : all) { - meter.mark(); + meter.mark(val); } } } diff --git a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java index 872199156ee5..6323d8e2751d 100644 --- a/src/java/org/apache/cassandra/repair/PreviewRepairTask.java +++ b/src/java/org/apache/cassandra/repair/PreviewRepairTask.java @@ -26,6 +26,7 @@ import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -88,10 +89,10 @@ public Future performUnsafe(ExecutorPlus executor, Sche else { message = (previewKind == PreviewKind.REPAIRED ? "Repaired data is inconsistent\n" : "Preview complete\n") + summary; - RepairMetrics.previewFailures.inc(); if (previewKind == PreviewKind.REPAIRED) maybeSnapshotReplicas(parentSession, keyspace, result.results.get()); // we know its present as summary used it } + emitMetrics(summary); successMessage += "; " + message; coordinator.notification(message); @@ -99,6 +100,21 @@ public Future performUnsafe(ExecutorPlus executor, Sche }); } + private void emitMetrics(SyncStatSummary summary) + { + if (!summary.isEmpty()) + RepairMetrics.previewFailures.inc(); + + summary.getTotals().forEach((key, table) -> { + if (table.isCounter()) + return; + + ColumnFamilyStore cfs = Keyspace.open(key.left).getColumnFamilyStore(key.right); + cfs.metric.tokenRangesPreviewedDesynchronized.mark(table.getRanges()); + cfs.metric.bytesPreviewedDesynchronized.mark(table.getBytes()); + }); + } + private void maybeSnapshotReplicas(TimeUUID parentSession, String keyspace, List results) { if (!DatabaseDescriptor.snapshotOnRepairedDataMismatch()) diff --git a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java index 72252f56ab2d..ea9742609fed 100644 --- a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java +++ b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java @@ -123,6 +123,13 @@ public void doVerb(final Message message) sendFailureResponse(message); return; } + if (!ActiveRepairService.verifyDiskHeadroomThreshold(prepareMessage.parentRepairSession, prepareMessage.previewKind, prepareMessage.isIncremental)) + { + // error is logged in verifyDiskHeadroomThreshold + state.phase.fail("Not enough disk headroom to perform incremental repair"); + sendFailureResponse(message); + return; + } List columnFamilyStores = new ArrayList<>(prepareMessage.tableIds.size()); for (TableId tableId : prepareMessage.tableIds) diff --git a/src/java/org/apache/cassandra/repair/ValidationManager.java b/src/java/org/apache/cassandra/repair/ValidationManager.java index ca7ad3a68eea..f4229751984c 100644 --- a/src/java/org/apache/cassandra/repair/ValidationManager.java +++ b/src/java/org/apache/cassandra/repair/ValidationManager.java @@ -37,6 +37,7 @@ import org.apache.cassandra.metrics.TableMetrics; import org.apache.cassandra.metrics.TopPartitionTracker; import org.apache.cassandra.repair.state.ValidationState; +import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MerkleTree; @@ -143,6 +144,10 @@ public static void doValidation(ColumnFamilyStore cfs, Validator validator) thro { cfs.metric.bytesValidated.update(state.estimatedTotalBytes); cfs.metric.partitionsValidated.update(state.partitionsProcessed); + if (validator.getPreviewKind() != PreviewKind.NONE) + { + cfs.metric.bytesPreviewed.mark(state.estimatedTotalBytes); + } if (topPartitionCollector != null) cfs.topPartitions.merge(topPartitionCollector); } diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java new file mode 100644 index 000000000000..8c08ce7c80cb --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -0,0 +1,568 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Supplier; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.Uninterruptibles; + +import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.utils.Clock; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.utils.concurrent.Condition; +import org.apache.cassandra.utils.concurrent.Future; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.apache.cassandra.utils.progress.ProgressListener; + +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; + +/** + * AutoRepair scheduler responsible for running different types of repairs. + */ +public class AutoRepair +{ + private static final Logger logger = LoggerFactory.getLogger(AutoRepair.class); + private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); + + @VisibleForTesting + protected static Supplier timeFunc = Clock.Global::currentTimeMillis; + + // Sleep for 5 seconds if repair finishes quickly to flush JMX metrics; it happens only for Cassandra nodes with tiny amount of data. + public static DurationSpec.IntSecondsBound SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("5s"); + + @VisibleForTesting + public Map repairStates; + + @VisibleForTesting + protected Map repairExecutors; + + protected Map repairRunnableExecutors; + + @VisibleForTesting + // Auto-repair is likely to be run on multiple nodes independently, we want to avoid running multiple repair + // sessions on overlapping datasets at the same time. Shuffling keyspaces reduces the likelihood of this happening. + protected static Consumer> shuffleFunc = java.util.Collections::shuffle; + + @VisibleForTesting + protected static BiConsumer sleepFunc = Uninterruptibles::sleepUninterruptibly; + + @VisibleForTesting + public boolean isSetupDone = false; + public static AutoRepair instance = new AutoRepair(); + + private AutoRepair() + { + // Private constructor to prevent instantiation + } + + public void setup() + { + // Ensure setup is done only once; this is only for unit tests + // For production, this method should be called only once. + synchronized (this) + { + if (isSetupDone) + { + return; + } + repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairRunnableExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class); + repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + repairExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-Repair-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); + repairRunnableExecutors.put(repairType, executorFactory().scheduled(false, "AutoRepair-RepairRunnable-" + repairType.getConfigName(), Thread.NORM_PRIORITY)); + repairStates.put(repairType, AutoRepairConfig.RepairType.getAutoRepairState(repairType)); + } + + AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig(); + AutoRepairUtils.setup(); + + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + if (config.isAutoRepairEnabled(repairType)) + AutoRepairService.instance.checkCanRun(repairType); + + repairExecutors.get(repairType).scheduleWithFixedDelay( + () -> repair(repairType), + config.getInitialSchedulerDelay(repairType).toSeconds(), + config.getRepairCheckInterval().toSeconds(), + TimeUnit.SECONDS); + } + isSetupDone = true; + } + } + + /** + * @return The current observed system time in ms. + */ + public long currentTimeMs() + { + return timeFunc.get(); + } + + // repair runs a repair session of the given type synchronously. + public void repair(AutoRepairConfig.RepairType repairType) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (!config.isAutoRepairEnabled(repairType)) + { + logger.debug("Auto-repair is disabled for repair type {}", repairType); + return; + } + AutoRepairService.instance.checkCanRun(repairType); + AutoRepairState repairState = repairStates.get(repairType); + try + { + String localDC = DatabaseDescriptor.getLocalDataCenter(); + if (config.getIgnoreDCs(repairType).contains(localDC)) + { + logger.info("Not running repair as this node belongs to datacenter {}", localDC); + return; + } + + // refresh the longest unrepaired node + repairState.setLongestUnrepairedNode(AutoRepairUtils.getHostWithLongestUnrepairTime(repairType)); + + //consistency level to use for local query + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + + // If it's too soon to run repair, don't bother checking if it's our turn. + if (tooSoonToRunRepair(repairType, repairState, config, myId)) + { + return; + } + + RepairTurn turn = AutoRepairUtils.myTurnToRunRepair(repairType, myId); + if (turn == MY_TURN || turn == MY_TURN_DUE_TO_PRIORITY || turn == MY_TURN_FORCE_REPAIR) + { + repairState.recordTurn(turn); + // For normal auto repair, we will use primary range only repairs (Repair with -pr option). + // For some cases, we may set the auto_repair_primary_token_range_only flag to false then we will do repair + // without -pr. We may also do force repair for certain node that we want to repair all the data on one node + // When doing force repair, we want to repair without -pr. + boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) + && turn != MY_TURN_FORCE_REPAIR; + + long startTime = timeFunc.get(); + logger.info("My host id: {}, my turn to run repair...repair primary-ranges only? {}", myId, + config.getRepairPrimaryTokenRangeOnly(repairType)); + AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, timeFunc.get(), turn); + + repairState.setRepairKeyspaceCount(0); + repairState.setRepairInProgress(true); + repairState.setTotalTablesConsideredForRepair(0); + repairState.setTotalMVTablesConsideredForRepair(0); + + CollectedRepairStats collectedRepairStats = new CollectedRepairStats(); + + List keyspaces = new ArrayList<>(); + Keyspace.all().forEach(keyspaces::add); + // Filter out keyspaces and tables to repair and group into a map by keyspace. + Map> keyspacesAndTablesToRepair = new LinkedHashMap<>(); + for (Keyspace keyspace : keyspaces) + { + if (!AutoRepairUtils.shouldConsiderKeyspace(keyspace)) + { + continue; + } + List tablesToBeRepairedList = retrieveTablesToBeRepaired(keyspace, config, repairType, repairState, collectedRepairStats); + keyspacesAndTablesToRepair.put(keyspace.getName(), tablesToBeRepairedList); + } + + // Separate out the keyspaces and tables to repair based on their priority, with each repair plan representing a uniquely occuring priority. + List repairPlans = PrioritizedRepairPlan.build(keyspacesAndTablesToRepair, repairType, shuffleFunc); + + // calculate the repair assignments for each priority:keyspace. + Iterator repairAssignmentsIterator = config.getTokenRangeSplitterInstance(repairType).getRepairAssignments(primaryRangeOnly, repairPlans); + + while (repairAssignmentsIterator.hasNext()) + { + KeyspaceRepairAssignments repairAssignments = repairAssignmentsIterator.next(); + List assignments = repairAssignments.getRepairAssignments(); + if (assignments.isEmpty()) + { + logger.info("Skipping repairs for priorityBucket={} for keyspace={} since it yielded no assignments", repairAssignments.getPriority(), repairAssignments.getKeyspaceName()); + continue; + } + + logger.info("Submitting repairs for priorityBucket={} for keyspace={} with assignmentCount={}", repairAssignments.getPriority(), repairAssignments.getKeyspaceName(), repairAssignments.getRepairAssignments().size()); + repairKeyspace(repairType, primaryRangeOnly, repairAssignments.getKeyspaceName(), repairAssignments.getRepairAssignments(), collectedRepairStats); + } + + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, collectedRepairStats); + } + else + { + logger.info("Waiting for my turn..."); + } + } + catch (Exception e) + { + logger.error("Exception in autorepair:", e); + } + } + + private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean primaryRangeOnly, String keyspaceName, List repairAssignments, CollectedRepairStats collectedRepairStats) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AutoRepairState repairState = repairStates.get(repairType); + + // evaluate over each keyspace's repair assignments. + repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1); + + int totalRepairAssignments = repairAssignments.size(); + long keyspaceStartTime = timeFunc.get(); + RepairAssignment previousAssignment = null; + long tableStartTime = timeFunc.get(); + int totalProcessedAssignments = 0; + Set> ranges = new HashSet<>(); + for (RepairAssignment curRepairAssignment : repairAssignments) + { + try + { + totalProcessedAssignments++; + boolean repairOneTableAtATime = !config.getRepairByKeyspace(repairType); + if (previousAssignment != null && repairOneTableAtATime && !previousAssignment.tableNames.equals(curRepairAssignment.tableNames)) + { + // In the repair assignment, all the tables are appended sequnetially. + // Check if we have a different table, and if so, we should reset the table start time. + tableStartTime = timeFunc.get(); + } + previousAssignment = curRepairAssignment; + if (!config.isAutoRepairEnabled(repairType)) + { + logger.error("Auto-repair for type {} is disabled hence not running repair", repairType); + repairState.setRepairInProgress(false); + return; + } + if (AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, keyspaceStartTime, repairAssignments.size())) + { + collectedRepairStats.skippedTokenRanges += totalRepairAssignments - totalProcessedAssignments; + logger.info("Keyspace took too much time to repair hence skipping it {}", + keyspaceName); + break; + } + if (repairOneTableAtATime && AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, tableStartTime)) + { + collectedRepairStats.skippedTokenRanges += 1; + logger.info("Table took too much time to repair hence skipping it table name {}.{}, token range {}", + keyspaceName, curRepairAssignment.tableNames, curRepairAssignment.tokenRange); + continue; + } + + Range tokenRange = curRepairAssignment.getTokenRange(); + logger.debug("Current Token Left side {}, right side {}", + tokenRange.left.toString(), + tokenRange.right.toString()); + + ranges.add(curRepairAssignment.getTokenRange()); + if ((totalProcessedAssignments % config.getRepairThreads(repairType) == 0) || + (totalProcessedAssignments == totalRepairAssignments)) + { + boolean success = false; + int retryCount = 0; + Future f = null; + while (retryCount <= config.getRepairMaxRetries(repairType)) + { + RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, + Lists.newArrayList(curRepairAssignment.getTableNames()), + ranges, primaryRangeOnly); + RepairProgressListener listener = new RepairProgressListener(repairType); + task.addProgressListener(listener); + f = repairRunnableExecutors.get(repairType).submit(task); + try + { + long jobStartTime = timeFunc.get(); + listener.await(config.getRepairSessionTimeout(repairType)); + success = listener.isSuccess(); + soakAfterRepair(jobStartTime, config.getRepairTaskMinDuration().toMilliseconds()); + } + catch (InterruptedException e) + { + logger.error("Exception in cond await:", e); + } + if (success) + { + break; + } + else if (retryCount < config.getRepairMaxRetries(repairType)) + { + boolean cancellationStatus = f.cancel(true); + logger.warn("Repair failed for range {}-{} for {} tables {} with cancellationStatus: {} retrying after {} seconds...", + tokenRange.left, tokenRange.right, + keyspaceName, curRepairAssignment.getTableNames(), + cancellationStatus, config.getRepairRetryBackoff(repairType).toSeconds()); + sleepFunc.accept(config.getRepairRetryBackoff(repairType).toSeconds(), TimeUnit.SECONDS); + } + retryCount++; + } + //check repair status + if (success) + { + logger.info("Repair completed for range {}-{} for {} tables {}, total assignments: {}," + + "processed assignments: {}", tokenRange.left, tokenRange.right, + keyspaceName, curRepairAssignment.getTableNames(), totalRepairAssignments, totalProcessedAssignments); + collectedRepairStats.succeededTokenRanges += ranges.size(); + } + else + { + boolean cancellationStatus = true; + if (f != null) + { + cancellationStatus = f.cancel(true); + } + //in the future we can add retry, etc. + logger.error("Repair failed for range {}-{} for {} tables {} after {} retries, total assignments: {}," + + "processed assignments: {}, cancellationStatus: {}", tokenRange.left, tokenRange.right, keyspaceName, + curRepairAssignment.getTableNames(), retryCount, totalRepairAssignments, totalProcessedAssignments, cancellationStatus); + collectedRepairStats.failedTokenRanges += ranges.size(); + } + ranges.clear(); + } + logger.info("Repair completed for {} tables {}, range {}", keyspaceName, curRepairAssignment.getTableNames(), curRepairAssignment.getTokenRange()); + } + catch (Exception e) + { + logger.error("Exception while repairing keyspace {}:", keyspaceName, e); + } + } + } + + private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType, AutoRepairState repairState, AutoRepairConfig config, UUID myId) + { + if (repairState.getLastRepairTime() == 0) + { + // the node has either just boooted or has not run repair before, + // we should check for the node's repair history in the DB + repairState.setLastRepairTime(AutoRepairUtils.getLastRepairTimeForNode(repairType, myId)); + } + /* + * check if it is too soon to run repair. one of the reason we + * should not run frequent repair is that repair triggers + * memtable flush + */ + long timeElapsedSinceLastRepair = TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - repairState.getLastRepairTime()); + if (timeElapsedSinceLastRepair < config.getRepairMinInterval(repairType).toSeconds()) + { + logger.info("Too soon to run repair, last repair was done {} seconds ago", + timeElapsedSinceLastRepair); + return true; + } + return false; + } + + private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairConfig config, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, CollectedRepairStats collectedRepairStats) + { + Tables tables = keyspace.getMetadata().tables; + List tablesToBeRepaired = new ArrayList<>(); + Iterator iter = tables.iterator(); + while (iter.hasNext()) + { + repairState.setTotalTablesConsideredForRepair(repairState.getTotalTablesConsideredForRepair() + 1); + TableMetadata tableMetadata = iter.next(); + String tableName = tableMetadata.name; + + ColumnFamilyStore columnFamilyStore = keyspace.getColumnFamilyStore(tableName); + if (!columnFamilyStore.metadata().params.autoRepair.repairEnabled(repairType)) + { + logger.info("Repair is disabled for keyspace {} for tables: {}", keyspace.getName(), tableName); + repairState.setTotalDisabledTablesRepairCount(repairState.getTotalDisabledTablesRepairCount() + 1); + collectedRepairStats.skippedTables++; + continue; + } + + // this is done to make autorepair safe as running repair on table with more sstables + // may have its own challenges + int totalSSTables = columnFamilyStore.getLiveSSTables().size(); + if (totalSSTables > config.getRepairSSTableCountHigherThreshold(repairType)) + { + logger.info("Too many SSTables for repair for table {}.{}" + + "totalSSTables {}", keyspace.getName(), tableName, totalSSTables); + collectedRepairStats.skippedTables++; + continue; + } + + tablesToBeRepaired.add(tableName); + + // See if we should repair MVs as well that are associated with this given table + List mvs = AutoRepairUtils.getAllMVs(repairType, keyspace, tableMetadata); + if (!mvs.isEmpty()) + { + tablesToBeRepaired.addAll(mvs); + repairState.setTotalMVTablesConsideredForRepair(repairState.getTotalMVTablesConsideredForRepair() + mvs.size()); + } + } + return tablesToBeRepaired; + } + + private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, + long startTime, CollectedRepairStats collectedRepairStats) throws InterruptedException + { + //if it was due to priority then remove it now + if (turn == MY_TURN_DUE_TO_PRIORITY) + { + logger.info("Remove current host from priority list"); + AutoRepairUtils.removePriorityStatus(repairType, myId); + } + + repairState.setFailedTokenRangesCount(collectedRepairStats.failedTokenRanges); + repairState.setSucceededTokenRangesCount(collectedRepairStats.succeededTokenRanges); + repairState.setSkippedTokenRangesCount(collectedRepairStats.skippedTokenRanges); + repairState.setSkippedTablesCount(collectedRepairStats.skippedTables); + repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); + long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); + logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + + "repairTokenRangesSuccessCount {}, repairTokenRangesFailureCount {}, " + + "repairTokenRangesSkipCount {}, repairTablesSkipCount {}", repairType, timeInHours, repairState.getRepairKeyspaceCount(), + repairState.getSucceededTokenRangesCount(), repairState.getFailedTokenRangesCount(), + repairState.getSkippedTokenRangesCount(), repairState.getSkippedTablesCount()); + if (repairState.getLastRepairTime() != 0) + { + repairState.setClusterRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - + repairState.getLastRepairTime())); + logger.info("Cluster repair time for repair type {}: {} day(s)", repairType, + TimeUnit.SECONDS.toDays(repairState.getClusterRepairTimeInSec())); + } + repairState.setLastRepairTime(timeFunc.get()); + if (timeInHours == 0 && SLEEP_IF_REPAIR_FINISHES_QUICKLY.toSeconds() > 0) + { + //If repair finished quickly, happens for an empty instance, in such case + //wait for some duration so that the JMX metrics can detect the repairInProgress + logger.info("Wait for {} for repair type {}.", SLEEP_IF_REPAIR_FINISHES_QUICKLY, repairType); + Thread.sleep(SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds()); + } + repairState.setRepairInProgress(false); + AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); + } + + public AutoRepairState getRepairState(AutoRepairConfig.RepairType repairType) + { + return repairStates.get(repairType); + } + + private void soakAfterRepair(long startTimeMilis, long minDurationMilis) + { + long currentTime = timeFunc.get(); + long timeElapsed = currentTime - startTimeMilis; + if (timeElapsed < minDurationMilis) + { + long timeToSoak = minDurationMilis - timeElapsed; + logger.info("Soaking for {} ms after repair", timeToSoak); + sleepFunc.accept(timeToSoak, TimeUnit.MILLISECONDS); + } + } + + static class CollectedRepairStats + { + int failedTokenRanges = 0; + int succeededTokenRanges = 0; + int skippedTokenRanges = 0; + int skippedTables = 0; + } + + @VisibleForTesting + protected static class RepairProgressListener implements ProgressListener + { + private final AutoRepairConfig.RepairType repairType; + @VisibleForTesting + protected boolean success; + @VisibleForTesting + protected final Condition condition = newOneTimeCondition(); + + public RepairProgressListener(AutoRepairConfig.RepairType repairType) + { + this.repairType = repairType; + } + + public void await(DurationSpec.IntSecondsBound repairSessionTimeout) throws InterruptedException + { + //if for some reason we don't hear back on repair progress for sometime + if (!condition.await(repairSessionTimeout.toSeconds(), TimeUnit.SECONDS)) + { + success = false; + } + } + + public boolean isSuccess() + { + return success; + } + + @Override + public void progress(String tag, ProgressEvent event) + { + ProgressEventType type = event.getType(); + String message = String.format("[%s] %s", format.format(timeFunc.get()), event.getMessage()); + if (type == ProgressEventType.ERROR) + { + logger.error("Repair failure for repair {}: {}", repairType.toString(), message); + success = false; + condition.signalAll(); + } + if (type == ProgressEventType.PROGRESS) + { + message = message + " (progress: " + (int) event.getProgressPercentage() + "%)"; + logger.debug("Repair progress for repair {}: {}", repairType.toString(), message); + } + if (type == ProgressEventType.COMPLETE) + { + logger.debug("Repair completed for repair {}: {}", repairType.toString(), message); + success = true; + condition.signalAll(); + } + } + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java new file mode 100644 index 000000000000..9d842888d585 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -0,0 +1,599 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.io.Serializable; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentMap; +import java.util.function.Function; + +import javax.annotation.Nonnull; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; + +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.config.ParameterizedClass; +import org.apache.cassandra.utils.LocalizeString; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.utils.FBUtilities; + +/** + * Defines configurations for AutoRepair. + */ +public class AutoRepairConfig implements Serializable +{ + // Enable/Disable the auto-repair scheduler. + // If set to false, the scheduler thread will not be started. + // If set to true, the repair scheduler thread will be created. The thread will + // check for secondary configuration available for each repair type (full, incremental, + // and preview_repaired), and based on that, it will schedule repairs. + public volatile Boolean enabled; + // Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule + // repairs. + public final DurationSpec.IntSecondsBound repair_check_interval = new DurationSpec.IntSecondsBound("5m"); + // The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata + // for a specified duration to ensure they are indeed removed before adjustments are made to the schedule. + public volatile DurationSpec.IntSecondsBound history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("2h"); + // Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming + // the node by scheduling too many repair tasks in a short period of time. + public volatile DurationSpec.LongSecondsBound repair_task_min_duration = new DurationSpec.LongSecondsBound("5s"); + + // global_settings overides Options.defaultOptions for all repair types + public volatile Options global_settings; + + public static final Class DEFAULT_SPLITTER = RepairTokenRangeSplitter.class; + + // make transient so gets consturcted in the implementation. + private final transient Map tokenRangeSplitters = new EnumMap<>(RepairType.class); + + public enum RepairType implements Serializable + { + FULL, + INCREMENTAL, + PREVIEW_REPAIRED; + + private final String configName; + + RepairType() + { + this.configName = LocalizeString.toLowerCaseLocalized(name()); + } + + /** + * @return Format of the repair type as it should be represented in configuration. + * Canonically this is the enum name in lowerCase. + */ + public String getConfigName() + { + return configName; + } + + public static AutoRepairState getAutoRepairState(RepairType repairType) + { + switch (repairType) + { + case FULL: + return new FullRepairState(); + case INCREMENTAL: + return new IncrementalRepairState(); + case PREVIEW_REPAIRED: + return new PreviewRepairedState(); + } + + throw new IllegalArgumentException("Invalid repair type: " + repairType); + } + + /** + * Case-insensitive parsing of the repair type string into {@link RepairType} + * + * @param repairTypeStr the repair type string + * @return the {@link RepairType} represented by the {@code repairTypeStr} string + * @throws IllegalArgumentException when the repair type string does not match any repair type + */ + public static RepairType parse(String repairTypeStr) + { + return RepairType.valueOf(LocalizeString.toUpperCaseLocalized(Objects.requireNonNull(repairTypeStr, "repairTypeStr cannot be null"))); + } + } + + // repair_type_overrides overrides the global_settings for a specific repair type. String used as key instead + // of enum to allow lower case key in yaml. + public volatile ConcurrentMap repair_type_overrides = Maps.newConcurrentMap(); + + public AutoRepairConfig() + { + this(false); + } + + public AutoRepairConfig(boolean enabled) + { + this.enabled = enabled; + global_settings = Options.getDefaultOptions(); + } + + public DurationSpec.IntSecondsBound getRepairCheckInterval() + { + return repair_check_interval; + } + + public boolean isAutoRepairSchedulingEnabled() + { + return enabled; + } + + @VisibleForTesting + public void setAutoRepairSchedulingEnabled(boolean enabled) + { + this.enabled = enabled; + } + + public DurationSpec.IntSecondsBound getAutoRepairHistoryClearDeleteHostsBufferInterval() + { + return history_clear_delete_hosts_buffer_interval; + } + + public void startScheduler() + { + enabled = true; + AutoRepair.instance.setup(); + } + + public void setAutoRepairHistoryClearDeleteHostsBufferInterval(String duration) + { + history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound(duration); + } + + public DurationSpec.LongSecondsBound getRepairTaskMinDuration() + { + return repair_task_min_duration; + } + + public void setRepairTaskMinDuration(String duration) + { + repair_task_min_duration = new DurationSpec.LongSecondsBound(duration); + } + + public boolean isAutoRepairEnabled(RepairType repairType) + { + return enabled && applyOverrides(repairType, opt -> opt.enabled); + } + + public void setAutoRepairEnabled(RepairType repairType, boolean enabled) + { + getOptions(repairType).enabled = enabled; + } + + public void setRepairByKeyspace(RepairType repairType, boolean repairByKeyspace) + { + getOptions(repairType).repair_by_keyspace = repairByKeyspace; + } + + public boolean getRepairByKeyspace(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_by_keyspace); + } + + public int getRepairThreads(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.number_of_repair_threads); + } + + public void setRepairThreads(RepairType repairType, int repairThreads) + { + getOptions(repairType).number_of_repair_threads = repairThreads; + } + + public DurationSpec.IntSecondsBound getRepairMinInterval(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.min_repair_interval); + } + + public void setRepairMinInterval(RepairType repairType, String minRepairInterval) + { + getOptions(repairType).min_repair_interval = new DurationSpec.IntSecondsBound(minRepairInterval); + } + + public int getRepairSSTableCountHigherThreshold(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.sstable_upper_threshold); + } + + public void setRepairSSTableCountHigherThreshold(RepairType repairType, int sstableHigherThreshold) + { + getOptions(repairType).sstable_upper_threshold = sstableHigherThreshold; + } + + public DurationSpec.IntSecondsBound getAutoRepairTableMaxRepairTime(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.table_max_repair_time); + } + + public void setAutoRepairTableMaxRepairTime(RepairType repairType, String autoRepairTableMaxRepairTime) + { + getOptions(repairType).table_max_repair_time = new DurationSpec.IntSecondsBound(autoRepairTableMaxRepairTime); + } + + public Set getIgnoreDCs(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.ignore_dcs); + } + + public void setIgnoreDCs(RepairType repairType, Set ignoreDCs) + { + getOptions(repairType).ignore_dcs = ignoreDCs; + } + + public boolean getRepairPrimaryTokenRangeOnly(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_primary_token_range_only); + } + + public void setRepairPrimaryTokenRangeOnly(RepairType repairType, boolean primaryTokenRangeOnly) + { + getOptions(repairType).repair_primary_token_range_only = primaryTokenRangeOnly; + } + + public int getParallelRepairPercentage(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.parallel_repair_percentage); + } + + public void setParallelRepairPercentage(RepairType repairType, int percentage) + { + getOptions(repairType).parallel_repair_percentage = percentage; + } + + public int getParallelRepairCount(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.parallel_repair_count); + } + + public void setParallelRepairCount(RepairType repairType, int count) + { + getOptions(repairType).parallel_repair_count = count; + } + + public boolean getAllowParallelReplicaRepair(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.allow_parallel_replica_repair); + } + + public void setAllowParallelReplicaRepair(RepairType repairType, boolean enabled) + { + getOptions(repairType).allow_parallel_replica_repair = enabled; + } + + public boolean getAllowParallelReplicaRepairAcrossSchedules(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.allow_parallel_replica_repair_across_schedules); + } + + public void setAllowParallelReplicaRepairAcrossSchedules(RepairType repairType, boolean enabled) + { + getOptions(repairType).allow_parallel_replica_repair_across_schedules = enabled; + } + + public boolean getMaterializedViewRepairEnabled(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.materialized_view_repair_enabled); + } + + public void setMaterializedViewRepairEnabled(RepairType repairType, boolean enabled) + { + getOptions(repairType).materialized_view_repair_enabled = enabled; + } + + public void setForceRepairNewNode(RepairType repairType, boolean forceRepairNewNode) + { + getOptions(repairType).force_repair_new_node = forceRepairNewNode; + } + + public boolean getForceRepairNewNode(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.force_repair_new_node); + } + + public ParameterizedClass getTokenRangeSplitter(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.token_range_splitter); + } + + public IAutoRepairTokenRangeSplitter getTokenRangeSplitterInstance(RepairType repairType) + { + return tokenRangeSplitters.computeIfAbsent(repairType, + key -> newAutoRepairTokenRangeSplitter(key, getTokenRangeSplitter(key))); + } + + public void setInitialSchedulerDelay(RepairType repairType, String initialSchedulerDelay) + { + getOptions(repairType).initial_scheduler_delay = new DurationSpec.IntSecondsBound(initialSchedulerDelay); + } + + public DurationSpec.IntSecondsBound getInitialSchedulerDelay(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.initial_scheduler_delay); + } + + public DurationSpec.IntSecondsBound getRepairSessionTimeout(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_session_timeout); + } + + public void setRepairSessionTimeout(RepairType repairType, String repairSessionTimeout) + { + getOptions(repairType).repair_session_timeout = new DurationSpec.IntSecondsBound(repairSessionTimeout); + } + + public int getRepairMaxRetries(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_max_retries); + } + + public void setRepairMaxRetries(RepairType repairType, int maxRetries) + { + getOptions(repairType).repair_max_retries = maxRetries; + } + + public DurationSpec.LongSecondsBound getRepairRetryBackoff(RepairType repairType) + { + return applyOverrides(repairType, opt -> opt.repair_retry_backoff); + } + + public void setRepairRetryBackoff(RepairType repairType, String interval) + { + getOptions(repairType).repair_retry_backoff = new DurationSpec.LongSecondsBound(interval); + } + + @VisibleForTesting + static IAutoRepairTokenRangeSplitter newAutoRepairTokenRangeSplitter(RepairType repairType, ParameterizedClass parameterizedClass) throws ConfigurationException + { + try + { + Class tokenRangeSplitterClass; + final String className; + if (parameterizedClass.class_name != null && !parameterizedClass.class_name.isEmpty()) + { + className = parameterizedClass.class_name.contains(".") ? + parameterizedClass.class_name : + "org.apache.cassandra.repair.autorepair." + parameterizedClass.class_name; + tokenRangeSplitterClass = FBUtilities.classForName(className, "token_range_splitter"); + } + else + { + // If token_range_splitter.class_name is not defined, just use default, this is for convenience. + tokenRangeSplitterClass = AutoRepairConfig.DEFAULT_SPLITTER; + } + try + { + Map parameters = parameterizedClass.parameters != null ? parameterizedClass.parameters : Collections.emptyMap(); + // first attempt to initialize with RepairType and Map arguments. + return tokenRangeSplitterClass.getConstructor(RepairType.class, Map.class).newInstance(repairType, parameters); + } + catch (NoSuchMethodException nsme) + { + // fall back on no argument constructor. + return tokenRangeSplitterClass.getConstructor().newInstance(); + } + } + catch (Exception ex) + { + throw new ConfigurationException("Unable to create instance of IAutoRepairTokenRangeSplitter", ex); + } + } + + // Options configures auto-repair behavior for a given repair type. + // All fields can be modified dynamically. + public static class Options implements Serializable + { + // defaultOptions defines the default auto-repair behavior when no overrides are defined + @VisibleForTesting + private static Map defaultOptions; + + private static Map initializeDefaultOptions() + { + Map options = new EnumMap<>(AutoRepairConfig.RepairType.class); + options.put(AutoRepairConfig.RepairType.FULL, getDefaultOptions()); + options.put(RepairType.INCREMENTAL, getDefaultOptions()); + options.put(RepairType.PREVIEW_REPAIRED, getDefaultOptions()); + + return options; + } + + public static Map getDefaultOptionsMap() + { + if (defaultOptions == null) + { + synchronized (AutoRepairConfig.class) + { + if (defaultOptions == null) + { + defaultOptions = initializeDefaultOptions(); + } + } + } + return defaultOptions; + } + + public Options() + { + } + + @VisibleForTesting + protected static Options getDefaultOptions() + { + Options opts = new Options(); + + opts.enabled = false; + opts.repair_by_keyspace = true; + opts.number_of_repair_threads = 1; + opts.parallel_repair_count = 3; + opts.parallel_repair_percentage = 3; + opts.allow_parallel_replica_repair = false; + opts.allow_parallel_replica_repair_across_schedules = true; + opts.sstable_upper_threshold = 10000; + opts.ignore_dcs = new HashSet<>(); + opts.repair_primary_token_range_only = true; + opts.force_repair_new_node = false; + opts.table_max_repair_time = new DurationSpec.IntSecondsBound("6h"); + opts.materialized_view_repair_enabled = false; + opts.token_range_splitter = new ParameterizedClass(DEFAULT_SPLITTER.getName(), Collections.emptyMap()); + opts.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5m"); + opts.repair_session_timeout = new DurationSpec.IntSecondsBound("3h"); + opts.min_repair_interval = new DurationSpec.IntSecondsBound("24h"); + + return opts; + } + + // Enable/Disable full or incremental or previewed_repair auto repair + public volatile Boolean enabled; + // If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired + // individually. + public volatile Boolean repair_by_keyspace; + // Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool + // repair. + public volatile Integer number_of_repair_threads; + // Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used. + public volatile Integer parallel_repair_count; + // Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value + // is used. Recommendation is that the repair cycle on the cluster should finish within gc_grace_seconds. + public volatile Integer parallel_repair_percentage; + // Whether to allow a node to take its turn running repair while one or more of its replicas are running repair. + // Defaults to false, as running repairs concurrently on replicas can increase load and also cause + // anticompaction conflicts while running incremental repair. + public volatile Boolean allow_parallel_replica_repair; + // An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself) + // are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will + // prevent starting incremental repairs for this node. Defaults to true and is only evaluated when + // allow_parallel_replica_repair is false. + public volatile Boolean allow_parallel_replica_repair_across_schedules; + // Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good + // tables. + public volatile Integer sstable_upper_threshold; + // Minimum duration between repairing the same node again. This is useful for tiny clusters, such as + // clusters with 5 nodes that finish repairs quickly. The default is 24 hours. This means that if the scheduler + // completes one round on all nodes in less than 24 hours, it will not start a new repair round on a given node + // until 24 hours have passed since the last repair. + public volatile DurationSpec.IntSecondsBound min_repair_interval; + // Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify data + // centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded + // data centers. Useful if you have keyspaces that are not replicated in certain data centers, and you want to + // not run repair schedule in certain data centers. + public volatile Set ignore_dcs; + // Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults + // to true. General advice is to keep this true. + public volatile Boolean repair_primary_token_range_only; + // Force immediate repair on new nodes after they join the ring. + public volatile Boolean force_repair_new_node; + // Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the + // next table. + public volatile DurationSpec.IntSecondsBound table_max_repair_time; + // Repairs materialized views if true. + public volatile Boolean materialized_view_repair_enabled; + /** + * Splitter implementation to use for generating repair assignments. + *

      + * The default is {@link RepairTokenRangeSplitter}. The class should implement {@link IAutoRepairTokenRangeSplitter} + * and have a constructor accepting ({@link RepairType}, {@link java.util.Map}) + */ + public volatile ParameterizedClass token_range_splitter; + // After a node restart, wait for this much delay before scheduler starts running repair; this is to avoid starting repair immediately after a node restart. + public volatile DurationSpec.IntSecondsBound initial_scheduler_delay; + // Timeout for retrying stuck repair sessions. + public volatile DurationSpec.IntSecondsBound repair_session_timeout; + // Maximum number of retries for a repair session. + public volatile Integer repair_max_retries = 3; + // Backoff time before retrying a repair session. + public volatile DurationSpec.LongSecondsBound repair_retry_backoff = new DurationSpec.LongSecondsBound("30s"); + + public String toString() + { + return "Options{" + + "enabled=" + enabled + + ", repair_by_keyspace=" + repair_by_keyspace + + ", number_of_repair_threads=" + number_of_repair_threads + + ", parallel_repair_count=" + parallel_repair_count + + ", parallel_repair_percentage=" + parallel_repair_percentage + + ", allow_parallel_replica_repair=" + allow_parallel_replica_repair + + ", allow_parallel_replica_repair_across_schedules=" + allow_parallel_replica_repair_across_schedules + + ", sstable_upper_threshold=" + sstable_upper_threshold + + ", min_repair_interval=" + min_repair_interval + + ", ignore_dcs=" + ignore_dcs + + ", repair_primary_token_range_only=" + repair_primary_token_range_only + + ", force_repair_new_node=" + force_repair_new_node + + ", table_max_repair_time=" + table_max_repair_time + + ", materialized_view_repair_enabled=" + materialized_view_repair_enabled + + ", token_range_splitter=" + token_range_splitter + + ", intial_scheduler_delay=" + initial_scheduler_delay + + ", repair_session_timeout=" + repair_session_timeout + + '}'; + } + } + + @Nonnull + protected Options getOptions(RepairType repairType) + { + return repair_type_overrides.computeIfAbsent(repairType.getConfigName(), k -> new Options()); + } + + private static T getOverride(Options options, Function optionSupplier) + { + return options != null ? optionSupplier.apply(options) : null; + } + + @VisibleForTesting + protected T applyOverrides(RepairType repairType, Function optionSupplier) + { + // Check option by repair type first + Options repairTypeOverrides = getOptions(repairType); + T val = optionSupplier.apply(repairTypeOverrides); + + if (val != null) + return val; + + // Check option in global settings + if (global_settings != null) + { + val = getOverride(global_settings, optionSupplier); + + if (val != null) + return val; + } + + // Otherwise check defaults + return getOverride(Options.getDefaultOptionsMap().get(repairType), optionSupplier); + } + + public String toString() + { + return "AutoRepairConfig{" + + "enabled=" + enabled + + ", repair_check_interval=" + repair_check_interval + + ", history_clear_delete_hosts_buffer_interval=" + history_clear_delete_hosts_buffer_interval + + ", repair_task_min_duration=" + repair_task_min_duration + + ", global_settings=" + global_settings + + ", repair_type_overrides=" + repair_type_overrides + + "}"; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java new file mode 100644 index 000000000000..6822f20cf023 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.view.TableViews; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.repair.RepairParallelism; +import org.apache.cassandra.repair.messages.RepairOption; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.utils.Clock; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.SimpleDateFormat; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * AutoRepairState represents the state of automated repair for a given repair type. + */ +public abstract class AutoRepairState +{ + protected static final Logger logger = LoggerFactory.getLogger(AutoRepairState.class); + private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); + @VisibleForTesting + protected static Supplier timeFunc = Clock.Global::currentTimeMillis; + + @VisibleForTesting + protected final RepairType repairType; + @VisibleForTesting + protected int totalTablesConsideredForRepair = 0; + @VisibleForTesting + protected long lastRepairTimeInMs; + @VisibleForTesting + protected int nodeRepairTimeInSec = 0; + @VisibleForTesting + protected int clusterRepairTimeInSec = 0; + @VisibleForTesting + protected boolean repairInProgress = false; + @VisibleForTesting + protected int repairKeyspaceCount = 0; + @VisibleForTesting + protected int totalMVTablesConsideredForRepair = 0; + @VisibleForTesting + protected int totalDisabledTablesRepairCount = 0; + @VisibleForTesting + protected int failedTokenRangesCount = 0; + @VisibleForTesting + protected int succeededTokenRangesCount = 0; + @VisibleForTesting + protected int skippedTokenRangesCount = 0; + @VisibleForTesting + protected int skippedTablesCount = 0; + @VisibleForTesting + protected AutoRepairHistory longestUnrepairedNode; + protected final AutoRepairMetrics metrics; + + protected AutoRepairState(RepairType repairType) + { + metrics = AutoRepairMetricsManager.getMetrics(repairType); + this.repairType = repairType; + } + + public abstract RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly); + + protected RepairCoordinator getRepairRunnable(String keyspace, RepairOption options) + { + return new RepairCoordinator(StorageService.instance, StorageService.nextRepairCommand.incrementAndGet(), + options, keyspace); + } + + public long getLastRepairTime() + { + return lastRepairTimeInMs; + } + + public void setTotalTablesConsideredForRepair(int count) + { + totalTablesConsideredForRepair = count; + } + + public int getTotalTablesConsideredForRepair() + { + return totalTablesConsideredForRepair; + } + + public void setLastRepairTime(long lastRepairTime) + { + lastRepairTimeInMs = lastRepairTime; + } + + public int getClusterRepairTimeInSec() + { + return clusterRepairTimeInSec; + } + + public int getNodeRepairTimeInSec() + { + return nodeRepairTimeInSec; + } + + public void setRepairInProgress(boolean repairInProgress) + { + this.repairInProgress = repairInProgress; + } + + public boolean isRepairInProgress() + { + return repairInProgress; + } + + public int getLongestUnrepairedSec() + { + if (longestUnrepairedNode == null) + { + return 0; + } + return (int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - longestUnrepairedNode.getLastRepairFinishTime()); + } + + public void setTotalMVTablesConsideredForRepair(int count) + { + totalMVTablesConsideredForRepair = count; + } + + public int getTotalMVTablesConsideredForRepair() + { + return totalMVTablesConsideredForRepair; + } + + public void setNodeRepairTimeInSec(int elapsed) + { + nodeRepairTimeInSec = elapsed; + } + + public void setClusterRepairTimeInSec(int seconds) + { + clusterRepairTimeInSec = seconds; + } + + public void setRepairKeyspaceCount(int count) + { + repairKeyspaceCount = count; + } + + public int getRepairKeyspaceCount() + { + return repairKeyspaceCount; + } + + public void setLongestUnrepairedNode(AutoRepairHistory longestUnrepairedNode) + { + this.longestUnrepairedNode = longestUnrepairedNode; + } + + public void setFailedTokenRangesCount(int count) + { + failedTokenRangesCount = count; + } + + public int getFailedTokenRangesCount() + { + return failedTokenRangesCount; + } + + public void setSucceededTokenRangesCount(int count) + { + succeededTokenRangesCount = count; + } + + public int getSucceededTokenRangesCount() + { + return succeededTokenRangesCount; + } + + public void setSkippedTokenRangesCount(int count) + { + skippedTokenRangesCount = count; + } + + public int getSkippedTokenRangesCount() + { + return skippedTokenRangesCount; + } + + public void setSkippedTablesCount(int count) + { + skippedTablesCount = count; + } + + public int getSkippedTablesCount() + { + return skippedTablesCount; + } + + public void recordTurn(AutoRepairUtils.RepairTurn turn) + { + metrics.recordTurn(turn); + } + + public void setTotalDisabledTablesRepairCount(int count) + { + totalDisabledTablesRepairCount = count; + } + + public int getTotalDisabledTablesRepairCount() + { + return totalDisabledTablesRepairCount; + } +} + +class PreviewRepairedState extends AutoRepairState +{ + public PreviewRepairedState() + { + super(RepairType.PREVIEW_REPAIRED); + } + + @Override + public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) + { + RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, false, false, PreviewKind.REPAIRED, false, true, true, false, false, false); + + option.getColumnFamilies().addAll(tables); + + return getRepairRunnable(keyspace, option); + } +} + +class IncrementalRepairState extends AutoRepairState +{ + public IncrementalRepairState() + { + super(RepairType.INCREMENTAL); + } + + @Override + public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) + { + RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, true, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, + false, false, PreviewKind.NONE, true, true, true, false, false, false); + + option.getColumnFamilies().addAll(filterOutUnsafeTables(keyspace, tables)); + + return getRepairRunnable(keyspace, option); + } + + @VisibleForTesting + protected List filterOutUnsafeTables(String keyspaceName, List tables) + { + Keyspace keyspace = Keyspace.open(keyspaceName); + + return tables.stream() + .filter(table -> { + ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(table); + TableViews views = keyspace.viewManager.forTable(cfs.metadata()); + if (views != null && !views.isEmpty()) + { + logger.debug("Skipping incremental repair for {}.{} as it has materialized views", keyspaceName, table); + return false; + } + + if (cfs.metadata().params != null && cfs.metadata().params.cdc) + { + logger.debug("Skipping incremental repair for {}.{} as it has CDC enabled", keyspaceName, table); + return false; + } + + return true; + }).collect(Collectors.toList()); + } +} + +class FullRepairState extends AutoRepairState +{ + public FullRepairState() + { + super(RepairType.FULL); + } + + @Override + public RepairCoordinator getRepairRunnable(String keyspace, List tables, Set> ranges, boolean primaryRangeOnly) + { + RepairOption option = new RepairOption(RepairParallelism.PARALLEL, primaryRangeOnly, false, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), ranges, + false, false, PreviewKind.NONE, true, true, true, false, false, false); + + option.getColumnFamilies().addAll(tables); + + return getRepairRunnable(keyspace, option); + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java new file mode 100644 index 000000000000..6da487e5e06b --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java @@ -0,0 +1,1189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Splitter; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.EndpointsByRange; +import org.apache.cassandra.locator.EndpointsForRange; +import org.apache.cassandra.locator.LocalStrategy; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.cql3.statements.ModificationStatement; +import org.apache.cassandra.cql3.statements.SelectStatement; +import org.apache.cassandra.db.ConsistencyLevel; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.db.marshal.UUIDType; +import org.apache.cassandra.gms.Gossiper; +import org.apache.cassandra.locator.AbstractReplicationStrategy; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.MetaStrategy; +import org.apache.cassandra.locator.NetworkTopologyStrategy; +import org.apache.cassandra.locator.RangesAtEndpoint; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.schema.Schema; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.ViewMetadata; +import org.apache.cassandra.serializers.SetSerializer; +import org.apache.cassandra.serializers.UUIDSerializer; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.ClientState; +import org.apache.cassandra.service.QueryState; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.membership.Directory; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.transport.Dispatcher; +import org.apache.cassandra.transport.ProtocolVersion; +import org.apache.cassandra.transport.messages.ResultMessage; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.utils.NoSpamLogger; + +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR; +import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; + +/** + * This class serves as a utility class for AutoRepair. It contains various helper APIs + * to store/retrieve repair status, decide whose turn is next, etc. + */ +public class AutoRepairUtils +{ + private static final Logger logger = LoggerFactory.getLogger(AutoRepairUtils.class); + static final String COL_REPAIR_TYPE = "repair_type"; + static final String COL_HOST_ID = "host_id"; + static final String COL_REPAIR_START_TS = "repair_start_ts"; + static final String COL_REPAIR_FINISH_TS = "repair_finish_ts"; + static final String COL_REPAIR_PRIORITY = "repair_priority"; + static final String COL_DELETE_HOSTS = "delete_hosts"; // this set stores the host ids which think the row should be deleted + static final String COL_REPAIR_TURN = "repair_turn"; // this record the last repair turn. Normal turn or turn due to priority + static final String COL_DELETE_HOSTS_UPDATE_TIME = "delete_hosts_update_time"; // the time when delete hosts are upated + static final String COL_FORCE_REPAIR = "force_repair"; // if set to true, the node will do non-primary range rapair + + final static String SELECT_REPAIR_HISTORY = String.format( + "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE); + final static String SELECT_REPAIR_PRIORITY = String.format( + "SELECT * FROM %s.%s WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + final static String DEL_REPAIR_PRIORITY = String.format( + "DELETE %s[?] FROM %s.%s WHERE %s = ?", COL_REPAIR_PRIORITY, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_TYPE); + final static String ADD_PRIORITY_HOST = String.format( + "UPDATE %s.%s SET %s = %s + ? WHERE %s = ?", SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_PRIORITY, COL_REPAIR_TYPE); + + final static String INSERT_NEW_REPAIR_HISTORY = String.format( + "INSERT INTO %s.%s (%s, %s, %s, %s, %s, %s) values (?, ? ,?, ?, {}, ?) IF NOT EXISTS", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + COL_HOST_ID, COL_REPAIR_START_TS, COL_REPAIR_FINISH_TS, COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME); + + final static String ADD_HOST_ID_TO_DELETE_HOSTS = String.format( + "UPDATE %s.%s SET %s = %s + ?, %s = ? WHERE %s = ? AND %s = ? IF EXISTS" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME, COL_REPAIR_TYPE, COL_HOST_ID); + + final static String DEL_AUTO_REPAIR_HISTORY = String.format( + "DELETE FROM %s.%s WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, + COL_HOST_ID); + + final static String RECORD_START_REPAIR_HISTORY = String.format( + "UPDATE %s.%s SET %s= ?, repair_turn = ? WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_START_TS, + COL_REPAIR_TYPE, COL_HOST_ID); + + final static String RECORD_FINISH_REPAIR_HISTORY = String.format( + "UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS, + COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID); + + final static String CLEAR_DELETE_HOSTS = String.format( + "UPDATE %s.%s SET %s= {} WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS, + COL_REPAIR_TYPE, COL_HOST_ID); + + final static String SET_FORCE_REPAIR = String.format( + "UPDATE %s.%s SET %s=true WHERE %s = ? AND %s = ?" + , SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR, + COL_REPAIR_TYPE, COL_HOST_ID); + + final static String SELECT_LAST_REPAIR_TIME_FOR_NODE = String.format( + "SELECT %s FROM %s.%s WHERE %s = ? AND %s = ?", COL_REPAIR_FINISH_TS, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, + SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE, COL_HOST_ID); + + static ModificationStatement delStatementRepairHistory; + static SelectStatement selectStatementRepairHistory; + static ModificationStatement delStatementPriorityStatus; + static SelectStatement selectStatementRepairPriority; + static SelectStatement selectLastRepairTimeForNode; + static ModificationStatement addPriorityHost; + static ModificationStatement insertNewRepairHistoryStatement; + static ModificationStatement recordStartRepairHistoryStatement; + static ModificationStatement recordFinishRepairHistoryStatement; + static ModificationStatement addHostIDToDeleteHostsStatement; + static ModificationStatement clearDeleteHostsStatement; + static ModificationStatement setForceRepairStatement; + static ConsistencyLevel internalQueryCL; + + public enum RepairTurn + { + MY_TURN, + NOT_MY_TURN, + MY_TURN_DUE_TO_PRIORITY, + MY_TURN_FORCE_REPAIR + } + + public static void setup() + { + selectStatementRepairHistory = (SelectStatement) QueryProcessor.getStatement(SELECT_REPAIR_HISTORY, ClientState + .forInternalCalls()); + selectStatementRepairPriority = (SelectStatement) QueryProcessor.getStatement(SELECT_REPAIR_PRIORITY, ClientState + .forInternalCalls()); + selectLastRepairTimeForNode = (SelectStatement) QueryProcessor.getStatement(SELECT_LAST_REPAIR_TIME_FOR_NODE, ClientState + .forInternalCalls()); + delStatementPriorityStatus = (ModificationStatement) QueryProcessor.getStatement(DEL_REPAIR_PRIORITY, ClientState + .forInternalCalls()); + addPriorityHost = (ModificationStatement) QueryProcessor.getStatement(ADD_PRIORITY_HOST, ClientState + .forInternalCalls()); + insertNewRepairHistoryStatement = (ModificationStatement) QueryProcessor.getStatement(INSERT_NEW_REPAIR_HISTORY, ClientState + .forInternalCalls()); + recordStartRepairHistoryStatement = (ModificationStatement) QueryProcessor.getStatement(RECORD_START_REPAIR_HISTORY, ClientState + .forInternalCalls()); + recordFinishRepairHistoryStatement = (ModificationStatement) QueryProcessor.getStatement(RECORD_FINISH_REPAIR_HISTORY, ClientState + .forInternalCalls()); + addHostIDToDeleteHostsStatement = (ModificationStatement) QueryProcessor.getStatement(ADD_HOST_ID_TO_DELETE_HOSTS, ClientState + .forInternalCalls()); + setForceRepairStatement = (ModificationStatement) QueryProcessor.getStatement(SET_FORCE_REPAIR, ClientState + .forInternalCalls()); + clearDeleteHostsStatement = (ModificationStatement) QueryProcessor.getStatement(CLEAR_DELETE_HOSTS, ClientState + .forInternalCalls()); + delStatementRepairHistory = (ModificationStatement) QueryProcessor.getStatement(DEL_AUTO_REPAIR_HISTORY, ClientState + .forInternalCalls()); + Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + internalQueryCL = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? + ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.ONE; + } + + public static class AutoRepairHistory + { + UUID hostId; + String repairTurn; + long lastRepairStartTime; + long lastRepairFinishTime; + Set deleteHosts; + long deleteHostsUpdateTime; + boolean forceRepair; + + public AutoRepairHistory(UUID hostId, String repairTurn, long lastRepairStartTime, long lastRepairFinishTime, + Set deleteHosts, long deleteHostsUpateTime, boolean forceRepair) + { + this.hostId = hostId; + this.repairTurn = repairTurn; + this.lastRepairStartTime = lastRepairStartTime; + this.lastRepairFinishTime = lastRepairFinishTime; + this.deleteHosts = deleteHosts; + if (this.deleteHosts == null) + { + this.deleteHosts = new HashSet<>(); + } + this.deleteHostsUpdateTime = deleteHostsUpateTime; + this.forceRepair = forceRepair; + } + + public String toString() + { + return MoreObjects.toStringHelper(this). + add("hostId", hostId). + add("repairTurn", repairTurn). + add("lastRepairStartTime", lastRepairStartTime). + add("lastRepairFinishTime", lastRepairFinishTime). + add("deleteHosts", deleteHosts). + toString(); + } + + public boolean isRepairRunning() + { + // if a repair history record has start time laster than finish time, it means the repair is running + return lastRepairStartTime > lastRepairFinishTime; + } + + public long getLastRepairFinishTime() + { + return lastRepairFinishTime; + } + } + + public static class CurrentRepairStatus + { + public Set hostIdsWithOnGoingRepair; // hosts that is running repair + public Set hostIdsWithOnGoingForceRepair; // hosts that is running repair because of force repair + Set priority; + public AutoRepairHistory myRepairHistory; + List historiesWithoutOnGoingRepair; // hosts that is NOT running repair + + public CurrentRepairStatus(List repairHistories, Set priority, UUID myId) + { + hostIdsWithOnGoingRepair = new HashSet<>(); + hostIdsWithOnGoingForceRepair = new HashSet<>(); + historiesWithoutOnGoingRepair = new ArrayList<>(); + + for (AutoRepairHistory history : repairHistories) + { + if (history.isRepairRunning()) + { + if (history.forceRepair) + { + hostIdsWithOnGoingForceRepair.add(history.hostId); + } + else + { + hostIdsWithOnGoingRepair.add(history.hostId); + } + } + else + { + historiesWithoutOnGoingRepair.add(history); + } + if (history.hostId.equals(myId)) + { + myRepairHistory = history; + } + } + this.priority = priority; + } + + public Set getAllHostsWithOngoingRepair() + { + return Sets.union(hostIdsWithOnGoingRepair, hostIdsWithOnGoingForceRepair); + } + + public String toString() + { + return MoreObjects.toStringHelper(this). + add("hostIdsWithOnGoingRepair", hostIdsWithOnGoingRepair). + add("hostIdsWithOnGoingForceRepair", hostIdsWithOnGoingForceRepair). + add("historiesWithoutOnGoingRepair", historiesWithoutOnGoingRepair). + add("priority", priority). + add("myRepairHistory", myRepairHistory). + toString(); + } + } + + @VisibleForTesting + public static List getAutoRepairHistory(RepairType repairType) + { + UntypedResultSet repairHistoryResult; + + ResultMessage.Rows repairStatusRows = selectStatementRepairHistory.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()))), Dispatcher.RequestTime.forImmediateExecution()); + repairHistoryResult = UntypedResultSet.create(repairStatusRows.result); + + List repairHistories = new ArrayList<>(); + if (!repairHistoryResult.isEmpty()) + { + for (UntypedResultSet.Row row : repairHistoryResult) + { + UUID hostId = row.getUUID(COL_HOST_ID); + String repairTurn = null; + if (row.has(COL_REPAIR_TURN)) + repairTurn = row.getString(COL_REPAIR_TURN); + long lastRepairStartTime = row.getLong(COL_REPAIR_START_TS, 0); + long lastRepairFinishTime = row.getLong(COL_REPAIR_FINISH_TS, 0); + Set deleteHosts = row.getSet(COL_DELETE_HOSTS, UUIDType.instance); + long deleteHostsUpdateTime = row.getLong(COL_DELETE_HOSTS_UPDATE_TIME, 0); + boolean forceRepair = row.has(COL_FORCE_REPAIR) && row.getBoolean(COL_FORCE_REPAIR); + repairHistories.add(new AutoRepairHistory(hostId, repairTurn, lastRepairStartTime, lastRepairFinishTime, + deleteHosts, deleteHostsUpdateTime, forceRepair)); + } + return repairHistories; + } + logger.info("No repair history found"); + return null; + } + + // A host may add itself in delete hosts for some other hosts due to restart or some temp gossip issue. If a node's record + // delete_hosts is not growing for more than 2 hours, we consider it as a normal node so we clear the delete_hosts for that node + public static void clearDeleteHosts(RepairType repairType, UUID hostId) + { + clearDeleteHostsStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); + } + + public static void setForceRepairNewNode(RepairType repairType) + { + // this function will be called when a node bootstrap finished + UUID hostId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + // insert the data first + insertNewRepairHistory(repairType, currentTimeMillis(), currentTimeMillis()); + setForceRepair(repairType, hostId); + } + + public static void setForceRepair(RepairType repairType, Set hosts) + { + // this function is used by nodetool + for (InetAddressAndPort host : hosts) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(host); + setForceRepair(repairType, hostId); + } + } + + public static void setForceRepair(RepairType repairType, UUID hostId) + { + setForceRepairStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), + Dispatcher.RequestTime.forImmediateExecution()); + + logger.info("Set force repair repair type: {}, node: {}", repairType, hostId); + } + + public static long getLastRepairTimeForNode(RepairType repairType, UUID hostId) + { + ResultMessage.Rows rows = selectLastRepairTimeForNode.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList( + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), + Dispatcher.RequestTime.forImmediateExecution()); + UntypedResultSet repairTime = UntypedResultSet.create(rows.result); + if (repairTime.isEmpty()) + { + return 0; + } + return repairTime.one().getLong(COL_REPAIR_FINISH_TS); + } + + @VisibleForTesting + public static CurrentRepairStatus getCurrentRepairStatus(RepairType repairType, List autoRepairHistories, UUID myId) + { + if (autoRepairHistories != null) + { + return new CurrentRepairStatus(autoRepairHistories, getPriorityHostIds(repairType), myId); + } + return null; + } + + @VisibleForTesting + protected static TreeSet getHostIdsInCurrentRing(RepairType repairType, Collection allNodesInRing) + { + TreeSet hostIdsInCurrentRing = new TreeSet<>(); + for (NodeAddresses node : allNodesInRing) + { + String nodeDC = DatabaseDescriptor.getLocator().location(node.broadcastAddress).datacenter; + if (AutoRepairService.instance.getAutoRepairConfig().getIgnoreDCs(repairType).contains(nodeDC)) + { + logger.info("Ignore node {} because its datacenter is {}", node, nodeDC); + continue; + } + /* + * Check if endpoint state exists in gossip or not. If it + * does not then this maybe a ghost node so ignore it + */ + if (Gossiper.instance.isAlive(node.broadcastAddress)) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(node.broadcastAddress); + hostIdsInCurrentRing.add(hostId); + } + else + { + logger.warn("Node is not present in Gossip cache node {}, node data center {}", node, nodeDC); + } + } + return hostIdsInCurrentRing; + } + + public static TreeSet getHostIdsInCurrentRing(RepairType repairType) + { + Collection allNodesInRing = ClusterMetadata.current().directory.addresses.values(); + return getHostIdsInCurrentRing(repairType, allNodesInRing); + } + + // This function will return the host ID for the node which has not been repaired for longest time + public static AutoRepairHistory getHostWithLongestUnrepairTime(RepairType repairType) + { + List autoRepairHistories = getAutoRepairHistory(repairType); + return getHostWithLongestUnrepairTime(autoRepairHistories); + } + + /** + * Convenience method to resolve the broadcast address of a host id from {@link ClusterMetadata} + * @return broadcast address if it exists in CMS, otherwise null. + */ + @Nullable + private static InetAddressAndPort getBroadcastAddress(UUID hostId) + { + Directory directory = ClusterMetadata.current().directory; + + NodeId nodeId = directory.nodeIdFromHostId(hostId); + if (nodeId != null) + { + NodeAddresses nodeAddresses = directory.getNodeAddresses(nodeId); + if (nodeAddresses != null) + { + return nodeAddresses.broadcastAddress; + } + } + return null; + } + + /** + * @return Map of broadcast address to host id, if a broadcast address cannot be found for a host, it is + * not included in the map. + */ + private static Map getBroadcastAddressToHostIdMap(Set hosts) + { + // Get a mapping of endpoint : host id + Map broadcastAddressMap = new HashMap<>(hosts.size()); + for (UUID hostId : hosts) + { + InetAddressAndPort broadcastAddress = getBroadcastAddress(hostId); + if (broadcastAddress == null) + { + logger.warn("Could not resolve broadcast address from host id {} in ClusterMetadata can't accurately " + + "determine if this node is a replica of the local node.", hostId); + } + else + { + broadcastAddressMap.put(broadcastAddress, hostId); + } + } + return broadcastAddressMap; + } + + /** + * @return Mapping of unique replication strategy to keyspaces using that strategy that we care about repairing. + */ + private static Map> getReplicationStrategies() + { + // Collect all unique replication strategies among all keyspaces. + Map> replicationStrategies = new HashMap<>(); + for (Keyspace keyspace : Keyspace.all()) + { + if (AutoRepairUtils.shouldConsiderKeyspace(keyspace)) + { + replicationStrategies.computeIfAbsent(keyspace.getReplicationStrategy(), k -> new ArrayList<>()) + .add(keyspace.getName()); + } + } + return replicationStrategies; + } + + /** + * Collects all hosts being repaired among all active repair schedules and their schedule if + * {@link AutoRepairConfig#getAllowParallelReplicaRepairAcrossSchedules(RepairType)} is true for this repairType. + * Accepts the currently evaluated repairType's schedule as an optimization to avoid grabbing its repair status an + * additional time. + * + * @param myRepairType The repair type schedule being evaluated. + * @param myRepairStatus The repair status for that repair type. + * @return All hosts among active schedules currently being repaired. + */ + private static Map getHostsBeingRepaired(RepairType myRepairType, CurrentRepairStatus myRepairStatus) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + + Map hostsBeingRepaired = myRepairStatus.getAllHostsWithOngoingRepair().stream() + .collect(Collectors.toMap((h) -> h, (v) -> myRepairType)); + + // If we don't allow repairing across schedules, iterate over other enabled schedules and include hosts + // actively being repaired. + if (!config.getAllowParallelReplicaRepairAcrossSchedules(myRepairType)) + { + for (RepairType repairType : RepairType.values()) + { + if (myRepairType == repairType) + continue; + + if (config.isAutoRepairEnabled(repairType)) + { + CurrentRepairStatus repairStatus = getCurrentRepairStatus(repairType, getAutoRepairHistory(repairType), null); + if (repairStatus != null) + { + for (UUID hostId : repairStatus.getAllHostsWithOngoingRepair()) + { + hostsBeingRepaired.putIfAbsent(hostId, repairType); + } + } + } + } + } + return hostsBeingRepaired; + } + + /** + * Identifies the most eligible host to repair for nodes preceding or equal to this nodes' lastRepairFinishTime. + * The criteria for this is to find the node with the oldest last repair finish time of which none of its replicas + * are currently under repair. + * @return The most eligible host to repair or null if no candidates before and including this nodes' current repair status. + */ + @VisibleForTesting + public static AutoRepairHistory getMostEligibleHostToRepair(RepairType repairType, CurrentRepairStatus currentRepairStatus, UUID myId) + { + // 0. If this repairType allows parallel replica repair, short circuit and return the host with the longest unrepair time + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (config.getAllowParallelReplicaRepair(repairType)) + { + return getHostWithLongestUnrepairTime(currentRepairStatus.historiesWithoutOnGoingRepair); + } + + // 1. Sort repair histories from oldest completed to newest + Stream finishedRepairHistories = currentRepairStatus.historiesWithoutOnGoingRepair + .stream() + .sorted(Comparator.comparingLong(h -> h.lastRepairFinishTime)); + + // 2. Optimization: Truncate repair histories after myId so we don't evaluate anything more recent as if we + // aren't interested in anything that isn't this node. + final AtomicBoolean myHistoryFound = new AtomicBoolean(false); + finishedRepairHistories = finishedRepairHistories.takeWhile((history) -> { + if (myHistoryFound.get()) return false; + + myHistoryFound.set(history.hostId.equals(myId)); + return true; + }); + + // If there are any hosts with ongoing repair, filter the repair histories to not include nodes whose replicas + // are ongoing repair. + Map hostsBeingRepairedToRepairType = getHostsBeingRepaired(repairType, currentRepairStatus); + + // 3. If I am already actively being repaired in another schedule, defer submitting repairs; if already + // repairing for this type, return node so it can take its turn. + RepairType alreadyRepairingType = hostsBeingRepairedToRepairType.get(myId); + if (alreadyRepairingType != null) + { + if (repairType != alreadyRepairingType) + { + logger.info("Deferring repair because I am already actively repairing in schedule {}", hostsBeingRepairedToRepairType.get(myId)); + AutoRepairMetricsManager.getMetrics(repairType).repairDelayedBySchedule.inc(); + return null; + } + else if (currentRepairStatus.myRepairHistory != null) + { + // if the repair type matches this repair, assume the node was restarted while repairing, return node + // so it can take its turn. + logAlreadyMyTurn(); + return currentRepairStatus.myRepairHistory; + } + } + + if (!hostsBeingRepairedToRepairType.isEmpty()) + { + // 4. Extract InetAddresses for each UUID as replicas are identified by their address. + Map hostsBeingRepaired = getBroadcastAddressToHostIdMap(hostsBeingRepairedToRepairType.keySet()); + + // 5. Collect unique replication strategies and group them up with their keyspaces. + Map> replicationStrategies = getReplicationStrategies(); + + // 6. Filter out repair histories who have a replica being repaired, note that this is lazy, given the stream + // is completed using findFirst, it should stop as soon as the matching criteria is met. + finishedRepairHistories = finishedRepairHistories.filter((history) -> !hasReplicaWithOngoingRepair(history, + myId, + repairType, + hostsBeingRepaired, + hostsBeingRepairedToRepairType, + replicationStrategies)); + } + + // 7. Select the first (oldest lastRepairFinishTime) repair history without replicas being repaired + return finishedRepairHistories.findFirst().orElse(null); + } + + + /** + * @return Whether the host for the given eligibleRepairHistory has any replicas in hostsBeingRepaired. + * @param eligibleHistory History of node to check + * @param myId Host id of this node, if the repair history is for this node, additional logging will take place. + * @param myRepairType repair type being evaluated + * @param hostsBeingRepaired Hosts being repaired. + * @param hostIdToRepairType mapping of hosts being repaired to the repair type its being repaired for. + * @param replicationStrategies Mapping of unique replication strategies to keyspaces having that strategy. + */ + private static boolean hasReplicaWithOngoingRepair(AutoRepairHistory eligibleHistory, + UUID myId, + RepairType myRepairType, + Map hostsBeingRepaired, + Map hostIdToRepairType, + Map> replicationStrategies) + { + // If no broadcast address found for this host id in cluster metadata, just skip it, a node should always + // see itself in cluster metadata. + InetAddressAndPort eligibleBroadcastAddress = getBroadcastAddress(eligibleHistory.hostId); + if (eligibleBroadcastAddress == null) + { + return true; + } + + // For each replication strategy, determine if host being repaired is a replica of the local node. + for (Map.Entry> entry : replicationStrategies.entrySet()) + { + AbstractReplicationStrategy replicationStrategy = entry.getKey(); + EndpointsByRange endpointsByRange = replicationStrategy.getRangeAddresses(ClusterMetadata.current()); + + // get ranges of the eligible address for the given replication strategy. + RangesAtEndpoint rangesAtEndpoint = StorageService.instance.getReplicas(replicationStrategy, eligibleBroadcastAddress); + for (Replica replica : rangesAtEndpoint) + { + // get the endpoints involved in this range. + EndpointsForRange endpointsForRange = endpointsByRange.get(replica.range()); + // For each host in this range... + for (InetAddressAndPort inetAddressAndPort : endpointsForRange.endpoints()) + { + // If the address of the node in the range belongs to a host being repaired, return true. + UUID hostId = hostsBeingRepaired.get(inetAddressAndPort); + if (hostId != null) + { + // log if the repair history matches the current running node. + InetAddressAndPort myBroadcastAddress = getBroadcastAddress(myId); + if (myBroadcastAddress != null && myBroadcastAddress.equals(eligibleBroadcastAddress)) + { + logger.info("Deferring repair because replica {} ({}) with shared ranges for " + + "{} keyspace(s) (e.g. {}) is currently taking its turn for schedule {}", + hostId, inetAddressAndPort, entry.getValue().size(), entry.getValue().get(0), + hostIdToRepairType.get(hostId)); + AutoRepairMetricsManager.getMetrics(myRepairType).repairDelayedByReplica.inc(); + } + else if (logger.isDebugEnabled()) + { + logger.debug("Not considering node {} ({}) for repair as it has replica {} ({}) with " + + "shared ranges for {} keyspace(s) (e.g. {}) which is currently taking its " + + "turn for schedule {}", + eligibleHistory.hostId, eligibleBroadcastAddress, + hostId, inetAddressAndPort, entry.getValue().size(), entry.getValue().get(0), + hostIdToRepairType.get(hostId)); + + } + return true; + } + } + } + } + + // No replicas found of eligible host. + return false; + } + + private static AutoRepairHistory getHostWithLongestUnrepairTime(List autoRepairHistories) + { + if (autoRepairHistories == null) + { + return null; + } + AutoRepairHistory rst = null; + long oldestTimestamp = Long.MAX_VALUE; + for (AutoRepairHistory autoRepairHistory : autoRepairHistories) + { + if (autoRepairHistory.lastRepairFinishTime < oldestTimestamp) + { + rst = autoRepairHistory; + oldestTimestamp = autoRepairHistory.lastRepairFinishTime; + } + } + return rst; + } + + public static int getMaxNumberOfNodeRunAutoRepair(RepairType repairType, int groupSize) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (groupSize == 0) + { + return Math.max(config.getParallelRepairCount(repairType), 1); + } + // we will use the max number from config between auto_repair_parallel_repair_count_in_group and auto_repair_parallel_repair_percentage_in_group + int value = Math.max(groupSize * config.getParallelRepairPercentage(repairType) / 100, + config.getParallelRepairCount(repairType)); + // make sure at least one node getting repaired + return Math.max(1, value); + } + + private static void logAlreadyMyTurn() + { + logger.warn("This node already was considered to having an ongoing repair for this repair type, must have " + + "been restarted, taking my turn back"); + } + + @VisibleForTesting + public static RepairTurn myTurnToRunRepair(RepairType repairType, UUID myId) + { + try + { + Collection allNodesInRing = ClusterMetadata.current().directory.addresses.values(); + logger.info("Total nodes in ring {}", allNodesInRing.size()); + TreeSet hostIdsInCurrentRing = getHostIdsInCurrentRing(repairType, allNodesInRing); + logger.info("Total nodes qualified for repair {}", hostIdsInCurrentRing.size()); + + List autoRepairHistories = getAutoRepairHistory(repairType); + Set autoRepairHistoryIds = new HashSet<>(); + + // 1. Remove any node that is not part of group based on gossip info + if (autoRepairHistories != null) + { + for (AutoRepairHistory nodeHistory : autoRepairHistories) + { + autoRepairHistoryIds.add(nodeHistory.hostId); + // clear delete_hosts if the node's delete hosts is not growing for more than two hours + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + if (!nodeHistory.deleteHosts.isEmpty() + && config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds() < TimeUnit.MILLISECONDS.toSeconds( + currentTimeMillis() - nodeHistory.deleteHostsUpdateTime + )) + { + clearDeleteHosts(repairType, nodeHistory.hostId); + logger.info("Delete hosts for {} for repair type {} has not been updated for more than {} seconds. Delete hosts has been cleared. Delete hosts before clear {}" + , nodeHistory.hostId, repairType, config.getAutoRepairHistoryClearDeleteHostsBufferInterval(), nodeHistory.deleteHosts); + } + else if (!hostIdsInCurrentRing.contains(nodeHistory.hostId)) + { + if (nodeHistory.deleteHosts.size() > Math.max(2, hostIdsInCurrentRing.size() * 0.5)) + { + // More than half of the groups thinks the record should be deleted + logger.info("{} think {} is orphan node, will delete auto repair history for repair type {}.", nodeHistory.deleteHosts, nodeHistory.hostId, repairType); + deleteAutoRepairHistory(repairType, nodeHistory.hostId); + } + else + { + // I think this host should be deleted + logger.info("I({}) think {} is not part of ring, vote to delete it for repair type {}.", myId, nodeHistory.hostId, repairType); + addHostIdToDeleteHosts(repairType, myId, nodeHistory.hostId); + } + } + } + } + + // 2. Add node to auto repair history table if a node is in gossip info + for (UUID hostId : hostIdsInCurrentRing) + { + if (!autoRepairHistoryIds.contains(hostId)) + { + logger.info("{} for repair type {} doesn't exist in the auto repair history table, insert a new record.", repairType, hostId); + insertNewRepairHistory(repairType, hostId, currentTimeMillis(), currentTimeMillis()); + } + } + + // get updated current repair status + CurrentRepairStatus currentRepairStatus = getCurrentRepairStatus(repairType, getAutoRepairHistory(repairType), myId); + if (currentRepairStatus != null) + { + if (logger.isDebugEnabled()) + { + logger.debug("Latest repair status {}", currentRepairStatus); + } + //check if I am forced to run repair + for (AutoRepairHistory history : currentRepairStatus.historiesWithoutOnGoingRepair) + { + if (history.forceRepair && history.hostId.equals(myId)) + { + return MY_TURN_FORCE_REPAIR; + } + } + } + + // check if node was already indicated as having an ongoing repair, this may happen when a node restarts + // before finishing repairing. + if (currentRepairStatus != null && currentRepairStatus.getAllHostsWithOngoingRepair().contains(myId)) + { + logAlreadyMyTurn(); + + // use the previously chosen turn. + if (currentRepairStatus.myRepairHistory != null && currentRepairStatus.myRepairHistory.repairTurn != null) + { + return RepairTurn.valueOf(currentRepairStatus.myRepairHistory.repairTurn); + } + else + { + return MY_TURN; + } + } + + int parallelRepairNumber = getMaxNumberOfNodeRunAutoRepair(repairType, + autoRepairHistories == null ? 0 : autoRepairHistories.size()); + logger.info("Will run repairs concurrently on {} node(s)", parallelRepairNumber); + if (currentRepairStatus == null || parallelRepairNumber > currentRepairStatus.hostIdsWithOnGoingRepair.size()) + { + // more repairs can be run, I might be the new one + if (autoRepairHistories != null) + { + logger.info("Auto repair history table has {} records", autoRepairHistories.size()); + } + else + { + // try to fetch again + autoRepairHistories = getAutoRepairHistory(repairType); + if (autoRepairHistories == null) + { + logger.error("No record found"); + return NOT_MY_TURN; + } + + currentRepairStatus = getCurrentRepairStatus(repairType, autoRepairHistories, myId); + } + + UUID priorityHostId = null; + if (currentRepairStatus.priority != null) + { + for (UUID priorityID : currentRepairStatus.priority) + { + // remove ids doesn't belong to this ring + if (!hostIdsInCurrentRing.contains(priorityID)) + { + logger.info("{} is not part of the current ring, will be removed from priority list.", priorityID); + removePriorityStatus(repairType, priorityID); + } + else + { + priorityHostId = priorityID; + break; + } + } + } + + if (priorityHostId != null && !myId.equals(priorityHostId)) + { + logger.info("Priority list is not empty and I'm not the first node in the list, not my turn." + + "First node in priority list is {}", getBroadcastAddress(priorityHostId)); + return NOT_MY_TURN; + } + + if (myId.equals(priorityHostId)) + { + //I have a priority for repair hence its my turn now + return MY_TURN_DUE_TO_PRIORITY; + } + + // Determine if this node is the most eligible host to repair. + AutoRepairHistory nodeToBeRepaired = getMostEligibleHostToRepair(repairType, currentRepairStatus, myId); + if (nodeToBeRepaired != null) + { + if (nodeToBeRepaired.hostId.equals(myId)) + { + logger.info("This node is selected to be repaired for repair type {}", repairType); + return MY_TURN; + } + + // log which node is next, which is helpful for debugging + logger.info("Next node to be repaired for repair type {}: {} ({})", repairType, + getBroadcastAddress(nodeToBeRepaired.hostId), + nodeToBeRepaired); + } + + // If this node is not identified as most eligible, set the repair lag time. + if (currentRepairStatus.myRepairHistory != null) + { + AutoRepairMetricsManager.getMetrics(repairType) + .recordRepairStartLag(currentRepairStatus.myRepairHistory.lastRepairFinishTime); + } + } + else if (currentRepairStatus.hostIdsWithOnGoingForceRepair.contains(myId)) + { + return MY_TURN_FORCE_REPAIR; + } + // for some reason I was not done with the repair hence resume (maybe node restart in-between, etc.) + return currentRepairStatus.hostIdsWithOnGoingRepair.contains(myId) ? MY_TURN : NOT_MY_TURN; + } + catch (Exception e) + { + logger.error("Exception while deciding node's turn:", e); + } + return NOT_MY_TURN; + } + + static void deleteAutoRepairHistory(RepairType repairType, UUID hostId) + { + //delete the given hostId + delStatementRepairHistory.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId))), Dispatcher.RequestTime.forImmediateExecution()); + } + + static void updateStartAutoRepairHistory(RepairType repairType, UUID myId, long timestamp, RepairTurn turn) + { + recordStartRepairHistoryStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(timestamp), + ByteBufferUtil.bytes(turn.name()), + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(myId) + )), Dispatcher.RequestTime.forImmediateExecution()); + } + + static void updateFinishAutoRepairHistory(RepairType repairType, UUID myId, long timestamp) + { + recordFinishRepairHistoryStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(timestamp), + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(myId) + )), Dispatcher.RequestTime.forImmediateExecution()); + logger.info("Auto repair finished for {}", myId); + } + + public static void insertNewRepairHistory(RepairType repairType, UUID hostId, long startTime, long finishTime) + { + try + { + Keyspace autoRepairKS = Schema.instance.getKeyspaceInstance(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME); + ConsistencyLevel cl = autoRepairKS.getReplicationStrategy().getClass() == NetworkTopologyStrategy.class ? + ConsistencyLevel.LOCAL_SERIAL : null; + + UntypedResultSet resultSet; + ResultMessage.Rows resultMessage = (ResultMessage.Rows) insertNewRepairHistoryStatement.execute( + QueryState.forInternalCalls(), QueryOptions.create(internalQueryCL, Lists.newArrayList( + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostId), + ByteBufferUtil.bytes(startTime), + ByteBufferUtil.bytes(finishTime), + ByteBufferUtil.bytes(currentTimeMillis()) + ), false, -1, null, cl, ProtocolVersion.CURRENT, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME), + Dispatcher.RequestTime.forImmediateExecution()); + resultSet = UntypedResultSet.create(resultMessage.result); + boolean applied = resultSet.one().getBoolean(ModificationStatement.CAS_RESULT_COLUMN.toString()); + if (applied) + { + logger.info("Successfully inserted a new auto repair history record for host id: {}", hostId); + } + else + { + logger.info("Record exists, no need to insert again for host id: {}", hostId); + } + } + catch (Exception e) + { + logger.error("Exception in inserting new repair history:", e); + } + } + + public static void insertNewRepairHistory(RepairType repairType, long startTime, long finishTime) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + insertNewRepairHistory(repairType, hostId, startTime, finishTime); + } + + public static void addHostIdToDeleteHosts(RepairType repairType, UUID myID, UUID hostToBeDeleted) + { + SetSerializer serializer = SetSerializer.getInstance(UUIDSerializer.instance, UTF8Type.instance.comparatorSet); + addHostIDToDeleteHostsStatement.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(serializer.serialize(new HashSet<>(Arrays.asList(myID))), + ByteBufferUtil.bytes(currentTimeMillis()), + ByteBufferUtil.bytes(repairType.toString()), + ByteBufferUtil.bytes(hostToBeDeleted) + )), Dispatcher.RequestTime.forImmediateExecution()); + } + + public static void addPriorityHosts(RepairType repairType, Set hosts) + { + Set hostIds = new HashSet<>(); + for (InetAddressAndPort host : hosts) + { + //find hostId from IP address + UUID hostId = ClusterMetadata.current().directory.hostId(ClusterMetadata.current().directory.peerId(host)); + hostIds.add(hostId); + if (hostId != null) + { + logger.info("Add host {} to the priority list", hostId); + } + } + if (!hostIds.isEmpty()) + { + SetSerializer serializer = SetSerializer.getInstance(UUIDSerializer.instance, UTF8Type.instance.comparatorSet); + addPriorityHost.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(serializer.serialize(hostIds), + ByteBufferUtil.bytes(repairType.toString()))), + Dispatcher.RequestTime.forImmediateExecution()); + } + } + + static void removePriorityStatus(RepairType repairType, UUID hostId) + { + logger.info("Remove host {} from priority list", hostId); + delStatementPriorityStatus.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, + Lists.newArrayList(ByteBufferUtil.bytes(hostId), + ByteBufferUtil.bytes(repairType.toString()))), + Dispatcher.RequestTime.forImmediateExecution()); + } + + public static Set getPriorityHostIds(RepairType repairType) + { + UntypedResultSet repairPriorityResult; + + ResultMessage.Rows repairPriorityRows = selectStatementRepairPriority.execute(QueryState.forInternalCalls(), + QueryOptions.forInternalCalls(internalQueryCL, Lists.newArrayList(ByteBufferUtil.bytes(repairType.toString()))), Dispatcher.RequestTime.forImmediateExecution()); + repairPriorityResult = UntypedResultSet.create(repairPriorityRows.result); + + Set priorities = null; + if (!repairPriorityResult.isEmpty()) + { + // there should be only one row + UntypedResultSet.Row row = repairPriorityResult.one(); + priorities = row.getSet(COL_REPAIR_PRIORITY, UUIDType.instance); + } + if (priorities != null) + { + return priorities; + } + return Collections.emptySet(); + } + + public static Set getPriorityHosts(RepairType repairType) + { + Set hosts = new HashSet<>(); + for (UUID hostId : getPriorityHostIds(repairType)) + { + InetAddressAndPort broadcastAddress = getBroadcastAddress(hostId); + if (broadcastAddress == null) + { + logger.warn("Could not resolve broadcastAddress for {}, skipping considering it as a priority host", hostId); + continue; + } + hosts.add(broadcastAddress); + } + return hosts; + } + + public static boolean shouldConsiderKeyspace(Keyspace ks) + { + AbstractReplicationStrategy replicationStrategy = ks.getReplicationStrategy(); + boolean repair = true; + if (replicationStrategy instanceof NetworkTopologyStrategy) + { + Set datacenters = ((NetworkTopologyStrategy) replicationStrategy).getDatacenters(); + String localDC = DatabaseDescriptor.getLocator().local().datacenter; + if (!datacenters.contains(localDC)) + { + repair = false; + } + } + if (replicationStrategy instanceof LocalStrategy || replicationStrategy instanceof MetaStrategy) + { + repair = false; + } + if (ks.getName().equalsIgnoreCase(SchemaConstants.TRACE_KEYSPACE_NAME)) + { + // by default, ignore the tables under system_traces as they do not have + // that much important data + repair = false; + } + return repair; + } + + public static boolean tableMaxRepairTimeExceeded(RepairType repairType, long startTime) + { + long tableRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds + (currentTimeMillis() - startTime); + return AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() < + tableRepairTimeSoFar; + } + + public static boolean keyspaceMaxRepairTimeExceeded(RepairType repairType, long startTime, int numOfTablesToBeRepaired) + { + long keyspaceRepairTimeSoFar = TimeUnit.MILLISECONDS.toSeconds((currentTimeMillis() - startTime)); + return (long) AutoRepairService.instance.getAutoRepairConfig().getAutoRepairTableMaxRepairTime(repairType).toSeconds() * + numOfTablesToBeRepaired < keyspaceRepairTimeSoFar; + } + + public static List getAllMVs(RepairType repairType, Keyspace keyspace, TableMetadata tableMetadata) + { + List allMvs = new ArrayList<>(); + if (AutoRepairService.instance.getAutoRepairConfig().getMaterializedViewRepairEnabled(repairType) && keyspace.getMetadata().views != null) + { + Iterator views = keyspace.getMetadata().views.forTable(tableMetadata.id).iterator(); + while (views.hasNext()) + { + String viewName = views.next().name(); + logger.info("Adding MV to the list {}.{}.{}", keyspace.getName(), tableMetadata.name, viewName); + allMvs.add(viewName); + } + } + return allMvs; + } + + public static void runRepairOnNewlyBootstrappedNodeIfEnabled() + { + AutoRepairConfig repairConfig = DatabaseDescriptor.getAutoRepairConfig(); + if (repairConfig.isAutoRepairSchedulingEnabled()) + { + for (AutoRepairConfig.RepairType rType : AutoRepairConfig.RepairType.values()) + if (repairConfig.isAutoRepairEnabled(rType) && repairConfig.getForceRepairNewNode(rType)) + AutoRepairUtils.setForceRepairNewNode(rType); + } + } + + public static Collection> split(Range tokenRange, int numberOfSplits) + { + Collection> ranges; + Optional splitter = DatabaseDescriptor.getPartitioner().splitter(); + if (splitter.isEmpty()) + { + NoSpamLogger.log(logger, NoSpamLogger.Level.WARN, 30, TimeUnit.MINUTES, "Partitioner {} does not support splitting, falling back to splitting by token range", DatabaseDescriptor.getPartitioner()); + ranges = Collections.singleton(tokenRange); + } + else + { + ranges = splitter.get().split(Collections.singleton(tokenRange), numberOfSplits); + } + return ranges; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java new file mode 100644 index 000000000000..a6dddb3060bb --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitter.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.repair.autorepair; + + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.service.AutoRepairService; + +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.compatibility.TokenRingUtils; +import org.apache.cassandra.utils.FBUtilities; + +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; + +/** + * An implementation that splits token ranges into a fixed number of subranges. + */ +public class FixedSplitTokenRangeSplitter implements IAutoRepairTokenRangeSplitter +{ + private static final Logger logger = LoggerFactory.getLogger(FixedSplitTokenRangeSplitter.class); + + /** + * Selecting the default value is tricky. If we select a small number, individual repairs would be heavy. + * On the other hand, if we select a large number, too many repair sessions would be created. + *

      + * If vnodes are configured using num_tokens, attempts to evenly subdivide subranges by each range + * using the following formula: + *

      + * Math.max(1, numberOfSubranges / tokens.size()) + *

      + * To maintain balance, 32 serves as a good default that accommodates both vnodes and non-vnodes effectively. + */ + public static final int DEFAULT_NUMBER_OF_SUBRANGES = 32; + + /** + * Number of evenly split subranges to create for each node that repair runs for. + *

      + * If vnodes are configured using num_tokens, attempts to evenly subdivide subranges by each range. + * For example, for num_tokens: 16 and number_of_subranges: 32, 2 (32/16) + * repair assignments will be created for each token range. At least one repair assignment will be + * created for each token range. + */ + static final String NUMBER_OF_SUBRANGES = "number_of_subranges"; + + private final AutoRepairConfig.RepairType repairType; + private int numberOfSubranges; + + public FixedSplitTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map parameters) + { + this.repairType = repairType; + + numberOfSubranges = Integer.parseInt(parameters.getOrDefault(NUMBER_OF_SUBRANGES, Integer.toString(DEFAULT_NUMBER_OF_SUBRANGES))); + } + + @Override + public Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans) + { + return new RepairAssignmentIterator(repairPlans) + { + @Override + protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) + { + return getRepairAssignmentsForKeyspace(primaryRangeOnly, priority, repairPlan); + } + }; + } + + private KeyspaceRepairAssignments getRepairAssignmentsForKeyspace(boolean primaryRangeOnly, int priority, KeyspaceRepairPlan repairPlan) + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + List repairAssignments = new ArrayList<>(); + String keyspaceName = repairPlan.getKeyspaceName(); + List tableNames = repairPlan.getTableNames(); + + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(keyspaceName, FBUtilities.getBroadcastAddressAndPort()); + if (!primaryRangeOnly) + { + // if we need to repair non-primary token ranges, then change the tokens accordingly + tokens = StorageService.instance.getLocalReplicas(keyspaceName).onlyFull().ranges(); + } + + boolean byKeyspace = config.getRepairByKeyspace(repairType); + // collect all token ranges. + List> allRanges = new ArrayList<>(); + // this is done to avoid micro splits in the case of vnodes + int splitsPerRange = Math.max(1, numberOfSubranges / tokens.size()); + for (Range token : tokens) + { + allRanges.addAll(split(token, splitsPerRange)); + } + + if (byKeyspace) + { + for (Range splitRange : allRanges) + { + // add repair assignment for each range entire keyspace's tables + repairAssignments.add(new RepairAssignment(splitRange, keyspaceName, tableNames)); + } + } + else + { + // add repair assignment per table + for (String tableName : tableNames) + { + for (Range splitRange : allRanges) + { + repairAssignments.add(new RepairAssignment(splitRange, keyspaceName, Collections.singletonList(tableName))); + } + } + } + return new KeyspaceRepairAssignments(priority, keyspaceName, repairAssignments); + } + + @Override + public void setParameter(String key, String value) + { + if (!key.equals(NUMBER_OF_SUBRANGES)) + { + throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be " + NUMBER_OF_SUBRANGES); + } + logger.info("Setting {} to {} for repair type {}", key, value, repairType); + this.numberOfSubranges = Integer.parseInt(value); + } + + @Override + public Map getParameters() + { + return Collections.singletonMap(NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubranges)); + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java new file mode 100644 index 000000000000..8b82eac296db --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/IAutoRepairTokenRangeSplitter.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.repair.autorepair; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.cassandra.config.ParameterizedClass; + +/** + * Interface that defines how to generate {@link KeyspaceRepairAssignments}. + *

      + * The default is {@link RepairTokenRangeSplitter} which aims to provide sensible defaults for all repair types. + *

      + * Custom implementations class should require a constructor accepting + * ({@link AutoRepairConfig.RepairType}, {@link java.util.Map}) with the {@link java.util.Map} parameter accepting + * custom configuration for your splitter. If such a constructor does not exist, + * {@link AutoRepairConfig#newAutoRepairTokenRangeSplitter(AutoRepairConfig.RepairType, ParameterizedClass)} + * will fall back on invoking a default zero argument constructor. + */ +public interface IAutoRepairTokenRangeSplitter +{ + /** + * Split the token range you wish to repair into multiple assignments. + * The autorepair framework will repair the assignments from returned subrange iterator in the sequence it's + * provided. + * @param primaryRangeOnly Whether to repair only this node's primary ranges or all of its ranges. + * @param repairPlans A list of ordered prioritized repair plans to generate assignments for in order. + * @return iterator of repair assignments, with each element representing a grouping of repair assignments for a given keyspace. + * The iterator is traversed lazily {@link KeyspaceRepairAssignments} at a time with the intent to try to get the + * most up-to-date representation of your data (e.g. how much data exists and is unrepaired at a given time). + */ + Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans); + + /** + * Update a configuration parameter. This is meant to be used by nodetool setautorepairconfig to + * update configuration dynamically. + * @param key parameter to update + * @param value The value to set to. + */ + default void setParameter(String key, String value) + { + throw new IllegalArgumentException(this.getClass().getName() + " does not support custom configuration"); + } + + /** + * @return custom configuration. This is meant to be used by nodetool getautorepairconfig for + * retrieving the splitter configuration. + */ + default Map getParameters() + { + return Collections.emptyMap(); + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java new file mode 100644 index 000000000000..3ea91e9922f9 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairAssignments.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; + +/** + * A grouping of repair assignments that were generated for a particular keyspace for a given priority. + */ +public class KeyspaceRepairAssignments +{ + private final int priority; + private final String keyspaceName; + private final List repairAssignments; + + public KeyspaceRepairAssignments(int priority, String keyspaceName, List repairAssignments) + { + this.priority = priority; + this.keyspaceName = keyspaceName; + this.repairAssignments = repairAssignments; + } + + public int getPriority() + { + return priority; + } + + public String getKeyspaceName() + { + return keyspaceName; + } + + public List getRepairAssignments() + { + return repairAssignments; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java new file mode 100644 index 000000000000..3c13e3d80d08 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/KeyspaceRepairPlan.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; +import java.util.Objects; + +/** + * Encapsulates an intent to repair the given keyspace's tables + */ +public class KeyspaceRepairPlan +{ + private final String keyspaceName; + + private final List tableNames; + + public KeyspaceRepairPlan(String keyspaceName, List tableNames) + { + this.keyspaceName = keyspaceName; + this.tableNames = tableNames; + } + + public String getKeyspaceName() + { + return keyspaceName; + } + + public List getTableNames() + { + return tableNames; + } + + @Override + public boolean equals(Object o) + { + if (o == null || getClass() != o.getClass()) return false; + KeyspaceRepairPlan that = (KeyspaceRepairPlan) o; + return Objects.equals(keyspaceName, that.keyspaceName) && Objects.equals(tableNames, that.tableNames); + } + + @Override + public int hashCode() + { + return Objects.hash(keyspaceName, tableNames); + } + + @Override + public String toString() + { + return "KeyspaceRepairPlan{" + + "keyspaceName='" + keyspaceName + '\'' + + ", tableNames=" + tableNames + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java new file mode 100644 index 000000000000..fbedb71b7c3c --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlan.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeSet; +import java.util.function.Consumer; + +import org.apache.cassandra.db.ColumnFamilyStore; + +/** + * Encapsulates a devised plan to repair tables, grouped by their keyspace and a given priority. This is used + * by {@link AutoRepair} to pass in an organized plan to + * {@link IAutoRepairTokenRangeSplitter#getRepairAssignments(boolean, List)} which + * can iterate over this plan in order to generate {@link RepairAssignment}s. + */ +public class PrioritizedRepairPlan +{ + private final int priority; + + private final List keyspaceRepairPlans; + + public PrioritizedRepairPlan(int priority, List keyspaceRepairPlans) + { + this.priority = priority; + this.keyspaceRepairPlans = keyspaceRepairPlans; + } + + public int getPriority() + { + return priority; + } + + public List getKeyspaceRepairPlans() + { + return keyspaceRepairPlans; + } + + @Override + public boolean equals(Object o) + { + if (o == null || getClass() != o.getClass()) return false; + PrioritizedRepairPlan that = (PrioritizedRepairPlan) o; + return priority == that.priority && Objects.equals(keyspaceRepairPlans, that.keyspaceRepairPlans); + } + + @Override + public int hashCode() + { + return Objects.hash(priority, keyspaceRepairPlans); + } + + @Override + public String toString() + { + return "PrioritizedRepairPlan{" + + "priority=" + priority + + ", keyspaceRepairPlans=" + keyspaceRepairPlans + + '}'; + } + + /** + * Builds a list of {@link PrioritizedRepairPlan}s for the given keyspace and table map, ordered by priority from + * highest to lowest, where priority is derived from table schema's defined priority for the given repair type. + *

      + * If a keyspace has tables with differing priorities, those tables will be included in the PrioritizedRepairPlan + * for their given priority. + * + * @param keyspacesToTableNames A mapping keyspace to table names + * @param repairType The repair type that is being executed + * @param orderFunc A function to order keyspace and tables in the returned plan. + * @return Ordered list of plan's by table priorities. + */ + public static List build(Map> keyspacesToTableNames, AutoRepairConfig.RepairType repairType, Consumer> orderFunc) + { + // Build a map of priority -> (keyspace -> tables) + Map>> plans = new HashMap<>(); + for (Map.Entry> keyspaceToTableNames : keyspacesToTableNames.entrySet()) + { + String keyspaceName = keyspaceToTableNames.getKey(); + for (String tableName : keyspaceToTableNames.getValue()) + { + int priority = getPriority(repairType, keyspaceName, tableName); + Map> keyspacesForPriority = plans.computeIfAbsent(priority, p -> new HashMap<>()); + List tableNamesAtPriority = keyspacesForPriority.computeIfAbsent(keyspaceName, k -> new ArrayList<>()); + tableNamesAtPriority.add(tableName); + } + } + + // Extract map into a List ordered by priority from highest to lowest. + List planList = new ArrayList<>(plans.size()); + TreeSet priorities = new TreeSet<>(Comparator.reverseOrder()); + priorities.addAll(plans.keySet()); + for (int priority : priorities) + { + Map> keyspacesAndTables = plans.get(priority); + List keyspaceRepairPlans = new ArrayList<>(keyspacesAndTables.size()); + planList.add(new PrioritizedRepairPlan(priority, keyspaceRepairPlans)); + + // Order keyspace and table names based on the input function (typically, this would shuffle the keyspace + // and table names randomly). + List keyspaceNames = new ArrayList<>(keyspacesAndTables.keySet()); + orderFunc.accept(keyspaceNames); + + for(String keyspaceName : keyspaceNames) + { + List tableNames = keyspacesAndTables.get(keyspaceName); + orderFunc.accept(tableNames); + KeyspaceRepairPlan keyspaceRepairPlan = new KeyspaceRepairPlan(keyspaceName, new ArrayList<>(tableNames)); + keyspaceRepairPlans.add(keyspaceRepairPlan); + } + } + + return planList; + } + + /** + * Convenience method to build a repair plan for a single keyspace with tables. Primarily useful in testing. + * @param keyspaceName Keyspace to repair + * @param tableNames tables to repair for the given keyspace. + * @return Single repair plan. + */ + static List buildSingleKeyspacePlan(AutoRepairConfig.RepairType repairType, String keyspaceName, String ... tableNames) + { + Map> keyspaceMap = new HashMap<>(); + keyspaceMap.put(keyspaceName, Arrays.asList(tableNames)); + return build(keyspaceMap, repairType, (l) -> {}); + } + + /** + * @return The priority of the given table if defined, otherwise 0. + */ + private static int getPriority(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName) + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + return cfs != null ? cfs.metadata().params.autoRepair.priority() : 0; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java new file mode 100644 index 000000000000..63f8fbed4426 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignment.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; +import java.util.Objects; + +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; + +/** + * Defines a repair assignment to be issued by the autorepair framework. + */ +public class RepairAssignment +{ + final Range tokenRange; + + final String keyspaceName; + + final List tableNames; + + public RepairAssignment(Range tokenRange, String keyspaceName, List tableNames) + { + this.tokenRange = tokenRange; + this.keyspaceName = keyspaceName; + this.tableNames = tableNames; + } + + public Range getTokenRange() + { + return tokenRange; + } + + public String getKeyspaceName() + { + return keyspaceName; + } + + public List getTableNames() + { + return tableNames; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + RepairAssignment that = (RepairAssignment) o; + return Objects.equals(tokenRange, that.tokenRange) && Objects.equals(keyspaceName, that.keyspaceName) && Objects.equals(tableNames, that.tableNames); + } + + @Override + public int hashCode() + { + return Objects.hash(tokenRange, keyspaceName, tableNames); + } + + @Override + public String toString() + { + return "RepairAssignment{" + + "tokenRange=" + tokenRange + + ", keyspaceName='" + keyspaceName + '\'' + + ", tableNames=" + tableNames + + '}'; + } +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java new file mode 100644 index 000000000000..44d9f5ef5e55 --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairAssignmentIterator.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Convenience {@link Iterator} implementation to assist implementations of + * {@link IAutoRepairTokenRangeSplitter#getRepairAssignments(boolean, List)} by passing {@link KeyspaceRepairPlan} + * to a custom {@link #next(int, KeyspaceRepairPlan)} method in priority order. + */ +public abstract class RepairAssignmentIterator implements Iterator +{ + private final Iterator repairPlanIterator; + + private Iterator currentIterator = null; + private PrioritizedRepairPlan currentPlan = null; + + public RepairAssignmentIterator(List repairPlans) + { + this.repairPlanIterator = repairPlans.iterator(); + } + + private synchronized Iterator currentIterator() + { + if (currentIterator == null || !currentIterator.hasNext()) + { + // Advance the repair plan iterator if the current repair plan is exhausted, but only + // if there are more repair plans. + if (repairPlanIterator.hasNext()) + { + currentPlan = repairPlanIterator.next(); + currentIterator = currentPlan.getKeyspaceRepairPlans().iterator(); + } + } + return currentIterator; + } + + @Override + public boolean hasNext() + { + Iterator iterator = currentIterator(); + return (iterator != null && iterator.hasNext()); + } + + @Override + public KeyspaceRepairAssignments next() + { + if (!hasNext()) + { + throw new NoSuchElementException("No remaining repair plans"); + } + + final KeyspaceRepairPlan repairPlan = currentIterator().next(); + return next(currentPlan.getPriority(), repairPlan); + } + + /** + * Invoked by {@link #next()} with the next {@link KeyspaceRepairPlan} for the given priority. + * @param priority current priority being processed. + * @param repairPlan the next keyspace repair plan to process + * @return assignments for the given keyspace at this priority. Should never return null, if one desires to + * short-circuit the iterator, override {@link #hasNext()}. + */ + protected abstract KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan); +} diff --git a/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java new file mode 100644 index 000000000000..20a79adc8e8b --- /dev/null +++ b/src/java/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitter.java @@ -0,0 +1,949 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; + +import org.apache.cassandra.tcm.compatibility.TokenRingUtils; +import org.apache.cassandra.utils.FBUtilities; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.clearspring.analytics.stream.cardinality.CardinalityMergeException; +import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; +import com.clearspring.analytics.stream.cardinality.ICardinality; +import org.apache.cassandra.config.DataStorageSpec; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.db.lifecycle.SSTableIntervalTree; +import org.apache.cassandra.db.lifecycle.SSTableSet; +import org.apache.cassandra.db.lifecycle.View; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; +import org.apache.cassandra.io.sstable.metadata.MetadataType; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.concurrent.Refs; + +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.split; + +/** + * The default implementation of {@link IAutoRepairTokenRangeSplitter} that attempts to: + *

        + *
      1. Create smaller, consistent repair times
      2. + *
      3. Minimize the impact on hosts
      4. + *
      5. Reduce overstreaming
      6. + *
      7. Reduce number of repairs
      8. + *
      + *

      + * To achieve these goals, this implementation inspects SSTable metadata to estimate the bytes and number of partitions + * within a range and splits it accordingly to bound the size of the token ranges used for repair assignments. + *

      + *

      + * Refer to + * Auto Repair documentation for this implementation + * for a more thorough breakdown of this implementation. + *

      + *

      + * While this splitter has a lot of tuning parameters, the expectation is that the established default configuration + * shall be sensible for all {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}'s. The following + * configuration parameters are offered. + *

      + * + *

      Configuration parameters:

      + *
        + *
      • bytes_per_assignment – Target size (in compressed bytes) for each repair. Throttles incremental repair + * and anticompaction per schedule after incremental repairs are enabled.
      • + * + *
      • max_bytes_per_schedule – Maximum data (in compressed bytes) to cover in a single schedule. Acts as a + * throttle for the repair cycle workload. Tune this up if writes are outpacing repair, or down if repairs are too + * disruptive. Alternatively, adjust {@code min_repair_interval}.
      • + * + *
      • partitions_per_assignment – Maximum number of partitions per repair assignment. Limits the number of + * partitions in Merkle tree leaves to prevent overstreaming.
      • + * + *
      • max_tables_per_assignment – Maximum number of tables to include in a single repair assignment. + * Especially useful for keyspaces with many tables. Prevents excessive batching of tables that exceed other + * parameters like {@code bytes_per_assignment} or {@code partitions_per_assignment}.
      • + *
      + */ +public class RepairTokenRangeSplitter implements IAutoRepairTokenRangeSplitter +{ + private static final Logger logger = LoggerFactory.getLogger(RepairTokenRangeSplitter.class); + + // Default max bytes to 100TiB, which is much more readable than Long.MAX_VALUE + private static final DataStorageSpec.LongBytesBound MAX_BYTES = new DataStorageSpec.LongBytesBound(102_400, DataStorageSpec.DataStorageUnit.GIBIBYTES); + + /** + * The target bytes that should be included in a repair assignment + */ + static final String BYTES_PER_ASSIGNMENT = "bytes_per_assignment"; + + /** + * Maximum number of partitions to include in a repair assignment + */ + static final String PARTITIONS_PER_ASSIGNMENT = "partitions_per_assignment"; + + /** + * Maximum number of tables to include in a repair assignment if {@link AutoRepairConfig.Options#repair_by_keyspace} + * is enabled + */ + static final String MAX_TABLES_PER_ASSIGNMENT = "max_tables_per_assignment"; + + /** + * The maximum number of bytes to cover in an individual schedule + */ + static final String MAX_BYTES_PER_SCHEDULE = "max_bytes_per_schedule"; + + static final List PARAMETERS = Arrays.asList(BYTES_PER_ASSIGNMENT, PARTITIONS_PER_ASSIGNMENT, MAX_TABLES_PER_ASSIGNMENT, MAX_BYTES_PER_SCHEDULE); + + private final AutoRepairConfig.RepairType repairType; + + private final Map givenParameters = new HashMap<>(); + + private DataStorageSpec.LongBytesBound bytesPerAssignment; + private long partitionsPerAssignment; + private int maxTablesPerAssignment; + private DataStorageSpec.LongBytesBound maxBytesPerSchedule; + + /** + * Established default for each {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType}, meant to + * choose sensible defaults for each. + *

      + * Defaults if not specified for the given repair type: + *

    28. + *
        bytes_per_assignment: 50GiB
      + *
        partitions_per_assignment: 1048576 (2^20)
      + *
        max_tables_per_assignment: 64
      + *
        max_bytes_per_schedule: 1000GiB
      + *
    29. + * It's expected that these defaults should work well for everything except incremental, where we set + * max_bytes_per_schedule to 100GiB. This should strike a good balance between the amount of data that will be + * repaired during an initial migration to incremental repair and should move the entire repaired set from + * unrepaired to repaired at steady state, assuming not more the 100GiB of data is written to a node per + * min_repair_interval. + */ + private static final Map DEFAULTS_BY_REPAIR_TYPE = new EnumMap<>(AutoRepairConfig.RepairType.class) + {{ + put(AutoRepairConfig.RepairType.FULL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.FULL) + .build()); + // Restrict incremental repair to 100GiB max bytes per schedule to confine the amount of possible autocompaction. + put(AutoRepairConfig.RepairType.INCREMENTAL, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.INCREMENTAL) + .withMaxBytesPerSchedule(new DataStorageSpec.LongBytesBound("100GiB")) + .build()); + put(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, RepairTypeDefaults.builder(AutoRepairConfig.RepairType.PREVIEW_REPAIRED) + .build()); + }}; + + public RepairTokenRangeSplitter(AutoRepairConfig.RepairType repairType, Map parameters) + { + this.repairType = repairType; + this.givenParameters.putAll(parameters); + + reinitParameters(); + } + + private void reinitParameters() + { + RepairTypeDefaults defaults = DEFAULTS_BY_REPAIR_TYPE.get(repairType); + + DataStorageSpec.LongBytesBound bytesPerAssignmentTmp = getPropertyOrDefault(BYTES_PER_ASSIGNMENT, DataStorageSpec.LongBytesBound::new, defaults.bytesPerAssignment); + DataStorageSpec.LongBytesBound maxBytesPerScheduleTmp = getPropertyOrDefault(MAX_BYTES_PER_SCHEDULE, DataStorageSpec.LongBytesBound::new, defaults.maxBytesPerSchedule); + + // Validate that bytesPerAssignment <= maxBytesPerSchedule + if (bytesPerAssignmentTmp.toBytes() > maxBytesPerScheduleTmp.toBytes()) + { + throw new IllegalArgumentException(String.format("%s='%s' cannot be greater than %s='%s' for %s", + BYTES_PER_ASSIGNMENT, + bytesPerAssignmentTmp, + MAX_BYTES_PER_SCHEDULE, + maxBytesPerScheduleTmp, + repairType.getConfigName())); + } + + bytesPerAssignment = bytesPerAssignmentTmp; + maxBytesPerSchedule = maxBytesPerScheduleTmp; + + partitionsPerAssignment = getPropertyOrDefault(PARTITIONS_PER_ASSIGNMENT, Long::parseLong, defaults.partitionsPerAssignment); + maxTablesPerAssignment = getPropertyOrDefault(MAX_TABLES_PER_ASSIGNMENT, Integer::parseInt, defaults.maxTablesPerAssignment); + + logger.info("Configured {}[{}] with {}={}, {}={}, {}={}, {}={}", RepairTokenRangeSplitter.class.getName(), + repairType.getConfigName(), + BYTES_PER_ASSIGNMENT, bytesPerAssignment, + PARTITIONS_PER_ASSIGNMENT, partitionsPerAssignment, + MAX_TABLES_PER_ASSIGNMENT, maxTablesPerAssignment, + MAX_BYTES_PER_SCHEDULE, maxBytesPerSchedule); + } + + private T getPropertyOrDefault(String propertyName, Function mapper, T defaultValue) + { + return Optional.ofNullable(this.givenParameters.get(propertyName)).map(mapper).orElse(defaultValue); + } + + @Override + public Iterator getRepairAssignments(boolean primaryRangeOnly, List repairPlans) + { + return new BytesBasedRepairAssignmentIterator(primaryRangeOnly, repairPlans); + } + + /** + * A custom {@link RepairAssignmentIterator} that confines the number of repair assignments to + * max_bytes_per_schedule. + */ + private class BytesBasedRepairAssignmentIterator extends RepairAssignmentIterator { + + private final boolean primaryRangeOnly; + private long bytesSoFar = 0; + + BytesBasedRepairAssignmentIterator(boolean primaryRangeOnly, List repairPlans) + { + super(repairPlans); + this.primaryRangeOnly = primaryRangeOnly; + } + + @Override + protected KeyspaceRepairAssignments next(int priority, KeyspaceRepairPlan repairPlan) + { + // short circuit if we've accumulated too many bytes by returning a KeyspaceRepairAssignments with + // no assignments. We do this rather than returning false in hasNext() because we want to signal + // to AutoRepair that a keyspace generated no assignments. + if (bytesSoFar >= maxBytesPerSchedule.toBytes()) + { + return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), Collections.emptyList()); + } + + List> tokenRanges = getTokenRanges(primaryRangeOnly, repairPlan.getKeyspaceName()); + // shuffle token ranges to unbias selection of ranges + Collections.shuffle(tokenRanges); + List repairAssignments = new ArrayList<>(); + // Generate assignments for each range speparately + for (Range tokenRange : tokenRanges) + { + repairAssignments.addAll(getRepairAssignmentsForKeyspace(repairType, repairPlan.getKeyspaceName(), repairPlan.getTableNames(), tokenRange)); + } + + FilteredRepairAssignments filteredRepairAssignments = filterRepairAssignments(priority, repairPlan.getKeyspaceName(), repairAssignments, bytesSoFar); + bytesSoFar = filteredRepairAssignments.newBytesSoFar; + return new KeyspaceRepairAssignments(priority, repairPlan.getKeyspaceName(), filteredRepairAssignments.repairAssignments); + } + } + + @VisibleForTesting + List getRepairAssignmentsForKeyspace(AutoRepairConfig.RepairType repairType, String keyspaceName, List tableNames, Range tokenRange) + { + List repairAssignments = new ArrayList<>(); + // this is used for batching minimal single assignment tables together + List currentAssignments = new ArrayList<>(); + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + + // If we can repair by keyspace, sort the tables by size so can batch the smallest ones together + boolean repairByKeyspace = config.getRepairByKeyspace(repairType); + List tablesToProcess = tableNames; + if (repairByKeyspace) + { + tablesToProcess = tableNames.stream().sorted((t1, t2) -> { + ColumnFamilyStore cfs1 = ColumnFamilyStore.getIfExists(keyspaceName, t1); + ColumnFamilyStore cfs2 = ColumnFamilyStore.getIfExists(keyspaceName, t2); + // If for whatever reason the CFS is not retrievable, we can assume it has been deleted, so give the + // other cfs precedence. + if (cfs1 == null) + { + // cfs1 is lesser than because its null + return -1; + } + else if (cfs2 == null) + { + // cfs1 is greather than because cfs2 is null + return 1; + } + return Long.compare(cfs1.metric.totalDiskSpaceUsed.getCount(), cfs2.metric.totalDiskSpaceUsed.getCount()); + }).collect(Collectors.toList()); + } + + for (String tableName : tablesToProcess) + { + List tableAssignments = getRepairAssignmentsForTable(keyspaceName, tableName, tokenRange); + + if (tableAssignments.isEmpty()) + continue; + + // if not repairing by keyspace don't attempt to batch them with others. + if (!repairByKeyspace) + { + repairAssignments.addAll(tableAssignments); + } + // If the table assignments are for the same token range, and we have room to add more tables to the current assignment + else if (tableAssignments.size() == 1 && + currentAssignments.size() < maxTablesPerAssignment && + (currentAssignments.isEmpty() || currentAssignments.get(0).getTokenRange().equals(tableAssignments.get(0).getTokenRange()))) + { + long currentAssignmentsBytes = getEstimatedBytes(currentAssignments); + long tableAssignmentsBytes = getEstimatedBytes(tableAssignments); + // only add assignments together if they don't exceed max bytes per schedule. + if (currentAssignmentsBytes + tableAssignmentsBytes < maxBytesPerSchedule.toBytes()) + { + currentAssignments.addAll(tableAssignments); + } + else + { + // add table assignments by themselves + repairAssignments.addAll(tableAssignments); + } + } + else + { + if (!currentAssignments.isEmpty()) + { + repairAssignments.add(merge(currentAssignments)); + currentAssignments.clear(); + } + repairAssignments.addAll(tableAssignments); + } + } + + if (!currentAssignments.isEmpty()) + repairAssignments.add(merge(currentAssignments)); + + return repairAssignments; + } + + /** + * Given a repair type and map of sized-based repair assignments, confine them by maxBytesPerSchedule. + * @param repairAssignments the assignments to filter. + * @param bytesSoFar repair assignment bytes accumulated so far. + * @return A list of repair assignments confined by maxBytesPerSchedule. + */ + @VisibleForTesting + FilteredRepairAssignments filterRepairAssignments(int priority, String keyspaceName, List repairAssignments, long bytesSoFar) + { + // Confine repair assignments by maxBytesPerSchedule. + long bytesSoFarThisIteration = 0L; + long bytesNotRepaired = 0L; + int assignmentsNotRepaired = 0; + int assignmentsToRepair = 0; + int totalAssignments = 0; + + List assignmentsToReturn = new ArrayList<>(repairAssignments.size()); + for (SizedRepairAssignment repairAssignment : repairAssignments) + { + totalAssignments++; + // skip any repair assignments that would accumulate us past the maxBytesPerSchedule + if (bytesSoFar + repairAssignment.getEstimatedBytes() > maxBytesPerSchedule.toBytes()) + { + // log that repair assignment was skipped. + bytesNotRepaired += repairAssignment.getEstimatedBytes(); + assignmentsNotRepaired++; + logger.warn("Skipping {} because it would increase total repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar + repairAssignment.getEstimatedBytes())); + } + else + { + bytesSoFar += repairAssignment.getEstimatedBytes(); + bytesSoFarThisIteration += repairAssignment.getEstimatedBytes(); + assignmentsToRepair++; + logger.info("Adding {}, increasing repair bytes to {}", + repairAssignment, + getBytesOfMaxBytesPerSchedule(bytesSoFar)); + assignmentsToReturn.add(repairAssignment); + } + } + + String message = "Returning {} assignment(s) for priorityBucket {} and keyspace {}, totaling {} ({} overall)"; + if (assignmentsNotRepaired != 0) + { + message += ". Skipping {} of {} assignment(s), totaling {}"; + if (repairType != AutoRepairConfig.RepairType.INCREMENTAL) + { + message += ". The entire primary range will not be repaired this schedule. " + + "Consider increasing maxBytesPerSchedule, reducing node density or monitoring to ensure " + + "all ranges do get repaired within gc_grace_seconds"; + logger.warn(message, assignmentsToRepair, priority, keyspaceName, + FileUtils.stringifyFileSize(bytesSoFarThisIteration), + getBytesOfMaxBytesPerSchedule(bytesSoFar), + assignmentsNotRepaired, totalAssignments, + FileUtils.stringifyFileSize(bytesNotRepaired)); + } + else + { + logger.info(message, assignmentsToRepair, priority, keyspaceName, + FileUtils.stringifyFileSize(bytesSoFarThisIteration), + getBytesOfMaxBytesPerSchedule(bytesSoFar), + assignmentsNotRepaired, totalAssignments, + FileUtils.stringifyFileSize(bytesNotRepaired)); + } + } + else + { + logger.info(message, assignmentsToRepair, priority, keyspaceName, + FileUtils.stringifyFileSize(bytesSoFarThisIteration), + getBytesOfMaxBytesPerSchedule(bytesSoFar)); + } + + return new FilteredRepairAssignments(assignmentsToReturn, bytesSoFar); + } + + @VisibleForTesting + static class FilteredRepairAssignments + { + final List repairAssignments; + final long newBytesSoFar; + + private FilteredRepairAssignments(List repairAssignments, long newBytesSoFar) + { + this.repairAssignments = repairAssignments; + this.newBytesSoFar = newBytesSoFar; + } + } + + private String getBytesOfMaxBytesPerSchedule(long bytes) + { + if (maxBytesPerSchedule.equals(MAX_BYTES)) + return FileUtils.stringifyFileSize(bytes); + else + return String.format("%s of %s", FileUtils.stringifyFileSize(bytes), maxBytesPerSchedule); + } + + /** + * @return The sum of {@link SizedRepairAssignment#getEstimatedBytes()} of all given + * repairAssignments. + * @param repairAssignments The assignments to sum + */ + @VisibleForTesting + protected static long getEstimatedBytes(List repairAssignments) + { + return repairAssignments + .stream() + .mapToLong(SizedRepairAssignment::getEstimatedBytes) + .sum(); + } + + @VisibleForTesting + static SizedRepairAssignment merge(List assignments) + { + if (assignments.isEmpty()) + throw new IllegalStateException("Cannot merge empty assignments"); + + Set mergedTableNames = new HashSet<>(); + Range referenceTokenRange = assignments.get(0).getTokenRange(); + String referenceKeyspaceName = assignments.get(0).getKeyspaceName(); + + for (SizedRepairAssignment assignment : assignments) + { + // These checks _should_ be unnecessary but are here to ensure that the assignments are consistent + if (!assignment.getTokenRange().equals(referenceTokenRange)) + throw new IllegalStateException("All assignments must have the same token range"); + if (!assignment.getKeyspaceName().equals(referenceKeyspaceName)) + throw new IllegalStateException("All assignments must have the same keyspace name"); + + mergedTableNames.addAll(assignment.getTableNames()); + } + + long sizeForAssignment = getEstimatedBytes(assignments); + return new SizedRepairAssignment(referenceTokenRange, referenceKeyspaceName, new ArrayList<>(mergedTableNames), + "full primary range for " + mergedTableNames.size() + " tables", sizeForAssignment); + } + + @VisibleForTesting + protected List getRepairAssignmentsForTable(String keyspaceName, String tableName, Range tokenRange) + { + List sizeEstimates = getRangeSizeEstimate(keyspaceName, tableName, tokenRange); + return getRepairAssignments(sizeEstimates); + } + + private static void logSkippingTable(String keyspaceName, String tableName) + { + logger.warn("Could not resolve table data for {}.{} assuming it has since been deleted, skipping", keyspaceName, tableName); + } + + @VisibleForTesting + protected List getRepairAssignments(List sizeEstimates) + { + List repairAssignments = new ArrayList<>(); + + // since its possible for us to hit maxBytesPerSchedule before seeing all ranges, shuffle so there is chance + // at least of hitting all the ranges _eventually_ for the worst case scenarios + Collections.shuffle(sizeEstimates); + int totalExpectedSubRanges = 0; + for (SizeEstimate estimate : sizeEstimates) + { + if (estimate.sizeForRepair != 0) + { + boolean needsSplitting = estimate.sizeForRepair > bytesPerAssignment.toBytes() || estimate.partitions > partitionsPerAssignment; + if (needsSplitting) + { + totalExpectedSubRanges += calculateNumberOfSplits(estimate); + } + } + } + for (SizeEstimate estimate : sizeEstimates) + { + if (estimate.sizeForRepair == 0) + { + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(estimate.keyspace, estimate.table); + + if (cfs == null) + { + logSkippingTable(estimate.keyspace, estimate.table); + continue; + } + + long memtableSize = cfs.getTracker().getView().getCurrentMemtable().getLiveDataSize(); + if (memtableSize > 0L) + { + logger.debug("Included {}.{} range {}, had no unrepaired SSTables, but memtableSize={}, adding single repair assignment", estimate.keyspace, estimate.table, estimate.tokenRange, memtableSize); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "full primary rangee for table with memtable only detected", memtableSize); + repairAssignments.add(assignment); + } + else + { + logger.debug("Included {}.{} range {}, has no SSTables or memtable data, but adding single repair assignment for entire range in case writes were missed", estimate.keyspace, estimate.table, estimate.tokenRange); + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, Collections.singletonList(estimate.table), "full primary range for table with no data detected", 0L); + repairAssignments.add(assignment); + } + } + else + { + // Check if the estimate needs splitting based on the criteria + boolean needsSplitting = estimate.sizeForRepair > bytesPerAssignment.toBytes() || estimate.partitions > partitionsPerAssignment; + if (needsSplitting) + { + int numberOfSplits = calculateNumberOfSplits(estimate); + long approximateBytesPerSplit = estimate.sizeForRepair / numberOfSplits; + Collection> subranges = split(estimate.tokenRange, numberOfSplits); + for (Range subrange : subranges) + { + SizedRepairAssignment assignment = new SizedRepairAssignment(subrange, estimate.keyspace, Collections.singletonList(estimate.table), + String.format("subrange %d of %d", repairAssignments.size()+1, totalExpectedSubRanges), + approximateBytesPerSplit); + repairAssignments.add(assignment); + } + } + else + { + // No splitting needed, repair the entire range as-is + SizedRepairAssignment assignment = new SizedRepairAssignment(estimate.tokenRange, estimate.keyspace, + Collections.singletonList(estimate.table), + "full primary range for table", estimate.sizeForRepair); + repairAssignments.add(assignment); + } + } + } + return repairAssignments; + } + + private int calculateNumberOfSplits(SizeEstimate estimate) + { + // Calculate the number of splits needed for size and partitions + int splitsForSize = (int) Math.ceil((double) estimate.sizeForRepair / bytesPerAssignment.toBytes()); + int splitsForPartitions = (int) Math.ceil((double) estimate.partitions / partitionsPerAssignment); + + // Split the token range into subranges based on whichever (partitions, bytes) would generate the most splits. + boolean splitBySize = splitsForSize > splitsForPartitions; + int splits = splitBySize ? splitsForSize : splitsForPartitions; + + // calculate approximation for logging purposes + long approximateBytesPerSplit = estimate.sizeForRepair / splits; + long approximatePartitionsPerSplit = estimate.partitions / splits; + + logger.info("Splitting {}.{} for range {} into {} sub ranges by {} (splitsForSize={}, splitsForPartitions={}, " + + "approximateBytesInRange={}, approximatePartitionsInRange={}, " + + "approximateBytesPerSplit={}, approximatePartitionsPerSplit={})", + estimate.keyspace, estimate.table, estimate.tokenRange, + splits, splitBySize ? "size" : "partitions", + splitsForSize, splitsForPartitions, + FileUtils.stringifyFileSize(estimate.sizeForRepair), estimate.partitions, + FileUtils.stringifyFileSize(approximateBytesPerSplit), approximatePartitionsPerSplit + ); + return splits; + } + + private List> getTokenRanges(boolean primaryRangeOnly, String keyspaceName) + { + // Collect all applicable token ranges + Collection> wrappedRanges; + if (primaryRangeOnly) + { + wrappedRanges = TokenRingUtils.getPrimaryRangesForEndpoint(keyspaceName, FBUtilities.getBroadcastAddressAndPort()); + } + else + { + wrappedRanges = StorageService.instance.getLocalRanges(keyspaceName); + } + + // Unwrap each range as we need to account for ranges that overlap the ring + List> ranges = new ArrayList<>(); + for (Range wrappedRange : wrappedRanges) + { + ranges.addAll(wrappedRange.unwrap()); + } + return ranges; + } + + private List getRangeSizeEstimate(String keyspace, String table, Range tokenRange) + { + List sizeEstimates = new ArrayList<>(); + logger.debug("Calculating size estimate for {}.{} for range {}", keyspace, table, tokenRange); + try (Refs refs = getSSTableReaderRefs(repairType, keyspace, table, tokenRange)) + { + SizeEstimate estimate = getSizesForRangeOfSSTables(repairType, keyspace, table, tokenRange, refs); + logger.debug("Generated size estimate {}", estimate); + sizeEstimates.add(estimate); + } + return sizeEstimates; + } + + @VisibleForTesting + static SizeEstimate getSizesForRangeOfSSTables(AutoRepairConfig.RepairType repairType, String keyspace, String table, Range tokenRange, Refs refs) + { + List> singletonRange = Collections.singletonList(tokenRange); + ICardinality cardinality = new HyperLogLogPlus(13, 25); + long approxBytesInRange = 0L; + long totalBytes = 0L; + + for (SSTableReader reader : refs) + { + try + { + if (reader.openReason == SSTableReader.OpenReason.EARLY) + continue; + CompactionMetadata metadata = (CompactionMetadata) reader.descriptor.getMetadataSerializer().deserialize(reader.descriptor, MetadataType.COMPACTION); + if (metadata != null) + cardinality = cardinality.merge(metadata.cardinalityEstimator); + + // use onDiskLength, which is the actual size of the SSTable data file. + long sstableSize = reader.onDiskLength(); + totalBytes += sstableSize; + + // get the on disk size for the token range, note for compressed data this includes the full + // chunks the start and end ranges are found in. + long approximateRangeBytesInSSTable = reader.onDiskSizeForPartitionPositions(reader.getPositionsForRanges(singletonRange)); + approxBytesInRange += Math.min(approximateRangeBytesInSSTable, sstableSize); + } + catch (IOException | CardinalityMergeException e) + { + logger.error("Error calculating size estimate for {}.{} for range {} on {}", keyspace, table, tokenRange, reader, e); + } + } + + long partitions = 0L; + if (totalBytes > 0) + { + // use the ratio from size to estimate the partitions in the range as well + double ratio = approxBytesInRange / (double) totalBytes; + partitions = (long) Math.max(1, Math.ceil(cardinality.cardinality() * ratio)); + } + return new SizeEstimate(repairType, keyspace, table, tokenRange, partitions, approxBytesInRange, totalBytes); + } + + @VisibleForTesting + static Refs getSSTableReaderRefs(AutoRepairConfig.RepairType repairType, String keyspaceName, String tableName, Range tokenRange) + { + final ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(keyspaceName, tableName); + if (cfs == null) + { + logSkippingTable(keyspaceName, tableName); + return Refs.ref(Collections.emptyList()); + } + + Refs refs = null; + while (refs == null) + { + Iterable sstables = cfs.getTracker().getView().select(SSTableSet.CANONICAL); + SSTableIntervalTree tree = SSTableIntervalTree.buildSSTableIntervalTree(ImmutableList.copyOf(sstables)); + Range r = Range.makeRowRange(tokenRange); + List canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) + { + canonicalSSTables = canonicalSSTables.stream().filter((sstable) -> !sstable.isRepaired()).collect(Collectors.toList()); + } + refs = Refs.tryRef(canonicalSSTables); + } + return refs; + } + + @Override + public void setParameter(String key, String value) + { + if (!PARAMETERS.contains(key)) + { + throw new IllegalArgumentException("Unexpected parameter '" + key + "', must be one of " + PARAMETERS); + } + + logger.info("Setting {} to {} for repair type {}", key, value, repairType); + givenParameters.put(key, value); + reinitParameters(); + } + + @Override + public Map getParameters() + { + final Map parameters = new LinkedHashMap<>(); + for (String parameter : PARAMETERS) + { + // Use the parameter as provided if present. + if (givenParameters.containsKey(parameter)) + { + parameters.put(parameter, givenParameters.get(parameter)); + continue; + } + + switch (parameter) + { + case BYTES_PER_ASSIGNMENT: + parameters.put(parameter, bytesPerAssignment.toString()); + continue; + case PARTITIONS_PER_ASSIGNMENT: + parameters.put(parameter, Long.toString(partitionsPerAssignment)); + continue; + case MAX_TABLES_PER_ASSIGNMENT: + parameters.put(parameter, Integer.toString(maxTablesPerAssignment)); + continue; + case MAX_BYTES_PER_SCHEDULE: + parameters.put(parameter, maxBytesPerSchedule.toString()); + continue; + default: + // not expected + parameters.put(parameter, ""); + } + } + return Collections.unmodifiableMap(parameters); + } + + /** + * Represents a size estimate by both bytes and partition count for a given keyspace and table for a token range. + */ + @VisibleForTesting + protected static class SizeEstimate + { + public final AutoRepairConfig.RepairType repairType; + public final String keyspace; + public final String table; + public final Range tokenRange; + public final long partitions; + public final long sizeInRange; + public final long totalSize; + /** + * Size to consider in the repair. For incremental repair, we want to consider the total size + * of the estimate as we have to factor in anticompacting the entire SSTable. + * For full repair, just use the size containing the range. + */ + public final long sizeForRepair; + + public SizeEstimate(AutoRepairConfig.RepairType repairType, + String keyspace, String table, Range tokenRange, + long partitions, long sizeInRange, long totalSize) + { + this.repairType = repairType; + this.keyspace = keyspace; + this.table = table; + this.tokenRange = tokenRange; + this.partitions = partitions; + this.sizeInRange = sizeInRange; + this.totalSize = totalSize; + + this.sizeForRepair = repairType == AutoRepairConfig.RepairType.INCREMENTAL ? totalSize : sizeInRange; + } + + @Override + public String toString() + { + return "SizeEstimate{" + + "repairType=" + repairType + + ", keyspace='" + keyspace + '\'' + + ", table='" + table + '\'' + + ", tokenRange=" + tokenRange + + ", partitions=" + partitions + + ", sizeInRange=" + sizeInRange + + ", totalSize=" + totalSize + + ", sizeForRepair=" + sizeForRepair + + '}'; + } + } + + /** + * Implementation of RepairAssignment that also assigns an estimation of bytes involved + * in the repair. + */ + @VisibleForTesting + protected static class SizedRepairAssignment extends RepairAssignment { + + final String description; + final long estimatedBytes; + + public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames) + { + this(tokenRange, keyspaceName, tableNames, "", 0L); + } + + public SizedRepairAssignment(Range tokenRange, String keyspaceName, List tableNames, + String description, + long estimatedBytes) + { + super(tokenRange, keyspaceName, tableNames); + this.description = description; + this.estimatedBytes = estimatedBytes; + } + + /** + * @return Additional metadata about the repair assignment. + */ + public String getDescription() + { + return description; + } + + /** + * Estimated bytes involved in the assignment. Typically Derived from {@link SizeEstimate#sizeForRepair}. + * @return estimated bytes involved in the assignment. + */ + public long getEstimatedBytes() + { + return estimatedBytes; + } + + @Override + public boolean equals(Object o) + { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + SizedRepairAssignment that = (SizedRepairAssignment) o; + return estimatedBytes == that.estimatedBytes && Objects.equals(description, that.description); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), description, estimatedBytes); + } + + @Override + public String toString() + { + return "SizedRepairAssignment{" + + "description='" + description + '\'' + + ", tokenRange=" + tokenRange + + ", keyspaceName='" + keyspaceName + '\'' + + ", tableNames=" + tableNames + + ", estimatedBytes=" + FileUtils.stringifyFileSize(estimatedBytes) + + '}'; + } + } + + /** + * Conveinence builder for establishing defaults by repair type. + */ + protected static class RepairTypeDefaults + { + final AutoRepairConfig.RepairType repairType; + final DataStorageSpec.LongBytesBound bytesPerAssignment; + final long partitionsPerAssignment; + final int maxTablesPerAssignment; + final DataStorageSpec.LongBytesBound maxBytesPerSchedule; + + public RepairTypeDefaults(AutoRepairConfig.RepairType repairType, + DataStorageSpec.LongBytesBound bytesPerAssignment, + long partitionsPerAssignment, + int maxTablesPerAssignment, + DataStorageSpec.LongBytesBound maxBytesPerSchedule) + { + this.repairType = repairType; + this.bytesPerAssignment = bytesPerAssignment; + this.partitionsPerAssignment = partitionsPerAssignment; + this.maxTablesPerAssignment = maxTablesPerAssignment; + this.maxBytesPerSchedule = maxBytesPerSchedule; + } + + static RepairTypeDefaultsBuilder builder(AutoRepairConfig.RepairType repairType) + { + return new RepairTypeDefaultsBuilder(repairType); + } + + static class RepairTypeDefaultsBuilder + { + private final AutoRepairConfig.RepairType repairType; + private DataStorageSpec.LongBytesBound bytesPerAssignment = new DataStorageSpec.LongBytesBound("50GiB"); + // Aims to target at most 1 partitions per leaf assuming a merkle tree of depth 20 (2^20 = 1,048,576) + private long partitionsPerAssignment = 1_048_576; + private int maxTablesPerAssignment = 64; + private DataStorageSpec.LongBytesBound maxBytesPerSchedule = MAX_BYTES; + + private RepairTypeDefaultsBuilder(AutoRepairConfig.RepairType repairType) + { + this.repairType = repairType; + } + + @SuppressWarnings("unused") + public RepairTypeDefaultsBuilder withBytesPerAssignment(DataStorageSpec.LongBytesBound bytesPerAssignment) + { + this.bytesPerAssignment = bytesPerAssignment; + return this; + } + + @SuppressWarnings("unused") + public RepairTypeDefaultsBuilder withPartitionsPerAssignment(long partitionsPerAssignment) + { + this.partitionsPerAssignment = partitionsPerAssignment; + return this; + } + + @SuppressWarnings("unused") + public RepairTypeDefaultsBuilder withMaxTablesPerAssignment(int maxTablesPerAssignment) + { + this.maxTablesPerAssignment = maxTablesPerAssignment; + return this; + } + + public RepairTypeDefaultsBuilder withMaxBytesPerSchedule(DataStorageSpec.LongBytesBound maxBytesPerSchedule) + { + this.maxBytesPerSchedule = maxBytesPerSchedule; + return this; + } + + public RepairTokenRangeSplitter.RepairTypeDefaults build() + { + return new RepairTypeDefaults(repairType, bytesPerAssignment, partitionsPerAssignment, maxTablesPerAssignment, maxBytesPerSchedule); + } + } + } +} diff --git a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java index 855ad4bad344..820c6b011ba6 100644 --- a/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java +++ b/src/java/org/apache/cassandra/repair/consistent/SyncStatSummary.java @@ -81,7 +81,7 @@ public String toString() } } - private static class Table + public static class Table { final String keyspace; @@ -94,7 +94,7 @@ private static class Table final Map, Session> sessions = new HashMap<>(); - Table(String keyspace, String table) + public Table(String keyspace, String table) { this.keyspace = keyspace; this.table = table; @@ -138,7 +138,7 @@ void calculateTotals() totalsCalculated = true; } - boolean isCounter() + public boolean isCounter() { TableMetadata tmd = Schema.instance.getTableMetadata(keyspace, table); return tmd != null && tmd.isCounter(); @@ -174,6 +174,16 @@ public String toString() } return output.toString(); } + + public long getBytes() + { + return this.bytes; + } + + public long getRanges() + { + return this.ranges.size(); + } } private final Map, Table> summaries = new HashMap<>(); @@ -233,6 +243,12 @@ private void calculateTotals() totalsCalculated = true; } + public Map, Table> getTotals() + { + calculateTotals(); + return summaries; + } + public String toString() { List> tables = Lists.newArrayList(summaries.keySet()); diff --git a/src/java/org/apache/cassandra/schema/AutoRepairParams.java b/src/java/org/apache/cassandra/schema/AutoRepairParams.java new file mode 100644 index 000000000000..1fe80f766f3e --- /dev/null +++ b/src/java/org/apache/cassandra/schema/AutoRepairParams.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.schema; + +import java.util.Arrays; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.ImmutableMap; +import org.apache.commons.lang3.StringUtils; + +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.utils.LocalizeString; + +import static java.lang.String.format; +import static org.apache.cassandra.utils.LocalizeString.toLowerCaseLocalized; + +/** + * AutoRepair table parameters - used to define the auto-repair configuration for a table. + */ +public final class AutoRepairParams +{ + public enum Option + { + FULL_ENABLED, + INCREMENTAL_ENABLED, + PREVIEW_REPAIRED_ENABLED, + PRIORITY; + + @Override + public String toString() + { + return toLowerCaseLocalized(name()); + } + } + + private final ImmutableMap options; + + public static final Map DEFAULT_OPTIONS = ImmutableMap.of( + LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.name()), Boolean.toString(true), + LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.name()), Boolean.toString(true), + LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.name()), Boolean.toString(true), + Option.PRIORITY.toString(), "0" + ); + + AutoRepairParams(Map options) + { + this.options = ImmutableMap.copyOf(options); + } + + public static final AutoRepairParams DEFAULT = + new AutoRepairParams(DEFAULT_OPTIONS); + + public static AutoRepairParams create(Map options) + { + Map optionsMap = new TreeMap<>(DEFAULT_OPTIONS); + if (options != null) + { + for (Map.Entry entry : options.entrySet()) + { + if (Arrays.stream(Option.values()).noneMatch(option -> option.toString().equalsIgnoreCase(entry.getKey()))) + { + throw new ConfigurationException(format("Unknown property '%s'", entry.getKey())); + } + optionsMap.put(entry.getKey(), entry.getValue()); + } + } + return new AutoRepairParams(optionsMap); + } + + public boolean repairEnabled(AutoRepairConfig.RepairType type) + { + String option = LocalizeString.toLowerCaseLocalized(type.toString()) + "_enabled"; + String enabled = options.getOrDefault(option, DEFAULT_OPTIONS.get(option)); + return Boolean.parseBoolean(enabled); + } + + public int priority() + { + String priority = options.getOrDefault(Option.PRIORITY.toString(), DEFAULT_OPTIONS.get(Option.PRIORITY.toString())); + return Integer.parseInt(priority); + } + + public void validate() + { + for (Option option : Option.values()) + { + if (!options.containsKey(LocalizeString.toLowerCaseLocalized(option.toString()))) + { + throw new ConfigurationException(format("Missing repair sub-option '%s'", option)); + } + } + if (options.get(LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.toString())) != null && !isValidBoolean(options.get(LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.toString())))) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", + options.get(LocalizeString.toLowerCaseLocalized(Option.FULL_ENABLED.toString())), + Option.FULL_ENABLED)); + } + if (options.get(LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.toString())) != null && !isValidBoolean(options.get(LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.toString())))) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", + options.get(LocalizeString.toLowerCaseLocalized(Option.INCREMENTAL_ENABLED.toString())), + Option.INCREMENTAL_ENABLED)); + } + if (options.get(LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.toString())) != null && !isValidBoolean(options.get(LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.toString())))) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be a boolean", + options.get(LocalizeString.toLowerCaseLocalized(Option.PREVIEW_REPAIRED_ENABLED.toString())), + Option.PREVIEW_REPAIRED_ENABLED)); + } + if (options.get(LocalizeString.toLowerCaseLocalized(Option.PRIORITY.toString())) != null && !isValidInt(options.get(LocalizeString.toLowerCaseLocalized(Option.PRIORITY.toString())))) + { + throw new ConfigurationException(format("Invalid value %s for '%s' repair sub-option - must be an integer", + options.get(LocalizeString.toLowerCaseLocalized(Option.PRIORITY.toString())), + Option.PRIORITY)); + } + } + + public static boolean isValidBoolean(String value) + { + return StringUtils.equalsIgnoreCase(value, "true") || StringUtils.equalsIgnoreCase(value, "false"); + } + + public static boolean isValidInt(String value) + { + return StringUtils.isNumeric(value); + } + + public Map options() + { + return options; + } + + public static AutoRepairParams fromMap(Map map) + { + return create(map); + } + + public Map asMap() + { + return options; + } + + @Override + public String toString() + { + return MoreObjects.toStringHelper(this) + .add("options", options) + .toString(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) + return true; + + if (!(o instanceof AutoRepairParams)) + return false; + + AutoRepairParams cp = (AutoRepairParams) o; + + return options.equals(cp.options); + } + + @Override + public int hashCode() + { + return Objects.hash(options); + } +} diff --git a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java index 393e0ba75e5c..9001480c2494 100644 --- a/src/java/org/apache/cassandra/schema/SchemaKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SchemaKeyspace.java @@ -132,6 +132,7 @@ private SchemaKeyspace() + "cdc boolean," + "read_repair text," + "fast_path frozen>," + + "auto_repair frozen>," + "PRIMARY KEY ((keyspace_name), table_name))"); private static final TableMetadata Columns = @@ -216,6 +217,7 @@ private SchemaKeyspace() + "additional_write_policy text," + "cdc boolean," + "read_repair text," + + "auto_repair frozen>," + "PRIMARY KEY ((keyspace_name), view_name))"); private static final TableMetadata Indexes = @@ -574,7 +576,7 @@ private static void addTableToSchemaMutation(TableMetadata table, boolean withCo } } - private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBuilder builder, boolean forView) + public static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBuilder builder, boolean forView) { builder.add("bloom_filter_fp_chance", params.bloomFilterFpChance) .add("comment", params.comment) @@ -616,6 +618,14 @@ private static void addTableParamsToRowBuilder(TableParams params, Row.SimpleBui if (DatabaseDescriptor.getAccordTransactionsEnabled() && !forView) builder.add("fast_path", params.fastPath.asMap()); + + // As above, only add the auto_repair column if the scheduler is enabled + // to avoid RTE in pre-5.1 versioned node during upgrades + if (DatabaseDescriptor.getRawConfig() != null + && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + builder.add("auto_repair", params.autoRepair.asMap()); + } } private static void addAlterTableToSchemaMutation(TableMetadata oldTable, TableMetadata newTable, Mutation.SimpleBuilder builder) @@ -1091,6 +1101,12 @@ static TableParams createTableParamsFromRow(UntypedResultSet.Row row) if (row.has("incremental_backups")) builder.incrementalBackups(row.getBoolean("incremental_backups")); + // auto_repair column was introduced in 5.1 + if (row.has("auto_repair")) + { + builder.automatedRepair(AutoRepairParams.fromMap(row.getFrozenTextMap("auto_repair"))); + } + return builder.build(); } diff --git a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java index 2ec600a13d97..d50621a3a15c 100644 --- a/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java +++ b/src/java/org/apache/cassandra/schema/SystemDistributedKeyspace.java @@ -56,6 +56,7 @@ import org.apache.cassandra.utils.TimeUUID; import static java.lang.String.format; + import static org.apache.cassandra.utils.ByteBufferUtil.bytes; public final class SystemDistributedKeyspace @@ -81,10 +82,11 @@ private SystemDistributedKeyspace() * gen 4: compression chunk length reduced to 16KiB, memtable_flush_period_in_ms now unset on all tables in 4.0 * gen 5: add ttl and TWCS to repair_history tables * gen 6: add denylist table + * gen 7: add auto_repair_history and auto_repair_priority tables for AutoRepair feature * * // TODO: TCM - how do we evolve these tables? */ - public static final long GENERATION = 6; + public static final long GENERATION = 7; public static final String REPAIR_HISTORY = "repair_history"; @@ -94,7 +96,11 @@ private SystemDistributedKeyspace() public static final String PARTITION_DENYLIST_TABLE = "partition_denylist"; - public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE); + public static final String AUTO_REPAIR_HISTORY = "auto_repair_history"; + + public static final String AUTO_REPAIR_PRIORITY = "auto_repair_priority"; + + public static final Set TABLE_NAMES = ImmutableSet.of(REPAIR_HISTORY, PARENT_REPAIR_HISTORY, VIEW_BUILD_STATUS, PARTITION_DENYLIST_TABLE, AUTO_REPAIR_HISTORY, AUTO_REPAIR_PRIORITY); public static final String REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + "keyspace_name text," @@ -157,6 +163,28 @@ private SystemDistributedKeyspace() private static final TableMetadata PartitionDenylistTable = parse(PARTITION_DENYLIST_TABLE, "Partition keys which have been denied access", PARTITION_DENYLIST_CQL).build(); + public static final String AUTO_REPAIR_HISTORY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "host_id uuid," + + "repair_type text," + + "repair_turn text," + + "repair_start_ts timestamp," + + "repair_finish_ts timestamp," + + "delete_hosts set," + + "delete_hosts_update_time timestamp," + + "force_repair boolean," + + "PRIMARY KEY (repair_type, host_id))"; + + private static final TableMetadata AutoRepairHistoryTable = + parse(AUTO_REPAIR_HISTORY, "Auto repair history for each node", AUTO_REPAIR_HISTORY_CQL).build(); + + public static final String AUTO_REPAIR_PRIORITY_CQL = "CREATE TABLE IF NOT EXISTS %s (" + + "repair_type text," + + "repair_priority set," + + "PRIMARY KEY (repair_type))"; + + private static final TableMetadata AutoRepairPriorityTable = + parse(AUTO_REPAIR_PRIORITY, "Auto repair priority for each group", AUTO_REPAIR_PRIORITY_CQL).build(); + private static TableMetadata.Builder parse(String table, String description, String cql) { return CreateTableStatement.parse(format(cql, table), SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) @@ -169,7 +197,7 @@ public static KeyspaceMetadata metadata() { return KeyspaceMetadata.create(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, KeyspaceParams.simple(Math.max(DEFAULT_RF, DatabaseDescriptor.getDefaultKeyspaceRF())), - Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable)); + Tables.of(RepairHistory, ParentRepairHistory, ViewBuildStatus, PartitionDenylistTable, AutoRepairHistoryTable, AutoRepairPriorityTable)); } public static void startParentRepair(TimeUUID parent_id, String keyspaceName, String[] cfnames, RepairOption options) diff --git a/src/java/org/apache/cassandra/schema/TableParams.java b/src/java/org/apache/cassandra/schema/TableParams.java index 7c3e52943c8c..e44af5ca5613 100644 --- a/src/java/org/apache/cassandra/schema/TableParams.java +++ b/src/java/org/apache/cassandra/schema/TableParams.java @@ -28,6 +28,7 @@ import com.google.common.collect.Maps; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.Attributes; import org.apache.cassandra.cql3.CqlBuilder; import org.apache.cassandra.exceptions.ConfigurationException; @@ -98,7 +99,8 @@ public enum Option FAST_PATH, TRANSACTIONAL_MODE, TRANSACTIONAL_MIGRATION_FROM, - PENDING_DROP; + PENDING_DROP, + AUTO_REPAIR; @Override public String toString() @@ -131,6 +133,8 @@ public String toString() public final TransactionalMigrationFromMode transactionalMigrationFrom; public final boolean pendingDrop; + public final AutoRepairParams autoRepair; + private TableParams(Builder builder) { comment = builder.comment; @@ -159,6 +163,7 @@ private TableParams(Builder builder) transactionalMigrationFrom = builder.transactionalMigrationFrom; pendingDrop = builder.pendingDrop; checkNotNull(transactionalMigrationFrom); + autoRepair = builder.autoRepair; } public static Builder builder() @@ -190,7 +195,8 @@ public static Builder builder(TableParams params) .fastPath(params.fastPath) .transactionalMode(params.transactionalMode) .transactionalMigrationFrom(params.transactionalMigrationFrom) - .pendingDrop(params.pendingDrop); + .pendingDrop(params.pendingDrop) + .automatedRepair(params.autoRepair); } public Builder unbuild() @@ -204,7 +210,7 @@ public void validate() compression.validate(); double minBloomFilterFpChanceValue = BloomCalculations.minSupportedBloomFilterFpChance(); - if (bloomFilterFpChance <= minBloomFilterFpChanceValue || bloomFilterFpChance > 1) + if (bloomFilterFpChance <= minBloomFilterFpChanceValue || bloomFilterFpChance > 1) { fail("%s must be larger than %s and less than or equal to 1.0 (got %s)", BLOOM_FILTER_FP_CHANCE, @@ -248,6 +254,8 @@ public void validate() if (transactionalMode.isTestMode() && !CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getBoolean()) fail("Transactional mode " + transactionalMode + " can't be used if " + CassandraRelevantProperties.ACCORD_ALLOW_TEST_MODES.getKey() + " is not set"); + + autoRepair.validate(); } private static void fail(String format, Object... args) @@ -288,7 +296,8 @@ public boolean equals(Object o) && fastPath.equals(fastPath) && transactionalMode == p.transactionalMode && transactionalMigrationFrom == p.transactionalMigrationFrom - && pendingDrop == p.pendingDrop; + && pendingDrop == p.pendingDrop + && autoRepair.equals(p.autoRepair); } @Override @@ -316,7 +325,8 @@ public int hashCode() fastPath, transactionalMode, transactionalMigrationFrom, - pendingDrop); + pendingDrop, + autoRepair); } @Override @@ -347,6 +357,7 @@ public String toString() .add(Option.TRANSACTIONAL_MODE.toString(), transactionalMode) .add(Option.TRANSACTIONAL_MIGRATION_FROM.toString(), transactionalMigrationFrom) .add(PENDING_DROP.toString(), pendingDrop) + .add(Option.AUTO_REPAIR.toString(), autoRepair) .toString(); } @@ -408,6 +419,12 @@ public void appendCqlTo(CqlBuilder builder, boolean isView) } builder.append("AND speculative_retry = ").appendWithSingleQuotes(speculativeRetry.toString()); + if (DatabaseDescriptor.getRawConfig() != null + && DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + builder.newLine() + .append("AND auto_repair = ").append(autoRepair.asMap()); + } } public static final class Builder @@ -436,6 +453,7 @@ public static final class Builder public TransactionalMigrationFromMode transactionalMigrationFrom = TransactionalMigrationFromMode.none; public boolean pendingDrop = false; + private AutoRepairParams autoRepair = AutoRepairParams.DEFAULT; public Builder() { } @@ -582,6 +600,12 @@ public Builder pendingDrop(boolean pendingDrop) this.pendingDrop = pendingDrop; return this; } + + public Builder automatedRepair(AutoRepairParams val) + { + autoRepair = val; + return this; + } } public static class Serializer implements MetadataSerializer diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index e507f129d4d2..bee41a7fa3a3 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -104,6 +104,7 @@ import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; import org.apache.cassandra.service.paxos.PaxosRepair; import org.apache.cassandra.service.paxos.cleanup.PaxosCleanup; import org.apache.cassandra.service.snapshot.SnapshotManager; @@ -669,6 +670,9 @@ public boolean verifyCompactionsPendingThreshold(TimeUUID parentRepairSession, P public Future prepareForRepair(TimeUUID parentRepairSession, InetAddressAndPort coordinator, Set endpoints, RepairOption options, boolean isForcedRepair, List columnFamilyStores) { + if (!verifyDiskHeadroomThreshold(parentRepairSession, options.getPreviewKind(), options.isIncremental())) + failRepair(parentRepairSession, "Rejecting incoming repair, disk usage above threshold"); // failRepair throws exception + if (!verifyCompactionsPendingThreshold(parentRepairSession, options.getPreviewKind())) failRepair(parentRepairSession, "Rejecting incoming repair, pending compactions above threshold"); // failRepair throws exception @@ -726,6 +730,24 @@ public Future prepareForRepair(TimeUUID parentRepairSession, InetAddressAndPo return promise; } + public static boolean verifyDiskHeadroomThreshold(TimeUUID parentRepairSession, PreviewKind previewKind, boolean isIncremental) + { + if (!isIncremental) // disk headroom is required for anti-compaction which is only performed by incremental repair + return true; + + double diskUsage = DiskUsageMonitor.instance.getDiskUsage(); + double rejectRatio = ActiveRepairService.instance().getIncrementalRepairDiskHeadroomRejectRatio(); + + if (diskUsage + rejectRatio > 1) + { + logger.error("[{}] Rejecting incoming repair, disk usage ({}%) above threshold ({}%)", + previewKind.logPrefix(parentRepairSession), String.format("%.2f", diskUsage * 100), String.format("%.2f", (1 - rejectRatio) * 100)); + return false; + } + + return true; + } + private void sendPrepareWithRetries(TimeUUID parentRepairSession, AtomicInteger pending, Set failedNodes, @@ -1086,6 +1108,16 @@ public void setRepairPendingCompactionRejectThreshold(int value) DatabaseDescriptor.setRepairPendingCompactionRejectThreshold(value); } + public double getIncrementalRepairDiskHeadroomRejectRatio() + { + return DatabaseDescriptor.getIncrementalRepairDiskHeadroomRejectRatio(); + } + + public void setIncrementalRepairDiskHeadroomRejectRatio(double value) + { + DatabaseDescriptor.setIncrementalRepairDiskHeadroomRejectRatio(value); + } + /** * Remove any parent repair sessions matching predicate */ diff --git a/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java b/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java index 851dc6c802bb..c739b048d68f 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairServiceMBean.java @@ -74,4 +74,8 @@ public interface ActiveRepairServiceMBean int parentRepairSessionsCount(); public int getPaxosRepairParallelism(); public void setPaxosRepairParallelism(int v); + + public double getIncrementalRepairDiskHeadroomRejectRatio(); + + public void setIncrementalRepairDiskHeadroomRejectRatio(double value); } diff --git a/src/java/org/apache/cassandra/service/AutoRepairService.java b/src/java/org/apache/cassandra/service/AutoRepairService.java new file mode 100644 index 000000000000..db1b29c38968 --- /dev/null +++ b/src/java/org/apache/cassandra/service/AutoRepairService.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.ParameterizedClass; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.utils.MBeanWrapper; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; + +/** + * Implement all the MBeans for AutoRepair. + */ +public class AutoRepairService implements AutoRepairServiceMBean +{ + public static final String MBEAN_NAME = "org.apache.cassandra.db:type=AutoRepairService"; + + @VisibleForTesting + protected AutoRepairConfig config; + + public static final AutoRepairService instance = new AutoRepairService(); + + @VisibleForTesting + protected AutoRepairService() + { + } + + public static void setup() + { + instance.config = DatabaseDescriptor.getAutoRepairConfig(); + } + + static + { + MBeanWrapper.instance.registerMBean(instance, MBEAN_NAME); + } + + public void checkCanRun(String repairType) + { + checkCanRun(RepairType.parse(repairType)); + } + + public void checkCanRun(RepairType repairType) + { + if (!config.isAutoRepairSchedulingEnabled()) + throw new ConfigurationException("Auto-repair scheduler is disabled."); + + if (repairType != RepairType.INCREMENTAL) + return; + + if (config.getMaterializedViewRepairEnabled(repairType) && DatabaseDescriptor.isMaterializedViewsOnRepairEnabled()) + throw new ConfigurationException("Cannot run incremental repair while materialized view replay is enabled. Set materialized_views_on_repair_enabled to false."); + + if (DatabaseDescriptor.isCDCEnabled() && DatabaseDescriptor.isCDCOnRepairEnabled()) + throw new ConfigurationException("Cannot run incremental repair while CDC replay is enabled. Set cdc_on_repair_enabled to false."); + } + + public AutoRepairConfig getAutoRepairConfig() + { + return config; + } + + @Override + public boolean isAutoRepairDisabled() + { + return config == null || !config.isAutoRepairSchedulingEnabled(); + } + + @Override + public String getAutoRepairConfiguration() + { + StringBuilder sb = new StringBuilder(); + sb.append("repair scheduler configuration:"); + appendConfig(sb, "repair_check_interval", config.getRepairCheckInterval()); + appendConfig(sb, "repair_task_min_duration", config.getRepairTaskMinDuration()); + appendConfig(sb, "history_clear_delete_hosts_buffer_interval", config.getAutoRepairHistoryClearDeleteHostsBufferInterval()); + for (RepairType repairType : RepairType.values()) + { + sb.append(formatRepairTypeConfig(repairType, config)); + } + return sb.toString(); + } + + @Override + public void setAutoRepairEnabled(String repairType, boolean enabled) + { + checkCanRun(repairType); + config.setAutoRepairEnabled(RepairType.parse(repairType), enabled); + } + + @Override + public void setRepairThreads(String repairType, int repairThreads) + { + config.setRepairThreads(RepairType.parse(repairType), repairThreads); + } + + @Override + public void setRepairPriorityForHosts(String repairType, String commaSeparatedHostSet) + { + Set hosts = InetAddressAndPort.parseHosts(commaSeparatedHostSet, false); + if (!hosts.isEmpty()) + { + AutoRepairUtils.addPriorityHosts(RepairType.parse(repairType), hosts); + } + } + + @Override + public void setForceRepairForHosts(String repairType, String commaSeparatedHostSet) + { + Set hosts = InetAddressAndPort.parseHosts(commaSeparatedHostSet, false); + if (!hosts.isEmpty()) + { + AutoRepairUtils.setForceRepair(RepairType.parse(repairType), hosts); + } + } + + @Override + public void setRepairMinInterval(String repairType, String minRepairInterval) + { + config.setRepairMinInterval(RepairType.parse(repairType), minRepairInterval); + } + + @Override + public void startScheduler() + { + config.startScheduler(); + } + + @Override + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) + { + config.setAutoRepairHistoryClearDeleteHostsBufferInterval(duration); + } + + @Override + public void setAutoRepairMinRepairTaskDuration(String duration) + { + config.setRepairTaskMinDuration(duration); + } + + @Override + public void setRepairSSTableCountHigherThreshold(String repairType, int sstableHigherThreshold) + { + config.setRepairSSTableCountHigherThreshold(RepairType.parse(repairType), sstableHigherThreshold); + } + + @Override + public void setAutoRepairTableMaxRepairTime(String repairType, String autoRepairTableMaxRepairTime) + { + config.setAutoRepairTableMaxRepairTime(RepairType.parse(repairType), autoRepairTableMaxRepairTime); + } + + @Override + public void setIgnoreDCs(String repairType, Set ignoreDCs) + { + config.setIgnoreDCs(RepairType.parse(repairType), ignoreDCs); + } + + @Override + public void setPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) + { + config.setRepairPrimaryTokenRangeOnly(RepairType.parse(repairType), primaryTokenRangeOnly); + } + + @Override + public void setParallelRepairPercentage(String repairType, int percentage) + { + config.setParallelRepairPercentage(RepairType.parse(repairType), percentage); + } + + @Override + public void setParallelRepairCount(String repairType, int count) + { + config.setParallelRepairCount(RepairType.parse(repairType), count); + } + + @Override + public void setAllowParallelReplicaRepair(String repairType, boolean enabled) + { + config.setAllowParallelReplicaRepair(RepairType.parse(repairType), enabled); + } + + @Override + public void setAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled) + { + config.setAllowParallelReplicaRepairAcrossSchedules(RepairType.parse(repairType), enabled); + } + + @Override + public void setMVRepairEnabled(String repairType, boolean enabled) + { + config.setMaterializedViewRepairEnabled(RepairType.parse(repairType), enabled); + } + + @Override + public void setRepairSessionTimeout(String repairType, String timeout) + { + config.setRepairSessionTimeout(RepairType.parse(repairType), timeout); + } + + @Override + public Set getOnGoingRepairHostIds(String repairType) + { + List histories = AutoRepairUtils.getAutoRepairHistory(RepairType.parse(repairType)); + if (histories == null) + { + return Collections.emptySet(); + } + Set hostIds = new HashSet<>(); + AutoRepairUtils.CurrentRepairStatus currentRepairStatus = new AutoRepairUtils.CurrentRepairStatus(histories, AutoRepairUtils.getPriorityHostIds(RepairType.parse(repairType)), null); + for (UUID id : currentRepairStatus.hostIdsWithOnGoingRepair) + { + hostIds.add(id.toString()); + } + for (UUID id : currentRepairStatus.hostIdsWithOnGoingForceRepair) + { + hostIds.add(id.toString()); + } + return Collections.unmodifiableSet(hostIds); + } + + @Override + public void setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value) + { + config.getTokenRangeSplitterInstance(RepairType.parse(repairType)).setParameter(key, value); + } + + @Override + public void setRepairByKeyspace(String repairType, boolean repairByKeyspace) + { + config.setRepairByKeyspace(RepairType.parse(repairType), repairByKeyspace); + } + + @Override + public void setAutoRepairMaxRetriesCount(String repairType, int retries) + { + config.setRepairMaxRetries(RepairType.parse(repairType), retries); + } + + @Override + public void setAutoRepairRetryBackoff(String repairType, String interval) + { + config.setRepairRetryBackoff(RepairType.parse(repairType), interval); + } + + private String formatRepairTypeConfig(RepairType repairType, AutoRepairConfig config) + { + StringBuilder sb = new StringBuilder(); + sb.append("\nconfiguration for repair_type: ").append(repairType.getConfigName()); + sb.append("\n\tenabled: ").append(config.isAutoRepairEnabled(repairType)); + // Only show configuration if enabled + if (config.isAutoRepairEnabled(repairType)) + { + Set priorityHosts = AutoRepairUtils.getPriorityHosts(repairType); + if (!priorityHosts.isEmpty()) + { + appendConfig(sb, "priority_hosts", Joiner.on(',').skipNulls().join(priorityHosts)); + } + + appendConfig(sb, "min_repair_interval", config.getRepairMinInterval(repairType)); + appendConfig(sb, "repair_by_keyspace", config.getRepairByKeyspace(repairType)); + appendConfig(sb, "number_of_repair_threads", config.getRepairThreads(repairType)); + appendConfig(sb, "sstable_upper_threshold", config.getRepairSSTableCountHigherThreshold(repairType)); + appendConfig(sb, "table_max_repair_time", config.getAutoRepairTableMaxRepairTime(repairType)); + appendConfig(sb, "ignore_dcs", config.getIgnoreDCs(repairType)); + appendConfig(sb, "repair_primary_token_range_only", config.getRepairPrimaryTokenRangeOnly(repairType)); + appendConfig(sb, "parallel_repair_count", config.getParallelRepairCount(repairType)); + appendConfig(sb, "parallel_repair_percentage", config.getParallelRepairPercentage(repairType)); + appendConfig(sb, "allow_parallel_replica_repair", config.getAllowParallelReplicaRepair(repairType)); + appendConfig(sb, "allow_parallel_replica_repair_across_schedules", config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + appendConfig(sb, "materialized_view_repair_enabled", config.getMaterializedViewRepairEnabled(repairType)); + appendConfig(sb, "initial_scheduler_delay", config.getInitialSchedulerDelay(repairType)); + appendConfig(sb, "repair_session_timeout", config.getRepairSessionTimeout(repairType)); + appendConfig(sb, "force_repair_new_node", config.getForceRepairNewNode(repairType)); + appendConfig(sb, "repair_max_retries", config.getRepairMaxRetries(repairType)); + appendConfig(sb, "repair_retry_backoff", config.getRepairRetryBackoff(repairType)); + + final ParameterizedClass splitterClass = config.getTokenRangeSplitter(repairType); + final String splitterClassName = splitterClass.class_name != null ? splitterClass.class_name : AutoRepairConfig.DEFAULT_SPLITTER.getName(); + appendConfig(sb, "token_range_splitter", splitterClassName); + Map tokenRangeSplitterParameters = config.getTokenRangeSplitterInstance(repairType).getParameters(); + if (!tokenRangeSplitterParameters.isEmpty()) + { + for (Map.Entry param : tokenRangeSplitterParameters.entrySet()) + { + appendConfig(sb, String.format("token_range_splitter.%s", param.getKey()), param.getValue()); + } + } + } + + return sb.toString(); + } + + private void appendConfig(StringBuilder sb, String config, T value) + { + sb.append(String.format("%s%s: %s", "\n\t", config, value)); + } +} diff --git a/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java new file mode 100644 index 000000000000..181c6008f533 --- /dev/null +++ b/src/java/org/apache/cassandra/service/AutoRepairServiceMBean.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.service; + + +import java.util.Set; + +/** + * Defines all the MBeans exposed for AutoRepair. + */ +public interface AutoRepairServiceMBean +{ + public void setAutoRepairEnabled(String repairType, boolean enabled); + + public void setRepairThreads(String repairType, int repairThreads); + + public void setRepairPriorityForHosts(String repairType, String commaSeparatedHostSet); + + public void setForceRepairForHosts(String repairType, String commaSeparatedHostSet); + + public void setRepairMinInterval(String repairType, String minRepairInterval); + + void startScheduler(); + + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration); + + public void setAutoRepairMinRepairTaskDuration(String duration); + + public void setRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold); + + public void setAutoRepairTableMaxRepairTime(String repairType, String autoRepairTableMaxRepairTime); + + public void setIgnoreDCs(String repairType, Set ignorDCs); + + public void setPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly); + + public void setParallelRepairPercentage(String repairType, int percentage); + + public void setParallelRepairCount(String repairType, int count); + + public void setAllowParallelReplicaRepair(String repairType, boolean enabled); + + public void setAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled); + + public void setMVRepairEnabled(String repairType, boolean enabled); + + public boolean isAutoRepairDisabled(); + + public String getAutoRepairConfiguration(); + + public void setRepairSessionTimeout(String repairType, String timeout); + + public Set getOnGoingRepairHostIds(String repairType); + + public void setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value); + + public void setRepairByKeyspace(String repairType, boolean repairByKeyspace); + + public void setAutoRepairMaxRetriesCount(String repairType, int retries); + + public void setAutoRepairRetryBackoff(String repairType, String interval); +} diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java index 171ec47e1e68..694725c8586d 100644 --- a/src/java/org/apache/cassandra/service/CassandraDaemon.java +++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java @@ -406,6 +406,8 @@ protected void setup() AuditLogManager.instance.initialize(); + StorageService.instance.doAutoRepairSetup(); + // schedule periodic background compaction task submission. this is simply a backstop against compactions stalling // due to scheduling errors or race conditions ScheduledExecutors.optionalTasks.scheduleWithFixedDelay(ColumnFamilyStore.getBackgroundCompactionTaskSubmitter(), 5, 1, TimeUnit.MINUTES); diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index c97b2ec69523..8ef0c7fc8b2b 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -71,6 +71,9 @@ import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.common.util.concurrent.Uninterruptibles; + +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -250,6 +253,7 @@ import static org.apache.cassandra.index.SecondaryIndexManager.getIndexName; import static org.apache.cassandra.index.SecondaryIndexManager.isIndexColumnFamily; import static org.apache.cassandra.io.util.FileUtils.ONE_MIB; +import static org.apache.cassandra.locator.InetAddressAndPort.stringify; import static org.apache.cassandra.schema.SchemaConstants.isLocalSystemKeyspace; import static org.apache.cassandra.service.ActiveRepairService.ParentRepairStatus; import static org.apache.cassandra.service.ActiveRepairService.repairCommandExecutor; @@ -384,8 +388,12 @@ public RangesAtEndpoint getLocalReplicas(String keyspaceName) public RangesAtEndpoint getReplicas(String keyspaceName, InetAddressAndPort endpoint) { - return Keyspace.open(keyspaceName).getReplicationStrategy() - .getAddressReplicas(ClusterMetadata.current(), endpoint); + return getReplicas(Keyspace.open(keyspaceName).getReplicationStrategy(), endpoint); + } + + public RangesAtEndpoint getReplicas(AbstractReplicationStrategy replicationStrategy, InetAddressAndPort endpoint) + { + return replicationStrategy.getAddressReplicas(ClusterMetadata.current(), endpoint); } public List> getLocalRanges(String ks) @@ -469,12 +477,12 @@ public enum Mode { STARTING, NORMAL, JOINING, JOINING_FAILED, LEAVING, DECOMMISS private volatile int totalCFs, remainingCFs; - private static final AtomicInteger nextRepairCommand = new AtomicInteger(); - private final List lifecycleSubscribers = new CopyOnWriteArrayList<>(); private final String jmxObjectName; + public static final AtomicInteger nextRepairCommand = new AtomicInteger(); + // true when keeping strict consistency while bootstrapping public static final boolean useStrictConsistency = CONSISTENT_RANGE_MOVEMENT.getBoolean(); private boolean joinRing = JOIN_RING.getBoolean(); @@ -1128,6 +1136,17 @@ public void doAuthSetup(boolean async) } } + public void doAutoRepairSetup() + { + AutoRepairService.setup(); + if (DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + logger.info("Enabling auto-repair scheduling"); + AutoRepair.instance.setup(); + logger.info("AutoRepair setup complete!"); + } + } + public boolean isAuthSetupComplete() { return authSetupComplete; @@ -2612,16 +2631,6 @@ public String getSavedCachesLocation() return FileUtils.getCanonicalPath(DatabaseDescriptor.getSavedCachesLocation()); } - private List stringify(Iterable endpoints, boolean withPort) - { - List stringEndpoints = new ArrayList<>(); - for (InetAddressAndPort ep : endpoints) - { - stringEndpoints.add(ep.getHostAddress(withPort)); - } - return stringEndpoints; - } - public int getCurrentGenerationNumber() { return Gossiper.instance.getCurrentGenerationNumber(getBroadcastAddressAndPort()); @@ -5647,9 +5656,49 @@ public void alterTopology(String changes) AlterTopology transform = new AlterTopology(updates, ClusterMetadataService.instance().placementProvider()); ClusterMetadataService.instance() .commit(transform, - m -> { logger.info("Rack changes committed successfully"); return m; }, + m -> { + logger.info("Rack changes committed successfully"); + return m; + }, (c, r) -> { throw new IllegalArgumentException("Unable to commit rack changes: " + r); }); } + + @Override + public List getTablesForKeyspace(String keyspace) + { + return Keyspace.open(keyspace).getColumnFamilyStores().stream().map(cfs -> cfs.name).collect(Collectors.toList()); + } + + @Override + public List mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tableNames) + { + Map tables = Keyspace.open(keyspace).getColumnFamilyStores() + .stream().collect(Collectors.toMap(c -> c.name, c -> c)); + for (String tableName : tableNames) + { + if (!tables.containsKey(tableName)) + throw new RuntimeException("Table " + tableName + " does not exist in keyspace " + keyspace); + } + + // only select SSTables that are unrepaired when repaired is true and vice versa + Predicate predicate = sst -> repaired != sst.isRepaired(); + + // mutate SSTables + long repairedAt = !repaired ? 0 : currentTimeMillis(); + List sstablesTouched = new ArrayList<>(); + for (String tableName : tableNames) + { + ColumnFamilyStore table = tables.get(tableName); + Set result = table.runWithCompactionsDisabled(() -> { + Set sstables = table.getLiveSSTables().stream().filter(predicate).collect(Collectors.toSet()); + if (!preview) + table.getCompactionStrategyManager().mutateRepaired(sstables, repairedAt, null, false); + return sstables; + }, predicate, OperationType.ANTICOMPACTION, true, false, true); + sstablesTouched.addAll(result.stream().map(sst -> sst.descriptor.baseFile().name()).collect(Collectors.toList())); + } + return sstablesTouched; + } } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index 61c378a4bddd..e188595aa361 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -1378,4 +1378,9 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e boolean getPaxosRepairRaceWait(); // Comma delimited list of "nodeId=dc:rack" or "endpoint=dc:rack" void alterTopology(String updates); + /** Gets the names of all tables for the given keyspace */ + public List getTablesForKeyspace(String keyspace); + + /** Mutates the repaired state of all SSTables for the given SSTables */ + public List mutateSSTableRepairedState(boolean repaired, boolean preview, String keyspace, List tables); } diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java index 607ee2f669c3..0e32f18c2507 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndJoin.java @@ -40,6 +40,7 @@ import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordService; @@ -243,6 +244,9 @@ public SequenceState executeNext() .filter(cfs -> Schema.instance.getUserKeyspaces().names().contains(cfs.keyspace.getName())) .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); ClusterMetadataService.instance().commit(midJoin); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } else { diff --git a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java index 2b283d6905af..8bc73d142eb9 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java +++ b/src/java/org/apache/cassandra/tcm/sequences/BootstrapAndReplace.java @@ -44,6 +44,7 @@ import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -240,6 +241,9 @@ public SequenceState executeNext() .filter(cfs -> Schema.instance.getUserKeyspaces().names().contains(cfs.keyspace.getName())) .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); ClusterMetadataService.instance().commit(midReplace); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } else { diff --git a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java index be75538d6888..4580d716a1ba 100644 --- a/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java +++ b/src/java/org/apache/cassandra/tcm/sequences/ReplaceSameAddress.java @@ -29,6 +29,7 @@ import org.apache.cassandra.gms.Gossiper; import org.apache.cassandra.locator.EndpointsByReplica; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.tcm.ClusterMetadata; @@ -95,6 +96,9 @@ public static void streamData(NodeId nodeId, ClusterMetadata metadata, boolean s .forEach(cfs -> cfs.indexManager.executePreJoinTasksBlocking(true)); BootstrapAndReplace.gossipStateToNormal(metadata, metadata.myNodeId()); Gossiper.instance.mergeNodeToGossip(metadata.myNodeId(), metadata); + + // this node might have just bootstrapped; check if we should run repair immediately + AutoRepairUtils.runRepairOnNewlyBootstrappedNodeIfEnabled(); } } } diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 37b7ce627ae2..478fc0e55a00 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -112,6 +112,8 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.MessagingServiceMBean; import org.apache.cassandra.service.ActiveRepairServiceMBean; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.AutoRepairServiceMBean; import org.apache.cassandra.service.CacheService; import org.apache.cassandra.service.CacheServiceMBean; import org.apache.cassandra.service.snapshot.SnapshotManagerMBean; @@ -179,6 +181,7 @@ public class NodeProbe implements AutoCloseable protected CIDRGroupsMappingManagerMBean cmbProxy; protected PermissionsCacheMBean pcProxy; protected RolesCacheMBean rcProxy; + protected AutoRepairServiceMBean autoRepairProxy; protected Output output; private boolean failed; @@ -323,6 +326,9 @@ protected void connect() throws IOException name = new ObjectName(CIDRFilteringMetricsTable.MBEAN_NAME); cfmProxy = JMX.newMBeanProxy(mbeanServerConn, name, CIDRFilteringMetricsTableMBean.class); + + name = new ObjectName(AutoRepairService.MBEAN_NAME); + autoRepairProxy = JMX.newMBeanProxy(mbeanServerConn, name, AutoRepairServiceMBean.class); } catch (MalformedObjectNameException e) { @@ -2553,6 +2559,141 @@ public void abortBootstrap(String nodeId, String endpoint) { ssProxy.abortBootstrap(nodeId, endpoint); } + + public boolean isAutoRepairDisabled() + { + return autoRepairProxy.isAutoRepairDisabled(); + } + + public String autoRepairConfiguration() + { + return autoRepairProxy.getAutoRepairConfiguration(); + } + + public void setAutoRepairTokenRangeSplitterParameter(String repairType, String key, String value) + { + autoRepairProxy.setAutoRepairTokenRangeSplitterParameter(repairType, key, value); + } + + public void setAutoRepairEnabled(String repairType, boolean enabled) + { + autoRepairProxy.setAutoRepairEnabled(repairType, enabled); + } + + public void setAutoRepairThreads(String repairType, int repairThreads) + { + autoRepairProxy.setRepairThreads(repairType, repairThreads); + } + + public void setAutoRepairPriorityForHosts(String repairType, String commaSeparatedHostSet) + { + autoRepairProxy.setRepairPriorityForHosts(repairType, commaSeparatedHostSet); + } + + public void setAutoRepairForceRepairForHosts(String repairType, String commaSeparatedHostSet) + { + autoRepairProxy.setForceRepairForHosts(repairType, commaSeparatedHostSet); + } + + public void setAutoRepairMinInterval(String repairType, String minRepairInterval) + { + autoRepairProxy.setRepairMinInterval(repairType, minRepairInterval); + } + + public void setAutoRepairHistoryClearDeleteHostsBufferDuration(String duration) + { + autoRepairProxy.setAutoRepairHistoryClearDeleteHostsBufferDuration(duration); + } + + public void startAutoRepairScheduler() + { + autoRepairProxy.startScheduler(); + } + + public void setAutoRepairMinRepairTaskDuration(String duration) + { + autoRepairProxy.setAutoRepairMinRepairTaskDuration(duration); + } + + public void setAutoRepairSSTableCountHigherThreshold(String repairType, int ssTableHigherThreshold) + { + autoRepairProxy.setRepairSSTableCountHigherThreshold(repairType, ssTableHigherThreshold); + } + + public void setAutoRepairTableMaxRepairTime(String repairType, String autoRepairTableMaxRepairTime) + { + autoRepairProxy.setAutoRepairTableMaxRepairTime(repairType, autoRepairTableMaxRepairTime); + } + + public void setAutoRepairIgnoreDCs(String repairType, Set ignoreDCs) + { + autoRepairProxy.setIgnoreDCs(repairType, ignoreDCs); + } + + public void setAutoRepairParallelRepairPercentage(String repairType, int percentage) + { + autoRepairProxy.setParallelRepairPercentage(repairType, percentage); + } + + public void setAutoRepairParallelRepairCount(String repairType, int count) + { + autoRepairProxy.setParallelRepairCount(repairType, count); + } + + public void setAutoRepairAllowParallelReplicaRepair(String repairType, boolean enabled) + { + autoRepairProxy.setAllowParallelReplicaRepair(repairType, enabled); + } + + public void setAutoRepairAllowParallelReplicaRepairAcrossSchedules(String repairType, boolean enabled) + { + autoRepairProxy.setAllowParallelReplicaRepairAcrossSchedules(repairType, enabled); + } + + public void setAutoRepairPrimaryTokenRangeOnly(String repairType, boolean primaryTokenRangeOnly) + { + autoRepairProxy.setPrimaryTokenRangeOnly(repairType, primaryTokenRangeOnly); + } + + public void setAutoRepairMaterializedViewRepairEnabled(String repairType, boolean enabled) + { + autoRepairProxy.setMVRepairEnabled(repairType, enabled); + } + + public List mutateSSTableRepairedState(boolean repair, boolean preview, String keyspace, List tables) + { + return ssProxy.mutateSSTableRepairedState(repair, preview, keyspace, tables); + } + + public List getAutoRepairTablesForKeyspace(String keyspace) + { + return ssProxy.getTablesForKeyspace(keyspace); + } + + public void setAutoRepairSessionTimeout(String repairType, String timeout) + { + autoRepairProxy.setRepairSessionTimeout(repairType, timeout); + } + + public Set getAutoRepairOnGoingRepairHostIds(String repairType) + { + return autoRepairProxy.getOnGoingRepairHostIds(repairType); + } + + public void setAutoRepairRepairByKeyspace(String repairType, boolean enabled) + { + autoRepairProxy.setRepairByKeyspace(repairType, enabled); + } + + public void setAutoRepairMaxRetriesCount(String repairType, int retries) + { + autoRepairProxy.setAutoRepairMaxRetriesCount(repairType, retries); + } + + public void setAutoRepairRetryBackoff(String repairType, String interval) + { + autoRepairProxy.setAutoRepairRetryBackoff(repairType, interval); + } } class ColumnFamilyStoreMBeanIterator implements Iterator> diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java index 2823bf112b03..3e025df1652e 100644 --- a/src/java/org/apache/cassandra/tools/NodeTool.java +++ b/src/java/org/apache/cassandra/tools/NodeTool.java @@ -94,6 +94,7 @@ public int execute(String... args) AbortBootstrap.class, AlterTopology.class, Assassinate.class, + AutoRepairStatus.class, CassHelp.class, CIDRFilteringStats.class, Cleanup.class, @@ -133,6 +134,7 @@ public int execute(String... args) GcStats.class, GetAuditLog.class, GetAuthCacheConfig.class, + GetAutoRepairConfig.class, GetBatchlogReplayTrottle.class, GetCIDRGroupsOfIP.class, GetColumnIndexSize.class, @@ -197,6 +199,7 @@ public int execute(String... args) Ring.class, Scrub.class, SetAuthCacheConfig.class, + SetAutoRepairConfig.class, SetBatchlogReplayThrottle.class, SetCacheCapacity.class, SetCacheKeysToSave.class, @@ -217,6 +220,7 @@ public int execute(String... args) SetTraceProbability.class, Sjk.class, Snapshot.class, + SSTableRepairedSet.class, Status.class, StatusAutoCompaction.class, StatusBackup.class, diff --git a/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java new file mode 100644 index 000000000000..bb594a010ff1 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/AutoRepairStatus.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.PrintStream; +import java.util.Set; + +import com.google.common.annotations.VisibleForTesting; + +import io.airlift.airline.Command; +import io.airlift.airline.Option; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool; +import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; + +import static com.google.common.base.Preconditions.checkArgument; + +/** + * Provides currently running auto-repair tasks. + */ +@Command(name = "autorepairstatus", description = "Print autorepair status") +public class AutoRepairStatus extends NodeTool.NodeToolCmd +{ + @VisibleForTesting + @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") + protected String repairType; + + @Override + public void execute(NodeProbe probe) + { + checkArgument(repairType != null, "--repair-type is required."); + PrintStream out = probe.output().out; + + if (probe.isAutoRepairDisabled()) + { + out.println("Auto-repair is not enabled"); + return; + } + + TableBuilder table = new TableBuilder(); + table.add("Active Repairs"); + Set ongoingRepairHostIds = probe.getAutoRepairOnGoingRepairHostIds(repairType); + table.add(getSetString(ongoingRepairHostIds)); + table.printTo(out); + } + + private String getSetString(Set hostIds) + { + if (hostIds.isEmpty()) + { + return "NONE"; + } + StringBuilder sb = new StringBuilder(); + for (String id : hostIds) + { + sb.append(id); + sb.append(","); + } + // remove last "," + sb.setLength(Math.max(sb.length() - 1, 0)); + return sb.toString(); + } +} diff --git a/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java new file mode 100644 index 000000000000..9744498de757 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/GetAutoRepairConfig.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.tools.nodetool; + +import java.io.PrintStream; + +import com.google.common.annotations.VisibleForTesting; + +import io.airlift.airline.Command; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool.NodeToolCmd; + +/** + * Prints all the configurations for AutoRepair through nodetool. + */ +@Command(name = "getautorepairconfig", description = "Print autorepair configurations") +public class GetAutoRepairConfig extends NodeToolCmd +{ + @VisibleForTesting + protected static PrintStream out = System.out; + + @Override + public void execute(NodeProbe probe) + { + if (probe.isAutoRepairDisabled()) + out.println("Auto-repair is not enabled"); + else + out.println(probe.autoRepairConfiguration()); + } +} diff --git a/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java new file mode 100644 index 000000000000..2a7b56732ac9 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/SSTableRepairedSet.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import io.airlift.airline.Option; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool; + +/** + * Provides a way to set the repaired state of SSTables without any downtime through nodetool. + */ +@Command(name = "sstablerepairedset", description = "Set the repaired state of SSTables for given keyspace/tables") +public class SSTableRepairedSet extends NodeTool.NodeToolCmd +{ + @Arguments(usage = "[ ]", description = "Optional keyspace followed by zero or more tables") + protected List args = new ArrayList<>(); + + @Option(title = "really-set", + name = { "--really-set" }, + description = "Really set the repaired state of SSTables. If not set, only print SSTables that would be affected.") + protected boolean reallySet = false; + + @Option(title = "is-repaired", + name = { "--is-repaired" }, + description = "Set SSTables to repaired state.") + protected boolean isRepaired = false; + + @Option(title = "is-unrepaired", + name = { "--is-unrepaired" }, + description = "Set SSTables to unrepaired state.") + protected boolean isUnrepaired = false; + + @Override + public void execute(NodeProbe probe) + { + PrintStream out = probe.output().out; + + if (isRepaired == isUnrepaired) + { + out.println("Exactly one of --is-repaired or --is-unrepaired must be provided."); + return; + } + + String message; + if (reallySet) + message = "Mutating repaired state of SSTables for"; + else + message = "Previewing repaired state mutation of SSTables for"; + + List keyspaces = parseOptionalKeyspace(args, probe, KeyspaceSet.NON_LOCAL_STRATEGY); + List tables = new ArrayList<>(Arrays.asList(parseOptionalTables(args))); + + if (args.isEmpty()) + message += " all keyspaces"; + else + message += tables.isEmpty() ? " all tables" : " tables " + String.join(", ", tables) + + " in keyspace " + keyspaces.get(0); + message += " to " + (isRepaired ? "repaired" : "unrepaired"); + out.println(message); + + List sstableList = new ArrayList<>(); + for (String keyspace : keyspaces) + { + try + { + sstableList.addAll(probe.mutateSSTableRepairedState(isRepaired, !reallySet, keyspace, + tables.isEmpty() + ? probe.getAutoRepairTablesForKeyspace(keyspace) // mutate all tables + : tables)); // mutate specific tables + } + catch (InvalidRequestException e) + { + out.println(e.getMessage()); + } + } + if (!reallySet) + out.println("The following SSTables would be mutated:"); + else + out.println("The following SSTables were mutated:"); + for (String sstable : sstableList) + out.println(sstable); + } +} diff --git a/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java new file mode 100644 index 000000000000..cc00cabd0633 --- /dev/null +++ b/src/java/org/apache/cassandra/tools/nodetool/SetAutoRepairConfig.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.tools.nodetool; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Splitter; + +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import io.airlift.airline.Option; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.NodeTool.NodeToolCmd; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +import static com.google.common.base.Preconditions.checkArgument; + +/** + * Allows to set AutoRepair configuration through nodetool. + */ +@Command(name = "setautorepairconfig", description = "sets the autorepair configuration") +public class SetAutoRepairConfig extends NodeToolCmd +{ + @VisibleForTesting + @Arguments(title = " ", usage = " ", + description = "autorepair param and value.\nPossible autorepair parameters are as following: " + + "[start_scheduler|number_of_repair_threads|min_repair_interval|sstable_upper_threshold" + + "|enabled|table_max_repair_time|priority_hosts|forcerepair_hosts|ignore_dcs" + + "|history_clear_delete_hosts_buffer_interval|repair_primary_token_range_only" + + "|parallel_repair_count|parallel_repair_percentage" + + "|allow_parallel_replica_repair|allow_parallel_repair_across_schedules" + + "|materialized_view_repair_enabled|repair_max_retries" + + "|repair_retry_backoff|repair_session_timeout|min_repair_task_duration" + + "|repair_by_keyspace|token_range_splitter.]", + required = true) + protected List args = new ArrayList<>(); + + @VisibleForTesting + @Option(title = "repair type", name = { "-t", "--repair-type" }, description = "Repair type") + protected String repairTypeStr; + + @VisibleForTesting + protected PrintStream out = System.out; + + private static final String TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX = "token_range_splitter."; + + @Override + public void execute(NodeProbe probe) + { + checkArgument(args.size() == 2, "setautorepairconfig requires param-type, and value args."); + String paramType = args.get(0); + String paramVal = args.get(1); + + if (probe.isAutoRepairDisabled() && !paramType.equalsIgnoreCase("start_scheduler")) + { + out.println("Auto-repair is not enabled"); + return; + } + + // options that do not require --repair-type option + switch (paramType) + { + case "start_scheduler": + if (Boolean.parseBoolean(paramVal)) + { + probe.startAutoRepairScheduler(); + } + return; + case "history_clear_delete_hosts_buffer_interval": + probe.setAutoRepairHistoryClearDeleteHostsBufferDuration(paramVal); + return; + case "min_repair_task_duration": + probe.setAutoRepairMinRepairTaskDuration(paramVal); + return; + default: + // proceed to options that require --repair-type option + break; + } + + // options below require --repair-type option + Objects.requireNonNull(repairTypeStr, "--repair-type is required for this parameter."); + + if(paramType.startsWith(TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX)) + { + final String key = paramType.replace(TOKEN_RANGE_SPLITTER_PROPERTY_PREFIX, ""); + probe.setAutoRepairTokenRangeSplitterParameter(repairTypeStr, key, paramVal); + return; + } + + switch (paramType) + { + case "enabled": + probe.setAutoRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "number_of_repair_threads": + probe.setAutoRepairThreads(repairTypeStr, Integer.parseInt(paramVal)); + break; + case "min_repair_interval": + probe.setAutoRepairMinInterval(repairTypeStr, paramVal); + break; + case "sstable_upper_threshold": + probe.setAutoRepairSSTableCountHigherThreshold(repairTypeStr, Integer.parseInt(paramVal)); + break; + case "table_max_repair_time": + probe.setAutoRepairTableMaxRepairTime(repairTypeStr, paramVal); + break; + case "priority_hosts": + if (paramVal!= null && !paramVal.isEmpty()) + { + probe.setAutoRepairPriorityForHosts(repairTypeStr, paramVal); + } + break; + case "forcerepair_hosts": + probe.setAutoRepairForceRepairForHosts(repairTypeStr, paramVal); + break; + case "ignore_dcs": + Set ignoreDCs = new HashSet<>(); + for (String dc : Splitter.on(',').split(paramVal)) + { + ignoreDCs.add(dc); + } + probe.setAutoRepairIgnoreDCs(repairTypeStr, ignoreDCs); + break; + case "repair_primary_token_range_only": + probe.setAutoRepairPrimaryTokenRangeOnly(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "parallel_repair_count": + probe.setAutoRepairParallelRepairCount(repairTypeStr, Integer.parseInt(paramVal)); + break; + case "parallel_repair_percentage": + probe.setAutoRepairParallelRepairPercentage(repairTypeStr, Integer.parseInt(paramVal)); + break; + case "allow_parallel_replica_repair": + probe.setAutoRepairAllowParallelReplicaRepair(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "allow_parallel_replica_repair_across_schedules": + probe.setAutoRepairAllowParallelReplicaRepairAcrossSchedules(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "materialized_view_repair_enabled": + probe.setAutoRepairMaterializedViewRepairEnabled(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "repair_session_timeout": + probe.setAutoRepairSessionTimeout(repairTypeStr, paramVal); + break; + case "repair_by_keyspace": + probe.setAutoRepairRepairByKeyspace(repairTypeStr, Boolean.parseBoolean(paramVal)); + break; + case "repair_max_retries": + probe.setAutoRepairMaxRetriesCount(repairTypeStr, Integer.parseInt(paramVal)); + break; + case "repair_retry_backoff": + probe.setAutoRepairRetryBackoff(repairTypeStr, paramVal); + break; + default: + throw new IllegalArgumentException("Unknown parameter: " + paramType); + } + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java new file mode 100644 index 000000000000..a00e713cf298 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.repair; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.Util; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.service.AutoRepairService; + +import static org.hamcrest.Matchers.greaterThan; +import static org.junit.Assert.assertEquals; + +/** + * Distributed tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} scheduler's + * allow_parallel_replica_repair_across_schedules feature. + */ +public class AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest extends TestBaseImpl +{ + private static Cluster cluster; + + @BeforeClass + public static void init() throws IOException + { + // Configure a cluster with preview and incremental repair enabled in a way that preview repair can be + // run on all three nodes concurrently, but incremental repair can only be run when there are no parallel + // repairs. We should detect contention in the incremental repair scheduler but not preview repaired + // scheduler as a result. + cluster = Cluster.build(3) + .withConfig(config -> config + .set("auto_repair", + ImmutableMap.of( + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.PREVIEW_REPAIRED.getConfigName(), + ImmutableMap.of( + // Configure preview repair to run frequently to + // provoke contention with incremental scheduler. + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "3", + "allow_parallel_replica_repair", "true", + "min_repair_interval", "5s"), + AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "3", + // Don't allow parallel replica repair across + // schedules + "allow_parallel_replica_repair", "false", + "allow_parallel_replica_repair_across_schedules", "false", + "min_repair_interval", "5s")))) + .set("auto_repair.enabled", "true") + .set("auto_repair.global_settings.repair_retry_backoff", "5s") + .set("auto_repair.repair_task_min_duration", "0s") + .set("auto_repair.repair_check_interval", "5s")) + .start(); + + cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); + } + + @AfterClass + public static void tearDown() + { + cluster.close(); + } + + @Test + public void testScheduler() + { + cluster.forEach(i -> i.runOnInstance(() -> { + try + { + AutoRepairService.setup(); + AutoRepair.instance.setup(); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + })); + + // validate that the repair ran on all nodes + cluster.forEach(i -> i.runOnInstance(() -> { + // Expect contention on incremental repair across schedules + AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); + Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", + greaterThan(0L), + incrementalMetrics.repairDelayedBySchedule::getCount, + 5, + TimeUnit.MINUTES); + + // No repair contention should be observed for preview repaired since allow_parallel_replica_repair was true + AutoRepairMetrics previewMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.PREVIEW_REPAIRED); + assertEquals(0L, previewMetrics.repairDelayedByReplica.getCount()); + assertEquals(0L, previewMetrics.repairDelayedBySchedule.getCount()); + })); + } +} diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java new file mode 100644 index 000000000000..e726dc1a17f7 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.repair; + +import java.io.IOException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.Util; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.schema.SystemDistributedKeyspace; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.service.AutoRepairService; + +import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; +import static org.hamcrest.Matchers.greaterThan; +import static org.junit.Assert.assertEquals; + +/** + * Distributed tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} scheduler + */ +public class AutoRepairSchedulerTest extends TestBaseImpl +{ + private static Cluster cluster; + static SimpleDateFormat sdf; + + @BeforeClass + public static void init() throws IOException + { + // Define the expected date format pattern + String pattern = "EEE MMM dd HH:mm:ss z yyyy"; + // Create SimpleDateFormat object with the given pattern + sdf = new SimpleDateFormat(pattern); + sdf.setLenient(false); + // Configure a 3-node cluster with num_tokens: 4 and auto_repair enabled + cluster = Cluster.build(3) + .withTokenCount(4) + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(3, 4)) + .withConfig(config -> config + .set("num_tokens", 4) + .set("auto_repair", + ImmutableMap.of( + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + "parallel_repair_count", "2", + // Allow parallel replica repair to allow replicas + // to execute full repair at same time. + "allow_parallel_replica_repair", "true", + "min_repair_interval", "15s"), + AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), + ImmutableMap.of( + "initial_scheduler_delay", "5s", + "enabled", "true", + // Set parallel repair count to 3 to provoke + // contention between replicas when scheduling. + "parallel_repair_count", "3", + // Disallow parallel replica repair to prevent + // replicas from issuing incremental repair at + // same time. + "allow_parallel_replica_repair", "false", + // Run more aggressively since full repair is + // less restrictive about when it can run repair, + // so need to check more frequently to allow + // incremental to get an attempt in. + "min_repair_interval", "5s")))) + .set("auto_repair.enabled", "true") + .set("auto_repair.global_settings.repair_by_keyspace", "true") + .set("auto_repair.global_settings.repair_retry_backoff", "5s") + .set("auto_repair.repair_task_min_duration", "0s") + .set("auto_repair.repair_check_interval", "5s")) + .start(); + + cluster.schemaChange("CREATE KEYSPACE IF NOT EXISTS " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};"); + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk int, ck text, v1 int, v2 int, PRIMARY KEY (pk, ck)) WITH read_repair='NONE'")); + } + + @AfterClass + public static void tearDown() + { + cluster.close(); + } + + @Test + public void testScheduler() throws ParseException + { + // ensure there was no history of previous repair runs through the scheduler + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY), ConsistencyLevel.QUORUM); + assertEquals(0, rows.length); + + cluster.forEach(i -> i.runOnInstance(() -> { + try + { + AutoRepairService.setup(); + AutoRepair.instance.setup(); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + })); + + // validate that the repair ran on all nodes + cluster.forEach(i -> i.runOnInstance(() -> { + // Reduce sleeping if repair finishes quickly to speed up test but make it non-zero to provoke some + // contention. + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("1s"); + + AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); + Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + greaterThan(0L), + () -> incrementalMetrics.nodeRepairTimeInSec.getValue().longValue(), + 5, + TimeUnit.MINUTES); + + // Expect some contention on incremental repair. + Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", + greaterThan(0L), + incrementalMetrics.repairDelayedByReplica::getCount, + 5, + TimeUnit.MINUTES); + // Do not expect any contention across schedules since allow_parallel_replica_repairs across schedules + // was not configured. + assertEquals(0L, incrementalMetrics.repairDelayedBySchedule.getCount()); + + AutoRepairMetrics fullMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.FULL); + Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + greaterThan(0L), + () -> fullMetrics.nodeRepairTimeInSec.getValue().longValue(), + 5, + TimeUnit.MINUTES); + + // No repair contention should be observed for full repair since allow_parallel_replica_repair was true + assertEquals(0L, fullMetrics.repairDelayedByReplica.getCount()); + assertEquals(0L, fullMetrics.repairDelayedBySchedule.getCount()); + })); + + validate(AutoRepairConfig.RepairType.FULL.toString()); + validate(AutoRepairConfig.RepairType.INCREMENTAL.toString()); + } + + private void validate(String repairType) throws ParseException + { + Object[][] rows = cluster.coordinator(1).execute(String.format("SELECT repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn FROM %s.%s where repair_type='%s'", DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, repairType), ConsistencyLevel.QUORUM); + assertEquals(3, rows.length); + for (int node = 0; node < rows.length; node++) + { + Object[] row = rows[node]; + // repair_type + Assert.assertEquals(repairType, row[0].toString()); + // host_id + Assert.assertNotNull(UUID.fromString(row[1].toString())); + // ensure there is a legit repair_start_ts and repair_finish_ts + sdf.parse(row[2].toString()); + sdf.parse(row[3].toString()); + // the reason why the repair was scheduled + Assert.assertNotNull(row[4]); + Assert.assertEquals("MY_TURN", row[4].toString()); + } + } +} diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 2eab31c1128d..eb41f5b864d6 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -25,6 +25,7 @@ import java.io.IOError; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.Field; import java.math.BigInteger; import java.net.UnknownHostException; import java.nio.ByteBuffer; @@ -55,12 +56,14 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; + import org.apache.commons.lang3.StringUtils; import org.junit.Assume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.utils.Invariants; +import org.apache.cassandra.config.Config; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.db.AbstractReadCommandBuilder; @@ -129,6 +132,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.locator.ReplicaCollection; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; @@ -1358,4 +1362,13 @@ public static Map listSnapshots(ColumnFamilyStore cfs) return tagSnapshotsMap; } + // Replaces the global auto-repair config with a new config where auto-repair schedulling is enabled/disabled + public static void setAutoRepairEnabled(boolean enabled) throws Exception + { + Config config = DatabaseDescriptor.getRawConfig(); + config.auto_repair = new AutoRepairConfig(enabled); + Field configField = DatabaseDescriptor.class.getDeclaredField("conf"); + configField.setAccessible(true); + configField.set(null, config); + } } diff --git a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java index 7d27271a0dbd..d44dea211548 100644 --- a/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java +++ b/test/unit/org/apache/cassandra/config/DatabaseDescriptorRefTest.java @@ -114,6 +114,16 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.config.Config$CorruptedTombstoneStrategy", "org.apache.cassandra.config.Config$BatchlogEndpointStrategy", "org.apache.cassandra.config.Config$TombstonesMetricGranularity", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig$Options", + "org.apache.cassandra.repair.autorepair.AutoRepairConfig$RepairType", + "org.apache.cassandra.repair.autorepair.AutoRepairState", + "org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter", + "org.apache.cassandra.repair.autorepair.FullRepairState", + "org.apache.cassandra.repair.autorepair.IAutoRepairTokenRangeSplitter", + "org.apache.cassandra.repair.autorepair.IncrementalRepairState", + "org.apache.cassandra.repair.autorepair.PreviewRepairedState", + "org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter", "org.apache.cassandra.config.DatabaseDescriptor$ByteUnit", "org.apache.cassandra.config.DataRateSpec", "org.apache.cassandra.config.DataRateSpec$DataRateUnit", @@ -338,6 +348,7 @@ public class DatabaseDescriptorRefTest "org.apache.cassandra.utils.concurrent.RefCounted", "org.apache.cassandra.utils.concurrent.SelfRefCounted", "org.apache.cassandra.utils.concurrent.Transactional", + "org.apache.cassandra.utils.progress.ProgressListener", "org.apache.cassandra.utils.concurrent.UncheckedInterruptedException", }; diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 2c7a26026969..68d5302047be 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -39,6 +39,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.io.util.File; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.yaml.snakeyaml.error.YAMLException; import static org.apache.cassandra.config.CassandraRelevantProperties.CONFIG_ALLOW_SYSTEM_PROPERTIES; @@ -46,6 +47,7 @@ import static org.apache.cassandra.config.YamlConfigurationLoader.SYSTEM_PROPERTY_PREFIX; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -108,8 +110,8 @@ public void validateTypes() assertEquals("You have wrongly defined a config parameter of abstract type DurationSpec, DataStorageSpec or DataRateSpec." + "Please check the config docs, otherwise Cassandra won't be able to start with this parameter being set in cassandra.yaml.", Arrays.stream(Config.class.getFields()) - .filter(f -> !Modifier.isStatic(f.getModifiers())) - .filter(isDurationSpec.or(isDataRateSpec).or(isDataStorageSpec)).count(), 0); + .filter(f -> !Modifier.isStatic(f.getModifiers())) + .filter(isDurationSpec.or(isDataRateSpec).or(isDataStorageSpec)).count(), 0); } @Test @@ -117,12 +119,12 @@ public void updateInPlace() { Config config = new Config(); Map map = ImmutableMap.builder().put("storage_port", 123) - .put("commitlog_sync", Config.CommitLogSync.batch) - .put("seed_provider.class_name", "org.apache.cassandra.locator.SimpleSeedProvider") - .put("client_encryption_options.cipher_suites", Collections.singletonList("FakeCipher")) - .put("client_encryption_options.optional", false) - .put("client_encryption_options.enabled", true) - .build(); + .put("commitlog_sync", Config.CommitLogSync.batch) + .put("seed_provider.class_name", "org.apache.cassandra.locator.SimpleSeedProvider") + .put("client_encryption_options.cipher_suites", Collections.singletonList("FakeCipher")) + .put("client_encryption_options.optional", false) + .put("client_encryption_options.enabled", true) + .build(); Config updated = YamlConfigurationLoader.updateFromMap(map, true, config); assert updated == config : "Config pointers do not match"; assertThat(config.storage_port).isEqualTo(123); @@ -275,6 +277,12 @@ public void fromMapTest() Map encryptionOptions = ImmutableMap.of("cipher_suites", Collections.singletonList("FakeCipher"), "optional", false, "enabled", true); + Map autoRepairConfig = ImmutableMap.of("enabled", true, + "global_settings", + ImmutableMap.of("number_of_repair_threads", 1), + "repair_type_overrides", + ImmutableMap.of("full", + ImmutableMap.of("number_of_repair_threads", 2))); Map map = new ImmutableMap.Builder() .put("storage_port", storagePort) .put("commitlog_sync", commitLogSync) @@ -283,6 +291,7 @@ public void fromMapTest() .put("internode_socket_send_buffer_size", "5B") .put("internode_socket_receive_buffer_size", "5B") .put("commitlog_sync_group_window_in_ms", "42") + .put("auto_repair", autoRepairConfig) .build(); Config config = YamlConfigurationLoader.fromMap(map, Config.class); @@ -293,6 +302,9 @@ public void fromMapTest() assertEquals(true, config.client_encryption_options.enabled); // Check a nested object assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_send_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) assertEquals(new DataStorageSpec.IntBytesBound("5B"), config.internode_socket_receive_buffer_size); // Check names backward compatibility (CASSANDRA-17141 and CASSANDRA-15234) + assertTrue(config.auto_repair.enabled); + assertEquals(new DurationSpec.IntSecondsBound("6h"), config.auto_repair.getAutoRepairTableMaxRepairTime(AutoRepairConfig.RepairType.INCREMENTAL)); + config.auto_repair.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); } @Test diff --git a/test/unit/org/apache/cassandra/cql3/CQLTester.java b/test/unit/org/apache/cassandra/cql3/CQLTester.java index 05c64f956e18..88ab4fa27010 100644 --- a/test/unit/org/apache/cassandra/cql3/CQLTester.java +++ b/test/unit/org/apache/cassandra/cql3/CQLTester.java @@ -206,6 +206,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_CONNECTION_TIMEOUT_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_DRIVER_READ_TIMEOUT_MS; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_RANDOM_SEED; @@ -459,6 +460,7 @@ protected static void prePrepareServer() StorageService.instance.registerMBeans(); StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); SnapshotManager.instance.registerMBean(); + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); } // So derived classes can get enough intialization to start setting DatabaseDescriptor options @@ -3489,7 +3491,7 @@ protected static long seed() return SEED; } - protected static void setupSeed() + public static void setupSeed() { if (RANDOM != null) return; SEED = TEST_RANDOM_SEED.getLong(new DefaultRandom().nextLong()); @@ -3502,7 +3504,7 @@ public void resetSeed() RANDOM.setSeed(SEED); } - protected static void updateConfigs() + public static void updateConfigs() { if (CONFIG_GEN == null) CONFIG_GEN = new ConfigGenBuilder().build(); diff --git a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java index 8529d8ff8073..5d376dcf0721 100644 --- a/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java +++ b/test/unit/org/apache/cassandra/cql3/statements/DescribeStatementTest.java @@ -270,6 +270,19 @@ public void testDescribeVirtualTables() throws Throwable @Test public void testDescribe() throws Throwable + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(false); + helperTestDescribe(); + } + + @Test + public void testDescribeWithAutoRepair() throws Throwable + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(true); + helperTestDescribe(); + } + + public void helperTestDescribe() throws Throwable { try { @@ -833,7 +846,7 @@ public void testDescribeCreateLikeTable() throws Throwable requireNetwork(); DatabaseDescriptor.setDynamicDataMaskingEnabled(true); String souceTable = createTable(KEYSPACE_PER_TEST, - "CREATE TABLE %s (" + + "CREATE TABLE %s (" + " pk1 text, " + " pk2 int MASKED WITH DEFAULT, " + " ck1 int, " + @@ -1122,28 +1135,57 @@ private static String testTableOutput() private static String tableParametersCql() { - return "additional_write_policy = '99p'\n" + - " AND allow_auto_snapshot = true\n" + - " AND bloom_filter_fp_chance = 0.01\n" + - " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + - " AND cdc = false\n" + - " AND comment = ''\n" + - " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + - " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + - " AND memtable = 'default'\n" + - " AND crc_check_chance = 1.0\n" + - " AND fast_path = 'keyspace'\n" + - " AND default_time_to_live = 0\n" + - " AND extensions = {}\n" + - " AND gc_grace_seconds = 864000\n" + - " AND incremental_backups = true\n" + - " AND max_index_interval = 2048\n" + - " AND memtable_flush_period_in_ms = 0\n" + - " AND min_index_interval = 128\n" + - " AND read_repair = 'BLOCKING'\n" + - " AND transactional_mode = 'off'\n" + - " AND transactional_migration_from = 'none'\n" + - " AND speculative_retry = '99p';"; + if (!DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND fast_path = 'keyspace'\n" + + " AND default_time_to_live = 0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + + " AND speculative_retry = '99p';"; + } + else + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND fast_path = 'keyspace'\n" + + " AND default_time_to_live = 0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND transactional_mode = 'off'\n" + + " AND transactional_migration_from = 'none'\n" + + " AND speculative_retry = '99p'\n" + + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + } } private static String cqlQuoted(Map map) @@ -1153,24 +1195,49 @@ private static String cqlQuoted(Map map) private static String mvParametersCql() { - return "additional_write_policy = '99p'\n" + - " AND allow_auto_snapshot = true\n" + - " AND bloom_filter_fp_chance = 0.01\n" + - " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + - " AND cdc = false\n" + - " AND comment = ''\n" + - " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + - " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + - " AND memtable = 'default'\n" + - " AND crc_check_chance = 1.0\n" + - " AND extensions = {}\n" + - " AND gc_grace_seconds = 864000\n" + - " AND incremental_backups = true\n" + - " AND max_index_interval = 2048\n" + - " AND memtable_flush_period_in_ms = 0\n" + - " AND min_index_interval = 128\n" + - " AND read_repair = 'BLOCKING'\n" + - " AND speculative_retry = '99p';"; + if (!DatabaseDescriptor.getAutoRepairConfig().isAutoRepairSchedulingEnabled()) + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND speculative_retry = '99p';"; + } + else + { + return "additional_write_policy = '99p'\n" + + " AND allow_auto_snapshot = true\n" + + " AND bloom_filter_fp_chance = 0.01\n" + + " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + + " AND cdc = false\n" + + " AND comment = ''\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + + " AND memtable = 'default'\n" + + " AND crc_check_chance = 1.0\n" + + " AND extensions = {}\n" + + " AND gc_grace_seconds = 864000\n" + + " AND incremental_backups = true\n" + + " AND max_index_interval = 2048\n" + + " AND memtable_flush_period_in_ms = 0\n" + + " AND min_index_interval = 128\n" + + " AND read_repair = 'BLOCKING'\n" + + " AND speculative_retry = '99p'\n" + + " AND auto_repair = {'full_enabled': 'true', 'incremental_enabled': 'true', 'preview_repaired_enabled': 'true', 'priority': '0'};"; + } } private static String keyspaceOutput() diff --git a/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java new file mode 100644 index 000000000000..8bf96329bacb --- /dev/null +++ b/test/unit/org/apache/cassandra/db/streaming/CassandraStreamReceiverTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.streaming; + +import java.util.Collections; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.streaming.StreamOperation; +import org.apache.cassandra.streaming.StreamSession; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link org.apache.cassandra.db.streaming.CassandraStreamReceiver} + */ +public class CassandraStreamReceiverTest extends CQLTester +{ + @Mock + private StreamSession session; + + private static final String CDC_TABLE = "cdc_table"; + private static final String MV_TABLE = "mv_table"; + private static final String CDC_MV_TABLE = "cdc_mv_table"; + private static final String NO_CDC_MV_TABLE = "no_cdc_mv_table"; + + @Before + public void setup() + { + // Set cdc_on_repair_enabled materialized_views_on_repair to true + DatabaseDescriptor.setCDCOnRepairEnabled(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + // Enable materialized views + DatabaseDescriptor.setMaterializedViewsEnabled(true); + + MockitoAnnotations.initMocks(this); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=true", KEYSPACE, CDC_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=false", KEYSPACE, MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW IF NOT EXISTS %s.mv AS SELECT * FROM %s.%s WHERE pk IS NOT NULL PRIMARY KEY (pk)", KEYSPACE, KEYSPACE, MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=true", KEYSPACE, CDC_MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW IF NOT EXISTS %s.mv2 AS SELECT * FROM %s.%s WHERE pk IS NOT NULL PRIMARY KEY (pk)", KEYSPACE, KEYSPACE, CDC_MV_TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int) WITH cdc=false", KEYSPACE, NO_CDC_MV_TABLE)); + } + + @Test + public void testRequiresWritePathRepair() + { + // given a CDC table with a materialized view attached to it. + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + // Should require write path since cdc_on_repair_enabled and materialized_views_on_repair_enabled are both true. + assertTrue(receiver.requiresWritePath(cfs)); + } + + @Test + public void testRequiresWritePathBulkLoad() + { + // given a CDC table with a materialized view attached to it. + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + // Should require write path since cdc_on_repair_enabled and materialized_views_on_repair_enabled are both true. + assertTrue(receiver.requiresWritePath(cfs)); + } + + @Test + public void testDoesNotRequireWritePathNoCDCOrMV() + { + // Given cdc_on_repaired_enabled and materialized_views_on_repair_enabled are false + // requiresWritePath should still return false for a non-CDC table. + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(NO_CDC_MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + assertFalse(receiver.requiresWritePath(cfs)); + } + + @Test + public void testRequiresWritePathRepairMVOnly() + { + // Given cdc_on_repaired_enabled and materialized_views_on_repair_enabled are true + // requiresWritePath should return true for a table with materialized views. + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + + assertTrue(receiver.requiresWritePath(cfs)); + } + + @Test + public void testRequiresWritePathRepairCDCOnRepairEnabled() + { + // Given cdc_on_repaired_enabled and materialized_views_on_repair_enabled are true + // requiresWritePath should return true for a table with CDC enabled. + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.REPAIR); + CassandraStreamReceiver receiver = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + assertTrue(receiver.requiresWritePath(cfs)); + } + + @Test + public void testDoesNotRequireWritePathRepairCDCOnRepairEnabledFalse() + { + // Given cdc_on_repaired_enabled and materialized_views_on_repair_enabled are false + // requiresWritePath should return false for a table with CDC enabled. + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CDC_TABLE); + when(session.streamOperation()).thenReturn(StreamOperation.BULK_LOAD); + CassandraStreamReceiver receiver1 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + assertFalse(receiver1.requiresWritePath(cfs)); + + // When flipping cdc_on_repair_enabled to true + // requiresWritePath should return true. + DatabaseDescriptor.setCDCOnRepairEnabled(true); + CassandraStreamReceiver receiver2 = new CassandraStreamReceiver(cfs, session, Collections.EMPTY_LIST, 1); + assertTrue(receiver2.requiresWritePath(cfs)); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java new file mode 100644 index 000000000000..ec36bb5cfd0c --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigRepairTypeTest.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType} + */ +public class AutoRepairConfigRepairTypeTest +{ + @Test + public void testRepairTypeParsing() + { + Assert.assertEquals(AutoRepairConfig.RepairType.FULL, AutoRepairConfig.RepairType.parse("FULL")); + Assert.assertEquals(AutoRepairConfig.RepairType.FULL, AutoRepairConfig.RepairType.parse("FuLl")); + Assert.assertEquals(AutoRepairConfig.RepairType.FULL, AutoRepairConfig.RepairType.parse("full")); + Assert.assertEquals(AutoRepairConfig.RepairType.INCREMENTAL, AutoRepairConfig.RepairType.parse("INCREMENTAL")); + Assert.assertEquals(AutoRepairConfig.RepairType.INCREMENTAL, AutoRepairConfig.RepairType.parse("incremental")); + Assert.assertEquals(AutoRepairConfig.RepairType.INCREMENTAL, AutoRepairConfig.RepairType.parse("inCRemenTal")); + Assert.assertEquals(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, AutoRepairConfig.RepairType.parse("PREVIEW_REPAIRED")); + Assert.assertEquals(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, AutoRepairConfig.RepairType.parse("preview_repaired")); + Assert.assertEquals(AutoRepairConfig.RepairType.PREVIEW_REPAIRED, AutoRepairConfig.RepairType.parse("Preview_Repaired")); + } + + @Test(expected = NullPointerException.class) + public void testNullRepairTypeParsing() + { + AutoRepairConfig.RepairType.parse(null); + } + + @Test(expected = IllegalArgumentException.class) + public void testEmptyRepairTypeParsing() + { + AutoRepairConfig.RepairType.parse(""); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidRepairTypeParsing() + { + AutoRepairConfig.RepairType.parse("very_FULL"); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java new file mode 100644 index 000000000000..0cf51ca40884 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -0,0 +1,509 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.HashSet; +import java.util.Map; +import java.util.Collections; +import java.util.Set; + +import com.google.common.collect.ImmutableSet; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.config.ParameterizedClass; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.Options; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig} + */ +@RunWith(Parameterized.class) +public class AutoRepairConfigTest extends CQLTester +{ + private AutoRepairConfig config; + + private final Set testSet = ImmutableSet.of("dc1"); + + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters + public static Object[] repairTypes() + { + return AutoRepairConfig.RepairType.values(); + } + + @Before + public void setUp() + { + config = new AutoRepairConfig(true); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + } + + @Test + public void autoRepairConfigDefaultsAreNotNull() + { + AutoRepairConfig config = new AutoRepairConfig(); + assertNotNull(config.global_settings); + } + + @Test + public void autoRepairConfigRepairTypesAreNotNull() + { + AutoRepairConfig config = new AutoRepairConfig(); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + assertNotNull(config.getOptions(repairType)); + } + } + + @Test + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsEnabled() + { + config.global_settings.enabled = true; + + assertTrue(config.isAutoRepairEnabled(repairType)); + } + + @Test + public void testRepairMinDuration() + { + config = new AutoRepairConfig(false); + + config.setRepairTaskMinDuration("3s"); + assertEquals(3L, config.getRepairTaskMinDuration().toSeconds()); + } + + @Test + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledGlobally() + { + config = new AutoRepairConfig(false); + config.global_settings.enabled = true; + assertFalse(config.isAutoRepairEnabled(repairType)); + } + + @Test + public void testIsAutoRepairEnabledReturnsTrueWhenRepairIsDisabledForRepairType() + { + config.global_settings.enabled = true; + config.setAutoRepairEnabled(repairType, false); + assertFalse(config.getOptions(repairType).enabled); + } + + @Test + public void testSetAutoRepairEnabledNoMVOrCDC() + { + DatabaseDescriptor.setCDCEnabled(false); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + config.setAutoRepairEnabled(repairType, true); + + assertTrue(config.getOptions(repairType).enabled); + } + + @Test + public void testSetRepairByKeyspace() + { + // Should default to true. + assertTrue(config.getRepairByKeyspace(repairType)); + + config.setRepairByKeyspace(repairType, false); + + assertFalse(config.getOptions(repairType).repair_by_keyspace); + } + + @Test + public void testGetRepairByKeyspace() + { + config.global_settings.repair_by_keyspace = true; + + boolean result = config.getRepairByKeyspace(repairType); + + assertTrue(result); + } + + @Test + public void testSetRepairThreads() + { + config.setRepairThreads(repairType, 5); + + assertEquals(5, config.getOptions(repairType).number_of_repair_threads.intValue()); + } + + @Test + public void testGetRepairThreads() + { + config.global_settings.number_of_repair_threads = 5; + + int result = config.getRepairThreads(repairType); + + assertEquals(5, result); + } + + @Test + public void testGetRepairMinFrequencyInHours() + { + config.global_settings.min_repair_interval = new DurationSpec.IntSecondsBound("5s"); + + DurationSpec.IntSecondsBound result = config.getRepairMinInterval(repairType); + + assertEquals(5, result.toSeconds()); + } + + @Test + public void testSetRepairMinFrequencyInHours() + { + config.setRepairMinInterval(repairType, "5s"); + + assertEquals(5, config.getOptions(repairType).min_repair_interval.toSeconds()); + } + + @Test + public void testGetAutoRepairHistoryClearDeleteHostsBufferInSec() + { + config.history_clear_delete_hosts_buffer_interval = new DurationSpec.IntSecondsBound("5s"); + + int result = config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds(); + + assertEquals(5, result); + } + + @Test + public void testSetAutoRepairHistoryClearDeleteHostsBufferInSec() + { + config.setAutoRepairHistoryClearDeleteHostsBufferInterval("5s"); + + assertEquals(new DurationSpec.IntSecondsBound("5s"), config.history_clear_delete_hosts_buffer_interval); + } + + @Test + public void testGetRepairSSTableCountHigherThreshold() + { + config.global_settings.sstable_upper_threshold = 5; + + int result = config.getRepairSSTableCountHigherThreshold(repairType); + + assertEquals(5, result); + } + + @Test + public void testSetRepairSSTableCountHigherThreshold() + { + config.setRepairSSTableCountHigherThreshold(repairType, 5); + + assertEquals(5, config.getOptions(repairType).sstable_upper_threshold.intValue()); + } + + @Test + public void testGetAutoRepairTableMaxRepairTimeInSec() + { + config.global_settings.table_max_repair_time = new DurationSpec.IntSecondsBound("5s"); + + DurationSpec.IntSecondsBound result = config.getAutoRepairTableMaxRepairTime(repairType); + + assertEquals(5, result.toSeconds()); + } + + @Test + public void testSetAutoRepairTableMaxRepairTimeInSec() + { + config.setAutoRepairTableMaxRepairTime(repairType, "5s"); + + assertEquals(5, config.getOptions(repairType).table_max_repair_time.toSeconds()); +} + + @Test + public void testGetIgnoreDCs() + { + config.global_settings.ignore_dcs = testSet; + + Set result = config.getIgnoreDCs(repairType); + + assertEquals(testSet, result); + } + + @Test + public void testSetIgnoreDCs() + { + config.setIgnoreDCs(repairType, testSet); + + assertEquals(config.getOptions(repairType).ignore_dcs, testSet); + } + + @Test + public void testGetRepairPrimaryTokenRangeOnly() + { + config.global_settings.repair_primary_token_range_only = true; + + boolean result = config.getRepairPrimaryTokenRangeOnly(repairType); + + assertTrue(result); + } + + @Test + public void testSetRepairPrimaryTokenRangeOnly() + { + config.setRepairPrimaryTokenRangeOnly(repairType, true); + + assertTrue(config.getOptions(repairType).repair_primary_token_range_only); + } + + @Test + public void testGetParallelRepairPercentageInGroup() + { + config.global_settings.parallel_repair_percentage = 5; + + int result = config.getParallelRepairPercentage(repairType); + + assertEquals(5, result); + } + + @Test + public void testSetParallelRepairPercentageInGroup() + { + config.setParallelRepairPercentage(repairType, 5); + + assertEquals(5, config.getOptions(repairType).parallel_repair_percentage.intValue()); + } + + @Test + public void testGetParallelRepairCountInGroup() + { + config.global_settings.parallel_repair_count = 5; + + int result = config.getParallelRepairCount(repairType); + + assertEquals(5, result); + } + + @Test + public void testSetParallelRepairCountInGroup() + { + config.setParallelRepairCount(repairType, 5); + + assertEquals(5, config.getOptions(repairType).parallel_repair_count.intValue()); + } + + @Test + public void testGetAllowParallelReplicaRepair() + { + // should default to false + assertFalse(config.global_settings.allow_parallel_replica_repair); + assertFalse(config.getAllowParallelReplicaRepair(repairType)); + + // setting global to true should also cause repair type config to inherit. + config.global_settings.allow_parallel_replica_repair = true; + assertTrue(config.getAllowParallelReplicaRepair(repairType)); + + } + + @Test + public void testSetAllowParallelReplicaRepair() + { + // should default to false + assertFalse(config.getAllowParallelReplicaRepair(repairType)); + + // setting explicitly for repair type should update it + config.setAllowParallelReplicaRepair(repairType, true); + assertTrue(config.getAllowParallelReplicaRepair(repairType)); + } + + @Test + public void testGetAllowParallelReplicaRepairAcrossSchedules() + { + // should default to true + assertTrue(config.global_settings.allow_parallel_replica_repair_across_schedules); + assertTrue(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + // setting global to true should also cause repair type config to inherit. + config.global_settings.allow_parallel_replica_repair_across_schedules = false; + assertFalse(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + } + + @Test + public void testSetAllowParallelReplicaRepairAcrossSchedules() + { + // should default to true + assertTrue(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + + // setting explicitly for repair type should update it + config.setAllowParallelReplicaRepairAcrossSchedules(repairType, false); + assertFalse(config.getAllowParallelReplicaRepairAcrossSchedules(repairType)); + } + + @Test + public void testGetMaterializedViewRepairEnabled() + { + config.global_settings.materialized_view_repair_enabled = true; + + boolean result = config.getMaterializedViewRepairEnabled(repairType); + + assertTrue(result); + } + + @Test + public void testSetMVRepairEnabled() + { + config.setMaterializedViewRepairEnabled(repairType, true); + + assertTrue(config.getOptions(repairType).materialized_view_repair_enabled); + } + + @Test + public void testSetForceRepairNewNode() + { + config.setForceRepairNewNode(repairType, true); + + assertTrue(config.getOptions(repairType).force_repair_new_node); + } + + @Test + public void testGetForceRepairNewNode() + { + config.global_settings.force_repair_new_node = true; + + boolean result = config.getForceRepairNewNode(repairType); + + assertTrue(result); + } + + @Test + public void testIsAutoRepairSchedulingEnabledDefault() + { + config = new AutoRepairConfig(); + + boolean result = config.isAutoRepairSchedulingEnabled(); + + assertFalse(result); + } + + @Test + public void testIsAutoRepairSchedulingEnabledTrue() + { + boolean result = config.isAutoRepairSchedulingEnabled(); + + assertTrue(result); + } + + @Test + public void testGetDefaultOptionsMVRepairIsEnabledByDefault() + { + Options defaultOptions = Options.getDefaultOptions(); + + assertFalse(defaultOptions.materialized_view_repair_enabled); + } + + @Test + public void testGetDefaultOptionsTokenRangeSplitter() + { + Options defaultOptions = Options.getDefaultOptions(); + + ParameterizedClass expectedDefault = new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()); + + assertEquals(expectedDefault, defaultOptions.token_range_splitter); + assertEquals(RepairTokenRangeSplitter.class.getName(), AutoRepairConfig.newAutoRepairTokenRangeSplitter(repairType, defaultOptions.token_range_splitter).getClass().getName()); + } + + @Test(expected = ConfigurationException.class) + public void testInvalidTokenRangeSplitter() + { + AutoRepairConfig.newAutoRepairTokenRangeSplitter(repairType, new ParameterizedClass("invalid-class", Collections.emptyMap())); + } + + @Test + public void testSetInitialSchedulerDelay() + { + config.setInitialSchedulerDelay(repairType, "5s"); + + assertEquals(5, config.getOptions(repairType).initial_scheduler_delay.toSeconds()); + } + + @Test + public void testGetInitialSchedulerDelay() + { + config.global_settings.initial_scheduler_delay = new DurationSpec.IntSecondsBound("5s"); + + int result = config.getInitialSchedulerDelay(repairType).toSeconds(); + + assertEquals(5, result); + } + + @Test + public void testSetRepairSessionTimeout() + { + config.setRepairSessionTimeout(repairType, "1h"); + + assertEquals(3600, config.getOptions(repairType).repair_session_timeout.toSeconds()); + } + + @Test + public void testDefaultOptions() + { + Map defaultOptions = Options.getDefaultOptionsMap(); + Options options = defaultOptions.get(repairType); + assertFalse(options.enabled); + assertTrue(options.repair_by_keyspace); + assertEquals(Integer.valueOf(1), options.number_of_repair_threads); + assertEquals(Integer.valueOf(3), options.parallel_repair_count); + assertEquals(Integer.valueOf(3), options.parallel_repair_percentage); + assertEquals(Integer.valueOf(10000), options.sstable_upper_threshold); + assertEquals(new HashSet<>(), options.ignore_dcs); + assertTrue(options.repair_primary_token_range_only); + assertFalse(options.force_repair_new_node); + assertEquals(new DurationSpec.IntSecondsBound("6h"), options.table_max_repair_time); + assertFalse(options.materialized_view_repair_enabled); + assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), options.token_range_splitter); + assertEquals(new DurationSpec.IntSecondsBound("5m"), options.initial_scheduler_delay); + assertEquals(new DurationSpec.IntSecondsBound("3h"), options.repair_session_timeout); + assertEquals(new DurationSpec.IntSecondsBound("24h"), options.min_repair_interval); + } + + @Test + public void testGlobalOptions() + { + AutoRepairConfig config = new AutoRepairConfig(); + assertFalse(config.global_settings.enabled); + assertTrue(config.global_settings.repair_by_keyspace); + assertEquals(Integer.valueOf(1), config.global_settings.number_of_repair_threads); + assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_count); + assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_percentage); + assertEquals(Integer.valueOf(10000), config.global_settings.sstable_upper_threshold); + assertEquals(new HashSet<>(), config.global_settings.ignore_dcs); + assertTrue(config.global_settings.repair_primary_token_range_only); + assertFalse(config.global_settings.force_repair_new_node); + assertEquals(new DurationSpec.IntSecondsBound("6h"), config.global_settings.table_max_repair_time); + assertFalse(config.global_settings.materialized_view_repair_enabled); + assertEquals(new ParameterizedClass(RepairTokenRangeSplitter.class.getName(), Collections.emptyMap()), config.global_settings.token_range_splitter); + assertEquals(new DurationSpec.IntSecondsBound("5m"), config.global_settings.initial_scheduler_delay); + assertEquals(new DurationSpec.IntSecondsBound("3h"), config.global_settings.repair_session_timeout); + assertEquals(new DurationSpec.IntSecondsBound("24h"), config.global_settings.min_repair_interval); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java new file mode 100644 index 000000000000..ac9dac8236e8 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairKeyspaceTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.schema.TableMetadata; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; + +/** + * Unit tests for {@link org.apache.cassandra.schema.SystemDistributedKeyspace} + */ +public class AutoRepairKeyspaceTest +{ + @BeforeClass + public static void setupDatabaseDescriptor() + { + DatabaseDescriptor.daemonInitialization(); + } + + @Test + public void testEnsureAutoRepairTablesArePresent() + { + KeyspaceMetadata keyspaceMetadata = SystemDistributedKeyspace.metadata(); + Iterator iter = keyspaceMetadata.tables.iterator(); + Set actualDistributedTablesIter = new HashSet<>(); + while (iter.hasNext()) + { + actualDistributedTablesIter.add(iter.next().name); + } + + Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + Assert.assertTrue(actualDistributedTablesIter.contains(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java new file mode 100644 index 000000000000..d0b053a58778 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairMetricsTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class AutoRepairMetricsTest extends CQLTester +{ + + private AutoRepairMetrics metrics; + + @BeforeClass + public static void setupClass() throws Exception + { + setAutoRepairEnabled(true); + requireNetwork(); + AutoRepairUtils.setup(); + StorageService.instance.doAutoRepairSetup(); + + // Set min repair interval to an hour. + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairMinInterval(RepairType.FULL, "1h"); + } + + @Before + public void setup() + { + metrics = AutoRepairMetricsManager.getMetrics(RepairType.FULL); + } + + @Test + public void testShouldRecordRepairStartLagAndResetOnMyTurn() + { + // record a last finish repair time of one day. + long oneDayAgo = AutoRepair.instance.currentTimeMs() - 86_400_000; + metrics.recordRepairStartLag(oneDayAgo); + + // expect a recorded lag time of approximately 1 day (last repair finish time) - 1 hour (min repair interval) + long expectedLag = 86400 - 3600; + long recordedLag = metrics.repairStartLagSec.getValue(); + assertTrue(String.format("Expected at last 23h of lag (%d) but got (%d)", expectedLag, recordedLag), + recordedLag >= expectedLag); + // Given timing, allow at most 5 seconds of skew. + assertTrue(String.format("Expected 23h of lag (%d) but got a larger value (%d)", expectedLag, recordedLag), + recordedLag <= expectedLag + 5); + + // expect lag time to be restarted when recording a turn. + metrics.recordTurn(RepairTurn.MY_TURN); + assertEquals(0, metrics.repairStartLagSec.getValue().intValue()); + } + + @Test + public void testShouldRecordRepairStartLagOfZeroWhenFinishTimeIsWithinMinRepairInterval() + { + // record a last finish repair time of one 30 minutes + long thirtyMinutesAgo = AutoRepair.instance.currentTimeMs() - 1_800_000; + metrics.recordRepairStartLag(thirtyMinutesAgo); + + // expect 0 lag because last repair finish time was less than min repair interval + assertEquals(0, metrics.repairStartLagSec.getValue().intValue()); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java new file mode 100644 index 000000000000..8e6a559d3343 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java @@ -0,0 +1,903 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import com.google.common.collect.Sets; + +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.cql3.statements.schema.TableAttributes; +import org.apache.cassandra.repair.RepairCoordinator; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.locator.LocalStrategy; +import org.apache.cassandra.repair.RepairParallelism; +import org.apache.cassandra.repair.messages.RepairOption; +import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.apache.cassandra.service.StorageService; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.concurrent.ScheduledExecutorPlus; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.metrics.AutoRepairMetricsManager; +import org.apache.cassandra.metrics.AutoRepairMetrics; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.streaming.PreviewKind; +import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.apache.cassandra.utils.progress.ProgressEventType; +import org.apache.cassandra.utils.progress.ProgressListener; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.NOT_MY_TURN; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ +@RunWith(Parameterized.class) +public class AutoRepairParameterizedTest extends CQLTester +{ + private static final String KEYSPACE = "ks"; + private static final String TABLE = "tbl"; + private static final String TABLE_DISABLED_AUTO_REPAIR = "tbl_disabled_auto_repair"; + private static final String MV = "mv"; + private static TableMetadata cfm; + private static TableMetadata cfmDisabledAutoRepair; + private static Keyspace keyspace; + private static int timeFuncCalls; + @Mock + ScheduledExecutorPlus mockExecutor; + @Mock + AutoRepairState autoRepairState; + @Mock + RepairCoordinator repairRunnable; + + // Expected number of repairs to be executed. + private static int expectedRepairAssignments; + + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection repairTypes() + { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + @BeforeClass + public static void setupClass() throws Exception + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + setAutoRepairEnabled(true); + requireNetwork(); + AutoRepairUtils.setup(); + StorageService.instance.doAutoRepairSetup(); + DatabaseDescriptor.setCDCEnabled(false); + + // Calculate the expected number of keyspaces to be repaired, this should be all system keyspaces that are + // distributed, plus 1 for the table we created (ks.tbl). + int expectedKeyspacesGoingThroughRepair = 0; + for (Keyspace keyspace : Keyspace.all()) + { + // skip LocalStrategy keyspaces as these aren't repaired. + if (keyspace.getReplicationStrategy() instanceof LocalStrategy) + { + continue; + } + // skip system_traces keyspaces + if (keyspace.getName().equalsIgnoreCase(SchemaConstants.TRACE_KEYSPACE_NAME)) + { + continue; + } + + expectedKeyspacesGoingThroughRepair += 1; + } + // Since the splitter will unwrap a full token range, we expect twice as many repairs. + expectedRepairAssignments = expectedKeyspacesGoingThroughRepair * 2; + } + + @Before + public void setup() + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", KEYSPACE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", KEYSPACE, TABLE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i)) WITH auto_repair = {'full_enabled': 'false', 'incremental_enabled': 'false', 'preview_repaired_enabled': 'false', 'priority': '0'}", KEYSPACE, TABLE_DISABLED_AUTO_REPAIR)); + + QueryProcessor.executeInternal(String.format("CREATE MATERIALIZED VIEW %s.%s AS SELECT i, k from %s.%s " + + "WHERE k IS NOT null AND i IS NOT null PRIMARY KEY (i, k)", KEYSPACE, MV, KEYSPACE, TABLE)); + + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + //noinspection resource + MockitoAnnotations.openMocks(this); + + Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).truncateBlocking(); + Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).disableAutoCompaction(); + + Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).truncateBlocking(); + Keyspace.open(KEYSPACE).getColumnFamilyStore(MV).disableAutoCompaction(); + + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY).truncateBlocking(); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME).getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_HISTORY).truncateBlocking(); + + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); + executeCQL(); + + timeFuncCalls = 0; + AutoRepair.timeFunc = System::currentTimeMillis; + AutoRepair.sleepFunc = (Long startTime, TimeUnit unit) -> {}; + resetCounters(); + resetConfig(); + + AutoRepair.shuffleFunc = java.util.Collections::shuffle; + + keyspace = Keyspace.open(KEYSPACE); + cfm = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE).metadata(); + cfmDisabledAutoRepair = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE_DISABLED_AUTO_REPAIR).metadata(); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + } + + @After + public void tearDown() + { + System.clearProperty("cassandra.streaming.requires_view_build_during_repair"); + } + + private void resetCounters() + { + AutoRepairMetrics metrics = AutoRepairMetricsManager.getMetrics(repairType); + Metrics.removeMatching((name, metric) -> name.startsWith("repairTurn")); + metrics.repairTurnMyTurn = Metrics.counter(String.format("repairTurnMyTurn-%s", repairType)); + metrics.repairTurnMyTurnForceRepair = Metrics.counter(String.format("repairTurnMyTurnForceRepair-%s", repairType)); + metrics.repairTurnMyTurnDueToPriority = Metrics.counter(String.format("repairTurnMyTurnDueToPriority-%s", repairType)); + } + + private void resetConfig() + { + // prepare a fresh default config + AutoRepairConfig defaultConfig = new AutoRepairConfig(true); + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + defaultConfig.setAutoRepairEnabled(repairType, true); + defaultConfig.setMaterializedViewRepairEnabled(repairType, false); + } + + // reset the AutoRepairService config to default + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.repair_type_overrides = defaultConfig.repair_type_overrides; + config.global_settings = defaultConfig.global_settings; + config.history_clear_delete_hosts_buffer_interval = defaultConfig.history_clear_delete_hosts_buffer_interval; + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); + } + + private void executeCQL() + { + QueryProcessor.executeInternal("INSERT INTO ks.tbl (k, s) VALUES ('k', 's')"); + QueryProcessor.executeInternal("SELECT s FROM ks.tbl WHERE k='k'"); + Keyspace.open(SchemaConstants.DISTRIBUTED_KEYSPACE_NAME) + .getColumnFamilyStore(SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY) + .forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + } + + @Test + public void testRepairTurn() + { + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + Assert.assertNotEquals("Expected my turn for the repair", NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); + } + + @Test + public void testRepair() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + long lastRepairTime = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + //if repair was done then lastRepairTime should be non-zero + Assert.assertTrue(String.format("Expected lastRepairTime > 0, actual value lastRepairTime %d", + lastRepairTime), lastRepairTime > 0); + // repair start lag sec should be reset on a successful repair + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).repairStartLagSec.getValue().intValue()); + } + + @Test + public void testTooFrequentRepairs() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + //in the first round let repair run + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + Assert.assertNotEquals(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), + 0, consideredTables); + + //if repair was done in last 24 hours then it should not trigger another repair + config.setRepairMinInterval(repairType, "24h"); + AutoRepair.instance.repair(repairType); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertEquals(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + } + + @Test + public void testNonFrequentRepairs() + { + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + long prevCount = state.getTotalMVTablesConsideredForRepair(); + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual value lastRepairTime1 %d", + lastRepairTime1), lastRepairTime1 > 0); + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + Assert.assertNotEquals("Expected my turn for the repair", + NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); + AutoRepair.instance.repair(repairType); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertNotSame(String.format("Expected repair time to be same, actual value lastRepairTime1 %d, " + + "lastRepairTime2 %d", lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); + assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + } + + @Test + public void testGetPriorityHosts() + { + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + long prevCount = state.getTotalMVTablesConsideredForRepair(); + AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType, "0s"); + Assert.assertEquals(String.format("Priority host count is not same, actual value %d, expected value %d", + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), 0, AutoRepairUtils.getPriorityHosts(repairType).size()); + UUID myId = StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort()); + Assert.assertNotEquals("Expected my turn for the repair", NOT_MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myId)); + AutoRepair.instance.repair(repairType); + AutoRepairUtils.addPriorityHosts(repairType, Sets.newHashSet(FBUtilities.getBroadcastAddressAndPort())); + AutoRepair.instance.repair(repairType); + Assert.assertEquals(String.format("Priority host count is not same actual value %d, expected value %d", + AutoRepairUtils.getPriorityHosts(repairType).size(), 0), 0, AutoRepairUtils.getPriorityHosts(repairType).size()); + assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + } + + @Test + public void testCheckAutoRepairStartStop() throws Throwable + { + Integer prevMetricsCount = AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + long prevCount = state.getTotalMVTablesConsideredForRepair(); + config.setRepairMinInterval(repairType, "0s"); + config.setAutoRepairEnabled(repairType, false); + long lastRepairTime1 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + AutoRepair.instance.repair(repairType); + long lastRepairTime2 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + //Since repair has not happened, both the last repair times should be same + Assert.assertEquals(String.format("Expected lastRepairTime1 %d, and lastRepairTime2 %d to be same", + lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2); + + config.setAutoRepairEnabled(repairType, true); + AutoRepair.instance.repair(repairType); + //since repair is done now, so lastRepairTime1/lastRepairTime2 and lastRepairTime3 should not be same + long lastRepairTime3 = AutoRepair.instance.repairStates.get(repairType).getLastRepairTime(); + Assert.assertNotSame(String.format("Expected lastRepairTime1 %d, and lastRepairTime3 %d to be not same", + lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime3); + assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair()); + assertEquals(prevMetricsCount, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue()); + } + + @Test + public void testRepairPrimaryRangesByDefault() + { + Assert.assertTrue("Expected primary range repair only", + AutoRepairService.instance.getAutoRepairConfig().getRepairPrimaryTokenRangeOnly(repairType)); + } + + @Test + public void testGetAllMVs() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMaterializedViewRepairEnabled(repairType, false); + assertFalse(config.getMaterializedViewRepairEnabled(repairType)); + assertEquals(0, AutoRepairUtils.getAllMVs(repairType, keyspace, cfm).size()); + + config.setMaterializedViewRepairEnabled(repairType, true); + + assertTrue(config.getMaterializedViewRepairEnabled(repairType)); + assertEquals(Collections.singletonList(MV), AutoRepairUtils.getAllMVs(repairType, keyspace, cfm)); + config.setMaterializedViewRepairEnabled(repairType, false); + } + + + @Test + public void testMVRepair() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMaterializedViewRepairEnabled(repairType, true); + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + AutoRepair.instance.repair(repairType); + assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + + config.setMaterializedViewRepairEnabled(repairType, false); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + AutoRepair.instance.repair(repairType); + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + + config.setMaterializedViewRepairEnabled(repairType, true); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis()); + AutoRepair.instance.repair(repairType); + assertEquals(1, AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + } + + @Test + public void testSkipRepairSSTableCountHigherThreshold() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AutoRepairState state = AutoRepair.instance.repairStates.get(repairType); + ColumnFamilyStore cfsBaseTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(TABLE); + ColumnFamilyStore cfsMVTable = Keyspace.open(KEYSPACE).getColumnFamilyStore(MV); + Set preBaseTable = cfsBaseTable.getLiveSSTables(); + Set preMVTable = cfsBaseTable.getLiveSSTables(); + config.setRepairMinInterval(repairType, "0s"); + + for (int i = 0; i < 10; i++) + { + QueryProcessor.executeInternal(String.format("INSERT INTO %s.%s (k, i, v) VALUES('k1', %d, 'v1')", KEYSPACE, TABLE, i)); + cfsBaseTable.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + cfsMVTable.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + } + + Set postBaseTable = cfsBaseTable.getLiveSSTables(); + Set diffBaseTable = new HashSet<>(postBaseTable); + diffBaseTable.removeAll(preBaseTable); + assert diffBaseTable.size() == 10; + + Set postMVTable = cfsBaseTable.getLiveSSTables(); + Set diffMVTable = new HashSet<>(postMVTable); + diffMVTable.removeAll(preMVTable); + assert diffMVTable.size() == 10; + + int beforeCount = config.getRepairSSTableCountHigherThreshold(repairType); + config.setMaterializedViewRepairEnabled(repairType, true); + config.setRepairSSTableCountHigherThreshold(repairType, 9); + assertEquals(0, state.getSkippedTokenRangesCount()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + state.setLastRepairTime(0); + AutoRepair.instance.repair(repairType); + assertEquals(0, state.getTotalMVTablesConsideredForRepair()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + // skipping both the tables - one table is due to its repair has been disabled, and another one due to high sstable count + assertEquals(0, state.getSkippedTokenRangesCount()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(2, state.getSkippedTablesCount()); + assertEquals(2, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + + // set it to higher value, and this time, the tables should not be skipped + config.setRepairSSTableCountHigherThreshold(repairType, beforeCount); + state.setLastRepairTime(0); + state.setSkippedTablesCount(0); + state.setTotalMVTablesConsideredForRepair(0); + AutoRepair.instance.repair(repairType); + assertEquals(1, state.getTotalMVTablesConsideredForRepair()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertEquals(0, state.getSkippedTokenRangesCount()); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertEquals(1, state.getSkippedTablesCount()); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).skippedTablesCount.getValue().intValue()); + } + + @Test + public void testGetRepairState() + { + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getRepairKeyspaceCount()); + + AutoRepairState state = AutoRepair.instance.getRepairState(repairType); + state.setRepairKeyspaceCount(100); + + assertEquals(100L, AutoRepair.instance.getRepairState(repairType).getRepairKeyspaceCount()); + } + + @Test + public void testMetrics() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMaterializedViewRepairEnabled(repairType, true); + config.setRepairMinInterval(repairType, "0s"); + config.setRepairRetryBackoff(repairType, "0s"); + config.setAutoRepairTableMaxRepairTime(repairType, "0s"); + AutoRepair.timeFunc = () -> { + timeFuncCalls++; + return timeFuncCalls * 1000L; + }; + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L); + + AutoRepair.instance.repair(repairType); + + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).nodeRepairTimeInSec.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).clusterRepairTimeInSec.getValue() > 0); + assertEquals(1, AutoRepairMetricsManager.getMetrics(repairType).repairTurnMyTurn.getCount()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue() > 0); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue().intValue()); + + config.setAutoRepairTableMaxRepairTime(repairType, String.valueOf(Integer.MAX_VALUE-1) + 's'); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())) + .thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); + when(autoRepairState.getFailedTokenRangesCount()).thenReturn(10); + when(autoRepairState.getSucceededTokenRangesCount()).thenReturn(11); + when(autoRepairState.getLongestUnrepairedSec()).thenReturn(10); + + AutoRepair.instance.repair(repairType); + assertEquals(0, AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue()); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).succeededTokenRangesCount.getValue() > 0); + assertTrue(AutoRepairMetricsManager.getMetrics(repairType).longestUnrepairedSec.getValue() > 0); + } + + @Test + public void testRepairWaitsForRepairToFinishBeforeSchedullingNewSession() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + when(autoRepairState.getLastRepairTime()).thenReturn((long) 0); + AtomicInteger getRepairRunnableCalls = new AtomicInteger(); + AtomicReference prevListener = new AtomicReference<>(); + doAnswer(invocation -> { + if (getRepairRunnableCalls.getAndIncrement() > 0) + { + // progress listener from previous repair should be signalled before starting new repair + assertTrue(prevListener.get().condition.isSignalled()); + } + getRepairRunnableCalls.incrementAndGet(); + return repairRunnable; + }).when(autoRepairState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); + doAnswer(invocation -> { + // sending out a COMPLETE event with a 10ms delay + Executors.newScheduledThreadPool(1).schedule(() -> { + invocation.getArgument(0, AutoRepair.RepairProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + }, 10, TimeUnit.MILLISECONDS); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); + + AutoRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); + AutoRepair.instance.repair(repairType); + } + + @Test + public void testDisabledAutoRepairForATableThroughTableLevelConfiguration() + { + Assert.assertTrue(cfm.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.FULL)); + Assert.assertTrue(cfm.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.INCREMENTAL)); + Assert.assertFalse(cfmDisabledAutoRepair.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.FULL)); + Assert.assertFalse(cfmDisabledAutoRepair.params.autoRepair.repairEnabled(AutoRepairConfig.RepairType.INCREMENTAL)); + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairMinInterval(repairType, "0s"); + int disabledTablesRepairCountBefore = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + AutoRepair.instance.repair(repairType); + int consideredTables = AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair(); + Assert.assertNotSame(String.format("Expected total repaired tables > 0, actual value %s ", consideredTables), + 0, consideredTables); + int disabledTablesRepairCountAfter = AutoRepair.instance.repairStates.get(repairType).getTotalDisabledTablesRepairCount(); + Assert.assertTrue(String.format("A table %s should be skipped from auto repair, expected value: %d, actual value %d ", TABLE_DISABLED_AUTO_REPAIR, disabledTablesRepairCountBefore + 1, disabledTablesRepairCountAfter), + disabledTablesRepairCountBefore < disabledTablesRepairCountAfter); + } + + @Test + public void testTableAttribute() + { + assertTrue(TableAttributes.validKeywords().contains("auto_repair")); + } + + @Test + public void testDefaultAutomatedRepair() + { + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + Assert.assertTrue(String.format("expected repair type %s to be enabled on table %s", repairType, cfm.name), + cfm.params.autoRepair.repairEnabled(repairType)); + Assert.assertFalse(String.format("expected repair type %s to be disabled on table %s", repairType, cfmDisabledAutoRepair.name), + cfmDisabledAutoRepair.params.autoRepair.repairEnabled(repairType)); + } + } + + @Test + public void testRepairShufflesKeyspacesAndTables() + { + AtomicInteger shuffleKeyspacesCall = new AtomicInteger(); + AtomicInteger shuffleTablesCall = new AtomicInteger(); + AtomicInteger keyspaceCount = new AtomicInteger(); + AutoRepair.shuffleFunc = (List list) -> { + // check whether was invoked for keyspaces or tables + if (list.contains(KEYSPACE)) + { + shuffleKeyspacesCall.getAndIncrement(); + keyspaceCount.set(list.size()); + } + else + // presume list not containing a keyspace is for tables. + shuffleTablesCall.getAndIncrement(); + }; + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repair(repairType); + + // Expect a single invocation for keyspaces + assertEquals(1, shuffleKeyspacesCall.get()); + // Expect an invocation for tables for each keyspace + assertNotEquals(0, keyspaceCount.get()); + assertEquals(keyspaceCount.get(), shuffleTablesCall.get()); + } + + @Test + public void testRepairTakesLastRepairTimeFromDB() + { + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.setMaterializedViewRepairEnabled(repairType, true); + long lastRepairTime = System.currentTimeMillis() - 1000; + AutoRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime); + AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0); + config.setRepairMinInterval(repairType, "1h"); + + AutoRepair.instance.repair(repairType); + + // repair scheduler should not attempt to run repair as last repair time in DB is current time - 1s + assertEquals(0, AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair()); + // repair scheduler should load the repair time from the DB + assertEquals(lastRepairTime, AutoRepair.instance.repairStates.get(repairType).getLastRepairTime()); + } + + @Test + public void testRepairMaxRetries() + { + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AtomicInteger sleepCalls = new AtomicInteger(); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + sleepCalls.getAndIncrement(); + assertEquals(TimeUnit.SECONDS, unit); + assertEquals(config.getRepairRetryBackoff(repairType).toSeconds(), (long) duration); + }; + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + + AutoRepair.instance.repair(repairType); + + // Expect configured retries for each keyspace expected to be repaired + assertEquals(config.getRepairMaxRetries(repairType)*expectedRepairAssignments, sleepCalls.get()); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(0); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(expectedRepairAssignments); + } + + @Test + public void testRepairSuccessAfterRetry() + { + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + AtomicInteger sleepCalls = new AtomicInteger(); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + sleepCalls.getAndIncrement(); + assertEquals(TimeUnit.SECONDS, unit); + assertEquals(config.getRepairRetryBackoff(repairType).toSeconds(), (long) duration); + }; + doAnswer(invocation -> { + if (sleepCalls.get() == 0) + { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + } + else + { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + } + + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); + config.setRepairMinInterval(repairType, "0s"); + config.setRepairMaxRetries(repairType, 1); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + AutoRepair.instance.repair(repairType); + + assertEquals(1, sleepCalls.get()); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); + } + + @Test + public void testRepairDoesNotThrowsForIRWithMVReplayButMVRepairDisabled() + { + AutoRepair.instance.setup(); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + AutoRepairService.instance.getAutoRepairConfig().setMaterializedViewRepairEnabled(repairType, false); + + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) + { + try + { + AutoRepair.instance.repair(repairType); + } + catch (ConfigurationException ignored) + { + fail("ConfigurationException not expected"); + } + } + else + { + AutoRepair.instance.repair(repairType); + } + } + + @Test + public void testRepairThrowsForIRWithMVReplay() + { + AutoRepair.instance.setup(); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + AutoRepairService.instance.getAutoRepairConfig().setMaterializedViewRepairEnabled(repairType, true); + + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) + { + try + { + AutoRepair.instance.repair(repairType); + fail("Expected ConfigurationException"); + } + catch (ConfigurationException ignored) + { + } + } + else + { + AutoRepair.instance.repair(repairType); + } + } + + @Test + public void testRepairThrowsForIRWithCDCReplay() + { + AutoRepair.instance.setup(); + DatabaseDescriptor.setCDCEnabled(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + + if (repairType == AutoRepairConfig.RepairType.INCREMENTAL) + { + try + { + AutoRepair.instance.repair(repairType); + fail("Expected ConfigurationException"); + } + catch (ConfigurationException ignored) + { + } + } + else + { + AutoRepair.instance.repair(repairType); + } + } + + @Test + public void testSoakAfterImmediateRepair() + { + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("10s"); + AtomicInteger sleepCalls = new AtomicInteger(); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + sleepCalls.getAndIncrement(); + assertEquals(TimeUnit.MILLISECONDS, unit); + assertTrue(config.getRepairTaskMinDuration().toMilliseconds() >= duration); + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); + }; + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + + AutoRepair.instance.repair(repairType); + + assertEquals(1, sleepCalls.get()); + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); + } + + @Test + public void testNoSoakAfterRepair() + { + when(autoRepairState.getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean())).thenReturn(repairRunnable); + doAnswer(invocation -> { + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + return null; + }).when(repairRunnable).addProgressListener(Mockito.any()); + AutoRepairConfig config = AutoRepairService.instance.getAutoRepairConfig(); + config.repair_task_min_duration = new DurationSpec.LongSecondsBound("0s"); + AutoRepair.sleepFunc = (Long duration, TimeUnit unit) -> { + fail("Should not sleep after repair"); + }; + config.setRepairMinInterval(repairType, "0s"); + AutoRepair.instance.repairStates.put(repairType, autoRepairState); + + AutoRepair.instance.repair(repairType); + + verify(autoRepairState, times(1)).setSucceededTokenRangesCount(expectedRepairAssignments); + verify(autoRepairState, times(1)).setSkippedTokenRangesCount(0); + verify(autoRepairState, times(1)).setFailedTokenRangesCount(0); + } + + @Test + public void testSchedulerIgnoresErrorsFromUnrelatedRepairRunables() + { + RepairOption options = new RepairOption(RepairParallelism.PARALLEL, true, repairType == AutoRepairConfig.RepairType.INCREMENTAL, false, + AutoRepairService.instance.getAutoRepairConfig().getRepairThreads(repairType), Collections.emptySet(), + false, false, PreviewKind.NONE, false, true, true, false, false, false); + AutoRepairState repairState = AutoRepair.instance.repairStates.get(repairType); + AutoRepairState spyState = spy(repairState); + AtomicReference failingListener = new AtomicReference<>(); + AtomicInteger repairRunableCalls = new AtomicInteger(); + doAnswer((InvocationOnMock inv ) -> { + RepairCoordinator runnable = spy(repairState.getRepairRunnable(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2), + inv.getArgument(3))); + if (repairRunableCalls.getAndIncrement() == 0) + { + // this will be used for first repair job + doAnswer(invocation -> { + // repair runnable for the first repair job will immediately fail + failingListener.set(invocation.getArgument(0, AutoRepair.RepairProgressListener.class)); + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(runnable).addProgressListener(Mockito.any()); + } + else + { + // this will be used for subsequent repair jobs + doAnswer(invocation -> { + if (repairRunableCalls.get() > 0) + { + // repair runnable for the subsequent repair jobs will immediately complete + invocation.getArgument(0, ProgressListener.class).progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0)); + + } + // repair runnable for the first repair job will continue firing ERROR events + failingListener.get().progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0)); + return null; + }).when(runnable).addProgressListener(Mockito.any()); + } + return runnable; + }).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(), Mockito.any(), anyBoolean()); + when(spyState.getLastRepairTime()).thenReturn((long) 0); + AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(repairType, 0); + AutoRepair.instance.repairStates.put(repairType, spyState); + + AutoRepair.instance.repair(repairType); + + assertEquals(1, (int) AutoRepairMetricsManager.getMetrics(repairType).failedTokenRangesCount.getValue()); + // only the first repair job should have failed despite it continuously firing ERROR events + verify(spyState, times(1)).setFailedTokenRangesCount(1); + } + + @Test + public void testProgressError() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.ERROR, 0, 0, "test")); + + assertFalse(listener.success); + assertTrue(listener.condition.isSignalled()); + } + + @Test + public void testProgressProgress() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.PROGRESS, 0, 0, "test")); + + assertFalse(listener.success); + assertFalse(listener.condition.isSignalled()); + } + + @Test + public void testProgresComplete() + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + + listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); + + assertTrue(listener.success); + assertTrue(listener.condition.isSignalled()); + } + + @Test + public void testAwait() throws Exception + { + AutoRepair.RepairProgressListener listener = new AutoRepair.RepairProgressListener(repairType); + listener.progress("test", new ProgressEvent(ProgressEventType.COMPLETE, 0, 0, "test")); + + listener.await(new DurationSpec.IntSecondsBound("12h")); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java new file mode 100644 index 000000000000..97e80364eed7 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateFactoryTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import org.junit.Test; + +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType} + */ +public class AutoRepairStateFactoryTest +{ + @Test + public void testGetRepairState() + { + AutoRepairState state = RepairType.getAutoRepairState(RepairType.FULL); + + assertTrue(state instanceof FullRepairState); + + state = RepairType.getAutoRepairState(RepairType.INCREMENTAL); + + assertTrue(state instanceof IncrementalRepairState); + + state = RepairType.getAutoRepairState(RepairType.PREVIEW_REPAIRED); + + assertTrue(state instanceof PreviewRepairedState); + } + + @Test + public void testGetRepairStateSupportsAllRepairTypes() + { + for (RepairType repairType : RepairType.values()) + { + try + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + assertNotNull(state); + } catch (IllegalArgumentException e) + { + assertNull(e); + } + } + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java new file mode 100644 index 000000000000..422ebdff5192 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Arrays; +import java.util.Collection; +import java.util.UUID; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.progress.ProgressEvent; +import org.mockito.Mock; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.MockitoAnnotations.initMocks; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairState} + */ +@RunWith(Parameterized.class) +public class AutoRepairStateTest extends CQLTester +{ + private static final String testTable = "test"; + + @Parameterized.Parameter + public RepairType repairType; + + @Mock + ProgressEvent progressEvent; + + @Parameterized.Parameters + public static Collection repairTypes() + { + return Arrays.asList(RepairType.values()); + } + + @Before + public void setUp() + { + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + initMocks(this); + createTable(String.format("CREATE TABLE IF NOT EXISTS %s.%s (pk int PRIMARY KEY, v int)", KEYSPACE, testTable)); + } + + @Test + public void testGetRepairRunnable() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + AutoRepairService.setup(); + + Runnable runnable = state.getRepairRunnable(KEYSPACE, ImmutableList.of(testTable), ImmutableSet.of(), false); + + assertNotNull(runnable); + } + + @Test + public void testGetLastRepairTime() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.lastRepairTimeInMs = 1; + + assertEquals(1, state.getLastRepairTime()); + } + + @Test + public void testSetTotalTablesConsideredForRepair() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setTotalTablesConsideredForRepair(1); + + assertEquals(1, state.totalTablesConsideredForRepair); + } + + @Test + public void testGetTotalTablesConsideredForRepair() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.totalTablesConsideredForRepair = 1; + + assertEquals(1, state.getTotalTablesConsideredForRepair()); + } + + @Test + public void testSetLastRepairTimeInMs() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setLastRepairTime(1); + + assertEquals(1, state.lastRepairTimeInMs); + } + + @Test + public void testGetClusterRepairTimeInSec() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.clusterRepairTimeInSec = 1; + + assertEquals(1, state.getClusterRepairTimeInSec()); + } + + @Test + public void testGetNodeRepairTimeInSec() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.nodeRepairTimeInSec = 1; + + assertEquals(1, state.getNodeRepairTimeInSec()); + } + + @Test + public void testSetRepairInProgress() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setRepairInProgress(true); + + assertTrue(state.repairInProgress); + } + + @Test + public void testIsRepairInProgress() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairInProgress = true; + + assertTrue(state.isRepairInProgress()); + } + + @Test + public void testSetSkippedTokenRangesCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setSkippedTokenRangesCount(1); + + assertEquals(1, state.skippedTokenRangesCount); + } + + @Test + public void testGetSkippedTokenRangesCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.skippedTokenRangesCount = 1; + + assertEquals(1, state.getSkippedTokenRangesCount()); + } + + @Test + public void testGetLongestUnrepairedSecNull() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.longestUnrepairedNode = null; + + try + { + assertEquals(0, state.getLongestUnrepairedSec()); + } + catch (Exception e) + { + assertNull(e); + } + } + + @Test + public void testGetLongestUnrepairedSec() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.longestUnrepairedNode = new AutoRepairHistory(UUID.randomUUID(), "", 0, 1000, + null, 0, false); + AutoRepairState.timeFunc = () -> 2000L; + + try + { + assertEquals(1, state.getLongestUnrepairedSec()); + } + catch (Exception e) + { + assertNull(e); + } + } + + @Test + public void testSetTotalMVTablesConsideredForRepair() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setTotalMVTablesConsideredForRepair(1); + + assertEquals(1, state.totalMVTablesConsideredForRepair); + } + + @Test + public void testGetTotalMVTablesConsideredForRepair() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.totalMVTablesConsideredForRepair = 1; + + assertEquals(1, state.getTotalMVTablesConsideredForRepair()); + } + + @Test + public void testSetNodeRepairTimeInSec() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setNodeRepairTimeInSec(1); + + assertEquals(1, state.nodeRepairTimeInSec); + } + + @Test + public void testSetClusterRepairTimeInSec() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setClusterRepairTimeInSec(1); + + assertEquals(1, state.clusterRepairTimeInSec); + } + + @Test + public void testSetRepairKeyspaceCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setRepairKeyspaceCount(1); + + assertEquals(1, state.repairKeyspaceCount); + } + + @Test + public void testGetRepairKeyspaceCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.repairKeyspaceCount = 1; + + assertEquals(1, state.getRepairKeyspaceCount()); + } + + @Test + public void testSetLongestUnrepairedNode() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + + state.setLongestUnrepairedNode(history); + + assertEquals(history, state.longestUnrepairedNode); + } + + @Test + public void testSetSucceededTokenRangesCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setSucceededTokenRangesCount(1); + + assertEquals(1, state.succeededTokenRangesCount); + } + + @Test + public void testGetSucceededTokenRangesCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.succeededTokenRangesCount = 1; + + assertEquals(1, state.getSucceededTokenRangesCount()); + } + + @Test + public void testSetFailedTokenRangesCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + + state.setFailedTokenRangesCount(1); + + assertEquals(1, state.failedTokenRangesCount); + } + + @Test + public void testGetFailedTokenRangesCount() + { + AutoRepairState state = RepairType.getAutoRepairState(repairType); + state.failedTokenRangesCount = 1; + + assertEquals(1, state.getFailedTokenRangesCount()); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java new file mode 100644 index 000000000000..1bf8e52f9849 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTablePropertyTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.Map; + +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.SimpleBuilders; +import org.apache.cassandra.db.rows.ColumnData; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SchemaKeyspace; +import org.apache.cassandra.schema.SchemaKeyspaceTables; +import org.apache.cassandra.utils.ByteBufferUtil; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +/** + * Unit tests that verifies "auto_repair" is not included in Schema mutation + * {@link org.apache.cassandra.schema.SchemaKeyspace} if AutoRepair is disabled + */ +public class AutoRepairTablePropertyTest extends CQLTester +{ + @Test + public void testSchedulerDisabledNoColumnReturned() + { + helperTestTableProperty(false); + } + + @Test + public void testSchedulerEnabledShouldReturnColumnReturned() + { + helperTestTableProperty(true); + } + + public void helperTestTableProperty(boolean autoRepairOn) + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairSchedulingEnabled(autoRepairOn); + DatabaseDescriptor.setAccordTransactionsEnabled(false); + + Map systemSchemaTables = Map.of(SchemaKeyspaceTables.TABLES, "table_name", SchemaKeyspaceTables.VIEWS, "view_name"); + for (Map.Entry systemSchema : systemSchemaTables.entrySet()) + { + ColumnFamilyStore tables = Keyspace.open(SchemaConstants.SCHEMA_KEYSPACE_NAME).getColumnFamilyStore(systemSchema.getKey()); + SimpleBuilders.RowBuilder builder = new SimpleBuilders.RowBuilder(tables.metadata(), systemSchema.getValue()); + SchemaKeyspace.addTableParamsToRowBuilder(tables.metadata().params, builder, false); + Row row = builder.build(); + ColumnMetadata autoRepair = tables.metadata().getColumn(ByteBufferUtil.bytes("auto_repair")); + ColumnData data = row.getCell(autoRepair); + if (autoRepairOn) + { + assertNotNull(data); + } + else + { + // if AutoRepair is not enabled, the column should not be returned + // as part of the system_schema.tables mutation + assertNull(data); + } + } + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java new file mode 100644 index 000000000000..1eceb386ee25 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.Assert; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.schema.ReplicationParams; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.schema.SchemaTestUtil; +import org.apache.cassandra.service.AutoRepairService; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepair} + */ +public class AutoRepairTest extends CQLTester +{ + @BeforeClass + public static void setupClass() throws Exception + { + setAutoRepairEnabled(true); + requireNetwork(); + } + + @Before + public void setup() + { + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.FULL, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); + AutoRepairService.setup(); + } + + @Test + public void testSetup() + { + AutoRepair.instance.setup(); + assertEquals(RepairType.values().length, AutoRepair.instance.repairExecutors.size()); + for (RepairType repairType : AutoRepair.instance.repairExecutors.keySet()) + { + int expectedTasks = AutoRepair.instance.repairExecutors.get(repairType).getPendingTaskCount() + + AutoRepair.instance.repairExecutors.get(repairType).getActiveTaskCount(); + assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), + expectedTasks > 0); + } + } + + @Test + public void testSafeGuardSetupCall() + { + // only one should be setup, and rest should be ignored + AutoRepair.instance.setup(); + AutoRepair.instance.setup(); + AutoRepair.instance.setup(); + + assertEquals(RepairType.values().length, AutoRepair.instance.repairExecutors.size()); + for (RepairType repairType : AutoRepair.instance.repairExecutors.keySet()) + { + int expectedTasks = AutoRepair.instance.repairExecutors.get(repairType).getPendingTaskCount() + + AutoRepair.instance.repairExecutors.get(repairType).getActiveTaskCount(); + assertTrue(String.format("Expected > 0 task in queue for %s but was %s", repairType, expectedTasks), + expectedTasks > 0); + } + } + + @Test(expected = ConfigurationException.class) + public void testSetupFailsWhenIREnabledWithCDCReplay() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + DatabaseDescriptor.setCDCEnabled(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); + } + + @Test + public void testNoFailureIfMVRepairOnButConfigIsOff() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); + DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, false); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + AutoRepair.instance.setup(); + } + + @Test(expected = ConfigurationException.class) + public void testSetupFailsWhenIREnabledWithMVReplay() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(RepairType.INCREMENTAL, true); + DatabaseDescriptor.getAutoRepairConfig().setMaterializedViewRepairEnabled(RepairType.INCREMENTAL, true); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + AutoRepair.instance.isSetupDone = false; + AutoRepair.instance.setup(); + } + + @Test + public void testCheckNTSreplicationNodeInsideOutsideDC() + { + String ksname1 = "ks_nts1"; + String ksname2 = "ks_nts2"; + Map configOptions1 = new HashMap<>(); + configOptions1.put("datacenter1", "3"); + configOptions1.put(ReplicationParams.CLASS, "NetworkTopologyStrategy"); + SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create(ksname1, KeyspaceParams.create(false, configOptions1)), false); + Map configOptions2 = new HashMap<>(); + configOptions2.put("datacenter2", "3"); + configOptions2.put(ReplicationParams.CLASS, "NetworkTopologyStrategy"); + SchemaTestUtil.addOrUpdateKeyspace(KeyspaceMetadata.create(ksname2, KeyspaceParams.create(false, configOptions2)), false); + + for (Keyspace ks : Keyspace.all()) + { + if (ks.getName().equals(ksname1)) + { + // case 1 : + // node reside in "datacenter1" + // keyspace has replica in "datacenter1" + Assert.assertTrue(AutoRepairUtils.shouldConsiderKeyspace(ks)); + } + else if (ks.getName().equals(ksname2)) + { + // case 2 : + // node reside in "datacenter1" + // keyspace has replica in "datacenter2" + Assert.assertFalse(AutoRepairUtils.shouldConsiderKeyspace(ks)); + } + } + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java new file mode 100644 index 000000000000..d9723ea193dd --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java @@ -0,0 +1,491 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; + +import com.google.common.collect.ImmutableSet; +import org.apache.cassandra.schema.SystemDistributedKeyspace; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.marshal.UUIDType; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.AutoRepairHistory; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils.CurrentRepairStatus; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.utils.FBUtilities; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_DELETE_HOSTS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_FORCE_REPAIR; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_FINISH_TS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_PRIORITY; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_START_TS; +import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.COL_REPAIR_TURN; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.AutoRepairUtils} + */ +public class AutoRepairUtilsTest extends CQLTester +{ + static final RepairType repairType = RepairType.INCREMENTAL; + static UUID hostId; + + static InetAddressAndPort localEndpoint; + + @BeforeClass + public static void setupClass() throws Exception + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + setAutoRepairEnabled(true); + requireNetwork(); + localEndpoint = FBUtilities.getBroadcastAddressAndPort(); + hostId = StorageService.instance.getHostIdForEndpoint(localEndpoint); + StorageService.instance.doAutoRepairSetup(); + } + + @Before + public void setup() + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", "ks")); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (k text, s text static, i int, v text, primary key(k,i))", "ks", "tbl")); + + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("0s"); + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); + } + + @Test + public void testSetForceRepair() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.setForceRepair(repairType, ImmutableSet.of(localEndpoint)); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + assertTrue(result.one().getBoolean(COL_FORCE_REPAIR)); + } + + @Test + public void testSetForceRepairNewNode() + { + AutoRepairUtils.setForceRepairNewNode(repairType); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT force_repair FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + assertTrue(result.one().getBoolean(COL_FORCE_REPAIR)); + } + + @Test + public void testClearDeleteHosts() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, delete_hosts, delete_hosts_update_time) VALUES ('%s', %s, { %s }, toTimestamp(now()))", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId, hostId)); + + AutoRepairUtils.clearDeleteHosts(repairType, hostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT delete_hosts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + Set deleteHosts = result.one().getSet(COL_DELETE_HOSTS, UUIDType.instance); + assertNull(deleteHosts); + } + + @Test + public void testGetAutoRepairHistoryForLocalGroup() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, force_repair) VALUES ('%s', %s, false)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + List history = AutoRepairUtils.getAutoRepairHistory(repairType); + assertNotNull(history); + assertEquals(1, history.size()); + assertEquals(hostId, history.get(0).hostId); + } + + @Test + public void testGetAutoRepairHistoryForLocalGroup_empty_history() + { + List history = AutoRepairUtils.getAutoRepairHistory(repairType); + + assertNull(history); + } + + @Test + public void testGetCurrentRepairStatus() + { + UUID forceRepair = UUID.randomUUID(); + UUID regularRepair = UUID.randomUUID(); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, force_repair, repair_start_ts) VALUES ('%s', %s, true, toTimestamp(now()))", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), forceRepair)); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, repair_start_ts) VALUES ('%s', %s, toTimestamp(now()))", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), regularRepair)); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString(), regularRepair)); + + CurrentRepairStatus status = AutoRepairUtils.getCurrentRepairStatus(repairType, AutoRepairUtils.getAutoRepairHistory(repairType), hostId); + + assertNotNull(status); + assertEquals(1, status.historiesWithoutOnGoingRepair.size()); + assertEquals(hostId, status.historiesWithoutOnGoingRepair.get(0).hostId); + assertEquals(1, status.hostIdsWithOnGoingRepair.size()); + assertTrue(status.hostIdsWithOnGoingRepair.contains(regularRepair)); + assertEquals(1, status.hostIdsWithOnGoingForceRepair.size()); + assertTrue(status.hostIdsWithOnGoingForceRepair.contains(forceRepair)); + assertEquals(1, status.priority.size()); + assertTrue(status.priority.contains(regularRepair)); + assertEquals(hostId, status.myRepairHistory.hostId); + } + + @Test + public void testGetHostIdsInCurrentRing() + { + TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType); + + assertNotNull(hosts); + assertEquals(1, hosts.size()); + assertTrue(hosts.contains(hostId)); + } + + @Test + public void testGetHostIdsInCurrentRing_multiple_nodes() + { + InetAddressAndPort ignoredEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); + InetAddressAndPort deadEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 2); + DatabaseDescriptor.getAutoRepairConfig().setIgnoreDCs(repairType, ImmutableSet.of("dc2")); + + TreeSet hosts = AutoRepairUtils.getHostIdsInCurrentRing(repairType, ImmutableSet.of(new NodeAddresses(localEndpoint), new NodeAddresses(ignoredEndpoint), new NodeAddresses(deadEndpoint))); + + assertNotNull(hosts); + assertEquals(1, hosts.size()); + assertTrue(hosts.contains(hostId)); + } + + @Test + public void testGetHostWithLongestUnrepairTime() + { + UUID otherHostId = UUID.randomUUID(); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id, repair_finish_ts) VALUES ('%s', %s, toTimestamp(now()))", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), otherHostId)); + + AutoRepairHistory history = AutoRepairUtils.getHostWithLongestUnrepairTime(repairType); + + assertEquals(hostId, history.hostId); + } + + @Test + public void testGetMaxNumberOfNodeRunAutoRepairInGroup_0_group_size() + { + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); + + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 0); + + assertEquals(2, count); + } + + @Test + public void testGetMaxNumberOfNodeRunAutoRepairInGroup_percentage() + { + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 2); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairPercentage(repairType, 50); + + + int count = AutoRepairUtils.getMaxNumberOfNodeRunAutoRepair(repairType, 10); + + assertEquals(5, count); + } + + @Test + public void testDeleteAutoRepairHistory() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.deleteAutoRepairHistory(repairType, hostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(0, result.size()); + } + + @Test + public void testUpdateStartAutoRepairHistory() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.updateStartAutoRepairHistory(repairType, hostId, 123, AutoRepairUtils.RepairTurn.MY_TURN); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT repair_start_ts, repair_turn FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + UntypedResultSet.Row row = result.one(); + assertEquals(123, row.getLong(COL_REPAIR_START_TS, 0)); + assertEquals(AutoRepairUtils.RepairTurn.MY_TURN.toString(), row.getString(COL_REPAIR_TURN)); + } + + @Test + public void testUpdateFinishAutoRepairHistory() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + + AutoRepairUtils.updateFinishAutoRepairHistory(repairType, hostId, 123); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT repair_finish_ts FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), hostId)); + assertNotNull(result); + assertEquals(1, result.size()); + assertEquals(123, result.one().getLong(COL_REPAIR_FINISH_TS, 0)); + } + + @Test + public void testAddHostIdToDeleteHosts() + { + UUID otherHostId = UUID.randomUUID(); + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, host_id) VALUES ('%s', %s)", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), otherHostId)); + + AutoRepairUtils.addHostIdToDeleteHosts(repairType, hostId, otherHostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, + repairType.toString(), otherHostId)); + assertNotNull(result); + assertEquals(1, result.size()); + Set deleteHosts = result.one().getSet(COL_DELETE_HOSTS, UUIDType.instance); + assertNotNull(deleteHosts); + assertEquals(1, deleteHosts.size()); + assertTrue(deleteHosts.contains(hostId)); + } + + @Test + public void testAddPriorityHost() + { + AutoRepairUtils.addPriorityHosts(repairType, ImmutableSet.of(localEndpoint)); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString())); + assertNotNull(result); + assertEquals(1, result.size()); + Set repairPriority = result.one().getSet(COL_REPAIR_PRIORITY, UUIDType.instance); + assertNotNull(repairPriority); + assertEquals(1, repairPriority.size()); + assertTrue(repairPriority.contains(hostId)); + } + + @Test + public void testRemovePriorityStatus() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString(), hostId)); + + AutoRepairUtils.removePriorityStatus(repairType, hostId); + + UntypedResultSet result = QueryProcessor.executeInternal(String.format( + "SELECT * FROM %s.%s WHERE repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString())); + assertNotNull(result); + assertEquals(1, result.size()); + Set repairPriority = result.one().getSet(COL_REPAIR_PRIORITY, UUIDType.instance); + assertNull(repairPriority); + } + + @Test + public void testGetPriorityHosts() + { + QueryProcessor.executeInternal(String.format( + "INSERT INTO %s.%s (repair_type, repair_priority) VALUES ('%s', { %s })", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY, + repairType.toString(), hostId)); + + Set hosts = AutoRepairUtils.getPriorityHosts(repairType); + + assertNotNull(hosts); + assertEquals(1, hosts.size()); + assertTrue(hosts.contains(localEndpoint)); + } + + @Test + public void testCheckNodeContainsKeyspaceReplica() + { + Keyspace ks = Keyspace.open("ks"); + + assertTrue(AutoRepairUtils.shouldConsiderKeyspace(ks)); + } + + @Test + public void testTableMaxRepairTimeExceeded() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); + + assertTrue(AutoRepairUtils.tableMaxRepairTimeExceeded(repairType, 0)); + } + + @Test + public void testKeyspaceMaxRepairTimeExceeded() + { + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairTableMaxRepairTime(repairType, "0s"); + + assertTrue(AutoRepairUtils.keyspaceMaxRepairTimeExceeded(repairType, 0, 1)); + } + + @Test + public void testGetLastRepairFinishTime() + { + AutoRepairHistory history = new AutoRepairHistory(UUID.randomUUID(), "", 0, 0, null, 0, false); + + assertEquals(0, history.getLastRepairFinishTime()); + + history.lastRepairFinishTime = 100; + + assertEquals(100, history.getLastRepairFinishTime()); + } + + @Test + public void testMyTurnToRunRepairShouldReturnMyTurnWhenRepairOngoing() + { + UUID myID = UUID.randomUUID(); + UUID otherID = UUID.randomUUID(); + DatabaseDescriptor.getAutoRepairConfig().setParallelRepairCount(repairType, 5); + long currentMillis = System.currentTimeMillis(); + // finish time less than start time means that repair is ongoing + AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + // finish time is larger than start time means that repair for other node is finished + AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + + assertEquals(AutoRepairUtils.RepairTurn.MY_TURN, AutoRepairUtils.myTurnToRunRepair(repairType, myID)); + } + + @Test + public void testLocalStrategyAndNetworkKeyspace() + { + assertFalse(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open("system"))); + assertTrue(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open(KEYSPACE))); + } + + @Test + public void testGetLastRepairTimeForNode() + { + UUID myID = UUID.randomUUID(); + UUID otherID = UUID.randomUUID(); + long currentMillis = System.currentTimeMillis(); + AutoRepairUtils.insertNewRepairHistory(repairType, myID, currentMillis, currentMillis - 100); + AutoRepairUtils.insertNewRepairHistory(repairType, otherID, currentMillis, currentMillis + 100); + + assertEquals(currentMillis - 100, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); + } + + @Test + public void testGetLastRepairTimeForNodeWhenHistoryIsEmpty() + { + UUID myID = UUID.randomUUID(); + + assertEquals(0, AutoRepairUtils.getLastRepairTimeForNode(repairType, myID)); + } + + @Test + public void testSkipSystemTraces() + { + assertFalse(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open(SchemaConstants.TRACE_KEYSPACE_NAME))); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java new file mode 100644 index 000000000000..dac4a167d556 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterHelper.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.dht.BootStrapper; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.index.sai.disk.format.Version; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.compatibility.TokenRingUtils; +import org.apache.cassandra.utils.FBUtilities; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.setupSeed; +import static org.apache.cassandra.cql3.CQLTester.Fuzzed.updateConfigs; +import static org.apache.cassandra.repair.autorepair.FixedSplitTokenRangeSplitter.DEFAULT_NUMBER_OF_SUBRANGES; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Helper class for {@link FixedSplitTokenRangeSplitterNoVNodesTest} and {@link FixedSplitTokenRangeSplitterVNodesTest} + */ +public class FixedSplitTokenRangeSplitterHelper +{ + private static final String TABLE1 = "tbl1"; + private static final String TABLE2 = "tbl2"; + private static final String TABLE3 = "tbl3"; + public static final String KEYSPACE = "ks"; + + public static void setupClass(int numTokens) throws Exception + { + setupSeed(); + updateConfigs(); + DatabaseDescriptor.setPartitioner("org.apache.cassandra.dht.Murmur3Partitioner"); + ServerTestUtils.prepareServerNoRegister(); + + Set tokens = BootStrapper.getRandomTokens(ClusterMetadata.current(), numTokens); + ServerTestUtils.registerLocal(tokens); + // Ensure that the on-disk format statics are loaded before the test run + Version.LATEST.onDiskFormat(); + StorageService.instance.doAutoRepairSetup(); + + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", FixedSplitTokenRangeSplitterHelper.KEYSPACE)); + } + + public static void testTokenRangesSplitByTable(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) + { + int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, false); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); + assertEquals(numTokens, tokens.size()); + List tables = Arrays.asList(TABLE1, TABLE2, TABLE3); + List> expectedToken = new ArrayList<>(); + for (int i = 0; i < tables.size(); i++) + { + for (Range range : tokens) + { + expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); + } + } + + List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); + + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges))) + .getRepairAssignments(true, plan); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertEquals(numTokens * numberOfSplits * tables.size(), assignments.size()); + assertEquals(expectedToken.size(), assignments.size()); + + int assignmentsPerTable = numTokens * numberOfSplits; + for (int i = 0; i < tables.size(); i++) + { + List assignmentForATable = new ArrayList<>(); + List> expectedTokensForATable = new ArrayList<>(); + for (int j = 0; j < assignmentsPerTable; j++) + { + assertEquals(Collections.singletonList(tables.get(i)), assignments.get(i * assignmentsPerTable + j).getTableNames()); + assignmentForATable.add(assignments.get(i * assignmentsPerTable + j)); + expectedTokensForATable.add(expectedToken.get(i * assignmentsPerTable + j)); + } + compare(numTokens, numberOfSplits, expectedTokensForATable, assignmentForATable); + } + } + + public static void testTokenRangesSplitByKeyspace(int numTokens, int numberOfSubRanges, AutoRepairConfig.RepairType repairType) + { + int numberOfSplits = calcSplits(numTokens, numberOfSubRanges); + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(repairType, true); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); + assertEquals(numTokens, tokens.size()); + List> expectedToken = new ArrayList<>(); + for (Range range : tokens) + { + expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); + } + + List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1, TABLE2, TABLE3); + + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.singletonMap(FixedSplitTokenRangeSplitter.NUMBER_OF_SUBRANGES, Integer.toString(numberOfSubRanges))) + .getRepairAssignments(true, plan); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + assertEquals(numTokens * numberOfSplits, assignments.size()); + assertEquals(expectedToken.size(), assignments.size()); + + compare(numTokens, numberOfSplits, expectedToken, assignments); + } + + public static void testTokenRangesWithDefaultSplit(int numTokens, AutoRepairConfig.RepairType repairType) + { + int numberOfSplits = calcSplits(numTokens, DEFAULT_NUMBER_OF_SUBRANGES); + Collection> tokens = TokenRingUtils.getPrimaryRangesForEndpoint(KEYSPACE, FBUtilities.getBroadcastAddressAndPort()); + assertEquals(numTokens, tokens.size()); + List> expectedToken = new ArrayList<>(); + for (Range range : tokens) + { + expectedToken.addAll(AutoRepairUtils.split(range, numberOfSplits)); + } + + List plan = PrioritizedRepairPlan.buildSingleKeyspacePlan(repairType, KEYSPACE, TABLE1); + + Iterator keyspaceAssignments = new FixedSplitTokenRangeSplitter(repairType, Collections.emptyMap()).getRepairAssignments(true, plan); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // should be 3 entries for the table which covers each token range. + assertEquals(numTokens * numberOfSplits, assignments.size()); + + compare(numTokens, numberOfSplits, expectedToken, assignments); + } + + private static void compare(int numTokens, int numberOfSplits, List> expectedToken, List assignments) + { + assertEquals(expectedToken.size(), assignments.size()); + Set> a = new TreeSet<>(); + Set> b = new TreeSet<>(); + for (int i = 0; i < numTokens * numberOfSplits; i++) + { + a.add(expectedToken.get(i)); + b.add(assignments.get(i).getTokenRange()); + } + assertEquals(a, b); + } + + private static int calcSplits(int numTokens, int subRange) + { + return Math.max(1, subRange / numTokens); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java new file mode 100644 index 000000000000..a30f3aa76246 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterNoVNodesTest.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Unit tests for a setup that does not have v-nodes {@link FixedSplitTokenRangeSplitter} + */ +@RunWith(Parameterized.class) +public class FixedSplitTokenRangeSplitterNoVNodesTest +{ + private static final int numTokens = 1; + + @Parameterized.Parameter(0) + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public int numberOfSubRanges; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}") + public static Collection parameters() + { + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + { + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; + } + + @BeforeClass + public static void setupClass() throws Exception + { + FixedSplitTokenRangeSplitterHelper.setupClass(numTokens); + } + + @Test + public void testTokenRangesSplitByTable() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByTable(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesSplitByKeyspace() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByKeyspace(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesWithDefaultSplit() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, repairType); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java new file mode 100644 index 000000000000..6839748d1f01 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/FixedSplitTokenRangeSplitterVNodesTest.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Unit tests for a setup that has v-nodes {@link FixedSplitTokenRangeSplitter} + */ +@RunWith(Parameterized.class) +public class FixedSplitTokenRangeSplitterVNodesTest +{ + private static final int numTokens = 16; + + @Parameterized.Parameter(0) + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public int numberOfSubRanges; + + @Parameterized.Parameters(name = "repairType={0}, numberOfSubRanges={1}") + public static Collection parameters() + { + List params = new ArrayList<>(); + for (AutoRepairConfig.RepairType type : AutoRepairConfig.RepairType.values()) + { + for (int subRange : Arrays.asList(1, 2, 4, 8, 16, 32, 64, 128, 256)) + { + params.add(new Object[]{ type, subRange }); + } + } + return params; + } + + @BeforeClass + public static void setupClass() throws Exception + { + FixedSplitTokenRangeSplitterHelper.setupClass(numTokens); + } + + @Test + public void testTokenRangesSplitByTable() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByTable(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesSplitByKeyspace() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesSplitByKeyspace(numTokens, numberOfSubRanges, repairType); + } + + @Test + public void testTokenRangesWithDefaultSplit() + { + FixedSplitTokenRangeSplitterHelper.testTokenRangesWithDefaultSplit(numTokens, repairType); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java new file mode 100644 index 000000000000..38f9c8538846 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/PrioritizedRepairPlanTest.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Lists; +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.PrioritizedRepairPlan} + */ +public class PrioritizedRepairPlanTest extends CQLTester +{ + @Test + public void testBuildWithDifferentPriorities() + { + // Test reordering assignments with different priorities + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '3'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '1'}"); + + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3); + assertEquals(3, prioritizedRepairPlans.size()); + + // Verify the order is by descending priority and matches the expected tables + assertEquals(3, prioritizedRepairPlans.get(0).getPriority()); + assertEquals(table2, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + + assertEquals(2, prioritizedRepairPlans.get(1).getPriority()); + assertEquals(table1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + + assertEquals(1, prioritizedRepairPlans.get(2).getPriority()); + assertEquals(table3, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + } + + @Test + public void testBuildWithSamePriority() + { + // Test reordering assignments with the same priority + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table2 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table3 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + + // Expect only 1 plan since all tables share the same priority + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1, table2, table3); + assertEquals(1, prioritizedRepairPlans.size()); + + // Verify all tables present in the plan + assertEquals(1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().size()); + KeyspaceRepairPlan keyspaceRepairPlan = prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0); + + List tableNames = keyspaceRepairPlan.getTableNames(); + assertEquals(3, tableNames.size()); + assertEquals(table1, tableNames.get(0)); + assertEquals(table2, tableNames.get(1)); + assertEquals(table3, tableNames.get(2)); + } + + @Test + public void testBuildWithMixedPriorities() + { + String ks1 = createKeyspace("CREATE KEYSPACE %s WITH replication={ 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"); + String table1 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table2 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '3'}"); + String table3 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '2'}"); + String table4 = createTable(ks1, "CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '1'}"); + // No priority table should be bucketed at priority 0 + String table5 = createTable(ks1,"CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + + // Create a new keyspace to ensure its tables get grouped with appropriate priority bucket + String ks2 = createKeyspace("CREATE KEYSPACE %s WITH replication={ 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"); + String table6 = createTable(ks2,"CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + String table7 = createTable(ks2,"CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '1'}"); + + Map> keyspaceToTableMap = new HashMap<>(); + keyspaceToTableMap.put(ks1, Lists.newArrayList(table1, table2, table3, table4, table5)); + keyspaceToTableMap.put(ks2, Lists.newArrayList(table6, table7)); + + // Expect 4 plans + List prioritizedRepairPlans = PrioritizedRepairPlan.build(keyspaceToTableMap, AutoRepairConfig.RepairType.FULL, java.util.Collections::sort); + assertEquals(4, prioritizedRepairPlans.size()); + + // Verify the order is by descending priority and matches the expected tables + assertEquals(3, prioritizedRepairPlans.get(0).getPriority()); + assertEquals(1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().size()); + assertEquals(ks1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getKeyspaceName()); + assertEquals(table2, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + + assertEquals(2, prioritizedRepairPlans.get(1).getPriority()); + assertEquals(1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().size()); + + assertEquals(ks1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getKeyspaceName()); + assertEquals(table1, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + assertEquals(table3, prioritizedRepairPlans.get(1).getKeyspaceRepairPlans().get(0).getTableNames().get(1)); + + assertEquals(1, prioritizedRepairPlans.get(2).getPriority()); + // 2 keyspaces should be present at priority 1 + assertEquals(2, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().size()); + // ks1.table4 expected in first plan + assertEquals(ks1, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(0).getKeyspaceName()); + assertEquals(table4, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + // ks2.table7 expected in second plan + assertEquals(ks2, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(1).getKeyspaceName()); + assertEquals(table7, prioritizedRepairPlans.get(2).getKeyspaceRepairPlans().get(1).getTableNames().get(0)); + + // Tables without priority should get bucketed at priority 0 + assertEquals(0, prioritizedRepairPlans.get(3).getPriority()); + // 2 keyspaces expected + assertEquals(2, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().size()); + // ks1.table5 expected in first plan + assertEquals(ks1, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(0).getKeyspaceName()); + assertEquals(table5, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + // ks2.table6 expected in second plan + assertEquals(ks2, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(1).getKeyspaceName()); + assertEquals(table6, prioritizedRepairPlans.get(3).getKeyspaceRepairPlans().get(1).getTableNames().get(0)); + } + + @Test + public void testBuildWithEmptyTableList() + { + // Test with an empty table list (should remain empty) + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE); + assertTrue(prioritizedRepairPlans.isEmpty()); + } + + @Test + public void testBuildWithOneTable() + { + // Test with a single element (should remain unchanged) + String table1 = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT) WITH auto_repair = {'full_enabled': 'true', 'priority': '5'}"); + + // Expect only 1 plans + List prioritizedRepairPlans = PrioritizedRepairPlan.buildSingleKeyspacePlan(AutoRepairConfig.RepairType.FULL, KEYSPACE, table1); + assertEquals(1, prioritizedRepairPlans.size()); + + // Verify the order is by descending priority and matches the expected tables + assertEquals(5, prioritizedRepairPlans.get(0).getPriority()); + assertEquals(table1, prioritizedRepairPlans.get(0).getKeyspaceRepairPlans().get(0).getTableNames().get(0)); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java new file mode 100644 index 000000000000..79fef533f18f --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/RepairTokenRangeSplitterTest.java @@ -0,0 +1,465 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.auth.AuthKeyspace; +import org.apache.cassandra.config.DataStorageSpec.LongMebibytesBound; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.sstable.format.big.BigFormat; +import org.apache.cassandra.io.sstable.format.bti.BtiFormat; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; +import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.FilteredRepairAssignments; +import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.SizeEstimate; +import org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.SizedRepairAssignment; +import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.concurrent.Refs; + +import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.MAX_BYTES_PER_SCHEDULE; +import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.BYTES_PER_ASSIGNMENT; +import static org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter.MAX_TABLES_PER_ASSIGNMENT; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for {@link org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter} + */ +@RunWith(Parameterized.class) +public class RepairTokenRangeSplitterTest extends CQLTester +{ + private RepairTokenRangeSplitter repairRangeSplitter; + private String tableName; + private static Range FULL_RANGE; + + @Parameterized.Parameter() + public String sstableFormat; + + @Parameterized.Parameters(name = "sstableFormat={0}") + public static Collection sstableFormats() + { + return List.of(BtiFormat.NAME, BigFormat.NAME); + } + + @BeforeClass + public static void setUpClass() + { + CQLTester.setUpClass(); + AutoRepairService.setup(); + FULL_RANGE = new Range<>(DatabaseDescriptor.getPartitioner().getMinimumToken(), DatabaseDescriptor.getPartitioner().getMaximumTokenForSplitting()); + } + + @Before + public void setUp() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); + DatabaseDescriptor.setSelectedSSTableFormat(DatabaseDescriptor.getSSTableFormats().get(sstableFormat)); + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.emptyMap()); + tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + // ensure correct format is selected. + if (sstableFormat.equalsIgnoreCase(BigFormat.NAME)) + { + assertTrue(BigFormat.isSelected()); + } + else + { + assertTrue(BtiFormat.isSelected()); + } + } + + @Test + public void testSizePartitionCount() + { + insertAndFlushTable(tableName, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + try (Refs sstables = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE)) + { + assertEquals(10, sstables.iterator().next().getEstimatedPartitionSize().count()); + SizeEstimate sizes = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, FULL_RANGE, sstables); + assertEquals(10, sizes.partitions); + } + } + + @Test + public void testSizePartitionCountSplit() + { + int partitionCount = 100_000; + int[] values = new int[partitionCount]; + for (int i = 0; i < values.length; i++) + values[i] = i + 1; + insertAndFlushTable(tableName, values); + Iterator> range = AutoRepairUtils.split(FULL_RANGE, 2).iterator(); + Range tokenRange1 = range.next(); + Range tokenRange2 = range.next(); + Assert.assertFalse(range.hasNext()); + + try (Refs sstables1 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange1); + Refs sstables2 = RepairTokenRangeSplitter.getSSTableReaderRefs(RepairType.FULL, KEYSPACE, tableName, tokenRange2)) + { + SizeEstimate sizes1 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange1, sstables1); + SizeEstimate sizes2 = RepairTokenRangeSplitter.getSizesForRangeOfSSTables(RepairType.FULL, KEYSPACE, tableName, tokenRange2, sstables2); + + // +-5% because including entire compression blocks covering token range, HLL merge and the applying of range size approx ratio causes estimation errors + long allowableDelta = (long) (partitionCount * .05); + long estimatedPartitionDelta = Math.abs(partitionCount - (sizes1.partitions + sizes2.partitions)); + assertTrue("Partition count delta was +/-" + estimatedPartitionDelta + " but expected +/- " + allowableDelta, estimatedPartitionDelta <= allowableDelta); + } + } + + @Test + public void testGetRepairAssignmentsForTable_NoSSTables() + { + // Should return 1 assignment if there are no SSTables + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, FULL_RANGE); + assertEquals(1, assignments.size()); + } + + @Test + public void testGetRepairAssignmentsForTable_Single() + { + insertAndFlushSingleTable(); + List assignments = repairRangeSplitter.getRepairAssignmentsForTable(CQLTester.KEYSPACE, tableName, FULL_RANGE); + assertEquals(1, assignments.size()); + } + + @Test + public void testGetRepairAssignmentsForTable_BatchingTables() + { + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); + + List tableNames = createAndInsertTables(3); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); + + // We expect two assignments, one with table1 and table2 batched, and one with table3 + assertEquals(2, assignments.size()); + assertEquals(2, assignments.get(0).getTableNames().size()); + assertEquals(1, assignments.get(1).getTableNames().size()); + } + + @Test + public void testGetRepairAssignmentsForTable_BatchSize() + { + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "2")); + + List tableNames = createAndInsertTables(2); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); + + // We expect one assignment, with two tables batched + assertEquals(1, assignments.size()); + assertEquals(2, assignments.get(0).getTableNames().size()); + } + + @Test + public void testGetRepairAssignmentsForTable_NoBatching() + { + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "1")); + + List tableNames = createAndInsertTables(3); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); + + assertEquals(3, assignments.size()); + } + + @Test + public void testGetRepairAssignmentsForTable_AllBatched() + { + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.FULL, Collections.singletonMap(MAX_TABLES_PER_ASSIGNMENT, "100")); + + List tableNames = createAndInsertTables(5); + List assignments = repairRangeSplitter.getRepairAssignmentsForKeyspace(RepairType.FULL, KEYSPACE, tableNames, FULL_RANGE); + + assertEquals(1, assignments.size()); + } + + @Test(expected = IllegalStateException.class) + public void testMergeEmptyAssignments() + { + // Test when the list of assignments is empty + List emptyAssignments = Collections.emptyList(); + RepairTokenRangeSplitter.merge(emptyAssignments); + } + + @Test + public void testMergeSingleAssignment() + { + // Test when there is only one assignment in the list + String keyspaceName = "testKeyspace"; + List tableNames = Arrays.asList("table1", "table2"); + + SizedRepairAssignment assignment = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames); + List assignments = Collections.singletonList(assignment); + + SizedRepairAssignment result = RepairTokenRangeSplitter.merge(assignments); + + assertEquals(FULL_RANGE, result.getTokenRange()); + assertEquals(keyspaceName, result.getKeyspaceName()); + assertEquals(new HashSet<>(tableNames), new HashSet<>(result.getTableNames())); + } + + @Test + public void testMergeMultipleAssignmentsWithSameTokenRangeAndKeyspace() + { + // Test merging multiple assignments with the same token range and keyspace + String keyspaceName = "testKeyspace"; + List tableNames1 = Arrays.asList("table1", "table2"); + List tableNames2 = Arrays.asList("table2", "table3"); + + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2); + List assignments = Arrays.asList(assignment1, assignment2); + + SizedRepairAssignment result = RepairTokenRangeSplitter.merge(assignments); + + assertEquals(FULL_RANGE, result.getTokenRange()); + assertEquals(keyspaceName, result.getKeyspaceName()); + assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); + } + + @Test(expected = IllegalStateException.class) + public void testMergeDifferentTokenRange() + { + // Test merging assignments with different token ranges + Iterator> range = AutoRepairUtils.split(FULL_RANGE, 2).iterator(); // Split the full range into two ranges ie (0-100, 100-200 + Range tokenRange1 = range.next(); + Range tokenRange2 = range.next(); + Assert.assertFalse(range.hasNext()); + + String keyspaceName = "testKeyspace"; + List tableNames = Arrays.asList("table1", "table2"); + + SizedRepairAssignment assignment1 = new SizedRepairAssignment(tokenRange1, keyspaceName, tableNames); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(tokenRange2, keyspaceName, tableNames); + List assignments = Arrays.asList(assignment1, assignment2); + + RepairTokenRangeSplitter.merge(assignments); // Should throw IllegalStateException + } + + @Test(expected = IllegalStateException.class) + public void testMergeDifferentKeyspaceName() + { + // Test merging assignments with different keyspace names + List tableNames = Arrays.asList("table1", "table2"); + + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, "keyspace1", tableNames); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, "keyspace2", tableNames); + List assignments = Arrays.asList(assignment1, assignment2); + + RepairTokenRangeSplitter.merge(assignments); // Should throw IllegalStateException + } + + @Test + public void testMergeWithDuplicateTables() + { + // Test merging assignments with duplicate table names + String keyspaceName = "testKeyspace"; + List tableNames1 = Arrays.asList("table1", "table2"); + List tableNames2 = Arrays.asList("table2", "table3"); + + SizedRepairAssignment assignment1 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames1); + SizedRepairAssignment assignment2 = new SizedRepairAssignment(FULL_RANGE, keyspaceName, tableNames2); + List assignments = Arrays.asList(assignment1, assignment2); + + RepairAssignment result = RepairTokenRangeSplitter.merge(assignments); + + // The merged result should contain all unique table names + assertEquals(new HashSet<>(Arrays.asList("table1", "table2", "table3")), new HashSet<>(result.getTableNames())); + } + + @Test + public void testGetRepairAssignmentsSplitsBySubrangeSizeAndFilterLimitsByMaxBytesPerSchedule() + { + // Ensures that getRepairAssignments splits by BYTES_PER_ASSIGNMENT and filterRepairAssignments limits by MAX_BYTES_PER_SCHEDULE. + repairRangeSplitter = new RepairTokenRangeSplitter(RepairType.INCREMENTAL, Collections.emptyMap()); + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "50GiB"); + repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "100GiB"); + + // Given a size estimate of 1024GiB, we should expect 21 splits (50GiB*21 = 1050GiB < 1024GiB) + SizeEstimate sizeEstimate = sizeEstimateByBytes(new LongMebibytesBound("1024GiB")); + + List assignments = repairRangeSplitter.getRepairAssignments(Collections.singletonList(sizeEstimate)); + + // Should be 21 assignments, each being ~48.76 GiB + assertEquals(21, assignments.size()); + long expectedBytes = 52357696560L; + for (int i = 0; i < assignments.size(); i++) + { + SizedRepairAssignment assignment = assignments.get(i); + assertEquals("Did not get expected value for assignment " + i, 52357696560L, assignment.getEstimatedBytes()); + } + + // When filtering we should only get 2 assignments back (48.76 * 2 < 100GiB) + FilteredRepairAssignments filteredRepairAssignments = repairRangeSplitter.filterRepairAssignments(0, KEYSPACE, assignments, 0); + List finalRepairAssignments = filteredRepairAssignments.repairAssignments; + assertEquals(2, finalRepairAssignments.size()); + assertEquals(expectedBytes * 2, filteredRepairAssignments.newBytesSoFar); + } + + @Test + public void testTokenRangesRepairByKeyspace() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, true); + + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", new ArrayList<>(AuthKeyspace.TABLE_NAMES)); + final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); + + Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // Should only be two assignments (since single node encompasses the whole range, should get 2 primary ranges) + // to account for the range wrapping the ring. + assertEquals(2, assignments.size()); + + for (RepairAssignment assignment : assignments) + { + assertEquals(AuthKeyspace.TABLE_NAMES.size(), assignment.getTableNames().size()); + } + } + + @Test + public void testTokenRangesRepairByKeyspaceFalse() + { + AutoRepairService.instance.getAutoRepairConfig().setRepairByKeyspace(RepairType.FULL, false); + + final KeyspaceRepairPlan repairPlan = new KeyspaceRepairPlan("system_auth", new ArrayList<>(AuthKeyspace.TABLE_NAMES)); + final PrioritizedRepairPlan prioritizedRepairPlan = new PrioritizedRepairPlan(0, List.of(repairPlan)); + + Iterator keyspaceAssignments = repairRangeSplitter.getRepairAssignments(true, List.of(prioritizedRepairPlan)); + + // should be only 1 entry for the keyspace. + assertTrue(keyspaceAssignments.hasNext()); + KeyspaceRepairAssignments keyspace = keyspaceAssignments.next(); + assertFalse(keyspaceAssignments.hasNext()); + + List assignments = keyspace.getRepairAssignments(); + assertNotNull(assignments); + + // Should be two ranges * X system_auth table names assignments + assertEquals(2 * AuthKeyspace.TABLE_NAMES.size(), assignments.size()); + + // each assignment should only include one table. + for (RepairAssignment assignment : assignments) + { + assertEquals(1, assignment.getTableNames().size()); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testSetParameterShouldNotAllowUnknownParameter() + { + repairRangeSplitter.setParameter("unknown", "x"); + } + + @Test(expected = IllegalArgumentException.class) + public void testSetParameterShouldNotAllowSettingBytesPerAssignmentGreaterThanMaxBytesPerSchedule() + { + repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "500GiB"); + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "600GiB"); + } + + @Test(expected = IllegalArgumentException.class) + public void testSetParameterShouldNotAllowSettingMaxBytesPerScheduleLessThanBytesPerAssignment() + { + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "100MiB"); + repairRangeSplitter.setParameter(MAX_BYTES_PER_SCHEDULE, "50MiB"); + } + + @Test + public void testGetParameters() + { + repairRangeSplitter.setParameter(BYTES_PER_ASSIGNMENT, "100MiB"); + repairRangeSplitter.setParameter(MAX_TABLES_PER_ASSIGNMENT, "5"); + + Map parameters = repairRangeSplitter.getParameters(); + // Each parameter should be present. + assertEquals(RepairTokenRangeSplitter.PARAMETERS.size(), parameters.size()); + // The parameters we explicitly set should be set exactly as we set them. + assertEquals("100MiB", parameters.get(BYTES_PER_ASSIGNMENT)); + assertEquals("5", parameters.get(MAX_TABLES_PER_ASSIGNMENT)); + } + + private SizeEstimate sizeEstimateByBytes(LongMebibytesBound totalSize) + { + return sizeEstimateByBytes(totalSize, totalSize); + } + + private SizeEstimate sizeEstimateByBytes(LongMebibytesBound sizeInRange, LongMebibytesBound totalSize) + { + return new SizeEstimate(RepairType.INCREMENTAL, KEYSPACE, "table1", FULL_RANGE, 1, sizeInRange.toBytes(), totalSize.toBytes()); + } + + private void insertAndFlushSingleTable() + { + execute("INSERT INTO %s (k, v) values (?, ?)", 1, 1); + flush(); + } + + private List createAndInsertTables(int count) + { + List tableNames = new ArrayList<>(); + for (int i = 0; i < count; i++) + { + String tableName = createTable("CREATE TABLE %s (k INT PRIMARY KEY, v INT)"); + tableNames.add(tableName); + insertAndFlushTable(tableName); + } + return tableNames; + } + + private void insertAndFlushTable(String tableName) + { + insertAndFlushTable(tableName, 1); + } + + private void insertAndFlushTable(String tableName, int... vals) + { + for (int i : vals) + { + executeFormattedQuery("INSERT INTO " + KEYSPACE + '.' + tableName + " (k, v) values (?, ?)", i, i); + } + ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(KEYSPACE, tableName); + cfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + } +} diff --git a/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java new file mode 100644 index 000000000000..14677490ca2d --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/SSTableRepairedAtTest.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import java.net.UnknownHostException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.service.StorageService; + +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + + +/** + * Unit tests to cover AutoRepair functionality inside {@link org.apache.cassandra.service.StorageService} + */ +public class SSTableRepairedAtTest extends CQLTester +{ + public static final String TEST_KEYSPACE = "test_keyspace"; + public static ColumnFamilyStore table1; + public static ColumnFamilyStore table2; + + @BeforeClass + public static void setUp() throws ConfigurationException, UnknownHostException + { + requireNetwork(); + AutoRepairUtils.setup(); + StorageService.instance.doAutoRepairSetup(); + DatabaseDescriptor.setCDCEnabled(false); + } + + @Before + public void clearData() + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + QueryProcessor.executeInternal(String.format("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", TEST_KEYSPACE)); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (key text, val text, primary key(key))", TEST_KEYSPACE, "table1")); + QueryProcessor.executeInternal(String.format("CREATE TABLE %s.%s (key text, val text, primary key(key))", TEST_KEYSPACE, "table2")); + + Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table1").truncateBlocking(); + Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table2").truncateBlocking(); + + table1 = Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table1"); + assert table1 != null; + table2 = Keyspace.open(TEST_KEYSPACE).getColumnFamilyStore("table2"); + assert table2 != null; + } + + @Test + public void testGetTablesForKeyspace() + { + List result = StorageService.instance.getTablesForKeyspace(TEST_KEYSPACE); + + assertEquals(Arrays.asList(table1.name, table2.name), result.stream().sorted().collect(Collectors.toList())); + } + + @Test + public void testGetTablesForKeyspaceNotFound() + { + String missingKeyspace = "MISSING_KEYSPACE"; + try + { + StorageService.instance.getTablesForKeyspace(missingKeyspace); + fail("Expected an AssertionError to be thrown"); + } + catch (AssertionError e) + { + assertEquals("Unknown keyspace " + missingKeyspace, e.getMessage()); + } + } + + @Test + public void testMutateSSTableRepairedStateTableNotFound() + { + try + { + StorageService.instance.mutateSSTableRepairedState(true, false, TEST_KEYSPACE, List.of("MISSING_TABLE")); + fail("Expected an InvalidRequestException to be thrown"); + } + catch (RuntimeException e) + { + assertEquals("Table MISSING_TABLE does not exist in keyspace " + TEST_KEYSPACE, e.getMessage()); + // Test passed + } + } + + @Test + public void testMutateSSTableRepairedStateTablePreview() + { + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + assertEquals(1, table1.getLiveSSTables().size()); + + List result = StorageService.instance.mutateSSTableRepairedState(true, true, TEST_KEYSPACE, Arrays.asList(table1.name)); + + assertEquals(1, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } + + @Test + public void testMutateSSTableRepairedStateTableRepaired() + { + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + assertEquals(2, table1.getLiveSSTables().size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + }); + + List result = StorageService.instance.mutateSSTableRepairedState(true, false, TEST_KEYSPACE, Arrays.asList(table1.name)); + + assertEquals(2, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertTrue(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } + + @Test + public void testMutateSSTableRepairedStateTableUnrepaired() throws Exception + { + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + SchemaLoader.insertData(TEST_KEYSPACE, table1.name, 0, 1); + table1.forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); + table1.getCompactionStrategyManager().mutateRepaired(table1.getLiveSSTables(), 1, null, false); + assertEquals(2, table1.getLiveSSTables().stream().filter(SSTableReader::isRepaired).count()); + + List result = StorageService.instance.mutateSSTableRepairedState(false, false, TEST_KEYSPACE, Arrays.asList(table1.name)); + + assertEquals(2, result.size()); + table1.getLiveSSTables().forEach(sstable -> { + assertFalse(sstable.isRepaired()); + assertTrue(result.contains(sstable.descriptor.baseFile().name())); + }); + } +} diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index c59163ae0bc3..5293149b1b59 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -1,21 +1,21 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.cassandra.service; import java.net.UnknownHostException; @@ -36,6 +36,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; + import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -61,6 +62,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.schema.KeyspaceParams; +import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; import org.apache.cassandra.service.snapshot.TableSnapshot; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.tcm.ClusterMetadata; @@ -72,6 +74,7 @@ import org.apache.cassandra.utils.TimeUUID; import org.apache.cassandra.utils.concurrent.Condition; import org.apache.cassandra.utils.concurrent.Refs; +import org.mockito.Mock; import static org.apache.cassandra.ServerTestUtils.*; import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; @@ -81,12 +84,15 @@ import static org.apache.cassandra.repair.messages.RepairOption.INCREMENTAL_KEY; import static org.apache.cassandra.repair.messages.RepairOption.RANGES_KEY; import static org.apache.cassandra.service.ActiveRepairService.UNREPAIRED_SSTABLE; +import static org.apache.cassandra.service.ActiveRepairService.instance; import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.initMocks; public class ActiveRepairServiceTest { @@ -98,6 +104,8 @@ public class ActiveRepairServiceTest public String cfname; public ColumnFamilyStore store; public static InetAddressAndPort LOCAL, REMOTE; + @Mock + public DiskUsageMonitor diskUsageMonitor; @BeforeClass public static void defineSchema() throws ConfigurationException, UnknownHostException @@ -122,6 +130,7 @@ public void prepare() throws Exception NodeId remote = Register.register(new NodeAddresses(REMOTE)); UnsafeJoin.unsafeJoin(local, Collections.singleton(DatabaseDescriptor.getPartitioner().getRandomToken())); UnsafeJoin.unsafeJoin(remote, Collections.singleton(DatabaseDescriptor.getPartitioner().getMinimumToken())); + initMocks(this); } @Test @@ -220,12 +229,12 @@ public void testGetNeighborsTimesTwoInSpecifiedHosts() throws Throwable } expected.remove(FBUtilities.getBroadcastAddressAndPort()); - Collection hosts = Arrays.asList(FBUtilities.getBroadcastAddressAndPort().getHostAddressAndPort(),expected.get(0).getHostAddressAndPort()); + Collection hosts = Arrays.asList(FBUtilities.getBroadcastAddressAndPort().getHostAddressAndPort(), expected.get(0).getHostAddressAndPort()); Iterable> ranges = StorageService.instance.getLocalReplicas(KEYSPACE5).ranges(); assertEquals(expected.get(0), ActiveRepairService.instance().getNeighbors(KEYSPACE5, ranges, - ranges.iterator().next(), - null, hosts).endpoints().iterator().next()); + ranges.iterator().next(), + null, hosts).endpoints().iterator().next()); } @Test(expected = IllegalArgumentException.class) @@ -238,7 +247,6 @@ public void testGetNeighborsSpecifiedHostsWithNoLocalHost() throws Throwable ActiveRepairService.instance().getNeighbors(KEYSPACE5, ranges, ranges.iterator().next(), null, hosts); } - @Test public void testParentRepairStatus() throws Throwable { @@ -256,7 +264,6 @@ public void testParentRepairStatus() throws Throwable List failed = StorageService.instance.getParentRepairStatus(3); assertNotNull(failed); assertEquals(ActiveRepairService.ParentRepairStatus.FAILED, ActiveRepairService.ParentRepairStatus.valueOf(failed.get(0))); - } Set addTokens(int max) throws Throwable @@ -331,10 +338,10 @@ private static RepairOption opts(String... params) { assert params.length % 2 == 0 : "unbalanced key value pairs"; Map opt = new HashMap<>(); - for (int i=0; i<(params.length >> 1); i++) + for (int i = 0; i < (params.length >> 1); i++) { int idx = i << 1; - opt.put(params[idx], params[idx+1]); + opt.put(params[idx], params[idx + 1]); } return RepairOption.parse(opt, DatabaseDescriptor.getPartitioner()); } @@ -354,19 +361,19 @@ public void repairedAt() throws Exception Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true)), false)); // subrange incremental repair Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - RANGES_KEY, "1:2"), false)); + RANGES_KEY, "1:2"), false)); // hosts incremental repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - HOSTS_KEY, "127.0.0.1"), false)); + HOSTS_KEY, "127.0.0.1"), false)); // dc incremental repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - DATACENTERS_KEY, "DC2"), false)); + DATACENTERS_KEY, "DC2"), false)); // forced incremental repair Assert.assertNotEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - FORCE_REPAIR_KEY, b2s(true)), false)); + FORCE_REPAIR_KEY, b2s(true)), false)); Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(true), - FORCE_REPAIR_KEY, b2s(true)), true)); + FORCE_REPAIR_KEY, b2s(true)), true)); // full repair Assert.assertEquals(UNREPAIRED_SSTABLE, ActiveRepairService.instance().getRepairedAt(opts(INCREMENTAL_KEY, b2s(false)), false)); @@ -412,7 +419,8 @@ public void testRejectWhenPoolFullStrategy() throws InterruptedException // Submission is unblocked Thread.sleep(250); - validationExecutor.submit(() -> {}); + validationExecutor.submit(() -> { + }); } finally { @@ -449,8 +457,8 @@ public void testQueueWhenPoolFullStrategy() throws InterruptedException allSubmitted.await(TASK_SECONDS + 1, TimeUnit.SECONDS); // Give the tasks we expect to execute immediately chance to be scheduled - Util.spinAssertEquals(2 , ((ExecutorPlus) validationExecutor)::getActiveTaskCount, 1); - Util.spinAssertEquals(3 , ((ExecutorPlus) validationExecutor)::getPendingTaskCount, 1); + Util.spinAssertEquals(2, ((ExecutorPlus) validationExecutor)::getActiveTaskCount, 1); + Util.spinAssertEquals(3, ((ExecutorPlus) validationExecutor)::getPendingTaskCount, 1); // verify that we've reached a steady state with 2 threads actively processing and 3 queued tasks Assert.assertEquals(2, ((ExecutorPlus) validationExecutor).getActiveTaskCount()); @@ -489,7 +497,9 @@ public void testRepairSessionSpaceInMiB() activeRepairService.setRepairSessionSpaceInMiB(0); fail("Should have received an IllegalArgumentException for depth of 0"); } - catch (IllegalArgumentException ignored) { } + catch (IllegalArgumentException ignored) + { + } Assert.assertEquals(10, activeRepairService.getRepairSessionSpaceInMiB()); } @@ -499,6 +509,40 @@ public void testRepairSessionSpaceInMiB() } } + public void testVerifyDiskHeadroomThresholdFullRepair() + { + Assert.assertTrue(ActiveRepairService.verifyDiskHeadroomThreshold(TimeUUID.maxAtUnixMillis(0), PreviewKind.NONE, false)); + } + + @Test + public void testVerifyDiskHeadroomThresholdDiskFull() + { + DiskUsageMonitor.instance = diskUsageMonitor; + when(diskUsageMonitor.getDiskUsage()).thenReturn(1.0); + DatabaseDescriptor.setIncrementalRepairDiskHeadroomRejectRatio(1.0); + + Assert.assertFalse(ActiveRepairService.verifyDiskHeadroomThreshold(TimeUUID.maxAtUnixMillis(0), PreviewKind.NONE, true)); + } + + @Test + public void testVerifyDiskHeadroomThresholdSufficientDisk() + { + DiskUsageMonitor.instance = diskUsageMonitor; + when(diskUsageMonitor.getDiskUsage()).thenReturn(0.0); + DatabaseDescriptor.setIncrementalRepairDiskHeadroomRejectRatio(0.0); + + Assert.assertTrue(ActiveRepairService.verifyDiskHeadroomThreshold(TimeUUID.maxAtUnixMillis(0), PreviewKind.NONE, true)); + } + + @Test(expected = RuntimeException.class) + public void testPrepareForRepairThrowsExceptionForInsufficientDisk() + { + DiskUsageMonitor.instance = diskUsageMonitor; + when(diskUsageMonitor.getDiskUsage()).thenReturn(1.5); + + instance().prepareForRepair(TimeUUID.maxAtUnixMillis(0), null, null, opts(INCREMENTAL_KEY, b2s(true)), false, null); + } + private static class Task implements Runnable { private final Condition blocked; diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java new file mode 100644 index 000000000000..07b8bcc69ec3 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceBasicTest.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; + +import static org.junit.Assert.assertEquals; + +/** + * Unit tests for {@link org.apache.cassandra.service.AutoRepairService} + */ +public class AutoRepairServiceBasicTest extends CQLTester +{ + private static AutoRepairService autoRepairService; + private static AutoRepairConfig config; + + @Before + public void setUp() + { + DatabaseDescriptor.setCDCOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + config = new AutoRepairConfig(); + autoRepairService = new AutoRepairService(); + autoRepairService.config = config; + } + + @Test + public void testSetup() + { + AutoRepairService.instance.config = null; + + AutoRepairService.setup(); + + assertEquals(DatabaseDescriptor.getAutoRepairConfig(), AutoRepairService.instance.config); + } + + @Test + public void testGetAutoRepairConfigReturnsConfig() + { + assertEquals(config, autoRepairService.getAutoRepairConfig()); + } + + @Test + public void testsetAutoRepairHistoryClearDeleteHostsBufferInSecV2() + { + autoRepairService.setAutoRepairHistoryClearDeleteHostsBufferDuration("100s"); + + assertEquals(100, config.getAutoRepairHistoryClearDeleteHostsBufferInterval().toSeconds()); + } + + @Test + public void testsetAutoRepairMaxRetriesCount() + { + autoRepairService.setAutoRepairMaxRetriesCount(AutoRepairConfig.RepairType.INCREMENTAL.name(), 101); + + assertEquals(101, config.getRepairMaxRetries(AutoRepairConfig.RepairType.INCREMENTAL)); + } + + @Test + public void testsetAutoRepairRetryBackoffInSec() + { + autoRepairService.setAutoRepairRetryBackoff(AutoRepairConfig.RepairType.INCREMENTAL.name(), "102s"); + + assertEquals(102, config.getRepairRetryBackoff(AutoRepairConfig.RepairType.INCREMENTAL).toSeconds()); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsWithSchedulerDisabled() + { + autoRepairService.config = new AutoRepairConfig(false); + + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayButMVRepairDisabled() + { + autoRepairService.config = new AutoRepairConfig(true); + autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, false); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithMVReplay() + { + autoRepairService.config = new AutoRepairConfig(true); + autoRepairService.config.setMaterializedViewRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithMVReplayDisabled() + { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setMaterializedViewsEnabled(true); + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayButCDCDisabled() + { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + DatabaseDescriptor.setCDCEnabled(false); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); + } + + @Test(expected = ConfigurationException.class) + public void testSetAutoRepairEnabledThrowsForIRWithCDCReplay() + { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCOnRepairEnabled(true); + DatabaseDescriptor.setCDCEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); + } + + @Test + public void testSetAutoRepairEnabledDoesNotThrowForIRWithCDCReplayDisabled() + { + autoRepairService.config = new AutoRepairConfig(true); + DatabaseDescriptor.setCDCEnabled(true); + autoRepairService.setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL.name(), true); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java new file mode 100644 index 000000000000..9c2af3e1c793 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceRepairTypeTest.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import com.google.common.collect.ImmutableSet; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.UUID; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.apache.cassandra.config.CassandraRelevantProperties.SYSTEM_DISTRIBUTED_DEFAULT_RF; +import static org.junit.Assert.assertEquals; + +/** + * Unit tests covering different repair types for {@link org.apache.cassandra.service.AutoRepairService} + */ +@RunWith(Parameterized.class) +public class AutoRepairServiceRepairTypeTest extends CQLTester +{ + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + private final UUID host1 = UUID.fromString("00000000-0000-0000-0000-000000000001"); + private final UUID host2 = UUID.fromString("00000000-0000-0000-0000-000000000002"); + + private AutoRepairService instance; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection repairTypes() + { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + @BeforeClass + public static void setupClass() throws Exception + { + SYSTEM_DISTRIBUTED_DEFAULT_RF.setInt(1); + setAutoRepairEnabled(true); + requireNetwork(); + } + + @Before + public void setUpTest() + { + AutoRepairUtils.setup(); + instance = new AutoRepairService(); + } + + @Test + public void testGetOnGoingRepairHostIdsTest() + { + long now = System.currentTimeMillis(); + AutoRepairUtils.insertNewRepairHistory(repairType, host1, now, now - 1000000); + AutoRepairUtils.insertNewRepairHistory(repairType, host2, now, now - 1000000); + + Set hosts = instance.getOnGoingRepairHostIds(repairType.name()); + + assertEquals(ImmutableSet.of(host1.toString(), host2.toString()), hosts); + } +} diff --git a/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java new file mode 100644 index 000000000000..db87e995f558 --- /dev/null +++ b/test/unit/org/apache/cassandra/service/AutoRepairServiceSetterTest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import com.google.common.collect.ImmutableSet; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.cql3.QueryProcessor; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.repair.autorepair.AutoRepairUtils; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SystemDistributedKeyspace; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.UUID; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for (updating parameters through JMX) {@link org.apache.cassandra.service.AutoRepairService} + */ +@RunWith(Parameterized.class) +public class AutoRepairServiceSetterTest extends CQLTester +{ + private static final AutoRepairConfig config = new AutoRepairConfig(true); + + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairTypeStr; + + @Parameterized.Parameter(1) + public T arg; + + @Parameterized.Parameter(2) + public BiConsumer setter; + + @Parameterized.Parameter(3) + public Function getter; + + @Parameterized.Parameters(name = "{index}: repairType={0}, arg={1}") + public static Collection testCases() + { + DatabaseDescriptor.setConfig(DatabaseDescriptor.loadConfig()); + return Stream.of( + forEachRepairType(true, AutoRepairService.instance::setAutoRepairEnabled, config::isAutoRepairEnabled), + forEachRepairType(100, AutoRepairService.instance::setRepairThreads, config::getRepairThreads), + forEachRepairType(400, AutoRepairService.instance::setRepairSSTableCountHigherThreshold, config::getRepairSSTableCountHigherThreshold), + forEachRepairType(ImmutableSet.of("dc1", "dc2"), AutoRepairService.instance::setIgnoreDCs, config::getIgnoreDCs), + forEachRepairType(true, AutoRepairService.instance::setPrimaryTokenRangeOnly, config::getRepairPrimaryTokenRangeOnly), + forEachRepairType(600, AutoRepairService.instance::setParallelRepairPercentage, config::getParallelRepairPercentage), + forEachRepairType(700, AutoRepairService.instance::setParallelRepairCount, config::getParallelRepairCount), + forEachRepairType(true, AutoRepairService.instance::setMVRepairEnabled, config::getMaterializedViewRepairEnabled), + forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setRepairPriorityForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairUtils::getPriorityHosts), + forEachRepairType(InetAddressAndPort.getLocalHost().getHostAddressAndPort(), (repairType, commaSeparatedHostSet) -> AutoRepairService.instance.setForceRepairForHosts(repairType, (String) commaSeparatedHostSet), AutoRepairServiceSetterTest::isLocalHostForceRepair) + ).flatMap(Function.identity()).collect(Collectors.toList()); + } + + private static Set isLocalHostForceRepair(AutoRepairConfig.RepairType type) + { + UUID hostId = StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()); + UntypedResultSet resultSet = QueryProcessor.executeInternal(String.format( + "SELECT force_repair FROM %s.%s WHERE host_id = %s and repair_type = '%s'", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, hostId, type)); + + if (!resultSet.isEmpty() && resultSet.one().getBoolean("force_repair")) + { + return ImmutableSet.of(InetAddressAndPort.getLocalHost()); + } + return ImmutableSet.of(); + } + + private static Stream forEachRepairType(T arg, BiConsumer setter, Function getter) + { + Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + testCases[repairType.ordinal()] = new Object[]{ repairType, arg, setter, getter }; + } + + return Arrays.stream(testCases); + } + + @BeforeClass + public static void setup() throws Exception + { + DatabaseDescriptor.daemonInitialization(); + setAutoRepairEnabled(true); + requireNetwork(); + DatabaseDescriptor.setMaterializedViewsEnabled(false); + DatabaseDescriptor.setCDCEnabled(false); + AutoRepairUtils.setup(); + AutoRepairService.instance.config = config; + } + + @Before + public void prepare() + { + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY)); + QueryProcessor.executeInternal(String.format( + "TRUNCATE %s.%s", + SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY)); + } + + @Test + public void testSettersTest() + { + DatabaseDescriptor.setMaterializedViewsOnRepairEnabled(false); + DatabaseDescriptor.setCDCOnRepairEnabled(false); + setter.accept(repairTypeStr.name(), arg); + T actualConfig = getter.apply(repairTypeStr); + if (actualConfig instanceof Set) + // When performing a setRepairPriorityForHosts or setForceRepairForHosts, a comma-separated list of + // ip addresses is provided as input. The configuration is expected to return a Set of Strings that + // represent the configured IP addresses. This especial handling allows verification of this special + // case where one of the entries in the Set must match the configured input. + assertThat(actualConfig).satisfiesAnyOf(entry -> assertThat(entry.toString()).contains(arg.toString())); + else + assertThat(actualConfig).isEqualTo(arg); + } +} diff --git a/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java b/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java index 87b9ff93fded..df0a2c1be2ea 100644 --- a/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java +++ b/test/unit/org/apache/cassandra/tools/JMXStandardsTest.java @@ -50,6 +50,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.utils.BreaksJMX; import org.assertj.core.api.Assertions; import org.reflections.Reflections; @@ -98,6 +101,10 @@ public class JMXStandardsTest .add(IllegalStateException.class) .add(ClassNotFoundException.class) .add(OpenDataException.class) + .add(InvalidRequestException.class) + .add(AutoRepairConfig.RepairType.class) + .add(InetAddressAndPort.class) + .add(AutoRepairConfig.class) .build(); /** * This list is a set of types under java.* and javax.*, but are too vague that could cause issues; this does not diff --git a/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java new file mode 100644 index 000000000000..82293581d807 --- /dev/null +++ b/test/unit/org/apache/cassandra/tools/nodetool/AutoRepairStatusTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.Arrays; +import java.util.Collection; + +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.Output; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.AutoRepairStatus} + */ +@RunWith(Parameterized.class) +public class AutoRepairStatusTest +{ + @Mock + private static NodeProbe probe; + + private ByteArrayOutputStream cmdOutput; + + private static AutoRepairStatus cmd; + + @Parameterized.Parameter() + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters(name = "repairType={0}") + public static Collection repairTypes() + { + return Arrays.asList(AutoRepairConfig.RepairType.values()); + } + + @Before + public void setUp() throws Exception + { + MockitoAnnotations.initMocks(this); + cmdOutput = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(cmdOutput); + when(probe.output()).thenReturn(new Output(out, out)); + cmd = new AutoRepairStatus(); + DatabaseDescriptor.daemonInitialization(); + DatabaseDescriptor.loadConfig(); + setAutoRepairEnabled(true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.FULL, true); + DatabaseDescriptor.getAutoRepairConfig().setAutoRepairEnabled(AutoRepairConfig.RepairType.INCREMENTAL, true); + } + + @Test(expected = IllegalArgumentException.class) + public void testExecuteWithoutRepairType() + { + cmd.repairType = null; + cmd.execute(probe); + } + + @Test + public void testExecuteWithNoNodes() + { + cmd.repairType = repairType.name(); + + cmd.execute(probe); + assertEquals("Active Repairs\n" + + "NONE \n", cmdOutput.toString()); + } + + @Test + public void testExecute() + { + when(probe.getAutoRepairOnGoingRepairHostIds(repairType.name())).thenReturn(ImmutableSet.of("host1", "host2", "host3", "host4")); + cmd.repairType = repairType.name(); + + cmd.execute(probe); + + assertEquals("Active Repairs \n" + + "host1,host2,host3,host4\n", cmdOutput.toString()); + } +} diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java new file mode 100644 index 000000000000..5d23d22253ad --- /dev/null +++ b/test/unit/org/apache/cassandra/tools/nodetool/SSTableRepairedSetTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.cassandra.tools.NodeProbe; +import org.apache.cassandra.tools.Output; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.SSTableRepairedSetTest} + */ +public class SSTableRepairedSetTest +{ + @Mock + private NodeProbe probe; + + private SSTableRepairedSet cmd; + + @Before + public void setUp() + { + MockitoAnnotations.initMocks(this); + PrintStream noopStream = new PrintStream(new OutputStream() + { + @Override + public void write(int b) + { + } + }); + when(probe.output()).thenReturn(new Output(noopStream, noopStream)); + cmd = new SSTableRepairedSet(); + } + + @Test + public void testNoKeyspace() + { + when(probe.getNonLocalStrategyKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("ks1", "ks2"))); + when(probe.getAutoRepairTablesForKeyspace("ks1")).thenReturn(new ArrayList<>(Arrays.asList("table1", "table2"))); + when(probe.getAutoRepairTablesForKeyspace("ks2")).thenReturn(new ArrayList<>(Arrays.asList("table3", "table4"))); + cmd.isRepaired = true; + cmd.reallySet = true; + + cmd.execute(probe); + + verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks1", Arrays.asList("table1", "table2")); + verify(probe, times(1)).mutateSSTableRepairedState(true, false, "ks2", Arrays.asList("table3", "table4")); + } + + @Test + public void testBothRepairedAndUnrepaired() + { + cmd.args = Arrays.asList("keyspace"); + cmd.isRepaired = true; + cmd.isUnrepaired = true; + cmd.execute(probe); + verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); + } + + @Test + public void testNeitherRepairedNorUnrepaired() + { + cmd.args = Arrays.asList("keyspace"); + cmd.execute(probe); + verify(probe, never()).mutateSSTableRepairedState(anyBoolean(), anyBoolean(), anyString(), anyList()); + } + + @Test + public void testRepairedPreview() + { + cmd.args = Arrays.asList("keyspace"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); + cmd.isRepaired = true; + cmd.execute(probe); + verify(probe).mutateSSTableRepairedState(true, true, "keyspace", new ArrayList<>()); + } + + @Test + public void testUnrepairedReallySet() + { + cmd.args = Arrays.asList("keyspace"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); + cmd.isUnrepaired = true; + cmd.reallySet = true; + cmd.execute(probe); + verify(probe).mutateSSTableRepairedState(false, false, "keyspace", new ArrayList<>()); + } + + @Test + public void testExecuteWithTableNames() + { + cmd.args = Arrays.asList("keyspace", "table1", "table2"); + when(probe.getKeyspaces()).thenReturn(new ArrayList<>(Arrays.asList("keyspace"))); + cmd.isRepaired = true; + cmd.reallySet = true; + cmd.execute(probe); + verify(probe).mutateSSTableRepairedState(true, false, "keyspace", Arrays.asList("table1", "table2")); + } +} diff --git a/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java new file mode 100644 index 000000000000..4ea9516e8ef8 --- /dev/null +++ b/test/unit/org/apache/cassandra/tools/nodetool/SetAutoRepairConfigTest.java @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools.nodetool; + +import java.io.PrintStream; +import java.util.Arrays; +import java.util.Collection; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Suite; + +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.tools.NodeProbe; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.junit.Assert.fail; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +/** + * Unit tests for {@link org.apache.cassandra.tools.nodetool.SetAutoRepairConfig} + */ +@RunWith(Suite.class) +@Suite.SuiteClasses({ SetAutoRepairConfigTest.NoParamTests.class, SetAutoRepairConfigTest.RepairTypeParamTests.class, + SetAutoRepairConfigTest.RepairTypeAndArgsParamsTests.class }) +public class SetAutoRepairConfigTest +{ + protected static SetAutoRepairConfig cmd; + + public static void before(NodeProbe probeMock, PrintStream outMock) + { + when(probeMock.isAutoRepairDisabled()).thenReturn(false); + cmd = new SetAutoRepairConfig(); + cmd.out = outMock; + } + + public static class NoParamTests + { + @Mock + private static NodeProbe probe; + + @Mock + private static PrintStream out; + + @Before + public void setUp() + { + MockitoAnnotations.initMocks(this); + before(probe, out); + } + + @Test + public void testHistoryDeleteHostsClearBufferInSec() + { + cmd.args = ImmutableList.of("history_clear_delete_hosts_buffer_interval", "1s"); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); + + // test scenario when auto repair is disabled + when(probe.isAutoRepairDisabled()).thenReturn(true); + + cmd.execute(probe); + + // test new calls are not made when auto repair is disabled + verify(probe, times(1)).setAutoRepairHistoryClearDeleteHostsBufferDuration("1s"); + } + + @Test + public void testStartScheduler() + { + cmd.args = ImmutableList.of("start_scheduler", "false"); + + cmd.execute(probe); + + verify(probe, times(0)).startAutoRepairScheduler(); + + cmd.args = ImmutableList.of("start_scheduler", "true"); + + cmd.execute(probe); + + verify(probe, times(1)).startAutoRepairScheduler(); + } + + @Test + public void testMinRepairDuration() + { + cmd.args = ImmutableList.of("min_repair_task_duration", "4s"); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairMinRepairTaskDuration("4s"); + } + } + + @RunWith(Parameterized.class) + public static class RepairTypeParamTests + { + @Mock + private static NodeProbe probe; + + @Mock + private static PrintStream out; + + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameters(name = "repairType={0}") + public static Object[] data() + { + return AutoRepairConfig.RepairType.values(); + } + + private static InetAddressAndPort localEndpoint; + private static InetAddressAndPort otherEndpoint; + + @Before + public void setUp() throws Exception + { + MockitoAnnotations.initMocks(this); + before(probe, out); + localEndpoint = InetAddressAndPort.getByName("127.0.0.1:7000"); + otherEndpoint = localEndpoint.withPort(localEndpoint.getPort() + 1); + } + + @Test(expected = IllegalArgumentException.class) + public void testNoArgs() + { + cmd.repairTypeStr = repairType.name(); + cmd.execute(probe); + } + + @Test + public void testRepairSchedulingDisabled() + { + when(probe.isAutoRepairDisabled()).thenReturn(true); + cmd.repairTypeStr = repairType.name(); + cmd.args = ImmutableList.of("threads", "1"); + + cmd.execute(probe); + + verify(out, times(1)).println("Auto-repair is not enabled"); + verify(probe, times(0)).setAutoRepairThreads(repairType.name(), 1); + } + + @Test + public void testRepairTypeDisabled() + { + cmd.repairTypeStr = repairType.name(); + cmd.args = ImmutableList.of("number_of_repair_threads", "1"); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairThreads(repairType.name(), 1); + } + + @Test + public void testV2FlagMissing() + { + cmd.repairTypeStr = repairType.name(); + cmd.args = ImmutableList.of("threads", "1"); + + try + { + cmd.execute(probe); + + fail("expected IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } + + verify(probe, times(0)).setAutoRepairThreads(repairType.name(), 0); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidParamType() + { + cmd.repairTypeStr = repairType.name(); + cmd.args = ImmutableList.of("unknown_type", "1"); + + cmd.execute(probe); + } + + @Test + public void testPriorityHosts() + { + String commaSeparatedHostSet = String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1)); + cmd.repairTypeStr = repairType.name(); + cmd.args = ImmutableList.of("priority_hosts", commaSeparatedHostSet); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairPriorityForHosts(repairType.name(), commaSeparatedHostSet); + } + + @Test + public void testForceRepairHosts() + { + String commaSeparatedHostSet = String.join(",", localEndpoint.toString().substring(1), otherEndpoint.toString().substring(1)); + cmd.repairTypeStr = repairType.name(); + cmd.args = ImmutableList.of("forcerepair_hosts", commaSeparatedHostSet); + + cmd.execute(probe); + + verify(probe, times(1)).setAutoRepairForceRepairForHosts(repairType.name(), commaSeparatedHostSet); + } + } + + @RunWith(Parameterized.class) + public static class RepairTypeAndArgsParamsTests + { + @Parameterized.Parameter + public AutoRepairConfig.RepairType repairType; + + @Parameterized.Parameter(1) + public String paramType; + + @Parameterized.Parameter(2) + public String paramVal; + + @Parameterized.Parameter(3) + public Consumer verifyFunc; + + @Parameterized.Parameters(name = "repairType={0},paramType={1}") + public static Collection testCases() + { + return Stream.of( + forEachRepairType("enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairEnabled(type.name(), true)), + forEachRepairType("number_of_repair_threads", "1", (type) -> verify(probe, times(1)).setAutoRepairThreads(type.name(), 1)), + forEachRepairType("min_repair_interval", "3h", (type) -> verify(probe, times(1)).setAutoRepairMinInterval(type.name(), "3h")), + forEachRepairType("sstable_upper_threshold", "4", (type) -> verify(probe, times(1)).setAutoRepairSSTableCountHigherThreshold(type.name(), 4)), + forEachRepairType("table_max_repair_time", "5s", (type) -> verify(probe, times(1)).setAutoRepairTableMaxRepairTime(type.name(), "5s")), + forEachRepairType("repair_primary_token_range_only", "true", (type) -> verify(probe, times(1)).setAutoRepairPrimaryTokenRangeOnly(type.name(), true)), + forEachRepairType("parallel_repair_count", "6", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairCount(type.name(), 6)), + forEachRepairType("parallel_repair_percentage", "7", (type) -> verify(probe, times(1)).setAutoRepairParallelRepairPercentage(type.name(), 7)), + forEachRepairType("materialized_view_repair_enabled", "true", (type) -> verify(probe, times(1)).setAutoRepairMaterializedViewRepairEnabled(type.name(), true)), + forEachRepairType("ignore_dcs", "dc1,dc2", (type) -> verify(probe, times(1)).setAutoRepairIgnoreDCs(type.name(), ImmutableSet.of("dc1", "dc2"))), + forEachRepairType("token_range_splitter.max_bytes_per_schedule", "500GiB", (type) -> verify(probe, times(1)).setAutoRepairTokenRangeSplitterParameter(type.name(), "max_bytes_per_schedule", "500GiB")), + forEachRepairType("repair_max_retries", "3", (type) -> verify(probe, times(1)).setAutoRepairMaxRetriesCount(type.name(), 3)), + forEachRepairType("repair_retry_backoff", "60s", (type) -> verify(probe, times(1)).setAutoRepairRetryBackoff(type.name(), "60s")) + ).flatMap(Function.identity()).collect(Collectors.toList()); + } + + private static Stream forEachRepairType(String paramType, String paramVal, Consumer verifyFunc) + { + Object[][] testCases = new Object[AutoRepairConfig.RepairType.values().length][4]; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + testCases[repairType.ordinal()] = new Object[]{ repairType, paramType, paramVal, verifyFunc }; + } + + return Arrays.stream(testCases); + } + + @Mock + private static NodeProbe probe; + + @Mock + private static PrintStream out; + + @Before + public void setUp() + { + MockitoAnnotations.initMocks(this); + before(probe, out); + } + + @Test + public void test() + { + cmd.repairTypeStr = repairType.name(); + cmd.args = ImmutableList.of(paramType, paramVal); + + cmd.execute(probe); + + verifyFunc.accept(repairType); + + // test scenario when auto repair is disabled + when(probe.isAutoRepairDisabled()).thenReturn(true); + + cmd.execute(probe); + + // test new calls are not made when auto repair is disabled + verifyFunc.accept(repairType); + } + } +} From c3bae31a528a1c751fb312ff9cc1343a9f12c500 Mon Sep 17 00:00:00 2001 From: Jordan West Date: Wed, 9 Apr 2025 12:12:34 -0700 Subject: [PATCH 280/340] Change SSTableSimpleScanner to use SSTableReader#openDataReaderForScan patch by Jordan West; Reviewed by Jon Haddad for CASSANDRA-20538 --- CHANGES.txt | 1 + .../cassandra/io/sstable/format/SSTableSimpleScanner.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index a0b5da22c5d4..d37705c1e874 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Change SSTableSimpleScanner to use SSTableReader#openDataReaderForScan (CASSANDRA-20538) * Automated Repair Inside Cassandra [CEP-37] (CASSANDRA-19918) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * General Purpose Transactions (Accord) [CEP-15] (CASSANDRA-17092) diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java index 190ec42fa939..a649fbea4c33 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableSimpleScanner.java @@ -73,7 +73,7 @@ public SSTableSimpleScanner(SSTableReader sstable, { assert sstable != null; - this.dfile = sstable.openDataReader(); + this.dfile = sstable.openDataReaderForScan(); this.sstable = sstable; this.sizeInBytes = boundsList.stream().mapToLong(ppb -> ppb.upperPosition - ppb.lowerPosition).sum(); this.compressedSizeInBytes = sstable.compression ? sstable.onDiskSizeForPartitionPositions(boundsList) : sizeInBytes; From 97037496f2975ac9216071283584bf514b944996 Mon Sep 17 00:00:00 2001 From: Sunil Ramchandra Pawar Date: Wed, 16 Apr 2025 13:35:41 -0700 Subject: [PATCH 281/340] SAI fails queries when multiple columns exist and a non-indexed column is a composite with a map patch by Sunil Ramchandra Pawar; reviewed by Caleb Rackliffe, David Capwell for CASSANDRA-19891 --- CHANGES.txt | 1 + .../index/sai/utils/IndexTermType.java | 3 +- .../index/sai/cql/ComplexQueryTest.java | 44 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 462d3ed74ff8..2682765f1c9f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * SAI fails queries when multiple columns exist and a non-indexed column is a composite with a map (CASSANDRA-19891) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * Fix marking an SSTable as suspected and BufferPool leakage in case of a corrupted SSTable read during a compaction (CASSANDRA-20396) * Introduce SSTableSimpleScanner for compaction (CASSANDRA-20092) diff --git a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java index 7fa226e9582d..f3c7e2c05f96 100644 --- a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java +++ b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java @@ -144,7 +144,8 @@ private IndexTermType(ColumnMetadata columnMetadata, List partit AbstractType baseType = indexType.unwrap(); - if (baseType.subTypes().isEmpty()) + // We only need to inspect subtypes when it is possible for them to be queried individually. + if (baseType.subTypes().isEmpty() || indexTargetType == IndexTarget.Type.SIMPLE || indexTargetType == IndexTarget.Type.FULL) { this.subTypes = Collections.emptyList(); } diff --git a/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java b/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java index 0b4a053d1232..2aa334c29d6f 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/ComplexQueryTest.java @@ -65,4 +65,48 @@ public void splitRowsWithBooleanLogic() var result = execute("SELECT pk FROM %s WHERE str_val = 'A' AND val = 'A'"); assertRows(result, row(3)); } + + @Test + public void compositeTypeWithMapInsideQuery() + { + createTable(KEYSPACE, "CREATE TABLE %s (" + + "pk1 frozenLongType,I=>ByteType,6=>LexicalUUIDType)'>>," + + "pk2 frozen>>>," + + "ck1 frozen>>>," + + "ck2 tinyint," + + "r1 frozenDecimalType,y=>TimestampType,f=>BooleanType)'>> static," + + "r2 'DynamicCompositeType(P=>ShortType)'," + + "r3 'CompositeType(FrozenType(ListType(DoubleType)),FrozenType(MapType(LongType,DoubleType)),DoubleType)'," + + "r4 frozen>>>," + + "r5 'CompositeType(CompositeType(ShortType,SimpleDateType,BooleanType),CompositeType(FloatType),MapType(ByteType,TimeType))'," + + "r6 set," + + "PRIMARY KEY ((pk1, pk2), ck1, ck2))"); + + + + createIndex("CREATE INDEX ON %s (FULL(ck1)) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (FULL(pk1)) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (FULL(r4)) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (r2) USING 'SAI'"); + createIndex("CREATE INDEX ON %s (r3) USING 'SAI'"); + + + UntypedResultSet withMultipleColumns = execute("SELECT pk1 FROM " + + "%s " + + "WHERE r5 = 0x0010000230bd00000457f0bd31000001000000000700049f647252000000260000000200000001f300000008000001c4e14bba4b00000001260000000800003f2b300d385d00" + + " AND r3 = 0x001c00000002000000083380d171eace676900000008e153bb97fdd5c22e00006d000000030000000897c5493857999fc000000013f08cc4fad0f04d0de51cff28d4ae743d2da1c40000000857108e8c372c868400000013f0cc6bca55f0ee240b27ff12c77a7b7dc3c665000000086c07d25fcdd3403500000013f0745922bdf0ac44c9b5ffd80f025ded9a211d000008200547f5da7a43aa00" + + " AND r2 = 0x8050000255e200 " + + " AND pk2 = ((-1.2651989E-23))" + + " ALLOW FILTERING;"); + + assertRowCount(withMultipleColumns, 0); + + UntypedResultSet withoutSAI = execute("SELECT pk1 FROM " + + "%s " + + " WHERE r5 = 0x001c00000002000000083380d171eace676900000008e153bb97fdd5c22e00006d000000030000000897c5493857999fc000000013f08cc4fad0f04d0de51cff28d4ae743d2da1c40000000857108e8c372c868400000013f0cc6bca55f0ee240b27ff12c77a7b7dc3c665000000086c07d25fcdd3403500000013f0745922bdf0ac44c9b5ffd80f025ded9a211d000008200547f5da7a43aa00" + + " ALLOW FILTERING;"); + + + assertRowCount(withoutSAI, 0); + } } From 07831c9cc7efcb9be7b227260467cf10a7be7724 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 23 Apr 2025 15:18:17 -0700 Subject: [PATCH 282/340] SAI marks an index as non-empty when a partial partition/row modifications is flushed due to repair patch by David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-20567 --- CHANGES.txt | 1 + .../sai/disk/v1/MemtableIndexWriter.java | 15 +-- .../distributed/shared/ClusterUtils.java | 39 ++++++++ .../test/sai/PartialWritesWithRepairTest.java | 96 +++++++++++++++++++ 4 files changed, 145 insertions(+), 6 deletions(-) create mode 100644 test/distributed/org/apache/cassandra/distributed/test/sai/PartialWritesWithRepairTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 2682765f1c9f..21b9f481b211 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * SAI marks an index as non-empty when a partial partition/row modifications is flushed due to repair (CASSANDRA-20567) * SAI fails queries when multiple columns exist and a non-indexed column is a composite with a map (CASSANDRA-19891) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * Fix marking an SSTable as suspected and BufferPool leakage in case of a corrupted SSTable read during a compaction (CASSANDRA-20396) diff --git a/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java b/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java index 04d3185bfc5b..c5f833f4ac16 100644 --- a/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/MemtableIndexWriter.java @@ -146,12 +146,15 @@ public void complete(Stopwatch stopwatch) throws IOException { final Iterator> iterator = rowMapping.merge(memtable); - try (MemtableTermsIterator terms = new MemtableTermsIterator(memtable.getMinTerm(), memtable.getMaxTerm(), iterator)) + long cellCount = 0; + if (iterator.hasNext()) { - long cellCount = flush(terms); - - completeIndexFlush(cellCount, start, stopwatch); + try (MemtableTermsIterator terms = new MemtableTermsIterator(memtable.getMinTerm(), memtable.getMaxTerm(), iterator)) + { + cellCount = flush(terms); + } } + completeIndexFlush(cellCount, start, stopwatch); } } catch (Throwable t) @@ -217,8 +220,8 @@ private void flushVectorIndex(long startTime, Stopwatch stopwatch) throws IOExce private void completeIndexFlush(long cellCount, long startTime, Stopwatch stopwatch) throws IOException { - // create a completion marker indicating that the index is complete and not-empty - ColumnCompletionMarkerUtil.create(indexDescriptor, indexIdentifier, false); + // create a completion marker indicating that the index is complete + ColumnCompletionMarkerUtil.create(indexDescriptor, indexIdentifier, cellCount == 0); indexMetrics.memtableIndexFlushCount.inc(); diff --git a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java index 5a8da8c7cf52..3bded5cd1605 100644 --- a/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java +++ b/test/distributed/org/apache/cassandra/distributed/shared/ClusterUtils.java @@ -18,6 +18,7 @@ package org.apache.cassandra.distributed.shared; +import java.io.Serializable; import java.lang.reflect.Field; import java.net.InetSocketAddress; import java.security.Permission; @@ -60,6 +61,7 @@ import org.apache.cassandra.tools.SystemExitException; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Isolated; +import org.apache.cassandra.utils.Shared; import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly; import static org.apache.cassandra.config.CassandraRelevantProperties.BOOTSTRAP_SCHEMA_DELAY_MS; @@ -977,4 +979,41 @@ public void checkPermission(Permission perm, Object context) } }); } + + @Shared + public static class Range implements Serializable + { + public final String left, right; + + public Range(String left, String right) + { + this.left = left; + this.right = right; + } + + public Range(long left, long right) + { + this(Long.toString(left), Long.toString(right)); + } + + public long left() + { + return Long.parseLong(left); + } + + public long right() + { + return Long.parseLong(right); + } + } + + public static List getPrimaryRanges(IInvokableInstance instance, String keyspace) + { + return instance.callOnInstance(() -> { + var ranges = StorageService.instance.getPrimaryRangesForEndpoint(keyspace, FBUtilities.getBroadcastAddressAndPort()); + return ranges.stream() + .flatMap(r -> r.unwrap().stream().map(r2 -> new Range(r2.left.toString(), r2.right.toString()))) + .collect(Collectors.toList()); + }); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/sai/PartialWritesWithRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/sai/PartialWritesWithRepairTest.java new file mode 100644 index 000000000000..703137c5c15a --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/test/sai/PartialWritesWithRepairTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.sai; + +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.Feature; +import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.shared.ClusterUtils; +import org.apache.cassandra.distributed.shared.ClusterUtils.Range; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.junit.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +public class PartialWritesWithRepairTest extends TestBaseImpl +{ + @Test + public void test() throws IOException + { + try (Cluster cluster = Cluster.build(2) + .withConfig(c -> c.with(Feature.values())) + .start()) + { + init(cluster); + cluster.schemaChange(withKeyspace("CREATE TABLE %s.tbl (pk vector, ck int, s1 int static, v1 int, v2 int, PRIMARY KEY (pk, ck))")); + cluster.schemaChange(withKeyspace("CREATE INDEX ON %s.tbl(s1) USING 'sai'")); + cluster.schemaChange(withKeyspace("CREATE INDEX ON %s.tbl(v1) USING 'sai'")); + cluster.schemaChange(withKeyspace("CREATE INDEX ON %s.tbl(v2) USING 'sai'")); + IInvokableInstance node1 = cluster.get(1); + IInvokableInstance node2 = cluster.get(2); + // see org.apache.cassandra.service.StorageService.repair + List partialRanges = ClusterUtils.getPrimaryRanges(node1, KEYSPACE); + var completeRanges = completeRanges(partialRanges); + + // write to each column for the complete set + // avoid writing to one of the columns for the partial set + for (var range : completeRanges) + { + ByteBuffer pk = key(range); + node2.executeInternal(withKeyspace("INSERT INTO %s.tbl(pk, ck, s1, v1, v2) VALUES (?, ?, ?, ?, ?)"), pk, 0, 0, 0, 0); + node2.executeInternal(withKeyspace("INSERT INTO %s.tbl(pk, ck, s1, v1, v2) VALUES (?, ?, ?, ?, ?)"), pk, 1, 0, 1, 1); + } + for (var range : partialRanges) + { + ByteBuffer pk = key(range); + node2.executeInternal(withKeyspace("INSERT INTO %s.tbl(pk, ck, v1) VALUES (?, ?, ?)"), pk, 0, 0); + node2.executeInternal(withKeyspace("INSERT INTO %s.tbl(pk, ck, v1) VALUES (?, ?, ?)"), pk, 1, 1); + } + + node1.nodetoolResult("repair", KEYSPACE, "-pr").asserts().success(); + } + } + + private static ByteBuffer key(Range range) + { + return Murmur3Partitioner.LongToken.keyForToken(range.right()); + } + + private static List completeRanges(List ranges) + { + ranges.sort(Comparator.comparingLong(Range::left)); + List list = new ArrayList<>(); + Range previous = ranges.get(0); + if (previous.left() != Long.MIN_VALUE) + list.add(new Range(Long.MIN_VALUE, ranges.get(0).left())); + for (int i = 1; i < ranges.size(); i++) + { + Range next = ranges.get(i); + if (!previous.right.equals(next.left)) + list.add(new Range(previous.right, next.left)); + previous = next; + } + return list; + } +} From ae82efc013cf0529d6156306ab1d1f4d8a9f963a Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Thu, 20 Mar 2025 16:28:50 +0000 Subject: [PATCH 283/340] Add LittleEndianMemoryUtil and NativeEndianMemoryUtil, switch memtable-related off-heap objects and Memory to use them and have Little Endian now. Add BE offsets detection on Summary loading. Add test SSTables in an old format with BE offsets in Summary component to LegacySSTableTest. Patch by Dmitry Konstantinov; reviewed by Branimir Lambov, Michael Semb Wever for CASSANDRA-20190 --- CHANGES.txt | 1 + .../apache/cassandra/db/NativeClustering.java | 25 +- .../cassandra/db/NativeDecoratedKey.java | 9 +- .../apache/cassandra/db/rows/NativeCell.java | 33 +-- .../io/sstable/indexsummary/IndexSummary.java | 12 + .../org/apache/cassandra/io/util/Memory.java | 144 +----------- .../utils/memory/LittleEndianMemoryUtil.java | 146 ++++++++++++ .../cassandra/utils/memory/MemoryUtil.java | 173 +------------- .../utils/memory/NativeEndianMemoryUtil.java | 214 ++++++++++++++++++ .../da-500-bti-CompressionInfo.db | Bin 0 -> 207 bytes .../da-500-bti-Data.db | Bin 0 -> 8602 bytes .../da-500-bti-Digest.crc32 | 1 + .../da-500-bti-Filter.db | Bin 0 -> 24 bytes .../da-500-bti-Partitions.db | Bin 0 -> 62 bytes .../da-500-bti-Rows.db | Bin 0 -> 508 bytes .../da-500-bti-Statistics.db | Bin 0 -> 7312 bytes .../da-500-bti-TOC.txt | 8 + .../ma-306-big-CompressionInfo.db | Bin 0 -> 83 bytes .../ma-306-big-Data.db | Bin 0 -> 5221 bytes .../ma-306-big-Digest.crc32 | 1 + .../ma-306-big-Filter.db | Bin 0 -> 24 bytes .../ma-306-big-Index.db | Bin 0 -> 157553 bytes .../ma-306-big-Statistics.db | Bin 0 -> 7046 bytes .../ma-306-big-Summary.db | Bin 0 -> 47 bytes .../ma-306-big-TOC.txt | 8 + .../mb-307-big-CompressionInfo.db | Bin 0 -> 83 bytes .../mb-307-big-Data.db | Bin 0 -> 5232 bytes .../mb-307-big-Digest.crc32 | 1 + .../mb-307-big-Filter.db | Bin 0 -> 24 bytes .../mb-307-big-Index.db | Bin 0 -> 157553 bytes .../mb-307-big-Statistics.db | Bin 0 -> 7058 bytes .../mb-307-big-Summary.db | Bin 0 -> 47 bytes .../mb-307-big-TOC.txt | 8 + .../mc-3113-big-CompressionInfo.db | Bin 0 -> 83 bytes .../mc-3113-big-Data.db | Bin 0 -> 5215 bytes .../mc-3113-big-Digest.crc32 | 1 + .../mc-3113-big-Filter.db | Bin 0 -> 24 bytes .../mc-3113-big-Index.db | Bin 0 -> 157553 bytes .../mc-3113-big-Statistics.db | Bin 0 -> 7070 bytes .../mc-3113-big-Summary.db | Bin 0 -> 47 bytes .../mc-3113-big-TOC.txt | 8 + .../md-31110-big-CompressionInfo.db | Bin 0 -> 83 bytes .../md-31110-big-Data.db | Bin 0 -> 5206 bytes .../md-31110-big-Digest.crc32 | 1 + .../md-31110-big-Filter.db | Bin 0 -> 24 bytes .../md-31110-big-Index.db | Bin 0 -> 157553 bytes .../md-31110-big-Statistics.db | Bin 0 -> 7134 bytes .../md-31110-big-Summary.db | Bin 0 -> 47 bytes .../md-31110-big-TOC.txt | 8 + .../me-31111-big-CompressionInfo.db | Bin 0 -> 83 bytes .../me-31111-big-Data.db | Bin 0 -> 5207 bytes .../me-31111-big-Digest.crc32 | 1 + .../me-31111-big-Filter.db | Bin 0 -> 24 bytes .../me-31111-big-Index.db | Bin 0 -> 157553 bytes .../me-31111-big-Statistics.db | Bin 0 -> 7151 bytes .../me-31111-big-Summary.db | Bin 0 -> 47 bytes .../me-31111-big-TOC.txt | 8 + .../na-40-big-CompressionInfo.db | Bin 0 -> 207 bytes .../na-40-big-Data.db | Bin 0 -> 8587 bytes .../na-40-big-Digest.crc32 | 1 + .../na-40-big-Filter.db | Bin 0 -> 24 bytes .../na-40-big-Index.db | Bin 0 -> 157553 bytes .../na-40-big-Statistics.db | Bin 0 -> 7160 bytes .../na-40-big-Summary.db | Bin 0 -> 47 bytes .../na-40-big-TOC.txt | 8 + .../nb-400-big-CompressionInfo.db | Bin 0 -> 207 bytes .../nb-400-big-Data.db | Bin 0 -> 8620 bytes .../nb-400-big-Digest.crc32 | 1 + .../nb-400-big-Filter.db | Bin 0 -> 24 bytes .../nb-400-big-Index.db | Bin 0 -> 157553 bytes .../nb-400-big-Statistics.db | Bin 0 -> 7177 bytes .../nb-400-big-Summary.db | Bin 0 -> 47 bytes .../nb-400-big-TOC.txt | 8 + .../oa-500-big-CompressionInfo.db | Bin 0 -> 207 bytes .../oa-500-big-Data.db | Bin 0 -> 8513 bytes .../oa-500-big-Digest.crc32 | 1 + .../oa-500-big-Filter.db | Bin 0 -> 24 bytes .../oa-500-big-Index.db | Bin 0 -> 157498 bytes .../oa-500-big-Statistics.db | Bin 0 -> 7312 bytes .../oa-500-big-Summary.db | Bin 0 -> 47 bytes .../oa-500-big-TOC.txt | 8 + .../db/compaction/CompactionsCQLTest.java | 2 +- .../io/sstable/LegacySSTableTest.java | 79 ++++--- .../memory/LittleEndianMemoryUtilTest.java | 148 ++++++++++++ .../memory/NativeEndianMemoryUtilTest.java | 148 ++++++++++++ 85 files changed, 852 insertions(+), 363 deletions(-) create mode 100644 src/java/org/apache/cassandra/utils/memory/LittleEndianMemoryUtil.java create mode 100644 src/java/org/apache/cassandra/utils/memory/NativeEndianMemoryUtil.java create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-CompressionInfo.db create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Data.db create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Digest.crc32 create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Filter.db create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Partitions.db create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Rows.db create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Statistics.db create mode 100644 test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-TOC.txt create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Data.db create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Filter.db create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Index.db create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Statistics.db create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Summary.db create mode 100644 test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-TOC.txt create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Data.db create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Filter.db create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Index.db create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Statistics.db create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Summary.db create mode 100644 test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-TOC.txt create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Data.db create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Filter.db create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Index.db create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Statistics.db create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Summary.db create mode 100644 test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-TOC.txt create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Data.db create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Filter.db create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Index.db create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Statistics.db create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Summary.db create mode 100644 test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-TOC.txt create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Data.db create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Filter.db create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Index.db create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Statistics.db create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Summary.db create mode 100644 test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-TOC.txt create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Data.db create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Filter.db create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Index.db create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Statistics.db create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Summary.db create mode 100644 test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-TOC.txt create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Data.db create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Filter.db create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Index.db create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Statistics.db create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Summary.db create mode 100644 test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-TOC.txt create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-CompressionInfo.db create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Data.db create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Digest.crc32 create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Filter.db create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Index.db create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Statistics.db create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Summary.db create mode 100644 test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-TOC.txt create mode 100644 test/unit/org/apache/cassandra/utils/memory/LittleEndianMemoryUtilTest.java create mode 100644 test/unit/org/apache/cassandra/utils/memory/NativeEndianMemoryUtilTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 21b9f481b211..545ba01dab15 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) * SAI marks an index as non-empty when a partial partition/row modifications is flushed due to repair (CASSANDRA-20567) * SAI fails queries when multiple columns exist and a non-indexed column is a composite with a map (CASSANDRA-19891) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) diff --git a/src/java/org/apache/cassandra/db/NativeClustering.java b/src/java/org/apache/cassandra/db/NativeClustering.java index e7c7e8893a17..e83ded06c6e0 100644 --- a/src/java/org/apache/cassandra/db/NativeClustering.java +++ b/src/java/org/apache/cassandra/db/NativeClustering.java @@ -28,6 +28,7 @@ import org.apache.cassandra.utils.concurrent.OpOrder; import org.apache.cassandra.utils.memory.HeapCloner; import org.apache.cassandra.utils.memory.MemoryUtil; +import org.apache.cassandra.utils.memory.NativeEndianMemoryUtil; import org.apache.cassandra.utils.memory.NativeAllocator; public class NativeClustering implements Clustering @@ -50,30 +51,30 @@ public NativeClustering(NativeAllocator allocator, OpOrder.Group writeOp, Cluste peer = allocator.allocate(metadataSize + dataSize + bitmapSize, writeOp); long bitmapStart = peer + metadataSize; - MemoryUtil.setShort(peer, (short) count); - MemoryUtil.setShort(peer + (metadataSize - 2), (short) dataSize); // goes at the end of the other offsets + NativeEndianMemoryUtil.setShort(peer, (short) count); + NativeEndianMemoryUtil.setShort(peer + (metadataSize - 2), (short) dataSize); // goes at the end of the other offsets - MemoryUtil.setByte(bitmapStart, bitmapSize, (byte) 0); + NativeEndianMemoryUtil.setByte(bitmapStart, bitmapSize, (byte) 0); long dataStart = peer + metadataSize + bitmapSize; int dataOffset = 0; for (int i = 0 ; i < count ; i++) { - MemoryUtil.setShort(peer + 2 + i * 2, (short) dataOffset); + NativeEndianMemoryUtil.setShort(peer + 2 + i * 2, (short) dataOffset); ByteBuffer value = clustering.bufferAt(i); if (value == null) { long boffset = bitmapStart + (i >>> 3); - int b = MemoryUtil.getByte(boffset); + int b = NativeEndianMemoryUtil.getByte(boffset); b |= 1 << (i & 7); - MemoryUtil.setByte(boffset, (byte) b); + NativeEndianMemoryUtil.setByte(boffset, (byte) b); continue; } assert value.order() == ByteOrder.BIG_ENDIAN; int size = value.remaining(); - MemoryUtil.setBytes(dataStart + dataOffset, value); + NativeEndianMemoryUtil.setBytes(dataStart + dataOffset, value); dataOffset += size; } } @@ -90,13 +91,13 @@ public ClusteringPrefix clustering() public int size() { - return MemoryUtil.getShort(peer); + return NativeEndianMemoryUtil.getUnsignedShort(peer); } public int dataSize() { int dataSizeOffset = (size() * 2) + 2; // metadataSize - 2 - return MemoryUtil.getShort(peer + dataSizeOffset); + return NativeEndianMemoryUtil.getUnsignedShort(peer + dataSizeOffset); } public ByteBuffer get(int i) @@ -109,12 +110,12 @@ public ByteBuffer get(int i) int metadataSize = (size * 2) + 4; int bitmapSize = ((size + 7) >>> 3); long bitmapStart = peer + metadataSize; - int b = MemoryUtil.getByte(bitmapStart + (i >>> 3)); + int b = NativeEndianMemoryUtil.getByte(bitmapStart + (i >>> 3)); if ((b & (1 << (i & 7))) != 0) return null; - int startOffset = MemoryUtil.getShort(peer + 2 + i * 2); - int endOffset = MemoryUtil.getShort(peer + 4 + i * 2); + int startOffset = NativeEndianMemoryUtil.getUnsignedShort(peer + 2 + i * 2); + int endOffset = NativeEndianMemoryUtil.getUnsignedShort(peer + 4 + i * 2); return MemoryUtil.getByteBuffer(bitmapStart + bitmapSize + startOffset, endOffset - startOffset, ByteOrder.BIG_ENDIAN); diff --git a/src/java/org/apache/cassandra/db/NativeDecoratedKey.java b/src/java/org/apache/cassandra/db/NativeDecoratedKey.java index bc149084d852..76b2367ae210 100644 --- a/src/java/org/apache/cassandra/db/NativeDecoratedKey.java +++ b/src/java/org/apache/cassandra/db/NativeDecoratedKey.java @@ -26,6 +26,7 @@ import org.apache.cassandra.utils.concurrent.OpOrder; import org.apache.cassandra.utils.memory.MemoryUtil; import org.apache.cassandra.utils.memory.NativeAllocator; +import org.apache.cassandra.utils.memory.NativeEndianMemoryUtil; public class NativeDecoratedKey extends DecoratedKey { @@ -39,7 +40,7 @@ public NativeDecoratedKey(Token token, NativeAllocator allocator, OpOrder.Group int size = key.remaining(); this.peer = allocator.allocate(4 + size, writeOp); - MemoryUtil.setInt(peer, size); + NativeEndianMemoryUtil.setInt(peer, size); MemoryUtil.setBytes(peer + 4, key); } @@ -50,14 +51,14 @@ public NativeDecoratedKey(Token token, NativeAllocator allocator, OpOrder.Group int size = keyBytes.length; this.peer = allocator.allocate(4 + size, writeOp); - MemoryUtil.setInt(peer, size); + NativeEndianMemoryUtil.setInt(peer, size); MemoryUtil.setBytes(peer + 4, keyBytes, 0, size); } @Inline int length() { - return MemoryUtil.getInt(peer); + return NativeEndianMemoryUtil.getInt(peer); } @Inline @@ -75,7 +76,7 @@ public ByteBuffer getKey() @Override public int getKeyLength() { - return MemoryUtil.getInt(peer); + return NativeEndianMemoryUtil.getInt(peer); } @Override diff --git a/src/java/org/apache/cassandra/db/rows/NativeCell.java b/src/java/org/apache/cassandra/db/rows/NativeCell.java index b0613f33f6da..65516ff31fb2 100644 --- a/src/java/org/apache/cassandra/db/rows/NativeCell.java +++ b/src/java/org/apache/cassandra/db/rows/NativeCell.java @@ -28,6 +28,7 @@ import org.apache.cassandra.utils.concurrent.OpOrder; import org.apache.cassandra.utils.memory.MemoryUtil; import org.apache.cassandra.utils.memory.NativeAllocator; +import org.apache.cassandra.utils.memory.NativeEndianMemoryUtil; public class NativeCell extends AbstractCell { @@ -101,11 +102,11 @@ public NativeCell(NativeAllocator allocator, // cellpath? : timestamp : ttl : localDeletionTime : length : : [cell path length] : [] peer = allocator.allocate((int) size, writeOp); - MemoryUtil.setByte(peer + HAS_CELLPATH, (byte)(path == null ? 0 : 1)); - MemoryUtil.setLong(peer + TIMESTAMP, timestamp); - MemoryUtil.setInt(peer + TTL, ttl); - MemoryUtil.setInt(peer + DELETION, localDeletionTimeUnsignedInteger); - MemoryUtil.setInt(peer + LENGTH, value.remaining()); + NativeEndianMemoryUtil.setByte(peer + HAS_CELLPATH, (byte)(path == null ? 0 : 1)); + NativeEndianMemoryUtil.setLong(peer + TIMESTAMP, timestamp); + NativeEndianMemoryUtil.setInt(peer + TTL, ttl); + NativeEndianMemoryUtil.setInt(peer + DELETION, localDeletionTimeUnsignedInteger); + NativeEndianMemoryUtil.setInt(peer + LENGTH, value.remaining()); MemoryUtil.setBytes(peer + VALUE, value); if (path != null) @@ -114,7 +115,7 @@ public NativeCell(NativeAllocator allocator, assert pathbuffer.order() == ByteOrder.BIG_ENDIAN; long offset = peer + VALUE + value.remaining(); - MemoryUtil.setInt(offset, pathbuffer.remaining()); + NativeEndianMemoryUtil.setInt(offset, pathbuffer.remaining()); MemoryUtil.setBytes(offset + 4, pathbuffer); } } @@ -126,17 +127,17 @@ private static long offHeapSizeWithoutPath(int length) public long timestamp() { - return MemoryUtil.getLong(peer + TIMESTAMP); + return NativeEndianMemoryUtil.getLong(peer + TIMESTAMP); } public int ttl() { - return MemoryUtil.getInt(peer + TTL); + return NativeEndianMemoryUtil.getInt(peer + TTL); } public ByteBuffer value()// FIXME: add native accessor { - int length = MemoryUtil.getInt(peer + LENGTH); + int length = NativeEndianMemoryUtil.getInt(peer + LENGTH); return MemoryUtil.getByteBuffer(peer + VALUE, length, ByteOrder.BIG_ENDIAN); } @@ -147,7 +148,7 @@ public ValueAccessor accessor() public int valueSize() { - return MemoryUtil.getInt(peer + LENGTH); + return NativeEndianMemoryUtil.getInt(peer + LENGTH); } public CellPath path() @@ -155,8 +156,8 @@ public CellPath path() if (!hasPath()) return null; - long offset = peer + VALUE + MemoryUtil.getInt(peer + LENGTH); - int size = MemoryUtil.getInt(offset); + long offset = peer + VALUE + NativeEndianMemoryUtil.getInt(peer + LENGTH); + int size = NativeEndianMemoryUtil.getInt(offset); return CellPath.create(MemoryUtil.getByteBuffer(offset + 4, size, ByteOrder.BIG_ENDIAN)); } @@ -194,20 +195,20 @@ public long unsharedHeapSizeExcludingData() public long offHeapSize() { - long size = offHeapSizeWithoutPath(MemoryUtil.getInt(peer + LENGTH)); + long size = offHeapSizeWithoutPath(NativeEndianMemoryUtil.getInt(peer + LENGTH)); if (hasPath()) - size += 4 + MemoryUtil.getInt(peer + size); + size += 4 + NativeEndianMemoryUtil.getInt(peer + size); return size; } private boolean hasPath() { - return MemoryUtil.getByte(peer+ HAS_CELLPATH) != 0; + return NativeEndianMemoryUtil.getByte(peer + HAS_CELLPATH) != 0; } @Override protected int localDeletionTimeAsUnsignedInt() { - return MemoryUtil.getInt(peer + DELETION); + return NativeEndianMemoryUtil.getInt(peer + DELETION); } } diff --git a/src/java/org/apache/cassandra/io/sstable/indexsummary/IndexSummary.java b/src/java/org/apache/cassandra/io/sstable/indexsummary/IndexSummary.java index 6a6546f6a488..23abe117af2a 100644 --- a/src/java/org/apache/cassandra/io/sstable/indexsummary/IndexSummary.java +++ b/src/java/org/apache/cassandra/io/sstable/indexsummary/IndexSummary.java @@ -454,6 +454,18 @@ public IndexSummary deserialize(T in, IP entries.free(); throw ioe; } + + // Before 5.0 offsets were written using Native Endian, now they are stored as Little Endian, + // so we apply a heuristic here to detect + // if the loading index summary was created on a Big Endian machine using Native Endian format + if (offsets.size() > 0) + { + int offset = offsets.getInt(0); + int offsetReversed = Integer.reverseBytes(offset); + if (offsetReversed > 0 && offset > offsetReversed || offset - offsets.size() < 0) + throw new IOException(String.format("Rebuilding index summary because offset value (%d) at position: %d " + + "is Big Endian while Little Endian is expected", offset, 0)); + } // our on-disk representation treats the offsets and the summary data as one contiguous structure, // in which the offsets are based from the start of the structure. i.e., if the offsets occupy // X bytes, the value of the first offset will be X. In memory we split the two regions up, so that diff --git a/src/java/org/apache/cassandra/io/util/Memory.java b/src/java/org/apache/cassandra/io/util/Memory.java index 1d1fca2edf96..1e6f6a215049 100644 --- a/src/java/org/apache/cassandra/io/util/Memory.java +++ b/src/java/org/apache/cassandra/io/util/Memory.java @@ -22,14 +22,15 @@ import net.nicoulaj.compilecommand.annotations.Inline; -import org.apache.cassandra.utils.Architecture; import org.apache.cassandra.utils.FastByteOperations; import org.apache.cassandra.utils.concurrent.Ref; +import org.apache.cassandra.utils.memory.LittleEndianMemoryUtil; import org.apache.cassandra.utils.memory.MemoryUtil; import sun.misc.Unsafe; /** * An off-heap region of memory that must be manually free'd when no longer needed. + * It uses Little Endian (LE). */ public class Memory implements AutoCloseable, ReadableMemory { @@ -90,7 +91,7 @@ public static Memory allocate(long bytes) public void setByte(long offset, byte b) { checkBounds(offset, offset + 1); - unsafe.putByte(peer + offset, b); + LittleEndianMemoryUtil.setByte(peer + offset, b); } public void setMemory(long offset, long bytes, byte b) @@ -103,86 +104,13 @@ public void setMemory(long offset, long bytes, byte b) public void setLong(long offset, long l) { checkBounds(offset, offset + 8); - if (Architecture.IS_UNALIGNED) - unsafe.putLong(peer + offset, Architecture.BIG_ENDIAN ? Long.reverseBytes(l) : l); - else - putLongByByte(peer + offset, l); - } - - private void putLongByByte(long address, long value) - { - if (Architecture.BIG_ENDIAN) - { - unsafe.putByte(address, (byte) (value >> 56)); - unsafe.putByte(address + 1, (byte) (value >> 48)); - unsafe.putByte(address + 2, (byte) (value >> 40)); - unsafe.putByte(address + 3, (byte) (value >> 32)); - unsafe.putByte(address + 4, (byte) (value >> 24)); - unsafe.putByte(address + 5, (byte) (value >> 16)); - unsafe.putByte(address + 6, (byte) (value >> 8)); - unsafe.putByte(address + 7, (byte) (value)); - } - else - { - unsafe.putByte(address + 7, (byte) (value >> 56)); - unsafe.putByte(address + 6, (byte) (value >> 48)); - unsafe.putByte(address + 5, (byte) (value >> 40)); - unsafe.putByte(address + 4, (byte) (value >> 32)); - unsafe.putByte(address + 3, (byte) (value >> 24)); - unsafe.putByte(address + 2, (byte) (value >> 16)); - unsafe.putByte(address + 1, (byte) (value >> 8)); - unsafe.putByte(address, (byte) (value)); - } + LittleEndianMemoryUtil.setLong(peer + offset, l); } public void setInt(long offset, int l) { checkBounds(offset, offset + 4); - if (Architecture.IS_UNALIGNED) - unsafe.putInt(peer + offset, Architecture.BIG_ENDIAN ? Integer.reverseBytes(l) : l); - else - putIntByByte(peer + offset, l); - } - - private void putIntByByte(long address, int value) - { - if (Architecture.BIG_ENDIAN) - { - unsafe.putByte(address, (byte) (value >> 24)); - unsafe.putByte(address + 1, (byte) (value >> 16)); - unsafe.putByte(address + 2, (byte) (value >> 8)); - unsafe.putByte(address + 3, (byte) (value)); - } - else - { - unsafe.putByte(address + 3, (byte) (value >> 24)); - unsafe.putByte(address + 2, (byte) (value >> 16)); - unsafe.putByte(address + 1, (byte) (value >> 8)); - unsafe.putByte(address, (byte) (value)); - } - } - - public void setShort(long offset, short l) - { - checkBounds(offset, offset + 2); - if (Architecture.IS_UNALIGNED) - unsafe.putShort(peer + offset, Architecture.BIG_ENDIAN ? Short.reverseBytes(l) : l); - else - putShortByByte(peer + offset, l); - } - - private void putShortByByte(long address, short value) - { - if (Architecture.BIG_ENDIAN) - { - unsafe.putByte(address, (byte) (value >> 8)); - unsafe.putByte(address + 1, (byte) (value)); - } - else - { - unsafe.putByte(address + 1, (byte) (value >> 8)); - unsafe.putByte(address, (byte) (value)); - } + LittleEndianMemoryUtil.setInt(peer + offset, l); } public void setBytes(long memoryOffset, ByteBuffer buffer) @@ -230,69 +158,19 @@ else if (count == 0) public byte getByte(long offset) { checkBounds(offset, offset + 1); - return unsafe.getByte(peer + offset); + return LittleEndianMemoryUtil.getByte(peer + offset); } public long getLong(long offset) { checkBounds(offset, offset + 8); - if (Architecture.IS_UNALIGNED) - return Architecture.BIG_ENDIAN ? Long.reverseBytes(unsafe.getLong(peer+offset)) : unsafe.getLong(peer+offset); - else - return getLongByByte(peer + offset); - } - - private long getLongByByte(long address) - { - if (Architecture.BIG_ENDIAN) - { - return (((long) unsafe.getByte(address ) ) << 56) | - (((long) unsafe.getByte(address + 1) & 0xff) << 48) | - (((long) unsafe.getByte(address + 2) & 0xff) << 40) | - (((long) unsafe.getByte(address + 3) & 0xff) << 32) | - (((long) unsafe.getByte(address + 4) & 0xff) << 24) | - (((long) unsafe.getByte(address + 5) & 0xff) << 16) | - (((long) unsafe.getByte(address + 6) & 0xff) << 8) | - (((long) unsafe.getByte(address + 7) & 0xff) ); - } - else - { - return (((long) unsafe.getByte(address + 7) ) << 56) | - (((long) unsafe.getByte(address + 6) & 0xff) << 48) | - (((long) unsafe.getByte(address + 5) & 0xff) << 40) | - (((long) unsafe.getByte(address + 4) & 0xff) << 32) | - (((long) unsafe.getByte(address + 3) & 0xff) << 24) | - (((long) unsafe.getByte(address + 2) & 0xff) << 16) | - (((long) unsafe.getByte(address + 1) & 0xff) << 8) | - (((long) unsafe.getByte(address ) & 0xff) ); - } + return LittleEndianMemoryUtil.getLong(peer + offset); } public int getInt(long offset) { checkBounds(offset, offset + 4); - if (Architecture.IS_UNALIGNED) - return Architecture.BIG_ENDIAN ? Integer.reverseBytes(unsafe.getInt(peer+offset)) : unsafe.getInt(peer+offset); - else - return getIntByByte(peer + offset); - } - - private int getIntByByte(long address) - { - if (Architecture.BIG_ENDIAN) - { - return ((unsafe.getByte(address ) ) << 24) | - ((unsafe.getByte(address + 1) & 0xff) << 16) | - ((unsafe.getByte(address + 2) & 0xff) << 8 ) | - ((unsafe.getByte(address + 3) & 0xff) ); - } - else - { - return ((unsafe.getByte(address + 3) ) << 24) | - ((unsafe.getByte(address + 2) & 0xff) << 16) | - ((unsafe.getByte(address + 1) & 0xff) << 8) | - ((unsafe.getByte(address ) & 0xff) ); - } + return LittleEndianMemoryUtil.getInt(peer + offset); } /** @@ -378,18 +256,18 @@ public ByteBuffer[] asByteBuffers(long offset, long length) int size = (int) (size() / result.length); for (int i = 0 ; i < result.length - 1 ; i++) { - result[i] = MemoryUtil.getByteBuffer(peer + offset, size); + result[i] = LittleEndianMemoryUtil.getByteBuffer(peer + offset, size); offset += size; length -= size; } - result[result.length - 1] = MemoryUtil.getByteBuffer(peer + offset, (int) length); + result[result.length - 1] = LittleEndianMemoryUtil.getByteBuffer(peer + offset, (int) length); return result; } public ByteBuffer asByteBuffer(long offset, int length) { checkBounds(offset, offset + length); - return MemoryUtil.getByteBuffer(peer + offset, length); + return LittleEndianMemoryUtil.getByteBuffer(peer + offset, length); } // MUST provide a buffer created via MemoryUtil.getHollowDirectByteBuffer() diff --git a/src/java/org/apache/cassandra/utils/memory/LittleEndianMemoryUtil.java b/src/java/org/apache/cassandra/utils/memory/LittleEndianMemoryUtil.java new file mode 100644 index 000000000000..2553b9314984 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/memory/LittleEndianMemoryUtil.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.memory; + + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.utils.Architecture; + +public class LittleEndianMemoryUtil extends MemoryUtil +{ + public static int getUnsignedShort(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b1) == 0L) + return (Architecture.BIG_ENDIAN ? Short.reverseBytes(unsafe.getShort(address)) : unsafe.getShort(address)) & 0xffff; + else + return getShortByByte(address) & 0xffff; + } + + public static int getInt(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b11) == 0L) + return Architecture.BIG_ENDIAN ? Integer.reverseBytes(unsafe.getInt(address)) : unsafe.getInt(address); + else + return getIntByByte(address); + } + + public static long getLong(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b111) == 0L) + return Architecture.BIG_ENDIAN ? Long.reverseBytes(unsafe.getLong(address)) : unsafe.getLong(address); + else + return getLongByByte(address); + } + + public static void setShort(long address, short s) + { + if (Architecture.IS_UNALIGNED || (address & 0b1) == 0L) + unsafe.putShort(address, Architecture.BIG_ENDIAN ? Short.reverseBytes(s) : s); + else + putShortByByte(address, s); + } + + public static void setInt(long address, int l) + { + if (Architecture.IS_UNALIGNED || (address & 0b11) == 0L) + unsafe.putInt(address, Architecture.BIG_ENDIAN ? Integer.reverseBytes(l) : l); + else + putIntByByte(address, l); + } + + public static void setLong(long address, long l) + { + if (Architecture.IS_UNALIGNED || (address & 0b111) == 0L) + unsafe.putLong(address, Architecture.BIG_ENDIAN ? Long.reverseBytes(l) : l); + else + putLongByByte(address, l); + } + + @VisibleForTesting + static long getLongByByte(long address) + { + return (((long) unsafe.getByte(address + 7) ) << 56) | + (((long) unsafe.getByte(address + 6) & 0xff) << 48) | + (((long) unsafe.getByte(address + 5) & 0xff) << 40) | + (((long) unsafe.getByte(address + 4) & 0xff) << 32) | + (((long) unsafe.getByte(address + 3) & 0xff) << 24) | + (((long) unsafe.getByte(address + 2) & 0xff) << 16) | + (((long) unsafe.getByte(address + 1) & 0xff) << 8) | + (((long) unsafe.getByte(address ) & 0xff) ); + } + + @VisibleForTesting + static int getIntByByte(long address) + { + return (((int) unsafe.getByte(address + 3) ) << 24) | + (((int) unsafe.getByte(address + 2) & 0xff) << 16) | + (((int) unsafe.getByte(address + 1) & 0xff) << 8) | + (((int) unsafe.getByte(address ) & 0xff) ); + } + + @VisibleForTesting + static int getShortByByte(long address) + { + return (((int) unsafe.getByte(address + 1) ) << 8) | + (((int) unsafe.getByte(address ) & 0xff) ); + } + + @VisibleForTesting + static void putLongByByte(long address, long value) + { + unsafe.putByte(address + 7, (byte) (value >> 56)); + unsafe.putByte(address + 6, (byte) (value >> 48)); + unsafe.putByte(address + 5, (byte) (value >> 40)); + unsafe.putByte(address + 4, (byte) (value >> 32)); + unsafe.putByte(address + 3, (byte) (value >> 24)); + unsafe.putByte(address + 2, (byte) (value >> 16)); + unsafe.putByte(address + 1, (byte) (value >> 8)); + unsafe.putByte(address , (byte) (value )); + } + + @VisibleForTesting + static void putIntByByte(long address, int value) + { + unsafe.putByte(address + 3, (byte) (value >> 24)); + unsafe.putByte(address + 2, (byte) (value >> 16)); + unsafe.putByte(address + 1, (byte) (value >> 8)); + unsafe.putByte(address , (byte) (value )); + } + + @VisibleForTesting + static void putShortByByte(long address, short value) + { + unsafe.putByte(address + 1, (byte) (value >> 8)); + unsafe.putByte(address , (byte) (value )); + } + + public static ByteBuffer getByteBuffer(long address, int length) + { + return getByteBuffer(address, length, ByteOrder.LITTLE_ENDIAN); + } + + public static ByteBuffer getHollowDirectByteBuffer() + { + return getHollowDirectByteBuffer(ByteOrder.LITTLE_ENDIAN); + } +} diff --git a/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java b/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java index 453f3eda1ba3..86416c49a703 100644 --- a/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java +++ b/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java @@ -24,24 +24,20 @@ import com.sun.jna.Native; -import org.apache.cassandra.utils.Architecture; - import sun.misc.Unsafe; public abstract class MemoryUtil { private static final long UNSAFE_COPY_THRESHOLD = 1024 * 1024L; // copied from java.nio.Bits - private static final Unsafe unsafe; + protected static final Unsafe unsafe; private static final Class DIRECT_BYTE_BUFFER_CLASS, RO_DIRECT_BYTE_BUFFER_CLASS; private static final long DIRECT_BYTE_BUFFER_ADDRESS_OFFSET; private static final long DIRECT_BYTE_BUFFER_CAPACITY_OFFSET; private static final long DIRECT_BYTE_BUFFER_LIMIT_OFFSET; private static final long DIRECT_BYTE_BUFFER_POSITION_OFFSET; private static final long DIRECT_BYTE_BUFFER_ATTACHMENT_OFFSET; - private static final Class BYTE_BUFFER_CLASS; - private static final long BYTE_BUFFER_OFFSET_OFFSET; - private static final long BYTE_BUFFER_HB_OFFSET; + protected static final Class BYTE_BUFFER_CLASS; private static final long BYTE_ARRAY_BASE_OFFSET; static @@ -61,8 +57,6 @@ public abstract class MemoryUtil RO_DIRECT_BYTE_BUFFER_CLASS = ByteBuffer.allocateDirect(0).asReadOnlyBuffer().getClass(); clazz = ByteBuffer.allocate(0).getClass(); - BYTE_BUFFER_OFFSET_OFFSET = unsafe.objectFieldOffset(ByteBuffer.class.getDeclaredField("offset")); - BYTE_BUFFER_HB_OFFSET = unsafe.objectFieldOffset(ByteBuffer.class.getDeclaredField("hb")); BYTE_BUFFER_CLASS = clazz; BYTE_ARRAY_BASE_OFFSET = unsafe.arrayBaseOffset(byte[].class); @@ -104,56 +98,11 @@ public static void setByte(long address, int count, byte b) unsafe.setMemory(address, count, b); } - public static void setShort(long address, short s) - { - unsafe.putShort(address, Architecture.BIG_ENDIAN ? Short.reverseBytes(s) : s); - } - - public static void setInt(long address, int l) - { - if (Architecture.IS_UNALIGNED) - unsafe.putInt(address, Architecture.BIG_ENDIAN ? Integer.reverseBytes(l) : l); - else - putIntByByte(address, l); - } - - public static void setLong(long address, long l) - { - if (Architecture.IS_UNALIGNED) - unsafe.putLong(address, Architecture.BIG_ENDIAN ? Long.reverseBytes(l) : l); - else - putLongByByte(address, l); - } - public static byte getByte(long address) { return unsafe.getByte(address); } - public static int getShort(long address) - { - if (Architecture.IS_UNALIGNED) - return (Architecture.BIG_ENDIAN ? Short.reverseBytes(unsafe.getShort(address)) : unsafe.getShort(address)) & 0xffff; - else - return getShortByByte(address) & 0xffff; - } - - public static int getInt(long address) - { - if (Architecture.IS_UNALIGNED) - return Architecture.BIG_ENDIAN ? Integer.reverseBytes(unsafe.getInt(address)) : unsafe.getInt(address); - else - return getIntByByte(address); - } - - public static long getLong(long address) - { - if (Architecture.IS_UNALIGNED) - return Architecture.BIG_ENDIAN ? Long.reverseBytes(unsafe.getLong(address)) : unsafe.getLong(address); - else - return getLongByByte(address); - } - public static ByteBuffer getByteBuffer(long address, int length) { return getByteBuffer(address, length, ByteOrder.nativeOrder()); @@ -186,21 +135,6 @@ public static ByteBuffer getHollowDirectByteBuffer(ByteOrder order) return instance; } - public static ByteBuffer getHollowByteBuffer() - { - ByteBuffer instance; - try - { - instance = (ByteBuffer) unsafe.allocateInstance(BYTE_BUFFER_CLASS); - } - catch (InstantiationException e) - { - throw new AssertionError(e); - } - instance.order(ByteOrder.nativeOrder()); - return instance; - } - public static boolean isExactlyDirect(ByteBuffer buffer) { return buffer.getClass() == DIRECT_BYTE_BUFFER_CLASS; @@ -250,109 +184,6 @@ public static void setByteBufferCapacity(ByteBuffer instance, int capacity) unsafe.putInt(instance, DIRECT_BYTE_BUFFER_CAPACITY_OFFSET, capacity); } - public static long getLongByByte(long address) - { - if (Architecture.BIG_ENDIAN) - { - return (((long) unsafe.getByte(address ) ) << 56) | - (((long) unsafe.getByte(address + 1) & 0xff) << 48) | - (((long) unsafe.getByte(address + 2) & 0xff) << 40) | - (((long) unsafe.getByte(address + 3) & 0xff) << 32) | - (((long) unsafe.getByte(address + 4) & 0xff) << 24) | - (((long) unsafe.getByte(address + 5) & 0xff) << 16) | - (((long) unsafe.getByte(address + 6) & 0xff) << 8) | - (((long) unsafe.getByte(address + 7) & 0xff) ); - } - else - { - return (((long) unsafe.getByte(address + 7) ) << 56) | - (((long) unsafe.getByte(address + 6) & 0xff) << 48) | - (((long) unsafe.getByte(address + 5) & 0xff) << 40) | - (((long) unsafe.getByte(address + 4) & 0xff) << 32) | - (((long) unsafe.getByte(address + 3) & 0xff) << 24) | - (((long) unsafe.getByte(address + 2) & 0xff) << 16) | - (((long) unsafe.getByte(address + 1) & 0xff) << 8) | - (((long) unsafe.getByte(address ) & 0xff) ); - } - } - - public static int getIntByByte(long address) - { - if (Architecture.BIG_ENDIAN) - { - return (((int) unsafe.getByte(address ) ) << 24) | - (((int) unsafe.getByte(address + 1) & 0xff) << 16) | - (((int) unsafe.getByte(address + 2) & 0xff) << 8 ) | - (((int) unsafe.getByte(address + 3) & 0xff) ); - } - else - { - return (((int) unsafe.getByte(address + 3) ) << 24) | - (((int) unsafe.getByte(address + 2) & 0xff) << 16) | - (((int) unsafe.getByte(address + 1) & 0xff) << 8) | - (((int) unsafe.getByte(address ) & 0xff) ); - } - } - - - public static int getShortByByte(long address) - { - if (Architecture.BIG_ENDIAN) - { - return (((int) unsafe.getByte(address ) ) << 8) | - (((int) unsafe.getByte(address + 1) & 0xff) ); - } - else - { - return (((int) unsafe.getByte(address + 1) ) << 8) | - (((int) unsafe.getByte(address ) & 0xff) ); - } - } - - public static void putLongByByte(long address, long value) - { - if (Architecture.BIG_ENDIAN) - { - unsafe.putByte(address, (byte) (value >> 56)); - unsafe.putByte(address + 1, (byte) (value >> 48)); - unsafe.putByte(address + 2, (byte) (value >> 40)); - unsafe.putByte(address + 3, (byte) (value >> 32)); - unsafe.putByte(address + 4, (byte) (value >> 24)); - unsafe.putByte(address + 5, (byte) (value >> 16)); - unsafe.putByte(address + 6, (byte) (value >> 8)); - unsafe.putByte(address + 7, (byte) (value)); - } - else - { - unsafe.putByte(address + 7, (byte) (value >> 56)); - unsafe.putByte(address + 6, (byte) (value >> 48)); - unsafe.putByte(address + 5, (byte) (value >> 40)); - unsafe.putByte(address + 4, (byte) (value >> 32)); - unsafe.putByte(address + 3, (byte) (value >> 24)); - unsafe.putByte(address + 2, (byte) (value >> 16)); - unsafe.putByte(address + 1, (byte) (value >> 8)); - unsafe.putByte(address, (byte) (value)); - } - } - - public static void putIntByByte(long address, int value) - { - if (Architecture.BIG_ENDIAN) - { - unsafe.putByte(address, (byte) (value >> 24)); - unsafe.putByte(address + 1, (byte) (value >> 16)); - unsafe.putByte(address + 2, (byte) (value >> 8)); - unsafe.putByte(address + 3, (byte) (value)); - } - else - { - unsafe.putByte(address + 3, (byte) (value >> 24)); - unsafe.putByte(address + 2, (byte) (value >> 16)); - unsafe.putByte(address + 1, (byte) (value >> 8)); - unsafe.putByte(address, (byte) (value)); - } - } - public static void setBytes(long address, ByteBuffer buffer) { int start = buffer.position(); diff --git a/src/java/org/apache/cassandra/utils/memory/NativeEndianMemoryUtil.java b/src/java/org/apache/cassandra/utils/memory/NativeEndianMemoryUtil.java new file mode 100644 index 000000000000..3cb5edb28b98 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/memory/NativeEndianMemoryUtil.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.memory; + + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.utils.Architecture; + +/** + * Use this API only for data which are stored in-memory + * and not serialized directly (without converting to Java primitives) to disk and network + */ +public class NativeEndianMemoryUtil extends MemoryUtil +{ + public static int getUnsignedShort(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b1) == 0L) + return unsafe.getShort(address) & 0xffff; + else + return getShortByByte(address) & 0xffff; + } + + public static int getInt(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b11) == 0L) + return unsafe.getInt(address); + else + return getIntByByte(address); + } + + public static long getLong(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b111) == 0L) + return unsafe.getLong(address); + else + return getLongByByte(address); + } + + public static void setShort(long address, short s) + { + if (Architecture.IS_UNALIGNED || (address & 0b1) == 0L) + unsafe.putShort(address, s); + else + putShortByByte(address, s); + } + + public static void setInt(long address, int l) + { + if (Architecture.IS_UNALIGNED || (address & 0b11) == 0L) + unsafe.putInt(address, l); + else + putIntByByte(address, l); + } + + public static void setLong(long address, long l) + { + if (Architecture.IS_UNALIGNED || (address & 0b111) == 0L) + unsafe.putLong(address, l); + else + putLongByByte(address, l); + } + + @VisibleForTesting + static long getLongByByte(long address) + { + if (Architecture.BIG_ENDIAN) + { + return (((long) unsafe.getByte(address ) ) << 56) | + (((long) unsafe.getByte(address + 1) & 0xff) << 48) | + (((long) unsafe.getByte(address + 2) & 0xff) << 40) | + (((long) unsafe.getByte(address + 3) & 0xff) << 32) | + (((long) unsafe.getByte(address + 4) & 0xff) << 24) | + (((long) unsafe.getByte(address + 5) & 0xff) << 16) | + (((long) unsafe.getByte(address + 6) & 0xff) << 8) | + (((long) unsafe.getByte(address + 7) & 0xff) ); + } + else + { + return (((long) unsafe.getByte(address + 7) ) << 56) | + (((long) unsafe.getByte(address + 6) & 0xff) << 48) | + (((long) unsafe.getByte(address + 5) & 0xff) << 40) | + (((long) unsafe.getByte(address + 4) & 0xff) << 32) | + (((long) unsafe.getByte(address + 3) & 0xff) << 24) | + (((long) unsafe.getByte(address + 2) & 0xff) << 16) | + (((long) unsafe.getByte(address + 1) & 0xff) << 8) | + (((long) unsafe.getByte(address ) & 0xff) ); + } + } + + @VisibleForTesting + static int getIntByByte(long address) + { + if (Architecture.BIG_ENDIAN) + { + return (((int) unsafe.getByte(address ) ) << 24) | + (((int) unsafe.getByte(address + 1) & 0xff) << 16) | + (((int) unsafe.getByte(address + 2) & 0xff) << 8) | + (((int) unsafe.getByte(address + 3) & 0xff) ); + } + else + { + return (((int) unsafe.getByte(address + 3) ) << 24) | + (((int) unsafe.getByte(address + 2) & 0xff) << 16) | + (((int) unsafe.getByte(address + 1) & 0xff) << 8) | + (((int) unsafe.getByte(address ) & 0xff) ); + } + } + + @VisibleForTesting + static int getShortByByte(long address) + { + if (Architecture.BIG_ENDIAN) + { + return (((int) unsafe.getByte(address ) ) << 8) | + (((int) unsafe.getByte(address + 1) & 0xff) ); + } + else + { + return (((int) unsafe.getByte(address + 1) ) << 8) | + (((int) unsafe.getByte(address ) & 0xff) ); + } + } + + @VisibleForTesting + static void putLongByByte(long address, long value) + { + if (Architecture.BIG_ENDIAN) + { + unsafe.putByte(address , (byte) (value >> 56)); + unsafe.putByte(address + 1, (byte) (value >> 48)); + unsafe.putByte(address + 2, (byte) (value >> 40)); + unsafe.putByte(address + 3, (byte) (value >> 32)); + unsafe.putByte(address + 4, (byte) (value >> 24)); + unsafe.putByte(address + 5, (byte) (value >> 16)); + unsafe.putByte(address + 6, (byte) (value >> 8)); + unsafe.putByte(address + 7, (byte) (value )); + } + else + { + unsafe.putByte(address + 7, (byte) (value >> 56)); + unsafe.putByte(address + 6, (byte) (value >> 48)); + unsafe.putByte(address + 5, (byte) (value >> 40)); + unsafe.putByte(address + 4, (byte) (value >> 32)); + unsafe.putByte(address + 3, (byte) (value >> 24)); + unsafe.putByte(address + 2, (byte) (value >> 16)); + unsafe.putByte(address + 1, (byte) (value >> 8)); + unsafe.putByte(address , (byte) (value )); + } + } + + @VisibleForTesting + static void putIntByByte(long address, int value) + { + if (Architecture.BIG_ENDIAN) + { + unsafe.putByte(address , (byte) (value >> 24)); + unsafe.putByte(address + 1, (byte) (value >> 16)); + unsafe.putByte(address + 2, (byte) (value >> 8)); + unsafe.putByte(address + 3, (byte) (value )); + } + else + { + unsafe.putByte(address + 3, (byte) (value >> 24)); + unsafe.putByte(address + 2, (byte) (value >> 16)); + unsafe.putByte(address + 1, (byte) (value >> 8)); + unsafe.putByte(address , (byte) (value )); + } + } + + @VisibleForTesting + static void putShortByByte(long address, short value) + { + if (Architecture.BIG_ENDIAN) + { + unsafe.putByte(address , (byte) (value >> 8)); + unsafe.putByte(address + 1, (byte) (value )); + } + else + { + unsafe.putByte(address + 1, (byte) (value >> 8)); + unsafe.putByte(address , (byte) (value )); + } + } + + public static ByteBuffer getByteBuffer(long address, int length) + { + return getByteBuffer(address, length, ByteOrder.nativeOrder()); + } + + public static ByteBuffer getHollowDirectByteBuffer() + { + return getHollowDirectByteBuffer(ByteOrder.nativeOrder()); + } +} diff --git a/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-CompressionInfo.db b/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-CompressionInfo.db new file mode 100644 index 0000000000000000000000000000000000000000..aa6d6e6a102d9d20dfdf2dc252c36979e39f31d2 GIT binary patch literal 207 zcmZSJ^@%cZ&d)6rVeC{ALP!Gn{>~^a zAQC^K6le*G5($tf72H?_p_1JImZ+nYu{2VPvBPL&)B(#Nn%djOq- z`l6ae6=fCWMU|z+HFZ_xwUu=hO?8chOO_NZEo^D2Dy}UqEpMr)scK%ftfZl)uCbwJ zQC(|uT}wqpb4_Jgb#2koy87CpWsOC3G`(K;Ogkpkb@!UpH@x(`-(KeoyK8dzJTrG> zRE5^8_OCgbM{uv(&zal(dHXv2_J+&lK|71B$hJJkc_2ifL-BCZMvA>J)VA6mnN=Df zuKKtw&IKHZ%nlOl@OTq!DPVWrtbyw_xBzdR2&T`<{B<^%nw44DLm=Cgi^+FnzB>xh zWz7M5y+hugI~?RXY?*ZAFE-L=Hy4CMd<`4*5T94Y!qm_uEMK2N%h=35%sLAZ0uS#c z5&XTfd%bj)Wk~o9naqncxUlN`xMW?n1I*bn4hd848O7uOlzvwF-(}8wZ2;8`iC`or zGz`#lgBS@+0`ETx&}(5j+Cq*-htEn}+ZM`caA+2yb3-*nvsQ)1ev%Dm@i%tNCk>p6 z%gWpw2-xc!VSSqZaWEG`%$X-I;L?a<0R(B*lQOja^=+vkOt0ehrBe~J_w4$4p+M(U zWK8Zz)6cW)H2O_!@ts3}6^*ld=tpFCPX|EN-~!wq0_ZWAX#u-q1u(Cb*jEbBXAlv6 zke8BXFcDoqMCQAW3V{vLM8W&31{qYBK?M5xL@q<$WKxWGfxwmkFL(6-71EiY25nA# z(p1T%i8V5UNSep=6?_N2J}E_{QDrqvU&pplb&LoN-pWS8l+C2U2>VgGz79)1b_Sqo zZ~-141n99cPecHE4JJ|>1?IC7Pxb)R3?g3n@3jQ7b5r#7EPy^IIX`IT$@Y&=B)e;B zp1q#UOaOV^V*p%l0MA|kkPF~KkXy;#IhT&-YQR%aCaNdLwnkr0Wscpd&`KNUib@p_1)Z%gw1qSp}Bqa5haWq zsWP)Y{x%`(=|WglubU@U-+r4vYTpY9@@nTyj#~;0@4Y1;p5-)%>R|J5noyIJH z{$wgpPZD?17@%G&*KzGEXN{y!bNG_juIG7Iw6dLQN5GyJ^d>gFKYIHhav)a<`pNV@zQgb4Z6?Bqr&G?CECfm2ni-FkLOISQg#bN9 zB)0p~e1KkK6zXn_8@-iR<>Fh~;32qf90jxIVDxXXx!HZ_j8_PQu@akXaf8_>e167) zT`zH6tUiC3yGbDOnJYD9=~dn3WE%tQM4Em`HJK5h!E#j!+v{pkeo⁢%t@$`fM2XQB^1_)R_&eJ(6~rYGC1cp&;{ zb+XNbJ0OF+BP+fKP+s9p08Lu}LcACPn6w98E(Msh2Y8oE+5@lfPMO#P?+1Ccbl$S( zDXx{-2_m9$qOTOBNaeV-Wzqn97oCf5$*4w)W?M5OjNjr8Wxkz`%}Wiwev0g}Ug8c< z6fR@jBO&UeHKc==ti~XVk~};KuNDT=?HtHPc9q7&{Z)!E=fEfe%{Nep^Tru4P4`bs z=q!jy*UzJaAeye9Aie`)((Uu@NWg9n^4t5Bn+~c_NiX@~?(wpDN5Bd?^s)SGFxd00 zyCPt}ldfjg+x1X0X)qUMtYLKan% zFhu#-jW~}fj)XZg4ot5x2J%hD(r~%YN<3!+QL_?B+J-DE@AF|C3)H^c<)Va+OqlY( z*GdQpON*TgKL6-j6ZV5i8djg-$J5d5VD>65zydCS>YgI4Vt?dk>(N4i5$-JVvoZeo zj8%|egnNr(Vq@p)dnrfEHWo|ncIq!9i=NZ`)I6F|Bs{G@y%Vt46bc!h2Iw=7Lhs9J z1FD&*7-e)dq3k^z5A-y)FS{M_H5O-zK0Y<(Oe=%jdUB`be!`WB-)g)*9CN=CP88jq zDpwodb~+d6nQVWi-UeNsIYR4jXw%a4HugBa%w)oy2C%rFUQCZ!3FdS3fsK2U66$9T z*K{KS=G4#Tf0H@&%OPP-r^uXW*vr%PHpm|6XV?(tOHjYO0%FMJS&6A&BgiX(Dm%oq z5f6#Hr9jndj7dNv?nRy?>Wm<$-Wc^m@hR<>9;ubfW%-mgBFw0NM#`9xZ!3M1k=T#v z?{=&^Ps3ug>6DFj_7r}@y2*?h*wq(dxBi*R2>ceXXHCSRR(4$}wXjEc&5Z>-J6qV) z3%umUvdI}O*b0{hc!|nx6P=f6|7^HW!l>q)!5oxxyzE3YIn^8(kV7@aVyIgNfqF12 zUFTfP7Z&93ny3Iz0Q7O@>LC9(aP^nPW4TE2Z_qAxj_r$>Mg@`7=ybiZ_LMZjE=KLF z!#bk04t6s0zv8>eM(4a9h@iM+GB%%%*oP@r%jvLhQK z;T>fz3>CMj3T{4XEo4ayQvvsJcua95N{tPm*BArSCbcO2R$~1yKrKOx9pv^c9(oVD zbX#nje}nF?PZ0y^RkqpE*kydv^ZtDrDyXh&`A7S9wmCz>{%k(VWl+0pQk)DOCX9)> zm5DBdXy(2oZ!_^piF@En4#mVqN?}h4vkG;B;yGnE2BN^w(?GqZK+Nj?HlRM!Ab8z_ z0JS7)TQ#9s3udrgi5aZ|Ic`_8#XYHY_wK8BDOv7Gv{Pn?gG2w6BdJkNVedbAub9bx zHmX3R{2DTzdWY{nwgOhn^_&Td3Ul9zljmj>K)31ik303O<1$*9p2z(5I~|*1rlp3Y zu@2V-;~m_z9{Fa|dg2RVTI2ayd?v%&4@`DG@-@qP;!9{EtC@+Zp)R)!0af0OErg<> z$ol}*bH)rvX-C(A+e?Aq?$q5|wCkab*2oLscYyPP{agj1P@zZCoMiQO^?-Xup;s0?EFUbU>KNZl%BZ4$Q@3w9oL!itrPpm_Z=>9i1KW7+MSH zyfgMq6t1<1*2O5Xb1iK|+{^qt_f?3sf<& zk&H7hl=Wiy`zh;|H zckY-Soh(cdfz;zRmCfe1a~D!m#MAEig7@~jSb2s5-`RlMpu_ujn-b@Wq;HtUtt*M$ pL8lA4bD7vMv`b)JmrPK}wn%Axq4S9ge*3Z@{;z{n*uHbi{{d%&vg7~& literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Digest.crc32 b/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Digest.crc32 new file mode 100644 index 000000000000..c84b9595a002 --- /dev/null +++ b/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Digest.crc32 @@ -0,0 +1 @@ +1026070592 \ No newline at end of file diff --git a/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Filter.db b/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Filter.db new file mode 100644 index 0000000000000000000000000000000000000000..8868e5c18008783c106cc95dcb2a5942cf662a48 GIT binary patch literal 24 fcmZQzU|?lnU|?!-Qev>X6N CAqQjt literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Rows.db b/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-Rows.db new file mode 100644 index 0000000000000000000000000000000000000000..d2ba4d639b61180cee624a807297dc1255abf3a1 GIT binary patch literal 508 zcmZP)`NO37JAlc^jE$Ko_$O1w&mbmab2etX-%PbXg$}vqCp{XSgNbL7}W=6K`05(G-V-pK2S4$NE4i-fRJ{AT>0|s8+1}0HP zCM`y&9U%-%nG6s+>=~Kr7{GS?2iox;$&PRu+5=9W=dC}O{>o|JnVsF= zJ@4(j@9z8Y%rXokS+~JY%FLBe;Y5VPEF7$GydfMM!#qUt%d`8LutH0=nlIwBa!l$h zzPOk#Fj{!EF~_Qo*<&>&T8t)($(Y1jtY)jZu)t(7RFr-n*XsRh#^6Ho!vL(Kc9wHliXyVi;%toIlH) z7xQO1Wf#hBl-(&SDSJ}(ro58!D#~jphfvm1-b6W`^0Sn)Det9ROSyycAU2`i2iZu) zt(4DWvnwd?$L6T}bIx%xUrmyR>r2D2Wt*|(RDWHjaNMmA+kG5+X(hJm9qi@LWBU$Z zKOBa=?j*K49b4ak9diylX&H8g4|ef0*i}4s6E%MOVsU)jf8KNRi1`1G;v_v4y8(|+ zGX4#UvnhB!vtMF!>0hJz5(ysvlB<_dT>5!6vh25JWS0~>vg?cqdC80yveJmhqi5_- zDDK_09C_u4FY>CvWaKsd_ za%T^IKEs{pc@j5|JFq@6#5wkX`-PMRp1Kd=f?T?&fo6lxqV<(Mro0B@oNoylA6(aq zt}resaoqwf-8qQ*D=RdYz_`5AHUaG#yX`jg68~;ApA_k;D=_ZCCG4VUCUZK4E zHjMkV9KH#?IwHOuI{4cio1nw4>b5>$I0xVhEuS$TPoZJiFkMxh4TDX76H+YRjgZeIYXG%W=c1wJ>*8=SRa`msU z|H>iHo3Q_$SK~WWp#CzxstE1}KBo-bY7qDTVoU|!*VH;^8sD>&%H_z zHw^yR{R;4t$Jc7|w3MA_ur jom`IU8vaxT=d~)z8D{ax-=b?Wgxk&UT6Fa(TVu*!*}lE) literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-TOC.txt b/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-TOC.txt new file mode 100644 index 000000000000..298910cfdc58 --- /dev/null +++ b/test/data/legacy-sstables/da/legacy_tables/legacy_da_clust_be_index_summary/da-500-bti-TOC.txt @@ -0,0 +1,8 @@ +Data.db +Statistics.db +Digest.crc32 +TOC.txt +CompressionInfo.db +Filter.db +Partitions.db +Rows.db diff --git a/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-CompressionInfo.db b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-CompressionInfo.db new file mode 100644 index 0000000000000000000000000000000000000000..49ebec126f4a4bb6ed7743e7b05d7e31d520a7cc GIT binary patch literal 83 zcmZSJ^@%cZ&d)6T5hgu%WaLUYf7(EJd+48m3b D-$V%C literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Data.db b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Data.db new file mode 100644 index 0000000000000000000000000000000000000000..4b0fa7ef3507df606e3262dd04cfb69b78b4e931 GIT binary patch literal 5221 zcmd6r|5Fs_8NhdigyW=YnI6fy;_UecmSTu3&4#ThkhhG}pOn8k%ao-j0aB#U1iQf;H_;!P?!PHh<0TdVi>~ z&A+Q7+#K@MHF+a-?d`$Fx~95No8R5h;`g|Np5{onHRujDc|zejpU>S|TNh{xwX}!p z-7O){E_Wo**yd{phyC8#K)5l|?rE#_LVKe{&zP{O!KwrM%8vSFQ&1Wy3YbZs8N=zm z=z)Fm>$l4xJX$1^%ptk_Xh1f#=GtaVLh_EdPP6pxb_k00Vv=+eYI)ft?!ZLHaW0i8 zo1|YVm{@Eiau6IWz)Yu+iPaWNEMXJho5w_#k%-l&=O8GYEXB5uWqh8E#W!VBNVI)G zVneb#dKK$_LJ$%%x-(yUSK1Gj3??lb;5am3hwefOONO&d{DF0bM2A2!P$ys$LA2lN)=d#7G7`Ef`$99MLCyzdaaQP>PxGJ5n$yP~+% z_g=?cVievD&NH+{j=$}PHzV5SvQrbo>yX|KVAO8qQ}s(=F}{L9N59bntWfFLc8nG; zNZo-!CzXozKgM7Qm5KE~S1{^YkoqhHg$+utFr~LpjmiB5xN>b1DIxkTWbMjiYlumE zr^)>#6Jk>TI-Dm;wuYGWtEEUfBk`ZSB7Y?`L;GxVkQ zN{P`i>CJvP41{t*xM1HLK*kN=TqW^&rm*z`qPKojTZ`am`@I^46%Xkr7Kz|4-xBo` zVM{vvGsk}cr<~N+jdyZ7Q~gw^O8Y{% j*fy3{htf_D7f`^LgbG>HK+J)6~ysAzR z@uwd=RQy|g@a#bRUt~d|Fa*3M75;{+pxeKerjWW6-di;HMZgjj=e+V0J$xI230OkQQ~rZ+u1@ zJ}f03`?x10jV6spp6m&5d%8ut<~hu;%pVu@z7Ye=^>Vfw7?V~g3_Z)xb2P0gU{qga zA#G|${jeOrG0VEXitLyA!^>YvPnEw6Npl?<%>NcWxOSeQZ)hr*W9TMBx3pJF^oE9J z<4M~++Sy&O?w>vXcXq?Nc_OLzY+{k}T{5!Z0(Ur4A4GH_fS~U-i z|I10dyOPI&SEe#gC4IMdL74dd2SKzxX;h-{iNdQ76F%X17oHyoKM%sg)z_t|A!2-@ z@Y(|76Rs#cc+vKwOrs@G-t{U*?H9G~u^v@^4TFwcqX!JWj=^FolLx`e+!%E(NF~S3 zBQo@ah9}CJAjvR3QPzx{GCrYHvc@Nr7qw-PG`{d-)_8>+NxJdMR-8>=w8{@?dfvrQ zDMK3=dYYz+)ePlRw#TH3?~?9x*$XFW*fvle)6S`^R1uJwOeNy`W#6=&bP&>F>zbw-(d&SrxCYh(I{=Z zV(eZqap%T+bnebg_X6C8dxD(zWF`5SzPqFJ9I<-g_w`PqwUV%oEE27qY2=r3z2MEZ zpjdLkI*MkOOmY^XjQn1&N3$L9)d{}q*`ag`ZUVPwplzw^C^3g1wp2_GWAY00t6(Ua^sfru(cA6wEU8Or-V|AsdqWY{;o&JrP`vY)Bqlu|V=* z9wT|xVOH|sO03r<4}Kd-m%N>02->5@;07p4qp9OILw6Xu%Shhw4@UB;Bdp||Q*>?T zbtGN#&S_@sk2JkD#?UxJ7a6)l)Bb)&^8Evh<~k5 z(A4WUXiX*gv)I6zU;ie$n&Zys-__0%Gxu`R;oiXFaCp>_#qSov#8nF}?pKIG#UGLH zSc~9FCkEGr(O_pLU$5tCzSl*F?^HG_+(C4wk}>#G)ct!acMIBI+ty1B)w0P`iGQx+ JACLWc^nWmKhdBTM literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Digest.crc32 b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Digest.crc32 new file mode 100644 index 000000000000..5f0b313cc8a5 --- /dev/null +++ b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Digest.crc32 @@ -0,0 +1 @@ +1371588035 \ No newline at end of file diff --git a/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Filter.db b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Filter.db new file mode 100644 index 0000000000000000000000000000000000000000..2e1d5d29ca0683d58365c1bda385a89ced35f1f9 GIT binary patch literal 24 fcmZQzU|?lnU|?ckVPs%abZX%^z`&xww2T1&5k3Nr literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Index.db b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Index.db new file mode 100644 index 0000000000000000000000000000000000000000..ad88ef6efc77d06f68e7b9c413932d4553d203bc GIT binary patch literal 157553 zcmeI*KaW&Z9LDjpK`_grBwCpmJ8O6T0849YDOM)7YGR=cjS($;14?)e8e#*rf9|NU z*MxZi78*lgEC@kN);(u;xD&IxCF)qdxV_erPKXr6LW zF3OdoT%X_F3%PDiwtASkB46Z-d<|Q^YBX<7WL%7kapf4-7k3vS*VMNOf){v!R}8#r z$Tg*0sD)an6+^A=>pP2(YwG(1!3(^=D+XRoxkU4*g<7Z;L#^)TUb%dsAb5cnc*Vf0 zo-2Bj3$#EBv|^yu{dQ+I-)O(CXj;TB?82@Xc2(grC4v@cfmRH(y8D05ZlBz%v|fL| zl;?ge<;6>-y!=xsuUsr;>#I^;eW{e!J}c#o6Q#U$xsOHmCUNQHI z`LJIx_lqzG{US^~pHz#DWJrc&IkeoV#Xi}-Xkw&%!`x}^G26kZ=cEzx(8JF`()Iu%PilNr^2gaoqTRbnG7tiO!^VPW2Vxtynp;ivH zzB}`vqfz&UY08S~1Y-{y8`S}?D6+!{!)u=#x!G^F>_|j z{XS_upHz#DWJrc&`|)9zKU0fs+B9vNHe;sE9-~dT)MDcoe&JUPztmzI5sipO#NK0a zu9|TKOMZ2ZD6{OYMN^eeU4h=o{)HFQqx_xYM} zsl}FY$+%=(F&URyY|GVhjajbyiN}-sv#MNbv5kyIMk8Ym1=V6B7Gfb*4zc=A+Kfvr zwvfxzY-)~~n$==ku9j=ea_x1))z-zPTxzi?7lvba48uFM*itSjmy|0};dYcuEjE0? z7kuU5OD(nm$$%6yAmxB%6`!<*Txzi?7v-W{tKiM!`G2Uzrd*VZaz%c#RZ}iSXU-Kd z9K&N6-l@elC7KdViH~Q3TkW4x&lSDN#kd$3a7Bi5{a?Y= zmVZGT&=a%)JyI?9r9AfpKv}Rmmwb(K)8JCPJCgW0zZMj;mG0U~r4Oe@OL7VQxK`*?yL++4Z zVJN|ZT5QkaP@Y9QgPq|ptFeY$6SdevE>p9qIc92Bi*31Dt})A%lrBqYQ!ZboO1Ur` z!($k(7Ta>QTw|6i<@%3usl`4_Ea>rBetg^wcVqRdM=dtvVqAubqrQ!!7MpTWF3OdwTxzkMD`Gf?$1q$iw&iNM#w=I)!g9W_lX9uW7B?IaKCixaIjf zYDCc@F6Wbsi*Yfo7~@ilZO1$6j>i}I;&HB?YFuiukMmyu0T96RcewpiEw(4-a8C@% zqUuy!9iCS#Rh2a<;!*I3O!x-4@rOSJ% zyWwseye^w@sl{eojEiyQ7}u4J*?g@ucuj3|HoAj1I!}qGWEf9Lqn281%0;;-SB`RB zJw3a%y{lZhH^2+Lz$*q`JIbXNdl>d>4l{>&5+CNBXtmfJgJZ-vhK+4`W2+&TT5KEL z!)$a`&hpA}H;(UasKusSl#6mjezR3mF44Sb9>Xy_hT&?lEmzAmX1SV9m#5QuuINoJ u#s%RJ9)obT*kXCH{1~xfh}BDH6|Tr|uKz2zS~h5R!`=A5Z^w^Ni~TRd%lBUZ literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Statistics.db b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Statistics.db new file mode 100644 index 0000000000000000000000000000000000000000..2c3a57350c2c8efd1d7dd9be8f3d2f18a7bf0d82 GIT binary patch literal 7046 zcmeI%drVtZ90%}w?rqCsP@%v!x(wX_hr|?wF(BEfuui}%f$`YzvA7Fu2FnJdZ6?E+ znaTbk%5ZVRC40D;5W*e}pbpqGnS#3MHlm_NHcTp-8LciGGC=CBrP=R$^pC|qjK+JC zlY2k>PA})2d(OSTw1OaL1b<0T4os=*l`1NAyHX`VFir_6b@q)Wxn8cSwwbDAheNKl z+GUfq+G)z$=Cl>rtv0*OS}fb0HO`v4TASV6)c8&Q$>{?MZI3rb)Nfj?>kC$mQ#=OE6(oI^Q>an^H= zq`ePt|>=r)6u#W=pb&q!BZFy9YlwD&=2fF8{S0EeHv{XLO+y(e)urjRDsSsh|W8M zE{;KOh($ZrpqpiMJGXuZEg1L2&Dy_@SlllVXBB1WgSbAK=W~dQrMREPkI+)ZC(vG_ z!S%1Xb{XQ@4_d&wpF6?9rCxBzj14?zCJL;#;(CmAz`gI`{EYO%e6rApJG4DB=f48p zvA6?#>0&Cixa1T(U&Vd;Pib5-Ho*Fj`qqcgxTYZ_hg$pWFpO7MSJOb_LA~yA>JZDS z+thR7`e8qXR~W9+c!X3qKpmBt2npbE$F6R^2NI4)G1f1Pf}-Z zc<~Um<*U?n)Fp?vWKdTnzWX@!*2(ZO>X!FA^Qn)O?q5v(aa+@F>hX7mRuQwL@X9{w zs+BA0{%4)Vrvm%m*Nk~z*VFwk+T7Pqlh2Ez$8#EqB`0(5A=V^ZF%(iyOl+t3NBe8* zUi$tBdL(Krm$VOV>uP+PIJD@k`1M~#-lXG?yjIX-Anjx1=6d=*i0v@< zn@BwF!!ZNBUkSJJ3!6#&(Q;2B%?C2le;Ck{xOw=y{^yBTA8;L~{dvZdok7=gZHcA5 zm9%fF>>V5>KJ@j-nReoHN8fVM{<<$F4`tqqGsDZ5r}KylH{F@qdC!7l_8gzVf6l=~ z@)_Uh|0U=0zw-nkjT=AgaV4haTrfdu&IQvU{Oz2}e;GCBQgbe{pqQF-b(853{@I+1 z?OSxBGDZl?^$5B&$Ez+^#~;V89ODk<`wd?W^v%5c#Oe!c;x8$-DwXBlP=Xf%Q3iRc zFRtlIO?@maeQCzB%&ctnOq~OD9jJTZ-~BxBKep6|mHNO|Us~#m`#+Ps1ah#+*^|lN u3`sXWOHsbrO#17ew<=9rWV@qU-fSu_dm^iBTfL3lcT0guTjkC7-TZgiW`T16 literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Summary.db b/test/data/legacy-sstables/ma/legacy_tables/legacy_ma_clust_be_index_summary/ma-306-big-Summary.db new file mode 100644 index 0000000000000000000000000000000000000000..0c575b7c1bf65927bfcd8400c08c799a2e867ef3 GIT binary patch literal 47 icmZQzU}#`qU|T6Bgu%WaLUYfB(EKG3TG#~u D-)ji! literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Data.db b/test/data/legacy-sstables/mb/legacy_tables/legacy_mb_clust_be_index_summary/mb-307-big-Data.db new file mode 100644 index 0000000000000000000000000000000000000000..4b4fe735f53f800e6f79b95f20b577264b180446 GIT binary patch literal 5232 zcmd6r|8EoL8Nlz7IK(dOlBwxP&~#zSBvMgcCw3BNr9PKcYuZ)CbZSdhZ6pK-JKt;* zo7j;chVdm$TSfyafo)wN3r?T~lC3343$e9TB@I&onx@cBC}h$E{7}KvRVh+;d!F}v zzW2@e1M*9h&+|U--SfWpe%|-af*=Y#f@tkjRW%0xt3t7`Z`f)rEwkCnD=HmT76Dq+ zW9qKheo4rQ#kOtrMm*btbvvS=Krj?+j5aq0x9;=@!`^6fpuTmdr)_70x1qkl+vbl% zwrvf2qD>uJ8v{+vt>KnnxV|IW*tEmn6zZt=G&i>gJ&oS&5uY!xt+Az{rM}hE*}TgaIO2+Fo{nsf~Mii%181{3Ya zg><4~BIo8XvD`=$AUJXbGb@Zt>^+Hzm3-od7ckLbBx3LJUIw?R4($QnYmH3GpBOlUh^ouT& zyb1ZD#)8X4o_Ani)JUj89y1bNEW*IJ5qK#F10NZIm+xWVW8)I{T~N5~l95p}>%!92 z5-c5`lTS~-k zzi0|zlWw;r!qFf+OxOrov)|IM+8IM1Vs~(d7=ed_*BRKTBziZ)^vh*)=JbVO&Pi_# zMr~Fx-5=%o?No6{87Z~*8C7IJ`01Dh_Jc*aeJdUhiW9|h#iHaAOs?dU$scm@c8*1< zdAKls=II=^ z@3fhv>-Ug^d|W_ra{WHr_V-ziZm{%uf}sdJZkakQ{WNn@ z`iG#-!@)_81{6*IvQC&()L%Gck*!BE)OqPEQB}`U`EL$ARQ|hlFn1t-cMc>c1%(-0lmu328QT;D*W0!3&Wa_<9@#isS-2p<01iotR|u@qw!eB#$D zF<8kb7C=l|uE6kM8*(MSrl-IPgI6t*{DGjYh?L~>hk3~#Kt+b+-`+;j7w@-sIJ#@l zX(cJn@OX{K>l~U!c^qR5hijR}X^q-BA1Cu~vx-BzxsU5^K8dJFj;*Ko)_w!y@(S}{ zh>YmFg}lPTVcsKyLE+LVS#KwKK_TCmfV>@?>-n2$nc7i*T1m|J@UrLEFn1;eE{>i? z(qxC43pTKg-(SN~2}`SUIm+YcyV|p*eq%vLQo8%J6a3~o)9?M?2@VVAdnv#F6-M`0YYBR@k8Ne4h%wGe|HIJnC10Is5mDE!^pl<>Arh*RPJD#_L# z8a>hYWbwkoj8EE+!~B8q3t&9l{qamSM3qk#!}~jT+$ZgDD1q{?EK8S!&O2flwT)}l zV}F!>4+iaz88u*VF9yq*Oc0N(_hPhSQK}Du(jyAggo-C!ZsfG`N!KRiwDQSk9K7<$ zXR5Siky5_+0k3>@5%$;5_3G6~y7DjNaAam_<2{bU7!H z%(1e^$!mjb-`9qa^z(enVWv&4&*5CI=S|-}bt)+A`kkTn_BvMWrfTL=r0F?Mbib9B zsEHmYYvCIWRzvzw_m{GL+g5T^fTSPl?E>4@4v*ZJ>W3`f_r=UMmzH$9_I zOrEh&t>M!;b!=k3suYqXGk}o&hUV|)A{&~&n}=*@esW=v<|n_=mPJbQz2m&*19RA4*L>g> zlCJsC6^7)Yt~NL$_4j^x=yeX?@8Gz|M@`V&4* z4!{d2Om@2YgBVi?`Ct!{Zu^**Z5#8kbji)pCXTpw`6TR)#=w(Roamnv(du;351iyh z|711WcQS_~Gmm*J~OG&7ZXn_i7kPsqJ@C6{sX8=VsPR*ZL zK=dGGKL7;?r9%`^h%kGvv&;%va%@NIIpjw*auGI@xzByxbH{ti+GgoKc1^v$;7MPqr>zy1aeGm3!bhuoi~RgE_QrjYe37Rak{p!)8@asSDHDh0@S9 z?tkbC#6S$hh7GYst&3QNRak{p!)8@asT*gfv$K1~b*cZUD-=U96dN|hnsJHa9Z5Kr zaWoZkJa_Zd!nno`6|oDuuq%dL)wsrzdDKEJ)QX|jH@nl>>6USgCz376mSjt^HSA=o zr&Kd8$vop?T#PHmxVl&FPjBz;Dc9v5X0FH=`66G#manQ@6VW{7qFj_KN4efyDA$gV z3%jrjyJFbYj0?3;3$;)yhFaZ+_ouT{E##VPiMB*rqAk(Zu%oS>Qcbx;^OTEnQLY^2 z`t07~Tye79!^{==B46Zd*zz@3t{rnC<6>NlE62D#zqbgvwtSl)c!3vq#lWkETw9b2 zwNMMSVyM-9b$1bRZTUVy@B%OJih);CF3~({p%!YzP^xAmDcO;l=Ad1 zr95}Flox(1<;5$dY=2qGOV5|`@~5S|da9Jy-z(*f-%EM-wX!yvo_Sj>wpYx(Vm|Cw z%>5$FLB9x7&nMMlBN>t*Sq?4dwb*-KU_Vm6VeT|{nmc3W&L&)5V?{00Lai8T-8jE^ z1%IS`1G}&byJFbYjLZ2XYM~Zt#Zc?!zs98&TRbnG7tiO!^VPW2Vxtynp;ivHzCAyk zt*OP1G31;l%O^ePH6(ZFcz387mtVc+i%q#G7v+jlF16U81zMn$1Fi0hbBmBmEjD<87kK5stDY;W#YQdE zLaiKXbziS9LN2w~;00dbl>@J)T#C+|D>_$nu9$PK_;asZzEBXnzze)$;5An+wb-nTwY$QW6B-@V4 zYQN9dj7u%Hj7!EPuN?#-$b; zzwisca`>ed8?g`zv2uvjf6|(9jn!hyxMW;1u9%EVEw<%qxyCHlUN>AlXbjqPCk}ey z%^h-w1Pen67Sv*U7KidI+8OK&hgpp^%Qa@X$`_XNg?Y-Q7F*nK z)VRT7T3$@<#%k_Hrxx3cNIFSppmbK@iVWv$ZmZec+-LWBXamWyYO%L|c=m6#*hBj# zg&COfOdc-xsIP>n#rD)3?x{gqq&>+sqZV6OCM*+{Wrk(9MryI~3%~FyhF@y2Pf~Y{ zTgENV-%%rq7I8VBWL%7kam5&yT5LPsQFlDP$QMs?^;F|hi+z&+0tkQrp1;HGpK7r^ zF^79%P!?r}v1Xcasl}FY$+%=(F&URyY|GVhjajaAcUgCNlE62F5ZA@otrNL{5h+Z;7#LhGR*^#bqlq$WW1bDsB9}v;ATA$D-Li$;mxG zzSDBfz2}_!1qdMuGF774g|fPeRe{xwtcrw0s!4p6WsOd%mP$&^x)RB1l`2gZNoOjx z>C!jY%sCd5*3N)}t0t*oljY%$c;{*rkl>_heik#4tpGANuXx81F$_OX{oJ6nW~ z6BEjziNRnNZerC_Jiiwf-k$Nm!p4P)Zw|g6Dt!&_RmEA&*^hGo=RnRH&LNz`IM3od zoAW%*ahwx5FXo)Z`EAZ?ID4KgEZZSiyOXm6n{v;CF5==g&b`>eOwOCJMeh5>-8il& z#8xI@tCnD^x%v7%#PNWB>_8XxOZC{={n#_!#f})jekC6J)qU8yBJAWn*y+cybEjjk z4#&2>h20=wH*@Q^--zR`D6gmX5sms2#c5Fi_8z=Gsp~3=3;B3I3umy!q6?_MLV?%6 z;_^8ZSN_n5tQu)S_RDu8`%jvYr%i?;YfN}Oh8TZE@v!b0$g^%nAkTK>AWNty; zH1`nl*^>#-!u+G?d=++RuE4k$QG?cp*uBah#uYXGOQDtTJJ5VpRb@3WuI_S) z%a_CbPg`=2`u4x48+CuKf%{)DJ8vEXpBIJ>FRcX@ZPd99SP^|un+-iaz7akj zp6350=bC!w5t7KwAMJ4#lXEU~gXEkG-461vbFQhck#jCN=K>3g$~jjDxET7wg zRMDQ0@Oom>Lj9uTlvKGT-vfCa$a~@WJ`dQ@#f~lcu#yjK`K2YlxL=s$#g}T5$gu%WKLUYf8(EK?NTG$u> D-lYi4 literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Data.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Data.db new file mode 100644 index 0000000000000000000000000000000000000000..5208946c1d29206795769cbd5ab863d00e2fca18 GIT binary patch literal 5215 zcmd6r|4&p`7Qo*F24SjwvMH@#nuiVDK)2}|W(F8qJ8z_BgSIKC=_+lyd~;xCWMGE* z0!&jF6%|SAqBaqCO*-o8P)$|nrd90fjNMe(?yl$$8;jkIb+fMJhmAI^u_X5Oocrd@ zyO;V0@Qd+t?z#7!d*^-bnStYYZV$(s`V~cq!v7Vnm>U=~naa!-Yk5Vbt;)#3h_X+4 zJGxEaa-z}p&Tz=p-RkLW+1T9`4hI5})(&^LWn;t}_Oy0+ntR**y}mYA(Cv3Nd)nJW z-lk1$fl!yTqcyNG)YjAz3AA;%dYfF`?QTzVSEMZx^oP7IZ?Gre@9c1QxLSKWL0_lG z+wEiW>&D7UwF(Mm+qWMoG zctn=ZMPYN|ScQa1N2MK8WK*f}!<~cM?1^fZ1D5G0yT^zlQSHXa$7GGeoWmgfx()-M zr~%$#5Z54|))Q9SFmOgss6w956J9OCz*#+E8SMiF;Qe1lNE-s=`{WYZX0!in7d`znUk{*6D!BCQ+3p6#2GV}pM$FNMR_Dtt6 zsquVqy2ibOq((hsH$C0ChbDIqlJZ!pV!Pz-rH8$Z3^mc@uVH8vL#s(qa-q6zpHp`-X4wC*VSZRV z_8h-Uh>_cc=j;aQeTUuPJH&H*SwCz9@1h2B6`jb!gW!e5S?_|`LLy{()!@S+O>WhL zfhjmPxCc72-_=&_kD?ACxC8JeaHuyyt(+LR0_QE7Z{(&I=A%Yw;3`HfCO$nMRFMWx zW7Hbci^u?h&S0>dO2yy=1}mscoc~S?qm^?~4`R?ZCv|ubqQdizUBZ-BLnE%c6W>|$ zd1B3xT1R4dNPR)br4W}wX0pFjLR@;!hs#9Cg#hG7QYQm-F%+Ok95K3s(s+8Ak@~1n zpE&Y5oqFJ%fSXhp@P11q9N&|iDZfof8r>V916T|z2M564QV#iUp%Qi6ydmtxP* zV}D%CP##V3`#@slxi}HzIyBKoQ^jRH4Yt)aGXbt5QLMssLgpR;V`!@6k z^%gi)M>_NWezkBjaU6wLJ%`SAS&71159$0ovE0_1pL9>p;2ybnyd>mFtNZgPsE3 z>v=f&<}d~;*~Hxt6Bf#_Jq10!NxI3RG~a@L!iatg`U$j9IVJiRMXczzBDkI=`b;B| z_VUg&G1N@c|7#dp#Skh-FaenDu;Vs+_uz0yXi(^Svj zK&u{?R#wq-S3XZuO%amT&1;r1V^7iaN)AK03>g`EMExiWpQPsxKd#``M&N%)e9U%buBxXRf<}# zK-ClEH&LqqSz>j{b8A0XK|EiWFPg6FYZ1?D?>yA=^%vk2!SibJZPqEkL-@&JsrK$% z%h!JfhcB8JX6q$_an3NUyKGt1{>9>-7AC_~}I=Z0Zq-S39o z$hzMRY(q~r?49FxOON_er2MY^mi4=(7uVDLZiygiez&|uQ(GfLO$;^Dw5f)nRSd0W z{O&Z7>NLOC!Bq~wo6?=Qvy7e_Sc0Va9VlSN3Tf)*7~&a%qu2Ksxy{fwjNg$NQk~}a zI_N1-U0VOXDSB?-6`Ho3M$-J=a)ue3plN%Ip@TXV)e&nq)K7oDwTRM3=;C0Z0gJQ)3-l-+4zOTDZe5bO} k?5~LKR5D%y>l5hvOH0vp)I<`KvZ1{W|4_%f{<}8kCqN5*b^rhX literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Digest.crc32 b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Digest.crc32 new file mode 100644 index 000000000000..3f9e06243713 --- /dev/null +++ b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Digest.crc32 @@ -0,0 +1 @@ +1373250029 \ No newline at end of file diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Filter.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Filter.db new file mode 100644 index 0000000000000000000000000000000000000000..2e1d5d29ca0683d58365c1bda385a89ced35f1f9 GIT binary patch literal 24 fcmZQzU|?lnU|?ckVPs%abZX%^z`&xww2T1&5k3Nr literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Index.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Index.db new file mode 100644 index 0000000000000000000000000000000000000000..f1bccb20c57192d8c7181dfe848628cf7c5ce3de GIT binary patch literal 157553 zcmeI*KW`jW7=`g`g(Pwuhy)c9qNB#U>m*J~OG&7ZXn_i7kPsqJ@C6{sX8=VsPR*ZL zK=dGGKL7;?r9%`^h%kGvv&;%va%@NIIpjw*auGI@xzByxbH{ti+GgoKc1^v$;7MPqr>zy1aeGm3!bhuoi~RgE_QrjYe37Rak{p!)8@asSDHDh0@S9 z?tkbC#6S$hh7GYst&3QNRak{p!)8@asT*gfv$K1~b*cZUD-=U96dN|hnsJHa9Z5Kr zaWoZkJa_Zd!nno`6|oDuuq%dL)wsrzdDKEJ)QX|jH@nl>>6USgCz376mSjt^HSA=o zr&Kd8$vop?T#PHmxVl&FPjBz;Dc9v5X0FH=`66G#manQ@6VW{7qFj_KN4efyDA$gV z3%jrjyJFbYj0?3;3$;)yhFaZ+_ouT{E##VPiMB*rqAk(Zu%oS>Qcbx;^OTEnQLY^2 z`t07~Tye79!^{==B46Zd*zz@3t{rnC<6>NlE62D#zqbgvwtSl)c!3vq#lWkETw9b2 zwNMMSVyM-9b$1bRZTUVy@B%OJih);CF3~({p%!YzP^xAmDcO;l=Ad1 zr95}Flox(1<;5$dY=2qGOV5|`@~5S|da9Jy-z(*f-%EM-wX!yvo_Sj>wpYx(Vm|Cw z%>5$FLB9x7&nMMlBN>t*Sq?4dwb*-KU_Vm6VeT|{nmc3W&L&)5V?{00Lai8T-8jE^ z1%IS`1G}&byJFbYjLZ2XYM~Zt#Zc?!zs98&TRbnG7tiO!^VPW2Vxtynp;ivHzCAyk zt*OP1G31;l%O^ePH6(ZFcz387mtVc+i%q#G7v+jlF16U81zMn$1Fi0hbBmBmEjD<87kK5stDY;W#YQdE zLaiKXbziS9LN2w~;00dbl>@J)T#C+|D>_$nu9$PK_;asZzEBXnzze)$;5An+wb-nTwY$QW6B-@V4 zYQN9dj7u%Hj7!EPuN?#-$b; zzwisca`>ed8?g`zv2uvjf6|(9jn!hyxMW;1u9%EVEw<%qxyCHlUN>AlXbjqPCk}ey z%^h-w1Pen67Sv*U7KidI+8OK&hgpp^%Qa@X$`_XNg?Y-Q7F*nK z)VRT7T3$@<#%k_Hrxx3cNIFSppmbK@iVWv$ZmZec+-LWBXamWyYO%L|c=m6#*hBj# zg&COfOdc-xsIP>n#rD)3?x{gqq&>+sqZV6OCM*+{Wrk(9MryI~3%~FyhF@y2Pf~Y{ zTgENV-%%rq7I8VBWL%7kam5&yT5LPsQFlDP$QMs?^;F|hi+z&+0tkQrp1;HGpK7r^ zF^79%P!?r}v1Xcasl}FY$+%=(F&URyY|GVhjajaAcUgCNlE62F5ZA@otrNL{Ik{4fR7={90SjVzc{W@ct}*^mvS-bx#u|IrVNAB@I( zl9PLWeNHd;+$1O-dD&x>Ko=ASA>HX?4ymCb?d&ta6wtWtU5? zwL4{#y~=IM-{E!?IqeRo!)}wE?rL{+U9H1uZfg9_dOG$cnpKmrYj%oMZ+{@Y~ z@4waCCHA~OqaB|ai&DbvO7$22-=&4O|9WUqt_wm7js_1c{%4)&KaCnb1vZgBOm-8r^anFOT;^J=3=h5OK&U?@j zcYf&*#x-SVZ6;c`1|7nUH*^}~`XO|L7k%GewDC>!!l%*kBj^Xx&<`F#o66DI9q9aX zXxjqx<~X$L33RiJ?&Rk0kOkx3#DM445lj39;;g(B-GTFydB23XSc2Nf4q%+^=I zyO*5+e|jOET3m4!_E+(M;R_m<;u~OoNd23_Xk60}ww7A^>?n*^S6AIY;~{;XN$N1m z`difDi36~nBFl}JX*^0Q9HfrPPKNos$Z+Qk8c#U++I8xsD+_w4SA4Z`HFetMs?*fD zn|B?iwtSPmk-GRuO%`=!^1F{xZ@(KkLEZ9EmzDZt$-!mRpS3r&Qcu1!vYwcwL{{`u zSFT$}*FWpBoei#ke>diP-9XpB=@<{*C_A`xd6YGnP4myb=o@@Duj^FTg^mRJ^=qm-iM$&(Q++0uJ2XQCN z111tr{CL7hpI6dNYhg2qKV0Tbrujft=8uC05;u=t8+eiU@k8CG==D6~&CR0oxwhES z*+%*|RrC#w6CeI|>|7`D`D4es>GgUpq>g0I$C>e!i!=Geg-N=(hU;0CEV7u*ivZ|7Wdr%`h*HRmD|im5qQ54jz}Kbv#0 z14}#JY8Aq-Sp{8&%a&I&{KqkW%C=kieZyaa*Jf@#KDKN61%G~N4}M0DOmikZ{gI5! zm07E@b8^*|dJoijpss~~_kN&kU&^+nZdU4st-iF>7x#ZAc?o7+cLNRO7)av;bFP0+ zL(ivpzZp}GJ%3|Z5K{j7-(D4_8rkWplDC=4N*~K9-BIsg_uNxp(sp^T&- literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Summary.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_clust_be_index_summary/mc-3113-big-Summary.db new file mode 100644 index 0000000000000000000000000000000000000000..0c575b7c1bf65927bfcd8400c08c799a2e867ef3 GIT binary patch literal 47 icmZQzU}#`qU|T5hgu%WILUT`o(EP~|T38zZ D+)oIq literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Data.db b/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Data.db new file mode 100644 index 0000000000000000000000000000000000000000..a6a94e6b5ec3b3d079087a1576a85916c34c95cd GIT binary patch literal 5206 zcmd6r|5Fs_9mn^OLpU@Go#F_#T}+r3JL&9kcfgUD+pBdPOxi%=bf~>_;5-B_9KvyX zIk?2*C|F6-X~>s}*mPPA${CX=kXDH%aHp9;lSxF=WDJQJ%uG87AU$Kzf8ZrRruZf=)-a$|Rs9BFCniiWm@qg?@SQ%9)P*WnEWn*yy52O_PJ zNZYm@-W|b6YoKXcsI$}OePsLg&Tz2xkzjLMS9ePovb|A%Tf`e~3(4W2EJuO?*&k_b zYz{YVmt}vTt*xug7xFdAVP7EH8T9%)p}g_3d(2qWaQ*IG>tFblWDbdAWkCz6GixNX zJ-&OF)OW=N_IQ~@A`eQg7q&{~@G9FyGoM}&7qf_eaf2z?%Sq54$mJxnI9!U6j=h`~ zX_CysIt(ml0~2`|SiuJ7V_+p4c=&4!bh3drz~c{9Vbw!fALrnckIYGCS+M<;mNV7G`@k+K zxuL}k^Zt$0gpND8!S9AL?Z;G$bDPCYwQekQVI3BFP;&2{A(h&X>shYkrU!7IiFDE= zFW&1!V~WR|VUyb|RG~g$%zA$@8Yd0o!E7|%Pz}t;LH(u?@XPCHOcLW@jXSyZqM;Mw zZ<1KILmf_t-b1A5sS;8E>?QY;6QpcCV*4PS?(YJ%%AWhUzAs{e*w%m{%6Iiec4_QY|^_MgX7V!X_)9tSm2 z@<1NEe!(`Mqfcy#Tf_tT=-Pj1((_@PiHWb#bsRQ|$bijv1I=-_UB1ao)4Ib!}7n&VFr_Y+i{R4GD2zf~+qckPu%R!)fB= zXh?|1hft}>p!PEAkF+`&XH+k(rdk*Uhk!BX=~{9mXpcAztiyIy$a#_t5^6TT+p?^}tI-~a7*A9ICyse{GYQSke`7Z zmG^n?V%`@3MBsgNxi&50z5r+e?hVVdY(#AtdX5hL`RHY$S8l|?Wa7;$XjZU+SJA69 z0x@|WJbr})t5cvUrA#xZR~q^}%XZ3iLER8(wy#ywY>!AO)B8Aq3O7je1WI-HqI{i| zCs=uskvaQVx!;fpG3P}_4Ks-693Vxih>u9gYnxfdR}Rq0D<4OtG7dGae1aK#l2-X0 zloIkczbcF4`pJ>s8MkO_O^Ep+I<(;J;}EK2T(C86fip;L?@-_uRu=PuU(?BjPfQ27 z^``_qz3`{_B$afixNrj%Ef)TaR^KmW)H+7}m{x1^DAchgxStZ%JJgPSZt@o&{9nh0 zZIhR#`@gG~2;G&h12Rex|527AXTbHi5HGIIf(%9Ge+SGeMQ9e`;(l@)$b1Zpp|>Ut zyS)c&0{gqXU==fz7(a-Kc}RO|ZT_ujPu=;pO8YgSUCHB%mnkuVdur`>7U7<3Bv zYz@oxiII2f7`paREMCn=KF85?6dN@l-Hsu2%NK=W>l7-eP|PPCG*^N-3E$2*1W~*@ zdr=#_JAb=~cV{2NdkK6kz+FY}%xH{Mpwm$ce)5tY`*0Mlc7#ewm0m`uD?i}u{3dci_aMNZ5z@8b zhcoTi!|#L-Rhd6VzJea&C3BSkpQhv<@8|(42AuYV3*`$Bp_m<>Sty3loeXq15LcB5HNg`yb+ zleb7vIRV=DC~5<6-(#qahVI*m+5o({VG-cXUUgTb8+zSaEa1)CFkb__*@sF)v$dL0 zn;CU4qwb?syOmL8jIuF+x7$f|+GX8yhy}deLFa~6qtXBm6*6N*j9SB}@6js4F$#Vj zW5p5p1t`FyS4edl;GUOQz@t~`+~_=`{)I{dJo;~XEH=Za^NhM+s2|mmTlS^t*`~*e zIqshSFJ(Tk&-ImX~`U@#lO#)X8-fm{Qm$WiiEfT literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Digest.crc32 b/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Digest.crc32 new file mode 100644 index 000000000000..44c7c2710e14 --- /dev/null +++ b/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Digest.crc32 @@ -0,0 +1 @@ +2461228597 \ No newline at end of file diff --git a/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Filter.db b/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Filter.db new file mode 100644 index 0000000000000000000000000000000000000000..2e1d5d29ca0683d58365c1bda385a89ced35f1f9 GIT binary patch literal 24 fcmZQzU|?lnU|?ckVPs%abZX%^z`&xww2T1&5k3Nr literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Index.db b/test/data/legacy-sstables/md/legacy_tables/legacy_md_clust_be_index_summary/md-31110-big-Index.db new file mode 100644 index 0000000000000000000000000000000000000000..0860005fb9d80cd863649b7c2093ba2d7bd2fe11 GIT binary patch literal 157553 zcmeI*KWk)F9LDhz;_i}7G@z9rcGe~{v%5)aYw21EwyvF8*oc1$-+;=#24%6y*8aJp zVz1!501HKe#j>cNLd-eI>|G=vm|?ijjyJ(MQVr2Rl2vqw(b0^&5Npj=Tpx2iC%{d2kG^ySuwsg;iLERl{agPpR$cY`Zjc zjrt!t0x=K+v0+23QR^C3VHH+k)v#IBQ|k7W>Fmmhab53!>IlV948?{`v1VN2cv}*- zWo%8wY|q_UpBvZ6rXqG>7k0(4s~XoxGLKrQg<3Jx`r>Fh+h`frXe`;1Y)Q5xTfKc|jH`R$&j%sbjUHx>$QSt{U&EHKs$65yJmsQXlq*NMZv8%;t<{w4 zK*)t%*o9p&>}tk^TBwCus1-x4?(N^Ev-P&GIG%{ML|dXQ(blk|t)5a%xkU4ni*iw} z9Oe4({`7~V6Xn|LVdjW@kuUN!Z279uyg89^F)qfHV_YBKpNCu%PZI<$@B*(Gc-4?= zLb*^2wNNXDTHR;&<{{U_^8~>Qyud34UQM|~^QeVds1-x4?z>*OJWvq4zze)$;8ph( z-N^-7paohn(CU7=H=S*^uPd4su?xGfD~4TFxJ-$l1zMmL1Fi0_!|CCr(@N`&*GhT( z$5NiUS;{ltmh$X=DSMxm^4!yi(o!Y$QW6B+H@YaV_?x_N~WVj)V z2F32xxpJw+#xCr_t{iqX<5G)lPBbT)6JzGY?w!r~zM@)e%0;;-SD12%<|!BDqFg!3 z_0iS&zT()^RKW|pz$*q`$0?UuY;&SH(VUnwCw8A)orhd%vB3+xz$*t{bze~}Hfo_3 zYUNO?`+Q>_a;e1zFYp4d9C$V563vU|Mf0Nhm}tKHzE>^}6a+8u0Ga)B0T zfmRH(x}P^rt9}kli`a!-*cHRBDqN;S&;l*cih)-5_vPu~+W)M@9(}d-t6FS(k@h0( zMds{9o^_^me^MWQj3jW_=R6F{8Ec;L^L8A z5qpoxIcmmbf0A)AF2)sOTxziy7vo}FIiA&@X*Djj*!YEC_|;Qk=tpX?5eu;pYv`Oh z>-?H=sl}FY$+%=(F&URyY|GVhjajZ|iO0qLSye8z*hWSpqmeO(f@-l53$YL@hgkiY zHsex@E#xvao0?;$X0_OstK}NATzlPcw71`s%fnPD7lvba48uFM*itSjmy|0};f|F{ zEjE0?7kuU5OD(nm$$%6yAmxB%8E0BUF16T{i*iw}W$?>k8hQ}~mEw<%qxyCHl#mrf&@>}Y@qC2@57lcE248ql7i{-`gV);Y~tHKo-&LzIt z&Uw#gjohDG?$1?=J^uQMH`HP?Hpa%-vi~60{YkahNQPucmP0bN*d~B;PXIX|cW1gf zGk+5wkOqM8xf6&M#MxDZN_EKh;cD4#uZ~+YOxs?<6>N?#-$b;zwisc za`>ed8?g`zv2uvjpJ~mwMryHTTrw^hS4_sG7Ta>QTw|7NuN#g|8-sSd6RUG;o@?n^ zu6hKb7JJpNFp8nrYJSFR$Te1rE#xvao0?;$X0_OstK}NAT0g0!1uxzeREsU-5^@Q-64_rvF16VBgI z^rl>DvF$5jIEKeCTrIZcYPrTN*PK~y8E1N|Txzj}TtY4(*D`RknAcE?&A1pB9tx{|k;T-V*{`^NW1V&rys04+%Q7mhhJ&htNQl~=0_l^Ex{7d6bGTHt0 zo0gf~nb|K3$8jPzT*B#r(7J+Fp4QE@3LF=b$we1i@^w;)lviNZa@SKbsm5f!4y%>8l_F=4H?9Vum@kGXx z7>6;AVjRPGHsch=%NXY~cHdiAY9Q!&3*&BV!rTusi;3G9U%=)kFy4+WFuyMx#Bnhj zTM>(`oP(`m#_RPG$GtnSeVo{1cVcS~V~_s^JGcw`i)id=hp=@y*ztAPN$0WC$6>Du z!nQ8Ou8^=BnEBgb#Bpcnh=Qv?k;O&l8aCzu@cBV@KDMc>6tt?TI0IH}cl0 z$B{2x)I;;r&!YX6->z3N?(2rRurRitoD7c$LM4r7*6tJNlv3#)VIy z$Aq?__2iqQy$<7kLTWp7Kzsz6&l5DyzlZUV6MOGNPnnt03_bnN<+GuquNRzxPF%I6 z9@_Y~emQi;p|x?)c@bykL9c(|+Xr3wYhyC>iOd62p|8}I?}6_Bv1=hPiS*5Fh0a^B z0Iq-1n10r?{@u-pYo7+Lf8Ol4eGYt|?>#xU3|O!cM>Vh*eodPSJut8t-XF!Ysv7wG zP<2*wJP0~&Xw!js z=6341k4BGVjr|?K-|!Mw`~LPl7merVSm1LCdR?}T$EvKr~>WF zbL}0y!1e$1oNoZWaP(Lc?62cuR9F0HoM?Ak9!vsGUH$as_EB@5zTrLi?Hr5;-*KOQ zE;-lmGmqn9nDL`EPGfS;g)WesbD_(@eQ?e-{4;XSCFfjVLJ>LVY6h2s`?PW{vVTgo z!N{rdZ*a;OYglKZ{>`z=E^ejoN8HsjHp8BpSG&~hcc@LC&%lw0r}g@;Vq#~;&5BP* zl+WaAAkPDNEqvVj0o}gnwk2;?@`f!xwB!f(CzHH*vaS~+C-gXy#&L_>uXE|^6X?+t v@0B)kU4JWmLmK(+f1Bm%)=Cy@fwV@KowX<-Yh#I-ymw2E2T5ugu#9QLUZ>(X#P|PEvy3o D+@}bx literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Data.db b/test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Data.db new file mode 100644 index 0000000000000000000000000000000000000000..6fb056218cc9126f17eb56208daaf3025c4d0453 GIT binary patch literal 5207 zcmd6r|8G=P7Qo+Y`wAUoU#pgEYnoS*#uT^urZdxaX4QGmE=vRr6qKY;47Bw7x9POg zX{Rggu)-Z}Tp{oM099LIAnbG*Gr)3h-BU*mGPZKHO3zQgG%C@gY&Y#fYeyR|Lh z?IJfn9ByiA4u(2w8X6lK+p1ezyXu--f(^|zwRM{-DueY^EmhTR{_@J|kgqLRMB{LqlsVQAc1m%t8KP+HTgBy2je`qjQ5!&R@e7}vJAk6dxa!$47C6ZnE!T`D8zmJ{M>YL$A?vCd|-Qiw1qY+m&c>tqLo> zO{&0%8ZEBF$haQiR6$-=qC%z4V|s*F1!=X4rQTeEr4A|TbH_=o&g14P-slQ9u5u#2 z${A5!;eo&+=gL*alZJ)lmD!ke%366b@;mWakg#Sx22NWEDadCC53N$8Yit%rB$j!-B+aQvpHBbo+kCh0aCY>@ijet-9?(7iXv%ulgZpKOXR0w^lQyRX4(Slj2KVqa7eIC8q&jDOSn8kX3&>*z%KhuZwstWp;x`eI z_APkD&zO!D6( zK%axI91suB(>@h9CVnGc<+M+r`^lkSE4u!U9(ZJ^Klg%7viIQZNnX>=68Vpg+*baN zvf@cKy{|NQ=90y zQ{_l;&*iBKW^6M}KRwFOa)wqg^cX{r(~?Wm%io3llhz)QPi^h7t`TW^=Lc{FnZ8du z=sRZGiHjPBPrIm(kgl!xyPx~-1<9OxErXtVEt5)?uiZn_49CcQwl*%ePw&@j-Tztk zfBhPcNm|_I_e$o7%|$1`5iL{vE?Enp%d24@k~=*?3y6OLNwrK=`2#oZ?*3ol2E?zE zwQkJx#PxReK$z(Lu_)Q)1g%@#g)>u$-!uH}{EqSGb-#ZCzqM2`J5lS_1)qVX8Np-x z1$gd~W79lyVJ>V_V*FOgM>b8Xgl)v4+QlcPPV$mFP?&t9Z%)E}$zuti)3GBN|qx9AIdWp@R$! zF*Hom!d`}UGLA2NmU0~HOJXHo_#89&c_dwOs5Y&I9!&Ey1TPKNPSV;K3NX}8Q+g?F zxtzX^)Tet6_1(Hc>N5pRU(ZY~V`u}CZaP$(q0)mHk1$ls(C-*p%FyriFElf^#EtLX zz6H0A5wYWcPeVxm1GJQwtwwMCI7iGbyg}Y^%f!toPIL<%<`l|Z^abY>myy@>GW7VL z#CRjQKFNu}f1&W>1@K=;m(n z4qGO|hj0@)N-QgPw(N`VhKDaX7A2b{Vmr^dj#1}Kd_!0xXng~Nt}m?;FgSz3f?1h2 zAt>4usGL}xJBe&ro_iVD?BlV#bI6wEdGK0Czp%&h;%|2GKAr`?MbLdb?<1UV^l|$L zlHs_0l%X+(jxzKXLvPdM>St&VU4N#>++l{cGW17= zo}uZXT88Qv!_^YXaH#G#tl;Wudak;Lp(l`xR<1tDjIE_MfaBAuVD5kpFQl~ zlFe~P%pb<=Hg3JHaBS&s=Z3;#u3xwfVlz>docITcMg%$BtFQ{IhRv#;QfFrKGo_(x z-2c!Oh=CZ04I5&OT4%8etFQ{IhRv#;QddvT<|pqP*SY?uu22lcP;A%~YsMvxcO>Ch z#?e&F@!Yjz3*#C)RKzaq!mb#0RpS~<=1~i^P%DO7H@9c=^_FpsCz376mSjt^HSA=o zr&Kd8$vop?T#PHmxVqQ=dJuA*?_uVOe338mHEj8+$~6(qQ!dIyxpI{2?R&FZ+co97 zF66>4?82@Xb~WQdE!09S)QX{2_u;+S{8&3zoJ>VqqAk&uXlvNfR!^y>T%vi(MY$+f zj&gl=cQ@p^G1=;2=8AlgFY+~P`Kr;pIgxQOF2l2uX?WN zO)k&^EzpXAR`<)D*?gn@x}s?jyRZwpV%SxM%ajOOpaohn(CYsFeRli!Zl(45JEc7P zb15%eEajyiN_qK0DO+Eb^2&>)y!vS=uOBPr&G$=r>$g(geWR?6W+yJG#rBH1SImd~ zin(8e+3y!&>iMKvY$QW6B+H@YPA&HF_C*sTbas?Y}7(6)XJe& z_x1WB$_Dy*G-Fri*OOHDB&_Cf);3jRt&Vde@@J9uN|@$d;IO`Kh$EIG0m7}%$yl> zuTNUfC)Hvj8ImE{UVIql&(va@HcgwR&6sJk$7mBSwb=NDU-%WnFSXc4L?faRvGAEtSXmUY$KzQ(a4xXLABV3g;ms)Jfh2a<;!|+ZmwvjOtdg@ZebW7M zlqOtivGEJPg#9sLzglb~q7l)EnCN%SxSVb=F2=>UVvI{IHsfMkj4Rc+)MDcoe&JUR zztmzQ7Gfb*4zc=AS~IS(T5K7Yj7!E9lX0oVwp=aOnC05*hO6DipiOsTzZc%zA$Lfy zFqB|HEw*QID9@su!On1y)mTHWiCSzSm#Nv*95Xem#kO26*O=u>N|&XyDVHx(rCb<} z;V}$Xi*31Dt})A%a{WiS)M6hb7WDWmKR)h;yRrJ!qZXTSF)qdx`85kQ^=`|AbF%(?CJN<|EU&xX#b=z12dk< zgXJFel`yr~o|=O_HAsuJN4aLyVhhWJWx}$|uaKCixaIjf zY(&u_F6Wbsi*Yfo7~@ilZO1$8j>i}I;!&=iYFuiukMdst0T96Rcd-3aEw(4-U{4In zqUuy!9iCS#Rh2a<;!*I3O!x-4@rOSJ% zyWwu^zb>0`sl{eojEiyQ7}w>E*?g@ucuj3|HoE;cI!}qGWEf9Lqn281%0;;-SB`RB zIW@breP6kBZ-5tgfmaN?c9csk_Au<%9A*ylBtFbL(Q2_d2FHkT3>(|>##Tcvwb(Yg zhuP??oaL3{ZXDg+P>W5uC>Q05{AR1BT%vi=JceU<48zr8TdtOC%yKoIE>EZRT+y3c uj0?gcJO<%vvBmOY`7vU}5UZEYDqNA_T>n>awQSJthP&~9-;N)l7W-da+4n&J literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Statistics.db b/test/data/legacy-sstables/me/legacy_tables/legacy_me_clust_be_index_summary/me-31111-big-Statistics.db new file mode 100644 index 0000000000000000000000000000000000000000..2482df5a8151944c29d8a248b35b03b27cc8ef24 GIT binary patch literal 7151 zcmeI%drTBZ7y$5@-Ft8dS6EPaM5+jtqB$stK%-v8lSg9$$VpjKm4(AFn0qNW&{T+v|kkRnB)T=xnWzHiz;O#iCsPBPj3 z-8Z+)?9R-7IS3&tGEu_h1C?u@TsgUJmn%<5u$e@aST-8OQn8>=G8Bkbt5|Hdhz4_^ z&5*RsCZ$`tT*HN=xe%<;{I(jkOS@pAL5FcZ+a*ozn|WOy#Hwea_c#Ke|lQc{iL2gPhfj$O!*kO zBJ>FIPiLc{xp^nh{>tss-+*yGs0_^yzAaA&aX?LF#_mwyB7nf`f)1=wqS^&>cb{cYcVIDX&jsVxF%KV7UUh3|vFBgS?E zh=*M46X5d-8%#;70`Uboj&PU{#Kk`7(1Wqv})^SLHj6?nt10C62puit#LV~PQ5uU?4#=N;ZlgwUXV?DoJsqtz&}~8>~uXtP|7${Nna~&e@NjXF?sM)r=Y7t3pZDyIv~8sNxD=btq)tRHpQ+woGz`rP@*ULcKx2v z1}5c)LQN}cR|V>qx71WOv{n0Sm$%g}ZwWR9R<3GUv9`Ugt#xHfaBZ8fzP`HN*VxhC zUe#P%6I|X=7ibQw3RJD>XzuW@Xle2{*7>Refnd$@)pZT4n%AsoXs%w>7O1Xi^!aL< zR|kAm&GmkNaAjL_Q){rcc4cE=O)yy1&=Od)nx;2oyUvVB^)B7C;o70YZgYzzY-=jy z^UUqbX$)=J;NEn!g5V*Wn=|*iE1n;8n}e4t<7O6Hkhcm%8dCZpE^1*c`xd88&0@IbsTrv)(mdd;Aj06(%U;#n&Ck@Mjv=7 zX<2QH&-<5hVd8nrY9nEG=Ed5dT3@LD0DDXlLLSx@X3Y=zH+4`dh z!*De~HOYknHNYIH#6w@Jpg`@1p12rlmqym7s*Q9>QkQQVsJ;{P@%L<@1!$Q@zlA@ zkRq&^de~FQQD>wIv*nqI3UlBIlVq}W5d>#k%8hXk^3EBSo3ouV9E-fW?-p|aPvTC~ z+lc)3C1n|yQhV<9*gJ)|?{&khhHMC~uJ2vsj(jr-8I-q>`1os0p+d_y9|5$_GsSz; z>)cTjWhB}J4<}{-ZOtzNbR?iPI=ElzO>cA9!>1CD2X3 z{`)d+?q}S_xP$3Ex&{l2Ug7z!yTD^sEs9kHwjbGv;;+QrKk{Ka1s+Z$2zwT}qdzL8 z#j_CipgwByJY^T{J&Qci*+mGB8h_(7v>c+hi@#>cZ~OQONwfHOg~dm$8IrKV_I22P@3EHr0m*H4 zM@}u{fwkLiF%7VTI6m)TtGlCW4}zfFE9jqQ4@jXxcT@`@77@lH7=^nY_F@kCDfzU#*Qo{pAk>m)(oUO>AKc`HiCb!vgW~Q5L@x?s-7`r2Tl)WSA<5_N6zgzf>(nXsS zJ|k@{l&xFP`xJ?G05%hClfKPnqB}~3%{-J(MD{B6;WiUpJDFxP(Z_f)Q~s0}e=QE$ zV?3wXe?$95^oDS|&+)CWHZcK+^abAusl>fJ=Q&e}CqQ(?iEnzjNL%(lx0g%C1>{hn zbV3}U=#z&tL$M*7;ZlG#Q-%(*vD`^s3F!ltrw5TQ6szGrK{0$fESx5J!-3YFM`L!A zB#Sc{?!jAy$@E(eAREf|vE2CW$P{6g1M2{)$(~T@D@S{ljvJwY8(a{0|V&>CtIWmREYODaT9660H z3Lop2AEZ{A!B4cq;ML*syTln-uO4Z=;0&PmPh$>0(UBPfgw@wz*?Jk%^ZnS%8+Y-@ zsC3;_mT;ap6LzJfV5_UFMuZ1jI1GErXyC_SqWp|TN=p{fO_F#FH zKcM!{I29IC8M7FB7s1?OUT3oP%4AROF=r>g&0|I`Y7!okg&Rr3P9H$|hYb*<=X&(LcV=*VNLM@k~;b&{`2W5dGWQY8!hQok zfTDkdxknUkNUEVXCg-dM*@1as(ll7oebT7&yr8-`b%~e%C}z5CdOjD5|6s-;D%h@y z=~SU*h)#-j)TGa49vr1*SoM4|oFFU8(=DO`sFG(32a=HQma#y#J{S3JLB4RJH(>wej_a0QHdV#y z3jL+b&@Sw}gy)Y&c7K5^qMp4A7UqgdGE;bJYlMa6ZRW-nXLshv@USiLwa5%Q3wjlE znB`nW0{J3xLH;wH+C@*ew_SYM<<=(?Uv6M+Bdy8a|Uu!poT9wNN8&Wd^oTV}A$(D#mG$sQcGpMB!xpmS=hS zpi1;3MQl_K)Gz^1Pz2F6!SnD<~uhq8{&uZr)kzg^MsII7uY4pOa z8KRM0ODl0-)({*jl2?nmVvbxcT+CSXh6?3*!o<=F{JY~GW?%~`hG!v4ig7dw^YCn- z>BaXUs#)S5)^9mf0FJ{@w`bA#g#ev~VIak;Q$SsYL7>`cfLen3A1@a~{b%It{j=z^ zHUA*Tp+c!TR#J?OXNW%PbW?!c%k4+TT8{CsJ6_?JTA9tw;ieSgR^^;#YK(J5&_lbq lg{86A3kx%_6Bc&!l>sJxvna7_CiANE;-4B!8^@Zm{|^vFy>$Qp literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Digest.crc32 b/test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Digest.crc32 new file mode 100644 index 000000000000..7ffa3bc3c73b --- /dev/null +++ b/test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Digest.crc32 @@ -0,0 +1 @@ +309317098 \ No newline at end of file diff --git a/test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Filter.db b/test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Filter.db new file mode 100644 index 0000000000000000000000000000000000000000..8868e5c18008783c106cc95dcb2a5942cf662a48 GIT binary patch literal 24 fcmZQzU|?lnU|?!-Qe4oV{4sJIuml}Kbkx|po5XHuX-$Phi_$>@1yTTs8$eO+A&O|6KFTa0 zE+A5T0W=ZH9Y7?r=h&NBAxnArhsO4tWoO*R$&!ZVb!o%)l=%;ThslOL*u&H|I`(Vp%{t{n_|tl#PNMQfi+qu6ObD3W67SfmaN?>basfxj+lFKr04X z-Lo>?U$0-vwP#wyF6_dt7~bkzd{fGof0gp}=VfJUy85kJY_FJm#eCSWnEOSTlYSAV zo=>X9MlvKrvK(5@YO${#mNY8gFn5|e&7CoGXA>^3v7#1gp;ipF?p>e1g1@DF1G}&b zyJFbYjLZ2XYM~Zt#Zc?POXE_DEuI(8i|2FV`D$Ehu~7@PP%DR8kFQVnSJYz17;?^& z<&&QD8j?G7zB|;6OENFxl5xqnVs`LucYS`WIPx3Y;00db6$7uTTxzj}TtY4(SI+p_ z-JUC#T5RmXF6_!-S2HfP*ycoYqB${UPVDZi&(9UrVpA^4MY+P1OEgcpC>Q0*QLek! z=I4rPu_+hjqFhnRr4}2sKnt{Tpw->KHV?VfVuKfWfmaT^>bas?Y}7(6)XJe&_h@Y% za;e1zFYp4d9C$V5Qgr5A(Yc~?#hi1+C%tm{LP78XFYt;3%hdIRfWrxXi79Cni6BC#O}rF^!dseYq3YaUVot$o2T(Ko|fZj$9>Xz zKB*QP$&d`mj^o2Hf2J1Ov}xKjZN^NSJw}^wsl~=G{KBsoeyPPaA{r5mh`q<;Ts7lz zKFPQk7vqXCF16TVjCHaj7G*B3aZ6MEW|>r9AfpKv>BILY$2Da z+0+~}HLJz8TrJm_<=X3ptL>epTxzi?7lvba48uFM*itSjmy|0};bzLE78}0c3%+vj zr54+OWI&1;kaEDXj89raF16T{i*iw}W$@1Jw#Md*yE%#5U=ZfCsVqA=iapf47T5QBZEX0Z-Rxfo`xFW;3{;%L_+rOX< z=ndL{&Q*&&{_VXV)nbdtL}VheY;~yTlWMV%49Sozhh%E8%_?uutdg@ZebSS1lqOti zvGEJPg#9sLzglb~q7l)EnCN%SxSVb=F2=>UVvI{IHsfMkj4Rc+)MDcoe&JURztmzQ z7Gfb*4zc=AS~ISZT5K7Yj7!E9lX0oVwp=aOnC05*hO48-piOt;q!-@YA$LfyFqB|H zEw*QID9@su!On1+)mTHWv07{)m#Nv*95Xem#kO26*O=u>N*ATHDVHx(rCb<};V}$X zi*31Dt})A%a{WiS)MB3|7WDWmK0fY-yRrP$qZXTSF)qdx`85kQMJ{i%q#G7v;)TF16Us6)_ycV;HU$+j6yBW0tFYVKHBrrCe&U#SLeT8!V>9#pG@* z=WcXrvCW92lXM13XBDo2kFyom#UG7m| z2~&&hsX5(KgS1F{k!waRwy;cCCM?Sg%N}m2#l|oE!mk*9sl~oX-8F6*w>*DmjVM~g z<$RKHF)qdxV_a&n?RaP1@%SQNyvWs4jY}=|Mg9vQ00MaaPPc!m#rDLU?ukKJlpV&J zX~v}%TgD~hl5xdkTxzi`SIaeKxzgQ5-L1+sR*Ox!FdV~U7_Jt37z4Y#ba5|rH{6Yr z*JU#L$Ywb+!4a#60xZ?pJ#FkCIRCzK%#EJwLsgHEtg6!Mh12xYZb__QA8V! z;tOdgnkpsn(n~@K8mZD&po$M#sP%#pBdsX}lNDQ=c3Yzqm9oybtp5M055^~rbCSu- z$A5Nn&Yatyg%Bdd#z$(1r_7UYRKD!;#mU!4^2HMpX%}0=OG#p-w9+V6iDl(BW0_c8 zEml}1(P%Aq7_-(pYsT4!x)D%j$%BUaRTFH#tRweFn*iyO2!)*?_}J;co3U1<3SfNaR=ib zY;FeQE!aHs{rqkm7mBboDcIUY*gi~ueedG9e;;bJ1#MeG)4{`Q%1+!Z}#&pM*f|DZT6F2vr0=O=YtLUFDD zuV?NjY`*v$sxJt5{tFk*pt$D824wBuZOFa_Ze+hn8*;#8D6-Cq=VOTZHxv)+nvNVk zY(S10%txNxkHzuHQB4UGGAI>(^>%u8=V2Sj(H^%PXBKM&)o-aQLol)}PUTE{>q-D_g`_`mFm&JVd3iR4LL8H(OKeS~-?=NVc z1%0-u?qle&ZwHnF)3~70F6go)OW^vaZI&b6^{-T;?oV`Z{c|?w@b}>J+{l5&wZOcC zI(GmIbNcnU(BtD9;rM9&ZQKdZ51;tZ(M(X^x9L#rH^BaRpZ7|@fh!vRg8kS15^^2( zKjcDAhaS|QCe~NM^B|(t)NKUu=u@M5I9_vZWarj{_`D)l49o{oQ~v1Hfw*b#TK78O zH+HulfbDtDm5~bPb49+nr4iJxEA8wX0p9!T(9ss)o-e*^hwXKqNEk?a7^nKJXC|_M zb64HEyXB#Yw{3V2{(la}fzK$9&r8nbIr9ifX8MoTxExb+F7$%boD01U^29lp=WEoQ zOU=2!gi>>^XF2DhyJk9i5{Tw%6wxM2U%#GbxqpAt5Wh+OjYPS;Lv3pQy(jqCC6pg~ zWAdE}hmw+BPEMJhx*#n*Lw!=;fjSS=weWQB2cB(9-K^9NTRpVYgZr6DUc6b?ov8=; zK9$Ci%pqk@!v-lHId!}%_CI|bSJiJJB<|7wL@PC}5vA&Kaka6i@YVFf^;I_d*fj~| P*NUqjyLv#ySC{Vr3r&}s literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Summary.db b/test/data/legacy-sstables/na/legacy_tables/legacy_na_clust_be_index_summary/na-40-big-Summary.db new file mode 100644 index 0000000000000000000000000000000000000000..0c575b7c1bf65927bfcd8400c08c799a2e867ef3 GIT binary patch literal 47 icmZQzU}#`qU|v?xRzMl!C1u$bK-G|PPm%?8!a zVGiMQE`-qBP(IHo2%oP3LJLSiXu&QBEvyKkMQ%c9u{sDXp#h;KA3RW4S8fsTJH8<5%*EKd*2mIB|)lD^lwkBUqZNu{A ztNbhd)ywL9&CP3kZPhJp^)(HFn${J|0&9bfEv*gJ%Nm#aZfk2<9$Zmh*KphF+F)Bl zU{yn7?g#TN!~YT#m_i<-T8l zVRjUG%mGn4Q2w455?F_BY5SW%LhVVYV&x0isfPFhc8_F+Fk$IuIkb!(_I2JU7vV_{ ziQw*&Z1+Gr+e_ z0JLABjifV=Kv~g|$~?w(b*3^un-8WdmH9bzWgRjN-pfwq@xi|tBM~gJHOUk_W_Q$d zh_;&;R%fb5ygWf(e{X#Q7bi7mx=9<_hP_3ZAx)obyCz#zce8KI>QSyiv|t+|A$4S_ zN7)x9dd9OMI?)eUUclKe(nAEj7+Y6Vl3ajOJpk>g%=5|sI#QW4iU2y3%q)*toCwU7 zN-P~GkX`x{Reg$`Hmk=mOxXy8P^_D?)Z^?lO|%>6>>;khKq+aaZeK{D{@uF57IuIV z>8O_K#dYczK}3qvj0!sG3p z#^b$&;@vk?VGgiMG=K!Cc;VE{fEfNM2X-#3h+OTuqcFhcqWy_>kZrF0pyFEx*~2Q9 z?T%cZpBajT%S~PEBhyy)TO3#mOm4H~=3%$l|CC8E+1d$$PYMgoqV;Q1NL|2wnXS&F z{>Y!15KlRtoC9Qff@hlDKn2>3v=SX@+?|wIv?tQI&+G;2O5=uU z`(5DWR=)J}e;Qz6isi&kVt-ni^;BL|IBj0~tc+7Jxl2wauk7Zwo3n6LZ^jK=E zDE_b~jj)3R*k4DhXCZD(y_T1}h=r^*cP#Woqb>kS{DrTSplOdc4-KQ~(3-vCVYHm_ zhi^yFr2e!twZ}5d(`=va`^O4QA<;7YG<;tkD1Xe$tFL!nJ9!>Y7peht>*v1zvF7{H z_qTli$o!Dnh2?wU#>d&+`Q1pDhWR|Mvh8wHH~T%Z=r~XD=twJKAl;|2Pi1#wcaH=K zWD$nA^jbcy0!8V_jrSyqI3^Ugh;~Ca+SA9aD6t&LA&?WKB!xVccw!1bSCWW~_(H&- zufg&E#X4!)g#B+!Gn!Q9*N5Up^Msd+aQ{sspK?`rZN?#}@*y`CPt<443{m+h?jgYH-2j}MVKWp z22e@%#F^DfrxR(AyrclUy&s?>l^NOw(3#5Y=mh9WF#qA@_x@iU-f4z1>iu;6gbBww z{TU*Ml(0e<_d>M`E52_rG>|FglrQA2TRc4aa@^_Xbm@+y5yK3$R^Ehnt1>GnsKkbrLOigF?X*zIR>SI5*@tpI zw#Q>`5v8|u`n;S<94>p-!?a}s8R|V-hY$(H=-q31wjSZLPvSM0%=Dhmdx1I(Jz*Z# z0dyJ%q1XL^LQnBGP%+3r385@c!636(VZjRYpJ6bXUePg7N*5UNnM(Zz`OxGLmnCh_ zU8osL%u$~qsyN@kEnp5iBTub@Ds#PLE27JkrB<;fTsYw3%i@wF$V98l-z z)^aRUDm0t(?JVTd<@C1_>*nRF>lB)-5s$F$Pp8ll*^LuaRbf-;d;`B24?Yt#&z*J&VM|iJCA{_SBIr#|!Dlg{Qr}ZZ9`OhJs zilHw?dJ~`>b5d1}O0)X1AE+aZx*wpEQ*ZF%o2IS(^+~VtPm1w)Q-VB|9Q*kw~oPCnllZ~4rPc?^1Fhzb9gdY>$oJ!JAxc1Ymyw+eayhuBvHHwiuA6b10+$n^~eS~;_7k{S$8rL%2kfO z_Q|=4!LThZE&=LD>*~n|>NJdl6e|GeN~2zOf>3tv+*xdrDgI`pz2(YbTAdhnPdmrU zKN&P!`S(Juoc?1D71~(>yI^XAS04T@QyW}H z$1uLvE#POMkI^V7X*`1qH;DoINl7d0=yTm*?UeRH`haAUvT_r%gu=B`A)9=bz9vkQ z)S;gV2vuUqS?I?#Z4siLq1A%-Z#yJ#&5XAqT17xR83$Kq$ZfXI<~Rqr6<{6N>NKpc z=@8cEe9@#%W3zEy1`;OWN~5;)V+X%|Hj&og5G@hOAaIE?cBT(MFK{KKvke20-iumw zFEg;UWNq7q_r|?!T_+|a--vTkpC)RZXb?8+6a%dz7k+HnxNI zE3YgzmuY^M#(ij<%OOuqz8C4Y2%oXt^b3=LuF3Jp2!@ml^FVeyK7=9TAN;!af}{L5 zFF7sZbccc3jXi0yJG+274C7#3j{r<8c7vB!>zB_x|30pg_A@q;iB$6IB0ZfkuH~AX zMSJruq{}ocW<4#2{UT3G3sn^@&(vu~_5w=eX>3-u8bA=AspncK#mVl_qOwZ&F#}!G zBJ4$&B*Soc%{~`U*}%pm$}x%feQbXTD0Z4te4gmIe+E#8p(A$FOZh;Z#z6>;oB--d za6i(14Vuw(toVVFw4b$C>vcusWQL@XGRfp;8@L1UQ~c#&9(empys!}2oE#qy{8BiR zW1`6yCu?C}Lf^v4#tdxO*j3!d3}k>;!^N(h4hJKt)l$nctJ~b@#eY4R_U!C8{~zel B&gcLD literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Digest.crc32 b/test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Digest.crc32 new file mode 100644 index 000000000000..b0e4cbb28e6d --- /dev/null +++ b/test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Digest.crc32 @@ -0,0 +1 @@ +3874015080 \ No newline at end of file diff --git a/test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Filter.db b/test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Filter.db new file mode 100644 index 0000000000000000000000000000000000000000..8868e5c18008783c106cc95dcb2a5942cf662a48 GIT binary patch literal 24 fcmZQzU|?lnU|?!-Qe4mBTKQzvvZz(zW1!>EgRR$@bz0)zdbxW{H{Fz zcje9U{ro;!h-g4v`xDTv_Y4czSt%qA%ScO$sg;mpLRj1U|#lfZ0&^7O0 zbOd4`24d5OSfkc;timd+!m4Srs#EITZ1ME)xpCd-Uv-3HD28IwrdTsBal9=F+cLJM zVz%cVyfm)4O-1a&F6@e7S2eD=WFECp3$Kc|jBD7tFm5Z(Z+4hDB46Z-d`(-vs&Z|M<|!BDqFg!3b$e8Eg? zE)Hh(pMeM>Z?22Jmg_bE1v_K2AVxTqr_0QtZjTe>HvoA_{`}b1b+biXR-%9!L zRw=tbmGaU1rF{HjDW7bX^4V9VeEvr%Uw&FPwif5VQH$*(=00LR?T?uILzv_K5T@== zs>Mb!BtxSEvb>kFiS2q-D~wn0N4dm&cFbZzD2Ost>joXT9v5kwyMdMQ&`Zi;Y^Sg<3h( z8g7rur4}2zunW6#*wu_nEw+$L$R*^;3AyfE8ut~|VpA^4MY+P1%fnPD7v-W{Im&hS z;<&G<7MpTWF3J_9TxzjF3$#Eh2U^4Zi(|;878|_43%qjRRreLuVxtynp;ivHhDWn8 zpdfgG7kI_MYpGmnvCWC*L~~-!oH#t2 zy{P(8i;Z2_grfD-~+Uyu@!lf1)zwiscV)&&N+lXjHG$MA7$vJAqWq*=! zF)qdxV_a&n85iSXTsfZA&$JqsT5SBnFZ}9MnEH`gY{Wt=#F{#%jyk_)TxzjpTrw^h zS4_sG7Ta>QTw|8&QR1<_U#rTc7Td^ZWHd77P*5#4Vj&h{LQ!dIyxw^>Y95v-qi%q#G7v;)PF16T{i*iw}9MS4$x>PQ;*zg5k@YSI(^&_>| z1|$Pg%z!j?oSg3Y)sRaqHszvRlv}Rm$wb(K)8JCPJCgW0zZMj;mG0U~Mbu#6qkb zV)Zj^#-$cp$Yp9aHOEZNYOyU>%Qa@XcHMCFBINQgRmz3o7#_p$K`pkFOUfnXN>sRI z%B229Gwp=aOnB{shbJkh;E_GkgC%G6GghO}?!Uwh3h=o{)6+^79i&eNH z!@2$|I6AJ*$rW;i&eG2LO10QKzrK4}E%sTu9pDY#*71I>`;%(1*YTA{Jj6@A@6}=p z%Y{L}Y<{9(W``&F4m;Fh`#kd$(jB%;OUhj7;-p1Qf zS3or`wb-ez9`Yd1I^Mf#vDficM?193zweuIsl}FY$+%=(F&URyY|GVhjajZ#`Bhb} z%B2=NU!C36uin+J#@W6aYOxW^1 zQj1NwC>P}lQm#j{#nZ#m;N^LO5_$qkKw|{7QA;g0YM~Zt#ZXHvw&iNM#w^#IS?)B> zwC*eVBp2g?a0rhD**8 z^KtHO?w$KOSqLG*;-#S)GF_I*H!5F6^2N#5+w#Q|5?X#ecvCxxDzR-giKSvep=2r$ z?RK%)Y7mS0Ob5%g-_)vj9ki$dLQ_mp1< z)$F~so7Rjzdb_!iJN4`@$}F0XRYU9 z=I+JVo3RgLU&eaI0gQtf&t*K1@j}LtjAIzDVw}qODaM-_?_|7>aWmsSY|4xWUCG29 zjL%_na~M}(^UU}2bvQ2MU~6KrwX3ncnEra-#c`ibY+o1lBfGH;?_&o)jcx3~ek=-m z@nLLJE_Qr9c5*v*#%%1o5N!LC*i|BS12ca+EjaE9oALBIqG7+IIL*z*uE+C}y1qqm zE(@<`?jkmy`!%XB2zdSr-Iq{Y^F=kX_P0i4?<_a6Zc0L)H5G`gx8nI2VEGxvgIa@; z=MEZ?=k;YGFT9M$J8~S4cWljPsQ&7q>&P3DaQikHohZKLR|E1J=}pK7CK8ZaF5vew z(1PA4_39DsI_gT{3FySU*A78jzK`Aroq2d$9CShWCu^a%-|-uUuKv6+1^QUl zfd$ZC)mH9-9{spyJuqG3m){Cqux=e(|FkjVjA#8T)u{VjJzW2s-4IEAa^}TCPmY)XpNk7tI1NMEP`X=nZ{=0x1 zu>S$wsm%sZf3{dv3eSU(qvkdfh=+YSY=Gmn=vGQv6^Jj(afQQtATIWo4n2sQ`+jPB z1^9`&rsJ?Z&$trf;CwF5v^3O!`jz=DokPHfejI3T06zD@hfT1(&hwEy@%Q7@@aCn- zWZ<+dBX=wAn|RuW_u#+hU>x|2^7wzrxu(xNLSmTyqctwa)SL^wAT{SguY>&SoNM}P z)SOGrxxj={bFPOu=c4=PzxY-!(Jntiv@y1_?$-Xl?!B*nv`hYtM7caeZ9d39@0d8< zZL$=jB?S*aVgdT6N!_d}Dscrvd$ zGY|58CXFLWKPY<|Hc0UZZ8DbFU*^_U)RC^SYn{5Y-$)}j|1d;c982scWXV7OS1sSP iO|;ny#jU2C?8g(bca%!>fol@VZx^>daCN@EviDC>tEGkj literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Summary.db b/test/data/legacy-sstables/nb/legacy_tables/legacy_nb_clust_be_index_summary/nb-400-big-Summary.db new file mode 100644 index 0000000000000000000000000000000000000000..0c575b7c1bf65927bfcd8400c08c799a2e867ef3 GIT binary patch literal 47 icmZQzU}#`qU|&|>lsT6`CTmV~-bS^>hBISirY JiXpUuC;%KG6Wss+ literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Data.db b/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Data.db new file mode 100644 index 0000000000000000000000000000000000000000..7d422681cb7ac00ca473a94a42da8cdf76bdb738 GIT binary patch literal 8513 zcmd5?ZE#f88Qz;@laN#oRI!2|n=r_rL)qPX@9y3x?5?z<(4n$uIp=-O`#$HrU<=x8dX7zUMJC$T_PShdSy4S+pH@;})9JrW`Y$5ua%}mL z$jb1_@}~NwOIzxyD#NYe<|TFY4YjRh6_wRh72&3)#-_@e+RCQp6*aZ3wN0ffE9zE+ z%POj>O3TY?%I_^-(a^NKtZ`ZE%BtFMU1@n!YwPmr>eABsmWJBuWv#8vRpl#dO6x0I zR@60DwbWHCX{>Kr-qcdRbV+kz%#spBDhy zN>Jce`}_p;xdWWFJ>c&<7_c`wU~ zf^Bnxv0Fj_yaMQU-d2Q1U+VGpd^~EY$2*7Uk?ktMf;w_P8YS7rI`57J?F|ljf618; z5A8=gt{$*a%y%QkBHBFC8}&#ex-J+i+mIQt?v-!Kp=~_Dp36R48+mYD@YkIb+(4h~ zepEWkawPnAKdsmwt7^us@8>b@vc1Bb)r%1o<-Sp=@zPh9JL_fwR3A$ZmV$O?qYI#C zT6!jcEms2cTA7j40DV^G+9LopD>J%}K-Q|zgnvLl83Z4?G0>t8t`{b z25^%B^o#~j0^mZ3d&AzQ?gW=3UBJ8rwg>c8lqCLVhBHHq-l@`h*2x`AlKghZ1;0JK zTe3-R;c{IlIJ(3}=d@96%ZYu|`~!RV9_}#04Z6q;{IM_3P`t=uYI56BFB8Ikycpio z=N5&3`3!wI>PM~V1u?nD`Ti28s~0A(^q1@tDKf2i*Q;Mk5jq&J z3zLtl9si@SeM@0`m1>;9H`vMSep|pEh<)bbaaG)ek%USSeEAoK^|YV~oV9t^(#YCc*iiSp?8$5CP842dG(@bA||H zyI}Eevf26lAl^YG5awbqG0(dxb9q@)wRSueOH2mS9f+O3GC=0x=Waq(GIo9(o?$or!rBYR&zi#Z zuvmMHr%538d4}{Iy;@m1>{$W3L&7e$<>{|66r?wtU!D%w!y&R4IkKj};i^eX2Xl#6 zlzx-B#9Pk@bD{Sn73JO%Uyjmd@s?Fb5PkWX{M3m+TCF+?P_at#OKvx+p&bN$pc71w zmHBWKpx4S=9|q`4G5-m;UiN?M#^tZy3a;R{g5LjPZ$bM!>c&z|OwK}=tYlZ-?W|2% zkYVk;n-MrU1q9-s6~yr?j=-jwdZ; z?Q#0Qk=FiN5v*P9FS%c2!?B^FKYT44=z!@`(%b(slo=V(tM26Y%R)9jXAO>XEqxcE zJCo|3B&}f^`Q5lsoaZA02!s5LiBA>g2$?6HYZDu(ax8`z*~VhaJK!ri#0H=~^DOkf4;KT~MsQEhB$T}mhu5>%{`@wi&n%O<^f>EN-m)gd%_DH_ znS3sc^;CW#yd+*fE<_*upTtR114D#6N*;S+%B}D`oiv_PKDBL}A7_if=1@ zOOe=j>CZMCeYOCb(Wa7qZR{!h2K2HZp=`n>IH6B*d!(lVin3co^oP*j0F-CP61X; zqcPj0gFro4m#%X%R>tkcYoY=?45p7W9|-YJlSk_wzK-jZ{31;LUtk+f%5=k?%*dxc zS$j&l$Sy_Zv|}G+iH+=J?ib>t$0q004(On`WW=DIZOy)jgFQ9rI!~s@3Ki&rN-8LB zgDpgmBM6UdOoi)rLSBu@QvFZDTM3f764In+@>T)#7$c#}jlz_SS+H_pO;(~`!)Pr{ z{6M%+neApjr0F)_$&$+WOEN?mrDU5;Vwdx?&l6}y5u{LEQ}{KjjXjYgVc*3(Bh)T~ z%(Xa2e{#Pv(S;Z@CLk!X>Fr#NTk-)bCOSerYlzUKVRZ)*v(AF#`Ot2xjE=*P2Y`A_ zL$NS!fIibSc;DO@sM-i>oWQ(=H?rr`n^*(#oja1Ab*t!i2JV_KzW!VK9Xc*0$=ky@ zDKo_D^}fez#&kA2XD*L={NLn14G!PGY;)OCuIY3b)fw(yCNiw*;t_O69hcL_bU!so z^BqT%9g-T6#yVWX#yenMi92_GlX)fTY{I<8^Q$#qitUw&fyBZM7V`=O5_Ml)-w^0? z%W7_Vag^nb2A#vo31cs$E3@4IvU<)~1!?7ImN@Jo@~ja5wi&(guI0P{rv6UM<>pO; zH<@Y8M>9jLKDT!OmkN!|%+}}O+}*bihtxML>GN16N~4DJ%pG1cQ31l?S;u9>DM86x zF1QHi|c=x}>!aVMWT7sEx^WQkxXz_XE{5 z7OQ_`0k|I1LV-Ru8K^hOJtb~^_H$!uxJ1@lpPE#tV|0cnB>PU%g<(3ojsBdAu#drJ zw9oL!O7Ro6`1mlP>?d@WP}u}3lHeDDeL;BG^R_~EEzdn(U7jA~k|@Qzo~~i-Ifcl+ zyPK0HW}*v8diG#e8L!rL!6#*(?EtEn*qD+GlDy1B1;`sQkBNy@?~Z`#9YI|U(3hq< z#oh0&=Po_L^|IXkOZJk3!^_Si9p?ISYZdQHQ7!FC6EKYivi7q7B%Ue*{G#y^^vWDm>CN>sPf<>fSul^!XJhZ0N zIx(=n7^v4Y5DLA@ozZ8S1g{$fs+OkyUEKQKU(epVqlo^3+I;JC342QXDMOmuKJ^`& z%5LGQun0RmJiz1bxxg#^BDO3?LEOveV_Jj5`?gI8JkzB)_q~ literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Digest.crc32 b/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Digest.crc32 new file mode 100644 index 000000000000..e86ec54b54e0 --- /dev/null +++ b/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Digest.crc32 @@ -0,0 +1 @@ +1158768921 \ No newline at end of file diff --git a/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Filter.db b/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Filter.db new file mode 100644 index 0000000000000000000000000000000000000000..8868e5c18008783c106cc95dcb2a5942cf662a48 GIT binary patch literal 24 fcmZQzU|?lnU|?!-Qe9lWFv@*pp_tY)@ElW$!=?FX{-b-Rcy4e5yiqUV9?JH6q~gUdMAo~ ziY(X)1|(n=LP#X*oRdt>8nB2-mix^8rMjd@X6ByvfB)}$?!8ahI#q^GPi%kj+;;hL z|J13S*?e*O%-P*NM}7nAKrM`$2Po-Ykuv;LQkKn%n{ zY}^oQ)H;n-ScO$sHEvdQN`3wJ^5Dc><2uv-))9)K7>bRXV$HaW<84XUma#Pzvpx6i zt>u-&W?VCyir9r+*cHRBYFsmudDKEJ)QX|jkGIw{uG!pV%Vf)B%VcZZldVpvW?Uxo zjEiwGt{CGQ-n@B#$aS{E%n|t_U*v1t@>P{Q0*QLgvb%C&FEgX2(exljwWP%DO7!^P|C zA=kqD1i=fuz$*q`O}UKbQ46(DD~4Lb<*r;_CSrMx+y6O+Y>UL zkQw(AGQDCQ?e49zuusau4!qo0ydFoL_QkddmwQ(=j2cD_W6Tx&o2~VI_d60dunW7e zD~4UoxNJ(I7HXkZ47I+yZ(Oplr#&$+Gcz;Gs(@--vaqwx8Oor{v{&p;Tg!v3wyP)$ zd)g}j3$ZY(0-AA|%$sqUahY+&%(#YgWxcI9^O!bxffsniz^f{kEbJ^z*4Sz6roCc^ z57x>h3wzot0SmD(s{)#F$->qr>J#;eF@56jd0Fo(%EHdN;7}xrH0>4Jl*?$Ia#1eI zm7`qe57+yOvaqLp0i$pfE~^4o$|Vas>y&{E$V_|14i^sBLoQj^(_RW-2!>e|P={Qy zu(Qq?%Am}&SL|@{&U(lt3wzp20Sv(~s{)#GNjI~vXkXF3V$Qze<*r;_Cx+NtsDad&O4a(j^iu!bP}pgzNV0<+ZKHE}N2!>w$HFmS{O%TFSy6@3}z^9 zgr(!C6j|8gI1h+`2v*|5T|1j`$-*|{GUKwV7_+M=3tPD=*O+o8=10Z6Dwiy5qW*BA zu7-}*kYD4te+^mKh=o`))){D>&A4P?8*=HI@hgU3vapq_a*ZihE$Pvgv?-T2s8TKr z$M6`253;aLxlFlCxe{e=rChSG;S0XtD+gb)ur){;q?iUN2Q0_&N$ZeH7B=OgT$JlL zc=LGv4_Vlhi*iw}$TwRx<&uSMUlGGGJci+eENoq(E>V~GcsjV_{a5O~qC2@57vo}F zImRUm8?g`zv0{kTrLGEBY4up2S3v5RgHh$q3e#P)h7WSqOL8seH_~x6v#nFt*rX=HH zT#PHmxMX27F1Ovg?LN^?t8vM~#xMNBuN;2K!bU8_LaZEO^-tQ2OBS{nml>BCSImq{ z7PfL#t}*4>b;Hr_UR5qx*jh#{qn0s;hqAB{3$YL@hgkiSHsg|oZOEl-)-}g;&9bnS zt8$Gg*RC6m?hU!TL6vf0IEKeCTo$%+Rjx7RO05YD&jF(>Y{td7 z7+2(L7V5sDENsd}xhPkVa$UT$ymD9?yu44)gr0yB&=>)2)RKjbTBwCuG1QWUtz4CB zOu6Rta>wyW>%O8pxfmCOLwF3r2U*yNg;!X|8ljj&}3n=EX#t9Aoxce-va3!89(HfRSzyWRyR3wzT?3e&wb`{tWC zFqDOjSRNJfs8Ax^n{nBc)G_K9b&N3`qbzL3#kd$(s&UD}#xMNBuN;2K!bU8_LaZEO z^-o%7Tr*kNW?W`mW?V5dE?L;hRk_BLYu61&vaqdzO}x>^S>5cjDhu0?%aF^EE0Obc z$TgRRZOEl-)-}g;&9bnSt8$Gg*F-UTa-X>=mp73KjR<6o5rd*$#z9jz3 zE9H`fZAfoOZ%ChmFIm`(i*Yfo$k!~?A(t#{%0;;-SCDea!iFr!f~*{5^-tQAOBS|$ zMQyXTIi_uvg{@qbYfQOz-Eh>NSCoaVW7IL~7;|ta3mdTz3$b#D)jw&ykt=dPw0{Lh SoBcKb=hylDw_rR%7WO})`2M8; literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Statistics.db b/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Statistics.db new file mode 100644 index 0000000000000000000000000000000000000000..a548e130d2e9d940c3fad6d002cea5abe37ebfbb GIT binary patch literal 7312 zcmeI$drVtZ7y$5lZXY}b9TfHe9FuK~jj)0R290bL)(IF5$iv~Bkqd30$Sb8mh9fKp z|Da(wWsGDGoe3eZKmg|oPRz0i>gHf%8M+MuTA7*A&dHE51aE0OzVGNC#J^m;C%L)z z*YEV^oO>Sk%Viiwym_migt3(n{zUjg&mSy*yu%+NhIxV%9LyPHLW>MJ3a*&T&ebWh zxsnpDP;1~6+FYX|Zog5NWYFpiI&Cs%FzSu^qC%ZPU0psA-|qcp=Cw>hb!;`7yrAG5)X?Ya3vpZEYJYd>c^_BQcDle~zD} zjtl$QiLx_gSITabWt2TBdsALZc^&1Al!GZNDaTMwp!^c$9LfhMH&E`TJc3QA@gUo& zxRG)nHoJy$6}E`FzsNC9?A65SxV|JDTe<_=iR!QOLmYPudKnAe}KJuFShS6 z_LHI5n@?gZGO(kLW5=DtPF{tb>4RPJB6cl@-Av8j0S%5@{Oz`_BjW!Xij#~q?BjTT z63cZIXH)TdX1~T3Wn4k^#bP}F#aAz*xa7+^Wa-^jWarccWS4mz@``ycWSJJvM^DX9 zDDK^}8hP!QFY>yP6y%MAc)WvW@OX#Ue~#+!82=IZxmeu3mwnABp7)C!`Cw89a?^uo zWYb0bd`3;^c@kHT`>;My$6Mb6j|!>W(DDfU1-Wz~44Mr(i}qLci0m4Si+sz_{1ElL z>H_29GMAmul6@nnztSRo8H_ub%+t^=n%z^-EBt%WdUDT@55c&HD6t>fD=HAp=QXki zM`7Hr?cF=j0TBsZ&_UnsiGdCs%58^MWmcSk*4zl&1D$fRAQCz|@RMhtOK06Dq3gbA zjfZYaZQ20+O=I;T=;<@VyMf6T_pBc1>|MLy`X{Z)XC3R`T8%8cFN5o!)tSdW1^;Kq zPwy-T78!~8FtB*jpga+JX667KAITr}4e@5En*fr_Hegm-EYjwZE z{>#4eyaW61c{QP14(hMsYK!6fz^6sss{nETuO{Viyf)p7Ppk#;kaSBR%m*UFf9{uo zxO(Jv?;F6+AL}>`+w+P=6$$5aVTz`?9@MYSG7XFapSU%8t{J%R)JGk#z2*zS!%<6d zB7f`hTpVy>-u;JFz;C}0v~3Xh#=eHUkR`LKrzb0Of!n;>l@&`Cg?Hthnp-mC=qujS z|DMya;6K*O|4R;MdxtWNlIlO&Gx(U0!=VQx;e;FxOei6Tdz^DP zawH&e@CU}JXo8U{4bEq}h8EAJiWdHp%VKd1wYsU(FD9V$mc#H&w*2$=oh(HGXDG?# z@)hZ6&qk;1FV+$M15I^jSlCm_@QBFmQPC>lN_YmsJP_8xe|tZWE^b@GW+iOc!b?kd zaX&W6iz7#ywO{aOd&0-ep67q#KeRxDWRM{fURS<9e>?Vi&9=U(n=1VjL&8Wz<&5o4 k{&(}&|0xgJdsE4O4_$upx9pk>5tVZJ%dY<3@0mG&0YgH?1ONa4 literal 0 HcmV?d00001 diff --git a/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Summary.db b/test/data/legacy-sstables/oa/legacy_tables/legacy_oa_clust_be_index_summary/oa-500-big-Summary.db new file mode 100644 index 0000000000000000000000000000000000000000..0c575b7c1bf65927bfcd8400c08c799a2e867ef3 GIT binary patch literal 47 icmZQzU}#`qU| new RuntimeException(String.format("No files for verion=%s and table=%s", legacyVersion, table))); + .orElseThrow(() -> new RuntimeException(String.format("No files for path=%s", dir.absolutePath()))); return Descriptor.fromFile(new File(file)); } @@ -493,15 +498,19 @@ private void streamLegacyTables(String legacyVersion) throws Exception streamLegacyTable("legacy_%s_clust", legacyVersion); streamLegacyTable("legacy_%s_clust_counter", legacyVersion); streamLegacyTable("legacy_%s_tuple", legacyVersion); + streamLegacyTable("legacy_%s_clust_be_index_summary", legacyVersion); } private void streamLegacyTable(String tablePattern, String legacyVersion) throws Exception { String table = String.format(tablePattern, legacyVersion); - Descriptor descriptor = getDescriptor(legacyVersion, table); + // streaming can mutate test data (rewrite IndexSummary, so we have to copy them) + File testDataDir = new File(tempFolder.newFolder(LEGACY_TABLES_KEYSPACE, table)); + copySstablesToTestData(legacyVersion, table, testDataDir); + Descriptor descriptor = getDescriptor(testDataDir); if (null != descriptor) { - SSTableReader sstable = SSTableReader.open(null, getDescriptor(legacyVersion, table)); + SSTableReader sstable = SSTableReader.open(null, descriptor); IPartitioner p = sstable.getPartitioner(); List> ranges = new ArrayList<>(); ranges.add(new Range<>(p.getMinimumToken(), p.getToken(ByteBufferUtil.bytes("100")))); @@ -525,6 +534,7 @@ public static void truncateLegacyTables(String legacyVersion) throws Exception Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_clust", legacyVersion)).truncateBlocking(); Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_clust_counter", legacyVersion)).truncateBlocking(); Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_tuple", legacyVersion)).truncateBlocking(); + Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_clust_be_index_summary", legacyVersion)).truncateBlocking(); CacheService.instance.invalidateCounterCache(); CacheService.instance.invalidateKeyCache(); } @@ -537,6 +547,7 @@ private static void compactLegacyTables(String legacyVersion) throws Exception Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_clust", legacyVersion)).forceMajorCompaction(); Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_clust_counter", legacyVersion)).forceMajorCompaction(); Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_tuple", legacyVersion)).forceMajorCompaction(); + Keyspace.open(LEGACY_TABLES_KEYSPACE).getColumnFamilyStore(String.format("legacy_%s_clust_be_index_summary", legacyVersion)).forceMajorCompaction(); } public static void loadLegacyTables(String legacyVersion) throws Exception @@ -547,6 +558,7 @@ public static void loadLegacyTables(String legacyVersion) throws Exception loadLegacyTable(legacyVersion, "clust"); loadLegacyTable(legacyVersion, "clust_counter"); loadLegacyTable(legacyVersion, "tuple"); + loadLegacyTable(legacyVersion, "clust_be_index_summary"); } private static void verifyCache(String legacyVersion, long startCount) throws InterruptedException, java.util.concurrent.ExecutionException @@ -584,7 +596,8 @@ private static void verifyReads(String legacyVersion) readSimpleCounterTable(legacyVersion, pkValue); } - readClusteringTable(legacyVersion, ck, ckValue, pkValue); + readClusteringTable("legacy_%s_clust", legacyVersion, ck, ckValue, pkValue); + readClusteringTable("legacy_%s_clust_be_index_summary", legacyVersion, ck, ckValue, pkValue); readClusteringCounterTable(legacyVersion, ckValue, pkValue); } } @@ -600,16 +613,16 @@ private static void readClusteringCounterTable(String legacyVersion, String ckVa Assert.assertEquals(1L, rs.one().getLong("val")); } - private static void readClusteringTable(String legacyVersion, int ck, String ckValue, String pkValue) + private static void readClusteringTable(String tableName, String legacyVersion, int ck, String ckValue, String pkValue) { logger.debug("Read legacy_{}_clust", legacyVersion); UntypedResultSet rs; - rs = QueryProcessor.executeInternal(String.format("SELECT val FROM legacy_tables.legacy_%s_clust WHERE pk=? AND ck=?", legacyVersion), pkValue, ckValue); + rs = QueryProcessor.executeInternal(String.format("SELECT val FROM legacy_tables." + tableName + " WHERE pk=? AND ck=?", legacyVersion), pkValue, ckValue); assertLegacyClustRows(1, rs); String ckValue2 = Integer.toString(ck < 10 ? 40 : ck - 1) + longString; String ckValue3 = Integer.toString(ck > 39 ? 10 : ck + 1) + longString; - rs = QueryProcessor.executeInternal(String.format("SELECT val FROM legacy_tables.legacy_%s_clust WHERE pk=? AND ck IN (?, ?, ?)", legacyVersion), pkValue, ckValue, ckValue2, ckValue3); + rs = QueryProcessor.executeInternal(String.format("SELECT val FROM legacy_tables." + tableName + " WHERE pk=? AND ck IN (?, ?, ?)", legacyVersion), pkValue, ckValue, ckValue2, ckValue3); assertLegacyClustRows(3, rs); } @@ -644,7 +657,7 @@ private static void createTables(String legacyVersion) QueryProcessor.executeInternal(String.format("CREATE TABLE legacy_tables.legacy_%s_simple_counter (pk text PRIMARY KEY, val counter)", legacyVersion)); QueryProcessor.executeInternal(String.format("CREATE TABLE legacy_tables.legacy_%s_clust (pk text, ck text, val text, PRIMARY KEY (pk, ck))", legacyVersion)); QueryProcessor.executeInternal(String.format("CREATE TABLE legacy_tables.legacy_%s_clust_counter (pk text, ck text, val counter, PRIMARY KEY (pk, ck))", legacyVersion)); - + QueryProcessor.executeInternal(String.format("CREATE TABLE legacy_tables.legacy_%s_clust_be_index_summary (pk text, ck text, val text, PRIMARY KEY (pk, ck))", legacyVersion)); QueryProcessor.executeInternal(String.format("CREATE TYPE legacy_tables.legacy_%s_tuple_udt (name tuple)", legacyVersion)); @@ -667,6 +680,7 @@ private static void truncateTables(String legacyVersion) QueryProcessor.executeInternal(String.format("TRUNCATE legacy_tables.legacy_%s_simple_counter", legacyVersion)); QueryProcessor.executeInternal(String.format("TRUNCATE legacy_tables.legacy_%s_clust", legacyVersion)); QueryProcessor.executeInternal(String.format("TRUNCATE legacy_tables.legacy_%s_clust_counter", legacyVersion)); + QueryProcessor.executeInternal(String.format("TRUNCATE legacy_tables.legacy_%s_clust_be_index_summary", legacyVersion)); CacheService.instance.invalidateCounterCache(); CacheService.instance.invalidateKeyCache(); } @@ -746,6 +760,13 @@ public void testGenerateSstables() throws Throwable QueryProcessor.executeInternal(String.format("UPDATE legacy_tables.legacy_%s_clust_counter SET val = val + 1 WHERE pk = '%s' AND ck='%s'", format.getLatestVersion(), valPk, valCk + longString)); + + // note: to emulate BE for offsets in Summary you can comment temporary the following line: + // offset = Integer.reverseBytes(offset); + // in org.apache.cassandra.io.sstable.indexsummary.IndexSummary.IndexSummarySerializer.serialize + QueryProcessor.executeInternal(String.format("INSERT INTO legacy_tables.legacy_%s_clust_be_index_summary (pk, ck, val) VALUES ('%s', '%s', '%s')", + format.getLatestVersion(), valPk, valCk + longString, randomString)); + } } @@ -758,6 +779,7 @@ public void testGenerateSstables() throws Throwable copySstablesFromTestData(format.getLatestVersion(), "legacy_%s_clust", ksDir); copySstablesFromTestData(format.getLatestVersion(), "legacy_%s_clust_counter", ksDir); copySstablesFromTestData(format.getLatestVersion(), "legacy_%s_tuple", ksDir); + copySstablesFromTestData(format.getLatestVersion(), "legacy_%s_clust_be_index_summary", ksDir); } public static void copySstablesFromTestData(Version legacyVersion, String tablePattern, File ksDir) throws IOException @@ -773,42 +795,47 @@ public static void copySstablesFromTestData(Version legacyVersion, String tableP for (File srcDir : Keyspace.open(ks).getColumnFamilyStore(table).getDirectories().getCFDirectories()) { - for (File file : srcDir.tryList()) + for (File sourceFile : srcDir.tryList()) { // Sequence IDs represent the C* version used when creating the SSTable, i.e. with #testGenerateSstables() (if not uuid based) String newSeqId = FBUtilities.getReleaseVersionString().split("-")[0].replaceAll("[^0-9]", ""); - File target = new File(cfDir, file.name().replace(legacyVersion + "-1-", legacyVersion + "-" + newSeqId + "-")); - copyFile(cfDir, file, target); + File target = new File(cfDir, sourceFile.name().replace(legacyVersion + "-1-", legacyVersion + "-" + newSeqId + "-")); + copyFile(sourceFile, target); } } } - private static void copySstablesToTestData(String legacyVersion, String table, File cfDir) throws IOException + private static void copySstablesToTestData(String legacyVersion, String table, File targetDir) throws IOException + { + File testDataTableDir = getTestDataTableDir(legacyVersion, table); + Assert.assertTrue("The table directory " + testDataTableDir + " was not found", testDataTableDir.isDirectory()); + for (File sourceTestFile : testDataTableDir.tryList()) + copyFileToDir(sourceTestFile, targetDir); + } + + private static File getTestDataTableDir(File parentDir, String legacyVersion, String table) { - File tableDir = getTableDir(legacyVersion, table); - Assert.assertTrue("The table directory " + tableDir + " was not found", tableDir.isDirectory()); - for (File file : tableDir.tryList()) - copyFile(cfDir, file); + return new File(parentDir, String.format("%s/legacy_tables/%s", legacyVersion, table)); } - private static File getTableDir(String legacyVersion, String table) + private static File getTestDataTableDir(String legacyVersion, String table) { - return new File(LEGACY_SSTABLE_ROOT, String.format("%s/legacy_tables/%s", legacyVersion, table)); + return getTestDataTableDir(LEGACY_SSTABLE_ROOT, legacyVersion, table); } - public static void copyFile(File cfDir, File file) throws IOException + public static void copyFileToDir(File sourceFile, File targetDir) throws IOException { - copyFile(cfDir, file, new File(cfDir, file.name())); + copyFile(sourceFile, new File(targetDir, sourceFile.name())); } - public static void copyFile(File cfDir, File file, File target) throws IOException + public static void copyFile(File sourceFile, File targetFile) throws IOException { byte[] buf = new byte[65536]; - if (file.isFile()) + if (sourceFile.isFile()) { int rd; - try (FileInputStreamPlus is = new FileInputStreamPlus(file); - FileOutputStreamPlus os = new FileOutputStreamPlus(target);) + try (FileInputStreamPlus is = new FileInputStreamPlus(sourceFile); + FileOutputStreamPlus os = new FileOutputStreamPlus(targetFile);) { while ((rd = is.read(buf)) >= 0) os.write(buf, 0, rd); diff --git a/test/unit/org/apache/cassandra/utils/memory/LittleEndianMemoryUtilTest.java b/test/unit/org/apache/cassandra/utils/memory/LittleEndianMemoryUtilTest.java new file mode 100644 index 000000000000..592d01906bcc --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/memory/LittleEndianMemoryUtilTest.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.memory; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.junit.Assert; +import org.junit.Test; + +public class LittleEndianMemoryUtilTest +{ + private static final int TEST_BUFFER_LENGTH = 8; + private final ByteBuffer directBuffer = ByteBuffer.allocateDirect(TEST_BUFFER_LENGTH); + { + directBuffer.order(ByteOrder.LITTLE_ENDIAN); + } + private final long address = LittleEndianMemoryUtil.getAddress(directBuffer); + + @Test + public void testGetSetLong() + { + long originalValue = 0xAB_CD_EF_12_34_56_78_90L; + directBuffer.putLong(originalValue); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getLong(address)); + + directBuffer.rewind(); + directBuffer.putLong(0); + LittleEndianMemoryUtil.setLong(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getLong(0)); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getLong(address)); + + } + + @Test + public void testGetSetInt() + { + int originalValue = 0xAB_CD_EF_12; + directBuffer.putInt(originalValue); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getInt(address)); + + directBuffer.rewind(); + directBuffer.putInt(0); + LittleEndianMemoryUtil.setInt(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getInt(0)); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getInt(address)); + + } + + @Test + public void testGetSetUnsighedShort() + { + short originalValue = (short) 0xAB_CD; + directBuffer.putShort(originalValue); + Assert.assertEquals(originalValue & 0xffff, LittleEndianMemoryUtil.getUnsignedShort(address)); + + directBuffer.rewind(); + directBuffer.putShort((short) 0); + LittleEndianMemoryUtil.setShort(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getShort(0)); + Assert.assertEquals(originalValue & 0xffff, LittleEndianMemoryUtil.getUnsignedShort(address)); + } + + @Test + public void testGetSetLongByBytes() + { + long originalValue = 0xAB_CD_EF_12_34_56_78_90L; + directBuffer.putLong(originalValue); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getLongByByte(address)); + + directBuffer.rewind(); + directBuffer.putLong(0); + LittleEndianMemoryUtil.putLongByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getLong(0)); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getLongByByte(address)); + + } + + @Test + public void testGetSetIntByBytes() + { + int originalValue = 0xAB_CD_EF_12; + directBuffer.putInt(originalValue); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getIntByByte(address)); + + directBuffer.rewind(); + directBuffer.putInt(0); + LittleEndianMemoryUtil.putIntByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getInt(0)); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getIntByByte(address)); + + } + + @Test + public void testGetSetShortByBytes() + { + short originalValue = (short) 0xAB_CD; + directBuffer.putShort(originalValue); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getShortByByte(address)); + + directBuffer.rewind(); + directBuffer.putShort((short) 0); + LittleEndianMemoryUtil.putShortByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getShort(0)); + Assert.assertEquals(originalValue, LittleEndianMemoryUtil.getShortByByte(address)); + } + + + @Test + public void testGetHollowDirectByteBuffer() + { + ByteBuffer byteBuffer = LittleEndianMemoryUtil.getHollowDirectByteBuffer(); + Assert.assertEquals(directBuffer.getClass(), byteBuffer.getClass()); + Assert.assertEquals(ByteOrder.LITTLE_ENDIAN, byteBuffer.order()); + } + + @Test + public void testGetByteBuffer() + { + ByteBuffer byteBuffer = LittleEndianMemoryUtil.getByteBuffer(address, TEST_BUFFER_LENGTH); + Assert.assertEquals(directBuffer.getClass(), byteBuffer.getClass()); + Assert.assertEquals(ByteOrder.LITTLE_ENDIAN, byteBuffer.order()); + Assert.assertEquals(TEST_BUFFER_LENGTH, byteBuffer.capacity()); + Assert.assertEquals(0, byteBuffer.position()); + } +} diff --git a/test/unit/org/apache/cassandra/utils/memory/NativeEndianMemoryUtilTest.java b/test/unit/org/apache/cassandra/utils/memory/NativeEndianMemoryUtilTest.java new file mode 100644 index 000000000000..ba0527b18a0e --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/memory/NativeEndianMemoryUtilTest.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.memory; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.junit.Assert; +import org.junit.Test; + +public class NativeEndianMemoryUtilTest +{ + private static final int TEST_BUFFER_LENGTH = 8; + private final ByteBuffer directBuffer = ByteBuffer.allocateDirect(TEST_BUFFER_LENGTH); + { + directBuffer.order(ByteOrder.nativeOrder()); + } + private final long address = NativeEndianMemoryUtil.getAddress(directBuffer); + + @Test + public void testGetSetLong() + { + long originalValue = 0xAB_CD_EF_12_34_56_78_90L; + directBuffer.putLong(originalValue); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getLong(address)); + + directBuffer.rewind(); + directBuffer.putLong(0); + NativeEndianMemoryUtil.setLong(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getLong(0)); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getLong(address)); + + } + + @Test + public void testGetSetInt() + { + int originalValue = 0xAB_CD_EF_12; + directBuffer.putInt(originalValue); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getInt(address)); + + directBuffer.rewind(); + directBuffer.putInt(0); + NativeEndianMemoryUtil.setInt(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getInt(0)); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getInt(address)); + + } + + @Test + public void testGetSetUnsighedShort() + { + short originalValue = (short) 0xAB_CD; + directBuffer.putShort(originalValue); + Assert.assertEquals(originalValue & 0xffff, NativeEndianMemoryUtil.getUnsignedShort(address)); + + directBuffer.rewind(); + directBuffer.putShort((short) 0); + NativeEndianMemoryUtil.setShort(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getShort(0)); + Assert.assertEquals(originalValue & 0xffff, NativeEndianMemoryUtil.getUnsignedShort(address)); + } + + @Test + public void testGetSetLongByBytes() + { + long originalValue = 0xAB_CD_EF_12_34_56_78_90L; + directBuffer.putLong(originalValue); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getLongByByte(address)); + + directBuffer.rewind(); + directBuffer.putLong(0); + NativeEndianMemoryUtil.putLongByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getLong(0)); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getLongByByte(address)); + + } + + @Test + public void testGetSetIntByBytes() + { + int originalValue = 0xAB_CD_EF_12; + directBuffer.putInt(originalValue); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getIntByByte(address)); + + directBuffer.rewind(); + directBuffer.putInt(0); + NativeEndianMemoryUtil.putIntByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getInt(0)); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getIntByByte(address)); + + } + + @Test + public void testGetSetShortByBytes() + { + short originalValue = (short) 0xAB_CD; + directBuffer.putShort(originalValue); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getShortByByte(address)); + + directBuffer.rewind(); + directBuffer.putShort((short) 0); + NativeEndianMemoryUtil.putShortByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getShort(0)); + Assert.assertEquals(originalValue, NativeEndianMemoryUtil.getShortByByte(address)); + } + + + @Test + public void testGetHollowDirectByteBuffer() + { + ByteBuffer byteBuffer = NativeEndianMemoryUtil.getHollowDirectByteBuffer(); + Assert.assertEquals(directBuffer.getClass(), byteBuffer.getClass()); + Assert.assertEquals(ByteOrder.nativeOrder(), byteBuffer.order()); + } + + @Test + public void testGetByteBuffer() + { + ByteBuffer byteBuffer = NativeEndianMemoryUtil.getByteBuffer(address, TEST_BUFFER_LENGTH); + Assert.assertEquals(directBuffer.getClass(), byteBuffer.getClass()); + Assert.assertEquals(ByteOrder.nativeOrder(), byteBuffer.order()); + Assert.assertEquals(TEST_BUFFER_LENGTH, byteBuffer.capacity()); + Assert.assertEquals(0, byteBuffer.position()); + } +} From fc97fd1037843ddb320011c52faa5a3896368731 Mon Sep 17 00:00:00 2001 From: Nikolay Izhikov Date: Wed, 9 Apr 2025 13:36:32 +0300 Subject: [PATCH 284/340] Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 patch by Nikolay Izhikov; reviewed by Stefan Miklosovic, Maxim Muzafarov, Brandon Williams for CASSANDRA-20429 --- .build/parent-pom-template.xml | 10 +- CHANGES.txt | 1 + conf/logback.xml | 10 +- .../configuration/cass_logback_xml_file.adoc | 11 +- .../managing/operating/audit_logging.adoc | 2 +- ide/nbproject/project.xml | 2 +- .../service/StorageServiceMBean.java | 1 - .../org/apache/cassandra/tools/NodeProbe.java | 2 +- .../utils/logging/LogbackLoggingSupport.java | 22 ++- test/conf/logback-burntest.xml | 6 +- test/conf/logback-dtest-quiet.xml | 6 +- test/conf/logback-dtest.xml | 8 +- ...logback-dtest_with_slow_query_appender.xml | 8 +- ...dtest_with_slow_query_appender_invalid.xml | 8 +- .../logback-dtest_with_vtable_appender.xml | 8 +- ...ack-dtest_with_vtable_appender_invalid.xml | 8 +- test/conf/logback-jmh.xml | 7 +- test/conf/logback-simulator.xml | 6 +- test/conf/logback-test.xml | 6 +- .../operations/AggregationTest.java | 127 +++++------------- 20 files changed, 98 insertions(+), 161 deletions(-) diff --git a/.build/parent-pom-template.xml b/.build/parent-pom-template.xml index 2bb3c2692723..714f18b8e5cc 100644 --- a/.build/parent-pom-template.xml +++ b/.build/parent-pom-template.xml @@ -403,27 +403,27 @@ org.slf4j slf4j-api - 1.7.36 + 2.0.17 org.slf4j log4j-over-slf4j - 1.7.36 + 2.0.17 org.slf4j jcl-over-slf4j - 1.7.36 + 2.0.17 ch.qos.logback logback-core - 1.2.12 + 1.5.18 ch.qos.logback logback-classic - 1.2.12 + 1.5.18 com.fasterxml.jackson.core diff --git a/CHANGES.txt b/CHANGES.txt index f076f8a5d1b7..4fa665883968 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 (CASSANDRA-20429) * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) * Change SSTableSimpleScanner to use SSTableReader#openDataReaderForScan (CASSANDRA-20538) * Automated Repair Inside Cassandra [CEP-37] (CASSANDRA-19918) diff --git a/conf/logback.xml b/conf/logback.xml index a8dabf656448..4855433b99d4 100644 --- a/conf/logback.xml +++ b/conf/logback.xml @@ -23,8 +23,6 @@ appender reference in the root level section below. --> - - @@ -43,7 +41,7 @@ appender reference in the root level section below. 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n @@ -60,7 +58,7 @@ appender reference in the root level section below. 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n @@ -80,7 +78,7 @@ appender reference in the root level section below. INFO - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n @@ -98,7 +96,7 @@ appender reference in the root level section below. 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n --> diff --git a/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc b/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc index b6e4d5f54583..73cae5b54590 100644 --- a/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc +++ b/doc/modules/cassandra/pages/managing/configuration/cass_logback_xml_file.adoc @@ -76,8 +76,8 @@ the rolling policy. Specify the format of the message. Part of the rolling policy. -*Example:* 7 *Example:* -%-5level [%thread] %date\{ISO8601} %F:%L - %msg%n +*Example:* +%-5level [%thread] %date\{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n === Logging system logs to Cassandra virtual table @@ -172,7 +172,6 @@ A reader noticed that by placing custom appender implementation of `SLOW_QUERIES [source,XML] ---- - @@ -192,7 +191,7 @@ A reader noticed that by placing custom appender implementation of `SLOW_QUERIES 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n @@ -209,7 +208,7 @@ A reader noticed that by placing custom appender implementation of `SLOW_QUERIES 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n @@ -229,7 +228,7 @@ A reader noticed that by placing custom appender implementation of `SLOW_QUERIES INFO - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n diff --git a/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc b/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc index 63f4ba1a1130..c50c9785dc5a 100644 --- a/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc +++ b/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc @@ -213,7 +213,7 @@ the audit log events to flow through separate log file instead of system.log. 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n diff --git a/ide/nbproject/project.xml b/ide/nbproject/project.xml index 0a000a9ab654..f770d1e68dd5 100644 --- a/ide/nbproject/project.xml +++ b/ide/nbproject/project.xml @@ -8,7 +8,7 @@ .. - ${project.dir}/build/lib/jars/AmazonCorrettoCryptoProvider-2.2.0-linux-aarch_64.jar:${project.dir}/build/lib/jars/HdrHistogram-2.1.12.jar:${project.dir}/build/lib/jars/ST4-4.0.8.jar:${project.dir}/build/lib/jars/affinity-3.23.3.jar:${project.dir}/build/lib/jars/agrona-1.17.1.jar:${project.dir}/build/lib/jars/airline-0.8.jar:${project.dir}/build/lib/jars/antlr-3.5.2.jar:${project.dir}/build/lib/jars/antlr-runtime-3.5.2.jar:${project.dir}/build/lib/jars/asm-9.4.jar:${project.dir}/build/lib/jars/bcpkix-jdk18on-1.76.jar:${project.dir}/build/lib/jars/bcprov-jdk18on-1.76.jar:${project.dir}/build/lib/jars/bcutil-jdk18on-1.76.jar:${project.dir}/build/lib/jars/big-math-2.3.0.jar:${project.dir}/build/lib/jars/byteman-4.0.20.jar:${project.dir}/build/lib/jars/byteman-bmunit-4.0.20.jar:${project.dir}/build/lib/jars/byteman-install-4.0.20.jar:${project.dir}/build/lib/jars/byteman-submit-4.0.20.jar:${project.dir}/build/lib/jars/caffeine-3.1.8.jar:${project.dir}/build/lib/jars/cassandra-driver-core-3.11.5-shaded.jar:${project.dir}/build/lib/jars/chronicle-bytes-2.23.33.jar:${project.dir}/build/lib/jars/chronicle-core-2.23.36.jar:${project.dir}/build/lib/jars/chronicle-queue-5.23.37.jar:${project.dir}/build/lib/jars/chronicle-threads-2.23.25.jar:${project.dir}/build/lib/jars/chronicle-wire-2.23.39.jar:${project.dir}/build/lib/jars/commons-cli-1.5.0.jar:${project.dir}/build/lib/jars/commons-lang3-3.13.0.jar:${project.dir}/build/lib/jars/commons-math3-3.2.jar:${project.dir}/build/lib/jars/compile-command-annotations-1.2.0.jar:${project.dir}/build/lib/jars/concurrent-trees-2.4.0.jar:${project.dir}/build/lib/jars/ecj-3.33.0.jar:${project.dir}/build/lib/jars/failureaccess-1.0.1.jar:${project.dir}/build/lib/jars/guava-32.0.1-jre.jar:${project.dir}/build/lib/jars/high-scale-lib-1.0.6.jar:${project.dir}/build/lib/jars/hppc-0.8.1.jar:${project.dir}/build/lib/jars/ipaddress-5.3.3.jar:${project.dir}/build/lib/jars/j2objc-annotations-1.3.jar:${project.dir}/build/lib/jars/jackson-annotations-2.15.3.jar:${project.dir}/build/lib/jars/jackson-core-2.15.3.jar:${project.dir}/build/lib/jars/jackson-databind-2.15.3.jar:${project.dir}/build/lib/jars/jackson-datatype-jsr310-2.15.3.jar:${project.dir}/build/lib/jars/jacocoagent.jar:${project.dir}/build/lib/jars/jamm-0.4.0.jar:${project.dir}/build/lib/jars/javax.inject-1.jar:${project.dir}/build/lib/jars/jbcrypt-0.4.jar:${project.dir}/build/lib/jars/jcl-over-slf4j-1.7.36.jar:${project.dir}/build/lib/jars/jcommander-1.30.jar:${project.dir}/build/lib/jars/jctools-core-3.1.0.jar:${project.dir}/build/lib/jars/jffi-1.3.11-native.jar:${project.dir}/build/lib/jars/jffi-1.3.11.jar:${project.dir}/build/lib/jars/jna-5.13.0.jar:${project.dir}/build/lib/jars/jna-platform-5.13.0.jar:${project.dir}/build/lib/jars/jnr-a64asm-1.0.0.jar:${project.dir}/build/lib/jars/jnr-constants-0.10.4.jar:${project.dir}/build/lib/jars/jnr-ffi-2.2.13.jar:${project.dir}/build/lib/jars/jnr-x86asm-1.0.2.jar:${project.dir}/build/lib/jars/jsr305-2.0.2.jar:${project.dir}/build/lib/jars/jvector-1.0.2.jar:${project.dir}/build/lib/jars/jvm-attach-api-1.5.jar:${project.dir}/build/lib/jars/log4j-over-slf4j-1.7.36.jar:${project.dir}/build/lib/jars/logback-classic-1.2.12.jar:${project.dir}/build/lib/jars/logback-core-1.2.12.jar:${project.dir}/build/lib/jars/lucene-analysis-common-9.7.0.jar:${project.dir}/build/lib/jars/lucene-core-9.7.0.jar:${project.dir}/build/lib/jars/lz4-java-1.8.0.jar:${project.dir}/build/lib/jars/metrics-core-4.2.19.jar:${project.dir}/build/lib/jars/metrics-jvm-4.2.19.jar:${project.dir}/build/lib/jars/metrics-logback-4.2.19.jar:${project.dir}/build/lib/jars/mxdump-0.14.jar:${project.dir}/build/lib/jars/netty-all-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-buffer-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-codec-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-common-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-handler-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-handler-proxy-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-handler-ssl-ocsp-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-resolver-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-linux-aarch_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-linux-x86_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-osx-aarch_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-osx-x86_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-windows-x86_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final.jar:${project.dir}/build/lib/jars/netty-tcnative-classes-2.0.61.Final.jar:${project.dir}/build/lib/jars/netty-transport-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-classes-epoll-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-classes-kqueue-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-native-epoll-4.1.96.Final-linux-aarch_64.jar:${project.dir}/build/lib/jars/netty-transport-native-epoll-4.1.96.Final-linux-x86_64.jar:${project.dir}/build/lib/jars/netty-transport-native-epoll-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-native-unix-common-4.1.96.Final.jar:${project.dir}/build/lib/jars/ohc-core-0.5.1.jar:${project.dir}/build/lib/jars/ohc-core-j8-0.5.1.jar:${project.dir}/build/lib/jars/oshi-core-6.4.8.jar:${project.dir}/build/lib/jars/posix-2.24ea4.jar:${project.dir}/build/lib/jars/psjava-0.1.19.jar:${project.dir}/build/lib/jars/semver4j-3.1.0.jar:${project.dir}/build/lib/jars/sjk-cli-0.14.jar:${project.dir}/build/lib/jars/sjk-core-0.14.jar:${project.dir}/build/lib/jars/sjk-json-0.14.jar:${project.dir}/build/lib/jars/sjk-stacktrace-0.14.jar:${project.dir}/build/lib/jars/slf4j-api-1.7.36.jar:${project.dir}/build/lib/jars/snakeyaml-2.1.jar:${project.dir}/build/lib/jars/snappy-java-1.1.10.4.jar:${project.dir}/build/lib/jars/stream-2.5.2.jar:${project.dir}/build/lib/jars/zstd-jni-1.5.7-2.jar:${project.dir}/build/test/lib/jars/Saxon-HE-12.2.jar:${project.dir}/build/test/lib/jars/accessors-smart-2.4.7.jar:${project.dir}/build/test/lib/jars/ant-1.10.12.jar:${project.dir}/build/test/lib/jars/ant-junit-1.10.12.jar:${project.dir}/build/test/lib/jars/ant-launcher-1.10.12.jar:${project.dir}/build/test/lib/jars/antlr4-runtime-4.11.1.jar:${project.dir}/build/test/lib/jars/asm-9.4.jar:${project.dir}/build/test/lib/jars/asm-analysis-9.4.jar:${project.dir}/build/test/lib/jars/asm-commons-9.4.jar:${project.dir}/build/test/lib/jars/asm-tree-9.4.jar:${project.dir}/build/test/lib/jars/asm-util-9.4.jar:${project.dir}/build/test/lib/jars/asm-xml-6.0.jar:${project.dir}/build/test/lib/jars/assertj-core-3.24.2.jar:${project.dir}/build/test/lib/jars/awaitility-4.0.3.jar:${project.dir}/build/test/lib/jars/byte-buddy-1.12.21.jar:${project.dir}/build/test/lib/jars/byte-buddy-agent-1.12.13.jar:${project.dir}/build/test/lib/jars/checker-qual-3.27.0.jar:${project.dir}/build/test/lib/jars/checkstyle-10.12.1.jar:${project.dir}/build/test/lib/jars/commons-beanutils-1.9.4.jar:${project.dir}/build/test/lib/jars/commons-codec-1.15.jar:${project.dir}/build/test/lib/jars/commons-collections-3.2.2.jar:${project.dir}/build/test/lib/jars/commons-fileupload-1.4.jar:${project.dir}/build/test/lib/jars/commons-io-2.11.0.jar:${project.dir}/build/test/lib/jars/commons-lang3-3.12.0.jar:${project.dir}/build/test/lib/jars/commons-logging-1.2.jar:${project.dir}/build/test/lib/jars/commons-math3-3.6.1.jar:${project.dir}/build/test/lib/jars/dtest-api-0.0.16.jar:${project.dir}/build/test/lib/jars/error_prone_annotations-2.2.0.jar:${project.dir}/build/test/lib/jars/failureaccess-1.0.1.jar:${project.dir}/build/test/lib/jars/guava-32.0.1-jre.jar:${project.dir}/build/test/lib/jars/guava-testlib-27.0-jre.jar:${project.dir}/build/test/lib/jars/hamcrest-2.2.jar:${project.dir}/build/test/lib/jars/hamcrest-core-2.2.jar:${project.dir}/build/test/lib/jars/handlebars-4.3.1.jar:${project.dir}/build/test/lib/jars/handlebars-helpers-4.3.1.jar:${project.dir}/build/test/lib/jars/http2-common-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/http2-hpack-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/http2-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/httpclient5-5.1.3.jar:${project.dir}/build/test/lib/jars/httpcore5-5.1.3.jar:${project.dir}/build/test/lib/jars/httpcore5-h2-5.1.3.jar:${project.dir}/build/test/lib/jars/j2objc-annotations-1.1.jar:${project.dir}/build/test/lib/jars/jackson-annotations-2.13.4.jar:${project.dir}/build/test/lib/jars/jackson-core-2.15.3.jar:${project.dir}/build/test/lib/jars/jackson-databind-2.15.3.jar:${project.dir}/build/test/lib/jars/jackson-dataformat-yaml-2.15.3.jar:${project.dir}/build/test/lib/jars/jakarta.activation-api-1.2.2.jar:${project.dir}/build/test/lib/jars/jakarta.xml.bind-api-2.3.3.jar:${project.dir}/build/test/lib/jars/java-allocation-instrumenter-3.1.0.jar:${project.dir}/build/test/lib/jars/java-cup-runtime-11b-20160615.jar:${project.dir}/build/test/lib/jars/javassist-3.28.0-GA.jar:${project.dir}/build/test/lib/jars/javax.servlet-api-3.1.0.jar:${project.dir}/build/test/lib/jars/jetty-alpn-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-java-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-java-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-openjdk8-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-openjdk8-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-continuation-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-http-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-io-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-proxy-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-security-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-servlet-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-servlets-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-util-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-util-ajax-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-webapp-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-xml-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jflex-1.8.2.jar:${project.dir}/build/test/lib/jars/jimfs-1.1.jar:${project.dir}/build/test/lib/jars/jmh-core-1.37.jar:${project.dir}/build/test/lib/jars/jmh-generator-annprocess-1.37.jar:${project.dir}/build/test/lib/jars/jopt-simple-5.0.4.jar:${project.dir}/build/test/lib/jars/json-path-2.7.0.jar:${project.dir}/build/test/lib/jars/json-smart-2.4.7.jar:${project.dir}/build/test/lib/jars/json-unit-core-2.36.0.jar:${project.dir}/build/test/lib/jars/jsr305-3.0.2.jar:${project.dir}/build/test/lib/jars/junit-4.12.jar:${project.dir}/build/test/lib/jars/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar:${project.dir}/build/test/lib/jars/mockito-core-4.7.0.jar:${project.dir}/build/test/lib/jars/mockito-inline-4.7.0.jar:${project.dir}/build/test/lib/jars/objenesis-3.2.jar:${project.dir}/build/test/lib/jars/org.jacoco.agent-0.8.8.jar:${project.dir}/build/test/lib/jars/org.jacoco.ant-0.8.8.jar:${project.dir}/build/test/lib/jars/org.jacoco.core-0.8.8.jar:${project.dir}/build/test/lib/jars/org.jacoco.report-0.8.8.jar:${project.dir}/build/test/lib/jars/picocli-4.7.4.jar:${project.dir}/build/test/lib/jars/quicktheories-0.26.jar:${project.dir}/build/test/lib/jars/randomizedtesting-runner-2.1.2.jar:${project.dir}/build/test/lib/jars/reflections-0.10.2.jar:${project.dir}/build/test/lib/jars/semver4j-3.1.0.jar:${project.dir}/build/test/lib/jars/simulator-asm.jar:${project.dir}/build/test/lib/jars/simulator-bootstrap.jar:${project.dir}/build/test/lib/jars/slf4j-api-1.7.32.jar:${project.dir}/build/test/lib/jars/wiremock-jre8-2.35.0.jar:${project.dir}/build/test/lib/jars/xmlresolver-5.1.2-data.jar:${project.dir}/build/test/lib/jars/xmlresolver-5.1.2.jar:${project.dir}/build/test/lib/jars/xmlunit-core-2.9.0.jar:${project.dir}/build/test/lib/jars/xmlunit-legacy-2.9.0.jar:${project.dir}/build/test/lib/jars/xmlunit-placeholders-2.9.0.jar: + ${project.dir}/build/lib/jars/AmazonCorrettoCryptoProvider-2.2.0-linux-aarch_64.jar:${project.dir}/build/lib/jars/HdrHistogram-2.1.12.jar:${project.dir}/build/lib/jars/ST4-4.0.8.jar:${project.dir}/build/lib/jars/affinity-3.23.3.jar:${project.dir}/build/lib/jars/agrona-1.17.1.jar:${project.dir}/build/lib/jars/airline-0.8.jar:${project.dir}/build/lib/jars/antlr-3.5.2.jar:${project.dir}/build/lib/jars/antlr-runtime-3.5.2.jar:${project.dir}/build/lib/jars/asm-9.4.jar:${project.dir}/build/lib/jars/bcpkix-jdk18on-1.76.jar:${project.dir}/build/lib/jars/bcprov-jdk18on-1.76.jar:${project.dir}/build/lib/jars/bcutil-jdk18on-1.76.jar:${project.dir}/build/lib/jars/big-math-2.3.0.jar:${project.dir}/build/lib/jars/byteman-4.0.20.jar:${project.dir}/build/lib/jars/byteman-bmunit-4.0.20.jar:${project.dir}/build/lib/jars/byteman-install-4.0.20.jar:${project.dir}/build/lib/jars/byteman-submit-4.0.20.jar:${project.dir}/build/lib/jars/caffeine-3.1.8.jar:${project.dir}/build/lib/jars/cassandra-driver-core-3.11.5-shaded.jar:${project.dir}/build/lib/jars/chronicle-bytes-2.23.33.jar:${project.dir}/build/lib/jars/chronicle-core-2.23.36.jar:${project.dir}/build/lib/jars/chronicle-queue-5.23.37.jar:${project.dir}/build/lib/jars/chronicle-threads-2.23.25.jar:${project.dir}/build/lib/jars/chronicle-wire-2.23.39.jar:${project.dir}/build/lib/jars/commons-cli-1.5.0.jar:${project.dir}/build/lib/jars/commons-lang3-3.13.0.jar:${project.dir}/build/lib/jars/commons-math3-3.2.jar:${project.dir}/build/lib/jars/compile-command-annotations-1.2.0.jar:${project.dir}/build/lib/jars/concurrent-trees-2.4.0.jar:${project.dir}/build/lib/jars/ecj-3.33.0.jar:${project.dir}/build/lib/jars/failureaccess-1.0.1.jar:${project.dir}/build/lib/jars/guava-32.0.1-jre.jar:${project.dir}/build/lib/jars/high-scale-lib-1.0.6.jar:${project.dir}/build/lib/jars/hppc-0.8.1.jar:${project.dir}/build/lib/jars/ipaddress-5.3.3.jar:${project.dir}/build/lib/jars/j2objc-annotations-1.3.jar:${project.dir}/build/lib/jars/jackson-annotations-2.15.3.jar:${project.dir}/build/lib/jars/jackson-core-2.15.3.jar:${project.dir}/build/lib/jars/jackson-databind-2.15.3.jar:${project.dir}/build/lib/jars/jackson-datatype-jsr310-2.15.3.jar:${project.dir}/build/lib/jars/jacocoagent.jar:${project.dir}/build/lib/jars/jamm-0.4.0.jar:${project.dir}/build/lib/jars/javax.inject-1.jar:${project.dir}/build/lib/jars/jbcrypt-0.4.jar:${project.dir}/build/lib/jars/jcl-over-slf4j-2.0.17.jar:${project.dir}/build/lib/jars/jcommander-1.30.jar:${project.dir}/build/lib/jars/jctools-core-3.1.0.jar:${project.dir}/build/lib/jars/jffi-1.3.11-native.jar:${project.dir}/build/lib/jars/jffi-1.3.11.jar:${project.dir}/build/lib/jars/jna-5.13.0.jar:${project.dir}/build/lib/jars/jna-platform-5.13.0.jar:${project.dir}/build/lib/jars/jnr-a64asm-1.0.0.jar:${project.dir}/build/lib/jars/jnr-constants-0.10.4.jar:${project.dir}/build/lib/jars/jnr-ffi-2.2.13.jar:${project.dir}/build/lib/jars/jnr-x86asm-1.0.2.jar:${project.dir}/build/lib/jars/jsr305-2.0.2.jar:${project.dir}/build/lib/jars/jvector-1.0.2.jar:${project.dir}/build/lib/jars/jvm-attach-api-1.5.jar:${project.dir}/build/lib/jars/log4j-over-slf4j-2.0.17.jar:${project.dir}/build/lib/jars/logback-classic-1.5.18.jar:${project.dir}/build/lib/jars/logback-core-1.5.18.jar:${project.dir}/build/lib/jars/lucene-analysis-common-9.7.0.jar:${project.dir}/build/lib/jars/lucene-core-9.7.0.jar:${project.dir}/build/lib/jars/lz4-java-1.8.0.jar:${project.dir}/build/lib/jars/metrics-core-4.2.19.jar:${project.dir}/build/lib/jars/metrics-jvm-4.2.19.jar:${project.dir}/build/lib/jars/metrics-logback-4.2.19.jar:${project.dir}/build/lib/jars/mxdump-0.14.jar:${project.dir}/build/lib/jars/netty-all-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-buffer-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-codec-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-common-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-handler-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-handler-proxy-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-handler-ssl-ocsp-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-resolver-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-linux-aarch_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-linux-x86_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-osx-aarch_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-osx-x86_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final-windows-x86_64.jar:${project.dir}/build/lib/jars/netty-tcnative-boringssl-static-2.0.61.Final.jar:${project.dir}/build/lib/jars/netty-tcnative-classes-2.0.61.Final.jar:${project.dir}/build/lib/jars/netty-transport-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-classes-epoll-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-classes-kqueue-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-native-epoll-4.1.96.Final-linux-aarch_64.jar:${project.dir}/build/lib/jars/netty-transport-native-epoll-4.1.96.Final-linux-x86_64.jar:${project.dir}/build/lib/jars/netty-transport-native-epoll-4.1.96.Final.jar:${project.dir}/build/lib/jars/netty-transport-native-unix-common-4.1.96.Final.jar:${project.dir}/build/lib/jars/ohc-core-0.5.1.jar:${project.dir}/build/lib/jars/ohc-core-j8-0.5.1.jar:${project.dir}/build/lib/jars/oshi-core-6.4.8.jar:${project.dir}/build/lib/jars/posix-2.24ea4.jar:${project.dir}/build/lib/jars/psjava-0.1.19.jar:${project.dir}/build/lib/jars/semver4j-3.1.0.jar:${project.dir}/build/lib/jars/sjk-cli-0.14.jar:${project.dir}/build/lib/jars/sjk-core-0.14.jar:${project.dir}/build/lib/jars/sjk-json-0.14.jar:${project.dir}/build/lib/jars/sjk-stacktrace-0.14.jar:${project.dir}/build/lib/jars/slf4j-api-2.0.17.jar:${project.dir}/build/lib/jars/snakeyaml-2.1.jar:${project.dir}/build/lib/jars/snappy-java-1.1.10.4.jar:${project.dir}/build/lib/jars/stream-2.5.2.jar:${project.dir}/build/lib/jars/zstd-jni-1.5.7-2.jar:${project.dir}/build/test/lib/jars/Saxon-HE-12.2.jar:${project.dir}/build/test/lib/jars/accessors-smart-2.4.7.jar:${project.dir}/build/test/lib/jars/ant-1.10.12.jar:${project.dir}/build/test/lib/jars/ant-junit-1.10.12.jar:${project.dir}/build/test/lib/jars/ant-launcher-1.10.12.jar:${project.dir}/build/test/lib/jars/antlr4-runtime-4.11.1.jar:${project.dir}/build/test/lib/jars/asm-9.4.jar:${project.dir}/build/test/lib/jars/asm-analysis-9.4.jar:${project.dir}/build/test/lib/jars/asm-commons-9.4.jar:${project.dir}/build/test/lib/jars/asm-tree-9.4.jar:${project.dir}/build/test/lib/jars/asm-util-9.4.jar:${project.dir}/build/test/lib/jars/asm-xml-6.0.jar:${project.dir}/build/test/lib/jars/assertj-core-3.24.2.jar:${project.dir}/build/test/lib/jars/awaitility-4.0.3.jar:${project.dir}/build/test/lib/jars/byte-buddy-1.12.21.jar:${project.dir}/build/test/lib/jars/byte-buddy-agent-1.12.13.jar:${project.dir}/build/test/lib/jars/checker-qual-3.27.0.jar:${project.dir}/build/test/lib/jars/checkstyle-10.12.1.jar:${project.dir}/build/test/lib/jars/commons-beanutils-1.9.4.jar:${project.dir}/build/test/lib/jars/commons-codec-1.15.jar:${project.dir}/build/test/lib/jars/commons-collections-3.2.2.jar:${project.dir}/build/test/lib/jars/commons-fileupload-1.4.jar:${project.dir}/build/test/lib/jars/commons-io-2.11.0.jar:${project.dir}/build/test/lib/jars/commons-lang3-3.12.0.jar:${project.dir}/build/test/lib/jars/commons-logging-1.2.jar:${project.dir}/build/test/lib/jars/commons-math3-3.6.1.jar:${project.dir}/build/test/lib/jars/dtest-api-0.0.16.jar:${project.dir}/build/test/lib/jars/error_prone_annotations-2.2.0.jar:${project.dir}/build/test/lib/jars/failureaccess-1.0.1.jar:${project.dir}/build/test/lib/jars/guava-32.0.1-jre.jar:${project.dir}/build/test/lib/jars/guava-testlib-27.0-jre.jar:${project.dir}/build/test/lib/jars/hamcrest-2.2.jar:${project.dir}/build/test/lib/jars/hamcrest-core-2.2.jar:${project.dir}/build/test/lib/jars/handlebars-4.3.1.jar:${project.dir}/build/test/lib/jars/handlebars-helpers-4.3.1.jar:${project.dir}/build/test/lib/jars/http2-common-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/http2-hpack-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/http2-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/httpclient5-5.1.3.jar:${project.dir}/build/test/lib/jars/httpcore5-5.1.3.jar:${project.dir}/build/test/lib/jars/httpcore5-h2-5.1.3.jar:${project.dir}/build/test/lib/jars/j2objc-annotations-1.1.jar:${project.dir}/build/test/lib/jars/jackson-annotations-2.13.4.jar:${project.dir}/build/test/lib/jars/jackson-core-2.15.3.jar:${project.dir}/build/test/lib/jars/jackson-databind-2.15.3.jar:${project.dir}/build/test/lib/jars/jackson-dataformat-yaml-2.15.3.jar:${project.dir}/build/test/lib/jars/jakarta.activation-api-1.2.2.jar:${project.dir}/build/test/lib/jars/jakarta.xml.bind-api-2.3.3.jar:${project.dir}/build/test/lib/jars/java-allocation-instrumenter-3.1.0.jar:${project.dir}/build/test/lib/jars/java-cup-runtime-11b-20160615.jar:${project.dir}/build/test/lib/jars/javassist-3.28.0-GA.jar:${project.dir}/build/test/lib/jars/javax.servlet-api-3.1.0.jar:${project.dir}/build/test/lib/jars/jetty-alpn-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-java-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-java-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-openjdk8-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-openjdk8-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-alpn-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-client-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-continuation-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-http-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-io-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-proxy-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-security-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-server-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-servlet-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-servlets-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-util-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-util-ajax-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-webapp-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jetty-xml-9.4.49.v20220914.jar:${project.dir}/build/test/lib/jars/jflex-1.8.2.jar:${project.dir}/build/test/lib/jars/jimfs-1.1.jar:${project.dir}/build/test/lib/jars/jmh-core-1.37.jar:${project.dir}/build/test/lib/jars/jmh-generator-annprocess-1.37.jar:${project.dir}/build/test/lib/jars/jopt-simple-5.0.4.jar:${project.dir}/build/test/lib/jars/json-path-2.7.0.jar:${project.dir}/build/test/lib/jars/json-smart-2.4.7.jar:${project.dir}/build/test/lib/jars/json-unit-core-2.36.0.jar:${project.dir}/build/test/lib/jars/jsr305-3.0.2.jar:${project.dir}/build/test/lib/jars/junit-4.12.jar:${project.dir}/build/test/lib/jars/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar:${project.dir}/build/test/lib/jars/mockito-core-4.7.0.jar:${project.dir}/build/test/lib/jars/mockito-inline-4.7.0.jar:${project.dir}/build/test/lib/jars/objenesis-3.2.jar:${project.dir}/build/test/lib/jars/org.jacoco.agent-0.8.8.jar:${project.dir}/build/test/lib/jars/org.jacoco.ant-0.8.8.jar:${project.dir}/build/test/lib/jars/org.jacoco.core-0.8.8.jar:${project.dir}/build/test/lib/jars/org.jacoco.report-0.8.8.jar:${project.dir}/build/test/lib/jars/picocli-4.7.4.jar:${project.dir}/build/test/lib/jars/quicktheories-0.26.jar:${project.dir}/build/test/lib/jars/randomizedtesting-runner-2.1.2.jar:${project.dir}/build/test/lib/jars/reflections-0.10.2.jar:${project.dir}/build/test/lib/jars/semver4j-3.1.0.jar:${project.dir}/build/test/lib/jars/simulator-asm.jar:${project.dir}/build/test/lib/jars/simulator-bootstrap.jar:${project.dir}/build/test/lib/jars/slf4j-api-1.7.32.jar:${project.dir}/build/test/lib/jars/wiremock-jre8-2.35.0.jar:${project.dir}/build/test/lib/jars/xmlresolver-5.1.2-data.jar:${project.dir}/build/test/lib/jars/xmlresolver-5.1.2.jar:${project.dir}/build/test/lib/jars/xmlunit-core-2.9.0.jar:${project.dir}/build/test/lib/jars/xmlunit-legacy-2.9.0.jar:${project.dir}/build/test/lib/jars/xmlunit-placeholders-2.9.0.jar: diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index e188595aa361..5d4781c54651 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -586,7 +586,6 @@ default int upgradeSSTables(String keyspaceName, boolean excludeCurrentVersion, * If classQualifer is not empty but level is empty/null, it will set the level to null for the defined classQualifer
      * If level cannot be parsed, then the level will be defaulted to DEBUG
      *
      - * The logback configuration should have {@code < jmxConfigurator />} set * * @param classQualifier The logger's classQualifer * @param level The log level diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 478fc0e55a00..62d2d164c6ff 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -2363,7 +2363,7 @@ public void setLoggingLevel(String classQualifier, String level) } catch (Exception e) { - throw new RuntimeException("Error setting log for " + classQualifier + " on level " + level + ". Please check logback configuration and ensure to have set", e); + throw new RuntimeException("Error setting log for " + classQualifier + " on level " + level + ". Please check logback configuration.", e); } } diff --git a/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java b/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java index f32963b73a59..d8f83116bbcd 100644 --- a/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java +++ b/src/java/org/apache/cassandra/utils/logging/LogbackLoggingSupport.java @@ -18,7 +18,6 @@ package org.apache.cassandra.utils.logging; -import java.lang.management.ManagementFactory; import java.security.AccessControlException; import java.util.ArrayList; import java.util.Iterator; @@ -26,25 +25,21 @@ import java.util.Map; import java.util.Optional; -import javax.management.JMX; -import javax.management.ObjectName; - -import org.apache.cassandra.security.ThreadAwareSecurityManager; +import com.google.common.collect.Maps; import org.apache.commons.lang3.StringUtils; import org.slf4j.LoggerFactory; -import com.google.common.collect.Maps; - import ch.qos.logback.classic.Level; import ch.qos.logback.classic.Logger; import ch.qos.logback.classic.LoggerContext; -import ch.qos.logback.classic.jmx.JMXConfiguratorMBean; import ch.qos.logback.classic.spi.ILoggingEvent; import ch.qos.logback.classic.spi.TurboFilterList; import ch.qos.logback.classic.turbo.ReconfigureOnChangeFilter; import ch.qos.logback.classic.turbo.TurboFilter; +import ch.qos.logback.classic.util.ContextInitializer; import ch.qos.logback.core.Appender; -import ch.qos.logback.core.hook.DelayingShutdownHook; +import ch.qos.logback.core.hook.DefaultShutdownHook; +import org.apache.cassandra.security.ThreadAwareSecurityManager; /** * Encapsulates all logback-specific implementations in a central place. @@ -93,7 +88,7 @@ public void onStartup() @Override public void onShutdown() { - DelayingShutdownHook logbackHook = new DelayingShutdownHook(); + DefaultShutdownHook logbackHook = new DefaultShutdownHook(); logbackHook.setContext((LoggerContext) LoggerFactory.getILoggerFactory()); logbackHook.run(); } @@ -106,10 +101,9 @@ public void setLoggingLevel(String classQualifier, String rawLevel) throws Excep // if both classQualifier and rawLevel are empty, reload from configuration if (StringUtils.isBlank(classQualifier) && StringUtils.isBlank(rawLevel)) { - JMXConfiguratorMBean jmxConfiguratorMBean = JMX.newMBeanProxy(ManagementFactory.getPlatformMBeanServer(), - new ObjectName("ch.qos.logback.classic:Name=default,Type=ch.qos.logback.classic.jmx.JMXConfigurator"), - JMXConfiguratorMBean.class); - jmxConfiguratorMBean.reloadDefaultConfiguration(); + LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory(); + lc.reset(); + new ContextInitializer(lc).autoConfig(); return; } // classQualifier is set, but blank level given diff --git a/test/conf/logback-burntest.xml b/test/conf/logback-burntest.xml index 3aada72e8f87..f2ade83f2e26 100644 --- a/test/conf/logback-burntest.xml +++ b/test/conf/logback-burntest.xml @@ -20,7 +20,7 @@ - + @@ -36,7 +36,7 @@ - %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n false @@ -51,7 +51,7 @@ - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n diff --git a/test/conf/logback-dtest-quiet.xml b/test/conf/logback-dtest-quiet.xml index bb9f983177b9..8f1f1f15fd99 100644 --- a/test/conf/logback-dtest-quiet.xml +++ b/test/conf/logback-dtest-quiet.xml @@ -22,12 +22,12 @@ - + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n INFO @@ -37,7 +37,7 @@ - %-5level %date{HH:mm:ss,SSS} %msg%n + %-5level %date{"HH:mm:ss,SSS"} %msg%n ERROR diff --git a/test/conf/logback-dtest.xml b/test/conf/logback-dtest.xml index d854f8c77120..22d2e9faa4a6 100644 --- a/test/conf/logback-dtest.xml +++ b/test/conf/logback-dtest.xml @@ -22,19 +22,19 @@ - + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n true - %-5level %date{HH:mm:ss,SSS} %msg%n + %-5level %date{"HH:mm:ss,SSS"} %msg%n WARN @@ -43,7 +43,7 @@ - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n DEBUG diff --git a/test/conf/logback-dtest_with_slow_query_appender.xml b/test/conf/logback-dtest_with_slow_query_appender.xml index 1b6ed7511f32..62d112d8008b 100644 --- a/test/conf/logback-dtest_with_slow_query_appender.xml +++ b/test/conf/logback-dtest_with_slow_query_appender.xml @@ -21,19 +21,19 @@ - + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n true - %-5level %date{HH:mm:ss,SSS} %msg%n + %-5level %date{"HH:mm:ss,SSS"} %msg%n WARN @@ -42,7 +42,7 @@ - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n DEBUG diff --git a/test/conf/logback-dtest_with_slow_query_appender_invalid.xml b/test/conf/logback-dtest_with_slow_query_appender_invalid.xml index a2252dd23a79..1f7f58e86193 100644 --- a/test/conf/logback-dtest_with_slow_query_appender_invalid.xml +++ b/test/conf/logback-dtest_with_slow_query_appender_invalid.xml @@ -21,19 +21,19 @@ - + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n true - %-5level %date{HH:mm:ss,SSS} %msg%n + %-5level %date{"HH:mm:ss,SSS"} %msg%n WARN @@ -42,7 +42,7 @@ - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n DEBUG diff --git a/test/conf/logback-dtest_with_vtable_appender.xml b/test/conf/logback-dtest_with_vtable_appender.xml index c9fd108c77d8..726c46d524d8 100644 --- a/test/conf/logback-dtest_with_vtable_appender.xml +++ b/test/conf/logback-dtest_with_vtable_appender.xml @@ -21,19 +21,19 @@ - + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n true - %-5level %date{HH:mm:ss,SSS} %msg%n + %-5level %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n WARN @@ -42,7 +42,7 @@ - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n DEBUG diff --git a/test/conf/logback-dtest_with_vtable_appender_invalid.xml b/test/conf/logback-dtest_with_vtable_appender_invalid.xml index 1b30c141c2a7..257f85753498 100644 --- a/test/conf/logback-dtest_with_vtable_appender_invalid.xml +++ b/test/conf/logback-dtest_with_vtable_appender_invalid.xml @@ -21,19 +21,19 @@ - + ./build/test/logs/${cassandra.testtag}/${suitename}/${cluster_id}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %date{ISO8601} %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n true - %-5level %date{HH:mm:ss,SSS} %msg%n + %-5level %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n WARN @@ -42,7 +42,7 @@ - %-5level [%thread] ${instance_id} %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n DEBUG diff --git a/test/conf/logback-jmh.xml b/test/conf/logback-jmh.xml index 4138f19c72bc..1f9bb3fd1b86 100644 --- a/test/conf/logback-jmh.xml +++ b/test/conf/logback-jmh.xml @@ -23,7 +23,6 @@ appender reference in the root level section below. --> - @@ -42,7 +41,7 @@ appender reference in the root level section below. 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n @@ -59,7 +58,7 @@ appender reference in the root level section below. 5GB - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n @@ -79,7 +78,7 @@ appender reference in the root level section below. INFO - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n diff --git a/test/conf/logback-simulator.xml b/test/conf/logback-simulator.xml index fe823383eedb..87d2ae327bc8 100644 --- a/test/conf/logback-simulator.xml +++ b/test/conf/logback-simulator.xml @@ -23,7 +23,7 @@ - + ./build/test/logs/simulator/${run_start}-${run_seed}/history.log @@ -41,14 +41,14 @@ ./build/test/logs/simulator/${run_start}-${run_seed}/cluster-${cluster_id}/${instance_id}/system.log - %-5level [%thread] ${instance_id} %replace(CS:%X{command_store} ){'CS\:\s+', ''}%replace(OP:%X{async_op} ){'OP\:\s+', ''}%date{ISO8601} %msg%n + %-5level [%thread] ${instance_id} %replace(CS:%X{command_store} ){'CS\:\s+', ''}%replace(OP:%X{async_op} ){'OP\:\s+', ''}%date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n true - %-5level [%thread] ${instance_id} %replace(CS:%X{command_store} ){'CS\:\s+', ''}%replace(OP:%X{async_op} ){'OP\:\s+', ''}%date{ISO8601} %F:%L - %msg%n + %-5level [%thread] ${instance_id} %replace(CS:%X{command_store} ){'CS\:\s+', ''}%replace(OP:%X{async_op} ){'OP\:\s+', ''}%date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n WARN diff --git a/test/conf/logback-test.xml b/test/conf/logback-test.xml index 3e3349fd82f0..757806e35f2b 100644 --- a/test/conf/logback-test.xml +++ b/test/conf/logback-test.xml @@ -19,7 +19,7 @@ - + @@ -38,14 +38,14 @@ - %-5level [%thread] %date{ISO8601} %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %msg%n false - %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + %-5level [%thread] %date{"yyyy-MM-dd'T'HH:mm:ss,SSS", UTC} %F:%L - %msg%n DEBUG diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/AggregationTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/AggregationTest.java index 24afbb7e802c..72bbc53b76b8 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/AggregationTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/AggregationTest.java @@ -31,18 +31,8 @@ import java.util.concurrent.ThreadLocalRandom; import org.apache.commons.lang3.time.DateUtils; - import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import ch.qos.logback.classic.LoggerContext; -import ch.qos.logback.classic.joran.ReconfigureOnChangeTask; -import ch.qos.logback.classic.spi.TurboFilterList; -import ch.qos.logback.classic.turbo.ReconfigureOnChangeFilter; -import ch.qos.logback.classic.turbo.TurboFilter; -import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; @@ -51,13 +41,13 @@ import org.apache.cassandra.db.marshal.TypeParser; import org.apache.cassandra.exceptions.FunctionExecutionException; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.transport.Event.SchemaChange.Change; import org.apache.cassandra.transport.Event.SchemaChange.Target; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.transport.messages.ResultMessage; -import static ch.qos.logback.core.CoreConstants.RECONFIGURE_ON_CHANGE_TASK; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -1893,91 +1883,48 @@ public void testLogbackReload() throws Throwable { // see https://issues.apache.org/jira/browse/CASSANDRA-11033 - // make logback's scan interval 1ms - boilerplate, but necessary for this test - configureLogbackScanPeriod(1L); - try - { - - createTable("CREATE TABLE %s (" + - " year int PRIMARY KEY," + - " country text," + - " title text)"); - - String[] countries = Locale.getISOCountries(); - ThreadLocalRandom rand = ThreadLocalRandom.current(); - for (int i = 0; i < 10000; i++) - { - execute("INSERT INTO %s (year, country, title) VALUES (1980,?,?)", - countries[rand.nextInt(countries.length)], - "title-" + i); - } - - String albumCountByCountry = createFunction(KEYSPACE, - "map,text,text", - "CREATE FUNCTION IF NOT EXISTS %s(state map,country text, album_title text)\n" + - " RETURNS NULL ON NULL INPUT\n" + - " RETURNS map\n" + - " LANGUAGE java\n" + - " AS $$\n" + - " if(state.containsKey(country)) {\n" + - " Long newCount = (Long)state.get(country) + 1;\n" + - " state.put(country, newCount);\n" + - " } else {\n" + - " state.put(country, 1L);\n" + - " }\n" + - " return state;\n" + - " $$;"); - - String releasesByCountry = createAggregate(KEYSPACE, - "text, text", - " CREATE AGGREGATE IF NOT EXISTS %s(text, text)\n" + - " SFUNC " + shortFunctionName(albumCountByCountry) + '\n' + - " STYPE map\n" + - " INITCOND { };"); - - long tEnd = System.currentTimeMillis() + 150; - while (System.currentTimeMillis() < tEnd) - { - execute("SELECT " + releasesByCountry + "(country,title) FROM %s WHERE year=1980"); - } - } - finally - { - configureLogbackScanPeriod(60000L); - } - } + createTable("CREATE TABLE %s (" + + " year int PRIMARY KEY," + + " country text," + + " title text)"); - private static void configureLogbackScanPeriod(long millis) - { - Logger l = LoggerFactory.getLogger(AggregationTest.class); - ch.qos.logback.classic.Logger logbackLogger = (ch.qos.logback.classic.Logger) l; - LoggerContext ctx = logbackLogger.getLoggerContext(); - TurboFilterList turboFilterList = ctx.getTurboFilterList(); - boolean done = false; - for (TurboFilter turboFilter : turboFilterList) + String[] countries = Locale.getISOCountries(); + ThreadLocalRandom rand = ThreadLocalRandom.current(); + for (int i = 0; i < 10000; i++) { - if (turboFilter instanceof ReconfigureOnChangeFilter) - { - ReconfigureOnChangeFilter reconfigureFilter = (ReconfigureOnChangeFilter) turboFilter; - reconfigureFilter.setContext(ctx); - reconfigureFilter.setRefreshPeriod(millis); - reconfigureFilter.stop(); - reconfigureFilter.start(); // start() sets the next check timestammp - done = true; - break; - } + execute("INSERT INTO %s (year, country, title) VALUES (1980,?,?)", + countries[rand.nextInt(countries.length)], + "title-" + i); } - ReconfigureOnChangeTask roct = (ReconfigureOnChangeTask) ctx.getObject(RECONFIGURE_ON_CHANGE_TASK); - if (roct != null) + String albumCountByCountry = createFunction(KEYSPACE, + "map,text,text", + "CREATE FUNCTION IF NOT EXISTS %s(state map,country text, album_title text)\n" + + " RETURNS NULL ON NULL INPUT\n" + + " RETURNS map\n" + + " LANGUAGE java\n" + + " AS $$\n" + + " if(state.containsKey(country)) {\n" + + " Long newCount = (Long)state.get(country) + 1;\n" + + " state.put(country, newCount);\n" + + " } else {\n" + + " state.put(country, 1L);\n" + + " }\n" + + " return state;\n" + + " $$;"); + + String releasesByCountry = createAggregate(KEYSPACE, + "text, text", + " CREATE AGGREGATE IF NOT EXISTS %s(text, text)\n" + + " SFUNC " + shortFunctionName(albumCountByCountry) + '\n' + + " STYPE map\n" + + " INITCOND { };"); + + long tEnd = System.currentTimeMillis() + 150; + while (System.currentTimeMillis() < tEnd) { - // New functionality in logback - they replaced ReconfigureOnChangeFilter (which runs in the logging code) - // with an async ReconfigureOnChangeTask - i.e. in a thread that does not become sandboxed. - // Let the test run anyway, just we cannot reconfigure it (and it is pointless to reconfigure). - return; + execute("SELECT " + releasesByCountry + "(country,title) FROM %s WHERE year=1980"); } - - assertTrue("ReconfigureOnChangeFilter not in logback's turbo-filter list - do that by adding scan=\"true\" to logback-test.xml's configuration element", done); } @Test From d08c0d6fbbe46485ff34b38d138e70b9d8d21796 Mon Sep 17 00:00:00 2001 From: mck Date: Fri, 25 Apr 2025 18:53:56 +0200 Subject: [PATCH 285/340] Adjust jenkins splits post accord merge patch by Mick Semb Wever; reviewed by David Capwell for CASSANDRA-20602 --- .jenkins/Jenkinsfile | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index 7cee6634a390..fd32596c646b 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -164,16 +164,16 @@ def tasks() { // (some buffer on the heaviest split under the 1h max is required, ref `timeout(…)` in `test(…)`) 'cqlsh-test': [splits: 1], 'fqltool-test': [splits: 1, size: 'small'], - 'test-cdc': [splits: 8], - 'test': [splits: 16], - 'test-latest': [splits: 16], - 'test-compression': [splits: 16], + 'test-cdc': [splits: 20], + 'test': [splits: 20], + 'test-latest': [splits: 20], + 'test-compression': [splits: 20], 'stress-test': [splits: 1, size: 'small'], 'test-burn': [splits: 2], 'long-test': [splits: 4], - 'test-oa': [splits: 16], - 'test-system-keyspace-directory': [splits: 16], - 'jvm-dtest': [splits: 12], + 'test-oa': [splits: 20], + 'test-system-keyspace-directory': [splits: 20], + 'jvm-dtest': [splits: 16], 'jvm-dtest-upgrade': [splits: 6], 'simulator-dtest': [splits: 1, size: 'large'], 'dtest': [splits: 64, size: 'large'], @@ -182,10 +182,10 @@ def tasks() { 'dtest-large': [splits: 6, size: 'large'], 'dtest-large-novnode': [splits: 6, size: 'large'], 'dtest-large-latest': [splits: 6, size: 'large'], - 'dtest-upgrade': [splits: 128, size: 'large'], - 'dtest-upgrade-novnode': [splits: 128, size: 'large'], - 'dtest-upgrade-large': [splits: 32, size: 'large'], - 'dtest-upgrade-novnode-large': [splits: 32, size: 'large'], + 'dtest-upgrade': [splits: 160, size: 'large'], + 'dtest-upgrade-novnode': [splits: 160, size: 'large'], + 'dtest-upgrade-large': [splits: 40, size: 'large'], + 'dtest-upgrade-novnode-large': [splits: 40, size: 'large'], ] testSteps.each() { it.value.put('type', 'test') From 4bcae8e57e847200c9f202b325fff97f62eaa7d0 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Thu, 17 Apr 2025 14:35:47 -0500 Subject: [PATCH 286/340] Ensure RowFilter#isMutableIntersection() properly evaluates numeric ranges on a single column patch by Caleb Rackliffe; reviewed by Ariel Weisberg for CASSANDRA-20566 --- CHANGES.txt | 1 + .../apache/cassandra/db/filter/RowFilter.java | 13 +++++- .../cassandra/db/filter/RowFilterTest.java | 41 ++++++++++++++++--- 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 545ba01dab15..5ec727e79ffa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Ensure RowFilter#isMutableIntersection() properly evaluates numeric ranges on a single column (CASSANDRA-20566) * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) * SAI marks an index as non-empty when a partial partition/row modifications is flushed due to repair (CASSANDRA-20567) * SAI fails queries when multiple columns exist and a non-indexed column is a composite with a map (CASSANDRA-19891) diff --git a/src/java/org/apache/cassandra/db/filter/RowFilter.java b/src/java/org/apache/cassandra/db/filter/RowFilter.java index f1b095920f4a..2cb0af969dab 100644 --- a/src/java/org/apache/cassandra/db/filter/RowFilter.java +++ b/src/java/org/apache/cassandra/db/filter/RowFilter.java @@ -21,13 +21,16 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; import com.google.common.base.Objects; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -173,15 +176,21 @@ public boolean isStrict() */ public boolean isMutableIntersection() { - int count = 0; + Set columns = null; for (Expression e : expressions) { if (e.column.isStatic() && expressions.size() > 1) return true; if (!e.column.isPrimaryKeyColumn()) - if (++count > 1) + { + if (columns == null) + columns = new HashSet<>(expressions.size()); + + columns.add(e.column); + if (columns.size() > 1) return true; + } } return false; } diff --git a/test/unit/org/apache/cassandra/db/filter/RowFilterTest.java b/test/unit/org/apache/cassandra/db/filter/RowFilterTest.java index 8952262e2be0..d2def5ba97b3 100644 --- a/test/unit/org/apache/cassandra/db/filter/RowFilterTest.java +++ b/test/unit/org/apache/cassandra/db/filter/RowFilterTest.java @@ -20,9 +20,10 @@ import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collections; import java.util.concurrent.atomic.AtomicBoolean; -import org.junit.Assert; +import com.google.common.collect.ImmutableList; import org.junit.Test; import org.apache.cassandra.cql3.ColumnIdentifier; @@ -33,6 +34,7 @@ import org.apache.cassandra.db.LivenessInfo; import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.partitions.SingletonUnfilteredPartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.db.rows.BTreeRow; @@ -45,8 +47,12 @@ import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.btree.BTree; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + public class RowFilterTest { @@ -88,8 +94,8 @@ public void close() closed.set(true); } }), 1); - Assert.assertFalse(iter.hasNext()); - Assert.assertTrue(closed.get()); + assertFalse(iter.hasNext()); + assertTrue(closed.get()); filter = RowFilter.none().withNewExpressions(new ArrayList<>()); filter.add(r, Operator.NEQ, one); @@ -122,9 +128,34 @@ public void close() closed.set(true); } }), 1); - Assert.assertFalse(iter.hasNext()); - Assert.assertTrue(closed.get()); + assertFalse(iter.hasNext()); + assertTrue(closed.get()); } + @Test + public void testMutableIntersections() + { + TableMetadata metadata = TableMetadata.builder("testks", "testcf") + .addPartitionKeyColumn("pk", Int32Type.instance) + .addRegularColumn("r", Int32Type.instance) + .addRegularColumn("t", UTF8Type.instance) + .build(); + + RowFilter filter = RowFilter.none().withNewExpressions(new ArrayList<>()); + assertFalse(filter.isMutableIntersection()); + + ColumnMetadata r = metadata.getColumn(new ColumnIdentifier("r", true)); + RowFilter.Expression gt = new RowFilter.SimpleExpression(r, Operator.GT, ByteBufferUtil.EMPTY_BYTE_BUFFER); + filter = filter.withNewExpressions(Collections.singletonList(gt)); + assertFalse(filter.isMutableIntersection()); + + RowFilter.Expression lt = new RowFilter.SimpleExpression(r, Operator.LT, ByteBufferUtil.EMPTY_BYTE_BUFFER); + filter = filter.withNewExpressions(ImmutableList.of(gt, lt)); + assertFalse(filter.isMutableIntersection()); + ColumnMetadata t = metadata.getColumn(new ColumnIdentifier("t", true)); + RowFilter.Expression eq = new RowFilter.SimpleExpression(t, Operator.EQ, ByteBufferUtil.EMPTY_BYTE_BUFFER); + filter = filter.withNewExpressions(ImmutableList.of(gt, lt, eq)); + assertTrue(filter.isMutableIntersection()); + } } From ec26d53d6cae9f5e811a2b43e0c37ccd5a9dbbf4 Mon Sep 17 00:00:00 2001 From: Mick Semb Wever Date: Tue, 1 Apr 2025 13:12:33 +0200 Subject: [PATCH 287/340] In ubuntu2004_test automatically find the latest available patch version for each C* major.minor Inspired by https://github.com/apache/cassandra-builds/pull/108 Closes https://github.com/apache/cassandra/pull/4025 patch by Mick Semb Wever; reviewed by Siyao (Jane) He for CASSJAVA-40 --- .build/docker/ubuntu2004_test.docker | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.build/docker/ubuntu2004_test.docker b/.build/docker/ubuntu2004_test.docker index 8ffd24d18a7b..9d19baef18b6 100644 --- a/.build/docker/ubuntu2004_test.docker +++ b/.build/docker/ubuntu2004_test.docker @@ -124,22 +124,23 @@ RUN /bin/bash -c "source ${BUILD_HOME}/env3.8/bin/activate && \ ccm create -n 1 -v git:cassandra-4.1 test && ccm remove test && \ ccm create -n 1 -v git:cassandra-4.0 test && ccm remove test" -# Initialize ccm versions. right side of each sequence needs to be updated with new releases. -# this can be checked with: -# `curl -s https://downloads.apache.org/cassandra/ | grep -oP '(?<=href=\")[0-9]+\.[0-9]+\.[0-9]+(?=)' | sort -rV | uniq -w 3` +# Initialize ccm versions. branch heads and all versions iterating through to the latest version found on downloads.apache.org/cassandra RUN bash -c 'source ${BUILD_HOME}/env3.8/bin/activate && \ - for i in {1..14} ; do echo $i ; ccm create --quiet -n 1 -v binary:4.0.$i test && ccm remove test ; done && \ - for i in {1..7} ; do echo $i ; ccm create --quiet -n 1 -v binary:4.1.$i test && ccm remove test ; done' + latest_4_0=$(curl -s https://downloads.apache.org/cassandra/ | grep -oP "(?<=href=\")4\.0\.[0-9]+(?=\")" | sort -V | tail -1 | cut -d"." -f3) && \ + for i in $(seq 1 $latest_4_0); do echo $i ; ccm create --quiet -n 1 -v binary:4.0.$i test && ccm remove test ; done && \ + latest_4_1=$(curl -s https://downloads.apache.org/cassandra/ | grep -oP "(?<=href=\")4\.1\.[0-9]+(?=\")" | sort -V | tail -1 | cut -d"." -f3) && \ + for i in $(seq 1 $latest_4_1); do echo $i ; ccm create --quiet -n 1 -v binary:4.1.$i test && ccm remove test ; done' # 5+ requires java11 RUN sudo update-java-alternatives --set java-1.11.0-openjdk-$(dpkg --print-architecture) -# Initialize the CCM git repo, after removing the git cache, as this also can fail to clone +# Initialize ccm versions. branch heads and all versions iterating through to the latest version found on downloads.apache.org/cassandra RUN rm -fr ${BUILD_HOME}/.ccm/repository/_git_cache_apache RUN /bin/bash -c 'source ${BUILD_HOME}/env3.8/bin/activate && \ ccm create --quiet -n 1 -v git:cassandra-5.0 test && ccm remove test && \ ccm create --quiet -n 1 -v git:trunk test && ccm remove test && \ - for i in {1..2} ; do echo $i ; ccm create --quiet -n 1 -v binary:5.0.$i test && ccm remove test ; done' + latest_5_0=$(curl -s https://downloads.apache.org/cassandra/ | grep -oP "(?<=href=\")5\.0\.[0-9]+(?=\")" | sort -V | tail -1 | cut -d"." -f3) && \ + for i in $(seq 1 $latest_5_0); do echo $i ; ccm create --quiet -n 1 -v binary:5.0.$i test && ccm remove test ; done' # the .git subdirectories to pip installed cassandra-driver breaks virtualenv-clone, so just remove them # and other directories we don't need in image From 5e003af5167661f3426de5701fd3607b28b7eee5 Mon Sep 17 00:00:00 2001 From: Marcus Eriksson Date: Tue, 15 Apr 2025 14:02:44 +0200 Subject: [PATCH 288/340] Fix token restrictions with MIN_TOKEN Patch by marcuse and David Capwell; reviewed by David Capwell for CASSANDRA-20557 --- CHANGES.txt | 1 + .../org/apache/cassandra/cql3/Operator.java | 41 +++++++------ .../ClusteringColumnRestrictions.java | 13 ++-- .../cql3/restrictions/ClusteringElements.java | 59 +++++++++++-------- .../cql3/restrictions/MergedRestriction.java | 5 +- .../PartitionKeyRestrictions.java | 2 +- .../cql3/restrictions/SimpleRestriction.java | 7 ++- .../cql3/restrictions/SingleRestriction.java | 4 +- .../test/cql3/SingleNodeTableWalkTest.java | 18 +++--- .../cql3/SingleNodeTokenConflictTest.java | 1 + .../test/cql3/StatefulASTBase.java | 42 +++++++++++++ .../harry/model/ASTSingleTableModel.java | 45 ++++++++++---- .../cassandra/cql3/SimpleQueryTest.java | 14 ++++- .../restrictions/ClusteringElementsTest.java | 27 +++++---- 14 files changed, 189 insertions(+), 90 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 2572c1f313d6..135f59e5e3cc 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Fix token restrictions with MIN_TOKEN (CASSANDRO-20557) * Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 (CASSANDRA-20429) * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) * Change SSTableSimpleScanner to use SSTableReader#openDataReaderForScan (CASSANDRA-20538) diff --git a/src/java/org/apache/cassandra/cql3/Operator.java b/src/java/org/apache/cassandra/cql3/Operator.java index 201a046b2f57..93a81fa2f4bb 100644 --- a/src/java/org/apache/cassandra/cql3/Operator.java +++ b/src/java/org/apache/cassandra/cql3/Operator.java @@ -40,6 +40,7 @@ import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.rows.CellPath; import org.apache.cassandra.db.rows.ComplexColumnData; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.ColumnMetadata; @@ -95,12 +96,12 @@ public boolean requiresFilteringOrIndexingFor(ColumnMetadata.Kind columnKind) } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { assert args.size() == 1 : this + " accept only one single value"; ClusteringElements arg = args.get(0); - rangeSet.removeAll(ClusteringElements.lessThan(arg)); - rangeSet.removeAll(ClusteringElements.greaterThan(arg)); + rangeSet.removeAll(ClusteringElements.lessThan(arg, partitioner)); + rangeSet.removeAll(ClusteringElements.greaterThan(arg, partitioner)); } @Override @@ -144,10 +145,10 @@ public boolean requiresFilteringOrIndexingFor(ColumnMetadata.Kind columnKind) } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { assert args.size() == 1 : this + " accept only one single value"; - rangeSet.removeAll(ClusteringElements.atLeast(args.get(0))); + rangeSet.removeAll(ClusteringElements.atLeast(args.get(0), partitioner)); } @Override @@ -198,10 +199,10 @@ public boolean requiresFilteringOrIndexingFor(ColumnMetadata.Kind columnKind) } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { assert args.size() == 1 : this + " accept only one single value"; - rangeSet.removeAll(ClusteringElements.greaterThan(args.get(0))); + rangeSet.removeAll(ClusteringElements.greaterThan(args.get(0), partitioner)); } @Override @@ -252,10 +253,10 @@ public boolean requiresFilteringOrIndexingFor(ColumnMetadata.Kind columnKind) } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { assert args.size() == 1 : this + " accept only one single value"; - rangeSet.removeAll(ClusteringElements.lessThan(args.get(0))); + rangeSet.removeAll(ClusteringElements.lessThan(args.get(0), partitioner)); } @Override @@ -305,10 +306,10 @@ public boolean requiresFilteringOrIndexingFor(ColumnMetadata.Kind columnKind) } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { assert args.size() == 1 : this + " accept only one single value"; - rangeSet.removeAll(ClusteringElements.atMost(args.get(0))); + rangeSet.removeAll(ClusteringElements.atMost(args.get(0), partitioner)); } @Override @@ -499,7 +500,7 @@ public boolean isSatisfiedBy(MultiElementType type, ComplexColumnData leftOpe } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { assert args.size() == 1; rangeSet.remove(ClusteringElements.notEqualTo(args.get(0))); @@ -676,7 +677,7 @@ public boolean requiresFilteringOrIndexingFor(ColumnMetadata.Kind columnKind) } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { for (ClusteringElements clustering : args) rangeSet.remove(ClusteringElements.notEqualTo(clustering)); @@ -805,12 +806,16 @@ public boolean requiresFilteringOrIndexingFor(ColumnMetadata.Kind columnKind) } @Override - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { assert args.size() == 2 : this + " accepts exactly two values"; - args.sort(ClusteringElements.CQL_COMPARATOR); - rangeSet.removeAll(ClusteringElements.lessThan(args.get(0))); - rangeSet.removeAll(ClusteringElements.greaterThan(args.get(1))); + // avoid sorting when working with token restrictions, otherwise we can't know the difference between these queries: + // select * from x.y where token(id) between 0 and MIN_TOKEN + // select * from x.y where token(id) between MIN_TOKEN and 0 + if (!args.get(0).token) + args.sort(ClusteringElements.CQL_COMPARATOR); + rangeSet.removeAll(ClusteringElements.lessThan(args.get(0), partitioner)); + rangeSet.removeAll(ClusteringElements.greaterThan(args.get(1), partitioner)); } @Override @@ -1074,7 +1079,7 @@ public boolean appliesToMapKeys() * @param rangeSet the range set to restrict * @param args the operator arguments */ - public void restrict(RangeSet rangeSet, List args) + public void restrict(RangeSet rangeSet, List args, IPartitioner partitioner) { throw new UnsupportedOperationException(this + " is not a range operator"); } diff --git a/src/java/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictions.java b/src/java/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictions.java index 10b3864aec37..dd43bb002387 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictions.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictions.java @@ -24,6 +24,7 @@ import com.google.common.collect.RangeSet; import org.apache.cassandra.db.guardrails.Guardrails; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.cql3.QueryOptions; @@ -51,18 +52,22 @@ final class ClusteringColumnRestrictions extends RestrictionSetWrapper */ private final boolean allowFiltering; + private final IPartitioner partitioner; + public ClusteringColumnRestrictions(TableMetadata table, boolean allowFiltering) { - this(table.comparator, RestrictionSet.empty(), allowFiltering); + this(table.comparator, RestrictionSet.empty(), allowFiltering, table.partitioner); } private ClusteringColumnRestrictions(ClusteringComparator comparator, RestrictionSet restrictionSet, - boolean allowFiltering) + boolean allowFiltering, + IPartitioner partitioner) { super(restrictionSet); this.comparator = comparator; this.allowFiltering = allowFiltering; + this.partitioner = partitioner; } public ClusteringColumnRestrictions mergeWith(Restriction restriction, @Nullable IndexRegistry indexRegistry) throws InvalidRequestException @@ -89,7 +94,7 @@ public ClusteringColumnRestrictions mergeWith(Restriction restriction, @Nullable newRestrictionStart.name); } - return new ClusteringColumnRestrictions(this.comparator, newRestrictionSet, allowFiltering); + return new ClusteringColumnRestrictions(this.comparator, newRestrictionSet, allowFiltering, partitioner); } public NavigableSet> valuesAsClustering(QueryOptions options, ClientState state) throws InvalidRequestException @@ -123,7 +128,7 @@ public Slices slices(QueryOptions options) throws InvalidRequestException if (r.isSlice()) { RangeSet rangeSet = ClusteringElements.all(); - r.restrict(rangeSet, options); + r.restrict(rangeSet, options, partitioner); return builder.extend(rangeSet).buildSlices(); } diff --git a/src/java/org/apache/cassandra/cql3/restrictions/ClusteringElements.java b/src/java/org/apache/cassandra/cql3/restrictions/ClusteringElements.java index f8f04ebb5737..104c73d3ef19 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/ClusteringElements.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/ClusteringElements.java @@ -34,6 +34,7 @@ import org.apache.cassandra.db.BufferClusteringBound; import org.apache.cassandra.db.ClusteringBound; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.schema.ColumnMetadata; /** @@ -94,7 +95,7 @@ public int compare(ClusteringElements a, ClusteringElements b) /** * The empty {@code ClusteringElements} instance used to avoid creating unecessary empty instances. */ - private static final ClusteringElements EMPTY = new ClusteringElements(ImmutableList.of(), ImmutableList.of()); + private static final ClusteringElements EMPTY = new ClusteringElements(ImmutableList.of(), ImmutableList.of(), false); /** * A range representing all {@code ClusteringElements}. @@ -112,7 +113,12 @@ public int compare(ClusteringElements a, ClusteringElements b) */ private final ImmutableList values; - private ClusteringElements(ImmutableList columns, ImmutableList values) + /** + * We need to special case token restrictions to properly handle MIN_TOKEN + */ + public final boolean token; + + private ClusteringElements(ImmutableList columns, ImmutableList values, boolean token) { if (columns.size() != values.size()) throw new IllegalArgumentException("columns and values should have the same size"); @@ -121,6 +127,7 @@ private ClusteringElements(ImmutableList columns, this.columns = columns; this.values = values; + this.token = token; } private static void checkColumnsOrder(ImmutableList columns) @@ -163,9 +170,9 @@ public static ClusteringElements of() * @param value the element value * @return a {@code ClusteringElements} with a single element. */ - public static ClusteringElements of(ColumnSpecification column, ByteBuffer value) + public static ClusteringElements of(ColumnSpecification column, ByteBuffer value, boolean onToken) { - return new ClusteringElements(ImmutableList.of(column), ImmutableList.of(value)); + return new ClusteringElements(ImmutableList.of(column), ImmutableList.of(value), onToken); } /** @@ -176,7 +183,7 @@ public static ClusteringElements of(ColumnSpecification column, ByteBuffer value */ public static ClusteringElements of(List columns, List values) { - return new ClusteringElements(ImmutableList.copyOf(columns), ImmutableList.copyOf(values)); + return new ClusteringElements(ImmutableList.copyOf(columns), ImmutableList.copyOf(values), false); } /** @@ -200,9 +207,9 @@ public ClusteringElements extend(ClusteringElements suffix) ImmutableList newColumns = concat(columns, suffix.columns); ImmutableList newValues = concat(values, suffix.values); - return suffix instanceof Top ? new Top(newColumns, newValues) - : suffix instanceof Bottom ? new Bottom(newColumns, newValues) - : new ClusteringElements(newColumns, newValues); + return suffix instanceof Top ? new Top(newColumns, newValues, token) + : suffix instanceof Bottom ? new Bottom(newColumns, newValues, token) + : new ClusteringElements(newColumns, newValues, token); } private void checkSuffix(ClusteringElements suffix) @@ -245,36 +252,36 @@ public static RangeSet all() * Returns a {@code RangeSet} that contains all values less than or equal to endpoint. * @return a {@code RangeSet} that contains all values less than or equal to endpoint. */ - public static RangeSet atMost(ClusteringElements endpoint) + public static RangeSet atMost(ClusteringElements endpoint, IPartitioner partitioner) { - return buildRangeSet(endpoint, true, BoundType.CLOSED); + return buildRangeSet(endpoint, true, BoundType.CLOSED, partitioner); } /** * Returns a {@code RangeSet} that contains all values less than endpoint. * @return a {@code RangeSet} that contains all values less than endpoint. */ - public static RangeSet lessThan(ClusteringElements endpoint) + public static RangeSet lessThan(ClusteringElements endpoint, IPartitioner partitioner) { - return buildRangeSet(endpoint, true, BoundType.OPEN); + return buildRangeSet(endpoint, true, BoundType.OPEN, partitioner); } /** * Returns a {@code RangeSet} that contains all values greater or equal to endpoint. * @return a {@code RangeSet} that contains all values greater or equal to endpoint. */ - public static RangeSet atLeast(ClusteringElements endpoint) + public static RangeSet atLeast(ClusteringElements endpoint, IPartitioner partitioner) { - return buildRangeSet(endpoint, false, BoundType.CLOSED); + return buildRangeSet(endpoint, false, BoundType.CLOSED, partitioner); } /** * Returns a {@code RangeSet} that contains all values greater than endpoint. * @return a {@code RangeSet} that contains all values greater than endpoint. */ - public static RangeSet greaterThan(ClusteringElements endpoint) + public static RangeSet greaterThan(ClusteringElements endpoint, IPartitioner partitioner) { - return buildRangeSet(endpoint, false, BoundType.OPEN); + return buildRangeSet(endpoint, false, BoundType.OPEN, partitioner); } public static Range notEqualTo(ClusteringElements endpoint) @@ -282,7 +289,7 @@ public static Range notEqualTo(ClusteringElements endpoint) return Range.closed(endpoint.bottom(), endpoint.top()); } - private static RangeSet buildRangeSet(ClusteringElements endpoint, boolean upperBound, BoundType boundType) + private static RangeSet buildRangeSet(ClusteringElements endpoint, boolean upperBound, BoundType boundType, IPartitioner partitioner) { TreeRangeSet rangeSet = TreeRangeSet.create(); boolean reversed = endpoint.columnType(0).isReversed(); @@ -312,12 +319,16 @@ private static RangeSet buildRangeSet(ClusteringElements end oppositeEndpoint = upperBound ? e.bottom() : e.top(); } } + boolean minToken = false; + if (endpoint.token && !upperBound) + minToken = partitioner.getTokenFactory().fromByteArray(endpoint.get(0)).isMinimum(); // We need to add the last range or the only one if there was no change of direction. Range range = upperBound ? Range.range(oppositeEndpoint, BoundType.CLOSED, boundType == BoundType.OPEN ? endpoint.bottom() : endpoint.top(), boundType) - : Range.range(boundType == BoundType.OPEN ? endpoint.top() : endpoint.bottom(), + : Range.range(minToken ? oppositeEndpoint + : boundType == BoundType.OPEN ? endpoint.top() : endpoint.bottom(), boundType, oppositeEndpoint, BoundType.CLOSED); @@ -331,7 +342,7 @@ private static RangeSet buildRangeSet(ClusteringElements end */ public ClusteringElements top() { - return new Top(columns, values); + return new Top(columns, values, token); } /** @@ -340,7 +351,7 @@ public ClusteringElements top() */ public ClusteringElements bottom() { - return new Bottom(columns, values); + return new Bottom(columns, values, token); } @Override @@ -472,9 +483,9 @@ private static E last(List elements) */ private static class Bottom extends ClusteringElements { - private Bottom(ImmutableList columns, ImmutableList values) + private Bottom(ImmutableList columns, ImmutableList values, boolean token) { - super(columns, values); + super(columns, values, token); } @Override @@ -491,9 +502,9 @@ public ClusteringBound toBound(boolean isStart, boolean isInclusive) */ private static class Top extends ClusteringElements { - private Top(ImmutableList columns, ImmutableList values) + private Top(ImmutableList columns, ImmutableList values, boolean token) { - super(columns, values); + super(columns, values, token); } @Override diff --git a/src/java/org/apache/cassandra/cql3/restrictions/MergedRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/MergedRestriction.java index 7976b78b7c01..9296e00aede3 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/MergedRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/MergedRestriction.java @@ -29,6 +29,7 @@ import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.functions.Function; import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.index.Index; import org.apache.cassandra.index.IndexRegistry; import org.apache.cassandra.schema.ColumnMetadata; @@ -325,11 +326,11 @@ public List values(QueryOptions options) } @Override - public void restrict(RangeSet rangeSet, QueryOptions options) + public void restrict(RangeSet rangeSet, QueryOptions options, IPartitioner partitioner) { for (int i = 0, m = restrictions.size(); i < m; i++) { - restrictions.get(i).restrict(rangeSet, options); + restrictions.get(i).restrict(rangeSet, options, partitioner); } } diff --git a/src/java/org/apache/cassandra/cql3/restrictions/PartitionKeyRestrictions.java b/src/java/org/apache/cassandra/cql3/restrictions/PartitionKeyRestrictions.java index e4df4c7a6976..a33c32ae9aab 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/PartitionKeyRestrictions.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/PartitionKeyRestrictions.java @@ -296,7 +296,7 @@ private RangeSet toRangeSet(IPartitioner partitioner, List toRangeSet(IPartitioner partitioner, SingleRestriction slice, QueryOptions options) { RangeSet rangeSet = ClusteringElements.all(); - slice.restrict(rangeSet, options); + slice.restrict(rangeSet, options, partitioner); ImmutableRangeSet.Builder builder = ImmutableRangeSet.builder(); diff --git a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java index 8592fbbb7b17..b5bb2f43fa8d 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java @@ -34,6 +34,7 @@ import org.apache.cassandra.cql3.terms.Terms; import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.marshal.ListType; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.index.Index; import org.apache.cassandra.index.IndexRegistry; import org.apache.cassandra.schema.ColumnMetadata; @@ -226,10 +227,10 @@ public List values(QueryOptions options) } @Override - public void restrict(RangeSet rangeSet, QueryOptions options) + public void restrict(RangeSet rangeSet, QueryOptions options, IPartitioner partitioner) { assert operator.isSlice() || operator == Operator.EQ; - operator.restrict(rangeSet, bindAndGetClusteringElements(options)); + operator.restrict(rangeSet, bindAndGetClusteringElements(options), partitioner); } private List bindAndGetClusteringElements(QueryOptions options) @@ -254,7 +255,7 @@ private List bindAndGetSingleTermClusteringElements(QueryOpt List elements = new ArrayList<>(values.size()); for (int i = 0; i < values.size(); i++) - elements.add(ClusteringElements.of(columnsExpression.columnSpecification(), values.get(i))); + elements.add(ClusteringElements.of(columnsExpression.columnSpecification(), values.get(i), isOnToken())); return elements; } diff --git a/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java index e317e8742da5..7720fb1bb660 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/SingleRestriction.java @@ -22,6 +22,7 @@ import com.google.common.collect.RangeSet; import org.apache.cassandra.cql3.QueryOptions; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.index.Index; /** @@ -103,7 +104,8 @@ default SingleRestriction mergeWith(SingleRestriction other) * * @param rangeSet the range set to add to * @param options the query options + * @param partitioner the partitioner, used to identify MIN_TOKEN when using token restrictions * @throws UnsupportedOperationException if the operator is not an operator selecting ranges of data. */ - void restrict(RangeSet rangeSet, QueryOptions options); + void restrict(RangeSet rangeSet, QueryOptions options, IPartitioner partitioner); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java index 581b664db539..762a2b83bca2 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java @@ -44,7 +44,7 @@ import accord.utils.RandomSource; import org.apache.cassandra.cql3.KnownIssue; import org.apache.cassandra.cql3.ast.Bind; -import org.apache.cassandra.cql3.ast.Conditional; +import org.apache.cassandra.cql3.ast.Conditional.Where.Inequality; import org.apache.cassandra.cql3.ast.CreateIndexDDL; import org.apache.cassandra.cql3.ast.FunctionCall; import org.apache.cassandra.cql3.ast.Mutation; @@ -170,7 +170,7 @@ protected List supportedIndexers() Select.Builder builder = Select.builder().table(state.metadata); builder.where(FunctionCall.tokenByColumns(state.model.factory.partitionColumns), - Conditional.Where.Inequality.EQUAL, + Inequality.EQUAL, token(state, ref)); Select select = builder.build(); @@ -213,10 +213,10 @@ protected List supportedIndexers() else { builder.where(pkToken, - startInclusive ? Conditional.Where.Inequality.GREATER_THAN_EQ : Conditional.Where.Inequality.GREATER_THAN, + startInclusive ? Inequality.GREATER_THAN_EQ : Inequality.GREATER_THAN, token(state, start)); builder.where(pkToken, - endInclusive ? Conditional.Where.Inequality.LESS_THAN_EQ : Conditional.Where.Inequality.LESS_THAN, + endInclusive ? Inequality.LESS_THAN_EQ : Inequality.LESS_THAN, token(state, end)); } Select select = builder.build(); @@ -330,7 +330,7 @@ protected List supportedIndexers() private Property.Command simpleRangeSearch(RandomSource rs, State state, Symbol symbol, ByteBuffer value, Select.Builder builder) { // do a simple search, like > or < - Conditional.Where.Inequality kind = state.rangeInequalityGen.next(rs); + Inequality kind = state.rangeInequalityGen.next(rs); builder.where(symbol, kind, value); Select select = builder.build(); var indexed = state.indexes.get(symbol); @@ -368,7 +368,8 @@ public void test() throws IOException .add(StatefulASTBase::fullTableScan) .addIf(State::hasPartitions, this::selectExisting) .addAllIf(State::supportTokens, b -> b.add(this::selectToken) - .add(this::selectTokenRange)) + .add(this::selectTokenRange) + .add(StatefulASTBase::selectMinTokenRange)) .addIf(State::hasEnoughMemtable, StatefulASTBase::flushTable) .addIf(State::hasEnoughSSTables, StatefulASTBase::compactTable) .addIf(State::allowNonPartitionQuery, this::nonPartitionQuery) @@ -557,11 +558,6 @@ private LinkedHashMap createIndexes(RandomSource rs, Tabl return indexed; } - public boolean hasPartitions() - { - return !model.isEmpty(); - } - public boolean supportTokens() { return hasPartitions(); diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java index 1f754f33606a..090843c4b311 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTokenConflictTest.java @@ -280,6 +280,7 @@ public void test() throws IOException .add(SingleNodeTokenConflictTest::tokenBetween) .add(SingleNodeTokenConflictTest::tokenRange) .add(SingleNodeTokenConflictTest::tokenBoundRange) + .addIf(State::hasPartitions, StatefulASTBase::selectMinTokenRange) .addIf(State::hasEnoughMemtable, StatefulASTBase::flushTable) .addIf(State::hasEnoughSSTables, StatefulASTBase::compactTable) .destroyState(State::close) diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java index 3a23e1bfcc0a..a16b47c1d760 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java @@ -55,6 +55,7 @@ import org.apache.cassandra.cql3.ast.Bind; import org.apache.cassandra.cql3.ast.CQLFormatter; import org.apache.cassandra.cql3.ast.Conditional; +import org.apache.cassandra.cql3.ast.FunctionCall; import org.apache.cassandra.cql3.ast.Literal; import org.apache.cassandra.cql3.ast.Mutation; import org.apache.cassandra.cql3.ast.Select; @@ -238,6 +239,42 @@ private static Select selectForMutation(S state, Mutatio return state.command(rs, select, "full table scan"); } + protected static Property.Command selectMinTokenRange(RandomSource rs, S state) + { + var key = rs.pickOrderedSet(state.model.partitionKeys()); + FunctionCall tokenCall = FunctionCall.tokenByColumns(state.model.factory.partitionColumns); + Literal min = Literal.of(key.token.getLongValue()); + Literal max = Literal.of(Long.MIN_VALUE); + if (rs.nextBoolean()) + { + Literal tmp = min; + min = max; + max = tmp; + } + Select select; + if (rs.nextBoolean()) + { + select = Select.builder(state.metadata) + .where(tokenCall, state.greaterThanGen.next(rs), min) + .where(tokenCall, state.lessThanGen.next(rs), max) + .build(); + } + else + { + // it's possible that the range was flipped, which is known bug with BETWEEN, so + // make sure the range is not flipped until that bug is fixed + if (IGNORED_ISSUES.contains(KnownIssue.BETWEEN_START_LARGER_THAN_END)) + { + min = Literal.of(key.token.getLongValue()); + max = Literal.of(Long.MIN_VALUE); + } + select = Select.builder(state.metadata) + .between(tokenCall, min, max) + .build(); + } + return state.command(rs, select, "min token range"); + } + protected static abstract class BaseState implements AutoCloseable { protected final RandomSource rs; @@ -294,6 +331,11 @@ protected BaseState(RandomSource rs, Cluster cluster, TableMetadata metadata) createTable(metadata); } + public boolean hasPartitions() + { + return !model.isEmpty(); + } + protected boolean readAfterWrite() { return false; diff --git a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java index e1df90e0c269..a781e10e21d2 100644 --- a/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java +++ b/test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java @@ -71,6 +71,8 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.BooleanType; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.LongType; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.harry.model.BytesPartitionState.PrimaryKey; import org.apache.cassandra.harry.util.StringUtils; @@ -1436,7 +1438,7 @@ private List getByTokenSearch(@Nullable TokenCondition toke NavigableSet keys = partitions.navigableKeySet(); // To support the case where 2+ keys share the same token, need to create a token ref before and after the token, to make sure // the head/tail sets find the matches correctly - if (tokenLowerBound != null) + if (tokenLowerBound != null && !tokenLowerBound.token.isMinimum()) { boolean inclusive; switch (tokenLowerBound.inequality) @@ -1454,7 +1456,7 @@ private List getByTokenSearch(@Nullable TokenCondition toke // when inclusive=false the ref should be after the token, that way they are excluded keys = keys.tailSet(factory.createRef(tokenLowerBound.token, !inclusive), inclusive); } - if (tokenUpperBound != null) + if (tokenUpperBound != null && !tokenUpperBound.token.isMinimum()) { boolean inclusive; switch (tokenUpperBound.inequality) @@ -1600,6 +1602,30 @@ private static ByteBuffer eval(Expression e) return ExpressionEvaluator.evalEncoded(e); } + private BytesPartitionState.Ref processToken(Expression e) + { + BytesPartitionState.Ref ref; + if (e instanceof FunctionCall) + { + FunctionCall rhs = (FunctionCall) e; + List pkValues = rhs.arguments.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList()); + ref = factory.createRef(new BufferClustering(pkValues.toArray(ByteBuffer[]::new))); + } + else if (e instanceof Value) + { + var value = (Value) e; + if (value.type() != LongType.instance) + throw new AssertionError("Token values only expected to be bigint but given " + value.type().asCQL3Type()); + var token = new Murmur3Partitioner.LongToken(LongType.instance.compose(value.valueEncoded())); + ref = factory.createRef(token, true); // should this be false? + } + else + { + throw new UnsupportedOperationException(e.getClass().toString()); + } + return ref; + } + private static class Row { private static final Row EMPTY = new Row(ImmutableUniqueList.empty(), ByteBufferUtil.EMPTY_ARRAY); @@ -1727,7 +1753,7 @@ private void maybeNormalizeTokenBounds() if (tokenLowerBound != null && tokenUpperBound != null) { int rc = tokenLowerBound.token.compareTo(tokenUpperBound.token); - if (rc > 0) + if (rc > 0 && !tokenUpperBound.token.isMinimum()) { // where token > 10 and < 0.... nothing matches that! unmatchable = true; @@ -1783,9 +1809,7 @@ else if (w.lhs instanceof FunctionCall) switch (fn.name()) { case "token": - FunctionCall rhs = (FunctionCall) w.rhs; - List pkValues = rhs.arguments.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList()); - BytesPartitionState.Ref ref = factory.createRef(new BufferClustering(pkValues.toArray(ByteBuffer[]::new))); + BytesPartitionState.Ref ref = processToken(w.rhs); switch (w.kind) { case EQUAL: @@ -1881,17 +1905,14 @@ else if (between.ref instanceof FunctionCall) { case "token": // if the ref is a token, the only valid start/end are also token - List start = ((FunctionCall) between.start).arguments.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList()); - Token startToken = factory.createRef(new BufferClustering(start.toArray(ByteBuffer[]::new))).token; - - List end = ((FunctionCall) between.end).arguments.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList()); - Token endToken = factory.createRef(new BufferClustering(end.toArray(ByteBuffer[]::new))).token; + Token startToken = processToken(between.start).token; + Token endToken = processToken(between.end).token; if (startToken.equals(endToken)) { token = startToken; } - else if (startToken.compareTo(endToken) > 0) + else if (startToken.compareTo(endToken) > 0 && !endToken.isMinimum()) { // start is larger than end... no matches unmatchable = true; diff --git a/test/unit/org/apache/cassandra/cql3/SimpleQueryTest.java b/test/unit/org/apache/cassandra/cql3/SimpleQueryTest.java index 0c89f9b599cb..fe160f0af6ad 100644 --- a/test/unit/org/apache/cassandra/cql3/SimpleQueryTest.java +++ b/test/unit/org/apache/cassandra/cql3/SimpleQueryTest.java @@ -559,5 +559,17 @@ public void testSStableTimestampOrdering() throws Throwable execute("DELETE FROM %s USING TIMESTAMP 6 WHERE k1 = 1"); assertRows(execute("SELECT * FROM %s WHERE k1=1"), row(1, 1, 2)); - } + } + + @Test + public void testTokenRestriction() + { + createTable("CREATE TABLE %s (id int primary key)"); + for (int i = 0; i < 10; i++) + execute("INSERT INTO %s (id) values (?)", i); + + assertRows(execute("SELECT * FROM %s where token(id) > 0 AND token(id) < " + Long.MIN_VALUE), row(7), row(6), row(9), row(3)); + assertRows(execute("SELECT * FROM %s where token(id) > 0 AND token(id) <= " + Long.MIN_VALUE), row(7), row(6), row(9), row(3)); + assertRows(execute("SELECT * FROM %s where token(id) BETWEEN 0 AND " + Long.MIN_VALUE), row(7), row(6), row(9), row(3)); + } } diff --git a/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringElementsTest.java b/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringElementsTest.java index 60e2508be20b..8ad94d4638c9 100644 --- a/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringElementsTest.java +++ b/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringElementsTest.java @@ -31,6 +31,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.ReversedType; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.harry.util.ByteUtils; import org.apache.cassandra.schema.ColumnMetadata; @@ -187,7 +188,7 @@ public void testAtMostWithOneColumn() ClusteringElements four = elements(type, 4); ClusteringElements six = elements(type, 6); - RangeSet rangeSet = ClusteringElements.atMost(four); + RangeSet rangeSet = ClusteringElements.atMost(four, Murmur3Partitioner.instance); assertTrue(rangeSet.contains(one)); assertTrue(rangeSet.contains(four)); assertFalse(rangeSet.contains(six)); @@ -208,7 +209,7 @@ public void testAtMostWithTwoColumns() ClusteringElements oneFive = elements(columns, 1, 5); ClusteringElements twoFive = elements(columns, 2, 5); - RangeSet rangeSet = ClusteringElements.atMost(oneThree); + RangeSet rangeSet = ClusteringElements.atMost(oneThree, Murmur3Partitioner.instance); assertTrue(rangeSet.contains(zeroZero)); assertTrue(rangeSet.contains(oneZero)); @@ -258,7 +259,7 @@ public void testAtMostWithThreeColumns() ClusteringElements oneFiveOne = elements(columns, 1, 5, 1); ClusteringElements twoFiveFive = elements(columns, 2, 5, 5); - RangeSet rangeSet = ClusteringElements.atMost(oneThreeOne); + RangeSet rangeSet = ClusteringElements.atMost(oneThreeOne, Murmur3Partitioner.instance); assertTrue(rangeSet.contains(zeroZeroZero)); assertTrue(rangeSet.contains(oneZeroOne)); @@ -279,7 +280,7 @@ public void testLessThanWithOneColumn() ClusteringElements four = elements(column, 4); ClusteringElements six = elements(column, 6); - RangeSet rangeSet = ClusteringElements.lessThan(four); + RangeSet rangeSet = ClusteringElements.lessThan(four, Murmur3Partitioner.instance); assertTrue(rangeSet.contains(one)); assertFalse(rangeSet.contains(four)); assertFalse(rangeSet.contains(six)); @@ -300,7 +301,7 @@ public void testLessThanWithTwoColumns() ClusteringElements oneFive = elements(columns, 1, 5); ClusteringElements twoFive = elements(columns, 2, 5); - RangeSet rangeSet = ClusteringElements.lessThan(oneThree); + RangeSet rangeSet = ClusteringElements.lessThan(oneThree, Murmur3Partitioner.instance); assertTrue(rangeSet.contains(zeroZero)); assertTrue(rangeSet.contains(oneZero)); @@ -351,7 +352,7 @@ public void testLessThanWithThreeColumns() ClusteringElements oneFiveOne = elements(columns, 1, 5, 1); ClusteringElements twoFiveFive = elements(columns, 2, 5, 5); - RangeSet rangeSet = ClusteringElements.lessThan(oneThreeOne); + RangeSet rangeSet = ClusteringElements.lessThan(oneThreeOne, Murmur3Partitioner.instance); assertTrue(rangeSet.contains(zeroZeroZero)); assertTrue(rangeSet.contains(oneZeroOne)); @@ -372,7 +373,7 @@ public void testAtLeastWithOneColumn() ClusteringElements four = elements(column, 4); ClusteringElements six = elements(column, 6); - RangeSet rangeSet = ClusteringElements.atLeast(four); + RangeSet rangeSet = ClusteringElements.atLeast(four, Murmur3Partitioner.instance); assertFalse(rangeSet.contains(one)); assertTrue(rangeSet.contains(four)); assertTrue(rangeSet.contains(six)); @@ -393,7 +394,7 @@ public void testAtLeastWithTwoColumns() ClusteringElements oneFive = elements(columns, 1, 5); ClusteringElements twoFive = elements(columns, 2, 5); - RangeSet rangeSet = ClusteringElements.atLeast(oneThree); + RangeSet rangeSet = ClusteringElements.atLeast(oneThree, Murmur3Partitioner.instance); assertFalse(rangeSet.contains(zeroZero)); assertFalse(rangeSet.contains(oneZero)); @@ -444,7 +445,7 @@ public void testAtLeastWithThreeColumns() ClusteringElements oneFiveOne = elements(columns, 1, 5, 1); ClusteringElements twoFiveFive = elements(columns, 2, 5, 5); - RangeSet rangeSet = ClusteringElements.atLeast(oneThreeOne); + RangeSet rangeSet = ClusteringElements.atLeast(oneThreeOne, Murmur3Partitioner.instance); assertFalse(rangeSet.contains(zeroZeroZero)); assertFalse(rangeSet.contains(oneZeroOne)); @@ -465,7 +466,7 @@ public void testGreaterThanWithOneColumn() ClusteringElements four = elements(column, 4); ClusteringElements six = elements(column, 6); - RangeSet rangeSet = ClusteringElements.greaterThan(four); + RangeSet rangeSet = ClusteringElements.greaterThan(four, Murmur3Partitioner.instance); assertFalse(rangeSet.contains(one)); assertFalse(rangeSet.contains(four)); assertTrue(rangeSet.contains(six)); @@ -486,7 +487,7 @@ public void testGreaterThanWithTwoColumns() ClusteringElements oneFive = elements(columns, 1, 5); ClusteringElements twoFive = elements(columns, 2, 5); - RangeSet rangeSet = ClusteringElements.greaterThan(oneThree); + RangeSet rangeSet = ClusteringElements.greaterThan(oneThree, Murmur3Partitioner.instance); assertFalse(rangeSet.contains(zeroZero)); assertFalse(rangeSet.contains(oneZero)); @@ -537,7 +538,7 @@ public void testGreaterThanWithThreeColumns() ClusteringElements oneFiveOne = elements(columns, 1, 5, 1); ClusteringElements twoFiveFive = elements(columns, 2, 5, 5); - RangeSet rangeSet = ClusteringElements.greaterThan(oneThreeOne); + RangeSet rangeSet = ClusteringElements.greaterThan(oneThreeOne, Murmur3Partitioner.instance); assertFalse(rangeSet.contains(zeroZeroZero)); assertFalse(rangeSet.contains(oneZeroOne)); @@ -662,7 +663,7 @@ private void assertUnsupported(String expectedMsg, Runnable r) private static ClusteringElements elements(ColumnMetadata column, int value) { - return ClusteringElements.of(column, bytes(value)); + return ClusteringElements.of(column, bytes(value), false); } private static ClusteringElements elements(List columns, int... values) From f734983287f9713a5eb76cdb5831f8319f5aab10 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 28 Apr 2025 12:33:02 -0700 Subject: [PATCH 289/340] Get accord compiling on JDK 21 patch by Alex Petrov, David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-20608 --- modules/accord | 2 +- .../apache/cassandra/service/accord/AccordJournal.java | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/modules/accord b/modules/accord index ba151600b1f8..3825403cc50e 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit ba151600b1f8f6a493f585810ac14fe35371c762 +Subproject commit 3825403cc50ef7897d5dfb4cbdca5efbc432e8ee diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 66bb8bdffa2c..09477015f3c3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -66,7 +66,6 @@ import org.apache.cassandra.journal.SegmentCompactor; import org.apache.cassandra.journal.StaticSegment; import org.apache.cassandra.journal.ValueSerializer; -import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.FlyweightImage; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; import org.apache.cassandra.service.accord.JournalKey.JournalKeySupport; @@ -96,12 +95,6 @@ public class AccordJournal implements accord.api.Journal, RangeSearcher.Supplier, Shutdownable { - static - { - // make noise early if we forget to update our version mappings - Invariants.require(MessagingService.current_version == MessagingService.VERSION_51, "Expected current version to be %d but given %d", MessagingService.VERSION_51, MessagingService.current_version); - } - static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[JournalKeySupport.TOTAL_SIZE]); @VisibleForTesting @@ -855,4 +848,4 @@ private static void skip(TxnId txnId, Field field, DataInputPlus in, Version use } } } -} \ No newline at end of file +} From 79c59347befe88a4f34b09dbeadd414125cf1e24 Mon Sep 17 00:00:00 2001 From: Jordan West Date: Sun, 13 Apr 2025 12:26:55 -0700 Subject: [PATCH 290/340] Don't allocate in ThreadLocalReadAheadBuffer#close() Patch by Jordan West; reviewed by Benedict Elliot Smith for CASSANDRA-20551 --- CHANGES.txt | 1 + .../io/util/ThreadLocalReadAheadBuffer.java | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5ec727e79ffa..1791454d1772 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Don't allocate in ThreadLocalReadAheadBuffer#close() (CASSANDRA-20551) * Ensure RowFilter#isMutableIntersection() properly evaluates numeric ranges on a single column (CASSANDRA-20566) * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) * SAI marks an index as non-empty when a partial partition/row modifications is flushed due to repair (CASSANDRA-20567) diff --git a/src/java/org/apache/cassandra/io/util/ThreadLocalReadAheadBuffer.java b/src/java/org/apache/cassandra/io/util/ThreadLocalReadAheadBuffer.java index 824acaa8d88f..bc92407befaf 100644 --- a/src/java/org/apache/cassandra/io/util/ThreadLocalReadAheadBuffer.java +++ b/src/java/org/apache/cassandra/io/util/ThreadLocalReadAheadBuffer.java @@ -131,18 +131,21 @@ public int read(ByteBuffer dest, int length) public void clear(boolean deallocate) { - Block block = getBlock(); + // avoid calling block() here to reduce unintended allocations + Block block = blockMap.get().get(channel.filePath()); + if (block == null) + return; + block.index = -1; + if (block.buffer == null) + return; ByteBuffer blockBuffer = block.buffer; - if (blockBuffer != null) + blockBuffer.clear(); + if (deallocate) { - blockBuffer.clear(); - if (deallocate) - { - FileUtils.clean(blockBuffer); - block.buffer = null; - } + FileUtils.clean(blockBuffer); + block.buffer = null; } } From 58d1cc9b1e23c651513390c4ab50da6f5ae104d8 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Thu, 24 Apr 2025 12:04:22 -0700 Subject: [PATCH 291/340] Increasing default for auto_repair.sstable_upper_threshold considering large Cassandra tables --- CHANGES.txt | 4 ++++ conf/cassandra.yaml | 2 +- conf/cassandra_latest.yaml | 2 +- .../cassandra/pages/managing/operating/auto_repair.adoc | 6 +++--- .../cassandra/repair/autorepair/AutoRepairConfig.java | 2 +- .../cassandra/repair/autorepair/AutoRepairConfigTest.java | 4 ++-- 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d2292e1f9b3e..b07586d86667 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,9 +1,12 @@ 5.1 + * Increasing default for auto_repair.sstable_upper_threshold considering large Cassandra tables & revert three lines removed from CHANGES.txt due to a merge mistake (CASSANDRA-20586) * Fix token restrictions with MIN_TOKEN (CASSANDRO-20557) * Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 (CASSANDRA-20429) * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) * Change SSTableSimpleScanner to use SSTableReader#openDataReaderForScan (CASSANDRA-20538) * Automated Repair Inside Cassandra [CEP-37] (CASSANDRA-19918) + * Implement appender of slow queries to system_views.slow_queries table (CASSANDRA-13001) + * Add autocompletion in CQLSH for built-in functions (CASSANDRA-19631) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * General Purpose Transactions (Accord) [CEP-15] (CASSANDRA-17092) * Improve performance when getting writePlacementsAllSettled from ClusterMetadata (CASSANDRA-20526) @@ -243,6 +246,7 @@ Merged from 4.1: * Enforce CQL message size limit on multiframe messages (CASSANDRA-20052) * Fix race condition in DecayingEstimatedHistogramReservoir during rescale (CASSANDRA-19365) Merged from 4.0: + * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) * Avoid computing prepared statement size for unprepared batches (CASSANDRA-20556) * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) * Update OWASP dependency checker to version 12.1.0 (CASSANDRA-20501) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 88b0c7271806..a7efe5735a0d 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2810,7 +2810,7 @@ storage_compatibility_mode: NONE # force_repair_new_node: false # # Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good # # tables. -# sstable_upper_threshold: 10000 +# sstable_upper_threshold: 50000 # # Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the # # next table. # table_max_repair_time: 6h diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 6dfc89a975b9..69b4c647f731 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2499,7 +2499,7 @@ storage_compatibility_mode: NONE # force_repair_new_node: false # # Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good # # tables. -# sstable_upper_threshold: 10000 +# sstable_upper_threshold: 50000 # # Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the # # next table. # table_max_repair_time: 6h diff --git a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc index 3928d0afccef..d5701895d74f 100644 --- a/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc +++ b/doc/modules/cassandra/pages/managing/operating/auto_repair.adoc @@ -207,7 +207,7 @@ node. Defaults to true and is only evaluated when allow_parallel_replica_repair immediately after a restart. | repair_session_timeout | 3h | Timeout for retrying stuck repair sessions. | force_repair_new_node | false | Force immediate repair on new nodes after they join the ring. -| sstable_upper_threshold | 10000 | Threshold to skip repairing tables with too many SSTables. +| sstable_upper_threshold | 50000 | Threshold to skip repairing tables with too many SSTables. | table_max_repair_time | 6h | Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the next table. | ignore_dcs | [] | Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify @@ -365,7 +365,7 @@ configuration for repair_type: full min_repair_interval: 24h repair_by_keyspace: true number_of_repair_threads: 1 - sstable_upper_threshold: 10000 + sstable_upper_threshold: 50000 table_max_repair_time: 6h ignore_dcs: [] repair_primary_token_range_only: true @@ -387,7 +387,7 @@ configuration for repair_type: incremental min_repair_interval: 1h repair_by_keyspace: true number_of_repair_threads: 1 - sstable_upper_threshold: 10000 + sstable_upper_threshold: 50000 table_max_repair_time: 6h ignore_dcs: [] repair_primary_token_range_only: true diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java index 9d842888d585..045e6d21a8c8 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairConfig.java @@ -452,7 +452,7 @@ protected static Options getDefaultOptions() opts.parallel_repair_percentage = 3; opts.allow_parallel_replica_repair = false; opts.allow_parallel_replica_repair_across_schedules = true; - opts.sstable_upper_threshold = 10000; + opts.sstable_upper_threshold = 50000; opts.ignore_dcs = new HashSet<>(); opts.repair_primary_token_range_only = true; opts.force_repair_new_node = false; diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java index 0cf51ca40884..93ef906a287d 100644 --- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairConfigTest.java @@ -474,7 +474,7 @@ public void testDefaultOptions() assertEquals(Integer.valueOf(1), options.number_of_repair_threads); assertEquals(Integer.valueOf(3), options.parallel_repair_count); assertEquals(Integer.valueOf(3), options.parallel_repair_percentage); - assertEquals(Integer.valueOf(10000), options.sstable_upper_threshold); + assertEquals(Integer.valueOf(50000), options.sstable_upper_threshold); assertEquals(new HashSet<>(), options.ignore_dcs); assertTrue(options.repair_primary_token_range_only); assertFalse(options.force_repair_new_node); @@ -495,7 +495,7 @@ public void testGlobalOptions() assertEquals(Integer.valueOf(1), config.global_settings.number_of_repair_threads); assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_count); assertEquals(Integer.valueOf(3), config.global_settings.parallel_repair_percentage); - assertEquals(Integer.valueOf(10000), config.global_settings.sstable_upper_threshold); + assertEquals(Integer.valueOf(50000), config.global_settings.sstable_upper_threshold); assertEquals(new HashSet<>(), config.global_settings.ignore_dcs); assertTrue(config.global_settings.repair_primary_token_range_only); assertFalse(config.global_settings.force_repair_new_node); From 2aeb6a02905a39fb5b26c93dccd53da67d0ba24d Mon Sep 17 00:00:00 2001 From: Doug Rohrer Date: Tue, 29 Apr 2025 17:47:56 -0400 Subject: [PATCH 292/340] CASSANDRA-20609 - CQLSSTableWriter should support setting the format (5.0) --- CHANGES.txt | 1 + .../io/sstable/CQLSSTableWriter.java | 12 ++++++ .../io/sstable/CQLSSTableWriterTest.java | 39 +++++++++++++++++-- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 1791454d1772..08e0ecfa162b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * CQLSSTableWriter supports setting the format (BTI or Big) (CASSANDRA-20609) * Don't allocate in ThreadLocalReadAheadBuffer#close() (CASSANDRA-20551) * Ensure RowFilter#isMutableIntersection() properly evaluates numeric ranges on a single column (CASSANDRA-20566) * Switch memtable-related off-heap objects to Native Endian and Memory to Little Endian (CASSANDRA-20190) diff --git a/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java b/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java index 05b23c49f213..64834792fc96 100644 --- a/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java +++ b/src/java/org/apache/cassandra/io/sstable/CQLSSTableWriter.java @@ -636,6 +636,18 @@ public Builder withSSTableProducedListener(Consumer> s return this; } + /** + * Specifies the SSTable format this CQLSSTableWriter instance should use for writing. + * + * @param format The format to use + * @return this builder + */ + public Builder withFormat(SSTableFormat format) + { + this.format = format; + return this; + } + /** * Whether the produced sstable should be open or not. * By default, the writer does not open the produced sstables diff --git a/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java b/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java index d358185732f7..cc41b0ea2af8 100644 --- a/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/CQLSSTableWriterTest.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -34,6 +36,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiPredicate; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import com.google.common.collect.ImmutableList; @@ -61,8 +64,10 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.index.sai.disk.format.IndexDescriptor; import org.apache.cassandra.index.sai.utils.IndexIdentifier; +import org.apache.cassandra.io.sstable.format.SSTableFormat; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.sstable.format.big.BigFormat; +import org.apache.cassandra.io.sstable.format.bti.BtiFormat; import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.PathUtils; import org.apache.cassandra.schema.Schema; @@ -102,9 +107,22 @@ public void perTestSetup() throws IOException } @Test - public void testUnsortedWriter() throws Exception + public void testUnsortedWriterBig() throws Exception { - try (AutoCloseable switcher = Util.switchPartitioner(ByteOrderedPartitioner.instance)) + BigFormat format = BigFormat.getInstance(); + testWritingSstableWithFormat(format); + } + + @Test + public void testUnsortedWriterBti() throws Exception + { + SSTableFormat btiFormat = new BtiFormat.BtiFormatFactory().getInstance(Collections.emptyMap()); + testWritingSstableWithFormat(btiFormat); + } + + private void testWritingSstableWithFormat(SSTableFormat format) throws Exception + { + try (AutoCloseable ignored = Util.switchPartitioner(ByteOrderedPartitioner.instance)) { String schema = "CREATE TABLE " + qualifiedTable + " (" + " k int PRIMARY KEY," @@ -115,6 +133,7 @@ public void testUnsortedWriter() throws Exception CQLSSTableWriter writer = CQLSSTableWriter.builder() .inDirectory(dataDir) .forTable(schema) + .withFormat(format) .using(insert).build(); writer.addRow(0, "test1", 24); @@ -124,6 +143,7 @@ public void testUnsortedWriter() throws Exception writer.close(); + validateFilesAreInFormat(format); loadSSTables(dataDir, keyspace); UntypedResultSet rs = QueryProcessor.executeInternal("SELECT * FROM " + qualifiedTable); @@ -140,7 +160,6 @@ public void testUnsortedWriter() throws Exception row = iter.next(); assertEquals(1, row.getInt("k")); assertEquals("test2", row.getString("v1")); - //assertFalse(row.has("v2")); assertEquals(44, row.getInt("v2")); row = iter.next(); @@ -150,11 +169,23 @@ public void testUnsortedWriter() throws Exception row = iter.next(); assertEquals(3, row.getInt("k")); - assertEquals(null, row.getBytes("v1")); // Using getBytes because we know it won't NPE + assertFalse(row.has("v1")); assertEquals(12, row.getInt("v2")); } } + private void validateFilesAreInFormat(SSTableFormat format) throws IOException + { + try (Stream dataFilePaths = Files.list(dataDir.toPath()).filter(p -> p.toString().endsWith("Data.db"))) + { + dataFilePaths.forEach(dataFilePath -> { + File dataFile = new File(dataFilePath.toFile()); + Descriptor descriptor = Descriptor.fromFile(dataFile); + assertEquals(format, descriptor.version.format); + }); + } + } + @Test public void testForbidCounterUpdates() throws Exception { From f2dd1adccd4ce34251b98f010929f431a9307616 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 2 May 2025 11:27:50 -0700 Subject: [PATCH 293/340] Accords gradle build fails due to ephemeral issues with rat and checkstyle patch by David Capwell; reviewed by Michael Semb Wever for CASSANDRA-20590 --- .build/build-accord.xml | 8 +++++++- .build/build-rat.xml | 1 + .build/rat-include-accord.sh | 25 +++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100755 .build/rat-include-accord.sh diff --git a/.build/build-accord.xml b/.build/build-accord.xml index 6fc716d2d0c2..0d16197c6bea 100644 --- a/.build/build-accord.xml +++ b/.build/build-accord.xml @@ -27,10 +27,16 @@ - + + + + + + + diff --git a/.build/build-rat.xml b/.build/build-rat.xml index 9333d2b8da92..32bf3d736909 100644 --- a/.build/build-rat.xml +++ b/.build/build-rat.xml @@ -30,6 +30,7 @@ + diff --git a/.build/rat-include-accord.sh b/.build/rat-include-accord.sh new file mode 100755 index 000000000000..3c4945c5e429 --- /dev/null +++ b/.build/rat-include-accord.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#set -o xtrace +set -o errexit +set -o pipefail +set -o nounset + +home="$(cd "$(dirname "$0")"/.. > /dev/null; pwd)" + +git --git-dir="$home"/modules/accord/.git ls-tree -r HEAD --name-only | sed 's;^;modules/accord/;' From 80224a97670095e710460f79879462732f1505ae Mon Sep 17 00:00:00 2001 From: Jon Haddad Date: Thu, 1 May 2025 05:58:32 -0700 Subject: [PATCH 294/340] Improved UCS docs with migration advice and example workloads. Patch by Jon Haddad for CASSANDRA-19389 --- .../managing/operating/compaction/ucs.adoc | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc b/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc index 4798f0b6fd6e..a41c979ef9da 100644 --- a/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc +++ b/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc @@ -18,7 +18,86 @@ Thus, a compaction is triggered when more than a given number of SSTables are pr * *size* can be replaced by *density*, allowing SSTables to be split at arbitrary points when the output of a compaction is written, while still producing a leveled hierarchy. Density is defined as the size of an SSTable divided by the width of the token range it covers. -Let's look at the first concept in more detail. +== Migration from Other Strategies + +The Unified Compaction Strategy (UCS) can be configured to behave like other compaction strategies, making migration straightforward. It also provides advanced options for optimizing specific workload patterns. + +=== Examples + +Below are examples for migrating from commonly used strategies. UCS can maintain similar behavior while providing additional benefits such as parallel compaction and the ability to change parameters without requiring full recompaction. + +[cols="1,3a", options="header"] +|=== +| Source Strategy | Migration Command +| Migrating From LCS | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo +WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'L10'}; +---- +| Migration from SizeTieredCompactionStrategy | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo WITH +COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'T4'}; +---- +|=== + +== Use Case Specific Configurations + +The following configurations are optimized for common workload patterns. The parameters can be adjusted based on your specific requirements. + +These provide a good starting point for common workloads, but you may find you want to tune additional parameters based on your workload characteristics. +Additional details to understand this are in the following section. + +[cols="1,3a,3", options="header"] +|=== +| Use Case | Configuration Example | Explanation +| Read Heavy Key Value | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo +WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'L10', + 'target_sstable_size': + '256MiB', + 'base_shard_count': '8' +}; +---- +| Optimizes for read-intensive workloads with a leveled approach similar to LCS. The smaller target SSTable size and higher shard count improve read performance by minimizing the number of SSTables that must be consulted for a query. +| Write Heavy | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo +WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'T4', + 'target_sstable_size': '1GiB', + 'base_shard_count': '4' +}; +---- +| Optimizes for write-intensive workloads using a tiered approach similar to STCS. The larger target SSTable size reduces write amplification by requiring fewer compactions, while the lower shard count reduces the overhead of managing too many SSTables. +| Time Series | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'T8', + 'target_sstable_size': '512MiB', + 'base_shard_count': '8', + 'expired_sstable_check_frequency_seconds': '300' +}; +---- +| Suitable for time-series data with TTLs. The higher tiered scaling parameter (T8) improves write throughput, while the frequent expired SSTable check helps reclaim space from expired data more quickly. The higher shard count allows for greater parallelism in compaction operations. + +Using `scaling_parameters:T8` will result in more SSTables per read. Consider using T4 for time series use cases where lower read latency is desired, and you can afford to perform additional compaction. +|=== + == Read and write amplification From 4971b4c42c79c4b95a4352baf36c912e276beca7 Mon Sep 17 00:00:00 2001 From: Jon Haddad Date: Thu, 1 May 2025 05:58:32 -0700 Subject: [PATCH 295/340] Improved UCS docs with migration advice and example workloads. Patch by Jon Haddad for CASSANDRA-19389 --- .../managing/operating/compaction/ucs.adoc | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc b/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc index 4798f0b6fd6e..a41c979ef9da 100644 --- a/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc +++ b/doc/modules/cassandra/pages/managing/operating/compaction/ucs.adoc @@ -18,7 +18,86 @@ Thus, a compaction is triggered when more than a given number of SSTables are pr * *size* can be replaced by *density*, allowing SSTables to be split at arbitrary points when the output of a compaction is written, while still producing a leveled hierarchy. Density is defined as the size of an SSTable divided by the width of the token range it covers. -Let's look at the first concept in more detail. +== Migration from Other Strategies + +The Unified Compaction Strategy (UCS) can be configured to behave like other compaction strategies, making migration straightforward. It also provides advanced options for optimizing specific workload patterns. + +=== Examples + +Below are examples for migrating from commonly used strategies. UCS can maintain similar behavior while providing additional benefits such as parallel compaction and the ability to change parameters without requiring full recompaction. + +[cols="1,3a", options="header"] +|=== +| Source Strategy | Migration Command +| Migrating From LCS | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo +WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'L10'}; +---- +| Migration from SizeTieredCompactionStrategy | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo WITH +COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'T4'}; +---- +|=== + +== Use Case Specific Configurations + +The following configurations are optimized for common workload patterns. The parameters can be adjusted based on your specific requirements. + +These provide a good starting point for common workloads, but you may find you want to tune additional parameters based on your workload characteristics. +Additional details to understand this are in the following section. + +[cols="1,3a,3", options="header"] +|=== +| Use Case | Configuration Example | Explanation +| Read Heavy Key Value | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo +WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'L10', + 'target_sstable_size': + '256MiB', + 'base_shard_count': '8' +}; +---- +| Optimizes for read-intensive workloads with a leveled approach similar to LCS. The smaller target SSTable size and higher shard count improve read performance by minimizing the number of SSTables that must be consulted for a query. +| Write Heavy | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo +WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'T4', + 'target_sstable_size': '1GiB', + 'base_shard_count': '4' +}; +---- +| Optimizes for write-intensive workloads using a tiered approach similar to STCS. The larger target SSTable size reduces write amplification by requiring fewer compactions, while the lower shard count reduces the overhead of managing too many SSTables. +| Time Series | +[source,plaintext] +---- +ALTER TABLE mykeyspace.foo WITH COMPACTION = { + 'class': 'UnifiedCompactionStrategy', + 'scaling_parameters': 'T8', + 'target_sstable_size': '512MiB', + 'base_shard_count': '8', + 'expired_sstable_check_frequency_seconds': '300' +}; +---- +| Suitable for time-series data with TTLs. The higher tiered scaling parameter (T8) improves write throughput, while the frequent expired SSTable check helps reclaim space from expired data more quickly. The higher shard count allows for greater parallelism in compaction operations. + +Using `scaling_parameters:T8` will result in more SSTables per read. Consider using T4 for time series use cases where lower read latency is desired, and you can afford to perform additional compaction. +|=== + == Read and write amplification From 234763f057f84b7060e9bebfc364ff6c15ce19a2 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 2 May 2025 12:51:36 -0700 Subject: [PATCH 296/340] Remove Accord CQL words from the reserved keywords list patch by David Capwell; reviewed by Caleb Rackliffe for CASSANDRA-20613 --- pylib/cqlshlib/cqlhandling.py | 6 +- src/antlr/Parser.g | 5 ++ .../cassandra/cql3/reserved_keywords.txt | 5 -- .../cassandra/cql3/ReservedKeywordsTest.java | 55 ++++++++++++++++--- 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/pylib/cqlshlib/cqlhandling.py b/pylib/cqlshlib/cqlhandling.py index 76a516af8038..cd19e39fda6b 100644 --- a/pylib/cqlshlib/cqlhandling.py +++ b/pylib/cqlshlib/cqlhandling.py @@ -25,10 +25,10 @@ Hint = pylexotron.Hint cql_keywords_reserved = {'add', 'allow', 'alter', 'and', 'apply', 'asc', 'authorize', 'batch', 'begin', 'by', - 'columnfamily', 'create', 'commit', 'delete', 'desc', 'describe', 'drop', 'end', 'entries', 'execute', 'from', - 'full', 'grant', 'if', 'in', 'index', 'infinity', 'insert', 'into', 'is', 'keyspace', 'let', 'limit', + 'columnfamily', 'create', 'delete', 'desc', 'describe', 'drop', 'entries', 'execute', 'from', + 'full', 'grant', 'if', 'in', 'index', 'infinity', 'insert', 'into', 'is', 'keyspace', 'limit', 'materialized', 'modify', 'nan', 'norecursive', 'not', 'null', 'of', 'on', 'or', 'order', - 'primary', 'rename', 'revoke', 'schema', 'select', 'set', 'table', 'then', 'to', 'token', 'transaction', 'truncate', + 'primary', 'rename', 'revoke', 'schema', 'select', 'set', 'table', 'to', 'token', 'truncate', 'unlogged', 'update', 'use', 'using', 'view', 'where', 'with'} """ Set of reserved keywords in CQL. diff --git a/src/antlr/Parser.g b/src/antlr/Parser.g index b3fb490dcaf0..c1c79e7448d1 100644 --- a/src/antlr/Parser.g +++ b/src/antlr/Parser.g @@ -2310,5 +2310,10 @@ basic_unreserved_keyword returns [String str] | K_BETWEEN | K_CHECK | K_INDEXES + | K_COMMIT + | K_END + | K_LET + | K_THEN + | K_TRANSACTION ) { $str = $k.text; } ; diff --git a/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt b/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt index 6403600acc89..8a1d2987f9d2 100644 --- a/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt +++ b/src/resources/org/apache/cassandra/cql3/reserved_keywords.txt @@ -9,13 +9,11 @@ BATCH BEGIN BY COLUMNFAMILY -COMMIT CREATE DELETE DESC DESCRIBE DROP -END ENTRIES EXECUTE FROM @@ -29,7 +27,6 @@ INSERT INTO IS KEYSPACE -LET LIMIT MATERIALIZED MODIFY @@ -48,10 +45,8 @@ SCHEMA SELECT SET TABLE -THEN TO TOKEN -TRANSACTION TRUNCATE UNLOGGED UPDATE diff --git a/test/unit/org/apache/cassandra/cql3/ReservedKeywordsTest.java b/test/unit/org/apache/cassandra/cql3/ReservedKeywordsTest.java index eb860ebda02f..fc891bfba4a3 100644 --- a/test/unit/org/apache/cassandra/cql3/ReservedKeywordsTest.java +++ b/test/unit/org/apache/cassandra/cql3/ReservedKeywordsTest.java @@ -18,10 +18,14 @@ package org.apache.cassandra.cql3; +import java.lang.reflect.Modifier; + import org.junit.Test; import org.junit.Assert; + import org.apache.cassandra.exceptions.SyntaxException; +import org.assertj.core.api.SoftAssertions; public class ReservedKeywordsTest { @@ -30,14 +34,51 @@ public void testReservedWordsForColumns() { for (String reservedWord : ReservedKeywords.reservedKeywords) { - try - { - QueryProcessor.parseStatement(String.format("ALTER TABLE ks.t ADD %s TEXT", reservedWord)); + if (isAllowed(reservedWord)) Assert.fail(String.format("Reserved keyword %s should not have parsed", reservedWord)); - } - catch (SyntaxException ignore) - { - } + } + } + + @Test + public void parserAndTextFileMatch() + { + // If this test starts to fail that means that the lexer added a new keyword, and this keyword was not updated + // to be unreserved. + // + // To mark a keyword as unreserved, open "Parser.g" and search for + // basic_unreserved_keyword returns [String str] + // or + // unreserved_keyword returns [String str] + // Add your keyword there and rebuild the jar (to generate the parser). + // + // If it is desired to make this keyword reserved, then you must first go to the mailing list and request a vote + // on this change, if that vote passes then you can update "reserved_keywords.txt" (and pylib/cqlshlib/cqlhandling.py::cql_keywords_reserved). + // Never update "reserved_keywords.txt" without a vote on the mailing list! + SoftAssertions asserts = new SoftAssertions(); + for (var f : Cql_Lexer.class.getDeclaredFields()) + { + if (!Modifier.isStatic(f.getModifiers())) continue; + if (!f.getName().startsWith("K_")) continue; + String name = f.getName(); + String keyword = name.replaceFirst("K_", ""); + + asserts.assertThat(ReservedKeywords.isReserved(keyword)) + .describedAs(keyword) + .isEqualTo(!isAllowed(keyword)); + } + asserts.assertAll(); + } + + private static boolean isAllowed(String keyword) + { + try + { + QueryProcessor.parseStatement(String.format("ALTER TABLE ks.t ADD %s TEXT", keyword)); + return true; + } + catch (SyntaxException ignore) + { + return false; } } } From 4f25e58f267e68df945e0ba47b2a684c1fb33a85 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 2 May 2025 14:23:17 -0700 Subject: [PATCH 297/340] zero copy streaming allocates direct memory that isn't used, but does help to fragment the memory space patch by David Capwell; reviewed by Yifan Cai for CASSANDRA-20577 --- CHANGES.txt | 1 + .../io/sstable/SSTableZeroCopyWriter.java | 47 ++++++++++++++----- .../cassandra/io/util/SequentialWriter.java | 9 +++- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 08e0ecfa162b..d30a383b7f3d 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * zero copy streaming allocates direct memory that isn't used, but does help to fragment the memory space (CASSANDRA-20577) * CQLSSTableWriter supports setting the format (BTI or Big) (CASSANDRA-20609) * Don't allocate in ThreadLocalReadAheadBuffer#close() (CASSANDRA-20551) * Ensure RowFilter#isMutableIntersection() properly evaluates numeric ranges on a single column (CASSANDRA-20566) diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java b/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java index 3bf21f1155ec..46a490974e3e 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java @@ -38,9 +38,12 @@ import org.apache.cassandra.io.FSWriteError; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.SequentialWriter; +import org.apache.cassandra.io.util.SequentialWriterOption; import org.apache.cassandra.net.AsyncStreamingInputPlus; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.utils.ByteBufferUtil; import static java.lang.String.format; import static org.apache.cassandra.utils.FBUtilities.prettyPrintMemory; @@ -50,7 +53,7 @@ public class SSTableZeroCopyWriter extends SSTable implements SSTableMultiWriter private static final Logger logger = LoggerFactory.getLogger(SSTableZeroCopyWriter.class); private volatile SSTableReader finalReader; - private final Map componentWriters; // indexed by component name + private final Map componentWriters; // indexed by component name public SSTableZeroCopyWriter(Builder builder, LifecycleNewTracker lifecycleNewTracker, @@ -89,12 +92,12 @@ public AbstractBounds getBounds() throw new UnsupportedOperationException(); } - private SequentialWriter makeWriter(Descriptor descriptor, Component component) + private ZeroCopySequentialWriter makeWriter(Descriptor descriptor, Component component) { - return new SequentialWriter(descriptor.fileFor(component), ioOptions.writerOptions, false); + return new ZeroCopySequentialWriter(descriptor.fileFor(component), ioOptions.writerOptions, false); } - private void write(DataInputPlus in, long size, SequentialWriter out) throws FSWriteError + private void write(DataInputPlus in, long size, ZeroCopySequentialWriter out) throws FSWriteError { final int BUFFER_SIZE = 1 << 20; long bytesRead = 0; @@ -128,7 +131,7 @@ public Collection finish(boolean openResult) { setOpenResult(openResult); - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) writer.finish(); return finished(); @@ -170,7 +173,7 @@ public TableId getTableId() @Override public Throwable commit(Throwable accumulate) { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) accumulate = writer.commit(accumulate); return accumulate; } @@ -178,7 +181,7 @@ public Throwable commit(Throwable accumulate) @Override public Throwable abort(Throwable accumulate) { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) accumulate = writer.abort(accumulate); return accumulate; } @@ -186,29 +189,30 @@ public Throwable abort(Throwable accumulate) @Override public void prepareToCommit() { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) writer.prepareToCommit(); } @Override public void close() { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) writer.close(); } public void writeComponent(Component component, DataInputPlus in, long size) throws ClosedChannelException { - SequentialWriter writer = componentWriters.get(component.name); + ZeroCopySequentialWriter writer = componentWriters.get(component.name); logger.info("Writing component {} to {} length {}", component, writer.getPath(), prettyPrintMemory(size)); if (in instanceof AsyncStreamingInputPlus) write((AsyncStreamingInputPlus) in, size, writer); else + // this code path is not valid for production and only exists to simplify unit tests write(in, size, writer); } - private void write(AsyncStreamingInputPlus in, long size, SequentialWriter writer) throws ClosedChannelException + private void write(AsyncStreamingInputPlus in, long size, ZeroCopySequentialWriter writer) throws ClosedChannelException { logger.info("Block Writing component to {} length {}", writer.getPath(), prettyPrintMemory(size)); @@ -233,4 +237,25 @@ private void write(AsyncStreamingInputPlus in, long size, SequentialWriter write throw new FSWriteError(e, writer.getPath()); } } + + private static class ZeroCopySequentialWriter extends SequentialWriter + { + private ZeroCopySequentialWriter(File file, SequentialWriterOption option, boolean strictFlushing) + { + super(file, ByteBufferUtil.EMPTY_BYTE_BUFFER, option, strictFlushing); + } + + /** + * In production, we do not expect this method to be called, as only writeDirectlyToChannel should be invoked for zero-copy. + *

      + * This method only exists for tests. + */ + @Override + public void write(byte[] b, int off, int len) throws IOException + { + if (this.buffer == ByteBufferUtil.EMPTY_BYTE_BUFFER) + this.buffer = option.allocateBuffer(); + super.write(b, off, len); + } + } } diff --git a/src/java/org/apache/cassandra/io/util/SequentialWriter.java b/src/java/org/apache/cassandra/io/util/SequentialWriter.java index 69643be98730..c3a90732eead 100644 --- a/src/java/org/apache/cassandra/io/util/SequentialWriter.java +++ b/src/java/org/apache/cassandra/io/util/SequentialWriter.java @@ -56,7 +56,7 @@ public class SequentialWriter extends BufferedDataOutputStreamPlus implements Tr // whether to do trickling fsync() to avoid sudden bursts of dirty buffer flushing by kernel causing read // latency spikes - private final SequentialWriterOption option; + protected final SequentialWriterOption option; private int bytesSinceTrickleFsync = 0; protected long lastFlushOffset; @@ -163,7 +163,12 @@ public SequentialWriter(File file, SequentialWriterOption option) */ public SequentialWriter(File file, SequentialWriterOption option, boolean strictFlushing) { - super(openChannel(file), option.allocateBuffer()); + this(file, option.allocateBuffer(), option, strictFlushing); + } + + protected SequentialWriter(File file, ByteBuffer buffer, SequentialWriterOption option, boolean strictFlushing) + { + super(openChannel(file), buffer); this.strictFlushing = strictFlushing; this.fchannel = (FileChannel)channel; From 0a4777dae41f260b5de243901e7fdef5b9440105 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 2 May 2025 17:00:20 -0700 Subject: [PATCH 298/340] zero copy streaming allocates direct memory that isnt used, but does help to fragment the memory space patch by David Capwell; reviewed by Yifan Cai for CASSANDRA-20577 --- CHANGES.txt | 1 + modules/accord | 2 +- .../cassandra/config/DatabaseDescriptor.java | 9 + .../test/cql3/MultiNodeTableWalkBase.java | 7 + .../MultiNodeTableWalkWithReadRepairTest.java | 3 - .../test/cql3/MultiNodeTokenConflictTest.java | 1 + .../test/cql3/SingleNodeTableWalkTest.java | 14 +- .../test/cql3/StatefulASTBase.java | 174 +++++++++++---- .../fuzz/topology/TopologyMixupTestBase.java | 42 +--- .../apache/cassandra/repair/FuzzTestBase.java | 63 +----- .../cassandra/repair/RepairGenerators.java | 201 ++++++++++++++++++ .../cassandra/utils/ImmutableUniqueList.java | 8 + .../cassandra/utils/LoggingCommand.java | 76 +++++++ 13 files changed, 458 insertions(+), 143 deletions(-) create mode 100644 test/unit/org/apache/cassandra/repair/RepairGenerators.java create mode 100644 test/unit/org/apache/cassandra/utils/LoggingCommand.java diff --git a/CHANGES.txt b/CHANGES.txt index 35cafede97bb..f214877478b8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -186,6 +186,7 @@ * Add the ability to disable bulk loading of SSTables (CASSANDRA-18781) * Clean up obsolete functions and simplify cql_version handling in cqlsh (CASSANDRA-18787) Merged from 5.0: + * zero copy streaming allocates direct memory that isn't used, but does help to fragment the memory space (CASSANDRA-20577) * CQLSSTableWriter supports setting the format (BTI or Big) (CASSANDRA-20609) * Don't allocate in ThreadLocalReadAheadBuffer#close() (CASSANDRA-20551) * Ensure RowFilter#isMutableIntersection() properly evaluates numeric ranges on a single column (CASSANDRA-20566) diff --git a/modules/accord b/modules/accord index 3825403cc50e..7f95490b1390 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3825403cc50ef7897d5dfb4cbdca5efbc432e8ee +Subproject commit 7f95490b1390b7fc68a4ff4ced7f161bafd8776b diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 82a26602eb05..625fe2be100f 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -5711,6 +5711,15 @@ public static void resetSSTableFormats(Iterable factories return Objects.requireNonNull(selectedSSTableFormat, "Forgot to initialize DatabaseDescriptor?"); } + @VisibleForTesting + public static void setSelectedSSTableFormat(String name) + { + SSTableFormat format = getSSTableFormats().get(name); + if (format == null) + throw new IllegalArgumentException("Unknown sstable format: " + name); + setSelectedSSTableFormat(format); + } + @VisibleForTesting public static void setSelectedSSTableFormat(SSTableFormat format) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java index d6c01834737f..126f9ec90839 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java @@ -66,6 +66,7 @@ protected Cluster createCluster() throws IOException @Override protected void clusterConfig(IInstanceConfig c) { + super.clusterConfig(c); c.set("range_request_timeout", "180s") .set("read_request_timeout", "180s") .set("write_request_timeout", "180s") @@ -100,6 +101,12 @@ protected boolean isMultiNode() return true; } + @Override + protected boolean allowRepair() + { + return hasEnoughMemtableForRepair() || hasEnoughSSTablesForRepair(); + } + @Override protected IInvokableInstance selectInstance(RandomSource rs) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java index 7727e3a76ab3..a8647668715b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithReadRepairTest.java @@ -18,13 +18,10 @@ package org.apache.cassandra.distributed.test.cql3; -import org.junit.Ignore; - import accord.utils.Property; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.service.reads.repair.ReadRepairStrategy; -@Ignore("In order to stay stable RR tests are ignored for now. Once Single node and multi node w/o RR are stable, then this test should be enabled to include RR testing") public class MultiNodeTableWalkWithReadRepairTest extends MultiNodeTableWalkBase { public MultiNodeTableWalkWithReadRepairTest() diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java index 7a6dbaa8590e..081a2006183b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTokenConflictTest.java @@ -63,6 +63,7 @@ protected TableMetadata defineTable(RandomSource rs, String ks) @Override protected void clusterConfig(IInstanceConfig c) { + super.clusterConfig(c); c.set("range_request_timeout", "180s") .set("read_request_timeout", "180s") .set("write_request_timeout", "180s") diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java index 762a2b83bca2..171ccf140cca 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java @@ -60,6 +60,7 @@ import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.test.sai.SAIUtil; +import org.apache.cassandra.utils.LoggingCommand; import org.apache.cassandra.harry.model.BytesPartitionState; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; @@ -322,7 +323,7 @@ protected List supportedIndexers() Select select = builder.build(); String annotate = cols.stream().map(symbol -> { var indexed = state.indexes.get(symbol); - return symbol.detailedName() + (indexed == null ? "" : " (indexed with " + indexed.indexDDL.indexer.name() + ")"); + return symbol.detailedName() + (indexed == null ? "" : " (indexed with " + indexed.indexDDL.indexer.name() + ')'); }).collect(Collectors.joining(", ")); return state.command(rs, select, annotate); } @@ -367,15 +368,20 @@ public void test() throws IOException .add(StatefulASTBase::insert) .add(StatefulASTBase::fullTableScan) .addIf(State::hasPartitions, this::selectExisting) - .addAllIf(State::supportTokens, b -> b.add(this::selectToken) - .add(this::selectTokenRange) - .add(StatefulASTBase::selectMinTokenRange)) + .addAllIf(State::supportTokens, + this::selectToken, + this::selectTokenRange, + StatefulASTBase::selectMinTokenRange) .addIf(State::hasEnoughMemtable, StatefulASTBase::flushTable) .addIf(State::hasEnoughSSTables, StatefulASTBase::compactTable) + .addAllIf(BaseState::allowRepair, + StatefulASTBase::incrementalRepair, + StatefulASTBase::previewRepair) .addIf(State::allowNonPartitionQuery, this::nonPartitionQuery) .addIf(State::allowNonPartitionMultiColumnQuery, this::multiColumnQuery) .addIf(State::allowPartitionQuery, this::partitionRestrictedQuery) .destroyState(State::close) + .commandsTransformer(LoggingCommand.factory()) .onSuccess(onSuccess(logger)) .build()); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java index a16b47c1d760..527a7ea65864 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/cql3/StatefulASTBase.java @@ -22,9 +22,9 @@ import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.EnumSet; import java.util.List; -import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; @@ -34,6 +34,7 @@ import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import org.slf4j.Logger; @@ -69,6 +70,7 @@ import org.apache.cassandra.db.marshal.AsciiType; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.Feature; @@ -80,6 +82,9 @@ import org.apache.cassandra.exceptions.RequestFailureReason; import org.apache.cassandra.harry.model.ASTSingleTableModel; import org.apache.cassandra.harry.util.StringUtils; +import org.apache.cassandra.repair.RepairGenerators; +import org.apache.cassandra.repair.RepairGenerators.PreviewType; +import org.apache.cassandra.repair.RepairGenerators.RepairType; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.utils.AbstractTypeGenerators; import org.apache.cassandra.utils.CassandraGenerators; @@ -118,6 +123,8 @@ public class StatefulASTBase extends TestBaseImpl .collect(Collectors.toList())); protected static final Gen FETCH_SIZE_DISTRO = Gens.mixedDistribution(new int[] {1, 10, 100, 1000, 5000}); protected static final Gen LIMIT_DISTRO = Gens.mixedDistribution(1, 1001); + protected static final Gen REPAIR_TYPE_EMPTY_MODEL_DISTRO = Gens.mixedDistribution(0, 2); + protected static final Gen REPAIR_TYPE_DISTRO = Gens.mixedDistribution(0, 3); static { @@ -145,7 +152,7 @@ protected static String nextKeyspace() protected void clusterConfig(IInstanceConfig config) { - + config.set("repair.retries.max_attempts", Integer.MAX_VALUE); } protected void clusterInitializer(ClassLoader cl, int node) @@ -185,7 +192,7 @@ protected Property.StatefulSuccess onSuccess(Logg protected static Property.Command flushTable(RandomSource rs, S state) { - return new Property.SimpleCommand<>("nodetool flush " + state.metadata.keyspace + " " + state.metadata.name, s2 -> { + return new Property.SimpleCommand<>("nodetool flush " + state.metadata.keyspace + ' ' + state.metadata.name, s2 -> { s2.cluster.forEach(i -> i.nodetoolResult("flush", s2.metadata.keyspace, s2.metadata.name).asserts().success()); s2.flush(); }); @@ -193,7 +200,7 @@ protected Property.StatefulSuccess onSuccess(Logg protected static Property.Command compactTable(RandomSource rs, S state) { - return new Property.SimpleCommand<>("nodetool compact " + state.metadata.keyspace + " " + state.metadata.name, s2 -> { + return new Property.SimpleCommand<>("nodetool compact " + state.metadata.keyspace + ' ' + state.metadata.name, s2 -> { state.cluster.forEach(i -> i.nodetoolResult("compact", s2.metadata.keyspace, s2.metadata.name).asserts().success()); s2.compact(); }); @@ -211,6 +218,59 @@ protected Property.StatefulSuccess onSuccess(Logg state.commandSafeRandomHistory(selectForMutation(state, mutation), "Select for Mutation Validation")); } + protected static Property.Command incrementalRepair(RandomSource rs, S state) + { + return repair(rs, state, state.repairArgsBuilder().withType(i -> RepairType.IR).withPreviewType(i -> PreviewType.NONE), null); + } + + protected static Property.Command previewRepair(RandomSource rs, S state) + { + return repair(rs, state, state.repairArgsBuilder().withType(i -> RepairType.FULL).withPreviewType(i -> PreviewType.REPAIRED), null); + } + + protected static Property.Command repair(RandomSource rs, S state, RepairGenerators.Builder argsBuilder, @Nullable String annotate) + { + IInvokableInstance inst = state.selectInstance(rs); + Gen> argsGen = argsBuilder.build(); + List args = ImmutableList.builder() + .add("repair") + .addAll(argsGen.next(rs)) + .build(); + boolean preview = RepairGenerators.isPreview(args); + // mimic org.apache.cassandra.repair.state.CoordinatorState.getType + String type; + if (preview) + { + // mimic org.apache.cassandra.tools.nodetool.Repair.getPreviewKind + PreviewType previewType = RepairGenerators.previewType(args); + switch (previewType) + { + case REPAIRED: + type = "preview repaired"; + break; + case UNREPAIRED: + type = RepairGenerators.isFull(args) ? "preview full" : "preview unrepaired"; + break; + default: + throw new UnsupportedOperationException(previewType.name()); + } + } + else + { + type = RepairGenerators.isFull(args) ? "full" : "incremental"; + } + + String postfix = "type " + type + ", on " + inst; + if (annotate == null) annotate = postfix; + else annotate += ", " + postfix; + + return new Property.SimpleCommand<>("nodetool " + String.join(" ", args) + " -- " + annotate, s2 -> { + inst.nodetoolResult(args.toArray(String[]::new)).asserts().success(); + if (!preview) + s2.repair(); + }); + } + private static Select selectForMutation(S state, Mutation mutation) { var select = Select.builder(state.metadata).allowFiltering(); @@ -288,16 +348,19 @@ protected static abstract class BaseState implements AutoCloseable protected final Gen lessThanGen; protected final Gen greaterThanGen; protected final Gen rangeInequalityGen; + protected final Gen.IntGen repairTypeEmptyModelGen, repairTypeGen; protected final Gen.IntGen fetchSizeGen; protected final TableMetadata metadata; protected final TableReference tableRef; protected final ASTSingleTableModel model; + private final String sstableFormatName; private final Visitor debug; - private final int enoughMemtables; - private final int enoughSSTables; + private final int enoughMemtables, enoughMemtablesForRepair; + private final int enoughSSTables, enoughSSTablesForRepair; protected int numMutations, mutationsSinceLastFlush; - protected int numFlushes, flushesSinceLastCompaction; + protected int numFlushes, flushesSinceLastCompaction, flushesSinceLastRepair; protected int numCompact; + protected int numRepairs; protected int operations; protected BaseState(RandomSource rs, Cluster cluster, TableMetadata metadata) @@ -322,13 +385,21 @@ protected BaseState(RandomSource rs, Cluster cluster, TableMetadata metadata) this.perPartitionLimitGen = LIMIT_DISTRO.next(rs); this.limitGen = LIMIT_DISTRO.next(rs); - this.enoughMemtables = rs.pickInt(3, 10, 50); + this.repairTypeEmptyModelGen = REPAIR_TYPE_EMPTY_MODEL_DISTRO.next(rs); + this.repairTypeGen = REPAIR_TYPE_DISTRO.next(rs); + + this.enoughMemtables = rs.pickInt(1, 3, 10, 50); + this.enoughMemtablesForRepair = rs.pickInt(1, 3, 10, 50); this.enoughSSTables = rs.pickInt(3, 10, 50); + this.enoughSSTablesForRepair = rs.pickInt(1, 3, 10, 50); this.metadata = metadata; this.tableRef = TableReference.from(metadata); this.model = new ASTSingleTableModel(metadata, IGNORED_ISSUES); createTable(metadata); + + String sstableFormatName = this.sstableFormatName = Generators.toGen(CassandraGenerators.sstableFormatNames()).next(rs); + cluster.forEach(i -> i.runOnInstance(() -> DatabaseDescriptor.setSelectedSSTableFormat(sstableFormatName))); } public boolean hasPartitions() @@ -364,6 +435,35 @@ private String createKeyspaceCQL(String ks) return command(rs, select, null); } + protected boolean allowRepair() + { + return false; + } + + protected RepairGenerators.Builder repairArgsBuilder() + { + return new RepairGenerators.Builder(i -> Arrays.asList(metadata.keyspace, metadata.name)) + // paxos cleanup's finish prepare is delayed based off CAS/Write timeout, but these tests make that 3 minutes (so CI is stable) + // which means this step is delayed 3 minutes, making repairs suppppper slow... + // see org.apache.cassandra.service.paxos.cleanup.PaxosCleanup#finishPrepare + .withSkipPaxosGen(i -> true) + .withRanges(rs -> { + switch (model.isEmpty() ? repairTypeEmptyModelGen.next(rs) : repairTypeGen.next(rs)) + { + case 0: return RepairGenerators.LOCAL_RANGE; + case 1: return RepairGenerators.PRIMARY_RANGE; + case 2: + { + Token a = rs.pickOrderedSet(model.partitionKeys()).token; + return List.of("--start-token", Long.toString(a.getLongValue() - 1), + "--end-token", a.toString()); + } + default: throw new UnsupportedOperationException(); + } + }) + ; + } + protected boolean allowLimit(Select select) { //TODO (coverage): allow this in the model! @@ -467,11 +567,23 @@ protected boolean hasEnoughMemtable() return mutationsSinceLastFlush > enoughMemtables; } + protected boolean hasEnoughMemtableForRepair() + { + // use last flush rather than last repair as this method cares about data in the memtable + // and not amount of mutations since repair + return mutationsSinceLastFlush > enoughMemtablesForRepair; + } + protected boolean hasEnoughSSTables() { return flushesSinceLastCompaction > enoughSSTables; } + protected boolean hasEnoughSSTablesForRepair() + { + return flushesSinceLastRepair > enoughSSTablesForRepair; + } + protected void mutation() { numMutations++; @@ -483,6 +595,7 @@ protected void flush() mutationsSinceLastFlush = 0; numFlushes++; flushesSinceLastCompaction++; + flushesSinceLastRepair++; } protected void compact() @@ -491,6 +604,15 @@ protected void compact() numCompact++; } + protected void repair() + { + if (mutationsSinceLastFlush > 0) + flush(); + + numRepairs++; + flushesSinceLastRepair = 0; + } + protected Value value(RandomSource rs, ByteBuffer bb, AbstractType type) { return bindOrLiteralGen.next(rs) ? new Bind(bb, type) : new Literal(bb, type); @@ -599,7 +721,8 @@ private String humanReadable(Statement stmt, @Nullable String annotate) protected void toString(StringBuilder sb) { - sb.append(createKeyspaceCQL(metadata.keyspace)); + sb.append("Config:\nsstable:\n\tselected_format: ").append(sstableFormatName); + sb.append('\n').append(createKeyspaceCQL(metadata.keyspace)); CassandraGenerators.visitUDTs(metadata, udt -> sb.append('\n').append(udt.toCqlString(false, false, true)).append(';')); sb.append('\n').append(metadata.toCqlString(false, false, false)); } @@ -620,39 +743,6 @@ public String toString() toString(sb); return sb.toString(); } - - private static final class ValueWithType - { - final ByteBuffer value; - final AbstractType type; - - private ValueWithType(ByteBuffer value, AbstractType type) - { - this.value = value; - this.type = type; - } - - @Override - public boolean equals(Object o) - { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ValueWithType value1 = (ValueWithType) o; - return value.equals(value1.value) && type.equals(value1.type); - } - - @Override - public int hashCode() - { - return Objects.hash(value, type); - } - - @Override - public String toString() - { - return type.toCQLString(value); - } - } } protected static abstract class CommonState extends BaseState diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java index 180741c4f881..a4341dddeb97 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java @@ -84,8 +84,8 @@ import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tools.nodetool.formatter.TableBuilder; -import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.ConfigGenBuilder; +import org.apache.cassandra.utils.LoggingCommand; import org.apache.cassandra.utils.Retry; import static accord.utils.Property.commands; @@ -491,35 +491,6 @@ protected interface CommandGen Command, Void, ?> apply(RandomSource rs, State state); } - private static class LoggingCommand extends Property.ForwardingCommand - { - private static final Logger logger = LoggerFactory.getLogger(LoggingCommand.class); - - private LoggingCommand(Command delegate) - { - super(delegate); - } - - @Override - public Result apply(State s) throws Throwable - { - String name = detailed(s); - long startNanos = Clock.Global.nanoTime(); - try - { - logger.info("Starting command: {}", name); - Result o = super.apply(s); - logger.info("Command {} was success after {}", name, Duration.ofNanos(Clock.Global.nanoTime() - startNanos)); - return o; - } - catch (Throwable t) - { - logger.warn("Command {} failed after {}: {}", name, Duration.ofNanos(Clock.Global.nanoTime() - startNanos), t.toString()); // don't want stack trace, just type/msg - throw t; - } - } - } - protected static class State implements AutoCloseable { final TopologyHistory topologyHistory; @@ -656,16 +627,7 @@ public String ipAddress(int nodeNum) }; } }); - commandsTransformers.add((state, commandGen) -> rs2 -> { - Command, Void, ?> c = commandGen.next(rs2); - if (!(c instanceof Property.MultistepCommand)) - return new LoggingCommand<>(c); - Property.MultistepCommand, Void> multistep = (Property.MultistepCommand, Void>) c; - List, Void, ?>> subcommands = new ArrayList<>(); - for (var sub : multistep) - subcommands.add(new LoggingCommand<>(sub)); - return multistep(subcommands); - }); + commandsTransformers.add(LoggingCommand.factory()); preActions.add(() -> { int[] up = topologyHistory.up(); // use the most recent node just in case the cluster isn't in-sync diff --git a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java index 8ffd96795b41..72148701de69 100644 --- a/test/unit/org/apache/cassandra/repair/FuzzTestBase.java +++ b/test/unit/org/apache/cassandra/repair/FuzzTestBase.java @@ -47,6 +47,7 @@ import java.util.function.Supplier; import javax.annotation.Nullable; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -103,6 +104,8 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.net.SimulatedMessageDelivery; import org.apache.cassandra.net.SimulatedMessageDelivery.SimulatedMessageReceiver; +import org.apache.cassandra.repair.RepairGenerators.PreviewType; +import org.apache.cassandra.repair.RepairGenerators.RepairType; import org.apache.cassandra.repair.messages.RepairMessage; import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.repair.messages.ValidationResponse; @@ -534,12 +537,6 @@ private static void findCorrectRange(MerkleTrees trees, Token token, Consumer tableNames) { return repairOption(rs, coordinator, ks, Gens.lists(Gens.pick(tableNames)).ofSizeBetween(1, tableNames.size()), Gens.enums().all(RepairType.class), Gens.enums().all(PreviewType.class), Gens.enums().all(RepairParallelism.class)); @@ -557,53 +554,13 @@ static RepairOption previewOption(RandomSource rs, Cluster.Node coordinator, Str private static RepairOption repairOption(RandomSource rs, Cluster.Node coordinator, String ks, Gen> tablesGen, Gen repairTypeGen, Gen previewTypeGen, Gen repairParallelismGen) { - RepairType type = repairTypeGen.next(rs); - PreviewType previewType = previewTypeGen.next(rs); - List args = new ArrayList<>(); - args.add(ks); - List tables = tablesGen.next(rs); - args.addAll(tables); - args.add("-pr"); - switch (type) - { - case IR: - // default - break; - case FULL: - args.add("--full"); - break; - default: - throw new AssertionError("Unsupported repair type: " + type); - } - switch (previewType) - { - case NONE: - break; - case REPAIRED: - args.add("--validate"); - break; - case UNREPAIRED: - args.add("--preview"); - break; - default: - throw new AssertionError("Unsupported preview type: " + previewType); - } - RepairParallelism parallelism = repairParallelismGen.next(rs); - switch (parallelism) - { - case SEQUENTIAL: - args.add("--sequential"); - break; - case PARALLEL: - // default - break; - case DATACENTER_AWARE: - args.add("--dc-parallel"); - break; - default: - throw new AssertionError("Unknown parallelism: " + parallelism); - } - if (rs.nextBoolean()) args.add("--optimise-streams"); + List args = new RepairGenerators.Builder(tablesGen.map(l -> ImmutableList.builderWithExpectedSize(l.size() + 1).add(ks).addAll(l).build())) + .withType(repairTypeGen) + .withPreviewType(previewTypeGen) + .withParallelism(repairParallelismGen) + .withRanges(i -> RepairGenerators.PRIMARY_RANGE) + .build() + .next(rs); RepairOption options = RepairOption.parse(Repair.parseOptionMap(() -> "test", args), DatabaseDescriptor.getPartitioner()); if (options.getRanges().isEmpty()) { diff --git a/test/unit/org/apache/cassandra/repair/RepairGenerators.java b/test/unit/org/apache/cassandra/repair/RepairGenerators.java new file mode 100644 index 000000000000..2175a37789d9 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/RepairGenerators.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair; + +import java.util.ArrayList; +import java.util.List; + +import accord.utils.Gen; +import accord.utils.Gens; + +public class RepairGenerators +{ + public static final List LOCAL_RANGE = List.of(); + public static final List PRIMARY_RANGE = List.of("-pr"); // repair calls this partition range, but StorageService calls this primary + + public enum RepairType + { + FULL("--full"), + IR(""); + + public final String arg; + + RepairType(String s) + { + this.arg = s; + } + } + + public enum PreviewType + { + NONE(""), + REPAIRED("--validate"), + UNREPAIRED("--preview"); + + public final String arg; + + PreviewType(String s) + { + this.arg = s; + } + } + + public static boolean isPreview(List args) + { + return args.stream().anyMatch(s -> PreviewType.REPAIRED.arg.equals(s) + || PreviewType.UNREPAIRED.arg.equals(s)); + } + + public static PreviewType previewType(List args) + { + for (String s : args) + { + if (PreviewType.REPAIRED.arg.equals(s)) + return PreviewType.REPAIRED; + if (PreviewType.UNREPAIRED.arg.equals(s)) + return PreviewType.UNREPAIRED; + } + return PreviewType.NONE; + } + + public static boolean isFull(List args) + { + return args.stream().anyMatch(s -> RepairType.FULL.arg.equals(s)); + } + + public static boolean isIncremental(List args) + { + return !isFull(args); + } + + + public static class Builder + { + final Gen> tablesGen; + Gen typeGen = Gens.enums().all(RepairType.class); + Gen previewTypeGen = Gens.enums().all(PreviewType.class); + Gen> ranges = Gens.pick(List.of(), PRIMARY_RANGE); + Gen optimizeStreamsGen = Gens.bools().all(); + Gen parallelismGen = Gens.enums().all(RepairParallelism.class); + Gen skipPaxosGen = i -> false; + Gen skipAccordGen = i -> false; + + public Builder(Gen> tablesGen) + { + this.tablesGen = tablesGen; + } + + public Builder withType(Gen typeGen) + { + this.typeGen = typeGen; + return this; + } + + public Builder withPreviewType(Gen previewTypeGen) + { + this.previewTypeGen = previewTypeGen; + return this; + } + + public Builder withRanges(Gen> ranges) + { + this.ranges = ranges; + return this; + } + + public Builder withOptimizeStreams(Gen optimizeStreamsGen) + { + this.optimizeStreamsGen = optimizeStreamsGen; + return this; + } + + public Builder withParallelism(Gen parallelismGen) + { + this.parallelismGen = parallelismGen; + return this; + } + + public Builder withSkipPaxosGen(Gen skipPaxosGen) + { + this.skipPaxosGen = skipPaxosGen; + return this; + } + + public Builder withSkipAccordGen(Gen skipAccordGen) + { + this.skipAccordGen = skipAccordGen; + return this; + } + + public Gen> build() + { + return rs -> { + RepairType type = typeGen.next(rs); + PreviewType previewType = previewTypeGen.next(rs); + List args = new ArrayList<>(); + args.addAll(tablesGen.next(rs)); + args.addAll(ranges.next(rs)); + if (skipPaxosGen.next(rs)) + args.add("--skip-paxos"); + if (skipAccordGen.next(rs)) + args.add("--skip-accord"); + switch (type) + { + case IR: + // default + break; + case FULL: + args.add(type.arg); + break; + default: + throw new AssertionError("Unsupported repair type: " + type); + } + switch (previewType) + { + case NONE: + break; + case REPAIRED: + case UNREPAIRED: + args.add(previewType.arg); + break; + default: + throw new AssertionError("Unsupported preview type: " + previewType); + } + RepairParallelism parallelism = parallelismGen.next(rs); + switch (parallelism) + { + case SEQUENTIAL: + args.add("--sequential"); + break; + case PARALLEL: + // default + break; + case DATACENTER_AWARE: + args.add("--dc-parallel"); + break; + default: + throw new AssertionError("Unknown parallelism: " + parallelism); + } + if (optimizeStreamsGen.next(rs)) + args.add("--optimise-streams"); + return args; + }; + } + } +} diff --git a/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java b/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java index d4b7393dcd83..7db8b56c1848 100644 --- a/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java +++ b/test/unit/org/apache/cassandra/utils/ImmutableUniqueList.java @@ -62,6 +62,14 @@ public static ImmutableUniqueList empty() return (ImmutableUniqueList) EMPTY; } + public static ImmutableUniqueList of(T... values) + { + Builder builder = builder(values.length); + for (T v : values) + builder.add(v); + return builder.build(); + } + public AsSet asSet() { if (asSet != null) return asSet; diff --git a/test/unit/org/apache/cassandra/utils/LoggingCommand.java b/test/unit/org/apache/cassandra/utils/LoggingCommand.java new file mode 100644 index 000000000000..190192819b36 --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/LoggingCommand.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.function.BiFunction; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import accord.utils.Gen; +import accord.utils.Property; +import accord.utils.Property.Command; + +import static accord.utils.Property.multistep; + +public class LoggingCommand extends Property.ForwardingCommand +{ + private static final Logger logger = LoggerFactory.getLogger(LoggingCommand.class); + + public LoggingCommand(Command delegate) + { + super(delegate); + } + + public static BiFunction>, Gen>> factory() + { + return (state, commandGen) -> rs -> { + Command c = commandGen.next(rs); + if (!(c instanceof Property.MultistepCommand)) + return new LoggingCommand<>(c); + Property.MultistepCommand multistep = (Property.MultistepCommand) c; + List> subcommands = new ArrayList<>(); + for (var sub : multistep) + subcommands.add(new LoggingCommand<>(sub)); + return multistep(subcommands); + }; + } + + @Override + public Result apply(State s) throws Throwable + { + String name = detailed(s); + long startNanos = Clock.Global.nanoTime(); + try + { + logger.info("Starting command: {}", name); + Result o = super.apply(s); + logger.info("Command {} was success after {}", name, Duration.ofNanos(Clock.Global.nanoTime() - startNanos)); + return o; + } + catch (Throwable t) + { + logger.warn("Command {} failed after {}: {}", name, Duration.ofNanos(Clock.Global.nanoTime() - startNanos), t.toString()); // don't want stack trace, just type/msg + throw t; + } + } +} From ccc92767ff92a30484cac83bf0c044f53beb55e5 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Fri, 2 May 2025 17:02:03 -0700 Subject: [PATCH 299/340] zero copy streaming allocates direct memory that isnt used, but does help to fragment the memory space patch by David Capwell; reviewed by Yifan Cai for CASSANDRA-20577 --- .../io/sstable/SSTableZeroCopyWriter.java | 47 ++++++++++++++----- .../cassandra/io/util/SequentialWriter.java | 9 +++- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java b/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java index 3bf21f1155ec..46a490974e3e 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableZeroCopyWriter.java @@ -38,9 +38,12 @@ import org.apache.cassandra.io.FSWriteError; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.SequentialWriter; +import org.apache.cassandra.io.util.SequentialWriterOption; import org.apache.cassandra.net.AsyncStreamingInputPlus; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.utils.ByteBufferUtil; import static java.lang.String.format; import static org.apache.cassandra.utils.FBUtilities.prettyPrintMemory; @@ -50,7 +53,7 @@ public class SSTableZeroCopyWriter extends SSTable implements SSTableMultiWriter private static final Logger logger = LoggerFactory.getLogger(SSTableZeroCopyWriter.class); private volatile SSTableReader finalReader; - private final Map componentWriters; // indexed by component name + private final Map componentWriters; // indexed by component name public SSTableZeroCopyWriter(Builder builder, LifecycleNewTracker lifecycleNewTracker, @@ -89,12 +92,12 @@ public AbstractBounds getBounds() throw new UnsupportedOperationException(); } - private SequentialWriter makeWriter(Descriptor descriptor, Component component) + private ZeroCopySequentialWriter makeWriter(Descriptor descriptor, Component component) { - return new SequentialWriter(descriptor.fileFor(component), ioOptions.writerOptions, false); + return new ZeroCopySequentialWriter(descriptor.fileFor(component), ioOptions.writerOptions, false); } - private void write(DataInputPlus in, long size, SequentialWriter out) throws FSWriteError + private void write(DataInputPlus in, long size, ZeroCopySequentialWriter out) throws FSWriteError { final int BUFFER_SIZE = 1 << 20; long bytesRead = 0; @@ -128,7 +131,7 @@ public Collection finish(boolean openResult) { setOpenResult(openResult); - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) writer.finish(); return finished(); @@ -170,7 +173,7 @@ public TableId getTableId() @Override public Throwable commit(Throwable accumulate) { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) accumulate = writer.commit(accumulate); return accumulate; } @@ -178,7 +181,7 @@ public Throwable commit(Throwable accumulate) @Override public Throwable abort(Throwable accumulate) { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) accumulate = writer.abort(accumulate); return accumulate; } @@ -186,29 +189,30 @@ public Throwable abort(Throwable accumulate) @Override public void prepareToCommit() { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) writer.prepareToCommit(); } @Override public void close() { - for (SequentialWriter writer : componentWriters.values()) + for (ZeroCopySequentialWriter writer : componentWriters.values()) writer.close(); } public void writeComponent(Component component, DataInputPlus in, long size) throws ClosedChannelException { - SequentialWriter writer = componentWriters.get(component.name); + ZeroCopySequentialWriter writer = componentWriters.get(component.name); logger.info("Writing component {} to {} length {}", component, writer.getPath(), prettyPrintMemory(size)); if (in instanceof AsyncStreamingInputPlus) write((AsyncStreamingInputPlus) in, size, writer); else + // this code path is not valid for production and only exists to simplify unit tests write(in, size, writer); } - private void write(AsyncStreamingInputPlus in, long size, SequentialWriter writer) throws ClosedChannelException + private void write(AsyncStreamingInputPlus in, long size, ZeroCopySequentialWriter writer) throws ClosedChannelException { logger.info("Block Writing component to {} length {}", writer.getPath(), prettyPrintMemory(size)); @@ -233,4 +237,25 @@ private void write(AsyncStreamingInputPlus in, long size, SequentialWriter write throw new FSWriteError(e, writer.getPath()); } } + + private static class ZeroCopySequentialWriter extends SequentialWriter + { + private ZeroCopySequentialWriter(File file, SequentialWriterOption option, boolean strictFlushing) + { + super(file, ByteBufferUtil.EMPTY_BYTE_BUFFER, option, strictFlushing); + } + + /** + * In production, we do not expect this method to be called, as only writeDirectlyToChannel should be invoked for zero-copy. + *

      + * This method only exists for tests. + */ + @Override + public void write(byte[] b, int off, int len) throws IOException + { + if (this.buffer == ByteBufferUtil.EMPTY_BYTE_BUFFER) + this.buffer = option.allocateBuffer(); + super.write(b, off, len); + } + } } diff --git a/src/java/org/apache/cassandra/io/util/SequentialWriter.java b/src/java/org/apache/cassandra/io/util/SequentialWriter.java index 69643be98730..c3a90732eead 100644 --- a/src/java/org/apache/cassandra/io/util/SequentialWriter.java +++ b/src/java/org/apache/cassandra/io/util/SequentialWriter.java @@ -56,7 +56,7 @@ public class SequentialWriter extends BufferedDataOutputStreamPlus implements Tr // whether to do trickling fsync() to avoid sudden bursts of dirty buffer flushing by kernel causing read // latency spikes - private final SequentialWriterOption option; + protected final SequentialWriterOption option; private int bytesSinceTrickleFsync = 0; protected long lastFlushOffset; @@ -163,7 +163,12 @@ public SequentialWriter(File file, SequentialWriterOption option) */ public SequentialWriter(File file, SequentialWriterOption option, boolean strictFlushing) { - super(openChannel(file), option.allocateBuffer()); + this(file, option.allocateBuffer(), option, strictFlushing); + } + + protected SequentialWriter(File file, ByteBuffer buffer, SequentialWriterOption option, boolean strictFlushing) + { + super(openChannel(file), buffer); this.strictFlushing = strictFlushing; this.fchannel = (FileChannel)channel; From f2bf017e6dd20fb6ba7e42a9a6e5cae4e92f8a0c Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 5 May 2025 11:23:26 -0700 Subject: [PATCH 300/340] Fix AutoRepair Flaky InJvm dtest Patch by Jaydeepkumar Chovatia; Reviewed by Andy Tolbert, Chris Lohfink for CASSANDRA-20620 --- CHANGES.txt | 1 + .../repair/autorepair/AutoRepair.java | 29 ++++++++++--------- ...allelReplicaRepairAcrossSchedulesTest.java | 4 ++- .../test/repair/AutoRepairSchedulerTest.java | 15 ++++++---- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f214877478b8..46e0afd6b063 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Fix AutoRepair flaky InJvm dtest (CASSANDRA-20620) * Increasing default for auto_repair.sstable_upper_threshold considering large Cassandra tables & revert three lines removed from CHANGES.txt due to a merge mistake (CASSANDRA-20586) * Fix token restrictions with MIN_TOKEN (CASSANDRO-20557) * Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 (CASSANDRA-20429) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 8c08ce7c80cb..09e4a62a480f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -196,7 +196,7 @@ public void repair(AutoRepairConfig.RepairType repairType) boolean primaryRangeOnly = config.getRepairPrimaryTokenRangeOnly(repairType) && turn != MY_TURN_FORCE_REPAIR; - long startTime = timeFunc.get(); + long startTimeInMillis = timeFunc.get(); logger.info("My host id: {}, my turn to run repair...repair primary-ranges only? {}", myId, config.getRepairPrimaryTokenRangeOnly(repairType)); AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, timeFunc.get(), turn); @@ -242,7 +242,7 @@ public void repair(AutoRepairConfig.RepairType repairType) repairKeyspace(repairType, primaryRangeOnly, repairAssignments.getKeyspaceName(), repairAssignments.getRepairAssignments(), collectedRepairStats); } - cleanupAndUpdateStats(turn, repairType, repairState, myId, startTime, collectedRepairStats); + cleanupAndUpdateStats(turn, repairType, repairState, myId, startTimeInMillis, collectedRepairStats); } else { @@ -318,8 +318,8 @@ private void repairKeyspace(AutoRepairConfig.RepairType repairType, boolean prim while (retryCount <= config.getRepairMaxRetries(repairType)) { RepairCoordinator task = repairState.getRepairRunnable(keyspaceName, - Lists.newArrayList(curRepairAssignment.getTableNames()), - ranges, primaryRangeOnly); + Lists.newArrayList(curRepairAssignment.getTableNames()), + ranges, primaryRangeOnly); RepairProgressListener listener = new RepairProgressListener(repairType); task.addProgressListener(listener); f = repairRunnableExecutors.get(repairType).submit(task); @@ -449,7 +449,7 @@ private List retrieveTablesToBeRepaired(Keyspace keyspace, AutoRepairCon } private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairType, AutoRepairState repairState, UUID myId, - long startTime, CollectedRepairStats collectedRepairStats) throws InterruptedException + long startTimeInMillis, CollectedRepairStats collectedRepairStats) throws InterruptedException { //if it was due to priority then remove it now if (turn == MY_TURN_DUE_TO_PRIORITY) @@ -457,12 +457,19 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType logger.info("Remove current host from priority list"); AutoRepairUtils.removePriorityStatus(repairType, myId); } - + long repairScheduleElapsedInMillis = timeFunc.get() - startTimeInMillis; + if (repairScheduleElapsedInMillis < SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds()) + { + //If repair finished quickly, happens for Cassndra cluster with empty (or tiny) data, in such cases, + //wait for some duration so that the JMX metrics can detect the repairInProgress + logger.info("Wait for {}ms for repair type {}.", SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds() - repairScheduleElapsedInMillis, repairType); + Thread.sleep(SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds() - repairScheduleElapsedInMillis); + } repairState.setFailedTokenRangesCount(collectedRepairStats.failedTokenRanges); repairState.setSucceededTokenRangesCount(collectedRepairStats.succeededTokenRanges); repairState.setSkippedTokenRangesCount(collectedRepairStats.skippedTokenRanges); repairState.setSkippedTablesCount(collectedRepairStats.skippedTables); - repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTime)); + repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(repairScheduleElapsedInMillis)); long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + "repairTokenRangesSuccessCount {}, repairTokenRangesFailureCount {}, " + @@ -477,13 +484,7 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType TimeUnit.SECONDS.toDays(repairState.getClusterRepairTimeInSec())); } repairState.setLastRepairTime(timeFunc.get()); - if (timeInHours == 0 && SLEEP_IF_REPAIR_FINISHES_QUICKLY.toSeconds() > 0) - { - //If repair finished quickly, happens for an empty instance, in such case - //wait for some duration so that the JMX metrics can detect the repairInProgress - logger.info("Wait for {} for repair type {}.", SLEEP_IF_REPAIR_FINISHES_QUICKLY, repairType); - Thread.sleep(SLEEP_IF_REPAIR_FINISHES_QUICKLY.toMilliseconds()); - } + repairState.setRepairInProgress(false); AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId, timeFunc.get()); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java index a00e713cf298..e1ccbcb8da1b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerDisallowParallelReplicaRepairAcrossSchedulesTest.java @@ -36,6 +36,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.FBUtilities; import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertEquals; @@ -114,7 +115,8 @@ public void testScheduler() cluster.forEach(i -> i.runOnInstance(() -> { // Expect contention on incremental repair across schedules AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); - Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", + Util.spinAssert(String.format("%s: AutoRepair has not observed any replica contention in INCREMENTAL repair", + FBUtilities.getJustBroadcastAddress().toString()), greaterThan(0L), incrementalMetrics.repairDelayedBySchedule::getCount, 5, diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index e726dc1a17f7..4df58213aa52 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -44,6 +44,7 @@ import org.apache.cassandra.repair.autorepair.AutoRepair; import org.apache.cassandra.repair.autorepair.AutoRepairConfig; import org.apache.cassandra.service.AutoRepairService; +import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.hamcrest.Matchers.greaterThan; @@ -78,11 +79,11 @@ public static void init() throws IOException ImmutableMap.of( "initial_scheduler_delay", "5s", "enabled", "true", - "parallel_repair_count", "2", + "parallel_repair_count", "3", // Allow parallel replica repair to allow replicas // to execute full repair at same time. "allow_parallel_replica_repair", "true", - "min_repair_interval", "15s"), + "min_repair_interval", "5s"), AutoRepairConfig.RepairType.INCREMENTAL.getConfigName(), ImmutableMap.of( "initial_scheduler_delay", "5s", @@ -137,19 +138,21 @@ public void testScheduler() throws ParseException // validate that the repair ran on all nodes cluster.forEach(i -> i.runOnInstance(() -> { + String broadcastAddress = FBUtilities.getJustBroadcastAddress().toString(); + // Reduce sleeping if repair finishes quickly to speed up test but make it non-zero to provoke some // contention. - AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("1s"); + AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("2s"); AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); - Util.spinAssert("AutoRepair has not yet completed one INCREMENTAL repair cycle", + Util.spinAssert(String.format("%s: AutoRepair has not yet completed one INCREMENTAL repair cycle", broadcastAddress), greaterThan(0L), () -> incrementalMetrics.nodeRepairTimeInSec.getValue().longValue(), 5, TimeUnit.MINUTES); // Expect some contention on incremental repair. - Util.spinAssert("AutoRepair has not observed any replica contention in INCREMENTAL repair", + Util.spinAssert(String.format("%s: AutoRepair has not observed any replica contention in INCREMENTAL repair", broadcastAddress), greaterThan(0L), incrementalMetrics.repairDelayedByReplica::getCount, 5, @@ -159,7 +162,7 @@ public void testScheduler() throws ParseException assertEquals(0L, incrementalMetrics.repairDelayedBySchedule.getCount()); AutoRepairMetrics fullMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.FULL); - Util.spinAssert("AutoRepair has not yet completed one FULL repair cycle", + Util.spinAssert(String.format("%s: AutoRepair has not yet completed one FULL repair cycle", broadcastAddress), greaterThan(0L), () -> fullMetrics.nodeRepairTimeInSec.getValue().longValue(), 5, From f6eb4a6b31d06108f073dba7dfa04732d2abbf7b Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Mon, 5 May 2025 18:39:07 -0700 Subject: [PATCH 301/340] Fix a bug in AutoRepair duration metric calculation if schedule finishes quickly patch by Jaydeepkumar Chovatia; reviewed by Andy Tolbert for CASSANDRA-20622 --- CHANGES.txt | 1 + .../apache/cassandra/repair/autorepair/AutoRepair.java | 2 +- .../distributed/test/repair/AutoRepairSchedulerTest.java | 8 ++++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 46e0afd6b063..efed7d42d4f0 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Fix a bug in AutoRepair duration metric calculation if schedule finishes quickly (CASSANDRA-20622) * Fix AutoRepair flaky InJvm dtest (CASSANDRA-20620) * Increasing default for auto_repair.sstable_upper_threshold considering large Cassandra tables & revert three lines removed from CHANGES.txt due to a merge mistake (CASSANDRA-20586) * Fix token restrictions with MIN_TOKEN (CASSANDRO-20557) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index 09e4a62a480f..e5923e3c9c38 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -469,7 +469,7 @@ private void cleanupAndUpdateStats(RepairTurn turn, AutoRepairConfig.RepairType repairState.setSucceededTokenRangesCount(collectedRepairStats.succeededTokenRanges); repairState.setSkippedTokenRangesCount(collectedRepairStats.skippedTokenRanges); repairState.setSkippedTablesCount(collectedRepairStats.skippedTables); - repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(repairScheduleElapsedInMillis)); + repairState.setNodeRepairTimeInSec((int) TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() - startTimeInMillis)); long timeInHours = TimeUnit.SECONDS.toHours(repairState.getNodeRepairTimeInSec()); logger.info("Local {} repair time {} hour(s), stats: repairKeyspaceCount {}, " + "repairTokenRangesSuccessCount {}, repairTokenRangesFailureCount {}, " + diff --git a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java index 4df58213aa52..adca5070828e 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairSchedulerTest.java @@ -48,6 +48,7 @@ import static org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.junit.Assert.assertEquals; /** @@ -145,8 +146,11 @@ public void testScheduler() throws ParseException AutoRepair.SLEEP_IF_REPAIR_FINISHES_QUICKLY = new DurationSpec.IntSecondsBound("2s"); AutoRepairMetrics incrementalMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.INCREMENTAL); + // Since the AutoRepair sleeps up to SLEEP_IF_REPAIR_FINISHES_QUICKLY if the repair finishes quickly, + // so the "nodeRepairTimeInSec" metric should at least be greater than or equal to + // SLEEP_IF_REPAIR_FINISHES_QUICKLY Util.spinAssert(String.format("%s: AutoRepair has not yet completed one INCREMENTAL repair cycle", broadcastAddress), - greaterThan(0L), + greaterThanOrEqualTo(2L), () -> incrementalMetrics.nodeRepairTimeInSec.getValue().longValue(), 5, TimeUnit.MINUTES); @@ -163,7 +167,7 @@ public void testScheduler() throws ParseException AutoRepairMetrics fullMetrics = AutoRepairMetricsManager.getMetrics(AutoRepairConfig.RepairType.FULL); Util.spinAssert(String.format("%s: AutoRepair has not yet completed one FULL repair cycle", broadcastAddress), - greaterThan(0L), + greaterThanOrEqualTo(2L), () -> fullMetrics.nodeRepairTimeInSec.getValue().longValue(), 5, TimeUnit.MINUTES); From 0669db7a042b30276ee87046e8c748a071ee71dc Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Mon, 5 May 2025 15:20:54 +0200 Subject: [PATCH 302/340] Write user docs for CEP-24 - Password validation / generation patch by Stefan Miklosovic for CASSANDRA-20619 --- doc/modules/cassandra/nav.adoc | 1 + .../pages/managing/operating/index.adoc | 1 + .../operating/password_validation.adoc | 325 ++++++++++++++++++ .../pages/managing/operating/security.adoc | 6 + 4 files changed, 333 insertions(+) create mode 100644 doc/modules/cassandra/pages/managing/operating/password_validation.adoc diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc index 311bfb16c585..8c28d631555a 100644 --- a/doc/modules/cassandra/nav.adoc +++ b/doc/modules/cassandra/nav.adoc @@ -106,6 +106,7 @@ **** xref:cassandra:managing/operating/topo_changes.adoc[Topology changes] **** xref:cassandra:managing/operating/transientreplication.adoc[Transient replication] **** xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] +**** xref:cassandra:managing/operating/password_validation.adoc[Password validation] *** xref:cassandra:managing/tools/index.adoc[Tools] **** xref:cassandra:managing/tools/cqlsh.adoc[cqlsh: the CQL shell] **** xref:cassandra:managing/tools/nodetool/nodetool.adoc[nodetool] diff --git a/doc/modules/cassandra/pages/managing/operating/index.adoc b/doc/modules/cassandra/pages/managing/operating/index.adoc index 492af4dfec3b..2fb98594316d 100644 --- a/doc/modules/cassandra/pages/managing/operating/index.adoc +++ b/doc/modules/cassandra/pages/managing/operating/index.adoc @@ -19,3 +19,4 @@ * xref:cassandra:managing/operating/topo_changes.adoc[Topology changes] * xref:cassandra:managing/operating/transientreplication.adoc[Transient replication] * xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] +* xref:cassandra:managing/operating/password_validation.adoc[Password validation] diff --git a/doc/modules/cassandra/pages/managing/operating/password_validation.adoc b/doc/modules/cassandra/pages/managing/operating/password_validation.adoc new file mode 100644 index 000000000000..f6ad0fa812a3 --- /dev/null +++ b/doc/modules/cassandra/pages/managing/operating/password_validation.adoc @@ -0,0 +1,325 @@ += Password validation and generation +:navtitle: Password validation and generation +:description: Password validation and generation - How it works, how to configure it, and more. +:keywords: CEP-24, Password, Generation, Validation, Security + +Here’s the problem: while users have always had the ability to create whatever password they wanted in Cassandra - +from straightforward to incredibly complex and everything in between–this ultimately created a noticeable security vulnerability. + +While organizations might have internal processes for generating secure passwords that adhere to their own security policies, +Cassandra itself did not have the means to enforce these standards. To make the security vulnerability worse, +if a password initially met internal security guidelines, users could later downgrade their password to +a less secure option simply by using `ALTER ROLE` statements. + +When internal password requirements are enforced for an individual, users face the additional +burden of creating compliant passwords. This inevitably involved lots of trial-and-error in attempting +to create a compliant password that satisfied complex security roles. + +But what if there was a way to have Cassandra automatically create passwords that meet all +bespoke security requirements–but without requiring manual effort from users or system operators? + +That’s why we developed https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=228494146[CEP-24: Password validation/generation]. +We recognized that the complexity of secure password management could be significantly reduced (or eliminated entirely) +with the right approach –and improving both security and user experience at the same time. + +== The Goals of CEP-24 + +A Cassandra Enhancement Proposal (or CEP) is a structured process for proposing, creating, and ultimately implementing +new features for the Cassandra project. All CEPs are thoroughly vetted among the Cassandra community before +they are officially integrated into the project. + +These were the key goals we established for CEP-24: + +* Introduce a way to enforce password strength upon role creation or role alteration. +* Implement a reference implementation of a password validator which adheres to a recommended password strength policy, +to be used for Cassandra users out of the box. +* Emit a warning (and proceed) or just reject `CREATE ROLE` and `ALTER ROLE` statements when the provided +password does not meet a certain security level, based on user configuration of Cassandra. +* To be able to implement a custom password validator with its own policy, whatever it might be, +and provide a modular/pluggable mechanism to do so. +* Provide a way for Cassandra to generate a password which would pass the subsequent validation for use by the user. + +The Cassandra Password Validator and Generator builds upon an established framework in Cassandra called Guardrails, +which was originally implemented under CEP-3 (more details https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-3%3A+Guardrails[here]). + +== Implementation and configuration + +The password validator implements a custom guardrail introduced as part of CEP-24. A custom guardrail can validate and +generate values of arbitrary types when properly implemented. In the CEP-24 context, +the password guardrail provides `CassandraPasswordValidator` by extending `ValueValidator`, +while passwords are generated by `CassandraPasswordGenerator` by extending `ValueGenerator`. +Both components work with passwords as `String` type values. + +Password validation and generation are configured in `cassandra.yaml` file under the `password_validator` section. +Let’s explore the key configuration properties available. + +First, the `class_name` and `generator_class_name` parameters +specify which validator and generator classes will be used to validate and generate passwords respectively. + +Cassandra ships `CassandraPasswordValidator` and `CassandraPasswordGenerator` out of the box. +However, if a particular enterprise decides that they need something very custom, they are free to implement their own validators, +put it on Cassandra’s class path and reference it in the configuration behind `class_name` parameter. Same for the validator. + +CEP-24 provides implementations of the validator and generator that the Cassandra team believes will satisfy +the requirements of most users. These default implementations address common password security needs. +However, the framework is designed with flexibility in mind, allowing organizations to implement custom validation +and generation rules that align with their specific security policies and business requirements. + +---- +password_validator: +# Implementation class of a validator. When not in form of FQCN, the +# package name org.apache.cassandra.db.guardrails.validators is prepended. +# By default, there is no validator. + class_name: CassandraPasswordValidator +# Implementation class of related generator which generates values +# which are valid when tested against this validator. +# When not in form of FQCN, the package name +# org.apache.cassandra.db.guardrails.generators is prepended. +# By default, there is no generator. + generator_class_name: CassandraPasswordGenerator +---- + +Password quality might be looked at as the number of _characteristics_ a password satisfies. +There are two levels for any password to be evaluated – warning level and failure level. +Warning and failure levels nicely fit into how Guardrails act. Every guardrail has warning and failure thresholds. +Based on what value a specific guardrail evaluates, it will either emit a warning to a user that its usage +is discouraged (but ultimately allowed), or it will fail to be set altogether. + +This same principle applies to password evaluation – each password is assessed against both warning and failure thresholds. +These thresholds are determined by counting the characteristics present in the password. + +The system evaluates five key characteristics: + +* the password’s overall length +* the number of uppercase characters +* the number of lowercase characters +* the number of special characters +* and the number of digits. + +A comprehensive password security policy can be enforced by configuring minimum requirements for each of these characteristics. + +---- + # There are four characteristics (excluding password's length): + # upper-case, lower-case, special character and digit. + # If this value is set e.g. to 3, a password has to + # consist of 3 out of 4 characteristics. + # For example, it has to contain at least 2 upper-case characters, + # 2 lower-case, and 2 digits to pass, + # but it does not have to contain any special characters. + # If the number of characteristics found in the password is + # less than or equal to this number, it will emit a warning. + characteristic_warn: 3 + # If the number of characteristics found in the password is + #less than or equal to this number, it will emit a failure. + characteristic_fail: 2 +---- + +Next, there are configuration parameters for each characteristic which count towards warning or failure: +---- + +# If the password is shorter than this value, +# the validator will emit a warning. +length_warn: 12 +# If a password is shorter than this value, +# the validator will emit a failure. +length_fail: 8 +# If a password does not contain at least n +# upper-case characters, the validator will emit a warning. +upper_case_warn: 2 +# If a password does not contain at least +# n upper-case characters, the validator will emit a failure. +upper_case_fail: 1 +# If a password does not contain at least +# n lower-case characters, the validator will emit a warning. +lower_case_warn: 2 +# If a password does not contain at least +# n lower-case characters, the validator will emit a failure. +lower_case_fail: 1 +# If a password does not contain at least +# n digits, the validator will emit a warning. +digit_warn: 2 +# If a password does not contain at least +# n digits, the validator will emit a failure. +digit_fail: 1 +# If a password does not contain at least +# n special characters, the validator will emit a warning. +special_warn: 2 +# If a password does not contain at least +# n special characters, the validator will emit a failure. +special_fail: 1 +---- + +It is also possible to say that illegal sequences of certain length found in a password will be forbidden: + +---- +# If a password contains illegal sequences that are at least this long, it is invalid. +# Illegal sequences might be either alphabetical (form 'abcde'), +# numerical (form '34567'), or US qwerty (form 'asdfg') as well +# as sequences from supported character sets. +# The minimum value for this property is 3, +# by default it is set to 5. +illegal_sequence_length: 5 +---- + +Lastly, it is also possible to configure a dictionary of passwords to check against. +That way, we will be checking against password dictionary attacks. +It is up to the operator of a cluster to configure the password dictionary: + +---- +# Dictionary to check the passwords against. Defaults to no dictionary. +# Whole dictionary is cached into memory. Use with caution with relatively big dictionaries. +# Entries in a dictionary, one per line, have to be sorted per String's compareTo contract. +dictionary: /path/to/dictionary/file +---- + +Now that we have gone over all the configuration parameters, let’s take a look at an example of how password +validation and generation look in practice. + +=== Validation and generation of a password + +Consider a scenario where a Cassandra super-user (such as the default ‘cassandra’ role) attempts +to create a new role named ‘alice’. + +---- +cassandra@cqlsh> CREATE ROLE alice WITH PASSWORD = 'cassandraisadatabase' AND LOGIN = true; +InvalidRequest: Error from server: code=2200 [Invalid query] +message="Password was not set as it violated configured password +strength policy. To fix this error, the following has to be resolved: +Password contains the dictionary word 'cassandraisadatabase'. You may also use +'GENERATED PASSWORD' upon role creation or alteration." +---- + +The password is in the dictionary. When an operator sees this, +they will try to fix it by creating some random password not in dictionary: + +---- +cassandra@cqlsh> CREATE ROLE alice WITH PASSWORD = 'T8aum3?' AND LOGIN = true; +InvalidRequest: Error from server: code=2200 [Invalid query] +message="Password was not set as it violated configured password strength +policy. To fix this error, the following has to be resolved: Password +must be 8 or more characters in length. You may also use +'GENERATED PASSWORD' upon role creation or alteration." +---- + +Password is not in the dictionary, but it is not long enough. In the following example, +the password is finally set, but it is not considered to be secure enough. +It satisfies the minimum requirements but our validator identified that not all characteristics were met. + +---- +cassandra@cqlsh> CREATE ROLE alice WITH PASSWORD = 'mYAtt3mp' AND LOGIN = true; + +Warnings: + +Guardrail password violated: Password was set, however it might not be +strong enough according to the configured password strength policy. +To fix this warning, the following has to be resolved: Password must be 12 or more +characters in length. Passwords must contain 2 or more digit characters. Password +must contain 2 or more special characters. Password matches 2 of 4 character rules, +but 4 are required. You may also use 'GENERATED PASSWORD' upon role creation or alteration. +---- + +When an operator saw this, they noticed the note about the `GENERATED PASSWORD` clause which will +generate a password automatically without an operator needing to invent it on their own. +This is a lot of times, as shown, a cumbersome process better to be left on a machine. + +---- +cassandra@cqlsh> ALTER ROLE alice WITH GENERATED PASSWORD; + +generated_password +------------------ + R7tb33?.mcAX +---- + +The generated password shown above will satisfy all the rules we have configured in `cassandra.yaml` automatically. +Every generated password will satisfy all the rules. This is clearly an advantage over manual password generation. + +When the CQL statement is executed, it will be visible in the CQLSH history (`HISTORY` command or in `cqlsh_history` file) +but the password will not be logged, hence it cannot leak. It will also not appear in any auditing logs. +Previously, Cassandra had to obfuscate such statements. This is not necessary anymore. + +We can create a role with generated password like this: + +---- +cassandra@cqlsh> CREATE ROLE alice WITH GENERATED PASSWORD AND LOGIN = true; +---- + +or by `CREATE USER`: + +---- +cassandra@cqlsh> CREATE USER alice WITH GENERATED PASSWORD; +---- + +When a password is generated for `alice` she can log in: + +---- +$ cqlsh -u alice -p R7tb33?.mcAX +... +alice@cqlsh> +---- + +NOTE: It is recommended to save password to ~/.cassandra/credentials, for example: + +---- +[PlainTextAuthProvider] +username = cassandra +password = R7tb33?.mcAX +---- + +and by setting auth_provider in `~/.cassandra/cqlshrc` + +---- +[auth_provider] +module = cassandra.auth +classname = PlainTextAuthProvider +---- + +It is also possible to configure password validators in such a way that a user does not see why a password failed. +This is driven by configuration property for `password_validator` called `detailed_messages`. When set to `false`, +the violations will be very brief: + +---- +alice@cqlsh> ALTER ROLE alice WITH PASSWORD = 'myattempt'; + +InvalidRequest: Error from server: code=2200 [Invalid query] +message="Password was not set as it violated configured password strength policy. +You may also use 'GENERATED PASSWORD' upon role creation or alteration." +---- + +Several potential enhancements to password generation and validation could be implemented in future releases. +One promising extension would be validating new passwords against previous values. +This would prevent users from reusing passwords until after they’ve created a specified number of different passwords. +A related enhancement could include restricting how frequently users can change their passwords, +preventing rapid cycling through passwords to circumvent history-based restrictions. + +These features, while valuable for comprehensive password security, were considered beyond the scope of the initial +implementation and may be addressed in future updates. + +=== Runtime configuration + +Since this solution is based on guardrails which are configurable via JMX in runtime, same hold for +password validator, also configured via `GuardrailsMBean` as any other guardrails. There are two methods exposed: + +* `Map getPasswordValidatorConfig()` - gets password validator configuration +* `void reconfigurePasswordValidator(Map config)` - reconfigures the password validator by reading +and parsing the configuration from the provided map. Reconfiguration of password validator in runtime is considered +to be very sensitive operation. If an operator evaluates the reconfiguration in runtime is not allowed, they +might set `password_validator_reconfiguration_enabled` to `false` in `cassandra.yaml` to disable it. + +=== Diagnostic events + +If diagnostic event's framework is enabled and consumers are subscribed to them, diagnostic events about +warning and failures to generate a password will be published. + +=== Final thoughts and next steps + +The Cassandra Password Validator and Generator implemented under CEP-24 +represents a significant improvement in Cassandra’s security posture. + +By providing robust, configurable password policies with built-in enforcement mechanisms and +convenient password generation capabilities, organizations can now ensure compliance with their +security standards directly at the database level. This not only strengthens overall system security +but also improves the user experience by eliminating guesswork around password requirements. + +As Cassandra continues to evolve as an enterprise-ready database solution, +these security enhancements demonstrate a commitment to meeting the demanding +security requirements of modern applications while maintaining the flexibility that makes Cassandra so powerful. \ No newline at end of file diff --git a/doc/modules/cassandra/pages/managing/operating/security.adoc b/doc/modules/cassandra/pages/managing/operating/security.adoc index cdc76a625dbb..ee846cb13d57 100644 --- a/doc/modules/cassandra/pages/managing/operating/security.adoc +++ b/doc/modules/cassandra/pages/managing/operating/security.adoc @@ -393,6 +393,12 @@ See also: xref:cassandra:developing/cql/security.adoc#grant-permission[`GRANT PE xref:cassandra:developing/cql/security.adoc#grant-all[`GRANT ALL`] and xref:cassandra:developing/cql/security.adoc#revoke-permission[`REVOKE PERMISSION`]. +== Password validation + +If you are interested into the application of a certain security policy for password strength for +user passwords, you are welcome to read about it more in xref:cassandra:managing/operating/password_validation.adoc[here] +which implements https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=228494146[CEP-24]. + [[auth-caching]] == Caching From 6a81306876d151085e41e54794a36096e672e6bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20Mi=C4=99=C5=BCa=C5=82?= Date: Thu, 24 Apr 2025 15:06:55 +0200 Subject: [PATCH 303/340] Reading mmapped trie-index exceeding 2GiB results in exception Memory-mapping is done in buffers of size less than 2GiB. When these buffers aren't aligned to 4KiB and the trie-index file spans many buffers then reading it results in going out of buffer bounds. This patch fixes it by making sure that the buffers are correctly aligned. patch by Szymon Miezal; reviewed by blambov and brandonwilliams for CASSANDRA-20351 --- CHANGES.txt | 1 + .../apache/cassandra/io/util/FileHandle.java | 6 +- .../cassandra/io/util/MmappedRegions.java | 53 ++++++------ .../io/util/MmappedRegionsCache.java | 10 +-- .../cassandra/io/util/MmappedRegionsTest.java | 81 ++++++++++++++----- 5 files changed, 94 insertions(+), 57 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d30a383b7f3d..9a6dbe7ae577 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Fix reading mmapped trie-index exceeding 2GiB (CASSANDRA-20351) * zero copy streaming allocates direct memory that isn't used, but does help to fragment the memory space (CASSANDRA-20577) * CQLSSTableWriter supports setting the format (BTI or Big) (CASSANDRA-20609) * Don't allocate in ThreadLocalReadAheadBuffer#close() (CASSANDRA-20551) diff --git a/src/java/org/apache/cassandra/io/util/FileHandle.java b/src/java/org/apache/cassandra/io/util/FileHandle.java index 67bfd239d61e..7e4b214128ac 100644 --- a/src/java/org/apache/cassandra/io/util/FileHandle.java +++ b/src/java/org/apache/cassandra/io/util/FileHandle.java @@ -406,14 +406,14 @@ else if (mmapped) { if (compressionMetadata != null) { - regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, compressionMetadata) + regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, compressionMetadata, bufferSize) : MmappedRegions.map(channel, compressionMetadata); rebuffererFactory = maybeCached(new CompressedChunkReader.Mmap(channel, compressionMetadata, regions, crcCheckChanceSupplier)); } else { - regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, length) - : MmappedRegions.map(channel, length); + regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, length, bufferSize) + : MmappedRegions.map(channel, length, bufferSize); rebuffererFactory = new MmapRebufferer(channel, length, regions); } } diff --git a/src/java/org/apache/cassandra/io/util/MmappedRegions.java b/src/java/org/apache/cassandra/io/util/MmappedRegions.java index 0ab07b8d0f74..578217e279a4 100644 --- a/src/java/org/apache/cassandra/io/util/MmappedRegions.java +++ b/src/java/org/apache/cassandra/io/util/MmappedRegions.java @@ -65,27 +65,22 @@ public class MmappedRegions extends SharedCloseableImpl */ private volatile State copy; - private MmappedRegions(ChannelProxy channel, CompressionMetadata metadata, long length) - { - this(new State(channel), metadata, length); - } - - private MmappedRegions(State state, CompressionMetadata metadata, long length) + private MmappedRegions(State state, long length, int chunkSize) { super(new Tidier(state)); - this.state = state; - - if (metadata != null) - { - assert length == 0 : "expected no length with metadata"; - updateState(metadata); - } - else if (length > 0) + if (length > 0) { - updateState(length); + updateState(length, chunkSize); } + this.copy = new State(state); + } + private MmappedRegions(State state, CompressionMetadata metadata) + { + super(new Tidier(state)); + this.state = state; + updateState(metadata); this.copy = new State(state); } @@ -97,7 +92,7 @@ private MmappedRegions(MmappedRegions original) public static MmappedRegions empty(ChannelProxy channel) { - return new MmappedRegions(channel, null, 0); + return new MmappedRegions(new State(channel), 0, 0); } /** @@ -109,16 +104,16 @@ public static MmappedRegions map(ChannelProxy channel, CompressionMetadata metad { if (metadata == null) throw new IllegalArgumentException("metadata cannot be null"); - - return new MmappedRegions(channel, metadata, 0); + State state = new State(channel); + return new MmappedRegions(state, metadata); } - public static MmappedRegions map(ChannelProxy channel, long length) + public static MmappedRegions map(ChannelProxy channel, long length, int chunkSize) { if (length <= 0) throw new IllegalArgumentException("Length must be positive"); - - return new MmappedRegions(channel, null, length); + State state = new State(channel); + return new MmappedRegions(state, length, chunkSize); } /** @@ -140,8 +135,10 @@ private boolean isCopy() * * @return {@code true} if new regions have been created */ - public boolean extend(long length) + public boolean extend(long length, int chunkSize) { + // We cannot enforce length to be a multiple of chunkSize (at the very least the last extend on a file + // will not satisfy this), so we hope the caller knows what they are doing. if (length < 0) throw new IllegalArgumentException("Length must not be negative"); @@ -151,7 +148,7 @@ public boolean extend(long length) return false; int initialRegions = state.last; - updateState(length); + updateState(length, chunkSize); copy = new State(state); return state.last > initialRegions; } @@ -162,7 +159,7 @@ public boolean extend(long length) * * @return {@code true} if new regions have been created */ - public boolean extend(CompressionMetadata compressionMetadata) + public boolean extend(CompressionMetadata compressionMetadata, int chunkSize) { assert !isCopy() : "Copies cannot be extended"; @@ -171,7 +168,7 @@ public boolean extend(CompressionMetadata compressionMetadata) int initialRegions = state.last; if (compressionMetadata.compressedFileLength - state.length <= MAX_SEGMENT_SIZE) - updateState(compressionMetadata.compressedFileLength); + updateState(compressionMetadata.compressedFileLength, chunkSize); else updateState(compressionMetadata); @@ -183,13 +180,15 @@ public boolean extend(CompressionMetadata compressionMetadata) * Updates state by adding the remaining segments. It starts with the current state last segment end position and * subsequently add new segments until all data up to the provided length are mapped. */ - private void updateState(long length) + private void updateState(long length, int chunkSize) { + // make sure the regions span whole chunks + long maxSize = (long) (MAX_SEGMENT_SIZE / chunkSize) * chunkSize; state.length = length; long pos = state.getPosition(); while (pos < length) { - long size = Math.min(MAX_SEGMENT_SIZE, length - pos); + long size = Math.min(maxSize, length - pos); state.add(pos, size); pos += size; } diff --git a/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java b/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java index dff9561f4f7d..e3ebc34609d1 100644 --- a/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java +++ b/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java @@ -45,12 +45,12 @@ public class MmappedRegionsCache implements AutoCloseable * @param length length of the file * @return a shared copy of the cached mmapped regions */ - public MmappedRegions getOrCreate(ChannelProxy channel, long length) + public MmappedRegions getOrCreate(ChannelProxy channel, long length, int bufferSize) { Preconditions.checkState(!closed); - MmappedRegions regions = cache.computeIfAbsent(channel.file(), ignored -> MmappedRegions.map(channel, length)); + MmappedRegions regions = cache.computeIfAbsent(channel.file(), ignored -> MmappedRegions.map(channel, length, bufferSize)); Preconditions.checkArgument(regions.isValid(channel)); - regions.extend(length); + regions.extend(length, bufferSize); return regions.sharedCopy(); } @@ -62,12 +62,12 @@ public MmappedRegions getOrCreate(ChannelProxy channel, long length) * @param metadata compression metadata of the file * @return a shared copy of the cached mmapped regions */ - public MmappedRegions getOrCreate(ChannelProxy channel, CompressionMetadata metadata) + public MmappedRegions getOrCreate(ChannelProxy channel, CompressionMetadata metadata, int bufferSize) { Preconditions.checkState(!closed); MmappedRegions regions = cache.computeIfAbsent(channel.file(), ignored -> MmappedRegions.map(channel, metadata)); Preconditions.checkArgument(regions.isValid(channel)); - regions.extend(metadata); + regions.extend(metadata, bufferSize); return regions.sharedCopy(); } diff --git a/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java b/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java index e6b5dd0c0962..af4c6f042d70 100644 --- a/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java +++ b/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java @@ -114,10 +114,11 @@ public void testEmpty() throws Exception public void testTwoSegments() throws Exception { ByteBuffer buffer = allocateBuffer(2048); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testTwoSegments", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(1024); + regions.extend(1024, bufSize); for (int i = 0; i < 1024; i++) { MmappedRegions.Region region = regions.floor(i); @@ -126,7 +127,7 @@ public void testTwoSegments() throws Exception assertEquals(1024, region.end()); } - regions.extend(2048); + regions.extend(2048, bufSize); for (int i = 0; i < 2048; i++) { MmappedRegions.Region region = regions.floor(i); @@ -149,14 +150,15 @@ public void testTwoSegments() throws Exception public void testSmallSegmentSize() throws Exception { MmappedRegions.MAX_SEGMENT_SIZE = 1024; + int bufSize = 1024; ByteBuffer buffer = allocateBuffer(4096); try (ChannelProxy channel = new ChannelProxy(writeFile("testSmallSegmentSize", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(1024); - regions.extend(2048); - regions.extend(4096); + regions.extend(1024, bufSize); + regions.extend(2048, bufSize); + regions.extend(4096, bufSize); final int SIZE = MmappedRegions.MAX_SEGMENT_SIZE; for (int i = 0; i < buffer.capacity(); i++) @@ -169,17 +171,45 @@ public void testSmallSegmentSize() throws Exception } } + @Test + public void testSizeIsChunkMultiple() throws Exception + { + final int oldMaxSegmentSize = MmappedRegions.MAX_SEGMENT_SIZE; + final int bufSize = 1024; + MmappedRegions.MAX_SEGMENT_SIZE = 2047; + ByteBuffer buffer = allocateBuffer(4096); + try(ChannelProxy channel = new ChannelProxy(writeFile("testSmallSegmentSize", buffer)); + MmappedRegions regions = MmappedRegions.empty(channel)) + { + regions.extend(1024, bufSize); + regions.extend(2048, bufSize); + regions.extend(4096, bufSize); + for (int i = 0; i < buffer.capacity(); i++) + { + MmappedRegions.Region region = regions.floor(i); + assertNotNull(region); + assertEquals(bufSize * (i / bufSize), region.offset()); + assertEquals(bufSize + (bufSize * (i / bufSize)), region.end()); + } + } + finally + { + MmappedRegions.MAX_SEGMENT_SIZE = oldMaxSegmentSize; + } + } + @Test public void testAllocRegions() throws Exception { MmappedRegions.MAX_SEGMENT_SIZE = 1024; ByteBuffer buffer = allocateBuffer(MmappedRegions.MAX_SEGMENT_SIZE * MmappedRegions.REGION_ALLOC_SIZE * 3); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testAllocRegions", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(buffer.capacity()); + regions.extend(buffer.capacity(), bufSize); final int SIZE = MmappedRegions.MAX_SEGMENT_SIZE; for (int i = 0; i < buffer.capacity(); i++) @@ -196,17 +226,18 @@ public void testAllocRegions() throws Exception public void testCopy() throws Exception { ByteBuffer buffer = allocateBuffer(128 * 1024); + int bufSize = 4096; MmappedRegions snapshot; ChannelProxy channelCopy; try (ChannelProxy channel = new ChannelProxy(writeFile("testSnapshot", buffer)); - MmappedRegions regions = MmappedRegions.map(channel, buffer.capacity() / 4)) + MmappedRegions regions = MmappedRegions.map(channel, buffer.capacity() / 4, bufSize)) { // create 3 more segments, one per quater capacity - regions.extend(buffer.capacity() / 2); - regions.extend(3 * buffer.capacity() / 4); - regions.extend(buffer.capacity()); + regions.extend(buffer.capacity() / 2, bufSize); + regions.extend(3 * buffer.capacity() / 4, bufSize); + regions.extend(buffer.capacity(), bufSize); // make a snapshot snapshot = regions.sharedCopy(); @@ -238,6 +269,7 @@ public void testCopy() throws Exception public void testCopyCannotExtend() throws Exception { ByteBuffer buffer = allocateBuffer(128 * 1024); + int bufSize = 1024; MmappedRegions snapshot; ChannelProxy channelCopy; @@ -245,7 +277,7 @@ public void testCopyCannotExtend() throws Exception try (ChannelProxy channel = new ChannelProxy(writeFile("testSnapshotCannotExtend", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(buffer.capacity() / 2); + regions.extend(buffer.capacity() / 2, bufSize); // make a snapshot snapshot = regions.sharedCopy(); @@ -256,7 +288,7 @@ public void testCopyCannotExtend() throws Exception try { - snapshot.extend(buffer.capacity()); + snapshot.extend(buffer.capacity(), bufSize); } finally { @@ -269,12 +301,13 @@ public void testCopyCannotExtend() throws Exception public void testExtendOutOfOrder() throws Exception { ByteBuffer buffer = allocateBuffer(4096); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testExtendOutOfOrder", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(4096); - regions.extend(1024); - regions.extend(2048); + regions.extend(4096, bufSize); + regions.extend(1024, bufSize); + regions.extend(2048, bufSize); for (int i = 0; i < buffer.capacity(); i++) { @@ -290,10 +323,11 @@ public void testExtendOutOfOrder() throws Exception public void testNegativeExtend() throws Exception { ByteBuffer buffer = allocateBuffer(1024); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testNegativeExtend", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(-1); + regions.extend(-1, bufSize); } } @@ -341,8 +375,9 @@ public void testMapForCompressionMetadata() throws Exception public void testIllegalArgForMap1() throws Exception { ByteBuffer buffer = allocateBuffer(1024); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testIllegalArgForMap1", buffer)); - MmappedRegions regions = MmappedRegions.map(channel, 0)) + MmappedRegions regions = MmappedRegions.map(channel, 0, bufSize)) { assertTrue(regions.isEmpty()); } @@ -352,8 +387,9 @@ public void testIllegalArgForMap1() throws Exception public void testIllegalArgForMap2() throws Exception { ByteBuffer buffer = allocateBuffer(1024); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testIllegalArgForMap2", buffer)); - MmappedRegions regions = MmappedRegions.map(channel, -1L)) + MmappedRegions regions = MmappedRegions.map(channel, -1L, bufSize)) { assertTrue(regions.isEmpty()); } @@ -382,6 +418,7 @@ public void testExtendForCompressionMetadata(int maxSegmentSize, int chunkSize, { MmappedRegions.MAX_SEGMENT_SIZE = maxSegmentSize << 10; int size = Arrays.stream(writeSizes).sum() << 10; + int bufSize = 4096; ByteBuffer buffer = allocateBuffer(size); File f = FileUtils.createTempFile("testMapForCompressionMetadata", "1"); @@ -423,10 +460,10 @@ public void testExtendForCompressionMetadata(int maxSegmentSize, int chunkSize, writer.sync(); // verify that calling extend for the same (first iteration) or some previous metadata (further iterations) has no effect - assertFalse(regions.extend(metadata)); + assertFalse(regions.extend(metadata, bufSize)); logger.info("Checking extend on compressed chunk for range={} {}..{} / {}", idx, pos, pos + (writeSizes[idx] << 10), size); - checkExtendOnCompressedChunks(f, writer, regions); + checkExtendOnCompressedChunks(f, writer, regions, bufSize); pos += writeSizes[idx] << 10; idx++; } @@ -434,12 +471,12 @@ public void testExtendForCompressionMetadata(int maxSegmentSize, int chunkSize, } } - private void checkExtendOnCompressedChunks(File f, CompressedSequentialWriter writer, MmappedRegions regions) + private void checkExtendOnCompressedChunks(File f, CompressedSequentialWriter writer, MmappedRegions regions, int bufSize) { int dataOffset; try (CompressionMetadata metadata = writer.open(writer.getLastFlushOffset())) { - regions.extend(metadata); + regions.extend(metadata, bufSize); assertFalse(regions.isEmpty()); dataOffset = 0; while (dataOffset < metadata.dataLength) From 12c5495d2f70e0775c6d8d1ffbd00123cc8923ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20Mi=C4=99=C5=BCa=C5=82?= Date: Thu, 24 Apr 2025 15:06:55 +0200 Subject: [PATCH 304/340] Reading mmapped trie-index exceeding 2GiB results in exception Memory-mapping is done in buffers of size less than 2GiB. When these buffers aren't aligned to 4KiB and the trie-index file spans many buffers then reading it results in going out of buffer bounds. This patch fixes it by making sure that the buffers are correctly aligned. patch by Szymon Miezal; reviewed by blambov and brandonwilliams for CASSANDRA-20351 --- .../apache/cassandra/io/util/FileHandle.java | 6 +- .../cassandra/io/util/MmappedRegions.java | 53 ++++++------ .../io/util/MmappedRegionsCache.java | 10 +-- .../cassandra/io/util/MmappedRegionsTest.java | 81 ++++++++++++++----- 4 files changed, 93 insertions(+), 57 deletions(-) diff --git a/src/java/org/apache/cassandra/io/util/FileHandle.java b/src/java/org/apache/cassandra/io/util/FileHandle.java index 67bfd239d61e..7e4b214128ac 100644 --- a/src/java/org/apache/cassandra/io/util/FileHandle.java +++ b/src/java/org/apache/cassandra/io/util/FileHandle.java @@ -406,14 +406,14 @@ else if (mmapped) { if (compressionMetadata != null) { - regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, compressionMetadata) + regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, compressionMetadata, bufferSize) : MmappedRegions.map(channel, compressionMetadata); rebuffererFactory = maybeCached(new CompressedChunkReader.Mmap(channel, compressionMetadata, regions, crcCheckChanceSupplier)); } else { - regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, length) - : MmappedRegions.map(channel, length); + regions = mmappedRegionsCache != null ? mmappedRegionsCache.getOrCreate(channel, length, bufferSize) + : MmappedRegions.map(channel, length, bufferSize); rebuffererFactory = new MmapRebufferer(channel, length, regions); } } diff --git a/src/java/org/apache/cassandra/io/util/MmappedRegions.java b/src/java/org/apache/cassandra/io/util/MmappedRegions.java index 0ab07b8d0f74..578217e279a4 100644 --- a/src/java/org/apache/cassandra/io/util/MmappedRegions.java +++ b/src/java/org/apache/cassandra/io/util/MmappedRegions.java @@ -65,27 +65,22 @@ public class MmappedRegions extends SharedCloseableImpl */ private volatile State copy; - private MmappedRegions(ChannelProxy channel, CompressionMetadata metadata, long length) - { - this(new State(channel), metadata, length); - } - - private MmappedRegions(State state, CompressionMetadata metadata, long length) + private MmappedRegions(State state, long length, int chunkSize) { super(new Tidier(state)); - this.state = state; - - if (metadata != null) - { - assert length == 0 : "expected no length with metadata"; - updateState(metadata); - } - else if (length > 0) + if (length > 0) { - updateState(length); + updateState(length, chunkSize); } + this.copy = new State(state); + } + private MmappedRegions(State state, CompressionMetadata metadata) + { + super(new Tidier(state)); + this.state = state; + updateState(metadata); this.copy = new State(state); } @@ -97,7 +92,7 @@ private MmappedRegions(MmappedRegions original) public static MmappedRegions empty(ChannelProxy channel) { - return new MmappedRegions(channel, null, 0); + return new MmappedRegions(new State(channel), 0, 0); } /** @@ -109,16 +104,16 @@ public static MmappedRegions map(ChannelProxy channel, CompressionMetadata metad { if (metadata == null) throw new IllegalArgumentException("metadata cannot be null"); - - return new MmappedRegions(channel, metadata, 0); + State state = new State(channel); + return new MmappedRegions(state, metadata); } - public static MmappedRegions map(ChannelProxy channel, long length) + public static MmappedRegions map(ChannelProxy channel, long length, int chunkSize) { if (length <= 0) throw new IllegalArgumentException("Length must be positive"); - - return new MmappedRegions(channel, null, length); + State state = new State(channel); + return new MmappedRegions(state, length, chunkSize); } /** @@ -140,8 +135,10 @@ private boolean isCopy() * * @return {@code true} if new regions have been created */ - public boolean extend(long length) + public boolean extend(long length, int chunkSize) { + // We cannot enforce length to be a multiple of chunkSize (at the very least the last extend on a file + // will not satisfy this), so we hope the caller knows what they are doing. if (length < 0) throw new IllegalArgumentException("Length must not be negative"); @@ -151,7 +148,7 @@ public boolean extend(long length) return false; int initialRegions = state.last; - updateState(length); + updateState(length, chunkSize); copy = new State(state); return state.last > initialRegions; } @@ -162,7 +159,7 @@ public boolean extend(long length) * * @return {@code true} if new regions have been created */ - public boolean extend(CompressionMetadata compressionMetadata) + public boolean extend(CompressionMetadata compressionMetadata, int chunkSize) { assert !isCopy() : "Copies cannot be extended"; @@ -171,7 +168,7 @@ public boolean extend(CompressionMetadata compressionMetadata) int initialRegions = state.last; if (compressionMetadata.compressedFileLength - state.length <= MAX_SEGMENT_SIZE) - updateState(compressionMetadata.compressedFileLength); + updateState(compressionMetadata.compressedFileLength, chunkSize); else updateState(compressionMetadata); @@ -183,13 +180,15 @@ public boolean extend(CompressionMetadata compressionMetadata) * Updates state by adding the remaining segments. It starts with the current state last segment end position and * subsequently add new segments until all data up to the provided length are mapped. */ - private void updateState(long length) + private void updateState(long length, int chunkSize) { + // make sure the regions span whole chunks + long maxSize = (long) (MAX_SEGMENT_SIZE / chunkSize) * chunkSize; state.length = length; long pos = state.getPosition(); while (pos < length) { - long size = Math.min(MAX_SEGMENT_SIZE, length - pos); + long size = Math.min(maxSize, length - pos); state.add(pos, size); pos += size; } diff --git a/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java b/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java index dff9561f4f7d..e3ebc34609d1 100644 --- a/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java +++ b/src/java/org/apache/cassandra/io/util/MmappedRegionsCache.java @@ -45,12 +45,12 @@ public class MmappedRegionsCache implements AutoCloseable * @param length length of the file * @return a shared copy of the cached mmapped regions */ - public MmappedRegions getOrCreate(ChannelProxy channel, long length) + public MmappedRegions getOrCreate(ChannelProxy channel, long length, int bufferSize) { Preconditions.checkState(!closed); - MmappedRegions regions = cache.computeIfAbsent(channel.file(), ignored -> MmappedRegions.map(channel, length)); + MmappedRegions regions = cache.computeIfAbsent(channel.file(), ignored -> MmappedRegions.map(channel, length, bufferSize)); Preconditions.checkArgument(regions.isValid(channel)); - regions.extend(length); + regions.extend(length, bufferSize); return regions.sharedCopy(); } @@ -62,12 +62,12 @@ public MmappedRegions getOrCreate(ChannelProxy channel, long length) * @param metadata compression metadata of the file * @return a shared copy of the cached mmapped regions */ - public MmappedRegions getOrCreate(ChannelProxy channel, CompressionMetadata metadata) + public MmappedRegions getOrCreate(ChannelProxy channel, CompressionMetadata metadata, int bufferSize) { Preconditions.checkState(!closed); MmappedRegions regions = cache.computeIfAbsent(channel.file(), ignored -> MmappedRegions.map(channel, metadata)); Preconditions.checkArgument(regions.isValid(channel)); - regions.extend(metadata); + regions.extend(metadata, bufferSize); return regions.sharedCopy(); } diff --git a/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java b/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java index e6b5dd0c0962..af4c6f042d70 100644 --- a/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java +++ b/test/unit/org/apache/cassandra/io/util/MmappedRegionsTest.java @@ -114,10 +114,11 @@ public void testEmpty() throws Exception public void testTwoSegments() throws Exception { ByteBuffer buffer = allocateBuffer(2048); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testTwoSegments", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(1024); + regions.extend(1024, bufSize); for (int i = 0; i < 1024; i++) { MmappedRegions.Region region = regions.floor(i); @@ -126,7 +127,7 @@ public void testTwoSegments() throws Exception assertEquals(1024, region.end()); } - regions.extend(2048); + regions.extend(2048, bufSize); for (int i = 0; i < 2048; i++) { MmappedRegions.Region region = regions.floor(i); @@ -149,14 +150,15 @@ public void testTwoSegments() throws Exception public void testSmallSegmentSize() throws Exception { MmappedRegions.MAX_SEGMENT_SIZE = 1024; + int bufSize = 1024; ByteBuffer buffer = allocateBuffer(4096); try (ChannelProxy channel = new ChannelProxy(writeFile("testSmallSegmentSize", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(1024); - regions.extend(2048); - regions.extend(4096); + regions.extend(1024, bufSize); + regions.extend(2048, bufSize); + regions.extend(4096, bufSize); final int SIZE = MmappedRegions.MAX_SEGMENT_SIZE; for (int i = 0; i < buffer.capacity(); i++) @@ -169,17 +171,45 @@ public void testSmallSegmentSize() throws Exception } } + @Test + public void testSizeIsChunkMultiple() throws Exception + { + final int oldMaxSegmentSize = MmappedRegions.MAX_SEGMENT_SIZE; + final int bufSize = 1024; + MmappedRegions.MAX_SEGMENT_SIZE = 2047; + ByteBuffer buffer = allocateBuffer(4096); + try(ChannelProxy channel = new ChannelProxy(writeFile("testSmallSegmentSize", buffer)); + MmappedRegions regions = MmappedRegions.empty(channel)) + { + regions.extend(1024, bufSize); + regions.extend(2048, bufSize); + regions.extend(4096, bufSize); + for (int i = 0; i < buffer.capacity(); i++) + { + MmappedRegions.Region region = regions.floor(i); + assertNotNull(region); + assertEquals(bufSize * (i / bufSize), region.offset()); + assertEquals(bufSize + (bufSize * (i / bufSize)), region.end()); + } + } + finally + { + MmappedRegions.MAX_SEGMENT_SIZE = oldMaxSegmentSize; + } + } + @Test public void testAllocRegions() throws Exception { MmappedRegions.MAX_SEGMENT_SIZE = 1024; ByteBuffer buffer = allocateBuffer(MmappedRegions.MAX_SEGMENT_SIZE * MmappedRegions.REGION_ALLOC_SIZE * 3); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testAllocRegions", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(buffer.capacity()); + regions.extend(buffer.capacity(), bufSize); final int SIZE = MmappedRegions.MAX_SEGMENT_SIZE; for (int i = 0; i < buffer.capacity(); i++) @@ -196,17 +226,18 @@ public void testAllocRegions() throws Exception public void testCopy() throws Exception { ByteBuffer buffer = allocateBuffer(128 * 1024); + int bufSize = 4096; MmappedRegions snapshot; ChannelProxy channelCopy; try (ChannelProxy channel = new ChannelProxy(writeFile("testSnapshot", buffer)); - MmappedRegions regions = MmappedRegions.map(channel, buffer.capacity() / 4)) + MmappedRegions regions = MmappedRegions.map(channel, buffer.capacity() / 4, bufSize)) { // create 3 more segments, one per quater capacity - regions.extend(buffer.capacity() / 2); - regions.extend(3 * buffer.capacity() / 4); - regions.extend(buffer.capacity()); + regions.extend(buffer.capacity() / 2, bufSize); + regions.extend(3 * buffer.capacity() / 4, bufSize); + regions.extend(buffer.capacity(), bufSize); // make a snapshot snapshot = regions.sharedCopy(); @@ -238,6 +269,7 @@ public void testCopy() throws Exception public void testCopyCannotExtend() throws Exception { ByteBuffer buffer = allocateBuffer(128 * 1024); + int bufSize = 1024; MmappedRegions snapshot; ChannelProxy channelCopy; @@ -245,7 +277,7 @@ public void testCopyCannotExtend() throws Exception try (ChannelProxy channel = new ChannelProxy(writeFile("testSnapshotCannotExtend", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(buffer.capacity() / 2); + regions.extend(buffer.capacity() / 2, bufSize); // make a snapshot snapshot = regions.sharedCopy(); @@ -256,7 +288,7 @@ public void testCopyCannotExtend() throws Exception try { - snapshot.extend(buffer.capacity()); + snapshot.extend(buffer.capacity(), bufSize); } finally { @@ -269,12 +301,13 @@ public void testCopyCannotExtend() throws Exception public void testExtendOutOfOrder() throws Exception { ByteBuffer buffer = allocateBuffer(4096); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testExtendOutOfOrder", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(4096); - regions.extend(1024); - regions.extend(2048); + regions.extend(4096, bufSize); + regions.extend(1024, bufSize); + regions.extend(2048, bufSize); for (int i = 0; i < buffer.capacity(); i++) { @@ -290,10 +323,11 @@ public void testExtendOutOfOrder() throws Exception public void testNegativeExtend() throws Exception { ByteBuffer buffer = allocateBuffer(1024); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testNegativeExtend", buffer)); MmappedRegions regions = MmappedRegions.empty(channel)) { - regions.extend(-1); + regions.extend(-1, bufSize); } } @@ -341,8 +375,9 @@ public void testMapForCompressionMetadata() throws Exception public void testIllegalArgForMap1() throws Exception { ByteBuffer buffer = allocateBuffer(1024); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testIllegalArgForMap1", buffer)); - MmappedRegions regions = MmappedRegions.map(channel, 0)) + MmappedRegions regions = MmappedRegions.map(channel, 0, bufSize)) { assertTrue(regions.isEmpty()); } @@ -352,8 +387,9 @@ public void testIllegalArgForMap1() throws Exception public void testIllegalArgForMap2() throws Exception { ByteBuffer buffer = allocateBuffer(1024); + int bufSize = 1024; try (ChannelProxy channel = new ChannelProxy(writeFile("testIllegalArgForMap2", buffer)); - MmappedRegions regions = MmappedRegions.map(channel, -1L)) + MmappedRegions regions = MmappedRegions.map(channel, -1L, bufSize)) { assertTrue(regions.isEmpty()); } @@ -382,6 +418,7 @@ public void testExtendForCompressionMetadata(int maxSegmentSize, int chunkSize, { MmappedRegions.MAX_SEGMENT_SIZE = maxSegmentSize << 10; int size = Arrays.stream(writeSizes).sum() << 10; + int bufSize = 4096; ByteBuffer buffer = allocateBuffer(size); File f = FileUtils.createTempFile("testMapForCompressionMetadata", "1"); @@ -423,10 +460,10 @@ public void testExtendForCompressionMetadata(int maxSegmentSize, int chunkSize, writer.sync(); // verify that calling extend for the same (first iteration) or some previous metadata (further iterations) has no effect - assertFalse(regions.extend(metadata)); + assertFalse(regions.extend(metadata, bufSize)); logger.info("Checking extend on compressed chunk for range={} {}..{} / {}", idx, pos, pos + (writeSizes[idx] << 10), size); - checkExtendOnCompressedChunks(f, writer, regions); + checkExtendOnCompressedChunks(f, writer, regions, bufSize); pos += writeSizes[idx] << 10; idx++; } @@ -434,12 +471,12 @@ public void testExtendForCompressionMetadata(int maxSegmentSize, int chunkSize, } } - private void checkExtendOnCompressedChunks(File f, CompressedSequentialWriter writer, MmappedRegions regions) + private void checkExtendOnCompressedChunks(File f, CompressedSequentialWriter writer, MmappedRegions regions, int bufSize) { int dataOffset; try (CompressionMetadata metadata = writer.open(writer.getLastFlushOffset())) { - regions.extend(metadata); + regions.extend(metadata, bufSize); assertFalse(regions.isEmpty()); dataOffset = 0; while (dataOffset < metadata.dataLength) From c24fbd814a3838a74664f108c62834c8b761599c Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Fri, 21 Mar 2025 19:18:33 +0100 Subject: [PATCH 305/340] Rewrite constraint framework to remove column specification from constraint definition, introduce SQL-like NOT NULL patch by Stefan Miklosovic; reviewed by Bernardo Botella Corbi for CASSANDRA-20563 Co-authored-by: Bernardo Botella Corbi --- CHANGES.txt | 1 + .../pages/developing/cql/constraints.adoc | 158 ++++++++++--- pylib/cqlshlib/cql3handling.py | 6 +- pylib/cqlshlib/test/test_cqlsh_completion.py | 2 +- src/antlr/Parser.g | 37 ++- .../AbstractFunctionConstraint.java | 4 +- .../cql3/constraints/ColumnConstraint.java | 4 +- .../cql3/constraints/ColumnConstraints.java | 60 +++-- .../cql3/constraints/ConstraintFunction.java | 59 ++++- .../constraints/FunctionColumnConstraint.java | 69 +++--- .../cql3/constraints/JsonConstraint.java | 7 +- .../cql3/constraints/LengthConstraint.java | 5 +- .../cql3/constraints/NotNullConstraint.java | 23 +- .../constraints/OctetLengthConstraint.java | 5 +- .../cql3/constraints/RegexpConstraint.java | 6 +- .../constraints/ScalarColumnConstraint.java | 14 +- .../constraints/UnaryConstraintFunction.java | 5 +- .../UnaryFunctionColumnConstraint.java | 89 +++++--- .../schema/AlterTableStatement.java | 4 + .../schema/CreateTableStatement.java | 29 ++- .../cassandra/schema/ColumnMetadata.java | 1 + .../test/ColumnConstraintsTest.java | 6 +- .../distributed/test/log/SnapshotTest.java | 4 +- ...ableWithTableConstraintValidationTest.java | 40 ++-- .../constraints/ConstraintArgumentsTest.java | 212 ++++++++++++++++++ .../ConstraintsSatisfiabilityTest.java | 11 +- ...WithColumnCqlConstraintValidationTest.java | 84 ++++--- ...ithColumnNotNullConstraintInvalidTest.java | 6 +- ...eWithColumnNotNullConstraintValidTest.java | 2 +- ...mnOctetLengthConstraintValidationTest.java | 68 +++--- .../constraints/JsonConstraintTest.java | 4 +- .../constraints/NotNullConstraintTest.java | 10 +- .../constraints/RegexpConstraintTest.java | 8 +- .../cql3/ColumnSpecificationTest.java | 50 ++--- 34 files changed, 812 insertions(+), 281 deletions(-) create mode 100644 test/unit/org/apache/cassandra/constraints/ConstraintArgumentsTest.java diff --git a/CHANGES.txt b/CHANGES.txt index efed7d42d4f0..80a9d636f61a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Rewrite constraint framework to remove column specification from constraint definition, introduce SQL-like NOT NULL (CASSANDRA-20563) * Fix a bug in AutoRepair duration metric calculation if schedule finishes quickly (CASSANDRA-20622) * Fix AutoRepair flaky InJvm dtest (CASSANDRA-20620) * Increasing default for auto_repair.sstable_upper_threshold considering large Cassandra tables & revert three lines removed from CHANGES.txt due to a merge mistake (CASSANDRA-20586) diff --git a/doc/modules/cassandra/pages/developing/cql/constraints.adoc b/doc/modules/cassandra/pages/developing/cql/constraints.adoc index 390d6c27a979..2e729db01d8b 100644 --- a/doc/modules/cassandra/pages/developing/cql/constraints.adoc +++ b/doc/modules/cassandra/pages/developing/cql/constraints.adoc @@ -5,10 +5,9 @@ column level in a table schema definition and enforcing them at write time. == CREATE CONSTRAINT -Constraints can be created within the column definition, or as part -of the table properties. +Constraints can be created within the column definition. -The main syntax to define a constraint is as follows: +The syntax to define a constraint is as follows: [source,bnf] ---- @@ -20,7 +19,7 @@ CREATE TABLE ks.tb ( ); ---- -As shown in this syntax, more than one constraint can be defined for a given column using the AND keyword. +As shown in this syntax, more than one constraint can be defined for a given column using the `AND` keyword. == ALTER CONSTRAINT @@ -30,21 +29,32 @@ Altering a constraint is done by following the alter column CQL syntax: ALTER TABLE [IF EXISTS] ALTER [IF EXISTS] CHECK ; ---- +There is no way how to alter individual check when multiple checks are specified on a column. Altering constraints +on a column will set constraints to these specified checks. A user can, of course, chain them: + +[source,bnf] +---- +ALTER TABLE [IF EXISTS]
      ALTER [IF EXISTS] CHECK AND +---- + == DROP CONSTRAINT -And DROP can be used to drop constraints for a column as well. +`DROP CHECK` can be used to drop constraints for a column as well. [source,bnf] ---- ALTER TABLE [IF EXISTS]
      ALTER [IF EXISTS] DROP CHECK; ---- +There is no way how to drop individual check when multiple checks are specified on a column. After dropping checks, you +are required to re-define all necessary checks again. + == AVAILABLE CONSTRAINTS === SCALAR CONSTRAINT -Defines a comparator against a numeric type. It support all numeric types supported in Cassandra, with all the regular +Defines a comparator against a numeric type. It supports all numeric types supported in Cassandra, with all regular comparators. -For example, we can define constraints that ensure that i is bigger or equal than 100 but smaller than 1000. +For example, we can define constraints that ensure that `i` is bigger or equal than `100` but smaller than `1000`. [source,bnf] ---- @@ -97,6 +107,39 @@ CREATE TABLE ks.tb ) ---- +There is a basic satistfiability check conducted on checks' definitions so we ensure that unsatisfiable constraint +definitions are invalid as it would be impossible to insert a value for a specific colum which would satisty all constraints. + +For example, imagine a user tries to create the following table (e.g. by mistake): + +---- +CREATE TABLE ks.tb ( + name text, + i int CHECK i < 100 AND i > 1000 + ..., +); +---- + +If we insert `50` for `i`, it will not satisfy `i > 1000`. If we insert `1001` as `i`, it will not satisfy `i < 100`. + +There is a satisfiability check in place which would prevent such constaint definitions: + +---- +[Invalid query] message="Constraints of scalar are not satisfiable: i < 100, i > 1000" +---- + +It is also illegal to specify constraints which are repeating, or they repeat on their operators: + +---- +CREATE TABLE ks.tb7 (id int primary key, i int check i < 100 and i > 1000 and i < 10) ; +[Invalid query] message="There are duplicate constraint definitions on column 'i': [i <]" +---- + +---- +CREATE TABLE ks.tb7 (id int primary key, i int check i > 100 and i > 1000) ; +[Invalid query] message="There are duplicate constraint definitions on column 'i': [i >]" +---- + === LENGTH CONSTRAINT Defines a condition that checks the length of text or binary type. @@ -105,7 +148,7 @@ For example, we can create a constraint that checks that name can't be longer th ---- CREATE TABLE ks.tb ( - name text CHECK LENGTH(name) < 256 + name text CHECK LENGTH() < 256 ..., ); ---- @@ -113,7 +156,7 @@ CREATE TABLE ks.tb ( Altering that constraint can be done with: ---- -ALTER TABLE ks.tb ALTER name LENGTH(name) < 512; +ALTER TABLE ks.tb ALTER name LENGTH() < 512; ---- Finally, the constraint can be removed: @@ -130,25 +173,25 @@ For example, we can create a constraint that checks that name can't be bigger th ---- CREATE TABLE ks.tb ( - name text CHECK OCTET_LENGTH(name) < 2 + name text CHECK OCTET_LENGTH() < 2 ..., ); ---- Inserting a valid row: ---- -INSERT INTO ks.tb (name) VALUES ("f") +INSERT INTO ks.tb (name) VALUES ('f') ---- Inserting an invalid row: ---- -INSERT INTO ks.tb (name) VALUES ("fooooooo") +INSERT INTO ks.tb (name) VALUES ('fooooooo') ERROR: Column value does not satisfy value constraint for column 'name'. It has a length of 8 and and it should be should be < 2 ---- -=== NOT_NULL constraint +=== NOT NULL constraint Defines a constraint that checks if a column is not null in every modification statement. @@ -158,13 +201,33 @@ For example, let's have this table: CREATE TABLE ks.tb ( id int, cl int, - col1 int CHECK NOT_NULL(col1), - col2 int CHECK NOT_NULL(col2), + col1 int CHECK NOT NULL, + col2 int CHECK NOT NULL, PRIMARY KEY (id, cl) ); ---- -then this statement would fail: +It is possible to specify `NOT NULL` before `CHECK` / omit it to be more aligned with SQL syntax. + +---- +CREATE TABLE ks.tb ( + id int, + cl int, + col1 int NOT NULL, + col2 int NOT NULL, + PRIMARY KEY (id, cl) +); +---- + +Of course, mixing these two styles is forbidden: + +---- +-- this is illegal +col1 int NOT NULL CHECK NOT NULL, +[Invalid query] message="Duplicate definition of NOT NULL constraint" +---- + +When `NOT NULL` is specified as above, then this statement would fail: ---- INSERT INTO ks.tb (id, cl, col1) VALUES (1, 2, 3); @@ -177,7 +240,7 @@ as well as this statement: INSERT INTO ks.tb (id, cl, col1, col2) VALUES (1, 2, 3, null); ---- -A column which has `NOT_NULL` constraint has to be specified in every modification statement. +A column which has `NOT NULL` constraint has to be specified in every modification statement. The constraint can be removed: @@ -186,16 +249,55 @@ ALTER TABLE ks.tb ALTER col1 DROP CHECK; ALTER TABLE ks.tb ALTER col2 DROP CHECK; ---- -We can not remove the value of a column where `NOT_NULL` constraint is present: +We can not remove the value of a column where `NOT NULL` constraint is present: ---- DELETE col2 FROM ks.tb WHERE id = 1 AND cl = 2; ... [Invalid query] message="Column 'col2' can not be set to null." ---- -Additionally, `NOT_NULL` can not be specified on any column of a primary key, +Additionally, `NOT NULL` can not be specified on any column of a primary key, being it a partition key or a clustering column. +It is possible to chain `NOT NULL` with other checks, for example, if we require a column to not be null and its +size to be bigger than `0` every time, we do: + +---- +CREATE TABLE ks.tb ( + id int, + cl int, + col1 int CHECK NOT NULL AND col1 > 0, + PRIMARY KEY (id, cl) +); +---- + +As we said that `NOT NULL` can be put in front of `CHECK`, if we want to specify other constraints as well, +this syntax is indeed possible: + +---- +CREATE TABLE ks.tb ( + id int, + cl int, + col1 int NOT NULL CHECK col1 > 0, + PRIMARY KEY (id, cl) +); +---- + +Internally, `NOT NULL` specified before `CHECK` will be stored as any other check - that is after `CHECK`. +(`DESCRIBE` statement on a table will show this fact). This mean of constraint definition is just a syntax suggar. + +It is not possible to use `NOT NULL` before `CHECK` when altering. The following syntax is invalid: + +---- +ALTER TABLE ks.tb ALTER col2 NOT NULL CHECK col2 > 0; +---- + +However, this syntax is valid: + +---- +ALTER TABLE ks.tb ALTER col2 CHECK NOT NULL AND col2 > 0; +---- + === JSON constraint Defines a constraint which checks if a column contains a string which is a valid JSON. @@ -205,7 +307,7 @@ Defines a constraint which checks if a column contains a string which is a valid ---- CREATE TABLE ks.tb ( id int primary key, - val text CHECK JSON(val) + val text CHECK JSON() ); -- valid JSON string @@ -230,26 +332,28 @@ Defines a constraint which checks text-like values againt a regular expression. ---- CREATE TABLE ks.tb ( id int primary key, - value CHECK REGEXP(value) = 'a.*b' + value CHECK REGEXP() = 'a.*b' ) ---- ---- -cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb'); -cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa'); +INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb'); +INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa'); ... [Invalid query] message="Value does not match regular expression 'a.*b'" ---- Negation can be also used: ---- -ALTER TABLE ks.tb ALTER value CHECK REGEXP(value) != 'a.*b'; +ALTER TABLE ks.tb ALTER value CHECK REGEXP() != 'a.*b'; ---- which would logically invert the condition: ---- -cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb'); +INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb'); ... [Invalid query] message="Value does match regular expression 'a.*b'" -cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa'); ----- \ No newline at end of file +INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa'); +---- + +`REGEXP` constraint supports only `!=` and `=` operators as other operators are meaningless. \ No newline at end of file diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py index 194bd7f94cde..0f6ebe4aaa91 100644 --- a/pylib/cqlshlib/cql3handling.py +++ b/pylib/cqlshlib/cql3handling.py @@ -327,9 +327,10 @@ def dequote_value(cqlword): ::= "CHECK" ( "AND" )* ; - ::= + ::= "NOT" "NULL" + | | - | + | ; ::= "LENGTH" @@ -338,7 +339,6 @@ def dequote_value(cqlword): ; ::= "JSON" - | "NOT_NULL" ; ::= "MASKED" "WITH" ( "DEFAULT" | ); diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py b/pylib/cqlshlib/test/test_cqlsh_completion.py index 58f5c44bef43..3e05cb329f1f 100644 --- a/pylib/cqlshlib/test/test_cqlsh_completion.py +++ b/pylib/cqlshlib/test/test_cqlsh_completion.py @@ -1154,7 +1154,7 @@ def test_complete_in_alter_table(self): other_choices_ok=True) self.trycompletions('ALTER TABLE new_table ADD col int C', immediate='HECK ') self.trycompletions('ALTER TABLE new_table ADD col int CHECK ', - choices=['', '', 'JSON', 'LENGTH', 'NOT_NULL', 'OCTET_LENGTH', 'REGEXP'], + choices=['', '', 'JSON', 'LENGTH', 'NOT', 'OCTET_LENGTH', 'REGEXP'], other_choices_ok=True) self.trycompletions('ALTER TABLE IF EXISTS new_table RENAME ', choices=['IF', '', '']) self.trycompletions('ALTER TABLE new_table RENAME ', choices=['IF', '', '']) diff --git a/src/antlr/Parser.g b/src/antlr/Parser.g index c1c79e7448d1..4ebc916f946e 100644 --- a/src/antlr/Parser.g +++ b/src/antlr/Parser.g @@ -967,8 +967,8 @@ tableDefinition[CreateTableStatement.Raw stmt] ; tableColumns[CreateTableStatement.Raw stmt] - @init { boolean isStatic = false; } - : k=ident v=comparatorType (K_STATIC { isStatic = true; })? (mask=columnMask)? (constraints=columnConstraints)? { $stmt.addColumn(k, v, isStatic, mask, constraints); } + @init { boolean isStatic = false; boolean isNotNull = false; } + : k=ident v=comparatorType (K_STATIC { isStatic = true; })? (K_NOT K_NULL { isNotNull = true; })? (mask=columnMask)? (constraints=columnConstraints)? { $stmt.addColumn(k, v, isStatic, isNotNull, mask, constraints); } (K_PRIMARY K_KEY { $stmt.setPartitionKeyColumn(k); })? | K_PRIMARY K_KEY '(' tablePartitionKey[stmt] (',' c=ident { $stmt.markClusteringColumn(c); } )* ')' ; @@ -982,9 +982,30 @@ columnConstraints returns [ColumnConstraints.Raw constraints] ; columnConstraint returns [ColumnConstraint columnConstraint] - : funcName=ident '(' k=ident ')' op=relationType t=value { $columnConstraint = new FunctionColumnConstraint.Raw(funcName, k, op, t.getText()).prepare(); } - | funcName=ident '(' k=ident ')' { $columnConstraint = new UnaryFunctionColumnConstraint.Raw(funcName, k).prepare(); } - | k=ident op=relationType t=value { $columnConstraint = new ScalarColumnConstraint.Raw(k, op, t.getText()).prepare(); } + @init { List arguments = new ArrayList<>(); } + : K_NOT K_NULL + { + $columnConstraint = new UnaryFunctionColumnConstraint.Raw("NOT_NULL").prepare(); + } + | funcName=ident columnConstraintsArguments[arguments] (op=relationType t=value)? + { + if (op != null && t != null) + { + $columnConstraint = new FunctionColumnConstraint.Raw(funcName, arguments, op, t.getText()).prepare(); + } + else + { + $columnConstraint = new UnaryFunctionColumnConstraint.Raw(funcName, arguments).prepare(); + } + } + | k=ident op=relationType t=value + { + $columnConstraint = new ScalarColumnConstraint.Raw(k, op, t.getText()).prepare(); + } + | funcName=ident + { + $columnConstraint = new UnaryFunctionColumnConstraint.Raw(funcName).prepare(); + } ; columnMask returns [ColumnMask.Raw mask] @@ -997,6 +1018,12 @@ columnMaskArguments[List arguments] : '(' ')' | '(' c=term { arguments.add(c); } (',' cn=term { arguments.add(cn); })* ')' ; +columnConstraintsArguments[List arguments] + : '(' ')' + | '(' c=term { try { arguments.add(c.toString()); } catch (Throwable t) { throw new SyntaxException("Constraint function parameters need to be strings."); }; } (',' cn=term { try { arguments.add(cn.toString()); } catch (Throwable t) { throw new SyntaxException("Constraint function parameters need to be strings."); }; })* ')' + | '(' ci=ident { throw new SyntaxException("Constraint function parameters need to be strings."); } (',' cni=ident)* ')' + ; + tablePartitionKey[CreateTableStatement.Raw stmt] @init {List l = new ArrayList();} @after{ $stmt.setPartitionKeyColumns(l); } diff --git a/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionConstraint.java index 6204b96a212c..2f6805bdef10 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/AbstractFunctionConstraint.java @@ -20,7 +20,6 @@ import java.util.List; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.CqlBuilder; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.utils.LocalizeString; @@ -30,9 +29,8 @@ public abstract class AbstractFunctionConstraint extends ColumnConstraint protected final Operator relationType; protected final String term; - public AbstractFunctionConstraint(ColumnIdentifier columnName, Operator relationType, String term) + public AbstractFunctionConstraint(Operator relationType, String term) { - super(columnName); this.relationType = relationType; this.term = term; } diff --git a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java index eecc0b8ecce1..bddea571aa4c 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraint.java @@ -40,9 +40,9 @@ */ public abstract class ColumnConstraint { - protected final ColumnIdentifier columnName; + protected ColumnIdentifier columnName; - public ColumnConstraint(ColumnIdentifier columnName) + public void setColumnName(ColumnIdentifier columnName) { this.columnName = columnName; } diff --git a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java index 21b119522866..900fb7047be9 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ColumnConstraints.java @@ -27,6 +27,8 @@ import java.util.Set; import java.util.TreeSet; +import com.google.common.annotations.VisibleForTesting; + import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.CqlBuilder; import org.apache.cassandra.db.TypeSizes; @@ -49,10 +51,17 @@ public class ColumnConstraints extends ColumnConstraint public ColumnConstraints(List> constraints) { - super(null); this.constraints = constraints; } + @Override + public void setColumnName(ColumnIdentifier columnName) + { + this.columnName = columnName; + for (ColumnConstraint constraint : constraints) + constraint.setColumnName(columnName); + } + @Override public String name() { @@ -117,6 +126,17 @@ public boolean hasRelevantConstraints() return false; } + public boolean containsNotNullConstraint() + { + for (ColumnConstraint c : constraints) + { + if (c.toString().equals(NotNullConstraint.CQL_FUNCTION_NAME)) + return true; + } + + return false; + } + @Override public void validate(ColumnMetadata columnMetadata) throws InvalidConstraintDefinitionException { @@ -218,18 +238,26 @@ public ColumnConstraints prepare(ColumnIdentifier column) for (ColumnConstraint constraint : constraints) { - if (constraint.columnName != null && !column.equals(constraint.columnName)) - throw new InvalidConstraintDefinitionException(format("Constraint %s was not specified on a column it operates on: %s but on: %s", - constraint, column.toCQLString(), constraint.columnName)); + // We only check scalar constraints column name, as the rest of the constraints + // imply the name from the column they are defined at + if (constraint.getConstraintType() == ConstraintType.SCALAR) + { + if (!column.equals(constraint.columnName)) + { + throw new InvalidConstraintDefinitionException(format("Constraint %s was not specified on a column it operates on: %s but on: %s", + constraint, column.toCQLString(), constraint.columnName)); + } + } } - return new ColumnConstraints(constraints); + ColumnConstraints columnConstraints = new ColumnConstraints(constraints); + columnConstraints.setColumnName(column); + return columnConstraints; } } public static class Serializer implements MetadataSerializer { - @Override public void serialize(ColumnConstraints columnConstraint, DataOutputPlus out, Version version) throws IOException { @@ -248,13 +276,11 @@ public ColumnConstraints deserialize(DataInputPlus in, Version version) throws I List> columnConstraints = new ArrayList<>(); int numberOfConstraints = in.readInt(); for (int i = 0; i < numberOfConstraints; i++) - { - int serializerPosition = in.readShort(); - ColumnConstraint constraint = (ColumnConstraint) ConstraintType - .getSerializer(serializerPosition) - .deserialize(in, version); - columnConstraints.add(constraint); - } + columnConstraints.add(deserializeConstraint(in, in.readShort(), version)); + + // we are not setting column name here on purpose + // that is deffered in ColumnMetadata's constructor, + // we do not have the access to a column name here anyway return new ColumnConstraints(columnConstraints); } @@ -269,6 +295,14 @@ public long serializedSize(ColumnConstraints columnConstraint, Version version) } return constraintsSize; } + + @VisibleForTesting + public ColumnConstraint deserializeConstraint(DataInputPlus in, int serializerPosition, Version version) throws IOException + { + return (ColumnConstraint) ConstraintType + .getSerializer(serializerPosition) + .deserialize(in, version); + } } @Override diff --git a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java index ad8424c3f563..e7837c74f76a 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java @@ -19,14 +19,17 @@ package org.apache.cassandra.cql3.constraints; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.cql3.functions.types.ParseUtils; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.utils.ByteBufferUtil; +import static java.lang.String.format; import static org.apache.cassandra.cql3.Operator.EQ; import static org.apache.cassandra.cql3.Operator.GT; import static org.apache.cassandra.cql3.Operator.GTE; @@ -41,13 +44,22 @@ public abstract class ConstraintFunction { public static final List DEFAULT_FUNCTION_OPERATORS = List.of(EQ, NEQ, GTE, GT, LTE, LT); - protected final ColumnIdentifier columnName; + protected ColumnIdentifier columnName; protected final String name; + protected final List args; + // args as propagated from cql + protected final List rawArgs; - public ConstraintFunction(ColumnIdentifier columnName, String name) + public ConstraintFunction(String name, List args) { - this.columnName = columnName; this.name = name; + this.rawArgs = args; + this.args = unquote(args); + } + + public List arguments() + { + return args; } /** @@ -84,6 +96,7 @@ public void evaluate(AbstractType valueType, ByteBuffer columnValue) throws C */ public void validate(ColumnMetadata columnMetadata, String term) throws InvalidConstraintDefinitionException { + maybeThrowOnNonEmptyArguments(name); } /** @@ -100,4 +113,44 @@ public void validate(ColumnMetadata columnMetadata, String term) throws InvalidC * @return supported types for given constraint */ public abstract List> getSupportedTypes(); + + /** + * Tells whether implementation supports specifying arguments on its function. + *
      + * In this case, this function will return "true" + *
      +     *     val int check length() < 1024
      +     * 
      + * + * In this case, this function will return "false" + *
      +     *     val int check someconstraint('abc', 'def')
      +     * 
      + * @return true if this constraint does not accept any parameters, false otherwise. + */ + public boolean isParameterless() { return true; } + + @Override + public String toString() + { + return name; + } + + protected void maybeThrowOnNonEmptyArguments(String constraintName) + { + if (!isParameterless()) + return; + + if (args != null && !args.isEmpty()) + throw new InvalidConstraintDefinitionException(format("Constraint %s does not accept any arguments.", constraintName)); + } + + private List unquote(List quotedArgs) + { + List unquotedArgs = new ArrayList<>(); + for (String quotedArg : quotedArgs) + unquotedArgs.add(ParseUtils.unquote(quotedArg)); + + return unquotedArgs; + } } diff --git a/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java index a94b4bd0bdcb..a25553bd7bbc 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; import java.util.function.Function; @@ -45,21 +46,21 @@ public class FunctionColumnConstraint extends AbstractFunctionConstraint arguments, Operator relationType, String term) { this.relationType = relationType; - this.columnName = columnName; this.term = term; - function = createConstraintFunction(functionName.toCQLString(), columnName); + if (arguments == null) + arguments = new ArrayList<>(); + function = createConstraintFunction(functionName.toCQLString(), arguments); } public FunctionColumnConstraint prepare() { - return new FunctionColumnConstraint(function, columnName, relationType, term); + return new FunctionColumnConstraint(function, relationType, term); } } @@ -81,23 +82,31 @@ public enum Functions OCTET_LENGTH(OctetLengthConstraint::new), REGEXP(RegexpConstraint::new); - private final Function functionCreator; + private final Function, ConstraintFunction> functionCreator; - Functions(Function functionCreator) + Functions(Function, ConstraintFunction> functionCreator) { this.functionCreator = functionCreator; } } - private static ConstraintFunction createConstraintFunction(String functionName, ColumnIdentifier columnName) + private static ConstraintFunction createConstraintFunction(String functionName, List args) { - return getEnum(Functions.class, functionName).functionCreator.apply(columnName); + return getEnum(Functions.class, functionName).functionCreator.apply(args); } - private FunctionColumnConstraint(ConstraintFunction function, ColumnIdentifier columnName, Operator relationType, String term) + private FunctionColumnConstraint(ConstraintFunction function, Operator relationType, String term) { - super(columnName, relationType, term); + super(relationType, term); this.function = function; + this.columnName = function.columnName; + } + + @Override + public void setColumnName(ColumnIdentifier columnName) + { + this.columnName = columnName; + this.function.columnName = columnName; } public ConstraintFunction function() @@ -156,7 +165,6 @@ protected void internalEvaluate(AbstractType valueType, ByteBuffer columnValu @Override public void validate(ColumnMetadata columnMetadata) { - validateArgs(columnMetadata); validateTypes(columnMetadata); function.validate(columnMetadata, term); } @@ -167,18 +175,11 @@ public ConstraintType getConstraintType() return ConstraintType.FUNCTION; } - void validateArgs(ColumnMetadata columnMetadata) - { - if (!columnMetadata.name.equals(columnName)) - throw new InvalidConstraintDefinitionException(String.format("Parameter of %s constraint should be the column name (%s)", - name(), - columnMetadata.name)); - } - @Override public String toString() { - return function.name + "(" + columnName + ") " + relationType + " " + term; + String arguments = String.join(",", function.rawArgs); + return function.name + '(' + arguments + ") " + relationType + ' ' + term; } public static class Serializer implements MetadataSerializer @@ -187,7 +188,12 @@ public static class Serializer implements MetadataSerializer args = new ArrayList<>(); + int argsSize = in.readInt(); + for (int i = 0; i < argsSize; i++) + args.add(in.readUTF()); + ConstraintFunction function; - String columnNameString = in.readUTF(); - ColumnIdentifier columnName = new ColumnIdentifier(columnNameString, true); try { - function = createConstraintFunction(functionName, columnName); + function = createConstraintFunction(functionName, args); } catch (Exception e) { @@ -209,14 +219,19 @@ public FunctionColumnConstraint deserialize(DataInputPlus in, Version version) t } Operator relationType = Operator.readFrom(in); final String term = in.readUTF(); - return new FunctionColumnConstraint(function, columnName, relationType, term); + return new FunctionColumnConstraint(function, relationType, term); } @Override public long serializedSize(FunctionColumnConstraint columnConstraint, Version version) { + int argsSizes = 0; + for (String arg : columnConstraint.function.args) + argsSizes += TypeSizes.sizeof(arg); + return TypeSizes.sizeof(columnConstraint.function.getClass().getName()) - + TypeSizes.sizeof(columnConstraint.columnName.toCQLString()) + + TypeSizes.sizeof(columnConstraint.function.args.size()) + + argsSizes + TypeSizes.sizeof(columnConstraint.term) + Operator.serializedSize(); } diff --git a/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java index 95fbac5b3c0a..15a19d7954fc 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/JsonConstraint.java @@ -21,7 +21,6 @@ import java.nio.ByteBuffer; import java.util.List; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; @@ -37,9 +36,9 @@ public class JsonConstraint extends UnaryConstraintFunction public static final String FUNCTION_NAME = "JSON"; - public JsonConstraint(ColumnIdentifier columnName) + public JsonConstraint(List args) { - super(columnName, FUNCTION_NAME); + super(FUNCTION_NAME, args); } @Override @@ -52,7 +51,7 @@ public void internalEvaluate(AbstractType valueType, Operator relationType, S catch (MarshalException ex) { throw new ConstraintViolationException(format("Value for column '%s' violated %s constraint as it is not a valid JSON.", - columnName.toCQLString(), + columnName, name)); } } diff --git a/src/java/org/apache/cassandra/cql3/constraints/LengthConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/LengthConstraint.java index 49954c28fb93..59d78afcaa39 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/LengthConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/LengthConstraint.java @@ -21,7 +21,6 @@ import java.nio.ByteBuffer; import java.util.List; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; @@ -35,9 +34,9 @@ public class LengthConstraint extends ConstraintFunction private static final String NAME = "LENGTH"; private static final List> SUPPORTED_TYPES = List.of(BytesType.instance, UTF8Type.instance, AsciiType.instance); - public LengthConstraint(ColumnIdentifier columnName) + public LengthConstraint(List args) { - super(columnName, NAME); + super(NAME, args); } @Override diff --git a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java index af79086701f0..d0d050db4544 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java @@ -19,9 +19,9 @@ package org.apache.cassandra.cql3.constraints; import java.nio.ByteBuffer; +import java.util.Collections; import java.util.List; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.schema.ColumnMetadata; @@ -30,11 +30,19 @@ public class NotNullConstraint extends UnaryConstraintFunction { - public static final String FUNCTION_NAME = "NOT_NULL"; + public static final String FUNCTION_NAME = "NOT_NULL"; // as enum item + public static final String CQL_FUNCTION_NAME = "NOT NULL"; - public NotNullConstraint(ColumnIdentifier columnName) + private static final List emptyArguments = Collections.emptyList(); + + public NotNullConstraint() + { + super(FUNCTION_NAME, emptyArguments); + } + + public NotNullConstraint(List args) { - super(columnName, FUNCTION_NAME); + super(FUNCTION_NAME, args); } @Override @@ -46,6 +54,7 @@ public void internalEvaluate(AbstractType valueType, Operator relationType, S @Override public void validate(ColumnMetadata columnMetadata, String term) throws InvalidConstraintDefinitionException { + super.validate(columnMetadata, term); if (columnMetadata.isPrimaryKeyColumn()) throw new InvalidConstraintDefinitionException(format("%s constraint can not be specified on a %s key column '%s'", name, @@ -59,6 +68,12 @@ public List> getSupportedTypes() return null; } + @Override + public String toString() + { + return CQL_FUNCTION_NAME; + } + @Override public boolean equals(Object o) { diff --git a/src/java/org/apache/cassandra/cql3/constraints/OctetLengthConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/OctetLengthConstraint.java index 8147d37d62d2..b55b489465c9 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/OctetLengthConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/OctetLengthConstraint.java @@ -21,7 +21,6 @@ import java.nio.ByteBuffer; import java.util.List; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; @@ -34,9 +33,9 @@ public class OctetLengthConstraint extends ConstraintFunction { private static final List> SUPPORTED_TYPES = List.of(BytesType.instance, UTF8Type.instance, AsciiType.instance); - public OctetLengthConstraint(ColumnIdentifier columnName) + public OctetLengthConstraint(List args) { - super(columnName, "OCTET_LENGTH"); + super("OCTET_LENGTH", args); } @Override diff --git a/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java index a2e439585f05..062a5a1c6a97 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java @@ -23,7 +23,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.cql3.functions.types.ParseUtils; import org.apache.cassandra.db.marshal.AbstractType; @@ -43,9 +42,9 @@ public class RegexpConstraint extends ConstraintFunction private Pattern pattern; - public RegexpConstraint(ColumnIdentifier columnName) + public RegexpConstraint(List args) { - super(columnName, FUNCTION_NAME); + super(FUNCTION_NAME, args); } @Override @@ -84,6 +83,7 @@ public List getSupportedOperators() @Override public void validate(ColumnMetadata columnMetadata, String regexp) throws InvalidConstraintDefinitionException { + super.validate(columnMetadata, regexp); try { // compilation of a regexp every single time upon evaluation is not performance friendly diff --git a/src/java/org/apache/cassandra/cql3/constraints/ScalarColumnConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/ScalarColumnConstraint.java index 80671a6bf39f..f230c6b2d61e 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ScalarColumnConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ScalarColumnConstraint.java @@ -96,10 +96,12 @@ public void checkSatisfiability(List> constraints, ColumnMet } private ByteBuffer value; + private AbstractType returnType; - private ScalarColumnConstraint(ColumnIdentifier param, Operator relationType, String term) + private ScalarColumnConstraint(ColumnIdentifier columnName, Operator relationType, String term) { - super(param, relationType, term); + super(relationType, term); + setColumnName(columnName); } @Override @@ -125,11 +127,13 @@ protected void internalEvaluate(AbstractType valueType, ByteBuffer columnValu @Override public void validate(ColumnMetadata columnMetadata) throws InvalidConstraintDefinitionException { + returnType = columnMetadata.type; + validateTypes(columnMetadata); try { - value = columnMetadata.type.fromString(ParseUtils.unquote(term)); + value = returnType.fromString(ParseUtils.unquote(term)); } catch (Throwable t) { @@ -180,9 +184,9 @@ public void serialize(ScalarColumnConstraint columnConstraint, DataOutputPlus ou @Override public ScalarColumnConstraint deserialize(DataInputPlus in, Version version) throws IOException { - ColumnIdentifier param = new ColumnIdentifier(in.readUTF(), true); + ColumnIdentifier columnName = new ColumnIdentifier(in.readUTF(), true); Operator relationType = Operator.readFrom(in); - return new ScalarColumnConstraint(param, relationType, in.readUTF()); + return new ScalarColumnConstraint(columnName, relationType, in.readUTF()); } @Override diff --git a/src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java b/src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java index 8696e81a65a7..0e4b0ddd2de9 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java +++ b/src/java/org/apache/cassandra/cql3/constraints/UnaryConstraintFunction.java @@ -20,14 +20,13 @@ import java.util.List; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; public abstract class UnaryConstraintFunction extends ConstraintFunction { - public UnaryConstraintFunction(ColumnIdentifier columnName, String name) + public UnaryConstraintFunction(String name, List args) { - super(columnName, name); + super(name, args); } public List getSupportedOperators() diff --git a/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java index 80fd443e0e4d..c8edd1189ea4 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java @@ -20,9 +20,12 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; import java.util.function.Function; +import com.google.common.annotations.VisibleForTesting; + import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.cql3.constraints.SatisfiabilityChecker.UnaryFunctionSatisfiabilityChecker; @@ -45,17 +48,25 @@ public class UnaryFunctionColumnConstraint extends AbstractFunctionConstraint arguments) + { + function = createConstraintFunction(functionName.toString(), arguments); + } + + public Raw(ColumnIdentifier functionName) { - this.columnName = columnName; - function = createConstraintFunction(functionName.toCQLString(), columnName); + function = createConstraintFunction(functionName.toString(), List.of()); } public UnaryFunctionColumnConstraint prepare() { - return new UnaryFunctionColumnConstraint(function, columnName); + return new UnaryFunctionColumnConstraint(function); } } @@ -64,23 +75,31 @@ public enum Functions implements UnaryFunctionSatisfiabilityChecker NOT_NULL(NotNullConstraint::new), JSON(JsonConstraint::new); - private final Function functionCreator; + private final Function, ConstraintFunction> functionCreator; - Functions(Function functionCreator) + Functions(Function, ConstraintFunction> functionCreator) { this.functionCreator = functionCreator; } } - private static ConstraintFunction createConstraintFunction(String functionName, ColumnIdentifier columnName) + private static ConstraintFunction createConstraintFunction(String functionName, List arguments) { - return getEnum(Functions.class, functionName).functionCreator.apply(columnName); + return getEnum(Functions.class, functionName).functionCreator.apply(arguments); } - private UnaryFunctionColumnConstraint(ConstraintFunction function, ColumnIdentifier columnName) + public UnaryFunctionColumnConstraint(ConstraintFunction function) { - super(columnName, null, null); + super(null, null); this.function = function; + this.columnName = function.columnName; + } + + @Override + public void setColumnName(ColumnIdentifier columnName) + { + this.columnName = columnName; + this.function.columnName = columnName; } @Override @@ -89,6 +108,11 @@ public String name() return function.name; } + public ConstraintFunction function() + { + return function; + } + @Override public MetadataSerializer serializer() { @@ -122,7 +146,6 @@ public void internalEvaluate(AbstractType valueType, ByteBuffer columnValue) @Override public void validate(ColumnMetadata columnMetadata) throws InvalidConstraintDefinitionException { - validateArgs(columnMetadata); validateTypes(columnMetadata); function.validate(columnMetadata, term); } @@ -133,18 +156,18 @@ public ConstraintType getConstraintType() return UNARY_FUNCTION; } - void validateArgs(ColumnMetadata columnMetadata) - { - if (!columnMetadata.name.equals(columnName)) - throw new InvalidConstraintDefinitionException(String.format("Parameter of %s constraint should be the column name (%s)", - name(), - columnMetadata.name)); - } - @Override public String toString() { - return function.name + "(" + columnName + ")"; + if (function.isParameterless()) + { + return function.toString(); + } + else + { + String arguments = String.join(",", function.rawArgs); + return function.toString() + '(' + arguments + ')'; + } } public static class Serializer implements MetadataSerializer @@ -153,26 +176,40 @@ public static class Serializer implements MetadataSerializer args = new ArrayList<>(); + int argsSize = in.readInt(); + for (int i = 0; i < argsSize; i++) + args.add(in.readUTF()); + ConstraintFunction function; - String columnNameString = in.readUTF(); - ColumnIdentifier columnName = new ColumnIdentifier(columnNameString, true); try { - function = createConstraintFunction(functionName, columnName); + function = getConstraintFunction(functionName, args); } catch (Exception e) { throw new IOException(e); } - return new UnaryFunctionColumnConstraint(function, columnName); + return new UnaryFunctionColumnConstraint(function); + } + + @VisibleForTesting + public ConstraintFunction getConstraintFunction(String functionName, List args) + { + return createConstraintFunction(functionName, args); } @Override diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java index 0da92dc9df95..c7a57139d129 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java @@ -278,6 +278,8 @@ private static class Column this.type = type; this.isStatic = isStatic; this.mask = mask; + if (constraints != null) + constraints.prepare(name); this.constraints = constraints; } } @@ -931,6 +933,8 @@ public void constraint(ColumnIdentifier name, ColumnConstraints.Raw rawConstrain { kind = Kind.ALTER_CONSTRAINTS; this.constraintName = name; + if (rawConstraints != null) + rawConstraints.prepare(constraintName); this.constraints = rawConstraints; } diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java index 2e25291ed873..1f12366677ac 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java @@ -39,6 +39,7 @@ import org.apache.cassandra.auth.IResource; import org.apache.cassandra.auth.Permission; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.constraints.ColumnConstraint; import org.apache.cassandra.cql3.constraints.ColumnConstraints; import org.apache.cassandra.cql3.CQL3Type; import org.apache.cassandra.cql3.CQLFragmentParser; @@ -46,6 +47,8 @@ import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.cql3.CqlParser; import org.apache.cassandra.cql3.QualifiedName; +import org.apache.cassandra.cql3.constraints.NotNullConstraint; +import org.apache.cassandra.cql3.constraints.UnaryFunctionColumnConstraint; import org.apache.cassandra.cql3.functions.masking.ColumnMask; import org.apache.cassandra.db.guardrails.Guardrails; import org.apache.cassandra.db.marshal.AbstractType; @@ -606,17 +609,33 @@ public String table() return name.getName(); } - public void addColumn(ColumnIdentifier column, CQL3Type.Raw type, boolean isStatic, ColumnMask.Raw mask, ColumnConstraints.Raw constraints) + public void addColumn(ColumnIdentifier column, CQL3Type.Raw type, boolean isStatic, boolean isNotNull, ColumnMask.Raw mask, ColumnConstraints.Raw constraints) { if (null != rawColumns.put(column, new ColumnProperties.Raw(type, mask))) throw ire("Duplicate column '%s' declaration for table '%s'", column, name); if (isStatic) staticColumns.add(column); - if (null == constraints) - columnConstraints.put(column, ColumnConstraints.NO_OP); - else - columnConstraints.put(column, constraints.prepare(column)); + + ColumnConstraints preparedConstraints = constraints == null ? ColumnConstraints.NO_OP : constraints.prepare(column); + + if (isNotNull) + { + if (preparedConstraints.containsNotNullConstraint()) + throw ire("Duplicate definition of NOT NULL constraint"); + + List> checkConstraints = new ArrayList<>(preparedConstraints.getConstraints()); + checkConstraints.add(new UnaryFunctionColumnConstraint(new NotNullConstraint())); + preparedConstraints = new ColumnConstraints(checkConstraints); + preparedConstraints.setColumnName(column); + } + + columnConstraints.put(column, preparedConstraints); + } + + public void addColumn(ColumnIdentifier column, CQL3Type.Raw type, boolean isStatic, ColumnMask.Raw mask, ColumnConstraints.Raw constraints) + { + addColumn(column, type, isStatic, false, mask, constraints); } public void setCompactStorage() diff --git a/src/java/org/apache/cassandra/schema/ColumnMetadata.java b/src/java/org/apache/cassandra/schema/ColumnMetadata.java index fc8cd984c9af..d5e7b23e4060 100644 --- a/src/java/org/apache/cassandra/schema/ColumnMetadata.java +++ b/src/java/org/apache/cassandra/schema/ColumnMetadata.java @@ -269,6 +269,7 @@ public ColumnMetadata(String ksName, this.comparisonOrder = comparisonOrder(kind, isComplex(), Math.max(0, position), name); this.mask = mask; this.columnConstraints = columnConstraints; + this.columnConstraints.setColumnName(name); } private static Comparator makeCellPathComparator(Kind kind, AbstractType type) diff --git a/test/distributed/org/apache/cassandra/distributed/test/ColumnConstraintsTest.java b/test/distributed/org/apache/cassandra/distributed/test/ColumnConstraintsTest.java index 77b72ccca1d2..4abdd780aa0b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ColumnConstraintsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ColumnConstraintsTest.java @@ -58,7 +58,7 @@ public void testInvalidConstraintsExceptions() throws IOException "org.apache.cassandra.db.marshal.IntegerType, org.apache.cassandra.db.marshal.LongType, " + "org.apache.cassandra.db.marshal.ShortType] but it was class org.apache.cassandra.db.marshal.UTF8Type"); - assertThrowsInvalidConstraintException(cluster, String.format("CREATE TABLE %s (pk int, ck1 int CHECK LENGTH(ck1) < 100, ck2 int, v int, " + + assertThrowsInvalidConstraintException(cluster, String.format("CREATE TABLE %s (pk int, ck1 int CHECK LENGTH() < 100, ck2 int, v int, " + "PRIMARY KEY ((pk), ck1, ck2));", tableName), "Column should be of type class org.apache.cassandra.db.marshal.UTF8Type or " + "class org.apache.cassandra.db.marshal.AsciiType but got class org.apache.cassandra.db.marshal.Int32Type"); @@ -213,7 +213,7 @@ public void testLengthTableLevelConstraint() throws IOException for (Map.Entry relation : RELATIONS_MAP.entrySet()) { String tableName = String.format(KEYSPACE + ".%s_tbl1_%s", type, relation.getKey()); - String createTableStatementSmallerThan = "CREATE TABLE " + tableName + " (pk " + type + " CHECK LENGTH(pk) " + relation.getValue() + " 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk), ck1, ck2));"; + String createTableStatementSmallerThan = "CREATE TABLE " + tableName + " (pk " + type + " CHECK LENGTH() " + relation.getValue() + " 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk), ck1, ck2));"; cluster.schemaChange(createTableStatementSmallerThan); } } @@ -299,7 +299,7 @@ public void testNotNullTableLevelConstraint() throws IOException try (Cluster cluster = init(Cluster.build(1).start())) { String tableName = String.format(KEYSPACE + ".%s_tbl1_%s", type, "st"); - String createTableNotNullValue = "CREATE TABLE " + tableName + " (pk int, value int CHECK NOT_NULL(value), PRIMARY KEY (pk));"; + String createTableNotNullValue = "CREATE TABLE " + tableName + " (pk int, value int CHECK NOT NULL, PRIMARY KEY (pk));"; cluster.schemaChange(createTableNotNullValue); Assertions.assertThatThrownBy(() -> cluster.coordinator(1).execute(String.format("INSERT INTO " + tableName + " (pk, value) VALUES (1, null)"), ConsistencyLevel.ALL)) diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/SnapshotTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/SnapshotTest.java index 628df75ca463..11f8f0b1f1e1 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/SnapshotTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/SnapshotTest.java @@ -51,8 +51,8 @@ public void testSimpleSnapshot() throws Throwable .start())) { cluster.schemaChange(withKeyspace("create table %s.tbl (id int primary key, x int)")); - cluster.schemaChange(withKeyspace("create table %s.tblconstraints (id int primary key, x int check x > 100 and x < 200, v text check LENGTH(v) > 10)")); - cluster.schemaChange(withKeyspace("create table %s.tblconstraints2 (id int primary key, x int check NOT_NULL(x), v text check LENGTH(v) > 10)")); + cluster.schemaChange(withKeyspace("create table %s.tblconstraints (id int primary key, x int check x > 100 and x < 200, v text check LENGTH() > 10)")); + cluster.schemaChange(withKeyspace("create table %s.tblconstraints2 (id int primary key, x int check NOT NULL, v text check LENGTH() > 10)")); cluster.schemaChange(withKeyspace("CREATE OR REPLACE FUNCTION %s.fLog (input double) CALLED ON NULL INPUT RETURNS double LANGUAGE java AS 'return Double.valueOf(Math.log(input.doubleValue()));';")); cluster.schemaChange(withKeyspace("CREATE OR REPLACE FUNCTION %s.avgState ( state tuple, val int ) CALLED ON NULL INPUT RETURNS tuple LANGUAGE java AS \n" + " 'if (val !=null) { state.setInt(0, state.getInt(0)+1); state.setLong(1, state.getLong(1)+val.intValue()); } return state;'; ")); diff --git a/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java index 91f21f8b0922..9ecae738dd45 100644 --- a/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/AlterTableWithTableConstraintValidationTest.java @@ -116,12 +116,12 @@ public void testCreateTableAddMultipleMixedConstraints() throws Throwable table, tableCreateStatement)); - execute("ALTER TABLE %s ALTER ck2 CHECK LENGTH(ck2) = 4"); + execute("ALTER TABLE %s ALTER ck2 CHECK LENGTH() = 4"); tableCreateStatement = "CREATE TABLE " + KEYSPACE + "." + table + " (\n" + " pk int,\n" + " ck1 int CHECK ck1 < 100,\n" + - " ck2 text CHECK LENGTH(ck2) = 4,\n" + + " ck2 text CHECK LENGTH() = 4,\n" + " v int,\n" + " PRIMARY KEY (pk, ck1, ck2)\n" + ") WITH CLUSTERING ORDER BY (ck1 ASC, ck2 ASC)\n" + @@ -133,13 +133,13 @@ public void testCreateTableAddMultipleMixedConstraints() throws Throwable table, tableCreateStatement)); - execute("ALTER TABLE %s ALTER v CHECK NOT_NULL(v)"); + execute("ALTER TABLE %s ALTER v CHECK NOT NULL"); tableCreateStatement = "CREATE TABLE " + KEYSPACE + "." + table + " (\n" + " pk int,\n" + " ck1 int CHECK ck1 < 100,\n" + - " ck2 text CHECK LENGTH(ck2) = 4,\n" + - " v int CHECK NOT_NULL(v),\n" + + " ck2 text CHECK LENGTH() = 4,\n" + + " v int CHECK NOT NULL,\n" + " PRIMARY KEY (pk, ck1, ck2)\n" + ") WITH CLUSTERING ORDER BY (ck1 ASC, ck2 ASC)\n" + " AND " + tableParametersCql(); @@ -202,7 +202,7 @@ public void testAlterWithConstraintsAndCdcEnabled() throws Throwable @Test public void testAlterWithCdcAndPKConstraintsEnabled() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK length(pk) = 100, ck1 int, ck2 int, PRIMARY KEY ((pk), ck1, ck2));"); + createTable("CREATE TABLE %s (pk text CHECK length() = 100, ck1 int, ck2 int, PRIMARY KEY ((pk), ck1, ck2));"); // It works execute("ALTER TABLE %s WITH cdc = true"); } @@ -245,12 +245,6 @@ public void testAlterTableAlterExistingColumnWithCheckOnNonExistingColumn() thro assertInvalidThrowMessage("Constraint ck3 < 100 was not specified on a column it operates on: ck1 but on: ck3", InvalidRequestException.class, "ALTER TABLE %s ALTER ck1 CHECK ck3 < 100"); - assertInvalidThrowMessage("Constraint NOT_NULL(ck3) was not specified on a column it operates on: ck1 but on: ck3", - InvalidRequestException.class, - "ALTER TABLE %s ALTER ck1 CHECK NOT_NULL(ck3)"); - assertInvalidThrowMessage("Constraint LENGTH(ck3) > 10 was not specified on a column it operates on: ck1 but on: ck3", - InvalidRequestException.class, - "ALTER TABLE %s ALTER ck1 CHECK LENGTH(ck3) > 10"); } @Test @@ -261,14 +255,6 @@ public void testAlterTableAddNewColumnWithCheckOnNonExistingColumn() throws Thro assertInvalidThrowMessage("Constraint v3 < 100 was not specified on a column it operates on: v2 but on: v3", InvalidRequestException.class, "ALTER TABLE %s ADD v2 int CHECK v3 < 100"); - - assertInvalidThrowMessage("Constraint NOT_NULL(v3) was not specified on a column it operates on: v2 but on: v3", - InvalidRequestException.class, - "ALTER TABLE %s ADD v2 int CHECK NOT_NULL(v3)"); - - assertInvalidThrowMessage("Constraint LENGTH(v3) > 10 was not specified on a column it operates on: v2 but on: v3", - InvalidRequestException.class, - "ALTER TABLE %s ADD v2 int CHECK LENGTH(v3) > 10"); } @Test @@ -277,4 +263,18 @@ public void testAlterTableAddColumnWithCheck() createTable("CREATE TABLE %s (pk text, col1 int, primary key (pk));"); execute("ALTER TABLE %s ADD col2 int CHECK col2 > 0"); } + + @Test + public void testNotNullSyntax() throws Throwable + { + createTable("CREATE TABLE %s (pk text, col1 int NOT NULL, primary key (pk));"); + createTable("CREATE TABLE %s (pk text, col1 int CHECK NOT NULL, primary key (pk));"); + createTable("CREATE TABLE %s (pk text, col1 int NOT NULL CHECK col1 > 0, primary key (pk));"); + execute("ALTER TABLE %s ALTER col1 CHECK col1 > 100"); + execute("ALTER TABLE %s ALTER col1 CHECK NOT NULL AND col1 > 100"); + + assertInvalidThrowMessage("Duplicate definition of NOT NULL constraint", + InvalidRequestException.class, + "CREATE TABLE %s (pk text, col1 int NOT NULL CHECK NOT NULL, primary key (pk));"); + } } diff --git a/test/unit/org/apache/cassandra/constraints/ConstraintArgumentsTest.java b/test/unit/org/apache/cassandra/constraints/ConstraintArgumentsTest.java new file mode 100644 index 000000000000..289c8ceb64d6 --- /dev/null +++ b/test/unit/org/apache/cassandra/constraints/ConstraintArgumentsTest.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.constraints; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.cql3.constraints.ColumnConstraint; +import org.apache.cassandra.cql3.constraints.ColumnConstraints; +import org.apache.cassandra.cql3.constraints.ConstraintFunction; +import org.apache.cassandra.cql3.constraints.ConstraintViolationException; +import org.apache.cassandra.cql3.constraints.InvalidConstraintDefinitionException; +import org.apache.cassandra.cql3.constraints.UnaryConstraintFunction; +import org.apache.cassandra.cql3.constraints.UnaryFunctionColumnConstraint; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.AsciiType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.schema.ColumnMetadata; +import org.apache.cassandra.tcm.serialization.MetadataSerializer; +import org.apache.cassandra.tcm.serialization.Version; + +import static java.lang.String.format; +import static org.apache.cassandra.schema.ColumnMetadata.Kind.REGULAR; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.Assert.assertEquals; + +public class ConstraintArgumentsTest +{ + private static final ColumnIdentifier columnIdentifier = new ColumnIdentifier("a_column", false); + private static final ColumnMetadata columnMetadata = new ColumnMetadata("a", "b", columnIdentifier, UTF8Type.instance, ColumnMetadata.NO_UNIQUE_ID, -1, REGULAR, null); + + @Test + public void testDeserOfContraintsWithArguments() throws Throwable + { + List> checkConstraints = new ArrayList<>(); + checkConstraints.add(new UnaryFunctionColumnConstraint(new Enumeration(List.of("a", "b", "c")))); + ColumnConstraints constraints = new ColumnConstraints(checkConstraints); + constraints.setColumnName(columnIdentifier); + + MetadataSerializer serializer = new TestingSerializer(); + + DataOutputBuffer dataOutputBuffer = new DataOutputBuffer(); + serializer.serialize(constraints, dataOutputBuffer, Version.V7); + + DataInputBuffer dataInputBuffer = new DataInputBuffer(dataOutputBuffer.getData()); + ColumnConstraints deserialize = serializer.deserialize(dataInputBuffer, Version.V7); + + List> deserializeConstraints = deserialize.getConstraints(); + assertEquals(1, deserializeConstraints.size()); + ColumnConstraint constraint = deserializeConstraints.get(0); + + assertEquals(Enumeration.FUNCTION_NAME, constraint.name()); + assertEquals(ColumnConstraint.ConstraintType.UNARY_FUNCTION, constraint.getConstraintType()); + + constraint.validate(columnMetadata); + + UnaryFunctionColumnConstraint c = ((UnaryFunctionColumnConstraint) constraint); + List arguments = c.function().arguments(); + assertEquals(List.of("a", "b", "c"), arguments); + } + + @Test + public void testDeserOfContraintsWithoutArguments() throws Throwable + { + List> checkConstraints = new ArrayList<>(); + checkConstraints.add(new UnaryFunctionColumnConstraint(new ParamerterlessContraint(List.of("a", "b", "c")))); + ColumnConstraints constraints = new ColumnConstraints(checkConstraints); + constraints.setColumnName(columnIdentifier); + + MetadataSerializer serializer = new TestingSerializer(); + + DataOutputBuffer dataOutputBuffer = new DataOutputBuffer(); + serializer.serialize(constraints, dataOutputBuffer, Version.V7); + + DataInputBuffer dataInputBuffer = new DataInputBuffer(dataOutputBuffer.getData()); + ColumnConstraints deserialize = serializer.deserialize(dataInputBuffer, Version.V7); + + List> deserializeConstraints = deserialize.getConstraints(); + assertEquals(1, deserializeConstraints.size()); + ColumnConstraint constraint = deserializeConstraints.get(0); + + assertEquals(ParamerterlessContraint.FUNCTION_NAME, constraint.name()); + assertEquals(ColumnConstraint.ConstraintType.UNARY_FUNCTION, constraint.getConstraintType()); + + assertThatThrownBy(() -> constraint.validate(columnMetadata)) + .isInstanceOf(InvalidConstraintDefinitionException.class) + .hasMessage("Constraint PARAMERTERLESS does not accept any arguments."); + } + + private static class TestingUnaryFunctionSerializer extends UnaryFunctionColumnConstraint.Serializer + { + @Override + public ConstraintFunction getConstraintFunction(String functionName, List args) + { + if (functionName.equals(Enumeration.FUNCTION_NAME)) + return new Enumeration(args); + + if (functionName.equals(ParamerterlessContraint.FUNCTION_NAME)) + return new ParamerterlessContraint(args); + + throw new IllegalStateException("not supported"); + } + } + + private static class TestingSerializer extends ColumnConstraints.Serializer + { + private static final TestingUnaryFunctionSerializer constraintSerializer = new TestingUnaryFunctionSerializer(); + + @Override + public ColumnConstraint deserializeConstraint(DataInputPlus in, int serializerPosition, Version version) throws IOException + { + return constraintSerializer.deserialize(in, version); + } + } + + private static class ParamerterlessContraint extends UnaryConstraintFunction + { + public static final String FUNCTION_NAME = "PARAMERTERLESS"; + + public ParamerterlessContraint(List args) + { + super(FUNCTION_NAME, args); + } + + @Override + protected void internalEvaluate(AbstractType valueType, Operator relationType, String term, ByteBuffer columnValue) + { + + } + + @Override + public List> getSupportedTypes() + { + return null; + } + } + + private static class Enumeration extends UnaryConstraintFunction + { + private static final List> SUPPORTED_TYPES = List.of(UTF8Type.instance, AsciiType.instance); + + public static final String FUNCTION_NAME = "ENUM"; + + public Enumeration(List args) + { + super(FUNCTION_NAME, args); + } + + @Override + public void internalEvaluate(AbstractType valueType, Operator relationType, String term, ByteBuffer columnValue) + { + if (!args.contains(valueType.getString(columnValue))) + { + throw new ConstraintViolationException(format("Value for column '%s' violated %s constraint as its value is not one of %s.", + columnName.toCQLString(), + name, + args)); + } + } + + @Override + public List> getSupportedTypes() + { + return SUPPORTED_TYPES; + } + + @Override + public boolean isParameterless() + { + return false; + } + + @Override + public boolean equals(Object o) + { + if (this == o) + return true; + + if (!(o instanceof Enumeration)) + return false; + + Enumeration other = (Enumeration) o; + + return columnName.equals(other.columnName) && name.equals(other.name); + } + } +} diff --git a/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java b/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java index 6f767ca0a45a..c22bb7ea533d 100644 --- a/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java +++ b/test/unit/org/apache/cassandra/constraints/ConstraintsSatisfiabilityTest.java @@ -18,6 +18,8 @@ package org.apache.cassandra.constraints; +import java.util.List; + import org.junit.Test; import org.apache.cassandra.cql3.ColumnIdentifier; @@ -101,6 +103,13 @@ else if ((op1 == GT && op2 == LTE) || { check(op1, 50, op2, 100, quadFunction, null, columnMetadata); } + else if ((op1 == GT && op2 == LTE) || + (op1 == GT && op2 == LT) || + (op1 == GTE && op2 == LTE) || + (op1 == GTE && op2 == LT)) + { + check(op1, 0, op2, 100, quadFunction, null, columnMetadata); + } else if (!(op1 == NEQ || op2 == NEQ)) { check(op1, 50, op2, 100, quadFunction, null, columnMetadata); @@ -174,7 +183,7 @@ private ScalarColumnConstraint scalar(Operator operator, Integer term) private FunctionColumnConstraint length(Operator operator, Integer term) { return new FunctionColumnConstraint.Raw(lengthFunctionIdentifier, - columnIdentifier, + List.of(), operator, term.toString()).prepare(); } diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java index 857ec85f408b..f68ee1ab6920 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnCqlConstraintValidationTest.java @@ -98,11 +98,11 @@ public void testCreateTableWithColumnMultipleConstraintsDescribeTableNonFunction @Test public void testCreateTableWithColumnNotNamedConstraintDescribeTableFunction() throws Throwable { - String table = createTable(KEYSPACE_PER_TEST, "CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck1) = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + String table = createTable(KEYSPACE_PER_TEST, "CREATE TABLE %s (pk int, ck1 text CHECK LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); String tableCreateStatement = "CREATE TABLE " + KEYSPACE_PER_TEST + "." + table + " (\n" + " pk int,\n" + - " ck1 text CHECK LENGTH(ck1) = 4,\n" + + " ck1 text CHECK LENGTH() = 4,\n" + " ck2 int,\n" + " v int,\n" + " PRIMARY KEY (pk, ck1, ck2)\n" + @@ -119,13 +119,13 @@ public void testCreateTableWithColumnNotNamedConstraintDescribeTableFunction() t @Test public void testCreateTableWithColumnNotNullConstraintDescribe() throws Throwable { - String table = createTable(KEYSPACE_PER_TEST, "CREATE TABLE %s (pk int, ck1 int, ck2 int, v int CHECK NOT_NULL(v), PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + String table = createTable(KEYSPACE_PER_TEST, "CREATE TABLE %s (pk int, ck1 int, ck2 int, v int CHECK NOT NULL, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); String tableCreateStatement = "CREATE TABLE " + KEYSPACE_PER_TEST + "." + table + " (\n" + " pk int,\n" + " ck1 int,\n" + " ck2 int,\n" + - " v int CHECK NOT_NULL(v),\n" + + " v int CHECK NOT NULL,\n" + " PRIMARY KEY (pk, ck1, ck2)\n" + ") WITH CLUSTERING ORDER BY (ck1 ASC, ck2 ASC)\n" + " AND " + tableParametersCql(); @@ -932,7 +932,7 @@ public void testCreateTableWithColumnWithNotNullCheckScalarFloatConstraints() th @Test public void testCreateTableWithColumnWithClusteringColumnLengthEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck1) = 4, ck2 int, v int, PRIMARY KEY ((pk), ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk), ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'fooo', 2, 3)"); @@ -946,7 +946,7 @@ public void testCreateTableWithColumnWithClusteringColumnLengthEqualToConstraint @Test public void testCreateTableWithColumnWithClusteringColumnLengthDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck1) != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH() != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'ck1'. It has a length of"; // Valid @@ -960,7 +960,7 @@ public void testCreateTableWithColumnWithClusteringColumnLengthDifferentThanCons @Test public void testCreateTableWithColumnWithClusteringColumnLengthBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck1) > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH() > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'foooo', 2, 3)"); @@ -974,7 +974,7 @@ public void testCreateTableWithColumnWithClusteringColumnLengthBiggerThanConstra @Test public void testCreateTableWithColumnWithClusteringColumnLengthBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck1) >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH() >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'foooo', 2, 3)"); @@ -988,7 +988,7 @@ public void testCreateTableWithColumnWithClusteringColumnLengthBiggerOrEqualThan @Test public void testCreateTableWithColumnWithClusteringColumnLengthSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck1) < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH() < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'foo', 2, 3)"); @@ -1002,7 +1002,7 @@ public void testCreateTableWithColumnWithClusteringColumnLengthSmallerThanConstr @Test public void testCreateTableWithColumnWithClusteringColumnLengthSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck1) <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH() <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'foo', 2, 3)"); @@ -1016,7 +1016,7 @@ public void testCreateTableWithColumnWithClusteringColumnLengthSmallerOrEqualTha @Test public void testCreateTableWithColumnWithClusteringBlobColumnLengthEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH(ck1) = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('fooo'), 2, 3)"); @@ -1030,7 +1030,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnLengthEqualToConstr @Test public void testCreateTableWithColumnWithClusteringBlobColumnLengthDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH(ck1) != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH() != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('foo'), 2, 3)"); @@ -1044,7 +1044,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnLengthDifferentThan @Test public void testCreateTableWithColumnWithClusteringBlobColumnLengthBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH(ck1) > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH() > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('foooo'), 2, 3)"); @@ -1058,7 +1058,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnLengthBiggerThanCon @Test public void testCreateTableWithColumnWithClusteringBlobColumnLengthBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH(ck1) >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH() >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('foooo'), 2, 3)"); @@ -1072,7 +1072,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnLengthBiggerOrEqual @Test public void testCreateTableWithColumnWithClusteringBlobColumnLengthSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH(ck1) < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH() < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('foo'), 2, 3)"); @@ -1086,7 +1086,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnLengthSmallerThanCo @Test public void testCreateTableWithColumnWithClusteringBlobColumnLengthSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH(ck1) <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK LENGTH() <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 ASC);"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('foo'), 2, 3)"); @@ -1101,7 +1101,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnLengthSmallerOrEqua @Test public void testCreateTableWithColumnWithPkColumnLengthEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK LENGTH(pk) = 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK LENGTH() = 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fooo', 1, 2, 3)"); @@ -1115,7 +1115,7 @@ public void testCreateTableWithColumnWithPkColumnLengthEqualToConstraint() throw @Test public void testCreateTableWithColumnWithPkColumnLengthDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK LENGTH(pk) != 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK LENGTH() != 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('foo', 1, 2, 3)"); @@ -1129,7 +1129,7 @@ public void testCreateTableWithColumnWithPkColumnLengthDifferentThanConstraint() @Test public void testCreateTableWithColumnWithPkColumnLengthBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK LENGTH(pk) > 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK LENGTH() > 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('foooo', 1, 2, 3)"); @@ -1143,7 +1143,7 @@ public void testCreateTableWithColumnWithPkColumnLengthBiggerThanConstraint() th @Test public void testCreateTableWithColumnWithPkColumnLengthBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK LENGTH(pk) >= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK LENGTH() >= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('foooo', 1, 2, 3)"); @@ -1157,7 +1157,7 @@ public void testCreateTableWithColumnWithPkColumnLengthBiggerOrEqualThanConstrai @Test public void testCreateTableWithColumnWithPkColumnLengthSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK LENGTH(pk) < 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK LENGTH() < 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('foo', 1, 2, 3)"); @@ -1171,7 +1171,7 @@ public void testCreateTableWithColumnWithPkColumnLengthSmallerThanConstraint() t @Test public void testCreateTableWithColumnWithPkColumnLengthSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK LENGTH(pk) <= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK LENGTH() <= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('foo', 1, 2, 3)"); @@ -1186,7 +1186,7 @@ public void testCreateTableWithColumnWithPkColumnLengthSmallerOrEqualThanConstra @Test public void testCreateTableWithColumnWithRegularColumnLengthEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH(v) = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH() = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'fooo')"); @@ -1200,7 +1200,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthEqualToConstraint() @Test public void testCreateTableWithColumnWithRegularColumnLengthDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH(v) != 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH() != 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'foo')"); @@ -1214,7 +1214,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthDifferentThanConstra @Test public void testCreateTableWithColumnWithRegularColumnLengthBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH(v) > 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH() > 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'foooo')"); @@ -1228,7 +1228,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthBiggerThanConstraint @Test public void testCreateTableWithColumnWithRegularColumnLengthBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH(v) >= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH() >= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'foooo')"); @@ -1242,7 +1242,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthBiggerOrEqualThanCon @Test public void testCreateTableWithColumnWithRegularColumnLengthSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH(v) < 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH() < 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'foo')"); @@ -1256,7 +1256,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthSmallerThanConstrain @Test public void testCreateTableWithColumnWithRegularColumnLengthSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'foo')"); @@ -1270,7 +1270,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthSmallerOrEqualThanCo @Test public void testCreateTableWithColumnWithRegularColumnLengthCheckNullTextConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -1278,7 +1278,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthCheckNullTextConstra @Test public void testCreateTableWithColumnWithRegularColumnLengthCheckNullVarcharConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v varchar CHECK LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v varchar CHECK LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -1286,7 +1286,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthCheckNullVarcharCons @Test public void testCreateTableWithColumnWithRegularColumnLengthCheckNullAsciiConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v ascii CHECK LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v ascii CHECK LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -1294,7 +1294,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthCheckNullAsciiConstr @Test public void testCreateTableWithColumnWithRegularColumnLengthCheckNullBlobConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v blob CHECK LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v blob CHECK LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -1302,7 +1302,7 @@ public void testCreateTableWithColumnWithRegularColumnLengthCheckNullBlobConstra @Test public void testCreateTableWithColumnMixedColumnsLengthConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK LENGTH(pk) = 4, ck1 int, ck2 int, v text CHECK LENGTH(v) = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK LENGTH() = 4, ck1 int, ck2 int, v text CHECK LENGTH() = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fooo', 2, 3, 'fooo')"); @@ -1323,7 +1323,7 @@ public void testCreateTableWithWrongColumnConstraint() throws Throwable { try { - createTable("CREATE TABLE %s (pk text, ck1 int CHECK LENGTH(pk) = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text, ck1 int CHECK LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -1338,7 +1338,7 @@ public void testCreateTableWithWrongColumnMultipleConstraint() throws Throwable { try { - createTable("CREATE TABLE %s (pk text, ck1 int CHECK LENGTH(pk) = 4 AND ck1 < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text, ck1 int CHECK LENGTH() = 4 AND ck1 < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -1353,7 +1353,7 @@ public void testCreateTableWithColumnWithClusteringColumnInvalidTypeConstraint() { try { - createTable("CREATE TABLE %s (pk int, ck1 int CHECK LENGTH(ck1) = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int CHECK LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -1384,7 +1384,7 @@ public void testCreateTableInvalidFunction() throws Throwable { try { - createTable("CREATE TABLE %s (pk text CHECK not_a_function(pk) = 4, ck1 int, ck2 int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK not_a_function() = 4, ck1 int, ck2 int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -1398,7 +1398,7 @@ public void testCreateTableInvalidFunction() throws Throwable public void testCreateTableWithPKConstraintsAndCDCEnabled() throws Throwable { // It works - createTable("CREATE TABLE %s (pk text CHECK length(pk) = 4, ck1 int, ck2 int, PRIMARY KEY ((pk), ck1, ck2)) WITH cdc = true;"); + createTable("CREATE TABLE %s (pk text CHECK length() = 4, ck1 int, ck2 int, PRIMARY KEY ((pk), ck1, ck2)) WITH cdc = true;"); } @Test @@ -1444,16 +1444,8 @@ public void testCreateTableWithColumnWithClusteringColumnLessThanScalarConstrain @Test public void testCreateTableAddConstraintWithCheckOnNonExistingColumn() throws Throwable { - assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, ck1 text CHECK NOT_NULL(ck3), ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));")) - .hasRootCauseMessage("Constraint NOT_NULL(ck3) was not specified on a column it operates on: ck1 but on: ck3") - .rootCause().isInstanceOf(InvalidConstraintDefinitionException.class); - assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, ck1 int CHECK ck3 > 5, ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));")) .hasRootCauseMessage("Constraint ck3 > 5 was not specified on a column it operates on: ck1 but on: ck3") .rootCause().isInstanceOf(InvalidConstraintDefinitionException.class); - - assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, ck1 text CHECK LENGTH(ck3) > 10, ck2 text, v int, PRIMARY KEY ((pk),ck1, ck2));")) - .hasRootCauseMessage("Constraint LENGTH(ck3) > 10 was not specified on a column it operates on: ck1 but on: ck3") - .rootCause().isInstanceOf(InvalidConstraintDefinitionException.class); } } diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintInvalidTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintInvalidTest.java index dcb172163807..0fab10870ae5 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintInvalidTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintInvalidTest.java @@ -59,7 +59,7 @@ public static Collection data() @Test public void testCreateTableWithColumnNotNullCheckNonExisting() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 " + typeString + " CHECK NOT_NULL(ck1), ck2 int, v int, PRIMARY KEY (pk));"); + createTable("CREATE TABLE %s (pk int, ck1 " + typeString + " CHECK NOT NULL, ck2 int, v int, PRIMARY KEY (pk));"); // Invalid assertInvalidThrowMessage("Column 'ck1' has to be specified as part of this query.", InvalidRequestException.class, "INSERT INTO %s (pk, ck2, v) VALUES (1, 2, 3)"); @@ -71,12 +71,12 @@ public void testCreateTableWithColumnNotNullCheckNonExisting() throws Throwable @Test public void testInvalidSpecificationOfNotNullConstraintOnPrimaryKeys() throws Throwable { - assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk " + typeString + " CHECK NOT_NULL(pk) PRIMARY KEY)")) + assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk " + typeString + " CHECK NOT NULL PRIMARY KEY)")) .isInstanceOf(InvalidRequestException.class) .hasRootCauseInstanceOf(InvalidConstraintDefinitionException.class) .hasRootCauseMessage("NOT_NULL constraint can not be specified on a partition key column 'pk'"); - assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, cl " + typeString + " CHECK NOT_NULL(cl), PRIMARY KEY (pk, cl))")) + assertThatThrownBy(() -> createTable("CREATE TABLE %s (pk int, cl " + typeString + " CHECK NOT NULL, PRIMARY KEY (pk, cl))")) .isInstanceOf(InvalidRequestException.class) .hasRootCauseInstanceOf(InvalidConstraintDefinitionException.class) .hasRootCauseMessage("NOT_NULL constraint can not be specified on a clustering key column 'cl'"); diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintValidTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintValidTest.java index b1ad79a43f1a..bf91fdbd6bdb 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintValidTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnNotNullConstraintValidTest.java @@ -55,7 +55,7 @@ public static Collection data() @Test public void testCreateTableWithColumnNotNullCheckValid() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 " + typeString + " CHECK NOT_NULL(ck1), ck2 int, v int, PRIMARY KEY (pk));"); + createTable("CREATE TABLE %s (pk int, ck1 " + typeString + " CHECK NOT NULL, ck2 int, v int, PRIMARY KEY (pk));"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, " + value + ", 2, 3)"); diff --git a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java index 6f9260f022ec..092f21f77063 100644 --- a/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java +++ b/test/unit/org/apache/cassandra/constraints/CreateTableWithColumnOctetLengthConstraintValidationTest.java @@ -53,7 +53,7 @@ public static Collection generateData() @Test public void testCreateTableWithColumnWithClusteringColumnSerializedSizeEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH(ck1) = 4, ck2 int, v int, PRIMARY KEY ((pk), ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk), ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'fooo', 2, 3)"); @@ -69,7 +69,7 @@ public void testCreateTableWithColumnWithClusteringColumnSerializedSizeEqualToCo @Test public void testCreateTableWithColumnWithClusteringColumnSerializedSizeDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH(ck1) != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH() != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'ck1'. It has a length of"; // Valid @@ -83,7 +83,7 @@ public void testCreateTableWithColumnWithClusteringColumnSerializedSizeDifferent @Test public void testCreateTableWithColumnWithClusteringColumnSerializedSizeBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH(ck1) > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH() > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'fñoo', 2, 3)"); @@ -97,7 +97,7 @@ public void testCreateTableWithColumnWithClusteringColumnSerializedSizeBiggerTha @Test public void testCreateTableWithColumnWithClusteringColumnSerializedSizeBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH(ck1) >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH() >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'fñoo', 2, 3)"); @@ -111,7 +111,7 @@ public void testCreateTableWithColumnWithClusteringColumnSerializedSizeBiggerOrE @Test public void testCreateTableWithColumnWithClusteringColumnSerializedSizeSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH(ck1) < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH() < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'fñ', 2, 3)"); @@ -125,7 +125,7 @@ public void testCreateTableWithColumnWithClusteringColumnSerializedSizeSmallerTh @Test public void testCreateTableWithColumnWithClusteringColumnSerializedSizeSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH(ck1) <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 text CHECK OCTET_LENGTH() <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 'fñ', 2, 3)"); @@ -139,7 +139,7 @@ public void testCreateTableWithColumnWithClusteringColumnSerializedSizeSmallerOr @Test public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH(ck1) = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('fño'), 2, 3)"); @@ -153,7 +153,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeEqual @Test public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH(ck1) != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH() != 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('fñ'), 2, 3)"); @@ -167,7 +167,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeDiffe @Test public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH(ck1) > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH() > 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('fñoo'), 2, 3)"); @@ -181,7 +181,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeBigge @Test public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH(ck1) >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH() >= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('fñoo'), 2, 3)"); @@ -195,7 +195,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeBigge @Test public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH(ck1) < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH() < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('fñ'), 2, 3)"); @@ -209,7 +209,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeSmall @Test public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH(ck1) <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 blob CHECK OCTET_LENGTH() <= 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, textAsBlob('fñ'), 2, 3)"); @@ -224,7 +224,7 @@ public void testCreateTableWithColumnWithClusteringBlobColumnSerializedSizeSmall @Test public void testCreateTableWithColumnWithPkColumnSerializedSizeEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH(pk) = 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH() = 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fño', 1, 2, 3)"); @@ -238,7 +238,7 @@ public void testCreateTableWithColumnWithPkColumnSerializedSizeEqualToConstraint @Test public void testCreateTableWithColumnWithPkColumnSerializedSizeDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH(pk) != 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH() != 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fñ', 1, 2, 3)"); @@ -252,7 +252,7 @@ public void testCreateTableWithColumnWithPkColumnSerializedSizeDifferentThanCons @Test public void testCreateTableWithColumnWithPkColumnSerializedSizeBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH(pk) > 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH() > 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fñoo', 1, 2, 3)"); @@ -266,7 +266,7 @@ public void testCreateTableWithColumnWithPkColumnSerializedSizeBiggerThanConstra @Test public void testCreateTableWithColumnWithPkColumnSerializedSizeBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH(pk) >= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH() >= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fñoo', 1, 2, 3)"); @@ -280,7 +280,7 @@ public void testCreateTableWithColumnWithPkColumnSerializedSizeBiggerOrEqualThan @Test public void testCreateTableWithColumnWithPkColumnSerializedSizeSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH(pk) < 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH() < 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fñ', 1, 2, 3)"); @@ -294,7 +294,7 @@ public void testCreateTableWithColumnWithPkColumnSerializedSizeSmallerThanConstr @Test public void testCreateTableWithColumnWithPkColumnSerializedSizeSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH(pk) <= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH() <= 4, ck1 int, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fñ', 1, 2, 3)"); @@ -309,7 +309,7 @@ public void testCreateTableWithColumnWithPkColumnSerializedSizeSmallerOrEqualTha @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeEqualToConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'fño')"); @@ -323,7 +323,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeEqualToConst @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeDifferentThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) != 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() != 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'fñ')"); @@ -337,7 +337,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeDifferentTha @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeBiggerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) > 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() > 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'fñoo')"); @@ -351,7 +351,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeBiggerThanCo @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeBiggerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) >= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() >= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'fñoo')"); @@ -365,7 +365,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeBiggerOrEqua @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeSmallerThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) < 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() < 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'fñ')"); @@ -379,7 +379,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeSmallerThanC @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeSmallerOrEqualThanConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, 'fñ')"); @@ -393,7 +393,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeSmallerOrEqu @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullTextConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -401,7 +401,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullTex @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullVarcharConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v varchar CHECK OCTET_LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v varchar CHECK OCTET_LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -409,7 +409,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullVar @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullAsciiConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v ascii CHECK OCTET_LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v ascii CHECK OCTET_LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -417,7 +417,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullAsc @Test public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullBlobConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v blob CHECK OCTET_LENGTH(v) <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, v blob CHECK OCTET_LENGTH() <= 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); final String expectedErrorMessage = "Column value does not satisfy value constraint for column 'v' as it is null."; assertInvalidThrowMessage(expectedErrorMessage, InvalidRequestException.class, "INSERT INTO %s (pk, ck1, ck2, v) VALUES (1, 2, 3, null)"); } @@ -425,7 +425,7 @@ public void testCreateTableWithColumnWithRegularColumnSerializedSizeCheckNullBlo @Test public void testCreateTableWithColumnMixedColumnsSerializedSizeConstraint() throws Throwable { - createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH(pk) = 4, ck1 int, ck2 int, v text CHECK OCTET_LENGTH(v) = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK OCTET_LENGTH() = 4, ck1 int, ck2 int, v text CHECK OCTET_LENGTH() = 4, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); // Valid execute("INSERT INTO %s (pk, ck1, ck2, v) VALUES ('fño', 2, 3, 'fño')"); @@ -446,7 +446,7 @@ public void testCreateTableWithWrongColumnConstraint() throws Throwable { try { - createTable("CREATE TABLE %s (pk text, ck1 int CHECK OCTET_LENGTH(pk) = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text, ck1 int CHECK OCTET_LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -461,7 +461,7 @@ public void testCreateTableWithWrongColumnMultipleConstraint() throws Throwable { try { - createTable("CREATE TABLE %s (pk text, ck1 int CHECK OCTET_LENGTH(pk) = 4 AND ck1 < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text, ck1 int CHECK OCTET_LENGTH() = 4 AND ck1 < 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -476,7 +476,7 @@ public void testCreateTableWithColumnWithClusteringColumnInvalidTypeConstraint() { try { - createTable("CREATE TABLE %s (pk int, ck1 int CHECK OCTET_LENGTH(ck1) = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk int, ck1 int CHECK OCTET_LENGTH() = 4, ck2 int, v int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -507,7 +507,7 @@ public void testCreateTableInvalidFunction() throws Throwable { try { - createTable("CREATE TABLE %s (pk text CHECK not_a_function(pk) = 4, ck1 int, ck2 int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); + createTable("CREATE TABLE %s (pk text CHECK not_a_function() = 4, ck1 int, ck2 int, PRIMARY KEY ((pk),ck1, ck2)) WITH CLUSTERING ORDER BY (ck1 " + order + ");"); fail(); } catch (InvalidRequestException e) @@ -521,7 +521,7 @@ public void testCreateTableInvalidFunction() throws Throwable public void testCreateTableWithPKConstraintsAndCDCEnabled() throws Throwable { // It works - createTable("CREATE TABLE %s (pk text CHECK length(pk) = 4, ck1 int, ck2 int, PRIMARY KEY ((pk), ck1, ck2)) WITH cdc = true;"); + createTable("CREATE TABLE %s (pk text CHECK length() = 4, ck1 int, ck2 int, PRIMARY KEY ((pk), ck1, ck2)) WITH cdc = true;"); } @Test diff --git a/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java b/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java index 1646d5ed0a15..9dc964dd60a1 100644 --- a/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java +++ b/test/unit/org/apache/cassandra/constraints/JsonConstraintTest.java @@ -42,17 +42,19 @@ public class JsonConstraintTest private static final ColumnMetadata regularStringColumn = getColumnOfType(UTF8Type.instance); private static final ColumnMetadata regularAsciiColumn = getColumnOfType(AsciiType.instance); - private static final ColumnConstraints json = new ColumnConstraints(of(new Raw(jsonFunctionIdentifier, columnIdentifier).prepare())); + private final ColumnConstraints json = new ColumnConstraints(of(new Raw(jsonFunctionIdentifier).prepare())); @Test public void testJsonConstraint() throws Throwable { + json.setColumnName(columnIdentifier); run("{}"); run("{\"a\": 5, \"b\": \"1\", \"c\": [1,2,3]}"); run("nonsense", "Value for column 'a_column' violated JSON constraint as it is not a valid JSON."); run("", "Value for column 'a_column' violated JSON constraint as it is not a valid JSON."); } + @Test public void testInvalidTypes() { diff --git a/test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java b/test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java index d09d8f83b6fe..e033c62c5909 100644 --- a/test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java +++ b/test/unit/org/apache/cassandra/constraints/NotNullConstraintTest.java @@ -54,13 +54,16 @@ public class NotNullConstraintTest { private static final ColumnIdentifier columnIdentifier = new ColumnIdentifier("a_column", false); - private static final ColumnConstraints unaryConstraint = new ColumnConstraints(of(new UnaryFunctionColumnConstraint.Raw(new ColumnIdentifier(NotNullConstraint.FUNCTION_NAME, false), columnIdentifier).prepare())); + private static final ColumnConstraints unaryConstraint = new ColumnConstraints(of(new UnaryFunctionColumnConstraint.Raw(new ColumnIdentifier(NotNullConstraint.FUNCTION_NAME, false), List.of()).prepare())); private static final ColumnConstraints scalarConstraint = new ColumnConstraints(of(new ScalarColumnConstraint.Raw(columnIdentifier, GT, "5").prepare())); - private static final ColumnConstraints functionConstraint = new ColumnConstraints(of(new FunctionColumnConstraint.Raw(new ColumnIdentifier("LENGTH", false), columnIdentifier, GT, "5").prepare())); + private static final ColumnConstraints functionConstraint = new ColumnConstraints(of(new FunctionColumnConstraint.Raw(new ColumnIdentifier("LENGTH", false), List.of(), GT, "5").prepare())); @Test public void testNotNullConstraintValidation() { + unaryConstraint.setColumnName(columnIdentifier); + scalarConstraint.setColumnName(columnIdentifier); + functionConstraint.setColumnName(columnIdentifier); // unary unaryConstraint.validate(getColumnOfType(UTF8Type.instance)); assertThatThrownBy(() -> unaryConstraint.evaluate(UTF8Type.instance, EMPTY_BYTE_BUFFER)) @@ -95,6 +98,9 @@ public void testNotNullConstraintValidation() @Test public void testCollections() { + unaryConstraint.setColumnName(columnIdentifier); + scalarConstraint.setColumnName(columnIdentifier); + functionConstraint.setColumnName(columnIdentifier); checkList(false); checkSet(false); checkMap(false); diff --git a/test/unit/org/apache/cassandra/constraints/RegexpConstraintTest.java b/test/unit/org/apache/cassandra/constraints/RegexpConstraintTest.java index 1b93c30baaba..ddbff0c45d1f 100644 --- a/test/unit/org/apache/cassandra/constraints/RegexpConstraintTest.java +++ b/test/unit/org/apache/cassandra/constraints/RegexpConstraintTest.java @@ -18,6 +18,8 @@ package org.apache.cassandra.constraints; +import java.util.List; + import org.junit.Test; import org.apache.cassandra.cql3.ColumnIdentifier; @@ -44,8 +46,8 @@ public class RegexpConstraintTest private static final ColumnMetadata regularStringColumn = getColumnOfType(UTF8Type.instance); private static final ColumnMetadata regularAsciiColumn = getColumnOfType(AsciiType.instance); - private static final ColumnConstraints regexp = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, Operator.EQ, "'a..b'").prepare())); - private static final ColumnConstraints negatedRegexp = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, Operator.NEQ, "'a..b'").prepare())); + private static final ColumnConstraints regexp = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, List.of(), Operator.EQ, "'a..b'").prepare())); + private static final ColumnConstraints negatedRegexp = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, List.of(), Operator.NEQ, "'a..b'").prepare())); @Test public void testRegexpConstraint() throws Throwable @@ -59,7 +61,7 @@ public void testRegexpConstraint() throws Throwable @Test public void testInvalidPattern() { - ColumnConstraints invalid = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, Operator.EQ, "'*abc'").prepare())); + ColumnConstraints invalid = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, List.of(), Operator.EQ, "'*abc'").prepare())); assertThatThrownBy(() -> invalid.validate(regularStringColumn)) .hasMessage("String '*abc' is not a valid regular expression") .isInstanceOf(InvalidConstraintDefinitionException.class); diff --git a/test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java b/test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java index 3bcb75eb9efe..3731ead11351 100644 --- a/test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java +++ b/test/unit/org/apache/cassandra/cql3/ColumnSpecificationTest.java @@ -40,8 +40,8 @@ public void before() @Test public void testCreateTableWithColumnHavingMaskBeforeCheck() { - createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1);"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); } @Test @@ -49,8 +49,8 @@ public void testAlterTableAlterColumnWithMaskAndCheckStandalone() { createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default()"); - execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1;"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + execute("ALTER TABLE %s ALTER name CHECK NOT NULL AND LENGTH() > 1;"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); } @Test @@ -65,30 +65,30 @@ public void testAlterTableAlterColumnWithMask() public void testAlterTableAlterColumnWithCheck() { createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); - execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1;"); - verifyColumnSpec("name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + execute("ALTER TABLE %s ALTER name CHECK NOT NULL AND LENGTH() > 1;"); + verifyColumnSpec("name text CHECK NOT NULL AND LENGTH() > 1"); } @Test public void testAddingCheckToColumnWithMask() { createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default());"); - execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + execute("ALTER TABLE %s ALTER name CHECK NOT NULL AND LENGTH() > 1"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); } @Test public void testAddingMaskToColumnWithCheck() { - createTable("CREATE TABLE %s (pk text primary key, name text CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); + createTable("CREATE TABLE %s (pk text primary key, name text CHECK NOT NULL AND LENGTH() > 1);"); execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default()"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); } @Test public void testDroppingCheckKeepsMask() { - createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); + createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1);"); execute("ALTER TABLE %s ALTER name DROP CHECK"); verifyColumnSpec("name text MASKED WITH system.mask_default()"); } @@ -96,17 +96,17 @@ public void testDroppingCheckKeepsMask() @Test public void droppingMaskKeepsCheck() { - createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1);"); + createTable("CREATE TABLE %s (pk text primary key, name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1);"); execute("ALTER TABLE %s ALTER name DROP MASKED"); - verifyColumnSpec("name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + verifyColumnSpec("name text CHECK NOT NULL AND LENGTH() > 1"); } @Test public void testAlterTableAddColumnWithCheck() { createTable("CREATE TABLE %s (pk text primary key);"); - execute("ALTER TABLE %s ADD name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); - verifyColumnSpec("name text CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + execute("ALTER TABLE %s ADD name text CHECK NOT NULL AND LENGTH() > 1"); + verifyColumnSpec("name text CHECK NOT NULL AND LENGTH() > 1"); } @Test @@ -121,16 +121,16 @@ public void testAlterTableAddColumnWithMask() public void testAlterTableAddColumnWithMaskAndCheck() { createTable("CREATE TABLE %s (pk text primary key);"); - execute("ALTER TABLE %s ADD name text MASKED WITH system.mask_default() CHECK NOT_NULL(name)"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name)"); + execute("ALTER TABLE %s ADD name text MASKED WITH system.mask_default() CHECK NOT NULL"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL"); } @Test public void testAlterTableAddColumnWithMaskAndMultipleChecks() { createTable("CREATE TABLE %s (pk text primary key);"); - execute("ALTER TABLE %s ADD name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + execute("ALTER TABLE %s ADD name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); } /** @@ -139,7 +139,7 @@ public void testAlterTableAddColumnWithMaskAndMultipleChecks() @Test(expected = RuntimeException.class) public void testFailingCreateTableWithColumnHavingMaskAfterCheck() { - createTable("CREATE TABLE %s (pk text primary key, name text CHECK NOT_NULL(name) AND LENGTH(name) > 1 MASKED WITH system.mask_default());"); + createTable("CREATE TABLE %s (pk text primary key, name text CHECK NOT NULL AND LENGTH() > 1 MASKED WITH system.mask_default());"); } /** @@ -149,8 +149,8 @@ public void testFailingCreateTableWithColumnHavingMaskAfterCheck() public void testFailingAlterTableAlterColumnWithCheckAndMask() { createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); - execute("ALTER TABLE %s ALTER name CHECK NOT_NULL(name) AND LENGTH(name) > 1 MASKED WITH system.mask_default();"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + execute("ALTER TABLE %s ALTER name CHECK NOT NULL AND LENGTH() > 1 MASKED WITH system.mask_default();"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); } /** @@ -160,8 +160,8 @@ public void testFailingAlterTableAlterColumnWithCheckAndMask() public void testFailingAlterTableAlterColumnWithMaskAndCheck() { createTable("CREATE TABLE %s (pk text, name text, primary key (pk));"); - execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); - verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT_NULL(name) AND LENGTH(name) > 1"); + execute("ALTER TABLE %s ALTER name MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); + verifyColumnSpec("name text MASKED WITH system.mask_default() CHECK NOT NULL AND LENGTH() > 1"); } private void verifyColumnSpec(String modifiedColumn) @@ -184,7 +184,7 @@ static String tableParametersCql() " AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}\n" + " AND cdc = false\n" + " AND comment = ''\n" + - " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + "\n" + + " AND compaction = " + cqlQuoted(CompactionParams.DEFAULT.asMap()) + '\n' + " AND compression = {'chunk_length_in_kb': '16', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}\n" + " AND memtable = 'default'\n" + " AND crc_check_chance = 1.0\n" + From b9395acad625c876c6b03c0e2cd2ee689af28b54 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Mon, 5 May 2025 15:29:27 -0400 Subject: [PATCH 306/340] Fix AccordMigrationTest not marking nodes down correctly Patch by Ariel Weisberg; Reviewed by Caleb Rackliffe for CASSANDRA-20621 --- .../cassandra/distributed/test/accord/AccordMigrationTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java index fee7e6af63c0..6dbfdca2aa0a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationTest.java @@ -63,6 +63,7 @@ import org.apache.cassandra.distributed.api.Feature; import org.apache.cassandra.distributed.api.ICoordinator; import org.apache.cassandra.distributed.api.IInvokableInstance; +import org.apache.cassandra.distributed.api.IMessageFilters.Filter; import org.apache.cassandra.distributed.api.Row; import org.apache.cassandra.distributed.api.SimpleQueryResult; import org.apache.cassandra.gms.EndpointState; @@ -445,6 +446,7 @@ public void testPaxosToAccordCAS() throws Exception // Forced repair while a node is down shouldn't work, use repair instead of finish-migration because repair exposes --force // and regular Cassandra repairs are eligible to drive migration so it's important they check --force and down nodes InetAddressAndPort secondNodeBroadcastAddress = InetAddressAndPort.getByAddress(cluster.get(2).broadcastAddress()); + Filter blockNode2 = cluster.filters().allVerbs().from(2).drop(); cluster.get(1).runOnInstance(() -> { EndpointState endpointState = Gossiper.instance.getEndpointStateForEndpoint(secondNodeBroadcastAddress); Gossiper.runInGossipStageBlocking(() -> Gossiper.instance.markDead(secondNodeBroadcastAddress, endpointState)); @@ -454,6 +456,7 @@ public void testPaxosToAccordCAS() throws Exception NormalizedRanges alreadyDataRepaired = normalizedRanges(ImmutableList.of(new Range<>(upperMidToken, maxAlignedWithLocalRanges))); NormalizedRanges remainingPendingDataRepair = migratingRanges.subtract(alreadyDataRepaired); assertMigrationState(tableName, ConsensusMigrationTarget.accord, emptyList(), remainingPendingDataRepair, migratingRanges, 1); + blockNode2.off(); cluster.get(1).runOnInstance(() -> { EndpointState endpointState = Gossiper.instance.getEndpointStateForEndpoint(secondNodeBroadcastAddress); Gossiper.runInGossipStageBlocking(() -> Gossiper.instance.realMarkAlive(secondNodeBroadcastAddress, endpointState)); From f91655df061bea988e12a2d9f2e438b7d825ce13 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Sun, 4 May 2025 22:39:30 +0200 Subject: [PATCH 307/340] When a custom disk error handler fails to initiate, fail the startup of a node instead of using the no-op handler patch by Stefan Miklosovic; reviewed by Caleb Rackliffe for CASSANDRA-20614 --- CHANGES.txt | 1 + .../cassandra/service/DiskErrorsHandler.java | 5 +- .../service/DiskErrorsHandlerService.java | 4 +- .../service/DiskErrorsHandlerTest.java | 135 ++++++++---------- 4 files changed, 64 insertions(+), 81 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 80a9d636f61a..77d5b4eb7158 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * When a custom disk error handler fails to initiate, fail the startup of a node instead of using the no-op handler (CASSANDRA-20614) * Rewrite constraint framework to remove column specification from constraint definition, introduce SQL-like NOT NULL (CASSANDRA-20563) * Fix a bug in AutoRepair duration metric calculation if schedule finishes quickly (CASSANDRA-20622) * Fix AutoRepair flaky InJvm dtest (CASSANDRA-20620) diff --git a/src/java/org/apache/cassandra/service/DiskErrorsHandler.java b/src/java/org/apache/cassandra/service/DiskErrorsHandler.java index b4fe9d67db67..14add63feefa 100644 --- a/src/java/org/apache/cassandra/service/DiskErrorsHandler.java +++ b/src/java/org/apache/cassandra/service/DiskErrorsHandler.java @@ -18,8 +18,6 @@ package org.apache.cassandra.service; -import com.google.common.annotations.VisibleForTesting; - import org.apache.cassandra.io.FSError; import org.apache.cassandra.io.sstable.CorruptSSTableException; @@ -43,8 +41,7 @@ class NoOpDiskErrorHandler implements DiskErrorsHandler { public static final DiskErrorsHandler NO_OP = new NoOpDiskErrorHandler(); - @VisibleForTesting - NoOpDiskErrorHandler() {} + private NoOpDiskErrorHandler() {} @Override public void inspectCommitLogError(Throwable t) {} diff --git a/src/java/org/apache/cassandra/service/DiskErrorsHandlerService.java b/src/java/org/apache/cassandra/service/DiskErrorsHandlerService.java index 97e7ecde5fba..98fb7ee609d9 100644 --- a/src/java/org/apache/cassandra/service/DiskErrorsHandlerService.java +++ b/src/java/org/apache/cassandra/service/DiskErrorsHandlerService.java @@ -35,7 +35,7 @@ public class DiskErrorsHandlerService private static volatile DiskErrorsHandler instance = NO_OP; @VisibleForTesting - public static synchronized void set(DiskErrorsHandler newInstance) + public static synchronized void set(DiskErrorsHandler newInstance) throws ConfigurationException { if (newInstance == null) return; @@ -58,7 +58,7 @@ public static synchronized void set(DiskErrorsHandler newInstance) } catch (Throwable t) { - logger.warn("Exception occured while initializing disk error handler of class " + newInstance.getClass().getName(), t); + throw new ConfigurationException("Exception occured while initializing disk error handler of class " + newInstance.getClass().getName(), t); } } diff --git a/test/unit/org/apache/cassandra/service/DiskErrorsHandlerTest.java b/test/unit/org/apache/cassandra/service/DiskErrorsHandlerTest.java index 6465164fe0ee..c8638754af66 100644 --- a/test/unit/org/apache/cassandra/service/DiskErrorsHandlerTest.java +++ b/test/unit/org/apache/cassandra/service/DiskErrorsHandlerTest.java @@ -21,11 +21,13 @@ import org.junit.Test; import org.apache.cassandra.distributed.shared.WithProperties; +import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.FSError; import org.apache.cassandra.io.sstable.CorruptSSTableException; import static org.apache.cassandra.config.CassandraRelevantProperties.CUSTOM_DISK_ERROR_HANDLER; import static org.apache.cassandra.service.DiskErrorsHandlerService.get; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; @@ -35,17 +37,16 @@ public class DiskErrorsHandlerTest @Test public void testSetting() throws Throwable { + DiskErrorsHandler handlerA; + DiskErrorsHandler handlerB; try (WithProperties ignore = new WithProperties().set(CUSTOM_DISK_ERROR_HANDLER, HandlerA.class.getName())) { DiskErrorsHandlerService.configure(); - - assertSame(HandlerA.class, get().getClass()); - - assertTrue(HandlerA.initialized); - assertFalse(HandlerA.closed); - assertFalse(HandlerB.initialized); - assertFalse(HandlerB.closed); + handlerA = get(); + assertSame(HandlerA.class, handlerA.getClass()); + assertInitialized(HandlerA.class, handlerA); + assertNotClosed(HandlerA.class, handlerA); } try (WithProperties ignore = new WithProperties().set(CUSTOM_DISK_ERROR_HANDLER, @@ -53,96 +54,73 @@ public void testSetting() throws Throwable { DiskErrorsHandlerService.configure(); - assertTrue(HandlerA.initialized); - assertTrue(HandlerA.closed); + handlerB = get(); + assertSame(HandlerB.class, handlerB.getClass()); - assertTrue(HandlerB.initialized); - assertFalse(HandlerB.closed); + assertInitialized(HandlerA.class, handlerA); + assertClosed(HandlerA.class, handlerA); - assertSame(HandlerB.class, get().getClass()); + assertInitialized(HandlerB.class, handlerB); + assertNotClosed(HandlerB.class, handlerB); - get().close(); + handlerB.close(); - assertTrue(HandlerB.closed); + assertClosed(HandlerB.class, handlerB); } } @Test public void testFailures() { - // failed closing + DiskErrorsHandler handlerC; try (WithProperties ignore = new WithProperties().set(CUSTOM_DISK_ERROR_HANDLER, HandlerC.class.getName())) { DiskErrorsHandlerService.configure(); - assertTrue(HandlerC.initialized); - assertSame(HandlerC.class, get().getClass()); + handlerC = get(); + assertInitialized(HandlerC.class, handlerC); } - // this will call close() on C handler + DiskErrorsHandler handlerA; + // this will call _not_ close() on C handler try (WithProperties ignore = new WithProperties().set(CUSTOM_DISK_ERROR_HANDLER, - HandlerE.class.getName())) + HandlerA.class.getName())) { DiskErrorsHandlerService.configure(); - assertTrue(HandlerE.initialized); - assertSame(HandlerE.class, get().getClass()); + handlerA = get(); + assertInitialized(HandlerA.class, handlerA); + assertNotClosed(HandlerC.class, handlerC); } try (WithProperties ignore = new WithProperties().set(CUSTOM_DISK_ERROR_HANDLER, HandlerD.class.getName())) { - DiskErrorsHandlerService.configure(); - // still handler E as handler D failed to init - assertSame(HandlerE.class, get().getClass()); - } - } + assertThatThrownBy(DiskErrorsHandlerService::configure) + .isInstanceOf(ConfigurationException.class); - public static class HandlerA extends DummyErrorHandler - { - public static boolean initialized = false; - public static boolean closed = false; - - @Override - public void init() - { - initialized = true; + assertSame(HandlerA.class, get().getClass()); + // still handler A as handler D failed to init + assertInitialized(HandlerA.class, handlerA); + assertNotClosed(HandlerA.class, handlerA); } - @Override - public void close() throws Exception + // what if a user tries to set no-op handler or handler which can not be constructed (constructor is private) + try (WithProperties ignore = new WithProperties().set(CUSTOM_DISK_ERROR_HANDLER, + DiskErrorsHandler.NoOpDiskErrorHandler.class.getName())) { - closed = true; + assertThatThrownBy(DiskErrorsHandlerService::configure) + .isInstanceOf(ConfigurationException.class) + .hasMessageContaining("Default constructor for disk error handler class " + + '\'' + DiskErrorsHandler.NoOpDiskErrorHandler.class.getName() + "' is inaccessible."); } } - public static class HandlerB extends DummyErrorHandler - { - public static boolean initialized = false; - public static boolean closed = false; - - @Override - public void init() - { - initialized = true; - } + public static class HandlerA extends DummyErrorHandler {} - @Override - public void close() throws Exception - { - closed = true; - } - } + public static class HandlerB extends DummyErrorHandler {} public static class HandlerC extends DummyErrorHandler { - public static boolean initialized = false; - - @Override - public void init() - { - initialized = true; - } - @Override public void close() throws Exception { @@ -152,25 +130,35 @@ public void close() throws Exception public static class HandlerD extends DummyErrorHandler { - public static boolean closed = false; - @Override public void init() { throw new RuntimeException("failed to init"); } + } - @Override - public void close() throws Exception - { - closed = true; - } + public void assertClosed(Class handlerClass, DiskErrorsHandler diskErrorsHandler) + { + assertSame(handlerClass, diskErrorsHandler.getClass()); + assertTrue(((DummyErrorHandler) diskErrorsHandler).closed); + } + + public void assertNotClosed(Class handlerClass, DiskErrorsHandler diskErrorsHandler) + { + assertSame(handlerClass, diskErrorsHandler.getClass()); + assertFalse(((DummyErrorHandler) diskErrorsHandler).closed); } - public static class HandlerE extends DummyErrorHandler + public void assertInitialized(Class handlerClass, DiskErrorsHandler diskErrorsHandler) { - public static boolean initialized = false; - public static boolean closed = false; + assertSame(handlerClass, diskErrorsHandler.getClass()); + assertTrue(((DummyErrorHandler) diskErrorsHandler).initialized); + } + + private static abstract class DummyErrorHandler implements DiskErrorsHandler + { + public boolean initialized = false; + public boolean closed = false; @Override public void init() @@ -183,10 +171,7 @@ public void close() throws Exception { closed = true; } - } - private static abstract class DummyErrorHandler implements DiskErrorsHandler - { @Override public void handleCorruptSSTable(CorruptSSTableException e) { From 7a8335c2739c207b77e90c05897285b3cbaba166 Mon Sep 17 00:00:00 2001 From: Sunil Ramchandra Pawar Date: Thu, 8 May 2025 17:12:18 +0530 Subject: [PATCH 308/340] Optimize initial skipping logic for SAI queries on large partitions patch by Sunil Ramchandra Pawar; reviewed by Caleb Rackliffe and David Capwell for CASSANDRA-20191 --- CHANGES.txt | 1 + .../plan/StorageAttachedIndexSearcher.java | 58 +++- .../sai/cql/IntraPartitionSkippingTest.java | 318 ++++++++++++++++++ 3 files changed, 375 insertions(+), 2 deletions(-) create mode 100644 test/unit/org/apache/cassandra/index/sai/cql/IntraPartitionSkippingTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 9a6dbe7ae577..c073719105fa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Optimize initial skipping logic for SAI queries on large partitions (CASSANDRA-20191) * Fix reading mmapped trie-index exceeding 2GiB (CASSANDRA-20351) * zero copy streaming allocates direct memory that isn't used, but does help to fragment the memory space (CASSANDRA-20577) * CQLSSTableWriter supports setting the format (BTI or Big) (CASSANDRA-20609) diff --git a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java index 9116db0d3107..20a9cad58c45 100644 --- a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java +++ b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java @@ -18,6 +18,7 @@ package org.apache.cassandra.index.sai.plan; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -32,6 +33,8 @@ import io.netty.util.concurrent.FastThreadLocal; import org.apache.cassandra.db.Clustering; +import org.apache.cassandra.db.ClusteringBound; +import org.apache.cassandra.db.ClusteringComparator; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.DataRange; import org.apache.cassandra.db.DecoratedKey; @@ -39,6 +42,10 @@ import org.apache.cassandra.db.ReadCommand; import org.apache.cassandra.db.ReadExecutionController; import org.apache.cassandra.db.RegularAndStaticColumns; +import org.apache.cassandra.db.Slices; +import org.apache.cassandra.db.filter.ClusteringIndexFilter; +import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter; +import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter; import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.partitions.PartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; @@ -138,6 +145,7 @@ private class ResultRetriever extends AbstractIterator im private final PrimaryKey firstPrimaryKey; private final PrimaryKey lastPrimaryKey; private final Iterator keyRanges; + private final DataRange firstDataRange; private AbstractBounds currentKeyRange; private final KeyRangeIterator resultKeyIterator; @@ -152,7 +160,8 @@ private class ResultRetriever extends AbstractIterator im private ResultRetriever(ReadExecutionController executionController, boolean topK) { this.keyRanges = queryController.dataRanges().iterator(); - this.currentKeyRange = keyRanges.next().keyRange(); + this.firstDataRange = keyRanges.next(); + this.currentKeyRange = firstDataRange.keyRange(); this.resultKeyIterator = Operation.buildIterator(queryController); this.filterTree = Operation.buildFilter(queryController, queryController.usesStrictFiltering()); this.executionController = executionController; @@ -175,7 +184,52 @@ public UnfilteredRowIterator computeNext() // We can't put this code in the constructor because it may throw and the caller // may not be prepared for that. if (lastKey == null) - resultKeyIterator.skipTo(firstPrimaryKey); + { + PrimaryKey skipTarget = firstPrimaryKey; + ClusteringComparator comparator = command.metadata().comparator; + + // If there are no clusterings, the first data range selects an entire partitions, or we have static + // expressions, don't bother trying to skip forward within the partition. + if (comparator.size() > 0 && !firstDataRange.selectsAllPartition() && !command.rowFilter().hasStaticExpression()) + { + // Only attempt to skip if the first data range covers a single partition. + if (currentKeyRange.left.equals(currentKeyRange.right) && currentKeyRange.left instanceof DecoratedKey) + { + DecoratedKey decoratedKey = (DecoratedKey) currentKeyRange.left; + ClusteringIndexFilter filter = firstDataRange.clusteringIndexFilter(decoratedKey); + + if (filter instanceof ClusteringIndexSliceFilter) + { + Slices slices = ((ClusteringIndexSliceFilter) filter).requestedSlices(); + + if (!slices.isEmpty()) + { + ClusteringBound startBound = slices.get(0).start(); + + if (!startBound.isEmpty()) + { + ByteBuffer[] rawValues = startBound.getBufferArray(); + + if (rawValues.length == comparator.size()) + skipTarget = keyFactory.create(decoratedKey, Clustering.make(rawValues)); + } + } + } + else if (filter instanceof ClusteringIndexNamesFilter) + { + ClusteringIndexNamesFilter namesFilter = (ClusteringIndexNamesFilter) filter; + + if (!namesFilter.requestedRows().isEmpty()) + { + Clustering skipClustering = namesFilter.requestedRows().iterator().next(); + skipTarget = keyFactory.create(decoratedKey, skipClustering); + } + } + } + } + + resultKeyIterator.skipTo(skipTarget); + } // Theoretically we wouldn't need this if the caller of computeNext always ran the // returned iterators to the completion. Unfortunately, we have no control over the caller behavior here. diff --git a/test/unit/org/apache/cassandra/index/sai/cql/IntraPartitionSkippingTest.java b/test/unit/org/apache/cassandra/index/sai/cql/IntraPartitionSkippingTest.java new file mode 100644 index 000000000000..b9e42640e790 --- /dev/null +++ b/test/unit/org/apache/cassandra/index/sai/cql/IntraPartitionSkippingTest.java @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.sai.cql; + +import org.junit.Ignore; +import org.junit.Test; + +import org.HdrHistogram.Histogram; +import org.apache.cassandra.index.sai.SAITester; + +/** + * Tests for verifying intra-partition and partition-level skipping optimizations + * introduced in CASSANDRA-20191 for SAI. + *

      + * These tests validate that Cassandra can efficiently skip over rows + * within a partition using clustering filters (name and slice), paging, reversed order, + * and sparse matches. + *

      + * Each test documents a scenario where skipping logic is expected to apply along with few where it doesn't skip. + */ +public class IntraPartitionSkippingTest extends SAITester +{ + @Test + public void testNameFilterExactMatch() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, val text, PRIMARY KEY (pk, ck))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int ck = 0; ck < 10; ck++) + { + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck); + } + + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND val = 'val5' ALLOW FILTERING"), + row(1, 5,"val5"))); + } + + @Test + public void testSliceFilterRangeMatch() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, val text, PRIMARY KEY (pk, ck))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int ck = 0; ck < 100; ck++) + { + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck); + } + + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 90 AND val = 'val99' ALLOW FILTERING"), + row(1, 99,"val99"))); + } + + @Test + public void testReversedClustering() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, val text, PRIMARY KEY (pk, ck)) WITH CLUSTERING ORDER BY (ck DESC)"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int ck = 0; ck < 20; ck++) + { + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck); + } + + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck < 10 AND val = 'val5' ALLOW FILTERING"), + row(1,5,"val5"))); + } + + @Test + public void testSkippingWithPaging() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, val int, PRIMARY KEY (pk, ck))"); + + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int ck = 0; ck < 100; ck++) + { + int val = 1000 + ck; + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, val); + } + + beforeAndAfterFlush(() -> assertRowsNet(executeNetWithPaging("SELECT * FROM %s WHERE pk = 1 AND ck > 90 AND val > 1090 ALLOW FILTERING", 5), + row(1, 91, 1091), + row(1, 92, 1092), + row(1, 93, 1093), + row(1, 94, 1094), + row(1, 95, 1095), + row(1, 96, 1096), + row(1, 97, 1097), + row(1, 98, 1098), + row(1, 99, 1099))); + } + + @Test + public void testCompositeClusteringKeySkipping() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck1 int, ck2 int, val text, PRIMARY KEY (pk, ck1, ck2))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int ck1 = 0; ck1 < 10; ck1++) + for (int ck2 = 0; ck2 < 10; ck2++) + execute("INSERT INTO %s (pk, ck1, ck2, val) VALUES (?, ?, ?, ?)", 1, ck1, ck2, "v" + (ck1*10+ck2)); + + + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck1 = 9 AND ck2 = 9 AND val = 'v99' ALLOW FILTERING"), + row(1,9,9,"v99"))); + + } + + @Test + public void testSparseMatch() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, val text, PRIMARY KEY (pk, ck))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int ck = 0; ck < 1000; ck++) + { + String value = (ck % 450 == 0) ? "insert" : "skip"; + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, value); + } + + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 899 AND val = 'insert' ALLOW FILTERING"), + row(1,900,"insert"))); + + } + + @Test + public void testMultipleNameFilters() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, val text, PRIMARY KEY (pk, ck))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int i = 0; i < 20; i++) + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, i, "v5"); + + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck IN (5, 10, 15) AND val = 'v5' ALLOW FILTERING"), + row(1,5,"v5"), row(1,10,"v5"), row(1,15,"v5"))); + + } + + // Multiple partition range scans won't skip + @Test + public void testPartitionRangeSkipping() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, val text, PRIMARY KEY (pk, ck))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int pk = 0; pk < 10; pk++) + for (int ck = 0; ck < 5; ck++) + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", pk, ck, "value" + pk); + + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE val = 'value9' AND ck > 2 ALLOW FILTERING"), + row(9,3,"value9"), row(9,4,"value9"))); + + } + + @Test + public void testStaticColumns() throws Throwable + { + createTable("CREATE TABLE %S (pk int, ck int, s text static, val text, PRIMARY KEY (pk, ck))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + execute("INSERT INTO %s (pk, s) VALUES (?, ?)", 1, "static1"); + + for (int ck = 0; ck < 200; ck++) + { + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck); + } + + + // We will not skip + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 100 AND s = 'static1' AND val = 'val101' ALLOW FILTERING"), + row(1,101,"static1","val101"))); + + // we will skip + beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 100 AND val = 'val101' ALLOW FILTERING"), + row(1,101,"static1","val101"))); + } + + @Test + public void testNextKeyClusteringIndexNamesFilter() throws Throwable + { + createTable("CREATE TABLE %S (" + + "pk int," + + "ck int," + + "v int," + + "PRIMARY KEY (pk, ck))"); + + createIndex("CREATE INDEX ON %s(v) USING 'sai'"); + + int pk = 1; + for (int ck = 0; ck < 10; ck++) + { + int v = ck + 1000; + execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk, ck, v); + } + + int pk1 = 2; + for (int ck = 0; ck < 100; ck++) + { + execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk1, ck, ck); + } + + beforeAndAfterFlush(() -> { + assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND v > 1004 ALLOW FILTERING"), + row(1, 5, 1005)); + + assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND v > 1004 AND v < 20000 ALLOW FILTERING"), + row(1, 5, 1005)); + }); + + + } + + // Performance testing test-cases and can be ingnored. + @Ignore ("performance test case for Index Slice filter.") + @Test + public void testNextKeyPerfClusteringIndexSliceFilter() + { + createTable("CREATE TABLE %S (" + + "pk int, " + + "ck int, " + + "val text, " + + "PRIMARY KEY (pk, ck))"); + + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + int pk = 1; + for (int ck = 0; ck < 10000; ck++) + { + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", pk, ck, "hello1"); + } + + int pk1 = 2; + for (int ck = 0; ck < 100; ck++) + { + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", pk1, ck, "hello2"); + } + + Histogram histogram = new Histogram(4); + + + for (int i = 0; i < 10000; i++) + { + long start = System.nanoTime(); + execute("SELECT * FROM %s WHERE pk = 1 AND ck > 9000 AND val = 'hello1' ALLOW FILTERING"); + histogram.recordValue(System.nanoTime() - start); + + if (i % 1000 == 0) + { + System.out.println("50th: " + histogram.getValueAtPercentile(0.5)); + System.out.println("95th: " + histogram.getValueAtPercentile(0.95)); + System.out.println("99th: " + histogram.getValueAtPercentile(0.99)); + } + } + + } + + + @Ignore ("performance test case for Index Names filter.") + @Test + public void testNextKeyPerfClusteringIndexNamesFilter() + { + createTable("CREATE TABLE %S (" + + "pk int," + + "ck int," + + "v int," + + "PRIMARY KEY (pk, ck))"); + + createIndex("CREATE INDEX ON %s(v) USING 'sai'"); + + int pk = 1; + for (int ck = 0; ck < 20000; ck++) + { + int v = ck + 10; + execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk, ck, v); + } + + int pk1 = 2; + for (int ck = 0; ck < 100; ck++) + { + execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk1, ck, ck); + } + + Histogram histogram = new Histogram(4); + + for (int i = 0; i < 10000; i++) + { + long start = System.nanoTime(); + execute("SELECT * FROM %s WHERE pk = 1 AND ck = 15000 AND v > 9000 ALLOW FILTERING"); + histogram.recordValue(System.nanoTime() - start); + + if (i % 1000 == 0) + { + System.out.println("50th: " + histogram.getValueAtPercentile(0.5)); + System.out.println("95th: " + histogram.getValueAtPercentile(0.95)); + System.out.println("99th: " + histogram.getValueAtPercentile(0.99)); + } + } + + } + +} From 43746c13c2d07e75adfb9f3c28d7b446d1184b1e Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 25 Apr 2025 14:27:11 -0500 Subject: [PATCH 309/340] Use MAX_PARALLEL_TRANSFERS instead of default Patch by brandonwilliams, reviewed by mck for CASSANDRA-20532 --- CHANGES.txt | 1 + .../cassandra/streaming/async/NettyStreamingMessageSender.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index ddd87100adb2..70761240f14a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Honor MAX_PARALLEL_TRANSFERS correctly (CASSANDRA-20532) * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) * Avoid computing prepared statement size for unprepared batches (CASSANDRA-20556) diff --git a/src/java/org/apache/cassandra/streaming/async/NettyStreamingMessageSender.java b/src/java/org/apache/cassandra/streaming/async/NettyStreamingMessageSender.java index 4334382be40a..46480cd7c4cd 100644 --- a/src/java/org/apache/cassandra/streaming/async/NettyStreamingMessageSender.java +++ b/src/java/org/apache/cassandra/streaming/async/NettyStreamingMessageSender.java @@ -89,7 +89,7 @@ public class NettyStreamingMessageSender implements StreamingMessageSender private static final long DEFAULT_CLOSE_WAIT_IN_MILLIS = TimeUnit.MINUTES.toMillis(5); // a simple mechansim for allowing a degree of fairnes across multiple sessions - private static final Semaphore fileTransferSemaphore = new Semaphore(DEFAULT_MAX_PARALLEL_TRANSFERS, true); + private static final Semaphore fileTransferSemaphore = new Semaphore(MAX_PARALLEL_TRANSFERS, true); private final StreamSession session; private final boolean isPreview; From b4484968bd28095950619ff22083cbeedf62002a Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 25 Apr 2025 14:30:06 -0500 Subject: [PATCH 310/340] Use MAX_PARALLEL_TRANSFERS instead of default Patch by brandonwilliams, reviewed by mck for CASSANDRA-20532 --- CHANGES.txt | 1 + .../cassandra/streaming/async/StreamingMultiplexedChannel.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index afb99c4d0bf7..032a80514483 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -6,6 +6,7 @@ * Fix SimpleClient ability to release acquired capacity (CASSANDRA-20202) * Fix WaitQueue.Signal.awaitUninterruptibly may block forever if invoking thread is interrupted (CASSANDRA-20084) Merged from 4.0: + * Honor MAX_PARALLEL_TRANSFERS correctly (CASSANDRA-20532) * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) * Avoid computing prepared statement size for unprepared batches (CASSANDRA-20556) * Fix Dropwizard Meter causes timeouts when infrequently used (CASSANDRA-19332) diff --git a/src/java/org/apache/cassandra/streaming/async/StreamingMultiplexedChannel.java b/src/java/org/apache/cassandra/streaming/async/StreamingMultiplexedChannel.java index 99e613ee2da3..548d95772d73 100644 --- a/src/java/org/apache/cassandra/streaming/async/StreamingMultiplexedChannel.java +++ b/src/java/org/apache/cassandra/streaming/async/StreamingMultiplexedChannel.java @@ -95,7 +95,7 @@ public class StreamingMultiplexedChannel private static final int MAX_PARALLEL_TRANSFERS = parseInt(getProperty(PROPERTY_PREFIX + "streaming.session.parallelTransfers", Integer.toString(DEFAULT_MAX_PARALLEL_TRANSFERS))); // a simple mechansim for allowing a degree of fairness across multiple sessions - private static final Semaphore fileTransferSemaphore = newFairSemaphore(DEFAULT_MAX_PARALLEL_TRANSFERS); + private static final Semaphore fileTransferSemaphore = newFairSemaphore(MAX_PARALLEL_TRANSFERS); private final StreamingChannel.Factory factory; private final InetAddressAndPort to; From f2e3a0a4fc0c70d02a01cc2968c382d8f17697c6 Mon Sep 17 00:00:00 2001 From: Josh McKenzie Date: Tue, 13 Dec 2022 14:58:23 +0100 Subject: [PATCH 311/340] Handle sstable metadata stats file getting a new mtime after compaction has finished Patch by Josh McKenzie and marcuse; reviewed by Josh McKenzie and marcuse for CASSANDRA-18119 --- CHANGES.txt | 1 + .../cassandra/db/lifecycle/LogFile.java | 19 +- .../cassandra/db/lifecycle/LogRecord.java | 69 ++++++- .../db/lifecycle/LogTransaction.java | 2 + .../db/lifecycle/LogTransactionTest.java | 175 +++++++++++++++++- 5 files changed, 256 insertions(+), 10 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 70761240f14a..65e3cb4de0d1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Handle sstable metadata stats file getting a new mtime after compaction has finished (CASSANDRA-18119) * Honor MAX_PARALLEL_TRANSFERS correctly (CASSANDRA-20532) * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) * Grant permission on keyspaces system_views and system_virtual_schema not possible (CASSANDRA-20171) diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogFile.java b/src/java/org/apache/cassandra/db/lifecycle/LogFile.java index d67019008fb4..cd793d483163 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/LogFile.java +++ b/src/java/org/apache/cassandra/db/lifecycle/LogFile.java @@ -163,6 +163,10 @@ static boolean isLogFile(File file) this.id = id; } + /** + * Check a variety of the internals of the LogRecord as well as the state of the LogRecord vs. the files found on disk + * to ensure they remain correct and nothing was changed external to the process. + */ boolean verify() { records.clear(); @@ -229,6 +233,9 @@ LogRecord setErrorInReplicas(LogRecord record) return record; } + /** + * Sets the {@link LogRecord.Status#error} if something wrong is found with the record. + */ static void verifyRecord(LogRecord record, List existingFiles) { if (record.checksum != record.computeChecksum()) @@ -240,6 +247,7 @@ static void verifyRecord(LogRecord record, List existingFiles) return; } + // If it's not a removal we don't check it since we're not going to take action on it if (record.type != Type.REMOVE) return; @@ -253,6 +261,16 @@ static void verifyRecord(LogRecord record, List existingFiles) // we can have transaction files with mismatching updateTime resolutions due to switching between jdk8 and jdk11, truncate both to be consistent: if (truncateMillis(record.updateTime) != truncateMillis(record.status.onDiskRecord.updateTime) && record.status.onDiskRecord.updateTime > 0) { + // handle the case where we have existing broken transaction file on disk, where the update time is + // based on the stats file. This is just for the first upgrade, patched versions never base the update + // time on the stats file. + LogRecord statsIncluded = LogRecord.make(record.type, existingFiles, existingFiles.size(), record.absolutePath(), true); + if (truncateMillis(statsIncluded.updateTime) == truncateMillis(record.updateTime)) + { + logger.warn("Found a legacy log record {} with updateTime based on the stats file, ignoring to allow startup to continue", record); + return; + } + record.setError(String.format("Unexpected files detected for sstable [%s]: " + "last update time [%tc] (%d) should have been [%tc] (%d)", record.fileName(), @@ -260,7 +278,6 @@ static void verifyRecord(LogRecord record, List existingFiles) record.status.onDiskRecord.updateTime, record.updateTime, record.updateTime)); - } } diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java b/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java index 513ad8746084..65ff470ac854 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java +++ b/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java @@ -30,12 +30,19 @@ import java.util.stream.Collectors; import java.util.zip.CRC32; +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.io.sstable.Component; +import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.utils.FBUtilities; +import static org.apache.cassandra.io.sstable.Descriptor.TMP_EXT; + /** * A decoded line in a transaction log file replica. * @@ -43,6 +50,10 @@ */ final class LogRecord { + private static final Logger logger = LoggerFactory.getLogger(LogRecord.class); + @VisibleForTesting + static boolean INCLUDE_STATS_FOR_TESTS = false; + public enum Type { UNKNOWN, // a record that cannot be parsed @@ -66,7 +77,10 @@ public boolean matches(LogRecord record) return this == record.type; } - public boolean isFinal() { return this == Type.COMMIT || this == Type.ABORT; } + public boolean isFinal() + { + return this == Type.COMMIT || this == Type.ABORT; + } } /** @@ -182,17 +196,66 @@ private static String absolutePath(String baseFilename) public LogRecord withExistingFiles(List existingFiles) { + if (!absolutePath.isPresent()) + throw new IllegalStateException(String.format("Cannot create record from existing files for type %s - file is not present", type)); + return make(type, existingFiles, 0, absolutePath.get()); } + /** + * We create a LogRecord based on the files on disk; there's some subtlety around how we handle stats files as the + * timestamp can be mutated by the async completion of compaction if things race with node shutdown. To work around this, + * we don't take the stats file timestamp into account when calculating nor using the timestamps for all the components + * as we build the LogRecord. + */ public static LogRecord make(Type type, List files, int minFiles, String absolutePath) { + return make(type, files, minFiles, absolutePath, INCLUDE_STATS_FOR_TESTS); + } + + /** + * In most cases we skip including the stats file timestamp entirely as it can be mutated during anticompaction + * and thus "invalidate" the LogRecord. There is an edge case where we have a LogRecord that was written w/the wrong + * timestamp (i.e. included a mutated stats file) and we need the node to come up, so we need to expose the selective + * ability to either include the stats file timestamp or not. + * + * See {@link LogFile#verifyRecord} + */ + static LogRecord make(Type type, List files, int minFiles, String absolutePath, boolean includeStatsFile) + { + List toVerify; + File statsFile = null; + if (!includeStatsFile && !files.isEmpty()) + { + toVerify = new ArrayList<>(files.size() - 1); + for (File f : files) + { + if (!f.getName().endsWith(TMP_EXT)) + { + Component component = Descriptor.fromFilenameWithComponent(f).right; + if (component == Component.STATS) + statsFile = f; + else + toVerify.add(f); + } + } + } + else + { + toVerify = files; + } // CASSANDRA-11889: File.lastModified() returns a positive value only if the file exists, therefore // we filter by positive values to only consider the files that still exists right now, in case things // changed on disk since getExistingFiles() was called - List positiveModifiedTimes = files.stream().map(File::lastModified).filter(lm -> lm > 0).collect(Collectors.toList()); + List positiveModifiedTimes = toVerify.stream().map(File::lastModified).filter(lm -> lm > 0).collect(Collectors.toList()); long lastModified = positiveModifiedTimes.stream().reduce(0L, Long::max); - return new LogRecord(type, absolutePath, lastModified, Math.max(minFiles, positiveModifiedTimes.size())); + + // We need to preserve the file count for the number of existing files found on disk even though we ignored the + // stats file during our timestamp calculation. If the stats file still exists, we add in the count of it as + // a separate validation assumption that it's one of the files considered valid in this LogRecord. + boolean addStatTS = statsFile != null && statsFile.exists(); + int positiveTSCount = addStatTS ? positiveModifiedTimes.size() + 1 : positiveModifiedTimes.size(); + return new LogRecord(type, absolutePath, lastModified, Math.max(minFiles, positiveTSCount)); } private LogRecord(Type type, long updateTime) diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java index a3c3837dc60b..43b9e1f77172 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java +++ b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java @@ -545,6 +545,8 @@ static boolean removeUnfinishedLeftovers(Map.Entry> entry) try(LogFile txn = LogFile.make(entry.getKey(), entry.getValue())) { logger.info("Verifying logfile transaction {}", txn); + // We don't check / include the stats file timestamp on LogRecord creation / verification as that might + // be modified by a race in compaction notification and then needlessly fail subsequent node starts. if (txn.verify()) { Throwable failure = txn.removeUnfinishedLeftovers(null); diff --git a/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java b/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java index 7d1cb39ae3f5..15578d14d504 100644 --- a/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java +++ b/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.db.lifecycle; import java.io.File; -import java.io.IOError; import java.io.IOException; import java.io.RandomAccessFile; import java.io.UncheckedIOException; @@ -39,6 +38,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; +import org.apache.cassandra.db.streaming.ComponentContext; import org.junit.BeforeClass; import org.junit.Test; @@ -71,6 +71,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -1263,10 +1264,172 @@ public void testGetTemporaryFilesThrowsIfCompletingAfterObsoletion() throws Thro logs.finish(); } + @Test + public void testStatsTSMatchOnStart() throws Throwable + { + ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE); + File dataFolder = new Directories(cfs.metadata()).getDirectoryForNewSSTables(); + SSTableReader sstable = sstable(dataFolder, cfs, 0, 128); + + try(LogTransaction log = new LogTransaction(OperationType.COMPACTION)) + { + assertNotNull(log); + log.trackNew(sstable); + + // Confirm we can remove leftovers when they match + LogTransaction.removeUnfinishedLeftovers(cfs.metadata()); + } + + File sFile = new File(sstable.descriptor.filenameFor(Component.STATS)); + assertFalse("Found STATS file but expected it to be cleaned up.", Files.exists(sFile.toPath())); + sstable.selfRef().release(); + } + + @Test + public void testStatsTSMatchDuringList() throws Throwable + { + ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE); + File dataFolder = new Directories(cfs.metadata()).getDirectoryForNewSSTables(); + SSTableReader sstable = sstable(dataFolder, cfs, 0, 128); + + try(LogTransaction log = new LogTransaction(OperationType.COMPACTION)) + { + assertNotNull(log); + log.trackNew(sstable); + + // Confirm we can successfully classify files when they match - this triggers the LogAwareFileLister verify + listFiles(dataFolder, Directories.OnTxnErr.THROW, Directories.FileType.FINAL); + } + sstable.selfRef().release(); + } + + @Test + public void testStatsTSMismatchDuringStart() throws Throwable + { + ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE); + File dataFolder = new Directories(cfs.metadata()).getDirectoryForNewSSTables(); + SSTableReader sstable = sstable(dataFolder, cfs, 0, 128); + + File sFile = new File(sstable.descriptor.filenameFor(Component.STATS)); + assertTrue("STATS file not created successfully in test setup", Files.exists(sFile.toPath())); + + // Confirm we can remove leftovers even if the STATS file doesn't match + try(LogTransaction log = new LogTransaction(OperationType.COMPACTION)) + { + assertNotNull(log); + + // Need to flag the transaction as having a REMOVE entry so it'll trigger the path to calculate stats on list + log.obsoleted(sstable); + + // Need to sleep for long enough to bypass the millisecond truncation logic due to jdk8 and jdk11 change + Thread.sleep(2000); + assertTrue("Failed to set mtime for STATS file to currentTimeMillis()", sFile.setLastModified(System.currentTimeMillis())); + + // Confirm we have an mtime mismatch + File dFile = new File(sstable.descriptor.filenameFor(Component.DATA)); + assertNotEquals(sFile.lastModified(), dFile.lastModified()); + + // We need to add another LogRecord as we allow partial or incorrect entries as the last record... + log.trackNew(sstable(dataFolder, cfs, 2, 128)); + + assertTrue("STATS file gone before removeUnfinished...", Files.exists(sFile.toPath())); + // Confirm we can remove leftovers when the STATS file mismatches + log.prepareToCommit(); // commit so that obsolete sstable components will be removed. + log.commit(); + ComponentContext.create(sstable.descriptor); + assertTrue(LogTransaction.removeUnfinishedLeftovers(cfs.metadata())); + } + + sstable.selfRef().release(); + } + + @Test + public void testWrongTimestampInTxnFile() throws IOException, InterruptedException + { + ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE); + File dataFolder = new Directories(cfs.metadata()).getDirectoryForNewSSTables(); + SSTableReader sstable = sstable(dataFolder, cfs, 0, 128); + + File sFile = new File(sstable.descriptor.filenameFor(Component.STATS)); + assertTrue("STATS file not created successfully in test setup", Files.exists(sFile.toPath())); + + LogRecord.INCLUDE_STATS_FOR_TESTS = true; + + Thread.sleep(2000); + assertTrue("Failed to set mtime for STATS file to currentTimeMillis()", sFile.setLastModified(System.currentTimeMillis())); + + // Confirm we can remove leftovers even if the STATS file doesn't match + try(LogTransaction log = new LogTransaction(OperationType.COMPACTION)) + { + assertNotNull(log); + // Need to flag the transaction as having a REMOVE entry so it'll trigger the path to calculate stats on list + log.obsoleted(sstable); + // Need to sleep for long enough to bypass the millisecond truncation logic due to jdk8 and jdk11 change + // Confirm we have an mtime mismatch + File dFile = new File(sstable.descriptor.filenameFor(Component.DATA)); + assertNotEquals(sFile.lastModified(), dFile.lastModified()); + + // We need to add another LogRecord as we allow partial or incorrect entries as the last record... + log.trackNew(sstable(dataFolder, cfs, 2, 128)); + + assertTrue("STATS file gone before removeUnfinished...", Files.exists(sFile.toPath())); + // Confirm we can remove leftovers when the STATS file mismatches + LogRecord.INCLUDE_STATS_FOR_TESTS = false; + assertTrue(LogTransaction.removeUnfinishedLeftovers(cfs.metadata())); + } + + sstable.selfRef().release(); + } + + /** + * We do not consider the stats file's ts for any cases at this point + */ + @Test + public void testStatsTSMismatchDuringList() throws Throwable + { + SSTableReader sstable = null; + try + { + ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE); + File dataFolder = new Directories(cfs.metadata()).getDirectoryForNewSSTables(); + sstable = sstable(dataFolder, cfs, 0, 128); + + File sFile = new File(sstable.descriptor.filenameFor(Component.STATS)); + assertTrue("STATS file not created successfully in test setup", Files.exists(sFile.toPath())); + + try(LogTransaction log = new LogTransaction(OperationType.COMPACTION)) + { + assertNotNull(log); + + // Need to flag the transaction as having a REMOVE entry so it'll trigger the path to calculate stats on list + log.obsoleted(sstable); + + // Need to sleep for long enough to bypass the millisecond truncation logic due to jdk8 and jdk11 change + Thread.sleep(2000); + assertTrue("Failed to set mtime for STATS file to currentTimeMillis()", sFile.setLastModified(System.currentTimeMillis())); + + // Confirm we have an mtime mismatch + File dFile = new File(sstable.descriptor.filenameFor(Component.DATA)); + assertNotEquals(sFile.lastModified(), dFile.lastModified()); + + // We need to add another LogRecord as we allow partial or incorrect entries as the last record... + log.trackNew(sstable(dataFolder, cfs, 2, 128)); + + // Confirm we don't get a mismatch LogRecord error when the STATS file is different even on listFiles case + listFiles(dataFolder, Directories.OnTxnErr.THROW, Directories.FileType.FINAL); + } + } + finally + { + if (sstable != null) + sstable.selfRef().release(); + } + } + private static SSTableReader sstable(File dataFolder, ColumnFamilyStore cfs, int generation, int size) throws IOException { Descriptor descriptor = new Descriptor(dataFolder, cfs.keyspace.getName(), cfs.getTableName(), generation, SSTableFormat.Type.BIG); - Set components = ImmutableSet.of(Component.DATA, Component.PRIMARY_INDEX, Component.FILTER, Component.TOC); + Set components = ImmutableSet.of(Component.DATA, Component.PRIMARY_INDEX, Component.FILTER, Component.TOC, Component.STATS); for (Component component : components) { File file = new File(descriptor.filenameFor(component)); @@ -1360,12 +1523,12 @@ private static void assertFiles(Iterable existingFiles, Set tempor static Set getTemporaryFiles(File folder) { - return listFiles(folder, Directories.FileType.TEMPORARY); + return listFiles(folder, Directories.OnTxnErr.IGNORE, Directories.FileType.TEMPORARY); } static Set getFinalFiles(File folder) { - return listFiles(folder, Directories.FileType.FINAL); + return listFiles(folder, Directories.OnTxnErr.IGNORE, Directories.FileType.FINAL); } // Used by listFiles - this test is deliberately racing with files being @@ -1390,12 +1553,12 @@ private static Stream toCanonicalIgnoringNotFound(File file) } } - static Set listFiles(File folder, Directories.FileType... types) + static Set listFiles(File folder, Directories.OnTxnErr err, Directories.FileType... types) { Collection match = Arrays.asList(types); return new LogAwareFileLister(folder.toPath(), (file, type) -> match.contains(type), - Directories.OnTxnErr.IGNORE).list() + err).list() .stream() .flatMap(LogTransactionTest::toCanonicalIgnoringNotFound) .collect(Collectors.toSet()); From 08946652434edbce38a6395e71d4068898ea13fa Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Tue, 13 May 2025 11:19:56 +0200 Subject: [PATCH 312/340] Ninja: remove .orig file from repository --- .../SimulatedAccordCommandStore.java.orig | 419 ------------------ 1 file changed, 419 deletions(-) delete mode 100644 test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java.orig diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java.orig b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java.orig deleted file mode 100644 index f48156a3b306..000000000000 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java.orig +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.service.accord; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.function.BooleanSupplier; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.function.ToLongFunction; - -import accord.api.LocalListeners; -import accord.api.ProgressLog; -import accord.api.RemoteListeners; -import accord.api.RoutingKey; -import accord.impl.DefaultLocalListeners; -import accord.impl.SizeOfIntersectionSorter; -import accord.impl.TestAgent; -import accord.local.Command; -import accord.local.CommandStore; -import accord.local.CommandStores; -import accord.local.DurableBefore; -import accord.local.Node; -import accord.local.NodeCommandStoreService; -import accord.local.TimeService; -import accord.local.PreLoadContext; -import accord.local.SafeCommand; -import accord.local.SafeCommandStore; -import accord.messages.BeginRecovery; -import accord.messages.PreAccept; -import accord.messages.TxnRequest; -import accord.primitives.AbstractUnseekableKeys; -import accord.primitives.Ballot; -import accord.primitives.FullRoute; -import accord.primitives.Ranges; -import accord.primitives.Routable; -import accord.primitives.RoutableKey; -import accord.primitives.RoutingKeys; -import accord.primitives.Timestamp; -import accord.primitives.Txn; -import accord.primitives.TxnId; -import accord.primitives.Unseekables; -import accord.topology.Topologies; -import accord.topology.Topology; -import accord.utils.Gens; -import accord.utils.RandomSource; -import accord.utils.async.AsyncChains; -import accord.utils.async.AsyncResult; -import org.apache.cassandra.concurrent.ExecutorFactory; -import org.apache.cassandra.concurrent.ScheduledExecutorPlus; -import org.apache.cassandra.concurrent.SimulatedExecutorFactory; -import org.apache.cassandra.concurrent.Stage; -import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.DataRange; -import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.db.compaction.CompactionManager; -import org.apache.cassandra.db.filter.ColumnFilter; -import org.apache.cassandra.db.memtable.Memtable; -import org.apache.cassandra.metrics.AccordCacheMetrics; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.Generators; -import org.apache.cassandra.utils.Pair; -import org.assertj.core.api.Assertions; - -import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -import static org.apache.cassandra.db.ColumnFamilyStore.FlushReason.UNIT_TESTS; -import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; -import static org.apache.cassandra.utils.AccordGenerators.fromQT; - -public class SimulatedAccordCommandStore implements AutoCloseable -{ - private final List failures = new ArrayList<>(); - private final SimulatedExecutorFactory globalExecutor; - private final CommandStore.EpochUpdateHolder updateHolder; - private final BooleanSupplier shouldEvict, shouldFlush, shouldCompact; - - public final NodeCommandStoreService storeService; - public final AccordCommandStore store; - public final Node.Id nodeId; - public final Topology topology; - public final Topologies topologies; - public final MockJournal journal; - public final ScheduledExecutorPlus unorderedScheduled; - public final List evictions = new ArrayList<>(); - public Predicate ignoreExceptions = ignore -> false; - - public SimulatedAccordCommandStore(RandomSource rs) - { - globalExecutor = new SimulatedExecutorFactory(rs.fork(), fromQT(Generators.TIMESTAMP_GEN.map(java.sql.Timestamp::getTime)).mapToLong(TimeUnit.MILLISECONDS::toNanos).next(rs), failures::add); - this.unorderedScheduled = globalExecutor.scheduled("ignored"); - ExecutorFactory.Global.unsafeSet(globalExecutor); - Stage.READ.unsafeSetExecutor(unorderedScheduled); - Stage.MUTATION.unsafeSetExecutor(unorderedScheduled); - for (Stage stage : Arrays.asList(Stage.MISC, Stage.ACCORD_MIGRATION, Stage.READ, Stage.MUTATION)) - stage.unsafeSetExecutor(globalExecutor.configureSequential("ignore").build()); - - this.updateHolder = new CommandStore.EpochUpdateHolder(); - this.nodeId = AccordTopology.tcmIdToAccord(ClusterMetadata.currentNullable().myNodeId()); - this.storeService = new NodeCommandStoreService() - { - private final ToLongFunction elapsed = TimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.NANOSECONDS, this::now); - - @Override public DurableBefore durableBefore() { return DurableBefore.EMPTY; } - - @Override - public Timestamp uniqueNow() - { - return uniqueNow(Timestamp.NONE); - } - - @Override - public Node.Id id() - { - return nodeId; - } - - @Override - public long epoch() - { - return ClusterMetadata.current().epoch.getEpoch(); - } - - @Override - public long now() - { - return globalExecutor.nanoTime(); - } - - @Override - public long elapsed(TimeUnit unit) - { - return elapsed.applyAsLong(unit); - } - - @Override - public Timestamp uniqueNow(Timestamp atLeast) - { - var now = Timestamp.fromValues(epoch(), now(), nodeId); - if (now.compareTo(atLeast) < 0) - throw new UnsupportedOperationException(); - return now; - } - }; - - AccordStateCache stateCache = new AccordStateCache(Stage.READ.executor(), Stage.MUTATION.executor(), 8 << 20, new AccordStateCacheMetrics("test")); - this.journal = new MockJournal(); - this.store = new AccordCommandStore(0, - storeService, - new TestAgent.RethrowAgent() - { - @Override - public long preAcceptTimeout() - { - return Long.MAX_VALUE; - } - - @Override - public void onUncaughtException(Throwable t) - { - if (ignoreExceptions.test(t)) return; - super.onUncaughtException(t); - } - }, - null, - ignore -> new ProgressLog.NoOpProgressLog(), - cs -> new DefaultLocalListeners(new RemoteListeners.NoOpRemoteListeners(), new DefaultLocalListeners.NotifySink() - { - @Override public void notify(SafeCommandStore safeStore, SafeCommand safeCommand, TxnId listener) {} - @Override public boolean notify(SafeCommandStore safeStore, SafeCommand safeCommand, LocalListeners.ComplexListener listener) { return false; } - }), - updateHolder, - journal, -<<<<<<< HEAD - new AccordCommandStore.CommandStoreExecutor(stateCache, executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + 0 + ']'), Thread.currentThread().getId())); -======= - new AccordCommandStoreExecutor(new AccordStateCacheMetrics("test"), executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + 0 + ']'), agent)); - - this.topology = AccordTopology.createAccordTopology(ClusterMetadata.current()); - this.topologies = new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology); - var rangesForEpoch = new CommandStores.RangesForEpoch(topology.epoch(), topology.ranges(), store); - //store.unsafeSetRangesForEpoch(rangesForEpoch); - updateHolder.add(topology.epoch(), rangesForEpoch, topology.ranges()); - updateHolder.updateGlobal(topology.ranges()); - - shouldEvict = boolSource(rs.fork()); - shouldFlush = boolSource(rs.fork()); - shouldCompact = boolSource(rs.fork()); ->>>>>>> 04671b52ef (Set ranges for epoch in AccordCommandStore via super call, not by fixing up Simulated store) - - store.cache().instances().forEach(i -> { - i.register(new AccordStateCache.Listener() - { - @Override - public void onAdd(AccordCachingState state) - { - } - - @Override - public void onRelease(AccordCachingState state) - { - } - - @Override - public void onEvict(AccordCachingState state) - { - evictions.add(i + " evicted " + state); - } - }); - }); - - this.topology = AccordTopology.createAccordTopology(ClusterMetadata.current()); - this.topologies = new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, topology); - var rangesForEpoch = new CommandStores.RangesForEpoch(topology.epoch(), topology.ranges(), store); - updateHolder.add(topology.epoch(), rangesForEpoch, topology.ranges()); - updateHolder.updateGlobal(topology.ranges()); - - shouldEvict = boolSource(rs.fork()); - shouldFlush = boolSource(rs.fork()); - shouldCompact = boolSource(rs.fork()); - } - - private static BooleanSupplier boolSource(RandomSource rs) - { - var gen = Gens.bools().mixedDistribution().next(rs); - return () -> gen.next(rs); - } - - public TxnId nextTxnId(Txn.Kind kind, Routable.Domain domain) - { - return new TxnId(storeService.epoch(), storeService.now(), kind, domain, nodeId); - } - - public void maybeCacheEvict(Unseekables keysOrRanges) - { - switch (keysOrRanges.domain()) - { - case Key: - maybeCacheEvict((AbstractUnseekableKeys) keysOrRanges, Ranges.EMPTY); - break; - case Range: - maybeCacheEvict(RoutingKeys.EMPTY, (Ranges) keysOrRanges); - break; - default: - throw new UnsupportedOperationException("Unknown domain: " + keysOrRanges.domain()); - } - } - - public void maybeCacheEvict(Unseekables keys, Ranges ranges) - { - AccordStateCache cache = store.cache(); - cache.forEach(state -> { - Class keyType = state.key().getClass(); - if (TxnId.class.equals(keyType)) - { - Command command = (Command) state.state().get(); - if (command != null && command.known().definition.isKnown() - && (command.partialTxn().keys().intersects(keys) || ranges.intersects(command.partialTxn().keys())) - && shouldEvict.getAsBoolean()) - cache.maybeEvict(state); - } - else if (RoutableKey.class.isAssignableFrom(keyType)) - { - RoutableKey key = (RoutableKey) state.key(); - if ((keys.contains(key) || ranges.intersects(key)) - && shouldEvict.getAsBoolean()) - cache.maybeEvict(state); - } - else - { - throw new AssertionError("Unexpected key type: " + state.key().getClass()); - } - }); - - for (var store : Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores()) - { - Memtable memtable = store.getCurrentMemtable(); - if (memtable.partitionCount() == 0 || !intersects(store, memtable, keys, ranges)) - continue; - if (shouldFlush.getAsBoolean()) - store.forceBlockingFlush(UNIT_TESTS); - } - for (var store : Keyspace.open(ACCORD_KEYSPACE_NAME).getColumnFamilyStores()) - { - if (store.getLiveSSTables().size() > 5 && shouldCompact.getAsBoolean()) - { - // compaction no-op since auto-compaction is disabled... so need to enable quickly - store.enableAutoCompaction(); - try - { - FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(store)); - } - finally - { - store.disableAutoCompaction(); - } - } - } - } - - private static boolean intersects(ColumnFamilyStore store, Memtable memtable, Unseekables keys, Ranges ranges) - { - if (keys.isEmpty() && ranges.isEmpty()) // shouldn't happen, but just in case... - return false; - switch (store.name) - { - case "commands_for_key": - // pk = (store_id, routing_key) - // since this is simulating a single store, store_id is a constant, so check key - try (var it = memtable.partitionIterator(ColumnFilter.NONE, DataRange.allData(store.getPartitioner()), null)) - { - while (it.hasNext()) - { - var key = AccordKeyspace.CommandsForKeysAccessor.getKey(it.next().partitionKey()); - if (keys.contains(key) || ranges.intersects(key)) - return true; - } - } - break; - } - return false; - } - - public void checkFailures() - { - if (Thread.interrupted()) - failures.add(new InterruptedException()); - if (failures.isEmpty()) return; - AssertionError error = new AssertionError("Unexpected exceptions found"); - failures.forEach(error::addSuppressed); - failures.clear(); - throw error; - } - - public T process(TxnRequest request) throws ExecutionException, InterruptedException - { - return process(request, request::apply); - } - - public T process(PreLoadContext loadCtx, Function function) throws ExecutionException, InterruptedException - { - var result = processAsync(loadCtx, function); - processAll(); - return AsyncChains.getBlocking(result); - } - - public AsyncResult processAsync(TxnRequest request) - { - return processAsync(request, request::apply); - } - - public AsyncResult processAsync(PreLoadContext loadCtx, Function function) - { - return store.submit(loadCtx, function).beginAsResult(); - } - - public Pair> enqueuePreAccept(Txn txn, FullRoute route) - { - TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); - PreAccept preAccept = new PreAccept(nodeId, topologies, txnId, txn, route); - return Pair.create(txnId, processAsync(preAccept, safe -> { - var reply = preAccept.apply(safe); - Assertions.assertThat(reply.isOk()).isTrue(); - return (PreAccept.PreAcceptOk) reply; - })); - } - - public Pair> enqueueBeginRecovery(Txn txn, FullRoute route) - { - TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); - Ballot ballot = Ballot.fromValues(storeService.epoch(), storeService.now(), nodeId); - BeginRecovery br = new BeginRecovery(nodeId, topologies, txnId, null, txn, route, ballot); - - return Pair.create(txnId, processAsync(br, safe -> { - var reply = br.apply(safe); - Assertions.assertThat(reply.isOk()).isTrue(); - return (BeginRecovery.RecoverOk) reply; - }).beginAsResult()); - } - - public void processAll() - { - while (processOne()) - { - } - } - - private boolean processOne() - { - boolean result = globalExecutor.processOne(); - checkFailures(); - return result; - } - - @Override - public void close() throws Exception - { - store.shutdown(); - } -} From 82fc35b0136cc5f706032759d73ac5ae02c20871 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Thu, 15 May 2025 10:52:53 -0500 Subject: [PATCH 313/340] Prepare debian changelog for 4.1.9 --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 96166a4dd162..6133e726ba36 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ -cassandra (4.1.9) UNRELEASED; urgency=medium +cassandra (4.1.9) unstable; urgency=medium * New release - -- Stefan Miklosovic Tue, 04 Feb 2025 09:43:30 +0100 + -- Brandon Williams Thu, 15 May 2025 10:52:43 -0500 cassandra (4.1.8) unstable; urgency=medium From 6cfea18ddaa23022a919d3251f161151be81ebf0 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Thu, 27 Mar 2025 14:31:54 +0100 Subject: [PATCH 314/340] Avoid duplicate hardlink error upon forceful taking of ephemeral snapshots during repair patch by Stefan Miklosovic; reviewed by Francisco Guerrero for CASSANDRA-20490 --- CHANGES.txt | 1 + .../repair/CassandraTableRepairManager.java | 4 +- .../io/sstable/format/SSTableReader.java | 31 +++- .../apache/cassandra/io/util/FileUtils.java | 14 +- .../snapshot/SnapshotDetailsTabularData.java | 5 +- .../service/snapshot/SnapshotManager.java | 31 +++- .../service/snapshot/SnapshotOptions.java | 34 ++-- .../service/snapshot/TableSnapshot.java | 13 +- .../service/snapshot/TakeSnapshotTask.java | 32 +++- .../test/EphemeralSnapshotTest.java | 167 ++++++++++++++++-- .../distributed/test/SnapshotsTest.java | 90 +++++++++- .../service/ActiveRepairServiceTest.java | 25 ++- 12 files changed, 395 insertions(+), 52 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 0fc414363cc5..a030675ce9e4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Avoid duplicate hardlink error upon forceful taking of ephemeral snapshots during repair (CASSANDRA-20490) * When a custom disk error handler fails to initiate, fail the startup of a node instead of using the no-op handler (CASSANDRA-20614) * Rewrite constraint framework to remove column specification from constraint definition, introduce SQL-like NOT NULL (CASSANDRA-20563) * Fix a bug in AutoRepair duration metric calculation if schedule finishes quickly (CASSANDRA-20622) diff --git a/src/java/org/apache/cassandra/db/repair/CassandraTableRepairManager.java b/src/java/org/apache/cassandra/db/repair/CassandraTableRepairManager.java index 3b6535752e6f..bf5ab5a0e1dc 100644 --- a/src/java/org/apache/cassandra/db/repair/CassandraTableRepairManager.java +++ b/src/java/org/apache/cassandra/db/repair/CassandraTableRepairManager.java @@ -87,7 +87,9 @@ public synchronized void snapshot(String name, Collection> ranges, !sstable.metadata().isIndex() && // exclude SSTables from 2i new Bounds<>(sstable.getFirst().getToken(), sstable.getLast().getToken()).intersects(ranges); - SnapshotOptions options = SnapshotOptions.systemSnapshot(name, SnapshotType.REPAIR, predicate, cfs.getKeyspaceTableName()).ephemeral().build(); + SnapshotOptions options = SnapshotOptions.systemSnapshot(name, SnapshotType.REPAIR, predicate, cfs.getKeyspaceTableName()) + .ephemeral() + .build(); SnapshotManager.instance.takeSnapshot(options); } }).get(); diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java index 3e011f831415..1055070cd8d9 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java @@ -92,6 +92,7 @@ import org.apache.cassandra.io.util.FileDataInput; import org.apache.cassandra.io.util.FileHandle; import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.io.util.FileUtils.DuplicateHardlinkException; import org.apache.cassandra.io.util.RandomAccessReader; import org.apache.cassandra.metrics.RestorableMeter; import org.apache.cassandra.schema.SchemaConstants; @@ -1127,15 +1128,29 @@ public void createLinks(String snapshotDirectoryPath) public void createLinks(String snapshotDirectoryPath, RateLimiter rateLimiter) { - createLinks(descriptor, components, snapshotDirectoryPath, rateLimiter); + createLinks(snapshotDirectoryPath, rateLimiter, false); + } + + public void createLinks(String snapshotDirectoryPath, RateLimiter rateLimiter, boolean ephemeralSnapshot) + { + createLinks(descriptor, components, snapshotDirectoryPath, rateLimiter, ephemeralSnapshot); } public static void createLinks(Descriptor descriptor, Set components, String snapshotDirectoryPath) { - createLinks(descriptor, components, snapshotDirectoryPath, null); + createLinks(descriptor, components, snapshotDirectoryPath, null, false); } - public static void createLinks(Descriptor descriptor, Set components, String snapshotDirectoryPath, RateLimiter limiter) + /** + * Create hardlinks for given set of components + * + * @param descriptor descriptor to use + * @param components components to create links for + * @param snapshotDirectoryPath directory path for snapshot + * @param limiter rate limiter to use + * @param force if true, if target link file exists, do not fail, otherwise throw RTE + */ + public static void createLinks(Descriptor descriptor, Set components, String snapshotDirectoryPath, RateLimiter limiter, boolean force) { for (Component component : components) { @@ -1145,7 +1160,15 @@ public static void createLinks(Descriptor descriptor, Set components, if (null != limiter) limiter.acquire(); File targetLink = new File(snapshotDirectoryPath, sourceFile.name()); - FileUtils.createHardLink(sourceFile, targetLink); + try + { + FileUtils.createHardLink(sourceFile, targetLink); + } + catch (DuplicateHardlinkException ex) + { + if (!force) + throw new RuntimeException(ex.getMessage()); + } } } diff --git a/src/java/org/apache/cassandra/io/util/FileUtils.java b/src/java/org/apache/cassandra/io/util/FileUtils.java index d5ea8dcde1be..c8c605d7da4b 100644 --- a/src/java/org/apache/cassandra/io/util/FileUtils.java +++ b/src/java/org/apache/cassandra/io/util/FileUtils.java @@ -166,7 +166,7 @@ public static void createHardLink(String from, String to) public static void createHardLink(File from, File to) { if (to.exists()) - throw new RuntimeException("Tried to create duplicate hard link to " + to); + throw new DuplicateHardlinkException("Tried to create duplicate hard link from " + from + " to " + to); if (!from.exists()) throw new RuntimeException("Tried to hard link to file that does not exist " + from); @@ -195,6 +195,10 @@ public static void createHardLinkWithConfirm(File from, File to) { throw ex; } + catch (DuplicateHardlinkException ex) + { + throw new RuntimeException(ex.getMessage()); + } catch (Throwable t) { throw new RuntimeException(String.format("Unable to hardlink from %s to %s", from, to), t); @@ -818,4 +822,12 @@ public static int getBlockSize(File directory) f.tryDelete(); } } + + public static class DuplicateHardlinkException extends RuntimeException + { + public DuplicateHardlinkException(String message) + { + super(message); + } + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/snapshot/SnapshotDetailsTabularData.java b/src/java/org/apache/cassandra/service/snapshot/SnapshotDetailsTabularData.java index 3d9609f2e740..7b7635b0a45e 100644 --- a/src/java/org/apache/cassandra/service/snapshot/SnapshotDetailsTabularData.java +++ b/src/java/org/apache/cassandra/service/snapshot/SnapshotDetailsTabularData.java @@ -81,7 +81,10 @@ public static void from(TableSnapshot details, TabularDataSupport result, Set T executeTask(AbstractSnapshotTask task) private synchronized void prePopulateSnapshots(TakeSnapshotTask task) { Map snapshotsToCreate = task.getSnapshotsToCreate(); - for (Map.Entry toCreateEntry : snapshotsToCreate.entrySet()) + Map snapshotsToOverwrite = new HashMap<>(); + List toCreate = new ArrayList<>(snapshotsToCreate.values()); + + for (TableSnapshot existingSnapshot : snapshots) { - if (snapshots.contains(toCreateEntry.getValue())) + for (Map.Entry toCreateEntry : snapshotsToCreate.entrySet()) { - throw new RuntimeException(format("Snapshot %s for %s.%s already exists.", - toCreateEntry.getValue().getTag(), - toCreateEntry.getValue().getKeyspaceName(), - toCreateEntry.getValue().getTableName())); + TableSnapshot snapshotToCreate = toCreateEntry.getValue(); + if (existingSnapshot.equals(toCreateEntry.getValue())) + { + if (!task.options.ephemeral) + { + throw new RuntimeException(format("Snapshot %s for %s.%s already exists.", + snapshotToCreate.getTag(), + snapshotToCreate.getKeyspaceName(), + snapshotToCreate.getTableName())); + } + + toCreate.remove(toCreateEntry.getValue()); + snapshotsToOverwrite.put(toCreateEntry.getKey(), existingSnapshot); + } } } - snapshots.addAll(snapshotsToCreate.values()); + snapshotsToCreate.putAll(snapshotsToOverwrite); + + snapshots.addAll(toCreate); } private static ScheduledExecutorPlus createSnapshotCleanupExecutor() diff --git a/src/java/org/apache/cassandra/service/snapshot/SnapshotOptions.java b/src/java/org/apache/cassandra/service/snapshot/SnapshotOptions.java index 409288f8bf17..8f487d24533d 100644 --- a/src/java/org/apache/cassandra/service/snapshot/SnapshotOptions.java +++ b/src/java/org/apache/cassandra/service/snapshot/SnapshotOptions.java @@ -50,27 +50,18 @@ public class SnapshotOptions public final Predicate sstableFilter; public final ColumnFamilyStore cfs; - private SnapshotOptions(SnapshotType type, - String tag, - DurationSpec.IntSecondsBound ttl, - Instant creationTime, - boolean skipFlush, - boolean ephemeral, - String[] entities, - RateLimiter rateLimiter, - Predicate sstableFilter, - ColumnFamilyStore cfs) + private SnapshotOptions(Builder builder) { - this.type = type; - this.tag = tag; - this.ttl = ttl; - this.creationTime = creationTime; - this.skipFlush = skipFlush; - this.ephemeral = ephemeral; - this.entities = entities; - this.rateLimiter = rateLimiter; - this.sstableFilter = sstableFilter; - this.cfs = cfs; + this.type = builder.type; + this.tag = builder.tag; + this.ttl = builder.ttl; + this.creationTime = builder.creationTime; + this.skipFlush = builder.skipFlush; + this.ephemeral = builder.ephemeral; + this.entities = builder.entities; + this.rateLimiter = builder.rateLimiter; + this.sstableFilter = builder.sstableFilter; + this.cfs = builder.cfs; } public static Builder systemSnapshot(String tag, SnapshotType type, String... entities) @@ -214,8 +205,7 @@ public SnapshotOptions build() if (rateLimiter == null) rateLimiter = DatabaseDescriptor.getSnapshotRateLimiter(); - return new SnapshotOptions(type, tag, ttl, creationTime, skipFlush, ephemeral, entities, rateLimiter, - sstableFilter, cfs); + return new SnapshotOptions(this); } } diff --git a/src/java/org/apache/cassandra/service/snapshot/TableSnapshot.java b/src/java/org/apache/cassandra/service/snapshot/TableSnapshot.java index 7698ea1e3da2..a23aa7a2b165 100644 --- a/src/java/org/apache/cassandra/service/snapshot/TableSnapshot.java +++ b/src/java/org/apache/cassandra/service/snapshot/TableSnapshot.java @@ -207,10 +207,21 @@ public boolean isExpiring() } public long computeSizeOnDiskBytes() + { + return computeSizeOnDiskBytes(false); + } + + /** + * + * @param refresh true if a caller wants to recompute otherwise cached size + * @return on disk bytes + */ + public long computeSizeOnDiskBytes(boolean refresh) { long sum = sizeOnDisk; - if (sum == 0) + if (sum == 0 || refresh) { + sum = 0; for (File snapshotDir : snapshotDirs) sum += FileUtils.folderSize(snapshotDir); diff --git a/src/java/org/apache/cassandra/service/snapshot/TakeSnapshotTask.java b/src/java/org/apache/cassandra/service/snapshot/TakeSnapshotTask.java index a0d81a685ccc..236a3ab783aa 100644 --- a/src/java/org/apache/cassandra/service/snapshot/TakeSnapshotTask.java +++ b/src/java/org/apache/cassandra/service/snapshot/TakeSnapshotTask.java @@ -163,7 +163,7 @@ private void createSnapshot(ColumnFamilyStore cfs, TableSnapshot snapshotToCreat for (SSTableReader ssTable : currentView.sstables) { File snapshotDirectory = Directories.getSnapshotDirectory(ssTable.descriptor, snapshotName); - ssTable.createLinks(snapshotDirectory.path(), options.rateLimiter); // hard links + ssTable.createLinks(snapshotDirectory.path(), options.rateLimiter, options.ephemeral); // hard links if (logger.isTraceEnabled()) logger.trace("Snapshot for {} keyspace data file {} created in {}", cfs.keyspace, ssTable.getFilename(), snapshotDirectory); sstables.add(ssTable); @@ -268,12 +268,40 @@ else if (entities != null) } + private SnapshotManifest createSnapshotManifest(SnapshotManifest manifest, File manifestFile) + { + SnapshotManifest oldManifest = null; + if (manifestFile.exists()) + { + try + { + oldManifest = SnapshotManifest.deserializeFromJsonFile(manifestFile); + } + catch (Throwable t) + { + logger.warn("Unable to read the content of old manifest {}", manifestFile); + } + } + + if (oldManifest != null) + { + Set deduplicates = new HashSet<>(); // set to deduplicate + deduplicates.addAll(oldManifest.getFiles()); + deduplicates.addAll(manifest.files); + + return new SnapshotManifest(new ArrayList<>(deduplicates), options.ttl, creationTime, options.ephemeral); + } + + return manifest; + } + private void writeSnapshotManifest(SnapshotManifest manifest, File manifestFile) { try { + SnapshotManifest toCreate = createSnapshotManifest(manifest, manifestFile); manifestFile.parent().tryCreateDirectories(); - manifest.serializeToJsonFile(manifestFile); + toCreate.serializeToJsonFile(manifestFile); } catch (IOException e) { diff --git a/test/distributed/org/apache/cassandra/distributed/test/EphemeralSnapshotTest.java b/test/distributed/org/apache/cassandra/distributed/test/EphemeralSnapshotTest.java index f4d423cf23f8..4a4d9ccf8da5 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/EphemeralSnapshotTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/EphemeralSnapshotTest.java @@ -18,13 +18,22 @@ package org.apache.cassandra.distributed.test; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import javax.management.openmbean.CompositeDataSupport; +import javax.management.openmbean.TabularData; +import javax.management.openmbean.TabularDataSupport; import com.google.common.util.concurrent.Futures; import org.junit.Test; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.IInvokableInstance; @@ -32,6 +41,8 @@ import org.apache.cassandra.io.util.File; import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.service.snapshot.SnapshotManifest; +import org.apache.cassandra.service.snapshot.SnapshotOptions; +import org.apache.cassandra.service.snapshot.SnapshotType; import org.apache.cassandra.utils.Pair; import static java.lang.String.format; @@ -40,6 +51,8 @@ import static org.apache.cassandra.distributed.api.Feature.GOSSIP; import static org.apache.cassandra.distributed.api.Feature.NATIVE_PROTOCOL; import static org.apache.cassandra.distributed.api.Feature.NETWORK; +import static org.apache.cassandra.schema.SchemaConstants.LOCAL_SYSTEM_KEYSPACE_NAMES; +import static org.apache.cassandra.schema.SchemaConstants.REPLICATED_SYSTEM_KEYSPACE_NAMES; import static org.awaitility.Awaitility.await; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -58,7 +71,7 @@ public void testStartupRemovesEphemeralSnapshotOnEphemeralFlagInManifest() throw .withConfig(config -> config.with(GOSSIP, NETWORK, NATIVE_PROTOCOL)) .start())) { - Pair initialisationData = initialise(c); + Pair initialisationData = initialise(c, tableName); rewriteManifestToEphemeral(initialisationData.left, initialisationData.right); @@ -76,7 +89,7 @@ public void testStartupRemovesEphemeralSnapshotOnMarkerFile() throws Exception .withConfig(config -> config.with(GOSSIP, NETWORK, NATIVE_PROTOCOL)) .start())) { - Pair initialisationData = initialise(c); + Pair initialisationData = initialise(c, tableName); String tableId = initialisationData.left; String[] dataDirs = initialisationData.right; @@ -106,7 +119,7 @@ public void testEphemeralSnapshotIsNotClearableFromNodetool() throws Exception { IInvokableInstance instance = c.get(1); - Pair initialisationData = initialise(c); + Pair initialisationData = initialise(c, tableName); rewriteManifestToEphemeral(initialisationData.left, initialisationData.right); c.get(1).runOnInstance((IIsolatedExecutor.SerializableRunnable) () -> SnapshotManager.instance.restart(true)); @@ -130,7 +143,7 @@ public void testClearingAllSnapshotsFromNodetoolWillKeepEphemeralSnaphotsIntact( { IInvokableInstance instance = c.get(1); - Pair initialisationData = initialise(c); + Pair initialisationData = initialise(c, tableName); rewriteManifestToEphemeral(initialisationData.left, initialisationData.right); @@ -142,13 +155,104 @@ public void testClearingAllSnapshotsFromNodetoolWillKeepEphemeralSnaphotsIntact( } } - private Pair initialise(Cluster c) + /** + * @see CASSANDRA-20490 + */ + @Test + public void testForceEphemeralSnapshotWhenAlreadyExists() throws Exception + { + try (Cluster c = init(builder().withNodes(1) + .withConfig(config -> config.with(GOSSIP, NETWORK, NATIVE_PROTOCOL)) + .start())) + { + IInvokableInstance instance = c.get(1); + + c.schemaChange(withKeyspace("CREATE TABLE IF NOT EXISTS %s." + tableName + " (cityid int PRIMARY KEY, name text)")); + c.coordinator(1).execute(withKeyspace("INSERT INTO %s." + tableName + "(cityid, name) VALUES (1, 'Canberra');"), ONE); + + instance.flush(KEYSPACE); + + takeEphemeralSnapshotForcibly(c, KEYSPACE, tableName, snapshotName); + assertTrue(instance.nodetoolResult("listsnapshots", "-e").getStdout().contains(snapshotName)); + float firstSnapshotSize = getSnapshotSizeOnDisk(c, KEYSPACE, tableName, snapshotName); + + SnapshotManifest snapshotManifest = SnapshotManifest.deserializeFromJsonFile(new File(findManifest(getDataDirs(c), getTableId(c, KEYSPACE, tableName)))); + assertEquals(1, snapshotManifest.getFiles().size()); + + // list sstables + List snapshotFilesAfterFirstSnapshot = getSnapshotFiles(c, snapshotName); + assertFalse(snapshotFilesAfterFirstSnapshot.isEmpty()); + + // add more data + insertData(c, tableName); + + takeEphemeralSnapshotForcibly(c, KEYSPACE, tableName, snapshotName); + assertTrue(instance.nodetoolResult("listsnapshots", "-e").getStdout().contains(snapshotName)); + SnapshotManifest secondSnapshotManifest = SnapshotManifest.deserializeFromJsonFile(new File(findManifest(getDataDirs(c), getTableId(c, KEYSPACE, tableName)))); + assertEquals(2, secondSnapshotManifest.getFiles().size()); + + List snapshotFilesAfterSecondSnapshot = getSnapshotFiles(c, snapshotName); + assertFalse(snapshotFilesAfterSecondSnapshot.isEmpty()); + + // list again and check it is superset of previous listing + assertTrue(snapshotFilesAfterSecondSnapshot.size() > snapshotFilesAfterFirstSnapshot.size()); + assertTrue(snapshotFilesAfterSecondSnapshot.containsAll(snapshotFilesAfterFirstSnapshot)); + assertTrue(secondSnapshotManifest.getFiles().containsAll(snapshotManifest.getFiles())); + + float secondSnapshotSize = getSnapshotSizeOnDisk(c, KEYSPACE, tableName, snapshotName); + + assertTrue(secondSnapshotSize > firstSnapshotSize); + } + } + + private Float getSnapshotSizeOnDisk(Cluster c, String keyspace, String table, String snapshotName) + { + return c.get(1).applyOnInstance((IIsolatedExecutor.SerializableTriFunction) (ks, tb, name) -> { + + Map stringTabularDataMap = SnapshotManager.instance.listSnapshots(Map.of("include_ephemeral", "true")); + + TabularDataSupport tabularData = (TabularDataSupport) stringTabularDataMap.get(name); + for (Object value : tabularData.values()) + { + CompositeDataSupport cds = (CompositeDataSupport) value; + return Float.parseFloat(((String) cds.get("Size on disk")).split(" ")[0]); + } + + return 0F; + }, keyspace, table, snapshotName); + } + + private void takeEphemeralSnapshotForcibly(Cluster c, String keyspace, String table, String snapshotName) + { + c.get(1).applyOnInstance((IIsolatedExecutor.SerializableTriFunction) (ks, tb, name) -> + { + ColumnFamilyStore cfs = Keyspace.getValidKeyspace(ks).getColumnFamilyStore(tb); + try + { + SnapshotManager.instance.takeSnapshot(SnapshotOptions.systemSnapshot(name, SnapshotType.REPAIR, (sstable) -> true, cfs.getKeyspaceTableName()) + .ephemeral() + .build()); + } + catch (Throwable t) + { + throw new RuntimeException(t.getMessage()); + } + return null; + }, keyspace, table, snapshotName); + } + + private void insertData(Cluster c, String tableName) { c.schemaChange(withKeyspace("CREATE TABLE IF NOT EXISTS %s." + tableName + " (cityid int PRIMARY KEY, name text)")); c.coordinator(1).execute(withKeyspace("INSERT INTO %s." + tableName + "(cityid, name) VALUES (1, 'Canberra');"), ONE); IInvokableInstance instance = c.get(1); - instance.flush(KEYSPACE); + } + + private Pair initialise(Cluster c, String tableName) + { + insertData(c, tableName); + IInvokableInstance instance = c.get(1); assertEquals(0, instance.nodetool("snapshot", "-kt", withKeyspace("%s." + tableName), "-t", snapshotName)); waitForSnapshot(instance, snapshotName); @@ -158,15 +262,17 @@ private Pair initialise(Cluster c) assertEquals(0, instance.nodetool("snapshot", "-kt", withKeyspace("%s." + tableName), "-t", snapshotName2)); waitForSnapshot(instance, snapshotName2); - String tableId = instance.callOnInstance((IIsolatedExecutor.SerializableCallable) () -> { - return Keyspace.open(KEYSPACE).getMetadata().tables.get(tableName).get().id.asUUID().toString().replaceAll("-", ""); - }); + String tableId = getTableId(c, KEYSPACE, tableName); - String[] dataDirs = (String[]) instance.config().get("data_file_directories"); + String[] dataDirs = getDataDirs(c); return Pair.create(tableId, dataDirs); } + private String[] getDataDirs(Cluster c) + { + return (String[]) c.get(1).config().get("data_file_directories"); + } private void verify(IInvokableInstance instance) { @@ -220,4 +326,45 @@ private Path findManifest(String[] dataDirs, String tableId) throw new IllegalStateException("Unable to find manifest!"); } + + private List getSnapshotFiles(Cluster cluster, String snapshotName) + { + return cluster.get(1).applyOnInstance((IIsolatedExecutor.SerializableFunction>) (name) -> { + List result = new ArrayList<>(); + + for (Keyspace keyspace : Keyspace.all()) + { + if (LOCAL_SYSTEM_KEYSPACE_NAMES.contains(keyspace.getName()) || REPLICATED_SYSTEM_KEYSPACE_NAMES.contains(keyspace.getName())) + continue; + + for (ColumnFamilyStore cfs : keyspace.getColumnFamilyStores()) + { + for (String dataDir : DatabaseDescriptor.getAllDataFileLocations()) + { + File snapshotDir = new File(dataDir, format("%s/%s-%s/snapshots/%s", keyspace.getName(), cfs.name, cfs.metadata().id.toHexString(), name)); + if (snapshotDir.exists()) + { + try + { + Files.list(snapshotDir.toPath()).forEach(p -> result.add(p.toString())); + } + catch (IOException e) + { + throw new RuntimeException("Unable to list " + snapshotDir.toPath(), e); + } + } + } + } + } + + return result; + }, snapshotName); + } + + private String getTableId(Cluster c, String keyspace, String tableName) + { + return c.get(1).applyOnInstance((IIsolatedExecutor.SerializableBiFunction) (ks, tb) -> { + return Keyspace.open(ks).getMetadata().tables.get(tb).get().id.asUUID().toString().replaceAll("-", ""); + }, keyspace, tableName); + } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/SnapshotsTest.java b/test/distributed/org/apache/cassandra/distributed/test/SnapshotsTest.java index 7227d6613f85..4d2c42bcb604 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/SnapshotsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/SnapshotsTest.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.UUID; import java.util.regex.Pattern; import org.junit.After; @@ -43,6 +44,9 @@ import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.io.util.File; import org.apache.cassandra.service.snapshot.SnapshotManager; +import org.apache.cassandra.service.snapshot.SnapshotManifest; +import org.apache.cassandra.service.snapshot.SnapshotOptions; +import org.apache.cassandra.service.snapshot.SnapshotType; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.FBUtilities; @@ -199,7 +203,7 @@ public void testMissingManifestIsCreatedOnStartupWithEnrichmentEnabled() cluster.get(1).shutdown(true); // remove manifest only in the first data dir - removeAllManifests(new String[] {dataDirs[0]}, paths); + removeAllManifests(new String[]{ dataDirs[0]}, paths); // they will be still created for that first dir cluster.get(1).startup(); @@ -555,6 +559,90 @@ public void testListingOfSnapshotsByKeyspaceAndTable() assertTrue(snapshots.get(0).contains("tagks1tbl")); } + @Test + public void testForcedSnapshot() throws Throwable + { + try (Cluster cluster = init(Cluster.build(1) + .withDataDirCount(3) // 3 dirs to disperse SSTables among different dirs + .start())) + { + cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (pk uuid primary key)"); + + cluster.get(1).runOnInstance((IIsolatedExecutor.SerializableRunnable) () -> { + Keyspace.open("distributed_test_keyspace").getColumnFamilyStore("tbl").disableAutoCompaction(); + }); + + for (int i = 0; i < 10; i++) + { + cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + ".tbl (pk) values (?)", UUID.randomUUID()); + cluster.get(1).flush(KEYSPACE); + } + + takeEphemeralSnapshotWithSameName(cluster); + List manifests1 = getManifests(cluster); + List ssTablesFromManifest1 = getSSTablesFromManifest(manifests1.get(0)); + + for (int i = 0; i < 10; i++) + { + cluster.get(1).executeInternal("INSERT INTO " + KEYSPACE + ".tbl (pk) values (?)", UUID.randomUUID()); + cluster.get(1).flush(KEYSPACE); + } + takeEphemeralSnapshotWithSameName(cluster); + List manifests2 = getManifests(cluster); + List ssTablesFromManifest2 = getSSTablesFromManifest(manifests2.get(0)); + + assertEquals(manifests1, manifests2); + assertTrue(ssTablesFromManifest1.size() < ssTablesFromManifest2.size()); + assertTrue(ssTablesFromManifest2.containsAll(ssTablesFromManifest1)); + } + } + + private List getSSTablesFromManifest(File manifest) throws Throwable + { + SnapshotManifest snapshotManifest = SnapshotManifest.deserializeFromJsonFile(manifest); + return snapshotManifest.getFiles(); + } + + private List getManifests(Cluster cluster) + { + List manifestsPaths = cluster.get(1).callOnInstance((SerializableCallable>) () -> { + ColumnFamilyStore cfs = Keyspace.open("distributed_test_keyspace").getColumnFamilyStore("tbl"); + + List allManifests = new ArrayList<>(); + for (File file : cfs.getDirectories().getSnapshotDirsWithoutCreation("a_snapshot")) + { + File maybeManifest = new File(file, "manifest.json"); + if (maybeManifest.exists()) + allManifests.add(maybeManifest.absolutePath()); + } + + assertEquals(3, allManifests.size()); // 3 because 3 data dirs + return allManifests; + }); + + List manifests = new ArrayList<>(); + for (String manifest : manifestsPaths) + manifests.add(new File(manifest)); + + return manifests; + } + + private void takeEphemeralSnapshotWithSameName(Cluster cluster) + { + cluster.get(1).runOnInstance((IIsolatedExecutor.SerializableRunnable) () -> { + try + { + SnapshotManager.instance.takeSnapshot(SnapshotOptions.systemSnapshot("a_snapshot", SnapshotType.REPAIR, (r) -> true, "distributed_test_keyspace.tbl") + .ephemeral() + .build()); + } + catch (Throwable t) + { + throw new RuntimeException(t); + } + }); + } + private void populate(Cluster cluster) { for (int i = 0; i < 100; i++) diff --git a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java index 5293149b1b59..ee4fa293a25e 100644 --- a/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java +++ b/test/unit/org/apache/cassandra/service/ActiveRepairServiceTest.java @@ -36,7 +36,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; - import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -63,6 +62,7 @@ import org.apache.cassandra.repair.messages.RepairOption; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.disk.usage.DiskUsageMonitor; +import org.apache.cassandra.service.snapshot.SnapshotManager; import org.apache.cassandra.service.snapshot.TableSnapshot; import org.apache.cassandra.streaming.PreviewKind; import org.apache.cassandra.tcm.ClusterMetadata; @@ -76,7 +76,7 @@ import org.apache.cassandra.utils.concurrent.Refs; import org.mockito.Mock; -import static org.apache.cassandra.ServerTestUtils.*; +import static org.apache.cassandra.ServerTestUtils.resetCMS; import static org.apache.cassandra.config.CassandraRelevantProperties.ORG_APACHE_CASSANDRA_DISABLE_MBEAN_REGISTRATION; import static org.apache.cassandra.repair.messages.RepairOption.DATACENTERS_KEY; import static org.apache.cassandra.repair.messages.RepairOption.FORCE_REPAIR_KEY; @@ -307,6 +307,26 @@ public void testSnapshotAddSSTables() throws Exception } } + @Test + public void testForcedSnapshot() throws Throwable + { + ColumnFamilyStore store = prepareColumnFamilyStore(); + TimeUUID prsId = nextTimeUUID(); + Collection> ranges = Collections.singleton(new Range<>(store.getPartitioner().getMinimumToken(), store.getPartitioner().getMinimumToken())); + ActiveRepairService.instance().registerParentRepairSession(prsId, FBUtilities.getBroadcastAddressAndPort(), Collections.singletonList(store), + ranges, true, System.currentTimeMillis(), false, PreviewKind.NONE); + + // snapshot twice, would not be possible before CASSANDRA-20490 + store.getRepairManager().snapshot(prsId.toString(), ranges, true); + store.getRepairManager().snapshot(prsId.toString(), ranges, true); + + List snapshots = SnapshotManager.instance.getSnapshots(p -> p.getKeyspaceName().equals(store.getKeyspaceName()) && p.getTableName().equals(store.getTableName())); + Assert.assertEquals(1, snapshots.size()); + TableSnapshot snapshot = snapshots.get(0); + Assert.assertTrue(snapshot.isEphemeral()); + Assert.assertEquals(prsId.toString(), snapshot.getTag()); + } + private ColumnFamilyStore prepareColumnFamilyStore() { Keyspace keyspace = Keyspace.open(KEYSPACE5); @@ -314,6 +334,7 @@ private ColumnFamilyStore prepareColumnFamilyStore() store.truncateBlocking(); store.disableAutoCompaction(); createSSTables(store, 10); + SnapshotManager.instance.clearAllSnapshots(); return store; } From 96b3db90fbdc1eb348c7996622202def3c5c2159 Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Thu, 15 May 2025 13:17:06 +0100 Subject: [PATCH 315/340] Relax validation of snapshot name as a part of SSTable files path validation Patch by Dmitry Konstantinov; reviewed by Michael Semb Wever for CASSANDRA-20649 --- CHANGES.txt | 1 + src/java/org/apache/cassandra/io/sstable/Descriptor.java | 4 ++-- test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index b61e044cfd0f..878b3dd2db0b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Relax validation of snapshot name as a part of SSTable files path validation (CASSANDRA-20649) * Optimize initial skipping logic for SAI queries on large partitions (CASSANDRA-20191) * Fix reading mmapped trie-index exceeding 2GiB (CASSANDRA-20351) * zero copy streaming allocates direct memory that isn't used, but does help to fragment the memory space (CASSANDRA-20577) diff --git a/src/java/org/apache/cassandra/io/sstable/Descriptor.java b/src/java/org/apache/cassandra/io/sstable/Descriptor.java index bf58fff71761..b9e149804278 100644 --- a/src/java/org/apache/cassandra/io/sstable/Descriptor.java +++ b/src/java/org/apache/cassandra/io/sstable/Descriptor.java @@ -64,14 +64,14 @@ public class Descriptor // to the SSTable naming. static final Pattern SSTABLE_DIR_PATTERN = Pattern.compile(".*/(?\\w+)/" + "(?\\w+)-(?[0-9a-f]{32})/" + - "(backups/|snapshots/(?[\\w-]+)/)?" + + "(backups/|snapshots/(?[^/]+)/)?" + "(\\.(?[\\w-]+)/)?" + "(?[\\w-\\+]+)\\.(?[\\w]+)$"); // Pre 2.1 SSTable directory format is {keyspace}/{tableName}-{tableId}[/backups|/snapshots/{tag}][/.{indexName}]/{component}.db static final Pattern LEGACY_SSTABLE_DIR_PATTERN = Pattern.compile(".*/(?\\w+)/" + "(?\\w+)/" + - "(backups/|snapshots/(?[\\w-]+)/)?" + + "(backups/|snapshots/(?[^/]+)/)?" + "(\\.(?[\\w-]+)/)?" + "(?[\\w-]+)\\.(?[\\w]+)$"); diff --git a/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java b/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java index 32b4813d29e6..88973414e800 100644 --- a/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java @@ -169,6 +169,7 @@ public void testKeyspaceTableParsing() "/path/to/cassandra/data/dir2/dir5/dir6/ks1/tab1-34234234234234234234234234234234/backups/na-1-big-Index.db", "/path/to/cassandra/data/dir2/dir5/dir6/ks1/tab1-34234234234234234234234234234234/nb-3g1m_0nuf_3vj5m2k1125165rxa7-big-Index.db", "/path/to/cassandra/data/dir2/dir5/dir6/ks1/tab1-34234234234234234234234234234234/snapshots/snapshot/nb-3g1m_0nuf_3vj5m2k1125165rxa7-big-Index.db", + "/path/to/cassandra/data/dir2/dir5/dir6/ks1/tab1-34234234234234234234234234234234/snapshots/snapshot-12345-1.2.3_TEST#=/nb-3g1m_0nuf_3vj5m2k1125165rxa7-big-Index.db", "/path/to/cassandra/data/dir2/dir5/dir6/ks1/tab1-34234234234234234234234234234234/backups/nb-3g1m_0nuf_3vj5m2k1125165rxa7-big-Index.db", }; @@ -230,6 +231,7 @@ public void testKeyspaceTableParsing() "/path/to/cassandra/data/dir2/dir5/dir6/backups/backups/na-1-big-Index.db", "/path/to/cassandra/data/dir2/dir5/dir6/backups/backups/nb-1-big-TOC.txt", //"/path/to/cassandra/data/dir2/dir5/dir6/backups/backups/snapshots/snapshots/na-1-big-Index.db", #not supported (CASSANDRA-14013) + "/path/to/cassandra/data/dir2/dir5/dir6/backups/backups/snapshots/snapshot-12345-1.2.3_TEST#=/na-1-big-Index.db", "/path/to/cassandra/data/dir2/dir5/dir6/backups/backups/backups/na-1-big-Index.db", "/path/to/cassandra/data/dir2/dir5/dir6/backups/backups/nb-3g1m_0nuf_3vj5m2k1125165rxa7-big-Index.db", //"/path/to/cassandra/data/dir2/dir5/dir6/backups/backups/snapshots/snapshots/nb-3g1m_0nuf_3vj5m2k1125165rxa7-big-Index.db", #not supported (CASSANDRA-14013) From 7ab1e3827cc588aeb4c69ed1cf59e07b875dcc43 Mon Sep 17 00:00:00 2001 From: jaydeepkumar1984 Date: Fri, 16 May 2025 14:45:08 -0700 Subject: [PATCH 316/340] Stop AutoRepair monitoring thread upon Cassandra shutdown patch by Jaydeepkumar Chovatia; reviewed by Bernardo Botella, Andrew Tolbert for CASSANDRA-20623 --- CHANGES.txt | 1 + .../repair/autorepair/AutoRepair.java | 34 +++++++++ .../cassandra/service/StorageService.java | 2 + .../autorepair/AutoRepairShutdownTest.java | 75 +++++++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 test/unit/org/apache/cassandra/repair/autorepair/AutoRepairShutdownTest.java diff --git a/CHANGES.txt b/CHANGES.txt index 0a4a4c9ac26d..4ae4657a9475 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Stop AutoRepair monitoring thread upon Cassandra shutdown (CASSANDRA-20623) * Avoid duplicate hardlink error upon forceful taking of ephemeral snapshots during repair (CASSANDRA-20490) * When a custom disk error handler fails to initiate, fail the startup of a node instead of using the no-op handler (CASSANDRA-20614) * Rewrite constraint framework to remove column specification from constraint definition, introduce SQL-like NOT NULL (CASSANDRA-20563) diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java index e5923e3c9c38..10d18fae639f 100644 --- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java +++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.function.Consumer; @@ -102,6 +103,8 @@ public class AutoRepair public boolean isSetupDone = false; public static AutoRepair instance = new AutoRepair(); + public volatile boolean isShutDown = false; + private AutoRepair() { // Private constructor to prevent instantiation @@ -566,4 +569,35 @@ public void progress(String tag, ProgressEvent event) } } } + + public synchronized void shutdownBlocking() throws ExecutionException, InterruptedException + { + if (!isSetupDone) + { + // By default, executors within AutoRepair are not initialized as the feature is opt-in. + // If the AutoRepair has not been set up, then there is no need to worry about shutting it down + return; + } + if (isShutDown) + { + throw new IllegalStateException("AutoRepair has already been shut down"); + } + isShutDown = true; + for (AutoRepairConfig.RepairType repairType : AutoRepairConfig.RepairType.values()) + { + repairRunnableExecutors.get(repairType).shutdown(); + repairExecutors.get(repairType).shutdown(); + } + logger.info("Paused AutoRepair"); + } + + public Map getRepairExecutors() + { + return repairExecutors; + } + + public Map getRepairRunnableExecutors() + { + return repairRunnableExecutors; + } } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 8ef0c7fc8b2b..0caf26a3a6e8 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -3913,6 +3913,8 @@ protected synchronized void drain(boolean isFinalShutdown) throws IOException, I CommitLog.instance.shutdownBlocking(); + AutoRepair.instance.shutdownBlocking(); + // wait for miscellaneous tasks like sstable and commitlog segment deletion ColumnFamilyStore.shutdownPostFlushExecutor(); diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairShutdownTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairShutdownTest.java new file mode 100644 index 000000000000..c7de17e0e455 --- /dev/null +++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairShutdownTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.repair.autorepair; + +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType; + +import static org.apache.cassandra.Util.setAutoRepairEnabled; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Unit tests to validate the executor shutdown inside {@link AutoRepair} + */ +public class AutoRepairShutdownTest extends CQLTester +{ + @BeforeClass + public static void setupClass() throws Exception + { + setAutoRepairEnabled(true); + requireNetwork(); + } + + @Test + public void testAutoRepairShutdown() throws Exception + { + AutoRepair.instance.setup(); + + for (RepairType type : RepairType.values()) + { + assertFalse("RepairRunnableExecutor should not have been shut down", AutoRepair.instance.getRepairRunnableExecutors().get(type).isShutdown()); + assertFalse("RepairExecutor should not have been shut down", AutoRepair.instance.getRepairExecutors().get(type).isShutdown()); + } + assertFalse("AutoRepair should not be marked as shut down", AutoRepair.instance.isShutDown); + + AutoRepair.instance.shutdownBlocking(); + + for (RepairType type : RepairType.values()) + { + assertTrue("RepairRunnableExecutor should be shut down", AutoRepair.instance.getRepairRunnableExecutors().get(type).isShutdown()); + assertTrue("RepairExecutor should be shut down", AutoRepair.instance.getRepairExecutors().get(type).isShutdown()); + } + assertTrue("AutoRepair should be marked as shut down", AutoRepair.instance.isShutDown); + + try + { + AutoRepair.instance.shutdownBlocking(); + fail("A second call to shutdown should have thrown an exception"); + } + catch (IllegalStateException e) + { + // expected + } + } +} From f80a234d1ec33dbe927bc8af28f1a290335819a3 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Tue, 13 May 2025 08:23:50 -0500 Subject: [PATCH 317/340] Migrate sstableloader code to its own tools directory and artifact As part of a broader effort to decouple java driver code from the server code, this moves sstableloader to its own tools directory. As sstableloader is also used as a library (CASSANDRA-10637), added a new artifact 'cassandra-sstableloader' that will get deployed to maven along with 'cassandra-all'. While I expect this is likely a niche use case, this will allow users to continue using BulkExport as a library. Moves sstableloader-specific targets to its own build.xml in tools/sstableloader/build.xml. Also updates IDE project files and circleci to utilize new sstableloader-specific targets. patch by Andy Tolbert; reviewed by Stefan Miklosovic and Mick Semb Wever for CASSANDRA-20328 --- .build/README.md | 1 + .build/build-sonar.xml | 4 +- ...late.xml => cassandra-build-maven-pom.xml} | 0 ...plate.xml => cassandra-deps-maven-pom.xml} | 0 .build/docker/run-tests.sh | 4 +- ...-pom-template.xml => parent-maven-pom.xml} | 2 +- .build/run-tests.sh | 9 +- .build/sstableloader-deps-maven-pom.xml | 48 + .circleci/config.yml | 1308 ++++++++++++++++- .circleci/config.yml.FREE | 1308 ++++++++++++++++- .circleci/config.yml.PAID | 1308 ++++++++++++++++- .circleci/config_template.yml | 113 ++ .circleci/generate.sh | 16 +- .jenkins/Jenkinsfile | 7 +- CHANGES.txt | 1 + bin/sstableloader | 18 +- build.xml | 97 +- debian/cassandra.install | 2 +- debian/rules | 4 + ide/idea-iml-file.xml | 2 + ide/idea/workspace.xml | 1 + ide/nbproject/ide-actions.xml | 1 + ide/nbproject/project.xml | 45 +- redhat/cassandra.spec | 3 + .../cassandra/tools/CmdLineOptions.java | 73 + .../tools/StandaloneSSTableUtil.java | 1 - .../cassandra/tools/StandaloneScrubber.java | 1 - .../cassandra/tools/StandaloneSplitter.java | 2 +- .../cassandra/tools/StandaloneUpgrader.java | 1 - .../cassandra/tools/StandaloneVerifier.java | 1 - tools/bin/cassandra.in.sh | 2 +- tools/bin/sstableloader | 49 + tools/sstableloader/build.xml | 125 ++ .../tools/BulkLoadConnectionFactory.java | 0 .../cassandra/tools/BulkLoadException.java | 0 .../apache/cassandra/tools/BulkLoader.java | 50 - .../apache/cassandra/tools/LoaderOptions.java | 4 +- .../utils/NativeSSTableLoaderClient.java | 0 .../cassandra/tools/LoaderOptionsTest.java | 42 +- 39 files changed, 4455 insertions(+), 198 deletions(-) rename .build/{cassandra-build-deps-template.xml => cassandra-build-maven-pom.xml} (100%) rename .build/{cassandra-deps-template.xml => cassandra-deps-maven-pom.xml} (100%) rename .build/{parent-pom-template.xml => parent-maven-pom.xml} (99%) create mode 100644 .build/sstableloader-deps-maven-pom.xml create mode 100644 src/java/org/apache/cassandra/tools/CmdLineOptions.java create mode 100755 tools/bin/sstableloader create mode 100644 tools/sstableloader/build.xml rename {src/java => tools/sstableloader/src}/org/apache/cassandra/tools/BulkLoadConnectionFactory.java (100%) rename {src/java => tools/sstableloader/src}/org/apache/cassandra/tools/BulkLoadException.java (100%) rename {src/java => tools/sstableloader/src}/org/apache/cassandra/tools/BulkLoader.java (87%) rename {src/java => tools/sstableloader/src}/org/apache/cassandra/tools/LoaderOptions.java (99%) rename {src/java => tools/sstableloader/src}/org/apache/cassandra/utils/NativeSSTableLoaderClient.java (100%) rename {test => tools/sstableloader/test}/unit/org/apache/cassandra/tools/LoaderOptionsTest.java (89%) diff --git a/.build/README.md b/.build/README.md index 4c15297908c2..c62424d89411 100644 --- a/.build/README.md +++ b/.build/README.md @@ -100,6 +100,7 @@ Running other types of tests with docker: .build/docker/run-tests.sh -a test .build/docker/run-tests.sh -a stress-test .build/docker/run-tests.sh -a fqltool-test + .build/docker/run-tests.sh -a sstableloader-test .build/docker/run-tests.sh -a microbench .build/docker/run-tests.sh -a test-cdc .build/docker/run-tests.sh -a test-compression diff --git a/.build/build-sonar.xml b/.build/build-sonar.xml index 250191064c43..31472d007103 100644 --- a/.build/build-sonar.xml +++ b/.build/build-sonar.xml @@ -226,9 +226,9 @@ - + - + diff --git a/.build/cassandra-build-deps-template.xml b/.build/cassandra-build-maven-pom.xml similarity index 100% rename from .build/cassandra-build-deps-template.xml rename to .build/cassandra-build-maven-pom.xml diff --git a/.build/cassandra-deps-template.xml b/.build/cassandra-deps-maven-pom.xml similarity index 100% rename from .build/cassandra-deps-template.xml rename to .build/cassandra-deps-maven-pom.xml diff --git a/.build/docker/run-tests.sh b/.build/docker/run-tests.sh index 7bb8fc9d1ec5..3bfbec718a02 100755 --- a/.build/docker/run-tests.sh +++ b/.build/docker/run-tests.sh @@ -40,7 +40,7 @@ error() { # legacy argument handling case ${1} in - "build_dtest_jars" | "stress-test" | "fqltool-test" | "microbench" | "test-burn" | "long-test" | "cqlsh-test" | "simulator-dtest" | "dtest" | "dtest-novnode" | "dtest-latest" | "dtest-large" | "dtest-large-novnode" | "dtest-upgrade" | "dtest-upgrade-novnode"| "dtest-upgrade-large" | "dtest-upgrade-novnode-large" | "test" | "test-cdc" | "test-compression" | "test-oa" | "test-system-keyspace-directory" | "test-latest" | "jvm-dtest" | "jvm-dtest-upgrade" | "jvm-dtest-novnode" | "jvm-dtest-upgrade-novnode") + "build_dtest_jars" | "stress-test" | "fqltool-test" | "sstableloader-test" | "microbench" | "test-burn" | "long-test" | "cqlsh-test" | "simulator-dtest" | "dtest" | "dtest-novnode" | "dtest-latest" | "dtest-large" | "dtest-large-novnode" | "dtest-upgrade" | "dtest-upgrade-novnode"| "dtest-upgrade-large" | "dtest-upgrade-novnode-large" | "test" | "test-cdc" | "test-compression" | "test-oa" | "test-system-keyspace-directory" | "test-latest" | "jvm-dtest" | "jvm-dtest-upgrade" | "jvm-dtest-novnode" | "jvm-dtest-upgrade-novnode") test_type="-a ${1}" if [[ -z ${2} ]]; then test_list="" @@ -182,7 +182,7 @@ docker_flags="-m 5g --memory-swap 5g" case ${test_target/-repeat/} in "build_dtest_jars") ;; - "stress-test" | "fqltool-test" ) + "stress-test" | "fqltool-test" | "sstableloader-test" ) [[ ${mem} -gt $((1 * 1024 * 1024 * 1024 * ${jenkins_executors})) ]] || { error 1 "${target} require minimum docker memory 1g (per jenkins executor (${jenkins_executors})), found ${mem}"; } ;; # test-burn doesn't have enough tests in it to split beyond 8, and burn and long we want a bit more resources anyway diff --git a/.build/parent-pom-template.xml b/.build/parent-maven-pom.xml similarity index 99% rename from .build/parent-pom-template.xml rename to .build/parent-maven-pom.xml index 714f18b8e5cc..a42991bca455 100644 --- a/.build/parent-pom-template.xml +++ b/.build/parent-maven-pom.xml @@ -671,7 +671,7 @@ org.apache.cassandra cassandra-all - 4.1-alpha2-SNAPSHOT + @version@ io.dropwizard.metrics diff --git a/.build/run-tests.sh b/.build/run-tests.sh index ad2e04b40b12..0c5c5ce558ba 100755 --- a/.build/run-tests.sh +++ b/.build/run-tests.sh @@ -66,7 +66,7 @@ print_help() { # legacy argument handling case ${1} in - "build_dtest_jars" | "stress-test" | "fqltool-test" | "microbench" | "test-burn" | "long-test" | "cqlsh-test" | "simulator-dtest" | "test" | "test-cdc" | "test-compression" | "test-oa" | "test-system-keyspace-directory" | "test-latest" | "jvm-dtest" | "jvm-dtest-upgrade" | "jvm-dtest-novnode" | "jvm-dtest-upgrade-novnode") + "build_dtest_jars" | "stress-test" | "fqltool-test" | "sstableloader-test" | "microbench" | "test-burn" | "long-test" | "cqlsh-test" | "simulator-dtest" | "test" | "test-cdc" | "test-compression" | "test-oa" | "test-system-keyspace-directory" | "test-latest" | "jvm-dtest" | "jvm-dtest-upgrade" | "jvm-dtest-novnode" | "jvm-dtest-upgrade-novnode") test_type="-a ${1}" if [[ -z ${2} ]]; then test_list="" @@ -285,7 +285,7 @@ _main() { # check split_chunk is compatible with target (if not a regexp) if [[ "${_split_chunk}" =~ ^\d+/\d+$ ]] && [[ "1/1" != "${split_chunk}" ]] ; then case ${target} in - "stress-test" | "fqltool-test" | "microbench" | "cqlsh-test" | "simulator-dtest") + "stress-test" | "fqltool-test" | "sstableloader-test" | "microbench" | "cqlsh-test" | "simulator-dtest") error 1 "Target ${target} does not suport splits." ;; *) @@ -344,6 +344,11 @@ _main() { ant fqltool-build-test ${ANT_TEST_OPTS} ant $target ${ANT_TEST_OPTS} || echo "failed ${target} ${split_chunk}" ;; + "sstableloader-test") + # hard fail on test compilation, but dont fail the test run so unstable test reports are processed + ant sstableloader-build-test ${ANT_TEST_OPTS} + ant $target ${ANT_TEST_OPTS} || echo "failed ${target} ${split_chunk}" + ;; "microbench") ant $target ${ANT_TEST_OPTS} -Dmaven.test.failure.ignore=true ;; diff --git a/.build/sstableloader-deps-maven-pom.xml b/.build/sstableloader-deps-maven-pom.xml new file mode 100644 index 000000000000..2eeafd891ed2 --- /dev/null +++ b/.build/sstableloader-deps-maven-pom.xml @@ -0,0 +1,48 @@ + + + + 4.0.0 + + org.apache.cassandra + cassandra-parent + @version@ + @final.name@-parent.pom + + cassandra-sstableloader + @version@ + Apache Cassandra SSTableLoader + Standalone SSTableLoader for Apache Cassandra. + https://cassandra.apache.org + 2025 + + + The Apache Software License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + + + + scm:https://gitbox.apache.org/repos/asf/cassandra.git + scm:https://gitbox.apache.org/repos/asf/cassandra.git + https://gitbox.apache.org/repos/asf?p=cassandra.git + + + + org.apache.cassandra + cassandra-all + + + diff --git a/.circleci/config.yml b/.circleci/config.yml index 864919b8f418..edcbb164c35e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -53,7 +53,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_UPGRADE_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_UPGRADE_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_UPGRADE_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_UPGRADE_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_UPGRADE_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_UPGRADE_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -84,6 +84,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -191,6 +193,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -308,6 +312,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -373,6 +379,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -431,7 +439,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -462,6 +470,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -595,6 +605,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -660,6 +672,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -778,6 +792,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -886,6 +902,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1040,6 +1058,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1172,6 +1192,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1281,6 +1303,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1398,6 +1422,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1457,7 +1483,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -1488,6 +1514,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1606,6 +1634,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1714,6 +1744,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1773,7 +1805,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -1804,6 +1836,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1912,6 +1946,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2045,6 +2081,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2199,6 +2237,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2316,6 +2356,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2375,7 +2417,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2406,6 +2448,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2514,6 +2558,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2622,6 +2668,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2681,7 +2729,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2712,6 +2760,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2735,6 +2785,80 @@ jobs: - REPEATED_ANT_TEST_COUNT: 500 - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + j11_utests_sstableloader: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 1 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Run Unit Tests (sstableloader-test) + command: | + export PATH=$JAVA_HOME/bin:$PATH + time mv ~/cassandra /tmp + cd /tmp/cassandra + if [ -d ~/dtest_jars ]; then + cp ~/dtest_jars/dtest* /tmp/cassandra/build/ + fi + ant sstableloader-test -Dno-build-test=true + no_output_timeout: 15m + - store_test_results: + path: /tmp/cassandra/build/test/output/ + - store_artifacts: + path: /tmp/cassandra/build/test/output + destination: junitxml + - store_artifacts: + path: /tmp/cassandra/build/test/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - CASSANDRA_USE_JDK11: true j11_utests_compression_repeat: docker: - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest @@ -2770,7 +2894,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2801,6 +2925,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2919,6 +3045,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3019,6 +3147,7 @@ jobs: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then @@ -3114,6 +3243,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3199,6 +3330,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3223,6 +3356,97 @@ jobs: - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 - CASSANDRA_USE_JDK11: true + j17_utests_sstableloader_repeat: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 4 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Log Environment Information + command: | + echo '*** id ***' + id + echo '*** cat /proc/cpuinfo ***' + cat /proc/cpuinfo + echo '*** free -m ***' + free -m + echo '*** df -m ***' + df -m + echo '*** ifconfig -a ***' + ifconfig -a + echo '*** uname -a ***' + uname -a + echo '*** mount ***' + mount + echo '*** env ***' + env + echo '*** java ***' + which java + java -version + - run: + name: Repeatedly run new or modifed JUnit tests + no_output_timeout: 15m + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_SSTABLELOADER_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_SSTABLELOADER_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_SSTABLELOADER} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=sstableloader-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant sstableloader-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + - store_test_results: + path: /tmp/results/repeated_utests/output + - store_artifacts: + path: /tmp/results/repeated_utests/stdout + destination: stdout + - store_artifacts: + path: /tmp/results/repeated_utests/output + destination: junitxml + - store_artifacts: + path: /tmp/results/repeated_utests/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 j11_dtests_large_vnode_repeat: docker: - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest @@ -3332,6 +3556,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3417,6 +3643,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3476,7 +3704,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -3507,6 +3735,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3625,6 +3855,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3758,6 +3990,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3876,6 +4110,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3976,6 +4212,7 @@ jobs: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then @@ -4071,6 +4308,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4177,6 +4416,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4294,6 +4535,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4353,7 +4596,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4384,6 +4627,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4501,6 +4746,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4572,6 +4819,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4631,7 +4880,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4662,6 +4911,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4721,7 +4972,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4752,6 +5003,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4855,6 +5108,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4927,6 +5182,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4986,7 +5243,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -5017,6 +5274,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5130,6 +5389,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5248,6 +5509,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5320,6 +5583,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5379,7 +5644,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -5410,6 +5675,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5517,6 +5784,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5649,6 +5918,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5767,6 +6038,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5885,6 +6158,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6018,6 +6293,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6076,7 +6353,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6107,6 +6384,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6192,6 +6471,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6325,6 +6606,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6384,7 +6667,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6415,6 +6698,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6486,6 +6771,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6570,6 +6857,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6678,6 +6967,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6737,7 +7028,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_SIMULATOR_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_SIMULATOR_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_SIMULATOR_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-simulator-dtest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-simulator-dtest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_SIMULATOR_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_SIMULATOR_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_SIMULATOR_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-simulator-dtest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-simulator-dtest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6768,6 +7059,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6827,7 +7120,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6858,6 +7151,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6967,6 +7262,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7031,6 +7328,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7139,6 +7438,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7257,6 +7558,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7365,6 +7668,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7473,6 +7778,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7590,6 +7897,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7697,6 +8006,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7814,6 +8125,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7885,6 +8198,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7991,6 +8306,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8050,7 +8367,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8081,6 +8398,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8166,6 +8485,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8230,6 +8551,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8315,6 +8638,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8424,6 +8749,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8541,6 +8868,173 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + j11_utests_sstableloader_repeat: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 4 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Log Environment Information + command: | + echo '*** id ***' + id + echo '*** cat /proc/cpuinfo ***' + cat /proc/cpuinfo + echo '*** free -m ***' + free -m + echo '*** df -m ***' + df -m + echo '*** ifconfig -a ***' + ifconfig -a + echo '*** uname -a ***' + uname -a + echo '*** mount ***' + mount + echo '*** env ***' + env + echo '*** java ***' + which java + java -version + - run: + name: Repeatedly run new or modifed JUnit tests + no_output_timeout: 15m + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_SSTABLELOADER_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_SSTABLELOADER_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_SSTABLELOADER} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=sstableloader-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant sstableloader-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + - store_test_results: + path: /tmp/results/repeated_utests/output + - store_artifacts: + path: /tmp/results/repeated_utests/stdout + destination: stdout + - store_artifacts: + path: /tmp/results/repeated_utests/output + destination: junitxml + - store_artifacts: + path: /tmp/results/repeated_utests/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - CASSANDRA_USE_JDK11: true + j17_utests_sstableloader: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 1 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Run Unit Tests (sstableloader-test) + command: | + export PATH=$JAVA_HOME/bin:$PATH + time mv ~/cassandra /tmp + cd /tmp/cassandra + if [ -d ~/dtest_jars ]; then + cp ~/dtest_jars/dtest* /tmp/cassandra/build/ + fi + ant sstableloader-test -Dno-build-test=true + no_output_timeout: 15m + - store_test_results: + path: /tmp/cassandra/build/test/output/ + - store_artifacts: + path: /tmp/cassandra/build/test/output + destination: junitxml + - store_artifacts: + path: /tmp/cassandra/build/test/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8599,7 +9093,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8630,6 +9124,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8714,6 +9210,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8772,7 +9270,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8803,6 +9301,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8875,6 +9375,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8933,7 +9435,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8964,6 +9466,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9048,6 +9552,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9107,7 +9613,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9138,6 +9644,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9270,6 +9778,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9328,7 +9838,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9359,6 +9869,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9417,7 +9929,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9448,6 +9960,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9481,302 +9995,587 @@ workflows: - j11_build: requires: - start_j11_build + upstream: + start_j11_build: + - success - start_j11_unit_tests: type: approval - j11_unit_tests: requires: - start_j11_unit_tests - j11_build + upstream: + start_j11_unit_tests: + - success + j11_build: + - success - start_j11_jvm_dtests: type: approval - j11_jvm_dtests: requires: - start_j11_jvm_dtests - j11_build + upstream: + start_j11_jvm_dtests: + - success + j11_build: + - success - start_j11_jvm_dtests_latest_vnode: type: approval - j11_jvm_dtests_latest_vnode: requires: - start_j11_jvm_dtests_latest_vnode - j11_build + upstream: + start_j11_jvm_dtests_latest_vnode: + - success + j11_build: + - success - start_j17_jvm_dtests: type: approval - j17_jvm_dtests: requires: - start_j17_jvm_dtests - j11_build + upstream: + start_j17_jvm_dtests: + - success + j11_build: + - success - start_j17_jvm_dtests_latest_vnode: type: approval - j17_jvm_dtests_latest_vnode: requires: - start_j17_jvm_dtests_latest_vnode - j11_build + upstream: + start_j17_jvm_dtests_latest_vnode: + - success + j11_build: + - success - start_j11_simulator_dtests: type: approval - j11_simulator_dtests: requires: - start_j11_simulator_dtests - j11_build + upstream: + start_j11_simulator_dtests: + - success + j11_build: + - success - start_j11_cqlshlib_tests: type: approval - j11_cqlshlib_tests: requires: - start_j11_cqlshlib_tests - j11_build + upstream: + start_j11_cqlshlib_tests: + - success + j11_build: + - success - start_j11_cqlshlib_cython_tests: type: approval - j11_cqlshlib_cython_tests: requires: - start_j11_cqlshlib_cython_tests - j11_build + upstream: + start_j11_cqlshlib_cython_tests: + - success + j11_build: + - success - start_j17_cqlshlib_tests: type: approval - j17_cqlshlib_tests: requires: - start_j17_cqlshlib_tests - j11_build + upstream: + start_j17_cqlshlib_tests: + - success + j11_build: + - success - start_j17_cqlshlib_cython_tests: type: approval - j17_cqlshlib_cython_tests: requires: - start_j17_cqlshlib_cython_tests - j11_build + upstream: + start_j17_cqlshlib_cython_tests: + - success + j11_build: + - success - start_j17_unit_tests: type: approval - j17_unit_tests: requires: - start_j17_unit_tests - j11_build + upstream: + start_j17_unit_tests: + - success + j11_build: + - success - start_j11_utests_oa: type: approval - j11_utests_oa: requires: - start_j11_utests_oa - j11_build + upstream: + start_j11_utests_oa: + - success + j11_build: + - success - start_j17_utests_oa: type: approval - j17_utests_oa: requires: - start_j17_utests_oa - j11_build + upstream: + start_j17_utests_oa: + - success + j11_build: + - success - start_j11_utests_long: type: approval - j11_utests_long: requires: - start_j11_utests_long - j11_build + upstream: + start_j11_utests_long: + - success + j11_build: + - success - start_j17_utests_long: type: approval - j17_utests_long: requires: - start_j17_utests_long - j11_build + upstream: + start_j17_utests_long: + - success + j11_build: + - success - start_j11_utests_cdc: type: approval - j11_utests_cdc: requires: - start_j11_utests_cdc - j11_build + upstream: + start_j11_utests_cdc: + - success + j11_build: + - success - start_j17_utests_cdc: type: approval - j17_utests_cdc: requires: - start_j17_utests_cdc - j11_build + upstream: + start_j17_utests_cdc: + - success + j11_build: + - success - start_j11_utests_compression: type: approval - j11_utests_compression: requires: - start_j11_utests_compression - j11_build + upstream: + start_j11_utests_compression: + - success + j11_build: + - success - start_j17_utests_compression: type: approval - j17_utests_compression: requires: - start_j17_utests_compression - j11_build + upstream: + start_j17_utests_compression: + - success + j11_build: + - success - start_j11_utests_latest: type: approval - j11_utests_latest: requires: - start_j11_utests_latest - j11_build + upstream: + start_j11_utests_latest: + - success + j11_build: + - success - start_j17_utests_latest: type: approval - j17_utests_latest: requires: - start_j17_utests_latest - j11_build + upstream: + start_j17_utests_latest: + - success + j11_build: + - success - start_j11_utests_stress: type: approval - j11_utests_stress: requires: - start_j11_utests_stress - j11_build + upstream: + start_j11_utests_stress: + - success + j11_build: + - success - start_j17_utests_stress: type: approval - j17_utests_stress: requires: - start_j17_utests_stress - j11_build + upstream: + start_j17_utests_stress: + - success + j11_build: + - success - start_j11_utests_fqltool: type: approval - j11_utests_fqltool: requires: - start_j11_utests_fqltool - j11_build + upstream: + start_j11_utests_fqltool: + - success + j11_build: + - success - start_j17_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_j17_utests_fqltool - j11_build + upstream: + start_j17_utests_fqltool: + - success + j11_build: + - success + - start_j11_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_j11_utests_sstableloader + - j11_build + upstream: + start_j11_utests_sstableloader: + - success + j11_build: + - success + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j11_build + upstream: + start_j17_utests_sstableloader: + - success + j11_build: + - success - start_j11_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: requires: - start_j11_utests_system_keyspace_directory - j11_build + upstream: + start_j11_utests_system_keyspace_directory: + - success + j11_build: + - success - start_j17_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_j17_utests_system_keyspace_directory - j11_build + upstream: + start_j17_utests_system_keyspace_directory: + - success + j11_build: + - success - start_j11_dtest_jars_build: type: approval - j11_dtest_jars_build: requires: - j11_build - start_j11_dtest_jars_build + upstream: + j11_build: + - success + start_j11_dtest_jars_build: + - success - start_jvm_upgrade_dtests: type: approval - j11_jvm_upgrade_dtests: requires: - start_jvm_upgrade_dtests - j11_dtest_jars_build + upstream: + start_jvm_upgrade_dtests: + - success + j11_dtest_jars_build: + - success - start_j11_dtests: type: approval - j11_dtests: requires: - start_j11_dtests - j11_build + upstream: + start_j11_dtests: + - success + j11_build: + - success - start_j11_dtests_vnode: type: approval - j11_dtests_vnode: requires: - start_j11_dtests_vnode - j11_build + upstream: + start_j11_dtests_vnode: + - success + j11_build: + - success - start_j11_dtests_latest: type: approval - j11_dtests_latest: requires: - start_j11_dtests_latest - j11_build + upstream: + start_j11_dtests_latest: + - success + j11_build: + - success - start_j17_dtests: type: approval - j17_dtests: requires: - start_j17_dtests - j11_build + upstream: + start_j17_dtests: + - success + j11_build: + - success - start_j17_dtests_vnode: type: approval - j17_dtests_vnode: requires: - start_j17_dtests_vnode - j11_build + upstream: + start_j17_dtests_vnode: + - success + j11_build: + - success - start_j17_dtests_latest: type: approval - j17_dtests_latest: requires: - start_j17_dtests_latest - j11_build + upstream: + start_j17_dtests_latest: + - success + j11_build: + - success - start_j11_dtests_large: type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - start_j11_dtests_large_vnode: type: approval - j11_dtests_large_vnode: requires: - start_j11_dtests_large_vnode - j11_build + upstream: + start_j11_dtests_large_vnode: + - success + j11_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - start_j17_dtests_large_vnode: type: approval - j17_dtests_large_vnode: requires: - start_j17_dtests_large_vnode - j11_build + upstream: + start_j17_dtests_large_vnode: + - success + j11_build: + - success - start_j11_cqlsh_tests: type: approval - j11_cqlsh_dtests_py38: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py311: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py38_vnode: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_vnode: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - start_j11_cqlsh_tests_latest: type: approval - j11_cqlsh_dtests_py38_latest: requires: - start_j11_cqlsh_tests_latest - j11_build + upstream: + start_j11_cqlsh_tests_latest: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_latest: requires: - start_j11_cqlsh_tests_latest - j11_build + upstream: + start_j11_cqlsh_tests_latest: + - success + j11_build: + - success - start_j17_cqlsh_tests: type: approval - j17_cqlsh_dtests_py38: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py311: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - start_j17_cqlsh_tests_latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh_tests_latest - j11_build + upstream: + start_j17_cqlsh_tests_latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh_tests_latest - j11_build + upstream: + start_j17_cqlsh_tests_latest: + - success + j11_build: + - success - start_j11_upgrade_dtests: type: approval - j11_upgrade_dtests: requires: - start_j11_upgrade_dtests - j11_build + upstream: + start_j11_upgrade_dtests: + - success + j11_build: + - success java11_pre-commit_tests: jobs: - start_pre-commit_tests: @@ -9784,207 +10583,428 @@ workflows: - j11_build: requires: - start_pre-commit_tests + upstream: + start_pre-commit_tests: + - success - j11_unit_tests: requires: - j11_build + upstream: + j11_build: + - success - j11_utests_oa: requires: - j11_build + upstream: + j11_build: + - success - j11_utests_latest: requires: - j11_build + upstream: + j11_build: + - success - j11_simulator_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_jvm_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_jvm_dtests_latest_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_jvm_dtests: requires: - j11_build + upstream: + j11_build: + - success - j17_jvm_dtests_latest_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlshlib_tests: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlshlib_cython_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlshlib_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlshlib_cython_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_unit_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_oa: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_latest: requires: - j11_build + upstream: + j11_build: + - success - start_utests_long: type: approval - j11_utests_long: requires: - start_utests_long - j11_build + upstream: + start_utests_long: + - success + j11_build: + - success - j17_utests_long: requires: - start_utests_long - j11_build + upstream: + start_utests_long: + - success + j11_build: + - success - start_utests_cdc: type: approval - j11_utests_cdc: requires: - start_utests_cdc - j11_build + upstream: + start_utests_cdc: + - success + j11_build: + - success - j17_utests_cdc: requires: - start_utests_cdc - j11_build + upstream: + start_utests_cdc: + - success + j11_build: + - success - start_utests_compression: type: approval - j11_utests_compression: requires: - start_utests_compression - j11_build + upstream: + start_utests_compression: + - success + j11_build: + - success - j17_utests_compression: requires: - start_utests_compression - j11_build + upstream: + start_utests_compression: + - success + j11_build: + - success - start_utests_stress: type: approval - j11_utests_stress: requires: - start_utests_stress - j11_build + upstream: + start_utests_stress: + - success + j11_build: + - success - j17_utests_stress: requires: - start_utests_stress - j11_build + upstream: + start_utests_stress: + - success + j11_build: + - success - start_utests_fqltool: type: approval - j11_utests_fqltool: requires: - start_utests_fqltool - j11_build + upstream: + start_utests_fqltool: + - success + j11_build: + - success - j17_utests_fqltool: requires: - start_utests_fqltool - j11_build + upstream: + start_utests_fqltool: + - success + j11_build: + - success + - start_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + upstream: + start_utests_sstableloader: + - success + j11_build: + - success + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + upstream: + start_utests_sstableloader: + - success + j11_build: + - success - start_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_system_keyspace_directory: requires: - start_utests_system_keyspace_directory - j11_build + upstream: + start_utests_system_keyspace_directory: + - success + j11_build: + - success - start_jvm_upgrade_dtests: type: approval - j11_dtest_jars_build: requires: - j11_build - start_jvm_upgrade_dtests + upstream: + j11_build: + - success + start_jvm_upgrade_dtests: + - success - j11_jvm_upgrade_dtests: requires: - j11_dtest_jars_build + upstream: + j11_dtest_jars_build: + - success - j11_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_dtests_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_dtests_latest: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests_latest: requires: - j11_build + upstream: + j11_build: + - success - start_j11_dtests_large: type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - j11_dtests_large_vnode: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - j17_dtests_large_vnode: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - j11_cqlsh_dtests_py38: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py311: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py38_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py311_vnode: requires: - j11_build + upstream: + j11_build: + - success - start_j11_cqlsh_dtests_latest: type: approval - j11_cqlsh_dtests_py38_latest: requires: - start_j11_cqlsh_dtests_latest - j11_build + upstream: + start_j11_cqlsh_dtests_latest: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_latest: requires: - start_j11_cqlsh_dtests_latest - j11_build + upstream: + start_j11_cqlsh_dtests_latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py38: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py311: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - j11_build + upstream: + j11_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j11_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j11_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j11_build: + - success - start_j11_upgrade_dtests: type: approval - j11_upgrade_dtests: requires: - j11_build - start_j11_upgrade_dtests + upstream: + j11_build: + - success + start_j11_upgrade_dtests: + - success java17_separate_tests: jobs: - start_j17_build: @@ -9992,142 +11012,276 @@ workflows: - j17_build: requires: - start_j17_build + upstream: + start_j17_build: + - success - start_j17_unit_tests: type: approval - j17_unit_tests: requires: - start_j17_unit_tests - j17_build + upstream: + start_j17_unit_tests: + - success + j17_build: + - success - start_j17_jvm_dtests: type: approval - j17_jvm_dtests: requires: - start_j17_jvm_dtests - j17_build + upstream: + start_j17_jvm_dtests: + - success + j17_build: + - success - start_j17_jvm_dtests_latest_vnode: type: approval - j17_jvm_dtests_latest_vnode: requires: - start_j17_jvm_dtests_latest_vnode - j17_build + upstream: + start_j17_jvm_dtests_latest_vnode: + - success + j17_build: + - success - start_j17_cqlshlib_tests: type: approval - j17_cqlshlib_tests: requires: - start_j17_cqlshlib_tests - j17_build + upstream: + start_j17_cqlshlib_tests: + - success + j17_build: + - success - start_j17_cqlshlib_cython_tests: type: approval - j17_cqlshlib_cython_tests: requires: - start_j17_cqlshlib_cython_tests - j17_build + upstream: + start_j17_cqlshlib_cython_tests: + - success + j17_build: + - success - start_j17_dtests: type: approval - j17_dtests: requires: - start_j17_dtests - j17_build + upstream: + start_j17_dtests: + - success + j17_build: + - success - start_j17_dtests_vnode: type: approval - j17_dtests_vnode: requires: - start_j17_dtests_vnode - j17_build + upstream: + start_j17_dtests_vnode: + - success + j17_build: + - success - start_j17_dtests_latest: type: approval - j17_dtests_latest: requires: - start_j17_dtests_latest - j17_build + upstream: + start_j17_dtests_latest: + - success + j17_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - start_j17_dtests_large_vnode: type: approval - j17_dtests_large_vnode: requires: - start_j17_dtests_large_vnode - j17_build + upstream: + start_j17_dtests_large_vnode: + - success + j17_build: + - success - start_j17_cqlsh_tests: type: approval - j17_cqlsh_dtests_py38: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py311: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - start_j17_utests_oa: type: approval - j17_utests_oa: requires: - start_j17_utests_oa - j17_build + upstream: + start_j17_utests_oa: + - success + j17_build: + - success - start_j17_utests_long: type: approval - j17_utests_long: requires: - start_j17_utests_long - j17_build + upstream: + start_j17_utests_long: + - success + j17_build: + - success - start_j17_utests_cdc: type: approval - j17_utests_cdc: requires: - start_j17_utests_cdc - j17_build + upstream: + start_j17_utests_cdc: + - success + j17_build: + - success - start_j17_utests_compression: type: approval - j17_utests_compression: requires: - start_j17_utests_compression - j17_build + upstream: + start_j17_utests_compression: + - success + j17_build: + - success - start_j17_utests_latest: type: approval - j17_utests_latest: requires: - start_j17_utests_latest - j17_build + upstream: + start_j17_utests_latest: + - success + j17_build: + - success - start_j17_utests_stress: type: approval - j17_utests_stress: requires: - start_j17_utests_stress - j17_build + upstream: + start_j17_utests_stress: + - success + j17_build: + - success - start_j17_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_j17_utests_fqltool - j17_build + upstream: + start_j17_utests_fqltool: + - success + j17_build: + - success + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j17_build + upstream: + start_j17_utests_sstableloader: + - success + j17_build: + - success - start_j17_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_j17_utests_system_keyspace_directory - j17_build + upstream: + start_j17_utests_system_keyspace_directory: + - success + j17_build: + - success java17_pre-commit_tests: jobs: - start_pre-commit_tests: @@ -10135,101 +11289,207 @@ workflows: - j17_build: requires: - start_pre-commit_tests + upstream: + start_pre-commit_tests: + - success - j17_unit_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_utests_oa: requires: - j17_build + upstream: + j17_build: + - success - j17_utests_latest: requires: - j17_build + upstream: + j17_build: + - success - j17_jvm_dtests: requires: - j17_build + upstream: + j17_build: + - success - j17_jvm_dtests_latest_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlshlib_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlshlib_cython_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests_latest: requires: - j17_build + upstream: + j17_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - j17_dtests_large_vnode: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - j17_cqlsh_dtests_py38: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py311: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - j17_build + upstream: + j17_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - start_utests_long: type: approval - j17_utests_long: requires: - start_utests_long - j17_build + upstream: + start_utests_long: + - success + j17_build: + - success - start_utests_cdc: type: approval - j17_utests_cdc: requires: - start_utests_cdc - j17_build + upstream: + start_utests_cdc: + - success + j17_build: + - success - start_utests_compression: type: approval - j17_utests_compression: requires: - start_utests_compression - j17_build + upstream: + start_utests_compression: + - success + j17_build: + - success - start_utests_stress: type: approval - j17_utests_stress: requires: - start_utests_stress - j17_build + upstream: + start_utests_stress: + - success + j17_build: + - success - start_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_utests_fqltool - j17_build + upstream: + start_utests_fqltool: + - success + j17_build: + - success + - start_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j17_build + upstream: + start_utests_sstableloader: + - success + j17_build: + - success - start_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_utests_system_keyspace_directory - j17_build + upstream: + start_utests_system_keyspace_directory: + - success + j17_build: + - success diff --git a/.circleci/config.yml.FREE b/.circleci/config.yml.FREE index 864919b8f418..edcbb164c35e 100644 --- a/.circleci/config.yml.FREE +++ b/.circleci/config.yml.FREE @@ -53,7 +53,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_UPGRADE_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_UPGRADE_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_UPGRADE_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_UPGRADE_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_UPGRADE_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_UPGRADE_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -84,6 +84,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -191,6 +193,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -308,6 +312,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -373,6 +379,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -431,7 +439,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -462,6 +470,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -595,6 +605,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -660,6 +672,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -778,6 +792,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -886,6 +902,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1040,6 +1058,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1172,6 +1192,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1281,6 +1303,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1398,6 +1422,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1457,7 +1483,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -1488,6 +1514,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1606,6 +1634,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1714,6 +1744,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1773,7 +1805,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -1804,6 +1836,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1912,6 +1946,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2045,6 +2081,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2199,6 +2237,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2316,6 +2356,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2375,7 +2417,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2406,6 +2448,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2514,6 +2558,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2622,6 +2668,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2681,7 +2729,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2712,6 +2760,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2735,6 +2785,80 @@ jobs: - REPEATED_ANT_TEST_COUNT: 500 - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + j11_utests_sstableloader: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 1 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Run Unit Tests (sstableloader-test) + command: | + export PATH=$JAVA_HOME/bin:$PATH + time mv ~/cassandra /tmp + cd /tmp/cassandra + if [ -d ~/dtest_jars ]; then + cp ~/dtest_jars/dtest* /tmp/cassandra/build/ + fi + ant sstableloader-test -Dno-build-test=true + no_output_timeout: 15m + - store_test_results: + path: /tmp/cassandra/build/test/output/ + - store_artifacts: + path: /tmp/cassandra/build/test/output + destination: junitxml + - store_artifacts: + path: /tmp/cassandra/build/test/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - CASSANDRA_USE_JDK11: true j11_utests_compression_repeat: docker: - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest @@ -2770,7 +2894,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2801,6 +2925,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2919,6 +3045,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3019,6 +3147,7 @@ jobs: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then @@ -3114,6 +3243,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3199,6 +3330,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3223,6 +3356,97 @@ jobs: - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 - CASSANDRA_USE_JDK11: true + j17_utests_sstableloader_repeat: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 4 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Log Environment Information + command: | + echo '*** id ***' + id + echo '*** cat /proc/cpuinfo ***' + cat /proc/cpuinfo + echo '*** free -m ***' + free -m + echo '*** df -m ***' + df -m + echo '*** ifconfig -a ***' + ifconfig -a + echo '*** uname -a ***' + uname -a + echo '*** mount ***' + mount + echo '*** env ***' + env + echo '*** java ***' + which java + java -version + - run: + name: Repeatedly run new or modifed JUnit tests + no_output_timeout: 15m + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_SSTABLELOADER_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_SSTABLELOADER_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_SSTABLELOADER} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=sstableloader-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant sstableloader-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + - store_test_results: + path: /tmp/results/repeated_utests/output + - store_artifacts: + path: /tmp/results/repeated_utests/stdout + destination: stdout + - store_artifacts: + path: /tmp/results/repeated_utests/output + destination: junitxml + - store_artifacts: + path: /tmp/results/repeated_utests/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 j11_dtests_large_vnode_repeat: docker: - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest @@ -3332,6 +3556,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3417,6 +3643,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3476,7 +3704,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -3507,6 +3735,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3625,6 +3855,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3758,6 +3990,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3876,6 +4110,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3976,6 +4212,7 @@ jobs: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then @@ -4071,6 +4308,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4177,6 +4416,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4294,6 +4535,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4353,7 +4596,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4384,6 +4627,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4501,6 +4746,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4572,6 +4819,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4631,7 +4880,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4662,6 +4911,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4721,7 +4972,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4752,6 +5003,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4855,6 +5108,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4927,6 +5182,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4986,7 +5243,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -5017,6 +5274,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5130,6 +5389,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5248,6 +5509,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5320,6 +5583,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5379,7 +5644,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -5410,6 +5675,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5517,6 +5784,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5649,6 +5918,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5767,6 +6038,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5885,6 +6158,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6018,6 +6293,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6076,7 +6353,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6107,6 +6384,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6192,6 +6471,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6325,6 +6606,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6384,7 +6667,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6415,6 +6698,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6486,6 +6771,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6570,6 +6857,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6678,6 +6967,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6737,7 +7028,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_SIMULATOR_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_SIMULATOR_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_SIMULATOR_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-simulator-dtest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-simulator-dtest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_SIMULATOR_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_SIMULATOR_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_SIMULATOR_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-simulator-dtest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-simulator-dtest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6768,6 +7059,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6827,7 +7120,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6858,6 +7151,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6967,6 +7262,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7031,6 +7328,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7139,6 +7438,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7257,6 +7558,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7365,6 +7668,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7473,6 +7778,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7590,6 +7897,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7697,6 +8006,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7814,6 +8125,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7885,6 +8198,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7991,6 +8306,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8050,7 +8367,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8081,6 +8398,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8166,6 +8485,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8230,6 +8551,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8315,6 +8638,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8424,6 +8749,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8541,6 +8868,173 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + j11_utests_sstableloader_repeat: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 4 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Log Environment Information + command: | + echo '*** id ***' + id + echo '*** cat /proc/cpuinfo ***' + cat /proc/cpuinfo + echo '*** free -m ***' + free -m + echo '*** df -m ***' + df -m + echo '*** ifconfig -a ***' + ifconfig -a + echo '*** uname -a ***' + uname -a + echo '*** mount ***' + mount + echo '*** env ***' + env + echo '*** java ***' + which java + java -version + - run: + name: Repeatedly run new or modifed JUnit tests + no_output_timeout: 15m + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_SSTABLELOADER_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_SSTABLELOADER_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_SSTABLELOADER} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=sstableloader-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant sstableloader-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + - store_test_results: + path: /tmp/results/repeated_utests/output + - store_artifacts: + path: /tmp/results/repeated_utests/stdout + destination: stdout + - store_artifacts: + path: /tmp/results/repeated_utests/output + destination: junitxml + - store_artifacts: + path: /tmp/results/repeated_utests/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - CASSANDRA_USE_JDK11: true + j17_utests_sstableloader: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 1 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Run Unit Tests (sstableloader-test) + command: | + export PATH=$JAVA_HOME/bin:$PATH + time mv ~/cassandra /tmp + cd /tmp/cassandra + if [ -d ~/dtest_jars ]; then + cp ~/dtest_jars/dtest* /tmp/cassandra/build/ + fi + ant sstableloader-test -Dno-build-test=true + no_output_timeout: 15m + - store_test_results: + path: /tmp/cassandra/build/test/output/ + - store_artifacts: + path: /tmp/cassandra/build/test/output + destination: junitxml + - store_artifacts: + path: /tmp/cassandra/build/test/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8599,7 +9093,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8630,6 +9124,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8714,6 +9210,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8772,7 +9270,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8803,6 +9301,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8875,6 +9375,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8933,7 +9435,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8964,6 +9466,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9048,6 +9552,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9107,7 +9613,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9138,6 +9644,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9270,6 +9778,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9328,7 +9838,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9359,6 +9869,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9417,7 +9929,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9448,6 +9960,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9481,302 +9995,587 @@ workflows: - j11_build: requires: - start_j11_build + upstream: + start_j11_build: + - success - start_j11_unit_tests: type: approval - j11_unit_tests: requires: - start_j11_unit_tests - j11_build + upstream: + start_j11_unit_tests: + - success + j11_build: + - success - start_j11_jvm_dtests: type: approval - j11_jvm_dtests: requires: - start_j11_jvm_dtests - j11_build + upstream: + start_j11_jvm_dtests: + - success + j11_build: + - success - start_j11_jvm_dtests_latest_vnode: type: approval - j11_jvm_dtests_latest_vnode: requires: - start_j11_jvm_dtests_latest_vnode - j11_build + upstream: + start_j11_jvm_dtests_latest_vnode: + - success + j11_build: + - success - start_j17_jvm_dtests: type: approval - j17_jvm_dtests: requires: - start_j17_jvm_dtests - j11_build + upstream: + start_j17_jvm_dtests: + - success + j11_build: + - success - start_j17_jvm_dtests_latest_vnode: type: approval - j17_jvm_dtests_latest_vnode: requires: - start_j17_jvm_dtests_latest_vnode - j11_build + upstream: + start_j17_jvm_dtests_latest_vnode: + - success + j11_build: + - success - start_j11_simulator_dtests: type: approval - j11_simulator_dtests: requires: - start_j11_simulator_dtests - j11_build + upstream: + start_j11_simulator_dtests: + - success + j11_build: + - success - start_j11_cqlshlib_tests: type: approval - j11_cqlshlib_tests: requires: - start_j11_cqlshlib_tests - j11_build + upstream: + start_j11_cqlshlib_tests: + - success + j11_build: + - success - start_j11_cqlshlib_cython_tests: type: approval - j11_cqlshlib_cython_tests: requires: - start_j11_cqlshlib_cython_tests - j11_build + upstream: + start_j11_cqlshlib_cython_tests: + - success + j11_build: + - success - start_j17_cqlshlib_tests: type: approval - j17_cqlshlib_tests: requires: - start_j17_cqlshlib_tests - j11_build + upstream: + start_j17_cqlshlib_tests: + - success + j11_build: + - success - start_j17_cqlshlib_cython_tests: type: approval - j17_cqlshlib_cython_tests: requires: - start_j17_cqlshlib_cython_tests - j11_build + upstream: + start_j17_cqlshlib_cython_tests: + - success + j11_build: + - success - start_j17_unit_tests: type: approval - j17_unit_tests: requires: - start_j17_unit_tests - j11_build + upstream: + start_j17_unit_tests: + - success + j11_build: + - success - start_j11_utests_oa: type: approval - j11_utests_oa: requires: - start_j11_utests_oa - j11_build + upstream: + start_j11_utests_oa: + - success + j11_build: + - success - start_j17_utests_oa: type: approval - j17_utests_oa: requires: - start_j17_utests_oa - j11_build + upstream: + start_j17_utests_oa: + - success + j11_build: + - success - start_j11_utests_long: type: approval - j11_utests_long: requires: - start_j11_utests_long - j11_build + upstream: + start_j11_utests_long: + - success + j11_build: + - success - start_j17_utests_long: type: approval - j17_utests_long: requires: - start_j17_utests_long - j11_build + upstream: + start_j17_utests_long: + - success + j11_build: + - success - start_j11_utests_cdc: type: approval - j11_utests_cdc: requires: - start_j11_utests_cdc - j11_build + upstream: + start_j11_utests_cdc: + - success + j11_build: + - success - start_j17_utests_cdc: type: approval - j17_utests_cdc: requires: - start_j17_utests_cdc - j11_build + upstream: + start_j17_utests_cdc: + - success + j11_build: + - success - start_j11_utests_compression: type: approval - j11_utests_compression: requires: - start_j11_utests_compression - j11_build + upstream: + start_j11_utests_compression: + - success + j11_build: + - success - start_j17_utests_compression: type: approval - j17_utests_compression: requires: - start_j17_utests_compression - j11_build + upstream: + start_j17_utests_compression: + - success + j11_build: + - success - start_j11_utests_latest: type: approval - j11_utests_latest: requires: - start_j11_utests_latest - j11_build + upstream: + start_j11_utests_latest: + - success + j11_build: + - success - start_j17_utests_latest: type: approval - j17_utests_latest: requires: - start_j17_utests_latest - j11_build + upstream: + start_j17_utests_latest: + - success + j11_build: + - success - start_j11_utests_stress: type: approval - j11_utests_stress: requires: - start_j11_utests_stress - j11_build + upstream: + start_j11_utests_stress: + - success + j11_build: + - success - start_j17_utests_stress: type: approval - j17_utests_stress: requires: - start_j17_utests_stress - j11_build + upstream: + start_j17_utests_stress: + - success + j11_build: + - success - start_j11_utests_fqltool: type: approval - j11_utests_fqltool: requires: - start_j11_utests_fqltool - j11_build + upstream: + start_j11_utests_fqltool: + - success + j11_build: + - success - start_j17_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_j17_utests_fqltool - j11_build + upstream: + start_j17_utests_fqltool: + - success + j11_build: + - success + - start_j11_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_j11_utests_sstableloader + - j11_build + upstream: + start_j11_utests_sstableloader: + - success + j11_build: + - success + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j11_build + upstream: + start_j17_utests_sstableloader: + - success + j11_build: + - success - start_j11_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: requires: - start_j11_utests_system_keyspace_directory - j11_build + upstream: + start_j11_utests_system_keyspace_directory: + - success + j11_build: + - success - start_j17_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_j17_utests_system_keyspace_directory - j11_build + upstream: + start_j17_utests_system_keyspace_directory: + - success + j11_build: + - success - start_j11_dtest_jars_build: type: approval - j11_dtest_jars_build: requires: - j11_build - start_j11_dtest_jars_build + upstream: + j11_build: + - success + start_j11_dtest_jars_build: + - success - start_jvm_upgrade_dtests: type: approval - j11_jvm_upgrade_dtests: requires: - start_jvm_upgrade_dtests - j11_dtest_jars_build + upstream: + start_jvm_upgrade_dtests: + - success + j11_dtest_jars_build: + - success - start_j11_dtests: type: approval - j11_dtests: requires: - start_j11_dtests - j11_build + upstream: + start_j11_dtests: + - success + j11_build: + - success - start_j11_dtests_vnode: type: approval - j11_dtests_vnode: requires: - start_j11_dtests_vnode - j11_build + upstream: + start_j11_dtests_vnode: + - success + j11_build: + - success - start_j11_dtests_latest: type: approval - j11_dtests_latest: requires: - start_j11_dtests_latest - j11_build + upstream: + start_j11_dtests_latest: + - success + j11_build: + - success - start_j17_dtests: type: approval - j17_dtests: requires: - start_j17_dtests - j11_build + upstream: + start_j17_dtests: + - success + j11_build: + - success - start_j17_dtests_vnode: type: approval - j17_dtests_vnode: requires: - start_j17_dtests_vnode - j11_build + upstream: + start_j17_dtests_vnode: + - success + j11_build: + - success - start_j17_dtests_latest: type: approval - j17_dtests_latest: requires: - start_j17_dtests_latest - j11_build + upstream: + start_j17_dtests_latest: + - success + j11_build: + - success - start_j11_dtests_large: type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - start_j11_dtests_large_vnode: type: approval - j11_dtests_large_vnode: requires: - start_j11_dtests_large_vnode - j11_build + upstream: + start_j11_dtests_large_vnode: + - success + j11_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - start_j17_dtests_large_vnode: type: approval - j17_dtests_large_vnode: requires: - start_j17_dtests_large_vnode - j11_build + upstream: + start_j17_dtests_large_vnode: + - success + j11_build: + - success - start_j11_cqlsh_tests: type: approval - j11_cqlsh_dtests_py38: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py311: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py38_vnode: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_vnode: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - start_j11_cqlsh_tests_latest: type: approval - j11_cqlsh_dtests_py38_latest: requires: - start_j11_cqlsh_tests_latest - j11_build + upstream: + start_j11_cqlsh_tests_latest: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_latest: requires: - start_j11_cqlsh_tests_latest - j11_build + upstream: + start_j11_cqlsh_tests_latest: + - success + j11_build: + - success - start_j17_cqlsh_tests: type: approval - j17_cqlsh_dtests_py38: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py311: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - start_j17_cqlsh_tests_latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh_tests_latest - j11_build + upstream: + start_j17_cqlsh_tests_latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh_tests_latest - j11_build + upstream: + start_j17_cqlsh_tests_latest: + - success + j11_build: + - success - start_j11_upgrade_dtests: type: approval - j11_upgrade_dtests: requires: - start_j11_upgrade_dtests - j11_build + upstream: + start_j11_upgrade_dtests: + - success + j11_build: + - success java11_pre-commit_tests: jobs: - start_pre-commit_tests: @@ -9784,207 +10583,428 @@ workflows: - j11_build: requires: - start_pre-commit_tests + upstream: + start_pre-commit_tests: + - success - j11_unit_tests: requires: - j11_build + upstream: + j11_build: + - success - j11_utests_oa: requires: - j11_build + upstream: + j11_build: + - success - j11_utests_latest: requires: - j11_build + upstream: + j11_build: + - success - j11_simulator_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_jvm_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_jvm_dtests_latest_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_jvm_dtests: requires: - j11_build + upstream: + j11_build: + - success - j17_jvm_dtests_latest_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlshlib_tests: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlshlib_cython_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlshlib_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlshlib_cython_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_unit_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_oa: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_latest: requires: - j11_build + upstream: + j11_build: + - success - start_utests_long: type: approval - j11_utests_long: requires: - start_utests_long - j11_build + upstream: + start_utests_long: + - success + j11_build: + - success - j17_utests_long: requires: - start_utests_long - j11_build + upstream: + start_utests_long: + - success + j11_build: + - success - start_utests_cdc: type: approval - j11_utests_cdc: requires: - start_utests_cdc - j11_build + upstream: + start_utests_cdc: + - success + j11_build: + - success - j17_utests_cdc: requires: - start_utests_cdc - j11_build + upstream: + start_utests_cdc: + - success + j11_build: + - success - start_utests_compression: type: approval - j11_utests_compression: requires: - start_utests_compression - j11_build + upstream: + start_utests_compression: + - success + j11_build: + - success - j17_utests_compression: requires: - start_utests_compression - j11_build + upstream: + start_utests_compression: + - success + j11_build: + - success - start_utests_stress: type: approval - j11_utests_stress: requires: - start_utests_stress - j11_build + upstream: + start_utests_stress: + - success + j11_build: + - success - j17_utests_stress: requires: - start_utests_stress - j11_build + upstream: + start_utests_stress: + - success + j11_build: + - success - start_utests_fqltool: type: approval - j11_utests_fqltool: requires: - start_utests_fqltool - j11_build + upstream: + start_utests_fqltool: + - success + j11_build: + - success - j17_utests_fqltool: requires: - start_utests_fqltool - j11_build + upstream: + start_utests_fqltool: + - success + j11_build: + - success + - start_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + upstream: + start_utests_sstableloader: + - success + j11_build: + - success + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + upstream: + start_utests_sstableloader: + - success + j11_build: + - success - start_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_system_keyspace_directory: requires: - start_utests_system_keyspace_directory - j11_build + upstream: + start_utests_system_keyspace_directory: + - success + j11_build: + - success - start_jvm_upgrade_dtests: type: approval - j11_dtest_jars_build: requires: - j11_build - start_jvm_upgrade_dtests + upstream: + j11_build: + - success + start_jvm_upgrade_dtests: + - success - j11_jvm_upgrade_dtests: requires: - j11_dtest_jars_build + upstream: + j11_dtest_jars_build: + - success - j11_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_dtests_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_dtests_latest: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests_latest: requires: - j11_build + upstream: + j11_build: + - success - start_j11_dtests_large: type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - j11_dtests_large_vnode: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - j17_dtests_large_vnode: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - j11_cqlsh_dtests_py38: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py311: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py38_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py311_vnode: requires: - j11_build + upstream: + j11_build: + - success - start_j11_cqlsh_dtests_latest: type: approval - j11_cqlsh_dtests_py38_latest: requires: - start_j11_cqlsh_dtests_latest - j11_build + upstream: + start_j11_cqlsh_dtests_latest: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_latest: requires: - start_j11_cqlsh_dtests_latest - j11_build + upstream: + start_j11_cqlsh_dtests_latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py38: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py311: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - j11_build + upstream: + j11_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j11_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j11_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j11_build: + - success - start_j11_upgrade_dtests: type: approval - j11_upgrade_dtests: requires: - j11_build - start_j11_upgrade_dtests + upstream: + j11_build: + - success + start_j11_upgrade_dtests: + - success java17_separate_tests: jobs: - start_j17_build: @@ -9992,142 +11012,276 @@ workflows: - j17_build: requires: - start_j17_build + upstream: + start_j17_build: + - success - start_j17_unit_tests: type: approval - j17_unit_tests: requires: - start_j17_unit_tests - j17_build + upstream: + start_j17_unit_tests: + - success + j17_build: + - success - start_j17_jvm_dtests: type: approval - j17_jvm_dtests: requires: - start_j17_jvm_dtests - j17_build + upstream: + start_j17_jvm_dtests: + - success + j17_build: + - success - start_j17_jvm_dtests_latest_vnode: type: approval - j17_jvm_dtests_latest_vnode: requires: - start_j17_jvm_dtests_latest_vnode - j17_build + upstream: + start_j17_jvm_dtests_latest_vnode: + - success + j17_build: + - success - start_j17_cqlshlib_tests: type: approval - j17_cqlshlib_tests: requires: - start_j17_cqlshlib_tests - j17_build + upstream: + start_j17_cqlshlib_tests: + - success + j17_build: + - success - start_j17_cqlshlib_cython_tests: type: approval - j17_cqlshlib_cython_tests: requires: - start_j17_cqlshlib_cython_tests - j17_build + upstream: + start_j17_cqlshlib_cython_tests: + - success + j17_build: + - success - start_j17_dtests: type: approval - j17_dtests: requires: - start_j17_dtests - j17_build + upstream: + start_j17_dtests: + - success + j17_build: + - success - start_j17_dtests_vnode: type: approval - j17_dtests_vnode: requires: - start_j17_dtests_vnode - j17_build + upstream: + start_j17_dtests_vnode: + - success + j17_build: + - success - start_j17_dtests_latest: type: approval - j17_dtests_latest: requires: - start_j17_dtests_latest - j17_build + upstream: + start_j17_dtests_latest: + - success + j17_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - start_j17_dtests_large_vnode: type: approval - j17_dtests_large_vnode: requires: - start_j17_dtests_large_vnode - j17_build + upstream: + start_j17_dtests_large_vnode: + - success + j17_build: + - success - start_j17_cqlsh_tests: type: approval - j17_cqlsh_dtests_py38: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py311: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - start_j17_utests_oa: type: approval - j17_utests_oa: requires: - start_j17_utests_oa - j17_build + upstream: + start_j17_utests_oa: + - success + j17_build: + - success - start_j17_utests_long: type: approval - j17_utests_long: requires: - start_j17_utests_long - j17_build + upstream: + start_j17_utests_long: + - success + j17_build: + - success - start_j17_utests_cdc: type: approval - j17_utests_cdc: requires: - start_j17_utests_cdc - j17_build + upstream: + start_j17_utests_cdc: + - success + j17_build: + - success - start_j17_utests_compression: type: approval - j17_utests_compression: requires: - start_j17_utests_compression - j17_build + upstream: + start_j17_utests_compression: + - success + j17_build: + - success - start_j17_utests_latest: type: approval - j17_utests_latest: requires: - start_j17_utests_latest - j17_build + upstream: + start_j17_utests_latest: + - success + j17_build: + - success - start_j17_utests_stress: type: approval - j17_utests_stress: requires: - start_j17_utests_stress - j17_build + upstream: + start_j17_utests_stress: + - success + j17_build: + - success - start_j17_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_j17_utests_fqltool - j17_build + upstream: + start_j17_utests_fqltool: + - success + j17_build: + - success + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j17_build + upstream: + start_j17_utests_sstableloader: + - success + j17_build: + - success - start_j17_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_j17_utests_system_keyspace_directory - j17_build + upstream: + start_j17_utests_system_keyspace_directory: + - success + j17_build: + - success java17_pre-commit_tests: jobs: - start_pre-commit_tests: @@ -10135,101 +11289,207 @@ workflows: - j17_build: requires: - start_pre-commit_tests + upstream: + start_pre-commit_tests: + - success - j17_unit_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_utests_oa: requires: - j17_build + upstream: + j17_build: + - success - j17_utests_latest: requires: - j17_build + upstream: + j17_build: + - success - j17_jvm_dtests: requires: - j17_build + upstream: + j17_build: + - success - j17_jvm_dtests_latest_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlshlib_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlshlib_cython_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests_latest: requires: - j17_build + upstream: + j17_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - j17_dtests_large_vnode: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - j17_cqlsh_dtests_py38: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py311: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - j17_build + upstream: + j17_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - start_utests_long: type: approval - j17_utests_long: requires: - start_utests_long - j17_build + upstream: + start_utests_long: + - success + j17_build: + - success - start_utests_cdc: type: approval - j17_utests_cdc: requires: - start_utests_cdc - j17_build + upstream: + start_utests_cdc: + - success + j17_build: + - success - start_utests_compression: type: approval - j17_utests_compression: requires: - start_utests_compression - j17_build + upstream: + start_utests_compression: + - success + j17_build: + - success - start_utests_stress: type: approval - j17_utests_stress: requires: - start_utests_stress - j17_build + upstream: + start_utests_stress: + - success + j17_build: + - success - start_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_utests_fqltool - j17_build + upstream: + start_utests_fqltool: + - success + j17_build: + - success + - start_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j17_build + upstream: + start_utests_sstableloader: + - success + j17_build: + - success - start_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_utests_system_keyspace_directory - j17_build + upstream: + start_utests_system_keyspace_directory: + - success + j17_build: + - success diff --git a/.circleci/config.yml.PAID b/.circleci/config.yml.PAID index 02e3aed428a7..e10097a4263b 100644 --- a/.circleci/config.yml.PAID +++ b/.circleci/config.yml.PAID @@ -53,7 +53,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_UPGRADE_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_UPGRADE_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_UPGRADE_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_UPGRADE_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_UPGRADE_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_UPGRADE_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -84,6 +84,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -191,6 +193,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -308,6 +312,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -373,6 +379,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -431,7 +439,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -462,6 +470,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -595,6 +605,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -660,6 +672,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -778,6 +792,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -886,6 +902,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1040,6 +1058,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1172,6 +1192,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1281,6 +1303,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1398,6 +1422,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1457,7 +1483,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -1488,6 +1514,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1606,6 +1634,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1714,6 +1744,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1773,7 +1805,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -1804,6 +1836,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -1912,6 +1946,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2045,6 +2081,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2199,6 +2237,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2316,6 +2356,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2375,7 +2417,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2406,6 +2448,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2514,6 +2558,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2622,6 +2668,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2681,7 +2729,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_STRESS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_STRESS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_STRESS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=stress-test-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant stress-test-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2712,6 +2760,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2735,6 +2785,80 @@ jobs: - REPEATED_ANT_TEST_COUNT: 500 - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + j11_utests_sstableloader: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 1 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Run Unit Tests (sstableloader-test) + command: | + export PATH=$JAVA_HOME/bin:$PATH + time mv ~/cassandra /tmp + cd /tmp/cassandra + if [ -d ~/dtest_jars ]; then + cp ~/dtest_jars/dtest* /tmp/cassandra/build/ + fi + ant sstableloader-test -Dno-build-test=true + no_output_timeout: 15m + - store_test_results: + path: /tmp/cassandra/build/test/output/ + - store_artifacts: + path: /tmp/cassandra/build/test/output + destination: junitxml + - store_artifacts: + path: /tmp/cassandra/build/test/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - CASSANDRA_USE_JDK11: true j11_utests_compression_repeat: docker: - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest @@ -2770,7 +2894,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -2801,6 +2925,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -2919,6 +3045,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3019,6 +3147,7 @@ jobs: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then @@ -3114,6 +3243,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3199,6 +3330,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3223,6 +3356,97 @@ jobs: - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 - CASSANDRA_USE_JDK11: true + j17_utests_sstableloader_repeat: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 25 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Log Environment Information + command: | + echo '*** id ***' + id + echo '*** cat /proc/cpuinfo ***' + cat /proc/cpuinfo + echo '*** free -m ***' + free -m + echo '*** df -m ***' + df -m + echo '*** ifconfig -a ***' + ifconfig -a + echo '*** uname -a ***' + uname -a + echo '*** mount ***' + mount + echo '*** env ***' + env + echo '*** java ***' + which java + java -version + - run: + name: Repeatedly run new or modifed JUnit tests + no_output_timeout: 15m + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_SSTABLELOADER_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_SSTABLELOADER_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_SSTABLELOADER} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=sstableloader-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant sstableloader-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + - store_test_results: + path: /tmp/results/repeated_utests/output + - store_artifacts: + path: /tmp/results/repeated_utests/stdout + destination: stdout + - store_artifacts: + path: /tmp/results/repeated_utests/output + destination: junitxml + - store_artifacts: + path: /tmp/results/repeated_utests/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 j11_dtests_large_vnode_repeat: docker: - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest @@ -3332,6 +3556,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3417,6 +3643,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3476,7 +3704,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-system-keyspace-directory\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-system-keyspace-directory $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -3507,6 +3735,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3625,6 +3855,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3758,6 +3990,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3876,6 +4110,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -3976,6 +4212,7 @@ jobs: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then @@ -4071,6 +4308,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4177,6 +4416,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4294,6 +4535,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4353,7 +4596,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4384,6 +4627,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4501,6 +4746,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4572,6 +4819,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4631,7 +4880,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4662,6 +4911,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4721,7 +4972,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=true\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -4752,6 +5003,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4855,6 +5108,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4927,6 +5182,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -4986,7 +5243,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -5017,6 +5274,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5130,6 +5389,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5248,6 +5509,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5320,6 +5583,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5379,7 +5644,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-cdc\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-cdc $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -5410,6 +5675,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5517,6 +5784,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5649,6 +5918,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5767,6 +6038,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -5885,6 +6158,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6018,6 +6293,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6076,7 +6353,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-latest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-latest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6107,6 +6384,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6192,6 +6471,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6325,6 +6606,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6384,7 +6667,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-compression\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-compression $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6415,6 +6698,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6486,6 +6771,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6570,6 +6857,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6678,6 +6967,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6737,7 +7028,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_SIMULATOR_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_SIMULATOR_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_SIMULATOR_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-simulator-dtest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-simulator-dtest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_SIMULATOR_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_SIMULATOR_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_SIMULATOR_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-simulator-dtest\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-simulator-dtest $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6768,6 +7059,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6827,7 +7120,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -6858,6 +7151,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -6967,6 +7262,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7031,6 +7328,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7139,6 +7438,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7257,6 +7558,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7365,6 +7668,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7473,6 +7778,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7590,6 +7897,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7697,6 +8006,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7814,6 +8125,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7885,6 +8198,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -7991,6 +8306,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8050,7 +8367,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8081,6 +8398,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8166,6 +8485,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8230,6 +8551,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8315,6 +8638,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8424,6 +8749,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8541,6 +8868,173 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-17-openjdk-amd64 + j11_utests_sstableloader_repeat: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11-w-dependencies:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 25 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Log Environment Information + command: | + echo '*** id ***' + id + echo '*** cat /proc/cpuinfo ***' + cat /proc/cpuinfo + echo '*** free -m ***' + free -m + echo '*** df -m ***' + df -m + echo '*** ifconfig -a ***' + ifconfig -a + echo '*** uname -a ***' + uname -a + echo '*** mount ***' + mount + echo '*** env ***' + env + echo '*** java ***' + which java + java -version + - run: + name: Repeatedly run new or modifed JUnit tests + no_output_timeout: 15m + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_SSTABLELOADER_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_SSTABLELOADER_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_SSTABLELOADER} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=sstableloader-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant sstableloader-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + - store_test_results: + path: /tmp/results/repeated_utests/output + - store_artifacts: + path: /tmp/results/repeated_utests/stdout + destination: stdout + - store_artifacts: + path: /tmp/results/repeated_utests/output + destination: junitxml + - store_artifacts: + path: /tmp/results/repeated_utests/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + - REPEATED_UTESTS_LONG: null + - REPEATED_UTESTS_LONG_COUNT: 100 + - REPEATED_UTESTS_STRESS: null + - REPEATED_UTESTS_STRESS_COUNT: 500 + - REPEATED_SIMULATOR_DTESTS: null + - REPEATED_SIMULATOR_DTESTS_COUNT: 500 + - REPEATED_JVM_DTESTS: null + - REPEATED_JVM_DTESTS_COUNT: 500 + - REPEATED_JVM_UPGRADE_DTESTS: null + - REPEATED_JVM_UPGRADE_DTESTS_COUNT: 500 + - REPEATED_DTESTS: null + - REPEATED_DTESTS_COUNT: 500 + - REPEATED_LARGE_DTESTS: null + - REPEATED_LARGE_DTESTS_COUNT: 100 + - REPEATED_UPGRADE_DTESTS: null + - REPEATED_UPGRADE_DTESTS_COUNT: 25 + - REPEATED_ANT_TEST_TARGET: testsome + - REPEATED_ANT_TEST_CLASS: null + - REPEATED_ANT_TEST_METHODS: null + - REPEATED_ANT_TEST_VNODES: false + - REPEATED_ANT_TEST_COUNT: 500 + - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - JDK_HOME: /usr/lib/jvm/java-11-openjdk-amd64 + - CASSANDRA_USE_JDK11: true + j17_utests_sstableloader: + docker: + - image: apache/cassandra-testing-ubuntu2004-java11:latest + resource_class: medium + working_directory: ~/ + shell: /bin/bash -eo pipefail -l + parallelism: 1 + steps: + - attach_workspace: + at: /home/cassandra + - run: + name: Run Unit Tests (sstableloader-test) + command: | + export PATH=$JAVA_HOME/bin:$PATH + time mv ~/cassandra /tmp + cd /tmp/cassandra + if [ -d ~/dtest_jars ]; then + cp ~/dtest_jars/dtest* /tmp/cassandra/build/ + fi + ant sstableloader-test -Dno-build-test=true + no_output_timeout: 15m + - store_test_results: + path: /tmp/cassandra/build/test/output/ + - store_artifacts: + path: /tmp/cassandra/build/test/output + destination: junitxml + - store_artifacts: + path: /tmp/cassandra/build/test/logs + destination: logs + environment: + - ANT_HOME: /usr/share/ant + - LANG: en_US.UTF-8 + - KEEP_TEST_DIR: true + - DEFAULT_DIR: /home/cassandra/cassandra-dtest + - PYTHONIOENCODING: utf-8 + - PYTHONUNBUFFERED: true + - CASS_DRIVER_NO_EXTENSIONS: true + - CASS_DRIVER_NO_CYTHON: true + - CASSANDRA_SKIP_SYNC: true + - DTEST_REPO: https://github.com/apache/cassandra-dtest.git + - DTEST_BRANCH: trunk + - CCM_MAX_HEAP_SIZE: 1024M + - CCM_HEAP_NEWSIZE: 256M + - REPEATED_TESTS_STOP_ON_FAILURE: false + - REPEATED_UTESTS: null + - REPEATED_UTESTS_COUNT: 500 + - REPEATED_UTESTS_FQLTOOL: null + - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8599,7 +9093,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_FQLTOOL_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_FQLTOOL_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_FQLTOOL} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=fqltool-test\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant fqltool-test $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8630,6 +9124,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8714,6 +9210,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8772,7 +9270,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8803,6 +9301,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8875,6 +9375,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -8933,7 +9435,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-oa\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-oa $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -8964,6 +9466,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9048,6 +9552,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9107,7 +9613,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_JVM_DTESTS_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_JVM_DTESTS_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_JVM_DTESTS} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=test-jvm-dtest-some\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant test-jvm-dtest-some $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9138,6 +9644,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9270,6 +9778,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9328,7 +9838,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9359,6 +9869,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9417,7 +9929,7 @@ jobs: - run: name: Repeatedly run new or modifed JUnit tests no_output_timeout: 15m - command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" + command: "set -x\nexport PATH=$JAVA_HOME/bin:$PATH\ntime mv ~/cassandra /tmp\ncd /tmp/cassandra\nif [ -d ~/dtest_jars ]; then\n cp ~/dtest_jars/dtest* /tmp/cassandra/build/\nfi\n\n# Calculate the number of test iterations to be run by the current parallel runner.\ncount=$((${REPEATED_UTESTS_LONG_COUNT} / CIRCLE_NODE_TOTAL))\nif (($CIRCLE_NODE_INDEX < (${REPEATED_UTESTS_LONG_COUNT} % CIRCLE_NODE_TOTAL))); then\n count=$((count+1))\nfi\n\n# Put manually specified tests and automatically detected tests together, removing duplicates\ntests=$(echo ${REPEATED_UTESTS_LONG} | sed -e \"s///\" | sed -e \"s/ //\" | tr \",\" \"\\n\" | tr \" \" \"\\n\" | sort -n | uniq -u)\necho \"Tests to be repeated: ${tests}\"\n\n# Prepare the JVM dtests vnodes argument, which is optional.\nvnodes=false\nvnodes_args=\"\"\nif [ \"$vnodes\" = true ] ; then\n vnodes_args=\"-Dtest.jvm.args='-Dcassandra.dtest.num_tokens=16'\"\nfi\n\n# Prepare the testtag for the target, used by the test macro in build.xml to group the output files\ntarget=long-testsome\ntesttag=\"\"\nif [[ $target == \"test-cdc\" ]]; then\n testtag=\"cdc\"\nelif [[ $target == \"test-compression\" ]]; then\n testtag=\"compression\"\nelif [[ $target == \"test-system-keyspace-directory\" ]]; then\n testtag=\"system_keyspace_directory\"\nelif [[ $target == \"test-latest\" ]]; then\n testtag=\"latest\"\nelif [[ $target == \"test-oa\" ]]; then\n testtag=\"oa\"\nfi\n\n# Run each test class as many times as requested.\nexit_code=\"$?\"\nfor test in $tests; do\n\n # Split class and method names from the test name\n if [[ $test =~ \"#\" ]]; then\n class=${test%\"#\"*}\n method=${test#*\"#\"}\n else\n class=$test\n method=\"\"\n fi\n\n # Prepare the -Dtest.name argument.\n # It can be the fully qualified class name or the short class name, depending on the target.\n if [[ $target == \"test\" || \\\n $target == \"test-cdc\" || \\\n $target == \"test-compression\" || \\\n $target == \"test-latest\" || \\\n $target == \"test-oa\" || \\\n $target == \"test-system-keyspace-directory\" || \\\n $target == \"fqltool-test\" || \\\n $target == \"sstableloader-test\" || \\\n $target == \"long-test\" || \\\n $target == \"stress-test\" || \\\n $target == \"test-simulator-dtest\" ]]; then\n name_arg=\"-Dtest.name=${class##*.}\"\n else\n name_arg=\"-Dtest.name=$class\"\n fi\n\n # Prepare the -Dtest.methods argument, which is optional\n if [[ $method == \"\" ]]; then\n methods_arg=\"\"\n else\n methods_arg=\"-Dtest.methods=$method\"\n fi\n\n for i in $(seq -w 1 $count); do\n echo \"Running test $test, iteration $i of $count\"\n\n # run the test\n status=\"passes\"\n if !( set -o pipefail && \\\n ant long-testsome $name_arg $methods_arg $vnodes_args -Dno-build-test=true | \\\n tee stdout.txt \\\n ); then\n status=\"fails\"\n exit_code=1\n fi\n\n # move the stdout output file\n dest=/tmp/results/repeated_utests/stdout/${status}/${i}\n mkdir -p $dest\n mv stdout.txt $dest/${test}.txt\n\n # move the XML output files\n source=build/test/output/${testtag}\n dest=/tmp/results/repeated_utests/output/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" ]]; then\n mv $source/* $dest/\n fi\n\n # move the log files\n source=\"build/test/logs\"\n dest=/tmp/results/repeated_utests/logs/${status}/${i}\n mkdir -p $dest\n if [[ -d $source && -n \"$(ls $source)\" && -n \"$(ls ${source}/${testtag}*)\" ]]; then\n mv $source/${testtag}*/* $dest/\n fi\n \n # maybe stop iterations on test failure\n if [[ ${REPEATED_TESTS_STOP_ON_FAILURE} = true ]] && (( $exit_code > 0 )); then\n break\n fi\n done\ndone\n(exit ${exit_code})\n" - store_test_results: path: /tmp/results/repeated_utests/output - store_artifacts: @@ -9448,6 +9960,8 @@ jobs: - REPEATED_UTESTS_COUNT: 500 - REPEATED_UTESTS_FQLTOOL: null - REPEATED_UTESTS_FQLTOOL_COUNT: 500 + - REPEATED_UTESTS_SSTABLELOADER: null + - REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 - REPEATED_UTESTS_LONG: null - REPEATED_UTESTS_LONG_COUNT: 100 - REPEATED_UTESTS_STRESS: null @@ -9481,302 +9995,587 @@ workflows: - j11_build: requires: - start_j11_build + upstream: + start_j11_build: + - success - start_j11_unit_tests: type: approval - j11_unit_tests: requires: - start_j11_unit_tests - j11_build + upstream: + start_j11_unit_tests: + - success + j11_build: + - success - start_j11_jvm_dtests: type: approval - j11_jvm_dtests: requires: - start_j11_jvm_dtests - j11_build + upstream: + start_j11_jvm_dtests: + - success + j11_build: + - success - start_j11_jvm_dtests_latest_vnode: type: approval - j11_jvm_dtests_latest_vnode: requires: - start_j11_jvm_dtests_latest_vnode - j11_build + upstream: + start_j11_jvm_dtests_latest_vnode: + - success + j11_build: + - success - start_j17_jvm_dtests: type: approval - j17_jvm_dtests: requires: - start_j17_jvm_dtests - j11_build + upstream: + start_j17_jvm_dtests: + - success + j11_build: + - success - start_j17_jvm_dtests_latest_vnode: type: approval - j17_jvm_dtests_latest_vnode: requires: - start_j17_jvm_dtests_latest_vnode - j11_build + upstream: + start_j17_jvm_dtests_latest_vnode: + - success + j11_build: + - success - start_j11_simulator_dtests: type: approval - j11_simulator_dtests: requires: - start_j11_simulator_dtests - j11_build + upstream: + start_j11_simulator_dtests: + - success + j11_build: + - success - start_j11_cqlshlib_tests: type: approval - j11_cqlshlib_tests: requires: - start_j11_cqlshlib_tests - j11_build + upstream: + start_j11_cqlshlib_tests: + - success + j11_build: + - success - start_j11_cqlshlib_cython_tests: type: approval - j11_cqlshlib_cython_tests: requires: - start_j11_cqlshlib_cython_tests - j11_build + upstream: + start_j11_cqlshlib_cython_tests: + - success + j11_build: + - success - start_j17_cqlshlib_tests: type: approval - j17_cqlshlib_tests: requires: - start_j17_cqlshlib_tests - j11_build + upstream: + start_j17_cqlshlib_tests: + - success + j11_build: + - success - start_j17_cqlshlib_cython_tests: type: approval - j17_cqlshlib_cython_tests: requires: - start_j17_cqlshlib_cython_tests - j11_build + upstream: + start_j17_cqlshlib_cython_tests: + - success + j11_build: + - success - start_j17_unit_tests: type: approval - j17_unit_tests: requires: - start_j17_unit_tests - j11_build + upstream: + start_j17_unit_tests: + - success + j11_build: + - success - start_j11_utests_oa: type: approval - j11_utests_oa: requires: - start_j11_utests_oa - j11_build + upstream: + start_j11_utests_oa: + - success + j11_build: + - success - start_j17_utests_oa: type: approval - j17_utests_oa: requires: - start_j17_utests_oa - j11_build + upstream: + start_j17_utests_oa: + - success + j11_build: + - success - start_j11_utests_long: type: approval - j11_utests_long: requires: - start_j11_utests_long - j11_build + upstream: + start_j11_utests_long: + - success + j11_build: + - success - start_j17_utests_long: type: approval - j17_utests_long: requires: - start_j17_utests_long - j11_build + upstream: + start_j17_utests_long: + - success + j11_build: + - success - start_j11_utests_cdc: type: approval - j11_utests_cdc: requires: - start_j11_utests_cdc - j11_build + upstream: + start_j11_utests_cdc: + - success + j11_build: + - success - start_j17_utests_cdc: type: approval - j17_utests_cdc: requires: - start_j17_utests_cdc - j11_build + upstream: + start_j17_utests_cdc: + - success + j11_build: + - success - start_j11_utests_compression: type: approval - j11_utests_compression: requires: - start_j11_utests_compression - j11_build + upstream: + start_j11_utests_compression: + - success + j11_build: + - success - start_j17_utests_compression: type: approval - j17_utests_compression: requires: - start_j17_utests_compression - j11_build + upstream: + start_j17_utests_compression: + - success + j11_build: + - success - start_j11_utests_latest: type: approval - j11_utests_latest: requires: - start_j11_utests_latest - j11_build + upstream: + start_j11_utests_latest: + - success + j11_build: + - success - start_j17_utests_latest: type: approval - j17_utests_latest: requires: - start_j17_utests_latest - j11_build + upstream: + start_j17_utests_latest: + - success + j11_build: + - success - start_j11_utests_stress: type: approval - j11_utests_stress: requires: - start_j11_utests_stress - j11_build + upstream: + start_j11_utests_stress: + - success + j11_build: + - success - start_j17_utests_stress: type: approval - j17_utests_stress: requires: - start_j17_utests_stress - j11_build + upstream: + start_j17_utests_stress: + - success + j11_build: + - success - start_j11_utests_fqltool: type: approval - j11_utests_fqltool: requires: - start_j11_utests_fqltool - j11_build + upstream: + start_j11_utests_fqltool: + - success + j11_build: + - success - start_j17_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_j17_utests_fqltool - j11_build + upstream: + start_j17_utests_fqltool: + - success + j11_build: + - success + - start_j11_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_j11_utests_sstableloader + - j11_build + upstream: + start_j11_utests_sstableloader: + - success + j11_build: + - success + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j11_build + upstream: + start_j17_utests_sstableloader: + - success + j11_build: + - success - start_j11_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: requires: - start_j11_utests_system_keyspace_directory - j11_build + upstream: + start_j11_utests_system_keyspace_directory: + - success + j11_build: + - success - start_j17_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_j17_utests_system_keyspace_directory - j11_build + upstream: + start_j17_utests_system_keyspace_directory: + - success + j11_build: + - success - start_j11_dtest_jars_build: type: approval - j11_dtest_jars_build: requires: - j11_build - start_j11_dtest_jars_build + upstream: + j11_build: + - success + start_j11_dtest_jars_build: + - success - start_jvm_upgrade_dtests: type: approval - j11_jvm_upgrade_dtests: requires: - start_jvm_upgrade_dtests - j11_dtest_jars_build + upstream: + start_jvm_upgrade_dtests: + - success + j11_dtest_jars_build: + - success - start_j11_dtests: type: approval - j11_dtests: requires: - start_j11_dtests - j11_build + upstream: + start_j11_dtests: + - success + j11_build: + - success - start_j11_dtests_vnode: type: approval - j11_dtests_vnode: requires: - start_j11_dtests_vnode - j11_build + upstream: + start_j11_dtests_vnode: + - success + j11_build: + - success - start_j11_dtests_latest: type: approval - j11_dtests_latest: requires: - start_j11_dtests_latest - j11_build + upstream: + start_j11_dtests_latest: + - success + j11_build: + - success - start_j17_dtests: type: approval - j17_dtests: requires: - start_j17_dtests - j11_build + upstream: + start_j17_dtests: + - success + j11_build: + - success - start_j17_dtests_vnode: type: approval - j17_dtests_vnode: requires: - start_j17_dtests_vnode - j11_build + upstream: + start_j17_dtests_vnode: + - success + j11_build: + - success - start_j17_dtests_latest: type: approval - j17_dtests_latest: requires: - start_j17_dtests_latest - j11_build + upstream: + start_j17_dtests_latest: + - success + j11_build: + - success - start_j11_dtests_large: type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - start_j11_dtests_large_vnode: type: approval - j11_dtests_large_vnode: requires: - start_j11_dtests_large_vnode - j11_build + upstream: + start_j11_dtests_large_vnode: + - success + j11_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - start_j17_dtests_large_vnode: type: approval - j17_dtests_large_vnode: requires: - start_j17_dtests_large_vnode - j11_build + upstream: + start_j17_dtests_large_vnode: + - success + j11_build: + - success - start_j11_cqlsh_tests: type: approval - j11_cqlsh_dtests_py38: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py311: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py38_vnode: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_vnode: requires: - start_j11_cqlsh_tests - j11_build + upstream: + start_j11_cqlsh_tests: + - success + j11_build: + - success - start_j11_cqlsh_tests_latest: type: approval - j11_cqlsh_dtests_py38_latest: requires: - start_j11_cqlsh_tests_latest - j11_build + upstream: + start_j11_cqlsh_tests_latest: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_latest: requires: - start_j11_cqlsh_tests_latest - j11_build + upstream: + start_j11_cqlsh_tests_latest: + - success + j11_build: + - success - start_j17_cqlsh_tests: type: approval - j17_cqlsh_dtests_py38: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py311: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - start_j17_cqlsh_tests - j11_build + upstream: + start_j17_cqlsh_tests: + - success + j11_build: + - success - start_j17_cqlsh_tests_latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh_tests_latest - j11_build + upstream: + start_j17_cqlsh_tests_latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh_tests_latest - j11_build + upstream: + start_j17_cqlsh_tests_latest: + - success + j11_build: + - success - start_j11_upgrade_dtests: type: approval - j11_upgrade_dtests: requires: - start_j11_upgrade_dtests - j11_build + upstream: + start_j11_upgrade_dtests: + - success + j11_build: + - success java11_pre-commit_tests: jobs: - start_pre-commit_tests: @@ -9784,207 +10583,428 @@ workflows: - j11_build: requires: - start_pre-commit_tests + upstream: + start_pre-commit_tests: + - success - j11_unit_tests: requires: - j11_build + upstream: + j11_build: + - success - j11_utests_oa: requires: - j11_build + upstream: + j11_build: + - success - j11_utests_latest: requires: - j11_build + upstream: + j11_build: + - success - j11_simulator_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_jvm_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_jvm_dtests_latest_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_jvm_dtests: requires: - j11_build + upstream: + j11_build: + - success - j17_jvm_dtests_latest_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlshlib_tests: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlshlib_cython_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlshlib_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlshlib_cython_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_unit_tests: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_oa: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_latest: requires: - j11_build + upstream: + j11_build: + - success - start_utests_long: type: approval - j11_utests_long: requires: - start_utests_long - j11_build + upstream: + start_utests_long: + - success + j11_build: + - success - j17_utests_long: requires: - start_utests_long - j11_build + upstream: + start_utests_long: + - success + j11_build: + - success - start_utests_cdc: type: approval - j11_utests_cdc: requires: - start_utests_cdc - j11_build + upstream: + start_utests_cdc: + - success + j11_build: + - success - j17_utests_cdc: requires: - start_utests_cdc - j11_build + upstream: + start_utests_cdc: + - success + j11_build: + - success - start_utests_compression: type: approval - j11_utests_compression: requires: - start_utests_compression - j11_build + upstream: + start_utests_compression: + - success + j11_build: + - success - j17_utests_compression: requires: - start_utests_compression - j11_build + upstream: + start_utests_compression: + - success + j11_build: + - success - start_utests_stress: type: approval - j11_utests_stress: requires: - start_utests_stress - j11_build + upstream: + start_utests_stress: + - success + j11_build: + - success - j17_utests_stress: requires: - start_utests_stress - j11_build + upstream: + start_utests_stress: + - success + j11_build: + - success - start_utests_fqltool: type: approval - j11_utests_fqltool: requires: - start_utests_fqltool - j11_build + upstream: + start_utests_fqltool: + - success + j11_build: + - success - j17_utests_fqltool: requires: - start_utests_fqltool - j11_build + upstream: + start_utests_fqltool: + - success + j11_build: + - success + - start_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + upstream: + start_utests_sstableloader: + - success + j11_build: + - success + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + upstream: + start_utests_sstableloader: + - success + j11_build: + - success - start_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: requires: - j11_build + upstream: + j11_build: + - success - j17_utests_system_keyspace_directory: requires: - start_utests_system_keyspace_directory - j11_build + upstream: + start_utests_system_keyspace_directory: + - success + j11_build: + - success - start_jvm_upgrade_dtests: type: approval - j11_dtest_jars_build: requires: - j11_build - start_jvm_upgrade_dtests + upstream: + j11_build: + - success + start_jvm_upgrade_dtests: + - success - j11_jvm_upgrade_dtests: requires: - j11_dtest_jars_build + upstream: + j11_dtest_jars_build: + - success - j11_dtests: requires: - j11_build + upstream: + j11_build: + - success - j11_dtests_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_dtests_latest: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_dtests_latest: requires: - j11_build + upstream: + j11_build: + - success - start_j11_dtests_large: type: approval - j11_dtests_large: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - j11_dtests_large_vnode: requires: - start_j11_dtests_large - j11_build + upstream: + start_j11_dtests_large: + - success + j11_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - j17_dtests_large_vnode: requires: - start_j17_dtests_large - j11_build + upstream: + start_j17_dtests_large: + - success + j11_build: + - success - j11_cqlsh_dtests_py38: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py311: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py38_vnode: requires: - j11_build + upstream: + j11_build: + - success - j11_cqlsh_dtests_py311_vnode: requires: - j11_build + upstream: + j11_build: + - success - start_j11_cqlsh_dtests_latest: type: approval - j11_cqlsh_dtests_py38_latest: requires: - start_j11_cqlsh_dtests_latest - j11_build + upstream: + start_j11_cqlsh_dtests_latest: + - success + j11_build: + - success - j11_cqlsh_dtests_py311_latest: requires: - start_j11_cqlsh_dtests_latest - j11_build + upstream: + start_j11_cqlsh_dtests_latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py38: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py311: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - j11_build + upstream: + j11_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - j11_build + upstream: + j11_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j11_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j11_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j11_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j11_build: + - success - start_j11_upgrade_dtests: type: approval - j11_upgrade_dtests: requires: - j11_build - start_j11_upgrade_dtests + upstream: + j11_build: + - success + start_j11_upgrade_dtests: + - success java17_separate_tests: jobs: - start_j17_build: @@ -9992,142 +11012,276 @@ workflows: - j17_build: requires: - start_j17_build + upstream: + start_j17_build: + - success - start_j17_unit_tests: type: approval - j17_unit_tests: requires: - start_j17_unit_tests - j17_build + upstream: + start_j17_unit_tests: + - success + j17_build: + - success - start_j17_jvm_dtests: type: approval - j17_jvm_dtests: requires: - start_j17_jvm_dtests - j17_build + upstream: + start_j17_jvm_dtests: + - success + j17_build: + - success - start_j17_jvm_dtests_latest_vnode: type: approval - j17_jvm_dtests_latest_vnode: requires: - start_j17_jvm_dtests_latest_vnode - j17_build + upstream: + start_j17_jvm_dtests_latest_vnode: + - success + j17_build: + - success - start_j17_cqlshlib_tests: type: approval - j17_cqlshlib_tests: requires: - start_j17_cqlshlib_tests - j17_build + upstream: + start_j17_cqlshlib_tests: + - success + j17_build: + - success - start_j17_cqlshlib_cython_tests: type: approval - j17_cqlshlib_cython_tests: requires: - start_j17_cqlshlib_cython_tests - j17_build + upstream: + start_j17_cqlshlib_cython_tests: + - success + j17_build: + - success - start_j17_dtests: type: approval - j17_dtests: requires: - start_j17_dtests - j17_build + upstream: + start_j17_dtests: + - success + j17_build: + - success - start_j17_dtests_vnode: type: approval - j17_dtests_vnode: requires: - start_j17_dtests_vnode - j17_build + upstream: + start_j17_dtests_vnode: + - success + j17_build: + - success - start_j17_dtests_latest: type: approval - j17_dtests_latest: requires: - start_j17_dtests_latest - j17_build + upstream: + start_j17_dtests_latest: + - success + j17_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - start_j17_dtests_large_vnode: type: approval - j17_dtests_large_vnode: requires: - start_j17_dtests_large_vnode - j17_build + upstream: + start_j17_dtests_large_vnode: + - success + j17_build: + - success - start_j17_cqlsh_tests: type: approval - j17_cqlsh_dtests_py38: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py311: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - start_j17_cqlsh_tests - j17_build + upstream: + start_j17_cqlsh_tests: + - success + j17_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - start_j17_utests_oa: type: approval - j17_utests_oa: requires: - start_j17_utests_oa - j17_build + upstream: + start_j17_utests_oa: + - success + j17_build: + - success - start_j17_utests_long: type: approval - j17_utests_long: requires: - start_j17_utests_long - j17_build + upstream: + start_j17_utests_long: + - success + j17_build: + - success - start_j17_utests_cdc: type: approval - j17_utests_cdc: requires: - start_j17_utests_cdc - j17_build + upstream: + start_j17_utests_cdc: + - success + j17_build: + - success - start_j17_utests_compression: type: approval - j17_utests_compression: requires: - start_j17_utests_compression - j17_build + upstream: + start_j17_utests_compression: + - success + j17_build: + - success - start_j17_utests_latest: type: approval - j17_utests_latest: requires: - start_j17_utests_latest - j17_build + upstream: + start_j17_utests_latest: + - success + j17_build: + - success - start_j17_utests_stress: type: approval - j17_utests_stress: requires: - start_j17_utests_stress - j17_build + upstream: + start_j17_utests_stress: + - success + j17_build: + - success - start_j17_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_j17_utests_fqltool - j17_build + upstream: + start_j17_utests_fqltool: + - success + j17_build: + - success + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j17_build + upstream: + start_j17_utests_sstableloader: + - success + j17_build: + - success - start_j17_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_j17_utests_system_keyspace_directory - j17_build + upstream: + start_j17_utests_system_keyspace_directory: + - success + j17_build: + - success java17_pre-commit_tests: jobs: - start_pre-commit_tests: @@ -10135,101 +11289,207 @@ workflows: - j17_build: requires: - start_pre-commit_tests + upstream: + start_pre-commit_tests: + - success - j17_unit_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_utests_oa: requires: - j17_build + upstream: + j17_build: + - success - j17_utests_latest: requires: - j17_build + upstream: + j17_build: + - success - j17_jvm_dtests: requires: - j17_build + upstream: + j17_build: + - success - j17_jvm_dtests_latest_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlshlib_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlshlib_cython_tests: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_dtests_latest: requires: - j17_build + upstream: + j17_build: + - success - start_j17_dtests_large: type: approval - j17_dtests_large: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - j17_dtests_large_vnode: requires: - start_j17_dtests_large - j17_build + upstream: + start_j17_dtests_large: + - success + j17_build: + - success - j17_cqlsh_dtests_py38: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py311: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py38_vnode: requires: - j17_build + upstream: + j17_build: + - success - j17_cqlsh_dtests_py311_vnode: requires: - j17_build + upstream: + j17_build: + - success - start_j17_cqlsh-dtests-latest: type: approval - j17_cqlsh_dtests_py38_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - j17_cqlsh_dtests_py311_latest: requires: - start_j17_cqlsh-dtests-latest - j17_build + upstream: + start_j17_cqlsh-dtests-latest: + - success + j17_build: + - success - start_utests_long: type: approval - j17_utests_long: requires: - start_utests_long - j17_build + upstream: + start_utests_long: + - success + j17_build: + - success - start_utests_cdc: type: approval - j17_utests_cdc: requires: - start_utests_cdc - j17_build + upstream: + start_utests_cdc: + - success + j17_build: + - success - start_utests_compression: type: approval - j17_utests_compression: requires: - start_utests_compression - j17_build + upstream: + start_utests_compression: + - success + j17_build: + - success - start_utests_stress: type: approval - j17_utests_stress: requires: - start_utests_stress - j17_build + upstream: + start_utests_stress: + - success + j17_build: + - success - start_utests_fqltool: type: approval - j17_utests_fqltool: requires: - start_utests_fqltool - j17_build + upstream: + start_utests_fqltool: + - success + j17_build: + - success + - start_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j17_build + upstream: + start_utests_sstableloader: + - success + j17_build: + - success - start_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: requires: - start_utests_system_keyspace_directory - j17_build + upstream: + start_utests_system_keyspace_directory: + - success + j17_build: + - success diff --git a/.circleci/config_template.yml b/.circleci/config_template.yml index d015b5e8728e..a3e09b1fa01f 100644 --- a/.circleci/config_template.yml +++ b/.circleci/config_template.yml @@ -63,6 +63,13 @@ default_env_vars: &default_env_vars # The number of times that new, modified or manually specified fqltool unit tests should be run. REPEATED_UTESTS_FQLTOOL_COUNT: 500 + # Comma-separated list of tests that should be included in the repeated run for sstableloader unit tests, + # in addition to automatically detected new and modified tests. For example: + # REPEATED_UTESTS_SSTABLELOADER: org.apache.cassandra.tools.LoaderOptionsTest + REPEATED_UTESTS_SSTABLELOADER: + # The number of times that new, modified or manually specified sstableloader unit tests should be run. + REPEATED_UTESTS_SSTABLELOADER_COUNT: 500 + # Comma-separated list of tests that should be included in the repeated run for long unit tests, # in addition to automatically detected new and modified tests. For example: # REPEATED_UTESTS_LONG: org.apache.cassandra.db.commitlog.CommitLogStressTest @@ -537,6 +544,30 @@ j11_separate_jobs: &j11_separate_jobs requires: - start_j17_utests_fqltool_repeat - j11_build + - start_j11_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_j11_utests_sstableloader + - j11_build + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j11_build + - start_j11_utests_sstableloader_repeat: + type: approval + - j11_utests_sstableloader_repeat: + requires: + - start_j11_utests_sstableloader_repeat + - j11_build + - start_j17_utests_sstableloader_repeat: + type: approval + - j17_utests_sstableloader_repeat: + requires: + - start_j17_utests_sstableloader_repeat + - j11_build - start_j11_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: @@ -974,6 +1005,24 @@ j11_pre-commit_jobs: &j11_pre-commit_jobs requires: - start_utests_fqltool - j11_build + - start_utests_sstableloader: + type: approval + - j11_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j11_build + - j11_utests_sstableloader_repeat: + requires: + - start_utests_sstableloader + - j11_build + - j17_utests_sstableloader_repeat: + requires: + - start_utests_sstableloader + - j11_build - start_utests_system_keyspace_directory: type: approval - j11_utests_system_keyspace_directory: @@ -1356,6 +1405,18 @@ j17_separate_jobs: &j17_separate_jobs requires: - start_j17_utests_fqltool_repeat - j17_build + - start_j17_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_j17_utests_sstableloader + - j17_build + - start_j17_utests_sstableloader_repeat: + type: approval + - j17_utests_sstableloader_repeat: + requires: + - start_j17_utests_sstableloader_repeat + - j17_build - start_j17_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: @@ -1541,6 +1602,16 @@ j17_pre-commit_jobs: &j17_pre-commit_jobs requires: - start_utests_fqltool - j17_build + - start_utests_sstableloader: + type: approval + - j17_utests_sstableloader: + requires: + - start_utests_sstableloader + - j17_build + - j17_utests_sstableloader_repeat: + requires: + - start_utests_sstableloader + - j17_build - start_utests_system_keyspace_directory: type: approval - j17_utests_system_keyspace_directory: @@ -1875,6 +1946,22 @@ jobs: - run_junit_tests: target: fqltool-test + j11_utests_sstableloader: + <<: *j11_seq_executor + steps: + - attach_workspace: + at: /home/cassandra + - run_junit_tests: + target: sstableloader-test + + j17_utests_sstableloader: + <<: *j17_seq_executor + steps: + - attach_workspace: + at: /home/cassandra + - run_junit_tests: + target: sstableloader-test + j11_utests_system_keyspace_directory: <<: *j11_par_executor steps: @@ -2380,6 +2467,22 @@ jobs: - log_environment - run_utests_fqltool_repeat + j11_utests_sstableloader_repeat: + <<: *j11_repeated_utest_executor + steps: + - attach_workspace: + at: /home/cassandra + - log_environment + - run_utests_sstableloader_repeat + + j17_utests_sstableloader_repeat: + <<: *j17_repeated_utest_executor + steps: + - attach_workspace: + at: /home/cassandra + - log_environment + - run_utests_sstableloader_repeat + j11_utests_long_repeat: <<: *j11_repeated_utest_executor steps: @@ -3114,6 +3217,14 @@ commands: count: ${REPEATED_UTESTS_FQLTOOL_COUNT} stop_on_failure: ${REPEATED_TESTS_STOP_ON_FAILURE} + run_utests_sstableloader_repeat: + steps: + - run_repeated_utests: + target: sstableloader-test + tests: ${REPEATED_UTESTS_SSTABLELOADER} + count: ${REPEATED_UTESTS_SSTABLELOADER_COUNT} + stop_on_failure: ${REPEATED_TESTS_STOP_ON_FAILURE} + run_utests_stress_repeat: steps: - run_repeated_utests: @@ -3237,6 +3348,7 @@ commands: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then @@ -3365,6 +3477,7 @@ commands: $target == "test-oa" || \ $target == "test-system-keyspace-directory" || \ $target == "fqltool-test" || \ + $target == "sstableloader-test" || \ $target == "long-test" || \ $target == "stress-test" || \ $target == "test-simulator-dtest" ]]; then diff --git a/.circleci/generate.sh b/.circleci/generate.sh index 97a433e2d77c..26d68dc2a3b2 100755 --- a/.circleci/generate.sh +++ b/.circleci/generate.sh @@ -51,6 +51,8 @@ print_help() echo " -e REPEATED_UTESTS_COUNT=500" echo " -e REPEATED_UTESTS_FQLTOOL=org.apache.cassandra.fqltool.FQLCompareTest" echo " -e REPEATED_UTESTS_FQLTOOL_COUNT=500" + echo " -e REPEATED_UTESTS_SSTABLELOADER=org.apache.cassandra.tools.LoaderOptionsTest" + echo " -e REPEATED_UTESTS_SSTABLELOADER_COUNT=500" echo " -e REPEATED_UTESTS_LONG=org.apache.cassandra.db.commitlog.CommitLogStressTest" echo " -e REPEATED_UTESTS_LONG_COUNT=100" echo " -e REPEATED_UTESTS_STRESS=org.apache.cassandra.stress.generate.DistributionGaussianTest" @@ -131,6 +133,8 @@ if $has_env_vars && $check_env_vars; then [ "$key" != "REPEATED_UTESTS_COUNT" ] && [ "$key" != "REPEATED_UTESTS_FQLTOOL" ] && [ "$key" != "REPEATED_UTESTS_FQLTOOL_COUNT" ] && + [ "$key" != "REPEATED_UTESTS_SSTABLELOADER" ] && + [ "$key" != "REPEATED_UTESTS_SSTABLELOADER_COUNT" ] && [ "$key" != "REPEATED_UTESTS_LONG" ] && [ "$key" != "REPEATED_UTESTS_LONG_COUNT" ] && [ "$key" != "REPEATED_UTESTS_STRESS" ] && @@ -171,7 +175,7 @@ if $free; then elif $paid; then ($all || $free) && die "Cannot use option -p with options -a or -f" echo "Generating new config.yml file for paid tier from config_template.yml" - patch -o $BASEDIR/config_template.yml.PAID $BASEDIR/config_template.yml $BASEDIR/config_template.yml.PAID.patch + patch --silent -o $BASEDIR/config_template.yml.PAID $BASEDIR/config_template.yml $BASEDIR/config_template.yml.PAID.patch circleci config process $BASEDIR/config_template.yml.PAID > $BASEDIR/config.yml.PAID.tmp cat $BASEDIR/license.yml $BASEDIR/config.yml.PAID.tmp > $BASEDIR/config.yml rm $BASEDIR/config_template.yml.PAID $BASEDIR/config.yml.PAID.tmp @@ -188,7 +192,7 @@ elif $all; then rm $BASEDIR/config.yml.FREE.tmp # setup config for paid tier - patch -o $BASEDIR/config_template.yml.PAID $BASEDIR/config_template.yml $BASEDIR/config_template.yml.PAID.patch + patch --silent -o $BASEDIR/config_template.yml.PAID $BASEDIR/config_template.yml $BASEDIR/config_template.yml.PAID.patch circleci config process $BASEDIR/config_template.yml.PAID > $BASEDIR/config.yml.PAID.tmp cat $BASEDIR/license.yml $BASEDIR/config.yml.PAID.tmp > $BASEDIR/config.yml.PAID rm $BASEDIR/config_template.yml.PAID $BASEDIR/config.yml.PAID.tmp @@ -241,6 +245,7 @@ if $detect_changed_tests; then add_diff_tests "REPEATED_UTESTS_LONG" "test/long/" "org.apache.cassandra" add_diff_tests "REPEATED_UTESTS_STRESS" "tools/stress/test/unit/" "org.apache.cassandra.stress" add_diff_tests "REPEATED_UTESTS_FQLTOOL" "tools/fqltool/test/unit/" "org.apache.cassandra.fqltool" + add_diff_tests "REPEATED_UTESTS_SSTABLELOADER" "tools/sstableloader/test/unit/" "org.apache.cassandra.tools" add_diff_tests "REPEATED_SIMULATOR_DTESTS" "test/simulator/test/" "org.apache.cassandra.simulator.test" add_diff_tests "REPEATED_JVM_DTESTS" "test/distributed/" "org.apache.cassandra.distributed.test" add_diff_tests "REPEATED_JVM_UPGRADE_DTESTS" "test/distributed/" "org.apache.cassandra.distributed.upgrade" @@ -305,6 +310,10 @@ delete_repeated_jobs() delete_job "$1" "j11_utests_fqltool_repeat" delete_job "$1" "j17_utests_fqltool_repeat" fi + if (! (echo "$env_vars" | grep -q "REPEATED_UTESTS_SSTABLELOADER=")); then + delete_job "$1" "j11_utests_sstableloader_repeat" + delete_job "$1" "j17_utests_sstableloader_repeat" + fi if (! (echo "$env_vars" | grep -q "REPEATED_SIMULATOR_DTESTS=")); then delete_job "$1" "j11_simulator_dtests_repeat" fi @@ -386,6 +395,7 @@ build_dev_min_jobs() delete_job "$1" "j11_utests_cdc" delete_job "$1" "j11_utests_compression" delete_job "$1" "j11_utests_fqltool" + delete_job "$1" "j11_utests_sstableloader" delete_job "$1" "j11_utests_long" delete_job "$1" "j11_utests_stress" delete_job "$1" "j11_utests_system_keyspace_directory" @@ -394,6 +404,7 @@ build_dev_min_jobs() delete_job "$1" "j17_utests_cdc" delete_job "$1" "j17_utests_compression" delete_job "$1" "j17_utests_fqltool" + delete_job "$1" "j17_utests_sstableloader" delete_job "$1" "j17_utests_long" delete_job "$1" "j17_utests_stress" delete_job "$1" "j11_utests_latest" @@ -403,6 +414,7 @@ build_dev_min_jobs() delete_job "$1" "start_utests_stress" delete_job "$1" "start_utests_long" delete_job "$1" "start_utests_fqltool" + delete_job "$1" "start_utests_sstableloader" delete_job "$1" "start_utests_compression" delete_job "$1" "start_utests_cdc" delete_job "$1" "start_j17_cqlsh-dtests-latest" diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index fd32596c646b..60e1f093ccc2 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -127,9 +127,9 @@ def pipelineProfiles() { return [ 'packaging': ['artifacts', 'lint', 'debian', 'redhat'], 'skinny': ['lint', 'cqlsh-test', 'test', 'jvm-dtest', 'simulator-dtest', 'dtest'], - 'pre-commit': ['artifacts', 'lint', 'debian', 'redhat', 'fqltool-test', 'cqlsh-test', 'test', 'test-latest', 'stress-test', 'test-burn', 'jvm-dtest', 'simulator-dtest', 'dtest', 'dtest-latest'], - 'pre-commit w/ upgrades': ['artifacts', 'lint', 'debian', 'redhat', 'fqltool-test', 'cqlsh-test', 'test', 'test-latest', 'stress-test', 'test-burn', 'jvm-dtest', 'jvm-dtest-upgrade', 'simulator-dtest', 'dtest', 'dtest-novnode', 'dtest-latest', 'dtest-upgrade'], - 'post-commit': ['artifacts', 'lint', 'debian', 'redhat', 'fqltool-test', 'cqlsh-test', 'test-cdc', 'test', 'test-latest', 'test-compression', 'stress-test', 'test-burn', 'long-test', 'test-oa', 'test-system-keyspace-directory', 'jvm-dtest', 'jvm-dtest-upgrade', 'simulator-dtest', 'dtest', 'dtest-novnode', 'dtest-latest', 'dtest-large', 'dtest-large-novnode', 'dtest-large-latest', 'dtest-upgrade', 'dtest-upgrade-novnode', 'dtest-upgrade-large', 'dtest-upgrade-novnode-large'], + 'pre-commit': ['artifacts', 'lint', 'debian', 'redhat', 'fqltool-test', 'sstableloader-test', 'cqlsh-test', 'test', 'test-latest', 'stress-test', 'test-burn', 'jvm-dtest', 'simulator-dtest', 'dtest', 'dtest-latest'], + 'pre-commit w/ upgrades': ['artifacts', 'lint', 'debian', 'redhat', 'fqltool-test', 'sstableloader-test', 'cqlsh-test', 'test', 'test-latest', 'stress-test', 'test-burn', 'jvm-dtest', 'jvm-dtest-upgrade', 'simulator-dtest', 'dtest', 'dtest-novnode', 'dtest-latest', 'dtest-upgrade'], + 'post-commit': ['artifacts', 'lint', 'debian', 'redhat', 'fqltool-test', 'sstableloader-test', 'cqlsh-test', 'test-cdc', 'test', 'test-latest', 'test-compression', 'stress-test', 'test-burn', 'long-test', 'test-oa', 'test-system-keyspace-directory', 'jvm-dtest', 'jvm-dtest-upgrade', 'simulator-dtest', 'dtest', 'dtest-novnode', 'dtest-latest', 'dtest-large', 'dtest-large-novnode', 'dtest-large-latest', 'dtest-upgrade', 'dtest-upgrade-novnode', 'dtest-upgrade-large', 'dtest-upgrade-novnode-large'], 'custom': [] ] } @@ -164,6 +164,7 @@ def tasks() { // (some buffer on the heaviest split under the 1h max is required, ref `timeout(…)` in `test(…)`) 'cqlsh-test': [splits: 1], 'fqltool-test': [splits: 1, size: 'small'], + 'sstableloader-test': [splits: 1, size: 'small'], 'test-cdc': [splits: 20], 'test': [splits: 20], 'test-latest': [splits: 20], diff --git a/CHANGES.txt b/CHANGES.txt index 4ae4657a9475..abbf17c276fd 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Migrate sstableloader code to its own tools directory and artifact (CASSANDRA-20328) * Stop AutoRepair monitoring thread upon Cassandra shutdown (CASSANDRA-20623) * Avoid duplicate hardlink error upon forceful taking of ephemeral snapshots during repair (CASSANDRA-20490) * When a custom disk error handler fails to initiate, fail the startup of a node instead of using the no-op handler (CASSANDRA-20614) diff --git a/bin/sstableloader b/bin/sstableloader index 9045adfda392..74cc041538e0 100755 --- a/bin/sstableloader +++ b/bin/sstableloader @@ -32,18 +32,16 @@ elif [ -r "$CASSANDRA_INCLUDE" ]; then . "$CASSANDRA_INCLUDE" fi -if [ -z "$CLASSPATH" ]; then - echo "You must set the CLASSPATH var" >&2 - exit 1 -fi +# SSTableLoader has been moved to tools/bin, this script simply +# invokes the script in the new path. +SSTABLELOADER_PATH="$CASSANDRA_HOME/tools/bin/sstableloader" -if [ "x$MAX_HEAP_SIZE" = "x" ]; then - MAX_HEAP_SIZE="256M" +if [ ! -f "$SSTABLELOADER_PATH" ]; then + echo "Error: sstableloader has moved to the tools directory. \ +Detected that $SSTABLELOADER_PATH does not exist." >&2 + exit 2 fi -"$JAVA" $JAVA_AGENT -ea -cp "$CLASSPATH" $JVM_OPTS -Xmx$MAX_HEAP_SIZE \ - -Dcassandra.storagedir="$cassandra_storagedir" \ - -Dlogback.configurationFile=logback-tools.xml \ - org.apache.cassandra.tools.BulkLoader "$@" +"$SSTABLELOADER_PATH" "$@" # vi:ai sw=4 ts=4 tw=0 et diff --git a/build.xml b/build.xml index ef1fdd661dd5..9fbbc815d028 100644 --- a/build.xml +++ b/build.xml @@ -385,26 +385,38 @@ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - @@ -1082,7 +1083,7 @@ - @@ -1101,6 +1102,7 @@ + @@ -1129,7 +1131,7 @@ - + @@ -1141,9 +1143,11 @@ + + @@ -1270,10 +1274,12 @@ + + @@ -2079,8 +2085,10 @@ + + @@ -2140,7 +2148,7 @@ + + + + + @@ -2185,4 +2203,5 @@ + diff --git a/debian/cassandra.install b/debian/cassandra.install index 7ee058bb593e..0573128ddd46 100644 --- a/debian/cassandra.install +++ b/debian/cassandra.install @@ -16,7 +16,6 @@ bin/cassandra.in.sh usr/share/cassandra bin/cassandra usr/sbin bin/nodetool usr/bin bin/sstableutil usr/bin -bin/sstableloader usr/bin bin/cqlsh usr/bin bin/cqlsh.py usr/bin bin/sstablescrub usr/bin @@ -28,6 +27,7 @@ tools/bin/auditlogviewer usr/bin tools/bin/jmxtool usr/bin tools/bin/hash_password usr/bin tools/bin/sstablepartitions usr/bin +tools/bin/sstableloader usr/bin lib/*.jar usr/share/cassandra/lib lib/*.zip usr/share/cassandra/lib lib/x86_64/* usr/share/cassandra/lib/x86_64 diff --git a/debian/rules b/debian/rules index b3de486117c1..70305b41e981 100755 --- a/debian/rules +++ b/debian/rules @@ -67,6 +67,10 @@ install: build dh_install $(BUILD_DIR)/tools/lib/fqltool.jar \ usr/share/cassandra + # Copy sstableloader jars + dh_install $(BUILD_DIR)/tools/lib/sstableloader.jar \ + usr/share/cassandra + dh_link usr/share/cassandra/apache-cassandra-$(VERSION).jar \ usr/share/cassandra/apache-cassandra.jar diff --git a/ide/idea-iml-file.xml b/ide/idea-iml-file.xml index 1d189db8d6bc..4daf6af09613 100644 --- a/ide/idea-iml-file.xml +++ b/ide/idea-iml-file.xml @@ -30,6 +30,8 @@ + + diff --git a/ide/idea/workspace.xml b/ide/idea/workspace.xml index 13018f4052d2..5eb1a70b78b3 100644 --- a/ide/idea/workspace.xml +++ b/ide/idea/workspace.xml @@ -360,6 +360,7 @@ + diff --git a/ide/nbproject/ide-actions.xml b/ide/nbproject/ide-actions.xml index a53cd960712a..b7b98604372b 100644 --- a/ide/nbproject/ide-actions.xml +++ b/ide/nbproject/ide-actions.xml @@ -5,6 +5,7 @@ + diff --git a/ide/nbproject/project.xml b/ide/nbproject/project.xml index f770d1e68dd5..cce2bfdadc75 100644 --- a/ide/nbproject/project.xml +++ b/ide/nbproject/project.xml @@ -35,6 +35,12 @@ ${project.dir}/tools/fqltool/src UTF-8 + + + java + ${project.dir}/tools/sstableloader/src + UTF-8 + java @@ -53,6 +59,12 @@ ${project.dir}/tools/fqltool/test/unit UTF-8 + + + java + ${project.dir}/tools/sstableloader/test/unit + UTF-8 + java @@ -164,6 +176,12 @@ fqltool-build + + folder + ${project.dir}/build/classes/sstableloader + + sstableloader-build + folder ${project.dir}/build/classes/stress @@ -194,6 +212,12 @@ fqltool-build-test + + folder + ${project.dir}/build/test/sstableloader-classes + + sstableloader-build-test + folder ${project.dir}/build/test/stress-classes @@ -214,6 +238,10 @@ ${project.dir}/tools/fqltool/src + + + ${project.dir}/tools/sstableloader/src + ${project.dir}/tools/stress/src @@ -226,6 +254,10 @@ ${project.dir}/tools/fqltool/test/unit + + + ${project.dir}/tools/sstableloader/test/unit + ${project.dir}/tools/stress/test/unit @@ -295,6 +327,11 @@ ${cassandra.classpath.jars}:${project.dir}/build/classes/main ${project.dir}/build/classes/fqltool + + ${project.dir}/tools/sstableloader/src + ${cassandra.classpath.jars}:${project.dir}/build/classes/main + ${project.dir}/build/classes/sstableloader + ${project.dir}/tools/stress/src ${cassandra.classpath.jars}:${project.dir}/build/classes/main @@ -313,7 +350,7 @@ ${project.dir}/test/simulator/test ${project.dir}/test/harry/main - ${cassandra.classpath.jars}:${project.dir}/build/classes/main:${project.dir}/build/classes/fqltool/:${project.dir}/build/classes/stress/ + ${cassandra.classpath.jars}:${project.dir}/build/classes/main:${project.dir}/build/classes/fqltool/:${project.dir}/build/classes/sstableloader:${project.dir}/build/classes/stress/ ${project.dir}/build/test/classes @@ -322,6 +359,12 @@ ${cassandra.classpath.jars}:${project.dir}/build/classes/main:${project.dir}/build/classes/fqltool/ ${project.dir}/build/test/fqltool-classes + + ${project.dir}/tools/sstableloader/test/unit + + ${cassandra.classpath.jars}:${project.dir}/build/classes/main:${project.dir}/build/classes/sstableloader/ + ${project.dir}/build/test/sstableloader-classes + ${project.dir}/tools/stress/test/unit diff --git a/redhat/cassandra.spec b/redhat/cassandra.spec index 90251725ccb9..c151ebaa12b1 100644 --- a/redhat/cassandra.spec +++ b/redhat/cassandra.spec @@ -120,6 +120,9 @@ cp -p %{_get_dist_dir}/tools/lib/stress.jar %{buildroot}/usr/share/%{username}/ # copy fqltool jar cp -p %{_get_dist_dir}/tools/lib/fqltool.jar %{buildroot}/usr/share/%{username}/ +# copy sstableloader jar +cp -p %{_get_dist_dir}/tools/lib/sstableloader.jar %{buildroot}/usr/share/%{username}/ + # copy binaries mv bin/cassandra %{buildroot}/usr/sbin/ cp -p bin/* %{buildroot}/usr/bin/ diff --git a/src/java/org/apache/cassandra/tools/CmdLineOptions.java b/src/java/org/apache/cassandra/tools/CmdLineOptions.java new file mode 100644 index 000000000000..504ac1aaf6ce --- /dev/null +++ b/src/java/org/apache/cassandra/tools/CmdLineOptions.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.tools; + +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class CmdLineOptions extends Options +{ + /** + * Add option with argument and argument name + * + * @param opt shortcut for option name + * @param longOpt complete option name + * @param argName argument name + * @param description description of the option + * @return updated Options object + */ + public Options addOption(String opt, String longOpt, String argName, String description) + { + Option option = new Option(opt, longOpt, true, description); + option.setArgName(argName); + + return addOption(option); + } + + /** + * Add option with argument and argument name that accepts being defined multiple times as a list + * + * @param opt shortcut for option name + * @param longOpt complete option name + * @param argName argument name + * @param description description of the option + * @return updated Options object + */ + public Options addOptionList(String opt, String longOpt, String argName, String description) + { + Option option = new Option(opt, longOpt, true, description); + option.setArgName(argName); + option.setArgs(Option.UNLIMITED_VALUES); + + return addOption(option); + } + + /** + * Add option without argument + * + * @param opt shortcut for option name + * @param longOpt complete option name + * @param description description of the option + * @return updated Options object + */ + public Options addOption(String opt, String longOpt, String description) + { + return addOption(new Option(opt, longOpt, false, description)); + } +} diff --git a/src/java/org/apache/cassandra/tools/StandaloneSSTableUtil.java b/src/java/org/apache/cassandra/tools/StandaloneSSTableUtil.java index 1dd0ba53d6cc..badea964c529 100644 --- a/src/java/org/apache/cassandra/tools/StandaloneSSTableUtil.java +++ b/src/java/org/apache/cassandra/tools/StandaloneSSTableUtil.java @@ -30,7 +30,6 @@ import java.util.function.BiPredicate; import org.apache.cassandra.io.util.File; -import static org.apache.cassandra.tools.BulkLoader.CmdLineOptions; public class StandaloneSSTableUtil { diff --git a/src/java/org/apache/cassandra/tools/StandaloneScrubber.java b/src/java/org/apache/cassandra/tools/StandaloneScrubber.java index fe12e6d723c9..93275ad28c45 100644 --- a/src/java/org/apache/cassandra/tools/StandaloneScrubber.java +++ b/src/java/org/apache/cassandra/tools/StandaloneScrubber.java @@ -51,7 +51,6 @@ import org.apache.cassandra.io.util.File; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tools.BulkLoader.CmdLineOptions; import org.apache.cassandra.utils.JVMStabilityInspector; import org.apache.cassandra.utils.OutputHandler; import org.apache.cassandra.utils.Pair; diff --git a/src/java/org/apache/cassandra/tools/StandaloneSplitter.java b/src/java/org/apache/cassandra/tools/StandaloneSplitter.java index db9519041520..965186ce7eec 100644 --- a/src/java/org/apache/cassandra/tools/StandaloneSplitter.java +++ b/src/java/org/apache/cassandra/tools/StandaloneSplitter.java @@ -50,7 +50,7 @@ import org.apache.cassandra.utils.JVMStabilityInspector; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_UTIL_ALLOW_TOOL_REINIT_FOR_TEST; -import static org.apache.cassandra.tools.BulkLoader.CmdLineOptions; + import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; public class StandaloneSplitter diff --git a/src/java/org/apache/cassandra/tools/StandaloneUpgrader.java b/src/java/org/apache/cassandra/tools/StandaloneUpgrader.java index 52e9f8c2955a..069fdbe8451f 100644 --- a/src/java/org/apache/cassandra/tools/StandaloneUpgrader.java +++ b/src/java/org/apache/cassandra/tools/StandaloneUpgrader.java @@ -46,7 +46,6 @@ import org.apache.cassandra.utils.OutputHandler; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_UTIL_ALLOW_TOOL_REINIT_FOR_TEST; -import static org.apache.cassandra.tools.BulkLoader.CmdLineOptions; public class StandaloneUpgrader { diff --git a/src/java/org/apache/cassandra/tools/StandaloneVerifier.java b/src/java/org/apache/cassandra/tools/StandaloneVerifier.java index 241fe2d43211..f00c3168f346 100644 --- a/src/java/org/apache/cassandra/tools/StandaloneVerifier.java +++ b/src/java/org/apache/cassandra/tools/StandaloneVerifier.java @@ -53,7 +53,6 @@ import org.apache.cassandra.utils.Throwables; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_UTIL_ALLOW_TOOL_REINIT_FOR_TEST; -import static org.apache.cassandra.tools.BulkLoader.CmdLineOptions; public class StandaloneVerifier { diff --git a/tools/bin/cassandra.in.sh b/tools/bin/cassandra.in.sh index 056bedc71504..79bbfcd34a64 100644 --- a/tools/bin/cassandra.in.sh +++ b/tools/bin/cassandra.in.sh @@ -39,7 +39,7 @@ if [ -d $CASSANDRA_HOME/build ] ; then if [ "$jars_cnt" = "1" ]; then cassandra_bin="`ls -1 $CASSANDRA_HOME/build/apache-cassandra*.jar | grep -v javadoc | grep -v sources`" - cassandra_bin="$cassandra_bin:$CASSANDRA_HOME/build/classes/stress:$CASSANDRA_HOME/build/classes/fqltool" + cassandra_bin="$cassandra_bin:$CASSANDRA_HOME/build/classes/stress:$CASSANDRA_HOME/build/classes/fqltool:$CASSANDRA_HOME/build/classes/sstableloader" CLASSPATH="$CLASSPATH:$cassandra_bin" fi fi diff --git a/tools/bin/sstableloader b/tools/bin/sstableloader new file mode 100755 index 000000000000..9045adfda392 --- /dev/null +++ b/tools/bin/sstableloader @@ -0,0 +1,49 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ "x$CASSANDRA_INCLUDE" = "x" ]; then + # Locations (in order) to use when searching for an include file. + for include in "`dirname "$0"`/cassandra.in.sh" \ + "$HOME/.cassandra.in.sh" \ + /usr/share/cassandra/cassandra.in.sh \ + /usr/local/share/cassandra/cassandra.in.sh \ + /opt/cassandra/cassandra.in.sh; do + if [ -r "$include" ]; then + . "$include" + break + fi + done +elif [ -r "$CASSANDRA_INCLUDE" ]; then + . "$CASSANDRA_INCLUDE" +fi + +if [ -z "$CLASSPATH" ]; then + echo "You must set the CLASSPATH var" >&2 + exit 1 +fi + +if [ "x$MAX_HEAP_SIZE" = "x" ]; then + MAX_HEAP_SIZE="256M" +fi + +"$JAVA" $JAVA_AGENT -ea -cp "$CLASSPATH" $JVM_OPTS -Xmx$MAX_HEAP_SIZE \ + -Dcassandra.storagedir="$cassandra_storagedir" \ + -Dlogback.configurationFile=logback-tools.xml \ + org.apache.cassandra.tools.BulkLoader "$@" + +# vi:ai sw=4 ts=4 tw=0 et diff --git a/tools/sstableloader/build.xml b/tools/sstableloader/build.xml new file mode 100644 index 000000000000..401ba3aea1a8 --- /dev/null +++ b/tools/sstableloader/build.xml @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/java/org/apache/cassandra/tools/BulkLoadConnectionFactory.java b/tools/sstableloader/src/org/apache/cassandra/tools/BulkLoadConnectionFactory.java similarity index 100% rename from src/java/org/apache/cassandra/tools/BulkLoadConnectionFactory.java rename to tools/sstableloader/src/org/apache/cassandra/tools/BulkLoadConnectionFactory.java diff --git a/src/java/org/apache/cassandra/tools/BulkLoadException.java b/tools/sstableloader/src/org/apache/cassandra/tools/BulkLoadException.java similarity index 100% rename from src/java/org/apache/cassandra/tools/BulkLoadException.java rename to tools/sstableloader/src/org/apache/cassandra/tools/BulkLoadException.java diff --git a/src/java/org/apache/cassandra/tools/BulkLoader.java b/tools/sstableloader/src/org/apache/cassandra/tools/BulkLoader.java similarity index 87% rename from src/java/org/apache/cassandra/tools/BulkLoader.java rename to tools/sstableloader/src/org/apache/cassandra/tools/BulkLoader.java index 92ccbb96be7c..a3a4dda35046 100644 --- a/src/java/org/apache/cassandra/tools/BulkLoader.java +++ b/tools/sstableloader/src/org/apache/cassandra/tools/BulkLoader.java @@ -25,8 +25,6 @@ import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; import com.datastax.driver.core.AuthProvider; import com.datastax.driver.core.RemoteEndpointAwareJdkSSLOptions; @@ -313,52 +311,4 @@ public StreamingChannel.Factory getConnectionFactory() return new BulkLoadConnectionFactory(serverEncOptions, storagePort); } } - - public static class CmdLineOptions extends Options - { - /** - * Add option with argument and argument name - * @param opt shortcut for option name - * @param longOpt complete option name - * @param argName argument name - * @param description description of the option - * @return updated Options object - */ - public Options addOption(String opt, String longOpt, String argName, String description) - { - Option option = new Option(opt, longOpt, true, description); - option.setArgName(argName); - - return addOption(option); - } - - /** - * Add option with argument and argument name that accepts being defined multiple times as a list - * @param opt shortcut for option name - * @param longOpt complete option name - * @param argName argument name - * @param description description of the option - * @return updated Options object - */ - public Options addOptionList(String opt, String longOpt, String argName, String description) - { - Option option = new Option(opt, longOpt, true, description); - option.setArgName(argName); - option.setArgs(Option.UNLIMITED_VALUES); - - return addOption(option); - } - - /** - * Add option without argument - * @param opt shortcut for option name - * @param longOpt complete option name - * @param description description of the option - * @return updated Options object - */ - public Options addOption(String opt, String longOpt, String description) - { - return addOption(new Option(opt, longOpt, false, description)); - } - } } diff --git a/src/java/org/apache/cassandra/tools/LoaderOptions.java b/tools/sstableloader/src/org/apache/cassandra/tools/LoaderOptions.java similarity index 99% rename from src/java/org/apache/cassandra/tools/LoaderOptions.java rename to tools/sstableloader/src/org/apache/cassandra/tools/LoaderOptions.java index a68fb012eb0b..74940e0df6de 100644 --- a/src/java/org/apache/cassandra/tools/LoaderOptions.java +++ b/tools/sstableloader/src/org/apache/cassandra/tools/LoaderOptions.java @@ -49,7 +49,6 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.File; import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.tools.BulkLoader.CmdLineOptions; import static org.apache.cassandra.config.DataRateSpec.DataRateUnit.MEBIBYTES_PER_SECOND; import static org.apache.cassandra.config.EncryptionOptions.ClientEncryptionOptions.ClientAuth.REQUIRED; @@ -77,9 +76,8 @@ public class LoaderOptions /** * Throttle defined in megabits per second. CASSANDRA-10637 introduced a builder and is the preferred way to * provide options instead of using these constant fields. - * @deprecated Use {@code throttle-mib} instead + * @deprecated Use {@code throttle-mib} instead. See CASSANDRA-17677 */ - /** @deprecated See CASSANDRA-17677 */ @Deprecated(since = "5.0") public static final String THROTTLE_MBITS = "throttle"; public static final String THROTTLE_MEBIBYTES = "throttle-mib"; diff --git a/src/java/org/apache/cassandra/utils/NativeSSTableLoaderClient.java b/tools/sstableloader/src/org/apache/cassandra/utils/NativeSSTableLoaderClient.java similarity index 100% rename from src/java/org/apache/cassandra/utils/NativeSSTableLoaderClient.java rename to tools/sstableloader/src/org/apache/cassandra/utils/NativeSSTableLoaderClient.java diff --git a/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java b/tools/sstableloader/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java similarity index 89% rename from test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java rename to tools/sstableloader/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java index b203af1b4357..04e1380164eb 100644 --- a/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java +++ b/tools/sstableloader/test/unit/org/apache/cassandra/tools/LoaderOptionsTest.java @@ -25,12 +25,11 @@ import java.security.Permission; import com.google.common.net.HostAndPort; +import org.apache.commons.io.FileUtils; import org.junit.Test; import org.apache.cassandra.io.util.File; -import org.apache.cassandra.transport.TlsTestUtils; -import static org.apache.cassandra.tools.OfflineToolUtils.sstableDirName; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -102,8 +101,8 @@ public void testEncryptionSettingsOverride() throws Exception sstableDirName("legacy_sstables", "legacy_ma_simple") }; LoaderOptions options = LoaderOptions.builder().parseArgs(args).build(); // Below two lines validating server encryption options is to verify that we are loading config from the yaml - assertEquals(TlsTestUtils.SERVER_KEYSTORE_PATH, options.serverEncOptions.keystore); - assertEquals(TlsTestUtils.SERVER_KEYSTORE_PASSWORD, options.serverEncOptions.keystore_password); + assertEquals("test/conf/cassandra_ssl_test.keystore", options.serverEncOptions.keystore); + assertEquals("cassandra", options.serverEncOptions.keystore_password); // Below asserts validate the overrides for the client encryption options from the command line // Since the values are provided by (and local to) this test, they are hardcoded assertEquals("JKS", options.clientEncOptions.store_type); @@ -245,5 +244,40 @@ public void checkPermission(Permission perm, Object context) System.setSecurityManager(null); } } + + // Copied from OfflineToolUtils + + public static String sstableDirName(String ks, String cf) throws IOException + { + return sstableDir(ks, cf).absolutePath(); + } + + public static File sstableDir(String ks, String cf) throws IOException + { + File dataDir = copySSTables(); + File ksDir = new File(dataDir, ks); + File[] cfDirs = ksDir.tryList((dir, name) -> cf.equals(name) || name.startsWith(cf + '-')); + return cfDirs[0]; + } + + public static File copySSTables() throws IOException + { + File dataDir = new File("build/test/cassandra/data"); + File srcDir = new File("test/data/legacy-sstables/ma"); + FileUtils.copyDirectory(new File(srcDir, "legacy_tables").toJavaIOFile(), new File(dataDir, "legacy_sstables").toJavaIOFile()); + return dataDir; + } + + // Copied from SystemExitException in unit tests + + private static class SystemExitException extends Error + { + public final int status; + + public SystemExitException(int status) + { + this.status = status; + } + } } From b7619b481d98667e8738538fc4ec632a3a072f00 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Thu, 8 May 2025 15:24:06 -0400 Subject: [PATCH 318/340] Add documentation for Accord ops and CQL developer guide Patch by Ariel Weisberg; Reviewed by Jon Meredith for CASSANDRA-20637 --- doc/modules/cassandra/nav.adoc | 5 + .../accord-architecture.adoc} | 2 +- .../cassandra/pages/architecture/accord.adoc | 7 + .../pages/architecture/cql-on-accord.adoc | 612 ++++++++++++++++++ .../cassandra/pages/architecture/index.adoc | 1 + .../pages/managing/operating/index.adoc | 1 + .../operating/onboarding-to-accord.adoc | 354 ++++++++++ 7 files changed, 981 insertions(+), 1 deletion(-) rename doc/modules/cassandra/pages/{developing/accord/index.adoc => architecture/accord-architecture.adoc} (99%) create mode 100644 doc/modules/cassandra/pages/architecture/accord.adoc create mode 100644 doc/modules/cassandra/pages/architecture/cql-on-accord.adoc create mode 100644 doc/modules/cassandra/pages/managing/operating/onboarding-to-accord.adoc diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc index 8c28d631555a..813aea24f110 100644 --- a/doc/modules/cassandra/nav.adoc +++ b/doc/modules/cassandra/nav.adoc @@ -23,6 +23,9 @@ *** xref:cassandra:architecture/guarantees.adoc[] *** xref:cassandra:architecture/messaging.adoc[] *** xref:cassandra:architecture/streaming.adoc[] +*** xref:cassandra:architecture/accord.adoc[] +**** xref:cassandra:architecture/accord-architecture.adoc[] +**** xref:cassandra:architecture/cql-on-accord.adoc[] ** xref:cassandra:developing/data-modeling/index.adoc[] *** xref:cassandra:developing/data-modeling/intro.adoc[] @@ -107,12 +110,14 @@ **** xref:cassandra:managing/operating/transientreplication.adoc[Transient replication] **** xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] **** xref:cassandra:managing/operating/password_validation.adoc[Password validation] +**** xref:cassandra:managing/operating/onboarding-to-accord.adoc[] *** xref:cassandra:managing/tools/index.adoc[Tools] **** xref:cassandra:managing/tools/cqlsh.adoc[cqlsh: the CQL shell] **** xref:cassandra:managing/tools/nodetool/nodetool.adoc[nodetool] **** xref:cassandra:managing/tools/sstable/index.adoc[SSTable tools] **** xref:cassandra:managing/tools/cassandra_stress.adoc[cassandra-stress] + ** xref:cassandra:troubleshooting/index.adoc[Troubleshooting] *** xref:cassandra:troubleshooting/finding_nodes.adoc[Finding misbehaving nodes] *** xref:cassandra:troubleshooting/reading_logs.adoc[Reading Cassandra logs] diff --git a/doc/modules/cassandra/pages/developing/accord/index.adoc b/doc/modules/cassandra/pages/architecture/accord-architecture.adoc similarity index 99% rename from doc/modules/cassandra/pages/developing/accord/index.adoc rename to doc/modules/cassandra/pages/architecture/accord-architecture.adoc index 8320b49a0a5b..201abd861ec6 100644 --- a/doc/modules/cassandra/pages/developing/accord/index.adoc +++ b/doc/modules/cassandra/pages/architecture/accord-architecture.adoc @@ -1,4 +1,4 @@ -== Accord Intro += Accord Architecture This document is intended to facilitate quick dive into Accord and Cassandra Integration code for anyone interested in the project. Readers diff --git a/doc/modules/cassandra/pages/architecture/accord.adoc b/doc/modules/cassandra/pages/architecture/accord.adoc new file mode 100644 index 000000000000..51f9f953ef5c --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/accord.adoc @@ -0,0 +1,7 @@ += Accord + +Accord is one of the transaction protocols supported by Apache Cassandra. Accord is a separate sub-project that +is implemented as a library that is Cassandra agnostic. + +* xref:architecture/accord-architecture.adoc[] +* xref:architecture/cql-on-accord.adoc[] diff --git a/doc/modules/cassandra/pages/architecture/cql-on-accord.adoc b/doc/modules/cassandra/pages/architecture/cql-on-accord.adoc new file mode 100644 index 000000000000..91a8fc285440 --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/cql-on-accord.adoc @@ -0,0 +1,612 @@ += Developers guide to CQL on Accord + +== Intro + +Accord is implemented as a library that is agnostic to the underlying +database it integrates with. It has little to no awareness of schema, +query language, messaging, threading etc. Instead it presents interfaces +for the database to implement that describe the configuration and +topology of the database, what reads and writes need to execute and what +their dependencies are, and how to actually execute reads and writes at +the configured locations. + +This guide describes how Cassandra goes about leveraging those +interfaces to implement reading and writing CQL as well as live +migrating from CQL running on Cassandra to CQL running on Accord. + +This guide doesn't cover how Accord works and doesn't cover all parts of +Accord that are implemented in Cassandra like threading, caching, +persistence, and messaging. It also isn't intended to be a user guide +and doesn't fully overlap with the xref:cassandra:managing/operating/onboarding-to-accord.adoc[user guide]. You should start with the +xref:cassandra:managing/operating/onboarding-to-accord.adoc[user guide] to get any context that may be missing here. + +== Anatomy of a transaction + +The primary way of interacting with Accord is to define a transaction +using +https://github.com/apache/cassandra-accord/blob/134df57677bbd5092994923a4dc2f15cd1d033d1/accord-core/src/main/java/accord/primitives/Txn.java#L42[Txn/Txn.InMemory] +and then asking Accord to execute the transaction. Transactions express +what they touch by declaring a set of keys or ranges that will be +read/written to. This set needs to be declared up front and can't change +during transaction execution and the transaction can be either a key +transaction or range transaction but not both. + +Range transactions are more expensive for Accord to execute as the +dependency tracking work Accord has to do is more CPU and memory +intensive and the transactions are more likely to conflict and block +execution of other transactions. + +Accord is not aware of tables only ranges and keys. Keys and ranges can +span any tables managed by Accord and the keys and ranges encode the +tables they apply to. So a range transaction covering multiple tables +would have a range per table and from Accord's perspective these are +completely different ranges. + +Transactions also declare a `Kind` which can be `Read`, `Write` +(Read/Write), `EphemeralRead`, and `ExclusiveSyncPoint`. `Read`, and +`Write` are what you would expect. `EphemeralRead` is a read that only +provides per key linearizability, but offers better performance compared +to `Read` . + +`ExclusiveSyncPoint` is transaction that can be used to establish a +happens before relationship with its dependencies without interfering +with their execution. `ExclusiveSyncPoint` is used for live migration +and repair to ensure the visibility at `ALL` of all committed Accord +transactions to non-transactional reads. + +=== Keys and Ranges + +`Keys` and `Ranges` are prefixed with `TableId` in the most significant +position to allow Accord to interact with multiple tables without +knowing anything about schema. From Accord's perspective there is just a +set of ranges that it is responsible for replicating and transacting +over, and they can be compared, sorted, and split, but beyond that they +are completely opaque. A follow on effect from this is that token ranges +(or token ring) are per table. + +`Key` is conceptually similar to `DecoratedKey` and is implemented by +`https://github.com/apache/cassandra/blob/63d3538ba7352635b7b61a205b40e035e62b8d5d/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java#L43[PartitionKey]` +. `RoutingKey` is conceptually similar to `Token` and is implemented by +`https://github.com/apache/cassandra/blob/63d3538ba7352635b7b61a205b40e035e62b8d5d/src/java/org/apache/cassandra/service/accord/api/TokenKey.java#L51[TokenKey]` +. + +Accord `Range` is conceptually equivalent to Cassandra's +`Range++<++RingPosition++>++` and is implemented by +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/TokenRange.java[TokenRange]`. +Accord `Range` is start exclusive and end inclusive just like +Cassandra's `Range` and we use it exclusively in that mode. There are no +other forms of inclusive/exclusive bound or range used directly by +`Accord`. Accord `Range`'s implementation suggests support for other +forms of bounds but it's not currently supported. It's theoretically +possible to use something similar to `Range++<++PartitionPosition++>++` +as the implementation of Accord's `Range` but we don't do that because +Cassandra doesn't support splitting partitions. + +To integrate Cassandra with Accord it's necessary to have a few +different versions of `TokenKey` that make it possible to describe +cluster topology and perform query routing to Accord across a range of +partitioners. A `TokenKey` can be a sentinel for a given table which +maps to `-inf` or `{plus}inf` for that table and it's possible to create +a minimum sentinel that is ++<++ `-inf` or ++>++ `{plus}inf` . +Additionally it's possible to declare a `TokenKey` that is between +`token` and either `token - 1` or `token {plus} 1` . + +Accord expects to be able to convert a `RoutingKey` to a `Range` which +is facilitated by being able to create these in between tokens without +requiring the partitioner to support increment or decrement on token. +Partition range reads also leverage these in between tokens to convert +`Range` bounds from inclusive to exclusive and vice versa to match the +inclusivity/exclusivity of the query that is being executed. + +=== Seekable, Unseekable, Routable + +The implementations of these interfaces are always prefixed with +`TableId` most of which were just discussed. + +A `Seekable` has enough information that it can be used to both route a +query and then execute it because it identifies what exactly to read and +write. An `Unseekable` is more compact (just a token) for Accord to work +with and can be used to route and schedule transaction execution. A +`Routable` could be either `Seekable` or `Unseekable` and is generally +used when you need to handle both. + +`Seekable` can be either a `Key` or an Accord `Range`. `Key` has both +routing (token) and partition key/clustering information. `Range` is +`Seekable` but its bounds are only `Routable`. `Range` is in an odd +place in terms of being `Seekable` . It's helpful because APIs can +accept `Seekable` and then handle both `Key` and `Range` domains. + +`Seekables` is the collection version of `Seekable` and can be either +`Keys` or `Ranges`. + +`Unseekable` can either a `RoutingKey` (`TokenKey`) or `Range` +(`TokenRange`) and `Unseekables` is either `RoutingKeys` or `Ranges`. +`Route` and various kinds of `Routables` exists, but are primarily used +inside Accord. + +=== Data + +`https://github.com/apache/cassandra-accord/blob/134df57677bbd5092994923a4dc2f15cd1d033d1/accord-core/src/main/java/accord/api/Data.java#L28[Data]` +is an opaque container for data that has been read during execution of a +transaction. Accord doesn't know anything about the contents and the +only required interface for `Data` is that they can be merged since +Accord will execute multiple reads at different command stores and will +need to merge the result. + +`Data` is implemented by +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnData.java#L47[TxnData]` +which is a glorified map from a unique integer identifying each piece of +data read to `TxnDataValue` which can be either +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnDataKeyValue.java[TxnDataKeyValue]` +or +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnDataRangeValue.java[TxnDataRangeValue]` +. `TxnDataKeyValue` doesn't support merging because Accord only reads +from a single replica, but `TxnDataRangeValue` does because the integer +key for `TxnData` identifies the logical read in the transaction, but +the actual execution of the range read could touch an arbitrary number +of command stores covered by the range and each will produce their own +`TxnDataRangeValue` for their portion of the read. + +=== Result + +`https://github.com/apache/cassandra-accord/blob/134df57677bbd5092994923a4dc2f15cd1d033d1/accord-core/src/main/java/accord/api/Result.java[Result]` +is the interface for what is returned by `Query` and ends up being +returned as the non-error result by Accord to the coordinator of a +transaction. This is also implemented by `TxnData` for key read results +and by `TxnRangeReadResult` for range reads. + +There is also `RetryNewProtocolResult` which can be returned by +Cassandra's integration with Accord during live migration. This retry +error indicates that Accord determined the transaction's execute time is +in an epoch where Accord does not manage some or all of that data for +read or write so the transaction should be retried on whatever system +currently manages that data. + +=== Read + +`https://github.com/apache/cassandra-accord/blob/trunk/accord-core/src/main/java/accord/api/Read.java#L32[Read]` +is where a transaction defines how data should be read during execution +in order to return a result, and it will have its `read` method invoked +along with specific keys to be read at command stores. + +A `Read` has to define all the keys it will access up front and needs to +support `slice/intersecting/merge` so Accord can send only the relevant +parts of a transactions reads to the command stores that are responsible +for persisting metadata about the transaction and executing the read. + +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnRead.java[TxnRead]` +implements `Read` and is a sorted collection of +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java#L77[TxnNamedRead]`. +The name in `TxnNamedRead` refers to what is now the integer identifier +for each logical read in the transaction. `TxnNamedRead` supports both +key and range reads although not both in the same transaction. + +The name for a read is an incrementing integer encoded at planning time with the higher order bits storing +the kind of read and the lower order bits storing the index of the read. Kinds of reads include: + +* USER - let statements +* RETURNING - Returning select in `TransactionStatement` +* AUTO++_++READ - Automatically generated reads like list index set +* CAS++_++READ - Read for CAS statements + +Every read in a transaction is executed concurrently in the read stage +threadpool and the resulting `Data` (`TxnData`) is merged into a single +value. + +`TxnRead` contains a read consistency level that is not visible to +Accord that is used to declare the read consistency level that a +transaction requires. This will be discussed more later when we cover +interoperability, but if this is set then the transaction will actually +read from multiple replicas complete with short read protection and +blocking read repair. + +=== Query + +`https://github.com/apache/cassandra-accord/blob/trunk/accord-core/src/main/java/accord/api/Query.java#L31[Query]` +is the portion of the transaction definition responsible for computing +the `Result` of the transaction that will be returned at the +coordinator. It's implemented by +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnQuery.java[TxnQuery]` +which has several different modes it can operate in. + +`Query` only has one method `compute` to compute the result and is run +on the coordinator of a transaction. There are few things `TxnQuery` is +responsible for such as validating the query is accessing data managed +by Accord generating a retry error if needed. For CAS statements it's +also responsible for checking the CAS condition and returning the +appropriate result. For range reads it's also responsible for merging +the range read results and reapplying the limit. + +`TxnQuery` also has an implementation, `UNSAFE++_++EMPTY`, used for +Accord system transactions that does no validation that Accord owns the +ranges in question. This is because from Accord's perspective it +immediately adopts all the ranges in a table when that table begins +migration to Accord, but from live migration's perspective (which Accord +can't see) there is a +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/consensus/migration/TableMigrationState.java[TableMigrationState]` +that specifies which ranges within a table are managed by Accord. + +Accord system transactions only impact Accord metadata so “they don't +exist” from the perspective of live migration and concurrent reading and +writing to data. + +=== Update + +`https://github.com/apache/cassandra-accord/blob/trunk/accord-core/src/main/java/accord/api/Update.java[Update]` +is invoked via the `apply` method on the Accord coordinator and is +responsible for taking in the `Data` from `Read` and producing the +`Write` that contains all the writes that we applied as part of +committing the transaction. + +`Update` requires support for `slice`/`intersecting`/`merge` so that +Accord only needs to distribute and persist the potentially sizable +partial or complete updates to the shards that actually need them. + +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java[TxnUpdate]` +implements `Update` and can contain completed or partial updates which +are completed when `apply` is called with the `TxnData` from `TxnRead`. +Updates that are not data dependent (blind writes) are handled +differently from non-data dependent updates. Data dependent updates are +computed at the coordinator and returned in the `TxnWrite` but non-data +dependent updates are omitted and instead are retrieved from `TxnUpdate` +at each replica when `TxnWrite.apply` is called. + +`TxnUpdate` is also responsible for populating the update with the +monotonic transactional hybrid logical clock for the execution time of +the transaction. This is used instead of the coordinator generated +timestamp for `SERIAL` and `TransactionStatement` writes. Non-SERIAL +writes use the coordinator or user supplied timestamp although this may +change in between the time of this writing and final release. + +`TxnUpdate` has a write consistency level that is not visible to Accord +and is it similar to the commit consistency level for CAS writes. If the +write consistency level is set then Accord will do synchronous commit at +the specified consistency level. Otherwise Accord defaults to +asynchronous commit. How consistency levels are handled will be covered +in interoperability and live migration. + +=== Write + +`https://github.com/apache/cassandra-accord/blob/trunk/accord-core/src/main/java/accord/api/Write.java[Write]` +is produced by invoking `Update.apply` and is not required to be +splittable/mergeable because all writes are sent to all shards. `Write` +is implemented by +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java#L74[TxnWrite]` +which each command store will invoke via `apply` for each intersecting +key. This will cause all writes in a transaction to run concurrently on +the mutation stage. + +=== Putting it all together + +With all the components of a transaction available they can be assembled +and provided to Accord to coordinate to implement all the existing CQL +interfaces as well as the new `TransactionStatement` interface. + +See +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/cql3/statements/TransactionStatement.java#L435[TransactionStatement.createTxn]` +, +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/cql3/statements/CQL3CasRequest.java#L484[CQL3CasRequest.toAccordTxn]`, +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java#L236[ConsensusMigrationHelper.mutateWithAccordAsync]`, +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/StorageProxy.java#L2206[StorageProxy.readWithAccord]`, +and +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/reads/repair/BlockingReadRepair.java#L219[BlockingReadRepair.repairViaAccordTransaction]` +. + +There isn't as much magic as you would think in how Accord executes +transactions when operating with exclusive access to a table. Accord is +able to mostly execute `ReadCommands` unmodified with some +accommodations for the fact that reads are strongly consistent from a +single replica so filtering can be pushed down. The majority of the work +is just making the description of things like CAS serializable so it can +be persisted by Accord for transaction recovery. + +Where things get complicated is live migrating to Accord and supporting +interoperability with non-Accord reads and writes. + +== Live migration + +=== Core challenges + +Accord and Paxos operate fundamentally different in terms of what they +perform consensus on and how the transactions are recovered. Paxos +performs consensus on the exact set of writes to apply and recovering a +transaction only requires the writes to be applied. Accord consensus is +on the transaction definition, a superset of the dependencies, and the +execution timestamp of the transaction. + +Accord needs to recompute the writes during transaction recovery which +means it may need to repeat any reads necessary to compute those writes +which means Accord needs reads to be repeatable during transaction +execution and recovery. Non-Accord writes cause non-determinism for +Accord reads. Accord also reads at `ONE` so it would miss `QUORUM` +writes. + +The big hammer we use to deal with this is to avoid ever requiring +Accord to read data that is not replicated at `ALL`. If we did it would +lead to non-deterministic transaction recovery. This isn't something +that can be addressed by having Accord read at `QUORUM` and then +performing blocking read repair because different Accord coordinators +can still witness different sets of non-Accord writes. + +Accord also defaults to asynchronous commit so when migrating away from +Accord it's not safe for Paxos and non-SERIAL reads to read committed +Accord writes + +=== Bridging the gap + +Cassandra needs to be highly available while transitioning, but +operations that propagate data at `ALL` like Cassandra's Data Repair +{plus} Paxos Repair, or Accord's repair syncs are not highly available. +Going forward these will be referred to as range barriers. + +At every point during migration there has to be some system safely +capable of executing every operation type. Highly available key barriers +solve this problem by allowing the migration of a single key at `QUORUM` +to meet the requirements for execution on the migration target system. + +A key barrier on Paxos uses the existing Paxos repair mechanism to apply +any partially committed transactions at `QUORUM` which can then be +safely read by Accord if Accord read's at `QUORUM`. A key barrier on +Accord uses Accord's sync mechanism to wait until all transactions in an +epoch that could have modified the key are applied at `QUORUM`. + +There is a system table and small in memory cache for key barriers to +avoid repeatedly performing key migrations, but the key migration is +only recorded if the coordinator is a replica to avoid the cache growing +too large. + +=== No non-SERIAL key migration + +One wrinkle is that it is not possible to do key migration for +non-SERIAL Cassandra writes because there is no metadata to check for +uncommitted operations like there is with Paxos and Accord. Non-SERIAL +writes include _all_ sources of non-SERIAL writes such as read repair, +logged batches, and hints. Accord doesn't have this issue as any data +managed by Accord always has metadata available since all operations are +routed through Accord. + +Splitting migration to Accord into two phases solves this issue +because while Accord is unable to safely read non-SERIAL writes it can +safely apply non-SERIAL writes as recovery of blind write transactions +is still deterministic in Accord. In the first phase of migration to +Accord all non-SERIAL writes are executed on Accord and synchronously +applied at the requested consistency level while a data repair (full or +incremental) runs and makes it safe for Accord to read non-SERIAL +writes. Paxos continues to execute all SERIAL writes because Accord is +unable to execute SERIAL writes since it can't read yet. + +After a data repair completes the second phase of migration to Accord +begins and all operations are executed on Accord after Paxos key +migration is run to ensure that the key being read by Accord has no +unapplied Paxos transactions. After a Paxos repair {plus} data repair +(full only) the remaining Paxos writes will be visible at `ALL` and +Accord can begin executing reads at `ONE` instead of the requested +consistency level and performing asynchronous commit and ignore the +requested commit/write consistency level. + +A quirk of incremental repair is that it flushes memtables before Paxos +repair runs and as a result it doesn't replicate at `ALL` the data that +Paxos repair propagated at `QUORUM`. Thus a full repair is required for +the second phase of migration to Accord so that the Paxos data ends up +repaired at `ALL`. It's possible, but difficult, to make the +migration three phases and track the Paxos repair independently so that +you could do Paxos repair and then use IR, but this is not currently +implemented. + +=== Supported consistency levels + +Live migration to/from Accord requires Accord to honor requested +consistency levels for read and write. Cassandra's Accord integration +only adds support for a subset of consistency levels listed in +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/IAccordService.java#L75[IAccordService]` +. DC aware consistency levels are not supported along with `TWO` and +`THREE`. + +Accord will always reject unsupported consistency levels even if it will +not actually be honoring them during execution to ensure that your +application remains ready to migrate away from Accord in the future. + +In the case of `ONE` as a write/commit consistency level the commit will +silently be performed at `QUORUM` + +=== Interoperability support + +Interoperability aims to extend Accord to support reading and writing at +configurable consistency levels as well as to add support for +synchronous commit. This is facilitated by extension points in Accord +that allow injecting custom implementations for various protocol steps +via +`https://github.com/apache/cassandra-accord/blob/134df57677bbd5092994923a4dc2f15cd1d033d1/accord-core/src/main/java/accord/coordinate/CoordinationAdapter.java#L64[CoordinationAdapter]` +and +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java[AccordInteropAdapter]`. + +`AccordInteropAdapter` can inject custom versions of the `execute` and +`persist` phases and does conditionally at transaction execution time +based on the read and write consistency levels provided by `TxnRead` and +`TxnUpdate` . These consistency levels can differ from the ones +requested by the application because live migration may choose to ignore +the consistency levels when they aren't needed. + +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java[AccordInteropExecution]` +allows reading at a requested consistency level. It largely inverts +control of reading in Accord and uses Cassandra's existing Read Executor +functionality to determine what nodes to contact and what commands to +send them while providing short read protection and blocking read +repair. Read executors interface with Accord via the +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/reads/ReadCoordinator.java#L37[ReadCoordinator]` +interface which can either send a regular read message or go through +Accord to send an Accord specific read message which causes the read to +execute at the appropriate command store in the appropriate +transactional context after all dependencies have been applied. + +`ReadCoordinator` also intercepts blocking read repair during execution +of an Accord transaction and executes it through the appropriate command +store. The only legitimate way for this to occur is after Paxos key +migration the data is only propagated at `QUORUM` so it is possible that +Accord reading at `QUORUM` will find replicas to read repair. It's not +strictly necessary as we already know the data is propagated at +`QUORUM`, but the support is there. + +`ReadCoordinator` also helps apply read repair mutations via Accord in +`TransactionalMode.MIXED` and during migration by applying the read +repair mutations in Accord's execute phase instead of waiting for apply. +This is safe because read repair only proposes already committed Accord +writes or already unsafe non-SERIAL writes which aren't allowed anyways. + +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java#L48[AccordInteropPersist]` +adds support for synchronous commit and commit at a requested +consistency level. It sends `AccordInteropApply` which is a synchronous +apply message that only responds once application is complete. + +https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java#L34[`TransactionalMode`] +defines the supported modes and +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java#L140[commitCLForMode]` +determines the commit consistency level and +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/consensus/TransactionalMode.java#L170[readCLForMode]` +determines the read consistency level. These two methods take into +account both the requested consistency level, the table specific +migration state, the current transactional mode, and the target +transactional mode in order to decide whether to honor the requested +consistency level. + +=== Routing requests during migration + +During migration, requests race with changes to +https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/consensus/migration/TableMigrationState.java[`TableMigrationState`] +to execute and may complete or partially complete on the system they +were originally routed to. This race is resolved by allowing requests to +return a new retry on different system error response that has to be +handled by the coordinator. It's possible that a request may still +complete after receiving a retry different system error because the +target consistency level was still met. + +Migration is per table and per token range so it's possible for part of +a table to be running on Accord and part of it to be running on Paxos. +Requests can end up executing partially on Cassandra and partially on +Accord. + +==== Detecting misrouted requests + +For Paxos this is resolved in the prepare phase where a failure to meet +the required consistency level at the prepare phase means the operation +does not run on Paxos. If the prepare phase is being performed to +recover an existing transaction then it is allowed to proceed because +recovery will deterministically create the same state every time it runs +so it's safe to repeat even after key or range migration has occurred +since those would have already recovered the transaction. + +Accord determines an `executeAt` timestamp, that is deterministic even +during transaction recovery, for each transaction that includes an epoch +that corresponds to the epoch used by `TableMigrationState` and this is +used to check all the tables and keys being touched in a transaction. +`TxnQuery` then returns a retry on different system error if the any +part of the transaction is not eligible to run on Accord. + +`ColumnFamilyStore` checks every `Mutation` to see if it is marked as +allowing potential transaction conflicts. Paxos and Accord always mark +their `Mutation`s as allowing potential transaction conflicts because +they do the work to check for them directly, but non-SERIAL sources of +`Mutation`s will be subject to that check and a +`RetryOnDifferentSystemException` is thrown if the mutation is detected +to be misrouted according to the latest cluster metadata available at +the node attempting to apply that mutation. + +`ReadCommand` has a similar arrangement where each read command is +marked with whether it allows potential transaction conflicts and when +`executeLocally` is run the check is done against cluster metadata to +determine whether or not to throw `RetryOnDifferentSystemException`. +Accord always allows potential transaction conflicts on its read +commands, but Paxos does not because Paxos does not need to read data in +order to recover transactions. + +==== Splitting write requests + +For non-SERIAL writes the `Mutation` is split into the portion that will +execute on Accord and the portion that will execute on Cassandra and the +Accord portion is executed asynchronously while the Cassandra portion is +executed synchronously. If either attempt fails due to misrouting the +write is re-split with updated cluster metadata and retried without +raising an error. + +Logged batches are currently always written to the system table and then +split for execution, and if part of the batch fails then batchlog replay +will replay the entire batch and re-split it in the process. Batchlog +replay only makes a single attempt to replay before converting the batch +contents to hints. If part of the batch was routed to Accord then there +is no node to hint so there is a fake node that a hint is written to and +when that hint is dispatched it will be split and then executed +appropriately. In https://issues.apache.org/jira/browse/CASSANDRA-20588[CASSANDRA-20588] this needs to be simplified to writing the +entire batch through Accord if any part of it should be written through +Accord because it also addresses an atomicity issue with single token +batches which can be torn when part is applied through Accord and part +is applied through Cassandra. + +Hints can be for multiple tables some of which may be Accord and some +non-Accord so splitting occurs. It's also possible a hint will be for an +operation that was sent to Accord (not a real node) via the batchlog and +it's possible that splitting discovers the hint now needs to be executed +without Accord. In that scenario the hint is converted to a hint for +every replica. This conversion can only occur once so the write +amplification is bounded. + +Splitting of mutations is done in +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/consensus/migration/ConsensusMigrationMutationHelper.java#L219[ConsensusMigrationMutationHelper]` +with the retry loop being implemented at each caller (batch mutation, +mutation, batch log, hints). + +Paxos has a retry loop but does not do any splitting because Paxos only +supports a single key. + +==== Partition range reads + +Partition range reads are managed by +`https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/reads/range/RangeCommandIterator.java#L75[RangeCommandIterator]` +which continues to split range reads using the existing algorithm that +is agnostic as to how the range command will be executed. Each generated +range read +https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/reads/range/RangeCommandIterator.java#L247[is +then split on the boundaries of which system is responsible for reading +that range] and that is wrapped in a +https://github.com/apache/cassandra/blob/122f5300855d56131948575f80ce0594547c9040/src/java/org/apache/cassandra/service/reads/range/RangeCommandIterator.java#L378[retrying +iterator] which repeats the splitting if any part of the range read ends +up routed to the wrong system. + +Range reads do not execute any key barriers and when migrating away from +Accord you will see weaker consistency compared to Paxos because Accord +does not necessarily honor commit consistency levels and does +asynchronous commit. As things currently stand it's uncertain the key +barriers would run fast enough to avoid timing out range read requests +so they are not done. + +Range reads also consume more memory when executed on Accord when a +limit is used. A single range read command is split into intersecting +command store number of range read commands that execute concurrently +and each one can return up to the limit number of results before they +are merged at the coordinator and the limit is re-applied. This could be +improved by applying the limit again before serializing or by executing +the reads serially at command stores until the limit is met. + +=== Transactional modes + +Transactional modes are set per table and define how Accord, Paxos, and +non-SERIAL operations will execute. The three supported modes are +`FULL`, `MIXED++_++READS`, and `OFF`. + +`FULL` routes all reads and writes through Accord once migration is +complete and allows Accord to ignore read and write consistency levels. +This allows Accord to perform asynchronous commit reducing the number of +WAN roundtrips from 2 to 1. + +`MIXED++_++READS` routes all writes through Accord once migration is +complete, but allows non-SERIAL reads to safely execute outside of +Accord and still read Accord writes because Accord will honor the +provided commit consistency level. This means Accord will need to +perform synchronous commit requiring an 1 extra WAN roundtrips for 2 +total. + +`OFF` is the default where everything runs either on Paxos if it is +`SERIAL` or on the usual eventually consistent paths for everything +else. + +Other modes exist for testing purposes and are disabled by default +unless unlocked via system property. diff --git a/doc/modules/cassandra/pages/architecture/index.adoc b/doc/modules/cassandra/pages/architecture/index.adoc index 9e674d95a2bb..893c2f78076d 100644 --- a/doc/modules/cassandra/pages/architecture/index.adoc +++ b/doc/modules/cassandra/pages/architecture/index.adoc @@ -7,3 +7,4 @@ This section describes the general architecture of Apache Cassandra. * xref:architecture/storage-engine.adoc[Storage Engine] * xref:architecture/guarantees.adoc[Guarantees] * xref:architecture/snitch.adoc[Snitches] +* xref:architecture/accord.adoc[Accord] diff --git a/doc/modules/cassandra/pages/managing/operating/index.adoc b/doc/modules/cassandra/pages/managing/operating/index.adoc index 2fb98594316d..8068bd3dc4d9 100644 --- a/doc/modules/cassandra/pages/managing/operating/index.adoc +++ b/doc/modules/cassandra/pages/managing/operating/index.adoc @@ -20,3 +20,4 @@ * xref:cassandra:managing/operating/transientreplication.adoc[Transient replication] * xref:cassandra:managing/operating/virtualtables.adoc[Virtual tables] * xref:cassandra:managing/operating/password_validation.adoc[Password validation] +* xref:cassandra:managing/operating/onboarding-to-accord.adoc[] diff --git a/doc/modules/cassandra/pages/managing/operating/onboarding-to-accord.adoc b/doc/modules/cassandra/pages/managing/operating/onboarding-to-accord.adoc new file mode 100644 index 000000000000..17d451500052 --- /dev/null +++ b/doc/modules/cassandra/pages/managing/operating/onboarding-to-accord.adoc @@ -0,0 +1,354 @@ += Onboarding to Accord + +== Intro + +Accord supports all existing CQL and can be enabled on a per table and +per token range within that table basis. Enabling Accord on existing tables requires a +migration process that can be done on this same per table and per range +basis that safely transitions data from being managed by Cassandra +{plus} Paxos to Cassandra {plus} Accord without downtime. + +A migration is required because Accord can't safely read data written by +non-SERIAL writes. Accord requires deterministic reads in order to have +deterministic transaction recovery and non-SERIAL writes can't be read +deterministically while still being highly available. + +This guide describes how to enable Accord and what differences to expect +when migrating your existing CQL workload to Accord. + +This guide does not cover the new transaction syntax. + +== Configuration + +=== YAML + +You need to set `accord.enabled` to true for Accord to be initialized at +startup. + +`accord.default++_++transactional++_++mode` allows you to set a default +transactional mode for newly created tables which will be used in create +table statements when no `transactional++_++mode` is specified. This +prevents accidentally creating non-Accord tables that will need +migration to Accord. + +`accord.range++_++migration` configures the behavior of altering the +`transactional++_++mode` of a table. When set to `auto` the entire ring +will be marked as migrating when the `transactional++_++mode` of a table +is altered. When set to `explicit` no ranges will be marked as migrating +when the `transactional++_++mode` of a table is altered. + +=== Table parameters + +`transactional++_++mode` can be set when a table is created +`CREATE TABLE foo WITH transactional++_++mode = ‘full'` or it can be set +by altering an existing table +`ALTER TABLE foo WITH transactional++_++mode = ‘full'`. +`transactional++_++mode` designates the target or intended transaction +system for the table and for a newly created table this will be the +transaction system that is used, but for existing tables that are being +altered the table will still need to be migrated to the target system. + +`transactional++_++mode` can be set to `full`, `mixed++_++reads`, and +`off`. `off` means that Paxos will be used and transaction statements +will be rejected. `full` means that all reads and writes will execute on +Accord. `mixed++_++reads` means that all writes will execute on Accord +along with `SERIAL` reads/writes, but non-SERIAL reads/writes will +execute on the existing eventually consistent path. Applying the +mutations for blocking read repair will always be done through Accord in +`full` in and `mixed++_++reads`. + +`transactional++_++migration++_++from` indicates whether a migration is +currently in progress although it does not indicate which ranges are +actively being migrated. This is set automatically when you create a +table or alter `transactional++_++mode` and should not be set manually. +It's possible to manually set `transactional++_++migration++_++from` to +force the completion of migration without actually running the necessary +migration steps. + +`transactional++_++migration++_++from` can be set to `none`, `off`, +`full`, and `mixed++_++reads`. `off`, `full`, and `mixed++_++reads` +correspond to the `transactional++_++mode` being migrated away from and +`none` indicates that no migration is in progress either because the +migration has completed or because the table was created with its +current `transactional++_++mode`. + +=== mixed++_++reads vs full + +When Accord is running with `transactional++_++mode` `full` it will be +able to perform asynchronous commit saving a WAN roundtrip. +`mixed++_++reads` allows non-SERIAL reads to continue to execute using +the original eventually consistent read path. `mixed++_++reads`, unlikes +`full`, always requires Accord to always synchronously commit at the +requested consistency level in order to make acknowledged Accord writes +visible to non-SERIAL reads. + +There is no `transactional++_++mode` that allows non-SERIAL writes +because they break Accord's transaction recovery resulting in +transactions appearing to have different outcomes at different nodes. + +== Accord repair + +Repair can now include an optional Accord repair that `nodetool repair` +will enable by default like Paxos repair. This repair doesn't actually +synchronize any data it just runs a transaction that checks that Accord +has resolved the state of all transactions in the repaired range up to +the point the transaction was created and that the transactions are +applied at `ALL`. + +Accord is normally doing this in the background anyways this just +ensures that it has occurred at `ALL` and hasn't experienced any delays. + +== Migration to Accord + +Migrating an existing table to run on Accord starts by altering the +table: + +.... +ALTER TABLE foo WITH transactional_mode = 'full' +.... + +After the table is altered it is required to run +`nodetool consensus++_++admin begin-migration` on ranges in the table +unless `accord.range++_++migration=auto`. + +When a range is initially marked migrating to Accord all non-SERIAL +writes will execute on Accord while `SERIAL` writes will continue to +execute on Paxos. non-SERIAL writes include regular writes, logged and +unlogged batches, hints, and read repair. Accord will perform +synchronous commit the specified consistency level requiring 2x WAN RTT. + +Tables that are migrating or are partially migrated to Accord (or back to Paxos) can be listed using +`nodetool consensus_admin list` or the sytem table `system_accord_debug.migration_state`. + +Migration to Accord consists of two phases with the first phase starting +when a range is marked migrating, and the second phase starting after a +full or incremental data repair, and then the migration completing after +a second repair which must be a full data repair {plus} Paxos repair. +While marking the range as migrating can be done automatically with +`accord.range++_++migration=auto`, there is not automation for +triggering the repairs. If you regularly run compatible repairs then the +migration will eventually complete, but if you don't run them or want +the migration to complete sooner then you will need to either trigger +them manually or invoke `nodetool consensus++_++admin finish-migration` +to trigger them. + +Any repair that is compatible will drive migration forward whether it +only covers part of the migrating range or whether is started via +`nodetool consensus++_++admin finish-migration` or some other external +process that initiates repair. Force repair with down nodes will not be +eligible to drive any type or phase of migration forward. Force repair +with all nodes up will still work. + +=== First phase + +In the first phase of migration Accord is unable to safely read +non-SERIAL writes so Paxos continues to be used for `SERIAL` operations +and Accord executes all writes and synchronously commits at the +requested consistency level in order to allow Paxos to safely read +Accord writes. Accord's read and write metrics are all counted towards the existing `Read` and `Write` scope +along with the eventually consistent operations, but you should also start to see writes also being counted in the `AccordWrite` scope. + +A data repair either incremental or full replicates all non-SERIAL +writes at `ALL` making it safe for Accord to read non-SERIAL writes that +occurred before the migration started. non-SERIAL writes that occurred +after the migration started were executed through Accord so Accord can +safely read them. + +=== Second phase + +In the second phase all reads and writes execute through Accord +(assuming `transactional++_++mode="full"`). Before an operation can execute on +Accord it is necessary to run a Paxos key repair in order to ensure that +any uncommitted Paxos transactions are committed and this check will +take at least one extra WAN RTT. Additionally Accord has to read at `QUORUM` +(where it would normally only read from a single replica in `transactional++_++mode="full"` and migration completed) because +Paxos writes are only visible at `QUORUM`. + +All reads and CAS operations in the range should start showing up in the +Accord metrics and not the existing metrics. + +Once a key has been repaired, the repaired state of the key is stored in +a small in-memory cache and system table so that it doesn't need to be +repaired again. This information is only stored at replicas of the key +so if the coordinator is not a replica it will not know that it can skip +repairing the key. Use token aware routing to avoid redundant key +repairs. + +A full repair {plus} Paxos repair is necessary to complete the second +phase of migration to Accord. An incremental repair can't currently be +used because incremental repair doesn't include the transactions that +are repaired by Paxos repair because it selects the data to include in +the repair before running the Paxos repair. + +== Migration from Accord + +Migration from Accord to Paxos occurs in a single phase and begins by +altering the table's `transactional++_++mode` to `off` and then +optionally marking ranges as migrating as discussed above. + +Once a range is marked migrating all operations in the migrating range +will stop executing on Accord. Before each operation occurs they will +have to run an Accord key repair similar to the Paxos key repair to +ensure Accord transactions for that key have committed at `QUORUM`. + +An Accord repair needs to be run on the migrating range, triggered +manually or via `nodetool finish-migration`, and once that completes +non-SERIAL operations will run using the usual eventually consistent +path and `SERIAL` operations will execute on Paxos. + +== Migration commands + +All the `nodetool` migration commands are based on new +`StorageServiceMBean` JMX methods. These methods are +`migrateConsensusProtocol`, `finishConsensusMigration`, +`listConsensusMigrations`, `getAccordManagedKeyspaces`, and +`getAccordManagedTables` and can be used by external management tools to +manage consensus migration. The existing methods for starting repairs +can also be used to start the repairs that are needed to complete +migration. + +=== nodetool consensus++_++admin list + +Invoking `nodetool` with +`consensus++_++admin list ++[<++keyspace++>++ ++<++tables++>++...++]++` +will connect to the specified node and retrieve that nodes view of what +tables are currently being migrated from transactional cluster metadata. +Tables that are not being migrated are not listed. + +The results can be printed out in several different formats using the +`format` parameter which supports `json`, `minified-json`, `yaml`, and +`minified-yaml`. + +=== nodetool consensus++_++admin begin-migration + +Invoking `nodetool` with +`consensus++_++admin begin-migration ++[<++keyspace++>++ ++<++tables++>++...++]++` +can be used to mark ranges on a table as migrating. This can only be +done after the migration has been started by altering the tables. +Marking ranges as migrating is a lightweight operation and does not +trigger the repairs that will finish the migration. + +The range to mark migrating needs to be explicitly +provided otherwise the entire ring will be marked migrating for the +specified keyspace and tables. If the entire range is marked migrating it is +only necessary to invoke `begin-migration` on one node. + +This is only needed if +`accord.default++_++transactional++_++mode=explicit` is set in +`cassandra.yaml` otherwise all the ranges will already have been marked +migrating when the alter occurred. + +Ranges that are migrating will require at least an extra WAN roundtrip +for each request that touches a migrating range because both transaction +systems may need to be used to execute the request. + +=== nodetool consensus++_++admin finish-migration + +Invoking `nodetool` with +`consensus++_++admin finish-migration ++[<++keyspace++>++ ++<++tables++>++...` +will run the repairs needed to complete the migration for the specified +ranges. If no range is specified it will default to the primary range of +the node that `nodetool` is connecting to so you can call it once on +every node to complete migration. + +When migrating from Paxos to Accord it will run an incremental data +repair and then a full data repair {plus} Paxos repair. When migrating +from Accord to Paxos it will run an Accord repair. + +== Supported consistency levels + +Migration requires support for read and write consistency levels because +Accord ends up being required to read Paxos writes at `QUORUM` and +Accord needs to execute non-SERIAL writes while Paxos is still being +used for `SERIAL` writes and thus needs to perform synchronous commit at +the requested consistency level. + +Once migration is complete the read and write consistency levels will be +ignored with transactional mode `full` . With transactional mode +`mixed++_++reads` Accord will continue to do synchronous commit and +honor the requested commit/write consistency level. + +Accord will always reject any requests to execute at unsupported +consistency levels to ensure that migration to/from Accord is always +possible. + +Supported read consistency levels are `ONE`, `QUORUM`, `SERIAL`, and +`ALL`. Supported write consistency levels are `ANY`, `ONE`, `QUORUM`, +`SERIAL`, and `ALL`. `LOCAL`, `TWO`, and `THREE` are not supported. +`ANY` is executed as an asynchronous commit similar to Paxos. + +== non-SERIAL consistency + +non-SERIAL operations are not linearizable even when executed on Accord +because Accord will continue to write data using the coordinator +generated timestamp not the transaction's timestamp. + +`USING TIMESTAMP` is allowed and the application of the operations will +occur in a linearizable order, but from the perspective of a reader the +merged result may not appear linearizable. + +Paging runs a separate transaction per page and does not produce a +linearizable result. + +Partition range reads are split into multiple transactions during +execution and will not produce a strict serializable result. +Additionally during migration there are no barriers/repairs executed +before partition range reads. When migrating from Accord to Paxos the +effective commit CL for Accord writes as viewed from partition range +reads will be `ANY`. Adding barriers/repairs before partition range +reads would cause them to time out so they are not done. + +== Batchlog and hints + +Pre-existing batchlog entries and hints will be processed during and +after migration until they are completed. If they need to be executed +through Accord they will be routed through Accord automatically. + +Logged batches that only touch Accord data will not be written to the +batch log because that functionality is redundant with Accord. Batches +that touch Accord and non-Accord data continue to use the batch log. +Before release this is likely to change so that a batch that touches +Accord data will be written entirely via Accord including both the +Accord and non-Accord data. + +Hints are not written for Accord writes although the batch log may +result in new hints because batch log entries are converted to hints +after the first retry. + +== Operations spanning Accord/non-Accord data + +Various operations can access both Accord and non-Accord managed data. +These are transparently split into parts that execute on Accord and +parts that execute outside of Accord and the results are merged. If the +splitting process races with migration then the operations is re-split +and retried without surfacing an error to the client. + +== Partition range read with LIMIT performance + +Partition range reads with a limit use more memory and CPU at the nodes +being read from and at the coordinator. Accord splits the ranges owned +by each node into smaller subranges and each subrange is owned by a +command store. The partition range read will execute at every +intersecting command store on a node and each will return `LIMIT N` +results which are sent back to the coordinator. The coordinator then +merges them and re-applies the limit. + +The additional memory and CPU will be amplified proportional to the +number of command stores which defaults to +`DatabaseDescriptor.getAvailableProcessors()`. + +== Metrics + +Accord's read and write metrics are counted under the existing `Read` and `Write` scope along with eventually consistent +operations. To see Accord specific metrics you can look at the `AccordRead` and `AccordWrite` scope. `CASRead` and `CASWrite` will not track +CAS or `SERIAL` read operations that end up running on Accord and they will instead show up in `AccordRead`/`AccordWrite` and `Read`/`Write`. + +If a single request ends up running on both systems due to misrouting it +will show up as multiple requests. Misrouted requests are counted under the `RetryDifferentSystem` meter and will show +up in `AccordRead` and `AccordWrite` if Accord was the system the request was misrouted to as well as `Read` and `Write`. +If the request was misrouted to non-Accord code then it will show up under `Read` and `Write` metrics or `CASRead` and `CASWrite` metrics. + +Hints can be misrouted and this is tracked in `HintsServiceMetrics` under the `HintsRetryDifferentSystem` meter. + +Partition range reads can also potentially generate additional Accord +transactions depending on how the reads end up having to be split due to intersection with migrating ranges. From 56b7e90f1f0b78063e73f3fd37fccb79594bfdd3 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 19 May 2025 18:51:44 -0500 Subject: [PATCH 319/340] bump build.xml version --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index ca84184c83e7..cc012f84a371 100644 --- a/build.xml +++ b/build.xml @@ -34,7 +34,7 @@ - + From 063e1fe3d20de53a54ad0ec1217d12ca4651698b Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Thu, 24 Apr 2025 18:21:35 +0100 Subject: [PATCH 320/340] Introduce NativeAccessor to avoid new ByteBuffer allocation on flush for each NativeCell Patch by Dmitry Konstantinov; reviewed by Branimir Lambov for CASSANDRA-20173 --- CHANGES.txt | 1 + .../apache/cassandra/db/ArrayClustering.java | 6 + .../apache/cassandra/db/BufferClustering.java | 6 + .../org/apache/cassandra/db/Clustering.java | 7 + .../apache/cassandra/db/ClusteringPrefix.java | 27 +- .../apache/cassandra/db/NativeClustering.java | 86 +++- .../db/marshal/AddressBasedNativeData.java | 81 +++ .../cassandra/db/marshal/NativeAccessor.java | 485 ++++++++++++++++++ .../cassandra/db/marshal/NativeData.java | 32 ++ .../db/marshal/NativeDataAllocator.java | 34 ++ .../apache/cassandra/db/rows/NativeCell.java | 53 +- .../index/sai/plan/QueryController.java | 7 +- .../io/util/BufferedDataOutputStreamPlus.java | 20 + .../cassandra/io/util/DataOutputPlus.java | 6 + .../cassandra/utils/FastByteOperations.java | 79 ++- .../org/apache/cassandra/utils/UUIDGen.java | 7 +- .../utils/memory/BigEndianMemoryUtil.java | 146 ++++++ .../cassandra/utils/memory/MemoryUtil.java | 67 +++ .../cassandra/db/ClusteringHeapSizeTest.java | 6 +- .../cassandra/db/ClusteringPrefixTest.java | 2 +- .../apache/cassandra/db/NativeCellTest.java | 174 ++++++- .../db/marshal/ByteBufferAccessorTest.java | 17 + .../db/marshal/CollectionTypesTest.java | 17 + .../marshal/CompositeAndTupleTypesTest.java | 17 + .../db/marshal/CompositeTypeTest.java | 15 + .../db/marshal/NativeAccessorTest.java | 367 +++++++++++++ .../db/marshal/TestNativeDataAllocator.java | 72 +++ .../db/marshal/ValueAccessorTest.java | 15 + .../cassandra/db/marshal/ValueAccessors.java | 3 +- .../ByteSourceConversionTest.java | 2 +- .../utils/memory/BigEndianMemoryUtilTest.java | 148 ++++++ 31 files changed, 1954 insertions(+), 51 deletions(-) create mode 100644 src/java/org/apache/cassandra/db/marshal/AddressBasedNativeData.java create mode 100644 src/java/org/apache/cassandra/db/marshal/NativeAccessor.java create mode 100644 src/java/org/apache/cassandra/db/marshal/NativeData.java create mode 100644 src/java/org/apache/cassandra/db/marshal/NativeDataAllocator.java create mode 100644 src/java/org/apache/cassandra/utils/memory/BigEndianMemoryUtil.java create mode 100644 test/unit/org/apache/cassandra/db/marshal/NativeAccessorTest.java create mode 100644 test/unit/org/apache/cassandra/db/marshal/TestNativeDataAllocator.java create mode 100644 test/unit/org/apache/cassandra/utils/memory/BigEndianMemoryUtilTest.java diff --git a/CHANGES.txt b/CHANGES.txt index abbf17c276fd..7311a7240561 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Introduce NativeAccessor to avoid new ByteBuffer allocation on flush for each NativeCell (CASSANDRA-20173) * Migrate sstableloader code to its own tools directory and artifact (CASSANDRA-20328) * Stop AutoRepair monitoring thread upon Cassandra shutdown (CASSANDRA-20623) * Avoid duplicate hardlink error upon forceful taking of ephemeral snapshots during repair (CASSANDRA-20490) diff --git a/src/java/org/apache/cassandra/db/ArrayClustering.java b/src/java/org/apache/cassandra/db/ArrayClustering.java index b04910c434cb..752b3db99517 100644 --- a/src/java/org/apache/cassandra/db/ArrayClustering.java +++ b/src/java/org/apache/cassandra/db/ArrayClustering.java @@ -48,6 +48,12 @@ public long unsharedHeapSizeExcludingData() return EMPTY_SIZE + ObjectSizes.sizeOfArray(values); } + @Override + public Clustering ensureAccessorFactorySupport() + { + return this; + } + public static ArrayClustering make(byte[]... values) { return new ArrayClustering(values); diff --git a/src/java/org/apache/cassandra/db/BufferClustering.java b/src/java/org/apache/cassandra/db/BufferClustering.java index 6cacbd14c910..205036505220 100644 --- a/src/java/org/apache/cassandra/db/BufferClustering.java +++ b/src/java/org/apache/cassandra/db/BufferClustering.java @@ -59,4 +59,10 @@ public static BufferClustering make(ByteBuffer... values) { return new BufferClustering(values); } + + @Override + public Clustering ensureAccessorFactorySupport() + { + return this; + } } diff --git a/src/java/org/apache/cassandra/db/Clustering.java b/src/java/org/apache/cassandra/db/Clustering.java index 426d3279f97d..3e42e4a361b7 100644 --- a/src/java/org/apache/cassandra/db/Clustering.java +++ b/src/java/org/apache/cassandra/db/Clustering.java @@ -190,4 +190,11 @@ public Clustering deserialize(ByteBuffer in, int version, List ensureAccessorFactorySupport(); } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/ClusteringPrefix.java b/src/java/org/apache/cassandra/db/ClusteringPrefix.java index 167d89c6a485..c7687bb80f3e 100644 --- a/src/java/org/apache/cassandra/db/ClusteringPrefix.java +++ b/src/java/org/apache/cassandra/db/ClusteringPrefix.java @@ -258,6 +258,25 @@ default boolean isEmpty() */ public V get(int i); + /** + * The method is introduced to allow to avoid a value object retrieval/allocation for simple checks + */ + public default boolean isNull(int i) + { + return get(i) == null; + } + + /** + * The method is introduced to allow to avoid a value object retrieval/allocation for simple checks + */ + public default boolean isEmpty(int i) + { + V v = get(i); + if (v == null) + return true; + return accessor().isEmpty(v); + } + public ValueAccessor accessor(); default ByteBuffer bufferAt(int i) @@ -402,7 +421,7 @@ public default String clusteringString(List> types) * memory (i.e. in memtables) to minimized on-heap versions. * If the object is already in minimal form, no action will be taken. */ - public ClusteringPrefix retainable(); + public ClusteringPrefix retainable(); public static class Serializer { @@ -549,14 +568,12 @@ void skipValuesWithoutSize(DataInputPlus in, int size, int version, List long makeHeader(ClusteringPrefix clustering, int offset, int limit) { long header = 0; - ValueAccessor accessor = clustering.accessor(); for (int i = offset ; i < limit ; i++) { - V v = clustering.get(i); // no need to do modulo arithmetic for i, since the left-shift execute on the modulus of RH operand by definition - if (v == null) + if (clustering.isNull(i)) header |= (1L << (i * 2) + 1); - else if (accessor.isEmpty(v)) + else if (clustering.isEmpty(i)) header |= (1L << (i * 2)); } return header; diff --git a/src/java/org/apache/cassandra/db/NativeClustering.java b/src/java/org/apache/cassandra/db/NativeClustering.java index e83ded06c6e0..f51ea90a8245 100644 --- a/src/java/org/apache/cassandra/db/NativeClustering.java +++ b/src/java/org/apache/cassandra/db/NativeClustering.java @@ -21,7 +21,10 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import org.apache.cassandra.db.marshal.AddressBasedNativeData; import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.NativeAccessor; +import org.apache.cassandra.db.marshal.NativeData; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.ObjectSizes; @@ -31,7 +34,7 @@ import org.apache.cassandra.utils.memory.NativeEndianMemoryUtil; import org.apache.cassandra.utils.memory.NativeAllocator; -public class NativeClustering implements Clustering +public class NativeClustering implements Clustering { private static final long EMPTY_SIZE = ObjectSizes.measure(new NativeClustering()); @@ -84,7 +87,7 @@ public Kind kind() return Kind.CLUSTERING; } - public ClusteringPrefix clustering() + public ClusteringPrefix clustering() { return this; } @@ -100,9 +103,50 @@ public int dataSize() return NativeEndianMemoryUtil.getUnsignedShort(peer + dataSizeOffset); } - public ByteBuffer get(int i) + public NativeData get(int i) + { + return buildDataObject(i, AddressBasedNativeData::new); + } + + public boolean isNull(int i) + { + return isNull(peer, size(), i); + } + + private static boolean isNull(long peer, int size, int i) + { + if (i >= size) + throw new IndexOutOfBoundsException(); + + int metadataSize = (size * 2) + 4; + long bitmapStart = peer + metadataSize; + int b = NativeEndianMemoryUtil.getByte(bitmapStart + (i >>> 3)); + return ((b & (1 << (i & 7))) != 0); + } + + public boolean isEmpty(int i) + { + int size = size(); + if (isNull(peer, size, i)) + return true; + + int startOffset = NativeEndianMemoryUtil.getUnsignedShort(peer + 2 + i * 2); + int endOffset = NativeEndianMemoryUtil.getUnsignedShort(peer + 4 + i * 2); + return (endOffset - startOffset) == 0; + } + + + private ByteBuffer getByteBuffer(int i) + { + return buildDataObject(i, (long address, int length) -> MemoryUtil.getByteBuffer(address, length, ByteOrder.BIG_ENDIAN)); + } + + private interface DataObjectBuilder { + D build(long address, int length); + } + + private D buildDataObject(int i, DataObjectBuilder builder) { - // offset at which we store the dataOffset int size = size(); if (i >= size) throw new IndexOutOfBoundsException(); @@ -116,14 +160,15 @@ public ByteBuffer get(int i) int startOffset = NativeEndianMemoryUtil.getUnsignedShort(peer + 2 + i * 2); int endOffset = NativeEndianMemoryUtil.getUnsignedShort(peer + 4 + i * 2); - return MemoryUtil.getByteBuffer(bitmapStart + bitmapSize + startOffset, - endOffset - startOffset, - ByteOrder.BIG_ENDIAN); + + long address = bitmapStart + bitmapSize + startOffset; + int length = endOffset - startOffset; + return builder.build(address, length); } - public ByteBuffer[] getRawValues() + public NativeData[] getRawValues() { - ByteBuffer[] values = new ByteBuffer[size()]; + NativeData[] values = new NativeData[size()]; for (int i = 0 ; i < values.length ; i++) values[i] = get(i); return values; @@ -131,13 +176,15 @@ public ByteBuffer[] getRawValues() public ByteBuffer[] getBufferArray() { - return getRawValues(); + ByteBuffer[] values = new ByteBuffer[size()]; + for (int i = 0 ; i < values.length ; i++) + values[i] = getByteBuffer(i); + return values; } - public ValueAccessor accessor() + public ValueAccessor accessor() { - // TODO: add a native accessor - return ByteBufferAccessor.instance; + return NativeAccessor.instance; } public long unsharedHeapSize() @@ -150,6 +197,12 @@ public long unsharedHeapSizeExcludingData() return EMPTY_SIZE; } + @Override + public Clustering ensureAccessorFactorySupport() + { + return retainable(); + } + @Override public final int hashCode() { @@ -162,8 +215,9 @@ public final boolean equals(Object o) return ClusteringPrefix.equals(this, o); } + // data are copied to heap byte buffers to detach from a NativeAllocator lifecycle @Override - public ClusteringPrefix retainable() + public Clustering retainable() { assert kind() == Kind.CLUSTERING; // tombstones are never stored natively @@ -171,10 +225,10 @@ public ClusteringPrefix retainable() ByteBuffer[] values = new ByteBuffer[size()]; for (int i = 0; i < values.length; ++i) { - ByteBuffer value = get(i); + ByteBuffer value = getByteBuffer(i); values[i] = value != null ? HeapCloner.instance.clone(value) : null; } - return accessor().factory().clustering(values); + return ByteBufferAccessor.instance.factory().clustering(values); } } diff --git a/src/java/org/apache/cassandra/db/marshal/AddressBasedNativeData.java b/src/java/org/apache/cassandra/db/marshal/AddressBasedNativeData.java new file mode 100644 index 000000000000..73e686028a32 --- /dev/null +++ b/src/java/org/apache/cassandra/db/marshal/AddressBasedNativeData.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.marshal; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.apache.cassandra.utils.memory.MemoryUtil; + +public class AddressBasedNativeData implements NativeData +{ + // use a real address, just in case + private static final ByteBuffer EMPTY_NATIVE_BUFFER = ByteBuffer.allocateDirect(1); + private static final long EMPTY_VALUE_ADDRESS = MemoryUtil.getAddress(EMPTY_NATIVE_BUFFER); + public static final AddressBasedNativeData EMPTY = new AddressBasedNativeData(EMPTY_VALUE_ADDRESS, 0); + + private final long address; + private final int length; + + public AddressBasedNativeData(long address, int length) + { + this.address = address; + this.length = length; + } + + + @Override + public int nativeDataSize() + { + return length; + } + + @Override + public ByteBuffer asByteBuffer() + { + return MemoryUtil.getByteBuffer(address, length, ByteOrder.BIG_ENDIAN); + } + + @Override + public NativeData slice(int offset, int length) + { + if (offset < 0 || offset > this.length) + throw new IllegalArgumentException("offset must but be >= 0 and < parent length; " + + "offset: " + offset + + ", slice length: " + length + + ", data length: " + this.length); + if (length < 0 || offset + length > this.length) { + throw new IllegalArgumentException("length must but be >= 0 and offset + length > parent length; " + + "offset: " + offset + + ", slice length: " + length + + ", data length: " + this.length); + } + + if (length == 0) { + return EMPTY; + } + return new AddressBasedNativeData(address + offset, length); + } + + @Override + public long getAddress() + { + return address; + } +} diff --git a/src/java/org/apache/cassandra/db/marshal/NativeAccessor.java b/src/java/org/apache/cassandra/db/marshal/NativeAccessor.java new file mode 100644 index 000000000000..70d73041de1b --- /dev/null +++ b/src/java/org/apache/cassandra/db/marshal/NativeAccessor.java @@ -0,0 +1,485 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.marshal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.util.UUID; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.db.Digest; +import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.paxos.Ballot; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FastByteOperations; +import org.apache.cassandra.utils.TimeUUID; +import org.apache.cassandra.utils.UUIDGen; +import org.apache.cassandra.utils.memory.BigEndianMemoryUtil; +import org.apache.cassandra.utils.memory.MemoryUtil; + +/** + * ValueAccessor has a lot of different methods are grouped together in a single interface. + * Technically the methods can be classfied to 4 categories: + * 1) basic methods to deal with the existing data as an abstract read-only container of bytes + * 2) deserialization methods to decode the data into different data types + * 3) serialization methods to encode and write different data types into the value entity + * 4) Value object creation methods + * + * NativeAccessor provides a support for real NativeData objects (on top of off-heap memory) for 1-3 categories + * with a focus on 1) category and only emulates 4th category using ByteBufferSliceNativeData on top of heap ByteBuffers. + * We expect NativeData is used only to store data in Memtables with an explicit allocator and memory regions lifecycle + * and not used to create short-living Mutation requests and transfer them between nodes. + */ +public class NativeAccessor implements ValueAccessor +{ + public static final ValueAccessor instance = new NativeAccessor(); + + // ----------------------------------------------------------------------------- + // basic methods to deal with data as a read-only container of bytes + + @Override + public int size(NativeData value) + { + return value.nativeDataSize(); + } + + @Override + public void write(NativeData sourceValue, DataOutputPlus out) throws IOException + { + out.writeMemory(sourceValue.getAddress(), sourceValue.nativeDataSize()); + } + + @Override + public ByteBuffer toBuffer(NativeData value) + { + if (value == null) + return null; + return value.asByteBuffer(); + } + + @Override + public void write(NativeData value, ByteBuffer out) + { + int size = value.nativeDataSize(); + MemoryUtil.getBytes(value.getAddress(), out, size); + out.position(out.position() + size); + + } + + @Override + public int copyTo(NativeData src, int srcOffset, V2 dst, ValueAccessor dstAccessor, int dstOffset, int size) + { + if (dstAccessor == ByteArrayAccessor.instance) + MemoryUtil.getBytes(src.getAddress() + srcOffset, dstAccessor.toArray(dst), dstOffset, size); + else if (dstAccessor == ByteBufferAccessor.instance) + { + ByteBuffer dstBuffer = dstAccessor.toBuffer(dst); + MemoryUtil.getBytes(src.getAddress() + srcOffset, dstBuffer, dstOffset, size); + // note: position of dstBuffer expected to stay the same + } + else if (dstAccessor == NativeAccessor.instance) + MemoryUtil.setBytes(src.getAddress() + srcOffset, ((NativeData) dst).getAddress() + dstOffset, size); + else // just in case of new implementations of ValueAccessor appear + dstAccessor.copyByteBufferTo(src.asByteBuffer(), srcOffset, dst, dstOffset, size); + + return size; + } + + @Override + public int copyByteArrayTo(byte[] src, int srcOffset, NativeData dstNative, int dstOffset, int size) + { + MemoryUtil.setBytes(src, srcOffset, dstNative.getAddress() + dstOffset, size); + return size; + } + + @Override + public int copyByteBufferTo(ByteBuffer src, int srcOffset, NativeData dstNative, int dstOffset, int size) + { + MemoryUtil.setBytes(dstNative.getAddress() + dstOffset, src, srcOffset, size); + return size; + } + + @Override + public void digest(NativeData value, int offset, int size, Digest digest) + { + // not used for NativeData (we copy data to heap during a select) + // so, there is no much reason to optimize to avoid a ByteBuffer object allocation + ByteBuffer byteBuffer = value.asByteBuffer(); + digest.update(byteBuffer, byteBuffer.position() + offset, size); + } + + @Override + public NativeData slice(NativeData input, int offset, int length) + { + return input.slice(offset, length); + } + + @Override + public int compare(NativeData left, VR right, ValueAccessor accessorR) + { + + if (accessorR == ByteArrayAccessor.instance) + return -compareByteArrayTo(accessorR.toArray(right), left); + else if (accessorR == ByteBufferAccessor.instance) + return -compareByteBufferTo(accessorR.toBuffer(right), left); + if (accessorR == NativeAccessor.instance) + { + NativeData rightNative = (NativeData) right; + int leftSize = left.nativeDataSize(); + int rightSize = rightNative.nativeDataSize(); + return FastByteOperations.compareMemoryUnsigned(left.getAddress(), leftSize, rightNative.getAddress(), rightSize); + } else // just in case of new implementations of ValueAccessor appear + return ByteBufferUtil.compareUnsigned(left.asByteBuffer(), accessorR.toBuffer(right)); + } + + @Override + public int compareByteArrayTo(byte[] left, NativeData right) + { + return FastByteOperations.compareWithMemoryUnsigned(left, 0, left.length, right.getAddress(), right.nativeDataSize()); + } + + @Override + public int compareByteBufferTo(ByteBuffer left, NativeData right) + { + return FastByteOperations.compareWithMemoryUnsigned(left, right.getAddress(), right.nativeDataSize()); + } + + // ----------------------------------------------------------------------------- + // Data deserialization methods + + @Override + public byte[] toArray(NativeData value) + { + if (value == null) + return null; + int size = value.nativeDataSize(); + byte[] result = new byte[size]; + MemoryUtil.getBytes(value.getAddress(), result, 0, size); + return result; + } + + @Override + public byte[] toArray(NativeData value, int offset, int length) + { + if (value == null) + return null; + int size = value.nativeDataSize(); + if (length > size) + throw new IllegalArgumentException("length (" + length + ") cannot be more than the value size (" + size + ")"); + + byte[] result = new byte[length]; + MemoryUtil.getBytes(value.getAddress() + offset, result, 0, length); + return result; + } + + @Override + public String toString(NativeData value, Charset charset) throws CharacterCodingException + { + return ByteBufferUtil.string(value.asByteBuffer(), charset); + } + + @Override + public String toHex(NativeData value) + { + return ByteBufferUtil.bytesToHex(value.asByteBuffer()); + } + + @Override + public byte toByte(NativeData value) + { + return getByte(value, 0); + } + + @Override + public byte getByte(NativeData value, int offset) + { + return MemoryUtil.getByte(value.getAddress() + offset); + } + + @Override + public short toShort(NativeData value) + { + return getShort(value, 0); + } + + @Override + public short getShort(NativeData value, int offset) + { + return (short) BigEndianMemoryUtil.getUnsignedShort(value.getAddress() + offset); + } + + @Override + public int getUnsignedShort(NativeData value, int offset) + { + return BigEndianMemoryUtil.getUnsignedShort(value.getAddress() + offset); + } + + @Override + public int toInt(NativeData value) + { + return getInt(value, 0); + } + + @Override + public int getInt(NativeData value, int offset) + { + return BigEndianMemoryUtil.getInt(value.getAddress() + offset); + } + + @Override + public long toLong(NativeData value) + { + return getLong(value, 0); + } + + @Override + public long getLong(NativeData value, int offset) + { + return BigEndianMemoryUtil.getLong(value.getAddress() + offset); + } + + @Override + public float getFloat(NativeData value, int offset) + { + return Float.intBitsToFloat(BigEndianMemoryUtil.getInt(value.getAddress() + offset)); + } + + @Override + public double getDouble(NativeData value, int offset) + { + return Double.longBitsToDouble(BigEndianMemoryUtil.getLong(value.getAddress() + offset)); + } + + @Override + public float toFloat(NativeData value) + { + return getFloat(value, 0); + } + + @Override + public double toDouble(NativeData value) + { + return getDouble(value, 0); + } + + @Override + public UUID toUUID(NativeData value) + { + long mostSigBits = getLong(value, 0); + long leastSigBits = getLong(value, 8); + + return UUIDGen.getUUID(mostSigBits, leastSigBits); + } + + @Override + public TimeUUID toTimeUUID(NativeData value) + { + long mostSigBits = getLong(value, 0); + long leastSigBits = getLong(value, 8); + return TimeUUID.fromBytes(mostSigBits, leastSigBits); + } + + @Override + public Ballot toBallot(NativeData value) + { + long mostSigBits = getLong(value, 0); + long leastSigBits = getLong(value, 8); + return Ballot.fromBytes(mostSigBits, leastSigBits); + } + + @Override + public float[] toFloatArray(NativeData value, int dimension) + { + int arraySize = value.nativeDataSize() / Float.BYTES; + if (arraySize != dimension) + throw new IllegalArgumentException(String.format("Could not convert to a float[] with different dimension. " + + "Was expecting %d but got %d", dimension, arraySize)); + float[] floatArray = new float[arraySize]; + for (int i = 0; i < arraySize; i++) + { + floatArray[i] = Float.intBitsToFloat(getInt(value, i * Float.BYTES)); + } + return floatArray; + } + + + // ----------------------------------------------------------------------------- + // Data serialization methods + @Override + public int putByte(NativeData dstNative, int offset, byte value) + { + BigEndianMemoryUtil.setByte(dstNative.getAddress() + offset, value); + return TypeSizes.BYTE_SIZE; + } + + @Override + public int putShort(NativeData dstNative, int offset, short value) + { + BigEndianMemoryUtil.setShort(dstNative.getAddress() + offset, value); + return TypeSizes.SHORT_SIZE; + } + + @Override + public int putInt(NativeData dstNative, int offset, int value) + { + BigEndianMemoryUtil.setInt(dstNative.getAddress() + offset, value); + return TypeSizes.INT_SIZE; + } + + @Override + public int putLong(NativeData dstNative, int offset, long value) + { + BigEndianMemoryUtil.setLong(dstNative.getAddress() + offset, value); + return TypeSizes.LONG_SIZE; + } + + @Override + public int putFloat(NativeData dstNative, int offset, float value) + { + putInt(dstNative, offset, Float.floatToIntBits(value)); + return TypeSizes.FLOAT_SIZE; + } + + @Override + public NativeData[] createArray(int length) + { + return new NativeData[length]; + } + + // ----------------------------------------------------------------------------- + // Value object creation methods + // We do not expect the methods are used in real logic for NativeData, + // but they are needed to reuse existing unit tests written for other implementation of ValueAccessor. + + private static NativeDataAllocator allocator = NativeDataAllocator.UNSUPPORTED; + + @VisibleForTesting + public static void setNativeMemoryAllocator(NativeDataAllocator allocatorToSet) + { + allocator = allocatorToSet; + } + + @Override + public NativeData read(DataInputPlus in, int length) throws IOException + { + ByteBuffer data = ByteBufferUtil.read(in, length); + return allocator.allocateBasedOnBuffer(data); + } + + @Override + public NativeData empty() + { + return AddressBasedNativeData.EMPTY; + } + + @Override + public NativeData valueOf(byte[] bytes) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(bytes)); + } + + @Override + public NativeData valueOf(ByteBuffer bytes) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(bytes)); + } + + @Override + public NativeData valueOf(String s, Charset charset) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(s, charset)); + } + + @Override + public NativeData valueOf(UUID v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData valueOf(boolean v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData valueOf(byte v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData valueOf(short v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData valueOf(int v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData valueOf(long v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData valueOf(float v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData valueOf(double v) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.valueOf(v)); + } + + @Override + public NativeData convert(V2 src, ValueAccessor accessor) + { + if (accessor == NativeAccessor.instance) + return (NativeData) src; + return allocator.allocateBasedOnBuffer(accessor.toBuffer(src)); + } + + @Override + public NativeData allocate(int size) + { + return allocator.allocateBasedOnBuffer(ByteBufferAccessor.instance.allocate(size)); + } + + @Override + public ObjectFactory factory() + { + // The method is used to de-serialize and create different parts of a Mutation object + // to transfer it between Cassandra nodes. + // The current implementation of NativeData does not support creating of such objects in-flight + // because it requires to have a native memory pool/allocator and manage its lifecycle. + throw new UnsupportedOperationException(); + } +} diff --git a/src/java/org/apache/cassandra/db/marshal/NativeData.java b/src/java/org/apache/cassandra/db/marshal/NativeData.java new file mode 100644 index 000000000000..60ffb8ce515b --- /dev/null +++ b/src/java/org/apache/cassandra/db/marshal/NativeData.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.marshal; + +import java.nio.ByteBuffer; + +public interface NativeData +{ + int nativeDataSize(); + + ByteBuffer asByteBuffer(); + + NativeData slice(int offset, int length); + + public long getAddress(); +} diff --git a/src/java/org/apache/cassandra/db/marshal/NativeDataAllocator.java b/src/java/org/apache/cassandra/db/marshal/NativeDataAllocator.java new file mode 100644 index 000000000000..3d22f9951422 --- /dev/null +++ b/src/java/org/apache/cassandra/db/marshal/NativeDataAllocator.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.marshal; + +import java.nio.ByteBuffer; + +public interface NativeDataAllocator extends AutoCloseable +{ + NativeDataAllocator UNSUPPORTED = data -> { + throw new UnsupportedOperationException("The method is not expected to be used by NativeAccessor outside of tests. " + + "NativeData can be allocated only by a memtable NativeAllocator"); + }; + + NativeData allocateBasedOnBuffer(ByteBuffer data); + + @Override + default void close() {}; +} diff --git a/src/java/org/apache/cassandra/db/rows/NativeCell.java b/src/java/org/apache/cassandra/db/rows/NativeCell.java index 9a191ebab923..b774cb2ce989 100644 --- a/src/java/org/apache/cassandra/db/rows/NativeCell.java +++ b/src/java/org/apache/cassandra/db/rows/NativeCell.java @@ -20,7 +20,9 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; -import org.apache.cassandra.db.marshal.ByteBufferAccessor; +import org.apache.cassandra.db.marshal.AddressBasedNativeData; +import org.apache.cassandra.db.marshal.NativeAccessor; +import org.apache.cassandra.db.marshal.NativeData; import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.utils.ByteBufferUtil; @@ -30,7 +32,7 @@ import org.apache.cassandra.utils.memory.NativeAllocator; import org.apache.cassandra.utils.memory.NativeEndianMemoryUtil; -public class NativeCell extends AbstractCell +public class NativeCell extends AbstractCell implements NativeData { private static final long EMPTY_SIZE = ObjectSizes.measure(new NativeCell()); @@ -135,15 +137,20 @@ public int ttl() return NativeEndianMemoryUtil.getInt(peer + TTL); } - public ByteBuffer value()// FIXME: add native accessor + public NativeData value() { - int length = NativeEndianMemoryUtil.getInt(peer + LENGTH); - return MemoryUtil.getByteBuffer(peer + VALUE, length, ByteOrder.BIG_ENDIAN); + return this; } - public ValueAccessor accessor() + public ByteBuffer byteBufferValue() { - return ByteBufferAccessor.instance; // FIXME: add native accessor + int length = valueSize(); + return MemoryUtil.getByteBuffer(getAddress(), length, ByteOrder.BIG_ENDIAN); + } + + public ValueAccessor accessor() + { + return NativeAccessor.instance; } public int valueSize() @@ -156,7 +163,7 @@ public CellPath path() if (!hasPath()) return null; - long offset = peer + VALUE + NativeEndianMemoryUtil.getInt(peer + LENGTH); + long offset = getAddress() + valueSize(); int size = NativeEndianMemoryUtil.getInt(offset); return CellPath.create(MemoryUtil.getByteBuffer(offset + 4, size, ByteOrder.BIG_ENDIAN)); } @@ -169,17 +176,17 @@ public Cell withUpdatedValue(ByteBuffer newValue) @Override public Cell withUpdatedTimestamp(long newTimestamp) { - return new BufferCell(column, newTimestamp, ttl(), localDeletionTime(), value(), path()); + return new BufferCell(column, newTimestamp, ttl(), localDeletionTime(), byteBufferValue(), path()); } public Cell withUpdatedTimestampAndLocalDeletionTime(long newTimestamp, long newLocalDeletionTime) { - return new BufferCell(column, newTimestamp, ttl(), newLocalDeletionTime, value(), path()); + return new BufferCell(column, newTimestamp, ttl(), newLocalDeletionTime, byteBufferValue(), path()); } public Cell withUpdatedColumn(ColumnMetadata column) { - return new BufferCell(column, timestamp(), ttl(), localDeletionTimeAsUnsignedInt(), value(), path()); + return new BufferCell(column, timestamp(), ttl(), localDeletionTimeAsUnsignedInt(), byteBufferValue(), path()); } public Cell withSkippedValue() @@ -217,4 +224,28 @@ protected int localDeletionTimeAsUnsignedInt() { return NativeEndianMemoryUtil.getInt(peer + DELETION); } + + + @Override + public int nativeDataSize() + { + return valueSize(); + } + + @Override + public ByteBuffer asByteBuffer() + { + return byteBufferValue(); + } + @Override + public NativeData slice(int offset, int length) + { + return new AddressBasedNativeData(getAddress() + offset, length); + } + + @Override + public long getAddress() + { + return peer + VALUE; + } } diff --git a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java index 49f5a76628f4..53cf23c0d9c5 100644 --- a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java +++ b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java @@ -455,7 +455,12 @@ private ClusteringIndexFilter makeFilter(List keys) { nextClusterings.clear(); for (PrimaryKey key : keys) - nextClusterings.add(key.clustering()); + { + // primary keys privided by SAI may contain NativeCustering + // filter logic may use ValueAccessor.factory() for slicing, which is not supported for NativeCustering + Clustering clustering = key.clustering().ensureAccessorFactorySupport(); + nextClusterings.add(clustering); + } return new ClusteringIndexNamesFilter(nextClusterings, clusteringIndexFilter.isReversed()); } } diff --git a/src/java/org/apache/cassandra/io/util/BufferedDataOutputStreamPlus.java b/src/java/org/apache/cassandra/io/util/BufferedDataOutputStreamPlus.java index 538e52913fe0..201eb619e8a2 100644 --- a/src/java/org/apache/cassandra/io/util/BufferedDataOutputStreamPlus.java +++ b/src/java/org/apache/cassandra/io/util/BufferedDataOutputStreamPlus.java @@ -27,6 +27,7 @@ import net.nicoulaj.compilecommand.annotations.DontInline; import org.apache.cassandra.utils.FastByteOperations; +import org.apache.cassandra.utils.memory.MemoryUtil; import static org.apache.cassandra.config.CassandraRelevantProperties.NIO_DATA_OUTPUT_STREAM_PLUS_BUFFER_SIZE; @@ -138,6 +139,25 @@ public void write(ByteBuffer src) throws IOException buffer.position(buffer.position() + srcCount); } + @Override + public void writeMemory(long address, int length) throws IOException + { + assert buffer != null : "Attempt to use a closed data output"; + long srcPos = address; + int srcCount = length; + int trgAvailable; + while (srcCount > (trgAvailable = buffer.remaining())) + { + MemoryUtil.getBytes(srcPos, buffer, trgAvailable); + buffer.position(buffer.position() + trgAvailable); + srcPos += trgAvailable; + srcCount -= trgAvailable; + doFlush(srcCount); + } + MemoryUtil.getBytes(srcPos, buffer, srcCount); + buffer.position(buffer.position() + srcCount); + } + @Override public void write(int b) throws IOException { diff --git a/src/java/org/apache/cassandra/io/util/DataOutputPlus.java b/src/java/org/apache/cassandra/io/util/DataOutputPlus.java index 18b30263eee9..3c66424065ac 100644 --- a/src/java/org/apache/cassandra/io/util/DataOutputPlus.java +++ b/src/java/org/apache/cassandra/io/util/DataOutputPlus.java @@ -18,6 +18,7 @@ package org.apache.cassandra.io.util; import org.apache.cassandra.utils.Shared; +import org.apache.cassandra.utils.memory.MemoryUtil; import org.apache.cassandra.utils.vint.VIntCoding; import java.io.DataOutput; @@ -44,6 +45,11 @@ default void write(ReadableMemory memory, long offset, long length) throws IOExc write(buffer); } + default void writeMemory(long address, int length) throws IOException + { + write(MemoryUtil.getByteBuffer(address, length)); + } + default void writeVInt(long i) throws IOException { VIntCoding.writeVInt(i, this); diff --git a/src/java/org/apache/cassandra/utils/FastByteOperations.java b/src/java/org/apache/cassandra/utils/FastByteOperations.java index 358d4993ddd8..c37ee21a6ed6 100644 --- a/src/java/org/apache/cassandra/utils/FastByteOperations.java +++ b/src/java/org/apache/cassandra/utils/FastByteOperations.java @@ -70,6 +70,21 @@ public static int compareUnsigned(ByteBuffer b1, ByteBuffer b2) return BestHolder.BEST.compare(b1, b2); } + public static int compareWithMemoryUnsigned(ByteBuffer b1, long address2, int length2) + { + return BestHolder.BEST.compare(b1, address2, length2); + } + + public static int compareWithMemoryUnsigned(byte[] b1, int s1, int l1, long address2, int length2) + { + return BestHolder.BEST.compare(b1, s1,l1, address2, length2); + } + + public static int compareMemoryUnsigned(long address1, int length1, long address2, int length2) + { + return BestHolder.BEST.compare(address1, length1, address2, length2); + } + public static int compareUnsigned(byte[] b1, byte[] b2) { return compareUnsigned(b1, 0, b1.length, b2, 0, b2.length); @@ -102,6 +117,12 @@ abstract public int compare(byte[] buffer1, int offset1, int length1, abstract public int compare(ByteBuffer buffer1, byte[] buffer2, int offset2, int length2); + abstract public int compare(ByteBuffer buffer1, long address2, int length2); + + abstract public int compare(long address1, int length1, long address2, int length2); + + abstract public int compare(byte[] buffer1, int offset1, int length1, long address2, int length2); + abstract public int compare(ByteBuffer buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2); abstract public int compare(ByteBuffer buffer1, ByteBuffer buffer2); @@ -221,6 +242,44 @@ public int compare(ByteBuffer buffer1, byte[] buffer2, int offset2, int length2) return compare(buffer1, buffer1.position(), buffer1.remaining(), buffer2, offset2, length2); } + @Override + public int compare(ByteBuffer buffer1, long address2, int length2) + { + return compare(buffer1, buffer1.position(), buffer1.remaining(), address2, length2); + } + + public int compare(ByteBuffer buffer1, int position1, int length1, long address2, int length2) + { + { + Object obj1; + long offset1; + if (buffer1.hasArray()) + { + obj1 = buffer1.array(); + offset1 = BYTE_ARRAY_BASE_OFFSET + buffer1.arrayOffset() + position1; + } + else + { + obj1 = null; + offset1 = theUnsafe.getLong(buffer1, DIRECT_BUFFER_ADDRESS_OFFSET) + position1; + } + + return compareTo(obj1, offset1, length1, null, address2, length2); + } + } + @Override + public int compare(long address1, int length1, long address2, int length2) + { + return compareTo(null, address1, length1, null, address2, length2); + } + + @Override + public int compare(byte[] buffer1, int offset1, int length1, long address2, int length2) + { + return compareTo(buffer1, BYTE_ARRAY_BASE_OFFSET + offset1, length1, + null, address2, length2); + } + public int compare(ByteBuffer buffer1, int position1, int length1, byte[] buffer2, int offset2, int length2) { Object obj1; @@ -262,7 +321,7 @@ public void copy(byte[] src, int srcPosition, ByteBuffer trg, int trgPosition, i if (trg.hasArray()) System.arraycopy(src, srcPosition, trg.array(), trg.arrayOffset() + trgPosition, length); else - copy(null, srcPosition + theUnsafe.getLong(src, Unsafe.ARRAY_BYTE_BASE_OFFSET), trg, trgPosition, length); + copy((Object) src, (long) srcPosition + Unsafe.ARRAY_BYTE_BASE_OFFSET, trg, trgPosition, length); } public void copy(ByteBuffer srcBuf, int srcPosition, ByteBuffer trgBuf, int trgPosition, int length) @@ -465,6 +524,24 @@ public int compare(ByteBuffer buffer1, byte[] buffer2, int offset2, int length2) return compare(buffer1, ByteBuffer.wrap(buffer2, offset2, length2)); } + @Override + public int compare(ByteBuffer b1, long address2, int length2) + { + throw new UnsupportedOperationException("native memory address is an argument, we cannot do it using a pure Java"); + } + + @Override + public int compare(long address1, int length1, long address2, int length2) + { + throw new UnsupportedOperationException("native memory address is an argument, we cannot do it using a pure Java"); + } + + @Override + public int compare(byte[] b1, int s1, int l1, long address2, int length2) + { + throw new UnsupportedOperationException("native memory address is an argument, we cannot do it using a pure Java"); + } + public int compare(ByteBuffer buffer1, ByteBuffer buffer2) { int end1 = buffer1.limit(); diff --git a/src/java/org/apache/cassandra/utils/UUIDGen.java b/src/java/org/apache/cassandra/utils/UUIDGen.java index 5ece1cbf83f8..b6bdba2cd4d6 100644 --- a/src/java/org/apache/cassandra/utils/UUIDGen.java +++ b/src/java/org/apache/cassandra/utils/UUIDGen.java @@ -30,7 +30,12 @@ public class UUIDGen /** creates a type 1 uuid from raw bytes. */ public static UUID getUUID(ByteBuffer raw) { - return new UUID(raw.getLong(raw.position()), raw.getLong(raw.position() + 8)); + return getUUID(raw.getLong(raw.position()), raw.getLong(raw.position() + 8)); + } + + public static UUID getUUID(long mostSigBits, long leastSigBits) + { + return new UUID(mostSigBits, leastSigBits); } public static ByteBuffer toByteBuffer(UUID uuid) diff --git a/src/java/org/apache/cassandra/utils/memory/BigEndianMemoryUtil.java b/src/java/org/apache/cassandra/utils/memory/BigEndianMemoryUtil.java new file mode 100644 index 000000000000..7641dcb5b2da --- /dev/null +++ b/src/java/org/apache/cassandra/utils/memory/BigEndianMemoryUtil.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.memory; + + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.utils.Architecture; + +public class BigEndianMemoryUtil extends MemoryUtil +{ + public static int getUnsignedShort(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b1) == 0L) + return (Architecture.BIG_ENDIAN ? unsafe.getShort(address) : Short.reverseBytes(unsafe.getShort(address))) & 0xffff; + else + return getShortByByte(address) & 0xffff; + } + + public static int getInt(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b11) == 0L) + return Architecture.BIG_ENDIAN ? unsafe.getInt(address) : Integer.reverseBytes(unsafe.getInt(address)); + else + return getIntByByte(address); + } + + public static long getLong(long address) + { + if (Architecture.IS_UNALIGNED || (address & 0b111) == 0L) + return Architecture.BIG_ENDIAN ? unsafe.getLong(address) : Long.reverseBytes(unsafe.getLong(address)); + else + return getLongByByte(address); + } + + public static void setShort(long address, short s) + { + if (Architecture.IS_UNALIGNED || (address & 0b1) == 0L) + unsafe.putShort(address, Architecture.BIG_ENDIAN ? s : Short.reverseBytes(s)); + else + putShortByByte(address, s); + } + + public static void setInt(long address, int l) + { + if (Architecture.IS_UNALIGNED || (address & 0b11) == 0L) + unsafe.putInt(address, Architecture.BIG_ENDIAN ? l : Integer.reverseBytes(l)); + else + putIntByByte(address, l); + } + + public static void setLong(long address, long l) + { + if (Architecture.IS_UNALIGNED || (address & 0b111) == 0L) + unsafe.putLong(address, Architecture.BIG_ENDIAN ? l : Long.reverseBytes(l)); + else + putLongByByte(address, l); + } + + @VisibleForTesting + static long getLongByByte(long address) + { + return (((long) unsafe.getByte(address ) ) << 56) | + (((long) unsafe.getByte(address + 1) & 0xff) << 48) | + (((long) unsafe.getByte(address + 2) & 0xff) << 40) | + (((long) unsafe.getByte(address + 3) & 0xff) << 32) | + (((long) unsafe.getByte(address + 4) & 0xff) << 24) | + (((long) unsafe.getByte(address + 5) & 0xff) << 16) | + (((long) unsafe.getByte(address + 6) & 0xff) << 8) | + (((long) unsafe.getByte(address + 7) & 0xff) ); + } + + @VisibleForTesting + static int getIntByByte(long address) + { + return (((int) unsafe.getByte(address ) ) << 24) | + (((int) unsafe.getByte(address + 1) & 0xff) << 16) | + (((int) unsafe.getByte(address + 2) & 0xff) << 8) | + (((int) unsafe.getByte(address + 3) & 0xff) ); + } + + @VisibleForTesting + static int getShortByByte(long address) + { + return (((int) unsafe.getByte(address ) ) << 8) | + (((int) unsafe.getByte(address + 1) & 0xff) ); + } + + @VisibleForTesting + static void putLongByByte(long address, long value) + { + unsafe.putByte(address , (byte) (value >> 56)); + unsafe.putByte(address + 1, (byte) (value >> 48)); + unsafe.putByte(address + 2, (byte) (value >> 40)); + unsafe.putByte(address + 3, (byte) (value >> 32)); + unsafe.putByte(address + 4, (byte) (value >> 24)); + unsafe.putByte(address + 5, (byte) (value >> 16)); + unsafe.putByte(address + 6, (byte) (value >> 8)); + unsafe.putByte(address + 7, (byte) (value )); + } + + @VisibleForTesting + static void putIntByByte(long address, int value) + { + unsafe.putByte(address , (byte) (value >> 24)); + unsafe.putByte(address + 1, (byte) (value >> 16)); + unsafe.putByte(address + 2, (byte) (value >> 8)); + unsafe.putByte(address + 3, (byte) (value )); + } + + @VisibleForTesting + static void putShortByByte(long address, short value) + { + unsafe.putByte(address , (byte) (value >> 8)); + unsafe.putByte(address + 1, (byte) (value )); + } + + public static ByteBuffer getByteBuffer(long address, int length) + { + return getByteBuffer(address, length, ByteOrder.BIG_ENDIAN); + } + + public static ByteBuffer getHollowDirectByteBuffer() + { + return getHollowDirectByteBuffer(ByteOrder.BIG_ENDIAN); + } +} diff --git a/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java b/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java index 86416c49a703..724c673f883f 100644 --- a/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java +++ b/src/java/org/apache/cassandra/utils/memory/MemoryUtil.java @@ -184,6 +184,26 @@ public static void setByteBufferCapacity(ByteBuffer instance, int capacity) unsafe.putInt(instance, DIRECT_BYTE_BUFFER_CAPACITY_OFFSET, capacity); } + /** + * Transfers count bytes to Memory starting at memoryOffset from ByteBuffer starting at bufferOffset + * + * @param targetAddress target start offset in the memory + * @param sourceBuffer the source data buffer + * @param bufferOffset start offset of the buffer + * @param count number of bytes to transfer + */ + public static void setBytes(long targetAddress, ByteBuffer sourceBuffer, int bufferOffset, int count) + { + if (count == 0) + return; + int start = sourceBuffer.position() + bufferOffset; + + if (sourceBuffer.isDirect()) + setBytes(getAddress(sourceBuffer) + start, targetAddress, count); + else + setBytes(targetAddress, sourceBuffer.array(), sourceBuffer.arrayOffset() + start, count); + } + public static void setBytes(long address, ByteBuffer buffer) { int start = buffer.position(); @@ -255,4 +275,51 @@ else if (count == 0) unsafe.copyMemory(null, address, buffer, BYTE_ARRAY_BASE_OFFSET + bufferOffset, count); } + + /** + * Transfers count bytes from Memory starting at address to ByteBuffer starting at bufferOffset + * + * @param sourceAddress start offset in the memory + * @param targetBuffer the target data buffer + * @param bufferOffset start offset of the buffer + * @param length number of bytes to transfer + */ + public static void getBytes(long sourceAddress, ByteBuffer targetBuffer, int bufferOffset, int length) + { + if (targetBuffer == null) + throw new NullPointerException(); + else if (length < 0 || length > targetBuffer.remaining()) + throw new IndexOutOfBoundsException(); + else if (length == 0) + return; + + Object obj; + long offset; + if (targetBuffer.hasArray()) + { + obj = targetBuffer.array(); + offset = BYTE_ARRAY_BASE_OFFSET + targetBuffer.arrayOffset(); + } + else + { + obj = null; + offset = unsafe.getLong(targetBuffer, DIRECT_BYTE_BUFFER_ADDRESS_OFFSET); + } + offset += targetBuffer.position(); + offset += bufferOffset; + + unsafe.copyMemory(null, sourceAddress, obj, offset, length); + } + + /** + * Transfers count bytes from Memory starting at address to ByteBuffer + * + * @param sourceAddress start offset in the memory + * @param targetBuffer the target data buffer + * @param length number of bytes to transfer + */ + public static void getBytes(long sourceAddress, ByteBuffer targetBuffer, int length) + { + getBytes(sourceAddress, targetBuffer, 0, length); + } } diff --git a/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java b/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java index 3ba4ae650e13..07561760f34b 100644 --- a/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java +++ b/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java @@ -25,6 +25,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.apache.cassandra.db.marshal.ValueAccessor; import org.apache.cassandra.utils.ObjectSizes; import org.apache.cassandra.utils.concurrent.ImmediateFuture; import org.apache.cassandra.utils.concurrent.OpOrder; @@ -64,13 +65,14 @@ public void unsharedHeapSizeExcludingDataLTEUnsharedHeapSize() @Test public void testSingletonClusteringHeapSize() { - Clustering clustering = this.clustering.accessor().factory().staticClustering(); + ValueAccessor.ObjectFactory factory = this.clustering.ensureAccessorFactorySupport().accessor().factory(); + Clustering clustering = factory.staticClustering(); Assertions.assertThat(clustering.unsharedHeapSize()) .isEqualTo(0); Assertions.assertThat(clustering.unsharedHeapSizeExcludingData()) .isEqualTo(0); - clustering = this.clustering.accessor().factory().clustering(); + clustering = factory.clustering(); Assertions.assertThat(clustering.unsharedHeapSize()) .isEqualTo(0); Assertions.assertThat(clustering.unsharedHeapSizeExcludingData()) diff --git a/test/unit/org/apache/cassandra/db/ClusteringPrefixTest.java b/test/unit/org/apache/cassandra/db/ClusteringPrefixTest.java index a295b2278694..0bbe535348d0 100644 --- a/test/unit/org/apache/cassandra/db/ClusteringPrefixTest.java +++ b/test/unit/org/apache/cassandra/db/ClusteringPrefixTest.java @@ -180,7 +180,7 @@ public void testRetainable(ValueAccessor.ObjectFactory factory, public void testRetainable(ValueAccessor.ObjectFactory factory, Function allocator, - Function, ClusteringPrefix> mapper) + Function, ClusteringPrefix> mapper) { ClusteringPrefix[] clusterings = new ClusteringPrefix[] { diff --git a/test/unit/org/apache/cassandra/db/NativeCellTest.java b/test/unit/org/apache/cassandra/db/NativeCellTest.java index 133ed53acf92..ebc956947e3b 100644 --- a/test/unit/org/apache/cassandra/db/NativeCellTest.java +++ b/test/unit/org/apache/cassandra/db/NativeCellTest.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.db; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Iterator; import java.util.Random; @@ -30,6 +31,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.ColumnIdentifier; @@ -37,6 +39,7 @@ import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.rows.*; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.concurrent.ImmediateFuture; import org.apache.cassandra.utils.concurrent.OpOrder; import org.apache.cassandra.utils.memory.HeapCloner; @@ -63,7 +66,7 @@ public static void setUp() } @Test - public void testCells() + public void testCells() throws Exception { for (int run = 0 ; run < 1000 ; run++) { @@ -153,7 +156,7 @@ private static int sanesize(int randomsize) return Math.min(Math.max(1, randomsize), 1 << 26); } - private static void test(Row row) + private static void test(Row row) throws Exception { Row nrow = row.clone(nativeAllocator.cloner(group)); Row brow = row.clone(HeapCloner.instance); @@ -161,21 +164,170 @@ private static void test(Row row) Assert.assertEquals(row, brow); Assert.assertEquals(nrow, brow); - Assert.assertEquals(row.clustering(), nrow.clustering()); - Assert.assertEquals(row.clustering(), brow.clustering()); - Assert.assertEquals(nrow.clustering(), brow.clustering()); + Digest rowDigest = Digest.forReadResponse(); + Digest nativeRowDigest = Digest.forReadResponse(); + Digest byteBufferRowDigest = Digest.forReadResponse(); + row.digest(rowDigest); + nrow.digest(nativeRowDigest); + brow.digest(byteBufferRowDigest); + byte[] rowDigestValue = rowDigest.digest(); + Assert.assertArrayEquals(rowDigestValue, nativeRowDigest.digest()); + Assert.assertArrayEquals(rowDigestValue, byteBufferRowDigest.digest()); - Assert.assertEquals(row.clustering().dataSize(), nrow.clustering().dataSize()); - Assert.assertEquals(row.clustering().dataSize(), brow.clustering().dataSize()); + Assert.assertEquals(row.dataSize(), nrow.dataSize()); + Assert.assertEquals(row.dataSize(), brow.dataSize()); - ClusteringComparator comparator = new ClusteringComparator(UTF8Type.instance); - Assert.assertEquals(0, comparator.compare(row.clustering(), nrow.clustering())); - Assert.assertEquals(0, comparator.compare(row.clustering(), brow.clustering())); - Assert.assertEquals(0, comparator.compare(nrow.clustering(), brow.clustering())); + assertClustering(row, brow, nrow); assertCellsDataSize(row, nrow); assertCellsDataSize(row, brow); + assertCellsWrittenToOutput(row, nrow); + assertCellsWrittenToOutput(row, brow); + + assertCellsSlicing(row, nrow); + assertCellsSlicing(row, brow); + } + + private static void assertClustering(Row row, Row byteBufferRow, Row nativeRow) throws Exception + { + Assert.assertEquals(row.clustering(), nativeRow.clustering()); + Assert.assertEquals(row.clustering(), byteBufferRow.clustering()); + Assert.assertEquals(nativeRow.clustering(), byteBufferRow.clustering()); + + ClusteringComparator comparator = new ClusteringComparator(UTF8Type.instance); + Assert.assertEquals(0, comparator.compare(row.clustering(), nativeRow.clustering())); + Assert.assertEquals(0, comparator.compare(row.clustering(), byteBufferRow.clustering())); + Assert.assertEquals(0, comparator.compare(nativeRow.clustering(), byteBufferRow.clustering())); + Assert.assertEquals(0, comparator.compare(nativeRow.clustering(), row.clustering())); + Assert.assertEquals(0, comparator.compare(nativeRow.clustering(), nativeRow.clustering())); + + + Assert.assertEquals(row.clustering().size(), nativeRow.clustering().size()); + Assert.assertEquals(row.clustering().size(), byteBufferRow.clustering().size()); + + assertByteBufferArrayEquals(row.clustering().getBufferArray(), nativeRow.clustering().getBufferArray()); + assertByteBufferArrayEquals(row.clustering().getBufferArray(), byteBufferRow.clustering().getBufferArray()); + + assertRawValuesEquals(row.clustering(), nativeRow.clustering()); + assertRawValuesEquals(row.clustering(), byteBufferRow.clustering()); + + + for (int i = 0; i < row.clustering().size(); i++) + { + Assert.assertEquals(row.clustering().isEmpty(i), byteBufferRow.clustering().isEmpty(i)); + Assert.assertEquals(row.clustering().isEmpty(i), nativeRow.clustering().isEmpty(i)); + + Assert.assertEquals(row.clustering().isNull(i), byteBufferRow.clustering().isNull(i)); + Assert.assertEquals(row.clustering().isNull(i), nativeRow.clustering().isNull(i)); + } + + assertClusteringElementSizes(row.clustering(), byteBufferRow.clustering()); + assertClusteringElementSizes(row.clustering(), nativeRow.clustering()); + + assertClusteringElementWrittenToOutput(row.clustering(), byteBufferRow.clustering()); + assertClusteringElementWrittenToOutput(row.clustering(), nativeRow.clustering()); + + assertClusteringSlicing(row.clustering(), byteBufferRow.clustering()); + assertClusteringSlicing(row.clustering(), nativeRow.clustering()); + + } + + private static void assertRawValuesEquals(Clustering c1, Clustering c2) + { + V1[] rawValues1 = c1.getRawValues(); + V2[] rawValues2 = c2.getRawValues(); + Assert.assertEquals(rawValues1.length, rawValues2.length); + for (int i = 0; i < c1.size(); i++) + { + if (rawValues1[i] != null) + Assert.assertEquals(0, c1.accessor().compare(rawValues1[i], rawValues2[i], c2.accessor())); + } + } + + private static void assertClusteringElementSizes(Clustering c1, Clustering c2) + { + for (int i = 0; i < c1.size(); i++) + { + if (c1.get(i) != null) + { + int sizeC1 = c1.accessor().size(c1.get(i)); + int sizeC2 = c2.accessor().size(c2.get(i)); + Assert.assertEquals(sizeC1, sizeC2); + } + } + } + + private static void assertClusteringElementWrittenToOutput(Clustering c1, Clustering c2) throws IOException + { + for (int i = 0; i < c1.size(); i++) + { + if (c1.get(i) != null) + { + DataOutputBuffer outputC1 = new DataOutputBuffer(c1.dataSize()); + DataOutputBuffer outputC2 = new DataOutputBuffer(c2.dataSize()); + c1.accessor().write(c1.get(i), outputC1); + c2.accessor().write(c2.get(i), outputC2); + Assert.assertArrayEquals(outputC1.toByteArray(), outputC2.toByteArray()); + } + } + } + + private static void assertClusteringSlicing(Clustering c1, Clustering c2) throws IOException + { + for (int i = 0; i < c1.size(); i++) + { + if (c1.get(i) != null) + { + int offset = c1.accessor().size(c1.get(i)) / 3; + int length = c1.accessor().size(c1.get(i)) / 2; + V1 slice1 = c1.accessor().slice(c1.get(i), offset, length); + V2 slice2 = c2.accessor().slice(c2.get(i), offset, length); + Assert.assertEquals(0, c1.accessor().compare(slice1, slice2, c2.accessor())); + Assert.assertEquals(0, c2.accessor().compare(slice2, slice1, c1.accessor())); + } + } + } + + private static void assertByteBufferArrayEquals(ByteBuffer[] array1, ByteBuffer[] array2) { + Assert.assertEquals(array1.length, array2.length); + for (int i = 0; i < array1.length; i++) { + if (array1[i] != null) + Assert.assertEquals(0, ByteBufferUtil.compareUnsigned(array1[i], array2[i])); + } + } + + private static void assertCellsWrittenToOutput(Row row1, Row row2) throws IOException + { + Iterator> row1Iterator = row1.cells().iterator(); + Iterator> row2Iterator = row2.cells().iterator(); + while (row1Iterator.hasNext()) + { + Cell cell1 = row1Iterator.next(); + Cell cell2 = row2Iterator.next(); + DataOutputBuffer output1 = new DataOutputBuffer(cell1.dataSize()); + DataOutputBuffer output2 = new DataOutputBuffer(cell2.dataSize()); + cell1.accessor().write(cell1.value(), output1); + cell2.accessor().write(cell2.value(), output2); + Assert.assertArrayEquals(output1.toByteArray(), output2.toByteArray()); + } + } + + private static void assertCellsSlicing(Row row1, Row row2) + { + Iterator> row1Iterator = row1.cells().iterator(); + Iterator> row2Iterator = row2.cells().iterator(); + while (row1Iterator.hasNext()) + { + Cell cell1 = row1Iterator.next(); + Cell cell2 = row2Iterator.next(); + int offset = cell1.accessor().size(cell1.value()) / 3; + int length = cell1.accessor().size(cell1.value()) / 2; + Object slice1 = cell1.accessor().slice(cell1.value(), offset, length); + Object slice2 = cell2.accessor().slice(cell2.value(), offset, length); + Assert.assertEquals(0, cell1.accessor().compare(slice1, slice2, cell2.accessor())); + Assert.assertEquals(0, cell2.accessor().compare(slice2, slice1, cell1.accessor())); + } } private static void assertCellsDataSize(Row row1, Row row2) diff --git a/test/unit/org/apache/cassandra/db/marshal/ByteBufferAccessorTest.java b/test/unit/org/apache/cassandra/db/marshal/ByteBufferAccessorTest.java index 6f39d8378d4f..52d7922ca345 100644 --- a/test/unit/org/apache/cassandra/db/marshal/ByteBufferAccessorTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/ByteBufferAccessorTest.java @@ -20,7 +20,9 @@ import java.nio.ByteBuffer; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.utils.ByteBufferUtil; @@ -29,6 +31,21 @@ public class ByteBufferAccessorTest extends ValueAccessorTester { + + private static final TestNativeDataAllocator allocator = new TestNativeDataAllocator(); + @BeforeClass + public static void setSetMemoryAllocator() + { + NativeAccessor.setNativeMemoryAllocator(allocator); + } + + @AfterClass + public static void releaseMemory() + { + allocator.close(); + } + + private static byte[] array(int start, int size) { byte[] a = new byte[size]; diff --git a/test/unit/org/apache/cassandra/db/marshal/CollectionTypesTest.java b/test/unit/org/apache/cassandra/db/marshal/CollectionTypesTest.java index 889364b59710..6250c0fea2b5 100644 --- a/test/unit/org/apache/cassandra/db/marshal/CollectionTypesTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/CollectionTypesTest.java @@ -27,7 +27,9 @@ import java.util.Random; import java.util.Set; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.cql3.CQL3Type; @@ -36,6 +38,20 @@ public class CollectionTypesTest { + + private static final TestNativeDataAllocator allocator = new TestNativeDataAllocator(); + @BeforeClass + public static void setSetMemoryAllocator() + { + NativeAccessor.setNativeMemoryAllocator(allocator); + } + + @AfterClass + public static void releaseMemory() + { + allocator.close(); + } + interface TypeFactory { T createType(AbstractType keyType, AbstractType valType); } interface ValueFactory { T createValue(ValueGenerator keyGen, ValueGenerator valGen, int size, Random random); } @@ -68,6 +84,7 @@ static void testSerializationDeserialization(Type Assert.assertEquals(srcString, dstString); } } + allocator.releaseMemory(); } } } diff --git a/test/unit/org/apache/cassandra/db/marshal/CompositeAndTupleTypesTest.java b/test/unit/org/apache/cassandra/db/marshal/CompositeAndTupleTypesTest.java index 0c86871c28e8..65163924c1df 100644 --- a/test/unit/org/apache/cassandra/db/marshal/CompositeAndTupleTypesTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/CompositeAndTupleTypesTest.java @@ -24,7 +24,9 @@ import java.util.List; import java.util.Random; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.cql3.FieldIdentifier; @@ -34,6 +36,20 @@ public class CompositeAndTupleTypesTest { + + private static final TestNativeDataAllocator allocator = new TestNativeDataAllocator(); + @BeforeClass + public static void setSetMemoryAllocator() + { + NativeAccessor.setNativeMemoryAllocator(allocator); + } + + @AfterClass + public static void releaseMemory() + { + allocator.close(); + } + interface TypeFactory> { T createType(List> types); } interface ValueCombiner { V combine(AbstractType type, ValueAccessor accessor, V[] values); } @@ -107,6 +123,7 @@ public > void testSerializationDeserialization(TypeFa Assert.assertEquals(srcString, dstString); } } + allocator.releaseMemory(); } } } diff --git a/test/unit/org/apache/cassandra/db/marshal/CompositeTypeTest.java b/test/unit/org/apache/cassandra/db/marshal/CompositeTypeTest.java index 0f3714870af7..994b026c7bd5 100644 --- a/test/unit/org/apache/cassandra/db/marshal/CompositeTypeTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/CompositeTypeTest.java @@ -23,6 +23,7 @@ import java.util.*; import com.google.common.collect.Lists; +import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -70,6 +71,20 @@ public class CompositeTypeTest uuids[i] = nextTimeUUID(); } + + private static final TestNativeDataAllocator allocator = new TestNativeDataAllocator(); + @BeforeClass + public static void setSetMemoryAllocator() + { + NativeAccessor.setNativeMemoryAllocator(allocator); + } + + @AfterClass + public static void releaseMemory() + { + allocator.close(); + } + @BeforeClass public static void defineSchema() throws ConfigurationException { diff --git a/test/unit/org/apache/cassandra/db/marshal/NativeAccessorTest.java b/test/unit/org/apache/cassandra/db/marshal/NativeAccessorTest.java new file mode 100644 index 000000000000..f53d5361545c --- /dev/null +++ b/test/unit/org/apache/cassandra/db/marshal/NativeAccessorTest.java @@ -0,0 +1,367 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.marshal; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.FloatBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.StandardCharsets; +import java.util.UUID; +import java.util.function.BiFunction; +import java.util.function.Function; + +import com.google.common.primitives.UnsignedBytes; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.db.Digest; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.service.paxos.Ballot; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.TimeUUID; +import org.apache.cassandra.utils.UUIDGen; +import org.apache.cassandra.utils.memory.BigEndianMemoryUtil; + +import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; +import static org.quicktheories.QuickTheory.qt; +import static org.quicktheories.generators.SourceDSL.integers; + +public class NativeAccessorTest extends ValueAccessorTester +{ + private static final TestNativeDataAllocator allocator = new TestNativeDataAllocator(); + @BeforeClass + public static void setSetMemoryAllocator() + { + NativeAccessor.setNativeMemoryAllocator(allocator); + } + + @AfterClass + public static void releaseMemory() + { + allocator.close(); + } + + private final ValueAccessor nativeAccessor = NativeAccessor.instance; + private final ValueAccessor bufferAccessor = ByteBufferAccessor.instance; + + @Test + public void testCompare() + { + qt().forAll(accessors(), + byteArrays(integers().between(0, 200)), + byteArrays(integers().between(0, 200)) + ).checkAssert(this::testCompare); + } + + private void testCompare(ValueAccessor rightAccessor, byte[] leftArray, byte[] rightArray) + { + NativeData left = NativeAccessor.instance.valueOf(leftArray); + V right = rightAccessor.valueOf(rightArray); + int expectedResult = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(leftArray, rightArray)); + int actualResult = Integer.signum(NativeAccessor.instance.compare(left, right, rightAccessor)); + Assert.assertEquals(expectedResult, actualResult); + } + + @Test + public void testCopy() + { + qt().forAll(accessors(), + byteArrays(integers().between(10, 100)), + integers().between(0, 9), + integers().between(0, 9) + ).checkAssert(this::testCopy); + } + + private void testCopy(ValueAccessor dstAccessor, byte[] dataToCopy, int srcOffset, int dstOffset) + { + ValueAccessor srcAcccessor = NativeAccessor.instance; + NativeData src = srcAcccessor.valueOf(dataToCopy); + V dst = dstAccessor.valueOf(new byte[dataToCopy.length + dstOffset - srcOffset]); + NativeAccessor.instance.copyTo(src, srcOffset, dst, dstAccessor, dstOffset, dataToCopy.length - srcOffset); + V dstSlice = dstAccessor.slice(dst, dstOffset, dataToCopy.length - srcOffset); + NativeData expectedData = srcAcccessor.slice(src, srcOffset, dataToCopy.length - srcOffset); + + Assert.assertArrayEquals(srcAcccessor.toArray(src, srcOffset, dataToCopy.length - srcOffset), dstAccessor.toArray(dstSlice)); + Assert.assertArrayEquals(srcAcccessor.toArray(expectedData), dstAccessor.toArray(dstSlice)); + } + + @Test + public void testPutMethods() + { + testNativePut((byte) 42, nativeAccessor::putByte, bufferAccessor::getByte); + + testNativePut((short )(Short.MAX_VALUE - 3), nativeAccessor::putShort, bufferAccessor::getShort); + + testNativePut(Integer.MAX_VALUE - 5, nativeAccessor::putInt, bufferAccessor::getInt); + + testNativePut((float) Math.PI, nativeAccessor::putFloat, bufferAccessor::getFloat); + + testNativePut(Long.MAX_VALUE - 2, nativeAccessor::putLong, bufferAccessor::getLong); + + testNativePut(0L, nativeAccessor::putVInt, bufferAccessor::getVInt); + testNativePut(42L, nativeAccessor::putVInt, bufferAccessor::getVInt); + testNativePut(0xFFFFFFL, nativeAccessor::putVInt, bufferAccessor::getVInt); + testNativePut(Long.MAX_VALUE - 1, nativeAccessor::putVInt, bufferAccessor::getVInt); + + testNativePut(42, nativeAccessor::putVInt32, bufferAccessor::getVInt32); + testNativePut(0xFFFFF, nativeAccessor::putVInt32, bufferAccessor::getVInt32); + testNativePut(Integer.MAX_VALUE - 1, nativeAccessor::putVInt32, bufferAccessor::getVInt32); + + testNativePut(42L, nativeAccessor::putUnsignedVInt, bufferAccessor::getUnsignedVInt); + testNativePut(0xFFFFFL, nativeAccessor::putUnsignedVInt, bufferAccessor::getUnsignedVInt); + testNativePut(0xFFFFFFFL, nativeAccessor::putUnsignedVInt, bufferAccessor::getUnsignedVInt); + testNativePut(Long.MAX_VALUE - 1, nativeAccessor::putUnsignedVInt, bufferAccessor::getUnsignedVInt); + } + + @Test + public void testPutDouble() // there is no putDouble method to test it like others + { + Double originalValue = Math.PI; // Double conversion is used to compare values as bit values + NativeData nativeData = nativeAccessor.allocate(25); + ByteBuffer bufferData = bufferAccessor.allocate(25); + int offset = 7; + bufferData.putDouble(offset, originalValue); + nativeAccessor.copyByteBufferTo(bufferData, 0, nativeData, 0, bufferAccessor.size(bufferData)); + Double getValue = nativeAccessor.getDouble(nativeData, offset); + Assert.assertEquals(originalValue, getValue); + + NativeData nativeDataSlice = nativeAccessor.slice(nativeData, offset, nativeData.nativeDataSize() - offset); + Double toValue = nativeAccessor.toDouble(nativeDataSlice); + Assert.assertEquals(originalValue, toValue); + + } + + private void testNativePut(V originalValue, TriFunction putMethod, + BiFunction getMethod) + { + NativeData nativeData = nativeAccessor.allocate(25); + int offset = 2; + putMethod.apply(nativeData, offset, originalValue); + ByteBuffer buffer = nativeAccessor.toBuffer(nativeData); + V getValue = getMethod.apply(buffer, offset); + Assert.assertEquals(originalValue, getValue); + } + + @Test + public void testGetMethods() + { + testNativeGet((byte) 42, bufferAccessor::putByte, nativeAccessor::getByte, nativeAccessor::toByte); + + testNativeGet((short )(Short.MAX_VALUE - 3), bufferAccessor::putShort, nativeAccessor::getShort, nativeAccessor::toShort); + + // nativeAccessor::getUnsignedShort is already tested by org.apache.cassandra.db.marshal.ValueAccessorTest.testUnsignedShort() + + testNativeGet(Integer.MAX_VALUE - 5, bufferAccessor::putInt, nativeAccessor::getInt, nativeAccessor::toInt); + + testNativeGet((float) Math.PI, bufferAccessor::putFloat, nativeAccessor::getFloat, nativeAccessor::toFloat); + + testNativeGet(Long.MAX_VALUE - 2, bufferAccessor::putLong, nativeAccessor::getLong, nativeAccessor::toLong); + + testNativeGet(0L, bufferAccessor::putVInt, nativeAccessor::getVInt, null); + testNativeGet(42L, bufferAccessor::putVInt, nativeAccessor::getVInt, null); + testNativeGet(0xFFFFFFL, bufferAccessor::putVInt, nativeAccessor::getVInt, null); + testNativeGet(Long.MAX_VALUE - 1, bufferAccessor::putVInt, nativeAccessor::getVInt, null); + + testNativeGet(42, bufferAccessor::putVInt32, nativeAccessor::getVInt32, null); + testNativeGet(0xFFFFF, bufferAccessor::putVInt32, nativeAccessor::getVInt32, null); + testNativeGet(Integer.MAX_VALUE - 1, bufferAccessor::putVInt32, nativeAccessor::getVInt32, null); + + testNativeGet(42L, bufferAccessor::putUnsignedVInt, nativeAccessor::getUnsignedVInt, null); + testNativeGet(0xFFFFFL, bufferAccessor::putUnsignedVInt, nativeAccessor::getUnsignedVInt, null); + testNativeGet(0xFFFFFFFL, bufferAccessor::putUnsignedVInt, nativeAccessor::getUnsignedVInt, null); + testNativeGet(Long.MAX_VALUE - 1, bufferAccessor::putUnsignedVInt, nativeAccessor::getUnsignedVInt, null); + } + + private void testNativeGet(V originalValue, TriFunction putMethod, + BiFunction getMethod, Function toMethod) + { + ByteBuffer bufferData = bufferAccessor.allocate(25); + NativeData nativeData = nativeAccessor.allocate(25); + int offset = 2; + putMethod.apply(bufferData, offset, originalValue); + nativeAccessor.copyByteBufferTo(bufferData, 0, nativeData, 0, bufferAccessor.size(bufferData)); + V getValue = getMethod.apply(nativeData, offset); + Assert.assertEquals(originalValue, getValue); + + if (toMethod != null) + { + NativeData nativeDataSlice = nativeAccessor.slice(nativeData, offset, nativeData.nativeDataSize() - offset); + V toValue = toMethod.apply(nativeDataSlice); + Assert.assertEquals(originalValue, toValue); + } + } + + @Test + public void testToUUID() { + UUID originalValue = UUID.randomUUID(); + ByteBuffer encodedOriginalValue = UUIDGen.toByteBuffer(originalValue); + int size = encodedOriginalValue.remaining(); + NativeData nativeData = nativeAccessor.allocate(size); + nativeAccessor.copyByteBufferTo(encodedOriginalValue, 0, nativeData, 0, size); + + UUID nativeUUID = nativeAccessor.toUUID(nativeData); + Assert.assertEquals(originalValue, nativeUUID); + } + + @Test + public void testToTimeUUID() { + TimeUUID originalValue = nextTimeUUID(); + ByteBuffer encodedOriginalValue = originalValue.toBytes(); + int size = encodedOriginalValue.remaining(); + NativeData nativeData = nativeAccessor.allocate(size); + nativeAccessor.copyByteBufferTo(encodedOriginalValue, 0, nativeData, 0, size); + + TimeUUID nativeUUID = nativeAccessor.toTimeUUID(nativeData); + Assert.assertEquals(originalValue, nativeUUID); + } + + @Test + public void testToBullot() { + Ballot originalValue = Ballot.fromUuid(nextTimeUUID().asUUID()); + ByteBuffer encodedOriginalValue = originalValue.toBytes(); + int size = encodedOriginalValue.remaining(); + NativeData nativeData = nativeAccessor.allocate(size); + nativeAccessor.copyByteBufferTo(encodedOriginalValue, 0, nativeData, 0, size); + + Ballot nativeBallot = nativeAccessor.toBallot(nativeData); + Assert.assertEquals(originalValue, nativeBallot); + } + + @Test + public void testToHex() { + int valueSize = 42; + byte[] originalData = new byte[valueSize]; + for (int i = 0; i < valueSize; i++) + originalData[i] = (byte) i; + + ByteBuffer bufferData = bufferAccessor.valueOf(originalData); + String bufferHex = bufferAccessor.toHex(bufferData); + + NativeData nativeData = nativeAccessor.valueOf(originalData); + String nativeHex = nativeAccessor.toHex(nativeData); + Assert.assertEquals(bufferHex, nativeHex); + } + + @Test + public void test() { + NativeData nativeData = nativeAccessor.allocate(4); + BigEndianMemoryUtil.setInt(nativeData.getAddress(), 0x00FF); + + String nativeHex = nativeAccessor.toHex(nativeData); + System.out.println(nativeHex); + } + + @Test + public void testToString() throws CharacterCodingException + { + String originalData = "test string value"; + NativeData nativeData = nativeAccessor.valueOf(originalData, StandardCharsets.UTF_8); + String nativeToString = nativeAccessor.toString(nativeData, StandardCharsets.UTF_8); + Assert.assertEquals(originalData, nativeToString); + } + + @Test + public void testToFloatArray() { + int valueSize = 42; + ByteBuffer buffer = ByteBuffer.allocate(valueSize * Float.BYTES); + FloatBuffer floatBuffer = buffer.asFloatBuffer(); + for (int i = 0; i < valueSize; i++) + floatBuffer.put((float) i); + + NativeData nativeData = nativeAccessor.valueOf(buffer); + float[] decodedFloatArray = nativeAccessor.toFloatArray(nativeData, valueSize); + + for (int i = 0; i < valueSize; i++) + Assert.assertEquals((Float) floatBuffer.get(i), (Float) decodedFloatArray[i]); + // Float conversion is used to compare values as bit values + } + + @Test + public void testDataOutputPlusWrite() throws IOException + { + int valueSize = 25; + NativeData nativeData = nativeAccessor.allocate(valueSize); + byte[] originalData = new byte[valueSize]; + for (int i = 0; i < valueSize; i++) + originalData[i] = (byte) i; + nativeAccessor.putBytes(nativeData, 0, originalData); + + try(DataOutputBuffer dataOutput = new DataOutputBuffer()) + { + nativeAccessor.write(nativeData, dataOutput); + byte[] writenData = dataOutput.toByteArray(); + Assert.assertArrayEquals(originalData, writenData); + } + } + + @Test + public void testHeapByteBufferWrite() + { + testHeapByteBufferWrite(ByteBuffer.allocate(25), 23); + } + + @Test + public void testDirectByteBufferWrite() + { + testHeapByteBufferWrite(ByteBuffer.allocateDirect(25), 23); + } + + private void testHeapByteBufferWrite(ByteBuffer buffer, int valueSize) + { + NativeData nativeData = nativeAccessor.allocate(valueSize); + byte[] originalData = new byte[valueSize]; + for (int i = 0; i < valueSize; i++) + originalData[i] = (byte) i; + nativeAccessor.putBytes(nativeData, 0, originalData); + + int initialPosition = buffer.position(); + nativeAccessor.write(nativeData, buffer); + Assert.assertEquals(valueSize, buffer.position() - initialPosition); + buffer.flip(); + Assert.assertArrayEquals(originalData, ByteBufferUtil.getArray(buffer)); + } + + @Test + public void testDigest() + { + int valueSize = 25; + NativeData nativeData = nativeAccessor.allocate(valueSize); + byte[] originalData = new byte[valueSize]; + for (int i = 0; i < valueSize; i++) + originalData[i] = (byte) i; + nativeAccessor.putBytes(nativeData, 0, originalData); + + Digest byteArrayDigest = Digest.forReadResponse(); + byteArrayDigest.update(originalData, 0, originalData.length); + + Digest nativeDigest = Digest.forReadResponse(); + nativeAccessor.digest(nativeData, nativeDigest); + + Assert.assertArrayEquals(byteArrayDigest.digest(), nativeDigest.digest()); + } + + @FunctionalInterface + interface TriFunction + { + R apply(A a, B b, C c); + } +} diff --git a/test/unit/org/apache/cassandra/db/marshal/TestNativeDataAllocator.java b/test/unit/org/apache/cassandra/db/marshal/TestNativeDataAllocator.java new file mode 100644 index 000000000000..cd45e77f7df2 --- /dev/null +++ b/test/unit/org/apache/cassandra/db/marshal/TestNativeDataAllocator.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.marshal; + +import java.io.Closeable; +import java.nio.ByteBuffer; +import java.util.concurrent.TimeUnit; + +import org.apache.cassandra.utils.concurrent.ImmediateFuture; +import org.apache.cassandra.utils.concurrent.OpOrder; +import org.apache.cassandra.utils.memory.MemoryUtil; +import org.apache.cassandra.utils.memory.NativeAllocator; +import org.apache.cassandra.utils.memory.NativePool; + +/** + * A primitive NativeData allocator is used for test purposes only + * It releases memory only when releaseMemory() or close() are called + */ +public class TestNativeDataAllocator implements NativeDataAllocator, Closeable +{ + private final NativePool nativePool = new NativePool(0, 10 * 1024 * 1024, 1.0f, + () -> ImmediateFuture.success(true)); + private NativeAllocator nativeAllocator = nativePool.newAllocator("test"); + private final OpOrder order = new OpOrder(); + + @Override + public NativeData allocateBasedOnBuffer(ByteBuffer data) + { + try(OpOrder.Group group = order.start()) + { + long address = nativeAllocator.allocate(data.remaining(), group); + MemoryUtil.setBytes(address, data); + return new AddressBasedNativeData(address, data.remaining()); + } + } + + public void releaseMemory() { + nativeAllocator.setDiscarding(); + nativeAllocator.setDiscarded(); + nativeAllocator = nativePool.newAllocator("test"); + } + + public void close() { + nativeAllocator.setDiscarding(); + nativeAllocator.setDiscarded(); + try + { + nativePool.shutdownAndWait(5, TimeUnit.SECONDS); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + } + +} diff --git a/test/unit/org/apache/cassandra/db/marshal/ValueAccessorTest.java b/test/unit/org/apache/cassandra/db/marshal/ValueAccessorTest.java index cd6681705f1d..cb9d6ee322f3 100644 --- a/test/unit/org/apache/cassandra/db/marshal/ValueAccessorTest.java +++ b/test/unit/org/apache/cassandra/db/marshal/ValueAccessorTest.java @@ -22,7 +22,9 @@ import java.nio.ByteBuffer; import java.util.Arrays; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import org.apache.cassandra.io.util.DataOutputBuffer; @@ -69,6 +71,19 @@ private static void testHashCodeAndEquals(byte[] rawBytes, buffer1, buffer2); } + private static final TestNativeDataAllocator allocator = new TestNativeDataAllocator(); + @BeforeClass + public static void setSetMemoryAllocator() + { + NativeAccessor.setNativeMemoryAllocator(allocator); + } + + @AfterClass + public static void releaseMemory() + { + allocator.close(); + } + /** * Identical data should yield identical hashcodes even if the underlying format is different */ diff --git a/test/unit/org/apache/cassandra/db/marshal/ValueAccessors.java b/test/unit/org/apache/cassandra/db/marshal/ValueAccessors.java index e04de761e802..2c65237f9655 100644 --- a/test/unit/org/apache/cassandra/db/marshal/ValueAccessors.java +++ b/test/unit/org/apache/cassandra/db/marshal/ValueAccessors.java @@ -22,7 +22,8 @@ public class ValueAccessors { - public static final ValueAccessor[] ACCESSORS = new ValueAccessor[]{ ByteBufferAccessor.instance, ByteArrayAccessor.instance }; + public static final ValueAccessor[] ACCESSORS = new ValueAccessor[]{ ByteBufferAccessor.instance, ByteArrayAccessor.instance, NativeAccessor.instance }; + public static final ValueAccessor[] FACTORY_SUPPORTED_ACCESSORS = new ValueAccessor[]{ ByteBufferAccessor.instance, ByteArrayAccessor.instance }; public static void assertDataEquals(V1 expected, ValueAccessor expectedAccessor, V2 actual, ValueAccessor actualAccessor) { diff --git a/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java b/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java index a6f34acadfb3..740234d10035 100644 --- a/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java +++ b/test/unit/org/apache/cassandra/utils/bytecomparable/ByteSourceConversionTest.java @@ -341,7 +341,7 @@ public void testEmptyClustering() void assertClusteringPairConvertsSame(AbstractType t1, AbstractType t2, Object o1, Object o2) { - for (ValueAccessor accessor : ValueAccessors.ACCESSORS) + for (ValueAccessor accessor : ValueAccessors.FACTORY_SUPPORTED_ACCESSORS) assertClusteringPairConvertsSame(accessor, t1, t2, o1, o2, AbstractType::decompose); } diff --git a/test/unit/org/apache/cassandra/utils/memory/BigEndianMemoryUtilTest.java b/test/unit/org/apache/cassandra/utils/memory/BigEndianMemoryUtilTest.java new file mode 100644 index 000000000000..fbc2595ae62f --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/memory/BigEndianMemoryUtilTest.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.utils.memory; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.junit.Assert; +import org.junit.Test; + +public class BigEndianMemoryUtilTest +{ + private static final int TEST_BUFFER_LENGTH = 8; + private final ByteBuffer directBuffer = ByteBuffer.allocateDirect(TEST_BUFFER_LENGTH); + { + directBuffer.order(ByteOrder.BIG_ENDIAN); + } + private final long address = BigEndianMemoryUtil.getAddress(directBuffer); + + @Test + public void testGetSetLong() + { + long originalValue = 0xAB_CD_EF_12_34_56_78_90L; + directBuffer.putLong(originalValue); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getLong(address)); + + directBuffer.rewind(); + directBuffer.putLong(0); + BigEndianMemoryUtil.setLong(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getLong(0)); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getLong(address)); + + } + + @Test + public void testGetSetInt() + { + int originalValue = 0xAB_CD_EF_12; + directBuffer.putInt(originalValue); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getInt(address)); + + directBuffer.rewind(); + directBuffer.putInt(0); + BigEndianMemoryUtil.setInt(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getInt(0)); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getInt(address)); + + } + + @Test + public void testGetSetUnsighedShort() + { + short originalValue = (short) 0xAB_CD; + directBuffer.putShort(originalValue); + Assert.assertEquals(originalValue & 0xffff, BigEndianMemoryUtil.getUnsignedShort(address)); + + directBuffer.rewind(); + directBuffer.putShort((short) 0); + BigEndianMemoryUtil.setShort(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getShort(0)); + Assert.assertEquals(originalValue & 0xffff, BigEndianMemoryUtil.getUnsignedShort(address)); + } + + @Test + public void testGetSetLongByBytes() + { + long originalValue = 0xAB_CD_EF_12_34_56_78_90L; + directBuffer.putLong(originalValue); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getLongByByte(address)); + + directBuffer.rewind(); + directBuffer.putLong(0); + BigEndianMemoryUtil.putLongByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getLong(0)); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getLongByByte(address)); + + } + + @Test + public void testGetSetIntByBytes() + { + int originalValue = 0xAB_CD_EF_12; + directBuffer.putInt(originalValue); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getIntByByte(address)); + + directBuffer.rewind(); + directBuffer.putInt(0); + BigEndianMemoryUtil.putIntByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getInt(0)); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getIntByByte(address)); + + } + + @Test + public void testGetSetShortByBytes() + { + short originalValue = (short) 0xAB_CD; + directBuffer.putShort(originalValue); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getShortByByte(address)); + + directBuffer.rewind(); + directBuffer.putShort((short) 0); + BigEndianMemoryUtil.putShortByByte(address, originalValue); + + Assert.assertEquals(originalValue, directBuffer.getShort(0)); + Assert.assertEquals(originalValue, BigEndianMemoryUtil.getShortByByte(address)); + } + + + @Test + public void testGetHollowDirectByteBuffer() + { + ByteBuffer byteBuffer = BigEndianMemoryUtil.getHollowDirectByteBuffer(); + Assert.assertEquals(directBuffer.getClass(), byteBuffer.getClass()); + Assert.assertEquals(ByteOrder.BIG_ENDIAN, byteBuffer.order()); + } + + @Test + public void testGetByteBuffer() + { + ByteBuffer byteBuffer = BigEndianMemoryUtil.getByteBuffer(address, TEST_BUFFER_LENGTH); + Assert.assertEquals(directBuffer.getClass(), byteBuffer.getClass()); + Assert.assertEquals(ByteOrder.BIG_ENDIAN, byteBuffer.order()); + Assert.assertEquals(TEST_BUFFER_LENGTH, byteBuffer.capacity()); + Assert.assertEquals(0, byteBuffer.position()); + } +} From fd3f8249d5da3718128a888694aa00717582c19e Mon Sep 17 00:00:00 2001 From: mck Date: Wed, 21 May 2025 22:10:16 +0200 Subject: [PATCH 321/340] Fix cassandra-cqlsh-tests.sh on python <= 3.8 patch by Mick Semb Wever; reviewed by Brandon Williams for CASSANDRA-20669 --- pylib/cassandra-cqlsh-tests.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pylib/cassandra-cqlsh-tests.sh b/pylib/cassandra-cqlsh-tests.sh index b2d2c500ae6f..c37a1c32ae3d 100755 --- a/pylib/cassandra-cqlsh-tests.sh +++ b/pylib/cassandra-cqlsh-tests.sh @@ -81,8 +81,13 @@ fi set -e # enable immediate exit if venv setup fails virtualenv --python=$PYTHON_VERSION venv source venv/bin/activate -# 3.11 needs the newest pip -curl -sS https://bootstrap.pypa.io/get-pip.py | $PYTHON_VERSION +# 3.11 needs the newest pip, 3.8 and older have specific legacy get-pip urls +PYTHON_MAJOR_MINOR=$($PYTHON_VERSION -V 2>&1 | awk '{print $2}' | cut -d. -f1,2) +if [[ "$(printf '%s\n' "$PYTHON_MAJOR_MINOR" "3.8" | sort -V | head -n1)" == "$PYTHON_MAJOR_MINOR" ]]; then + curl -sS https://bootstrap.pypa.io/pip/${PYTHON_MAJOR_MINOR}/get-pip.py | $PYTHON_VERSION +else + curl -sS https://bootstrap.pypa.io/get-pip.py | $PYTHON_VERSION +fi pip install -r ${CASSANDRA_DIR}/pylib/requirements.txt pip freeze From 422b8a6cbd6cae2aa8551e668e3a9c6dc92858c4 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Thu, 22 May 2025 06:35:48 -0500 Subject: [PATCH 322/340] Prepare debian changelog for 4.0.18 --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 5f90075a8317..d5e5e34c2aa2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ -assandra (4.0.18) UNRELEASED; urgency=medium +cassandra (4.0.18) unstable; urgency=medium * New release - -- Mick Semb Wever Thu, 07 Feb 2025 10:44:49 +0100 + -- Brandon Williams Thu, 22 May 2025 06:35:42 -0500 cassandra (4.0.17) unstable; urgency=medium From c736d22cf855d0981bb514fce5cb3a149b8c2f43 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Wed, 21 May 2025 14:39:45 -0700 Subject: [PATCH 323/340] Gossip doesn't converge due to race condition when updating EndpointStates multiple fields patch by David Capwell, Matt Byrd; reviewed by Blake Eggleston, Brandon Williams for CASSANDRA-20659 --- CHANGES.txt | 1 + .../apache/cassandra/gms/EndpointState.java | 111 ++++++++++++++---- .../org/apache/cassandra/gms/Gossiper.java | 28 ++--- .../apache/cassandra/gms/HeartBeatState.java | 20 ++-- .../apache/cassandra/gms/GossiperTest.java | 2 +- .../cassandra/gms/SerializationsTest.java | 2 +- 6 files changed, 109 insertions(+), 55 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 65e3cb4de0d1..676522b9bf0a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.18 + * Gossip doesn't converge due to race condition when updating EndpointStates multiple fields (CASSANDRA-20659) * Handle sstable metadata stats file getting a new mtime after compaction has finished (CASSANDRA-18119) * Honor MAX_PARALLEL_TRANSFERS correctly (CASSANDRA-20532) * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) diff --git a/src/java/org/apache/cassandra/gms/EndpointState.java b/src/java/org/apache/cassandra/gms/EndpointState.java index 782a72207cf8..6cec0cef349e 100644 --- a/src/java/org/apache/cassandra/gms/EndpointState.java +++ b/src/java/org/apache/cassandra/gms/EndpointState.java @@ -24,6 +24,9 @@ import javax.annotation.Nullable; +import com.google.common.annotations.VisibleForTesting; + +import com.google.common.base.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,16 +40,25 @@ * This abstraction represents both the HeartBeatState and the ApplicationState in an EndpointState * instance. Any state for a given endpoint can be retrieved from this instance. */ - - public class EndpointState { protected static final Logger logger = LoggerFactory.getLogger(EndpointState.class); public final static IVersionedSerializer serializer = new EndpointStateSerializer(); - private volatile HeartBeatState hbState; - private final AtomicReference> applicationState; + private static class View + { + final HeartBeatState hbState; + final Map applicationState; + + private View(HeartBeatState hbState, Map applicationState) + { + this.hbState = hbState; + this.applicationState = applicationState; + } + } + + private final AtomicReference ref; /* fields below do not get serialized */ private volatile long updateTimestamp; @@ -54,46 +66,79 @@ public class EndpointState public EndpointState(HeartBeatState initialHbState) { - this(initialHbState, new EnumMap(ApplicationState.class)); + this(initialHbState, new EnumMap<>(ApplicationState.class)); } public EndpointState(EndpointState other) { - this(new HeartBeatState(other.hbState), new EnumMap<>(other.applicationState.get())); + ref = new AtomicReference<>(other.ref.get()); + updateTimestamp = System.nanoTime(); + isAlive = true; } - EndpointState(HeartBeatState initialHbState, Map states) + @VisibleForTesting + public EndpointState(HeartBeatState initialHbState, Map states) { - hbState = initialHbState; - applicationState = new AtomicReference>(new EnumMap<>(states)); + ref = new AtomicReference<>(new View(initialHbState, new EnumMap<>(states))); updateTimestamp = System.nanoTime(); isAlive = true; } - HeartBeatState getHeartBeatState() + @VisibleForTesting + public HeartBeatState getHeartBeatState() + { + return ref.get().hbState; + } + + public void updateHeartBeat() + { + updateHeartBeat(HeartBeatState::updateHeartBeat); + } + + public void forceNewerGenerationUnsafe() + { + updateHeartBeat(HeartBeatState::forceNewerGenerationUnsafe); + } + + @VisibleForTesting + public void forceHighestPossibleVersionUnsafe() { - return hbState; + updateHeartBeat(HeartBeatState::forceHighestPossibleVersionUnsafe); } - void setHeartBeatState(HeartBeatState newHbState) + void unsafeSetEmptyHeartBeatState() { - updateTimestamp(); - hbState = newHbState; + updateHeartBeat(ignore -> HeartBeatState.empty()); + } + + private void updateHeartBeat(Function fn) + { + HeartBeatState previous = null; + HeartBeatState update = null; + while (true) + { + View view = ref.get(); + if (previous == null || view.hbState != previous) // if this races with updating states then can avoid bumping versions + update = fn.apply(view.hbState); + if (ref.compareAndSet(view, new View(update, view.applicationState))) + return; + previous = view.hbState; + } } public VersionedValue getApplicationState(ApplicationState key) { - return applicationState.get().get(key); + return ref.get().applicationState.get(key); } public boolean containsApplicationState(ApplicationState key) { - return applicationState.get().containsKey(key); + return ref.get().applicationState.containsKey(key); } public Set> states() { - return applicationState.get().entrySet(); + return ref.get().applicationState.entrySet(); } public void addApplicationState(ApplicationState key, VersionedValue value) @@ -107,17 +152,27 @@ public void addApplicationStates(Map values) } public void addApplicationStates(Set> values) + { + addApplicationStates(values, null); + } + + public void addApplicationStates(Set> values, @Nullable HeartBeatState hbState) { while (true) { - Map orig = applicationState.get(); + View view = this.ref.get(); + Map orig = view.applicationState; Map copy = new EnumMap<>(orig); for (Map.Entry value : values) copy.put(value.getKey(), value.getValue()); - if (applicationState.compareAndSet(orig, copy)) + if (this.ref.compareAndSet(view, new View(hbState == null ? view.hbState : hbState, copy))) + { + if (hbState != null) + updateTimestamp(); return; + } } } @@ -125,18 +180,19 @@ void removeMajorVersion3LegacyApplicationStates() { while (hasLegacyFields()) { - Map orig = applicationState.get(); + View view = ref.get(); + Map orig = view.applicationState; Map updatedStates = filterMajorVersion3LegacyApplicationStates(orig); // avoid updating if no state is removed if (orig.size() == updatedStates.size() - || applicationState.compareAndSet(orig, updatedStates)) + || ref.compareAndSet(view, new View(view.hbState, updatedStates))) return; } } private boolean hasLegacyFields() { - Set statesPresent = applicationState.get().keySet(); + Set statesPresent = ref.get().applicationState.keySet(); if (statesPresent.isEmpty()) return false; return (statesPresent.contains(ApplicationState.STATUS) && statesPresent.contains(ApplicationState.STATUS_WITH_PORT)) @@ -193,7 +249,7 @@ void markDead() public boolean isStateEmpty() { - return applicationState.get().isEmpty(); + return ref.get().applicationState.isEmpty(); } /** @@ -201,8 +257,10 @@ public boolean isStateEmpty() */ public boolean isEmptyWithoutStatus() { - Map state = applicationState.get(); - return hbState.isEmpty() && !(state.containsKey(ApplicationState.STATUS_WITH_PORT) || state.containsKey(ApplicationState.STATUS)); + View view = ref.get(); + Map state = view.applicationState; + boolean hasStatus = state.containsKey(ApplicationState.STATUS_WITH_PORT) || state.containsKey(ApplicationState.STATUS); + return view.hbState.isEmpty() && !hasStatus; } public boolean isRpcReady() @@ -253,7 +311,8 @@ public CassandraVersion getReleaseVersion() public String toString() { - return "EndpointState: HeartBeatState = " + hbState + ", AppStateMap = " + applicationState.get(); + View view = ref.get(); + return "EndpointState: HeartBeatState = " + view.hbState + ", AppStateMap = " + view.applicationState; } } diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java b/src/java/org/apache/cassandra/gms/Gossiper.java index 9a7a9935b8e7..12c532b16263 100644 --- a/src/java/org/apache/cassandra/gms/Gossiper.java +++ b/src/java/org/apache/cassandra/gms/Gossiper.java @@ -310,7 +310,7 @@ public void run() taskLock.lock(); /* Update the local heartbeat counter. */ - endpointStateMap.get(FBUtilities.getBroadcastAddressAndPort()).getHeartBeatState().updateHeartBeat(); + endpointStateMap.get(FBUtilities.getBroadcastAddressAndPort()).updateHeartBeat(); if (logger.isTraceEnabled()) logger.trace("My heartbeat is now {}", endpointStateMap.get(FBUtilities.getBroadcastAddressAndPort()).getHeartBeatState().getHeartBeatVersion()); final List gDigests = new ArrayList<>(); @@ -598,7 +598,7 @@ protected void markAsShutdown(InetAddressAndPort endpoint) epState.addApplicationState(ApplicationState.STATUS_WITH_PORT, shutdown); epState.addApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.shutdown(true)); epState.addApplicationState(ApplicationState.RPC_READY, StorageService.instance.valueFactory.rpcReady(false)); - epState.getHeartBeatState().forceHighestPossibleVersionUnsafe(); + epState.forceHighestPossibleVersionUnsafe(); markDead(endpoint, epState); FailureDetector.instance.forceConviction(endpoint); GossiperDiagnostics.markedAsShutdown(this, endpoint); @@ -778,7 +778,7 @@ public void advertiseRemoving(InetAddressAndPort endpoint, UUID hostId, UUID loc // update the other node's generation to mimic it as if it had changed it itself logger.info("Advertising removal for {}", endpoint); epState.updateTimestamp(); // make sure we don't evict it too soon - epState.getHeartBeatState().forceNewerGenerationUnsafe(); + epState.forceNewerGenerationUnsafe(); Map states = new EnumMap<>(ApplicationState.class); states.put(ApplicationState.STATUS_WITH_PORT, StorageService.instance.valueFactory.removingNonlocal(hostId)); states.put(ApplicationState.STATUS, StorageService.instance.valueFactory.removingNonlocal(hostId)); @@ -798,7 +798,7 @@ public void advertiseTokenRemoved(InetAddressAndPort endpoint, UUID hostId) { EndpointState epState = endpointStateMap.get(endpoint); epState.updateTimestamp(); // make sure we don't evict it too soon - epState.getHeartBeatState().forceNewerGenerationUnsafe(); + epState.forceNewerGenerationUnsafe(); long expireTime = computeExpireTime(); epState.addApplicationState(ApplicationState.STATUS_WITH_PORT, StorageService.instance.valueFactory.removedNonlocal(hostId, expireTime)); epState.addApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.removedNonlocal(hostId, expireTime)); @@ -849,7 +849,7 @@ else if (newState.getHeartBeatState().getGeneration() != generation) else if (newState.getHeartBeatState().getHeartBeatVersion() != heartbeat) throw new RuntimeException("Endpoint still alive: " + endpoint + " heartbeat changed while trying to assassinate it"); epState.updateTimestamp(); // make sure we don't evict it too soon - epState.getHeartBeatState().forceNewerGenerationUnsafe(); + epState.forceNewerGenerationUnsafe(); } Collection tokens = null; @@ -1580,15 +1580,7 @@ private void applyNewStates(InetAddressAndPort addr, EndpointState localState, E // don't assert here, since if the node restarts the version will go back to zero int oldVersion = localState.getHeartBeatState().getHeartBeatVersion(); - localState.setHeartBeatState(remoteState.getHeartBeatState()); - if (logger.isTraceEnabled()) - logger.trace("Updating heartbeat state version to {} from {} for {} ...", localState.getHeartBeatState().getHeartBeatVersion(), oldVersion, addr); - - Set> remoteStates = remoteState.states(); - assert remoteState.getHeartBeatState().getGeneration() == localState.getHeartBeatState().getGeneration(); - - - Set> updatedStates = remoteStates.stream().filter(entry -> { + Set> updatedStates = remoteState.states().stream().filter(entry -> { // filter out the states that are already up to date (has the same or higher version) VersionedValue local = localState.getApplicationState(entry.getKey()); return (local == null || local.version < entry.getValue().version); @@ -1601,7 +1593,9 @@ private void applyNewStates(InetAddressAndPort addr, EndpointState localState, E logger.trace("Updating {} state version to {} for {}", entry.getKey().toString(), entry.getValue().version, addr); } } - localState.addApplicationStates(updatedStates); + localState.addApplicationStates(updatedStates, remoteState.getHeartBeatState()); + if (logger.isTraceEnabled()) + logger.trace("Updating heartbeat state version to {} from {} for {} ...", localState.getHeartBeatState().getHeartBeatVersion(), oldVersion, addr); // get rid of legacy fields once the cluster is not in mixed mode if (!hasMajorVersion3OrUnknownNodes()) @@ -1983,7 +1977,7 @@ public void maybeInitializeLocalState(int generationNbr) public void forceNewerGeneration() { EndpointState epstate = endpointStateMap.get(FBUtilities.getBroadcastAddressAndPort()); - epstate.getHeartBeatState().forceNewerGenerationUnsafe(); + epstate.forceNewerGenerationUnsafe(); } @@ -2004,7 +1998,7 @@ public void addSavedEndpoint(InetAddressAndPort ep) if (epState != null) { logger.debug("not replacing a previous epState for {}, but reusing it: {}", ep, epState); - epState.setHeartBeatState(HeartBeatState.empty()); + epState.unsafeSetEmptyHeartBeatState(); } else { diff --git a/src/java/org/apache/cassandra/gms/HeartBeatState.java b/src/java/org/apache/cassandra/gms/HeartBeatState.java index 75f4f56ea7c0..374d346a0a8e 100644 --- a/src/java/org/apache/cassandra/gms/HeartBeatState.java +++ b/src/java/org/apache/cassandra/gms/HeartBeatState.java @@ -33,8 +33,8 @@ public class HeartBeatState public static final IVersionedSerializer serializer = new HeartBeatStateSerializer(); - private volatile int generation; - private volatile int version; + private final int generation; + private final int version; HeartBeatState(int gen) { @@ -67,29 +67,29 @@ public boolean isEmpty() return version == EMPTY_VERSION; } - int getGeneration() + public int getGeneration() { return generation; } - void updateHeartBeat() + HeartBeatState updateHeartBeat() { - version = VersionGenerator.getNextVersion(); + return new HeartBeatState(generation, VersionGenerator.getNextVersion()); } - int getHeartBeatVersion() + public int getHeartBeatVersion() { return version; } - void forceNewerGenerationUnsafe() + HeartBeatState forceNewerGenerationUnsafe() { - generation += 1; + return new HeartBeatState(generation + 1, version); } - void forceHighestPossibleVersionUnsafe() + HeartBeatState forceHighestPossibleVersionUnsafe() { - version = Integer.MAX_VALUE; + return new HeartBeatState(generation, Integer.MAX_VALUE); } public String toString() diff --git a/test/unit/org/apache/cassandra/gms/GossiperTest.java b/test/unit/org/apache/cassandra/gms/GossiperTest.java index 96730baa196c..778d4a6b34c7 100644 --- a/test/unit/org/apache/cassandra/gms/GossiperTest.java +++ b/test/unit/org/apache/cassandra/gms/GossiperTest.java @@ -304,7 +304,7 @@ public void testDuplicatedStateUpdate() throws Exception proposedRemoteState = new EndpointState(proposedRemoteHeartBeat); // Bump the heartbeat version and use the same TOKENS state - proposedRemoteHeartBeat.updateHeartBeat(); + proposedRemoteState.updateHeartBeat(); proposedRemoteState.addApplicationState(ApplicationState.TOKENS, tokensValue); // The following state change should only update heartbeat without updating the TOKENS state diff --git a/test/unit/org/apache/cassandra/gms/SerializationsTest.java b/test/unit/org/apache/cassandra/gms/SerializationsTest.java index 90ce10ba0dff..6ac4729ae1e4 100644 --- a/test/unit/org/apache/cassandra/gms/SerializationsTest.java +++ b/test/unit/org/apache/cassandra/gms/SerializationsTest.java @@ -130,7 +130,7 @@ private static class Statics private static List Digests = new ArrayList(); { - HeartbeatSt.updateHeartBeat(); + EndpointSt.updateHeartBeat(); EndpointSt.addApplicationState(ApplicationState.LOAD, vv0); EndpointSt.addApplicationState(ApplicationState.STATUS_WITH_PORT, vv1); for (int i = 0; i < 100; i++) From 66b973341a540fe4d325427ea3f706bf524a4d5e Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Tue, 20 May 2025 12:12:01 +0200 Subject: [PATCH 324/340] Accord: Retry epoch/topology metadata fetch on all peer nodes Patch by Alex Petrov; reviewed by David Capwell for CASSANDRA-20663 --- .../apache/cassandra/gms/FailureDetector.java | 10 +++- .../service/accord/AccordService.java | 51 +++++++++---------- .../simulator/paxos/PaxosSimulation.java | 5 +- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java index 49b208929748..da90efc3326f 100644 --- a/src/java/org/apache/cassandra/gms/FailureDetector.java +++ b/src/java/org/apache/cassandra/gms/FailureDetector.java @@ -326,7 +326,7 @@ public boolean isAlive(InetAddressAndPort ep) // an error in that case. ClusterMetadata metadata = ClusterMetadata.current(); if (!metadata.directory.allJoinedEndpoints().contains(ep) && !metadata.fullCMSMembers().contains(ep)) - logger.error("Unknown endpoint: " + ep, new IllegalArgumentException("Unknown endpoint: " + ep)); + logger.error("Unknown endpoint: " + ep, new UnknownEndpointException(ep)); } return epState != null && epState.isAlive(); } @@ -437,6 +437,14 @@ public String toString() sb.append("-----------------------------------------------------------------------"); return sb.toString(); } + + public static class UnknownEndpointException extends IllegalArgumentException + { + public UnknownEndpointException(InetAddressAndPort ep) + { + super("Unknown endpoint: " + ep); + } + } } /* diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index acb4c65e8b60..48c92e6296d3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -448,35 +448,30 @@ private TopologyRange fetchTopologies(long from) throws ExecutionException, Inte if (peers.isEmpty()) return null; - Iterator iter = peers.iterator(); - while (iter.hasNext()) + try { - InetAddressAndPort peer = iter.next(); - try - { - logger.info("Fetching topologies for epochs [{}, {}] from {}", from, metadata.epoch.getEpoch(), peer); - Invariants.require(from <= metadata.epoch.getEpoch(), - "Accord epochs should never be ahead of TCM ones, but %d was ahead of %d", from, metadata.epoch.getEpoch()); - - Future futures = FetchTopologies.fetch(SharedContext.Global.instance, - Collections.singleton(peer), - from, - Long.MAX_VALUE); - TopologyRange response = futures.get(); - logger.info("Fetched topologies {}", response); - - // We're behind and need to catch up CMS first. - if (response.current > ClusterMetadata.current().epoch.getEpoch()) - ClusterMetadataService.instance().fetchLogFromCMS(Epoch.create(response.current)); - - if (response.current >= from) - return response; - metadata = ClusterMetadata.current(); - } - catch (Throwable e) - { - logger.info("Failed to fetch epochs [{}, {}] from {}", from, metadata.epoch.getEpoch(), peer); - } + logger.info("Fetching topologies for epochs [{}, {}] from {}", from, metadata.epoch.getEpoch(), peers); + Invariants.require(from <= metadata.epoch.getEpoch(), + "Accord epochs should never be ahead of TCM ones, but %d was ahead of %d", from, metadata.epoch.getEpoch()); + + Future futures = FetchTopologies.fetch(SharedContext.Global.instance, + peers, + from, + Long.MAX_VALUE); + TopologyRange response = futures.get(); + logger.info("Fetched topologies {}", response); + + // We're behind and need to catch up CMS first. + if (response.current > ClusterMetadata.current().epoch.getEpoch()) + ClusterMetadataService.instance().fetchLogFromCMS(Epoch.create(response.current)); + + if (response.current >= from) + return response; + metadata = ClusterMetadata.current(); + } + catch (Throwable e) + { + logger.info("Failed to fetch epochs [{}, {}] from {}", from, metadata.epoch.getEpoch(), peers); } // After trying to contact all peers, and retrying according to retry spec on them, we give up. diff --git a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java index fbc590c94257..e58e691962ef 100644 --- a/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java +++ b/test/simulator/main/org/apache/cassandra/simulator/paxos/PaxosSimulation.java @@ -35,6 +35,7 @@ import javax.annotation.Nullable; import com.google.common.base.Throwables; +import org.apache.cassandra.gms.FailureDetector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -85,7 +86,8 @@ private static String createDescription(int[] primaryKeys, int id, String idStri protected Class[] expectedExceptionsPaxos() { return (Class[]) new Class[] { RequestExecutionException.class, - CancellationException.class }; + CancellationException.class, + FailureDetector.UnknownEndpointException.class}; } @SuppressWarnings("unchecked") @@ -97,6 +99,7 @@ protected Class[] expectedExceptionsAccord() CancellationException.class, CoordinationFailed.class, ClosedChannelException.class, + FailureDetector.UnknownEndpointException.class, StreamReceivedOutOfTokenRangeException.class // should always come in combination with closed channel exception }; } From 7bbeee3b63343b59b0121a727595729540cacd88 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Tue, 20 May 2025 12:10:18 +0200 Subject: [PATCH 325/340] Make metadata components forgiving during startup Patch by Alex Petrov; reviewed by Benedict Elliott Smith for CASSANDRA-20662 --- .../apache/cassandra/journal/Component.java | 10 +++++ .../cassandra/journal/StaticSegment.java | 43 ++++++++++++++++--- .../accord/AccordSegmentCompactor.java | 1 + 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/java/org/apache/cassandra/journal/Component.java b/src/java/org/apache/cassandra/journal/Component.java index f7cf944f8619..07da71536a6d 100644 --- a/src/java/org/apache/cassandra/journal/Component.java +++ b/src/java/org/apache/cassandra/journal/Component.java @@ -19,6 +19,9 @@ import java.util.List; +import accord.utils.Invariants; +import org.apache.cassandra.io.util.File; + import static accord.utils.SortedArrays.SortedArrayList.ofSorted; enum Component @@ -44,4 +47,11 @@ boolean existsFor(Descriptor descriptor) { return descriptor.fileFor(this).exists(); } + + void markCorrupted(Descriptor descriptor) + { + File file = descriptor.fileFor(this); + Invariants.require(file.exists()); + file.move(file.withSuffix(".corrupted")); + } } diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index 8c80d32bec14..92dd999c2d31 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -27,6 +27,9 @@ import java.util.Collection; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.utils.Closeable; @@ -41,6 +44,7 @@ */ public final class StaticSegment extends Segment { + public static final Logger logger = LoggerFactory.getLogger(StaticSegment.class); final FileChannel channel; final int fsyncLimit; @@ -91,13 +95,40 @@ static StaticSegment open(Descriptor descriptor, KeySupport keyS if (!Component.DATA.existsFor(descriptor)) throw new IllegalArgumentException("Data file for segment " + descriptor + " doesn't exist"); - Metadata metadata = Component.METADATA.existsFor(descriptor) - ? Metadata.load(descriptor) - : Metadata.rebuildAndPersist(descriptor, keySupport); + Metadata metadata = null; + if (Component.METADATA.existsFor(descriptor)) + { + try + { + metadata = Metadata.load(descriptor); + } + catch (Throwable t) + { + logger.error("Could not load metadata component for {}; rebuilding", descriptor, t); + Component.METADATA.markCorrupted(descriptor); + } + } + + if (metadata == null) + metadata = Metadata.rebuildAndPersist(descriptor, keySupport); + + OnDiskIndex index = null; + + if (Component.INDEX.existsFor(descriptor)) + { + try + { + index = OnDiskIndex.open(descriptor, keySupport); + } + catch (Throwable t) + { + logger.error("Could not load index component for {}; rebuilding", descriptor, t); + Component.INDEX.markCorrupted(descriptor); + } + } - OnDiskIndex index = Component.INDEX.existsFor(descriptor) - ? OnDiskIndex.open(descriptor, keySupport) - : OnDiskIndex.rebuildAndPersist(descriptor, keySupport, metadata.fsyncLimit()); + if (index == null) + index = OnDiskIndex.rebuildAndPersist(descriptor, keySupport, metadata.fsyncLimit()); try { diff --git a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java index f92eca8cf866..45633619a4c4 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java @@ -56,6 +56,7 @@ void finishAndAddWriter() { cfs.addSSTables(writer.finish(true)); writer.close(); + writer = null; } @Override From 49c8122d8fcf09f75e68dbda665ca12a9ef28fd7 Mon Sep 17 00:00:00 2001 From: Brad Schoening Date: Wed, 9 Apr 2025 17:28:14 -0400 Subject: [PATCH 326/340] Avoid CQLSH throwing an exception loading .cqlshrc on non-supported platforms While we do not support Windows as such (at least on server), reviewers evaluated that this might be fixed as the gains (Windows users using CQLSH to connect to Cassandra running on supported platforms) are justified. patch by Brad Schoening; reviewed by Brandon Williams, Josh McKenzie for CASSANDRA-20478 --- CHANGES.txt | 1 + pylib/cqlshlib/util.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8c64d012f0f2..4aaed8bdb99c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Avoid CQLSH throwing an exception loading .cqlshrc on non-supported platforms (CASSANDRA-20478) * Relax validation of snapshot name as a part of SSTable files path validation (CASSANDRA-20649) * Optimize initial skipping logic for SAI queries on large partitions (CASSANDRA-20191) * Fix reading mmapped trie-index exceeding 2GiB (CASSANDRA-20351) diff --git a/pylib/cqlshlib/util.py b/pylib/cqlshlib/util.py index 144586aae051..90d95b9afe54 100644 --- a/pylib/cqlshlib/util.py +++ b/pylib/cqlshlib/util.py @@ -117,12 +117,15 @@ def trim_if_present(s, prefix): def is_file_secure(filename): try: st = os.stat(filename) + uid = os.getuid() except OSError as e: if e.errno != errno.ENOENT: raise # the file doesn't exist, the security of it is irrelevant return True - uid = os.getuid() + except AttributeError as e: + # not-Unix os + return True # Skip enforcing the file owner and UID matching for the root user (uid == 0). # This is to allow "sudo cqlsh" to work with user owned credentials file. From 1066f6a4276ad315bb704d21c65d3557f5286370 Mon Sep 17 00:00:00 2001 From: mck Date: Sun, 25 May 2025 18:27:30 +0200 Subject: [PATCH 327/340] Remove auto-installation of golang when generating native protocol doc pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patch by Mick Semb Wever; reviewed by Štefan Miklošovič for CASSANDRA-20678 --- .build/docker/bullseye-build.docker | 21 +++++- ...process-native-protocol-specs-in-docker.sh | 67 ++++--------------- 2 files changed, 33 insertions(+), 55 deletions(-) diff --git a/.build/docker/bullseye-build.docker b/.build/docker/bullseye-build.docker index 6928ec9f2992..b31bf03b3a75 100644 --- a/.build/docker/bullseye-build.docker +++ b/.build/docker/bullseye-build.docker @@ -15,7 +15,7 @@ # limitations under the License. FROM debian:bullseye -MAINTAINER Apache Cassandra +LABEL org.opencontainers.image.authors="Apache Cassandra " # CONTEXT is expected to be cassandra/.build @@ -52,3 +52,22 @@ RUN pip install --upgrade pip # dependencies for .build/ci/ci_parser.py RUN pip install beautifulsoup4==4.12.3 jinja2==3.1.3 + +# install golang. GO_VERSION_SHA must be updated with VERSION +RUN sh -c '\ + GO_VERSION="1.24.3" ;\ + GO_VERSION_SHAS="3333f6ea53afa971e9078895eaa4ac7204a8c6b5c68c10e6bc9a33e8e391bdd8 a463cb59382bd7ae7d8f4c68846e73c4d589f223c589ac76871b66811ded7836 13e6fe3fcf65689d77d40e633de1e31c6febbdbcb846eb05fc2434ed2213e92b 64a3fa22142f627e78fac3018ce3d4aeace68b743eff0afda8aae0411df5e4fb" ;\ + GO_OS=linux ;\ + [ $(uname) = "Darwin" ] && GO_OS=darwin ;\ + GO_PLATFORM=amd64 ;\ + [ $(uname -m) = "aarch64" ] && GO_PLATFORM=arm64 ;\ + GO_TAR="go${GO_VERSION}.${GO_OS}-${GO_PLATFORM}.tar.gz" ;\ + curl -L --fail --silent --retry 2 --retry-delay 5 --max-time 30 https://go.dev/dl/$GO_TAR -o $GO_TAR ;\ + GO_SHA="$(sha256sum $GO_TAR | cut -d" " -f2)" ;\ + echo "$GO_VERSION_SHAS" | sed "s/ /\n/g" | grep -q "$GO_SHA" || { echo "SHA256 mismatch for $GO_TAR $GO_SHA"; exit 1; } ;\ + tar -C /usr/local -xzf $GO_TAR ;\ + rm $GO_TAR' + +ENV GOROOT="/usr/local/go" +ENV GOPATH="$BUILD_HOME/go" +ENV PATH="$PATH:/usr/local/go/bin" \ No newline at end of file diff --git a/doc/scripts/process-native-protocol-specs-in-docker.sh b/doc/scripts/process-native-protocol-specs-in-docker.sh index 05565c02b93d..332310ab661e 100755 --- a/doc/scripts/process-native-protocol-specs-in-docker.sh +++ b/doc/scripts/process-native-protocol-specs-in-docker.sh @@ -20,73 +20,27 @@ # Variables GO_VERSION="1.23.1" - -GO_OS=linux - -if [ $(uname) = "Darwin" ]; then - GO_OS=darwin -fi - -GO_PLATFORM=amd64 - -if [ $(uname -m) = "aarch64" ]; then - GO_PLATFORM=arm64 -fi - -GO_TAR="go${GO_VERSION}.${GO_OS}-${GO_PLATFORM}.tar.gz" TMPDIR="${TMPDIR:-/tmp}" check_go_version() { if command -v go &>/dev/null; then local installed_version=$(go version | awk '{print $3}' | sed 's/go//') - if [ "$(printf '%s\n' "$GO_VERSION" "$installed_version" | sort -V | head -n1)" = "$GO_VERSION" ]; then - echo "Detected Go $installed_version (>= $GO_VERSION), skipping installation." + echo "Detected Go $installed_version (>= $GO_VERSION)" return 0 else - if [ -z $installed_version ]; then - echo "No Go installation detected, proceeding with installation." - else - echo "Detected Go $installed_version (< $GO_VERSION), proceeding with installation." - fi - return 1 + echo "Detected unsupported Go $installed_version (< $GO_VERSION), please update to supported version." fi else - echo "Go env not found in your system, proceeding with installation." - return 1 + echo "No Go installation detected, please install Go (>= $GO_VERSION)" fi + return 1 } if ! check_go_version; then - - if ls $TMPDIR/go$GO_VERSION > /dev/null 2>&1; then - echo "Reusing cached installation in $TMPDIR/go$GO_VERSION" - export PATH="$PATH:$TMPDIR/go$GO_VERSION/go/bin" - export GOPATH="$TMPDIR/go$GO_VERSION/go/bin" - export GOROOT="$TMPDIR/go$GO_VERSION/go" - else - if ! ls $TMPDIR/$GO_TAR > /dev/null 2>&1; then - echo "Downloading Go $GO_VERSION..." - - curl -L --fail --silent --retry 2 --retry-delay 5 --max-time 30 https://golang.org/dl/$GO_TAR -o $TMPDIR/$GO_TAR - - if [ $? != "0" ]; then - echo "Network error. Specify '-Dant.gen-doc.skip=true' to skip if offline." - exit 1 - fi - fi - - echo "Installing Go $GO_VERSION..." - mkdir -p $TMPDIR/go$GO_VERSION - tar -C "$TMPDIR/go$GO_VERSION" -xzf "$TMPDIR/$GO_TAR" - - # Set Go environment variables - export PATH="$PATH:$TMPDIR/go$GO_VERSION/go/bin" - export GOPATH="$TMPDIR/go$GO_VERSION/go/bin" - export GOROOT="$TMPDIR/go$GO_VERSION/go" - fi -else - echo "Using system-installed Go." + echo " Please install/upgrade Golang for 'ant gen-doc', or specify '-Dant.gen-doc.skip=true' to skip this step." + echo " For download and installation instructions see https://go.dev/doc/install" + exit 1 fi # Step 1: Building the parser @@ -107,7 +61,7 @@ git sparse-checkout set --no-cone /cqlprotodoc git checkout cd "${TMPDIR}/cassandra-website/cqlprotodoc" rm -rf "${TMPDIR}/cqlprotodoc" -$TMPDIR/go$GO_VERSION/go/bin/go build -o "$TMPDIR"/cqlprotodoc +go build -o "$TMPDIR"/cqlprotodoc # Step 2: Process the spec files using the parser echo "Processing the .spec files..." @@ -116,6 +70,11 @@ output_dir="modules/cassandra/attachments" mkdir -p "${output_dir}" "$TMPDIR"/cqlprotodoc . "${output_dir}" +if ! ls ${output_dir}/native_protocol_v*.html > /dev/null 2>&1; then + echo "failed: No native_protocol_v*.html files generated in ${output_dir}" + exit 1 +fi + # Step 4: Generate summary file summary_file="modules/cassandra/pages/reference/native-protocol.adoc" From dd0d8c03b9414afeeffef2be0b37c0078ca72592 Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Wed, 21 May 2025 17:10:41 -0500 Subject: [PATCH 328/340] Avoid lambda usage in TrieMemoryIndex range queries and ensure queue size tracking is per column patch by Caleb Rackliffe; reviewed by David Capwell for CASSANDRA-20668 --- CHANGES.txt | 1 + .../index/sai/memory/TrieMemoryIndex.java | 60 ++++++++----------- .../index/sai/memory/TrieMemoryIndexTest.java | 26 ++++++++ 3 files changed, 51 insertions(+), 36 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4aaed8bdb99c..8b2f843107b1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Avoid lambda usage in TrieMemoryIndex range queries and ensure queue size tracking is per column (CASSANDRA-20668) * Avoid CQLSH throwing an exception loading .cqlshrc on non-supported platforms (CASSANDRA-20478) * Relax validation of snapshot name as a part of SSTable files path validation (CASSANDRA-20649) * Optimize initial skipping logic for SAI queries on large partitions (CASSANDRA-20191) diff --git a/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java b/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java index c8d32a8386c0..1468f4ef8f44 100644 --- a/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java +++ b/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java @@ -23,13 +23,13 @@ import java.util.Map; import java.util.PriorityQueue; import java.util.SortedSet; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.LongAdder; import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import io.netty.util.concurrent.FastThreadLocal; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; @@ -59,6 +59,7 @@ public class TrieMemoryIndex extends MemoryIndex { private static final Logger logger = LoggerFactory.getLogger(TrieMemoryIndex.class); private static final int MAX_RECURSIVE_KEY_LENGTH = 128; + private static final int MINIMUM_PRIORITY_QUEUE_SIZE = 128; private final InMemoryTrie data; private final PrimaryKeysReducer primaryKeysReducer; @@ -66,6 +67,11 @@ public class TrieMemoryIndex extends MemoryIndex private ByteBuffer minTerm; private ByteBuffer maxTerm; + // Maintain the last queue size used on this index to use for the next range match. + // This allows for receiving a stream of wide range queries where the queue size + // is larger than we would want to default the size to. + private final AtomicInteger lastPriorityQueueSize = new AtomicInteger(MINIMUM_PRIORITY_QUEUE_SIZE); + public TrieMemoryIndex(StorageAttachedIndex index) { super(index); @@ -143,7 +149,11 @@ public KeyRangeIterator search(QueryContext queryContext, Expression expression, case CONTAINS_VALUE: return exactMatch(expression, keyRange); case RANGE: - return rangeMatch(expression, keyRange); + KeyRangeIterator keyIterator = rangeMatch(expression, keyRange); + int keyCount = (int) keyIterator.getMaxKeys(); + if (keyCount > MINIMUM_PRIORITY_QUEUE_SIZE) + lastPriorityQueueSize.set(keyCount); + return keyIterator; default: throw new IllegalArgumentException("Unsupported expression: " + expression); } @@ -252,29 +262,15 @@ private KeyRangeIterator exactMatch(Expression expression, AbstractBounds lastQueueSize = new FastThreadLocal<>() - { - protected Integer initialValue() - { - return MINIMUM_QUEUE_SIZE; - } - }; + final PriorityQueue mergedKeys; + final AbstractBounds keyRange; - PrimaryKey minimumKey = null; PrimaryKey maximumKey = null; - final PriorityQueue mergedKeys = new PriorityQueue<>(lastQueueSize.get()); - final AbstractBounds keyRange; - - public Collector(AbstractBounds keyRange) + public Collector(AbstractBounds keyRange, int expectedKeys) { this.keyRange = keyRange; + this.mergedKeys = new PriorityQueue<>(expectedKeys); } public void processContent(PrimaryKeys keys) @@ -296,12 +292,8 @@ public void processContent(PrimaryKeys keys) || primaryKeys.last().partitionKey().compareTo(keyRange.left) < 0) return; - primaryKeys.forEach(this::processKey); - } - - public void updateLastQueueSize() - { - lastQueueSize.set(Math.max(MINIMUM_QUEUE_SIZE, mergedKeys.size())); + for (PrimaryKey primaryKey : primaryKeys) + processKey(primaryKey); } private void processKey(PrimaryKey key) @@ -310,7 +302,7 @@ private void processKey(PrimaryKey key) { mergedKeys.add(key); - minimumKey = minimumKey == null ? key : key.compareTo(minimumKey) < 0 ? key : minimumKey; + // We only track the maximum key, as the minimum can be peeked in constant time on the PQ itself. maximumKey = maximumKey == null ? key : key.compareTo(maximumKey) > 0 ? key : maximumKey; } } @@ -342,20 +334,16 @@ private KeyRangeIterator rangeMatch(Expression expression, AbstractBounds values = data.subtrie(lowerBound, lowerInclusive, upperBound, upperInclusive).valueIterator(); - data.subtrie(lowerBound, lowerInclusive, upperBound, upperInclusive) - .values() - .forEach(cd::processContent); + while (values.hasNext()) + cd.processContent(values.next()); if (cd.mergedKeys.isEmpty()) - { return KeyRangeIterator.empty(); - } - - cd.updateLastQueueSize(); - return new InMemoryKeyRangeIterator(cd.minimumKey, cd.maximumKey, cd.mergedKeys); + return new InMemoryKeyRangeIterator(cd.mergedKeys.peek(), cd.maximumKey, cd.mergedKeys); } private static class PrimaryKeysReducer implements InMemoryTrie.UpsertTransformer diff --git a/test/unit/org/apache/cassandra/index/sai/memory/TrieMemoryIndexTest.java b/test/unit/org/apache/cassandra/index/sai/memory/TrieMemoryIndexTest.java index 0ab4c846f754..963742c02c1c 100644 --- a/test/unit/org/apache/cassandra/index/sai/memory/TrieMemoryIndexTest.java +++ b/test/unit/org/apache/cassandra/index/sai/memory/TrieMemoryIndexTest.java @@ -29,8 +29,10 @@ import java.util.function.IntFunction; import java.util.stream.Collectors; +import org.junit.Ignore; import org.junit.Test; +import org.HdrHistogram.Histogram; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.cql3.statements.schema.IndexTarget; import org.apache.cassandra.db.Clustering; @@ -243,4 +245,28 @@ private TrieMemoryIndex newTrieMemoryIndex(AbstractType columnType) index = new StorageAttachedIndex(cfs, indexMetadata); return new TrieMemoryIndex(index); } + + @Ignore + @Test + public void testMemtableRangeQueryPerformance() + { + createTable("CREATE TABLE %S (pk int, ck int, val int, PRIMARY KEY (pk, ck))"); + createIndex("CREATE INDEX ON %s(val) USING 'sai'"); + + for (int pk = 0; pk < 20; pk++) + for (int ck = 0; ck < 10000; ck++) + execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", pk, ck, ck); + + Histogram histogram = new Histogram(4); + + for (int i = 0; i < 20000; i++) + { + long start = System.nanoTime(); + execute("SELECT * FROM %s WHERE pk = 5 AND val > ? LIMIT 10", 4000); + histogram.recordValue(System.nanoTime() - start); + } + + System.out.println("50th: " + histogram.getValueAtPercentile(0.5)); + System.out.println("99th: " + histogram.getValueAtPercentile(0.99)); + } } From 823f48663e62eed5ec21e1c9e6c60d5eac839dc2 Mon Sep 17 00:00:00 2001 From: Pranav Shenoy Date: Tue, 27 May 2025 12:47:53 -0700 Subject: [PATCH 329/340] Unified Compaction does not properly validate min and target sizes patch by Pranav Shenoy; reviewed by Branimir Lambov, Claude Warren, David Capwell for CASSANDRA-20398 --- CHANGES.txt | 1 + .../db/compaction/unified/Controller.java | 16 +++-- .../db/compaction/unified/ControllerTest.java | 62 ++++++++++++++++++- 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8b2f843107b1..ad743e847f03 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Unified Compaction does not properly validate min and target sizes (CASSANDRA-20398) * Avoid lambda usage in TrieMemoryIndex range queries and ensure queue size tracking is per column (CASSANDRA-20668) * Avoid CQLSH throwing an exception loading .cqlshrc on non-supported platforms (CASSANDRA-20478) * Relax validation of snapshot name as a part of SSTable files path validation (CASSANDRA-20649) diff --git a/src/java/org/apache/cassandra/db/compaction/unified/Controller.java b/src/java/org/apache/cassandra/db/compaction/unified/Controller.java index cd7a35d44d92..2d0869369e6d 100644 --- a/src/java/org/apache/cassandra/db/compaction/unified/Controller.java +++ b/src/java/org/apache/cassandra/db/compaction/unified/Controller.java @@ -495,14 +495,20 @@ public static Map validateOptions(Map options) t { try { - targetSSTableSize = FBUtilities.parseHumanReadableBytes(s); - if (targetSSTableSize < MIN_TARGET_SSTABLE_SIZE) + double targetSize = FBUtilities.parseHumanReadable(s, null, "B"); + if (targetSize >= Long.MAX_VALUE) { + throw new ConfigurationException(String.format("%s %s is out of range of Long.", + TARGET_SSTABLE_SIZE_OPTION, + s)); + } + if (targetSize < MIN_TARGET_SSTABLE_SIZE) { throw new ConfigurationException(String.format("%s %s is not acceptable, size must be at least %s", TARGET_SSTABLE_SIZE_OPTION, s, FBUtilities.prettyPrintMemory(MIN_TARGET_SSTABLE_SIZE))); } + targetSSTableSize = (long) Math.ceil(targetSize); } catch (NumberFormatException e) { @@ -603,12 +609,12 @@ public static Map validateOptions(Map options) t if (sizeInBytes < 0) throw new ConfigurationException(String.format("Invalid configuration, %s should be greater than or equal to 0 (zero)", MIN_SSTABLE_SIZE_OPTION)); - int limit = (int) Math.ceil(targetSSTableSize * INVERSE_SQRT_2); + long limit = (long) Math.ceil(targetSSTableSize * INVERSE_SQRT_2); if (sizeInBytes >= limit) - throw new ConfigurationException(String.format("Invalid configuration, %s (%s) should be less than the target size minimum: %s", + throw new ConfigurationException(String.format("Invalid configuration, %s (%s) should be less than 70%% of the targetSSTableSize (%s)", MIN_SSTABLE_SIZE_OPTION, FBUtilities.prettyPrintMemory(sizeInBytes), - FBUtilities.prettyPrintMemory(limit))); + FBUtilities.prettyPrintMemory(targetSSTableSize))); } catch (NumberFormatException e) { diff --git a/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java b/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java index 202bbc0f984a..b25a1851f9cb 100644 --- a/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/unified/ControllerTest.java @@ -119,6 +119,64 @@ public void testValidateOptionsIntegers() testValidateOptions(true); } + public void targetSSTableSizeValidator(String inputSize) + { + Map options = new HashMap<>(); + options.putIfAbsent(Controller.TARGET_SSTABLE_SIZE_OPTION, inputSize); + assertThatExceptionOfType(ConfigurationException.class) + .describedAs("Should have thrown a ConfigurationException when target_sstable_size is greater than Long.MAX_VALUE") + .isThrownBy(() -> Controller.validateOptions(options)) + .withMessageContaining(format("target_sstable_size %s is out of range of Long.", inputSize)); + } + + @Test + public void testCassandra20398Values() + { + //TARGET_SSTABLE_SIZE_OPTION = 12E899, the value reported in CASSANDRA-20398 + String inputSize = "12E899 B"; + targetSSTableSizeValidator(inputSize); + } + + @Test + public void testValidateOptionsTargetSSTableSizeGTLongMax() + { + //TARGET_SSTABLE_SIZE_OPTION > LONG.MAX_VALUE + // the inputSize is Long.MAX_VALUE + 100 + String inputSize = "9223372036854775907 B"; + targetSSTableSizeValidator(inputSize); + } + + @Test + public void testValidateOptionsTargetSSTableSizeLTMinTargetSize() + { + // TARGET_SSTABLE_SIZE_OPTION < Default MIN_TARGET_SSTABLE_SIZE (1048576) + Map options = new HashMap<>(); + String inputSize = "1048000 B"; + options.putIfAbsent(Controller.TARGET_SSTABLE_SIZE_OPTION, inputSize); + assertThatExceptionOfType(ConfigurationException.class) + .describedAs("Should have thrown a ConfigurationException when target_sstable_size is less than default MIN_TARGET_SSTABLE_SIZE") + .isThrownBy(() -> Controller.validateOptions(options)) + .withMessageContaining(format("target_sstable_size %s is not acceptable, size must be at least %s", inputSize, FBUtilities.prettyPrintMemory(Controller.MIN_TARGET_SSTABLE_SIZE))); + } + + @Test + public void testValidateOptionsTargetSSTableSizeGTIntMax() + { + //TEST 4: Verifying if TARGET_SSTABLE_SIZE_OPTION (3650722199) < MIN_TARGET_SSTABLE_SIZE (2581450423) + // Previously, TARGET_SSTABLE_SIZE_OPTION * 0.7 was stored as Integer which would 3650722199 * 0.7 = 2147483647 + // By storing it in a Long, 3650722199 * 0.7 = 2581450424. If TARGET_SSTABLE_SIZE_OPTION * 0.7 is truncated, + //this test case will fail + try + { + Map options = new HashMap<>(); + options.putIfAbsent(Controller.TARGET_SSTABLE_SIZE_OPTION, "3650722199 B"); + options.putIfAbsent(Controller.MIN_SSTABLE_SIZE_OPTION, "2581450423 B"); + Controller.validateOptions(options); + } catch(ConfigurationException e) { + fail("3650722199 * 0.7 got truncated. " + e.getMessage()); + } + } + void testValidateOptions(boolean useIntegers) { Map options = new HashMap<>(); @@ -577,7 +635,7 @@ public void testMinSSTableSize() assertThatExceptionOfType(ConfigurationException.class) .describedAs("Should have thrown a ConfigurationException when min_sstable_size is greater than target_sstable_size") .isThrownBy(() -> Controller.validateOptions(options)) - .withMessageContaining(format("less than the target size minimum: %s", FBUtilities.prettyPrintMemory(limit))); + .withMessageContaining(format("Invalid configuration, %s (%s) should be less than 70%% of the targetSSTableSize (%s)", Controller.MIN_SSTABLE_SIZE_OPTION, FBUtilities.prettyPrintMemory(limit+1), FBUtilities.prettyPrintMemory(Controller.DEFAULT_TARGET_SSTABLE_SIZE))); // test min < configured target table size * INV_SQRT_2 limit = (int) Math.ceil(Controller.MIN_TARGET_SSTABLE_SIZE * 2 * Controller.INVERSE_SQRT_2); @@ -587,6 +645,6 @@ public void testMinSSTableSize() assertThatExceptionOfType(ConfigurationException.class) .describedAs("Should have thrown a ConfigurationException when min_sstable_size is greater than target_sstable_size") .isThrownBy(() -> Controller.validateOptions(options)) - .withMessageContaining(format("less than the target size minimum: %s", FBUtilities.prettyPrintMemory(limit))); + .withMessageContaining(format("Invalid configuration, %s (%s) should be less than 70%% of the targetSSTableSize (%s)", Controller.MIN_SSTABLE_SIZE_OPTION, FBUtilities.prettyPrintMemory(limit + 1), FBUtilities.prettyPrintMemory(Controller.MIN_TARGET_SSTABLE_SIZE * 2))); } } \ No newline at end of file From d077f695534b3db57b1573ee6b2bd0463c3883b3 Mon Sep 17 00:00:00 2001 From: Andy Tolbert <6889771+tolbertam@users.noreply.github.com> Date: Tue, 13 May 2025 12:34:29 -0500 Subject: [PATCH 330/340] Ensure prepared_statement INSERT timestamp precedes eviction DELETE Updates SystemKeyspace.writePreparedStatement to accept a timestamp associated with the Prepared creation time. Using this timestamp will ensure that an INSERT into system.prepared_statements will always precede the timestamp for the same Prepared in SystemKeyspace.removePreparedStatement. This is needed because Caffeine 2.9.2 may evict an entry as soon as it is inserted if the maximum weight of the cache is exceeded causing the DELETE to be executed before the INSERT. Additionally, any clusters currently experiencing a leaky system.prepared_statements table from this bug may struggle to bounce into a version with this fix as SystemKeyspace.loadPreparedPreparedStatements currently does not paginate the query to system.prepared_statements, causing heap OOMs. To fix this this patch adds pagination at 5000 rows and aborts loading once the cache size is loaded. This should allow nodes to come up and delete older prepared statements that may no longer be used as the cache fills up (which should happen immediately). This patch does not address the issue of Caffeine immediately evicting a prepared statement, however it will prevent the system.prepared_statements table from growing unbounded. For most users this should be adequate, as the cache should only be filled when there are erroneously many unique prepared statements. In such a case we can expect that clients will constantly prepare statements regardless of whether or not the cache is evicting statements. patch by Andy Tolbert; reviewed by Berenguer Blasi and Caleb Rackliffe for CASSANDRA-19703 --- CHANGES.txt | 6 +- .../cassandra/pages/cql/cql_singlefile.adoc | 22 ++ .../apache/cassandra/cql3/QueryHandler.java | 7 + .../apache/cassandra/cql3/QueryProcessor.java | 73 ++++-- .../apache/cassandra/db/SystemKeyspace.java | 53 +++- .../distributed/test/MixedModeFuzzTest.java | 7 +- .../distributed/test/ReprepareFuzzTest.java | 4 +- .../cassandra/cql3/PstmtPersistenceTest.java | 240 +++++++++++++++++- 8 files changed, 377 insertions(+), 35 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 676522b9bf0a..9a036b2119b3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,9 @@ -4.0.18 +4.0.19 + * Ensure prepared_statement INSERT timestamp precedes eviction DELETE (CASSANDRA-19703) * Gossip doesn't converge due to race condition when updating EndpointStates multiple fields (CASSANDRA-20659) + + +4.0.18 * Handle sstable metadata stats file getting a new mtime after compaction has finished (CASSANDRA-18119) * Honor MAX_PARALLEL_TRANSFERS correctly (CASSANDRA-20532) * Updating a column with a new TTL but same expiration time is non-deterministic and causes repair mismatches. (CASSANDRA-20561) diff --git a/doc/modules/cassandra/pages/cql/cql_singlefile.adoc b/doc/modules/cassandra/pages/cql/cql_singlefile.adoc index 89ed359b7390..1044975e35e3 100644 --- a/doc/modules/cassandra/pages/cql/cql_singlefile.adoc +++ b/doc/modules/cassandra/pages/cql/cql_singlefile.adoc @@ -239,6 +239,28 @@ provide values for `LIMIT`, `TIMESTAMP`, and `TTL` clauses. If anonymous bind markers are used, the names for the query parameters will be `[limit]`, `[timestamp]`, and `[ttl]`, respectively. +===== Prepared Statement Caching + +Prepared Statements are cached by cassandra in-memory using a +https://github.com/ben-manes/caffeine[Caffeine]-managed cache which +can be configured using +xref:managing/configuration/cass_yaml_file.adoc#_prepared_statements_cache_size[`prepared_statements_cache_size`]. +The cache is also persisted to the `system.prepared_statements` table +so it can be preloaded into memory on startup. + +To ensure optimal performance, it's important to use a bind `` +for *all non-constant values* in your CQL statements. If you include +literal values directly in the query instead, each variation will be +treated as a unique statement that must be prepared and cached +separately. This will soon overflow the prepared statement cache, +which is small by design. + +When the cache reaches its maximum size, older or less frequently +used statements are +https://github.com/ben-manes/caffeine/wiki/Eviction[evicted], +leading to additional overhead as previously prepared statements must +be re-prepared. + [[dataDefinition]] === Data Definition diff --git a/src/java/org/apache/cassandra/cql3/QueryHandler.java b/src/java/org/apache/cassandra/cql3/QueryHandler.java index e0480a6e4bb2..638b87018c63 100644 --- a/src/java/org/apache/cassandra/cql3/QueryHandler.java +++ b/src/java/org/apache/cassandra/cql3/QueryHandler.java @@ -65,6 +65,12 @@ public static class Prepared public final MD5Digest resultMetadataId; + /** + * Timestamp of when this prepared statement was created. Used in QueryProcessor.preparedStatements cache + * to ensure that the deletion timestamp always succeeds the insert timestamp. + */ + public final long timestamp; + /** * Contains the CQL statement source if the statement has been "regularly" perpared via * {@link QueryHandler#prepare(String, ClientState, Map)}. @@ -81,6 +87,7 @@ public Prepared(CQLStatement statement, String rawCQLStatement, boolean fullyQua this.resultMetadataId = ResultSet.ResultMetadata.fromPrepared(statement).getResultMetadataId(); this.fullyQualified = fullyQualified; this.keyspace = keyspace; + this.timestamp = ClientState.getTimestamp(); } } } diff --git a/src/java/org/apache/cassandra/cql3/QueryProcessor.java b/src/java/org/apache/cassandra/cql3/QueryProcessor.java index c1045548b8f5..861222890a4b 100644 --- a/src/java/org/apache/cassandra/cql3/QueryProcessor.java +++ b/src/java/org/apache/cassandra/cql3/QueryProcessor.java @@ -26,6 +26,7 @@ import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.RemovalCause; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Predicate; import com.google.common.collect.*; @@ -86,23 +87,22 @@ public class QueryProcessor implements QueryHandler // counters. Callers of processStatement are responsible for correctly notifying metrics public static final CQLMetrics metrics = new CQLMetrics(); + // Paging size to use when preloading prepared statements. + public static final int PRELOAD_PREPARED_STATEMENTS_FETCH_SIZE = 5000; + + // Size of the prepared statement cache in bytes. + public static long PREPARED_STATEMENT_CACHE_SIZE_BYTES = capacityToBytes(DatabaseDescriptor.getPreparedStatementsCacheSizeMB()); + private static final AtomicInteger lastMinuteEvictionsCount = new AtomicInteger(0); static { preparedStatements = Caffeine.newBuilder() .executor(MoreExecutors.directExecutor()) - .maximumWeight(capacityToBytes(DatabaseDescriptor.getPreparedStatementsCacheSizeMB())) + .maximumWeight(PREPARED_STATEMENT_CACHE_SIZE_BYTES) .weigher(QueryProcessor::getSizeOfPreparedStatementForCache) - .removalListener((key, prepared, cause) -> { - MD5Digest md5Digest = (MD5Digest) key; - if (cause.wasEvicted()) - { - metrics.preparedStatementsEvicted.inc(); - lastMinuteEvictionsCount.incrementAndGet(); - SystemKeyspace.removePreparedStatement(md5Digest); - } - }).build(); + .removalListener((key, prepared, cause) -> evictPreparedStatement(key, cause)) + .build(); ScheduledExecutors.scheduledTasks.scheduleAtFixedRate(() -> { long count = lastMinuteEvictionsCount.getAndSet(0); @@ -116,6 +116,16 @@ public class QueryProcessor implements QueryHandler DatabaseDescriptor.getPreparedStatementsCacheSizeMB()); } + private static void evictPreparedStatement(MD5Digest key, RemovalCause cause) + { + if (cause.wasEvicted()) + { + metrics.preparedStatementsEvicted.inc(); + lastMinuteEvictionsCount.incrementAndGet(); + SystemKeyspace.removePreparedStatement(key); + } + } + private static long capacityToBytes(long cacheSizeMB) { return cacheSizeMB * 1024 * 1024; @@ -140,6 +150,12 @@ private enum InternalStateInstance } public void preloadPreparedStatements() + { + preloadPreparedStatements(PRELOAD_PREPARED_STATEMENTS_FETCH_SIZE); + } + + @VisibleForTesting + public int preloadPreparedStatements(int pageSize) { int count = SystemKeyspace.loadPreparedStatements((id, query, keyspace) -> { try @@ -154,17 +170,18 @@ public void preloadPreparedStatements() // Preload `null` statement for non-fully qualified statements, since it can't be parsed if loaded from cache and will be dropped if (!prepared.fullyQualified) preparedStatements.get(computeId(query, null), (ignored_) -> prepared); - return true; + return prepared; } catch (RequestValidationException e) { JVMStabilityInspector.inspectThrowable(e); logger.warn(String.format("Prepared statement recreation error, removing statement: %s %s %s", id, query, keyspace)); SystemKeyspace.removePreparedStatement(id); - return false; + return null; } - }); + }, pageSize); logger.info("Preloaded {} prepared statements", count); + return count; } @@ -466,11 +483,33 @@ public static UntypedResultSet execute(String query, ConsistencyLevel cl, QueryS public static UntypedResultSet executeInternalWithPaging(String query, int pageSize, Object... values) { Prepared prepared = prepareInternal(query); - if (!(prepared.statement instanceof SelectStatement)) + + return executeInternalWithPaging(prepared.statement, pageSize, values); + } + + /** + * Executes with a non-prepared statement using paging. Generally {@link #executeInternalWithPaging(String, int, Object...)} + * should be used instead of this, but this may be used in niche cases like + * {@link SystemKeyspace#loadPreparedStatement(MD5Digest, SystemKeyspace.TriFunction)} where prepared statements are + * being loaded into {@link #preparedStatements} so it doesn't make sense to prepare a statement in this context. + */ + public static UntypedResultSet executeOnceInternalWithPaging(String query, int pageSize, Object... values) + { + QueryState queryState = internalQueryState(); + CQLStatement statement = parseStatement(query, queryState.getClientState()); + statement.validate(queryState.getClientState()); + + return executeInternalWithPaging(statement, pageSize, values); + } + + private static UntypedResultSet executeInternalWithPaging(CQLStatement statement, int pageSize, Object... values) + { + if (!(statement instanceof SelectStatement)) throw new IllegalArgumentException("Only SELECTs can be paged"); - SelectStatement select = (SelectStatement)prepared.statement; - QueryPager pager = select.getQuery(makeInternalOptions(prepared.statement, values), FBUtilities.nowInSeconds()).getPager(null, ProtocolVersion.CURRENT); + SelectStatement select = (SelectStatement) statement; + int nowInSec = FBUtilities.nowInSeconds(); + QueryPager pager = select.getQuery(makeInternalOptions(select, values), nowInSec).getPager(null, ProtocolVersion.CURRENT); return UntypedResultSet.create(select, pager, pageSize); } @@ -696,7 +735,7 @@ public static ResultMessage.Prepared storePreparedStatement(String queryString, Prepared previous = preparedStatements.get(statementId, (ignored_) -> prepared); if (previous == prepared) - SystemKeyspace.writePreparedStatement(keyspace, statementId, queryString); + SystemKeyspace.writePreparedStatement(keyspace, statementId, queryString, prepared.timestamp); ResultSet.PreparedMetadata preparedMetadata = ResultSet.PreparedMetadata.fromPrepared(prepared.statement); ResultSet.ResultMetadata resultMetadata = ResultSet.ResultMetadata.fromPrepared(prepared.statement); diff --git a/src/java/org/apache/cassandra/db/SystemKeyspace.java b/src/java/org/apache/cassandra/db/SystemKeyspace.java index 765525775978..56dd03a3c479 100644 --- a/src/java/org/apache/cassandra/db/SystemKeyspace.java +++ b/src/java/org/apache/cassandra/db/SystemKeyspace.java @@ -53,6 +53,7 @@ import org.slf4j.LoggerFactory; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.QueryHandler.Prepared; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.cql3.functions.AggregateFcts; @@ -78,6 +79,7 @@ import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.io.util.RebufferingInputStream; import org.apache.cassandra.locator.IEndpointSnitch; import org.apache.cassandra.locator.InetAddressAndPort; @@ -109,8 +111,10 @@ import static java.lang.String.format; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonMap; +import static org.apache.cassandra.cql3.QueryProcessor.PREPARED_STATEMENT_CACHE_SIZE_BYTES; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; +import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternalWithPaging; public final class SystemKeyspace { @@ -1615,11 +1619,11 @@ private static Range byteBufferToRange(ByteBuffer rawRange, IPartitioner } } - public static void writePreparedStatement(String loggedKeyspace, MD5Digest key, String cql) + public static void writePreparedStatement(String loggedKeyspace, MD5Digest key, String cql, long timestamp) { - executeInternal(format("INSERT INTO %s (logged_keyspace, prepared_id, query_string) VALUES (?, ?, ?)", + executeInternal(format("INSERT INTO %s (logged_keyspace, prepared_id, query_string) VALUES (?, ?, ?) USING TIMESTAMP ?", PreparedStatements.toString()), - loggedKeyspace, key.byteBuffer(), cql); + loggedKeyspace, key.byteBuffer(), cql, timestamp); logger.debug("stored prepared statement for logged keyspace '{}': '{}'", loggedKeyspace, cql); } @@ -1635,17 +1639,50 @@ public static void resetPreparedStatements() preparedStatements.truncateBlockingWithoutSnapshot(); } - public static int loadPreparedStatements(TriFunction onLoaded) + public static int loadPreparedStatements(TriFunction onLoaded) + { + return loadPreparedStatements(onLoaded, QueryProcessor.PRELOAD_PREPARED_STATEMENTS_FETCH_SIZE); + } + + public static int loadPreparedStatements(TriFunction onLoaded, int pageSize) { String query = String.format("SELECT prepared_id, logged_keyspace, query_string FROM %s.%s", SchemaConstants.SYSTEM_KEYSPACE_NAME, PREPARED_STATEMENTS); - UntypedResultSet resultSet = executeOnceInternal(query); + UntypedResultSet resultSet = executeOnceInternalWithPaging(query, pageSize); int counter = 0; + + // As the cache size may be briefly exceeded before statements are evicted, we allow loading 110% the cache size + // to avoid logging early. + long preparedBytesLoadThreshold = (long) (PREPARED_STATEMENT_CACHE_SIZE_BYTES * 1.1); + long preparedBytesLoaded = 0L; for (UntypedResultSet.Row row : resultSet) { - if (onLoaded.accept(MD5Digest.wrap(row.getByteArray("prepared_id")), - row.getString("query_string"), - row.has("logged_keyspace") ? row.getString("logged_keyspace") : null)) + Prepared prepared = onLoaded.accept(MD5Digest.wrap(row.getByteArray("prepared_id")), + row.getString("query_string"), + row.has("logged_keyspace") ? row.getString("logged_keyspace") : null); + if (prepared != null) + { counter++; + preparedBytesLoaded += Math.max(0, prepared.pstmntSize); + + if (preparedBytesLoaded > preparedBytesLoadThreshold) + { + // In the event that we detect that we have loaded more bytes than the cache size return early to + // prevent an indefinite startup time. This is almost certainly caused by the prepared statement cache + // leaking (CASSANDRA-19703) which should not recur after being on a version running this code. + // In such a case it's better to warn and continue startup than to continually page over millions of + // prepared statements that would be immediately evicted. + logger.warn("Detected prepared statement cache filling up during preload after preparing {} " + + "statements (loaded {} with prepared_statements_cache_size being {}). " + + "This could be an indication that prepared statements leaked prior to CASSANDRA-19703 " + + "being fixed. Returning early to prevent indefinite startup. " + + "Consider truncating {}.{} to clear out leaked prepared statements.", + counter, + FileUtils.stringifyFileSize(preparedBytesLoaded), + FileUtils.stringifyFileSize(PREPARED_STATEMENT_CACHE_SIZE_BYTES), + SchemaConstants.SYSTEM_KEYSPACE_NAME, PREPARED_STATEMENTS); + break; + } + } } return counter; } diff --git a/test/distributed/org/apache/cassandra/distributed/test/MixedModeFuzzTest.java b/test/distributed/org/apache/cassandra/distributed/test/MixedModeFuzzTest.java index 954280f0b8c6..609f94ce91b0 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/MixedModeFuzzTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/MixedModeFuzzTest.java @@ -49,7 +49,7 @@ import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; import org.apache.cassandra.cql3.CQLStatement; -import org.apache.cassandra.cql3.QueryHandler; +import org.apache.cassandra.cql3.QueryHandler.Prepared; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.distributed.api.ConsistencyLevel; @@ -268,9 +268,10 @@ public void mixedModeFuzzTest() throws Throwable c.get(nodeWithFix.get() ? 1 : 2).runOnInstance(() -> { SystemKeyspace.loadPreparedStatements((id, query, keyspace) -> { + Prepared prepared = QueryProcessor.instance.getPrepared(id); if (rng.nextBoolean()) QueryProcessor.instance.evictPrepared(id); - return true; + return prepared; }); }); break; @@ -450,7 +451,7 @@ public static ResultMessage.Prepared prepare(String queryString, ClientState cli if (existing != null) return existing; - QueryHandler.Prepared prepared = QueryProcessor.parseAndPrepare(queryString, clientState, false); + Prepared prepared = QueryProcessor.parseAndPrepare(queryString, clientState, false); CQLStatement statement = prepared.statement; int boundTerms = statement.getBindVariables().size(); diff --git a/test/distributed/org/apache/cassandra/distributed/test/ReprepareFuzzTest.java b/test/distributed/org/apache/cassandra/distributed/test/ReprepareFuzzTest.java index f56847f68c9a..9988234743aa 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/ReprepareFuzzTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/ReprepareFuzzTest.java @@ -43,6 +43,7 @@ import net.bytebuddy.dynamic.DynamicType; import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; import net.bytebuddy.implementation.MethodDelegation; +import org.apache.cassandra.cql3.QueryHandler.Prepared; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.distributed.api.ConsistencyLevel; @@ -226,9 +227,10 @@ public void fuzzTest() throws Throwable case CLEAR_CACHES: c.get(1).runOnInstance(() -> { SystemKeyspace.loadPreparedStatements((id, query, keyspace) -> { + Prepared prepared = QueryProcessor.instance.getPrepared(id); if (rng.nextBoolean()) QueryProcessor.instance.evictPrepared(id); - return true; + return prepared; }); }); break; diff --git a/test/unit/org/apache/cassandra/cql3/PstmtPersistenceTest.java b/test/unit/org/apache/cassandra/cql3/PstmtPersistenceTest.java index df4a554e2e23..829f7d04a13b 100644 --- a/test/unit/org/apache/cassandra/cql3/PstmtPersistenceTest.java +++ b/test/unit/org/apache/cassandra/cql3/PstmtPersistenceTest.java @@ -21,31 +21,60 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.apache.cassandra.db.ReadQuery; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.SchemaKeyspaceTables; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.QueryState; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.MD5Digest; +import org.jboss.byteman.contrib.bmunit.BMRule; +import org.jboss.byteman.contrib.bmunit.BMRules; +import org.jboss.byteman.contrib.bmunit.BMUnitRunner; import static org.junit.Assert.*; +@RunWith(BMUnitRunner.class) public class PstmtPersistenceTest extends CQLTester { + private static final CompletableFuture[] futureArray = new CompletableFuture[0]; + + private static final ConcurrentMap preparedStatementLoadTimestamps = new ConcurrentHashMap<>(); + private static final ConcurrentMap preparedStatementRemoveTimestamps = new ConcurrentHashMap<>(); + + // page size passed to preloadPreparedStatements + private static final int PRELOAD_PAGE_SIZE = 100; + + // recorded page invocations in preloadPreparedStatements + private static final AtomicInteger pageInvocations = new AtomicInteger(); + @Before public void setUp() { + preparedStatementLoadTimestamps.clear(); + preparedStatementRemoveTimestamps.clear(); + QueryProcessor.clearPreparedStatements(false); } - + @Test public void testCachedPreparedStatements() throws Throwable { @@ -102,7 +131,7 @@ public void testCachedPreparedStatements() throws Throwable Assert.assertNotNull(prepared); } - // add anther prepared statement and sync it to table + // add another prepared statement and sync it to table prepareStatement(statement2, "foo", "bar", clientState); // statement1 will have two statements prepared because of `setKeyspace` usage @@ -140,12 +169,24 @@ public void testPstmtInvalidation() throws Throwable createTable("CREATE TABLE %s (key int primary key, val int)"); + long initialEvicted = numberOfEvictedStatements(); + for (int cnt = 1; cnt < 10000; cnt++) { prepareStatement("INSERT INTO %s (key, val) VALUES (?, ?) USING TIMESTAMP " + cnt, clientState); - if (numberOfEvictedStatements() > 0) + if (numberOfEvictedStatements() - initialEvicted > 0) { + assertEquals("Number of statements in table and in cache don't match", numberOfStatementsInMemory(), numberOfStatementsOnDisk()); + + // prepare more statements to trigger more evictions + for (int cnt2 = cnt + 1; cnt2 < cnt + 10; cnt2++) + prepareStatement("INSERT INTO %s (key, val) VALUES (?, ?) USING TIMESTAMP " + cnt2, clientState); + + // each new prepared statement should have caused an eviction + assertEquals("eviction count didn't increase by the expected number", 10, numberOfEvictedStatements() - initialEvicted); + assertEquals("Number of statements in memory (expected) and table (actual) don't match", numberOfStatementsInMemory(), numberOfStatementsOnDisk()); + return; } } @@ -153,6 +194,196 @@ public void testPstmtInvalidation() throws Throwable fail("Prepared statement eviction does not work"); } + @Test + @BMRules(rules= { + @BMRule(name = "CaptureWriteTimestamps", + targetClass = "SystemKeyspace", + targetMethod = "writePreparedStatement(String, MD5Digest, String, long)", + targetLocation = "AT INVOKE executeInternal", + action = "org.apache.cassandra.cql3.PstmtPersistenceTest.preparedStatementLoadTimestamps.put($key, $timestamp);" + ), + @BMRule(name = "CaptureEvictTimestamps", + targetClass = "QueryProcessor", + targetMethod = "evictPreparedStatement(MD5Digest, RemovalCause)", + action = "org.apache.cassandra.cql3.PstmtPersistenceTest.preparedStatementRemoveTimestamps.put($key, org.apache.cassandra.service.ClientState.getTimestamp());" + ) + }) + public void testAsyncPstmtInvalidation() throws Throwable + { + ClientState clientState = ClientState.forInternalCalls(); + createTable("CREATE TABLE %s (key int primary key, val int)"); + + // prepare statements concurrently in a thread pool to exercise bug encountered in CASSANDRA-19703 where + // delete from table occurs before the insert due to early eviction. + final ExecutorService executor = Executors.newFixedThreadPool(10); + + long initialEvicted = numberOfEvictedStatements(); + try + { + int initialMaxStatementsToPrepare = 10000; + int maxStatementsToPrepare = initialMaxStatementsToPrepare; + boolean hasEvicted = false; + int concurrency = 100; + List> prepareFutures = new ArrayList<>(concurrency); + + for (int cnt = 1; cnt <= maxStatementsToPrepare; cnt++) + { + final int localCnt = cnt; + prepareFutures.add(CompletableFuture.supplyAsync(() -> prepareStatement("INSERT INTO %s (key, val) VALUES (?, ?) USING TIMESTAMP " + localCnt, clientState), executor)); + + if (prepareFutures.size() == concurrency) + { + // Await completion of current inflight futures + CompletableFuture.allOf(prepareFutures.toArray(futureArray)).get(10, TimeUnit.SECONDS); + prepareFutures.clear(); + } + + // Once we've detected evictions, prepare as many statements as we've prepared so far to initialMaxStatementsToPrepare and then stop. + if (!hasEvicted && numberOfEvictedStatements() - initialEvicted > 0) + { + maxStatementsToPrepare = Math.min(cnt * 2, initialMaxStatementsToPrepare); + hasEvicted = true; + } + } + + long evictedStatements = numberOfEvictedStatements() - initialEvicted; + assertNotEquals("Should have evicted some prepared statements", 0, evictedStatements); + + // Recorded prepared statement removals should match metrics + assertEquals("Actual evicted statements does not match metrics", evictedStatements, preparedStatementRemoveTimestamps.size()); + + // For each prepared statement evicted, assert the time it was deleted is greater than the timestamp + // used for when it was loaded. + for (Map.Entry evictedStatementEntry : preparedStatementRemoveTimestamps.entrySet()) + { + MD5Digest key = evictedStatementEntry.getKey(); + long deletionTimestamp = evictedStatementEntry.getValue(); + long insertionTimestamp = preparedStatementLoadTimestamps.get(key); + + assertTrue(String.format("Expected deletion timestamp for prepared statement (%d) to be greater than insertion timestamp (%d)", + deletionTimestamp, insertionTimestamp), + deletionTimestamp > insertionTimestamp); + } + + // ensure the number of statements on disk match the number in memory, if number of statements on disk eclipses in memory, there was a leak. + assertEquals("Number of statements in memory (expected) and table (actual) don't match", numberOfStatementsInMemory(), numberOfStatementsOnDisk()); + } + finally + { + executor.shutdown(); + } + } + + /** + * Invoked whenever paging happens in testPreloadPreparedStatements, increments PAGE_INVOCATIONS when we detect + * paging happening in the path of QueryProcessor.preloadPreparedStatements with the expected page size. + */ + @SuppressWarnings("unused") + private static void nextPageReadQuery(ReadQuery query, int pageSize) + { + TableMetadata metadata = query.metadata(); + if (metadata.keyspace.equals(SchemaConstants.SYSTEM_KEYSPACE_NAME) && + metadata.name.equals(SystemKeyspace.PREPARED_STATEMENTS) && + pageSize == PRELOAD_PAGE_SIZE) + { + for (StackTraceElement stackTraceElement : Thread.currentThread().getStackTrace()) + { + if (stackTraceElement.getClassName().equals(QueryProcessor.class.getName()) && stackTraceElement.getMethodName().equals("preloadPreparedStatements")) + { + pageInvocations.incrementAndGet(); + return; + } + } + } + } + + @Test + @BMRule(name = "CapturePageInvocations", + targetClass = "PartitionRangeQueryPager", + targetMethod = "nextPageReadQuery(int)", + action = "org.apache.cassandra.cql3.PstmtPersistenceTest.nextPageReadQuery($this.query, $pageSize)") + public void testPreloadPreparedStatements() throws Throwable + { + ClientState clientState = ClientState.forInternalCalls(); + createTable("CREATE TABLE %s (key int primary key, val int)"); + + // Prepare more statements than the paging size to ensure paging works properly. + int statementsToPrepare = 750; + + for (int cnt = 1; cnt <= statementsToPrepare; cnt++) + { + prepareStatement("INSERT INTO %s (key, val) VALUES (?, ?) USING TIMESTAMP " + cnt, clientState); + } + + // Capture how many statements are in memory before clearing cache. + long statementsInMemory = numberOfStatementsInMemory(); + long statementsOnDisk = numberOfStatementsOnDisk(); + assertEquals(statementsOnDisk, statementsInMemory); + + // Drop prepared statements from cache only and ensure the cache empties out. + QueryProcessor.clearPreparedStatements(true); + assertEquals(0, numberOfStatementsInMemory()); + + // Load prepared statements and ensure the cache size matches max + QueryProcessor.instance.preloadPreparedStatements(PRELOAD_PAGE_SIZE); + + long statementsInMemoryAfterLoading = numberOfStatementsInMemory(); + // Ensure size of cache matches statements that were on disk before preload + assertEquals("Statements prepared - evicted (expected) does not match statements in memory (actual)", + statementsOnDisk, statementsInMemoryAfterLoading); + + // Number of statements on disk shold match memory + assertEquals(statementsInMemoryAfterLoading, numberOfStatementsOnDisk()); + + // Ensure only executed the expected amount of pages. + int expectedPageInvocations = (int) Math.ceil(statementsInMemoryAfterLoading / (double) PRELOAD_PAGE_SIZE); + assertEquals(expectedPageInvocations, pageInvocations.get()); + } + + @Test + public void testPreloadPreparedStatementsUntilCacheFull() + { + QueryHandler handler = ClientState.getCQLQueryHandler(); + ClientState clientState = ClientState.forInternalCalls(); + createTable("CREATE TABLE %s (key int primary key, val int)"); + + // Fill up and clear the prepared statement cache several times to load up the system.prepared_statements table. + // This simulates a 'leak' of prepared statements akin to CASSANDRA-19703 as the system.prepared_statements + // table is able to grow to a larger size than the in memory prepared statement cache. In such a case we + // should detect a possible leak and defer paging indefinitely by returning early in preloadPreparedStatements. + int statementsLoadedWhenFull = -1; + long accumulatedSize = 0; + // load enough prepared statements to fill the cache 5 times. + for (int cnt = 0; accumulatedSize < QueryProcessor.PREPARED_STATEMENT_CACHE_SIZE_BYTES * 5; cnt++) + { + MD5Digest id = prepareStatement("INSERT INTO %s (key, val) VALUES (?, ?) USING TIMESTAMP " + cnt, clientState); + QueryHandler.Prepared prepared = handler.getPrepared(id); + assertTrue(prepared.pstmntSize > -1); + accumulatedSize += prepared.pstmntSize; + if (statementsLoadedWhenFull == -1 && accumulatedSize > QueryProcessor.PREPARED_STATEMENT_CACHE_SIZE_BYTES) + { + statementsLoadedWhenFull = cnt; + } + // clear cache repeatedly to avoid eviction. + QueryProcessor.clearPreparedStatements(true); + } + + + int preloadedStatements = QueryProcessor.instance.preloadPreparedStatements(PRELOAD_PAGE_SIZE); + + // Should have loaded as many statements as we detected were loaded before cache would be full. + assertTrue(String.format("Preloaded %d statements, expected at least %d", + preloadedStatements, statementsLoadedWhenFull), + preloadedStatements > statementsLoadedWhenFull); + + // We should only expect to load how many statements we were able to load before filling the cache + // + a buffer of 110%, set to 1.5x just to deal with sensitivity of detecting cache filling up. + int atMostPreloadedExpected = (int) (statementsLoadedWhenFull * 1.5); + assertTrue(String.format("Preloaded %d statements, but only expected that we'd load at most %d", + preloadedStatements, atMostPreloadedExpected), + preloadedStatements <= atMostPreloadedExpected); + } + private long numberOfStatementsOnDisk() throws Throwable { UntypedResultSet.Row row = execute("SELECT COUNT(*) FROM " + SchemaConstants.SYSTEM_KEYSPACE_NAME + '.' + SystemKeyspace.PREPARED_STATEMENTS).one(); @@ -176,7 +407,6 @@ private MD5Digest prepareStatement(String stmt, ClientState clientState) private MD5Digest prepareStatement(String stmt, String keyspace, String table, ClientState clientState) { - System.out.println(stmt + String.format(stmt, keyspace + "." + table)); - return QueryProcessor.instance.prepare(String.format(stmt, keyspace + "." + table), clientState).statementId; + return QueryProcessor.instance.prepare(String.format(stmt, keyspace + '.' + table), clientState).statementId; } } From e5c101673a0ec9a097ba41ffd99090944f73124d Mon Sep 17 00:00:00 2001 From: Caleb Rackliffe Date: Fri, 9 May 2025 01:34:13 -0500 Subject: [PATCH 331/340] Ensure replica filtering protection does not trigger unnecessary short read protection reads patch by Caleb Rackliffe; reviewed by Blake Eggleston and Zhao Yang for CASSANDRA-20639 --- CHANGES.txt | 1 + .../db/partitions/PartitionIterators.java | 15 - .../reads/ReplicaFilteringProtection.java | 291 +++++++++++------- .../PartitionIteratorMergeListener.java | 2 +- .../test/sai/StrictFilteringTest.java | 48 +++ 5 files changed, 234 insertions(+), 123 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index e95055e0a363..50f6f851e8dc 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Ensure replica filtering protection does not trigger unnecessary short read protection reads (CASSANDRA-20639) * Unified Compaction does not properly validate min and target sizes (CASSANDRA-20398) * Avoid lambda usage in TrieMemoryIndex range queries and ensure queue size tracking is per column (CASSANDRA-20668) * Avoid CQLSH throwing an exception loading .cqlshrc on non-supported platforms (CASSANDRA-20478) diff --git a/src/java/org/apache/cassandra/db/partitions/PartitionIterators.java b/src/java/org/apache/cassandra/db/partitions/PartitionIterators.java index b8a86d5a1aa2..5375b2cf0f16 100644 --- a/src/java/org/apache/cassandra/db/partitions/PartitionIterators.java +++ b/src/java/org/apache/cassandra/db/partitions/PartitionIterators.java @@ -93,21 +93,6 @@ public static void consume(PartitionIterator iterator) } } - /** - * Consumes all rows in the next partition of the provided partition iterator. - */ - public static void consumeNext(PartitionIterator iterator) - { - if (iterator.hasNext()) - { - try (RowIterator partition = iterator.next()) - { - while (partition.hasNext()) - partition.next(); - } - } - } - /** * Wraps the provided iterator so it logs the returned rows for debugging purposes. *

      diff --git a/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java b/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java index c66c2007d6df..72c1c85fc84b 100644 --- a/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java +++ b/src/java/org/apache/cassandra/service/reads/ReplicaFilteringProtection.java @@ -27,13 +27,14 @@ import java.util.Queue; import java.util.function.Function; +import javax.annotation.concurrent.NotThreadSafe; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ColumnFamilyStore; -import org.apache.cassandra.db.Columns; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.DeletionTime; @@ -46,12 +47,12 @@ import org.apache.cassandra.db.filter.DataLimits; import org.apache.cassandra.db.filter.RowFilter; import org.apache.cassandra.db.partitions.PartitionIterator; -import org.apache.cassandra.db.partitions.PartitionIterators; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; import org.apache.cassandra.db.rows.EncodingStats; import org.apache.cassandra.db.rows.RangeTombstoneMarker; import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.RowIterator; import org.apache.cassandra.db.rows.Rows; import org.apache.cassandra.db.rows.Unfiltered; import org.apache.cassandra.db.rows.UnfilteredRowIterator; @@ -71,6 +72,7 @@ import org.apache.cassandra.service.ClientWarn; import org.apache.cassandra.service.StorageProxy; import org.apache.cassandra.service.reads.repair.NoopReadRepair; +import org.apache.cassandra.service.reads.repair.PartitionIteratorMergeListener; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.NoSpamLogger; @@ -90,6 +92,7 @@ * @see CASSANDRA-15907 * @see CASSANDRA-19018 */ +@NotThreadSafe public class ReplicaFilteringProtection> { private static final Logger logger = LoggerFactory.getLogger(ReplicaFilteringProtection.class); @@ -105,6 +108,8 @@ public class ReplicaFilteringProtection> private final E sources; private final TableMetrics tableMetrics; + private final QueryMergeListener mergeListener; + private final int cachedRowsWarnThreshold; private final int cachedRowsFailThreshold; @@ -119,6 +124,12 @@ public class ReplicaFilteringProtection> */ private final List> originalPartitions; + /** Whether to consume entire partitions or not in {@link #queryProtectedPartitions}. */ + private final boolean consumeEntirePartitions; + + /** Tracks the current partitions when not consuming entire partitions in {@link #queryProtectedPartitions}. */ + private RowIterator currentRowIterator = null; + ReplicaFilteringProtection(Keyspace keyspace, ReadCommand command, ConsistencyLevel consistency, @@ -129,6 +140,7 @@ public class ReplicaFilteringProtection> { this.keyspace = keyspace; this.command = command; + this.consumeEntirePartitions = command.limits().isUnlimited() || !command.isLimitedToOnePartition() || command.rowFilter().hasStaticExpression(); this.consistency = consistency; this.requestTime = requestTime; this.sources = sources; @@ -143,6 +155,8 @@ public class ReplicaFilteringProtection> this.cachedRowsWarnThreshold = cachedRowsWarnThreshold; this.cachedRowsFailThreshold = cachedRowsFailThreshold; + + mergeListener = new QueryMergeListener(); } private UnfilteredPartitionIterator executeReadCommand(ReadCommand cmd, Replica source, ReplicaPlan.Shared replicaPlan) @@ -170,109 +184,136 @@ private UnfilteredPartitionIterator executeReadCommand(ReadCommand cmd, Replica return resolver.getMessages().get(0).payload.makeIterator(command); } - /** - * This listener tracks both the accepted data and the primary keys of the rows that may be incomplete. - * That way, once the query results are merged using this listener, subsequent calls to - * {@link #queryProtectedPartitions(PartitionIterator, int)} will use the collected data to return a copy of the - * data originally collected from the specified replica, completed with the potentially outdated rows. - */ - UnfilteredPartitionIterators.MergeListener mergeController() + private class PartitionMergeListerner implements UnfilteredRowIterators.MergeListener { - return new UnfilteredPartitionIterators.MergeListener() + final DecoratedKey key; + final List builders = new ArrayList<>(sources.size()); + final RegularAndStaticColumns columns; + final EncodingStats stats; + final boolean[] silentRowAt; + final boolean[] silentColumnAt; + + PartitionMergeListerner(DecoratedKey partitionKey, List versions) { - @Override - public void close() - { - // If we hit the failure threshold before consuming a single partition, record the current rows cached. - tableMetrics.rfpRowsCachedPerQuery.update(Math.max(currentRowsCached, maxRowsCached)); - } + key = partitionKey; + columns = PartitionIteratorMergeListener.columns(versions); + stats = EncodingStats.merge(versions, NULL_TO_NO_STATS); - @Override - public UnfilteredRowIterators.MergeListener getRowMergeListener(DecoratedKey partitionKey, List versions) - { - List builders = new ArrayList<>(sources.size()); - RegularAndStaticColumns columns = columns(versions); - EncodingStats stats = EncodingStats.merge(versions, NULL_TO_NO_STATS); + for (int i = 0; i < sources.size(); i++) + builders.add(i, new PartitionBuilder(partitionKey, sources.get(i), columns, stats)); + + silentRowAt = new boolean[builders.size()]; + silentColumnAt = new boolean[builders.size()]; + } + + @Override + public void onMergedPartitionLevelDeletion(DeletionTime mergedDeletion, DeletionTime[] versions) + { + // cache the deletion time versions to be able to regenerate the original row iterator + for (int i = 0; i < versions.length; i++) + builders.get(i).setDeletionTime(versions[i]); + } - for (int i = 0; i < sources.size(); i++) - builders.add(i, new PartitionBuilder(partitionKey, sources.get(i), columns, stats)); + @Override + public void onMergedRows(Row merged, Row[] versions) + { + // Cache the row versions to be able to regenerate the original row iterator: + for (int i = 0; i < versions.length; i++) + builders.get(i).addRow(versions[i]); - boolean[] silentRowAt = new boolean[builders.size()]; - boolean[] silentColumnAt = new boolean[builders.size()]; + // If all versions are empty, there's no divergence to resolve: + if (merged.isEmpty()) + return; - return new UnfilteredRowIterators.MergeListener() + Arrays.fill(silentRowAt, false); + + // Mark replicas silent if they provide no data for the row: + for (int i = 0; i < versions.length; i++) + if (versions[i] == null || (merged.isStatic() && versions[i].isEmpty())) + silentRowAt[i] = true; + + // Even if there are no completely missing rows, replicas may still be silent about individual + // columns, so we need to check for divergence at the column level: + for (ColumnMetadata column : merged.isStatic() ? columns.statics : columns.regulars) + { + Arrays.fill(silentColumnAt, false); + boolean allSilent = true; + + for (int i = 0; i < versions.length; i++) { - @Override - public void onMergedPartitionLevelDeletion(DeletionTime mergedDeletion, DeletionTime[] versions) - { - // cache the deletion time versions to be able to regenerate the original row iterator - for (int i = 0; i < versions.length; i++) - builders.get(i).setDeletionTime(versions[i]); - } + // If the version at this replica is null, we've already marked it as silent: + if (versions[i] != null && versions[i].getColumnData(column) == null) + silentColumnAt[i] = true; + else + allSilent = false; + } - @Override - public void onMergedRows(Row merged, Row[] versions) - { - // Cache the row versions to be able to regenerate the original row iterator: - for (int i = 0; i < versions.length; i++) - builders.get(i).addRow(versions[i]); + for (int i = 0; i < versions.length; i++) + // Mark the replica silent if it is silent about this column and there is actually + // divergence between the replicas. (i.e. If all replicas are silent for this + // column, there is nothing to fetch to complete the row anyway.) + silentRowAt[i] |= silentColumnAt[i] && !allSilent; + } - // If all versions are empty, there's no divergence to resolve: - if (merged.isEmpty()) - return; + for (int i = 0; i < silentRowAt.length; i++) + if (silentRowAt[i]) + builders.get(i).addToFetch(merged); + } - Arrays.fill(silentRowAt, false); + @Override + public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions) + { + // cache the marker versions to be able to regenerate the original row iterator + for (int i = 0; i < versions.length; i++) + builders.get(i).addRangeTombstoneMarker(versions[i]); + } - // Mark replicas silent if they provide no data for the row: - for (int i = 0; i < versions.length; i++) - if (versions[i] == null || (merged.isStatic() && versions[i].isEmpty())) - silentRowAt[i] = true; + @Override + public void close() {} - // Even if there are no completely missing rows, replicas may still be silent about individual - // columns, so we need to check for divergence at the column level: - for (ColumnMetadata column : merged.isStatic() ? columns.statics : columns.regulars) - { - Arrays.fill(silentColumnAt, false); - boolean allSilent = true; + public void populate() + { + for (int i = 0; i < sources.size(); i++) + originalPartitions.get(i).add(builders.get(i)); + } + } - for (int i = 0; i < versions.length; i++) - { - // If the version at this replica is null, we've already marked it as silent: - if (versions[i] != null && versions[i].getColumnData(column) == null) - silentColumnAt[i] = true; - else - allSilent = false; - } + private class QueryMergeListener implements UnfilteredPartitionIterators.MergeListener + { + private PartitionMergeListerner currentListener; - for (int i = 0; i < versions.length; i++) - // Mark the replica silent if it is silent about this column and there is actually - // divergence between the replicas. (i.e. If all replicas are silent for this - // column, there is nothing to fetch to complete the row anyway.) - silentRowAt[i] |= silentColumnAt[i] && !allSilent; - } + @Override + public void close() + { + // If we hit the failure threshold before consuming a single partition, record the current rows cached. + tableMetrics.rfpRowsCachedPerQuery.update(Math.max(currentRowsCached, maxRowsCached)); + } - for (int i = 0; i < silentRowAt.length; i++) - if (silentRowAt[i]) - builders.get(i).addToFetch(merged); - } + @Override + public UnfilteredRowIterators.MergeListener getRowMergeListener(DecoratedKey partitionKey, List versions) + { + if (currentListener == null || !currentListener.key.equals(partitionKey)) + currentListener = new PartitionMergeListerner(partitionKey, versions); - @Override - public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions) - { - // cache the marker versions to be able to regenerate the original row iterator - for (int i = 0; i < versions.length; i++) - builders.get(i).addRangeTombstoneMarker(versions[i]); - } + return currentListener; + } - @Override - public void close() - { - for (int i = 0; i < sources.size(); i++) - originalPartitions.get(i).add(builders.get(i)); - } - }; - } - }; + public void populate() + { + if (currentListener != null) + currentListener.populate(); + } + } + + /** + * This listener tracks both the accepted data and the primary keys of the rows that may be incomplete. + * That way, once the query results are merged using this listener, subsequent calls to + * {@link #queryProtectedPartitions(PartitionIterator, int)} will use the collected data to return a copy of the + * data originally collected from the specified replica, completed with the potentially outdated rows. + */ + UnfilteredPartitionIterators.MergeListener mergeController() + { + return mergeListener; } private void incrementCachedRows() @@ -309,22 +350,6 @@ private void releaseCachedRows(int count) currentRowsCached -= count; } - private static RegularAndStaticColumns columns(List versions) - { - Columns statics = Columns.NONE; - Columns regulars = Columns.NONE; - for (UnfilteredRowIterator iter : versions) - { - if (iter == null) - continue; - - RegularAndStaticColumns cols = iter.columns(); - statics = statics.mergeTo(cols.statics); - regulars = regulars.mergeTo(cols.regulars); - } - return new RegularAndStaticColumns(statics, regulars); - } - /** * Returns the protected results for the specified replica. These are generated fetching the extra rows and merging * them with the cached original filtered results for that replica. @@ -346,16 +371,66 @@ public TableMetadata metadata() } @Override - public void close() { } + public void close() + { + if (currentRowIterator != null) + currentRowIterator.close(); + } @Override public boolean hasNext() { // If there are no cached partition builders for this source, advance the first phase iterator, which - // will force the RFP merge listener to load at least the next protected partition. + // will force the RFP merge listener to load rows from the next protected partition. if (partitions.isEmpty()) { - PartitionIterators.consumeNext(merged); + if (consumeEntirePartitions) + { + if (merged.hasNext()) + { + try (RowIterator partition = merged.next()) + { + while (partition.hasNext()) + partition.next(); + + mergeListener.populate(); + } + } + } + else + { + if (currentRowIterator == null || !currentRowIterator.hasNext()) + { + // If there is an iterator, it's done, so just close it. + if (currentRowIterator != null) + { + currentRowIterator.close(); + currentRowIterator = null; + } + + // Take the next filtered partition from the merged partition iterator. + if (merged.hasNext()) + currentRowIterator = merged.next(); + } + + if (currentRowIterator != null) + { + int i = 0; + + // Consume LIMIT filtered rows from the current partition, unless there are fewer results. + // The underlying iterator is short-read protected, and limiting the number of rows we + // consume avoids needless SRP reads when there are many more than LIMIT results. + while (i < command.limits().count() && currentRowIterator.hasNext()) + { + currentRowIterator.next(); + i++; + } + + // If we actually consumed a row, checkpoint to populate the builders. + if (i > 0) + mergeListener.populate(); + } + } } return !partitions.isEmpty(); @@ -487,6 +562,8 @@ public Row staticRow() public void close() { releaseCachedRows(partitionRowsCached); + toFetch = null; + // TODO: the counters might not be accurate for the static row at this point? } @Override diff --git a/src/java/org/apache/cassandra/service/reads/repair/PartitionIteratorMergeListener.java b/src/java/org/apache/cassandra/service/reads/repair/PartitionIteratorMergeListener.java index f77bd4d52ca0..5aacaf43299d 100644 --- a/src/java/org/apache/cassandra/service/reads/repair/PartitionIteratorMergeListener.java +++ b/src/java/org/apache/cassandra/service/reads/repair/PartitionIteratorMergeListener.java @@ -49,7 +49,7 @@ public UnfilteredRowIterators.MergeListener getRowMergeListener(DecoratedKey par return new RowIteratorMergeListener<>(partitionKey, columns(versions), isReversed(versions), replicaPlan, command, readRepair); } - protected RegularAndStaticColumns columns(List versions) + public static RegularAndStaticColumns columns(List versions) { Columns statics = Columns.NONE; Columns regulars = Columns.NONE; diff --git a/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java b/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java index 6ec80fd0ae19..6ab807f77c3a 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java @@ -27,11 +27,14 @@ import org.junit.Test; import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.test.TestBaseImpl; import org.apache.cassandra.index.sai.plan.StorageAttachedIndexQueryPlan; +import static org.junit.Assert.assertEquals; + import static org.apache.cassandra.distributed.api.Feature.GOSSIP; import static org.apache.cassandra.distributed.api.Feature.NETWORK; import static org.apache.cassandra.distributed.shared.AssertUtils.assertRows; @@ -222,6 +225,51 @@ public void testShortReadWithRegularColumns() assertRows(initialRows, row(0, 1, 2)); } + @Test + public void testNoShortReadAtLimit() + { + CLUSTER.schemaChange(withKeyspace("CREATE TABLE %s.no_srp_at_limit (k int, c int, a int, PRIMARY KEY (k, c)) WITH read_repair = 'NONE'")); + CLUSTER.schemaChange(withKeyspace("CREATE INDEX ON %s.no_srp_at_limit(a) USING 'sai'")); + SAIUtil.waitForIndexQueryable(CLUSTER, KEYSPACE); + + CLUSTER.get(1).executeInternal(withKeyspace("INSERT INTO %s.no_srp_at_limit(k, c, a) VALUES (0, 2, 1) USING TIMESTAMP 5")); + CLUSTER.get(2).executeInternal(withKeyspace("INSERT INTO %s.no_srp_at_limit(k, c, a) VALUES (0, 3, 1) USING TIMESTAMP 6")); + + Long srpRequestsBefore = CLUSTER.get(1).callOnInstance(() -> Keyspace.open(KEYSPACE).getColumnFamilyStore("no_srp_at_limit").metric.shortReadProtectionRequests.getCount()); + + String select = withKeyspace("SELECT * FROM %s.no_srp_at_limit WHERE k = 0 AND a = 1 LIMIT 1"); + Object[][] initialRows = CLUSTER.coordinator(1).execute(select, ConsistencyLevel.ALL); + assertRows(initialRows, row(0, 2, 1)); + + Long srpRequestsAfter = CLUSTER.get(1).callOnInstance(() -> Keyspace.open(KEYSPACE).getColumnFamilyStore("no_srp_at_limit").metric.shortReadProtectionRequests.getCount()); + assertEquals(srpRequestsBefore, srpRequestsAfter); + } + + @Test + public void testNecessaryShortRead() + { + CLUSTER.schemaChange(withKeyspace("CREATE TABLE %s.necessary_short_read (k int, c int, a int, PRIMARY KEY (k, c)) WITH read_repair = 'NONE'")); + CLUSTER.schemaChange(withKeyspace("CREATE INDEX ON %s.necessary_short_read(a) USING 'sai'")); + SAIUtil.waitForIndexQueryable(CLUSTER, KEYSPACE); + + CLUSTER.get(1).executeInternal(withKeyspace("INSERT INTO %s.necessary_short_read(k, c, a) VALUES (0, 2, 1) USING TIMESTAMP 5")); + CLUSTER.get(2).executeInternal(withKeyspace("INSERT INTO %s.necessary_short_read(k, c, a) VALUES (0, 2, 2) USING TIMESTAMP 6")); + + CLUSTER.get(2).executeInternal(withKeyspace("INSERT INTO %s.necessary_short_read(k, c, a) VALUES (0, 3, 1) USING TIMESTAMP 7")); + CLUSTER.get(1).executeInternal(withKeyspace("INSERT INTO %s.necessary_short_read(k, c, a) VALUES (0, 3, 2) USING TIMESTAMP 8")); + + CLUSTER.get(1).executeInternal(withKeyspace("INSERT INTO %s.necessary_short_read(k, c, a) VALUES (0, 4, 1) USING TIMESTAMP 9")); + + Long srpRequestsBefore = CLUSTER.get(1).callOnInstance(() -> Keyspace.open(KEYSPACE).getColumnFamilyStore("necessary_short_read").metric.shortReadProtectionRequests.getCount()); + + String select = withKeyspace("SELECT * FROM %s.necessary_short_read WHERE k = 0 AND a = 1 LIMIT 1"); + Object[][] initialRows = CLUSTER.coordinator(1).execute(select, ConsistencyLevel.ALL); + assertRows(initialRows, row(0, 4, 1)); + + Long srpRequestsAfter = CLUSTER.get(1).callOnInstance(() -> Keyspace.open(KEYSPACE).getColumnFamilyStore("necessary_short_read").metric.shortReadProtectionRequests.getCount()); + assertEquals(srpRequestsBefore + 2L, srpRequestsAfter.longValue()); + } + @Test public void testShortReadWithStaticColumn() { From 00f2b3e9f6b558d50a774ba7bf025bec30dfa885 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 28 May 2025 17:28:39 -0500 Subject: [PATCH 332/340] increment version --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 34e95ba43103..bdc05def0348 100644 --- a/build.xml +++ b/build.xml @@ -34,7 +34,7 @@ - + From 53cdefb1de469c29a3292919c6a1782b092c1fae Mon Sep 17 00:00:00 2001 From: nvharikrishna Date: Tue, 9 May 2023 21:42:09 +0530 Subject: [PATCH 333/340] Enabling single sstable uplevel by default Patch by Venkata Harikrishna Nukala; reviewed by Marcus Eriksson and Sam Tunnicliffe for CASSANDRA-18509 --- .../compaction/LeveledCompactionStrategy.java | 2 +- .../db/compaction/SingleSSTableLCSTask.java | 5 +++++ .../db/compaction/CompactionsCQLTest.java | 6 +++--- .../LeveledCompactionStrategyTest.java | 17 ++++++++++++++--- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java b/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java index fbf2894187e3..1509aa2e0371 100644 --- a/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java +++ b/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java @@ -65,7 +65,7 @@ public LeveledCompactionStrategy(ColumnFamilyStore cfs, Map opti super(cfs, options); int configuredMaxSSTableSize = DEFAULT_MAX_SSTABLE_SIZE_MIB; int configuredLevelFanoutSize = DEFAULT_LEVEL_FANOUT_SIZE; - boolean configuredSingleSSTableUplevel = false; + boolean configuredSingleSSTableUplevel = true; SizeTieredCompactionStrategyOptions localOptions = new SizeTieredCompactionStrategyOptions(options); if (options != null) { diff --git a/src/java/org/apache/cassandra/db/compaction/SingleSSTableLCSTask.java b/src/java/org/apache/cassandra/db/compaction/SingleSSTableLCSTask.java index 2d9768924cfb..7a80451e2dcb 100644 --- a/src/java/org/apache/cassandra/db/compaction/SingleSSTableLCSTask.java +++ b/src/java/org/apache/cassandra/db/compaction/SingleSSTableLCSTask.java @@ -45,6 +45,11 @@ public SingleSSTableLCSTask(ColumnFamilyStore cfs, LifecycleTransaction txn, int this.level = level; } + protected int getLevel() + { + return level; + } + @Override protected void executeInternal(ActiveCompactionsTracker activeCompactions) { diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionsCQLTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionsCQLTest.java index 34b91e151857..b18a20ec98b7 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionsCQLTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionsCQLTest.java @@ -542,10 +542,10 @@ public void testAbortNotifications() throws Throwable getCurrentColumnFamilyStore().forceBlockingFlush(ColumnFamilyStore.FlushReason.UNIT_TESTS); LeveledCompactionStrategy lcs = (LeveledCompactionStrategy) getCurrentColumnFamilyStore().getCompactionStrategyManager().getUnrepairedUnsafe().first(); - LeveledCompactionTask lcsTask; + AbstractCompactionTask lcsTask; while (true) { - lcsTask = (LeveledCompactionTask) Iterables.getOnlyElement(lcs.getNextBackgroundTasks(0), null); + lcsTask = Iterables.getOnlyElement(lcs.getNextBackgroundTasks(0), null); if (lcsTask != null) { lcsTask.execute(CompactionManager.instance.active); @@ -591,7 +591,7 @@ public void testAbortNotifications() throws Throwable // ignored } - lcsTask = (LeveledCompactionTask) Iterables.getOnlyElement(lcs.getNextBackgroundTasks(0), null); + lcsTask = Iterables.getOnlyElement(lcs.getNextBackgroundTasks(0), null); try { assertNotNull(lcsTask); diff --git a/test/unit/org/apache/cassandra/db/compaction/LeveledCompactionStrategyTest.java b/test/unit/org/apache/cassandra/db/compaction/LeveledCompactionStrategyTest.java index 0a339b5b2dc5..00bb8b25ff57 100644 --- a/test/unit/org/apache/cassandra/db/compaction/LeveledCompactionStrategyTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/LeveledCompactionStrategyTest.java @@ -599,9 +599,20 @@ private int getTaskLevel(ColumnFamilyStore cfs) { try { - assertTrue(task instanceof LeveledCompactionTask); - LeveledCompactionTask lcsTask = (LeveledCompactionTask) task; - level = Math.max(level, lcsTask.getLevel()); + if (task instanceof LeveledCompactionTask) + { + LeveledCompactionTask lcsTask = (LeveledCompactionTask) task; + level = Math.max(level, lcsTask.getLevel()); + } + else if (task instanceof SingleSSTableLCSTask) + { + SingleSSTableLCSTask singleSSTableLCSTask = (SingleSSTableLCSTask) task; + level = Math.max(level, singleSSTableLCSTask.getLevel()); + } + else + { + Assert.fail("Got unexpected task of type " + task.getClass().getCanonicalName()); + } } finally { From 8d89e160fd001d489ec7d9785cf38c94c16b2a43 Mon Sep 17 00:00:00 2001 From: Sam Tunnicliffe Date: Thu, 29 May 2025 10:25:25 +0100 Subject: [PATCH 334/340] Ninja fix CHANGES.txt --- CHANGES.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.txt b/CHANGES.txt index 98d848280158..6fdaa2776c7e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Enable single_sstable_uplevel by default for LCS (CASSANDRA-18509) * Introduce NativeAccessor to avoid new ByteBuffer allocation on flush for each NativeCell (CASSANDRA-20173) * Migrate sstableloader code to its own tools directory and artifact (CASSANDRA-20328) * Stop AutoRepair monitoring thread upon Cassandra shutdown (CASSANDRA-20623) From b3f035be1ee2d0b08010a4ac773cec20a83ef606 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 11 Apr 2025 11:40:18 +0100 Subject: [PATCH 335/340] Accord Fixes: - cfk pruning+prebootstrap=invalid future dependency - exclude retired ranges when filtering RX stillTouches - propagate uses incorrect lowEpoch when fetch finds additional owned/touched ranges - node.withEpoch should callback with TopologyRetiredException, not throw - Recovery can race with durable-applied pruning; must not send durable unless latest ballot on apply - removeRedundantDependencies was not slicing pre-bootstrap range calculation to participating ranges - NPE in TopologyManager.atLeast caused by referencing an epoch that has been GC'd - use journal durableBeforePersister in burn test, not NOOP_PERSISTER - ServerUtils.cleanupDirectory use tryDeleteRecursive - FsyncRunnable shutdown - fix NPE in AccordJournalBurnTest patch by Benedict; reviewed by Alex Petrov for CASSANDRA-20688 --- modules/accord | 2 +- .../org/apache/cassandra/io/util/File.java | 9 ++++ .../apache/cassandra/io/util/PathUtils.java | 46 +++++++++++++++---- .../org/apache/cassandra/journal/Flusher.java | 18 ++++++-- .../service/accord/AccordCommandStore.java | 4 +- .../accord/AccordConfigurationService.java | 4 +- .../accord/AccordJournalValueSerializers.java | 2 - .../service/accord/AccordKeyspace.java | 1 - .../accord/AccordSafeCommandStore.java | 1 - .../service/accord/AccordService.java | 1 - .../cassandra/service/accord/AccordTask.java | 5 +- .../service/accord/CommandsForRanges.java | 2 +- .../accord/interop/AccordInteropAdapter.java | 21 +++++---- .../accord/interop/AccordInteropApply.java | 19 ++++---- .../interop/AccordInteropExecution.java | 6 ++- .../accord/interop/AccordInteropPersist.java | 5 +- .../accord/serializers/ApplySerializers.java | 14 ++++-- .../serializers/CheckStatusSerializers.java | 7 ++- .../serializers/ReadDataSerializers.java | 1 - .../accord/serializers/ResultSerializers.java | 2 +- .../serializers/TxnRequestSerializer.java | 2 +- .../accord/txn/AccordUpdateParameters.java | 2 - .../service/accord/txn/TxnNamedRead.java | 3 -- .../service/accord/txn/TxnUpdate.java | 6 +-- .../distributed/test/HintsMaxSizeTest.java | 2 - .../SSTableLoaderEncryptionOptionsTest.java | 2 - .../test/log/FetchLogFromPeers2Test.java | 4 +- .../distributed/test/tcm/SplitBrainTest.java | 2 - .../service/accord/AccordJournalBurnTest.java | 23 +++++++++- .../org/apache/cassandra/ServerTestUtils.java | 2 +- .../CommitLogSegmentManagerCDCTest.java | 2 - .../db/commitlog/CommitlogShutdownTest.java | 2 - .../hints/HintServiceBytemanTest.java | 2 - .../io/util/SafeMemoryWriterTest.java | 2 - .../CheckStatusSerializersTest.java | 4 +- .../CommandsForKeySerializerTest.java | 1 - .../tcm/DiscoverySimulationTest.java | 2 - 37 files changed, 143 insertions(+), 90 deletions(-) diff --git a/modules/accord b/modules/accord index 7f95490b1390..c5a984cfe41b 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 7f95490b1390b7fc68a4ff4ced7f161bafd8776b +Subproject commit c5a984cfe41bb8d8f1d7a4cb446c194f829a5dd1 diff --git a/src/java/org/apache/cassandra/io/util/File.java b/src/java/org/apache/cassandra/io/util/File.java index de415388ed9e..e814e37b741a 100644 --- a/src/java/org/apache/cassandra/io/util/File.java +++ b/src/java/org/apache/cassandra/io/util/File.java @@ -225,6 +225,15 @@ public void deleteRecursive() PathUtils.deleteRecursive(toPathForWrite()); } + /** + * Deletes all files and subdirectories under "dir". + * @return false if the root cannot be deleted + */ + public boolean tryDeleteRecursive() + { + return PathUtils.tryDeleteRecursive(toPathForWrite()); + } + /** * Try to delete the file on process exit. */ diff --git a/src/java/org/apache/cassandra/io/util/PathUtils.java b/src/java/org/apache/cassandra/io/util/PathUtils.java index 8ddd939b4c09..fa0a91543e46 100644 --- a/src/java/org/apache/cassandra/io/util/PathUtils.java +++ b/src/java/org/apache/cassandra/io/util/PathUtils.java @@ -346,11 +346,22 @@ public static Throwable delete(Path file, Throwable accumulate, @Nullable RateLi private static void deleteRecursiveUsingNixCommand(Path path, boolean quietly) { String [] cmd = new String[]{ "rm", quietly ? "-rdf" : "-rd", path.toAbsolutePath().toString() }; + IOException failure = null; + if (!quietly && !Files.exists(path)) + failure = new NoSuchFileException(path.toString()); + + if (failure == null) + failure = tryDeleteRecursiveUsingNixCommand(path, quietly); + + if (failure != null) + throw propagateUnchecked(failure, path, true); + } + + private static IOException tryDeleteRecursiveUsingNixCommand(Path path, boolean quietly) + { + String[] cmd = new String[]{ "rm", quietly ? "-rdf" : "-rd", path.toAbsolutePath().toString() }; try { - if (!quietly && !Files.exists(path)) - throw new NoSuchFileException(path.toString()); - Process p = Runtime.getRuntime().exec(cmd); int result = p.waitFor(); @@ -363,24 +374,39 @@ private static void deleteRecursiveUsingNixCommand(Path path, boolean quietly) } if (result != 0 && Files.exists(path)) - { - logger.error("{} returned:\nstdout:\n{}\n\nstderr:\n{}", Arrays.toString(cmd), out, err); - throw new IOException(String.format("%s returned non-zero exit code: %d%nstdout:%n%s%n%nstderr:%n%s", Arrays.toString(cmd), result, out, err)); - } + return new IOException(String.format("%s returned non-zero exit code: %d%nstdout:%n%s%n%nstderr:%n%s", Arrays.toString(cmd), result, out, err)); onDeletion.accept(path); + return null; } catch (IOException e) { - throw propagateUnchecked(e, path, true); + return e; } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new FSWriteError(e, path); + return new IOException("Interrupted while executing command " + Arrays.toString(cmd), e); } } + + /** + * Deletes all files and subdirectories under "path". + * @param path file to be deleted + * @return false if the root cannot be deleted + */ + public static boolean tryDeleteRecursive(Path path) + { + if (USE_NIX_RECURSIVE_DELETE.getBoolean() && path.getFileSystem() == java.nio.file.FileSystems.getDefault()) + return null == tryDeleteRecursiveUsingNixCommand(path, true); + + if (isDirectory(path)) + forEach(path, PathUtils::tryDeleteRecursive); + + // The directory should now be empty, so now it can be smoked + return tryDelete(path); + } + /** * Deletes all files and subdirectories under "path". * @param path file to be deleted diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index d48d80171bc3..02f85df0cbfc 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -143,10 +143,20 @@ public void run(Interruptible.State state) throws InterruptedException } } + private boolean hasWork() + { + return hasWork(fsyncStartedFor); + } + + private boolean hasWork(long lastStartedAt) + { + return fsyncWaitingSince != lastStartedAt; + } + private void awaitWork() throws InterruptedException { long lastStartedAt = fsyncStartedFor; - if (fsyncWaitingSince != lastStartedAt) + if (hasWork(lastStartedAt)) return; awaitingWork = Thread.currentThread(); @@ -158,7 +168,7 @@ private void awaitWork() throws InterruptedException throw new InterruptedException(); } - if (fsyncWaitingSince != lastStartedAt) + if (hasWork(lastStartedAt)) break; LockSupport.park(); @@ -175,7 +185,9 @@ void notify(Thread notify) public void doRun(Interruptible.State state) throws InterruptedException { - awaitWork(); + if (state == NORMAL) awaitWork(); + else if (!hasWork()) return; + if (fsyncing == null) fsyncing = journal.oldestActiveSegment(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 4e8629696ad2..ab74a128a70d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -501,7 +501,9 @@ public AccordCompactionInfo getCompactionInfo() RedundantBefore redundantBefore; if (safeRedundantBefore == null) redundantBefore = RedundantBefore.EMPTY; else redundantBefore = safeRedundantBefore.redundantBefore; - return new AccordCompactionInfo(id, redundantBefore, rangesForEpoch, tableId); + CommandStores.RangesForEpoch ranges = this.rangesForEpoch; + if (ranges == null) ranges = CommandStores.RangesForEpoch.EMPTY; + return new AccordCompactionInfo(id, redundantBefore, ranges, tableId); } public RangeSearcher rangeSearcher() diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index e0ca26ee5d96..fade204b1f7f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -38,6 +38,7 @@ import accord.topology.Shard; import accord.topology.Topology; import accord.utils.Invariants; +import accord.utils.SortedListSet; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.agrona.collections.LongArrayList; @@ -449,8 +450,7 @@ protected void localSyncComplete(Topology topology, boolean startSync) epochState.setSyncStatus(SyncStatus.NOTIFYING); } - // TODO (required): replace with SortedArraySet when it is available - Set notify = new HashSet<>(topology.nodes()); + Set notify = SortedListSet.allOf(topology.nodes()); notify.remove(localId); syncPropagator.reportSyncComplete(epoch, notify, localId); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java index e72beaab2c3c..58b238d31f59 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java @@ -240,8 +240,6 @@ public void reserialize(JournalKey key, DurableBeforeAccumulator from, DataOutpu @Override public void deserialize(JournalKey journalKey, DurableBeforeAccumulator into, DataInputPlus in, Version userVersion) throws IOException { - // TODO: maybe using local serializer is not the best call here, but how do we distinguish - // between messaging and disk versioning? into.update(CommandStoreSerializers.durableBefore.deserialize(in)); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index e87250fa5217..e597d7d4155c 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -129,7 +129,6 @@ public class AccordKeyspace public static final Set TABLE_NAMES = ImmutableSet.of(COMMANDS_FOR_KEY, JOURNAL); - // TODO (desired): implement a custom type so we can get correct sort order public static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); private static final ClusteringIndexFilter FULL_PARTITION = new ClusteringIndexNamesFilter(BTreeSet.of(new ClusteringComparator(), Clustering.EMPTY), false); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 5ace8976c801..bc7c7cf8418e 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -225,7 +225,6 @@ public void visit(Unseekables keysOrRanges, Timestamp startedBefore, commandsForRanges.visit(keysOrRanges, startedBefore, testKind, visitor, p1, p2); } - // TODO (expected): instead of accepting a slice, accept the min/max epoch and let implementation handle it @Override public boolean visit(Unseekables keysOrRanges, TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, Timestamp testStartedAtTimestamp, ComputeIsDep computeIsDep, AllCommandVisitor visit) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 48c92e6296d3..f85aee62afaf 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -467,7 +467,6 @@ private TopologyRange fetchTopologies(long from) throws ExecutionException, Inte if (response.current >= from) return response; - metadata = ClusterMetadata.current(); } catch (Throwable e) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordTask.java b/src/java/org/apache/cassandra/service/accord/AccordTask.java index 61d86db9bf6f..9930d2093eac 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTask.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTask.java @@ -194,11 +194,11 @@ boolean isComplete() private final String loggingId; private static final AtomicLong nextLoggingId = new AtomicLong(Clock.Global.currentTimeMillis()); - // TODO (expected): merge all of these maps into one + // TODO (desired): merge all of these maps into one @Nullable Object2ObjectHashMap commands; @Nullable Object2ObjectHashMap commandsForKey; @Nullable Object2ObjectHashMap> loading; - // TODO (expected): collection supporting faster deletes but still fast poll (e.g. some ordered collection) + // TODO (desired): collection supporting faster deletes but still fast poll (e.g. some ordered collection) @Nullable ArrayDeque> waitingToLoad; @Nullable RangeTxnScanner rangeScanner; boolean hasRanges; @@ -662,7 +662,6 @@ public void run() safeStore = commandStore.begin(this, commandsForRanges); R result = apply(safeStore); - // TODO (required): currently, we are not very efficient about ensuring that we persist the absolute minimum amount of state. Improve that. List changes = null; if (commands != null) { diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index ffd1754d2ca8..470822906f7a 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -46,7 +46,7 @@ import static accord.local.CommandSummaries.SummaryStatus.NOT_DIRECTLY_WITNESSED; -// TODO (required): move to accord-core, merge with existing logic there +// TODO (expected): move to accord-core, merge with existing logic there public class CommandsForRanges extends TreeMap implements CommandSummaries.ByTxnIdSnapshot { public CommandsForRanges(Map m) diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java index 91acc361d1c5..ca466105eb8b 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java @@ -31,6 +31,7 @@ import accord.coordinate.ExecutePath; import accord.local.Node; import accord.messages.Apply; +import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.Route; @@ -86,23 +87,23 @@ private AccordInteropAdapter(InteropExecutor executor, AccordEndpointMapper endp } @Override - public void execute(Node node, Topologies any, FullRoute route, ExecutePath path, ExecuteFlags executeFlags, TxnId txnId, Txn txn, Timestamp executeAt, Deps stableDeps, Deps sendDeps, BiConsumer callback) + public void execute(Node node, Topologies any, FullRoute route, Ballot ballot, ExecutePath path, ExecuteFlags executeFlags, TxnId txnId, Txn txn, Timestamp executeAt, Deps stableDeps, Deps sendDeps, BiConsumer callback) { - if (!doInteropExecute(node, route, txnId, txn, executeAt, stableDeps, callback)) - super.execute(node, any, route, path, executeFlags, txnId, txn, executeAt, stableDeps, sendDeps, callback); + if (!doInteropExecute(node, route, ballot, txnId, txn, executeAt, stableDeps, callback)) + super.execute(node, any, route, ballot, path, executeFlags, txnId, txn, executeAt, stableDeps, sendDeps, callback); } @Override - public void persist(Node node, Topologies any, Route require, Route sendTo, SelectNodeOwnership selectSendTo, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer callback) + public void persist(Node node, Topologies any, Route require, Route sendTo, SelectNodeOwnership selectSendTo, FullRoute route, Ballot ballot, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer callback) { - if (applyKind == Minimal && doInteropPersist(node, any, require, sendTo, selectSendTo, txnId, txn, executeAt, deps, writes, result, route, callback)) + if (applyKind == Minimal && doInteropPersist(node, any, require, sendTo, selectSendTo, ballot, txnId, txn, executeAt, deps, writes, result, route, callback)) return; - super.persist(node, any, require, sendTo, selectSendTo, route, txnId, txn, executeAt, deps, writes, result, callback); + super.persist(node, any, require, sendTo, selectSendTo, route, ballot, txnId, txn, executeAt, deps, writes, result, callback); } - private boolean doInteropExecute(Node node, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer callback) + private boolean doInteropExecute(Node node, FullRoute route, Ballot ballot, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer callback) { // Unrecoverable repair always needs to be run by AccordInteropExecution AccordUpdate.Kind updateKind = AccordUpdate.kind(txn.update()); @@ -110,12 +111,12 @@ private boolean doInteropExecute(Node node, FullRoute route, TxnId txnId, Txn if (updateKind != AccordUpdate.Kind.UNRECOVERABLE_REPAIR && (consistencyLevel == null || consistencyLevel == ConsistencyLevel.ONE || txn.read().keys().isEmpty())) return false; - new AccordInteropExecution(node, txnId, txn, updateKind, route, executeAt, deps, callback, executor, consistencyLevel, endpointMapper) + new AccordInteropExecution(node, txnId, txn, updateKind, route, ballot, executeAt, deps, callback, executor, consistencyLevel, endpointMapper) .start(); return true; } - private boolean doInteropPersist(Node node, Topologies any, Route require, Route sendTo, SelectNodeOwnership selectSendTo, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute, BiConsumer callback) + private boolean doInteropPersist(Node node, Topologies any, Route require, Route sendTo, SelectNodeOwnership selectSendTo, Ballot ballot, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute, BiConsumer callback) { Update update = txn.update(); ConsistencyLevel consistencyLevel = update instanceof AccordUpdate ? ((AccordUpdate) update).cassandraCommitCL() : null; @@ -123,7 +124,7 @@ private boolean doInteropPersist(Node node, Topologies any, Route require, Ro return false; Topologies all = execution(node, any, sendTo, selectSendTo, fullRoute, txnId, executeAt); - new AccordInteropPersist(node, all, txnId, require, txn, executeAt, deps, writes, result, fullRoute, consistencyLevel, callback) + new AccordInteropPersist(node, all, txnId, require, ballot, txn, executeAt, deps, writes, result, fullRoute, consistencyLevel, callback) .start(Minimal, any, writes, result); return true; } diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java index b9af95db342d..3a5671c97066 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java @@ -29,6 +29,7 @@ import accord.local.StoreParticipants; import accord.messages.Apply; import accord.messages.MessageType; +import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; @@ -65,23 +66,23 @@ public class AccordInteropApply extends Apply implements LocalListeners.ComplexL public static final Apply.Factory FACTORY = new Apply.Factory() { @Override - public Apply create(Kind kind, Id to, Topologies participates, TxnId txnId, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute) + public Apply create(Kind kind, Id to, Topologies participates, TxnId txnId, Ballot ballot, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute) { checkArgument(kind != Kind.Maximal, "Shouldn't need to send a maximal commit with interop support"); ConsistencyLevel commitCL = txn.update() instanceof AccordUpdate ? ((AccordUpdate) txn.update()).cassandraCommitCL() : null; // Any asynchronous apply option should use the regular Apply that doesn't wait for writes to complete if (commitCL == null || commitCL == ConsistencyLevel.ANY) - return Apply.FACTORY.create(kind, to, participates, txnId, route, txn, executeAt, deps, writes, result, fullRoute); - return new AccordInteropApply(kind, to, participates, txnId, route, txn, executeAt, deps, writes, result, fullRoute); + return Apply.FACTORY.create(kind, to, participates, txnId, ballot, route, txn, executeAt, deps, writes, result, fullRoute); + return new AccordInteropApply(kind, to, participates, txnId, ballot, route, txn, executeAt, deps, writes, result, fullRoute); } }; public static final IVersionedSerializer serializer = new ApplySerializer() { @Override - protected AccordInteropApply deserializeApply(TxnId txnId, Route scope, long minEpoch, long waitForEpoch, long maxEpoch, Apply.Kind kind, Timestamp executeAt, PartialDeps deps, PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) + protected AccordInteropApply deserializeApply(TxnId txnId, Ballot ballot, Route scope, long minEpoch, long waitForEpoch, long maxEpoch, Apply.Kind kind, Timestamp executeAt, PartialDeps deps, PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { - return new AccordInteropApply(kind, txnId, scope, minEpoch, waitForEpoch, maxEpoch, executeAt, deps, txn, fullRoute, writes, result); + return new AccordInteropApply(kind, txnId, ballot, scope, minEpoch, waitForEpoch, maxEpoch, executeAt, deps, txn, fullRoute, writes, result); } }; @@ -89,14 +90,14 @@ protected AccordInteropApply deserializeApply(TxnId txnId, Route scope, long transient Int2ObjectHashMap listeners; boolean failed; - private AccordInteropApply(Kind kind, TxnId txnId, Route route, long minEpoch, long waitForEpoch, long maxEpoch, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) + private AccordInteropApply(Kind kind, TxnId txnId, Ballot ballot, Route route, long minEpoch, long waitForEpoch, long maxEpoch, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { - super(kind, txnId, route, minEpoch, waitForEpoch, maxEpoch, executeAt, deps, txn, fullRoute, writes, result); + super(kind, txnId, ballot, route, minEpoch, waitForEpoch, maxEpoch, executeAt, deps, txn, fullRoute, writes, result); } - private AccordInteropApply(Kind kind, Id to, Topologies participates, TxnId txnId, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute) + private AccordInteropApply(Kind kind, Id to, Topologies participates, TxnId txnId, Ballot ballot, Route route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute) { - super(kind, to, participates, txnId, route, txn, executeAt, deps, writes, result, fullRoute); + super(kind, to, participates, txnId, ballot, route, txn, executeAt, deps, writes, result, fullRoute); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java index 2d82cf174982..ffd42f0c4890 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropExecution.java @@ -144,6 +144,7 @@ public AsyncChain build(Callable task) private final TxnId txnId; private final Txn txn; private final FullRoute route; + private final Ballot ballot; private final Timestamp executeAt; private final Deps deps; private final BiConsumer callback; @@ -161,7 +162,7 @@ public AsyncChain build(Callable task) private final Set contacted; private final AccordUpdate.Kind updateKind; - public AccordInteropExecution(Node node, TxnId txnId, Txn txn, AccordUpdate.Kind updateKind, FullRoute route, Timestamp executeAt, Deps deps, BiConsumer callback, + public AccordInteropExecution(Node node, TxnId txnId, Txn txn, AccordUpdate.Kind updateKind, FullRoute route, Ballot ballot, Timestamp executeAt, Deps deps, BiConsumer callback, AgentExecutor executor, ConsistencyLevel consistencyLevel, AccordEndpointMapper endpointMapper) { requireArgument(!txn.read().keys().isEmpty() || updateKind == AccordUpdate.Kind.UNRECOVERABLE_REPAIR); @@ -169,6 +170,7 @@ public AccordInteropExecution(Node node, TxnId txnId, Txn txn, AccordUpdate.Kind this.txnId = txnId; this.txn = txn; this.route = route; + this.ballot = ballot; this.executeAt = executeAt; this.deps = deps; this.callback = callback; @@ -402,7 +404,7 @@ public void start() CommandStore cs = node.commandStores().select(route.homeKey()); result.beginAsResult().withExecutor(cs).begin((data, failure) -> { if (failure == null) - ((CoordinationAdapter)node.coordinationAdapter(txnId, Standard)).persist(node, executes, route, txnId, txn, executeAt, deps, txnId.is(Write) ? txn.execute(txnId, executeAt, data) : null, txn.result(txnId, executeAt, data), callback); + ((CoordinationAdapter)node.coordinationAdapter(txnId, Standard)).persist(node, executes, route, ballot, txnId, txn, executeAt, deps, txnId.is(Write) ? txn.execute(txnId, executeAt, data) : null, txn.result(txnId, executeAt, data), callback); else callback.accept(null, failure); }); diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java index e61d3934b552..967438ea56f0 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropPersist.java @@ -28,6 +28,7 @@ import accord.coordinate.tracking.ResponseTracker; import accord.local.Node; import accord.messages.Apply; +import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; import accord.primitives.Route; @@ -108,9 +109,9 @@ boolean recordCallbackFailure(Throwable throwable) private final ConsistencyLevel consistencyLevel; private CallbackHolder callback; - public AccordInteropPersist(Node node, Topologies topologies, TxnId txnId, Route sendTo, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute, ConsistencyLevel consistencyLevel, BiConsumer clientCallback) + public AccordInteropPersist(Node node, Topologies topologies, TxnId txnId, Route sendTo, Ballot ballot, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, FullRoute fullRoute, ConsistencyLevel consistencyLevel, BiConsumer clientCallback) { - super(node, topologies, txnId, sendTo, txn, executeAt, deps, writes, result, fullRoute, AccordInteropApply.FACTORY); + super(node, topologies, txnId, ballot, sendTo, txn, executeAt, deps, writes, result, fullRoute, AccordInteropApply.FACTORY); Invariants.requireArgument(consistencyLevel == ConsistencyLevel.QUORUM || consistencyLevel == ConsistencyLevel.ALL || consistencyLevel == ConsistencyLevel.SERIAL || consistencyLevel == ConsistencyLevel.ONE); this.consistencyLevel = consistencyLevel; registerClientCallback(result, clientCallback); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index cf3f449354d3..181634cc508c 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -22,6 +22,7 @@ import accord.api.Result; import accord.messages.Apply; +import accord.primitives.Ballot; import accord.primitives.FullRoute; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; @@ -65,6 +66,7 @@ public abstract static class ApplySerializer extends TxnRequest @Override public void serializeBody(A apply, DataOutputPlus out, Version version) throws IOException { + CommandSerializers.ballot.serialize(apply.ballot, out); out.writeVInt(apply.minEpoch - apply.waitForEpoch); out.writeUnsignedVInt(apply.maxEpoch - apply.minEpoch); kind.serialize(apply.kind, out); @@ -76,15 +78,16 @@ public void serializeBody(A apply, DataOutputPlus out, Version version) throws I CommandSerializers.writes.serialize(apply.writes, out, version); } - protected abstract A deserializeApply(TxnId txnId, Route scope, long minEpoch, long waitForEpoch, long maxEpoch, Apply.Kind kind, + protected abstract A deserializeApply(TxnId txnId, Ballot ballot, Route scope, long minEpoch, long waitForEpoch, long maxEpoch, Apply.Kind kind, Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result); @Override public A deserializeBody(DataInputPlus in, Version version, TxnId txnId, Route scope, long waitForEpoch) throws IOException { + Ballot ballot = CommandSerializers.ballot.deserialize(in); long minEpoch = waitForEpoch + in.readVInt(); long maxEpoch = minEpoch + in.readUnsignedVInt(); - return deserializeApply(txnId, scope, minEpoch, waitForEpoch, maxEpoch, + return deserializeApply(txnId, ballot, scope, minEpoch, waitForEpoch, maxEpoch, kind.deserialize(in), ExecuteAtSerializer.deserialize(txnId, in), DepsSerializers.partialDeps.deserialize(in), @@ -97,7 +100,8 @@ public A deserializeBody(DataInputPlus in, Version version, TxnId txnId, Route request = new ApplySerializer<>() { @Override - protected Apply deserializeApply(TxnId txnId, Route scope, long minEpoch, long waitForEpoch, long maxEpoch, Apply.Kind kind, + protected Apply deserializeApply(TxnId txnId, Ballot ballot, Route scope, long minEpoch, long waitForEpoch, long maxEpoch, Apply.Kind kind, Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result) { - return Apply.SerializationSupport.create(txnId, scope, minEpoch, waitForEpoch, maxEpoch, kind, executeAt, deps, txn, fullRoute, writes, result); + return Apply.SerializationSupport.create(txnId, ballot, scope, minEpoch, waitForEpoch, maxEpoch, kind, executeAt, deps, txn, fullRoute, writes, result); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index 9933f53f9be3..6981761612fb 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -130,6 +130,7 @@ public void serialize(CheckStatus check, DataOutputPlus out) throws IOException KeySerializers.participants.serialize(check.query, out); out.writeUnsignedVInt(check.sourceEpoch); out.writeByte(check.includeInfo.ordinal()); + CommandSerializers.ballot.serialize(check.bumpBallot, out); } @Override @@ -139,7 +140,8 @@ public CheckStatus deserialize(DataInputPlus in) throws IOException Participants query = KeySerializers.participants.deserialize(in); long sourceEpoch = in.readUnsignedVInt(); CheckStatus.IncludeInfo info = infos[in.readByte()]; - return new CheckStatus(txnId, query, sourceEpoch, info); + Ballot ballot = CommandSerializers.ballot.deserialize(in); + return new CheckStatus(txnId, query, sourceEpoch, info, ballot); } @Override @@ -148,7 +150,8 @@ public long serializedSize(CheckStatus check) return CommandSerializers.txnId.serializedSize(check.txnId) + KeySerializers.participants.serializedSize(check.query) + TypeSizes.sizeofUnsignedVInt(check.sourceEpoch) - + TypeSizes.BYTE_SIZE; + + TypeSizes.BYTE_SIZE + + CommandSerializers.ballot.serializedSize(check.bumpBallot); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 3aedf090ea79..82e702b5c223 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -223,7 +223,6 @@ private static ReadDataSerializer serializerFor(ReadType type) public static final class ReplySerializer implements IVersionedSerializer { - // TODO (expected): use something other than ordinal final CommitOrReadNack[] nacks = CommitOrReadNack.values(); private final VersionedSerializer dataSerializer; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java index 0e3413905fb1..5d2d5efa4603 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ResultSerializers.java @@ -26,7 +26,7 @@ public class ResultSerializers { - // TODO (expected): this is meant to encode e.g. whether the transaction's condition met or not for clients to later query + // TODO (desired): this is meant to encode e.g. whether the transaction's condition met or not for clients to later query public static final Result APPLIED = new Result() { @Override diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java index 800a491b6d39..fe2cbe26136b 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/TxnRequestSerializer.java @@ -52,7 +52,7 @@ public final T deserialize(DataInputPlus in, Version version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in); Route scope = KeySerializers.route.deserialize(in); - // TODO: there should be a base epoch + // TODO (desired): there should be a base epoch long waitForEpoch = in.readUnsignedVInt(); return deserializeBody(in, version, txnId, scope, waitForEpoch); } diff --git a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java index 1bf46889dd59..efb222b6557f 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java +++ b/src/java/org/apache/cassandra/service/accord/txn/AccordUpdateParameters.java @@ -78,7 +78,6 @@ public UpdateParameters updateParameters(TableMetadata metadata, DecoratedKey dk // For the time being, guardrails are disabled for Accord queries. ClientState disabledGuardrails = null; - // TODO : How should Accord work with TTL? int ttl = metadata.params.defaultTimeToLive; return new RowUpdateParameters(metadata, disabledGuardrails, @@ -103,7 +102,6 @@ private Map prefetchRow(TableMetadata metadata, Decorat checkState(data.entrySet().size() == 1, "CAS read should only have one entry"); return ImmutableMap.of(dk, value); case AUTO_READ: - // TODO (review): Is this the right DK being passed into that matches what we used to store in TxnDataName if (TxnData.txnDataNameIndex(name) == index) return ImmutableMap.of(dk, value); default: diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java index 2f84c849e691..ea27a398c2e8 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnNamedRead.java @@ -235,9 +235,6 @@ public AsyncChain read(TableMetadatas tables, ConsistencyLevel consistency if (command == null) return AsyncResults.success(TxnData.NOOP_DATA); - // TODO (required, safety): before release, double check reasoning that this is safe -// AccordCommandsForKey cfk = ((SafeAccordCommandStore)safeStore).commandsForKey(key); -// int nowInSeconds = cfk.nowInSecondsFor(executeAt, isForWriteTxn); // It's fine for our nowInSeconds to lag slightly our insertion timestamp, as to the user // this simply looks like the transaction witnessed TTL'd data and the data then expired // immediately after the transaction executed, and this simplifies things a great deal diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java index 8f3baf374e49..1fdc0e54f9ee 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnUpdate.java @@ -174,7 +174,7 @@ public boolean preserveTimestamps() public Update slice(Ranges ranges) { Keys keys = this.keys.slice(ranges); - // TODO: Slice the condition. + // TODO (desired): Slice the condition. return new TxnUpdate(tables, keys, select(this.keys, keys, fragments), condition, cassandraCommitCL, preserveTimestamps); } @@ -182,7 +182,7 @@ public Update slice(Ranges ranges) public Update intersecting(Participants participants) { Keys keys = this.keys.intersecting(participants); - // TODO: Slice the condition. + // TODO (desired): Slice the condition. return new TxnUpdate(tables, keys, select(this.keys, keys, fragments), condition, cassandraCommitCL, preserveTimestamps); } @@ -201,9 +201,9 @@ private static ByteBuffer[] select(Keys in, Keys out, ByteBuffer[] from) @Override public Update merge(Update update) { - // TODO: special method for linear merging keyed and non-keyed lists simultaneously TxnUpdate that = (TxnUpdate) update; Keys mergedKeys = this.keys.with(that.keys); + // TODO (desired): special method for linear merging keyed and non-keyed lists simultaneously ByteBuffer[] mergedFragments = merge(this.keys, that.keys, this.fragments, that.fragments, mergedKeys.size()); return new TxnUpdate(tables, mergedKeys, mergedFragments, condition, cassandraCommitCL, preserveTimestamps); } diff --git a/test/distributed/org/apache/cassandra/distributed/test/HintsMaxSizeTest.java b/test/distributed/org/apache/cassandra/distributed/test/HintsMaxSizeTest.java index b1cf19e703e3..7e25f4311ddc 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/HintsMaxSizeTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/HintsMaxSizeTest.java @@ -19,7 +19,6 @@ import java.util.UUID; -import org.junit.Ignore; import org.junit.Test; import org.apache.cassandra.auth.CassandraRoleManager; @@ -44,7 +43,6 @@ @SuppressWarnings("Convert2MethodRef") public class HintsMaxSizeTest extends TestBaseImpl { - @Ignore @Test public void testMaxHintedHandoffSize() throws Exception { diff --git a/test/distributed/org/apache/cassandra/distributed/test/SSTableLoaderEncryptionOptionsTest.java b/test/distributed/org/apache/cassandra/distributed/test/SSTableLoaderEncryptionOptionsTest.java index b3de4edbd6da..ea3c98058605 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/SSTableLoaderEncryptionOptionsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/SSTableLoaderEncryptionOptionsTest.java @@ -26,7 +26,6 @@ import org.apache.commons.io.FileUtils; import org.junit.AfterClass; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; @@ -106,7 +105,6 @@ public void bulkLoaderSuccessfullyStreamsOverSsl() throws Throwable assertRows(CLUSTER.get(1).executeInternal("SELECT count(*) FROM ssl_upload_tables.test"), row(42L)); } - @Ignore @Test public void bulkLoaderSuccessfullyStreamsOverSslWithDeprecatedSslStoragePort() throws Throwable { diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java index d42c3796836f..78c67c9e157b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/FetchLogFromPeers2Test.java @@ -90,7 +90,9 @@ public void runQuery(Cluster cluster, ClusterState clusterState, Operation opera cluster.coordinator(coordinator).execute(withKeyspace(query), ConsistencyLevel.QUORUM); fail("should fail"); } - catch (Exception ignored) {} + catch (Exception ignored) + { + } boolean metricBumped = false; for (int i = 1; i <= cluster.size(); i++) diff --git a/test/distributed/org/apache/cassandra/distributed/test/tcm/SplitBrainTest.java b/test/distributed/org/apache/cassandra/distributed/test/tcm/SplitBrainTest.java index 9e35a71d83fe..1382f8b063ea 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/tcm/SplitBrainTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/tcm/SplitBrainTest.java @@ -30,7 +30,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; import org.apache.cassandra.distributed.Cluster; @@ -57,7 +56,6 @@ public class SplitBrainTest extends TestBaseImpl { - @Ignore @Test public void testSplitBrainStartup() throws IOException, TimeoutException { diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java index 14d1587c5bd6..0c0f43dc2d63 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java @@ -40,11 +40,17 @@ import accord.impl.basic.Cluster; import accord.impl.basic.RandomDelayQueue; import accord.local.CommandStores; +import accord.local.DurableBefore; import accord.local.Node; +import accord.local.RedundantBefore; import accord.primitives.EpochSupplier; import accord.utils.DefaultRandom; import accord.utils.Invariants; +import accord.utils.PersistentField; import accord.utils.RandomSource; +import accord.utils.async.AsyncChains; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Directories; @@ -308,6 +314,13 @@ public void replay(CommandStores commandStores) this.closeCurrentSegmentForTestingIfNonEmpty(); super.replay(commandStores); } + + @Override + public PersistentField.Persister durableBeforePersister() + { + // TODO (required): we should be persisting in the journal, but this currently causes the burn test to take far too long + return DurableBefore.NOOP_PERSISTER; + } }; return journal; @@ -330,9 +343,15 @@ public static IAccordService.AccordCompactionInfos getCompactionInfo(Node node, { IAccordService.AccordCompactionInfos compactionInfos = new IAccordService.AccordCompactionInfos(node.durableBefore(), node.topology().minEpoch()); node.commandStores().forEachCommandStore(commandStore -> { + RedundantBefore redundantBefore = commandStore.unsafeGetRedundantBefore(); + if (redundantBefore == null) + redundantBefore = RedundantBefore.EMPTY; + CommandStores.RangesForEpoch rangesForEpoch = commandStore.unsafeGetRangesForEpoch(); + if (rangesForEpoch == null) + rangesForEpoch = CommandStores.RangesForEpoch.EMPTY; compactionInfos.put(commandStore.id(), new IAccordService.AccordCompactionInfo(commandStore.id(), - commandStore.unsafeGetRedundantBefore(), - commandStore.unsafeGetRangesForEpoch(), + redundantBefore, + rangesForEpoch, tableId)); }); return compactionInfos; diff --git a/test/unit/org/apache/cassandra/ServerTestUtils.java b/test/unit/org/apache/cassandra/ServerTestUtils.java index a9f53a340e6a..702e2b687100 100644 --- a/test/unit/org/apache/cassandra/ServerTestUtils.java +++ b/test/unit/org/apache/cassandra/ServerTestUtils.java @@ -238,7 +238,7 @@ private static void cleanupDirectory(File directory) { if (directory.exists()) { - Arrays.stream(directory.tryList()).forEach(File::deleteRecursive); + Arrays.stream(directory.tryList()).forEach(File::tryDeleteRecursive); } } diff --git a/test/unit/org/apache/cassandra/db/commitlog/CommitLogSegmentManagerCDCTest.java b/test/unit/org/apache/cassandra/db/commitlog/CommitLogSegmentManagerCDCTest.java index 08b9df49f102..f23e16788ebd 100644 --- a/test/unit/org/apache/cassandra/db/commitlog/CommitLogSegmentManagerCDCTest.java +++ b/test/unit/org/apache/cassandra/db/commitlog/CommitLogSegmentManagerCDCTest.java @@ -33,7 +33,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; @@ -110,7 +109,6 @@ public void testSegmentFlaggingWithNonblockingOnCreation() throws Throwable testWithNonblockingMode(this::testSegmentFlaggingOnCreation0); } - @Ignore @Test public void testNonblockingShouldMaintainSteadyDiskUsage() throws Throwable { diff --git a/test/unit/org/apache/cassandra/db/commitlog/CommitlogShutdownTest.java b/test/unit/org/apache/cassandra/db/commitlog/CommitlogShutdownTest.java index becc9390dde0..e3e566211283 100644 --- a/test/unit/org/apache/cassandra/db/commitlog/CommitlogShutdownTest.java +++ b/test/unit/org/apache/cassandra/db/commitlog/CommitlogShutdownTest.java @@ -27,7 +27,6 @@ import com.google.common.collect.ImmutableMap; import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; @@ -58,7 +57,6 @@ public class CommitlogShutdownTest private final static byte[] entropy = new byte[1024 * 256]; - @Ignore @Test @BMRule(name = "Make removing commitlog segments slow", targetClass = "CommitLogSegment", diff --git a/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java b/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java index e44f8dd97357..1beaa123a8bf 100644 --- a/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java +++ b/test/unit/org/apache/cassandra/hints/HintServiceBytemanTest.java @@ -26,7 +26,6 @@ import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; @@ -92,7 +91,6 @@ public void reinstanciateService() throws Throwable HintsService.instance.startDispatch(); } - @Ignore @Test @BMRule(name = "Delay delivering hints", targetClass = "DispatchHintsTask", diff --git a/test/unit/org/apache/cassandra/io/util/SafeMemoryWriterTest.java b/test/unit/org/apache/cassandra/io/util/SafeMemoryWriterTest.java index fd5075a9cd04..8b37c2def231 100644 --- a/test/unit/org/apache/cassandra/io/util/SafeMemoryWriterTest.java +++ b/test/unit/org/apache/cassandra/io/util/SafeMemoryWriterTest.java @@ -22,7 +22,6 @@ import java.util.Random; import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -60,7 +59,6 @@ public void testTrim() throws IOException testSafeMemoryWriter(CHUNK * 5, CHUNK, 65536); } - @Ignore @Test public void testOver2GBuffer() throws IOException { diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java index 7079de790e8e..5a8ac1505b20 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java @@ -69,12 +69,12 @@ private static Gen foundKnownMap() switch (domain) { case Key: - // TODO (coverage): don't hard code murmur + // TODO (desired): don't hard code murmur Gen keyGen = AccordGenerators.routingKeyGen(fromQT(CassandraGenerators.TABLE_ID_GEN), Gens.constant(AccordGenerators.RoutingKeyKind.TOKEN), fromQT(CassandraGenerators.murmurToken()), Murmur3Partitioner.instance); TokenKey homeKey = keyGen.next(rs); List forOrdering = Gens.lists(keyGen).unique().ofSizeBetween(1, 10).next(rs); forOrdering.sort(Comparator.naturalOrder()); - // TODO (coverage): don't hard code keys type + // TODO (desired): don't hard code keys type keysOrRanges = new FullKeyRoute(homeKey, forOrdering.toArray(RoutingKey[]::new)); break; case Range: diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 90241b747560..dcea11348d81 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -479,7 +479,6 @@ private static void testOne(long seed) } } - // TODO (expected): we currently don't explore TruncatedApply statuses because we don't transition through all phases and therefore don't adopt the Applied status Choices saveStatusChoices = Choices.uniform(EnumSet.complementOf(EnumSet.of(SaveStatus.TruncatedApply, SaveStatus.TruncatedUnapplied, SaveStatus.TruncatedApplyWithOutcome)).toArray(SaveStatus[]::new)); Supplier saveStatusSupplier = () -> { SaveStatus result = saveStatusChoices.choose(source); diff --git a/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java b/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java index eae6ea2f628c..60213554420f 100644 --- a/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java +++ b/test/unit/org/apache/cassandra/tcm/DiscoverySimulationTest.java @@ -32,7 +32,6 @@ import org.apache.commons.lang3.NotImplementedException; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; @@ -78,7 +77,6 @@ public static void setup() log.readyUnchecked(); } - @Ignore @Test public void discoveryTest() throws Throwable { From 3f03dc7263b0935d88659dcc64faa3a0875d6dd6 Mon Sep 17 00:00:00 2001 From: Stefan Miklosovic Date: Thu, 29 May 2025 15:14:54 +0200 Subject: [PATCH 336/340] ninja: remove unused imports in AccordJournalBurnTest to pass ant checkstyle-test --- .../apache/cassandra/service/accord/AccordJournalBurnTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java index 0c0f43dc2d63..eea399dd609a 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalBurnTest.java @@ -48,9 +48,6 @@ import accord.utils.Invariants; import accord.utils.PersistentField; import accord.utils.RandomSource; -import accord.utils.async.AsyncChains; -import accord.utils.async.AsyncResult; -import accord.utils.async.AsyncResults; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Directories; From 88ca4f8d9a1237a5b284829734befdb15e3e2ee4 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Thu, 29 May 2025 16:56:22 -0400 Subject: [PATCH 337/340] Backport CASSANDRA-20469 (Paxos repair interrupts running transactions) to 4.1 and 5.0 patch by Ariel Weisberg; reviewed by Benedict Elliott Smith for CASSANDRA-20585 --- .../org/apache/cassandra/config/Config.java | 2 ++ .../cassandra/config/DatabaseDescriptor.java | 11 ++++++ .../cassandra/service/StorageService.java | 12 +++++++ .../service/StorageServiceMBean.java | 4 +++ .../cleanup/PaxosCleanupLocalCoordinator.java | 34 +++++++++++++++++-- 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index b841d9b7c0fc..21ca1b595cd1 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -1264,4 +1264,6 @@ public enum CQLStartTime // 3.0 Cassandra Driver has its "read" timeout set to 12 seconds. Our recommendation is match this. public DurationSpec.LongMillisecondsBound native_transport_timeout = new DurationSpec.LongMillisecondsBound("12000ms"); public boolean enforce_native_deadline_for_hints = false; + + public boolean paxos_repair_race_wait = true; } diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index 853b29f6df2e..55556a6308e3 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -4638,4 +4638,15 @@ public static void setRejectOutOfTokenRangeRequests(boolean enabled) { conf.reject_out_of_token_range_requests = enabled; } + + public static boolean getPaxosRepairRaceWait() + { + return conf.paxos_repair_race_wait; + } + + @VisibleForTesting + public static void setPaxosRepairRaceWait(boolean paxosRepairRaceWait) + { + conf.paxos_repair_race_wait = paxosRepairRaceWait; + } } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 6de663910cb1..8d494fc4d6dd 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -7153,4 +7153,16 @@ public void setEnforceNativeDeadlineForHints(boolean value) DatabaseDescriptor.setEnforceNativeDeadlineForHints(value); } + @Override + public void setPaxosRepairRaceWait(boolean paxosRepairRaceWait) + { + DatabaseDescriptor.setPaxosRepairRaceWait(paxosRepairRaceWait); + } + + @Override + public boolean getPaxosRepairRaceWait() + { + return DatabaseDescriptor.getPaxosRepairRaceWait(); + } + } diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java index c43ef2eec768..c7c02d275028 100644 --- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java +++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java @@ -1103,4 +1103,8 @@ public void enableAuditLog(String loggerName, String includedKeyspaces, String e * e.g. keyspace_name -> [reads, writes, paxos]. */ Map getOutOfRangeOperationCounts(); + + void setPaxosRepairRaceWait(boolean paxosRepairCoordinatorWait); + + boolean getPaxosRepairRaceWait(); } diff --git a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java index 3904d54d329f..f7f500f57e1c 100644 --- a/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java +++ b/src/java/org/apache/cassandra/service/paxos/cleanup/PaxosCleanupLocalCoordinator.java @@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHashMap; import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,9 +40,15 @@ import org.apache.cassandra.service.paxos.PaxosRepair; import org.apache.cassandra.service.paxos.PaxosState; import org.apache.cassandra.service.paxos.uncommitted.UncommittedPaxosKey; +import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.concurrent.AsyncFuture; +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.config.DatabaseDescriptor.getCasContentionTimeout; +import static org.apache.cassandra.config.DatabaseDescriptor.getWriteRpcTimeout; import static org.apache.cassandra.service.paxos.cleanup.PaxosCleanupSession.TIMEOUT_NANOS; import static org.apache.cassandra.utils.Clock.Global.nanoTime; @@ -126,8 +133,10 @@ private void scheduleKeyRepairsOrFinish() return; } + long txnTimeoutMicros = Math.max(getCasContentionTimeout(MICROSECONDS), getWriteRpcTimeout(MICROSECONDS)); + boolean waitForCoordinator = DatabaseDescriptor.getPaxosRepairRaceWait(); while (inflight.size() < parallelism && uncommittedIter.hasNext()) - repairKey(uncommittedIter.next()); + repairKey(uncommittedIter.next(), txnTimeoutMicros, waitForCoordinator); } @@ -135,7 +144,7 @@ private void scheduleKeyRepairsOrFinish() finish(); } - private boolean repairKey(UncommittedPaxosKey uncommitted) + private boolean repairKey(UncommittedPaxosKey uncommitted, long txnTimeoutMicros, boolean waitForCoordinator) { logger.trace("repairing {}", uncommitted); Preconditions.checkState(!inflight.containsKey(uncommitted.getKey())); @@ -146,6 +155,9 @@ private boolean repairKey(UncommittedPaxosKey uncommitted) if (consistency == null) return false; + if (waitForCoordinator) + maybeWaitForOriginalCoordinator(uncommitted, txnTimeoutMicros); + inflight.put(uncommitted.getKey(), tableRepairs.startOrGetOrQueue(uncommitted.getKey(), uncommitted.ballot(), uncommitted.getConsistencyLevel(), table, result -> { if (result.wasSuccessful()) onKeyFinish(uncommitted.getKey()); @@ -155,6 +167,24 @@ private boolean repairKey(UncommittedPaxosKey uncommitted) return true; } + /** + * Wait to repair things that are still potentially executing at the original coordinator to avoid + * causing timeouts. This should only have to happen at most a few times when the repair starts + */ + private static void maybeWaitForOriginalCoordinator(UncommittedPaxosKey uncommitted, long txnTimeoutMicros) + { + long nowMicros = MILLISECONDS.toMicros(Clock.Global.currentTimeMillis()); + long ballotElapsedMicros = nowMicros - uncommitted.ballot().unixMicros(); + if (ballotElapsedMicros < 0 && Math.abs(ballotElapsedMicros) > SECONDS.toMicros(1)) + logger.warn("Encountered ballot that is more than 1 second in the future, is there a clock sync issue? {}", uncommitted.ballot()); + if (ballotElapsedMicros < txnTimeoutMicros) + { + long sleepMicros = txnTimeoutMicros - ballotElapsedMicros; + logger.info("Paxos auto repair encountered a potentially in progress ballot, sleeping {}us to allow the in flight operation to finish", sleepMicros); + Uninterruptibles.sleepUninterruptibly(sleepMicros, MICROSECONDS); + } + } + private synchronized void onKeyFinish(DecoratedKey key) { if (!inflight.containsKey(key)) From 5450a1f20e63e7835f07bafc9602530547bd27be Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Tue, 18 Mar 2025 14:24:32 -0400 Subject: [PATCH 338/340] Support for add and replace in IntervalTree Patch by Ariel Weisberg and Yuqi Yan; Reviewed by Marcus Eriksson for CASSANDRA-20513 Co-authored-by: Yuqi Yan --- CHANGES.txt | 1 + .../db/lifecycle/LifecycleTransaction.java | 57 +- .../db/lifecycle/SSTableIntervalTree.java | 92 ++- .../apache/cassandra/db/lifecycle/View.java | 19 +- .../io/sstable/format/SSTableReader.java | 2 +- .../apache/cassandra/utils/IntervalTree.java | 581 ++++++++++++++---- .../cassandra/utils/IntervalTreeTest.java | 199 +++++- 7 files changed, 801 insertions(+), 150 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 6fdaa2776c7e..b922b57a02a4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Support for add and replace in IntervalTree (CASSANDRA-20513) * Enable single_sstable_uplevel by default for LCS (CASSANDRA-18509) * Introduce NativeAccessor to avoid new ByteBuffer allocation on flush for each NativeCell (CASSANDRA-20173) * Migrate sstableloader code to its own tools directory and artifact (CASSANDRA-20328) diff --git a/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java b/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java index 6cbf0d483e00..15b0417b382f 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java +++ b/src/java/org/apache/cassandra/db/lifecycle/LifecycleTransaction.java @@ -21,9 +21,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.IdentityHashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.BiPredicate; @@ -32,6 +34,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,6 +69,7 @@ import static org.apache.cassandra.db.lifecycle.Helpers.select; import static org.apache.cassandra.db.lifecycle.Helpers.selectFirst; import static org.apache.cassandra.db.lifecycle.Helpers.setReplaced; +import static org.apache.cassandra.db.lifecycle.View.replaceSSTables; import static org.apache.cassandra.db.lifecycle.View.updateCompacting; import static org.apache.cassandra.db.lifecycle.View.updateLiveSet; import static org.apache.cassandra.utils.Throwables.maybeFail; @@ -294,7 +298,15 @@ public Throwable doAbort(Throwable accumulate) // replace all updated readers with a version restored to its original state List restored = restoreUpdatedOriginals(); List invalid = Lists.newArrayList(Iterables.concat(logged.update, logged.obsolete)); - accumulate = tracker.apply(updateLiveSet(logged.update, restored, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()), accumulate); + + Map replacementMap = Collections.emptyMap(); + if (!isOffline()) + replacementMap = getReplacementMap(logged.update, restored); + if (!replacementMap.isEmpty()) + accumulate = tracker.apply(replaceSSTables(logged.update, restored, replacementMap, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()), accumulate); + else + accumulate = tracker.apply(updateLiveSet(logged.update, restored, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()), accumulate); + accumulate = tracker.notifySSTablesChanged(invalid, restored, OperationType.COMPACTION, accumulate); // setReplaced immediately preceding versions that have not been obsoleted accumulate = setReplaced(logged.update, accumulate); @@ -373,8 +385,15 @@ private Throwable checkpoint(Throwable accumulate) // ensure any new readers are in the compacting set, since we aren't done with them yet // and don't want anyone else messing with them // apply atomically along with updating the live set of readers - tracker.apply(compose(updateCompacting(emptySet(), fresh), - updateLiveSet(toUpdate, staged.update, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()))); + Map replacementMap = Collections.emptyMap(); + if (!isOffline()) + replacementMap = getReplacementMap(toUpdate, staged.update); + if (!replacementMap.isEmpty()) + tracker.apply(compose(updateCompacting(emptySet(), fresh), + replaceSSTables(toUpdate, staged.update, replacementMap, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()))); + else + tracker.apply(compose(updateCompacting(emptySet(), fresh), + updateLiveSet(toUpdate, staged.update, tracker.maybeGetSSTableIntervalTreeLatencyMetrics()))); // log the staged changes and our newly marked readers marked.addAll(fresh); @@ -389,6 +408,38 @@ private Throwable checkpoint(Throwable accumulate) return accumulate; } + // Match the SSTableReaders from the existing ones to the new one to be added (with same ranges) + // Returns the map of toRemove <-> toAdd. Return empty map if such 1-1 replacement doesn't exist + private static Map getReplacementMap(final Set remove, final Collection add) + { + if (remove.size() != add.size()) + return Collections.emptyMap(); + + List toAdds = new ArrayList<>(add); + List toRemoves = new ArrayList<>(remove); + // sort the SSTableReader list by (first, last, descriptor.id). The view is per cfs so id will be unique + Comparator comp = Comparator.comparing((SSTableReader s) -> s.getFirst()) + .thenComparing(s -> s.getLast()) + .thenComparing(SSTableReader.idComparator); + toRemoves.sort(comp); + toAdds.sort(comp); + + Map replacementMap = Maps.newHashMapWithExpectedSize(toAdds.size()); + // toAdd and toRemove have the same size + for (int i = 0; i < toAdds.size(); i++) + { + SSTableReader toRemove = toRemoves.get(i); + SSTableReader toAdd = toAdds.get(i); + // optimization: here we don't check the descriptor. If we're able to match those to be removed with those + // to be added, we ensure that the pairs have the same (first, last) range + if (toRemove.getFirst().equals(toAdd.getFirst()) && toRemove.getLast().equals(toAdd.getLast())) + replacementMap.put(toRemove, toAdd); + else + // stop and return empty map if toAdd and toRemove can't match + return Collections.emptyMap(); + } + return replacementMap; + } /** * update a reader: if !original, this is a reader that is being introduced by this transaction; diff --git a/src/java/org/apache/cassandra/db/lifecycle/SSTableIntervalTree.java b/src/java/org/apache/cassandra/db/lifecycle/SSTableIntervalTree.java index 4d5a87f3991d..0e88193856f5 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/SSTableIntervalTree.java +++ b/src/java/org/apache/cassandra/db/lifecycle/SSTableIntervalTree.java @@ -1,5 +1,4 @@ /* - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -8,29 +7,32 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + package org.apache.cassandra.db.lifecycle; -import java.util.Arrays; +import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.List; +import java.util.Map; + +import com.google.common.collect.Iterables; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.PartitionPosition; import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.utils.Interval; import org.apache.cassandra.utils.IntervalTree; +import org.apache.cassandra.utils.Pair; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; public class SSTableIntervalTree extends IntervalTree> @@ -42,6 +44,11 @@ public class SSTableIntervalTree extends IntervalTree[] minOrder, Interval[] maxOrder) + { + super(head, modCount, minOrder, maxOrder); + } + private SSTableIntervalTree(Interval[] minOrder, Interval[] maxOrder) { super(minOrder, maxOrder); @@ -53,6 +60,25 @@ protected SSTableIntervalTree create(Interval[ return new SSTableIntervalTree(minOrder, maxOrder); } + @Override + protected SSTableIntervalTree create(IntervalNode head, int modCount, Interval[] minOrder, Interval[] maxOrder) + { + return new SSTableIntervalTree(head, modCount, minOrder, maxOrder); + } + + @Override + protected SSTableIntervalTree create(Collection> intervals) + { + return new SSTableIntervalTree(intervals); + } + + @Override + public SSTableIntervalTree replace(List, Interval>> replacements) + { + checkArgument(!replacements.isEmpty(), "Shouldn't call replace with no replacements"); + return (SSTableIntervalTree) super.replace(replacements); + } + public static SSTableIntervalTree empty() { return EMPTY; @@ -67,12 +93,14 @@ public static SSTableIntervalTree buildSSTableIntervalTree(Collection> buildIntervals(Collection sstables) { - if (sstables == null || sstables.isEmpty()) - return Collections.emptyList(); - return Arrays.asList(buildIntervalsArray(sstables)); + List> intervals = new ArrayList<>(Iterables.size(sstables)); + for (SSTableReader sstable : sstables) + intervals.add(Interval.create(sstable.getFirst(), sstable.getLast(), sstable)); + return intervals; } - public static Interval[] buildIntervalsArray(Collection sstables) + @SuppressWarnings("unchecked") + static Interval[] buildIntervalsArray(Collection sstables) { if (sstables == null || sstables.isEmpty()) return IntervalTree.EMPTY_ARRAY; @@ -107,4 +135,36 @@ public static SSTableIntervalTree update(SSTableIntervalTree tree, Collection replacementMap) + { + checkArgument(!replacementMap.isEmpty(), "Replacement map shouldn't be empty for SSTableIntervalTree.replace"); + List, Interval>> replacementIntervalsMap = new ArrayList<>(); + for (Map.Entry entry : replacementMap.entrySet()) + { + SSTableReader originalSSTable = entry.getKey(); + SSTableReader replacementSSTable = entry.getValue(); + Interval originalInterval = originalSSTable.getInterval(); + Interval replacementInterval = replacementSSTable.getInterval(); + replacementIntervalsMap.add(Pair.create(originalInterval, replacementInterval)); + } + return tree.replace(replacementIntervalsMap); + } + + public static SSTableIntervalTree addSSTables(SSTableIntervalTree tree, Collection additions) + { + return (SSTableIntervalTree) tree.add(buildIntervalsArray(additions)); + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/lifecycle/View.java b/src/java/org/apache/cassandra/db/lifecycle/View.java index 15c02eeb8c50..c6fd7ed52af2 100644 --- a/src/java/org/apache/cassandra/db/lifecycle/View.java +++ b/src/java/org/apache/cassandra/db/lifecycle/View.java @@ -318,6 +318,23 @@ public View apply(View view) }; } + // construct a function to replace the SSTable that have the same [first,last] intervals + static Function replaceSSTables(final Set remove, final Iterable add, final Map replacementMap, LatencyMetrics sstableIntervalTreeLatency) + { + return new Function() + { + public View apply(View view) + { + Map sstableMap = replace(view.sstablesMap, remove, add); + long treeBuildStart = Clock.Global.nanoTime(); + SSTableIntervalTree sstableIntervalTree = SSTableIntervalTree.replace(view.intervalTree, replacementMap); + if (sstableIntervalTreeLatency != null) + sstableIntervalTreeLatency.addNano(Clock.Global.nanoTime() - treeBuildStart); + return new View(view.liveMemtables, view.flushingMemtables, sstableMap, view.compactingMap, sstableIntervalTree); + } + }; + } + // called prior to initiating flush: add newMemtable to liveMemtables, making it the latest memtable static Function switchMemtable(final Memtable newMemtable) { @@ -367,7 +384,7 @@ public View apply(View view) Map sstableMap = replace(view.sstablesMap, emptySet(), flushed); long treeBuildStart = Clock.Global.nanoTime(); - SSTableIntervalTree sstableIntervalTree = SSTableIntervalTree.update(view.intervalTree, null, flushed); + SSTableIntervalTree sstableIntervalTree = SSTableIntervalTree.addSSTables(view.intervalTree, flushed); if (sstableIntervalTreeLatency != null) sstableIntervalTreeLatency.addNano(Clock.Global.nanoTime() - treeBuildStart); return new View(view.liveMemtables, flushingMemtables, sstableMap, view.compactingMap, sstableIntervalTree); diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java index 1055070cd8d9..52986d73dc26 100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java @@ -1907,7 +1907,7 @@ public UniqueIdentifier instanceId() @Override public int compareTo(SSTableReader other) { - // Used in IntervalTree with the expecation that compareTo uniquely identifies an SSTableReader + // Used in IntervalTree with the expectation that compareTo uniquely identifies an SSTableReader // Use accessor for instanceId for mocks return instanceId().compareTo(other.instanceId()); } diff --git a/src/java/org/apache/cassandra/utils/IntervalTree.java b/src/java/org/apache/cassandra/utils/IntervalTree.java index 8677ba9f01dc..03848a414736 100644 --- a/src/java/org/apache/cassandra/utils/IntervalTree.java +++ b/src/java/org/apache/cassandra/utils/IntervalTree.java @@ -17,12 +17,11 @@ */ package org.apache.cassandra.utils; -import java.util.ArrayDeque; +import java.util.AbstractCollection; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Deque; import java.util.Iterator; import java.util.List; import java.util.function.BiPredicate; @@ -30,6 +29,7 @@ import java.util.function.Predicate; import java.util.stream.Stream; import java.util.stream.StreamSupport; +import javax.annotation.Nullable; import com.google.common.base.Joiner; import com.google.common.collect.Iterables; @@ -39,26 +39,45 @@ import org.apache.cassandra.utils.AsymmetricOrdering.Op; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_INTERVAL_TREE_EXPENSIVE_CHECKS; public class IntervalTree, D extends Comparable, I extends Interval> implements Iterable { public static final boolean EXPENSIVE_CHECKS = TEST_INTERVAL_TREE_EXPENSIVE_CHECKS.getBoolean(); + private static final int REBUILD_AT_MOD_COUNT = 20; private static final Logger logger = LoggerFactory.getLogger(IntervalTree.class); + @SuppressWarnings("rawtypes") public static final Interval[] EMPTY_ARRAY = new Interval[0]; - @SuppressWarnings("unchecked") + @SuppressWarnings({"unchecked", "rawtypes"}) private static final IntervalTree EMPTY_TREE = new IntervalTree(null); private final IntervalNode head; + + /** + * Add can potentially unbalance the interval tree each time so force a rebuild after a certain number + * of adds to bound how unbalanced the worst path in the tree can become. + * + * In practice it's likely the tree will have been rebuilt anyways long before it hits mod count, but it's not + * good to leave it unbounded. + * + * Napkin math is a 100k interval tree is a large tree and lg2(100k) is 16 (lg2(1million) is 20) so by bounding it at 20 then + * the worst possible imbalance is a bit more than double a balanced tree. + */ + protected final int modCount; + private final I[] intervalsByMinOrder; private final I[] intervalsByMaxOrder; + @SuppressWarnings("unchecked") protected IntervalTree(Collection intervals) { + this.modCount = 0; if (intervals == null || intervals.isEmpty()) { this.head = null; @@ -77,14 +96,25 @@ else if (intervals.size() == 1) Arrays.sort(intervalsByMaxOrder, Interval.maxOrdering()); this.head = new IntervalNode(Arrays.asList(intervalsByMinOrder), Arrays.asList(intervalsByMaxOrder)); } + if (EXPENSIVE_CHECKS) + { + if (intervalsByMinOrder.length > 1) + for (int i = 1; i < intervalsByMinOrder.length; i++) + checkState(Interval.minOrdering().compare(intervalsByMinOrder[i - 1], intervalsByMinOrder[i]) <= 0, "%s and %s out of order", intervalsByMinOrder[i-1], intervalsByMinOrder[i]); + if (intervalsByMaxOrder.length > 1) + for (int i = 1; i < intervalsByMaxOrder.length; i++) + checkState(Interval.maxOrdering().compare(intervalsByMaxOrder[i - 1], intervalsByMaxOrder[i]) <= 0, "%s and %s out of order", intervalsByMaxOrder[i-1], intervalsByMaxOrder[i]); + } } /** * This constructor will not modify minSortedIntervals and maxSortedIntervals, but it also won't * make defensive copies and will keep the originals. */ + @SuppressWarnings("unchecked") protected IntervalTree(I[] minSortedIntervals, I[] maxSortedIntervals) { + this.modCount = 0; if (minSortedIntervals == null || minSortedIntervals.length == 0) { this.head = null; @@ -102,11 +132,50 @@ else if (minSortedIntervals.length == 1) intervalsByMaxOrder = maxSortedIntervals; this.head = new IntervalNode(Arrays.asList(minSortedIntervals), Arrays.asList(maxSortedIntervals)); } + + if (EXPENSIVE_CHECKS) + { + if (intervalsByMinOrder.length > 1) + for (int i = 1; i < intervalsByMinOrder.length; i++) + checkState(Interval.minOrdering().compare(intervalsByMinOrder[i - 1], intervalsByMinOrder[i]) < 0, "%s and %s out of order", intervalsByMinOrder[i-1], intervalsByMinOrder[i]); + if (intervalsByMaxOrder.length > 1) + for (int i = 1; i < intervalsByMaxOrder.length; i++) + checkState(Interval.maxOrdering().compare(intervalsByMaxOrder[i - 1], intervalsByMaxOrder[i]) < 0, "%s and %s out of order", intervalsByMaxOrder[i-1], intervalsByMaxOrder[i]); + } + } + + protected IntervalTree(IntervalNode head, int modCount, I[] minSortedIntervals, I[] maxSortedIntervals) + { + checkNotNull(minSortedIntervals, "minSortedIntervals is null"); + checkNotNull(maxSortedIntervals, "maxSortedIntervals is null"); + this.head = head; + this.modCount = modCount; + this.intervalsByMinOrder = minSortedIntervals; + this.intervalsByMaxOrder = maxSortedIntervals; + if (EXPENSIVE_CHECKS) + { + if (intervalsByMinOrder.length > 1) + for (int i = 1; i < intervalsByMinOrder.length; i++) + checkState(Interval.minOrdering().compare(intervalsByMinOrder[i - 1], intervalsByMinOrder[i]) < 0, "%s and %s out of order", intervalsByMinOrder[i-1], intervalsByMinOrder[i]); + if (intervalsByMaxOrder.length > 1) + for (int i = 1; i < intervalsByMaxOrder.length; i++) + checkState(Interval.maxOrdering().compare(intervalsByMaxOrder[i - 1], intervalsByMaxOrder[i]) < 0, "%s and %s out of order", intervalsByMaxOrder[i-1], intervalsByMaxOrder[i]); + } + } + + protected IntervalTree create(IntervalNode head, int modCount, @Nullable I[] minSortedIntervals, @Nullable I[] maxSortedIntervals) + { + return new IntervalTree<>(head, modCount, minSortedIntervals, maxSortedIntervals); } protected IntervalTree create(I[] minOrder, I[] maxOrder) { - return new IntervalTree(minOrder, maxOrder); + return new IntervalTree<>(minOrder, maxOrder); + } + + protected IntervalTree create(Collection intervals) + { + return new IntervalTree<>(intervals); } public static , D extends Comparable, I extends Interval> IntervalTree build(Collection intervals) @@ -165,91 +234,47 @@ public List> matches(Interval searchInterval) return Collections.emptyList(); List> results = new ArrayList<>(); - head.searchInternal(searchInterval, i -> results.add(i)); + head.searchInternal(searchInterval, results::add); return results; } public List> matches(C point) { - return matches(Interval.create(point, point, null)); + return matches(Interval.create(point, point, null)); } public List search(Interval searchInterval) { if (head == null) - return Collections.emptyList(); + return Collections.emptyList(); - List results = new ArrayList(); + List results = new ArrayList<>(); head.searchInternal(searchInterval, i -> results.add(i.data)); return results; } public List search(C point) { - return search(Interval.create(point, point, null)); + return search(Interval.create(point, point, null)); } - public List search(C start, C end) - { - return search(Interval.create(start, end, null)); - } - - /** - * The input arrays aren't defensively copied and will be sorted. The update method doesn't allow duplicates or elements to be removed - * to be missing and this differs from the constructor which does not duplicate checking at all. - * - * It made more sense for update to be stricter because it is tracking removals and additions explicitly instead of building - * a list from scratch and in the targeted use case of a list of SSTables there are no duplicates. At a given point in time - * an sstable represents exactly one interval (although it may switch via removal and addition as in early open). - */ - public IntervalTree update(I[] removals, I[] additions) + @SuppressWarnings("unchecked") + private I[] buildUpdatedArrayForUpdate(I[] existingSorted, + I[] removalsSorted, + I[] additionsSorted, + AsymmetricOrdering, C> cmp) { - if (removals == null) - removals = (I[])EMPTY_ARRAY; - if (additions == null) - additions = (I[])EMPTY_ARRAY; - - if (removals.length == 0 && additions.length == 0) + if (EXPENSIVE_CHECKS) { - return this; + if (existingSorted.length > 1) + for (int i = 1; i < existingSorted.length; i++) + checkState(cmp.compare(existingSorted[i - 1], existingSorted[i]) < 0, "%s and %s out of order", existingSorted[i-1], existingSorted[i]); } - Arrays.sort(removals, Interval.minOrdering()); - Arrays.sort(additions, Interval.minOrdering()); - - for (int i = 1; i < additions.length; i++) - checkState( Interval.minOrdering().compare(additions[i], additions[i-1]) != 0, "Duplicate interval in additions %s", additions[i]); - - I[] newByMin = buildUpdatedArray( - intervalsByMinOrder, - removals, - additions, - Interval.minOrdering() - ); - - Arrays.sort(removals, Interval.maxOrdering()); - Arrays.sort(additions, Interval.maxOrdering()); - - I[] newByMax = buildUpdatedArray( - intervalsByMaxOrder, - removals, - additions, - Interval.maxOrdering() - ); - - return create(newByMin, newByMax); - } - - @SuppressWarnings("unchecked") - private I[] buildUpdatedArray(I[] existingSorted, - I[] removalsSorted, - I[] additionsSorted, - AsymmetricOrdering, C> cmp) - { int finalSize = existingSorted.length + additionsSorted.length - removalsSorted.length; I[] result = (I[]) new Interval[finalSize]; - int existingIndex = 0; + int existingIndex = 0; int removalsIndex = 0; int additionsIndex = 0; int resultIndex = 0; @@ -268,6 +293,7 @@ private I[] buildUpdatedArray(I[] existingSorted, } else { + checkState(removalsSorted[removalsIndex].data == currentExisting.data, "Comparator does not implement identity"); existingIndex++; removalsIndex++; @@ -277,7 +303,7 @@ private I[] buildUpdatedArray(I[] existingSorted, } } - if (existingIndex >= existingSorted.length ) + if (existingIndex >= existingSorted.length) break; while (additionsIndex < additionsSorted.length) @@ -311,12 +337,141 @@ else if (additionCmp < 0) return result; } + /** + * The input arrays aren't defensively copied and will be sorted. This update method doesn't allow duplicates or elements to be removed + * to be missing and this differs from creating the tree from scratch using {@link #build(Collection) build(Collection<I>)} method which allows duplicates. + * + * There is also the requirement that D will implement Comparable<D> and that comparator will implement identity + * which is not part of the normal contract of Comparable<D>. That means that if a.compareTo(b) == 0 then a == b; + * + * It made more sense for update to be stricter because it is tracking removals and additions explicitly instead of building + * a list from scratch and in the targeted use case of a list of SSTables there are no duplicates. At a given point in time + * an sstable represents exactly one interval (although it may switch via removal and addition as in early open). + */ + @SuppressWarnings("unchecked") + public IntervalTree update(I[] removals, I[] additions) + { + if ((removals == null || removals.length == 0) && (additions == null || additions.length == 0)) + return this; + + if (removals == null) + removals = (I[])EMPTY_ARRAY; + if (additions == null) + additions = (I[])EMPTY_ARRAY; + + Arrays.sort(removals, Interval.minOrdering()); + Arrays.sort(additions, Interval.minOrdering()); + + if (EXPENSIVE_CHECKS) + { + for (int i = 1; i < additions.length; i++) + checkState(Interval.minOrdering().compare(additions[i], additions[i - 1]) != 0, "Duplicate interval in additions %s", additions[i]); + } + + I[] newByMin = buildUpdatedArrayForUpdate( + intervalsByMinOrder, + removals, + additions, + Interval.minOrdering() + ); + + Arrays.sort(removals, Interval.maxOrdering()); + Arrays.sort(additions, Interval.maxOrdering()); + + I[] newByMax = buildUpdatedArrayForUpdate( + intervalsByMaxOrder, + removals, + additions, + Interval.maxOrdering() + ); + + return create(newByMin, newByMax); + } + + /** + * The in practice use case here is flush which only adds one interval so do binary search + */ + @SuppressWarnings("unchecked") + private I[] buildUpdatedArrayForAdd(I[] addIntervals, I[] existingIntervals, AsymmetricOrdering, C> ordering) + { + int newSize = existingIntervals.length + addIntervals.length; + Arrays.sort(addIntervals, ordering); + I[] newIntervals = (I[])new Interval[newSize]; + int newIndex = 0; + int existingIndex = 0; + + int i = 0; + for (; i < addIntervals.length; i++) + { + if (existingIndex >= existingIntervals.length) + break; + I addInterval = addIntervals[i]; + int insertionPoint = Arrays.binarySearch(existingIntervals, addInterval, ordering); + checkState(insertionPoint < 0, "Interval being added should not already be present"); + insertionPoint = -1 - insertionPoint; + if (insertionPoint > existingIndex) + { + int toCopy = insertionPoint - existingIndex; + System.arraycopy(existingIntervals, existingIndex, newIntervals, newIndex, toCopy); + newIndex += toCopy; + existingIndex += toCopy; + } + newIntervals[newIndex++] = addInterval; + } + + if (i < addIntervals.length) + System.arraycopy(addIntervals, i, newIntervals, newIndex, addIntervals.length - i); + + if (existingIndex < existingIntervals.length) + System.arraycopy(existingIntervals, existingIndex, newIntervals, newIndex, existingIntervals.length - existingIndex); + + return newIntervals; + } + + public IntervalTree add(I[] intervals) + { + if (head == null) + return create(Arrays.asList(intervals)); + if (intervals.length == 0) + return this; + if (modCount + 1 >= REBUILD_AT_MOD_COUNT) + { + return create(new AbstractCollection<>() + { + @Override + public Iterator iterator() + { + return Iterators.concat(IntervalTree.this.iterator(), Iterators.forArray(intervals)); + } + + @Override + public int size() + { + return intervalsByMinOrder.length + intervals.length; + } + }); + } + + // Add does not preserve iteration order, not even by interval bounds, so it's necessary to compute the arrays that preserve the minOrder + // Or pay to sort and build them later + I[] sortableIntervals = Arrays.copyOf(intervals, intervals.length); + I[] newIntervalsByMinOrder = buildUpdatedArrayForAdd(sortableIntervals, + intervalsByMinOrder, + Interval.minOrdering()); + I[] newIntervalsByMaxOrder = buildUpdatedArrayForAdd(sortableIntervals, + intervalsByMaxOrder, + Interval.maxOrdering()); + + return create(head.add(Arrays.asList(intervals)), modCount + 1, newIntervalsByMinOrder, newIntervalsByMaxOrder); + } + + @Override public Iterator iterator() { if (head == null) return Collections.emptyIterator(); - return new TreeIterator(head); + return Iterators.forArray(intervalsByMinOrder); } public Stream stream() @@ -327,15 +482,16 @@ public Stream stream() @Override public String toString() { - return "<" + Joiner.on(", ").join(Iterables.limit(this, 100)) + ">"; + return '<' + Joiner.on(", ").join(Iterables.limit(this, 100)) + '>'; } + @SuppressWarnings("unchecked") @Override public boolean equals(Object o) { if(!(o instanceof IntervalTree)) return false; - IntervalTree that = (IntervalTree)o; + IntervalTree that = (IntervalTree)o; return Iterators.elementsEqual(iterator(), that.iterator()); } @@ -348,6 +504,116 @@ public final int hashCode() return result; } + private I[] buildUpdatedArrayForReplace(I[] existingSorted, + List> replacements, + AsymmetricOrdering, C> cmp) + { + I[] replacementArray = Arrays.copyOf(existingSorted, existingSorted.length); + for (Pair replacement : replacements) + { + I existingInterval = replacement.left; + I newInterval = replacement.right; + + int removalIdx = Arrays.binarySearch(replacementArray, existingInterval, cmp); + if (removalIdx < 0) + throw new IllegalStateException("Interval to replace not found in the existing tree: " + existingInterval); + checkState(existingInterval.data == replacementArray[removalIdx].data, "Comparator does not implement identity"); + + int insertionIdx = Arrays.binarySearch(replacementArray, newInterval, cmp); + checkState(insertionIdx < 0, "Value to be inserted already exists"); + insertionIdx = -1 - insertionIdx; + + if (insertionIdx > removalIdx) + { + // Shift everything from insertionIdx and left down to removalIdx + System.arraycopy(replacementArray, removalIdx + 1, replacementArray, removalIdx, insertionIdx - removalIdx - 1); + replacementArray[insertionIdx - 1] = newInterval; + } + else if (insertionIdx < removalIdx) + { + // Shift everything from insertionIdx and onward right to removalIdx + System.arraycopy(replacementArray, insertionIdx, replacementArray, insertionIdx + 1, removalIdx - insertionIdx); + replacementArray[Math.min(replacementArray.length, insertionIdx)] = newInterval; + } + else + { + replacementArray[insertionIdx] = newInterval; + } + } + + if (EXPENSIVE_CHECKS) + { + if (replacementArray.length > 1) + for (int i = 1; i < replacementArray.length; i++) + checkState(cmp.compare(replacementArray[i - 1], replacementArray[i]) < 0, "%s and %s out of order", replacementArray[i-1], replacementArray[i]); + } + + return replacementArray; + } + + /** + * This replace method doesn't work correctly with duplicates. If the tree already has duplicates each replacement (or duplicate replacement) + * will replace one instance in the tree. + * + * There is also the requirement that D will implement Comparable<D> and that comparator will implement identity + * which is not part of the normal contract of Comparable<D>. That means that if a.compareTo(b) == 0 then a == b; + */ + public IntervalTree replace(List> replacements) + { + if (head == null) + { + checkArgument(replacements.isEmpty(), "Can't replace intervals in an empty tree"); + return this; + } + + if (replacements.isEmpty()) + return this; + + List> sortableReplacements = new ArrayList<>(replacements); + I[] newIntervalsByMinOrder = buildUpdatedArrayForReplace(intervalsByMinOrder, sortableReplacements, Interval.minOrdering()); + I[] newIntervalsByMaxOrder = buildUpdatedArrayForReplace(intervalsByMaxOrder, sortableReplacements, Interval.maxOrdering()); + + checkState(newIntervalsByMinOrder.length == newIntervalsByMaxOrder.length); + if (EXPENSIVE_CHECKS) + { + boolean[] foundMinOrderReplacement = new boolean[replacements.size()]; + boolean[] foundMaxOrderReplacement = new boolean[replacements.size()]; + for (int i = 0; i < newIntervalsByMinOrder.length; i++) + { + for (int j = 0; j < replacements.size(); j++) + { + Pair replacement = replacements.get(j); + if (newIntervalsByMinOrder[i].min.equals(replacement.left.min) && newIntervalsByMinOrder[i].max.equals(replacement.right.max)) + { + checkState(newIntervalsByMinOrder[i].data != replacement.left.data); + if (newIntervalsByMinOrder[i].data == replacement.right.data) + { + checkState(!foundMinOrderReplacement[j], "Replacement value appears more than once"); + foundMinOrderReplacement[j] = true; + } + } + + if (newIntervalsByMaxOrder[i].min.equals(replacement.left.min) && newIntervalsByMaxOrder[i].max.equals(replacement.right.max)) + { + checkState(newIntervalsByMaxOrder[i].data != replacement.left.data); + if (newIntervalsByMaxOrder[i].data == replacement.right.data) + { + checkState(!foundMaxOrderReplacement[j], "Replacement value appears more than once"); + foundMaxOrderReplacement[j] = true; + } + } + } + } + for (int i = 0; i < foundMaxOrderReplacement.length; i++) + checkState(foundMinOrderReplacement[i] && foundMaxOrderReplacement[i], "Didn't find replacement value that should be present"); + } + + return create(head.replace(head, replacements), + modCount, + newIntervalsByMinOrder, + newIntervalsByMaxOrder); + } + protected class IntervalNode { final C center; @@ -415,7 +681,7 @@ public IntervalNode(List minOrder, List maxOrder) if (EXPENSIVE_CHECKS) { - List allEndpoints = new ArrayList(minOrder.size() * 2); + List allEndpoints = new ArrayList<>(minOrder.size() * 2); for (I interval : minOrder) { allEndpoints.add(interval.min); @@ -429,12 +695,12 @@ public IntervalNode(List minOrder, List maxOrder) // Separate interval in intersecting center, left of center and right of center int initialIntersectionSize = i - j + 1; - intersectsLeft = new ArrayList(initialIntersectionSize); - intersectsRight = new ArrayList(initialIntersectionSize); + intersectsLeft = new ArrayList<>(initialIntersectionSize); + intersectsRight = new ArrayList<>(initialIntersectionSize); int initialChildSize = Math.min(i, j); - List leftSegmentMinOrder = new ArrayList(initialChildSize); + List leftSegmentMinOrder = new ArrayList<>(initialChildSize); List leftSegmentMaxOrder = new ArrayList<>(initialChildSize); - List rightSegmentMinOrder = new ArrayList(initialChildSize); + List rightSegmentMinOrder = new ArrayList<>(initialChildSize); List rightSegmentMaxOrder = new ArrayList<>(initialChildSize); for (I candidate : minOrder) @@ -462,12 +728,22 @@ else if (candidate.min.compareTo(center) > 0) assert (intersectsLeft.size() == intersectsRight.size()); assert (intersectsLeft.size() + leftSegmentMinOrder.size() + rightSegmentMinOrder.size()) == minOrder.size() : - "intersects (" + String.valueOf(intersectsLeft.size()) + - ") + leftSegment (" + String.valueOf(leftSegmentMinOrder.size()) + - ") + rightSegment (" + String.valueOf(rightSegmentMinOrder.size()) + - ") != toBisect (" + String.valueOf(minOrder.size()) + ")"; + "intersects (" + intersectsLeft.size() + + ") + leftSegment (" + leftSegmentMinOrder.size() + + ") + rightSegment (" + rightSegmentMinOrder.size() + + ") != toBisect (" + minOrder.size() + ')'; } + public IntervalNode(C center, C low, C high, List intersectsLeft, List intersectsRight, IntervalNode left, IntervalNode right) + { + this.center = center; + this.low = low; + this.high = high; + this.intersectsLeft = intersectsLeft; + this.intersectsRight = intersectsRight; + this.left = left; + this.right = right; + } void searchInternal(Interval searchInterval, Consumer> results) { @@ -508,46 +784,145 @@ else if (center.compareTo(searchInterval.max) > 0) right.searchInternal(searchInterval, results); } } - } - private class TreeIterator extends AbstractIterator - { - private final Deque stack = new ArrayDeque(); - private Iterator current; - TreeIterator(IntervalNode node) + private IntervalNode replace(IntervalNode node, List> replacements) { - super(); - gotoMinOf(node); - } + if (node == null || replacements.isEmpty()) + return node; - protected I computeNext() - { - while (true) - { - if (current != null && current.hasNext()) - return current.next(); + List> leftSegment = new ArrayList<>(); + List> rightSegment = new ArrayList<>(); + List newIntersectsLeft = null; + List newIntersectsRight = null; + int updated = 0; - IntervalNode node = stack.pollFirst(); - if (node == null) - return endOfData(); + for (Pair entry : replacements) + { + I intervalToRemove = entry.left; + I intervalToAdd = entry.right; + if (node.center.compareTo(intervalToRemove.min) < 0) + { + rightSegment.add(entry); + } + else if (node.center.compareTo(intervalToRemove.max) > 0) + { + leftSegment.add(entry); + } + else + { + // only init once if any interval resides in current node + if (newIntersectsLeft == null) + { + newIntersectsLeft = new ArrayList<>(node.intersectsLeft); + newIntersectsRight = new ArrayList<>(node.intersectsRight); + } + boolean leftUpdated = false; + boolean rightUpdated = false; + + int i = Interval.minOrdering().binarySearchAsymmetric(node.intersectsLeft, intervalToRemove.min, Op.CEIL); + while (i < node.intersectsLeft.size()) + { + if (node.intersectsLeft.get(i).equals(intervalToRemove)) + { + newIntersectsLeft.set(i, intervalToAdd); + leftUpdated = true; + break; + } + i++; + } + + int j = Interval.maxOrdering().binarySearchAsymmetric(node.intersectsRight, intervalToRemove.max, Op.CEIL); + while (j < node.intersectsRight.size()) + { + if (node.intersectsRight.get(j).equals(intervalToRemove)) + { + newIntersectsRight.set(j, intervalToAdd); + rightUpdated = true; + break; + } + j++; + } + assert leftUpdated && rightUpdated : "leftupdated = " + leftUpdated + ", rightupdated = " + rightUpdated; + updated++; + } + } - current = node.intersectsLeft.iterator(); + assert leftSegment.size() + rightSegment.size() + updated == replacements.size() : + "leftSegment size (" + leftSegment.size() + ") + rightSegment size (" + rightSegment.size() + + ") + updated (" + updated + ") != replacementMap size (" + replacements.size() + ')'; + return new IntervalNode(node.center, + node.low, + node.high, + newIntersectsLeft != null ? newIntersectsLeft : node.intersectsLeft, + newIntersectsRight != null ? newIntersectsRight : node.intersectsRight, + replace(node.left, leftSegment), + replace(node.right, rightSegment)); + } - // We know this is the smaller not returned yet, but before doing - // its parent, we must do everyone on it's right. - gotoMinOf(node.right); - } + private IntervalNode add(Collection intervals) + { + return add(this, intervals); } - private void gotoMinOf(IntervalNode node) + private IntervalNode add(IntervalNode root, Collection intervals) { - while (node != null) + if (intervals.isEmpty()) + return root; + + if (root == null) + { + List minSortedIntervals = new ArrayList<>(intervals); + Collections.sort(minSortedIntervals, Interval.minOrdering()); + List maxSortedIntervals = new ArrayList<>(intervals); + Collections.sort(maxSortedIntervals, Interval.maxOrdering()); + return new IntervalNode(minSortedIntervals, maxSortedIntervals); + } + + List leftSegment = new ArrayList<>(); + List rightSegment = new ArrayList<>(); + C newLow = root.low; + C newHigh = root.high; + List newIntersectsLeft = null; + List newIntersectsRight = null; + for (I i : intervals) { - stack.offerFirst(node); - node = node.left; + newLow = newLow.compareTo(i.min) < 0 ? newLow : i.min; + newHigh = newHigh.compareTo(i.max) > 0 ? newHigh : i.max; + if (i.max.compareTo(root.center) < 0) + { + leftSegment.add(i); + } + else if (i.min.compareTo(root.center) > 0) + { + rightSegment.add(i); + } + else + { + if (newIntersectsLeft == null) + { + newIntersectsLeft = new ArrayList<>(root.intersectsLeft); + newIntersectsRight = new ArrayList<>(root.intersectsRight); + } + int leftIdx = Collections.binarySearch(newIntersectsLeft, i, Interval.minOrdering()); + checkState(leftIdx < 0, "Should not add the same interval twice"); + leftIdx = -1 - leftIdx; + newIntersectsLeft.add(leftIdx, i); + + int rightIdx = Collections.binarySearch(newIntersectsRight, i, Interval.maxOrdering()); + checkState(rightIdx < 0, "Should not add the same interval twice"); + rightIdx = -1 - rightIdx; + newIntersectsRight.add(rightIdx, i); + } } + return new IntervalNode(root.center, + newLow, + newHigh, + newIntersectsLeft != null ? newIntersectsLeft : root.intersectsLeft, + newIntersectsRight != null ? newIntersectsRight : root.intersectsRight, + add(root.left, leftSegment), + add(root.right, rightSegment)); } } diff --git a/test/unit/org/apache/cassandra/utils/IntervalTreeTest.java b/test/unit/org/apache/cassandra/utils/IntervalTreeTest.java index 14e70c0b001e..2a8cd2f76f01 100644 --- a/test/unit/org/apache/cassandra/utils/IntervalTreeTest.java +++ b/test/unit/org/apache/cassandra/utils/IntervalTreeTest.java @@ -28,15 +28,17 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.cassandra.config.DatabaseDescriptor; import org.quicktheories.WithQuickTheories; import org.quicktheories.core.Gen; import org.quicktheories.generators.SourceDSL; +import static com.google.common.base.Predicates.not; import static java.util.concurrent.TimeUnit.SECONDS; -import static java.util.function.Predicate.not; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_INTERVAL_TREE_EXPENSIVE_CHECKS; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -46,15 +48,26 @@ public class IntervalTreeTest implements WithQuickTheories { + static final int TESTING_SECONDS = 15; + + private final AtomicInteger id = new AtomicInteger(); + @BeforeClass - public static void enableExpensiveRangeChecks() + public static void beforeClass() { assertFalse(TEST_INTERVAL_TREE_EXPENSIVE_CHECKS.getBoolean()); // Expect off by default + DatabaseDescriptor.daemonInitialization(); TEST_INTERVAL_TREE_EXPENSIVE_CHECKS.setBoolean(true); assertTrue(TEST_INTERVAL_TREE_EXPENSIVE_CHECKS.getBoolean()); assertTrue(IntervalTree.EXPENSIVE_CHECKS); } + @Before + public void setUp() + { + id.set(0); + } + @Test public void testSearch() { @@ -314,17 +327,20 @@ public void testPointSearchEquivalence() resultPoint, resultInterval); } + private String intervalData(int lo, int hi) + { + return "(" + lo + "," + hi + "," + id.getAndIncrement() + ")"; + } + private Gen> intervalGen() { - AtomicInteger id = new AtomicInteger(); return SourceDSL.integers().between(-5, 5) .flatMap(start -> SourceDSL.integers().between(-5, 5) .map(end -> { int lo = Math.min(start, end); int hi = Math.max(start, end); - String data = "(" + lo + "," + hi + "," + id.getAndIncrement() + ")"; - return Interval.create(lo, hi, data); + return Interval.create(lo, hi, intervalData(lo, hi)); })); } @@ -369,7 +385,7 @@ private List search(Collection> intervals, Interval< @Test public void qtIntervalTreeTest() { - qt().forAll(intervalsListGen(), queryGen()) + qt().withExamples(-1).withTestingTime(TESTING_SECONDS, SECONDS).forAll(intervalsListGen(), queryGen()) .check((intervals, query) -> { IntervalTree> tree = IntervalTree.build(intervals); @@ -403,12 +419,12 @@ public void qtIntervalTreeTest() } @Test - public void qtUpdateFunctionTest() + public void qtUpdateTest() { - qt().withExamples(-1).withTestingTime(30, SECONDS).forAll(intervalsListGen(), - intervalsListGen(), - SourceDSL.lists().of(queryGen()).ofSizeBetween(1, 4), - SourceDSL.integers().all()) + qt().withExamples(-1).withTestingTime(TESTING_SECONDS, SECONDS).forAll(intervalsListGen(), + intervalsListGen(), + SourceDSL.lists().of(queryGen()).ofSizeBetween(1, 4), + SourceDSL.integers().all()) .check((original, toAdd, queries, seed) -> { IntervalTree> originalTree = IntervalTree.build(original); @@ -423,27 +439,19 @@ public void qtUpdateFunctionTest() toAdd.removeAll(original.stream().filter(not(removals::contains)).collect(Collectors.toList())); - IntervalTree> updatedTree = originalTree.update(removals.toArray(new Interval[0]), toAdd.toArray(new Interval[0])); + Set> expectedFinal = new HashSet<>(original); + expectedFinal.removeAll(removals); + expectedFinal.addAll(toAdd); - Set> naiveFinal = new HashSet<>(original); - naiveFinal.removeAll(removals); - naiveFinal.addAll(toAdd); + IntervalTree> updatedTree = originalTree.update(removals.toArray(new Interval[0]), toAdd.toArray(new Interval[0])); Set> iteratedTree = ImmutableSet.copyOf(updatedTree); - if (!naiveFinal.equals(iteratedTree)) - originalTree.update(removals.toArray(new Interval[0]), toAdd.toArray(new Interval[0])); - assertEquals(naiveFinal, iteratedTree); + assertEquals(expectedFinal, iteratedTree); for (Interval query : queries) { Set actualResults = ImmutableSet.copyOf(updatedTree.search(query)); - Set expectedResults = ImmutableSet.copyOf(search(naiveFinal, query)); - - if (!expectedResults.equals(actualResults)) - { - originalTree.update(removals.toArray(new Interval[0]), toAdd.toArray(new Interval[0])); - updatedTree.search(query); - } + Set expectedResults = ImmutableSet.copyOf(search(expectedFinal, query)); assertEquals(expectedResults, actualResults); @@ -457,4 +465,143 @@ public void qtUpdateFunctionTest() return true; }); } -} \ No newline at end of file + + @Test + public void qtReplaceFunctionTest() + { + qt().withExamples(-1).withTestingTime(TESTING_SECONDS, SECONDS) + .forAll(intervalsListGen(), // Our random list of intervals + SourceDSL.lists().of(queryGen()).ofSizeBetween(1, 4), + SourceDSL.integers().all()) + .check((original, queries, seed) -> { + + IntervalTree> originalTree = IntervalTree.build(original); + java.util.Random rng = new java.util.Random(seed); + List> expectedFinal = new ArrayList<>(original); + + int numReplacements = rng.nextInt(original.size() + 1); + List> replacements = new ArrayList<>(original); + for (int i = 0; i < original.size() - numReplacements; i++) + replacements.remove(rng.nextInt(replacements.size())); + + List, Interval>> toReplace = new ArrayList<>(); + for (int i = 0; i < replacements.size(); i++) + { + Interval oldInterval = replacements.get(i); + + Interval newInterval = Interval.create( + oldInterval.min, + oldInterval.max, + intervalData(oldInterval.min, oldInterval.max) + ); + toReplace.add(Pair.create(oldInterval, newInterval)); + } + + for (Pair, Interval> entry : toReplace) + { + expectedFinal.remove(entry.left); + expectedFinal.add(entry.right); + } + + IntervalTree> replacedTree = originalTree.replace(toReplace); + + Set> iteratedReplaced = ImmutableSet.copyOf(replacedTree); + assertEquals("Iterated intervals should match expected set after replace", + ImmutableSet.copyOf(expectedFinal), iteratedReplaced); + + for (Interval query : queries) + { + List replacedResults = replacedTree.search(query); + List expectedResults = search(expectedFinal, query); + + Set replacedSet = new HashSet<>(replacedResults); + Set expectedSet = new HashSet<>(expectedResults); + assertEquals("Search results mismatch after replace for query " + query, + expectedSet, replacedSet); + + // Also check point-search if min==max + if (query.min.equals(query.max)) + { + List replacedPoint = replacedTree.search(query.min); + assertEquals("Point-search mismatch after replace for point " + query.min, + replacedSet, new HashSet<>(replacedPoint)); + } + } + + return true; + }); + } + + @Test + public void testAddIntervals() + { + List> intervals = new ArrayList<>(); + + intervals.add(Interval.create(-300, -200)); + intervals.add(Interval.create(-3, -2)); + intervals.add(Interval.create(1, 2)); + intervals.add(Interval.create(3, 6)); + intervals.add(Interval.create(2, 4)); + intervals.add(Interval.create(5, 7)); + intervals.add(Interval.create(4, 6)); + intervals.add(Interval.create(15, 20)); + intervals.add(Interval.create(49, 60)); + + + IntervalTree> it = IntervalTree.build(intervals); + + List> intervalsToAdd = new ArrayList<>(); + intervalsToAdd.add(Interval.create(1, 3)); + intervalsToAdd.add(Interval.create(8, 9)); + intervalsToAdd.add(Interval.create(40, 50)); + intervals.addAll(intervalsToAdd); + + it = it.add(intervalsToAdd.toArray(IntervalTree.EMPTY_ARRAY)); + + assertEquals(3, it.search(Interval.create(4, 4)).size()); + assertEquals(4, it.search(Interval.create(4, 5)).size()); + assertEquals(7, it.search(Interval.create(-1, 10)).size()); + assertEquals(0, it.search(Interval.create(-1, -1)).size()); + assertEquals(5, it.search(Interval.create(1, 4)).size()); + assertEquals(2, it.search(Interval.create(0, 1)).size()); + assertEquals(0, it.search(Interval.create(10, 12)).size()); + } + + @Test + public void qtAddTest() + { + qt().withExamples(-1).withTestingTime(TESTING_SECONDS, SECONDS) + .forAll(intervalsListGen(), queryGen()) + .check((intervals, query) -> { + Set> intervalsSet = ImmutableSet.copyOf(intervals); + IntervalTree> tree = IntervalTree.build(ImmutableList.of()); + List> allIntervals = new ArrayList<>(); + for (Interval interval : intervals) + { + allIntervals.add(interval); + tree = tree.add(new Interval[] {interval}); + } + + List expected = search(intervals, query); + List actual = tree.search(query); + + Set setExpected = new HashSet<>(expected); + Set setActual = new HashSet<>(actual); + + assertEquals(setExpected, setActual); + + if (query.min.equals(query.max)) + { + List actualPoint = tree.search(query.min); + assertEquals(setExpected, new HashSet<>(actualPoint)); + } + + List> sortedByMin = new ArrayList<>(intervals); + sortedByMin.sort(Interval.minOrdering()); + + Set> fromTree = ImmutableSet.copyOf(tree); + assertEquals(intervalsSet, fromTree); + return true; + }); + } +} From 3969de6524d58b87f3ec51213c8235accc00edf6 Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Thu, 29 May 2025 22:47:03 +0100 Subject: [PATCH 339/340] Mark JDK 17 as production ready patch by Dmitry Konstantinov; reviewed by Michael Semb Wever, Stefan Miklosovic for CASSANDRA-20681 --- CHANGES.txt | 1 + NEWS.txt | 7 +++++++ doc/modules/cassandra/pages/reference/java17.adoc | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 50f6f851e8dc..01e0260626c1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0.5 + * Full Java 17 support (CASSANDRA-20681) * Ensure replica filtering protection does not trigger unnecessary short read protection reads (CASSANDRA-20639) * Unified Compaction does not properly validate min and target sizes (CASSANDRA-20398) * Avoid lambda usage in TrieMemoryIndex range queries and ensure queue size tracking is per column (CASSANDRA-20668) diff --git a/NEWS.txt b/NEWS.txt index efbc8d7ac86a..c6db55636ea1 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -65,6 +65,13 @@ restore snapshots created with the previous major version using the 'sstableloader' tool. You can upgrade the file format of your snapshots using the provided 'sstableupgrade' tool. +5.0.5 +===== + +New features +------------ + - Full support for Java 17, it is not experimental anymore. + 5.0.4 ===== diff --git a/doc/modules/cassandra/pages/reference/java17.adoc b/doc/modules/cassandra/pages/reference/java17.adoc index 1ec3aab36e1e..645711a66881 100644 --- a/doc/modules/cassandra/pages/reference/java17.adoc +++ b/doc/modules/cassandra/pages/reference/java17.adoc @@ -8,7 +8,7 @@ the vertical axis and the run version is along the horizontal axis. [width="68%",cols="34%,30%,36%",] |=== | | Java 11 (Run) | Java 17 (Run) -| Java 11 (Build) | Supported | Experimental Support +| Java 11 (Build) | Supported | Supported | Java 17(Build) | Not Supported | Experimental in CI |=== From a2199e105847c8cc646d1db4a726e7fcfbad7e53 Mon Sep 17 00:00:00 2001 From: maoling Date: Mon, 31 Mar 2025 23:39:00 +0800 Subject: [PATCH 340/340] Add additional metrics around hints patch by Ling Mao; reviewed by Stefan Miklosovic, Maxim Muzafarov for CASSANDRA-20499 Co-authored-by: Stefan Miklosovic --- .../cassandra/hints/HintVerbHandler.java | 12 +++++- .../apache/cassandra/hints/HintsReader.java | 16 ++++++-- .../apache/cassandra/hints/HintsService.java | 11 ++++++ .../metrics/CassandraMetricsRegistry.java | 5 +++ .../metrics/HintsServiceMetrics.java | 22 +++++++++++ .../test/metrics/HintsServiceMetricsTest.java | 39 +++++++++++++++++++ 6 files changed, 100 insertions(+), 5 deletions(-) diff --git a/src/java/org/apache/cassandra/hints/HintVerbHandler.java b/src/java/org/apache/cassandra/hints/HintVerbHandler.java index c91219d0d5d2..be164b9be1ca 100644 --- a/src/java/org/apache/cassandra/hints/HintVerbHandler.java +++ b/src/java/org/apache/cassandra/hints/HintVerbHandler.java @@ -26,6 +26,7 @@ import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.exceptions.RetryOnDifferentSystemException; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.metrics.HintsServiceMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessagingService; @@ -106,7 +107,16 @@ else if (!StorageProxy.instance.appliesLocally(hint.mutation)) try { // the common path - the node is both the destination and a valid replica for the hint. - hint.applyFuture().addCallback(o -> respond(message), e -> logger.debug("Failed to apply hint", e)); + hint.applyFuture().addCallback( + o -> { + HintsServiceMetrics.hintsApplySucceeded.mark(); + respond(message); + }, + e -> { + HintsServiceMetrics.hintsApplyFailed.mark(); + logger.debug("Failed to apply hint", e); + } + ); } catch (RetryOnDifferentSystemException e) { diff --git a/src/java/org/apache/cassandra/hints/HintsReader.java b/src/java/org/apache/cassandra/hints/HintsReader.java index 2738f023f734..117e1ccdd805 100644 --- a/src/java/org/apache/cassandra/hints/HintsReader.java +++ b/src/java/org/apache/cassandra/hints/HintsReader.java @@ -34,6 +34,7 @@ import org.apache.cassandra.exceptions.UnknownTableException; import org.apache.cassandra.io.FSReadError; +import org.apache.cassandra.metrics.HintsServiceMetrics; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.AbstractIterator; @@ -231,8 +232,7 @@ private Hint computeNextInternal() throws IOException private Hint readHint(int size) throws IOException { - if (rateLimiter != null) - rateLimiter.acquire(size); + applyThrottleRateLimit(size); input.limit(size); Hint hint; @@ -338,8 +338,7 @@ private ByteBuffer computeNextInternal() throws IOException private ByteBuffer readBuffer(int size) throws IOException { - if (rateLimiter != null) - rateLimiter.acquire(size); + applyThrottleRateLimit(size); input.limit(size); ByteBuffer buffer = Hint.serializer.readBufferIfLive(input, now, size, descriptor.messagingVersion()); @@ -364,4 +363,13 @@ private static boolean verifyAllZeros(ChecksummedDataInput input) throws IOExcep } return true; } + + private void applyThrottleRateLimit(int size) + { + if (rateLimiter != null) + { + rateLimiter.acquire(size); + HintsServiceMetrics.hintsThrottle.inc(size); + } + } } diff --git a/src/java/org/apache/cassandra/hints/HintsService.java b/src/java/org/apache/cassandra/hints/HintsService.java index e3ae1907d7ca..0372c40e6418 100644 --- a/src/java/org/apache/cassandra/hints/HintsService.java +++ b/src/java/org/apache/cassandra/hints/HintsService.java @@ -292,6 +292,17 @@ public long getTotalHintsSize(UUID hostId) return store.getTotalFileSize(); } + /** + * Get the total hints file size of current node + */ + public long getTotalHintsSizeOfNode() + { + return catalog.stores() + .filter(Objects::nonNull) + .mapToLong(HintsStore::getTotalFileSize) + .sum(); + } + /** * Gracefully and blockingly shut down the service. * diff --git a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java index 58540f903fde..44bcf2d6cf87 100644 --- a/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java +++ b/src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java @@ -328,6 +328,11 @@ public Histogram histogram(MetricName name, MetricName alias, boolean considerZe return histogram; } + public > T gauge(MetricName name, T gauge) + { + return register(name, gauge); + } + public > T gauge(MetricName name, MetricName alias, T gauge) { T gaugeLoc = register(name, gauge); diff --git a/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java b/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java index c3203e74f408..275aeccfa646 100644 --- a/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java +++ b/src/java/org/apache/cassandra/metrics/HintsServiceMetrics.java @@ -17,16 +17,20 @@ */ package org.apache.cassandra.metrics; +import java.io.Serializable; import java.net.UnknownHostException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.LoadingCache; import org.apache.cassandra.concurrent.ImmediateExecutor; +import org.apache.cassandra.hints.HintsService; import org.apache.cassandra.locator.InetAddressAndPort; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; @@ -62,6 +66,12 @@ public final class HintsServiceMetrics public static final Meter hintsTimedOut = Metrics.meter(factory.createMetricName("HintsTimedOut")); public static final Meter hintsRetryDifferentSystem = Metrics.meter(factory.createMetricName("HintsRetryDifferentSystem")); + public static final Gauge hintsFileSize = Metrics.gauge(factory.createMetricName("HintsFileSize"), new TotalHintsSizeGauge()); + // Corresponding to the hinted_handoff_throttle_in_kb configuration + public static final Counter hintsThrottle = Metrics.counter(factory.createMetricName("HintsThrottle")); + + public static final Meter hintsApplySucceeded = Metrics.meter(factory.createMetricName("HintsApplySucceeded")); + public static final Meter hintsApplyFailed = Metrics.meter(factory.createMetricName("HintsApplyFailed")); /** Histogram of all hint delivery delays */ private static final Histogram globalDelayHistogram = Metrics.histogram(factory.createMetricName("Hint_delays"), false); @@ -71,6 +81,18 @@ public final class HintsServiceMetrics .executor(ImmediateExecutor.INSTANCE) .build(address -> Metrics.histogram(factory.createMetricName("Hint_delays-"+address.toString().replace(':', '.')), false)); + // because at the time of static hintsFileSize being initialized, + // HintsService.instance is null / is not initialized yet so usage of method reference is not possible, + // so this is the workaround. + private static class TotalHintsSizeGauge implements Gauge, Serializable + { + @Override + public Long getValue() + { + return HintsService.instance.getTotalHintsSizeOfNode(); + } + } + public static void updateDelayMetrics(InetAddressAndPort endpoint, long delay) { if (delay <= 0) diff --git a/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java b/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java index d834919cb843..42b274e628a8 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/metrics/HintsServiceMetricsTest.java @@ -108,6 +108,8 @@ public void testHintsServiceMetrics() throws Exception dropWritesForNode2.set(true); for (int i = 0; i < NUM_ROWS / 2; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i); + // some hints have created for node1, so file size must be greater than 0 + waitUntilAsserted(() -> assertThat(countHintsFileSize(node1)).isGreaterThan(0)); dropWritesForNode2.set(false); // write the second half of the rows with the third node dropping mutations requests, @@ -115,8 +117,15 @@ public void testHintsServiceMetrics() throws Exception dropWritesForNode3.set(true); for (int i = NUM_ROWS / 2; i < NUM_ROWS; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i); + // another hints have created for node1, so file size must be greater than 0 + waitUntilAsserted(() -> assertThat(countHintsFileSize(node1)).isGreaterThan(0)); dropWritesForNode3.set(false); + // Hints Throttle happens in the delivery process, so must be greater than 0 + waitUntilAsserted(() -> assertThat(countHintsThrottle(node1)).isGreaterThan(0)); + waitUntilAsserted(() -> assertThat(countHintsApplySucceeded(node1)).isEqualTo(0)); + waitUntilAsserted(() -> assertThat(countHintsApplyFailed(node1)).isEqualTo(0)); + // wait until all the hints have been successfully applied to the nodes that have been dropping mutations waitUntilAsserted(() -> assertThat(countRows(node2)).isEqualTo(countRows(node3)).isEqualTo(NUM_ROWS)); @@ -143,6 +152,13 @@ public void testHintsServiceMetrics() throws Exception assertThat(countHintsFailed(node)).isEqualTo(0); assertThat(countHintsTimedOut(node)).isEqualTo(0); assertThat(countHintsRetryDifferentSystem(node)).isEqualTo(0); + + assertThat(countHintsFileSize(node)).isEqualTo(0); + assertThat(countHintsThrottle(node)).isEqualTo(0); + // node two and three must apply these hints which belongs to them, so must be greater than 0 + assertThat(countHintsApplySucceeded(node)).isGreaterThan(0); + assertThat(countHintsApplyFailed(node)).isEqualTo(0); + assertThat(countGlobalDelays(node)).isEqualTo(0); cluster.forEach(target -> assertThat(countEndpointDelays(node, target)).isEqualTo(0)); } @@ -187,6 +203,29 @@ private static Long countHintsRetryDifferentSystem(IInvokableInstance node) return node.callOnInstance(() -> HintsServiceMetrics.hintsRetryDifferentSystem.getCount()); } + private static Long countHintsFileSize(IInvokableInstance node) + { + return node.callOnInstance(HintsServiceMetrics.hintsFileSize::getValue); + } + + @SuppressWarnings("Convert2MethodRef") + private static Long countHintsApplySucceeded(IInvokableInstance node) + { + return node.callOnInstance(() -> HintsServiceMetrics.hintsApplySucceeded.getCount()); + } + + @SuppressWarnings("Convert2MethodRef") + private static Long countHintsApplyFailed(IInvokableInstance node) + { + return node.callOnInstance(() -> HintsServiceMetrics.hintsApplyFailed.getCount()); + } + + @SuppressWarnings("Convert2MethodRef") + private static Long countHintsThrottle(IInvokableInstance node) + { + return node.callOnInstance(() -> HintsServiceMetrics.hintsThrottle.getCount()); + } + private static Long countGlobalDelays(IInvokableInstance node) { return getHistogramCount(node, "org.apache.cassandra.metrics.HintsService.Hint_delays");

    30. * When reading from journal segments, skip descriptors that were read from the table. */ - public void readAll(K key, Reader reader) + public void readAll(K key, Reader reader) { journal.readAll(key, new JournalAndTableRecordConsumer(key, reader)); } @@ -195,6 +192,22 @@ private void readAllFromTable(K key, TableRecordConsumer onEntry) } } + private void readRow(K key, Unfiltered unfiltered, EntryHolder into, RecordConsumer onEntry) + { + Invariants.checkState(unfiltered.isRow()); + Row row = (Row) unfiltered; + + long descriptor = LongType.instance.compose(ByteBuffer.wrap((byte[]) row.clustering().get(0))); + int position = Int32Type.instance.compose(ByteBuffer.wrap((byte[]) row.clustering().get(1))); + + into.key = key; + into.value = row.getCell(recordColumn).buffer(); + into.hosts = SENTINEL_HOSTS; + into.userVersion = Int32Type.instance.compose(row.getCell(versionColumn).buffer()); + + onEntry.accept(descriptor, position, into.key, into.value, into.hosts, into.userVersion); + } + public static DecoratedKey makePartitionKey(ColumnFamilyStore cfs, K key, KeySupport keySupport, int version) { try (DataOutputBuffer out = new DataOutputBuffer(keySupport.serializedSize(version))) @@ -209,19 +222,152 @@ public static DecoratedKey makePartitionKey(ColumnFamilyStore cfs, K key, Ke } } - private void readRow(K key, Unfiltered unfiltered, EntryHolder into, RecordConsumer onEntry) + @SuppressWarnings("resource") // Auto-closeable iterator will release related resources + public KeyOrderIterator readAll() { - Invariants.checkState(unfiltered.isRow()); - Row row = (Row) unfiltered; + return new JournalAndTableKeyIterator(); + } - long descriptor = LongType.instance.compose(ByteBuffer.wrap((byte[]) row.clustering().get(0))); - int position = Int32Type.instance.compose(ByteBuffer.wrap((byte[]) row.clustering().get(1))); + private class TableIterator implements Closeable + { + private final UnfilteredPartitionIterator mergeIterator; + private final RefViewFragment view; - into.key = key; - into.value = row.getCell(recordColumn).buffer(); - into.hosts = SENTINEL_HOSTS; - into.userVersion = Int32Type.instance.compose(row.getCell(versionColumn).buffer()); + private UnfilteredRowIterator partition; + private LongHashSet visited = null; - onEntry.accept(descriptor, position, into.key, into.value, into.hosts, into.userVersion); + private TableIterator() + { + view = cfs.selectAndReference(v -> v.select(SSTableSet.LIVE)); + List scanners = new ArrayList<>(); + for (SSTableReader sstable : view.sstables) + scanners.add(sstable.getScanner()); + + mergeIterator = view.sstables.isEmpty() + ? EmptyIterators.unfilteredPartition(cfs.metadata()) + : UnfilteredPartitionIterators.merge(scanners, UnfilteredPartitionIterators.MergeListener.NOOP); + } + + public K key() + { + if (partition == null) + { + if (mergeIterator.hasNext()) + partition = mergeIterator.next(); + else + return null; + } + + return keySupport.deserialize(partition.partitionKey().getKey(), 0, accordJournalVersion); + } + + protected void readAllForKey(K key, RecordConsumer recordConsumer) + { + while (partition.hasNext()) + { + EntryHolder into = new EntryHolder<>(); + // TODO: use flyweight to avoid allocating extra lambdas? + readRow(key, partition.next(), into, (segment, position, key1, buffer, hosts, userVersion) -> { + visit(segment); + recordConsumer.accept(segment, position, key1, buffer, hosts, userVersion); + }); + } + + partition = null; + } + + void visit(long segment) + { + if (visited == null) + visited = new LongHashSet(); + visited.add(segment); + } + + boolean visited(long segment) + { + return visited != null && visited.contains(segment); + } + + + void clear() + { + visited = null; + } + + + @Override + public void close() + { + mergeIterator.close(); + view.close(); + } + } + + private class JournalAndTableKeyIterator implements KeyOrderIterator + { + final TableIterator tableIterator; + final Journal.StaticSegmentIterator staticSegmentIterator; + + private JournalAndTableKeyIterator() + { + this.tableIterator = new TableIterator(); + this.staticSegmentIterator = journal.staticSegmentIterator(); + } + + @Override + public K key() + { + K tableKey = tableIterator.key(); + K journalKey = staticSegmentIterator.key(); + if (tableKey == null) + return journalKey; + if (journalKey == null || keySupport.compare(tableKey, journalKey) > 0) + return journalKey; + + return tableKey; + } + + @Override + public void readAllForKey(K key, RecordConsumer reader) + { + K tableKey = tableIterator.key(); + K journalKey = staticSegmentIterator.key(); + if (tableKey != null && keySupport.compare(tableKey, key) == 0) + tableIterator.readAllForKey(key, reader); + + if (journalKey != null && keySupport.compare(journalKey, key) == 0) + staticSegmentIterator.readAllForKey(key, (segment, position, key1, buffer, hosts, userVersion) -> { + if (!tableIterator.visited(segment)) + reader.accept(segment, position, key1, buffer, hosts, userVersion); + }); + + tableIterator.clear(); + } + + public void close() + { + tableIterator.close(); + staticSegmentIterator.close(); + } + } + + public interface KeyOrderIterator extends Closeable + { + K key(); + void readAllForKey(K key, RecordConsumer reader); + void close(); + } + + public static void readBuffer(ByteBuffer buffer, Reader reader, int userVersion) + { + try (DataInputBuffer in = new DataInputBuffer(buffer, false)) + { + reader.read(in, userVersion); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java new file mode 100644 index 000000000000..f06ccd4c4fa4 --- /dev/null +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.NavigableMap; + +import com.google.common.collect.ImmutableSortedMap; + +import accord.local.DurableBefore; +import accord.local.RedundantBefore; +import accord.primitives.Deps; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; +import org.apache.cassandra.service.accord.serializers.KeySerializers; + +import static accord.local.CommandStores.RangesForEpoch; +import static org.apache.cassandra.service.accord.serializers.DepsSerializer.deps; + +// TODO (required): test with large collection values, and perhaps split out some fields if they have a tendency to grow larger +// TODO (required): alert on metadata size +// TODO (required): versioning +public class AccordJournalValueSerializers +{ + private static final int messagingVersion = MessagingService.VERSION_40; + public interface FlyweightSerializer + { + IMAGE mergerFor(JournalKey key); + + void serialize(JournalKey key, ENTRY from, DataOutputPlus out, int userVersion) throws IOException; + + void reserialize(JournalKey key, IMAGE from, DataOutputPlus out, int userVersion) throws IOException; + + void deserialize(JournalKey key, IMAGE into, DataInputPlus in, int userVersion) throws IOException; + } + + public static class CommandDiffSerializer + implements FlyweightSerializer + { + @Override + public SavedCommand.Builder mergerFor(JournalKey journalKey) + { + return new SavedCommand.Builder(); + } + + @Override + public void serialize(JournalKey key, SavedCommand.DiffWriter writer, DataOutputPlus out, int userVersion) + { + try + { + writer.write(out, userVersion); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + @Override + public void reserialize(JournalKey key, SavedCommand.Builder from, DataOutputPlus out, int userVersion) throws IOException + { + from.serialize(out, userVersion); + } + + @Override + public void deserialize(JournalKey journalKey, SavedCommand.Builder into, DataInputPlus in, int userVersion) throws IOException + { + into.deserializeNext(in, userVersion); + } + } + + public abstract static class Accumulator + { + protected A accumulated; + + public Accumulator(A initial) + { + this.accumulated = initial; + } + + protected void update(V newValue) + { + accumulated = accumulate(accumulated, newValue); + } + + protected abstract A accumulate(A oldValue, V newValue); + + public A get() + { + return accumulated; + } + } + + public static class IdentityAccumulator extends Accumulator + { + public IdentityAccumulator(T initial) + { + super(initial); + } + + @Override + protected T accumulate(T oldValue, T newValue) + { + return newValue; + } + } + + public static class RedundantBeforeAccumulator extends Accumulator + { + public RedundantBeforeAccumulator() + { + super(RedundantBefore.EMPTY); + } + + @Override + protected RedundantBefore accumulate(RedundantBefore oldValue, RedundantBefore newValue) + { + return RedundantBefore.merge(oldValue, newValue); + } + } + + public static class RedundantBeforeSerializer + implements FlyweightSerializer + { + @Override + public RedundantBeforeAccumulator mergerFor(JournalKey journalKey) + { + return new RedundantBeforeAccumulator(); + } + + @Override + public void serialize(JournalKey key, RedundantBefore entry, DataOutputPlus out, int userVersion) + { + try + { + if (entry == RedundantBefore.EMPTY) + { + out.writeInt(0); + return; + } + out.writeInt(1); + CommandStoreSerializers.redundantBefore.serialize(entry, out, messagingVersion); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + @Override + public void reserialize(JournalKey key, RedundantBeforeAccumulator from, DataOutputPlus out, int userVersion) throws IOException + { + serialize(key, from.get(), out, userVersion); + } + + @Override + public void deserialize(JournalKey journalKey, RedundantBeforeAccumulator into, DataInputPlus in, int userVersion) throws IOException + { + if (in.readInt() == 0) + { + into.update(RedundantBefore.EMPTY); + return; + } + into.update(CommandStoreSerializers.redundantBefore.deserialize(in, messagingVersion)); + } + } + + public static class DurableBeforeAccumulator extends Accumulator + { + public DurableBeforeAccumulator() + { + super(DurableBefore.EMPTY); + } + + @Override + protected DurableBefore accumulate(DurableBefore oldValue, DurableBefore newValue) + { + return DurableBefore.merge(oldValue, newValue); + } + } + + public static class DurableBeforeSerializer implements FlyweightSerializer + { + public DurableBeforeAccumulator mergerFor(JournalKey journalKey) + { + return new DurableBeforeAccumulator(); + } + + @Override + public void serialize(JournalKey key, DurableBefore entry, DataOutputPlus out, int userVersion) + { + try + { + CommandStoreSerializers.durableBefore.serialize(entry, out, messagingVersion); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + @Override + public void reserialize(JournalKey key, DurableBeforeAccumulator from, DataOutputPlus out, int userVersion) throws IOException + { + serialize(key, from.get(), out, userVersion); + } + + @Override + public void deserialize(JournalKey journalKey, DurableBeforeAccumulator into, DataInputPlus in, int userVersion) throws IOException + { + // TODO: maybe using local serializer is not the best call here, but how do we distinguish + // between messaging and disk versioning? + into.update(CommandStoreSerializers.durableBefore.deserialize(in, messagingVersion)); + } + } + + public static class BootstrapBeganAtSerializer + implements FlyweightSerializer, IdentityAccumulator>> + { + @Override + public IdentityAccumulator> mergerFor(JournalKey key) + { + return new IdentityAccumulator<>(ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY)); + } + + @Override + public void serialize(JournalKey key, NavigableMap entry, DataOutputPlus out, int userVersion) throws IOException + { + CommandStoreSerializers.bootstrapBeganAt.serialize(entry, out, messagingVersion); + } + + @Override + public void reserialize(JournalKey key, IdentityAccumulator> image, DataOutputPlus out, int userVersion) throws IOException + { + serialize(key, image.get(), out, userVersion); + } + + @Override + public void deserialize(JournalKey key, IdentityAccumulator> into, DataInputPlus in, int userVersion) throws IOException + { + into.update(CommandStoreSerializers.bootstrapBeganAt.deserialize(in, messagingVersion)); + } + } + + public static class SafeToReadSerializer implements FlyweightSerializer, IdentityAccumulator>> + { + @Override + public IdentityAccumulator> mergerFor(JournalKey key) + { + return new IdentityAccumulator<>(ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY)); + } + + @Override + public void serialize(JournalKey key, NavigableMap from, DataOutputPlus out, int userVersion) throws IOException + { + CommandStoreSerializers.safeToRead.serialize(from, out, messagingVersion); + } + + @Override + public void reserialize(JournalKey key, IdentityAccumulator> from, DataOutputPlus out, int userVersion) throws IOException + { + serialize(key, from.get(), out, userVersion); + } + + @Override + public void deserialize(JournalKey key, IdentityAccumulator> into, DataInputPlus in, int userVersion) throws IOException + { + into.update(CommandStoreSerializers.safeToRead.deserialize(in, messagingVersion)); + } + } + + public static class RangesForEpochSerializer + implements FlyweightSerializer> + { + + public IdentityAccumulator mergerFor(JournalKey key) + { + return new IdentityAccumulator<>(null); + } + + @Override + public void serialize(JournalKey key, RangesForEpoch.Snapshot from, DataOutputPlus out, int userVersion) throws IOException + { + out.writeUnsignedVInt32(from.ranges.length); + for (Ranges ranges : from.ranges) + KeySerializers.ranges.serialize(ranges, out, messagingVersion); + + out.writeUnsignedVInt32(from.epochs.length); + for (long epoch : from.epochs) + out.writeLong(epoch); + } + + @Override + public void reserialize(JournalKey key, IdentityAccumulator from, DataOutputPlus out, int userVersion) throws IOException + { + serialize(key, from.get(), out, messagingVersion); + } + + @Override + public void deserialize(JournalKey key, IdentityAccumulator into, DataInputPlus in, int userVersion) throws IOException + { + Ranges[] ranges = new Ranges[in.readUnsignedVInt32()]; + for (int i = 0; i < ranges.length; i++) + ranges[i] = KeySerializers.ranges.deserialize(in, messagingVersion); + + long[] epochs = new long[in.readUnsignedVInt32()]; + for (int i = 0; i < epochs.length; i++) + epochs[i] = in.readLong(); // TODO: assert lengths equal? + + into.update(new RangesForEpoch.Snapshot(epochs, ranges)); + } + } + + public static class HistoricalTransactionsAccumulator extends Accumulator, Deps> + { + public HistoricalTransactionsAccumulator() + { + super(new ArrayList<>()); + } + + @Override + protected List accumulate(List oldValue, Deps deps) + { + accumulated.add(deps); // we can keep it mutable + return accumulated; + } + } + + public static class HistoricalTransactionsSerializer implements FlyweightSerializer + { + @Override + public HistoricalTransactionsAccumulator mergerFor(JournalKey key) + { + return new HistoricalTransactionsAccumulator(); + } + + @Override + public void serialize(JournalKey key, Deps from, DataOutputPlus out, int userVersion) throws IOException + { + out.writeUnsignedVInt32(1); + deps.serialize(from, out, messagingVersion); + } + + @Override + public void reserialize(JournalKey key, HistoricalTransactionsAccumulator from, DataOutputPlus out, int userVersion) throws IOException + { + out.writeUnsignedVInt32(from.get().size()); + for (Deps d : from.get()) + deps.serialize(d, out, messagingVersion); + } + + @Override + public void deserialize(JournalKey key, HistoricalTransactionsAccumulator into, DataInputPlus in, int userVersion) throws IOException + { + int count = in.readUnsignedVInt32(); + for (int i = 0; i < count; i++) + into.update(deps.deserialize(in, messagingVersion)); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index e51017996e84..104096930a9f 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -24,14 +24,11 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.NavigableMap; import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -39,36 +36,31 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.api.Key; +import accord.api.RoutingKey; +import accord.local.StoreParticipants; +import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; -import accord.local.DurableBefore; import accord.local.Node; import accord.local.RedundantBefore; -import accord.local.SaveStatus; -import accord.local.Status; -import accord.local.Status.Durability; -import accord.local.cfk.CommandsForKey; +import accord.primitives.SaveStatus; +import accord.primitives.Status; +import accord.primitives.Status.Durability; import accord.primitives.Ranges; import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.topology.Topology; import accord.utils.Invariants; -import accord.utils.ReducingRangeMap; import accord.utils.async.Observable; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.cql3.ColumnIdentifier; -import org.apache.cassandra.cql3.QueryOptions; -import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; -import org.apache.cassandra.cql3.statements.ModificationStatement; import org.apache.cassandra.cql3.statements.schema.CreateTableStatement; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.ClusteringComparator; @@ -76,7 +68,6 @@ import org.apache.cassandra.db.Columns; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.DeletionTime; -import org.apache.cassandra.db.IMutation; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.LivenessInfo; import org.apache.cassandra.db.Mutation; @@ -121,6 +112,7 @@ import org.apache.cassandra.index.accord.RouteIndex; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.LocalVersionedSerializer; +import org.apache.cassandra.io.MessageVersionProvider; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.schema.ColumnMetadata; @@ -138,17 +130,14 @@ import org.apache.cassandra.schema.UserFunctions; import org.apache.cassandra.schema.Views; import org.apache.cassandra.serializers.UUIDSerializer; -import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.accord.AccordConfigurationService.SyncStatus; import org.apache.cassandra.service.accord.api.AccordRoutingKey; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.serializers.AccordRoutingKeyByteSource; import org.apache.cassandra.service.accord.serializers.CommandSerializers; -import org.apache.cassandra.service.accord.serializers.CommandStoreSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.TopologySerializers; -import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Clock.Global; import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.btree.BTree; @@ -160,12 +149,12 @@ import static accord.utils.Invariants.checkState; import static java.lang.String.format; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; -import static org.apache.cassandra.cql3.QueryProcessor.executeOnceInternal; import static org.apache.cassandra.db.partitions.PartitionUpdate.singleRowUpdate; import static org.apache.cassandra.db.rows.BTreeRow.singleCellRow; import static org.apache.cassandra.db.rows.BufferCell.live; import static org.apache.cassandra.db.rows.BufferCell.tombstone; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; +import static org.apache.cassandra.service.accord.serializers.AccordRoutingKeyByteSource.currentVersion; import static org.apache.cassandra.service.accord.serializers.KeySerializers.blobMapToRanges; import static org.apache.cassandra.utils.ByteBufferUtil.EMPTY_BYTE_BUFFER; import static org.apache.cassandra.utils.ByteBufferUtil.bytes; @@ -180,11 +169,9 @@ public class AccordKeyspace public static final String COMMANDS_FOR_KEY = "commands_for_key"; public static final String TOPOLOGIES = "topologies"; public static final String EPOCH_METADATA = "epoch_metadata"; - public static final String COMMAND_STORE_METADATA = "command_store_metadata"; public static final Set TABLE_NAMES = ImmutableSet.of(COMMANDS, TIMESTAMPS_FOR_KEY, COMMANDS_FOR_KEY, TOPOLOGIES, EPOCH_METADATA, - COMMAND_STORE_METADATA, JOURNAL); private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); @@ -262,27 +249,22 @@ static TokenType valueOf(Token token) + "domain int," // this is stored as part of txn_id, used currently for cheaper scans of the table + format("txn_id %s,", TIMESTAMP_TUPLE) + "status int," - + "route blob," + + "participants blob," + "durability int," + format("execute_at %s,", TIMESTAMP_TUPLE) + "PRIMARY KEY((store_id, domain, txn_id))" + ')') .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, Int32Type.instance, TIMESTAMP_TYPE))) .indexes(Indexes.builder() - .add(IndexMetadata.fromSchemaMetadata("route", IndexMetadata.Kind.CUSTOM, ImmutableMap.of("class_name", RouteIndex.class.getCanonicalName(), "target", "route"))) + .add(IndexMetadata.fromSchemaMetadata("route", IndexMetadata.Kind.CUSTOM, ImmutableMap.of("class_name", RouteIndex.class.getCanonicalName(), "target", "participants"))) .build()) .build(); // TODO: naming is not very clearly distinct from the base serializers public static class LocalVersionedSerializers { - static final LocalVersionedSerializer> route = localSerializer(KeySerializers.route); + static final LocalVersionedSerializer participants = localSerializer(CommandSerializers.participants); static final LocalVersionedSerializer topology = localSerializer(TopologySerializers.topology); - static final LocalVersionedSerializer> rejectBefore = localSerializer(CommandStoreSerializers.rejectBefore); - static final LocalVersionedSerializer durableBefore = localSerializer(CommandStoreSerializers.durableBefore); - static final LocalVersionedSerializer redundantBefore = localSerializer(CommandStoreSerializers.redundantBefore); - static final LocalVersionedSerializer> bootstrapBeganAt = localSerializer(CommandStoreSerializers.bootstrapBeganAt); - static final LocalVersionedSerializer> safeToRead = localSerializer(CommandStoreSerializers.safeToRead); private static LocalVersionedSerializer localSerializer(IVersionedSerializer serializer) { @@ -305,12 +287,12 @@ public static class CommandsColumns public static final ColumnMetadata txn_id = getColumn(Commands, "txn_id"); public static final ColumnMetadata store_id = getColumn(Commands, "store_id"); public static final ColumnMetadata status = getColumn(Commands, "status"); - public static final ColumnMetadata route = getColumn(Commands, "route"); + public static final ColumnMetadata participants = getColumn(Commands, "participants"); public static final ColumnMetadata durability = getColumn(Commands, "durability"); public static final ColumnMetadata execute_at = getColumn(Commands, "execute_at"); - public static final ColumnMetadata[] TRUNCATE_FIELDS = new ColumnMetadata[] { durability, execute_at, route, status }; - public static final ColumnMetadata[] INVALIDATE_FIELDS = new ColumnMetadata[] { status }; + public static final ColumnMetadata[] TRUNCATE_FIELDS = new ColumnMetadata[] { durability, execute_at, participants, status }; + public static final ColumnMetadata[] SAVE_STATUS_ONLY_FIELDS = new ColumnMetadata[] { status }; static { @@ -374,33 +356,67 @@ public static Status.Durability getDurability(Row row) @Nullable public static Route getRoute(Row row) { - Cell cell = row.getCell(route); - return deserializeRouteOrNull(cell); + Cell cell = row.getCell(participants); + StoreParticipants participants = deserializeParticipantsOrNull(cell); + return participants == null ? null : participants.route(); } - private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSaveStatus, Cell durabilityCell, Cell executeAtCell, Cell routeCell, boolean updateTimestamps) + private static Object[] truncatedApplyLeaf(long newTimestamp, SaveStatus newSaveStatus, @Nullable Cell durabilityCell, @Nullable Cell executeAtCell, @Nullable Cell participantsCell, boolean updateTimestamps) { - checkArgument(durabilityCell.column() == CommandsColumns.durability); - checkArgument(executeAtCell.column() == CommandsColumns.execute_at); - checkArgument(routeCell.column() == CommandsColumns.route); - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS.length); + int count = 1 + (durabilityCell != null ? 1 : 0) + (executeAtCell != null ? 1 : 0) + (participantsCell != null ? 1 : 0); + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(count); int colIndex = 0; - newLeaf[colIndex++] = updateTimestamps ? durabilityCell.withUpdatedTimestamp(newTimestamp) : durabilityCell; - newLeaf[colIndex++] = updateTimestamps ? executeAtCell.withUpdatedTimestamp(newTimestamp) : executeAtCell; - newLeaf[colIndex++] = updateTimestamps ? routeCell.withUpdatedTimestamp(newTimestamp) : routeCell; - // Status always needs to use the new timestamp since we are replacing the existing value - // All the other columns are being retained unmodified with at most updated timestamps to accomdate deletion - //noinspection UnusedAssignment - newLeaf[colIndex++] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); + if (durabilityCell != null) + { + checkArgument(durabilityCell.column() == CommandsColumns.durability); + newLeaf[colIndex++] = updateTimestamps ? durabilityCell.withUpdatedTimestamp(newTimestamp) : durabilityCell; + } + if (executeAtCell != null) + { + checkArgument(executeAtCell.column() == CommandsColumns.execute_at); + newLeaf[colIndex++] = updateTimestamps ? executeAtCell.withUpdatedTimestamp(newTimestamp) : executeAtCell; + } + if (participantsCell != null) + { + checkArgument(participantsCell.column() == CommandsColumns.participants); + newLeaf[colIndex++] = updateTimestamps ? participantsCell.withUpdatedTimestamp(newTimestamp) : participantsCell; + } + newLeaf[colIndex] = BufferCell.live(status, newTimestamp, ByteBufferAccessor.instance.valueOf(newSaveStatus.ordinal())); + return newLeaf; + } + + private static Object[] expungePartialLeaf(Cell durabilityCell, Cell executeAtCell, Cell participantsCell) + { + int count = (durabilityCell != null ? 1 : 0) + (executeAtCell != null ? 1 : 0) + (participantsCell != null ? 1 : 0); + if (count == 0) + return null; + + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(count); + int colIndex = 0; + if (durabilityCell != null) + { + checkArgument(durabilityCell.column() == CommandsColumns.durability); + newLeaf[colIndex++] = durabilityCell; + } + if (executeAtCell != null) + { + checkArgument(executeAtCell.column() == CommandsColumns.execute_at); + newLeaf[colIndex++] = executeAtCell; + } + if (participantsCell != null) + { + checkArgument(participantsCell.column() == CommandsColumns.participants); + newLeaf[colIndex] = participantsCell; + } return newLeaf; } - public static Row invalidated(SaveStatus newSaveStatus, Row row, long nowInSec) + public static Row saveStatusOnly(SaveStatus newSaveStatus, Row row, long nowInSec) { long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); long newTimestamp = oldTimestamp + 1; - Object[] newLeaf = invalidatedLeaf(newTimestamp, newSaveStatus); + Object[] newLeaf = saveStatusOnlyLeaf(newTimestamp, newSaveStatus); // Including a deletion allows future compactions to drop data before it gets to the purger // but it is pretty optional because maybeDropTruncatedCommandColumns will drop the extra columns @@ -409,9 +425,9 @@ public static Row invalidated(SaveStatus newSaveStatus, Row row, long nowInSec) return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), deletion, newLeaf); } - private static Object[] invalidatedLeaf(long newTimestamp, SaveStatus newSaveStatus) + private static Object[] saveStatusOnlyLeaf(long newTimestamp, SaveStatus newSaveStatus) { - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(INVALIDATE_FIELDS.length); + Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(SAVE_STATUS_ONLY_FIELDS.length); int colIndex = 0; // Status always needs to use the new timestamp since we are replacing the existing value // All the other columns are being retained unmodified with at most updated timestamps to accomdate deletion @@ -420,17 +436,10 @@ private static Object[] invalidatedLeaf(long newTimestamp, SaveStatus newSaveSta return newLeaf; } - public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, Durability durability, Cell durabilityCell, Cell executeAtCell, Cell routeCell, boolean withOutcome) + public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSec, Durability durability, @Nullable Cell durabilityCell, @Nullable Cell executeAtCell, @Nullable Cell participantsCell, boolean withOutcome) { - checkArgument(durabilityCell.column() == CommandsColumns.durability); - checkArgument(executeAtCell.column() == CommandsColumns.execute_at); - checkArgument(routeCell.column() == CommandsColumns.route); long oldTimestamp = row.primaryKeyLivenessInfo().timestamp(); long newTimestamp = oldTimestamp + 1; - // If durability is not universal we don't want to delete older versions of the row that might have recorded - // a higher durability value. maybeDropTruncatedCommandColumns will take care of dropping things even if we don't drop via tombstones. - // durability should be the only column that could have an older value that is insufficient for propagating forward - // TODO (now): with UniversalOrInvalidated should this change? boolean doDeletion = durability == Durability.Universal; // We may not have what we need to generate a deletion and include the outcome in the truncated row @@ -438,7 +447,7 @@ public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSe if (withOutcome) doDeletion = false; - Object[] newLeaf = truncatedApplyLeaf(newTimestamp, newSaveStatus, durabilityCell, executeAtCell, routeCell, doDeletion); + Object[] newLeaf = truncatedApplyLeaf(newTimestamp, newSaveStatus, durabilityCell, executeAtCell, participantsCell, doDeletion); // Including a deletion allows future compactions to drop data before it gets to the purger // but it is pretty optional because maybeDropTruncatedCommandColumns will drop the extra columns @@ -447,47 +456,26 @@ public static Row truncatedApply(SaveStatus newSaveStatus, Row row, long nowInSe return BTreeRow.create(row.clustering(), LivenessInfo.create(newTimestamp, nowInSec), deletion, newLeaf); } - public static Row maybeDropTruncatedCommandColumns(Row row, Cell durabilityCell, Cell executeAtCell, Cell routeCell, Cell statusCell) + public static Row expungePartial(Row row, @Nullable Cell durabilityCell, @Nullable Cell executeAtCell, @Nullable Cell participantsCell) { - checkArgument(durabilityCell.column() == CommandsColumns.durability); - checkArgument(executeAtCell.column() == CommandsColumns.execute_at); - checkArgument(routeCell.column() == CommandsColumns.route); - checkArgument(statusCell.column() == CommandsColumns.status); - int colCount = row.columnCount(); - // If it's the exact length of the post truncate column count without outcome fields - // then it is exactly the columns needed for getting this far and withOutcome doesn't matter since - // nothing additional is available to include anyway - if (colCount == TRUNCATE_FIELDS.length) - return row; - - // Construct a replacement with just the available columns that are still needed - Object[] newLeaf = BTree.unsafeAllocateNonEmptyLeaf(TRUNCATE_FIELDS.length); - int colIndex = 0; - newLeaf[colIndex++] = durabilityCell; - newLeaf[colIndex++] = executeAtCell; - newLeaf[colIndex++] = routeCell; - //noinspection UnusedAssignment - newLeaf[colIndex++] = statusCell; - - return BTreeRow.create(row.clustering(), row.primaryKeyLivenessInfo(), row.deletion(), newLeaf); + Object[] newLeaf = expungePartialLeaf(durabilityCell, executeAtCell, participantsCell); + return BTreeRow.create(row.clustering(), row.primaryKeyLivenessInfo(), Deletion.LIVE, newLeaf); } } - //TODO (now, performance): do we actually care about the sort ordering? We don't do range scans on this table - //TODO (now, performance): should we remove key_token? We don't need it so its just added space private static final TableMetadata TimestampsForKeys = parse(TIMESTAMPS_FOR_KEY, "accord timestamps per key", "CREATE TABLE %s (" + "store_id int, " - + "key_token blob, " // can't use "token" as this is restricted word in CQL - + format("key %s, ", KEY_TUPLE) + + "routing_key blob, " // can't use "token" as this is restricted word in CQL + format("last_executed_timestamp %s, ", TIMESTAMP_TUPLE) + "last_executed_micros bigint, " + + format("last_write_id %s, ", TIMESTAMP_TUPLE) + format("last_write_timestamp %s, ", TIMESTAMP_TUPLE) - + "PRIMARY KEY((store_id, key_token, key))" + + "PRIMARY KEY((store_id, routing_key))" + ')') - .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance, KEY_TYPE))) + .partitioner(new LocalPartitioner(CompositeType.getInstance(Int32Type.instance, BytesType.instance))) .build(); public static class TimestampsForKeyColumns @@ -495,23 +483,22 @@ public static class TimestampsForKeyColumns static final ClusteringComparator keyComparator = TimestampsForKeys.partitionKeyAsClusteringComparator(); static final CompositeType partitionKeyType = (CompositeType) TimestampsForKeys.partitionKeyType; static final ColumnMetadata store_id = getColumn(TimestampsForKeys, "store_id"); - static final ColumnMetadata key_token = getColumn(TimestampsForKeys, "key_token"); - static final ColumnMetadata key = getColumn(TimestampsForKeys, "key"); + static final ColumnMetadata routing_key = getColumn(TimestampsForKeys, "routing_key"); public static final ColumnMetadata last_executed_timestamp = getColumn(TimestampsForKeys, "last_executed_timestamp"); public static final ColumnMetadata last_executed_micros = getColumn(TimestampsForKeys, "last_executed_micros"); public static final ColumnMetadata last_write_timestamp = getColumn(TimestampsForKeys, "last_write_timestamp"); static final Columns columns = Columns.from(Lists.newArrayList(last_executed_timestamp, last_executed_micros, last_write_timestamp)); - static ByteBuffer makePartitionKey(int storeId, Key key) + static ByteBuffer makeKey(int storeId, RoutingKey key) { - PartitionKey pk = (PartitionKey) key; - return keyComparator.make(storeId, serializeToken(pk.token()), serializeKey(pk)).serializeAsPartitionKey(); + TokenKey pk = (TokenKey) key; + return keyComparator.make(storeId, serializeRoutingKey(pk)).serializeAsPartitionKey(); } - static ByteBuffer makePartitionKey(int storeId, TimestampsForKey timestamps) + static ByteBuffer makeKey(int storeId, TimestampsForKey timestamps) { - return makePartitionKey(storeId, timestamps.key()); + return makeKey(storeId, timestamps.key()); } } @@ -527,9 +514,9 @@ public static int getStoreId(ByteBuffer[] partitionKeyComponents) return Int32Type.instance.compose(partitionKeyComponents[store_id.position()]); } - public static PartitionKey getKey(ByteBuffer[] partitionKeyComponents) + public static TokenKey getKey(ByteBuffer[] partitionKeyComponents) { - return deserializeKey(partitionKeyComponents[key.position()]); + return (TokenKey) deserializeRoutingKey(partitionKeyComponents[routing_key.position()]); } @Nullable @@ -596,18 +583,17 @@ public static Row truncateTimestampsForKeyRow(long nowInSec, Row row, Cell lastE private static TableMetadata commandsForKeysTable(String tableName) { return parse(tableName, - "accord commands per key", - "CREATE TABLE %s (" - + "store_id int, " - + "table_id uuid, " - + "key_token blob, " // can't use "token" as this is restricted word in CQL - + "key blob, " - + "data blob, " - + "PRIMARY KEY((store_id, table_id, key_token, key))" - + ')' - + " WITH compression = {'class':'NoopCompressor'};") - .partitioner(CFKPartitioner) - .build(); + "accord commands per key", + "CREATE TABLE %s (" + + "store_id int, " + + "table_id uuid, " + + "key_token blob, " // can't use "token" as this is restricted word in CQL + + "data blob, " + + "PRIMARY KEY((store_id, table_id, key_token))" + + ')' + + " WITH compression = {'class':'NoopCompressor'};") + .partitioner(CFKPartitioner) + .build(); } public static class CommandsForKeyAccessor @@ -619,7 +605,6 @@ public static class CommandsForKeyAccessor final ColumnMetadata store_id; final ColumnMetadata table_id; final ColumnMetadata key_token; - final ColumnMetadata key; final ColumnMetadata data; final RegularAndStaticColumns columns; @@ -633,7 +618,6 @@ public CommandsForKeyAccessor(TableMetadata table) this.store_id = getColumn(table, "store_id"); this.table_id = getColumn(table, "table_id"); this.key_token = getColumn(table, "key_token"); - this.key = getColumn(table, "key"); this.data = getColumn(table, "data"); this.columns = new RegularAndStaticColumns(Columns.NONE, Columns.from(Lists.newArrayList(data))); } @@ -653,22 +637,18 @@ public TableId getTableId(ByteBuffer[] partitionKeyComponents) return TableId.fromUUID(UUIDType.instance.compose(partitionKeyComponents[table_id.position()])); } - public PartitionKey getKey(DecoratedKey key) + public TokenKey getKey(DecoratedKey key) { return getKey(splitPartitionKey(key)); } - public PartitionKey getKey(ByteBuffer[] partitionKeyComponents) + public TokenKey getKey(ByteBuffer[] partitionKeyComponents) { TableId tableId = TableId.fromUUID(UUIDSerializer.instance.deserialize(partitionKeyComponents[table_id.position()])); - ByteBuffer keyBytes = partitionKeyComponents[key.position()]; - IPartitioner partitioner = SchemaHolder.schema.getTablePartitioner(tableId); - if (partitioner == null) - throw new IllegalStateException("Table with id " + tableId + " could not be found; was it deleted?"); - return new PartitionKey(tableId, partitioner.decorateKey(keyBytes)); + return deserializeTokenKeySeparateTable(tableId, partitionKeyComponents[key_token.position()]); } - public CommandsForKey getCommandsForKey(PartitionKey key, Row row) + public CommandsForKey getCommandsForKey(TokenKey key, Row row) { Cell cell = row.getCell(data); if (cell == null) @@ -685,7 +665,7 @@ public ByteBuffer serializeKeyNoTable(AccordRoutingKey key) } // TODO (expected): garbage-free filtering, reusing encoding - public Row withoutRedundantCommands(PartitionKey key, Row row, RedundantBefore.Entry redundantBefore) + public Row withoutRedundantCommands(TokenKey key, Row row, RedundantBefore.Entry redundantBefore) { Invariants.checkState(row.columnCount() == 1); Cell cell = row.getCell(data); @@ -745,21 +725,6 @@ public LocalCompositePrefixPartitioner.AbstractCompositePrefixToken getPrefixTok "max_epoch bigint " + ')').build(); - private static final TableMetadata CommandStoreMetadata = - parse(COMMAND_STORE_METADATA, - "command store state", - "CREATE TABLE %s (" + - "store_id int, " + - "reject_before blob, " + - "bootstrap_began_at blob, " + - "safe_to_read blob, " + - "redundant_before blob, " + - "durable_before blob, " + - "PRIMARY KEY(store_id)" + - ')').build(); - - private static final AtomicLong commandStoreMetadataTimestamp = new AtomicLong(); - private static TableMetadata.Builder parse(String name, String description, String cql) { return CreateTableStatement.parse(format(cql, name), ACCORD_KEYSPACE_NAME) @@ -780,7 +745,7 @@ public static KeyspaceMetadata metadata() public static Tables tables() { - return Tables.of(Commands, TimestampsForKeys, CommandsForKeys, Topologies, EpochMetadata, CommandStoreMetadata, Journal); + return Tables.of(Commands, TimestampsForKeys, CommandsForKeys, Topologies, EpochMetadata, Journal); } private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException @@ -863,7 +828,7 @@ public static Mutation getCommandMutation(int storeId, Command original, Command builder.addPrimaryKeyLivenessInfo(LivenessInfo.create(timestampMicros, nowInSeconds)); addEnumCellIfModified(CommandsColumns.durability, Command::durability, builder, timestampMicros, nowInSeconds, original, command); - addCellIfModified(CommandsColumns.route, Command::route, LocalVersionedSerializers.route, builder, timestampMicros, nowInSeconds, original, command); + addCellIfModified(CommandsColumns.participants, Command::participants, LocalVersionedSerializers.participants, builder, timestampMicros, nowInSeconds, original, command); addEnumCellIfModified(CommandsColumns.status, Command::saveStatus, builder, timestampMicros, nowInSeconds, original, command); addCellIfModified(CommandsColumns.execute_at, Command::executeAt, AccordKeyspace::serializeTimestamp, builder, timestampMicros, nowInSeconds, original, command); @@ -888,6 +853,13 @@ public static ByteBuffer serializeToken(Token token) return serializeToken(token, ByteBufferAccessor.instance); } + public static ByteBuffer serializeTableId(TableId tableId) + { + ByteBuffer buffer = ByteBuffer.allocate(tableId.serializedSize()); + tableId.serialize(buffer, ByteBufferAccessor.instance, 0); + return buffer; + } + private static V serializeToken(Token token, ValueAccessor accessor) { TokenType type = TokenType.valueOf(token); @@ -898,12 +870,6 @@ private static V serializeToken(Token token, ValueAccessor accessor) return value; } - @VisibleForTesting - public static ByteBuffer serializeKey(PartitionKey key) - { - return KEY_TYPE.pack(UUIDSerializer.instance.serialize(key.table().asUUID()), key.partitionKey().getKey()); - } - public static ByteBuffer serializeTimestamp(Timestamp timestamp) { return TIMESTAMP_TYPE.pack(bytes(timestamp.msb), bytes(timestamp.lsb), bytes(timestamp.node.id)); @@ -991,7 +957,7 @@ public static UntypedResultSet loadCommandRow(CommandStore commandStore, TxnId t public static void findAllKeysBetween(int commandStore, AccordRoutingKey start, boolean startInclusive, AccordRoutingKey end, boolean endInclusive, - Observable callback) + Observable callback) { Token startToken = CommandsForKeysAccessor.getPrefixToken(commandStore, start); @@ -1026,7 +992,7 @@ else if (startInclusive) { while (iter.hasNext()) { - PartitionKey pk = CommandsForKeysAccessor.getKey(iter.next()); + TokenKey pk = CommandsForKeysAccessor.getKey(iter.next()); callback.onNext(pk); } callback.onCompleted(); @@ -1062,29 +1028,37 @@ public static Status.Durability deserializeDurability(UntypedResultSet.Row row) return Status.Durability.values()[row.getInt("durability", 0)]; } - public static Route deserializeRouteOrNull(ByteBuffer bytes) throws IOException + public static StoreParticipants deserializeParticipantsOrNull(ByteBuffer bytes) throws IOException { - return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, LocalVersionedSerializers.route) : null; + return bytes != null && !ByteBufferAccessor.instance.isEmpty(bytes) ? deserialize(bytes, LocalVersionedSerializers.participants) : null; } - public static ByteBuffer serializeRoute(Route route) throws IOException + public static Route deserializeParticipantsRouteOnlyOrNull(ByteBuffer bytes) throws IOException { - return serialize(route, LocalVersionedSerializers.route); + if (bytes == null ||ByteBufferAccessor.instance.isEmpty(bytes)) + return null; + + try (DataInputBuffer in = new DataInputBuffer(bytes, true)) + { + MessageVersionProvider versionProvider = LocalVersionedSerializers.participants.deserializeVersion(in); + return CommandSerializers.participants.deserializeRouteOnly(in, versionProvider.messageVersion()); + } + } - private static Route deserializeRouteOrNull(UntypedResultSet.Row row) throws IOException + public static ByteBuffer serializeParticipants(StoreParticipants participants) throws IOException { - return deserializeRouteOrNull(row.getBlob("route")); + return serialize(participants, LocalVersionedSerializers.participants); } - public static Route deserializeRouteOrNull(Cell cell) + public static StoreParticipants deserializeParticipantsOrNull(Cell cell) { if (cell == null) return null; try { - return deserializeRouteOrNull(cell.buffer()); + return deserializeParticipantsOrNull(cell.buffer()); } catch (IOException e) { @@ -1092,21 +1066,9 @@ public static Route deserializeRouteOrNull(Cell cell) } } - public static PartitionKey deserializeKey(ByteBuffer buffer) + public static TokenKey deserializeTokenKeySeparateTable(TableId tableId, ByteBuffer tokenBytes) { - List split = KEY_TYPE.unpack(buffer, ByteBufferAccessor.instance); - TableId tableId = TableId.fromUUID(UUIDSerializer.instance.deserialize(split.get(0))); - ByteBuffer key = split.get(1); - - IPartitioner partitioner = SchemaHolder.schema.getTablePartitioner(tableId); - if (partitioner == null) - throw new IllegalStateException("Table with id " + tableId + " could not be found; was it deleted?"); - return new PartitionKey(tableId, partitioner.decorateKey(key)); - } - - public static PartitionKey deserializeKey(UntypedResultSet.Row row) - { - return deserializeKey(row.getBytes("key")); + return (TokenKey) AccordRoutingKeyByteSource.Serializer.fromComparableBytes(ByteBufferAccessor.instance, tokenBytes, tableId, currentVersion, null); } public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey original, TimestampsForKey current, long timestampMicros) @@ -1130,7 +1092,7 @@ public static Mutation getTimestampsForKeyMutation(int storeId, TimestampsForKey if (row.columnCount() == 0) return null; - ByteBuffer key = TimestampsForKeyColumns.makePartitionKey(storeId, current.key()); + ByteBuffer key = TimestampsForKeyColumns.makeKey(storeId, current.key()); PartitionUpdate update = singleRowUpdate(TimestampsForKeys, key, row); return new Mutation(update); } @@ -1145,26 +1107,22 @@ public static Mutation getTimestampsForKeyMutation(AccordCommandStore commandSto return getTimestampsForKeyMutation(commandStore.id(), liveTimestamps.original(), liveTimestamps.current(), timestampMicros); } - public static UntypedResultSet loadTimestampsForKeyRow(CommandStore commandStore, PartitionKey key) + public static UntypedResultSet loadTimestampsForKeyRow(CommandStore commandStore, TokenKey key) { String cql = "SELECT * FROM " + ACCORD_KEYSPACE_NAME + '.' + TIMESTAMPS_FOR_KEY + ' ' + "WHERE store_id = ? " + - "AND key_token = ? " + - "AND key=(?, ?)"; + "AND routing_key = ?"; - return executeInternal(cql, - commandStore.id(), - serializeToken(key.token()), - key.table().asUUID(), key.partitionKey().getKey()); + return executeInternal(cql, commandStore.id(), serializeRoutingKey(key)); } - public static TimestampsForKey loadTimestampsForKey(AccordCommandStore commandStore, PartitionKey key) + public static TimestampsForKey loadTimestampsForKey(AccordCommandStore commandStore, TokenKey key) { commandStore.checkNotInStoreThread(); return unsafeLoadTimestampsForKey(commandStore, key); } - public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore commandStore, PartitionKey key) + public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore commandStore, TokenKey key) { UntypedResultSet rows = loadTimestampsForKeyRow(commandStore, key); @@ -1174,21 +1132,22 @@ public static TimestampsForKey unsafeLoadTimestampsForKey(AccordCommandStore com } UntypedResultSet.Row row = rows.one(); - checkState(deserializeKey(row).equals(key)); + TokenKey checkKey = (TokenKey) deserializeRoutingKey(row.getBytes("routing_key")); + checkState(checkKey.equals(key)); Timestamp lastExecutedTimestamp = deserializeTimestampOrDefault(row, "last_executed_timestamp", Timestamp::fromBits, Timestamp.NONE); long lastExecutedMicros = row.has("last_executed_micros") ? row.getLong("last_executed_micros") : 0; + TxnId lastWriteId = deserializeTimestampOrDefault(row, "last_write_id", TxnId::fromBits, TxnId.NONE); Timestamp lastWriteTimestamp = deserializeTimestampOrDefault(row, "last_write_timestamp", Timestamp::fromBits, Timestamp.NONE); - return TimestampsForKey.SerializerSupport.create(key, lastExecutedTimestamp, lastExecutedMicros, lastWriteTimestamp); + return TimestampsForKey.SerializerSupport.create(key, lastExecutedTimestamp, lastExecutedMicros, lastWriteId, lastWriteTimestamp); } - private static DecoratedKey makeKey(CommandsForKeyAccessor accessor, int storeId, PartitionKey key) + private static DecoratedKey makeKeySeparateTable(CommandsForKeyAccessor accessor, int storeId, TokenKey key) { ByteBuffer pk = accessor.keyComparator.make(storeId, UUIDSerializer.instance.serialize(key.table().asUUID()), - serializeRoutingKey(key.toUnseekable()), - key.partitionKey().getKey()).serializeAsPartitionKey(); + serializeRoutingKeyNoTable(key)).serializeAsPartitionKey(); return accessor.table.partitioner.decorateKey(pk); } @@ -1204,23 +1163,44 @@ public static ByteBuffer serializeRoutingKeyNoTable(AccordRoutingKey key) return CommandsForKeysAccessor.serializeKeyNoTable(key); } - private static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, PartitionKey key, CommandsForKey commandsForKey, long timestampMicros) + public static AccordRoutingKey deserializeRoutingKey(ByteBuffer buffer) + { + return AccordRoutingKeyByteSource.Serializer.fromComparableBytes(ByteBufferAccessor.instance, buffer, currentVersion, null); + } + + private static AccordRoutingKeyByteSource.Serializer serializer(AccordRoutingKey routingKey) + { + return serializer(routingKey.table()); + } + + public static AccordRoutingKeyByteSource.Serializer serializer(TableId tableId) + { + return TABLE_SERIALIZERS.computeIfAbsent(tableId, id -> AccordRoutingKeyByteSource.variableLength(partitioner(tableId))); + } + + public static IPartitioner partitioner(TableId tableId) + { + + return SchemaHolder.schema.getTablePartitioner(tableId); + } + + private static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, TokenKey key, CommandsForKey commandsForKey, long timestampMicros) { ByteBuffer bytes = CommandsForKeySerializer.toBytesWithoutKey(commandsForKey); return getCommandsForKeyPartitionUpdate(storeId, key, timestampMicros, bytes); } @VisibleForTesting - public static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, PartitionKey key, long timestampMicros, ByteBuffer bytes) + public static PartitionUpdate getCommandsForKeyPartitionUpdate(int storeId, TokenKey key, long timestampMicros, ByteBuffer bytes) { return singleRowUpdate(CommandsForKeysAccessor.table, - makeKey(CommandsForKeysAccessor, storeId, key), + makeKeySeparateTable(CommandsForKeysAccessor, storeId, key), singleCellRow(Clustering.EMPTY, BufferCell.live(CommandsForKeysAccessor.data, timestampMicros, bytes))); } public static Mutation getCommandsForKeyMutation(int storeId, CommandsForKey update, long timestampMicros) { - return new Mutation(getCommandsForKeyPartitionUpdate(storeId, (PartitionKey) update.key(), update, timestampMicros)); + return new Mutation(getCommandsForKeyPartitionUpdate(storeId, (TokenKey)update.key(), update, timestampMicros)); } private static ByteBuffer cellValue(Cell cell) @@ -1240,22 +1220,22 @@ private static ByteBuffer clusteringValue(Clustering clustering, int idx) return clustering.accessor().toBuffer(clustering.get(idx)); } - private static SinglePartitionReadCommand getCommandsForKeyRead(CommandsForKeyAccessor accessor, int storeId, PartitionKey key, long nowInSeconds) + private static SinglePartitionReadCommand getCommandsForKeyRead(CommandsForKeyAccessor accessor, int storeId, TokenKey key, long nowInSeconds) { return SinglePartitionReadCommand.create(accessor.table, nowInSeconds, accessor.allColumns, RowFilter.none(), DataLimits.NONE, - makeKey(accessor, storeId, key), + makeKeySeparateTable(accessor, storeId, key), FULL_PARTITION); } - public static SinglePartitionReadCommand getCommandsForKeyRead(int storeId, PartitionKey key, int nowInSeconds) + public static SinglePartitionReadCommand getCommandsForKeyRead(int storeId, TokenKey key, int nowInSeconds) { return getCommandsForKeyRead(CommandsForKeysAccessor, storeId, key, nowInSeconds); } - static CommandsForKey unsafeLoadCommandsForKey(CommandsForKeyAccessor accessor, AccordCommandStore commandStore, PartitionKey key) + static CommandsForKey unsafeLoadCommandsForKey(CommandsForKeyAccessor accessor, AccordCommandStore commandStore, TokenKey key) { long timestampMicros = TimeUnit.MILLISECONDS.toMicros(Global.currentTimeMillis()); int nowInSeconds = (int) TimeUnit.MICROSECONDS.toSeconds(timestampMicros); @@ -1283,12 +1263,12 @@ static CommandsForKey unsafeLoadCommandsForKey(CommandsForKeyAccessor accessor, } } - public static CommandsForKey unsafeLoadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + public static CommandsForKey unsafeLoadCommandsForKey(AccordCommandStore commandStore, TokenKey key) { return unsafeLoadCommandsForKey(CommandsForKeysAccessor, commandStore, key); } - public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, PartitionKey key) + public static CommandsForKey loadCommandsForKey(AccordCommandStore commandStore, TokenKey key) { commandStore.checkNotInStoreThread(); return unsafeLoadCommandsForKey(CommandsForKeysAccessor, commandStore, key); @@ -1557,104 +1537,6 @@ public static EpochDiskState loadTopologies(TopologyLoadConsumer consumer) } } - private static IMutation getCommandStoreMetadataMutation(String cql, ByteBuffer... values) - { - ClientState clientState = ClientState.forInternalCalls(); - ModificationStatement statement = (ModificationStatement) QueryProcessor.parseStatement(cql).prepare(ClientState.forInternalCalls()); - QueryOptions options = QueryOptions.forInternalCalls(Arrays.asList(values)); - - long tsMicros = TimeUnit.MILLISECONDS.toMicros(Global.currentTimeMillis()); - - while (true) - { - long prev = commandStoreMetadataTimestamp.get(); - if (prev >= tsMicros) - tsMicros = prev + 1; - - if (commandStoreMetadataTimestamp.compareAndSet(prev, tsMicros)) - break; - } - - return Iterables.getOnlyElement(statement.getMutations(clientState, options, true, tsMicros, (int) TimeUnit.MICROSECONDS.toSeconds(tsMicros), Dispatcher.RequestTime.forImmediateExecution(), false)); - } - - - private static Future updateCommandStoreMetadata(CommandStore commandStore, String column, T value, LocalVersionedSerializer serializer) - { - String cql = format("UPDATE %s.%s SET %s=? WHERE store_id=?", ACCORD_KEYSPACE_NAME, COMMAND_STORE_METADATA, column); - try - { - IMutation mutation = getCommandStoreMetadataMutation(cql, serialize(value, serializer), bytes(commandStore.id())); - return Stage.MUTATION.submit(mutation::apply); - } - catch (IOException e) - { - throw new UncheckedIOException(e); - } - } - - public static Future updateRejectBefore(CommandStore commandStore, ReducingRangeMap rejectBefore) - { - return updateCommandStoreMetadata(commandStore, "reject_before", rejectBefore, LocalVersionedSerializers.rejectBefore); - } - - public static Future updateDurableBefore(CommandStore commandStore, DurableBefore durableBefore) - { - return updateCommandStoreMetadata(commandStore, "durable_before", durableBefore, LocalVersionedSerializers.durableBefore); - } - - public static Future updateRedundantBefore(CommandStore commandStore, RedundantBefore redundantBefore) - { - return updateCommandStoreMetadata(commandStore, "redundant_before", redundantBefore, LocalVersionedSerializers.redundantBefore); - } - - public static Future updateBootstrapBeganAt(CommandStore commandStore, NavigableMap bootstrapBeganAt) - { - return updateCommandStoreMetadata(commandStore, "bootstrap_began_at", bootstrapBeganAt, LocalVersionedSerializers.bootstrapBeganAt); - } - - public static Future updateSafeToRead(CommandStore commandStore, NavigableMap safeToRead) - { - return updateCommandStoreMetadata(commandStore, "safe_to_read", safeToRead, LocalVersionedSerializers.safeToRead); - } - - public interface CommandStoreMetadataConsumer - { - void accept(ReducingRangeMap rejectBefore, DurableBefore durableBefore, RedundantBefore redundantBefore, NavigableMap bootstrapBeganAt, NavigableMap safeToRead); - } - - public static void loadCommandStoreMetadata(int id, CommandStoreMetadataConsumer consumer) - { - UntypedResultSet result = executeOnceInternal(format("SELECT * FROM %s.%s WHERE store_id=?", ACCORD_KEYSPACE_NAME, COMMAND_STORE_METADATA), id); - ReducingRangeMap rejectBefore = null; - DurableBefore durableBefore = null; - RedundantBefore redundantBefore = null; - NavigableMap bootstrapBeganAt = null; - NavigableMap safeToRead = null; - if (!result.isEmpty()) - { - UntypedResultSet.Row row = Iterables.getOnlyElement(result); - try - { - if (row.has("reject_before")) - rejectBefore = deserialize(row.getBlob("reject_before"), LocalVersionedSerializers.rejectBefore); - if (row.has("durable_before")) - durableBefore = deserialize(row.getBlob("durable_before"), LocalVersionedSerializers.durableBefore); - if (row.has("redundant_before")) - redundantBefore = deserialize(row.getBlob("redundant_before"), LocalVersionedSerializers.redundantBefore); - if (row.has("bootstrap_began_at")) - bootstrapBeganAt = deserialize(row.getBlob("bootstrap_began_at"), LocalVersionedSerializers.bootstrapBeganAt); - if (row.has("safe_to_read")) - safeToRead = deserialize(row.getBlob("safe_to_read"), LocalVersionedSerializers.safeToRead); - } - catch (IOException e) - { - throw new UncheckedIOException(e); - } - } - consumer.accept(rejectBefore, durableBefore, redundantBefore, bootstrapBeganAt, safeToRead); - } - @VisibleForTesting public static void unsafeSetSchema(SchemaProvider provider) { diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index 53fd58155182..ff86a41ed066 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -24,6 +24,7 @@ import accord.api.Key; import accord.api.Result; import accord.api.RoutingKey; +import accord.local.StoreParticipants; import accord.local.cfk.CommandsForKey; import accord.local.cfk.CommandsForKey.TxnInfo; import accord.impl.TimestampsForKey; @@ -32,8 +33,8 @@ import accord.local.cfk.CommandsForKey.TxnInfoExtra; import accord.local.CommonAttributes; import accord.local.Node; -import accord.local.SaveStatus; -import accord.local.Status; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; import accord.primitives.Ballot; @@ -69,6 +70,7 @@ import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.utils.ObjectSizes; +import static accord.local.cfk.CommandsForKey.InternalStatus.ACCEPTED; import static accord.primitives.TxnId.NO_TXNIDS; import static org.apache.cassandra.utils.ObjectSizes.measure; @@ -235,7 +237,7 @@ public static long dependencies(Deps dependencies) // doesn't account for txnIdToKeys, txnIdToRanges, and searchable fields; // fix to accunt for, in case caching isn't redone long size = EMPTY_DEPS_SIZE - EMPTY_KEYS_SIZE - ObjectSizes.sizeOfReferenceArray(0); - size += keys(dependencies.keyDeps.keys()); + size += routingKeys(dependencies.keyDeps.keys()); for (int i = 0 ; i < dependencies.rangeDeps.rangeCount() ; ++i) size += range(dependencies.rangeDeps.range(i)); size += ObjectSizes.sizeOfReferenceArray(dependencies.rangeDeps.rangeCount()); @@ -273,7 +275,9 @@ private static class CommandEmptySizes private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) { - CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(EMPTY_TXNID).route(new FullKeyRoute(EMPTY_KEY, new RoutingKey[]{ EMPTY_KEY })); + FullKeyRoute route = new FullKeyRoute(EMPTY_KEY, new RoutingKey[]{ EMPTY_KEY }); + CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(EMPTY_TXNID) + .setParticipants(StoreParticipants.empty(EMPTY_TXNID, route)); attrs.durability(Status.Durability.NotDurable); if (hasDeps) attrs.partialDeps(PartialDeps.NONE); @@ -284,14 +288,14 @@ private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) return attrs; } - private static final Writes EMPTY_WRITES = new Writes(EMPTY_TXNID, EMPTY_TXNID, Keys.EMPTY, (key, safeStore, executeAt, store, txn) -> null); + private static final Writes EMPTY_WRITES = new Writes(EMPTY_TXNID, EMPTY_TXNID, Keys.EMPTY, (key, safeStore, txnId, executeAt, store, txn) -> null); private static final Result EMPTY_RESULT = new Result() {}; final static long NOT_DEFINED = measure(Command.SerializerSupport.notDefined(attrs(false, false), Ballot.ZERO)); final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(attrs(false, true), EMPTY_TXNID, null));; final static long ACCEPTED = measure(Command.SerializerSupport.accepted(attrs(true, false), SaveStatus.Accepted, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO)); final static long COMMITTED = measure(Command.SerializerSupport.committed(attrs(true, true), SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, null)); - final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.empty(EMPTY_TXNID.domain()), EMPTY_WRITES, EMPTY_RESULT)); + final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.empty(Domain.Key), EMPTY_WRITES, EMPTY_RESULT)); final static long TRUNCATED = measure(Command.SerializerSupport.truncatedApply(attrs(false, false), SaveStatus.TruncatedApply, EMPTY_TXNID, null, null)); final static long INVALIDATED = measure(Command.SerializerSupport.invalidated(EMPTY_TXNID)); @@ -353,7 +357,7 @@ public static long command(Command command) return size; } - private static long EMPTY_TFK_SIZE = measure(TimestampsForKey.SerializerSupport.create(null, null, 0, null)); + private static long EMPTY_TFK_SIZE = measure(TimestampsForKey.SerializerSupport.create(null, null, 0, null, null)); public static long timestampsForKey(TimestampsForKey timestamps) { @@ -364,8 +368,8 @@ public static long timestampsForKey(TimestampsForKey timestamps) } private static long EMPTY_CFK_SIZE = measure(new CommandsForKey(null)); - private static long EMPTY_INFO_SIZE = measure(TxnInfo.createMock(TxnId.NONE, null, null, NO_TXNIDS, Ballot.ZERO)); - private static long EMPTY_INFO_EXTRA_ADDITIONAL_SIZE = EMPTY_INFO_SIZE - measure(TxnInfo.createMock(TxnId.NONE, null, null, null, null)); + private static long EMPTY_INFO_SIZE = measure(CommandsForKey.NO_INFO); + private static long EMPTY_INFO_EXTRA_ADDITIONAL_SIZE = measure(TxnInfo.create(TxnId.NONE, ACCEPTED, false, TxnId.NONE, NO_TXNIDS, Ballot.MAX)) - EMPTY_INFO_SIZE; public static long commandsForKey(CommandsForKey cfk) { long size = EMPTY_CFK_SIZE; diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java index 43bc8d54b90b..0221e9f39443 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommand.java @@ -125,7 +125,7 @@ public Command original() return original; } - public SavedCommand.Writer diff() + public SavedCommand.DiffWriter diff() { return SavedCommand.diff(original, current); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 8dabbc7bb3f9..fae5e4634f5d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -30,36 +30,40 @@ import accord.api.DataStore; import accord.api.Key; import accord.api.ProgressLog; +import accord.api.RoutingKey; import accord.impl.AbstractSafeCommandStore; -import accord.local.cfk.CommandsForKey; import accord.impl.CommandsSummary; +import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; +import accord.local.DurableBefore; import accord.local.NodeTimeService; import accord.local.PreLoadContext; +import accord.local.RedundantBefore; +import accord.local.cfk.CommandsForKey; import accord.primitives.AbstractKeys; import accord.primitives.AbstractRanges; import accord.primitives.Deps; -import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routables; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Unseekables; public class AccordSafeCommandStore extends AbstractSafeCommandStore { private final Map commands; - private final NavigableMap commandsForKeys; - private final NavigableMap timestampsForKeys; + private final NavigableMap commandsForKeys; + private final NavigableMap timestampsForKeys; private final @Nullable AccordSafeCommandsForRanges commandsForRanges; private final AccordCommandStore commandStore; - private final RangesForEpoch ranges; + private RangesForEpoch ranges; + private FieldUpdates fieldUpdates; private AccordSafeCommandStore(PreLoadContext context, Map commands, - NavigableMap timestampsForKey, - NavigableMap commandsForKey, + NavigableMap timestampsForKey, + NavigableMap commandsForKey, @Nullable AccordSafeCommandsForRanges commandsForRanges, AccordCommandStore commandStore) { @@ -69,13 +73,15 @@ private AccordSafeCommandStore(PreLoadContext context, this.commandsForKeys = commandsForKey; this.commandsForRanges = commandsForRanges; this.commandStore = commandStore; - this.ranges = commandStore.updateRangesForEpoch(); + commandStore.updateRangesForEpoch(this); + if (this.ranges == null) + this.ranges = commandStore.unsafeRangesForEpoch(); } public static AccordSafeCommandStore create(PreLoadContext preLoadContext, Map commands, - NavigableMap timestampsForKey, - NavigableMap commandsForKey, + NavigableMap timestampsForKey, + NavigableMap commandsForKey, @Nullable AccordSafeCommandsForRanges commandsForRanges, AccordCommandStore commandStore) { @@ -83,7 +89,7 @@ public static AccordSafeCommandStore create(PreLoadContext preLoadContext, } @VisibleForTesting - public Set commandsForKeysKeys() + public Set commandsForKeysKeys() { return commandsForKeys.keySet(); } @@ -109,7 +115,7 @@ protected AccordSafeCommand getIfLoaded(TxnId txnId) } @Override - protected AccordSafeCommandsForKey getCommandsForKeyInternal(Key key) + protected AccordSafeCommandsForKey getCommandsForKeyInternal(RoutingKey key) { return commandsForKeys.get(key); } @@ -121,7 +127,7 @@ protected void addCommandsForKeyInternal(AccordSafeCommandsForKey cfk) } @Override - protected AccordSafeCommandsForKey getCommandsForKeyIfLoaded(Key key) + protected AccordSafeCommandsForKey getCommandsForKeyIfLoaded(RoutingKey key) { AccordSafeCommandsForKey cfk = commandStore.commandsForKeyCache().acquireIfLoaded(key); if (cfk != null) cfk.preExecute(); @@ -129,7 +135,7 @@ protected AccordSafeCommandsForKey getCommandsForKeyIfLoaded(Key key) } @Override - protected AccordSafeTimestampsForKey getTimestampsForKeyInternal(Key key) + protected AccordSafeTimestampsForKey getTimestampsForKeyInternal(RoutingKey key) { return timestampsForKeys.get(key); } @@ -141,7 +147,7 @@ protected void addTimestampsForKeyInternal(AccordSafeTimestampsForKey cfk) } @Override - protected AccordSafeTimestampsForKey getTimestampsForKeyIfLoaded(Key key) + protected AccordSafeTimestampsForKey getTimestampsForKeyIfLoaded(RoutingKey key) { AccordSafeTimestampsForKey cfk = commandStore.timestampsForKeyCache().acquireIfLoaded(key); if (cfk != null) cfk.preExecute(); @@ -182,68 +188,32 @@ public NodeTimeService time() @Override public RangesForEpoch ranges() { - return commandStore().unsafeRangesForEpoch(); + return ranges; } - @Override - public void registerHistoricalTransactions(Deps deps) - { - if (deps.isEmpty()) return; - // used in places such as accord.local.CommandStore.fetchMajorityDeps - // We find a set of dependencies for a range then update CommandsFor to know about them - Ranges allRanges = ranges.all(); - deps.keyDeps.keys().forEach(allRanges, key -> { - // TODO (now): batch register to minimise GC - deps.keyDeps.forEach(key, (txnId, txnIdx) -> { - // TODO (desired, efficiency): this can be made more efficient by batching by epoch - if (ranges.coordinates(txnId).contains(key)) - return; // already coordinates, no need to replicate - if (!ranges.allBefore(txnId.epoch()).contains(key)) - return; - - get(key).registerHistorical(this, txnId); - }); - }); - for (int i = 0; i < deps.rangeDeps.rangeCount(); i++) - { - Range range = deps.rangeDeps.range(i); - if (!allRanges.intersects(range)) - continue; - deps.rangeDeps.forEach(range, txnId -> { - // TODO (desired, efficiency): this can be made more efficient by batching by epoch - if (ranges.coordinates(txnId).intersects(range)) - return; // already coordinates, no need to replicate - if (!ranges.allBefore(txnId.epoch()).intersects(range)) - return; - - commandStore.diskCommandsForRanges().mergeHistoricalTransaction(txnId, Ranges.single(range).slice(allRanges), Ranges::with); - }); - } - } - - private O mapReduce(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) + private O mapReduce(Routables keysOrRanges, BiFunction map, O accumulate) { - accumulate = mapReduceForRange(keysOrRanges, slice, map, accumulate); - return mapReduceForKey(keysOrRanges, slice, map, accumulate); + accumulate = mapReduceForRange(keysOrRanges, map, accumulate); + return mapReduceForKey(keysOrRanges, map, accumulate); } - private O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) + private O mapReduceForRange(Routables keysOrRanges, BiFunction map, O accumulate) { if (commandsForRanges == null) return accumulate; - CommandsForRanges cfr = commandsForRanges.current().slice(slice); + CommandsForRanges cfr = commandsForRanges.current(); switch (keysOrRanges.domain()) { case Key: { - AbstractKeys keys = (AbstractKeys) keysOrRanges.slice(slice, Routables.Slice.Minimal); + AbstractKeys keys = (AbstractKeys) keysOrRanges; if (!cfr.ranges.intersects(keys)) return accumulate; } break; case Range: { - AbstractRanges ranges = (AbstractRanges) keysOrRanges.slice(slice, Routables.Slice.Minimal); + AbstractRanges ranges = (AbstractRanges) keysOrRanges; if (!cfr.ranges.intersects(ranges)) return accumulate; } @@ -254,7 +224,7 @@ private O mapReduceForRange(Routables keysOrRanges, Ranges slice, BiFunct return map.apply(cfr, accumulate); } - private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunction map, O accumulate) + private O mapReduceForKey(Routables keysOrRanges, BiFunction map, O accumulate) { switch (keysOrRanges.domain()) { @@ -263,10 +233,9 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio case Key: { // TODO: efficiency - AbstractKeys keys = (AbstractKeys) keysOrRanges; - for (Key key : keys) + AbstractKeys keys = (AbstractKeys) keysOrRanges; + for (RoutingKey key : keys) { - if (!slice.contains(key)) continue; CommandsForKey commands = get(key).current(); accumulate = map.apply(commands, accumulate); } @@ -276,13 +245,11 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio { // Assuming the range provided is in the PreLoadContext, then AsyncLoader has populated commandsForKeys with keys that // are contained within the ranges... so walk all keys found in commandsForKeys - Routables sliced = keysOrRanges.slice(slice, Routables.Slice.Minimal); - if (!context.keys().slice(slice, Routables.Slice.Minimal).containsAll(sliced)) + if (!context.keys().containsAll(keysOrRanges)) throw new AssertionError("Range(s) detected not present in the PreLoadContext: expected " + context.keys() + " but given " + keysOrRanges); - for (Key key : commandsForKeys.keySet()) + for (RoutingKey key : commandsForKeys.keySet()) { //TODO (duplicate code): this is a repeat of Key... only change is checking contains in range - if (!sliced.contains(key)) continue; CommandsForKey commands = get(key).current(); accumulate = map.apply(commands, accumulate); } @@ -293,17 +260,17 @@ private O mapReduceForKey(Routables keysOrRanges, Ranges slice, BiFunctio } @Override - public T mapReduceActive(Seekables keysOrRanges, Ranges slice, @Nullable Timestamp withLowerTxnId, Txn.Kind.Kinds testKind, CommandFunction map, P1 p1, T accumulate) + public T mapReduceActive(Unseekables keysOrRanges, @Nullable Timestamp withLowerTxnId, Txn.Kind.Kinds testKind, CommandFunction map, P1 p1, T accumulate) { - return mapReduce(keysOrRanges, slice, (summary, in) -> { + return mapReduce(keysOrRanges, (summary, in) -> { return summary.mapReduceActive(withLowerTxnId, testKind, map, p1, in); }, accumulate); } @Override - public T mapReduceFull(Seekables keysOrRanges, Ranges slice, TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) + public T mapReduceFull(Unseekables keysOrRanges, TxnId testTxnId, Txn.Kind.Kinds testKind, TestStartedAt testStartedAt, TestDep testDep, TestStatus testStatus, CommandFunction map, P1 p1, T accumulate) { - return mapReduce(keysOrRanges, slice, (summary, in) -> { + return mapReduce(keysOrRanges, (summary, in) -> { return summary.mapReduceFull(testTxnId, testKind, testStartedAt, testDep, testStatus, map, p1, in); }, accumulate); } @@ -313,4 +280,70 @@ public String toString() { return "AccordSafeCommandStore(id=" + commandStore().id() + ")"; } -} + + @Override + public void upsertRedundantBefore(RedundantBefore addRedundantBefore) + { + // TODO (now): this is a temporary measure, see comment on AccordJournalValueSerializers; upsert instead + // when modifying, only modify together with AccordJournalValueSerializers + ensureFieldUpdates().redundantBefore = RedundantBefore.merge(commandStore.redundantBefore(), addRedundantBefore); + super.upsertRedundantBefore(addRedundantBefore); + } + + @Override + public void setBootstrapBeganAt(NavigableMap newBootstrapBeganAt) + { + ensureFieldUpdates().bootstrapBeganAt = newBootstrapBeganAt; + super.setBootstrapBeganAt(newBootstrapBeganAt); + } + + @Override + public void upsertDurableBefore(DurableBefore addDurableBefore) + { + ensureFieldUpdates().durableBefore = addDurableBefore; + super.upsertDurableBefore(addDurableBefore); + } + + @Override + public void setSafeToRead(NavigableMap newSafeToRead) + { + ensureFieldUpdates().safeToRead = newSafeToRead; + super.setSafeToRead(newSafeToRead); + } + + @Override + public void setRangesForEpoch(CommandStores.RangesForEpoch rangesForEpoch) + { + ensureFieldUpdates().rangesForEpoch = rangesForEpoch.snapshot(); + super.setRangesForEpoch(rangesForEpoch); + ranges = rangesForEpoch; + } + + @Override + protected void registerHistoricalTransactions(Deps deps) + { + ensureFieldUpdates().historicalTransactions = deps; + super.registerHistoricalTransactions(deps); + } + + private FieldUpdates ensureFieldUpdates() + { + if (fieldUpdates == null) fieldUpdates = new FieldUpdates(); + return fieldUpdates; + } + + public FieldUpdates fieldUpdates() + { + return fieldUpdates; + } + + public static class FieldUpdates + { + public RedundantBefore redundantBefore; + public DurableBefore durableBefore; + public NavigableMap bootstrapBeganAt; + public NavigableMap safeToRead; + public RangesForEpoch.Snapshot rangesForEpoch; + public Deps historicalTransactions; + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java index 6f5e8f72d5e5..587444252818 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandsForKey.java @@ -22,18 +22,18 @@ import com.google.common.annotations.VisibleForTesting; -import accord.api.Key; +import accord.api.RoutingKey; import accord.local.cfk.CommandsForKey; import accord.local.cfk.SafeCommandsForKey; -public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState +public class AccordSafeCommandsForKey extends SafeCommandsForKey implements AccordSafeState { private boolean invalidated; - private final AccordCachingState global; + private final AccordCachingState global; private CommandsForKey original; private CommandsForKey current; - public AccordSafeCommandsForKey(AccordCachingState global) + public AccordSafeCommandsForKey(AccordCachingState global) { super(global.key()); this.global = global; @@ -82,7 +82,7 @@ public boolean hasUpdate() } @Override - public AccordCachingState global() + public AccordCachingState global() { checkNotInvalidated(); return global; diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java index 89baee84b950..77ad56c3fd16 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeTimestampsForKey.java @@ -23,21 +23,21 @@ import com.google.common.annotations.VisibleForTesting; -import accord.api.Key; +import accord.api.RoutingKey; import accord.impl.SafeTimestampsForKey; import accord.impl.TimestampsForKey; import accord.primitives.Timestamp; -public class AccordSafeTimestampsForKey extends SafeTimestampsForKey implements AccordSafeState +public class AccordSafeTimestampsForKey extends SafeTimestampsForKey implements AccordSafeState { private boolean invalidated; - private final AccordCachingState global; + private final AccordCachingState global; private TimestampsForKey original; private TimestampsForKey current; - public AccordSafeTimestampsForKey(AccordCachingState global) + public AccordSafeTimestampsForKey(AccordCachingState global) { - super((Key) global.key()); + super(global.key()); this.global = global; this.original = null; this.current = null; @@ -70,7 +70,7 @@ public String toString() } @Override - public AccordCachingState global() + public AccordCachingState global() { checkNotInvalidated(); return global; diff --git a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java index e3f10cb64444..1dc8389c2083 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java @@ -17,11 +17,11 @@ */ package org.apache.cassandra.service.accord; +import java.io.IOException; import java.util.Collection; import java.util.Collections; import java.util.PriorityQueue; -import com.google.common.base.Throwables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,31 +30,43 @@ import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.db.SerializationHeader; import org.apache.cassandra.db.partitions.PartitionUpdate; +import org.apache.cassandra.db.partitions.PartitionUpdate.SimpleBuilder; import org.apache.cassandra.db.rows.EncodingStats; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.SSTableTxnWriter; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.journal.SegmentCompactor; import org.apache.cassandra.journal.StaticSegment; import org.apache.cassandra.journal.StaticSegment.KeyOrderReader; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.FlyweightSerializer; /** * Segment compactor: takes static segments and compacts them into a single SSTable. */ -public class AccordSegmentCompactor implements SegmentCompactor +public class AccordSegmentCompactor implements SegmentCompactor { private static final Logger logger = LoggerFactory.getLogger(AccordSegmentCompactor.class); + private final int userVersion; + private final KeySupport keySupport; + + public AccordSegmentCompactor(KeySupport keySupport, int userVersion) + { + this.userVersion = userVersion; + this.keySupport = keySupport; + } @Override - public Collection> compact(Collection> segments, KeySupport keySupport) + public Collection> compact(Collection> segments) { Invariants.checkState(segments.size() >= 2, () -> String.format("Can only compact 2 or more segments, but got %d", segments.size())); logger.info("Compacting {} static segments: {}", segments.size(), segments); - PriorityQueue> readers = new PriorityQueue<>(); - for (StaticSegment segment : segments) + PriorityQueue> readers = new PriorityQueue<>(); + for (StaticSegment segment : segments) { - KeyOrderReader reader = segment.keyOrderReader(); + KeyOrderReader reader = segment.keyOrderReader(); if (reader.advance()) readers.add(reader); } @@ -62,7 +74,7 @@ public Collection> compact(Collection> s // nothing to compact (all segments empty, should never happen, but it is theoretically possible?) - exit early // TODO: investigate how this comes to be, check if there is a cleanup issue if (readers.isEmpty()) - return segments; + return null; ColumnFamilyStore cfs = Keyspace.open(AccordKeyspace.metadata().name).getColumnFamilyStore(AccordKeyspace.JOURNAL); Descriptor descriptor = cfs.newSSTableDescriptor(cfs.getDirectories().getDirectoryForNewSSTables()); @@ -70,50 +82,67 @@ public Collection> compact(Collection> s try (SSTableTxnWriter writer = SSTableTxnWriter.create(cfs, descriptor, 0, 0, null, false, header)) { - K key = null; - PartitionUpdate.SimpleBuilder partitionBuilder = null; - + JournalKey key = null; + Object builder = null; + FlyweightSerializer serializer = null; + long lastDescriptor = -1; + int lastOffset = -1; try { - KeyOrderReader reader; + KeyOrderReader reader; while ((reader = readers.poll()) != null) { - if (!reader.key().equals(key)) // first ever - or new - key + if (key == null || !reader.key().equals(key)) { - if (partitionBuilder != null) // append previous partition if any - writer.append(partitionBuilder.build().unfilteredIterator()); + maybeWritePartition(cfs, writer, key, builder, serializer, lastDescriptor, lastOffset); key = reader.key(); - partitionBuilder = PartitionUpdate.simpleBuilder( - AccordKeyspace.Journal, AccordJournalTable.makePartitionKey(cfs, key, keySupport, reader.descriptor.userVersion) - ); + serializer = (FlyweightSerializer) key.type.serializer; + builder = serializer.mergerFor(key); } boolean advanced; do { - partitionBuilder.row(reader.descriptor.timestamp, reader.offset()) - .add("record", reader.record()) - .add("user_version", reader.descriptor.userVersion); + try (DataInputBuffer in = new DataInputBuffer(reader.record(), false)) + { + serializer.deserialize(key, builder, in, reader.descriptor.userVersion); + lastDescriptor = reader.descriptor.timestamp; + lastOffset = reader.offset(); + } } while ((advanced = reader.advance()) && reader.key().equals(key)); if (advanced) readers.offer(reader); // there is more to this reader, but not with this key } - //noinspection DataFlowIssue - writer.append(partitionBuilder.build().unfilteredIterator()); // append the last partition + maybeWritePartition(cfs, writer, key, builder, serializer, lastDescriptor, lastOffset); } catch (Throwable t) { Throwable accumulate = writer.abort(t); - Throwables.throwIfUnchecked(accumulate); - throw new RuntimeException(accumulate); + throw new RuntimeException(String.format("Caught exception while serializing. Last seen key: %s", key), accumulate); } cfs.addSSTables(writer.finish(true)); return Collections.emptyList(); } } + + private void maybeWritePartition(ColumnFamilyStore cfs, SSTableTxnWriter writer, JournalKey key, Object builder, FlyweightSerializer serializer, long descriptor, int offset) throws IOException + { + if (builder != null) + { + SimpleBuilder partitionBuilder = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, AccordJournalTable.makePartitionKey(cfs, key, keySupport, userVersion)); + try (DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get()) + { + serializer.reserialize(key, builder, out, userVersion); + partitionBuilder.row(descriptor, offset) + .add("record", out.asNewBuffer()) + .add("user_version", userVersion); + } + writer.append(partitionBuilder.build().unfilteredIterator()); + } + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index a90ca24351e3..b2f22b2ad140 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -78,6 +78,8 @@ import accord.api.BarrierType; import accord.api.LocalConfig; import accord.api.Result; +import accord.coordinate.Barrier.AsyncSyncPoint; +import accord.coordinate.CoordinationAdapter.Adapters.SyncPointAdapter; import accord.coordinate.CoordinationFailed; import accord.coordinate.Preempted; import accord.coordinate.Timeout; @@ -93,15 +95,14 @@ import accord.impl.progresslog.DefaultProgressLogs; import accord.local.CommandStore; import accord.local.CommandStores; +import accord.local.CommandStores.RangesForEpoch; import accord.local.DurableBefore; import accord.local.KeyHistory; import accord.local.Node; import accord.local.Node.Id; import accord.local.NodeTimeService; import accord.local.RedundantBefore; -import accord.local.SaveStatus; import accord.local.ShardDistributor.EvenSplit; -import accord.local.Status; import accord.local.cfk.CommandsForKey; import accord.messages.Callback; import accord.messages.ReadData; @@ -110,6 +111,10 @@ import accord.primitives.Keys; import accord.primitives.Seekable; import accord.primitives.Seekables; +import accord.primitives.FullRoute; +import accord.primitives.RoutingKeys; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; @@ -142,6 +147,7 @@ import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.AccordScheduler; import org.apache.cassandra.service.accord.api.AccordTopologySorter; import org.apache.cassandra.service.accord.api.CompositeTopologySorter; @@ -174,6 +180,7 @@ import static org.apache.cassandra.config.DatabaseDescriptor.getPartitioner; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordReadMetrics; import static org.apache.cassandra.metrics.ClientRequestsMetricsHolder.accordWriteMetrics; +import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.maybeSaveAccordKeyMigrationLocally; import static org.apache.cassandra.utils.Clock.Global.nanoTime; public class AccordService implements IAccordService, Shutdownable @@ -207,19 +214,19 @@ public IVerbHandler verbHandler() } @Override - public Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException + public Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException { throw new UnsupportedOperationException(); } @Override - public Seekables barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + public Seekables barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) { throw new UnsupportedOperationException("No accord barriers should be executed when accord.enabled = false in cassandra.yaml"); } @Override - public Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) + public Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) { throw new UnsupportedOperationException("No accord repairs should be executed when accord.enabled = false in cassandra.yaml"); } @@ -357,6 +364,8 @@ public synchronized static void startup(NodeId tcmId) as.configurationService().notifyPostCommit(current, current, false); } instance = as; + + as.journal().replay(); } public static void shutdownServiceAndWait(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException @@ -393,7 +402,7 @@ private AccordService(Id localId) this.scheduler = new AccordScheduler(); this.dataStore = new AccordDataStore(); this.configuration = new AccordConfiguration(DatabaseDescriptor.getRawConfig()); - this.journal = new AccordJournal(configService, DatabaseDescriptor.getAccord().journal); + this.journal = new AccordJournal(DatabaseDescriptor.getAccord().journal); this.node = new Node(localId, messageSink, configService, @@ -415,7 +424,7 @@ private AccordService(Id localId) configuration); this.nodeShutdown = toShutdownable(node); this.durabilityScheduling = new CoordinateDurabilityScheduling(node); - this.requestHandler = new AccordVerbHandler<>(node, configService, journal); + this.requestHandler = new AccordVerbHandler<>(node, configService); } @Override @@ -473,7 +482,7 @@ class Ref { List historic = Collections.emptyList();} durabilityScheduling.setShardCycleTime(Ints.checkedCast(DatabaseDescriptor.getAccordShardDurabilityCycle(SECONDS)), SECONDS); durabilityScheduling.setTxnIdLag(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityTxnIdLag(SECONDS)), TimeUnit.SECONDS); durabilityScheduling.setFrequency(Ints.checkedCast(DatabaseDescriptor.getAccordScheduleDurabilityFrequency(SECONDS)), SECONDS); -// durabilityScheduling.start(); + durabilityScheduling.start(); state = State.STARTED; } @@ -551,10 +560,10 @@ public IVerbHandler verbHandler() return requestHandler; } - private > Seekables barrier(@Nonnull S keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, BiFunction>> syncPoint) + private Seekables barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, BiFunction, AsyncSyncPoint> syncPoint) { Stopwatch sw = Stopwatch.createStarted(); - keysOrRanges = (S) intersectionWithAccordManagedRanges(keysOrRanges); + keysOrRanges = intersectionWithAccordManagedRanges(keysOrRanges); // It's possible none of them were Accord managed and we aren't going to treat that as an error if (keysOrRanges.isEmpty()) { @@ -562,15 +571,22 @@ public IVerbHandler verbHandler() return keysOrRanges; } + FullRoute route = node.computeRoute(epoch, keysOrRanges); AccordClientRequestMetrics metrics = isForWrite ? accordWriteMetrics : accordReadMetrics; try { logger.debug("Starting barrier key: {} epoch: {} barrierType: {} isForWrite {}", keysOrRanges, epoch, barrierType, isForWrite); AsyncResult asyncResult = syncPoint == null - ? Barrier.barrier(node, keysOrRanges, epoch, barrierType) - : Barrier.barrier(node, keysOrRanges, epoch, barrierType, syncPoint); + ? Barrier.barrier(node, keysOrRanges, route, epoch, barrierType) + : Barrier.barrier(node, keysOrRanges, route, epoch, barrierType, syncPoint); + if (keysOrRanges.domain() == Key) + { + PartitionKey key = (PartitionKey)keysOrRanges.get(0); + asyncResult.accept(txnId -> maybeSaveAccordKeyMigrationLocally(key, Epoch.create(txnId.epoch()))); + } long deadlineNanos = requestTime.startedAtNanos() + timeoutNanos; - Timestamp barrierExecuteAt = AsyncChains.getBlocking(asyncResult, deadlineNanos - nanoTime(), NANOSECONDS); + TxnId txnId = AsyncChains.getBlocking(asyncResult, deadlineNanos - nanoTime(), NANOSECONDS); + ((AccordAgent) node.agent()).onSuccessfulBarrier(txnId, keysOrRanges); logger.debug("Completed barrier attempt in {}ms, {}ms since attempts start, barrier key: {} epoch: {} barrierType: {} isForWrite {}", sw.elapsed(MILLISECONDS), NANOSECONDS.toMillis(nanoTime() - requestTime.startedAtNanos()), @@ -627,33 +643,37 @@ public IVerbHandler verbHandler() } @Override - public Seekables barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) + public Seekables barrier(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite) { return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, null); } - public static > BiFunction>> repairSyncPoint(Set allNodes) + public static BiFunction, AsyncSyncPoint> repairSyncPoint(Set allNodes) { - return (node, seekables) -> CoordinateSyncPoint.coordinate(node, Kind.SyncPoint, seekables, RepairSyncPointAdapter.create(allNodes)); + return (node, route) -> { + TxnId txnId = node.nextTxnId(Kind.SyncPoint, route.domain()); + AsyncResult> async = CoordinateSyncPoint.coordinate(node, Kind.SyncPoint, route, (SyncPointAdapter)RepairSyncPointAdapter.create(allNodes)); + return new AsyncSyncPoint(txnId, async); + }; } @Override - public Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) + public Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints) { Set allNodes = allEndpoints.stream().map(configService::mappedId).collect(Collectors.toUnmodifiableSet()); return barrier(keysOrRanges, epoch, requestTime, timeoutNanos, barrierType, isForWrite, repairSyncPoint(allNodes)); } - private static > Seekables intersectionWithAccordManagedRanges(Seekables keysOrRanges) + private static Seekables intersectionWithAccordManagedRanges(Seekables keysOrRanges) { TableId tableId = null; - for (Seekable seekable : keysOrRanges) + for (Seekable keyOrRange : keysOrRanges) { TableId newTableId; if (keysOrRanges.domain() == Key) - newTableId = ((PartitionKey) seekable).table(); + newTableId = ((PartitionKey)keyOrRange).table(); else if (keysOrRanges.domain() == Range) - newTableId = ((TokenRange) seekable).table(); + newTableId = ((TokenRange) keyOrRange).table(); else throw new IllegalStateException("Unexpected domain " + keysOrRanges.domain()); @@ -780,7 +800,7 @@ public Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, Barri } @Override - public Seekables repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException + public Seekables repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException { return doWithRetries(Blocking.Default.instance, () -> AccordService.instance().repair(keysOrRanges, minEpoch, Dispatcher.RequestTime.forImmediateExecution(), DatabaseDescriptor.getAccordRangeBarrierTimeoutNanos(), barrierType, isForWrite, allEndpoints), DatabaseDescriptor.getAccordBarrierRetryAttempts(), @@ -1030,9 +1050,9 @@ private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder sta return submit.flatMap(Function.identity()); } - private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, CommandStore commandStore, PartitionKey blockedBy, TxnId txnId, Timestamp executeAt) + private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, CommandStore commandStore, TokenKey blockedBy, TxnId txnId, Timestamp executeAt) { - AsyncChain> submit = commandStore.submit(PreLoadContext.contextFor(txnId, Keys.of(blockedBy), KeyHistory.COMMANDS), in -> { + AsyncChain> submit = commandStore.submit(PreLoadContext.contextFor(txnId, RoutingKeys.of(blockedBy.toUnseekable()), KeyHistory.COMMANDS), in -> { AsyncChain chain = populate(state, (AccordSafeCommandStore) in, blockedBy, txnId, executeAt); return chain == null ? AsyncChains.success(null) : chain; }); @@ -1067,7 +1087,7 @@ private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder sta chains.add(populate(state, safeStore.commandStore(), blockedBy)); } } - for (PartitionKey blockedBy : cmdTxnState.blockedByKey) + for (TokenKey blockedBy : cmdTxnState.blockedByKey) { if (state.keys.containsKey(blockedBy)) continue; if (safeStore.getCommandsForKeyIfLoaded(blockedBy) != null) @@ -1087,7 +1107,7 @@ private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder sta return AsyncChains.all(chains).map(ignore -> null); } - private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, AccordSafeCommandStore safeStore, PartitionKey pk, TxnId txnId, Timestamp executeAt) + private static AsyncChain populate(CommandStoreTxnBlockedGraph.Builder state, AccordSafeCommandStore safeStore, TokenKey pk, TxnId txnId, Timestamp executeAt) { AccordSafeCommandsForKey commandsForKey = safeStore.getCommandsForKeyIfLoaded(pk); TxnId blocking = commandsForKey.current().blockedOnTxnId(txnId, executeAt); @@ -1115,7 +1135,7 @@ private static CommandStoreTxnBlockedGraph.TxnState populate(CommandStoreTxnBloc else { // blocked on key - cmdTxnState.blockedByKey.add((PartitionKey) waitingOn.keys.get(i - waitingOn.txnIdCount())); + cmdTxnState.blockedByKey.add((TokenKey) waitingOn.keys.get(i - waitingOn.txnIdCount())); } }); } @@ -1138,9 +1158,9 @@ public void tryMarkRemoved(Topology topology, Id target) tryMarkRemoved(ranges, 0).begin(node().agent()); } - private AsyncChain> tryMarkRemoved(Ranges ranges, int attempt) + private AsyncChain> tryMarkRemoved(Ranges ranges, int attempt) { - return CoordinateSyncPoint.exclusive(node, ranges) + return CoordinateSyncPoint.exclusiveSyncPoint(node, ranges) .recover(t -> //TODO (operability): make this configurable / monitorable? attempt <= 3 && t instanceof Invalidated || t instanceof Preempted || t instanceof Timeout ? tryMarkRemoved(ranges, attempt + 1) : null); @@ -1234,7 +1254,7 @@ public AccordConfigurationService configurationService() public CompactionInfo getCompactionInfo() { Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); - Int2ObjectHashMap ranges = new Int2ObjectHashMap<>(); + Int2ObjectHashMap ranges = new Int2ObjectHashMap<>(); AtomicReference durableBefore = new AtomicReference<>(DurableBefore.EMPTY); AsyncChains.getBlockingAndRethrow(node.commandStores().forEach(safeStore -> { synchronized (redundantBefores) @@ -1334,10 +1354,10 @@ private AsyncChain awaitTableDropSubRange(Ranges ranges, int attempt) .flatMap(s -> s == null ? AsyncChains.success(null) : Await.coordinate(node, s)); } - private AsyncChain> exclusiveSyncPoint(Ranges ranges, int attempt) + private AsyncChain> exclusiveSyncPoint(Ranges ranges, int attempt) { //TODO (on merge): CASSANDRA-19769 has the same logic... should this be refactored? Would make it nice so we could split the range on retries? - return CoordinateSyncPoint.exclusive(node, ranges) + return CoordinateSyncPoint.exclusiveSyncPoint(node, ranges) .recover(t -> { //TODO (operability): make this configurable / monitorable? if (attempt > 3) return null; @@ -1368,21 +1388,21 @@ private static RetryDecission shouldRetry(Throwable t) // TODO (duplication): this is 95% of accord.coordinate.CoordinateShardDurable // we already report all this information to EpochState; would be better to use that // Taken from ListStore... - private static class Await extends AsyncResults.SettableResult> implements Callback + private static class Await extends AsyncResults.SettableResult> implements Callback { private final Node node; private final AllTracker tracker; - private final SyncPoint exclusiveSyncPoint; + private final SyncPoint exclusiveSyncPoint; - private Await(Node node, SyncPoint exclusiveSyncPoint) + private Await(Node node, SyncPoint exclusiveSyncPoint) { - Topologies topologies = node.topology().forEpoch(exclusiveSyncPoint.keysOrRanges, exclusiveSyncPoint.sourceEpoch()); + Topologies topologies = node.topology().forEpoch(exclusiveSyncPoint.route, exclusiveSyncPoint.sourceEpoch()); this.node = node; this.tracker = new AllTracker(topologies); this.exclusiveSyncPoint = exclusiveSyncPoint; } - public static AsyncChain coordinate(Node node, SyncPoint sp) + public static AsyncChain coordinate(Node node, SyncPoint sp) { return node.withEpoch(sp.sourceEpoch(), () -> { Await coordinate = new Await(node, sp); @@ -1402,7 +1422,7 @@ public static AsyncChain coordinate(Node node, SyncPoint sp) private void start() { - node.send(tracker.nodes(), to -> new WaitUntilApplied(to, tracker.topologies(), exclusiveSyncPoint.syncId, exclusiveSyncPoint.keysOrRanges, exclusiveSyncPoint.syncId.epoch()), this); + node.send(tracker.nodes(), to -> new WaitUntilApplied(to, tracker.topologies(), exclusiveSyncPoint.syncId, exclusiveSyncPoint.route, exclusiveSyncPoint.syncId.epoch()), this); } @Override public void onSuccess(Node.Id from, ReadData.ReadReply reply) @@ -1423,7 +1443,7 @@ public void onSuccess(Node.Id from, ReadData.ReadReply reply) tryFailure(new ExecuteSyncPoint.SyncPointErased()); return; case Invalid: - tryFailure(new Invalidated(exclusiveSyncPoint.syncId, exclusiveSyncPoint.homeKey)); + tryFailure(new Invalidated(exclusiveSyncPoint.syncId, exclusiveSyncPoint.route.homeKey())); return; } } @@ -1431,7 +1451,7 @@ public void onSuccess(Node.Id from, ReadData.ReadReply reply) { if (tracker.recordSuccess(from) == RequestStatus.Success) { - node.configService().reportEpochRedundant(exclusiveSyncPoint.keysOrRanges, exclusiveSyncPoint.syncId.epoch()); + node.configService().reportEpochRedundant(exclusiveSyncPoint.route.toRanges(), exclusiveSyncPoint.syncId.epoch()); trySuccess(exclusiveSyncPoint); } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 42d4a4d2438e..04f618e33819 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -379,6 +379,25 @@ public S acquireIfExists(K key) return safeRefFactory.apply(acquireExisting(node, false)); } + + public void maybeLoad(K key, V initial) + { + AccordCachingState node = (AccordCachingState) cache.get(key); + if (node == null) + { + node = nodeFactory.create(key, index); + node.initialize(initial); + Object prev = cache.put(key, node); + Invariants.checkState(prev == null, "%s not absent from cache: %s already present", key, node); + if (listeners != null) + { + AccordCachingState finalNode = node; + listeners.forEach(l -> l.onAdd(finalNode)); + } + maybeUpdateSize(node, heapEstimator); + } + } + public S acquire(K key) { AccordCachingState node = acquire(key, false); diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopology.java b/src/java/org/apache/cassandra/service/accord/AccordTopology.java index eb8f660b0af4..2489929c1672 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopology.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopology.java @@ -23,12 +23,12 @@ import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; +import accord.local.Node.Id; import accord.primitives.Ranges; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import accord.local.Node; import accord.topology.Shard; import accord.topology.Topology; import accord.utils.Invariants; @@ -58,14 +58,14 @@ */ public class AccordTopology { - public static Node.Id tcmIdToAccord(NodeId nodeId) + public static Id tcmIdToAccord(NodeId nodeId) { - return new Node.Id(nodeId.id()); + return new Id(nodeId.id()); } private static class ShardLookup extends HashMap { - private Shard createOrReuse(boolean pendingRemoval, accord.primitives.Range range, SortedArrayList nodes, Set fastPathElectorate, Set joining) + private Shard createOrReuse(boolean pendingRemoval, accord.primitives.Range range, SortedArrayList nodes, Set fastPathElectorate, Set joining) { Shard prev = get(range); if (prev != null @@ -83,10 +83,10 @@ public static class KeyspaceShard { private final KeyspaceMetadata keyspace; private final Range range; - private final SortedArrayList nodes; - private final Set pending; + private final SortedArrayList nodes; + private final Set pending; - private KeyspaceShard(KeyspaceMetadata keyspace, Range range, SortedArrayList nodes, Set pending) + private KeyspaceShard(KeyspaceMetadata keyspace, Range range, SortedArrayList nodes, Set pending) { this.keyspace = keyspace; this.range = range; @@ -100,15 +100,15 @@ private FastPathStrategy strategyFor(TableMetadata metadata) FastPathStrategy tableStrategy = metadata.params.fastPath; FastPathStrategy strategy = tableStrategy.kind() != FastPathStrategy.Kind.INHERIT_KEYSPACE ? tableStrategy : keyspace.params.fastPath; - Invariants.checkState(strategy.kind() != FastPathStrategy.Kind.INHERIT_KEYSPACE);; + Invariants.checkState(strategy.kind() != FastPathStrategy.Kind.INHERIT_KEYSPACE); return strategy; } - Shard createForTable(TableMetadata metadata, Set unavailable, Map dcMap, ShardLookup lookup) + Shard createForTable(TableMetadata metadata, Set unavailable, Map dcMap, ShardLookup lookup) { TokenRange tokenRange = AccordTopology.range(metadata.id, range); - SortedArrayList fastPath = strategyFor(metadata).calculateFastPath(nodes, unavailable, dcMap); + SortedArrayList fastPath = strategyFor(metadata).calculateFastPath(nodes, unavailable, dcMap); return lookup.createOrReuse(metadata.params.pendingDrop, tokenRange, nodes, fastPath, pending); } @@ -124,14 +124,14 @@ private static KeyspaceShard forRange(KeyspaceMetadata keyspace, Range ra Sets.SetView readOnly = Sets.difference(readEndpoints, writeEndpoints); Invariants.checkState(readOnly.isEmpty(), "Read only replicas detected: %s", readOnly); - SortedArrayList nodes = new SortedArrayList<>(writes.endpoints().stream() - .map(directory::peerId) - .map(AccordTopology::tcmIdToAccord) - .sorted().toArray(Node.Id[]::new)); + SortedArrayList nodes = new SortedArrayList<>(writes.endpoints().stream() + .map(directory::peerId) + .map(AccordTopology::tcmIdToAccord) + .sorted().toArray(Id[]::new)); - Set pending = readEndpoints.equals(writeEndpoints) ? - Collections.emptySet() : - writeEndpoints.stream() + Set pending = readEndpoints.equals(writeEndpoints) ? + Collections.emptySet() : + writeEndpoints.stream() .filter(e -> !readEndpoints.contains(e)) .map(directory::peerId) .map(AccordTopology::tcmIdToAccord) @@ -156,7 +156,7 @@ public static List forKeyspace(KeyspaceMetadata keyspace, DataPla return shards; } - public List nodes() + public List nodes() { return nodes; } @@ -213,9 +213,9 @@ public static accord.primitives.Ranges toAccordRanges(String keyspace, Collectio return accordRanges; } - private static Map createDCMap(Directory directory) + private static Map createDCMap(Directory directory) { - ImmutableMap.Builder builder = ImmutableMap.builder(); + ImmutableMap.Builder builder = ImmutableMap.builder(); directory.knownDatacenters().forEach(dc -> { Set dcEndpoints = directory.datacenterEndpoints(dc); // nodes aren't added to the endpointsToDCMap until they've joined @@ -223,7 +223,7 @@ private static Map createDCMap(Directory directory) return; dcEndpoints.forEach(ep -> { NodeId tid = directory.peerId(ep); - Node.Id aid = tcmIdToAccord(tid); + Id aid = tcmIdToAccord(tid); builder.put(aid, dc); }); }); @@ -235,8 +235,8 @@ public static Topology createAccordTopology(Epoch epoch, DistributedSchema schem AccordStaleReplicas staleReplicas) { List shards = new ArrayList<>(); - Set unavailable = accordFastPath.unavailableIds(); - Map dcMap = createDCMap(directory); + Set unavailable = accordFastPath.unavailableIds(); + Map dcMap = createDCMap(directory); for (KeyspaceMetadata keyspace : schema.getKeyspaces()) { @@ -249,7 +249,7 @@ public static Topology createAccordTopology(Epoch epoch, DistributedSchema schem shards.sort((a, b) -> a.range.compare(b.range)); - return new Topology(epoch.getEpoch(), staleReplicas.ids(), shards.toArray(new Shard[0])); + return new Topology(epoch.getEpoch(), SortedArrayList.copyUnsorted(staleReplicas.ids(), Id[]::new), shards.toArray(new Shard[0])); } public static Topology createAccordTopology(ClusterMetadata metadata, ShardLookup lookup) @@ -275,7 +275,7 @@ public static EndpointMapping directoryToMapping(EndpointMapping mapping, long e // There are cases where nodes are removed from the cluster (host replacement, decom, etc.), but inflight events may still be happening; // keep the ids around so pending events do not fail with a mapping error - for (Node.Id id : mapping.differenceIds(builder)) + for (Id id : mapping.differenceIds(builder)) builder.add(mapping.mappedEndpoint(id), id); return builder.build(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 5bf50308eacd..9bda1950e910 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -33,13 +33,11 @@ public class AccordVerbHandler implements IVerbHandler private final Node node; private final AccordEndpointMapper endpointMapper; - private final AccordJournal journal; - public AccordVerbHandler(Node node, AccordEndpointMapper endpointMapper, AccordJournal journal) + public AccordVerbHandler(Node node, AccordEndpointMapper endpointMapper) { this.node = node; this.endpointMapper = endpointMapper; - this.journal = journal; } @Override @@ -50,12 +48,6 @@ public void doVerb(Message message) throws IOException logger.trace("Receiving {} from {}", message.payload, message.from()); T request = message.payload; - if (request.type().hasSideEffects()) - { - journal.processRemoteRequest(request, message); - return; - } - /* * TODO (desired): messages without side-effects don't go through the journal, * and as such are retained on heap until the node catches up to waitForEpoch, diff --git a/src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java b/src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java index c7d0147add86..5fdc1c2d62e0 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java +++ b/src/java/org/apache/cassandra/service/accord/CommandStoreTxnBlockedGraph.java @@ -29,16 +29,16 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import accord.local.SaveStatus; +import accord.primitives.SaveStatus; import accord.primitives.Timestamp; import accord.primitives.TxnId; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; public class CommandStoreTxnBlockedGraph { public final int storeId; public final Map txns; - public final Map keys; + public final Map keys; public CommandStoreTxnBlockedGraph(Builder builder) { @@ -53,7 +53,7 @@ public static class TxnState public final Timestamp executeAt; public final SaveStatus saveStatus; public final List blockedBy; - public final Set blockedByKey; + public final Set blockedByKey; public TxnState(Builder.TxnBuilder builder) { @@ -79,7 +79,7 @@ public static class Builder { final int storeId; final Map txns = new LinkedHashMap<>(); - final Map keys = new LinkedHashMap<>(); + final Map keys = new LinkedHashMap<>(); public Builder(int storeId) { @@ -107,7 +107,7 @@ public class TxnBuilder final Timestamp executeAt; final SaveStatus saveStatus; List blockedBy = new ArrayList<>(); - Set blockedByKey = new LinkedHashSet<>(); + Set blockedByKey = new LinkedHashSet<>(); public TxnBuilder(TxnId txnId, Timestamp executeAt, SaveStatus saveStatus) { diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java index 4da915448c20..c664ff62c329 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRanges.java @@ -33,9 +33,9 @@ import accord.local.SafeCommandStore.TestDep; import accord.local.SafeCommandStore.TestStartedAt; import accord.local.SafeCommandStore.TestStatus; -import accord.local.SaveStatus; import accord.primitives.Range; import accord.primitives.Ranges; +import accord.primitives.SaveStatus; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -44,9 +44,9 @@ import static accord.local.SafeCommandStore.TestDep.WITH; import static accord.local.SafeCommandStore.TestStartedAt.STARTED_BEFORE; import static accord.local.SafeCommandStore.TestStatus.ANY_STATUS; -import static accord.local.Status.Stable; -import static accord.local.Status.Truncated; import static accord.primitives.Routables.Slice.Minimal; +import static accord.primitives.Status.Stable; +import static accord.primitives.Status.Truncated; public class CommandsForRanges implements CommandsSummary { diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index c6eb33cc6363..bdf1aa98886f 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -36,9 +36,9 @@ import accord.local.Command; import accord.local.DurableBefore; -import accord.local.SaveStatus; -import accord.local.Status; import accord.primitives.PartialDeps; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routable; diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index a27a29f10496..c1c7651a80ff 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -29,7 +29,7 @@ import com.google.common.collect.ImmutableSet; import accord.api.BarrierType; -import accord.local.CommandStores; +import accord.local.CommandStores.RangesForEpoch; import accord.local.DurableBefore; import accord.local.Node; import accord.local.Node.Id; @@ -69,16 +69,16 @@ public interface IAccordService IVerbHandler verbHandler(); - Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException; + Seekables barrierWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite) throws InterruptedException; - Seekables barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite); + Seekables barrier(@Nonnull Seekables keysOrRanges, long minEpoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite); - default Seekables repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException + default Seekables repairWithRetries(Seekables keysOrRanges, long minEpoch, BarrierType barrierType, boolean isForWrite, List allEndpoints) throws InterruptedException { throw new UnsupportedOperationException(); } - Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints); + Seekables repair(@Nonnull Seekables keysOrRanges, long epoch, Dispatcher.RequestTime requestTime, long timeoutNanos, BarrierType barrierType, boolean isForWrite, List allEndpoints); default void postStreamReceivingBarrier(ColumnFamilyStore cfs, List> ranges) { @@ -136,10 +136,10 @@ class CompactionInfo static final Supplier NO_OP = () -> new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), DurableBefore.EMPTY); public final Int2ObjectHashMap redundantBefores; - public final Int2ObjectHashMap ranges; + public final Int2ObjectHashMap ranges; public final DurableBefore durableBefore; - public CompactionInfo(Int2ObjectHashMap redundantBefores, Int2ObjectHashMap ranges, DurableBefore durableBefore) + public CompactionInfo(Int2ObjectHashMap redundantBefores, Int2ObjectHashMap ranges, DurableBefore durableBefore) { this.redundantBefores = redundantBefores; this.ranges = ranges; diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java index 8e44ad2aacf2..721d69c5a665 100644 --- a/src/java/org/apache/cassandra/service/accord/IJournal.java +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -19,19 +19,31 @@ package org.apache.cassandra.service.accord; import java.util.List; +import java.util.NavigableMap; import accord.local.Command; +import accord.local.CommandStores; +import accord.local.DurableBefore; +import accord.local.RedundantBefore; +import accord.primitives.Deps; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; import accord.primitives.TxnId; public interface IJournal { Command loadCommand(int commandStoreId, TxnId txnId); - /** - * Append outcomes to the log. - */ - void appendCommand(int commandStoreId, - List> command, - List sanityCheck, - Runnable onFlush); + RedundantBefore loadRedundantBefore(int commandStoreId); + DurableBefore loadDurableBefore(int commandStoreId); + NavigableMap loadBootstrapBeganAt(int commandStoreId); + NavigableMap loadSafeToRead(int commandStoreId); + CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int commandStoreId); + List loadHistoricalTransactions(int store); + + void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onFlush); + void persistStoreState(int store, + // TODO: this class should not live under ASCS + AccordSafeCommandStore.FieldUpdates fieldUpdates, + Runnable onFlush); } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index c31c33788213..97c2e2e0ba1b 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -23,31 +23,38 @@ import java.util.Objects; import java.util.zip.Checksum; -import accord.local.Node; +import accord.local.Node.Id; import accord.primitives.Timestamp; +import accord.utils.Invariants; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.journal.KeySupport; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.BootstrapBeganAtSerializer; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.CommandDiffSerializer; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeSerializer; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.FlyweightSerializer; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsSerializer; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeSerializer; import org.apache.cassandra.utils.ByteArrayUtil; +import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; import static org.apache.cassandra.db.TypeSizes.SHORT_SIZE; +import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RangesForEpochSerializer; +import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.SafeToReadSerializer; public final class JournalKey { + final Type type; public final Timestamp timestamp; - // TODO: command store id _before_ timestamp public final int commandStoreId; - JournalKey(Timestamp timestamp) + public JournalKey(Timestamp timestamp, Type type, int commandStoreId) { - this(timestamp, -1); - } - - JournalKey(Timestamp timestamp, int commandStoreId) - { - if (timestamp == null) throw new NullPointerException("Null timestamp"); + Invariants.nonNull(type); + Invariants.nonNull(timestamp); + this.type = type; this.timestamp = timestamp; this.commandStoreId = commandStoreId; } @@ -65,7 +72,8 @@ public final class JournalKey private static final int HLC_OFFSET = 0; private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; private static final int NODE_OFFSET = EPOCH_AND_FLAGS_OFFSET + LONG_SIZE; - private static final int CS_ID_OFFSET = NODE_OFFSET + INT_SIZE; + private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; + private static final int CS_ID_OFFSET = TYPE_OFFSET + BYTE_SIZE; @Override public int serializedSize(int userVersion) @@ -74,6 +82,7 @@ public int serializedSize(int userVersion) + 6 // timestamp.epoch() + 2 // timestamp.flags() + INT_SIZE // timestamp.node + + BYTE_SIZE // type + SHORT_SIZE; // commandStoreId } @@ -81,29 +90,33 @@ public int serializedSize(int userVersion) public void serialize(JournalKey key, DataOutputPlus out, int userVersion) throws IOException { serializeTimestamp(key.timestamp, out); + out.writeByte(key.type.id); out.writeShort(key.commandStoreId); } private void serialize(JournalKey key, byte[] out) { serializeTimestamp(key.timestamp, out); - ByteArrayUtil.putShort(out, 20, (short) key.commandStoreId); + out[20] = (byte) (key.type.id & 0xFF); + ByteArrayUtil.putShort(out, 21, (short) key.commandStoreId); } @Override public JournalKey deserialize(DataInputPlus in, int userVersion) throws IOException { Timestamp timestamp = deserializeTimestamp(in); - int commandStoreId = in.readShort(); - return new JournalKey(timestamp, commandStoreId); + int type = in.readByte(); + int commandStoreId = in.readShort(); + return new JournalKey(timestamp, Type.fromId(type), commandStoreId); } @Override public JournalKey deserialize(ByteBuffer buffer, int position, int userVersion) { Timestamp timestamp = deserializeTimestamp(buffer, position); + int type = buffer.get(position + TYPE_OFFSET); int commandStoreId = buffer.getShort(position + CS_ID_OFFSET); - return new JournalKey(timestamp, commandStoreId); + return new JournalKey(timestamp, Type.fromId(type), commandStoreId); } private void serializeTimestamp(Timestamp timestamp, DataOutputPlus out) throws IOException @@ -118,7 +131,7 @@ private Timestamp deserializeTimestamp(DataInputPlus in) throws IOException long hlc = in.readLong(); long epochAndFlags = in.readLong(); int nodeId = in.readInt(); - return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Node.Id(nodeId)); + return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); } private void serializeTimestamp(Timestamp timestamp, byte[] out) @@ -133,7 +146,7 @@ private Timestamp deserializeTimestamp(ByteBuffer buffer, int position) long hlc = buffer.getLong(position + HLC_OFFSET); long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); int nodeId = buffer.getInt(position + NODE_OFFSET); - return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Node.Id(nodeId)); + return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); } @Override @@ -150,6 +163,10 @@ public int compareWithKeyAt(JournalKey k, ByteBuffer buffer, int position, int u int cmp = compareWithTimestampAt(k.timestamp, buffer, position); if (cmp != 0) return cmp; + byte type = buffer.get(position + TYPE_OFFSET); + cmp = Byte.compare((byte) k.type.id, type); + if (cmp != 0) return cmp; + short commandStoreId = buffer.getShort(position + CS_ID_OFFSET); cmp = Short.compare((byte) k.commandStoreId, commandStoreId); return cmp; @@ -174,6 +191,7 @@ private int compareWithTimestampAt(Timestamp timestamp, ByteBuffer buffer, int p public int compare(JournalKey k1, JournalKey k2) { int cmp = compare(k1.timestamp, k2.timestamp); + if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); if (cmp == 0) cmp = Short.compare((short) k1.commandStoreId, (short) k2.commandStoreId); return cmp; } @@ -213,20 +231,75 @@ public boolean equals(Object other) boolean equals(JournalKey other) { return this.timestamp.equals(other.timestamp) && + this.type == other.type && this.commandStoreId == other.commandStoreId; } @Override public int hashCode() { - return Objects.hash(timestamp, commandStoreId); + return Objects.hash(timestamp, type, commandStoreId); } public String toString() { return "Key{" + "timestamp=" + timestamp + + "type=" + type + ", commandStoreId=" + commandStoreId + '}'; } + + public enum Type + { + COMMAND_DIFF (0, new CommandDiffSerializer()), + REDUNDANT_BEFORE (1, new RedundantBeforeSerializer()), + DURABLE_BEFORE (2, new DurableBeforeSerializer()), + SAFE_TO_READ (3, new SafeToReadSerializer()), + BOOTSTRAP_BEGAN_AT (4, new BootstrapBeganAtSerializer()), + RANGES_FOR_EPOCH (5, new RangesForEpochSerializer()), + HISTORICAL_TRANSACTIONS (6, new HistoricalTransactionsSerializer()) + ; + + final int id; + final FlyweightSerializer serializer; + + Type(int id, FlyweightSerializer serializer) + { + this.id = id; + this.serializer = serializer; + } + + private static final Type[] idToTypeMapping; + + static + { + Type[] types = values(); + + int maxId = -1; + for (Type type : types) + maxId = Math.max(type.id, maxId); + + Type[] idToType = new Type[maxId + 1]; + for (Type type : types) + { + if (null != idToType[type.id]) + throw new IllegalStateException("Duplicate Type id " + type.id); + idToType[type.id] = type; + } + idToTypeMapping = idToType; + } + + static Type fromId(int id) + { + if (id < 0 || id >= idToTypeMapping.length) + throw new IllegalArgumentException("Out or range Type id " + id); + Type type = idToTypeMapping[id]; + if (null == type) + throw new IllegalArgumentException("Unknown Type id " + id); + return type; + } + } + + } diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index bef75a70ef0d..a22467a9f372 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.function.Function; - import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; @@ -29,13 +28,12 @@ import accord.api.Result; import accord.local.Command; import accord.local.CommonAttributes; -import accord.local.SaveStatus; -import accord.local.Status; +import accord.local.StoreParticipants; import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; -import accord.primitives.Route; -import accord.primitives.Seekables; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; @@ -44,10 +42,12 @@ import org.apache.cassandra.journal.Journal; import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.DepsSerializer; -import org.apache.cassandra.service.accord.serializers.KeySerializers; import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.utils.Throwables; +import static accord.primitives.Known.KnownDeps.DepsErased; +import static accord.primitives.Known.KnownDeps.DepsUnknown; +import static accord.primitives.Known.KnownDeps.NoDeps; import static accord.utils.Invariants.illegalState; public class SavedCommand @@ -62,31 +62,28 @@ public enum Fields DURABILITY, ACCEPTED, PROMISED, - ROUTE, + PARTICIPANTS, PARTIAL_TXN, PARTIAL_DEPS, - ADDITIONAL_KEYS, WAITING_ON, WRITES, } - public interface Writer extends Journal.Writer - { - void write(DataOutputPlus out, int userVersion) throws IOException; - K key(); - } - - public static class DiffWriter implements Writer + // TODO: maybe rename this and enclosing classes? + public static class DiffWriter implements Journal.Writer { private final Command before; private final Command after; private final TxnId txnId; + // TODO: improve encapsulationd + @VisibleForTesting public DiffWriter(Command before, Command after) { this(after.txnId(), before, after); } + @VisibleForTesting public DiffWriter(TxnId txnId, Command before, Command after) { this.txnId = txnId; @@ -118,21 +115,29 @@ public TxnId key() } @Nullable - public static Writer diff(Command original, Command current) + public static DiffWriter diff(Command original, Command current) { if (original == current || current == null - || current.saveStatus() == SaveStatus.Uninitialised) + || current.saveStatus() == SaveStatus.Uninitialised + || !anyFieldChanged(original, current)) return null; return new SavedCommand.DiffWriter(original, current); } - - public static Writer diffWriter(Command before, Command after) + // TODO (required): this is very inefficient + private static boolean anyFieldChanged(Command before, Command after) { - return new DiffWriter(before, after); - } + int flags = getFlags(before, after); + for (Fields field : Fields.values()) + { + if (getFieldChanged(field, flags)) + return true; + } + return false; + } + public static void serialize(Command before, Command after, DataOutputPlus out, int userVersion) throws IOException { int flags = getFlags(before, after); @@ -157,14 +162,12 @@ public static void serialize(Command before, Command after, DataOutputPlus out, if (getFieldChanged(Fields.PROMISED, flags) && after.promised() != null) CommandSerializers.ballot.serialize(after.promised(), out, userVersion); - if (getFieldChanged(Fields.ROUTE, flags) && after.route() != null) - AccordKeyspace.LocalVersionedSerializers.route.serialize(after.route(), out); // TODO (required): user version + if (getFieldChanged(Fields.PARTICIPANTS, flags) && after.participants() != null) + CommandSerializers.participants.serialize(after.participants(), out, userVersion); if (getFieldChanged(Fields.PARTIAL_TXN, flags) && after.partialTxn() != null) CommandSerializers.partialTxn.serialize(after.partialTxn(), out, userVersion); if (getFieldChanged(Fields.PARTIAL_DEPS, flags) && after.partialDeps() != null) DepsSerializer.partialDeps.serialize(after.partialDeps(), out, userVersion); - if (getFieldChanged(Fields.ADDITIONAL_KEYS, flags) && after.additionalKeysOrRanges() != null) - KeySerializers.seekables.serialize(after.additionalKeysOrRanges(), out, userVersion); Command.WaitingOn waitingOn = getWaitingOn(after); if (getFieldChanged(Fields.WAITING_ON, flags) && waitingOn != null) @@ -193,10 +196,9 @@ static int getFlags(Command before, Command after) flags = collectFlags(before, after, Command::acceptedOrCommitted, false, Fields.ACCEPTED, flags); flags = collectFlags(before, after, Command::promised, false, Fields.PROMISED, flags); - flags = collectFlags(before, after, Command::route, true, Fields.ROUTE, flags); + flags = collectFlags(before, after, Command::participants, true, Fields.PARTICIPANTS, flags); flags = collectFlags(before, after, Command::partialTxn, false, Fields.PARTIAL_TXN, flags); flags = collectFlags(before, after, Command::partialDeps, false, Fields.PARTIAL_DEPS, flags); - flags = collectFlags(before, after, Command::additionalKeysOrRanges, false, Fields.ADDITIONAL_KEYS, flags); flags = collectFlags(before, after, SavedCommand::getWaitingOn, false, Fields.WAITING_ON, flags); @@ -259,8 +261,15 @@ private static int setFieldIsNull(Fields field, int oldFlags) return oldFlags | (1 << field.ordinal()); } + private static int unsetFieldIsNull(Fields field, int oldFlags) + { + return oldFlags & ~(1 << field.ordinal()); + } + public static class Builder { + int flags; + TxnId txnId; Timestamp executeAt; @@ -271,11 +280,11 @@ public static class Builder Ballot acceptedOrCommitted; Ballot promised; - Route route; + StoreParticipants participants; PartialTxn partialTxn; PartialDeps partialDeps; - Seekables additionalKeysOrRanges; + byte[] waitingOnBytes; SavedCommand.WaitingOnProvider waitingOn; Writes writes; Result result; @@ -298,6 +307,11 @@ public Timestamp executeAt() return executeAt; } + public Timestamp executeAtLeast() + { + return executeAtLeast; + } + public SaveStatus saveStatus() { return saveStatus; @@ -318,9 +332,9 @@ public Ballot promised() return promised; } - public Route route() + public StoreParticipants participants() { - return route; + return participants; } public PartialTxn partialTxn() @@ -333,11 +347,6 @@ public PartialDeps partialDeps() return partialDeps; } - public Seekables additionalKeysOrRanges() - { - return additionalKeysOrRanges; - } - public SavedCommand.WaitingOnProvider waitingOn() { return waitingOn; @@ -355,6 +364,8 @@ public Result result() public void clear() { + flags = 0; + txnId = null; executeAt = null; @@ -364,10 +375,9 @@ public void clear() acceptedOrCommitted = Ballot.ZERO; promised = null; - route = null; + participants = null; partialTxn = null; partialDeps = null; - additionalKeysOrRanges = null; waitingOn = (txn, deps) -> null; writes = null; @@ -387,13 +397,65 @@ public int count() return count; } + public void serialize(DataOutputPlus out, int userVersion) throws IOException + { + out.writeInt(flags); + + // We encode all changed fields unless their value is null + if (getFieldChanged(Fields.TXN_ID, flags) && !getFieldIsNull(Fields.TXN_ID, flags)) + CommandSerializers.txnId.serialize(txnId(), out, userVersion); + if (getFieldChanged(Fields.EXECUTE_AT, flags) && !getFieldIsNull(Fields.EXECUTE_AT, flags)) + CommandSerializers.timestamp.serialize(executeAt(), out, userVersion); + // TODO (desired): check if this can fold into executeAt + if (getFieldChanged(Fields.EXECUTES_AT_LEAST, flags) && !getFieldIsNull(Fields.EXECUTES_AT_LEAST, flags)) + CommandSerializers.timestamp.serialize(executeAtLeast(), out, userVersion); + if (getFieldChanged(Fields.SAVE_STATUS, flags) && !getFieldIsNull(Fields.SAVE_STATUS, flags)) + out.writeInt(saveStatus().ordinal()); + if (getFieldChanged(Fields.DURABILITY, flags) && !getFieldIsNull(Fields.DURABILITY, flags)) + out.writeInt(durability().ordinal()); + + if (getFieldChanged(Fields.ACCEPTED, flags) && !getFieldIsNull(Fields.ACCEPTED, flags)) + CommandSerializers.ballot.serialize(acceptedOrCommitted(), out, userVersion); + if (getFieldChanged(Fields.PROMISED, flags) && !getFieldIsNull(Fields.PROMISED, flags)) + CommandSerializers.ballot.serialize(promised(), out, userVersion); + + if (getFieldChanged(Fields.PARTICIPANTS, flags) && !getFieldIsNull(Fields.PARTICIPANTS, flags)) + CommandSerializers.participants.serialize(participants(), out, userVersion); + if (getFieldChanged(Fields.PARTIAL_TXN, flags) && !getFieldIsNull(Fields.PARTIAL_TXN, flags)) + CommandSerializers.partialTxn.serialize(partialTxn(), out, userVersion); + if (getFieldChanged(Fields.PARTIAL_DEPS, flags) && !getFieldIsNull(Fields.PARTIAL_DEPS, flags)) + DepsSerializer.partialDeps.serialize(partialDeps(), out, userVersion); + + if (getFieldChanged(Fields.WAITING_ON, flags) && !getFieldIsNull(Fields.WAITING_ON, flags)) + { + out.writeInt(waitingOnBytes.length); + out.write(waitingOnBytes); + } + + if (getFieldChanged(Fields.WRITES, flags) && !getFieldIsNull(Fields.WRITES, flags)) + CommandSerializers.writes.serialize(writes(), out, userVersion); + } + + + // TODO: we seem to be writing some form of empty transaction @SuppressWarnings({ "rawtypes", "unchecked" }) public void deserializeNext(DataInputPlus in, int userVersion) throws IOException { + final int flags = in.readInt(); nextCalled = true; count++; - final int flags = in.readInt(); + for (Fields field : Fields.values()) + { + if (getFieldChanged(field, flags)) + { + this.flags = setFieldChanged(field, this.flags); + if (getFieldIsNull(field, flags)) + this.flags = setFieldIsNull(field, this.flags); + else + this.flags = unsetFieldIsNull(field, this.flags); + } + } if (getFieldChanged(Fields.TXN_ID, flags)) { @@ -450,12 +512,12 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio promised = CommandSerializers.ballot.deserialize(in, userVersion); } - if (getFieldChanged(Fields.ROUTE, flags)) + if (getFieldChanged(Fields.PARTICIPANTS, flags)) { - if (getFieldIsNull(Fields.ROUTE, flags)) - route = null; + if (getFieldIsNull(Fields.PARTICIPANTS, flags)) + participants = null; else - route = AccordKeyspace.LocalVersionedSerializers.route.deserialize(in); + participants = CommandSerializers.participants.deserialize(in, userVersion); } if (getFieldChanged(Fields.PARTIAL_TXN, flags)) @@ -474,14 +536,6 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio partialDeps = DepsSerializer.partialDeps.deserialize(in, userVersion); } - if (getFieldChanged(Fields.ADDITIONAL_KEYS, flags)) - { - if (getFieldIsNull(Fields.ADDITIONAL_KEYS, flags)) - additionalKeysOrRanges = null; - else - additionalKeysOrRanges = KeySerializers.seekables.deserialize(in, userVersion); - } - if (getFieldChanged(Fields.WAITING_ON, flags)) { if (getFieldIsNull(Fields.WAITING_ON, flags)) @@ -491,9 +545,9 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio else { int size = in.readInt(); - byte[] bytes = new byte[size]; - in.readFully(bytes); - ByteBuffer buffer = ByteBuffer.wrap(bytes); + waitingOnBytes = new byte[size]; + in.readFully(waitingOnBytes); + ByteBuffer buffer = ByteBuffer.wrap(waitingOnBytes); waitingOn = (localTxnId, deps) -> { try { @@ -514,6 +568,7 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio else writes = CommandSerializers.writes.deserialize(in, userVersion); } + } public void forceResult(Result newValue) @@ -521,7 +576,7 @@ public void forceResult(Result newValue) this.result = newValue; } - public Command construct() throws IOException + public Command construct() { if (!nextCalled) return null; @@ -531,15 +586,13 @@ public Command construct() throws IOException attrs.partialTxn(partialTxn); if (durability != null) attrs.durability(durability); - if (route != null) - attrs.route(route); + if (participants != null) + attrs.setParticipants(participants); if (partialDeps != null && - (saveStatus.known.deps != Status.KnownDeps.NoDeps && - saveStatus.known.deps != Status.KnownDeps.DepsErased && - saveStatus.known.deps != Status.KnownDeps.DepsUnknown)) + (saveStatus.known.deps != NoDeps && + saveStatus.known.deps != DepsErased && + saveStatus.known.deps != DepsUnknown)) attrs.partialDeps(partialDeps); - if (additionalKeysOrRanges != null) - attrs.additionalKeysOrRanges(additionalKeysOrRanges); Command.WaitingOn waitingOn = null; if (this.waitingOn != null) @@ -553,14 +606,12 @@ public Command construct() throws IOException case PreAccepted: return Command.PreAccepted.preAccepted(attrs, executeAt, promised); case AcceptedInvalidate: - if (saveStatus == SaveStatus.AcceptedInvalidateWithDefinition) - return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); - else - return Command.AcceptedInvalidateWithoutDefinition.acceptedInvalidate(attrs, promised, acceptedOrCommitted); - case Accepted: case PreCommitted: - return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); + if (saveStatus == SaveStatus.AcceptedInvalidate) + return Command.AcceptedInvalidateWithoutDefinition.acceptedInvalidate(attrs, promised, acceptedOrCommitted); + else + return Command.Accepted.accepted(attrs, saveStatus, executeAt, promised, acceptedOrCommitted); case Committed: case Stable: return Command.Committed.committed(attrs, saveStatus, executeAt, promised, acceptedOrCommitted, waitingOn); @@ -587,10 +638,10 @@ private static Command.Truncated truncated(CommonAttributes.Mutable attrs, SaveS if (attrs.txnId().kind().awaitsOnlyDeps()) return Command.Truncated.truncatedApply(attrs, status, executeAt, writes, result, executesAtLeast); return Command.Truncated.truncatedApply(attrs, status, executeAt, writes, result, null); - case ErasedOrInvalidOrVestigial: - return Command.Truncated.erasedOrInvalidOrVestigial(attrs.txnId(), attrs.durability(), attrs.route()); + case ErasedOrVestigial: + return Command.Truncated.erasedOrInvalidOrVestigial(attrs.txnId(), attrs.durability(), attrs.participants()); case Erased: - return Command.Truncated.erased(attrs.txnId(), attrs.durability(), attrs.route()); + return Command.Truncated.erased(attrs.txnId(), attrs.durability(), attrs.participants()); case Invalidated: return Command.Truncated.invalidated(attrs.txnId()); } @@ -605,10 +656,9 @@ public String toString() ", durability=" + durability + ", acceptedOrCommitted=" + acceptedOrCommitted + ", promised=" + promised + - ", route=" + route + + ", participants=" + participants + ", partialTxn=" + partialTxn + ", partialDeps=" + partialDeps + - ", additionalKeysOrRanges=" + additionalKeysOrRanges + ", waitingOn=" + waitingOn + ", writes=" + writes + '}'; diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java index 11d7dd01cfc3..d1af6f803cdc 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordAgent.java @@ -19,7 +19,6 @@ package org.apache.cassandra.service.accord.api; import java.util.concurrent.TimeUnit; -import javax.annotation.Nonnull; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; @@ -35,7 +34,9 @@ import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.messages.ReplyContext; +import accord.primitives.Keys; import accord.primitives.Ranges; +import accord.primitives.Routable; import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -52,7 +53,6 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.txn.TxnQuery; import org.apache.cassandra.service.accord.txn.TxnRead; -import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.JVMStabilityInspector; @@ -62,7 +62,6 @@ import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.cassandra.config.DatabaseDescriptor.getReadRpcTimeout; -import static org.apache.cassandra.service.consensus.migration.ConsensusKeyMigrationState.maybeSaveAccordKeyMigrationLocally; // TODO (expected): merge with AccordService public class AccordAgent implements Agent @@ -100,6 +99,11 @@ public void onInconsistentTimestamp(Command command, Timestamp prev, Timestamp n throw error; } + public void onSuccessfulBarrier(TxnId id, Seekables keysOrRanges) + { + + } + public void onFailedBarrier(TxnId id, Seekables keysOrRanges, Throwable cause) { @@ -112,16 +116,6 @@ public void onFailedBootstrap(String phase, Ranges ranges, Runnable retry, Throw AccordService.instance().scheduler().once(retry, retryBootstrapDelayMicros, MICROSECONDS); } - @Override - public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull TxnId txnId) - { - if (keysOrRanges.domain() == Key) - { - PartitionKey key = (PartitionKey)keysOrRanges.get(0); - maybeSaveAccordKeyMigrationLocally(key, Epoch.create(txnId.epoch())); - } - } - @Override public void onStale(Timestamp staleSince, Ranges ranges) { @@ -136,9 +130,10 @@ public void onUncaughtException(Throwable t) } @Override - public void onHandledException(Throwable t) + public void onHandledException(Throwable t, String context) { - // TODO: this + logger.warn(context, t); + JVMStabilityInspector.uncaughtException(Thread.currentThread(), t); } @Override @@ -179,9 +174,9 @@ public long maxConflictsPruneInterval() * for tests since it skips validation done by regular transactions. */ @Override - public Txn emptySystemTxn(Kind kind, Seekables seekables) + public Txn emptySystemTxn(Kind kind, Routable.Domain domain) { - return new Txn.InMemory(kind, seekables, TxnRead.EMPTY, TxnQuery.UNSAFE_EMPTY, null); + return new Txn.InMemory(kind, domain == Key ? Keys.EMPTY : Ranges.EMPTY, TxnRead.EMPTY, TxnQuery.UNSAFE_EMPTY, null); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index 3c8672ea9310..1f29c16d9689 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -34,6 +34,7 @@ import accord.primitives.RangeFactory; import accord.primitives.Ranges; import org.apache.cassandra.db.TypeSizes; +import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.IVersionedSerializer; @@ -79,6 +80,18 @@ public TokenKey asTokenKey() return (TokenKey) this; } + @Override + public RoutingKey toUnseekable() + { + return this; + } + + @Override + public RoutingKey asRoutingKey() + { + return asTokenKey(); + } + public static AccordRoutingKey of(Key key) { return (AccordRoutingKey) key; @@ -266,6 +279,25 @@ public TokenKey deserialize(DataInputPlus in, int version) throws IOException return new TokenKey(table, token); } + public TokenKey fromBytes(ByteBuffer bytes, IPartitioner partitioner) + { + TableId tableId = TableId.deserialize(bytes, ByteBufferAccessor.instance, 0); + bytes.position(tableId.serializedSize()); + Token token = Token.compactSerializer.deserialize(bytes, partitioner); + return new TokenKey(tableId, token); + } + + public ByteBuffer toBytes(TokenKey tokenKey) + { + int size = (int) (tokenKey.table.serializedSize() + Token.compactSerializer.serializedSize(tokenKey.token)); + ByteBuffer out = ByteBuffer.allocate(size); + int position = tokenKey.table.serialize(out, ByteBufferAccessor.instance, 0); + out.position(position); + Token.compactSerializer.serialize(tokenKey.token, out); + out.flip(); + return out; + } + @Override public long serializedSize(TokenKey key, int version) { diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 2b1cfa55ccf0..4c71bdcb42a9 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -17,7 +17,7 @@ */ package org.apache.cassandra.service.accord.async; -import accord.api.Key; +import accord.api.RoutingKey; import accord.local.cfk.CommandsForKey; import accord.local.KeyHistory; import accord.local.PreLoadContext; @@ -32,7 +32,7 @@ import com.google.common.collect.Iterables; import org.apache.cassandra.service.accord.*; import org.apache.cassandra.service.accord.api.AccordRoutingKey; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.utils.NoSpamLogger; import org.apache.cassandra.utils.Pair; @@ -61,12 +61,12 @@ enum State private final AccordCommandStore commandStore; private final Iterable txnIds; - private final Seekables keysOrRanges; + private final Unseekables keysOrRanges; private final KeyHistory keyHistory; protected AsyncResult readResult; - public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Seekables keysOrRanges, KeyHistory keyHistory) + public AsyncLoader(AccordCommandStore commandStore, Iterable txnIds, Unseekables keysOrRanges, KeyHistory keyHistory) { this.commandStore = commandStore; this.txnIds = txnIds; @@ -116,7 +116,7 @@ private static > void referenceAndAssemble } } - private void referenceAndAssembleReadsForKey(Key key, + private void referenceAndAssembleReadsForKey(RoutingKey key, AsyncOperation.Context context, List> listenChains) { @@ -151,7 +151,7 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) switch (keysOrRanges.domain()) { case Key: - AbstractKeys keys = (AbstractKeys) keysOrRanges; + AbstractKeys keys = (AbstractKeys) keysOrRanges; keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); break; case Range: @@ -166,20 +166,20 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context context) { - Ranges ranges = (Ranges) keysOrRanges; + Ranges ranges = ((AbstractRanges) keysOrRanges).toRanges(); List> root = new ArrayList<>(ranges.size() + 1); - class Watcher implements AccordStateCache.Listener + class Watcher implements AccordStateCache.Listener { - private final Set cached = commandStore.commandsForKeyCache().stream() - .map(n -> (PartitionKey) n.key()) + private final Set cached = commandStore.commandsForKeyCache().stream() + .map(n -> (TokenKey) n.key()) .filter(ranges::contains) .collect(Collectors.toSet()); @Override - public void onAdd(AccordCachingState state) + public void onAdd(AccordCachingState state) { - PartitionKey pk = (PartitionKey) state.key(); + TokenKey pk = (TokenKey) state.key(); if (ranges.contains(pk)) cached.add(pk); } @@ -190,7 +190,7 @@ public void onAdd(AccordCachingState state) commandStore.commandsForKeyCache().unregister(watcher); if (keys.isEmpty() && watcher.cached.isEmpty()) return AsyncChains.success(null); - Set set = ImmutableSet.builder().addAll(watcher.cached).addAll(keys).build(); + Set set = ImmutableSet.builder().addAll(watcher.cached).addAll(keys).build(); List> chains = new ArrayList<>(); set.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); return chains.isEmpty() ? AsyncChains.success(null) : AsyncChains.reduce(chains, (a, b) -> null); @@ -203,7 +203,7 @@ public void onAdd(AccordCachingState state) return AsyncChains.all(root); } - private AsyncChain> findOverlappingKeys(Ranges ranges) + private AsyncChain> findOverlappingKeys(Ranges ranges) { if (ranges.isEmpty()) { @@ -211,16 +211,16 @@ private AsyncChain> findOverlappingKeys(Ranges ranges) return AsyncChains.success(Collections.emptyList()); } - List>> chains = new ArrayList<>(ranges.size()); + List>> chains = new ArrayList<>(ranges.size()); for (Range range : ranges) chains.add(findOverlappingKeys(range)); - return AsyncChains.reduce(chains, (a, b) -> ImmutableList.builderWithExpectedSize(a.size() + b.size()).addAll(a).addAll(b).build()); + return AsyncChains.reduce(chains, (a, b) -> ImmutableList.builderWithExpectedSize(a.size() + b.size()).addAll(a).addAll(b).build()); } - private AsyncChain> findOverlappingKeys(Range range) + private AsyncChain> findOverlappingKeys(Range range) { // save to a variable as java gets confused when `.map` is called on the result of asChain - AsyncChain> map = Observable.asChain(callback -> + AsyncChain> map = Observable.asChain(callback -> AccordKeyspace.findAllKeysBetween(commandStore.id(), (AccordRoutingKey) range.start(), range.startInclusive(), (AccordRoutingKey) range.end(), range.endInclusive(), diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index 0b984446dc27..cbcfe22f1894 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -30,13 +30,13 @@ import org.slf4j.LoggerFactory; import org.slf4j.MDC; -import accord.api.Key; +import accord.api.RoutingKey; import accord.local.Command; import accord.local.CommandStore; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; -import accord.primitives.Seekables; import accord.primitives.TxnId; +import accord.primitives.Unseekables; import accord.utils.Invariants; import accord.utils.async.AsyncChains; import org.apache.cassandra.config.CassandraRelevantProperties; @@ -48,6 +48,7 @@ import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import org.apache.cassandra.service.accord.SavedCommand; +import org.apache.cassandra.utils.concurrent.Condition; import static org.apache.cassandra.service.accord.async.AsyncLoader.txnIds; import static org.apache.cassandra.service.accord.async.AsyncOperation.State.COMPLETING; @@ -71,8 +72,8 @@ private static class LoggingProps static class Context { final HashMap commands = new HashMap<>(); - final TreeMap timestampsForKey = new TreeMap<>(); - final TreeMap commandsForKey = new TreeMap<>(); + final TreeMap timestampsForKey = new TreeMap<>(); + final TreeMap commandsForKey = new TreeMap<>(); @Nullable AccordSafeCommandsForRanges commandsForRanges = null; @@ -189,7 +190,7 @@ private void finish(R result, Throwable failure) } @SuppressWarnings("unchecked") - Seekables keys() + Unseekables keys() { return preLoadContext.keys(); } @@ -251,10 +252,10 @@ protected boolean runInternal(boolean loadOnly) result = apply(safeStore); // TODO (required): currently, we are not very efficient about ensuring that we persist the absolute minimum amount of state. Improve that. - List> diffs = null; + List diffs = null; for (AccordSafeCommand commandState : context.commands.values()) { - SavedCommand.Writer diff = commandState.diff(); + SavedCommand.DiffWriter diff = commandState.diff(); if (diff == null) continue; if (diffs == null) @@ -269,11 +270,22 @@ protected boolean runInternal(boolean loadOnly) } commandStore.completeOperation(safeStore); + context.releaseResources(commandStore); state(COMPLETING); - if (diffs != null) + if (diffs != null || safeStore.fieldUpdates() != null) { - this.commandStore.appendCommands(diffs, sanityCheck, () -> finish(result, null)); + Runnable onFlush = () -> finish(result, null); + if (safeStore.fieldUpdates() != null) + { + if (diffs != null) + appendCommands(diffs, null); + commandStore.persistFieldUpdates(safeStore.fieldUpdates(), onFlush); + } + else + { + appendCommands(diffs, onFlush); + } return false; } case COMPLETING: @@ -286,6 +298,26 @@ protected boolean runInternal(boolean loadOnly) return false; } + private void appendCommands(List diffs, Runnable onFlush) + { + if (sanityCheck != null) + { + Invariants.checkState(CassandraRelevantProperties.DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED.getBoolean()); + Condition condition = Condition.newOneTimeCondition(); + this.commandStore.appendCommands(diffs, condition::signal); + condition.awaitUninterruptibly(); + + for (Command check : sanityCheck) + this.commandStore.sanityCheckCommand(check); + + if (onFlush != null) onFlush.run(); + } + else + { + this.commandStore.appendCommands(diffs, onFlush); + } + } + @Override public void run() { diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java index bac98fd003e8..ed8341a23493 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropAdapter.java @@ -30,6 +30,7 @@ import accord.messages.Apply; import accord.primitives.Deps; import accord.primitives.FullRoute; +import accord.primitives.Participants; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -84,8 +85,9 @@ public void execute(Node node, Topologies all, FullRoute route, ExecutePath p } @Override - public void persist(Node node, Topologies all, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer callback) + public void persist(Node node, Topologies all, FullRoute route, Participants sendTo, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer callback) { + // TODO (required): we aren't using sendTo if (applyKind == Minimal && doInteropPersist(node, all, route, txnId, txn, executeAt, deps, writes, result, callback)) return; diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java index 8e3758a381ea..45832e8711b2 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropApply.java @@ -27,19 +27,19 @@ import accord.local.Node.Id; import accord.local.SafeCommand; import accord.local.SafeCommandStore; -import accord.local.Status; +import accord.local.StoreParticipants; import accord.messages.Apply; import accord.messages.MessageType; import accord.primitives.Deps; import accord.primitives.FullRoute; -import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Route; -import accord.primitives.Seekables; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Unseekables; import accord.primitives.Writes; import accord.topology.Topologies; import org.apache.cassandra.db.ConsistencyLevel; @@ -77,9 +77,9 @@ public Apply create(Kind kind, Id to, Topologies participates, TxnId txnId, Full public static final IVersionedSerializer serializer = new ApplySerializer() { @Override - protected AccordInteropApply deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Seekables keys, Timestamp executeAt, PartialDeps deps, PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) + protected AccordInteropApply deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Timestamp executeAt, PartialDeps deps, PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { - return new AccordInteropApply(kind, txnId, scope, waitForEpoch, keys, executeAt, deps, txn, fullRoute, writes, result); + return new AccordInteropApply(kind, txnId, scope, waitForEpoch, executeAt, deps, txn, fullRoute, writes, result); } }; @@ -87,9 +87,9 @@ protected AccordInteropApply deserializeApply(TxnId txnId, Route scope, long transient int waitingOnCount; final MpscChunkedArrayQueue listeners = new MpscChunkedArrayQueue<>(4, 1 << 30); - private AccordInteropApply(Kind kind, TxnId txnId, Route route, long waitForEpoch, Seekables keys, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) + private AccordInteropApply(Kind kind, TxnId txnId, Route route, long waitForEpoch, Timestamp executeAt, PartialDeps deps, @Nullable PartialTxn txn, @Nullable FullRoute fullRoute, Writes writes, Result result) { - super(kind, txnId, route, waitForEpoch, keys, executeAt, deps, txn, fullRoute, writes, result); + super(kind, txnId, route, waitForEpoch, executeAt, deps, txn, fullRoute, writes, result); } private AccordInteropApply(Kind kind, Id to, Topologies participates, TxnId txnId, FullRoute route, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result) @@ -106,7 +106,7 @@ public void process() @Override - public ApplyReply apply(SafeCommandStore safeStore) + public ApplyReply apply(SafeCommandStore safeStore, StoreParticipants participants) { ApplyReply reply = super.apply(safeStore); checkState(reply == ApplyReply.Redundant || reply == ApplyReply.Applied || reply == ApplyReply.Insufficient, "Unexpected ApplyReply"); @@ -118,7 +118,7 @@ public ApplyReply apply(SafeCommandStore safeStore) // once the coordinator sends a maximal commit // Applied doesn't actually mean the command is in the Applied state so we still need to check and maybe install // the listener - SafeCommand safeCommand = safeStore.get(txnId, executeAt, scope); + SafeCommand safeCommand = safeStore.get(txnId, participants); Command current = safeCommand.current(); // Don't actually think it is possible for this to reach applied while we are stll running, but just to be safe // check anyways @@ -201,10 +201,9 @@ public TxnId primaryTxnId() } @Override - public Seekables keys() + public Unseekables keys() { - if (txn == null) return Keys.EMPTY; - return txn.keys(); + return scope; } @Override diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java index d1a26d22b4d0..8b945153306d 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropCommit.java @@ -31,7 +31,6 @@ import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Route; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -43,18 +42,18 @@ public class AccordInteropCommit extends Commit { - public static final IVersionedSerializer serializer = new CommitSerializer(AccordInteropRead.class, AccordInteropRead.requestSerializer) + public static final IVersionedSerializer serializer = new CommitSerializer<>(AccordInteropRead.class, AccordInteropRead.requestSerializer) { @Override - protected AccordInteropCommit deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected AccordInteropCommit deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, long minEpoch, Kind kind, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { - return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, read); + return new AccordInteropCommit(kind, txnId, scope, waitForEpoch, minEpoch, ballot, executeAt, partialTxn, partialDeps, fullRoute, read); } }; - public AccordInteropCommit(Kind kind, TxnId txnId, Route scope, long waitForEpoch, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) + public AccordInteropCommit(Kind kind, TxnId txnId, Route scope, long waitForEpoch, long minEpoch, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nonnull ReadData readData) { - super(kind, txnId, scope, waitForEpoch, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, readData); + super(kind, txnId, scope, waitForEpoch, minEpoch, ballot, executeAt, partialTxn, partialDeps, fullRoute, readData); } public AccordInteropCommit(Kind kind, Node.Id to, Topology coordinateTopology, Topologies topologies, TxnId txnId, Txn txn, FullRoute route, Timestamp executeAt, Deps deps, AccordInteropRead read) diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java index 8e2ec02a9b7f..d86f724ed516 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropRead.java @@ -51,8 +51,8 @@ import org.apache.cassandra.service.accord.serializers.ReadDataSerializers; import org.apache.cassandra.service.accord.serializers.ReadDataSerializers.ReadDataSerializer; -import static accord.local.SaveStatus.PreApplied; -import static accord.local.SaveStatus.ReadyToExecute; +import static accord.primitives.SaveStatus.PreApplied; +import static accord.primitives.SaveStatus.ReadyToExecute; public class AccordInteropRead extends ReadData { diff --git a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java index 8e4ec2da261e..c714c7857d3f 100644 --- a/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java +++ b/src/java/org/apache/cassandra/service/accord/interop/AccordInteropReadRepair.java @@ -24,12 +24,12 @@ import accord.api.Data; import accord.local.Node; import accord.local.SafeCommandStore; -import accord.local.SaveStatus; import accord.messages.ReadData; import accord.messages.MessageType; import accord.primitives.PartialTxn; import accord.primitives.Participants; import accord.primitives.Ranges; +import accord.primitives.SaveStatus; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.topology.Topologies; diff --git a/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java b/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java index 76e29adbc5fa..58c9f4b65ff3 100644 --- a/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java +++ b/src/java/org/apache/cassandra/service/accord/repair/RepairSyncPointAdapter.java @@ -30,11 +30,11 @@ import accord.local.Node; import accord.primitives.Deps; import accord.primitives.FullRoute; -import accord.primitives.Seekables; import accord.primitives.SyncPoint; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Unseekable; import accord.primitives.Writes; import accord.topology.Topologies; @@ -48,7 +48,7 @@ * adapter requires responses from all of the supplied endpoints before completing. Note that shards only block on the * intersection of the provided replicas and their own endpoints. */ -public class RepairSyncPointAdapter> extends CoordinationAdapter.Adapters.AbstractSyncPointAdapter +public class RepairSyncPointAdapter extends CoordinationAdapter.Adapters.AbstractInclusiveSyncPointAdapter { private final ImmutableSet requiredResponses; @@ -58,21 +58,27 @@ public RepairSyncPointAdapter(Collection requiredResponses) } @Override - public void execute(Node node, Topologies all, FullRoute route, ExecutePath path, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer, Throwable> callback) + public void execute(Node node, Topologies all, FullRoute route, ExecutePath path, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, BiConsumer, Throwable> callback) { RequiredResponseTracker tracker = new RequiredResponseTracker(requiredResponses, all); - ExecuteSyncPoint.ExecuteBlocking execute = new ExecuteSyncPoint.ExecuteBlocking<>(node, tracker, new SyncPoint<>(txnId, deps, (S) txn.keys(), route), executeAt); + ExecuteSyncPoint.ExecuteBlocking execute = new ExecuteSyncPoint.ExecuteBlocking<>(node, new SyncPoint(txnId, deps, (FullRoute) route), tracker, executeAt); execute.addCallback(callback); execute.start(); } @Override - public void persist(Node node, Topologies all, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer, Throwable> callback) + protected void addOrExecuteCallback(ExecuteSyncPoint.ExecuteBlocking execute, BiConsumer, Throwable> callback) + { + execute.addCallback(callback); + } + + @Override + public void persist(Node node, Topologies all, FullRoute route, TxnId txnId, Txn txn, Timestamp executeAt, Deps deps, Writes writes, Result result, BiConsumer, Throwable> callback) { throw new UnsupportedOperationException(); } - public static > CoordinationAdapter> create(Collection requiredResponses) + public static CoordinationAdapter> create(Collection requiredResponses) { return new RepairSyncPointAdapter<>(requiredResponses); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java index 11733d3cc234..9c6052832e64 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java @@ -22,8 +22,11 @@ import accord.messages.Accept; import accord.messages.Accept.AcceptReply; +import accord.primitives.Ballot; import accord.primitives.Route; +import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -35,14 +38,13 @@ public class AcceptSerializers { private AcceptSerializers() {} - public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() + public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer<>() { @Override public void serializeBody(Accept accept, DataOutputPlus out, int version) throws IOException { CommandSerializers.ballot.serialize(accept.ballot, out, version); CommandSerializers.timestamp.serialize(accept.executeAt, out, version); - KeySerializers.seekables.serialize(accept.keys, out, version); DepsSerializer.partialDeps.serialize(accept.partialDeps, out, version); } @@ -52,7 +54,6 @@ public Accept deserializeBody(DataInputPlus in, int version, TxnId txnId, Route< return create(txnId, scope, waitForEpoch, minEpoch, CommandSerializers.ballot.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), - KeySerializers.seekables.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version)); } @@ -61,7 +62,6 @@ public long serializedBodySize(Accept accept, int version) { return CommandSerializers.ballot.serializedSize(accept.ballot, version) + CommandSerializers.timestamp.serializedSize(accept.executeAt, version) - + KeySerializers.seekables.serializedSize(accept.keys, version) + DepsSerializer.partialDeps.serializedSize(accept.partialDeps, version); } }; @@ -105,43 +105,50 @@ public void serialize(AcceptReply reply, DataOutputPlus out, int version) throws if (reply.deps != null) { out.writeByte(1); - DepsSerializer.partialDeps.serialize(reply.deps, out, version); + DepsSerializer.deps.serialize(reply.deps, out, version); } else { + Invariants.checkState(reply == AcceptReply.ACCEPT_INVALIDATE); out.writeByte(2); } break; - case Redundant: + case Truncated: out.writeByte(3); break; case RejectedBallot: out.writeByte(4); CommandSerializers.ballot.serialize(reply.supersededBy, out, version); break; - case Truncated: - out.writeByte(5); - break; + case Redundant: + int flags = 5 | (reply.supersededBy == null ? 0x8 : 0) | (reply.committedExecuteAt == null ? 0x10 : 0); + out.writeByte(flags); + if (reply.supersededBy != null) + CommandSerializers.ballot.serialize(reply.supersededBy, out, version); + if (reply.committedExecuteAt != null) + CommandSerializers.timestamp.serialize(reply.committedExecuteAt, out, version); } } @Override public AcceptReply deserialize(DataInputPlus in, int version) throws IOException { - int type = in.readByte(); - switch (type) + int flags = in.readByte(); + switch (flags & 0x7) { - default: throw new IllegalStateException("Unexpected AcceptNack type: " + type); + default: throw new IllegalStateException("Unexpected AcceptNack type: " + (flags & 0x7)); case 1: - return new AcceptReply(DepsSerializer.partialDeps.deserialize(in, version)); + return new AcceptReply(DepsSerializer.deps.deserialize(in, version)); case 2: return AcceptReply.ACCEPT_INVALIDATE; case 3: - return AcceptReply.REDUNDANT; + return AcceptReply.TRUNCATED; case 4: return new AcceptReply(CommandSerializers.ballot.deserialize(in, version)); case 5: - return AcceptReply.TRUNCATED; + Ballot supersededBy = (flags & 0x8) == 0 ? null : CommandSerializers.ballot.deserialize(in, version); + Timestamp committedExecuteAt = (flags & 0x10) == 0 ? null : CommandSerializers.timestamp.deserialize(in, version); + return new AcceptReply(supersededBy, committedExecuteAt); } } @@ -154,13 +161,16 @@ public long serializedSize(AcceptReply reply, int version) default: throw new AssertionError(); case Success: if (reply.deps != null) - size += DepsSerializer.partialDeps.serializedSize(reply.deps, version); + size += DepsSerializer.deps.serializedSize(reply.deps, version); break; - case Redundant: case Truncated: break; case RejectedBallot: size += CommandSerializers.ballot.serializedSize(reply.supersededBy, version); + break; + case Redundant: + if (reply.supersededBy != null) size += CommandSerializers.ballot.serializedSize(reply.supersededBy, version); + if (reply.committedExecuteAt != null) size += CommandSerializers.timestamp.serializedSize(reply.committedExecuteAt, version); } return size; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java b/src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java index 712d781e8d4c..625d7bf9fe37 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/AccordRoutingKeyByteSource.java @@ -21,7 +21,9 @@ import java.io.IOException; import java.util.Arrays; import java.util.UUID; -import java.util.function.Function; +import java.util.function.BiFunction; + +import javax.annotation.Nullable; import org.apache.cassandra.db.marshal.ByteArrayAccessor; import org.apache.cassandra.db.marshal.LongType; @@ -29,6 +31,7 @@ import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.utils.ByteArrayUtil; import org.apache.cassandra.utils.bytecomparable.ByteComparable; @@ -39,6 +42,8 @@ public class AccordRoutingKeyByteSource { + public static final ByteComparable.Version currentVersion = ByteComparable.Version.OSS50; + private static final byte[] MIN_ORDER = { -1 }; private static final byte[] TOKEN_ORDER = { 0 }; private static final byte[] MAX_ORDER = { 1 }; @@ -61,18 +66,18 @@ private static ByteSource maxPrefix() public static Serializer create(IPartitioner partitioner) { if (partitioner.isFixedLength()) - return new FixedLength(partitioner, ByteComparable.Version.OSS50); - return new VariableLength(partitioner, ByteComparable.Version.OSS50); + return new FixedLength(partitioner, currentVersion); + return new VariableLength(partitioner, currentVersion); } public static FixedLength fixedLength(IPartitioner partitioner) { - return new FixedLength(partitioner, ByteComparable.Version.OSS50); + return new FixedLength(partitioner, currentVersion); } public static VariableLength variableLength(IPartitioner partitioner) { - return new VariableLength(partitioner, ByteComparable.Version.OSS50); + return new VariableLength(partitioner, currentVersion); } public static abstract class Serializer @@ -148,6 +153,11 @@ public ByteSource asComparableBytesNoTable(AccordRoutingKey key) } public AccordRoutingKey fromComparableBytes(ValueAccessor accessor, V data) throws IOException + { + return fromComparableBytes(accessor, data, version, partitioner); + } + + public static AccordRoutingKey fromComparableBytes(ValueAccessor accessor, V data, ByteComparable.Version version, @Nullable IPartitioner partitioner) { ByteSource.Peekable bs = ByteSource.peekable(ByteSource.fixedLength(accessor, data)); long[] uuidValues = new long[2]; @@ -160,47 +170,63 @@ public AccordRoutingKey fromComparableBytes(ValueAccessor accessor, V dat uuidValues[i] = value; } TableId tableId = TableId.fromUUID(new UUID(uuidValues[0], uuidValues[1])); - return fromComparableBytes(bs, - isMin -> isMin ? AccordRoutingKey.SentinelKey.min(tableId) : AccordRoutingKey.SentinelKey.max(tableId), - token -> new AccordRoutingKey.TokenKey(tableId, token)); + return fromComparableBytes(bs, tableId, version, partitioner); + } + + public static AccordRoutingKey fromComparableBytes(ValueAccessor accessor, V data, TableId tableId, ByteComparable.Version version, @Nullable IPartitioner partitioner) + { + ByteSource.Peekable bs = ByteSource.peekable(ByteSource.fixedLength(accessor, data)); + return fromComparableBytes(bs, tableId, version, partitioner); } - private AccordRoutingKey fromComparableBytes(ByteSource.Peekable bs, - Function onSentinel, - Function onToken) throws IOException + public static AccordRoutingKey fromComparableBytes(ByteSource.Peekable bs, TableId tableId, ByteComparable.Version version, @Nullable IPartitioner partitioner) + { + if (partitioner == null) + partitioner = AccordKeyspace.partitioner(tableId); + return fromComparableBytes(bs, tableId, + (id, isMin) -> isMin ? AccordRoutingKey.SentinelKey.min(id) : AccordRoutingKey.SentinelKey.max(id), + AccordRoutingKey.TokenKey::new, + version, partitioner + ); + } + + public static AccordRoutingKey fromComparableBytes(ByteSource.Peekable bs, TableId tableId, + BiFunction onSentinel, + BiFunction onToken, + ByteComparable.Version version, IPartitioner partitioner) { if (bs.peek() == ByteSource.TERMINATOR) - throw new IOException("Unable to read prefix"); + throw new IllegalStateException("Unable to read prefix"); ByteSource.Peekable component = progress(bs); byte[] prefix = ByteSourceInverse.getOptionalSignedFixedLength(ByteArrayAccessor.instance, component, 1); if (prefix == null) - throw new IOException("Unable to read prefix; prefix was null"); + throw new IllegalStateException("Unable to read prefix; prefix was null"); if (Arrays.equals(TOKEN_ORDER, prefix)) { component = ByteSourceInverse.nextComponentSource(bs); if (component == null) - throw new IOException("Unable to read token; component was not found"); - return onToken.apply(partitioner.getTokenFactory().fromComparableBytes(component, version)); + throw new IllegalStateException("Unable to read token; component was not found"); + return onToken.apply(tableId, partitioner.getTokenFactory().fromComparableBytes(component, version)); } if (Arrays.equals(MIN_ORDER, prefix)) - return onSentinel.apply(true); + return onSentinel.apply(tableId, true); if (Arrays.equals(MAX_ORDER, prefix)) - return onSentinel.apply(false); + return onSentinel.apply(tableId, false); throw new AssertionError("Unknown prefix"); } - private static ByteSource.Peekable progress(ByteSource.Peekable bs) throws IOException + private static ByteSource.Peekable progress(ByteSource.Peekable bs) { ByteSource.Peekable component = ByteSourceInverse.nextComponentSource(bs); if (component == null) - throw new IOException("Unable to read prefix; component was not found"); + throw new IllegalStateException("Unable to read prefix; component was not found"); if (component.peek() == ByteSource.NEXT_COMPONENT) { // this came from (table, token_or_sentinel) component = ByteSourceInverse.nextComponentSource(bs); if (component == null) - throw new IOException("Unable to read prefix; component was not found"); + throw new IllegalStateException("Unable to read prefix; component was not found"); } return component; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java index 8370d59b80dc..a9091edea3ed 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ApplySerializers.java @@ -26,7 +26,6 @@ import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; import accord.primitives.Route; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; @@ -64,7 +63,6 @@ public abstract static class ApplySerializer extends TxnRequest public void serializeBody(A apply, DataOutputPlus out, int version) throws IOException { kind.serialize(apply.kind, out, version); - KeySerializers.seekables.serialize(apply.keys(), out, version); CommandSerializers.timestamp.serialize(apply.executeAt, out, version); DepsSerializer.partialDeps.serialize(apply.deps, out, version); CommandSerializers.nullablePartialTxn.serialize(apply.txn, out, version); @@ -72,7 +70,7 @@ public void serializeBody(A apply, DataOutputPlus out, int version) throws IOExc CommandSerializers.writes.serialize(apply.writes, out, version); } - protected abstract A deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Seekables keys, + protected abstract A deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result); @Override @@ -80,7 +78,6 @@ public A deserializeBody(DataInputPlus in, int version, TxnId txnId, Route sc { return deserializeApply(txnId, scope, waitForEpoch, kind.deserialize(in, version), - KeySerializers.seekables.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), CommandSerializers.nullablePartialTxn.deserialize(in, version), @@ -93,7 +90,6 @@ public A deserializeBody(DataInputPlus in, int version, TxnId txnId, Route sc public long serializedBodySize(A apply, int version) { return kind.serializedSize(apply.kind, version) - + KeySerializers.seekables.serializedSize(apply.keys(), version) + CommandSerializers.timestamp.serializedSize(apply.executeAt, version) + DepsSerializer.partialDeps.serializedSize(apply.deps, version) + CommandSerializers.nullablePartialTxn.serializedSize(apply.txn, version) @@ -102,17 +98,17 @@ public long serializedBodySize(A apply, int version) } } - public static final IVersionedSerializer request = new ApplySerializer() + public static final IVersionedSerializer request = new ApplySerializer<>() { @Override - protected Apply deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Seekables keys, + protected Apply deserializeApply(TxnId txnId, Route scope, long waitForEpoch, Apply.Kind kind, Timestamp executeAt, PartialDeps deps, PartialTxn txn, FullRoute fullRoute, Writes writes, Result result) { - return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, kind, keys, executeAt, deps, txn, fullRoute, writes, result); + return Apply.SerializationSupport.create(txnId, scope, waitForEpoch, kind, executeAt, deps, txn, fullRoute, writes, result); } }; - public static final IVersionedSerializer reply = new IVersionedSerializer() + public static final IVersionedSerializer reply = new IVersionedSerializer<>() { private final Apply.ApplyReply[] replies = Apply.ApplyReply.values(); diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java index f18b40f47327..54b25a1c876a 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/AwaitSerializer.java @@ -21,12 +21,12 @@ import java.io.IOException; import accord.api.ProgressLog.BlockedUntil; -import accord.local.SaveStatus; import accord.messages.Await; import accord.messages.Await.AsyncAwaitComplete; import accord.messages.Await.AwaitOk; import accord.primitives.Participants; import accord.primitives.Route; +import accord.primitives.SaveStatus; import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.db.TypeSizes; @@ -45,6 +45,7 @@ public void serialize(Await await, DataOutputPlus out, int version) throws IOExc CommandSerializers.txnId.serialize(await.txnId, out, version); KeySerializers.participants.serialize(await.scope, out, version); out.writeByte(await.blockedUntil.ordinal()); + out.writeUnsignedVInt(await.awaitEpoch - await.txnId.epoch()); out.writeUnsignedVInt32(await.callbackId + 1); Invariants.checkState(await.callbackId >= -1); } @@ -55,9 +56,10 @@ public Await deserialize(DataInputPlus in, int version) throws IOException TxnId txnId = CommandSerializers.txnId.deserialize(in, version); Participants scope = KeySerializers.participants.deserialize(in, version); BlockedUntil blockedUntil = BlockedUntil.forOrdinal(in.readByte()); + long awaitEpoch = in.readUnsignedVInt() + txnId.epoch(); int callbackId = in.readUnsignedVInt32() - 1; Invariants.checkState(callbackId >= -1); - return Await.SerializerSupport.create(txnId, scope, blockedUntil, callbackId); + return Await.SerializerSupport.create(txnId, scope, blockedUntil, awaitEpoch, callbackId); } @Override @@ -66,6 +68,7 @@ public long serializedSize(Await await, int version) return CommandSerializers.txnId.serializedSize(await.txnId, version) + KeySerializers.participants.serializedSize(await.scope, version) + TypeSizes.BYTE_SIZE + + VIntCoding.computeUnsignedVIntSize(await.awaitEpoch - await.txnId.epoch()) + VIntCoding.computeUnsignedVIntSize(await.callbackId + 1); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java index 94390bd905e8..676e689326b8 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/BeginInvalidationSerializers.java @@ -21,11 +21,12 @@ import java.io.IOException; import accord.api.RoutingKey; -import accord.local.SaveStatus; import accord.messages.BeginInvalidation; import accord.messages.BeginInvalidation.InvalidateReply; import accord.primitives.Ballot; +import accord.primitives.Participants; import accord.primitives.Route; +import accord.primitives.SaveStatus; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -39,7 +40,7 @@ public class BeginInvalidationSerializers public void serialize(BeginInvalidation begin, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(begin.txnId, out, version); - KeySerializers.unseekables.serialize(begin.someUnseekables, out, version); + KeySerializers.participants.serialize(begin.participants, out, version); CommandSerializers.ballot.serialize(begin.ballot, out, version); } @@ -47,7 +48,7 @@ public void serialize(BeginInvalidation begin, DataOutputPlus out, int version) public BeginInvalidation deserialize(DataInputPlus in, int version) throws IOException { return new BeginInvalidation(CommandSerializers.txnId.deserialize(in, version), - KeySerializers.unseekables.deserialize(in, version), + KeySerializers.participants.deserialize(in, version), CommandSerializers.ballot.deserialize(in, version)); } @@ -55,7 +56,7 @@ public BeginInvalidation deserialize(DataInputPlus in, int version) throws IOExc public long serializedSize(BeginInvalidation begin, int version) { return CommandSerializers.txnId.serializedSize(begin.txnId, version) - + KeySerializers.unseekables.serializedSize(begin.someUnseekables, version) + + KeySerializers.participants.serializedSize(begin.participants, version) + CommandSerializers.ballot.serializedSize(begin.ballot, version); } }; @@ -70,6 +71,7 @@ public void serialize(InvalidateReply reply, DataOutputPlus out, int version) th CommandSerializers.saveStatus.serialize(reply.maxStatus, out, version); CommandSerializers.saveStatus.serialize(reply.maxKnowledgeStatus, out, version); out.writeBoolean(reply.acceptedFastPath); + KeySerializers.nullableParticipants.serialize(reply.truncated, out, version); KeySerializers.nullableRoute.serialize(reply.route, out, version); KeySerializers.nullableRoutingKey.serialize(reply.homeKey, out, version); } @@ -77,14 +79,16 @@ public void serialize(InvalidateReply reply, DataOutputPlus out, int version) th @Override public InvalidateReply deserialize(DataInputPlus in, int version) throws IOException { + // TODO (expected): use headers instead of nullable+bool serializers Ballot supersededBy = CommandSerializers.nullableBallot.deserialize(in, version); Ballot accepted = CommandSerializers.ballot.deserialize(in, version); SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in, version); SaveStatus maxKnowledgeStatus = CommandSerializers.saveStatus.deserialize(in, version); boolean acceptedFastPath = in.readBoolean(); + Participants truncated = KeySerializers.nullableParticipants.deserialize(in, version); Route route = KeySerializers.nullableRoute.deserialize(in, version); RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); - return new InvalidateReply(supersededBy, accepted, maxStatus, maxKnowledgeStatus, acceptedFastPath, route, homeKey); + return new InvalidateReply(supersededBy, accepted, maxStatus, maxKnowledgeStatus, acceptedFastPath, truncated, route, homeKey); } @Override @@ -95,6 +99,7 @@ public long serializedSize(InvalidateReply reply, int version) + CommandSerializers.saveStatus.serializedSize(reply.maxStatus, version) + CommandSerializers.saveStatus.serializedSize(reply.maxKnowledgeStatus, version) + TypeSizes.BOOL_SIZE + + KeySerializers.nullableParticipants.serializedSize(reply.truncated, version) + KeySerializers.nullableRoute.serializedSize(reply.route, version) + KeySerializers.nullableRoutingKey.serializedSize(reply.homeKey, version); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CalculateDepsSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CalculateDepsSerializers.java index e10425a32135..842f2bafde2e 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CalculateDepsSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CalculateDepsSerializers.java @@ -23,7 +23,6 @@ import accord.messages.CalculateDeps; import accord.messages.CalculateDeps.CalculateDepsOk; import accord.primitives.Route; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; @@ -32,28 +31,25 @@ public class CalculateDepsSerializers { - public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer() + public static final IVersionedSerializer request = new TxnRequestSerializer.WithUnsyncedSerializer<>() { @Override public void serializeBody(CalculateDeps msg, DataOutputPlus out, int version) throws IOException { - KeySerializers.seekables.serialize(msg.keys, out, version); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); } @Override public CalculateDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { - Seekables keys = KeySerializers.seekables.deserialize(in, version); Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); - return CalculateDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, keys, executeAt); + return CalculateDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, executeAt); } @Override public long serializedBodySize(CalculateDeps msg, int version) { - return KeySerializers.seekables.serializedSize(msg.keys, version) - + CommandSerializers.timestamp.serializedSize(msg.executeAt, version); + return CommandSerializers.timestamp.serializedSize(msg.executeAt, version); } }; @@ -62,19 +58,19 @@ public long serializedBodySize(CalculateDeps msg, int version) @Override public void serialize(CalculateDepsOk reply, DataOutputPlus out, int version) throws IOException { - DepsSerializer.partialDeps.serialize(reply.deps, out, version); + DepsSerializer.deps.serialize(reply.deps, out, version); } @Override public CalculateDepsOk deserialize(DataInputPlus in, int version) throws IOException { - return new CalculateDepsOk(DepsSerializer.partialDeps.deserialize(in, version)); + return new CalculateDepsOk(DepsSerializer.deps.deserialize(in, version)); } @Override public long serializedSize(CalculateDepsOk reply, int version) { - return DepsSerializer.partialDeps.serializedSize(reply.deps, version); + return DepsSerializer.deps.serializedSize(reply.deps, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java index e506bbf85cd1..7a25f55c544d 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CheckStatusSerializers.java @@ -1,5 +1,5 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one + * Licensed to the Apache Software ation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file @@ -23,99 +23,68 @@ import accord.api.Result; import accord.api.RoutingKey; import accord.coordinate.Infer; -import accord.local.SaveStatus; -import accord.local.Status.Durability; -import accord.local.Status.Known; import accord.messages.CheckStatus; import accord.messages.CheckStatus.CheckStatusNack; import accord.messages.CheckStatus.CheckStatusOk; import accord.messages.CheckStatus.CheckStatusOkFull; import accord.messages.CheckStatus.CheckStatusReply; -import accord.messages.CheckStatus.FoundKnown; -import accord.messages.CheckStatus.FoundKnownMap; import accord.primitives.Ballot; +import accord.primitives.Known; +import accord.primitives.KnownMap; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Participants; import accord.primitives.Route; +import accord.primitives.SaveStatus; +import accord.primitives.Status.Durability; import accord.primitives.Timestamp; import accord.primitives.TxnId; -import accord.primitives.Unseekables; import accord.primitives.Writes; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; -import org.apache.cassandra.utils.NullableSerializer; import static accord.messages.CheckStatus.SerializationSupport.createOk; +import static org.apache.cassandra.service.accord.serializers.CommandSerializers.nullableKnown; public class CheckStatusSerializers { - public static final IVersionedSerializer foundKnown = new IVersionedSerializer<>() + public static final IVersionedSerializer knownMap = new IVersionedSerializer<>() { @Override - public void serialize(FoundKnown known, DataOutputPlus out, int version) throws IOException - { - CommandSerializers.known.serialize(known, out, version); - CommandSerializers.invalidIfNot.serialize(known.invalidIfNot, out, version); - CommandSerializers.isPreempted.serialize(known.isPreempted, out, version); - } - - @Override - public FoundKnown deserialize(DataInputPlus in, int version) throws IOException - { - Known known = CommandSerializers.known.deserialize(in, version); - Infer.InvalidIfNot invalidIfNot = CommandSerializers.invalidIfNot.deserialize(in, version); - Infer.IsPreempted isPreempted = CommandSerializers.isPreempted.deserialize(in, version); - return new FoundKnown(known, invalidIfNot, isPreempted); - } - - @Override - public long serializedSize(FoundKnown known, int version) - { - return CommandSerializers.known.serializedSize(known, version) - + CommandSerializers.invalidIfNot.serializedSize(known.invalidIfNot, version) - + CommandSerializers.isPreempted.serializedSize(known.isPreempted, version); - } - }; - - public static final IVersionedSerializer foundKnownNullable = NullableSerializer.wrap(foundKnown); - - public static final IVersionedSerializer foundKnownMap = new IVersionedSerializer<>() - { - @Override - public void serialize(FoundKnownMap knownMap, DataOutputPlus out, int version) throws IOException + public void serialize(KnownMap knownMap, DataOutputPlus out, int version) throws IOException { int size = knownMap.size(); out.writeUnsignedVInt32(size); for (int i = 0 ; i <= size ; ++i) KeySerializers.routingKey.serialize(knownMap.startAt(i), out, version); for (int i = 0 ; i < size ; ++i) - foundKnownNullable.serialize(knownMap.valueAt(i), out, version); + nullableKnown.serialize(knownMap.valueAt(i), out, version); } @Override - public FoundKnownMap deserialize(DataInputPlus in, int version) throws IOException + public KnownMap deserialize(DataInputPlus in, int version) throws IOException { int size = in.readUnsignedVInt32(); RoutingKey[] starts = new RoutingKey[size + 1]; for (int i = 0 ; i <= size ; ++i) starts[i] = KeySerializers.routingKey.deserialize(in, version); - FoundKnown[] values = new FoundKnown[size]; + Known[] values = new Known[size]; for (int i = 0 ; i < size ; ++i) - values[i] = foundKnownNullable.deserialize(in, version); - return FoundKnownMap.SerializerSupport.create(true, starts, values); + values[i] = nullableKnown.deserialize(in, version); + return KnownMap.SerializerSupport.create(true, starts, values); } @Override - public long serializedSize(FoundKnownMap knownMap, int version) + public long serializedSize(KnownMap knownMap, int version) { int size = knownMap.size(); long result = TypeSizes.sizeofUnsignedVInt(size); for (int i = 0 ; i <= size ; ++i) result += KeySerializers.routingKey.serializedSize(knownMap.startAt(i), version); for (int i = 0 ; i < size ; ++i) - result += foundKnownNullable.serializedSize(knownMap.valueAt(i), version); + result += nullableKnown.serializedSize(knownMap.valueAt(i), version); return result; } }; @@ -128,7 +97,7 @@ public long serializedSize(FoundKnownMap knownMap, int version) public void serialize(CheckStatus check, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(check.txnId, out, version); - KeySerializers.unseekables.serialize(check.query, out, version); + KeySerializers.participants.serialize(check.query, out, version); out.writeUnsignedVInt(check.sourceEpoch); out.writeByte(check.includeInfo.ordinal()); } @@ -137,7 +106,7 @@ public void serialize(CheckStatus check, DataOutputPlus out, int version) throws public CheckStatus deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Unseekables query = KeySerializers.unseekables.deserialize(in, version); + Participants query = KeySerializers.participants.deserialize(in, version); long sourceEpoch = in.readUnsignedVInt(); CheckStatus.IncludeInfo info = infos[in.readByte()]; return new CheckStatus(txnId, query, sourceEpoch, info); @@ -147,7 +116,7 @@ public CheckStatus deserialize(DataInputPlus in, int version) throws IOException public long serializedSize(CheckStatus check, int version) { return CommandSerializers.txnId.serializedSize(check.txnId, version) - + KeySerializers.unseekables.serializedSize(check.query, version) + + KeySerializers.participants.serializedSize(check.query, version) + TypeSizes.sizeofUnsignedVInt(check.sourceEpoch) + TypeSizes.BYTE_SIZE; } @@ -170,7 +139,7 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t CheckStatusOk ok = (CheckStatusOk) reply; out.write(reply instanceof CheckStatusOkFull ? FULL : OK); - foundKnownMap.serialize(ok.map, out, version); + knownMap.serialize(ok.map, out, version); CommandSerializers.saveStatus.serialize(ok.maxKnowledgeSaveStatus, out, version); CommandSerializers.saveStatus.serialize(ok.maxSaveStatus, out, version); CommandSerializers.ballot.serialize(ok.maxPromised, out, version); @@ -181,6 +150,7 @@ public void serialize(CheckStatusReply reply, DataOutputPlus out, int version) t CommandSerializers.durability.serialize(ok.durability, out, version); KeySerializers.nullableRoute.serialize(ok.route, out, version); KeySerializers.nullableRoutingKey.serialize(ok.homeKey, out, version); + CommandSerializers.invalidIf.serialize(ok.invalidIf, out, version); if (!(reply instanceof CheckStatusOkFull)) return; @@ -202,7 +172,7 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce return CheckStatusNack.NotOwned; case OK: case FULL: - FoundKnownMap map = foundKnownMap.deserialize(in, version); + KnownMap map = knownMap.deserialize(in, version); SaveStatus maxKnowledgeStatus = CommandSerializers.saveStatus.deserialize(in, version); SaveStatus maxStatus = CommandSerializers.saveStatus.deserialize(in, version); Ballot maxPromised = CommandSerializers.ballot.deserialize(in, version); @@ -213,10 +183,11 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce Durability durability = CommandSerializers.durability.deserialize(in, version); Route route = KeySerializers.nullableRoute.deserialize(in, version); RoutingKey homeKey = KeySerializers.nullableRoutingKey.deserialize(in, version); + Infer.InvalidIf invalidIf = CommandSerializers.invalidIf.deserialize(in, version); if (kind == OK) return createOk(map, maxKnowledgeStatus, maxStatus, maxPromised, maxAcceptedOrCommitted, acceptedOrCommitted, executeAt, - isCoordinating, durability, route, homeKey); + isCoordinating, durability, route, homeKey, invalidIf); PartialTxn partialTxn = CommandSerializers.nullablePartialTxn.deserialize(in, version); PartialDeps committedDeps = DepsSerializer.nullablePartialDeps.deserialize(in, version); @@ -227,7 +198,7 @@ public CheckStatusReply deserialize(DataInputPlus in, int version) throws IOExce result = CommandSerializers.APPLIED; return createOk(map, maxKnowledgeStatus, maxStatus, maxPromised, maxAcceptedOrCommitted, acceptedOrCommitted, executeAt, - isCoordinating, durability, route, homeKey, partialTxn, committedDeps, writes, result); + isCoordinating, durability, route, homeKey, invalidIf, partialTxn, committedDeps, writes, result); } } @@ -240,7 +211,7 @@ public long serializedSize(CheckStatusReply reply, int version) return size; CheckStatusOk ok = (CheckStatusOk) reply; - size += foundKnownMap.serializedSize(ok.map, version); + size += knownMap.serializedSize(ok.map, version); size += CommandSerializers.saveStatus.serializedSize(ok.maxKnowledgeSaveStatus, version); size += CommandSerializers.saveStatus.serializedSize(ok.maxSaveStatus, version); size += CommandSerializers.ballot.serializedSize(ok.maxPromised, version); @@ -249,8 +220,9 @@ public long serializedSize(CheckStatusReply reply, int version) size += CommandSerializers.nullableTimestamp.serializedSize(ok.executeAt, version); size += TypeSizes.BOOL_SIZE; size += CommandSerializers.durability.serializedSize(ok.durability, version); - size += KeySerializers.nullableRoutingKey.serializedSize(ok.homeKey, version); size += KeySerializers.nullableRoute.serializedSize(ok.route, version); + size += KeySerializers.nullableRoutingKey.serializedSize(ok.homeKey, version); + size += CommandSerializers.invalidIf.serializedSize(ok.invalidIf, version); if (!(reply instanceof CheckStatusOkFull)) return size; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java index cd76550262bd..16acd2ab0ba6 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandSerializers.java @@ -29,10 +29,18 @@ import accord.api.Update; import accord.coordinate.Infer; import accord.local.Node; -import accord.local.SaveStatus; -import accord.local.Status; -import accord.local.Status.Durability; -import accord.local.Status.Known; +import accord.local.StoreParticipants; +import accord.primitives.Known.Definition; +import accord.primitives.Known.KnownDeps; +import accord.primitives.Known.KnownExecuteAt; +import accord.primitives.Known.KnownRoute; +import accord.primitives.Known.Outcome; +import accord.primitives.Participants; +import accord.primitives.Route; +import accord.primitives.SaveStatus; +import accord.primitives.Status; +import accord.primitives.Status.Durability; +import accord.primitives.Known; import accord.primitives.Ballot; import accord.primitives.PartialTxn; import accord.primitives.ProgressToken; @@ -47,7 +55,6 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.service.accord.serializers.IVersionedWithKeysSerializer.AbstractWithKeysSerializer; -import org.apache.cassandra.service.accord.serializers.IVersionedWithKeysSerializer.NullableWithKeysSerializer; import org.apache.cassandra.service.accord.serializers.SmallEnumSerializer.NullableSmallEnumSerializer; import org.apache.cassandra.service.accord.txn.AccordUpdate; import org.apache.cassandra.service.accord.txn.TxnQuery; @@ -71,11 +78,74 @@ public ProgressToken asProgressToken() }; public static final TimestampSerializer txnId = new TimestampSerializer<>(TxnId::fromBits); + public static final IVersionedSerializer nullableTxnId = NullableSerializer.wrap(txnId); public static final TimestampSerializer timestamp = new TimestampSerializer<>(Timestamp::fromBits); public static final IVersionedSerializer nullableTimestamp = NullableSerializer.wrap(timestamp); public static final TimestampSerializer ballot = new TimestampSerializer<>(Ballot::fromBits); public static final IVersionedSerializer nullableBallot = NullableSerializer.wrap(ballot); public static final EnumSerializer kind = new EnumSerializer<>(Txn.Kind.class); + public static final StoreParticipantsSerializer participants = new StoreParticipantsSerializer(); + + // TODO (expected): optimise using subset serializers (but be careful for range txns, e.g. some collections have differently sliced sub ranges) + public static class StoreParticipantsSerializer implements IVersionedSerializer + { + static final int HAS_ROUTE = 0x1; + static final int HAS_TOUCHED_EQUALS_ROUTE = 0x2; + static final int TOUCHES_EQUALS_HAS_TOUCHED = 0x4; + static final int OWNS_EQUALS_TOUCHES = 0x8; + @Override + public void serialize(StoreParticipants t, DataOutputPlus out, int version) throws IOException + { + boolean hasRoute = t.route() != null; + boolean hasTouchedEqualsRoute = t.route() == t.hasTouched(); + boolean touchesEqualsHasTouched = t.touches() == t.hasTouched(); + boolean ownsEqualsTouches = t.owns() == t.touches(); + out.writeByte((hasRoute ? HAS_ROUTE : 0) + | (hasTouchedEqualsRoute ? HAS_TOUCHED_EQUALS_ROUTE : 0) + | (touchesEqualsHasTouched ? TOUCHES_EQUALS_HAS_TOUCHED : 0) + | (ownsEqualsTouches ? OWNS_EQUALS_TOUCHES : 0) + ); + if (hasRoute) KeySerializers.route.serialize(t.route(), out, version); + if (!hasTouchedEqualsRoute) KeySerializers.participants.serialize(t.hasTouched(), out, version); + if (!touchesEqualsHasTouched) KeySerializers.participants.serialize(t.touches(), out, version); + if (!ownsEqualsTouches) KeySerializers.participants.serialize(t.owns(), out, version); + } + + @Override + public StoreParticipants deserialize(DataInputPlus in, int version) throws IOException + { + int flags = in.readByte(); + Route route = 0 == (flags & HAS_ROUTE) ? null : KeySerializers.route.deserialize(in, version); + Participants hasTouched = 0 != (flags & HAS_TOUCHED_EQUALS_ROUTE) ? route : KeySerializers.participants.deserialize(in, version); + Participants touches = 0 != (flags & TOUCHES_EQUALS_HAS_TOUCHED) ? hasTouched : KeySerializers.participants.deserialize(in, version); + Participants owns = 0 != (flags & OWNS_EQUALS_TOUCHES) ? touches : KeySerializers.participants.deserialize(in, version); + return StoreParticipants.SerializationSupport.create(route, owns, touches, hasTouched); + } + + public Route deserializeRouteOnly(DataInputPlus in, int version) throws IOException + { + int flags = in.readByte(); + if (0 == (flags & HAS_ROUTE)) + return null; + + return KeySerializers.route.deserialize(in, version); + } + + @Override + public long serializedSize(StoreParticipants t, int version) + { + boolean hasRoute = t.route() != null; + boolean hasTouchedEqualsRoute = t.route() == t.hasTouched(); + boolean touchesEqualsHasTouched = t.touches() == t.hasTouched(); + boolean ownsEqualsTouches = t.owns() == t.touches(); + long size = 1; + if (hasRoute) size += KeySerializers.route.serializedSize(t.route(), version); + if (!hasTouchedEqualsRoute) size += KeySerializers.participants.serializedSize(t.hasTouched(), version); + if (!touchesEqualsHasTouched) size += KeySerializers.participants.serializedSize(t.touches(), version); + if (!ownsEqualsTouches) size += KeySerializers.participants.serializedSize(t.owns(), version); + return size; + } + } public static class TimestampSerializer implements IVersionedSerializer { @@ -173,7 +243,7 @@ public int serializedSize() } } - public static class PartialTxnSerializer extends AbstractWithKeysSerializer implements IVersionedWithKeysSerializer, PartialTxn> + public static class PartialTxnSerializer extends AbstractWithKeysSerializer implements IVersionedSerializer { private final IVersionedSerializer readSerializer; private final IVersionedSerializer querySerializer; @@ -208,28 +278,6 @@ public long serializedSize(PartialTxn txn, int version) return size; } - @Override - public void serialize(Seekables superset, PartialTxn txn, DataOutputPlus out, int version) throws IOException - { - serializeSubset(txn.keys(), superset, out); - serializeWithoutKeys(txn, out, version); - } - - @Override - public PartialTxn deserialize(Seekables superset, DataInputPlus in, int version) throws IOException - { - Seekables keys = deserializeSubset(superset, in); - return deserializeWithoutKeys(keys, in, version); - } - - @Override - public long serializedSize(Seekables superset, PartialTxn txn, int version) - { - long size = serializedSubsetSize(txn.keys(), superset); - size += serializedSizeWithoutKeys(txn, version); - return size; - } - private void serializeWithoutKeys(PartialTxn txn, DataOutputPlus out, int version) throws IOException { CommandSerializers.kind.serialize(txn.kind(), out, version); @@ -249,7 +297,6 @@ private PartialTxn deserializeWithoutKeys(Seekables keys, DataInputPlus in return new PartialTxn.InMemory(kind, keys, read, query, update); } - private long serializedSizeWithoutKeys(PartialTxn txn, int version) { long size = CommandSerializers.kind.serializedSize(txn.kind(), version); @@ -266,14 +313,14 @@ private long serializedSizeWithoutKeys(PartialTxn txn, int version) private static final IVersionedSerializer query = new CastingSerializer<>(TxnQuery.class, TxnQuery.serializer); private static final IVersionedSerializer update = new CastingSerializer<>(AccordUpdate.class, AccordUpdate.serializer); - public static final IVersionedWithKeysSerializer, PartialTxn> partialTxn = new PartialTxnSerializer(read, query, update); - public static final IVersionedWithKeysSerializer, PartialTxn> nullablePartialTxn = new NullableWithKeysSerializer<>(partialTxn); + public static final IVersionedSerializer partialTxn = new PartialTxnSerializer(read, query, update); + public static final IVersionedSerializer nullablePartialTxn = NullableSerializer.wrap(partialTxn); public static final EnumSerializer saveStatus = new EnumSerializer<>(SaveStatus.class); public static final EnumSerializer status = new EnumSerializer<>(Status.class); public static final EnumSerializer durability = new EnumSerializer<>(Durability.class); - public static final IVersionedSerializer writes = new IVersionedSerializer() + public static final IVersionedSerializer writes = new IVersionedSerializer<>() { @Override public void serialize(Writes writes, DataOutputPlus out, int version) throws IOException @@ -311,14 +358,13 @@ public long serializedSize(Writes writes, int version) public static final IVersionedSerializer nullableWrites = NullableSerializer.wrap(writes); - public static final SmallEnumSerializer knownRoute = new SmallEnumSerializer<>(Status.KnownRoute.class); - public static final SmallEnumSerializer definition = new SmallEnumSerializer<>(Status.Definition.class); - public static final SmallEnumSerializer knownExecuteAt = new SmallEnumSerializer<>(Status.KnownExecuteAt.class); - public static final SmallEnumSerializer knownDeps = new SmallEnumSerializer<>(Status.KnownDeps.class); - public static final NullableSmallEnumSerializer nullableKnownDeps = new NullableSmallEnumSerializer<>(knownDeps); - public static final SmallEnumSerializer outcome = new SmallEnumSerializer<>(Status.Outcome.class); - public static final SmallEnumSerializer invalidIfNot = new SmallEnumSerializer<>(Infer.InvalidIfNot.class); - public static final SmallEnumSerializer isPreempted = new SmallEnumSerializer<>(Infer.IsPreempted.class); + public static final SmallEnumSerializer knownRoute = new SmallEnumSerializer<>(KnownRoute.class); + public static final SmallEnumSerializer definition = new SmallEnumSerializer<>(Definition.class); + public static final SmallEnumSerializer knownExecuteAt = new SmallEnumSerializer<>(KnownExecuteAt.class); + public static final SmallEnumSerializer knownDeps = new SmallEnumSerializer<>(KnownDeps.class); + public static final NullableSmallEnumSerializer nullableKnownDeps = new NullableSmallEnumSerializer<>(knownDeps); + public static final SmallEnumSerializer outcome = new SmallEnumSerializer<>(Outcome.class); + public static final SmallEnumSerializer invalidIf = new SmallEnumSerializer<>(Infer.InvalidIf.class); public static final IVersionedSerializer known = new IVersionedSerializer<>() { @@ -352,4 +398,6 @@ public long serializedSize(Known known, int version) + outcome.serializedSize(known.outcome, version); } }; + + public static final IVersionedSerializer nullableKnown = NullableSerializer.wrap(known); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java index 5af4b53a3607..bca3b763c1b7 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandStoreSerializers.java @@ -26,6 +26,7 @@ import accord.api.RoutingKey; import accord.local.DurableBefore; import accord.local.RedundantBefore; +import accord.local.RejectBefore; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Timestamp; @@ -61,29 +62,31 @@ public ReducingRangeMapSerializer(IVersionedSerializer valueSerializer, IntFu public void serialize(R map, DataOutputPlus out, int version) throws IOException { out.writeBoolean(map.inclusiveEnds()); - int size = map.size(); - out.writeUnsignedVInt32(size); + int mapSize = map.size(); + out.writeUnsignedVInt32(mapSize); - for (int i=0; i 0) + KeySerializers.routingKey.serialize(map.startAt(mapSize), out, version); } public R deserialize(DataInputPlus in, int version) throws IOException { boolean inclusiveEnds = in.readBoolean(); - int size = in.readUnsignedVInt32(); - RoutingKey[] keys = new RoutingKey[size + 1]; - T[] values = newValueArray.apply(size); - for (int i=0; i 0) + keys[mapSize] = KeySerializers.routingKey.deserialize(in, version); return constructor.apply(inclusiveEnds, keys, values); } @@ -97,14 +100,15 @@ public long serializedSize(R map, int version) size += KeySerializers.routingKey.serializedSize(map.startAt(i), version); size += valueSerializer.serializedSize(map.valueAt(i), version); } - size += KeySerializers.routingKey.serializedSize(map.startAt(mapSize), version); + if (mapSize > 0) + size += KeySerializers.routingKey.serializedSize(map.startAt(mapSize), version); return size; } } - public static IVersionedSerializer> rejectBefore = new ReducingRangeMapSerializer<>(CommandSerializers.nullableTimestamp, Timestamp[]::new, ReducingRangeMap.SerializerSupport::create); - public static IVersionedSerializer durableBefore = new ReducingRangeMapSerializer<>(NullableSerializer.wrap(new IVersionedSerializer() + public static IVersionedSerializer rejectBefore = new ReducingRangeMapSerializer<>(CommandSerializers.nullableTxnId, TxnId[]::new, RejectBefore.SerializerSupport::create); + public static IVersionedSerializer durableBefore = new ReducingRangeMapSerializer<>(NullableSerializer.wrap(new IVersionedSerializer<>() { @Override public void serialize(DurableBefore.Entry t, DataOutputPlus out, int version) throws IOException @@ -135,12 +139,15 @@ public long serializedSize(DurableBefore.Entry t, int version) public void serialize(RedundantBefore.Entry t, DataOutputPlus out, int version) throws IOException { TokenRange.serializer.serialize((TokenRange) t.range, out, version); - Invariants.checkState(t.startEpoch <= t.endEpoch); - out.writeUnsignedVInt(t.startEpoch); - if (t.endEpoch == Long.MAX_VALUE) out.writeUnsignedVInt(0L); - else out.writeUnsignedVInt(1 + t.endEpoch - t.startEpoch); + Invariants.checkState(t.startOwnershipEpoch <= t.endOwnershipEpoch); + out.writeUnsignedVInt(t.startOwnershipEpoch); + if (t.endOwnershipEpoch == Long.MAX_VALUE) out.writeUnsignedVInt(0L); + else out.writeUnsignedVInt(1 + t.endOwnershipEpoch - t.startOwnershipEpoch); CommandSerializers.txnId.serialize(t.locallyAppliedOrInvalidatedBefore, out, version); + CommandSerializers.txnId.serialize(t.locallyDecidedAndAppliedOrInvalidatedBefore, out, version); CommandSerializers.txnId.serialize(t.shardAppliedOrInvalidatedBefore, out, version); + CommandSerializers.txnId.serialize(t.shardOnlyAppliedOrInvalidatedBefore, out, version); + CommandSerializers.txnId.serialize(t.gcBefore, out, version); CommandSerializers.txnId.serialize(t.bootstrappedAt, out, version); CommandSerializers.nullableTimestamp.serialize(t.staleUntilAtLeast, out, version); } @@ -154,20 +161,26 @@ public RedundantBefore.Entry deserialize(DataInputPlus in, int version) throws I if (endEpoch == 0) endEpoch = Long.MAX_VALUE; else endEpoch = endEpoch - 1 + startEpoch; TxnId locallyAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); + TxnId locallyDecidedAndAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); TxnId shardAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); + TxnId shardOnlyAppliedOrInvalidatedBefore = CommandSerializers.txnId.deserialize(in, version); + TxnId gcBefore = CommandSerializers.txnId.deserialize(in, version); TxnId bootstrappedAt = CommandSerializers.txnId.deserialize(in, version); Timestamp staleUntilAtLeast = CommandSerializers.nullableTimestamp.deserialize(in, version); - return new RedundantBefore.Entry(range, startEpoch, endEpoch, locallyAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, bootstrappedAt, staleUntilAtLeast); + return new RedundantBefore.Entry(range, startEpoch, endEpoch, locallyAppliedOrInvalidatedBefore, locallyDecidedAndAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, shardOnlyAppliedOrInvalidatedBefore, gcBefore, bootstrappedAt, staleUntilAtLeast); } @Override public long serializedSize(RedundantBefore.Entry t, int version) { long size = TokenRange.serializer.serializedSize((TokenRange) t.range, version); - size += TypeSizes.sizeofUnsignedVInt(t.startEpoch); - size += TypeSizes.sizeofUnsignedVInt(t.endEpoch == Long.MAX_VALUE ? 0 : 1 + t.endEpoch - t.startEpoch); + size += TypeSizes.sizeofUnsignedVInt(t.startOwnershipEpoch); + size += TypeSizes.sizeofUnsignedVInt(t.endOwnershipEpoch == Long.MAX_VALUE ? 0 : 1 + t.endOwnershipEpoch - t.startOwnershipEpoch); size += CommandSerializers.txnId.serializedSize(t.locallyAppliedOrInvalidatedBefore, version); + size += CommandSerializers.txnId.serializedSize(t.locallyDecidedAndAppliedOrInvalidatedBefore, version); size += CommandSerializers.txnId.serializedSize(t.shardAppliedOrInvalidatedBefore, version); + size += CommandSerializers.txnId.serializedSize(t.shardOnlyAppliedOrInvalidatedBefore, version); + size += CommandSerializers.txnId.serializedSize(t.gcBefore, version); size += CommandSerializers.txnId.serializedSize(t.bootstrappedAt, version); size += CommandSerializers.nullableTimestamp.serializedSize(t.staleUntilAtLeast, version); return size; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index 4dc9ed1805e9..a12b17098ed4 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -21,9 +21,12 @@ import java.nio.ByteBuffer; import java.util.Arrays; +import javax.annotation.Nonnull; + import com.google.common.primitives.Ints; -import accord.api.Key; +import accord.api.RoutingKey; +import accord.local.RedundantBefore; import accord.local.cfk.CommandsForKey; import accord.local.cfk.CommandsForKey.TxnInfo; import accord.local.cfk.CommandsForKey.InternalStatus; @@ -40,18 +43,16 @@ import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.utils.vint.VIntCoding; +import static accord.local.cfk.CommandsForKey.NO_BOUNDS_INFO; import static accord.local.cfk.CommandsForKey.NO_PENDING_UNMANAGED; +import static accord.primitives.Txn.Kind.ExclusiveSyncPoint; import static accord.primitives.TxnId.NO_TXNIDS; import static accord.primitives.Txn.Kind.Read; import static accord.primitives.Txn.Kind.SyncPoint; import static accord.primitives.Txn.Kind.Write; import static accord.utils.ArrayBuffers.cachedInts; import static accord.utils.ArrayBuffers.cachedTxnIds; -import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.EXTENDED; -import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.EXTENDED_BITS; -import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.RAW; import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.RAW_BITS; -import static org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer.TxnIdFlags.STANDARD; import static org.apache.cassandra.utils.ByteBufferUtil.readLeastSignificantBytes; import static org.apache.cassandra.utils.ByteBufferUtil.writeLeastSignificantBytes; import static org.apache.cassandra.utils.ByteBufferUtil.writeMostSignificantBytes; @@ -63,7 +64,8 @@ public class CommandsForKeySerializer private static final int HAS_MISSING_DEPS_HEADER_BIT = 0x1; private static final int HAS_EXECUTE_AT_HEADER_BIT = 0x2; private static final int HAS_BALLOT_HEADER_BIT = 0x4; - private static final int HAS_NON_STANDARD_FLAGS = 0x8; + private static final int HAS_STATUS_OVERRIDES = 0x8; + private static final int HAS_NON_STANDARD_FLAGS = 0x10; /** * We read/write a fixed number of intial bytes for each command, with an initial flexible number of flag bits @@ -77,8 +79,9 @@ public class CommandsForKeySerializer * bit 0 is set if there are any missing ids; * bit 1 is set if there are any executeAt specified * bit 2 is set if there are any ballots specified - * bit 3 is set if there are any queries present besides reads/writes - * bits 4-5 number of header bytes to read for each command + * bit 3 is set if there are any non-standard TxnId.Kind present + * bit 4 is set if there are any queries with override flags + * bits 6-7 number of header bytes to read for each command * bits 8-9: level 0 extra hlc bytes to read * bits 10-11: level 1 extra hlc bytes to read (+ 1 + level 0) * bits 12-13: level 2 extra hlc bytes to read (+ 1 + level 1) @@ -86,9 +89,10 @@ public class CommandsForKeySerializer * * In order, for each command, we consume: * 3 bits for the InternalStatus of the command + * 1 optional bit: if any command has override flags; 2 bits more to read if this bit is set * 1 optional bit: if the status encodes an executeAt, indicating if the executeAt is not the TxnId * 1 optional bit: if the status encodes any dependencies and there are non-zero missing ids, indicating if there are any missing for this command - * 1 or 2 bits for the kind of the TxnId: 0=key read, 1=key write, 2=exclusive sync point,3=read 16 bits + * 2 or 3 bits for the kind of the TxnId * 1 bit encoding if the epoch has changed * 2 optional bits: if the prior bit is set, indicating how many bits should be read for the epoch increment: 0=none (increment by 1); 1=4, 2=8, 3=32 * 4 option bits: if prior bits=01, epoch delta @@ -124,7 +128,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) // whether we have any missing transactions to encode, any executeAt that are not equal to their TxnId // and whether there are any non-standard flag bits to encode boolean hasNonStandardFlags = false; - int nodeIdCount, missingIdCount = 0, executeAtCount = 0, ballotCount = 0; + int nodeIdCount, missingIdCount = 0, executeAtCount = 0, ballotCount = 0, overrideCount = 0; int bitsPerExecuteAtEpoch = 0, bitsPerExecuteAtFlags = 0, bitsPerExecuteAtHlc = 1; // to permit us to use full 64 bits and encode in 5 bits we force at least one hlc bit { nodeIds[0] = cfk.redundantBefore().node.id; @@ -139,13 +143,13 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) } TxnInfo txn = cfk.get(i); - - hasNonStandardFlags |= txnIdFlags(txn) != STANDARD; + overrideCount += txn.statusOverrides() > 0 ? 1 : 0; + hasNonStandardFlags |= hasNonStandardFlags(txn); nodeIds[nodeIdCount++] = txn.node.id; if (txn.executeAt != txn) { - Invariants.checkState(txn.status.hasExecuteAtOrDeps); + Invariants.checkState(txn.status().hasExecuteAtOrDeps); nodeIds[nodeIdCount++] = txn.executeAt.node.id; bitsPerExecuteAtEpoch = Math.max(bitsPerExecuteAtEpoch, numberOfBitsToRepresent(txn.executeAt.epoch() - txn.epoch())); bitsPerExecuteAtHlc = Math.max(bitsPerExecuteAtHlc, numberOfBitsToRepresent(txn.executeAt.hlc() - txn.hlc())); @@ -159,7 +163,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) missingIdCount += extra.missing.length; if (extra.ballot != Ballot.ZERO) { - Invariants.checkArgument(txn.status.hasBallot); + Invariants.checkArgument(txn.status().hasBallot); nodeIds[nodeIdCount++] = extra.ballot.node.id; ballotCount += 1; } @@ -172,7 +176,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) // We can now use this information to calculate the fixed header size, compute the amount // of additional space we'll need to store the TxnId and its basic info int bitsPerNodeId = numberOfBitsToRepresent(nodeIdCount); - int minHeaderBits = 7 + bitsPerNodeId + (hasNonStandardFlags ? 1 : 0); + int minHeaderBits = 8 + bitsPerNodeId + (hasNonStandardFlags ? 1 : 0) + (overrideCount > 0 ? 1 : 0); int infoHeaderBits = (executeAtCount > 0 ? 1 : 0) + (missingIdCount > 0 ? 1 : 0); int ballotHeaderBits = (ballotCount > 0 ? 1 : 0); int maxHeaderBits = minHeaderBits; @@ -216,14 +220,16 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) prevHlc = hlc; } - if (hasNonStandardFlags && txnIdFlags(txnId) == RAW) + if (txnIdFlagsBits(txnId, hasNonStandardFlags) == RAW_BITS) totalBytes += 2; TxnInfo info = cfk.get(i); - if (info.status.hasExecuteAtOrDeps) + if (info.status().hasExecuteAtOrDeps) headerBits += infoHeaderBits; - if (info.status.hasBallot) + if (info.status().hasBallot) headerBits += ballotHeaderBits; + if (info.statusOverrides() != 0) + headerBits += 2; maxHeaderBits = Math.max(headerBits, maxHeaderBits); int basicBytes = (headerBits + payloadBits + 7)/8; bytesHistogram[basicBytes]++; @@ -242,10 +248,11 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) int flags = (missingIdCount > 0 ? HAS_MISSING_DEPS_HEADER_BIT : 0) | (executeAtCount > 0 ? HAS_EXECUTE_AT_HEADER_BIT : 0) | (ballotCount > 0 ? HAS_BALLOT_HEADER_BIT : 0) - | (hasNonStandardFlags ? HAS_NON_STANDARD_FLAGS : 0); + | (hasNonStandardFlags ? HAS_NON_STANDARD_FLAGS : 0) + | (overrideCount > 0 ? HAS_STATUS_OVERRIDES : 0); int headerBytes = (maxHeaderBits+7)/8; - flags |= Invariants.checkArgument(headerBytes - 1, headerBytes <= 4) << 4; + flags |= Invariants.checkArgument(headerBytes - 1, headerBytes <= 4) << 6; int hlcBytesLookup; { // 2bits per size, first value may be zero and remainder may be increments of 1-4; @@ -281,7 +288,14 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) prevEpoch = cfk.redundantBefore().epoch(); prevHlc = cfk.redundantBefore().hlc(); - totalBytes += TypeSizes.sizeofUnsignedVInt(prevEpoch); + { + RedundantBefore.Entry boundsInfo = cfk.boundsInfo(); + long start = boundsInfo.startOwnershipEpoch; + long end = boundsInfo.endOwnershipEpoch; + totalBytes += VIntCoding.computeUnsignedVIntSize(start); + totalBytes += VIntCoding.computeUnsignedVIntSize(end == Long.MAX_VALUE ? 0 : (1 + end - start)); + totalBytes += VIntCoding.computeVIntSize(prevEpoch - start); + } totalBytes += TypeSizes.sizeofUnsignedVInt(prevHlc); totalBytes += TypeSizes.sizeofUnsignedVInt(cfk.redundantBefore().flags()); totalBytes += TypeSizes.sizeofUnsignedVInt(Arrays.binarySearch(nodeIds, 0, nodeIdCount, cfk.redundantBefore().node.id)); @@ -297,7 +311,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) { TxnInfo txn = cfk.get(i); if (txn.getClass() != TxnInfoExtra.class) continue; - if (!txn.status.hasBallot) continue; + if (!txn.status().hasBallot) continue; TxnInfoExtra extra = (TxnInfoExtra) txn; if (extra.ballot == Ballot.ZERO) continue; if (prevBallot != null) @@ -353,7 +367,14 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) out.putShort((short)flags); - VIntCoding.writeUnsignedVInt(prevEpoch, out); + { + RedundantBefore.Entry boundsInfo = cfk.boundsInfo(); + long start = boundsInfo.startOwnershipEpoch; + long end = boundsInfo.endOwnershipEpoch; + VIntCoding.writeUnsignedVInt(start, out); + VIntCoding.writeUnsignedVInt(end == Long.MAX_VALUE ? 0 : (1 + end - start), out); + VIntCoding.writeVInt(prevEpoch - start, out); + } VIntCoding.writeUnsignedVInt(prevHlc, out); VIntCoding.writeUnsignedVInt32(cfk.redundantBefore().flags(), out); VIntCoding.writeUnsignedVInt32(Arrays.binarySearch(nodeIds, 0, nodeIdCount, cfk.redundantBefore().node.id), out); @@ -362,32 +383,37 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) int executeAtMask = executeAtCount > 0 ? 1 : 0; int missingDepsMask = missingIdCount > 0 ? 1 : 0; int ballotMask = ballotCount > 0 ? 1 : 0; - int flagsIncrement = hasNonStandardFlags ? 2 : 1; + int noOverrideIncrement = overrideCount > 0 ? 1 : 0; + int flagsIncrement = hasNonStandardFlags ? 3 : 2; // TODO (desired): check this loop compiles correctly to only branch on epoch case, for binarySearch and flushing for (int i = 0 ; i < commandCount ; ++i) { - TxnId txnId = cfk.txnId(i); - TxnInfo info = cfk.get(i); - InternalStatus status = info.status; + TxnInfo txn = cfk.get(i); + InternalStatus status = txn.status(); long bits = status.ordinal(); int bitIndex = 3; int statusHasInfo = status.hasExecuteAtOrDeps ? 1 : 0; int statusHasBallot = status.hasBallot ? 1 : 0; - long hasExecuteAt = info.executeAt != txnId ? 1 : 0; + long hasExecuteAt = txn.executeAt != txn ? 1 : 0; bits |= hasExecuteAt << bitIndex; bitIndex += statusHasInfo & executeAtMask; - long hasMissingIds = info.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)info).missing != NO_TXNIDS ? 1 : 0; + long hasMissingIds = txn.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)txn).missing != NO_TXNIDS ? 1 : 0; bits |= hasMissingIds << bitIndex; bitIndex += statusHasInfo & missingDepsMask; - long hasBallot = info.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)info).ballot != Ballot.ZERO ? 1 : 0; + long hasBallot = txn.getClass() == TxnInfoExtra.class && ((TxnInfoExtra)txn).ballot != Ballot.ZERO ? 1 : 0; bits |= hasBallot << bitIndex; bitIndex += statusHasBallot & ballotMask; - long flagBits = txnIdFlagsBits(txnId); + long statusOverrides = (long) txn.statusOverrides() << 1; + statusOverrides |= statusOverrides != 0 ? 1 : 0; + bits |= statusOverrides << bitIndex; + bitIndex += statusOverrides != 0 ? 3 : noOverrideIncrement; + + long flagBits = txnIdFlagsBits(txn, hasNonStandardFlags); boolean writeFullFlags = flagBits == RAW_BITS; bits |= flagBits << bitIndex; bitIndex += flagsIncrement; @@ -395,9 +421,9 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) long hlcBits; int extraEpochDeltaBytes = 0; { - long epoch = txnId.epoch(); + long epoch = txn.epoch(); long delta = epoch - prevEpoch; - long hlc = txnId.hlc(); + long hlc = txn.hlc(); hlcBits = hlc - prevHlc; if (delta == 0) { @@ -432,7 +458,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) prevHlc = hlc; } - bits |= ((long)Arrays.binarySearch(nodeIds, 0, nodeIdCount, txnId.node.id)) << bitIndex; + bits |= ((long)Arrays.binarySearch(nodeIds, 0, nodeIdCount, txn.node.id)) << bitIndex; bitIndex += bitsPerNodeId; bits |= hlcBits << (bitIndex + 2); @@ -444,7 +470,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) writeLeastSignificantBytes(hlcBits, getHlcBytes(hlcBytesLookup, hlcFlag), out); if (writeFullFlags) - out.putShort((short)txnId.flags()); + out.putShort((short)txn.flags()); if (extraEpochDeltaBytes > 0) { @@ -608,7 +634,7 @@ private static long flushBits(long buffer, int bufferCount, long add, int addCou } } - public static CommandsForKey fromBytes(Key key, ByteBuffer in) + public static CommandsForKey fromBytes(RoutingKey key, ByteBuffer in) { if (!in.hasRemaining()) return null; @@ -632,19 +658,26 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) nodeIds[i] = new Node.Id(prev += VIntCoding.readUnsignedVInt32(in)); } - int missingDepsMasks, executeAtMasks, ballotMasks, txnIdFlagsMask; + int missingDepsMasks, executeAtMasks, ballotMasks, txnIdFlagsMask, overrideMask; int headerByteCount, hlcBytesLookup; { int flags = in.getShort(); missingDepsMasks = 0 != (flags & HAS_MISSING_DEPS_HEADER_BIT) ? 1 : 0; executeAtMasks = 0 != (flags & HAS_EXECUTE_AT_HEADER_BIT) ? 1 : 0; ballotMasks = 0 != (flags & HAS_BALLOT_HEADER_BIT) ? 1 : 0; - txnIdFlagsMask = 0 != (flags & HAS_NON_STANDARD_FLAGS) ? 3 : 1; - headerByteCount = 1 + ((flags >>> 4) & 0x3); + overrideMask = 0 != (flags & HAS_STATUS_OVERRIDES) ? 1 : 0; + txnIdFlagsMask = 0 != (flags & HAS_NON_STANDARD_FLAGS) ? 7 : 3; + headerByteCount = 1 + ((flags >>> 6) & 0x3); hlcBytesLookup = setHlcByteDeltas((flags >>> 8) & 0x3, (flags >>> 10) & 0x3, (flags >>> 12) & 0x3, (flags >>> 14) & 0x3); } - long prevEpoch = VIntCoding.readUnsignedVInt(in); + long minEpoch = VIntCoding.readUnsignedVInt(in); + long maxEpoch; { + long offset = VIntCoding.readUnsignedVInt(in); + maxEpoch = offset == 0 ? Long.MAX_VALUE : minEpoch + offset - 1; + } + RedundantBefore.Entry boundsInfo = NO_BOUNDS_INFO.withEpochs(minEpoch, maxEpoch); + long prevEpoch = minEpoch + VIntCoding.readVInt(in); long prevHlc = VIntCoding.readUnsignedVInt(in); TxnId redundantBefore; { @@ -661,7 +694,7 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) int commandDecodeFlags = (int)(header & 0x7); InternalStatus status = InternalStatus.get(commandDecodeFlags); header >>>= 3; - commandDecodeFlags <<= 3; + commandDecodeFlags <<= 6; { int infoMask = status.hasExecuteAtOrDeps ? 1 : 0; @@ -669,15 +702,21 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) int missingDepsMask = infoMask & missingDepsMasks; commandDecodeFlags |= ((int)header & executeAtMask) << 1; header >>>= executeAtMask; - commandDecodeFlags |= (int)header & missingDepsMask; + commandDecodeFlags |= ((int)header & missingDepsMask); header >>>= missingDepsMask; int ballotMask = status.hasBallot ? ballotMasks : 0; commandDecodeFlags |= ((int)header & ballotMask) << 2; header >>>= ballotMask; + commandDecodeFlags |= (header & 0x7) << 3; + header >>= (header & overrideMask) == 0 ? overrideMask : 3; decodeFlags[i] = commandDecodeFlags; } - Txn.Kind kind = TXN_ID_FLAG_BITS_KIND_LOOKUP[((int)header & txnIdFlagsMask)]; + Txn.Kind kind; Domain domain; { + int flags = (int)header & txnIdFlagsMask; + kind = kindLookup(flags); + domain = domainLookup(flags); + } header >>>= Integer.bitCount(txnIdFlagsMask); boolean hlcIsNegative = false; @@ -725,7 +764,7 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) if (readEpochBytes > 0) epoch += readEpochBytes == 1 ? (in.get() & 0xff) : in.getInt(); - txnIds[i] = kind != null ? new TxnId(epoch, hlc, kind, Domain.Key, node) + txnIds[i] = kind != null ? new TxnId(epoch, hlc, kind, domain, node) : TxnId.fromValues(epoch, hlc, flags, node); prevEpoch = epoch; @@ -880,7 +919,9 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) prevBallot = ballot; } - txns[i] = TxnInfo.create(txnId, InternalStatus.get(commandDecodeFlags >>> 3), executeAt, missing, ballot); + InternalStatus status = InternalStatus.get(commandDecodeFlags >>> 6); + int statusOverrides = ((commandDecodeFlags >>> 3) & overrideMask) == 0 ? 0 : commandDecodeFlags >>> 4; + txns[i] = create(boundsInfo, txnId, status, statusOverrides, executeAt, missing, ballot); } cachedTxnIds().forceDiscard(missingIdBuffer, maxIdBufferCount); @@ -888,13 +929,25 @@ public static CommandsForKey fromBytes(Key key, ByteBuffer in) else { for (int i = 0 ; i < commandCount ; ++i) - txns[i] = TxnInfo.create(txnIds[i], InternalStatus.get(decodeFlags[i] >>> 3), txnIds[i], Ballot.ZERO); + { + int commandDecodeFlags = decodeFlags[i]; + InternalStatus status = InternalStatus.get(commandDecodeFlags >>> 6); + int statusOverrides = ((commandDecodeFlags >>> 3) & overrideMask) == 0 ? 0 : commandDecodeFlags >>> 4; + txns[i] = create(boundsInfo, txnIds[i], status, statusOverrides, txnIds[i], NO_TXNIDS, Ballot.ZERO); + } } cachedTxnIds().forceDiscard(txnIds, commandCount); return CommandsForKey.SerializerSupport.create(key, txns, unmanageds, redundantBefore, prunedBeforeIndex == -1 ? TxnId.NONE : txns[prunedBeforeIndex]); } + private static TxnInfo create(RedundantBefore.Entry boundsInfo, @Nonnull TxnId txnId, InternalStatus status, int statusOverrides, @Nonnull Timestamp executeAt, @Nonnull TxnId[] missing, @Nonnull Ballot ballot) + { + boolean mayExecute = status.isCommittedToExecute() ? CommandsForKey.executes(boundsInfo, txnId, executeAt) + : CommandsForKey.mayExecute(boundsInfo, txnId); + return TxnInfo.create(txnId, status, mayExecute, statusOverrides, executeAt, missing, ballot); + } + private static int getHlcBytes(int lookup, int index) { return (lookup >>> (index * 4)) & 0xf; @@ -998,40 +1051,48 @@ else if (in.remaining() >= 8) enum TxnIdFlags { STANDARD, EXTENDED, RAW; - static final int EXTENDED_BITS = 0x2; - static final int RAW_BITS = 0x3; + static final int RAW_BITS = 0; + } + + private static boolean hasNonStandardFlags(TxnId txnId) + { + if (txnId.flags() > Timestamp.IDENTITY_FLAGS) + return false; + + int flagBits = txnIdFlagsBits(txnId, true); + return flagBits > 3; } - private static TxnIdFlags txnIdFlags(TxnId txnId) + private static int txnIdFlagsBits(TxnId txnId, boolean permitNonStandardFlags) { - if (txnId.flags() > Timestamp.IDENTITY_FLAGS || txnId.domain() != Domain.Key) - return RAW; - switch (txnId.kind()) + Txn.Kind kind = txnId.kind(); + Domain domain = txnId.domain(); + if (!permitNonStandardFlags && domain == Domain.Range) + return 0; + + int offset = domain == Domain.Range ? 3 : 0; + switch (kind) { - default: throw new AssertionError("Unhandled Kind: " + txnId.kind()); - case Read: - case Write: - return STANDARD; - case SyncPoint: - return EXTENDED; + case Read: return offset + 1; + case Write: return offset + 2; + case SyncPoint: return offset + 3; case ExclusiveSyncPoint: - case LocalOnly: - case EphemeralRead: - return RAW; + if (domain == Domain.Range) + return 7; + default: + return 0; } } - private static long txnIdFlagsBits(TxnId txnId) + private static Domain domainLookup(int flags) { - switch (txnIdFlags(txnId)) - { - default: throw new AssertionError("Unhandled TxnIdFlag: " + txnIdFlags(txnId)); - case RAW: return RAW_BITS; - case EXTENDED: return EXTENDED_BITS; - case STANDARD: - return txnId.kind() == Read ? 0 : 1; - } + return flags <= 4 ? Domain.Key : Domain.Range; + } + + private static Txn.Kind kindLookup(int flags) + { + return TXN_ID_FLAG_BITS_KIND_LOOKUP[flags]; } - private static final Txn.Kind[] TXN_ID_FLAG_BITS_KIND_LOOKUP = new Txn.Kind[] { Read, Write, SyncPoint, null }; + private static final Txn.Kind[] TXN_ID_FLAG_BITS_KIND_LOOKUP = new Txn.Kind[] { null, Read, Write, SyncPoint, Read, Write, SyncPoint, ExclusiveSyncPoint }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java index 9e439233ae3e..c286cd330abb 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommitSerializers.java @@ -27,11 +27,10 @@ import accord.primitives.FullRoute; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; +import accord.primitives.Participants; import accord.primitives.Route; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; -import accord.primitives.Unseekables; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; @@ -46,7 +45,7 @@ public class CommitSerializers { private static final IVersionedSerializer kind = new EnumSerializer<>(Commit.Kind.class); - public abstract static class CommitSerializer extends TxnRequestSerializer + public abstract static class CommitSerializer extends TxnRequestSerializer.WithUnsyncedSerializer { private final IVersionedSerializer read; @@ -61,30 +60,28 @@ public void serializeBody(C msg, DataOutputPlus out, int version) throws IOExcep kind.serialize(msg.kind, out, version); CommandSerializers.ballot.serialize(msg.ballot, out, version); CommandSerializers.timestamp.serialize(msg.executeAt, out, version); - KeySerializers.seekables.serialize(msg.keys, out, version); - CommandSerializers.nullablePartialTxn.serialize(msg.keys, msg.partialTxn, out, version); - DepsSerializer.partialDeps.serialize(msg.keys, msg.partialDeps, out, version); + CommandSerializers.nullablePartialTxn.serialize(msg.partialTxn, out, version); + DepsSerializer.partialDeps.serialize(msg.scope, msg.partialDeps, out, version); serializeNullable(msg.route, out, version, KeySerializers.fullRoute); serializeNullable(msg.readData, out, version, read); } - protected abstract C deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, Commit.Kind kind, + protected abstract C deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, long minEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, - Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, + @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read); @Override - public C deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException + public C deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { Commit.Kind kind = CommitSerializers.kind.deserialize(in, version); Ballot ballot = CommandSerializers.ballot.deserialize(in, version); Timestamp executeAt = CommandSerializers.timestamp.deserialize(in, version); - Seekables keys = KeySerializers.seekables.deserialize(in, version); - PartialTxn txn = CommandSerializers.nullablePartialTxn.deserialize(keys, in, version); - PartialDeps deps = DepsSerializer.partialDeps.deserialize(keys, in, version); + PartialTxn txn = CommandSerializers.nullablePartialTxn.deserialize(in, version); + PartialDeps deps = DepsSerializer.partialDeps.deserialize(scope, in, version); FullRoute route = deserializeNullable(in, version, KeySerializers.fullRoute); ReadData read = deserializeNullable(in, version, this.read); - return deserializeCommit(txnId, scope, waitForEpoch, kind, ballot, executeAt, keys, txn, deps, route, read); + return deserializeCommit(txnId, scope, waitForEpoch, minEpoch, kind, ballot, executeAt, txn, deps, route, read); } @Override @@ -93,9 +90,8 @@ public long serializedBodySize(C msg, int version) return kind.serializedSize(msg.kind, version) + CommandSerializers.ballot.serializedSize(msg.ballot, version) + CommandSerializers.timestamp.serializedSize(msg.executeAt, version) - + KeySerializers.seekables.serializedSize(msg.keys, version) - + CommandSerializers.nullablePartialTxn.serializedSize(msg.keys, msg.partialTxn, version) - + DepsSerializer.partialDeps.serializedSize(msg.keys, msg.partialDeps, version) + + CommandSerializers.nullablePartialTxn.serializedSize(msg.partialTxn, version) + + DepsSerializer.partialDeps.serializedSize(msg.scope, msg.partialDeps, version) + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) + serializedNullableSize(msg.readData, version, read); } @@ -104,9 +100,9 @@ public long serializedBodySize(C msg, int version) public static final IVersionedSerializer request = new CommitSerializer(ReadData.class, ReadDataSerializers.readData) { @Override - protected Commit deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, Seekables keys, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) + protected Commit deserializeCommit(TxnId txnId, Route scope, long waitForEpoch, long minEpoch, Commit.Kind kind, Ballot ballot, Timestamp executeAt, @Nullable PartialTxn partialTxn, PartialDeps partialDeps, @Nullable FullRoute fullRoute, @Nullable ReadData read) { - return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, kind, ballot, executeAt, keys, partialTxn, partialDeps, fullRoute, read); + return Commit.SerializerSupport.create(txnId, scope, waitForEpoch, minEpoch, kind, ballot, executeAt, partialTxn, partialDeps, fullRoute, read); } }; @@ -116,7 +112,7 @@ protected Commit deserializeCommit(TxnId txnId, Route scope, long waitForEpoc public void serialize(Commit.Invalidate invalidate, DataOutputPlus out, int version) throws IOException { CommandSerializers.txnId.serialize(invalidate.txnId, out, version); - KeySerializers.unseekables.serialize(invalidate.scope, out, version); + KeySerializers.participants.serialize(invalidate.scope, out, version); out.writeUnsignedVInt(invalidate.waitForEpoch); out.writeUnsignedVInt(invalidate.invalidateUntilEpoch - invalidate.waitForEpoch); } @@ -125,7 +121,7 @@ public void serialize(Commit.Invalidate invalidate, DataOutputPlus out, int vers public Commit.Invalidate deserialize(DataInputPlus in, int version) throws IOException { TxnId txnId = CommandSerializers.txnId.deserialize(in, version); - Unseekables scope = KeySerializers.unseekables.deserialize(in, version); + Participants scope = KeySerializers.participants.deserialize(in, version); long waitForEpoch = in.readUnsignedVInt(); long invalidateUntilEpoch = in.readUnsignedVInt() + waitForEpoch; return Commit.Invalidate.SerializerSupport.create(txnId, scope, waitForEpoch, invalidateUntilEpoch); @@ -135,7 +131,7 @@ public Commit.Invalidate deserialize(DataInputPlus in, int version) throws IOExc public long serializedSize(Commit.Invalidate invalidate, int version) { return CommandSerializers.txnId.serializedSize(invalidate.txnId, version) - + KeySerializers.unseekables.serializedSize(invalidate.scope, version) + + KeySerializers.participants.serializedSize(invalidate.scope, version) + TypeSizes.sizeofUnsignedVInt(invalidate.waitForEpoch) + TypeSizes.sizeofUnsignedVInt(invalidate.invalidateUntilEpoch - invalidate.waitForEpoch); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java index 841d89882ef6..72dfc5f969d3 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/DepsSerializer.java @@ -21,15 +21,16 @@ import com.google.common.primitives.Ints; +import accord.primitives.AbstractUnseekableKeys; import accord.primitives.Deps; import accord.primitives.KeyDeps; -import accord.primitives.Keys; import accord.primitives.PartialDeps; import accord.primitives.Participants; import accord.primitives.Range; import accord.primitives.RangeDeps; -import accord.primitives.Seekables; +import accord.primitives.RoutingKeys; import accord.primitives.TxnId; +import accord.primitives.Unseekables; import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -43,7 +44,7 @@ import static accord.primitives.Routable.Domain.Key; import static org.apache.cassandra.db.TypeSizes.sizeofUnsignedVInt; -public abstract class DepsSerializer extends IVersionedWithKeysSerializer.AbstractWithKeysSerializer implements IVersionedWithKeysSerializer, D> +public abstract class DepsSerializer extends IVersionedWithKeysSerializer.AbstractWithKeysSerializer implements IVersionedWithKeysSerializer, D> { public static final DepsSerializer deps = new DepsSerializer<>() { @@ -72,7 +73,7 @@ public void serialize(PartialDeps partialDeps, DataOutputPlus out, int version) } @Override - public void serialize(Seekables superset, PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException + public void serialize(Unseekables superset, PartialDeps partialDeps, DataOutputPlus out, int version) throws IOException { super.serialize(superset, partialDeps, out, version); KeySerializers.participants.serialize(partialDeps.covering, out, version); @@ -86,7 +87,7 @@ public long serializedSize(PartialDeps partialDeps, int version) } @Override - public long serializedSize(Seekables keys, PartialDeps partialDeps, int version) + public long serializedSize(Unseekables keys, PartialDeps partialDeps, int version) { return super.serializedSize(keys, partialDeps, version) + KeySerializers.participants.serializedSize(partialDeps.covering, version); @@ -100,48 +101,48 @@ public long serializedSize(Seekables keys, PartialDeps partialDeps, int ve @Override public void serialize(D deps, DataOutputPlus out, int version) throws IOException { - KeySerializers.keys.serialize(deps.keyDeps.keys(), out, version); + KeySerializers.routingKeys.serialize(deps.keyDeps.keys(), out, version); serializeWithoutKeys(deps, out, version); } @Override - public void serialize(Seekables superset, D deps, DataOutputPlus out, int version) throws IOException + public void serialize(Unseekables superset, D deps, DataOutputPlus out, int version) throws IOException { if (superset.domain() == Key) serializeSubset(deps.keyDeps.keys(), superset, out); - else KeySerializers.keys.serialize(deps.keyDeps.keys(), out, version); + else KeySerializers.routingKeys.serialize(deps.keyDeps.keys(), out, version); serializeWithoutKeys(deps, out, version); } @Override public D deserialize(DataInputPlus in, int version) throws IOException { - Keys keys = KeySerializers.keys.deserialize(in, version); + RoutingKeys keys = KeySerializers.routingKeys.deserialize(in, version); return deserializeWithoutKeys(keys, in, version); } @Override - public D deserialize(Seekables superset, DataInputPlus in, int version) throws IOException + public D deserialize(Unseekables superset, DataInputPlus in, int version) throws IOException { - Keys keys; - if (superset.domain() == Key) keys = (Keys)deserializeSubset(superset, in); - else keys = KeySerializers.keys.deserialize(in, version); + RoutingKeys keys; + if (superset.domain() == Key) keys = ((AbstractUnseekableKeys)deserializeSubset(superset, in)).toParticipants(); + else keys = KeySerializers.routingKeys.deserialize(in, version); return deserializeWithoutKeys(keys, in, version); } @Override public long serializedSize(D deps, int version) { - long size = KeySerializers.keys.serializedSize(deps.keyDeps.keys(), version); + long size = KeySerializers.routingKeys.serializedSize(deps.keyDeps.keys(), version); size += serializedSizeWithoutKeys(deps, version); return size; } @Override - public long serializedSize(Seekables keys, D deps, int version) + public long serializedSize(Unseekables keys, D deps, int version) { long size; if (keys.domain() == Key) size = serializedSubsetSize(deps.keyDeps.keys(), keys); - else size = KeySerializers.keys.serializedSize(deps.keyDeps.keys(), version); + else size = KeySerializers.routingKeys.serializedSize(deps.keyDeps.keys(), version); size += serializedSizeWithoutKeys(deps, version); return size; } @@ -169,11 +170,11 @@ private void serializeWithoutKeys(D deps, DataOutputPlus out, int version) throw } { - Keys keys = deps.directKeyDeps.keys(); + RoutingKeys keys = deps.directKeyDeps.keys(); boolean isSubset = isSubset(keys, deps.keyDeps.keys()); out.writeBoolean(isSubset); if (isSubset) serializeSubset(keys, deps.keyDeps.keys(), out); - else KeySerializers.keys.serialize(keys, out, version); + else KeySerializers.routingKeys.serialize(keys, out, version); serializeKeyDepsWithoutKeys(deps.directKeyDeps, out, version); } @@ -192,7 +193,7 @@ private void serializeKeyDepsWithoutKeys(KeyDeps keyDeps, DataOutputPlus out, in out.writeUnsignedVInt32(keysToTxnIds(keyDeps, i)); } - private D deserializeWithoutKeys(Keys keys, DataInputPlus in, int version) throws IOException + private D deserializeWithoutKeys(RoutingKeys keys, DataInputPlus in, int version) throws IOException { KeyDeps keyDeps = deserializeKeyDeps(keys, in, version); @@ -219,14 +220,14 @@ private D deserializeWithoutKeys(Keys keys, DataInputPlus in, int version) throw KeyDeps directKeyDeps; { boolean isSubset = in.readBoolean(); - Keys directKeys = isSubset ? (Keys)deserializeSubset(keys, in) : KeySerializers.keys.deserialize(in, version); + RoutingKeys directKeys = isSubset ? (RoutingKeys)deserializeSubset(keys, in) : KeySerializers.routingKeys.deserialize(in, version); directKeyDeps = deserializeKeyDeps(directKeys, in, version); } return deserialize(keyDeps, rangeDeps, directKeyDeps, in, version); } - private static KeyDeps deserializeKeyDeps(Keys keys, DataInputPlus in, int version) throws IOException + private static KeyDeps deserializeKeyDeps(RoutingKeys keys, DataInputPlus in, int version) throws IOException { int txnIdCount = in.readUnsignedVInt32(); TxnId[] txnIds = new TxnId[txnIdCount]; @@ -266,7 +267,7 @@ private long serializedSizeWithoutKeys(D deps, int version) { boolean isSubset = isSubset(deps.directKeyDeps.keys(), deps.keyDeps.keys()); size += 1; - size += isSubset ? serializedSubsetSize(deps.directKeyDeps.keys(), deps.keyDeps.keys()) : KeySerializers.keys.serializedSize(deps.directKeyDeps.keys(), version); + size += isSubset ? serializedSubsetSize(deps.directKeyDeps.keys(), deps.keyDeps.keys()) : KeySerializers.routingKeys.serializedSize(deps.directKeyDeps.keys(), version); size += serializedSizeOfKeyDepsWithoutKeys(deps.directKeyDeps, version); } return size; @@ -286,7 +287,7 @@ private static long serializedSizeOfKeyDepsWithoutKeys(KeyDeps keyDeps, int vers return size; } - private static boolean isSubset(Keys test, Keys superset) + private static boolean isSubset(RoutingKeys test, RoutingKeys superset) { return test.foldl(superset, (k, p, v, i) -> v + 1, 0, 0, 0) == test.size(); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java index 7fe67842de5f..c716b2afd5e5 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetEphmrlReadDepsSerializers.java @@ -22,9 +22,8 @@ import accord.messages.GetEphemeralReadDeps; import accord.messages.GetEphemeralReadDeps.GetEphemeralReadDepsOk; -import accord.primitives.PartialDeps; +import accord.primitives.Deps; import accord.primitives.Route; -import accord.primitives.Seekables; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -38,23 +37,20 @@ public class GetEphmrlReadDepsSerializers @Override public void serializeBody(GetEphemeralReadDeps msg, DataOutputPlus out, int version) throws IOException { - KeySerializers.seekables.serialize(msg.keys, out, version); out.writeUnsignedVInt(msg.executionEpoch); } @Override public GetEphemeralReadDeps deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { - Seekables keys = KeySerializers.seekables.deserialize(in, version); long executionEpoch = in.readUnsignedVInt(); - return GetEphemeralReadDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, keys, executionEpoch); + return GetEphemeralReadDeps.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, executionEpoch); } @Override public long serializedBodySize(GetEphemeralReadDeps msg, int version) { - return KeySerializers.seekables.serializedSize(msg.keys, version) - + TypeSizes.sizeofUnsignedVInt(msg.executionEpoch); + return TypeSizes.sizeofUnsignedVInt(msg.executionEpoch); } }; @@ -63,14 +59,14 @@ public long serializedBodySize(GetEphemeralReadDeps msg, int version) @Override public void serialize(GetEphemeralReadDepsOk reply, DataOutputPlus out, int version) throws IOException { - DepsSerializer.partialDeps.serialize(reply.deps, out, version); + DepsSerializer.deps.serialize(reply.deps, out, version); out.writeUnsignedVInt(reply.latestEpoch); } @Override public GetEphemeralReadDepsOk deserialize(DataInputPlus in, int version) throws IOException { - PartialDeps deps = DepsSerializer.partialDeps.deserialize(in, version); + Deps deps = DepsSerializer.deps.deserialize(in, version); long latestEpoch = in.readUnsignedVInt(); return new GetEphemeralReadDepsOk(deps, latestEpoch); } @@ -78,7 +74,7 @@ public GetEphemeralReadDepsOk deserialize(DataInputPlus in, int version) throws @Override public long serializedSize(GetEphemeralReadDepsOk reply, int version) { - return DepsSerializer.partialDeps.serializedSize(reply.deps, version) + return DepsSerializer.deps.serializedSize(reply.deps, version) + TypeSizes.sizeofUnsignedVInt(reply.latestEpoch); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java index ad8af3ba88d5..bb580690c409 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/GetMaxConflictSerializers.java @@ -23,7 +23,6 @@ import accord.messages.GetMaxConflict; import accord.messages.GetMaxConflict.GetMaxConflictOk; import accord.primitives.Route; -import accord.primitives.Seekables; import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.db.TypeSizes; @@ -38,23 +37,20 @@ public class GetMaxConflictSerializers @Override public void serializeBody(GetMaxConflict msg, DataOutputPlus out, int version) throws IOException { - KeySerializers.seekables.serialize(msg.keys, out, version); out.writeUnsignedVInt(msg.executionEpoch); } @Override public GetMaxConflict deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { - Seekables keys = KeySerializers.seekables.deserialize(in, version); long executionEpoch = in.readUnsignedVInt(); - return GetMaxConflict.SerializationSupport.create(scope, waitForEpoch, minEpoch, keys, executionEpoch); + return GetMaxConflict.SerializationSupport.create(scope, waitForEpoch, minEpoch, executionEpoch); } @Override public long serializedBodySize(GetMaxConflict msg, int version) { - return KeySerializers.seekables.serializedSize(msg.keys, version) - + TypeSizes.sizeofUnsignedVInt(msg.executionEpoch); + return TypeSizes.sizeofUnsignedVInt(msg.executionEpoch); } }; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java index ef4b6d420201..3b4acb00a587 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/IVersionedWithKeysSerializer.java @@ -20,14 +20,16 @@ import java.io.IOException; -import accord.api.Key; +import accord.api.RoutingKey; import accord.primitives.AbstractKeys; -import accord.primitives.Keys; +import accord.primitives.AbstractRanges; +import accord.primitives.AbstractUnseekableKeys; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.RoutableKey; import accord.primitives.Routables; -import accord.primitives.Seekables; +import accord.primitives.RoutingKeys; +import accord.primitives.Unseekables; import net.nicoulaj.compilecommand.annotations.DontInline; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.IVersionedSerializer; @@ -123,7 +125,7 @@ abstract class AbstractWithKeysSerializer * If both ends have a pre-shared superset of the columns we are serializing, we can send them much * more efficiently. Both ends must provide the identically same set of columns. */ - protected void serializeSubset(Seekables serialize, Seekables superset, DataOutputPlus out) throws IOException + protected void serializeSubset(Routables serialize, Routables superset, DataOutputPlus out) throws IOException { /** * We weight this towards small sets, and sets where the majority of items are present, since @@ -149,10 +151,10 @@ else if (supersetCount < 64) { default: throw new AssertionError("Unhandled domain: " + serialize.domain()); case Key: - out.writeUnsignedVInt(encodeBitmap((Keys)serialize, (Keys)superset, supersetCount)); + out.writeUnsignedVInt(encodeBitmap((AbstractUnseekableKeys)serialize, (AbstractUnseekableKeys)superset, supersetCount)); break; case Range: - out.writeUnsignedVInt(encodeBitmap((Ranges)serialize, (Ranges)superset, supersetCount)); + out.writeUnsignedVInt(encodeBitmap((AbstractRanges)serialize, (AbstractRanges)superset, supersetCount)); break; } } @@ -162,16 +164,16 @@ else if (supersetCount < 64) { default: throw new AssertionError("Unhandled domain: " + serialize.domain()); case Key: - serializeLargeSubset((Keys)serialize, serializeCount, (Keys)superset, supersetCount, out); + serializeLargeSubset((AbstractUnseekableKeys)serialize, serializeCount, (AbstractUnseekableKeys)superset, supersetCount, out); break; case Range: - serializeLargeSubset((Ranges)serialize, serializeCount, (Ranges)superset, supersetCount, out); + serializeLargeSubset((AbstractRanges)serialize, serializeCount, (AbstractRanges)superset, supersetCount, out); break; } } } - public long serializedSubsetSize(Seekables serialize, Seekables superset) + public long serializedSubsetSize(Routables serialize, Routables superset) { int columnCount = serialize.size(); int supersetCount = superset.size(); @@ -185,9 +187,9 @@ else if (supersetCount < 64) { default: throw new AssertionError("Unhandled domain: " + serialize.domain()); case Key: - return TypeSizes.sizeofUnsignedVInt(encodeBitmap((Keys)serialize, (Keys)superset, supersetCount)); + return TypeSizes.sizeofUnsignedVInt(encodeBitmap((AbstractUnseekableKeys)serialize, (AbstractUnseekableKeys)superset, supersetCount)); case Range: - return TypeSizes.sizeofUnsignedVInt(encodeBitmap((Ranges)serialize, (Ranges)superset, supersetCount)); + return TypeSizes.sizeofUnsignedVInt(encodeBitmap((AbstractRanges)serialize, (AbstractRanges)superset, supersetCount)); } } else @@ -196,14 +198,14 @@ else if (supersetCount < 64) { default: throw new AssertionError("Unhandled domain: " + serialize.domain()); case Key: - return serializeLargeSubsetSize((Keys)serialize, columnCount, (Keys)superset, supersetCount); + return serializeLargeSubsetSize((AbstractUnseekableKeys)serialize, columnCount, (AbstractUnseekableKeys)superset, supersetCount); case Range: - return serializeLargeSubsetSize((Ranges)serialize, columnCount, (Ranges)superset, supersetCount); + return serializeLargeSubsetSize((AbstractRanges)serialize, columnCount, (AbstractRanges)superset, supersetCount); } } } - public Seekables deserializeSubset(Seekables superset, DataInputPlus in) throws IOException + public Unseekables deserializeSubset(Unseekables superset, DataInputPlus in) throws IOException { long encoded = in.readUnsignedVInt(); int supersetCount = superset.size(); @@ -224,8 +226,8 @@ else if (supersetCount >= 64) default: throw new AssertionError("Unhandled domain: " + superset.domain()); case Key: { - Keys keys = (Keys)superset; - Key[] out = new Key[deserializeCount]; + AbstractUnseekableKeys keys = (AbstractUnseekableKeys) superset; + RoutingKey[] out = new RoutingKey[deserializeCount]; int count = 0; while (encoded != 0) { @@ -233,11 +235,11 @@ else if (supersetCount >= 64) out[count++] = keys.get(Long.numberOfTrailingZeros(lowestBit)); encoded ^= lowestBit; } - return Keys.ofSortedUnique(out); + return RoutingKeys.ofSortedUnique(out); } case Range: { - Ranges ranges = (Ranges)superset; + AbstractRanges ranges = (AbstractRanges)superset; Range[] out = new Range[deserializeCount]; int count = 0; while (encoded != 0) @@ -264,7 +266,7 @@ private static long encodeBitmap(AbstractKeys seriali return bitmap; } - private static long encodeBitmap(Ranges serialize, Ranges superset, int supersetCount) + private static long encodeBitmap(AbstractRanges serialize, AbstractRanges superset, int supersetCount) { // the index we would encounter next if all columns are present long bitmap = superset.foldl(serialize, (k, p1, v, i) -> { @@ -299,7 +301,7 @@ private void serializeLargeSubset(AbstractKeys serial } @DontInline - private void serializeLargeSubset(Ranges serialize, int serializeCount, Ranges superset, int supersetCount, DataOutputPlus out) throws IOException + private void serializeLargeSubset(AbstractRanges serialize, int serializeCount, AbstractRanges superset, int supersetCount, DataOutputPlus out) throws IOException { out.writeUnsignedVInt32(supersetCount - serializeCount); int serializeIndex = 0, supersetIndex = 0; @@ -323,7 +325,7 @@ private void serializeLargeSubset(Ranges serialize, int serializeCount, Ranges s } @DontInline - private Seekables deserializeLargeSubset(DataInputPlus in, Seekables superset, int supersetCount, int delta) throws IOException + private Unseekables deserializeLargeSubset(DataInputPlus in, Unseekables superset, int supersetCount, int delta) throws IOException { int deserializeCount = supersetCount - delta; switch (superset.domain()) @@ -331,8 +333,8 @@ private void serializeLargeSubset(Ranges serialize, int serializeCount, Ranges s default: throw new AssertionError("Unhandled domain: " + superset.domain()); case Key: { - Keys keys = (Keys)superset; - Key[] out = new Key[deserializeCount]; + RoutingKeys keys = (RoutingKeys) superset; + RoutingKey[] out = new RoutingKey[deserializeCount]; int supersetIndex = 0; int count = 0; while (count < deserializeCount) @@ -341,11 +343,11 @@ private void serializeLargeSubset(Ranges serialize, int serializeCount, Ranges s while (takeCount-- > 0) out[count++] = keys.get(supersetIndex++); supersetIndex += in.readUnsignedVInt32(); } - return Keys.ofSortedUnique(out); + return RoutingKeys.ofSortedUnique(out); } case Range: { - Ranges ranges = (Ranges)superset; + AbstractRanges ranges = (AbstractRanges)superset; Range[] out = new Range[deserializeCount]; int supersetIndex = 0; int count = 0; @@ -386,7 +388,7 @@ private long serializeLargeSubsetSize(AbstractKeys se } @DontInline - private long serializeLargeSubsetSize(Ranges serialize, int serializeCount, Ranges superset, int supersetCount) + private long serializeLargeSubsetSize(AbstractRanges serialize, int serializeCount, AbstractRanges superset, int supersetCount) { long size = TypeSizes.sizeofUnsignedVInt(supersetCount - serializeCount); int serializeIndex = 0, supersetIndex = 0; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java index d23e6c99b92e..59c2d461b63e 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/InformDurableSerializers.java @@ -20,9 +20,9 @@ import java.io.IOException; -import accord.local.Status; import accord.messages.InformDurable; import accord.primitives.Route; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java index 1e105d8192ad..f2f9f8ac1a64 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/KeySerializers.java @@ -203,6 +203,8 @@ public long serializedSize(FullRangeRoute ranges, int version) EnumSet.allOf(UnseekablesKind.class) ); + public static final IVersionedSerializer> nullableParticipants = NullableSerializer.wrap(participants); + static class AbstractRoutablesSerializer> implements IVersionedSerializer { final EnumSet permitted; @@ -337,6 +339,7 @@ public long serializedSize(Seekables t, int version) }; public static final IVersionedSerializer> nullableSeekables = NullableSerializer.wrap(seekables); + public static final IVersionedSerializer> nullableUnseekables = NullableSerializer.wrap(unseekables); public static abstract class AbstractKeysSerializer> implements IVersionedSerializer { diff --git a/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java index 89f4abdec113..a030ba63b435 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/PreacceptSerializers.java @@ -42,14 +42,14 @@ public class PreacceptSerializers { private PreacceptSerializers() {} - public static final IVersionedSerializer request = new WithUnsyncedSerializer() + public static final IVersionedSerializer request = new WithUnsyncedSerializer<>() { @Override public void serializeBody(PreAccept msg, DataOutputPlus out, int version) throws IOException { CommandSerializers.partialTxn.serialize(msg.partialTxn, out, version); serializeNullable(msg.route, out, version, KeySerializers.fullRoute); - out.writeUnsignedVInt(msg.maxEpoch - msg.minEpoch); + out.writeUnsignedVInt(msg.acceptEpoch - msg.minEpoch); } @Override @@ -57,9 +57,9 @@ public PreAccept deserializeBody(DataInputPlus in, int version, TxnId txnId, Rou { PartialTxn partialTxn = CommandSerializers.partialTxn.deserialize(in, version); @Nullable FullRoute fullRoute = deserializeNullable(in, version, KeySerializers.fullRoute); - long maxEpoch = in.readUnsignedVInt() + minEpoch; + long acceptEpoch = in.readUnsignedVInt() + minEpoch; return PreAccept.SerializerSupport.create(txnId, scope, waitForEpoch, minEpoch, - maxEpoch, partialTxn, fullRoute); + acceptEpoch, partialTxn, fullRoute); } @Override @@ -67,11 +67,11 @@ public long serializedBodySize(PreAccept msg, int version) { return CommandSerializers.partialTxn.serializedSize(msg.partialTxn, version) + serializedNullableSize(msg.route, version, KeySerializers.fullRoute) - + TypeSizes.sizeofUnsignedVInt(msg.maxEpoch - msg.minEpoch); + + TypeSizes.sizeofUnsignedVInt(msg.acceptEpoch - msg.minEpoch); } }; - public static final IVersionedSerializer reply = new IVersionedSerializer() + public static final IVersionedSerializer reply = new IVersionedSerializer<>() { @Override public void serialize(PreAcceptReply reply, DataOutputPlus out, int version) throws IOException @@ -83,7 +83,7 @@ public void serialize(PreAcceptReply reply, DataOutputPlus out, int version) thr PreAcceptOk preAcceptOk = (PreAcceptOk) reply; CommandSerializers.txnId.serialize(preAcceptOk.txnId, out, version); CommandSerializers.timestamp.serialize(preAcceptOk.witnessedAt, out, version); - DepsSerializer.partialDeps.serialize(preAcceptOk.deps, out, version); + DepsSerializer.deps.serialize(preAcceptOk.deps, out, version); } @Override @@ -94,7 +94,7 @@ public PreAcceptReply deserialize(DataInputPlus in, int version) throws IOExcept return new PreAcceptOk(CommandSerializers.txnId.deserialize(in, version), CommandSerializers.timestamp.deserialize(in, version), - DepsSerializer.partialDeps.deserialize(in, version)); + DepsSerializer.deps.deserialize(in, version)); } @Override @@ -107,7 +107,7 @@ public long serializedSize(PreAcceptReply reply, int version) PreAcceptOk preAcceptOk = (PreAcceptOk) reply; size += CommandSerializers.txnId.serializedSize(preAcceptOk.txnId, version); size += CommandSerializers.timestamp.serializedSize(preAcceptOk.witnessedAt, version); - size += DepsSerializer.partialDeps.serializedSize(preAcceptOk.deps, version); + size += DepsSerializer.deps.serializedSize(preAcceptOk.deps, version); return size; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java index 00728a68f944..6ef51b957d3b 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/ReadDataSerializers.java @@ -87,7 +87,6 @@ public void serialize(ApplyThenWaitUntilApplied msg, DataOutputPlus out, int ver DepsSerializer.partialDeps.serialize(msg.deps, out, version); CommandSerializers.writes.serialize(msg.writes, out, version); TxnResult.serializer.serialize((TxnResult) msg.result, out, version); - KeySerializers.nullableSeekables.serialize(msg.notify, out, version); } @Override @@ -101,8 +100,7 @@ public ApplyThenWaitUntilApplied deserialize(DataInputPlus in, int version) thro CommandSerializers.partialTxn.deserialize(in, version), DepsSerializer.partialDeps.deserialize(in, version), CommandSerializers.writes.deserialize(in, version), - TxnResult.serializer.deserialize(in, version), - KeySerializers.nullableSeekables.deserialize(in, version)); + TxnResult.serializer.deserialize(in, version)); } @Override @@ -115,8 +113,7 @@ public long serializedSize(ApplyThenWaitUntilApplied msg, int version) + CommandSerializers.partialTxn.serializedSize(msg.txn, version) + DepsSerializer.partialDeps.serializedSize(msg.deps, version) + CommandSerializers.writes.serializedSize(msg.writes, version) - + TxnResult.serializer.serializedSize((TxnData)msg.result, version) - + KeySerializers.nullableSeekables.serializedSize(msg.notify, version); + + TxnResult.serializer.serializedSize((TxnData)msg.result, version); } } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java index 5caab8fb2b83..05f7d42c9008 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/RecoverySerializers.java @@ -24,7 +24,6 @@ import accord.api.Result; import accord.api.RoutingKey; -import accord.local.Status; import accord.messages.BeginRecovery; import accord.messages.BeginRecovery.RecoverNack; import accord.messages.BeginRecovery.RecoverOk; @@ -32,9 +31,11 @@ import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; +import accord.primitives.Known.KnownDeps; import accord.primitives.LatestDeps; import accord.primitives.PartialTxn; import accord.primitives.Route; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; @@ -42,6 +43,7 @@ import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.service.accord.serializers.TxnRequestSerializer.WithUnsyncedSerializer; import static org.apache.cassandra.utils.NullableSerializer.deserializeNullable; import static org.apache.cassandra.utils.NullableSerializer.serializeNullable; @@ -49,7 +51,7 @@ public class RecoverySerializers { - public static final IVersionedSerializer request = new TxnRequestSerializer() + public static final IVersionedSerializer request = new WithUnsyncedSerializer() { @Override public void serializeBody(BeginRecovery recover, DataOutputPlus out, int version) throws IOException @@ -57,15 +59,17 @@ public void serializeBody(BeginRecovery recover, DataOutputPlus out, int version CommandSerializers.partialTxn.serialize(recover.partialTxn, out, version); CommandSerializers.ballot.serialize(recover.ballot, out, version); serializeNullable(recover.route, out, version, KeySerializers.fullRoute); + out.writeUnsignedVInt(recover.executeAtOrTxnIdEpoch - recover.txnId.epoch()); } @Override - public BeginRecovery deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch) throws IOException + public BeginRecovery deserializeBody(DataInputPlus in, int version, TxnId txnId, Route scope, long waitForEpoch, long minEpoch) throws IOException { PartialTxn partialTxn = CommandSerializers.partialTxn.deserialize(in, version); Ballot ballot = CommandSerializers.ballot.deserialize(in, version); @Nullable FullRoute route = deserializeNullable(in, version, KeySerializers.fullRoute); - return BeginRecovery.SerializationSupport.create(txnId, scope, waitForEpoch, partialTxn, ballot, route); + long executeAtOrTxnIdEpoch = in.readUnsignedVInt32() + txnId.epoch(); + return BeginRecovery.SerializationSupport.create(txnId, scope, waitForEpoch, minEpoch, partialTxn, ballot, route, executeAtOrTxnIdEpoch); } @Override @@ -73,7 +77,8 @@ public long serializedBodySize(BeginRecovery recover, int version) { return CommandSerializers.partialTxn.serializedSize(recover.partialTxn, version) + CommandSerializers.ballot.serializedSize(recover.ballot, version) - + serializedNullableSize(recover.route, version, KeySerializers.fullRoute); + + serializedNullableSize(recover.route, version, KeySerializers.fullRoute) + + TypeSizes.sizeofUnsignedVInt(recover.executeAtOrTxnIdEpoch - recover.txnId.epoch()); } }; @@ -93,6 +98,7 @@ void serializeOk(RecoverOk recoverOk, DataOutputPlus out, int version) throws IO latestDeps.serialize(recoverOk.deps, out, version); DepsSerializer.deps.serialize(recoverOk.earlierCommittedWitness, out, version); DepsSerializer.deps.serialize(recoverOk.earlierAcceptedNoWitness, out, version); + out.writeBoolean(recoverOk.acceptsFastPath); out.writeBoolean(recoverOk.rejectsFastPath); CommandSerializers.nullableWrites.serialize(recoverOk.writes, out, version); } @@ -112,9 +118,9 @@ RecoverNack deserializeNack(Ballot supersededBy, DataInputPlus in, int version) return new RecoverNack(supersededBy); } - RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp executeAt, @Nonnull LatestDeps deps, Deps earlierCommittedWitness, Deps earlierAcceptedNoWitness, boolean rejectsFastPath, Writes writes, Result result, DataInputPlus in, int version) + RecoverOk deserializeOk(TxnId txnId, Status status, Ballot accepted, Timestamp executeAt, @Nonnull LatestDeps deps, Deps earlierCommittedWitness, Deps earlierAcceptedNoWitness, boolean acceptsFastPath, boolean rejectsFastPath, Writes writes, Result result, DataInputPlus in, int version) { - return new RecoverOk(txnId, status, accepted, executeAt, deps, earlierCommittedWitness, earlierAcceptedNoWitness, rejectsFastPath, writes, result); + return new RecoverOk(txnId, status, accepted, executeAt, deps, earlierCommittedWitness, earlierAcceptedNoWitness, acceptsFastPath, rejectsFastPath, writes, result); } @Override @@ -139,6 +145,7 @@ public RecoverReply deserialize(DataInputPlus in, int version) throws IOExceptio DepsSerializer.deps.deserialize(in, version), DepsSerializer.deps.deserialize(in, version), in.readBoolean(), + in.readBoolean(), CommandSerializers.nullableWrites.deserialize(in, version), result, in, @@ -159,6 +166,7 @@ long serializedOkSize(RecoverOk recoverOk, int version) size += latestDeps.serializedSize(recoverOk.deps, version); size += DepsSerializer.deps.serializedSize(recoverOk.earlierCommittedWitness, version); size += DepsSerializer.deps.serializedSize(recoverOk.earlierAcceptedNoWitness, version); + size += TypeSizes.sizeof(recoverOk.acceptsFastPath); size += TypeSizes.sizeof(recoverOk.rejectsFastPath); size += CommandSerializers.nullableWrites.serializedSize(recoverOk.writes, version); return size; @@ -207,7 +215,7 @@ public LatestDeps deserialize(DataInputPlus in, int version) throws IOException for (int i = 0 ; i < size ; ++i) { starts[i] = KeySerializers.routingKey.deserialize(in, version); - Status.KnownDeps knownDeps = CommandSerializers.nullableKnownDeps.deserialize(in, version); + KnownDeps knownDeps = CommandSerializers.nullableKnownDeps.deserialize(in, version); if (knownDeps == null) continue; diff --git a/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java index f42ff8687035..c1cd9b7eb72c 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/SetDurableSerializers.java @@ -19,11 +19,10 @@ import java.io.IOException; -import accord.api.RoutingKey; import accord.messages.SetGloballyDurable; import accord.messages.SetShardDurable; import accord.primitives.Deps; -import accord.primitives.Seekables; +import accord.primitives.FullRoute; import accord.primitives.SyncPoint; import accord.primitives.TxnId; import org.apache.cassandra.io.IVersionedSerializer; @@ -58,21 +57,19 @@ public long serializedSize(SetShardDurable msg, int version) @Override public void serialize(SetGloballyDurable msg, DataOutputPlus out, int version) throws IOException { - CommandSerializers.txnId.serialize(msg.txnId, out, version); CommandStoreSerializers.durableBefore.serialize(msg.durableBefore, out, version); } @Override public SetGloballyDurable deserialize(DataInputPlus in, int version) throws IOException { - return new SetGloballyDurable(CommandSerializers.txnId.deserialize(in, version), CommandStoreSerializers.durableBefore.deserialize(in, version)); + return new SetGloballyDurable(CommandStoreSerializers.durableBefore.deserialize(in, version)); } @Override public long serializedSize(SetGloballyDurable msg, int version) { - return CommandSerializers.txnId.serializedSize(msg.txnId, version) - + CommandStoreSerializers.durableBefore.serializedSize(msg.durableBefore, version); + return CommandStoreSerializers.durableBefore.serializedSize(msg.durableBefore, version); } }; @@ -83,8 +80,7 @@ public void serialize(SyncPoint sp, DataOutputPlus out, int version) throws IOEx { CommandSerializers.txnId.serialize(sp.syncId, out, version); DepsSerializer.deps.serialize(sp.waitFor, out, version); - KeySerializers.seekables.serialize(sp.keysOrRanges, out, version); - KeySerializers.routingKey.serialize(sp.homeKey, out, version); + KeySerializers.fullRoute.serialize(sp.route, out, version); } @Override @@ -92,9 +88,8 @@ public SyncPoint deserialize(DataInputPlus in, int version) throws IOException { TxnId syncId = CommandSerializers.txnId.deserialize(in, version); Deps waitFor = DepsSerializer.deps.deserialize(in, version); - Seekables keysOrRanges = KeySerializers.seekables.deserialize(in, version); - RoutingKey homeKey = KeySerializers.routingKey.deserialize(in, version); - return SyncPoint.SerializationSupport.construct(syncId, waitFor, keysOrRanges, homeKey); + FullRoute route = KeySerializers.fullRoute.deserialize(in, version); + return SyncPoint.SerializationSupport.construct(syncId, waitFor, route); } @Override @@ -102,8 +97,7 @@ public long serializedSize(SyncPoint sp, int version) { return CommandSerializers.txnId.serializedSize(sp.syncId, version) + DepsSerializer.deps.serializedSize(sp.waitFor, version) - + KeySerializers.seekables.serializedSize(sp.keysOrRanges, version) - + KeySerializers.routingKey.serializedSize(sp.homeKey, version); + + KeySerializers.fullRoute.serializedSize(sp.route, version); } }; } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java index 73708c125fa9..4d5fbad5242b 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/TopologySerializers.java @@ -152,7 +152,7 @@ public long serializedSize(Shard shard, int version) } }; - public static final IVersionedSerializer topology = new IVersionedSerializer() + public static final IVersionedSerializer topology = new IVersionedSerializer<>() { @Override public void serialize(Topology topology, DataOutputPlus out, int version) throws IOException @@ -167,7 +167,7 @@ public Topology deserialize(DataInputPlus in, int version) throws IOException { long epoch = in.readLong(); Shard[] shards = ArraySerializers.deserializeArray(in, version, shard, Shard[]::new); - Set staleIds = CollectionSerializers.deserializeSet(in, version, TopologySerializers.nodeId); + SortedArrayList staleIds = CollectionSerializers.deserializeSortedArrayList(in, version, TopologySerializers.nodeId, Node.Id[]::new); return new Topology(epoch, staleIds, shards); } diff --git a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java index fab09f235423..021f01ec753a 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/WaitingOnSerializer.java @@ -23,9 +23,9 @@ import accord.local.Command.WaitingOn; import accord.primitives.KeyDeps; -import accord.primitives.Keys; import accord.primitives.RangeDeps; import accord.primitives.Routable; +import accord.primitives.RoutingKeys; import accord.primitives.TxnId; import accord.utils.ImmutableBitSet; import accord.utils.Invariants; @@ -84,7 +84,7 @@ private static void serialize(int length, SimpleBitSet write, ByteBuffer out) out.putLong(bits[i]); } - public static WaitingOn deserialize(TxnId txnId, Keys keys, RangeDeps directRangeDeps, KeyDeps directKeyDeps, ByteBuffer in) throws IOException + public static WaitingOn deserialize(TxnId txnId, RoutingKeys keys, RangeDeps directRangeDeps, KeyDeps directKeyDeps, ByteBuffer in) throws IOException { int txnIdCount = directRangeDeps.txnIdCount() + directKeyDeps.txnIdCount(); int waitingOnLength = (txnIdCount + keys.size() + 63) / 64; diff --git a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java index f7081927eb65..cc005bf6e1fe 100644 --- a/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java +++ b/src/java/org/apache/cassandra/service/accord/txn/TxnWrite.java @@ -45,6 +45,7 @@ import accord.primitives.RoutableKey; import accord.primitives.Seekable; import accord.primitives.Timestamp; +import accord.primitives.TxnId; import accord.primitives.Writes; import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; @@ -144,7 +145,7 @@ public AsyncChain write(boolean preserveTimestamps, @Nonnull Function apply(Seekable key, SafeCommandStore safeStore, Timestamp executeAt, DataStore store, PartialTxn txn) + public AsyncChain apply(Seekable key, SafeCommandStore safeStore, TxnId txnId, Timestamp executeAt, DataStore store, PartialTxn txn) { // TODO (expected, efficiency): 99.9999% of the time we can just use executeAt.hlc(), so can avoid bringing // cfk into memory by retaining at all times in memory key ranges that are dirty and must use this logic; // any that aren't can just use executeAt.hlc - TimestampsForKey cfk = TimestampsForKeys.updateLastExecutionTimestamps((AbstractSafeCommandStore) safeStore, (Key) key, executeAt, true); + TimestampsForKey cfk = TimestampsForKeys.updateLastExecutionTimestamps((AbstractSafeCommandStore) safeStore, ((Key) key).toUnseekable(), txnId, executeAt, true); long timestamp = AccordSafeTimestampsForKey.timestampMicrosFor(cfk, executeAt, true); // TODO (low priority - do we need to compute nowInSeconds, or can we just use executeAt?) int nowInSeconds = AccordSafeTimestampsForKey.nowInSecondsFor(cfk, executeAt, true); diff --git a/src/java/org/apache/cassandra/tcm/membership/Directory.java b/src/java/org/apache/cassandra/tcm/membership/Directory.java index a4dab0bc2135..e2ba275cda8c 100644 --- a/src/java/org/apache/cassandra/tcm/membership/Directory.java +++ b/src/java/org/apache/cassandra/tcm/membership/Directory.java @@ -27,7 +27,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; @@ -360,9 +359,9 @@ public ImmutableList allAddresses() return ImmutableList.copyOf(peers.values()); } - public ImmutableSet peerIds() + public NavigableSet peerIds() { - return ImmutableSet.copyOf(peers.keySet()); + return peers.keySet(); } public NodeAddresses getNodeAddresses(NodeId id) diff --git a/src/java/org/apache/cassandra/utils/FBUtilities.java b/src/java/org/apache/cassandra/utils/FBUtilities.java index 7417f757bc88..a4493f30a201 100644 --- a/src/java/org/apache/cassandra/utils/FBUtilities.java +++ b/src/java/org/apache/cassandra/utils/FBUtilities.java @@ -65,11 +65,13 @@ import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; -import com.vdurmont.semver4j.Semver; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; +import com.vdurmont.semver4j.Semver; import org.apache.cassandra.audit.IAuditLogger; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.DecoratedKey; @@ -1458,4 +1460,16 @@ public static Order compare(A a, B b, AsymmetricOrdering comparator if (rc == 0) return Order.EQ; return Order.GT; } + + public static AsyncResult futureToAsyncResult(org.apache.cassandra.utils.concurrent.Future future) + { + AsyncResult.Settable adapter = AsyncResults.settable(); + future.addCallback((value, failure) -> { + if (failure != null) + adapter.tryFailure(failure); + else + adapter.trySuccess(value); + }); + return adapter; + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/utils/btree/AbstractBTreeMap.java b/src/java/org/apache/cassandra/utils/btree/AbstractBTreeMap.java index 3d0baf08f178..3abef3bca039 100644 --- a/src/java/org/apache/cassandra/utils/btree/AbstractBTreeMap.java +++ b/src/java/org/apache/cassandra/utils/btree/AbstractBTreeMap.java @@ -24,6 +24,7 @@ import java.util.Comparator; import java.util.Iterator; import java.util.Map; +import java.util.NavigableSet; import java.util.Set; import com.google.common.collect.Iterators; @@ -98,9 +99,9 @@ public V get(Object key) return null; } - private Set keySet = null; + private NavigableSet keySet = null; @Override - public Set keySet() + public NavigableSet keySet() { if (keySet == null) keySet = BTreeSet.wrap(BTree.transformAndFilter(tree, (entry) -> ((Map.Entry)entry).getKey()), comparator.keyComparator); diff --git a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java index dc873f02100f..9a0e2c5cb76e 100644 --- a/src/java/org/apache/cassandra/utils/vint/VIntCoding.java +++ b/src/java/org/apache/cassandra/utils/vint/VIntCoding.java @@ -331,6 +331,11 @@ public static long readVInt(DataInput input) throws IOException return decodeZigZag64(readUnsignedVInt(input)); } + public static long readVInt(ByteBuffer input) + { + return decodeZigZag64(readUnsignedVInt(input)); + } + /** * Read up to a signed 32-bit integer back. * diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index 14ae7d1cde7a..483f7e4f37a4 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -453,7 +453,7 @@ public void moveTest() throws Throwable PartitionKey partitionKey = new PartitionKey(tableId, dk); - awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(PreLoadContext.contextFor(partitionKey), + awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(PreLoadContext.contextFor(partitionKey.toUnseekable()), partitionKey.toUnseekable(), moveMax, moveMax, safeStore -> { if (!safeStore.ranges().allAt(preMove).contains(partitionKey)) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java index 732fe303cb7a..09a445b30ace 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordDropTableBase.java @@ -22,7 +22,7 @@ import com.google.common.base.Throwables; -import accord.api.Key; +import accord.api.RoutingKey; import accord.local.CommandStores; import accord.local.KeyHistory; import accord.local.PreLoadContext; @@ -45,8 +45,6 @@ import org.apache.cassandra.service.accord.TokenRange; import org.assertj.core.api.Assertions; -import static org.apache.cassandra.service.accord.AccordTestUtils.wrapInTxn; - public class AccordDropTableBase extends TestBaseImpl { protected static void addChaos(Cluster cluster, int example) @@ -137,7 +135,7 @@ protected static void validateAccord(Cluster cluster, TableId id) AccordCommandStore store = (AccordCommandStore) stores.forId(storeId); AsyncChains.getUnchecked(store.submit(ctx, input -> { AccordSafeCommandStore safe = (AccordSafeCommandStore) input; - for (Key key : safe.commandsForKeysKeys()) + for (RoutingKey key : safe.commandsForKeysKeys()) { AccordSafeCommandsForKey safeCFK = safe.maybeCommandsForKey(key); if (safeCFK == null) // we read and found a key, but its null at load time... so ignore it diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java index e30706a47ef4..c563650d728b 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordIncrementalRepairTest.java @@ -40,9 +40,11 @@ import accord.local.Node; import accord.local.PreLoadContext; import accord.local.SafeCommand; -import accord.local.Status; +import accord.local.StoreParticipants; import accord.local.cfk.CommandsForKey; +import accord.local.cfk.SafeCommandsForKey; import accord.primitives.Seekables; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.async.AsyncChains; @@ -61,13 +63,14 @@ import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.api.AccordAgent; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.Clock; import org.apache.cassandra.utils.concurrent.Future; import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; +import static accord.local.KeyHistory.COMMANDS; import static java.lang.String.format; public class AccordIncrementalRepairTest extends AccordTestBase @@ -100,9 +103,9 @@ public String toString() private final List barriers = new ArrayList<>(); @Override - public void onLocalBarrier(@Nonnull Seekables keysOrRanges, @Nonnull TxnId txnId) + public void onSuccessfulBarrier(@Nonnull TxnId txnId, @Nonnull Seekables keysOrRanges) { - super.onLocalBarrier(keysOrRanges, txnId); + super.onSuccessfulBarrier(txnId, keysOrRanges); synchronized (barriers) { barriers.add(new ExecutedBarrier(keysOrRanges, txnId)); @@ -215,19 +218,27 @@ private static V getUninterruptibly(Future future) return getUninterruptibly(future, 1, TimeUnit.MINUTES); } - private static TxnId awaitLocalApplyOnKey(PartitionKey key) + private static TxnId awaitLocalApplyOnKey(TableMetadata metadata, int k) + { + return awaitLocalApplyOnKey(new TokenKey(metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(k)).getToken())); + } + + private static TxnId awaitLocalApplyOnKey(TokenKey key) { Node node = accordService().node(); AtomicReference waitFor = new AtomicReference<>(null); - AsyncChains.awaitUninterruptibly(node.commandStores().ifLocal(PreLoadContext.contextFor(key), key.toUnseekable(), 0, Long.MAX_VALUE, safeStore -> { + AsyncChains.awaitUninterruptibly(node.commandStores().ifLocal(PreLoadContext.contextFor(key, COMMANDS), key.toUnseekable(), 0, Long.MAX_VALUE, safeStore -> { AccordSafeCommandStore store = (AccordSafeCommandStore) safeStore; - CommandsForKey commands = store.maybeCommandsForKey(key).current(); - int size = commands.size(); + SafeCommandsForKey safeCfk = store.maybeCommandsForKey(key); + if (safeCfk == null) + return; + CommandsForKey cfk = safeCfk.current(); + int size = cfk.size(); if (size < 1) return; // if txnId is an instance of CommandsForKey.TxnInfo, copying it into a // new txnId instance will prevent any issues related to TxnInfo#hashCode - waitFor.set(new TxnId(commands.txnId(size - 1))); + waitFor.set(new TxnId(cfk.txnId(size - 1))); })); Assert.assertNotNull(waitFor.get()); TxnId txnId = waitFor.get(); @@ -239,7 +250,7 @@ private static TxnId awaitLocalApplyOnKey(PartitionKey key) if (now - start > TimeUnit.MINUTES.toMillis(1)) throw new AssertionError("Timeout"); AsyncChains.awaitUninterruptibly(node.commandStores().ifLocal(PreLoadContext.contextFor(txnId), key.toUnseekable(), 0, Long.MAX_VALUE, safeStore -> { - SafeCommand command = safeStore.get(txnId, key.toUnseekable()); + SafeCommand command = safeStore.get(txnId, StoreParticipants.empty(txnId)); Assert.assertNotNull(command.current()); if (command.current().status().hasBeen(Status.Applied)) applied.set(true); @@ -267,7 +278,7 @@ public void txnRepairTest() throws Throwable SHARED_CLUSTER.get(1, 2).forEach(instance -> instance.runOnInstance(() -> { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); - awaitLocalApplyOnKey(new PartitionKey(metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(1)))); + awaitLocalApplyOnKey(metadata, 1); })); SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> agent().reset())); @@ -299,14 +310,12 @@ public void txnRepairTest() throws Throwable awaitEndpointUp(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); nodetool(SHARED_CLUSTER.get(1), "repair", KEYSPACE); - SHARED_CLUSTER.forEach(instance -> { - instance.runOnInstance(() -> { - Assert.assertFalse( agent().executedBarriers().isEmpty()); - ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); - Assert.assertFalse(cfs.getLiveSSTables().isEmpty()); - cfs.getLiveSSTables().forEach(sstable -> { - Assert.assertTrue(sstable.isRepaired() || sstable.isPendingRepair()); - }); + SHARED_CLUSTER.get(1).runOnInstance(() -> { + Assert.assertFalse( agent().executedBarriers().isEmpty()); + ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); + Assert.assertFalse(cfs.getLiveSSTables().isEmpty()); + cfs.getLiveSSTables().forEach(sstable -> { + Assert.assertTrue(sstable.isRepaired() || sstable.isPendingRepair()); }); }); } @@ -351,7 +360,7 @@ private void testSingleNodeWrite(TransactionalMode mode) })); nodetool(SHARED_CLUSTER.get(1), "repair", KEYSPACE); - SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> { + SHARED_CLUSTER.get(1).runOnInstance(() -> { Assert.assertFalse( agent().executedBarriers().isEmpty()); ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table); Assert.assertFalse(cfs.getLiveSSTables().isEmpty()); @@ -364,7 +373,7 @@ private void testSingleNodeWrite(TransactionalMode mode) UntypedResultSet.Row row = Iterables.getOnlyElement(result); Assert.assertEquals(1, row.getInt("k")); Assert.assertEquals(2, row.getInt("v")); - })); + }); } /** @@ -402,7 +411,7 @@ public void onlyAccordTest() SHARED_CLUSTER.get(1, 2).forEach(instance -> instance.runOnInstance(() -> { TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); - awaitLocalApplyOnKey(new PartitionKey(metadata.id, metadata.partitioner.decorateKey(ByteBufferUtil.bytes(1)))); + awaitLocalApplyOnKey(metadata, 1); })); SHARED_CLUSTER.forEach(instance -> instance.runOnInstance(() -> agent().reset())); @@ -411,11 +420,8 @@ public void onlyAccordTest() awaitEndpointUp(SHARED_CLUSTER.get(1), SHARED_CLUSTER.get(3)); nodetool(SHARED_CLUSTER.get(1), "repair", "--accord-only", KEYSPACE); - SHARED_CLUSTER.forEach(instance -> { - logger().info("checking instance {}", instance.broadcastAddress()); - instance.runOnInstance(() -> { - Assert.assertFalse( agent().executedBarriers().isEmpty()); - }); + SHARED_CLUSTER.get(1).runOnInstance(() -> { + Assert.assertFalse( agent().executedBarriers().isEmpty()); }); } } diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java index 80d48b6091b6..66d677080ebd 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java @@ -19,16 +19,20 @@ package org.apache.cassandra.distributed.test.accord; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +import org.junit.Assert; import org.junit.Test; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.shared.ClusterUtils; import org.apache.cassandra.distributed.shared.WithProperties; import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.concurrent.CountDownLatch; public class AccordJournalIntegrationTest extends TestBaseImpl @@ -36,14 +40,9 @@ public class AccordJournalIntegrationTest extends TestBaseImpl @Test public void saveLoadSanityCheck() throws Throwable { - String timeout = "10s"; try (WithProperties wp = new WithProperties().set(CassandraRelevantProperties.DTEST_ACCORD_JOURNAL_SANITY_CHECK_ENABLED, "true"); Cluster cluster = init(Cluster.build(1) .withoutVNodes() - .withConfig(c -> c - .set("read_request_timeout", timeout) - .set("transaction_timeout", timeout) - ) .start())) { final String TABLE = KEYSPACE + ".test_table"; @@ -86,4 +85,41 @@ public void saveLoadSanityCheck() throws Throwable cluster.coordinator(1).execute("SELECT * FROM " + TABLE + " WHERE k = ?;", ConsistencyLevel.SERIAL, 1); } } -} + + @Test + public void memtableStateReloadingTest() throws Throwable + { + try (Cluster cluster = Cluster.build(1) + .withoutVNodes() + .start()) + { + cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};"); + final String TABLE = KEYSPACE + ".test_table"; + cluster.schemaChange("CREATE TABLE " + TABLE + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'"); + + for (int j = 0; j < 1_000; j++) + { + cluster.coordinator(1).execute("BEGIN TRANSACTION\n" + + "INSERT INTO " + TABLE + "(k, c, v) VALUES (?, ?, ?);\n" + + "COMMIT TRANSACTION", + ConsistencyLevel.ALL, + j, j, 1 + ); + } + + Object[][] before = cluster.coordinator(1).execute("SELECT * FROM " + TABLE + " WHERE k = ?;", ConsistencyLevel.SERIAL, 1); + + cluster.get(1).runOnInstance(() -> { + ((AccordService) AccordService.instance()).journal().closeCurrentSegmentForTesting(); + }); + ClusterUtils.stopUnchecked(cluster.get(1)); + cluster.get(1).startup(); + + Object[][] after = cluster.coordinator(1).execute("SELECT * FROM " + TABLE + " WHERE k = ?;", ConsistencyLevel.SERIAL, 1); + for (int i = 0; i < before.length; i++) + { + Assert.assertTrue(Arrays.equals(before[i], after[i])); + } + } + } +} \ No newline at end of file diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java index 318b922d2924..92e81c73f378 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordTestBase.java @@ -337,7 +337,8 @@ private static Cluster createCluster(int nodes, Function optio .withConfig(c -> c.with(Feature.GOSSIP) .set("write_request_timeout", "10s") .set("transaction_timeout", "15s") - .set("native_transport_timeout", "30s")) + .set("native_transport_timeout", "30s") + .set("accord.shard_count", "2")) .withInstanceInitializer(EnforceUpdateDoesNotPerformRead::install); builder = options.apply(builder); return init(builder.start()); diff --git a/test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java deleted file mode 100644 index a2161c438661..000000000000 --- a/test/distributed/org/apache/cassandra/journal/AccordJournalCompactionTest.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.journal; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.io.util.File; -import org.apache.cassandra.service.accord.AccordJournalTable; -import org.apache.cassandra.service.accord.AccordSegmentCompactor; -import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.TimeUUID; - -import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID; - -public class AccordJournalCompactionTest -{ - private static final Set SENTINEL_HOSTS = Collections.singleton(0); - - @BeforeClass - public static void setUp() - { - DatabaseDescriptor.daemonInitialization(); - ServerTestUtils.prepareServer(); - } - - @Test - public void segmentMergeTest() throws IOException - { - File directory = new File(Files.createTempDirectory(null)); - directory.deleteOnExit(); - - Journal journal = journal(directory); - AccordJournalTable journalTable = new AccordJournalTable<>(journal, journal.keySupport, journal.params.userVersion()); - journal.start(); - - Map> uuids = new HashMap<>(); - - int count = 0; - for (int i = 0; i < 1024 * 5; i++) - { - TimeUUID uuid = nextTimeUUID(); - for (long j = 0; j < 5; j++) - { - ByteBuffer buf = ByteBuffer.allocate(1024); - for (int k = 0; k < 1024; k++) - buf.put((byte) count); - count++; - buf.rewind(); - uuids.computeIfAbsent(uuid, (k) -> new ArrayList<>()) - .add(buf); - journal.asyncWrite(uuid, buf, SENTINEL_HOSTS); - } - } - - journal.closeCurrentSegmentForTesting(); - Runnable checkAll = () -> { - for (Map.Entry> e : uuids.entrySet()) - { - List expected = e.getValue(); - - List actual = new ArrayList<>(); - journalTable.readAll(e.getKey(), (key, in, userVersion) -> actual.add(journal.valueSerializer.deserialize(key, in, userVersion))); - Assert.assertEquals(actual.size(), expected.size()); - for (int i = 0; i < actual.size(); i++) - { - if (!actual.get(i).equals(expected.get(i))) - { - StringBuilder sb = new StringBuilder(); - sb.append("Actual:\n"); - for (ByteBuffer bb : actual) - sb.append(ByteBufferUtil.bytesToHex(bb)).append('\n'); - sb.append("Expected:\n"); - for (ByteBuffer bb : expected) - sb.append(ByteBufferUtil.bytesToHex(bb)).append('\n'); - throw new AssertionError(sb.toString()); - } - } - } - }; - - checkAll.run(); - journal.runCompactorForTesting(); - checkAll.run(); - journal.shutdown(); - } - - private static Journal journal(File directory) - { - return new Journal<>("TestJournal", directory, - new TestParams() { - @Override - public int segmentSize() - { - return 1024 * 1024; - } - - @Override - public boolean enableCompaction() - { - return false; - } - }, - TimeUUIDKeySupport.INSTANCE, - JournalTest.ByteBufferSerializer.INSTANCE, - new AccordSegmentCompactor<>()); - } -} diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java new file mode 100644 index 000000000000..545d9c8b9f32 --- /dev/null +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service.accord; + +import java.nio.file.Files; +import java.util.NavigableMap; +import java.util.concurrent.atomic.AtomicInteger; + +import com.google.common.collect.ImmutableSortedMap; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import accord.local.DurableBefore; +import accord.local.RedundantBefore; +import accord.primitives.Deps; +import accord.primitives.KeyDeps; +import accord.primitives.Ranges; +import accord.primitives.Timestamp; +import accord.primitives.TxnId; +import accord.utils.AccordGens; +import accord.utils.DefaultRandom; +import accord.utils.Gen; +import accord.utils.RandomSource; +import org.apache.cassandra.ServerTestUtils; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.journal.TestParams; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; +import org.apache.cassandra.utils.AccordGenerators; +import org.apache.cassandra.utils.concurrent.Condition; + +import static accord.local.CommandStores.RangesForEpoch; +import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; +import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; +import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; + + +public class AccordJournalCompactionTest +{ + @BeforeClass + public static void setUp() throws Throwable + { + ServerTestUtils.daemonInitialization(); + StorageService.instance.registerMBeans(); + StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); + ServerTestUtils.prepareServerNoRegister(); + + StorageService.instance.initServer(); + Keyspace.setInitialized(); + } + + private AtomicInteger counter = new AtomicInteger(); + @Before + public void beforeTest() throws Throwable + { + File directory = new File(Files.createTempDirectory(Integer.toString(counter.incrementAndGet()))); + directory.deleteRecursiveOnExit(); + DatabaseDescriptor.setAccordJournalDirectory(directory.path()); + } + + @Test + public void segmentMergeTest() throws InterruptedException + { + RedundantBeforeAccumulator redundantBeforeAccumulator = new RedundantBeforeAccumulator(); + DurableBeforeAccumulator durableBeforeAccumulator = new DurableBeforeAccumulator(); + IdentityAccumulator> bootstrapBeganAtAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY)); + IdentityAccumulator> safeToReadAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY)); + IdentityAccumulator rangesForEpochAccumulator = new IdentityAccumulator<>(null); + HistoricalTransactionsAccumulator historicalTransactionsAccumulator = new HistoricalTransactionsAccumulator(); + + Gen basicRedundantBeforeGen = AccordGenerators.redundantBefore(DatabaseDescriptor.getPartitioner()); + Gen redundantBeforeGen = rs -> { + // TODO: find a better way to generate consecutive redundant befores + while (true) + { + RedundantBefore next = basicRedundantBeforeGen.next(rs); + try + { + RedundantBefore.merge(redundantBeforeAccumulator.get(), next); + return next; + } + catch (Throwable t) + { + // retry; + } + } + }; + Gen durableBeforeGen = AccordGenerators.durableBeforeGen(DatabaseDescriptor.getPartitioner()); + Gen> safeToReadGen = AccordGenerators.safeToReadGen(DatabaseDescriptor.getPartitioner()); + Gen rangesForEpochGen = AccordGenerators.rangesForEpoch(DatabaseDescriptor.getPartitioner()); + Gen historicalTransactionsGen = depsGen(); + + AccordJournal journal = new AccordJournal(new TestParams() + { + @Override + public int segmentSize() + { + return 1024 * 1024; + } + + @Override + public boolean enableCompaction() + { + return false; + } + }); + try + { + journal.start(null); + Timestamp timestamp = Timestamp.NONE; + + RandomSource rs = new DefaultRandom(); + + int count = 1_000; + Condition condition = Condition.newOneTimeCondition(); + for (int i = 0; i <= count; i++) + { + timestamp = timestamp.next(); + AccordSafeCommandStore.FieldUpdates updates = new AccordSafeCommandStore.FieldUpdates(); + updates.durableBefore = durableBeforeGen.next(rs); + updates.redundantBefore = redundantBeforeGen.next(rs); + updates.safeToRead = safeToReadGen.next(rs); + updates.rangesForEpoch = rangesForEpochGen.next(rs); + updates.historicalTransactions = historicalTransactionsGen.next(rs); + + if (i == count) + journal.persistStoreState(1, updates, condition::signal); + else + journal.persistStoreState(1, updates, null); + + redundantBeforeAccumulator.update(updates.redundantBefore); + durableBeforeAccumulator.update(updates.durableBefore); + if (updates.bootstrapBeganAt != null) + bootstrapBeganAtAccumulator.update(updates.bootstrapBeganAt); + safeToReadAccumulator.update(updates.safeToRead); + rangesForEpochAccumulator.update(updates.rangesForEpoch); + historicalTransactionsAccumulator.update(updates.historicalTransactions); + } + + condition.await(); + + journal.closeCurrentSegmentForTesting(); + journal.runCompactorForTesting(); + + Assert.assertEquals(redundantBeforeAccumulator.get(), journal.loadRedundantBefore(1)); + Assert.assertEquals(durableBeforeAccumulator.get(), journal.loadDurableBefore(1)); + Assert.assertEquals(bootstrapBeganAtAccumulator.get(), journal.loadBootstrapBeganAt(1)); + Assert.assertEquals(safeToReadAccumulator.get(), journal.loadSafeToRead(1)); + Assert.assertEquals(rangesForEpochAccumulator.get(), journal.loadRangesForEpoch(1)); + Assert.assertEquals(historicalTransactionsAccumulator.get(), journal.loadHistoricalTransactions(1)); + } + finally + { + journal.shutdown(); + } + } + + public static Gen depsGen() + { + Gen keyDepsGen = AccordGenerators.keyDepsGen(DatabaseDescriptor.getPartitioner()); + return AccordGens.deps(keyDepsGen::next, + (rs) -> Deps.NONE.rangeDeps, + (rs) -> Deps.NONE.directKeyDeps); + } +} diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 8ff893864511..4c339ebcfef4 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -64,7 +64,7 @@ public void simpleRWTest() ListenableFileSystem fs = new ListenableFileSystem(Jimfs.newFileSystem()); File.unsafeSetFilesystem(fs); DatabaseDescriptor.daemonInitialization(); - DatabaseDescriptor.setCommitLogCompression(new ParameterizedClass("LZ4Compressor", ImmutableMap.of())); // + DatabaseDescriptor.setCommitLogCompression(new ParameterizedClass("LZ4Compressor", ImmutableMap.of())); DatabaseDescriptor.setCommitLogWriteDiskAccessMode(Config.DiskAccessMode.standard); DatabaseDescriptor.initializeCommitLogDiskAccessMode(); DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance); @@ -130,12 +130,6 @@ public static void check() @Isolated public static class IdentityValueSerializer implements ValueSerializer { - @Override - public int serializedSize(String key, String value, int userVersion) - { - return TypeSizes.INT_SIZE + key.length(); - } - @Override public void serialize(String key, String value, DataOutputPlus out, int userVersion) throws IOException { diff --git a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java index 368687b5afb1..ad8f1ba08c4c 100644 --- a/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java +++ b/test/unit/org/apache/cassandra/cql3/conditions/ColumnConditionTest.java @@ -28,11 +28,6 @@ import org.junit.Test; import org.apache.cassandra.cql3.*; -import org.apache.cassandra.cql3.terms.Constants; -import org.apache.cassandra.cql3.terms.MultiElements; -import org.apache.cassandra.cql3.terms.Sets; -import org.apache.cassandra.cql3.terms.Term; -import org.apache.cassandra.cql3.terms.Terms; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.Int32Type; diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index e69bffb8e17e..181d2c7ca6ff 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -33,6 +33,7 @@ import com.google.common.collect.Iterators; import accord.local.CommandStores; +import accord.local.StoreParticipants; import accord.primitives.Route; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.distributed.shared.WithProperties; @@ -51,9 +52,9 @@ import accord.local.CommandStore; import accord.local.DurableBefore; import accord.local.RedundantBefore; -import accord.local.SaveStatus; -import accord.local.Status; -import accord.local.Status.Durability; +import accord.primitives.SaveStatus; +import accord.primitives.Status; +import accord.primitives.Status.Durability; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; @@ -85,7 +86,7 @@ import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; @@ -187,10 +188,10 @@ private void testAccordCommandsPurger(boolean singleCompaction) throws Throwable this.singleCompaction = singleCompaction; // Null redudnant before should make no change since we have no information on this CommandStore testAccordCommandsPurger(null, DurableBefore.EMPTY, expectAccordCommandsNoChange()); - // Universally durable (and global to boot) should be erased since literally everyone knows about it - // The way Commands.shouldCleanup was implemented (when this was written) it doesn't check redundantBefore - // at all for this - testAccordCommandsPurger(redundantBefore(LT_TXN_ID), durableBefore(UNIVERSAL), expectAccordCommandsErase()); + // Universally and locally durable (and global to boot) should be erased since literally everyone knows about it + testAccordCommandsPurger(redundantBefore(GT_TXN_ID), durableBefore(UNIVERSAL), expectAccordCommandsErase()); + // Universally durable but not locally; we're stale, but shouldn't erase + testAccordCommandsPurger(redundantBefore(LT_TXN_ID), durableBefore(UNIVERSAL), expectAccordCommandsNoChange()); // With redundantBefore at the txnId there should be no change because it is < not <= testAccordCommandsPurger(redundantBefore(TXN_ID), durableBefore(MAJORITY), expectAccordCommandsNoChange()); testAccordCommandsPurger(redundantBefore(LT_TXN_ID), durableBefore(MAJORITY), expectAccordCommandsNoChange()); @@ -262,7 +263,7 @@ private static Consumer> expectedAccordCommandsForKeyNoChange() return partitions -> { assertEquals(1, partitions.size()); Partition partition = partitions.get(0); - PartitionKey partitionKey = new PartitionKey(partition.metadata().id, partition.partitionKey()); + TokenKey partitionKey = new TokenKey(partition.metadata().id, partition.partitionKey().getToken()); CommandsForKey cfk = CommandsForKeysAccessor.getCommandsForKey(partitionKey, ((Row) partition.unfilteredIterator().next())); assertEquals(TXN_IDS.length, cfk.size()); for (int i = 0; i < TXN_IDS.length; ++i) @@ -384,7 +385,7 @@ private static RedundantBefore redundantBefore(TxnId txnId) { Ranges ranges = AccordTestUtils.fullRange(AccordTestUtils.keys(table, 42)); txnId = txnId.as(Kind.Read, Range); - return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, txnId, LT_TXN_ID.as(Range)); + return RedundantBefore.create(ranges, Long.MIN_VALUE, Long.MAX_VALUE, txnId, txnId, txnId, LT_TXN_ID.as(Range)); } enum DurableBeforeType @@ -469,19 +470,19 @@ private void testWithCommandStoreInternal(TestWithCommandStore test, boolean add PartialDeps partialDeps = Deps.NONE.intersecting(AccordTestUtils.fullRange(txn)); PartialTxn partialTxn = txn.slice(commandStore.unsafeRangesForEpoch().currentRanges(), true); Route partialRoute = route.slice(commandStore.unsafeRangesForEpoch().currentRanges()); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { + getUninterruptibly(commandStore.execute(contextFor(txnId, route, COMMANDS), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), txnId, partialDeps, appendDiffToKeyspace(commandStore)); + getUninterruptibly(commandStore.execute(contextFor(txnId, route, COMMANDS), safe -> { + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, txnId, partialDeps, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { + getUninterruptibly(commandStore.execute(contextFor(txnId, route, COMMANDS), safe -> { CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, partialTxn, txnId, partialDeps, appendDiffToKeyspace(commandStore)); }).beginAsResult()); flush(commandStore); - getUninterruptibly(commandStore.execute(contextFor(txnId, txn.keys(), COMMANDS), safe -> { + getUninterruptibly(commandStore.execute(contextFor(txnId, route, COMMANDS), safe -> { Pair result = AccordTestUtils.processTxnResultDirect(safe, txnId, partialTxn, txnId); CheckedCommands.apply(safe, txnId, route, txnId, partialDeps, partialTxn, result.left, result.right, appendDiffToKeyspace(commandStore)); }).beginAsResult()); @@ -489,8 +490,9 @@ private void testWithCommandStoreInternal(TestWithCommandStore test, boolean add // The apply chain is asychronous, so it is easiest to just spin until it is applied // in order to have the updated state in the system table spinAssertEquals(true, 5, () -> { - return getUninterruptibly(commandStore.submit(contextFor(txnId, txn.keys(), COMMANDS), safe -> { - Command command = safe.get(txnId, route.homeKey()).current(); + return getUninterruptibly(commandStore.submit(contextFor(txnId, route, COMMANDS), safe -> { + StoreParticipants participants = StoreParticipants.all(route); + Command command = safe.get(txnId, participants).current(); appendDiffToKeyspace(commandStore).accept(null, command); return command.hasBeen(Status.Applied); }).beginAsResult()); @@ -506,7 +508,7 @@ private void testWithCommandStoreInternal(TestWithCommandStore test, boolean add UntypedResultSet commandsForKeyTable = QueryProcessor.executeInternal("SELECT * FROM " + ACCORD_KEYSPACE_NAME + "." + COMMANDS_FOR_KEY + ";"); logger.info(commandsForKeyTable.toStringUnsafe()); assertEquals(1, commandsForKeyTable.size()); - CommandsForKey cfk = CommandsForKeySerializer.fromBytes((Key) key, commandsForKeyTable.iterator().next().getBytes("data")); + CommandsForKey cfk = CommandsForKeySerializer.fromBytes(((Key) key).toUnseekable(), commandsForKeyTable.iterator().next().getBytes("data")); assertEquals(txnIds.length, cfk.size()); for (int i = 0; i < txnIds.length; ++i) assertEquals(txnIds[i], cfk.txnId(i)); diff --git a/test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java b/test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java index 59eae7ab0c1a..7277edfa15c8 100644 --- a/test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java +++ b/test/unit/org/apache/cassandra/db/virtual/AccordVirtualTablesTest.java @@ -31,7 +31,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import accord.local.SaveStatus; +import accord.primitives.SaveStatus; import accord.messages.TxnRequest; import accord.primitives.Routable; import accord.primitives.Txn; diff --git a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java index 59a7cf437d71..fb278581dfa3 100644 --- a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java +++ b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java @@ -41,8 +41,9 @@ import accord.api.RoutingKey; import accord.local.Node; -import accord.local.SaveStatus; -import accord.local.Status; +import accord.local.StoreParticipants; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.FullKeyRoute; import accord.primitives.Range; import accord.primitives.Ranges; @@ -399,7 +400,7 @@ private void writeRecords(RandomSource rs, int minToken, int maxToken, int numRecords) { - var cql = "INSERT INTO system_accord.commands (store_id, domain, txn_id, status, route, durability) VALUES (?, ?, ?, ?, ?, ?)"; + var cql = "INSERT INTO system_accord.commands (store_id, domain, txn_id, status, participants, durability) VALUES (?, ?, ?, ?, ?, ?)"; for (int i = 0; i < numRecords; i++) { int store = rs.nextInt(0, numStores); @@ -409,8 +410,8 @@ private void writeRecords(RandomSource rs, ByteBuffer routeBB; try { - Route route = createRoute(rs, numRecords, i, rs.nextInt(1, 20), tables, minToken, maxToken); - for (var u : route) + StoreParticipants participants = StoreParticipants.all(createRoute(rs, numRecords, i, rs.nextInt(1, 20), tables, minToken, maxToken)); + for (var u : participants.route()) { switch (u.domain()) { @@ -439,7 +440,7 @@ private void writeRecords(RandomSource rs, throw new AssertionError("Unexpected domain: " + u.domain()); } } - routeBB = AccordKeyspace.serializeRoute(route); + routeBB = AccordKeyspace.serializeParticipants(participants); } catch (IOException e) { diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java index edc7f2c517a8..bd5bc5eb934a 100644 --- a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -36,8 +36,9 @@ import accord.api.RoutingKey; import accord.local.Node; -import accord.local.SaveStatus; -import accord.local.Status.Durability; +import accord.local.StoreParticipants; +import accord.primitives.SaveStatus; +import accord.primitives.Status.Durability; import accord.primitives.FullKeyRoute; import accord.primitives.Range; import accord.primitives.Ranges; @@ -250,12 +251,12 @@ private static Route createRoute(State state, RandomSource rs, Domain domain, private class InsertTxn implements UnitCommand { - private static final String cql = "INSERT INTO system_accord.commands (store_id, domain, txn_id, status, route, durability) VALUES (?, ?, ?, ?, ?, ?)"; + private static final String cql = "INSERT INTO system_accord.commands (store_id, domain, txn_id, status, participants, durability) VALUES (?, ?, ?, ?, ?, ?)"; private final int storeId; private final TxnId txnId; private final SaveStatus saveStatus; private final Durability durability; - private final Route route; + private final StoreParticipants participants; private InsertTxn(int storeId, TxnId txnId, SaveStatus saveStatus, Durability durability, Route route) { @@ -263,13 +264,13 @@ private InsertTxn(int storeId, TxnId txnId, SaveStatus saveStatus, Durability du this.txnId = txnId; this.saveStatus = saveStatus; this.durability = durability; - this.route = route; + this.participants = StoreParticipants.all(route); } @Override public void applyUnit(State state) { - for (var u : route) + for (var u : participants.route()) { switch (u.domain()) { @@ -302,7 +303,7 @@ public void applyUnit(State state) @Override public void runUnit(ColumnFamilyStore sut) throws Throwable { - execute(cql, storeId, txnId.domain().ordinal(), AccordKeyspace.serializeTimestamp(txnId), saveStatus.ordinal(), AccordKeyspace.serializeRoute(route), durability.ordinal()); + execute(cql, storeId, txnId.domain().ordinal(), AccordKeyspace.serializeTimestamp(txnId), saveStatus.ordinal(), AccordKeyspace.serializeParticipants(participants), durability.ordinal()); } @Override @@ -313,7 +314,7 @@ public String toString() ", txnId=" + txnId + ", saveStatus=" + saveStatus + ", durability=" + durability + - ", route=" + route + + ", participants=" + participants + '}'; } } diff --git a/test/unit/org/apache/cassandra/journal/JournalTest.java b/test/unit/org/apache/cassandra/journal/JournalTest.java index bab37ca1504f..30952a96d877 100644 --- a/test/unit/org/apache/cassandra/journal/JournalTest.java +++ b/test/unit/org/apache/cassandra/journal/JournalTest.java @@ -18,10 +18,8 @@ package org.apache.cassandra.journal; import java.io.IOException; -import java.nio.ByteBuffer; import java.nio.file.Files; import java.util.Collections; -import java.util.Set; import org.junit.BeforeClass; import org.junit.Test; @@ -38,8 +36,6 @@ public class JournalTest { - private static final Set SENTINEL_HOSTS = Collections.singleton(0); - @BeforeClass public static void setUp() { @@ -87,29 +83,6 @@ public void testSimpleReadWrite() throws IOException journal.shutdown(); } - static class ByteBufferSerializer implements ValueSerializer - { - static final ByteBufferSerializer INSTANCE = new ByteBufferSerializer(); - - public int serializedSize(TimeUUID key, ByteBuffer value, int userVersion) - { - return Integer.BYTES + value.capacity(); - } - - public void serialize(TimeUUID key, ByteBuffer value, DataOutputPlus out, int userVersion) throws IOException - { - out.writeInt(value.capacity()); - out.write(value); - } - - public ByteBuffer deserialize(TimeUUID key, DataInputPlus in, int userVersion) throws IOException - { - byte[] bytes = new byte[in.readInt()]; - in.readFully(bytes); - return ByteBuffer.wrap(bytes); - } - } - static class LongSerializer implements ValueSerializer { static final LongSerializer INSTANCE = new LongSerializer(); diff --git a/test/unit/org/apache/cassandra/service/StorageServiceTest.java b/test/unit/org/apache/cassandra/service/StorageServiceTest.java index 0b7ef7b1c471..8742cdac2395 100644 --- a/test/unit/org/apache/cassandra/service/StorageServiceTest.java +++ b/test/unit/org/apache/cassandra/service/StorageServiceTest.java @@ -68,6 +68,7 @@ public static void setUpClass() throws Exception ServerTestUtils.prepareServerNoRegister(); DatabaseDescriptor.daemonInitialization(); DatabaseDescriptor.setTransientReplicationEnabledUnsafe(true); + DatabaseDescriptor.setAccordTransactionsEnabled(false); ClusterMetadataService.instance().commit(new Register(NodeAddresses.current(), SimpleLocationProvider.LOCATION, diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index 036e513ae3fc..b6fca2e9ccd7 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -34,9 +34,10 @@ import accord.impl.TimestampsForKey; import accord.impl.TimestampsForKeys; import accord.local.Command; +import accord.local.StoreParticipants; import accord.local.cfk.CommandsForKey; import accord.local.CommonAttributes; -import accord.local.SaveStatus; +import accord.primitives.SaveStatus; import accord.primitives.Ballot; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; @@ -61,12 +62,13 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.utils.Pair; -import static accord.local.Status.Durability.Majority; +import static accord.primitives.Status.Durability.Majority; import static com.google.common.collect.Iterables.getOnlyElement; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.Commands.preaccepted; @@ -125,7 +127,7 @@ public void commandLoadSave() throws Throwable PartialTxn txn = createPartialTxn(0); Route route = RoutingKeys.of(key.toUnseekable()).toRoute(key.toUnseekable()); attrs.partialTxn(txn); - attrs.route(route); + attrs.setParticipants(StoreParticipants.all(route)); attrs.durability(Majority); attrs.partialTxn(txn); Ballot promised = ballot(1, clock.incrementAndGet(), 1); @@ -160,7 +162,7 @@ public void timestampsForKeyLoadSave() Timestamp maxTimestamp = timestamp(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(1); - PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) getOnlyElement(txn.keys())).toUnseekable(); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); @@ -170,10 +172,10 @@ public void timestampsForKeyLoadSave() AccordSafeTimestampsForKey tfk = new AccordSafeTimestampsForKey(loaded(key, null)); tfk.initialize(); - TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId1, true); + TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId1, txnId1, true); Assert.assertEquals(txnId1.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId1, true)); - TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId2, true); + TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId2, txnId2, true); Assert.assertEquals(txnId2.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId2, true)); Assert.assertEquals(txnId2, tfk.current().lastExecutedTimestamp()); @@ -200,7 +202,7 @@ public void commandsForKeyLoadSave() AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); PartialTxn txn = createPartialTxn(1); - PartitionKey key = (PartitionKey) getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) getOnlyElement(txn.keys())).toUnseekable(); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 8f01b8f6859b..28f54ce688fa 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -26,6 +26,7 @@ import accord.api.Key; import accord.api.RoutingKey; +import accord.local.StoreParticipants; import accord.local.cfk.CommandsForKey; import accord.local.Command; import accord.local.KeyHistory; @@ -33,7 +34,7 @@ import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; -import accord.local.Status; +import accord.primitives.Status; import accord.messages.Accept; import accord.messages.Commit; import accord.messages.PreAccept; @@ -105,7 +106,7 @@ public void basicCycleTest() throws Throwable // Check preaccept getUninterruptibly(commandStore.execute(preAccept, safeStore -> { - SafeCommand safeCommand = safeStore.get(txnId, txnId, route); + SafeCommand safeCommand = safeStore.get(txnId, StoreParticipants.all(route)); Command before = safeCommand.current(); PreAccept.PreAcceptReply reply = preAccept.apply(safeStore); Command after = safeCommand.current(); @@ -120,12 +121,12 @@ public void basicCycleTest() throws Throwable getUninterruptibly(commandStore.execute(preAccept, safeStore -> { Command before = safeStore.ifInitialised(txnId).current(); - SafeCommand safeCommand = safeStore.get(txnId, txnId, route); + SafeCommand safeCommand = safeStore.get(txnId, StoreParticipants.all(route)); Assert.assertEquals(txnId, before.executeAt()); Assert.assertEquals(Status.PreAccepted, before.status()); Assert.assertTrue(before.partialDeps() == null || before.partialDeps().isEmpty()); - CommandsForKey cfk = safeStore.get(key(1)).current(); + CommandsForKey cfk = safeStore.get(key(1).toUnseekable()).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); Command after = safeCommand.current(); AccordTestUtils.appendCommandsBlocking(commandStore, before, after); @@ -137,10 +138,10 @@ public void basicCycleTest() throws Throwable PartialDeps deps; try (PartialDeps.Builder builder = PartialDeps.builder(route)) { - builder.add(key, txnId2); + builder.add(key.toUnseekable(), txnId2); deps = builder.build(); } - Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, Ballot.ZERO, executeAt, partialTxn.keys(), deps); + Accept accept = Accept.SerializerSupport.create(txnId, route, 1, 1, Ballot.ZERO, executeAt, deps); getUninterruptibly(commandStore.execute(accept, safeStore -> { Command before = safeStore.ifInitialised(txnId).current(); @@ -157,23 +158,23 @@ public void basicCycleTest() throws Throwable Assert.assertEquals(Status.Accepted, before.status()); Assert.assertEquals(deps, before.partialDeps()); - CommandsForKey cfk = safeStore.get(key(1)).current(); + CommandsForKey cfk = safeStore.get(key(1).toUnseekable()).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); Command after = safeStore.ifInitialised(txnId).current(); AccordTestUtils.appendCommandsBlocking(commandStore, before, after); })); // check commit - Commit commit = Commit.SerializerSupport.create(txnId, route, 1, Commit.Kind.StableWithTxnAndDeps, Ballot.ZERO, executeAt, partialTxn.keys(), partialTxn, deps, fullRoute, null); + Commit commit = Commit.SerializerSupport.create(txnId, route, 1, 1, Commit.Kind.StableWithTxnAndDeps, Ballot.ZERO, executeAt, partialTxn, deps, fullRoute, null); getUninterruptibly(commandStore.execute(commit, commit::apply)); - getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key), KeyHistory.COMMANDS), safeStore -> { + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txnId, Keys.of(key).toParticipants(), KeyHistory.COMMANDS), safeStore -> { Command before = safeStore.ifInitialised(txnId).current(); Assert.assertEquals(commit.executeAt, before.executeAt()); Assert.assertTrue(before.hasBeen(Status.Committed)); Assert.assertEquals(commit.partialDeps, before.partialDeps()); - CommandsForKey cfk = safeStore.get(key(1)).current(); + CommandsForKey cfk = safeStore.get(key(1).toUnseekable()).current(); Assert.assertTrue(cfk.indexOf(txnId) >= 0); Command after = safeStore.ifInitialised(txnId).current(); AccordTestUtils.appendCommandsBlocking(commandStore, before, after); @@ -216,7 +217,7 @@ public void computeDeps() throws Throwable private static void persistDiff(AccordCommandStore commandStore, SafeCommandStore safeStore, TxnId txnId, Route route, Runnable runnable) { - SafeCommand safeCommand = safeStore.get(txnId, txnId, route); + SafeCommand safeCommand = safeStore.get(txnId, StoreParticipants.all(route)); Command before = safeCommand.current(); runnable.run(); Command after = safeCommand.current(); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java index 2c6dc9221c19..34a270f544e3 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.service.accord; -import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Random; @@ -64,7 +63,7 @@ public void simpleKeyTest() { if (new File(DatabaseDescriptor.getAccordJournalDirectory()).exists()) ServerTestUtils.cleanupDirectory(DatabaseDescriptor.getAccordJournalDirectory()); - AccordJournal accordJournal = new AccordJournal(SimpleAccordEndpointMapper.INSTANCE, TestParams.INSTANCE); + AccordJournal accordJournal = new AccordJournal(TestParams.INSTANCE); accordJournal.start(null); RandomSource randomSource = RandomSource.wrap(new Random()); TxnId id1 = AccordGens.txnIds().next(randomSource); @@ -74,11 +73,10 @@ public void simpleKeyTest() for (int i = 0; i < 10_000; i++) { TxnId txnId = randomSource.nextBoolean() ? id1 : id2; - JournalKey key = new JournalKey(txnId, randomSource.nextInt(5)); + JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, randomSource.nextInt(5)); res.compute(key, (k, prev) -> prev == null ? 1 : prev + 1); accordJournal.appendCommand(key.commandStoreId, - Collections.singletonList(new SavedCommand.DiffWriter(txnId, null, null)), - null, + new SavedCommand.DiffWriter(txnId, null, null), () -> {}); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java index 75a07196e220..c7b9a05c8c39 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalTest.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.nio.ByteBuffer; +import java.nio.file.Files; import java.util.ArrayList; import java.util.List; @@ -30,9 +31,15 @@ import accord.utils.AccordGens; import accord.utils.Gen; import accord.utils.Gens; +import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.io.util.File; +import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.AsymmetricOrdering; import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.FBUtilities.Order; @@ -45,9 +52,20 @@ public class AccordJournalTest { @BeforeClass - public static void setCompatibilityMode() + public static void setCompatibilityMode() throws IOException { CassandraRelevantProperties.TEST_STORAGE_COMPATIBILITY_MODE.setEnum(StorageCompatibilityMode.NONE); + + ServerTestUtils.daemonInitialization(); + StorageService.instance.registerMBeans(); + StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance); + ServerTestUtils.prepareServerNoRegister(); + + File directory = new File(Files.createTempDirectory(null)); + directory.deleteRecursiveOnExit(); + DatabaseDescriptor.setAccordJournalDirectory(directory.path()); + StorageService.instance.initServer(); + Keyspace.setInitialized(); } @Test @@ -122,6 +140,6 @@ private static ByteBuffer toBuffer(JournalKey k) private Gen keyGen() { Gen txnIdGen = AccordGens.txnIds(); - return rs -> new JournalKey(txnIdGen.next(rs)); + return rs -> new JournalKey(txnIdGen.next(rs), JournalKey.Type.COMMAND_DIFF, -1); } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java index 3939371ad93b..82e8f25afb1b 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordKeyspaceTest.java @@ -35,8 +35,7 @@ import accord.local.Command; import accord.local.CommonAttributes; import accord.local.Node; -import accord.local.SaveStatus; -import accord.local.Status; +import accord.local.StoreParticipants; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; @@ -46,6 +45,8 @@ import accord.primitives.RangeDeps; import accord.primitives.Ranges; import accord.primitives.Routable; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -67,7 +68,7 @@ import org.apache.cassandra.schema.SchemaProvider; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.utils.CassandraGenerators; import org.assertj.core.api.Assertions; import org.mockito.Mockito; @@ -109,11 +110,12 @@ public void serde() PartialTxn partialTxn = txn.slice(scope, true); RoutingKey routingKey = partialTxn.keys().get(0).asKey().toUnseekable(); FullRoute route = partialTxn.keys().toRoute(routingKey); - Deps deps = new Deps(KeyDeps.none((Keys) txn.keys()), RangeDeps.NONE, KeyDeps.NONE); + StoreParticipants participants = StoreParticipants.all(route); + Deps deps = new Deps(KeyDeps.none(((Keys) txn.keys()).toParticipants()), RangeDeps.NONE, KeyDeps.NONE); CommonAttributes.Mutable common = new CommonAttributes.Mutable(id); common.partialTxn(partialTxn); - common.route(route); + common.setParticipants(participants); common.partialDeps(deps.intersecting(scope)); common.durability(Status.Durability.NotDurable); Command.WaitingOn waitingOn = null; @@ -171,13 +173,13 @@ public void findOverlappingKeys() int numStores = rs.nextInt(1, 3); // The model of the DB - TreeMap> storesToKeys = new TreeMap<>(); + TreeMap> storesToKeys = new TreeMap<>(); // write to the table and the model for (int i = 0, numKeys = rs.nextInt(10, 20); i < numKeys; i++) { int store = rs.nextInt(0, numStores); var keys = storesToKeys.computeIfAbsent(store, ignore -> new TreeSet<>()); - PartitionKey pk = null; + TokenKey pk = null; // LocalPartitioner may have a type with a very small domain (boolean, vector, etc.), so need to bound the attempts // else this will loop forever... for (int attempt = 0; attempt < 10; attempt++) @@ -191,7 +193,7 @@ else if (partitioner instanceof LocalPartitioner) data = fromQT(getTypeSupport(partitioner.getTokenValidator()).bytesGen()).next(rs); else data = Int32Type.instance.decompose(rs.nextInt()); - PartitionKey key = new PartitionKey(tableId, tables.get(tableId).decorateKey(data)); + TokenKey key = new TokenKey(tableId, tables.get(tableId).decorateKey(data).getToken()); if (keys.add(key)) { pk = key; @@ -206,8 +208,8 @@ else if (partitioner instanceof LocalPartitioner) // The memtable will allow the write, but it will be dropped when writing to the SSTable... //TODO (now, correctness): since we store the user token + user key, if a key is close to the PK limits then we could tip over and loose our CFK // new Mutation(AccordKeyspace.getCommandsForKeyPartitionUpdate(store, pk, 42, ByteBufferUtil.EMPTY_BYTE_BUFFER)).apply(); - execute("INSERT INTO system_accord.commands_for_key (store_id, table_id, key_token, key) VALUES (?, ?, ?, ?)", - store, pk.table().asUUID(), AccordKeyspace.serializeRoutingKeyNoTable(pk.toUnseekable()), pk.partitionKey().getKey()); + execute("INSERT INTO system_accord.commands_for_key (store_id, table_id, key_token) VALUES (?, ?, ?)", + store, pk.table().asUUID(), AccordKeyspace.serializeRoutingKeyNoTable(pk)); } catch (IllegalArgumentException | InvalidRequestException e) { @@ -241,10 +243,10 @@ else if (partitioner instanceof LocalPartitioner) for (var e : storesToKeys.entrySet()) { int store = e.getKey(); - SortedSet keys = e.getValue(); + SortedSet keys = e.getValue(); if (keys.isEmpty()) continue; - expectedCqlStoresToKeys.put(store, new TreeSet<>(keys.stream().map(p -> AccordKeyspace.serializeRoutingKeyNoTable(p.toUnseekable())).collect(Collectors.toList()))); + expectedCqlStoresToKeys.put(store, new TreeSet<>(keys.stream().map(AccordKeyspace::serializeRoutingKeyNoTable).collect(Collectors.toList()))); } // make sure no data loss... when this test was written sstable had all the rows but the sstable didn't... this @@ -255,7 +257,6 @@ else if (partitioner instanceof LocalPartitioner) { int storeId = row.getInt("store_id"); ByteBuffer bb = row.getBytes("key_token"); - // FIXME: include table_id cqlStoresToKeys.computeIfAbsent(storeId, ignore -> new TreeSet<>()).add(bb); } Assertions.assertThat(cqlStoresToKeys).isEqualTo(expectedCqlStoresToKeys); @@ -280,12 +281,12 @@ else if (partitioner instanceof LocalPartitioner) offset = rs.nextInt(0, keysForStore.size()); offsetEnd = rs.nextInt(offset, keysForStore.size()) + 1; } - List expected = keysForStore.subList(offset, offsetEnd); - PartitionKey start = expected.get(0); - PartitionKey end = expected.get(expected.size() - 1); + List expected = keysForStore.subList(offset, offsetEnd); + TokenKey start = expected.get(0); + TokenKey end = expected.get(expected.size() - 1); - AsyncChain> map = Observable.asChain(callback -> AccordKeyspace.findAllKeysBetween(store, start.toUnseekable(), true, end.toUnseekable(), true, callback)); - List actual = AsyncChains.getUnchecked(map); + AsyncChain> map = Observable.asChain(callback -> AccordKeyspace.findAllKeysBetween(store, start, true, end, true, callback)); + List actual = AsyncChains.getUnchecked(map); Assertions.assertThat(actual).isEqualTo(expected); } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java index 5969770e2e18..8682d1df8113 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordSyncPropagatorTest.java @@ -44,7 +44,7 @@ import accord.impl.AbstractConfigurationService; import accord.impl.TestAgent; import accord.impl.basic.PendingQueue; -import accord.impl.basic.PropagatingPendingQueue; +import accord.impl.basic.MonitoredPendingQueue; import accord.impl.basic.RandomDelayQueue; import accord.impl.basic.SimulatedDelayedExecutorService; import accord.local.Node; @@ -103,7 +103,7 @@ public void burnTest() List failures = new ArrayList<>(); RandomDelayQueue delayQueue = new RandomDelayQueue.Factory(rs).get(); - PendingQueue queue = new PropagatingPendingQueue(failures, delayQueue); + PendingQueue queue = new MonitoredPendingQueue(failures, delayQueue); Agent agent = new TestAgent.RethrowAgent(); SimulatedDelayedExecutorService globalExecutor = new SimulatedDelayedExecutorService(queue, agent); ScheduledExecutorPlus scheduler = new AdaptingScheduledExecutorPlus(globalExecutor); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 80a7b4176918..57006fdbf26a 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -32,20 +32,15 @@ import java.util.stream.IntStream; import com.google.common.collect.Sets; +import org.junit.Assert; +import accord.api.Data; import accord.api.LocalListeners; import accord.api.ProgressLog.NoOpProgressLog; import accord.api.RemoteListeners; -import accord.impl.DefaultLocalListeners; -import accord.utils.SortedArrays.SortedArrayList; -import org.apache.cassandra.ServerTestUtils; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.io.util.File; -import org.junit.Assert; - -import accord.api.Data; import accord.api.Result; import accord.api.RoutingKey; +import accord.impl.DefaultLocalListeners; import accord.impl.InMemoryCommandStore; import accord.local.Command; import accord.local.CommandStore; @@ -57,7 +52,8 @@ import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; -import accord.local.SaveStatus; +import accord.local.StoreParticipants; +import accord.primitives.SaveStatus; import accord.primitives.Ballot; import accord.primitives.FullKeyRoute; import accord.primitives.FullRoute; @@ -74,12 +70,15 @@ import accord.primitives.Writes; import accord.topology.Shard; import accord.topology.Topology; +import accord.utils.SortedArrays.SortedArrayList; import accord.utils.async.AsyncChains; +import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.concurrent.ImmediateExecutor; import org.apache.cassandra.concurrent.ManualExecutor; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.AccordSpec; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.QueryOptions; import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.statements.TransactionStatement; @@ -88,6 +87,7 @@ import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.util.File; import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; @@ -124,7 +124,7 @@ public static Command preaccepted(TxnId txnId, PartialTxn txn, Timestamp execute { CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); attrs.partialTxn(txn); - attrs.route(route(txn)); + attrs.setParticipants(StoreParticipants.all(route(txn))); return Command.SerializerSupport.preaccepted(attrs, executeAt, Ballot.ZERO); } @@ -132,7 +132,7 @@ public static Command committed(TxnId txnId, PartialTxn txn, Timestamp executeAt { CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId).partialDeps(PartialDeps.NONE); attrs.partialTxn(txn); - attrs.route(route(txn)); + attrs.setParticipants(StoreParticipants.all(route(txn))); return Command.SerializerSupport.committed(attrs, SaveStatus.Committed, executeAt, @@ -145,7 +145,7 @@ public static Command stable(TxnId txnId, PartialTxn txn, Timestamp executeAt) { CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId).partialDeps(PartialDeps.NONE); attrs.partialTxn(txn); - attrs.route(route(txn)); + attrs.setParticipants(StoreParticipants.all(route(txn))); return Command.SerializerSupport.committed(attrs, SaveStatus.Stable, executeAt, @@ -229,7 +229,7 @@ public static Ballot ballot(long epoch, long hlc, int node) public static Pair processTxnResult(AccordCommandStore commandStore, TxnId txnId, PartialTxn txn, Timestamp executeAt) throws Throwable { AtomicReference> result = new AtomicReference<>(); - getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txn.keys()), + getUninterruptibly(commandStore.execute(PreLoadContext.contextFor(txn.keys().toParticipants()), safeStore -> result.set(processTxnResultDirect(safeStore, txnId, txn, executeAt)))); return result.get(); } @@ -398,7 +398,7 @@ public static AccordCommandStore createAccordCommandStore( if (new File(DatabaseDescriptor.getAccordJournalDirectory()).exists()) ServerTestUtils.cleanupDirectory(DatabaseDescriptor.getAccordJournalDirectory()); - AccordJournal journal = new AccordJournal(SimpleAccordEndpointMapper.INSTANCE, new AccordSpec.JournalSpec()); + AccordJournal journal = new AccordJournal(new AccordSpec.JournalSpec()); journal.start(null); SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); @@ -418,7 +418,11 @@ public static AccordCommandStore createAccordCommandStore( saveExecutor, new AccordStateCacheMetrics(AccordCommandStores.ACCORD_STATE_CACHE + System.currentTimeMillis())); holder.set(result); - result.updateRangesForEpoch(); + + // TODO: CompactionAccordIteratorsTest relies on this + result.execute(PreLoadContext.empty(), + result::updateRangesForEpoch) + .beginAsResult(); return result; } @@ -508,10 +512,10 @@ public static void appendCommandsBlocking(AccordCommandStore commandStore, Comma public static void appendCommandsBlocking(AccordCommandStore commandStore, Command before, Command after) { - SavedCommand.Writer diff = SavedCommand.diff(before, after); + SavedCommand.DiffWriter diff = SavedCommand.diff(before, after); if (diff == null) return; Condition condition = Condition.newOneTimeCondition(); - commandStore.appendCommands(Collections.singletonList(diff), null, condition::signal); + commandStore.appendCommands(Collections.singletonList(diff), condition::signal); condition.awaitUninterruptibly(30, TimeUnit.SECONDS); } } diff --git a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java index 5993d1d1338c..93e35bc92c25 100644 --- a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java +++ b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java @@ -27,7 +27,7 @@ import accord.api.RoutingKey; import accord.impl.IntKey; -import accord.local.SaveStatus; +import accord.primitives.SaveStatus; import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.TxnId; diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index a20ee61a0807..9f6827f1b941 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -18,39 +18,60 @@ package org.apache.cassandra.service.accord; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.NavigableMap; import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSortedMap; import accord.api.Result; import accord.local.Command; +import accord.local.CommandStores; import accord.local.CommonAttributes; -import accord.local.SaveStatus; -import accord.local.Status; +import accord.local.DurableBefore; +import accord.local.RedundantBefore; +import accord.local.StoreParticipants; +import accord.primitives.Known; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.Ballot; +import accord.primitives.Deps; import accord.primitives.PartialDeps; import accord.primitives.PartialTxn; -import accord.primitives.Route; -import accord.primitives.Seekables; +import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; import accord.utils.Invariants; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; import org.apache.cassandra.service.accord.serializers.CommandSerializers; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - public class MockJournal implements IJournal { private final Map> commands = new HashMap<>(); + private static class FieldUpdates + { + final RedundantBeforeAccumulator redundantBeforeAccumulator = new RedundantBeforeAccumulator(); + final DurableBeforeAccumulator durableBeforeAccumulator = new DurableBeforeAccumulator(); + final IdentityAccumulator> bootstrapBeganAtAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY)); + final IdentityAccumulator> safeToReadAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY)); + final IdentityAccumulator rangesForEpochAccumulator = new IdentityAccumulator<>(null); + final HistoricalTransactionsAccumulator historicalTransactionsAccumulator = new HistoricalTransactionsAccumulator(); + } + + private final Map fieldUpdates = new HashMap<>(); @Override - public Command loadCommand(int commandStoreId, TxnId txnId) + public Command loadCommand(int store, TxnId txnId) { - JournalKey key = new JournalKey(txnId, commandStoreId); + JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, store); List saved = commands.get(key); if (saved == null) return null; @@ -58,16 +79,77 @@ public Command loadCommand(int commandStoreId, TxnId txnId) } @Override - public void appendCommand(int commandStoreId, List> diffs, List sanityCheck, Runnable onFlush) + public RedundantBefore loadRedundantBefore(int store) { - for (SavedCommand.Writer diff : diffs) - { - SavedCommand.DiffWriter writer = (SavedCommand.DiffWriter) diff; + return fieldUpdates(store).redundantBeforeAccumulator.get(); + } + + @Override + public DurableBefore loadDurableBefore(int store) + { + return fieldUpdates(store).durableBeforeAccumulator.get(); + } + + @Override + public NavigableMap loadBootstrapBeganAt(int store) + { + return fieldUpdates(store).bootstrapBeganAtAccumulator.get(); + } + + @Override + public NavigableMap loadSafeToRead(int store) + { + return fieldUpdates(store).safeToReadAccumulator.get(); + } + + @Override + public CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int store) + { + return fieldUpdates(store).rangesForEpochAccumulator.get(); + } - JournalKey key = new JournalKey(diff.key(), commandStoreId); - commands.computeIfAbsent(key, (ignore_) -> new ArrayList<>()) - .add(diff(writer.before(), writer.after())); + @Override + public List loadHistoricalTransactions(int store) + { + return fieldUpdates(store).historicalTransactionsAccumulator.get(); + } + + @Override + public void appendCommand(int store, SavedCommand.DiffWriter diff, Runnable onFlush) + { + if (diff != null) + { + commands.computeIfAbsent(new JournalKey(diff.after().txnId(), JournalKey.Type.COMMAND_DIFF, store), + (ignore_) -> new ArrayList<>()) + .add(diff(diff.before(), diff.after())); } + + if (onFlush != null) + onFlush.run(); + } + + private FieldUpdates fieldUpdates(int store) + { + return fieldUpdates.computeIfAbsent(store, (o) -> new FieldUpdates()); + } + + @Override + public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fieldUpdates, Runnable onFlush) + { + FieldUpdates updates = fieldUpdates(store); + if (fieldUpdates.redundantBefore != null) + updates.redundantBeforeAccumulator.update(fieldUpdates.redundantBefore); + if (fieldUpdates.durableBefore != null) + updates.durableBeforeAccumulator.update(fieldUpdates.durableBefore); + if (fieldUpdates.bootstrapBeganAt != null) + updates.bootstrapBeganAtAccumulator.update(fieldUpdates.bootstrapBeganAt); + if (fieldUpdates.safeToRead != null) + updates.safeToReadAccumulator.update(fieldUpdates.safeToRead); + if (fieldUpdates.rangesForEpoch != null) + updates.rangesForEpochAccumulator.update(fieldUpdates.rangesForEpoch); + if (fieldUpdates.historicalTransactions != null) + updates.historicalTransactionsAccumulator.update(fieldUpdates.historicalTransactions); + onFlush.run(); } @@ -90,10 +172,9 @@ public static LoadedDiff diff(Command before, Command after) ifNotEqual(before, after, Command::acceptedOrCommitted, false), ifNotEqual(before, after, Command::promised, false), - ifNotEqual(before, after, Command::route, true), + ifNotEqual(before, after, Command::participants, false), ifNotEqual(before, after, Command::partialTxn, false), ifNotEqual(before, after, Command::partialDeps, false), - ifNotEqual(before, after, Command::additionalKeysOrRanges, false), new NewValue<>((k, deps) -> waitingOn), ifNotEqual(before, after, Command::writes, false)); @@ -122,10 +203,9 @@ static Command reconstructFromDiff(List diffs, Result result) Ballot acceptedOrCommitted = Ballot.ZERO; Ballot promised = null; - Route route = null; + StoreParticipants participants = null; PartialTxn partialTxn = null; PartialDeps partialDeps = null; - Seekables additionalKeysOrRanges = null; SavedCommand.WaitingOnProvider waitingOnProvider = null; Writes writes = null; @@ -146,14 +226,12 @@ static Command reconstructFromDiff(List diffs, Result result) if (diff.promised != null) promised = diff.promised.get(); - if (diff.route != null) - route = diff.route.get(); + if (diff.participants != null) + participants = diff.participants.get(); if (diff.partialTxn != null) partialTxn = diff.partialTxn.get(); if (diff.partialDeps != null) partialDeps = diff.partialDeps.get(); - if (diff.additionalKeysOrRanges != null) - additionalKeysOrRanges = diff.additionalKeysOrRanges.get(); if (diff.waitingOn != null) waitingOnProvider = diff.waitingOn.get(); @@ -166,15 +244,13 @@ static Command reconstructFromDiff(List diffs, Result result) attrs.partialTxn(partialTxn); if (durability != null) attrs.durability(durability); - if (route != null) - attrs.route(route); + if (participants != null) + attrs.setParticipants(participants); if (partialDeps != null && - (saveStatus.known.deps != Status.KnownDeps.NoDeps && - saveStatus.known.deps != Status.KnownDeps.DepsErased && - saveStatus.known.deps != Status.KnownDeps.DepsUnknown)) + (saveStatus.known.deps != Known.KnownDeps.NoDeps && + saveStatus.known.deps != Known.KnownDeps.DepsErased && + saveStatus.known.deps != Known.KnownDeps.DepsUnknown)) attrs.partialDeps(partialDeps); - if (additionalKeysOrRanges != null) - attrs.additionalKeysOrRanges(additionalKeysOrRanges); Command.WaitingOn waitingOn = null; if (waitingOnProvider != null) @@ -279,10 +355,9 @@ public static class LoadedDiff extends SavedCommand public final NewValue acceptedOrCommitted; public final NewValue promised; - public final NewValue> route; + public final NewValue participants; public final NewValue partialTxn; public final NewValue partialDeps; - public final NewValue> additionalKeysOrRanges; public final NewValue writes; public final NewValue waitingOn; @@ -295,10 +370,9 @@ public LoadedDiff(TxnId txnId, NewValue acceptedOrCommitted, NewValue promised, - NewValue> route, + NewValue participants, NewValue partialTxn, NewValue partialDeps, - NewValue> additionalKeysOrRanges, NewValue waitingOn, NewValue writes) @@ -311,10 +385,9 @@ public LoadedDiff(TxnId txnId, this.acceptedOrCommitted = acceptedOrCommitted; this.promised = promised; - this.route = route; + this.participants = participants; this.partialTxn = partialTxn; this.partialDeps = partialDeps; - this.additionalKeysOrRanges = additionalKeysOrRanges; this.writes = writes; diff --git a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java index 5135bde35f8c..1d86856922e6 100644 --- a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java @@ -27,7 +27,7 @@ import org.junit.Test; import accord.local.Command; -import accord.local.SaveStatus; +import accord.primitives.SaveStatus; import accord.primitives.TxnId; import accord.utils.Gen; import accord.utils.LazyToString; @@ -74,7 +74,7 @@ public void allNull() public void simpleNullChangeCheck() { int flags = getFlags(null, Command.NotDefined.uninitialised(TxnId.NONE)); - EnumSet has = EnumSet.of(Fields.TXN_ID, Fields.SAVE_STATUS, Fields.DURABILITY, Fields.PROMISED, + EnumSet has = EnumSet.of(Fields.TXN_ID, Fields.SAVE_STATUS, Fields.PARTICIPANTS, Fields.DURABILITY, Fields.PROMISED, Fields.ACCEPTED /* this is Zero... which kinda means null... */); Set missing = Sets.difference(ALL, has); assertHas(flags, has); @@ -87,7 +87,7 @@ public void serde() Gen gen = AccordGenerators.commandsBuilder(); try (DataOutputBuffer out = new DataOutputBuffer()) { - qt().forAll(gen).check(cmdBuilder -> { + qt().forAll(gen).withSeed(3447978952908153749L).check(cmdBuilder -> { int userVersion = 1; //TODO (maintance): where can we fetch all supported versions? SoftAssertions checks = new SoftAssertions(); for (SaveStatus saveStatus : SaveStatus.values()) diff --git a/test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java index 3d54d3af9d80..dd8678c1ef4e 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimpleSimulatedAccordCommandStoreTest.java @@ -21,8 +21,8 @@ import org.junit.Test; import accord.local.PreLoadContext; -import accord.local.SaveStatus; -import accord.primitives.Ranges; +import accord.local.StoreParticipants; +import accord.primitives.SaveStatus; import accord.primitives.TxnId; import accord.utils.AccordGens; import org.assertj.core.api.Assertions; @@ -42,7 +42,7 @@ public void emptyTxns() { TxnId id = AccordGens.txnIds().next(rs); instance.process(PreLoadContext.contextFor(id), (safe) -> { - var safeCommand = safe.get(id, id, Ranges.EMPTY); + var safeCommand = safe.get(id, StoreParticipants.empty(id)); var command = safeCommand.current(); Assertions.assertThat(command.saveStatus()).isEqualTo(SaveStatus.Uninitialised); return null; diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 68eadd0d1b7c..06469b0bb246 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -31,6 +31,7 @@ import accord.api.LocalListeners; import accord.api.ProgressLog; import accord.api.RemoteListeners; +import accord.api.RoutingKey; import accord.impl.DefaultLocalListeners; import accord.impl.SizeOfIntersectionSorter; import accord.impl.TestAgent; @@ -45,17 +46,18 @@ import accord.messages.BeginRecovery; import accord.messages.PreAccept; import accord.messages.TxnRequest; +import accord.primitives.AbstractUnseekableKeys; import accord.primitives.Ballot; import accord.primitives.FullRoute; -import accord.primitives.Keys; import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.RoutableKey; import accord.primitives.Routables; -import accord.primitives.Seekables; +import accord.primitives.RoutingKeys; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Unseekables; import accord.topology.Topologies; import accord.topology.Topology; import accord.utils.Gens; @@ -227,22 +229,22 @@ public TxnId nextTxnId(Txn.Kind kind, Routable.Domain domain) return new TxnId(timeService.epoch(), timeService.now(), kind, domain, nodeId); } - public void maybeCacheEvict(Seekables keysOrRanges) + public void maybeCacheEvict(Unseekables keysOrRanges) { switch (keysOrRanges.domain()) { case Key: - maybeCacheEvict((Keys) keysOrRanges, Ranges.EMPTY); + maybeCacheEvict((AbstractUnseekableKeys) keysOrRanges, Ranges.EMPTY); break; case Range: - maybeCacheEvict(Keys.EMPTY, (Ranges) keysOrRanges); + maybeCacheEvict(RoutingKeys.EMPTY, (Ranges) keysOrRanges); break; default: throw new UnsupportedOperationException("Unknown domain: " + keysOrRanges.domain()); } } - public void maybeCacheEvict(Keys keys, Ranges ranges) + public void maybeCacheEvict(Unseekables keys, Ranges ranges) { AccordStateCache cache = store.cache(); cache.forEach(state -> { @@ -294,14 +296,14 @@ else if (RoutableKey.class.isAssignableFrom(keyType)) } } - private static boolean intersects(ColumnFamilyStore store, Memtable memtable, Keys keys, Ranges ranges) + private static boolean intersects(ColumnFamilyStore store, Memtable memtable, Unseekables keys, Ranges ranges) { if (keys.isEmpty() && ranges.isEmpty()) // shouldn't happen, but just in case... return false; switch (store.name) { case "commands_for_key": - // pk = (store_id, key_token, key) + // pk = (store_id, routing_key) // since this is simulating a single store, store_id is a constant, so check key try (var it = memtable.partitionIterator(ColumnFilter.NONE, DataRange.allData(store.getPartitioner()), null)) { @@ -365,7 +367,7 @@ public Pair> enqueueBeginRecovery(Tx { TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); Ballot ballot = Ballot.fromValues(timeService.epoch(), timeService.now(), nodeId); - BeginRecovery br = new BeginRecovery(nodeId, topologies, txnId, txn, route, ballot); + BeginRecovery br = new BeginRecovery(nodeId, topologies, txnId, null, txn, route, ballot); return Pair.create(txnId, processAsync(br, safe -> { var reply = br.apply(safe); diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java index 1c05c0a0ad98..f9f09f5c5c33 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java @@ -30,7 +30,7 @@ import org.junit.Before; import org.junit.BeforeClass; -import accord.api.Key; +import accord.api.RoutingKey; import accord.impl.SizeOfIntersectionSorter; import accord.local.Node; import accord.messages.BeginRecovery; @@ -44,8 +44,10 @@ import accord.primitives.Range; import accord.primitives.Ranges; import accord.primitives.Routable; +import accord.primitives.RoutingKeys; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Unseekables; import accord.topology.Topologies; import accord.utils.Gen; import accord.utils.Gens; @@ -148,11 +150,11 @@ protected static AccordRoutingKey.TokenKey tokenKey(TableId id, long token) return new AccordRoutingKey.TokenKey(id, new Murmur3Partitioner.LongToken(token)); } - protected static Map> keyConflicts(List list, Keys keys) + protected static Map> keyConflicts(List list, Unseekables keys) { if (list.isEmpty()) return Collections.emptyMap(); - Map> kc = Maps.newHashMapWithExpectedSize(keys.size()); - for (Key key : keys) + Map> kc = Maps.newHashMapWithExpectedSize(keys.size()); + for (RoutingKey key : keys) kc.put(key, list); return kc; } @@ -169,7 +171,7 @@ protected static Map> rangeConflicts(List list, Ranges protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, DepsMessage messageType, Txn txn, FullRoute route, - Map> keyConflicts) throws ExecutionException, InterruptedException + Map> keyConflicts) throws ExecutionException, InterruptedException { return assertDepsMessage(instance, messageType, txn, route, keyConflicts, Collections.emptyMap()); } @@ -177,7 +179,7 @@ protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, DepsMessage messageType, Txn txn, FullRoute route, - Map> keyConflicts, + Map> keyConflicts, Map> rangeConflicts) throws ExecutionException, InterruptedException { var pair = assertDepsMessageAsync(instance, messageType, txn, route, keyConflicts, rangeConflicts); @@ -190,7 +192,7 @@ protected static TxnId assertDepsMessage(SimulatedAccordCommandStore instance, protected static Pair> assertDepsMessageAsync(SimulatedAccordCommandStore instance, DepsMessage messageType, Txn txn, FullRoute route, - Map> keyConflicts, + Map> keyConflicts, Map> rangeConflicts) { switch (messageType) @@ -208,10 +210,10 @@ protected static Pair> assertDepsMessageAsync(SimulatedAcc protected static Pair> assertPreAcceptAsync(SimulatedAccordCommandStore instance, Txn txn, FullRoute route, - Map> keyConflicts, + Map> keyConflicts, Map> rangeConflicts) { - Map> cloneKeyConflicts = keyConflicts.entrySet().stream() + Map> cloneKeyConflicts = keyConflicts.entrySet().stream() .filter(e -> !e.getValue().isEmpty()) .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); Map> cloneRangeConflicts = rangeConflicts.entrySet().stream() @@ -226,10 +228,10 @@ protected static Pair> assertPreAcceptAsync(SimulatedAccor protected static Pair> assertBeginRecoveryAsync(SimulatedAccordCommandStore instance, Txn txn, FullRoute route, - Map> keyConflicts, + Map> keyConflicts, Map> rangeConflicts) { - Map> cloneKeyConflicts = keyConflicts.entrySet().stream() + Map> cloneKeyConflicts = keyConflicts.entrySet().stream() .filter(e -> !e.getValue().isEmpty()) .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); Map> cloneRangeConflicts = rangeConflicts.entrySet().stream() @@ -245,10 +247,10 @@ protected static Pair> assertBeginRecoveryAsync(SimulatedA protected static Pair> assertBeginRecoveryAfterPreAcceptAsync(SimulatedAccordCommandStore instance, Txn txn, FullRoute route, - Map> keyConflicts, + Map> keyConflicts, Map> rangeConflicts) { - Map> cloneKeyConflicts = keyConflicts.entrySet().stream() + Map> cloneKeyConflicts = keyConflicts.entrySet().stream() .filter(e -> !e.getValue().isEmpty()) .collect(Collectors.toMap(e -> e.getKey(), e -> new ArrayList(e.getValue()))); Map> cloneRangeConflicts = rangeConflicts.entrySet().stream() @@ -267,7 +269,7 @@ protected static Pair> assertBeginRecoveryAfterPreAcceptAs }); var delay = preAcceptAsync.flatMap(ignore -> AsyncChains.ofCallable(instance.unorderedScheduled, () -> { Ballot ballot = Ballot.fromValues(instance.timeService.epoch(), instance.timeService.now(), nodeId); - return new BeginRecovery(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, instance.topology), txnId, txn, route, ballot); + return new BeginRecovery(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, instance.topology), txnId, null, txn, route, ballot); })); var recoverAsync = delay.flatMap(br -> instance.processAsync(br, safe -> { var reply = br.apply(safe); @@ -282,7 +284,7 @@ protected static Pair> assertBeginRecoveryAfterPreAcceptAs } protected static void assertDeps(TxnId txnId, Deps deps, - Map> keyConflicts, + Map> keyConflicts, Map> rangeConflicts) { if (rangeConflicts.isEmpty()) @@ -291,7 +293,7 @@ protected static void assertDeps(TxnId txnId, Deps deps, } else { - List actualRanges = IntStream.range(0, deps.rangeDeps.rangeCount()).mapToObj(i -> deps.rangeDeps.range(i)).collect(Collectors.toList()); + List actualRanges = IntStream.range(0, deps.rangeDeps.rangeCount()).mapToObj(deps.rangeDeps::range).collect(Collectors.toList()); // Assertions.assertThat(deps.rangeDeps.rangeCount()).describedAs("Txn %s Expected ranges size; %s", txnId, deps.rangeDeps).isEqualTo(rangeConflicts.size()); Assertions.assertThat(Ranges.of(actualRanges.toArray(Range[]::new))) .describedAs("Txn %s had different ranges than expected", txnId) @@ -324,7 +326,7 @@ protected static void assertDeps(TxnId txnId, Deps deps, } else { - Assertions.assertThat(deps.keyDeps.keys()).describedAs("Txn %s Keys", txnId).isEqualTo(Keys.of(keyConflicts.keySet())); + Assertions.assertThat(deps.keyDeps.keys()).describedAs("Txn %s Keys", txnId).isEqualTo(RoutingKeys.of(keyConflicts.keySet())); for (var key : keyConflicts.keySet()) Assertions.assertThat(deps.keyDeps.txnIds(key)).describedAs("Txn %s for key %s", txnId, key).isEqualTo(keyConflicts.get(key)); } diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java index 9fda5cd16f45..e3526e30b9e3 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java @@ -28,7 +28,7 @@ import org.junit.Test; -import accord.api.Key; +import accord.api.RoutingKey; import accord.primitives.FullKeyRoute; import accord.primitives.FullRangeRoute; import accord.primitives.FullRoute; @@ -40,6 +40,7 @@ import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; import static accord.utils.Property.qt; @@ -65,8 +66,8 @@ public void keyConflicts() List conflicts = new ArrayList<>(numSamples); for (int i = 0; i < numSamples; i++) { - instance.maybeCacheEvict(keys, Ranges.EMPTY); - conflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, keys))); + instance.maybeCacheEvict(route, Ranges.EMPTY); + conflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, route))); } } }); @@ -89,8 +90,8 @@ public void rangePartialKeyMatch() long outOfRangeToken = token - 10; if (outOfRangeToken == Long.MIN_VALUE) // if this wraps around that is fine, just can't be min outOfRangeToken++; - Key key = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(LongToken.keyForToken(token))); - Key outOfRangeKey = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(LongToken.keyForToken(outOfRangeToken))); + RoutingKey key = new TokenKey(tbl.id, new LongToken(token)); + RoutingKey outOfRangeKey = new TokenKey(tbl.id, new LongToken(outOfRangeToken)); Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)", "INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(LongToken.keyForToken(token), 42, @@ -111,7 +112,7 @@ public void rangePartialKeyMatch() List rangeConflicts = new ArrayList<>(numSamples); for (int i = 0; i < numSamples; i++) { - instance.maybeCacheEvict((Keys) keyTxn.keys(), partialRange); + instance.maybeCacheEvict(((Keys) keyTxn.keys()).toParticipants(), partialRange); for (int j = 0; j < numConflictKeyTxns; j++) outOfRangeKeyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), conflictingKeyTxn, conflictingRoute, Map.of(outOfRangeKey, outOfRangeKeyConflicts))); @@ -153,9 +154,9 @@ public void simpleRangeConflicts() List rangeConflicts = new ArrayList<>(numSamples); for (int i = 0; i < numSamples; i++) { - instance.maybeCacheEvict(keys, ranges); - keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); - rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts(rangeConflicts, instance.slice(ranges)))); + instance.maybeCacheEvict(keyRoute, ranges); + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keyRoute))); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keyRoute), rangeConflicts(rangeConflicts, instance.slice(ranges)))); } } }); @@ -187,9 +188,9 @@ public void expandingRangeConflicts() Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, partialRange); try { - instance.maybeCacheEvict(keys, partialRange); - keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); - rangeConflicts.put(partialRange.get(0), Collections.singletonList(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts))); + instance.maybeCacheEvict(keyRoute, partialRange); + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keyRoute))); + rangeConflicts.put(partialRange.get(0), Collections.singletonList(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keyRoute), rangeConflicts))); } catch (Throwable t) { @@ -231,12 +232,12 @@ public void overlappingRangeConflicts() Ranges partialRange = Ranges.of(rs.nextBoolean() ? left : right); try { - instance.maybeCacheEvict(keys, partialRange); - keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keys))); + instance.maybeCacheEvict(keyRoute, partialRange); + keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keyRoute))); FullRangeRoute rangeRoute = partialRange.toRoute(pk.toUnseekable()); Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, partialRange); - rangeConflicts.get(partialRange.get(0)).add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keys), rangeConflicts)); + rangeConflicts.get(partialRange.get(0)).add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keyRoute), rangeConflicts)); } catch (Throwable t) { diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java index 11497133b3cd..feaddeff8c4e 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedMultiKeyAndRangeTest.java @@ -73,7 +73,7 @@ public void test() Gen.LongGen tokenGen = tokenDistribution.next(rs); Gen domainGen = domainDistribution.next(rs); Gen msgGen = msgDistribution.next(rs); - Map> keyConflicts = new HashMap<>(); + Map> keyConflicts = new HashMap<>(); RangeTree rangeConflicts = RTree.create(RangeTreeRangeAccessor.instance); Gen.IntGen keyCountGen = keyDistribution.next(rs); @@ -97,13 +97,13 @@ public void test() binds.add(42); }); Txn txn = createTxn(wrapInTxn(inserts), binds); - FullRoute route = keys.toRoute(keys.get(0).toUnseekable()); + FullRoute route = keys.toRoute(keys.get(0).toUnseekable()); - Map> expectedConflicts = new HashMap<>(); - keys.forEach(k -> expectedConflicts.put(k, keyConflicts.computeIfAbsent(k, ignore -> new ArrayList<>()))); + Map> expectedConflicts = new HashMap<>(); + route.forEach(k -> expectedConflicts.put(k, keyConflicts.computeIfAbsent(k, ignore -> new ArrayList<>()))); TxnId id = assertDepsMessage(instance, msgGen.next(rs), txn, route, expectedConflicts, Collections.emptyMap()); - keys.forEach(k -> keyConflicts.get(k).add(id)); + route.forEach(k -> keyConflicts.get(k).add(id)); } break; case Range: @@ -133,7 +133,7 @@ public void test() FullRangeRoute route = ranges.toRoute(ranges.get(0).end()); Txn txn = createTxn(Txn.Kind.ExclusiveSyncPoint, ranges); - Map> expectedKeyConflicts = keyConflicts.entrySet().stream() + Map> expectedKeyConflicts = keyConflicts.entrySet().stream() .filter(e -> ranges.contains(e.getKey())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); Map> expectedRangeConflicts = new HashMap<>(); diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java index 31880d3297ec..9e9ac1fad7bb 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java @@ -27,14 +27,15 @@ import org.junit.Test; -import accord.api.Key; +import accord.api.RoutingKey; import accord.primitives.FullRangeRoute; import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.Ranges; import accord.primitives.Txn; import accord.primitives.TxnId; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import static accord.utils.Property.qt; import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; @@ -55,18 +56,18 @@ public void keysAllOverConflictingWithRange() AccordKeyspace.unsafeClear(); try (var instance = new SimulatedAccordCommandStore(rs)) { - Map> keyConflicts = new HashMap<>(); + Map> keyConflicts = new HashMap<>(); List rangeConflicts = new ArrayList<>(numSamples); for (int i = 0; i < numSamples; i++) { long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); - Key key = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(keyForToken(token))); + RoutingKey key = new TokenKey(tbl.id, new LongToken(token)); Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(keyForToken(token), 42)); Keys keys = (Keys) keyTxn.keys(); - FullRoute keyRoute = keys.toRoute(keys.get(0).toUnseekable()); + FullRoute keyRoute = keys.toRoute(keys.get(0).toUnseekable()); - instance.maybeCacheEvict((Keys) keyTxn.keys(), wholeRange); + instance.maybeCacheEvict(keyRoute, wholeRange); // the full range is (-Inf, +Inf] but the store could be [(-Inf, Number], (Number, +Inf]], so need to slice to the store to get a matching range Ranges wholeRangeSlicedShard = instance.slice(wholeRange); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 6ef6a9cde885..3a29e63e99f9 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -31,18 +31,20 @@ import org.junit.BeforeClass; import org.junit.Test; -import accord.api.Key; +import accord.api.RoutingKey; import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.KeyHistory; -import accord.primitives.Keys; import accord.primitives.PartialTxn; +import accord.primitives.RoutingKeys; import accord.primitives.TxnId; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; import accord.utils.async.AsyncResults; import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.concurrent.ExecutorFactory; +import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.concurrent.ManualExecutor; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.service.StorageService; @@ -53,9 +55,11 @@ import org.apache.cassandra.service.accord.AccordSafeState; import org.apache.cassandra.service.accord.AccordSafeTimestampsForKey; import org.apache.cassandra.service.accord.AccordStateCache; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.service.accord.async.AsyncOperation.Context; import org.apache.cassandra.utils.concurrent.AsyncPromise; +import org.apache.cassandra.utils.concurrent.Condition; import static accord.local.KeyHistory.COMMANDS; import static accord.local.KeyHistory.TIMESTAMPS; @@ -95,10 +99,10 @@ public void cachedTest() AccordStateCache.Instance commandCache = commandStore.commandCache(); commandStore.executeBlocking(() -> commandStore.setCapacity(1024)); - AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); + AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) Iterables.getOnlyElement(txn.keys())).toUnseekable(); // acquire / release @@ -108,13 +112,13 @@ public void cachedTest() AccordCachingState safeCommandGlobal = safeCommand.global(); commandCache.release(safeCommand); - timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((PartitionKey) k)); + timestampsCache.unsafeSetLoadFunction(k -> new TimestampsForKey((TokenKey) k)); AccordSafeTimestampsForKey safeTimestamps = timestampsCache.acquire(key); testLoad(executor, safeTimestamps, new TimestampsForKey(key)); - AccordCachingState safeTimestampsGlobal = safeTimestamps.global(); + AccordCachingState safeTimestampsGlobal = safeTimestamps.global(); timestampsCache.release(safeTimestamps); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), TIMESTAMPS); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), RoutingKeys.of(key), TIMESTAMPS); // everything is cached, so the loader should return immediately commandStore.executeBlocking(() -> { @@ -139,7 +143,7 @@ public void loadTest() AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) Iterables.getOnlyElement(txn.keys())).toUnseekable(); // create / persist AccordSafeCommand safeCommand = new AccordSafeCommand(loaded(txnId, null)); @@ -154,7 +158,7 @@ public void loadTest() AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, timestamps.current(), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), TIMESTAMPS); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), RoutingKeys.of(key), TIMESTAMPS); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { @@ -191,7 +195,7 @@ public void partialLoadTest() AccordStateCache.Instance commandCache = commandStore.commandCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) Iterables.getOnlyElement(txn.keys())).toUnseekable(); // acquire /release, create / persist commandCache.unsafeSetLoadFunction(id -> notDefined(id, txn)); @@ -202,7 +206,7 @@ public void partialLoadTest() AccordKeyspace.getTimestampsForKeyMutation(commandStore.id(), null, new TimestampsForKey(key), commandStore.nextSystemTimestampMicros()).apply(); // resources are on disk only, so the loader should suspend... - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), TIMESTAMPS); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), RoutingKeys.of(key), TIMESTAMPS); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { @@ -242,7 +246,7 @@ public void inProgressLoadTest() throws Throwable AccordStateCache.Instance commandCache = commandStore.commandCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) Iterables.getOnlyElement(txn.keys())).toUnseekable(); commandCache.unsafeSetLoadFunction(id -> { Assert.assertEquals(txnId, id); return notDefined(id, txn); }); AccordSafeCommand safeCommand = commandCache.acquire(txnId); @@ -250,7 +254,7 @@ public void inProgressLoadTest() throws Throwable Assert.assertTrue(commandCache.isReferenced(txnId)); Assert.assertFalse(commandCache.isLoaded(txnId)); - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(key), KeyHistory.NONE); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), RoutingKeys.of(key), KeyHistory.NONE); // since there's a read future associated with the txnId, we'll wait for it to load AsyncPromise cbFired = new AsyncPromise<>(); @@ -284,7 +288,9 @@ public void inProgressLoadTest() throws Throwable public void failedLoadTest() throws Throwable { AtomicLong clock = new AtomicLong(0); - AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); + ExecutorPlus executor = ExecutorFactory.Global.executorFactory().sequential("GlobalLogFollower"); + AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); + TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); TxnId txnId2 = txnId(1, clock.incrementAndGet(), 1); @@ -292,27 +298,38 @@ public void failedLoadTest() throws Throwable AsyncResult.Settable callback = AsyncResults.settable(); RuntimeException failure = new RuntimeException(); + Condition startResponding = Condition.newOneTimeCondition(); + Condition loadedAll = Condition.newOneTimeCondition(); execute(commandStore, () -> { AtomicInteger loadCalls = new AtomicInteger(); commandStore.commandCache().unsafeSetLoadFunction(txnId -> { + startResponding.awaitUninterruptibly(); loadCalls.incrementAndGet(); - if (txnId.equals(txnId1)) + + if (!txnId.equals(txnId1) && !txnId.equals(txnId2)) + throw new AssertionError("Unknown txnId: " + txnId); + + if (loadCalls.get() == 2) + { + loadedAll.signal(); throw failure; - else if (txnId.equals(txnId2)) - return notDefined(txnId, null); - throw new AssertionError("Unknown txnId: " + txnId); + } + + return notDefined(txnId, null); }); - AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), Keys.EMPTY, KeyHistory.COMMANDS); + AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), RoutingKeys.EMPTY, KeyHistory.COMMANDS); - boolean result = loader.load(new Context(), (u, t) -> { + boolean result = loader.load(new Context(), (u, t) -> { Assert.assertFalse(callback.isDone()); Assert.assertNull(u); Assert.assertEquals(failure, t); callback.trySuccess(null); }); + startResponding.signal(); + loadedAll.awaitUninterruptibly(); Assert.assertFalse(result); Assert.assertEquals(2, loadCalls.get()); }); @@ -348,7 +365,7 @@ public void inProgressCommandSaveTest() Assert.assertEquals(AccordCachingState.Status.SAVING, commandCache.getUnsafe(txnId).status()); // since the command is still saving, the loader shouldn't be able to acquire a reference - AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), Keys.of(), KeyHistory.NONE); + AsyncLoader loader = new AsyncLoader(commandStore, singleton(txnId), RoutingKeys.of(), KeyHistory.NONE); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { @@ -382,10 +399,10 @@ public void inProgressCFKSaveTest() @Test public void inProgressTFKSaveTest() { - inProgressCFKSaveTest(TIMESTAMPS, AccordCommandStore::timestampsForKeyCache, context -> context.timestampsForKey, TimestampsForKey::new, (tfk, c) -> new TimestampsForKey(tfk.key(), c.executeAt(), c.executeAt().hlc(), c.executeAt())); + inProgressCFKSaveTest(TIMESTAMPS, AccordCommandStore::timestampsForKeyCache, context -> context.timestampsForKey, TimestampsForKey::new, (tfk, c) -> new TimestampsForKey(tfk.key(), c.executeAt(), c.executeAt().hlc(), c.txnId(), c.executeAt())); } - private , C extends AccordStateCache.Instance> void inProgressCFKSaveTest(KeyHistory history, Function getter, Function> inContext, Function initialiser, BiFunction update) + private , C extends AccordStateCache.Instance> void inProgressCFKSaveTest(KeyHistory history, Function getter, Function> inContext, Function initialiser, BiFunction update) { AtomicLong clock = new AtomicLong(0); ManualExecutor executor = new ManualExecutor(); @@ -397,11 +414,11 @@ private , C extends AccordStateCache.Ins TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) Iterables.getOnlyElement(txn.keys())).toUnseekable(); Command preaccepted = preaccepted(txnId, txn, txnId); // acquire / release - T2 safe = cache.acquireOrInitialize(key, k -> initialiser.apply((Key)k)); + T2 safe = cache.acquireOrInitialize(key, k -> initialiser.apply(k)); safe.preExecute(); safe.set(update.apply(safe.current(), preaccepted)); cache.release(safe); @@ -411,7 +428,7 @@ private , C extends AccordStateCache.Ins Assert.assertEquals(AccordCachingState.Status.SAVING, cache.getUnsafe(key).status()); // since the command is still saving, the loader shouldn't be able to acquire a reference - AsyncLoader loader = new AsyncLoader(commandStore, emptyList(), Keys.of(key), history); + AsyncLoader loader = new AsyncLoader(commandStore, emptyList(), RoutingKeys.of(key), history); AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index aca870245406..31988224b437 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -27,6 +27,8 @@ import java.util.function.BiConsumer; import java.util.function.Consumer; +import accord.local.StoreParticipants; +import accord.primitives.Participants; import accord.primitives.Route; import accord.utils.DefaultRandom; import com.google.common.collect.Iterables; @@ -48,7 +50,7 @@ import accord.local.PreLoadContext; import accord.local.SafeCommand; import accord.local.SafeCommandStore; -import accord.local.SaveStatus; +import accord.primitives.SaveStatus; import accord.primitives.Ballot; import accord.primitives.FullRoute; import accord.primitives.Keys; @@ -78,6 +80,7 @@ import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.AccordStateCache; import org.apache.cassandra.service.accord.AccordTestUtils; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.AssertionUtils; import org.apache.cassandra.utils.FBUtilities; @@ -150,7 +153,8 @@ public void touchUnknownTxn() throws Throwable TxnId txnId = txnId(1, clock.incrementAndGet(), 1); getUninterruptibly(commandStore.execute(contextFor(txnId), safe -> { - SafeCommand command = safe.get(txnId, txnId, safe.ranges().currentRanges()); + StoreParticipants participants = StoreParticipants.empty(txnId); + SafeCommand command = safe.get(txnId, participants); Assert.assertNotNull(command); })); @@ -163,7 +167,7 @@ public void optionalCommandsForKeyTest() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); Txn txn = AccordTestUtils.createWriteTxn((int)clock.incrementAndGet()); - PartitionKey key = (PartitionKey) Iterables.getOnlyElement(txn.keys()); + TokenKey key = ((PartitionKey) Iterables.getOnlyElement(txn.keys())).toUnseekable(); getUninterruptibly(commandStore.execute(contextFor(key), instance -> { SafeCommandsForKey cfk = ((AccordSafeCommandStore) instance).maybeCommandsForKey(key); @@ -211,7 +215,7 @@ private static Command createStableUsingFastLifeCycle(AccordCommandStore command try { - Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { + Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, route, COMMANDS), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, appendDiffToLog(commandStore)); CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); return safe.ifInitialised(txnId).current(); @@ -258,9 +262,9 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command try { - Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, partialTxn.keys(), COMMANDS), safe -> { + Command command = getUninterruptibly(commandStore.submit(contextFor(txnId, route, COMMANDS), safe -> { CheckedCommands.preaccept(safe, txnId, partialTxn, route, appendDiffToLog(commandStore)); - CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, partialTxn.keys(), executeAt, deps, appendDiffToLog(commandStore)); + CheckedCommands.accept(safe, txnId, Ballot.ZERO, partialRoute, executeAt, deps, appendDiffToLog(commandStore)); CheckedCommands.commit(safe, SaveStatus.Committed, Ballot.ZERO, txnId, route, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); CheckedCommands.commit(safe, SaveStatus.Stable, Ballot.ZERO, txnId, route, partialTxn, executeAt, deps, appendDiffToLog(commandStore)); return safe.ifInitialised(txnId).current(); @@ -370,13 +374,13 @@ public void loadFail() .check((rs, ids) -> { before(); // truncate tables - - assertNoReferences(commandStore, ids, keys); + Participants participants = keys.toParticipants(); + assertNoReferences(commandStore, ids, participants); createCommand(commandStore, rs, ids); - awaitDone(commandStore, ids, keys); - assertNoReferences(commandStore, ids, keys); + awaitDone(commandStore, ids, participants); + assertNoReferences(commandStore, ids, participants); - PreLoadContext ctx = contextFor(null, ids, keys, COMMANDS); + PreLoadContext ctx = contextFor(null, ids, participants, COMMANDS); Consumer consumer = Mockito.mock(Consumer.class); Map failed = selectFailedTxn(rs, ids); @@ -396,10 +400,10 @@ public void loadFail() Mockito.verifyNoInteractions(consumer); - assertNoReferences(commandStore, ids, keys); + assertNoReferences(commandStore, ids, participants); // the first failed load causes the whole operation to fail, so some ids may still be pending // to make sure the next operation does not see a PENDING that will fail, wait for all loads to complete - awaitDone(commandStore, ids, keys); + awaitDone(commandStore, ids, participants); // can we recover? commandStore.commandCache().unsafeSetLoadFunction(txnId -> { @@ -412,8 +416,8 @@ public void loadFail() }); }); getUninterruptibly(o2); - awaitDone(commandStore, ids, keys); - assertNoReferences(commandStore, ids, keys); + awaitDone(commandStore, ids, participants); + assertNoReferences(commandStore, ids, participants); }); } @@ -432,10 +436,11 @@ public void consumerFails() logger.info("Test #{}", counter.incrementAndGet()); before(); // truncate tables - assertNoReferences(commandStore, ids, keys); + Participants participants = keys.toParticipants(); + assertNoReferences(commandStore, ids, participants); createCommand(commandStore, rs, ids); - PreLoadContext ctx = contextFor(null, ids, keys, COMMANDS); + PreLoadContext ctx = contextFor(null, ids, participants, COMMANDS); Consumer consumer = Mockito.mock(Consumer.class); String errorMsg = "txn_ids " + ids; @@ -449,7 +454,7 @@ public void consumerFails() .hasMessage(errorMsg) .hasNoSuppressedExceptions(); - assertNoReferences(commandStore, ids, keys); + assertNoReferences(commandStore, ids, participants); }); } @@ -482,7 +487,7 @@ private static Map selectFailedTxn(RandomSource rs, List return failed; } - private static void assertNoReferences(AccordCommandStore commandStore, List ids, Keys keys) + private static void assertNoReferences(AccordCommandStore commandStore, List ids, Participants keys) { AssertionError error = null; try @@ -533,7 +538,7 @@ private static void assertNoReferences(AccordStateCache.Instance ca if (error != null) throw error; } - private static void awaitDone(AccordCommandStore commandStore, List ids, Keys keys) + private static void awaitDone(AccordCommandStore commandStore, List ids, Participants keys) { awaitDone(commandStore.commandCache(), ids); awaitDone(commandStore.commandsForKeyCache(), keys); diff --git a/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java index 9ade46795475..39b12a862a92 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java @@ -26,18 +26,18 @@ import org.junit.Before; import org.junit.Test; -import accord.api.Key; +import accord.api.RoutingKey; import accord.impl.basic.SimulatedFault; import accord.local.PreLoadContext; import accord.local.SafeCommandStore; import accord.messages.PreAccept; import accord.primitives.FullRoute; -import accord.primitives.Keys; import accord.primitives.Range; import accord.primitives.Ranges; -import accord.primitives.Seekables; +import accord.primitives.RoutingKeys; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.primitives.Unseekables; import accord.utils.Gen; import accord.utils.Gens; import accord.utils.RandomSource; @@ -51,7 +51,6 @@ import org.apache.cassandra.service.accord.SimulatedAccordCommandStoreTestBase; import org.apache.cassandra.service.accord.TokenRange; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; -import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.utils.Pair; import org.assertj.core.api.Assertions; @@ -90,10 +89,10 @@ private static void test(RandomSource rs, int numSamples, TableMetadata tbl, Gen long minToken = 0; long maxToken = numKeys; - Gen keyGen = Gens.longs().between(minToken + 1, maxToken).map(t -> new PartitionKey(tbl.id, tbl.partitioner.decorateKey(LongToken.keyForToken(t)))); - Gen keysGen = Gens.lists(keyGen).unique().ofSizeBetween(1, 10).map(l -> Keys.of(l)); + Gen keyGen = Gens.longs().between(minToken + 1, maxToken).map(t -> new TokenKey(tbl.id, new LongToken(t))); + Gen keysGen = Gens.lists(keyGen).unique().ofSizeBetween(1, 10).map(l -> RoutingKeys.of(l)); Gen rangesGen = Gens.lists(rangeInsideRange(tbl.id, minToken, maxToken)).uniqueBestEffort().ofSizeBetween(1, 10).map(l -> Ranges.of(l.toArray(Range[]::new))); - Gen> seekablesGen = Gens.oneOf(keysGen, rangesGen); + Gen> unseekablesGen = Gens.oneOf(keysGen, rangesGen); Gen>> txnGen = randomTxn(mixedDomainGen.next(rs), mixedTokenGen.next(rs)); try (var instance = new SimulatedAccordCommandStore(rs)) @@ -107,7 +106,7 @@ private static void test(RandomSource rs, int numSamples, TableMetadata tbl, Gen { case Task: { - PreLoadContext ctx = PreLoadContext.contextFor(seekablesGen.next(rs)); + PreLoadContext ctx = PreLoadContext.contextFor(unseekablesGen.next(rs)); instance.maybeCacheEvict(ctx.keys()); operation(instance, ctx, actionGen.next(rs), rs::nextBoolean).begin(counter); } @@ -129,7 +128,7 @@ public PreAcceptReply apply(SafeCommandStore safeStore) return result; } }; - instance.maybeCacheEvict(txn.keys()); + instance.maybeCacheEvict(txn.keys().toParticipants()); instance.processAsync(preAccept).begin(counter); } break; diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java index d966757e9597..d62bd1b42d80 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CheckStatusSerializersTest.java @@ -25,9 +25,8 @@ import org.junit.Test; import accord.api.RoutingKey; -import accord.coordinate.Infer; -import accord.local.SaveStatus; -import accord.messages.CheckStatus.FoundKnownMap; +import accord.primitives.SaveStatus; +import accord.primitives.KnownMap; import accord.primitives.Ballot; import accord.primitives.FullKeyRoute; import accord.primitives.Routable; @@ -61,7 +60,7 @@ public class CheckStatusSerializersTest public void serde() { DataOutputBuffer buffer = new DataOutputBuffer(); - qt().forAll(foundKnownMap()).check(map -> Assertions.assertThat(serde(CheckStatusSerializers.foundKnownMap, MessagingService.Version.CURRENT.value, buffer, map)).isEqualTo(map)); + qt().forAll(foundKnownMap()).check(map -> Assertions.assertThat(serde(CheckStatusSerializers.knownMap, MessagingService.Version.CURRENT.value, buffer, map)).isEqualTo(map)); } private static T serde(IVersionedSerializer serializer, int version, DataOutputBuffer buffer, T value) throws IOException @@ -76,11 +75,10 @@ private static T serde(IVersionedSerializer serializer, int version, Data } } - private static Gen foundKnownMap() + private static Gen foundKnownMap() { return rs -> { SaveStatus saveStatus = Gens.pick(SaveStatus.values()).next(rs); - Infer.InvalidIfNot invalidIfNot = Gens.pick(Infer.InvalidIfNot.values()).next(rs); Ballot promised = AccordGens.ballot().next(rs); Routable.Domain domain = Gens.pick(Routable.Domain.values()).next(rs); Unseekables keysOrRanges; @@ -101,7 +99,7 @@ private static Gen foundKnownMap() default: throw new AssertionError("Unknown domain"); } - return FoundKnownMap.create(keysOrRanges, saveStatus, invalidIfNot, promised); + return KnownMap.create(keysOrRanges, saveStatus); }; } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 93d66eb1ec3e..6f41f0083023 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -39,6 +39,8 @@ import org.junit.Test; import accord.api.Key; +import accord.api.RoutingKey; +import accord.local.StoreParticipants; import accord.local.cfk.CommandsForKey; import accord.local.cfk.CommandsForKey.InternalStatus; import accord.local.Command; @@ -47,8 +49,8 @@ import accord.local.CommonAttributes; import accord.local.CommonAttributes.Mutable; import accord.local.Node; -import accord.local.SaveStatus; -import accord.local.Status; +import accord.primitives.SaveStatus; +import accord.primitives.Status; import accord.primitives.Ballot; import accord.primitives.KeyDeps; import accord.primitives.PartialDeps; @@ -66,27 +68,28 @@ import accord.utils.SortedArrays; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.SchemaLoader; -import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordTestUtils; -import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.txn.TxnData; import org.apache.cassandra.service.accord.txn.TxnWrite; import org.apache.cassandra.simulator.RandomSource.Choices; import org.apache.cassandra.utils.AccordGenerators; import org.apache.cassandra.utils.CassandraGenerators; -import static accord.local.Status.Durability.NotDurable; -import static accord.local.Status.KnownExecuteAt.ExecuteAtErased; -import static accord.local.Status.KnownExecuteAt.ExecuteAtUnknown; +import static accord.primitives.Status.Durability.NotDurable; +import static accord.primitives.Known.KnownExecuteAt.ExecuteAtErased; +import static accord.primitives.Known.KnownExecuteAt.ExecuteAtUnknown; import static accord.utils.Property.qt; import static accord.utils.SortedArrays.Search.FAST; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; +// TODO (required): test statusOverrides public class CommandsForKeySerializerTest { @BeforeClass @@ -125,14 +128,14 @@ CommonAttributes attributes() if (saveStatus.known.isDefinitionKnown()) mutable.partialTxn(txn); - mutable.route(txn.keys().toRoute(txn.keys().get(0).someIntersectingRoutingKey(null))); + mutable.setParticipants(StoreParticipants.all(txn.keys().toRoute(txn.keys().get(0).someIntersectingRoutingKey(null)))); mutable.durability(NotDurable); if (saveStatus.known.deps.hasProposedOrDecidedDeps()) { try (KeyDeps.Builder builder = KeyDeps.builder();) { for (TxnId id : deps) - builder.add((Key)txn.keys().get(0), id); + builder.add(((Key)txn.keys().get(0)).toUnseekable(), id); mutable.partialDeps(new PartialDeps(AccordTestUtils.fullRange(txn), builder.build(), RangeDeps.NONE, KeyDeps.NONE)); } } @@ -183,7 +186,7 @@ Command toCommand() else return Command.SerializerSupport.truncatedApply(attributes(), saveStatus, executeAt, new Writes(txnId, executeAt, txn.keys(), new TxnWrite(Collections.emptyList(), true)), new TxnData()); case Erased: - case ErasedOrInvalidOrVestigial: + case ErasedOrVestigial: case Invalidated: return Command.SerializerSupport.invalidated(txnId); } @@ -356,7 +359,7 @@ private static Function timestampSupplier(Se @Test public void serde() { - testOne(-8928257345122888710L); + testOne(3466420662549679178L); Random random = new Random(); for (int i = 0 ; i < 10000 ; ++i) { @@ -446,7 +449,7 @@ private static void testOne(long seed) } PartialTxn txn = createPartialTxn(0); - Key key = (Key) txn.keys().get(0); + RoutingKey key = ((Key) txn.keys().get(0)).toUnseekable(); ObjectGraph graph = generateObjectGraph(source.nextInt(0, 100), () -> txnIdSupplier.apply(null), saveStatusSupplier, ignore -> txn, executeAtSupplier, ballotSupplier, missingCountSupplier, source); List commands = graph.toCommands(); CommandsForKey cfk = new CommandsForKey(key); @@ -471,7 +474,7 @@ private static void testOne(long seed) if (expectStatus == null) expectStatus = InternalStatus.TRANSITIVELY_KNOWN; if (expectStatus.hasExecuteAtOrDeps) Assert.assertEquals(cmd.executeAt, info.executeAt); - Assert.assertEquals(expectStatus, info.status); + Assert.assertEquals(expectStatus, info.status()); Assert.assertArrayEquals(cmd.missing.toArray(TxnId[]::new), info.missing()); if (expectStatus.hasBallot) Assert.assertEquals(cmd.ballot, info.ballot()); @@ -495,7 +498,7 @@ public void test() var txnIdGen = AccordGens.txnIds(rs -> rs.nextLong(0, 100), rs -> rs.nextLong(100), rs -> rs.nextInt(10)); qt().check(rs -> { TableId table = tableGen.next(rs); - PartitionKey pk = new PartitionKey(table, Murmur3Partitioner.instance.decorateKey(Murmur3Partitioner.LongToken.keyForToken(rs.nextLong()))); + TokenKey pk = new TokenKey(table, new Murmur3Partitioner.LongToken(rs.nextLong())); var redudentBefore = txnIdGen.next(rs); TxnId[] ids = Gens.arrays(TxnId.class, rs0 -> { TxnId next = txnIdGen.next(rs0); @@ -508,7 +511,7 @@ public void test() for (int i = 0; i < info.length; i++) { InternalStatus status = rs.pick(InternalStatus.values()); - info[i] = TxnInfo.create(ids[i], status, ids[i], TxnId.NO_TXNIDS, Ballot.ZERO); + info[i] = TxnInfo.create(ids[i], status, true, ids[i], TxnId.NO_TXNIDS, Ballot.ZERO); } Gen pendingGen = Gens.enums().allMixedDistribution(Unmanaged.Pending.class).next(rs); @@ -527,7 +530,7 @@ public void test() { int idx = Arrays.binarySearch(ids, u.txnId); if (idx < 0) - missing.add(TxnInfo.create(u.txnId, InternalStatus.TRANSITIVELY_KNOWN)); + missing.add(TxnInfo.create(u.txnId, InternalStatus.TRANSITIVELY_KNOWN, true, u.txnId, Ballot.ZERO)); } if (!missing.isEmpty()) { @@ -549,11 +552,11 @@ public void test() public void thereAndBackAgain() { long tokenValue = -2311778975040348869L; - DecoratedKey key = Murmur3Partitioner.instance.decorateKey(Murmur3Partitioner.LongToken.keyForToken(tokenValue)); - PartitionKey pk = new PartitionKey(TableId.fromString("1b255f4d-ef25-40a6-0000-000000000009"), key); + Token token = new Murmur3Partitioner.LongToken(tokenValue); + TokenKey pk = new TokenKey(TableId.fromString("1b255f4d-ef25-40a6-0000-000000000009"), token); TxnId txnId = TxnId.fromValues(11,34052499,2,1); CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, - new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED_OR_ACCEPTED_INVALIDATE, txnId, TxnId.NO_TXNIDS, Ballot.ZERO) }, + new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED_OR_ACCEPTED_INVALIDATE, true, txnId, TxnId.NO_TXNIDS, Ballot.ZERO) }, CommandsForKey.NO_PENDING_UNMANAGED, TxnId.NONE, TxnId.NONE); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); diff --git a/test/unit/org/apache/cassandra/utils/AccordGenerators.java b/test/unit/org/apache/cassandra/utils/AccordGenerators.java index cf23494d8463..28ae575fb27b 100644 --- a/test/unit/org/apache/cassandra/utils/AccordGenerators.java +++ b/test/unit/org/apache/cassandra/utils/AccordGenerators.java @@ -21,16 +21,23 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; +import java.util.NavigableMap; import java.util.Set; import java.util.function.BiFunction; import java.util.stream.Stream; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSortedMap; + import accord.local.Command; import accord.local.CommonAttributes; +import accord.local.DurableBefore; import accord.local.RedundantBefore; -import accord.local.SaveStatus; +import accord.local.StoreParticipants; +import accord.primitives.SaveStatus; import accord.primitives.Ballot; import accord.primitives.Deps; import accord.primitives.FullRoute; @@ -50,6 +57,7 @@ import accord.utils.Gen; import accord.utils.Gens; import accord.utils.RandomSource; +import accord.utils.ReducingRangeMap; import accord.utils.TriFunction; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.dht.AccordSplitter; @@ -64,7 +72,8 @@ import org.apache.cassandra.service.accord.txn.TxnWrite; import org.quicktheories.impl.JavaRandom; -import static accord.local.Status.Durability.NotDurable; +import static accord.local.CommandStores.RangesForEpoch; +import static accord.primitives.Status.Durability.NotDurable; import static org.apache.cassandra.service.accord.AccordTestUtils.TABLE_ID1; import static org.apache.cassandra.service.accord.AccordTestUtils.createPartialTxn; @@ -210,7 +219,7 @@ private CommonAttributes attributes(SaveStatus saveStatus) if (saveStatus.known.deps.hasProposedOrDecidedDeps()) mutable.partialDeps(partialDeps); - mutable.route(route); + mutable.setParticipants(StoreParticipants.all(route)); mutable.durability(NotDurable); return mutable; @@ -261,7 +270,7 @@ public Command build(SaveStatus saveStatus) else return Command.SerializerSupport.truncatedApply(attributes(saveStatus), saveStatus, executeAt, new Writes(txnId, executeAt, keysOrRanges, new TxnWrite(Collections.emptyList(), true)), new TxnData()); case Erased: - case ErasedOrInvalidOrVestigial: + case ErasedOrVestigial: case Invalidated: return Command.SerializerSupport.invalidated(txnId); } @@ -285,6 +294,18 @@ public static Gen keys(Gen tableIdGen, Gen return rs -> new PartitionKey(tableIdGen.next(rs), key.next(rs)); } + public static Gen routingKeys() + { + return routingKeyGen(fromQT(CassandraGenerators.TABLE_ID_GEN), + fromQT(CassandraGenerators.token())); + } + + public static Gen routingKeys(IPartitioner partitioner) + { + return routingKeyGen(fromQT(CassandraGenerators.TABLE_ID_GEN), + fromQT(CassandraGenerators.token(partitioner))); + } + public static Gen routingKeyGen(Gen tableIdGen, Gen tokenGen) { return routingKeyGen(tableIdGen, Gens.enums().all(AccordRoutingKey.RoutingKeyKind.class), tokenGen); @@ -382,22 +403,22 @@ public static Gen rangesArbitrary(IPartitioner partitioner) public static Gen keyDepsGen() { - return AccordGens.keyDeps(AccordGenerators.keys()); + return AccordGens.keyDeps(AccordGenerators.routingKeys()); } public static Gen keyDepsGen(IPartitioner partitioner) { - return AccordGens.keyDeps(AccordGenerators.keys(partitioner)); + return AccordGens.keyDeps(AccordGenerators.routingKeys(partitioner)); } public static Gen directKeyDepsGen() { - return AccordGens.directKeyDeps(AccordGenerators.keys()); + return AccordGens.directKeyDeps(AccordGenerators.routingKeys()); } public static Gen directKeyDepsGen(IPartitioner partitioner) { - return AccordGens.directKeyDeps(AccordGenerators.keys(partitioner)); + return AccordGens.directKeyDeps(AccordGenerators.routingKeys(partitioner)); } public static Gen rangeDepsGen() @@ -430,14 +451,17 @@ public static Gen redundantBeforeEntry(Gen empty return rs -> { Range range = rangeGen.next(rs); TxnId locallyAppliedOrInvalidatedBefore = emptyGen.next(rs) ? TxnId.NONE : txnIdGen.next(rs); // emptyable or range + TxnId locallyDecidedAndAppliedOrInvalidatedBefore = locallyAppliedOrInvalidatedBefore; TxnId shardAppliedOrInvalidatedBefore = emptyGen.next(rs) ? TxnId.NONE : txnIdGen.next(rs); // emptyable or range + TxnId shardOnlyAppliedOrInvalidatedBefore = shardAppliedOrInvalidatedBefore; + TxnId gcBefore = shardAppliedOrInvalidatedBefore; TxnId bootstrappedAt = txnIdGen.next(rs); Timestamp staleUntilAtLeast = emptyGen.next(rs) ? null : txnIdGen.next(rs); // nullable long maxEpoch = Stream.of(locallyAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, bootstrappedAt, staleUntilAtLeast).filter(t -> t != null).mapToLong(Timestamp::epoch).max().getAsLong(); long startEpoch = rs.nextLong(maxEpoch); long endEpoch = emptyGen.next(rs) ? Long.MAX_VALUE : 1 + rs.nextLong(startEpoch, Long.MAX_VALUE); - return new RedundantBefore.Entry(range, startEpoch, endEpoch, locallyAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, bootstrappedAt, staleUntilAtLeast); + return new RedundantBefore.Entry(range, startEpoch, endEpoch, locallyAppliedOrInvalidatedBefore, locallyDecidedAndAppliedOrInvalidatedBefore, shardAppliedOrInvalidatedBefore, shardOnlyAppliedOrInvalidatedBefore, gcBefore, bootstrappedAt, staleUntilAtLeast); }; } @@ -449,6 +473,65 @@ public static Gen redundantBefore(IPartitioner partitioner) return AccordGens.redundantBefore(rangeGen, entryGen); } + public static Gen durableBeforeGen(IPartitioner partitioner) + { + Gen rangeGen = rangesArbitrary(partitioner); + Gen txnIdGen = AccordGens.txnIds(Gens.pick(Txn.Kind.SyncPoint, Txn.Kind.ExclusiveSyncPoint), ignore -> Routable.Domain.Range); + + return (rs) -> { + Ranges ranges = rangeGen.next(rs); + TxnId majority = txnIdGen.next(rs); + TxnId universal = majority; + return DurableBefore.create(ranges, majority, universal); + }; + } + + public static Gen> rejectBeforeGen(IPartitioner partitioner) + { + Gen rangeGen = rangesArbitrary(partitioner); + Gen timestampGen = AccordGens.timestamps(); + + return (rs) -> { + ReducingRangeMap initial = new ReducingRangeMap<>(); + int size = rs.nextInt(10); + for (int i = 0; i < size; i++) + initial = ReducingRangeMap.add(initial, rangeGen.next(rs), timestampGen.next(rs)); + + return initial; + }; + } + + public static Gen> safeToReadGen(IPartitioner partitioner) + { + Gen rangeGen = ranges(partitioner); + Gen timestampGen = AccordGens.timestamps(); + + return (rs) -> { + ImmutableMap.Builder initial = new ImmutableSortedMap.Builder<>(Comparator.comparing(o -> o)); + int size = rs.nextInt(10); + for (int i = 0; i < size; i++) + initial.put(timestampGen.next(rs), rangeGen.next(rs)); + + return (NavigableMap) initial.build(); + }; + } + + public static Gen rangesForEpoch(IPartitioner partitioner) + { + Gen rangesGen = ranges(partitioner); + + return rs -> { + int size = rs.nextInt(1, 5); + long[] epochs = new long[size]; + for (int i = 0; i < size; i++) + epochs[i] = rs.nextLong(1, 10_000); + Ranges[] ranges = new Ranges[size]; + for (int i = 0; i < size; i++) + ranges[i] = rangesGen.next(rs); + return new RangesForEpoch.Snapshot(epochs, ranges); + }; + } + public static Gen fromQT(org.quicktheories.core.Gen qt) { return rs -> { From 562894dcb1a9e5d1fca922c1297e9453553a1b15 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Mon, 30 Sep 2024 18:18:45 +0100 Subject: [PATCH 153/340] improve AccordLoadTest to support more keys --- modules/accord | 2 +- .../test/accord/AccordLoadTest.java | 22 ++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/modules/accord b/modules/accord index 2a7aceb96cb1..b0ad8e8cfbcc 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 2a7aceb96cb1e03bcfe150403b9d245b1d2562f9 +Subproject commit b0ad8e8cfbcc35fc8152850760cd1f34f06836e7 diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index c5aa059a4cf4..8478a060d760 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.BitSet; import java.util.Comparator; import java.util.Date; import java.util.List; @@ -82,11 +83,10 @@ public boolean matches(int i, int i1, IMessage iMessage) final int batchSize = 1000; final int concurrency = 100; final int ratePerSecond = 1000; - final int keyCount = 10; + final int keyCount = 100000; final float readChance = 0.33f; long nextRepairAt = repairInterval; - for (int i = 1; i <= keyCount; i++) - coordinator.execute("INSERT INTO " + qualifiedAccordTableName + " (k, v) VALUES (0, 0) USING TIMESTAMP 0;", ConsistencyLevel.ALL, i); + final BitSet initialised = new BitSet(); Random random = new Random(); // CopyOnWriteArrayList exceptions = new CopyOnWriteArrayList<>(); @@ -103,21 +103,31 @@ public boolean matches(int i, int i1, IMessage iMessage) inFlight.acquire(); rateLimiter.acquire(); long commandStart = System.nanoTime(); + int k = random.nextInt(keyCount); if (random.nextFloat() < readChance) { coordinator.executeWithResult((success, fail) -> { inFlight.release(); if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); // else exceptions.add(fail); - }, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, random.nextInt(keyCount)); + }, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, k); } - else + else if (initialised.get(i)) { coordinator.executeWithResult((success, fail) -> { inFlight.release(); if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); // else exceptions.add(fail); - }, "UPDATE " + qualifiedAccordTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, random.nextInt(keyCount)); + }, "UPDATE " + qualifiedAccordTableName + " SET v += 1 WHERE k = ? IF EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, k); + } + else + { + initialised.set(i); + coordinator.executeWithResult((success, fail) -> { + inFlight.release(); + if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); + // else exceptions.add(fail); + }, "UPDATE " + qualifiedAccordTableName + " SET v = 0 WHERE k = ? IF NOT EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, k); } } From d0a3586bd9b68145180de13e8734ec2bb2d1f852 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 30 Sep 2024 15:30:28 +0200 Subject: [PATCH 154/340] Add purging to Accord Journal table Patch by Alex Petrov; reviewed by Aleksey Yeshchenko and Benedict Elliott Smith for CASSANDRA-19877 --- .../db/compaction/CompactionIterator.java | 178 +++++++++++++++++- .../service/accord/AccordJournal.java | 8 +- .../service/accord/AccordKeyspace.java | 2 +- .../service/accord/AccordService.java | 77 ++++---- .../service/accord/IAccordService.java | 3 + .../cassandra/service/accord/JournalKey.java | 6 +- .../service/accord/SavedCommand.java | 74 ++++++++ .../accord/AccordJournalCompactionTest.java | 29 +-- 8 files changed, 323 insertions(+), 54 deletions(-) diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index f9b4b9e8ce06..ab072f68681d 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.db.compaction; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; @@ -32,6 +33,7 @@ import com.google.common.collect.Ordering; import accord.local.Cleanup; +import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; import accord.local.DurableBefore; import accord.local.RedundantBefore; @@ -43,6 +45,7 @@ import accord.primitives.TxnId; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.db.AbstractCompactionController; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Columns; @@ -53,6 +56,8 @@ import org.apache.cassandra.db.RegularAndStaticColumns; import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.partitions.PurgeFunction; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterators; @@ -73,12 +78,18 @@ import org.apache.cassandra.index.transactions.IndexTransaction; import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.util.DataInputBuffer; +import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.metrics.TopPartitionTracker; +import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.CompactionParams.TombstoneOption; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers; +import org.apache.cassandra.service.accord.AccordJournalValueSerializers.FlyweightSerializer; import org.apache.cassandra.service.accord.AccordKeyspace; import org.apache.cassandra.service.accord.AccordKeyspace.CommandRows; import org.apache.cassandra.service.accord.AccordKeyspace.CommandsColumns; @@ -86,12 +97,15 @@ import org.apache.cassandra.service.accord.AccordKeyspace.TimestampsForKeyRows; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.IAccordService; +import org.apache.cassandra.service.accord.JournalKey; +import org.apache.cassandra.service.accord.SavedCommand; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.paxos.PaxosRepairHistory; import org.apache.cassandra.service.paxos.uncommitted.PaxosRows; import org.apache.cassandra.utils.TimeUUID; import static accord.local.Cleanup.TRUNCATE_WITH_OUTCOME; +import static accord.local.Cleanup.shouldCleanup; import static accord.local.Cleanup.shouldCleanupPartial; import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.MICROSECONDS; @@ -222,7 +236,8 @@ private Transformation purger(ColumnFamilyStore cfs, Supp return new AccordCommandsPurger(accordService); if (isAccordTimestampsForKey(cfs)) return new AccordTimestampsForKeyPurger(accordService); - + if (isAccordJournal(cfs)) + return new AccordJournalPurger(accordService); if (isAccordCommandsForKey(cfs)) return new AccordCommandsForKeyPurger(AccordKeyspace.CommandsForKeysAccessor, accordService); @@ -990,6 +1005,159 @@ protected Row applyToStatic(Row row) } } + class AccordJournalPurger extends AbstractPurger + { + final Int2ObjectHashMap redundantBefores; + final Int2ObjectHashMap ranges; + final DurableBefore durableBefore; + final ColumnMetadata recordColumn; + final ColumnMetadata versionColumn; + final KeySupport keySupport = JournalKey.SUPPORT; + final AccordService service; + + JournalKey key = null; + Object builder = null; + FlyweightSerializer serializer = null; + Object[] lastClustering = null; + long maxSeenTimestamp = -1; + final int userVersion; + + public AccordJournalPurger(Supplier serviceSupplier) + { + service = (AccordService) serviceSupplier.get(); + // TODO: test serialization version logic + userVersion = service.journalConfiguration().userVersion(); + IAccordService.CompactionInfo compactionInfo = service.getCompactionInfo(); + + this.redundantBefores = compactionInfo.redundantBefores; + this.ranges = compactionInfo.ranges; + this.durableBefore = compactionInfo.durableBefore; + ColumnFamilyStore cfs = Keyspace.open(AccordKeyspace.metadata().name).getColumnFamilyStore(AccordKeyspace.JOURNAL); + this.recordColumn = cfs.metadata().getColumn(ColumnIdentifier.getInterned("record", false)); + this.versionColumn = cfs.metadata().getColumn(ColumnIdentifier.getInterned("user_version", false)); + } + + @SuppressWarnings("unchecked") + @Override + protected void beginPartition(UnfilteredRowIterator partition) + { + key = keySupport.deserialize(partition.partitionKey().getKey(), 0, userVersion); + serializer = (AccordJournalValueSerializers.FlyweightSerializer) key.type.serializer; + builder = serializer.mergerFor(key); + maxSeenTimestamp = -1; + } + + @Override + protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition) + { + beginPartition(partition); + + if (partition.isEmpty()) + return null; + + try + { + PartitionUpdate.SimpleBuilder newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); + + while (partition.hasNext()) + applyToRow((Row) partition.next()); + + if (key.type != JournalKey.Type.COMMAND_DIFF) + { + try (DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get()) + { + serializer.reserialize(key, builder, out, userVersion); + newVersion.row(lastClustering) + .add("record", out.asNewBuffer()) + .add("user_version", userVersion); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + + return newVersion.build().unfilteredIterator(); + } + + SavedCommand.Builder commandBuilder = (SavedCommand.Builder) builder; + + // Do not have txnId in selected SSTables; remove + if (commandBuilder.txnId() == null) + return newVersion.build().unfilteredIterator(); + + RedundantBefore redundantBefore = redundantBefores.get(key.commandStoreId); + + Cleanup cleanup = shouldCleanup(commandBuilder.txnId(), commandBuilder.saveStatus(), + commandBuilder.durability(), commandBuilder.participants(), + redundantBefore, durableBefore); + switch (cleanup) + { + case EXPUNGE: + return null; + + case EXPUNGE_PARTIAL: + newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); + commandBuilder = commandBuilder.expungePartial(); + + newVersion.row(lastClustering) + .add(recordColumn.name.toString(), commandBuilder.asByteBuffer(userVersion)); + + return newVersion.build().unfilteredIterator(); + + case ERASE: + return PartitionUpdate.fullPartitionDelete(metadata(), partition.partitionKey(), maxSeenTimestamp, nowInSec).unfilteredIterator(); + + case VESTIGIAL: + case INVALIDATE: + case TRUNCATE_WITH_OUTCOME: + case TRUNCATE: + newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); + commandBuilder = commandBuilder.saveStatusOnly(); + + newVersion.row(lastClustering) + .add(recordColumn.name.toString(), commandBuilder.asByteBuffer(userVersion)); + + return newVersion.build().unfilteredIterator(); + + case NO: + return newVersion.build().unfilteredIterator(); + default: + throw new IllegalStateException("Unknown cleanup: " + cleanup);} + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + @Override + protected Row applyToRow(Row row) + { + updateProgress(); + maxSeenTimestamp = row.primaryKeyLivenessInfo().timestamp(); + ByteBuffer record = row.getCell(recordColumn).buffer(); + try (DataInputBuffer in = new DataInputBuffer(record, false)) + { + int userVersion = Int32Type.instance.compose(row.getCell(versionColumn).buffer()); + serializer.deserialize(key, builder, in, userVersion); + lastClustering = row.clustering().getBufferArray(); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + return null; + } + + @Override + protected Row applyToStatic(Row row) + { + checkState(row.isStatic() && row.isEmpty()); + return row; + } + } + + private static class AbortableUnfilteredPartitionTransformation extends Transformation { private final AbortableUnfilteredRowTransformation abortableIter; @@ -1035,6 +1203,7 @@ private static boolean requiresAccordSpecificPurger(ColumnFamilyStore cfs) return cfs.getKeyspaceName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME) && ImmutableSet.of(AccordKeyspace.COMMANDS, AccordKeyspace.TIMESTAMPS_FOR_KEY, + AccordKeyspace.JOURNAL, AccordKeyspace.COMMANDS_FOR_KEY) .contains(cfs.getTableName()); } @@ -1044,6 +1213,11 @@ private static boolean isAccordTable(ColumnFamilyStore cfs, String name) return cfs.name.equals(name) && cfs.getKeyspaceName().equals(SchemaConstants.ACCORD_KEYSPACE_NAME); } + private static boolean isAccordJournal(ColumnFamilyStore cfs) + { + return isAccordTable(cfs, AccordKeyspace.JOURNAL); + } + private static boolean isAccordCommands(ColumnFamilyStore cfs) { return isAccordTable(cfs, AccordKeyspace.COMMANDS); @@ -1058,4 +1232,4 @@ private static boolean isAccordCommandsForKey(ColumnFamilyStore cfs) { return isAccordTable(cfs, AccordKeyspace.COMMANDS_FOR_KEY); } -} \ No newline at end of file +} diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index f497b591b260..52b65a33a2f3 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -82,7 +82,7 @@ public class AccordJournal implements IJournal, Shutdownable private final Journal journal; private final AccordJournalTable journalTable; - + private final Params params; Node node; enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } @@ -110,6 +110,7 @@ public Object deserialize(JournalKey key, DataInputPlus in, int userVersion) }, new AccordSegmentCompactor<>(JournalKey.SUPPORT, params.userVersion())); this.journalTable = new AccordJournalTable<>(journal, JournalKey.SUPPORT, params.userVersion()); + this.params = params; } public AccordJournal start(Node node) @@ -122,6 +123,11 @@ public AccordJournal start(Node node) return this; } + public Params configuration() + { + return params; + } + @Override public boolean isTerminated() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 104096930a9f..f4c2e65d0a56 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -235,7 +235,7 @@ static TokenType valueOf(Token token) + "user_version int," + "record blob," + "PRIMARY KEY(key, descriptor, offset)" - + ')') + + ") WITH compression = {'class':'NoopCompressor'};") .partitioner(new LocalPartitioner(BytesType.instance)) .build(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index b2f22b2ad140..e347f5f474de 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -21,8 +21,8 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -45,54 +45,35 @@ import com.google.common.base.Throwables; import com.google.common.collect.Sets; import com.google.common.primitives.Ints; - -import accord.coordinate.Barrier; -import accord.coordinate.CoordinateSyncPoint; -import accord.coordinate.Exhausted; -import accord.coordinate.FailureAccumulator; -import accord.coordinate.Invalidated; -import accord.coordinate.TopologyMismatch; -import accord.impl.CoordinateDurabilityScheduling; -import accord.local.Command; -import accord.local.PreLoadContext; -import accord.primitives.Ranges; -import accord.primitives.SyncPoint; -import accord.topology.Topology; -import org.apache.cassandra.config.CassandraRelevantProperties; -import org.apache.cassandra.cql3.statements.RequestValidations; -import org.apache.cassandra.exceptions.RequestExecutionException; -import org.apache.cassandra.repair.SharedContext; -import org.apache.cassandra.schema.KeyspaceMetadata; -import org.apache.cassandra.schema.TableMetadata; -import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.service.accord.exceptions.ReadExhaustedException; -import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; -import org.apache.cassandra.tcm.ClusterMetadata; -import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; -import org.apache.cassandra.tcm.ClusterMetadataService; -import org.apache.cassandra.tcm.ownership.DataPlacement; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.BarrierType; import accord.api.LocalConfig; import accord.api.Result; +import accord.api.RoutingKey; +import accord.coordinate.Barrier; import accord.coordinate.Barrier.AsyncSyncPoint; +import accord.coordinate.CoordinateSyncPoint; import accord.coordinate.CoordinationAdapter.Adapters.SyncPointAdapter; import accord.coordinate.CoordinationFailed; +import accord.coordinate.ExecuteSyncPoint; +import accord.coordinate.Exhausted; +import accord.coordinate.FailureAccumulator; +import accord.coordinate.Invalidated; import accord.coordinate.Preempted; import accord.coordinate.Timeout; -import accord.api.RoutingKey; -import accord.coordinate.ExecuteSyncPoint; +import accord.coordinate.TopologyMismatch; import accord.coordinate.tracking.AllTracker; import accord.coordinate.tracking.RequestStatus; import accord.impl.AbstractConfigurationService; +import accord.impl.CoordinateDurabilityScheduling; import accord.impl.DefaultLocalListeners; import accord.impl.DefaultRemoteListeners; import accord.impl.DefaultRequestTimeouts; import accord.impl.SizeOfIntersectionSorter; import accord.impl.progresslog.DefaultProgressLogs; +import accord.local.Command; import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; @@ -101,6 +82,7 @@ import accord.local.Node; import accord.local.Node.Id; import accord.local.NodeTimeService; +import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.ShardDistributor.EvenSplit; import accord.local.cfk.CommandsForKey; @@ -108,18 +90,21 @@ import accord.messages.ReadData; import accord.messages.Request; import accord.messages.WaitUntilApplied; -import accord.primitives.Keys; -import accord.primitives.Seekable; -import accord.primitives.Seekables; import accord.primitives.FullRoute; +import accord.primitives.Keys; +import accord.primitives.Ranges; import accord.primitives.RoutingKeys; import accord.primitives.SaveStatus; +import accord.primitives.Seekable; +import accord.primitives.Seekables; import accord.primitives.Status; +import accord.primitives.SyncPoint; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.Txn.Kind; import accord.primitives.TxnId; import accord.topology.Topologies; +import accord.topology.Topology; import accord.topology.TopologyManager; import accord.utils.DefaultRandom; import accord.utils.Invariants; @@ -129,21 +114,30 @@ import accord.utils.async.AsyncResults; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.concurrent.Shutdownable; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.statements.RequestValidations; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.WriteType; import org.apache.cassandra.dht.AccordSplitter; import org.apache.cassandra.exceptions.ReadTimeoutException; +import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.exceptions.RequestTimeoutException; import org.apache.cassandra.exceptions.WriteTimeoutException; +import org.apache.cassandra.journal.Params; +import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.metrics.AccordClientRequestMetrics; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; import org.apache.cassandra.net.MessageDelivery; import org.apache.cassandra.net.MessagingService; +import org.apache.cassandra.repair.SharedContext; +import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordSyncPropagator.Notification; import org.apache.cassandra.service.accord.api.AccordAgent; import org.apache.cassandra.service.accord.api.AccordRoutingKey.KeyspaceSplitter; @@ -152,13 +146,19 @@ import org.apache.cassandra.service.accord.api.AccordTopologySorter; import org.apache.cassandra.service.accord.api.CompositeTopologySorter; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.service.accord.exceptions.ReadExhaustedException; import org.apache.cassandra.service.accord.exceptions.ReadPreemptedException; import org.apache.cassandra.service.accord.exceptions.WritePreemptedException; +import org.apache.cassandra.service.accord.interop.AccordInteropAdapter.AccordInteropFactory; +import org.apache.cassandra.service.accord.repair.RepairSyncPointAdapter; import org.apache.cassandra.service.accord.txn.TxnResult; import org.apache.cassandra.service.consensus.TransactionalMode; import org.apache.cassandra.service.consensus.migration.TableMigrationState; +import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.utils.Blocking; @@ -319,6 +319,12 @@ public void tryMarkRemoved(Topology topology, Id node) { } + + @Override + public Params journalConfiguration() + { + throw new UnsupportedOperationException("Cannot return configuration when accord.enabled = false in cassandra.yaml"); + } }; private static volatile IAccordService instance = null; @@ -1303,6 +1309,11 @@ public void awaitTableDrop(TableId id) } } + public Params journalConfiguration() + { + return journal.configuration(); + } + private AsyncChain awaitTableDrop(ColumnFamilyStore cfs, TokenRange range, BigInteger targetSplitSize) { List splits = split(cfs, range, targetSplitSize); diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index c1c7651a80ff..1be920bc1bde 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -46,6 +46,7 @@ import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.journal.Params; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.net.IVerbHandler; import org.apache.cassandra.net.Message; @@ -163,4 +164,6 @@ default void awaitTableDrop(TableId id) { } + + Params journalConfiguration(); } diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index 97c2e2e0ba1b..d3775869da98 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -46,7 +46,7 @@ public final class JournalKey { - final Type type; + public final Type type; public final Timestamp timestamp; public final int commandStoreId; @@ -261,8 +261,8 @@ public enum Type HISTORICAL_TRANSACTIONS (6, new HistoricalTransactionsSerializer()) ; - final int id; - final FlyweightSerializer serializer; + public final int id; + public final FlyweightSerializer serializer; Type(int id, FlyweightSerializer serializer) { diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index a22467a9f372..75348c0e1533 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -38,6 +38,7 @@ import accord.primitives.TxnId; import accord.primitives.Writes; import org.apache.cassandra.io.util.DataInputPlus; +import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.journal.Journal; import org.apache.cassandra.service.accord.serializers.CommandSerializers; @@ -114,6 +115,16 @@ public TxnId key() } } + + public static ByteBuffer asSerializedDiff(Command after, int userVersion) throws IOException + { + try (DataOutputBuffer out = new DataOutputBuffer()) + { + diff(null, after).write(out, userVersion); + return out.asNewBuffer(); + } + } + @Nullable public static DiffWriter diff(Command original, Command current) { @@ -397,6 +408,69 @@ public int count() return count; } + public Builder expungePartial() + { + Builder builder = new Builder(); + + builder.count++; + builder.nextCalled = true; + + // TODO: these accesses can be abstracted away + if (txnId != null) + { + builder.flags = setFieldChanged(Fields.TXN_ID, builder.flags); + builder.txnId = txnId; + } + if (executeAt != null) + { + builder.flags = setFieldChanged(Fields.EXECUTE_AT, builder.flags); + builder.executeAt = executeAt; + } + if (durability != null) + { + builder.flags = setFieldChanged(Fields.DURABILITY, builder.flags); + builder.durability = durability; + } + if (participants != null) + { + builder.flags = setFieldChanged(Fields.PARTICIPANTS, builder.flags); + builder.participants = participants; + } + + return builder; + } + + public Builder saveStatusOnly() + { + Builder builder = new Builder(); + + builder.count++; + builder.nextCalled = true; + + // TODO: these accesses can be abstracted away + if (txnId != null) + { + builder.flags = setFieldChanged(Fields.TXN_ID, builder.flags); + builder.txnId = txnId; + } + if (saveStatus != null) + { + builder.flags = setFieldChanged(Fields.SAVE_STATUS, builder.flags); + builder.saveStatus = saveStatus; + } + + return builder; + } + + public ByteBuffer asByteBuffer(int userVersion) throws IOException + { + try (DataOutputBuffer out = new DataOutputBuffer()) + { + serialize(out, userVersion); + return out.asNewBuffer(); + } + } + public void serialize(DataOutputPlus out, int userVersion) throws IOException { out.writeInt(flags); diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java index 545d9c8b9f32..e702d3bac38b 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -41,14 +41,15 @@ import accord.utils.RandomSource; import org.apache.cassandra.ServerTestUtils; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.util.File; import org.apache.cassandra.journal.TestParams; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.utils.AccordGenerators; -import org.apache.cassandra.utils.concurrent.Condition; import static accord.local.CommandStores.RangesForEpoch; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; @@ -82,6 +83,9 @@ public void beforeTest() throws Throwable @Test public void segmentMergeTest() throws InterruptedException { + ColumnFamilyStore cfs = Keyspace.open(SchemaConstants.ACCORD_KEYSPACE_NAME).getColumnFamilyStore(AccordKeyspace.JOURNAL); + cfs.disableAutoCompaction(); + RedundantBeforeAccumulator redundantBeforeAccumulator = new RedundantBeforeAccumulator(); DurableBeforeAccumulator durableBeforeAccumulator = new DurableBeforeAccumulator(); IdentityAccumulator> bootstrapBeganAtAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY)); @@ -133,21 +137,18 @@ public boolean enableCompaction() RandomSource rs = new DefaultRandom(); int count = 1_000; - Condition condition = Condition.newOneTimeCondition(); for (int i = 0; i <= count; i++) { timestamp = timestamp.next(); AccordSafeCommandStore.FieldUpdates updates = new AccordSafeCommandStore.FieldUpdates(); updates.durableBefore = durableBeforeGen.next(rs); - updates.redundantBefore = redundantBeforeGen.next(rs); + // TODO: improve redundant before generator and re-enable +// updates.redundantBefore = redundantBeforeGen.next(rs); updates.safeToRead = safeToReadGen.next(rs); updates.rangesForEpoch = rangesForEpochGen.next(rs); updates.historicalTransactions = historicalTransactionsGen.next(rs); - if (i == count) - journal.persistStoreState(1, updates, condition::signal); - else - journal.persistStoreState(1, updates, null); + journal.persistStoreState(1, updates, null); redundantBeforeAccumulator.update(updates.redundantBefore); durableBeforeAccumulator.update(updates.durableBefore); @@ -156,14 +157,14 @@ public boolean enableCompaction() safeToReadAccumulator.update(updates.safeToRead); rangesForEpochAccumulator.update(updates.rangesForEpoch); historicalTransactionsAccumulator.update(updates.historicalTransactions); - } - condition.await(); - - journal.closeCurrentSegmentForTesting(); - journal.runCompactorForTesting(); + if (i % 100 == 0) + journal.closeCurrentSegmentForTesting(); + if (i % 200 == 0) + journal.runCompactorForTesting(); + } - Assert.assertEquals(redundantBeforeAccumulator.get(), journal.loadRedundantBefore(1)); +// Assert.assertEquals(redundantBeforeAccumulator.get(), journal.loadRedundantBefore(1)); Assert.assertEquals(durableBeforeAccumulator.get(), journal.loadDurableBefore(1)); Assert.assertEquals(bootstrapBeganAtAccumulator.get(), journal.loadBootstrapBeganAt(1)); Assert.assertEquals(safeToReadAccumulator.get(), journal.loadSafeToRead(1)); @@ -183,4 +184,4 @@ public static Gen depsGen() (rs) -> Deps.NONE.rangeDeps, (rs) -> Deps.NONE.directKeyDeps); } -} +} \ No newline at end of file From d034661ae2e4e51dc2a8e9856d77c751d4d33e13 Mon Sep 17 00:00:00 2001 From: David Capwell Date: Mon, 30 Sep 2024 11:41:04 -0700 Subject: [PATCH 155/340] Support Restart node in Accord patch by David Capwell; reviewed by Alex Petrov for CASSANDRA-19969 --- .../accord/AccordConfigurationService.java | 34 +++++--- .../service/accord/AccordKeyspace.java | 42 ++++----- .../service/accord/AccordService.java | 16 ++-- .../org/apache/cassandra/tcm/Processor.java | 3 + .../distributed/test/PaxosRepair2Test.java | 1 + .../accord/AccordJournalIntegrationTest.java | 62 ++++++++++--- .../test/log/BootWithMetadataTest.java | 4 + .../test/log/CoordinatorPathTestBase.java | 4 +- .../fuzz/topology/TopologyMixupTestBase.java | 87 ++++++++++++++----- .../AccordConfigurationServiceTest.java | 4 +- 10 files changed, 172 insertions(+), 85 deletions(-) diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index 09a04140c9e2..f52d7a935ed0 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -22,6 +22,7 @@ import java.util.OptionalLong; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -230,9 +231,19 @@ public synchronized void start(Consumer callback) state = State.LOADING; EndpointMapping snapshot = mapping; //TODO (restart): if there are topologies loaded then there is likely failures if reporting is needed, as mapping is not setup yet - diskState = diskStateManager.loadTopologies(((epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { - if (topology != null) - reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); + AtomicReference previousRef = new AtomicReference<>(null); + diskState = diskStateManager.loadTopologies(((epoch, metadata, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant) -> { + updateMapping(metadata); + reportTopology(topology, syncStatus == SyncStatus.NOT_STARTED); + Topology previous = previousRef.get(); + if (previous != null) + { + // for all nodes removed, or pending removal, mark them as removed so we don't wait on their replies + Sets.SetView removedNodes = Sets.difference(previous.nodes(), topology.nodes()); + if (!removedNodes.isEmpty()) + onNodesRemoved(topology.epoch(), currentTopology(), removedNodes); + } + previousRef.set(topology); getOrCreateEpochState(epoch).setSyncStatus(syncStatus); if (syncStatus == SyncStatus.NOTIFYING) @@ -331,14 +342,7 @@ synchronized void reportMetadataInternal(ClusterMetadata metadata) // for all nodes removed, or pending removal, mark them as removed so we don't wait on their replies Sets.SetView removedNodes = Sets.difference(current.nodes(), topology.nodes()); if (!removedNodes.isEmpty()) - { - onNodesRemoved(topology.epoch(), removedNodes); - for (Node.Id node : removedNodes) - { - if (shareShard(current, node, localId)) - AccordService.instance().tryMarkRemoved(current, node); - } - } + onNodesRemoved(topology.epoch(), current, removedNodes); } private static boolean shareShard(Topology current, Node.Id target, Node.Id self) @@ -351,7 +355,7 @@ private static boolean shareShard(Topology current, Node.Id target, Node.Id self return false; } - public synchronized void onNodesRemoved(long epoch, Set removed) + public synchronized void onNodesRemoved(long epoch, Topology current, Set removed) { if (removed.isEmpty()) return; syncPropagator.onNodesRemoved(removed); @@ -361,6 +365,12 @@ public synchronized void onNodesRemoved(long epoch, Set removed) receiveRemoteSyncCompletePreListenerNotify(node, oldEpoch); } listeners.forEach(l -> l.onRemoveNodes(epoch, removed)); + + for (Node.Id node : removed) + { + if (shareShard(current, node, localId)) + AccordService.instance().tryMarkRemoved(current, node); + } } private long[] nonCompletedEpochsBefore(long max) diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index f4c2e65d0a56..5190d723d2bf 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -37,6 +37,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; +import org.apache.cassandra.tcm.ClusterMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -137,7 +138,6 @@ import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; -import org.apache.cassandra.service.accord.serializers.TopologySerializers; import org.apache.cassandra.utils.Clock.Global; import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.btree.BTree; @@ -264,7 +264,6 @@ static TokenType valueOf(Token token) public static class LocalVersionedSerializers { static final LocalVersionedSerializer participants = localSerializer(CommandSerializers.participants); - static final LocalVersionedSerializer topology = localSerializer(TopologySerializers.topology); private static LocalVersionedSerializer localSerializer(IVersionedSerializer serializer) { @@ -708,7 +707,6 @@ public LocalCompositePrefixPartitioner.AbstractCompositePrefixToken getPrefixTok "accord topologies", "CREATE TABLE %s (" + "epoch bigint primary key, " + - "topology blob, " + "sync_state int, " + "pending_sync_notify set, " + // nodes that need to be told we're synced "remote_sync_complete set, " + // nodes that have told us they're synced @@ -1387,22 +1385,7 @@ private static EpochDiskState maybeUpdateMaxEpoch(EpochDiskState diskState, long public static EpochDiskState saveTopology(Topology topology, EpochDiskState diskState) { - diskState = maybeUpdateMaxEpoch(diskState, topology.epoch()); - - try - { - String cql = "UPDATE " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + - "SET topology=? WHERE epoch=?"; - executeInternal(cql, - serialize(topology, LocalVersionedSerializers.topology), topology.epoch()); - flush(Topologies); - } - catch (IOException e) - { - throw new UncheckedIOException(e); - } - - return diskState; + return maybeUpdateMaxEpoch(diskState, topology.epoch()); } public static EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, EpochDiskState diskState) @@ -1487,21 +1470,26 @@ public static EpochDiskState truncateTopologyUntil(final long epoch, EpochDiskSt public interface TopologyLoadConsumer { - void load(long epoch, Topology topology, SyncStatus syncStatus, Set pendingSyncNotify, Set remoteSyncComplete, Ranges closed, Ranges redundant); + void load(long epoch, ClusterMetadata metadata, Topology topology, SyncStatus syncStatus, Set pendingSyncNotify, Set remoteSyncComplete, Ranges closed, Ranges redundant); } @VisibleForTesting - public static void loadEpoch(long epoch, TopologyLoadConsumer consumer) throws IOException + public static void loadEpoch(long epoch, ClusterMetadata metadata, TopologyLoadConsumer consumer) throws IOException { + Topology topology = AccordTopology.createAccordTopology(metadata); + String cql = "SELECT * FROM " + ACCORD_KEYSPACE_NAME + '.' + TOPOLOGIES + ' ' + "WHERE epoch=?"; UntypedResultSet result = executeInternal(cql, epoch); + if (result.isEmpty()) + { + // topology updates disk state for epoch but doesn't save the topology to the table, so there maybe an epoch we know about, but no fields are present + consumer.load(epoch, metadata, topology, SyncStatus.NOT_STARTED, Collections.emptySet(), Collections.emptySet(), Ranges.EMPTY, Ranges.EMPTY); + return; + } checkState(!result.isEmpty(), "Nothing found for epoch %d", epoch); UntypedResultSet.Row row = result.one(); - Topology topology = row.has("topology") - ? deserialize(row.getBytes("topology"), LocalVersionedSerializers.topology) - : null; SyncStatus syncStatus = row.has("sync_state") ? SyncStatus.values()[row.getInt("sync_state")] @@ -1515,7 +1503,7 @@ public static void loadEpoch(long epoch, TopologyLoadConsumer consumer) throws I Ranges closed = row.has("closed") ? blobMapToRanges(row.getMap("closed", BytesType.instance, BytesType.instance)) : Ranges.EMPTY; Ranges redundant = row.has("redundant") ? blobMapToRanges(row.getMap("redundant", BytesType.instance, BytesType.instance)) : Ranges.EMPTY; - consumer.load(epoch, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant); + consumer.load(epoch, metadata, topology, syncStatus, pendingSyncNotify, remoteSyncComplete, closed, redundant); } public static EpochDiskState loadTopologies(TopologyLoadConsumer consumer) @@ -1526,8 +1514,8 @@ public static EpochDiskState loadTopologies(TopologyLoadConsumer consumer) if (diskState == null) return EpochDiskState.EMPTY; - for (long epoch=diskState.minEpoch; epoch<=diskState.maxEpoch; epoch++) - loadEpoch(epoch, consumer); + for (ClusterMetadata metadata : AccordService.tcmLoadRange(diskState.minEpoch, diskState.maxEpoch)) + loadEpoch(metadata.epoch.getEpoch(), metadata, consumer); return diskState; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index e347f5f474de..4b9533755954 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -443,10 +443,9 @@ public synchronized void startup() class Ref { List historic = Collections.emptyList();} Ref ref = new Ref(); configService.start((optMaxEpoch -> { - // when max epoch isn't know, this means the node started for the first time; check cluster's min epoch - // when max epoch is known, then there is no reason to discover min epoch (we already did it) - if (optMaxEpoch.isPresent()) return; - List historic = ref.historic = discoverHistoric(node, cms); + List historic = ref.historic = !optMaxEpoch.isEmpty() + ? tcmLoadRange(optMaxEpoch.getAsLong(), Long.MAX_VALUE) + : discoverHistoric(node, cms); for (ClusterMetadata m : historic) configService.reportMetadataInternal(m); })); @@ -531,14 +530,17 @@ private List discoverHistoric(Node node, ClusterMetadataService return tcmLoadRange(minEpoch, current.epoch.getEpoch()); } - private static List tcmLoadRange(long min, long max) + public static List tcmLoadRange(long min, long max) { - List afterLoad = ClusterMetadataService.instance().processor().reconstructFull(Epoch.create(min - 1), Epoch.create(max)); + List afterLoad = ClusterMetadataService.instance().processor().reconstructFull(Epoch.create(min), Epoch.create(max)); + if (Invariants.isParanoid()) + assert afterLoad.get(0).epoch.getEpoch() == min : String.format("Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); while (!afterLoad.isEmpty() && afterLoad.get(0).epoch.getEpoch() < min) afterLoad.remove(0); assert !afterLoad.isEmpty() : String.format("TCM was unable to return the needed epochs: %d -> %d", min, max); assert afterLoad.get(0).epoch.getEpoch() == min : String.format("Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); - assert afterLoad.get(afterLoad.size() - 1).epoch.getEpoch() == max : String.format("Unexpected epoch: expected %d but given %d", max, afterLoad.get(afterLoad.size() - 1).epoch.getEpoch()); + if (max != Long.MAX_VALUE) + assert afterLoad.get(afterLoad.size() - 1).epoch.getEpoch() == max : String.format("Unexpected epoch: expected %d but given %d", max, afterLoad.get(afterLoad.size() - 1).epoch.getEpoch()); return afterLoad; } diff --git a/src/java/org/apache/cassandra/tcm/Processor.java b/src/java/org/apache/cassandra/tcm/Processor.java index 46fce6aeab34..168b7f9c786b 100644 --- a/src/java/org/apache/cassandra/tcm/Processor.java +++ b/src/java/org/apache/cassandra/tcm/Processor.java @@ -19,6 +19,7 @@ package org.apache.cassandra.tcm; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; @@ -114,8 +115,10 @@ default List reconstructFull(Epoch lowEpoch, Epoch highEpoch) { LogState logState = reconstruct(lowEpoch, highEpoch, Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(NANOSECONDS), TCMMetrics.instance.commitRetries)); + if (logState.isEmpty()) return Collections.emptyList(); List cms = new ArrayList<>(logState.entries.size()); ClusterMetadata accum = logState.baseState; + cms.add(accum); for (Entry entry : logState.entries) { Transformation.Result res = entry.transform.execute(accum); diff --git a/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java b/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java index 175f70c7973f..4fb99e62a4be 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java +++ b/test/distributed/org/apache/cassandra/distributed/test/PaxosRepair2Test.java @@ -194,6 +194,7 @@ public void paxosRepairPreventsStaleReproposal() throws Throwable Ballot staleBallot = Paxos.newBallot(Ballot.none(), org.apache.cassandra.db.ConsistencyLevel.SERIAL); try (Cluster cluster = init(Cluster.create(3, cfg -> cfg .set("paxos_variant", "v2") + .set("accord.enabled", false) // this test monkeys with TCM which can cause confussion for Accord while it fetches epochs... .set("paxos_purge_grace_period", "0s") .set("truncate_request_timeout_in_ms", 1000L))) ) diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java index 66d677080ebd..19a675b8d244 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java @@ -18,11 +18,13 @@ package org.apache.cassandra.distributed.test.accord; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.cassandra.distributed.api.IInvokableInstance; import org.junit.Assert; import org.junit.Test; @@ -35,6 +37,9 @@ import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.concurrent.CountDownLatch; +import static org.apache.cassandra.distributed.api.Feature.GOSSIP; +import static org.apache.cassandra.distributed.api.Feature.NETWORK; + public class AccordJournalIntegrationTest extends TestBaseImpl { @Test @@ -45,8 +50,7 @@ public void saveLoadSanityCheck() throws Throwable .withoutVNodes() .start())) { - final String TABLE = KEYSPACE + ".test_table"; - cluster.schemaChange("CREATE TABLE " + TABLE + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'"); + final String TABLE = createTable(cluster); List threads = new ArrayList<>(); int numThreads = 10; CountDownLatch latch = CountDownLatch.newCountDownLatch(numThreads); @@ -94,18 +98,9 @@ public void memtableStateReloadingTest() throws Throwable .start()) { cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};"); - final String TABLE = KEYSPACE + ".test_table"; - cluster.schemaChange("CREATE TABLE " + TABLE + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'"); + final String TABLE = createTable(cluster); - for (int j = 0; j < 1_000; j++) - { - cluster.coordinator(1).execute("BEGIN TRANSACTION\n" + - "INSERT INTO " + TABLE + "(k, c, v) VALUES (?, ?, ?);\n" + - "COMMIT TRANSACTION", - ConsistencyLevel.ALL, - j, j, 1 - ); - } + insertData(cluster, TABLE); Object[][] before = cluster.coordinator(1).execute("SELECT * FROM " + TABLE + " WHERE k = ?;", ConsistencyLevel.SERIAL, 1); @@ -122,4 +117,45 @@ public void memtableStateReloadingTest() throws Throwable } } } + + @Test + public void restartWithEpochChanges() throws IOException + { + try (Cluster cluster = Cluster.build(3).withoutVNodes().withConfig(c -> c.with(GOSSIP).with(NETWORK)).start()) + { + init(cluster); + final String TABLE = createTable(cluster); + cluster.get(1).nodetoolResult("cms", "reconfigure", "3").asserts().success(); + + insertData(cluster, TABLE); + + IInvokableInstance restartNode = cluster.get(1); + ClusterUtils.stopUnchecked(restartNode); + + // make epoch changes + for (int i = 0; i < 10; i++) + cluster.schemaChange("ALTER TABLE " + TABLE + " WITH comment = 'change " + i + "'", true, cluster.get(2)); + + restartNode.startup(); + insertData(cluster, TABLE); + } + } + + private void insertData(Cluster cluster, String TABLE) { + for (int j = 0; j < 1_000; j++) + { + cluster.coordinator(1).execute("BEGIN TRANSACTION\n" + + "INSERT INTO " + TABLE + "(k, c, v) VALUES (?, ?, ?);\n" + + "COMMIT TRANSACTION", + ConsistencyLevel.ALL, + j, j, 1 + ); + } + } + + private String createTable(Cluster cluster) { + final String TABLE = KEYSPACE + ".test_table"; + cluster.schemaChange("CREATE TABLE " + TABLE + " (k int, c int, v int, primary key (k, c)) WITH transactional_mode='full'"); + return TABLE; + } } \ No newline at end of file diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/BootWithMetadataTest.java b/test/distributed/org/apache/cassandra/distributed/test/log/BootWithMetadataTest.java index fc6dad5adf0f..ae269dbdd6e6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/BootWithMetadataTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/BootWithMetadataTest.java @@ -52,6 +52,8 @@ public class BootWithMetadataTest extends TestBaseImpl public void resetTest() throws IOException, ExecutionException, InterruptedException { try (Cluster cluster = init(builder().withNodes(3) + // Accord tracks epochs, and if the expose no longer exist it is not able to process anything, causing it to crash... + .withConfig(c -> c.set("accord.enabled", false)) .start())) { long epoch = 0; @@ -94,6 +96,8 @@ public void resetTest() throws IOException, ExecutionException, InterruptedExcep public void newCMSTest() throws IOException, ExecutionException, InterruptedException { try (Cluster cluster = init(builder().withNodes(4) + // Accord tracks epochs, and if the expose no longer exist it is not able to process anything, causing it to crash... + .withConfig(c -> c.set("accord.enabled", false)) .start())) { for (int i = 0; i < 10; i++) diff --git a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java index 2bf1047b5587..458dbe13ba98 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/log/CoordinatorPathTestBase.java @@ -127,7 +127,9 @@ public void coordinatorPathTest(TokenPlacementModel.ReplicationFactor rf, TestRu try (Cluster cluster = builder().withNodes(1) .withConfig(cfg -> cfg.set("seed_provider", new ParameterizedClass(SimpleSeedProvider.class.getName(), - Collections.singletonMap("seeds", fakeCmsNode.id() + ":7012")))) + Collections.singletonMap("seeds", fakeCmsNode.id() + ":7012"))) + // Accord depends on Processor.reconstruct, but those verbs are not simulated, causing the tests to fail + .set("accord.enabled", false)) .withTokenSupplier(factory) .withNodeIdTopology(NetworkTopology.singleDcNetworkTopology(10, "dc0", "rack0")) .createWithoutStarting(); diff --git a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java index a0275188479b..57a1efa5e98b 100644 --- a/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java +++ b/test/distributed/org/apache/cassandra/fuzz/topology/TopologyMixupTestBase.java @@ -104,9 +104,9 @@ private enum TopologyChange AddNode, RemoveNode, HostReplace, + StopNode, + StartNode, //TODO (coverage): add the following states once supported -// StopNode, -// StartNode, // MoveToken //TODO (coverage): node migrate to another rack or dc (unsupported on trunk as of this writing, but planned work for TCM) // MoveNodeToNewRack, @@ -128,7 +128,7 @@ private enum RemoveType private Command, Void, ?> repairCommand(int toCoordinate) { return new SimpleCommand<>(state -> "nodetool repair " + state.schemaSpec.keyspaceName() + ' ' + state.schemaSpec.name() + " from node" + toCoordinate + state.commandNamePostfix(), - state -> state.cluster.get(toCoordinate).nodetoolResult("repair", state.schemaSpec.keyspaceName(), state.schemaSpec.name()).asserts().success()); + state -> state.cluster.get(toCoordinate).nodetoolResult("repair", state.schemaSpec.keyspaceName(), state.schemaSpec.name(), "--force").asserts().success()); } private Command, Void, ?> waitForCMSToQuiesce() @@ -137,6 +137,29 @@ private enum RemoveType state -> ClusterUtils.waitForCMSToQuiesce(state.cluster, state.cmsGroup)); } + private Command, Void, ?> stopInstance(RandomSource rs, State state) + { + int toStop = rs.pickInt(state.topologyHistory.up()); + return stopInstance(toStop, "Normal Stop"); + } + + private Command, Void, ?> startInstance(RandomSource rs, State state) + { + int toStop = rs.pickInt(state.topologyHistory.down()); + return startInstance(toStop); + } + + private Command, Void, ?> startInstance(int toStart) + { + return new SimpleCommand<>(state -> "Start Node" + toStart + state.commandNamePostfix(), + state -> { + IInvokableInstance inst = state.cluster.get(toStart); + TopologyHistory.Node node = state.topologyHistory.node(toStart); + inst.startup(); + node.up(); + }); + } + private Command, Void, ?> stopInstance(int toRemove, String why) { return new SimpleCommand<>(state -> "Stop Node" + toRemove + " for " + why + state.commandNamePostfix(), @@ -256,21 +279,18 @@ private enum RemoveType TopologyHistory.Node adding = state.topologyHistory.replace(nodeToReplace); TopologyHistory.Node removing = state.topologyHistory.nodes.get(nodeToReplace); - return multistep(new SimpleCommand<>("Stop Node" + nodeToReplace + " for HostReplace; Node" + adding.id + state.commandNamePostfix(), s2 -> { - ClusterUtils.stopUnchecked(toReplace); - removing.down(); - }), - new SimpleCommand<>("Host Replace Node" + nodeToReplace + "; Node" + adding.id + state.commandNamePostfix(), s2 -> { - logger.info("node{} starting host replacement; epoch={}", adding.id, HackSerialization.tcmEpochAndSync(s2.cluster.getFirstRunningInstance())); - removing.status = TopologyHistory.Node.Status.BeingReplaced; - IInvokableInstance inst = ClusterUtils.replaceHostAndStart(s2.cluster, toReplace); - s2.topologyHistory.replaced(removing, adding); - long epoch = HackSerialization.tcmEpoch(inst); - s2.currentEpoch.set(epoch); - logger.info("{} completed host replacement in epoch={}", inst, epoch); - }), - //TODO (remove after rebase to trunk): https://issues.apache.org/jira/browse/CASSANDRA-19705 After the rebase to trunk this is not needed. The issue is that the CMS placement removes the node, it does not promote another node, this cases rf=3 to become rf=2 - new SimpleCommand<>("CMS reconfigure on Node" + adding.id + state.commandNamePostfix(), s2 -> s2.cluster.get(adding.id).nodetoolResult("cms", "reconfigure", Integer.toString(TARGET_RF)).asserts().success()) + return multistep(stopInstance(nodeToReplace, "HostReplace; Node" + adding.id), + new SimpleCommand<>("Host Replace Node" + nodeToReplace + "; Node" + adding.id + state.commandNamePostfix(), s2 -> { + logger.info("node{} starting host replacement; epoch={}", adding.id, HackSerialization.tcmEpochAndSync(s2.cluster.getFirstRunningInstance())); + removing.status = TopologyHistory.Node.Status.BeingReplaced; + IInvokableInstance inst = ClusterUtils.replaceHostAndStart(s2.cluster, toReplace); + s2.topologyHistory.replaced(removing, adding); + long epoch = HackSerialization.tcmEpoch(inst); + s2.currentEpoch.set(epoch); + logger.info("{} completed host replacement in epoch={}", inst, epoch); + }), + //TODO (remove after rebase to trunk): https://issues.apache.org/jira/browse/CASSANDRA-19705 After the rebase to trunk this is not needed. The issue is that the CMS placement removes the node, it does not promote another node, this cases rf=3 to become rf=2 + new SimpleCommand<>("CMS reconfigure on Node" + adding.id + state.commandNamePostfix(), s2 -> s2.cluster.get(adding.id).nodetoolResult("cms", "reconfigure", Integer.toString(TARGET_RF)).asserts().success()) ); } @@ -314,15 +334,20 @@ private EnumSet possibleTopologyChanges(State state) EnumSet possibleTopologyChanges = EnumSet.noneOf(TopologyChange.class); // up or down is logically more correct, but since this runs sequentially and after the topology changes are complete, we don't have downed nodes at this point // so up is enough to know the topology size - int size = state.topologyHistory.up().length; - if (size < state.topologyHistory.maxNodes) + int up = state.topologyHistory.up().length; + int down = state.topologyHistory.down().length; + int total = up + down; + if (total < state.topologyHistory.maxNodes) possibleTopologyChanges.add(TopologyChange.AddNode); - if (size > state.topologyHistory.quorum()) + if (up > state.topologyHistory.quorum()) { - if (size > TARGET_RF) + if (up > TARGET_RF) possibleTopologyChanges.add(TopologyChange.RemoveNode); possibleTopologyChanges.add(TopologyChange.HostReplace); + possibleTopologyChanges.add(TopologyChange.StopNode); } + if (down > 0) + possibleTopologyChanges.add(TopologyChange.StartNode); return possibleTopologyChanges; } @@ -342,6 +367,12 @@ private EnumSet possibleTopologyChanges(State state) case HostReplace: possible.put(rs -> multistep(hostReplace(rs, state), waitForCMSToQuiesce()), 1); break; + case StartNode: + possible.put(rs -> startInstance(rs, state), 1); + break; + case StopNode: + possible.put(rs -> stopInstance(rs, state), 1); + break; default: throw new UnsupportedOperationException(task.name()); } @@ -568,11 +599,21 @@ public Collection tokens(int i) } public int[] up() + { + return nodes(Node.Status.Up); + } + + public int[] down() + { + return nodes(Node.Status.Down); + } + + private int[] nodes(Node.Status target) { IntArrayList up = new IntArrayList(nodes.size(), -1); for (Map.Entry n : nodes.entrySet()) { - if (n.getValue().status == Node.Status.Up) + if (n.getValue().status == target) up.add(n.getKey()); } int[] ints = up.toIntArray(); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index 94e181a853b3..9daa1bb9fa45 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -185,7 +185,7 @@ public void initialEpochTest() throws Throwable Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); service.reportTopology(topology1); - loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { + loadEpoch(1, null, (epoch, cm, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { Assert.assertEquals(topology1, topology); Assert.assertTrue(remoteSync.isEmpty()); }); @@ -193,7 +193,7 @@ public void initialEpochTest() throws Throwable service.receiveRemoteSyncComplete(ID1, 1); service.receiveRemoteSyncComplete(ID2, 1); - loadEpoch(1, (epoch, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { + loadEpoch(1, null, (epoch, cm, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { Assert.assertEquals(topology1, topology); Assert.assertEquals(Sets.newHashSet(ID1, ID2), remoteSync); }); From 282bacb84b2219306433e3caa4c190785a0e4fb4 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Mon, 30 Sep 2024 23:52:34 +0100 Subject: [PATCH 156/340] ninja: fix CFK serializer --- modules/accord | 2 +- .../serializers/CommandsForKeySerializer.java | 20 ++++++++++--------- .../CommandsForKeySerializerTest.java | 5 +++-- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/modules/accord b/modules/accord index b0ad8e8cfbcc..e2e72287a70b 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit b0ad8e8cfbcc35fc8152850760cd1f34f06836e7 +Subproject commit e2e72287a70bfff4cf6a5639d4a60a0c9e8086b4 diff --git a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java index a12b17098ed4..c5ce8054f720 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializer.java @@ -137,7 +137,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) { if (nodeIdCount + 3 >= nodeIds.length) { - nodeIdCount = compact(nodeIds); + nodeIdCount = compact(nodeIds, nodeIdCount); if (nodeIdCount > nodeIds.length/2 || nodeIdCount + 2 >= nodeIds.length) nodeIds = cachedInts().resize(nodeIds, nodeIds.length, nodeIds.length * 2); } @@ -169,7 +169,7 @@ public static ByteBuffer toBytesWithoutKey(CommandsForKey cfk) } } } - nodeIdCount = compact(nodeIds); + nodeIdCount = compact(nodeIds, nodeIdCount); Invariants.checkState(nodeIdCount > 0); } @@ -679,11 +679,10 @@ public static CommandsForKey fromBytes(RoutingKey key, ByteBuffer in) RedundantBefore.Entry boundsInfo = NO_BOUNDS_INFO.withEpochs(minEpoch, maxEpoch); long prevEpoch = minEpoch + VIntCoding.readVInt(in); long prevHlc = VIntCoding.readUnsignedVInt(in); - TxnId redundantBefore; { int flags = VIntCoding.readUnsignedVInt32(in); Node.Id node = nodeIds[VIntCoding.readUnsignedVInt32(in)]; - redundantBefore = TxnId.fromValues(prevEpoch, prevHlc, flags, node); + boundsInfo = boundsInfo.withGcBeforeBeforeAtLeast(TxnId.fromValues(prevEpoch, prevHlc, flags, node)); } int prunedBeforeIndex = VIntCoding.readUnsignedVInt32(in) - 1; @@ -938,7 +937,7 @@ public static CommandsForKey fromBytes(RoutingKey key, ByteBuffer in) } cachedTxnIds().forceDiscard(txnIds, commandCount); - return CommandsForKey.SerializerSupport.create(key, txns, unmanageds, redundantBefore, prunedBeforeIndex == -1 ? TxnId.NONE : txns[prunedBeforeIndex]); + return CommandsForKey.SerializerSupport.create(key, txns, unmanageds, prunedBeforeIndex == -1 ? TxnId.NONE : txns[prunedBeforeIndex], boundsInfo); } private static TxnInfo create(RedundantBefore.Entry boundsInfo, @Nonnull TxnId txnId, InternalStatus status, int statusOverrides, @Nonnull Timestamp executeAt, @Nonnull TxnId[] missing, @Nonnull Ballot ballot) @@ -984,16 +983,16 @@ private static int hlcBytesLookupToHlcFlagLookup(int bytesLookup) return flagsLookup; } - private static int compact(int[] buffer) + private static int compact(int[] buffer, int usedSize) { - Arrays.sort(buffer); + Arrays.sort(buffer, 0, usedSize); int count = 0; int j = 0; - while (j < buffer.length) + while (j < usedSize) { int prev; buffer[count++] = prev = buffer[j]; - while (++j < buffer.length && buffer[j] == prev) {} + while (++j < usedSize && buffer[j] == prev) {} } return count; } @@ -1015,6 +1014,9 @@ static final class BitReader long read(int readCount, ByteBuffer in) { + if (readCount == 64 && bitCount == 0) + return in.getLong(); + long result = bitBuffer >>> (64 - readCount); int remaining = bitCount - readCount; if (remaining >= 0) diff --git a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java index 6f41f0083023..ae11f3fa5980 100644 --- a/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java +++ b/test/unit/org/apache/cassandra/service/accord/serializers/CommandsForKeySerializerTest.java @@ -81,6 +81,7 @@ import org.apache.cassandra.utils.AccordGenerators; import org.apache.cassandra.utils.CassandraGenerators; +import static accord.local.cfk.CommandsForKey.NO_BOUNDS_INFO; import static accord.primitives.Status.Durability.NotDurable; import static accord.primitives.Known.KnownExecuteAt.ExecuteAtErased; import static accord.primitives.Known.KnownExecuteAt.ExecuteAtUnknown; @@ -540,7 +541,7 @@ public void test() } else unmanaged = CommandsForKey.NO_PENDING_UNMANAGED; - CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, unmanaged, TxnId.NONE, TxnId.NONE); + CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, info, unmanaged, TxnId.NONE, NO_BOUNDS_INFO); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); @@ -557,7 +558,7 @@ public void thereAndBackAgain() TxnId txnId = TxnId.fromValues(11,34052499,2,1); CommandsForKey expected = CommandsForKey.SerializerSupport.create(pk, new TxnInfo[] { TxnInfo.create(txnId, InternalStatus.PREACCEPTED_OR_ACCEPTED_INVALIDATE, true, txnId, TxnId.NO_TXNIDS, Ballot.ZERO) }, - CommandsForKey.NO_PENDING_UNMANAGED, TxnId.NONE, TxnId.NONE); + CommandsForKey.NO_PENDING_UNMANAGED, TxnId.NONE, NO_BOUNDS_INFO); ByteBuffer buffer = CommandsForKeySerializer.toBytesWithoutKey(expected); CommandsForKey roundTrip = CommandsForKeySerializer.fromBytes(pk, buffer); From 5693d0e2f44cc0e8d190cd31759e963db8d82b68 Mon Sep 17 00:00:00 2001 From: Ariel Weisberg Date: Tue, 1 Oct 2024 10:24:50 -0400 Subject: [PATCH 157/340] ninja: Fix AcceptSerializer --- .../cassandra/service/accord/serializers/AcceptSerializers.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java index 9c6052832e64..9bb7429c5c32 100644 --- a/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/serializers/AcceptSerializers.java @@ -121,7 +121,7 @@ public void serialize(AcceptReply reply, DataOutputPlus out, int version) throws CommandSerializers.ballot.serialize(reply.supersededBy, out, version); break; case Redundant: - int flags = 5 | (reply.supersededBy == null ? 0x8 : 0) | (reply.committedExecuteAt == null ? 0x10 : 0); + int flags = 5 | (reply.supersededBy != null ? 0x8 : 0) | (reply.committedExecuteAt != null ? 0x10 : 0); out.writeByte(flags); if (reply.supersededBy != null) CommandSerializers.ballot.serialize(reply.supersededBy, out, version); From 8f7b1c2351a9c46a552221898ec3e4d32edf1c5c Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Tue, 1 Oct 2024 10:29:45 +0200 Subject: [PATCH 158/340] Follow-up to CASSANDRA-19967 and CASSANDRA-19869 patch by Alex Petrov, Ariel Weisberg, Benedict Elliott Smith, Blake Eggleston and David Capwell --- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 15 +- .../db/compaction/CompactionIterator.java | 111 +++++--- .../db/partitions/PartitionUpdate.java | 24 ++ .../cassandra/journal/ActiveSegment.java | 34 ++- .../apache/cassandra/journal/Compactor.java | 31 ++- .../org/apache/cassandra/journal/Journal.java | 25 +- .../apache/cassandra/journal/OnDiskIndex.java | 2 +- .../apache/cassandra/journal/Segments.java | 8 + .../cassandra/journal/StaticSegment.java | 9 +- src/java/org/apache/cassandra/net/Verb.java | 2 +- .../service/accord/AccordCachingState.java | 9 +- .../service/accord/AccordCommandStore.java | 250 ++++++++++-------- .../service/accord/AccordCommandStores.java | 52 +++- .../accord/AccordConfigurationService.java | 16 +- .../service/accord/AccordJournal.java | 143 ++++++++-- .../accord/AccordJournalValueSerializers.java | 2 +- .../service/accord/AccordKeyspace.java | 2 - .../service/accord/AccordObjectSizes.java | 2 +- .../accord/AccordSafeCommandStore.java | 90 +++++-- .../accord/AccordSegmentCompactor.java | 12 +- .../service/accord/AccordService.java | 20 +- .../service/accord/AccordStateCache.java | 19 +- .../service/accord/AccordTopology.java | 2 + .../service/accord/AccordVerbHandler.java | 2 - .../accord/CommandsForRangesLoader.java | 2 +- .../service/accord/IAccordService.java | 8 +- .../cassandra/service/accord/IJournal.java | 2 +- .../cassandra/service/accord/JournalKey.java | 134 ++++------ .../service/accord/SavedCommand.java | 146 +++++++--- .../service/accord/async/AsyncOperation.java | 24 +- .../tcm/AtomicLongBackedProcessor.java | 59 ++++- .../cassandra/tcm/ClusterMetadataService.java | 2 +- .../org/apache/cassandra/tcm/Startup.java | 6 +- .../apache/cassandra/tcm/log/LocalLog.java | 5 + .../apache/cassandra/tcm/log/LogReader.java | 6 + .../test/accord/AccordBootstrapTest.java | 28 +- .../accord/AccordJournalIntegrationTest.java | 2 +- .../test/accord/AccordLoadTest.java | 56 +++- .../accord/AccordMigrationRaceTestBase.java | 5 +- .../accord/AccordJournalCompactionTest.java | 24 +- .../org/apache/cassandra/ServerTestUtils.java | 6 +- .../config/YamlConfigurationLoaderTest.java | 17 ++ .../CompactionAccordIteratorsTest.java | 11 +- .../accord/AccordCommandStoreTest.java | 5 +- .../service/accord/AccordCommandTest.java | 4 +- .../AccordConfigurationServiceTest.java | 136 +++++++--- .../accord/AccordJournalOrderTest.java | 4 +- .../service/accord/AccordTestUtils.java | 12 +- .../cassandra/service/accord/MockJournal.java | 26 +- .../service/accord/SavedCommandTest.java | 5 +- .../accord/SimulatedAccordCommandStore.java | 12 +- .../service/accord/SimulatedDepsTest.java | 62 ++++- ...ulatedRandomKeysWithRangeConflictTest.java | 104 +++++--- .../service/accord/async/AsyncLoaderTest.java | 4 +- .../accord/async/AsyncOperationTest.java | 14 +- .../tcm/ValidatingClusterMetadataService.java | 63 +++++ 57 files changed, 1278 insertions(+), 600 deletions(-) diff --git a/modules/accord b/modules/accord index e2e72287a70b..4a8566af7b7d 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit e2e72287a70bfff4cf6a5639d4a60a0c9e8086b4 +Subproject commit 4a8566af7b7de2ddec2c7527d7e2da593f99865f diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 0350e587c629..9e0e3305cef9 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -63,12 +63,12 @@ public long recoveryDelayFor(TxnId txnId, TimeUnit unit) public DurationSpec.IntMillisecondsBound range_barrier_timeout = new DurationSpec.IntMillisecondsBound("2m"); - public volatile DurationSpec fast_path_update_delay = new DurationSpec.IntSecondsBound(5); + public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec schedule_durability_frequency = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec durability_txnid_lag = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec shard_durability_cycle = new DurationSpec.IntMinutesBound(1); - public volatile DurationSpec global_durability_cycle = new DurationSpec.IntMinutesBound(10); + public volatile DurationSpec.IntSecondsBound schedule_durability_frequency = new DurationSpec.IntSecondsBound(5); + public volatile DurationSpec.IntSecondsBound durability_txnid_lag = new DurationSpec.IntSecondsBound(5); + public volatile DurationSpec.IntSecondsBound shard_durability_cycle = new DurationSpec.IntSecondsBound(1, TimeUnit.MINUTES); + public volatile DurationSpec.IntSecondsBound global_durability_cycle = new DurationSpec.IntSecondsBound(10, TimeUnit.MINUTES); public enum TransactionalRangeMigration { @@ -87,7 +87,7 @@ public enum TransactionalRangeMigration * default transactional mode for tables created by this node when no transactional mode has been specified in the DDL */ public TransactionalMode default_transactional_mode = TransactionalMode.off; - public boolean ephemeralReadEnabled = false; + public boolean ephemeralReadEnabled = true; public boolean state_cache_listener_jfr_enabled = true; public final JournalSpec journal = new JournalSpec(); public final MinEpochRetrySpec minEpochSyncRetry = new MinEpochRetrySpec(); @@ -107,6 +107,7 @@ public static class JournalSpec implements Params public FlushMode flushMode = FlushMode.PERIODIC; public DurationSpec.IntMillisecondsBound flushPeriod; // pulls default from 'commitlog_sync_period' public DurationSpec.IntMillisecondsBound periodicFlushLagBlock = new DurationSpec.IntMillisecondsBound("1500ms"); + public DurationSpec.IntMillisecondsBound compactionPeriod = new DurationSpec.IntMillisecondsBound("60000ms"); @Override public int segmentSize() @@ -135,7 +136,7 @@ public boolean enableCompaction() @Override public int compactionPeriodMillis() { - return 60_000; + return compactionPeriod.toMilliseconds(); } @JsonIgnore diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index ab072f68681d..a32ad7634654 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -32,6 +32,9 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Ordering; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import accord.local.Cleanup; import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; @@ -43,6 +46,7 @@ import accord.primitives.Status.Durability; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.utils.Invariants; import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.ColumnIdentifier; @@ -57,6 +61,7 @@ import org.apache.cassandra.db.SystemKeyspace; import org.apache.cassandra.db.filter.ColumnFilter; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.db.partitions.PurgeFunction; import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator; @@ -104,8 +109,9 @@ import org.apache.cassandra.service.paxos.uncommitted.PaxosRows; import org.apache.cassandra.utils.TimeUUID; +import static accord.local.Cleanup.ERASE; +import static accord.local.Cleanup.TRUNCATE; import static accord.local.Cleanup.TRUNCATE_WITH_OUTCOME; -import static accord.local.Cleanup.shouldCleanup; import static accord.local.Cleanup.shouldCleanupPartial; import static com.google.common.base.Preconditions.checkState; import static java.util.concurrent.TimeUnit.MICROSECONDS; @@ -142,7 +148,9 @@ */ public class CompactionIterator extends CompactionInfo.Holder implements UnfilteredPartitionIterator { + private static final Logger logger = LoggerFactory.getLogger(CompactionIterator.class); private static final long UNFILTERED_TO_UPDATE_PROGRESS = 100; + private static Object[] TRUNCATE_CLUSTERING_VALUE = new Object[] { Long.MAX_VALUE, Integer.MAX_VALUE }; private final OperationType type; private final AbstractCompactionController controller; @@ -801,8 +809,8 @@ protected Row applyToRow(Row row) class AccordCommandsPurger extends AbstractPurger { final Int2ObjectHashMap redundantBefores; + final Int2ObjectHashMap durableBefores; final Int2ObjectHashMap ranges; - final DurableBefore durableBefore; int storeId; TxnId txnId; @@ -812,7 +820,7 @@ class AccordCommandsPurger extends AbstractPurger IAccordService.CompactionInfo compactionInfo = accordService.get().getCompactionInfo(); this.redundantBefores = compactionInfo.redundantBefores; this.ranges = compactionInfo.ranges; - this.durableBefore = compactionInfo.durableBefore; + this.durableBefores = compactionInfo.durableBefores; } protected void beginPartition(UnfilteredRowIterator partition) @@ -828,6 +836,7 @@ protected Row applyToRow(Row row) updateProgress(); RedundantBefore redundantBefore = redundantBefores.get(storeId); + DurableBefore durableBefore = durableBefores.get(storeId); // TODO (expected): if the store has been retired, this should return null if (redundantBefore == null) return row; @@ -1008,8 +1017,8 @@ protected Row applyToStatic(Row row) class AccordJournalPurger extends AbstractPurger { final Int2ObjectHashMap redundantBefores; + final Int2ObjectHashMap durableBefores; final Int2ObjectHashMap ranges; - final DurableBefore durableBefore; final ColumnMetadata recordColumn; final ColumnMetadata versionColumn; final KeySupport keySupport = JournalKey.SUPPORT; @@ -1021,6 +1030,8 @@ class AccordJournalPurger extends AbstractPurger Object[] lastClustering = null; long maxSeenTimestamp = -1; final int userVersion; + long lastDescriptor = -1; + int lastOffset = -1; public AccordJournalPurger(Supplier serviceSupplier) { @@ -1031,7 +1042,7 @@ public AccordJournalPurger(Supplier serviceSupplier) this.redundantBefores = compactionInfo.redundantBefores; this.ranges = compactionInfo.ranges; - this.durableBefore = compactionInfo.durableBefore; + this.durableBefores = compactionInfo.durableBefores; ColumnFamilyStore cfs = Keyspace.open(AccordKeyspace.metadata().name).getColumnFamilyStore(AccordKeyspace.JOURNAL); this.recordColumn = cfs.metadata().getColumn(ColumnIdentifier.getInterned("record", false)); this.versionColumn = cfs.metadata().getColumn(ColumnIdentifier.getInterned("user_version", false)); @@ -1045,6 +1056,8 @@ protected void beginPartition(UnfilteredRowIterator partition) serializer = (AccordJournalValueSerializers.FlyweightSerializer) key.type.serializer; builder = serializer.mergerFor(key); maxSeenTimestamp = -1; + lastDescriptor = -1; + lastOffset = -1; } @Override @@ -1057,13 +1070,17 @@ protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition try { - PartitionUpdate.SimpleBuilder newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); - + List rows = new ArrayList<>(); while (partition.hasNext()) - applyToRow((Row) partition.next()); + { + Row row = (Row) partition.next(); + rows.add(row); + collect(row); + } if (key.type != JournalKey.Type.COMMAND_DIFF) { + PartitionUpdate.SimpleBuilder newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); try (DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get()) { serializer.reserialize(key, builder, out, userVersion); @@ -1080,49 +1097,40 @@ protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition } SavedCommand.Builder commandBuilder = (SavedCommand.Builder) builder; - - // Do not have txnId in selected SSTables; remove - if (commandBuilder.txnId() == null) - return newVersion.build().unfilteredIterator(); + if (commandBuilder.isEmpty()) + { + Invariants.checkState(rows.isEmpty()); + return partition; + } RedundantBefore redundantBefore = redundantBefores.get(key.commandStoreId); + DurableBefore durableBefore = durableBefores.get(key.commandStoreId); + Cleanup cleanup = commandBuilder.shouldCleanup(redundantBefore, durableBefore); + if (cleanup == ERASE) + return PartitionUpdate.fullPartitionDelete(metadata(), partition.partitionKey(), maxSeenTimestamp, nowInSec).unfilteredIterator(); - Cleanup cleanup = shouldCleanup(commandBuilder.txnId(), commandBuilder.saveStatus(), - commandBuilder.durability(), commandBuilder.participants(), - redundantBefore, durableBefore); - switch (cleanup) + commandBuilder = commandBuilder.maybeCleanup(cleanup); + if (commandBuilder != builder) { - case EXPUNGE: + if (commandBuilder == null) return null; - case EXPUNGE_PARTIAL: - newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); - commandBuilder = commandBuilder.expungePartial(); - - newVersion.row(lastClustering) - .add(recordColumn.name.toString(), commandBuilder.asByteBuffer(userVersion)); - - return newVersion.build().unfilteredIterator(); + PartitionUpdate.SimpleBuilder newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); - case ERASE: - return PartitionUpdate.fullPartitionDelete(metadata(), partition.partitionKey(), maxSeenTimestamp, nowInSec).unfilteredIterator(); + Row.SimpleBuilder rowBuilder; + if (cleanup == TRUNCATE || cleanup == TRUNCATE_WITH_OUTCOME) + rowBuilder = newVersion.row(TRUNCATE_CLUSTERING_VALUE); + else + rowBuilder = newVersion.row(lastClustering); - case VESTIGIAL: - case INVALIDATE: - case TRUNCATE_WITH_OUTCOME: - case TRUNCATE: - newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); - commandBuilder = commandBuilder.saveStatusOnly(); + rowBuilder.add("record", commandBuilder.asByteBuffer(userVersion)) + .add("user_version", userVersion); - newVersion.row(lastClustering) - .add(recordColumn.name.toString(), commandBuilder.asByteBuffer(userVersion)); - - return newVersion.build().unfilteredIterator(); + return newVersion.build().unfilteredIterator(); + } - case NO: - return newVersion.build().unfilteredIterator(); - default: - throw new IllegalStateException("Unknown cleanup: " + cleanup);} + return PartitionUpdate.multiRowUpdate(AccordKeyspace.Journal, partition.partitionKey(), rows) + .unfilteredIterator(); } catch (IOException e) { @@ -1132,10 +1140,29 @@ protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition @Override protected Row applyToRow(Row row) + { + return row; + } + + protected void collect(Row row) { updateProgress(); maxSeenTimestamp = row.primaryKeyLivenessInfo().timestamp(); ByteBuffer record = row.getCell(recordColumn).buffer(); + long descriptor = LongType.instance.compose(row.clustering().getBufferArray()[0]); + int offset = Int32Type.instance.compose(row.clustering().getBufferArray()[1]); + + if (lastOffset != -1) + { + Invariants.checkState(descriptor >= lastDescriptor, + "Descriptors were accessed out of order: %d was accessed after %d", descriptor, lastDescriptor); + Invariants.checkState(descriptor != lastDescriptor || + offset > lastOffset, + "Offsets within %s were accessed out of order: %d was accessed after %s", offset, lastOffset); + } + lastDescriptor = descriptor; + lastOffset = offset; + try (DataInputBuffer in = new DataInputBuffer(record, false)) { int userVersion = Int32Type.instance.compose(row.getCell(versionColumn).buffer()); @@ -1146,7 +1173,6 @@ protected Row applyToRow(Row row) { throw new RuntimeException(e); } - return null; } @Override @@ -1157,7 +1183,6 @@ protected Row applyToStatic(Row row) } } - private static class AbortableUnfilteredPartitionTransformation extends Transformation { private final AbortableUnfilteredRowTransformation abortableIter; diff --git a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java index 1b543bd2c7df..87e2154e5239 100644 --- a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java +++ b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java @@ -199,6 +199,30 @@ public static PartitionUpdate singleRowUpdate(TableMetadata metadata, DecoratedK return singleRowUpdate(metadata, key, row.isStatic() ? null : row, row.isStatic() ? row : null); } + /** + * Creates an immutable partition update that contains a single row update. + * + * @param metadata the metadata for the created update. + * @param key the partition key for the partition to update. + * @param rows the rows for the update (may not be static). + * + * @return the newly created partition update containing only {@code row}. + */ + public static PartitionUpdate multiRowUpdate(TableMetadata metadata, DecoratedKey key, List rows) + { + if (rows.isEmpty()) + return emptyUpdate(metadata, key); + MutableDeletionInfo deletionInfo = MutableDeletionInfo.live(); + Columns columns = Columns.NONE; + for (Row row : rows) + columns = columns.mergeTo(Columns.from(row)); + + BTreePartitionData holder = new BTreePartitionData(new RegularAndStaticColumns(Columns.NONE, columns), + BTree.build(rows), deletionInfo, Rows.EMPTY_STATIC_ROW, + EncodingStats.NO_STATS); + return new PartitionUpdate(metadata, metadata.epoch, key, holder, deletionInfo, false); + } + /** * Creates an immutable partition update that contains a single row update. * diff --git a/src/java/org/apache/cassandra/journal/ActiveSegment.java b/src/java/org/apache/cassandra/journal/ActiveSegment.java index 1fd99054909c..69f2f3323c8f 100644 --- a/src/java/org/apache/cassandra/journal/ActiveSegment.java +++ b/src/java/org/apache/cassandra/journal/ActiveSegment.java @@ -23,7 +23,6 @@ import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; import java.util.*; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; import java.util.concurrent.locks.LockSupport; @@ -45,7 +44,8 @@ final class ActiveSegment extends Segment private final OpOrder appendOrder = new OpOrder(); // position in the buffer we are allocating from - private final AtomicInteger allocatePosition = new AtomicInteger(0); + private volatile int allocateOffset = 0; + private static final AtomicIntegerFieldUpdater allocateOffsetUpdater = AtomicIntegerFieldUpdater.newUpdater(ActiveSegment.class, "allocateOffset"); /* * Everything before this offset has been written and flushed. @@ -102,6 +102,11 @@ InMemoryIndex index() return index; } + boolean isEmpty() + { + return allocateOffset == 0; + } + @Override boolean isActive() { @@ -258,8 +263,8 @@ public String name() boolean shouldFlush() { - int allocatePosition = this.allocatePosition.get(); - return lastFlushedOffset < allocatePosition; + int allocateOffset = this.allocateOffset; + return lastFlushedOffset < allocateOffset; } public boolean isFlushed(long position) @@ -279,18 +284,18 @@ public long lastFlushedOffset() */ synchronized int flush(boolean fsync) { - int allocatePosition = this.allocatePosition.get(); - if (lastFlushedOffset >= allocatePosition) + int allocateOffset = this.allocateOffset; + if (lastFlushedOffset >= allocateOffset) return lastFlushedOffset; waitForModifications(); if (fsync) { fsyncInternal(); - lastFsyncOffsetUpdater.accumulateAndGet(this, allocatePosition, Math::max); + lastFsyncOffsetUpdater.accumulateAndGet(this, allocateOffset, Math::max); } - lastFlushedOffset = allocatePosition; - int syncedOffset = Math.min(allocatePosition, endOfBuffer); + lastFlushedOffset = allocateOffset; + int syncedOffset = Math.min(allocateOffset, endOfBuffer); syncedOffsets.mark(syncedOffset, fsync); flushComplete.signalAll(); return syncedOffset; @@ -343,7 +348,8 @@ private void fsyncInternal() boolean isFullyFlushed() { - return lastFsyncOffset >= allocatePosition.get(); + int allocateOffset = this.allocateOffset; + return lastFsyncOffset >= allocateOffset; } /** @@ -358,7 +364,7 @@ boolean discardUnusedTail() { while (true) { - int prev = allocatePosition.get(); + int prev = allocateOffset; int next = endOfBuffer + 1; if (prev >= next) @@ -368,7 +374,7 @@ boolean discardUnusedTail() return false; } - if (allocatePosition.compareAndSet(prev, next)) + if (allocateOffsetUpdater.compareAndSet(this, prev, next)) { // stopped allocating now; can only succeed once, no further allocation or discardUnusedTail can succeed endOfBuffer = prev; @@ -416,11 +422,11 @@ private int allocateBytes(int size) { while (true) { - int prev = allocatePosition.get(); + int prev = allocateOffset; int next = prev + size; if (next >= endOfBuffer) return -1; - if (allocatePosition.compareAndSet(prev, next)) + if (allocateOffsetUpdater.compareAndSet(this, prev, next)) { assert buffer != null; return prev; diff --git a/src/java/org/apache/cassandra/journal/Compactor.java b/src/java/org/apache/cassandra/journal/Compactor.java index a4638266fec9..51b2fec97b59 100644 --- a/src/java/org/apache/cassandra/journal/Compactor.java +++ b/src/java/org/apache/cassandra/journal/Compactor.java @@ -21,6 +21,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.cassandra.concurrent.ScheduledExecutorPlus; @@ -28,11 +29,12 @@ import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -final class Compactor implements Runnable, Shutdownable +public final class Compactor implements Runnable, Shutdownable { private final Journal journal; private final SegmentCompactor segmentCompactor; private final ScheduledExecutorPlus executor; + private Future scheduled; Compactor(Journal journal, SegmentCompactor segmentCompactor) { @@ -41,15 +43,26 @@ final class Compactor implements Runnable, Shutdownable this.segmentCompactor = segmentCompactor; } - void start() + synchronized void start() { - if (journal.params.enableCompaction()) - { - executor.scheduleWithFixedDelay(this, - journal.params.compactionPeriodMillis(), - journal.params.compactionPeriodMillis(), - TimeUnit.MILLISECONDS); - } + if (!journal.params.enableCompaction()) + schedule(journal.params.compactionPeriodMillis(), TimeUnit.MILLISECONDS); + } + + private synchronized void schedule(long period, TimeUnit units) + { + scheduled = executor.scheduleWithFixedDelay(this, period, period, units); + } + + public synchronized void updateCompactionPeriod(int period, TimeUnit units) + { + if (!journal.params.enableCompaction()) + return; + + if (scheduled != null) + scheduled.cancel(false); + + schedule(period, units); } @Override diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 624250d60583..5e91c7d3d3e7 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -236,6 +236,11 @@ public void runCompactorForTesting() compactor.run(); } + public Compactor compactor() + { + return compactor; + } + /** * Cleans up unfinished component files from previous run (metadata and index) */ @@ -520,6 +525,8 @@ private ActiveSegment.Allocation allocate(int entrySize, Set host ActiveSegment.Allocation alloc; while (null == (alloc = segment.allocate(entrySize, hosts))) { + if (entrySize >= (params.segmentSize() * 3) / 4) + throw new IllegalStateException("entrySize " + entrySize + " too large for a segmentSize of " + params.segmentSize()); // failed to allocate; move to a new segment with enough room advanceSegment(segment); segment = currentSegment; @@ -771,6 +778,11 @@ private void addNewActiveSegment(ActiveSegment activeSegment) swapSegments(current -> current.withNewActiveSegment(activeSegment)); } + private void removeEmptySegment(ActiveSegment activeSegment) + { + swapSegments(current -> current.withoutEmptySegment(activeSegment)); + } + private void replaceCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) { swapSegments(current -> current.withCompletedSegment(activeSegment, staticSegment)); @@ -864,13 +876,22 @@ public void run() void closeActiveSegmentAndOpenAsStatic(ActiveSegment activeSegment) { + if (activeSegment.isEmpty()) + { + removeEmptySegment(activeSegment); + activeSegment.closeAndDiscard(); + return; + } + closer.execute(new CloseActiveSegmentRunnable(activeSegment)); } @VisibleForTesting - public void closeCurrentSegmentForTesting() + public void closeCurrentSegmentForTestingIfNonEmpty() { ActiveSegment segment = currentSegment; + if (segment.isEmpty()) + return; advanceSegment(segment); while (!segments().isSwitched(segment)) { @@ -966,7 +987,7 @@ public class StaticSegmentIterator implements Closeable private StaticSegmentIterator() { this.segments = selectAndReference(Segment::isStatic); - this.readers = new PriorityQueue<>((o1, o2) -> keySupport.compare(o1.key(), o2.key())); + this.readers = new PriorityQueue<>(); for (Segment segment : this.segments.all()) { StaticSegment staticSegment = (StaticSegment)segment; diff --git a/src/java/org/apache/cassandra/journal/OnDiskIndex.java b/src/java/org/apache/cassandra/journal/OnDiskIndex.java index fe2c2713b99f..b2f1487ad0a5 100644 --- a/src/java/org/apache/cassandra/journal/OnDiskIndex.java +++ b/src/java/org/apache/cassandra/journal/OnDiskIndex.java @@ -356,7 +356,7 @@ private int binarySearch(K key) int low = 0, mid = entryCount, high = mid - 1, result = -1; while (low <= high) { - mid = (low + high) >> 1; + mid = (low + high) >>> 1; result = compareWithKeyAt(key, mid); if (result > 0) { diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java index a779aebf23fd..94282e9d8755 100644 --- a/src/java/org/apache/cassandra/journal/Segments.java +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -63,6 +63,14 @@ Segments withNewActiveSegment(ActiveSegment activeSegment) return new Segments<>(newSegments); } + Segments withoutEmptySegment(ActiveSegment activeSegment) + { + Long2ObjectHashMap> newSegments = new Long2ObjectHashMap<>(segments); + Segment oldValue = segments.remove(activeSegment.descriptor.timestamp); + Invariants.checkState(oldValue.asActive().isEmpty()); + return new Segments<>(newSegments); + } + Segments withCompletedSegment(ActiveSegment activeSegment, StaticSegment staticSegment) { Invariants.checkArgument(activeSegment.descriptor.equals(staticSegment.descriptor)); diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index c7ac7ce4103b..f5f15ee13c61 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -473,9 +473,12 @@ public int compareTo(KeyOrderReader that) that.ensureHasAdvanced(); int cmp = keySupport.compare(this.key(), that.key()); - return cmp != 0 - ? cmp - : this.descriptor.compareTo(that.descriptor); + if (cmp != 0) + return cmp; + cmp = Long.compare(this.descriptor.timestamp, that.descriptor.timestamp); + if (cmp != 0) + return cmp; + return Integer.compare(this.offset, that.offset); } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 42bce20c8c75..1115b77ac8f2 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -333,7 +333,7 @@ public enum Verb ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), ACCORD_CHECK_STATUS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), ACCORD_CALCULATE_DEPS_RSP (143, P2, writeTimeout, IMMEDIATE, () -> CalculateDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_CALCULATE_DEPS_REQ (144, P2, writeTimeout, IMMEDIATE, () -> CalculateDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CALCULATE_DEPS_RSP), + ACCORD_CALCULATE_DEPS_REQ (144, P2, longTimeout, IMMEDIATE, () -> CalculateDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CALCULATE_DEPS_RSP), ACCORD_GET_EPHMRL_READ_DEPS_RSP (161, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), diff --git a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java index 35ceec450dfe..50a48be1fb79 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCachingState.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCachingState.java @@ -64,14 +64,14 @@ static Factory defaultFactory() int references = 0; int lastQueriedEstimatedSizeOnHeap = 0; - final byte index; + final int index; private boolean shouldUpdateSize; AccordCachingState(K key, int index) { this.key = key; - Invariants.checkArgument(index >= 0 && index <= Byte.MAX_VALUE); - this.index = (byte) index; + Invariants.checkArgument(index >= 0); + this.index = index; //noinspection unchecked this.state = (State) Uninitialized.instance; } @@ -79,8 +79,7 @@ static Factory defaultFactory() private AccordCachingState(K key, int index, State state) { this.key = key; - Invariants.checkArgument(index >= 0 && index <= Byte.MAX_VALUE); - this.index = (byte) index; + this.index = index; this.state = state; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index ca654ddaae3f..f39080bf0873 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -26,9 +26,11 @@ import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.function.Function; +import java.util.function.IntFunction; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; @@ -40,7 +42,6 @@ import accord.api.LocalListeners; import accord.api.ProgressLog; import accord.api.RoutingKey; -import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Cleanup; import accord.local.Command; @@ -54,6 +55,7 @@ import accord.local.RedundantBefore; import accord.local.SafeCommand; import accord.local.SafeCommandStore; +import accord.local.cfk.CommandsForKey; import accord.primitives.Deps; import accord.primitives.Participants; import accord.primitives.Range; @@ -67,12 +69,10 @@ import accord.utils.async.AsyncChain; import accord.utils.async.AsyncChains; import org.apache.cassandra.cache.CacheSize; -import org.apache.cassandra.concurrent.ExecutorPlus; -import org.apache.cassandra.concurrent.Stage; +import org.apache.cassandra.concurrent.SequentialExecutorPlus; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.Mutation; -import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.async.AsyncOperation; import org.apache.cassandra.service.accord.events.CacheEvents; @@ -88,36 +88,15 @@ import static accord.primitives.Status.Stable; import static accord.primitives.Status.Truncated; import static accord.utils.Invariants.checkState; -import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; -public class AccordCommandStore extends CommandStore implements CacheSize +public class AccordCommandStore extends CommandStore { private static final Logger logger = LoggerFactory.getLogger(AccordCommandStore.class); private static final boolean CHECK_THREADS = CassandraRelevantProperties.TEST_ACCORD_STORE_THREAD_CHECKS_ENABLED.getBoolean(); - private static long getThreadId(ExecutorService executor) - { - if (!CHECK_THREADS) - return 0; - try - { - return executor.submit(() -> Thread.currentThread().getId()).get(); - } - catch (InterruptedException e) - { - throw new AssertionError(e); - } - catch (ExecutionException e) - { - throw new RuntimeException(e); - } - } - - private final long threadId; public final String loggingId; private final IJournal journal; - private final ExecutorService executor; - private final AccordStateCache stateCache; + private final CommandStoreExecutor executor; private final AccordStateCache.Instance commandCache; private final AccordStateCache.Instance timestampsForKeyCache; private final AccordStateCache.Instance commandsForKeyCache; @@ -126,29 +105,6 @@ private static long getThreadId(ExecutorService executor) private long lastSystemTimestampMicros = Long.MIN_VALUE; private final CommandsForRangesLoader commandsForRangesLoader; - public AccordCommandStore(int id, - NodeTimeService time, - Agent agent, - DataStore dataStore, - ProgressLog.Factory progressLogFactory, - LocalListeners.Factory listenerFactory, - EpochUpdateHolder epochUpdateHolder, - IJournal journal, - AccordStateCacheMetrics cacheMetrics) - { - this(id, - time, - agent, - dataStore, - progressLogFactory, - listenerFactory, - epochUpdateHolder, - journal, - Stage.READ.executor(), - Stage.MUTATION.executor(), - cacheMetrics); - } - private static void registerJfrListener(int id, AccordStateCache.Instance instance, String name) { if (!DatabaseDescriptor.getAccordStateCacheListenerJFREnabled()) @@ -216,7 +172,6 @@ private static void updateMutable(AccordStateCache.Instance instance, A event.update(); } - @VisibleForTesting public AccordCommandStore(int id, NodeTimeService time, Agent agent, @@ -225,16 +180,13 @@ public AccordCommandStore(int id, LocalListeners.Factory listenerFactory, EpochUpdateHolder epochUpdateHolder, IJournal journal, - ExecutorPlus loadExecutor, - ExecutorPlus saveExecutor, - AccordStateCacheMetrics cacheMetrics) + CommandStoreExecutor commandStoreExecutor) { super(id, time, agent, dataStore, progressLogFactory, listenerFactory, epochUpdateHolder); this.journal = journal; loggingId = String.format("[%s]", id); - executor = executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']'); - threadId = getThreadId(executor); - stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20, cacheMetrics); + executor = commandStoreExecutor; + AccordStateCache stateCache = executor.stateCache; commandCache = stateCache.instance(TxnId.class, AccordSafeCommand.class, @@ -276,10 +228,10 @@ public AccordCommandStore(int id, executor.execute(() -> CommandStore.register(this)); } - static Factory factory(AccordJournal journal, AccordStateCacheMetrics cacheMetrics) + static Factory factory(AccordJournal journal, IntFunction executorFactory) { return (id, time, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch) -> - new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch, journal, cacheMetrics); + new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch, journal, executorFactory.apply(id)); } public CommandsForRangesLoader diskCommandsForRanges() @@ -296,34 +248,7 @@ public void markShardDurable(SafeCommandStore safeStore, TxnId globalSyncId, Ran @Override public boolean inStore() { - if (!CHECK_THREADS) - return true; - return Thread.currentThread().getId() == threadId; - } - - @Override - public void setCapacity(long bytes) - { - checkInStoreThread(); - stateCache.setCapacity(bytes); - } - - @Override - public long capacity() - { - return stateCache.capacity(); - } - - @Override - public int size() - { - return stateCache.size(); - } - - @Override - public long weightedSize() - { - return stateCache.weightedSize(); + return executor.isInThread(); } public void checkInStoreThread() @@ -340,7 +265,15 @@ public void checkNotInStoreThread() public ExecutorService executor() { - return executor; + return executor.delegate(); + } + + /** + * Note that this cache is shared with other commandStores! + */ + public AccordStateCache cache() + { + return executor.cache(); } public AccordStateCache.Instance commandCache() @@ -444,18 +377,6 @@ private Runnable saveCommandsForKey(CommandsForKey before, CommandsForKey after) return null != mutation ? mutation::applyUnsafe : null; } - @VisibleForTesting - public AccordStateCache cache() - { - return stateCache; - } - - @VisibleForTesting - public void unsafeClearCache() - { - stateCache.unsafeClear(); - } - public void setCurrentOperation(AsyncOperation operation) { checkState(currentOperation == null); @@ -488,7 +409,7 @@ public AsyncChain submit(PreLoadContext loadCtx, Function AsyncChain submit(Callable task) { - return AsyncChains.ofCallable(executor, task); + return AsyncChains.ofCallable(executor.delegate(), task); } public DataStore dataStore() @@ -571,15 +492,6 @@ public void abortCurrentOperation() @Override public void shutdown() { - executor.shutdown(); - try - { - executor.awaitTermination(20, TimeUnit.SECONDS); - } - catch (InterruptedException t) - { - throw new RuntimeException("Could not shut down command store " + this); - } } public void registerHistoricalTransactions(Deps deps, SafeCommandStore safeStore) @@ -591,9 +503,8 @@ public void registerHistoricalTransactions(Deps deps, SafeCommandStore safeStore // We find a set of dependencies for a range then update CommandsFor to know about them Ranges allRanges = safeStore.ranges().all(); deps.keyDeps.keys().forEach(allRanges, key -> { - // TODO (now): batch register to minimise GC + // TODO (desired): batch register to minimise GC deps.keyDeps.forEach(key, (txnId, txnIdx) -> { - // TODO (desired, efficiency): this can be made more efficient by batching by epoch if (ranges.coordinates(txnId).contains(key)) return; // already coordinates, no need to replicate if (!ranges.allBefore(txnId.epoch()).contains(key)) @@ -614,13 +525,13 @@ public void registerHistoricalTransactions(Deps deps, SafeCommandStore safeStore if (!ranges.allBefore(txnId.epoch()).intersects(range)) return; + // TODO (required): this is potentially not safe - it should not be persisted until we save in journal + // but, preferable to retire historical transactions as a concept entirely, and rely on ExclusiveSyncPoints instead diskCommandsForRanges().mergeHistoricalTransaction(txnId, Ranges.single(range).slice(allRanges), Ranges::with); }); } } - public NavigableMap safeToRead() { return super.safeToRead(); } - public void appendCommands(List diffs, Runnable onFlush) { for (int i = 0; i < diffs.size(); i++) @@ -634,7 +545,7 @@ public void appendCommands(List diffs, Runnable onFlush @VisibleForTesting public Command loadCommand(TxnId txnId) { - return journal.loadCommand(id, txnId); + return journal.loadCommand(id, txnId, unsafeGetRedundantBefore(), unsafeGetDurableBefore()); } public interface Loader @@ -681,7 +592,7 @@ public Promise load(Command command) Command local = command; if (local.status() != Truncated && local.status() != Invalidated) { - Cleanup cleanup = Cleanup.shouldCleanup(AccordCommandStore.this, local, local.participants()); + Cleanup cleanup = Cleanup.shouldCleanup(local, unsafeGetRedundantBefore(), unsafeGetDurableBefore()); switch (cleanup) { case NO: @@ -778,4 +689,111 @@ void loadHistoricalTransactions(List deps) }); } } + + public static class CommandStoreExecutor implements CacheSize + { + final AccordStateCache stateCache; + final SequentialExecutorPlus delegate; + final long threadId; + + CommandStoreExecutor(AccordStateCache stateCache, SequentialExecutorPlus delegate, long threadId) + { + this.stateCache = stateCache; + this.delegate = delegate; + this.threadId = threadId; + } + + CommandStoreExecutor(AccordStateCache stateCache, SequentialExecutorPlus delegate) + { + this.stateCache = stateCache; + this.delegate = delegate; + this.threadId = getThreadId(); + } + + public boolean isInThread() + { + if (!CHECK_THREADS) + return true; + + return threadId == Thread.currentThread().getId(); + } + + public void shutdown() + { + delegate.shutdown(); + } + + public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException + { + return delegate.awaitTermination(timeout, unit); + } + + public Future submit(Runnable task) + { + return delegate.submit(task); + } + + public ExecutorService delegate() + { + return delegate; + } + + public void execute(Runnable command) + { + delegate.submit(command); + } + + private long getThreadId() + { + try + { + return delegate.submit(() -> Thread.currentThread().getId()).get(); + } + catch (InterruptedException e) + { + throw new AssertionError(e); + } + catch (ExecutionException e) + { + throw new RuntimeException(e); + } + } + + @VisibleForTesting + public AccordStateCache cache() + { + return stateCache; + } + + @VisibleForTesting + public void unsafeClearCache() + { + stateCache.unsafeClear(); + } + + @Override + public void setCapacity(long bytes) + { + Invariants.checkState(isInThread()); + stateCache.setCapacity(bytes); + } + + @Override + public long capacity() + { + return stateCache.capacity(); + } + + @Override + public int size() + { + return stateCache.size(); + } + + @Override + public long weightedSize() + { + return stateCache.weightedSize(); + } + } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 60cbcf84ffb8..bc34a7ad50ee 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -17,6 +17,7 @@ */ package org.apache.cassandra.service.accord; +import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import accord.api.Agent; @@ -24,6 +25,7 @@ import accord.api.DataStore; import accord.api.LocalListeners; import accord.api.ProgressLog; +import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.Node; import accord.local.NodeTimeService; @@ -32,33 +34,48 @@ import accord.topology.Topology; import accord.utils.RandomSource; import org.apache.cassandra.cache.CacheSize; +import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.metrics.AccordStateCacheMetrics; import org.apache.cassandra.metrics.CacheSizeMetrics; import org.apache.cassandra.schema.TableId; +import org.apache.cassandra.service.accord.AccordCommandStore.CommandStoreExecutor; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; + public class AccordCommandStores extends CommandStores implements CacheSize { public static final String ACCORD_STATE_CACHE = "AccordStateCache"; private final CacheSizeMetrics cacheSizeMetrics; + private final CommandStoreExecutor[] executors; private long cacheSize; AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, RandomSource random, ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, LocalListeners.Factory listenerFactory, - AccordJournal journal) + AccordJournal journal, CommandStoreExecutor[] executors) { super(time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory, - AccordCommandStore.factory(journal, new AccordStateCacheMetrics(ACCORD_STATE_CACHE))); + AccordCommandStore.factory(journal, id -> executors[id % executors.length])); setCapacity(DatabaseDescriptor.getAccordCacheSizeInMiB() << 20); + this.executors = executors; this.cacheSizeMetrics = new CacheSizeMetrics(ACCORD_STATE_CACHE, this); } static Factory factory(AccordJournal journal) { - return (time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory) -> - new AccordCommandStores(time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory, journal); + return (time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory) -> { + CommandStoreExecutor[] executors = new CommandStoreExecutor[DatabaseDescriptor.getAccordShardCount()]; + for (int id = 0; id < executors.length; id++) + { + AccordStateCacheMetrics metrics = new AccordStateCacheMetrics(ACCORD_STATE_CACHE); + AccordStateCache stateCache = new AccordStateCache(Stage.READ.executor(), Stage.MUTATION.executor(), 8 << 20, metrics); + executors[id] = new CommandStoreExecutor(stateCache, executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + id + ']')); + } + + return new AccordCommandStores(time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory, journal, executors); + }; } @Override @@ -96,22 +113,29 @@ public long capacity() @Override public int size() { - return unsafeFoldLeft(0, (size, commandStore) -> size + ((AccordCommandStore) commandStore).size()); + int size = 0; + for (CommandStoreExecutor executor : executors) + size += executor.size(); + return size; } @Override public long weightedSize() { - return unsafeFoldLeft(0L, (size, commandStore) -> size + ((AccordCommandStore) commandStore).weightedSize()); + long size = 0; + for (CommandStoreExecutor executor : executors) + size += executor.weightedSize(); + return size; } synchronized void refreshCacheSizes() { if (count() == 0) return; - long perStore = cacheSize / count(); + long perExecutor = cacheSize / executors.length; // TODO (low priority, safety): we might transiently breach our limit if we increase one store before decreasing another - forEach(commandStore -> ((AccordSafeCommandStore) commandStore).commandStore().setCapacity(perStore)); + for (CommandStoreExecutor executor : executors) + executor.execute(() -> executor.setCapacity(perExecutor)); } @Override @@ -134,6 +158,18 @@ public synchronized Supplier updateTopology(Node node, Topology newT public synchronized void shutdown() { super.shutdown(); + for (CommandStoreExecutor executor : executors) + { + executor.shutdown(); + try + { + executor.awaitTermination(1, TimeUnit.MINUTES); + } + catch (InterruptedException e) + { + throw new RuntimeException(e); + } + } //TODO shutdown isn't useful by itself, we need a way to "wait" as well. Should be AutoCloseable or offer awaitTermination as well (think Shutdownable interface) } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java index f52d7a935ed0..0a2a192aa196 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordConfigurationService.java @@ -412,15 +412,25 @@ public void notifyPostCommit(ClusterMetadata prev, ClusterMetadata next, boolean protected void fetchTopologyInternal(long epoch) { ClusterMetadata metadata = ClusterMetadata.current(); - if (metadata.directory.peerIds().size() < 2) + if (metadata.directory.peerIds().isEmpty()) return; // just let CMS handle it when it's ready // TODO (desired): randomise NodeId first = metadata.directory.peerIds().first(); InetAddressAndPort peer = metadata.directory.getNodeAddresses(first).broadcastAddress; if (FBUtilities.getBroadcastAddressAndPort().equals(peer)) - peer = metadata.directory.getNodeAddresses(metadata.directory.peerIds().higher(first)).broadcastAddress;; - ClusterMetadataService.instance().fetchLogFromPeerOrCMSAsync(metadata, peer, Epoch.create(epoch)); + { + NodeId second = metadata.directory.peerIds().higher(first); + if (second == null) + return; + + peer = metadata.directory.getNodeAddresses(second).broadcastAddress; + } + ClusterMetadataService.instance().fetchLogFromPeerOrCMSAsync(metadata, peer, Epoch.create(epoch)) + .addCallback((success, fail) -> { + if (fail != null) + fetchTopologyInternal(epoch); + }); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 52b65a33a2f3..eb8c0007f26d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -32,6 +32,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import accord.impl.ErasedSafeCommand; +import accord.local.Cleanup; import accord.local.Command; import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; @@ -50,6 +52,7 @@ import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.io.util.File; +import org.apache.cassandra.journal.Compactor; import org.apache.cassandra.journal.Journal; import org.apache.cassandra.journal.Params; import org.apache.cassandra.journal.RecordPointer; @@ -57,8 +60,10 @@ import org.apache.cassandra.net.MessagingService; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; +import org.apache.cassandra.service.accord.JournalKey.JournalKeySupport; import org.apache.cassandra.utils.ExecutorUtils; +import static accord.primitives.SaveStatus.ErasedOrVestigial; import static accord.primitives.Status.Truncated; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; @@ -78,7 +83,7 @@ public class AccordJournal implements IJournal, Shutdownable private static final Set SENTINEL_HOSTS = Collections.singleton(0); - static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[23]); + static final ThreadLocal keyCRCBytes = ThreadLocal.withInitial(() -> new byte[JournalKeySupport.TOTAL_SIZE]); private final Journal journal; private final AccordJournalTable journalTable; @@ -128,6 +133,11 @@ public Params configuration() return params; } + public Compactor compactor() + { + return journal.compactor(); + } + @Override public boolean isTerminated() { @@ -165,50 +175,59 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted } @Override - public Command loadCommand(int commandStoreId, TxnId txnId) + public Command loadCommand(int commandStoreId, TxnId txnId, RedundantBefore redundantBefore, DurableBefore durableBefore) { - return loadDiffs(commandStoreId, txnId).construct(); + SavedCommand.Builder builder = loadDiffs(commandStoreId, txnId); + Cleanup cleanup = builder.shouldCleanup(redundantBefore, durableBefore); + switch (cleanup) + { + case EXPUNGE_PARTIAL: + case EXPUNGE: + case ERASE: + return ErasedSafeCommand.erased(txnId, ErasedOrVestigial); + } + return builder.construct(); } @VisibleForTesting public RedundantBefore loadRedundantBefore(int store) { - RedundantBeforeAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.REDUNDANT_BEFORE, store)); + RedundantBeforeAccumulator accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.REDUNDANT_BEFORE, store)); return accumulator.get(); } @Override public DurableBefore loadDurableBefore(int store) { - DurableBeforeAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.DURABLE_BEFORE, store)); + DurableBeforeAccumulator accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.DURABLE_BEFORE, store)); return accumulator.get(); } @Override public NavigableMap loadBootstrapBeganAt(int store) { - IdentityAccumulator> accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.BOOTSTRAP_BEGAN_AT, store)); + IdentityAccumulator> accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.BOOTSTRAP_BEGAN_AT, store)); return accumulator.get(); } @Override public NavigableMap loadSafeToRead(int store) { - IdentityAccumulator> accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.SAFE_TO_READ, store)); + IdentityAccumulator> accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.SAFE_TO_READ, store)); return accumulator.get(); } @Override public CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int store) { - IdentityAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store)); + IdentityAccumulator accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store)); return accumulator.get(); } @Override public List loadHistoricalTransactions(int store) { - HistoricalTransactionsAccumulator accumulator = readAll(new JournalKey(Timestamp.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store)); + HistoricalTransactionsAccumulator accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store)); return accumulator.get(); } @@ -234,18 +253,18 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie { RecordPointer pointer = null; // TODO: avoid allocating keys - if (fieldUpdates.redundantBefore != null) - pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.REDUNDANT_BEFORE, store), fieldUpdates.redundantBefore); - if (fieldUpdates.durableBefore != null) - pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.DURABLE_BEFORE, store), fieldUpdates.durableBefore); - if (fieldUpdates.bootstrapBeganAt != null) - pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.BOOTSTRAP_BEGAN_AT, store), fieldUpdates.bootstrapBeganAt); - if (fieldUpdates.safeToRead != null) - pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.SAFE_TO_READ, store), fieldUpdates.safeToRead); - if (fieldUpdates.rangesForEpoch != null) - pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store), fieldUpdates.rangesForEpoch); - if (fieldUpdates.historicalTransactions != null) - pointer = appendInternal(new JournalKey(Timestamp.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store), fieldUpdates.historicalTransactions); + if (fieldUpdates.addRedundantBefore != null) + pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.REDUNDANT_BEFORE, store), fieldUpdates.addRedundantBefore); + if (fieldUpdates.addDurableBefore != null) + pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.DURABLE_BEFORE, store), fieldUpdates.addDurableBefore); + if (fieldUpdates.newBootstrapBeganAt != null) + pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.BOOTSTRAP_BEGAN_AT, store), fieldUpdates.newBootstrapBeganAt); + if (fieldUpdates.newSafeToRead != null) + pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.SAFE_TO_READ, store), fieldUpdates.newSafeToRead); + if (fieldUpdates.newRangesForEpoch != null) + pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.RANGES_FOR_EPOCH, store), fieldUpdates.newRangesForEpoch); + if (fieldUpdates.addHistoricalTransactions != null) + pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.HISTORICAL_TRANSACTIONS, store), fieldUpdates.addHistoricalTransactions); if (onFlush == null) return; @@ -260,11 +279,23 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) { JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, commandStoreId); - SavedCommand.Builder builder = new SavedCommand.Builder(); + SavedCommand.Builder builder = new SavedCommand.Builder(txnId); journalTable.readAll(key, builder::deserializeNext); return builder; } + public List loadSeparateDiffs(int commandStoreId, TxnId txnId) + { + JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, commandStoreId); + List builders = new ArrayList<>(); + journalTable.readAll(key, (in, version) -> { + SavedCommand.Builder builder = new SavedCommand.Builder(txnId); + builder.deserializeNext(in, version); + builders.add(builder); + }); + return builders; + } + private BUILDER readAll(JournalKey key) { BUILDER builder = (BUILDER) key.type.serializer.mergerFor(key); @@ -281,9 +312,9 @@ private RecordPointer appendInternal(JournalKey key, Object write) } @VisibleForTesting - public void closeCurrentSegmentForTesting() + public void closeCurrentSegmentForTestingIfNonEmpty() { - journal.closeCurrentSegmentForTesting(); + journal.closeCurrentSegmentForTestingIfNonEmpty(); } public void sanityCheck(int commandStoreId, Command orig) @@ -313,7 +344,7 @@ public void runCompactorForTesting() public void replay() { - // TODO: optimize replay memory footprint + // TODO (expected): optimize replay memory footprint class ToApply { final JournalKey key; @@ -331,11 +362,11 @@ class ToApply { isReplay.set(true); - JournalKey key = null; + JournalKey key; SavedCommand.Builder builder = new SavedCommand.Builder(); while ((key = iter.key()) != null) { - builder.clear(); + builder.reset(key.id); if (key.type != JournalKey.Type.COMMAND_DIFF) { // TODO (required): add "skip" for the key to avoid getting stuck @@ -383,4 +414,62 @@ class ToApply isReplay.set(false); } } + + // TODO: this is here temporarily; for debugging purposes + @VisibleForTesting + public void checkAllCommands() + { + try (AccordJournalTable.KeyOrderIterator iter = journalTable.readAll()) + { + IAccordService.CompactionInfo compactionInfo = AccordService.instance().getCompactionInfo(); + JournalKey key; + SavedCommand.Builder builder = new SavedCommand.Builder(); + while ((key = iter.key()) != null) + { + builder.reset(key.id); + if (key.type != JournalKey.Type.COMMAND_DIFF) + { + // TODO (required): add "skip" for the key to avoid getting stuck + iter.readAllForKey(key, (segment, position, key1, buffer, hosts, userVersion) -> {}); + continue; + } + + JournalKey finalKey = key; + List pointers = new ArrayList<>(); + try + { + iter.readAllForKey(key, (segment, position, local, buffer, hosts, userVersion) -> { + pointers.add(new RecordPointer(segment, position)); + Invariants.checkState(finalKey.equals(local)); + try (DataInputBuffer in = new DataInputBuffer(buffer, false)) + { + builder.deserializeNext(in, userVersion); + } + catch (IOException e) + { + // can only throw if serializer is buggy + throw new RuntimeException(e); + } + }); + + Cleanup cleanup = builder.shouldCleanup(compactionInfo.redundantBefores.get(key.commandStoreId), compactionInfo.durableBefores.get(key.commandStoreId)); + switch (cleanup) + { + case ERASE: + case EXPUNGE: + case EXPUNGE_PARTIAL: + case VESTIGIAL: + continue; + } + builder.construct(); + } + catch (Throwable t) + { + throw new RuntimeException(String.format("Caught an exception after iterating over: %s", pointers), + t); + } + } + + } + } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java index f06ccd4c4fa4..289b5c1b9841 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java @@ -63,7 +63,7 @@ public static class CommandDiffSerializer @Override public SavedCommand.Builder mergerFor(JournalKey journalKey) { - return new SavedCommand.Builder(); + return new SavedCommand.Builder(journalKey.id); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 5190d723d2bf..acf0add780b7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -1395,7 +1395,6 @@ public static EpochDiskState markRemoteTopologySync(Node.Id node, long epoch, Ep "SET remote_sync_complete = remote_sync_complete + ? WHERE epoch = ?"; executeInternal(cql, Collections.singleton(node.id), epoch); - flush(Topologies); return diskState; } @@ -1406,7 +1405,6 @@ public static EpochDiskState markClosed(Ranges ranges, long epoch, EpochDiskStat "SET closed = closed + ? WHERE epoch = ?"; executeInternal(cql, KeySerializers.rangesToBlobMap(ranges), epoch); - flush(Topologies); return diskState; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java index ff86a41ed066..d9351f8daea7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java +++ b/src/java/org/apache/cassandra/service/accord/AccordObjectSizes.java @@ -292,7 +292,7 @@ private static CommonAttributes attrs(boolean hasDeps, boolean hasTxn) private static final Result EMPTY_RESULT = new Result() {}; final static long NOT_DEFINED = measure(Command.SerializerSupport.notDefined(attrs(false, false), Ballot.ZERO)); - final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(attrs(false, true), EMPTY_TXNID, null));; + final static long PREACCEPTED = measure(Command.SerializerSupport.preaccepted(attrs(false, true), EMPTY_TXNID, Ballot.ZERO));; final static long ACCEPTED = measure(Command.SerializerSupport.accepted(attrs(true, false), SaveStatus.Accepted, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO)); final static long COMMITTED = measure(Command.SerializerSupport.committed(attrs(true, true), SaveStatus.Committed, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, null)); final static long EXECUTED = measure(Command.SerializerSupport.executed(attrs(true, true), SaveStatus.Applied, EMPTY_TXNID, Ballot.ZERO, Ballot.ZERO, WaitingOn.empty(Domain.Key), EMPTY_WRITES, EMPTY_RESULT)); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index fae5e4634f5d..34cb57ed5b1a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -284,45 +284,78 @@ public String toString() @Override public void upsertRedundantBefore(RedundantBefore addRedundantBefore) { - // TODO (now): this is a temporary measure, see comment on AccordJournalValueSerializers; upsert instead + // TODO (required): this is a temporary measure, see comment on AccordJournalValueSerializers; upsert instead // when modifying, only modify together with AccordJournalValueSerializers - ensureFieldUpdates().redundantBefore = RedundantBefore.merge(commandStore.redundantBefore(), addRedundantBefore); - super.upsertRedundantBefore(addRedundantBefore); + ensureFieldUpdates().newRedundantBefore = ensureFieldUpdates().addRedundantBefore = RedundantBefore.merge(redundantBefore(), addRedundantBefore); } @Override public void setBootstrapBeganAt(NavigableMap newBootstrapBeganAt) { - ensureFieldUpdates().bootstrapBeganAt = newBootstrapBeganAt; - super.setBootstrapBeganAt(newBootstrapBeganAt); + ensureFieldUpdates().newBootstrapBeganAt = newBootstrapBeganAt; } @Override public void upsertDurableBefore(DurableBefore addDurableBefore) { - ensureFieldUpdates().durableBefore = addDurableBefore; - super.upsertDurableBefore(addDurableBefore); + ensureFieldUpdates().addDurableBefore = addDurableBefore; } @Override public void setSafeToRead(NavigableMap newSafeToRead) { - ensureFieldUpdates().safeToRead = newSafeToRead; - super.setSafeToRead(newSafeToRead); + ensureFieldUpdates().newSafeToRead = newSafeToRead; } @Override public void setRangesForEpoch(CommandStores.RangesForEpoch rangesForEpoch) { - ensureFieldUpdates().rangesForEpoch = rangesForEpoch.snapshot(); - super.setRangesForEpoch(rangesForEpoch); + ensureFieldUpdates().newRangesForEpoch = rangesForEpoch.snapshot(); ranges = rangesForEpoch; } + @Override + public NavigableMap bootstrapBeganAt() + { + if (fieldUpdates != null && fieldUpdates.newBootstrapBeganAt != null) + return fieldUpdates.newBootstrapBeganAt; + + return super.bootstrapBeganAt(); + } + + @Override + public NavigableMap safeToReadAt() + { + if (fieldUpdates != null && fieldUpdates.newSafeToRead != null) + return fieldUpdates.newSafeToRead; + + return super.safeToReadAt(); + } + + @Override + public RedundantBefore redundantBefore() + { + if (fieldUpdates != null && fieldUpdates.newRedundantBefore != null) + return fieldUpdates.newRedundantBefore; + + return super.redundantBefore(); + } + + @Override + public DurableBefore durableBefore() + { + if (fieldUpdates != null && fieldUpdates.newDurableBefore != null) + return fieldUpdates.newDurableBefore; + + return super.durableBefore(); + } + @Override protected void registerHistoricalTransactions(Deps deps) { - ensureFieldUpdates().historicalTransactions = deps; + ensureFieldUpdates().addHistoricalTransactions = deps; + // TODO (required): it is potentially unsafe to propagate this synchronously, as if we fail to write to the journal we may be in an inconsistent state + // however, we can and should retire the concept of historical transactions in favour of ExclusiveSyncPoints ensuring their deps are known super.registerHistoricalTransactions(deps); } @@ -337,13 +370,34 @@ public FieldUpdates fieldUpdates() return fieldUpdates; } + public void postExecute() + { + if (fieldUpdates == null) + return; + + if (fieldUpdates.newRedundantBefore != null) + super.unsafeSetRedundantBefore(fieldUpdates.newRedundantBefore); + + if (fieldUpdates.newDurableBefore != null) + super.unsafeSetDurableBefore(fieldUpdates.newDurableBefore); + + if (fieldUpdates.newBootstrapBeganAt != null) + super.setBootstrapBeganAt(fieldUpdates.newBootstrapBeganAt); + + if (fieldUpdates.newSafeToRead != null) + super.setSafeToRead(fieldUpdates.newSafeToRead); + + if (fieldUpdates.newRangesForEpoch != null) + super.setRangesForEpoch(ranges); + } + public static class FieldUpdates { - public RedundantBefore redundantBefore; - public DurableBefore durableBefore; - public NavigableMap bootstrapBeganAt; - public NavigableMap safeToRead; - public RangesForEpoch.Snapshot rangesForEpoch; - public Deps historicalTransactions; + public RedundantBefore addRedundantBefore, newRedundantBefore; + public DurableBefore addDurableBefore, newDurableBefore; + public NavigableMap newBootstrapBeganAt; + public NavigableMap newSafeToRead; + public RangesForEpoch.Snapshot newRangesForEpoch; + public Deps addHistoricalTransactions; } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java index 1dc8389c2083..f94510b8b8f7 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java @@ -74,7 +74,7 @@ public Collection> compact(Collection> compact(Collection) key.type.serializer; builder = serializer.mergerFor(key); + lastOffset = -1; + lastDescriptor = -1; } boolean advanced; @@ -106,6 +108,14 @@ public Collection> compact(Collection= lastDescriptor, + "Descriptors were accessed out of order: %d was accessed after %d", reader.descriptor.timestamp, lastDescriptor); + Invariants.checkState(reader.descriptor.timestamp != lastDescriptor || + reader.offset() > lastOffset, + "Offsets within %s were accessed out of order: %d was accessed after %s", reader.offset(), lastOffset); + } serializer.deserialize(key, builder, in, reader.descriptor.userVersion); lastDescriptor = reader.descriptor.timestamp; lastOffset = reader.offset(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 4b9533755954..87872666bd65 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -31,7 +31,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.Supplier; @@ -298,7 +297,7 @@ public void receive(Message> message) {} @Override public CompactionInfo getCompactionInfo() { - return new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), DurableBefore.EMPTY); + return new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>()); } @Override @@ -534,13 +533,12 @@ public static List tcmLoadRange(long min, long max) { List afterLoad = ClusterMetadataService.instance().processor().reconstructFull(Epoch.create(min), Epoch.create(max)); if (Invariants.isParanoid()) - assert afterLoad.get(0).epoch.getEpoch() == min : String.format("Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); + Invariants.checkState(afterLoad.get(0).epoch.getEpoch() == min, "Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); while (!afterLoad.isEmpty() && afterLoad.get(0).epoch.getEpoch() < min) afterLoad.remove(0); - assert !afterLoad.isEmpty() : String.format("TCM was unable to return the needed epochs: %d -> %d", min, max); - assert afterLoad.get(0).epoch.getEpoch() == min : String.format("Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); - if (max != Long.MAX_VALUE) - assert afterLoad.get(afterLoad.size() - 1).epoch.getEpoch() == max : String.format("Unexpected epoch: expected %d but given %d", max, afterLoad.get(afterLoad.size() - 1).epoch.getEpoch()); + Invariants.checkState(!afterLoad.isEmpty(), "TCM was unable to return the needed epochs: %d -> %d", min, max); + Invariants.checkState(afterLoad.get(0).epoch.getEpoch() == min, "Unexpected epoch: expected %d but given %d", min, afterLoad.get(0).epoch.getEpoch()); + Invariants.checkState(max == Long.MAX_VALUE || afterLoad.get(afterLoad.size() - 1).epoch.getEpoch() == max, "Unexpected epoch: expected %d but given %d", max, afterLoad.get(afterLoad.size() - 1).epoch.getEpoch()); return afterLoad; } @@ -1262,17 +1260,17 @@ public AccordConfigurationService configurationService() public CompactionInfo getCompactionInfo() { Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); + Int2ObjectHashMap durableBefores = new Int2ObjectHashMap<>(); Int2ObjectHashMap ranges = new Int2ObjectHashMap<>(); - AtomicReference durableBefore = new AtomicReference<>(DurableBefore.EMPTY); AsyncChains.getBlockingAndRethrow(node.commandStores().forEach(safeStore -> { synchronized (redundantBefores) { - redundantBefores.put(safeStore.commandStore().id(), safeStore.commandStore().redundantBefore()); + redundantBefores.put(safeStore.commandStore().id(), safeStore.redundantBefore()); ranges.put(safeStore.commandStore().id(), safeStore.ranges()); + durableBefores.put(safeStore.commandStore().id(), safeStore.durableBefore()); } - durableBefore.set(DurableBefore.merge(durableBefore.get(), safeStore.commandStore().durableBefore())); })); - return new CompactionInfo(redundantBefores, ranges, durableBefore.get()); + return new CompactionInfo(redundantBefores, ranges, durableBefores); } @Override diff --git a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java index 04f618e33819..8439a05f2eae 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordStateCache.java +++ b/src/java/org/apache/cassandra/service/accord/AccordStateCache.java @@ -29,13 +29,13 @@ import java.util.stream.Stream; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.utils.IntrusiveLinkedList; import accord.utils.Invariants; import accord.utils.async.AsyncChains; +import org.agrona.collections.Int2ObjectHashMap; import org.apache.cassandra.cache.CacheSize; import org.apache.cassandra.concurrent.ExecutorPlus; import org.apache.cassandra.metrics.AccordStateCacheMetrics; @@ -90,7 +90,9 @@ public ImmutableStats(Stats stats) } } - private ImmutableList> instances = ImmutableList.of(); + // TODO (required): cleanup on drop table, or else share between command stores + private Int2ObjectHashMap> instances = new Int2ObjectHashMap<>(); + private int nextIndex; private final ExecutorPlus loadExecutor, saveExecutor; @@ -243,13 +245,14 @@ public > Instance instance( ToLongFunction heapEstimator, AccordCachingState.Factory nodeFactory) { - int index = instances.size(); - + int index = ++nextIndex; Instance instance = new Instance<>(index, keyClass, safeRefFactory, loadFunction, saveFunction, validateFunction, heapEstimator, nodeFactory); - instances = ImmutableList.>builder().addAll(instances).add(instance).build(); + Int2ObjectHashMap> newInstances = new Int2ObjectHashMap<>(instances); + newInstances.put(index, instance); + instances = newInstances; return instance; } @@ -268,7 +271,7 @@ public > Instance instance( public Collection> instances() { - return instances; + return instances.values(); } public interface Listener @@ -701,7 +704,7 @@ void unsafeClear() { bytesCached = 0; metrics.reset();; - instances.forEach(instance -> { + instances.values().forEach(instance -> { instance.cache.forEach((k, v) -> Invariants.checkState(v.references == 0)); instance.cache.clear(); instance.bytesCached = 0; @@ -739,7 +742,7 @@ public void awaitSaveResults() private int cacheSize() { int size = 0; - for (Instance instance : instances) + for (Instance instance : instances.values()) size += instance.cache.size(); return size; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordTopology.java b/src/java/org/apache/cassandra/service/accord/AccordTopology.java index 2489929c1672..bced75a40172 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordTopology.java +++ b/src/java/org/apache/cassandra/service/accord/AccordTopology.java @@ -43,6 +43,7 @@ import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.fastpath.FastPathStrategy; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ClusterMetadataService; import org.apache.cassandra.tcm.Epoch; import org.apache.cassandra.tcm.membership.Directory; import org.apache.cassandra.tcm.membership.NodeId; @@ -333,6 +334,7 @@ public static void awaitTopologyReadiness(Keyspaces.KeyspacesDiff keyspacesDiff, try { + ClusterMetadataService.instance().fetchLogFromCMS(epoch); AccordService.instance().epochReady(epoch).get(DatabaseDescriptor.getTransactionTimeout(MILLISECONDS), MILLISECONDS); } catch (InterruptedException e) diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 9bda1950e910..5d8747d4a506 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -43,8 +43,6 @@ public AccordVerbHandler(Node node, AccordEndpointMapper endpointMapper) @Override public void doVerb(Message message) throws IOException { - // TODO (desired): need a non-blocking way to inform CMS of an unknown epoch and add callback to it's receipt -// ClusterMetadataService.instance().maybeCatchup(message.epoch()); logger.trace("Receiving {} from {}", message.payload, message.from()); T request = message.payload; diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index bdf1aa98886f..1f5088365daa 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -189,7 +189,7 @@ private NavigableMap load(Ranges ranges, Map cac { //TODO (now): this logic is kinda duplicate of org.apache.cassandra.service.accord.CommandsForRange.mapReduce // should figure out if this can be improved... also what is correct? - DurableBefore durableBefore = store.durableBefore(); + DurableBefore durableBefore = store.unsafeGetDurableBefore(); NavigableMap map = new TreeMap<>(); for (TxnId txnId : possibleTxns) { diff --git a/src/java/org/apache/cassandra/service/accord/IAccordService.java b/src/java/org/apache/cassandra/service/accord/IAccordService.java index 1be920bc1bde..e5e2d125f1cc 100644 --- a/src/java/org/apache/cassandra/service/accord/IAccordService.java +++ b/src/java/org/apache/cassandra/service/accord/IAccordService.java @@ -134,17 +134,17 @@ public AsyncTxnResult(@Nonnull TxnId txnId) class CompactionInfo { - static final Supplier NO_OP = () -> new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), DurableBefore.EMPTY); + static final Supplier NO_OP = () -> new CompactionInfo(new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>(), new Int2ObjectHashMap<>()); public final Int2ObjectHashMap redundantBefores; + public final Int2ObjectHashMap durableBefores; public final Int2ObjectHashMap ranges; - public final DurableBefore durableBefore; - public CompactionInfo(Int2ObjectHashMap redundantBefores, Int2ObjectHashMap ranges, DurableBefore durableBefore) + public CompactionInfo(Int2ObjectHashMap redundantBefores, Int2ObjectHashMap ranges, Int2ObjectHashMap durableBefores) { this.redundantBefores = redundantBefores; this.ranges = ranges; - this.durableBefore = durableBefore; + this.durableBefores = durableBefores; } } diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java index 721d69c5a665..e327c1a5ce31 100644 --- a/src/java/org/apache/cassandra/service/accord/IJournal.java +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -32,7 +32,7 @@ public interface IJournal { - Command loadCommand(int commandStoreId, TxnId txnId); + Command loadCommand(int commandStoreId, TxnId txnId, RedundantBefore redundantBefore, DurableBefore durableBefore); RedundantBefore loadRedundantBefore(int commandStoreId); DurableBefore loadDurableBefore(int commandStoreId); diff --git a/src/java/org/apache/cassandra/service/accord/JournalKey.java b/src/java/org/apache/cassandra/service/accord/JournalKey.java index d3775869da98..b8b09ef236b8 100644 --- a/src/java/org/apache/cassandra/service/accord/JournalKey.java +++ b/src/java/org/apache/cassandra/service/accord/JournalKey.java @@ -25,6 +25,7 @@ import accord.local.Node.Id; import accord.primitives.Timestamp; +import accord.primitives.TxnId; import accord.utils.Invariants; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; @@ -40,22 +41,21 @@ import static org.apache.cassandra.db.TypeSizes.BYTE_SIZE; import static org.apache.cassandra.db.TypeSizes.INT_SIZE; import static org.apache.cassandra.db.TypeSizes.LONG_SIZE; -import static org.apache.cassandra.db.TypeSizes.SHORT_SIZE; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RangesForEpochSerializer; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.SafeToReadSerializer; public final class JournalKey { public final Type type; - public final Timestamp timestamp; + public final TxnId id; public final int commandStoreId; - public JournalKey(Timestamp timestamp, Type type, int commandStoreId) + public JournalKey(TxnId id, Type type, int commandStoreId) { Invariants.nonNull(type); - Invariants.nonNull(timestamp); + Invariants.nonNull(id); this.type = type; - this.timestamp = timestamp; + this.id = id; this.commandStoreId = commandStoreId; } @@ -67,86 +67,85 @@ public JournalKey(Timestamp timestamp, Type type, int commandStoreId) * when ordering timestamps. This is done for more precise elimination of candidate * segments by min/max record key in segment. */ - public static final KeySupport SUPPORT = new KeySupport<>() + public static final JournalKeySupport SUPPORT = new JournalKeySupport(); + + public static final class JournalKeySupport implements KeySupport { - private static final int HLC_OFFSET = 0; - private static final int EPOCH_AND_FLAGS_OFFSET = HLC_OFFSET + LONG_SIZE; - private static final int NODE_OFFSET = EPOCH_AND_FLAGS_OFFSET + LONG_SIZE; + private static final int MSB_OFFSET = 0; + private static final int LSB_OFFSET = MSB_OFFSET + LONG_SIZE; + private static final int NODE_OFFSET = LSB_OFFSET + LONG_SIZE; private static final int TYPE_OFFSET = NODE_OFFSET + INT_SIZE; private static final int CS_ID_OFFSET = TYPE_OFFSET + BYTE_SIZE; + // TODO (required): revisit commandStoreId - this can go arbitrarily high so may want to use vint + public static final int TOTAL_SIZE = CS_ID_OFFSET + INT_SIZE; @Override public int serializedSize(int userVersion) { - return LONG_SIZE // timestamp.hlc() - + 6 // timestamp.epoch() - + 2 // timestamp.flags() - + INT_SIZE // timestamp.node - + BYTE_SIZE // type - + SHORT_SIZE; // commandStoreId + return TOTAL_SIZE; } @Override public void serialize(JournalKey key, DataOutputPlus out, int userVersion) throws IOException { - serializeTimestamp(key.timestamp, out); + serializeTxnId(key.id, out); out.writeByte(key.type.id); - out.writeShort(key.commandStoreId); + out.writeInt(key.commandStoreId); } private void serialize(JournalKey key, byte[] out) { - serializeTimestamp(key.timestamp, out); - out[20] = (byte) (key.type.id & 0xFF); - ByteArrayUtil.putShort(out, 21, (short) key.commandStoreId); + serializeTxnId(key.id, out); + out[TYPE_OFFSET] = (byte) (key.type.id & 0xFF); + ByteArrayUtil.putInt(out, CS_ID_OFFSET, key.commandStoreId); } @Override public JournalKey deserialize(DataInputPlus in, int userVersion) throws IOException { - Timestamp timestamp = deserializeTimestamp(in); + TxnId txnId = deserializeTxnId(in); int type = in.readByte(); - int commandStoreId = in.readShort(); - return new JournalKey(timestamp, Type.fromId(type), commandStoreId); + int commandStoreId = in.readInt(); + return new JournalKey(txnId, Type.fromId(type), commandStoreId); } @Override public JournalKey deserialize(ByteBuffer buffer, int position, int userVersion) { - Timestamp timestamp = deserializeTimestamp(buffer, position); + TxnId txnId = deserializeTxnId(buffer, position); int type = buffer.get(position + TYPE_OFFSET); - int commandStoreId = buffer.getShort(position + CS_ID_OFFSET); - return new JournalKey(timestamp, Type.fromId(type), commandStoreId); + int commandStoreId = buffer.getInt(position + CS_ID_OFFSET); + return new JournalKey(txnId, Type.fromId(type), commandStoreId); } - private void serializeTimestamp(Timestamp timestamp, DataOutputPlus out) throws IOException + private void serializeTxnId(TxnId txnId, DataOutputPlus out) throws IOException { - out.writeLong(timestamp.hlc()); - out.writeLong(epochAndFlags(timestamp)); - out.writeInt(timestamp.node.id); + out.writeLong(txnId.msb); + out.writeLong(txnId.lsb); + out.writeInt(txnId.node.id); } - private Timestamp deserializeTimestamp(DataInputPlus in) throws IOException + private TxnId deserializeTxnId(DataInputPlus in) throws IOException { - long hlc = in.readLong(); - long epochAndFlags = in.readLong(); + long msb = in.readLong(); + long lsb = in.readLong(); int nodeId = in.readInt(); - return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); + return TxnId.fromBits(msb, lsb, new Id(nodeId)); } - private void serializeTimestamp(Timestamp timestamp, byte[] out) + private void serializeTxnId(TxnId txnId, byte[] out) { - ByteArrayUtil.putLong(out, 0, timestamp.hlc()); - ByteArrayUtil.putLong(out, 8, epochAndFlags(timestamp)); - ByteArrayUtil.putInt(out, 16, timestamp.node.id); + ByteArrayUtil.putLong(out, MSB_OFFSET, txnId.msb); + ByteArrayUtil.putLong(out, LSB_OFFSET, txnId.lsb); + ByteArrayUtil.putInt(out, NODE_OFFSET, txnId.node.id); } - private Timestamp deserializeTimestamp(ByteBuffer buffer, int position) + private TxnId deserializeTxnId(ByteBuffer buffer, int position) { - long hlc = buffer.getLong(position + HLC_OFFSET); - long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); + long msb = buffer.getLong(position + MSB_OFFSET); + long lsb = buffer.getLong(position + LSB_OFFSET); int nodeId = buffer.getInt(position + NODE_OFFSET); - return Timestamp.fromValues(epoch(epochAndFlags), hlc, flags(epochAndFlags), new Id(nodeId)); + return TxnId.fromBits(msb, lsb, new Id(nodeId)); } @Override @@ -160,64 +159,41 @@ public void updateChecksum(Checksum crc, JournalKey key, int userVersion) @Override public int compareWithKeyAt(JournalKey k, ByteBuffer buffer, int position, int userVersion) { - int cmp = compareWithTimestampAt(k.timestamp, buffer, position); + int cmp = compareWithTxnIdAt(k.id, buffer, position); if (cmp != 0) return cmp; byte type = buffer.get(position + TYPE_OFFSET); cmp = Byte.compare((byte) k.type.id, type); if (cmp != 0) return cmp; - short commandStoreId = buffer.getShort(position + CS_ID_OFFSET); - cmp = Short.compare((byte) k.commandStoreId, commandStoreId); + int commandStoreId = buffer.getInt(position + CS_ID_OFFSET); + cmp = Integer.compare(k.commandStoreId, commandStoreId); return cmp; } - private int compareWithTimestampAt(Timestamp timestamp, ByteBuffer buffer, int position) + private int compareWithTxnIdAt(TxnId txnId, ByteBuffer buffer, int position) { - long hlc = buffer.getLong(position + HLC_OFFSET); - int cmp = Long.compareUnsigned(timestamp.hlc(), hlc); + long msb = buffer.getLong(position + MSB_OFFSET); + int cmp = Timestamp.compareMsb(txnId.msb, msb); if (cmp != 0) return cmp; - long epochAndFlags = buffer.getLong(position + EPOCH_AND_FLAGS_OFFSET); - cmp = Long.compareUnsigned(epochAndFlags(timestamp), epochAndFlags); + long lsb = buffer.getLong(position + LSB_OFFSET); + cmp = Timestamp.compareLsb(txnId.lsb, lsb); if (cmp != 0) return cmp; int nodeId = buffer.getInt(position + NODE_OFFSET); - cmp = Integer.compareUnsigned(timestamp.node.id, nodeId); + cmp = Integer.compare(txnId.node.id, nodeId); return cmp; } @Override public int compare(JournalKey k1, JournalKey k2) { - int cmp = compare(k1.timestamp, k2.timestamp); + int cmp = k1.id.compareTo(k2.id); if (cmp == 0) cmp = Byte.compare((byte) k1.type.id, (byte) k2.type.id); - if (cmp == 0) cmp = Short.compare((short) k1.commandStoreId, (short) k2.commandStoreId); + if (cmp == 0) cmp = Integer.compare(k1.commandStoreId, k2.commandStoreId); return cmp; } - - private int compare(Timestamp timestamp1, Timestamp timestamp2) - { - int cmp = Long.compareUnsigned(timestamp1.hlc(), timestamp2.hlc()); - if (cmp == 0) cmp = Long.compareUnsigned(epochAndFlags(timestamp1), epochAndFlags(timestamp2)); - if (cmp == 0) cmp = Integer.compareUnsigned(timestamp1.node.id, timestamp2.node.id); - return cmp; - } - - private long epochAndFlags(Timestamp timestamp) - { - return (timestamp.epoch() << 16) | (long) timestamp.flags(); - } - - private long epoch(long epochAndFlags) - { - return epochAndFlags >>> 16; - } - - private int flags(long epochAndFlags) - { - return (int) (epochAndFlags & ((1 << 16) - 1)); - } }; @Override @@ -230,7 +206,7 @@ public boolean equals(Object other) boolean equals(JournalKey other) { - return this.timestamp.equals(other.timestamp) && + return this.id.equals(other.id) && this.type == other.type && this.commandStoreId == other.commandStoreId; } @@ -238,13 +214,13 @@ boolean equals(JournalKey other) @Override public int hashCode() { - return Objects.hash(timestamp, type, commandStoreId); + return Objects.hash(id, type, commandStoreId); } public String toString() { return "Key{" + - "timestamp=" + timestamp + + "id=" + id + "type=" + type + ", commandStoreId=" + commandStoreId + '}'; diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index 75348c0e1533..209208989f28 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -26,8 +26,11 @@ import com.google.common.annotations.VisibleForTesting; import accord.api.Result; +import accord.local.Cleanup; import accord.local.Command; import accord.local.CommonAttributes; +import accord.local.DurableBefore; +import accord.local.RedundantBefore; import accord.local.StoreParticipants; import accord.primitives.Ballot; import accord.primitives.PartialDeps; @@ -37,6 +40,7 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.primitives.Writes; +import accord.utils.Invariants; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.util.DataOutputPlus; @@ -46,9 +50,12 @@ import org.apache.cassandra.service.accord.serializers.WaitingOnSerializer; import org.apache.cassandra.utils.Throwables; +import static accord.local.Cleanup.NO; +import static accord.local.Cleanup.TRUNCATE_WITH_OUTCOME; import static accord.primitives.Known.KnownDeps.DepsErased; import static accord.primitives.Known.KnownDeps.DepsUnknown; import static accord.primitives.Known.KnownDeps.NoDeps; +import static accord.primitives.Status.Durability.NotDurable; import static accord.utils.Invariants.illegalState; public class SavedCommand @@ -56,7 +63,6 @@ public class SavedCommand // This enum is order-dependent public enum Fields { - TXN_ID, EXECUTE_AT, EXECUTES_AT_LEAST, SAVE_STATUS, @@ -68,6 +74,10 @@ public enum Fields PARTIAL_DEPS, WAITING_ON, WRITES, + CLEANUP + ; + + public static final Fields[] FIELDS = values(); } // TODO: maybe rename this and enclosing classes? @@ -115,7 +125,6 @@ public TxnId key() } } - public static ByteBuffer asSerializedDiff(Command after, int userVersion) throws IOException { try (DataOutputBuffer out = new DataOutputBuffer()) @@ -156,8 +165,6 @@ public static void serialize(Command before, Command after, DataOutputPlus out, out.writeInt(flags); // We encode all changed fields unless their value is null - if (getFieldChanged(Fields.TXN_ID, flags) && after.txnId() != null) - CommandSerializers.txnId.serialize(after.txnId(), out, userVersion); if (getFieldChanged(Fields.EXECUTE_AT, flags) && after.executeAt() != null) CommandSerializers.timestamp.serialize(after.executeAt(), out, userVersion); // TODO (desired): check if this can fold into executeAt @@ -198,7 +205,6 @@ static int getFlags(Command before, Command after) { int flags = 0; - flags = collectFlags(before, after, Command::txnId, true, Fields.TXN_ID, flags); flags = collectFlags(before, after, Command::executeAt, true, Fields.EXECUTE_AT, flags); flags = collectFlags(before, after, Command::executesAtLeast, true, Fields.EXECUTES_AT_LEAST, flags); flags = collectFlags(before, after, Command::saveStatus, false, Fields.SAVE_STATUS, flags); @@ -299,13 +305,18 @@ public static class Builder SavedCommand.WaitingOnProvider waitingOn; Writes writes; Result result; + Cleanup cleanup; boolean nextCalled; int count; + public Builder(TxnId txnId) + { + init(txnId); + } + public Builder() { - clear(); } public TxnId txnId() @@ -376,28 +387,45 @@ public Result result() public void clear() { flags = 0; - txnId = null; executeAt = null; + executeAtLeast = null; saveStatus = null; durability = null; - acceptedOrCommitted = Ballot.ZERO; + acceptedOrCommitted = null; promised = null; participants = null; partialTxn = null; partialDeps = null; - waitingOn = (txn, deps) -> null; + waitingOnBytes = null; + waitingOn = null; writes = null; - result = CommandSerializers.APPLIED; + result = null; + cleanup = null; nextCalled = false; count = 0; } + public void reset(TxnId txnId) + { + clear(); + init(txnId); + } + + public void init(TxnId txnId) + { + this.txnId = txnId; + durability = NotDurable; + acceptedOrCommitted = promised = Ballot.ZERO; + waitingOn = (txn, deps) -> null; + result = CommandSerializers.APPLIED; + } + public boolean isEmpty() { return !nextCalled; @@ -408,19 +436,62 @@ public int count() return count; } - public Builder expungePartial() + public Cleanup shouldCleanup(RedundantBefore redundantBefore, DurableBefore durableBefore) + { + if (!nextCalled) + return NO; + + if (saveStatus == null || participants == null) + return Cleanup.NO; + + Cleanup cleanup = Cleanup.shouldCleanup(txnId, saveStatus, durability, participants, redundantBefore, durableBefore); + if (this.cleanup != null && this.cleanup.compareTo(cleanup) > 0) + cleanup = this.cleanup; + return cleanup; + } + + // TODO (expected): avoid allocating new builder + public Builder maybeCleanup(Cleanup cleanup) { - Builder builder = new Builder(); + if (saveStatus() == null) + return this; + + switch (cleanup) + { + case EXPUNGE: + case ERASE: + return null; + + case EXPUNGE_PARTIAL: + return expungePartial(cleanup, saveStatus, true); + + case VESTIGIAL: + case INVALIDATE: + return saveStatusOnly(); + + case TRUNCATE_WITH_OUTCOME: + case TRUNCATE: + return expungePartial(cleanup, cleanup.appliesIfNot, cleanup == TRUNCATE_WITH_OUTCOME); + + case NO: + return this; + default: + throw new IllegalStateException("Unknown cleanup: " + cleanup);} + } + + public Builder expungePartial(Cleanup cleanup, SaveStatus saveStatus, boolean includeOutcome) + { + Invariants.checkState(txnId != null); + Builder builder = new Builder(txnId); builder.count++; builder.nextCalled = true; - // TODO: these accesses can be abstracted away - if (txnId != null) - { - builder.flags = setFieldChanged(Fields.TXN_ID, builder.flags); - builder.txnId = txnId; - } + Invariants.checkState(saveStatus != null); + builder.flags = setFieldChanged(Fields.SAVE_STATUS, builder.flags); + builder.saveStatus = saveStatus; + builder.flags = setFieldChanged(Fields.CLEANUP, builder.flags); + builder.cleanup = cleanup; if (executeAt != null) { builder.flags = setFieldChanged(Fields.EXECUTE_AT, builder.flags); @@ -436,23 +507,24 @@ public Builder expungePartial() builder.flags = setFieldChanged(Fields.PARTICIPANTS, builder.flags); builder.participants = participants; } + if (includeOutcome && builder.writes != null) + { + builder.flags = setFieldChanged(Fields.WRITES, builder.flags); + builder.writes = writes; + } return builder; } public Builder saveStatusOnly() { - Builder builder = new Builder(); + Invariants.checkState(txnId != null); + Builder builder = new Builder(txnId); builder.count++; builder.nextCalled = true; // TODO: these accesses can be abstracted away - if (txnId != null) - { - builder.flags = setFieldChanged(Fields.TXN_ID, builder.flags); - builder.txnId = txnId; - } if (saveStatus != null) { builder.flags = setFieldChanged(Fields.SAVE_STATUS, builder.flags); @@ -476,8 +548,6 @@ public void serialize(DataOutputPlus out, int userVersion) throws IOException out.writeInt(flags); // We encode all changed fields unless their value is null - if (getFieldChanged(Fields.TXN_ID, flags) && !getFieldIsNull(Fields.TXN_ID, flags)) - CommandSerializers.txnId.serialize(txnId(), out, userVersion); if (getFieldChanged(Fields.EXECUTE_AT, flags) && !getFieldIsNull(Fields.EXECUTE_AT, flags)) CommandSerializers.timestamp.serialize(executeAt(), out, userVersion); // TODO (desired): check if this can fold into executeAt @@ -508,6 +578,9 @@ public void serialize(DataOutputPlus out, int userVersion) throws IOException if (getFieldChanged(Fields.WRITES, flags) && !getFieldIsNull(Fields.WRITES, flags)) CommandSerializers.writes.serialize(writes(), out, userVersion); + + if (getFieldChanged(Fields.CLEANUP, flags)) + out.writeByte(cleanup.ordinal()); } @@ -515,11 +588,12 @@ public void serialize(DataOutputPlus out, int userVersion) throws IOException @SuppressWarnings({ "rawtypes", "unchecked" }) public void deserializeNext(DataInputPlus in, int userVersion) throws IOException { + Invariants.checkState(txnId != null); final int flags = in.readInt(); nextCalled = true; count++; - for (Fields field : Fields.values()) + for (Fields field : Fields.FIELDS) { if (getFieldChanged(field, flags)) { @@ -531,14 +605,6 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio } } - if (getFieldChanged(Fields.TXN_ID, flags)) - { - if (getFieldIsNull(Fields.TXN_ID, flags)) - txnId = null; - else - txnId = CommandSerializers.txnId.deserialize(in, userVersion); - } - if (getFieldChanged(Fields.EXECUTE_AT, flags)) { if (getFieldIsNull(Fields.EXECUTE_AT, flags)) @@ -625,6 +691,7 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio waitingOn = (localTxnId, deps) -> { try { + Invariants.nonNull(deps); return WaitingOnSerializer.deserialize(localTxnId, deps.keyDeps.keys(), deps.rangeDeps, deps.directKeyDeps, buffer); } catch (IOException e) @@ -642,7 +709,13 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio else writes = CommandSerializers.writes.deserialize(in, userVersion); } - + + if (getFieldChanged(Fields.CLEANUP, flags)) + { + Cleanup newCleanup = Cleanup.forOrdinal(in.readByte()); + if (cleanup == null || newCleanup.compareTo(cleanup) > 0) + cleanup = newCleanup; + } } public void forceResult(Result newValue) @@ -655,6 +728,7 @@ public Command construct() if (!nextCalled) return null; + Invariants.checkState(txnId != null); CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); if (partialTxn != null) attrs.partialTxn(partialTxn); diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java index cbcfe22f1894..2c2867aa654f 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncOperation.java @@ -269,25 +269,23 @@ protected boolean runInternal(boolean loadOnly) } } - commandStore.completeOperation(safeStore); - - context.releaseResources(commandStore); - state(COMPLETING); + boolean flushed = false; if (diffs != null || safeStore.fieldUpdates() != null) { Runnable onFlush = () -> finish(result, null); if (safeStore.fieldUpdates() != null) - { - if (diffs != null) - appendCommands(diffs, null); - commandStore.persistFieldUpdates(safeStore.fieldUpdates(), onFlush); - } - else - { + commandStore.persistFieldUpdates(safeStore.fieldUpdates(), diffs == null ? onFlush : null); + if (diffs != null) appendCommands(diffs, onFlush); - } - return false; + flushed = true; } + + commandStore.completeOperation(safeStore); + context.releaseResources(commandStore); + state(COMPLETING); + if (flushed) + return false; + case COMPLETING: finish(result, null); case FINISHED: diff --git a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java index 41efdb245b15..9a43c3eee992 100644 --- a/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java +++ b/src/java/org/apache/cassandra/tcm/AtomicLongBackedProcessor.java @@ -18,8 +18,10 @@ package org.apache.cassandra.tcm; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableMap; @@ -30,8 +32,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.tcm.log.Entry; import org.apache.cassandra.tcm.log.LocalLog; +import org.apache.cassandra.tcm.log.LogReader; import org.apache.cassandra.tcm.log.LogState; import org.apache.cassandra.tcm.log.LogStorage; @@ -76,9 +80,31 @@ public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retry) return log.waitForHighestConsecutive(); } + @Override public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) { - return log.getLocalEntries(lowEpoch); + try + { + LogReader.EntryHolder state = log.storage().getEntries(Epoch.EMPTY, highEpoch); + ClusterMetadata metadata = new ClusterMetadata(DatabaseDescriptor.getPartitioner()); + + Iterator iter = state.iterator(); + ImmutableList.Builder rest = new ImmutableList.Builder<>(); + while (iter.hasNext()) + { + Entry current = iter.next(); + if (current.epoch.isEqualOrBefore(lowEpoch)) + metadata = current.transform.execute(metadata).success().metadata; + else + rest.add(current); + } + + return new LogState(metadata, rest.build()); + } + catch (IOException t) + { + throw new RuntimeException(t); + } } public static class InMemoryStorage implements LogStorage @@ -133,12 +159,37 @@ public synchronized MetadataSnapshots snapshots() @Override public synchronized EntryHolder getEntries(Epoch since) { - throw new IllegalStateException("We have overridden all callers of this method, it should never be called"); + EntryHolder entryHolder = new EntryHolder(since); + entries.stream().filter(e -> e.epoch.isAfter(since)).forEach(entryHolder::add); + return entryHolder; + } + + @Override + public synchronized EntryHolder getEntries(Epoch since, Epoch until) + { + EntryHolder entryHolder = new EntryHolder(since); + entries.stream().filter(e -> e.epoch.isAfter(since) && e.epoch.isEqualOrBefore(until)).forEach(entryHolder::add); + return entryHolder; } - public EntryHolder getEntries(Epoch since, Epoch until) + public LogState getLogState(Epoch start, Epoch end) { - throw new IllegalStateException("We have overridden all callers of this method, it should never be called"); + EntryHolder state = getEntries(Epoch.EMPTY); + ClusterMetadata metadata = new ClusterMetadata(DatabaseDescriptor.getPartitioner());; + Iterator iter = state.iterator(); + ImmutableList.Builder rest = new ImmutableList.Builder<>(); + while (iter.hasNext()) + { + Entry current = iter.next(); + if (current.epoch.isAfter(end)) + break; + if (current.epoch.isEqualOrBefore(start)) + metadata = current.transform.execute(metadata).success().metadata; + else + rest.add(current); + } + + return new LogState(metadata, rest.build()); } } diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java index b9063adf5fc8..8195c7955109 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadataService.java @@ -166,7 +166,7 @@ public static State state(ClusterMetadata metadata) Processor localProcessor; if (CassandraRelevantProperties.TCM_USE_ATOMIC_LONG_PROCESSOR.getBoolean()) { - log = logSpec.sync().createLog(); + log = logSpec.sync().withStorage(new AtomicLongBackedProcessor.InMemoryStorage()).createLog(); localProcessor = wrapProcessor.apply(new AtomicLongBackedProcessor(log, logSpec.isReset())); fetchLogHandler = new FetchCMSLog.Handler((e, ignored) -> logSpec.storage().getLogState(e)); } diff --git a/src/java/org/apache/cassandra/tcm/Startup.java b/src/java/org/apache/cassandra/tcm/Startup.java index 537a505eb66e..07d6dd60a352 100644 --- a/src/java/org/apache/cassandra/tcm/Startup.java +++ b/src/java/org/apache/cassandra/tcm/Startup.java @@ -566,15 +566,17 @@ static StartupMode get(Set seeds) } if (seeds.isEmpty()) throw new IllegalArgumentException("Can not initialize CMS without any seeds"); - boolean hasAnyEpoch = SystemKeyspaceStorage.hasAnyEpoch(); + // For CCM and local dev clusters boolean isOnlySeed = DatabaseDescriptor.getSeeds().size() == 1 && DatabaseDescriptor.getSeeds().contains(FBUtilities.getBroadcastAddressAndPort()) && DatabaseDescriptor.getSeeds().iterator().next().getAddress().isLoopbackAddress(); boolean hasBootedBefore = SystemKeyspace.getLocalHostId() != null; logger.info("hasAnyEpoch = {}, hasBootedBefore = {}", hasAnyEpoch, hasBootedBefore); - if (!hasAnyEpoch && hasBootedBefore) + if (!hasAnyEpoch && hasBootedBefore && + // Atomic long processor currently does not support upgrades + !CassandraRelevantProperties.TCM_USE_ATOMIC_LONG_PROCESSOR.getBoolean()) return UPGRADE; else if (hasAnyEpoch) return NORMAL; diff --git a/src/java/org/apache/cassandra/tcm/log/LocalLog.java b/src/java/org/apache/cassandra/tcm/log/LocalLog.java index a84f0920e0e3..32f775096bfd 100644 --- a/src/java/org/apache/cassandra/tcm/log/LocalLog.java +++ b/src/java/org/apache/cassandra/tcm/log/LocalLog.java @@ -300,6 +300,11 @@ public void bootstrap(InetAddressAndPort addr, String datacenter) assert metadata.epoch.is(Epoch.FIRST) : String.format("Epoch: %s. CMS: %s", metadata.epoch, metadata.fullCMSMembers()); } + public LogStorage storage() + { + return storage; + } + public ClusterMetadata metadata() { return committed.get(); diff --git a/src/java/org/apache/cassandra/tcm/log/LogReader.java b/src/java/org/apache/cassandra/tcm/log/LogReader.java index b8d62e75876a..b1e7ab326419 100644 --- a/src/java/org/apache/cassandra/tcm/log/LogReader.java +++ b/src/java/org/apache/cassandra/tcm/log/LogReader.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; @@ -195,6 +196,11 @@ private boolean isContinuous() return true; } + public Iterator iterator() + { + return entries.iterator(); + } + private ImmutableList immutable() { return ImmutableList.copyOf(entries); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java index 483f7e4f37a4..fa93afa2aedd 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordBootstrapTest.java @@ -47,9 +47,9 @@ import org.apache.cassandra.schema.Schema; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.StorageService; -import org.apache.cassandra.service.accord.AccordCommandStore; import org.apache.cassandra.service.accord.AccordConfigurationService; import org.apache.cassandra.service.accord.AccordConfigurationService.EpochSnapshot; +import org.apache.cassandra.service.accord.AccordSafeCommandStore; import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.service.accord.api.PartitionKey; import org.apache.cassandra.streaming.StreamManager; @@ -271,9 +271,9 @@ public void bootstrapTest() throws Throwable }); awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { - AccordCommandStore commandStore = (AccordCommandStore) safeStore.commandStore(); - Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.bootstrapBeganAt().keySet())); - Assert.assertEquals(Timestamp.NONE, getOnlyElement(commandStore.safeToRead().keySet())); + AccordSafeCommandStore ss = (AccordSafeCommandStore) safeStore; + Assert.assertEquals(Timestamp.NONE, getOnlyElement(ss.bootstrapBeganAt().keySet())); + Assert.assertEquals(Timestamp.NONE, getOnlyElement(ss.safeToReadAt().keySet())); // // Assert.assertTrue(commandStore.maxBootstrapEpoch() > 0); // Assert.assertTrue(commandStore.bootstrapBeganAt().isEmpty()); @@ -316,17 +316,17 @@ public void bootstrapTest() throws Throwable awaitUninterruptiblyAndRethrow(service().node().commandStores().forEach(safeStore -> { if (safeStore.ranges().currentRanges().contains(partitionKey)) { - AccordCommandStore commandStore = (AccordCommandStore) safeStore.commandStore(); - Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); - Assert.assertFalse(commandStore.safeToRead().isEmpty()); + AccordSafeCommandStore ss = (AccordSafeCommandStore) safeStore; + Assert.assertFalse(ss.bootstrapBeganAt().isEmpty()); + Assert.assertFalse(ss.safeToReadAt().isEmpty()); - Assert.assertEquals(1, commandStore.bootstrapBeganAt().entrySet().stream() + Assert.assertEquals(1, ss.bootstrapBeganAt().entrySet().stream() .filter(entry -> entry.getValue().contains(partitionKey)) .map(entry -> { Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); return entry; }).count()); - Assert.assertEquals(1, commandStore.safeToRead().entrySet().stream() + Assert.assertEquals(1, ss.safeToReadAt().entrySet().stream() .filter(entry -> entry.getValue().contains(partitionKey)) .map(entry -> { Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); @@ -458,17 +458,17 @@ public void moveTest() throws Throwable safeStore -> { if (!safeStore.ranges().allAt(preMove).contains(partitionKey)) { - AccordCommandStore commandStore = (AccordCommandStore) safeStore.commandStore(); - Assert.assertFalse(commandStore.bootstrapBeganAt().isEmpty()); - Assert.assertFalse(commandStore.safeToRead().isEmpty()); + AccordSafeCommandStore ss = (AccordSafeCommandStore) safeStore; + Assert.assertFalse(ss.bootstrapBeganAt().isEmpty()); + Assert.assertFalse(ss.safeToReadAt().isEmpty()); - Assert.assertEquals(1, commandStore.bootstrapBeganAt().entrySet().stream() + Assert.assertEquals(1, ss.bootstrapBeganAt().entrySet().stream() .filter(entry -> entry.getValue().contains(partitionKey)) .map(entry -> { Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); return entry; }).count()); - Assert.assertEquals(1, commandStore.safeToRead().entrySet().stream() + Assert.assertEquals(1, ss.safeToReadAt().entrySet().stream() .filter(entry -> entry.getValue().contains(partitionKey)) .map(entry -> { Assert.assertTrue(entry.getKey().compareTo(Timestamp.NONE) > 0); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java index 19a675b8d244..19c774a5f48c 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordJournalIntegrationTest.java @@ -105,7 +105,7 @@ public void memtableStateReloadingTest() throws Throwable Object[][] before = cluster.coordinator(1).execute("SELECT * FROM " + TABLE + " WHERE k = ?;", ConsistencyLevel.SERIAL, 1); cluster.get(1).runOnInstance(() -> { - ((AccordService) AccordService.instance()).journal().closeCurrentSegmentForTesting(); + ((AccordService) AccordService.instance()).journal().closeCurrentSegmentForTestingIfNonEmpty(); }); ClusterUtils.stopUnchecked(cluster.get(1)); cluster.get(1).startup(); diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java index 8478a060d760..d9315cf2c7c6 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordLoadTest.java @@ -28,6 +28,7 @@ import java.util.Random; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import com.google.common.util.concurrent.RateLimiter; @@ -44,11 +45,13 @@ import org.apache.cassandra.distributed.api.IMessageFilters; import org.apache.cassandra.distributed.shared.DistributedTestBase; import org.apache.cassandra.net.Verb; +import org.apache.cassandra.service.accord.AccordService; import org.apache.cassandra.utils.EstimatedHistogram; import static java.lang.System.currentTimeMillis; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; public class AccordLoadTest extends AccordTestBase { @@ -78,27 +81,40 @@ public boolean matches(int i, int i1, IMessage iMessage) return false; } }).drop(); + + cluster.forEach(i -> i.runOnInstance(() -> { + ((AccordService) AccordService.instance()).journal().compactor().updateCompactionPeriod(1, SECONDS); +// ((AccordSpec.JournalSpec)((AccordService) AccordService.instance()).journal().configuration()).segmentSize = 128 << 10; + })); + ICoordinator coordinator = cluster.coordinator(1); final int repairInterval = 3000; - final int batchSize = 1000; + final int compactionInterval = 3000; + final int flushInterval = 1000; + final int batchSizeLimit = 1000; + final long batchTime = TimeUnit.SECONDS.toNanos(10); final int concurrency = 100; final int ratePerSecond = 1000; - final int keyCount = 100000; + final int keyCount = 1000000; final float readChance = 0.33f; long nextRepairAt = repairInterval; + long nextCompactionAt = compactionInterval; + long nextFlushAt = flushInterval; final BitSet initialised = new BitSet(); Random random = new Random(); // CopyOnWriteArrayList exceptions = new CopyOnWriteArrayList<>(); final Semaphore inFlight = new Semaphore(concurrency); final RateLimiter rateLimiter = RateLimiter.create(ratePerSecond); - long testStart = System.nanoTime(); +// long testStart = System.nanoTime(); // while (NANOSECONDS.toMinutes(System.nanoTime() - testStart) < 10 && exceptions.size() < 10000) while (true) { final EstimatedHistogram histogram = new EstimatedHistogram(200); long batchStart = System.nanoTime(); - for (int i = 0 ; i < batchSize ; ++i) + long batchEnd = batchStart + batchTime; + int batchSize = 0; + while (batchSize < batchSizeLimit) { inFlight.acquire(); rateLimiter.acquire(); @@ -112,7 +128,7 @@ public boolean matches(int i, int i1, IMessage iMessage) // else exceptions.add(fail); }, "SELECT * FROM " + qualifiedAccordTableName + " WHERE k = ?;", ConsistencyLevel.SERIAL, k); } - else if (initialised.get(i)) + else if (initialised.get(k)) { coordinator.executeWithResult((success, fail) -> { inFlight.release(); @@ -122,13 +138,16 @@ else if (initialised.get(i)) } else { - initialised.set(i); + initialised.set(k); coordinator.executeWithResult((success, fail) -> { inFlight.release(); if (fail == null) histogram.add(NANOSECONDS.toMicros(System.nanoTime() - commandStart)); // else exceptions.add(fail); }, "UPDATE " + qualifiedAccordTableName + " SET v = 0 WHERE k = ? IF NOT EXISTS;", ConsistencyLevel.SERIAL, ConsistencyLevel.QUORUM, k); } + batchSize++; + if (System.nanoTime() >= batchEnd) + break; } if ((nextRepairAt -= batchSize) <= 0) @@ -138,8 +157,31 @@ else if (initialised.get(i)) cluster.coordinator(1).instance().nodetool("repair", qualifiedAccordTableName); } + if ((nextCompactionAt -= batchSize) <= 0) + { + nextCompactionAt += compactionInterval; + System.out.println("compacting accord..."); + cluster.forEach(i -> { + i.nodetool("compact", "system_accord.journal"); + i.runOnInstance(() -> { + ((AccordService) AccordService.instance()).journal().checkAllCommands(); + }); + }); + + } + + if ((nextFlushAt -= batchSize) <= 0) + { + nextFlushAt += flushInterval; + System.out.println("flushing journal..."); + cluster.forEach(i -> i.runOnInstance(() -> { + ((AccordService) AccordService.instance()).journal().closeCurrentSegmentForTestingIfNonEmpty(); + ((AccordService) AccordService.instance()).journal().checkAllCommands(); + })); + } + final Date date = new Date(); - System.out.printf("%tT rate: %.2f/s\n", date, (((float)batchSize * 1000) / NANOSECONDS.toMillis(System.nanoTime() - batchStart))); + System.out.printf("%tT rate: %.2f/s (%d total)\n", date, (((float)batchSizeLimit * 1000) / NANOSECONDS.toMillis(System.nanoTime() - batchStart)), batchSize); System.out.printf("%tT percentiles: %d %d %d %d\n", date, histogram.percentile(.25)/1000, histogram.percentile(.5)/1000, histogram.percentile(.75)/1000, histogram.percentile(1)/1000); class VerbCount diff --git a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java index 1be3fd686dc2..0503684e1145 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java +++ b/test/distributed/org/apache/cassandra/distributed/test/accord/AccordMigrationRaceTestBase.java @@ -542,15 +542,14 @@ private void testSplitAndRetry(String batchCQL, Consumer validation, Sc if (unpauseAfterBatchLogCreatesTransaction) { logger.info("Creating thread to unpause after batchlog creates Accord transaction"); - new Thread(() -> - { + new Thread(() -> { try { // Unpause so it can route incorrectly instead of timing out waiting to fetch the epoch, need the transaction to be created first // otherwise it will just be routed straight to non-Accord. logger.info("Spinning waiting on a transaction"); Util.spinUntilTrue(() -> !((AccordService)AccordService.instance()).node().coordinating().isEmpty(), 20); - logger.info("Foudn transaction, unpausing"); + logger.info("Found transaction, unpausing"); TestChangeListener.instance.unpause(); unpaused.trySuccess(null); } diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java index e702d3bac38b..6229e8148f6d 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -141,25 +141,25 @@ public boolean enableCompaction() { timestamp = timestamp.next(); AccordSafeCommandStore.FieldUpdates updates = new AccordSafeCommandStore.FieldUpdates(); - updates.durableBefore = durableBeforeGen.next(rs); + updates.addDurableBefore = durableBeforeGen.next(rs); // TODO: improve redundant before generator and re-enable // updates.redundantBefore = redundantBeforeGen.next(rs); - updates.safeToRead = safeToReadGen.next(rs); - updates.rangesForEpoch = rangesForEpochGen.next(rs); - updates.historicalTransactions = historicalTransactionsGen.next(rs); + updates.newSafeToRead = safeToReadGen.next(rs); + updates.newRangesForEpoch = rangesForEpochGen.next(rs); + updates.addHistoricalTransactions = historicalTransactionsGen.next(rs); journal.persistStoreState(1, updates, null); - redundantBeforeAccumulator.update(updates.redundantBefore); - durableBeforeAccumulator.update(updates.durableBefore); - if (updates.bootstrapBeganAt != null) - bootstrapBeganAtAccumulator.update(updates.bootstrapBeganAt); - safeToReadAccumulator.update(updates.safeToRead); - rangesForEpochAccumulator.update(updates.rangesForEpoch); - historicalTransactionsAccumulator.update(updates.historicalTransactions); + redundantBeforeAccumulator.update(updates.addRedundantBefore); + durableBeforeAccumulator.update(updates.addDurableBefore); + if (updates.newBootstrapBeganAt != null) + bootstrapBeganAtAccumulator.update(updates.newBootstrapBeganAt); + safeToReadAccumulator.update(updates.newSafeToRead); + rangesForEpochAccumulator.update(updates.newRangesForEpoch); + historicalTransactionsAccumulator.update(updates.addHistoricalTransactions); if (i % 100 == 0) - journal.closeCurrentSegmentForTesting(); + journal.closeCurrentSegmentForTestingIfNonEmpty(); if (i % 200 == 0) journal.runCompactorForTesting(); } diff --git a/test/unit/org/apache/cassandra/ServerTestUtils.java b/test/unit/org/apache/cassandra/ServerTestUtils.java index 884272a2a714..08b91075fa3d 100644 --- a/test/unit/org/apache/cassandra/ServerTestUtils.java +++ b/test/unit/org/apache/cassandra/ServerTestUtils.java @@ -28,7 +28,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; -import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -284,7 +283,6 @@ public static void initCMS() // log entries is always done by the dedicated log follower thread. DatabaseDescriptor.setMetadataSnapshotFrequency(Integer.MAX_VALUE); - Function processorFactory = AtomicLongBackedProcessor::new; IPartitioner partitioner = DatabaseDescriptor.getPartitioner(); Location location = DatabaseDescriptor.getLocator().local(); boolean addListeners = true; @@ -292,15 +290,17 @@ public static void initCMS() if (!Keyspace.isInitialized()) Keyspace.setInitialized(); + AtomicLongBackedProcessor.InMemoryStorage storage = new AtomicLongBackedProcessor.InMemoryStorage(); LocalLog log = LocalLog.logSpec() .withInitialState(initial) .withDefaultListeners(addListeners) + .withStorage(storage) .createLog(); ResettableClusterMetadataService service = new ResettableClusterMetadataService(new UniformRangePlacement(), MetadataSnapshots.NO_OP, log, - processorFactory.apply(log), + new AtomicLongBackedProcessor(log), Commit.Replicator.NO_OP, true); diff --git a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java index 372619d1457d..0bdc561818c3 100644 --- a/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java +++ b/test/unit/org/apache/cassandra/config/YamlConfigurationLoaderTest.java @@ -27,6 +27,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Predicate; @@ -502,6 +503,22 @@ public void testBackwardCompatibilityOfAuthenticatorPropertyAsString() throws IO assertTrue(config.authenticator.parameters.isEmpty()); } + @Test + public void testAccordConfig() + { + Map accordSpec = ImmutableMap.of("fast_path_update_delay", "60s", + "schedule_durability_frequency", "60s", + "durability_txnid_lag", "60s", + "shard_durability_cycle", "60s", + "global_durability_cycle", "60s"); + AccordSpec spec = from("accord", accordSpec).accord; + assertThat(spec.fast_path_update_delay.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); + assertThat(spec.schedule_durability_frequency.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); + assertThat(spec.durability_txnid_lag.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); + assertThat(spec.shard_durability_cycle.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); + assertThat(spec.global_durability_cycle.to(TimeUnit.NANOSECONDS)).isEqualTo(60000000000L); + } + private static Config fromYaml(Object... values) { assert values.length % 2 == 0 : "Map can only be created with an even number of inputs: given " + values.length; diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java index 181d2c7ca6ff..a329be58640c 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionAccordIteratorsTest.java @@ -420,9 +420,12 @@ private static IAccordService mockAccordService(CommandStore commandStore, Redun Int2ObjectHashMap redundantBefores = new Int2ObjectHashMap<>(); if (redundantBefore != null) redundantBefores.put(commandStore.id(), redundantBefore); + Int2ObjectHashMap durableBefores = new Int2ObjectHashMap<>(); + if (durableBefore != null) + durableBefores.put(commandStore.id(), durableBefore); Int2ObjectHashMap rangesForEpochs = new Int2ObjectHashMap<>(); rangesForEpochs.put(commandStore.id(), commandStore.unsafeRangesForEpoch()); - when(mockAccordService.getCompactionInfo()).thenReturn(new IAccordService.CompactionInfo(redundantBefores, rangesForEpochs, durableBefore)); + when(mockAccordService.getCompactionInfo()).thenReturn(new IAccordService.CompactionInfo(redundantBefores, rangesForEpochs, durableBefores)); return mockAccordService; } @@ -436,9 +439,9 @@ private static void flush(AccordCommandStore commandStore) { commandStore.executeBlocking(() -> { // clear cache and wait for post-eviction writes to complete - long cacheSize = commandStore.capacity(); - commandStore.setCapacity(0); - commandStore.setCapacity(cacheSize); + long cacheSize = commandStore.cache().capacity(); + commandStore.cache().setCapacity(0); + commandStore.cache().setCapacity(cacheSize); commandStore.cache().awaitSaveResults(); }); commands.forceBlockingFlush(FlushReason.UNIT_TESTS); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java index b6fca2e9ccd7..e41e4ca579aa 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandStoreTest.java @@ -159,6 +159,7 @@ public void timestampsForKeyLoadSave() { AtomicLong clock = new AtomicLong(0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); +// SafeCommandStore safeStore = Timestamp maxTimestamp = timestamp(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(1); @@ -172,10 +173,10 @@ public void timestampsForKeyLoadSave() AccordSafeTimestampsForKey tfk = new AccordSafeTimestampsForKey(loaded(key, null)); tfk.initialize(); - TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId1, txnId1, true); + TimestampsForKeys.updateLastExecutionTimestamps(null, tfk, txnId1, txnId1, true); Assert.assertEquals(txnId1.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId1, true)); - TimestampsForKeys.updateLastExecutionTimestamps(commandStore, tfk, txnId2, txnId2, true); + TimestampsForKeys.updateLastExecutionTimestamps(null, tfk, txnId2, txnId2, true); Assert.assertEquals(txnId2.hlc(), AccordSafeTimestampsForKey.timestampMicrosFor(tfk.current(), txnId2, true)); Assert.assertEquals(txnId2, tfk.current().lastExecutedTimestamp()); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java index 28f54ce688fa..4a5351286203 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordCommandTest.java @@ -93,7 +93,7 @@ private static PartitionKey key(int k) public void basicCycleTest() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCapacity(0))); + getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.cache().setCapacity(0))); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); Txn txn = createWriteTxn(1); @@ -185,7 +185,7 @@ public void basicCycleTest() throws Throwable public void computeDeps() throws Throwable { AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.setCapacity(0))); + getUninterruptibly(commandStore.execute(PreLoadContext.empty(), unused -> commandStore.cache().setCapacity(0))); TxnId txnId1 = txnId(1, clock.incrementAndGet(), 1); Txn txn = createWriteTxn(2); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java index 9daa1bb9fa45..7210d4a09716 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordConfigurationServiceTest.java @@ -20,12 +20,27 @@ import java.net.UnknownHostException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import java.util.Set; +import java.util.Optional; import java.util.UUID; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; +import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.schema.DistributedSchema; +import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.Tables; +import org.apache.cassandra.tcm.ValidatingClusterMetadataService; +import org.apache.cassandra.tcm.membership.Location; +import org.apache.cassandra.tcm.membership.NodeAddresses; +import org.apache.cassandra.tcm.membership.NodeId; +import org.apache.cassandra.tcm.membership.NodeVersion; +import org.apache.cassandra.tcm.ownership.DataPlacement; +import org.apache.cassandra.tcm.serialization.Version; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -33,9 +48,7 @@ import accord.api.ConfigurationService.EpochReady; import accord.impl.AbstractConfigurationServiceTest; -import accord.local.Node; import accord.local.Node.Id; -import accord.topology.Shard; import accord.topology.Topology; import accord.utils.SortedArrays.SortedArrayList; import org.apache.cassandra.SchemaLoader; @@ -57,7 +70,6 @@ import org.apache.cassandra.utils.concurrent.Future; import static accord.impl.AbstractConfigurationServiceTest.TestListener; -import static com.google.common.collect.ImmutableSet.of; import static java.lang.String.format; import static org.apache.cassandra.cql3.QueryProcessor.executeInternal; import static org.apache.cassandra.cql3.statements.schema.CreateTableStatement.parse; @@ -72,9 +84,8 @@ public class AccordConfigurationServiceTest private static final Id ID2 = new Id(2); private static final Id ID3 = new Id(3); private static final SortedArrayList ID_LIST = new SortedArrayList<>(new Id[] { ID1, ID2, ID3 }); - private static final Set ID_SET = ImmutableSet.copyOf(ID_LIST); - private static final TableId TBL1 = TableId.fromUUID(new UUID(0, 1)); - private static final TableId TBL2 = TableId.fromUUID(new UUID(0, 2)); + private static final String KEYSPACE_NAME = "test_ks"; + private static final TableId TBL_ID = TableId.fromUUID(new UUID(0, 1)); private static EndpointMapping mappingForEpoch(long epoch) { @@ -92,21 +103,6 @@ private static EndpointMapping mappingForEpoch(long epoch) } } - private static EndpointMapping mappingForTopology(Topology topology) - { - try - { - EndpointMapping.Builder builder = EndpointMapping.builder(topology.epoch()); - for (Node.Id id : topology.nodes()) - builder.add(InetAddressAndPort.getByName("127.0.0." + id.id), id); - return builder.build(); - } - catch (UnknownHostException e) - { - throw new RuntimeException(e); - } - } - private static class Messaging implements MessageDelivery { static class Request @@ -177,15 +173,17 @@ public void setup() @Test public void initialEpochTest() throws Throwable { + ValidatingClusterMetadataService cms = ValidatingClusterMetadataService.createAndRegister(Version.MIN_ACCORD_VERSION); + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); Assert.assertEquals(null, AccordKeyspace.loadEpochDiskState()); service.start(); Assert.assertEquals(null, AccordKeyspace.loadEpochDiskState()); Assert.assertTrue(executeInternal(format("SELECT * FROM %s.%s WHERE epoch=1", ACCORD_KEYSPACE_NAME, TOPOLOGIES)).isEmpty()); - Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); + Topology topology1 = createTopology(cms); service.reportTopology(topology1); - loadEpoch(1, null, (epoch, cm, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { + loadEpoch(1, cms.metadata(), (epoch, cm, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { Assert.assertEquals(topology1, topology); Assert.assertTrue(remoteSync.isEmpty()); }); @@ -193,37 +191,39 @@ public void initialEpochTest() throws Throwable service.receiveRemoteSyncComplete(ID1, 1); service.receiveRemoteSyncComplete(ID2, 1); - loadEpoch(1, null, (epoch, cm, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { + loadEpoch(1, cms.metadata(), (epoch, cm, topology, syncStatus, pendingSync, remoteSync, closed, redundant) -> { Assert.assertEquals(topology1, topology); Assert.assertEquals(Sets.newHashSet(ID1, ID2), remoteSync); }); } @Test - public void loadTest() throws Throwable + public void loadTest() { + ValidatingClusterMetadataService cms = ValidatingClusterMetadataService.createAndRegister(Version.MIN_ACCORD_VERSION); + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); service.start(); - Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); - service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + Topology topology1 = createTopology(cms); + service.updateMapping(mappingForEpoch(cms.metadata().epoch.getEpoch() + 1)); service.reportTopology(topology1); service.acknowledgeEpoch(EpochReady.done(1), true); service.receiveRemoteSyncComplete(ID1, 1); service.receiveRemoteSyncComplete(ID2, 1); service.receiveRemoteSyncComplete(ID3, 1); - Topology topology2 = new Topology(2, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); + Topology topology2 = createTopology(cms); service.reportTopology(topology2); service.acknowledgeEpoch(EpochReady.done(2), true); service.receiveRemoteSyncComplete(ID1, 2); - Topology topology3 = new Topology(3, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); + Topology topology3 = createTopology(cms); service.reportTopology(topology3); service.acknowledgeEpoch(EpochReady.done(3), true); AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); - loaded.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + loaded.updateMapping(mappingForEpoch(cms.metadata().epoch.getEpoch() + 1)); AbstractConfigurationServiceTest.TestListener listener = new AbstractConfigurationServiceTest.TestListener(loaded, true); loaded.registerListener(listener); loaded.start(); @@ -241,29 +241,91 @@ public void loadTest() throws Throwable @Test public void truncateTest() { + ValidatingClusterMetadataService cms = ValidatingClusterMetadataService.createAndRegister(Version.MIN_ACCORD_VERSION); + AccordConfigurationService service = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); TestListener serviceListener = new TestListener(service, true); service.registerListener(serviceListener); service.start(); - Topology topology1 = new Topology(1, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, ID_SET)); - service.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + Topology topology1 = createTopology(cms); + service.updateMapping(mappingForEpoch(cms.metadata().epoch.getEpoch() + 1)); service.reportTopology(topology1); - Topology topology2 = new Topology(2, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); + Topology topology2 = createTopology(cms); service.reportTopology(topology2); - Topology topology3 = new Topology(3, new Shard(AccordTopology.fullRange(TBL1), ID_LIST, of(ID1, ID2))); + Topology topology3 = createTopology(cms); service.reportTopology(topology3); service.truncateTopologiesUntil(3); Assert.assertEquals(EpochDiskState.create(3), service.diskState()); serviceListener.assertTruncates(3L); AccordConfigurationService loaded = new AccordConfigurationService(ID1, new Messaging(), new MockFailureDetector(), AccordConfigurationService.SystemTableDiskStateManager.instance, ScheduledExecutors.scheduledTasks); - loaded.updateMapping(mappingForEpoch(ClusterMetadata.current().epoch.getEpoch() + 1)); + loaded.updateMapping(mappingForEpoch(cms.metadata().epoch.getEpoch() + 1)); TestListener loadListener = new TestListener(loaded, true); loaded.registerListener(loadListener); loaded.start(); loadListener.assertTopologiesFor(3L); } + + private static Topology createTopology(ValidatingClusterMetadataService cms) + { + ClusterMetadata previous = cms.metadata(); + ClusterMetadata.Transformer next = previous.transformer(); + maybeCreateTable(previous, next); + + ClusterMetadata metadata = next.build().metadata; + cms.setMetadata(metadata); + return AccordTopology.createAccordTopology(metadata); + } + + private static void maybeCreateTable(ClusterMetadata previous, ClusterMetadata.Transformer next) + { + Optional ks = previous.schema.getKeyspaces().get(KEYSPACE_NAME); + if (ks.isPresent()) return; + // lets create it + TableMetadata table = TableMetadata.builder(KEYSPACE_NAME, "tbl") + .id(TBL_ID) + .kind(TableMetadata.Kind.REGULAR) + .partitioner(Murmur3Partitioner.instance) + .addPartitionKeyColumn("pk", Int32Type.instance) + .build(); + KeyspaceMetadata keyspace = KeyspaceMetadata.create(KEYSPACE_NAME, KeyspaceParams.simple(ID_LIST.size())) + .withSwapped(Tables.builder().add(table).build()); + + next.with(new DistributedSchema(previous.schema.getKeyspaces().with(keyspace))); + + for (Id node : ID_LIST) + { + // not forcing the cms node id to match as they do when this logic was first added... + next.register(new NodeAddresses(getAddress(node)), + new Location("dc1", "rack1"), + NodeVersion.CURRENT); + + next.proposeToken(new NodeId(node.id), Collections.singleton(new Murmur3Partitioner.LongToken(node.id))); + } + + DataPlacement.Builder replication = DataPlacement.builder(); + Range fullRange = new Range<>(Murmur3Partitioner.MINIMUM, Murmur3Partitioner.MINIMUM); + for (int i = 0; i < ID_LIST.size(); i++) + { + InetAddressAndPort address = getAddress(ID_LIST.get(i)); + Replica replica = new Replica(address, fullRange, true); + replication.withReadReplica(next.epoch(), replica).withWriteReplica(next.epoch(), replica); + } + next.with(previous.placements.unbuild().with(keyspace.params.replication, replication.build()).build()); + } + + private static InetAddressAndPort getAddress(Id node) + { + try + { + return InetAddressAndPort.getByAddress(new byte[]{127, 0, 0, (byte) node.id}); + } + catch (UnknownHostException e) + { + throw new RuntimeException(e); + } + } } diff --git a/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java index 34a270f544e3..33b8419a6d4d 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordJournalOrderTest.java @@ -83,14 +83,14 @@ public void simpleKeyTest() Runnable check = () -> { for (JournalKey key : res.keySet()) { - SavedCommand.Builder diffs = accordJournal.loadDiffs(key.commandStoreId, (TxnId) key.timestamp); + SavedCommand.Builder diffs = accordJournal.loadDiffs(key.commandStoreId, (TxnId) key.id); Assert.assertEquals(String.format("%d != %d for key %s", diffs.count(), res.get(key).intValue(), key), diffs.count(), res.get(key).intValue()); } }; check.run(); - accordJournal.closeCurrentSegmentForTesting(); + accordJournal.closeCurrentSegmentForTestingIfNonEmpty(); check.run(); } } \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 57006fdbf26a..3e74728f70ef 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -53,7 +53,6 @@ import accord.local.SafeCommand; import accord.local.SafeCommandStore; import accord.local.StoreParticipants; -import accord.primitives.SaveStatus; import accord.primitives.Ballot; import accord.primitives.FullKeyRoute; import accord.primitives.FullRoute; @@ -62,8 +61,10 @@ import accord.primitives.PartialTxn; import accord.primitives.Ranges; import accord.primitives.Routable; +import accord.primitives.SaveStatus; import accord.primitives.Seekable; import accord.primitives.Seekables; +import accord.primitives.Status; import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; @@ -105,6 +106,7 @@ import static accord.primitives.Routable.Domain.Key; import static accord.utils.async.AsyncChains.getUninterruptibly; import static java.lang.String.format; +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; public class AccordTestUtils { @@ -125,6 +127,7 @@ public static Command preaccepted(TxnId txnId, PartialTxn txn, Timestamp execute CommonAttributes.Mutable attrs = new CommonAttributes.Mutable(txnId); attrs.partialTxn(txn); attrs.setParticipants(StoreParticipants.all(route(txn))); + attrs.durability(Status.Durability.NotDurable); return Command.SerializerSupport.preaccepted(attrs, executeAt, Ballot.ZERO); } @@ -401,6 +404,7 @@ public static AccordCommandStore createAccordCommandStore( AccordJournal journal = new AccordJournal(new AccordSpec.JournalSpec()); journal.start(null); + AccordStateCache stateCache = new AccordStateCache(loadExecutor, saveExecutor, 8 << 20, new AccordStateCacheMetrics("test")); SingleEpochRanges holder = new SingleEpochRanges(topology.rangesForNode(node)); AccordCommandStore result = new AccordCommandStore(0, time, @@ -414,9 +418,7 @@ public static AccordCommandStore createAccordCommandStore( }), holder, journal, - loadExecutor, - saveExecutor, - new AccordStateCacheMetrics(AccordCommandStores.ACCORD_STATE_CACHE + System.currentTimeMillis())); + new AccordCommandStore.CommandStoreExecutor(stateCache, executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + 0 + ']'))); holder.set(result); // TODO: CompactionAccordIteratorsTest relies on this @@ -439,7 +441,7 @@ public static AccordCommandStore createAccordCommandStore( Node.Id node = new Id(1); Topology topology = new Topology(1, new Shard(range, new SortedArrayList<>(new Id[] { node }), Sets.newHashSet(node), Collections.emptySet())); AccordCommandStore store = createAccordCommandStore(node, now, topology, loadExecutor, saveExecutor); - store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).setCapacity(1 << 20)); + store.execute(PreLoadContext.empty(), safeStore -> ((AccordCommandStore)safeStore.commandStore()).cache().setCapacity(1 << 20)); return store; } diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index 9f6827f1b941..dd7377ab584b 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -69,7 +69,7 @@ private static class FieldUpdates private final Map fieldUpdates = new HashMap<>(); @Override - public Command loadCommand(int store, TxnId txnId) + public Command loadCommand(int store, TxnId txnId, RedundantBefore redundantBefore, DurableBefore durableBefore) { JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, store); List saved = commands.get(key); @@ -137,18 +137,18 @@ private FieldUpdates fieldUpdates(int store) public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fieldUpdates, Runnable onFlush) { FieldUpdates updates = fieldUpdates(store); - if (fieldUpdates.redundantBefore != null) - updates.redundantBeforeAccumulator.update(fieldUpdates.redundantBefore); - if (fieldUpdates.durableBefore != null) - updates.durableBeforeAccumulator.update(fieldUpdates.durableBefore); - if (fieldUpdates.bootstrapBeganAt != null) - updates.bootstrapBeganAtAccumulator.update(fieldUpdates.bootstrapBeganAt); - if (fieldUpdates.safeToRead != null) - updates.safeToReadAccumulator.update(fieldUpdates.safeToRead); - if (fieldUpdates.rangesForEpoch != null) - updates.rangesForEpochAccumulator.update(fieldUpdates.rangesForEpoch); - if (fieldUpdates.historicalTransactions != null) - updates.historicalTransactionsAccumulator.update(fieldUpdates.historicalTransactions); + if (fieldUpdates.addRedundantBefore != null) + updates.redundantBeforeAccumulator.update(fieldUpdates.addRedundantBefore); + if (fieldUpdates.addDurableBefore != null) + updates.durableBeforeAccumulator.update(fieldUpdates.addDurableBefore); + if (fieldUpdates.newBootstrapBeganAt != null) + updates.bootstrapBeganAtAccumulator.update(fieldUpdates.newBootstrapBeganAt); + if (fieldUpdates.newSafeToRead != null) + updates.safeToReadAccumulator.update(fieldUpdates.newSafeToRead); + if (fieldUpdates.newRangesForEpoch != null) + updates.rangesForEpochAccumulator.update(fieldUpdates.newRangesForEpoch); + if (fieldUpdates.addHistoricalTransactions != null) + updates.historicalTransactionsAccumulator.update(fieldUpdates.addHistoricalTransactions); onFlush.run(); } diff --git a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java index 1d86856922e6..99963c3af013 100644 --- a/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SavedCommandTest.java @@ -74,7 +74,7 @@ public void allNull() public void simpleNullChangeCheck() { int flags = getFlags(null, Command.NotDefined.uninitialised(TxnId.NONE)); - EnumSet has = EnumSet.of(Fields.TXN_ID, Fields.SAVE_STATUS, Fields.PARTICIPANTS, Fields.DURABILITY, Fields.PROMISED, + EnumSet has = EnumSet.of(Fields.SAVE_STATUS, Fields.PARTICIPANTS, Fields.DURABILITY, Fields.PROMISED, Fields.ACCEPTED /* this is Zero... which kinda means null... */); Set missing = Sets.difference(ALL, has); assertHas(flags, has); @@ -96,7 +96,7 @@ public void serde() out.clear(); Command orig = cmdBuilder.build(saveStatus); SavedCommand.serialize(null, orig, out, userVersion); - SavedCommand.Builder builder = new SavedCommand.Builder(); + SavedCommand.Builder builder = new SavedCommand.Builder(orig.txnId()); builder.deserializeNext(new DataInputBuffer(out.unsafeGetBufferAndFlip(), false), userVersion); // We are not persisting the result, so force it for strict equality builder.forceResult(orig.result()); @@ -132,6 +132,7 @@ private void assertMissing(int flags, Set missing) SoftAssertions checks = new SoftAssertions(); for (Fields field : missing) { + if (field == Fields.CLEANUP) continue; checks.assertThat(SavedCommand.getFieldChanged(field, flags)) .describedAs("field %s changed", field) .isFalse(); diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 06469b0bb246..78708f5538f6 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -52,7 +52,6 @@ import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.RoutableKey; -import accord.primitives.Routables; import accord.primitives.RoutingKeys; import accord.primitives.Timestamp; import accord.primitives.Txn; @@ -81,6 +80,7 @@ import org.apache.cassandra.utils.Pair; import org.assertj.core.api.Assertions; +import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; import static org.apache.cassandra.db.ColumnFamilyStore.FlushReason.UNIT_TESTS; import static org.apache.cassandra.schema.SchemaConstants.ACCORD_KEYSPACE_NAME; import static org.apache.cassandra.utils.AccordGenerators.fromQT; @@ -109,7 +109,7 @@ public SimulatedAccordCommandStore(RandomSource rs) ExecutorFactory.Global.unsafeSet(globalExecutor); Stage.READ.unsafeSetExecutor(unorderedScheduled); Stage.MUTATION.unsafeSetExecutor(unorderedScheduled); - for (Stage stage : Arrays.asList(Stage.MISC, Stage.ACCORD_MIGRATION)) + for (Stage stage : Arrays.asList(Stage.MISC, Stage.ACCORD_MIGRATION, Stage.READ, Stage.MUTATION)) stage.unsafeSetExecutor(globalExecutor.configureSequential("ignore").build()); this.updateHolder = new CommandStore.EpochUpdateHolder(); @@ -152,6 +152,7 @@ public Timestamp uniqueNow(Timestamp atLeast) } }; + AccordStateCache stateCache = new AccordStateCache(Stage.READ.executor(), Stage.MUTATION.executor(), 8 << 20, new AccordStateCacheMetrics("test")); this.journal = new MockJournal(); this.store = new AccordCommandStore(0, timeService, @@ -179,7 +180,7 @@ public void onUncaughtException(Throwable t) }), updateHolder, journal, - new AccordStateCacheMetrics("test")); + new AccordCommandStore.CommandStoreExecutor(stateCache, executorFactory().sequential(CommandStore.class.getSimpleName() + '[' + 0 + ']'), Thread.currentThread().getId())); store.cache().instances().forEach(i -> { i.register(new AccordStateCache.Listener() @@ -213,11 +214,6 @@ public void onEvict(AccordCachingState state) shouldCompact = boolSource(rs.fork()); } - public Ranges slice(Ranges ranges) - { - return ranges.slice(topology.ranges(), Routables.Slice.Minimal); - } - private static BooleanSupplier boolSource(RandomSource rs) { var gen = Gens.bools().mixedDistribution().next(rs); diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java index e3526e30b9e3..961a1dfdb3a2 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedDepsTest.java @@ -18,16 +18,6 @@ package org.apache.cassandra.service.accord; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.junit.Test; - import accord.api.RoutingKey; import accord.primitives.FullKeyRoute; import accord.primitives.FullRangeRoute; @@ -37,11 +27,23 @@ import accord.primitives.Ranges; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.utils.Gen; import org.apache.cassandra.db.marshal.Int32Type; +import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; import org.apache.cassandra.service.accord.api.PartitionKey; +import org.apache.cassandra.utils.Generators; +import org.junit.Test; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import static accord.utils.Property.qt; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; @@ -73,6 +75,40 @@ public void keyConflicts() }); } + @Test + public void tokenConflicts() + { + TableMetadata tbl = reverseTokenTbl; + int numSamples = 100; + Gen rawKey = Generators.toGen(Generators.bytes(16, 16)); + + qt().withExamples(10).check(rs -> { + AccordKeyspace.unsafeClear(); + + ByteBuffer key = rawKey.next(rs); + PartitionKey pk = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(key)); + Keys keys = Keys.of(pk); + FullKeyRoute route = keys.toRoute(pk.toUnseekable()); + Txn txn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(key, 42)); + + ByteBuffer tokenConflictKey = Murmur3Partitioner.LongToken.keyForToken((LongToken) Murmur3Partitioner.instance.decorateKey(key).getToken()); + PartitionKey pkTokenConflict = new PartitionKey(tbl.id, tbl.partitioner.decorateKey(tokenConflictKey)); + Keys keysTokenConflict = Keys.of(pkTokenConflict); + FullKeyRoute routeTokenConflict = keysTokenConflict.toRoute(pkTokenConflict.toUnseekable()); + Txn txnTokenConflict = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), Arrays.asList(tokenConflictKey, 42)); + try (var instance = new SimulatedAccordCommandStore(rs)) + { + List conflicts = new ArrayList<>(numSamples); + for (int i = 0; i < numSamples; i++) + { + instance.maybeCacheEvict(route, Ranges.EMPTY); + conflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), txn, route, keyConflicts(conflicts, route))); + conflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), txnTokenConflict, routeTokenConflict, keyConflicts(conflicts, routeTokenConflict))); + } + } + }); + } + @Test public void rangePartialKeyMatch() { @@ -86,7 +122,7 @@ public void rangePartialKeyMatch() { long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); Ranges partialRange = Ranges.of(tokenRange(tbl.id, token - 1, token)); - Ranges partialRangeSliced = instance.slice(partialRange); + long outOfRangeToken = token - 10; if (outOfRangeToken == Long.MIN_VALUE) // if this wraps around that is fine, just can't be min outOfRangeToken++; @@ -120,7 +156,7 @@ public void rangePartialKeyMatch() keyConflicts.add(id); outOfRangeKeyConflicts.add(id); - rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, Map.of(key, keyConflicts), rangeConflicts(rangeConflicts, partialRangeSliced))); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, Map.of(key, keyConflicts), rangeConflicts(rangeConflicts, partialRange))); } } }); @@ -156,7 +192,7 @@ public void simpleRangeConflicts() { instance.maybeCacheEvict(keyRoute, ranges); keyConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, keyConflicts(keyConflicts, keyRoute))); - rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keyRoute), rangeConflicts(rangeConflicts, instance.slice(ranges)))); + rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts(keyConflicts, keyRoute), rangeConflicts(rangeConflicts, ranges))); } } }); diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java index 9e9ac1fad7bb..30f0f0aa7fa2 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedRandomKeysWithRangeConflictTest.java @@ -18,64 +18,92 @@ package org.apache.cassandra.service.accord; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.junit.Test; - import accord.api.RoutingKey; import accord.primitives.FullRangeRoute; import accord.primitives.FullRoute; import accord.primitives.Keys; import accord.primitives.Ranges; +import accord.primitives.RoutingKeys; import accord.primitives.Txn; import accord.primitives.TxnId; +import accord.utils.Property; +import accord.utils.RandomSource; import org.apache.cassandra.dht.Murmur3Partitioner.LongToken; +import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; +import org.apache.cassandra.utils.FailingConsumer; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; -import static accord.utils.Property.qt; +import static accord.utils.Property.commands; +import static accord.utils.Property.stateful; import static org.apache.cassandra.dht.Murmur3Partitioner.LongToken.keyForToken; import static org.apache.cassandra.service.accord.AccordTestUtils.createTxn; public class SimulatedRandomKeysWithRangeConflictTest extends SimulatedAccordCommandStoreTestBase { + private static Property.SimpleCommand insertKey(RandomSource rs, State state) + { + long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); + RoutingKey key = new TokenKey(state.tbl.id, new LongToken(token)); + Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + state.tbl + "(pk, value) VALUES (?, ?)"), + Arrays.asList(keyForToken(token), 42)); + Keys keys = (Keys) keyTxn.keys(); + FullRoute keyRoute = keys.toRoute(keys.get(0).toUnseekable()); + + return new Property.SimpleCommand<>("Write Txn: " + keys, FailingConsumer.orFail(s -> { + s.instance.maybeCacheEvict(keyRoute, s.wholeRange); + var k = assertDepsMessage(s.instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, s.keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); + s.keyConflicts.get(key).add(k); + })); + } + + private static Property.SimpleCommand insertRange(RandomSource rs, State state) + { + return new Property.SimpleCommand<>("Range Txn: " + state.wholeRange, FailingConsumer.orFail(s -> { + s.instance.maybeCacheEvict(RoutingKeys.EMPTY, s.wholeRange); + s.rangeConflicts.add(assertDepsMessage(s.instance, rs.pick(DepsMessage.values()), s.rangeTxn, s.rangeRoute, s.keyConflicts, rangeConflicts(s.rangeConflicts, s.wholeRange))); + })); + } + + @Test public void keysAllOverConflictingWithRange() { - var tbl = reverseTokenTbl; - Ranges wholeRange = Ranges.of(fullRange(tbl.id)); - FullRangeRoute rangeRoute = wholeRange.toRoute(wholeRange.get(0).end()); - Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, wholeRange); - int numSamples = 300; + stateful().withSteps(State.steps).check(commands(() -> State::new) + .add(SimulatedRandomKeysWithRangeConflictTest::insertKey) + .add(SimulatedRandomKeysWithRangeConflictTest::insertRange) + .build()); + } - qt().withExamples(10).check(rs -> { - AccordKeyspace.unsafeClear(); - try (var instance = new SimulatedAccordCommandStore(rs)) - { - Map> keyConflicts = new HashMap<>(); - List rangeConflicts = new ArrayList<>(numSamples); - for (int i = 0; i < numSamples; i++) - { - long token = rs.nextLong(Long.MIN_VALUE + 1, Long.MAX_VALUE); - RoutingKey key = new TokenKey(tbl.id, new LongToken(token)); - Txn keyTxn = createTxn(wrapInTxn("INSERT INTO " + tbl + "(pk, value) VALUES (?, ?)"), - Arrays.asList(keyForToken(token), 42)); - Keys keys = (Keys) keyTxn.keys(); - FullRoute keyRoute = keys.toRoute(keys.get(0).toUnseekable()); + public static class State + { + static final int steps = 300; + final SimulatedAccordCommandStore instance; + final Map> keyConflicts = new HashMap<>(); + final List rangeConflicts = new ArrayList<>(steps); + + final TableMetadata tbl = reverseTokenTbl; + final Ranges wholeRange = Ranges.of(fullRange(tbl.id)); + final FullRangeRoute rangeRoute = wholeRange.toRoute(wholeRange.get(0).end()); + final Txn rangeTxn = createTxn(Txn.Kind.ExclusiveSyncPoint, wholeRange); - instance.maybeCacheEvict(keyRoute, wholeRange); + public State(RandomSource rs) + { + AccordKeyspace.unsafeClear(); + this.instance = new SimulatedAccordCommandStore(rs); + } - // the full range is (-Inf, +Inf] but the store could be [(-Inf, Number], (Number, +Inf]], so need to slice to the store to get a matching range - Ranges wholeRangeSlicedShard = instance.slice(wholeRange); - var k = assertDepsMessage(instance, rs.pick(DepsMessage.values()), keyTxn, keyRoute, Map.of(key, keyConflicts.computeIfAbsent(key, ignore -> new ArrayList<>())), Collections.emptyMap()); - keyConflicts.get(key).add(k); - rangeConflicts.add(assertDepsMessage(instance, rs.pick(DepsMessage.values()), rangeTxn, rangeRoute, keyConflicts, rangeConflicts(rangeConflicts, wholeRangeSlicedShard))); - } - } - }); + @Override + public String toString() + { + return "Storage Ranges: " + instance.topology.ranges(); + } } } diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 3a29e63e99f9..78af413da85f 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -97,7 +97,7 @@ public void cachedTest() AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); AccordStateCache.Instance commandCache = commandStore.commandCache(); - commandStore.executeBlocking(() -> commandStore.setCapacity(1024)); + commandStore.executeBlocking(() -> commandStore.cache().setCapacity(1024)); AccordStateCache.Instance timestampsCache = commandStore.timestampsForKeyCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); @@ -242,7 +242,7 @@ public void inProgressLoadTest() throws Throwable ManualExecutor executor = new ManualExecutor(); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl", executor, executor); - commandStore.executor().submit(() -> commandStore.setCapacity(1024)).get(); + commandStore.executor().submit(() -> commandStore.cache().setCapacity(1024)).get(); AccordStateCache.Instance commandCache = commandStore.commandCache(); TxnId txnId = txnId(1, clock.incrementAndGet(), 1); PartialTxn txn = createPartialTxn(0); diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java index 31988224b437..20f123572f24 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncOperationTest.java @@ -223,9 +223,9 @@ private static Command createStableUsingFastLifeCycle(AccordCommandStore command // clear cache commandStore.executeBlocking(() -> { - long cacheSize = commandStore.capacity(); - commandStore.setCapacity(0); - commandStore.setCapacity(cacheSize); + long cacheSize = commandStore.cache().capacity(); + commandStore.cache().setCapacity(0); + commandStore.cache().setCapacity(cacheSize); commandStore.cache().awaitSaveResults(); }); @@ -272,9 +272,9 @@ private static Command createStableUsingSlowLifeCycle(AccordCommandStore command // clear cache commandStore.executeBlocking(() -> { - long cacheSize = commandStore.capacity(); - commandStore.setCapacity(0); - commandStore.setCapacity(cacheSize); + long cacheSize = commandStore.cache().capacity(); + commandStore.cache().setCapacity(0); + commandStore.cache().setCapacity(cacheSize); commandStore.cache().awaitSaveResults(); }); @@ -365,7 +365,7 @@ public void loadFail() // all txn use the same key; 0 Keys keys = keys(Schema.instance.getTableMetadata("ks", "tbl"), 0); AccordCommandStore commandStore = createAccordCommandStore(clock::incrementAndGet, "ks", "tbl"); - commandStore.executeBlocking(() -> commandStore.setCapacity(0)); + commandStore.executeBlocking(() -> commandStore.cache().setCapacity(0)); Gen txnIdGen = rs -> txnId(1, clock.incrementAndGet(), 1); qt().withPure(false) diff --git a/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java index fd37346aa07c..128fdeca7b71 100644 --- a/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java +++ b/test/unit/org/apache/cassandra/tcm/ValidatingClusterMetadataService.java @@ -20,11 +20,17 @@ import java.io.IOException; import java.util.List; +import java.util.TreeMap; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.io.util.DataOutputBuffer; +import org.apache.cassandra.tcm.log.Entry; +import org.apache.cassandra.tcm.log.LogState; +import org.apache.cassandra.tcm.sequences.LockedRanges; import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializer; import org.apache.cassandra.tcm.serialization.AsymmetricMetadataSerializers; import org.apache.cassandra.tcm.serialization.Version; @@ -33,6 +39,7 @@ public class ValidatingClusterMetadataService extends StubClusterMetadataService { private final List supportedVersions; + private final TreeMap epochs = new TreeMap<>(); private ValidatingClusterMetadataService(List supportedVersions) { @@ -102,6 +109,62 @@ public void setMetadata(ClusterMetadata metadata) if (!metadata.epoch.equals(metadata().epoch.nextEpoch())) throw new AssertionError("Epochs were not sequential: expected " + metadata().epoch.nextEpoch() + " but given " + metadata.epoch); testSerde(ClusterMetadata.serializer, metadata); + epochs.put(metadata.epoch, metadata); super.setMetadata(metadata); } + + @Override + public Processor processor() + { + Processor delegate = super.processor(); + return new Processor() + { + @Override + public Commit.Result commit(Entry.Id entryId, Transformation transform, Epoch lastKnown, Retry.Deadline retryPolicy) + { + return delegate.commit(entryId, transform, lastKnown, retryPolicy); + } + + @Override + public ClusterMetadata fetchLogAndWait(Epoch waitFor, Retry.Deadline retryPolicy) + { + return delegate.fetchLogAndWait(waitFor, retryPolicy); + } + + @Override + public LogState reconstruct(Epoch lowEpoch, Epoch highEpoch, Retry.Deadline retryPolicy) + { + if (!epochs.containsKey(lowEpoch)) + throw new AssertionError("Unknown epoch: " + lowEpoch); + ClusterMetadata base = epochs.get(lowEpoch); + ImmutableList.Builder entries = ImmutableList.builder(); + int id = 0; + for (ClusterMetadata cm : epochs.subMap(lowEpoch, false, highEpoch, true).values()) + entries.add(new Entry(new Entry.Id(id++), cm.epoch, new MockTransformer(cm))); + return new LogState(base, entries.build()); + } + }; + } + + private static class MockTransformer implements Transformation + { + private final ClusterMetadata result; + + private MockTransformer(ClusterMetadata result) + { + this.result = result; + } + + @Override + public Kind kind() + { + return null; + } + + @Override + public Result execute(ClusterMetadata metadata) + { + return new Success(result, LockedRanges.AffectedRanges.EMPTY, ImmutableSet.of()); + } + } } From 0e1819bbb30a100923c1f7f77a3c5c8d3bb01684 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 3 Oct 2024 13:24:03 +0100 Subject: [PATCH 159/340] Accord: Share DurableBefore between CommandStores --- modules/accord | 2 +- .../service/accord/AccordCommandStore.java | 26 +++++------- .../service/accord/AccordCommandStores.java | 6 +-- .../service/accord/AccordJournal.java | 40 ++++++++++++++----- .../accord/AccordSafeCommandStore.java | 22 +--------- .../service/accord/AccordService.java | 2 + .../accord/CommandsForRangesLoader.java | 2 +- .../cassandra/service/accord/IJournal.java | 3 +- .../accord/AccordJournalCompactionTest.java | 31 +++++--------- .../service/accord/AccordTestUtils.java | 11 +++-- .../cassandra/service/accord/MockJournal.java | 25 +++++++++--- .../accord/SimulatedAccordCommandStore.java | 14 ++++--- .../SimulatedAccordCommandStoreTestBase.java | 2 +- 13 files changed, 96 insertions(+), 90 deletions(-) diff --git a/modules/accord b/modules/accord index 4a8566af7b7d..8fb29905ee96 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 4a8566af7b7de2ddec2c7527d7e2da593f99865f +Subproject commit 8fb29905ee96e089640ff88cccafdf43846b9262 diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index f39080bf0873..0f174e640fcc 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -48,9 +48,8 @@ import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.Commands; -import accord.local.DurableBefore; import accord.local.KeyHistory; -import accord.local.NodeTimeService; +import accord.local.NodeCommandStoreService; import accord.local.PreLoadContext; import accord.local.RedundantBefore; import accord.local.SafeCommand; @@ -173,7 +172,7 @@ private static void updateMutable(AccordStateCache.Instance instance, A } public AccordCommandStore(int id, - NodeTimeService time, + NodeCommandStoreService node, Agent agent, DataStore dataStore, ProgressLog.Factory progressLogFactory, @@ -182,7 +181,7 @@ public AccordCommandStore(int id, IJournal journal, CommandStoreExecutor commandStoreExecutor) { - super(id, time, agent, dataStore, progressLogFactory, listenerFactory, epochUpdateHolder); + super(id, node, agent, dataStore, progressLogFactory, listenerFactory, epochUpdateHolder); this.journal = journal; loggingId = String.format("[%s]", id); executor = commandStoreExecutor; @@ -219,7 +218,6 @@ public AccordCommandStore(int id, this.commandsForRangesLoader = new CommandsForRangesLoader(this); loadRedundantBefore(journal.loadRedundantBefore(id())); - loadDurableBefore(journal.loadDurableBefore(id())); loadBootstrapBeganAt(journal.loadBootstrapBeganAt(id())); loadSafeToRead(journal.loadSafeToRead(id())); loadRangesForEpoch(journal.loadRangesForEpoch(id())); @@ -230,8 +228,8 @@ public AccordCommandStore(int id, static Factory factory(AccordJournal journal, IntFunction executorFactory) { - return (id, time, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch) -> - new AccordCommandStore(id, time, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch, journal, executorFactory.apply(id)); + return (id, node, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch) -> + new AccordCommandStore(id, node, agent, dataStore, progressLogFactory, listenerFactory, rangesForEpoch, journal, executorFactory.apply(id)); } public CommandsForRangesLoader diskCommandsForRanges() @@ -417,9 +415,9 @@ public DataStore dataStore() return store; } - NodeTimeService time() + NodeCommandStoreService node() { - return time; + return node; } ProgressLog progressLog() @@ -545,7 +543,7 @@ public void appendCommands(List diffs, Runnable onFlush @VisibleForTesting public Command loadCommand(TxnId txnId) { - return journal.loadCommand(id, txnId, unsafeGetRedundantBefore(), unsafeGetDurableBefore()); + return journal.loadCommand(id, txnId, unsafeGetRedundantBefore(), durableBefore()); } public interface Loader @@ -592,7 +590,7 @@ public Promise load(Command command) Command local = command; if (local.status() != Truncated && local.status() != Invalidated) { - Cleanup cleanup = Cleanup.shouldCleanup(local, unsafeGetRedundantBefore(), unsafeGetDurableBefore()); + Cleanup cleanup = Cleanup.shouldCleanup(local, unsafeGetRedundantBefore(), durableBefore()); switch (cleanup) { case NO: @@ -654,12 +652,6 @@ void loadRedundantBefore(RedundantBefore redundantBefore) unsafeSetRedundantBefore(redundantBefore); } - void loadDurableBefore(DurableBefore durableBefore) - { - if (durableBefore != null) - unsafeSetDurableBefore(durableBefore); - } - void loadBootstrapBeganAt(NavigableMap bootstrapBeganAt) { if (bootstrapBeganAt != null) diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index bc34a7ad50ee..6d9744310f37 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -28,7 +28,7 @@ import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.Node; -import accord.local.NodeTimeService; +import accord.local.NodeCommandStoreService; import accord.local.ShardDistributor; import accord.primitives.Range; import accord.topology.Topology; @@ -52,11 +52,11 @@ public class AccordCommandStores extends CommandStores implements CacheSize private final CommandStoreExecutor[] executors; private long cacheSize; - AccordCommandStores(NodeTimeService time, Agent agent, DataStore store, RandomSource random, + AccordCommandStores(NodeCommandStoreService node, Agent agent, DataStore store, RandomSource random, ShardDistributor shardDistributor, ProgressLog.Factory progressLogFactory, LocalListeners.Factory listenerFactory, AccordJournal journal, CommandStoreExecutor[] executors) { - super(time, agent, store, random, shardDistributor, progressLogFactory, listenerFactory, + super(node, agent, store, random, shardDistributor, progressLogFactory, listenerFactory, AccordCommandStore.factory(journal, id -> executors[id % executors.length])); setCapacity(DatabaseDescriptor.getAccordCacheSizeInMiB() << 20); this.executors = executors; diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index eb8c0007f26d..310bbeb6367b 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -46,6 +46,9 @@ import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; +import accord.utils.PersistentField; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.concurrent.Shutdownable; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.util.DataInputBuffer; @@ -196,13 +199,6 @@ public RedundantBefore loadRedundantBefore(int store) return accumulator.get(); } - @Override - public DurableBefore loadDurableBefore(int store) - { - DurableBeforeAccumulator accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.DURABLE_BEFORE, store)); - return accumulator.get(); - } - @Override public NavigableMap loadBootstrapBeganAt(int store) { @@ -248,6 +244,34 @@ public void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onF journal.onFlush(pointer, onFlush); } + @Override + public PersistentField.Persister durableBeforePersister() + { + return new PersistentField.Persister<>() + { + @Override + public AsyncResult persist(DurableBefore addDurableBefore, DurableBefore newDurableBefore) + { + if (isReplay.get()) + return AsyncResults.success(null); + + AsyncResult.Settable result = AsyncResults.settable(); + JournalKey key = new JournalKey(TxnId.NONE, JournalKey.Type.DURABLE_BEFORE, 0); + RecordPointer pointer = appendInternal(key, addDurableBefore); + // TODO (required): what happens on failure? + journal.onFlush(pointer, () -> result.setSuccess(null)); + return result; + } + + @Override + public DurableBefore load() + { + DurableBeforeAccumulator accumulator = readAll(new JournalKey(TxnId.NONE, JournalKey.Type.DURABLE_BEFORE, 0)); + return accumulator.get(); + } + }; + } + @Override public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fieldUpdates, Runnable onFlush) { @@ -255,8 +279,6 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie // TODO: avoid allocating keys if (fieldUpdates.addRedundantBefore != null) pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.REDUNDANT_BEFORE, store), fieldUpdates.addRedundantBefore); - if (fieldUpdates.addDurableBefore != null) - pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.DURABLE_BEFORE, store), fieldUpdates.addDurableBefore); if (fieldUpdates.newBootstrapBeganAt != null) pointer = appendInternal(new JournalKey(TxnId.NONE, JournalKey.Type.BOOTSTRAP_BEGAN_AT, store), fieldUpdates.newBootstrapBeganAt); if (fieldUpdates.newSafeToRead != null) diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 34cb57ed5b1a..0b256574f494 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -35,7 +35,6 @@ import accord.impl.CommandsSummary; import accord.local.CommandStores; import accord.local.CommandStores.RangesForEpoch; -import accord.local.DurableBefore; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.RedundantBefore; @@ -182,7 +181,7 @@ public ProgressLog progressLog() public NodeTimeService time() { // TODO: safe command store should not have arbitrary time - return commandStore.time(); + return commandStore.node(); } @Override @@ -295,12 +294,6 @@ public void setBootstrapBeganAt(NavigableMap newBootstrapBeganAt) ensureFieldUpdates().newBootstrapBeganAt = newBootstrapBeganAt; } - @Override - public void upsertDurableBefore(DurableBefore addDurableBefore) - { - ensureFieldUpdates().addDurableBefore = addDurableBefore; - } - @Override public void setSafeToRead(NavigableMap newSafeToRead) { @@ -341,15 +334,6 @@ public RedundantBefore redundantBefore() return super.redundantBefore(); } - @Override - public DurableBefore durableBefore() - { - if (fieldUpdates != null && fieldUpdates.newDurableBefore != null) - return fieldUpdates.newDurableBefore; - - return super.durableBefore(); - } - @Override protected void registerHistoricalTransactions(Deps deps) { @@ -378,9 +362,6 @@ public void postExecute() if (fieldUpdates.newRedundantBefore != null) super.unsafeSetRedundantBefore(fieldUpdates.newRedundantBefore); - if (fieldUpdates.newDurableBefore != null) - super.unsafeSetDurableBefore(fieldUpdates.newDurableBefore); - if (fieldUpdates.newBootstrapBeganAt != null) super.setBootstrapBeganAt(fieldUpdates.newBootstrapBeganAt); @@ -394,7 +375,6 @@ public void postExecute() public static class FieldUpdates { public RedundantBefore addRedundantBefore, newRedundantBefore; - public DurableBefore addDurableBefore, newDurableBefore; public NavigableMap newBootstrapBeganAt; public NavigableMap newSafeToRead; public RangesForEpoch.Snapshot newRangesForEpoch; diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index 87872666bd65..e1e88cde0551 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -426,6 +426,7 @@ private AccordService(Id localId) DefaultLocalListeners.Factory::new, AccordCommandStores.factory(journal), new AccordInteropFactory(agent, configService), + journal.durableBeforePersister(), configuration); this.nodeShutdown = toShutdownable(node); this.durabilityScheduling = new CoordinateDurabilityScheduling(node); @@ -438,6 +439,7 @@ public synchronized void startup() if (state != State.INIT) return; journal.start(node); + node.load(); ClusterMetadataService cms = ClusterMetadataService.instance(); class Ref { List historic = Collections.emptyList();} Ref ref = new Ref(); diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index 1f5088365daa..bdf1aa98886f 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -189,7 +189,7 @@ private NavigableMap load(Ranges ranges, Map cac { //TODO (now): this logic is kinda duplicate of org.apache.cassandra.service.accord.CommandsForRange.mapReduce // should figure out if this can be improved... also what is correct? - DurableBefore durableBefore = store.unsafeGetDurableBefore(); + DurableBefore durableBefore = store.durableBefore(); NavigableMap map = new TreeMap<>(); for (TxnId txnId : possibleTxns) { diff --git a/src/java/org/apache/cassandra/service/accord/IJournal.java b/src/java/org/apache/cassandra/service/accord/IJournal.java index e327c1a5ce31..7de9d68602d5 100644 --- a/src/java/org/apache/cassandra/service/accord/IJournal.java +++ b/src/java/org/apache/cassandra/service/accord/IJournal.java @@ -29,19 +29,20 @@ import accord.primitives.Ranges; import accord.primitives.Timestamp; import accord.primitives.TxnId; +import accord.utils.PersistentField.Persister; public interface IJournal { Command loadCommand(int commandStoreId, TxnId txnId, RedundantBefore redundantBefore, DurableBefore durableBefore); RedundantBefore loadRedundantBefore(int commandStoreId); - DurableBefore loadDurableBefore(int commandStoreId); NavigableMap loadBootstrapBeganAt(int commandStoreId); NavigableMap loadSafeToRead(int commandStoreId); CommandStores.RangesForEpoch.Snapshot loadRangesForEpoch(int commandStoreId); List loadHistoricalTransactions(int store); void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onFlush); + Persister durableBeforePersister(); void persistStoreState(int store, // TODO: this class should not live under ASCS AccordSafeCommandStore.FieldUpdates fieldUpdates, diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java index 6229e8148f6d..a24cc81e9b74 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -93,23 +93,7 @@ public void segmentMergeTest() throws InterruptedException IdentityAccumulator rangesForEpochAccumulator = new IdentityAccumulator<>(null); HistoricalTransactionsAccumulator historicalTransactionsAccumulator = new HistoricalTransactionsAccumulator(); - Gen basicRedundantBeforeGen = AccordGenerators.redundantBefore(DatabaseDescriptor.getPartitioner()); - Gen redundantBeforeGen = rs -> { - // TODO: find a better way to generate consecutive redundant befores - while (true) - { - RedundantBefore next = basicRedundantBeforeGen.next(rs); - try - { - RedundantBefore.merge(redundantBeforeAccumulator.get(), next); - return next; - } - catch (Throwable t) - { - // retry; - } - } - }; + Gen redundantBeforeGen = AccordGenerators.redundantBefore(DatabaseDescriptor.getPartitioner()); Gen durableBeforeGen = AccordGenerators.durableBeforeGen(DatabaseDescriptor.getPartitioner()); Gen> safeToReadGen = AccordGenerators.safeToReadGen(DatabaseDescriptor.getPartitioner()); Gen rangesForEpochGen = AccordGenerators.rangesForEpoch(DatabaseDescriptor.getPartitioner()); @@ -137,21 +121,24 @@ public boolean enableCompaction() RandomSource rs = new DefaultRandom(); int count = 1_000; +// RedundantBefore redundantBefore = RedundantBefore.EMPTY; for (int i = 0; i <= count; i++) { timestamp = timestamp.next(); AccordSafeCommandStore.FieldUpdates updates = new AccordSafeCommandStore.FieldUpdates(); - updates.addDurableBefore = durableBeforeGen.next(rs); + DurableBefore addDurableBefore = durableBeforeGen.next(rs); // TODO: improve redundant before generator and re-enable -// updates.redundantBefore = redundantBeforeGen.next(rs); +// updates.addRedundantBefore = redundantBeforeGen.next(rs); +// updates.newRedundantBefore = redundantBefore = RedundantBefore.merge(redundantBefore, updates.addRedundantBefore); updates.newSafeToRead = safeToReadGen.next(rs); updates.newRangesForEpoch = rangesForEpochGen.next(rs); updates.addHistoricalTransactions = historicalTransactionsGen.next(rs); + journal.durableBeforePersister().persist(addDurableBefore, null); journal.persistStoreState(1, updates, null); - redundantBeforeAccumulator.update(updates.addRedundantBefore); - durableBeforeAccumulator.update(updates.addDurableBefore); + redundantBeforeAccumulator.update(updates.newRedundantBefore); + durableBeforeAccumulator.update(addDurableBefore); if (updates.newBootstrapBeganAt != null) bootstrapBeganAtAccumulator.update(updates.newBootstrapBeganAt); safeToReadAccumulator.update(updates.newSafeToRead); @@ -165,7 +152,7 @@ public boolean enableCompaction() } // Assert.assertEquals(redundantBeforeAccumulator.get(), journal.loadRedundantBefore(1)); - Assert.assertEquals(durableBeforeAccumulator.get(), journal.loadDurableBefore(1)); + Assert.assertEquals(durableBeforeAccumulator.get(), journal.durableBeforePersister().load()); Assert.assertEquals(bootstrapBeganAtAccumulator.get(), journal.loadBootstrapBeganAt(1)); Assert.assertEquals(safeToReadAccumulator.get(), journal.loadSafeToRead(1)); Assert.assertEquals(rangesForEpochAccumulator.get(), journal.loadRangesForEpoch(1)); diff --git a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java index 3e74728f70ef..c75690f2e2cc 100644 --- a/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java +++ b/test/unit/org/apache/cassandra/service/accord/AccordTestUtils.java @@ -46,8 +46,10 @@ import accord.local.CommandStore; import accord.local.CommandStores; import accord.local.CommonAttributes; +import accord.local.DurableBefore; import accord.local.Node; import accord.local.Node.Id; +import accord.local.NodeCommandStoreService; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.SafeCommand; @@ -364,16 +366,16 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS TableMetadata metadata = Schema.instance.getTableMetadata(keyspace, table); TokenRange range = TokenRange.fullRange(metadata.id); Node.Id node = new Id(1); - NodeTimeService time = new NodeTimeService() + NodeCommandStoreService time = new NodeCommandStoreService() { private ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); @Override public Id id() { return node;} + @Override public DurableBefore durableBefore() { return DurableBefore.EMPTY; } @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } @Override public Timestamp uniqueNow(Timestamp atLeast) { return Timestamp.fromValues(1, now.getAsLong(), node); } - @Override - public long elapsed(TimeUnit timeUnit) { return elapsed.applyAsLong(timeUnit); } + @Override public long elapsed(TimeUnit timeUnit) { return elapsed.applyAsLong(timeUnit); } }; SingleEpochRanges holder = new SingleEpochRanges(Ranges.of(range)); @@ -386,10 +388,11 @@ public static InMemoryCommandStore.Synchronized createInMemoryCommandStore(LongS public static AccordCommandStore createAccordCommandStore( Node.Id node, LongSupplier now, Topology topology, ExecutorPlus loadExecutor, ExecutorPlus saveExecutor) { - NodeTimeService time = new NodeTimeService() + NodeCommandStoreService time = new NodeCommandStoreService() { private ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.MICROSECONDS, this::now); + @Override public DurableBefore durableBefore() { return DurableBefore.EMPTY; } @Override public Id id() { return node;} @Override public long epoch() {return 1; } @Override public long now() {return now.getAsLong(); } diff --git a/test/unit/org/apache/cassandra/service/accord/MockJournal.java b/test/unit/org/apache/cassandra/service/accord/MockJournal.java index dd7377ab584b..64caa97216d3 100644 --- a/test/unit/org/apache/cassandra/service/accord/MockJournal.java +++ b/test/unit/org/apache/cassandra/service/accord/MockJournal.java @@ -47,6 +47,9 @@ import accord.primitives.TxnId; import accord.primitives.Writes; import accord.utils.Invariants; +import accord.utils.PersistentField.Persister; +import accord.utils.async.AsyncResult; +import accord.utils.async.AsyncResults; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.HistoricalTransactionsAccumulator; import org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; @@ -60,13 +63,13 @@ public class MockJournal implements IJournal private static class FieldUpdates { final RedundantBeforeAccumulator redundantBeforeAccumulator = new RedundantBeforeAccumulator(); - final DurableBeforeAccumulator durableBeforeAccumulator = new DurableBeforeAccumulator(); final IdentityAccumulator> bootstrapBeganAtAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY)); final IdentityAccumulator> safeToReadAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY)); final IdentityAccumulator rangesForEpochAccumulator = new IdentityAccumulator<>(null); final HistoricalTransactionsAccumulator historicalTransactionsAccumulator = new HistoricalTransactionsAccumulator(); } + final DurableBeforeAccumulator durableBeforeAccumulator = new DurableBeforeAccumulator(); private final Map fieldUpdates = new HashMap<>(); @Override public Command loadCommand(int store, TxnId txnId, RedundantBefore redundantBefore, DurableBefore durableBefore) @@ -85,9 +88,23 @@ public RedundantBefore loadRedundantBefore(int store) } @Override - public DurableBefore loadDurableBefore(int store) + public Persister durableBeforePersister() { - return fieldUpdates(store).durableBeforeAccumulator.get(); + return new Persister<>() + { + @Override + public AsyncResult persist(DurableBefore addDurableBefore, DurableBefore newDurableBefore) + { + durableBeforeAccumulator.update(addDurableBefore); + return AsyncResults.success(null); + } + + @Override + public DurableBefore load() + { + return durableBeforeAccumulator.get(); + } + }; } @Override @@ -139,8 +156,6 @@ public void persistStoreState(int store, AccordSafeCommandStore.FieldUpdates fie FieldUpdates updates = fieldUpdates(store); if (fieldUpdates.addRedundantBefore != null) updates.redundantBeforeAccumulator.update(fieldUpdates.addRedundantBefore); - if (fieldUpdates.addDurableBefore != null) - updates.durableBeforeAccumulator.update(fieldUpdates.addDurableBefore); if (fieldUpdates.newBootstrapBeganAt != null) updates.bootstrapBeganAtAccumulator.update(fieldUpdates.newBootstrapBeganAt); if (fieldUpdates.newSafeToRead != null) diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java index 78708f5538f6..72581b0cf211 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStore.java @@ -38,7 +38,9 @@ import accord.local.Command; import accord.local.CommandStore; import accord.local.CommandStores; +import accord.local.DurableBefore; import accord.local.Node; +import accord.local.NodeCommandStoreService; import accord.local.NodeTimeService; import accord.local.PreLoadContext; import accord.local.SafeCommand; @@ -92,7 +94,7 @@ public class SimulatedAccordCommandStore implements AutoCloseable private final CommandStore.EpochUpdateHolder updateHolder; private final BooleanSupplier shouldEvict, shouldFlush, shouldCompact; - public final NodeTimeService timeService; + public final NodeCommandStoreService storeService; public final AccordCommandStore store; public final Node.Id nodeId; public final Topology topology; @@ -114,10 +116,12 @@ public SimulatedAccordCommandStore(RandomSource rs) this.updateHolder = new CommandStore.EpochUpdateHolder(); this.nodeId = AccordTopology.tcmIdToAccord(ClusterMetadata.currentNullable().myNodeId()); - this.timeService = new NodeTimeService() + this.storeService = new NodeCommandStoreService() { private final ToLongFunction elapsed = NodeTimeService.elapsedWrapperFromNonMonotonicSource(TimeUnit.NANOSECONDS, this::now); + @Override public DurableBefore durableBefore() { return DurableBefore.EMPTY; } + @Override public Node.Id id() { @@ -155,7 +159,7 @@ public Timestamp uniqueNow(Timestamp atLeast) AccordStateCache stateCache = new AccordStateCache(Stage.READ.executor(), Stage.MUTATION.executor(), 8 << 20, new AccordStateCacheMetrics("test")); this.journal = new MockJournal(); this.store = new AccordCommandStore(0, - timeService, + storeService, new TestAgent.RethrowAgent() { @Override @@ -222,7 +226,7 @@ private static BooleanSupplier boolSource(RandomSource rs) public TxnId nextTxnId(Txn.Kind kind, Routable.Domain domain) { - return new TxnId(timeService.epoch(), timeService.now(), kind, domain, nodeId); + return new TxnId(storeService.epoch(), storeService.now(), kind, domain, nodeId); } public void maybeCacheEvict(Unseekables keysOrRanges) @@ -362,7 +366,7 @@ public Pair> enqueuePreAccept(Txn txn, public Pair> enqueueBeginRecovery(Txn txn, FullRoute route) { TxnId txnId = nextTxnId(txn.kind(), txn.keys().domain()); - Ballot ballot = Ballot.fromValues(timeService.epoch(), timeService.now(), nodeId); + Ballot ballot = Ballot.fromValues(storeService.epoch(), storeService.now(), nodeId); BeginRecovery br = new BeginRecovery(nodeId, topologies, txnId, null, txn, route, ballot); return Pair.create(txnId, processAsync(br, safe -> { diff --git a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java index f9f09f5c5c33..74ee74a4d124 100644 --- a/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java +++ b/test/unit/org/apache/cassandra/service/accord/SimulatedAccordCommandStoreTestBase.java @@ -268,7 +268,7 @@ protected static Pair> assertBeginRecoveryAfterPreAcceptAs return success; }); var delay = preAcceptAsync.flatMap(ignore -> AsyncChains.ofCallable(instance.unorderedScheduled, () -> { - Ballot ballot = Ballot.fromValues(instance.timeService.epoch(), instance.timeService.now(), nodeId); + Ballot ballot = Ballot.fromValues(instance.storeService.epoch(), instance.storeService.now(), nodeId); return new BeginRecovery(nodeId, new Topologies.Single(SizeOfIntersectionSorter.SUPPLIER, instance.topology), txnId, null, txn, route, ballot); })); var recoverAsync = delay.flatMap(br -> instance.processAsync(br, safe -> { From ab6ef97d2814be136a8271338f7f1caac729666c Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Thu, 3 Oct 2024 23:26:32 +0100 Subject: [PATCH 160/340] ninja: increase durability scheduling delays ninja: fix NPE disable ephemeral reads --- modules/accord | 2 +- src/java/org/apache/cassandra/config/AccordSpec.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/accord b/modules/accord index 8fb29905ee96..3846a378bfec 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 8fb29905ee96e089640ff88cccafdf43846b9262 +Subproject commit 3846a378bfec8d28b312b40cd8541fad2a76e840 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 9e0e3305cef9..4861069409ed 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -65,9 +65,9 @@ public long recoveryDelayFor(TxnId txnId, TimeUnit unit) public volatile DurationSpec.IntSecondsBound fast_path_update_delay = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec.IntSecondsBound schedule_durability_frequency = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec.IntSecondsBound durability_txnid_lag = new DurationSpec.IntSecondsBound(5); - public volatile DurationSpec.IntSecondsBound shard_durability_cycle = new DurationSpec.IntSecondsBound(1, TimeUnit.MINUTES); + public volatile DurationSpec.IntSecondsBound schedule_durability_frequency = new DurationSpec.IntSecondsBound(120); + public volatile DurationSpec.IntSecondsBound durability_txnid_lag = new DurationSpec.IntSecondsBound(10); + public volatile DurationSpec.IntSecondsBound shard_durability_cycle = new DurationSpec.IntSecondsBound(5, TimeUnit.MINUTES); public volatile DurationSpec.IntSecondsBound global_durability_cycle = new DurationSpec.IntSecondsBound(10, TimeUnit.MINUTES); public enum TransactionalRangeMigration @@ -87,7 +87,7 @@ public enum TransactionalRangeMigration * default transactional mode for tables created by this node when no transactional mode has been specified in the DDL */ public TransactionalMode default_transactional_mode = TransactionalMode.off; - public boolean ephemeralReadEnabled = true; + public boolean ephemeralReadEnabled = false; public boolean state_cache_listener_jfr_enabled = true; public final JournalSpec journal = new JournalSpec(); public final MinEpochRetrySpec minEpochSyncRetry = new MinEpochRetrySpec(); From d8ccc36e4d45ee60f6fa62f692f58c65cde7b265 Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Fri, 4 Oct 2024 10:25:14 +0100 Subject: [PATCH 161/340] visit journal backwards to save time parsing don't load range commands that are redundant, and load least possible use MISC verb handler for maintenance tasks --- modules/accord | 2 +- .../db/compaction/CompactionIterator.java | 20 +- src/java/org/apache/cassandra/dht/Token.java | 8 + .../index/accord/RoutesSearcher.java | 20 +- .../cassandra/journal/InMemoryIndex.java | 27 +- .../org/apache/cassandra/journal/Index.java | 2 +- .../org/apache/cassandra/journal/Journal.java | 15 +- .../apache/cassandra/journal/OnDiskIndex.java | 69 +--- .../cassandra/journal/RecordConsumer.java | 1 - .../org/apache/cassandra/journal/Segment.java | 10 +- .../apache/cassandra/journal/Segments.java | 5 +- .../cassandra/journal/StaticSegment.java | 4 +- src/java/org/apache/cassandra/net/Verb.java | 8 +- .../org/apache/cassandra/schema/TableId.java | 7 +- .../service/accord/AccordJournal.java | 1 + .../service/accord/AccordJournalTable.java | 13 +- .../accord/AccordJournalValueSerializers.java | 4 + .../service/accord/AccordKeyspace.java | 3 +- .../accord/AccordSafeCommandStore.java | 3 +- .../accord/AccordSegmentCompactor.java | 23 +- .../service/accord/CommandsForRanges.java | 7 +- .../accord/CommandsForRangesLoader.java | 108 ++--- .../service/accord/SavedCommand.java | 372 ++++++++++-------- .../cassandra/service/accord/TokenRange.java | 11 +- .../service/accord/api/AccordRoutableKey.java | 8 + .../service/accord/api/AccordRoutingKey.java | 38 +- .../service/accord/api/PartitionKey.java | 11 +- .../service/accord/async/AsyncLoader.java | 16 +- .../service/accord/async/AsyncOperation.java | 2 +- .../serializers/CommandSerializers.java | 12 + .../accord/serializers/KeySerializers.java | 48 ++- .../accord/AccordJournalCompactionTest.java | 29 +- .../test/AccordJournalSimulationTest.java | 2 +- .../index/accord/AccordIndexStressTest.java | 25 +- .../index/accord/RouteIndexTest.java | 3 +- .../apache/cassandra/journal/IndexTest.java | 25 +- .../apache/cassandra/journal/JournalTest.java | 16 +- .../apache/cassandra/journal/SegmentTest.java | 16 +- .../service/accord/CommandsForRangesTest.java | 2 +- .../service/accord/async/AsyncLoaderTest.java | 24 +- .../async/SimulatedAsyncOperationTest.java | 2 +- 41 files changed, 609 insertions(+), 413 deletions(-) diff --git a/modules/accord b/modules/accord index 3846a378bfec..08ee5ce1c630 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 3846a378bfec8d28b312b40cd8541fad2a76e840 +Subproject commit 08ee5ce1c6301201ccaf7d580a6af289ab4c5765 diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index a32ad7634654..2086fddc8832 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -150,7 +150,6 @@ public class CompactionIterator extends CompactionInfo.Holder implements Unfilte { private static final Logger logger = LoggerFactory.getLogger(CompactionIterator.class); private static final long UNFILTERED_TO_UPDATE_PROGRESS = 100; - private static Object[] TRUNCATE_CLUSTERING_VALUE = new Object[] { Long.MAX_VALUE, Integer.MAX_VALUE }; private final OperationType type; private final AbstractCompactionController controller; @@ -1027,7 +1026,7 @@ class AccordJournalPurger extends AbstractPurger JournalKey key = null; Object builder = null; FlyweightSerializer serializer = null; - Object[] lastClustering = null; + Object[] firstClustering = null; long maxSeenTimestamp = -1; final int userVersion; long lastDescriptor = -1; @@ -1058,6 +1057,7 @@ protected void beginPartition(UnfilteredRowIterator partition) maxSeenTimestamp = -1; lastDescriptor = -1; lastOffset = -1; + firstClustering = null; } @Override @@ -1084,7 +1084,7 @@ protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition try (DataOutputBuffer out = DataOutputBuffer.scratchBuffer.get()) { serializer.reserialize(key, builder, out, userVersion); - newVersion.row(lastClustering) + newVersion.row(firstClustering) .add("record", out.asNewBuffer()) .add("user_version", userVersion); } @@ -1117,12 +1117,7 @@ protected UnfilteredRowIterator applyToPartition(UnfilteredRowIterator partition PartitionUpdate.SimpleBuilder newVersion = PartitionUpdate.simpleBuilder(AccordKeyspace.Journal, partition.partitionKey()); - Row.SimpleBuilder rowBuilder; - if (cleanup == TRUNCATE || cleanup == TRUNCATE_WITH_OUTCOME) - rowBuilder = newVersion.row(TRUNCATE_CLUSTERING_VALUE); - else - rowBuilder = newVersion.row(lastClustering); - + Row.SimpleBuilder rowBuilder = newVersion.row(firstClustering); rowBuilder.add("record", commandBuilder.asByteBuffer(userVersion)) .add("user_version", userVersion); @@ -1154,10 +1149,10 @@ protected void collect(Row row) if (lastOffset != -1) { - Invariants.checkState(descriptor >= lastDescriptor, + Invariants.checkState(descriptor <= lastDescriptor, "Descriptors were accessed out of order: %d was accessed after %d", descriptor, lastDescriptor); Invariants.checkState(descriptor != lastDescriptor || - offset > lastOffset, + offset < lastOffset, "Offsets within %s were accessed out of order: %d was accessed after %s", offset, lastOffset); } lastDescriptor = descriptor; @@ -1167,7 +1162,8 @@ protected void collect(Row row) { int userVersion = Int32Type.instance.compose(row.getCell(versionColumn).buffer()); serializer.deserialize(key, builder, in, userVersion); - lastClustering = row.clustering().getBufferArray(); + if (firstClustering == null) + firstClustering = row.clustering().getBufferArray(); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/dht/Token.java b/src/java/org/apache/cassandra/dht/Token.java index 92481745f403..048d1a039469 100644 --- a/src/java/org/apache/cassandra/dht/Token.java +++ b/src/java/org/apache/cassandra/dht/Token.java @@ -223,6 +223,14 @@ public Token deserialize(ByteBuffer in, IPartitioner p) return p.getTokenFactory().fromByteArray(ByteBuffer.wrap(bytes)); } + public void skip(DataInputPlus in, IPartitioner p, int version) throws IOException + { + int size = p.isFixedLength() ? p.getMaxTokenSize() : in.readUnsignedVInt32(); + if (logPartitioner && deserializePartitioners.add(p.getClass())) + logger.debug("Deserializing token with partitioner " + p); + in.skipBytesFully(size); + } + public Token deserialize(DataInputPlus in, IPartitioner p, int version) throws IOException { int size = p.isFixedLength() ? p.getMaxTokenSize() : in.readUnsignedVInt32(); diff --git a/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java b/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java index 7975b95c7b6a..cade5d28ef28 100644 --- a/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java +++ b/src/java/org/apache/cassandra/index/accord/RoutesSearcher.java @@ -20,10 +20,11 @@ import java.nio.ByteBuffer; import java.util.Collections; -import java.util.HashSet; import java.util.Set; +import accord.primitives.Timestamp; import accord.primitives.TxnId; +import org.agrona.collections.ObjectHashSet; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.DataRange; @@ -69,10 +70,10 @@ private CloseableIterator searchKeysAccord(int store, AccordRoutingKey st limits, dataRange); Index.Searcher s = index.searcherFor(cmd); - try (ReadExecutionController controler = cmd.executionController()) + try (ReadExecutionController controller = cmd.executionController()) { - UnfilteredPartitionIterator partitionIterator = s.search(controler); - return new CloseableIterator() + UnfilteredPartitionIterator partitionIterator = s.search(controller); + return new CloseableIterator<>() { private final Entry entry = new Entry(); @Override @@ -100,21 +101,22 @@ public Entry next() } } - public Set intersects(int store, TokenRange range) + public Set intersects(int store, TokenRange range, TxnId minTxnId, Timestamp maxTxnId) { - return intersects(store, (AccordRoutingKey) range.start(), (AccordRoutingKey) range.end()); + return intersects(store, range.start(), range.end(), minTxnId, maxTxnId); } - public Set intersects(int store, AccordRoutingKey start, AccordRoutingKey end) + public Set intersects(int store, AccordRoutingKey start, AccordRoutingKey end, TxnId minTxnId, Timestamp maxTxnId) { - HashSet set = new HashSet(); + ObjectHashSet set = new ObjectHashSet(); try (CloseableIterator it = searchKeysAccord(store, start, end)) { while (it.hasNext()) { Entry next = it.next(); if (next.store_id != store) continue; // the index should filter out, but just in case... - set.add(next.txnId); + if (next.txnId.compareTo(minTxnId) >= 0 && next.txnId.compareTo(maxTxnId) < 0) + set.add(next.txnId); } } return set.isEmpty() ? Collections.emptySet() : set; diff --git a/src/java/org/apache/cassandra/journal/InMemoryIndex.java b/src/java/org/apache/cassandra/journal/InMemoryIndex.java index 2c71d8c4ffd6..1f0da7fd2881 100644 --- a/src/java/org/apache/cassandra/journal/InMemoryIndex.java +++ b/src/java/org/apache/cassandra/journal/InMemoryIndex.java @@ -18,7 +18,6 @@ package org.apache.cassandra.journal; import java.io.IOException; -import java.util.Arrays; import java.util.NavigableMap; import java.util.TreeMap; import java.util.concurrent.ConcurrentSkipListMap; @@ -61,16 +60,24 @@ public void update(K id, int offset, int size) index.merge(id, new long[] { currentOffsetAndSize }, (current, value) -> { - int idx = Arrays.binarySearch(current, currentOffsetAndSize); - if (idx >= 0) // repeat update() call; shouldn't occur, but we might as well allow this NOOP - return current; + long inserting = value[0]; + int idx = 0; + while (idx < current.length) + { + long cur = current[idx]; + if (cur <= inserting) + { + if (cur == inserting) + return current; // TODO (expected): throw exception? + break; + } + ++idx; + } - /* Merge the new offset with existing values */ - int pos = -idx - 1; long[] merged = new long[current.length + 1]; - System.arraycopy(current, 0, merged, 0, pos); - merged[pos] = currentOffsetAndSize; - System.arraycopy(current, pos, merged, pos + 1, current.length - pos); + System.arraycopy(current, 0, merged, 0, idx); + merged[idx] = inserting; + System.arraycopy(current, idx, merged, idx + 1, current.length - idx); return merged; }); @@ -98,7 +105,7 @@ public long[] lookUp(K id) } @Override - public long lookUpFirst(K id) + public long lookUpLast(K id) { long[] offsets = lookUp(id); return offsets.length == 0 ? -1 : offsets[0]; diff --git a/src/java/org/apache/cassandra/journal/Index.java b/src/java/org/apache/cassandra/journal/Index.java index bf6ab5d0c11e..ac1e7c1d9168 100644 --- a/src/java/org/apache/cassandra/journal/Index.java +++ b/src/java/org/apache/cassandra/journal/Index.java @@ -51,7 +51,7 @@ abstract class Index implements Closeable * * @return the first offset into the segment, or -1 is none were found */ - abstract long lookUpFirst(K id); + abstract long lookUpLast(K id); abstract long[] lookUpAll(K id); diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index 5e91c7d3d3e7..ba4bf503b25e 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -306,15 +306,15 @@ public boolean awaitTermination(long timeout, TimeUnit units) throws Interrupted * @return deserialized record if found, null otherwise */ @SuppressWarnings("unused") - public V readFirst(K id) + public V readLast(K id) { EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); try (ReferencedSegments segments = selectAndReference(id)) { - for (Segment segment : segments.allSorted()) + for (Segment segment : segments.allSorted(true)) { - if (segment.readFirst(id, holder)) + if (segment.readLast(id, holder)) { try (DataInputBuffer in = new DataInputBuffer(holder.value, false)) { @@ -336,8 +336,7 @@ public void readAll(K id, RecordConsumer consumer) EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); try (ReferencedSegments segments = selectAndReference(id)) { - consumer.init(); - for (Segment segment : segments.allSorted()) + for (Segment segment : segments.allSorted(false)) segment.readAll(id, holder, consumer); } } @@ -422,12 +421,12 @@ public V readFirstMatching(K id, Predicate condition) * @return true if the record was found, false otherwise */ @SuppressWarnings("unused") - public boolean readFirst(K id, RecordConsumer consumer) + public boolean readLast(K id, RecordConsumer consumer) { try (ReferencedSegments segments = selectAndReference(id)) { for (Segment segment : segments.all()) - if (segment.readFirst(id, consumer)) + if (segment.readLast(id, consumer)) return true; } return false; @@ -448,7 +447,7 @@ public Set test(Set test) { for (K id : test) { - if (segment.index().lookUpFirst(id) != -1) + if (segment.index().lookUpLast(id) != -1) { present.add(id); if (test.size() == present.size()) diff --git a/src/java/org/apache/cassandra/journal/OnDiskIndex.java b/src/java/org/apache/cassandra/journal/OnDiskIndex.java index b2f1487ad0a5..4994e3a0871f 100644 --- a/src/java/org/apache/cassandra/journal/OnDiskIndex.java +++ b/src/java/org/apache/cassandra/journal/OnDiskIndex.java @@ -21,7 +21,6 @@ import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; -import java.util.Arrays; import java.util.Map; import java.util.NavigableMap; import java.util.zip.CRC32; @@ -172,8 +171,8 @@ static void write( if (prev != -1) { long tmp = prev; - Invariants.checkState(readOffset(offsetAndSize) > readOffset(prev), - () -> String.format("Offsets should be strictly monotonic, but found %d following %d", + Invariants.checkState(readOffset(offsetAndSize) < readOffset(prev), + () -> String.format("Offsets should be strictly reverse monotonic, but found %d following %d", readOffset(offsetAndSize), readOffset(tmp))); } out.writeLong(offsetAndSize); @@ -202,53 +201,16 @@ public K lastId() @Override public long[] lookUp(K id) { - if (!mayContainId(id)) - return EMPTY; - - int keyIndex = binarySearch(id); - if (keyIndex < 0) - return EMPTY; - - long[] records = new long[] { recordAtIndex(keyIndex) }; - - /* - * Duplicate entries are possible within one segment (but should be rare). - * Check and add entries before and after the found result (not guaranteed to be first). - */ - - for (int i = keyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) - { - int length = records.length; - records = Arrays.copyOf(records, length + 1); - records[length] = recordAtIndex(i); - } - - for (int i = keyIndex + 1; i < entryCount && id.equals(keyAtIndex(i)); i++) - { - int length = records.length; - records = Arrays.copyOf(records, length + 1); - records[length] = recordAtIndex(i); - } - - Arrays.sort(records); - return records; + return lookUpAll(id); } @Override - public long lookUpFirst(K id) + public long lookUpLast(K id) { if (!mayContainId(id)) return -1L; int keyIndex = binarySearch(id); - - /* - * Duplicate entries are possible within one segment (but should be rare). - * Check and add entries before until we find the first occurrence of key. - */ - for (int i = keyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) - keyIndex = i; - return keyIndex < 0 ? -1 : recordAtIndex(keyIndex); } @@ -258,27 +220,22 @@ public long[] lookUpAll(K id) if (!mayContainId(id)) return EMPTY; - int start = binarySearch(id); - int firstKeyIndex = start; - - for (int i = firstKeyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--) - firstKeyIndex = i; - - if (firstKeyIndex < 0) + int someIndex = binarySearch(id); + if (someIndex < 0) return EMPTY; - int lastKeyIndex = start; + int firstKeyIndex = someIndex; + while (firstKeyIndex > 0 && id.equals(keyAtIndex(firstKeyIndex - 1))) + --firstKeyIndex; - for (int i = lastKeyIndex + 1; i < entryCount && id.equals(keyAtIndex(i)); i++) - lastKeyIndex = i; + int lastKeyIndex = someIndex; + while (lastKeyIndex + 1 < entryCount && id.equals(keyAtIndex(lastKeyIndex + 1))) + ++lastKeyIndex; long[] all = new long[lastKeyIndex - firstKeyIndex + 1]; int idx = firstKeyIndex; for (int i = 0; i < all.length; i++) - { - all[i] = recordAtIndex(idx); - idx++; - } + all[i] = recordAtIndex(idx++); return all; } diff --git a/src/java/org/apache/cassandra/journal/RecordConsumer.java b/src/java/org/apache/cassandra/journal/RecordConsumer.java index 3403cd0f2301..e16194001dd2 100644 --- a/src/java/org/apache/cassandra/journal/RecordConsumer.java +++ b/src/java/org/apache/cassandra/journal/RecordConsumer.java @@ -24,6 +24,5 @@ @FunctionalInterface public interface RecordConsumer { - default void init() {} void accept(long segment, int position, K key, ByteBuffer buffer, IntHashSet hosts, int userVersion); } diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java index 0da59118b701..7f955669cdae 100644 --- a/src/java/org/apache/cassandra/journal/Segment.java +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -56,9 +56,9 @@ public abstract class Segment implements Closeable, RefCounted consumer) + boolean readLast(K id, RecordConsumer consumer) { - long offsetAndSize = index().lookUpFirst(id); + long offsetAndSize = index().lookUpLast(id); if (offsetAndSize == -1) return false; @@ -74,9 +74,9 @@ boolean readFirst(K id, RecordConsumer consumer) return false; } - boolean readFirst(K id, EntrySerializer.EntryHolder into) + boolean readLast(K id, EntrySerializer.EntryHolder into) { - long offsetAndSize = index().lookUpFirst(id); + long offsetAndSize = index().lookUpLast(id); if (offsetAndSize == -1 || !read(Index.readOffset(offsetAndSize), Index.readSize(offsetAndSize), into)) return false; Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); @@ -86,10 +86,12 @@ boolean readFirst(K id, EntrySerializer.EntryHolder into) void readAll(K id, EntrySerializer.EntryHolder into, RecordConsumer onEntry) { long[] all = index().lookUpAll(id); + int prevOffset = Integer.MAX_VALUE; for (int i = 0; i < all.length; i++) { int offset = Index.readOffset(all[i]); int size = Index.readSize(all[i]); + Invariants.checkState(offset < prevOffset); Invariants.checkState(read(offset, size, into), "Read should always return true"); onEntry.accept(descriptor.timestamp, offset, into.key, into.value, into.hosts, into.userVersion); } diff --git a/src/java/org/apache/cassandra/journal/Segments.java b/src/java/org/apache/cassandra/journal/Segments.java index 94282e9d8755..cc98750fc45d 100644 --- a/src/java/org/apache/cassandra/journal/Segments.java +++ b/src/java/org/apache/cassandra/journal/Segments.java @@ -106,10 +106,11 @@ Iterable> all() /** * Returns segments in timestamp order. Will allocate and sort the segment collection. */ - List> allSorted() + List> allSorted(boolean asc) { List> segments = new ArrayList<>(this.segments.values()); - segments.sort(Comparator.comparing(s -> s.descriptor)); + if (asc) segments.sort(Comparator.comparing(s -> s.descriptor)); + else segments.sort((o1, o2) -> -o1.descriptor.compareTo(o2.descriptor)); return segments; } diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index f5f15ee13c61..3a8c03bb1a32 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -475,10 +475,10 @@ public int compareTo(KeyOrderReader that) int cmp = keySupport.compare(this.key(), that.key()); if (cmp != 0) return cmp; - cmp = Long.compare(this.descriptor.timestamp, that.descriptor.timestamp); + cmp = Long.compare(that.descriptor.timestamp, this.descriptor.timestamp); if (cmp != 0) return cmp; - return Integer.compare(this.offset, that.offset); + return Integer.compare(that.offset, this.offset); } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index 1115b77ac8f2..f146a5381a2b 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -338,10 +338,10 @@ public enum Verb ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), ACCORD_GET_MAX_CONFLICT_REQ (164, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_MAX_CONFLICT_RSP), - ACCORD_FETCH_DATA_RSP (145, P2, writeTimeout,IMMEDIATE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), - ACCORD_FETCH_DATA_REQ (146, P2, writeTimeout,IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), - ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, IMMEDIATE, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_FETCH_DATA_RSP (145, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), + ACCORD_FETCH_DATA_REQ (146, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), + ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, MISC, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, MISC, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), ACCORD_QUERY_DURABLE_BEFORE_RSP (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), ACCORD_QUERY_DURABLE_BEFORE_REQ (150, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,AccordService::verbHandlerOrNoop, ACCORD_QUERY_DURABLE_BEFORE_RSP ), diff --git a/src/java/org/apache/cassandra/schema/TableId.java b/src/java/org/apache/cassandra/schema/TableId.java index ac486f71d985..302d7db6bf13 100644 --- a/src/java/org/apache/cassandra/schema/TableId.java +++ b/src/java/org/apache/cassandra/schema/TableId.java @@ -185,6 +185,11 @@ public final int serializedSize() return 16; } + public static int staticSerializedSize() + { + return 16; + } + public static TableId deserialize(DataInput in) throws IOException { return new TableId(new UUID(in.readLong(), in.readLong())); @@ -201,7 +206,7 @@ public int compareTo(TableId o) return id.compareTo(o.id); } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final IVersionedSerializer serializer = new IVersionedSerializer<>() { @Override public void serialize(TableId t, DataOutputPlus out, int version) throws IOException diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 310bbeb6367b..49af8042a8aa 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -323,6 +323,7 @@ private BUILDER readAll(JournalKey key) BUILDER builder = (BUILDER) key.type.serializer.mergerFor(key); // TODO: this can be further improved to avoid allocating lambdas AccordJournalValueSerializers.FlyweightSerializer serializer = (AccordJournalValueSerializers.FlyweightSerializer) key.type.serializer; + // TODO (expected): for those where we store an image, read only the first entry we find in DESC order journalTable.readAll(key, (in, userVersion) -> serializer.deserialize(key, builder, in, userVersion)); return builder; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java index ef3ac9eff3a9..5935a910b5f8 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalTable.java @@ -143,8 +143,7 @@ private class JournalAndTableRecordConsumer extends AbstractRecordConsumer this.tableRecordConsumer = new TableRecordConsumer(reader); } - @Override - public void init() + void readTable() { readAllFromTable(key, tableRecordConsumer); } @@ -164,7 +163,9 @@ public void accept(long segment, int position, K key, ByteBuffer buffer, IntHash */ public void readAll(K key, Reader reader) { - journal.readAll(key, new JournalAndTableRecordConsumer(key, reader)); + JournalAndTableRecordConsumer consumer = new JournalAndTableRecordConsumer(key, reader); + journal.readAll(key, consumer); + consumer.readTable(); } private void readAllFromTable(K key, TableRecordConsumer onEntry) @@ -332,15 +333,15 @@ public void readAllForKey(K key, RecordConsumer reader) { K tableKey = tableIterator.key(); K journalKey = staticSegmentIterator.key(); - if (tableKey != null && keySupport.compare(tableKey, key) == 0) - tableIterator.readAllForKey(key, reader); - if (journalKey != null && keySupport.compare(journalKey, key) == 0) staticSegmentIterator.readAllForKey(key, (segment, position, key1, buffer, hosts, userVersion) -> { if (!tableIterator.visited(segment)) reader.accept(segment, position, key1, buffer, hosts, userVersion); }); + if (tableKey != null && keySupport.compare(tableKey, key) == 0) + tableIterator.readAllForKey(key, reader); + tableIterator.clear(); } diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java index 289b5c1b9841..60a1ef4f5817 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournalValueSerializers.java @@ -116,6 +116,7 @@ public A get() public static class IdentityAccumulator extends Accumulator { + boolean hasRead; public IdentityAccumulator(T initial) { super(initial); @@ -124,6 +125,9 @@ public IdentityAccumulator(T initial) @Override protected T accumulate(T oldValue, T newValue) { + if (hasRead) + return oldValue; + hasRead = true; return newValue; } } diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index acf0add780b7..953a6f7318be 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -174,6 +174,7 @@ public class AccordKeyspace TOPOLOGIES, EPOCH_METADATA, JOURNAL); + // TODO (desired): implement a custom type so we can get correct sort order private static final TupleType TIMESTAMP_TYPE = new TupleType(Lists.newArrayList(LongType.instance, LongType.instance, Int32Type.instance)); private static final String TIMESTAMP_TUPLE = TIMESTAMP_TYPE.asCQL3Type().toString(); private static final TupleType KEY_TYPE = new TupleType(Arrays.asList(UUIDType.instance, BytesType.instance)); @@ -235,7 +236,7 @@ static TokenType valueOf(Token token) + "user_version int," + "record blob," + "PRIMARY KEY(key, descriptor, offset)" - + ") WITH compression = {'class':'NoopCompressor'};") + + ") WITH CLUSTERING ORDER BY (descriptor DESC, offset DESC) WITH compression = {'class':'NoopCompressor'};") .partitioner(new LocalPartitioner(BytesType.instance)) .build(); diff --git a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java index 0b256574f494..624fcc378bba 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSafeCommandStore.java @@ -48,6 +48,7 @@ import accord.primitives.Txn; import accord.primitives.TxnId; import accord.primitives.Unseekables; +import accord.utils.Invariants; public class AccordSafeCommandStore extends AbstractSafeCommandStore { @@ -74,7 +75,7 @@ private AccordSafeCommandStore(PreLoadContext context, this.commandStore = commandStore; commandStore.updateRangesForEpoch(this); if (this.ranges == null) - this.ranges = commandStore.unsafeRangesForEpoch(); + this.ranges = Invariants.nonNull(commandStore.unsafeRangesForEpoch()); } public static AccordSafeCommandStore create(PreLoadContext preLoadContext, diff --git a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java index f94510b8b8f7..c6fba012a562 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java +++ b/src/java/org/apache/cassandra/service/accord/AccordSegmentCompactor.java @@ -85,8 +85,8 @@ public Collection> compact(Collection serializer = null; - long lastDescriptor = -1; - int lastOffset = -1; + long firstDescriptor = -1, lastDescriptor = -1; + int firstOffset = -1, lastOffset = -1; try { KeyOrderReader reader; @@ -94,13 +94,13 @@ public Collection> compact(Collection) key.type.serializer; builder = serializer.mergerFor(key); - lastOffset = -1; - lastDescriptor = -1; + firstDescriptor = lastDescriptor = -1; + firstOffset = lastOffset = -1; } boolean advanced; @@ -110,15 +110,20 @@ public Collection> compact(Collection= lastDescriptor, + Invariants.checkState(reader.descriptor.timestamp <= lastDescriptor, "Descriptors were accessed out of order: %d was accessed after %d", reader.descriptor.timestamp, lastDescriptor); Invariants.checkState(reader.descriptor.timestamp != lastDescriptor || - reader.offset() > lastOffset, - "Offsets within %s were accessed out of order: %d was accessed after %s", reader.offset(), lastOffset); + reader.offset() < lastOffset, + "Offsets were accessed out of order: %d was accessed after %s", reader.offset(), lastOffset); } serializer.deserialize(key, builder, in, reader.descriptor.userVersion); lastDescriptor = reader.descriptor.timestamp; lastOffset = reader.offset(); + if (firstDescriptor == -1) + { + firstDescriptor = lastDescriptor; + firstOffset = lastOffset; + } } } while ((advanced = reader.advance()) && reader.key().equals(key)); @@ -126,7 +131,7 @@ public Collection> compact(Collection T mapReduce(@Nonnull Timestamp testTimestamp, @Nullable TxnId te // range specific logic... ranges don't update CommandsForRange based off the life cycle and instead // merge the cache with the disk state; so exclude states that should get removed from CommandsFor* - if (summary.saveStatus.compareTo(SaveStatus.Erased) >= 0) + if (summary.saveStatus != null && summary.saveStatus.compareTo(SaveStatus.Erased) >= 0) return; switch (testStatus) @@ -153,8 +154,10 @@ private T mapReduce(@Nonnull Timestamp testTimestamp, @Nullable TxnId te // and so it is safe to execute, when in fact it is only a dependency on a different shard // (and that other shard, perhaps, does not know that it is a dependency - and so it is not durably known) // TODO (required): consider this some more - if ((testDep == WITH) == !summary.depsIds.contains(testTxnId)) + if ((testDep == WITH) == !summary.hasAsDep) return; + + Invariants.checkState(testTxnId.equals(summary.findAsDep)); } // TODO (required): ensure we are excluding any ranges that are now shard-redundant (not sure if this is enforced yet) diff --git a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java index bdf1aa98886f..6b564b7573da 100644 --- a/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java +++ b/src/java/org/apache/cassandra/service/accord/CommandsForRangesLoader.java @@ -21,9 +21,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Set; @@ -35,7 +32,8 @@ import com.google.common.collect.ImmutableMap; import accord.local.Command; -import accord.local.DurableBefore; +import accord.local.KeyHistory; +import accord.local.RedundantBefore; import accord.primitives.PartialDeps; import accord.primitives.SaveStatus; import accord.primitives.Status; @@ -48,16 +46,19 @@ import accord.primitives.TxnId; import accord.utils.async.AsyncChains; import accord.utils.async.AsyncResult; +import org.agrona.collections.ObjectHashSet; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.index.accord.RoutesSearcher; import org.apache.cassandra.service.accord.api.AccordRoutingKey; import org.apache.cassandra.utils.Pair; +import static accord.primitives.Txn.Kind.ExclusiveSyncPoint; + public class CommandsForRangesLoader { private final RoutesSearcher searcher = new RoutesSearcher(); //TODO (now, durability): find solution for this... - private final Map historicalTransaction = new HashMap<>(); + private final NavigableMap historicalTransaction = new TreeMap<>(); private final AccordCommandStore store; public CommandsForRangesLoader(AccordCommandStore store) @@ -65,34 +66,38 @@ public CommandsForRangesLoader(AccordCommandStore store) this.store = store; } - public AsyncResult>> get(Ranges ranges) + public AsyncResult>> get(@Nullable TxnId primaryTxnId, KeyHistory keyHistory, Ranges ranges) { - Watcher watcher = fromCache(ranges); + RedundantBefore redundantBefore = store.unsafeGetRedundantBefore(); + TxnId minTxnId = redundantBefore.minGcBefore(ranges); + Timestamp maxTxnId = primaryTxnId == null || keyHistory == KeyHistory.RECOVERY || !primaryTxnId.is(ExclusiveSyncPoint) ? Timestamp.MAX : primaryTxnId; + TxnId findAsDep = primaryTxnId != null && keyHistory == KeyHistory.RECOVERY ? primaryTxnId : null; + Watcher watcher = fromCache(findAsDep, ranges, minTxnId, maxTxnId, redundantBefore); ImmutableMap before = ImmutableMap.copyOf(watcher.get()); - return AsyncChains.ofCallable(Stage.READ.executor(), () -> get(ranges, before)) + return AsyncChains.ofCallable(Stage.READ.executor(), () -> get(ranges, before, findAsDep, minTxnId, maxTxnId, redundantBefore)) .map(map -> Pair.create(watcher, map), store) .beginAsResult(); } - private NavigableMap get(Ranges ranges, Map cacheHits) + private NavigableMap get(Ranges ranges, Map cacheHits, @Nullable TxnId findAsDep, TxnId minTxnId, Timestamp maxTxnId, RedundantBefore redundantBefore) { - Set matches = new HashSet<>(); + Set matches = new ObjectHashSet<>(); for (Range range : ranges) - matches.addAll(intersects(range)); + matches.addAll(intersects(range, minTxnId, maxTxnId)); if (matches.isEmpty()) return new TreeMap<>(); - return load(ranges, cacheHits, matches); + return load(ranges, cacheHits, matches, findAsDep, redundantBefore); } - private Collection intersects(Range range) + private Collection intersects(Range range, TxnId minTxnId, Timestamp maxTxnId) { assert range instanceof TokenRange : "Require TokenRange but given " + range.getClass(); - Set intersects = searcher.intersects(store.id(), (TokenRange) range); + Set intersects = searcher.intersects(store.id(), (TokenRange) range, minTxnId, maxTxnId); if (!historicalTransaction.isEmpty()) { if (intersects.isEmpty()) - intersects = new HashSet<>(); - for (Map.Entry e : historicalTransaction.entrySet()) + intersects = new ObjectHashSet<>(); + for (Map.Entry e : historicalTransaction.tailMap(minTxnId, true).entrySet()) { if (e.getValue().intersects(range)) intersects.add(e.getKey()); @@ -106,13 +111,21 @@ private Collection intersects(Range range) public class Watcher implements AccordStateCache.Listener, AutoCloseable { private final Ranges ranges; + private final @Nullable TxnId findAsDep; + private final TxnId minTxnId; + private final Timestamp maxTxnId; + private final RedundantBefore redundantBefore; private NavigableMap summaries = null; - private List> needToDoubleCheck = null; + private Set> needToDoubleCheck = null; - public Watcher(Ranges ranges) + public Watcher(Ranges ranges, @Nullable TxnId findAsDep, TxnId minTxnId, Timestamp maxTxnId, RedundantBefore redundantBefore) { this.ranges = ranges; + this.findAsDep = findAsDep; + this.minTxnId = minTxnId; + this.maxTxnId = maxTxnId; + this.redundantBefore = redundantBefore; } public NavigableMap get() @@ -125,15 +138,19 @@ public void onAdd(AccordCachingState n) { if (n.key().domain() != Routable.Domain.Range) return; + + if (n.key().compareTo(minTxnId) < 0 || n.key().compareTo(maxTxnId) >= 0) + return; + AccordCachingState.State state = n.state(); if (state instanceof AccordCachingState.Loading) { if (needToDoubleCheck == null) - needToDoubleCheck = new ArrayList<>(); + needToDoubleCheck = new ObjectHashSet<>(); needToDoubleCheck.add(n); return; } - //TODO (now): include FailedToSave? Most likely need to, but need to improve test coverage to have failed writes + //TODO (required): include FailedToSave? Most likely need to, but need to improve test coverage to have failed writes if (!(state instanceof AccordCachingState.Loaded || state instanceof AccordCachingState.Modified || state instanceof AccordCachingState.Saving)) @@ -142,7 +159,7 @@ public void onAdd(AccordCachingState n) Command cmd = state.get(); if (cmd == null) return; - Summary summary = create(cmd, ranges, null); + Summary summary = create(cmd, ranges, findAsDep, redundantBefore); if (summary != null) { if (summaries == null) @@ -169,7 +186,7 @@ public void close() store.commandCache().unregister(this); if (needToDoubleCheck != null) { - List> copy = needToDoubleCheck; + Set> copy = needToDoubleCheck; needToDoubleCheck = null; copy.forEach(this::onAdd); } @@ -177,19 +194,18 @@ public void close() } } - private Watcher fromCache(Ranges ranges) + private Watcher fromCache(@Nullable TxnId findAsDep, Ranges ranges, TxnId minTxnId, Timestamp maxTxnId, RedundantBefore redundantBefore) { - Watcher watcher = new Watcher(ranges); + Watcher watcher = new Watcher(ranges, findAsDep, minTxnId, maxTxnId, redundantBefore); store.commandCache().stream().forEach(watcher::onAdd); store.commandCache().register(watcher); return watcher; } - private NavigableMap load(Ranges ranges, Map cacheHits, Collection possibleTxns) + private NavigableMap load(Ranges ranges, Map cacheHits, Collection possibleTxns, @Nullable TxnId findAsDep, RedundantBefore redundantBefore) { - //TODO (now): this logic is kinda duplicate of org.apache.cassandra.service.accord.CommandsForRange.mapReduce + //TODO (required): this logic is kinda duplicate of org.apache.cassandra.service.accord.CommandsForRange.mapReduce // should figure out if this can be improved... also what is correct? - DurableBefore durableBefore = store.durableBefore(); NavigableMap map = new TreeMap<>(); for (TxnId txnId : possibleTxns) { @@ -198,7 +214,7 @@ private NavigableMap load(Ranges ranges, Map cac Command cmd = store.loadCommand(txnId); if (cmd == null) continue; // unknown command - Summary summary = create(cmd, ranges, durableBefore); + Summary summary = create(cmd, ranges, findAsDep, redundantBefore); if (summary == null) continue; map.put(txnId, summary); @@ -206,9 +222,9 @@ private NavigableMap load(Ranges ranges, Map cac return map; } - private static Summary create(Command cmd, Ranges cacheRanges, @Nullable DurableBefore durableBefore) + private static Summary create(Command cmd, Ranges cacheRanges, @Nullable TxnId findAsDep, @Nullable RedundantBefore redundantBefore) { - //TODO (now, correctness): C* did Invalidated, accord-core did Erased... what is correct? + //TODO (required, correctness): C* did Invalidated, accord-core did Erased... what is correct? SaveStatus saveStatus = cmd.saveStatus(); if (saveStatus == SaveStatus.Invalidated || saveStatus == SaveStatus.Erased @@ -225,10 +241,10 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable Durable if (!ranges.intersects(cacheRanges)) return null; - if (durableBefore != null) + if (redundantBefore != null) { - Ranges durableAlready = Ranges.of(durableBefore.foldlWithBounds(ranges, (e, accum, start, end) -> { - if (e.universalBefore.compareTo(cmd.txnId()) < 0) + Ranges durableAlready = Ranges.of(redundantBefore.foldlWithBounds(ranges, (e, accum, start, end) -> { + if (e.gcBefore.compareTo(cmd.txnId()) < 0) return accum; accum.add(new TokenRange((AccordRoutingKey) start, (AccordRoutingKey) end)); return accum; @@ -240,8 +256,8 @@ private static Summary create(Command cmd, Ranges cacheRanges, @Nullable Durable } PartialDeps partialDeps = cmd.partialDeps(); - List deps = partialDeps == null ? Collections.emptyList() : partialDeps.txnIds(); - return new Summary(cmd.txnId(), cmd.executeAt(), saveStatus, ranges, deps); + boolean hasAsDep = findAsDep != null && partialDeps.rangeDeps.intersects(findAsDep, ranges); + return new Summary(cmd.txnId(), cmd.executeAt(), saveStatus, ranges, findAsDep, hasAsDep); } public void mergeHistoricalTransaction(TxnId txnId, Ranges ranges, BiFunction remappingFunction) @@ -252,25 +268,28 @@ public void mergeHistoricalTransaction(TxnId txnId, Ranges ranges, BiFunction depsIds; + @Nullable public final Timestamp executeAt; + @Nullable public final SaveStatus saveStatus; + @Nullable public final Ranges ranges; + + // TODO (required): this logic is still broken (was already): needs to consider exact range matches + public final TxnId findAsDep; + public final boolean hasAsDep; @VisibleForTesting - Summary(TxnId txnId, @Nullable Timestamp executeAt, SaveStatus saveStatus, Ranges ranges, List depsIds) + Summary(TxnId txnId, @Nullable Timestamp executeAt, SaveStatus saveStatus, Ranges ranges, TxnId findAsDep, boolean hasAsDep) { this.txnId = txnId; this.executeAt = executeAt; this.saveStatus = saveStatus; this.ranges = ranges; - this.depsIds = depsIds; + this.findAsDep = findAsDep; + this.hasAsDep = hasAsDep; } public Summary slice(Ranges slice) { - return new Summary(txnId, executeAt, saveStatus, ranges.slice(slice, Routables.Slice.Minimal), depsIds); + return new Summary(txnId, executeAt, saveStatus, ranges.slice(slice, Routables.Slice.Minimal), findAsDep, hasAsDep); } @Override @@ -281,7 +300,8 @@ public String toString() ", executeAt=" + executeAt + ", saveStatus=" + saveStatus + ", ranges=" + ranges + - ", depsIds=" + depsIds + + ", findAsDep=" + findAsDep + + ", hasAsDep=" + hasAsDep + '}'; } } diff --git a/src/java/org/apache/cassandra/service/accord/SavedCommand.java b/src/java/org/apache/cassandra/service/accord/SavedCommand.java index 209208989f28..a0cd86bb5b68 100644 --- a/src/java/org/apache/cassandra/service/accord/SavedCommand.java +++ b/src/java/org/apache/cassandra/service/accord/SavedCommand.java @@ -161,43 +161,63 @@ private static boolean anyFieldChanged(Command before, Command after) public static void serialize(Command before, Command after, DataOutputPlus out, int userVersion) throws IOException { int flags = getFlags(before, after); - out.writeInt(flags); - // We encode all changed fields unless their value is null - if (getFieldChanged(Fields.EXECUTE_AT, flags) && after.executeAt() != null) - CommandSerializers.timestamp.serialize(after.executeAt(), out, userVersion); - // TODO (desired): check if this can fold into executeAt - if (getFieldChanged(Fields.EXECUTES_AT_LEAST, flags) && after.executesAtLeast() != null) - CommandSerializers.timestamp.serialize(after.executesAtLeast(), out, userVersion); - if (getFieldChanged(Fields.SAVE_STATUS, flags)) - out.writeInt(after.saveStatus().ordinal()); - if (getFieldChanged(Fields.DURABILITY, flags) && after.durability() != null) - out.writeInt(after.durability().ordinal()); - - if (getFieldChanged(Fields.ACCEPTED, flags) && after.acceptedOrCommitted() != null) - CommandSerializers.ballot.serialize(after.acceptedOrCommitted(), out, userVersion); - if (getFieldChanged(Fields.PROMISED, flags) && after.promised() != null) - CommandSerializers.ballot.serialize(after.promised(), out, userVersion); - - if (getFieldChanged(Fields.PARTICIPANTS, flags) && after.participants() != null) - CommandSerializers.participants.serialize(after.participants(), out, userVersion); - if (getFieldChanged(Fields.PARTIAL_TXN, flags) && after.partialTxn() != null) - CommandSerializers.partialTxn.serialize(after.partialTxn(), out, userVersion); - if (getFieldChanged(Fields.PARTIAL_DEPS, flags) && after.partialDeps() != null) - DepsSerializer.partialDeps.serialize(after.partialDeps(), out, userVersion); - - Command.WaitingOn waitingOn = getWaitingOn(after); - if (getFieldChanged(Fields.WAITING_ON, flags) && waitingOn != null) - { - long size = WaitingOnSerializer.serializedSize(waitingOn); - ByteBuffer serialized = WaitingOnSerializer.serialize(after.txnId(), waitingOn); - out.writeInt((int) size); - out.write(serialized); - } - - if (getFieldChanged(Fields.WRITES, flags) && after.writes() != null) - CommandSerializers.writes.serialize(after.writes(), out, userVersion); + int iterable = toIterableSetFields(flags); + while (iterable != 0) + { + Fields field = nextSetField(iterable); + if (getFieldIsNull(field, flags)) + { + iterable = unsetIterableFields(field, iterable); + continue; + } + + switch (field) + { + case EXECUTE_AT: + CommandSerializers.timestamp.serialize(after.executeAt(), out, userVersion); + break; + case EXECUTES_AT_LEAST: + CommandSerializers.timestamp.serialize(after.executesAtLeast(), out, userVersion); + break; + case SAVE_STATUS: + out.writeShort(after.saveStatus().ordinal()); + break; + case DURABILITY: + out.writeByte(after.durability().ordinal()); + break; + case ACCEPTED: + CommandSerializers.ballot.serialize(after.acceptedOrCommitted(), out, userVersion); + break; + case PROMISED: + CommandSerializers.ballot.serialize(after.promised(), out, userVersion); + break; + case PARTICIPANTS: + CommandSerializers.participants.serialize(after.participants(), out, userVersion); + break; + case PARTIAL_TXN: + CommandSerializers.partialTxn.serialize(after.partialTxn(), out, userVersion); + break; + case PARTIAL_DEPS: + DepsSerializer.partialDeps.serialize(after.partialDeps(), out, userVersion); + break; + case WAITING_ON: + Command.WaitingOn waitingOn = getWaitingOn(after); + long size = WaitingOnSerializer.serializedSize(waitingOn); + ByteBuffer serialized = WaitingOnSerializer.serialize(after.txnId(), waitingOn); + out.writeInt((int) size); + out.write(serialized); + break; + case WRITES: + CommandSerializers.writes.serialize(after.writes(), out, userVersion); + break; + case CLEANUP: + throw new IllegalStateException(); + } + + iterable = unsetIterableFields(field, iterable); + } } @VisibleForTesting @@ -258,13 +278,29 @@ private static int collectFlags(OBJ lo, OBJ ro, Function co private static int setFieldChanged(Fields field, int oldFlags) { - return oldFlags | (1 << (field.ordinal() + Short.SIZE)); + return oldFlags | (0x10000 << field.ordinal()); } @VisibleForTesting static boolean getFieldChanged(Fields field, int oldFlags) { - return (oldFlags & (1 << (field.ordinal() + Short.SIZE))) != 0; + return (oldFlags & (0x10000 << field.ordinal())) != 0; + } + + static int toIterableSetFields(int flags) + { + return flags >>> 16; + } + + static Fields nextSetField(int iterable) + { + int i = Integer.numberOfTrailingZeros(Integer.lowestOneBit(iterable)); + return i == 32 ? null : Fields.FIELDS[i]; + } + + static int unsetIterableFields(Fields field, int iterable) + { + return iterable & ~(1 << field.ordinal()); } @VisibleForTesting @@ -547,43 +583,61 @@ public void serialize(DataOutputPlus out, int userVersion) throws IOException { out.writeInt(flags); - // We encode all changed fields unless their value is null - if (getFieldChanged(Fields.EXECUTE_AT, flags) && !getFieldIsNull(Fields.EXECUTE_AT, flags)) - CommandSerializers.timestamp.serialize(executeAt(), out, userVersion); - // TODO (desired): check if this can fold into executeAt - if (getFieldChanged(Fields.EXECUTES_AT_LEAST, flags) && !getFieldIsNull(Fields.EXECUTES_AT_LEAST, flags)) - CommandSerializers.timestamp.serialize(executeAtLeast(), out, userVersion); - if (getFieldChanged(Fields.SAVE_STATUS, flags) && !getFieldIsNull(Fields.SAVE_STATUS, flags)) - out.writeInt(saveStatus().ordinal()); - if (getFieldChanged(Fields.DURABILITY, flags) && !getFieldIsNull(Fields.DURABILITY, flags)) - out.writeInt(durability().ordinal()); - - if (getFieldChanged(Fields.ACCEPTED, flags) && !getFieldIsNull(Fields.ACCEPTED, flags)) - CommandSerializers.ballot.serialize(acceptedOrCommitted(), out, userVersion); - if (getFieldChanged(Fields.PROMISED, flags) && !getFieldIsNull(Fields.PROMISED, flags)) - CommandSerializers.ballot.serialize(promised(), out, userVersion); - - if (getFieldChanged(Fields.PARTICIPANTS, flags) && !getFieldIsNull(Fields.PARTICIPANTS, flags)) - CommandSerializers.participants.serialize(participants(), out, userVersion); - if (getFieldChanged(Fields.PARTIAL_TXN, flags) && !getFieldIsNull(Fields.PARTIAL_TXN, flags)) - CommandSerializers.partialTxn.serialize(partialTxn(), out, userVersion); - if (getFieldChanged(Fields.PARTIAL_DEPS, flags) && !getFieldIsNull(Fields.PARTIAL_DEPS, flags)) - DepsSerializer.partialDeps.serialize(partialDeps(), out, userVersion); - - if (getFieldChanged(Fields.WAITING_ON, flags) && !getFieldIsNull(Fields.WAITING_ON, flags)) + int iterable = toIterableSetFields(flags); + while (iterable != 0) { - out.writeInt(waitingOnBytes.length); - out.write(waitingOnBytes); - } + Fields field = nextSetField(iterable); + if (getFieldIsNull(field, flags)) + { + iterable = unsetIterableFields(field, iterable); + continue; + } - if (getFieldChanged(Fields.WRITES, flags) && !getFieldIsNull(Fields.WRITES, flags)) - CommandSerializers.writes.serialize(writes(), out, userVersion); + switch (field) + { + case EXECUTE_AT: + CommandSerializers.timestamp.serialize(executeAt(), out, userVersion); + break; + case EXECUTES_AT_LEAST: + CommandSerializers.timestamp.serialize(executeAtLeast(), out, userVersion); + break; + case SAVE_STATUS: + out.writeShort(saveStatus().ordinal()); + break; + case DURABILITY: + out.writeByte(durability().ordinal()); + break; + case ACCEPTED: + CommandSerializers.ballot.serialize(acceptedOrCommitted(), out, userVersion); + break; + case PROMISED: + CommandSerializers.ballot.serialize(promised(), out, userVersion); + break; + case PARTICIPANTS: + CommandSerializers.participants.serialize(participants(), out, userVersion); + break; + case PARTIAL_TXN: + CommandSerializers.partialTxn.serialize(partialTxn(), out, userVersion); + break; + case PARTIAL_DEPS: + DepsSerializer.partialDeps.serialize(partialDeps(), out, userVersion); + break; + case WAITING_ON: + out.writeInt(waitingOnBytes.length); + out.write(waitingOnBytes); + break; + case WRITES: + CommandSerializers.writes.serialize(writes(), out, userVersion); + break; + case CLEANUP: + out.writeByte(cleanup.ordinal()); + break; + } - if (getFieldChanged(Fields.CLEANUP, flags)) - out.writeByte(cleanup.ordinal()); + iterable = unsetIterableFields(field, iterable); + } } - // TODO: we seem to be writing some form of empty transaction @SuppressWarnings({ "rawtypes", "unchecked" }) public void deserializeNext(DataInputPlus in, int userVersion) throws IOException @@ -593,97 +647,65 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio nextCalled = true; count++; - for (Fields field : Fields.FIELDS) + int iterable = toIterableSetFields(flags); + while (iterable != 0) { - if (getFieldChanged(field, flags)) + Fields field = nextSetField(iterable); + if (getFieldChanged(field, this.flags)) { - this.flags = setFieldChanged(field, this.flags); - if (getFieldIsNull(field, flags)) - this.flags = setFieldIsNull(field, this.flags); - else - this.flags = unsetFieldIsNull(field, this.flags); - } - } + if (!getFieldIsNull(field, flags)) + skip(field, in, userVersion); - if (getFieldChanged(Fields.EXECUTE_AT, flags)) - { - if (getFieldIsNull(Fields.EXECUTE_AT, flags)) - executeAt = null; - else - executeAt = CommandSerializers.timestamp.deserialize(in, userVersion); - } + iterable = unsetIterableFields(field, iterable); + continue; + } + this.flags = setFieldChanged(field, this.flags); - if (getFieldChanged(Fields.EXECUTES_AT_LEAST, flags)) - { - if (getFieldIsNull(Fields.EXECUTES_AT_LEAST, flags)) - executeAtLeast = null; + if (getFieldIsNull(field, flags)) + { + this.flags = setFieldIsNull(field, this.flags); + } else - executeAtLeast = CommandSerializers.timestamp.deserialize(in, userVersion); - } + { + deserialize(field, in, userVersion); + } - if (getFieldChanged(Fields.SAVE_STATUS, flags)) - { - if (getFieldIsNull(Fields.SAVE_STATUS, flags)) - saveStatus = null; - else - saveStatus = SaveStatus.values()[in.readInt()]; - } - if (getFieldChanged(Fields.DURABILITY, flags)) - { - if (getFieldIsNull(Fields.DURABILITY, flags)) - durability = null; - else - durability = Status.Durability.values()[in.readInt()]; + iterable = unsetIterableFields(field, iterable); } + } - if (getFieldChanged(Fields.ACCEPTED, flags)) + private void deserialize(Fields field, DataInputPlus in, int userVersion) throws IOException + { + switch (field) { - if (getFieldIsNull(Fields.ACCEPTED, flags)) - acceptedOrCommitted = null; - else + case EXECUTE_AT: + executeAt = CommandSerializers.timestamp.deserialize(in, userVersion); + break; + case EXECUTES_AT_LEAST: + executeAtLeast = CommandSerializers.timestamp.deserialize(in, userVersion); + break; + case SAVE_STATUS: + saveStatus = SaveStatus.values()[in.readShort()]; + break; + case DURABILITY: + durability = Status.Durability.values()[in.readByte()]; + break; + case ACCEPTED: acceptedOrCommitted = CommandSerializers.ballot.deserialize(in, userVersion); - } - - if (getFieldChanged(Fields.PROMISED, flags)) - { - if (getFieldIsNull(Fields.PROMISED, flags)) - promised = null; - else + break; + case PROMISED: promised = CommandSerializers.ballot.deserialize(in, userVersion); - } - - if (getFieldChanged(Fields.PARTICIPANTS, flags)) - { - if (getFieldIsNull(Fields.PARTICIPANTS, flags)) - participants = null; - else + break; + case PARTICIPANTS: participants = CommandSerializers.participants.deserialize(in, userVersion); - } - - if (getFieldChanged(Fields.PARTIAL_TXN, flags)) - { - if (getFieldIsNull(Fields.PARTIAL_TXN, flags)) - partialTxn = null; - else + break; + case PARTIAL_TXN: partialTxn = CommandSerializers.partialTxn.deserialize(in, userVersion); - } - - if (getFieldChanged(Fields.PARTIAL_DEPS, flags)) - { - if (getFieldIsNull(Fields.PARTIAL_DEPS, flags)) - partialDeps = null; - else + break; + case PARTIAL_DEPS: partialDeps = DepsSerializer.partialDeps.deserialize(in, userVersion); - } - - if (getFieldChanged(Fields.WAITING_ON, flags)) - { - if (getFieldIsNull(Fields.WAITING_ON, flags)) - { - waitingOn = null; - } - else - { + break; + case WAITING_ON: int size = in.readInt(); waitingOnBytes = new byte[size]; in.readFully(waitingOnBytes); @@ -699,22 +721,56 @@ public void deserializeNext(DataInputPlus in, int userVersion) throws IOExceptio throw Throwables.unchecked(e); } }; - } - } - - if (getFieldChanged(Fields.WRITES, flags)) - { - if (getFieldIsNull(Fields.WRITES, flags)) - writes = null; - else + break; + case WRITES: writes = CommandSerializers.writes.deserialize(in, userVersion); + break; + case CLEANUP: + Cleanup newCleanup = Cleanup.forOrdinal(in.readByte()); + if (cleanup == null || newCleanup.compareTo(cleanup) > 0) + cleanup = newCleanup; + break; } + } - if (getFieldChanged(Fields.CLEANUP, flags)) + private void skip(Fields field, DataInputPlus in, int userVersion) throws IOException + { + switch (field) { - Cleanup newCleanup = Cleanup.forOrdinal(in.readByte()); - if (cleanup == null || newCleanup.compareTo(cleanup) > 0) - cleanup = newCleanup; + case EXECUTE_AT: + case EXECUTES_AT_LEAST: + CommandSerializers.timestamp.skip(in, userVersion); + break; + case SAVE_STATUS: + in.readShort(); + break; + case DURABILITY: + in.readByte(); + break; + case ACCEPTED: + case PROMISED: + CommandSerializers.ballot.skip(in, userVersion); + break; + case PARTICIPANTS: + CommandSerializers.participants.deserialize(in, userVersion); + break; + case PARTIAL_TXN: + CommandSerializers.partialTxn.deserialize(in, userVersion); + break; + case PARTIAL_DEPS: + DepsSerializer.partialDeps.deserialize(in, userVersion); + break; + case WAITING_ON: + int size = in.readInt(); + in.skipBytesFully(size); + break; + case WRITES: + // TODO (expected): skip + CommandSerializers.writes.deserialize(in, userVersion); + break; + case CLEANUP: + in.readByte(); + break; } } diff --git a/src/java/org/apache/cassandra/service/accord/TokenRange.java b/src/java/org/apache/cassandra/service/accord/TokenRange.java index aed027097970..70135b7a363c 100644 --- a/src/java/org/apache/cassandra/service/accord/TokenRange.java +++ b/src/java/org/apache/cassandra/service/accord/TokenRange.java @@ -104,7 +104,10 @@ public org.apache.cassandra.dht.Range toKeyspaceRange () return new org.apache.cassandra.dht.Range<>(left, right); } - public static final IVersionedSerializer serializer = new IVersionedSerializer<>() + + public static final Serializer serializer = new Serializer(); + + public static final class Serializer implements IVersionedSerializer { @Override public void serialize(TokenRange range, DataOutputPlus out, int version) throws IOException @@ -113,6 +116,12 @@ public void serialize(TokenRange range, DataOutputPlus out, int version) throws AccordRoutingKey.serializer.serialize(range.end(), out, version); } + public void skip(DataInputPlus in, int version) throws IOException + { + AccordRoutingKey.serializer.skip(in, version); + AccordRoutingKey.serializer.skip(in, version); + } + @Override public TokenRange deserialize(DataInputPlus in, int version) throws IOException { diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java index 1869310f5b9b..18d6926bc571 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutableKey.java @@ -18,16 +18,24 @@ package org.apache.cassandra.service.accord.api; +import java.io.IOException; import java.util.Objects; import accord.primitives.RoutableKey; import org.apache.cassandra.dht.Token; +import org.apache.cassandra.io.IVersionedSerializer; +import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.api.AccordRoutingKey.SentinelKey; import org.apache.cassandra.service.accord.api.AccordRoutingKey.TokenKey; public abstract class AccordRoutableKey implements RoutableKey { + public interface AccordKeySerializer extends IVersionedSerializer + { + void skip(DataInputPlus in, int version) throws IOException; + } + final TableId table; // TODO (desired): use an id (TrM) protected AccordRoutableKey(TableId table) diff --git a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java index 1f29c16d9689..6d8d2b818453 100644 --- a/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/AccordRoutingKey.java @@ -37,7 +37,6 @@ import org.apache.cassandra.db.marshal.ByteBufferAccessor; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputBuffer; @@ -168,7 +167,7 @@ public String suffix() return isMin ? "-Inf" : "+Inf"; } - public static final IVersionedSerializer serializer = new IVersionedSerializer() + public static final AccordKeySerializer serializer = new AccordKeySerializer() { @Override public void serialize(SentinelKey key, DataOutputPlus out, int version) throws IOException @@ -177,6 +176,12 @@ public void serialize(SentinelKey key, DataOutputPlus out, int version) throws I out.writeBoolean(key.isMin); } + @Override + public void skip(DataInputPlus in, int version) throws IOException + { + in.skipBytesFully(TableId.staticSerializedSize() + 1); + } + @Override public SentinelKey deserialize(DataInputPlus in, int version) throws IOException { @@ -260,7 +265,7 @@ public AccordRoutingKey withTable(TableId table) } public static final Serializer serializer = new Serializer(); - public static class Serializer implements IVersionedSerializer + public static class Serializer implements AccordKeySerializer { private Serializer() {} @@ -271,6 +276,14 @@ public void serialize(TokenKey key, DataOutputPlus out, int version) throws IOEx Token.compactSerializer.serialize(key.token, out, version); } + @Override + public void skip(DataInputPlus in, int version) throws IOException + { + in.skipBytesFully(TableId.staticSerializedSize()); + // TODO (expected): should we be using the TableId partitioner here? + Token.compactSerializer.skip(in, getPartitioner(), version); + } + @Override public TokenKey deserialize(DataInputPlus in, int version) throws IOException { @@ -306,7 +319,7 @@ public long serializedSize(TokenKey key, int version) } } - public static class Serializer implements IVersionedSerializer + public static class Serializer implements AccordKeySerializer { static final RoutingKeyKind[] kinds = RoutingKeyKind.values(); @@ -358,6 +371,23 @@ public AccordRoutingKey deserialize(ByteBuffer buffer) } } + @Override + public void skip(DataInputPlus in, int version) throws IOException + { + RoutingKeyKind kind = kinds[in.readByte()]; + switch (kind) + { + case TOKEN: + TokenKey.serializer.skip(in, version); + break; + case SENTINEL: + SentinelKey.serializer.skip(in, version); + break; + default: + throw new IllegalArgumentException(); + } + } + @Override public AccordRoutingKey deserialize(DataInputPlus in, int version) throws IOException { diff --git a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java index 71feb7d88e58..fc78fe669262 100644 --- a/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java +++ b/src/java/org/apache/cassandra/service/accord/api/PartitionKey.java @@ -35,7 +35,6 @@ import org.apache.cassandra.db.partitions.PartitionUpdate; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.IVersionedSerializer; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; import org.apache.cassandra.schema.Schema; @@ -119,7 +118,7 @@ public static PartitionKey toPartitionKey(Routable routable) } public static final Serializer serializer = new Serializer(); - public static class Serializer implements IVersionedSerializer + public static class Serializer implements AccordKeySerializer { // TODO: add vint to value accessor and use vints private Serializer() {} @@ -144,6 +143,14 @@ public int serialize(PartitionKey key, V dst, ValueAccessor accessor, int } + @Override + public void skip(DataInputPlus in, int version) throws IOException + { + TableId tableId = TableId.deserialize(in); + IPartitioner partitioner = Schema.instance.getExistingTablePartitioner(tableId); + ByteBufferUtil.skipShortLength(in); + } + @Override public PartitionKey deserialize(DataInputPlus in, int version) throws IOException { diff --git a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java index 4c71bdcb42a9..71758981a646 100644 --- a/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java +++ b/src/java/org/apache/cassandra/service/accord/async/AsyncLoader.java @@ -43,6 +43,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.stream.Collectors; +import javax.annotation.Nullable; public class AsyncLoader { @@ -127,6 +128,7 @@ private void referenceAndAssembleReadsForKey(RoutingKey key, referenceAndAssembleReadsForKey(key, context.timestampsForKey, commandStore.timestampsForKeyCache(), listenChains); break; case COMMANDS: + case RECOVERY: referenceAndAssembleReadsForKey(key, context.commandsForKey, commandStore.commandsForKeyCache(), listenChains); case NONE: break; @@ -142,7 +144,7 @@ private > void referenceAndAssembleReads(I keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, cache, listenChains)); } - private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) + private AsyncResult referenceAndDispatchReads(@Nullable TxnId primaryTxnId, AsyncOperation.Context context) { List> chains = new ArrayList<>(); @@ -155,7 +157,7 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) keys.forEach(key -> referenceAndAssembleReadsForKey(key, context, chains)); break; case Range: - chains.add(referenceAndDispatchReadsForRange(context)); + chains.add(referenceAndDispatchReadsForRange(primaryTxnId, context)); break; default: throw new UnsupportedOperationException("Unable to process keys of " + keysOrRanges.domain()); @@ -164,7 +166,7 @@ private AsyncResult referenceAndDispatchReads(AsyncOperation.Context context) return !chains.isEmpty() ? AsyncChains.reduce(chains, (a, b) -> null).beginAsResult() : null; } - private AsyncChain referenceAndDispatchReadsForRange(AsyncOperation.Context context) + private AsyncChain referenceAndDispatchReadsForRange(@Nullable TxnId primaryTxnId, AsyncOperation.Context context) { Ranges ranges = ((AbstractRanges) keysOrRanges).toRanges(); @@ -184,6 +186,8 @@ public void onAdd(AccordCachingState state) cached.add(pk); } } + + // TODO (required): this needs to be optimised (e.g. to not load redundant commands, but maybe to be avoided altogether with async evaluation) Watcher watcher = new Watcher(); commandStore.commandsForKeyCache().register(watcher); root.add(findOverlappingKeys(ranges).flatMap(keys -> { @@ -196,7 +200,7 @@ public void onAdd(AccordCachingState state) return chains.isEmpty() ? AsyncChains.success(null) : AsyncChains.reduce(chains, (a, b) -> null); }, commandStore)); - AsyncResult>> chain = commandStore.diskCommandsForRanges().get(ranges); + AsyncResult>> chain = commandStore.diskCommandsForRanges().get(primaryTxnId, keyHistory, ranges); root.add(chain); context.commandsForRanges = new AccordSafeCommandsForRanges(ranges, chain); @@ -235,7 +239,7 @@ void state(State state) this.state = state; } - public boolean load(AsyncOperation.Context context, BiConsumer callback) + public boolean load(@Nullable TxnId primaryTxnId, AsyncOperation.Context context, BiConsumer callback) { logger.trace("Running load for {} with state {}: {} {}", callback, state, txnIds, keysOrRanges); commandStore.checkInStoreThread(); @@ -245,7 +249,7 @@ public boolean load(AsyncOperation.Context context, BiConsumer key = (IVersionedSerializer) (IVersionedSerializer) PartitionKey.serializer; - public static final IVersionedSerializer routingKey = (IVersionedSerializer) (IVersionedSerializer) AccordRoutingKey.serializer; + public static final AccordKeySerializer key = (AccordKeySerializer) (AccordKeySerializer) PartitionKey.serializer; + public static final IVersionedSerializer routingKey = (AccordKeySerializer) (AccordKeySerializer) AccordRoutingKey.serializer; public static final IVersionedSerializer nullableRoutingKey = NullableSerializer.wrap(routingKey); - public static final IVersionedSerializer routingKeys = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) + public static final AbstractKeysSerializer routingKeys = new AbstractKeysSerializer<>(routingKey, RoutingKey[]::new) { @Override RoutingKeys deserialize(DataInputPlus in, int version, RoutingKey[] keys) { @@ -78,7 +79,7 @@ private KeySerializers() {} } }; - public static final IVersionedSerializer ranges = new AbstractRangesSerializer() + public static final AbstractRangesSerializer ranges = new AbstractRangesSerializer() { @Override public Ranges deserialize(DataInputPlus in, int version, Range[] ranges) @@ -87,7 +88,7 @@ public Ranges deserialize(DataInputPlus in, int version, Range[] ranges) } }; - public static final IVersionedSerializer partialKeyRoute = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) + public static final AbstractKeysSerializer partialKeyRoute = new AbstractKeysSerializer(routingKey, RoutingKey[]::new) { @Override PartialKeyRoute deserialize(DataInputPlus in, int version, RoutingKey[] keys) throws IOException { @@ -110,7 +111,7 @@ public long serializedSize(PartialKeyRoute keys, int version) } }; - public static final IVersionedSerializer fullKeyRoute = new AbstractKeysSerializer<>(routingKey, RoutingKey[]::new) + public static final AbstractKeysSerializer fullKeyRoute = new AbstractKeysSerializer<>(routingKey, RoutingKey[]::new) { @Override FullKeyRoute deserialize(DataInputPlus in, int version, RoutingKey[] keys) throws IOException { @@ -133,7 +134,7 @@ public long serializedSize(FullKeyRoute route, int version) } }; - public static final IVersionedSerializer partialRangeRoute = new AbstractRangesSerializer() + public static final AbstractRangesSerializer partialRangeRoute = new AbstractRangesSerializer<>() { @Override PartialRangeRoute deserialize(DataInputPlus in, int version, Range[] rs) throws IOException { @@ -157,7 +158,7 @@ public long serializedSize(PartialRangeRoute rs, int version) } }; - public static final IVersionedSerializer fullRangeRoute = new AbstractRangesSerializer() + public static final AbstractRangesSerializer fullRangeRoute = new AbstractRangesSerializer<>() { @Override FullRangeRoute deserialize(DataInputPlus in, int version, Range[] Ranges) throws IOException { @@ -180,7 +181,7 @@ public long serializedSize(FullRangeRoute ranges, int version) } }; - public static final IVersionedSerializer> route = new AbstractRoutablesSerializer<>( + public static final AbstractRoutablesSerializer> route = new AbstractRoutablesSerializer<>( EnumSet.of(UnseekablesKind.PartialKeyRoute, UnseekablesKind.FullKeyRoute, UnseekablesKind.PartialRangeRoute, UnseekablesKind.FullRangeRoute) ); public static final IVersionedSerializer> nullableRoute = NullableSerializer.wrap(route); @@ -271,6 +272,21 @@ public RS deserialize(DataInputPlus in, int version) throws IOException return result; } + public void skip(DataInputPlus in, int version) throws IOException + { + byte b = in.readByte(); + switch (b) + { + default: throw new IOException("Corrupted input: expected byte 1, 2, 3, 4 or 5; received " + b); + case 1: routingKeys.skip(in, version); break; + case 2: partialKeyRoute.skip(in, version); break; + case 3: fullKeyRoute.skip(in, version); break; + case 4: ranges.skip(in, version); break; + case 5: partialRangeRoute.skip(in, version); break; + case 6: fullRangeRoute.skip(in, version); break; + } + } + @Override public long serializedSize(RS t, int version) { @@ -362,6 +378,13 @@ public void serialize(KS keys, DataOutputPlus out, int version) throws IOExcepti abstract KS deserialize(DataInputPlus in, int version, K[] keys) throws IOException; + public void skip(DataInputPlus in, int version) throws IOException + { + int count = in.readUnsignedVInt32(); + for (int i = 0; i < count ; i++) + keySerializer.deserialize(in, version); + } + @Override public KS deserialize(DataInputPlus in, int version) throws IOException { @@ -391,6 +414,13 @@ public void serialize(RS ranges, DataOutputPlus out, int version) throws IOExcep TokenRange.serializer.serialize((TokenRange) ranges.get(i), out, version); } + public void skip(DataInputPlus in, int version) throws IOException + { + int count = in.readUnsignedVInt32(); + for (int i = 0; i < count ; i++) + TokenRange.serializer.deserialize(in, version); + } + @Override public RS deserialize(DataInputPlus in, int version) throws IOException { diff --git a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java index a24cc81e9b74..1e69174d3085 100644 --- a/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java +++ b/test/distributed/org/apache/cassandra/service/accord/AccordJournalCompactionTest.java @@ -19,6 +19,8 @@ package org.apache.cassandra.service.accord; import java.nio.file.Files; +import java.util.Collections; +import java.util.List; import java.util.NavigableMap; import java.util.concurrent.atomic.AtomicInteger; @@ -53,7 +55,6 @@ import static accord.local.CommandStores.RangesForEpoch; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.DurableBeforeAccumulator; -import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.IdentityAccumulator; import static org.apache.cassandra.service.accord.AccordJournalValueSerializers.RedundantBeforeAccumulator; @@ -88,9 +89,9 @@ public void segmentMergeTest() throws InterruptedException RedundantBeforeAccumulator redundantBeforeAccumulator = new RedundantBeforeAccumulator(); DurableBeforeAccumulator durableBeforeAccumulator = new DurableBeforeAccumulator(); - IdentityAccumulator> bootstrapBeganAtAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY)); - IdentityAccumulator> safeToReadAccumulator = new IdentityAccumulator<>(ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY)); - IdentityAccumulator rangesForEpochAccumulator = new IdentityAccumulator<>(null); + NavigableMap safeToReadAtAccumulator = ImmutableSortedMap.of(Timestamp.NONE, Ranges.EMPTY); + NavigableMap bootstrapBeganAtAccumulator = ImmutableSortedMap.of(TxnId.NONE, Ranges.EMPTY); + RangesForEpoch.Snapshot rangesForEpochAccumulator = null; HistoricalTransactionsAccumulator historicalTransactionsAccumulator = new HistoricalTransactionsAccumulator(); Gen redundantBeforeGen = AccordGenerators.redundantBefore(DatabaseDescriptor.getPartitioner()); @@ -118,7 +119,7 @@ public boolean enableCompaction() journal.start(null); Timestamp timestamp = Timestamp.NONE; - RandomSource rs = new DefaultRandom(); + RandomSource rs = new DefaultRandom(1); int count = 1_000; // RedundantBefore redundantBefore = RedundantBefore.EMPTY; @@ -140,9 +141,11 @@ public boolean enableCompaction() redundantBeforeAccumulator.update(updates.newRedundantBefore); durableBeforeAccumulator.update(addDurableBefore); if (updates.newBootstrapBeganAt != null) - bootstrapBeganAtAccumulator.update(updates.newBootstrapBeganAt); - safeToReadAccumulator.update(updates.newSafeToRead); - rangesForEpochAccumulator.update(updates.newRangesForEpoch); + bootstrapBeganAtAccumulator = updates.newBootstrapBeganAt; + if (updates.newSafeToRead != null) + safeToReadAtAccumulator = updates.newSafeToRead; + if (updates.newRangesForEpoch != null) + rangesForEpochAccumulator = updates.newRangesForEpoch; historicalTransactionsAccumulator.update(updates.addHistoricalTransactions); if (i % 100 == 0) @@ -153,10 +156,12 @@ public boolean enableCompaction() // Assert.assertEquals(redundantBeforeAccumulator.get(), journal.loadRedundantBefore(1)); Assert.assertEquals(durableBeforeAccumulator.get(), journal.durableBeforePersister().load()); - Assert.assertEquals(bootstrapBeganAtAccumulator.get(), journal.loadBootstrapBeganAt(1)); - Assert.assertEquals(safeToReadAccumulator.get(), journal.loadSafeToRead(1)); - Assert.assertEquals(rangesForEpochAccumulator.get(), journal.loadRangesForEpoch(1)); - Assert.assertEquals(historicalTransactionsAccumulator.get(), journal.loadHistoricalTransactions(1)); + Assert.assertEquals(bootstrapBeganAtAccumulator, journal.loadBootstrapBeganAt(1)); + Assert.assertEquals(safeToReadAtAccumulator, journal.loadSafeToRead(1)); + Assert.assertEquals(rangesForEpochAccumulator, journal.loadRangesForEpoch(1)); + List historical = historicalTransactionsAccumulator.get(); + Collections.reverse(historical); + Assert.assertEquals(historical, journal.loadHistoricalTransactions(1)); } finally { diff --git a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java index 4c339ebcfef4..73bb34348945 100644 --- a/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java +++ b/test/simulator/test/org/apache/cassandra/simulator/test/AccordJournalSimulationTest.java @@ -105,7 +105,7 @@ public static void check() for (int i = 0; i < count; i++) { State.logger.debug("Reading {}", i); - Assert.assertEquals(State.journal.readFirst("test" + i), "test" + i); + Assert.assertEquals(State.journal.readLast("test" + i), "test" + i); } } diff --git a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java index fb278581dfa3..bf41340c8872 100644 --- a/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java +++ b/test/unit/org/apache/cassandra/index/accord/AccordIndexStressTest.java @@ -49,11 +49,13 @@ import accord.primitives.Ranges; import accord.primitives.Routable; import accord.primitives.Route; +import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.RandomSource; import org.agrona.collections.Int2ObjectHashMap; import org.agrona.collections.Long2ObjectHashMap; +import org.agrona.collections.ObjectHashSet; import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.cql3.UntypedResultSet; @@ -297,7 +299,8 @@ else if (domain == Routable.Domain.Range && store2Table2Ranges == null) } var startNs = nanoTime(); - Set actual = read(store, start, end); + // TODO (desired): randomise lower bound for reading + Set actual = read(store, start, end, TxnId.NONE, Timestamp.MAX); var durationNs = nanoTime() - startNs; samples[size] = durationNs; counts[size++] = actual.size(); @@ -334,32 +337,36 @@ private static boolean slow(long durationNs) return durationNs >= SLOW_NS; } - private Set read(int store, AccordRoutingKey start, AccordRoutingKey end) + private Set read(int store, AccordRoutingKey start, AccordRoutingKey end, TxnId minTxnId, Timestamp maxTxnId) { switch (read) { case INDEX: - return readIndex(store, start, end); + return readIndex(store, start, end, minTxnId, maxTxnId); case CQL: - return readCQL(store, start, end); + return readCQL(store, start, end, minTxnId, maxTxnId); default: throw new AssertionError("Unknown read type: " + read); } } - private Set readIndex(int store, AccordRoutingKey start, AccordRoutingKey end) + private Set readIndex(int store, AccordRoutingKey start, AccordRoutingKey end, TxnId minTxnId, Timestamp maxTxnId) { - return searcher.intersects(store, start, end); + return searcher.intersects(store, start, end, minTxnId, maxTxnId); } - private Set readCQL(int store, AccordRoutingKey start, AccordRoutingKey end) + private Set readCQL(int store, AccordRoutingKey start, AccordRoutingKey end, TxnId minTxnId, Timestamp maxTxnId) { - Set actual = new HashSet<>(); + Set actual = new ObjectHashSet<>(); try { UntypedResultSet results = execute("SELECT txn_id FROM system_accord.commands WHERE store_id = ? AND route > ? AND route <= ?", store, OrderedRouteSerializer.serializeRoutingKey(start), OrderedRouteSerializer.serializeRoutingKey(end)); for (var row : results) - actual.add(AccordKeyspace.deserializeTxnId(row)); + { + TxnId txnId = AccordKeyspace.deserializeTxnId(row); + if (txnId.compareTo(minTxnId) >= 0 && txnId.compareTo(maxTxnId) < 0) + actual.add(txnId); + } } catch (ReadSizeAbortException e) { diff --git a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java index bd5bc5eb934a..371623675649 100644 --- a/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java +++ b/test/unit/org/apache/cassandra/index/accord/RouteIndexTest.java @@ -44,6 +44,7 @@ import accord.primitives.Ranges; import accord.primitives.Routable.Domain; import accord.primitives.Route; +import accord.primitives.Timestamp; import accord.primitives.Txn; import accord.primitives.TxnId; import accord.utils.Gen; @@ -345,7 +346,7 @@ public Set apply(State state) throws Throwable @Override public Set run(ColumnFamilyStore sut) throws Throwable { - return ROUTES_SEARCHER.intersects(storeId, range); + return ROUTES_SEARCHER.intersects(storeId, range, TxnId.NONE, Timestamp.MAX); } @Override diff --git a/test/unit/org/apache/cassandra/journal/IndexTest.java b/test/unit/org/apache/cassandra/journal/IndexTest.java index 8f1046f2c319..ce5ed00ebe15 100644 --- a/test/unit/org/apache/cassandra/journal/IndexTest.java +++ b/test/unit/org/apache/cassandra/journal/IndexTest.java @@ -32,6 +32,7 @@ import org.junit.Assert; import org.junit.Test; +import accord.utils.Invariants; import org.agrona.collections.IntHashSet; import org.apache.cassandra.io.util.File; import org.apache.cassandra.utils.Generators; @@ -87,8 +88,8 @@ public void testInMemoryIndexBasics() assertArrayEquals(EMPTY, index.lookUp(key0)); assertArrayEquals(new long[] { composeOffsetAndSize(val11, 1) }, index.lookUp(key1)); - assertArrayEquals(new long[] { composeOffsetAndSize(val21, 2), composeOffsetAndSize(val22, 3) }, index.lookUp(key2)); - assertArrayEquals(new long[] { composeOffsetAndSize(val31, 4), composeOffsetAndSize(val32, 5), composeOffsetAndSize(val33, 6) }, index.lookUp(key3)); + assertArrayEquals(new long[] { composeOffsetAndSize(val22, 3), composeOffsetAndSize(val21, 2) }, index.lookUp(key2)); + assertArrayEquals(new long[] { composeOffsetAndSize(val33, 6), composeOffsetAndSize(val32, 5), composeOffsetAndSize(val31, 4) }, index.lookUp(key3)); assertArrayEquals(EMPTY, index.lookUp(key4)); assertEquals(key1, index.firstId()); @@ -160,13 +161,23 @@ public void prop() throws IOException Gen valueGen = rs -> { long[] array = new long[(int) rs.next(valueSizeConstraint)]; IntHashSet uniq = new IntHashSet(); - for (int i = 0; i < array.length; i++) + for (int i = 0 ; i < array.length ; ++i) { int offset = (int) rs.next(positionConstraint); while (!uniq.add(offset)) offset = (int) rs.next(positionConstraint); array[i] = Index.composeOffsetAndSize(offset, (int) rs.next(positionConstraint)); } + + Arrays.sort(array); + for (int i = 0 ; i < array.length / 2 ; ++i) + { + int back = array.length - (1 + i); + long v = array[i]; + array[i] = array[back]; + array[back] = v; + } + return array; }; Gen> gen = rs -> { @@ -190,7 +201,7 @@ public void prop() throws IOException }); File directory = new File(Files.createTempDirectory(null)); directory.deleteOnExit(); - qt().forAll(gen).checkAssert(map -> test(directory, map)); + qt().withFixedSeed(185124544959375L).forAll(gen).checkAssert(map -> test(directory, map)); } private static void test(File directory, Map map) @@ -206,7 +217,8 @@ private static void test(File directory, Map map) continue; for (long i : value) inMemory.update(key, Index.readOffset(i), Index.readSize(i)); - Arrays.sort(value); + for (int i = 1 ; i < value.length ; ++i) + Invariants.checkState(value[i - 1] > value[i]); } assertIndex(map, inMemory); @@ -262,6 +274,9 @@ private static void assertIndex(Map expected, Index long[] value = e.getValue(); long[] read = actual.lookUp(key); + if (!Arrays.equals(value, read)) + actual.lookUp(key); + if (value.length == 0) { assertThat(read).describedAs("Index %s returned wrong values for %s", actual, key).isEmpty(); diff --git a/test/unit/org/apache/cassandra/journal/JournalTest.java b/test/unit/org/apache/cassandra/journal/JournalTest.java index 30952a96d877..7dd1c948a81c 100644 --- a/test/unit/org/apache/cassandra/journal/JournalTest.java +++ b/test/unit/org/apache/cassandra/journal/JournalTest.java @@ -65,20 +65,20 @@ public void testSimpleReadWrite() throws IOException journal.blockingWrite(id3, 3L, Collections.singleton(1)); journal.blockingWrite(id4, 4L, Collections.singleton(1)); - assertEquals(1L, (long) journal.readFirst(id1)); - assertEquals(2L, (long) journal.readFirst(id2)); - assertEquals(3L, (long) journal.readFirst(id3)); - assertEquals(4L, (long) journal.readFirst(id4)); + assertEquals(1L, (long) journal.readLast(id1)); + assertEquals(2L, (long) journal.readLast(id2)); + assertEquals(3L, (long) journal.readLast(id3)); + assertEquals(4L, (long) journal.readLast(id4)); journal.shutdown(); journal = new Journal<>("TestJournal", directory, TestParams.INSTANCE, TimeUUIDKeySupport.INSTANCE, LongSerializer.INSTANCE, SegmentCompactor.noop()); journal.start(); - assertEquals(1L, (long) journal.readFirst(id1)); - assertEquals(2L, (long) journal.readFirst(id2)); - assertEquals(3L, (long) journal.readFirst(id3)); - assertEquals(4L, (long) journal.readFirst(id4)); + assertEquals(1L, (long) journal.readLast(id1)); + assertEquals(2L, (long) journal.readLast(id2)); + assertEquals(3L, (long) journal.readLast(id3)); + assertEquals(4L, (long) journal.readLast(id4)); journal.shutdown(); } diff --git a/test/unit/org/apache/cassandra/journal/SegmentTest.java b/test/unit/org/apache/cassandra/journal/SegmentTest.java index 573ba4c9e059..d78fae8ecf0a 100644 --- a/test/unit/org/apache/cassandra/journal/SegmentTest.java +++ b/test/unit/org/apache/cassandra/journal/SegmentTest.java @@ -76,22 +76,22 @@ public void testWriteReadActiveSegment() throws IOException // read all 4 entries by id and compare with originals EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - segment.readFirst(id1, holder); + segment.readLast(id1, holder); assertEquals(id1, holder.key); assertEquals(hosts1, holder.hosts); assertEquals(record1, holder.value); - segment.readFirst(id2, holder); + segment.readLast(id2, holder); assertEquals(id2, holder.key); assertEquals(hosts2, holder.hosts); assertEquals(record2, holder.value); - segment.readFirst(id3, holder); + segment.readLast(id3, holder); assertEquals(id3, holder.key); assertEquals(hosts3, holder.hosts); assertEquals(record3, holder.value); - segment.readFirst(id4, holder); + segment.readLast(id4, holder); assertEquals(id4, holder.key); assertEquals(hosts4, holder.hosts); assertEquals(record4, holder.value); @@ -143,22 +143,22 @@ public void testReadClosedSegmentByID() throws IOException // read all 4 entries by id and compare with originals EntrySerializer.EntryHolder holder = new EntrySerializer.EntryHolder<>(); - staticSegment.readFirst(id1, holder); + staticSegment.readLast(id1, holder); assertEquals(id1, holder.key); assertEquals(hosts1, holder.hosts); assertEquals(record1, holder.value); - staticSegment.readFirst(id2, holder); + staticSegment.readLast(id2, holder); assertEquals(id2, holder.key); assertEquals(hosts2, holder.hosts); assertEquals(record2, holder.value); - staticSegment.readFirst(id3, holder); + staticSegment.readLast(id3, holder); assertEquals(id3, holder.key); assertEquals(hosts3, holder.hosts); assertEquals(record3, holder.value); - staticSegment.readFirst(id4, holder); + staticSegment.readLast(id4, holder); assertEquals(id4, holder.key); assertEquals(hosts4, holder.hosts); assertEquals(record4, holder.value); diff --git a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java index 93e35bc92c25..115a231d5f9a 100644 --- a/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java +++ b/test/unit/org/apache/cassandra/service/accord/CommandsForRangesTest.java @@ -51,7 +51,7 @@ public class CommandsForRangesTest for (int i = 0; i < numTxn; i++) { TxnId id = TXN_ID_GEN.next(rs); - map.put(id, new CommandsForRangesLoader.Summary(id, id, SaveStatus.ReadyToExecute, ranges, Collections.emptyList())); + map.put(id, new CommandsForRangesLoader.Summary(id, id, SaveStatus.ReadyToExecute, ranges, null, false)); } return CommandsForRanges.create(ranges, map); }; diff --git a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java index 78af413da85f..680777bde8ed 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/AsyncLoaderTest.java @@ -123,7 +123,7 @@ public void cachedTest() // everything is cached, so the loader should return immediately commandStore.executeBlocking(() -> { Context context = new Context(); - boolean result = loader.load(context, (o, t) -> Assert.fail()); + boolean result = loader.load(txnId, context, (o, t) -> Assert.fail()); Assert.assertEquals(safeCommandGlobal, context.commands.get(txnId).global()); Assert.assertEquals(safeTimestampsGlobal, context.timestampsForKey.get(key).global()); Assert.assertTrue(result); @@ -162,7 +162,7 @@ public void loadTest() AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> { + boolean result = loader.load(txnId, context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); Assert.assertTrue(context.timestampsForKey.containsKey(key)); @@ -175,7 +175,7 @@ public void loadTest() // then return immediately after the callback has fired commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> Assert.fail()); + boolean result = loader.load(txnId, context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); Assert.assertTrue(context.timestampsForKey.containsKey(key)); Assert.assertTrue(result); @@ -210,7 +210,7 @@ public void partialLoadTest() AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> { + boolean result = loader.load(txnId, context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); Assert.assertTrue(context.timestampsForKey.containsKey(key)); @@ -225,7 +225,7 @@ public void partialLoadTest() // then return immediately after the callback has fired commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> Assert.fail()); + boolean result = loader.load(txnId, context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); Assert.assertTrue(context.timestampsForKey.containsKey(key)); Assert.assertTrue(result); @@ -260,7 +260,7 @@ public void inProgressLoadTest() throws Throwable AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> { + boolean result = loader.load(txnId, context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); Assert.assertFalse(context.timestampsForKey.containsKey(key)); @@ -277,7 +277,7 @@ public void inProgressLoadTest() throws Throwable // then return immediately after the callback has fired commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> Assert.fail()); + boolean result = loader.load(txnId, context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); Assert.assertFalse(context.timestampsForKey.containsKey(key)); Assert.assertTrue(result); @@ -322,7 +322,7 @@ public void failedLoadTest() throws Throwable AsyncLoader loader = new AsyncLoader(commandStore, ImmutableList.of(txnId1, txnId2), RoutingKeys.EMPTY, KeyHistory.COMMANDS); - boolean result = loader.load(new Context(), (u, t) -> { + boolean result = loader.load(txnId1, new Context(), (u, t) -> { Assert.assertFalse(callback.isDone()); Assert.assertNull(u); Assert.assertEquals(failure, t); @@ -369,7 +369,7 @@ public void inProgressCommandSaveTest() AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> { + boolean result = loader.load(txnId, context, (o, t) -> { Assert.assertNull(t); Assert.assertTrue(context.commands.containsKey(txnId)); cbFired.setSuccess(null); @@ -384,7 +384,7 @@ public void inProgressCommandSaveTest() // then return immediately after the callback has fired commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> Assert.fail()); + boolean result = loader.load(txnId, context, (o, t) -> Assert.fail()); Assert.assertTrue(context.commands.containsKey(txnId)); Assert.assertTrue(result); }); @@ -432,7 +432,7 @@ private , C extends AccordStateCa AsyncPromise cbFired = new AsyncPromise<>(); Context context = new Context(); commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> { + boolean result = loader.load(txnId, context, (o, t) -> { Assert.assertNull(t); Assert.assertEquals(context.timestampsForKey.containsKey(key), inContext.apply(context) == context.timestampsForKey); Assert.assertEquals(context.commandsForKey.containsKey(key), inContext.apply(context) == context.commandsForKey); @@ -446,7 +446,7 @@ private , C extends AccordStateCa // then return immediately after the callback has fired commandStore.executeBlocking(() -> { - boolean result = loader.load(context, (o, t) -> Assert.fail()); + boolean result = loader.load(txnId, context, (o, t) -> Assert.fail()); Assert.assertEquals(context.timestampsForKey.containsKey(key), inContext.apply(context) == context.timestampsForKey); Assert.assertEquals(context.commandsForKey.containsKey(key), inContext.apply(context) == context.commandsForKey); Assert.assertTrue(result); diff --git a/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java index 39b12a862a92..124749025de5 100644 --- a/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java +++ b/test/unit/org/apache/cassandra/service/accord/async/SimulatedAsyncOperationTest.java @@ -235,7 +235,7 @@ enum Action { SUCCESS, FAILURE} } @Override - public boolean load(AsyncOperation.Context context, BiConsumer callback) + public boolean load(TxnId primaryTxnId, AsyncOperation.Context context, BiConsumer callback) { if (delay) { From cc321720e31bc9a2b8ad33bcd5430d34b9ff69f5 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Fri, 4 Oct 2024 11:35:01 +0200 Subject: [PATCH 162/340] Fix condition on where we shut down accord; move scheduled executor shutdown until after MS. Wake up segment prepared after shutting down allocator, as no new segments will ever be allocated. Shut down flusher slightly differently: we do not signal from fsync complete, since all blocks should have been fsynced by then, but we will add invariant check to notice runaway threads. Wait for quiescense Truncate blocking Wait for scheduler shutdown before shutting down command store Shut down accord after shutting down messaging Truncate caches before replay --- .../org/apache/cassandra/journal/Flusher.java | 8 ++- .../org/apache/cassandra/journal/Journal.java | 32 +++++++++++- .../cassandra/service/StorageService.java | 11 ++-- .../service/accord/AccordCommandStore.java | 5 ++ .../service/accord/AccordCommandStores.java | 38 ++++++++++++++ .../service/accord/AccordJournal.java | 51 ++++++++----------- .../service/accord/AccordKeyspace.java | 17 +++++-- .../service/accord/AccordService.java | 7 ++- .../service/accord/AccordVerbHandler.java | 6 +++ .../apache/cassandra/utils/ExecutorUtils.java | 28 ++++++++++ 10 files changed, 161 insertions(+), 42 deletions(-) diff --git a/src/java/org/apache/cassandra/journal/Flusher.java b/src/java/org/apache/cassandra/journal/Flusher.java index a0aa4ef11730..7982dcec7ce0 100644 --- a/src/java/org/apache/cassandra/journal/Flusher.java +++ b/src/java/org/apache/cassandra/journal/Flusher.java @@ -490,7 +490,13 @@ private void awaitFsyncAt(long flushTime, Timer.Context context) { WaitQueue.Signal signal = fsyncComplete.register(context, Timer.Context::stop); if (fsyncFinishedFor < flushTime) - signal.awaitUninterruptibly(); + { + signal.awaitThrowUncheckedOnInterrupt(); + + Journal.State state = journal.state.get(); + Invariants.checkState(state == Journal.State.NORMAL, + "Thread %s outlived journal, which is in %s state", Thread.currentThread(), state); + } else signal.cancel(); } diff --git a/src/java/org/apache/cassandra/journal/Journal.java b/src/java/org/apache/cassandra/journal/Journal.java index ba4bf503b25e..5501146d8d97 100644 --- a/src/java/org/apache/cassandra/journal/Journal.java +++ b/src/java/org/apache/cassandra/journal/Journal.java @@ -111,7 +111,10 @@ public class Journal implements Shutdownable private final AtomicReference> segments = new AtomicReference<>(); + final AtomicReference state = new AtomicReference<>(State.UNINITIALIZED); + Interruptible allocator; + // TODO (required): we do not need wait queues here, we can just wait on a signal on a segment while its byte buffer is being allocated private final WaitQueue segmentPrepared = newWaitQueue(); private final WaitQueue allocatorThreadWaitQueue = newWaitQueue(); private final BooleanSupplier allocatorThreadWaitCondition = () -> (availableSegment == null); @@ -210,6 +213,8 @@ public void onFlush(RecordPointer recordPointer, Runnable runnable) public void start() { + Invariants.checkState(state.compareAndSet(State.UNINITIALIZED, State.INITIALIZING), + "Unexpected journal state during initialization", state); metrics.register(flusher); deleteTmpFiles(); @@ -228,6 +233,8 @@ public void start() advanceSegment(null); flusher.start(); compactor.start(); + Invariants.checkState(state.compareAndSet(State.INITIALIZING, State.NORMAL), + "Unexpected journal state after initialization", state); } @VisibleForTesting @@ -253,16 +260,19 @@ private void deleteTmpFiles() @Override public boolean isTerminated() { - return false; + return state.get() == State.TERMINATED; } public void shutdown() { try { + Invariants.checkState(state.compareAndSet(State.NORMAL, State.SHUTDOWN), + "Unexpected journal state while trying to shut down", state); allocator.shutdown(); wakeAllocator(); // Wake allocator to force it into shutdown allocator.awaitTermination(1, TimeUnit.MINUTES); + segmentPrepared.signalAll(); // Wake up all threads waiting on the new segment compactor.shutdown(); compactor.awaitTermination(1, TimeUnit.MINUTES); flusher.shutdown(); @@ -270,6 +280,8 @@ public void shutdown() closer.awaitTermination(1, TimeUnit.MINUTES); closeAllSegments(); metrics.deregister(); + Invariants.checkState(state.compareAndSet(State.SHUTDOWN, State.TERMINATED), + "Unexpected journal state while trying to shut down", state); } catch (InterruptedException e) { @@ -574,7 +586,14 @@ private void awaitAvailableSegment(ActiveSegment currentActiveSegment) { WaitQueue.Signal prepared = segmentPrepared.register(metrics.waitingOnSegmentAllocation.time(), Context::stop); if (availableSegment == null && currentSegment == currentActiveSegment) - prepared.awaitUninterruptibly(); + { + prepared.awaitThrowUncheckedOnInterrupt(); + + // In case we woke up due to shutdown signal or interrupt, check mode + State state = this.state.get(); + if (state.ordinal() > State.NORMAL.ordinal()) + throw new IllegalStateException("Can not obtain allocated segment due to shutdown " + state); + } else prepared.cancel(); } @@ -1024,4 +1043,13 @@ public void close() segments.close(); } } + + enum State + { + UNINITIALIZED, + INITIALIZING, + NORMAL, + SHUTDOWN, + TERMINATED + } } diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java index 98da35158b96..4f8c4cc9f8a9 100644 --- a/src/java/org/apache/cassandra/service/StorageService.java +++ b/src/java/org/apache/cassandra/service/StorageService.java @@ -3795,9 +3795,6 @@ protected synchronized void drain(boolean isFinalShutdown) throws IOException, I logger.debug(msg); transientMode = Optional.of(Mode.DRAINING); - if (DatabaseDescriptor.getAccordTransactionsEnabled()) - AccordService.instance().shutdownAndWait(1, MINUTES); - try { /* not clear this is reasonable time, but propagated from prior embedded behaviour */ @@ -3813,7 +3810,7 @@ protected synchronized void drain(boolean isFinalShutdown) throws IOException, I if (daemon != null) shutdownClientServers(); - ScheduledExecutors.optionalTasks.shutdown(); + Gossiper.instance.stop(); ActiveRepairService.instance().stop(); @@ -3823,6 +3820,9 @@ protected synchronized void drain(boolean isFinalShutdown) throws IOException, I transientMode = Optional.of(Mode.DRAINING); } + if (AccordService.isSetup()) + AccordService.instance().shutdownAndWait(1, MINUTES); + // In-progress writes originating here could generate hints to be written, // which is currently scheduled on the mutation stage. So shut down MessagingService // before mutation stage, so we can get all the hints saved before shutting down. @@ -3837,6 +3837,9 @@ protected synchronized void drain(boolean isFinalShutdown) throws IOException, I logger.error("Messaging service timed out shutting down", t); } + // ScheduledExecutors shuts down after MessagingService, as MessagingService may issue tasks to it. + ScheduledExecutors.optionalTasks.shutdown(); + if (!isFinalShutdown) { logger.debug("clearing mutation stage"); diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java index 0f174e640fcc..455154b4ad5a 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStore.java @@ -695,6 +695,11 @@ public static class CommandStoreExecutor implements CacheSize this.threadId = threadId; } + public boolean hasTasks() + { + return delegate.getPendingTaskCount() > 0 || delegate.getActiveTaskCount() > 0; + } + CommandStoreExecutor(AccordStateCache stateCache, SequentialExecutorPlus delegate) { this.stateCache = stateCache; diff --git a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java index 6d9744310f37..620bad8b1699 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java +++ b/src/java/org/apache/cassandra/service/accord/AccordCommandStores.java @@ -17,6 +17,10 @@ */ package org.apache.cassandra.service.accord; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; @@ -41,6 +45,7 @@ import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.accord.AccordCommandStore.CommandStoreExecutor; import org.apache.cassandra.service.accord.api.AccordRoutingKey; +import org.apache.cassandra.utils.concurrent.UncheckedInterruptedException; import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory; @@ -154,6 +159,39 @@ public synchronized Supplier updateTopology(Node node, Topology newT }; } + public void waitForQuiescense() + { + boolean hadPending; + try + { + do + { + hadPending = false; + List> futures = new ArrayList<>(); + for (CommandStoreExecutor executor : executors) + { + if (executor.hasTasks()) + { + futures.add(executor.submit(() -> {})); + hadPending = true; + } + } + for (Future future : futures) + future.get(); + futures.clear(); + } + while (hadPending); + } + catch (ExecutionException e) + { + throw new IllegalStateException("Should have never been thrown", e); + } + catch (InterruptedException e) + { + throw new UncheckedInterruptedException(e); + } + } + @Override public synchronized void shutdown() { diff --git a/src/java/org/apache/cassandra/service/accord/AccordJournal.java b/src/java/org/apache/cassandra/service/accord/AccordJournal.java index 49af8042a8aa..38bd9f9101d2 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordJournal.java +++ b/src/java/org/apache/cassandra/service/accord/AccordJournal.java @@ -26,7 +26,6 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; @@ -40,9 +39,10 @@ import accord.local.DurableBefore; import accord.local.Node; import accord.local.RedundantBefore; -import accord.primitives.SaveStatus; +import accord.local.cfk.CommandsForKey; import accord.primitives.Deps; import accord.primitives.Ranges; +import accord.primitives.SaveStatus; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.utils.Invariants; @@ -73,9 +73,6 @@ public class AccordJournal implements IJournal, Shutdownable { - - private final AtomicBoolean isReplay = new AtomicBoolean(false); - static { // make noise early if we forget to update our version mappings @@ -93,7 +90,7 @@ public class AccordJournal implements IJournal, Shutdownable private final Params params; Node node; - enum Status { INITIALIZED, STARTING, STARTED, TERMINATING, TERMINATED } + enum Status { INITIALIZED, STARTING, REPLAY, STARTED, TERMINATING, TERMINATED } private volatile Status status = Status.INITIALIZED; @VisibleForTesting @@ -127,10 +124,14 @@ public AccordJournal start(Node node) this.node = node; status = Status.STARTING; journal.start(); - status = Status.STARTED; return this; } + public boolean started() + { + return status == Status.STARTED; + } + public Params configuration() { return params; @@ -150,7 +151,7 @@ public boolean isTerminated() @Override public void shutdown() { - Invariants.checkState(status == Status.STARTED); + Invariants.checkState(status == Status.REPLAY || status == Status.STARTED); status = Status.TERMINATING; journal.shutdown(); status = Status.TERMINATED; @@ -230,7 +231,7 @@ public List loadHistoricalTransactions(int store) @Override public void appendCommand(int store, SavedCommand.DiffWriter value, Runnable onFlush) { - if (value == null || isReplay.get()) + if (value == null || status == Status.REPLAY) { if (onFlush != null) onFlush.run(); @@ -252,7 +253,7 @@ public PersistentField.Persister durableBeforePers @Override public AsyncResult persist(DurableBefore addDurableBefore, DurableBefore newDurableBefore) { - if (isReplay.get()) + if (status == Status.REPLAY) return AsyncResults.success(null); AsyncResult.Settable result = AsyncResults.settable(); @@ -306,18 +307,6 @@ public SavedCommand.Builder loadDiffs(int commandStoreId, TxnId txnId) return builder; } - public List loadSeparateDiffs(int commandStoreId, TxnId txnId) - { - JournalKey key = new JournalKey(txnId, JournalKey.Type.COMMAND_DIFF, commandStoreId); - List builders = new ArrayList<>(); - journalTable.readAll(key, (in, version) -> { - SavedCommand.Builder builder = new SavedCommand.Builder(txnId); - builder.deserializeNext(in, version); - builders.add(builder); - }); - return builders; - } - private BUILDER readAll(JournalKey key) { BUILDER builder = (BUILDER) key.type.serializer.mergerFor(key); @@ -367,6 +356,10 @@ public void runCompactorForTesting() public void replay() { + logger.info("Starting journal replay."); + CommandsForKey.disableLinearizabilityViolationsReporting(); + AccordKeyspace.truncateAllCaches(); + // TODO (expected): optimize replay memory footprint class ToApply { @@ -383,8 +376,6 @@ class ToApply List toApply = new ArrayList<>(); try (AccordJournalTable.KeyOrderIterator iter = journalTable.readAll()) { - isReplay.set(true); - JournalKey key; SavedCommand.Builder builder = new SavedCommand.Builder(); while ((key = iter.key()) != null) @@ -425,17 +416,20 @@ class ToApply for (ToApply apply : toApply) { AccordCommandStore commandStore = (AccordCommandStore) node.commandStores().forId(apply.key.commandStoreId); + logger.info("Apply {}", apply.command); commandStore.loader().apply(apply.command); } + + logger.info("Waiting for command stores to quiesce."); + ((AccordCommandStores)node.commandStores()).waitForQuiescense(); + CommandsForKey.enableLinearizabilityViolationsReporting(); + logger.info("Finished journal replay."); + status = Status.STARTED; } catch (Throwable t) { throw new RuntimeException("Can not replay journal.", t); } - finally - { - isReplay.set(false); - } } // TODO: this is here temporarily; for debugging purposes @@ -492,7 +486,6 @@ public void checkAllCommands() t); } } - } } } \ No newline at end of file diff --git a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java index 953a6f7318be..4a17c437941d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java +++ b/src/java/org/apache/cassandra/service/accord/AccordKeyspace.java @@ -37,23 +37,22 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; -import org.apache.cassandra.tcm.ClusterMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import accord.api.RoutingKey; -import accord.local.StoreParticipants; -import accord.local.cfk.CommandsForKey; import accord.impl.TimestampsForKey; import accord.local.Command; import accord.local.CommandStore; import accord.local.Node; import accord.local.RedundantBefore; +import accord.local.StoreParticipants; +import accord.local.cfk.CommandsForKey; +import accord.primitives.Ranges; +import accord.primitives.Route; import accord.primitives.SaveStatus; import accord.primitives.Status; import accord.primitives.Status.Durability; -import accord.primitives.Ranges; -import accord.primitives.Route; import accord.primitives.Timestamp; import accord.primitives.TxnId; import accord.topology.Topology; @@ -138,6 +137,7 @@ import org.apache.cassandra.service.accord.serializers.CommandSerializers; import org.apache.cassandra.service.accord.serializers.CommandsForKeySerializer; import org.apache.cassandra.service.accord.serializers.KeySerializers; +import org.apache.cassandra.tcm.ClusterMetadata; import org.apache.cassandra.utils.Clock.Global; import org.apache.cassandra.utils.CloseableIterator; import org.apache.cassandra.utils.btree.BTree; @@ -747,6 +747,13 @@ public static Tables tables() return Tables.of(Commands, TimestampsForKeys, CommandsForKeys, Topologies, EpochMetadata, Journal); } + public static void truncateAllCaches() + { + Keyspace ks = Keyspace.open(ACCORD_KEYSPACE_NAME); + for (String table : new String[]{ TimestampsForKeys.name, CommandsForKeys.name }) + ks.getColumnFamilyStore(table).truncateBlocking(); + } + private static ByteBuffer serialize(T obj, LocalVersionedSerializer serializer) throws IOException { int size = (int) serializer.serializedSize(obj); diff --git a/src/java/org/apache/cassandra/service/accord/AccordService.java b/src/java/org/apache/cassandra/service/accord/AccordService.java index e1e88cde0551..7acabd776c1d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordService.java +++ b/src/java/org/apache/cassandra/service/accord/AccordService.java @@ -201,6 +201,7 @@ private enum State {INIT, STARTED, SHUTDOWN} private final CoordinateDurabilityScheduling durabilityScheduling; private final AccordVerbHandler requestHandler; private final LocalConfig configuration; + @GuardedBy("this") private State state = State.INIT; @@ -381,6 +382,10 @@ public static void shutdownServiceAndWait(long timeout, TimeUnit unit) throws In i.shutdownAndWait(timeout, unit); } + public boolean shouldAcceptMessages() + { + return state == State.STARTED && journal.started(); + } public static IAccordService instance() { if (!DatabaseDescriptor.getAccordTransactionsEnabled()) @@ -964,7 +969,7 @@ public synchronized void shutdown() { if (state != State.STARTED) return; - ExecutorUtils.shutdown(shutdownableSubsystems()); + ExecutorUtils.shutdownSequentiallyAndWait(shutdownableSubsystems(), 1, TimeUnit.MINUTES); state = State.SHUTDOWN; } diff --git a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java index 5d8747d4a506..34c7b26bd95d 100644 --- a/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java +++ b/src/java/org/apache/cassandra/service/accord/AccordVerbHandler.java @@ -43,6 +43,12 @@ public AccordVerbHandler(Node node, AccordEndpointMapper endpointMapper) @Override public void doVerb(Message message) throws IOException { + if (!((AccordService)AccordService.instance()).shouldAcceptMessages()) + { + logger.debug("Dropping message {} from {}", message.verb(), message.from()); + return; + } + logger.trace("Receiving {} from {}", message.payload, message.from()); T request = message.payload; diff --git a/src/java/org/apache/cassandra/utils/ExecutorUtils.java b/src/java/org/apache/cassandra/utils/ExecutorUtils.java index 5bb841f32bdd..83fc72530a9c 100644 --- a/src/java/org/apache/cassandra/utils/ExecutorUtils.java +++ b/src/java/org/apache/cassandra/utils/ExecutorUtils.java @@ -79,6 +79,34 @@ else if (executor != null) } } + public static void shutdownSequentiallyAndWait(Iterable executors, long timeout, TimeUnit unit) + { + long deadline = nanoTime() + unit.toNanos(timeout); + + for (Object executor : executors) + { + try + { + if (executor instanceof ExecutorService) + { + ((ExecutorService) executor).shutdown(); + ((ExecutorService) executor).awaitTermination(Math.max(0, deadline - nanoTime()), NANOSECONDS); + } + else if (executor instanceof Shutdownable) + { + ((Shutdownable) executor).shutdown(); + ((Shutdownable) executor).awaitTermination(Math.max(0, deadline - nanoTime()), NANOSECONDS); + } + else + throw new IllegalArgumentException(executor.toString()); + } + catch (Throwable t) + { + throw new IllegalStateException("Caught interrupt while shutting down " + executor); + } + } + } + public static void shutdown(ExecutorService ... executors) { shutdown(Arrays.asList(executors)); From 82ae3adcb26fa34ca1ff64bf0cbcebb009367d5b Mon Sep 17 00:00:00 2001 From: Benedict Elliott Smith Date: Sat, 5 Oct 2024 12:32:06 +0100 Subject: [PATCH 163/340] ExclusiveSyncPoints should always wait for a simple quorum split JournalKey in journal table so we can index it reorder journal fields so we can easily index on route (when present) use Message.expiresAtNanos for callback expiration do not notify slow for range barriers Accord: Do not contact faulty replicas, and promptly report slow replies for preaccept/read. Do not wait for stale or left nodes for durability. --- modules/accord | 2 +- .../apache/cassandra/config/AccordSpec.java | 2 + .../db/compaction/CompactionIterator.java | 5 +- .../apache/cassandra/journal/Compactor.java | 2 +- .../cassandra/journal/InMemoryIndex.java | 12 +- .../org/apache/cassandra/journal/Segment.java | 1 + .../cassandra/journal/StaticSegment.java | 7 +- .../cassandra/metrics/AccordMetrics.java | 10 +- src/java/org/apache/cassandra/net/Verb.java | 106 ++-- .../cassandra/service/RetryStrategy.java | 299 ++++++++++ .../cassandra/service/TimeoutStrategy.java | 368 ++++++++++++ .../service/accord/AccordCommandStore.java | 9 +- .../service/accord/AccordJournal.java | 40 +- .../service/accord/AccordJournalTable.java | 29 +- .../service/accord/AccordKeyspace.java | 69 ++- .../service/accord/AccordMessageSink.java | 60 +- .../accord/AccordResponseVerbHandler.java | 74 +++ .../accord/AccordSafeCommandStore.java | 4 +- .../accord/AccordSegmentCompactor.java | 7 +- .../service/accord/AccordService.java | 48 +- .../service/accord/AccordStateCache.java | 2 - .../service/accord/AccordVerbHandler.java | 2 +- .../accord/CommandsForRangesLoader.java | 32 +- .../service/accord/IAccordService.java | 4 +- .../cassandra/service/accord/JournalKey.java | 34 +- .../service/accord/SavedCommand.java | 26 +- .../service/accord/api/AccordTimeService.java | 47 ++ .../accord/api/AccordTopologySorter.java | 27 +- .../accord/api/CompositeTopologySorter.java | 11 + .../accord/repair/RepairSyncPointAdapter.java | 2 +- .../repair/RequiredResponseTracker.java | 4 +- .../serializers/ReadDataSerializers.java | 2 +- .../service/paxos/ContentionStrategy.java | 548 ++---------------- .../distributed/test/ForBenchmarks.java | 36 ++ .../test/accord/AccordLoadTest.java | 288 +++++---- .../accord/AccordJournalCompactionTest.java | 1 + .../apache/cassandra/journal/IndexTest.java | 4 +- .../cassandra/service/RetryStrategyTest.java | 482 +++++++++++++++ .../service/accord/AccordMessageSinkTest.java | 8 +- .../service/accord/AccordTestUtils.java | 9 +- .../service/accord/EpochSyncTest.java | 4 +- .../accord/SimulatedAccordCommandStore.java | 10 +- .../repair/RequiredResponseTrackerTest.java | 3 +- .../service/paxos/ContentionStrategyTest.java | 466 --------------- 44 files changed, 1912 insertions(+), 1294 deletions(-) create mode 100644 src/java/org/apache/cassandra/service/RetryStrategy.java create mode 100644 src/java/org/apache/cassandra/service/TimeoutStrategy.java create mode 100644 src/java/org/apache/cassandra/service/accord/AccordResponseVerbHandler.java create mode 100644 src/java/org/apache/cassandra/service/accord/api/AccordTimeService.java create mode 100644 test/distributed/org/apache/cassandra/distributed/test/ForBenchmarks.java create mode 100644 test/unit/org/apache/cassandra/service/RetryStrategyTest.java delete mode 100644 test/unit/org/apache/cassandra/service/paxos/ContentionStrategyTest.java diff --git a/modules/accord b/modules/accord index 08ee5ce1c630..841e139bc8a9 160000 --- a/modules/accord +++ b/modules/accord @@ -1 +1 @@ -Subproject commit 08ee5ce1c6301201ccaf7d580a6af289ab4c5765 +Subproject commit 841e139bc8a974ac674ce8eae847bd52255ca544 diff --git a/src/java/org/apache/cassandra/config/AccordSpec.java b/src/java/org/apache/cassandra/config/AccordSpec.java index 4861069409ed..451bfeaa5495 100644 --- a/src/java/org/apache/cassandra/config/AccordSpec.java +++ b/src/java/org/apache/cassandra/config/AccordSpec.java @@ -40,6 +40,8 @@ public class AccordSpec // TODO (expected): we should be able to support lower recover delays, at least for txns public volatile DurationSpec.IntMillisecondsBound recover_delay = new DurationSpec.IntMillisecondsBound(5000); public volatile DurationSpec.IntMillisecondsBound range_sync_recover_delay = new DurationSpec.IntMillisecondsBound(10000); + public String slowPreAccept = "30ms <= p50*2 <= 100ms"; + public String slowRead = "30ms <= p50*2 <= 100ms"; public long recoveryDelayFor(TxnId txnId, TimeUnit unit) { diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java index 2086fddc8832..3c7b1131369d 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java @@ -85,7 +85,6 @@ import org.apache.cassandra.io.sstable.format.SSTableReader; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.DataOutputBuffer; -import org.apache.cassandra.journal.KeySupport; import org.apache.cassandra.metrics.TopPartitionTracker; import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.CompactionParams.TombstoneOption; @@ -110,7 +109,6 @@ import org.apache.cassandra.utils.TimeUUID; import static accord.local.Cleanup.ERASE; -import static accord.local.Cleanup.TRUNCATE; import static accord.local.Cleanup.TRUNCATE_WITH_OUTCOME; import static accord.local.Cleanup.shouldCleanupPartial; import static com.google.common.base.Preconditions.checkState; @@ -1020,7 +1018,6 @@ class AccordJournalPurger extends AbstractPurger final Int2ObjectHashMap ranges; final ColumnMetadata recordColumn; final ColumnMetadata versionColumn; - final KeySupport keySupport = JournalKey.SUPPORT; final AccordService service; JournalKey key = null; @@ -1051,7 +1048,7 @@ public AccordJournalPurger(Supplier serviceSupplier) @Override protected void beginPartition(UnfilteredRowIterator partition) { - key = keySupport.deserialize(partition.partitionKey().getKey(), 0, userVersion); + key = AccordKeyspace.JournalColumns.getJournalKey(partition.partitionKey()); serializer = (AccordJournalValueSerializers.FlyweightSerializer) key.type.serializer; builder = serializer.mergerFor(key); maxSeenTimestamp = -1; diff --git a/src/java/org/apache/cassandra/journal/Compactor.java b/src/java/org/apache/cassandra/journal/Compactor.java index 51b2fec97b59..4ecfb7409195 100644 --- a/src/java/org/apache/cassandra/journal/Compactor.java +++ b/src/java/org/apache/cassandra/journal/Compactor.java @@ -45,7 +45,7 @@ public final class Compactor implements Runnable, Shutdownable synchronized void start() { - if (!journal.params.enableCompaction()) + if (journal.params.enableCompaction()) schedule(journal.params.compactionPeriodMillis(), TimeUnit.MILLISECONDS); } diff --git a/src/java/org/apache/cassandra/journal/InMemoryIndex.java b/src/java/org/apache/cassandra/journal/InMemoryIndex.java index 1f0da7fd2881..77fd7352eedc 100644 --- a/src/java/org/apache/cassandra/journal/InMemoryIndex.java +++ b/src/java/org/apache/cassandra/journal/InMemoryIndex.java @@ -27,6 +27,7 @@ import org.apache.cassandra.io.util.File; import org.apache.cassandra.io.util.FileOutputStreamPlus; +import org.apache.cassandra.journal.StaticSegment.SequentialReader; /** * An index for a segment that's still being updated by journal writers concurrently. @@ -138,17 +139,10 @@ static InMemoryIndex rebuild(Descriptor descriptor, KeySupport keySupp { InMemoryIndex index = new InMemoryIndex<>(keySupport, new TreeMap<>(keySupport)); - try (StaticSegment.SequentialReader reader = StaticSegment.sequentialReader(descriptor, keySupport, fsyncedLimit)) + try (SequentialReader reader = StaticSegment.sequentialReader(descriptor, keySupport, fsyncedLimit)) { - int last = -1; while (reader.advance()) - { - int current = reader.offset(); - if (last >= 0) - index.update(reader.key(), last, current); - last = current; - } - + index.update(reader.key(), reader.offset, reader.buffer.position() - reader.offset); } return index; } diff --git a/src/java/org/apache/cassandra/journal/Segment.java b/src/java/org/apache/cassandra/journal/Segment.java index 7f955669cdae..77f7c68fea27 100644 --- a/src/java/org/apache/cassandra/journal/Segment.java +++ b/src/java/org/apache/cassandra/journal/Segment.java @@ -93,6 +93,7 @@ void readAll(K id, EntrySerializer.EntryHolder into, RecordConsumer onEntr int size = Index.readSize(all[i]); Invariants.checkState(offset < prevOffset); Invariants.checkState(read(offset, size, into), "Read should always return true"); + Invariants.checkState(id.equals(into.key), "Index for %s read incorrect key: expected %s but read %s", descriptor, id, into.key); onEntry.accept(descriptor.timestamp, offset, into.key, into.value, into.hosts, into.userVersion); } } diff --git a/src/java/org/apache/cassandra/journal/StaticSegment.java b/src/java/org/apache/cassandra/journal/StaticSegment.java index 3a8c03bb1a32..bf46ca8ecddb 100644 --- a/src/java/org/apache/cassandra/journal/StaticSegment.java +++ b/src/java/org/apache/cassandra/journal/StaticSegment.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.concurrent.locks.LockSupport; +import accord.utils.Invariants; import org.agrona.collections.IntHashSet; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.io.util.File; @@ -260,7 +261,11 @@ boolean read(int offset, int size, EntrySerializer.EntryHolder into) ByteBuffer duplicate = buffer.duplicate().position(offset).limit(offset + size); try (DataInputBuffer in = new DataInputBuffer(duplicate, false)) { - return EntrySerializer.tryRead(into, keySupport, duplicate, in, syncedOffsets.syncedOffset(), descriptor.userVersion); + if (!EntrySerializer.tryRead(into, keySupport, duplicate, in, syncedOffsets.syncedOffset(), descriptor.userVersion)) + return false; + + Invariants.checkState(in.available() == 0); + return true; } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/metrics/AccordMetrics.java b/src/java/org/apache/cassandra/metrics/AccordMetrics.java index c9d8e2968730..8064dee66a6d 100644 --- a/src/java/org/apache/cassandra/metrics/AccordMetrics.java +++ b/src/java/org/apache/cassandra/metrics/AccordMetrics.java @@ -31,7 +31,7 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.codahale.metrics.Timer; -import org.apache.cassandra.service.accord.AccordService; +import org.apache.cassandra.service.accord.api.AccordTimeService; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics; @@ -206,7 +206,7 @@ else if (txnId.isRead()) @Override public void onStable(Command cmd) { - long now = AccordService.now(); + long now = AccordTimeService.nowMicros(); AccordMetrics metrics = forTransaction(cmd.txnId()); if (metrics != null) { @@ -218,7 +218,7 @@ public void onStable(Command cmd) @Override public void onExecuted(Command cmd) { - long now = AccordService.now(); + long now = AccordTimeService.nowMicros(); AccordMetrics metrics = forTransaction(cmd.txnId()); if (metrics != null) { @@ -232,7 +232,7 @@ public void onExecuted(Command cmd) @Override public void onApplied(Command cmd, long applyStartTimestamp) { - long now = AccordService.now(); + long now = AccordTimeService.nowMicros(); AccordMetrics metrics = forTransaction(cmd.txnId()); if (metrics != null) { @@ -270,7 +270,7 @@ public void onRecover(TxnId txnId, Timestamp recoveryTimestamp) AccordMetrics metrics = forTransaction(txnId); if (metrics != null) { - long now = AccordService.now(); + long now = AccordTimeService.nowMicros(); metrics.recoveryDuration.update(now - recoveryTimestamp.hlc(), MICROSECONDS); metrics.recoveryDelay.update(recoveryTimestamp.hlc() - txnId.hlc(), MICROSECONDS); diff --git a/src/java/org/apache/cassandra/net/Verb.java b/src/java/org/apache/cassandra/net/Verb.java index f146a5381a2b..bb1c3b61ee88 100644 --- a/src/java/org/apache/cassandra/net/Verb.java +++ b/src/java/org/apache/cassandra/net/Verb.java @@ -309,56 +309,57 @@ public enum Verb DATA_MOVEMENT_EXECUTED_REQ (817, P1, rpcTimeout, MISC, () -> DataMovement.Status.serializer, () -> DataMovements.instance, DATA_MOVEMENT_EXECUTED_RSP ), // accord - ACCORD_SIMPLE_RSP (119, P2, writeTimeout, IMMEDIATE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER ), - ACCORD_PRE_ACCEPT_RSP (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_PRE_ACCEPT_REQ (121, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_PRE_ACCEPT_RSP ), - ACCORD_ACCEPT_RSP (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.reply, RESPONSE_HANDLER ), - ACCORD_ACCEPT_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), - ACCORD_ACCEPT_INVALIDATE_REQ (124, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, AccordService::verbHandlerOrNoop, ACCORD_ACCEPT_RSP ), - ACCORD_READ_RSP (125, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.reply, RESPONSE_HANDLER ), - ACCORD_READ_REQ (126, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), - ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), - ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, AccordService::verbHandlerOrNoop ), - ACCORD_APPLY_RSP (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.reply, RESPONSE_HANDLER ), - ACCORD_APPLY_REQ (130, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP ), - ACCORD_BEGIN_RECOVER_RSP (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.reply, RESPONSE_HANDLER ), - ACCORD_BEGIN_RECOVER_REQ (132, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_RECOVER_RSP ), - ACCORD_BEGIN_INVALIDATE_RSP (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.reply, RESPONSE_HANDLER ), - ACCORD_BEGIN_INVALIDATE_REQ (134, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_BEGIN_INVALIDATE_RSP ), - ACCORD_AWAIT_RSP (136, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.syncReply, RESPONSE_HANDLER ), - ACCORD_AWAIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.request, AccordService::verbHandlerOrNoop, ACCORD_AWAIT_RSP ), - ACCORD_AWAIT_ASYNC_RSP_REQ (137, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.asyncReply, AccordService::verbHandlerOrNoop ), - ACCORD_WAIT_UNTIL_APPLIED_REQ (138, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitUntilApplied, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP ), - ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.reply, RESPONSE_HANDLER ), - ACCORD_CHECK_STATUS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), - ACCORD_CALCULATE_DEPS_RSP (143, P2, writeTimeout, IMMEDIATE, () -> CalculateDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_CALCULATE_DEPS_REQ (144, P2, longTimeout, IMMEDIATE, () -> CalculateDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_CALCULATE_DEPS_RSP), - ACCORD_GET_EPHMRL_READ_DEPS_RSP (161, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.reply, RESPONSE_HANDLER ), - ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), - ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.reply, RESPONSE_HANDLER ), - ACCORD_GET_MAX_CONFLICT_REQ (164, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_GET_MAX_CONFLICT_RSP), - ACCORD_FETCH_DATA_RSP (145, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.reply, RESPONSE_HANDLER ), - ACCORD_FETCH_DATA_REQ (146, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.request, AccordService::verbHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), - ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, MISC, () -> SetDurableSerializers.shardDurable, AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, MISC, () -> SetDurableSerializers.globallyDurable,AccordService::verbHandlerOrNoop, ACCORD_SIMPLE_RSP ), - ACCORD_QUERY_DURABLE_BEFORE_RSP (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.reply, RESPONSE_HANDLER ), - ACCORD_QUERY_DURABLE_BEFORE_REQ (150, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,AccordService::verbHandlerOrNoop, ACCORD_QUERY_DURABLE_BEFORE_RSP ), - - ACCORD_SYNC_NOTIFY_REQ (151, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SIMPLE_RSP ), - - ACCORD_APPLY_AND_WAIT_REQ (152, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, AccordService::verbHandlerOrNoop, ACCORD_READ_RSP), - - CONSENSUS_KEY_MIGRATION (153, P1, writeTimeout, MUTATION, () -> ConsensusKeyMigrationFinished.serializer,() -> ConsensusKeyMigrationState.consensusKeyMigrationFinishedHandler), - - ACCORD_INTEROP_READ_RSP (154, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.replySerializer, RESPONSE_HANDLER), - ACCORD_INTEROP_READ_REQ (155, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.requestSerializer, AccordService::verbHandlerOrNoop, ACCORD_INTEROP_READ_RSP), - ACCORD_INTEROP_COMMIT_REQ (156, P2, writeTimeout, IMMEDIATE, () -> AccordInteropCommit.serializer, AccordService::verbHandlerOrNoop, ACCORD_INTEROP_READ_RSP), - ACCORD_INTEROP_READ_REPAIR_RSP (157, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.replySerializer, RESPONSE_HANDLER), - ACCORD_INTEROP_READ_REPAIR_REQ (158, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.requestSerializer, AccordService::verbHandlerOrNoop, ACCORD_INTEROP_READ_REPAIR_RSP), - ACCORD_INTEROP_APPLY_REQ (160, P2, writeTimeout, IMMEDIATE, () -> AccordInteropApply.serializer, AccordService::verbHandlerOrNoop, ACCORD_APPLY_RSP), - ACCORD_FETCH_MIN_EPOCH_RSP (166, P2, writeTimeout, IMMEDIATE, () -> FetchMinEpoch.Response.serializer, RESPONSE_HANDLER), - ACCORD_FETCH_MIN_EPOCH_REQ (165, P2, writeTimeout, IMMEDIATE, () -> FetchMinEpoch.serializer, () -> FetchMinEpoch.handler, ACCORD_FETCH_MIN_EPOCH_RSP), + ACCORD_SIMPLE_RSP (119, P2, writeTimeout, IMMEDIATE, () -> EnumSerializer.simpleReply, AccordService::responseHandlerOrNoop ), + ACCORD_PRE_ACCEPT_RSP (120, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_PRE_ACCEPT_REQ (121, P2, writeTimeout, IMMEDIATE, () -> PreacceptSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_PRE_ACCEPT_RSP ), + ACCORD_ACCEPT_RSP (122, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_ACCEPT_REQ (123, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_ACCEPT_RSP ), + ACCORD_ACCEPT_INVALIDATE_REQ (124, P2, writeTimeout, IMMEDIATE, () -> AcceptSerializers.invalidate, AccordService::requestHandlerOrNoop, ACCORD_ACCEPT_RSP ), + ACCORD_READ_RSP (125, P2, readTimeout, IMMEDIATE, () -> ReadDataSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_READ_REQ (126, P2, readTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, AccordService::requestHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_COMMIT_REQ (127, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_COMMIT_INVALIDATE_REQ (128, P2, writeTimeout, IMMEDIATE, () -> CommitSerializers.invalidate, AccordService::requestHandlerOrNoop ), + ACCORD_APPLY_RSP (129, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_APPLY_REQ (130, P2, writeTimeout, IMMEDIATE, () -> ApplySerializers.request, AccordService::requestHandlerOrNoop, ACCORD_APPLY_RSP ), + ACCORD_BEGIN_RECOVER_RSP (131, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_BEGIN_RECOVER_REQ (132, P2, writeTimeout, IMMEDIATE, () -> RecoverySerializers.request, AccordService::requestHandlerOrNoop, ACCORD_BEGIN_RECOVER_RSP ), + ACCORD_BEGIN_INVALIDATE_RSP (133, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_BEGIN_INVALIDATE_REQ (134, P2, writeTimeout, IMMEDIATE, () -> BeginInvalidationSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_BEGIN_INVALIDATE_RSP ), + ACCORD_AWAIT_RSP (136, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.syncReply, AccordService::responseHandlerOrNoop ), + ACCORD_AWAIT_REQ (135, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.request, AccordService::requestHandlerOrNoop, ACCORD_AWAIT_RSP ), + ACCORD_AWAIT_ASYNC_RSP_REQ (137, P2, writeTimeout, IMMEDIATE, () -> AwaitSerializer.asyncReply, AccordService::requestHandlerOrNoop ), + ACCORD_WAIT_UNTIL_APPLIED_REQ (138, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.waitUntilApplied, AccordService::requestHandlerOrNoop, ACCORD_READ_RSP ), + ACCORD_INFORM_DURABLE_REQ (140, P2, writeTimeout, IMMEDIATE, () -> InformDurableSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_CHECK_STATUS_RSP (141, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_CHECK_STATUS_REQ (142, P2, writeTimeout, IMMEDIATE, () -> CheckStatusSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_CHECK_STATUS_RSP ), + ACCORD_CALCULATE_DEPS_RSP (143, P2, writeTimeout, IMMEDIATE, () -> CalculateDepsSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_CALCULATE_DEPS_REQ (144, P2, longTimeout, IMMEDIATE, () -> CalculateDepsSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_CALCULATE_DEPS_RSP), + ACCORD_GET_EPHMRL_READ_DEPS_RSP (161, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_GET_EPHMRL_READ_DEPS_REQ (162, P2, writeTimeout, IMMEDIATE, () -> GetEphmrlReadDepsSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_GET_EPHMRL_READ_DEPS_RSP), + ACCORD_GET_MAX_CONFLICT_RSP (163, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_GET_MAX_CONFLICT_REQ (164, P2, writeTimeout, IMMEDIATE, () -> GetMaxConflictSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_GET_MAX_CONFLICT_RSP), + ACCORD_FETCH_DATA_RSP (145, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_FETCH_DATA_REQ (146, P2, writeTimeout, IMMEDIATE, () -> FetchSerializers.request, AccordService::requestHandlerOrNoop, ACCORD_FETCH_DATA_RSP ), + ACCORD_SET_SHARD_DURABLE_REQ (147, P2, writeTimeout, MISC, () -> SetDurableSerializers.shardDurable, AccordService::requestHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_SET_GLOBALLY_DURABLE_REQ (148, P2, writeTimeout, MISC, () -> SetDurableSerializers.globallyDurable,AccordService::requestHandlerOrNoop, ACCORD_SIMPLE_RSP ), + ACCORD_QUERY_DURABLE_BEFORE_RSP (149, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.reply, AccordService::responseHandlerOrNoop ), + ACCORD_QUERY_DURABLE_BEFORE_REQ (150, P2, writeTimeout, IMMEDIATE, () -> QueryDurableBeforeSerializers.request,AccordService::requestHandlerOrNoop, ACCORD_QUERY_DURABLE_BEFORE_RSP ), + + ACCORD_SYNC_NOTIFY_RSP (151, P2, writeTimeout, IMMEDIATE, () -> EnumSerializer.simpleReply, RESPONSE_HANDLER), + ACCORD_SYNC_NOTIFY_REQ (152, P2, writeTimeout, IMMEDIATE, () -> Notification.listSerializer, () -> AccordSyncPropagator.verbHandler, ACCORD_SYNC_NOTIFY_RSP ), + + ACCORD_APPLY_AND_WAIT_REQ (153, P2, writeTimeout, IMMEDIATE, () -> ReadDataSerializers.readData, AccordService::requestHandlerOrNoop, ACCORD_READ_RSP), + + CONSENSUS_KEY_MIGRATION (154, P1, writeTimeout, MUTATION, () -> ConsensusKeyMigrationFinished.serializer,() -> ConsensusKeyMigrationState.consensusKeyMigrationFinishedHandler), + + ACCORD_INTEROP_READ_RSP (155, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.replySerializer, AccordService::responseHandlerOrNoop), + ACCORD_INTEROP_READ_REQ (156, P2, writeTimeout, IMMEDIATE, () -> AccordInteropRead.requestSerializer, AccordService::requestHandlerOrNoop, ACCORD_INTEROP_READ_RSP), + ACCORD_INTEROP_COMMIT_REQ (157, P2, writeTimeout, IMMEDIATE, () -> AccordInteropCommit.serializer, AccordService::requestHandlerOrNoop, ACCORD_INTEROP_READ_RSP), + ACCORD_INTEROP_READ_REPAIR_RSP (158, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.replySerializer, AccordService::responseHandlerOrNoop), + ACCORD_INTEROP_READ_REPAIR_REQ (159, P2, writeTimeout, IMMEDIATE, () -> AccordInteropReadRepair.requestSerializer, AccordService::requestHandlerOrNoop, ACCORD_INTEROP_READ_REPAIR_RSP), + ACCORD_INTEROP_APPLY_REQ (160, P2, writeTimeout, IMMEDIATE, () -> AccordInteropApply.serializer, AccordService::requestHandlerOrNoop, ACCORD_APPLY_RSP), + ACCORD_FETCH_MIN_EPOCH_RSP (166, P2, writeTimeout, IMMEDIATE, () -> FetchMinEpoch.Response.serializer, RESPONSE_HANDLER), + ACCORD_FETCH_MIN_EPOCH_REQ (165, P2, writeTimeout, IMMEDIATE, () -> FetchMinEpoch.serializer, () -> FetchMinEpoch.handler, ACCORD_FETCH_MIN_EPOCH_RSP), // generic failure response FAILURE_RSP (99, P0, noTimeout, REQUEST_RESPONSE, () -> RequestFailure.serializer, RESPONSE_HANDLER ), @@ -490,6 +491,11 @@ public long expiresAfterNanos() return expiration.applyAsLong(NANOSECONDS); } + public long expiresAfter(TimeUnit units) + { + return expiration.applyAsLong(units); + } + // this is a little hacky, but reduces the number of parameters up top public boolean isResponse() { diff --git a/src/java/org/apache/cassandra/service/RetryStrategy.java b/src/java/org/apache/cassandra/service/RetryStrategy.java new file mode 100644 index 000000000000..7f48612a8f7c --- /dev/null +++ b/src/java/org/apache/cassandra/service/RetryStrategy.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.service; + +import com.google.common.annotations.VisibleForTesting; + +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.service.TimeoutStrategy.LatencySourceFactory; +import org.apache.cassandra.service.TimeoutStrategy.Wait; + +import java.util.concurrent.ThreadLocalRandom; +import java.util.function.DoubleSupplier; +import java.util.function.LongBinaryOperator; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static java.lang.Math.*; +import static java.util.Arrays.stream; +import static java.util.concurrent.TimeUnit.*; +import static org.apache.cassandra.service.TimeoutStrategy.parseWait; +import static org.apache.cassandra.utils.Clock.Global.nanoTime; + +/** + *